bzip2-libtoolizeautoconf.patch

   1 diff -Nru bzip2-1.0.2/Makefile.am bzip2-1.0.2.new/Makefile.am
   2 --- bzip2-1.0.2/Makefile.am     Thu Jan  1 01:00:00 1970
   3 +++ bzip2-1.0.2.new/Makefile.am Fri Feb  1 04:19:09 2002
   4 @@ -0,0 +1,31 @@
   5 +SUBDIRS                = doc
   6 +# Command-line programs; bzip2 links against the libtool library libbz2.la.
   7 +bin_PROGRAMS   = bzip2 bzip2recover
   8 +bzip2_SOURCES  = bzip2.c
   9 +
  10 +bzip2_LDADD            =  libbz2.la
  11 +bzip2recover_SOURCES   = bzip2recover.c
  12 +lib_LTLIBRARIES                = libbz2.la
  13 +libbz2_la_SOURCES = \
  14 +       blocksort.c \
  15 +       huffman.c \
  16 +       crctable.c \
  17 +       randtable.c \
  18 +       compress.c \
  19 +       decompress.c \
  20 +       bzlib.c \
  21 +       bzlib.h \
  22 +       bzlib_private.h
  23 +# -version-info takes libtool's current:revision:age interface numbers.
  24 +libbz2_la_LDFLAGS      = -version-info 1:0:0
  25 +include_HEADERS                = bzlib.h bzlib_private.h
  26 +# Scripts installed into $(bindir) next to the compiled programs.
  27 +bin_SCRIPTS            = bzless bzgrep
  28 +# Files added to the distribution tarball without being built or installed.
  29 +EXTRA_DIST = README README.COMPILATION.PROBLEMS \
  30 +       Y2K_INFO libbz2.def libbz2.dsp \
  31 +       sample1.bz2 sample1.ref sample2.bz2 sample2.ref sample3.bz2 sample3.ref
  32 +# bunzip2 and bzcat are bzip2 under other names; link inside $(bindir) so the ln/cp fallbacks of $(LN_S) use the installed binary.
  33 +install-exec-hook:
  34 +       cd $(DESTDIR)$(bindir) && rm -f bunzip2 && $(LN_S) bzip2 bunzip2
  35 +       cd $(DESTDIR)$(bindir) && rm -f bzcat && $(LN_S) bzip2 bzcat
  36 diff -Nru bzip2-1.0.2/bzdiff.1 bzip2-1.0.2.new/bzdiff.1
  37 --- bzip2-1.0.2/bzdiff.1        Sun Dec 30 03:12:35 2001
  38 +++ bzip2-1.0.2.new/bzdiff.1    Thu Jan  1 01:00:00 1970
  39 @@ -1,47 +0,0 @@
  40 -\"Shamelessly copied from zmore.1 by Philippe Troin <phil@fifi.org>
  41 -\"for Debian GNU/Linux
  42 -.TH BZDIFF 1
  43 -.SH NAME
  44 -bzcmp, bzdiff \- compare bzip2 compressed files
  45 -.SH SYNOPSIS
  46 -.B bzcmp
  47 -[ cmp_options ] file1
  48 -[ file2 ]
  49 -.br
  50 -.B bzdiff
  51 -[ diff_options ] file1
  52 -[ file2 ]
  53 -.SH DESCRIPTION
  54 -.I  Bzcmp
  55 -and
  56 -.I bzdiff
  57 -are used to invoke the
  58 -.I cmp
  59 -or the
  60 -.I diff
  61 -program on bzip2 compressed files.  All options specified are passed
  62 -directly to
  63 -.I cmp
  64 -or
  65 -.IR diff "."
  66 -If only 1 file is specified, then the files compared are
  67 -.I file1
  68 -and an uncompressed
  69 -.IR file1 ".bz2."
  70 -If two files are specified, then they are uncompressed if necessary and fed to
  71 -.I cmp
  72 -or
  73 -.IR diff "."
  74 -The exit status from
  75 -.I cmp
  76 -or
  77 -.I diff
  78 -is preserved.
  79 -.SH "SEE ALSO"
  80 -cmp(1), diff(1), bzmore(1), bzless(1), bzgrep(1), bzip2(1)
  81 -.SH BUGS
  82 -Messages from the
  83 -.I cmp
  84 -or
  85 -.I diff
  86 -programs refer to temporary filenames instead of those specified.
  87 diff -Nru bzip2-1.0.2/bzgrep.1 bzip2-1.0.2.new/bzgrep.1
  88 --- bzip2-1.0.2/bzgrep.1        Sun Dec 30 03:12:35 2001
  89 +++ bzip2-1.0.2.new/bzgrep.1    Thu Jan  1 01:00:00 1970
  90 @@ -1,56 +0,0 @@
  91 -\"Shamelessly copied from zmore.1 by Philippe Troin <phil@fifi.org>
  92 -\"for Debian GNU/Linux
  93 -.TH BZGREP 1
  94 -.SH NAME
  95 -bzgrep, bzfgrep, bzegrep \- search possibly bzip2 compressed files for a regular expression
  96 -.SH SYNOPSIS
  97 -.B bzgrep
  98 -[ grep_options ]
  99 -.BI  [\ -e\ ] " pattern"
 100 -.IR filename ".\|.\|."
 101 -.br
 102 -.B bzegrep
 103 -[ egrep_options ]
 104 -.BI  [\ -e\ ] " pattern"
 105 -.IR filename ".\|.\|."
 106 -.br
 107 -.B bzfgrep
 108 -[ fgrep_options ]
 109 -.BI  [\ -e\ ] " pattern"
 110 -.IR filename ".\|.\|."
 111 -.SH DESCRIPTION
 112 -.IR  Bzgrep
 113 -is used to invoke the
 114 -.I grep
 115 -on bzip2-compressed files. All options specified are passed directly to
 116 -.I grep.
 117 -If no file is specified, then the standard input is decompressed
 118 -if necessary and fed to grep.
 119 -Otherwise the given files are uncompressed if necessary and fed to
 120 -.I grep.
 121 -.PP
 122 -If
 123 -.I bzgrep
 124 -is invoked as
 125 -.I bzegrep
 126 -or
 127 -.I bzfgrep
 128 -then
 129 -.I egrep
 130 -or
 131 -.I fgrep
 132 -is used instead of
 133 -.I grep.
 134 -If the GREP environment variable is set,
 135 -.I bzgrep
 136 -uses it as the
 137 -.I grep
 138 -program to be invoked. For example:
 139 -
 140 -    for sh:  GREP=fgrep  bzgrep string files
 141 -    for csh: (setenv GREP fgrep; bzgrep string files)
 142 -.SH AUTHOR
 143 -Charles Levert (charles@comm.polymtl.ca). Adapted to bzip2 by Philippe
 144 -Troin <phil@fifi.org> for Debian GNU/Linux.
 145 -.SH "SEE ALSO"
 146 -grep(1), egrep(1), fgrep(1), bzdiff(1), bzmore(1), bzless(1), bzip2(1)
 147 diff -Nru bzip2-1.0.2/bzless bzip2-1.0.2.new/bzless
 148 --- bzip2-1.0.2/bzless  Thu Jan  1 01:00:00 1970
 149 +++ bzip2-1.0.2.new/bzless      Fri Feb  1 04:19:11 2002
 150 @@ -0,0 +1,2 @@
 151 +#!/bin/sh
 152 +bunzip2 -c "$@" | less   # both found via PATH: rpm macros are not expanded inside a patch
 153 diff -Nru bzip2-1.0.2/bzmore.1 bzip2-1.0.2.new/bzmore.1
 154 --- bzip2-1.0.2/bzmore.1        Sun Dec 30 03:12:35 2001
 155 +++ bzip2-1.0.2.new/bzmore.1    Thu Jan  1 01:00:00 1970
 156 @@ -1,152 +0,0 @@
 157 -.\"Shamelessly copied from zmore.1 by Philippe Troin <phil@fifi.org>
 158 -.\"for Debian GNU/Linux
 159 -.TH BZMORE 1
 160 -.SH NAME
 161 -bzmore, bzless \- file perusal filter for crt viewing of bzip2 compressed text
 162 -.SH SYNOPSIS
 163 -.B bzmore
 164 -[ name ...  ]
 165 -.br
 166 -.B bzless
 167 -[ name ...  ]
 168 -.SH NOTE
 169 -In the following description,
 170 -.I bzless
 171 -and
 172 -.I less
 173 -can be used interchangeably with
 174 -.I bzmore
 175 -and
 176 -.I more.
 177 -.SH DESCRIPTION
 178 -.I  Bzmore
 179 -is a filter which allows examination of compressed or plain text files
 180 -one screenful at a time on a soft-copy terminal.
 181 -.I bzmore
 182 -works on files compressed with
 183 -.I bzip2
 184 -and also on uncompressed files.
 185 -If a file does not exist,
 186 -.I bzmore
 187 -looks for a file of the same name with the addition of a .bz2 suffix.
 188 -.PP
 189 -.I Bzmore
 190 -normally pauses after each screenful, printing --More--
 191 -at the bottom of the screen.
 192 -If the user then types a carriage return, one more line is displayed.
 193 -If the user hits a space,
 194 -another screenful is displayed.  Other possibilities are enumerated later.
 195 -.PP
 196 -.I Bzmore
 197 -looks in the file
 198 -.I /etc/termcap
 199 -to determine terminal characteristics,
 200 -and to determine the default window size.
 201 -On a terminal capable of displaying 24 lines,
 202 -the default window size is 22 lines.
 203 -Other sequences which may be typed when
 204 -.I bzmore
 205 -pauses, and their effects, are as follows (\fIi\fP is an optional integer
 206 -argument, defaulting to 1) :
 207 -.PP
 208 -.IP \fIi\|\fP<space>
 209 -display
 210 -.I i
 211 -more lines, (or another screenful if no argument is given)
 212 -.PP
 213 -.IP ^D
 214 -display 11 more lines (a ``scroll'').
 215 -If
 216 -.I i
 217 -is given, then the scroll size is set to \fIi\|\fP.
 218 -.PP
 219 -.IP d
 220 -same as ^D (control-D)
 221 -.PP
 222 -.IP \fIi\|\fPz
 223 -same as typing a space except that \fIi\|\fP, if present, becomes the new
 224 -window size.  Note that the window size reverts back to the default at the
 225 -end of the current file.
 226 -.PP
 227 -.IP \fIi\|\fPs
 228 -skip \fIi\|\fP lines and print a screenful of lines
 229 -.PP
 230 -.IP \fIi\|\fPf
 231 -skip \fIi\fP screenfuls and print a screenful of lines
 232 -.PP
 233 -.IP "q or Q"
 234 -quit reading the current file; go on to the next (if any)
 235 -.PP
 236 -.IP "e or q"
 237 -When the prompt --More--(Next file:
 238 -.IR file )
 239 -is printed, this command causes bzmore to exit.
 240 -.PP
 241 -.IP s
 242 -When the prompt --More--(Next file:
 243 -.IR file )
 244 -is printed, this command causes bzmore to skip the next file and continue.
 245 -.PP
 246 -.IP =
 247 -Display the current line number.
 248 -.PP
 249 -.IP \fIi\|\fP/expr
 250 -search for the \fIi\|\fP-th occurrence of the regular expression \fIexpr.\fP
 251 -If the pattern is not found,
 252 -.I bzmore
 253 -goes on to the next file (if any).
 254 -Otherwise, a screenful is displayed, starting two lines before the place
 255 -where the expression was found.
 256 -The user's erase and kill characters may be used to edit the regular
 257 -expression.
 258 -Erasing back past the first column cancels the search command.
 259 -.PP
 260 -.IP \fIi\|\fPn
 261 -search for the \fIi\|\fP-th occurrence of the last regular expression entered.
 262 -.PP
 263 -.IP !command
 264 -invoke a shell with \fIcommand\|\fP.
 265 -The character `!' in "command" are replaced with the
 266 -previous shell command.  The sequence "\\!" is replaced by "!".
 267 -.PP
 268 -.IP ":q or :Q"
 269 -quit reading the current file; go on to the next (if any)
 270 -(same as q or Q).
 271 -.PP
 272 -.IP .
 273 -(dot) repeat the previous command.
 274 -.PP
 275 -The commands take effect immediately, i.e., it is not necessary to
 276 -type a carriage return.
 277 -Up to the time when the command character itself is given,
 278 -the user may hit the line kill character to cancel the numerical
 279 -argument being formed.
 280 -In addition, the user may hit the erase character to redisplay the
 281 ---More-- message.
 282 -.PP
 283 -At any time when output is being sent to the terminal, the user can
 284 -hit the quit key (normally control\-\\).
 285 -.I Bzmore
 286 -will stop sending output, and will display the usual --More--
 287 -prompt.
 288 -The user may then enter one of the above commands in the normal manner.
 289 -Unfortunately, some output is lost when this is done, due to the
 290 -fact that any characters waiting in the terminal's output queue
 291 -are flushed when the quit signal occurs.
 292 -.PP
 293 -The terminal is set to
 294 -.I noecho
 295 -mode by this program so that the output can be continuous.
 296 -What you type will thus not show on your terminal, except for the / and !
 297 -commands.
 298 -.PP
 299 -If the standard output is not a teletype, then
 300 -.I bzmore
 301 -acts just like
 302 -.I bzcat,
 303 -except that a header is printed before each file.
 304 -.SH FILES
 305 -.DT
 306 -/etc/termcap           Terminal data base
 307 -.SH "SEE ALSO"
 308 -more(1), less(1), bzip2(1), bzdiff(1), bzgrep(1)
 309 diff -Nru bzip2-1.0.2/config.h.in bzip2-1.0.2.new/config.h.in
 310 --- bzip2-1.0.2/config.h.in     Thu Jan  1 01:00:00 1970
 311 +++ bzip2-1.0.2.new/config.h.in Fri Feb  1 04:19:11 2002
 312 @@ -0,0 +1,17 @@
 313 +/* config.h.in.  Generated automatically from configure.in by autoheader.  */
 314 +
 315 +/* Name of package */
 316 +#undef PACKAGE
 317 +
 318 +/* Version number of package */
 319 +#undef VERSION
 320 +
 321 +/* Number of bits in a file offset, on hosts where this is settable. */
 322 +#undef _FILE_OFFSET_BITS
 323 +
 324 +/* Define to make fseeko etc. visible, on some hosts. */
 325 +#undef _LARGEFILE_SOURCE
 326 +
 327 +/* Define for large files, on AIX-style hosts. */
 328 +#undef _LARGE_FILES
 329 +
 330 diff -Nru bzip2-1.0.2/configure.in bzip2-1.0.2.new/configure.in
 331 --- bzip2-1.0.2/configure.in    Thu Jan  1 01:00:00 1970
 332 +++ bzip2-1.0.2.new/configure.in        Fri Feb  1 04:19:11 2002
 333 @@ -0,0 +1,10 @@
 334 +AC_INIT(bzip2.c)
 335 +AM_INIT_AUTOMAKE(bzip2,1.0.2) dnl version must match the bzip2 release this patch targets
 336 +AM_CONFIG_HEADER(config.h)
 337 +AC_PROG_CC
 338 +AM_PROG_LIBTOOL
 339 +AC_PROG_LN_S
 340 +AC_SYS_LARGEFILE
 341 +AC_OUTPUT(Makefile
 342 +       doc/Makefile
 343 +       doc/pl/Makefile)
 344 diff -Nru bzip2-1.0.2/crctable.c bzip2-1.0.2.new/crctable.c
 345 --- bzip2-1.0.2/crctable.c      Sun Dec 30 03:19:28 2001
 346 +++ bzip2-1.0.2.new/crctable.c  Fri Feb  1 04:19:11 2002
 347 @@ -58,6 +58,10 @@
 348    For more information on these sources, see the manual.
 349  --*/
 350
 351 +#ifdef HAVE_CONFIG_H
 352 +#include <config.h>
 353 +#endif
 354 +
 355
 356  #include "bzlib_private.h"
 357
 358 diff -Nru bzip2-1.0.2/decompress.c bzip2-1.0.2.new/decompress.c
 359 --- bzip2-1.0.2/decompress.c    Sun Dec 30 21:45:53 2001
 360 +++ bzip2-1.0.2.new/decompress.c        Fri Feb  1 04:19:11 2002
 361 @@ -58,6 +58,10 @@
 362    For more information on these sources, see the manual.
 363  --*/
 364
 365 +#ifdef HAVE_CONFIG_H
 366 +#include <config.h>
 367 +#endif
 368 +
 369
 370  #include "bzlib_private.h"
 371
 372 diff -Nru bzip2-1.0.2/dlltest.c bzip2-1.0.2.new/dlltest.c
 373 --- bzip2-1.0.2/dlltest.c       Sun Dec 30 20:44:07 2001
 374 +++ bzip2-1.0.2.new/dlltest.c   Fri Feb  1 04:19:11 2002
 375 @@ -8,6 +8,10 @@
 376     usage: minibz2 [-d] [-{1,2,..9}] [[srcfilename] destfilename]\r
 377  */\r
 378  \r
 379 +#ifdef HAVE_CONFIG_H
 380 +#include <config.h>
 381 +#endif
 382 +
 383  #define BZ_IMPORT\r
 384  #include <stdio.h>\r
 385  #include <stdlib.h>\r
 386 diff -Nru bzip2-1.0.2/doc/Makefile.am bzip2-1.0.2.new/doc/Makefile.am
 387 --- bzip2-1.0.2/doc/Makefile.am Thu Jan  1 01:00:00 1970
 388 +++ bzip2-1.0.2.new/doc/Makefile.am     Fri Feb  1 04:27:21 2002
 389 @@ -0,0 +1,6 @@
 390 +# Manual pages for the bzip2 tools; pl/ is a nested automake directory.
 391 +SUBDIRS                = pl
 392 +
 393 +man_MANS       = bunzip2.1 bzcat.1 bzdiff.1 bzgrep.1 bzip2.1 \
 394 +                 bzip2recover.1 bzmore.1
 395 +#info_TEXINFOS = bzip2.texi
 396 diff -Nru bzip2-1.0.2/doc/bunzip2.1 bzip2-1.0.2.new/doc/bunzip2.1
 397 --- bzip2-1.0.2/doc/bunzip2.1   Thu Jan  1 01:00:00 1970
 398 +++ bzip2-1.0.2.new/doc/bunzip2.1       Fri Feb  1 04:19:11 2002
 399 @@ -0,0 +1 @@
 400 +.so bzip2.1
 401 \ No newline at end of file
 402 diff -Nru bzip2-1.0.2/doc/bzcat.1 bzip2-1.0.2.new/doc/bzcat.1
 403 --- bzip2-1.0.2/doc/bzcat.1     Thu Jan  1 01:00:00 1970
 404 +++ bzip2-1.0.2.new/doc/bzcat.1 Fri Feb  1 04:19:11 2002
 405 @@ -0,0 +1 @@
 406 +.so bzip2.1
 407 \ No newline at end of file
 408 diff -Nru bzip2-1.0.2/doc/bzdiff.1 bzip2-1.0.2.new/doc/bzdiff.1
 409 --- bzip2-1.0.2/doc/bzdiff.1    Thu Jan  1 01:00:00 1970
 410 +++ bzip2-1.0.2.new/doc/bzdiff.1        Sun Dec 30 03:12:35 2001
 411 @@ -0,0 +1,47 @@
 412 +.\"Shamelessly copied from zmore.1 by Philippe Troin <phil@fifi.org>
 413 +.\"for Debian GNU/Linux
 414 +.TH BZDIFF 1
 415 +.SH NAME
 416 +bzcmp, bzdiff \- compare bzip2 compressed files
 417 +.SH SYNOPSIS
 418 +.B bzcmp
 419 +[ cmp_options ] file1
 420 +[ file2 ]
 421 +.br
 422 +.B bzdiff
 423 +[ diff_options ] file1
 424 +[ file2 ]
 425 +.SH DESCRIPTION
 426 +.I  Bzcmp
 427 +and
 428 +.I bzdiff
 429 +are used to invoke the
 430 +.I cmp
 431 +or the
 432 +.I diff
 433 +program on bzip2 compressed files.  All options specified are passed
 434 +directly to
 435 +.I cmp
 436 +or
 437 +.IR diff "."
 438 +If only 1 file is specified, then the files compared are
 439 +.I file1
 440 +and an uncompressed
 441 +.IR file1 ".bz2."
 442 +If two files are specified, then they are uncompressed if necessary and fed to
 443 +.I cmp
 444 +or
 445 +.IR diff "."
 446 +The exit status from
 447 +.I cmp
 448 +or
 449 +.I diff
 450 +is preserved.
 451 +.SH "SEE ALSO"
 452 +cmp(1), diff(1), bzmore(1), bzless(1), bzgrep(1), bzip2(1)
 453 +.SH BUGS
 454 +Messages from the
 455 +.I cmp
 456 +or
 457 +.I diff
 458 +programs refer to temporary filenames instead of those specified.
 459 diff -Nru bzip2-1.0.2/doc/bzgrep.1 bzip2-1.0.2.new/doc/bzgrep.1
 460 --- bzip2-1.0.2/doc/bzgrep.1    Thu Jan  1 01:00:00 1970
 461 +++ bzip2-1.0.2.new/doc/bzgrep.1        Sun Dec 30 03:12:35 2001
 462 @@ -0,0 +1,56 @@
 463 +.\"Shamelessly copied from zmore.1 by Philippe Troin <phil@fifi.org>
 464 +.\"for Debian GNU/Linux
 465 +.TH BZGREP 1
 466 +.SH NAME
 467 +bzgrep, bzfgrep, bzegrep \- search possibly bzip2 compressed files for a regular expression
 468 +.SH SYNOPSIS
 469 +.B bzgrep
 470 +[ grep_options ]
 471 +.BI  [\ -e\ ] " pattern"
 472 +.IR filename ".\|.\|."
 473 +.br
 474 +.B bzegrep
 475 +[ egrep_options ]
 476 +.BI  [\ -e\ ] " pattern"
 477 +.IR filename ".\|.\|."
 478 +.br
 479 +.B bzfgrep
 480 +[ fgrep_options ]
 481 +.BI  [\ -e\ ] " pattern"
 482 +.IR filename ".\|.\|."
 483 +.SH DESCRIPTION
 484 +.IR  Bzgrep
 485 +is used to invoke the
 486 +.I grep
 487 +program on bzip2-compressed files. All options specified are passed directly to
 488 +.I grep.
 489 +If no file is specified, then the standard input is decompressed
 490 +if necessary and fed to grep.
 491 +Otherwise the given files are uncompressed if necessary and fed to
 492 +.I grep.
 493 +.PP
 494 +If
 495 +.I bzgrep
 496 +is invoked as
 497 +.I bzegrep
 498 +or
 499 +.I bzfgrep
 500 +then
 501 +.I egrep
 502 +or
 503 +.I fgrep
 504 +is used instead of
 505 +.I grep.
 506 +If the GREP environment variable is set,
 507 +.I bzgrep
 508 +uses it as the
 509 +.I grep
 510 +program to be invoked. For example:
 511 +
 512 +    for sh:  GREP=fgrep  bzgrep string files
 513 +    for csh: (setenv GREP fgrep; bzgrep string files)
 514 +.SH AUTHOR
 515 +Charles Levert (charles@comm.polymtl.ca). Adapted to bzip2 by Philippe
 516 +Troin <phil@fifi.org> for Debian GNU/Linux.
 517 +.SH "SEE ALSO"
 518 +grep(1), egrep(1), fgrep(1), bzdiff(1), bzmore(1), bzless(1), bzip2(1)
 519 diff -Nru bzip2-1.0.2/doc/bzip2.1 bzip2-1.0.2.new/doc/bzip2.1
 520 --- bzip2-1.0.2/doc/bzip2.1     Thu Jan  1 01:00:00 1970
 521 +++ bzip2-1.0.2.new/doc/bzip2.1 Thu Jan  3 00:14:36 2002
 522 @@ -0,0 +1,453 @@
 523 +.PU
 524 +.TH bzip2 1
 525 +.SH NAME
 526 +bzip2, bunzip2 \- a block-sorting file compressor, v1.0.2
 527 +.br
 528 +bzcat \- decompresses files to stdout
 529 +.br
 530 +bzip2recover \- recovers data from damaged bzip2 files
 531 +
 532 +.SH SYNOPSIS
 533 +.ll +8
 534 +.B bzip2
 535 +.RB [ " \-cdfkqstvzVL123456789 " ]
 536 +[
 537 +.I "filenames \&..."
 538 +]
 539 +.ll -8
 540 +.br
 541 +.B bunzip2
 542 +.RB [ " \-fkvsVL " ]
 543 +[
 544 +.I "filenames \&..."
 545 +]
 546 +.br
 547 +.B bzcat
 548 +.RB [ " \-s " ]
 549 +[
 550 +.I "filenames \&..."
 551 +]
 552 +.br
 553 +.B bzip2recover
 554 +.I "filename"
 555 +
 556 +.SH DESCRIPTION
 557 +.I bzip2
 558 +compresses files using the Burrows-Wheeler block sorting
 559 +text compression algorithm, and Huffman coding.  Compression is
 560 +generally considerably better than that achieved by more conventional
 561 +LZ77/LZ78-based compressors, and approaches the performance of the PPM
 562 +family of statistical compressors.
 563 +
 564 +The command-line options are deliberately very similar to
 565 +those of
 566 +.I GNU gzip,
 567 +but they are not identical.
 568 +
 569 +.I bzip2
 570 +expects a list of file names to accompany the
 571 +command-line flags.  Each file is replaced by a compressed version of
 572 +itself, with the name "original_name.bz2".
 573 +Each compressed file
 574 +has the same modification date, permissions, and, when possible,
 575 +ownership as the corresponding original, so that these properties can
 576 +be correctly restored at decompression time.  File name handling is
 577 +naive in the sense that there is no mechanism for preserving original
 578 +file names, permissions, ownerships or dates in filesystems which lack
 579 +these concepts, or have serious file name length restrictions, such as
 580 +MS-DOS.
 581 +
 582 +.I bzip2
 583 +and
 584 +.I bunzip2
 585 +will by default not overwrite existing
 586 +files.  If you want this to happen, specify the \-f flag.
 587 +
 588 +If no file names are specified,
 589 +.I bzip2
 590 +compresses from standard
 591 +input to standard output.  In this case,
 592 +.I bzip2
 593 +will decline to
 594 +write compressed output to a terminal, as this would be entirely
 595 +incomprehensible and therefore pointless.
 596 +
 597 +.I bunzip2
 598 +(or
 599 +.I bzip2 \-d)
 600 +decompresses all
 601 +specified files.  Files which were not created by
 602 +.I bzip2
 603 +will be detected and ignored, and a warning issued.
 604 +.I bzip2
 605 +attempts to guess the filename for the decompressed file
 606 +from that of the compressed file as follows:
 607 +
 608 +       filename.bz2    becomes   filename
 609 +       filename.bz     becomes   filename
 610 +       filename.tbz2   becomes   filename.tar
 611 +       filename.tbz    becomes   filename.tar
 612 +       anyothername    becomes   anyothername.out
 613 +
 614 +If the file does not end in one of the recognised endings,
 615 +.I .bz2,
 616 +.I .bz,
 617 +.I .tbz2
 618 +or
 619 +.I .tbz,
 620 +.I bzip2
 621 +complains that it cannot
 622 +guess the name of the original file, and uses the original name
 623 +with
 624 +.I .out
 625 +appended.
 626 +
 627 +As with compression, supplying no
 628 +filenames causes decompression from
 629 +standard input to standard output.
 630 +
 631 +.I bunzip2
 632 +will correctly decompress a file which is the
 633 +concatenation of two or more compressed files.  The result is the
 634 +concatenation of the corresponding uncompressed files.  Integrity
 635 +testing (\-t)
 636 +of concatenated
 637 +compressed files is also supported.
 638 +
 639 +You can also compress or decompress files to the standard output by
 640 +giving the \-c flag.  Multiple files may be compressed and
 641 +decompressed like this.  The resulting outputs are fed sequentially to
 642 +stdout.  Compression of multiple files
 643 +in this manner generates a stream
 644 +containing multiple compressed file representations.  Such a stream
 645 +can be decompressed correctly only by
 646 +.I bzip2
 647 +version 0.9.0 or
 648 +later.  Earlier versions of
 649 +.I bzip2
 650 +will stop after decompressing
 651 +the first file in the stream.
 652 +
 653 +.I bzcat
 654 +(or
 655 +.I bzip2 -dc)
 656 +decompresses all specified files to
 657 +the standard output.
 658 +
 659 +.I bzip2
 660 +will read arguments from the environment variables
 661 +.I BZIP2
 662 +and
 663 +.I BZIP,
 664 +in that order, and will process them
 665 +before any arguments read from the command line.  This gives a
 666 +convenient way to supply default arguments.
 667 +
 668 +Compression is always performed, even if the compressed
 669 +file is slightly
 670 +larger than the original.  Files of less than about one hundred bytes
 671 +tend to get larger, since the compression mechanism has a constant
 672 +overhead in the region of 50 bytes.  Random data (including the output
 673 +of most file compressors) is coded at about 8.05 bits per byte, giving
 674 +an expansion of around 0.5%.
 675 +
 676 +As a self-check for your protection,
 677 +.I
 678 +bzip2
 679 +uses 32-bit CRCs to
 680 +make sure that the decompressed version of a file is identical to the
 681 +original.  This guards against corruption of the compressed data, and
 682 +against undetected bugs in
 683 +.I bzip2
 684 +(hopefully very unlikely).  The
 685 +chance of data corruption going undetected is microscopic, about one
 686 +chance in four billion for each file processed.  Be aware, though, that
 687 +the check occurs upon decompression, so it can only tell you that
 688 +something is wrong.  It can't help you
 689 +recover the original uncompressed
 690 +data.  You can use
 691 +.I bzip2recover
 692 +to try to recover data from
 693 +damaged files.
 694 +
 695 +Return values: 0 for a normal exit, 1 for environmental problems (file
 696 +not found, invalid flags, I/O errors, &c), 2 to indicate a corrupt
 697 +compressed file, 3 for an internal consistency error (eg, bug) which
 698 +caused
 699 +.I bzip2
 700 +to panic.
 701 +
 702 +.SH OPTIONS
 703 +.TP
 704 +.B \-c --stdout
 705 +Compress or decompress to standard output.
 706 +.TP
 707 +.B \-d --decompress
 708 +Force decompression.
 709 +.I bzip2,
 710 +.I bunzip2
 711 +and
 712 +.I bzcat
 713 +are
 714 +really the same program, and the decision about what actions to take is
 715 +done on the basis of which name is used.  This flag overrides that
 716 +mechanism, and forces
 717 +.I bzip2
 718 +to decompress.
 719 +.TP
 720 +.B \-z --compress
 721 +The complement to \-d: forces compression, regardless of the
 722 +invocation name.
 723 +.TP
 724 +.B \-t --test
 725 +Check integrity of the specified file(s), but don't decompress them.
 726 +This really performs a trial decompression and throws away the result.
 727 +.TP
 728 +.B \-f --force
 729 +Force overwrite of output files.  Normally,
 730 +.I bzip2
 731 +will not overwrite
 732 +existing output files.  Also forces
 733 +.I bzip2
 734 +to break hard links
 735 +to files, which it otherwise wouldn't do.
 736 +
 737 +bzip2 normally declines to decompress files which don't have the
 738 +correct magic header bytes.  If forced (-f), however, it will pass
 739 +such files through unmodified.  This is how GNU gzip behaves.
 740 +.TP
 741 +.B \-k --keep
 742 +Keep (don't delete) input files during compression
 743 +or decompression.
 744 +.TP
 745 +.B \-s --small
 746 +Reduce memory usage, for compression, decompression and testing.  Files
 747 +are decompressed and tested using a modified algorithm which only
 748 +requires 2.5 bytes per block byte.  This means any file can be
 749 +decompressed in 2300k of memory, albeit at about half the normal speed.
 750 +
 751 +During compression, \-s selects a block size of 200k, which limits
 752 +memory use to around the same figure, at the expense of your compression
 753 +ratio.  In short, if your machine is low on memory (8 megabytes or
 754 +less), use \-s for everything.  See MEMORY MANAGEMENT below.
 755 +.TP
 756 +.B \-q --quiet
 757 +Suppress non-essential warning messages.  Messages pertaining to
 758 +I/O errors and other critical events will not be suppressed.
 759 +.TP
 760 +.B \-v --verbose
 761 +Verbose mode -- show the compression ratio for each file processed.
 762 +Further \-v's increase the verbosity level, spewing out lots of
 763 +information which is primarily of interest for diagnostic purposes.
 764 +.TP
 765 +.B \-L --license -V --version
 766 +Display the software version, license terms and conditions.
 767 +.TP
 768 +.B \-1 (or \-\-fast) to \-9 (or \-\-best)
 769 +Set the block size to 100 k, 200 k ..  900 k when compressing.  Has no
 770 +effect when decompressing.  See MEMORY MANAGEMENT below.
 771 +The \-\-fast and \-\-best aliases are primarily for GNU gzip
 772 +compatibility.  In particular, \-\-fast doesn't make things
 773 +significantly faster.
 774 +And \-\-best merely selects the default behaviour.
 775 +.TP
 776 +.B \--
 777 +Treats all subsequent arguments as file names, even if they start
 778 +with a dash.  This is so you can handle files with names beginning
 779 +with a dash, for example: bzip2 \-- \-myfilename.
 780 +.TP
 781 +.B \--repetitive-fast --repetitive-best
 782 +These flags are redundant in versions 0.9.5 and above.  They provided
 783 +some coarse control over the behaviour of the sorting algorithm in
 784 +earlier versions, which was sometimes useful.  0.9.5 and above have an
 785 +improved algorithm which renders these flags irrelevant.
 786 +
 787 +.SH MEMORY MANAGEMENT
 788 +.I bzip2
 789 +compresses large files in blocks.  The block size affects
 790 +both the compression ratio achieved, and the amount of memory needed for
 791 +compression and decompression.  The flags \-1 through \-9
 792 +specify the block size to be 100,000 bytes through 900,000 bytes (the
 793 +default) respectively.  At decompression time, the block size used for
 794 +compression is read from the header of the compressed file, and
 795 +.I bunzip2
 796 +then allocates itself just enough memory to decompress
 797 +the file.  Since block sizes are stored in compressed files, it follows
 798 +that the flags \-1 to \-9 are irrelevant to and so ignored
 799 +during decompression.
 800 +
 801 +Compression and decompression requirements,
 802 +in bytes, can be estimated as:
 803 +
 804 +       Compression:   400k + ( 8 x block size )
 805 +
 806 +       Decompression: 100k + ( 4 x block size ), or
 807 +                      100k + ( 2.5 x block size )
 808 +
 809 +Larger block sizes give rapidly diminishing marginal returns.  Most of
 810 +the compression comes from the first two or three hundred k of block
 811 +size, a fact worth bearing in mind when using
 812 +.I bzip2
 813 +on small machines.
 814 +It is also important to appreciate that the decompression memory
 815 +requirement is set at compression time by the choice of block size.
 816 +
 817 +For files compressed with the default 900k block size,
 818 +.I bunzip2
 819 +will require about 3700 kbytes to decompress.  To support decompression
 820 +of any file on a 4 megabyte machine,
 821 +.I bunzip2
 822 +has an option to
 823 +decompress using approximately half this amount of memory, about 2300
 824 +kbytes.  Decompression speed is also halved, so you should use this
 825 +option only where necessary.  The relevant flag is -s.
 826 +
 827 +In general, try and use the largest block size memory constraints allow,
 828 +since that maximises the compression achieved.  Compression and
 829 +decompression speed are virtually unaffected by block size.
 830 +
 831 +Another significant point applies to files which fit in a single block
 832 +-- that means most files you'd encounter using a large block size.  The
 833 +amount of real memory touched is proportional to the size of the file,
 834 +since the file is smaller than a block.  For example, compressing a file
 835 +20,000 bytes long with the flag -9 will cause the compressor to
 836 +allocate around 7600k of memory, but only touch 400k + 20000 * 8 = 560
 837 +kbytes of it.  Similarly, the decompressor will allocate 3700k but only
 838 +touch 100k + 20000 * 4 = 180 kbytes.
 839 +
 840 +Here is a table which summarises the maximum memory usage for different
 841 +block sizes.  Also recorded is the total compressed size for 14 files of
 842 +the Calgary Text Compression Corpus totalling 3,141,622 bytes.  This
 843 +column gives some feel for how compression varies with block size.
 844 +These figures tend to understate the advantage of larger block sizes for
 845 +larger files, since the Corpus is dominated by smaller files.
 846 +
 847 +           Compress   Decompress   Decompress   Corpus
 848 +    Flag     usage      usage       -s usage     Size
 849 +
 850 +     -1      1200k       500k         350k      914704
 851 +     -2      2000k       900k         600k      877703
 852 +     -3      2800k      1300k         850k      860338
 853 +     -4      3600k      1700k        1100k      846899
 854 +     -5      4400k      2100k        1350k      845160
 855 +     -6      5200k      2500k        1600k      838626
 856 +     -7      6100k      2900k        1850k      834096
 857 +     -8      6800k      3300k        2100k      828642
 858 +     -9      7600k      3700k        2350k      828642
 859 +
 860 +.SH RECOVERING DATA FROM DAMAGED FILES
 861 +.I bzip2
 862 +compresses files in blocks, usually 900kbytes long.  Each
 863 +block is handled independently.  If a media or transmission error causes
 864 +a multi-block .bz2
 865 +file to become damaged, it may be possible to
 866 +recover data from the undamaged blocks in the file.
 867 +
 868 +The compressed representation of each block is delimited by a 48-bit
 869 +pattern, which makes it possible to find the block boundaries with
 870 +reasonable certainty.  Each block also carries its own 32-bit CRC, so
 871 +damaged blocks can be distinguished from undamaged ones.
 872 +
 873 +.I bzip2recover
 874 +is a simple program whose purpose is to search for
 875 +blocks in .bz2 files, and write each block out into its own .bz2
 876 +file.  You can then use
 877 +.I bzip2
 878 +\-t
 879 +to test the
 880 +integrity of the resulting files, and decompress those which are
 881 +undamaged.
 882 +
 883 +.I bzip2recover
 884 +takes a single argument, the name of the damaged file,
 885 +and writes a number of files "rec00001file.bz2",
 886 +"rec00002file.bz2", etc, containing the  extracted  blocks.
 887 +The  output  filenames  are  designed  so  that the use of
 888 +wildcards in subsequent processing -- for example,
 889 +"bzip2 -dc  rec*file.bz2 > recovered_data" -- processes the files in
 890 +the correct order.
 891 +
 892 +.I bzip2recover
 893 +should be of most use dealing with large .bz2
 894 +files,  as  these will contain many blocks.  It is clearly
 895 +futile to use it on damaged single-block  files,  since  a
 896 +damaged  block  cannot  be recovered.  If you wish to minimise
 897 +any potential data loss through media  or  transmission errors,
 898 +you might consider compressing with a smaller
 899 +block size.
 900 +
 901 +.SH PERFORMANCE NOTES
 902 +The sorting phase of compression gathers together similar strings in the
 903 +file.  Because of this, files containing very long runs of repeated
 904 +symbols, like "aabaabaabaab ..."  (repeated several hundred times) may
 905 +compress more slowly than normal.  Versions 0.9.5 and above fare much
 906 +better than previous versions in this respect.  The ratio between
 907 +worst-case and average-case compression time is in the region of 10:1.
 908 +For previous versions, this figure was more like 100:1.  You can use the
 909 +\-vvvv option to monitor progress in great detail, if you want.
 910 +
 911 +Decompression speed is unaffected by these phenomena.
 912 +
 913 +.I bzip2
 914 +usually allocates several megabytes of memory to operate
 915 +in, and then charges all over it in a fairly random fashion.  This means
 916 +that performance, both for compressing and decompressing, is largely
 917 +determined by the speed at which your machine can service cache misses.
 918 +Because of this, small changes to the code to reduce the miss rate have
 919 +been observed to give disproportionately large performance improvements.
 920 +I imagine
 921 +.I bzip2
 922 +will perform best on machines with very large caches.
 923 +
 924 +.SH CAVEATS
 925 +I/O error messages are not as helpful as they could be.
 926 +.I bzip2
 927 +tries hard to detect I/O errors and exit cleanly, but the details of
 928 +what the problem is sometimes seem rather misleading.
 929 +
 930 +This manual page pertains to version 1.0.2 of
 931 +.I bzip2.
 932 +Compressed data created by this version is entirely forwards and
 933 +backwards compatible with the previous public releases, versions
 934 +0.1pl2, 0.9.0, 0.9.5, 1.0.0 and 1.0.1, but with the following
 935 +exception: 0.9.0 and above can correctly decompress multiple
 936 +concatenated compressed files.  0.1pl2 cannot do this; it will stop
 937 +after decompressing just the first file in the stream.
 938 +
 939 +.I bzip2recover
 940 +versions prior to this one, 1.0.2, used 32-bit integers to represent
 941 +bit positions in compressed files, so it could not handle compressed
 942 +files more than 512 megabytes long.  Versions 1.0.2 and above use
 943 +64-bit ints on some platforms which support them (GNU supported
 944 +targets, and Windows).  To establish whether or not bzip2recover was
 945 +built with such a limitation, run it without arguments.  In any event
 946 +you can build yourself an unlimited version if you can recompile it
 947 +with MaybeUInt64 set to be an unsigned 64-bit integer.
 948 +
 949 +
 950 +
 951 +.SH AUTHOR
 952 +Julian Seward, jseward@acm.org.
 953 +
 954 +http://sources.redhat.com/bzip2
 955 +
 956 +The ideas embodied in
 957 +.I bzip2
 958 +are due to (at least) the following
 959 +people: Michael Burrows and David Wheeler (for the block sorting
 960 +transformation), David Wheeler (again, for the Huffman coder), Peter
 961 +Fenwick (for the structured coding model in the original
 962 +.I bzip,
 963 +and many refinements), and Alistair Moffat, Radford Neal and Ian Witten
 964 +(for the arithmetic coder in the original
 965 +.I bzip).
 966 +I am much
 967 +indebted for their help, support and advice.  See the manual in the
 968 +source distribution for pointers to sources of documentation.  Christian
 969 +von Roques encouraged me to look for faster sorting algorithms, so as to
 970 +speed up compression.  Bela Lubkin encouraged me to improve the
 971 +worst-case compression performance.
 972 +The bz* scripts are derived from those of GNU gzip.
 973 +Many people sent patches, helped
 974 +with portability problems, lent machines, gave advice and were generally
 975 +helpful.
 976 diff -Nru bzip2-1.0.2/doc/bzip2.texi bzip2-1.0.2.new/doc/bzip2.texi
 977 --- bzip2-1.0.2/doc/bzip2.texi  Thu Jan  1 01:00:00 1970
 978 +++ bzip2-1.0.2.new/doc/bzip2.texi      Fri Feb  1 04:26:21 2002
 979 @@ -0,0 +1,2234 @@
 980 +\input texinfo  @c                                  -*- Texinfo -*-
 981 +@setfilename bzip2.info
 982 +
 983 +@ignore
 984 +This file documents bzip2 version 1.0.2, and associated library
 985 +libbzip2, written by Julian Seward (jseward@acm.org).
 986 +
 987 +Copyright (C) 1996-2002 Julian R Seward
 988 +
 989 +Permission is granted to make and distribute verbatim copies of
 990 +this manual provided the copyright notice and this permission notice
 991 +are preserved on all copies.
 992 +
 993 +Permission is granted to copy and distribute translations of this manual
 994 +into another language, under the above conditions for verbatim copies.
 995 +@end ignore
 996 +
 997 +@iftex
 998 +@c @finalout
 999 +@settitle bzip2 and libbzip2
1000 +@titlepage
1001 +@title bzip2 and libbzip2
1002 +@subtitle a program and library for data compression
1003 +@subtitle copyright (C) 1996-2002 Julian Seward
1004 +@subtitle version 1.0.2 of 30 December 2001
1005 +@author Julian Seward
1006 +
1007 +@end titlepage
1008 +
1009 +@parindent 0mm
1010 +@parskip 2mm
1011 +
1012 +@end iftex
1013 +@node Top,,, (dir)
1014 +
1015 +The following text is the License for this software.  You should
1016 +find it identical to that contained in the file LICENSE in the
1017 +source distribution.
1018 +
1019 +------------------ START OF THE LICENSE ------------------
1020 +
1021 +This program, @code{bzip2},
1022 +and associated library @code{libbzip2}, are
1023 +Copyright (C) 1996-2002 Julian R Seward.  All rights reserved.
1024 +
1025 +Redistribution and use in source and binary forms, with or without
1026 +modification, are permitted provided that the following conditions
1027 +are met:
1028 +@itemize @bullet
1029 +@item
1030 +   Redistributions of source code must retain the above copyright
1031 +   notice, this list of conditions and the following disclaimer.
1032 +@item
1033 +   The origin of this software must not be misrepresented; you must
1034 +   not claim that you wrote the original software.  If you use this
1035 +   software in a product, an acknowledgment in the product
1036 +   documentation would be appreciated but is not required.
1037 +@item
1038 +   Altered source versions must be plainly marked as such, and must
1039 +   not be misrepresented as being the original software.
1040 +@item
1041 +   The name of the author may not be used to endorse or promote
1042 +   products derived from this software without specific prior written
1043 +   permission.
1044 +@end itemize
1045 +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
1046 +OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
1047 +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1048 +ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
1049 +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
1050 +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
1051 +GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
1052 +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
1053 +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
1054 +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
1055 +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1056 +
1057 +Julian Seward, Cambridge, UK.
1058 +
1059 +@code{jseward@@acm.org}
1060 +
1061 +@code{bzip2}/@code{libbzip2} version 1.0.2 of 30 December 2001.
1062 +
1063 +------------------ END OF THE LICENSE ------------------
1064 +
1065 +Web sites:
1066 +
1067 +@code{http://sources.redhat.com/bzip2}
1068 +
1069 +@code{http://www.cacheprof.org}
1070 +
1071 +PATENTS: To the best of my knowledge, @code{bzip2} does not use any patented
1072 +algorithms.  However, I do not have the resources available to carry out
1073 +a full patent search.  Therefore I cannot give any guarantee of the
1074 +above statement.
1075 +
1076 +
1077 +
1078 +
1079 +
1080 +
1081 +
1082 +@chapter Introduction
1083 +
1084 +@code{bzip2}  compresses  files  using the Burrows-Wheeler
1085 +block-sorting text compression algorithm,  and  Huffman  coding.
1086 +Compression  is  generally  considerably  better than that
1087 +achieved by more conventional LZ77/LZ78-based compressors,
1088 +and  approaches  the performance of the PPM family of statistical compressors.
1089 +
1090 +@code{bzip2} is built on top of @code{libbzip2}, a flexible library
1091 +for handling compressed data in the @code{bzip2} format.  This manual
1092 +describes both how to use the program and
1093 +how to work with the library interface.  Most of the
1094 +manual is devoted to this library, not the program,
1095 +which is good news if your interest is only in the program.
1096 +
1097 +Chapter 2 describes how to use @code{bzip2}; this is the only part
1098 +you need to read if you just want to know how to operate the program.
1099 +Chapter 3 describes the programming interfaces in detail, and
1100 +Chapter 4 records some miscellaneous notes which I thought
1101 +ought to be recorded somewhere.
1102 +
1103 +
1104 +@chapter How to use @code{bzip2}
1105 +
1106 +This chapter contains a copy of the @code{bzip2} man page,
1107 +and nothing else.
1108 +
1109 +@quotation
1110 +
1111 +@unnumberedsubsubsec NAME
1112 +@itemize
1113 +@item @code{bzip2}, @code{bunzip2}
1114 +- a block-sorting file compressor, v1.0.2
1115 +@item @code{bzcat}
1116 +- decompresses files to stdout
1117 +@item @code{bzip2recover}
1118 +- recovers data from damaged bzip2 files
1119 +@end itemize
1120 +
1121 +@unnumberedsubsubsec SYNOPSIS
1122 +@itemize
1123 +@item @code{bzip2} [ -cdfkqstvzVL123456789 ] [ filenames ...  ]
1124 +@item @code{bunzip2} [ -fkvsVL ] [ filenames ...  ]
1125 +@item @code{bzcat} [ -s ] [ filenames ...  ]
1126 +@item @code{bzip2recover} filename
1127 +@end itemize
1128 +
1129 +@unnumberedsubsubsec DESCRIPTION
1130 +
1131 +@code{bzip2} compresses files using the Burrows-Wheeler block sorting
1132 +text compression algorithm, and Huffman coding.  Compression is
1133 +generally considerably better than that achieved by more conventional
1134 +LZ77/LZ78-based compressors, and approaches the performance of the PPM
1135 +family of statistical compressors.
1136 +
1137 +The command-line options are deliberately very similar to those of GNU
1138 +@code{gzip}, but they are not identical.
1139 +
1140 +@code{bzip2} expects a list of file names to accompany the command-line
1141 +flags.  Each file is replaced by a compressed version of itself, with
1142 +the name @code{original_name.bz2}.  Each compressed file has the same
1143 +modification date, permissions, and, when possible, ownership as the
1144 +corresponding original, so that these properties can be correctly
1145 +restored at decompression time.  File name handling is naive in the
1146 +sense that there is no mechanism for preserving original file names,
1147 +permissions, ownerships or dates in filesystems which lack these
1148 +concepts, or have serious file name length restrictions, such as MS-DOS.
1149 +
1150 +@code{bzip2} and @code{bunzip2} will by default not overwrite existing
1151 +files.  If you want this to happen, specify the @code{-f} flag.
1152 +
1153 +If no file names are specified, @code{bzip2} compresses from standard
1154 +input to standard output.  In this case, @code{bzip2} will decline to
1155 +write compressed output to a terminal, as this would be entirely
1156 +incomprehensible and therefore pointless.
1157 +
1158 +@code{bunzip2} (or @code{bzip2 -d}) decompresses all
1159 +specified files.  Files which were not created by @code{bzip2}
1160 +will be detected and ignored, and a warning issued.
1161 +@code{bzip2} attempts to guess the filename for the decompressed file
1162 +from that of the compressed file as follows:
1163 +@itemize
1164 +@item @code{filename.bz2 } becomes @code{filename}
1165 +@item @code{filename.bz  } becomes @code{filename}
1166 +@item @code{filename.tbz2} becomes @code{filename.tar}
1167 +@item @code{filename.tbz } becomes @code{filename.tar}
1168 +@item @code{anyothername } becomes @code{anyothername.out}
1169 +@end itemize
1170 +If the file does not end in one of the recognised endings,
1171 +@code{.bz2}, @code{.bz},
1172 +@code{.tbz2} or @code{.tbz}, @code{bzip2} complains that it cannot
1173 +guess the name of the original file, and uses the original name
1174 +with @code{.out} appended.
1175 +
1176 +As with compression, supplying no
1177 +filenames causes decompression from standard input to standard output.
1178 +
1179 +@code{bunzip2} will correctly decompress a file which is the
1180 +concatenation of two or more compressed files.  The result is the
1181 +concatenation of the corresponding uncompressed files.  Integrity
1182 +testing (@code{-t}) of concatenated compressed files is also supported.
1183 +
1184 +You can also compress or decompress files to the standard output by
1185 +giving the @code{-c} flag.  Multiple files may be compressed and
1186 +decompressed like this.  The resulting outputs are fed sequentially to
1187 +stdout.  Compression of multiple files in this manner generates a stream
1188 +containing multiple compressed file representations.  Such a stream
1189 +can be decompressed correctly only by @code{bzip2} version 0.9.0 or
1190 +later.  Earlier versions of @code{bzip2} will stop after decompressing
1191 +the first file in the stream.
1192 +
1193 +@code{bzcat} (or @code{bzip2 -dc}) decompresses all specified files to
1194 +the standard output.
1195 +
1196 +@code{bzip2} will read arguments from the environment variables
1197 +@code{BZIP2} and @code{BZIP}, in that order, and will process them
1198 +before any arguments read from the command line.  This gives a
1199 +convenient way to supply default arguments.
1200 +
1201 +Compression is always performed, even if the compressed file is slightly
1202 +larger than the original.  Files of less than about one hundred bytes
1203 +tend to get larger, since the compression mechanism has a constant
1204 +overhead in the region of 50 bytes.  Random data (including the output
1205 +of most file compressors) is coded at about 8.05 bits per byte, giving
1206 +an expansion of around 0.5%.
1207 +
1208 +As a self-check for your protection, @code{bzip2} uses 32-bit CRCs to
1209 +make sure that the decompressed version of a file is identical to the
1210 +original.  This guards against corruption of the compressed data, and
1211 +against undetected bugs in @code{bzip2} (hopefully very unlikely).  The
1212 +chance of data corruption going undetected is microscopic, about one
1213 +chance in four billion for each file processed.  Be aware, though, that
1214 +the check occurs upon decompression, so it can only tell you that
1215 +something is wrong.  It can't help you recover the original uncompressed
1216 +data.  You can use @code{bzip2recover} to try to recover data from
1217 +damaged files.
1218 +
1219 +Return values: 0 for a normal exit, 1 for environmental problems (file
1220 +not found, invalid flags, I/O errors, &c), 2 to indicate a corrupt
1221 +compressed file, 3 for an internal consistency error (eg, bug) which
1222 +caused @code{bzip2} to panic.
1223 +
1224 +
1225 +@unnumberedsubsubsec OPTIONS
1226 +@table @code
1227 +@item -c  --stdout
1228 +Compress or decompress to standard output.
1229 +@item -d  --decompress
1230 +Force decompression.  @code{bzip2}, @code{bunzip2} and @code{bzcat} are
1231 +really the same program, and the decision about what actions to take is
1232 +done on the basis of which name is used.  This flag overrides that
1233 +mechanism, and forces @code{bzip2} to decompress.
1234 +@item -z --compress
1235 +The complement to @code{-d}: forces compression, regardless of the
1236 +invocation name.
1237 +@item -t --test
1238 +Check integrity of the specified file(s), but don't decompress them.
1239 +This really performs a trial decompression and throws away the result.
1240 +@item -f --force
1241 +Force overwrite of output files.  Normally, @code{bzip2} will not overwrite
1242 +existing output files.  Also forces @code{bzip2} to break hard links
1243 +to files, which it otherwise wouldn't do.
1244 +
1245 +@code{bzip2} normally declines to decompress files which don't have the
1246 +correct magic header bytes.  If forced (@code{-f}), however, it will
1247 +pass such files through unmodified.  This is how GNU @code{gzip}
1248 +behaves.
1249 +@item -k --keep
1250 +Keep (don't delete) input files during compression
1251 +or decompression.
1252 +@item -s --small
1253 +Reduce memory usage, for compression, decompression and testing.  Files
1254 +are decompressed and tested using a modified algorithm which only
1255 +requires 2.5 bytes per block byte.  This means any file can be
1256 +decompressed in 2300k of memory, albeit at about half the normal speed.
1257 +
1258 +During compression, @code{-s} selects a block size of 200k, which limits
1259 +memory use to around the same figure, at the expense of your compression
1260 +ratio.  In short, if your machine is low on memory (8 megabytes or
1261 +less), use @code{-s} for everything.  See MEMORY MANAGEMENT below.
1262 +@item -q --quiet
1263 +Suppress non-essential warning messages.  Messages pertaining to
1264 +I/O errors and other critical events will not be suppressed.
1265 +@item -v --verbose
1266 +Verbose mode -- show the compression ratio for each file processed.
1267 +Further @code{-v}'s increase the verbosity level, spewing out lots of
1268 +information which is primarily of interest for diagnostic purposes.
1269 +@item -L --license -V --version
1270 +Display the software version, license terms and conditions.
1271 +@item -1 (or --fast) to -9 (or --best)
1272 +Set the block size to 100 k, 200 k ..  900 k when compressing.  Has no
1273 +effect when decompressing.  See MEMORY MANAGEMENT below.
1274 +The @code{--fast} and @code{--best} aliases are primarily for GNU
1275 +@code{gzip} compatibility.  In particular, @code{--fast} doesn't make
1276 +things significantly faster.  And @code{--best} merely selects the
1277 +default behaviour.
1278 +@item --
1279 +Treats all subsequent arguments as file names, even if they start
1280 +with a dash.  This is so you can handle files with names beginning
1281 +with a dash, for example: @code{bzip2 -- -myfilename}.
1282 +@item --repetitive-fast
1283 +@item --repetitive-best
1284 +These flags are redundant in versions 0.9.5 and above.  They provided
1285 +some coarse control over the behaviour of the sorting algorithm in
1286 +earlier versions, which was sometimes useful.  0.9.5 and above have an
1287 +improved algorithm which renders these flags irrelevant.
1288 +@end table
1289 +
1290 +
1291 +@unnumberedsubsubsec MEMORY MANAGEMENT
1292 +
1293 +@code{bzip2} compresses large files in blocks.  The block size affects
1294 +both the compression ratio achieved, and the amount of memory needed for
1295 +compression and decompression.  The flags @code{-1} through @code{-9}
1296 +specify the block size to be 100,000 bytes through 900,000 bytes (the
1297 +default) respectively.  At decompression time, the block size used for
1298 +compression is read from the header of the compressed file, and
1299 +@code{bunzip2} then allocates itself just enough memory to decompress
1300 +the file.  Since block sizes are stored in compressed files, it follows
1301 +that the flags @code{-1} to @code{-9} are irrelevant to and so ignored
1302 +during decompression.
1303 +
1304 +Compression and decompression requirements, in bytes, can be estimated
1305 +as:
1306 +@example
1307 +     Compression:   400k + ( 8 x block size )
1308 +
1309 +     Decompression: 100k + ( 4 x block size ), or
1310 +                    100k + ( 2.5 x block size )
1311 +@end example
1312 +Larger block sizes give rapidly diminishing marginal returns.  Most of
1313 +the compression comes from the first two or three hundred k of block
1314 +size, a fact worth bearing in mind when using @code{bzip2} on small machines.
1315 +It is also important to appreciate that the decompression memory
1316 +requirement is set at compression time by the choice of block size.
1317 +
1318 +For files compressed with the default 900k block size, @code{bunzip2}
1319 +will require about 3700 kbytes to decompress.  To support decompression
1320 +of any file on a 4 megabyte machine, @code{bunzip2} has an option to
1321 +decompress using approximately half this amount of memory, about 2300
1322 +kbytes.  Decompression speed is also halved, so you should use this
1323 +option only where necessary.  The relevant flag is @code{-s}.
1324 +
1325 +In general, try and use the largest block size memory constraints allow,
1326 +since that maximises the compression achieved.  Compression and
1327 +decompression speed are virtually unaffected by block size.
1328 +
1329 +Another significant point applies to files which fit in a single block
1330 +-- that means most files you'd encounter using a large block size.  The
1331 +amount of real memory touched is proportional to the size of the file,
1332 +since the file is smaller than a block.  For example, compressing a file
1333 +20,000 bytes long with the flag @code{-9} will cause the compressor to
1334 +allocate around 7600k of memory, but only touch 400k + 20000 * 8 = 560
1335 +kbytes of it.  Similarly, the decompressor will allocate 3700k but only
1336 +touch 100k + 20000 * 4 = 180 kbytes.
1337 +
1338 +Here is a table which summarises the maximum memory usage for different
1339 +block sizes.  Also recorded is the total compressed size for 14 files of
1340 +the Calgary Text Compression Corpus totalling 3,141,622 bytes.  This
1341 +column gives some feel for how compression varies with block size.
1342 +These figures tend to understate the advantage of larger block sizes for
1343 +larger files, since the Corpus is dominated by smaller files.
1344 +@example
1345 +          Compress   Decompress   Decompress   Corpus
1346 +   Flag     usage      usage       -s usage     Size
1347 +
1348 +    -1      1200k       500k         350k      914704
1349 +    -2      2000k       900k         600k      877703
1350 +    -3      2800k      1300k         850k      860338
1351 +    -4      3600k      1700k        1100k      846899
1352 +    -5      4400k      2100k        1350k      845160
1353 +    -6      5200k      2500k        1600k      838626
1354 +    -7      6100k      2900k        1850k      834096
1355 +    -8      6800k      3300k        2100k      828642
1356 +    -9      7600k      3700k        2350k      828642
1357 +@end example
1358 +
1359 +@unnumberedsubsubsec RECOVERING DATA FROM DAMAGED FILES
1360 +
1361 +@code{bzip2} compresses files in blocks, usually 900kbytes long.  Each
1362 +block is handled independently.  If a media or transmission error causes
1363 +a multi-block @code{.bz2} file to become damaged, it may be possible to
1364 +recover data from the undamaged blocks in the file.
1365 +
1366 +The compressed representation of each block is delimited by a 48-bit
1367 +pattern, which makes it possible to find the block boundaries with
1368 +reasonable certainty.  Each block also carries its own 32-bit CRC, so
1369 +damaged blocks can be distinguished from undamaged ones.
1370 +
1371 +@code{bzip2recover} is a simple program whose purpose is to search for
1372 +blocks in @code{.bz2} files, and write each block out into its own
1373 +@code{.bz2} file.  You can then use @code{bzip2 -t} to test the
1374 +integrity of the resulting files, and decompress those which are
1375 +undamaged.
1376 +
1377 +@code{bzip2recover}
1378 +takes a single argument, the name of the damaged file, and writes a
1379 +number of files @code{rec00001file.bz2}, @code{rec00002file.bz2}, etc,
1380 +containing the extracted blocks.  The output filenames are designed so
1381 +that the use of wildcards in subsequent processing -- for example,
1382 +@code{bzip2 -dc rec*file.bz2 > recovered_data} -- processes the files in
1383 +the correct order.
1384 +
1385 +@code{bzip2recover} should be of most use dealing with large @code{.bz2}
1386 +files, as these will contain many blocks.  It is clearly futile to use
1387 +it on damaged single-block files, since a damaged block cannot be
1388 +recovered.  If you wish to minimise any potential data loss through
1389 +media or transmission errors, you might consider compressing with a
1390 +smaller block size.
1391 +
1392 +
1393 +@unnumberedsubsubsec PERFORMANCE NOTES
1394 +
1395 +The sorting phase of compression gathers together similar strings in the
1396 +file.  Because of this, files containing very long runs of repeated
1397 +symbols, like "aabaabaabaab ..."  (repeated several hundred times) may
1398 +compress more slowly than normal.  Versions 0.9.5 and above fare much
1399 +better than previous versions in this respect.  The ratio between
1400 +worst-case and average-case compression time is in the region of 10:1.
1401 +For previous versions, this figure was more like 100:1.  You can use the
1402 +@code{-vvvv} option to monitor progress in great detail, if you want.
1403 +
1404 +Decompression speed is unaffected by these phenomena.
1405 +
1406 +@code{bzip2} usually allocates several megabytes of memory to operate
1407 +in, and then charges all over it in a fairly random fashion.  This means
1408 +that performance, both for compressing and decompressing, is largely
1409 +determined by the speed at which your machine can service cache misses.
1410 +Because of this, small changes to the code to reduce the miss rate have
1411 +been observed to give disproportionately large performance improvements.
1412 +I imagine @code{bzip2} will perform best on machines with very large
1413 +caches.
1414 +
1415 +
1416 +@unnumberedsubsubsec CAVEATS
1417 +
1418 +I/O error messages are not as helpful as they could be.  @code{bzip2}
1419 +tries hard to detect I/O errors and exit cleanly, but the details of
1420 +what the problem is sometimes seem rather misleading.
1421 +
1422 +This manual page pertains to version 1.0.2 of @code{bzip2}.  Compressed
1423 +data created by this version is entirely forwards and backwards
1424 +compatible with the previous public releases, versions 0.1pl2, 0.9.0,
1425 +0.9.5, 1.0.0 and 1.0.1, but with the following exception: 0.9.0 and
1426 +above can correctly decompress multiple concatenated compressed files.
1427 +0.1pl2 cannot do this; it will stop after decompressing just the first
1428 +file in the stream.
1429 +
1430 +@code{bzip2recover} versions prior to this one, 1.0.2, used 32-bit
1431 +integers to represent bit positions in compressed files, so it could not
1432 +handle compressed files more than 512 megabytes long.  Versions 1.0.2 and
1433 +above use 64-bit ints on some platforms which support them (GNU
1434 +supported targets, and Windows).  To establish whether or not
1435 +@code{bzip2recover} was built with such a limitation, run it without
1436 +arguments.  In any event you can build yourself an unlimited version if
1437 +you can recompile it with @code{MaybeUInt64} set to be an unsigned
1438 +64-bit integer.
1439 +
1440 +
1441 +
1442 +@unnumberedsubsubsec AUTHOR
1443 +Julian Seward, @code{jseward@@acm.org}.
1444 +
1445 +@code{http://sources.redhat.com/bzip2}
1446 +
1447 +The ideas embodied in @code{bzip2} are due to (at least) the following
1448 +people: Michael Burrows and David Wheeler (for the block sorting
1449 +transformation), David Wheeler (again, for the Huffman coder), Peter
1450 +Fenwick (for the structured coding model in the original @code{bzip},
1451 +and many refinements), and Alistair Moffat, Radford Neal and Ian Witten
1452 +(for the arithmetic coder in the original @code{bzip}).  I am much
1453 +indebted for their help, support and advice.  See the manual in the
1454 +source distribution for pointers to sources of documentation.  Christian
1455 +von Roques encouraged me to look for faster sorting algorithms, so as to
1456 +speed up compression.  Bela Lubkin encouraged me to improve the
1457 +worst-case compression performance.  The @code{bz*} scripts are derived
1458 +from those of GNU @code{gzip}.  Many people sent patches, helped with
1459 +portability problems, lent machines, gave advice and were generally
1460 +helpful.
1461 +
1462 +@end quotation
1463 +
1464 +
1465 +
1466 +
1467 +@chapter Programming with @code{libbzip2}
1468 +
1469 +This chapter describes the programming interface to @code{libbzip2}.
1470 +
1471 +For general background information, particularly about memory
1472 +use and performance aspects, you'd be well advised to read Chapter 2
1473 +as well.
1474 +
1475 +@section Top-level structure
1476 +
1477 +@code{libbzip2} is a flexible library for compressing and decompressing
1478 +data in the @code{bzip2} data format.  Although packaged as a single
1479 +entity, it helps to regard the library as three separate parts: the low
1480 +level interface, the high level interface, and some utility
1481 +functions.
1482 +
1483 +The structure of @code{libbzip2}'s interfaces is similar to
1484 +that of Jean-loup Gailly's and Mark Adler's excellent @code{zlib}
1485 +library.
1486 +
1487 +All externally visible symbols have names beginning @code{BZ2_}.
1488 +This is new in version 1.0.  The intention is to minimise pollution
1489 +of the namespaces of library clients.
1490 +
1491 +@subsection Low-level summary
1492 +
1493 +This interface provides services for compressing and decompressing
1494 +data in memory.  There's no provision for dealing with files, streams
1495 +or any other I/O mechanisms, just straight memory-to-memory work.
1496 +In fact, this part of the library can be compiled without inclusion
1497 +of @code{stdio.h}, which may be helpful for embedded applications.
1498 +
1499 +The low-level part of the library has no global variables and
1500 +is therefore thread-safe.
1501 +
1502 +Six routines make up the low level interface:
1503 +@code{BZ2_bzCompressInit}, @code{BZ2_bzCompress}, and @* @code{BZ2_bzCompressEnd}
1504 +for compression,
1505 +and a corresponding trio @code{BZ2_bzDecompressInit}, @* @code{BZ2_bzDecompress}
1506 +and @code{BZ2_bzDecompressEnd} for decompression.
1507 +The @code{*Init} functions allocate
1508 +memory for compression/decompression and do other
1509 +initialisations, whilst the @code{*End} functions close down operations
1510 +and release memory.
1511 +
1512 +The real work is done by @code{BZ2_bzCompress} and @code{BZ2_bzDecompress}.
1513 +These compress and decompress data from a user-supplied input buffer
1514 +to a user-supplied output buffer.  These buffers can be any size;
1515 +arbitrary quantities of data are handled by making repeated calls
1516 +to these functions.  This is a flexible mechanism allowing a
1517 +consumer-pull style of activity, or producer-push, or a mixture of
1518 +both.
1519 +
1520 +
1521 +
1522 +@subsection High-level summary
1523 +
1524 +This interface provides some handy wrappers around the low-level
1525 +interface to facilitate reading and writing @code{bzip2} format
1526 +files (@code{.bz2} files).  The routines provide hooks to facilitate
1527 +reading files in which the @code{bzip2} data stream is embedded
1528 +within some larger-scale file structure, or where there are
1529 +multiple @code{bzip2} data streams concatenated end-to-end.
1530 +
1531 +For reading files, @code{BZ2_bzReadOpen}, @code{BZ2_bzRead},
1532 +@code{BZ2_bzReadClose} and @* @code{BZ2_bzReadGetUnused} are supplied.  For
1533 +writing files, @code{BZ2_bzWriteOpen}, @code{BZ2_bzWrite} and
1534 +@code{BZ2_bzWriteClose} are available.
1535 +
1536 +As with the low-level library, no global variables are used
1537 +so the library is per se thread-safe.  However, if I/O errors
1538 +occur whilst reading or writing the underlying compressed files,
1539 +you may have to consult @code{errno} to determine the cause of
1540 +the error.  In that case, you'd need a C library which correctly
1541 +supports @code{errno} in a multithreaded environment.
1542 +
1543 +To make the library a little simpler and more portable,
1544 +@code{BZ2_bzReadOpen} and @code{BZ2_bzWriteOpen} require you to pass them file
1545 +handles (@code{FILE*}s) which have previously been opened for reading or
1546 +writing respectively.  That avoids portability problems associated with
1547 +file operations and file attributes, whilst not being much of an
1548 +imposition on the programmer.
1549 +
1550 +
1551 +
1552 +@subsection Utility functions summary
1553 +For very simple needs, @code{BZ2_bzBuffToBuffCompress} and
1554 +@code{BZ2_bzBuffToBuffDecompress} are provided.  These compress
1555 +data in memory from one buffer to another buffer in a single
1556 +function call.  You should assess whether these functions
1557 +fulfill your memory-to-memory compression/decompression
1558 +requirements before investing effort in understanding the more
1559 +general but more complex low-level interface.
1560 +
1561 +Yoshioka Tsuneo (@code{QWF00133@@niftyserve.or.jp} /
1562 +@code{tsuneo-y@@is.aist-nara.ac.jp}) has contributed some functions to
1563 +give better @code{zlib} compatibility.  These functions are
1564 +@code{BZ2_bzopen}, @code{BZ2_bzread}, @code{BZ2_bzwrite}, @code{BZ2_bzflush},
1565 +@code{BZ2_bzclose},
1566 +@code{BZ2_bzerror} and @code{BZ2_bzlibVersion}.  You may find these functions
1567 +more convenient for simple file reading and writing, than those in the
1568 +high-level interface.  These functions are not (yet) officially part of
1569 +the library, and are minimally documented here.  If they break, you
1570 +get to keep all the pieces.  I hope to document them properly when time
1571 +permits.
1572 +
1573 +Yoshioka also contributed modifications to allow the library to be
1574 +built as a Windows DLL.
1575 +
1576 +
1577 +@section Error handling
1578 +
1579 +The library is designed to recover cleanly in all situations, including
1580 +the worst-case situation of decompressing random data.  I'm not
1581 +100% sure that it can always do this, so you might want to add
1582 +a signal handler to catch segmentation violations during decompression
1583 +if you are feeling especially paranoid.  I would be interested in
1584 +hearing more about the robustness of the library to corrupted
1585 +compressed data.
1586 +
1587 +Version 1.0 is much more robust in this respect than
1588 +0.9.0 or 0.9.5.  Investigations with Checker (a tool for
1589 +detecting problems with memory management, similar to Purify)
1590 +indicate that, at least for the few files I tested, all single-bit
1591 +errors in the decompressed data are caught properly, with no
1592 +segmentation faults, no reads of uninitialised data and no
1593 +out of range reads or writes.  So it's certainly much improved,
1594 +although I wouldn't claim it to be totally bombproof.
1595 +
1596 +The file @code{bzlib.h} contains all definitions needed to use
1597 +the library.  In particular, you should definitely not include
1598 +@code{bzlib_private.h}.
1599 +
1600 +In @code{bzlib.h}, the various return values are defined.  The following
1601 +list is not intended as an exhaustive description of the circumstances
1602 +in which a given value may be returned -- those descriptions are given
1603 +later.  Rather, it is intended to convey the rough meaning of each
1604 +return value.  The first five values are normal and not intended to
1605 +denote an error situation.
1606 +@table @code
1607 +@item BZ_OK
1608 +The requested action was completed successfully.
1609 +@item BZ_RUN_OK
1610 +@itemx BZ_FLUSH_OK
1611 +@itemx BZ_FINISH_OK
1612 +In @code{BZ2_bzCompress}, the requested flush/finish/nothing-special action
1613 +was completed successfully.
1614 +@item BZ_STREAM_END
1615 +Compression of data was completed, or the logical stream end was
1616 +detected during decompression.
1617 +@end table
1618 +
1619 +The following return values indicate an error of some kind.
1620 +@table @code
1621 +@item BZ_CONFIG_ERROR
1622 +Indicates that the library has been improperly compiled on your
1623 +platform -- a major configuration error.  Specifically, it means
1624 +that @code{sizeof(char)}, @code{sizeof(short)} and @code{sizeof(int)}
1625 +are not 1, 2 and 4 respectively, as they should be.  Note that the
1626 +library should still work properly on 64-bit platforms which follow
1627 +the LP64 programming model -- that is, where @code{sizeof(long)}
1628 +and @code{sizeof(void*)} are 8.  Under LP64, @code{sizeof(int)} is
1629 +still 4, so @code{libbzip2}, which doesn't use the @code{long} type,
1630 +is OK.
1631 +@item BZ_SEQUENCE_ERROR
1632 +When using the library, it is important to call the functions in the
1633 +correct sequence and with data structures (buffers etc) in the correct
1634 +states.  @code{libbzip2} checks as much as it can to ensure this is
1635 +happening, and returns @code{BZ_SEQUENCE_ERROR} if not.  Code which
1636 +complies precisely with the function semantics, as detailed below,
1637 +should never receive this value; such an event denotes buggy code
1638 +which you should investigate.
1639 +@item BZ_PARAM_ERROR
1640 +Returned when a parameter to a function call is out of range
1641 +or otherwise manifestly incorrect.  As with @code{BZ_SEQUENCE_ERROR},
1642 +this denotes a bug in the client code.  The distinction between
1643 +@code{BZ_PARAM_ERROR} and @code{BZ_SEQUENCE_ERROR} is a bit hazy, but still worth
1644 +making.
1645 +@item BZ_MEM_ERROR
1646 +Returned when a request to allocate memory failed.  Note that the
1647 +quantity of memory needed to decompress a stream cannot be determined
1648 +until the stream's header has been read.  So @code{BZ2_bzDecompress} and
1649 +@code{BZ2_bzRead} may return @code{BZ_MEM_ERROR} even though some of
1650 +the compressed data has been read.  The same is not true for
1651 +compression; once @code{BZ2_bzCompressInit} or @code{BZ2_bzWriteOpen} have
1652 +successfully completed, @code{BZ_MEM_ERROR} cannot occur.
1653 +@item BZ_DATA_ERROR
1654 +Returned when a data integrity error is detected during decompression.
1655 +Most importantly, this means when stored and computed CRCs for the
1656 +data do not match.  This value is also returned upon detection of any
1657 +other anomaly in the compressed data.
1658 +@item BZ_DATA_ERROR_MAGIC
1659 +As a special case of @code{BZ_DATA_ERROR}, it is sometimes useful to
1660 +know when the compressed stream does not start with the correct
1661 +magic bytes (@code{'B' 'Z' 'h'}).
1662 +@item BZ_IO_ERROR
1663 +Returned by @code{BZ2_bzRead} and @code{BZ2_bzWrite} when there is an error
1664 +reading or writing in the compressed file, and by @code{BZ2_bzReadOpen}
1665 +and @code{BZ2_bzWriteOpen} for attempts to use a file for which the
1666 +error indicator (viz, @code{ferror(f)}) is set.
1667 +On receipt of @code{BZ_IO_ERROR}, the caller should consult
1668 +@code{errno} and/or @code{perror} to acquire operating-system
1669 +specific information about the problem.
1670 +@item BZ_UNEXPECTED_EOF
1671 +Returned by @code{BZ2_bzRead} when the compressed file finishes
1672 +before the logical end of stream is detected.
1673 +@item BZ_OUTBUFF_FULL
1674 +Returned by @code{BZ2_bzBuffToBuffCompress} and
1675 +@code{BZ2_bzBuffToBuffDecompress} to indicate that the output data
1676 +will not fit into the output buffer provided.
1677 +@end table
1678 +
1679 +
1680 +
1681 +@section Low-level interface
1682 +
1683 +@subsection @code{BZ2_bzCompressInit}
1684 +@example
1685 +typedef
1686 +   struct @{
1687 +      char *next_in;
1688 +      unsigned int avail_in;
1689 +      unsigned int total_in_lo32;
1690 +      unsigned int total_in_hi32;
1691 +
1692 +      char *next_out;
1693 +      unsigned int avail_out;
1694 +      unsigned int total_out_lo32;
1695 +      unsigned int total_out_hi32;
1696 +
1697 +      void *state;
1698 +
1699 +      void *(*bzalloc)(void *,int,int);
1700 +      void (*bzfree)(void *,void *);
1701 +      void *opaque;
1702 +   @}
1703 +   bz_stream;
1704 +
1705 +int BZ2_bzCompressInit ( bz_stream *strm,
1706 +                         int blockSize100k,
1707 +                         int verbosity,
1708 +                         int workFactor );
1709 +
1710 +@end example
1711 +
1712 +Prepares for compression.  The @code{bz_stream} structure
1713 +holds all data pertaining to the compression activity.
1714 +A @code{bz_stream} structure should be allocated and initialised
1715 +prior to the call.
1716 +The fields of @code{bz_stream}
1717 +comprise the entirety of the user-visible data.  @code{state}
1718 +is a pointer to the private data structures required for compression.
1719 +
1720 +Custom memory allocators are supported, via fields @code{bzalloc},
1721 +@code{bzfree},
1722 +and @code{opaque}.  The value
1723 +@code{opaque} is passed as the first argument to
1724 +all calls to @code{bzalloc} and @code{bzfree}, but is
1725 +otherwise ignored by the library.
1726 +The call @code{bzalloc ( opaque, n, m )} is expected to return a
1727 +pointer @code{p} to
1728 +@code{n * m} bytes of memory, and @code{bzfree ( opaque, p )}
1729 +should free
1730 +that memory.
1731 +
1732 +If you don't want to use a custom memory allocator, set @code{bzalloc},
1733 +@code{bzfree} and
1734 +@code{opaque} to @code{NULL},
1735 +and the library will then use the standard @code{malloc}/@code{free}
1736 +routines.
1737 +
1738 +Before calling @code{BZ2_bzCompressInit}, fields @code{bzalloc},
1739 +@code{bzfree} and @code{opaque} should
1740 +be filled appropriately, as just described.  Upon return, the internal
1741 +state will have been allocated and initialised, and @code{total_in_lo32},
1742 +@code{total_in_hi32}, @code{total_out_lo32} and
1743 +@code{total_out_hi32} will have been set to zero.
1744 +These four fields are used by the library
1745 +to inform the caller of the total amount of data passed into and out of
1746 +the library, respectively.  You should not try to change them.
1747 +As of version 1.0, 64-bit counts are maintained, even on 32-bit
1748 +platforms, using the @code{_hi32} fields to store the upper 32 bits
1749 +of the count.  So, for example, the total amount of data in
1750 +is @code{(total_in_hi32 << 32) + total_in_lo32}.
1751 +
1752 +Parameter @code{blockSize100k} specifies the block size to be used for
1753 +compression.  It should be a value between 1 and 9 inclusive, and the
1754 +actual block size used is 100000 x this figure.  9 gives the best
1755 +compression but takes most memory.
1756 +
1757 +Parameter @code{verbosity} should be set to a number between 0 and 4
1758 +inclusive.  0 is silent, and greater numbers give increasingly verbose
1759 +monitoring/debugging output.  If the library has been compiled with
1760 +@code{-DBZ_NO_STDIO}, no such output will appear for any verbosity
1761 +setting.
1762 +
1763 +Parameter @code{workFactor} controls how the compression phase behaves
1764 +when presented with worst case, highly repetitive, input data.  If
1765 +compression runs into difficulties caused by repetitive data, the
1766 +library switches from the standard sorting algorithm to a fallback
1767 +algorithm.  The fallback is slower than the standard algorithm by
1768 +perhaps a factor of three, but always behaves reasonably, no matter how
1769 +bad the input.
1770 +
1771 +Lower values of @code{workFactor} reduce the amount of effort the
1772 +standard algorithm will expend before resorting to the fallback.  You
1773 +should set this parameter carefully; too low, and many inputs will be
1774 +handled by the fallback algorithm and so compress rather slowly, too
1775 +high, and your average-to-worst case compression times can become very
1776 +large.  The default value of 30 gives reasonable behaviour over a wide
1777 +range of circumstances.
1778 +
1779 +Allowable values range from 0 to 250 inclusive.  0 is a special case,
1780 +equivalent to using the default value of 30.
1781 +
1782 +Note that the compressed output generated is the same regardless of
1783 +whether or not the fallback algorithm is used.
1784 +
1785 +Be aware also that this parameter may disappear entirely in future
1786 +versions of the library.  In principle it should be possible to devise a
1787 +good way to automatically choose which algorithm to use.  Such a
1788 +mechanism would render the parameter obsolete.
1789 +
1790 +Possible return values:
1791 +@display
1792 +      @code{BZ_CONFIG_ERROR}
1793 +         if the library has been mis-compiled
1794 +      @code{BZ_PARAM_ERROR}
1795 +         if @code{strm} is @code{NULL}
1796 +         or @code{blockSize100k} < 1 or @code{blockSize100k} > 9
1797 +         or @code{verbosity} < 0 or @code{verbosity} > 4
1798 +         or @code{workFactor} < 0 or @code{workFactor} > 250
1799 +      @code{BZ_MEM_ERROR}
1800 +         if not enough memory is available
1801 +      @code{BZ_OK}
1802 +         otherwise
1803 +@end display
1804 +Allowable next actions:
1805 +@display
1806 +      @code{BZ2_bzCompress}
1807 +         if @code{BZ_OK} is returned
1808 +      no specific action needed in case of error
1809 +@end display
1810 +
1811 +@subsection @code{BZ2_bzCompress}
1812 +@example
1813 +   int BZ2_bzCompress ( bz_stream *strm, int action );
1814 +@end example
1815 +Provides more input and/or output buffer space for the library.  The
1816 +caller maintains input and output buffers, and calls @code{BZ2_bzCompress} to
1817 +transfer data between them.
1818 +
1819 +Before each call to @code{BZ2_bzCompress}, @code{next_in} should point at
1820 +the data to be compressed, and @code{avail_in} should indicate how many
1821 +bytes the library may read.  @code{BZ2_bzCompress} updates @code{next_in},
1822 +@code{avail_in} and @code{total_in} to reflect the number of bytes it
1823 +has read.
1824 +
1825 +Similarly, @code{next_out} should point to a buffer in which the
1826 +compressed data is to be placed, with @code{avail_out} indicating how
1827 +much output space is available.  @code{BZ2_bzCompress} updates
1828 +@code{next_out}, @code{avail_out} and @code{total_out} to reflect the
1829 +number of bytes output.
1830 +
1831 +You may provide and remove as little or as much data as you like on each
1832 +call of @code{BZ2_bzCompress}.  In the limit, it is acceptable to supply and
1833 +remove data one byte at a time, although this would be terribly
1834 +inefficient.  You should always ensure that at least one byte of output
1835 +space is available at each call.
1836 +
1837 +A second purpose of @code{BZ2_bzCompress} is to request a change of mode of the
1838 +compressed stream.
1839 +
1840 +Conceptually, a compressed stream can be in one of four states: IDLE,
1841 +RUNNING, FLUSHING and FINISHING.  Before initialisation
1842 +(@code{BZ2_bzCompressInit}) and after termination (@code{BZ2_bzCompressEnd}), a
1843 +stream is regarded as IDLE.
1844 +
1845 +Upon initialisation (@code{BZ2_bzCompressInit}), the stream is placed in the
1846 +RUNNING state.  Subsequent calls to @code{BZ2_bzCompress} should pass
1847 +@code{BZ_RUN} as the requested action; other actions are illegal and
1848 +will result in @code{BZ_SEQUENCE_ERROR}.
1849 +
1850 +At some point, the calling program will have provided all the input data
1851 +it wants to.  It will then want to finish up -- in effect, asking the
1852 +library to process any data it might have buffered internally.  In this
1853 +state, @code{BZ2_bzCompress} will no longer attempt to read data from
1854 +@code{next_in}, but it will want to write data to @code{next_out}.
1855 +Because the output buffer supplied by the user can be arbitrarily small,
1856 +the finishing-up operation cannot necessarily be done with a single call
1857 +of @code{BZ2_bzCompress}.
1858 +
1859 +Instead, the calling program passes @code{BZ_FINISH} as an action to
1860 +@code{BZ2_bzCompress}.  This changes the stream's state to FINISHING.  Any
1861 +remaining input (ie, @code{next_in[0 .. avail_in-1]}) is compressed and
1862 +transferred to the output buffer.  To do this, @code{BZ2_bzCompress} must be
1863 +called repeatedly until all the output has been consumed.  At that
1864 +point, @code{BZ2_bzCompress} returns @code{BZ_STREAM_END}, and the stream's
1865 +state is set back to IDLE.  @code{BZ2_bzCompressEnd} should then be
1866 +called.
1867 +
1868 +Just to make sure the calling program does not cheat, the library makes
1869 +a note of @code{avail_in} at the time of the first call to
1870 +@code{BZ2_bzCompress} which has @code{BZ_FINISH} as an action (ie, at the
1871 +time the program has announced its intention to not supply any more
1872 +input).  By comparing this value with that of @code{avail_in} over
1873 +subsequent calls to @code{BZ2_bzCompress}, the library can detect any
1874 +attempts to slip in more data to compress.  Any calls for which this is
1875 +detected will return @code{BZ_SEQUENCE_ERROR}.  This indicates a
1876 +programming mistake which should be corrected.
1877 +
1878 +Instead of asking to finish, the calling program may ask
1879 +@code{BZ2_bzCompress} to take all the remaining input, compress it and
1880 +terminate the current (Burrows-Wheeler) compression block.  This could
1881 +be useful for error control purposes.  The mechanism is analogous to
1882 +that for finishing: call @code{BZ2_bzCompress} with an action of
1883 +@code{BZ_FLUSH}, remove output data, and persist with the
1884 +@code{BZ_FLUSH} action until the value @code{BZ_RUN_OK} is returned.  As
1885 +with finishing, @code{BZ2_bzCompress} detects any attempt to provide more
1886 +input data once the flush has begun.
1887 +
1888 +Once the flush is complete, the stream returns to the normal RUNNING
1889 +state.
1890 +
1891 +This all sounds pretty complex, but isn't really.  Here's a table
1892 +which shows which actions are allowable in each state, what action
1893 +will be taken, what the next state is, and what the non-error return
1894 +values are.  Note that you can't explicitly ask what state the
1895 +stream is in, but nor do you need to -- it can be inferred from the
1896 +values returned by @code{BZ2_bzCompress}.
1897 +@display
1898 +IDLE/@code{any}
1899 +      Illegal.  IDLE state only exists after @code{BZ2_bzCompressEnd} or
1900 +      before @code{BZ2_bzCompressInit}.
1901 +      Return value = @code{BZ_SEQUENCE_ERROR}
1902 +
1903 +RUNNING/@code{BZ_RUN}
1904 +      Compress from @code{next_in} to @code{next_out} as much as possible.
1905 +      Next state = RUNNING
1906 +      Return value = @code{BZ_RUN_OK}
1907 +
1908 +RUNNING/@code{BZ_FLUSH}
1909 +      Remember current value of @code{next_in}.  Compress from @code{next_in}
1910 +      to @code{next_out} as much as possible, but do not accept any more input.
1911 +      Next state = FLUSHING
1912 +      Return value = @code{BZ_FLUSH_OK}
1913 +
1914 +RUNNING/@code{BZ_FINISH}
1915 +      Remember current value of @code{next_in}.  Compress from @code{next_in}
1916 +      to @code{next_out} as much as possible, but do not accept any more input.
1917 +      Next state = FINISHING
1918 +      Return value = @code{BZ_FINISH_OK}
1919 +
1920 +FLUSHING/@code{BZ_FLUSH}
1921 +      Compress from @code{next_in} to @code{next_out} as much as possible,
1922 +      but do not accept any more input.
1923 +      If all the existing input has been used up and all compressed
1924 +      output has been removed
1925 +         Next state = RUNNING; Return value = @code{BZ_RUN_OK}
1926 +      else
1927 +         Next state = FLUSHING; Return value = @code{BZ_FLUSH_OK}
1928 +
1929 +FLUSHING/other
1930 +      Illegal.
1931 +      Return value = @code{BZ_SEQUENCE_ERROR}
1932 +
1933 +FINISHING/@code{BZ_FINISH}
1934 +      Compress from @code{next_in} to @code{next_out} as much as possible,
1935 +      but do not accept any more input.
1936 +      If all the existing input has been used up and all compressed
1937 +      output has been removed
1938 +         Next state = IDLE; Return value = @code{BZ_STREAM_END}
1939 +      else
1940 +         Next state = FINISHING; Return value = @code{BZ_FINISH_OK}
1941 +
1942 +FINISHING/other
1943 +      Illegal.
1944 +      Return value = @code{BZ_SEQUENCE_ERROR}
1945 +@end display
1946 +
1947 +That still looks complicated?  Well, fair enough.  The usual sequence
1948 +of calls for compressing a load of data is:
1949 +@itemize @bullet
1950 +@item Get started with @code{BZ2_bzCompressInit}.
1951 +@item Shovel data in and shlurp out its compressed form using zero or more
1952 +calls of @code{BZ2_bzCompress} with action = @code{BZ_RUN}.
1953 +@item Finish up.
1954 +Repeatedly call @code{BZ2_bzCompress} with action = @code{BZ_FINISH},
1955 +copying out the compressed output, until @code{BZ_STREAM_END} is returned.
1956 +@item Close up and go home.  Call @code{BZ2_bzCompressEnd}.
1957 +@end itemize
1958 +If the data you want to compress fits into your input buffer all
1959 +at once, you can skip the calls of @code{BZ2_bzCompress ( ..., BZ_RUN )} and
1960 +just do the @code{BZ2_bzCompress ( ..., BZ_FINISH )} calls.
1961 +
1962 +All required memory is allocated by @code{BZ2_bzCompressInit}.  The
1963 +compression library can accept any data at all (obviously).  So you
1964 +shouldn't get any error return values from the @code{BZ2_bzCompress} calls.
1965 +If you do, they will be @code{BZ_SEQUENCE_ERROR}, and indicate a bug in
1966 +your programming.
1967 +
1968 +Trivial other possible return values:
1969 +@display
1970 +      @code{BZ_PARAM_ERROR}
1971 +         if @code{strm} is @code{NULL}, or @code{strm->s} is @code{NULL}
1972 +@end display
1973 +
1974 +@subsection @code{BZ2_bzCompressEnd}
1975 +@example
1976 +int BZ2_bzCompressEnd ( bz_stream *strm );
1977 +@end example
1978 +Releases all memory associated with a compression stream.
1979 +
1980 +Possible return values:
1981 +@display
1982 +   @code{BZ_PARAM_ERROR}    if @code{strm} is @code{NULL} or @code{strm->s} is @code{NULL}
1983 +   @code{BZ_OK}    otherwise
1984 +@end display
1985 +
1986 +
1987 +@subsection @code{BZ2_bzDecompressInit}
1988 +@example
1989 +int BZ2_bzDecompressInit ( bz_stream *strm, int verbosity, int small );
1990 +@end example
1991 +Prepares for decompression.  As with @code{BZ2_bzCompressInit}, a
1992 +@code{bz_stream} record should be allocated and initialised before the
1993 +call.  Fields @code{bzalloc}, @code{bzfree} and @code{opaque} should be
1994 +set if a custom memory allocator is required, or made @code{NULL} for
1995 +the normal @code{malloc}/@code{free} routines.  Upon return, the internal
1996 +state will have been initialised, and @code{total_in} and
1997 +@code{total_out} will be zero.
1998 +
1999 +For the meaning of parameter @code{verbosity}, see @code{BZ2_bzCompressInit}.
2000 +
2001 +If @code{small} is nonzero, the library will use an alternative
2002 +decompression algorithm which uses less memory but at the cost of
2003 +decompressing more slowly (roughly speaking, half the speed, but the
2004 +maximum memory requirement drops to around 2300k).  See Chapter 2 for
2005 +more information on memory management.
2006 +
2007 +Note that the amount of memory needed to decompress
2008 +a stream cannot be determined until the stream's header has been read,
2009 +so even if @code{BZ2_bzDecompressInit} succeeds, a subsequent
2010 +@code{BZ2_bzDecompress} could fail with @code{BZ_MEM_ERROR}.
2011 +
2012 +Possible return values:
2013 +@display
2014 +      @code{BZ_CONFIG_ERROR}
2015 +         if the library has been mis-compiled
2016 +      @code{BZ_PARAM_ERROR}
2017 +         if @code{(small != 0 && small != 1)}
2018 +         or @code{(verbosity < 0 || verbosity > 4)}
2019 +      @code{BZ_MEM_ERROR}
2020 +         if insufficient memory is available
2021 +@end display
2022 +
2023 +Allowable next actions:
2024 +@display
2025 +      @code{BZ2_bzDecompress}
2026 +         if @code{BZ_OK} was returned
2027 +      no specific action required in case of error
2028 +@end display
2029 +
2030 +
2031 +
2032 +@subsection @code{BZ2_bzDecompress}
2033 +@example
2034 +int BZ2_bzDecompress ( bz_stream *strm );
2035 +@end example
2036 +Provides more input and/or output buffer space for the library.  The
2037 +caller maintains input and output buffers, and uses @code{BZ2_bzDecompress}
2038 +to transfer data between them.
2039 +
2040 +Before each call to @code{BZ2_bzDecompress}, @code{next_in}
2041 +should point at the compressed data,
2042 +and @code{avail_in} should indicate how many bytes the library
2043 +may read.  @code{BZ2_bzDecompress} updates @code{next_in}, @code{avail_in}
2044 +and @code{total_in}
2045 +to reflect the number of bytes it has read.
2046 +
2047 +Similarly, @code{next_out} should point to a buffer in which the uncompressed
2048 +output is to be placed, with @code{avail_out} indicating how much output space
2049 +is available.  @code{BZ2_bzDecompress} updates @code{next_out},
2050 +@code{avail_out} and @code{total_out} to reflect
2051 +the number of bytes output.
2052 +
2053 +You may provide and remove as little or as much data as you like on
2054 +each call of @code{BZ2_bzDecompress}.
2055 +In the limit, it is acceptable to
2056 +supply and remove data one byte at a time, although this would be
2057 +terribly inefficient.  You should always ensure that at least one
2058 +byte of output space is available at each call.
2059 +
2060 +Use of @code{BZ2_bzDecompress} is simpler than @code{BZ2_bzCompress}.
2061 +
2062 +You should provide input and remove output as described above, and
2063 +repeatedly call @code{BZ2_bzDecompress} until @code{BZ_STREAM_END} is
2064 +returned.  Appearance of @code{BZ_STREAM_END} denotes that
2065 +@code{BZ2_bzDecompress} has detected the logical end of the compressed
2066 +stream.  @code{BZ2_bzDecompress} will not produce @code{BZ_STREAM_END} until
2067 +all output data has been placed into the output buffer, so once
2068 +@code{BZ_STREAM_END} appears, you are guaranteed to have available all
2069 +the decompressed output, and @code{BZ2_bzDecompressEnd} can safely be
2070 +called.
2071 +
2072 +In case of an error return value, you should call @code{BZ2_bzDecompressEnd}
2073 +to clean up and release memory.
2074 +
2075 +Possible return values:
2076 +@display
2077 +      @code{BZ_PARAM_ERROR}
2078 +         if @code{strm} is @code{NULL} or @code{strm->s} is @code{NULL}
2079 +         or @code{strm->avail_out < 1}
2080 +      @code{BZ_DATA_ERROR}
2081 +         if a data integrity error is detected in the compressed stream
2082 +      @code{BZ_DATA_ERROR_MAGIC}
2083 +         if the compressed stream doesn't begin with the right magic bytes
2084 +      @code{BZ_MEM_ERROR}
2085 +         if there wasn't enough memory available
2086 +      @code{BZ_STREAM_END}
2087 +         if the logical end of the data stream was detected and all
2088 +         output has been consumed, eg @code{s->avail_out > 0}
2089 +      @code{BZ_OK}
2090 +         otherwise
2091 +@end display
2092 +Allowable next actions:
2093 +@display
2094 +      @code{BZ2_bzDecompress}
2095 +         if @code{BZ_OK} was returned
2096 +      @code{BZ2_bzDecompressEnd}
2097 +         otherwise
2098 +@end display
2099 +
2100 +
2101 +@subsection @code{BZ2_bzDecompressEnd}
2102 +@example
2103 +int BZ2_bzDecompressEnd ( bz_stream *strm );
2104 +@end example
2105 +Releases all memory associated with a decompression stream.
2106 +
2107 +Possible return values:
2108 +@display
2109 +      @code{BZ_PARAM_ERROR}
2110 +         if @code{strm} is @code{NULL} or @code{strm->s} is @code{NULL}
2111 +      @code{BZ_OK}
2112 +         otherwise
2113 +@end display
2114 +
2115 +Allowable next actions:
2116 +@display
2117 +      None.
2118 +@end display
2119 +
2120 +
2121 +@section High-level interface
2122 +
2123 +This interface provides functions for reading and writing
2124 +@code{bzip2} format files.  First, some general points.
2125 +
2126 +@itemize @bullet
2127 +@item All of the functions take an @code{int*} first argument,
2128 +  @code{bzerror}.
2129 +  After each call, @code{bzerror} should be consulted first to determine
2130 +  the outcome of the call.  If @code{bzerror} is @code{BZ_OK},
2131 +  the call completed
2132 +  successfully, and only then should the return value of the function
2133 +  (if any) be consulted.  If @code{bzerror} is @code{BZ_IO_ERROR},
2134 +  there was an error
2135 +  reading/writing the underlying compressed file, and you should
2136 +  then consult @code{errno}/@code{perror} to determine the
2137 +  cause of the difficulty.
2138 +  @code{bzerror} may also be set to various other values; precise details are
2139 +  given on a per-function basis below.
2140 +@item If @code{bzerror} indicates an error
2141 +  (ie, anything except @code{BZ_OK} and @code{BZ_STREAM_END}),
2142 +  you should immediately call @code{BZ2_bzReadClose} (or @code{BZ2_bzWriteClose},
2143 +  depending on whether you are attempting to read or to write)
2144 +  to free up all resources associated
2145 +  with the stream.  Once an error has been indicated, behaviour of all calls
2146 +  except @code{BZ2_bzReadClose} (@code{BZ2_bzWriteClose}) is undefined.
2147 +  The implication is that (1) @code{bzerror} should
2148 +  be checked after each call, and (2) if @code{bzerror} indicates an error,
2149 +  @code{BZ2_bzReadClose} (@code{BZ2_bzWriteClose}) should then be called to clean up.
2150 +@item The @code{FILE*} arguments passed to
2151 +   @code{BZ2_bzReadOpen}/@code{BZ2_bzWriteOpen}
2152 +  should be set to binary mode.
2153 +  Most Unix systems will do this by default, but other platforms,
2154 +  including Windows and Mac, will not.  If you omit this, you may
2155 +  encounter problems when moving code to new platforms.
2156 +@item Memory allocation requests are handled by
2157 +  @code{malloc}/@code{free}.
2158 +  At present
2159 +  there is no facility for user-defined memory allocators in the file I/O
2160 +  functions (could easily be added, though).
2161 +@end itemize
2162 +
2163 +
2164 +
2165 +@subsection @code{BZ2_bzReadOpen}
2166 +@example
2167 +   typedef void BZFILE;
2168 +
2169 +   BZFILE *BZ2_bzReadOpen ( int *bzerror, FILE *f,
2170 +                            int small, int verbosity,
2171 +                            void *unused, int nUnused );
2172 +@end example
2173 +Prepare to read compressed data from file handle @code{f}.  @code{f}
2174 +should refer to a file which has been opened for reading, and for which
2175 +the error indicator (@code{ferror(f)}) is not set.  If @code{small} is 1,
2176 +the library will try to decompress using less memory, at the expense of
2177 +speed.
2178 +
2179 +For reasons explained below, @code{BZ2_bzRead} will decompress the
2180 +@code{nUnused} bytes starting at @code{unused}, before starting to read
2181 +from the file @code{f}.  At most @code{BZ_MAX_UNUSED} bytes may be
2182 +supplied like this.  If this facility is not required, you should pass
2183 +@code{NULL} and @code{0} for @code{unused} and @code{nUnused}
2184 +respectively.
2185 +
2186 +For the meaning of parameters @code{small} and @code{verbosity},
2187 +see @code{BZ2_bzDecompressInit}.
2188 +
2189 +The amount of memory needed to decompress a file cannot be determined
2190 +until the file's header has been read.  So it is possible that
2191 +@code{BZ2_bzReadOpen} returns @code{BZ_OK} but a subsequent call of
2192 +@code{BZ2_bzRead} will return @code{BZ_MEM_ERROR}.
2193 +
2194 +Possible assignments to @code{bzerror}:
2195 +@display
2196 +      @code{BZ_CONFIG_ERROR}
2197 +         if the library has been mis-compiled
2198 +      @code{BZ_PARAM_ERROR}
2199 +         if @code{f} is @code{NULL}
2200 +         or @code{small} is neither @code{0} nor @code{1}
2201 +         or @code{(unused == NULL && nUnused != 0)}
2202 +         or @code{(unused != NULL && !(0 <= nUnused <= BZ_MAX_UNUSED))}
2203 +      @code{BZ_IO_ERROR}
2204 +         if @code{ferror(f)} is nonzero
2205 +      @code{BZ_MEM_ERROR}
2206 +         if insufficient memory is available
2207 +      @code{BZ_OK}
2208 +         otherwise.
2209 +@end display
2210 +
2211 +Possible return values:
2212 +@display
2213 +      Pointer to an abstract @code{BZFILE}
2214 +         if @code{bzerror} is @code{BZ_OK}
2215 +      @code{NULL}
2216 +         otherwise
2217 +@end display
2218 +
2219 +Allowable next actions:
2220 +@display
2221 +      @code{BZ2_bzRead}
2222 +         if @code{bzerror} is @code{BZ_OK}
2223 +      @code{BZ2_bzClose}
2224 +         otherwise
2225 +@end display
2226 +
2227 +
2228 +@subsection @code{BZ2_bzRead}
2229 +@example
2230 +   int BZ2_bzRead ( int *bzerror, BZFILE *b, void *buf, int len );
2231 +@end example
2232 +Reads up to @code{len} (uncompressed) bytes from the compressed file
2233 +@code{b} into
2234 +the buffer @code{buf}.  If the read was successful,
2235 +@code{bzerror} is set to @code{BZ_OK}
2236 +and the number of bytes read is returned.  If the logical end-of-stream
2237 +was detected, @code{bzerror} will be set to @code{BZ_STREAM_END},
2238 +and the number
2239 +of bytes read is returned.  All other @code{bzerror} values denote an error.
2240 +
2241 +@code{BZ2_bzRead} will supply @code{len} bytes,
2242 +unless the logical stream end is detected
2243 +or an error occurs.  Because of this, it is possible to detect the
2244 +stream end by observing when the number of bytes returned is
2245 +less than the number
2246 +requested.  Nevertheless, this is regarded as inadvisable; you should
2247 +instead check @code{bzerror} after every call and watch out for
2248 +@code{BZ_STREAM_END}.
2249 +
2250 +Internally, @code{BZ2_bzRead} copies data from the compressed file in chunks
2251 +of size @code{BZ_MAX_UNUSED} bytes
2252 +before decompressing it.  If the file contains more bytes than strictly
2253 +needed to reach the logical end-of-stream, @code{BZ2_bzRead} will almost certainly
2254 +read some of the trailing data before signalling @code{BZ_STREAM_END}.
2255 +To collect the read but unused data once @code{BZ_STREAM_END} has
2256 +appeared, call @code{BZ2_bzReadGetUnused} immediately before @code{BZ2_bzReadClose}.
2257 +
2258 +Possible assignments to @code{bzerror}:
2259 +@display
2260 +      @code{BZ_PARAM_ERROR}
2261 +         if @code{b} is @code{NULL} or @code{buf} is @code{NULL} or @code{len < 0}
2262 +      @code{BZ_SEQUENCE_ERROR}
2263 +         if @code{b} was opened with @code{BZ2_bzWriteOpen}
2264 +      @code{BZ_IO_ERROR}
2265 +         if there is an error reading from the compressed file
2266 +      @code{BZ_UNEXPECTED_EOF}
2267 +         if the compressed file ended before the logical end-of-stream was detected
2268 +      @code{BZ_DATA_ERROR}
2269 +         if a data integrity error was detected in the compressed stream
2270 +      @code{BZ_DATA_ERROR_MAGIC}
2271 +         if the stream does not begin with the requisite header bytes (ie, is not
2272 +         a @code{bzip2} data file).  This is really a special case of @code{BZ_DATA_ERROR}.
2273 +      @code{BZ_MEM_ERROR}
2274 +         if insufficient memory was available
2275 +      @code{BZ_STREAM_END}
2276 +         if the logical end of stream was detected.
2277 +      @code{BZ_OK}
2278 +         otherwise.
2279 +@end display
2280 +
2281 +Possible return values:
2282 +@display
2283 +      number of bytes read
2284 +         if @code{bzerror} is @code{BZ_OK} or @code{BZ_STREAM_END}
2285 +      undefined
2286 +         otherwise
2287 +@end display
2288 +
2289 +Allowable next actions:
2290 +@display
2291 +      collect data from @code{buf}, then @code{BZ2_bzRead} or @code{BZ2_bzReadClose}
2292 +         if @code{bzerror} is @code{BZ_OK}
2293 +      collect data from @code{buf}, then @code{BZ2_bzReadClose} or @code{BZ2_bzReadGetUnused}
2294 +         if @code{bzerror} is @code{BZ_STREAM_END}
2295 +      @code{BZ2_bzReadClose}
2296 +         otherwise
2297 +@end display
2298 +
2299 +
2300 +
2301 +@subsection @code{BZ2_bzReadGetUnused}
2302 +@example
2303 +   void BZ2_bzReadGetUnused ( int* bzerror, BZFILE *b,
2304 +                              void** unused, int* nUnused );
2305 +@end example
2306 +Returns data which was read from the compressed file but was not needed
2307 +to get to the logical end-of-stream.  @code{*unused} is set to the address
2308 +of the data, and @code{*nUnused} to the number of bytes.  @code{*nUnused} will
2309 +be set to a value between @code{0} and @code{BZ_MAX_UNUSED} inclusive.
2310 +
2311 +This function may only be called once @code{BZ2_bzRead} has signalled
2312 +@code{BZ_STREAM_END} but before @code{BZ2_bzReadClose}.
2313 +
2314 +Possible assignments to @code{bzerror}:
2315 +@display
2316 +      @code{BZ_PARAM_ERROR}
2317 +         if @code{b} is @code{NULL}
2318 +         or @code{unused} is @code{NULL} or @code{nUnused} is @code{NULL}
2319 +      @code{BZ_SEQUENCE_ERROR}
2320 +         if @code{BZ_STREAM_END} has not been signalled
2321 +         or if @code{b} was opened with @code{BZ2_bzWriteOpen}
2322 +      @code{BZ_OK}
2323 +         otherwise
2324 +@end display
2325 +
2326 +Allowable next actions:
2327 +@display
2328 +      @code{BZ2_bzReadClose}
2329 +@end display
2330 +
2331 +
2332 +@subsection @code{BZ2_bzReadClose}
2333 +@example
2334 +   void BZ2_bzReadClose ( int *bzerror, BZFILE *b );
2335 +@end example
2336 +Releases all memory pertaining to the compressed file @code{b}.
2337 +@code{BZ2_bzReadClose} does not call @code{fclose} on the underlying file
2338 +handle, so you should do that yourself if appropriate.
2339 +@code{BZ2_bzReadClose} should be called to clean up after all error
2340 +situations.
2341 +
2342 +Possible assignments to @code{bzerror}:
2343 +@display
2344 +      @code{BZ_SEQUENCE_ERROR}
2345 +         if @code{b} was opened with @code{BZ2_bzWriteOpen}
2346 +      @code{BZ_OK}
2347 +         otherwise
2348 +@end display
2349 +
2350 +Allowable next actions:
2351 +@display
2352 +      none
2353 +@end display
2354 +
2355 +
2356 +
2357 +@subsection @code{BZ2_bzWriteOpen}
2358 +@example
2359 +   BZFILE *BZ2_bzWriteOpen ( int *bzerror, FILE *f,
2360 +                             int blockSize100k, int verbosity,
2361 +                             int workFactor );
2362 +@end example
2363 +Prepare to write compressed data to file handle @code{f}.
2364 +@code{f} should refer to
2365 +a file which has been opened for writing, and for which the error
2366 +indicator (@code{ferror(f)}) is not set.
2367 +
2368 +For the meaning of parameters @code{blockSize100k},
2369 +@code{verbosity} and @code{workFactor}, see
2370 +@* @code{BZ2_bzCompressInit}.
2371 +
2372 +All required memory is allocated at this stage, so if the call
2373 +completes successfully, @code{BZ_MEM_ERROR} cannot be signalled by a
2374 +subsequent call to @code{BZ2_bzWrite}.
2375 +
2376 +Possible assignments to @code{bzerror}:
2377 +@display
2378 +      @code{BZ_CONFIG_ERROR}
2379 +         if the library has been mis-compiled
2380 +      @code{BZ_PARAM_ERROR}
2381 +         if @code{f} is @code{NULL}
2382 +         or @code{blockSize100k < 1} or @code{blockSize100k > 9}
2383 +      @code{BZ_IO_ERROR}
2384 +         if @code{ferror(f)} is nonzero
2385 +      @code{BZ_MEM_ERROR}
2386 +         if insufficient memory is available
2387 +      @code{BZ_OK}
2388 +         otherwise
2389 +@end display
2390 +
2391 +Possible return values:
2392 +@display
2393 +      Pointer to an abstract @code{BZFILE}
2394 +         if @code{bzerror} is @code{BZ_OK}
2395 +      @code{NULL}
2396 +         otherwise
2397 +@end display
2398 +
2399 +Allowable next actions:
2400 +@display
2401 +      @code{BZ2_bzWrite}
2402 +         if @code{bzerror} is @code{BZ_OK}
2403 +         (you could go directly to @code{BZ2_bzWriteClose}, but this would be pretty pointless)
2404 +      @code{BZ2_bzWriteClose}
2405 +         otherwise
2406 +@end display
2407 +
2408 +
2409 +
2410 +@subsection @code{BZ2_bzWrite}
2411 +@example
2412 +   void BZ2_bzWrite ( int *bzerror, BZFILE *b, void *buf, int len );
2413 +@end example
2414 +Absorbs @code{len} bytes from the buffer @code{buf}, eventually to be
2415 +compressed and written to the file.
2416 +
2417 +Possible assignments to @code{bzerror}:
2418 +@display
2419 +      @code{BZ_PARAM_ERROR}
2420 +         if @code{b} is @code{NULL} or @code{buf} is @code{NULL} or @code{len < 0}
2421 +      @code{BZ_SEQUENCE_ERROR}
2422 +         if @code{b} was opened with @code{BZ2_bzReadOpen}
2423 +      @code{BZ_IO_ERROR}
2424 +         if there is an error writing the compressed file.
2425 +      @code{BZ_OK}
2426 +         otherwise
2427 +@end display
2428 +
2429 +
2430 +
2431 +
2432 +@subsection @code{BZ2_bzWriteClose}
2433 +@example
2434 +   void BZ2_bzWriteClose ( int *bzerror, BZFILE* b,
2435 +                           int abandon,
2436 +                           unsigned int* nbytes_in,
2437 +                           unsigned int* nbytes_out );
2438 +
2439 +   void BZ2_bzWriteClose64 ( int *bzerror, BZFILE* b,
2440 +                             int abandon,
2441 +                             unsigned int* nbytes_in_lo32,
2442 +                             unsigned int* nbytes_in_hi32,
2443 +                             unsigned int* nbytes_out_lo32,
2444 +                             unsigned int* nbytes_out_hi32 );
2445 +@end example
2446 +
2447 +Compresses and flushes to the compressed file all data so far supplied
2448 +by @code{BZ2_bzWrite}.  The logical end-of-stream markers are also written, so
2449 +subsequent calls to @code{BZ2_bzWrite} are illegal.  All memory associated
2450 +with the compressed file @code{b} is released.
2451 +@code{fflush} is called on the
2452 +compressed file, but it is not @code{fclose}'d.
2453 +
2454 +If @code{BZ2_bzWriteClose} is called to clean up after an error, the only
2455 +action is to release the memory.  The library records the error codes
2456 +issued by previous calls, so this situation will be detected
2457 +automatically.  There is no attempt to complete the compression
2458 +operation, nor to @code{fflush} the compressed file.  You can force this
2459 +behaviour to happen even in the case of no error, by passing a nonzero
2460 +value to @code{abandon}.
2461 +
2462 +If @code{nbytes_in} is non-null, @code{*nbytes_in} will be set to be the
2463 +total volume of uncompressed data handled.  Similarly, @code{nbytes_out}
2464 +will be set to the total volume of compressed data written.  For
2465 +compatibility with older versions of the library, @code{BZ2_bzWriteClose}
2466 +only yields the lower 32 bits of these counts.  Use
2467 +@code{BZ2_bzWriteClose64} if you want the full 64 bit counts.  These
2468 +two functions are otherwise absolutely identical.
2469 +
2470 +
2471 +Possible assignments to @code{bzerror}:
2472 +@display
2473 +      @code{BZ_SEQUENCE_ERROR}
2474 +         if @code{b} was opened with @code{BZ2_bzReadOpen}
2475 +      @code{BZ_IO_ERROR}
2476 +         if there is an error writing the compressed file
2477 +      @code{BZ_OK}
2478 +         otherwise
2479 +@end display
2480 +
2481 +@subsection Handling embedded compressed data streams
2482 +
2483 +The high-level library facilitates use of
2484 +@code{bzip2} data streams which form some part of a surrounding, larger
2485 +data stream.
2486 +@itemize @bullet
2487 +@item For writing, the library takes an open file handle, writes
2488 +compressed data to it, @code{fflush}es it but does not @code{fclose} it.
2489 +The calling application can write its own data before and after the
2490 +compressed data stream, using that same file handle.
2491 +@item Reading is more complex, and the facilities are not as general
2492 +as they could be since generality is hard to reconcile with efficiency.
2493 +@code{BZ2_bzRead} reads from the compressed file in blocks of size
2494 +@code{BZ_MAX_UNUSED} bytes, and in doing so probably will overshoot
2495 +the logical end of compressed stream.
2496 +To recover this data once decompression has
2497 +ended, call @code{BZ2_bzReadGetUnused} after the last call of @code{BZ2_bzRead}
2498 +(the one returning @code{BZ_STREAM_END}) but before calling
2499 +@code{BZ2_bzReadClose}.
2500 +@end itemize
2501 +
2502 +This mechanism makes it easy to decompress multiple @code{bzip2}
2503 +streams placed end-to-end.  At the end of one stream, when @code{BZ2_bzRead}
2504 +returns @code{BZ_STREAM_END}, call @code{BZ2_bzReadGetUnused} to collect the
2505 +unused data (copy it into your own buffer somewhere).
2506 +That data forms the start of the next compressed stream.
2507 +To start uncompressing that next stream, call @code{BZ2_bzReadOpen} again,
2508 +feeding in the unused data via the @code{unused}/@code{nUnused}
2509 +parameters.
2510 +Keep doing this until @code{BZ_STREAM_END} return coincides with the
2511 +physical end of file (@code{feof(f)}).  In this situation
2512 +@code{BZ2_bzReadGetUnused}
2513 +will of course return no data.
2514 +
2515 +This should give some feel for how the high-level interface can be used.
2516 +If you require extra flexibility, you'll have to bite the bullet and get
2517 +to grips with the low-level interface.
2518 +
2519 +@subsection Standard file-reading/writing code
2520 +Here's how you'd write data to a compressed file:
2521 +@example @code
2522 +FILE*   f;
2523 +BZFILE* b;
2524 +int     nBuf;
2525 +char    buf[ /* whatever size you like */ ];
2526 +int     bzerror;
2527 +int     nWritten;
2528 +
2529 +f = fopen ( "myfile.bz2", "w" );
2530 +if (!f) @{
2531 +   /* handle error */
2532 +@}
2533 +b = BZ2_bzWriteOpen ( &bzerror, f, 9 );
2534 +if (bzerror != BZ_OK) @{
2535 +   BZ2_bzWriteClose ( b );
2536 +   /* handle error */
2537 +@}
2538 +
2539 +while ( /* condition */ ) @{
2540 +   /* get data to write into buf, and set nBuf appropriately */
2541 +   nWritten = BZ2_bzWrite ( &bzerror, b, buf, nBuf );
2542 +   if (bzerror == BZ_IO_ERROR) @{
2543 +      BZ2_bzWriteClose ( &bzerror, b );
2544 +      /* handle error */
2545 +   @}
2546 +@}
2547 +
2548 +BZ2_bzWriteClose ( &bzerror, b );
2549 +if (bzerror == BZ_IO_ERROR) @{
2550 +   /* handle error */
2551 +@}
2552 +@end example
2553 +And to read from a compressed file:
2554 +@example
2555 +FILE*   f;
2556 +BZFILE* b;
2557 +int     nBuf;
2558 +char    buf[ /* whatever size you like */ ];
2559 +int     bzerror;
2560 +int     nWritten;
2561 +
2562 +f = fopen ( "myfile.bz2", "r" );
2563 +if (!f) @{
2564 +   /* handle error */
2565 +@}
2566 +b = BZ2_bzReadOpen ( &bzerror, f, 0, NULL, 0 );
2567 +if (bzerror != BZ_OK) @{
2568 +   BZ2_bzReadClose ( &bzerror, b );
2569 +   /* handle error */
2570 +@}
2571 +
2572 +bzerror = BZ_OK;
2573 +while (bzerror == BZ_OK && /* arbitrary other conditions */) @{
2574 +   nBuf = BZ2_bzRead ( &bzerror, b, buf, /* size of buf */ );
2575 +   if (bzerror == BZ_OK) @{
2576 +      /* do something with buf[0 .. nBuf-1] */
2577 +   @}
2578 +@}
2579 +if (bzerror != BZ_STREAM_END) @{
2580 +   BZ2_bzReadClose ( &bzerror, b );
2581 +   /* handle error */
2582 +@} else @{
2583 +   BZ2_bzReadClose ( &bzerror, b );
2584 +@}
2585 +@end example
2586 +
2587 +
2588 +
2589 +@section Utility functions
2590 +@subsection @code{BZ2_bzBuffToBuffCompress}
2591 +@example
2592 +   int BZ2_bzBuffToBuffCompress( char*         dest,
2593 +                                 unsigned int* destLen,
2594 +                                 char*         source,
2595 +                                 unsigned int  sourceLen,
2596 +                                 int           blockSize100k,
2597 +                                 int           verbosity,
2598 +                                 int           workFactor );
2599 +@end example
2600 +Attempts to compress the data in @code{source[0 .. sourceLen-1]}
2601 +into the destination buffer, @code{dest[0 .. *destLen-1]}.
2602 +If the destination buffer is big enough, @code{*destLen} is
2603 +set to the size of the compressed data, and @code{BZ_OK} is
2604 +returned.  If the compressed data won't fit, @code{*destLen}
2605 +is unchanged, and @code{BZ_OUTBUFF_FULL} is returned.
2606 +
2607 +Compression in this manner is a one-shot event, done with a single call
2608 +to this function.  The resulting compressed data is a complete
2609 +@code{bzip2} format data stream.  There is no mechanism for making
2610 +additional calls to provide extra input data.  If you want that kind of
2611 +mechanism, use the low-level interface.
2612 +
2613 +For the meaning of parameters @code{blockSize100k}, @code{verbosity}
2614 +and @code{workFactor}, @* see @code{BZ2_bzCompressInit}.
2615 +
2616 +To guarantee that the compressed data will fit in its buffer, allocate
2617 +an output buffer of size 1% larger than the uncompressed data, plus
2618 +six hundred extra bytes.
2619 +
2620 +@code{BZ2_bzBuffToBuffCompress} will not write data at or
2621 +beyond @code{dest[*destLen]}, even in case of buffer overflow.
2622 +
2623 +Possible return values:
2624 +@display
2625 +      @code{BZ_CONFIG_ERROR}
2626 +         if the library has been mis-compiled
2627 +      @code{BZ_PARAM_ERROR}
2628 +         if @code{dest} is @code{NULL} or @code{destLen} is @code{NULL}
2629 +         or @code{blockSize100k < 1} or @code{blockSize100k > 9}
2630 +         or @code{verbosity < 0} or @code{verbosity > 4}
2631 +         or @code{workFactor < 0} or @code{workFactor > 250}
2632 +      @code{BZ_MEM_ERROR}
2633 +         if insufficient memory is available
2634 +      @code{BZ_OUTBUFF_FULL}
2635 +         if the size of the compressed data exceeds @code{*destLen}
2636 +      @code{BZ_OK}
2637 +         otherwise
2638 +@end display
2639 +
2640 +
2641 +
2642 +@subsection @code{BZ2_bzBuffToBuffDecompress}
2643 +@example
2644 +   int BZ2_bzBuffToBuffDecompress ( char*         dest,
2645 +                                    unsigned int* destLen,
2646 +                                    char*         source,
2647 +                                    unsigned int  sourceLen,
2648 +                                    int           small,
2649 +                                    int           verbosity );
2650 +@end example
2651 +Attempts to decompress the data in @code{source[0 .. sourceLen-1]}
2652 +into the destination buffer, @code{dest[0 .. *destLen-1]}.
2653 +If the destination buffer is big enough, @code{*destLen} is
2654 +set to the size of the uncompressed data, and @code{BZ_OK} is
2655 +returned.  If the uncompressed data won't fit, @code{*destLen}
2656 +is unchanged, and @code{BZ_OUTBUFF_FULL} is returned.
2657 +
2658 +@code{source} is assumed to hold a complete @code{bzip2} format
2659 +data stream.  @* @code{BZ2_bzBuffToBuffDecompress} tries to decompress
2660 +the entirety of the stream into the output buffer.
2661 +
2662 +For the meaning of parameters @code{small} and @code{verbosity},
2663 +see @code{BZ2_bzDecompressInit}.
2664 +
2665 +Because the compression ratio of the compressed data cannot be known in
2666 +advance, there is no easy way to guarantee that the output buffer will
2667 +be big enough.  You may of course make arrangements in your code to
2668 +record the size of the uncompressed data, but such a mechanism is beyond
2669 +the scope of this library.
2670 +
2671 +@code{BZ2_bzBuffToBuffDecompress} will not write data at or
2672 +beyond @code{dest[*destLen]}, even in case of buffer overflow.
2673 +
2674 +Possible return values:
2675 +@display
2676 +      @code{BZ_CONFIG_ERROR}
2677 +         if the library has been mis-compiled
2678 +      @code{BZ_PARAM_ERROR}
2679 +         if @code{dest} is @code{NULL} or @code{destLen} is @code{NULL}
2680 +         or @code{small != 0 && small != 1}
2681 +         or @code{verbosity < 0} or @code{verbosity > 4}
2682 +      @code{BZ_MEM_ERROR}
2683 +         if insufficient memory is available
2684 +      @code{BZ_OUTBUFF_FULL}
2685 +         if the size of the uncompressed data exceeds @code{*destLen}
2686 +      @code{BZ_DATA_ERROR}
2687 +         if a data integrity error was detected in the compressed data
2688 +      @code{BZ_DATA_ERROR_MAGIC}
2689 +         if the compressed data doesn't begin with the right magic bytes
2690 +      @code{BZ_UNEXPECTED_EOF}
2691 +         if the compressed data ends unexpectedly
2692 +      @code{BZ_OK}
2693 +         otherwise
2694 +@end display
2695 +
2696 +
2697 +
2698 +@section @code{zlib} compatibility functions
2699 +Yoshioka Tsuneo has contributed some functions to
2700 +give better @code{zlib} compatibility.  These functions are
2701 +@code{BZ2_bzopen}, @code{BZ2_bzread}, @code{BZ2_bzwrite}, @code{BZ2_bzflush},
2702 +@code{BZ2_bzclose},
2703 +@code{BZ2_bzerror} and @code{BZ2_bzlibVersion}.
2704 +These functions are not (yet) officially part of
2705 +the library.  If they break, you get to keep all the pieces.
2706 +Nevertheless, I think they work ok.
2707 +@example
2708 +typedef void BZFILE;
2709 +
2710 +const char * BZ2_bzlibVersion ( void );
2711 +@end example
2712 +Returns a string indicating the library version.
2713 +@example
2714 +BZFILE * BZ2_bzopen  ( const char *path, const char *mode );
2715 +BZFILE * BZ2_bzdopen ( int        fd,    const char *mode );
2716 +@end example
2717 +Opens a @code{.bz2} file for reading or writing, using either its name
2718 +or a pre-existing file descriptor.
2719 +Analogous to @code{fopen} and @code{fdopen}.
2720 +@example
2721 +int BZ2_bzread  ( BZFILE* b, void* buf, int len );
2722 +int BZ2_bzwrite ( BZFILE* b, void* buf, int len );
2723 +@end example
2724 +Reads/writes data from/to a previously opened @code{BZFILE}.
2725 +Analogous to @code{fread} and @code{fwrite}.
2726 +@example
2727 +int  BZ2_bzflush ( BZFILE* b );
2728 +void BZ2_bzclose ( BZFILE* b );
2729 +@end example
2730 +Flushes/closes a @code{BZFILE}.  @code{BZ2_bzflush} doesn't actually do
2731 +anything.  Analogous to @code{fflush} and @code{fclose}.
2732 +
2733 +@example
2734 +const char * BZ2_bzerror ( BZFILE *b, int *errnum )
2735 +@end example
2736 +Returns a string describing the most recent error status of
2737 +@code{b}, and also sets @code{*errnum} to its numerical value.
2738 +
2739 +
2740 +@section Using the library in a @code{stdio}-free environment
2741 +
2742 +@subsection Getting rid of @code{stdio}
2743 +
2744 +In a deeply embedded application, you might want to use just
2745 +the memory-to-memory functions.  You can do this conveniently
2746 +by compiling the library with preprocessor symbol @code{BZ_NO_STDIO}
2747 +defined.  Doing this gives you a library containing only the following
2748 +eight functions:
2749 +
2750 +@code{BZ2_bzCompressInit}, @code{BZ2_bzCompress}, @code{BZ2_bzCompressEnd} @*
2751 +@code{BZ2_bzDecompressInit}, @code{BZ2_bzDecompress}, @code{BZ2_bzDecompressEnd} @*
2752 +@code{BZ2_bzBuffToBuffCompress}, @code{BZ2_bzBuffToBuffDecompress}
2753 +
2754 +When compiled like this, all functions will ignore @code{verbosity}
2755 +settings.
2756 +
2757 +@subsection Critical error handling
2758 +@code{libbzip2} contains a number of internal assertion checks which
2759 +should, needless to say, never be activated.  Nevertheless, if an
2760 +assertion should fail, behaviour depends on whether or not the library
2761 +was compiled with @code{BZ_NO_STDIO} set.
2762 +
2763 +For a normal compile, an assertion failure yields the message
2764 +@example
2765 +   bzip2/libbzip2: internal error number N.
2766 +   This is a bug in bzip2/libbzip2, 1.0.2, 30-Dec-2001.
2767 +   Please report it to me at: jseward@@acm.org.  If this happened
2768 +   when you were using some program which uses libbzip2 as a
2769 +   component, you should also report this bug to the author(s)
2770 +   of that program.  Please make an effort to report this bug;
2771 +   timely and accurate bug reports eventually lead to higher
2772 +   quality software.  Thanks.  Julian Seward, 30 December 2001.
2773 +@end example
2774 +where @code{N} is some error code number.  If @code{N == 1007}, it also
2775 +prints some extra text advising the reader that unreliable memory is
2776 +often associated with internal error 1007.  (This is a
2777 +frequently-observed-phenomenon with versions 1.0.0/1.0.1).
2778 +
2779 +@code{exit(3)} is then called.
2780 +
2781 +For a @code{stdio}-free library, assertion failures result
2782 +in a call to a function declared as:
2783 +@example
2784 +   extern void bz_internal_error ( int errcode );
2785 +@end example
2786 +The relevant code is passed as a parameter.  You should supply
2787 +such a function.
2788 +
2789 +In either case, once an assertion failure has occurred, any
2790 +@code{bz_stream} records involved can be regarded as invalid.
2791 +You should not attempt to resume normal operation with them.
2792 +
2793 +You may, of course, change critical error handling to suit
2794 +your needs.  As I said above, critical errors indicate bugs
2795 +in the library and should not occur.  All "normal" error
2796 +situations are indicated via error return codes from functions,
2797 +and can be recovered from.
2798 +
2799 +
2800 +@section Making a Windows DLL
2801 +Everything related to Windows has been contributed by Yoshioka Tsuneo
2802 +@* (@code{QWF00133@@niftyserve.or.jp} /
2803 +@code{tsuneo-y@@is.aist-nara.ac.jp}), so you should send your queries to
2804 +him (but perhaps Cc: me, @code{jseward@@acm.org}).
2805 +
2806 +My vague understanding of what to do is: using Visual C++ 5.0,
2807 +open the project file @code{libbz2.dsp}, and build.  That's all.
2808 +
2809 +If you can't
2810 +open the project file for some reason, make a new one, naming these files:
2811 +@code{blocksort.c}, @code{bzlib.c}, @code{compress.c},
2812 +@code{crctable.c}, @code{decompress.c}, @code{huffman.c}, @*
2813 +@code{randtable.c} and @code{libbz2.def}.  You will also need
2814 +to name the header files @code{bzlib.h} and @code{bzlib_private.h}.
2815 +
2816 +If you don't use VC++, you may need to define the preprocessor symbol
2817 +@code{_WIN32}.
2818 +
2819 +Finally, @code{dlltest.c} is a sample program using the DLL.  It has a
2820 +project file, @code{dlltest.dsp}.
2821 +
2822 +If you just want a makefile for Visual C, have a look at
2823 +@code{makefile.msc}.
2824 +
2825 +Be aware that if you compile @code{bzip2} itself on Win32, you must set
2826 +@code{BZ_UNIX} to 0 and @code{BZ_LCCWIN32} to 1, in the file
2827 +@code{bzip2.c}, before compiling.  Otherwise the resulting binary won't
2828 +work correctly.
2829 +
2830 +I haven't tried any of this stuff myself, but it all looks plausible.
2831 +
2832 +
2833 +
2834 +@chapter Miscellanea
2835 +
2836 +These are just some random thoughts of mine.  Your mileage may
2837 +vary.
2838 +
2839 +@section Limitations of the compressed file format
2840 +@code{bzip2-1.0}, @code{0.9.5} and @code{0.9.0}
2841 +use exactly the same file format as the previous
2842 +version, @code{bzip2-0.1}.  This decision was made in the interests of
2843 +stability.  Creating yet another incompatible compressed file format
2844 +would create further confusion and disruption for users.
2845 +
2846 +Nevertheless, this is not a painless decision.  Development
2847 +work since the release of @code{bzip2-0.1} in August 1997
2848 +has shown complexities in the file format which slow down
2849 +decompression and, in retrospect, are unnecessary.  These are:
2850 +@itemize @bullet
2851 +@item The run-length encoder, which is the first of the
2852 +      compression transformations, is entirely irrelevant.
2853 +      The original purpose was to protect the sorting algorithm
2854 +      from the very worst case input: a string of repeated
2855 +      symbols.  But algorithm steps Q6a and Q6b in the original
2856 +      Burrows-Wheeler technical report (SRC-124) show how
2857 +      repeats can be handled without difficulty in block
2858 +      sorting.
2859 +@item The randomisation mechanism doesn't really need to be
2860 +      there.  Udi Manber and Gene Myers published a suffix
2861 +      array construction algorithm a few years back, which
2862 +      can be employed to sort any block, no matter how
2863 +      repetitive, in O(N log N) time.  Subsequent work by
2864 +      Kunihiko Sadakane has produced a derivative O(N (log N)^2)
2865 +      algorithm which usually outperforms the Manber-Myers
2866 +      algorithm.
2867 +
2868 +      I could have changed to Sadakane's algorithm, but I find
2869 +      it to be slower than @code{bzip2}'s existing algorithm for
2870 +      most inputs, and the randomisation mechanism protects
2871 +      adequately against bad cases.  I didn't think it was
2872 +      a good tradeoff to make.  Partly this is due to the fact
2873 +      that I was not flooded with email complaints about
2874 +      @code{bzip2-0.1}'s performance on repetitive data, so
2875 +      perhaps it isn't a problem for real inputs.
2876 +
2877 +      Probably the best long-term solution,
2878 +      and the one I have incorporated into 0.9.5 and above,
2879 +      is to use the existing sorting
2880 +      algorithm initially, and fall back to an O(N (log N)^2)
2881 +      algorithm if the standard algorithm gets into difficulties.
2882 +@item The compressed file format was never designed to be
2883 +      handled by a library, and I have had to jump through
2884 +      some hoops to produce an efficient implementation of
2885 +      decompression.  It's a bit hairy.  Try passing
2886 +      @code{decompress.c} through the C preprocessor
2887 +      and you'll see what I mean.  Much of this complexity
2888 +      could have been avoided if the compressed size of
2889 +      each block of data was recorded in the data stream.
2890 +@item An Adler-32 checksum, rather than a CRC32 checksum,
2891 +      would be faster to compute.
2892 +@end itemize
2893 +It would be fair to say that the @code{bzip2} format was frozen
2894 +before I properly and fully understood the performance
2895 +consequences of doing so.
2896 +
2897 +Improvements which I was able to incorporate into
2898 +0.9.0, despite using the same file format, are:
2899 +@itemize @bullet
2900 +@item Single array implementation of the inverse BWT.  This
2901 +      significantly speeds up decompression, presumably
2902 +      because it reduces the number of cache misses.
2903 +@item Faster inverse MTF transform for large MTF values.  The
2904 +      new implementation is based on the notion of sliding blocks
2905 +      of values.
2906 +@item @code{bzip2-0.9.0} now reads and writes files with @code{fread}
2907 +      and @code{fwrite}; version 0.1 used @code{putc} and @code{getc}.
2908 +      Duh!  Well, you live and learn.
2909 +
2910 +@end itemize
2911 +Further ahead, it would be nice
2912 +to be able to do random access into files.  This will
2913 +require some careful design of compressed file formats.
2914 +
2915 +
2916 +
2917 +@section Portability issues
2918 +After some consideration, I have decided not to use
2919 +GNU @code{autoconf} to configure 0.9.5 or 1.0.
2920 +
2921 +@code{autoconf}, admirable and wonderful though it is,
2922 +mainly assists with portability problems between Unix-like
2923 +platforms.  But @code{bzip2} doesn't have much in the way
2924 +of portability problems on Unix; most of the difficulties appear
2925 +when porting to the Mac, or to Microsoft's operating systems.
2926 +@code{autoconf} doesn't help in those cases, and brings in a
2927 +whole load of new complexity.
2928 +
2929 +Most people should be able to compile the library and program
2930 +under Unix straight out-of-the-box, so to speak, especially
2931 +if you have a version of GNU C available.
2932 +
2933 +There are a couple of @code{__inline__} directives in the code.  GNU C
2934 +(@code{gcc}) should be able to handle them.  If you're not using
2935 +GNU C, your C compiler shouldn't see them at all.
2936 +If your compiler does, for some reason, see them and doesn't
2937 +like them, just @code{#define} @code{__inline__} to be @code{/* */}.  One
2938 +easy way to do this is to compile with the flag @code{-D__inline__=},
2939 +which should be understood by most Unix compilers.
2940 +
2941 +If you still have difficulties, try compiling with the macro
2942 +@code{BZ_STRICT_ANSI} defined.  This should enable you to build the
2943 +library in a strictly ANSI compliant environment.  Building the program
2944 +itself like this is dangerous and not supported, since you remove
2945 +@code{bzip2}'s checks against compressing directories, symbolic links,
2946 +devices, and other not-really-a-file entities.  This could cause
2947 +filesystem corruption!
2948 +
2949 +One other thing: if you create a @code{bzip2} binary for public
2950 +distribution, please try and link it statically (@code{gcc -static}).  This
2951 +avoids all sorts of library-version issues that others may encounter
2952 +later on.
2953 +
2954 +If you build @code{bzip2} on Win32, you must set @code{BZ_UNIX} to 0 and
2955 +@code{BZ_LCCWIN32} to 1, in the file @code{bzip2.c}, before compiling.
2956 +Otherwise the resulting binary won't work correctly.
2957 +
2958 +
2959 +
2960 +@section Reporting bugs
2961 +I tried pretty hard to make sure @code{bzip2} is
2962 +bug free, both by design and by testing.  Hopefully
2963 +you'll never need to read this section for real.
2964 +
2965 +Nevertheless, if @code{bzip2} dies with a segmentation
2966 +fault, a bus error or an internal assertion failure, it
2967 +will ask you to email me a bug report.  Experience with
2968 +version 0.1 shows that almost all these problems can
2969 +be traced to either compiler bugs or hardware problems.
2970 +@itemize @bullet
2971 +@item
2972 +Recompile the program with no optimisation, and see if it
2973 +works.  And/or try a different compiler.
2974 +I heard all sorts of stories about various flavours
2975 +of GNU C (and other compilers) generating bad code for
2976 +@code{bzip2}, and I've run across two such examples myself.
2977 +
2978 +2.7.X versions of GNU C are known to generate bad code from
2979 +time to time, at high optimisation levels.
2980 +If you get problems, try using the flags
2981 +@code{-O2} @code{-fomit-frame-pointer} @code{-fno-strength-reduce}.
2982 +You should specifically @emph{not} use @code{-funroll-loops}.
2983 +
2984 +You may notice that the Makefile runs six tests as part of
2985 +the build process.  If the program passes all of these, it's
2986 +a pretty good (but not 100%) indication that the compiler has
2987 +done its job correctly.
2988 +@item
2989 +If @code{bzip2} crashes randomly, and the crashes are not
2990 +repeatable, you may have a flaky memory subsystem.  @code{bzip2}
2991 +really hammers your memory hierarchy, and if it's a bit marginal,
2992 +you may get these problems.  Ditto if your disk or I/O subsystem
2993 +is slowly failing.  Yup, this really does happen.
2994 +
2995 +Try using a different machine of the same type, and see if
2996 +you can repeat the problem.
2997 +@item This isn't really a bug, but ... If @code{bzip2} tells
2998 +you your file is corrupted on decompression, and you
2999 +obtained the file via FTP, there is a possibility that you
3000 +forgot to tell FTP to do a binary mode transfer.  That absolutely
3001 +will cause the file to be non-decompressible.  You'll have to transfer
3002 +it again.
3003 +@end itemize
3004 +
3005 +If you've incorporated @code{libbzip2} into your own program
3006 +and are getting problems, please, please, please, check that the
3007 +parameters you are passing in calls to the library, are
3008 +correct, and in accordance with what the documentation says
3009 +is allowable.  I have tried to make the library robust against
3010 +such problems, but I'm sure I haven't succeeded.
3011 +
3012 +Finally, if the above comments don't help, you'll have to send
3013 +me a bug report.  Now, it's just amazing how many people will
3014 +send me a bug report saying something like
3015 +@display
3016 +   bzip2 crashed with segmentation fault on my machine
3017 +@end display
3018 +and absolutely nothing else.  Needless to say, such a report
3019 +is @emph{totally, utterly, completely and comprehensively 100% useless;
3020 +a waste of your time, my time, and net bandwidth}.
3021 +With no details at all, there's no way I can possibly begin
3022 +to figure out what the problem is.
3023 +
3024 +The rules of the game are: facts, facts, facts.  Don't omit
3025 +them because "oh, they won't be relevant".  At the bare
3026 +minimum:
3027 +@display
3028 +   Machine type.  Operating system version.
3029 +   Exact version of @code{bzip2} (do @code{bzip2 -V}).
3030 +   Exact version of the compiler used.
3031 +   Flags passed to the compiler.
3032 +@end display
3033 +However, the most important single thing that will help me is
3034 +the file that you were trying to compress or decompress at the
3035 +time the problem happened.  Without that, my ability to do anything
3036 +more than speculate about the cause, is limited.
3037 +
3038 +Please remember that I connect to the Internet with a modem, so
3039 +you should contact me before mailing me huge files.
3040 +
3041 +
3042 +@section Did you get the right package?
3043 +
3044 +@code{bzip2} is a resource hog.  It soaks up large amounts of CPU cycles
3045 +and memory.  Also, it gives very large latencies.  In the worst case, you
3046 +can feed many megabytes of uncompressed data into the library before
3047 +getting any compressed output, so this probably rules out applications
3048 +requiring interactive behaviour.
3049 +
3050 +These aren't faults of my implementation, I hope, but more
3051 +an intrinsic property of the Burrows-Wheeler transform (unfortunately).
3052 +Maybe this isn't what you want.
3053 +
3054 +If you want a compressor and/or library which is faster, uses less
3055 +memory but gets pretty good compression, and has minimal latency,
3056 +consider Jean-loup
3057 +Gailly's and Mark Adler's work, @code{zlib-1.1.3} and
3058 +@code{gzip-1.2.4}.  Look for them at
3059 +
3060 +@code{http://www.zlib.org} and
3061 +@code{http://www.gzip.org} respectively.
3062 +
3063 +For something faster and lighter still, you might try Markus F X J
3064 +Oberhumer's @code{LZO} real-time compression/decompression library, at
3065 +@* @code{http://wildsau.idv.uni-linz.ac.at/mfx/lzo.html}.
3066 +
3067 +If you want to use the @code{bzip2} algorithms to compress small blocks
3068 +of data, 64k bytes or smaller, for example on an on-the-fly disk
3069 +compressor, you'd be well advised not to use this library.  Instead,
3070 +I've made a special library tuned for that kind of use.  It's part of
3071 +@code{e2compr-0.40}, an on-the-fly disk compressor for the Linux
3072 +@code{ext2} filesystem.  Look at
3073 +@code{http://www.netspace.net.au/~reiter/e2compr}.
3074 +
3075 +
3076 +
3077 +@section Testing
3078 +
3079 +A record of the tests I've done.
3080 +
3081 +First, some data sets:
3082 +@itemize @bullet
3083 +@item B: a directory containing 6001 files, one for every length in the
3084 +      range 0 to 6000 bytes.  The files contain random lowercase
3085 +      letters.  18.7 megabytes.
3086 +@item H: my home directory tree.  Documents, source code, mail files,
3087 +      compressed data.  H contains B, and also a directory of
3088 +      files designed as boundary cases for the sorting; mostly very
3089 +      repetitive, nasty files.  565 megabytes.
3090 +@item A: directory tree holding various applications built from source:
3091 +      @code{egcs}, @code{gcc-2.8.1}, KDE, GTK, Octave, etc.
3092 +      2200 megabytes.
3093 +@end itemize
3094 +The tests conducted are as follows.  Each test means compressing
3095 +(a copy of) each file in the data set, decompressing it and
3096 +comparing it against the original.
3097 +
3098 +First, a bunch of tests with block sizes and internal buffer
3099 +sizes set very small,
3100 +to detect any problems with the
3101 +blocking and buffering mechanisms.
3102 +This required modifying the source code so as to try to
3103 +break it.
3104 +@enumerate
3105 +@item Data set H, with
3106 +      buffer size of 1 byte, and block size of 23 bytes.
3107 +@item Data set B, buffer sizes 1 byte, block size 1 byte.
3108 +@item As (2) but small-mode decompression.
3109 +@item As (2) with block size 2 bytes.
3110 +@item As (2) with block size 3 bytes.
3111 +@item As (2) with block size 4 bytes.
3112 +@item As (2) with block size 5 bytes.
3113 +@item As (2) with block size 6 bytes and small-mode decompression.
3114 +@item H with buffer size of 1 byte, but normal block
3115 +      size (up to 900000 bytes).
3116 +@end enumerate
3117 +Then some tests with unmodified source code.
3118 +@enumerate
3119 +@item H, all settings normal.
3120 +@item As (1), with small-mode decompress.
3121 +@item H, compress with flag @code{-1}.
3122 +@item H, compress with flag @code{-s}, decompress with flag @code{-s}.
3123 +@item Forwards compatibility: H, @code{bzip2-0.1pl2} compressing,
3124 +      @code{bzip2-0.9.5} decompressing, all settings normal.
3125 +@item Backwards compatibility:  H, @code{bzip2-0.9.5} compressing,
3126 +      @code{bzip2-0.1pl2} decompressing, all settings normal.
3127 +@item Bigger tests: A, all settings normal.
3128 +@item As (7), using the fallback (Sadakane-like) sorting algorithm.
3129 +@item As (8), compress with flag @code{-1}, decompress with flag
3130 +      @code{-s}.
3131 +@item H, using the fallback sorting algorithm.
3132 +@item Forwards compatibility: A, @code{bzip2-0.1pl2} compressing,
3133 +      @code{bzip2-0.9.5} decompressing, all settings normal.
3134 +@item Backwards compatibility:  A, @code{bzip2-0.9.5} compressing,
3135 +      @code{bzip2-0.1pl2} decompressing, all settings normal.
3136 +@item Misc test: about 400 megabytes of @code{.tar} files with
3137 +      @code{bzip2} compiled with Checker (a memory access error
3138 +       detector, like Purify).
3139 +@item Misc tests to make sure it builds and runs ok on non-Linux/x86
3140 +      platforms.
3141 +@end enumerate
3142 +These tests were conducted on a 225 MHz IDT WinChip machine, running
3143 +Linux 2.0.36.  They represent nearly a week of continuous computation.
3144 +All tests completed successfully.
3145 +
3146 +
3147 +@section Further reading
3148 +@code{bzip2} is not research work, in the sense that it doesn't present
3149 +any new ideas.  Rather, it's an engineering exercise based on existing
3150 +ideas.
3151 +
3152 +Four documents describe essentially all the ideas behind @code{bzip2}:
3153 +@example
3154 +Michael Burrows and D. J. Wheeler:
3155 +  "A block-sorting lossless data compression algorithm"
3156 +   10th May 1994.
3157 +   Digital SRC Research Report 124.
3158 +   ftp://ftp.digital.com/pub/DEC/SRC/research-reports/SRC-124.ps.gz
3159 +   If you have trouble finding it, try searching at the
3160 +   New Zealand Digital Library, http://www.nzdl.org.
3161 +
3162 +Daniel S. Hirschberg and Debra A. Lelewer
3163 +  "Efficient Decoding of Prefix Codes"
3164 +   Communications of the ACM, April 1990, Vol 33, Number 4.
3165 +   You might be able to get an electronic copy of this
3166 +      from the ACM Digital Library.
3167 +
3168 +David J. Wheeler
3169 +   Program bred3.c and accompanying document bred3.ps.
3170 +   This contains the idea behind the multi-table Huffman
3171 +   coding scheme.
3172 +   ftp://ftp.cl.cam.ac.uk/users/djw3/
3173 +
3174 +Jon L. Bentley and Robert Sedgewick
3175 +  "Fast Algorithms for Sorting and Searching Strings"
3176 +   Available from Sedgewick's web page,
3177 +   www.cs.princeton.edu/~rs
3178 +@end example
3179 +The following paper gives valuable additional insights into the
3180 +algorithm, but is not immediately the basis of any code
3181 +used in bzip2.
3182 +@example
3183 +Peter Fenwick:
3184 +   Block Sorting Text Compression
3185 +   Proceedings of the 19th Australasian Computer Science Conference,
3186 +     Melbourne, Australia.  Jan 31 - Feb 2, 1996.
3187 +   ftp://ftp.cs.auckland.ac.nz/pub/peter-f/ACSC96paper.ps
3188 +@end example
3189 +Kunihiko Sadakane's sorting algorithm, mentioned above,
3190 +is available from:
3191 +@example
3192 +http://naomi.is.s.u-tokyo.ac.jp/~sada/papers/Sada98b.ps.gz
3193 +@end example
3194 +The Manber-Myers suffix array construction
3195 +algorithm is described in a paper
3196 +available from:
3197 +@example
3198 +http://www.cs.arizona.edu/people/gene/PAPERS/suffix.ps
3199 +@end example
3200 +Finally, the following paper documents some recent investigations
3201 +I made into the performance of sorting algorithms:
3202 +@example
3203 +Julian Seward:
3204 +   On the Performance of BWT Sorting Algorithms
3205 +   Proceedings of the IEEE Data Compression Conference 2000
3206 +     Snowbird, Utah.  28-30 March 2000.
3207 +@end example
3208 +
3209 +
3210 +@contents
3211 +
3212 +@bye
3213 +
3214 diff -Nru bzip2-1.0.2/doc/bzip2recover.1 bzip2-1.0.2.new/doc/bzip2recover.1
3215 --- bzip2-1.0.2/doc/bzip2recover.1      Thu Jan  1 01:00:00 1970
3216 +++ bzip2-1.0.2.new/doc/bzip2recover.1  Fri Feb  1 04:19:11 2002
3217 @@ -0,0 +1 @@
3218 +.so bzip2.1
3219 \ No newline at end of file
3220 diff -Nru bzip2-1.0.2/doc/bzmore.1 bzip2-1.0.2.new/doc/bzmore.1
3221 --- bzip2-1.0.2/doc/bzmore.1    Thu Jan  1 01:00:00 1970
3222 +++ bzip2-1.0.2.new/doc/bzmore.1        Sun Dec 30 03:12:35 2001
3223 @@ -0,0 +1,152 @@
3224 +.\"Shamelessly copied from zmore.1 by Philippe Troin <phil@fifi.org>
3225 +.\"for Debian GNU/Linux
3226 +.TH BZMORE 1
3227 +.SH NAME
3228 +bzmore, bzless \- file perusal filter for crt viewing of bzip2 compressed text
3229 +.SH SYNOPSIS
3230 +.B bzmore
3231 +[ name ...  ]
3232 +.br
3233 +.B bzless
3234 +[ name ...  ]
3235 +.SH NOTE
3236 +In the following description,
3237 +.I bzless
3238 +and
3239 +.I less
3240 +can be used interchangeably with
3241 +.I bzmore
3242 +and
3243 +.I more.
3244 +.SH DESCRIPTION
3245 +.I  Bzmore
3246 +is a filter which allows examination of compressed or plain text files
3247 +one screenful at a time on a soft-copy terminal.
3248 +.I bzmore
3249 +works on files compressed with
3250 +.I bzip2
3251 +and also on uncompressed files.
3252 +If a file does not exist,
3253 +.I bzmore
3254 +looks for a file of the same name with the addition of a .bz2 suffix.
3255 +.PP
3256 +.I Bzmore
3257 +normally pauses after each screenful, printing --More--
3258 +at the bottom of the screen.
3259 +If the user then types a carriage return, one more line is displayed.
3260 +If the user hits a space,
3261 +another screenful is displayed.  Other possibilities are enumerated later.
3262 +.PP
3263 +.I Bzmore
3264 +looks in the file
3265 +.I /etc/termcap
3266 +to determine terminal characteristics,
3267 +and to determine the default window size.
3268 +On a terminal capable of displaying 24 lines,
3269 +the default window size is 22 lines.
3270 +Other sequences which may be typed when
3271 +.I bzmore
3272 +pauses, and their effects, are as follows (\fIi\fP is an optional integer
3273 +argument, defaulting to 1) :
3274 +.PP
3275 +.IP \fIi\|\fP<space>
3276 +display
3277 +.I i
3278 +more lines, (or another screenful if no argument is given)
3279 +.PP
3280 +.IP ^D
3281 +display 11 more lines (a ``scroll'').
3282 +If
3283 +.I i
3284 +is given, then the scroll size is set to \fIi\|\fP.
3285 +.PP
3286 +.IP d
3287 +same as ^D (control-D)
3288 +.PP
3289 +.IP \fIi\|\fPz
3290 +same as typing a space except that \fIi\|\fP, if present, becomes the new
3291 +window size.  Note that the window size reverts back to the default at the
3292 +end of the current file.
3293 +.PP
3294 +.IP \fIi\|\fPs
3295 +skip \fIi\|\fP lines and print a screenful of lines
3296 +.PP
3297 +.IP \fIi\|\fPf
3298 +skip \fIi\fP screenfuls and print a screenful of lines
3299 +.PP
3300 +.IP "q or Q"
3301 +quit reading the current file; go on to the next (if any)
3302 +.PP
3303 +.IP "e or q"
3304 +When the prompt --More--(Next file:
3305 +.IR file )
3306 +is printed, this command causes bzmore to exit.
3307 +.PP
3308 +.IP s
3309 +When the prompt --More--(Next file:
3310 +.IR file )
3311 +is printed, this command causes bzmore to skip the next file and continue.
3312 +.PP
3313 +.IP =
3314 +Display the current line number.
3315 +.PP
3316 +.IP \fIi\|\fP/expr
3317 +search for the \fIi\|\fP-th occurrence of the regular expression \fIexpr.\fP
3318 +If the pattern is not found,
3319 +.I bzmore
3320 +goes on to the next file (if any).
3321 +Otherwise, a screenful is displayed, starting two lines before the place
3322 +where the expression was found.
3323 +The user's erase and kill characters may be used to edit the regular
3324 +expression.
3325 +Erasing back past the first column cancels the search command.
3326 +.PP
3327 +.IP \fIi\|\fPn
3328 +search for the \fIi\|\fP-th occurrence of the last regular expression entered.
3329 +.PP
3330 +.IP !command
3331 +invoke a shell with \fIcommand\|\fP.
3332 +The character `!' in "command" is replaced with the
3333 +previous shell command.  The sequence "\\!" is replaced by "!".
3334 +.PP
3335 +.IP ":q or :Q"
3336 +quit reading the current file; go on to the next (if any)
3337 +(same as q or Q).
3338 +.PP
3339 +.IP .
3340 +(dot) repeat the previous command.
3341 +.PP
3342 +The commands take effect immediately, i.e., it is not necessary to
3343 +type a carriage return.
3344 +Up to the time when the command character itself is given,
3345 +the user may hit the line kill character to cancel the numerical
3346 +argument being formed.
3347 +In addition, the user may hit the erase character to redisplay the
3348 +--More-- message.
3349 +.PP
3350 +At any time when output is being sent to the terminal, the user can
3351 +hit the quit key (normally control\-\\).
3352 +.I Bzmore
3353 +will stop sending output, and will display the usual --More--
3354 +prompt.
3355 +The user may then enter one of the above commands in the normal manner.
3356 +Unfortunately, some output is lost when this is done, due to the
3357 +fact that any characters waiting in the terminal's output queue
3358 +are flushed when the quit signal occurs.
3359 +.PP
3360 +The terminal is set to
3361 +.I noecho
3362 +mode by this program so that the output can be continuous.
3363 +What you type will thus not show on your terminal, except for the / and !
3364 +commands.
3365 +.PP
3366 +If the standard output is not a teletype, then
3367 +.I bzmore
3368 +acts just like
3369 +.I bzcat,
3370 +except that a header is printed before each file.
3371 +.SH FILES
3372 +.DT
3373 +/etc/termcap           Terminal data base
3374 +.SH "SEE ALSO"
3375 +more(1), less(1), bzip2(1), bzdiff(1), bzgrep(1)
3376 diff -Nru bzip2-1.0.2/doc/pl/Makefile.am bzip2-1.0.2.new/doc/pl/Makefile.am
3377 --- bzip2-1.0.2/doc/pl/Makefile.am      Thu Jan  1 01:00:00 1970
3378 +++ bzip2-1.0.2.new/doc/pl/Makefile.am  Fri Feb  1 04:19:11 2002
3379 @@ -0,0 +1,4 @@
3380 +
3381 +mandir = @mandir@/pl
3382 +man_MANS = bzip2.1 bunzip2.1 bzcat.1 bzip2recover.1
3383 +
3384 diff -Nru bzip2-1.0.2/doc/pl/bunzip2.1 bzip2-1.0.2.new/doc/pl/bunzip2.1
3385 --- bzip2-1.0.2/doc/pl/bunzip2.1        Thu Jan  1 01:00:00 1970
3386 +++ bzip2-1.0.2.new/doc/pl/bunzip2.1    Fri Feb  1 04:19:11 2002
3387 @@ -0,0 +1 @@
3388 +.so bzip2.1
3389 \ No newline at end of file
3390 diff -Nru bzip2-1.0.2/doc/pl/bzcat.1 bzip2-1.0.2.new/doc/pl/bzcat.1
3391 --- bzip2-1.0.2/doc/pl/bzcat.1  Thu Jan  1 01:00:00 1970
3392 +++ bzip2-1.0.2.new/doc/pl/bzcat.1      Fri Feb  1 04:19:11 2002
3393 @@ -0,0 +1 @@
3394 +.so bzip2.1
3395 \ No newline at end of file
3396 diff -Nru bzip2-1.0.2/doc/pl/bzip2.1 bzip2-1.0.2.new/doc/pl/bzip2.1
3397 --- bzip2-1.0.2/doc/pl/bzip2.1  Thu Jan  1 01:00:00 1970
3398 +++ bzip2-1.0.2.new/doc/pl/bzip2.1      Fri Feb  1 04:19:11 2002
3399 @@ -0,0 +1,384 @@
3400 +.\" Tłumaczenie  Maciej Wojciechowski     wojciech@staszic.waw.pl
3401 +.PU
3402 +.TH bzip2 1 "" "" "wersja 1.0"
3403 +.SH NAZWA
3404 +bzip2, bunzip2 \- sortujący bloki kompresor/dekompresor plików, v1.0
3405 +.br
3406 +bzcat \- dekompresuje pliki na standardowe wyjście
3407 +.br
3408 +bzip2recover \- odzyskuje dane ze zniszczonych archiwów bzip2
3409 +.SH SKŁADNIA
3410 +.ll +8
3411 +.B bzip2
3412 +.RB [ \-cdfkqstvzVL123456789 ]
3413 +.RI [ nazwy_plików \&...]
3414 +.ll -8
3415 +.br
3416 +.B bunzip2
3417 +.RB [ \-fkvsVL ]
3418 +.RI [ nazwy_plików \&...]
3419 +.br
3420 +.B bzcat
3421 +.RB [ \-s ]
3422 +.RI [ nazwy_plików \&...]
3423 +.br
3424 +.B bzip2recover
3425 +.I nazwa_pliku
3426 +.SH OPIS
3427 +.I bzip2
3428 +kompresuje pliki używając algorytmu sortowania bloków Burrowsa-Wheelera i
3429 +kodu Huffmana. Kompresja jest generalnie sporo lepsza od konwencjonalnych
3430 +kompresorów opartych o metodę LZ77/LZ78, i jest porównywalna z
3431 +osiągnięciami statystycznych kompresorów z rodziny PPM.
3432 +
3433 +Opcje linii poleceń są w większości bardzo podobne do tych z
3434 +.IR "GNU gzip" ,
3435 +ale nie są identyczne.
3436 +
3437 +.I bzip2
3438 +oczekuje listy plików towarzyszących parametrom linii poleceń. Każdy plik jest
3439 +zastępowany przez swoją skompresowaną wersję, z nazwą
3440 +"oryginalny_plik.bz2". Każdy skompresowany plik ma ten sam czas modyfikacji,
3441 +uprawnienia i, jeśli to możliwe, właściciela co oryginał, po to, aby te
3442 +ustawienia mogły zostać odtworzone podczas dekompresji. Utrzymywanie nazwy
3443 +plików nie jest do końca dokładne w tym sensie, że nie ma możliwości
3444 +przetrzymywania daty, uprawnień, właściciela i nazw plików na systemach, na
3445 +których brakuje tych możliwości lub mają ograniczenia co do długości nazwy,
3446 +tak np. jak MS-DOS.
3447 +
3448 +.I bzip2
3449 +i
3450 +.I bunzip2
3451 +standardowo nie nadpisują istniejących już plików. Jeśli chcesz aby to
3452 +robiły, musisz użyć parametru \-f.
3453 +
3454 +Jeśli nie podano żadnej nazwy pliku,
3455 +.I bzip2
3456 +kompresuje ze standardowego wejścia na standardowe wyjście. Odmawia wówczas
3457 +wypisywania skompresowanego wyjścia na terminal, gdyż byłoby to
3458 +całkiem niezrozumiałe i przez to bez większego sensu.
3459 +
3460 +.I bunzip2
3461 +(lub
3462 +.IR bzip2 \-d )
3463 +dekompresuje wszystkie podane pliki. Pliki, które nie były
3464 +utworzone przez
3465 +.I bzip2
3466 +zostaną wykryte i zignorowane, a na ekranie pojawi się komunikat
3467 +ostrzegawczy.
3468 +.I bzip2
3469 +próbuje zgadnąć nazwę dla dekompresowanego pliku w następujący sposób:
3470 +.nf
3471 +       nazwa_pliku.bz2    staje się   nazwa_pliku
3472 +       nazwa_pliku.bz     staje się   nazwa_pliku
3473 +       nazwa_pliku.tbz2   staje się   nazwa_pliku.tar
3474 +       nazwa_pliku.tbz    staje się   nazwa_pliku.tar
3475 +       inna_nazwa        staje się   inna_nazwa.out
3476 +.fi
3477 +Jeśli plik nie ma jednego z następujących rozpoznawalnych rozszerzeń,
3478 +.IR .bz2 ,
3479 +.IR .bz ,
3480 +.I .tbz2
3481 +lub
3482 +.IR .tbz ,
3483 +to
3484 +.I bzip2
3485 +napisze, że nie może zgadnąć nazwy pierwotnego pliku, i użyje
3486 +oryginalnej nazwy z dodanym rozszerzeniem
3487 +.IR .out .
3488 +
3489 +Tak jak przy kompresji, niepodanie żadnych nazw plików powoduje dekompresję ze
3490 +standardowego wejścia na standardowe wyjście.
3491 +
3492 +.I bunzip2
3493 +poprawnie zdekompresuje plik, który jest połączeniem dwóch lub więcej
3494 +skompresowanych plików. Rezultatem jest połączony odpowiedni
3495 +nieskompresowany plik. Obsługiwane jest również sprawdzanie spójności
3496 +(\-t) połączonych skompresowanych plików.
3497 +
3498 +Możesz również kompresować lub dekompresować pliki na standardowe wyjście
3499 +używając parametru \-c. W ten właśnie sposób można przeprowadzać kompresję
3500 +wielu plików równocześnie.
3501 +Powstałe wyniki są przesyłane sekwencyjnie na standardowe wyjście.
3502 +W ten sposób kompresja wielu plików generuje strumień
3503 +zawierający reprezentacje kilku skompresowanych plików. Taki strumień może
3504 +być zdekompresowany poprawnie tylko przez
3505 +.I bzip2
3506 +w wersji 0.9.0 lub późniejszej. Wcześniejsze wersje
3507 +.I bzip2
3508 +zatrzymają się po zdekompresowaniu pierwszego pliku w strumieniu.
3509 +
3510 +.I bzcat
3511 +(lub
3512 +.I bzip2 -dc)
3513 +dekompresuje wszystkie wybrane pliki na standardowe wyjście.
3514 +
3515 +.I bzip2
3516 +czyta argumenty ze zmiennych środowiskowych
3517 +.I BZIP2
3518 +i
3519 +.I BZIP,
3520 +w podanej kolejności, i przetwarza je przed jakimikolwiek argumentami
3521 +przeczytanymi z linii poleceń. To dobra metoda na specyfikowanie
3522 +standardowych ustawień.
3523 +
3524 +Kompresja stosowana jest zawsze, nawet jeśli skompresowany plik jest
3525 +nieznacznie większy od pliku oryginalnego. Pliki mniejsze niż mniej więcej
3526 +sto bajtów stają się większe, ponieważ mechanizm kompresji ma stały
3527 +nagłówek wynoszący około 50 bajtów. Przypadkowe dane (włączając wyjście
3528 +większości kompresorów plików) są kodowane na mniej więcej 8.05 bitu na
3529 +bajt, dając przyrost rozmiaru około 0.5%.
3530 +
3531 +Jako samosprawdzenie dla twojej ochrony
3532 +.I bzip2
3533 +używa 32-bitowego CRC aby upewnić się, że zdekompresowana wersja pliku jest
3534 +identyczna z oryginalną. To strzeże przed stratami w skompresowanych danych
3535 +i przed niewykrytymi błędami w
3536 +.I bzip2
3537 +(na szczęście bardzo rzadkich). Możliwość niewykrycia utraty danych
3538 +jest mikroskopijna, mniej więcej jedna szansa na cztery miliardy dla każdego
3539 +pliku. Uważaj jednak, gdyż sprawdzenie jest dokonywane podczas dekompresji,
3540 +więc dowiesz się tylko tego, że coś jest nie w porządku. Nie pomoże ci to odzyskać
3541 +oryginalnych nieskompresowanych danych. Możesz użyć
3542 +.I bzip2recover
3543 +aby spróbować odzyskać dane z uszkodzonych plików.
3544 +
3545 +Zwracane wartości: 0 dla normalnego wyjścia, 1 dla problemów technicznych
3546 +(plik nie znaleziony, niewłaściwy parametr, błąd wejścia/wyjścia itp.), 2 dla
3547 +zasygnalizowania błędu skompresowanego pliku, 3 dla wewnętrznego błędu (np.
3548 +bug), który zmusił \fIbzip2\fP do przerwania.
3549 +
3550 +.SH OPCJE
3551 +.TP
3552 +.B \-c --stdout
3553 +Kompresuje lub dekompresuje na standardowe wyjście.
3554 +.TP
3555 +.B \-d --decompress
3556 +Wymusza dekompresję.
3557 +.IR bzip2 ,
3558 +.I bunzip2
3559 +i
3560 +.I bzcat
3561 +są tak naprawdę tymi samymi programami i decyzja jakie akcje będą wykonane
3562 +jest wykonywana na podstawie nazwy jaka została użyta. Ten parametr ma wyższy
3563 +priorytet i wymusza na \fIbzip2\fP dekompresję.
3564 +.TP
3565 +.B \-z --compress
3566 +Podobne do \-d: wymusza kompresję, bez względu na sposób wywołania.
3567 +.TP
3568 +.B \-t --test
3569 +Sprawdza integralność wybranego pliku(ów), ale nie dekompresuje ich. Wymusza
3570 +to próbną dekompresję i mówi, jaki jest rezultat.
3571 +.TP
3572 +.B \-f --force
3573 +Wymusza zastępowanie plików wyjściowych. Normalnie, \fIbzip2\fP nie
3574 +zastępuje istniejących plików wyjściowych. Wymusza również na \fIbzip2\fP
3575 +łamanie dowiązań twardych, czego normalnie nie robi.
3576 +.TP
3577 +.B \-k --keep
3578 +Zatrzymaj (nie kasuj) pliki wejściowe przy kompresji lub dekompresji.
3579 +.TP
3580 +.B \-s --small
3581 +Zredukuj użycie pamięci na kompresję, dekompresję i testowanie. Pliki są
3582 +dekompresowane i testowane przy użyciu zmodyfikowanego algorytmu, który
3583 +potrzebuje tylko 2.5 bajtu na bajt bloku. Oznacza to, że każdy plik może
3584 +być zdekompresowany przy użyciu około 2300k pamięci, jednak tracąc około połowę
3585 +normalnej szybkości.
3586 +
3587 +Podczas kompresji, \-s wybiera bloki wielkości 200k, których limity
3588 +pamięci wynoszą mniej więcej tyle samo, w zamian za jakość kompresji. W
3589 +skrócie, jeśli twój komputer ma mało pamięci (8 megabajtów lub mniej),
3590 +używaj opcji \-s do wszystkiego. Zobacz \fBzarządzanie pamięcią\fP poniżej.
3591 +.TP
3592 +.B \-q --quiet
3593 +Wyłącza wszystkie nieistotne komunikaty ostrzegawcze.
3594 +Nie są eliminowane komunikaty dotyczące błędów wejścia/wyjścia i innych
3595 +zdarzeń krytycznych.
3596 +.TP
3597 +.B \-v --verbose
3598 +Tryb gadatliwy -- pokazuje stopień kompresji dla każdego pliku. Następne
3599 +\fB\-v\fP zwiększają stopień gadatliwości, powodując wyświetlanie dużej
3600 +ilości informacji, przydatnych głównie przy diagnostyce.
3601 +.TP
3602 +.B \-L --license -V --version
3603 +Wyświetla wersję programu i warunki licencji.
3604 +.TP
3605 +.B \-1 do \-9
3606 +Ustawia wielkość bloku na 100 k, 200 k .. 900 k przy kompresji. Nie ma
3607 +żadnego znaczenia przy dekompresji. Zobacz \fBzarządzanie pamięcią\fP
3608 +poniżej.
3609 +.TP
3610 +.B \--
3611 +Traktuje wszystkie następujące po nim argumenty jako nazwy plików, nawet jeśli
3612 +zaczynają się one od myślnika. Możesz więc kompresować i dekompresować
3613 +pliki, których nazwa zaczyna się od myślnika, na przykład: bzip2 \--
3614 +\-mój_plik.
3615 +.TP
3616 +.B \--repetitive-fast --repetitive-best
3617 +Te parametry nie mają znaczenia w wersjach 0.9.5 i wyższych. Umożliwiały one
3618 +pewną infantylną kontrolę nad zachowaniem algorytmu sortującego we
3619 +wcześniejszych wersjach, co było czasami użyteczne. Wersje 0.9.5 i wyższe
3620 +mają usprawniony algorytm, który powoduje bezużyteczność tej funkcji.
3621 +
3622 +.SH ZARZĄDZANIE PAMIĘCIĄ
3623 +.I bzip2
3624 +kompresuje duże pliki w blokach. Rozmiar bloku ma wpływ zarówno na stopień
3625 +osiąganej kompresji, jak również na ilość pamięci potrzebnej do kompresji
3626 +i dekompresji. Parametry od \-1 do \-9 wybierają rozmiar bloku odpowiednio
3627 +od 100,000 bajtów aż do 900,000 bajtów (standardowo). W czasie dekompresji,
3628 +rozmiar bloku użytego do kompresji jest odczytywany z nagłówka pliku
3629 +skompresowanego i
3630 +.I bunzip2
3631 +sam zajmuje odpowiednią do dekompresji ilość pamięci. Ponieważ rozmiar
3632 +bloków jest przetrzymywany w pliku skompresowanym, parametry od \-1 do \-9
3633 +nie mają przy dekompresji żadnego znaczenia.
3634 +
3635 +Wymagania kompresji i dekompresji w bajtach, mogą być wyliczone przez:
3636 +
3637 +       Kompresja :   400k + ( 8 x rozmiar bloku )
3638 +
3639 +       Dekompresja :  100k + ( 4 x rozmiar bloku ) lub
3640 +                      100k + ( 2.5 x rozmiar bloku )
3641 +
3642 +Większe bloki dają duże zmniejszenie zwrotów marginalnych. Większość
3643 +kompresji pochodzi z pierwszych stu lub dwustu kilobajtów rozmiaru bloku.
3644 +Warto o tym pamiętać używając \fIbzip2\fP na wolnych
3645 +komputerach. Warto również podkreślić, że rozmiar pamięci potrzebnej do
3646 +dekompresji jest wybierany poprzez ustawienie odpowiedniej
3647 +wielkości bloku przy kompresji.
3648 +
3649 +Dla plików skompresowanych standardowym blokiem wielkości 900k,
3650 +\fIbunzip2\fP będzie wymagał około 3700 kilobajtów do dekompresji. Aby
3651 +umożliwić dekompresję na komputerze wyposażonym jedynie w 4 megabajty
3652 +pamięci, \fIbunzip2\fP ma opcję, która może zmniejszyć wymagania prawie do
3653 +połowy, tzn. około 2300 kilobajtów. Prędkość dekompresji jest również bardzo
3654 +zmniejszona, więc używaj tej opcji tylko wtedy, kiedy jest to konieczne. Tym
3655 +parametrem jest -s.
3656 +
3657 +Generalnie, próbuj i używaj największych rozmiarów bloków, jeśli ilość
3658 +pamięci ci na to pozwala. Prędkość kompresji i dekompresji w zasadzie nie
3659 +zależy od wielkości użytego bloku.
3660 +
3661 +Inna ważna rzecz dotyczy plików, które mieszczą się w pojedynczym bloku --
3662 +oznacza to większość plików na które się natkniesz używając dużych bloków.
3663 +Rozmiar realny pamięci zabieranej jest proporcjonalny do wielkości pliku,
3664 +jeśli plik jest mniejszy niż blok. Na przykład, kompresja pliku o
3665 +wielkości 20,000 bajtów z parametrem -9 wymusi na kompresorze odnalezienie
3666 +7600 k pamięci, ale zajęcie tylko 400k + 20000 * 8 = 560 kilobajtów z
3667 +tego. Podobnie, dekompresor odnajdzie 3700k, ale zajmie tylko 100k + 20000
3668 +* 4 = 180 kilobajtów.
3669 +
3670 +Tu jest tabela, która podsumowuje maksymalne użycie pamięci dla różnych
3671 +rozmiarów bloków. Podano też całkowity rozmiar skompresowanych 14
3672 +plików tekstowych (Calgary Text Compression Corpus) zajmujących razem
3673 +3,141,622 bajtów. Ta kolumna daje pewne pojęcie o tym, jaki wpływ na
3674 +kompresję ma wielkość bloków. Ta tabela raczej zaniża przewagę użycia
3675 +większych bloków dla większych plików, ponieważ "Corpus" jest zdominowany
3676 +przez mniejsze pliki.
3677 +.nf
3678 +             Użycie       Użycie        Użycie         Corpus
3679 +   Parametr kompresji   dekompresji   dekompresji -s    Size
3680 +
3681 +     -1      1200k         500k          350k          914704
3682 +     -2      2000k         900k          600k          877703
3683 +     -3      2800k         1300k         850k          860338
3684 +     -4      3600k         1700k        1100k          846899
3685 +     -5      4400k         2100k        1350k          845160
3686 +     -6      5200k         2500k        1600k          838626
3687 +     -7      6100k         2900k        1850k          834096
3688 +     -8      6800k         3300k        2100k          828642
3689 +     -9      7600k         3700k        2350k          828642
3690 +.fi
3691 +.SH ODZYSKIWANIE DANYCH ZE ZNISZCZONYCH PLIKÓW BZIP2
3692 +.I bzip2
3693 +kompresuje pliki w blokach, zazwyczaj 900 kilobajtowych. Każdy blok jest
3694 +trzymany osobno. Jeśli błędy transmisji lub nośnika uszkodzą plik
3695 +wieloblokowy .bz2, możliwe jest odtworzenie danych zawartych w
3696 +niezniszczonych blokach pliku.
3697 +
3698 +Każdy blok jest reprezentowany przez 48-bitowy wzorzec, który umożliwia
3699 +znajdowanie przyporządkowań bloków z rozsądną pewnością. Każdy blok
3700 +ma również swój 32-bitowy CRC, więc bloki uszkodzone mogą być łatwo
3701 +odseparowane od poprawnych.
3702 +
3703 +.I bzip2recover
3704 +jest oddzielnym programem, którego zadaniem jest poszukiwanie bloków w
3705 +plikach .bz2 i zapisywanie ich do własnego pliku .bz2. Możesz potem użyć
3706 +\fIbzip2\fP \-t aby sprawdzić spójność wyjściowego pliku i zdekompresować
3707 +te, które nie są uszkodzone.
3708 +
3709 +.I bzip2recover
3710 +pobiera pojedynczy argument, nazwę uszkodzonego pliku, i tworzy pewną liczbę
3711 +plików "rec0001plik.bz2", "rec0002plik.bz2", itd., przetrzymujące odzyskane
3712 +bloki. Wyjściowe nazwy plików są tak tworzone, aby łatwo było potem używać
3713 +ich razem za pomocą gwiazdek -- na przykład, "bzip2 -dc rec*plik.bz2 >
3714 +odzyskany_plik" -- wylistuje pliki we właściwej kolejności.
3715 +
3716 +.I bzip2recover
3717 +powinien być używany najczęściej z dużymi plikami .bz2, jako iż one
3718 +zawierają najczęściej dużo bloków. Jest czystym bezsensem używać go na
3719 +uszkodzonym jednoblokowym pliku, ponieważ uszkodzony blok nie może być
3720 +odzyskany. Jeśli chcesz zminimalizować jakiekolwiek możliwe straty danych
3721 +poprzez nośnik lub transmisję, powinieneś zastanowić się nad użyciem
3722 +mniejszych bloków.
3723 +
3724 +.SH OPISY WYNIKÓW
3725 +Etap sortujący kompresji łączy razem podobne ciągi znaków w pliku. Przez
3726 +to, pliki zawierające bardzo długie ciągi powtarzających się symboli, jak
3727 +"aabaabaabaab ..." (powtórzone kilkaset razy) mogą być kompresowane wolniej
3728 +niż normalnie. Wersje 0.9.5 i wyższe zachowują się dużo lepiej w tej
3729 +sytuacji niż wersje poprzednie. Stosunek czasu kompresji w najgorszym
3730 +przypadku do czasu w przypadku przeciętnym wynosi około 10:1. Dla
3731 +wcześniejszych wersji było to nawet około 100:1. Jeśli chcesz, możesz użyć
3732 +parametru \-vvvv aby monitorować postępy bardzo szczegółowo.
3733 +
3734 +Prędkość dekompresji nie jest zmieniana przez to zjawisko.
3735 +
3736 +.I bzip2
3737 +zazwyczaj rezerwuje kilka megabajtów pamięci do działania a
3738 +potem wykorzystuje ją w sposób zupełnie przypadkowy.
3739 +Oznacza to, że zarówno prędkość kompresji jak i dekompresji jest w
3740 +dużej części zależna od prędkości, z jaką twój komputer może naprawiać braki
3741 +bufora podręcznego. Z tego powodu, wprowadzone zostały małe zmiany kodu aby
3742 +zmniejszyć straty, które dały nieproporcjonalnie duży wzrost osiągnięć.
3743 +Myślę, że
3744 +.I bzip2
3745 +będzie działał najlepiej na komputerach z dużymi buforami podręcznymi.
3746 +
3747 +.SH ZAKAMARKI
3748 +Wiadomości o błędach wejścia/wyjścia nie są aż tak pomocne, jak mogłyby być.
3749 +.I bzip2
3750 +stara się wykryć błąd wejścia/wyjścia i wyjść "czysto", ale
3751 +szczegóły tego, jaki to problem mogą być czasami bardzo mylące.
3752 +
3753 +Ta strona podręcznika odnosi się do wersji 1.0 programu \fIbzip2\fP.
3754 +Skompresowane pliki utworzone przez tę wersję są kompatybilne zarówno
3755 +w przód jak i wstecznie z poprzednimi publicznymi wydaniami,
3756 +wersjami 0.1pl2, 0.9.0 i 0.9.5 ale z małymi wyjątkami: 0.9.0 i wyższe potrafią
3757 +poprawnie dekompresować wiele skompresowanych plików złączonych w jeden.
3758 +0.1pl2 nie potrafi tego; zatrzyma się już po dekompresji pierwszego pliku w
3759 +strumieniu.
3760 +
3761 +.I bzip2recover
3762 +używa 32-bitowych liczb do reprezentacji pozycji bitu w skompresowanym
3763 +pliku, więc nie może przetwarzać skompresowanych plików dłuższych niż 512
3764 +megabajtów. Można to łatwo naprawić.
3765 +
3766 +.SH AUTOR
3767 +Julian Seward, jseward@acm.org.
3768 +
3769 +http://www.muraroa.demon.co.uk
3770 +http://sourceware.cygnus.com/bzip2
3771 +
3772 +Idee zawarte w \fIbzip2\fP są podzielone (przynajmniej) pomiędzy
3773 +następujących ludzi: Michael Burrows i David Wheeler (transformacja
3774 +sortująca bloki), David Wheeler (znów, koder Huffmana), Peter Fenwick
3775 +(struktura kodowania modelu w oryginalnym \fIbzip2\fP, i wiele
3776 +udoskonaleń), i Alistair Moffat, Radford Neal i Ian Witten (arytmetyczny
3777 +koder w oryginalnym \fIbzip2\fP). Jestem im bardzo wdzięczny za ich pomoc,
3778 +wsparcie i porady. Zobacz stronę manuala w źródłowej dystrybucji po
3779 +wskaźniki do źródeł dokumentacji. Christian von Roques zachęcił mnie do
3780 +wymyślenia szybszego algorytmu sortującego, po to żeby przyspieszyć
3781 +kompresję. Bela Lubkin zachęciła mnie do polepszenia najgorszych wyników
3782 +kompresji. Wielu ludzi przysłało łatki, pomogło w różnych problemach,
3783 +pożyczyło komputerów, dało rady i było ogólnie pomocnych.
3784 diff -Nru bzip2-1.0.2/doc/pl/bzip2recover.1 bzip2-1.0.2.new/doc/pl/bzip2recover.1
3785 --- bzip2-1.0.2/doc/pl/bzip2recover.1   Thu Jan  1 01:00:00 1970
3786 +++ bzip2-1.0.2.new/doc/pl/bzip2recover.1       Fri Feb  1 04:19:11 2002
3787 @@ -0,0 +1 @@
3788 +.so bzip2.1
3789 \ No newline at end of file
3790 diff -Nru bzip2-1.0.2/huffman.c bzip2-1.0.2.new/huffman.c
3791 --- bzip2-1.0.2/huffman.c       Sun Dec 30 03:19:17 2001
3792 +++ bzip2-1.0.2.new/huffman.c   Fri Feb  1 04:19:11 2002
3793 @@ -58,6 +58,10 @@
3794    For more information on these sources, see the manual.
3795  --*/
3796
3797 +#ifdef HAVE_CONFIG_H
3798 +#include <config.h>
3799 +#endif
3800 +
3801
3802  #include "bzlib_private.h"
3803
3804 diff -Nru bzip2-1.0.2/makefile.msc bzip2-1.0.2.new/makefile.msc
3805 --- bzip2-1.0.2/makefile.msc    Wed Jan  2 05:02:33 2002
3806 +++ bzip2-1.0.2.new/makefile.msc        Thu Jan  1 01:00:00 1970
3807 @@ -1,63 +0,0 @@
3808 -# Makefile for Microsoft Visual C++ 6.0\r
3809 -# usage: nmake -f makefile.msc\r
3810 -# K.M. Syring (syring@gsf.de)\r
3811 -# Fixed up by JRS for bzip2-0.9.5d release.\r
3812 -\r
3813 -CC=cl\r
3814 -CFLAGS= -DWIN32 -MD -Ox -D_FILE_OFFSET_BITS=64 -nologo\r
3815 -\r
3816 -OBJS= blocksort.obj  \\r
3817 -      huffman.obj    \\r
3818 -      crctable.obj   \\r
3819 -      randtable.obj  \\r
3820 -      compress.obj   \\r
3821 -      decompress.obj \\r
3822 -      bzlib.obj\r
3823 -\r
3824 -all: lib bzip2 test\r
3825 -\r
3826 -bzip2: lib\r
3827 -       $(CC) $(CFLAGS) -o bzip2 bzip2.c libbz2.lib setargv.obj\r
3828 -       $(CC) $(CFLAGS) -o bzip2recover bzip2recover.c\r
3829 -\r
3830 -lib: $(OBJS)\r
3831 -       lib /out:libbz2.lib $(OBJS)\r
3832 -\r
3833 -test: bzip2\r
3834 -       type words1\r
3835 -       .\\bzip2 -1  < sample1.ref > sample1.rb2\r
3836 -       .\\bzip2 -2  < sample2.ref > sample2.rb2\r
3837 -       .\\bzip2 -3  < sample3.ref > sample3.rb2\r
3838 -       .\\bzip2 -d  < sample1.bz2 > sample1.tst\r
3839 -       .\\bzip2 -d  < sample2.bz2 > sample2.tst\r
3840 -       .\\bzip2 -ds < sample3.bz2 > sample3.tst\r
3841 -       @echo All six of the fc's should find no differences.\r
3842 -       @echo If fc finds an error on sample3.bz2, this could be\r
3843 -       @echo because WinZip's 'TAR file smart CR/LF conversion'\r
3844 -       @echo is too clever for its own good.  Disable this option.\r
3845 -       @echo The correct size for sample3.ref is 120,244.  If it\r
3846 -       @echo is 150,251, WinZip has messed it up.\r
3847 -       fc sample1.bz2 sample1.rb2 \r
3848 -       fc sample2.bz2 sample2.rb2\r
3849 -       fc sample3.bz2 sample3.rb2\r
3850 -       fc sample1.tst sample1.ref\r
3851 -       fc sample2.tst sample2.ref\r
3852 -       fc sample3.tst sample3.ref\r
3853 -\r
3854 -\r
3855 -\r
3856 -clean: \r
3857 -       del *.obj\r
3858 -       del libbz2.lib \r
3859 -       del bzip2.exe\r
3860 -       del bzip2recover.exe\r
3861 -       del sample1.rb2 \r
3862 -       del sample2.rb2 \r
3863 -       del sample3.rb2\r
3864 -       del sample1.tst \r
3865 -       del sample2.tst\r
3866 -       del sample3.tst\r
3867 -\r
3868 -.c.obj: \r
3869 -       $(CC) $(CFLAGS) -c $*.c -o $*.obj\r
3870 -\r
3871 diff -Nru bzip2-1.0.2/randtable.c bzip2-1.0.2.new/randtable.c
3872 --- bzip2-1.0.2/randtable.c     Sun Dec 30 03:19:04 2001
3873 +++ bzip2-1.0.2.new/randtable.c Fri Feb  1 04:19:18 2002
3874 @@ -58,6 +58,10 @@
3875    For more information on these sources, see the manual.
3876  --*/
3877
3878 +#ifdef HAVE_CONFIG_H
3879 +#include <config.h>
3880 +#endif
3881 +
3882
3883  #include "bzlib_private.h"
3884
3885 diff -Nru bzip2-1.0.2/spewG.c bzip2-1.0.2.new/spewG.c
3886 --- bzip2-1.0.2/spewG.c Thu Apr  6 01:24:45 2000
3887 +++ bzip2-1.0.2.new/spewG.c     Fri Feb  1 04:19:18 2002
3888 @@ -9,7 +9,10 @@
3889     (but is otherwise harmless).
3890  */
3891
3892 -#define _FILE_OFFSET_BITS 64
3893 +#ifdef HAVE_CONFIG_H
3894 +#include <config.h>
3895 +#endif
3896 +
3897
3898  #include <stdio.h>
3899  #include <stdlib.h>
3900 diff -Nru bzip2-1.0.2/stamp-h.in bzip2-1.0.2.new/stamp-h.in
3901 --- bzip2-1.0.2/stamp-h.in      Thu Jan  1 01:00:00 1970
3902 +++ bzip2-1.0.2.new/stamp-h.in  Fri Feb  1 04:19:18 2002
3903 @@ -0,0 +1 @@
3904 +timestamp
3905 diff -Nru bzip2-1.0.2/unzcrash.c bzip2-1.0.2.new/unzcrash.c
3906 --- bzip2-1.0.2/unzcrash.c      Thu Apr  6 01:24:46 2000
3907 +++ bzip2-1.0.2.new/unzcrash.c  Fri Feb  1 04:19:18 2002
3908 @@ -13,6 +13,12 @@
3909     many hours.
3910  */
3911
3912 +#ifdef HAVE_CONFIG_H
3913 +#include <config.h>
3914 +#endif
3915 +
3916 +
3917 +
3918  #include <stdio.h>
3919  #include <assert.h>
3920  #include "bzlib.h"