diff -Nru bzip2-1.0.2/Makefile.am bzip2-1.0.2.new/Makefile.am --- bzip2-1.0.2/Makefile.am Thu Jan 1 01:00:00 1970 +++ bzip2-1.0.2.new/Makefile.am Fri Feb 1 04:19:09 2002 @@ -0,0 +1,32 @@ +SUBDIRS = doc + +bin_PROGRAMS = bzip2 bzip2recover +bzip2_SOURCES = bzip2.c + +bzip2_LDADD = libbz2.la +bzip2recover_SOURCES = bzip2recover.c +lib_LTLIBRARIES = libbz2.la +libbz2_la_SOURCES = \ + blocksort.c \ + huffman.c \ + crctable.c \ + randtable.c \ + compress.c \ + decompress.c \ + bzlib.c \ + bzlib.h \ + bzlib_private.h + +libbz2_la_LDFLAGS = -version-info 1:0:0 +include_HEADERS = bzlib.h bzlib_private.h + +bin_SCRIPTS = bzless bzgrep + +EXTRA_DIST = README README.COMPILATION.PROBLEMS \ + Y2K_INFO libbz2.def libbz2.dsp \ + sample1.bz2 sample1.ref sample2.bz2 sample2.ref sample3.bz2 sample3.ref + +# Link inside the install directory so every LN_S fallback (ln, cp) aliases the installed bzip2, not a build-tree file. +install-exec-hook: + cd $(DESTDIR)$(bindir) && rm -f bunzip2 && $(LN_S) bzip2 bunzip2 + cd $(DESTDIR)$(bindir) && rm -f bzcat && $(LN_S) bzip2 bzcat diff -Nru bzip2-1.0.2/bzdiff.1 bzip2-1.0.2.new/bzdiff.1 --- bzip2-1.0.2/bzdiff.1 Sun Dec 30 03:12:35 2001 +++ bzip2-1.0.2.new/bzdiff.1 Thu Jan 1 01:00:00 1970 @@ -1,47 +0,0 @@ -\"Shamelessly copied from zmore.1 by Philippe Troin -\"for Debian GNU/Linux -.TH BZDIFF 1 -.SH NAME -bzcmp, bzdiff \- compare bzip2 compressed files -.SH SYNOPSIS -.B bzcmp -[ cmp_options ] file1 -[ file2 ] -.br -.B bzdiff -[ diff_options ] file1 -[ file2 ] -.SH DESCRIPTION -.I Bzcmp -and -.I bzdiff -are used to invoke the -.I cmp -or the -.I diff -program on bzip2 compressed files. All options specified are passed -directly to -.I cmp -or -.IR diff "." -If only 1 file is specified, then the files compared are -.I file1 -and an uncompressed -.IR file1 ".bz2." -If two files are specified, then they are uncompressed if necessary and fed to -.I cmp -or -.IR diff "." -The exit status from -.I cmp -or -.I diff -is preserved. 
-.SH "SEE ALSO" -cmp(1), diff(1), bzmore(1), bzless(1), bzgrep(1), bzip2(1) -.SH BUGS -Messages from the -.I cmp -or -.I diff -programs refer to temporary filenames instead of those specified. diff -Nru bzip2-1.0.2/bzgrep.1 bzip2-1.0.2.new/bzgrep.1 --- bzip2-1.0.2/bzgrep.1 Sun Dec 30 03:12:35 2001 +++ bzip2-1.0.2.new/bzgrep.1 Thu Jan 1 01:00:00 1970 @@ -1,56 +0,0 @@ -\"Shamelessly copied from zmore.1 by Philippe Troin -\"for Debian GNU/Linux -.TH BZGREP 1 -.SH NAME -bzgrep, bzfgrep, bzegrep \- search possibly bzip2 compressed files for a regular expression -.SH SYNOPSIS -.B bzgrep -[ grep_options ] -.BI [\ -e\ ] " pattern" -.IR filename ".\|.\|." -.br -.B bzegrep -[ egrep_options ] -.BI [\ -e\ ] " pattern" -.IR filename ".\|.\|." -.br -.B bzfgrep -[ fgrep_options ] -.BI [\ -e\ ] " pattern" -.IR filename ".\|.\|." -.SH DESCRIPTION -.IR Bzgrep -is used to invoke the -.I grep -on bzip2-compressed files. All options specified are passed directly to -.I grep. -If no file is specified, then the standard input is decompressed -if necessary and fed to grep. -Otherwise the given files are uncompressed if necessary and fed to -.I grep. -.PP -If -.I bzgrep -is invoked as -.I bzegrep -or -.I bzfgrep -then -.I egrep -or -.I fgrep -is used instead of -.I grep. -If the GREP environment variable is set, -.I bzgrep -uses it as the -.I grep -program to be invoked. For example: - - for sh: GREP=fgrep bzgrep string files - for csh: (setenv GREP fgrep; bzgrep string files) -.SH AUTHOR -Charles Levert (charles@comm.polymtl.ca). Adapted to bzip2 by Philippe -Troin for Debian GNU/Linux. 
-.SH "SEE ALSO" -grep(1), egrep(1), fgrep(1), bzdiff(1), bzmore(1), bzless(1), bzip2(1) diff -Nru bzip2-1.0.2/bzless bzip2-1.0.2.new/bzless --- bzip2-1.0.2/bzless Thu Jan 1 01:00:00 1970 +++ bzip2-1.0.2.new/bzless Fri Feb 1 04:19:11 2002 @@ -0,0 +1,3 @@ +#!/bin/sh +# Page bzip2-compressed files through less; both tools are looked up via PATH. +bunzip2 -c "$@" | less diff -Nru bzip2-1.0.2/bzmore.1 bzip2-1.0.2.new/bzmore.1 --- bzip2-1.0.2/bzmore.1 Sun Dec 30 03:12:35 2001 +++ bzip2-1.0.2.new/bzmore.1 Thu Jan 1 01:00:00 1970 @@ -1,152 +0,0 @@ -.\"Shamelessly copied from zmore.1 by Philippe Troin -.\"for Debian GNU/Linux -.TH BZMORE 1 -.SH NAME -bzmore, bzless \- file perusal filter for crt viewing of bzip2 compressed text -.SH SYNOPSIS -.B bzmore -[ name ... ] -.br -.B bzless -[ name ... ] -.SH NOTE -In the following description, -.I bzless -and -.I less -can be used interchangeably with -.I bzmore -and -.I more. -.SH DESCRIPTION -.I Bzmore -is a filter which allows examination of compressed or plain text files -one screenful at a time on a soft-copy terminal. -.I bzmore -works on files compressed with -.I bzip2 -and also on uncompressed files. -If a file does not exist, -.I bzmore -looks for a file of the same name with the addition of a .bz2 suffix. -.PP -.I Bzmore -normally pauses after each screenful, printing --More-- -at the bottom of the screen. -If the user then types a carriage return, one more line is displayed. -If the user hits a space, -another screenful is displayed. Other possibilities are enumerated later. -.PP -.I Bzmore -looks in the file -.I /etc/termcap -to determine terminal characteristics, -and to determine the default window size. -On a terminal capable of displaying 24 lines, -the default window size is 22 lines. 
-Other sequences which may be typed when -.I bzmore -pauses, and their effects, are as follows (\fIi\fP is an optional integer -argument, defaulting to 1) : -.PP -.IP \fIi\|\fP -display -.I i -more lines, (or another screenful if no argument is given) -.PP -.IP ^D -display 11 more lines (a ``scroll''). -If -.I i -is given, then the scroll size is set to \fIi\|\fP. -.PP -.IP d -same as ^D (control-D) -.PP -.IP \fIi\|\fPz -same as typing a space except that \fIi\|\fP, if present, becomes the new -window size. Note that the window size reverts back to the default at the -end of the current file. -.PP -.IP \fIi\|\fPs -skip \fIi\|\fP lines and print a screenful of lines -.PP -.IP \fIi\|\fPf -skip \fIi\fP screenfuls and print a screenful of lines -.PP -.IP "q or Q" -quit reading the current file; go on to the next (if any) -.PP -.IP "e or q" -When the prompt --More--(Next file: -.IR file ) -is printed, this command causes bzmore to exit. -.PP -.IP s -When the prompt --More--(Next file: -.IR file ) -is printed, this command causes bzmore to skip the next file and continue. -.PP -.IP = -Display the current line number. -.PP -.IP \fIi\|\fP/expr -search for the \fIi\|\fP-th occurrence of the regular expression \fIexpr.\fP -If the pattern is not found, -.I bzmore -goes on to the next file (if any). -Otherwise, a screenful is displayed, starting two lines before the place -where the expression was found. -The user's erase and kill characters may be used to edit the regular -expression. -Erasing back past the first column cancels the search command. -.PP -.IP \fIi\|\fPn -search for the \fIi\|\fP-th occurrence of the last regular expression entered. -.PP -.IP !command -invoke a shell with \fIcommand\|\fP. -The character `!' in "command" are replaced with the -previous shell command. The sequence "\\!" is replaced by "!". -.PP -.IP ":q or :Q" -quit reading the current file; go on to the next (if any) -(same as q or Q). -.PP -.IP . -(dot) repeat the previous command. 
-.PP -The commands take effect immediately, i.e., it is not necessary to -type a carriage return. -Up to the time when the command character itself is given, -the user may hit the line kill character to cancel the numerical -argument being formed. -In addition, the user may hit the erase character to redisplay the ---More-- message. -.PP -At any time when output is being sent to the terminal, the user can -hit the quit key (normally control\-\\). -.I Bzmore -will stop sending output, and will display the usual --More-- -prompt. -The user may then enter one of the above commands in the normal manner. -Unfortunately, some output is lost when this is done, due to the -fact that any characters waiting in the terminal's output queue -are flushed when the quit signal occurs. -.PP -The terminal is set to -.I noecho -mode by this program so that the output can be continuous. -What you type will thus not show on your terminal, except for the / and ! -commands. -.PP -If the standard output is not a teletype, then -.I bzmore -acts just like -.I bzcat, -except that a header is printed before each file. -.SH FILES -.DT -/etc/termcap Terminal data base -.SH "SEE ALSO" -more(1), less(1), bzip2(1), bzdiff(1), bzgrep(1) diff -Nru bzip2-1.0.2/config.h.in bzip2-1.0.2.new/config.h.in --- bzip2-1.0.2/config.h.in Thu Jan 1 01:00:00 1970 +++ bzip2-1.0.2.new/config.h.in Fri Feb 1 04:19:11 2002 @@ -0,0 +1,17 @@ +/* config.h.in. Generated automatically from configure.in by autoheader. */ + +/* Name of package */ +#undef PACKAGE + +/* Version number of package */ +#undef VERSION + +/* Number of bits in a file offset, on hosts where this is settable. */ +#undef _FILE_OFFSET_BITS + +/* Define to make fseeko etc. visible, on some hosts. */ +#undef _LARGEFILE_SOURCE + +/* Define for large files, on AIX-style hosts. 
*/ +#undef _LARGE_FILES + diff -Nru bzip2-1.0.2/configure.in bzip2-1.0.2.new/configure.in --- bzip2-1.0.2/configure.in Thu Jan 1 01:00:00 1970 +++ bzip2-1.0.2.new/configure.in Fri Feb 1 04:19:11 2002 @@ -0,0 +1,11 @@ +dnl Keep the version below in step with the bzip2 release this tree carries. +AC_INIT(bzip2.c) +AM_INIT_AUTOMAKE(bzip2,1.0.2) +AM_CONFIG_HEADER(config.h) +AC_PROG_CC +AM_PROG_LIBTOOL +AC_PROG_LN_S +AC_SYS_LARGEFILE +AC_OUTPUT(Makefile + doc/Makefile + doc/pl/Makefile) diff -Nru bzip2-1.0.2/crctable.c bzip2-1.0.2.new/crctable.c --- bzip2-1.0.2/crctable.c Sun Dec 30 03:19:28 2001 +++ bzip2-1.0.2.new/crctable.c Fri Feb 1 04:19:11 2002 @@ -58,6 +58,10 @@ For more information on these sources, see the manual. --*/ +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + #include "bzlib_private.h" diff -Nru bzip2-1.0.2/decompress.c bzip2-1.0.2.new/decompress.c --- bzip2-1.0.2/decompress.c Sun Dec 30 21:45:53 2001 +++ bzip2-1.0.2.new/decompress.c Fri Feb 1 04:19:11 2002 @@ -58,6 +58,10 @@ For more information on these sources, see the manual. --*/ +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + #include "bzlib_private.h" diff -Nru bzip2-1.0.2/dlltest.c bzip2-1.0.2.new/dlltest.c --- bzip2-1.0.2/dlltest.c Sun Dec 30 20:44:07 2001 +++ bzip2-1.0.2.new/dlltest.c Fri Feb 1 04:19:11 2002 @@ -8,6 +8,10 @@ usage: minibz2 [-d] [-{1,2,..9}] [[srcfilename] destfilename] */ +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + #define BZ_IMPORT #include <stdio.h> #include <stdlib.h> diff -Nru bzip2-1.0.2/doc/Makefile.am bzip2-1.0.2.new/doc/Makefile.am --- bzip2-1.0.2/doc/Makefile.am Thu Jan 1 01:00:00 1970 +++ bzip2-1.0.2.new/doc/Makefile.am Fri Feb 1 04:27:21 2002 @@ -0,0 +1,6 @@ + +SUBDIRS = pl + +man_MANS = bunzip2.1 bzcat.1 bzdiff.1 bzgrep.1 bzip2.1 \ + bzip2recover.1 bzmore.1 +#info_TEXINFOS = bzip2.texi diff -Nru bzip2-1.0.2/doc/bunzip2.1 bzip2-1.0.2.new/doc/bunzip2.1 --- bzip2-1.0.2/doc/bunzip2.1 Thu Jan 1 01:00:00 1970 +++ bzip2-1.0.2.new/doc/bunzip2.1 Fri Feb 1 04:19:11 2002 @@ -0,0 +1 @@ +.so bzip2.1 \ No newline at end of file diff -Nru bzip2-1.0.2/doc/bzcat.1 bzip2-1.0.2.new/doc/bzcat.1 
--- bzip2-1.0.2/doc/bzcat.1 Thu Jan 1 01:00:00 1970 +++ bzip2-1.0.2.new/doc/bzcat.1 Fri Feb 1 04:19:11 2002 @@ -0,0 +1 @@ +.so bzip2.1 \ No newline at end of file diff -Nru bzip2-1.0.2/doc/bzdiff.1 bzip2-1.0.2.new/doc/bzdiff.1 --- bzip2-1.0.2/doc/bzdiff.1 Thu Jan 1 01:00:00 1970 +++ bzip2-1.0.2.new/doc/bzdiff.1 Sun Dec 30 03:12:35 2001 @@ -0,0 +1,47 @@ +.\"Shamelessly copied from zmore.1 by Philippe Troin +.\"for Debian GNU/Linux +.TH BZDIFF 1 +.SH NAME +bzcmp, bzdiff \- compare bzip2 compressed files +.SH SYNOPSIS +.B bzcmp +[ cmp_options ] file1 +[ file2 ] +.br +.B bzdiff +[ diff_options ] file1 +[ file2 ] +.SH DESCRIPTION +.I Bzcmp +and +.I bzdiff +are used to invoke the +.I cmp +or the +.I diff +program on bzip2 compressed files. All options specified are passed +directly to +.I cmp +or +.IR diff "." +If only 1 file is specified, then the files compared are +.I file1 +and an uncompressed +.IR file1 ".bz2." +If two files are specified, then they are uncompressed if necessary and fed to +.I cmp +or +.IR diff "." +The exit status from +.I cmp +or +.I diff +is preserved. +.SH "SEE ALSO" +cmp(1), diff(1), bzmore(1), bzless(1), bzgrep(1), bzip2(1) +.SH BUGS +Messages from the +.I cmp +or +.I diff +programs refer to temporary filenames instead of those specified. diff -Nru bzip2-1.0.2/doc/bzgrep.1 bzip2-1.0.2.new/doc/bzgrep.1 --- bzip2-1.0.2/doc/bzgrep.1 Thu Jan 1 01:00:00 1970 +++ bzip2-1.0.2.new/doc/bzgrep.1 Sun Dec 30 03:12:35 2001 @@ -0,0 +1,56 @@ +.\"Shamelessly copied from zmore.1 by Philippe Troin +.\"for Debian GNU/Linux +.TH BZGREP 1 +.SH NAME +bzgrep, bzfgrep, bzegrep \- search possibly bzip2 compressed files for a regular expression +.SH SYNOPSIS +.B bzgrep +[ grep_options ] +.BI [\ -e\ ] " pattern" +.IR filename ".\|.\|." +.br +.B bzegrep +[ egrep_options ] +.BI [\ -e\ ] " pattern" +.IR filename ".\|.\|." +.br +.B bzfgrep +[ fgrep_options ] +.BI [\ -e\ ] " pattern" +.IR filename ".\|.\|." 
+.SH DESCRIPTION +.IR Bzgrep +is used to invoke the +.I grep +on bzip2-compressed files. All options specified are passed directly to +.I grep. +If no file is specified, then the standard input is decompressed +if necessary and fed to grep. +Otherwise the given files are uncompressed if necessary and fed to +.I grep. +.PP +If +.I bzgrep +is invoked as +.I bzegrep +or +.I bzfgrep +then +.I egrep +or +.I fgrep +is used instead of +.I grep. +If the GREP environment variable is set, +.I bzgrep +uses it as the +.I grep +program to be invoked. For example: + + for sh: GREP=fgrep bzgrep string files + for csh: (setenv GREP fgrep; bzgrep string files) +.SH AUTHOR +Charles Levert (charles@comm.polymtl.ca). Adapted to bzip2 by Philippe +Troin for Debian GNU/Linux. +.SH "SEE ALSO" +grep(1), egrep(1), fgrep(1), bzdiff(1), bzmore(1), bzless(1), bzip2(1) diff -Nru bzip2-1.0.2/doc/bzip2.1 bzip2-1.0.2.new/doc/bzip2.1 --- bzip2-1.0.2/doc/bzip2.1 Thu Jan 1 01:00:00 1970 +++ bzip2-1.0.2.new/doc/bzip2.1 Thu Jan 3 00:14:36 2002 @@ -0,0 +1,453 @@ +.PU +.TH bzip2 1 +.SH NAME +bzip2, bunzip2 \- a block-sorting file compressor, v1.0.2 +.br +bzcat \- decompresses files to stdout +.br +bzip2recover \- recovers data from damaged bzip2 files + +.SH SYNOPSIS +.ll +8 +.B bzip2 +.RB [ " \-cdfkqstvzVL123456789 " ] +[ +.I "filenames \&..." +] +.ll -8 +.br +.B bunzip2 +.RB [ " \-fkvsVL " ] +[ +.I "filenames \&..." +] +.br +.B bzcat +.RB [ " \-s " ] +[ +.I "filenames \&..." +] +.br +.B bzip2recover +.I "filename" + +.SH DESCRIPTION +.I bzip2 +compresses files using the Burrows-Wheeler block sorting +text compression algorithm, and Huffman coding. Compression is +generally considerably better than that achieved by more conventional +LZ77/LZ78-based compressors, and approaches the performance of the PPM +family of statistical compressors. + +The command-line options are deliberately very similar to +those of +.I GNU gzip, +but they are not identical. 
+ +.I bzip2 +expects a list of file names to accompany the +command-line flags. Each file is replaced by a compressed version of +itself, with the name "original_name.bz2". +Each compressed file +has the same modification date, permissions, and, when possible, +ownership as the corresponding original, so that these properties can +be correctly restored at decompression time. File name handling is +naive in the sense that there is no mechanism for preserving original +file names, permissions, ownerships or dates in filesystems which lack +these concepts, or have serious file name length restrictions, such as +MS-DOS. + +.I bzip2 +and +.I bunzip2 +will by default not overwrite existing +files. If you want this to happen, specify the \-f flag. + +If no file names are specified, +.I bzip2 +compresses from standard +input to standard output. In this case, +.I bzip2 +will decline to +write compressed output to a terminal, as this would be entirely +incomprehensible and therefore pointless. + +.I bunzip2 +(or +.I bzip2 \-d) +decompresses all +specified files. Files which were not created by +.I bzip2 +will be detected and ignored, and a warning issued. +.I bzip2 +attempts to guess the filename for the decompressed file +from that of the compressed file as follows: + + filename.bz2 becomes filename + filename.bz becomes filename + filename.tbz2 becomes filename.tar + filename.tbz becomes filename.tar + anyothername becomes anyothername.out + +If the file does not end in one of the recognised endings, +.I .bz2, +.I .bz, +.I .tbz2 +or +.I .tbz, +.I bzip2 +complains that it cannot +guess the name of the original file, and uses the original name +with +.I .out +appended. + +As with compression, supplying no +filenames causes decompression from +standard input to standard output. + +.I bunzip2 +will correctly decompress a file which is the +concatenation of two or more compressed files. The result is the +concatenation of the corresponding uncompressed files. 
Integrity +testing (\-t) +of concatenated +compressed files is also supported. + +You can also compress or decompress files to the standard output by +giving the \-c flag. Multiple files may be compressed and +decompressed like this. The resulting outputs are fed sequentially to +stdout. Compression of multiple files +in this manner generates a stream +containing multiple compressed file representations. Such a stream +can be decompressed correctly only by +.I bzip2 +version 0.9.0 or +later. Earlier versions of +.I bzip2 +will stop after decompressing +the first file in the stream. + +.I bzcat +(or +.I bzip2 -dc) +decompresses all specified files to +the standard output. + +.I bzip2 +will read arguments from the environment variables +.I BZIP2 +and +.I BZIP, +in that order, and will process them +before any arguments read from the command line. This gives a +convenient way to supply default arguments. + +Compression is always performed, even if the compressed +file is slightly +larger than the original. Files of less than about one hundred bytes +tend to get larger, since the compression mechanism has a constant +overhead in the region of 50 bytes. Random data (including the output +of most file compressors) is coded at about 8.05 bits per byte, giving +an expansion of around 0.5%. + +As a self-check for your protection, +.I +bzip2 +uses 32-bit CRCs to +make sure that the decompressed version of a file is identical to the +original. This guards against corruption of the compressed data, and +against undetected bugs in +.I bzip2 +(hopefully very unlikely). The +chances of data corruption going undetected are microscopic, about one +chance in four billion for each file processed. Be aware, though, that +the check occurs upon decompression, so it can only tell you that +something is wrong. It can't help you +recover the original uncompressed +data. You can use +.I bzip2recover +to try to recover data from +damaged files. 
+ +Return values: 0 for a normal exit, 1 for environmental problems (file +not found, invalid flags, I/O errors, &c), 2 to indicate a corrupt +compressed file, 3 for an internal consistency error (eg, bug) which +caused +.I bzip2 +to panic. + +.SH OPTIONS +.TP +.B \-c --stdout +Compress or decompress to standard output. +.TP +.B \-d --decompress +Force decompression. +.I bzip2, +.I bunzip2 +and +.I bzcat +are +really the same program, and the decision about what actions to take is +done on the basis of which name is used. This flag overrides that +mechanism, and forces +.I bzip2 +to decompress. +.TP +.B \-z --compress +The complement to \-d: forces compression, regardless of the +invocation name. +.TP +.B \-t --test +Check integrity of the specified file(s), but don't decompress them. +This really performs a trial decompression and throws away the result. +.TP +.B \-f --force +Force overwrite of output files. Normally, +.I bzip2 +will not overwrite +existing output files. Also forces +.I bzip2 +to break hard links +to files, which it otherwise wouldn't do. + +bzip2 normally declines to decompress files which don't have the +correct magic header bytes. If forced (-f), however, it will pass +such files through unmodified. This is how GNU gzip behaves. +.TP +.B \-k --keep +Keep (don't delete) input files during compression +or decompression. +.TP +.B \-s --small +Reduce memory usage, for compression, decompression and testing. Files +are decompressed and tested using a modified algorithm which only +requires 2.5 bytes per block byte. This means any file can be +decompressed in 2300k of memory, albeit at about half the normal speed. + +During compression, \-s selects a block size of 200k, which limits +memory use to around the same figure, at the expense of your compression +ratio. In short, if your machine is low on memory (8 megabytes or +less), use \-s for everything. See MEMORY MANAGEMENT below. +.TP +.B \-q --quiet +Suppress non-essential warning messages. 
Messages pertaining to +I/O errors and other critical events will not be suppressed. +.TP +.B \-v --verbose +Verbose mode -- show the compression ratio for each file processed. +Further \-v's increase the verbosity level, spewing out lots of +information which is primarily of interest for diagnostic purposes. +.TP +.B \-L --license -V --version +Display the software version, license terms and conditions. +.TP +.B \-1 (or \-\-fast) to \-9 (or \-\-best) +Set the block size to 100 k, 200 k .. 900 k when compressing. Has no +effect when decompressing. See MEMORY MANAGEMENT below. +The \-\-fast and \-\-best aliases are primarily for GNU gzip +compatibility. In particular, \-\-fast doesn't make things +significantly faster. +And \-\-best merely selects the default behaviour. +.TP +.B \-- +Treats all subsequent arguments as file names, even if they start +with a dash. This is so you can handle files with names beginning +with a dash, for example: bzip2 \-- \-myfilename. +.TP +.B \--repetitive-fast --repetitive-best +These flags are redundant in versions 0.9.5 and above. They provided +some coarse control over the behaviour of the sorting algorithm in +earlier versions, which was sometimes useful. 0.9.5 and above have an +improved algorithm which renders these flags irrelevant. + +.SH MEMORY MANAGEMENT +.I bzip2 +compresses large files in blocks. The block size affects +both the compression ratio achieved, and the amount of memory needed for +compression and decompression. The flags \-1 through \-9 +specify the block size to be 100,000 bytes through 900,000 bytes (the +default) respectively. At decompression time, the block size used for +compression is read from the header of the compressed file, and +.I bunzip2 +then allocates itself just enough memory to decompress +the file. Since block sizes are stored in compressed files, it follows +that the flags \-1 to \-9 are irrelevant to and so ignored +during decompression. 
+ +Compression and decompression requirements, +in bytes, can be estimated as: + + Compression: 400k + ( 8 x block size ) + + Decompression: 100k + ( 4 x block size ), or + 100k + ( 2.5 x block size ) + +Larger block sizes give rapidly diminishing marginal returns. Most of +the compression comes from the first two or three hundred k of block +size, a fact worth bearing in mind when using +.I bzip2 +on small machines. +It is also important to appreciate that the decompression memory +requirement is set at compression time by the choice of block size. + +For files compressed with the default 900k block size, +.I bunzip2 +will require about 3700 kbytes to decompress. To support decompression +of any file on a 4 megabyte machine, +.I bunzip2 +has an option to +decompress using approximately half this amount of memory, about 2300 +kbytes. Decompression speed is also halved, so you should use this +option only where necessary. The relevant flag is -s. + +In general, try and use the largest block size memory constraints allow, +since that maximises the compression achieved. Compression and +decompression speed are virtually unaffected by block size. + +Another significant point applies to files which fit in a single block +-- that means most files you'd encounter using a large block size. The +amount of real memory touched is proportional to the size of the file, +since the file is smaller than a block. For example, compressing a file +20,000 bytes long with the flag -9 will cause the compressor to +allocate around 7600k of memory, but only touch 400k + 20000 * 8 = 560 +kbytes of it. Similarly, the decompressor will allocate 3700k but only +touch 100k + 20000 * 4 = 180 kbytes. + +Here is a table which summarises the maximum memory usage for different +block sizes. Also recorded is the total compressed size for 14 files of +the Calgary Text Compression Corpus totalling 3,141,622 bytes. This +column gives some feel for how compression varies with block size. 
+These figures tend to understate the advantage of larger block sizes for +larger files, since the Corpus is dominated by smaller files. + + Compress Decompress Decompress Corpus + Flag usage usage -s usage Size + + -1 1200k 500k 350k 914704 + -2 2000k 900k 600k 877703 + -3 2800k 1300k 850k 860338 + -4 3600k 1700k 1100k 846899 + -5 4400k 2100k 1350k 845160 + -6 5200k 2500k 1600k 838626 + -7 6100k 2900k 1850k 834096 + -8 6800k 3300k 2100k 828642 + -9 7600k 3700k 2350k 828642 + +.SH RECOVERING DATA FROM DAMAGED FILES +.I bzip2 +compresses files in blocks, usually 900kbytes long. Each +block is handled independently. If a media or transmission error causes +a multi-block .bz2 +file to become damaged, it may be possible to +recover data from the undamaged blocks in the file. + +The compressed representation of each block is delimited by a 48-bit +pattern, which makes it possible to find the block boundaries with +reasonable certainty. Each block also carries its own 32-bit CRC, so +damaged blocks can be distinguished from undamaged ones. + +.I bzip2recover +is a simple program whose purpose is to search for +blocks in .bz2 files, and write each block out into its own .bz2 +file. You can then use +.I bzip2 +\-t +to test the +integrity of the resulting files, and decompress those which are +undamaged. + +.I bzip2recover +takes a single argument, the name of the damaged file, +and writes a number of files "rec00001file.bz2", +"rec00002file.bz2", etc, containing the extracted blocks. +The output filenames are designed so that the use of +wildcards in subsequent processing -- for example, +"bzip2 -dc rec*file.bz2 > recovered_data" -- processes the files in +the correct order. + +.I bzip2recover +should be of most use dealing with large .bz2 +files, as these will contain many blocks. It is clearly +futile to use it on damaged single-block files, since a +damaged block cannot be recovered. 
If you wish to minimise +any potential data loss through media or transmission errors, +you might consider compressing with a smaller +block size. + +.SH PERFORMANCE NOTES +The sorting phase of compression gathers together similar strings in the +file. Because of this, files containing very long runs of repeated +symbols, like "aabaabaabaab ..." (repeated several hundred times) may +compress more slowly than normal. Versions 0.9.5 and above fare much +better than previous versions in this respect. The ratio between +worst-case and average-case compression time is in the region of 10:1. +For previous versions, this figure was more like 100:1. You can use the +\-vvvv option to monitor progress in great detail, if you want. + +Decompression speed is unaffected by these phenomena. + +.I bzip2 +usually allocates several megabytes of memory to operate +in, and then charges all over it in a fairly random fashion. This means +that performance, both for compressing and decompressing, is largely +determined by the speed at which your machine can service cache misses. +Because of this, small changes to the code to reduce the miss rate have +been observed to give disproportionately large performance improvements. +I imagine +.I bzip2 +will perform best on machines with very large caches. + +.SH CAVEATS +I/O error messages are not as helpful as they could be. +.I bzip2 +tries hard to detect I/O errors and exit cleanly, but the details of +what the problem is sometimes seem rather misleading. + +This manual page pertains to version 1.0.2 of +.I bzip2. +Compressed data created by this version is entirely forwards and +backwards compatible with the previous public releases, versions +0.1pl2, 0.9.0, 0.9.5, 1.0.0 and 1.0.1, but with the following +exception: 0.9.0 and above can correctly decompress multiple +concatenated compressed files. 0.1pl2 cannot do this; it will stop +after decompressing just the first file in the stream. 
+ +.I bzip2recover +versions prior to this one, 1.0.2, used 32-bit integers to represent +bit positions in compressed files, so it could not handle compressed +files more than 512 megabytes long. Version 1.0.2 and above uses +64-bit ints on some platforms which support them (GNU supported +targets, and Windows). To establish whether or not bzip2recover was +built with such a limitation, run it without arguments. In any event +you can build yourself an unlimited version if you can recompile it +with MaybeUInt64 set to be an unsigned 64-bit integer. + + + +.SH AUTHOR +Julian Seward, jseward@acm.org. + +http://sources.redhat.com/bzip2 + +The ideas embodied in +.I bzip2 +are due to (at least) the following +people: Michael Burrows and David Wheeler (for the block sorting +transformation), David Wheeler (again, for the Huffman coder), Peter +Fenwick (for the structured coding model in the original +.I bzip, +and many refinements), and Alistair Moffat, Radford Neal and Ian Witten +(for the arithmetic coder in the original +.I bzip). +I am much +indebted for their help, support and advice. See the manual in the +source distribution for pointers to sources of documentation. Christian +von Roques encouraged me to look for faster sorting algorithms, so as to +speed up compression. Bela Lubkin encouraged me to improve the +worst-case compression performance. +The bz* scripts are derived from those of GNU gzip. +Many people sent patches, helped +with portability problems, lent machines, gave advice and were generally +helpful. diff -Nru bzip2-1.0.2/doc/bzip2.texi bzip2-1.0.2.new/doc/bzip2.texi --- bzip2-1.0.2/doc/bzip2.texi Thu Jan 1 01:00:00 1970 +++ bzip2-1.0.2.new/doc/bzip2.texi Fri Feb 1 04:26:21 2002 @@ -0,0 +1,2234 @@ +\input texinfo @c -*- Texinfo -*- +@setfilename bzip2.info + +@ignore +This file documents bzip2 version 1.0.2, and associated library +libbzip2, written by Julian Seward (jseward@acm.org). 
+ +Copyright (C) 1996-2002 Julian R Seward + +Permission is granted to make and distribute verbatim copies of +this manual provided the copyright notice and this permission notice +are preserved on all copies. + +Permission is granted to copy and distribute translations of this manual +into another language, under the above conditions for verbatim copies. +@end ignore + +@iftex +@c @finalout +@settitle bzip2 and libbzip2 +@titlepage +@title bzip2 and libbzip2 +@subtitle a program and library for data compression +@subtitle copyright (C) 1996-2002 Julian Seward +@subtitle version 1.0.2 of 30 December 2001 +@author Julian Seward + +@end titlepage + +@parindent 0mm +@parskip 2mm + +@end iftex +@node Top,,, (dir) + +The following text is the License for this software. You should +find it identical to that contained in the file LICENSE in the +source distribution. + +------------------ START OF THE LICENSE ------------------ + +This program, @code{bzip2}, +and associated library @code{libbzip2}, are +Copyright (C) 1996-2002 Julian R Seward. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +@itemize @bullet +@item + Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +@item + The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment in the product + documentation would be appreciated but is not required. +@item + Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. +@item + The name of the author may not be used to endorse or promote + products derived from this software without specific prior written + permission. 
+@end itemize +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS +OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Julian Seward, Cambridge, UK. + +@code{jseward@@acm.org} + +@code{bzip2}/@code{libbzip2} version 1.0.2 of 30 December 2001. + +------------------ END OF THE LICENSE ------------------ + +Web sites: + +@code{http://sources.redhat.com/bzip2} + +@code{http://www.cacheprof.org} + +PATENTS: To the best of my knowledge, @code{bzip2} does not use any patented +algorithms. However, I do not have the resources available to carry out +a full patent search. Therefore I cannot give any guarantee of the +above statement. + + + + + + + +@chapter Introduction + +@code{bzip2} compresses files using the Burrows-Wheeler +block-sorting text compression algorithm, and Huffman coding. +Compression is generally considerably better than that +achieved by more conventional LZ77/LZ78-based compressors, +and approaches the performance of the PPM family of statistical compressors. + +@code{bzip2} is built on top of @code{libbzip2}, a flexible library +for handling compressed data in the @code{bzip2} format. This manual +describes both how to use the program and +how to work with the library interface. Most of the +manual is devoted to this library, not the program, +which is good news if your interest is only in the program. 
+ +Chapter 2 describes how to use @code{bzip2}; this is the only part +you need to read if you just want to know how to operate the program. +Chapter 3 describes the programming interfaces in detail, and +Chapter 4 records some miscellaneous notes which I thought +ought to be recorded somewhere. + + +@chapter How to use @code{bzip2} + +This chapter contains a copy of the @code{bzip2} man page, +and nothing else. + +@quotation + +@unnumberedsubsubsec NAME +@itemize +@item @code{bzip2}, @code{bunzip2} +- a block-sorting file compressor, v1.0.2 +@item @code{bzcat} +- decompresses files to stdout +@item @code{bzip2recover} +- recovers data from damaged bzip2 files +@end itemize + +@unnumberedsubsubsec SYNOPSIS +@itemize +@item @code{bzip2} [ -cdfkqstvzVL123456789 ] [ filenames ... ] +@item @code{bunzip2} [ -fkvsVL ] [ filenames ... ] +@item @code{bzcat} [ -s ] [ filenames ... ] +@item @code{bzip2recover} filename +@end itemize + +@unnumberedsubsubsec DESCRIPTION + +@code{bzip2} compresses files using the Burrows-Wheeler block sorting +text compression algorithm, and Huffman coding. Compression is +generally considerably better than that achieved by more conventional +LZ77/LZ78-based compressors, and approaches the performance of the PPM +family of statistical compressors. + +The command-line options are deliberately very similar to those of GNU +@code{gzip}, but they are not identical. + +@code{bzip2} expects a list of file names to accompany the command-line +flags. Each file is replaced by a compressed version of itself, with +the name @code{original_name.bz2}. Each compressed file has the same +modification date, permissions, and, when possible, ownership as the +corresponding original, so that these properties can be correctly +restored at decompression time. 
File name handling is naive in the +sense that there is no mechanism for preserving original file names, +permissions, ownerships or dates in filesystems which lack these +concepts, or have serious file name length restrictions, such as MS-DOS. + +@code{bzip2} and @code{bunzip2} will by default not overwrite existing +files. If you want this to happen, specify the @code{-f} flag. + +If no file names are specified, @code{bzip2} compresses from standard +input to standard output. In this case, @code{bzip2} will decline to +write compressed output to a terminal, as this would be entirely +incomprehensible and therefore pointless. + +@code{bunzip2} (or @code{bzip2 -d}) decompresses all +specified files. Files which were not created by @code{bzip2} +will be detected and ignored, and a warning issued. +@code{bzip2} attempts to guess the filename for the decompressed file +from that of the compressed file as follows: +@itemize +@item @code{filename.bz2 } becomes @code{filename} +@item @code{filename.bz } becomes @code{filename} +@item @code{filename.tbz2} becomes @code{filename.tar} +@item @code{filename.tbz } becomes @code{filename.tar} +@item @code{anyothername } becomes @code{anyothername.out} +@end itemize +If the file does not end in one of the recognised endings, +@code{.bz2}, @code{.bz}, +@code{.tbz2} or @code{.tbz}, @code{bzip2} complains that it cannot +guess the name of the original file, and uses the original name +with @code{.out} appended. + +As with compression, supplying no +filenames causes decompression from standard input to standard output. + +@code{bunzip2} will correctly decompress a file which is the +concatenation of two or more compressed files. The result is the +concatenation of the corresponding uncompressed files. Integrity +testing (@code{-t}) of concatenated compressed files is also supported. + +You can also compress or decompress files to the standard output by +giving the @code{-c} flag. 
Multiple files may be compressed and +decompressed like this. The resulting outputs are fed sequentially to +stdout. Compression of multiple files in this manner generates a stream +containing multiple compressed file representations. Such a stream +can be decompressed correctly only by @code{bzip2} version 0.9.0 or +later. Earlier versions of @code{bzip2} will stop after decompressing +the first file in the stream. + +@code{bzcat} (or @code{bzip2 -dc}) decompresses all specified files to +the standard output. + +@code{bzip2} will read arguments from the environment variables +@code{BZIP2} and @code{BZIP}, in that order, and will process them +before any arguments read from the command line. This gives a +convenient way to supply default arguments. + +Compression is always performed, even if the compressed file is slightly +larger than the original. Files of less than about one hundred bytes +tend to get larger, since the compression mechanism has a constant +overhead in the region of 50 bytes. Random data (including the output +of most file compressors) is coded at about 8.05 bits per byte, giving +an expansion of around 0.5%. + +As a self-check for your protection, @code{bzip2} uses 32-bit CRCs to +make sure that the decompressed version of a file is identical to the +original. This guards against corruption of the compressed data, and +against undetected bugs in @code{bzip2} (hopefully very unlikely). The +chance of data corruption going undetected is microscopic, about one +chance in four billion for each file processed. Be aware, though, that +the check occurs upon decompression, so it can only tell you that +something is wrong. It can't help you recover the original uncompressed +data. You can use @code{bzip2recover} to try to recover data from +damaged files.
+ +Return values: 0 for a normal exit, 1 for environmental problems (file +not found, invalid flags, I/O errors, &c), 2 to indicate a corrupt +compressed file, 3 for an internal consistency error (eg, bug) which +caused @code{bzip2} to panic. + + +@unnumberedsubsubsec OPTIONS +@table @code +@item -c --stdout +Compress or decompress to standard output. +@item -d --decompress +Force decompression. @code{bzip2}, @code{bunzip2} and @code{bzcat} are +really the same program, and the decision about what actions to take is +done on the basis of which name is used. This flag overrides that +mechanism, and forces bzip2 to decompress. +@item -z --compress +The complement to @code{-d}: forces compression, regardless of the +invocation name. +@item -t --test +Check integrity of the specified file(s), but don't decompress them. +This really performs a trial decompression and throws away the result. +@item -f --force +Force overwrite of output files. Normally, @code{bzip2} will not overwrite +existing output files. Also forces @code{bzip2} to break hard links +to files, which it otherwise wouldn't do. + +@code{bzip2} normally declines to decompress files which don't have the +correct magic header bytes. If forced (@code{-f}), however, it will +pass such files through unmodified. This is how GNU @code{gzip} +behaves. +@item -k --keep +Keep (don't delete) input files during compression +or decompression. +@item -s --small +Reduce memory usage, for compression, decompression and testing. Files +are decompressed and tested using a modified algorithm which only +requires 2.5 bytes per block byte. This means any file can be +decompressed in 2300k of memory, albeit at about half the normal speed. + +During compression, @code{-s} selects a block size of 200k, which limits +memory use to around the same figure, at the expense of your compression +ratio. In short, if your machine is low on memory (8 megabytes or +less), use -s for everything. See MEMORY MANAGEMENT below.
+@item -q --quiet +Suppress non-essential warning messages. Messages pertaining to +I/O errors and other critical events will not be suppressed. +@item -v --verbose +Verbose mode -- show the compression ratio for each file processed. +Further @code{-v}'s increase the verbosity level, spewing out lots of +information which is primarily of interest for diagnostic purposes. +@item -L --license -V --version +Display the software version, license terms and conditions. +@item -1 (or --fast) to -9 (or --best) +Set the block size to 100 k, 200 k .. 900 k when compressing. Has no +effect when decompressing. See MEMORY MANAGEMENT below. +The @code{--fast} and @code{--best} aliases are primarily for GNU +@code{gzip} compatibility. In particular, @code{--fast} doesn't make +things significantly faster. And @code{--best} merely selects the +default behaviour. +@item -- +Treats all subsequent arguments as file names, even if they start +with a dash. This is so you can handle files with names beginning +with a dash, for example: @code{bzip2 -- -myfilename}. +@item --repetitive-fast +@item --repetitive-best +These flags are redundant in versions 0.9.5 and above. They provided +some coarse control over the behaviour of the sorting algorithm in +earlier versions, which was sometimes useful. 0.9.5 and above have an +improved algorithm which renders these flags irrelevant. +@end table + + +@unnumberedsubsubsec MEMORY MANAGEMENT + +@code{bzip2} compresses large files in blocks. The block size affects +both the compression ratio achieved, and the amount of memory needed for +compression and decompression. The flags @code{-1} through @code{-9} +specify the block size to be 100,000 bytes through 900,000 bytes (the +default) respectively. At decompression time, the block size used for +compression is read from the header of the compressed file, and +@code{bunzip2} then allocates itself just enough memory to decompress +the file. 
Since block sizes are stored in compressed files, it follows +that the flags @code{-1} to @code{-9} are irrelevant to and so ignored +during decompression. + +Compression and decompression requirements, in bytes, can be estimated +as: +@example + Compression: 400k + ( 8 x block size ) + + Decompression: 100k + ( 4 x block size ), or + 100k + ( 2.5 x block size ) +@end example +Larger block sizes give rapidly diminishing marginal returns. Most of +the compression comes from the first two or three hundred k of block +size, a fact worth bearing in mind when using @code{bzip2} on small machines. +It is also important to appreciate that the decompression memory +requirement is set at compression time by the choice of block size. + +For files compressed with the default 900k block size, @code{bunzip2} +will require about 3700 kbytes to decompress. To support decompression +of any file on a 4 megabyte machine, @code{bunzip2} has an option to +decompress using approximately half this amount of memory, about 2300 +kbytes. Decompression speed is also halved, so you should use this +option only where necessary. The relevant flag is @code{-s}. + +In general, try and use the largest block size memory constraints allow, +since that maximises the compression achieved. Compression and +decompression speed are virtually unaffected by block size. + +Another significant point applies to files which fit in a single block +-- that means most files you'd encounter using a large block size. The +amount of real memory touched is proportional to the size of the file, +since the file is smaller than a block. For example, compressing a file +20,000 bytes long with the flag @code{-9} will cause the compressor to +allocate around 7600k of memory, but only touch 400k + 20000 * 8 = 560 +kbytes of it. Similarly, the decompressor will allocate 3700k but only +touch 100k + 20000 * 4 = 180 kbytes. + +Here is a table which summarises the maximum memory usage for different +block sizes. 
Also recorded is the total compressed size for 14 files of +the Calgary Text Compression Corpus totalling 3,141,622 bytes. This +column gives some feel for how compression varies with block size. +These figures tend to understate the advantage of larger block sizes for +larger files, since the Corpus is dominated by smaller files. +@example + Compress Decompress Decompress Corpus + Flag usage usage -s usage Size + + -1 1200k 500k 350k 914704 + -2 2000k 900k 600k 877703 + -3 2800k 1300k 850k 860338 + -4 3600k 1700k 1100k 846899 + -5 4400k 2100k 1350k 845160 + -6 5200k 2500k 1600k 838626 + -7 6100k 2900k 1850k 834096 + -8 6800k 3300k 2100k 828642 + -9 7600k 3700k 2350k 828642 +@end example + +@unnumberedsubsubsec RECOVERING DATA FROM DAMAGED FILES + +@code{bzip2} compresses files in blocks, usually 900kbytes long. Each +block is handled independently. If a media or transmission error causes +a multi-block @code{.bz2} file to become damaged, it may be possible to +recover data from the undamaged blocks in the file. + +The compressed representation of each block is delimited by a 48-bit +pattern, which makes it possible to find the block boundaries with +reasonable certainty. Each block also carries its own 32-bit CRC, so +damaged blocks can be distinguished from undamaged ones. + +@code{bzip2recover} is a simple program whose purpose is to search for +blocks in @code{.bz2} files, and write each block out into its own +@code{.bz2} file. You can then use @code{bzip2 -t} to test the +integrity of the resulting files, and decompress those which are +undamaged. + +@code{bzip2recover} +takes a single argument, the name of the damaged file, and writes a +number of files @code{rec00001file.bz2}, @code{rec00002file.bz2}, etc, +containing the extracted blocks. The output filenames are designed so +that the use of wildcards in subsequent processing -- for example, +@code{bzip2 -dc rec*file.bz2 > recovered_data} -- processes the files in +the correct order. 
+ +@code{bzip2recover} should be of most use dealing with large @code{.bz2} +files, as these will contain many blocks. It is clearly futile to use +it on damaged single-block files, since a damaged block cannot be +recovered. If you wish to minimise any potential data loss through +media or transmission errors, you might consider compressing with a +smaller block size. + + +@unnumberedsubsubsec PERFORMANCE NOTES + +The sorting phase of compression gathers together similar strings in the +file. Because of this, files containing very long runs of repeated +symbols, like "aabaabaabaab ..." (repeated several hundred times) may +compress more slowly than normal. Versions 0.9.5 and above fare much +better than previous versions in this respect. The ratio between +worst-case and average-case compression time is in the region of 10:1. +For previous versions, this figure was more like 100:1. You can use the +@code{-vvvv} option to monitor progress in great detail, if you want. + +Decompression speed is unaffected by these phenomena. + +@code{bzip2} usually allocates several megabytes of memory to operate +in, and then charges all over it in a fairly random fashion. This means +that performance, both for compressing and decompressing, is largely +determined by the speed at which your machine can service cache misses. +Because of this, small changes to the code to reduce the miss rate have +been observed to give disproportionately large performance improvements. +I imagine @code{bzip2} will perform best on machines with very large +caches. + + +@unnumberedsubsubsec CAVEATS + +I/O error messages are not as helpful as they could be. @code{bzip2} +tries hard to detect I/O errors and exit cleanly, but the details of +what the problem is sometimes seem rather misleading. + +This manual page pertains to version 1.0.2 of @code{bzip2}. 
Compressed +data created by this version is entirely forwards and backwards +compatible with the previous public releases, versions 0.1pl2, 0.9.0, +0.9.5, 1.0.0 and 1.0.1, but with the following exception: 0.9.0 and +above can correctly decompress multiple concatenated compressed files. +0.1pl2 cannot do this; it will stop after decompressing just the first +file in the stream. + +@code{bzip2recover} versions prior to this one, 1.0.2, used 32-bit +integers to represent bit positions in compressed files, so it could not +handle compressed files more than 512 megabytes long. Version 1.0.2 and +above uses 64-bit ints on some platforms which support them (GNU +supported targets, and Windows). To establish whether or not +@code{bzip2recover} was built with such a limitation, run it without +arguments. In any event you can build yourself an unlimited version if +you can recompile it with @code{MaybeUInt64} set to be an unsigned +64-bit integer. + + + +@unnumberedsubsubsec AUTHOR +Julian Seward, @code{jseward@@acm.org}. + +@code{http://sources.redhat.com/bzip2} + +The ideas embodied in @code{bzip2} are due to (at least) the following +people: Michael Burrows and David Wheeler (for the block sorting +transformation), David Wheeler (again, for the Huffman coder), Peter +Fenwick (for the structured coding model in the original @code{bzip}, +and many refinements), and Alistair Moffat, Radford Neal and Ian Witten +(for the arithmetic coder in the original @code{bzip}). I am much +indebted for their help, support and advice. See the manual in the +source distribution for pointers to sources of documentation. Christian +von Roques encouraged me to look for faster sorting algorithms, so as to +speed up compression. Bela Lubkin encouraged me to improve the +worst-case compression performance. The @code{bz*} scripts are derived +from those of GNU @code{gzip}. Many people sent patches, helped with +portability problems, lent machines, gave advice and were generally +helpful. 
+ +@end quotation + + + + +@chapter Programming with @code{libbzip2} + +This chapter describes the programming interface to @code{libbzip2}. + +For general background information, particularly about memory +use and performance aspects, you'd be well advised to read Chapter 2 +as well. + +@section Top-level structure + +@code{libbzip2} is a flexible library for compressing and decompressing +data in the @code{bzip2} data format. Although packaged as a single +entity, it helps to regard the library as three separate parts: the low +level interface, and the high level interface, and some utility +functions. + +The structure of @code{libbzip2}'s interfaces is similar to +that of Jean-loup Gailly's and Mark Adler's excellent @code{zlib} +library. + +All externally visible symbols have names beginning @code{BZ2_}. +This is new in version 1.0. The intention is to minimise pollution +of the namespaces of library clients. + +@subsection Low-level summary + +This interface provides services for compressing and decompressing +data in memory. There's no provision for dealing with files, streams +or any other I/O mechanisms, just straight memory-to-memory work. +In fact, this part of the library can be compiled without inclusion +of @code{stdio.h}, which may be helpful for embedded applications. + +The low-level part of the library has no global variables and +is therefore thread-safe. + +Six routines make up the low level interface: +@code{BZ2_bzCompressInit}, @code{BZ2_bzCompress}, and @* @code{BZ2_bzCompressEnd} +for compression, +and a corresponding trio @code{BZ2_bzDecompressInit}, @* @code{BZ2_bzDecompress} +and @code{BZ2_bzDecompressEnd} for decompression. +The @code{*Init} functions allocate +memory for compression/decompression and do other +initialisations, whilst the @code{*End} functions close down operations +and release memory. + +The real work is done by @code{BZ2_bzCompress} and @code{BZ2_bzDecompress}. 
+These compress and decompress data from a user-supplied input buffer +to a user-supplied output buffer. These buffers can be any size; +arbitrary quantities of data are handled by making repeated calls +to these functions. This is a flexible mechanism allowing a +consumer-pull style of activity, or producer-push, or a mixture of +both. + + + +@subsection High-level summary + +This interface provides some handy wrappers around the low-level +interface to facilitate reading and writing @code{bzip2} format +files (@code{.bz2} files). The routines provide hooks to facilitate +reading files in which the @code{bzip2} data stream is embedded +within some larger-scale file structure, or where there are +multiple @code{bzip2} data streams concatenated end-to-end. + +For reading files, @code{BZ2_bzReadOpen}, @code{BZ2_bzRead}, +@code{BZ2_bzReadClose} and @* @code{BZ2_bzReadGetUnused} are supplied. For +writing files, @code{BZ2_bzWriteOpen}, @code{BZ2_bzWrite} and +@code{BZ2_bzWriteFinish} are available. + +As with the low-level library, no global variables are used +so the library is per se thread-safe. However, if I/O errors +occur whilst reading or writing the underlying compressed files, +you may have to consult @code{errno} to determine the cause of +the error. In that case, you'd need a C library which correctly +supports @code{errno} in a multithreaded environment. + +To make the library a little simpler and more portable, +@code{BZ2_bzReadOpen} and @code{BZ2_bzWriteOpen} require you to pass them file +handles (@code{FILE*}s) which have previously been opened for reading or +writing respectively. That avoids portability problems associated with +file operations and file attributes, whilst not being much of an +imposition on the programmer. + + + +@subsection Utility functions summary +For very simple needs, @code{BZ2_bzBuffToBuffCompress} and +@code{BZ2_bzBuffToBuffDecompress} are provided. 
These compress +data in memory from one buffer to another buffer in a single +function call. You should assess whether these functions +fulfill your memory-to-memory compression/decompression +requirements before investing effort in understanding the more +general but more complex low-level interface. + +Yoshioka Tsuneo (@code{QWF00133@@niftyserve.or.jp} / +@code{tsuneo-y@@is.aist-nara.ac.jp}) has contributed some functions to +give better @code{zlib} compatibility. These functions are +@code{BZ2_bzopen}, @code{BZ2_bzread}, @code{BZ2_bzwrite}, @code{BZ2_bzflush}, +@code{BZ2_bzclose}, +@code{BZ2_bzerror} and @code{BZ2_bzlibVersion}. You may find these functions +more convenient for simple file reading and writing, than those in the +high-level interface. These functions are not (yet) officially part of +the library, and are minimally documented here. If they break, you +get to keep all the pieces. I hope to document them properly when time +permits. + +Yoshioka also contributed modifications to allow the library to be +built as a Windows DLL. + + +@section Error handling + +The library is designed to recover cleanly in all situations, including +the worst-case situation of decompressing random data. I'm not +100% sure that it can always do this, so you might want to add +a signal handler to catch segmentation violations during decompression +if you are feeling especially paranoid. I would be interested in +hearing more about the robustness of the library to corrupted +compressed data. + +Version 1.0 is much more robust in this respect than +0.9.0 or 0.9.5. Investigations with Checker (a tool for +detecting problems with memory management, similar to Purify) +indicate that, at least for the few files I tested, all single-bit +errors in the decompressed data are caught properly, with no +segmentation faults, no reads of uninitialised data and no +out of range reads or writes. So it's certainly much improved, +although I wouldn't claim it to be totally bombproof. 
+ +The file @code{bzlib.h} contains all definitions needed to use +the library. In particular, you should definitely not include +@code{bzlib_private.h}. + +In @code{bzlib.h}, the various return values are defined. The following +list is not intended as an exhaustive description of the circumstances +in which a given value may be returned -- those descriptions are given +later. Rather, it is intended to convey the rough meaning of each +return value. The first five actions are normal and not intended to +denote an error situation. +@table @code +@item BZ_OK +The requested action was completed successfully. +@item BZ_RUN_OK +@itemx BZ_FLUSH_OK +@itemx BZ_FINISH_OK +In @code{BZ2_bzCompress}, the requested flush/finish/nothing-special action +was completed successfully. +@item BZ_STREAM_END +Compression of data was completed, or the logical stream end was +detected during decompression. +@end table + +The following return values indicate an error of some kind. +@table @code +@item BZ_CONFIG_ERROR +Indicates that the library has been improperly compiled on your +platform -- a major configuration error. Specifically, it means +that @code{sizeof(char)}, @code{sizeof(short)} and @code{sizeof(int)} +are not 1, 2 and 4 respectively, as they should be. Note that the +library should still work properly on 64-bit platforms which follow +the LP64 programming model -- that is, where @code{sizeof(long)} +and @code{sizeof(void*)} are 8. Under LP64, @code{sizeof(int)} is +still 4, so @code{libbzip2}, which doesn't use the @code{long} type, +is OK. +@item BZ_SEQUENCE_ERROR +When using the library, it is important to call the functions in the +correct sequence and with data structures (buffers etc) in the correct +states. @code{libbzip2} checks as much as it can to ensure this is +happening, and returns @code{BZ_SEQUENCE_ERROR} if not. 
Code which +complies precisely with the function semantics, as detailed below, +should never receive this value; such an event denotes buggy code +which you should investigate. +@item BZ_PARAM_ERROR +Returned when a parameter to a function call is out of range +or otherwise manifestly incorrect. As with @code{BZ_SEQUENCE_ERROR}, +this denotes a bug in the client code. The distinction between +@code{BZ_PARAM_ERROR} and @code{BZ_SEQUENCE_ERROR} is a bit hazy, but still worth +making. +@item BZ_MEM_ERROR +Returned when a request to allocate memory failed. Note that the +quantity of memory needed to decompress a stream cannot be determined +until the stream's header has been read. So @code{BZ2_bzDecompress} and +@code{BZ2_bzRead} may return @code{BZ_MEM_ERROR} even though some of +the compressed data has been read. The same is not true for +compression; once @code{BZ2_bzCompressInit} or @code{BZ2_bzWriteOpen} have +successfully completed, @code{BZ_MEM_ERROR} cannot occur. +@item BZ_DATA_ERROR +Returned when a data integrity error is detected during decompression. +Most importantly, this means when stored and computed CRCs for the +data do not match. This value is also returned upon detection of any +other anomaly in the compressed data. +@item BZ_DATA_ERROR_MAGIC +As a special case of @code{BZ_DATA_ERROR}, it is sometimes useful to +know when the compressed stream does not start with the correct +magic bytes (@code{'B' 'Z' 'h'}). +@item BZ_IO_ERROR +Returned by @code{BZ2_bzRead} and @code{BZ2_bzWrite} when there is an error +reading or writing in the compressed file, and by @code{BZ2_bzReadOpen} +and @code{BZ2_bzWriteOpen} for attempts to use a file for which the +error indicator (viz, @code{ferror(f)}) is set. +On receipt of @code{BZ_IO_ERROR}, the caller should consult +@code{errno} and/or @code{perror} to acquire operating-system +specific information about the problem. 
+@item BZ_UNEXPECTED_EOF +Returned by @code{BZ2_bzRead} when the compressed file finishes +before the logical end of stream is detected. +@item BZ_OUTBUFF_FULL +Returned by @code{BZ2_bzBuffToBuffCompress} and +@code{BZ2_bzBuffToBuffDecompress} to indicate that the output data +will not fit into the output buffer provided. +@end table + + + +@section Low-level interface + +@subsection @code{BZ2_bzCompressInit} +@example +typedef + struct @{ + char *next_in; + unsigned int avail_in; + unsigned int total_in_lo32; + unsigned int total_in_hi32; + + char *next_out; + unsigned int avail_out; + unsigned int total_out_lo32; + unsigned int total_out_hi32; + + void *state; + + void *(*bzalloc)(void *,int,int); + void (*bzfree)(void *,void *); + void *opaque; + @} + bz_stream; + +int BZ2_bzCompressInit ( bz_stream *strm, + int blockSize100k, + int verbosity, + int workFactor ); + +@end example + +Prepares for compression. The @code{bz_stream} structure +holds all data pertaining to the compression activity. +A @code{bz_stream} structure should be allocated and initialised +prior to the call. +The fields of @code{bz_stream} +comprise the entirety of the user-visible data. @code{state} +is a pointer to the private data structures required for compression. + +Custom memory allocators are supported, via fields @code{bzalloc}, +@code{bzfree}, +and @code{opaque}. The value +@code{opaque} is passed as the first argument to +all calls to @code{bzalloc} and @code{bzfree}, but is +otherwise ignored by the library. +The call @code{bzalloc ( opaque, n, m )} is expected to return a +pointer @code{p} to +@code{n * m} bytes of memory, and @code{bzfree ( opaque, p )} +should free +that memory. + +If you don't want to use a custom memory allocator, set @code{bzalloc}, +@code{bzfree} and +@code{opaque} to @code{NULL}, +and the library will then use the standard @code{malloc}/@code{free} +routines.
+ +Before calling @code{BZ2_bzCompressInit}, fields @code{bzalloc}, +@code{bzfree} and @code{opaque} should +be filled appropriately, as just described. Upon return, the internal +state will have been allocated and initialised, and @code{total_in_lo32}, +@code{total_in_hi32}, @code{total_out_lo32} and +@code{total_out_hi32} will have been set to zero. +These four fields are used by the library +to inform the caller of the total amount of data passed into and out of +the library, respectively. You should not try to change them. +As of version 1.0, 64-bit counts are maintained, even on 32-bit +platforms, using the @code{_hi32} fields to store the upper 32 bits +of the count. So, for example, the total amount of data in +is @code{(total_in_hi32 << 32) + total_in_lo32}. + +Parameter @code{blockSize100k} specifies the block size to be used for +compression. It should be a value between 1 and 9 inclusive, and the +actual block size used is 100000 x this figure. 9 gives the best +compression but takes most memory. + +Parameter @code{verbosity} should be set to a number between 0 and 4 +inclusive. 0 is silent, and greater numbers give increasingly verbose +monitoring/debugging output. If the library has been compiled with +@code{-DBZ_NO_STDIO}, no such output will appear for any verbosity +setting. + +Parameter @code{workFactor} controls how the compression phase behaves +when presented with worst case, highly repetitive, input data. If +compression runs into difficulties caused by repetitive data, the +library switches from the standard sorting algorithm to a fallback +algorithm. The fallback is slower than the standard algorithm by +perhaps a factor of three, but always behaves reasonably, no matter how +bad the input. + +Lower values of @code{workFactor} reduce the amount of effort the +standard algorithm will expend before resorting to the fallback. 
You +should set this parameter carefully; too low, and many inputs will be +handled by the fallback algorithm and so compress rather slowly, too +high, and your average-to-worst case compression times can become very +large. The default value of 30 gives reasonable behaviour over a wide +range of circumstances. + +Allowable values range from 0 to 250 inclusive. 0 is a special case, +equivalent to using the default value of 30. + +Note that the compressed output generated is the same regardless of +whether or not the fallback algorithm is used. + +Be aware also that this parameter may disappear entirely in future +versions of the library. In principle it should be possible to devise a +good way to automatically choose which algorithm to use. Such a +mechanism would render the parameter obsolete. + +Possible return values: +@display + @code{BZ_CONFIG_ERROR} + if the library has been mis-compiled + @code{BZ_PARAM_ERROR} + if @code{strm} is @code{NULL} + or @code{blockSize100k} < 1 or @code{blockSize100k} > 9 + or @code{verbosity} < 0 or @code{verbosity} > 4 + or @code{workFactor} < 0 or @code{workFactor} > 250 + @code{BZ_MEM_ERROR} + if not enough memory is available + @code{BZ_OK} + otherwise +@end display +Allowable next actions: +@display + @code{BZ2_bzCompress} + if @code{BZ_OK} is returned + no specific action needed in case of error +@end display + +@subsection @code{BZ2_bzCompress} +@example + int BZ2_bzCompress ( bz_stream *strm, int action ); +@end example +Provides more input and/or output buffer space for the library. The +caller maintains input and output buffers, and calls @code{BZ2_bzCompress} to +transfer data between them. + +Before each call to @code{BZ2_bzCompress}, @code{next_in} should point at +the data to be compressed, and @code{avail_in} should indicate how many +bytes the library may read. @code{BZ2_bzCompress} updates @code{next_in}, +@code{avail_in} and @code{total_in} to reflect the number of bytes it +has read.
+ +Similarly, @code{next_out} should point to a buffer in which the +compressed data is to be placed, with @code{avail_out} indicating how +much output space is available. @code{BZ2_bzCompress} updates +@code{next_out}, @code{avail_out} and @code{total_out} to reflect the +number of bytes output. + +You may provide and remove as little or as much data as you like on each +call of @code{BZ2_bzCompress}. In the limit, it is acceptable to supply and +remove data one byte at a time, although this would be terribly +inefficient. You should always ensure that at least one byte of output +space is available at each call. + +A second purpose of @code{BZ2_bzCompress} is to request a change of mode of the +compressed stream. + +Conceptually, a compressed stream can be in one of four states: IDLE, +RUNNING, FLUSHING and FINISHING. Before initialisation +(@code{BZ2_bzCompressInit}) and after termination (@code{BZ2_bzCompressEnd}), a +stream is regarded as IDLE. + +Upon initialisation (@code{BZ2_bzCompressInit}), the stream is placed in the +RUNNING state. Subsequent calls to @code{BZ2_bzCompress} should pass +@code{BZ_RUN} as the requested action; other actions are illegal and +will result in @code{BZ_SEQUENCE_ERROR}. + +At some point, the calling program will have provided all the input data +it wants to. It will then want to finish up -- in effect, asking the +library to process any data it might have buffered internally. In this +state, @code{BZ2_bzCompress} will no longer attempt to read data from +@code{next_in}, but it will want to write data to @code{next_out}. +Because the output buffer supplied by the user can be arbitrarily small, +the finishing-up operation cannot necessarily be done with a single call +of @code{BZ2_bzCompress}. + +Instead, the calling program passes @code{BZ_FINISH} as an action to +@code{BZ2_bzCompress}. This changes the stream's state to FINISHING. Any +remaining input (ie, @code{next_in[0 .. 
avail_in-1]}) is compressed and +transferred to the output buffer. To do this, @code{BZ2_bzCompress} must be +called repeatedly until all the output has been consumed. At that +point, @code{BZ2_bzCompress} returns @code{BZ_STREAM_END}, and the stream's +state is set back to IDLE. @code{BZ2_bzCompressEnd} should then be +called. + +Just to make sure the calling program does not cheat, the library makes +a note of @code{avail_in} at the time of the first call to +@code{BZ2_bzCompress} which has @code{BZ_FINISH} as an action (ie, at the +time the program has announced its intention to not supply any more +input). By comparing this value with that of @code{avail_in} over +subsequent calls to @code{BZ2_bzCompress}, the library can detect any +attempts to slip in more data to compress. Any calls for which this is +detected will return @code{BZ_SEQUENCE_ERROR}. This indicates a +programming mistake which should be corrected. + +Instead of asking to finish, the calling program may ask +@code{BZ2_bzCompress} to take all the remaining input, compress it and +terminate the current (Burrows-Wheeler) compression block. This could +be useful for error control purposes. The mechanism is analogous to +that for finishing: call @code{BZ2_bzCompress} with an action of +@code{BZ_FLUSH}, remove output data, and persist with the +@code{BZ_FLUSH} action until the value @code{BZ_RUN} is returned. As +with finishing, @code{BZ2_bzCompress} detects any attempt to provide more +input data once the flush has begun. + +Once the flush is complete, the stream returns to the normal RUNNING +state. + +This all sounds pretty complex, but isn't really. Here's a table +which shows which actions are allowable in each state, what action +will be taken, what the next state is, and what the non-error return +values are. Note that you can't explicitly ask what state the +stream is in, but nor do you need to -- it can be inferred from the +values returned by @code{BZ2_bzCompress}. 
+@display +IDLE/@code{any} + Illegal. IDLE state only exists after @code{BZ2_bzCompressEnd} or + before @code{BZ2_bzCompressInit}. + Return value = @code{BZ_SEQUENCE_ERROR} + +RUNNING/@code{BZ_RUN} + Compress from @code{next_in} to @code{next_out} as much as possible. + Next state = RUNNING + Return value = @code{BZ_RUN_OK} + +RUNNING/@code{BZ_FLUSH} + Remember current value of @code{avail_in}. Compress from @code{next_in} + to @code{next_out} as much as possible, but do not accept any more input. + Next state = FLUSHING + Return value = @code{BZ_FLUSH_OK} + +RUNNING/@code{BZ_FINISH} + Remember current value of @code{avail_in}. Compress from @code{next_in} + to @code{next_out} as much as possible, but do not accept any more input. + Next state = FINISHING + Return value = @code{BZ_FINISH_OK} + +FLUSHING/@code{BZ_FLUSH} + Compress from @code{next_in} to @code{next_out} as much as possible, + but do not accept any more input. + If all the existing input has been used up and all compressed + output has been removed + Next state = RUNNING; Return value = @code{BZ_RUN_OK} + else + Next state = FLUSHING; Return value = @code{BZ_FLUSH_OK} + +FLUSHING/other + Illegal. + Return value = @code{BZ_SEQUENCE_ERROR} + +FINISHING/@code{BZ_FINISH} + Compress from @code{next_in} to @code{next_out} as much as possible, + but do not accept any more input. + If all the existing input has been used up and all compressed + output has been removed + Next state = IDLE; Return value = @code{BZ_STREAM_END} + else + Next state = FINISHING; Return value = @code{BZ_FINISH_OK} + +FINISHING/other + Illegal. + Return value = @code{BZ_SEQUENCE_ERROR} +@end display + +That still looks complicated? Well, fair enough. The usual sequence +of calls for compressing a load of data is: +@itemize @bullet +@item Get started with @code{BZ2_bzCompressInit}. +@item Shovel data in and shlurp out its compressed form using zero or more +calls of @code{BZ2_bzCompress} with action = @code{BZ_RUN}. +@item Finish up.
+Repeatedly call @code{BZ2_bzCompress} with action = @code{BZ_FINISH}, +copying out the compressed output, until @code{BZ_STREAM_END} is returned. +@item Close up and go home. Call @code{BZ2_bzCompressEnd}. +@end itemize +If the data you want to compress fits into your input buffer all +at once, you can skip the calls of @code{BZ2_bzCompress ( ..., BZ_RUN )} and +just do the @code{BZ2_bzCompress ( ..., BZ_FINISH )} calls. + +All required memory is allocated by @code{BZ2_bzCompressInit}. The +compression library can accept any data at all (obviously). So you +shouldn't get any error return values from the @code{BZ2_bzCompress} calls. +If you do, they will be @code{BZ_SEQUENCE_ERROR}, and indicate a bug in +your programming. + +Trivial other possible return values: +@display + @code{BZ_PARAM_ERROR} + if @code{strm} is @code{NULL}, or @code{strm->s} is @code{NULL} +@end display + +@subsection @code{BZ2_bzCompressEnd} +@example +int BZ2_bzCompressEnd ( bz_stream *strm ); +@end example +Releases all memory associated with a compression stream. + +Possible return values: +@display + @code{BZ_PARAM_ERROR} if @code{strm} is @code{NULL} or @code{strm->s} is @code{NULL} + @code{BZ_OK} otherwise +@end display + + +@subsection @code{BZ2_bzDecompressInit} +@example +int BZ2_bzDecompressInit ( bz_stream *strm, int verbosity, int small ); +@end example +Prepares for decompression. As with @code{BZ2_bzCompressInit}, a +@code{bz_stream} record should be allocated and initialised before the +call. Fields @code{bzalloc}, @code{bzfree} and @code{opaque} should be +set if a custom memory allocator is required, or made @code{NULL} for +the normal @code{malloc}/@code{free} routines. Upon return, the internal +state will have been initialised, and @code{total_in} and +@code{total_out} will be zero. + +For the meaning of parameter @code{verbosity}, see @code{BZ2_bzCompressInit}. 
+ +If @code{small} is nonzero, the library will use an alternative +decompression algorithm which uses less memory but at the cost of +decompressing more slowly (roughly speaking, half the speed, but the +maximum memory requirement drops to around 2300k). See Chapter 2 for +more information on memory management. + +Note that the amount of memory needed to decompress +a stream cannot be determined until the stream's header has been read, +so even if @code{BZ2_bzDecompressInit} succeeds, a subsequent +@code{BZ2_bzDecompress} could fail with @code{BZ_MEM_ERROR}. + +Possible return values: +@display + @code{BZ_CONFIG_ERROR} + if the library has been mis-compiled + @code{BZ_PARAM_ERROR} + if @code{(small != 0 && small != 1)} + or @code{(verbosity < 0 || verbosity > 4)} + @code{BZ_MEM_ERROR} + if insufficient memory is available +@end display + +Allowable next actions: +@display + @code{BZ2_bzDecompress} + if @code{BZ_OK} was returned + no specific action required in case of error +@end display + + + +@subsection @code{BZ2_bzDecompress} +@example +int BZ2_bzDecompress ( bz_stream *strm ); +@end example +Provides more input and/or output buffer space for the library. The +caller maintains input and output buffers, and uses @code{BZ2_bzDecompress} +to transfer data between them. + +Before each call to @code{BZ2_bzDecompress}, @code{next_in} +should point at the compressed data, +and @code{avail_in} should indicate how many bytes the library +may read. @code{BZ2_bzDecompress} updates @code{next_in}, @code{avail_in} +and @code{total_in} +to reflect the number of bytes it has read. + +Similarly, @code{next_out} should point to a buffer in which the uncompressed +output is to be placed, with @code{avail_out} indicating how much output space +is available. @code{BZ2_bzDecompress} updates @code{next_out}, +@code{avail_out} and @code{total_out} to reflect +the number of bytes output.
+ +You may provide and remove as little or as much data as you like on +each call of @code{BZ2_bzDecompress}. +In the limit, it is acceptable to +supply and remove data one byte at a time, although this would be +terribly inefficient. You should always ensure that at least one +byte of output space is available at each call. + +Use of @code{BZ2_bzDecompress} is simpler than @code{BZ2_bzCompress}. + +You should provide input and remove output as described above, and +repeatedly call @code{BZ2_bzDecompress} until @code{BZ_STREAM_END} is +returned. Appearance of @code{BZ_STREAM_END} denotes that +@code{BZ2_bzDecompress} has detected the logical end of the compressed +stream. @code{BZ2_bzDecompress} will not produce @code{BZ_STREAM_END} until +all output data has been placed into the output buffer, so once +@code{BZ_STREAM_END} appears, you are guaranteed to have available all +the decompressed output, and @code{BZ2_bzDecompressEnd} can safely be +called. + +In case of an error return value, you should call @code{BZ2_bzDecompressEnd} +to clean up and release memory. + +Possible return values: +@display + @code{BZ_PARAM_ERROR} + if @code{strm} is @code{NULL} or @code{strm->s} is @code{NULL} + or @code{strm->avail_out < 1} + @code{BZ_DATA_ERROR} + if a data integrity error is detected in the compressed stream + @code{BZ_DATA_ERROR_MAGIC} + if the compressed stream doesn't begin with the right magic bytes + @code{BZ_MEM_ERROR} + if there wasn't enough memory available + @code{BZ_STREAM_END} + if the logical end of the data stream was detected and all + output has been consumed, eg @code{s->avail_out > 0} + @code{BZ_OK} + otherwise +@end display +Allowable next actions: +@display + @code{BZ2_bzDecompress} + if @code{BZ_OK} was returned + @code{BZ2_bzDecompressEnd} + otherwise +@end display + + +@subsection @code{BZ2_bzDecompressEnd} +@example +int BZ2_bzDecompressEnd ( bz_stream *strm ); +@end example +Releases all memory associated with a decompression stream.
+ +Possible return values: +@display + @code{BZ_PARAM_ERROR} + if @code{strm} is @code{NULL} or @code{strm->s} is @code{NULL} + @code{BZ_OK} + otherwise +@end display + +Allowable next actions: +@display + None. +@end display + + +@section High-level interface + +This interface provides functions for reading and writing +@code{bzip2} format files. First, some general points. + +@itemize @bullet +@item All of the functions take an @code{int*} first argument, + @code{bzerror}. + After each call, @code{bzerror} should be consulted first to determine + the outcome of the call. If @code{bzerror} is @code{BZ_OK}, + the call completed + successfully, and only then should the return value of the function + (if any) be consulted. If @code{bzerror} is @code{BZ_IO_ERROR}, + there was an error + reading/writing the underlying compressed file, and you should + then consult @code{errno}/@code{perror} to determine the + cause of the difficulty. + @code{bzerror} may also be set to various other values; precise details are + given on a per-function basis below. +@item If @code{bzerror} indicates an error + (ie, anything except @code{BZ_OK} and @code{BZ_STREAM_END}), + you should immediately call @code{BZ2_bzReadClose} (or @code{BZ2_bzWriteClose}, + depending on whether you are attempting to read or to write) + to free up all resources associated + with the stream. Once an error has been indicated, behaviour of all calls + except @code{BZ2_bzReadClose} (@code{BZ2_bzWriteClose}) is undefined. + The implication is that (1) @code{bzerror} should + be checked after each call, and (2) if @code{bzerror} indicates an error, + @code{BZ2_bzReadClose} (@code{BZ2_bzWriteClose}) should then be called to clean up. +@item The @code{FILE*} arguments passed to + @code{BZ2_bzReadOpen}/@code{BZ2_bzWriteOpen} + should be set to binary mode. + Most Unix systems will do this by default, but other platforms, + including Windows and Mac, will not. 
If you omit this, you may + encounter problems when moving code to new platforms. +@item Memory allocation requests are handled by + @code{malloc}/@code{free}. + At present + there is no facility for user-defined memory allocators in the file I/O + functions (could easily be added, though). +@end itemize + + + +@subsection @code{BZ2_bzReadOpen} +@example + typedef void BZFILE; + + BZFILE *BZ2_bzReadOpen ( int *bzerror, FILE *f, + int small, int verbosity, + void *unused, int nUnused ); +@end example +Prepare to read compressed data from file handle @code{f}. @code{f} +should refer to a file which has been opened for reading, and for which +the error indicator (@code{ferror(f)}) is not set. If @code{small} is 1, +the library will try to decompress using less memory, at the expense of +speed. + +For reasons explained below, @code{BZ2_bzRead} will decompress the +@code{nUnused} bytes starting at @code{unused}, before starting to read +from the file @code{f}. At most @code{BZ_MAX_UNUSED} bytes may be +supplied like this. If this facility is not required, you should pass +@code{NULL} and @code{0} for @code{unused} and @code{nUnused} +respectively. + +For the meaning of parameters @code{small} and @code{verbosity}, +see @code{BZ2_bzDecompressInit}. + +The amount of memory needed to decompress a file cannot be determined +until the file's header has been read. So it is possible that +@code{BZ2_bzReadOpen} returns @code{BZ_OK} but a subsequent call of +@code{BZ2_bzRead} will return @code{BZ_MEM_ERROR}. + +Possible assignments to @code{bzerror}: +@display + @code{BZ_CONFIG_ERROR} + if the library has been mis-compiled + @code{BZ_PARAM_ERROR} + if @code{f} is @code{NULL} + or @code{small} is neither @code{0} nor @code{1} + or @code{(unused == NULL && nUnused != 0)} + or @code{(unused != NULL && !(0 <= nUnused <= BZ_MAX_UNUSED))} + @code{BZ_IO_ERROR} + if @code{ferror(f)} is nonzero + @code{BZ_MEM_ERROR} + if insufficient memory is available + @code{BZ_OK} + otherwise.
+@end display + +Possible return values: +@display + Pointer to an abstract @code{BZFILE} + if @code{bzerror} is @code{BZ_OK} + @code{NULL} + otherwise +@end display + +Allowable next actions: +@display + @code{BZ2_bzRead} + if @code{bzerror} is @code{BZ_OK} + @code{BZ2_bzReadClose} + otherwise +@end display + + +@subsection @code{BZ2_bzRead} +@example + int BZ2_bzRead ( int *bzerror, BZFILE *b, void *buf, int len ); +@end example +Reads up to @code{len} (uncompressed) bytes from the compressed file +@code{b} into +the buffer @code{buf}. If the read was successful, +@code{bzerror} is set to @code{BZ_OK} +and the number of bytes read is returned. If the logical end-of-stream +was detected, @code{bzerror} will be set to @code{BZ_STREAM_END}, +and the number +of bytes read is returned. All other @code{bzerror} values denote an error. + +@code{BZ2_bzRead} will supply @code{len} bytes, +unless the logical stream end is detected +or an error occurs. Because of this, it is possible to detect the +stream end by observing when the number of bytes returned is +less than the number +requested. Nevertheless, this is regarded as inadvisable; you should +instead check @code{bzerror} after every call and watch out for +@code{BZ_STREAM_END}. + +Internally, @code{BZ2_bzRead} copies data from the compressed file in chunks +of size @code{BZ_MAX_UNUSED} bytes +before decompressing it. If the file contains more bytes than strictly +needed to reach the logical end-of-stream, @code{BZ2_bzRead} will almost certainly +read some of the trailing data before signalling @code{BZ_STREAM_END}. +To collect the read but unused data once @code{BZ_STREAM_END} has +appeared, call @code{BZ2_bzReadGetUnused} immediately before @code{BZ2_bzReadClose}.
+ +Possible assignments to @code{bzerror}: +@display + @code{BZ_PARAM_ERROR} + if @code{b} is @code{NULL} or @code{buf} is @code{NULL} or @code{len < 0} + @code{BZ_SEQUENCE_ERROR} + if @code{b} was opened with @code{BZ2_bzWriteOpen} + @code{BZ_IO_ERROR} + if there is an error reading from the compressed file + @code{BZ_UNEXPECTED_EOF} + if the compressed file ended before the logical end-of-stream was detected + @code{BZ_DATA_ERROR} + if a data integrity error was detected in the compressed stream + @code{BZ_DATA_ERROR_MAGIC} + if the stream does not begin with the requisite header bytes (ie, is not + a @code{bzip2} data file). This is really a special case of @code{BZ_DATA_ERROR}. + @code{BZ_MEM_ERROR} + if insufficient memory was available + @code{BZ_STREAM_END} + if the logical end of stream was detected. + @code{BZ_OK} + otherwise. +@end display + +Possible return values: +@display + number of bytes read + if @code{bzerror} is @code{BZ_OK} or @code{BZ_STREAM_END} + undefined + otherwise +@end display + +Allowable next actions: +@display + collect data from @code{buf}, then @code{BZ2_bzRead} or @code{BZ2_bzReadClose} + if @code{bzerror} is @code{BZ_OK} + collect data from @code{buf}, then @code{BZ2_bzReadClose} or @code{BZ2_bzReadGetUnused} + if @code{bzerror} is @code{BZ_STREAM_END} + @code{BZ2_bzReadClose} + otherwise +@end display + + + +@subsection @code{BZ2_bzReadGetUnused} +@example + void BZ2_bzReadGetUnused ( int* bzerror, BZFILE *b, + void** unused, int* nUnused ); +@end example +Returns data which was read from the compressed file but was not needed +to get to the logical end-of-stream. @code{*unused} is set to the address +of the data, and @code{*nUnused} to the number of bytes. @code{*nUnused} will +be set to a value between @code{0} and @code{BZ_MAX_UNUSED} inclusive. + +This function may only be called once @code{BZ2_bzRead} has signalled +@code{BZ_STREAM_END} but before @code{BZ2_bzReadClose}.
+ +Possible assignments to @code{bzerror}: +@display + @code{BZ_PARAM_ERROR} + if @code{b} is @code{NULL} + or @code{unused} is @code{NULL} or @code{nUnused} is @code{NULL} + @code{BZ_SEQUENCE_ERROR} + if @code{BZ_STREAM_END} has not been signalled + or if @code{b} was opened with @code{BZ2_bzWriteOpen} + @code{BZ_OK} + otherwise +@end display + +Allowable next actions: +@display + @code{BZ2_bzReadClose} +@end display + + +@subsection @code{BZ2_bzReadClose} +@example + void BZ2_bzReadClose ( int *bzerror, BZFILE *b ); +@end example +Releases all memory pertaining to the compressed file @code{b}. +@code{BZ2_bzReadClose} does not call @code{fclose} on the underlying file +handle, so you should do that yourself if appropriate. +@code{BZ2_bzReadClose} should be called to clean up after all error +situations. + +Possible assignments to @code{bzerror}: +@display + @code{BZ_SEQUENCE_ERROR} + if @code{b} was opened with @code{BZ2_bzWriteOpen} + @code{BZ_OK} + otherwise +@end display + +Allowable next actions: +@display + none +@end display + + + +@subsection @code{BZ2_bzWriteOpen} +@example + BZFILE *BZ2_bzWriteOpen ( int *bzerror, FILE *f, + int blockSize100k, int verbosity, + int workFactor ); +@end example +Prepare to write compressed data to file handle @code{f}. +@code{f} should refer to +a file which has been opened for writing, and for which the error +indicator (@code{ferror(f)}) is not set. + +For the meaning of parameters @code{blockSize100k}, +@code{verbosity} and @code{workFactor}, see +@* @code{BZ2_bzCompressInit}. + +All required memory is allocated at this stage, so if the call +completes successfully, @code{BZ_MEM_ERROR} cannot be signalled by a +subsequent call to @code{BZ2_bzWrite}.
+ +Possible assignments to @code{bzerror}: +@display + @code{BZ_CONFIG_ERROR} + if the library has been mis-compiled + @code{BZ_PARAM_ERROR} + if @code{f} is @code{NULL} + or @code{blockSize100k < 1} or @code{blockSize100k > 9} + @code{BZ_IO_ERROR} + if @code{ferror(f)} is nonzero + @code{BZ_MEM_ERROR} + if insufficient memory is available + @code{BZ_OK} + otherwise +@end display + +Possible return values: +@display + Pointer to an abstract @code{BZFILE} + if @code{bzerror} is @code{BZ_OK} + @code{NULL} + otherwise +@end display + +Allowable next actions: +@display + @code{BZ2_bzWrite} + if @code{bzerror} is @code{BZ_OK} + (you could go directly to @code{BZ2_bzWriteClose}, but this would be pretty pointless) + @code{BZ2_bzWriteClose} + otherwise +@end display + + + +@subsection @code{BZ2_bzWrite} +@example + void BZ2_bzWrite ( int *bzerror, BZFILE *b, void *buf, int len ); +@end example +Absorbs @code{len} bytes from the buffer @code{buf}, eventually to be +compressed and written to the file. + +Possible assignments to @code{bzerror}: +@display + @code{BZ_PARAM_ERROR} + if @code{b} is @code{NULL} or @code{buf} is @code{NULL} or @code{len < 0} + @code{BZ_SEQUENCE_ERROR} + if b was opened with @code{BZ2_bzReadOpen} + @code{BZ_IO_ERROR} + if there is an error writing the compressed file. + @code{BZ_OK} + otherwise +@end display + + + + +@subsection @code{BZ2_bzWriteClose} +@example + void BZ2_bzWriteClose ( int *bzerror, BZFILE* f, + int abandon, + unsigned int* nbytes_in, + unsigned int* nbytes_out ); + + void BZ2_bzWriteClose64 ( int *bzerror, BZFILE* f, + int abandon, + unsigned int* nbytes_in_lo32, + unsigned int* nbytes_in_hi32, + unsigned int* nbytes_out_lo32, + unsigned int* nbytes_out_hi32 ); +@end example + +Compresses and flushes to the compressed file all data so far supplied +by @code{BZ2_bzWrite}. The logical end-of-stream markers are also written, so +subsequent calls to @code{BZ2_bzWrite} are illegal. 
All memory associated +with the compressed file @code{b} is released. +@code{fflush} is called on the +compressed file, but it is not @code{fclose}'d. + +If @code{BZ2_bzWriteClose} is called to clean up after an error, the only +action is to release the memory. The library records the error codes +issued by previous calls, so this situation will be detected +automatically. There is no attempt to complete the compression +operation, nor to @code{fflush} the compressed file. You can force this +behaviour to happen even in the case of no error, by passing a nonzero +value to @code{abandon}. + +If @code{nbytes_in} is non-null, @code{*nbytes_in} will be set to be the +total volume of uncompressed data handled. Similarly, @code{nbytes_out} +will be set to the total volume of compressed data written. For +compatibility with older versions of the library, @code{BZ2_bzWriteClose} +only yields the lower 32 bits of these counts. Use +@code{BZ2_bzWriteClose64} if you want the full 64 bit counts. These +two functions are otherwise absolutely identical. + + +Possible assignments to @code{bzerror}: +@display + @code{BZ_SEQUENCE_ERROR} + if @code{b} was opened with @code{BZ2_bzReadOpen} + @code{BZ_IO_ERROR} + if there is an error writing the compressed file + @code{BZ_OK} + otherwise +@end display + +@subsection Handling embedded compressed data streams + +The high-level library facilitates use of +@code{bzip2} data streams which form some part of a surrounding, larger +data stream. +@itemize @bullet +@item For writing, the library takes an open file handle, writes +compressed data to it, @code{fflush}es it but does not @code{fclose} it. +The calling application can write its own data before and after the +compressed data stream, using that same file handle. +@item Reading is more complex, and the facilities are not as general +as they could be since generality is hard to reconcile with efficiency. 
+@code{BZ2_bzRead} reads from the compressed file in blocks of size +@code{BZ_MAX_UNUSED} bytes, and in doing so probably will overshoot +the logical end of compressed stream. +To recover this data once decompression has +ended, call @code{BZ2_bzReadGetUnused} after the last call of @code{BZ2_bzRead} +(the one returning @code{BZ_STREAM_END}) but before calling +@code{BZ2_bzReadClose}. +@end itemize + +This mechanism makes it easy to decompress multiple @code{bzip2} +streams placed end-to-end. At the end of one stream, when @code{BZ2_bzRead} +returns @code{BZ_STREAM_END}, call @code{BZ2_bzReadGetUnused} to collect the +unused data (copy it into your own buffer somewhere). +That data forms the start of the next compressed stream. +To start uncompressing that next stream, call @code{BZ2_bzReadOpen} again, +feeding in the unused data via the @code{unused}/@code{nUnused} +parameters. +Keep doing this until @code{BZ_STREAM_END} return coincides with the +physical end of file (@code{feof(f)}). In this situation +@code{BZ2_bzReadGetUnused} +will of course return no data. + +This should give some feel for how the high-level interface can be used. +If you require extra flexibility, you'll have to bite the bullet and get +to grips with the low-level interface.
+ +@subsection Standard file-reading/writing code +Here's how you'd write data to a compressed file: +@example @code +FILE* f; +BZFILE* b; +int nBuf; +char buf[ /* whatever size you like */ ]; +int bzerror; +int nWritten; + +f = fopen ( "myfile.bz2", "w" ); +if (!f) @{ + /* handle error */ +@} +b = BZ2_bzWriteOpen ( &bzerror, f, 9 ); +if (bzerror != BZ_OK) @{ + BZ2_bzWriteClose ( b ); + /* handle error */ +@} + +while ( /* condition */ ) @{ + /* get data to write into buf, and set nBuf appropriately */ + nWritten = BZ2_bzWrite ( &bzerror, b, buf, nBuf ); + if (bzerror == BZ_IO_ERROR) @{ + BZ2_bzWriteClose ( &bzerror, b ); + /* handle error */ + @} +@} + +BZ2_bzWriteClose ( &bzerror, b ); +if (bzerror == BZ_IO_ERROR) @{ + /* handle error */ +@} +@end example +And to read from a compressed file: +@example +FILE* f; +BZFILE* b; +int nBuf; +char buf[ /* whatever size you like */ ]; +int bzerror; +int nWritten; + +f = fopen ( "myfile.bz2", "r" ); +if (!f) @{ + /* handle error */ +@} +b = BZ2_bzReadOpen ( &bzerror, f, 0, NULL, 0 ); +if (bzerror != BZ_OK) @{ + BZ2_bzReadClose ( &bzerror, b ); + /* handle error */ +@} + +bzerror = BZ_OK; +while (bzerror == BZ_OK && /* arbitrary other conditions */) @{ + nBuf = BZ2_bzRead ( &bzerror, b, buf, /* size of buf */ ); + if (bzerror == BZ_OK) @{ + /* do something with buf[0 .. nBuf-1] */ + @} +@} +if (bzerror != BZ_STREAM_END) @{ + BZ2_bzReadClose ( &bzerror, b ); + /* handle error */ +@} else @{ + BZ2_bzReadClose ( &bzerror ); +@} +@end example + + + +@section Utility functions +@subsection @code{BZ2_bzBuffToBuffCompress} +@example + int BZ2_bzBuffToBuffCompress( char* dest, + unsigned int* destLen, + char* source, + unsigned int sourceLen, + int blockSize100k, + int verbosity, + int workFactor ); +@end example +Attempts to compress the data in @code{source[0 .. sourceLen-1]} +into the destination buffer, @code{dest[0 .. *destLen-1]}. 
+If the destination buffer is big enough, @code{*destLen} is +set to the size of the compressed data, and @code{BZ_OK} is +returned. If the compressed data won't fit, @code{*destLen} +is unchanged, and @code{BZ_OUTBUFF_FULL} is returned. + +Compression in this manner is a one-shot event, done with a single call +to this function. The resulting compressed data is a complete +@code{bzip2} format data stream. There is no mechanism for making +additional calls to provide extra input data. If you want that kind of +mechanism, use the low-level interface. + +For the meaning of parameters @code{blockSize100k}, @code{verbosity} +and @code{workFactor}, @* see @code{BZ2_bzCompressInit}. + +To guarantee that the compressed data will fit in its buffer, allocate +an output buffer of size 1% larger than the uncompressed data, plus +six hundred extra bytes. + +@code{BZ2_bzBuffToBuffCompress} will not write data at or +beyond @code{dest[*destLen]}, even in case of buffer overflow. + +Possible return values: +@display + @code{BZ_CONFIG_ERROR} + if the library has been mis-compiled + @code{BZ_PARAM_ERROR} + if @code{dest} is @code{NULL} or @code{destLen} is @code{NULL} + or @code{blockSize100k < 1} or @code{blockSize100k > 9} + or @code{verbosity < 0} or @code{verbosity > 4} + or @code{workFactor < 0} or @code{workFactor > 250} + @code{BZ_MEM_ERROR} + if insufficient memory is available + @code{BZ_OUTBUFF_FULL} + if the size of the compressed data exceeds @code{*destLen} + @code{BZ_OK} + otherwise +@end display + + + +@subsection @code{BZ2_bzBuffToBuffDecompress} +@example + int BZ2_bzBuffToBuffDecompress ( char* dest, + unsigned int* destLen, + char* source, + unsigned int sourceLen, + int small, + int verbosity ); +@end example +Attempts to decompress the data in @code{source[0 .. sourceLen-1]} +into the destination buffer, @code{dest[0 .. *destLen-1]}.
+If the destination buffer is big enough, @code{*destLen} is +set to the size of the uncompressed data, and @code{BZ_OK} is +returned. If the uncompressed data won't fit, @code{*destLen} +is unchanged, and @code{BZ_OUTBUFF_FULL} is returned. + +@code{source} is assumed to hold a complete @code{bzip2} format +data stream. @* @code{BZ2_bzBuffToBuffDecompress} tries to decompress +the entirety of the stream into the output buffer. + +For the meaning of parameters @code{small} and @code{verbosity}, +see @code{BZ2_bzDecompressInit}. + +Because the compression ratio of the compressed data cannot be known in +advance, there is no easy way to guarantee that the output buffer will +be big enough. You may of course make arrangements in your code to +record the size of the uncompressed data, but such a mechanism is beyond +the scope of this library. + +@code{BZ2_bzBuffToBuffDecompress} will not write data at or +beyond @code{dest[*destLen]}, even in case of buffer overflow. + +Possible return values: +@display + @code{BZ_CONFIG_ERROR} + if the library has been mis-compiled + @code{BZ_PARAM_ERROR} + if @code{dest} is @code{NULL} or @code{destLen} is @code{NULL} + or @code{small != 0 && small != 1} + or @code{verbosity < 0} or @code{verbosity > 4} + @code{BZ_MEM_ERROR} + if insufficient memory is available + @code{BZ_OUTBUFF_FULL} + if the size of the uncompressed data exceeds @code{*destLen} + @code{BZ_DATA_ERROR} + if a data integrity error was detected in the compressed data + @code{BZ_DATA_ERROR_MAGIC} + if the compressed data doesn't begin with the right magic bytes + @code{BZ_UNEXPECTED_EOF} + if the compressed data ends unexpectedly + @code{BZ_OK} + otherwise +@end display + + + +@section @code{zlib} compatibility functions +Yoshioka Tsuneo has contributed some functions to +give better @code{zlib} compatibility.
These functions are +@code{BZ2_bzopen}, @code{BZ2_bzread}, @code{BZ2_bzwrite}, @code{BZ2_bzflush}, +@code{BZ2_bzclose}, +@code{BZ2_bzerror} and @code{BZ2_bzlibVersion}. +These functions are not (yet) officially part of +the library. If they break, you get to keep all the pieces. +Nevertheless, I think they work ok. +@example +typedef void BZFILE; + +const char * BZ2_bzlibVersion ( void ); +@end example +Returns a string indicating the library version. +@example +BZFILE * BZ2_bzopen ( const char *path, const char *mode ); +BZFILE * BZ2_bzdopen ( int fd, const char *mode ); +@end example +Opens a @code{.bz2} file for reading or writing, using either its name +or a pre-existing file descriptor. +Analogous to @code{fopen} and @code{fdopen}. +@example +int BZ2_bzread ( BZFILE* b, void* buf, int len ); +int BZ2_bzwrite ( BZFILE* b, void* buf, int len ); +@end example +Reads/writes data from/to a previously opened @code{BZFILE}. +Analogous to @code{fread} and @code{fwrite}. +@example +int BZ2_bzflush ( BZFILE* b ); +void BZ2_bzclose ( BZFILE* b ); +@end example +Flushes/closes a @code{BZFILE}. @code{BZ2_bzflush} doesn't actually do +anything. Analogous to @code{fflush} and @code{fclose}. + +@example +const char * BZ2_bzerror ( BZFILE *b, int *errnum ) +@end example +Returns a string describing the most recent error status of +@code{b}, and also sets @code{*errnum} to its numerical value. + + +@section Using the library in a @code{stdio}-free environment + +@subsection Getting rid of @code{stdio} + +In a deeply embedded application, you might want to use just +the memory-to-memory functions. You can do this conveniently +by compiling the library with preprocessor symbol @code{BZ_NO_STDIO} +defined. 
Doing this gives you a library containing only the following +eight functions: + +@code{BZ2_bzCompressInit}, @code{BZ2_bzCompress}, @code{BZ2_bzCompressEnd} @* +@code{BZ2_bzDecompressInit}, @code{BZ2_bzDecompress}, @code{BZ2_bzDecompressEnd} @* +@code{BZ2_bzBuffToBuffCompress}, @code{BZ2_bzBuffToBuffDecompress} + +When compiled like this, all functions will ignore @code{verbosity} +settings. + +@subsection Critical error handling +@code{libbzip2} contains a number of internal assertion checks which +should, needless to say, never be activated. Nevertheless, if an +assertion should fail, behaviour depends on whether or not the library +was compiled with @code{BZ_NO_STDIO} set. + +For a normal compile, an assertion failure yields the message +@example + bzip2/libbzip2: internal error number N. + This is a bug in bzip2/libbzip2, 1.0.2, 30-Dec-2001. + Please report it to me at: jseward@@acm.org. If this happened + when you were using some program which uses libbzip2 as a + component, you should also report this bug to the author(s) + of that program. Please make an effort to report this bug; + timely and accurate bug reports eventually lead to higher + quality software. Thanks. Julian Seward, 30 December 2001. +@end example +where @code{N} is some error code number. If @code{N == 1007}, it also +prints some extra text advising the reader that unreliable memory is +often associated with internal error 1007. (This is a +frequently-observed-phenomenon with versions 1.0.0/1.0.1). + +@code{exit(3)} is then called. + +For a @code{stdio}-free library, assertion failures result +in a call to a function declared as: +@example + extern void bz_internal_error ( int errcode ); +@end example +The relevant code is passed as a parameter. You should supply +such a function. + +In either case, once an assertion failure has occurred, any +@code{bz_stream} records involved can be regarded as invalid. +You should not attempt to resume normal operation with them. 
+ +You may, of course, change critical error handling to suit +your needs. As I said above, critical errors indicate bugs +in the library and should not occur. All "normal" error +situations are indicated via error return codes from functions, +and can be recovered from. + + +@section Making a Windows DLL +Everything related to Windows has been contributed by Yoshioka Tsuneo +@* (@code{QWF00133@@niftyserve.or.jp} / +@code{tsuneo-y@@is.aist-nara.ac.jp}), so you should send your queries to +him (but perhaps Cc: me, @code{jseward@@acm.org}). + +My vague understanding of what to do is: using Visual C++ 5.0, +open the project file @code{libbz2.dsp}, and build. That's all. + +If you can't +open the project file for some reason, make a new one, naming these files: +@code{blocksort.c}, @code{bzlib.c}, @code{compress.c}, +@code{crctable.c}, @code{decompress.c}, @code{huffman.c}, @* +@code{randtable.c} and @code{libbz2.def}. You will also need +to name the header files @code{bzlib.h} and @code{bzlib_private.h}. + +If you don't use VC++, you may need to define the preprocessor symbol +@code{_WIN32}. + +Finally, @code{dlltest.c} is a sample program using the DLL. It has a +project file, @code{dlltest.dsp}. + +If you just want a makefile for Visual C, have a look at +@code{makefile.msc}. + +Be aware that if you compile @code{bzip2} itself on Win32, you must set +@code{BZ_UNIX} to 0 and @code{BZ_LCCWIN32} to 1, in the file +@code{bzip2.c}, before compiling. Otherwise the resulting binary won't +work correctly. + +I haven't tried any of this stuff myself, but it all looks plausible. + + + +@chapter Miscellanea + +These are just some random thoughts of mine. Your mileage may +vary. + +@section Limitations of the compressed file format +@code{bzip2-1.0}, @code{0.9.5} and @code{0.9.0} +use exactly the same file format as the previous +version, @code{bzip2-0.1}. This decision was made in the interests of +stability. 
Creating yet another incompatible compressed file format +would create further confusion and disruption for users. + +Nevertheless, this is not a painless decision. Development +work since the release of @code{bzip2-0.1} in August 1997 +has shown complexities in the file format which slow down +decompression and, in retrospect, are unnecessary. These are: +@itemize @bullet +@item The run-length encoder, which is the first of the + compression transformations, is entirely irrelevant. + The original purpose was to protect the sorting algorithm + from the very worst case input: a string of repeated + symbols. But algorithm steps Q6a and Q6b in the original + Burrows-Wheeler technical report (SRC-124) show how + repeats can be handled without difficulty in block + sorting. +@item The randomisation mechanism doesn't really need to be + there. Udi Manber and Gene Myers published a suffix + array construction algorithm a few years back, which + can be employed to sort any block, no matter how + repetitive, in O(N log N) time. Subsequent work by + Kunihiko Sadakane has produced a derivative O(N (log N)^2) + algorithm which usually outperforms the Manber-Myers + algorithm. + + I could have changed to Sadakane's algorithm, but I find + it to be slower than @code{bzip2}'s existing algorithm for + most inputs, and the randomisation mechanism protects + adequately against bad cases. I didn't think it was + a good tradeoff to make. Partly this is due to the fact + that I was not flooded with email complaints about + @code{bzip2-0.1}'s performance on repetitive data, so + perhaps it isn't a problem for real inputs. + + Probably the best long-term solution, + and the one I have incorporated into 0.9.5 and above, + is to use the existing sorting + algorithm initially, and fall back to a O(N (log N)^2) + algorithm if the standard algorithm gets into difficulties. 
+@item The compressed file format was never designed to be + handled by a library, and I have had to jump through + some hoops to produce an efficient implementation of + decompression. It's a bit hairy. Try passing + @code{decompress.c} through the C preprocessor + and you'll see what I mean. Much of this complexity + could have been avoided if the compressed size of + each block of data was recorded in the data stream. +@item An Adler-32 checksum, rather than a CRC32 checksum, + would be faster to compute. +@end itemize +It would be fair to say that the @code{bzip2} format was frozen +before I properly and fully understood the performance +consequences of doing so. + +Improvements which I was able to incorporate into +0.9.0, despite using the same file format, are: +@itemize @bullet +@item Single array implementation of the inverse BWT. This + significantly speeds up decompression, presumably + because it reduces the number of cache misses. +@item Faster inverse MTF transform for large MTF values. The + new implementation is based on the notion of sliding blocks + of values. +@item @code{bzip2-0.9.0} now reads and writes files with @code{fread} + and @code{fwrite}; version 0.1 used @code{putc} and @code{getc}. + Duh! Well, you live and learn. + +@end itemize +Further ahead, it would be nice +to be able to do random access into files. This will +require some careful design of compressed file formats. + + + +@section Portability issues +After some consideration, I have decided not to use +GNU @code{autoconf} to configure 0.9.5 or 1.0. + +@code{autoconf}, admirable and wonderful though it is, +mainly assists with portability problems between Unix-like +platforms. But @code{bzip2} doesn't have much in the way +of portability problems on Unix; most of the difficulties appear +when porting to the Mac, or to Microsoft's operating systems. +@code{autoconf} doesn't help in those cases, and brings in a +whole load of new complexity. 
+ +Most people should be able to compile the library and program +under Unix straight out-of-the-box, so to speak, especially +if you have a version of GNU C available. + +There are a couple of @code{__inline__} directives in the code. GNU C +(@code{gcc}) should be able to handle them. If you're not using +GNU C, your C compiler shouldn't see them at all. +If your compiler does, for some reason, see them and doesn't +like them, just @code{#define} @code{__inline__} to be @code{/* */}. One +easy way to do this is to compile with the flag @code{-D__inline__=}, +which should be understood by most Unix compilers. + +If you still have difficulties, try compiling with the macro +@code{BZ_STRICT_ANSI} defined. This should enable you to build the +library in a strictly ANSI compliant environment. Building the program +itself like this is dangerous and not supported, since you remove +@code{bzip2}'s checks against compressing directories, symbolic links, +devices, and other not-really-a-file entities. This could cause +filesystem corruption! + +One other thing: if you create a @code{bzip2} binary for public +distribution, please try and link it statically (@code{gcc -static}). This +avoids all sorts of library-version issues that others may encounter +later on. + +If you build @code{bzip2} on Win32, you must set @code{BZ_UNIX} to 0 and +@code{BZ_LCCWIN32} to 1, in the file @code{bzip2.c}, before compiling. +Otherwise the resulting binary won't work correctly. + + + +@section Reporting bugs +I tried pretty hard to make sure @code{bzip2} is +bug free, both by design and by testing. Hopefully +you'll never need to read this section for real. + +Nevertheless, if @code{bzip2} dies with a segmentation +fault, a bus error or an internal assertion failure, it +will ask you to email me a bug report. Experience with +version 0.1 shows that almost all these problems can +be traced to either compiler bugs or hardware problems. 
+@itemize @bullet +@item +Recompile the program with no optimisation, and see if it +works. And/or try a different compiler. +I heard all sorts of stories about various flavours +of GNU C (and other compilers) generating bad code for +@code{bzip2}, and I've run across two such examples myself. + +2.7.X versions of GNU C are known to generate bad code from +time to time, at high optimisation levels. +If you get problems, try using the flags +@code{-O2} @code{-fomit-frame-pointer} @code{-fno-strength-reduce}. +You should specifically @emph{not} use @code{-funroll-loops}. + +You may notice that the Makefile runs six tests as part of +the build process. If the program passes all of these, it's +a pretty good (but not 100%) indication that the compiler has +done its job correctly. +@item +If @code{bzip2} crashes randomly, and the crashes are not +repeatable, you may have a flaky memory subsystem. @code{bzip2} +really hammers your memory hierarchy, and if it's a bit marginal, +you may get these problems. Ditto if your disk or I/O subsystem +is slowly failing. Yup, this really does happen. + +Try using a different machine of the same type, and see if +you can repeat the problem. +@item This isn't really a bug, but ... If @code{bzip2} tells +you your file is corrupted on decompression, and you +obtained the file via FTP, there is a possibility that you +forgot to tell FTP to do a binary mode transfer. That absolutely +will cause the file to be non-decompressible. You'll have to transfer +it again. +@end itemize + +If you've incorporated @code{libbzip2} into your own program +and are getting problems, please, please, please, check that the +parameters you are passing in calls to the library, are +correct, and in accordance with what the documentation says +is allowable. I have tried to make the library robust against +such problems, but I'm sure I haven't succeeded. + +Finally, if the above comments don't help, you'll have to send +me a bug report. 
Now, it's just amazing how many people will +send me a bug report saying something like +@display + bzip2 crashed with segmentation fault on my machine +@end display +and absolutely nothing else. Needless to say, such a report +is @emph{totally, utterly, completely and comprehensively 100% useless; +a waste of your time, my time, and net bandwidth}. +With no details at all, there's no way I can possibly begin +to figure out what the problem is. + +The rules of the game are: facts, facts, facts. Don't omit +them because "oh, they won't be relevant". At the bare +minimum: +@display + Machine type. Operating system version. + Exact version of @code{bzip2} (do @code{bzip2 -V}). + Exact version of the compiler used. + Flags passed to the compiler. +@end display +However, the most important single thing that will help me is +the file that you were trying to compress or decompress at the +time the problem happened. Without that, my ability to do anything +more than speculate about the cause, is limited. + +Please remember that I connect to the Internet with a modem, so +you should contact me before mailing me huge files. + + +@section Did you get the right package? + +@code{bzip2} is a resource hog. It soaks up large amounts of CPU cycles +and memory. Also, it gives very large latencies. In the worst case, you +can feed many megabytes of uncompressed data into the library before +getting any compressed output, so this probably rules out applications +requiring interactive behaviour. + +These aren't faults of my implementation, I hope, but more +an intrinsic property of the Burrows-Wheeler transform (unfortunately). +Maybe this isn't what you want. + +If you want a compressor and/or library which is faster, uses less +memory but gets pretty good compression, and has minimal latency, +consider Jean-loup +Gailly's and Mark Adler's work, @code{zlib-1.1.3} and +@code{gzip-1.2.4}. Look for them at + +@code{http://www.zlib.org} and +@code{http://www.gzip.org} respectively. 
+ +For something faster and lighter still, you might try Markus F X J +Oberhumer's @code{LZO} real-time compression/decompression library, at +@* @code{http://wildsau.idv.uni-linz.ac.at/mfx/lzo.html}. + +If you want to use the @code{bzip2} algorithms to compress small blocks +of data, 64k bytes or smaller, for example on an on-the-fly disk +compressor, you'd be well advised not to use this library. Instead, +I've made a special library tuned for that kind of use. It's part of +@code{e2compr-0.40}, an on-the-fly disk compressor for the Linux +@code{ext2} filesystem. Look at +@code{http://www.netspace.net.au/~reiter/e2compr}. + + + +@section Testing + +A record of the tests I've done. + +First, some data sets: +@itemize @bullet +@item B: a directory containing 6001 files, one for every length in the + range 0 to 6000 bytes. The files contain random lowercase + letters. 18.7 megabytes. +@item H: my home directory tree. Documents, source code, mail files, + compressed data. H contains B, and also a directory of + files designed as boundary cases for the sorting; mostly very + repetitive, nasty files. 565 megabytes. +@item A: directory tree holding various applications built from source: + @code{egcs}, @code{gcc-2.8.1}, KDE, GTK, Octave, etc. + 2200 megabytes. +@end itemize +The tests conducted are as follows. Each test means compressing +(a copy of) each file in the data set, decompressing it and +comparing it against the original. + +First, a bunch of tests with block sizes and internal buffer +sizes set very small, +to detect any problems with the +blocking and buffering mechanisms. +This required modifying the source code so as to try to +break it. +@enumerate +@item Data set H, with + buffer size of 1 byte, and block size of 23 bytes. +@item Data set B, buffer sizes 1 byte, block size 1 byte. +@item As (2) but small-mode decompression. +@item As (2) with block size 2 bytes. +@item As (2) with block size 3 bytes. +@item As (2) with block size 4 bytes. 
+@item As (2) with block size 5 bytes. +@item As (2) with block size 6 bytes and small-mode decompression. +@item H with buffer size of 1 byte, but normal block + size (up to 900000 bytes). +@end enumerate +Then some tests with unmodified source code. +@enumerate +@item H, all settings normal. +@item As (1), with small-mode decompress. +@item H, compress with flag @code{-1}. +@item H, compress with flag @code{-s}, decompress with flag @code{-s}. +@item Forwards compatibility: H, @code{bzip2-0.1pl2} compressing, + @code{bzip2-0.9.5} decompressing, all settings normal. +@item Backwards compatibility: H, @code{bzip2-0.9.5} compressing, + @code{bzip2-0.1pl2} decompressing, all settings normal. +@item Bigger tests: A, all settings normal. +@item As (7), using the fallback (Sadakane-like) sorting algorithm. +@item As (8), compress with flag @code{-1}, decompress with flag + @code{-s}. +@item H, using the fallback sorting algorithm. +@item Forwards compatibility: A, @code{bzip2-0.1pl2} compressing, + @code{bzip2-0.9.5} decompressing, all settings normal. +@item Backwards compatibility: A, @code{bzip2-0.9.5} compressing, + @code{bzip2-0.1pl2} decompressing, all settings normal. +@item Misc test: about 400 megabytes of @code{.tar} files with + @code{bzip2} compiled with Checker (a memory access error + detector, like Purify). +@item Misc tests to make sure it builds and runs ok on non-Linux/x86 + platforms. +@end enumerate +These tests were conducted on a 225 MHz IDT WinChip machine, running +Linux 2.0.36. They represent nearly a week of continuous computation. +All tests completed successfully. + + +@section Further reading +@code{bzip2} is not research work, in the sense that it doesn't present +any new ideas. Rather, it's an engineering exercise based on existing +ideas. + +Four documents describe essentially all the ideas behind @code{bzip2}: +@example +Michael Burrows and D. J. Wheeler: + "A block-sorting lossless data compression algorithm" + 10th May 1994. 
+ Digital SRC Research Report 124. + ftp://ftp.digital.com/pub/DEC/SRC/research-reports/SRC-124.ps.gz + If you have trouble finding it, try searching at the + New Zealand Digital Library, http://www.nzdl.org. + +Daniel S. Hirschberg and Debra A. LeLewer + "Efficient Decoding of Prefix Codes" + Communications of the ACM, April 1990, Vol 33, Number 4. + You might be able to get an electronic copy of this + from the ACM Digital Library. + +David J. Wheeler + Program bred3.c and accompanying document bred3.ps. + This contains the idea behind the multi-table Huffman + coding scheme. + ftp://ftp.cl.cam.ac.uk/users/djw3/ + +Jon L. Bentley and Robert Sedgewick + "Fast Algorithms for Sorting and Searching Strings" + Available from Sedgewick's web page, + www.cs.princeton.edu/~rs +@end example +The following paper gives valuable additional insights into the +algorithm, but is not immediately the basis of any code +used in bzip2. +@example +Peter Fenwick: + Block Sorting Text Compression + Proceedings of the 19th Australasian Computer Science Conference, + Melbourne, Australia. Jan 31 - Feb 2, 1996. + ftp://ftp.cs.auckland.ac.nz/pub/peter-f/ACSC96paper.ps +@end example +Kunihiko Sadakane's sorting algorithm, mentioned above, +is available from: +@example +http://naomi.is.s.u-tokyo.ac.jp/~sada/papers/Sada98b.ps.gz +@end example +The Manber-Myers suffix array construction +algorithm is described in a paper +available from: +@example +http://www.cs.arizona.edu/people/gene/PAPERS/suffix.ps +@end example +Finally, the following paper documents some recent investigations +I made into the performance of sorting algorithms: +@example +Julian Seward: + On the Performance of BWT Sorting Algorithms + Proceedings of the IEEE Data Compression Conference 2000 + Snowbird, Utah. 28-30 March 2000. 
+@end example + + +@contents + +@bye + diff -Nru bzip2-1.0.2/doc/bzip2recover.1 bzip2-1.0.2.new/doc/bzip2recover.1 --- bzip2-1.0.2/doc/bzip2recover.1 Thu Jan 1 01:00:00 1970 +++ bzip2-1.0.2.new/doc/bzip2recover.1 Fri Feb 1 04:19:11 2002 @@ -0,0 +1 @@ +.so bzip2.1 \ No newline at end of file diff -Nru bzip2-1.0.2/doc/bzmore.1 bzip2-1.0.2.new/doc/bzmore.1 --- bzip2-1.0.2/doc/bzmore.1 Thu Jan 1 01:00:00 1970 +++ bzip2-1.0.2.new/doc/bzmore.1 Sun Dec 30 03:12:35 2001 @@ -0,0 +1,152 @@ +.\"Shamelessly copied from zmore.1 by Philippe Troin +.\"for Debian GNU/Linux +.TH BZMORE 1 +.SH NAME +bzmore, bzless \- file perusal filter for crt viewing of bzip2 compressed text +.SH SYNOPSIS +.B bzmore +[ name ... ] +.br +.B bzless +[ name ... ] +.SH NOTE +In the following description, +.I bzless +and +.I less +can be used interchangeably with +.I bzmore +and +.I more. +.SH DESCRIPTION +.I Bzmore +is a filter which allows examination of compressed or plain text files +one screenful at a time on a soft-copy terminal. +.I bzmore +works on files compressed with +.I bzip2 +and also on uncompressed files. +If a file does not exist, +.I bzmore +looks for a file of the same name with the addition of a .bz2 suffix. +.PP +.I Bzmore +normally pauses after each screenful, printing --More-- +at the bottom of the screen. +If the user then types a carriage return, one more line is displayed. +If the user hits a space, +another screenful is displayed. Other possibilities are enumerated later. +.PP +.I Bzmore +looks in the file +.I /etc/termcap +to determine terminal characteristics, +and to determine the default window size. +On a terminal capable of displaying 24 lines, +the default window size is 22 lines. 
+Other sequences which may be typed when +.I bzmore +pauses, and their effects, are as follows (\fIi\fP is an optional integer +argument, defaulting to 1) : +.PP +.IP \fIi\|\fP +display +.I i +more lines, (or another screenful if no argument is given) +.PP +.IP ^D +display 11 more lines (a ``scroll''). +If +.I i +is given, then the scroll size is set to \fIi\|\fP. +.PP +.IP d +same as ^D (control-D) +.PP +.IP \fIi\|\fPz +same as typing a space except that \fIi\|\fP, if present, becomes the new +window size. Note that the window size reverts back to the default at the +end of the current file. +.PP +.IP \fIi\|\fPs +skip \fIi\|\fP lines and print a screenful of lines +.PP +.IP \fIi\|\fPf +skip \fIi\fP screenfuls and print a screenful of lines +.PP +.IP "q or Q" +quit reading the current file; go on to the next (if any) +.PP +.IP "e or q" +When the prompt --More--(Next file: +.IR file ) +is printed, this command causes bzmore to exit. +.PP +.IP s +When the prompt --More--(Next file: +.IR file ) +is printed, this command causes bzmore to skip the next file and continue. +.PP +.IP = +Display the current line number. +.PP +.IP \fIi\|\fP/expr +search for the \fIi\|\fP-th occurrence of the regular expression \fIexpr.\fP +If the pattern is not found, +.I bzmore +goes on to the next file (if any). +Otherwise, a screenful is displayed, starting two lines before the place +where the expression was found. +The user's erase and kill characters may be used to edit the regular +expression. +Erasing back past the first column cancels the search command. +.PP +.IP \fIi\|\fPn +search for the \fIi\|\fP-th occurrence of the last regular expression entered. +.PP +.IP !command +invoke a shell with \fIcommand\|\fP. +Any `!' characters in "command" are replaced with the +previous shell command. The sequence "\\!" is replaced by "!". +.PP +.IP ":q or :Q" +quit reading the current file; go on to the next (if any) +(same as q or Q). +.PP +.IP . +(dot) repeat the previous command. 
+.PP +The commands take effect immediately, i.e., it is not necessary to +type a carriage return. +Up to the time when the command character itself is given, +the user may hit the line kill character to cancel the numerical +argument being formed. +In addition, the user may hit the erase character to redisplay the +--More-- message. +.PP +At any time when output is being sent to the terminal, the user can +hit the quit key (normally control\-\\). +.I Bzmore +will stop sending output, and will display the usual --More-- +prompt. +The user may then enter one of the above commands in the normal manner. +Unfortunately, some output is lost when this is done, due to the +fact that any characters waiting in the terminal's output queue +are flushed when the quit signal occurs. +.PP +The terminal is set to +.I noecho +mode by this program so that the output can be continuous. +What you type will thus not show on your terminal, except for the / and ! +commands. +.PP +If the standard output is not a teletype, then +.I bzmore +acts just like +.I bzcat, +except that a header is printed before each file. 
+.SH FILES +.DT +/etc/termcap Terminal data base +.SH "SEE ALSO" +more(1), less(1), bzip2(1), bzdiff(1), bzgrep(1) diff -Nru bzip2-1.0.2/doc/pl/Makefile.am bzip2-1.0.2.new/doc/pl/Makefile.am --- bzip2-1.0.2/doc/pl/Makefile.am Thu Jan 1 01:00:00 1970 +++ bzip2-1.0.2.new/doc/pl/Makefile.am Fri Feb 1 04:19:11 2002 @@ -0,0 +1,4 @@ + +mandir = @mandir@/pl +man_MANS = bzip2.1 bunzip2.1 bzcat.1 bzip2recover.1 + diff -Nru bzip2-1.0.2/doc/pl/bunzip2.1 bzip2-1.0.2.new/doc/pl/bunzip2.1 --- bzip2-1.0.2/doc/pl/bunzip2.1 Thu Jan 1 01:00:00 1970 +++ bzip2-1.0.2.new/doc/pl/bunzip2.1 Fri Feb 1 04:19:11 2002 @@ -0,0 +1 @@ +.so bzip2.1 \ No newline at end of file diff -Nru bzip2-1.0.2/doc/pl/bzcat.1 bzip2-1.0.2.new/doc/pl/bzcat.1 --- bzip2-1.0.2/doc/pl/bzcat.1 Thu Jan 1 01:00:00 1970 +++ bzip2-1.0.2.new/doc/pl/bzcat.1 Fri Feb 1 04:19:11 2002 @@ -0,0 +1 @@ +.so bzip2.1 \ No newline at end of file diff -Nru bzip2-1.0.2/doc/pl/bzip2.1 bzip2-1.0.2.new/doc/pl/bzip2.1 --- bzip2-1.0.2/doc/pl/bzip2.1 Thu Jan 1 01:00:00 1970 +++ bzip2-1.0.2.new/doc/pl/bzip2.1 Fri Feb 1 04:19:11 2002 @@ -0,0 +1,384 @@ +.\" Tłumaczenie Maciej Wojciechowski wojciech@staszic.waw.pl +.PU +.TH bzip2 1 "" "" "wersja 1.0" +.SH NAZWA +bzip2, bunzip2 \- sortujący bloki kompresor/dekompresor plików, v1.0 +.br +bzcat \- dekompresuje pliki na standardowe wyjście +.br +bzip2recover \- odzyskuje dane ze zniszczonych archiwów bzip2 +.SH SKŁADNIA +.ll +8 +.B bzip2 +.RB [ \-cdfkqstvzVL123456789 ] +.RI [ nazwy_plików \&...] +.ll -8 +.br +.B bunzip2 +.RB [ \-fkvsVL ] +.RI [ nazwy_plików \&...] +.br +.B bzcat +.RB [ \-s ] +.RI [ nazwy_plików \&...] +.br +.B bzip2recover +.I nazwa_pliku +.SH OPIS +.I bzip2 +kompresuje pliki używając algorytmu sortowania bloków Burrowsa-Wheelera i +kodu Huffmana. Kompresja jest generalnie sporo lepsza od konwencjonalnych +kompresorów opartych o metodę LZ77/LZ78, i jest porównywalna z +osiągnięciami statystycznych kompresorów z rodziny PPM. 
+ +Opcje linii poleceń są w większości bardzo podobne do tych z +.IR "GNU gzip" , +ale nie są identyczne. + +.I bzip2 +oczekuje listy plików towarzyszących parametrom linii poleceń. Każdy plik jest +zastępowany przez swoją skompresowaną wersję, z nazwą +"oryginalny_plik.bz2". Każdy skompresowany plik ma ten sam czas modyfikacji, +uprawnienia i, jeśli to możliwe, właściciela co oryginał, po to, aby te +ustawienia mogły zostać odtworzone podczas dekompresji. Utrzymywanie nazwy +plików nie jest do końca dokładne w tym sensie, że nie ma możliwości +przetrzymywania daty, uprawnień, właściciela i nazw plików na systemach, na +których brakuje tych możliwości lub mają ograniczenia co do długości nazwy, +tak np. jak MS-DOS. + +.I bzip2 +i +.I bunzip2 +standardowo nie nadpisują istniejących już plików. Jeśli chcesz aby to +robiły, musisz użyć parametru \-f. + +Jeśli nie podano żadnej nazwy pliku, +.I bzip2 +kompresuje ze standardowego wejścia na standardowe wyjście. Odmawia wówczas +wypisywania skompresowanego wyjścia na terminal, gdyż byłoby to +całkiem niezrozumiałe i przez to bez większego sensu. + +.I bunzip2 +(lub +.IR bzip2 \-d ) +dekompresuje wszystkie podane pliki. Pliki, które nie były +utworzone przez +.I bzip2 +zostaną wykryte i zignorowane, a na ekranie pojawi się komunikat +ostrzegawczy. +.I bzip2 +próbuje zgadnąć nazwę dla dekompresowanego pliku w następujący sposób: +.nf + nazwa_pliku.bz2 staje się nazwa_pliku + nazwa_pliku.bz staje się nazwa_pliku + nazwa_pliku.tbz2 staje się nazwa_pliku.tar + nazwa_pliku.tbz staje się nazwa_pliku.tar + inna_nazwa staje się inna_nazwa.out +.fi +Jeśli plik nie ma jednego z następujących rozpoznawalnych rozszerzeń, +.IR .bz2 , +.IR .bz , +.I .tbz2 +lub +.IR .tbz , +to +.I bzip2 +napisze, że nie może zgadnąć nazwy pierwotnego pliku, i użyje +oryginalnej nazwy z dodanym rozszerzeniem +.IR .out . + +Tak jak kompresja, nie posiadająca żadnych plików, powoduje kompresję ze +standardowego wejścia na standardowe wyjście. 
+ +.I bunzip2 +poprawnie zdekompresuje plik, który jest połączeniem dwóch lub więcej +skompresowanych plików. Rezultatem jest połączony odpowiedni +nieskompresowany plik. Obsługiwane jest również sprawdzanie spójności +(\-t) połączonych skompresowanych plików. + +Możesz również kompresować lub dekompresować pliki na standardowe wyjście +używając parametru \-c. W ten właśnie sposób można przeprowadzać kompresję +wielu plików równocześnie. +Powstałe wyniki są przesyłane sekwencyjnie na standardowe wyjście. +W ten sposób kompresja wielu plików generuje strumień +zawierający reprezentacje kilku skompresowanych plików. Taki strumień może +być zdekompresowany poprawnie tylko przez +.I bzip2 +w wersji 0.9.0 lub późniejszej. Wcześniejsze wersje +.I bzip2 +zatrzymają się po zdekompresowaniu pierwszego pliku w strumieniu. + +.I bzcat +(lub +.I bzip2 -dc) +dekompresuje wszystkie wybrane pliki na standardowe wyjście. + +.I bzip2 +czyta argumenty ze zmiennych środowiskowych +.I BZIP2 +i +.I BZIP, +w podanej kolejności, i przetwarza je przed jakimikolwiek argumentami +przeczytanymi z linii poleceń. To dobra metoda na specyfikowanie +standardowych ustawień. + +Kompresja stosowana jest zawsze, nawet jeśli skompresowany plik jest +nieznacznie większy od pliku oryginalnego. Pliki mniejsze niż mniej więcej +sto bajtów stają się większe, ponieważ mechanizm kompresji ma stały +nagłówek wynoszący około 50 bajtów. Przypadkowe dane (włączając wyjście +większości kompresorów plików) są kodowane na mniej więcej 8.05 bitu na +bajt, dając zysk około 0.5%. + +Jako samosprawdzenie dla twojej ochrony +.I bzip2 +używa 32-bitowego CRC aby upewnić się, że zdekompresowana wersja pliku jest +identyczna z oryginalną. To strzeże przed stratami w skompresowanych danych +i przed niewykrytymi błędami w +.I bzip2 +(na szczęście bardzo rzadkich). Możliwość niewykrycia utraty danych +jest mikroskopijna, mniej więcej jedna szansa na cztery miliardy dla każdego +pliku. 
Uważaj jednak, gdyż sprawdzenie jest dokonywane podczas dekompresji, +więc dowiesz się tylko tego, że coś jest nie w porządku. Nie pomoże ci to odzyskać +oryginalnych nieskompresowanych danych. Możesz użyć +.I bzip2recover +aby spróbować odzyskać dane z uszkodzonych plików. + +Zwracane wartości: 0 dla normalnego wyjścia, 1 dla problemów technicznych +(plik nie znaleziony, niewłaściwy parametr, błąd wejścia/wyjścia itp.), 2 dla +zasygnalizowania błędu skompresowanego pliku, 3 dla wewnętrznego błędu (np. +bug), który zmusił \fIbzip2\fP do przerwania. + +.SH OPCJE +.TP +.B \-c --stdout +Kompresuje lub dekompresuje na standardowe wyjście. +.TP +.B \-d --decompress +Wymusza dekompresję. +.IR bzip2 , +.I bunzip2 +i +.I bzcat +są tak naprawdę tymi samymi programami i decyzja jakie akcje będą wykonane +jest wykonywana na podstawie nazwy jaka została użyta. Ten parametr ma wyższy +priorytet i wymusza na \fIbzip2\fP dekompresję. +.TP +.B \-z --compress +Podobne do \-d: wymusza kompresję, bez względu na sposób wywołania. +.TP +.B \-t --test +Sprawdza integralność wybranego pliku(ów), ale nie dekompresuje ich. Wymusza +to próbną dekompresję i mówi, jaki jest rezultat. +.TP +.B \-f --force +Wymusza zastępowanie plików wyjściowych. Normalnie, \fIbzip2\fP nie +zastępuje istniejących plików wyjściowych. Wymusza również na \fIbzip2\fP +łamanie dowiązań twardych, czego normalnie nie robi. +.TP +.B \-k --keep +Zatrzymaj (nie kasuj) pliki wejściowe przy kompresji lub dekompresji. +.TP +.B \-s --small +Zredukuj użycie pamięci na kompresję, dekompresję i testowanie. Pliki są +dekompresowane i testowane przy użyciu zmodyfikowanego algorytmu, który +potrzebuje tylko 2.5 bajtu na bajt bloku. Oznacza to, że każdy plik może +być zdekompresowany przy użyciu około 2300k pamięci, jednak tracąc około połowę +normalnej szybkości. + +Podczas kompresji, \-s wybiera bloki wielkości 200k, których limity +pamięci wynoszą mniej więcej tyle samo, w zamian za jakość kompresji. 
W +skrócie, jeśli twój komputer ma mało pamięci (8 megabajtów lub mniej), +używaj opcji \-s do wszystkiego. Zobacz \fBzarządzanie pamięcią\fP poniżej. +.TP +.B \-q --quiet +Wyłącza wszystkie nieistotne komunikaty ostrzegawcze. +Nie są eliminowane komunikaty dotyczące błędów wejścia/wyjścia i innych +zdarzeń krytycznych. +.TP +.B \-v --verbose +Tryb gadatliwy -- pokazuje stopień kompresji dla każdego pliku. Następne +\fB\-v\fP zwiększają stopień gadatliwości, powodując wyświetlanie dużej +ilości informacji, przydatnych głównie przy diagnostyce. +.TP +.B \-L --license -V --version +Wyświetla wersję programu i warunki licencji. +.TP +.B \-1 do \-9 +Ustawia wielkość bloku na 100 k, 200 k .. 900 k przy kompresji. Nie ma +żadnego znaczenia przy dekompresji. Zobacz \fBzarządzanie pamięcią\fP +poniżej. +.TP +.B \-- +Traktuje wszystkie następujące po nim argumenty jako nazwy plików, nawet jeśli +zaczynają się one od myślnika. Możesz więc kompresować i dekompresować +pliki, których nazwa zaczyna się od myślnika, na przykład: bzip2 \-- +\-mój_plik. +.TP +.B \--repetitive-fast --repetitive-best +Te parametry nie mają znaczenia w wersjach 0.9.5 i wyższych. Umożliwiały one +pewną infantylną kontrolę nad zachowaniem algorytmu sortującego we +wcześniejszych wersjach, co było czasami użyteczne. Wersje 0.9.5 i wyższe +mają usprawniony algorytm, który powoduje bezużyteczność tej funkcji. + +.SH ZARZĄDZANIE PAMIĘCIĄ +.I bzip2 +kompresuje duże pliki w blokach. Rozmiar bloku ma wpływ zarówno na stopień +osiąganej kompresji, jak również na ilość pamięci potrzebnej do kompresji +i dekompresji. Parametry od \-1 do \-9 wybierają rozmiar bloku odpowiednio +od 100,000 bajtów aż do 900,000 bajtów (standardowo). W czasie dekompresji, +rozmiar bloku użytego do kompresji jest odczytywany z nagłówka pliku +skompresowanego i +.I bunzip2 +sam zajmuje odpowiednią do dekompresji ilość pamięci.
Ponieważ rozmiar +bloków jest przetrzymywany w pliku skompresowanym, parametry od \-1 do \-9 +nie mają przy dekompresji żadnego znaczenia. + +Wymagania kompresji i dekompresji w bajtach, mogą być wyliczone przez: + + Kompresja : 400k + ( 8 x rozmiar bloku ) + + Dekompresja : 100k + ( 4 x rozmiar bloku ) lub + 100k + ( 2.5 x rozmiar bloku ) + +Większe bloki dają duże zmniejszenie zwrotów marginalnych. Większość +kompresji pochodzi z pierwszych stu lub dwustu kilobajtów rozmiaru bloku. +Warto o tym pamiętać używając \fIbzip2\fP na wolnych +komputerach. Warto również podkreślić, że rozmiar pamięci potrzebnej do +dekompresji jest wybierany poprzez ustawienie odpowiedniej +wielkości bloku przy kompresji. + +Dla plików skompresowanych standardowym blokiem wielkości 900k, +\fIbunzip2\fP będzie wymagał około 3700 kilobajtów do dekompresji. Aby +umożliwić dekompresję na komputerze wyposażonym jedynie w 4 megabajty +pamięci, \fIbunzip2\fP ma opcję, która może zmniejszyć wymagania prawie do +połowy, tzn. około 2300 kilobajtów. Prędkość dekompresji jest również bardzo +zmniejszona, więc używaj tej opcji tylko wtedy, kiedy jest to konieczne. Tym +parametrem jest -s. + +Generalnie, próbuj i używaj największych rozmiarów bloków, jeśli ilość +pamięci ci na to pozwala. Prędkość kompresji i dekompresji w zasadzie nie +zależy od wielkości użytego bloku. + +Inna ważna rzecz dotyczy plików, które mieszczą się w pojedynczym bloku -- +oznacza to większość plików na które się natkniesz używając dużych bloków. +Rozmiar realny pamięci zabieranej jest proporcjonalny do wielkości pliku, +jeśli plik jest mniejszy niż blok. Na przykład, kompresja pliku o +wielkości 20,000 bajtów z parametrem -9 wymusi na kompresorze odnalezienie +7600 k pamięci, ale zajęcie tylko 400k + 20000 * 8 = 560 kilobajtów z +tego. Podobnie, dekompresor odnajdzie 3700k, ale zajmie tylko 100k + 20000 +* 4 = 180 kilobajtów. + +Tu jest tabela, która podsumowuje maksymalne użycie pamięci dla różnych +rozmiarów bloków.
Podano też całkowity rozmiar skompresowanych 14 +plików tekstowych (Calgary Text Compression Corpus) zajmujących razem +3,141,622 bajtów. Ta kolumna daje pewne pojęcie o tym, jaki wpływ na +kompresję ma wielkość bloków. Ta tabela uzmysławia również przewagę użycia +większych bloków dla większych plików, ponieważ "Corpus" jest zdominowany +przez mniejsze pliki. +.nf + Użycie Użycie Użycie Corpus + Parametr kompresji dekompresji dekompresji -s Size + + -1 1200k 500k 350k 914704 + -2 2000k 900k 600k 877703 + -3 2800k 1300k 850k 860338 + -4 3600k 1700k 1100k 846899 + -5 4400k 2100k 1350k 845160 + -6 5200k 2500k 1600k 838626 + -7 6100k 2900k 1850k 834096 + -8 6800k 3300k 2100k 828642 + -9 7600k 3700k 2350k 828642 +.fi +.SH ODZYSKIWANIE DANYCH ZE ZNISZCZONYCH PLIKÓW BZIP2 +.I bzip2 +kompresuje pliki w blokach, zazwyczaj 900 kilobajtowych. Każdy blok jest +trzymany osobno. Jeśli błędy transmisji lub nośnika uszkodzą plik +wieloblokowy .bz2, możliwe jest odtworzenie danych zawartych w +niezniszczonych blokach pliku. + +Każdy blok jest reprezentowany przez 48-bitowy wzorzec, który umożliwia +znajdowanie przyporządkowań bloków z rozsądną pewnością. Każdy blok +ma również swój 32-bitowy CRC, więc bloki uszkodzone mogą być łatwo +odseparowane od poprawnych. + +.I bzip2recover +jest oddzielnym programem, którego zadaniem jest poszukiwanie bloków w +plikach .bz2 i zapisywanie ich do własnego pliku .bz2. Możesz potem użyć +\fIbzip2\fP \-t aby sprawdzić spójność wyjściowego pliku i zdekompresować +te, które nie są uszkodzone. + +.I bzip2recover +pobiera pojedynczy argument, nazwę uszkodzonego pliku, i tworzy pewną liczbę +plików "rec0001plik.bz2", "rec0002plik.bz2", itd., przetrzymujące odzyskane +bloki. Wyjściowe nazwy plików są tak tworzone, aby łatwo było potem używać +ich razem za pomocą gwiazdek -- na przykład, "bzip2 -dc rec*plik.bz2 > +odzyskany_plik" -- wylistuje pliki we właściwej kolejności.
+ +.I bzip2recover +powinien być używany najczęściej z dużymi plikami .bz2, jako że one +zawierają najczęściej dużo bloków. Jest czystym bezsensem używać go na +uszkodzonym jednoblokowym pliku, ponieważ uszkodzony blok nie może być +odzyskany. Jeśli chcesz zminimalizować jakiekolwiek możliwe straty danych +poprzez nośnik lub transmisję, powinieneś zastanowić się nad użyciem +mniejszych bloków. + +.SH OPISY WYNIKÓW +Etap sortujący kompresji łączy razem podobne ciągi znaków w pliku. Przez +to, pliki zawierające bardzo długie ciągi powtarzających się symboli, jak +"aabaabaabaab ..." (powtórzone kilkaset razy) mogą być kompresowane wolniej +niż normalnie. Wersje 0.9.5 i wyższe zachowują się dużo lepiej w tej +sytuacji niż wersje poprzednie. Stosunek czasu kompresji w +najgorszym przypadku do czasu w przypadku średnim wynosi około 10:1. Dla +wcześniejszych wersji było to nawet około 100:1. Jeśli chcesz, możesz użyć +parametru \-vvvv aby monitorować postępy bardzo szczegółowo. + +Prędkość dekompresji nie jest zmieniana przez to zjawisko. + +.I bzip2 +zazwyczaj rezerwuje kilka megabajtów pamięci do działania a +potem wykorzystuje ją w sposób zupełnie przypadkowy. +Oznacza to, że zarówno prędkość kompresji jak i dekompresji jest w +dużej części zależna od prędkości, z jaką twój komputer może naprawiać braki +bufora podręcznego. Z tego powodu, wprowadzone zostały małe zmiany kodu aby +zmniejszyć straty, które dały nieproporcjonalnie duży wzrost osiągnięć. +Myślę, że +.I bzip2 +będzie działał najlepiej na komputerach z dużymi buforami podręcznymi. + +.SH ZAKAMARKI +Wiadomości o błędach wejścia/wyjścia nie są aż tak pomocne, jak mogłyby być. +.I bzip2 +stara się wykryć błąd wejścia/wyjścia i wyjść "czysto", ale +szczegóły tego, jaki to problem mogą być czasami bardzo mylące. + +Ta strona podręcznika odnosi się do wersji 1.0 programu \fIbzip2\fP.
+Skompresowane pliki utworzone przez tę wersję są kompatybilne zarówno +w przód, jak i wstecz z poprzednimi publicznymi wydaniami, +wersjami 0.1pl2, 0.9.0 i 0.9.5 ale z małymi wyjątkami: 0.9.0 i wyższe potrafią +poprawnie dekompresować wiele skompresowanych plików złączonych w jeden. +0.1pl2 nie potrafi tego; zatrzyma się już po dekompresji pierwszego pliku w +strumieniu. + +.I bzip2recover +używa 32-bitowych liczb do reprezentacji pozycji bitu w skompresowanym +pliku, więc nie może przetwarzać skompresowanych plików dłuższych niż 512 +megabajtów. Można to łatwo naprawić. + +.SH AUTOR +Julian Seward, jseward@acm.org. + +http://www.muraroa.demon.co.uk +http://sourceware.cygnus.com/bzip2 + +Idee zawarte w \fIbzip2\fP są podzielone (przynajmniej) pomiędzy +następujących ludzi: Michael Burrows i David Wheeler (transformacja +sortująca bloki), David Wheeler (znów, koder Huffmana), Peter Fenwick +(struktura kodowania modelu w oryginalnym \fIbzip2\fP, i wiele +udoskonaleń), i Alistair Moffat, Radford Neal i Ian Witten (arytmetyczny +koder w oryginalnym \fIbzip2\fP). Jestem im bardzo wdzięczny za ich pomoc, +wsparcie i porady. Zobacz stronę manuala w źródłowej dystrybucji po +wskaźniki do źródeł dokumentacji. Christian von Roques zachęcił mnie do +wymyślenia szybszego algorytmu sortującego, po to żeby przyspieszyć +kompresję. Bela Lubkin zachęciła mnie do polepszenia najgorszych wyników +kompresji. Wielu ludzi przysłało łatki, pomogło w różnych problemach, +pożyczyło komputerów, dało rady i było ogólnie pomocnych.
diff -Nru bzip2-1.0.2/doc/pl/bzip2recover.1 bzip2-1.0.2.new/doc/pl/bzip2recover.1 --- bzip2-1.0.2/doc/pl/bzip2recover.1 Thu Jan 1 01:00:00 1970 +++ bzip2-1.0.2.new/doc/pl/bzip2recover.1 Fri Feb 1 04:19:11 2002 @@ -0,0 +1 @@ +.so bzip2.1 \ No newline at end of file diff -Nru bzip2-1.0.2/huffman.c bzip2-1.0.2.new/huffman.c --- bzip2-1.0.2/huffman.c Sun Dec 30 03:19:17 2001 +++ bzip2-1.0.2.new/huffman.c Fri Feb 1 04:19:11 2002 @@ -58,6 +58,10 @@ For more information on these sources, see the manual. --*/ +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + #include "bzlib_private.h" diff -Nru bzip2-1.0.2/makefile.msc bzip2-1.0.2.new/makefile.msc --- bzip2-1.0.2/makefile.msc Wed Jan 2 05:02:33 2002 +++ bzip2-1.0.2.new/makefile.msc Thu Jan 1 01:00:00 1970 @@ -1,63 +0,0 @@ -# Makefile for Microsoft Visual C++ 6.0 -# usage: nmake -f makefile.msc -# K.M. Syring (syring@gsf.de) -# Fixed up by JRS for bzip2-0.9.5d release. - -CC=cl -CFLAGS= -DWIN32 -MD -Ox -D_FILE_OFFSET_BITS=64 -nologo - -OBJS= blocksort.obj \ - huffman.obj \ - crctable.obj \ - randtable.obj \ - compress.obj \ - decompress.obj \ - bzlib.obj - -all: lib bzip2 test - -bzip2: lib - $(CC) $(CFLAGS) -o bzip2 bzip2.c libbz2.lib setargv.obj - $(CC) $(CFLAGS) -o bzip2recover bzip2recover.c - -lib: $(OBJS) - lib /out:libbz2.lib $(OBJS) - -test: bzip2 - type words1 - .\\bzip2 -1 < sample1.ref > sample1.rb2 - .\\bzip2 -2 < sample2.ref > sample2.rb2 - .\\bzip2 -3 < sample3.ref > sample3.rb2 - .\\bzip2 -d < sample1.bz2 > sample1.tst - .\\bzip2 -d < sample2.bz2 > sample2.tst - .\\bzip2 -ds < sample3.bz2 > sample3.tst - @echo All six of the fc's should find no differences. - @echo If fc finds an error on sample3.bz2, this could be - @echo because WinZip's 'TAR file smart CR/LF conversion' - @echo is too clever for its own good. Disable this option. - @echo The correct size for sample3.ref is 120,244. If it - @echo is 150,251, WinZip has messed it up.
- fc sample1.bz2 sample1.rb2 - fc sample2.bz2 sample2.rb2 - fc sample3.bz2 sample3.rb2 - fc sample1.tst sample1.ref - fc sample2.tst sample2.ref - fc sample3.tst sample3.ref - - - -clean: - del *.obj - del libbz2.lib - del bzip2.exe - del bzip2recover.exe - del sample1.rb2 - del sample2.rb2 - del sample3.rb2 - del sample1.tst - del sample2.tst - del sample3.tst - -.c.obj: - $(CC) $(CFLAGS) -c $*.c -o $*.obj - diff -Nru bzip2-1.0.2/randtable.c bzip2-1.0.2.new/randtable.c --- bzip2-1.0.2/randtable.c Sun Dec 30 03:19:04 2001 +++ bzip2-1.0.2.new/randtable.c Fri Feb 1 04:19:18 2002 @@ -58,6 +58,10 @@ For more information on these sources, see the manual. --*/ +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + #include "bzlib_private.h" diff -Nru bzip2-1.0.2/spewG.c bzip2-1.0.2.new/spewG.c --- bzip2-1.0.2/spewG.c Thu Apr 6 01:24:45 2000 +++ bzip2-1.0.2.new/spewG.c Fri Feb 1 04:19:18 2002 @@ -9,7 +9,10 @@ (but is otherwise harmless). */ -#define _FILE_OFFSET_BITS 64 +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + #include <stdio.h> #include <stdlib.h> diff -Nru bzip2-1.0.2/stamp-h.in bzip2-1.0.2.new/stamp-h.in --- bzip2-1.0.2/stamp-h.in Thu Jan 1 01:00:00 1970 +++ bzip2-1.0.2.new/stamp-h.in Fri Feb 1 04:19:18 2002 @@ -0,0 +1 @@ +timestamp diff -Nru bzip2-1.0.2/unzcrash.c bzip2-1.0.2.new/unzcrash.c --- bzip2-1.0.2/unzcrash.c Thu Apr 6 01:24:46 2000 +++ bzip2-1.0.2.new/unzcrash.c Fri Feb 1 04:19:18 2002 @@ -13,6 +13,12 @@ many hours. */ +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + + + #include <stdio.h> #include <assert.h> #include "bzlib.h"