From 12e8ce2c86c2f80e45a7ab5ac4b9a1d01f71d34f Mon Sep 17 00:00:00 2001 From: Jakub Bogusz Date: Sat, 22 Mar 2008 17:09:38 +0000 Subject: [PATCH] - rewritten in less invasive way - pl man pages moved to common man-pages tarball - outdated bzip2.texi dropped Changed files: bzip2-libtoolizeautoconf.patch -> 1.9 --- bzip2-libtoolizeautoconf.patch | 3876 +------------------------------- 1 file changed, 42 insertions(+), 3834 deletions(-) diff --git a/bzip2-libtoolizeautoconf.patch b/bzip2-libtoolizeautoconf.patch index 549d841..528893e 100644 --- a/bzip2-libtoolizeautoconf.patch +++ b/bzip2-libtoolizeautoconf.patch @@ -1,16 +1,25 @@ -diff -Nru bzip2-1.0.2/Makefile.am bzip2-1.0.2.new/Makefile.am ---- bzip2-1.0.2/Makefile.am Thu Jan 1 01:00:00 1970 -+++ bzip2-1.0.2.new/Makefile.am Fri Feb 1 04:19:09 2002 -@@ -0,0 +1,31 @@ -+SUBDIRS = doc -+ -+bin_PROGRAMS = bzip2 bzip2recover -+bzip2_SOURCES = bzip2.c -+ -+bzip2_LDADD = libbz2.la -+bzip2recover_SOURCES = bzip2recover.c -+lib_LTLIBRARIES = libbz2.la -+libbz2_la_SOURCES = \ +--- bzip2-1.0.5.orig/configure.ac 1970-01-01 01:00:00.000000000 +0100 ++++ bzip2-1.0.5/configure.ac 2008-03-22 16:15:55.057937371 +0100 +@@ -0,0 +1,9 @@ ++AC_INIT([bzip2], [1.0.5]) ++AM_INIT_AUTOMAKE ++AC_CONFIG_HEADER([config.h]) ++AC_PROG_CC ++AC_PROG_LIBTOOL ++AC_PROG_LN_S ++AC_SYS_LARGEFILE ++AC_CONFIG_FILES([Makefile]) ++AC_OUTPUT +--- bzip2-1.0.5.orig/Makefile.am 1970-01-01 01:00:00.000000000 +0100 ++++ bzip2-1.0.5/Makefile.am 2008-03-22 16:14:38.913593962 +0100 +@@ -0,0 +1,37 @@ ++include_HEADERS = bzlib.h bzlib_private.h ++lib_LTLIBRARIES = libbz2.la ++bin_PROGRAMS = bzip2 bzip2recover ++bin_SCRIPTS = bzdiff bzgrep bzmore ++man_MANS = bzip2.1 bzgrep.1 bzmore.1 bzdiff.1 ++ ++libbz2_la_SOURCES = \ + blocksort.c \ + huffman.c \ + crctable.c \ @@ -20,356 +29,51 @@ diff -Nru bzip2-1.0.2/Makefile.am bzip2-1.0.2.new/Makefile.am + bzlib.c \ + bzlib.h \ + bzlib_private.h -+ +libbz2_la_LDFLAGS = -version-info 1:0:0 -+include_HEADERS = bzlib.h bzlib_private.h -+ -+bin_SCRIPTS = bzless bzgrep + -+EXTRA_DIST = README README.COMPILATION.PROBLEMS \ -+ Y2K_INFO libbz2.def libbz2.dsp \ -+ sample1.bz2 sample1.ref sample2.bz2 sample2.ref sample3.bz2 sample3.ref ++bzip2_SOURCES = bzip2.c ++bzip2_LDADD = libbz2.la + +install-exec-hook: + $(LN_S) -f bzip2 $(DESTDIR)$(bindir)/bunzip2 + $(LN_S) -f bzip2 $(DESTDIR)$(bindir)/bzcat -diff -Nru bzip2-1.0.2/bzdiff.1 bzip2-1.0.2.new/bzdiff.1 ---- bzip2-1.0.2/bzdiff.1 Sun Dec 30 03:12:35 2001 -+++ bzip2-1.0.2.new/bzdiff.1 Thu Jan 1 01:00:00 1970 -@@ -1,47 +0,0 @@ --\"Shamelessly copied from zmore.1 by Philippe Troin --\"for Debian GNU/Linux --.TH BZDIFF 1 --.SH NAME --bzcmp, bzdiff \- compare bzip2 compressed files --.SH SYNOPSIS --.B bzcmp --[ cmp_options ] file1 --[ file2 ] --.br --.B bzdiff --[ diff_options ] file1 --[ file2 ] --.SH DESCRIPTION --.I Bzcmp --and --.I bzdiff --are used to invoke the --.I cmp --or the --.I diff --program on bzip2 compressed files. All options specified are passed --directly to --.I cmp --or --.IR diff "." --If only 1 file is specified, then the files compared are --.I file1 --and an uncompressed --.IR file1 ".bz2." --If two files are specified, then they are uncompressed if necessary and fed to --.I cmp --or --.IR diff "." --The exit status from --.I cmp --or --.I diff --is preserved. --.SH "SEE ALSO" --cmp(1), diff(1), bzmore(1), bzless(1), bzgrep(1), bzip2(1) --.SH BUGS --Messages from the --.I cmp --or --.I diff --programs refer to temporary filenames instead of those specified. -diff -Nru bzip2-1.0.2/bzgrep.1 bzip2-1.0.2.new/bzgrep.1 ---- bzip2-1.0.2/bzgrep.1 Sun Dec 30 03:12:35 2001 -+++ bzip2-1.0.2.new/bzgrep.1 Thu Jan 1 01:00:00 1970 -@@ -1,56 +0,0 @@ --\"Shamelessly copied from zmore.1 by Philippe Troin --\"for Debian GNU/Linux --.TH BZGREP 1 --.SH NAME --bzgrep, bzfgrep, bzegrep \- search possibly bzip2 compressed files for a regular expression --.SH SYNOPSIS --.B bzgrep --[ grep_options ] --.BI [\ -e\ ] " pattern" --.IR filename ".\|.\|." --.br --.B bzegrep --[ egrep_options ] --.BI [\ -e\ ] " pattern" --.IR filename ".\|.\|." --.br --.B bzfgrep --[ fgrep_options ] --.BI [\ -e\ ] " pattern" --.IR filename ".\|.\|." --.SH DESCRIPTION --.IR Bzgrep --is used to invoke the --.I grep --on bzip2-compressed files. All options specified are passed directly to --.I grep. --If no file is specified, then the standard input is decompressed --if necessary and fed to grep. --Otherwise the given files are uncompressed if necessary and fed to --.I grep. --.PP --If --.I bzgrep --is invoked as --.I bzegrep --or --.I bzfgrep --then --.I egrep --or --.I fgrep --is used instead of --.I grep. --If the GREP environment variable is set, --.I bzgrep --uses it as the --.I grep --program to be invoked. For example: -- -- for sh: GREP=fgrep bzgrep string files -- for csh: (setenv GREP fgrep; bzgrep string files) --.SH AUTHOR --Charles Levert (charles@comm.polymtl.ca). Adapted to bzip2 by Philippe --Troin for Debian GNU/Linux. --.SH "SEE ALSO" --grep(1), egrep(1), fgrep(1), bzdiff(1), bzmore(1), bzless(1), bzip2(1) -diff -Nru bzip2-1.0.2/bzless bzip2-1.0.2.new/bzless ---- bzip2-1.0.2/bzless Thu Jan 1 01:00:00 1970 -+++ bzip2-1.0.2.new/bzless Fri Feb 1 04:19:11 2002 -@@ -0,0 +1,2 @@ -+#!/bin/sh -+%{_bindir}/bunzip2 -c "$@" | %{_bindir}/less -diff -Nru bzip2-1.0.2/bzmore.1 bzip2-1.0.2.new/bzmore.1 ---- bzip2-1.0.2/bzmore.1 Sun Dec 30 03:12:35 2001 -+++ bzip2-1.0.2.new/bzmore.1 Thu Jan 1 01:00:00 1970 -@@ -1,152 +0,0 @@ --.\"Shamelessly copied from zmore.1 by Philippe Troin --.\"for Debian GNU/Linux --.TH BZMORE 1 --.SH NAME --bzmore, bzless \- file perusal filter for crt viewing of bzip2 compressed text --.SH SYNOPSIS --.B bzmore --[ name ... ] --.br --.B bzless --[ name ... ] --.SH NOTE --In the following description, --.I bzless --and --.I less --can be used interchangeably with --.I bzmore --and --.I more. --.SH DESCRIPTION --.I Bzmore --is a filter which allows examination of compressed or plain text files --one screenful at a time on a soft-copy terminal. --.I bzmore --works on files compressed with --.I bzip2 --and also on uncompressed files. --If a file does not exist, --.I bzmore --looks for a file of the same name with the addition of a .bz2 suffix. --.PP --.I Bzmore --normally pauses after each screenful, printing --More-- --at the bottom of the screen. --If the user then types a carriage return, one more line is displayed. --If the user hits a space, --another screenful is displayed. Other possibilities are enumerated later. --.PP --.I Bzmore --looks in the file --.I /etc/termcap --to determine terminal characteristics, --and to determine the default window size. --On a terminal capable of displaying 24 lines, --the default window size is 22 lines. --Other sequences which may be typed when --.I bzmore --pauses, and their effects, are as follows (\fIi\fP is an optional integer --argument, defaulting to 1) : --.PP --.IP \fIi\|\fP --display --.I i --more lines, (or another screenful if no argument is given) --.PP --.IP ^D --display 11 more lines (a ``scroll''). --If --.I i --is given, then the scroll size is set to \fIi\|\fP. --.PP --.IP d --same as ^D (control-D) --.PP --.IP \fIi\|\fPz --same as typing a space except that \fIi\|\fP, if present, becomes the new --window size. Note that the window size reverts back to the default at the --end of the current file. --.PP --.IP \fIi\|\fPs --skip \fIi\|\fP lines and print a screenful of lines --.PP --.IP \fIi\|\fPf --skip \fIi\fP screenfuls and print a screenful of lines --.PP --.IP "q or Q" --quit reading the current file; go on to the next (if any) --.PP --.IP "e or q" --When the prompt --More--(Next file: --.IR file ) --is printed, this command causes bzmore to exit. --.PP --.IP s --When the prompt --More--(Next file: --.IR file ) --is printed, this command causes bzmore to skip the next file and continue. --.PP --.IP = --Display the current line number. --.PP --.IP \fIi\|\fP/expr --search for the \fIi\|\fP-th occurrence of the regular expression \fIexpr.\fP --If the pattern is not found, --.I bzmore --goes on to the next file (if any). --Otherwise, a screenful is displayed, starting two lines before the place --where the expression was found. --The user's erase and kill characters may be used to edit the regular --expression. --Erasing back past the first column cancels the search command. --.PP --.IP \fIi\|\fPn --search for the \fIi\|\fP-th occurrence of the last regular expression entered. --.PP --.IP !command --invoke a shell with \fIcommand\|\fP. --The character `!' in "command" are replaced with the --previous shell command. The sequence "\\!" is replaced by "!". --.PP --.IP ":q or :Q" --quit reading the current file; go on to the next (if any) --(same as q or Q). --.PP --.IP . --(dot) repeat the previous command. --.PP --The commands take effect immediately, i.e., it is not necessary to --type a carriage return. --Up to the time when the command character itself is given, --the user may hit the line kill character to cancel the numerical --argument being formed. --In addition, the user may hit the erase character to redisplay the ----More-- message. --.PP --At any time when output is being sent to the terminal, the user can --hit the quit key (normally control\-\\). --.I Bzmore --will stop sending output, and will display the usual --More-- --prompt. --The user may then enter one of the above commands in the normal manner. --Unfortunately, some output is lost when this is done, due to the --fact that any characters waiting in the terminal's output queue --are flushed when the quit signal occurs. --.PP --The terminal is set to --.I noecho --mode by this program so that the output can be continuous. --What you type will thus not show on your terminal, except for the / and ! --commands. --.PP --If the standard output is not a teletype, then --.I bzmore --acts just like --.I bzcat, --except that a header is printed before each file. --.SH FILES --.DT --/etc/termcap Terminal data base --.SH "SEE ALSO" --more(1), less(1), bzip2(1), bzdiff(1), bzgrep(1) -diff -Nru bzip2-1.0.2/config.h.in bzip2-1.0.2.new/config.h.in ---- bzip2-1.0.2/config.h.in Thu Jan 1 01:00:00 1970 -+++ bzip2-1.0.2.new/config.h.in Fri Feb 1 04:19:11 2002 -@@ -0,0 +1,17 @@ -+/* config.h.in. Generated automatically from configure.in by autoheader. */ -+ -+/* Name of package */ -+#undef PACKAGE -+ -+/* Version number of package */ -+#undef VERSION -+ -+/* Number of bits in a file offset, on hosts where this is settable. */ -+#undef _FILE_OFFSET_BITS -+ -+/* Define to make fseeko etc. visible, on some hosts. */ -+#undef _LARGEFILE_SOURCE -+ -+/* Define for large files, on AIX-style hosts. */ -+#undef _LARGE_FILES -+ -diff -Nru bzip2-1.0.2/configure.in bzip2-1.0.2.new/configure.in ---- bzip2-1.0.2/configure.in Thu Jan 1 01:00:00 1970 -+++ bzip2-1.0.2.new/configure.in Fri Feb 1 04:19:11 2002 -@@ -0,0 +1,10 @@ -+AC_INIT(bzip2.c) -+AM_INIT_AUTOMAKE(bzip2,1.0.1) -+AM_CONFIG_HEADER(config.h) -+AC_PROG_CC -+AM_PROG_LIBTOOL -+AC_PROG_LN_S -+AC_SYS_LARGEFILE -+AC_OUTPUT(Makefile -+ doc/Makefile -+ doc/pl/Makefile) -diff -Nru bzip2-1.0.2/crctable.c bzip2-1.0.2.new/crctable.c ++ $(LN_S) -f bzgrep $(DESTDIR)$(bindir)/bzegrep ++ $(LN_S) -f bzgrep $(DESTDIR)$(bindir)/bzfgrep ++ $(LN_S) -f bzmore $(DESTDIR)$(bindir)/bzless ++ $(LN_S) -f bzdiff $(DESTDIR)$(bindir)/bzcmp ++ ++install-data-hook: ++ echo '.so bzip2.1' >$(DESTDIR)$(mandir)/man1/bzip2recover.1 ++ echo '.so bzip2.1' >$(DESTDIR)$(mandir)/man1/bunzip2.1 ++ echo '.so bzip2.1' >$(DESTDIR)$(mandir)/man1/bzcat.1 ++ echo '.so bzgrep.1' >$(DESTDIR)$(mandir)/man1/bzegrep.1 ++ echo '.so bzgrep.1' >$(DESTDIR)$(mandir)/man1/bzfgrep.1 ++ echo '.so bzmore.1' >$(DESTDIR)$(mandir)/man1/bzless.1 ++ echo '.so bzdiff.1' >$(DESTDIR)$(mandir)/man1/bzcmp.1 --- bzip2-1.0.2/crctable.c Sun Dec 30 03:19:28 2001 +++ bzip2-1.0.2.new/crctable.c Fri Feb 1 04:19:11 2002 -@@ -58,6 +58,10 @@ +@@ -58,6 +58,9 @@ For more information on these sources, see the manual. --*/ +#ifdef HAVE_CONFIG_H +#include +#endif -+ #include "bzlib_private.h" -diff -Nru bzip2-1.0.2/decompress.c bzip2-1.0.2.new/decompress.c --- bzip2-1.0.2/decompress.c Sun Dec 30 21:45:53 2001 +++ bzip2-1.0.2.new/decompress.c Fri Feb 1 04:19:11 2002 -@@ -58,6 +58,10 @@ +@@ -58,6 +58,9 @@ For more information on these sources, see the manual. --*/ +#ifdef HAVE_CONFIG_H +#include +#endif -+ #include "bzlib_private.h" -diff -Nru bzip2-1.0.2/dlltest.c bzip2-1.0.2.new/dlltest.c --- bzip2-1.0.2/dlltest.c Sun Dec 30 20:44:07 2001 +++ bzip2-1.0.2.new/dlltest.c Fri Feb 1 04:19:11 2002 @@ -8,6 +8,10 @@ @@ -383,3509 +87,21 @@ diff -Nru bzip2-1.0.2/dlltest.c bzip2-1.0.2.new/dlltest.c #define BZ_IMPORT #include #include -diff -Nru bzip2-1.0.2/doc/Makefile.am bzip2-1.0.2.new/doc/Makefile.am ---- bzip2-1.0.2/doc/Makefile.am Thu Jan 1 01:00:00 1970 -+++ bzip2-1.0.2.new/doc/Makefile.am Fri Feb 1 04:27:21 2002 -@@ -0,0 +1,6 @@ -+ -+SUBDIRS = pl -+ -+man_MANS = bunzip2.1 bzcat.1 bzdiff.1 bzgrep.1 bzip2.1 \ -+ bzip2recover.1 bzmore.1 -+#info_TEXINFOS = bzip2.texi -diff -Nru bzip2-1.0.2/doc/bunzip2.1 bzip2-1.0.2.new/doc/bunzip2.1 ---- bzip2-1.0.2/doc/bunzip2.1 Thu Jan 1 01:00:00 1970 -+++ bzip2-1.0.2.new/doc/bunzip2.1 Fri Feb 1 04:19:11 2002 -@@ -0,0 +1 @@ -+.so bzip2.1 -\ No newline at end of file -diff -Nru bzip2-1.0.2/doc/bzcat.1 bzip2-1.0.2.new/doc/bzcat.1 ---- bzip2-1.0.2/doc/bzcat.1 Thu Jan 1 01:00:00 1970 -+++ bzip2-1.0.2.new/doc/bzcat.1 Fri Feb 1 04:19:11 2002 -@@ -0,0 +1 @@ -+.so bzip2.1 -\ No newline at end of file -diff -Nru bzip2-1.0.2/doc/bzdiff.1 bzip2-1.0.2.new/doc/bzdiff.1 ---- bzip2-1.0.2/doc/bzdiff.1 Thu Jan 1 01:00:00 1970 -+++ bzip2-1.0.2.new/doc/bzdiff.1 Sun Dec 30 03:12:35 2001 -@@ -0,0 +1,47 @@ -+\"Shamelessly copied from zmore.1 by Philippe Troin -+\"for Debian GNU/Linux -+.TH BZDIFF 1 -+.SH NAME -+bzcmp, bzdiff \- compare bzip2 compressed files -+.SH SYNOPSIS -+.B bzcmp -+[ cmp_options ] file1 -+[ file2 ] -+.br -+.B bzdiff -+[ diff_options ] file1 -+[ file2 ] -+.SH DESCRIPTION -+.I Bzcmp -+and -+.I bzdiff -+are used to invoke the -+.I cmp -+or the -+.I diff -+program on bzip2 compressed files. All options specified are passed -+directly to -+.I cmp -+or -+.IR diff "." -+If only 1 file is specified, then the files compared are -+.I file1 -+and an uncompressed -+.IR file1 ".bz2." -+If two files are specified, then they are uncompressed if necessary and fed to -+.I cmp -+or -+.IR diff "." -+The exit status from -+.I cmp -+or -+.I diff -+is preserved. -+.SH "SEE ALSO" -+cmp(1), diff(1), bzmore(1), bzless(1), bzgrep(1), bzip2(1) -+.SH BUGS -+Messages from the -+.I cmp -+or -+.I diff -+programs refer to temporary filenames instead of those specified. -diff -Nru bzip2-1.0.2/doc/bzgrep.1 bzip2-1.0.2.new/doc/bzgrep.1 ---- bzip2-1.0.2/doc/bzgrep.1 Thu Jan 1 01:00:00 1970 -+++ bzip2-1.0.2.new/doc/bzgrep.1 Sun Dec 30 03:12:35 2001 -@@ -0,0 +1,56 @@ -+\"Shamelessly copied from zmore.1 by Philippe Troin -+\"for Debian GNU/Linux -+.TH BZGREP 1 -+.SH NAME -+bzgrep, bzfgrep, bzegrep \- search possibly bzip2 compressed files for a regular expression -+.SH SYNOPSIS -+.B bzgrep -+[ grep_options ] -+.BI [\ -e\ ] " pattern" -+.IR filename ".\|.\|." -+.br -+.B bzegrep -+[ egrep_options ] -+.BI [\ -e\ ] " pattern" -+.IR filename ".\|.\|." -+.br -+.B bzfgrep -+[ fgrep_options ] -+.BI [\ -e\ ] " pattern" -+.IR filename ".\|.\|." -+.SH DESCRIPTION -+.IR Bzgrep -+is used to invoke the -+.I grep -+on bzip2-compressed files. All options specified are passed directly to -+.I grep. -+If no file is specified, then the standard input is decompressed -+if necessary and fed to grep. -+Otherwise the given files are uncompressed if necessary and fed to -+.I grep. -+.PP -+If -+.I bzgrep -+is invoked as -+.I bzegrep -+or -+.I bzfgrep -+then -+.I egrep -+or -+.I fgrep -+is used instead of -+.I grep. -+If the GREP environment variable is set, -+.I bzgrep -+uses it as the -+.I grep -+program to be invoked. For example: -+ -+ for sh: GREP=fgrep bzgrep string files -+ for csh: (setenv GREP fgrep; bzgrep string files) -+.SH AUTHOR -+Charles Levert (charles@comm.polymtl.ca). Adapted to bzip2 by Philippe -+Troin for Debian GNU/Linux. -+.SH "SEE ALSO" -+grep(1), egrep(1), fgrep(1), bzdiff(1), bzmore(1), bzless(1), bzip2(1) -diff -Nru bzip2-1.0.2/doc/bzip2.1 bzip2-1.0.2.new/doc/bzip2.1 ---- bzip2-1.0.2/doc/bzip2.1 Thu Jan 1 01:00:00 1970 -+++ bzip2-1.0.2.new/doc/bzip2.1 Thu Jan 3 00:14:36 2002 -@@ -0,0 +1,453 @@ -+.PU -+.TH bzip2 1 -+.SH NAME -+bzip2, bunzip2 \- a block-sorting file compressor, v1.0.2 -+.br -+bzcat \- decompresses files to stdout -+.br -+bzip2recover \- recovers data from damaged bzip2 files -+ -+.SH SYNOPSIS -+.ll +8 -+.B bzip2 -+.RB [ " \-cdfkqstvzVL123456789 " ] -+[ -+.I "filenames \&..." -+] -+.ll -8 -+.br -+.B bunzip2 -+.RB [ " \-fkvsVL " ] -+[ -+.I "filenames \&..." -+] -+.br -+.B bzcat -+.RB [ " \-s " ] -+[ -+.I "filenames \&..." -+] -+.br -+.B bzip2recover -+.I "filename" -+ -+.SH DESCRIPTION -+.I bzip2 -+compresses files using the Burrows-Wheeler block sorting -+text compression algorithm, and Huffman coding. Compression is -+generally considerably better than that achieved by more conventional -+LZ77/LZ78-based compressors, and approaches the performance of the PPM -+family of statistical compressors. -+ -+The command-line options are deliberately very similar to -+those of -+.I GNU gzip, -+but they are not identical. -+ -+.I bzip2 -+expects a list of file names to accompany the -+command-line flags. Each file is replaced by a compressed version of -+itself, with the name "original_name.bz2". -+Each compressed file -+has the same modification date, permissions, and, when possible, -+ownership as the corresponding original, so that these properties can -+be correctly restored at decompression time. File name handling is -+naive in the sense that there is no mechanism for preserving original -+file names, permissions, ownerships or dates in filesystems which lack -+these concepts, or have serious file name length restrictions, such as -+MS-DOS. -+ -+.I bzip2 -+and -+.I bunzip2 -+will by default not overwrite existing -+files. If you want this to happen, specify the \-f flag. -+ -+If no file names are specified, -+.I bzip2 -+compresses from standard -+input to standard output. In this case, -+.I bzip2 -+will decline to -+write compressed output to a terminal, as this would be entirely -+incomprehensible and therefore pointless. -+ -+.I bunzip2 -+(or -+.I bzip2 \-d) -+decompresses all -+specified files. Files which were not created by -+.I bzip2 -+will be detected and ignored, and a warning issued. -+.I bzip2 -+attempts to guess the filename for the decompressed file -+from that of the compressed file as follows: -+ -+ filename.bz2 becomes filename -+ filename.bz becomes filename -+ filename.tbz2 becomes filename.tar -+ filename.tbz becomes filename.tar -+ anyothername becomes anyothername.out -+ -+If the file does not end in one of the recognised endings, -+.I .bz2, -+.I .bz, -+.I .tbz2 -+or -+.I .tbz, -+.I bzip2 -+complains that it cannot -+guess the name of the original file, and uses the original name -+with -+.I .out -+appended. -+ -+As with compression, supplying no -+filenames causes decompression from -+standard input to standard output. -+ -+.I bunzip2 -+will correctly decompress a file which is the -+concatenation of two or more compressed files. The result is the -+concatenation of the corresponding uncompressed files. Integrity -+testing (\-t) -+of concatenated -+compressed files is also supported. -+ -+You can also compress or decompress files to the standard output by -+giving the \-c flag. Multiple files may be compressed and -+decompressed like this. The resulting outputs are fed sequentially to -+stdout. Compression of multiple files -+in this manner generates a stream -+containing multiple compressed file representations. Such a stream -+can be decompressed correctly only by -+.I bzip2 -+version 0.9.0 or -+later. Earlier versions of -+.I bzip2 -+will stop after decompressing -+the first file in the stream. -+ -+.I bzcat -+(or -+.I bzip2 -dc) -+decompresses all specified files to -+the standard output. -+ -+.I bzip2 -+will read arguments from the environment variables -+.I BZIP2 -+and -+.I BZIP, -+in that order, and will process them -+before any arguments read from the command line. This gives a -+convenient way to supply default arguments. -+ -+Compression is always performed, even if the compressed -+file is slightly -+larger than the original. Files of less than about one hundred bytes -+tend to get larger, since the compression mechanism has a constant -+overhead in the region of 50 bytes. Random data (including the output -+of most file compressors) is coded at about 8.05 bits per byte, giving -+an expansion of around 0.5%. -+ -+As a self-check for your protection, -+.I -+bzip2 -+uses 32-bit CRCs to -+make sure that the decompressed version of a file is identical to the -+original. This guards against corruption of the compressed data, and -+against undetected bugs in -+.I bzip2 -+(hopefully very unlikely). The -+chances of data corruption going undetected is microscopic, about one -+chance in four billion for each file processed. Be aware, though, that -+the check occurs upon decompression, so it can only tell you that -+something is wrong. It can't help you -+recover the original uncompressed -+data. You can use -+.I bzip2recover -+to try to recover data from -+damaged files. -+ -+Return values: 0 for a normal exit, 1 for environmental problems (file -+not found, invalid flags, I/O errors, &c), 2 to indicate a corrupt -+compressed file, 3 for an internal consistency error (eg, bug) which -+caused -+.I bzip2 -+to panic. -+ -+.SH OPTIONS -+.TP -+.B \-c --stdout -+Compress or decompress to standard output. -+.TP -+.B \-d --decompress -+Force decompression. -+.I bzip2, -+.I bunzip2 -+and -+.I bzcat -+are -+really the same program, and the decision about what actions to take is -+done on the basis of which name is used. This flag overrides that -+mechanism, and forces -+.I bzip2 -+to decompress. -+.TP -+.B \-z --compress -+The complement to \-d: forces compression, regardless of the -+invocation name. -+.TP -+.B \-t --test -+Check integrity of the specified file(s), but don't decompress them. -+This really performs a trial decompression and throws away the result. -+.TP -+.B \-f --force -+Force overwrite of output files. Normally, -+.I bzip2 -+will not overwrite -+existing output files. Also forces -+.I bzip2 -+to break hard links -+to files, which it otherwise wouldn't do. -+ -+bzip2 normally declines to decompress files which don't have the -+correct magic header bytes. If forced (-f), however, it will pass -+such files through unmodified. This is how GNU gzip behaves. -+.TP -+.B \-k --keep -+Keep (don't delete) input files during compression -+or decompression. -+.TP -+.B \-s --small -+Reduce memory usage, for compression, decompression and testing. Files -+are decompressed and tested using a modified algorithm which only -+requires 2.5 bytes per block byte. This means any file can be -+decompressed in 2300k of memory, albeit at about half the normal speed. -+ -+During compression, \-s selects a block size of 200k, which limits -+memory use to around the same figure, at the expense of your compression -+ratio. In short, if your machine is low on memory (8 megabytes or -+less), use \-s for everything. See MEMORY MANAGEMENT below. -+.TP -+.B \-q --quiet -+Suppress non-essential warning messages. Messages pertaining to -+I/O errors and other critical events will not be suppressed. -+.TP -+.B \-v --verbose -+Verbose mode -- show the compression ratio for each file processed. -+Further \-v's increase the verbosity level, spewing out lots of -+information which is primarily of interest for diagnostic purposes. -+.TP -+.B \-L --license -V --version -+Display the software version, license terms and conditions. -+.TP -+.B \-1 (or \-\-fast) to \-9 (or \-\-best) -+Set the block size to 100 k, 200 k .. 900 k when compressing. Has no -+effect when decompressing. See MEMORY MANAGEMENT below. -+The \-\-fast and \-\-best aliases are primarily for GNU gzip -+compatibility. In particular, \-\-fast doesn't make things -+significantly faster. -+And \-\-best merely selects the default behaviour. -+.TP -+.B \-- -+Treats all subsequent arguments as file names, even if they start -+with a dash. This is so you can handle files with names beginning -+with a dash, for example: bzip2 \-- \-myfilename. -+.TP -+.B \--repetitive-fast --repetitive-best -+These flags are redundant in versions 0.9.5 and above. They provided -+some coarse control over the behaviour of the sorting algorithm in -+earlier versions, which was sometimes useful. 0.9.5 and above have an -+improved algorithm which renders these flags irrelevant. -+ -+.SH MEMORY MANAGEMENT -+.I bzip2 -+compresses large files in blocks. The block size affects -+both the compression ratio achieved, and the amount of memory needed for -+compression and decompression. The flags \-1 through \-9 -+specify the block size to be 100,000 bytes through 900,000 bytes (the -+default) respectively. At decompression time, the block size used for -+compression is read from the header of the compressed file, and -+.I bunzip2 -+then allocates itself just enough memory to decompress -+the file. Since block sizes are stored in compressed files, it follows -+that the flags \-1 to \-9 are irrelevant to and so ignored -+during decompression. -+ -+Compression and decompression requirements, -+in bytes, can be estimated as: -+ -+ Compression: 400k + ( 8 x block size ) -+ -+ Decompression: 100k + ( 4 x block size ), or -+ 100k + ( 2.5 x block size ) -+ -+Larger block sizes give rapidly diminishing marginal returns. Most of -+the compression comes from the first two or three hundred k of block -+size, a fact worth bearing in mind when using -+.I bzip2 -+on small machines. -+It is also important to appreciate that the decompression memory -+requirement is set at compression time by the choice of block size. -+ -+For files compressed with the default 900k block size, -+.I bunzip2 -+will require about 3700 kbytes to decompress. To support decompression -+of any file on a 4 megabyte machine, -+.I bunzip2 -+has an option to -+decompress using approximately half this amount of memory, about 2300 -+kbytes. Decompression speed is also halved, so you should use this -+option only where necessary. The relevant flag is -s. -+ -+In general, try and use the largest block size memory constraints allow, -+since that maximises the compression achieved. Compression and -+decompression speed are virtually unaffected by block size. -+ -+Another significant point applies to files which fit in a single block -+-- that means most files you'd encounter using a large block size. The -+amount of real memory touched is proportional to the size of the file, -+since the file is smaller than a block. For example, compressing a file -+20,000 bytes long with the flag -9 will cause the compressor to -+allocate around 7600k of memory, but only touch 400k + 20000 * 8 = 560 -+kbytes of it. Similarly, the decompressor will allocate 3700k but only -+touch 100k + 20000 * 4 = 180 kbytes. -+ -+Here is a table which summarises the maximum memory usage for different -+block sizes. Also recorded is the total compressed size for 14 files of -+the Calgary Text Compression Corpus totalling 3,141,622 bytes. This -+column gives some feel for how compression varies with block size. -+These figures tend to understate the advantage of larger block sizes for -+larger files, since the Corpus is dominated by smaller files. -+ -+ Compress Decompress Decompress Corpus -+ Flag usage usage -s usage Size -+ -+ -1 1200k 500k 350k 914704 -+ -2 2000k 900k 600k 877703 -+ -3 2800k 1300k 850k 860338 -+ -4 3600k 1700k 1100k 846899 -+ -5 4400k 2100k 1350k 845160 -+ -6 5200k 2500k 1600k 838626 -+ -7 6100k 2900k 1850k 834096 -+ -8 6800k 3300k 2100k 828642 -+ -9 7600k 3700k 2350k 828642 -+ -+.SH RECOVERING DATA FROM DAMAGED FILES -+.I bzip2 -+compresses files in blocks, usually 900kbytes long. Each -+block is handled independently. If a media or transmission error causes -+a multi-block .bz2 -+file to become damaged, it may be possible to -+recover data from the undamaged blocks in the file. -+ -+The compressed representation of each block is delimited by a 48-bit -+pattern, which makes it possible to find the block boundaries with -+reasonable certainty. Each block also carries its own 32-bit CRC, so -+damaged blocks can be distinguished from undamaged ones. -+ -+.I bzip2recover -+is a simple program whose purpose is to search for -+blocks in .bz2 files, and write each block out into its own .bz2 -+file. You can then use -+.I bzip2 -+\-t -+to test the -+integrity of the resulting files, and decompress those which are -+undamaged. -+ -+.I bzip2recover -+takes a single argument, the name of the damaged file, -+and writes a number of files "rec00001file.bz2", -+"rec00002file.bz2", etc, containing the extracted blocks. -+The output filenames are designed so that the use of -+wildcards in subsequent processing -- for example, -+"bzip2 -dc rec*file.bz2 > recovered_data" -- processes the files in -+the correct order. -+ -+.I bzip2recover -+should be of most use dealing with large .bz2 -+files, as these will contain many blocks. It is clearly -+futile to use it on damaged single-block files, since a -+damaged block cannot be recovered. If you wish to minimise -+any potential data loss through media or transmission errors, -+you might consider compressing with a smaller -+block size. -+ -+.SH PERFORMANCE NOTES -+The sorting phase of compression gathers together similar strings in the -+file. Because of this, files containing very long runs of repeated -+symbols, like "aabaabaabaab ..." (repeated several hundred times) may -+compress more slowly than normal. Versions 0.9.5 and above fare much -+better than previous versions in this respect. The ratio between -+worst-case and average-case compression time is in the region of 10:1. -+For previous versions, this figure was more like 100:1. You can use the -+\-vvvv option to monitor progress in great detail, if you want. -+ -+Decompression speed is unaffected by these phenomena. -+ -+.I bzip2 -+usually allocates several megabytes of memory to operate -+in, and then charges all over it in a fairly random fashion. This means -+that performance, both for compressing and decompressing, is largely -+determined by the speed at which your machine can service cache misses. -+Because of this, small changes to the code to reduce the miss rate have -+been observed to give disproportionately large performance improvements. -+I imagine -+.I bzip2 -+will perform best on machines with very large caches. -+ -+.SH CAVEATS -+I/O error messages are not as helpful as they could be. -+.I bzip2 -+tries hard to detect I/O errors and exit cleanly, but the details of -+what the problem is sometimes seem rather misleading. -+ -+This manual page pertains to version 1.0.2 of -+.I bzip2. -+Compressed data created by this version is entirely forwards and -+backwards compatible with the previous public releases, versions -+0.1pl2, 0.9.0, 0.9.5, 1.0.0 and 1.0.1, but with the following -+exception: 0.9.0 and above can correctly decompress multiple -+concatenated compressed files. 0.1pl2 cannot do this; it will stop -+after decompressing just the first file in the stream. -+ -+.I bzip2recover -+versions prior to this one, 1.0.2, used 32-bit integers to represent -+bit positions in compressed files, so it could not handle compressed -+files more than 512 megabytes long. Version 1.0.2 and above uses -+64-bit ints on some platforms which support them (GNU supported -+targets, and Windows). To establish whether or not bzip2recover was -+built with such a limitation, run it without arguments. In any event -+you can build yourself an unlimited version if you can recompile it -+with MaybeUInt64 set to be an unsigned 64-bit integer. -+ -+ -+ -+.SH AUTHOR -+Julian Seward, jseward@acm.org. -+ -+http://sources.redhat.com/bzip2 -+ -+The ideas embodied in -+.I bzip2 -+are due to (at least) the following -+people: Michael Burrows and David Wheeler (for the block sorting -+transformation), David Wheeler (again, for the Huffman coder), Peter -+Fenwick (for the structured coding model in the original -+.I bzip, -+and many refinements), and Alistair Moffat, Radford Neal and Ian Witten -+(for the arithmetic coder in the original -+.I bzip). -+I am much -+indebted for their help, support and advice. See the manual in the -+source distribution for pointers to sources of documentation. Christian -+von Roques encouraged me to look for faster sorting algorithms, so as to -+speed up compression. Bela Lubkin encouraged me to improve the -+worst-case compression performance. -+The bz* scripts are derived from those of GNU gzip. -+Many people sent patches, helped -+with portability problems, lent machines, gave advice and were generally -+helpful. -diff -Nru bzip2-1.0.2/doc/bzip2.texi bzip2-1.0.2.new/doc/bzip2.texi ---- bzip2-1.0.2/doc/bzip2.texi Thu Jan 1 01:00:00 1970 -+++ bzip2-1.0.2.new/doc/bzip2.texi Fri Feb 1 04:26:21 2002 -@@ -0,0 +1,2234 @@ -+\input texinfo @c -*- Texinfo -*- -+@setfilename bzip2.info -+ -+@ignore -+This file documents bzip2 version 1.0.2, and associated library -+libbzip2, written by Julian Seward (jseward@acm.org). -+ -+Copyright (C) 1996-2002 Julian R Seward -+ -+Permission is granted to make and distribute verbatim copies of -+this manual provided the copyright notice and this permission notice -+are preserved on all copies. -+ -+Permission is granted to copy and distribute translations of this manual -+into another language, under the above conditions for verbatim copies. -+@end ignore -+ -+@iftex -+@c @finalout -+@settitle bzip2 and libbzip2 -+@titlepage -+@title bzip2 and libbzip2 -+@subtitle a program and library for data compression -+@subtitle copyright (C) 1996-2002 Julian Seward -+@subtitle version 1.0.2 of 30 December 2001 -+@author Julian Seward -+ -+@end titlepage -+ -+@parindent 0mm -+@parskip 2mm -+ -+@end iftex -+@node Top,,, (dir) -+ -+The following text is the License for this software. You should -+find it identical to that contained in the file LICENSE in the -+source distribution. -+ -+------------------ START OF THE LICENSE ------------------ -+ -+This program, @code{bzip2}, -+and associated library @code{libbzip2}, are -+Copyright (C) 1996-2002 Julian R Seward. All rights reserved. -+ -+Redistribution and use in source and binary forms, with or without -+modification, are permitted provided that the following conditions -+are met: -+@itemize @bullet -+@item -+ Redistributions of source code must retain the above copyright -+ notice, this list of conditions and the following disclaimer. -+@item -+ The origin of this software must not be misrepresented; you must -+ not claim that you wrote the original software. If you use this -+ software in a product, an acknowledgment in the product -+ documentation would be appreciated but is not required. -+@item -+ Altered source versions must be plainly marked as such, and must -+ not be misrepresented as being the original software. -+@item -+ The name of the author may not be used to endorse or promote -+ products derived from this software without specific prior written -+ permission. -+@end itemize -+THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS -+OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY -+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE -+GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, -+WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -+ -+Julian Seward, Cambridge, UK. -+ -+@code{jseward@@acm.org} -+ -+@code{bzip2}/@code{libbzip2} version 1.0.2 of 30 December 2001. -+ -+------------------ END OF THE LICENSE ------------------ -+ -+Web sites: -+ -+@code{http://sources.redhat.com/bzip2} -+ -+@code{http://www.cacheprof.org} -+ -+PATENTS: To the best of my knowledge, @code{bzip2} does not use any patented -+algorithms. However, I do not have the resources available to carry out -+a full patent search. Therefore I cannot give any guarantee of the -+above statement. -+ -+ -+ -+ -+ -+ -+ -+@chapter Introduction -+ -+@code{bzip2} compresses files using the Burrows-Wheeler -+block-sorting text compression algorithm, and Huffman coding. -+Compression is generally considerably better than that -+achieved by more conventional LZ77/LZ78-based compressors, -+and approaches the performance of the PPM family of statistical compressors. -+ -+@code{bzip2} is built on top of @code{libbzip2}, a flexible library -+for handling compressed data in the @code{bzip2} format. This manual -+describes both how to use the program and -+how to work with the library interface. Most of the -+manual is devoted to this library, not the program, -+which is good news if your interest is only in the program. -+ -+Chapter 2 describes how to use @code{bzip2}; this is the only part -+you need to read if you just want to know how to operate the program. -+Chapter 3 describes the programming interfaces in detail, and -+Chapter 4 records some miscellaneous notes which I thought -+ought to be recorded somewhere. -+ -+ -+@chapter How to use @code{bzip2} -+ -+This chapter contains a copy of the @code{bzip2} man page, -+and nothing else. -+ -+@quotation -+ -+@unnumberedsubsubsec NAME -+@itemize -+@item @code{bzip2}, @code{bunzip2} -+- a block-sorting file compressor, v1.0.2 -+@item @code{bzcat} -+- decompresses files to stdout -+@item @code{bzip2recover} -+- recovers data from damaged bzip2 files -+@end itemize -+ -+@unnumberedsubsubsec SYNOPSIS -+@itemize -+@item @code{bzip2} [ -cdfkqstvzVL123456789 ] [ filenames ... ] -+@item @code{bunzip2} [ -fkvsVL ] [ filenames ... ] -+@item @code{bzcat} [ -s ] [ filenames ... ] -+@item @code{bzip2recover} filename -+@end itemize -+ -+@unnumberedsubsubsec DESCRIPTION -+ -+@code{bzip2} compresses files using the Burrows-Wheeler block sorting -+text compression algorithm, and Huffman coding. Compression is -+generally considerably better than that achieved by more conventional -+LZ77/LZ78-based compressors, and approaches the performance of the PPM -+family of statistical compressors. -+ -+The command-line options are deliberately very similar to those of GNU -+@code{gzip}, but they are not identical. -+ -+@code{bzip2} expects a list of file names to accompany the command-line -+flags. Each file is replaced by a compressed version of itself, with -+the name @code{original_name.bz2}. Each compressed file has the same -+modification date, permissions, and, when possible, ownership as the -+corresponding original, so that these properties can be correctly -+restored at decompression time. File name handling is naive in the -+sense that there is no mechanism for preserving original file names, -+permissions, ownerships or dates in filesystems which lack these -+concepts, or have serious file name length restrictions, such as MS-DOS. -+ -+@code{bzip2} and @code{bunzip2} will by default not overwrite existing -+files. If you want this to happen, specify the @code{-f} flag. -+ -+If no file names are specified, @code{bzip2} compresses from standard -+input to standard output. In this case, @code{bzip2} will decline to -+write compressed output to a terminal, as this would be entirely -+incomprehensible and therefore pointless. -+ -+@code{bunzip2} (or @code{bzip2 -d}) decompresses all -+specified files. Files which were not created by @code{bzip2} -+will be detected and ignored, and a warning issued. -+@code{bzip2} attempts to guess the filename for the decompressed file -+from that of the compressed file as follows: -+@itemize -+@item @code{filename.bz2 } becomes @code{filename} -+@item @code{filename.bz } becomes @code{filename} -+@item @code{filename.tbz2} becomes @code{filename.tar} -+@item @code{filename.tbz } becomes @code{filename.tar} -+@item @code{anyothername } becomes @code{anyothername.out} -+@end itemize -+If the file does not end in one of the recognised endings, -+@code{.bz2}, @code{.bz}, -+@code{.tbz2} or @code{.tbz}, @code{bzip2} complains that it cannot -+guess the name of the original file, and uses the original name -+with @code{.out} appended. -+ -+As with compression, supplying no -+filenames causes decompression from standard input to standard output. -+ -+@code{bunzip2} will correctly decompress a file which is the -+concatenation of two or more compressed files. The result is the -+concatenation of the corresponding uncompressed files. Integrity -+testing (@code{-t}) of concatenated compressed files is also supported. -+ -+You can also compress or decompress files to the standard output by -+giving the @code{-c} flag. Multiple files may be compressed and -+decompressed like this. The resulting outputs are fed sequentially to -+stdout. Compression of multiple files in this manner generates a stream -+containing multiple compressed file representations. Such a stream -+can be decompressed correctly only by @code{bzip2} version 0.9.0 or -+later. Earlier versions of @code{bzip2} will stop after decompressing -+the first file in the stream. -+ -+@code{bzcat} (or @code{bzip2 -dc}) decompresses all specified files to -+the standard output. -+ -+@code{bzip2} will read arguments from the environment variables -+@code{BZIP2} and @code{BZIP}, in that order, and will process them -+before any arguments read from the command line. This gives a -+convenient way to supply default arguments. -+ -+Compression is always performed, even if the compressed file is slightly -+larger than the original. Files of less than about one hundred bytes -+tend to get larger, since the compression mechanism has a constant -+overhead in the region of 50 bytes. Random data (including the output -+of most file compressors) is coded at about 8.05 bits per byte, giving -+an expansion of around 0.5%. -+ -+As a self-check for your protection, @code{bzip2} uses 32-bit CRCs to -+make sure that the decompressed version of a file is identical to the -+original. This guards against corruption of the compressed data, and -+against undetected bugs in @code{bzip2} (hopefully very unlikely). The -+chances of data corruption going undetected is microscopic, about one -+chance in four billion for each file processed. Be aware, though, that -+the check occurs upon decompression, so it can only tell you that -+something is wrong. It can't help you recover the original uncompressed -+data. You can use @code{bzip2recover} to try to recover data from -+damaged files. -+ -+Return values: 0 for a normal exit, 1 for environmental problems (file -+not found, invalid flags, I/O errors, &c), 2 to indicate a corrupt -+compressed file, 3 for an internal consistency error (eg, bug) which -+caused @code{bzip2} to panic. -+ -+ -+@unnumberedsubsubsec OPTIONS -+@table @code -+@item -c --stdout -+Compress or decompress to standard output. -+@item -d --decompress -+Force decompression. @code{bzip2}, @code{bunzip2} and @code{bzcat} are -+really the same program, and the decision about what actions to take is -+done on the basis of which name is used. This flag overrides that -+mechanism, and forces bzip2 to decompress. -+@item -z --compress -+The complement to @code{-d}: forces compression, regardless of the -+invokation name. -+@item -t --test -+Check integrity of the specified file(s), but don't decompress them. -+This really performs a trial decompression and throws away the result. -+@item -f --force -+Force overwrite of output files. Normally, @code{bzip2} will not overwrite -+existing output files. Also forces @code{bzip2} to break hard links -+to files, which it otherwise wouldn't do. -+ -+@code{bzip2} normally declines to decompress files which don't have the -+correct magic header bytes. If forced (@code{-f}), however, it will -+pass such files through unmodified. This is how GNU @code{gzip} -+behaves. -+@item -k --keep -+Keep (don't delete) input files during compression -+or decompression. -+@item -s --small -+Reduce memory usage, for compression, decompression and testing. Files -+are decompressed and tested using a modified algorithm which only -+requires 2.5 bytes per block byte. This means any file can be -+decompressed in 2300k of memory, albeit at about half the normal speed. -+ -+During compression, @code{-s} selects a block size of 200k, which limits -+memory use to around the same figure, at the expense of your compression -+ratio. In short, if your machine is low on memory (8 megabytes or -+less), use -s for everything. See MEMORY MANAGEMENT below. -+@item -q --quiet -+Suppress non-essential warning messages. Messages pertaining to -+I/O errors and other critical events will not be suppressed. -+@item -v --verbose -+Verbose mode -- show the compression ratio for each file processed. -+Further @code{-v}'s increase the verbosity level, spewing out lots of -+information which is primarily of interest for diagnostic purposes. -+@item -L --license -V --version -+Display the software version, license terms and conditions. -+@item -1 (or --fast) to -9 (or --best) -+Set the block size to 100 k, 200 k .. 900 k when compressing. Has no -+effect when decompressing. See MEMORY MANAGEMENT below. -+The @code{--fast} and @code{--best} aliases are primarily for GNU -+@code{gzip} compatibility. In particular, @code{--fast} doesn't make -+things significantly faster. And @code{--best} merely selects the -+default behaviour. -+@item -- -+Treats all subsequent arguments as file names, even if they start -+with a dash. This is so you can handle files with names beginning -+with a dash, for example: @code{bzip2 -- -myfilename}. -+@item --repetitive-fast -+@item --repetitive-best -+These flags are redundant in versions 0.9.5 and above. They provided -+some coarse control over the behaviour of the sorting algorithm in -+earlier versions, which was sometimes useful. 0.9.5 and above have an -+improved algorithm which renders these flags irrelevant. -+@end table -+ -+ -+@unnumberedsubsubsec MEMORY MANAGEMENT -+ -+@code{bzip2} compresses large files in blocks. The block size affects -+both the compression ratio achieved, and the amount of memory needed for -+compression and decompression. The flags @code{-1} through @code{-9} -+specify the block size to be 100,000 bytes through 900,000 bytes (the -+default) respectively. At decompression time, the block size used for -+compression is read from the header of the compressed file, and -+@code{bunzip2} then allocates itself just enough memory to decompress -+the file. Since block sizes are stored in compressed files, it follows -+that the flags @code{-1} to @code{-9} are irrelevant to and so ignored -+during decompression. -+ -+Compression and decompression requirements, in bytes, can be estimated -+as: -+@example -+ Compression: 400k + ( 8 x block size ) -+ -+ Decompression: 100k + ( 4 x block size ), or -+ 100k + ( 2.5 x block size ) -+@end example -+Larger block sizes give rapidly diminishing marginal returns. Most of -+the compression comes from the first two or three hundred k of block -+size, a fact worth bearing in mind when using @code{bzip2} on small machines. -+It is also important to appreciate that the decompression memory -+requirement is set at compression time by the choice of block size. -+ -+For files compressed with the default 900k block size, @code{bunzip2} -+will require about 3700 kbytes to decompress. To support decompression -+of any file on a 4 megabyte machine, @code{bunzip2} has an option to -+decompress using approximately half this amount of memory, about 2300 -+kbytes. Decompression speed is also halved, so you should use this -+option only where necessary. The relevant flag is @code{-s}. -+ -+In general, try and use the largest block size memory constraints allow, -+since that maximises the compression achieved. Compression and -+decompression speed are virtually unaffected by block size. -+ -+Another significant point applies to files which fit in a single block -+-- that means most files you'd encounter using a large block size. The -+amount of real memory touched is proportional to the size of the file, -+since the file is smaller than a block. For example, compressing a file -+20,000 bytes long with the flag @code{-9} will cause the compressor to -+allocate around 7600k of memory, but only touch 400k + 20000 * 8 = 560 -+kbytes of it. Similarly, the decompressor will allocate 3700k but only -+touch 100k + 20000 * 4 = 180 kbytes. -+ -+Here is a table which summarises the maximum memory usage for different -+block sizes. Also recorded is the total compressed size for 14 files of -+the Calgary Text Compression Corpus totalling 3,141,622 bytes. This -+column gives some feel for how compression varies with block size. -+These figures tend to understate the advantage of larger block sizes for -+larger files, since the Corpus is dominated by smaller files. -+@example -+ Compress Decompress Decompress Corpus -+ Flag usage usage -s usage Size -+ -+ -1 1200k 500k 350k 914704 -+ -2 2000k 900k 600k 877703 -+ -3 2800k 1300k 850k 860338 -+ -4 3600k 1700k 1100k 846899 -+ -5 4400k 2100k 1350k 845160 -+ -6 5200k 2500k 1600k 838626 -+ -7 6100k 2900k 1850k 834096 -+ -8 6800k 3300k 2100k 828642 -+ -9 7600k 3700k 2350k 828642 -+@end example -+ -+@unnumberedsubsubsec RECOVERING DATA FROM DAMAGED FILES -+ -+@code{bzip2} compresses files in blocks, usually 900kbytes long. Each -+block is handled independently. If a media or transmission error causes -+a multi-block @code{.bz2} file to become damaged, it may be possible to -+recover data from the undamaged blocks in the file. -+ -+The compressed representation of each block is delimited by a 48-bit -+pattern, which makes it possible to find the block boundaries with -+reasonable certainty. Each block also carries its own 32-bit CRC, so -+damaged blocks can be distinguished from undamaged ones. -+ -+@code{bzip2recover} is a simple program whose purpose is to search for -+blocks in @code{.bz2} files, and write each block out into its own -+@code{.bz2} file. You can then use @code{bzip2 -t} to test the -+integrity of the resulting files, and decompress those which are -+undamaged. -+ -+@code{bzip2recover} -+takes a single argument, the name of the damaged file, and writes a -+number of files @code{rec00001file.bz2}, @code{rec00002file.bz2}, etc, -+containing the extracted blocks. The output filenames are designed so -+that the use of wildcards in subsequent processing -- for example, -+@code{bzip2 -dc rec*file.bz2 > recovered_data} -- processes the files in -+the correct order. -+ -+@code{bzip2recover} should be of most use dealing with large @code{.bz2} -+files, as these will contain many blocks. It is clearly futile to use -+it on damaged single-block files, since a damaged block cannot be -+recovered. If you wish to minimise any potential data loss through -+media or transmission errors, you might consider compressing with a -+smaller block size. -+ -+ -+@unnumberedsubsubsec PERFORMANCE NOTES -+ -+The sorting phase of compression gathers together similar strings in the -+file. Because of this, files containing very long runs of repeated -+symbols, like "aabaabaabaab ..." (repeated several hundred times) may -+compress more slowly than normal. Versions 0.9.5 and above fare much -+better than previous versions in this respect. The ratio between -+worst-case and average-case compression time is in the region of 10:1. -+For previous versions, this figure was more like 100:1. You can use the -+@code{-vvvv} option to monitor progress in great detail, if you want. -+ -+Decompression speed is unaffected by these phenomena. -+ -+@code{bzip2} usually allocates several megabytes of memory to operate -+in, and then charges all over it in a fairly random fashion. This means -+that performance, both for compressing and decompressing, is largely -+determined by the speed at which your machine can service cache misses. -+Because of this, small changes to the code to reduce the miss rate have -+been observed to give disproportionately large performance improvements. -+I imagine @code{bzip2} will perform best on machines with very large -+caches. -+ -+ -+@unnumberedsubsubsec CAVEATS -+ -+I/O error messages are not as helpful as they could be. @code{bzip2} -+tries hard to detect I/O errors and exit cleanly, but the details of -+what the problem is sometimes seem rather misleading. -+ -+This manual page pertains to version 1.0.2 of @code{bzip2}. Compressed -+data created by this version is entirely forwards and backwards -+compatible with the previous public releases, versions 0.1pl2, 0.9.0, -+0.9.5, 1.0.0 and 1.0.1, but with the following exception: 0.9.0 and -+above can correctly decompress multiple concatenated compressed files. -+0.1pl2 cannot do this; it will stop after decompressing just the first -+file in the stream. -+ -+@code{bzip2recover} versions prior to this one, 1.0.2, used 32-bit -+integers to represent bit positions in compressed files, so it could not -+handle compressed files more than 512 megabytes long. Version 1.0.2 and -+above uses 64-bit ints on some platforms which support them (GNU -+supported targets, and Windows). To establish whether or not -+@code{bzip2recover} was built with such a limitation, run it without -+arguments. In any event you can build yourself an unlimited version if -+you can recompile it with @code{MaybeUInt64} set to be an unsigned -+64-bit integer. -+ -+ -+ -+@unnumberedsubsubsec AUTHOR -+Julian Seward, @code{jseward@@acm.org}. -+ -+@code{http://sources.redhat.com/bzip2} -+ -+The ideas embodied in @code{bzip2} are due to (at least) the following -+people: Michael Burrows and David Wheeler (for the block sorting -+transformation), David Wheeler (again, for the Huffman coder), Peter -+Fenwick (for the structured coding model in the original @code{bzip}, -+and many refinements), and Alistair Moffat, Radford Neal and Ian Witten -+(for the arithmetic coder in the original @code{bzip}). I am much -+indebted for their help, support and advice. See the manual in the -+source distribution for pointers to sources of documentation. Christian -+von Roques encouraged me to look for faster sorting algorithms, so as to -+speed up compression. Bela Lubkin encouraged me to improve the -+worst-case compression performance. The @code{bz*} scripts are derived -+from those of GNU @code{gzip}. Many people sent patches, helped with -+portability problems, lent machines, gave advice and were generally -+helpful. -+ -+@end quotation -+ -+ -+ -+ -+@chapter Programming with @code{libbzip2} -+ -+This chapter describes the programming interface to @code{libbzip2}. -+ -+For general background information, particularly about memory -+use and performance aspects, you'd be well advised to read Chapter 2 -+as well. -+ -+@section Top-level structure -+ -+@code{libbzip2} is a flexible library for compressing and decompressing -+data in the @code{bzip2} data format. Although packaged as a single -+entity, it helps to regard the library as three separate parts: the low -+level interface, and the high level interface, and some utility -+functions. -+ -+The structure of @code{libbzip2}'s interfaces is similar to -+that of Jean-loup Gailly's and Mark Adler's excellent @code{zlib} -+library. -+ -+All externally visible symbols have names beginning @code{BZ2_}. -+This is new in version 1.0. The intention is to minimise pollution -+of the namespaces of library clients. -+ -+@subsection Low-level summary -+ -+This interface provides services for compressing and decompressing -+data in memory. There's no provision for dealing with files, streams -+or any other I/O mechanisms, just straight memory-to-memory work. -+In fact, this part of the library can be compiled without inclusion -+of @code{stdio.h}, which may be helpful for embedded applications. -+ -+The low-level part of the library has no global variables and -+is therefore thread-safe. -+ -+Six routines make up the low level interface: -+@code{BZ2_bzCompressInit}, @code{BZ2_bzCompress}, and @* @code{BZ2_bzCompressEnd} -+for compression, -+and a corresponding trio @code{BZ2_bzDecompressInit}, @* @code{BZ2_bzDecompress} -+and @code{BZ2_bzDecompressEnd} for decompression. -+The @code{*Init} functions allocate -+memory for compression/decompression and do other -+initialisations, whilst the @code{*End} functions close down operations -+and release memory. -+ -+The real work is done by @code{BZ2_bzCompress} and @code{BZ2_bzDecompress}. -+These compress and decompress data from a user-supplied input buffer -+to a user-supplied output buffer. These buffers can be any size; -+arbitrary quantities of data are handled by making repeated calls -+to these functions. This is a flexible mechanism allowing a -+consumer-pull style of activity, or producer-push, or a mixture of -+both. -+ -+ -+ -+@subsection High-level summary -+ -+This interface provides some handy wrappers around the low-level -+interface to facilitate reading and writing @code{bzip2} format -+files (@code{.bz2} files). The routines provide hooks to facilitate -+reading files in which the @code{bzip2} data stream is embedded -+within some larger-scale file structure, or where there are -+multiple @code{bzip2} data streams concatenated end-to-end. -+ -+For reading files, @code{BZ2_bzReadOpen}, @code{BZ2_bzRead}, -+@code{BZ2_bzReadClose} and @* @code{BZ2_bzReadGetUnused} are supplied. For -+writing files, @code{BZ2_bzWriteOpen}, @code{BZ2_bzWrite} and -+@code{BZ2_bzWriteFinish} are available. -+ -+As with the low-level library, no global variables are used -+so the library is per se thread-safe. However, if I/O errors -+occur whilst reading or writing the underlying compressed files, -+you may have to consult @code{errno} to determine the cause of -+the error. In that case, you'd need a C library which correctly -+supports @code{errno} in a multithreaded environment. -+ -+To make the library a little simpler and more portable, -+@code{BZ2_bzReadOpen} and @code{BZ2_bzWriteOpen} require you to pass them file -+handles (@code{FILE*}s) which have previously been opened for reading or -+writing respectively. That avoids portability problems associated with -+file operations and file attributes, whilst not being much of an -+imposition on the programmer. -+ -+ -+ -+@subsection Utility functions summary -+For very simple needs, @code{BZ2_bzBuffToBuffCompress} and -+@code{BZ2_bzBuffToBuffDecompress} are provided. These compress -+data in memory from one buffer to another buffer in a single -+function call. You should assess whether these functions -+fulfill your memory-to-memory compression/decompression -+requirements before investing effort in understanding the more -+general but more complex low-level interface. -+ -+Yoshioka Tsuneo (@code{QWF00133@@niftyserve.or.jp} / -+@code{tsuneo-y@@is.aist-nara.ac.jp}) has contributed some functions to -+give better @code{zlib} compatibility. These functions are -+@code{BZ2_bzopen}, @code{BZ2_bzread}, @code{BZ2_bzwrite}, @code{BZ2_bzflush}, -+@code{BZ2_bzclose}, -+@code{BZ2_bzerror} and @code{BZ2_bzlibVersion}. You may find these functions -+more convenient for simple file reading and writing, than those in the -+high-level interface. These functions are not (yet) officially part of -+the library, and are minimally documented here. If they break, you -+get to keep all the pieces. I hope to document them properly when time -+permits. -+ -+Yoshioka also contributed modifications to allow the library to be -+built as a Windows DLL. -+ -+ -+@section Error handling -+ -+The library is designed to recover cleanly in all situations, including -+the worst-case situation of decompressing random data. I'm not -+100% sure that it can always do this, so you might want to add -+a signal handler to catch segmentation violations during decompression -+if you are feeling especially paranoid. I would be interested in -+hearing more about the robustness of the library to corrupted -+compressed data. -+ -+Version 1.0 is much more robust in this respect than -+0.9.0 or 0.9.5. Investigations with Checker (a tool for -+detecting problems with memory management, similar to Purify) -+indicate that, at least for the few files I tested, all single-bit -+errors in the decompressed data are caught properly, with no -+segmentation faults, no reads of uninitialised data and no -+out of range reads or writes. So it's certainly much improved, -+although I wouldn't claim it to be totally bombproof. -+ -+The file @code{bzlib.h} contains all definitions needed to use -+the library. In particular, you should definitely not include -+@code{bzlib_private.h}. -+ -+In @code{bzlib.h}, the various return values are defined. The following -+list is not intended as an exhaustive description of the circumstances -+in which a given value may be returned -- those descriptions are given -+later. Rather, it is intended to convey the rough meaning of each -+return value. The first five actions are normal and not intended to -+denote an error situation. -+@table @code -+@item BZ_OK -+The requested action was completed successfully. -+@item BZ_RUN_OK -+@itemx BZ_FLUSH_OK -+@itemx BZ_FINISH_OK -+In @code{BZ2_bzCompress}, the requested flush/finish/nothing-special action -+was completed successfully. -+@item BZ_STREAM_END -+Compression of data was completed, or the logical stream end was -+detected during decompression. -+@end table -+ -+The following return values indicate an error of some kind. -+@table @code -+@item BZ_CONFIG_ERROR -+Indicates that the library has been improperly compiled on your -+platform -- a major configuration error. Specifically, it means -+that @code{sizeof(char)}, @code{sizeof(short)} and @code{sizeof(int)} -+are not 1, 2 and 4 respectively, as they should be. Note that the -+library should still work properly on 64-bit platforms which follow -+the LP64 programming model -- that is, where @code{sizeof(long)} -+and @code{sizeof(void*)} are 8. Under LP64, @code{sizeof(int)} is -+still 4, so @code{libbzip2}, which doesn't use the @code{long} type, -+is OK. -+@item BZ_SEQUENCE_ERROR -+When using the library, it is important to call the functions in the -+correct sequence and with data structures (buffers etc) in the correct -+states. @code{libbzip2} checks as much as it can to ensure this is -+happening, and returns @code{BZ_SEQUENCE_ERROR} if not. Code which -+complies precisely with the function semantics, as detailed below, -+should never receive this value; such an event denotes buggy code -+which you should investigate. -+@item BZ_PARAM_ERROR -+Returned when a parameter to a function call is out of range -+or otherwise manifestly incorrect. As with @code{BZ_SEQUENCE_ERROR}, -+this denotes a bug in the client code. The distinction between -+@code{BZ_PARAM_ERROR} and @code{BZ_SEQUENCE_ERROR} is a bit hazy, but still worth -+making. -+@item BZ_MEM_ERROR -+Returned when a request to allocate memory failed. Note that the -+quantity of memory needed to decompress a stream cannot be determined -+until the stream's header has been read. So @code{BZ2_bzDecompress} and -+@code{BZ2_bzRead} may return @code{BZ_MEM_ERROR} even though some of -+the compressed data has been read. The same is not true for -+compression; once @code{BZ2_bzCompressInit} or @code{BZ2_bzWriteOpen} have -+successfully completed, @code{BZ_MEM_ERROR} cannot occur. -+@item BZ_DATA_ERROR -+Returned when a data integrity error is detected during decompression. -+Most importantly, this means when stored and computed CRCs for the -+data do not match. This value is also returned upon detection of any -+other anomaly in the compressed data. -+@item BZ_DATA_ERROR_MAGIC -+As a special case of @code{BZ_DATA_ERROR}, it is sometimes useful to -+know when the compressed stream does not start with the correct -+magic bytes (@code{'B' 'Z' 'h'}). -+@item BZ_IO_ERROR -+Returned by @code{BZ2_bzRead} and @code{BZ2_bzWrite} when there is an error -+reading or writing in the compressed file, and by @code{BZ2_bzReadOpen} -+and @code{BZ2_bzWriteOpen} for attempts to use a file for which the -+error indicator (viz, @code{ferror(f)}) is set. -+On receipt of @code{BZ_IO_ERROR}, the caller should consult -+@code{errno} and/or @code{perror} to acquire operating-system -+specific information about the problem. -+@item BZ_UNEXPECTED_EOF -+Returned by @code{BZ2_bzRead} when the compressed file finishes -+before the logical end of stream is detected. -+@item BZ_OUTBUFF_FULL -+Returned by @code{BZ2_bzBuffToBuffCompress} and -+@code{BZ2_bzBuffToBuffDecompress} to indicate that the output data -+will not fit into the output buffer provided. -+@end table -+ -+ -+ -+@section Low-level interface -+ -+@subsection @code{BZ2_bzCompressInit} -+@example -+typedef -+ struct @{ -+ char *next_in; -+ unsigned int avail_in; -+ unsigned int total_in_lo32; -+ unsigned int total_in_hi32; -+ -+ char *next_out; -+ unsigned int avail_out; -+ unsigned int total_out_lo32; -+ unsigned int total_out_hi32; -+ -+ void *state; -+ -+ void *(*bzalloc)(void *,int,int); -+ void (*bzfree)(void *,void *); -+ void *opaque; -+ @} -+ bz_stream; -+ -+int BZ2_bzCompressInit ( bz_stream *strm, -+ int blockSize100k, -+ int verbosity, -+ int workFactor ); -+ -+@end example -+ -+Prepares for compression. The @code{bz_stream} structure -+holds all data pertaining to the compression activity. -+A @code{bz_stream} structure should be allocated and initialised -+prior to the call. -+The fields of @code{bz_stream} -+comprise the entirety of the user-visible data. @code{state} -+is a pointer to the private data structures required for compression. -+ -+Custom memory allocators are supported, via fields @code{bzalloc}, -+@code{bzfree}, -+and @code{opaque}. The value -+@code{opaque} is passed to as the first argument to -+all calls to @code{bzalloc} and @code{bzfree}, but is -+otherwise ignored by the library. -+The call @code{bzalloc ( opaque, n, m )} is expected to return a -+pointer @code{p} to -+@code{n * m} bytes of memory, and @code{bzfree ( opaque, p )} -+should free -+that memory. -+ -+If you don't want to use a custom memory allocator, set @code{bzalloc}, -+@code{bzfree} and -+@code{opaque} to @code{NULL}, -+and the library will then use the standard @code{malloc}/@code{free} -+routines. -+ -+Before calling @code{BZ2_bzCompressInit}, fields @code{bzalloc}, -+@code{bzfree} and @code{opaque} should -+be filled appropriately, as just described. Upon return, the internal -+state will have been allocated and initialised, and @code{total_in_lo32}, -+@code{total_in_hi32}, @code{total_out_lo32} and -+@code{total_out_hi32} will have been set to zero. -+These four fields are used by the library -+to inform the caller of the total amount of data passed into and out of -+the library, respectively. You should not try to change them. -+As of version 1.0, 64-bit counts are maintained, even on 32-bit -+platforms, using the @code{_hi32} fields to store the upper 32 bits -+of the count. So, for example, the total amount of data in -+is @code{(total_in_hi32 << 32) + total_in_lo32}. -+ -+Parameter @code{blockSize100k} specifies the block size to be used for -+compression. It should be a value between 1 and 9 inclusive, and the -+actual block size used is 100000 x this figure. 9 gives the best -+compression but takes most memory. -+ -+Parameter @code{verbosity} should be set to a number between 0 and 4 -+inclusive. 0 is silent, and greater numbers give increasingly verbose -+monitoring/debugging output. If the library has been compiled with -+@code{-DBZ_NO_STDIO}, no such output will appear for any verbosity -+setting. -+ -+Parameter @code{workFactor} controls how the compression phase behaves -+when presented with worst case, highly repetitive, input data. If -+compression runs into difficulties caused by repetitive data, the -+library switches from the standard sorting algorithm to a fallback -+algorithm. The fallback is slower than the standard algorithm by -+perhaps a factor of three, but always behaves reasonably, no matter how -+bad the input. -+ -+Lower values of @code{workFactor} reduce the amount of effort the -+standard algorithm will expend before resorting to the fallback. You -+should set this parameter carefully; too low, and many inputs will be -+handled by the fallback algorithm and so compress rather slowly, too -+high, and your average-to-worst case compression times can become very -+large. The default value of 30 gives reasonable behaviour over a wide -+range of circumstances. -+ -+Allowable values range from 0 to 250 inclusive. 0 is a special case, -+equivalent to using the default value of 30. -+ -+Note that the compressed output generated is the same regardless of -+whether or not the fallback algorithm is used. -+ -+Be aware also that this parameter may disappear entirely in future -+versions of the library. In principle it should be possible to devise a -+good way to automatically choose which algorithm to use. Such a -+mechanism would render the parameter obsolete. -+ -+Possible return values: -+@display -+ @code{BZ_CONFIG_ERROR} -+ if the library has been mis-compiled -+ @code{BZ_PARAM_ERROR} -+ if @code{strm} is @code{NULL} -+ or @code{blockSize} < 1 or @code{blockSize} > 9 -+ or @code{verbosity} < 0 or @code{verbosity} > 4 -+ or @code{workFactor} < 0 or @code{workFactor} > 250 -+ @code{BZ_MEM_ERROR} -+ if not enough memory is available -+ @code{BZ_OK} -+ otherwise -+@end display -+Allowable next actions: -+@display -+ @code{BZ2_bzCompress} -+ if @code{BZ_OK} is returned -+ no specific action needed in case of error -+@end display -+ -+@subsection @code{BZ2_bzCompress} -+@example -+ int BZ2_bzCompress ( bz_stream *strm, int action ); -+@end example -+Provides more input and/or output buffer space for the library. The -+caller maintains input and output buffers, and calls @code{BZ2_bzCompress} to -+transfer data between them. -+ -+Before each call to @code{BZ2_bzCompress}, @code{next_in} should point at -+the data to be compressed, and @code{avail_in} should indicate how many -+bytes the library may read. @code{BZ2_bzCompress} updates @code{next_in}, -+@code{avail_in} and @code{total_in} to reflect the number of bytes it -+has read. -+ -+Similarly, @code{next_out} should point to a buffer in which the -+compressed data is to be placed, with @code{avail_out} indicating how -+much output space is available. @code{BZ2_bzCompress} updates -+@code{next_out}, @code{avail_out} and @code{total_out} to reflect the -+number of bytes output. -+ -+You may provide and remove as little or as much data as you like on each -+call of @code{BZ2_bzCompress}. In the limit, it is acceptable to supply and -+remove data one byte at a time, although this would be terribly -+inefficient. You should always ensure that at least one byte of output -+space is available at each call. -+ -+A second purpose of @code{BZ2_bzCompress} is to request a change of mode of the -+compressed stream. -+ -+Conceptually, a compressed stream can be in one of four states: IDLE, -+RUNNING, FLUSHING and FINISHING. Before initialisation -+(@code{BZ2_bzCompressInit}) and after termination (@code{BZ2_bzCompressEnd}), a -+stream is regarded as IDLE. -+ -+Upon initialisation (@code{BZ2_bzCompressInit}), the stream is placed in the -+RUNNING state. Subsequent calls to @code{BZ2_bzCompress} should pass -+@code{BZ_RUN} as the requested action; other actions are illegal and -+will result in @code{BZ_SEQUENCE_ERROR}. -+ -+At some point, the calling program will have provided all the input data -+it wants to. It will then want to finish up -- in effect, asking the -+library to process any data it might have buffered internally. In this -+state, @code{BZ2_bzCompress} will no longer attempt to read data from -+@code{next_in}, but it will want to write data to @code{next_out}. -+Because the output buffer supplied by the user can be arbitrarily small, -+the finishing-up operation cannot necessarily be done with a single call -+of @code{BZ2_bzCompress}. -+ -+Instead, the calling program passes @code{BZ_FINISH} as an action to -+@code{BZ2_bzCompress}. This changes the stream's state to FINISHING. Any -+remaining input (ie, @code{next_in[0 .. avail_in-1]}) is compressed and -+transferred to the output buffer. To do this, @code{BZ2_bzCompress} must be -+called repeatedly until all the output has been consumed. At that -+point, @code{BZ2_bzCompress} returns @code{BZ_STREAM_END}, and the stream's -+state is set back to IDLE. @code{BZ2_bzCompressEnd} should then be -+called. -+ -+Just to make sure the calling program does not cheat, the library makes -+a note of @code{avail_in} at the time of the first call to -+@code{BZ2_bzCompress} which has @code{BZ_FINISH} as an action (ie, at the -+time the program has announced its intention to not supply any more -+input). By comparing this value with that of @code{avail_in} over -+subsequent calls to @code{BZ2_bzCompress}, the library can detect any -+attempts to slip in more data to compress. Any calls for which this is -+detected will return @code{BZ_SEQUENCE_ERROR}. This indicates a -+programming mistake which should be corrected. -+ -+Instead of asking to finish, the calling program may ask -+@code{BZ2_bzCompress} to take all the remaining input, compress it and -+terminate the current (Burrows-Wheeler) compression block. This could -+be useful for error control purposes. The mechanism is analogous to -+that for finishing: call @code{BZ2_bzCompress} with an action of -+@code{BZ_FLUSH}, remove output data, and persist with the -+@code{BZ_FLUSH} action until the value @code{BZ_RUN} is returned. As -+with finishing, @code{BZ2_bzCompress} detects any attempt to provide more -+input data once the flush has begun. -+ -+Once the flush is complete, the stream returns to the normal RUNNING -+state. -+ -+This all sounds pretty complex, but isn't really. Here's a table -+which shows which actions are allowable in each state, what action -+will be taken, what the next state is, and what the non-error return -+values are. Note that you can't explicitly ask what state the -+stream is in, but nor do you need to -- it can be inferred from the -+values returned by @code{BZ2_bzCompress}. -+@display -+IDLE/@code{any} -+ Illegal. IDLE state only exists after @code{BZ2_bzCompressEnd} or -+ before @code{BZ2_bzCompressInit}. -+ Return value = @code{BZ_SEQUENCE_ERROR} -+ -+RUNNING/@code{BZ_RUN} -+ Compress from @code{next_in} to @code{next_out} as much as possible. -+ Next state = RUNNING -+ Return value = @code{BZ_RUN_OK} -+ -+RUNNING/@code{BZ_FLUSH} -+ Remember current value of @code{next_in}. Compress from @code{next_in} -+ to @code{next_out} as much as possible, but do not accept any more input. -+ Next state = FLUSHING -+ Return value = @code{BZ_FLUSH_OK} -+ -+RUNNING/@code{BZ_FINISH} -+ Remember current value of @code{next_in}. Compress from @code{next_in} -+ to @code{next_out} as much as possible, but do not accept any more input. -+ Next state = FINISHING -+ Return value = @code{BZ_FINISH_OK} -+ -+FLUSHING/@code{BZ_FLUSH} -+ Compress from @code{next_in} to @code{next_out} as much as possible, -+ but do not accept any more input. -+ If all the existing input has been used up and all compressed -+ output has been removed -+ Next state = RUNNING; Return value = @code{BZ_RUN_OK} -+ else -+ Next state = FLUSHING; Return value = @code{BZ_FLUSH_OK} -+ -+FLUSHING/other -+ Illegal. -+ Return value = @code{BZ_SEQUENCE_ERROR} -+ -+FINISHING/@code{BZ_FINISH} -+ Compress from @code{next_in} to @code{next_out} as much as possible, -+ but to not accept any more input. -+ If all the existing input has been used up and all compressed -+ output has been removed -+ Next state = IDLE; Return value = @code{BZ_STREAM_END} -+ else -+ Next state = FINISHING; Return value = @code{BZ_FINISHING} -+ -+FINISHING/other -+ Illegal. -+ Return value = @code{BZ_SEQUENCE_ERROR} -+@end display -+ -+That still looks complicated? Well, fair enough. The usual sequence -+of calls for compressing a load of data is: -+@itemize @bullet -+@item Get started with @code{BZ2_bzCompressInit}. -+@item Shovel data in and shlurp out its compressed form using zero or more -+calls of @code{BZ2_bzCompress} with action = @code{BZ_RUN}. -+@item Finish up. -+Repeatedly call @code{BZ2_bzCompress} with action = @code{BZ_FINISH}, -+copying out the compressed output, until @code{BZ_STREAM_END} is returned. -+@item Close up and go home. Call @code{BZ2_bzCompressEnd}. -+@end itemize -+If the data you want to compress fits into your input buffer all -+at once, you can skip the calls of @code{BZ2_bzCompress ( ..., BZ_RUN )} and -+just do the @code{BZ2_bzCompress ( ..., BZ_FINISH )} calls. -+ -+All required memory is allocated by @code{BZ2_bzCompressInit}. The -+compression library can accept any data at all (obviously). So you -+shouldn't get any error return values from the @code{BZ2_bzCompress} calls. -+If you do, they will be @code{BZ_SEQUENCE_ERROR}, and indicate a bug in -+your programming. -+ -+Trivial other possible return values: -+@display -+ @code{BZ_PARAM_ERROR} -+ if @code{strm} is @code{NULL}, or @code{strm->s} is @code{NULL} -+@end display -+ -+@subsection @code{BZ2_bzCompressEnd} -+@example -+int BZ2_bzCompressEnd ( bz_stream *strm ); -+@end example -+Releases all memory associated with a compression stream. -+ -+Possible return values: -+@display -+ @code{BZ_PARAM_ERROR} if @code{strm} is @code{NULL} or @code{strm->s} is @code{NULL} -+ @code{BZ_OK} otherwise -+@end display -+ -+ -+@subsection @code{BZ2_bzDecompressInit} -+@example -+int BZ2_bzDecompressInit ( bz_stream *strm, int verbosity, int small ); -+@end example -+Prepares for decompression. As with @code{BZ2_bzCompressInit}, a -+@code{bz_stream} record should be allocated and initialised before the -+call. Fields @code{bzalloc}, @code{bzfree} and @code{opaque} should be -+set if a custom memory allocator is required, or made @code{NULL} for -+the normal @code{malloc}/@code{free} routines. Upon return, the internal -+state will have been initialised, and @code{total_in} and -+@code{total_out} will be zero. -+ -+For the meaning of parameter @code{verbosity}, see @code{BZ2_bzCompressInit}. -+ -+If @code{small} is nonzero, the library will use an alternative -+decompression algorithm which uses less memory but at the cost of -+decompressing more slowly (roughly speaking, half the speed, but the -+maximum memory requirement drops to around 2300k). See Chapter 2 for -+more information on memory management. -+ -+Note that the amount of memory needed to decompress -+a stream cannot be determined until the stream's header has been read, -+so even if @code{BZ2_bzDecompressInit} succeeds, a subsequent -+@code{BZ2_bzDecompress} could fail with @code{BZ_MEM_ERROR}. -+ -+Possible return values: -+@display -+ @code{BZ_CONFIG_ERROR} -+ if the library has been mis-compiled -+ @code{BZ_PARAM_ERROR} -+ if @code{(small != 0 && small != 1)} -+ or @code{(verbosity < 0 || verbosity > 4)} -+ @code{BZ_MEM_ERROR} -+ if insufficient memory is available -+@end display -+ -+Allowable next actions: -+@display -+ @code{BZ2_bzDecompress} -+ if @code{BZ_OK} was returned -+ no specific action required in case of error -+@end display -+ -+ -+ -+@subsection @code{BZ2_bzDecompress} -+@example -+int BZ2_bzDecompress ( bz_stream *strm ); -+@end example -+Provides more input and/out output buffer space for the library. The -+caller maintains input and output buffers, and uses @code{BZ2_bzDecompress} -+to transfer data between them. -+ -+Before each call to @code{BZ2_bzDecompress}, @code{next_in} -+should point at the compressed data, -+and @code{avail_in} should indicate how many bytes the library -+may read. @code{BZ2_bzDecompress} updates @code{next_in}, @code{avail_in} -+and @code{total_in} -+to reflect the number of bytes it has read. -+ -+Similarly, @code{next_out} should point to a buffer in which the uncompressed -+output is to be placed, with @code{avail_out} indicating how much output space -+is available. @code{BZ2_bzCompress} updates @code{next_out}, -+@code{avail_out} and @code{total_out} to reflect -+the number of bytes output. -+ -+You may provide and remove as little or as much data as you like on -+each call of @code{BZ2_bzDecompress}. -+In the limit, it is acceptable to -+supply and remove data one byte at a time, although this would be -+terribly inefficient. You should always ensure that at least one -+byte of output space is available at each call. -+ -+Use of @code{BZ2_bzDecompress} is simpler than @code{BZ2_bzCompress}. -+ -+You should provide input and remove output as described above, and -+repeatedly call @code{BZ2_bzDecompress} until @code{BZ_STREAM_END} is -+returned. Appearance of @code{BZ_STREAM_END} denotes that -+@code{BZ2_bzDecompress} has detected the logical end of the compressed -+stream. @code{BZ2_bzDecompress} will not produce @code{BZ_STREAM_END} until -+all output data has been placed into the output buffer, so once -+@code{BZ_STREAM_END} appears, you are guaranteed to have available all -+the decompressed output, and @code{BZ2_bzDecompressEnd} can safely be -+called. -+ -+If case of an error return value, you should call @code{BZ2_bzDecompressEnd} -+to clean up and release memory. -+ -+Possible return values: -+@display -+ @code{BZ_PARAM_ERROR} -+ if @code{strm} is @code{NULL} or @code{strm->s} is @code{NULL} -+ or @code{strm->avail_out < 1} -+ @code{BZ_DATA_ERROR} -+ if a data integrity error is detected in the compressed stream -+ @code{BZ_DATA_ERROR_MAGIC} -+ if the compressed stream doesn't begin with the right magic bytes -+ @code{BZ_MEM_ERROR} -+ if there wasn't enough memory available -+ @code{BZ_STREAM_END} -+ if the logical end of the data stream was detected and all -+ output in has been consumed, eg @code{s->avail_out > 0} -+ @code{BZ_OK} -+ otherwise -+@end display -+Allowable next actions: -+@display -+ @code{BZ2_bzDecompress} -+ if @code{BZ_OK} was returned -+ @code{BZ2_bzDecompressEnd} -+ otherwise -+@end display -+ -+ -+@subsection @code{BZ2_bzDecompressEnd} -+@example -+int BZ2_bzDecompressEnd ( bz_stream *strm ); -+@end example -+Releases all memory associated with a decompression stream. -+ -+Possible return values: -+@display -+ @code{BZ_PARAM_ERROR} -+ if @code{strm} is @code{NULL} or @code{strm->s} is @code{NULL} -+ @code{BZ_OK} -+ otherwise -+@end display -+ -+Allowable next actions: -+@display -+ None. -+@end display -+ -+ -+@section High-level interface -+ -+This interface provides functions for reading and writing -+@code{bzip2} format files. First, some general points. -+ -+@itemize @bullet -+@item All of the functions take an @code{int*} first argument, -+ @code{bzerror}. -+ After each call, @code{bzerror} should be consulted first to determine -+ the outcome of the call. If @code{bzerror} is @code{BZ_OK}, -+ the call completed -+ successfully, and only then should the return value of the function -+ (if any) be consulted. If @code{bzerror} is @code{BZ_IO_ERROR}, -+ there was an error -+ reading/writing the underlying compressed file, and you should -+ then consult @code{errno}/@code{perror} to determine the -+ cause of the difficulty. -+ @code{bzerror} may also be set to various other values; precise details are -+ given on a per-function basis below. -+@item If @code{bzerror} indicates an error -+ (ie, anything except @code{BZ_OK} and @code{BZ_STREAM_END}), -+ you should immediately call @code{BZ2_bzReadClose} (or @code{BZ2_bzWriteClose}, -+ depending on whether you are attempting to read or to write) -+ to free up all resources associated -+ with the stream. Once an error has been indicated, behaviour of all calls -+ except @code{BZ2_bzReadClose} (@code{BZ2_bzWriteClose}) is undefined. -+ The implication is that (1) @code{bzerror} should -+ be checked after each call, and (2) if @code{bzerror} indicates an error, -+ @code{BZ2_bzReadClose} (@code{BZ2_bzWriteClose}) should then be called to clean up. -+@item The @code{FILE*} arguments passed to -+ @code{BZ2_bzReadOpen}/@code{BZ2_bzWriteOpen} -+ should be set to binary mode. -+ Most Unix systems will do this by default, but other platforms, -+ including Windows and Mac, will not. If you omit this, you may -+ encounter problems when moving code to new platforms. -+@item Memory allocation requests are handled by -+ @code{malloc}/@code{free}. -+ At present -+ there is no facility for user-defined memory allocators in the file I/O -+ functions (could easily be added, though). -+@end itemize -+ -+ -+ -+@subsection @code{BZ2_bzReadOpen} -+@example -+ typedef void BZFILE; -+ -+ BZFILE *BZ2_bzReadOpen ( int *bzerror, FILE *f, -+ int small, int verbosity, -+ void *unused, int nUnused ); -+@end example -+Prepare to read compressed data from file handle @code{f}. @code{f} -+should refer to a file which has been opened for reading, and for which -+the error indicator (@code{ferror(f)})is not set. If @code{small} is 1, -+the library will try to decompress using less memory, at the expense of -+speed. -+ -+For reasons explained below, @code{BZ2_bzRead} will decompress the -+@code{nUnused} bytes starting at @code{unused}, before starting to read -+from the file @code{f}. At most @code{BZ_MAX_UNUSED} bytes may be -+supplied like this. If this facility is not required, you should pass -+@code{NULL} and @code{0} for @code{unused} and n@code{Unused} -+respectively. -+ -+For the meaning of parameters @code{small} and @code{verbosity}, -+see @code{BZ2_bzDecompressInit}. -+ -+The amount of memory needed to decompress a file cannot be determined -+until the file's header has been read. So it is possible that -+@code{BZ2_bzReadOpen} returns @code{BZ_OK} but a subsequent call of -+@code{BZ2_bzRead} will return @code{BZ_MEM_ERROR}. -+ -+Possible assignments to @code{bzerror}: -+@display -+ @code{BZ_CONFIG_ERROR} -+ if the library has been mis-compiled -+ @code{BZ_PARAM_ERROR} -+ if @code{f} is @code{NULL} -+ or @code{small} is neither @code{0} nor @code{1} -+ or @code{(unused == NULL && nUnused != 0)} -+ or @code{(unused != NULL && !(0 <= nUnused <= BZ_MAX_UNUSED))} -+ @code{BZ_IO_ERROR} -+ if @code{ferror(f)} is nonzero -+ @code{BZ_MEM_ERROR} -+ if insufficient memory is available -+ @code{BZ_OK} -+ otherwise. -+@end display -+ -+Possible return values: -+@display -+ Pointer to an abstract @code{BZFILE} -+ if @code{bzerror} is @code{BZ_OK} -+ @code{NULL} -+ otherwise -+@end display -+ -+Allowable next actions: -+@display -+ @code{BZ2_bzRead} -+ if @code{bzerror} is @code{BZ_OK} -+ @code{BZ2_bzClose} -+ otherwise -+@end display -+ -+ -+@subsection @code{BZ2_bzRead} -+@example -+ int BZ2_bzRead ( int *bzerror, BZFILE *b, void *buf, int len ); -+@end example -+Reads up to @code{len} (uncompressed) bytes from the compressed file -+@code{b} into -+the buffer @code{buf}. If the read was successful, -+@code{bzerror} is set to @code{BZ_OK} -+and the number of bytes read is returned. If the logical end-of-stream -+was detected, @code{bzerror} will be set to @code{BZ_STREAM_END}, -+and the number -+of bytes read is returned. All other @code{bzerror} values denote an error. -+ -+@code{BZ2_bzRead} will supply @code{len} bytes, -+unless the logical stream end is detected -+or an error occurs. Because of this, it is possible to detect the -+stream end by observing when the number of bytes returned is -+less than the number -+requested. Nevertheless, this is regarded as inadvisable; you should -+instead check @code{bzerror} after every call and watch out for -+@code{BZ_STREAM_END}. -+ -+Internally, @code{BZ2_bzRead} copies data from the compressed file in chunks -+of size @code{BZ_MAX_UNUSED} bytes -+before decompressing it. If the file contains more bytes than strictly -+needed to reach the logical end-of-stream, @code{BZ2_bzRead} will almost certainly -+read some of the trailing data before signalling @code{BZ_SEQUENCE_END}. -+To collect the read but unused data once @code{BZ_SEQUENCE_END} has -+appeared, call @code{BZ2_bzReadGetUnused} immediately before @code{BZ2_bzReadClose}. -+ -+Possible assignments to @code{bzerror}: -+@display -+ @code{BZ_PARAM_ERROR} -+ if @code{b} is @code{NULL} or @code{buf} is @code{NULL} or @code{len < 0} -+ @code{BZ_SEQUENCE_ERROR} -+ if @code{b} was opened with @code{BZ2_bzWriteOpen} -+ @code{BZ_IO_ERROR} -+ if there is an error reading from the compressed file -+ @code{BZ_UNEXPECTED_EOF} -+ if the compressed file ended before the logical end-of-stream was detected -+ @code{BZ_DATA_ERROR} -+ if a data integrity error was detected in the compressed stream -+ @code{BZ_DATA_ERROR_MAGIC} -+ if the stream does not begin with the requisite header bytes (ie, is not -+ a @code{bzip2} data file). This is really a special case of @code{BZ_DATA_ERROR}. -+ @code{BZ_MEM_ERROR} -+ if insufficient memory was available -+ @code{BZ_STREAM_END} -+ if the logical end of stream was detected. -+ @code{BZ_OK} -+ otherwise. -+@end display -+ -+Possible return values: -+@display -+ number of bytes read -+ if @code{bzerror} is @code{BZ_OK} or @code{BZ_STREAM_END} -+ undefined -+ otherwise -+@end display -+ -+Allowable next actions: -+@display -+ collect data from @code{buf}, then @code{BZ2_bzRead} or @code{BZ2_bzReadClose} -+ if @code{bzerror} is @code{BZ_OK} -+ collect data from @code{buf}, then @code{BZ2_bzReadClose} or @code{BZ2_bzReadGetUnused} -+ if @code{bzerror} is @code{BZ_SEQUENCE_END} -+ @code{BZ2_bzReadClose} -+ otherwise -+@end display -+ -+ -+ -+@subsection @code{BZ2_bzReadGetUnused} -+@example -+ void BZ2_bzReadGetUnused ( int* bzerror, BZFILE *b, -+ void** unused, int* nUnused ); -+@end example -+Returns data which was read from the compressed file but was not needed -+to get to the logical end-of-stream. @code{*unused} is set to the address -+of the data, and @code{*nUnused} to the number of bytes. @code{*nUnused} will -+be set to a value between @code{0} and @code{BZ_MAX_UNUSED} inclusive. -+ -+This function may only be called once @code{BZ2_bzRead} has signalled -+@code{BZ_STREAM_END} but before @code{BZ2_bzReadClose}. -+ -+Possible assignments to @code{bzerror}: -+@display -+ @code{BZ_PARAM_ERROR} -+ if @code{b} is @code{NULL} -+ or @code{unused} is @code{NULL} or @code{nUnused} is @code{NULL} -+ @code{BZ_SEQUENCE_ERROR} -+ if @code{BZ_STREAM_END} has not been signalled -+ or if @code{b} was opened with @code{BZ2_bzWriteOpen} -+ @code{BZ_OK} -+ otherwise -+@end display -+ -+Allowable next actions: -+@display -+ @code{BZ2_bzReadClose} -+@end display -+ -+ -+@subsection @code{BZ2_bzReadClose} -+@example -+ void BZ2_bzReadClose ( int *bzerror, BZFILE *b ); -+@end example -+Releases all memory pertaining to the compressed file @code{b}. -+@code{BZ2_bzReadClose} does not call @code{fclose} on the underlying file -+handle, so you should do that yourself if appropriate. -+@code{BZ2_bzReadClose} should be called to clean up after all error -+situations. -+ -+Possible assignments to @code{bzerror}: -+@display -+ @code{BZ_SEQUENCE_ERROR} -+ if @code{b} was opened with @code{BZ2_bzOpenWrite} -+ @code{BZ_OK} -+ otherwise -+@end display -+ -+Allowable next actions: -+@display -+ none -+@end display -+ -+ -+ -+@subsection @code{BZ2_bzWriteOpen} -+@example -+ BZFILE *BZ2_bzWriteOpen ( int *bzerror, FILE *f, -+ int blockSize100k, int verbosity, -+ int workFactor ); -+@end example -+Prepare to write compressed data to file handle @code{f}. -+@code{f} should refer to -+a file which has been opened for writing, and for which the error -+indicator (@code{ferror(f)})is not set. -+ -+For the meaning of parameters @code{blockSize100k}, -+@code{verbosity} and @code{workFactor}, see -+@* @code{BZ2_bzCompressInit}. -+ -+All required memory is allocated at this stage, so if the call -+completes successfully, @code{BZ_MEM_ERROR} cannot be signalled by a -+subsequent call to @code{BZ2_bzWrite}. -+ -+Possible assignments to @code{bzerror}: -+@display -+ @code{BZ_CONFIG_ERROR} -+ if the library has been mis-compiled -+ @code{BZ_PARAM_ERROR} -+ if @code{f} is @code{NULL} -+ or @code{blockSize100k < 1} or @code{blockSize100k > 9} -+ @code{BZ_IO_ERROR} -+ if @code{ferror(f)} is nonzero -+ @code{BZ_MEM_ERROR} -+ if insufficient memory is available -+ @code{BZ_OK} -+ otherwise -+@end display -+ -+Possible return values: -+@display -+ Pointer to an abstract @code{BZFILE} -+ if @code{bzerror} is @code{BZ_OK} -+ @code{NULL} -+ otherwise -+@end display -+ -+Allowable next actions: -+@display -+ @code{BZ2_bzWrite} -+ if @code{bzerror} is @code{BZ_OK} -+ (you could go directly to @code{BZ2_bzWriteClose}, but this would be pretty pointless) -+ @code{BZ2_bzWriteClose} -+ otherwise -+@end display -+ -+ -+ -+@subsection @code{BZ2_bzWrite} -+@example -+ void BZ2_bzWrite ( int *bzerror, BZFILE *b, void *buf, int len ); -+@end example -+Absorbs @code{len} bytes from the buffer @code{buf}, eventually to be -+compressed and written to the file. -+ -+Possible assignments to @code{bzerror}: -+@display -+ @code{BZ_PARAM_ERROR} -+ if @code{b} is @code{NULL} or @code{buf} is @code{NULL} or @code{len < 0} -+ @code{BZ_SEQUENCE_ERROR} -+ if b was opened with @code{BZ2_bzReadOpen} -+ @code{BZ_IO_ERROR} -+ if there is an error writing the compressed file. -+ @code{BZ_OK} -+ otherwise -+@end display -+ -+ -+ -+ -+@subsection @code{BZ2_bzWriteClose} -+@example -+ void BZ2_bzWriteClose ( int *bzerror, BZFILE* f, -+ int abandon, -+ unsigned int* nbytes_in, -+ unsigned int* nbytes_out ); -+ -+ void BZ2_bzWriteClose64 ( int *bzerror, BZFILE* f, -+ int abandon, -+ unsigned int* nbytes_in_lo32, -+ unsigned int* nbytes_in_hi32, -+ unsigned int* nbytes_out_lo32, -+ unsigned int* nbytes_out_hi32 ); -+@end example -+ -+Compresses and flushes to the compressed file all data so far supplied -+by @code{BZ2_bzWrite}. The logical end-of-stream markers are also written, so -+subsequent calls to @code{BZ2_bzWrite} are illegal. All memory associated -+with the compressed file @code{b} is released. -+@code{fflush} is called on the -+compressed file, but it is not @code{fclose}'d. -+ -+If @code{BZ2_bzWriteClose} is called to clean up after an error, the only -+action is to release the memory. The library records the error codes -+issued by previous calls, so this situation will be detected -+automatically. There is no attempt to complete the compression -+operation, nor to @code{fflush} the compressed file. You can force this -+behaviour to happen even in the case of no error, by passing a nonzero -+value to @code{abandon}. -+ -+If @code{nbytes_in} is non-null, @code{*nbytes_in} will be set to be the -+total volume of uncompressed data handled. Similarly, @code{nbytes_out} -+will be set to the total volume of compressed data written. For -+compatibility with older versions of the library, @code{BZ2_bzWriteClose} -+only yields the lower 32 bits of these counts. Use -+@code{BZ2_bzWriteClose64} if you want the full 64 bit counts. These -+two functions are otherwise absolutely identical. -+ -+ -+Possible assignments to @code{bzerror}: -+@display -+ @code{BZ_SEQUENCE_ERROR} -+ if @code{b} was opened with @code{BZ2_bzReadOpen} -+ @code{BZ_IO_ERROR} -+ if there is an error writing the compressed file -+ @code{BZ_OK} -+ otherwise -+@end display -+ -+@subsection Handling embedded compressed data streams -+ -+The high-level library facilitates use of -+@code{bzip2} data streams which form some part of a surrounding, larger -+data stream. -+@itemize @bullet -+@item For writing, the library takes an open file handle, writes -+compressed data to it, @code{fflush}es it but does not @code{fclose} it. -+The calling application can write its own data before and after the -+compressed data stream, using that same file handle. -+@item Reading is more complex, and the facilities are not as general -+as they could be since generality is hard to reconcile with efficiency. -+@code{BZ2_bzRead} reads from the compressed file in blocks of size -+@code{BZ_MAX_UNUSED} bytes, and in doing so probably will overshoot -+the logical end of compressed stream. -+To recover this data once decompression has -+ended, call @code{BZ2_bzReadGetUnused} after the last call of @code{BZ2_bzRead} -+(the one returning @code{BZ_STREAM_END}) but before calling -+@code{BZ2_bzReadClose}. -+@end itemize -+ -+This mechanism makes it easy to decompress multiple @code{bzip2} -+streams placed end-to-end. As the end of one stream, when @code{BZ2_bzRead} -+returns @code{BZ_STREAM_END}, call @code{BZ2_bzReadGetUnused} to collect the -+unused data (copy it into your own buffer somewhere). -+That data forms the start of the next compressed stream. -+To start uncompressing that next stream, call @code{BZ2_bzReadOpen} again, -+feeding in the unused data via the @code{unused}/@code{nUnused} -+parameters. -+Keep doing this until @code{BZ_STREAM_END} return coincides with the -+physical end of file (@code{feof(f)}). In this situation -+@code{BZ2_bzReadGetUnused} -+will of course return no data. -+ -+This should give some feel for how the high-level interface can be used. -+If you require extra flexibility, you'll have to bite the bullet and get -+to grips with the low-level interface. -+ -+@subsection Standard file-reading/writing code -+Here's how you'd write data to a compressed file: -+@example @code -+FILE* f; -+BZFILE* b; -+int nBuf; -+char buf[ /* whatever size you like */ ]; -+int bzerror; -+int nWritten; -+ -+f = fopen ( "myfile.bz2", "w" ); -+if (!f) @{ -+ /* handle error */ -+@} -+b = BZ2_bzWriteOpen ( &bzerror, f, 9 ); -+if (bzerror != BZ_OK) @{ -+ BZ2_bzWriteClose ( b ); -+ /* handle error */ -+@} -+ -+while ( /* condition */ ) @{ -+ /* get data to write into buf, and set nBuf appropriately */ -+ nWritten = BZ2_bzWrite ( &bzerror, b, buf, nBuf ); -+ if (bzerror == BZ_IO_ERROR) @{ -+ BZ2_bzWriteClose ( &bzerror, b ); -+ /* handle error */ -+ @} -+@} -+ -+BZ2_bzWriteClose ( &bzerror, b ); -+if (bzerror == BZ_IO_ERROR) @{ -+ /* handle error */ -+@} -+@end example -+And to read from a compressed file: -+@example -+FILE* f; -+BZFILE* b; -+int nBuf; -+char buf[ /* whatever size you like */ ]; -+int bzerror; -+int nWritten; -+ -+f = fopen ( "myfile.bz2", "r" ); -+if (!f) @{ -+ /* handle error */ -+@} -+b = BZ2_bzReadOpen ( &bzerror, f, 0, NULL, 0 ); -+if (bzerror != BZ_OK) @{ -+ BZ2_bzReadClose ( &bzerror, b ); -+ /* handle error */ -+@} -+ -+bzerror = BZ_OK; -+while (bzerror == BZ_OK && /* arbitrary other conditions */) @{ -+ nBuf = BZ2_bzRead ( &bzerror, b, buf, /* size of buf */ ); -+ if (bzerror == BZ_OK) @{ -+ /* do something with buf[0 .. nBuf-1] */ -+ @} -+@} -+if (bzerror != BZ_STREAM_END) @{ -+ BZ2_bzReadClose ( &bzerror, b ); -+ /* handle error */ -+@} else @{ -+ BZ2_bzReadClose ( &bzerror ); -+@} -+@end example -+ -+ -+ -+@section Utility functions -+@subsection @code{BZ2_bzBuffToBuffCompress} -+@example -+ int BZ2_bzBuffToBuffCompress( char* dest, -+ unsigned int* destLen, -+ char* source, -+ unsigned int sourceLen, -+ int blockSize100k, -+ int verbosity, -+ int workFactor ); -+@end example -+Attempts to compress the data in @code{source[0 .. sourceLen-1]} -+into the destination buffer, @code{dest[0 .. *destLen-1]}. -+If the destination buffer is big enough, @code{*destLen} is -+set to the size of the compressed data, and @code{BZ_OK} is -+returned. If the compressed data won't fit, @code{*destLen} -+is unchanged, and @code{BZ_OUTBUFF_FULL} is returned. -+ -+Compression in this manner is a one-shot event, done with a single call -+to this function. The resulting compressed data is a complete -+@code{bzip2} format data stream. There is no mechanism for making -+additional calls to provide extra input data. If you want that kind of -+mechanism, use the low-level interface. -+ -+For the meaning of parameters @code{blockSize100k}, @code{verbosity} -+and @code{workFactor}, @* see @code{BZ2_bzCompressInit}. -+ -+To guarantee that the compressed data will fit in its buffer, allocate -+an output buffer of size 1% larger than the uncompressed data, plus -+six hundred extra bytes. -+ -+@code{BZ2_bzBuffToBuffDecompress} will not write data at or -+beyond @code{dest[*destLen]}, even in case of buffer overflow. -+ -+Possible return values: -+@display -+ @code{BZ_CONFIG_ERROR} -+ if the library has been mis-compiled -+ @code{BZ_PARAM_ERROR} -+ if @code{dest} is @code{NULL} or @code{destLen} is @code{NULL} -+ or @code{blockSize100k < 1} or @code{blockSize100k > 9} -+ or @code{verbosity < 0} or @code{verbosity > 4} -+ or @code{workFactor < 0} or @code{workFactor > 250} -+ @code{BZ_MEM_ERROR} -+ if insufficient memory is available -+ @code{BZ_OUTBUFF_FULL} -+ if the size of the compressed data exceeds @code{*destLen} -+ @code{BZ_OK} -+ otherwise -+@end display -+ -+ -+ -+@subsection @code{BZ2_bzBuffToBuffDecompress} -+@example -+ int BZ2_bzBuffToBuffDecompress ( char* dest, -+ unsigned int* destLen, -+ char* source, -+ unsigned int sourceLen, -+ int small, -+ int verbosity ); -+@end example -+Attempts to decompress the data in @code{source[0 .. sourceLen-1]} -+into the destination buffer, @code{dest[0 .. *destLen-1]}. -+If the destination buffer is big enough, @code{*destLen} is -+set to the size of the uncompressed data, and @code{BZ_OK} is -+returned. If the compressed data won't fit, @code{*destLen} -+is unchanged, and @code{BZ_OUTBUFF_FULL} is returned. -+ -+@code{source} is assumed to hold a complete @code{bzip2} format -+data stream. @* @code{BZ2_bzBuffToBuffDecompress} tries to decompress -+the entirety of the stream into the output buffer. -+ -+For the meaning of parameters @code{small} and @code{verbosity}, -+see @code{BZ2_bzDecompressInit}. -+ -+Because the compression ratio of the compressed data cannot be known in -+advance, there is no easy way to guarantee that the output buffer will -+be big enough. You may of course make arrangements in your code to -+record the size of the uncompressed data, but such a mechanism is beyond -+the scope of this library. -+ -+@code{BZ2_bzBuffToBuffDecompress} will not write data at or -+beyond @code{dest[*destLen]}, even in case of buffer overflow. -+ -+Possible return values: -+@display -+ @code{BZ_CONFIG_ERROR} -+ if the library has been mis-compiled -+ @code{BZ_PARAM_ERROR} -+ if @code{dest} is @code{NULL} or @code{destLen} is @code{NULL} -+ or @code{small != 0 && small != 1} -+ or @code{verbosity < 0} or @code{verbosity > 4} -+ @code{BZ_MEM_ERROR} -+ if insufficient memory is available -+ @code{BZ_OUTBUFF_FULL} -+ if the size of the compressed data exceeds @code{*destLen} -+ @code{BZ_DATA_ERROR} -+ if a data integrity error was detected in the compressed data -+ @code{BZ_DATA_ERROR_MAGIC} -+ if the compressed data doesn't begin with the right magic bytes -+ @code{BZ_UNEXPECTED_EOF} -+ if the compressed data ends unexpectedly -+ @code{BZ_OK} -+ otherwise -+@end display -+ -+ -+ -+@section @code{zlib} compatibility functions -+Yoshioka Tsuneo has contributed some functions to -+give better @code{zlib} compatibility. These functions are -+@code{BZ2_bzopen}, @code{BZ2_bzread}, @code{BZ2_bzwrite}, @code{BZ2_bzflush}, -+@code{BZ2_bzclose}, -+@code{BZ2_bzerror} and @code{BZ2_bzlibVersion}. -+These functions are not (yet) officially part of -+the library. If they break, you get to keep all the pieces. -+Nevertheless, I think they work ok. -+@example -+typedef void BZFILE; -+ -+const char * BZ2_bzlibVersion ( void ); -+@end example -+Returns a string indicating the library version. -+@example -+BZFILE * BZ2_bzopen ( const char *path, const char *mode ); -+BZFILE * BZ2_bzdopen ( int fd, const char *mode ); -+@end example -+Opens a @code{.bz2} file for reading or writing, using either its name -+or a pre-existing file descriptor. -+Analogous to @code{fopen} and @code{fdopen}. -+@example -+int BZ2_bzread ( BZFILE* b, void* buf, int len ); -+int BZ2_bzwrite ( BZFILE* b, void* buf, int len ); -+@end example -+Reads/writes data from/to a previously opened @code{BZFILE}. -+Analogous to @code{fread} and @code{fwrite}. -+@example -+int BZ2_bzflush ( BZFILE* b ); -+void BZ2_bzclose ( BZFILE* b ); -+@end example -+Flushes/closes a @code{BZFILE}. @code{BZ2_bzflush} doesn't actually do -+anything. Analogous to @code{fflush} and @code{fclose}. -+ -+@example -+const char * BZ2_bzerror ( BZFILE *b, int *errnum ) -+@end example -+Returns a string describing the more recent error status of -+@code{b}, and also sets @code{*errnum} to its numerical value. -+ -+ -+@section Using the library in a @code{stdio}-free environment -+ -+@subsection Getting rid of @code{stdio} -+ -+In a deeply embedded application, you might want to use just -+the memory-to-memory functions. You can do this conveniently -+by compiling the library with preprocessor symbol @code{BZ_NO_STDIO} -+defined. Doing this gives you a library containing only the following -+eight functions: -+ -+@code{BZ2_bzCompressInit}, @code{BZ2_bzCompress}, @code{BZ2_bzCompressEnd} @* -+@code{BZ2_bzDecompressInit}, @code{BZ2_bzDecompress}, @code{BZ2_bzDecompressEnd} @* -+@code{BZ2_bzBuffToBuffCompress}, @code{BZ2_bzBuffToBuffDecompress} -+ -+When compiled like this, all functions will ignore @code{verbosity} -+settings. -+ -+@subsection Critical error handling -+@code{libbzip2} contains a number of internal assertion checks which -+should, needless to say, never be activated. Nevertheless, if an -+assertion should fail, behaviour depends on whether or not the library -+was compiled with @code{BZ_NO_STDIO} set. -+ -+For a normal compile, an assertion failure yields the message -+@example -+ bzip2/libbzip2: internal error number N. -+ This is a bug in bzip2/libbzip2, 1.0.2, 30-Dec-2001. -+ Please report it to me at: jseward@@acm.org. If this happened -+ when you were using some program which uses libbzip2 as a -+ component, you should also report this bug to the author(s) -+ of that program. Please make an effort to report this bug; -+ timely and accurate bug reports eventually lead to higher -+ quality software. Thanks. Julian Seward, 30 December 2001. -+@end example -+where @code{N} is some error code number. If @code{N == 1007}, it also -+prints some extra text advising the reader that unreliable memory is -+often associated with internal error 1007. (This is a -+frequently-observed-phenomenon with versions 1.0.0/1.0.1). -+ -+@code{exit(3)} is then called. -+ -+For a @code{stdio}-free library, assertion failures result -+in a call to a function declared as: -+@example -+ extern void bz_internal_error ( int errcode ); -+@end example -+The relevant code is passed as a parameter. You should supply -+such a function. -+ -+In either case, once an assertion failure has occurred, any -+@code{bz_stream} records involved can be regarded as invalid. -+You should not attempt to resume normal operation with them. -+ -+You may, of course, change critical error handling to suit -+your needs. As I said above, critical errors indicate bugs -+in the library and should not occur. All "normal" error -+situations are indicated via error return codes from functions, -+and can be recovered from. -+ -+ -+@section Making a Windows DLL -+Everything related to Windows has been contributed by Yoshioka Tsuneo -+@* (@code{QWF00133@@niftyserve.or.jp} / -+@code{tsuneo-y@@is.aist-nara.ac.jp}), so you should send your queries to -+him (but perhaps Cc: me, @code{jseward@@acm.org}). -+ -+My vague understanding of what to do is: using Visual C++ 5.0, -+open the project file @code{libbz2.dsp}, and build. That's all. -+ -+If you can't -+open the project file for some reason, make a new one, naming these files: -+@code{blocksort.c}, @code{bzlib.c}, @code{compress.c}, -+@code{crctable.c}, @code{decompress.c}, @code{huffman.c}, @* -+@code{randtable.c} and @code{libbz2.def}. You will also need -+to name the header files @code{bzlib.h} and @code{bzlib_private.h}. -+ -+If you don't use VC++, you may need to define the proprocessor symbol -+@code{_WIN32}. -+ -+Finally, @code{dlltest.c} is a sample program using the DLL. It has a -+project file, @code{dlltest.dsp}. -+ -+If you just want a makefile for Visual C, have a look at -+@code{makefile.msc}. -+ -+Be aware that if you compile @code{bzip2} itself on Win32, you must set -+@code{BZ_UNIX} to 0 and @code{BZ_LCCWIN32} to 1, in the file -+@code{bzip2.c}, before compiling. Otherwise the resulting binary won't -+work correctly. -+ -+I haven't tried any of this stuff myself, but it all looks plausible. -+ -+ -+ -+@chapter Miscellanea -+ -+These are just some random thoughts of mine. Your mileage may -+vary. -+ -+@section Limitations of the compressed file format -+@code{bzip2-1.0}, @code{0.9.5} and @code{0.9.0} -+use exactly the same file format as the previous -+version, @code{bzip2-0.1}. This decision was made in the interests of -+stability. Creating yet another incompatible compressed file format -+would create further confusion and disruption for users. -+ -+Nevertheless, this is not a painless decision. Development -+work since the release of @code{bzip2-0.1} in August 1997 -+has shown complexities in the file format which slow down -+decompression and, in retrospect, are unnecessary. These are: -+@itemize @bullet -+@item The run-length encoder, which is the first of the -+ compression transformations, is entirely irrelevant. -+ The original purpose was to protect the sorting algorithm -+ from the very worst case input: a string of repeated -+ symbols. But algorithm steps Q6a and Q6b in the original -+ Burrows-Wheeler technical report (SRC-124) show how -+ repeats can be handled without difficulty in block -+ sorting. -+@item The randomisation mechanism doesn't really need to be -+ there. Udi Manber and Gene Myers published a suffix -+ array construction algorithm a few years back, which -+ can be employed to sort any block, no matter how -+ repetitive, in O(N log N) time. Subsequent work by -+ Kunihiko Sadakane has produced a derivative O(N (log N)^2) -+ algorithm which usually outperforms the Manber-Myers -+ algorithm. -+ -+ I could have changed to Sadakane's algorithm, but I find -+ it to be slower than @code{bzip2}'s existing algorithm for -+ most inputs, and the randomisation mechanism protects -+ adequately against bad cases. I didn't think it was -+ a good tradeoff to make. Partly this is due to the fact -+ that I was not flooded with email complaints about -+ @code{bzip2-0.1}'s performance on repetitive data, so -+ perhaps it isn't a problem for real inputs. -+ -+ Probably the best long-term solution, -+ and the one I have incorporated into 0.9.5 and above, -+ is to use the existing sorting -+ algorithm initially, and fall back to a O(N (log N)^2) -+ algorithm if the standard algorithm gets into difficulties. -+@item The compressed file format was never designed to be -+ handled by a library, and I have had to jump though -+ some hoops to produce an efficient implementation of -+ decompression. It's a bit hairy. Try passing -+ @code{decompress.c} through the C preprocessor -+ and you'll see what I mean. Much of this complexity -+ could have been avoided if the compressed size of -+ each block of data was recorded in the data stream. -+@item An Adler-32 checksum, rather than a CRC32 checksum, -+ would be faster to compute. -+@end itemize -+It would be fair to say that the @code{bzip2} format was frozen -+before I properly and fully understood the performance -+consequences of doing so. -+ -+Improvements which I was able to incorporate into -+0.9.0, despite using the same file format, are: -+@itemize @bullet -+@item Single array implementation of the inverse BWT. This -+ significantly speeds up decompression, presumably -+ because it reduces the number of cache misses. -+@item Faster inverse MTF transform for large MTF values. The -+ new implementation is based on the notion of sliding blocks -+ of values. -+@item @code{bzip2-0.9.0} now reads and writes files with @code{fread} -+ and @code{fwrite}; version 0.1 used @code{putc} and @code{getc}. -+ Duh! Well, you live and learn. -+ -+@end itemize -+Further ahead, it would be nice -+to be able to do random access into files. This will -+require some careful design of compressed file formats. -+ -+ -+ -+@section Portability issues -+After some consideration, I have decided not to use -+GNU @code{autoconf} to configure 0.9.5 or 1.0. -+ -+@code{autoconf}, admirable and wonderful though it is, -+mainly assists with portability problems between Unix-like -+platforms. But @code{bzip2} doesn't have much in the way -+of portability problems on Unix; most of the difficulties appear -+when porting to the Mac, or to Microsoft's operating systems. -+@code{autoconf} doesn't help in those cases, and brings in a -+whole load of new complexity. -+ -+Most people should be able to compile the library and program -+under Unix straight out-of-the-box, so to speak, especially -+if you have a version of GNU C available. -+ -+There are a couple of @code{__inline__} directives in the code. GNU C -+(@code{gcc}) should be able to handle them. If you're not using -+GNU C, your C compiler shouldn't see them at all. -+If your compiler does, for some reason, see them and doesn't -+like them, just @code{#define} @code{__inline__} to be @code{/* */}. One -+easy way to do this is to compile with the flag @code{-D__inline__=}, -+which should be understood by most Unix compilers. -+ -+If you still have difficulties, try compiling with the macro -+@code{BZ_STRICT_ANSI} defined. This should enable you to build the -+library in a strictly ANSI compliant environment. Building the program -+itself like this is dangerous and not supported, since you remove -+@code{bzip2}'s checks against compressing directories, symbolic links, -+devices, and other not-really-a-file entities. This could cause -+filesystem corruption! -+ -+One other thing: if you create a @code{bzip2} binary for public -+distribution, please try and link it statically (@code{gcc -s}). This -+avoids all sorts of library-version issues that others may encounter -+later on. -+ -+If you build @code{bzip2} on Win32, you must set @code{BZ_UNIX} to 0 and -+@code{BZ_LCCWIN32} to 1, in the file @code{bzip2.c}, before compiling. -+Otherwise the resulting binary won't work correctly. -+ -+ -+ -+@section Reporting bugs -+I tried pretty hard to make sure @code{bzip2} is -+bug free, both by design and by testing. Hopefully -+you'll never need to read this section for real. -+ -+Nevertheless, if @code{bzip2} dies with a segmentation -+fault, a bus error or an internal assertion failure, it -+will ask you to email me a bug report. Experience with -+version 0.1 shows that almost all these problems can -+be traced to either compiler bugs or hardware problems. -+@itemize @bullet -+@item -+Recompile the program with no optimisation, and see if it -+works. And/or try a different compiler. -+I heard all sorts of stories about various flavours -+of GNU C (and other compilers) generating bad code for -+@code{bzip2}, and I've run across two such examples myself. -+ -+2.7.X versions of GNU C are known to generate bad code from -+time to time, at high optimisation levels. -+If you get problems, try using the flags -+@code{-O2} @code{-fomit-frame-pointer} @code{-fno-strength-reduce}. -+You should specifically @emph{not} use @code{-funroll-loops}. -+ -+You may notice that the Makefile runs six tests as part of -+the build process. If the program passes all of these, it's -+a pretty good (but not 100%) indication that the compiler has -+done its job correctly. -+@item -+If @code{bzip2} crashes randomly, and the crashes are not -+repeatable, you may have a flaky memory subsystem. @code{bzip2} -+really hammers your memory hierarchy, and if it's a bit marginal, -+you may get these problems. Ditto if your disk or I/O subsystem -+is slowly failing. Yup, this really does happen. -+ -+Try using a different machine of the same type, and see if -+you can repeat the problem. -+@item This isn't really a bug, but ... If @code{bzip2} tells -+you your file is corrupted on decompression, and you -+obtained the file via FTP, there is a possibility that you -+forgot to tell FTP to do a binary mode transfer. That absolutely -+will cause the file to be non-decompressible. You'll have to transfer -+it again. -+@end itemize -+ -+If you've incorporated @code{libbzip2} into your own program -+and are getting problems, please, please, please, check that the -+parameters you are passing in calls to the library, are -+correct, and in accordance with what the documentation says -+is allowable. I have tried to make the library robust against -+such problems, but I'm sure I haven't succeeded. -+ -+Finally, if the above comments don't help, you'll have to send -+me a bug report. Now, it's just amazing how many people will -+send me a bug report saying something like -+@display -+ bzip2 crashed with segmentation fault on my machine -+@end display -+and absolutely nothing else. Needless to say, a such a report -+is @emph{totally, utterly, completely and comprehensively 100% useless; -+a waste of your time, my time, and net bandwidth}. -+With no details at all, there's no way I can possibly begin -+to figure out what the problem is. -+ -+The rules of the game are: facts, facts, facts. Don't omit -+them because "oh, they won't be relevant". At the bare -+minimum: -+@display -+ Machine type. Operating system version. -+ Exact version of @code{bzip2} (do @code{bzip2 -V}). -+ Exact version of the compiler used. -+ Flags passed to the compiler. -+@end display -+However, the most important single thing that will help me is -+the file that you were trying to compress or decompress at the -+time the problem happened. Without that, my ability to do anything -+more than speculate about the cause, is limited. -+ -+Please remember that I connect to the Internet with a modem, so -+you should contact me before mailing me huge files. -+ -+ -+@section Did you get the right package? -+ -+@code{bzip2} is a resource hog. It soaks up large amounts of CPU cycles -+and memory. Also, it gives very large latencies. In the worst case, you -+can feed many megabytes of uncompressed data into the library before -+getting any compressed output, so this probably rules out applications -+requiring interactive behaviour. -+ -+These aren't faults of my implementation, I hope, but more -+an intrinsic property of the Burrows-Wheeler transform (unfortunately). -+Maybe this isn't what you want. -+ -+If you want a compressor and/or library which is faster, uses less -+memory but gets pretty good compression, and has minimal latency, -+consider Jean-loup -+Gailly's and Mark Adler's work, @code{zlib-1.1.3} and -+@code{gzip-1.2.4}. Look for them at -+ -+@code{http://www.zlib.org} and -+@code{http://www.gzip.org} respectively. -+ -+For something faster and lighter still, you might try Markus F X J -+Oberhumer's @code{LZO} real-time compression/decompression library, at -+@* @code{http://wildsau.idv.uni-linz.ac.at/mfx/lzo.html}. -+ -+If you want to use the @code{bzip2} algorithms to compress small blocks -+of data, 64k bytes or smaller, for example on an on-the-fly disk -+compressor, you'd be well advised not to use this library. Instead, -+I've made a special library tuned for that kind of use. It's part of -+@code{e2compr-0.40}, an on-the-fly disk compressor for the Linux -+@code{ext2} filesystem. Look at -+@code{http://www.netspace.net.au/~reiter/e2compr}. -+ -+ -+ -+@section Testing -+ -+A record of the tests I've done. -+ -+First, some data sets: -+@itemize @bullet -+@item B: a directory containing 6001 files, one for every length in the -+ range 0 to 6000 bytes. The files contain random lowercase -+ letters. 18.7 megabytes. -+@item H: my home directory tree. Documents, source code, mail files, -+ compressed data. H contains B, and also a directory of -+ files designed as boundary cases for the sorting; mostly very -+ repetitive, nasty files. 565 megabytes. -+@item A: directory tree holding various applications built from source: -+ @code{egcs}, @code{gcc-2.8.1}, KDE, GTK, Octave, etc. -+ 2200 megabytes. -+@end itemize -+The tests conducted are as follows. Each test means compressing -+(a copy of) each file in the data set, decompressing it and -+comparing it against the original. -+ -+First, a bunch of tests with block sizes and internal buffer -+sizes set very small, -+to detect any problems with the -+blocking and buffering mechanisms. -+This required modifying the source code so as to try to -+break it. -+@enumerate -+@item Data set H, with -+ buffer size of 1 byte, and block size of 23 bytes. -+@item Data set B, buffer sizes 1 byte, block size 1 byte. -+@item As (2) but small-mode decompression. -+@item As (2) with block size 2 bytes. -+@item As (2) with block size 3 bytes. -+@item As (2) with block size 4 bytes. -+@item As (2) with block size 5 bytes. -+@item As (2) with block size 6 bytes and small-mode decompression. -+@item H with buffer size of 1 byte, but normal block -+ size (up to 900000 bytes). -+@end enumerate -+Then some tests with unmodified source code. -+@enumerate -+@item H, all settings normal. -+@item As (1), with small-mode decompress. -+@item H, compress with flag @code{-1}. -+@item H, compress with flag @code{-s}, decompress with flag @code{-s}. -+@item Forwards compatibility: H, @code{bzip2-0.1pl2} compressing, -+ @code{bzip2-0.9.5} decompressing, all settings normal. -+@item Backwards compatibility: H, @code{bzip2-0.9.5} compressing, -+ @code{bzip2-0.1pl2} decompressing, all settings normal. -+@item Bigger tests: A, all settings normal. -+@item As (7), using the fallback (Sadakane-like) sorting algorithm. -+@item As (8), compress with flag @code{-1}, decompress with flag -+ @code{-s}. -+@item H, using the fallback sorting algorithm. -+@item Forwards compatibility: A, @code{bzip2-0.1pl2} compressing, -+ @code{bzip2-0.9.5} decompressing, all settings normal. -+@item Backwards compatibility: A, @code{bzip2-0.9.5} compressing, -+ @code{bzip2-0.1pl2} decompressing, all settings normal. -+@item Misc test: about 400 megabytes of @code{.tar} files with -+ @code{bzip2} compiled with Checker (a memory access error -+ detector, like Purify). -+@item Misc tests to make sure it builds and runs ok on non-Linux/x86 -+ platforms. -+@end enumerate -+These tests were conducted on a 225 MHz IDT WinChip machine, running -+Linux 2.0.36. They represent nearly a week of continuous computation. -+All tests completed successfully. -+ -+ -+@section Further reading -+@code{bzip2} is not research work, in the sense that it doesn't present -+any new ideas. Rather, it's an engineering exercise based on existing -+ideas. -+ -+Four documents describe essentially all the ideas behind @code{bzip2}: -+@example -+Michael Burrows and D. J. Wheeler: -+ "A block-sorting lossless data compression algorithm" -+ 10th May 1994. -+ Digital SRC Research Report 124. -+ ftp://ftp.digital.com/pub/DEC/SRC/research-reports/SRC-124.ps.gz -+ If you have trouble finding it, try searching at the -+ New Zealand Digital Library, http://www.nzdl.org. -+ -+Daniel S. Hirschberg and Debra A. LeLewer -+ "Efficient Decoding of Prefix Codes" -+ Communications of the ACM, April 1990, Vol 33, Number 4. -+ You might be able to get an electronic copy of this -+ from the ACM Digital Library. -+ -+David J. Wheeler -+ Program bred3.c and accompanying document bred3.ps. -+ This contains the idea behind the multi-table Huffman -+ coding scheme. -+ ftp://ftp.cl.cam.ac.uk/users/djw3/ -+ -+Jon L. Bentley and Robert Sedgewick -+ "Fast Algorithms for Sorting and Searching Strings" -+ Available from Sedgewick's web page, -+ www.cs.princeton.edu/~rs -+@end example -+The following paper gives valuable additional insights into the -+algorithm, but is not immediately the basis of any code -+used in bzip2. -+@example -+Peter Fenwick: -+ Block Sorting Text Compression -+ Proceedings of the 19th Australasian Computer Science Conference, -+ Melbourne, Australia. Jan 31 - Feb 2, 1996. -+ ftp://ftp.cs.auckland.ac.nz/pub/peter-f/ACSC96paper.ps -+@end example -+Kunihiko Sadakane's sorting algorithm, mentioned above, -+is available from: -+@example -+http://naomi.is.s.u-tokyo.ac.jp/~sada/papers/Sada98b.ps.gz -+@end example -+The Manber-Myers suffix array construction -+algorithm is described in a paper -+available from: -+@example -+http://www.cs.arizona.edu/people/gene/PAPERS/suffix.ps -+@end example -+Finally, the following paper documents some recent investigations -+I made into the performance of sorting algorithms: -+@example -+Julian Seward: -+ On the Performance of BWT Sorting Algorithms -+ Proceedings of the IEEE Data Compression Conference 2000 -+ Snowbird, Utah. 28-30 March 2000. -+@end example -+ -+ -+@contents -+ -+@bye -+ -diff -Nru bzip2-1.0.2/doc/bzip2recover.1 bzip2-1.0.2.new/doc/bzip2recover.1 ---- bzip2-1.0.2/doc/bzip2recover.1 Thu Jan 1 01:00:00 1970 -+++ bzip2-1.0.2.new/doc/bzip2recover.1 Fri Feb 1 04:19:11 2002 -@@ -0,0 +1 @@ -+.so bzip2.1 -\ No newline at end of file -diff -Nru bzip2-1.0.2/doc/bzmore.1 bzip2-1.0.2.new/doc/bzmore.1 ---- bzip2-1.0.2/doc/bzmore.1 Thu Jan 1 01:00:00 1970 -+++ bzip2-1.0.2.new/doc/bzmore.1 Sun Dec 30 03:12:35 2001 -@@ -0,0 +1,152 @@ -+.\"Shamelessly copied from zmore.1 by Philippe Troin -+.\"for Debian GNU/Linux -+.TH BZMORE 1 -+.SH NAME -+bzmore, bzless \- file perusal filter for crt viewing of bzip2 compressed text -+.SH SYNOPSIS -+.B bzmore -+[ name ... ] -+.br -+.B bzless -+[ name ... ] -+.SH NOTE -+In the following description, -+.I bzless -+and -+.I less -+can be used interchangeably with -+.I bzmore -+and -+.I more. -+.SH DESCRIPTION -+.I Bzmore -+is a filter which allows examination of compressed or plain text files -+one screenful at a time on a soft-copy terminal. -+.I bzmore -+works on files compressed with -+.I bzip2 -+and also on uncompressed files. -+If a file does not exist, -+.I bzmore -+looks for a file of the same name with the addition of a .bz2 suffix. -+.PP -+.I Bzmore -+normally pauses after each screenful, printing --More-- -+at the bottom of the screen. -+If the user then types a carriage return, one more line is displayed. -+If the user hits a space, -+another screenful is displayed. Other possibilities are enumerated later. -+.PP -+.I Bzmore -+looks in the file -+.I /etc/termcap -+to determine terminal characteristics, -+and to determine the default window size. -+On a terminal capable of displaying 24 lines, -+the default window size is 22 lines. -+Other sequences which may be typed when -+.I bzmore -+pauses, and their effects, are as follows (\fIi\fP is an optional integer -+argument, defaulting to 1) : -+.PP -+.IP \fIi\|\fP -+display -+.I i -+more lines, (or another screenful if no argument is given) -+.PP -+.IP ^D -+display 11 more lines (a ``scroll''). -+If -+.I i -+is given, then the scroll size is set to \fIi\|\fP. -+.PP -+.IP d -+same as ^D (control-D) -+.PP -+.IP \fIi\|\fPz -+same as typing a space except that \fIi\|\fP, if present, becomes the new -+window size. Note that the window size reverts back to the default at the -+end of the current file. -+.PP -+.IP \fIi\|\fPs -+skip \fIi\|\fP lines and print a screenful of lines -+.PP -+.IP \fIi\|\fPf -+skip \fIi\fP screenfuls and print a screenful of lines -+.PP -+.IP "q or Q" -+quit reading the current file; go on to the next (if any) -+.PP -+.IP "e or q" -+When the prompt --More--(Next file: -+.IR file ) -+is printed, this command causes bzmore to exit. -+.PP -+.IP s -+When the prompt --More--(Next file: -+.IR file ) -+is printed, this command causes bzmore to skip the next file and continue. -+.PP -+.IP = -+Display the current line number. -+.PP -+.IP \fIi\|\fP/expr -+search for the \fIi\|\fP-th occurrence of the regular expression \fIexpr.\fP -+If the pattern is not found, -+.I bzmore -+goes on to the next file (if any). -+Otherwise, a screenful is displayed, starting two lines before the place -+where the expression was found. -+The user's erase and kill characters may be used to edit the regular -+expression. -+Erasing back past the first column cancels the search command. -+.PP -+.IP \fIi\|\fPn -+search for the \fIi\|\fP-th occurrence of the last regular expression entered. -+.PP -+.IP !command -+invoke a shell with \fIcommand\|\fP. -+The character `!' in "command" are replaced with the -+previous shell command. The sequence "\\!" is replaced by "!". -+.PP -+.IP ":q or :Q" -+quit reading the current file; go on to the next (if any) -+(same as q or Q). -+.PP -+.IP . -+(dot) repeat the previous command. -+.PP -+The commands take effect immediately, i.e., it is not necessary to -+type a carriage return. -+Up to the time when the command character itself is given, -+the user may hit the line kill character to cancel the numerical -+argument being formed. -+In addition, the user may hit the erase character to redisplay the -+--More-- message. -+.PP -+At any time when output is being sent to the terminal, the user can -+hit the quit key (normally control\-\\). -+.I Bzmore -+will stop sending output, and will display the usual --More-- -+prompt. -+The user may then enter one of the above commands in the normal manner. -+Unfortunately, some output is lost when this is done, due to the -+fact that any characters waiting in the terminal's output queue -+are flushed when the quit signal occurs. -+.PP -+The terminal is set to -+.I noecho -+mode by this program so that the output can be continuous. -+What you type will thus not show on your terminal, except for the / and ! -+commands. -+.PP -+If the standard output is not a teletype, then -+.I bzmore -+acts just like -+.I bzcat, -+except that a header is printed before each file. -+.SH FILES -+.DT -+/etc/termcap Terminal data base -+.SH "SEE ALSO" -+more(1), less(1), bzip2(1), bzdiff(1), bzgrep(1) -diff -Nru bzip2-1.0.2/doc/pl/Makefile.am bzip2-1.0.2.new/doc/pl/Makefile.am ---- bzip2-1.0.2/doc/pl/Makefile.am Thu Jan 1 01:00:00 1970 -+++ bzip2-1.0.2.new/doc/pl/Makefile.am Fri Feb 1 04:19:11 2002 -@@ -0,0 +1,4 @@ -+ -+mandir = @mandir@/pl -+man_MANS = bzip2.1 bunzip2.1 bzcat.1 bzip2recover.1 -+ -diff -Nru bzip2-1.0.2/doc/pl/bunzip2.1 bzip2-1.0.2.new/doc/pl/bunzip2.1 ---- bzip2-1.0.2/doc/pl/bunzip2.1 Thu Jan 1 01:00:00 1970 -+++ bzip2-1.0.2.new/doc/pl/bunzip2.1 Fri Feb 1 04:19:11 2002 -@@ -0,0 +1 @@ -+.so bzip2.1 -\ No newline at end of file -diff -Nru bzip2-1.0.2/doc/pl/bzcat.1 bzip2-1.0.2.new/doc/pl/bzcat.1 ---- bzip2-1.0.2/doc/pl/bzcat.1 Thu Jan 1 01:00:00 1970 -+++ bzip2-1.0.2.new/doc/pl/bzcat.1 Fri Feb 1 04:19:11 2002 -@@ -0,0 +1 @@ -+.so bzip2.1 -\ No newline at end of file -diff -Nru bzip2-1.0.2/doc/pl/bzip2.1 bzip2-1.0.2.new/doc/pl/bzip2.1 ---- bzip2-1.0.2/doc/pl/bzip2.1 Thu Jan 1 01:00:00 1970 -+++ bzip2-1.0.2.new/doc/pl/bzip2.1 Fri Feb 1 04:19:11 2002 -@@ -0,0 +1,384 @@ -+.\" T³umaczenie Maciej Wojciechowski wojciech@staszic.waw.pl -+.PU -+.TH bzip2 1 "" "" "wersja 1.0" -+.SH NAZWA -+bzip2, bunzip2 \- sortuj±cy bloki kompresor/dekompresor plików, v1.0 -+.br -+bzcat \- dekompresuje pliki na standardowe wyj¶cie -+.br -+bzip2recover \- odzyskuje dane ze zniszczonych archiwów bzip2 -+.SH SK£ADNIA -+.ll +8 -+.B bzip2 -+.RB [ \-cdfkqstvzVL123456789 ] -+.RI [ nazwy_plików \&...] -+.ll -8 -+.br -+.B bunzip2 -+.RB [ \-fkvsVL ] -+.RI [ nazwy_plików \&...] -+.br -+.B bzcat -+.RB [ \-s ] -+.RI [ nazwy_plików \&...] -+.br -+.B bzip2recover -+.I nazwa_pliku -+.SH OPIS -+.I bzip2 -+kompresuje pliki u¿ywaj±c algorytmu sortowania bloków Burrowsa-Wheelera i -+kodu Huffmana. Kompresja jest generalnie sporo lepsza od konwencjonalnych -+kompresorów opartych o metodê LZ77/LZ78, i jest porównywalna z -+osi±gniêciami statystycznych kompresorów z rodziny PPM. -+ -+Opcje linii poleceñ s± w wiêkszo¶ci bardzo podobne do tych z -+.IR "GNU gzip" , -+ale nie s± identyczne. -+ -+.I bzip2 -+oczekuje listy plików towarzysz±cych parametrom linii poleceñ. Ka¿dy plik jest -+zastêpowany przez swoj± skompresowan± wersjê, z nazw± -+"oryginalny_plik.bz2". Ka¿dy skompresowany plik ma ten sam czas modyfikacji, -+uprawnienia i, je¶li to mo¿liwe, w³a¶ciciela co orygina³, po to, aby te -+ustawienia mog³y zostaæ odtworzone podczas dekompresji. Utrzymywanie nazwy -+plików nie jest do koñca dok³adne w tym sensie, ¿e nie ma mo¿liwo¶ci -+przetrzymywania daty, uprawnieñ, w³a¶ciciela i nazw plików na systemach, na -+których brakuje tych mo¿liwo¶ci lub maj± ograniczenia co do d³ugo¶ci nazwy, -+tak np. jak MS-DOS. -+ -+.I bzip2 -+i -+.I bunzip2 -+standardowo nie nadpisuj± istniej±cych ju¿ plików. Je¶li chcesz aby to -+robi³y, musisz u¿yæ parametru \-f. -+ -+Je¶li nie podano ¿adnej nazwy pliku, -+.I bzip2 -+kompresuje ze standardowego wej¶cia na standardowe wyj¶cie. Odmiawia wówczas -+wypisywania skompresowanego wyj¶cie na terminal, gdy¿ by³oby to -+ca³kiem niezrozumia³e i przez to bez wiêkszego sensu. -+ -+.I bunzip2 -+(lub -+.IR bzip2 \-d ) -+dekompresuje wszystkie podane pliki. Pliki, które nie by³y -+utworzone przez -+.I bzip2 -+zostan± wykryte i zignorowane, a na ekranie pojawi siê komunikat -+ostrzegawczy. -+.I bzip2 -+próbuje zgadn±æ nazwê dla dekompresowanego pliku w nastêpuj±cy sposób: -+.nf -+ nazwa_pliku.bz2 staje siê nazwa_pliku -+ nazwa_pliku.bz staje siê nazwa_pliku -+ nazwa_pliku.tbz2 staje siê nazwa_pliku.tar -+ nazwa_pliku.tbz staje siê nazwa_pliku.tar -+ inna_nazwa staje siê inna_nazwa.out -+.fi -+Je¶li plik nie ma jednego z nastêpuj±cych rozpoznawalnych rozszerzeñ, -+.IR .bz2 , -+.IR .bz , -+.I .tbz2 -+lub -+.IR .tbz , -+to -+.I bzip2 -+napisze, ¿e nie mo¿e zgadn±æ nazwy pierwotnego pliku, i u¿yje -+oryginalnej nazwy z dodanym rozszerzeniem -+.IR .out . -+ -+Tak jak kompresja, nie posiadaj±ca ¿adnych plików, powoduje kompresjê ze -+standardowego wej¶cia na standardowe wyj¶cie. -+ -+.I bunzip2 -+poprawnie zdekompresuje plik, który jest po³aczeniem dwóch lub wiêcej -+skompresowanych plików. Rezultatem jest po³±czony odpowiedni -+nieskompresowany plik. Obs³ugiwane jest równie¿ sprawdzanie spójno¶ci -+(\-t) po³±czonych skompresowanych plików. -+ -+Mo¿esz równie¿ kompresowaæ lub dekompresowaæ pliki na standardowe wyj¶cie -+u¿ywaj±c parametru \-c. W ten w³a¶nie sposób mo¿na przeprowadzaæ kompresjê -+wielu plików równocze¶nie. -+Powsta³e wyniki s± przesy³ane sekwencyjnie na standardowe wyj¶cie. -+W ten sposób kompresja wielu plików generuje strumieñ -+zawieraj±cy reprezentacje kilku skompresowanych plików. Taki strumieñ mo¿e -+byæ zdekompresowany poprawnie tylko przez -+.I bzip2 -+w wersji 0.9.0 lub pó¼niejszej. Wcze¶niejsze wersje -+.I bzip2 -+zatrzymaj± siê po zdekmpresowaniu pierwszego pliku w strumieniu. -+ -+.I bzcat -+(lub -+.I bzip2 -dc) -+dekompresuje wszystkie wybrane pliki na standardowe wyj¶cie. -+ -+.I bzip2 -+czyta argumenty ze zmiennych ¶rodowiskowych -+.I BZIP2 -+i -+.I BZIP, -+w podanej kolejno¶ci, i przetwarza je przed jakimikolwiek argumentami -+przeczytanymi z linii poleceñ. To dobra metoda na specyfikowanie -+standardowych ustawieñ. -+ -+Kompresja stosowana jest zawsze, nawet je¶li skompresowany plik jest -+nieznaczniej wiêkszy od pliku oryginalnego. Pliki mniejsze ni¿ mniej wiêcej -+sto bajtów staj± siê wiêksze, poniewa¿ mechanizm kompresji ma sta³y -+nag³ówek wynosz±cy oko³o 50 bajtów. Przypadkowe dane (w³±czaj±c wyj¶cie -+wiêkszo¶ci kompresorów plików) d± kodowane na mniej wiêcej 8.05 bitu na -+bajt, daj±c zysk oko³o 0.5%. -+ -+Jako samosprawdzenie dla twojej ochrony -+.I bzip2 -+u¿ywa 32-bitowego CRC aby upewniæ siê, ¿e zdekompresowana wersja pliku jest -+identyczna z oryginaln±. To strze¿e przed stratami w skompresowanych danych -+i przed niewykrytymi b³êdami w -+.I bzip2 -+(na szczê¶cie bardzo rzadkich). Mo¿liwo¶æ niewykrycia utraty danych -+jest mikroskopijna, mniej wiêcej jedna szansa na cztery biliony dla ka¿dego -+pliku. Uwa¿aj jednak, gdy¿ sprawdzenie jest dokonywane przed dekompresj±, -+wiêc dowiesz siê tylko tego, ¿e co¶ jest nie w porz±dku. Nie pomo¿e ci to odzyskaæ -+oryginalnych nieskompresowanych danych. Mo¿esz u¿yæ -+.I bzip2recover -+aby spróbowaæ odzyskaæ dane z uszkodzonych plików. -+ -+Zwracane warto¶ci: 0 dla normalnego wyj¶cia, 1 dla problemów technicznych -+(plik nie znaleziony, niew³a¶ciwy parametr, b³±d wyj¶cia/wyj¶cia itp.), 2 dla -+zasygnalizowania b³êdu skompresowanego pliku, 3 dla wewnêtrznego b³êdu (np. -+bug), który zmusi³ \fIbzip2\fP do przerwania. -+ -+.SH OPCJE -+.TP -+.B \-c --stdout -+Kompresuje lub dekompresuje na standardowe wyj¶cie. -+.TP -+.B \-d --decompress -+Wymusza dekompresjê. -+.IR bzip2 , -+.I bunzip2 -+i -+.I bzcat -+s± tak naprawdê tymi samymi programami i decyzja jakie akcje bêd± wykonane -+jest wykonywana na podstawie nazwy jaka zosta³a u¿yta. Ten parametr ma wy¿szy -+priorytet i wymusza na \fIbzip2\fP dekompresjê. -+.TP -+.B \-z --compress -+Podobne do \-d: wymusza kompresjê, bez wzglêdu na sposób wywo³ania. -+.TP -+.B \-t --test -+Sprawdza integralno¶æ wybranego pliku(ów), ale nie dekompresuje ich. Wymusza -+to próbn± dekompresjê i mówi, jaki jest rezultat. -+.TP -+.B \-f --force -+Wymusza zastêpowanie plików wyj¶ciowych. Normalnie, \fIbzip2\fP nie -+zastêpuje istniej±cych plików wyj¶ciowych. Wymusza równie¿ na \fIbzip2\fP -+³amanie dowi±zañ twardych, czego normalnie nie robi. -+.TP -+.B \-k --keep -+Zatrzymaj (nie kasuj) pliki wej¶ciowe przy kompresji lub dekompresji. -+.TP -+.B \-s --small -+Zredukuj u¿ycie pamiêci na kompresjê, dekompresjê i testowanie. Pliki s± -+dekompresowane i testowane przy u¿yciu zmodyfikowanego algorytmu, który -+potrzebuje tylko 2.5 bajtu na blok bajtów. Oznacza to, ¿e ka¿dy plik mo¿e -+byæ zdekompresowany przy u¿yciu oko³o 2300k pamiêci, jednak trac±c oko³o po³owê -+normalnej szybko¶ci. -+ -+Podczas kompresji, \-s wybiera bloki wielko¶ci 200k, których limity -+pamiêci wynosz± mniej wiêcej tyle samo, w zamian za jako¶æ kompresji. W -+skrócie, je¶li twój komputer ma ma³o pamiêci (8 megabajtów lub mniej), -+u¿ywaj opcji \-s do wszystkiego. Zobacz \fBzarz±dzanie pamiêci±\fP poni¿ej. -+.TP -+.B \-q --quiet -+Wy³±cza wszystkie nieistotne komunikaty ostrzegawcze. -+Nie s± eliminowane komunikaty dotycz±ce b³êdów wej¶cia/wyj¶cia i innych -+zdarzeñ krytycznych. -+.TP -+.B \-v --verbose -+Tryb gadatliwy -- pokazuje stopieñ kompresji dla ka¿dego pliku. Nastêpne -+\fB\-v\fP zwiêkszaj± stopieñ gadatliwo¶ci, powoduj±c wy¶wietlanie du¿ej -+ilo¶ci informacji, przydatnych g³ównie przy diagnostyce. -+.TP -+.B \-L --license -V --version -+Wy¶wietla wersjê programu i warunki licencji. -+.TP -+.B \-1 to \-9 -+Ustawia wielko¶æ bloku na 100 k, 200 k .. 900 k przy kompresji. Nie ma -+¿adnego znaczenia przy dekompresji. Zobacz \fBzarz±dzanie pamiêci±\fP -+poni¿ej. -+.TP -+.B \-- -+Traktuje wszystkie nastêpuj±ce po nim argumenty jako nazwy plików, nawet je¶li -+zaczynaj± siê one od my¶lnika. Mo¿esz wiêc kompresowaæ i dekompresowaæ -+pliki, których nazwa zaczyna siê od my¶lnika, na przyk³ad: bzip2 \-- -+\-mój_plik. -+.TP -+.B \--repetitive-fast --repetitive-best -+Te parametry nie maj± znaczenia w wersjach 0.9.5 i wy¿szych. Umo¿liwia³y one -+pewn± infantyln± kontrolê nad zachowaniem algorytmu sortuj±cego we -+wcze¶niejszych wersjach, co by³o czasami u¿yteczne. Wersje 0.9.5 i wy¿sze -+maj± usprawniony algorytm, który powoduje bezu¿yteczno¶æ tej funkcji. -+ -+.SH ZARZ¡DZANIE PAMIÊCI¡ -+.I bzip2 -+kompresuje du¿e pliki w blokach. Rozmiar bloku ma wp³yw zarówno na stopieñ -+osi±ganej kompresji, jak równie¿ na ilo¶æ pamiêci potrzebnej do kompresji -+i dekompresji. Parametry od \-1 do \-9 wybieraj± rozmiar bloku odpowiednio -+od 100,000 bajtów a¿ do 900,000 bajtów (standardowo). W czasie dekompresji, -+rozmiar bloku u¿ytego do kompresji jest odczytywany z nag³ówku pliku -+skompresowanego i -+.I bunzip2 -+sam zajmuje odpowiedni± do dekompresji ilo¶æ pamiêci. Poniewa¿ rozmiar -+bloków jest przetrzymywany w pliku skompresowanym, parametry od \-1 do \-9 -+nie maj± przy dekompresji ¿adnego znaczenia. -+ -+Wymagania kompresji i dekompresji w bajtach, mog± byæ wyliczone przez: -+ -+ Kompresja : 400k + ( 8 x rozmiar bloku ) -+ -+ Dekompresja : 100k + ( 4 x rozmiar bloku ) lub -+ 100k + ( 2.5 x rozmiar bloku ) -+ -+Wiêksze bloki daj± du¿e zmniejszenie zwrotów marginalnych. Wiêkszo¶æ -+kompresji pochodzi z pierwszych stu lub dwustu kilobajtów rozmiaru bloku. -+Warto o tym pamiêtaæ u¿ywaj±c \fIbzip2\fP na wolnych -+komputerach. Warto równie¿ podkre¶liæ, ¿e rozmiar pamiêci potrzebnej do -+dekompresji jest wybierany poprzez ustawienie odpowiedniej -+wielko¶ci bloku przy kompresji. -+ -+Dla plików skompresowanych standardowym blokiem wielko¶ci 900k, -+\fIbunzip2\fP bêdzie wymaga³ oko³o 3700 kilobajtów do dekompresji. Aby -+umo¿liwiæ dekompresjê na komputerze wyposa¿onym jedynie w 4 megabajty -+pamiêci, \fIbunzip2\fP ma opcjê, która mo¿e zmniejszyæ wymagania prawie do -+po³owy, tzn. oko³o 2300 kilobajtów. Prêdko¶æ dekompresji jest równie¿ bardzo -+zmiejszona, wiêc u¿ywaj tej opcji tylko wtedy, kiedy jest to konieczne. Tym -+parametrem jest -s. -+ -+Generalnie, próbuj i u¿ywaj najwiêkszych rozmiarów bloków, je¶li ilo¶æ -+pamiêci ci na to pozwala. Prêdko¶æ kompresji i dekompresji w zasadzie nie -+zale¿y od wielko¶ci u¿ytego bloku. -+ -+Inna wa¿na rzecz dotyczy plików, które mieszcz± siê w pojedyñczym bloku -- -+oznacza to wiêkszo¶æ plików na które siê natkniesz u¿ywaj±c du¿ych bloków. -+Rozmiar realny pamiêci zabieranej jest proporcjonalny do wielko¶ci pliku, -+je¶li plik jest mniejszy ni¿ blok. Na przyk³ad, kompresja pliku o -+wielko¶ci 20,000 bajtów z parametrem -9 wymusi na kompresorze odnalezienie -+7600 k pamiêci, ale zajêcie tylko 400k + 20000 * 8 = 560 kilobajtów z -+tego. Podobnie, dekompresor odnajdzie 3700k, ale zajmie tylko 100k + 20000 -+* 4 = 180 kilobajtów. -+ -+Tu jest tabela, która podsumowuje maksymalne u¿ycie pamiêci dla ró¿nych -+rozmiarów bloków. Podano te¿ ca³kowity rozmiar skompresowanych 14 -+plików tekstowych (Calgary Text Compressione Corpus) zajmuj±cych razem -+3,141,622 bajtów. Ta kolumna daje pewne pojêcie o tym, jaki wp³yw na -+kompresjê ma wielko¶æ bloków. Ta tabela uzmys³awia równie¿ przewagê u¿ycia -+wiêkszych bloków dla wiêkszych plików, poniewa¿ "Corpus" jest zdominowany -+przez mniejsze pliki. -+.nf -+ U¿ycie U¿ycie U¿ycie Corpus -+ Parametr kompresji dekompresji dekompresji -s Size -+ -+ -1 1200k 500k 350k 914704 -+ -2 2000k 900k 600k 877703 -+ -3 2800k 1300k 850k 860338 -+ -4 3600k 1700k 1100k 846899 -+ -5 4400k 2100k 1350k 845160 -+ -6 5200k 2500k 1600k 838626 -+ -7 6100k 2900k 1850k 834096 -+ -8 6800k 3300k 2100k 828642 -+ -9 7600k 3700k 2350k 828642 -+.fi -+.SH ODZYSKIWANIE DANYCH ZE ZNISZCZONYCH PLIKÓW BZIP2 -+.I bzip2 -+kompresuje pliki w blokach, zazwyczaj 900 kilbajtowych. Ka¿dy blok jest -+trzymany osobno. Je¶li b³êdy transmisji lub no¶nika uszkodz± plik -+wieloblokowy .bz2, mo¿liwe jest odtworzenie danych zawartych w -+niezniszczonych blokach pliku. -+ -+Ka¿dy blok jest reprezentowany przez 48-bitowy wzorzec, który umo¿liwia -+znajdowanie przyporz±dkowañ bloków z rozs±dn± pewno¶ci±. Ka¿dy blok -+ma równie¿ swój 32-bitowy CRC, wiêc bloki uszkodzone mog± byæ ³atwo -+odseparowane od poprawnych. -+ -+.I bzip2recover -+jest oddzielnym programem, którego zadaniem jest poszukiwanie bloków w -+plikach .bz2 i zapisywanie ich do w³asnego pliku .bz2. Mo¿esz potem u¿yæ -+\fIbzip2\fP \-t aby sprawdziæ spójno¶æ wyj¶ciowego pliku i zdekompresowaæ -+te, które nie s± uszkodzone. -+ -+.I bzip2recover -+pobiera pojedynczy argument, nazwê uszkodzonego pliku, i tworzy pewn± liczbê -+plików "rec0001plik.bz2", "rec0002plik.bz2", itd., przetrzymuj±ce odzyskane -+bloki. Wyj¶ciowe nazwy plików s± tak tworzone, aby ³atwo by³o potem u¿ywaæ -+ich razem za pomoc± gwiazdek -- na przyk³ad, "bzip2 -dc rec*plik.bz2 > -+odzyskany_plik" -- wylistuje pliki we w³a¶ciwej kolejno¶ci. -+ -+.I bzip2recover -+powinien byæ u¿ywany najczê¶ciej z du¿ymi plikami .bz2, jako i¿ one -+zawieraj± najczê¶ciej du¿o bloków. Jest czystym bezsensem u¿ywaæ go na -+uszkodzonym jedno-blokowym pliku, poniewa¿ uszkodzony blok nie mo¿e byæ -+odzyskany. Je¶li chcesz zminimalizowaæ jakiekolwiek mo¿liwe straty danych -+poprzez no¶nik lub transmisjê, powiniene¶ zastanowiæ siê nad u¿yciem -+mniejszych bloków. -+ -+.SH OPISY WYNIKÓW -+Etap sortuj±cy kompresji ³±czy razem podobne ci±gi znaków w pliku. Przez -+to, pliki zawieraj±ce bardzo d³ugie ci±gi powtarzaj±cych siê symboli, jak -+"aabaabaabaab ..." (powtórzone kilkaset razy) mog± byæ kompresowane wolniej -+ni¿ normalnie. Wersje 0.9.5 i wy¿sze zachowuj± siê du¿o lepiej w tej -+sytuacji ni¿ wersje poprzednie. Ró¿nica stopnia kompresji pomiêdzy -+najgorszym i najlepszym przypadkiem kompresji wynosi oko³o 10:1. Dla -+wcze¶niejszych wersji by³o to nawet oko³o 100:1. Je¶li chcesz, mo¿esz u¿yæ -+parametru \-vvvv aby monitorowaæ postêpy bardzo szczegó³owo. -+ -+Prêdko¶æ dekompresji nie jest zmieniana przez to zjawisko. -+ -+.I bzip2 -+zazwyczaj rezerwuje kilka megabajtów pamiêci do dzia³ania a -+potem wykorzystuje j± w sposób zupe³nie przypadkowy. -+Oznacza to, ¿e zarówno prêdko¶æ kompresji jak i dekompresji jest w -+du¿ej czê¶ci zale¿na od prêdko¶ci, z jak± twój komputer mo¿e naprawiaæ braki -+bufora podrêcznego. Z tego powodu, wprowadzone zosta³y ma³e zmiany kody aby -+zmniejszyæ straty, które da³y nieproporcjonalnie du¿y wzrost osi±gniêæ. -+My¶lê, ¿e -+.I bzip2 -+bêdzie dzia³a³ najlepiej na komputerach z du¿ymi buforami podrêcznymi. -+ -+.SH ZAKAMARKI -+Wiadomo¶ci o b³êdach wej¶cia/wyj¶cia nie s± a¿ tak pomocne, jak mog³yby byæ. -+.I bzip2 -+stara siê wykryæ b³±d wej¶cia/wyj¶cia i wyj¶æ "czysto", ale -+szczegó³y tego, jaki to problem mog± byæ czasami bardzo myl±ce. -+ -+Ta strona podrêcznika odnosi siê do wersji 1.0 programu \fIbzip2\fP. -+Skompresowane pliki utworzone przez tê wersjê s± kompatybilne zarówno z -+w przód jak i wstecznie z poprzednimi publicznymi wydaniami, -+wersjami 0.1pl2, 0.9.0 i 0.9.5 ale z ma³ymi wyj±tkami: 0.9.0 i wy¿sze potrafi± -+poprawnie dekompresowaæ wiele skompresowanych plików z³±czonych w jeden. -+0.1pl2 nie potrafi tego; zatrzyma siê ju¿ po dekompresji pierwszego pliku w -+strumieniu. -+ -+.I bzip2recover -+u¿ywa 32-bitowych liczb do reprezentacji pozycji bitu w skompresowanym -+pliku, wiêc nie mo¿e przetwarzaæ skompresowanych plików d³u¿szych ni¿ 512 -+megabajtów. Mo¿na to ³atwo naprawiæ. -+ -+.SH AUTOR -+Julian Seward, jseward@acm.org. -+ -+http://www.muraroa.demon.co.uk -+http://sourceware.cygnus.com/bzip2 -+ -+Idee zawarte w \fIbzip2\fP s± podzielone (przynajmniej) pomiêdzy -+nastepuj±cy ludzi: Michael Burrows i David Wheeler (transformacja -+sortuj±c± bloki), David Wheeler (znów, koder Huffmana), Peter Fenwick -+(struktura kodowania modelu w oryginalnym \fIbzip2\fP, i wiele -+udoskonaleñ), i Alistair Moffar, Radford Neal i Ian Witten (arytmetyczny -+koder w oryginalnym \fIbzip2\fP). Jestem im bardzo wdziêczny za ich pomoc, -+wsparcie i porady. Zobacz stronê manuala w ¼ród³owej dystrybucji po -+wska¼niki do ¼róde³ dokumentacji. Christian von Roques zachêci³ mnie do -+wymy¶lenia szybszego algorytmu sortuj±cego, po to ¿eby przyspieszyæ -+kompresjê. Bela Lubkin zachêci³a mnie do polepszenia najgorszych wyników -+kompresji. Wiele ludzi przys³a³o ³atki, pomog³o w ró¿nych problemach, -+po¿yczy³o komputerów, da³o rady i by³o ogólnie pomocnych. -diff -Nru bzip2-1.0.2/doc/pl/bzip2recover.1 bzip2-1.0.2.new/doc/pl/bzip2recover.1 ---- bzip2-1.0.2/doc/pl/bzip2recover.1 Thu Jan 1 01:00:00 1970 -+++ bzip2-1.0.2.new/doc/pl/bzip2recover.1 Fri Feb 1 04:19:11 2002 -@@ -0,0 +1 @@ -+.so bzip2.1 -\ No newline at end of file -diff -Nru bzip2-1.0.2/huffman.c bzip2-1.0.2.new/huffman.c ---- bzip2-1.0.2/huffman.c Sun Dec 30 03:19:17 2001 -+++ bzip2-1.0.2.new/huffman.c Fri Feb 1 04:19:11 2002 -@@ -58,6 +58,10 @@ - For more information on these sources, see the manual. - --*/ - -+#ifdef HAVE_CONFIG_H -+#include -+#endif -+ - - #include "bzlib_private.h" - -diff -Nru bzip2-1.0.2/makefile.msc bzip2-1.0.2.new/makefile.msc ---- bzip2-1.0.2/makefile.msc Wed Jan 2 05:02:33 2002 -+++ bzip2-1.0.2.new/makefile.msc Thu Jan 1 01:00:00 1970 -@@ -1,63 +0,0 @@ --# Makefile for Microsoft Visual C++ 6.0 --# usage: nmake -f makefile.msc --# K.M. Syring (syring@gsf.de) --# Fixed up by JRS for bzip2-0.9.5d release. -- --CC=cl --CFLAGS= -DWIN32 -MD -Ox -D_FILE_OFFSET_BITS=64 -nologo -- --OBJS= blocksort.obj \ -- huffman.obj \ -- crctable.obj \ -- randtable.obj \ -- compress.obj \ -- decompress.obj \ -- bzlib.obj -- --all: lib bzip2 test -- --bzip2: lib -- $(CC) $(CFLAGS) -o bzip2 bzip2.c libbz2.lib setargv.obj -- $(CC) $(CFLAGS) -o bzip2recover bzip2recover.c -- --lib: $(OBJS) -- lib /out:libbz2.lib $(OBJS) -- --test: bzip2 -- type words1 -- .\\bzip2 -1 < sample1.ref > sample1.rb2 -- .\\bzip2 -2 < sample2.ref > sample2.rb2 -- .\\bzip2 -3 < sample3.ref > sample3.rb2 -- .\\bzip2 -d < sample1.bz2 > sample1.tst -- .\\bzip2 -d < sample2.bz2 > sample2.tst -- .\\bzip2 -ds < sample3.bz2 > sample3.tst -- @echo All six of the fc's should find no differences. -- @echo If fc finds an error on sample3.bz2, this could be -- @echo because WinZip's 'TAR file smart CR/LF conversion' -- @echo is too clever for its own good. Disable this option. -- @echo The correct size for sample3.ref is 120,244. If it -- @echo is 150,251, WinZip has messed it up. -- fc sample1.bz2 sample1.rb2 -- fc sample2.bz2 sample2.rb2 -- fc sample3.bz2 sample3.rb2 -- fc sample1.tst sample1.ref -- fc sample2.tst sample2.ref -- fc sample3.tst sample3.ref -- -- -- --clean: -- del *.obj -- del libbz2.lib -- del bzip2.exe -- del bzip2recover.exe -- del sample1.rb2 -- del sample2.rb2 -- del sample3.rb2 -- del sample1.tst -- del sample2.tst -- del sample3.tst -- --.c.obj: -- $(CC) $(CFLAGS) -c $*.c -o $*.obj -- -diff -Nru bzip2-1.0.2/randtable.c bzip2-1.0.2.new/randtable.c --- bzip2-1.0.2/randtable.c Sun Dec 30 03:19:04 2001 +++ bzip2-1.0.2.new/randtable.c Fri Feb 1 04:19:18 2002 -@@ -58,6 +58,10 @@ +@@ -58,6 +58,9 @@ For more information on these sources, see the manual. --*/ +#ifdef HAVE_CONFIG_H +#include +#endif -+ #include "bzlib_private.h" -diff -Nru bzip2-1.0.2/spewG.c bzip2-1.0.2.new/spewG.c --- bzip2-1.0.2/spewG.c Thu Apr 6 01:24:45 2000 +++ bzip2-1.0.2.new/spewG.c Fri Feb 1 04:19:18 2002 -@@ -9,7 +9,10 @@ +@@ -9,7 +9,9 @@ (but is otherwise harmless). */ @@ -3893,27 +109,19 @@ diff -Nru bzip2-1.0.2/spewG.c bzip2-1.0.2.new/spewG.c +#ifdef HAVE_CONFIG_H +#include +#endif -+ #include #include -diff -Nru bzip2-1.0.2/stamp-h.in bzip2-1.0.2.new/stamp-h.in ---- bzip2-1.0.2/stamp-h.in Thu Jan 1 01:00:00 1970 -+++ bzip2-1.0.2.new/stamp-h.in Fri Feb 1 04:19:18 2002 -@@ -0,0 +1 @@ -+timestamp diff -Nru bzip2-1.0.2/unzcrash.c bzip2-1.0.2.new/unzcrash.c --- bzip2-1.0.2/unzcrash.c Thu Apr 6 01:24:46 2000 +++ bzip2-1.0.2.new/unzcrash.c Fri Feb 1 04:19:18 2002 -@@ -13,6 +13,12 @@ +@@ -13,6 +13,10 @@ many hours. */ +#ifdef HAVE_CONFIG_H +#include +#endif -+ -+ + #include #include -- 2.44.0