]>
Commit | Line | Data |
---|---|---|
15ee0650 | 1 | diff -Nru bzip2-1.0.2/Makefile.am bzip2-1.0.2.new/Makefile.am |
2 | --- bzip2-1.0.2/Makefile.am Thu Jan 1 01:00:00 1970 | |
3 | +++ bzip2-1.0.2.new/Makefile.am Fri Feb 1 04:19:09 2002 | |
d967e3ec | 4 | @@ -0,0 +1,31 @@ |
5 | +SUBDIRS = doc | |
6 | + | |
7 | +bin_PROGRAMS = bzip2 bzip2recover | |
8 | +bzip2_SOURCES = bzip2.c | |
9 | + | |
10 | +bzip2_LDADD = libbz2.la | |
11 | +bzip2recover_SOURCES = bzip2recover.c | |
12 | +lib_LTLIBRARIES = libbz2.la | |
13 | +libbz2_la_SOURCES = \ | |
14 | + blocksort.c \ | |
15 | + huffman.c \ | |
16 | + crctable.c \ | |
17 | + randtable.c \ | |
18 | + compress.c \ | |
19 | + decompress.c \ | |
20 | + bzlib.c \ | |
21 | + bzlib.h \ | |
22 | + bzlib_private.h | |
23 | + | |
24 | +libbz2_la_LDFLAGS = -version-info 1:0:0 | |
25 | +include_HEADERS = bzlib.h bzlib_private.h | |
26 | + | |
ff248cb7 | 27 | +bin_SCRIPTS = bzless bzgrep |
d967e3ec | 28 | + |
29 | +EXTRA_DIST = README README.COMPILATION.PROBLEMS \ | |
30 | + Y2K_INFO libbz2.def libbz2.dsp \ | |
31 | + sample1.bz2 sample1.ref sample2.bz2 sample2.ref sample3.bz2 sample3.ref | |
32 | + | |
33 | +install-exec-hook: | |
34 | + $(LN_S) -f bzip2 $(DESTDIR)$(bindir)/bunzip2 | |
35 | + $(LN_S) -f bzip2 $(DESTDIR)$(bindir)/bzcat | |
15ee0650 | 36 | diff -Nru bzip2-1.0.2/bzdiff.1 bzip2-1.0.2.new/bzdiff.1 |
37 | --- bzip2-1.0.2/bzdiff.1 Sun Dec 30 03:12:35 2001 | |
38 | +++ bzip2-1.0.2.new/bzdiff.1 Thu Jan 1 01:00:00 1970 | |
39 | @@ -1,47 +0,0 @@ | |
40 | -\"Shamelessly copied from zmore.1 by Philippe Troin <phil@fifi.org> | |
41 | -\"for Debian GNU/Linux | |
42 | -.TH BZDIFF 1 | |
43 | -.SH NAME | |
44 | -bzcmp, bzdiff \- compare bzip2 compressed files | |
45 | -.SH SYNOPSIS | |
46 | -.B bzcmp | |
47 | -[ cmp_options ] file1 | |
48 | -[ file2 ] | |
49 | -.br | |
50 | -.B bzdiff | |
51 | -[ diff_options ] file1 | |
52 | -[ file2 ] | |
53 | -.SH DESCRIPTION | |
54 | -.I Bzcmp | |
55 | -and | |
56 | -.I bzdiff | |
57 | -are used to invoke the | |
58 | -.I cmp | |
59 | -or the | |
60 | -.I diff | |
61 | -program on bzip2 compressed files. All options specified are passed | |
62 | -directly to | |
63 | -.I cmp | |
64 | -or | |
65 | -.IR diff "." | |
66 | -If only 1 file is specified, then the files compared are | |
67 | -.I file1 | |
68 | -and an uncompressed | |
69 | -.IR file1 ".bz2." | |
70 | -If two files are specified, then they are uncompressed if necessary and fed to | |
71 | -.I cmp | |
72 | -or | |
73 | -.IR diff "." | |
74 | -The exit status from | |
75 | -.I cmp | |
76 | -or | |
77 | -.I diff | |
78 | -is preserved. | |
79 | -.SH "SEE ALSO" | |
80 | -cmp(1), diff(1), bzmore(1), bzless(1), bzgrep(1), bzip2(1) | |
81 | -.SH BUGS | |
82 | -Messages from the | |
83 | -.I cmp | |
84 | -or | |
85 | -.I diff | |
86 | -programs refer to temporary filenames instead of those specified. | |
87 | diff -Nru bzip2-1.0.2/bzgrep.1 bzip2-1.0.2.new/bzgrep.1 | |
88 | --- bzip2-1.0.2/bzgrep.1 Sun Dec 30 03:12:35 2001 | |
89 | +++ bzip2-1.0.2.new/bzgrep.1 Thu Jan 1 01:00:00 1970 | |
90 | @@ -1,56 +0,0 @@ | |
91 | -\"Shamelessly copied from zmore.1 by Philippe Troin <phil@fifi.org> | |
92 | -\"for Debian GNU/Linux | |
93 | -.TH BZGREP 1 | |
94 | -.SH NAME | |
95 | -bzgrep, bzfgrep, bzegrep \- search possibly bzip2 compressed files for a regular expression | |
96 | -.SH SYNOPSIS | |
97 | -.B bzgrep | |
98 | -[ grep_options ] | |
99 | -.BI [\ -e\ ] " pattern" | |
100 | -.IR filename ".\|.\|." | |
101 | -.br | |
102 | -.B bzegrep | |
103 | -[ egrep_options ] | |
104 | -.BI [\ -e\ ] " pattern" | |
105 | -.IR filename ".\|.\|." | |
106 | -.br | |
107 | -.B bzfgrep | |
108 | -[ fgrep_options ] | |
109 | -.BI [\ -e\ ] " pattern" | |
110 | -.IR filename ".\|.\|." | |
111 | -.SH DESCRIPTION | |
112 | -.IR Bzgrep | |
113 | -is used to invoke the | |
114 | -.I grep | |
115 | -on bzip2-compressed files. All options specified are passed directly to | |
116 | -.I grep. | |
117 | -If no file is specified, then the standard input is decompressed | |
118 | -if necessary and fed to grep. | |
119 | -Otherwise the given files are uncompressed if necessary and fed to | |
120 | -.I grep. | |
121 | -.PP | |
122 | -If | |
123 | -.I bzgrep | |
124 | -is invoked as | |
125 | -.I bzegrep | |
126 | -or | |
127 | -.I bzfgrep | |
128 | -then | |
129 | -.I egrep | |
130 | -or | |
131 | -.I fgrep | |
132 | -is used instead of | |
133 | -.I grep. | |
134 | -If the GREP environment variable is set, | |
135 | -.I bzgrep | |
136 | -uses it as the | |
137 | -.I grep | |
138 | -program to be invoked. For example: | |
139 | - | |
140 | - for sh: GREP=fgrep bzgrep string files | |
141 | - for csh: (setenv GREP fgrep; bzgrep string files) | |
142 | -.SH AUTHOR | |
143 | -Charles Levert (charles@comm.polymtl.ca). Adapted to bzip2 by Philippe | |
144 | -Troin <phil@fifi.org> for Debian GNU/Linux. | |
145 | -.SH "SEE ALSO" | |
146 | -grep(1), egrep(1), fgrep(1), bzdiff(1), bzmore(1), bzless(1), bzip2(1) | |
15ee0650 | 147 | diff -Nru bzip2-1.0.2/bzless bzip2-1.0.2.new/bzless |
148 | --- bzip2-1.0.2/bzless Thu Jan 1 01:00:00 1970 | |
149 | +++ bzip2-1.0.2.new/bzless Fri Feb 1 04:19:11 2002 | |
d967e3ec | 150 | @@ -0,0 +1,2 @@ |
151 | +#!/bin/sh | |
906ef59a | 152 | +%{_bindir}/bunzip2 -c "$@" | %{_bindir}/less |
15ee0650 | 153 | diff -Nru bzip2-1.0.2/bzmore.1 bzip2-1.0.2.new/bzmore.1 |
154 | --- bzip2-1.0.2/bzmore.1 Sun Dec 30 03:12:35 2001 | |
155 | +++ bzip2-1.0.2.new/bzmore.1 Thu Jan 1 01:00:00 1970 | |
156 | @@ -1,152 +0,0 @@ | |
157 | -.\"Shamelessly copied from zmore.1 by Philippe Troin <phil@fifi.org> | |
158 | -.\"for Debian GNU/Linux | |
159 | -.TH BZMORE 1 | |
160 | -.SH NAME | |
161 | -bzmore, bzless \- file perusal filter for crt viewing of bzip2 compressed text | |
162 | -.SH SYNOPSIS | |
163 | -.B bzmore | |
164 | -[ name ... ] | |
165 | -.br | |
166 | -.B bzless | |
167 | -[ name ... ] | |
168 | -.SH NOTE | |
169 | -In the following description, | |
170 | -.I bzless | |
171 | -and | |
172 | -.I less | |
173 | -can be used interchangeably with | |
174 | -.I bzmore | |
175 | -and | |
176 | -.I more. | |
177 | -.SH DESCRIPTION | |
178 | -.I Bzmore | |
179 | -is a filter which allows examination of compressed or plain text files | |
180 | -one screenful at a time on a soft-copy terminal. | |
181 | -.I bzmore | |
182 | -works on files compressed with | |
183 | -.I bzip2 | |
184 | -and also on uncompressed files. | |
185 | -If a file does not exist, | |
186 | -.I bzmore | |
187 | -looks for a file of the same name with the addition of a .bz2 suffix. | |
188 | -.PP | |
189 | -.I Bzmore | |
190 | -normally pauses after each screenful, printing --More-- | |
191 | -at the bottom of the screen. | |
192 | -If the user then types a carriage return, one more line is displayed. | |
193 | -If the user hits a space, | |
194 | -another screenful is displayed. Other possibilities are enumerated later. | |
195 | -.PP | |
196 | -.I Bzmore | |
197 | -looks in the file | |
198 | -.I /etc/termcap | |
199 | -to determine terminal characteristics, | |
200 | -and to determine the default window size. | |
201 | -On a terminal capable of displaying 24 lines, | |
202 | -the default window size is 22 lines. | |
203 | -Other sequences which may be typed when | |
204 | -.I bzmore | |
205 | -pauses, and their effects, are as follows (\fIi\fP is an optional integer | |
206 | -argument, defaulting to 1) : | |
207 | -.PP | |
208 | -.IP \fIi\|\fP<space> | |
209 | -display | |
210 | -.I i | |
211 | -more lines, (or another screenful if no argument is given) | |
212 | -.PP | |
213 | -.IP ^D | |
214 | -display 11 more lines (a ``scroll''). | |
215 | -If | |
216 | -.I i | |
217 | -is given, then the scroll size is set to \fIi\|\fP. | |
218 | -.PP | |
219 | -.IP d | |
220 | -same as ^D (control-D) | |
221 | -.PP | |
222 | -.IP \fIi\|\fPz | |
223 | -same as typing a space except that \fIi\|\fP, if present, becomes the new | |
224 | -window size. Note that the window size reverts back to the default at the | |
225 | -end of the current file. | |
226 | -.PP | |
227 | -.IP \fIi\|\fPs | |
228 | -skip \fIi\|\fP lines and print a screenful of lines | |
229 | -.PP | |
230 | -.IP \fIi\|\fPf | |
231 | -skip \fIi\fP screenfuls and print a screenful of lines | |
232 | -.PP | |
233 | -.IP "q or Q" | |
234 | -quit reading the current file; go on to the next (if any) | |
235 | -.PP | |
236 | -.IP "e or q" | |
237 | -When the prompt --More--(Next file: | |
238 | -.IR file ) | |
239 | -is printed, this command causes bzmore to exit. | |
240 | -.PP | |
241 | -.IP s | |
242 | -When the prompt --More--(Next file: | |
243 | -.IR file ) | |
244 | -is printed, this command causes bzmore to skip the next file and continue. | |
245 | -.PP | |
246 | -.IP = | |
247 | -Display the current line number. | |
248 | -.PP | |
249 | -.IP \fIi\|\fP/expr | |
250 | -search for the \fIi\|\fP-th occurrence of the regular expression \fIexpr.\fP | |
251 | -If the pattern is not found, | |
252 | -.I bzmore | |
253 | -goes on to the next file (if any). | |
254 | -Otherwise, a screenful is displayed, starting two lines before the place | |
255 | -where the expression was found. | |
256 | -The user's erase and kill characters may be used to edit the regular | |
257 | -expression. | |
258 | -Erasing back past the first column cancels the search command. | |
259 | -.PP | |
260 | -.IP \fIi\|\fPn | |
261 | -search for the \fIi\|\fP-th occurrence of the last regular expression entered. | |
262 | -.PP | |
263 | -.IP !command | |
264 | -invoke a shell with \fIcommand\|\fP. | |
265 | -The character `!' in "command" are replaced with the | |
266 | -previous shell command. The sequence "\\!" is replaced by "!". | |
267 | -.PP | |
268 | -.IP ":q or :Q" | |
269 | -quit reading the current file; go on to the next (if any) | |
270 | -(same as q or Q). | |
271 | -.PP | |
272 | -.IP . | |
273 | -(dot) repeat the previous command. | |
274 | -.PP | |
275 | -The commands take effect immediately, i.e., it is not necessary to | |
276 | -type a carriage return. | |
277 | -Up to the time when the command character itself is given, | |
278 | -the user may hit the line kill character to cancel the numerical | |
279 | -argument being formed. | |
280 | -In addition, the user may hit the erase character to redisplay the | |
281 | ---More-- message. | |
282 | -.PP | |
283 | -At any time when output is being sent to the terminal, the user can | |
284 | -hit the quit key (normally control\-\\). | |
285 | -.I Bzmore | |
286 | -will stop sending output, and will display the usual --More-- | |
287 | -prompt. | |
288 | -The user may then enter one of the above commands in the normal manner. | |
289 | -Unfortunately, some output is lost when this is done, due to the | |
290 | -fact that any characters waiting in the terminal's output queue | |
291 | -are flushed when the quit signal occurs. | |
292 | -.PP | |
293 | -The terminal is set to | |
294 | -.I noecho | |
295 | -mode by this program so that the output can be continuous. | |
296 | -What you type will thus not show on your terminal, except for the / and ! | |
297 | -commands. | |
298 | -.PP | |
299 | -If the standard output is not a teletype, then | |
300 | -.I bzmore | |
301 | -acts just like | |
302 | -.I bzcat, | |
303 | -except that a header is printed before each file. | |
304 | -.SH FILES | |
305 | -.DT | |
306 | -/etc/termcap Terminal data base | |
307 | -.SH "SEE ALSO" | |
308 | -more(1), less(1), bzip2(1), bzdiff(1), bzgrep(1) | |
309 | diff -Nru bzip2-1.0.2/config.h.in bzip2-1.0.2.new/config.h.in | |
310 | --- bzip2-1.0.2/config.h.in Thu Jan 1 01:00:00 1970 | |
311 | +++ bzip2-1.0.2.new/config.h.in Fri Feb 1 04:19:11 2002 | |
d967e3ec | 312 | @@ -0,0 +1,17 @@ |
313 | +/* config.h.in. Generated automatically from configure.in by autoheader. */ | |
314 | + | |
315 | +/* Name of package */ | |
316 | +#undef PACKAGE | |
317 | + | |
318 | +/* Version number of package */ | |
319 | +#undef VERSION | |
320 | + | |
321 | +/* Number of bits in a file offset, on hosts where this is settable. */ | |
322 | +#undef _FILE_OFFSET_BITS | |
323 | + | |
324 | +/* Define to make fseeko etc. visible, on some hosts. */ | |
325 | +#undef _LARGEFILE_SOURCE | |
326 | + | |
327 | +/* Define for large files, on AIX-style hosts. */ | |
328 | +#undef _LARGE_FILES | |
329 | + | |
15ee0650 | 330 | diff -Nru bzip2-1.0.2/configure.in bzip2-1.0.2.new/configure.in |
331 | --- bzip2-1.0.2/configure.in Thu Jan 1 01:00:00 1970 | |
332 | +++ bzip2-1.0.2.new/configure.in Fri Feb 1 04:19:11 2002 | |
d967e3ec | 333 | @@ -0,0 +1,10 @@ |
334 | +AC_INIT(bzip2.c) | |
335 | +AM_INIT_AUTOMAKE(bzip2,1.0.2) | |
336 | +AM_CONFIG_HEADER(config.h) | |
337 | +AC_PROG_CC | |
338 | +AM_PROG_LIBTOOL | |
339 | +AC_PROG_LN_S | |
340 | +AC_SYS_LARGEFILE | |
341 | +AC_OUTPUT(Makefile | |
342 | + doc/Makefile | |
343 | + doc/pl/Makefile) | |
15ee0650 | 344 | diff -Nru bzip2-1.0.2/crctable.c bzip2-1.0.2.new/crctable.c |
345 | --- bzip2-1.0.2/crctable.c Sun Dec 30 03:19:28 2001 | |
346 | +++ bzip2-1.0.2.new/crctable.c Fri Feb 1 04:19:11 2002 | |
d967e3ec | 347 | @@ -58,6 +58,10 @@ |
348 | For more information on these sources, see the manual. | |
349 | --*/ | |
350 | ||
351 | +#ifdef HAVE_CONFIG_H | |
352 | +#include <config.h> | |
353 | +#endif | |
354 | + | |
355 | ||
356 | #include "bzlib_private.h" | |
357 | ||
15ee0650 | 358 | diff -Nru bzip2-1.0.2/decompress.c bzip2-1.0.2.new/decompress.c |
359 | --- bzip2-1.0.2/decompress.c Sun Dec 30 21:45:53 2001 | |
360 | +++ bzip2-1.0.2.new/decompress.c Fri Feb 1 04:19:11 2002 | |
d967e3ec | 361 | @@ -58,6 +58,10 @@ |
362 | For more information on these sources, see the manual. | |
363 | --*/ | |
364 | ||
365 | +#ifdef HAVE_CONFIG_H | |
366 | +#include <config.h> | |
367 | +#endif | |
368 | + | |
369 | ||
370 | #include "bzlib_private.h" | |
371 | ||
15ee0650 | 372 | diff -Nru bzip2-1.0.2/dlltest.c bzip2-1.0.2.new/dlltest.c |
373 | --- bzip2-1.0.2/dlltest.c Sun Dec 30 20:44:07 2001 | |
374 | +++ bzip2-1.0.2.new/dlltest.c Fri Feb 1 04:19:11 2002 | |
d967e3ec | 375 | @@ -8,6 +8,10 @@ |
376 | usage: minibz2 [-d] [-{1,2,..9}] [[srcfilename] destfilename]\r | |
377 | */\r | |
378 | \r | |
379 | +#ifdef HAVE_CONFIG_H | |
380 | +#include <config.h> | |
381 | +#endif | |
382 | + | |
383 | #define BZ_IMPORT\r | |
384 | #include <stdio.h>\r | |
385 | #include <stdlib.h>\r | |
15ee0650 | 386 | diff -Nru bzip2-1.0.2/doc/Makefile.am bzip2-1.0.2.new/doc/Makefile.am |
387 | --- bzip2-1.0.2/doc/Makefile.am Thu Jan 1 01:00:00 1970 | |
388 | +++ bzip2-1.0.2.new/doc/Makefile.am Fri Feb 1 04:27:21 2002 | |
389 | @@ -0,0 +1,6 @@ | |
d967e3ec | 390 | + |
391 | +SUBDIRS = pl | |
392 | + | |
15ee0650 | 393 | +man_MANS = bunzip2.1 bzcat.1 bzdiff.1 bzgrep.1 bzip2.1 \ |
394 | + bzip2recover.1 bzmore.1 | |
d967e3ec | 395 | +#info_TEXINFOS = bzip2.texi |
15ee0650 | 396 | diff -Nru bzip2-1.0.2/doc/bunzip2.1 bzip2-1.0.2.new/doc/bunzip2.1 |
397 | --- bzip2-1.0.2/doc/bunzip2.1 Thu Jan 1 01:00:00 1970 | |
398 | +++ bzip2-1.0.2.new/doc/bunzip2.1 Fri Feb 1 04:19:11 2002 | |
d967e3ec | 399 | @@ -0,0 +1 @@ |
400 | +.so bzip2.1 | |
401 | \ No newline at end of file | |
15ee0650 | 402 | diff -Nru bzip2-1.0.2/doc/bzcat.1 bzip2-1.0.2.new/doc/bzcat.1 |
403 | --- bzip2-1.0.2/doc/bzcat.1 Thu Jan 1 01:00:00 1970 | |
404 | +++ bzip2-1.0.2.new/doc/bzcat.1 Fri Feb 1 04:19:11 2002 | |
d967e3ec | 405 | @@ -0,0 +1 @@ |
406 | +.so bzip2.1 | |
407 | \ No newline at end of file | |
15ee0650 | 408 | diff -Nru bzip2-1.0.2/doc/bzdiff.1 bzip2-1.0.2.new/doc/bzdiff.1 |
409 | --- bzip2-1.0.2/doc/bzdiff.1 Thu Jan 1 01:00:00 1970 | |
410 | +++ bzip2-1.0.2.new/doc/bzdiff.1 Sun Dec 30 03:12:35 2001 | |
411 | @@ -0,0 +1,47 @@ | |
412 | +.\"Shamelessly copied from zmore.1 by Philippe Troin <phil@fifi.org> | |
413 | +.\"for Debian GNU/Linux | |
414 | +.TH BZDIFF 1 | |
415 | +.SH NAME | |
416 | +bzcmp, bzdiff \- compare bzip2 compressed files | |
417 | +.SH SYNOPSIS | |
418 | +.B bzcmp | |
419 | +[ cmp_options ] file1 | |
420 | +[ file2 ] | |
421 | +.br | |
422 | +.B bzdiff | |
423 | +[ diff_options ] file1 | |
424 | +[ file2 ] | |
425 | +.SH DESCRIPTION | |
426 | +.I Bzcmp | |
427 | +and | |
428 | +.I bzdiff | |
429 | +are used to invoke the | |
430 | +.I cmp | |
431 | +or the | |
432 | +.I diff | |
433 | +program on bzip2 compressed files. All options specified are passed | |
434 | +directly to | |
435 | +.I cmp | |
436 | +or | |
437 | +.IR diff "." | |
438 | +If only 1 file is specified, then the files compared are | |
439 | +.I file1 | |
440 | +and an uncompressed | |
441 | +.IR file1 ".bz2." | |
442 | +If two files are specified, then they are uncompressed if necessary and fed to | |
443 | +.I cmp | |
444 | +or | |
445 | +.IR diff "." | |
446 | +The exit status from | |
447 | +.I cmp | |
448 | +or | |
449 | +.I diff | |
450 | +is preserved. | |
451 | +.SH "SEE ALSO" | |
452 | +cmp(1), diff(1), bzmore(1), bzless(1), bzgrep(1), bzip2(1) | |
453 | +.SH BUGS | |
454 | +Messages from the | |
455 | +.I cmp | |
456 | +or | |
457 | +.I diff | |
458 | +programs refer to temporary filenames instead of those specified. | |
459 | diff -Nru bzip2-1.0.2/doc/bzgrep.1 bzip2-1.0.2.new/doc/bzgrep.1 | |
460 | --- bzip2-1.0.2/doc/bzgrep.1 Thu Jan 1 01:00:00 1970 | |
461 | +++ bzip2-1.0.2.new/doc/bzgrep.1 Sun Dec 30 03:12:35 2001 | |
462 | @@ -0,0 +1,56 @@ | |
463 | +.\"Shamelessly copied from zmore.1 by Philippe Troin <phil@fifi.org> | |
464 | +.\"for Debian GNU/Linux | |
465 | +.TH BZGREP 1 | |
466 | +.SH NAME | |
467 | +bzgrep, bzfgrep, bzegrep \- search possibly bzip2 compressed files for a regular expression | |
468 | +.SH SYNOPSIS | |
469 | +.B bzgrep | |
470 | +[ grep_options ] | |
471 | +.BI [\ -e\ ] " pattern" | |
472 | +.IR filename ".\|.\|." | |
473 | +.br | |
474 | +.B bzegrep | |
475 | +[ egrep_options ] | |
476 | +.BI [\ -e\ ] " pattern" | |
477 | +.IR filename ".\|.\|." | |
478 | +.br | |
479 | +.B bzfgrep | |
480 | +[ fgrep_options ] | |
481 | +.BI [\ -e\ ] " pattern" | |
482 | +.IR filename ".\|.\|." | |
483 | +.SH DESCRIPTION | |
484 | +.IR Bzgrep | |
485 | +is used to invoke the | |
486 | +.I grep | |
487 | +on bzip2-compressed files. All options specified are passed directly to | |
488 | +.I grep. | |
489 | +If no file is specified, then the standard input is decompressed | |
490 | +if necessary and fed to grep. | |
491 | +Otherwise the given files are uncompressed if necessary and fed to | |
492 | +.I grep. | |
493 | +.PP | |
494 | +If | |
495 | +.I bzgrep | |
496 | +is invoked as | |
497 | +.I bzegrep | |
498 | +or | |
499 | +.I bzfgrep | |
500 | +then | |
501 | +.I egrep | |
502 | +or | |
503 | +.I fgrep | |
504 | +is used instead of | |
505 | +.I grep. | |
506 | +If the GREP environment variable is set, | |
507 | +.I bzgrep | |
508 | +uses it as the | |
509 | +.I grep | |
510 | +program to be invoked. For example: | |
511 | + | |
512 | + for sh: GREP=fgrep bzgrep string files | |
513 | + for csh: (setenv GREP fgrep; bzgrep string files) | |
514 | +.SH AUTHOR | |
515 | +Charles Levert (charles@comm.polymtl.ca). Adapted to bzip2 by Philippe | |
516 | +Troin <phil@fifi.org> for Debian GNU/Linux. | |
517 | +.SH "SEE ALSO" | |
518 | +grep(1), egrep(1), fgrep(1), bzdiff(1), bzmore(1), bzless(1), bzip2(1) | |
519 | diff -Nru bzip2-1.0.2/doc/bzip2.1 bzip2-1.0.2.new/doc/bzip2.1 | |
520 | --- bzip2-1.0.2/doc/bzip2.1 Thu Jan 1 01:00:00 1970 | |
521 | +++ bzip2-1.0.2.new/doc/bzip2.1 Thu Jan 3 00:14:36 2002 | |
522 | @@ -0,0 +1,453 @@ | |
d967e3ec | 523 | +.PU |
524 | +.TH bzip2 1 | |
525 | +.SH NAME | |
15ee0650 | 526 | +bzip2, bunzip2 \- a block-sorting file compressor, v1.0.2 |
d967e3ec | 527 | +.br |
528 | +bzcat \- decompresses files to stdout | |
529 | +.br | |
530 | +bzip2recover \- recovers data from damaged bzip2 files | |
531 | + | |
532 | +.SH SYNOPSIS | |
533 | +.ll +8 | |
534 | +.B bzip2 | |
535 | +.RB [ " \-cdfkqstvzVL123456789 " ] | |
536 | +[ | |
537 | +.I "filenames \&..." | |
538 | +] | |
539 | +.ll -8 | |
540 | +.br | |
541 | +.B bunzip2 | |
542 | +.RB [ " \-fkvsVL " ] | |
543 | +[ | |
544 | +.I "filenames \&..." | |
545 | +] | |
546 | +.br | |
547 | +.B bzcat | |
548 | +.RB [ " \-s " ] | |
549 | +[ | |
550 | +.I "filenames \&..." | |
551 | +] | |
552 | +.br | |
553 | +.B bzip2recover | |
554 | +.I "filename" | |
555 | + | |
556 | +.SH DESCRIPTION | |
557 | +.I bzip2 | |
558 | +compresses files using the Burrows-Wheeler block sorting | |
559 | +text compression algorithm, and Huffman coding. Compression is | |
560 | +generally considerably better than that achieved by more conventional | |
561 | +LZ77/LZ78-based compressors, and approaches the performance of the PPM | |
562 | +family of statistical compressors. | |
563 | + | |
564 | +The command-line options are deliberately very similar to | |
565 | +those of | |
566 | +.I GNU gzip, | |
567 | +but they are not identical. | |
568 | + | |
569 | +.I bzip2 | |
570 | +expects a list of file names to accompany the | |
571 | +command-line flags. Each file is replaced by a compressed version of | |
572 | +itself, with the name "original_name.bz2". | |
573 | +Each compressed file | |
574 | +has the same modification date, permissions, and, when possible, | |
575 | +ownership as the corresponding original, so that these properties can | |
576 | +be correctly restored at decompression time. File name handling is | |
577 | +naive in the sense that there is no mechanism for preserving original | |
578 | +file names, permissions, ownerships or dates in filesystems which lack | |
579 | +these concepts, or have serious file name length restrictions, such as | |
580 | +MS-DOS. | |
581 | + | |
582 | +.I bzip2 | |
583 | +and | |
584 | +.I bunzip2 | |
585 | +will by default not overwrite existing | |
586 | +files. If you want this to happen, specify the \-f flag. | |
587 | + | |
588 | +If no file names are specified, | |
589 | +.I bzip2 | |
590 | +compresses from standard | |
591 | +input to standard output. In this case, | |
592 | +.I bzip2 | |
593 | +will decline to | |
594 | +write compressed output to a terminal, as this would be entirely | |
595 | +incomprehensible and therefore pointless. | |
596 | + | |
597 | +.I bunzip2 | |
598 | +(or | |
599 | +.I bzip2 \-d) | |
600 | +decompresses all | |
601 | +specified files. Files which were not created by | |
602 | +.I bzip2 | |
603 | +will be detected and ignored, and a warning issued. | |
604 | +.I bzip2 | |
605 | +attempts to guess the filename for the decompressed file | |
606 | +from that of the compressed file as follows: | |
607 | + | |
608 | + filename.bz2 becomes filename | |
609 | + filename.bz becomes filename | |
610 | + filename.tbz2 becomes filename.tar | |
611 | + filename.tbz becomes filename.tar | |
612 | + anyothername becomes anyothername.out | |
613 | + | |
614 | +If the file does not end in one of the recognised endings, | |
615 | +.I .bz2, | |
616 | +.I .bz, | |
617 | +.I .tbz2 | |
618 | +or | |
619 | +.I .tbz, | |
620 | +.I bzip2 | |
621 | +complains that it cannot | |
622 | +guess the name of the original file, and uses the original name | |
623 | +with | |
624 | +.I .out | |
625 | +appended. | |
626 | + | |
627 | +As with compression, supplying no | |
628 | +filenames causes decompression from | |
629 | +standard input to standard output. | |
630 | + | |
631 | +.I bunzip2 | |
632 | +will correctly decompress a file which is the | |
633 | +concatenation of two or more compressed files. The result is the | |
634 | +concatenation of the corresponding uncompressed files. Integrity | |
635 | +testing (\-t) | |
636 | +of concatenated | |
637 | +compressed files is also supported. | |
638 | + | |
639 | +You can also compress or decompress files to the standard output by | |
640 | +giving the \-c flag. Multiple files may be compressed and | |
641 | +decompressed like this. The resulting outputs are fed sequentially to | |
642 | +stdout. Compression of multiple files | |
643 | +in this manner generates a stream | |
644 | +containing multiple compressed file representations. Such a stream | |
645 | +can be decompressed correctly only by | |
646 | +.I bzip2 | |
647 | +version 0.9.0 or | |
648 | +later. Earlier versions of | |
649 | +.I bzip2 | |
650 | +will stop after decompressing | |
651 | +the first file in the stream. | |
652 | + | |
653 | +.I bzcat | |
654 | +(or | |
655 | +.I bzip2 -dc) | |
656 | +decompresses all specified files to | |
657 | +the standard output. | |
658 | + | |
659 | +.I bzip2 | |
660 | +will read arguments from the environment variables | |
661 | +.I BZIP2 | |
662 | +and | |
663 | +.I BZIP, | |
664 | +in that order, and will process them | |
665 | +before any arguments read from the command line. This gives a | |
666 | +convenient way to supply default arguments. | |
667 | + | |
668 | +Compression is always performed, even if the compressed | |
669 | +file is slightly | |
670 | +larger than the original. Files of less than about one hundred bytes | |
671 | +tend to get larger, since the compression mechanism has a constant | |
672 | +overhead in the region of 50 bytes. Random data (including the output | |
673 | +of most file compressors) is coded at about 8.05 bits per byte, giving | |
674 | +an expansion of around 0.5%. | |
675 | + | |
676 | +As a self-check for your protection, | |
677 | +.I | |
678 | +bzip2 | |
679 | +uses 32-bit CRCs to | |
680 | +make sure that the decompressed version of a file is identical to the | |
681 | +original. This guards against corruption of the compressed data, and | |
682 | +against undetected bugs in | |
683 | +.I bzip2 | |
684 | +(hopefully very unlikely). The | |
685 | +chances of data corruption going undetected are microscopic, about one | |
686 | +chance in four billion for each file processed. Be aware, though, that | |
687 | +the check occurs upon decompression, so it can only tell you that | |
688 | +something is wrong. It can't help you | |
689 | +recover the original uncompressed | |
690 | +data. You can use | |
691 | +.I bzip2recover | |
692 | +to try to recover data from | |
693 | +damaged files. | |
694 | + | |
695 | +Return values: 0 for a normal exit, 1 for environmental problems (file | |
696 | +not found, invalid flags, I/O errors, &c), 2 to indicate a corrupt | |
697 | +compressed file, 3 for an internal consistency error (eg, bug) which | |
698 | +caused | |
699 | +.I bzip2 | |
700 | +to panic. | |
701 | + | |
702 | +.SH OPTIONS | |
703 | +.TP | |
704 | +.B \-c --stdout | |
705 | +Compress or decompress to standard output. | |
706 | +.TP | |
707 | +.B \-d --decompress | |
708 | +Force decompression. | |
709 | +.I bzip2, | |
710 | +.I bunzip2 | |
711 | +and | |
712 | +.I bzcat | |
713 | +are | |
714 | +really the same program, and the decision about what actions to take is | |
715 | +done on the basis of which name is used. This flag overrides that | |
716 | +mechanism, and forces | |
717 | +.I bzip2 | |
718 | +to decompress. | |
719 | +.TP | |
720 | +.B \-z --compress | |
721 | +The complement to \-d: forces compression, regardless of the | |
15ee0650 | 722 | +invocation name. |
d967e3ec | 723 | +.TP |
724 | +.B \-t --test | |
725 | +Check integrity of the specified file(s), but don't decompress them. | |
726 | +This really performs a trial decompression and throws away the result. | |
727 | +.TP | |
728 | +.B \-f --force | |
729 | +Force overwrite of output files. Normally, | |
730 | +.I bzip2 | |
731 | +will not overwrite | |
732 | +existing output files. Also forces | |
733 | +.I bzip2 | |
734 | +to break hard links | |
735 | +to files, which it otherwise wouldn't do. | |
15ee0650 | 736 | + |
737 | +bzip2 normally declines to decompress files which don't have the | |
738 | +correct magic header bytes. If forced (-f), however, it will pass | |
739 | +such files through unmodified. This is how GNU gzip behaves. | |
d967e3ec | 740 | +.TP |
741 | +.B \-k --keep | |
742 | +Keep (don't delete) input files during compression | |
743 | +or decompression. | |
744 | +.TP | |
745 | +.B \-s --small | |
746 | +Reduce memory usage, for compression, decompression and testing. Files | |
747 | +are decompressed and tested using a modified algorithm which only | |
748 | +requires 2.5 bytes per block byte. This means any file can be | |
749 | +decompressed in 2300k of memory, albeit at about half the normal speed. | |
750 | + | |
751 | +During compression, \-s selects a block size of 200k, which limits | |
752 | +memory use to around the same figure, at the expense of your compression | |
753 | +ratio. In short, if your machine is low on memory (8 megabytes or | |
754 | +less), use \-s for everything. See MEMORY MANAGEMENT below. | |
755 | +.TP | |
756 | +.B \-q --quiet | |
757 | +Suppress non-essential warning messages. Messages pertaining to | |
758 | +I/O errors and other critical events will not be suppressed. | |
759 | +.TP | |
760 | +.B \-v --verbose | |
761 | +Verbose mode -- show the compression ratio for each file processed. | |
762 | +Further \-v's increase the verbosity level, spewing out lots of | |
763 | +information which is primarily of interest for diagnostic purposes. | |
764 | +.TP | |
765 | +.B \-L --license -V --version | |
766 | +Display the software version, license terms and conditions. | |
767 | +.TP | |
15ee0650 | 768 | +.B \-1 (or \-\-fast) to \-9 (or \-\-best) |
d967e3ec | 769 | +Set the block size to 100 k, 200 k .. 900 k when compressing. Has no |
770 | +effect when decompressing. See MEMORY MANAGEMENT below. | |
15ee0650 | 771 | +The \-\-fast and \-\-best aliases are primarily for GNU gzip |
772 | +compatibility. In particular, \-\-fast doesn't make things | |
773 | +significantly faster. | |
774 | +And \-\-best merely selects the default behaviour. | |
d967e3ec | 775 | +.TP |
776 | +.B \-- | |
777 | +Treats all subsequent arguments as file names, even if they start | |
778 | +with a dash. This is so you can handle files with names beginning | |
779 | +with a dash, for example: bzip2 \-- \-myfilename. | |
780 | +.TP | |
781 | +.B \--repetitive-fast --repetitive-best | |
782 | +These flags are redundant in versions 0.9.5 and above. They provided | |
783 | +some coarse control over the behaviour of the sorting algorithm in | |
784 | +earlier versions, which was sometimes useful. 0.9.5 and above have an | |
785 | +improved algorithm which renders these flags irrelevant. | |
786 | + | |
787 | +.SH MEMORY MANAGEMENT | |
788 | +.I bzip2 | |
789 | +compresses large files in blocks. The block size affects | |
790 | +both the compression ratio achieved, and the amount of memory needed for | |
791 | +compression and decompression. The flags \-1 through \-9 | |
792 | +specify the block size to be 100,000 bytes through 900,000 bytes (the | |
793 | +default) respectively. At decompression time, the block size used for | |
794 | +compression is read from the header of the compressed file, and | |
795 | +.I bunzip2 | |
796 | +then allocates itself just enough memory to decompress | |
797 | +the file. Since block sizes are stored in compressed files, it follows | |
798 | +that the flags \-1 to \-9 are irrelevant to and so ignored | |
799 | +during decompression. | |
800 | + | |
801 | +Compression and decompression requirements, | |
802 | +in bytes, can be estimated as: | |
803 | + | |
804 | + Compression: 400k + ( 8 x block size ) | |
805 | + | |
806 | + Decompression: 100k + ( 4 x block size ), or | |
807 | + 100k + ( 2.5 x block size ) | |
808 | + | |
809 | +Larger block sizes give rapidly diminishing marginal returns. Most of | |
810 | +the compression comes from the first two or three hundred k of block | |
811 | +size, a fact worth bearing in mind when using | |
812 | +.I bzip2 | |
813 | +on small machines. | |
814 | +It is also important to appreciate that the decompression memory | |
815 | +requirement is set at compression time by the choice of block size. | |
816 | + | |
817 | +For files compressed with the default 900k block size, | |
818 | +.I bunzip2 | |
819 | +will require about 3700 kbytes to decompress. To support decompression | |
820 | +of any file on a 4 megabyte machine, | |
821 | +.I bunzip2 | |
822 | +has an option to | |
823 | +decompress using approximately half this amount of memory, about 2300 | |
824 | +kbytes. Decompression speed is also halved, so you should use this | |
825 | +option only where necessary. The relevant flag is \-s. | |
826 | + | |
827 | +In general, try to use the largest block size memory constraints allow, | |
828 | +since that maximises the compression achieved. Compression and | |
829 | +decompression speed are virtually unaffected by block size. | |
830 | + | |
831 | +Another significant point applies to files which fit in a single block | |
832 | +-- that means most files you'd encounter using a large block size. The | |
833 | +amount of real memory touched is proportional to the size of the file, | |
834 | +since the file is smaller than a block. For example, compressing a file | |
835 | +20,000 bytes long with the flag \-9 will cause the compressor to | |
836 | +allocate around 7600k of memory, but only touch 400k + 20000 * 8 = 560 | |
837 | +kbytes of it. Similarly, the decompressor will allocate 3700k but only | |
838 | +touch 100k + 20000 * 4 = 180 kbytes. | |
839 | + | |
840 | +Here is a table which summarises the maximum memory usage for different | |
841 | +block sizes. Also recorded is the total compressed size for 14 files of | |
842 | +the Calgary Text Compression Corpus totalling 3,141,622 bytes. This | |
843 | +column gives some feel for how compression varies with block size. | |
844 | +These figures tend to understate the advantage of larger block sizes for | |
845 | +larger files, since the Corpus is dominated by smaller files. | |
846 | + | |
847 | + Compress Decompress Decompress Corpus | |
848 | + Flag usage usage -s usage Size | |
849 | + | |
850 | + -1 1200k 500k 350k 914704 | |
851 | + -2 2000k 900k 600k 877703 | |
852 | + -3 2800k 1300k 850k 860338 | |
853 | + -4 3600k 1700k 1100k 846899 | |
854 | + -5 4400k 2100k 1350k 845160 | |
855 | + -6 5200k 2500k 1600k 838626 | |
856 | + -7 6000k 2900k 1850k 834096 | |
857 | + -8 6800k 3300k 2100k 828642 | |
858 | + -9 7600k 3700k 2350k 828642 | |
859 | + | |
860 | +.SH RECOVERING DATA FROM DAMAGED FILES | |
861 | +.I bzip2 | |
862 | +compresses files in blocks, usually 900 kbytes long. Each | |
863 | +block is handled independently. If a media or transmission error causes | |
864 | +a multi-block .bz2 | |
865 | +file to become damaged, it may be possible to | |
866 | +recover data from the undamaged blocks in the file. | |
867 | + | |
868 | +The compressed representation of each block is delimited by a 48-bit | |
869 | +pattern, which makes it possible to find the block boundaries with | |
870 | +reasonable certainty. Each block also carries its own 32-bit CRC, so | |
871 | +damaged blocks can be distinguished from undamaged ones. | |
872 | + | |
873 | +.I bzip2recover | |
874 | +is a simple program whose purpose is to search for | |
875 | +blocks in .bz2 files, and write each block out into its own .bz2 | |
876 | +file. You can then use | |
877 | +.I bzip2 | |
878 | +\-t | |
879 | +to test the | |
880 | +integrity of the resulting files, and decompress those which are | |
881 | +undamaged. | |
882 | + | |
883 | +.I bzip2recover | |
884 | +takes a single argument, the name of the damaged file, | |
15ee0650 | 885 | +and writes a number of files "rec00001file.bz2", |
886 | +"rec00002file.bz2", etc, containing the extracted blocks. | |
d967e3ec | 887 | +The output filenames are designed so that the use of |
888 | +wildcards in subsequent processing -- for example, | |
15ee0650 | 889 | +"bzip2 -dc rec*file.bz2 > recovered_data" -- processes the files in |
d967e3ec | 890 | +the correct order. |
891 | + | |
892 | +.I bzip2recover | |
893 | +should be of most use dealing with large .bz2 | |
894 | +files, as these will contain many blocks. It is clearly | |
895 | +futile to use it on damaged single-block files, since a | |
896 | +damaged block cannot be recovered. If you wish to minimise | |
897 | +any potential data loss through media or transmission errors, | |
898 | +you might consider compressing with a smaller | |
899 | +block size. | |
900 | + | |
901 | +.SH PERFORMANCE NOTES | |
902 | +The sorting phase of compression gathers together similar strings in the | |
903 | +file. Because of this, files containing very long runs of repeated | |
904 | +symbols, like "aabaabaabaab ..." (repeated several hundred times) may | |
905 | +compress more slowly than normal. Versions 0.9.5 and above fare much | |
906 | +better than previous versions in this respect. The ratio between | |
907 | +worst-case and average-case compression time is in the region of 10:1. | |
908 | +For previous versions, this figure was more like 100:1. You can use the | |
909 | +\-vvvv option to monitor progress in great detail, if you want. | |
910 | + | |
911 | +Decompression speed is unaffected by these phenomena. | |
912 | + | |
913 | +.I bzip2 | |
914 | +usually allocates several megabytes of memory to operate | |
915 | +in, and then charges all over it in a fairly random fashion. This means | |
916 | +that performance, both for compressing and decompressing, is largely | |
917 | +determined by the speed at which your machine can service cache misses. | |
918 | +Because of this, small changes to the code to reduce the miss rate have | |
919 | +been observed to give disproportionately large performance improvements. | |
920 | +I imagine | |
921 | +.I bzip2 | |
922 | +will perform best on machines with very large caches. | |
923 | + | |
924 | +.SH CAVEATS | |
925 | +I/O error messages are not as helpful as they could be. | |
926 | +.I bzip2 | |
927 | +tries hard to detect I/O errors and exit cleanly, but the details of | |
928 | +what the problem is sometimes seem rather misleading. | |
929 | + | |
15ee0650 | 930 | +This manual page pertains to version 1.0.2 of |
d967e3ec | 931 | +.IR bzip2 . | |
15ee0650 | 932 | +Compressed data created by this version is entirely forwards and |
933 | +backwards compatible with the previous public releases, versions | |
934 | +0.1pl2, 0.9.0, 0.9.5, 1.0.0 and 1.0.1, but with the following | |
935 | +exception: 0.9.0 and above can correctly decompress multiple | |
936 | +concatenated compressed files. 0.1pl2 cannot do this; it will stop | |
937 | +after decompressing just the first file in the stream. | |
d967e3ec | 938 | + |
939 | +.I bzip2recover | |
15ee0650 | 940 | +versions prior to this one, 1.0.2, used 32-bit integers to represent |
941 | +bit positions in compressed files, so they could not handle compressed | |
942 | +files more than 512 megabytes long. Versions 1.0.2 and above use | |
943 | +64-bit ints on some platforms which support them (GNU supported | |
944 | +targets, and Windows). To establish whether or not bzip2recover was | |
945 | +built with such a limitation, run it without arguments. In any event | |
946 | +you can build yourself an unlimited version if you can recompile it | |
947 | +with MaybeUInt64 set to be an unsigned 64-bit integer. | |
948 | + | |
949 | + | |
d967e3ec | 950 | + |
951 | +.SH AUTHOR | |
952 | +Julian Seward, jseward@acm.org. | |
953 | + | |
15ee0650 | 954 | +http://sources.redhat.com/bzip2 |
d967e3ec | 955 | + |
956 | +The ideas embodied in | |
957 | +.I bzip2 | |
958 | +are due to (at least) the following | |
959 | +people: Michael Burrows and David Wheeler (for the block sorting | |
960 | +transformation), David Wheeler (again, for the Huffman coder), Peter | |
961 | +Fenwick (for the structured coding model in the original | |
962 | +.IR bzip , | |
963 | +and many refinements), and Alistair Moffat, Radford Neal and Ian Witten | |
964 | +(for the arithmetic coder in the original | |
965 | +.IR bzip ). | |
966 | +I am much | |
967 | +indebted for their help, support and advice. See the manual in the | |
968 | +source distribution for pointers to sources of documentation. Christian | |
969 | +von Roques encouraged me to look for faster sorting algorithms, so as to | |
970 | +speed up compression. Bela Lubkin encouraged me to improve the | |
15ee0650 | 971 | +worst-case compression performance. |
972 | +The bz* scripts are derived from those of GNU gzip. | |
973 | +Many people sent patches, helped | |
d967e3ec | 974 | +with portability problems, lent machines, gave advice and were generally |
975 | +helpful. | |
15ee0650 | 976 | diff -Nru bzip2-1.0.2/doc/bzip2.texi bzip2-1.0.2.new/doc/bzip2.texi |
977 | --- bzip2-1.0.2/doc/bzip2.texi Thu Jan 1 01:00:00 1970 | |
978 | +++ bzip2-1.0.2.new/doc/bzip2.texi Fri Feb 1 04:26:21 2002 | |
979 | @@ -0,0 +1,2234 @@ | |
d967e3ec | 980 | +\input texinfo @c -*- Texinfo -*- |
981 | +@setfilename bzip2.info | |
982 | + | |
983 | +@ignore | |
15ee0650 | 984 | +This file documents bzip2 version 1.0.2, and associated library |
d967e3ec | 985 | +libbzip2, written by Julian Seward (jseward@acm.org). |
986 | + | |
15ee0650 | 987 | +Copyright (C) 1996-2002 Julian R Seward |
d967e3ec | 988 | + |
989 | +Permission is granted to make and distribute verbatim copies of | |
990 | +this manual provided the copyright notice and this permission notice | |
991 | +are preserved on all copies. | |
992 | + | |
993 | +Permission is granted to copy and distribute translations of this manual | |
994 | +into another language, under the above conditions for verbatim copies. | |
995 | +@end ignore | |
996 | + | |
d967e3ec | 997 | +@iftex |
998 | +@c @finalout | |
999 | +@settitle bzip2 and libbzip2 | |
1000 | +@titlepage | |
1001 | +@title bzip2 and libbzip2 | |
1002 | +@subtitle a program and library for data compression | |
15ee0650 | 1003 | +@subtitle copyright (C) 1996-2002 Julian Seward |
1004 | +@subtitle version 1.0.2 of 30 December 2001 | |
d967e3ec | 1005 | +@author Julian Seward |
1006 | + | |
1007 | +@end titlepage | |
1008 | + | |
1009 | +@parindent 0mm | |
1010 | +@parskip 2mm | |
1011 | + | |
1012 | +@end iftex | |
15ee0650 | 1013 | +@node Top,,, (dir) |
1014 | + | |
1015 | +The following text is the License for this software. You should | |
1016 | +find it identical to that contained in the file LICENSE in the | |
1017 | +source distribution. | |
d967e3ec | 1018 | + |
15ee0650 | 1019 | +------------------ START OF THE LICENSE ------------------ |
d967e3ec | 1020 | + |
1021 | +This program, @code{bzip2}, | |
1022 | +and associated library @code{libbzip2}, are | |
15ee0650 | 1023 | +Copyright (C) 1996-2002 Julian R Seward. All rights reserved. |
d967e3ec | 1024 | + |
1025 | +Redistribution and use in source and binary forms, with or without | |
1026 | +modification, are permitted provided that the following conditions | |
1027 | +are met: | |
1028 | +@itemize @bullet | |
1029 | +@item | |
1030 | + Redistributions of source code must retain the above copyright | |
1031 | + notice, this list of conditions and the following disclaimer. | |
1032 | +@item | |
1033 | + The origin of this software must not be misrepresented; you must | |
1034 | + not claim that you wrote the original software. If you use this | |
1035 | + software in a product, an acknowledgment in the product | |
1036 | + documentation would be appreciated but is not required. | |
1037 | +@item | |
1038 | + Altered source versions must be plainly marked as such, and must | |
1039 | + not be misrepresented as being the original software. | |
1040 | +@item | |
1041 | + The name of the author may not be used to endorse or promote | |
1042 | + products derived from this software without specific prior written | |
1043 | + permission. | |
1044 | +@end itemize | |
1045 | +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS | |
1046 | +OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |
1047 | +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
1048 | +ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY | |
1049 | +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
1050 | +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE | |
1051 | +GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
1052 | +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, | |
1053 | +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | |
1054 | +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
1055 | +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
1056 | + | |
1057 | +Julian Seward, Cambridge, UK. | |
1058 | + | |
1059 | +@code{jseward@@acm.org} | |
1060 | + | |
15ee0650 | 1061 | +@code{bzip2}/@code{libbzip2} version 1.0.2 of 30 December 2001. |
d967e3ec | 1062 | + |
15ee0650 | 1063 | +------------------ END OF THE LICENSE ------------------ |
1064 | + | |
1065 | +Web sites: | |
d967e3ec | 1066 | + |
15ee0650 | 1067 | +@code{http://sources.redhat.com/bzip2} |
d967e3ec | 1068 | + |
15ee0650 | 1069 | +@code{http://www.cacheprof.org} |
d967e3ec | 1070 | + |
1071 | +PATENTS: To the best of my knowledge, @code{bzip2} does not use any patented | |
1072 | +algorithms. However, I do not have the resources available to carry out | |
1073 | +a full patent search. Therefore I cannot give any guarantee of the | |
1074 | +above statement. | |
1075 | + | |
1076 | + | |
1077 | + | |
1078 | + | |
1079 | + | |
1080 | + | |
1081 | + | |
d967e3ec | 1082 | +@chapter Introduction |
1083 | + | |
1084 | +@code{bzip2} compresses files using the Burrows-Wheeler | |
1085 | +block-sorting text compression algorithm, and Huffman coding. | |
1086 | +Compression is generally considerably better than that | |
1087 | +achieved by more conventional LZ77/LZ78-based compressors, | |
1088 | +and approaches the performance of the PPM family of statistical compressors. | |
1089 | + | |
1090 | +@code{bzip2} is built on top of @code{libbzip2}, a flexible library | |
1091 | +for handling compressed data in the @code{bzip2} format. This manual | |
1092 | +describes both how to use the program and | |
1093 | +how to work with the library interface. Most of the | |
1094 | +manual is devoted to this library, not the program, | |
1095 | +which is good news if your interest is only in the program. | |
1096 | + | |
1097 | +Chapter 2 describes how to use @code{bzip2}; this is the only part | |
1098 | +you need to read if you just want to know how to operate the program. | |
1099 | +Chapter 3 describes the programming interfaces in detail, and | |
1100 | +Chapter 4 records some miscellaneous notes which I thought | |
1101 | +ought to be recorded somewhere. | |
1102 | + | |
1103 | + | |
1104 | +@chapter How to use @code{bzip2} | |
1105 | + | |
1106 | +This chapter contains a copy of the @code{bzip2} man page, | |
1107 | +and nothing else. | |
1108 | + | |
1109 | +@quotation | |
1110 | + | |
1111 | +@unnumberedsubsubsec NAME | |
1112 | +@itemize | |
1113 | +@item @code{bzip2}, @code{bunzip2} | |
15ee0650 | 1114 | +- a block-sorting file compressor, v1.0.2 |
d967e3ec | 1115 | +@item @code{bzcat} |
1116 | +- decompresses files to stdout | |
1117 | +@item @code{bzip2recover} | |
1118 | +- recovers data from damaged bzip2 files | |
1119 | +@end itemize | |
1120 | + | |
1121 | +@unnumberedsubsubsec SYNOPSIS | |
1122 | +@itemize | |
1123 | +@item @code{bzip2} [ -cdfkqstvzVL123456789 ] [ filenames ... ] | |
1124 | +@item @code{bunzip2} [ -fkvsVL ] [ filenames ... ] | |
1125 | +@item @code{bzcat} [ -s ] [ filenames ... ] | |
1126 | +@item @code{bzip2recover} filename | |
1127 | +@end itemize | |
1128 | + | |
1129 | +@unnumberedsubsubsec DESCRIPTION | |
1130 | + | |
1131 | +@code{bzip2} compresses files using the Burrows-Wheeler block sorting | |
1132 | +text compression algorithm, and Huffman coding. Compression is | |
1133 | +generally considerably better than that achieved by more conventional | |
1134 | +LZ77/LZ78-based compressors, and approaches the performance of the PPM | |
1135 | +family of statistical compressors. | |
1136 | + | |
1137 | +The command-line options are deliberately very similar to those of GNU | |
1138 | +@code{gzip}, but they are not identical. | |
1139 | + | |
1140 | +@code{bzip2} expects a list of file names to accompany the command-line | |
1141 | +flags. Each file is replaced by a compressed version of itself, with | |
1142 | +the name @code{original_name.bz2}. Each compressed file has the same | |
1143 | +modification date, permissions, and, when possible, ownership as the | |
1144 | +corresponding original, so that these properties can be correctly | |
1145 | +restored at decompression time. File name handling is naive in the | |
1146 | +sense that there is no mechanism for preserving original file names, | |
1147 | +permissions, ownerships or dates in filesystems which lack these | |
1148 | +concepts, or have serious file name length restrictions, such as MS-DOS. | |
1149 | + | |
1150 | +@code{bzip2} and @code{bunzip2} will by default not overwrite existing | |
1151 | +files. If you want this to happen, specify the @code{-f} flag. | |
1152 | + | |
1153 | +If no file names are specified, @code{bzip2} compresses from standard | |
1154 | +input to standard output. In this case, @code{bzip2} will decline to | |
1155 | +write compressed output to a terminal, as this would be entirely | |
1156 | +incomprehensible and therefore pointless. | |
1157 | + | |
1158 | +@code{bunzip2} (or @code{bzip2 -d}) decompresses all | |
1159 | +specified files. Files which were not created by @code{bzip2} | |
1160 | +will be detected and ignored, and a warning issued. | |
1161 | +@code{bzip2} attempts to guess the filename for the decompressed file | |
1162 | +from that of the compressed file as follows: | |
1163 | +@itemize | |
1164 | +@item @code{filename.bz2 } becomes @code{filename} | |
1165 | +@item @code{filename.bz } becomes @code{filename} | |
1166 | +@item @code{filename.tbz2} becomes @code{filename.tar} | |
1167 | +@item @code{filename.tbz } becomes @code{filename.tar} | |
1168 | +@item @code{anyothername } becomes @code{anyothername.out} | |
1169 | +@end itemize | |
1170 | +If the file does not end in one of the recognised endings, | |
1171 | +@code{.bz2}, @code{.bz}, | |
1172 | +@code{.tbz2} or @code{.tbz}, @code{bzip2} complains that it cannot | |
1173 | +guess the name of the original file, and uses the original name | |
1174 | +with @code{.out} appended. | |
1175 | + | |
1176 | +As with compression, supplying no | |
1177 | +filenames causes decompression from standard input to standard output. | |
1178 | + | |
1179 | +@code{bunzip2} will correctly decompress a file which is the | |
1180 | +concatenation of two or more compressed files. The result is the | |
1181 | +concatenation of the corresponding uncompressed files. Integrity | |
1182 | +testing (@code{-t}) of concatenated compressed files is also supported. | |
1183 | + | |
1184 | +You can also compress or decompress files to the standard output by | |
1185 | +giving the @code{-c} flag. Multiple files may be compressed and | |
1186 | +decompressed like this. The resulting outputs are fed sequentially to | |
1187 | +stdout. Compression of multiple files in this manner generates a stream | |
1188 | +containing multiple compressed file representations. Such a stream | |
1189 | +can be decompressed correctly only by @code{bzip2} version 0.9.0 or | |
1190 | +later. Earlier versions of @code{bzip2} will stop after decompressing | |
1191 | +the first file in the stream. | |
1192 | + | |
1193 | +@code{bzcat} (or @code{bzip2 -dc}) decompresses all specified files to | |
1194 | +the standard output. | |
1195 | + | |
1196 | +@code{bzip2} will read arguments from the environment variables | |
1197 | +@code{BZIP2} and @code{BZIP}, in that order, and will process them | |
1198 | +before any arguments read from the command line. This gives a | |
1199 | +convenient way to supply default arguments. | |
1200 | + | |
1201 | +Compression is always performed, even if the compressed file is slightly | |
1202 | +larger than the original. Files of less than about one hundred bytes | |
1203 | +tend to get larger, since the compression mechanism has a constant | |
1204 | +overhead in the region of 50 bytes. Random data (including the output | |
1205 | +of most file compressors) is coded at about 8.05 bits per byte, giving | |
1206 | +an expansion of around 0.5%. | |
1207 | + | |
1208 | +As a self-check for your protection, @code{bzip2} uses 32-bit CRCs to | |
1209 | +make sure that the decompressed version of a file is identical to the | |
1210 | +original. This guards against corruption of the compressed data, and | |
1211 | +against undetected bugs in @code{bzip2} (hopefully very unlikely). The | |
1212 | +chance of data corruption going undetected is microscopic, about one | |
1213 | +chance in four billion for each file processed. Be aware, though, that | |
1214 | +the check occurs upon decompression, so it can only tell you that | |
1215 | +something is wrong. It can't help you recover the original uncompressed | |
1216 | +data. You can use @code{bzip2recover} to try to recover data from | |
1217 | +damaged files. | |
1218 | + | |
1219 | +Return values: 0 for a normal exit, 1 for environmental problems (file | |
1220 | +not found, invalid flags, I/O errors, &c), 2 to indicate a corrupt | |
1221 | +compressed file, 3 for an internal consistency error (eg, bug) which | |
1222 | +caused @code{bzip2} to panic. | |
1223 | + | |
1224 | + | |
1225 | +@unnumberedsubsubsec OPTIONS | |
1226 | +@table @code | |
1227 | +@item -c --stdout | |
1228 | +Compress or decompress to standard output. | |
1229 | +@item -d --decompress | |
1230 | +Force decompression. @code{bzip2}, @code{bunzip2} and @code{bzcat} are | |
1231 | +really the same program, and the decision about what actions to take is | |
1232 | +done on the basis of which name is used. This flag overrides that | |
1233 | +mechanism, and forces @code{bzip2} to decompress. | |
1234 | +@item -z --compress | |
1235 | +The complement to @code{-d}: forces compression, regardless of the | |
1236 | +invocation name. | |
1237 | +@item -t --test | |
1238 | +Check integrity of the specified file(s), but don't decompress them. | |
1239 | +This really performs a trial decompression and throws away the result. | |
1240 | +@item -f --force | |
1241 | +Force overwrite of output files. Normally, @code{bzip2} will not overwrite | |
1242 | +existing output files. Also forces @code{bzip2} to break hard links | |
1243 | +to files, which it otherwise wouldn't do. | |
15ee0650 | 1244 | + |
1245 | +@code{bzip2} normally declines to decompress files which don't have the | |
1246 | +correct magic header bytes. If forced (@code{-f}), however, it will | |
1247 | +pass such files through unmodified. This is how GNU @code{gzip} | |
1248 | +behaves. | |
d967e3ec | 1249 | +@item -k --keep |
1250 | +Keep (don't delete) input files during compression | |
1251 | +or decompression. | |
1252 | +@item -s --small | |
1253 | +Reduce memory usage, for compression, decompression and testing. Files | |
1254 | +are decompressed and tested using a modified algorithm which only | |
1255 | +requires 2.5 bytes per block byte. This means any file can be | |
1256 | +decompressed in 2300k of memory, albeit at about half the normal speed. | |
1257 | + | |
1258 | +During compression, @code{-s} selects a block size of 200k, which limits | |
1259 | +memory use to around the same figure, at the expense of your compression | |
1260 | +ratio. In short, if your machine is low on memory (8 megabytes or | |
1261 | +less), use @code{-s} for everything. See MEMORY MANAGEMENT below. | |
1262 | +@item -q --quiet | |
1263 | +Suppress non-essential warning messages. Messages pertaining to | |
1264 | +I/O errors and other critical events will not be suppressed. | |
1265 | +@item -v --verbose | |
1266 | +Verbose mode -- show the compression ratio for each file processed. | |
1267 | +Further @code{-v}'s increase the verbosity level, spewing out lots of | |
1268 | +information which is primarily of interest for diagnostic purposes. | |
1269 | +@item -L --license -V --version | |
1270 | +Display the software version, license terms and conditions. | |
15ee0650 | 1271 | +@item -1 (or --fast) to -9 (or --best) |
d967e3ec | 1272 | +Set the block size to 100 k, 200 k .. 900 k when compressing. Has no |
1273 | +effect when decompressing. See MEMORY MANAGEMENT below. | |
15ee0650 | 1274 | +The @code{--fast} and @code{--best} aliases are primarily for GNU |
1275 | +@code{gzip} compatibility. In particular, @code{--fast} doesn't make | |
1276 | +things significantly faster. And @code{--best} merely selects the | |
1277 | +default behaviour. | |
d967e3ec | 1278 | +@item -- |
1279 | +Treats all subsequent arguments as file names, even if they start | |
1280 | +with a dash. This is so you can handle files with names beginning | |
1281 | +with a dash, for example: @code{bzip2 -- -myfilename}. | |
1282 | +@item --repetitive-fast | |
1283 | +@item --repetitive-best | |
1284 | +These flags are redundant in versions 0.9.5 and above. They provided | |
1285 | +some coarse control over the behaviour of the sorting algorithm in | |
1286 | +earlier versions, which was sometimes useful. 0.9.5 and above have an | |
1287 | +improved algorithm which renders these flags irrelevant. | |
1288 | +@end table | |
1289 | + | |
1290 | + | |
1291 | +@unnumberedsubsubsec MEMORY MANAGEMENT | |
1292 | + | |
1293 | +@code{bzip2} compresses large files in blocks. The block size affects | |
1294 | +both the compression ratio achieved, and the amount of memory needed for | |
1295 | +compression and decompression. The flags @code{-1} through @code{-9} | |
1296 | +specify the block size to be 100,000 bytes through 900,000 bytes (the | |
1297 | +default) respectively. At decompression time, the block size used for | |
1298 | +compression is read from the header of the compressed file, and | |
1299 | +@code{bunzip2} then allocates itself just enough memory to decompress | |
1300 | +the file. Since block sizes are stored in compressed files, it follows | |
1301 | +that the flags @code{-1} to @code{-9} are irrelevant to and so ignored | |
1302 | +during decompression. | |
1303 | + | |
1304 | +Compression and decompression requirements, in bytes, can be estimated | |
1305 | +as: | |
1306 | +@example | |
1307 | + Compression: 400k + ( 8 x block size ) | |
1308 | + | |
1309 | + Decompression: 100k + ( 4 x block size ), or | |
1310 | + 100k + ( 2.5 x block size ) | |
1311 | +@end example | |
1312 | +Larger block sizes give rapidly diminishing marginal returns. Most of | |
1313 | +the compression comes from the first two or three hundred k of block | |
1314 | +size, a fact worth bearing in mind when using @code{bzip2} on small machines. | |
1315 | +It is also important to appreciate that the decompression memory | |
1316 | +requirement is set at compression time by the choice of block size. | |
1317 | + | |
1318 | +For files compressed with the default 900k block size, @code{bunzip2} | |
1319 | +will require about 3700 kbytes to decompress. To support decompression | |
1320 | +of any file on a 4 megabyte machine, @code{bunzip2} has an option to | |
1321 | +decompress using approximately half this amount of memory, about 2300 | |
1322 | +kbytes. Decompression speed is also halved, so you should use this | |
1323 | +option only where necessary. The relevant flag is @code{-s}. | |
1324 | + | |
1325 | +In general, try to use the largest block size memory constraints allow, | |
1326 | +since that maximises the compression achieved. Compression and | |
1327 | +decompression speed are virtually unaffected by block size. | |
1328 | + | |
1329 | +Another significant point applies to files which fit in a single block | |
1330 | +-- that means most files you'd encounter using a large block size. The | |
1331 | +amount of real memory touched is proportional to the size of the file, | |
1332 | +since the file is smaller than a block. For example, compressing a file | |
1333 | +20,000 bytes long with the flag @code{-9} will cause the compressor to | |
1334 | +allocate around 7600k of memory, but only touch 400k + 20000 * 8 = 560 | |
1335 | +kbytes of it. Similarly, the decompressor will allocate 3700k but only | |
1336 | +touch 100k + 20000 * 4 = 180 kbytes. | |
1337 | + | |
1338 | +Here is a table which summarises the maximum memory usage for different | |
1339 | +block sizes. Also recorded is the total compressed size for 14 files of | |
1340 | +the Calgary Text Compression Corpus totalling 3,141,622 bytes. This | |
1341 | +column gives some feel for how compression varies with block size. | |
1342 | +These figures tend to understate the advantage of larger block sizes for | |
1343 | +larger files, since the Corpus is dominated by smaller files. | |
1344 | +@example | |
1345 | + Compress Decompress Decompress Corpus | |
1346 | + Flag usage usage -s usage Size | |
1347 | + | |
1348 | + -1 1200k 500k 350k 914704 | |
1349 | + -2 2000k 900k 600k 877703 | |
1350 | + -3 2800k 1300k 850k 860338 | |
1351 | + -4 3600k 1700k 1100k 846899 | |
1352 | + -5 4400k 2100k 1350k 845160 | |
1353 | + -6 5200k 2500k 1600k 838626 | |
1354 | + -7 6000k 2900k 1850k 834096 | |
1355 | + -8 6800k 3300k 2100k 828642 | |
1356 | + -9 7600k 3700k 2350k 828642 | |
1357 | +@end example | |
1358 | + | |
1359 | +@unnumberedsubsubsec RECOVERING DATA FROM DAMAGED FILES | |
1360 | + | |
1361 | +@code{bzip2} compresses files in blocks, usually 900 kbytes long. Each | |
1362 | +block is handled independently. If a media or transmission error causes | |
1363 | +a multi-block @code{.bz2} file to become damaged, it may be possible to | |
1364 | +recover data from the undamaged blocks in the file. | |
1365 | + | |
1366 | +The compressed representation of each block is delimited by a 48-bit | |
1367 | +pattern, which makes it possible to find the block boundaries with | |
1368 | +reasonable certainty. Each block also carries its own 32-bit CRC, so | |
1369 | +damaged blocks can be distinguished from undamaged ones. | |
1370 | + | |
1371 | +@code{bzip2recover} is a simple program whose purpose is to search for | |
1372 | +blocks in @code{.bz2} files, and write each block out into its own | |
1373 | +@code{.bz2} file. You can then use @code{bzip2 -t} to test the | |
1374 | +integrity of the resulting files, and decompress those which are | |
1375 | +undamaged. | |
1376 | + | |
1377 | +@code{bzip2recover} | |
15ee0650 | 1378 | +takes a single argument, the name of the damaged file, and writes a |
1379 | +number of files @code{rec00001file.bz2}, @code{rec00002file.bz2}, etc, | |
1380 | +containing the extracted blocks. The output filenames are designed so | |
1381 | +that the use of wildcards in subsequent processing -- for example, | |
1382 | +@code{bzip2 -dc rec*file.bz2 > recovered_data} -- processes the files in | |
1383 | +the correct order. | |
d967e3ec | 1384 | + |
1385 | +@code{bzip2recover} should be of most use dealing with large @code{.bz2} | |
15ee0650 | 1386 | +files, as these will contain many blocks. It is clearly futile to use |
1387 | +it on damaged single-block files, since a damaged block cannot be | |
1388 | +recovered. If you wish to minimise any potential data loss through | |
1389 | +media or transmission errors, you might consider compressing with a | |
1390 | +smaller block size. | |
d967e3ec | 1391 | + |
1392 | + | |
1393 | +@unnumberedsubsubsec PERFORMANCE NOTES | |
1394 | + | |
1395 | +The sorting phase of compression gathers together similar strings in the | |
1396 | +file. Because of this, files containing very long runs of repeated | |
1397 | +symbols, like "aabaabaabaab ..." (repeated several hundred times) may | |
1398 | +compress more slowly than normal. Versions 0.9.5 and above fare much | |
1399 | +better than previous versions in this respect. The ratio between | |
1400 | +worst-case and average-case compression time is in the region of 10:1. | |
1401 | +For previous versions, this figure was more like 100:1. You can use the | |
1402 | +@code{-vvvv} option to monitor progress in great detail, if you want. | |
1403 | + | |
1404 | +Decompression speed is unaffected by these phenomena. | |
1405 | + | |
1406 | +@code{bzip2} usually allocates several megabytes of memory to operate | |
1407 | +in, and then charges all over it in a fairly random fashion. This means | |
1408 | +that performance, both for compressing and decompressing, is largely | |
1409 | +determined by the speed at which your machine can service cache misses. | |
1410 | +Because of this, small changes to the code to reduce the miss rate have | |
1411 | +been observed to give disproportionately large performance improvements. | |
1412 | +I imagine @code{bzip2} will perform best on machines with very large | |
1413 | +caches. | |
1414 | + | |
1415 | + | |
1416 | +@unnumberedsubsubsec CAVEATS | |
1417 | + | |
1418 | +I/O error messages are not as helpful as they could be. @code{bzip2} | |
1419 | +tries hard to detect I/O errors and exit cleanly, but the details of | |
1420 | +what the problem is sometimes seem rather misleading. | |
1421 | + | |
15ee0650 | 1422 | +This manual page pertains to version 1.0.2 of @code{bzip2}. Compressed |
d967e3ec | 1423 | +data created by this version is entirely forwards and backwards |
15ee0650 | 1424 | +compatible with the previous public releases, versions 0.1pl2, 0.9.0, |
1425 | +0.9.5, 1.0.0 and 1.0.1, but with the following exception: 0.9.0 and | |
1426 | +above can correctly decompress multiple concatenated compressed files. | |
1427 | +0.1pl2 cannot do this; it will stop after decompressing just the first | |
1428 | +file in the stream. | |
1429 | + | |
1430 | +@code{bzip2recover} versions prior to this one, 1.0.2, used 32-bit | |
1431 | +integers to represent bit positions in compressed files, so they could not | |
1432 | +handle compressed files more than 512 megabytes long. Versions 1.0.2 and | |
1433 | +above use 64-bit ints on some platforms which support them (GNU | |
1434 | +supported targets, and Windows). To establish whether or not | |
1435 | +@code{bzip2recover} was built with such a limitation, run it without | |
1436 | +arguments. In any event you can build yourself an unlimited version if | |
1437 | +you can recompile it with @code{MaybeUInt64} set to be an unsigned | |
1438 | +64-bit integer. | |
d967e3ec | 1439 | + |
d967e3ec | 1440 | + |
1441 | + | |
1442 | +@unnumberedsubsubsec AUTHOR | |
1443 | +Julian Seward, @code{jseward@@acm.org}. | |
1444 | + | |
15ee0650 | 1445 | +@code{http://sources.redhat.com/bzip2} |
1446 | + | |
d967e3ec | 1447 | +The ideas embodied in @code{bzip2} are due to (at least) the following |
1448 | +people: Michael Burrows and David Wheeler (for the block sorting | |
1449 | +transformation), David Wheeler (again, for the Huffman coder), Peter | |
1450 | +Fenwick (for the structured coding model in the original @code{bzip}, | |
1451 | +and many refinements), and Alistair Moffat, Radford Neal and Ian Witten | |
1452 | +(for the arithmetic coder in the original @code{bzip}). I am much | |
1453 | +indebted for their help, support and advice. See the manual in the | |
1454 | +source distribution for pointers to sources of documentation. Christian | |
1455 | +von Roques encouraged me to look for faster sorting algorithms, so as to | |
1456 | +speed up compression. Bela Lubkin encouraged me to improve the | |
15ee0650 | 1457 | +worst-case compression performance. The @code{bz*} scripts are derived |
1458 | +from those of GNU @code{gzip}. Many people sent patches, helped with | |
1459 | +portability problems, lent machines, gave advice and were generally | |
d967e3ec | 1460 | +helpful. |
1461 | + | |
1462 | +@end quotation | |
1463 | + | |
1464 | + | |
1465 | + | |
1466 | + | |
1467 | +@chapter Programming with @code{libbzip2} | |
1468 | + | |
1469 | +This chapter describes the programming interface to @code{libbzip2}. | |
1470 | + | |
1471 | +For general background information, particularly about memory | |
1472 | +use and performance aspects, you'd be well advised to read Chapter 2 | |
1473 | +as well. | |
1474 | + | |
1475 | +@section Top-level structure | |
1476 | + | |
1477 | +@code{libbzip2} is a flexible library for compressing and decompressing | |
1478 | +data in the @code{bzip2} data format. Although packaged as a single | |
1479 | +entity, it helps to regard the library as three separate parts: the low | |
1480 | +level interface, the high level interface, and some utility | |
1481 | +functions. | |
1482 | + | |
1483 | +The structure of @code{libbzip2}'s interfaces is similar to | |
1484 | +that of Jean-loup Gailly's and Mark Adler's excellent @code{zlib} | |
1485 | +library. | |
1486 | + | |
1487 | +All externally visible symbols have names beginning @code{BZ2_}. | |
1488 | +This is new in version 1.0. The intention is to minimise pollution | |
1489 | +of the namespaces of library clients. | |
1490 | + | |
1491 | +@subsection Low-level summary | |
1492 | + | |
1493 | +This interface provides services for compressing and decompressing | |
1494 | +data in memory. There's no provision for dealing with files, streams | |
1495 | +or any other I/O mechanisms, just straight memory-to-memory work. | |
1496 | +In fact, this part of the library can be compiled without inclusion | |
1497 | +of @code{stdio.h}, which may be helpful for embedded applications. | |
1498 | + | |
1499 | +The low-level part of the library has no global variables and | |
1500 | +is therefore thread-safe. | |
1501 | + | |
1502 | +Six routines make up the low level interface: | |
1503 | +@code{BZ2_bzCompressInit}, @code{BZ2_bzCompress}, and @* @code{BZ2_bzCompressEnd} | |
1504 | +for compression, | |
1505 | +and a corresponding trio @code{BZ2_bzDecompressInit}, @* @code{BZ2_bzDecompress} | |
1506 | +and @code{BZ2_bzDecompressEnd} for decompression. | |
1507 | +The @code{*Init} functions allocate | |
1508 | +memory for compression/decompression and do other | |
1509 | +initialisations, whilst the @code{*End} functions close down operations | |
1510 | +and release memory. | |
1511 | + | |
1512 | +The real work is done by @code{BZ2_bzCompress} and @code{BZ2_bzDecompress}. | |
1513 | +These compress and decompress data from a user-supplied input buffer | |
1514 | +to a user-supplied output buffer. These buffers can be any size; | |
1515 | +arbitrary quantities of data are handled by making repeated calls | |
1516 | +to these functions. This is a flexible mechanism allowing a | |
1517 | +consumer-pull style of activity, or producer-push, or a mixture of | |
1518 | +both. | |
1519 | + | |
1520 | + | |
1521 | + | |
1522 | +@subsection High-level summary | |
1523 | + | |
1524 | +This interface provides some handy wrappers around the low-level | |
1525 | +interface to facilitate reading and writing @code{bzip2} format | |
1526 | +files (@code{.bz2} files). The routines provide hooks to facilitate | |
1527 | +reading files in which the @code{bzip2} data stream is embedded | |
1528 | +within some larger-scale file structure, or where there are | |
1529 | +multiple @code{bzip2} data streams concatenated end-to-end. | |
1530 | + | |
1531 | +For reading files, @code{BZ2_bzReadOpen}, @code{BZ2_bzRead}, | |
1532 | +@code{BZ2_bzReadClose} and @* @code{BZ2_bzReadGetUnused} are supplied. For | |
1533 | +writing files, @code{BZ2_bzWriteOpen}, @code{BZ2_bzWrite} and | |
1534 | +@code{BZ2_bzWriteFinish} are available. | |
1535 | + | |
1536 | +As with the low-level library, no global variables are used | |
1537 | +so the library is per se thread-safe. However, if I/O errors | |
1538 | +occur whilst reading or writing the underlying compressed files, | |
1539 | +you may have to consult @code{errno} to determine the cause of | |
1540 | +the error. In that case, you'd need a C library which correctly | |
1541 | +supports @code{errno} in a multithreaded environment. | |
1542 | + | |
1543 | +To make the library a little simpler and more portable, | |
1544 | +@code{BZ2_bzReadOpen} and @code{BZ2_bzWriteOpen} require you to pass them file | |
1545 | +handles (@code{FILE*}s) which have previously been opened for reading or | |
1546 | +writing respectively. That avoids portability problems associated with | |
1547 | +file operations and file attributes, whilst not being much of an | |
1548 | +imposition on the programmer. | |
1549 | + | |
1550 | + | |
1551 | + | |
1552 | +@subsection Utility functions summary | |
1553 | +For very simple needs, @code{BZ2_bzBuffToBuffCompress} and | |
1554 | +@code{BZ2_bzBuffToBuffDecompress} are provided. These compress | |
1555 | +data in memory from one buffer to another buffer in a single | |
1556 | +function call. You should assess whether these functions | |
1557 | +fulfill your memory-to-memory compression/decompression | |
1558 | +requirements before investing effort in understanding the more | |
1559 | +general but more complex low-level interface. | |
1560 | + | |
1561 | +Yoshioka Tsuneo (@code{QWF00133@@niftyserve.or.jp} / | |
1562 | +@code{tsuneo-y@@is.aist-nara.ac.jp}) has contributed some functions to | |
1563 | +give better @code{zlib} compatibility. These functions are | |
1564 | +@code{BZ2_bzopen}, @code{BZ2_bzread}, @code{BZ2_bzwrite}, @code{BZ2_bzflush}, | |
1565 | +@code{BZ2_bzclose}, | |
1566 | +@code{BZ2_bzerror} and @code{BZ2_bzlibVersion}. You may find these functions | |
1567 | +more convenient for simple file reading and writing, than those in the | |
1568 | +high-level interface. These functions are not (yet) officially part of | |
1569 | +the library, and are minimally documented here. If they break, you | |
1570 | +get to keep all the pieces. I hope to document them properly when time | |
1571 | +permits. | |
1572 | + | |
1573 | +Yoshioka also contributed modifications to allow the library to be | |
1574 | +built as a Windows DLL. | |
1575 | + | |
1576 | + | |
1577 | +@section Error handling | |
1578 | + | |
1579 | +The library is designed to recover cleanly in all situations, including | |
1580 | +the worst-case situation of decompressing random data. I'm not | |
1581 | +100% sure that it can always do this, so you might want to add | |
1582 | +a signal handler to catch segmentation violations during decompression | |
1583 | +if you are feeling especially paranoid. I would be interested in | |
1584 | +hearing more about the robustness of the library to corrupted | |
1585 | +compressed data. | |
1586 | + | |
1587 | +Version 1.0 is much more robust in this respect than | |
1588 | +0.9.0 or 0.9.5. Investigations with Checker (a tool for | |
1589 | +detecting problems with memory management, similar to Purify) | |
1590 | +indicate that, at least for the few files I tested, all single-bit | |
1591 | +errors in the compressed data are caught properly, with no | |
1592 | +segmentation faults, no reads of uninitialised data and no | |
1593 | +out of range reads or writes. So it's certainly much improved, | |
1594 | +although I wouldn't claim it to be totally bombproof. | |
1595 | + | |
1596 | +The file @code{bzlib.h} contains all definitions needed to use | |
1597 | +the library. In particular, you should definitely not include | |
1598 | +@code{bzlib_private.h}. | |
1599 | + | |
1600 | +In @code{bzlib.h}, the various return values are defined. The following | |
1601 | +list is not intended as an exhaustive description of the circumstances | |
1602 | +in which a given value may be returned -- those descriptions are given | |
1603 | +later. Rather, it is intended to convey the rough meaning of each | |
1604 | +return value. The first five values are normal and not intended to | |
1605 | +denote an error situation. | |
1606 | +@table @code | |
1607 | +@item BZ_OK | |
1608 | +The requested action was completed successfully. | |
1609 | +@item BZ_RUN_OK | |
1610 | +@itemx BZ_FLUSH_OK | |
1611 | +@itemx BZ_FINISH_OK | |
1612 | +In @code{BZ2_bzCompress}, the requested flush/finish/nothing-special action | |
1613 | +was completed successfully. | |
1614 | +@item BZ_STREAM_END | |
1615 | +Compression of data was completed, or the logical stream end was | |
1616 | +detected during decompression. | |
1617 | +@end table | |
1618 | + | |
1619 | +The following return values indicate an error of some kind. | |
1620 | +@table @code | |
1621 | +@item BZ_CONFIG_ERROR | |
1622 | +Indicates that the library has been improperly compiled on your | |
1623 | +platform -- a major configuration error. Specifically, it means | |
1624 | +that @code{sizeof(char)}, @code{sizeof(short)} and @code{sizeof(int)} | |
1625 | +are not 1, 2 and 4 respectively, as they should be. Note that the | |
1626 | +library should still work properly on 64-bit platforms which follow | |
1627 | +the LP64 programming model -- that is, where @code{sizeof(long)} | |
1628 | +and @code{sizeof(void*)} are 8. Under LP64, @code{sizeof(int)} is | |
1629 | +still 4, so @code{libbzip2}, which doesn't use the @code{long} type, | |
1630 | +is OK. | |
1631 | +@item BZ_SEQUENCE_ERROR | |
1632 | +When using the library, it is important to call the functions in the | |
1633 | +correct sequence and with data structures (buffers etc) in the correct | |
1634 | +states. @code{libbzip2} checks as much as it can to ensure this is | |
1635 | +happening, and returns @code{BZ_SEQUENCE_ERROR} if not. Code which | |
1636 | +complies precisely with the function semantics, as detailed below, | |
1637 | +should never receive this value; such an event denotes buggy code | |
1638 | +which you should investigate. | |
1639 | +@item BZ_PARAM_ERROR | |
1640 | +Returned when a parameter to a function call is out of range | |
1641 | +or otherwise manifestly incorrect. As with @code{BZ_SEQUENCE_ERROR}, | |
1642 | +this denotes a bug in the client code. The distinction between | |
1643 | +@code{BZ_PARAM_ERROR} and @code{BZ_SEQUENCE_ERROR} is a bit hazy, but still worth | |
1644 | +making. | |
1645 | +@item BZ_MEM_ERROR | |
1646 | +Returned when a request to allocate memory failed. Note that the | |
1647 | +quantity of memory needed to decompress a stream cannot be determined | |
1648 | +until the stream's header has been read. So @code{BZ2_bzDecompress} and | |
1649 | +@code{BZ2_bzRead} may return @code{BZ_MEM_ERROR} even though some of | |
1650 | +the compressed data has been read. The same is not true for | |
1651 | +compression; once @code{BZ2_bzCompressInit} or @code{BZ2_bzWriteOpen} have | |
1652 | +successfully completed, @code{BZ_MEM_ERROR} cannot occur. | |
1653 | +@item BZ_DATA_ERROR | |
1654 | +Returned when a data integrity error is detected during decompression. | |
1655 | +Most importantly, this means when stored and computed CRCs for the | |
1656 | +data do not match. This value is also returned upon detection of any | |
1657 | +other anomaly in the compressed data. | |
1658 | +@item BZ_DATA_ERROR_MAGIC | |
1659 | +As a special case of @code{BZ_DATA_ERROR}, it is sometimes useful to | |
1660 | +know when the compressed stream does not start with the correct | |
1661 | +magic bytes (@code{'B' 'Z' 'h'}). | |
1662 | +@item BZ_IO_ERROR | |
1663 | +Returned by @code{BZ2_bzRead} and @code{BZ2_bzWrite} when there is an error | |
1664 | +reading or writing in the compressed file, and by @code{BZ2_bzReadOpen} | |
1665 | +and @code{BZ2_bzWriteOpen} for attempts to use a file for which the | |
1666 | +error indicator (viz, @code{ferror(f)}) is set. | |
1667 | +On receipt of @code{BZ_IO_ERROR}, the caller should consult | |
1668 | +@code{errno} and/or @code{perror} to acquire operating-system | |
1669 | +specific information about the problem. | |
1670 | +@item BZ_UNEXPECTED_EOF | |
1671 | +Returned by @code{BZ2_bzRead} when the compressed file finishes | |
1672 | +before the logical end of stream is detected. | |
1673 | +@item BZ_OUTBUFF_FULL | |
1674 | +Returned by @code{BZ2_bzBuffToBuffCompress} and | |
1675 | +@code{BZ2_bzBuffToBuffDecompress} to indicate that the output data | |
1676 | +will not fit into the output buffer provided. | |
1677 | +@end table | |
1678 | + | |
1679 | + | |
1680 | + | |
1681 | +@section Low-level interface | |
1682 | + | |
1683 | +@subsection @code{BZ2_bzCompressInit} | |
1684 | +@example | |
1685 | +typedef | |
1686 | + struct @{ | |
1687 | + char *next_in; | |
1688 | + unsigned int avail_in; | |
1689 | + unsigned int total_in_lo32; | |
1690 | + unsigned int total_in_hi32; | |
1691 | + | |
1692 | + char *next_out; | |
1693 | + unsigned int avail_out; | |
1694 | + unsigned int total_out_lo32; | |
1695 | + unsigned int total_out_hi32; | |
1696 | + | |
1697 | + void *state; | |
1698 | + | |
1699 | + void *(*bzalloc)(void *,int,int); | |
1700 | + void (*bzfree)(void *,void *); | |
1701 | + void *opaque; | |
1702 | + @} | |
1703 | + bz_stream; | |
1704 | + | |
1705 | +int BZ2_bzCompressInit ( bz_stream *strm, | |
1706 | + int blockSize100k, | |
1707 | + int verbosity, | |
1708 | + int workFactor ); | |
1709 | + | |
1710 | +@end example | |
1711 | + | |
1712 | +Prepares for compression. The @code{bz_stream} structure | |
1713 | +holds all data pertaining to the compression activity. | |
1714 | +A @code{bz_stream} structure should be allocated and initialised | |
1715 | +prior to the call. | |
1716 | +The fields of @code{bz_stream} | |
1717 | +comprise the entirety of the user-visible data. @code{state} | |
1718 | +is a pointer to the private data structures required for compression. | |
1719 | + | |
1720 | +Custom memory allocators are supported, via fields @code{bzalloc}, | |
1721 | +@code{bzfree}, | |
1722 | +and @code{opaque}. The value | |
1723 | +@code{opaque} is passed as the first argument to | |
1724 | +all calls to @code{bzalloc} and @code{bzfree}, but is | |
1725 | +otherwise ignored by the library. | |
1726 | +The call @code{bzalloc ( opaque, n, m )} is expected to return a | |
1727 | +pointer @code{p} to | |
1728 | +@code{n * m} bytes of memory, and @code{bzfree ( opaque, p )} | |
1729 | +should free | |
1730 | +that memory. | |
1731 | + | |
1732 | +If you don't want to use a custom memory allocator, set @code{bzalloc}, | |
1733 | +@code{bzfree} and | |
1734 | +@code{opaque} to @code{NULL}, | |
1735 | +and the library will then use the standard @code{malloc}/@code{free} | |
1736 | +routines. | |
1737 | + | |
1738 | +Before calling @code{BZ2_bzCompressInit}, fields @code{bzalloc}, | |
1739 | +@code{bzfree} and @code{opaque} should | |
1740 | +be filled appropriately, as just described. Upon return, the internal | |
1741 | +state will have been allocated and initialised, and @code{total_in_lo32}, | |
1742 | +@code{total_in_hi32}, @code{total_out_lo32} and | |
1743 | +@code{total_out_hi32} will have been set to zero. | |
1744 | +These four fields are used by the library | |
1745 | +to inform the caller of the total amount of data passed into and out of | |
1746 | +the library, respectively. You should not try to change them. | |
1747 | +As of version 1.0, 64-bit counts are maintained, even on 32-bit | |
1748 | +platforms, using the @code{_hi32} fields to store the upper 32 bits | |
1749 | +of the count. So, for example, the total amount of data in | |
1750 | +is @code{(total_in_hi32 << 32) + total_in_lo32}. | |
1751 | + | |
1752 | +Parameter @code{blockSize100k} specifies the block size to be used for | |
1753 | +compression. It should be a value between 1 and 9 inclusive, and the | |
1754 | +actual block size used is 100000 x this figure. 9 gives the best | |
1755 | +compression but takes most memory. | |
1756 | + | |
1757 | +Parameter @code{verbosity} should be set to a number between 0 and 4 | |
1758 | +inclusive. 0 is silent, and greater numbers give increasingly verbose | |
1759 | +monitoring/debugging output. If the library has been compiled with | |
1760 | +@code{-DBZ_NO_STDIO}, no such output will appear for any verbosity | |
1761 | +setting. | |
1762 | + | |
1763 | +Parameter @code{workFactor} controls how the compression phase behaves | |
1764 | +when presented with worst case, highly repetitive, input data. If | |
1765 | +compression runs into difficulties caused by repetitive data, the | |
1766 | +library switches from the standard sorting algorithm to a fallback | |
1767 | +algorithm. The fallback is slower than the standard algorithm by | |
1768 | +perhaps a factor of three, but always behaves reasonably, no matter how | |
1769 | +bad the input. | |
1770 | + | |
1771 | +Lower values of @code{workFactor} reduce the amount of effort the | |
1772 | +standard algorithm will expend before resorting to the fallback. You | |
1773 | +should set this parameter carefully: too low, and many inputs will be | |
1774 | +handled by the fallback algorithm and so compress rather slowly; too | |
1775 | +high, and your average-to-worst case compression times can become very | |
1776 | +large. The default value of 30 gives reasonable behaviour over a wide | |
1777 | +range of circumstances. | |
1778 | + | |
1779 | +Allowable values range from 0 to 250 inclusive. 0 is a special case, | |
1780 | +equivalent to using the default value of 30. | |
1781 | + | |
1782 | +Note that the compressed output generated is the same regardless of | |
1783 | +whether or not the fallback algorithm is used. | |
1784 | + | |
1785 | +Be aware also that this parameter may disappear entirely in future | |
1786 | +versions of the library. In principle it should be possible to devise a | |
1787 | +good way to automatically choose which algorithm to use. Such a | |
1788 | +mechanism would render the parameter obsolete. | |
1789 | + | |
1790 | +Possible return values: | |
1791 | +@display | |
1792 | + @code{BZ_CONFIG_ERROR} | |
1793 | + if the library has been mis-compiled | |
1794 | + @code{BZ_PARAM_ERROR} | |
1795 | + if @code{strm} is @code{NULL} | |
1796 | + or @code{blockSize100k} < 1 or @code{blockSize100k} > 9 | |
1797 | + or @code{verbosity} < 0 or @code{verbosity} > 4 | |
1798 | + or @code{workFactor} < 0 or @code{workFactor} > 250 | |
1799 | + @code{BZ_MEM_ERROR} | |
1800 | + if not enough memory is available | |
1801 | + @code{BZ_OK} | |
1802 | + otherwise | |
1803 | +@end display | |
1804 | +Allowable next actions: | |
1805 | +@display | |
1806 | + @code{BZ2_bzCompress} | |
1807 | + if @code{BZ_OK} is returned | |
1808 | + no specific action needed in case of error | |
1809 | +@end display | |
1810 | + | |
1811 | +@subsection @code{BZ2_bzCompress} | |
1812 | +@example | |
1813 | + int BZ2_bzCompress ( bz_stream *strm, int action ); | |
1814 | +@end example | |
1815 | +Provides more input and/or output buffer space for the library. The | |
1816 | +caller maintains input and output buffers, and calls @code{BZ2_bzCompress} to | |
1817 | +transfer data between them. | |
1818 | + | |
1819 | +Before each call to @code{BZ2_bzCompress}, @code{next_in} should point at | |
1820 | +the data to be compressed, and @code{avail_in} should indicate how many | |
1821 | +bytes the library may read. @code{BZ2_bzCompress} updates @code{next_in}, | |
1822 | +@code{avail_in} and @code{total_in} to reflect the number of bytes it | |
1823 | +has read. | |
1824 | + | |
1825 | +Similarly, @code{next_out} should point to a buffer in which the | |
1826 | +compressed data is to be placed, with @code{avail_out} indicating how | |
1827 | +much output space is available. @code{BZ2_bzCompress} updates | |
1828 | +@code{next_out}, @code{avail_out} and @code{total_out} to reflect the | |
1829 | +number of bytes output. | |
1830 | + | |
1831 | +You may provide and remove as little or as much data as you like on each | |
1832 | +call of @code{BZ2_bzCompress}. In the limit, it is acceptable to supply and | |
1833 | +remove data one byte at a time, although this would be terribly | |
1834 | +inefficient. You should always ensure that at least one byte of output | |
1835 | +space is available at each call. | |
1836 | + | |
1837 | +A second purpose of @code{BZ2_bzCompress} is to request a change of mode of the | |
1838 | +compressed stream. | |
1839 | + | |
1840 | +Conceptually, a compressed stream can be in one of four states: IDLE, | |
1841 | +RUNNING, FLUSHING and FINISHING. Before initialisation | |
1842 | +(@code{BZ2_bzCompressInit}) and after termination (@code{BZ2_bzCompressEnd}), a | |
1843 | +stream is regarded as IDLE. | |
1844 | + | |
1845 | +Upon initialisation (@code{BZ2_bzCompressInit}), the stream is placed in the | |
1846 | +RUNNING state. Subsequent calls to @code{BZ2_bzCompress} should pass | |
1847 | +@code{BZ_RUN} as the requested action; other actions are illegal and | |
1848 | +will result in @code{BZ_SEQUENCE_ERROR}. | |
1849 | + | |
1850 | +At some point, the calling program will have provided all the input data | |
1851 | +it wants to. It will then want to finish up -- in effect, asking the | |
1852 | +library to process any data it might have buffered internally. In this | |
1853 | +state, @code{BZ2_bzCompress} will no longer attempt to read data from | |
1854 | +@code{next_in}, but it will want to write data to @code{next_out}. | |
1855 | +Because the output buffer supplied by the user can be arbitrarily small, | |
1856 | +the finishing-up operation cannot necessarily be done with a single call | |
1857 | +of @code{BZ2_bzCompress}. | |
1858 | + | |
1859 | +Instead, the calling program passes @code{BZ_FINISH} as an action to | |
1860 | +@code{BZ2_bzCompress}. This changes the stream's state to FINISHING. Any | |
1861 | +remaining input (ie, @code{next_in[0 .. avail_in-1]}) is compressed and | |
1862 | +transferred to the output buffer. To do this, @code{BZ2_bzCompress} must be | |
1863 | +called repeatedly until all the output has been consumed. At that | |
1864 | +point, @code{BZ2_bzCompress} returns @code{BZ_STREAM_END}, and the stream's | |
1865 | +state is set back to IDLE. @code{BZ2_bzCompressEnd} should then be | |
1866 | +called. | |
1867 | + | |
1868 | +Just to make sure the calling program does not cheat, the library makes | |
1869 | +a note of @code{avail_in} at the time of the first call to | |
1870 | +@code{BZ2_bzCompress} which has @code{BZ_FINISH} as an action (ie, at the | |
1871 | +time the program has announced its intention to not supply any more | |
1872 | +input). By comparing this value with that of @code{avail_in} over | |
1873 | +subsequent calls to @code{BZ2_bzCompress}, the library can detect any | |
1874 | +attempts to slip in more data to compress. Any calls for which this is | |
1875 | +detected will return @code{BZ_SEQUENCE_ERROR}. This indicates a | |
1876 | +programming mistake which should be corrected. | |
1877 | + | |
1878 | +Instead of asking to finish, the calling program may ask | |
1879 | +@code{BZ2_bzCompress} to take all the remaining input, compress it and | |
1880 | +terminate the current (Burrows-Wheeler) compression block. This could | |
1881 | +be useful for error control purposes. The mechanism is analogous to | |
1882 | +that for finishing: call @code{BZ2_bzCompress} with an action of | |
1883 | +@code{BZ_FLUSH}, remove output data, and persist with the | |
1884 | +@code{BZ_FLUSH} action until the value @code{BZ_RUN_OK} is returned. As | |
1885 | +with finishing, @code{BZ2_bzCompress} detects any attempt to provide more | |
1886 | +input data once the flush has begun. | |
1887 | + | |
1888 | +Once the flush is complete, the stream returns to the normal RUNNING | |
1889 | +state. | |
1890 | + | |
1891 | +This all sounds pretty complex, but isn't really. Here's a table | |
1892 | +which shows which actions are allowable in each state, what action | |
1893 | +will be taken, what the next state is, and what the non-error return | |
1894 | +values are. Note that you can't explicitly ask what state the | |
1895 | +stream is in, but nor do you need to -- it can be inferred from the | |
1896 | +values returned by @code{BZ2_bzCompress}. | |
1897 | +@display | |
1898 | +IDLE/@code{any} | |
1899 | + Illegal. IDLE state only exists after @code{BZ2_bzCompressEnd} or | |
1900 | + before @code{BZ2_bzCompressInit}. | |
1901 | + Return value = @code{BZ_SEQUENCE_ERROR} | |
1902 | + | |
1903 | +RUNNING/@code{BZ_RUN} | |
1904 | + Compress from @code{next_in} to @code{next_out} as much as possible. | |
1905 | + Next state = RUNNING | |
1906 | + Return value = @code{BZ_RUN_OK} | |
1907 | + | |
1908 | +RUNNING/@code{BZ_FLUSH} | |
1909 | + Remember current value of @code{avail_in}. Compress from @code{next_in} | |
1910 | + to @code{next_out} as much as possible, but do not accept any more input. | |
1911 | + Next state = FLUSHING | |
1912 | + Return value = @code{BZ_FLUSH_OK} | |
1913 | + | |
1914 | +RUNNING/@code{BZ_FINISH} | |
1915 | + Remember current value of @code{avail_in}. Compress from @code{next_in} | |
1916 | + to @code{next_out} as much as possible, but do not accept any more input. | |
1917 | + Next state = FINISHING | |
1918 | + Return value = @code{BZ_FINISH_OK} | |
1919 | + | |
1920 | +FLUSHING/@code{BZ_FLUSH} | |
1921 | + Compress from @code{next_in} to @code{next_out} as much as possible, | |
1922 | + but do not accept any more input. | |
1923 | + If all the existing input has been used up and all compressed | |
1924 | + output has been removed | |
1925 | + Next state = RUNNING; Return value = @code{BZ_RUN_OK} | |
1926 | + else | |
1927 | + Next state = FLUSHING; Return value = @code{BZ_FLUSH_OK} | |
1928 | + | |
1929 | +FLUSHING/other | |
1930 | + Illegal. | |
1931 | + Return value = @code{BZ_SEQUENCE_ERROR} | |
1932 | + | |
1933 | +FINISHING/@code{BZ_FINISH} | |
1934 | + Compress from @code{next_in} to @code{next_out} as much as possible, | |
1935 | + but do not accept any more input. | |
1936 | + If all the existing input has been used up and all compressed | |
1937 | + output has been removed | |
1938 | + Next state = IDLE; Return value = @code{BZ_STREAM_END} | |
1939 | + else | |
1940 | + Next state = FINISHING; Return value = @code{BZ_FINISH_OK} | |
1941 | + | |
1942 | +FINISHING/other | |
1943 | + Illegal. | |
1944 | + Return value = @code{BZ_SEQUENCE_ERROR} | |
1945 | +@end display | |
1946 | + | |
1947 | +That still looks complicated? Well, fair enough. The usual sequence | |
1948 | +of calls for compressing a load of data is: | |
1949 | +@itemize @bullet | |
1950 | +@item Get started with @code{BZ2_bzCompressInit}. | |
1951 | +@item Shovel data in and shlurp out its compressed form using zero or more | |
1952 | +calls of @code{BZ2_bzCompress} with action = @code{BZ_RUN}. | |
1953 | +@item Finish up. | |
1954 | +Repeatedly call @code{BZ2_bzCompress} with action = @code{BZ_FINISH}, | |
1955 | +copying out the compressed output, until @code{BZ_STREAM_END} is returned. | |
1956 | +@item Close up and go home. Call @code{BZ2_bzCompressEnd}. | |
1957 | +@end itemize | |
1958 | +If the data you want to compress fits into your input buffer all | |
1959 | +at once, you can skip the calls of @code{BZ2_bzCompress ( ..., BZ_RUN )} and | |
1960 | +just do the @code{BZ2_bzCompress ( ..., BZ_FINISH )} calls. | |
1961 | + | |
1962 | +All required memory is allocated by @code{BZ2_bzCompressInit}. The | |
1963 | +compression library can accept any data at all (obviously). So you | |
1964 | +shouldn't get any error return values from the @code{BZ2_bzCompress} calls. | |
1965 | +If you do, they will be @code{BZ_SEQUENCE_ERROR}, and indicate a bug in | |
1966 | +your programming. | |
1967 | + | |
1968 | +Trivial other possible return values: | |
1969 | +@display | |
1970 | + @code{BZ_PARAM_ERROR} | |
1971 | + if @code{strm} is @code{NULL}, or @code{strm->state} is @code{NULL} | |
1972 | +@end display | |
1973 | + | |
1974 | +@subsection @code{BZ2_bzCompressEnd} | |
1975 | +@example | |
1976 | +int BZ2_bzCompressEnd ( bz_stream *strm ); | |
1977 | +@end example | |
1978 | +Releases all memory associated with a compression stream. | |
1979 | + | |
1980 | +Possible return values: | |
1981 | +@display | |
1982 | + @code{BZ_PARAM_ERROR} if @code{strm} is @code{NULL} or @code{strm->state} is @code{NULL} | |
1983 | + @code{BZ_OK} otherwise | |
1984 | +@end display | |
1985 | + | |
1986 | + | |
1987 | +@subsection @code{BZ2_bzDecompressInit} | |
1988 | +@example | |
1989 | +int BZ2_bzDecompressInit ( bz_stream *strm, int verbosity, int small ); | |
1990 | +@end example | |
1991 | +Prepares for decompression. As with @code{BZ2_bzCompressInit}, a | |
1992 | +@code{bz_stream} record should be allocated and initialised before the | |
1993 | +call. Fields @code{bzalloc}, @code{bzfree} and @code{opaque} should be | |
1994 | +set if a custom memory allocator is required, or made @code{NULL} for | |
1995 | +the normal @code{malloc}/@code{free} routines. Upon return, the internal | |
1996 | +state will have been initialised, and @code{total_in} and | |
1997 | +@code{total_out} will be zero. | |
1998 | + | |
1999 | +For the meaning of parameter @code{verbosity}, see @code{BZ2_bzCompressInit}. | |
2000 | + | |
2001 | +If @code{small} is nonzero, the library will use an alternative | |
2002 | +decompression algorithm which uses less memory but at the cost of | |
2003 | +decompressing more slowly (roughly speaking, half the speed, but the | |
2004 | +maximum memory requirement drops to around 2300k). See Chapter 2 for | |
2005 | +more information on memory management. | |
2006 | + | |
2007 | +Note that the amount of memory needed to decompress | |
2008 | +a stream cannot be determined until the stream's header has been read, | |
2009 | +so even if @code{BZ2_bzDecompressInit} succeeds, a subsequent | |
2010 | +@code{BZ2_bzDecompress} could fail with @code{BZ_MEM_ERROR}. | |
2011 | + | |
2012 | +Possible return values: | |
2013 | +@display | |
2014 | + @code{BZ_CONFIG_ERROR} | |
2015 | + if the library has been mis-compiled | |
2016 | + @code{BZ_PARAM_ERROR} | |
2017 | + if @code{(small != 0 && small != 1)} | |
2018 | + or @code{(verbosity < 0 || verbosity > 4)} | |
2019 | + @code{BZ_MEM_ERROR} | |
2020 | + if insufficient memory is available | |
2021 | +@end display | |
2022 | + | |
2023 | +Allowable next actions: | |
2024 | +@display | |
2025 | + @code{BZ2_bzDecompress} | |
2026 | + if @code{BZ_OK} was returned | |
2027 | + no specific action required in case of error | |
2028 | +@end display | |
2029 | + | |
2030 | + | |
2031 | + | |
2032 | +@subsection @code{BZ2_bzDecompress} | |
2033 | +@example | |
2034 | +int BZ2_bzDecompress ( bz_stream *strm ); | |
2035 | +@end example | |
2036 | +Provides more input and/or output buffer space for the library. The | |
2037 | +caller maintains input and output buffers, and uses @code{BZ2_bzDecompress} | |
2038 | +to transfer data between them. | |
2039 | + | |
2040 | +Before each call to @code{BZ2_bzDecompress}, @code{next_in} | |
2041 | +should point at the compressed data, | |
2042 | +and @code{avail_in} should indicate how many bytes the library | |
2043 | +may read. @code{BZ2_bzDecompress} updates @code{next_in}, @code{avail_in} | |
2044 | +and @code{total_in} | |
2045 | +to reflect the number of bytes it has read. | |
2046 | + | |
2047 | +Similarly, @code{next_out} should point to a buffer in which the uncompressed | |
2048 | +output is to be placed, with @code{avail_out} indicating how much output space | |
2049 | +is available. @code{BZ2_bzDecompress} updates @code{next_out}, | |
2050 | +@code{avail_out} and @code{total_out} to reflect | |
2051 | +the number of bytes output. | |
2052 | + | |
2053 | +You may provide and remove as little or as much data as you like on | |
2054 | +each call of @code{BZ2_bzDecompress}. | |
2055 | +In the limit, it is acceptable to | |
2056 | +supply and remove data one byte at a time, although this would be | |
2057 | +terribly inefficient. You should always ensure that at least one | |
2058 | +byte of output space is available at each call. | |
2059 | + | |
2060 | +Use of @code{BZ2_bzDecompress} is simpler than @code{BZ2_bzCompress}. | |
2061 | + | |
2062 | +You should provide input and remove output as described above, and | |
2063 | +repeatedly call @code{BZ2_bzDecompress} until @code{BZ_STREAM_END} is | |
2064 | +returned. Appearance of @code{BZ_STREAM_END} denotes that | |
2065 | +@code{BZ2_bzDecompress} has detected the logical end of the compressed | |
2066 | +stream. @code{BZ2_bzDecompress} will not produce @code{BZ_STREAM_END} until | |
2067 | +all output data has been placed into the output buffer, so once | |
2068 | +@code{BZ_STREAM_END} appears, you are guaranteed to have available all | |
2069 | +the decompressed output, and @code{BZ2_bzDecompressEnd} can safely be | |
2070 | +called. | |
2071 | + | |
2072 | +In case of an error return value, you should call @code{BZ2_bzDecompressEnd} | |
2073 | +to clean up and release memory. | |
2074 | + | |
2075 | +Possible return values: | |
2076 | +@display | |
2077 | + @code{BZ_PARAM_ERROR} | |
2078 | + if @code{strm} is @code{NULL} or @code{strm->s} is @code{NULL} | |
2079 | + or @code{strm->avail_out < 1} | |
2080 | + @code{BZ_DATA_ERROR} | |
2081 | + if a data integrity error is detected in the compressed stream | |
2082 | + @code{BZ_DATA_ERROR_MAGIC} | |
2083 | + if the compressed stream doesn't begin with the right magic bytes | |
2084 | + @code{BZ_MEM_ERROR} | |
2085 | + if there wasn't enough memory available | |
2086 | + @code{BZ_STREAM_END} | |
2087 | + if the logical end of the data stream was detected and all | |
2088 | + output has been consumed, eg @code{s->avail_out > 0} | |
2089 | + @code{BZ_OK} | |
2090 | + otherwise | |
2091 | +@end display | |
2092 | +Allowable next actions: | |
2093 | +@display | |
2094 | + @code{BZ2_bzDecompress} | |
2095 | + if @code{BZ_OK} was returned | |
2096 | + @code{BZ2_bzDecompressEnd} | |
2097 | + otherwise | |
2098 | +@end display | |
2099 | + | |
2100 | + | |
2101 | +@subsection @code{BZ2_bzDecompressEnd} | |
2102 | +@example | |
2103 | +int BZ2_bzDecompressEnd ( bz_stream *strm ); | |
2104 | +@end example | |
2105 | +Releases all memory associated with a decompression stream. | |
2106 | + | |
2107 | +Possible return values: | |
2108 | +@display | |
2109 | + @code{BZ_PARAM_ERROR} | |
2110 | + if @code{strm} is @code{NULL} or @code{strm->s} is @code{NULL} | |
2111 | + @code{BZ_OK} | |
2112 | + otherwise | |
2113 | +@end display | |
2114 | + | |
2115 | +Allowable next actions: | |
2116 | +@display | |
2117 | + None. | |
2118 | +@end display | |
2119 | + | |
2120 | + | |
2121 | +@section High-level interface | |
2122 | + | |
2123 | +This interface provides functions for reading and writing | |
2124 | +@code{bzip2} format files. First, some general points. | |
2125 | + | |
2126 | +@itemize @bullet | |
2127 | +@item All of the functions take an @code{int*} first argument, | |
2128 | + @code{bzerror}. | |
2129 | + After each call, @code{bzerror} should be consulted first to determine | |
2130 | + the outcome of the call. If @code{bzerror} is @code{BZ_OK}, | |
2131 | + the call completed | |
2132 | + successfully, and only then should the return value of the function | |
2133 | + (if any) be consulted. If @code{bzerror} is @code{BZ_IO_ERROR}, | |
2134 | + there was an error | |
2135 | + reading/writing the underlying compressed file, and you should | |
2136 | + then consult @code{errno}/@code{perror} to determine the | |
2137 | + cause of the difficulty. | |
2138 | + @code{bzerror} may also be set to various other values; precise details are | |
2139 | + given on a per-function basis below. | |
2140 | +@item If @code{bzerror} indicates an error | |
2141 | + (ie, anything except @code{BZ_OK} and @code{BZ_STREAM_END}), | |
2142 | + you should immediately call @code{BZ2_bzReadClose} (or @code{BZ2_bzWriteClose}, | |
2143 | + depending on whether you are attempting to read or to write) | |
2144 | + to free up all resources associated | |
2145 | + with the stream. Once an error has been indicated, behaviour of all calls | |
2146 | + except @code{BZ2_bzReadClose} (@code{BZ2_bzWriteClose}) is undefined. | |
2147 | + The implication is that (1) @code{bzerror} should | |
2148 | + be checked after each call, and (2) if @code{bzerror} indicates an error, | |
2149 | + @code{BZ2_bzReadClose} (@code{BZ2_bzWriteClose}) should then be called to clean up. | |
2150 | +@item The @code{FILE*} arguments passed to | |
2151 | + @code{BZ2_bzReadOpen}/@code{BZ2_bzWriteOpen} | |
2152 | + should be set to binary mode. | |
2153 | + Most Unix systems will do this by default, but other platforms, | |
2154 | + including Windows and Mac, will not. If you omit this, you may | |
2155 | + encounter problems when moving code to new platforms. | |
2156 | +@item Memory allocation requests are handled by | |
2157 | + @code{malloc}/@code{free}. | |
2158 | + At present | |
2159 | + there is no facility for user-defined memory allocators in the file I/O | |
2160 | + functions (could easily be added, though). | |
2161 | +@end itemize | |
2162 | + | |
2163 | + | |
2164 | + | |
2165 | +@subsection @code{BZ2_bzReadOpen} | |
2166 | +@example | |
2167 | + typedef void BZFILE; | |
2168 | + | |
2169 | + BZFILE *BZ2_bzReadOpen ( int *bzerror, FILE *f, | |
2170 | + int small, int verbosity, | |
2171 | + void *unused, int nUnused ); | |
2172 | +@end example | |
2173 | +Prepare to read compressed data from file handle @code{f}. @code{f} | |
2174 | +should refer to a file which has been opened for reading, and for which | |
2175 | +the error indicator (@code{ferror(f)}) is not set. If @code{small} is 1, | |
2176 | +the library will try to decompress using less memory, at the expense of | |
2177 | +speed. | |
2178 | + | |
2179 | +For reasons explained below, @code{BZ2_bzRead} will decompress the | |
2180 | +@code{nUnused} bytes starting at @code{unused}, before starting to read | |
2181 | +from the file @code{f}. At most @code{BZ_MAX_UNUSED} bytes may be | |
2182 | +supplied like this. If this facility is not required, you should pass | |
2183 | +@code{NULL} and @code{0} for @code{unused} and @code{nUnused} | |
2184 | +respectively. | |
2185 | + | |
2186 | +For the meaning of parameters @code{small} and @code{verbosity}, | |
2187 | +see @code{BZ2_bzDecompressInit}. | |
2188 | + | |
2189 | +The amount of memory needed to decompress a file cannot be determined | |
2190 | +until the file's header has been read. So it is possible that | |
2191 | +@code{BZ2_bzReadOpen} returns @code{BZ_OK} but a subsequent call of | |
2192 | +@code{BZ2_bzRead} will return @code{BZ_MEM_ERROR}. | |
2193 | + | |
2194 | +Possible assignments to @code{bzerror}: | |
2195 | +@display | |
2196 | + @code{BZ_CONFIG_ERROR} | |
2197 | + if the library has been mis-compiled | |
2198 | + @code{BZ_PARAM_ERROR} | |
2199 | + if @code{f} is @code{NULL} | |
2200 | + or @code{small} is neither @code{0} nor @code{1} | |
2201 | + or @code{(unused == NULL && nUnused != 0)} | |
2202 | + or @code{(unused != NULL && !(0 <= nUnused <= BZ_MAX_UNUSED))} | |
2203 | + @code{BZ_IO_ERROR} | |
2204 | + if @code{ferror(f)} is nonzero | |
2205 | + @code{BZ_MEM_ERROR} | |
2206 | + if insufficient memory is available | |
2207 | + @code{BZ_OK} | |
2208 | + otherwise. | |
2209 | +@end display | |
2210 | + | |
2211 | +Possible return values: | |
2212 | +@display | |
2213 | + Pointer to an abstract @code{BZFILE} | |
2214 | + if @code{bzerror} is @code{BZ_OK} | |
2215 | + @code{NULL} | |
2216 | + otherwise | |
2217 | +@end display | |
2218 | + | |
2219 | +Allowable next actions: | |
2220 | +@display | |
2221 | + @code{BZ2_bzRead} | |
2222 | + if @code{bzerror} is @code{BZ_OK} | |
2223 | + @code{BZ2_bzReadClose} | |
2224 | + otherwise | |
2225 | +@end display | |
2226 | + | |
2227 | + | |
2228 | +@subsection @code{BZ2_bzRead} | |
2229 | +@example | |
2230 | + int BZ2_bzRead ( int *bzerror, BZFILE *b, void *buf, int len ); | |
2231 | +@end example | |
2232 | +Reads up to @code{len} (uncompressed) bytes from the compressed file | |
2233 | +@code{b} into | |
2234 | +the buffer @code{buf}. If the read was successful, | |
2235 | +@code{bzerror} is set to @code{BZ_OK} | |
2236 | +and the number of bytes read is returned. If the logical end-of-stream | |
2237 | +was detected, @code{bzerror} will be set to @code{BZ_STREAM_END}, | |
2238 | +and the number | |
2239 | +of bytes read is returned. All other @code{bzerror} values denote an error. | |
2240 | + | |
2241 | +@code{BZ2_bzRead} will supply @code{len} bytes, | |
2242 | +unless the logical stream end is detected | |
2243 | +or an error occurs. Because of this, it is possible to detect the | |
2244 | +stream end by observing when the number of bytes returned is | |
2245 | +less than the number | |
2246 | +requested. Nevertheless, this is regarded as inadvisable; you should | |
2247 | +instead check @code{bzerror} after every call and watch out for | |
2248 | +@code{BZ_STREAM_END}. | |
2249 | + | |
2250 | +Internally, @code{BZ2_bzRead} copies data from the compressed file in chunks | |
2251 | +of size @code{BZ_MAX_UNUSED} bytes | |
2252 | +before decompressing it. If the file contains more bytes than strictly | |
2253 | +needed to reach the logical end-of-stream, @code{BZ2_bzRead} will almost certainly | |
2254 | +read some of the trailing data before signalling @code{BZ_STREAM_END}. | |
2255 | +To collect the read but unused data once @code{BZ_STREAM_END} has | |
2256 | +appeared, call @code{BZ2_bzReadGetUnused} immediately before @code{BZ2_bzReadClose}. | |
2257 | + | |
2258 | +Possible assignments to @code{bzerror}: | |
2259 | +@display | |
2260 | + @code{BZ_PARAM_ERROR} | |
2261 | + if @code{b} is @code{NULL} or @code{buf} is @code{NULL} or @code{len < 0} | |
2262 | + @code{BZ_SEQUENCE_ERROR} | |
2263 | + if @code{b} was opened with @code{BZ2_bzWriteOpen} | |
2264 | + @code{BZ_IO_ERROR} | |
2265 | + if there is an error reading from the compressed file | |
2266 | + @code{BZ_UNEXPECTED_EOF} | |
2267 | + if the compressed file ended before the logical end-of-stream was detected | |
2268 | + @code{BZ_DATA_ERROR} | |
2269 | + if a data integrity error was detected in the compressed stream | |
2270 | + @code{BZ_DATA_ERROR_MAGIC} | |
2271 | + if the stream does not begin with the requisite header bytes (ie, is not | |
2272 | + a @code{bzip2} data file). This is really a special case of @code{BZ_DATA_ERROR}. | |
2273 | + @code{BZ_MEM_ERROR} | |
2274 | + if insufficient memory was available | |
2275 | + @code{BZ_STREAM_END} | |
2276 | + if the logical end of stream was detected. | |
2277 | + @code{BZ_OK} | |
2278 | + otherwise. | |
2279 | +@end display | |
2280 | + | |
2281 | +Possible return values: | |
2282 | +@display | |
2283 | + number of bytes read | |
2284 | + if @code{bzerror} is @code{BZ_OK} or @code{BZ_STREAM_END} | |
2285 | + undefined | |
2286 | + otherwise | |
2287 | +@end display | |
2288 | + | |
2289 | +Allowable next actions: | |
2290 | +@display | |
2291 | + collect data from @code{buf}, then @code{BZ2_bzRead} or @code{BZ2_bzReadClose} | |
2292 | + if @code{bzerror} is @code{BZ_OK} | |
2293 | + collect data from @code{buf}, then @code{BZ2_bzReadClose} or @code{BZ2_bzReadGetUnused} | |
2294 | + if @code{bzerror} is @code{BZ_STREAM_END} | |
2295 | + @code{BZ2_bzReadClose} | |
2296 | + otherwise | |
2297 | +@end display | |
2298 | + | |
2299 | + | |
2300 | + | |
2301 | +@subsection @code{BZ2_bzReadGetUnused} | |
2302 | +@example | |
2303 | + void BZ2_bzReadGetUnused ( int* bzerror, BZFILE *b, | |
2304 | + void** unused, int* nUnused ); | |
2305 | +@end example | |
2306 | +Returns data which was read from the compressed file but was not needed | |
2307 | +to get to the logical end-of-stream. @code{*unused} is set to the address | |
2308 | +of the data, and @code{*nUnused} to the number of bytes. @code{*nUnused} will | |
2309 | +be set to a value between @code{0} and @code{BZ_MAX_UNUSED} inclusive. | |
2310 | + | |
2311 | +This function may only be called once @code{BZ2_bzRead} has signalled | |
2312 | +@code{BZ_STREAM_END} but before @code{BZ2_bzReadClose}. | |
2313 | + | |
2314 | +Possible assignments to @code{bzerror}: | |
2315 | +@display | |
2316 | + @code{BZ_PARAM_ERROR} | |
2317 | + if @code{b} is @code{NULL} | |
2318 | + or @code{unused} is @code{NULL} or @code{nUnused} is @code{NULL} | |
2319 | + @code{BZ_SEQUENCE_ERROR} | |
2320 | + if @code{BZ_STREAM_END} has not been signalled | |
2321 | + or if @code{b} was opened with @code{BZ2_bzWriteOpen} | |
2322 | + @code{BZ_OK} | |
2323 | + otherwise | |
2324 | +@end display | |
2325 | + | |
2326 | +Allowable next actions: | |
2327 | +@display | |
2328 | + @code{BZ2_bzReadClose} | |
2329 | +@end display | |
2330 | + | |
2331 | + | |
2332 | +@subsection @code{BZ2_bzReadClose} | |
2333 | +@example | |
2334 | + void BZ2_bzReadClose ( int *bzerror, BZFILE *b ); | |
2335 | +@end example | |
2336 | +Releases all memory pertaining to the compressed file @code{b}. | |
2337 | +@code{BZ2_bzReadClose} does not call @code{fclose} on the underlying file | |
2338 | +handle, so you should do that yourself if appropriate. | |
2339 | +@code{BZ2_bzReadClose} should be called to clean up after all error | |
2340 | +situations. | |
2341 | + | |
2342 | +Possible assignments to @code{bzerror}: | |
2343 | +@display | |
2344 | + @code{BZ_SEQUENCE_ERROR} | |
2345 | + if @code{b} was opened with @code{BZ2_bzWriteOpen} | |
2346 | + @code{BZ_OK} | |
2347 | + otherwise | |
2348 | +@end display | |
2349 | + | |
2350 | +Allowable next actions: | |
2351 | +@display | |
2352 | + none | |
2353 | +@end display | |
2354 | + | |
2355 | + | |
2356 | + | |
2357 | +@subsection @code{BZ2_bzWriteOpen} | |
2358 | +@example | |
2359 | + BZFILE *BZ2_bzWriteOpen ( int *bzerror, FILE *f, | |
2360 | + int blockSize100k, int verbosity, | |
2361 | + int workFactor ); | |
2362 | +@end example | |
2363 | +Prepare to write compressed data to file handle @code{f}. | |
2364 | +@code{f} should refer to | |
2365 | +a file which has been opened for writing, and for which the error | |
2366 | +indicator (@code{ferror(f)}) is not set. | |
2367 | + | |
2368 | +For the meaning of parameters @code{blockSize100k}, | |
2369 | +@code{verbosity} and @code{workFactor}, see | |
2370 | +@* @code{BZ2_bzCompressInit}. | |
2371 | + | |
2372 | +All required memory is allocated at this stage, so if the call | |
2373 | +completes successfully, @code{BZ_MEM_ERROR} cannot be signalled by a | |
2374 | +subsequent call to @code{BZ2_bzWrite}. | |
2375 | + | |
2376 | +Possible assignments to @code{bzerror}: | |
2377 | +@display | |
2378 | + @code{BZ_CONFIG_ERROR} | |
2379 | + if the library has been mis-compiled | |
2380 | + @code{BZ_PARAM_ERROR} | |
2381 | + if @code{f} is @code{NULL} | |
2382 | + or @code{blockSize100k < 1} or @code{blockSize100k > 9} | |
2383 | + @code{BZ_IO_ERROR} | |
2384 | + if @code{ferror(f)} is nonzero | |
2385 | + @code{BZ_MEM_ERROR} | |
2386 | + if insufficient memory is available | |
2387 | + @code{BZ_OK} | |
2388 | + otherwise | |
2389 | +@end display | |
2390 | + | |
2391 | +Possible return values: | |
2392 | +@display | |
2393 | + Pointer to an abstract @code{BZFILE} | |
2394 | + if @code{bzerror} is @code{BZ_OK} | |
2395 | + @code{NULL} | |
2396 | + otherwise | |
2397 | +@end display | |
2398 | + | |
2399 | +Allowable next actions: | |
2400 | +@display | |
2401 | + @code{BZ2_bzWrite} | |
2402 | + if @code{bzerror} is @code{BZ_OK} | |
2403 | + (you could go directly to @code{BZ2_bzWriteClose}, but this would be pretty pointless) | |
2404 | + @code{BZ2_bzWriteClose} | |
2405 | + otherwise | |
2406 | +@end display | |
2407 | + | |
2408 | + | |
2409 | + | |
2410 | +@subsection @code{BZ2_bzWrite} | |
2411 | +@example | |
2412 | + void BZ2_bzWrite ( int *bzerror, BZFILE *b, void *buf, int len ); | |
2413 | +@end example | |
2414 | +Absorbs @code{len} bytes from the buffer @code{buf}, eventually to be | |
2415 | +compressed and written to the file. | |
2416 | + | |
2417 | +Possible assignments to @code{bzerror}: | |
2418 | +@display | |
2419 | + @code{BZ_PARAM_ERROR} | |
2420 | + if @code{b} is @code{NULL} or @code{buf} is @code{NULL} or @code{len < 0} | |
2421 | + @code{BZ_SEQUENCE_ERROR} | |
2422 | + if @code{b} was opened with @code{BZ2_bzReadOpen} | |
2423 | + @code{BZ_IO_ERROR} | |
2424 | + if there is an error writing the compressed file. | |
2425 | + @code{BZ_OK} | |
2426 | + otherwise | |
2427 | +@end display | |
2428 | + | |
2429 | + | |
2430 | + | |
2431 | + | |
2432 | +@subsection @code{BZ2_bzWriteClose} | |
2433 | +@example | |
2434 | + void BZ2_bzWriteClose ( int *bzerror, BZFILE* b, | |
2435 | + int abandon, | |
2436 | + unsigned int* nbytes_in, | |
2437 | + unsigned int* nbytes_out ); | |
2438 | + | |
2439 | + void BZ2_bzWriteClose64 ( int *bzerror, BZFILE* b, | |
2440 | + int abandon, | |
2441 | + unsigned int* nbytes_in_lo32, | |
2442 | + unsigned int* nbytes_in_hi32, | |
2443 | + unsigned int* nbytes_out_lo32, | |
2444 | + unsigned int* nbytes_out_hi32 ); | |
2445 | +@end example | |
2446 | + | |
2447 | +Compresses and flushes to the compressed file all data so far supplied | |
2448 | +by @code{BZ2_bzWrite}. The logical end-of-stream markers are also written, so | |
2449 | +subsequent calls to @code{BZ2_bzWrite} are illegal. All memory associated | |
2450 | +with the compressed file @code{b} is released. | |
2451 | +@code{fflush} is called on the | |
2452 | +compressed file, but it is not @code{fclose}'d. | |
2453 | + | |
2454 | +If @code{BZ2_bzWriteClose} is called to clean up after an error, the only | |
2455 | +action is to release the memory. The library records the error codes | |
2456 | +issued by previous calls, so this situation will be detected | |
2457 | +automatically. There is no attempt to complete the compression | |
2458 | +operation, nor to @code{fflush} the compressed file. You can force this | |
2459 | +behaviour to happen even in the case of no error, by passing a nonzero | |
2460 | +value to @code{abandon}. | |
2461 | + | |
2462 | +If @code{nbytes_in} is non-null, @code{*nbytes_in} will be set to be the | |
2463 | +total volume of uncompressed data handled. Similarly, @code{nbytes_out} | |
2464 | +will be set to the total volume of compressed data written. For | |
2465 | +compatibility with older versions of the library, @code{BZ2_bzWriteClose} | |
2466 | +only yields the lower 32 bits of these counts. Use | |
2467 | +@code{BZ2_bzWriteClose64} if you want the full 64 bit counts. These | |
2468 | +two functions are otherwise absolutely identical. | |
2469 | + | |
2470 | + | |
2471 | +Possible assignments to @code{bzerror}: | |
2472 | +@display | |
2473 | + @code{BZ_SEQUENCE_ERROR} | |
2474 | + if @code{b} was opened with @code{BZ2_bzReadOpen} | |
2475 | + @code{BZ_IO_ERROR} | |
2476 | + if there is an error writing the compressed file | |
2477 | + @code{BZ_OK} | |
2478 | + otherwise | |
2479 | +@end display | |
2480 | + | |
2481 | +@subsection Handling embedded compressed data streams | |
2482 | + | |
2483 | +The high-level library facilitates use of | |
2484 | +@code{bzip2} data streams which form some part of a surrounding, larger | |
2485 | +data stream. | |
2486 | +@itemize @bullet | |
2487 | +@item For writing, the library takes an open file handle, writes | |
2488 | +compressed data to it, @code{fflush}es it but does not @code{fclose} it. | |
2489 | +The calling application can write its own data before and after the | |
2490 | +compressed data stream, using that same file handle. | |
2491 | +@item Reading is more complex, and the facilities are not as general | |
2492 | +as they could be since generality is hard to reconcile with efficiency. | |
2493 | +@code{BZ2_bzRead} reads from the compressed file in blocks of size | |
2494 | +@code{BZ_MAX_UNUSED} bytes, and in doing so probably will overshoot | |
2495 | +the logical end of compressed stream. | |
2496 | +To recover this data once decompression has | |
2497 | +ended, call @code{BZ2_bzReadGetUnused} after the last call of @code{BZ2_bzRead} | |
2498 | +(the one returning @code{BZ_STREAM_END}) but before calling | |
2499 | +@code{BZ2_bzReadClose}. | |
2500 | +@end itemize | |
2501 | + | |
2502 | +This mechanism makes it easy to decompress multiple @code{bzip2} | |
2503 | +streams placed end-to-end. At the end of one stream, when @code{BZ2_bzRead} | |
2504 | +returns @code{BZ_STREAM_END}, call @code{BZ2_bzReadGetUnused} to collect the | |
2505 | +unused data (copy it into your own buffer somewhere). | |
2506 | +That data forms the start of the next compressed stream. | |
2507 | +To start uncompressing that next stream, call @code{BZ2_bzReadOpen} again, | |
2508 | +feeding in the unused data via the @code{unused}/@code{nUnused} | |
2509 | +parameters. | |
2510 | +Keep doing this until @code{BZ_STREAM_END} return coincides with the | |
2511 | +physical end of file (@code{feof(f)}). In this situation | |
2512 | +@code{BZ2_bzReadGetUnused} | |
2513 | +will of course return no data. | |
2514 | + | |
2515 | +This should give some feel for how the high-level interface can be used. | |
2516 | +If you require extra flexibility, you'll have to bite the bullet and get | |
2517 | +to grips with the low-level interface. | |
2518 | + | |
2519 | +@subsection Standard file-reading/writing code | |
2520 | +Here's how you'd write data to a compressed file: | |
2521 | +@example @code | |
2522 | +FILE* f; | |
2523 | +BZFILE* b; | |
2524 | +int nBuf; | |
2525 | +char buf[ /* whatever size you like */ ]; | |
2526 | +int bzerror; | |
2527 | +int nWritten; | |
2528 | + | |
2529 | +f = fopen ( "myfile.bz2", "w" ); | |
2530 | +if (!f) @{ | |
2531 | + /* handle error */ | |
2532 | +@} | |
2533 | +b = BZ2_bzWriteOpen ( &bzerror, f, 9, 0, 0 ); | |
2534 | +if (bzerror != BZ_OK) @{ | |
2535 | + BZ2_bzWriteClose ( &bzerror, b, 0, NULL, NULL ); | |
2536 | + /* handle error */ | |
2537 | +@} | |
2538 | + | |
2539 | +while ( /* condition */ ) @{ | |
2540 | + /* get data to write into buf, and set nBuf appropriately */ | |
2541 | + BZ2_bzWrite ( &bzerror, b, buf, nBuf ); | |
2542 | + if (bzerror == BZ_IO_ERROR) @{ | |
2543 | + BZ2_bzWriteClose ( &bzerror, b, 0, NULL, NULL ); | |
2544 | + /* handle error */ | |
2545 | + @} | |
2546 | +@} | |
2547 | + | |
2548 | +BZ2_bzWriteClose ( &bzerror, b, 0, NULL, NULL ); | |
2549 | +if (bzerror == BZ_IO_ERROR) @{ | |
2550 | + /* handle error */ | |
2551 | +@} | |
2552 | +@end example | |
2553 | +And to read from a compressed file: | |
2554 | +@example | |
2555 | +FILE* f; | |
2556 | +BZFILE* b; | |
2557 | +int nBuf; | |
2558 | +char buf[ /* whatever size you like */ ]; | |
2559 | +int bzerror; | |
2560 | +int nWritten; | |
2561 | + | |
2562 | +f = fopen ( "myfile.bz2", "r" ); | |
2563 | +if (!f) @{ | |
2564 | + /* handle error */ | |
2565 | +@} | |
2566 | +b = BZ2_bzReadOpen ( &bzerror, f, 0, 0, NULL, 0 ); | |
2567 | +if (bzerror != BZ_OK) @{ | |
2568 | + BZ2_bzReadClose ( &bzerror, b ); | |
2569 | + /* handle error */ | |
2570 | +@} | |
2571 | + | |
2572 | +bzerror = BZ_OK; | |
2573 | +while (bzerror == BZ_OK && /* arbitrary other conditions */) @{ | |
2574 | + nBuf = BZ2_bzRead ( &bzerror, b, buf, /* size of buf */ ); | |
2575 | + if (bzerror == BZ_OK) @{ | |
2576 | + /* do something with buf[0 .. nBuf-1] */ | |
2577 | + @} | |
2578 | +@} | |
2579 | +if (bzerror != BZ_STREAM_END) @{ | |
2580 | + BZ2_bzReadClose ( &bzerror, b ); | |
2581 | + /* handle error */ | |
2582 | +@} else @{ | |
2583 | + BZ2_bzReadClose ( &bzerror, b ); | |
2584 | +@} | |
2585 | +@end example | |
2586 | + | |
2587 | + | |
2588 | + | |
2589 | +@section Utility functions | |
2590 | +@subsection @code{BZ2_bzBuffToBuffCompress} | |
2591 | +@example | |
2592 | + int BZ2_bzBuffToBuffCompress( char* dest, | |
2593 | + unsigned int* destLen, | |
2594 | + char* source, | |
2595 | + unsigned int sourceLen, | |
2596 | + int blockSize100k, | |
2597 | + int verbosity, | |
2598 | + int workFactor ); | |
2599 | +@end example | |
2600 | +Attempts to compress the data in @code{source[0 .. sourceLen-1]} | |
2601 | +into the destination buffer, @code{dest[0 .. *destLen-1]}. | |
2602 | +If the destination buffer is big enough, @code{*destLen} is | |
2603 | +set to the size of the compressed data, and @code{BZ_OK} is | |
2604 | +returned. If the compressed data won't fit, @code{*destLen} | |
2605 | +is unchanged, and @code{BZ_OUTBUFF_FULL} is returned. | |
2606 | + | |
2607 | +Compression in this manner is a one-shot event, done with a single call | |
2608 | +to this function. The resulting compressed data is a complete | |
2609 | +@code{bzip2} format data stream. There is no mechanism for making | |
2610 | +additional calls to provide extra input data. If you want that kind of | |
2611 | +mechanism, use the low-level interface. | |
2612 | + | |
2613 | +For the meaning of parameters @code{blockSize100k}, @code{verbosity} | |
2614 | +and @code{workFactor}, @* see @code{BZ2_bzCompressInit}. | |
2615 | + | |
2616 | +To guarantee that the compressed data will fit in its buffer, allocate | |
2617 | +an output buffer of size 1% larger than the uncompressed data, plus | |
2618 | +six hundred extra bytes. | |
2619 | + | |
2620 | +@code{BZ2_bzBuffToBuffCompress} will not write data at or | |
2621 | +beyond @code{dest[*destLen]}, even in case of buffer overflow. | |
2622 | + | |
2623 | +Possible return values: | |
2624 | +@display | |
2625 | + @code{BZ_CONFIG_ERROR} | |
2626 | + if the library has been mis-compiled | |
2627 | + @code{BZ_PARAM_ERROR} | |
2628 | + if @code{dest} is @code{NULL} or @code{destLen} is @code{NULL} | |
2629 | + or @code{blockSize100k < 1} or @code{blockSize100k > 9} | |
2630 | + or @code{verbosity < 0} or @code{verbosity > 4} | |
2631 | + or @code{workFactor < 0} or @code{workFactor > 250} | |
2632 | + @code{BZ_MEM_ERROR} | |
2633 | + if insufficient memory is available | |
2634 | + @code{BZ_OUTBUFF_FULL} | |
2635 | + if the size of the compressed data exceeds @code{*destLen} | |
2636 | + @code{BZ_OK} | |
2637 | + otherwise | |
2638 | +@end display | |
2639 | + | |
2640 | + | |
2641 | + | |
2642 | +@subsection @code{BZ2_bzBuffToBuffDecompress} | |
2643 | +@example | |
2644 | + int BZ2_bzBuffToBuffDecompress ( char* dest, | |
2645 | + unsigned int* destLen, | |
2646 | + char* source, | |
2647 | + unsigned int sourceLen, | |
2648 | + int small, | |
2649 | + int verbosity ); | |
2650 | +@end example | |
2651 | +Attempts to decompress the data in @code{source[0 .. sourceLen-1]} | |
2652 | +into the destination buffer, @code{dest[0 .. *destLen-1]}. | |
2653 | +If the destination buffer is big enough, @code{*destLen} is | |
2654 | +set to the size of the uncompressed data, and @code{BZ_OK} is | |
2655 | +returned. If the uncompressed data won't fit, @code{*destLen} | |
2656 | +is unchanged, and @code{BZ_OUTBUFF_FULL} is returned. | |
2657 | + | |
2658 | +@code{source} is assumed to hold a complete @code{bzip2} format | |
2659 | +data stream. @* @code{BZ2_bzBuffToBuffDecompress} tries to decompress | |
2660 | +the entirety of the stream into the output buffer. | |
2661 | + | |
2662 | +For the meaning of parameters @code{small} and @code{verbosity}, | |
2663 | +see @code{BZ2_bzDecompressInit}. | |
2664 | + | |
2665 | +Because the compression ratio of the compressed data cannot be known in | |
2666 | +advance, there is no easy way to guarantee that the output buffer will | |
2667 | +be big enough. You may of course make arrangements in your code to | |
2668 | +record the size of the uncompressed data, but such a mechanism is beyond | |
2669 | +the scope of this library. | |
2670 | + | |
2671 | +@code{BZ2_bzBuffToBuffDecompress} will not write data at or | |
2672 | +beyond @code{dest[*destLen]}, even in case of buffer overflow. | |
2673 | + | |
2674 | +Possible return values: | |
2675 | +@display | |
2676 | + @code{BZ_CONFIG_ERROR} | |
2677 | + if the library has been mis-compiled | |
2678 | + @code{BZ_PARAM_ERROR} | |
2679 | + if @code{dest} is @code{NULL} or @code{destLen} is @code{NULL} | |
2680 | + or @code{small != 0 && small != 1} | |
2681 | + or @code{verbosity < 0} or @code{verbosity > 4} | |
2682 | + @code{BZ_MEM_ERROR} | |
2683 | + if insufficient memory is available | |
2684 | + @code{BZ_OUTBUFF_FULL} | |
2685 | + if the size of the uncompressed data exceeds @code{*destLen} | |
2686 | + @code{BZ_DATA_ERROR} | |
2687 | + if a data integrity error was detected in the compressed data | |
2688 | + @code{BZ_DATA_ERROR_MAGIC} | |
2689 | + if the compressed data doesn't begin with the right magic bytes | |
2690 | + @code{BZ_UNEXPECTED_EOF} | |
2691 | + if the compressed data ends unexpectedly | |
2692 | + @code{BZ_OK} | |
2693 | + otherwise | |
2694 | +@end display | |
2695 | + | |
2696 | + | |
2697 | + | |
2698 | +@section @code{zlib} compatibility functions | |
2699 | +Yoshioka Tsuneo has contributed some functions to | |
2700 | +give better @code{zlib} compatibility. These functions are | |
2701 | +@code{BZ2_bzopen}, @code{BZ2_bzread}, @code{BZ2_bzwrite}, @code{BZ2_bzflush}, | |
2702 | +@code{BZ2_bzclose}, | |
2703 | +@code{BZ2_bzerror} and @code{BZ2_bzlibVersion}. | |
2704 | +These functions are not (yet) officially part of | |
2705 | +the library. If they break, you get to keep all the pieces. | |
2706 | +Nevertheless, I think they work ok. | |
2707 | +@example | |
2708 | +typedef void BZFILE; | |
2709 | + | |
2710 | +const char * BZ2_bzlibVersion ( void ); | |
2711 | +@end example | |
2712 | +Returns a string indicating the library version. | |
2713 | +@example | |
2714 | +BZFILE * BZ2_bzopen ( const char *path, const char *mode ); | |
2715 | +BZFILE * BZ2_bzdopen ( int fd, const char *mode ); | |
2716 | +@end example | |
2717 | +Opens a @code{.bz2} file for reading or writing, using either its name | |
2718 | +or a pre-existing file descriptor. | |
2719 | +Analogous to @code{fopen} and @code{fdopen}. | |
2720 | +@example | |
2721 | +int BZ2_bzread ( BZFILE* b, void* buf, int len ); | |
2722 | +int BZ2_bzwrite ( BZFILE* b, void* buf, int len ); | |
2723 | +@end example | |
2724 | +Reads/writes data from/to a previously opened @code{BZFILE}. | |
2725 | +Analogous to @code{fread} and @code{fwrite}. | |
2726 | +@example | |
2727 | +int BZ2_bzflush ( BZFILE* b ); | |
2728 | +void BZ2_bzclose ( BZFILE* b ); | |
2729 | +@end example | |
2730 | +Flushes/closes a @code{BZFILE}. @code{BZ2_bzflush} doesn't actually do | |
2731 | +anything. Analogous to @code{fflush} and @code{fclose}. | |
2732 | + | |
2733 | +@example | |
2734 | +const char * BZ2_bzerror ( BZFILE *b, int *errnum ) | |
2735 | +@end example | |
2736 | +Returns a string describing the most recent error status of | |
2737 | +@code{b}, and also sets @code{*errnum} to its numerical value. | |
2738 | + | |
2739 | + | |
2740 | +@section Using the library in a @code{stdio}-free environment | |
2741 | + | |
2742 | +@subsection Getting rid of @code{stdio} | |
2743 | + | |
2744 | +In a deeply embedded application, you might want to use just | |
2745 | +the memory-to-memory functions. You can do this conveniently | |
2746 | +by compiling the library with preprocessor symbol @code{BZ_NO_STDIO} | |
2747 | +defined. Doing this gives you a library containing only the following | |
2748 | +eight functions: | |
2749 | + | |
2750 | +@code{BZ2_bzCompressInit}, @code{BZ2_bzCompress}, @code{BZ2_bzCompressEnd} @* | |
2751 | +@code{BZ2_bzDecompressInit}, @code{BZ2_bzDecompress}, @code{BZ2_bzDecompressEnd} @* | |
2752 | +@code{BZ2_bzBuffToBuffCompress}, @code{BZ2_bzBuffToBuffDecompress} | |
2753 | + | |
2754 | +When compiled like this, all functions will ignore @code{verbosity} | |
2755 | +settings. | |
2756 | + | |
2757 | +@subsection Critical error handling | |
2758 | +@code{libbzip2} contains a number of internal assertion checks which | |
2759 | +should, needless to say, never be activated. Nevertheless, if an | |
2760 | +assertion should fail, behaviour depends on whether or not the library | |
2761 | +was compiled with @code{BZ_NO_STDIO} set. | |
2762 | + | |
2763 | +For a normal compile, an assertion failure yields the message | |
2764 | +@example | |
2765 | + bzip2/libbzip2: internal error number N. | |
15ee0650 | 2766 | + This is a bug in bzip2/libbzip2, 1.0.2, 30-Dec-2001. |
d967e3ec | 2767 | + Please report it to me at: jseward@@acm.org. If this happened |
2768 | + when you were using some program which uses libbzip2 as a | |
2769 | + component, you should also report this bug to the author(s) | |
2770 | + of that program. Please make an effort to report this bug; | |
2771 | + timely and accurate bug reports eventually lead to higher | |
15ee0650 | 2772 | + quality software. Thanks. Julian Seward, 30 December 2001. |
d967e3ec | 2773 | +@end example |
15ee0650 | 2774 | +where @code{N} is some error code number. If @code{N == 1007}, it also |
2775 | +prints some extra text advising the reader that unreliable memory is | |
2776 | +often associated with internal error 1007. (This is a | |
2777 | +frequently-observed-phenomenon with versions 1.0.0/1.0.1). | |
2778 | + | |
2779 | +@code{exit(3)} is then called. | |
d967e3ec | 2780 | + |
2781 | +For a @code{stdio}-free library, assertion failures result | |
2782 | +in a call to a function declared as: | |
2783 | +@example | |
2784 | + extern void bz_internal_error ( int errcode ); | |
2785 | +@end example | |
2786 | +The relevant code is passed as a parameter. You should supply | |
2787 | +such a function. | |
2788 | + | |
2789 | +In either case, once an assertion failure has occurred, any | |
2790 | +@code{bz_stream} records involved can be regarded as invalid. | |
2791 | +You should not attempt to resume normal operation with them. | |
2792 | + | |
2793 | +You may, of course, change critical error handling to suit | |
2794 | +your needs. As I said above, critical errors indicate bugs | |
2795 | +in the library and should not occur. All "normal" error | |
2796 | +situations are indicated via error return codes from functions, | |
2797 | +and can be recovered from. | |
2798 | + | |
2799 | + | |
2800 | +@section Making a Windows DLL | |
2801 | +Everything related to Windows has been contributed by Yoshioka Tsuneo | |
2802 | +@* (@code{QWF00133@@niftyserve.or.jp} / | |
2803 | +@code{tsuneo-y@@is.aist-nara.ac.jp}), so you should send your queries to | |
2804 | +him (but perhaps Cc: me, @code{jseward@@acm.org}). | |
2805 | + | |
2806 | +My vague understanding of what to do is: using Visual C++ 5.0, | |
2807 | +open the project file @code{libbz2.dsp}, and build. That's all. | |
2808 | + | |
2809 | +If you can't | |
2810 | +open the project file for some reason, make a new one, naming these files: | |
2811 | +@code{blocksort.c}, @code{bzlib.c}, @code{compress.c}, | |
2812 | +@code{crctable.c}, @code{decompress.c}, @code{huffman.c}, @* | |
2813 | +@code{randtable.c} and @code{libbz2.def}. You will also need | |
2814 | +to name the header files @code{bzlib.h} and @code{bzlib_private.h}. | |
2815 | + | |
2816 | +If you don't use VC++, you may need to define the preprocessor symbol | |
2817 | +@code{_WIN32}. | |
2818 | + | |
2819 | +Finally, @code{dlltest.c} is a sample program using the DLL. It has a | |
2820 | +project file, @code{dlltest.dsp}. | |
2821 | + | |
2822 | +If you just want a makefile for Visual C, have a look at | |
2823 | +@code{makefile.msc}. | |
2824 | + | |
2825 | +Be aware that if you compile @code{bzip2} itself on Win32, you must set | |
2826 | +@code{BZ_UNIX} to 0 and @code{BZ_LCCWIN32} to 1, in the file | |
2827 | +@code{bzip2.c}, before compiling. Otherwise the resulting binary won't | |
2828 | +work correctly. | |
2829 | + | |
2830 | +I haven't tried any of this stuff myself, but it all looks plausible. | |
2831 | + | |
2832 | + | |
2833 | + | |
2834 | +@chapter Miscellanea | |
2835 | + | |
2836 | +These are just some random thoughts of mine. Your mileage may | |
2837 | +vary. | |
2838 | + | |
2839 | +@section Limitations of the compressed file format | |
2840 | +@code{bzip2-1.0}, @code{0.9.5} and @code{0.9.0} | |
2841 | +use exactly the same file format as the previous | |
2842 | +version, @code{bzip2-0.1}. This decision was made in the interests of | |
2843 | +stability. Creating yet another incompatible compressed file format | |
2844 | +would create further confusion and disruption for users. | |
2845 | + | |
2846 | +Nevertheless, this is not a painless decision. Development | |
2847 | +work since the release of @code{bzip2-0.1} in August 1997 | |
2848 | +has shown complexities in the file format which slow down | |
2849 | +decompression and, in retrospect, are unnecessary. These are: | |
2850 | +@itemize @bullet | |
2851 | +@item The run-length encoder, which is the first of the | |
2852 | + compression transformations, is entirely irrelevant. | |
2853 | + The original purpose was to protect the sorting algorithm | |
2854 | + from the very worst case input: a string of repeated | |
2855 | + symbols. But algorithm steps Q6a and Q6b in the original | |
2856 | + Burrows-Wheeler technical report (SRC-124) show how | |
2857 | + repeats can be handled without difficulty in block | |
2858 | + sorting. | |
2859 | +@item The randomisation mechanism doesn't really need to be | |
2860 | + there. Udi Manber and Gene Myers published a suffix | |
2861 | + array construction algorithm a few years back, which | |
2862 | + can be employed to sort any block, no matter how | |
2863 | + repetitive, in O(N log N) time. Subsequent work by | |
2864 | + Kunihiko Sadakane has produced a derivative O(N (log N)^2) | |
2865 | + algorithm which usually outperforms the Manber-Myers | |
2866 | + algorithm. | |
2867 | + | |
2868 | + I could have changed to Sadakane's algorithm, but I find | |
2869 | + it to be slower than @code{bzip2}'s existing algorithm for | |
2870 | + most inputs, and the randomisation mechanism protects | |
2871 | + adequately against bad cases. I didn't think it was | |
2872 | + a good tradeoff to make. Partly this is due to the fact | |
2873 | + that I was not flooded with email complaints about | |
2874 | + @code{bzip2-0.1}'s performance on repetitive data, so | |
2875 | + perhaps it isn't a problem for real inputs. | |
2876 | + | |
2877 | + Probably the best long-term solution, | |
2878 | + and the one I have incorporated into 0.9.5 and above, | |
2879 | + is to use the existing sorting | |
2880 | + algorithm initially, and fall back to a O(N (log N)^2) | |
2881 | + algorithm if the standard algorithm gets into difficulties. | |
2882 | +@item The compressed file format was never designed to be | |
2883 | + handled by a library, and I have had to jump through | |
2884 | + some hoops to produce an efficient implementation of | |
2885 | + decompression. It's a bit hairy. Try passing | |
2886 | + @code{decompress.c} through the C preprocessor | |
2887 | + and you'll see what I mean. Much of this complexity | |
2888 | + could have been avoided if the compressed size of | |
2889 | + each block of data was recorded in the data stream. | |
2890 | +@item An Adler-32 checksum, rather than a CRC32 checksum, | |
2891 | + would be faster to compute. | |
2892 | +@end itemize | |
2893 | +It would be fair to say that the @code{bzip2} format was frozen | |
2894 | +before I properly and fully understood the performance | |
2895 | +consequences of doing so. | |
2896 | + | |
2897 | +Improvements which I was able to incorporate into | |
2898 | +0.9.0, despite using the same file format, are: | |
2899 | +@itemize @bullet | |
2900 | +@item Single array implementation of the inverse BWT. This | |
2901 | + significantly speeds up decompression, presumably | |
2902 | + because it reduces the number of cache misses. | |
2903 | +@item Faster inverse MTF transform for large MTF values. The | |
2904 | + new implementation is based on the notion of sliding blocks | |
2905 | + of values. | |
2906 | +@item @code{bzip2-0.9.0} now reads and writes files with @code{fread} | |
2907 | + and @code{fwrite}; version 0.1 used @code{putc} and @code{getc}. | |
2908 | + Duh! Well, you live and learn. | |
2909 | + | |
2910 | +@end itemize | |
2911 | +Further ahead, it would be nice | |
2912 | +to be able to do random access into files. This will | |
2913 | +require some careful design of compressed file formats. | |
2914 | + | |
2915 | + | |
2916 | + | |
2917 | +@section Portability issues | |
2918 | +After some consideration, I have decided not to use | |
2919 | +GNU @code{autoconf} to configure 0.9.5 or 1.0. | |
2920 | + | |
2921 | +@code{autoconf}, admirable and wonderful though it is, | |
2922 | +mainly assists with portability problems between Unix-like | |
2923 | +platforms. But @code{bzip2} doesn't have much in the way | |
2924 | +of portability problems on Unix; most of the difficulties appear | |
2925 | +when porting to the Mac, or to Microsoft's operating systems. | |
2926 | +@code{autoconf} doesn't help in those cases, and brings in a | |
2927 | +whole load of new complexity. | |
2928 | + | |
2929 | +Most people should be able to compile the library and program | |
2930 | +under Unix straight out-of-the-box, so to speak, especially | |
2931 | +if you have a version of GNU C available. | |
2932 | + | |
2933 | +There are a couple of @code{__inline__} directives in the code. GNU C | |
2934 | +(@code{gcc}) should be able to handle them. If you're not using | |
2935 | +GNU C, your C compiler shouldn't see them at all. | |
2936 | +If your compiler does, for some reason, see them and doesn't | |
2937 | +like them, just @code{#define} @code{__inline__} to be @code{/* */}. One | |
2938 | +easy way to do this is to compile with the flag @code{-D__inline__=}, | |
2939 | +which should be understood by most Unix compilers. | |
2940 | + | |
2941 | +If you still have difficulties, try compiling with the macro | |
2942 | +@code{BZ_STRICT_ANSI} defined. This should enable you to build the | |
2943 | +library in a strictly ANSI compliant environment. Building the program | |
2944 | +itself like this is dangerous and not supported, since you remove | |
2945 | +@code{bzip2}'s checks against compressing directories, symbolic links, | |
2946 | +devices, and other not-really-a-file entities. This could cause | |
2947 | +filesystem corruption! | |
2948 | + | |
2949 | +One other thing: if you create a @code{bzip2} binary for public | |
2950 | +distribution, please try and link it statically (@code{gcc -static}). This | |
2951 | +avoids all sorts of library-version issues that others may encounter | |
2952 | +later on. | |
2953 | + | |
2954 | +If you build @code{bzip2} on Win32, you must set @code{BZ_UNIX} to 0 and | |
2955 | +@code{BZ_LCCWIN32} to 1, in the file @code{bzip2.c}, before compiling. | |
2956 | +Otherwise the resulting binary won't work correctly. | |
2957 | + | |
2958 | + | |
2959 | + | |
2960 | +@section Reporting bugs | |
2961 | +I tried pretty hard to make sure @code{bzip2} is | |
2962 | +bug free, both by design and by testing. Hopefully | |
2963 | +you'll never need to read this section for real. | |
2964 | + | |
2965 | +Nevertheless, if @code{bzip2} dies with a segmentation | |
2966 | +fault, a bus error or an internal assertion failure, it | |
2967 | +will ask you to email me a bug report. Experience with | |
2968 | +version 0.1 shows that almost all these problems can | |
2969 | +be traced to either compiler bugs or hardware problems. | |
2970 | +@itemize @bullet | |
2971 | +@item | |
2972 | +Recompile the program with no optimisation, and see if it | |
2973 | +works. And/or try a different compiler. | |
2974 | +I heard all sorts of stories about various flavours | |
2975 | +of GNU C (and other compilers) generating bad code for | |
2976 | +@code{bzip2}, and I've run across two such examples myself. | |
2977 | + | |
2978 | +2.7.X versions of GNU C are known to generate bad code from | |
2979 | +time to time, at high optimisation levels. | |
2980 | +If you get problems, try using the flags | |
2981 | +@code{-O2} @code{-fomit-frame-pointer} @code{-fno-strength-reduce}. | |
2982 | +You should specifically @emph{not} use @code{-funroll-loops}. | |
2983 | + | |
2984 | +You may notice that the Makefile runs six tests as part of | |
2985 | +the build process. If the program passes all of these, it's | |
2986 | +a pretty good (but not 100%) indication that the compiler has | |
2987 | +done its job correctly. | |
2988 | +@item | |
2989 | +If @code{bzip2} crashes randomly, and the crashes are not | |
2990 | +repeatable, you may have a flaky memory subsystem. @code{bzip2} | |
2991 | +really hammers your memory hierarchy, and if it's a bit marginal, | |
2992 | +you may get these problems. Ditto if your disk or I/O subsystem | |
2993 | +is slowly failing. Yup, this really does happen. | |
2994 | + | |
2995 | +Try using a different machine of the same type, and see if | |
2996 | +you can repeat the problem. | |
2997 | +@item This isn't really a bug, but ... If @code{bzip2} tells | |
2998 | +you your file is corrupted on decompression, and you | |
2999 | +obtained the file via FTP, there is a possibility that you | |
3000 | +forgot to tell FTP to do a binary mode transfer. That absolutely | |
3001 | +will cause the file to be non-decompressible. You'll have to transfer | |
3002 | +it again. | |
3003 | +@end itemize | |
3004 | + | |
3005 | +If you've incorporated @code{libbzip2} into your own program | |
3006 | +and are getting problems, please, please, please, check that the | |
3007 | +parameters you are passing in calls to the library, are | |
3008 | +correct, and in accordance with what the documentation says | |
3009 | +is allowable. I have tried to make the library robust against | |
3010 | +such problems, but I'm sure I haven't succeeded. | |
3011 | + | |
3012 | +Finally, if the above comments don't help, you'll have to send | |
3013 | +me a bug report. Now, it's just amazing how many people will | |
3014 | +send me a bug report saying something like | |
3015 | +@display | |
3016 | + bzip2 crashed with segmentation fault on my machine | |
3017 | +@end display | |
3018 | +and absolutely nothing else. Needless to say, such a report | |
3019 | +is @emph{totally, utterly, completely and comprehensively 100% useless; | |
3020 | +a waste of your time, my time, and net bandwidth}. | |
3021 | +With no details at all, there's no way I can possibly begin | |
3022 | +to figure out what the problem is. | |
3023 | + | |
3024 | +The rules of the game are: facts, facts, facts. Don't omit | |
3025 | +them because "oh, they won't be relevant". At the bare | |
3026 | +minimum: | |
3027 | +@display | |
3028 | + Machine type. Operating system version. | |
3029 | + Exact version of @code{bzip2} (do @code{bzip2 -V}). | |
3030 | + Exact version of the compiler used. | |
3031 | + Flags passed to the compiler. | |
3032 | +@end display | |
3033 | +However, the most important single thing that will help me is | |
3034 | +the file that you were trying to compress or decompress at the | |
3035 | +time the problem happened. Without that, my ability to do anything | |
3036 | +more than speculate about the cause, is limited. | |
3037 | + | |
3038 | +Please remember that I connect to the Internet with a modem, so | |
3039 | +you should contact me before mailing me huge files. | |
3040 | + | |
3041 | + | |
3042 | +@section Did you get the right package? | |
3043 | + | |
3044 | +@code{bzip2} is a resource hog. It soaks up large amounts of CPU cycles | |
3045 | +and memory. Also, it gives very large latencies. In the worst case, you | |
3046 | +can feed many megabytes of uncompressed data into the library before | |
3047 | +getting any compressed output, so this probably rules out applications | |
3048 | +requiring interactive behaviour. | |
3049 | + | |
3050 | +These aren't faults of my implementation, I hope, but more | |
3051 | +an intrinsic property of the Burrows-Wheeler transform (unfortunately). | |
3052 | +Maybe this isn't what you want. | |
3053 | + | |
3054 | +If you want a compressor and/or library which is faster, uses less | |
3055 | +memory but gets pretty good compression, and has minimal latency, | |
3056 | +consider Jean-loup | |
15ee0650 | 3057 | +Gailly's and Mark Adler's work, @code{zlib-1.1.3} and |
d967e3ec | 3058 | +@code{gzip-1.2.4}. Look for them at |
3059 | + | |
15ee0650 | 3060 | +@code{http://www.zlib.org} and |
d967e3ec | 3061 | +@code{http://www.gzip.org} respectively. |
3062 | + | |
3063 | +For something faster and lighter still, you might try Markus F X J | |
3064 | +Oberhumer's @code{LZO} real-time compression/decompression library, at | |
3065 | +@* @code{http://wildsau.idv.uni-linz.ac.at/mfx/lzo.html}. | |
3066 | + | |
3067 | +If you want to use the @code{bzip2} algorithms to compress small blocks | |
3068 | +of data, 64k bytes or smaller, for example on an on-the-fly disk | |
3069 | +compressor, you'd be well advised not to use this library. Instead, | |
3070 | +I've made a special library tuned for that kind of use. It's part of | |
3071 | +@code{e2compr-0.40}, an on-the-fly disk compressor for the Linux | |
3072 | +@code{ext2} filesystem. Look at | |
3073 | +@code{http://www.netspace.net.au/~reiter/e2compr}. | |
3074 | + | |
3075 | + | |
3076 | + | |
3077 | +@section Testing | |
3078 | + | |
3079 | +A record of the tests I've done. | |
3080 | + | |
3081 | +First, some data sets: | |
3082 | +@itemize @bullet | |
3083 | +@item B: a directory containing 6001 files, one for every length in the | |
3084 | + range 0 to 6000 bytes. The files contain random lowercase | |
3085 | + letters. 18.7 megabytes. | |
3086 | +@item H: my home directory tree. Documents, source code, mail files, | |
3087 | + compressed data. H contains B, and also a directory of | |
3088 | + files designed as boundary cases for the sorting; mostly very | |
3089 | + repetitive, nasty files. 565 megabytes. | |
3090 | +@item A: directory tree holding various applications built from source: | |
3091 | + @code{egcs}, @code{gcc-2.8.1}, KDE, GTK, Octave, etc. | |
3092 | + 2200 megabytes. | |
3093 | +@end itemize | |
3094 | +The tests conducted are as follows. Each test means compressing | |
3095 | +(a copy of) each file in the data set, decompressing it and | |
3096 | +comparing it against the original. | |
3097 | + | |
3098 | +First, a bunch of tests with block sizes and internal buffer | |
3099 | +sizes set very small, | |
3100 | +to detect any problems with the | |
3101 | +blocking and buffering mechanisms. | |
3102 | +This required modifying the source code so as to try to | |
3103 | +break it. | |
3104 | +@enumerate | |
3105 | +@item Data set H, with | |
3106 | + buffer size of 1 byte, and block size of 23 bytes. | |
3107 | +@item Data set B, buffer sizes 1 byte, block size 1 byte. | |
3108 | +@item As (2) but small-mode decompression. | |
3109 | +@item As (2) with block size 2 bytes. | |
3110 | +@item As (2) with block size 3 bytes. | |
3111 | +@item As (2) with block size 4 bytes. | |
3112 | +@item As (2) with block size 5 bytes. | |
3113 | +@item As (2) with block size 6 bytes and small-mode decompression. | |
3114 | +@item H with buffer size of 1 byte, but normal block | |
3115 | + size (up to 900000 bytes). | |
3116 | +@end enumerate | |
3117 | +Then some tests with unmodified source code. | |
3118 | +@enumerate | |
3119 | +@item H, all settings normal. | |
3120 | +@item As (1), with small-mode decompress. | |
3121 | +@item H, compress with flag @code{-1}. | |
3122 | +@item H, compress with flag @code{-s}, decompress with flag @code{-s}. | |
3123 | +@item Forwards compatibility: H, @code{bzip2-0.1pl2} compressing, | |
3124 | + @code{bzip2-0.9.5} decompressing, all settings normal. | |
3125 | +@item Backwards compatibility: H, @code{bzip2-0.9.5} compressing, | |
3126 | + @code{bzip2-0.1pl2} decompressing, all settings normal. | |
3127 | +@item Bigger tests: A, all settings normal. | |
3128 | +@item As (7), using the fallback (Sadakane-like) sorting algorithm. | |
3129 | +@item As (8), compress with flag @code{-1}, decompress with flag | |
3130 | + @code{-s}. | |
3131 | +@item H, using the fallback sorting algorithm. | |
3132 | +@item Forwards compatibility: A, @code{bzip2-0.1pl2} compressing, | |
3133 | + @code{bzip2-0.9.5} decompressing, all settings normal. | |
3134 | +@item Backwards compatibility: A, @code{bzip2-0.9.5} compressing, | |
3135 | + @code{bzip2-0.1pl2} decompressing, all settings normal. | |
3136 | +@item Misc test: about 400 megabytes of @code{.tar} files with | |
3137 | + @code{bzip2} compiled with Checker (a memory access error | |
3138 | + detector, like Purify). | |
3139 | +@item Misc tests to make sure it builds and runs ok on non-Linux/x86 | |
3140 | + platforms. | |
3141 | +@end enumerate | |
3142 | +These tests were conducted on a 225 MHz IDT WinChip machine, running | |
3143 | +Linux 2.0.36. They represent nearly a week of continuous computation. | |
3144 | +All tests completed successfully. | |
3145 | + | |
3146 | + | |
3147 | +@section Further reading | |
3148 | +@code{bzip2} is not research work, in the sense that it doesn't present | |
3149 | +any new ideas. Rather, it's an engineering exercise based on existing | |
3150 | +ideas. | |
3151 | + | |
3152 | +Four documents describe essentially all the ideas behind @code{bzip2}: | |
3153 | +@example | |
3154 | +Michael Burrows and D. J. Wheeler: | |
3155 | + "A block-sorting lossless data compression algorithm" | |
3156 | + 10th May 1994. | |
3157 | + Digital SRC Research Report 124. | |
3158 | + ftp://ftp.digital.com/pub/DEC/SRC/research-reports/SRC-124.ps.gz | |
3159 | + If you have trouble finding it, try searching at the | |
3160 | + New Zealand Digital Library, http://www.nzdl.org. | |
3161 | + | |
3162 | +Daniel S. Hirschberg and Debra A. LeLewer | |
3163 | + "Efficient Decoding of Prefix Codes" | |
3164 | + Communications of the ACM, April 1990, Vol 33, Number 4. | |
3165 | + You might be able to get an electronic copy of this | |
3166 | + from the ACM Digital Library. | |
3167 | + | |
3168 | +David J. Wheeler | |
3169 | + Program bred3.c and accompanying document bred3.ps. | |
3170 | + This contains the idea behind the multi-table Huffman | |
3171 | + coding scheme. | |
3172 | + ftp://ftp.cl.cam.ac.uk/users/djw3/ | |
3173 | + | |
3174 | +Jon L. Bentley and Robert Sedgewick | |
3175 | + "Fast Algorithms for Sorting and Searching Strings" | |
3176 | + Available from Sedgewick's web page, | |
3177 | + www.cs.princeton.edu/~rs | |
3178 | +@end example | |
3179 | +The following paper gives valuable additional insights into the | |
3180 | +algorithm, but is not immediately the basis of any code | |
3181 | +used in bzip2. | |
3182 | +@example | |
3183 | +Peter Fenwick: | |
3184 | + Block Sorting Text Compression | |
3185 | + Proceedings of the 19th Australasian Computer Science Conference, | |
3186 | + Melbourne, Australia. Jan 31 - Feb 2, 1996. | |
3187 | + ftp://ftp.cs.auckland.ac.nz/pub/peter-f/ACSC96paper.ps | |
3188 | +@end example | |
3189 | +Kunihiko Sadakane's sorting algorithm, mentioned above, | |
3190 | +is available from: | |
3191 | +@example | |
3192 | +http://naomi.is.s.u-tokyo.ac.jp/~sada/papers/Sada98b.ps.gz | |
3193 | +@end example | |
3194 | +The Manber-Myers suffix array construction | |
3195 | +algorithm is described in a paper | |
3196 | +available from: | |
3197 | +@example | |
3198 | +http://www.cs.arizona.edu/people/gene/PAPERS/suffix.ps | |
3199 | +@end example | |
3200 | +Finally, the following paper documents some recent investigations | |
3201 | +I made into the performance of sorting algorithms: | |
3202 | +@example | |
3203 | +Julian Seward: | |
3204 | + On the Performance of BWT Sorting Algorithms | |
3205 | + Proceedings of the IEEE Data Compression Conference 2000 | |
3206 | + Snowbird, Utah. 28-30 March 2000. | |
3207 | +@end example | |
3208 | + | |
3209 | + | |
3210 | +@contents | |
3211 | + | |
3212 | +@bye | |
3213 | + | |
15ee0650 | 3214 | diff -Nru bzip2-1.0.2/doc/bzip2recover.1 bzip2-1.0.2.new/doc/bzip2recover.1 |
3215 | --- bzip2-1.0.2/doc/bzip2recover.1 Thu Jan 1 01:00:00 1970 | |
3216 | +++ bzip2-1.0.2.new/doc/bzip2recover.1 Fri Feb 1 04:19:11 2002 | |
d967e3ec | 3217 | @@ -0,0 +1 @@ |
3218 | +.so bzip2.1 | |
3219 | \ No newline at end of file | |
15ee0650 | 3220 | diff -Nru bzip2-1.0.2/doc/bzmore.1 bzip2-1.0.2.new/doc/bzmore.1 |
3221 | --- bzip2-1.0.2/doc/bzmore.1 Thu Jan 1 01:00:00 1970 | |
3222 | +++ bzip2-1.0.2.new/doc/bzmore.1 Sun Dec 30 03:12:35 2001 | |
3223 | @@ -0,0 +1,152 @@ | |
3224 | +.\"Shamelessly copied from zmore.1 by Philippe Troin <phil@fifi.org> | |
3225 | +.\"for Debian GNU/Linux | |
3226 | +.TH BZMORE 1 | |
3227 | +.SH NAME | |
3228 | +bzmore, bzless \- file perusal filter for crt viewing of bzip2 compressed text | |
3229 | +.SH SYNOPSIS | |
3230 | +.B bzmore | |
3231 | +[ name ... ] | |
3232 | +.br | |
3233 | +.B bzless | |
3234 | +[ name ... ] | |
3235 | +.SH NOTE | |
3236 | +In the following description, | |
3237 | +.I bzless | |
3238 | +and | |
3239 | +.I less | |
3240 | +can be used interchangeably with | |
3241 | +.I bzmore | |
3242 | +and | |
3243 | +.I more. | |
3244 | +.SH DESCRIPTION | |
3245 | +.I Bzmore | |
3246 | +is a filter which allows examination of compressed or plain text files | |
3247 | +one screenful at a time on a soft-copy terminal. | |
3248 | +.I bzmore | |
3249 | +works on files compressed with | |
3250 | +.I bzip2 | |
3251 | +and also on uncompressed files. | |
3252 | +If a file does not exist, | |
3253 | +.I bzmore | |
3254 | +looks for a file of the same name with the addition of a .bz2 suffix. | |
3255 | +.PP | |
3256 | +.I Bzmore | |
3257 | +normally pauses after each screenful, printing --More-- | |
3258 | +at the bottom of the screen. | |
3259 | +If the user then types a carriage return, one more line is displayed. | |
3260 | +If the user hits a space, | |
3261 | +another screenful is displayed. Other possibilities are enumerated later. | |
3262 | +.PP | |
3263 | +.I Bzmore | |
3264 | +looks in the file | |
3265 | +.I /etc/termcap | |
3266 | +to determine terminal characteristics, | |
3267 | +and to determine the default window size. | |
3268 | +On a terminal capable of displaying 24 lines, | |
3269 | +the default window size is 22 lines. | |
3270 | +Other sequences which may be typed when | |
3271 | +.I bzmore | |
3272 | +pauses, and their effects, are as follows (\fIi\fP is an optional integer | |
3273 | +argument, defaulting to 1) : | |
3274 | +.PP | |
3275 | +.IP \fIi\|\fP<space> | |
3276 | +display | |
3277 | +.I i | |
3278 | +more lines, (or another screenful if no argument is given) | |
3279 | +.PP | |
3280 | +.IP ^D | |
3281 | +display 11 more lines (a ``scroll''). | |
3282 | +If | |
3283 | +.I i | |
3284 | +is given, then the scroll size is set to \fIi\|\fP. | |
3285 | +.PP | |
3286 | +.IP d | |
3287 | +same as ^D (control-D) | |
3288 | +.PP | |
3289 | +.IP \fIi\|\fPz | |
3290 | +same as typing a space except that \fIi\|\fP, if present, becomes the new | |
3291 | +window size. Note that the window size reverts back to the default at the | |
3292 | +end of the current file. | |
3293 | +.PP | |
3294 | +.IP \fIi\|\fPs | |
3295 | +skip \fIi\|\fP lines and print a screenful of lines | |
3296 | +.PP | |
3297 | +.IP \fIi\|\fPf | |
3298 | +skip \fIi\fP screenfuls and print a screenful of lines | |
3299 | +.PP | |
3300 | +.IP "q or Q" | |
3301 | +quit reading the current file; go on to the next (if any) | |
3302 | +.PP | |
3303 | +.IP "e or q" | |
3304 | +When the prompt --More--(Next file: | |
3305 | +.IR file ) | |
3306 | +is printed, this command causes bzmore to exit. | |
3307 | +.PP | |
3308 | +.IP s | |
3309 | +When the prompt --More--(Next file: | |
3310 | +.IR file ) | |
3311 | +is printed, this command causes bzmore to skip the next file and continue. | |
3312 | +.PP | |
3313 | +.IP = | |
3314 | +Display the current line number. | |
3315 | +.PP | |
3316 | +.IP \fIi\|\fP/expr | |
3317 | +search for the \fIi\|\fP-th occurrence of the regular expression \fIexpr.\fP | |
3318 | +If the pattern is not found, | |
3319 | +.I bzmore | |
3320 | +goes on to the next file (if any). | |
3321 | +Otherwise, a screenful is displayed, starting two lines before the place | |
3322 | +where the expression was found. | |
3323 | +The user's erase and kill characters may be used to edit the regular | |
3324 | +expression. | |
3325 | +Erasing back past the first column cancels the search command. | |
3326 | +.PP | |
3327 | +.IP \fIi\|\fPn | |
3328 | +search for the \fIi\|\fP-th occurrence of the last regular expression entered. | |
3329 | +.PP | |
3330 | +.IP !command | |
3331 | +invoke a shell with \fIcommand\|\fP. | |
3332 | +Each `!' character in "command" is replaced with the | |
3333 | +previous shell command. The sequence "\\!" is replaced by "!". | |
3334 | +.PP | |
3335 | +.IP ":q or :Q" | |
3336 | +quit reading the current file; go on to the next (if any) | |
3337 | +(same as q or Q). | |
3338 | +.PP | |
3339 | +.IP . | |
3340 | +(dot) repeat the previous command. | |
3341 | +.PP | |
3342 | +The commands take effect immediately, i.e., it is not necessary to | |
3343 | +type a carriage return. | |
3344 | +Up to the time when the command character itself is given, | |
3345 | +the user may hit the line kill character to cancel the numerical | |
3346 | +argument being formed. | |
3347 | +In addition, the user may hit the erase character to redisplay the | |
3348 | +--More-- message. | |
3349 | +.PP | |
3350 | +At any time when output is being sent to the terminal, the user can | |
3351 | +hit the quit key (normally control\-\\). | |
3352 | +.I Bzmore | |
3353 | +will stop sending output, and will display the usual --More-- | |
3354 | +prompt. | |
3355 | +The user may then enter one of the above commands in the normal manner. | |
3356 | +Unfortunately, some output is lost when this is done, due to the | |
3357 | +fact that any characters waiting in the terminal's output queue | |
3358 | +are flushed when the quit signal occurs. | |
3359 | +.PP | |
3360 | +The terminal is set to | |
3361 | +.I noecho | |
3362 | +mode by this program so that the output can be continuous. | |
3363 | +What you type will thus not show on your terminal, except for the / and ! | |
3364 | +commands. | |
3365 | +.PP | |
3366 | +If the standard output is not a teletype, then | |
3367 | +.I bzmore | |
3368 | +acts just like | |
3369 | +.IR bzcat , | |
3370 | +except that a header is printed before each file. | |
3371 | +.SH FILES | |
3372 | +.DT | |
3373 | +/etc/termcap Terminal data base | |
3374 | +.SH "SEE ALSO" | |
3375 | +more(1), less(1), bzip2(1), bzdiff(1), bzgrep(1) | |
3376 | diff -Nru bzip2-1.0.2/doc/pl/Makefile.am bzip2-1.0.2.new/doc/pl/Makefile.am | |
3377 | --- bzip2-1.0.2/doc/pl/Makefile.am Thu Jan 1 01:00:00 1970 | |
3378 | +++ bzip2-1.0.2.new/doc/pl/Makefile.am Fri Feb 1 04:19:11 2002 | |
d967e3ec | 3379 | @@ -0,0 +1,4 @@ |
3380 | + | |
3381 | +mandir = @mandir@/pl | |
3382 | +man_MANS = bzip2.1 bunzip2.1 bzcat.1 bzip2recover.1 | |
3383 | + | |
15ee0650 | 3384 | diff -Nru bzip2-1.0.2/doc/pl/bunzip2.1 bzip2-1.0.2.new/doc/pl/bunzip2.1 |
3385 | --- bzip2-1.0.2/doc/pl/bunzip2.1 Thu Jan 1 01:00:00 1970 | |
3386 | +++ bzip2-1.0.2.new/doc/pl/bunzip2.1 Fri Feb 1 04:19:11 2002 | |
d967e3ec | 3387 | @@ -0,0 +1 @@ |
3388 | +.so bzip2.1 | |
3389 | \ No newline at end of file | |
15ee0650 | 3390 | diff -Nru bzip2-1.0.2/doc/pl/bzcat.1 bzip2-1.0.2.new/doc/pl/bzcat.1 |
3391 | --- bzip2-1.0.2/doc/pl/bzcat.1 Thu Jan 1 01:00:00 1970 | |
3392 | +++ bzip2-1.0.2.new/doc/pl/bzcat.1 Fri Feb 1 04:19:11 2002 | |
d967e3ec | 3393 | @@ -0,0 +1 @@ |
3394 | +.so bzip2.1 | |
3395 | \ No newline at end of file | |
15ee0650 | 3396 | diff -Nru bzip2-1.0.2/doc/pl/bzip2.1 bzip2-1.0.2.new/doc/pl/bzip2.1 |
3397 | --- bzip2-1.0.2/doc/pl/bzip2.1 Thu Jan 1 01:00:00 1970 | |
3398 | +++ bzip2-1.0.2.new/doc/pl/bzip2.1 Fri Feb 1 04:19:11 2002 | |
d967e3ec | 3399 | @@ -0,0 +1,384 @@ |
3400 |