source: trunk/minix/lib/zlib-1.2.3/examples/gun.c@ 9

Last change on this file since 9 was 9, checked in by Mattia Monga, 13 years ago

Minix 3.1.2a

File size: 24.9 KB
Line 
/* gun.c -- simple gunzip to give an example of the use of inflateBack()
 * Copyright (C) 2003, 2005 Mark Adler
 * For conditions of distribution and use, see copyright notice in zlib.h
   Version 1.3  12 June 2005  Mark Adler */
5
/* Version history:
   1.0  16 Feb 2003  First version for testing of inflateBack()
   1.1  21 Feb 2005  Decompress concatenated gzip streams
                     Remove use of "this" variable (C++ keyword)
                     Fix return value for in()
                     Improve allocation failure checking
                     Add typecasting for void * structures
                     Add -h option for command version and usage
                     Add a bunch of comments
   1.2  20 Mar 2005  Add Unix compress (LZW) decompression
                     Copy file attributes from input file to output file
   1.3  12 Jun 2005  Add casts for error messages [Oberhumer]
 */
19
/*
   gun [ -t ] [ name ... ]

   decompresses the data in the named gzip files.  If no arguments are given,
   gun will decompress from stdin to stdout.  The names must end in .gz, -gz,
   .z, -z, _z, or .Z.  The uncompressed data will be written to a file name
   with the suffix stripped.  On success, the original file is deleted.  On
   failure, the output file is deleted.  For most failures, the command will
   continue to process the remaining names on the command line.  A memory
   allocation failure will abort the command.  If -t is specified, then the
   listed files or stdin will be tested as gzip files for integrity (without
   checking for a proper suffix), no output will be written, and no files
   will be deleted.

   Like gzip, gun allows concatenated gzip streams and will decompress them,
   writing all of the uncompressed data to the output.  Unlike gzip, gun allows
   an empty file on input, and will produce no error writing an empty output
   file.

   gun will also decompress files made by Unix compress, which uses LZW
   compression.  These files are automatically detected by virtue of their
   magic header bytes.  Since the end of a Unix compress stream is marked by
   the end-of-file, they cannot be concatenated.  If a Unix compress stream is
   encountered in an input file, it is the last stream in that file.

   Like gunzip and uncompress, the file attributes of the original compressed
   file are maintained in the final uncompressed file, to the extent that the
   user permissions allow it.

   On my Mac OS X PowerPC G4, gun is almost twice as fast as gunzip (version
   1.2.4) is on the same file, when gun is linked with zlib 1.2.2.  Also the
   LZW decompression provided by gun is about twice as fast as the standard
   Unix uncompress command.
 */
54
/* external functions and related types and constants */
#include <stdio.h>          /* fprintf() */
#include <stdlib.h>         /* malloc(), free() */
#include <string.h>         /* strerror(), strcmp(), strlen(), memcpy() */
#include <errno.h>          /* errno */
#include <fcntl.h>          /* open() */
#include <unistd.h>         /* read(), write(), close(), chown(), unlink() */
#include <sys/types.h>
#include <sys/stat.h>       /* stat(), chmod() */
#include <utime.h>          /* utime() */
#include "zlib.h"           /* inflateBackInit(), inflateBack(), */
                            /* inflateBackEnd(), crc32() */
67
/* function declaration: "local" marks a function as private to this file */
#define local static

/* buffer constants */
#define SIZE 32768U         /* input and output buffer sizes */
#define PIECE 16384         /* limits i/o chunks for 16-bit int case */
74
/* structure for infback() to pass to input function in() -- it maintains the
   input file and a buffer of size SIZE */
struct ind {
    int infile;             /* file descriptor that in() reads from */
    unsigned char *inbuf;   /* start of the buffer that in() fills */
};
81
82/* Load input buffer, assumed to be empty, and return bytes loaded and a
83 pointer to them. read() is called until the buffer is full, or until it
84 returns end-of-file or error. Return 0 on error. */
85local unsigned in(void *in_desc, unsigned char **buf)
86{
87 int ret;
88 unsigned len;
89 unsigned char *next;
90 struct ind *me = (struct ind *)in_desc;
91
92 next = me->inbuf;
93 *buf = next;
94 len = 0;
95 do {
96 ret = PIECE;
97 if ((unsigned)ret > SIZE - len)
98 ret = (int)(SIZE - len);
99 ret = (int)read(me->infile, next, ret);
100 if (ret == -1) {
101 len = 0;
102 break;
103 }
104 next += ret;
105 len += ret;
106 } while (ret != 0 && len < SIZE);
107 return len;
108}
109
/* structure for infback() to pass to output function out() -- it maintains the
   output file, a running CRC-32 check on the output and the total number of
   bytes output, both for checking against the gzip trailer.  (The length in
   the gzip trailer is stored modulo 2^32, so it's ok if a long is 32 bits and
   the output is greater than 4 GB.) */
struct outd {
    int outfile;            /* output file descriptor, or -1 to write nothing */
    int check;              /* true if checking crc and total */
    unsigned long crc;      /* running CRC-32 of the output */
    unsigned long total;    /* running count of output bytes */
};
121
122/* Write output buffer and update the CRC-32 and total bytes written. write()
123 is called until all of the output is written or an error is encountered.
124 On success out() returns 0. For a write failure, out() returns 1. If the
125 output file descriptor is -1, then nothing is written.
126 */
127local int out(void *out_desc, unsigned char *buf, unsigned len)
128{
129 int ret;
130 struct outd *me = (struct outd *)out_desc;
131
132 if (me->check) {
133 me->crc = crc32(me->crc, buf, len);
134 me->total += len;
135 }
136 if (me->outfile != -1)
137 do {
138 ret = PIECE;
139 if ((unsigned)ret > len)
140 ret = (int)len;
141 ret = (int)write(me->outfile, buf, ret);
142 if (ret == -1)
143 return 1;
144 buf += ret;
145 len -= ret;
146 } while (len != 0);
147 return 0;
148}
149
/* next input byte macro for use inside lunpipe() and gunpipe() -- it expects
   the locals have, next, last and indp to be in scope.  If no buffered bytes
   remain, in() is called to refill the buffer.  The byte fetched (or -1 if no
   input could be loaded) is stored in last and is the value of the macro. */
#define NEXT() (have ? 0 : (have = in(indp, &next)), \
                last = have ? (have--, (int)(*next++)) : -1)
153
154/* memory for gunpipe() and lunpipe() --
155 the first 256 entries of prefix[] and suffix[] are never used, could
156 have offset the index, but it's faster to waste the memory */
157unsigned char inbuf[SIZE]; /* input buffer */
158unsigned char outbuf[SIZE]; /* output buffer */
159unsigned short prefix[65536]; /* index to LZW prefix string */
160unsigned char suffix[65536]; /* one-character LZW suffix */
161unsigned char match[65280 + 2]; /* buffer for reversed match or gzip
162 32K sliding window */
163
/* throw out what's left in the current bits byte buffer (this is a vestigial
   aspect of the compressed data format derived from an implementation that
   made use of a special VAX machine instruction!) -- the leftover bits in rem
   are dropped and the chunk bytes still owed to the current chunk are skipped
   in the input, refilling the buffer once through NEXT() if they extend past
   what is buffered.  Expects left, rem, chunk, have and next (plus whatever
   NEXT() needs) to be in scope. */
#define FLUSHCODE() \
    do { \
        left = 0; \
        rem = 0; \
        if (chunk > have) { \
            chunk -= have; \
            have = 0; \
            if (NEXT() == -1) \
                break; \
            chunk--; \
            if (chunk > have) { \
                chunk = have = 0; \
                break; \
            } \
        } \
        have -= chunk; \
        next += chunk; \
        chunk = 0; \
    } while (0)
186
187/* Decompress a compress (LZW) file from indp to outfile. The compress magic
188 header (two bytes) has already been read and verified. There are have bytes
189 of buffered input at next. strm is used for passing error information back
190 to gunpipe().
191
192 lunpipe() will return Z_OK on success, Z_BUF_ERROR for an unexpected end of
193 file, read error, or write error (a write error indicated by strm->next_in
194 not equal to Z_NULL), or Z_DATA_ERROR for invalid input.
195 */
196local int lunpipe(unsigned have, unsigned char *next, struct ind *indp,
197 int outfile, z_stream *strm)
198{
199 int last; /* last byte read by NEXT(), or -1 if EOF */
200 int chunk; /* bytes left in current chunk */
201 int left; /* bits left in rem */
202 unsigned rem; /* unused bits from input */
203 int bits; /* current bits per code */
204 unsigned code; /* code, table traversal index */
205 unsigned mask; /* mask for current bits codes */
206 int max; /* maximum bits per code for this stream */
207 int flags; /* compress flags, then block compress flag */
208 unsigned end; /* last valid entry in prefix/suffix tables */
209 unsigned temp; /* current code */
210 unsigned prev; /* previous code */
211 unsigned final; /* last character written for previous code */
212 unsigned stack; /* next position for reversed string */
213 unsigned outcnt; /* bytes in output buffer */
214 struct outd outd; /* output structure */
215
216 /* set up output */
217 outd.outfile = outfile;
218 outd.check = 0;
219
220 /* process remainder of compress header -- a flags byte */
221 flags = NEXT();
222 if (last == -1)
223 return Z_BUF_ERROR;
224 if (flags & 0x60) {
225 strm->msg = (char *)"unknown lzw flags set";
226 return Z_DATA_ERROR;
227 }
228 max = flags & 0x1f;
229 if (max < 9 || max > 16) {
230 strm->msg = (char *)"lzw bits out of range";
231 return Z_DATA_ERROR;
232 }
233 if (max == 9) /* 9 doesn't really mean 9 */
234 max = 10;
235 flags &= 0x80; /* true if block compress */
236
237 /* clear table */
238 bits = 9;
239 mask = 0x1ff;
240 end = flags ? 256 : 255;
241
242 /* set up: get first 9-bit code, which is the first decompressed byte, but
243 don't create a table entry until the next code */
244 if (NEXT() == -1) /* no compressed data is ok */
245 return Z_OK;
246 final = prev = (unsigned)last; /* low 8 bits of code */
247 if (NEXT() == -1) /* missing a bit */
248 return Z_BUF_ERROR;
249 if (last & 1) { /* code must be < 256 */
250 strm->msg = (char *)"invalid lzw code";
251 return Z_DATA_ERROR;
252 }
253 rem = (unsigned)last >> 1; /* remaining 7 bits */
254 left = 7;
255 chunk = bits - 2; /* 7 bytes left in this chunk */
256 outbuf[0] = (unsigned char)final; /* write first decompressed byte */
257 outcnt = 1;
258
259 /* decode codes */
260 stack = 0;
261 for (;;) {
262 /* if the table will be full after this, increment the code size */
263 if (end >= mask && bits < max) {
264 FLUSHCODE();
265 bits++;
266 mask <<= 1;
267 mask++;
268 }
269
270 /* get a code of length bits */
271 if (chunk == 0) /* decrement chunk modulo bits */
272 chunk = bits;
273 code = rem; /* low bits of code */
274 if (NEXT() == -1) { /* EOF is end of compressed data */
275 /* write remaining buffered output */
276 if (outcnt && out(&outd, outbuf, outcnt)) {
277 strm->next_in = outbuf; /* signal write error */
278 return Z_BUF_ERROR;
279 }
280 return Z_OK;
281 }
282 code += (unsigned)last << left; /* middle (or high) bits of code */
283 left += 8;
284 chunk--;
285 if (bits > left) { /* need more bits */
286 if (NEXT() == -1) /* can't end in middle of code */
287 return Z_BUF_ERROR;
288 code += (unsigned)last << left; /* high bits of code */
289 left += 8;
290 chunk--;
291 }
292 code &= mask; /* mask to current code length */
293 left -= bits; /* number of unused bits */
294 rem = (unsigned)last >> (8 - left); /* unused bits from last byte */
295
296 /* process clear code (256) */
297 if (code == 256 && flags) {
298 FLUSHCODE();
299 bits = 9; /* initialize bits and mask */
300 mask = 0x1ff;
301 end = 255; /* empty table */
302 continue; /* get next code */
303 }
304
305 /* special code to reuse last match */
306 temp = code; /* save the current code */
307 if (code > end) {
308 /* Be picky on the allowed code here, and make sure that the code
309 we drop through (prev) will be a valid index so that random
310 input does not cause an exception. The code != end + 1 check is
311 empirically derived, and not checked in the original uncompress
312 code. If this ever causes a problem, that check could be safely
313 removed. Leaving this check in greatly improves gun's ability
314 to detect random or corrupted input after a compress header.
315 In any case, the prev > end check must be retained. */
316 if (code != end + 1 || prev > end) {
317 strm->msg = (char *)"invalid lzw code";
318 return Z_DATA_ERROR;
319 }
320 match[stack++] = (unsigned char)final;
321 code = prev;
322 }
323
324 /* walk through linked list to generate output in reverse order */
325 while (code >= 256) {
326 match[stack++] = suffix[code];
327 code = prefix[code];
328 }
329 match[stack++] = (unsigned char)code;
330 final = code;
331
332 /* link new table entry */
333 if (end < mask) {
334 end++;
335 prefix[end] = (unsigned short)prev;
336 suffix[end] = (unsigned char)final;
337 }
338
339 /* set previous code for next iteration */
340 prev = temp;
341
342 /* write output in forward order */
343 while (stack > SIZE - outcnt) {
344 while (outcnt < SIZE)
345 outbuf[outcnt++] = match[--stack];
346 if (out(&outd, outbuf, outcnt)) {
347 strm->next_in = outbuf; /* signal write error */
348 return Z_BUF_ERROR;
349 }
350 outcnt = 0;
351 }
352 do {
353 outbuf[outcnt++] = match[--stack];
354 } while (stack);
355
356 /* loop for next code with final and prev as the last match, rem and
357 left provide the first 0..7 bits of the next code, end is the last
358 valid table entry */
359 }
360}
361
362/* Decompress a gzip file from infile to outfile. strm is assumed to have been
363 successfully initialized with inflateBackInit(). The input file may consist
364 of a series of gzip streams, in which case all of them will be decompressed
365 to the output file. If outfile is -1, then the gzip stream(s) integrity is
366 checked and nothing is written.
367
368 The return value is a zlib error code: Z_MEM_ERROR if out of memory,
369 Z_DATA_ERROR if the header or the compressed data is invalid, or if the
370 trailer CRC-32 check or length doesn't match, Z_BUF_ERROR if the input ends
371 prematurely or a write error occurs, or Z_ERRNO if junk (not a another gzip
372 stream) follows a valid gzip stream.
373 */
374local int gunpipe(z_stream *strm, int infile, int outfile)
375{
376 int ret, first, last;
377 unsigned have, flags, len;
378 unsigned char *next;
379 struct ind ind, *indp;
380 struct outd outd;
381
382 /* setup input buffer */
383 ind.infile = infile;
384 ind.inbuf = inbuf;
385 indp = &ind;
386
387 /* decompress concatenated gzip streams */
388 have = 0; /* no input data read in yet */
389 first = 1; /* looking for first gzip header */
390 strm->next_in = Z_NULL; /* so Z_BUF_ERROR means EOF */
391 for (;;) {
392 /* look for the two magic header bytes for a gzip stream */
393 if (NEXT() == -1) {
394 ret = Z_OK;
395 break; /* empty gzip stream is ok */
396 }
397 if (last != 31 || (NEXT() != 139 && last != 157)) {
398 strm->msg = (char *)"incorrect header check";
399 ret = first ? Z_DATA_ERROR : Z_ERRNO;
400 break; /* not a gzip or compress header */
401 }
402 first = 0; /* next non-header is junk */
403
404 /* process a compress (LZW) file -- can't be concatenated after this */
405 if (last == 157) {
406 ret = lunpipe(have, next, indp, outfile, strm);
407 break;
408 }
409
410 /* process remainder of gzip header */
411 ret = Z_BUF_ERROR;
412 if (NEXT() != 8) { /* only deflate method allowed */
413 if (last == -1) break;
414 strm->msg = (char *)"unknown compression method";
415 ret = Z_DATA_ERROR;
416 break;
417 }
418 flags = NEXT(); /* header flags */
419 NEXT(); /* discard mod time, xflgs, os */
420 NEXT();
421 NEXT();
422 NEXT();
423 NEXT();
424 NEXT();
425 if (last == -1) break;
426 if (flags & 0xe0) {
427 strm->msg = (char *)"unknown header flags set";
428 ret = Z_DATA_ERROR;
429 break;
430 }
431 if (flags & 4) { /* extra field */
432 len = NEXT();
433 len += (unsigned)(NEXT()) << 8;
434 if (last == -1) break;
435 while (len > have) {
436 len -= have;
437 have = 0;
438 if (NEXT() == -1) break;
439 len--;
440 }
441 if (last == -1) break;
442 have -= len;
443 next += len;
444 }
445 if (flags & 8) /* file name */
446 while (NEXT() != 0 && last != -1)
447 ;
448 if (flags & 16) /* comment */
449 while (NEXT() != 0 && last != -1)
450 ;
451 if (flags & 2) { /* header crc */
452 NEXT();
453 NEXT();
454 }
455 if (last == -1) break;
456
457 /* set up output */
458 outd.outfile = outfile;
459 outd.check = 1;
460 outd.crc = crc32(0L, Z_NULL, 0);
461 outd.total = 0;
462
463 /* decompress data to output */
464 strm->next_in = next;
465 strm->avail_in = have;
466 ret = inflateBack(strm, in, indp, out, &outd);
467 if (ret != Z_STREAM_END) break;
468 next = strm->next_in;
469 have = strm->avail_in;
470 strm->next_in = Z_NULL; /* so Z_BUF_ERROR means EOF */
471
472 /* check trailer */
473 ret = Z_BUF_ERROR;
474 if (NEXT() != (outd.crc & 0xff) ||
475 NEXT() != ((outd.crc >> 8) & 0xff) ||
476 NEXT() != ((outd.crc >> 16) & 0xff) ||
477 NEXT() != ((outd.crc >> 24) & 0xff)) {
478 /* crc error */
479 if (last != -1) {
480 strm->msg = (char *)"incorrect data check";
481 ret = Z_DATA_ERROR;
482 }
483 break;
484 }
485 if (NEXT() != (outd.total & 0xff) ||
486 NEXT() != ((outd.total >> 8) & 0xff) ||
487 NEXT() != ((outd.total >> 16) & 0xff) ||
488 NEXT() != ((outd.total >> 24) & 0xff)) {
489 /* length error */
490 if (last != -1) {
491 strm->msg = (char *)"incorrect length check";
492 ret = Z_DATA_ERROR;
493 }
494 break;
495 }
496
497 /* go back and look for another gzip stream */
498 }
499
500 /* clean up and return */
501 return ret;
502}
503
504/* Copy file attributes, from -> to, as best we can. This is best effort, so
505 no errors are reported. The mode bits, including suid, sgid, and the sticky
506 bit are copied (if allowed), the owner's user id and group id are copied
507 (again if allowed), and the access and modify times are copied. */
508local void copymeta(char *from, char *to)
509{
510 struct stat was;
511 struct utimbuf when;
512
513 /* get all of from's Unix meta data, return if not a regular file */
514 if (stat(from, &was) != 0 || (was.st_mode & S_IFMT) != S_IFREG)
515 return;
516
517 /* set to's mode bits, ignore errors */
518 (void)chmod(to, was.st_mode & 07777);
519
520 /* copy owner's user and group, ignore errors */
521 (void)chown(to, was.st_uid, was.st_gid);
522
523 /* copy access and modify times, ignore errors */
524 when.actime = was.st_atime;
525 when.modtime = was.st_mtime;
526 (void)utime(to, &when);
527}
528
529/* Decompress the file inname to the file outnname, of if test is true, just
530 decompress without writing and check the gzip trailer for integrity. If
531 inname is NULL or an empty string, read from stdin. If outname is NULL or
532 an empty string, write to stdout. strm is a pre-initialized inflateBack
533 structure. When appropriate, copy the file attributes from inname to
534 outname.
535
536 gunzip() returns 1 if there is an out-of-memory error or an unexpected
537 return code from gunpipe(). Otherwise it returns 0.
538 */
539local int gunzip(z_stream *strm, char *inname, char *outname, int test)
540{
541 int ret;
542 int infile, outfile;
543
544 /* open files */
545 if (inname == NULL || *inname == 0) {
546 inname = "-";
547 infile = 0; /* stdin */
548 }
549 else {
550 infile = open(inname, O_RDONLY, 0);
551 if (infile == -1) {
552 fprintf(stderr, "gun cannot open %s\n", inname);
553 return 0;
554 }
555 }
556 if (test)
557 outfile = -1;
558 else if (outname == NULL || *outname == 0) {
559 outname = "-";
560 outfile = 1; /* stdout */
561 }
562 else {
563 outfile = open(outname, O_CREAT | O_TRUNC | O_WRONLY, 0666);
564 if (outfile == -1) {
565 close(infile);
566 fprintf(stderr, "gun cannot create %s\n", outname);
567 return 0;
568 }
569 }
570 errno = 0;
571
572 /* decompress */
573 ret = gunpipe(strm, infile, outfile);
574 if (outfile > 2) close(outfile);
575 if (infile > 2) close(infile);
576
577 /* interpret result */
578 switch (ret) {
579 case Z_OK:
580 case Z_ERRNO:
581 if (infile > 2 && outfile > 2) {
582 copymeta(inname, outname); /* copy attributes */
583 unlink(inname);
584 }
585 if (ret == Z_ERRNO)
586 fprintf(stderr, "gun warning: trailing garbage ignored in %s\n",
587 inname);
588 break;
589 case Z_DATA_ERROR:
590 if (outfile > 2) unlink(outname);
591 fprintf(stderr, "gun data error on %s: %s\n", inname, strm->msg);
592 break;
593 case Z_MEM_ERROR:
594 if (outfile > 2) unlink(outname);
595 fprintf(stderr, "gun out of memory error--aborting\n");
596 return 1;
597 case Z_BUF_ERROR:
598 if (outfile > 2) unlink(outname);
599 if (strm->next_in != Z_NULL) {
600 fprintf(stderr, "gun write error on %s: %s\n",
601 outname, strerror(errno));
602 }
603 else if (errno) {
604 fprintf(stderr, "gun read error on %s: %s\n",
605 inname, strerror(errno));
606 }
607 else {
608 fprintf(stderr, "gun unexpected end of file on %s\n",
609 inname);
610 }
611 break;
612 default:
613 if (outfile > 2) unlink(outname);
614 fprintf(stderr, "gun internal error--aborting\n");
615 return 1;
616 }
617 return 0;
618}
619
620/* Process the gun command line arguments. See the command syntax near the
621 beginning of this source file. */
622int main(int argc, char **argv)
623{
624 int ret, len, test;
625 char *outname;
626 unsigned char *window;
627 z_stream strm;
628
629 /* initialize inflateBack state for repeated use */
630 window = match; /* reuse LZW match buffer */
631 strm.zalloc = Z_NULL;
632 strm.zfree = Z_NULL;
633 strm.opaque = Z_NULL;
634 ret = inflateBackInit(&strm, 15, window);
635 if (ret != Z_OK) {
636 fprintf(stderr, "gun out of memory error--aborting\n");
637 return 1;
638 }
639
640 /* decompress each file to the same name with the suffix removed */
641 argc--;
642 argv++;
643 test = 0;
644 if (argc && strcmp(*argv, "-h") == 0) {
645 fprintf(stderr, "gun 1.3 (12 Jun 2005)\n");
646 fprintf(stderr, "Copyright (c) 2005 Mark Adler\n");
647 fprintf(stderr, "usage: gun [-t] [file1.gz [file2.Z ...]]\n");
648 return 0;
649 }
650 if (argc && strcmp(*argv, "-t") == 0) {
651 test = 1;
652 argc--;
653 argv++;
654 }
655 if (argc)
656 do {
657 if (test)
658 outname = NULL;
659 else {
660 len = (int)strlen(*argv);
661 if (strcmp(*argv + len - 3, ".gz") == 0 ||
662 strcmp(*argv + len - 3, "-gz") == 0)
663 len -= 3;
664 else if (strcmp(*argv + len - 2, ".z") == 0 ||
665 strcmp(*argv + len - 2, "-z") == 0 ||
666 strcmp(*argv + len - 2, "_z") == 0 ||
667 strcmp(*argv + len - 2, ".Z") == 0)
668 len -= 2;
669 else {
670 fprintf(stderr, "gun error: no gz type on %s--skipping\n",
671 *argv);
672 continue;
673 }
674 outname = malloc(len + 1);
675 if (outname == NULL) {
676 fprintf(stderr, "gun out of memory error--aborting\n");
677 ret = 1;
678 break;
679 }
680 memcpy(outname, *argv, len);
681 outname[len] = 0;
682 }
683 ret = gunzip(&strm, *argv, outname, test);
684 if (outname != NULL) free(outname);
685 if (ret) break;
686 } while (argv++, --argc);
687 else
688 ret = gunzip(&strm, NULL, NULL, test);
689
690 /* clean up */
691 inflateBackEnd(&strm);
692 return ret;
693}
Note: See TracBrowser for help on using the repository browser.