/* decomp16: decompress 16bit compressed files on a 16bit Intel processor * * Version 1.3 of 25 Mar 92. * * This was written by John N. White on 6/30/91 and is Public Domain. * Patched to run under news by Will Rose, Feb 92. * J N White's (earlier) patches added by Will Rose, 20 Feb 92. * Unsigned int increment/wrap bug fixed by Will Rose, 24 Mar 92. * Argument bug fixed, stdio generalised by Will Rose, 25 Mar 92. * * decomp16 can use as as little as 512 bytes of stack; since it forks * four additional copies, it's probably worth using minimum stack rather * than the 8192 byte Minix default. To reduce memory still further, * change BUFSZ below to 256; it is currently set to 1024 for speed. The * minimal decomp16 needs about 280k to run in pipe mode (56k per copy). * * This program acts as a filter: * decomp16 < compressed_file > decompressed_file * The arguments -0 to -4 run only the corresponding pass. * Thus: * decomp16 -4 < compressed_file > 3; * decomp16 -3 < 3 > 2; * decomp16 -2 < 2 > 1; * decomp16 -1 < 1 > 0; * decomp16 -0 < 0 > decompressed_file * will also work, as will connecting the passes by explicit pipes if * there is enough memory to do so. File name arguments can also be * given directly on the command line. * * Compress uses a modified LZW compression algorithm. A compressed file * is a set of indices into a dictionary of strings. The number of bits * used to store each index depends on the number of entries currently * in the dictionary. If there are between 257 and 512 entries, 9 bits * are used. With 513 entries, 10 bits are used, etc. The initial dictionary * consists of 0-255 (which are the corresponding chars) and 256 (which * is a special CLEAR code). As each index in the compressed file is read, * a new entry is added to the dictionary consisting of the current string * with the first char of the next string appended. When the dictionary * is full, no further entries are added. If a CLEAR code is received, * the dictionary will be completely reset. The first two bytes of the * compressed file are a magic number, and the third byte indicates the * maximum number of bits, and whether the CLEAR code is used (older versions * of compress didn't have CLEAR). * * This program works by forking four more copies of itself. The five * programs form a pipeline. Copy 0 writes to stdout, and forks copy 1 * to supply its input, which in turn forks and reads from copy 2, etc. * This sequence is used so that when the program exits, all writes * are completed and a program that has exec'd uncompress (such as news) * can immediately use the uncompressed data when the wait() call returns. * * If given a switch -#, where # is a digit from 0 to 4 (example: -2), the * program will run as that copy, reading from stdin and writing to stdout. * This allows decompressing with very limited RAM because only one of the * five passes is in memory at a time. * * The compressed data is a series of string indices (and a header at * the beginning and an occasional CLEAR code). As these indices flow * through the pipes, each program decodes the ones it can. The result * of each decoding will be indices that the following programs can handle. * * Each of the 65536 strings in the dictionary is an earlier string with * some character added to the end (except for the the 256 predefined * single char strings). When new entries are made to the dictionary, * the string index part will just be the last index to pass through. * But the char part is the first char of the next string, which isn't * known yet. So the string can be stored as a pair of indices. When * this string is specified, it is converted to this pair of indices, * which are flagged so that the first will be decoded in full while * the second will be decoded to its first char. The dictionary takes * 256k to store (64k strings of 2 indices of 2 bytes each). This is * too big for a 64k data segment, so it is divided into 5 equal parts. * Copy 4 of the program maintains the high part and copy 0 holds the * low part. */ #include #include #include #include #define BUFSZ 1024 /* size of i/o buffers */ #define BUFSZ_2 (BUFSZ/2) /* # of unsigned shorts in i/o bufs */ #define DICTSZ (unsigned)13056 /* # of local dictionary entries */ #define EOF_INDEX (unsigned short)0xFFFF /* EOF flag for pipeline */ #define FALSE 0 #define TRUE ~FALSE int fdin, fdout, fderr; /* input, output, and error file descriptors */ int ibufstart, obufind, ibufend;/* i/o buffer indices */ int ipbufind = BUFSZ_2; /* pipe buffer indices */ int opbufind = 0; int pnum = -1; /* ID of this copy */ unsigned short ipbuf[BUFSZ_2]; /* for buffering input */ unsigned short opbuf[BUFSZ_2]; /* for buffering output */ unsigned char *ibuf = (unsigned char *) ipbuf; unsigned char *obuf = (unsigned char *) opbuf; unsigned short dindex[DICTSZ]; /* dictionary: index to substring */ unsigned short dchar[DICTSZ]; /* dictionary: last char of string */ unsigned iindex, tindex, tindex2; /* holds index being processed */ unsigned base; /* where in global dict local dict starts */ unsigned tbase; unsigned locend; /* where in global dict local dict ends */ unsigned curend = 256; /* current end of global dict */ unsigned maxend; /* max end of global dict */ int dcharp; /* ptr to dchar that needs next index entry */ int curbits; /* number of bits for getbits() to read */ int maxbits; /* limit on number of bits */ int clearflg; /* if set, allow CLEAR */ int inmod; /* mod 8 for getbits() */ _PROTOTYPE(int main, (int argc, char **argv)); _PROTOTYPE(void ffork, (void)); _PROTOTYPE(void die, (char *s)); _PROTOTYPE(void myputc, (unsigned c)); _PROTOTYPE(unsigned mygetc, (void)); _PROTOTYPE(void getbits, (void)); _PROTOTYPE(void getpipe, (void)); _PROTOTYPE(void putpipe, (unsigned u, int flag)); int main(argc, argv) int argc; char **argv; { char c, *cp; int j, k, fdtmp; unsigned int len; /* Find the program name */ j = 0; while (argv[0][j] != '\0') j++; len = (unsigned int) j; while (j--) if (argv[0][j] == '/') break; if (argv[0][j] == '/') j++; cp = argv[0] + j; len -= j; /* Sort out the flags */ for (k = 1; k < argc; k++) { if (argv[k][0] == '-') { c = argv[k][1]; switch (c) { case '0': /* pass numbers */ case '1': case '2': case '3': case '4': pnum = c - '0'; break; case 'd': /* used by news */ break; default: (void) write(1, "Usage: ", 7); (void) write(1, cp, len); (void) write(1, " [-#] [in] [out]\n", 17); exit(0); break; } /* Once it's checked, lose it anyway */ for (j = k; j < argc; j++) argv[j] = argv[j + 1]; argc--; k--; } } /* Default i/o settings */ fdin = 0; fdout = 1; fderr = 2; /* Try to open specific files and connect them to stdin/stdout */ if (argc > 1) { if ((fdtmp = open(argv[1], 0)) == -1) die("input open failed"); (void) close(0); if ((fdin = dup(fdtmp)) == -1) die("input dup failed\n"); (void) close(fdtmp); } if (argc > 2) { (void) unlink(argv[2]); if ((fdtmp = creat(argv[2], 0666)) == -1) die("output creat failed"); (void) close(1); if ((fdout = dup(fdtmp)) == -1) die("output dup failed\n"); (void) close(fdtmp); } /* Sort out type of compression */ if (pnum == -1 || pnum == 4) {/* if this is pass 4 */ /* Check header of compressed file */ if (mygetc() != 0x1F || mygetc() != 0x9D) /* check magic number */ die("not a compressed file\n"); iindex = mygetc(); /* get compression style */ } else getpipe(); /* get compression style */ maxbits = iindex & 0x1F; clearflg = ((iindex & 0x80) != 0) ? TRUE : FALSE; if (maxbits < 9 || maxbits > 16) /* check for valid maxbits */ die("can't decompress\n"); if (pnum != -1 && pnum != 0) putpipe(iindex, 0); /* pass style to next copy */ /* Fork off an ancestor if necessary - ffork() increments pnum */ if (pnum == -1) { pnum = 0; if (pnum == 0) ffork(); if (pnum == 1) ffork(); if (pnum == 2) ffork(); if (pnum == 3) ffork(); } /* Preliminary inits. Note: end/maxend/curend are highest, not * highest + 1 */ base = DICTSZ * pnum + 256; locend = base + DICTSZ - 1; maxend = (1 << maxbits) - 1; if (maxend > locend) maxend = locend; while (TRUE) { curend = 255 + (clearflg ? 1 : 0); /* init dictionary */ dcharp = DICTSZ; /* flag for none needed */ curbits = 9; /* init curbits (for copy 0) */ while (TRUE) { /* for each index in input */ if (pnum == 4) {/* get index using getbits() */ if (curbits < maxbits && (1 << curbits) <= curend) { /* Curbits needs to be increased */ /* Due to uglyness in compress, these * indices in the compressed file are * wasted */ while (inmod) getbits(); curbits++; } getbits(); } else getpipe(); /* get next index */ if (iindex == 256 && clearflg) { if (pnum > 0) putpipe(iindex, 0); /* Due to uglyness in compress, these indices * in the compressed file are wasted */ while (inmod) getbits(); break; } tindex = iindex; /* Convert the index part, ignoring spawned chars */ while (tindex >= base) tindex = dindex[tindex - base]; /* Pass on the index */ putpipe(tindex, 0); /* Save the char of the last added entry, if any */ if (dcharp < DICTSZ) dchar[dcharp++] = tindex; if (curend < maxend && ++curend > (base - 1)) dindex[dcharp = (curend - base)] = iindex; /* Do spawned chars. They are naturally produced in * the wrong order. To get them in the right order * without using memory, a series of passes, * progressively less deep, are used */ tbase = base; while ((tindex = iindex) >= tbase) {/* for each char to spawn*/ while ((tindex2 = dindex[tindex - base]) >= tbase) tindex = tindex2; /* scan to desired char */ putpipe(dchar[tindex-base], 1); /* put it to the pipe*/ tbase = tindex + 1; if (tbase == 0) break; /* it's a wrap */ } } } } /* F f o r k * * Fork off the previous pass - the parent reads from the child. */ void ffork() { int j, pfd[2]; if (pipe(pfd) == -1) die("pipe() error\n"); if ((j = fork()) == -1) die("fork() error\n"); if (j == 0) { /* this is the child */ if (close(1) == -1) die("close(1) error\n"); if (dup(pfd[1]) != 1) die("dup(1) error\n"); (void) close(pfd[0]); pnum++; } else { /* this is the parent */ if (close(0) == -1) die("close(0) error\n"); if (dup(pfd[0]) != 0) die("dup(0) error\n"); (void) close(pfd[1]); } } /* D i e * * If s is a message, write it to stderr. Flush buffers if needed. Then exit. */ void die(s) char *s; { /* Flush stdout buffer if needed */ if (obufind != 0) { if (write(fdout, (char *) obuf, (unsigned) obufind) != obufind) s = "bad stdout write\n"; obufind = 0; } /* Flush pipe if needed */ do putpipe(EOF_INDEX, 0); while (opbufind); /* Write any error message */ if (s != (char *) NULL) { while (*s) (void) write(fderr, s++, 1); } exit((s == (char *) NULL) ? 0 : 1); } /* M p u t c * * Put a char to stdout. */ void myputc(c) unsigned c; { obuf[obufind++] = c; if (obufind >= BUFSZ) { /* if stdout buffer full */ if (write(fdout, (char *) obuf, BUFSZ) != BUFSZ) /* flush to stdout */ die("bad stdout write\n"); obufind = 0; } } /* M y g e t c * * Get a char from stdin. If EOF, then die() and exit. */ unsigned mygetc() { if (ibufstart >= ibufend) { /* if stdin buffer empty */ if ((ibufend = read(fdin, (char *) ibuf, BUFSZ)) <= 0) die((char *) NULL); /* if EOF, do normal exit */ ibufstart = 0; } return(ibuf[ibufstart++] & 0xff); } /* G e t b i t s * * Put curbits bits into index from stdin. Note: only copy 4 uses this. * The bits within a byte are in the correct order. But when the bits * cross a byte boundry, the lowest bits will be in the higher part of * the current byte, and the higher bits will be in the lower part of * the next byte. */ void getbits() { int have; static unsigned curbyte; /* byte having bits extracted from it */ static int left; /* how many bits are left in curbyte */ inmod = (inmod + 1) & 7; /* count input mod 8 */ iindex = curbyte; have = left; if (curbits - have > 8) { iindex |= mygetc() << have; have += 8; } iindex |= ((curbyte = mygetc()) << have) & ~((unsigned) 0xFFFF << curbits); curbyte >>= curbits - have; left = 8 - (curbits - have); } /* G e t p i p e * * Get an index from the pipeline. If flagged firstonly, handle it here. */ void getpipe() { static short flags; static int n = 0; /* number of flags in flags */ while (TRUE) { /* while index with firstonly flag set */ if (n <= 0) { if (ipbufind >= BUFSZ_2) { /* if pipe input buffer * empty */ if (read(fdin, (char *) ipbuf, BUFSZ) != BUFSZ) die("bad pipe read\n"); ipbufind = 0; } flags = ipbuf[ipbufind++]; n = 15; } iindex = ipbuf[ipbufind++]; if (iindex > curend) die((iindex == EOF_INDEX) ? (char *) NULL : "invalid data\n"); flags <<= 1; n--; /* Assume flags < 0 if highest remaining flag is set */ if (flags < 0) { /* if firstonly flag for index is not set */ while (iindex >= base) iindex = dindex[iindex - base]; putpipe(iindex, 1); } else return; /* return with valid non-firstonly index */ } } /* P u t p i p e * * put an index into the pipeline. */ void putpipe(u, flag) unsigned u; int flag; { static unsigned short flags, *flagp; static int n = 0; /* number of flags in flags */ if (pnum == 0) { /* if we should write to stdout */ myputc(u); /* index will be the char value */ return; } if (n == 0) { /* if we need to reserve a flag entry */ flags = 0; flagp = opbuf + opbufind; opbufind++; } opbuf[opbufind++] = u; /* add index to buffer */ flags = (flags << 1) | flag; /* add firstonly flag */ if (++n >= 15) { /* if block of 15 indices */ n = 0; *flagp = flags; /* insert flags entry */ if (opbufind >= BUFSZ_2) { /* if pipe out buffer full */ opbufind = 0; if (write(fdout, (char *) opbuf, BUFSZ) != BUFSZ) die("bad pipe write\n"); } } }