1 | /* decomp16: decompress 16bit compressed files on a 16bit Intel processor
|
---|
2 | *
|
---|
3 | * Version 1.3 of 25 Mar 92.
|
---|
4 | *
|
---|
5 | * This was written by John N. White on 6/30/91 and is Public Domain.
|
---|
6 | * Patched to run under news by Will Rose, Feb 92.
|
---|
7 | * J N White's (earlier) patches added by Will Rose, 20 Feb 92.
|
---|
8 | * Unsigned int increment/wrap bug fixed by Will Rose, 24 Mar 92.
|
---|
9 | * Argument bug fixed, stdio generalised by Will Rose, 25 Mar 92.
|
---|
10 | *
|
---|
11 | * decomp16 can use as as little as 512 bytes of stack; since it forks
|
---|
12 | * four additional copies, it's probably worth using minimum stack rather
|
---|
13 | * than the 8192 byte Minix default. To reduce memory still further,
|
---|
14 | * change BUFSZ below to 256; it is currently set to 1024 for speed. The
|
---|
15 | * minimal decomp16 needs about 280k to run in pipe mode (56k per copy).
|
---|
16 | *
|
---|
17 | * This program acts as a filter:
|
---|
18 | * decomp16 < compressed_file > decompressed_file
|
---|
19 | * The arguments -0 to -4 run only the corresponding pass.
|
---|
20 | * Thus:
|
---|
21 | * decomp16 -4 < compressed_file > 3;
|
---|
22 | * decomp16 -3 < 3 > 2;
|
---|
23 | * decomp16 -2 < 2 > 1;
|
---|
24 | * decomp16 -1 < 1 > 0;
|
---|
25 | * decomp16 -0 < 0 > decompressed_file
|
---|
26 | * will also work, as will connecting the passes by explicit pipes if
|
---|
27 | * there is enough memory to do so. File name arguments can also be
|
---|
28 | * given directly on the command line.
|
---|
29 | *
|
---|
30 | * Compress uses a modified LZW compression algorithm. A compressed file
|
---|
31 | * is a set of indices into a dictionary of strings. The number of bits
|
---|
32 | * used to store each index depends on the number of entries currently
|
---|
33 | * in the dictionary. If there are between 257 and 512 entries, 9 bits
|
---|
34 | * are used. With 513 entries, 10 bits are used, etc. The initial dictionary
|
---|
35 | * consists of 0-255 (which are the corresponding chars) and 256 (which
|
---|
36 | * is a special CLEAR code). As each index in the compressed file is read,
|
---|
37 | * a new entry is added to the dictionary consisting of the current string
|
---|
38 | * with the first char of the next string appended. When the dictionary
|
---|
39 | * is full, no further entries are added. If a CLEAR code is received,
|
---|
40 | * the dictionary will be completely reset. The first two bytes of the
|
---|
41 | * compressed file are a magic number, and the third byte indicates the
|
---|
42 | * maximum number of bits, and whether the CLEAR code is used (older versions
|
---|
43 | * of compress didn't have CLEAR).
|
---|
44 | *
|
---|
45 | * This program works by forking four more copies of itself. The five
|
---|
46 | * programs form a pipeline. Copy 0 writes to stdout, and forks copy 1
|
---|
47 | * to supply its input, which in turn forks and reads from copy 2, etc.
|
---|
48 | * This sequence is used so that when the program exits, all writes
|
---|
49 | * are completed and a program that has exec'd uncompress (such as news)
|
---|
50 | * can immediately use the uncompressed data when the wait() call returns.
|
---|
51 | *
|
---|
52 | * If given a switch -#, where # is a digit from 0 to 4 (example: -2), the
|
---|
53 | * program will run as that copy, reading from stdin and writing to stdout.
|
---|
54 | * This allows decompressing with very limited RAM because only one of the
|
---|
55 | * five passes is in memory at a time.
|
---|
56 | *
|
---|
57 | * The compressed data is a series of string indices (and a header at
|
---|
58 | * the beginning and an occasional CLEAR code). As these indices flow
|
---|
59 | * through the pipes, each program decodes the ones it can. The result
|
---|
60 | * of each decoding will be indices that the following programs can handle.
|
---|
61 | *
|
---|
62 | * Each of the 65536 strings in the dictionary is an earlier string with
|
---|
63 | * some character added to the end (except for the the 256 predefined
|
---|
64 | * single char strings). When new entries are made to the dictionary,
|
---|
65 | * the string index part will just be the last index to pass through.
|
---|
66 | * But the char part is the first char of the next string, which isn't
|
---|
67 | * known yet. So the string can be stored as a pair of indices. When
|
---|
68 | * this string is specified, it is converted to this pair of indices,
|
---|
69 | * which are flagged so that the first will be decoded in full while
|
---|
70 | * the second will be decoded to its first char. The dictionary takes
|
---|
71 | * 256k to store (64k strings of 2 indices of 2 bytes each). This is
|
---|
72 | * too big for a 64k data segment, so it is divided into 5 equal parts.
|
---|
73 | * Copy 4 of the program maintains the high part and copy 0 holds the
|
---|
74 | * low part.
|
---|
75 | */
|
---|
76 |
|
---|
77 | #include <sys/types.h>
|
---|
78 | #include <fcntl.h>
|
---|
79 | #include <stdlib.h>
|
---|
80 | #include <unistd.h>
|
---|
81 |
|
---|
82 | #define BUFSZ 1024 /* size of i/o buffers */
|
---|
83 | #define BUFSZ_2 (BUFSZ/2) /* # of unsigned shorts in i/o bufs */
|
---|
84 | #define DICTSZ (unsigned)13056 /* # of local dictionary entries */
|
---|
85 | #define EOF_INDEX (unsigned short)0xFFFF /* EOF flag for pipeline */
|
---|
86 | #define FALSE 0
|
---|
87 | #define TRUE ~FALSE
|
---|
88 |
|
---|
89 | int fdin, fdout, fderr; /* input, output, and error file descriptors */
|
---|
90 | int ibufstart, obufind, ibufend;/* i/o buffer indices */
|
---|
91 | int ipbufind = BUFSZ_2; /* pipe buffer indices */
|
---|
92 | int opbufind = 0;
|
---|
93 | int pnum = -1; /* ID of this copy */
|
---|
94 | unsigned short ipbuf[BUFSZ_2]; /* for buffering input */
|
---|
95 | unsigned short opbuf[BUFSZ_2]; /* for buffering output */
|
---|
96 | unsigned char *ibuf = (unsigned char *) ipbuf;
|
---|
97 | unsigned char *obuf = (unsigned char *) opbuf;
|
---|
98 |
|
---|
99 | unsigned short dindex[DICTSZ]; /* dictionary: index to substring */
|
---|
100 | unsigned short dchar[DICTSZ]; /* dictionary: last char of string */
|
---|
101 | unsigned iindex, tindex, tindex2; /* holds index being processed */
|
---|
102 | unsigned base; /* where in global dict local dict starts */
|
---|
103 | unsigned tbase;
|
---|
104 | unsigned locend; /* where in global dict local dict ends */
|
---|
105 | unsigned curend = 256; /* current end of global dict */
|
---|
106 | unsigned maxend; /* max end of global dict */
|
---|
107 | int dcharp; /* ptr to dchar that needs next index entry */
|
---|
108 | int curbits; /* number of bits for getbits() to read */
|
---|
109 | int maxbits; /* limit on number of bits */
|
---|
110 | int clearflg; /* if set, allow CLEAR */
|
---|
111 | int inmod; /* mod 8 for getbits() */
|
---|
112 |
|
---|
113 | _PROTOTYPE(int main, (int argc, char **argv));
|
---|
114 | _PROTOTYPE(void ffork, (void));
|
---|
115 | _PROTOTYPE(void die, (char *s));
|
---|
116 | _PROTOTYPE(void myputc, (unsigned c));
|
---|
117 | _PROTOTYPE(unsigned mygetc, (void));
|
---|
118 | _PROTOTYPE(void getbits, (void));
|
---|
119 | _PROTOTYPE(void getpipe, (void));
|
---|
120 | _PROTOTYPE(void putpipe, (unsigned u, int flag));
|
---|
121 |
|
---|
122 | int main(argc, argv)
|
---|
123 | int argc;
|
---|
124 | char **argv;
|
---|
125 | {
|
---|
126 | char c, *cp;
|
---|
127 | int j, k, fdtmp;
|
---|
128 | unsigned int len;
|
---|
129 |
|
---|
130 | /* Find the program name */
|
---|
131 | j = 0;
|
---|
132 | while (argv[0][j] != '\0') j++;
|
---|
133 | len = (unsigned int) j;
|
---|
134 | while (j--)
|
---|
135 | if (argv[0][j] == '/') break;
|
---|
136 | if (argv[0][j] == '/') j++;
|
---|
137 | cp = argv[0] + j;
|
---|
138 | len -= j;
|
---|
139 |
|
---|
140 | /* Sort out the flags */
|
---|
141 | for (k = 1; k < argc; k++) {
|
---|
142 | if (argv[k][0] == '-') {
|
---|
143 | c = argv[k][1];
|
---|
144 | switch (c) {
|
---|
145 | case '0': /* pass numbers */
|
---|
146 | case '1':
|
---|
147 | case '2':
|
---|
148 | case '3':
|
---|
149 | case '4': pnum = c - '0'; break;
|
---|
150 | case 'd': /* used by news */
|
---|
151 | break;
|
---|
152 | default:
|
---|
153 | (void) write(1, "Usage: ", 7);
|
---|
154 | (void) write(1, cp, len);
|
---|
155 | (void) write(1, " [-#] [in] [out]\n", 17);
|
---|
156 | exit(0);
|
---|
157 | break;
|
---|
158 | }
|
---|
159 |
|
---|
160 | /* Once it's checked, lose it anyway */
|
---|
161 | for (j = k; j < argc; j++) argv[j] = argv[j + 1];
|
---|
162 | argc--;
|
---|
163 | k--;
|
---|
164 | }
|
---|
165 | }
|
---|
166 |
|
---|
167 | /* Default i/o settings */
|
---|
168 | fdin = 0;
|
---|
169 | fdout = 1;
|
---|
170 | fderr = 2;
|
---|
171 |
|
---|
172 | /* Try to open specific files and connect them to stdin/stdout */
|
---|
173 | if (argc > 1) {
|
---|
174 | if ((fdtmp = open(argv[1], 0)) == -1) die("input open failed");
|
---|
175 | (void) close(0);
|
---|
176 | if ((fdin = dup(fdtmp)) == -1) die("input dup failed\n");
|
---|
177 | (void) close(fdtmp);
|
---|
178 | }
|
---|
179 | if (argc > 2) {
|
---|
180 | (void) unlink(argv[2]);
|
---|
181 | if ((fdtmp = creat(argv[2], 0666)) == -1) die("output creat failed");
|
---|
182 | (void) close(1);
|
---|
183 | if ((fdout = dup(fdtmp)) == -1) die("output dup failed\n");
|
---|
184 | (void) close(fdtmp);
|
---|
185 | }
|
---|
186 |
|
---|
187 | /* Sort out type of compression */
|
---|
188 | if (pnum == -1 || pnum == 4) {/* if this is pass 4 */
|
---|
189 | /* Check header of compressed file */
|
---|
190 | if (mygetc() != 0x1F || mygetc() != 0x9D) /* check magic number */
|
---|
191 | die("not a compressed file\n");
|
---|
192 | iindex = mygetc(); /* get compression style */
|
---|
193 | } else
|
---|
194 | getpipe(); /* get compression style */
|
---|
195 |
|
---|
196 | maxbits = iindex & 0x1F;
|
---|
197 | clearflg = ((iindex & 0x80) != 0) ? TRUE : FALSE;
|
---|
198 | if (maxbits < 9 || maxbits > 16) /* check for valid maxbits */
|
---|
199 | die("can't decompress\n");
|
---|
200 | if (pnum != -1 && pnum != 0)
|
---|
201 | putpipe(iindex, 0); /* pass style to next copy */
|
---|
202 |
|
---|
203 | /* Fork off an ancestor if necessary - ffork() increments pnum */
|
---|
204 | if (pnum == -1) {
|
---|
205 | pnum = 0;
|
---|
206 | if (pnum == 0) ffork();
|
---|
207 | if (pnum == 1) ffork();
|
---|
208 | if (pnum == 2) ffork();
|
---|
209 | if (pnum == 3) ffork();
|
---|
210 | }
|
---|
211 |
|
---|
212 | /* Preliminary inits. Note: end/maxend/curend are highest, not
|
---|
213 | * highest + 1 */
|
---|
214 | base = DICTSZ * pnum + 256;
|
---|
215 | locend = base + DICTSZ - 1;
|
---|
216 | maxend = (1 << maxbits) - 1;
|
---|
217 | if (maxend > locend) maxend = locend;
|
---|
218 |
|
---|
219 | while (TRUE) {
|
---|
220 | curend = 255 + (clearflg ? 1 : 0); /* init dictionary */
|
---|
221 | dcharp = DICTSZ; /* flag for none needed */
|
---|
222 | curbits = 9; /* init curbits (for copy 0) */
|
---|
223 | while (TRUE) { /* for each index in input */
|
---|
224 | if (pnum == 4) {/* get index using getbits() */
|
---|
225 | if (curbits < maxbits && (1 << curbits) <= curend) {
|
---|
226 | /* Curbits needs to be increased */
|
---|
227 | /* Due to uglyness in compress, these
|
---|
228 | * indices in the compressed file are
|
---|
229 | * wasted */
|
---|
230 | while (inmod) getbits();
|
---|
231 | curbits++;
|
---|
232 | }
|
---|
233 | getbits();
|
---|
234 | } else
|
---|
235 | getpipe(); /* get next index */
|
---|
236 |
|
---|
237 | if (iindex == 256 && clearflg) {
|
---|
238 | if (pnum > 0) putpipe(iindex, 0);
|
---|
239 | /* Due to uglyness in compress, these indices
|
---|
240 | * in the compressed file are wasted */
|
---|
241 | while (inmod) getbits();
|
---|
242 | break;
|
---|
243 | }
|
---|
244 | tindex = iindex;
|
---|
245 | /* Convert the index part, ignoring spawned chars */
|
---|
246 | while (tindex >= base) tindex = dindex[tindex - base];
|
---|
247 | /* Pass on the index */
|
---|
248 | putpipe(tindex, 0);
|
---|
249 | /* Save the char of the last added entry, if any */
|
---|
250 | if (dcharp < DICTSZ) dchar[dcharp++] = tindex;
|
---|
251 | if (curend < maxend && ++curend > (base - 1))
|
---|
252 | dindex[dcharp = (curend - base)] = iindex;
|
---|
253 |
|
---|
254 | /* Do spawned chars. They are naturally produced in
|
---|
255 | * the wrong order. To get them in the right order
|
---|
256 | * without using memory, a series of passes,
|
---|
257 | * progressively less deep, are used */
|
---|
258 | tbase = base;
|
---|
259 | while ((tindex = iindex) >= tbase) {/* for each char to spawn*/
|
---|
260 | while ((tindex2 = dindex[tindex - base]) >= tbase)
|
---|
261 | tindex = tindex2; /* scan to desired char */
|
---|
262 | putpipe(dchar[tindex-base], 1); /* put it to the pipe*/
|
---|
263 | tbase = tindex + 1;
|
---|
264 | if (tbase == 0) break; /* it's a wrap */
|
---|
265 | }
|
---|
266 | }
|
---|
267 | }
|
---|
268 | }
|
---|
269 |
|
---|
270 |
|
---|
271 | /* F f o r k
|
---|
272 | *
|
---|
273 | * Fork off the previous pass - the parent reads from the child.
|
---|
274 | */
|
---|
275 | void ffork()
|
---|
276 | {
|
---|
277 | int j, pfd[2];
|
---|
278 |
|
---|
279 | if (pipe(pfd) == -1) die("pipe() error\n");
|
---|
280 | if ((j = fork()) == -1) die("fork() error\n");
|
---|
281 | if (j == 0) { /* this is the child */
|
---|
282 | if (close(1) == -1) die("close(1) error\n");
|
---|
283 | if (dup(pfd[1]) != 1) die("dup(1) error\n");
|
---|
284 | (void) close(pfd[0]);
|
---|
285 | pnum++;
|
---|
286 | } else { /* this is the parent */
|
---|
287 | if (close(0) == -1) die("close(0) error\n");
|
---|
288 | if (dup(pfd[0]) != 0) die("dup(0) error\n");
|
---|
289 | (void) close(pfd[1]);
|
---|
290 | }
|
---|
291 | }
|
---|
292 |
|
---|
293 |
|
---|
294 | /* D i e
|
---|
295 | *
|
---|
296 | * If s is a message, write it to stderr. Flush buffers if needed. Then exit.
|
---|
297 | */
|
---|
298 | void die(s)
|
---|
299 | char *s;
|
---|
300 | {
|
---|
301 | /* Flush stdout buffer if needed */
|
---|
302 | if (obufind != 0) {
|
---|
303 | if (write(fdout, (char *) obuf, (unsigned) obufind) != obufind)
|
---|
304 | s = "bad stdout write\n";
|
---|
305 | obufind = 0;
|
---|
306 | }
|
---|
307 |
|
---|
308 | /* Flush pipe if needed */
|
---|
309 | do
|
---|
310 | putpipe(EOF_INDEX, 0);
|
---|
311 | while (opbufind);
|
---|
312 | /* Write any error message */
|
---|
313 | if (s != (char *) NULL) {
|
---|
314 | while (*s) (void) write(fderr, s++, 1);
|
---|
315 | }
|
---|
316 | exit((s == (char *) NULL) ? 0 : 1);
|
---|
317 | }
|
---|
318 |
|
---|
319 |
|
---|
320 | /* M p u t c
|
---|
321 | *
|
---|
322 | * Put a char to stdout.
|
---|
323 | */
|
---|
324 | void myputc(c)
|
---|
325 | unsigned c;
|
---|
326 | {
|
---|
327 | obuf[obufind++] = c;
|
---|
328 | if (obufind >= BUFSZ) { /* if stdout buffer full */
|
---|
329 | if (write(fdout, (char *) obuf, BUFSZ) != BUFSZ) /* flush to stdout */
|
---|
330 | die("bad stdout write\n");
|
---|
331 | obufind = 0;
|
---|
332 | }
|
---|
333 | }
|
---|
334 |
|
---|
335 |
|
---|
336 | /* M y g e t c
|
---|
337 | *
|
---|
338 | * Get a char from stdin. If EOF, then die() and exit.
|
---|
339 | */
|
---|
340 | unsigned mygetc()
|
---|
341 | {
|
---|
342 | if (ibufstart >= ibufend) { /* if stdin buffer empty */
|
---|
343 | if ((ibufend = read(fdin, (char *) ibuf, BUFSZ)) <= 0)
|
---|
344 | die((char *) NULL); /* if EOF, do normal exit */
|
---|
345 | ibufstart = 0;
|
---|
346 | }
|
---|
347 | return(ibuf[ibufstart++] & 0xff);
|
---|
348 | }
|
---|
349 |
|
---|
350 |
|
---|
351 | /* G e t b i t s
|
---|
352 | *
|
---|
353 | * Put curbits bits into index from stdin. Note: only copy 4 uses this.
|
---|
354 | * The bits within a byte are in the correct order. But when the bits
|
---|
355 | * cross a byte boundry, the lowest bits will be in the higher part of
|
---|
356 | * the current byte, and the higher bits will be in the lower part of
|
---|
357 | * the next byte.
|
---|
358 | */
|
---|
359 | void getbits()
|
---|
360 | {
|
---|
361 | int have;
|
---|
362 | static unsigned curbyte; /* byte having bits extracted from it */
|
---|
363 | static int left; /* how many bits are left in curbyte */
|
---|
364 |
|
---|
365 | inmod = (inmod + 1) & 7; /* count input mod 8 */
|
---|
366 | iindex = curbyte;
|
---|
367 | have = left;
|
---|
368 | if (curbits - have > 8) {
|
---|
369 | iindex |= mygetc() << have;
|
---|
370 | have += 8;
|
---|
371 | }
|
---|
372 | iindex |= ((curbyte = mygetc()) << have) & ~((unsigned) 0xFFFF << curbits);
|
---|
373 | curbyte >>= curbits - have;
|
---|
374 | left = 8 - (curbits - have);
|
---|
375 | }
|
---|
376 |
|
---|
377 |
|
---|
378 | /* G e t p i p e
|
---|
379 | *
|
---|
380 | * Get an index from the pipeline. If flagged firstonly, handle it here.
|
---|
381 | */
|
---|
382 | void getpipe()
|
---|
383 | {
|
---|
384 | static short flags;
|
---|
385 | static int n = 0; /* number of flags in flags */
|
---|
386 |
|
---|
387 | while (TRUE) { /* while index with firstonly flag set */
|
---|
388 | if (n <= 0) {
|
---|
389 | if (ipbufind >= BUFSZ_2) { /* if pipe input buffer
|
---|
390 | * empty */
|
---|
391 | if (read(fdin, (char *) ipbuf, BUFSZ) != BUFSZ)
|
---|
392 | die("bad pipe read\n");
|
---|
393 | ipbufind = 0;
|
---|
394 | }
|
---|
395 | flags = ipbuf[ipbufind++];
|
---|
396 | n = 15;
|
---|
397 | }
|
---|
398 | iindex = ipbuf[ipbufind++];
|
---|
399 | if (iindex > curend)
|
---|
400 | die((iindex == EOF_INDEX) ? (char *) NULL : "invalid data\n");
|
---|
401 | flags <<= 1;
|
---|
402 | n--;
|
---|
403 | /* Assume flags < 0 if highest remaining flag is set */
|
---|
404 | if (flags < 0) { /* if firstonly flag for index is not set */
|
---|
405 | while (iindex >= base) iindex = dindex[iindex - base];
|
---|
406 | putpipe(iindex, 1);
|
---|
407 | } else
|
---|
408 | return; /* return with valid non-firstonly index */
|
---|
409 | }
|
---|
410 | }
|
---|
411 |
|
---|
412 |
|
---|
413 | /* P u t p i p e
|
---|
414 | *
|
---|
415 | * put an index into the pipeline.
|
---|
416 | */
|
---|
417 | void putpipe(u, flag)
|
---|
418 | unsigned u;
|
---|
419 | int flag;
|
---|
420 | {
|
---|
421 | static unsigned short flags, *flagp;
|
---|
422 | static int n = 0; /* number of flags in flags */
|
---|
423 |
|
---|
424 | if (pnum == 0) { /* if we should write to stdout */
|
---|
425 | myputc(u); /* index will be the char value */
|
---|
426 | return;
|
---|
427 | }
|
---|
428 | if (n == 0) { /* if we need to reserve a flag entry */
|
---|
429 | flags = 0;
|
---|
430 | flagp = opbuf + opbufind;
|
---|
431 | opbufind++;
|
---|
432 | }
|
---|
433 | opbuf[opbufind++] = u; /* add index to buffer */
|
---|
434 | flags = (flags << 1) | flag; /* add firstonly flag */
|
---|
435 | if (++n >= 15) { /* if block of 15 indices */
|
---|
436 | n = 0;
|
---|
437 | *flagp = flags; /* insert flags entry */
|
---|
438 | if (opbufind >= BUFSZ_2) { /* if pipe out buffer full */
|
---|
439 | opbufind = 0;
|
---|
440 | if (write(fdout, (char *) opbuf, BUFSZ) != BUFSZ)
|
---|
441 | die("bad pipe write\n");
|
---|
442 | }
|
---|
443 | }
|
---|
444 | }
|
---|