Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

source: trunk/minix/commands/simple/decomp16.c@ 15

Last change on this file since 15 was 9, checked in by Mattia Monga, 14 years ago
Minix 3.1.2a
File size: 13.8 KB

Line
1	/* decomp16: decompress 16bit compressed files on a 16bit Intel processor
2	*
3	* Version 1.3 of 25 Mar 92.
4	*
5	* This was written by John N. White on 6/30/91 and is Public Domain.
6	* Patched to run under news by Will Rose, Feb 92.
7	* J N White's (earlier) patches added by Will Rose, 20 Feb 92.
8	* Unsigned int increment/wrap bug fixed by Will Rose, 24 Mar 92.
9	* Argument bug fixed, stdio generalised by Will Rose, 25 Mar 92.
10	*
11	* decomp16 can use as little as 512 bytes of stack; since it forks
12	* four additional copies, it's probably worth using minimum stack rather
13	* than the 8192 byte Minix default. To reduce memory still further,
14	* change BUFSZ below to 256; it is currently set to 1024 for speed. The
15	* minimal decomp16 needs about 280k to run in pipe mode (56k per copy).
16	*
17	* This program acts as a filter:
18	* decomp16 < compressed_file > decompressed_file
19	* The arguments -0 to -4 run only the corresponding pass.
20	* Thus:
21	* decomp16 -4 < compressed_file > 3;
22	* decomp16 -3 < 3 > 2;
23	* decomp16 -2 < 2 > 1;
24	* decomp16 -1 < 1 > 0;
25	* decomp16 -0 < 0 > decompressed_file
26	* will also work, as will connecting the passes by explicit pipes if
27	* there is enough memory to do so. File name arguments can also be
28	* given directly on the command line.
29	*
30	* Compress uses a modified LZW compression algorithm. A compressed file
31	* is a set of indices into a dictionary of strings. The number of bits
32	* used to store each index depends on the number of entries currently
33	* in the dictionary. If there are between 257 and 512 entries, 9 bits
34	* are used. With 513 entries, 10 bits are used, etc. The initial dictionary
35	* consists of 0-255 (which are the corresponding chars) and 256 (which
36	* is a special CLEAR code). As each index in the compressed file is read,
37	* a new entry is added to the dictionary consisting of the current string
38	* with the first char of the next string appended. When the dictionary
39	* is full, no further entries are added. If a CLEAR code is received,
40	* the dictionary will be completely reset. The first two bytes of the
41	* compressed file are a magic number, and the third byte indicates the
42	* maximum number of bits, and whether the CLEAR code is used (older versions
43	* of compress didn't have CLEAR).
44	*
45	* This program works by forking four more copies of itself. The five
46	* programs form a pipeline. Copy 0 writes to stdout, and forks copy 1
47	* to supply its input, which in turn forks and reads from copy 2, etc.
48	* This sequence is used so that when the program exits, all writes
49	* are completed and a program that has exec'd uncompress (such as news)
50	* can immediately use the uncompressed data when the wait() call returns.
51	*
52	* If given a switch -#, where # is a digit from 0 to 4 (example: -2), the
53	* program will run as that copy, reading from stdin and writing to stdout.
54	* This allows decompressing with very limited RAM because only one of the
55	* five passes is in memory at a time.
56	*
57	* The compressed data is a series of string indices (and a header at
58	* the beginning and an occasional CLEAR code). As these indices flow
59	* through the pipes, each program decodes the ones it can. The result
60	* of each decoding will be indices that the following programs can handle.
61	*
62	* Each of the 65536 strings in the dictionary is an earlier string with
63	* some character added to the end (except for the 256 predefined
64	* single char strings). When new entries are made to the dictionary,
65	* the string index part will just be the last index to pass through.
66	* But the char part is the first char of the next string, which isn't
67	* known yet. So the string can be stored as a pair of indices. When
68	* this string is specified, it is converted to this pair of indices,
69	* which are flagged so that the first will be decoded in full while
70	* the second will be decoded to its first char. The dictionary takes
71	* 256k to store (64k strings of 2 indices of 2 bytes each). This is
72	* too big for a 64k data segment, so it is divided into 5 equal parts.
73	* Copy 4 of the program maintains the high part and copy 0 holds the
74	* low part.
75	*/
76
77	#include <sys/types.h>
78	#include <fcntl.h>
79	#include <stdlib.h>
80	#include <unistd.h>
81
/* Tunable sizes and pipeline constants.  The cast-style macros are fully
 * parenthesized so that they behave as single operands in any expression.
 */
#define BUFSZ      1024                      /* size of i/o buffers */
#define BUFSZ_2    (BUFSZ/2)                 /* # of unsigned shorts in i/o bufs */
#define DICTSZ     ((unsigned)13056)         /* # of local dictionary entries */
#define EOF_INDEX  ((unsigned short)0xFFFF)  /* EOF flag for pipeline */
#define FALSE      0
#define TRUE       (~FALSE)

int fdin, fdout, fderr;          /* input, output, and error file descriptors */
int ibufstart, obufind, ibufend; /* i/o buffer indices */
int ipbufind = BUFSZ_2;          /* pipe input index; starts "empty" to force a read */
int opbufind = 0;                /* pipe output index */
int pnum = -1;                   /* ID of this copy (-1: not yet assigned) */
unsigned short ipbuf[BUFSZ_2];   /* for buffering input */
unsigned short opbuf[BUFSZ_2];   /* for buffering output */

/* Byte-wise views of the same two buffers: the char-oriented stdin/stdout
 * routines and the index-oriented pipe routines share the storage.
 */
unsigned char *ibuf = (unsigned char *) ipbuf;
unsigned char *obuf = (unsigned char *) opbuf;

unsigned short dindex[DICTSZ];    /* dictionary: index to substring */
unsigned short dchar[DICTSZ];     /* dictionary: last char of string */
unsigned iindex, tindex, tindex2; /* holds index being processed */
unsigned base;                    /* where in global dict local dict starts */
unsigned tbase;
unsigned locend;                  /* where in global dict local dict ends */
unsigned curend = 256;            /* current end of global dict */
unsigned maxend;                  /* max end of global dict */
int dcharp;                       /* ptr to dchar that needs next index entry */
int curbits;                      /* number of bits for getbits() to read */
int maxbits;                      /* limit on number of bits */
int clearflg;                     /* if set, allow CLEAR */
int inmod;                        /* mod 8 for getbits() */
/* _PROTOTYPE is expected to be supplied by the system headers (on Minix it
 * is presumably pulled in through <sys/types.h>).  Provide an ANSI fallback
 * so the declarations below still work where the macro is not defined.
 */
#ifndef _PROTOTYPE
#define _PROTOTYPE(function, params) function params
#endif

_PROTOTYPE(int main, (int argc, char **argv));
_PROTOTYPE(void ffork, (void));
_PROTOTYPE(void die, (char *s));
_PROTOTYPE(void myputc, (unsigned c));
_PROTOTYPE(unsigned mygetc, (void));
_PROTOTYPE(void getbits, (void));
_PROTOTYPE(void getpipe, (void));
_PROTOTYPE(void putpipe, (unsigned u, int flag));
121
122	int main(argc, argv)
123	int argc;
124	char **argv;
125	{
126	char c, *cp;
127	int j, k, fdtmp;
128	unsigned int len;
129
130	/* Find the program name */
131	j = 0;
132	while (argv[0][j] != '\0') j++;
133	len = (unsigned int) j;
134	while (j--)
135	if (argv[0][j] == '/') break;
136	if (argv[0][j] == '/') j++;
137	cp = argv[0] + j;
138	len -= j;
139
140	/* Sort out the flags */
141	for (k = 1; k < argc; k++) {
142	if (argv[k][0] == '-') {
143	c = argv[k][1];
144	switch (c) {
145	case '0': /* pass numbers */
146	case '1':
147	case '2':
148	case '3':
149	case '4': pnum = c - '0'; break;
150	case 'd': /* used by news */
151	break;
152	default:
153	(void) write(1, "Usage: ", 7);
154	(void) write(1, cp, len);
155	(void) write(1, " [-#] [in] [out]\n", 17);
156	exit(0);
157	break;
158	}
159
160	/* Once it's checked, lose it anyway */
161	for (j = k; j < argc; j++) argv[j] = argv[j + 1];
162	argc--;
163	k--;
164	}
165	}
166
167	/* Default i/o settings */
168	fdin = 0;
169	fdout = 1;
170	fderr = 2;
171
172	/* Try to open specific files and connect them to stdin/stdout */
173	if (argc > 1) {
174	if ((fdtmp = open(argv[1], 0)) == -1) die("input open failed");
175	(void) close(0);
176	if ((fdin = dup(fdtmp)) == -1) die("input dup failed\n");
177	(void) close(fdtmp);
178	}
179	if (argc > 2) {
180	(void) unlink(argv[2]);
181	if ((fdtmp = creat(argv[2], 0666)) == -1) die("output creat failed");
182	(void) close(1);
183	if ((fdout = dup(fdtmp)) == -1) die("output dup failed\n");
184	(void) close(fdtmp);
185	}
186
187	/* Sort out type of compression */
188	if (pnum == -1 \|\| pnum == 4) {/* if this is pass 4 */
189	/* Check header of compressed file */
190	if (mygetc() != 0x1F \|\| mygetc() != 0x9D) /* check magic number */
191	die("not a compressed file\n");
192	iindex = mygetc(); /* get compression style */
193	} else
194	getpipe(); /* get compression style */
195
196	maxbits = iindex & 0x1F;
197	clearflg = ((iindex & 0x80) != 0) ? TRUE : FALSE;
198	if (maxbits < 9 \|\| maxbits > 16) /* check for valid maxbits */
199	die("can't decompress\n");
200	if (pnum != -1 && pnum != 0)
201	putpipe(iindex, 0); /* pass style to next copy */
202
203	/* Fork off an ancestor if necessary - ffork() increments pnum */
204	if (pnum == -1) {
205	pnum = 0;
206	if (pnum == 0) ffork();
207	if (pnum == 1) ffork();
208	if (pnum == 2) ffork();
209	if (pnum == 3) ffork();
210	}
211
212	/* Preliminary inits. Note: end/maxend/curend are highest, not
213	* highest + 1 */
214	base = DICTSZ * pnum + 256;
215	locend = base + DICTSZ - 1;
216	maxend = (1 << maxbits) - 1;
217	if (maxend > locend) maxend = locend;
218
219	while (TRUE) {
220	curend = 255 + (clearflg ? 1 : 0); /* init dictionary */
221	dcharp = DICTSZ; /* flag for none needed */
222	curbits = 9; /* init curbits (for copy 0) */
223	while (TRUE) { /* for each index in input */
224	if (pnum == 4) {/* get index using getbits() */
225	if (curbits < maxbits && (1 << curbits) <= curend) {
226	/* Curbits needs to be increased */
227	/* Due to uglyness in compress, these
228	* indices in the compressed file are
229	* wasted */
230	while (inmod) getbits();
231	curbits++;
232	}
233	getbits();
234	} else
235	getpipe(); /* get next index */
236
237	if (iindex == 256 && clearflg) {
238	if (pnum > 0) putpipe(iindex, 0);
239	/* Due to uglyness in compress, these indices
240	* in the compressed file are wasted */
241	while (inmod) getbits();
242	break;
243	}
244	tindex = iindex;
245	/* Convert the index part, ignoring spawned chars */
246	while (tindex >= base) tindex = dindex[tindex - base];
247	/* Pass on the index */
248	putpipe(tindex, 0);
249	/* Save the char of the last added entry, if any */
250	if (dcharp < DICTSZ) dchar[dcharp++] = tindex;
251	if (curend < maxend && ++curend > (base - 1))
252	dindex[dcharp = (curend - base)] = iindex;
253
254	/* Do spawned chars. They are naturally produced in
255	* the wrong order. To get them in the right order
256	* without using memory, a series of passes,
257	* progressively less deep, are used */
258	tbase = base;
259	while ((tindex = iindex) >= tbase) {/* for each char to spawn*/
260	while ((tindex2 = dindex[tindex - base]) >= tbase)
261	tindex = tindex2; /* scan to desired char */
262	putpipe(dchar[tindex-base], 1); /* put it to the pipe*/
263	tbase = tindex + 1;
264	if (tbase == 0) break; /* it's a wrap */
265	}
266	}
267	}
268	}
269
270
271	/* F f o r k
272	*
273	* Fork off the previous pass - the parent reads from the child.
274	*/
275	void ffork()
276	{
277	int j, pfd[2];
278
279	if (pipe(pfd) == -1) die("pipe() error\n");
280	if ((j = fork()) == -1) die("fork() error\n");
281	if (j == 0) { /* this is the child */
282	if (close(1) == -1) die("close(1) error\n");
283	if (dup(pfd[1]) != 1) die("dup(1) error\n");
284	(void) close(pfd[0]);
285	pnum++;
286	} else { /* this is the parent */
287	if (close(0) == -1) die("close(0) error\n");
288	if (dup(pfd[0]) != 0) die("dup(0) error\n");
289	(void) close(pfd[1]);
290	}
291	}
292
293
294	/* D i e
295	*
296	* If s is a message, write it to stderr. Flush buffers if needed. Then exit.
297	*/
298	void die(s)
299	char *s;
300	{
301	/* Flush stdout buffer if needed */
302	if (obufind != 0) {
303	if (write(fdout, (char *) obuf, (unsigned) obufind) != obufind)
304	s = "bad stdout write\n";
305	obufind = 0;
306	}
307
308	/* Flush pipe if needed */
309	do
310	putpipe(EOF_INDEX, 0);
311	while (opbufind);
312	/* Write any error message */
313	if (s != (char *) NULL) {
314	while (*s) (void) write(fderr, s++, 1);
315	}
316	exit((s == (char *) NULL) ? 0 : 1);
317	}
318
319
320	/* M p u t c
321	*
322	* Put a char to stdout.
323	*/
324	void myputc(c)
325	unsigned c;
326	{
327	obuf[obufind++] = c;
328	if (obufind >= BUFSZ) { /* if stdout buffer full */
329	if (write(fdout, (char ) obuf, BUFSZ) != BUFSZ) / flush to stdout */
330	die("bad stdout write\n");
331	obufind = 0;
332	}
333	}
334
335
336	/* M y g e t c
337	*
338	* Get a char from stdin. If EOF, then die() and exit.
339	*/
340	unsigned mygetc()
341	{
342	if (ibufstart >= ibufend) { /* if stdin buffer empty */
343	if ((ibufend = read(fdin, (char *) ibuf, BUFSZ)) <= 0)
344	die((char ) NULL); / if EOF, do normal exit */
345	ibufstart = 0;
346	}
347	return(ibuf[ibufstart++] & 0xff);
348	}
349
350
351	/* G e t b i t s
352	*
353	* Put curbits bits into index from stdin. Note: only copy 4 uses this.
354	* The bits within a byte are in the correct order. But when the bits
355	* cross a byte boundry, the lowest bits will be in the higher part of
356	* the current byte, and the higher bits will be in the lower part of
357	* the next byte.
358	*/
359	void getbits()
360	{
361	int have;
362	static unsigned curbyte; /* byte having bits extracted from it */
363	static int left; /* how many bits are left in curbyte */
364
365	inmod = (inmod + 1) & 7; /* count input mod 8 */
366	iindex = curbyte;
367	have = left;
368	if (curbits - have > 8) {
369	iindex \|= mygetc() << have;
370	have += 8;
371	}
372	iindex \|= ((curbyte = mygetc()) << have) & ~((unsigned) 0xFFFF << curbits);
373	curbyte >>= curbits - have;
374	left = 8 - (curbits - have);
375	}
376
377
378	/* G e t p i p e
379	*
380	* Get an index from the pipeline. If flagged firstonly, handle it here.
381	*/
382	void getpipe()
383	{
384	static short flags;
385	static int n = 0; /* number of flags in flags */
386
387	while (TRUE) { /* while index with firstonly flag set */
388	if (n <= 0) {
389	if (ipbufind >= BUFSZ_2) { /* if pipe input buffer
390	* empty */
391	if (read(fdin, (char *) ipbuf, BUFSZ) != BUFSZ)
392	die("bad pipe read\n");
393	ipbufind = 0;
394	}
395	flags = ipbuf[ipbufind++];
396	n = 15;
397	}
398	iindex = ipbuf[ipbufind++];
399	if (iindex > curend)
400	die((iindex == EOF_INDEX) ? (char *) NULL : "invalid data\n");
401	flags <<= 1;
402	n--;
403	/* Assume flags < 0 if highest remaining flag is set */
404	if (flags < 0) { /* if firstonly flag for index is not set */
405	while (iindex >= base) iindex = dindex[iindex - base];
406	putpipe(iindex, 1);
407	} else
408	return; /* return with valid non-firstonly index */
409	}
410	}
411
412
413	/* P u t p i p e
414	*
415	* put an index into the pipeline.
416	*/
417	void putpipe(u, flag)
418	unsigned u;
419	int flag;
420	{
421	static unsigned short flags, *flagp;
422	static int n = 0; /* number of flags in flags */
423
424	if (pnum == 0) { /* if we should write to stdout */
425	myputc(u); /* index will be the char value */
426	return;
427	}
428	if (n == 0) { /* if we need to reserve a flag entry */
429	flags = 0;
430	flagp = opbuf + opbufind;
431	opbufind++;
432	}
433	opbuf[opbufind++] = u; /* add index to buffer */
434	flags = (flags << 1) \| flag; /* add firstonly flag */
435	if (++n >= 15) { /* if block of 15 indices */
436	n = 0;
437	flagp = flags; / insert flags entry */
438	if (opbufind >= BUFSZ_2) { /* if pipe out buffer full */
439	opbufind = 0;
440	if (write(fdout, (char *) opbuf, BUFSZ) != BUFSZ)
441	die("bad pipe write\n");
442	}
443	}
444	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: