Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Normal
Revision Log

source: trunk/minix/commands/simple/decomp16.c@ 15

Last change on this file since 15 was 9, checked in by Mattia Monga, 14 years ago
Minix 3.1.2a
File size: 13.8 KB

Rev	Line
[9]	1	/* decomp16: decompress 16bit compressed files on a 16bit Intel processor
	2	*
	3	* Version 1.3 of 25 Mar 92.
	4	*
	5	* This was written by John N. White on 6/30/91 and is Public Domain.
	6	* Patched to run under news by Will Rose, Feb 92.
	7	* J N White's (earlier) patches added by Will Rose, 20 Feb 92.
	8	* Unsigned int increment/wrap bug fixed by Will Rose, 24 Mar 92.
	9	* Argument bug fixed, stdio generalised by Will Rose, 25 Mar 92.
	10	*
	11	* decomp16 can use as as little as 512 bytes of stack; since it forks
	12	* four additional copies, it's probably worth using minimum stack rather
	13	* than the 8192 byte Minix default. To reduce memory still further,
	14	* change BUFSZ below to 256; it is currently set to 1024 for speed. The
	15	* minimal decomp16 needs about 280k to run in pipe mode (56k per copy).
	16	*
	17	* This program acts as a filter:
	18	* decomp16 < compressed_file > decompressed_file
	19	* The arguments -0 to -4 run only the corresponding pass.
	20	* Thus:
	21	* decomp16 -4 < compressed_file > 3;
	22	* decomp16 -3 < 3 > 2;
	23	* decomp16 -2 < 2 > 1;
	24	* decomp16 -1 < 1 > 0;
	25	* decomp16 -0 < 0 > decompressed_file
	26	* will also work, as will connecting the passes by explicit pipes if
	27	* there is enough memory to do so. File name arguments can also be
	28	* given directly on the command line.
	29	*
	30	* Compress uses a modified LZW compression algorithm. A compressed file
	31	* is a set of indices into a dictionary of strings. The number of bits
	32	* used to store each index depends on the number of entries currently
	33	* in the dictionary. If there are between 257 and 512 entries, 9 bits
	34	* are used. With 513 entries, 10 bits are used, etc. The initial dictionary
	35	* consists of 0-255 (which are the corresponding chars) and 256 (which
	36	* is a special CLEAR code). As each index in the compressed file is read,
	37	* a new entry is added to the dictionary consisting of the current string
	38	* with the first char of the next string appended. When the dictionary
	39	* is full, no further entries are added. If a CLEAR code is received,
	40	* the dictionary will be completely reset. The first two bytes of the
	41	* compressed file are a magic number, and the third byte indicates the
	42	* maximum number of bits, and whether the CLEAR code is used (older versions
	43	* of compress didn't have CLEAR).
	44	*
	45	* This program works by forking four more copies of itself. The five
	46	* programs form a pipeline. Copy 0 writes to stdout, and forks copy 1
	47	* to supply its input, which in turn forks and reads from copy 2, etc.
	48	* This sequence is used so that when the program exits, all writes
	49	* are completed and a program that has exec'd uncompress (such as news)
	50	* can immediately use the uncompressed data when the wait() call returns.
	51	*
	52	* If given a switch -#, where # is a digit from 0 to 4 (example: -2), the
	53	* program will run as that copy, reading from stdin and writing to stdout.
	54	* This allows decompressing with very limited RAM because only one of the
	55	* five passes is in memory at a time.
	56	*
	57	* The compressed data is a series of string indices (and a header at
	58	* the beginning and an occasional CLEAR code). As these indices flow
	59	* through the pipes, each program decodes the ones it can. The result
	60	* of each decoding will be indices that the following programs can handle.
	61	*
	62	* Each of the 65536 strings in the dictionary is an earlier string with
	63	* some character added to the end (except for the the 256 predefined
	64	* single char strings). When new entries are made to the dictionary,
	65	* the string index part will just be the last index to pass through.
	66	* But the char part is the first char of the next string, which isn't
	67	* known yet. So the string can be stored as a pair of indices. When
	68	* this string is specified, it is converted to this pair of indices,
	69	* which are flagged so that the first will be decoded in full while
	70	* the second will be decoded to its first char. The dictionary takes
	71	* 256k to store (64k strings of 2 indices of 2 bytes each). This is
	72	* too big for a 64k data segment, so it is divided into 5 equal parts.
	73	* Copy 4 of the program maintains the high part and copy 0 holds the
	74	* low part.
	75	*/
	76
	77	#include <sys/types.h>
	78	#include <fcntl.h>
	79	#include <stdlib.h>
	80	#include <unistd.h>
	81
	82	#define BUFSZ 1024 /* size of i/o buffers */
	83	#define BUFSZ_2 (BUFSZ/2) /* # of unsigned shorts in i/o bufs */
	84	#define DICTSZ (unsigned)13056 /* # of local dictionary entries */
	85	#define EOF_INDEX (unsigned short)0xFFFF /* EOF flag for pipeline */
	86	#define FALSE 0
	87	#define TRUE ~FALSE
	88
	89	int fdin, fdout, fderr; /* input, output, and error file descriptors */
	90	int ibufstart, obufind, ibufend;/* i/o buffer indices */
	91	int ipbufind = BUFSZ_2; /* pipe buffer indices */
	92	int opbufind = 0;
	93	int pnum = -1; /* ID of this copy */
	94	unsigned short ipbuf[BUFSZ_2]; /* for buffering input */
	95	unsigned short opbuf[BUFSZ_2]; /* for buffering output */
	96	unsigned char ibuf = (unsigned char ) ipbuf;
	97	unsigned char obuf = (unsigned char ) opbuf;
	98
	99	unsigned short dindex[DICTSZ]; /* dictionary: index to substring */
	100	unsigned short dchar[DICTSZ]; /* dictionary: last char of string */
	101	unsigned iindex, tindex, tindex2; /* holds index being processed */
	102	unsigned base; /* where in global dict local dict starts */
	103	unsigned tbase;
	104	unsigned locend; /* where in global dict local dict ends */
	105	unsigned curend = 256; /* current end of global dict */
	106	unsigned maxend; /* max end of global dict */
	107	int dcharp; /* ptr to dchar that needs next index entry */
	108	int curbits; /* number of bits for getbits() to read */
	109	int maxbits; /* limit on number of bits */
	110	int clearflg; /* if set, allow CLEAR */
	111	int inmod; /* mod 8 for getbits() */
	112
	113	_PROTOTYPE(int main, (int argc, char **argv));
	114	_PROTOTYPE(void ffork, (void));
	115	_PROTOTYPE(void die, (char *s));
	116	_PROTOTYPE(void myputc, (unsigned c));
	117	_PROTOTYPE(unsigned mygetc, (void));
	118	_PROTOTYPE(void getbits, (void));
	119	_PROTOTYPE(void getpipe, (void));
	120	_PROTOTYPE(void putpipe, (unsigned u, int flag));
	121
	122	int main(argc, argv)
	123	int argc;
	124	char **argv;
	125	{
	126	char c, *cp;
	127	int j, k, fdtmp;
	128	unsigned int len;
	129
	130	/* Find the program name */
	131	j = 0;
	132	while (argv[0][j] != '\0') j++;
	133	len = (unsigned int) j;
	134	while (j--)
	135	if (argv[0][j] == '/') break;
	136	if (argv[0][j] == '/') j++;
	137	cp = argv[0] + j;
	138	len -= j;
	139
	140	/* Sort out the flags */
	141	for (k = 1; k < argc; k++) {
	142	if (argv[k][0] == '-') {
	143	c = argv[k][1];
	144	switch (c) {
	145	case '0': /* pass numbers */
	146	case '1':
	147	case '2':
	148	case '3':
	149	case '4': pnum = c - '0'; break;
	150	case 'd': /* used by news */
	151	break;
	152	default:
	153	(void) write(1, "Usage: ", 7);
	154	(void) write(1, cp, len);
	155	(void) write(1, " [-#] [in] [out]\n", 17);
	156	exit(0);
	157	break;
	158	}
	159
	160	/* Once it's checked, lose it anyway */
	161	for (j = k; j < argc; j++) argv[j] = argv[j + 1];
	162	argc--;
	163	k--;
	164	}
	165	}
	166
	167	/* Default i/o settings */
	168	fdin = 0;
	169	fdout = 1;
	170	fderr = 2;
	171
	172	/* Try to open specific files and connect them to stdin/stdout */
	173	if (argc > 1) {
	174	if ((fdtmp = open(argv[1], 0)) == -1) die("input open failed");
	175	(void) close(0);
	176	if ((fdin = dup(fdtmp)) == -1) die("input dup failed\n");
	177	(void) close(fdtmp);
	178	}
	179	if (argc > 2) {
	180	(void) unlink(argv[2]);
	181	if ((fdtmp = creat(argv[2], 0666)) == -1) die("output creat failed");
	182	(void) close(1);
	183	if ((fdout = dup(fdtmp)) == -1) die("output dup failed\n");
	184	(void) close(fdtmp);
	185	}
	186
	187	/* Sort out type of compression */
	188	if (pnum == -1 \|\| pnum == 4) {/* if this is pass 4 */
	189	/* Check header of compressed file */
	190	if (mygetc() != 0x1F \|\| mygetc() != 0x9D) /* check magic number */
	191	die("not a compressed file\n");
	192	iindex = mygetc(); /* get compression style */
	193	} else
	194	getpipe(); /* get compression style */
	195
	196	maxbits = iindex & 0x1F;
	197	clearflg = ((iindex & 0x80) != 0) ? TRUE : FALSE;
	198	if (maxbits < 9 \|\| maxbits > 16) /* check for valid maxbits */
	199	die("can't decompress\n");
	200	if (pnum != -1 && pnum != 0)
	201	putpipe(iindex, 0); /* pass style to next copy */
	202
	203	/* Fork off an ancestor if necessary - ffork() increments pnum */
	204	if (pnum == -1) {
	205	pnum = 0;
	206	if (pnum == 0) ffork();
	207	if (pnum == 1) ffork();
	208	if (pnum == 2) ffork();
	209	if (pnum == 3) ffork();
	210	}
	211
	212	/* Preliminary inits. Note: end/maxend/curend are highest, not
	213	* highest + 1 */
	214	base = DICTSZ * pnum + 256;
	215	locend = base + DICTSZ - 1;
	216	maxend = (1 << maxbits) - 1;
	217	if (maxend > locend) maxend = locend;
	218
	219	while (TRUE) {
	220	curend = 255 + (clearflg ? 1 : 0); /* init dictionary */
	221	dcharp = DICTSZ; /* flag for none needed */
	222	curbits = 9; /* init curbits (for copy 0) */
	223	while (TRUE) { /* for each index in input */
	224	if (pnum == 4) {/* get index using getbits() */
	225	if (curbits < maxbits && (1 << curbits) <= curend) {
	226	/* Curbits needs to be increased */
	227	/* Due to uglyness in compress, these
	228	* indices in the compressed file are
	229	* wasted */
	230	while (inmod) getbits();
	231	curbits++;
	232	}
	233	getbits();
	234	} else
	235	getpipe(); /* get next index */
	236
	237	if (iindex == 256 && clearflg) {
	238	if (pnum > 0) putpipe(iindex, 0);
	239	/* Due to uglyness in compress, these indices
	240	* in the compressed file are wasted */
	241	while (inmod) getbits();
	242	break;
	243	}
	244	tindex = iindex;
	245	/* Convert the index part, ignoring spawned chars */
	246	while (tindex >= base) tindex = dindex[tindex - base];
	247	/* Pass on the index */
	248	putpipe(tindex, 0);
	249	/* Save the char of the last added entry, if any */
	250	if (dcharp < DICTSZ) dchar[dcharp++] = tindex;
	251	if (curend < maxend && ++curend > (base - 1))
	252	dindex[dcharp = (curend - base)] = iindex;
	253
	254	/* Do spawned chars. They are naturally produced in
	255	* the wrong order. To get them in the right order
	256	* without using memory, a series of passes,
	257	* progressively less deep, are used */
	258	tbase = base;
	259	while ((tindex = iindex) >= tbase) {/* for each char to spawn*/
	260	while ((tindex2 = dindex[tindex - base]) >= tbase)
	261	tindex = tindex2; /* scan to desired char */
	262	putpipe(dchar[tindex-base], 1); /* put it to the pipe*/
	263	tbase = tindex + 1;
	264	if (tbase == 0) break; /* it's a wrap */
	265	}
	266	}
	267	}
	268	}
	269
	270
	271	/* F f o r k
	272	*
	273	* Fork off the previous pass - the parent reads from the child.
	274	*/
	275	void ffork()
	276	{
	277	int j, pfd[2];
	278
	279	if (pipe(pfd) == -1) die("pipe() error\n");
	280	if ((j = fork()) == -1) die("fork() error\n");
	281	if (j == 0) { /* this is the child */
	282	if (close(1) == -1) die("close(1) error\n");
	283	if (dup(pfd[1]) != 1) die("dup(1) error\n");
	284	(void) close(pfd[0]);
	285	pnum++;
	286	} else { /* this is the parent */
	287	if (close(0) == -1) die("close(0) error\n");
	288	if (dup(pfd[0]) != 0) die("dup(0) error\n");
	289	(void) close(pfd[1]);
	290	}
	291	}
	292
	293
	294	/* D i e
	295	*
	296	* If s is a message, write it to stderr. Flush buffers if needed. Then exit.
	297	*/
	298	void die(s)
	299	char *s;
	300	{
	301	/* Flush stdout buffer if needed */
	302	if (obufind != 0) {
	303	if (write(fdout, (char *) obuf, (unsigned) obufind) != obufind)
	304	s = "bad stdout write\n";
	305	obufind = 0;
	306	}
	307
	308	/* Flush pipe if needed */
	309	do
	310	putpipe(EOF_INDEX, 0);
	311	while (opbufind);
	312	/* Write any error message */
	313	if (s != (char *) NULL) {
	314	while (*s) (void) write(fderr, s++, 1);
	315	}
	316	exit((s == (char *) NULL) ? 0 : 1);
	317	}
	318
	319
	320	/* M p u t c
	321	*
	322	* Put a char to stdout.
	323	*/
	324	void myputc(c)
	325	unsigned c;
	326	{
	327	obuf[obufind++] = c;
	328	if (obufind >= BUFSZ) { /* if stdout buffer full */
	329	if (write(fdout, (char ) obuf, BUFSZ) != BUFSZ) / flush to stdout */
	330	die("bad stdout write\n");
	331	obufind = 0;
	332	}
	333	}
	334
	335
	336	/* M y g e t c
	337	*
	338	* Get a char from stdin. If EOF, then die() and exit.
	339	*/
	340	unsigned mygetc()
	341	{
	342	if (ibufstart >= ibufend) { /* if stdin buffer empty */
	343	if ((ibufend = read(fdin, (char *) ibuf, BUFSZ)) <= 0)
	344	die((char ) NULL); / if EOF, do normal exit */
	345	ibufstart = 0;
	346	}
	347	return(ibuf[ibufstart++] & 0xff);
	348	}
	349
	350
	351	/* G e t b i t s
	352	*
	353	* Put curbits bits into index from stdin. Note: only copy 4 uses this.
	354	* The bits within a byte are in the correct order. But when the bits
	355	* cross a byte boundry, the lowest bits will be in the higher part of
	356	* the current byte, and the higher bits will be in the lower part of
	357	* the next byte.
	358	*/
	359	void getbits()
	360	{
	361	int have;
	362	static unsigned curbyte; /* byte having bits extracted from it */
	363	static int left; /* how many bits are left in curbyte */
	364
	365	inmod = (inmod + 1) & 7; /* count input mod 8 */
	366	iindex = curbyte;
	367	have = left;
	368	if (curbits - have > 8) {
	369	iindex \|= mygetc() << have;
	370	have += 8;
	371	}
	372	iindex \|= ((curbyte = mygetc()) << have) & ~((unsigned) 0xFFFF << curbits);
	373	curbyte >>= curbits - have;
	374	left = 8 - (curbits - have);
	375	}
	376
	377
	378	/* G e t p i p e
	379	*
	380	* Get an index from the pipeline. If flagged firstonly, handle it here.
	381	*/
	382	void getpipe()
	383	{
	384	static short flags;
	385	static int n = 0; /* number of flags in flags */
	386
	387	while (TRUE) { /* while index with firstonly flag set */
	388	if (n <= 0) {
	389	if (ipbufind >= BUFSZ_2) { /* if pipe input buffer
	390	* empty */
	391	if (read(fdin, (char *) ipbuf, BUFSZ) != BUFSZ)
	392	die("bad pipe read\n");
	393	ipbufind = 0;
	394	}
	395	flags = ipbuf[ipbufind++];
	396	n = 15;
	397	}
	398	iindex = ipbuf[ipbufind++];
	399	if (iindex > curend)
	400	die((iindex == EOF_INDEX) ? (char *) NULL : "invalid data\n");
	401	flags <<= 1;
	402	n--;
	403	/* Assume flags < 0 if highest remaining flag is set */
	404	if (flags < 0) { /* if firstonly flag for index is not set */
	405	while (iindex >= base) iindex = dindex[iindex - base];
	406	putpipe(iindex, 1);
	407	} else
	408	return; /* return with valid non-firstonly index */
	409	}
	410	}
	411
	412
	413	/* P u t p i p e
	414	*
	415	* put an index into the pipeline.
	416	*/
	417	void putpipe(u, flag)
	418	unsigned u;
	419	int flag;
	420	{
	421	static unsigned short flags, *flagp;
	422	static int n = 0; /* number of flags in flags */
	423
	424	if (pnum == 0) { /* if we should write to stdout */
	425	myputc(u); /* index will be the char value */
	426	return;
	427	}
	428	if (n == 0) { /* if we need to reserve a flag entry */
	429	flags = 0;
	430	flagp = opbuf + opbufind;
	431	opbufind++;
	432	}
	433	opbuf[opbufind++] = u; /* add index to buffer */
	434	flags = (flags << 1) \| flag; /* add firstonly flag */
	435	if (++n >= 15) { /* if block of 15 indices */
	436	n = 0;
	437	flagp = flags; / insert flags entry */
	438	if (opbufind >= BUFSZ_2) { /* if pipe out buffer full */
	439	opbufind = 0;
	440	if (write(fdout, (char *) opbuf, BUFSZ) != BUFSZ)
	441	die("bad pipe write\n");
	442	}
	443	}
	444	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: