Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

source: trunk/minix/lib/zlib-1.2.3/examples/zran.c@ 9

Last change on this file since 9 was 9, checked in by Mattia Monga, 13 years ago
Minix 3.1.2a
File size: 14.9 KB

Line
1	/* zran.c -- example of zlib/gzip stream indexing and random access
2	* Copyright (C) 2005 Mark Adler
3	* For conditions of distribution and use, see copyright notice in zlib.h
4	Version 1.0 29 May 2005 Mark Adler */
5
6	/* Illustrate the use of Z_BLOCK, inflatePrime(), and inflateSetDictionary()
7	for random access of a compressed file. A file containing a zlib or gzip
8	stream is provided on the command line. The compressed stream is decoded in
9	its entirety, and an index built with access points about every SPAN bytes
10	in the uncompressed output. The compressed file is left open, and can then
11	be read randomly, having to decompress on the average SPAN/2 uncompressed
12	bytes before getting to the desired block of data.
13
14	An access point can be created at the start of any deflate block, by saving
15	the starting file offset and bit of that block, and the 32K bytes of
16	uncompressed data that precede that block. Also the uncompressed offset of
17	that block is saved to provide a referece for locating a desired starting
18	point in the uncompressed stream. build_index() works by decompressing the
19	input zlib or gzip stream a block at a time, and at the end of each block
20	deciding if enough uncompressed data has gone by to justify the creation of
21	a new access point. If so, that point is saved in a data structure that
22	grows as needed to accommodate the points.
23
24	To use the index, an offset in the uncompressed data is provided, for which
25	the latest accees point at or preceding that offset is located in the index.
26	The input file is positioned to the specified location in the index, and if
27	necessary the first few bits of the compressed data is read from the file.
28	inflate is initialized with those bits and the 32K of uncompressed data, and
29	the decompression then proceeds until the desired offset in the file is
30	reached. Then the decompression continues to read the desired uncompressed
31	data from the file.
32
33	Another approach would be to generate the index on demand. In that case,
34	requests for random access reads from the compressed data would try to use
35	the index, but if a read far enough past the end of the index is required,
36	then further index entries would be generated and added.
37
38	There is some fair bit of overhead to starting inflation for the random
39	access, mainly copying the 32K byte dictionary. So if small pieces of the
40	file are being accessed, it would make sense to implement a cache to hold
41	some lookahead and avoid many calls to extract() for small lengths.
42
43	Another way to build an index would be to use inflateCopy(). That would
44	not be constrained to have access points at block boundaries, but requires
45	more memory per access point, and also cannot be saved to file due to the
46	use of pointers in the state. The approach here allows for storage of the
47	index in a file.
48	*/
49
50	#include <stdio.h>
51	#include <stdlib.h>
52	#include <string.h>
53	#include "zlib.h"
54
55	#define local static
56
57	#define SPAN 1048576L /* desired distance between access points */
58	#define WINSIZE 32768U /* sliding window size */
59	#define CHUNK 16384 /* file input buffer size */
60
61	/* access point entry */
62	struct point {
63	off_t out; /* corresponding offset in uncompressed data */
64	off_t in; /* offset in input file of first full byte */
65	int bits; /* number of bits (1-7) from byte at in - 1, or 0 */
66	unsigned char window[WINSIZE]; /* preceding 32K of uncompressed data */
67	};
68
69	/* access point list */
70	struct access {
71	int have; /* number of list entries filled in */
72	int size; /* number of list entries allocated */
73	struct point list; / allocated list */
74	};
75
76	/* Deallocate an index built by build_index() */
77	local void free_index(struct access *index)
78	{
79	if (index != NULL) {
80	free(index->list);
81	free(index);
82	}
83	}
84
85	/* Add an entry to the access point list. If out of memory, deallocate the
86	existing list and return NULL. */
87	local struct access addpoint(struct access index, int bits,
88	off_t in, off_t out, unsigned left, unsigned char *window)
89	{
90	struct point *next;
91
92	/* if list is empty, create it (start with eight points) */
93	if (index == NULL) {
94	index = malloc(sizeof(struct access));
95	if (index == NULL) return NULL;
96	index->list = malloc(sizeof(struct point) << 3);
97	if (index->list == NULL) {
98	free(index);
99	return NULL;
100	}
101	index->size = 8;
102	index->have = 0;
103	}
104
105	/* if list is full, make it bigger */
106	else if (index->have == index->size) {
107	index->size <<= 1;
108	next = realloc(index->list, sizeof(struct point) * index->size);
109	if (next == NULL) {
110	free_index(index);
111	return NULL;
112	}
113	index->list = next;
114	}
115
116	/* fill in entry and increment how many we have */
117	next = index->list + index->have;
118	next->bits = bits;
119	next->in = in;
120	next->out = out;
121	if (left)
122	memcpy(next->window, window + WINSIZE - left, left);
123	if (left < WINSIZE)
124	memcpy(next->window + left, window, WINSIZE - left);
125	index->have++;
126
127	/* return list, possibly reallocated */
128	return index;
129	}
130
131	/* Make one entire pass through the compressed stream and build an index, with
132	access points about every span bytes of uncompressed output -- span is
133	chosen to balance the speed of random access against the memory requirements
134	of the list, about 32K bytes per access point. Note that data after the end
135	of the first zlib or gzip stream in the file is ignored. build_index()
136	returns the number of access points on success (>= 1), Z_MEM_ERROR for out
137	of memory, Z_DATA_ERROR for an error in the input file, or Z_ERRNO for a
138	file read error. On success, built points to the resulting index. /
139	local int build_index(FILE in, off_t span, struct access *built)
140	{
141	int ret;
142	off_t totin, totout; /* our own total counters to avoid 4GB limit */
143	off_t last; /* totout value of last access point */
144	struct access index; / access points being generated */
145	z_stream strm;
146	unsigned char input[CHUNK];
147	unsigned char window[WINSIZE];
148
149	/* initialize inflate */
150	strm.zalloc = Z_NULL;
151	strm.zfree = Z_NULL;
152	strm.opaque = Z_NULL;
153	strm.avail_in = 0;
154	strm.next_in = Z_NULL;
155	ret = inflateInit2(&strm, 47); /* automatic zlib or gzip decoding */
156	if (ret != Z_OK)
157	return ret;
158
159	/* inflate the input, maintain a sliding window, and build an index -- this
160	also validates the integrity of the compressed data using the check
161	information at the end of the gzip or zlib stream */
162	totin = totout = last = 0;
163	index = NULL; /* will be allocated by first addpoint() */
164	strm.avail_out = 0;
165	do {
166	/* get some compressed data from input file */
167	strm.avail_in = fread(input, 1, CHUNK, in);
168	if (ferror(in)) {
169	ret = Z_ERRNO;
170	goto build_index_error;
171	}
172	if (strm.avail_in == 0) {
173	ret = Z_DATA_ERROR;
174	goto build_index_error;
175	}
176	strm.next_in = input;
177
178	/* process all of that, or until end of stream */
179	do {
180	/* reset sliding window if necessary */
181	if (strm.avail_out == 0) {
182	strm.avail_out = WINSIZE;
183	strm.next_out = window;
184	}
185
186	/* inflate until out of input, output, or at end of block --
187	update the total input and output counters */
188	totin += strm.avail_in;
189	totout += strm.avail_out;
190	ret = inflate(&strm, Z_BLOCK); /* return at end of block */
191	totin -= strm.avail_in;
192	totout -= strm.avail_out;
193	if (ret == Z_NEED_DICT)
194	ret = Z_DATA_ERROR;
195	if (ret == Z_MEM_ERROR \|\| ret == Z_DATA_ERROR)
196	goto build_index_error;
197	if (ret == Z_STREAM_END)
198	break;
199
200	/* if at end of block, consider adding an index entry (note that if
201	data_type indicates an end-of-block, then all of the
202	uncompressed data from that block has been delivered, and none
203	of the compressed data after that block has been consumed,
204	except for up to seven bits) -- the totout == 0 provides an
205	entry point after the zlib or gzip header, and assures that the
206	index always has at least one access point; we avoid creating an
207	access point after the last block by checking bit 6 of data_type
208	*/
209	if ((strm.data_type & 128) && !(strm.data_type & 64) &&
210	(totout == 0 \|\| totout - last > span)) {
211	index = addpoint(index, strm.data_type & 7, totin,
212	totout, strm.avail_out, window);
213	if (index == NULL) {
214	ret = Z_MEM_ERROR;
215	goto build_index_error;
216	}
217	last = totout;
218	}
219	} while (strm.avail_in != 0);
220	} while (ret != Z_STREAM_END);
221
222	/* clean up and return index (release unused entries in list) */
223	(void)inflateEnd(&strm);
224	index = realloc(index, sizeof(struct point) * index->have);
225	index->size = index->have;
226	*built = index;
227	return index->size;
228
229	/* return error */
230	build_index_error:
231	(void)inflateEnd(&strm);
232	if (index != NULL)
233	free_index(index);
234	return ret;
235	}
236
237	/* Use the index to read len bytes from offset into buf, return bytes read or
238	negative for error (Z_DATA_ERROR or Z_MEM_ERROR). If data is requested past
239	the end of the uncompressed data, then extract() will return a value less
240	than len, indicating how much as actually read into buf. This function
241	should not return a data error unless the file was modified since the index
242	was generated. extract() may also return Z_ERRNO if there is an error on
243	reading or seeking the input file. */
244	local int extract(FILE in, struct access index, off_t offset,
245	unsigned char *buf, int len)
246	{
247	int ret, skip;
248	z_stream strm;
249	struct point *here;
250	unsigned char input[CHUNK];
251	unsigned char discard[WINSIZE];
252
253	/* proceed only if something reasonable to do */
254	if (len < 0)
255	return 0;
256
257	/* find where in stream to start */
258	here = index->list;
259	ret = index->have;
260	while (--ret && here[1].out <= offset)
261	here++;
262
263	/* initialize file and inflate state to start there */
264	strm.zalloc = Z_NULL;
265	strm.zfree = Z_NULL;
266	strm.opaque = Z_NULL;
267	strm.avail_in = 0;
268	strm.next_in = Z_NULL;
269	ret = inflateInit2(&strm, -15); /* raw inflate */
270	if (ret != Z_OK)
271	return ret;
272	ret = fseeko(in, here->in - (here->bits ? 1 : 0), SEEK_SET);
273	if (ret == -1)
274	goto extract_ret;
275	if (here->bits) {
276	ret = getc(in);
277	if (ret == -1) {
278	ret = ferror(in) ? Z_ERRNO : Z_DATA_ERROR;
279	goto extract_ret;
280	}
281	(void)inflatePrime(&strm, here->bits, ret >> (8 - here->bits));
282	}
283	(void)inflateSetDictionary(&strm, here->window, WINSIZE);
284
285	/* skip uncompressed bytes until offset reached, then satisfy request */
286	offset -= here->out;
287	strm.avail_in = 0;
288	skip = 1; /* while skipping to offset */
289	do {
290	/* define where to put uncompressed data, and how much */
291	if (offset == 0 && skip) { /* at offset now */
292	strm.avail_out = len;
293	strm.next_out = buf;
294	skip = 0; /* only do this once */
295	}
296	if (offset > WINSIZE) { /* skip WINSIZE bytes */
297	strm.avail_out = WINSIZE;
298	strm.next_out = discard;
299	offset -= WINSIZE;
300	}
301	else if (offset != 0) { /* last skip */
302	strm.avail_out = (unsigned)offset;
303	strm.next_out = discard;
304	offset = 0;
305	}
306
307	/* uncompress until avail_out filled, or end of stream */
308	do {
309	if (strm.avail_in == 0) {
310	strm.avail_in = fread(input, 1, CHUNK, in);
311	if (ferror(in)) {
312	ret = Z_ERRNO;
313	goto extract_ret;
314	}
315	if (strm.avail_in == 0) {
316	ret = Z_DATA_ERROR;
317	goto extract_ret;
318	}
319	strm.next_in = input;
320	}
321	ret = inflate(&strm, Z_NO_FLUSH); /* normal inflate */
322	if (ret == Z_NEED_DICT)
323	ret = Z_DATA_ERROR;
324	if (ret == Z_MEM_ERROR \|\| ret == Z_DATA_ERROR)
325	goto extract_ret;
326	if (ret == Z_STREAM_END)
327	break;
328	} while (strm.avail_out != 0);
329
330	/* if reach end of stream, then don't keep trying to get more */
331	if (ret == Z_STREAM_END)
332	break;
333
334	/* do until offset reached and requested data read, or stream ends */
335	} while (skip);
336
337	/* compute number of uncompressed bytes read after offset */
338	ret = skip ? 0 : len - strm.avail_out;
339
340	/* clean up and return bytes read or error */
341	extract_ret:
342	(void)inflateEnd(&strm);
343	return ret;
344	}
345
346	/* Demonstrate the use of build_index() and extract() by processing the file
347	provided on the command line, and the extracting 16K from about 2/3rds of
348	the way through the uncompressed output, and writing that to stdout. */
349	int main(int argc, char **argv)
350	{
351	int len;
352	off_t offset;
353	FILE *in;
354	struct access *index;
355	unsigned char buf[CHUNK];
356
357	/* open input file */
358	if (argc != 2) {
359	fprintf(stderr, "usage: zran file.gz\n");
360	return 1;
361	}
362	in = fopen(argv[1], "rb");
363	if (in == NULL) {
364	fprintf(stderr, "zran: could not open %s for reading\n", argv[1]);
365	return 1;
366	}
367
368	/* build index */
369	len = build_index(in, SPAN, &index);
370	if (len < 0) {
371	fclose(in);
372	switch (len) {
373	case Z_MEM_ERROR:
374	fprintf(stderr, "zran: out of memory\n");
375	break;
376	case Z_DATA_ERROR:
377	fprintf(stderr, "zran: compressed data error in %s\n", argv[1]);
378	break;
379	case Z_ERRNO:
380	fprintf(stderr, "zran: read error on %s\n", argv[1]);
381	break;
382	default:
383	fprintf(stderr, "zran: error %d while building index\n", len);
384	}
385	return 1;
386	}
387	fprintf(stderr, "zran: built index with %d access points\n", len);
388
389	/* use index by reading some bytes from an arbitrary offset */
390	offset = (index->list[index->have - 1].out << 1) / 3;
391	len = extract(in, index, offset, buf, CHUNK);
392	if (len < 0)
393	fprintf(stderr, "zran: extraction failed: %s error\n",
394	len == Z_MEM_ERROR ? "out of memory" : "input corrupted");
395	else {
396	fwrite(buf, 1, len, stdout);
397	fprintf(stderr, "zran: extracted %d bytes at %llu\n", len, offset);
398	}
399
400	/* clean up and exit */
401	free_index(index);
402	fclose(in);
403	return 0;
404	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: