source: trunk/minix/commands/bzip2-1.0.3/bzip2recover.c@ 9

Last change on this file since 9 was 9, checked in by Mattia Monga, 13 years ago

Minix 3.1.2a

File size: 15.8 KB
RevLine 
[9]1
2/*-----------------------------------------------------------*/
3/*--- Block recoverer program for bzip2 ---*/
4/*--- bzip2recover.c ---*/
5/*-----------------------------------------------------------*/
6
7/*--
8 This program is bzip2recover, a program to attempt data
9 salvage from damaged files created by the accompanying
10 bzip2-1.0.3 program.
11
12 Copyright (C) 1996-2005 Julian R Seward. All rights reserved.
13
14 Redistribution and use in source and binary forms, with or without
15 modification, are permitted provided that the following conditions
16 are met:
17
18 1. Redistributions of source code must retain the above copyright
19 notice, this list of conditions and the following disclaimer.
20
21 2. The origin of this software must not be misrepresented; you must
22 not claim that you wrote the original software. If you use this
23 software in a product, an acknowledgment in the product
24 documentation would be appreciated but is not required.
25
26 3. Altered source versions must be plainly marked as such, and must
27 not be misrepresented as being the original software.
28
29 4. The name of the author may not be used to endorse or promote
30 products derived from this software without specific prior written
31 permission.
32
33 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
34 OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
35 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
36 ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
37 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
38 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
39 GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
40 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
41 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
42 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
43 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
44
45 Julian Seward, Cambridge, UK.
46 jseward@bzip.org
47 bzip2/libbzip2 version 1.0.3 of 15 February 2005
48--*/
49
50/*--
51 This program is a complete hack and should be rewritten
52 properly. It isn't very complicated.
53--*/
54
55#include <stdio.h>
56#include <errno.h>
57#include <stdlib.h>
58#include <string.h>
59
60
61/* This program records bit locations in the file to be recovered.
62 That means that if 64-bit ints are not supported, we will not
63 be able to recover .bz2 files over 512MB (2^32 bits) long.
64 On GNU supported platforms, we take advantage of the 64-bit
65 int support to circumvent this problem. Ditto MSVC.
66
67 This change occurred in version 1.0.2; all prior versions have
68 the 512MB limitation.
69*/
/* MaybeUInt64 is the widest unsigned integer type this build can
   rely on; MaybeUInt64_FMT is the matching printf conversion.
   Note: the standard length modifier for `unsigned long long' is
   "ll" -- "L" is only defined for floating-point conversions, so
   "%Lu" works only by accident on some C libraries. */
#ifdef __GNUC__
   typedef  unsigned long long int  MaybeUInt64;
#  define MaybeUInt64_FMT "%llu"
#else
#ifdef _MSC_VER
   typedef  unsigned __int64  MaybeUInt64;
#  define MaybeUInt64_FMT "%I64u"
#else
   /* No known 64-bit type: bit positions are limited to 2^32. */
   typedef  unsigned int   MaybeUInt64;
#  define MaybeUInt64_FMT "%u"
#endif
#endif
82
/* Short aliases for the basic integer and character types used
   throughout this file. */
typedef  unsigned int   UInt32;
typedef  int            Int32;
typedef  unsigned char  UChar;
typedef  char           Char;

/* Boolean stored in a single unsigned byte. */
typedef  unsigned char  Bool;
#define True    ((Bool)1)
#define False   ((Bool)0)
90
91
92#define BZ_MAX_FILENAME 2000
93
94Char inFileName[BZ_MAX_FILENAME];
95Char outFileName[BZ_MAX_FILENAME];
96Char progName[BZ_MAX_FILENAME];
97
98MaybeUInt64 bytesOut = 0;
99MaybeUInt64 bytesIn = 0;
100
101
102/*---------------------------------------------------*/
103/*--- Header bytes ---*/
104/*---------------------------------------------------*/
105
/* The four bytes that open every output file are written as
   BZ_HDR_B, BZ_HDR_Z, BZ_HDR_h and BZ_HDR_0 + 9. */
#define BZ_HDR_B 0x42                         /* 'B' */
#define BZ_HDR_Z 0x5a                         /* 'Z' */
#define BZ_HDR_h 0x68                         /* 'h' */
#define BZ_HDR_0 0x30                         /* '0' */
110
111
112/*---------------------------------------------------*/
113/*--- I/O errors ---*/
114/*---------------------------------------------------*/
115
116/*---------------------------------------------*/
117void readError ( void )
118{
119 fprintf ( stderr,
120 "%s: I/O error reading `%s', possible reason follows.\n",
121 progName, inFileName );
122 perror ( progName );
123 fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
124 progName );
125 exit ( 1 );
126}
127
128
129/*---------------------------------------------*/
130void writeError ( void )
131{
132 fprintf ( stderr,
133 "%s: I/O error reading `%s', possible reason follows.\n",
134 progName, inFileName );
135 perror ( progName );
136 fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
137 progName );
138 exit ( 1 );
139}
140
141
142/*---------------------------------------------*/
143void mallocFail ( Int32 n )
144{
145 fprintf ( stderr,
146 "%s: malloc failed on request for %d bytes.\n",
147 progName, n );
148 fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
149 progName );
150 exit ( 1 );
151}
152
153
154/*---------------------------------------------*/
155void tooManyBlocks ( Int32 max_handled_blocks )
156{
157 fprintf ( stderr,
158 "%s: `%s' appears to contain more than %d blocks\n",
159 progName, inFileName, max_handled_blocks );
160 fprintf ( stderr,
161 "%s: and cannot be handled. To fix, increase\n",
162 progName );
163 fprintf ( stderr,
164 "%s: BZ_MAX_HANDLED_BLOCKS in bzip2recover.c, and recompile.\n",
165 progName );
166 exit ( 1 );
167}
168
169
170
171/*---------------------------------------------------*/
172/*--- Bit stream I/O ---*/
173/*---------------------------------------------------*/
174
175typedef
176 struct {
177 FILE* handle;
178 Int32 buffer;
179 Int32 buffLive;
180 Char mode;
181 }
182 BitStream;
183
184
185/*---------------------------------------------*/
186BitStream* bsOpenReadStream ( FILE* stream )
187{
188 BitStream *bs = malloc ( sizeof(BitStream) );
189 if (bs == NULL) mallocFail ( sizeof(BitStream) );
190 bs->handle = stream;
191 bs->buffer = 0;
192 bs->buffLive = 0;
193 bs->mode = 'r';
194 return bs;
195}
196
197
198/*---------------------------------------------*/
199BitStream* bsOpenWriteStream ( FILE* stream )
200{
201 BitStream *bs = malloc ( sizeof(BitStream) );
202 if (bs == NULL) mallocFail ( sizeof(BitStream) );
203 bs->handle = stream;
204 bs->buffer = 0;
205 bs->buffLive = 0;
206 bs->mode = 'w';
207 return bs;
208}
209
210
211/*---------------------------------------------*/
212void bsPutBit ( BitStream* bs, Int32 bit )
213{
214 if (bs->buffLive == 8) {
215 Int32 retVal = putc ( (UChar) bs->buffer, bs->handle );
216 if (retVal == EOF) writeError();
217 bytesOut++;
218 bs->buffLive = 1;
219 bs->buffer = bit & 0x1;
220 } else {
221 bs->buffer = ( (bs->buffer << 1) | (bit & 0x1) );
222 bs->buffLive++;
223 };
224}
225
226
227/*---------------------------------------------*/
228/*--
229 Returns 0 or 1, or 2 to indicate EOF.
230--*/
231Int32 bsGetBit ( BitStream* bs )
232{
233 if (bs->buffLive > 0) {
234 bs->buffLive --;
235 return ( ((bs->buffer) >> (bs->buffLive)) & 0x1 );
236 } else {
237 Int32 retVal = getc ( bs->handle );
238 if ( retVal == EOF ) {
239 if (errno != 0) readError();
240 return 2;
241 }
242 bs->buffLive = 7;
243 bs->buffer = retVal;
244 return ( ((bs->buffer) >> 7) & 0x1 );
245 }
246}
247
248
249/*---------------------------------------------*/
250void bsClose ( BitStream* bs )
251{
252 Int32 retVal;
253
254 if ( bs->mode == 'w' ) {
255 while ( bs->buffLive < 8 ) {
256 bs->buffLive++;
257 bs->buffer <<= 1;
258 };
259 retVal = putc ( (UChar) (bs->buffer), bs->handle );
260 if (retVal == EOF) writeError();
261 bytesOut++;
262 retVal = fflush ( bs->handle );
263 if (retVal == EOF) writeError();
264 }
265 retVal = fclose ( bs->handle );
266 if (retVal == EOF) {
267 if (bs->mode == 'w') writeError(); else readError();
268 }
269 free ( bs );
270}
271
272
273/*---------------------------------------------*/
274void bsPutUChar ( BitStream* bs, UChar c )
275{
276 Int32 i;
277 for (i = 7; i >= 0; i--)
278 bsPutBit ( bs, (((UInt32) c) >> i) & 0x1 );
279}
280
281
282/*---------------------------------------------*/
283void bsPutUInt32 ( BitStream* bs, UInt32 c )
284{
285 Int32 i;
286
287 for (i = 31; i >= 0; i--)
288 bsPutBit ( bs, (c >> i) & 0x1 );
289}
290
291
292/*---------------------------------------------*/
293Bool endsInBz2 ( Char* name )
294{
295 Int32 n = strlen ( name );
296 if (n <= 4) return False;
297 return
298 (name[n-4] == '.' &&
299 name[n-3] == 'b' &&
300 name[n-2] == 'z' &&
301 name[n-1] == '2');
302}
303
304
305/*---------------------------------------------------*/
306/*--- ---*/
307/*---------------------------------------------------*/
308
/* Directory separator used to find the basename of the input
   path.  This logic isn't really right when it comes to Cygwin. */
#ifdef _WIN32
#  define  BZ_SPLIT_SYM  '\\'  /* path splitter on Windows platform */
#else
#  define  BZ_SPLIT_SYM  '/'   /* path splitter on Unix platform */
#endif

/* The two 48-bit markers searched for in the input, each split
   into its upper 16 bits (_HI) and lower 32 bits (_LO). */
#define BLOCK_HEADER_HI  0x00003141UL
#define BLOCK_HEADER_LO  0x59265359UL

#define BLOCK_ENDMARK_HI 0x00001772UL
#define BLOCK_ENDMARK_LO 0x45385090UL

/* Capacity of the block-boundary tables.  Increase if necessary.
   However, a .bz2 file with > 50000 blocks would have an
   uncompressed size of at least 40GB, so the chances are low
   you'll need to up this.
*/
#define BZ_MAX_HANDLED_BLOCKS 50000
327
328MaybeUInt64 bStart [BZ_MAX_HANDLED_BLOCKS];
329MaybeUInt64 bEnd [BZ_MAX_HANDLED_BLOCKS];
330MaybeUInt64 rbStart[BZ_MAX_HANDLED_BLOCKS];
331MaybeUInt64 rbEnd [BZ_MAX_HANDLED_BLOCKS];
332
333Int32 main ( Int32 argc, Char** argv )
334{
335 FILE* inFile;
336 FILE* outFile;
337 BitStream* bsIn, *bsWr;
338 Int32 b, wrBlock, currBlock, rbCtr;
339 MaybeUInt64 bitsRead;
340
341 UInt32 buffHi, buffLo, blockCRC;
342 Char* p;
343
344 strcpy ( progName, argv[0] );
345 inFileName[0] = outFileName[0] = 0;
346
347 fprintf ( stderr,
348 "bzip2recover 1.0.3: extracts blocks from damaged .bz2 files.\n" );
349
350 if (argc != 2) {
351 fprintf ( stderr, "%s: usage is `%s damaged_file_name'.\n",
352 progName, progName );
353 switch (sizeof(MaybeUInt64)) {
354 case 8:
355 fprintf(stderr,
356 "\trestrictions on size of recovered file: None\n");
357 break;
358 case 4:
359 fprintf(stderr,
360 "\trestrictions on size of recovered file: 512 MB\n");
361 fprintf(stderr,
362 "\tto circumvent, recompile with MaybeUInt64 as an\n"
363 "\tunsigned 64-bit int.\n");
364 break;
365 default:
366 fprintf(stderr,
367 "\tsizeof(MaybeUInt64) is not 4 or 8 -- "
368 "configuration error.\n");
369 break;
370 }
371 exit(1);
372 }
373
374 if (strlen(argv[1]) >= BZ_MAX_FILENAME-20) {
375 fprintf ( stderr,
376 "%s: supplied filename is suspiciously (>= %d chars) long. Bye!\n",
377 progName, (int)strlen(argv[1]) );
378 exit(1);
379 }
380
381 strcpy ( inFileName, argv[1] );
382
383 inFile = fopen ( inFileName, "rb" );
384 if (inFile == NULL) {
385 fprintf ( stderr, "%s: can't read `%s'\n", progName, inFileName );
386 exit(1);
387 }
388
389 bsIn = bsOpenReadStream ( inFile );
390 fprintf ( stderr, "%s: searching for block boundaries ...\n", progName );
391
392 bitsRead = 0;
393 buffHi = buffLo = 0;
394 currBlock = 0;
395 bStart[currBlock] = 0;
396
397 rbCtr = 0;
398
399 while (True) {
400 b = bsGetBit ( bsIn );
401 bitsRead++;
402 if (b == 2) {
403 if (bitsRead >= bStart[currBlock] &&
404 (bitsRead - bStart[currBlock]) >= 40) {
405 bEnd[currBlock] = bitsRead-1;
406 if (currBlock > 0)
407 fprintf ( stderr, " block %d runs from " MaybeUInt64_FMT
408 " to " MaybeUInt64_FMT " (incomplete)\n",
409 currBlock, bStart[currBlock], bEnd[currBlock] );
410 } else
411 currBlock--;
412 break;
413 }
414 buffHi = (buffHi << 1) | (buffLo >> 31);
415 buffLo = (buffLo << 1) | (b & 1);
416 if ( ( (buffHi & 0x0000ffff) == BLOCK_HEADER_HI
417 && buffLo == BLOCK_HEADER_LO)
418 ||
419 ( (buffHi & 0x0000ffff) == BLOCK_ENDMARK_HI
420 && buffLo == BLOCK_ENDMARK_LO)
421 ) {
422 if (bitsRead > 49) {
423 bEnd[currBlock] = bitsRead-49;
424 } else {
425 bEnd[currBlock] = 0;
426 }
427 if (currBlock > 0 &&
428 (bEnd[currBlock] - bStart[currBlock]) >= 130) {
429 fprintf ( stderr, " block %d runs from " MaybeUInt64_FMT
430 " to " MaybeUInt64_FMT "\n",
431 rbCtr+1, bStart[currBlock], bEnd[currBlock] );
432 rbStart[rbCtr] = bStart[currBlock];
433 rbEnd[rbCtr] = bEnd[currBlock];
434 rbCtr++;
435 }
436 if (currBlock >= BZ_MAX_HANDLED_BLOCKS)
437 tooManyBlocks(BZ_MAX_HANDLED_BLOCKS);
438 currBlock++;
439
440 bStart[currBlock] = bitsRead;
441 }
442 }
443
444 bsClose ( bsIn );
445
446 /*-- identified blocks run from 1 to rbCtr inclusive. --*/
447
448 if (rbCtr < 1) {
449 fprintf ( stderr,
450 "%s: sorry, I couldn't find any block boundaries.\n",
451 progName );
452 exit(1);
453 };
454
455 fprintf ( stderr, "%s: splitting into blocks\n", progName );
456
457 inFile = fopen ( inFileName, "rb" );
458 if (inFile == NULL) {
459 fprintf ( stderr, "%s: can't open `%s'\n", progName, inFileName );
460 exit(1);
461 }
462 bsIn = bsOpenReadStream ( inFile );
463
464 /*-- placate gcc's dataflow analyser --*/
465 blockCRC = 0; bsWr = 0;
466
467 bitsRead = 0;
468 outFile = NULL;
469 wrBlock = 0;
470 while (True) {
471 b = bsGetBit(bsIn);
472 if (b == 2) break;
473 buffHi = (buffHi << 1) | (buffLo >> 31);
474 buffLo = (buffLo << 1) | (b & 1);
475 if (bitsRead == 47+rbStart[wrBlock])
476 blockCRC = (buffHi << 16) | (buffLo >> 16);
477
478 if (outFile != NULL && bitsRead >= rbStart[wrBlock]
479 && bitsRead <= rbEnd[wrBlock]) {
480 bsPutBit ( bsWr, b );
481 }
482
483 bitsRead++;
484
485 if (bitsRead == rbEnd[wrBlock]+1) {
486 if (outFile != NULL) {
487 bsPutUChar ( bsWr, 0x17 ); bsPutUChar ( bsWr, 0x72 );
488 bsPutUChar ( bsWr, 0x45 ); bsPutUChar ( bsWr, 0x38 );
489 bsPutUChar ( bsWr, 0x50 ); bsPutUChar ( bsWr, 0x90 );
490 bsPutUInt32 ( bsWr, blockCRC );
491 bsClose ( bsWr );
492 }
493 if (wrBlock >= rbCtr) break;
494 wrBlock++;
495 } else
496 if (bitsRead == rbStart[wrBlock]) {
497 /* Create the output file name, correctly handling leading paths.
498 (31.10.2001 by Sergey E. Kusikov) */
499 Char* split;
500 Int32 ofs, k;
501 for (k = 0; k < BZ_MAX_FILENAME; k++)
502 outFileName[k] = 0;
503 strcpy (outFileName, inFileName);
504 split = strrchr (outFileName, BZ_SPLIT_SYM);
505 if (split == NULL) {
506 split = outFileName;
507 } else {
508 ++split;
509 }
510 /* Now split points to the start of the basename. */
511 ofs = split - outFileName;
512 sprintf (split, "rec%5d", wrBlock+1);
513 for (p = split; *p != 0; p++) if (*p == ' ') *p = '0';
514 strcat (outFileName, inFileName + ofs);
515
516 if ( !endsInBz2(outFileName)) strcat ( outFileName, ".bz2" );
517
518 fprintf ( stderr, " writing block %d to `%s' ...\n",
519 wrBlock+1, outFileName );
520
521 outFile = fopen ( outFileName, "wb" );
522 if (outFile == NULL) {
523 fprintf ( stderr, "%s: can't write `%s'\n",
524 progName, outFileName );
525 exit(1);
526 }
527 bsWr = bsOpenWriteStream ( outFile );
528 bsPutUChar ( bsWr, BZ_HDR_B );
529 bsPutUChar ( bsWr, BZ_HDR_Z );
530 bsPutUChar ( bsWr, BZ_HDR_h );
531 bsPutUChar ( bsWr, BZ_HDR_0 + 9 );
532 bsPutUChar ( bsWr, 0x31 ); bsPutUChar ( bsWr, 0x41 );
533 bsPutUChar ( bsWr, 0x59 ); bsPutUChar ( bsWr, 0x26 );
534 bsPutUChar ( bsWr, 0x53 ); bsPutUChar ( bsWr, 0x59 );
535 }
536 }
537
538 fprintf ( stderr, "%s: finished\n", progName );
539 return 0;
540}
541
542
543
544/*-----------------------------------------------------------*/
545/*--- end bzip2recover.c ---*/
546/*-----------------------------------------------------------*/
Note: See TracBrowser for help on using the repository browser.