source: trunk/minix/commands/simple/sort.c@ 11

Last change on this file since 11 was 9, checked in by Mattia Monga, 14 years ago

Minix 3.1.2a

File size: 32.3 KB
RevLine 
[9]1/* sort - sort a file of lines Author: Michiel Huisjes */
2
3/* SYNOPSIS:
4 * sort [-funbirdcmt'x'] [+beg_pos[opts] [-end_pos]] [-o outfile] [file]..
5 *
6 * [opts] can be any of
7 * -f : Fold upper case to lower.
8 * -n : Sort to numeric value (optional decimal point) implies -b
9 * -b : Skip leading blanks
10 * -i : Ignore chars outside ASCII range (040 - 0176)
11 * -r : Reverse the sense of comparisons.
12 * -d : Sort to dictionary order. Only letters, digits, comma's and points
13 * are compared.
14 * If any of these flags are used in [opts], then they override all global
15 * ordering for this field.
16 *
17 * I/O control flags are:
18 * -u : Print uniq lines only once.
19 * -c : Check if files are sorted in order.
20 * -m : Merge already sorted files.
21 * -o outfile : Name of output file. (Can be one of the input files).
22 * Default is stdout.
23 * - : Take stdin as input.
24 *
25 * Fields:
26 * -t'x' : Field separating character is 'x'
27 * +a.b : Start comparing at field 'a' with offset 'b'. A missing 'b' is
28 * taken to be 0.
29 * -a.b : Stop comparing at field 'a' with offset 'b'. A missing 'b' is
30 * taken to be 0.
31 * A missing -a.b means the rest of the line.
32 */
33
34#include <sys/types.h>
35#include <sys/stat.h>
36#include <fcntl.h>
37#include <signal.h>
38#include <unistd.h>
39#include <stdlib.h>
40#include <string.h>
41#include <stdio.h>
42#include <limits.h>
43
44#define OPEN_FILES (OPEN_MAX-4) /* Nr of open files per process */
45#if __minix_vmd
46#define MEMORY_SIZE (1024 * 1024)
47#else
48#define MEMORY_SIZE ((10 * sizeof(int)) * 1024)
49#endif
50 /* Total mem_size */
51#define LINE_SIZE (1024 >> 1) /* Max length of a line */
52#define IO_SIZE (2 * 1024) /* Size of buffered output */
53#define STD_OUT 1 /* Fd of terminal */
54
55/* Return status of functions */
56#define OK 0
57#define ERROR -1
58#define NIL_PTR ((char *) 0)
59
60/* Compare return values */
61#define LOWER -1
62#define SAME 0
63#define HIGHER 1
64
65/* Table definitions. */
66#define DICT 0x001 /* Alpha, numeric, letters and . */
67#define ASCII 0x002 /* All between ' ' and '~' */
68#define BLANK 0x004 /* ' ' and '\t' */
69#define DIGIT 0x008 /* 0-9 */
70#define UPPER 0x010 /* A-Z */
71
72typedef int BOOL;
73
74#define FALSE 0
75#define TRUE 1
76
77typedef struct {
78 int fd; /* Fd of file */
79 char *buffer; /* Buffer for reads */
80 int read_chars; /* Nr of chars actually read in buffer */
81 int cnt; /* Nr of chars taken out of buffer */
82 char *line; /* Contains line currently used */
83} MERGE;
84
85#define NIL_MERGE ((MERGE *) 0)
86MERGE merge_f[OPEN_FILES]; /* Merge structs */
87int buf_size; /* Size of core available for each struct */
88
89#define FIELDS_LIMIT 10 /* 1 global + 9 user */
90#define GLOBAL 0
91
92typedef struct {
93 int beg_field, beg_pos; /* Begin field + offset */
94 int end_field, end_pos; /* End field + offset. ERROR == EOLN */
95 BOOL reverse; /* TRUE if rev. flag set on this field */
96 BOOL blanks;
97 BOOL dictionary;
98 BOOL fold_case;
99 BOOL ascii;
100 BOOL numeric;
101} FIELD;
102
103/* Field declarations. A total of FILEDS_LIMIT is allowed */
104FIELD fields[FIELDS_LIMIT];
105int field_cnt; /* Nr of field actually assigned */
106
107/* Various output control flags */
108BOOL check = FALSE;
109BOOL only_merge = FALSE;
110BOOL uniq = FALSE;
111
112char *mem_top; /* Mem_top points to lowest pos of memory. */
113char *cur_pos; /* First free position in mem */
114char **line_table; /* Pointer to the internal line table */
115BOOL in_core = TRUE; /* Set if input cannot all be sorted in core */
116
117 /* Place where temp_files should be made */
118char temp_files[] = "/tmp/sort.XXXXX.XX";
119char *output_file; /* Name of output file */
120int out_fd; /* Fd to output file (could be STD_OUT) */
121char out_buffer[IO_SIZE]; /* For buffered output */
122
123char **argptr; /* Pointer to argv structure */
124int args_offset; /* Nr of args spilled on options */
125int args_limit; /* Nr of args given */
126
127char separator; /* Char that separates fields */
128int nr_of_files = 0; /* Nr_of_files to be merged */
129int disabled; /* Nr of files done */
130
131char USAGE[] = "Usage: sort [-funbirdcmt'x'] [+beg_pos [-end_pos]] [-o outfile] [file] ..";
132
133/* Forward declarations */
134_PROTOTYPE(int main, (int argc, char **argv));
135_PROTOTYPE(void get_opts, (char *ptr, FIELD * field));
136_PROTOTYPE(void new_field, (FIELD * field, int *offset, BOOL beg_fl));
137_PROTOTYPE(void adjust_options, (FIELD * field));
138_PROTOTYPE(void error, (BOOL quit, char *message, char *arg));
139_PROTOTYPE(void open_outfile, (void));
140_PROTOTYPE(void get_file, (int fd, off_t size));
141_PROTOTYPE(int last_line, (void));
142_PROTOTYPE(void print_table, (int fd));
143_PROTOTYPE(char *file_name, (int nr));
144_PROTOTYPE(void mread, (int fd, char *address, int bytes));
145_PROTOTYPE(void mwrite, (int fd, char *address, int bytes));
146_PROTOTYPE(void sort, (void));
147_PROTOTYPE(void sort_table, (int nel));
148_PROTOTYPE(void incr, (int si, int ei));
149_PROTOTYPE(int cmp_fields, (char *el1, char *el2));
150_PROTOTYPE(void build_field, (char *dest, FIELD * field, char *src));
151_PROTOTYPE(char *skip_fields, (char *str, int nf));
152_PROTOTYPE(int compare, (char *el1, char *el2));
153_PROTOTYPE(int cmp, (unsigned char *el1, unsigned char *el2, FIELD * field));
154_PROTOTYPE(int digits, (char *str1, char *str2, BOOL check_sign));
155_PROTOTYPE(void files_merge, (int file_cnt));
156_PROTOTYPE(void merge, (int start_file, int limit_file));
157_PROTOTYPE(void put_line, (char *line));
158_PROTOTYPE(MERGE * print, (MERGE * merg, int file_cnt));
159_PROTOTYPE(int read_line, (MERGE * merg));
160_PROTOTYPE(MERGE * skip_lines, (MERGE * smallest, int file_cnt));
161_PROTOTYPE(void uniq_lines, (MERGE * merg));
162_PROTOTYPE(void check_file, (int fd, char *file));
163_PROTOTYPE(int length, (char *line));
164_PROTOTYPE(void copy, (char *dest, char *src));
165_PROTOTYPE(char *msbrk, (int size));
166_PROTOTYPE(void mbrk, (char *address));
167_PROTOTYPE(void catch, (int dummy));
168
169/* Table of all chars. 0 means no special meaning. */
170char table[256] = {
171/* '^@' to space */
172 0, 0, 0, 0, 0, 0, 0, 0,
173 0, BLANK | DICT, 0, 0, 0, 0, 0, 0,
174 0, 0, 0, 0, 0, 0, 0, 0,
175 0, 0, 0, 0, 0, 0, 0, 0,
176
177/* Space to '0' */
178 BLANK | DICT | ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII,
179 ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII,
180 ASCII, ASCII,
181
182/* '0' until '9' */
183 DIGIT | DICT | ASCII, DIGIT | DICT | ASCII, DIGIT | DICT | ASCII,
184 DIGIT | DICT | ASCII, DIGIT | DICT | ASCII, DIGIT | DICT | ASCII,
185 DIGIT | DICT | ASCII, DIGIT | DICT | ASCII, DIGIT | DICT | ASCII,
186 DIGIT | DICT | ASCII,
187
188/* ASCII from ':' to '@' */
189 ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII,
190
191/* Upper case letters 'A' to 'Z' */
192 UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII,
193 UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII,
194 UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII,
195 UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII,
196 UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII,
197 UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII,
198 UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII,
199 UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII,
200 UPPER | DICT | ASCII, UPPER | DICT | ASCII,
201
202/* ASCII from '[' to '`' */
203 ASCII, ASCII, ASCII, ASCII, ASCII, ASCII,
204
205/* Lower case letters from 'a' to 'z' */
206 DICT | ASCII, DICT | ASCII, DICT | ASCII, DICT | ASCII,
207 DICT | ASCII, DICT | ASCII, DICT | ASCII, DICT | ASCII,
208 DICT | ASCII, DICT | ASCII, DICT | ASCII, DICT | ASCII,
209 DICT | ASCII, DICT | ASCII, DICT | ASCII, DICT | ASCII,
210 DICT | ASCII, DICT | ASCII, DICT | ASCII, DICT | ASCII,
211 DICT | ASCII, DICT | ASCII, DICT | ASCII, DICT | ASCII,
212 DICT | ASCII, DICT | ASCII,
213
214/* ASCII from '{' to '~' */
215 ASCII, ASCII, ASCII, ASCII,
216
217/* Stuff from -1 to -177 */
218 0, 0, 0, 0, 0, 0, 0, 0, 0,
219 0, 0, 0, 0, 0, 0, 0, 0, 0,
220 0, 0, 0, 0, 0, 0, 0, 0, 0,
221 0, 0, 0, 0, 0, 0, 0, 0, 0,
222 0, 0, 0, 0, 0, 0, 0, 0, 0,
223 0, 0, 0, 0, 0, 0, 0, 0, 0,
224 0, 0, 0, 0, 0, 0, 0, 0, 0,
225 0, 0, 0, 0, 0, 0, 0, 0, 0,
226 0, 0, 0, 0, 0, 0, 0
227};
228
229
230/*
231 * Get_opts () assigns the options into the field structure as described in ptr.
232 * This field structure could be the GLOBAL one.
233 */
234void get_opts(ptr, field)
235register char *ptr;
236register FIELD *field;
237{
238 switch (*ptr) {
239 case 'b': /* Skip leading blanks */
240 field->blanks = TRUE;
241 break;
242 case 'd': /* Dictionary order */
243 field->dictionary = TRUE;
244 break;
245 case 'f': /* Fold upper case to lower */
246 field->fold_case = TRUE;
247 break;
248 case 'i': /* Skip chars outside ' ' '~' */
249 field->ascii = TRUE;
250 break;
251 case 'n': /* Sort on numeric */
252 field->numeric = TRUE;
253 field->blanks = TRUE;
254 break;
255 case 'r': /* Reverse comparisons */
256 field->reverse = TRUE;
257 break;
258 default: /* Illegal options */
259 error(TRUE, USAGE, NIL_PTR);
260 }
261}
262
263/* New_field () assigns a new field as described by the arguments.
264 * A field description is of the form: +a.b[opts] -c.d, where b and d, as well
265 * as -c.d and [opts] are optional. Nr before digit is field nr. Nr after digit
266 * is offset from field.
267 */
268void new_field(field, offset, beg_fl)
269register FIELD *field; /* Field to assign */
270int *offset; /* Offset in argv structure */
271BOOL beg_fl; /* Assign beg or end of field */
272{
273 register char *ptr;
274
275 ptr = argptr[*offset];
276 *offset += 1; /* Incr offset to next arg */
277 ptr++;
278
279 if (beg_fl)
280 field->beg_field = atoi(ptr); /* Assign int of first field */
281 else
282 field->end_field = atoi(ptr);
283
284 while (table[*ptr] & DIGIT) /* Skip all digits */
285 ptr++;
286
287 if (*ptr == '.') { /* Check for offset */
288 ptr++;
289 if (beg_fl)
290 field->beg_pos = atoi(ptr);
291 else
292 field->end_pos = atoi(ptr);
293 while (table[*ptr] & DIGIT) /* Skip digits */
294 ptr++;
295 }
296 if (beg_fl) {
297 while (*ptr != '\0') /* Check options after field */
298 get_opts(ptr++, field);
299 }
300 if (beg_fl) { /* Check for end pos */
301 ptr = argptr[*offset];
302 if (ptr && *ptr == '-' && ((table[*(ptr + 1)] & DIGIT) || *(ptr + 1) == '.')) {
303 new_field(field, offset, FALSE);
304 if (field->beg_field > field->end_field)
305 error(TRUE, "End field is before start field!", NIL_PTR);
306 } else /* No end pos. */
307 field->end_field = ERROR;
308 }
309}
310
311int main(argc, argv)
312int argc;
313char *argv[];
314{
315 int arg_count = 1; /* Offset in argv */
316 struct stat st;
317 register char *ptr; /* Ptr to *argv in use */
318 register int fd;
319 int pid, pow;
320
321 argptr = argv;
322 cur_pos = mem_top = msbrk(MEMORY_SIZE); /* Find lowest mem. location */
323
324 while (arg_count < argc && ((ptr = argv[arg_count])[0] == '-' || *ptr == '+')) {
325 if (*ptr == '-' && *(ptr + 1) == '\0') /* "-" means stdin */
326 break;
327 if (*ptr == '+') { /* Assign field. */
328 if (++field_cnt == FIELDS_LIMIT)
329 error(TRUE, "Too many fields", NIL_PTR);
330 new_field(&fields[field_cnt], &arg_count, TRUE);
331 } else { /* Get output options */
332 while (*++ptr) {
333 switch (*ptr) {
334 case 'c': /* Only check file */
335 check = TRUE;
336 break;
337 case 'm': /* Merge (sorted) files */
338 only_merge = TRUE;
339 break;
340 case 'u': /* Only give uniq lines */
341 uniq = TRUE;
342 break;
343 case 'o': /* Name of output file */
344 output_file = argv[++arg_count];
345 break;
346 case 't': /* Field separator */
347 ptr++;
348 separator = *ptr;
349 break;
350 default: /* Sort options */
351 get_opts(ptr, &fields[GLOBAL]);
352 }
353 }
354 arg_count++;
355 }
356 }
357
358 for (fd = 1; fd <= field_cnt; fd++) adjust_options(&fields[fd]);
359
360/* Create name of tem_files 'sort.pid.aa' */
361 ptr = &temp_files[10];
362 pid = getpid();
363 pow = 10000;
364 while (pow != 0) {
365 *ptr++ = pid / pow + '0';
366 pid %= pow;
367 pow /= 10;
368 }
369
370 signal(SIGINT, catch);
371
372/* Only merge files. Set up */
373 if (only_merge) {
374 args_limit = args_offset = arg_count;
375 while (argv[args_limit] != NIL_PTR)
376 args_limit++; /* Find nr of args */
377 files_merge(args_limit - arg_count);
378 exit(0);
379 }
380 if (arg_count == argc) { /* No args left. Use stdin */
381 if (check)
382 check_file(0, NIL_PTR);
383 else
384 get_file(0, (off_t) 0);
385 } else
386 while (arg_count < argc) { /* Sort or check args */
387 if (strcmp(argv[arg_count], "-") == 0)
388 fd = 0;
389 else if (stat(argv[arg_count], &st) < 0) {
390 error(FALSE, "Cannot find ", argv[arg_count++]);
391 continue;
392 }
393
394 /* Open files */
395 else if ((fd = open(argv[arg_count], O_RDONLY)) < 0) {
396 error(FALSE, "Cannot open ", argv[arg_count++]);
397 continue;
398 }
399 if (check)
400 check_file(fd, argv[arg_count]);
401 else /* Get_file reads whole file */
402 get_file(fd, st.st_size);
403 arg_count++;
404 }
405
406 if (check) exit(0);
407
408 sort(); /* Sort whatever is left */
409
410 if (nr_of_files == 1) /* Only one file sorted -> don't merge */
411 exit(0);
412
413 files_merge(nr_of_files);
414 return(0);
415}
416
417/* Adjust_options() assigns all global variables set also in the fields
418 * assigned.
419 */
420void adjust_options(field)
421register FIELD *field;
422{
423 register FIELD *gfield = &fields[GLOBAL];
424
425 if (gfield->reverse) field->reverse = TRUE;
426 if (gfield->blanks) field->blanks = TRUE;
427 if (gfield->dictionary) field->dictionary = TRUE;
428 if (gfield->fold_case) field->fold_case = TRUE;
429 if (gfield->ascii) field->ascii = TRUE;
430 if (gfield->numeric) field->numeric = TRUE;
431}
432
433/* Error () prints the error message on stderr and exits if quit == TRUE. */
434void error(quit, message, arg)
435register BOOL quit;
436register char *message, *arg;
437{
438 write(2, message, strlen(message));
439 if (arg != NIL_PTR) write(2, arg, strlen(arg));
440 perror(" ");
441 if (quit) exit(1);
442}
443
444/* Open_outfile () assigns to out_fd the fd where the output must go when all
445 * the sorting is done.
446 */
447void open_outfile()
448{
449 if (output_file == NIL_PTR)
450 out_fd = STD_OUT;
451 else if ((out_fd = creat(output_file, 0644)) < 0)
452 error(TRUE, "Cannot creat ", output_file);
453}
454
455/* Get_file reads the whole file of filedescriptor fd. If the file is too big
456 * to keep in core, a partial sort is done, and the output is stashed somewhere.
457 */
458void get_file(fd, size)
459int fd; /* Fd of file to read */
460register off_t size; /* Size of file */
461{
462 register int i;
463 int rest; /* Rest in memory */
464 char save_ch; /* Used in stdin readings */
465
466 rest = MEMORY_SIZE - (cur_pos - mem_top);
467 if (fd == 0) { /* We're reding stdin */
468 while ((i = read(0, cur_pos, rest)) > 0) {
469 if ((cur_pos - mem_top) + i == MEMORY_SIZE) {
470 in_core = FALSE;
471 i = last_line(); /* End of last line */
472 save_ch = mem_top[i];
473 mem_top[i] = '\0';
474 sort(); /* Sort core */
475 mem_top[i] = save_ch; /* Restore erased char */
476 /* Restore last (half read) line */
477 for (rest = 0; i + rest != MEMORY_SIZE; rest++)
478 mem_top[rest] = mem_top[i + rest];
479 /* Assign current pos. in memory */
480 cur_pos = &mem_top[rest];
481 } else { /* Fits, just assign position in mem. */
482 cur_pos = cur_pos + i;
483 *cur_pos = '\0';
484 }
485
486 /* Calculate rest of mem */
487 rest = MEMORY_SIZE - (cur_pos - mem_top);
488 }
489 }
490
491 /* Reading file. Check size */
492 else if (size > rest) { /* Won't fit */
493 mread(fd, cur_pos, rest);
494 in_core = FALSE;
495 i = last_line(); /* Get pos. of last line */
496 mem_top[i] = '\0'; /* Truncate */
497 (void) lseek(fd, (off_t) (i - MEMORY_SIZE), SEEK_CUR); /* Do this next time */
498 size = size - rest - i + MEMORY_SIZE; /* Calculate rest */
499 cur_pos = mem_top; /* Reset mem */
500 sort(); /* Sort core */
501 get_file(fd, size); /* Get rest of file */
502 } else { /* Fits. Just read in */
503 rest = size;
504 mread(fd, cur_pos, rest);
505 cur_pos = cur_pos + rest; /* Reassign cur_pos */
506 *cur_pos = '\0';
507 (void) close(fd); /* File completed */
508 }
509}
510
511/* Last_line () find the last line in core and retuns the offset from the top
512 * of the memory.
513 */
514int last_line()
515{
516 register int i;
517
518 for (i = MEMORY_SIZE - 2; i > 0; i--)
519 if (mem_top[i] == '\n') break;
520 return i + 1;
521}
522
523/* Print_table prints the line table in the given file_descriptor. If the fd
524 * equals ERROR, it opens a temp_file itself.
525 */
526void print_table(fd)
527int fd;
528{
529 register char **line_ptr; /* Ptr in line_table */
530 register char *ptr; /* Ptr to line */
531 int index = 0; /* Index in output buffer */
532
533 if (fd == ERROR) {
534 if ((fd = creat(file_name(nr_of_files), 0644)) < 0)
535 error(TRUE, "Cannot creat ", file_name(nr_of_files));
536 }
537 for (line_ptr = line_table; *line_ptr != NIL_PTR; line_ptr++) {
538 ptr = *line_ptr;
539 /* Skip all same lines if uniq is set */
540 if (uniq && *(line_ptr + 1) != NIL_PTR) {
541 if (compare(ptr, *(line_ptr + 1)) == SAME) continue;
542 }
543 do { /* Print line in a buffered way */
544 out_buffer[index++] = *ptr;
545 if (index == IO_SIZE) {
546 mwrite(fd, out_buffer, IO_SIZE);
547 index = 0;
548 }
549 } while (*ptr++ != '\n');
550 }
551 mwrite(fd, out_buffer, index);/* Flush buffer */
552 (void) close(fd); /* Close file */
553 nr_of_files++; /* Increment nr_of_files to merge */
554}
555
556/* File_name () returns the nr argument from the argument list, or a uniq
557 * filename if the nr is too high, or the arguments were not merge files.
558 */
559char *file_name(nr)
560register int nr;
561{
562 if (only_merge) {
563 if (args_offset + nr < args_limit) return argptr[args_offset + nr];
564 }
565 temp_files[16] = nr / 26 + 'a';
566 temp_files[17] = nr % 26 + 'a';
567
568 return temp_files;
569}
570
571/* Mread () performs a normal read (), but checks the return value. */
572void mread(fd, address, bytes)
573int fd;
574char *address;
575register int bytes;
576{
577 if (read(fd, address, bytes) < 0 && bytes != 0)
578 error(TRUE, "Read error", NIL_PTR);
579}
580
581/* Mwrite () performs a normal write (), but checks the return value. */
582void mwrite(fd, address, bytes)
583int fd;
584char *address;
585register int bytes;
586{
587 if (write(fd, address, bytes) != bytes && bytes != 0)
588 error(TRUE, "Write error", NIL_PTR);
589}
590
591/* Sort () sorts the input in memory starting at mem_top. */
592void sort()
593{
594 register char *ptr = mem_top;
595 register int count = 0;
596
597/* Count number of lines in memory */
598 while (*ptr) {
599 if (*ptr++ == '\n') count++;
600 }
601
602/* Set up the line table */
603 line_table = (char **) msbrk(count * sizeof(char *) + sizeof(char *));
604
605 count = 1;
606 ptr = line_table[0] = mem_top;
607 while (*ptr) {
608 if (*ptr++ == '\n') line_table[count++] = ptr;
609 }
610
611 line_table[count - 1] = NIL_PTR;
612
613/* Sort the line table */
614 sort_table(count - 1);
615
616/* Stash output somewhere */
617 if (in_core) {
618 open_outfile();
619 print_table(out_fd);
620 } else
621 print_table(ERROR);
622
623/* Free line table */
624 mbrk((char *) line_table);
625}
626
627/* Sort_table () sorts the line table consisting of nel elements. */
628void sort_table(nel)
629register int nel;
630{
631 char *tmp;
632 register int i;
633
634 /* Make heap */
635 for (i = (nel >> 1); i >= 1; i--) incr(i, nel);
636
637 /* Sort from heap */
638 for (i = nel; i > 1; i--) {
639 tmp = line_table[0];
640 line_table[0] = line_table[i - 1];
641 line_table[i - 1] = tmp;
642 incr(1, i - 1);
643 }
644}
645
646/* Incr () increments the heap. */
647void incr(si, ei)
648register int si, ei;
649{
650 char *tmp;
651
652 while (si <= (ei >> 1)) {
653 si <<= 1;
654 if (si + 1 <= ei && compare(line_table[si - 1], line_table[si]) <= 0)
655 si++;
656 if (compare(line_table[(si >> 1) - 1], line_table[si - 1]) >= 0)
657 return;
658 tmp = line_table[(si >> 1) - 1];
659 line_table[(si >> 1) - 1] = line_table[si - 1];
660 line_table[si - 1] = tmp;
661 }
662}
663
664/* Cmp_fields builds new lines out of the lines pointed to by el1 and el2 and
665 * puts it into the line1 and line2 arrays. It then calls the cmp () routine
666 * with the field describing the arguments.
667 */
668int cmp_fields(el1, el2)
669register char *el1, *el2;
670{
671 int i, ret;
672 char line1[LINE_SIZE], line2[LINE_SIZE];
673
674 for (i = 0; i < field_cnt; i++) { /* Setup line parts */
675 build_field(line1, &fields[i + 1], el1);
676 build_field(line2, &fields[i + 1], el2);
677 if ((ret = cmp((unsigned char *) line1, (unsigned char *) line2,
678 &fields[i + 1])) != SAME)
679 break; /* If equal, try next field */
680 }
681
682/* Check for reverse flag */
683 if (i != field_cnt && fields[i + 1].reverse) return -ret;
684
685/* Else return the last return value of cmp () */
686 return ret;
687}
688
689/* Build_field builds a new line from the src as described by the field.
690 * The result is put in dest.
691 */
692void build_field(dest, field, src)
693char *dest; /* Holds result */
694register FIELD *field; /* Field description */
695register char *src; /* Source line */
696{
697 char *begin = src; /* Remember start location */
698 char *last; /* Pointer to end location */
699 int i;
700
701/* Skip begin fields */
702 src = skip_fields(src, field->beg_field);
703
704/* Skip begin positions */
705 for (i = 0; i < field->beg_pos && *src != '\n'; i++) src++;
706
707/* Copy whatever is left */
708 copy(dest, src);
709
710/* If end field is assigned truncate (perhaps) the part copied */
711 if (field->end_field != ERROR) { /* Find last field */
712 last = skip_fields(begin, field->end_field);
713/* Skip positions as given by end fields description */
714 for (i = 0; i < field->end_pos && *last != '\n'; i++) last++;
715 dest[last - src] = '\n';/* Truncate line */
716 }
717}
718
719/* Skip_fields () skips nf fields of the line pointed to by str. */
720char *skip_fields(str, nf)
721register char *str;
722int nf;
723{
724 while (nf-- > 0) {
725 if (separator == '\0') {/* Means ' ' or '\t' */
726 while (*str != ' ' && *str != '\t' && *str != '\n') str++;
727 while (table[*str] & BLANK) str++;
728 } else {
729 while (*str != separator && *str != '\n') str++;
730 if (*str == separator) str++;
731 }
732 }
733 return str; /* Return pointer to indicated field */
734}
735
736/* Compare is called by all sorting routines. It checks if fields assignments
737 * has been made. if so, it calls cmp_fields (). If not, it calls cmp () and
738 * reversed the return value if the (global) reverse flag is set.
739 */
740int compare(el1, el2)
741register char *el1, *el2;
742{
743 int ret;
744
745 if (field_cnt > GLOBAL) return cmp_fields(el1, el2);
746
747 ret = cmp((unsigned char *) el1, (unsigned char *) el2, &fields[GLOBAL]);
748 return(fields[GLOBAL].reverse) ? -ret : ret;
749}
750
751/* Cmp () is the actual compare routine. It compares according to the
752 * description given in the field pointer.
753 */
754int cmp(el1, el2, field)
755register unsigned char *el1, *el2;
756FIELD *field;
757{
758 int c1, c2;
759
760 if (field->blanks) { /* Skip leading blanks */
761 while (table[*el1] & BLANK) el1++;
762 while (table[*el2] & BLANK) el2++;
763 }
764 if (field->numeric) /* Compare numeric */
765 return digits((char *) el1, (char *) el2, TRUE);
766
767 for (;;) {
768 while (*el1 == *el2) {
769 if (*el1++ == '\n') /* EOLN on both strings */
770 return SAME;
771 el2++;
772 }
773 if (*el1 == '\n') /* EOLN on string one */
774 return LOWER;
775 if (*el2 == '\n') return HIGHER;
776 if (field->ascii) { /* Skip chars outside 040 - 0177 */
777 if ((table[*el1] & ASCII) == 0) {
778 do {
779 el1++;
780 } while ((table[*el1] & ASCII) == 0);
781 continue;
782 }
783 if ((table[*el2] & ASCII) == 0) {
784 do {
785 el2++;
786 } while ((table[*el2] & ASCII) == 0);
787 continue;
788 }
789 }
790 if (field->dictionary) {/* Skip non-dict chars */
791 if ((table[*el1] & DICT) == 0) {
792 do {
793 el1++;
794 } while ((table[*el1] & DICT) == 0);
795 continue;
796 }
797 if ((table[*el2] & DICT) == 0) {
798 do {
799 el2++;
800 } while ((table[*el2] & DICT) == 0);
801 continue;
802 }
803 }
804 if (field->fold_case) { /* Fold upper case to lower */
805 if (table[c1 = *el1++] & UPPER) c1 += 'a' - 'A';
806 if (table[c2 = *el2++] & UPPER) c2 += 'a' - 'A';
807 if (c1 == c2) continue;
808 return c1 - c2;
809 }
810 return *el1 - *el2;
811 }
812
813 /* NOTREACHED */
814}
815
816/*
817 * Digits compares () the two strings that point to a number of digits followed
818 * by an optional decimal point.
819 */
820int digits(str1, str2, check_sign)
821register char *str1, *str2;
822BOOL check_sign; /* True if sign must be checked */
823{
824 BOOL negative = FALSE; /* True if negative numbers */
825 int diff, pow, ret;
826
827/* Check for optional minus or plus sign */
828 if (check_sign) {
829 if (*str1 == '-') {
830 negative = TRUE;
831 str1++;
832 } else if (*str1 == '+')
833 str1++;
834
835 if (*str2 == '-') {
836 if (negative == FALSE) return HIGHER;
837 str2++;
838 } else if (negative)
839 return LOWER;
840 else if (*str2 == '+')
841 str2++;
842 }
843
844/* Keep incrementing as long as digits are available and equal */
845 while ((table[*str1] & DIGIT) && table[*str2] & DIGIT) {
846 if (*str1 != *str2) break;
847 str1++;
848 str2++;
849 }
850
851/* First check for the decimal point. */
852 if (*str1 == '.' || *str2 == '.') {
853 if (*str1 == '.') {
854 if (*str2 == '.') /* Both. Check decimal part */
855 ret = digits(str1 + 1, str2 + 1, FALSE);
856 else
857 ret = (table[*str2] & DIGIT) ? LOWER : HIGHER;
858 } else
859 ret = (table[*str1] & DIGIT) ? HIGHER : LOWER;
860 }
861
862/* Now either two digits differ, or unknown char is seen (e.g. end of string) */
863 else if ((table[*str1] & DIGIT) && (table[*str2] & DIGIT)) {
864 diff = *str1 - *str2; /* Basic difference */
865 pow = 0; /* Check power of numbers */
866 while (table[*str1++] & DIGIT) pow++;
867 while (table[*str2++] & DIGIT) pow--;
868 ret = (pow == 0) ? diff : pow;
869 }
870
871/* Unknown char. Check on which string it occurred */
872 else {
873 if ((table[*str1] & DIGIT) == 0)
874 ret = (table[*str2] & DIGIT) ? LOWER : SAME;
875 else
876 ret = HIGHER;
877 }
878
879/* Reverse sense of comparisons if negative is true. (-1000 < -1) */
880 return(negative) ? -ret : ret;
881}
882
883/* Files_merge () merges all files as indicated by nr_of_files. Merging goes
884 * in numbers of files that can be opened at the same time. (OPEN_FILES)
885 */
886void files_merge(file_cnt)
887register int file_cnt; /* Nr_of_files to merge */
888{
889 register int i;
890 int limit;
891
892 for (i = 0; i < file_cnt; i += OPEN_FILES) {
893 /* Merge last files and store in output file */
894 if ((limit = i + OPEN_FILES) >= file_cnt) {
895 open_outfile();
896 limit = file_cnt;
897 } else { /* Merge OPEN_FILES files and store in temp
898 * file */
899 temp_files[16] = file_cnt / 26 + 'a';
900 temp_files[17] = file_cnt % 26 + 'a';
901 if ((out_fd = creat(temp_files, 0644)) < 0)
902 error(TRUE, "Cannot creat ", temp_files);
903 file_cnt++;
904 }
905 merge(i, limit);
906 }
907
908/* Cleanup mess */
909 i = (only_merge) ? args_limit - args_offset : 0;
910 while (i < file_cnt) (void) unlink(file_name(i++));
911}
912
913/* Merge () merges the files between start_file and limit_file. */
914void merge(start_file, limit_file)
915int start_file, limit_file;
916{
917 register MERGE *smallest; /* Keeps track of smallest line */
918 register int i;
919 int file_cnt = limit_file - start_file; /* Nr of files to merge */
920
921/* Calculate size in core available for file_cnt merge structs */
922 buf_size = MEMORY_SIZE / file_cnt - LINE_SIZE;
923
924 mbrk(mem_top); /* First reset mem to lowest loc. */
925 disabled = 0; /* All files not done yet */
926
927/* Set up merge structures. */
928 for (i = start_file; i < limit_file; i++) {
929 smallest = &merge_f[i - start_file];
930 if (!strcmp(file_name(i), "-")) /* File is stdin */
931 smallest->fd = 0;
932 else if ((smallest->fd = open(file_name(i), O_RDONLY)) < 0) {
933 smallest->fd = ERROR;
934 error(FALSE, "Cannot open ", file_name(i));
935 disabled++; /* Done this file */
936 continue;
937 }
938 smallest->buffer = msbrk(buf_size);
939 smallest->line = msbrk(LINE_SIZE);
940 smallest->cnt = smallest->read_chars = 0;
941 (void) read_line(smallest); /* Read first line */
942 }
943
944 if (disabled == file_cnt) { /* Couldn't open files */
945 (void) close(out_fd);
946 return;
947 }
948
949/* Find a merg struct to assign smallest. */
950 for (i = 0; i < file_cnt; i++) {
951 if (merge_f[i].fd != ERROR) {
952 smallest = &merge_f[i];
953 break;
954 }
955 }
956
957/* Loop until all files minus one are done */
958 while (disabled < file_cnt - 1) {
959 if (uniq) /* Skip all same lines */
960 smallest = skip_lines(smallest, file_cnt);
961 else { /* Find smallest line */
962 for (i = 0; i < file_cnt; i++) {
963 if (merge_f[i].fd == ERROR)
964 continue; /* We've had this one */
965 if (compare(merge_f[i].line, smallest->line) < 0)
966 smallest = &merge_f[i];
967 }
968 } /* Print line and read next */
969 smallest = print(smallest, file_cnt);
970 }
971
972 if (only_merge && uniq)
973 uniq_lines(smallest); /* Print only uniq lines */
974 else /* Print rest of file */
975 while (print(smallest, file_cnt) != NIL_MERGE);
976
977 put_line(NIL_PTR); /* Flush output buffer */
978}
979
980/* Put_line () prints the line into the out_fd filedescriptor. If line equals
981 * NIL_PTR, the out_fd is flushed and closed.
982 */
983void put_line(line)
984register char *line;
985{
986 static int index = 0; /* Index in out_buffer */
987
988 if (line == NIL_PTR) { /* Flush and close */
989 mwrite(out_fd, out_buffer, index);
990 index = 0;
991 (void) close(out_fd);
992 return;
993 }
994 do { /* Fill out_buffer with line */
995 out_buffer[index++] = *line;
996 if (index == IO_SIZE) {
997 mwrite(out_fd, out_buffer, IO_SIZE);
998 index = 0;
999 }
1000 } while (*line++ != '\n');
1001}
1002
1003/*
1004 * Print () prints the line of the merg structure and tries to read another one.
1005 * If this fails, it returns the next merg structure which file_descriptor is
1006 * still open. If none could be found, a NIL structure is returned.
1007 */
1008MERGE *print(merg, file_cnt)
1009register MERGE *merg;
1010int file_cnt; /* Nr of files that are being merged */
1011{
1012 register int i;
1013
1014 put_line(merg->line); /* Print the line */
1015
1016 if (read_line(merg) == ERROR) { /* Read next line */
1017 for (i = 0; i < file_cnt; i++) {
1018 if (merge_f[i].fd != ERROR) {
1019 merg = &merge_f[i];
1020 break;
1021 }
1022 }
1023 if (i == file_cnt) /* No more files left */
1024 return NIL_MERGE;
1025 }
1026 return merg;
1027}
1028
1029/* Read_line () reads a line from the fd from the merg struct. If the read
1030 * failed, disabled is incremented and the file is closed. Readings are
1031 * done in buf_size bytes.
1032 * Lines longer than LINE_SIZE are silently truncated.
1033 */
1034int read_line(merg)
1035register MERGE *merg;
1036{
1037 register char *ptr = merg->line - 1; /* Ptr buf that will hold line */
1038
1039 do {
1040 ptr++;
1041 if (merg->cnt == merg->read_chars) { /* Read new buffer */
1042 if ((merg->read_chars =
1043 read(merg->fd, merg->buffer, buf_size)) <= 0) {
1044 (void) close(merg->fd); /* OOPS */
1045 merg->fd = ERROR;
1046 disabled++;
1047 return ERROR;
1048 }
1049 merg->cnt = 0;
1050 }
1051 *ptr = merg->buffer[merg->cnt++]; /* Assign next char of line */
1052 if (ptr - merg->line == LINE_SIZE - 1)
1053 *ptr = '\n'; /* Truncate very long lines */
1054 } while (*ptr != '\n' && *ptr != '\0');
1055
1056 if (*ptr == '\0') /* Add '\n' to last line */
1057 *ptr = '\n';
1058 *++ptr = '\0'; /* Add '\0' */
1059 return OK;
1060}
1061
1062/* Skip_lines () skips all same lines in all the files currently being merged.
1063 * It returns a pointer to the merge struct containing the smallest line.
1064 */
1065MERGE *skip_lines(smallest, file_cnt)
1066register MERGE *smallest;
1067int file_cnt;
1068{
1069 register int i;
1070 int ret;
1071
1072 if (disabled == file_cnt - 1) /* We've had all */
1073 return smallest;
1074
1075 for (i = 0; i < file_cnt; i++) {
1076 if (merge_f[i].fd == ERROR || smallest == &merge_f[i])
1077 continue; /* Don't check same file */
1078 while ((ret = compare(merge_f[i].line, smallest->line)) == 0) {
1079 if (read_line(&merge_f[i]) == ERROR) break; /* EOF */
1080 }
1081 if (ret < 0) /* Line wasn't smallest. Try again */
1082 return skip_lines(&merge_f[i], file_cnt);
1083 }
1084 return smallest;
1085}
1086
1087/* Uniq_lines () prints only the uniq lines out of the fd of the merg struct. */
1088void uniq_lines(merg)
1089register MERGE *merg;
1090{
1091 char lastline[LINE_SIZE]; /* Buffer to hold last line */
1092
1093 for (;;) {
1094 put_line(merg->line); /* Print this line */
1095 copy(lastline, merg->line); /* and save it */
1096 if (read_line(merg) == ERROR) /* Read the next */
1097 return;
1098 /* Keep reading until lines duffer */
1099 while (compare(lastline, merg->line) == SAME)
1100 if (read_line(merg) == ERROR) return;
1101 }
1102
1103 /* NOTREACHED */
1104}
1105
1106/*
1107 * Check_file () checks if a file is sorted in order according to the arguments
1108 * given in main ().
1109 */
1110void check_file(fd, file)
1111int fd;
1112char *file;
1113{
1114 register MERGE *merg; /* 1 file only */
1115 char lastline[LINE_SIZE]; /* Save last line */
1116 register int ret; /* ret status of compare */
1117
1118 if (fd == 0) file = "stdin";
1119 merg = (MERGE *) mem_top; /* Assign MERGE structure */
1120 merg->buffer = mem_top + sizeof(MERGE);
1121 merg->line = msbrk(LINE_SIZE);
1122 merg->cnt = merg->read_chars = 0;
1123 merg->fd = fd;
1124 buf_size = MEMORY_SIZE - sizeof(MERGE);
1125
1126 if (read_line(merg) == ERROR) /* Read first line */
1127 return;
1128 copy(lastline, merg->line); /* and save it */
1129
1130 for (;;) {
1131 if (read_line(merg) == ERROR) /* EOF reached */
1132 break;
1133 if ((ret = compare(lastline, merg->line)) > 0) {
1134 error(FALSE, "Disorder in file ", file);
1135 write(2, merg->line, length(merg->line));
1136 break;
1137 } else if (ret < 0) /* Copy if lines not equal */
1138 copy(lastline, merg->line);
1139 else if (uniq) {
1140 error(FALSE, "Non uniq line in file ", file);
1141 write(2, merg->line, length(merg->line));
1142 break;
1143 }
1144 }
1145
1146 mbrk(mem_top); /* Reset mem */
1147}
1148
1149/* Length () returns the length of the argument line including the linefeed. */
1150int length(line)
1151register char *line;
1152{
1153 register int i = 1; /* Add linefeed */
1154
1155 while (*line++ != '\n') i++;
1156 return i;
1157}
1158
1159/* Copy () copies the src line into the dest line including linefeed. */
1160void copy(dest, src)
1161register char *dest, *src;
1162{
1163 while ((*dest++ = *src++) != '\n');
1164}
1165
1166/* Msbrk() does a sbrk() and checks the return value. */
1167char *msbrk(size)
1168register int size;
1169{
1170 register char *address;
1171
1172 if ((address = sbrk(size)) == (char *) -1)
1173 error(TRUE, "Not enough memory. Use chmem to allocate more", NIL_PTR);
1174 return address;
1175}
1176
1177/* Mbrk() does a brk() and checks the return value. */
1178void mbrk(address)
1179char *address;
1180{
1181 if (brk(address) == -1) error(TRUE, "Cannot reset memory", NIL_PTR);
1182}
1183
1184void catch(dummy)
1185int dummy; /* to satisfy the prototype */
1186{
1187 register int i;
1188
1189 signal(SIGINT, SIG_IGN);
1190 only_merge = FALSE;
1191 for (i = 0; i < 26; i++) (void) unlink(file_name(i));
1192 exit(2);
1193}
Note: See TracBrowser for help on using the repository browser.