1 | /* sort - sort a file of lines Author: Michiel Huisjes */
|
---|
2 |
|
---|
3 | /* SYNOPSIS:
|
---|
4 | * sort [-funbirdcmt'x'] [+beg_pos[opts] [-end_pos]] [-o outfile] [file]..
|
---|
5 | *
|
---|
6 | * [opts] can be any of
|
---|
7 | * -f : Fold upper case to lower.
|
---|
8 | * -n : Sort to numeric value (optional decimal point) implies -b
|
---|
9 | * -b : Skip leading blanks
|
---|
10 | * -i : Ignore chars outside ASCII range (040 - 0176)
|
---|
11 | * -r : Reverse the sense of comparisons.
|
---|
12 | * -d : Sort to dictionary order. Only letters, digits, comma's and points
|
---|
13 | * are compared.
|
---|
14 | * If any of these flags are used in [opts], then they override all global
|
---|
15 | * ordering for this field.
|
---|
16 | *
|
---|
17 | * I/O control flags are:
|
---|
18 | * -u : Print uniq lines only once.
|
---|
19 | * -c : Check if files are sorted in order.
|
---|
20 | * -m : Merge already sorted files.
|
---|
21 | * -o outfile : Name of output file. (Can be one of the input files).
|
---|
22 | * Default is stdout.
|
---|
23 | * - : Take stdin as input.
|
---|
24 | *
|
---|
25 | * Fields:
|
---|
26 | * -t'x' : Field separating character is 'x'
|
---|
27 | * +a.b : Start comparing at field 'a' with offset 'b'. A missing 'b' is
|
---|
28 | * taken to be 0.
|
---|
29 | * -a.b : Stop comparing at field 'a' with offset 'b'. A missing 'b' is
|
---|
30 | * taken to be 0.
|
---|
31 | * A missing -a.b means the rest of the line.
|
---|
32 | */
|
---|
33 |
|
---|
34 | #include <sys/types.h>
|
---|
35 | #include <sys/stat.h>
|
---|
36 | #include <fcntl.h>
|
---|
37 | #include <signal.h>
|
---|
38 | #include <unistd.h>
|
---|
39 | #include <stdlib.h>
|
---|
40 | #include <string.h>
|
---|
41 | #include <stdio.h>
|
---|
42 | #include <limits.h>
|
---|
43 |
|
---|
44 | #define OPEN_FILES (OPEN_MAX-4) /* Nr of open files per process */
|
---|
45 | #if __minix_vmd
|
---|
46 | #define MEMORY_SIZE (1024 * 1024)
|
---|
47 | #else
|
---|
48 | #define MEMORY_SIZE ((10 * sizeof(int)) * 1024)
|
---|
49 | #endif
|
---|
50 | /* Total mem_size */
|
---|
51 | #define LINE_SIZE (1024 >> 1) /* Max length of a line */
|
---|
52 | #define IO_SIZE (2 * 1024) /* Size of buffered output */
|
---|
53 | #define STD_OUT 1 /* Fd of terminal */
|
---|
54 |
|
---|
55 | /* Return status of functions */
|
---|
56 | #define OK 0
|
---|
57 | #define ERROR -1
|
---|
58 | #define NIL_PTR ((char *) 0)
|
---|
59 |
|
---|
60 | /* Compare return values */
|
---|
61 | #define LOWER -1
|
---|
62 | #define SAME 0
|
---|
63 | #define HIGHER 1
|
---|
64 |
|
---|
65 | /* Table definitions. */
|
---|
66 | #define DICT 0x001 /* Alpha, numeric, letters and . */
|
---|
67 | #define ASCII 0x002 /* All between ' ' and '~' */
|
---|
68 | #define BLANK 0x004 /* ' ' and '\t' */
|
---|
69 | #define DIGIT 0x008 /* 0-9 */
|
---|
70 | #define UPPER 0x010 /* A-Z */
|
---|
71 |
|
---|
72 | typedef int BOOL;
|
---|
73 |
|
---|
74 | #define FALSE 0
|
---|
75 | #define TRUE 1
|
---|
76 |
|
---|
77 | typedef struct {
|
---|
78 | int fd; /* Fd of file */
|
---|
79 | char *buffer; /* Buffer for reads */
|
---|
80 | int read_chars; /* Nr of chars actually read in buffer */
|
---|
81 | int cnt; /* Nr of chars taken out of buffer */
|
---|
82 | char *line; /* Contains line currently used */
|
---|
83 | } MERGE;
|
---|
84 |
|
---|
85 | #define NIL_MERGE ((MERGE *) 0)
|
---|
86 | MERGE merge_f[OPEN_FILES]; /* Merge structs */
|
---|
87 | int buf_size; /* Size of core available for each struct */
|
---|
88 |
|
---|
89 | #define FIELDS_LIMIT 10 /* 1 global + 9 user */
|
---|
90 | #define GLOBAL 0
|
---|
91 |
|
---|
92 | typedef struct {
|
---|
93 | int beg_field, beg_pos; /* Begin field + offset */
|
---|
94 | int end_field, end_pos; /* End field + offset. ERROR == EOLN */
|
---|
95 | BOOL reverse; /* TRUE if rev. flag set on this field */
|
---|
96 | BOOL blanks;
|
---|
97 | BOOL dictionary;
|
---|
98 | BOOL fold_case;
|
---|
99 | BOOL ascii;
|
---|
100 | BOOL numeric;
|
---|
101 | } FIELD;
|
---|
102 |
|
---|
103 | /* Field declarations. A total of FILEDS_LIMIT is allowed */
|
---|
104 | FIELD fields[FIELDS_LIMIT];
|
---|
105 | int field_cnt; /* Nr of field actually assigned */
|
---|
106 |
|
---|
107 | /* Various output control flags */
|
---|
108 | BOOL check = FALSE;
|
---|
109 | BOOL only_merge = FALSE;
|
---|
110 | BOOL uniq = FALSE;
|
---|
111 |
|
---|
112 | char *mem_top; /* Mem_top points to lowest pos of memory. */
|
---|
113 | char *cur_pos; /* First free position in mem */
|
---|
114 | char **line_table; /* Pointer to the internal line table */
|
---|
115 | BOOL in_core = TRUE; /* Set if input cannot all be sorted in core */
|
---|
116 |
|
---|
117 | /* Place where temp_files should be made */
|
---|
118 | char temp_files[] = "/tmp/sort.XXXXX.XX";
|
---|
119 | char *output_file; /* Name of output file */
|
---|
120 | int out_fd; /* Fd to output file (could be STD_OUT) */
|
---|
121 | char out_buffer[IO_SIZE]; /* For buffered output */
|
---|
122 |
|
---|
123 | char **argptr; /* Pointer to argv structure */
|
---|
124 | int args_offset; /* Nr of args spilled on options */
|
---|
125 | int args_limit; /* Nr of args given */
|
---|
126 |
|
---|
127 | char separator; /* Char that separates fields */
|
---|
128 | int nr_of_files = 0; /* Nr_of_files to be merged */
|
---|
129 | int disabled; /* Nr of files done */
|
---|
130 |
|
---|
131 | char USAGE[] = "Usage: sort [-funbirdcmt'x'] [+beg_pos [-end_pos]] [-o outfile] [file] ..";
|
---|
132 |
|
---|
133 | /* Forward declarations */
|
---|
134 | _PROTOTYPE(int main, (int argc, char **argv));
|
---|
135 | _PROTOTYPE(void get_opts, (char *ptr, FIELD * field));
|
---|
136 | _PROTOTYPE(void new_field, (FIELD * field, int *offset, BOOL beg_fl));
|
---|
137 | _PROTOTYPE(void adjust_options, (FIELD * field));
|
---|
138 | _PROTOTYPE(void error, (BOOL quit, char *message, char *arg));
|
---|
139 | _PROTOTYPE(void open_outfile, (void));
|
---|
140 | _PROTOTYPE(void get_file, (int fd, off_t size));
|
---|
141 | _PROTOTYPE(int last_line, (void));
|
---|
142 | _PROTOTYPE(void print_table, (int fd));
|
---|
143 | _PROTOTYPE(char *file_name, (int nr));
|
---|
144 | _PROTOTYPE(void mread, (int fd, char *address, int bytes));
|
---|
145 | _PROTOTYPE(void mwrite, (int fd, char *address, int bytes));
|
---|
146 | _PROTOTYPE(void sort, (void));
|
---|
147 | _PROTOTYPE(void sort_table, (int nel));
|
---|
148 | _PROTOTYPE(void incr, (int si, int ei));
|
---|
149 | _PROTOTYPE(int cmp_fields, (char *el1, char *el2));
|
---|
150 | _PROTOTYPE(void build_field, (char *dest, FIELD * field, char *src));
|
---|
151 | _PROTOTYPE(char *skip_fields, (char *str, int nf));
|
---|
152 | _PROTOTYPE(int compare, (char *el1, char *el2));
|
---|
153 | _PROTOTYPE(int cmp, (unsigned char *el1, unsigned char *el2, FIELD * field));
|
---|
154 | _PROTOTYPE(int digits, (char *str1, char *str2, BOOL check_sign));
|
---|
155 | _PROTOTYPE(void files_merge, (int file_cnt));
|
---|
156 | _PROTOTYPE(void merge, (int start_file, int limit_file));
|
---|
157 | _PROTOTYPE(void put_line, (char *line));
|
---|
158 | _PROTOTYPE(MERGE * print, (MERGE * merg, int file_cnt));
|
---|
159 | _PROTOTYPE(int read_line, (MERGE * merg));
|
---|
160 | _PROTOTYPE(MERGE * skip_lines, (MERGE * smallest, int file_cnt));
|
---|
161 | _PROTOTYPE(void uniq_lines, (MERGE * merg));
|
---|
162 | _PROTOTYPE(void check_file, (int fd, char *file));
|
---|
163 | _PROTOTYPE(int length, (char *line));
|
---|
164 | _PROTOTYPE(void copy, (char *dest, char *src));
|
---|
165 | _PROTOTYPE(char *msbrk, (int size));
|
---|
166 | _PROTOTYPE(void mbrk, (char *address));
|
---|
167 | _PROTOTYPE(void catch, (int dummy));
|
---|
168 |
|
---|
169 | /* Table of all chars. 0 means no special meaning. */
|
---|
170 | char table[256] = {
|
---|
171 | /* '^@' to space */
|
---|
172 | 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
173 | 0, BLANK | DICT, 0, 0, 0, 0, 0, 0,
|
---|
174 | 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
175 | 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
176 |
|
---|
177 | /* Space to '0' */
|
---|
178 | BLANK | DICT | ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII,
|
---|
179 | ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII,
|
---|
180 | ASCII, ASCII,
|
---|
181 |
|
---|
182 | /* '0' until '9' */
|
---|
183 | DIGIT | DICT | ASCII, DIGIT | DICT | ASCII, DIGIT | DICT | ASCII,
|
---|
184 | DIGIT | DICT | ASCII, DIGIT | DICT | ASCII, DIGIT | DICT | ASCII,
|
---|
185 | DIGIT | DICT | ASCII, DIGIT | DICT | ASCII, DIGIT | DICT | ASCII,
|
---|
186 | DIGIT | DICT | ASCII,
|
---|
187 |
|
---|
188 | /* ASCII from ':' to '@' */
|
---|
189 | ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII,
|
---|
190 |
|
---|
191 | /* Upper case letters 'A' to 'Z' */
|
---|
192 | UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII,
|
---|
193 | UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII,
|
---|
194 | UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII,
|
---|
195 | UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII,
|
---|
196 | UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII,
|
---|
197 | UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII,
|
---|
198 | UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII,
|
---|
199 | UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII,
|
---|
200 | UPPER | DICT | ASCII, UPPER | DICT | ASCII,
|
---|
201 |
|
---|
202 | /* ASCII from '[' to '`' */
|
---|
203 | ASCII, ASCII, ASCII, ASCII, ASCII, ASCII,
|
---|
204 |
|
---|
205 | /* Lower case letters from 'a' to 'z' */
|
---|
206 | DICT | ASCII, DICT | ASCII, DICT | ASCII, DICT | ASCII,
|
---|
207 | DICT | ASCII, DICT | ASCII, DICT | ASCII, DICT | ASCII,
|
---|
208 | DICT | ASCII, DICT | ASCII, DICT | ASCII, DICT | ASCII,
|
---|
209 | DICT | ASCII, DICT | ASCII, DICT | ASCII, DICT | ASCII,
|
---|
210 | DICT | ASCII, DICT | ASCII, DICT | ASCII, DICT | ASCII,
|
---|
211 | DICT | ASCII, DICT | ASCII, DICT | ASCII, DICT | ASCII,
|
---|
212 | DICT | ASCII, DICT | ASCII,
|
---|
213 |
|
---|
214 | /* ASCII from '{' to '~' */
|
---|
215 | ASCII, ASCII, ASCII, ASCII,
|
---|
216 |
|
---|
217 | /* Stuff from -1 to -177 */
|
---|
218 | 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
219 | 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
220 | 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
221 | 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
222 | 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
223 | 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
224 | 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
225 | 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
226 | 0, 0, 0, 0, 0, 0, 0
|
---|
227 | };
|
---|
228 |
|
---|
229 |
|
---|
230 | /*
|
---|
231 | * Get_opts () assigns the options into the field structure as described in ptr.
|
---|
232 | * This field structure could be the GLOBAL one.
|
---|
233 | */
|
---|
234 | void get_opts(ptr, field)
|
---|
235 | register char *ptr;
|
---|
236 | register FIELD *field;
|
---|
237 | {
|
---|
238 | switch (*ptr) {
|
---|
239 | case 'b': /* Skip leading blanks */
|
---|
240 | field->blanks = TRUE;
|
---|
241 | break;
|
---|
242 | case 'd': /* Dictionary order */
|
---|
243 | field->dictionary = TRUE;
|
---|
244 | break;
|
---|
245 | case 'f': /* Fold upper case to lower */
|
---|
246 | field->fold_case = TRUE;
|
---|
247 | break;
|
---|
248 | case 'i': /* Skip chars outside ' ' '~' */
|
---|
249 | field->ascii = TRUE;
|
---|
250 | break;
|
---|
251 | case 'n': /* Sort on numeric */
|
---|
252 | field->numeric = TRUE;
|
---|
253 | field->blanks = TRUE;
|
---|
254 | break;
|
---|
255 | case 'r': /* Reverse comparisons */
|
---|
256 | field->reverse = TRUE;
|
---|
257 | break;
|
---|
258 | default: /* Illegal options */
|
---|
259 | error(TRUE, USAGE, NIL_PTR);
|
---|
260 | }
|
---|
261 | }
|
---|
262 |
|
---|
263 | /* New_field () assigns a new field as described by the arguments.
|
---|
264 | * A field description is of the form: +a.b[opts] -c.d, where b and d, as well
|
---|
265 | * as -c.d and [opts] are optional. Nr before digit is field nr. Nr after digit
|
---|
266 | * is offset from field.
|
---|
267 | */
|
---|
268 | void new_field(field, offset, beg_fl)
|
---|
269 | register FIELD *field; /* Field to assign */
|
---|
270 | int *offset; /* Offset in argv structure */
|
---|
271 | BOOL beg_fl; /* Assign beg or end of field */
|
---|
272 | {
|
---|
273 | register char *ptr;
|
---|
274 |
|
---|
275 | ptr = argptr[*offset];
|
---|
276 | *offset += 1; /* Incr offset to next arg */
|
---|
277 | ptr++;
|
---|
278 |
|
---|
279 | if (beg_fl)
|
---|
280 | field->beg_field = atoi(ptr); /* Assign int of first field */
|
---|
281 | else
|
---|
282 | field->end_field = atoi(ptr);
|
---|
283 |
|
---|
284 | while (table[*ptr] & DIGIT) /* Skip all digits */
|
---|
285 | ptr++;
|
---|
286 |
|
---|
287 | if (*ptr == '.') { /* Check for offset */
|
---|
288 | ptr++;
|
---|
289 | if (beg_fl)
|
---|
290 | field->beg_pos = atoi(ptr);
|
---|
291 | else
|
---|
292 | field->end_pos = atoi(ptr);
|
---|
293 | while (table[*ptr] & DIGIT) /* Skip digits */
|
---|
294 | ptr++;
|
---|
295 | }
|
---|
296 | if (beg_fl) {
|
---|
297 | while (*ptr != '\0') /* Check options after field */
|
---|
298 | get_opts(ptr++, field);
|
---|
299 | }
|
---|
300 | if (beg_fl) { /* Check for end pos */
|
---|
301 | ptr = argptr[*offset];
|
---|
302 | if (ptr && *ptr == '-' && ((table[*(ptr + 1)] & DIGIT) || *(ptr + 1) == '.')) {
|
---|
303 | new_field(field, offset, FALSE);
|
---|
304 | if (field->beg_field > field->end_field)
|
---|
305 | error(TRUE, "End field is before start field!", NIL_PTR);
|
---|
306 | } else /* No end pos. */
|
---|
307 | field->end_field = ERROR;
|
---|
308 | }
|
---|
309 | }
|
---|
310 |
|
---|
311 | int main(argc, argv)
|
---|
312 | int argc;
|
---|
313 | char *argv[];
|
---|
314 | {
|
---|
315 | int arg_count = 1; /* Offset in argv */
|
---|
316 | struct stat st;
|
---|
317 | register char *ptr; /* Ptr to *argv in use */
|
---|
318 | register int fd;
|
---|
319 | int pid, pow;
|
---|
320 |
|
---|
321 | argptr = argv;
|
---|
322 | cur_pos = mem_top = msbrk(MEMORY_SIZE); /* Find lowest mem. location */
|
---|
323 |
|
---|
324 | while (arg_count < argc && ((ptr = argv[arg_count])[0] == '-' || *ptr == '+')) {
|
---|
325 | if (*ptr == '-' && *(ptr + 1) == '\0') /* "-" means stdin */
|
---|
326 | break;
|
---|
327 | if (*ptr == '+') { /* Assign field. */
|
---|
328 | if (++field_cnt == FIELDS_LIMIT)
|
---|
329 | error(TRUE, "Too many fields", NIL_PTR);
|
---|
330 | new_field(&fields[field_cnt], &arg_count, TRUE);
|
---|
331 | } else { /* Get output options */
|
---|
332 | while (*++ptr) {
|
---|
333 | switch (*ptr) {
|
---|
334 | case 'c': /* Only check file */
|
---|
335 | check = TRUE;
|
---|
336 | break;
|
---|
337 | case 'm': /* Merge (sorted) files */
|
---|
338 | only_merge = TRUE;
|
---|
339 | break;
|
---|
340 | case 'u': /* Only give uniq lines */
|
---|
341 | uniq = TRUE;
|
---|
342 | break;
|
---|
343 | case 'o': /* Name of output file */
|
---|
344 | output_file = argv[++arg_count];
|
---|
345 | break;
|
---|
346 | case 't': /* Field separator */
|
---|
347 | ptr++;
|
---|
348 | separator = *ptr;
|
---|
349 | break;
|
---|
350 | default: /* Sort options */
|
---|
351 | get_opts(ptr, &fields[GLOBAL]);
|
---|
352 | }
|
---|
353 | }
|
---|
354 | arg_count++;
|
---|
355 | }
|
---|
356 | }
|
---|
357 |
|
---|
358 | for (fd = 1; fd <= field_cnt; fd++) adjust_options(&fields[fd]);
|
---|
359 |
|
---|
360 | /* Create name of tem_files 'sort.pid.aa' */
|
---|
361 | ptr = &temp_files[10];
|
---|
362 | pid = getpid();
|
---|
363 | pow = 10000;
|
---|
364 | while (pow != 0) {
|
---|
365 | *ptr++ = pid / pow + '0';
|
---|
366 | pid %= pow;
|
---|
367 | pow /= 10;
|
---|
368 | }
|
---|
369 |
|
---|
370 | signal(SIGINT, catch);
|
---|
371 |
|
---|
372 | /* Only merge files. Set up */
|
---|
373 | if (only_merge) {
|
---|
374 | args_limit = args_offset = arg_count;
|
---|
375 | while (argv[args_limit] != NIL_PTR)
|
---|
376 | args_limit++; /* Find nr of args */
|
---|
377 | files_merge(args_limit - arg_count);
|
---|
378 | exit(0);
|
---|
379 | }
|
---|
380 | if (arg_count == argc) { /* No args left. Use stdin */
|
---|
381 | if (check)
|
---|
382 | check_file(0, NIL_PTR);
|
---|
383 | else
|
---|
384 | get_file(0, (off_t) 0);
|
---|
385 | } else
|
---|
386 | while (arg_count < argc) { /* Sort or check args */
|
---|
387 | if (strcmp(argv[arg_count], "-") == 0)
|
---|
388 | fd = 0;
|
---|
389 | else if (stat(argv[arg_count], &st) < 0) {
|
---|
390 | error(FALSE, "Cannot find ", argv[arg_count++]);
|
---|
391 | continue;
|
---|
392 | }
|
---|
393 |
|
---|
394 | /* Open files */
|
---|
395 | else if ((fd = open(argv[arg_count], O_RDONLY)) < 0) {
|
---|
396 | error(FALSE, "Cannot open ", argv[arg_count++]);
|
---|
397 | continue;
|
---|
398 | }
|
---|
399 | if (check)
|
---|
400 | check_file(fd, argv[arg_count]);
|
---|
401 | else /* Get_file reads whole file */
|
---|
402 | get_file(fd, st.st_size);
|
---|
403 | arg_count++;
|
---|
404 | }
|
---|
405 |
|
---|
406 | if (check) exit(0);
|
---|
407 |
|
---|
408 | sort(); /* Sort whatever is left */
|
---|
409 |
|
---|
410 | if (nr_of_files == 1) /* Only one file sorted -> don't merge */
|
---|
411 | exit(0);
|
---|
412 |
|
---|
413 | files_merge(nr_of_files);
|
---|
414 | return(0);
|
---|
415 | }
|
---|
416 |
|
---|
417 | /* Adjust_options() assigns all global variables set also in the fields
|
---|
418 | * assigned.
|
---|
419 | */
|
---|
420 | void adjust_options(field)
|
---|
421 | register FIELD *field;
|
---|
422 | {
|
---|
423 | register FIELD *gfield = &fields[GLOBAL];
|
---|
424 |
|
---|
425 | if (gfield->reverse) field->reverse = TRUE;
|
---|
426 | if (gfield->blanks) field->blanks = TRUE;
|
---|
427 | if (gfield->dictionary) field->dictionary = TRUE;
|
---|
428 | if (gfield->fold_case) field->fold_case = TRUE;
|
---|
429 | if (gfield->ascii) field->ascii = TRUE;
|
---|
430 | if (gfield->numeric) field->numeric = TRUE;
|
---|
431 | }
|
---|
432 |
|
---|
433 | /* Error () prints the error message on stderr and exits if quit == TRUE. */
|
---|
434 | void error(quit, message, arg)
|
---|
435 | register BOOL quit;
|
---|
436 | register char *message, *arg;
|
---|
437 | {
|
---|
438 | write(2, message, strlen(message));
|
---|
439 | if (arg != NIL_PTR) write(2, arg, strlen(arg));
|
---|
440 | perror(" ");
|
---|
441 | if (quit) exit(1);
|
---|
442 | }
|
---|
443 |
|
---|
444 | /* Open_outfile () assigns to out_fd the fd where the output must go when all
|
---|
445 | * the sorting is done.
|
---|
446 | */
|
---|
447 | void open_outfile()
|
---|
448 | {
|
---|
449 | if (output_file == NIL_PTR)
|
---|
450 | out_fd = STD_OUT;
|
---|
451 | else if ((out_fd = creat(output_file, 0644)) < 0)
|
---|
452 | error(TRUE, "Cannot creat ", output_file);
|
---|
453 | }
|
---|
454 |
|
---|
455 | /* Get_file reads the whole file of filedescriptor fd. If the file is too big
|
---|
456 | * to keep in core, a partial sort is done, and the output is stashed somewhere.
|
---|
457 | */
|
---|
458 | void get_file(fd, size)
|
---|
459 | int fd; /* Fd of file to read */
|
---|
460 | register off_t size; /* Size of file */
|
---|
461 | {
|
---|
462 | register int i;
|
---|
463 | int rest; /* Rest in memory */
|
---|
464 | char save_ch; /* Used in stdin readings */
|
---|
465 |
|
---|
466 | rest = MEMORY_SIZE - (cur_pos - mem_top);
|
---|
467 | if (fd == 0) { /* We're reding stdin */
|
---|
468 | while ((i = read(0, cur_pos, rest)) > 0) {
|
---|
469 | if ((cur_pos - mem_top) + i == MEMORY_SIZE) {
|
---|
470 | in_core = FALSE;
|
---|
471 | i = last_line(); /* End of last line */
|
---|
472 | save_ch = mem_top[i];
|
---|
473 | mem_top[i] = '\0';
|
---|
474 | sort(); /* Sort core */
|
---|
475 | mem_top[i] = save_ch; /* Restore erased char */
|
---|
476 | /* Restore last (half read) line */
|
---|
477 | for (rest = 0; i + rest != MEMORY_SIZE; rest++)
|
---|
478 | mem_top[rest] = mem_top[i + rest];
|
---|
479 | /* Assign current pos. in memory */
|
---|
480 | cur_pos = &mem_top[rest];
|
---|
481 | } else { /* Fits, just assign position in mem. */
|
---|
482 | cur_pos = cur_pos + i;
|
---|
483 | *cur_pos = '\0';
|
---|
484 | }
|
---|
485 |
|
---|
486 | /* Calculate rest of mem */
|
---|
487 | rest = MEMORY_SIZE - (cur_pos - mem_top);
|
---|
488 | }
|
---|
489 | }
|
---|
490 |
|
---|
491 | /* Reading file. Check size */
|
---|
492 | else if (size > rest) { /* Won't fit */
|
---|
493 | mread(fd, cur_pos, rest);
|
---|
494 | in_core = FALSE;
|
---|
495 | i = last_line(); /* Get pos. of last line */
|
---|
496 | mem_top[i] = '\0'; /* Truncate */
|
---|
497 | (void) lseek(fd, (off_t) (i - MEMORY_SIZE), SEEK_CUR); /* Do this next time */
|
---|
498 | size = size - rest - i + MEMORY_SIZE; /* Calculate rest */
|
---|
499 | cur_pos = mem_top; /* Reset mem */
|
---|
500 | sort(); /* Sort core */
|
---|
501 | get_file(fd, size); /* Get rest of file */
|
---|
502 | } else { /* Fits. Just read in */
|
---|
503 | rest = size;
|
---|
504 | mread(fd, cur_pos, rest);
|
---|
505 | cur_pos = cur_pos + rest; /* Reassign cur_pos */
|
---|
506 | *cur_pos = '\0';
|
---|
507 | (void) close(fd); /* File completed */
|
---|
508 | }
|
---|
509 | }
|
---|
510 |
|
---|
511 | /* Last_line () find the last line in core and retuns the offset from the top
|
---|
512 | * of the memory.
|
---|
513 | */
|
---|
514 | int last_line()
|
---|
515 | {
|
---|
516 | register int i;
|
---|
517 |
|
---|
518 | for (i = MEMORY_SIZE - 2; i > 0; i--)
|
---|
519 | if (mem_top[i] == '\n') break;
|
---|
520 | return i + 1;
|
---|
521 | }
|
---|
522 |
|
---|
523 | /* Print_table prints the line table in the given file_descriptor. If the fd
|
---|
524 | * equals ERROR, it opens a temp_file itself.
|
---|
525 | */
|
---|
526 | void print_table(fd)
|
---|
527 | int fd;
|
---|
528 | {
|
---|
529 | register char **line_ptr; /* Ptr in line_table */
|
---|
530 | register char *ptr; /* Ptr to line */
|
---|
531 | int index = 0; /* Index in output buffer */
|
---|
532 |
|
---|
533 | if (fd == ERROR) {
|
---|
534 | if ((fd = creat(file_name(nr_of_files), 0644)) < 0)
|
---|
535 | error(TRUE, "Cannot creat ", file_name(nr_of_files));
|
---|
536 | }
|
---|
537 | for (line_ptr = line_table; *line_ptr != NIL_PTR; line_ptr++) {
|
---|
538 | ptr = *line_ptr;
|
---|
539 | /* Skip all same lines if uniq is set */
|
---|
540 | if (uniq && *(line_ptr + 1) != NIL_PTR) {
|
---|
541 | if (compare(ptr, *(line_ptr + 1)) == SAME) continue;
|
---|
542 | }
|
---|
543 | do { /* Print line in a buffered way */
|
---|
544 | out_buffer[index++] = *ptr;
|
---|
545 | if (index == IO_SIZE) {
|
---|
546 | mwrite(fd, out_buffer, IO_SIZE);
|
---|
547 | index = 0;
|
---|
548 | }
|
---|
549 | } while (*ptr++ != '\n');
|
---|
550 | }
|
---|
551 | mwrite(fd, out_buffer, index);/* Flush buffer */
|
---|
552 | (void) close(fd); /* Close file */
|
---|
553 | nr_of_files++; /* Increment nr_of_files to merge */
|
---|
554 | }
|
---|
555 |
|
---|
556 | /* File_name () returns the nr argument from the argument list, or a uniq
|
---|
557 | * filename if the nr is too high, or the arguments were not merge files.
|
---|
558 | */
|
---|
559 | char *file_name(nr)
|
---|
560 | register int nr;
|
---|
561 | {
|
---|
562 | if (only_merge) {
|
---|
563 | if (args_offset + nr < args_limit) return argptr[args_offset + nr];
|
---|
564 | }
|
---|
565 | temp_files[16] = nr / 26 + 'a';
|
---|
566 | temp_files[17] = nr % 26 + 'a';
|
---|
567 |
|
---|
568 | return temp_files;
|
---|
569 | }
|
---|
570 |
|
---|
571 | /* Mread () performs a normal read (), but checks the return value. */
|
---|
572 | void mread(fd, address, bytes)
|
---|
573 | int fd;
|
---|
574 | char *address;
|
---|
575 | register int bytes;
|
---|
576 | {
|
---|
577 | if (read(fd, address, bytes) < 0 && bytes != 0)
|
---|
578 | error(TRUE, "Read error", NIL_PTR);
|
---|
579 | }
|
---|
580 |
|
---|
581 | /* Mwrite () performs a normal write (), but checks the return value. */
|
---|
582 | void mwrite(fd, address, bytes)
|
---|
583 | int fd;
|
---|
584 | char *address;
|
---|
585 | register int bytes;
|
---|
586 | {
|
---|
587 | if (write(fd, address, bytes) != bytes && bytes != 0)
|
---|
588 | error(TRUE, "Write error", NIL_PTR);
|
---|
589 | }
|
---|
590 |
|
---|
591 | /* Sort () sorts the input in memory starting at mem_top. */
|
---|
592 | void sort()
|
---|
593 | {
|
---|
594 | register char *ptr = mem_top;
|
---|
595 | register int count = 0;
|
---|
596 |
|
---|
597 | /* Count number of lines in memory */
|
---|
598 | while (*ptr) {
|
---|
599 | if (*ptr++ == '\n') count++;
|
---|
600 | }
|
---|
601 |
|
---|
602 | /* Set up the line table */
|
---|
603 | line_table = (char **) msbrk(count * sizeof(char *) + sizeof(char *));
|
---|
604 |
|
---|
605 | count = 1;
|
---|
606 | ptr = line_table[0] = mem_top;
|
---|
607 | while (*ptr) {
|
---|
608 | if (*ptr++ == '\n') line_table[count++] = ptr;
|
---|
609 | }
|
---|
610 |
|
---|
611 | line_table[count - 1] = NIL_PTR;
|
---|
612 |
|
---|
613 | /* Sort the line table */
|
---|
614 | sort_table(count - 1);
|
---|
615 |
|
---|
616 | /* Stash output somewhere */
|
---|
617 | if (in_core) {
|
---|
618 | open_outfile();
|
---|
619 | print_table(out_fd);
|
---|
620 | } else
|
---|
621 | print_table(ERROR);
|
---|
622 |
|
---|
623 | /* Free line table */
|
---|
624 | mbrk((char *) line_table);
|
---|
625 | }
|
---|
626 |
|
---|
627 | /* Sort_table () sorts the line table consisting of nel elements. */
|
---|
628 | void sort_table(nel)
|
---|
629 | register int nel;
|
---|
630 | {
|
---|
631 | char *tmp;
|
---|
632 | register int i;
|
---|
633 |
|
---|
634 | /* Make heap */
|
---|
635 | for (i = (nel >> 1); i >= 1; i--) incr(i, nel);
|
---|
636 |
|
---|
637 | /* Sort from heap */
|
---|
638 | for (i = nel; i > 1; i--) {
|
---|
639 | tmp = line_table[0];
|
---|
640 | line_table[0] = line_table[i - 1];
|
---|
641 | line_table[i - 1] = tmp;
|
---|
642 | incr(1, i - 1);
|
---|
643 | }
|
---|
644 | }
|
---|
645 |
|
---|
646 | /* Incr () increments the heap. */
|
---|
647 | void incr(si, ei)
|
---|
648 | register int si, ei;
|
---|
649 | {
|
---|
650 | char *tmp;
|
---|
651 |
|
---|
652 | while (si <= (ei >> 1)) {
|
---|
653 | si <<= 1;
|
---|
654 | if (si + 1 <= ei && compare(line_table[si - 1], line_table[si]) <= 0)
|
---|
655 | si++;
|
---|
656 | if (compare(line_table[(si >> 1) - 1], line_table[si - 1]) >= 0)
|
---|
657 | return;
|
---|
658 | tmp = line_table[(si >> 1) - 1];
|
---|
659 | line_table[(si >> 1) - 1] = line_table[si - 1];
|
---|
660 | line_table[si - 1] = tmp;
|
---|
661 | }
|
---|
662 | }
|
---|
663 |
|
---|
664 | /* Cmp_fields builds new lines out of the lines pointed to by el1 and el2 and
|
---|
665 | * puts it into the line1 and line2 arrays. It then calls the cmp () routine
|
---|
666 | * with the field describing the arguments.
|
---|
667 | */
|
---|
668 | int cmp_fields(el1, el2)
|
---|
669 | register char *el1, *el2;
|
---|
670 | {
|
---|
671 | int i, ret;
|
---|
672 | char line1[LINE_SIZE], line2[LINE_SIZE];
|
---|
673 |
|
---|
674 | for (i = 0; i < field_cnt; i++) { /* Setup line parts */
|
---|
675 | build_field(line1, &fields[i + 1], el1);
|
---|
676 | build_field(line2, &fields[i + 1], el2);
|
---|
677 | if ((ret = cmp((unsigned char *) line1, (unsigned char *) line2,
|
---|
678 | &fields[i + 1])) != SAME)
|
---|
679 | break; /* If equal, try next field */
|
---|
680 | }
|
---|
681 |
|
---|
682 | /* Check for reverse flag */
|
---|
683 | if (i != field_cnt && fields[i + 1].reverse) return -ret;
|
---|
684 |
|
---|
685 | /* Else return the last return value of cmp () */
|
---|
686 | return ret;
|
---|
687 | }
|
---|
688 |
|
---|
689 | /* Build_field builds a new line from the src as described by the field.
|
---|
690 | * The result is put in dest.
|
---|
691 | */
|
---|
692 | void build_field(dest, field, src)
|
---|
693 | char *dest; /* Holds result */
|
---|
694 | register FIELD *field; /* Field description */
|
---|
695 | register char *src; /* Source line */
|
---|
696 | {
|
---|
697 | char *begin = src; /* Remember start location */
|
---|
698 | char *last; /* Pointer to end location */
|
---|
699 | int i;
|
---|
700 |
|
---|
701 | /* Skip begin fields */
|
---|
702 | src = skip_fields(src, field->beg_field);
|
---|
703 |
|
---|
704 | /* Skip begin positions */
|
---|
705 | for (i = 0; i < field->beg_pos && *src != '\n'; i++) src++;
|
---|
706 |
|
---|
707 | /* Copy whatever is left */
|
---|
708 | copy(dest, src);
|
---|
709 |
|
---|
710 | /* If end field is assigned truncate (perhaps) the part copied */
|
---|
711 | if (field->end_field != ERROR) { /* Find last field */
|
---|
712 | last = skip_fields(begin, field->end_field);
|
---|
713 | /* Skip positions as given by end fields description */
|
---|
714 | for (i = 0; i < field->end_pos && *last != '\n'; i++) last++;
|
---|
715 | dest[last - src] = '\n';/* Truncate line */
|
---|
716 | }
|
---|
717 | }
|
---|
718 |
|
---|
719 | /* Skip_fields () skips nf fields of the line pointed to by str. */
|
---|
720 | char *skip_fields(str, nf)
|
---|
721 | register char *str;
|
---|
722 | int nf;
|
---|
723 | {
|
---|
724 | while (nf-- > 0) {
|
---|
725 | if (separator == '\0') {/* Means ' ' or '\t' */
|
---|
726 | while (*str != ' ' && *str != '\t' && *str != '\n') str++;
|
---|
727 | while (table[*str] & BLANK) str++;
|
---|
728 | } else {
|
---|
729 | while (*str != separator && *str != '\n') str++;
|
---|
730 | if (*str == separator) str++;
|
---|
731 | }
|
---|
732 | }
|
---|
733 | return str; /* Return pointer to indicated field */
|
---|
734 | }
|
---|
735 |
|
---|
736 | /* Compare is called by all sorting routines. It checks if fields assignments
|
---|
737 | * has been made. if so, it calls cmp_fields (). If not, it calls cmp () and
|
---|
738 | * reversed the return value if the (global) reverse flag is set.
|
---|
739 | */
|
---|
740 | int compare(el1, el2)
|
---|
741 | register char *el1, *el2;
|
---|
742 | {
|
---|
743 | int ret;
|
---|
744 |
|
---|
745 | if (field_cnt > GLOBAL) return cmp_fields(el1, el2);
|
---|
746 |
|
---|
747 | ret = cmp((unsigned char *) el1, (unsigned char *) el2, &fields[GLOBAL]);
|
---|
748 | return(fields[GLOBAL].reverse) ? -ret : ret;
|
---|
749 | }
|
---|
750 |
|
---|
751 | /* Cmp () is the actual compare routine. It compares according to the
|
---|
752 | * description given in the field pointer.
|
---|
753 | */
|
---|
754 | int cmp(el1, el2, field)
|
---|
755 | register unsigned char *el1, *el2;
|
---|
756 | FIELD *field;
|
---|
757 | {
|
---|
758 | int c1, c2;
|
---|
759 |
|
---|
760 | if (field->blanks) { /* Skip leading blanks */
|
---|
761 | while (table[*el1] & BLANK) el1++;
|
---|
762 | while (table[*el2] & BLANK) el2++;
|
---|
763 | }
|
---|
764 | if (field->numeric) /* Compare numeric */
|
---|
765 | return digits((char *) el1, (char *) el2, TRUE);
|
---|
766 |
|
---|
767 | for (;;) {
|
---|
768 | while (*el1 == *el2) {
|
---|
769 | if (*el1++ == '\n') /* EOLN on both strings */
|
---|
770 | return SAME;
|
---|
771 | el2++;
|
---|
772 | }
|
---|
773 | if (*el1 == '\n') /* EOLN on string one */
|
---|
774 | return LOWER;
|
---|
775 | if (*el2 == '\n') return HIGHER;
|
---|
776 | if (field->ascii) { /* Skip chars outside 040 - 0177 */
|
---|
777 | if ((table[*el1] & ASCII) == 0) {
|
---|
778 | do {
|
---|
779 | el1++;
|
---|
780 | } while ((table[*el1] & ASCII) == 0);
|
---|
781 | continue;
|
---|
782 | }
|
---|
783 | if ((table[*el2] & ASCII) == 0) {
|
---|
784 | do {
|
---|
785 | el2++;
|
---|
786 | } while ((table[*el2] & ASCII) == 0);
|
---|
787 | continue;
|
---|
788 | }
|
---|
789 | }
|
---|
790 | if (field->dictionary) {/* Skip non-dict chars */
|
---|
791 | if ((table[*el1] & DICT) == 0) {
|
---|
792 | do {
|
---|
793 | el1++;
|
---|
794 | } while ((table[*el1] & DICT) == 0);
|
---|
795 | continue;
|
---|
796 | }
|
---|
797 | if ((table[*el2] & DICT) == 0) {
|
---|
798 | do {
|
---|
799 | el2++;
|
---|
800 | } while ((table[*el2] & DICT) == 0);
|
---|
801 | continue;
|
---|
802 | }
|
---|
803 | }
|
---|
804 | if (field->fold_case) { /* Fold upper case to lower */
|
---|
805 | if (table[c1 = *el1++] & UPPER) c1 += 'a' - 'A';
|
---|
806 | if (table[c2 = *el2++] & UPPER) c2 += 'a' - 'A';
|
---|
807 | if (c1 == c2) continue;
|
---|
808 | return c1 - c2;
|
---|
809 | }
|
---|
810 | return *el1 - *el2;
|
---|
811 | }
|
---|
812 |
|
---|
813 | /* NOTREACHED */
|
---|
814 | }
|
---|
815 |
|
---|
816 | /*
|
---|
817 | * Digits compares () the two strings that point to a number of digits followed
|
---|
818 | * by an optional decimal point.
|
---|
819 | */
|
---|
820 | int digits(str1, str2, check_sign)
|
---|
821 | register char *str1, *str2;
|
---|
822 | BOOL check_sign; /* True if sign must be checked */
|
---|
823 | {
|
---|
824 | BOOL negative = FALSE; /* True if negative numbers */
|
---|
825 | int diff, pow, ret;
|
---|
826 |
|
---|
827 | /* Check for optional minus or plus sign */
|
---|
828 | if (check_sign) {
|
---|
829 | if (*str1 == '-') {
|
---|
830 | negative = TRUE;
|
---|
831 | str1++;
|
---|
832 | } else if (*str1 == '+')
|
---|
833 | str1++;
|
---|
834 |
|
---|
835 | if (*str2 == '-') {
|
---|
836 | if (negative == FALSE) return HIGHER;
|
---|
837 | str2++;
|
---|
838 | } else if (negative)
|
---|
839 | return LOWER;
|
---|
840 | else if (*str2 == '+')
|
---|
841 | str2++;
|
---|
842 | }
|
---|
843 |
|
---|
844 | /* Keep incrementing as long as digits are available and equal */
|
---|
845 | while ((table[*str1] & DIGIT) && table[*str2] & DIGIT) {
|
---|
846 | if (*str1 != *str2) break;
|
---|
847 | str1++;
|
---|
848 | str2++;
|
---|
849 | }
|
---|
850 |
|
---|
851 | /* First check for the decimal point. */
|
---|
852 | if (*str1 == '.' || *str2 == '.') {
|
---|
853 | if (*str1 == '.') {
|
---|
854 | if (*str2 == '.') /* Both. Check decimal part */
|
---|
855 | ret = digits(str1 + 1, str2 + 1, FALSE);
|
---|
856 | else
|
---|
857 | ret = (table[*str2] & DIGIT) ? LOWER : HIGHER;
|
---|
858 | } else
|
---|
859 | ret = (table[*str1] & DIGIT) ? HIGHER : LOWER;
|
---|
860 | }
|
---|
861 |
|
---|
862 | /* Now either two digits differ, or unknown char is seen (e.g. end of string) */
|
---|
863 | else if ((table[*str1] & DIGIT) && (table[*str2] & DIGIT)) {
|
---|
864 | diff = *str1 - *str2; /* Basic difference */
|
---|
865 | pow = 0; /* Check power of numbers */
|
---|
866 | while (table[*str1++] & DIGIT) pow++;
|
---|
867 | while (table[*str2++] & DIGIT) pow--;
|
---|
868 | ret = (pow == 0) ? diff : pow;
|
---|
869 | }
|
---|
870 |
|
---|
871 | /* Unknown char. Check on which string it occurred */
|
---|
872 | else {
|
---|
873 | if ((table[*str1] & DIGIT) == 0)
|
---|
874 | ret = (table[*str2] & DIGIT) ? LOWER : SAME;
|
---|
875 | else
|
---|
876 | ret = HIGHER;
|
---|
877 | }
|
---|
878 |
|
---|
879 | /* Reverse sense of comparisons if negative is true. (-1000 < -1) */
|
---|
880 | return(negative) ? -ret : ret;
|
---|
881 | }
|
---|
882 |
|
---|
883 | /* Files_merge () merges all files as indicated by nr_of_files. Merging goes
|
---|
884 | * in numbers of files that can be opened at the same time. (OPEN_FILES)
|
---|
885 | */
|
---|
886 | void files_merge(file_cnt)
|
---|
887 | register int file_cnt; /* Nr_of_files to merge */
|
---|
888 | {
|
---|
889 | register int i;
|
---|
890 | int limit;
|
---|
891 |
|
---|
892 | for (i = 0; i < file_cnt; i += OPEN_FILES) {
|
---|
893 | /* Merge last files and store in output file */
|
---|
894 | if ((limit = i + OPEN_FILES) >= file_cnt) {
|
---|
895 | open_outfile();
|
---|
896 | limit = file_cnt;
|
---|
897 | } else { /* Merge OPEN_FILES files and store in temp
|
---|
898 | * file */
|
---|
899 | temp_files[16] = file_cnt / 26 + 'a';
|
---|
900 | temp_files[17] = file_cnt % 26 + 'a';
|
---|
901 | if ((out_fd = creat(temp_files, 0644)) < 0)
|
---|
902 | error(TRUE, "Cannot creat ", temp_files);
|
---|
903 | file_cnt++;
|
---|
904 | }
|
---|
905 | merge(i, limit);
|
---|
906 | }
|
---|
907 |
|
---|
908 | /* Cleanup mess */
|
---|
909 | i = (only_merge) ? args_limit - args_offset : 0;
|
---|
910 | while (i < file_cnt) (void) unlink(file_name(i++));
|
---|
911 | }
|
---|
912 |
|
---|
913 | /* Merge () merges the files between start_file and limit_file. */
|
---|
914 | void merge(start_file, limit_file)
|
---|
915 | int start_file, limit_file;
|
---|
916 | {
|
---|
917 | register MERGE *smallest; /* Keeps track of smallest line */
|
---|
918 | register int i;
|
---|
919 | int file_cnt = limit_file - start_file; /* Nr of files to merge */
|
---|
920 |
|
---|
921 | /* Calculate size in core available for file_cnt merge structs */
|
---|
922 | buf_size = MEMORY_SIZE / file_cnt - LINE_SIZE;
|
---|
923 |
|
---|
924 | mbrk(mem_top); /* First reset mem to lowest loc. */
|
---|
925 | disabled = 0; /* All files not done yet */
|
---|
926 |
|
---|
927 | /* Set up merge structures. */
|
---|
928 | for (i = start_file; i < limit_file; i++) {
|
---|
929 | smallest = &merge_f[i - start_file];
|
---|
930 | if (!strcmp(file_name(i), "-")) /* File is stdin */
|
---|
931 | smallest->fd = 0;
|
---|
932 | else if ((smallest->fd = open(file_name(i), O_RDONLY)) < 0) {
|
---|
933 | smallest->fd = ERROR;
|
---|
934 | error(FALSE, "Cannot open ", file_name(i));
|
---|
935 | disabled++; /* Done this file */
|
---|
936 | continue;
|
---|
937 | }
|
---|
938 | smallest->buffer = msbrk(buf_size);
|
---|
939 | smallest->line = msbrk(LINE_SIZE);
|
---|
940 | smallest->cnt = smallest->read_chars = 0;
|
---|
941 | (void) read_line(smallest); /* Read first line */
|
---|
942 | }
|
---|
943 |
|
---|
944 | if (disabled == file_cnt) { /* Couldn't open files */
|
---|
945 | (void) close(out_fd);
|
---|
946 | return;
|
---|
947 | }
|
---|
948 |
|
---|
949 | /* Find a merg struct to assign smallest. */
|
---|
950 | for (i = 0; i < file_cnt; i++) {
|
---|
951 | if (merge_f[i].fd != ERROR) {
|
---|
952 | smallest = &merge_f[i];
|
---|
953 | break;
|
---|
954 | }
|
---|
955 | }
|
---|
956 |
|
---|
957 | /* Loop until all files minus one are done */
|
---|
958 | while (disabled < file_cnt - 1) {
|
---|
959 | if (uniq) /* Skip all same lines */
|
---|
960 | smallest = skip_lines(smallest, file_cnt);
|
---|
961 | else { /* Find smallest line */
|
---|
962 | for (i = 0; i < file_cnt; i++) {
|
---|
963 | if (merge_f[i].fd == ERROR)
|
---|
964 | continue; /* We've had this one */
|
---|
965 | if (compare(merge_f[i].line, smallest->line) < 0)
|
---|
966 | smallest = &merge_f[i];
|
---|
967 | }
|
---|
968 | } /* Print line and read next */
|
---|
969 | smallest = print(smallest, file_cnt);
|
---|
970 | }
|
---|
971 |
|
---|
972 | if (only_merge && uniq)
|
---|
973 | uniq_lines(smallest); /* Print only uniq lines */
|
---|
974 | else /* Print rest of file */
|
---|
975 | while (print(smallest, file_cnt) != NIL_MERGE);
|
---|
976 |
|
---|
977 | put_line(NIL_PTR); /* Flush output buffer */
|
---|
978 | }
|
---|
979 |
|
---|
980 | /* Put_line () prints the line into the out_fd filedescriptor. If line equals
|
---|
981 | * NIL_PTR, the out_fd is flushed and closed.
|
---|
982 | */
|
---|
983 | void put_line(line)
|
---|
984 | register char *line;
|
---|
985 | {
|
---|
986 | static int index = 0; /* Index in out_buffer */
|
---|
987 |
|
---|
988 | if (line == NIL_PTR) { /* Flush and close */
|
---|
989 | mwrite(out_fd, out_buffer, index);
|
---|
990 | index = 0;
|
---|
991 | (void) close(out_fd);
|
---|
992 | return;
|
---|
993 | }
|
---|
994 | do { /* Fill out_buffer with line */
|
---|
995 | out_buffer[index++] = *line;
|
---|
996 | if (index == IO_SIZE) {
|
---|
997 | mwrite(out_fd, out_buffer, IO_SIZE);
|
---|
998 | index = 0;
|
---|
999 | }
|
---|
1000 | } while (*line++ != '\n');
|
---|
1001 | }
|
---|
1002 |
|
---|
1003 | /*
|
---|
1004 | * Print () prints the line of the merg structure and tries to read another one.
|
---|
1005 | * If this fails, it returns the next merg structure which file_descriptor is
|
---|
1006 | * still open. If none could be found, a NIL structure is returned.
|
---|
1007 | */
|
---|
1008 | MERGE *print(merg, file_cnt)
|
---|
1009 | register MERGE *merg;
|
---|
1010 | int file_cnt; /* Nr of files that are being merged */
|
---|
1011 | {
|
---|
1012 | register int i;
|
---|
1013 |
|
---|
1014 | put_line(merg->line); /* Print the line */
|
---|
1015 |
|
---|
1016 | if (read_line(merg) == ERROR) { /* Read next line */
|
---|
1017 | for (i = 0; i < file_cnt; i++) {
|
---|
1018 | if (merge_f[i].fd != ERROR) {
|
---|
1019 | merg = &merge_f[i];
|
---|
1020 | break;
|
---|
1021 | }
|
---|
1022 | }
|
---|
1023 | if (i == file_cnt) /* No more files left */
|
---|
1024 | return NIL_MERGE;
|
---|
1025 | }
|
---|
1026 | return merg;
|
---|
1027 | }
|
---|
1028 |
|
---|
1029 | /* Read_line () reads a line from the fd from the merg struct. If the read
|
---|
1030 | * failed, disabled is incremented and the file is closed. Readings are
|
---|
1031 | * done in buf_size bytes.
|
---|
1032 | * Lines longer than LINE_SIZE are silently truncated.
|
---|
1033 | */
|
---|
1034 | int read_line(merg)
|
---|
1035 | register MERGE *merg;
|
---|
1036 | {
|
---|
1037 | register char *ptr = merg->line - 1; /* Ptr buf that will hold line */
|
---|
1038 |
|
---|
1039 | do {
|
---|
1040 | ptr++;
|
---|
1041 | if (merg->cnt == merg->read_chars) { /* Read new buffer */
|
---|
1042 | if ((merg->read_chars =
|
---|
1043 | read(merg->fd, merg->buffer, buf_size)) <= 0) {
|
---|
1044 | (void) close(merg->fd); /* OOPS */
|
---|
1045 | merg->fd = ERROR;
|
---|
1046 | disabled++;
|
---|
1047 | return ERROR;
|
---|
1048 | }
|
---|
1049 | merg->cnt = 0;
|
---|
1050 | }
|
---|
1051 | *ptr = merg->buffer[merg->cnt++]; /* Assign next char of line */
|
---|
1052 | if (ptr - merg->line == LINE_SIZE - 1)
|
---|
1053 | *ptr = '\n'; /* Truncate very long lines */
|
---|
1054 | } while (*ptr != '\n' && *ptr != '\0');
|
---|
1055 |
|
---|
1056 | if (*ptr == '\0') /* Add '\n' to last line */
|
---|
1057 | *ptr = '\n';
|
---|
1058 | *++ptr = '\0'; /* Add '\0' */
|
---|
1059 | return OK;
|
---|
1060 | }
|
---|
1061 |
|
---|
1062 | /* Skip_lines () skips all same lines in all the files currently being merged.
|
---|
1063 | * It returns a pointer to the merge struct containing the smallest line.
|
---|
1064 | */
|
---|
1065 | MERGE *skip_lines(smallest, file_cnt)
|
---|
1066 | register MERGE *smallest;
|
---|
1067 | int file_cnt;
|
---|
1068 | {
|
---|
1069 | register int i;
|
---|
1070 | int ret;
|
---|
1071 |
|
---|
1072 | if (disabled == file_cnt - 1) /* We've had all */
|
---|
1073 | return smallest;
|
---|
1074 |
|
---|
1075 | for (i = 0; i < file_cnt; i++) {
|
---|
1076 | if (merge_f[i].fd == ERROR || smallest == &merge_f[i])
|
---|
1077 | continue; /* Don't check same file */
|
---|
1078 | while ((ret = compare(merge_f[i].line, smallest->line)) == 0) {
|
---|
1079 | if (read_line(&merge_f[i]) == ERROR) break; /* EOF */
|
---|
1080 | }
|
---|
1081 | if (ret < 0) /* Line wasn't smallest. Try again */
|
---|
1082 | return skip_lines(&merge_f[i], file_cnt);
|
---|
1083 | }
|
---|
1084 | return smallest;
|
---|
1085 | }
|
---|
1086 |
|
---|
1087 | /* Uniq_lines () prints only the uniq lines out of the fd of the merg struct. */
|
---|
1088 | void uniq_lines(merg)
|
---|
1089 | register MERGE *merg;
|
---|
1090 | {
|
---|
1091 | char lastline[LINE_SIZE]; /* Buffer to hold last line */
|
---|
1092 |
|
---|
1093 | for (;;) {
|
---|
1094 | put_line(merg->line); /* Print this line */
|
---|
1095 | copy(lastline, merg->line); /* and save it */
|
---|
1096 | if (read_line(merg) == ERROR) /* Read the next */
|
---|
1097 | return;
|
---|
1098 | /* Keep reading until lines duffer */
|
---|
1099 | while (compare(lastline, merg->line) == SAME)
|
---|
1100 | if (read_line(merg) == ERROR) return;
|
---|
1101 | }
|
---|
1102 |
|
---|
1103 | /* NOTREACHED */
|
---|
1104 | }
|
---|
1105 |
|
---|
1106 | /*
|
---|
1107 | * Check_file () checks if a file is sorted in order according to the arguments
|
---|
1108 | * given in main ().
|
---|
1109 | */
|
---|
1110 | void check_file(fd, file)
|
---|
1111 | int fd;
|
---|
1112 | char *file;
|
---|
1113 | {
|
---|
1114 | register MERGE *merg; /* 1 file only */
|
---|
1115 | char lastline[LINE_SIZE]; /* Save last line */
|
---|
1116 | register int ret; /* ret status of compare */
|
---|
1117 |
|
---|
1118 | if (fd == 0) file = "stdin";
|
---|
1119 | merg = (MERGE *) mem_top; /* Assign MERGE structure */
|
---|
1120 | merg->buffer = mem_top + sizeof(MERGE);
|
---|
1121 | merg->line = msbrk(LINE_SIZE);
|
---|
1122 | merg->cnt = merg->read_chars = 0;
|
---|
1123 | merg->fd = fd;
|
---|
1124 | buf_size = MEMORY_SIZE - sizeof(MERGE);
|
---|
1125 |
|
---|
1126 | if (read_line(merg) == ERROR) /* Read first line */
|
---|
1127 | return;
|
---|
1128 | copy(lastline, merg->line); /* and save it */
|
---|
1129 |
|
---|
1130 | for (;;) {
|
---|
1131 | if (read_line(merg) == ERROR) /* EOF reached */
|
---|
1132 | break;
|
---|
1133 | if ((ret = compare(lastline, merg->line)) > 0) {
|
---|
1134 | error(FALSE, "Disorder in file ", file);
|
---|
1135 | write(2, merg->line, length(merg->line));
|
---|
1136 | break;
|
---|
1137 | } else if (ret < 0) /* Copy if lines not equal */
|
---|
1138 | copy(lastline, merg->line);
|
---|
1139 | else if (uniq) {
|
---|
1140 | error(FALSE, "Non uniq line in file ", file);
|
---|
1141 | write(2, merg->line, length(merg->line));
|
---|
1142 | break;
|
---|
1143 | }
|
---|
1144 | }
|
---|
1145 |
|
---|
1146 | mbrk(mem_top); /* Reset mem */
|
---|
1147 | }
|
---|
1148 |
|
---|
1149 | /* Length () returns the length of the argument line including the linefeed. */
|
---|
1150 | int length(line)
|
---|
1151 | register char *line;
|
---|
1152 | {
|
---|
1153 | register int i = 1; /* Add linefeed */
|
---|
1154 |
|
---|
1155 | while (*line++ != '\n') i++;
|
---|
1156 | return i;
|
---|
1157 | }
|
---|
1158 |
|
---|
1159 | /* Copy () copies the src line into the dest line including linefeed. */
|
---|
1160 | void copy(dest, src)
|
---|
1161 | register char *dest, *src;
|
---|
1162 | {
|
---|
1163 | while ((*dest++ = *src++) != '\n');
|
---|
1164 | }
|
---|
1165 |
|
---|
1166 | /* Msbrk() does a sbrk() and checks the return value. */
|
---|
1167 | char *msbrk(size)
|
---|
1168 | register int size;
|
---|
1169 | {
|
---|
1170 | register char *address;
|
---|
1171 |
|
---|
1172 | if ((address = sbrk(size)) == (char *) -1)
|
---|
1173 | error(TRUE, "Not enough memory. Use chmem to allocate more", NIL_PTR);
|
---|
1174 | return address;
|
---|
1175 | }
|
---|
1176 |
|
---|
1177 | /* Mbrk() does a brk() and checks the return value. */
|
---|
1178 | void mbrk(address)
|
---|
1179 | char *address;
|
---|
1180 | {
|
---|
1181 | if (brk(address) == -1) error(TRUE, "Cannot reset memory", NIL_PTR);
|
---|
1182 | }
|
---|
1183 |
|
---|
1184 | void catch(dummy)
|
---|
1185 | int dummy; /* to satisfy the prototype */
|
---|
1186 | {
|
---|
1187 | register int i;
|
---|
1188 |
|
---|
1189 | signal(SIGINT, SIG_IGN);
|
---|
1190 | only_merge = FALSE;
|
---|
1191 | for (i = 0; i < 26; i++) (void) unlink(file_name(i));
|
---|
1192 | exit(2);
|
---|
1193 | }
|
---|