[9] | 1 | /* sort - sort a file of lines Author: Michiel Huisjes */
|
---|
| 2 |
|
---|
| 3 | /* SYNOPSIS:
|
---|
| 4 | * sort [-funbirdcmt'x'] [+beg_pos[opts] [-end_pos]] [-o outfile] [file]..
|
---|
| 5 | *
|
---|
| 6 | * [opts] can be any of
|
---|
| 7 | * -f : Fold upper case to lower.
|
---|
| 8 | * -n : Sort to numeric value (optional decimal point) implies -b
|
---|
| 9 | * -b : Skip leading blanks
|
---|
| 10 | * -i : Ignore chars outside ASCII range (040 - 0176)
|
---|
| 11 | * -r : Reverse the sense of comparisons.
|
---|
| 12 | * -d : Sort to dictionary order. Only letters, digits, comma's and points
|
---|
| 13 | * are compared.
|
---|
| 14 | * If any of these flags are used in [opts], then they override all global
|
---|
| 15 | * ordering for this field.
|
---|
| 16 | *
|
---|
| 17 | * I/O control flags are:
|
---|
| 18 | * -u : Print uniq lines only once.
|
---|
| 19 | * -c : Check if files are sorted in order.
|
---|
| 20 | * -m : Merge already sorted files.
|
---|
| 21 | * -o outfile : Name of output file. (Can be one of the input files).
|
---|
| 22 | * Default is stdout.
|
---|
| 23 | * - : Take stdin as input.
|
---|
| 24 | *
|
---|
| 25 | * Fields:
|
---|
| 26 | * -t'x' : Field separating character is 'x'
|
---|
| 27 | * +a.b : Start comparing at field 'a' with offset 'b'. A missing 'b' is
|
---|
| 28 | * taken to be 0.
|
---|
| 29 | * -a.b : Stop comparing at field 'a' with offset 'b'. A missing 'b' is
|
---|
| 30 | * taken to be 0.
|
---|
| 31 | * A missing -a.b means the rest of the line.
|
---|
| 32 | */
|
---|
| 33 |
|
---|
| 34 | #include <sys/types.h>
|
---|
| 35 | #include <sys/stat.h>
|
---|
| 36 | #include <fcntl.h>
|
---|
| 37 | #include <signal.h>
|
---|
| 38 | #include <unistd.h>
|
---|
| 39 | #include <stdlib.h>
|
---|
| 40 | #include <string.h>
|
---|
| 41 | #include <stdio.h>
|
---|
| 42 | #include <limits.h>
|
---|
| 43 |
|
---|
| 44 | #define OPEN_FILES (OPEN_MAX-4) /* Nr of open files per process */
|
---|
| 45 | #if __minix_vmd
|
---|
| 46 | #define MEMORY_SIZE (1024 * 1024)
|
---|
| 47 | #else
|
---|
| 48 | #define MEMORY_SIZE ((10 * sizeof(int)) * 1024)
|
---|
| 49 | #endif
|
---|
| 50 | /* Total mem_size */
|
---|
| 51 | #define LINE_SIZE (1024 >> 1) /* Max length of a line */
|
---|
| 52 | #define IO_SIZE (2 * 1024) /* Size of buffered output */
|
---|
| 53 | #define STD_OUT 1 /* Fd of terminal */
|
---|
| 54 |
|
---|
| 55 | /* Return status of functions */
|
---|
| 56 | #define OK 0
|
---|
| 57 | #define ERROR -1
|
---|
| 58 | #define NIL_PTR ((char *) 0)
|
---|
| 59 |
|
---|
| 60 | /* Compare return values */
|
---|
| 61 | #define LOWER -1
|
---|
| 62 | #define SAME 0
|
---|
| 63 | #define HIGHER 1
|
---|
| 64 |
|
---|
| 65 | /* Table definitions. */
|
---|
| 66 | #define DICT 0x001 /* Alpha, numeric, letters and . */
|
---|
| 67 | #define ASCII 0x002 /* All between ' ' and '~' */
|
---|
| 68 | #define BLANK 0x004 /* ' ' and '\t' */
|
---|
| 69 | #define DIGIT 0x008 /* 0-9 */
|
---|
| 70 | #define UPPER 0x010 /* A-Z */
|
---|
| 71 |
|
---|
| 72 | typedef int BOOL;
|
---|
| 73 |
|
---|
| 74 | #define FALSE 0
|
---|
| 75 | #define TRUE 1
|
---|
| 76 |
|
---|
| 77 | typedef struct {
|
---|
| 78 | int fd; /* Fd of file */
|
---|
| 79 | char *buffer; /* Buffer for reads */
|
---|
| 80 | int read_chars; /* Nr of chars actually read in buffer */
|
---|
| 81 | int cnt; /* Nr of chars taken out of buffer */
|
---|
| 82 | char *line; /* Contains line currently used */
|
---|
| 83 | } MERGE;
|
---|
| 84 |
|
---|
| 85 | #define NIL_MERGE ((MERGE *) 0)
|
---|
| 86 | MERGE merge_f[OPEN_FILES]; /* Merge structs */
|
---|
| 87 | int buf_size; /* Size of core available for each struct */
|
---|
| 88 |
|
---|
| 89 | #define FIELDS_LIMIT 10 /* 1 global + 9 user */
|
---|
| 90 | #define GLOBAL 0
|
---|
| 91 |
|
---|
| 92 | typedef struct {
|
---|
| 93 | int beg_field, beg_pos; /* Begin field + offset */
|
---|
| 94 | int end_field, end_pos; /* End field + offset. ERROR == EOLN */
|
---|
| 95 | BOOL reverse; /* TRUE if rev. flag set on this field */
|
---|
| 96 | BOOL blanks;
|
---|
| 97 | BOOL dictionary;
|
---|
| 98 | BOOL fold_case;
|
---|
| 99 | BOOL ascii;
|
---|
| 100 | BOOL numeric;
|
---|
| 101 | } FIELD;
|
---|
| 102 |
|
---|
| 103 | /* Field declarations. A total of FILEDS_LIMIT is allowed */
|
---|
| 104 | FIELD fields[FIELDS_LIMIT];
|
---|
| 105 | int field_cnt; /* Nr of field actually assigned */
|
---|
| 106 |
|
---|
| 107 | /* Various output control flags */
|
---|
| 108 | BOOL check = FALSE;
|
---|
| 109 | BOOL only_merge = FALSE;
|
---|
| 110 | BOOL uniq = FALSE;
|
---|
| 111 |
|
---|
| 112 | char *mem_top; /* Mem_top points to lowest pos of memory. */
|
---|
| 113 | char *cur_pos; /* First free position in mem */
|
---|
| 114 | char **line_table; /* Pointer to the internal line table */
|
---|
| 115 | BOOL in_core = TRUE; /* Set if input cannot all be sorted in core */
|
---|
| 116 |
|
---|
| 117 | /* Place where temp_files should be made */
|
---|
| 118 | char temp_files[] = "/tmp/sort.XXXXX.XX";
|
---|
| 119 | char *output_file; /* Name of output file */
|
---|
| 120 | int out_fd; /* Fd to output file (could be STD_OUT) */
|
---|
| 121 | char out_buffer[IO_SIZE]; /* For buffered output */
|
---|
| 122 |
|
---|
| 123 | char **argptr; /* Pointer to argv structure */
|
---|
| 124 | int args_offset; /* Nr of args spilled on options */
|
---|
| 125 | int args_limit; /* Nr of args given */
|
---|
| 126 |
|
---|
| 127 | char separator; /* Char that separates fields */
|
---|
| 128 | int nr_of_files = 0; /* Nr_of_files to be merged */
|
---|
| 129 | int disabled; /* Nr of files done */
|
---|
| 130 |
|
---|
| 131 | char USAGE[] = "Usage: sort [-funbirdcmt'x'] [+beg_pos [-end_pos]] [-o outfile] [file] ..";
|
---|
| 132 |
|
---|
| 133 | /* Forward declarations */
|
---|
| 134 | _PROTOTYPE(int main, (int argc, char **argv));
|
---|
| 135 | _PROTOTYPE(void get_opts, (char *ptr, FIELD * field));
|
---|
| 136 | _PROTOTYPE(void new_field, (FIELD * field, int *offset, BOOL beg_fl));
|
---|
| 137 | _PROTOTYPE(void adjust_options, (FIELD * field));
|
---|
| 138 | _PROTOTYPE(void error, (BOOL quit, char *message, char *arg));
|
---|
| 139 | _PROTOTYPE(void open_outfile, (void));
|
---|
| 140 | _PROTOTYPE(void get_file, (int fd, off_t size));
|
---|
| 141 | _PROTOTYPE(int last_line, (void));
|
---|
| 142 | _PROTOTYPE(void print_table, (int fd));
|
---|
| 143 | _PROTOTYPE(char *file_name, (int nr));
|
---|
| 144 | _PROTOTYPE(void mread, (int fd, char *address, int bytes));
|
---|
| 145 | _PROTOTYPE(void mwrite, (int fd, char *address, int bytes));
|
---|
| 146 | _PROTOTYPE(void sort, (void));
|
---|
| 147 | _PROTOTYPE(void sort_table, (int nel));
|
---|
| 148 | _PROTOTYPE(void incr, (int si, int ei));
|
---|
| 149 | _PROTOTYPE(int cmp_fields, (char *el1, char *el2));
|
---|
| 150 | _PROTOTYPE(void build_field, (char *dest, FIELD * field, char *src));
|
---|
| 151 | _PROTOTYPE(char *skip_fields, (char *str, int nf));
|
---|
| 152 | _PROTOTYPE(int compare, (char *el1, char *el2));
|
---|
| 153 | _PROTOTYPE(int cmp, (unsigned char *el1, unsigned char *el2, FIELD * field));
|
---|
| 154 | _PROTOTYPE(int digits, (char *str1, char *str2, BOOL check_sign));
|
---|
| 155 | _PROTOTYPE(void files_merge, (int file_cnt));
|
---|
| 156 | _PROTOTYPE(void merge, (int start_file, int limit_file));
|
---|
| 157 | _PROTOTYPE(void put_line, (char *line));
|
---|
| 158 | _PROTOTYPE(MERGE * print, (MERGE * merg, int file_cnt));
|
---|
| 159 | _PROTOTYPE(int read_line, (MERGE * merg));
|
---|
| 160 | _PROTOTYPE(MERGE * skip_lines, (MERGE * smallest, int file_cnt));
|
---|
| 161 | _PROTOTYPE(void uniq_lines, (MERGE * merg));
|
---|
| 162 | _PROTOTYPE(void check_file, (int fd, char *file));
|
---|
| 163 | _PROTOTYPE(int length, (char *line));
|
---|
| 164 | _PROTOTYPE(void copy, (char *dest, char *src));
|
---|
| 165 | _PROTOTYPE(char *msbrk, (int size));
|
---|
| 166 | _PROTOTYPE(void mbrk, (char *address));
|
---|
| 167 | _PROTOTYPE(void catch, (int dummy));
|
---|
| 168 |
|
---|
| 169 | /* Table of all chars. 0 means no special meaning. */
|
---|
| 170 | char table[256] = {
|
---|
| 171 | /* '^@' to space */
|
---|
| 172 | 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
| 173 | 0, BLANK | DICT, 0, 0, 0, 0, 0, 0,
|
---|
| 174 | 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
| 175 | 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
| 176 |
|
---|
| 177 | /* Space to '0' */
|
---|
| 178 | BLANK | DICT | ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII,
|
---|
| 179 | ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII,
|
---|
| 180 | ASCII, ASCII,
|
---|
| 181 |
|
---|
| 182 | /* '0' until '9' */
|
---|
| 183 | DIGIT | DICT | ASCII, DIGIT | DICT | ASCII, DIGIT | DICT | ASCII,
|
---|
| 184 | DIGIT | DICT | ASCII, DIGIT | DICT | ASCII, DIGIT | DICT | ASCII,
|
---|
| 185 | DIGIT | DICT | ASCII, DIGIT | DICT | ASCII, DIGIT | DICT | ASCII,
|
---|
| 186 | DIGIT | DICT | ASCII,
|
---|
| 187 |
|
---|
| 188 | /* ASCII from ':' to '@' */
|
---|
| 189 | ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII,
|
---|
| 190 |
|
---|
| 191 | /* Upper case letters 'A' to 'Z' */
|
---|
| 192 | UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII,
|
---|
| 193 | UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII,
|
---|
| 194 | UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII,
|
---|
| 195 | UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII,
|
---|
| 196 | UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII,
|
---|
| 197 | UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII,
|
---|
| 198 | UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII,
|
---|
| 199 | UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII,
|
---|
| 200 | UPPER | DICT | ASCII, UPPER | DICT | ASCII,
|
---|
| 201 |
|
---|
| 202 | /* ASCII from '[' to '`' */
|
---|
| 203 | ASCII, ASCII, ASCII, ASCII, ASCII, ASCII,
|
---|
| 204 |
|
---|
| 205 | /* Lower case letters from 'a' to 'z' */
|
---|
| 206 | DICT | ASCII, DICT | ASCII, DICT | ASCII, DICT | ASCII,
|
---|
| 207 | DICT | ASCII, DICT | ASCII, DICT | ASCII, DICT | ASCII,
|
---|
| 208 | DICT | ASCII, DICT | ASCII, DICT | ASCII, DICT | ASCII,
|
---|
| 209 | DICT | ASCII, DICT | ASCII, DICT | ASCII, DICT | ASCII,
|
---|
| 210 | DICT | ASCII, DICT | ASCII, DICT | ASCII, DICT | ASCII,
|
---|
| 211 | DICT | ASCII, DICT | ASCII, DICT | ASCII, DICT | ASCII,
|
---|
| 212 | DICT | ASCII, DICT | ASCII,
|
---|
| 213 |
|
---|
| 214 | /* ASCII from '{' to '~' */
|
---|
| 215 | ASCII, ASCII, ASCII, ASCII,
|
---|
| 216 |
|
---|
| 217 | /* Stuff from -1 to -177 */
|
---|
| 218 | 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
| 219 | 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
| 220 | 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
| 221 | 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
| 222 | 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
| 223 | 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
| 224 | 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
| 225 | 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
| 226 | 0, 0, 0, 0, 0, 0, 0
|
---|
| 227 | };
|
---|
| 228 |
|
---|
| 229 |
|
---|
| 230 | /*
|
---|
| 231 | * Get_opts () assigns the options into the field structure as described in ptr.
|
---|
| 232 | * This field structure could be the GLOBAL one.
|
---|
| 233 | */
|
---|
| 234 | void get_opts(ptr, field)
|
---|
| 235 | register char *ptr;
|
---|
| 236 | register FIELD *field;
|
---|
| 237 | {
|
---|
| 238 | switch (*ptr) {
|
---|
| 239 | case 'b': /* Skip leading blanks */
|
---|
| 240 | field->blanks = TRUE;
|
---|
| 241 | break;
|
---|
| 242 | case 'd': /* Dictionary order */
|
---|
| 243 | field->dictionary = TRUE;
|
---|
| 244 | break;
|
---|
| 245 | case 'f': /* Fold upper case to lower */
|
---|
| 246 | field->fold_case = TRUE;
|
---|
| 247 | break;
|
---|
| 248 | case 'i': /* Skip chars outside ' ' '~' */
|
---|
| 249 | field->ascii = TRUE;
|
---|
| 250 | break;
|
---|
| 251 | case 'n': /* Sort on numeric */
|
---|
| 252 | field->numeric = TRUE;
|
---|
| 253 | field->blanks = TRUE;
|
---|
| 254 | break;
|
---|
| 255 | case 'r': /* Reverse comparisons */
|
---|
| 256 | field->reverse = TRUE;
|
---|
| 257 | break;
|
---|
| 258 | default: /* Illegal options */
|
---|
| 259 | error(TRUE, USAGE, NIL_PTR);
|
---|
| 260 | }
|
---|
| 261 | }
|
---|
| 262 |
|
---|
| 263 | /* New_field () assigns a new field as described by the arguments.
|
---|
| 264 | * A field description is of the form: +a.b[opts] -c.d, where b and d, as well
|
---|
| 265 | * as -c.d and [opts] are optional. Nr before digit is field nr. Nr after digit
|
---|
| 266 | * is offset from field.
|
---|
| 267 | */
|
---|
| 268 | void new_field(field, offset, beg_fl)
|
---|
| 269 | register FIELD *field; /* Field to assign */
|
---|
| 270 | int *offset; /* Offset in argv structure */
|
---|
| 271 | BOOL beg_fl; /* Assign beg or end of field */
|
---|
| 272 | {
|
---|
| 273 | register char *ptr;
|
---|
| 274 |
|
---|
| 275 | ptr = argptr[*offset];
|
---|
| 276 | *offset += 1; /* Incr offset to next arg */
|
---|
| 277 | ptr++;
|
---|
| 278 |
|
---|
| 279 | if (beg_fl)
|
---|
| 280 | field->beg_field = atoi(ptr); /* Assign int of first field */
|
---|
| 281 | else
|
---|
| 282 | field->end_field = atoi(ptr);
|
---|
| 283 |
|
---|
| 284 | while (table[*ptr] & DIGIT) /* Skip all digits */
|
---|
| 285 | ptr++;
|
---|
| 286 |
|
---|
| 287 | if (*ptr == '.') { /* Check for offset */
|
---|
| 288 | ptr++;
|
---|
| 289 | if (beg_fl)
|
---|
| 290 | field->beg_pos = atoi(ptr);
|
---|
| 291 | else
|
---|
| 292 | field->end_pos = atoi(ptr);
|
---|
| 293 | while (table[*ptr] & DIGIT) /* Skip digits */
|
---|
| 294 | ptr++;
|
---|
| 295 | }
|
---|
| 296 | if (beg_fl) {
|
---|
| 297 | while (*ptr != '\0') /* Check options after field */
|
---|
| 298 | get_opts(ptr++, field);
|
---|
| 299 | }
|
---|
| 300 | if (beg_fl) { /* Check for end pos */
|
---|
| 301 | ptr = argptr[*offset];
|
---|
| 302 | if (ptr && *ptr == '-' && ((table[*(ptr + 1)] & DIGIT) || *(ptr + 1) == '.')) {
|
---|
| 303 | new_field(field, offset, FALSE);
|
---|
| 304 | if (field->beg_field > field->end_field)
|
---|
| 305 | error(TRUE, "End field is before start field!", NIL_PTR);
|
---|
| 306 | } else /* No end pos. */
|
---|
| 307 | field->end_field = ERROR;
|
---|
| 308 | }
|
---|
| 309 | }
|
---|
| 310 |
|
---|
| 311 | int main(argc, argv)
|
---|
| 312 | int argc;
|
---|
| 313 | char *argv[];
|
---|
| 314 | {
|
---|
| 315 | int arg_count = 1; /* Offset in argv */
|
---|
| 316 | struct stat st;
|
---|
| 317 | register char *ptr; /* Ptr to *argv in use */
|
---|
| 318 | register int fd;
|
---|
| 319 | int pid, pow;
|
---|
| 320 |
|
---|
| 321 | argptr = argv;
|
---|
| 322 | cur_pos = mem_top = msbrk(MEMORY_SIZE); /* Find lowest mem. location */
|
---|
| 323 |
|
---|
| 324 | while (arg_count < argc && ((ptr = argv[arg_count])[0] == '-' || *ptr == '+')) {
|
---|
| 325 | if (*ptr == '-' && *(ptr + 1) == '\0') /* "-" means stdin */
|
---|
| 326 | break;
|
---|
| 327 | if (*ptr == '+') { /* Assign field. */
|
---|
| 328 | if (++field_cnt == FIELDS_LIMIT)
|
---|
| 329 | error(TRUE, "Too many fields", NIL_PTR);
|
---|
| 330 | new_field(&fields[field_cnt], &arg_count, TRUE);
|
---|
| 331 | } else { /* Get output options */
|
---|
| 332 | while (*++ptr) {
|
---|
| 333 | switch (*ptr) {
|
---|
| 334 | case 'c': /* Only check file */
|
---|
| 335 | check = TRUE;
|
---|
| 336 | break;
|
---|
| 337 | case 'm': /* Merge (sorted) files */
|
---|
| 338 | only_merge = TRUE;
|
---|
| 339 | break;
|
---|
| 340 | case 'u': /* Only give uniq lines */
|
---|
| 341 | uniq = TRUE;
|
---|
| 342 | break;
|
---|
| 343 | case 'o': /* Name of output file */
|
---|
| 344 | output_file = argv[++arg_count];
|
---|
| 345 | break;
|
---|
| 346 | case 't': /* Field separator */
|
---|
| 347 | ptr++;
|
---|
| 348 | separator = *ptr;
|
---|
| 349 | break;
|
---|
| 350 | default: /* Sort options */
|
---|
| 351 | get_opts(ptr, &fields[GLOBAL]);
|
---|
| 352 | }
|
---|
| 353 | }
|
---|
| 354 | arg_count++;
|
---|
| 355 | }
|
---|
| 356 | }
|
---|
| 357 |
|
---|
| 358 | for (fd = 1; fd <= field_cnt; fd++) adjust_options(&fields[fd]);
|
---|
| 359 |
|
---|
| 360 | /* Create name of tem_files 'sort.pid.aa' */
|
---|
| 361 | ptr = &temp_files[10];
|
---|
| 362 | pid = getpid();
|
---|
| 363 | pow = 10000;
|
---|
| 364 | while (pow != 0) {
|
---|
| 365 | *ptr++ = pid / pow + '0';
|
---|
| 366 | pid %= pow;
|
---|
| 367 | pow /= 10;
|
---|
| 368 | }
|
---|
| 369 |
|
---|
| 370 | signal(SIGINT, catch);
|
---|
| 371 |
|
---|
| 372 | /* Only merge files. Set up */
|
---|
| 373 | if (only_merge) {
|
---|
| 374 | args_limit = args_offset = arg_count;
|
---|
| 375 | while (argv[args_limit] != NIL_PTR)
|
---|
| 376 | args_limit++; /* Find nr of args */
|
---|
| 377 | files_merge(args_limit - arg_count);
|
---|
| 378 | exit(0);
|
---|
| 379 | }
|
---|
| 380 | if (arg_count == argc) { /* No args left. Use stdin */
|
---|
| 381 | if (check)
|
---|
| 382 | check_file(0, NIL_PTR);
|
---|
| 383 | else
|
---|
| 384 | get_file(0, (off_t) 0);
|
---|
| 385 | } else
|
---|
| 386 | while (arg_count < argc) { /* Sort or check args */
|
---|
| 387 | if (strcmp(argv[arg_count], "-") == 0)
|
---|
| 388 | fd = 0;
|
---|
| 389 | else if (stat(argv[arg_count], &st) < 0) {
|
---|
| 390 | error(FALSE, "Cannot find ", argv[arg_count++]);
|
---|
| 391 | continue;
|
---|
| 392 | }
|
---|
| 393 |
|
---|
| 394 | /* Open files */
|
---|
| 395 | else if ((fd = open(argv[arg_count], O_RDONLY)) < 0) {
|
---|
| 396 | error(FALSE, "Cannot open ", argv[arg_count++]);
|
---|
| 397 | continue;
|
---|
| 398 | }
|
---|
| 399 | if (check)
|
---|
| 400 | check_file(fd, argv[arg_count]);
|
---|
| 401 | else /* Get_file reads whole file */
|
---|
| 402 | get_file(fd, st.st_size);
|
---|
| 403 | arg_count++;
|
---|
| 404 | }
|
---|
| 405 |
|
---|
| 406 | if (check) exit(0);
|
---|
| 407 |
|
---|
| 408 | sort(); /* Sort whatever is left */
|
---|
| 409 |
|
---|
| 410 | if (nr_of_files == 1) /* Only one file sorted -> don't merge */
|
---|
| 411 | exit(0);
|
---|
| 412 |
|
---|
| 413 | files_merge(nr_of_files);
|
---|
| 414 | return(0);
|
---|
| 415 | }
|
---|
| 416 |
|
---|
| 417 | /* Adjust_options() assigns all global variables set also in the fields
|
---|
| 418 | * assigned.
|
---|
| 419 | */
|
---|
| 420 | void adjust_options(field)
|
---|
| 421 | register FIELD *field;
|
---|
| 422 | {
|
---|
| 423 | register FIELD *gfield = &fields[GLOBAL];
|
---|
| 424 |
|
---|
| 425 | if (gfield->reverse) field->reverse = TRUE;
|
---|
| 426 | if (gfield->blanks) field->blanks = TRUE;
|
---|
| 427 | if (gfield->dictionary) field->dictionary = TRUE;
|
---|
| 428 | if (gfield->fold_case) field->fold_case = TRUE;
|
---|
| 429 | if (gfield->ascii) field->ascii = TRUE;
|
---|
| 430 | if (gfield->numeric) field->numeric = TRUE;
|
---|
| 431 | }
|
---|
| 432 |
|
---|
| 433 | /* Error () prints the error message on stderr and exits if quit == TRUE. */
|
---|
| 434 | void error(quit, message, arg)
|
---|
| 435 | register BOOL quit;
|
---|
| 436 | register char *message, *arg;
|
---|
| 437 | {
|
---|
| 438 | write(2, message, strlen(message));
|
---|
| 439 | if (arg != NIL_PTR) write(2, arg, strlen(arg));
|
---|
| 440 | perror(" ");
|
---|
| 441 | if (quit) exit(1);
|
---|
| 442 | }
|
---|
| 443 |
|
---|
| 444 | /* Open_outfile () assigns to out_fd the fd where the output must go when all
|
---|
| 445 | * the sorting is done.
|
---|
| 446 | */
|
---|
| 447 | void open_outfile()
|
---|
| 448 | {
|
---|
| 449 | if (output_file == NIL_PTR)
|
---|
| 450 | out_fd = STD_OUT;
|
---|
| 451 | else if ((out_fd = creat(output_file, 0644)) < 0)
|
---|
| 452 | error(TRUE, "Cannot creat ", output_file);
|
---|
| 453 | }
|
---|
| 454 |
|
---|
| 455 | /* Get_file reads the whole file of filedescriptor fd. If the file is too big
|
---|
| 456 | * to keep in core, a partial sort is done, and the output is stashed somewhere.
|
---|
| 457 | */
|
---|
| 458 | void get_file(fd, size)
|
---|
| 459 | int fd; /* Fd of file to read */
|
---|
| 460 | register off_t size; /* Size of file */
|
---|
| 461 | {
|
---|
| 462 | register int i;
|
---|
| 463 | int rest; /* Rest in memory */
|
---|
| 464 | char save_ch; /* Used in stdin readings */
|
---|
| 465 |
|
---|
| 466 | rest = MEMORY_SIZE - (cur_pos - mem_top);
|
---|
| 467 | if (fd == 0) { /* We're reding stdin */
|
---|
| 468 | while ((i = read(0, cur_pos, rest)) > 0) {
|
---|
| 469 | if ((cur_pos - mem_top) + i == MEMORY_SIZE) {
|
---|
| 470 | in_core = FALSE;
|
---|
| 471 | i = last_line(); /* End of last line */
|
---|
| 472 | save_ch = mem_top[i];
|
---|
| 473 | mem_top[i] = '\0';
|
---|
| 474 | sort(); /* Sort core */
|
---|
| 475 | mem_top[i] = save_ch; /* Restore erased char */
|
---|
| 476 | /* Restore last (half read) line */
|
---|
| 477 | for (rest = 0; i + rest != MEMORY_SIZE; rest++)
|
---|
| 478 | mem_top[rest] = mem_top[i + rest];
|
---|
| 479 | /* Assign current pos. in memory */
|
---|
| 480 | cur_pos = &mem_top[rest];
|
---|
| 481 | } else { /* Fits, just assign position in mem. */
|
---|
| 482 | cur_pos = cur_pos + i;
|
---|
| 483 | *cur_pos = '\0';
|
---|
| 484 | }
|
---|
| 485 |
|
---|
| 486 | /* Calculate rest of mem */
|
---|
| 487 | rest = MEMORY_SIZE - (cur_pos - mem_top);
|
---|
| 488 | }
|
---|
| 489 | }
|
---|
| 490 |
|
---|
| 491 | /* Reading file. Check size */
|
---|
| 492 | else if (size > rest) { /* Won't fit */
|
---|
| 493 | mread(fd, cur_pos, rest);
|
---|
| 494 | in_core = FALSE;
|
---|
| 495 | i = last_line(); /* Get pos. of last line */
|
---|
| 496 | mem_top[i] = '\0'; /* Truncate */
|
---|
| 497 | (void) lseek(fd, (off_t) (i - MEMORY_SIZE), SEEK_CUR); /* Do this next time */
|
---|
| 498 | size = size - rest - i + MEMORY_SIZE; /* Calculate rest */
|
---|
| 499 | cur_pos = mem_top; /* Reset mem */
|
---|
| 500 | sort(); /* Sort core */
|
---|
| 501 | get_file(fd, size); /* Get rest of file */
|
---|
| 502 | } else { /* Fits. Just read in */
|
---|
| 503 | rest = size;
|
---|
| 504 | mread(fd, cur_pos, rest);
|
---|
| 505 | cur_pos = cur_pos + rest; /* Reassign cur_pos */
|
---|
| 506 | *cur_pos = '\0';
|
---|
| 507 | (void) close(fd); /* File completed */
|
---|
| 508 | }
|
---|
| 509 | }
|
---|
| 510 |
|
---|
| 511 | /* Last_line () find the last line in core and retuns the offset from the top
|
---|
| 512 | * of the memory.
|
---|
| 513 | */
|
---|
| 514 | int last_line()
|
---|
| 515 | {
|
---|
| 516 | register int i;
|
---|
| 517 |
|
---|
| 518 | for (i = MEMORY_SIZE - 2; i > 0; i--)
|
---|
| 519 | if (mem_top[i] == '\n') break;
|
---|
| 520 | return i + 1;
|
---|
| 521 | }
|
---|
| 522 |
|
---|
| 523 | /* Print_table prints the line table in the given file_descriptor. If the fd
|
---|
| 524 | * equals ERROR, it opens a temp_file itself.
|
---|
| 525 | */
|
---|
| 526 | void print_table(fd)
|
---|
| 527 | int fd;
|
---|
| 528 | {
|
---|
| 529 | register char **line_ptr; /* Ptr in line_table */
|
---|
| 530 | register char *ptr; /* Ptr to line */
|
---|
| 531 | int index = 0; /* Index in output buffer */
|
---|
| 532 |
|
---|
| 533 | if (fd == ERROR) {
|
---|
| 534 | if ((fd = creat(file_name(nr_of_files), 0644)) < 0)
|
---|
| 535 | error(TRUE, "Cannot creat ", file_name(nr_of_files));
|
---|
| 536 | }
|
---|
| 537 | for (line_ptr = line_table; *line_ptr != NIL_PTR; line_ptr++) {
|
---|
| 538 | ptr = *line_ptr;
|
---|
| 539 | /* Skip all same lines if uniq is set */
|
---|
| 540 | if (uniq && *(line_ptr + 1) != NIL_PTR) {
|
---|
| 541 | if (compare(ptr, *(line_ptr + 1)) == SAME) continue;
|
---|
| 542 | }
|
---|
| 543 | do { /* Print line in a buffered way */
|
---|
| 544 | out_buffer[index++] = *ptr;
|
---|
| 545 | if (index == IO_SIZE) {
|
---|
| 546 | mwrite(fd, out_buffer, IO_SIZE);
|
---|
| 547 | index = 0;
|
---|
| 548 | }
|
---|
| 549 | } while (*ptr++ != '\n');
|
---|
| 550 | }
|
---|
| 551 | mwrite(fd, out_buffer, index);/* Flush buffer */
|
---|
| 552 | (void) close(fd); /* Close file */
|
---|
| 553 | nr_of_files++; /* Increment nr_of_files to merge */
|
---|
| 554 | }
|
---|
| 555 |
|
---|
| 556 | /* File_name () returns the nr argument from the argument list, or a uniq
|
---|
| 557 | * filename if the nr is too high, or the arguments were not merge files.
|
---|
| 558 | */
|
---|
| 559 | char *file_name(nr)
|
---|
| 560 | register int nr;
|
---|
| 561 | {
|
---|
| 562 | if (only_merge) {
|
---|
| 563 | if (args_offset + nr < args_limit) return argptr[args_offset + nr];
|
---|
| 564 | }
|
---|
| 565 | temp_files[16] = nr / 26 + 'a';
|
---|
| 566 | temp_files[17] = nr % 26 + 'a';
|
---|
| 567 |
|
---|
| 568 | return temp_files;
|
---|
| 569 | }
|
---|
| 570 |
|
---|
| 571 | /* Mread () performs a normal read (), but checks the return value. */
|
---|
| 572 | void mread(fd, address, bytes)
|
---|
| 573 | int fd;
|
---|
| 574 | char *address;
|
---|
| 575 | register int bytes;
|
---|
| 576 | {
|
---|
| 577 | if (read(fd, address, bytes) < 0 && bytes != 0)
|
---|
| 578 | error(TRUE, "Read error", NIL_PTR);
|
---|
| 579 | }
|
---|
| 580 |
|
---|
| 581 | /* Mwrite () performs a normal write (), but checks the return value. */
|
---|
| 582 | void mwrite(fd, address, bytes)
|
---|
| 583 | int fd;
|
---|
| 584 | char *address;
|
---|
| 585 | register int bytes;
|
---|
| 586 | {
|
---|
| 587 | if (write(fd, address, bytes) != bytes && bytes != 0)
|
---|
| 588 | error(TRUE, "Write error", NIL_PTR);
|
---|
| 589 | }
|
---|
| 590 |
|
---|
| 591 | /* Sort () sorts the input in memory starting at mem_top. */
|
---|
| 592 | void sort()
|
---|
| 593 | {
|
---|
| 594 | register char *ptr = mem_top;
|
---|
| 595 | register int count = 0;
|
---|
| 596 |
|
---|
| 597 | /* Count number of lines in memory */
|
---|
| 598 | while (*ptr) {
|
---|
| 599 | if (*ptr++ == '\n') count++;
|
---|
| 600 | }
|
---|
| 601 |
|
---|
| 602 | /* Set up the line table */
|
---|
| 603 | line_table = (char **) msbrk(count * sizeof(char *) + sizeof(char *));
|
---|
| 604 |
|
---|
| 605 | count = 1;
|
---|
| 606 | ptr = line_table[0] = mem_top;
|
---|
| 607 | while (*ptr) {
|
---|
| 608 | if (*ptr++ == '\n') line_table[count++] = ptr;
|
---|
| 609 | }
|
---|
| 610 |
|
---|
| 611 | line_table[count - 1] = NIL_PTR;
|
---|
| 612 |
|
---|
| 613 | /* Sort the line table */
|
---|
| 614 | sort_table(count - 1);
|
---|
| 615 |
|
---|
| 616 | /* Stash output somewhere */
|
---|
| 617 | if (in_core) {
|
---|
| 618 | open_outfile();
|
---|
| 619 | print_table(out_fd);
|
---|
| 620 | } else
|
---|
| 621 | print_table(ERROR);
|
---|
| 622 |
|
---|
| 623 | /* Free line table */
|
---|
| 624 | mbrk((char *) line_table);
|
---|
| 625 | }
|
---|
| 626 |
|
---|
| 627 | /* Sort_table () sorts the line table consisting of nel elements. */
|
---|
| 628 | void sort_table(nel)
|
---|
| 629 | register int nel;
|
---|
| 630 | {
|
---|
| 631 | char *tmp;
|
---|
| 632 | register int i;
|
---|
| 633 |
|
---|
| 634 | /* Make heap */
|
---|
| 635 | for (i = (nel >> 1); i >= 1; i--) incr(i, nel);
|
---|
| 636 |
|
---|
| 637 | /* Sort from heap */
|
---|
| 638 | for (i = nel; i > 1; i--) {
|
---|
| 639 | tmp = line_table[0];
|
---|
| 640 | line_table[0] = line_table[i - 1];
|
---|
| 641 | line_table[i - 1] = tmp;
|
---|
| 642 | incr(1, i - 1);
|
---|
| 643 | }
|
---|
| 644 | }
|
---|
| 645 |
|
---|
| 646 | /* Incr () increments the heap. */
|
---|
| 647 | void incr(si, ei)
|
---|
| 648 | register int si, ei;
|
---|
| 649 | {
|
---|
| 650 | char *tmp;
|
---|
| 651 |
|
---|
| 652 | while (si <= (ei >> 1)) {
|
---|
| 653 | si <<= 1;
|
---|
| 654 | if (si + 1 <= ei && compare(line_table[si - 1], line_table[si]) <= 0)
|
---|
| 655 | si++;
|
---|
| 656 | if (compare(line_table[(si >> 1) - 1], line_table[si - 1]) >= 0)
|
---|
| 657 | return;
|
---|
| 658 | tmp = line_table[(si >> 1) - 1];
|
---|
| 659 | line_table[(si >> 1) - 1] = line_table[si - 1];
|
---|
| 660 | line_table[si - 1] = tmp;
|
---|
| 661 | }
|
---|
| 662 | }
|
---|
| 663 |
|
---|
| 664 | /* Cmp_fields builds new lines out of the lines pointed to by el1 and el2 and
|
---|
| 665 | * puts it into the line1 and line2 arrays. It then calls the cmp () routine
|
---|
| 666 | * with the field describing the arguments.
|
---|
| 667 | */
|
---|
| 668 | int cmp_fields(el1, el2)
|
---|
| 669 | register char *el1, *el2;
|
---|
| 670 | {
|
---|
| 671 | int i, ret;
|
---|
| 672 | char line1[LINE_SIZE], line2[LINE_SIZE];
|
---|
| 673 |
|
---|
| 674 | for (i = 0; i < field_cnt; i++) { /* Setup line parts */
|
---|
| 675 | build_field(line1, &fields[i + 1], el1);
|
---|
| 676 | build_field(line2, &fields[i + 1], el2);
|
---|
| 677 | if ((ret = cmp((unsigned char *) line1, (unsigned char *) line2,
|
---|
| 678 | &fields[i + 1])) != SAME)
|
---|
| 679 | break; /* If equal, try next field */
|
---|
| 680 | }
|
---|
| 681 |
|
---|
| 682 | /* Check for reverse flag */
|
---|
| 683 | if (i != field_cnt && fields[i + 1].reverse) return -ret;
|
---|
| 684 |
|
---|
| 685 | /* Else return the last return value of cmp () */
|
---|
| 686 | return ret;
|
---|
| 687 | }
|
---|
| 688 |
|
---|
| 689 | /* Build_field builds a new line from the src as described by the field.
|
---|
| 690 | * The result is put in dest.
|
---|
| 691 | */
|
---|
| 692 | void build_field(dest, field, src)
|
---|
| 693 | char *dest; /* Holds result */
|
---|
| 694 | register FIELD *field; /* Field description */
|
---|
| 695 | register char *src; /* Source line */
|
---|
| 696 | {
|
---|
| 697 | char *begin = src; /* Remember start location */
|
---|
| 698 | char *last; /* Pointer to end location */
|
---|
| 699 | int i;
|
---|
| 700 |
|
---|
| 701 | /* Skip begin fields */
|
---|
| 702 | src = skip_fields(src, field->beg_field);
|
---|
| 703 |
|
---|
| 704 | /* Skip begin positions */
|
---|
| 705 | for (i = 0; i < field->beg_pos && *src != '\n'; i++) src++;
|
---|
| 706 |
|
---|
| 707 | /* Copy whatever is left */
|
---|
| 708 | copy(dest, src);
|
---|
| 709 |
|
---|
| 710 | /* If end field is assigned truncate (perhaps) the part copied */
|
---|
| 711 | if (field->end_field != ERROR) { /* Find last field */
|
---|
| 712 | last = skip_fields(begin, field->end_field);
|
---|
| 713 | /* Skip positions as given by end fields description */
|
---|
| 714 | for (i = 0; i < field->end_pos && *last != '\n'; i++) last++;
|
---|
| 715 | dest[last - src] = '\n';/* Truncate line */
|
---|
| 716 | }
|
---|
| 717 | }
|
---|
| 718 |
|
---|
| 719 | /* Skip_fields () skips nf fields of the line pointed to by str. */
|
---|
| 720 | char *skip_fields(str, nf)
|
---|
| 721 | register char *str;
|
---|
| 722 | int nf;
|
---|
| 723 | {
|
---|
| 724 | while (nf-- > 0) {
|
---|
| 725 | if (separator == '\0') {/* Means ' ' or '\t' */
|
---|
| 726 | while (*str != ' ' && *str != '\t' && *str != '\n') str++;
|
---|
| 727 | while (table[*str] & BLANK) str++;
|
---|
| 728 | } else {
|
---|
| 729 | while (*str != separator && *str != '\n') str++;
|
---|
| 730 | if (*str == separator) str++;
|
---|
| 731 | }
|
---|
| 732 | }
|
---|
| 733 | return str; /* Return pointer to indicated field */
|
---|
| 734 | }
|
---|
| 735 |
|
---|
| 736 | /* Compare is called by all sorting routines. It checks if fields assignments
|
---|
| 737 | * has been made. if so, it calls cmp_fields (). If not, it calls cmp () and
|
---|
| 738 | * reversed the return value if the (global) reverse flag is set.
|
---|
| 739 | */
|
---|
| 740 | int compare(el1, el2)
|
---|
| 741 | register char *el1, *el2;
|
---|
| 742 | {
|
---|
| 743 | int ret;
|
---|
| 744 |
|
---|
| 745 | if (field_cnt > GLOBAL) return cmp_fields(el1, el2);
|
---|
| 746 |
|
---|
| 747 | ret = cmp((unsigned char *) el1, (unsigned char *) el2, &fields[GLOBAL]);
|
---|
| 748 | return(fields[GLOBAL].reverse) ? -ret : ret;
|
---|
| 749 | }
|
---|
| 750 |
|
---|
| 751 | /* Cmp () is the actual compare routine. It compares according to the
|
---|
| 752 | * description given in the field pointer.
|
---|
| 753 | */
|
---|
| 754 | int cmp(el1, el2, field)
|
---|
| 755 | register unsigned char *el1, *el2;
|
---|
| 756 | FIELD *field;
|
---|
| 757 | {
|
---|
| 758 | int c1, c2;
|
---|
| 759 |
|
---|
| 760 | if (field->blanks) { /* Skip leading blanks */
|
---|
| 761 | while (table[*el1] & BLANK) el1++;
|
---|
| 762 | while (table[*el2] & BLANK) el2++;
|
---|
| 763 | }
|
---|
| 764 | if (field->numeric) /* Compare numeric */
|
---|
| 765 | return digits((char *) el1, (char *) el2, TRUE);
|
---|
| 766 |
|
---|
| 767 | for (;;) {
|
---|
| 768 | while (*el1 == *el2) {
|
---|
| 769 | if (*el1++ == '\n') /* EOLN on both strings */
|
---|
| 770 | return SAME;
|
---|
| 771 | el2++;
|
---|
| 772 | }
|
---|
| 773 | if (*el1 == '\n') /* EOLN on string one */
|
---|
| 774 | return LOWER;
|
---|
| 775 | if (*el2 == '\n') return HIGHER;
|
---|
| 776 | if (field->ascii) { /* Skip chars outside 040 - 0177 */
|
---|
| 777 | if ((table[*el1] & ASCII) == 0) {
|
---|
| 778 | do {
|
---|
| 779 | el1++;
|
---|
| 780 | } while ((table[*el1] & ASCII) == 0);
|
---|
| 781 | continue;
|
---|
| 782 | }
|
---|
| 783 | if ((table[*el2] & ASCII) == 0) {
|
---|
| 784 | do {
|
---|
| 785 | el2++;
|
---|
| 786 | } while ((table[*el2] & ASCII) == 0);
|
---|
| 787 | continue;
|
---|
| 788 | }
|
---|
| 789 | }
|
---|
| 790 | if (field->dictionary) {/* Skip non-dict chars */
|
---|
| 791 | if ((table[*el1] & DICT) == 0) {
|
---|
| 792 | do {
|
---|
| 793 | el1++;
|
---|
| 794 | } while ((table[*el1] & DICT) == 0);
|
---|
| 795 | continue;
|
---|
| 796 | }
|
---|
| 797 | if ((table[*el2] & DICT) == 0) {
|
---|
| 798 | do {
|
---|
| 799 | el2++;
|
---|
| 800 | } while ((table[*el2] & DICT) == 0);
|
---|
| 801 | continue;
|
---|
| 802 | }
|
---|
| 803 | }
|
---|
| 804 | if (field->fold_case) { /* Fold upper case to lower */
|
---|
| 805 | if (table[c1 = *el1++] & UPPER) c1 += 'a' - 'A';
|
---|
| 806 | if (table[c2 = *el2++] & UPPER) c2 += 'a' - 'A';
|
---|
| 807 | if (c1 == c2) continue;
|
---|
| 808 | return c1 - c2;
|
---|
| 809 | }
|
---|
| 810 | return *el1 - *el2;
|
---|
| 811 | }
|
---|
| 812 |
|
---|
| 813 | /* NOTREACHED */
|
---|
| 814 | }
|
---|
| 815 |
|
---|
| 816 | /*
|
---|
| 817 | * Digits compares () the two strings that point to a number of digits followed
|
---|
| 818 | * by an optional decimal point.
|
---|
| 819 | */
|
---|
| 820 | int digits(str1, str2, check_sign)
|
---|
| 821 | register char *str1, *str2;
|
---|
| 822 | BOOL check_sign; /* True if sign must be checked */
|
---|
| 823 | {
|
---|
| 824 | BOOL negative = FALSE; /* True if negative numbers */
|
---|
| 825 | int diff, pow, ret;
|
---|
| 826 |
|
---|
| 827 | /* Check for optional minus or plus sign */
|
---|
| 828 | if (check_sign) {
|
---|
| 829 | if (*str1 == '-') {
|
---|
| 830 | negative = TRUE;
|
---|
| 831 | str1++;
|
---|
| 832 | } else if (*str1 == '+')
|
---|
| 833 | str1++;
|
---|
| 834 |
|
---|
| 835 | if (*str2 == '-') {
|
---|
| 836 | if (negative == FALSE) return HIGHER;
|
---|
| 837 | str2++;
|
---|
| 838 | } else if (negative)
|
---|
| 839 | return LOWER;
|
---|
| 840 | else if (*str2 == '+')
|
---|
| 841 | str2++;
|
---|
| 842 | }
|
---|
| 843 |
|
---|
| 844 | /* Keep incrementing as long as digits are available and equal */
|
---|
| 845 | while ((table[*str1] & DIGIT) && table[*str2] & DIGIT) {
|
---|
| 846 | if (*str1 != *str2) break;
|
---|
| 847 | str1++;
|
---|
| 848 | str2++;
|
---|
| 849 | }
|
---|
| 850 |
|
---|
| 851 | /* First check for the decimal point. */
|
---|
| 852 | if (*str1 == '.' || *str2 == '.') {
|
---|
| 853 | if (*str1 == '.') {
|
---|
| 854 | if (*str2 == '.') /* Both. Check decimal part */
|
---|
| 855 | ret = digits(str1 + 1, str2 + 1, FALSE);
|
---|
| 856 | else
|
---|
| 857 | ret = (table[*str2] & DIGIT) ? LOWER : HIGHER;
|
---|
| 858 | } else
|
---|
| 859 | ret = (table[*str1] & DIGIT) ? HIGHER : LOWER;
|
---|
| 860 | }
|
---|
| 861 |
|
---|
| 862 | /* Now either two digits differ, or unknown char is seen (e.g. end of string) */
|
---|
| 863 | else if ((table[*str1] & DIGIT) && (table[*str2] & DIGIT)) {
|
---|
| 864 | diff = *str1 - *str2; /* Basic difference */
|
---|
| 865 | pow = 0; /* Check power of numbers */
|
---|
| 866 | while (table[*str1++] & DIGIT) pow++;
|
---|
| 867 | while (table[*str2++] & DIGIT) pow--;
|
---|
| 868 | ret = (pow == 0) ? diff : pow;
|
---|
| 869 | }
|
---|
| 870 |
|
---|
| 871 | /* Unknown char. Check on which string it occurred */
|
---|
| 872 | else {
|
---|
| 873 | if ((table[*str1] & DIGIT) == 0)
|
---|
| 874 | ret = (table[*str2] & DIGIT) ? LOWER : SAME;
|
---|
| 875 | else
|
---|
| 876 | ret = HIGHER;
|
---|
| 877 | }
|
---|
| 878 |
|
---|
| 879 | /* Reverse sense of comparisons if negative is true. (-1000 < -1) */
|
---|
| 880 | return(negative) ? -ret : ret;
|
---|
| 881 | }
|
---|
| 882 |
|
---|
| 883 | /* Files_merge () merges all files as indicated by nr_of_files. Merging goes
|
---|
| 884 | * in numbers of files that can be opened at the same time. (OPEN_FILES)
|
---|
| 885 | */
|
---|
| 886 | void files_merge(file_cnt)
|
---|
| 887 | register int file_cnt; /* Nr_of_files to merge */
|
---|
| 888 | {
|
---|
| 889 | register int i;
|
---|
| 890 | int limit;
|
---|
| 891 |
|
---|
| 892 | for (i = 0; i < file_cnt; i += OPEN_FILES) {
|
---|
| 893 | /* Merge last files and store in output file */
|
---|
| 894 | if ((limit = i + OPEN_FILES) >= file_cnt) {
|
---|
| 895 | open_outfile();
|
---|
| 896 | limit = file_cnt;
|
---|
| 897 | } else { /* Merge OPEN_FILES files and store in temp
|
---|
| 898 | * file */
|
---|
| 899 | temp_files[16] = file_cnt / 26 + 'a';
|
---|
| 900 | temp_files[17] = file_cnt % 26 + 'a';
|
---|
| 901 | if ((out_fd = creat(temp_files, 0644)) < 0)
|
---|
| 902 | error(TRUE, "Cannot creat ", temp_files);
|
---|
| 903 | file_cnt++;
|
---|
| 904 | }
|
---|
| 905 | merge(i, limit);
|
---|
| 906 | }
|
---|
| 907 |
|
---|
| 908 | /* Cleanup mess */
|
---|
| 909 | i = (only_merge) ? args_limit - args_offset : 0;
|
---|
| 910 | while (i < file_cnt) (void) unlink(file_name(i++));
|
---|
| 911 | }
|
---|
| 912 |
|
---|
| 913 | /* Merge () merges the files between start_file and limit_file. */
|
---|
| 914 | void merge(start_file, limit_file)
|
---|
| 915 | int start_file, limit_file;
|
---|
| 916 | {
|
---|
| 917 | register MERGE *smallest; /* Keeps track of smallest line */
|
---|
| 918 | register int i;
|
---|
| 919 | int file_cnt = limit_file - start_file; /* Nr of files to merge */
|
---|
| 920 |
|
---|
| 921 | /* Calculate size in core available for file_cnt merge structs */
|
---|
| 922 | buf_size = MEMORY_SIZE / file_cnt - LINE_SIZE;
|
---|
| 923 |
|
---|
| 924 | mbrk(mem_top); /* First reset mem to lowest loc. */
|
---|
| 925 | disabled = 0; /* All files not done yet */
|
---|
| 926 |
|
---|
| 927 | /* Set up merge structures. */
|
---|
| 928 | for (i = start_file; i < limit_file; i++) {
|
---|
| 929 | smallest = &merge_f[i - start_file];
|
---|
| 930 | if (!strcmp(file_name(i), "-")) /* File is stdin */
|
---|
| 931 | smallest->fd = 0;
|
---|
| 932 | else if ((smallest->fd = open(file_name(i), O_RDONLY)) < 0) {
|
---|
| 933 | smallest->fd = ERROR;
|
---|
| 934 | error(FALSE, "Cannot open ", file_name(i));
|
---|
| 935 | disabled++; /* Done this file */
|
---|
| 936 | continue;
|
---|
| 937 | }
|
---|
| 938 | smallest->buffer = msbrk(buf_size);
|
---|
| 939 | smallest->line = msbrk(LINE_SIZE);
|
---|
| 940 | smallest->cnt = smallest->read_chars = 0;
|
---|
| 941 | (void) read_line(smallest); /* Read first line */
|
---|
| 942 | }
|
---|
| 943 |
|
---|
| 944 | if (disabled == file_cnt) { /* Couldn't open files */
|
---|
| 945 | (void) close(out_fd);
|
---|
| 946 | return;
|
---|
| 947 | }
|
---|
| 948 |
|
---|
| 949 | /* Find a merg struct to assign smallest. */
|
---|
| 950 | for (i = 0; i < file_cnt; i++) {
|
---|
| 951 | if (merge_f[i].fd != ERROR) {
|
---|
| 952 | smallest = &merge_f[i];
|
---|
| 953 | break;
|
---|
| 954 | }
|
---|
| 955 | }
|
---|
| 956 |
|
---|
| 957 | /* Loop until all files minus one are done */
|
---|
| 958 | while (disabled < file_cnt - 1) {
|
---|
| 959 | if (uniq) /* Skip all same lines */
|
---|
| 960 | smallest = skip_lines(smallest, file_cnt);
|
---|
| 961 | else { /* Find smallest line */
|
---|
| 962 | for (i = 0; i < file_cnt; i++) {
|
---|
| 963 | if (merge_f[i].fd == ERROR)
|
---|
| 964 | continue; /* We've had this one */
|
---|
| 965 | if (compare(merge_f[i].line, smallest->line) < 0)
|
---|
| 966 | smallest = &merge_f[i];
|
---|
| 967 | }
|
---|
| 968 | } /* Print line and read next */
|
---|
| 969 | smallest = print(smallest, file_cnt);
|
---|
| 970 | }
|
---|
| 971 |
|
---|
| 972 | if (only_merge && uniq)
|
---|
| 973 | uniq_lines(smallest); /* Print only uniq lines */
|
---|
| 974 | else /* Print rest of file */
|
---|
| 975 | while (print(smallest, file_cnt) != NIL_MERGE);
|
---|
| 976 |
|
---|
| 977 | put_line(NIL_PTR); /* Flush output buffer */
|
---|
| 978 | }
|
---|
| 979 |
|
---|
| 980 | /* Put_line () prints the line into the out_fd filedescriptor. If line equals
|
---|
| 981 | * NIL_PTR, the out_fd is flushed and closed.
|
---|
| 982 | */
|
---|
| 983 | void put_line(line)
|
---|
| 984 | register char *line;
|
---|
| 985 | {
|
---|
| 986 | static int index = 0; /* Index in out_buffer */
|
---|
| 987 |
|
---|
| 988 | if (line == NIL_PTR) { /* Flush and close */
|
---|
| 989 | mwrite(out_fd, out_buffer, index);
|
---|
| 990 | index = 0;
|
---|
| 991 | (void) close(out_fd);
|
---|
| 992 | return;
|
---|
| 993 | }
|
---|
| 994 | do { /* Fill out_buffer with line */
|
---|
| 995 | out_buffer[index++] = *line;
|
---|
| 996 | if (index == IO_SIZE) {
|
---|
| 997 | mwrite(out_fd, out_buffer, IO_SIZE);
|
---|
| 998 | index = 0;
|
---|
| 999 | }
|
---|
| 1000 | } while (*line++ != '\n');
|
---|
| 1001 | }
|
---|
| 1002 |
|
---|
| 1003 | /*
|
---|
| 1004 | * Print () prints the line of the merg structure and tries to read another one.
|
---|
| 1005 | * If this fails, it returns the next merg structure which file_descriptor is
|
---|
| 1006 | * still open. If none could be found, a NIL structure is returned.
|
---|
| 1007 | */
|
---|
| 1008 | MERGE *print(merg, file_cnt)
|
---|
| 1009 | register MERGE *merg;
|
---|
| 1010 | int file_cnt; /* Nr of files that are being merged */
|
---|
| 1011 | {
|
---|
| 1012 | register int i;
|
---|
| 1013 |
|
---|
| 1014 | put_line(merg->line); /* Print the line */
|
---|
| 1015 |
|
---|
| 1016 | if (read_line(merg) == ERROR) { /* Read next line */
|
---|
| 1017 | for (i = 0; i < file_cnt; i++) {
|
---|
| 1018 | if (merge_f[i].fd != ERROR) {
|
---|
| 1019 | merg = &merge_f[i];
|
---|
| 1020 | break;
|
---|
| 1021 | }
|
---|
| 1022 | }
|
---|
| 1023 | if (i == file_cnt) /* No more files left */
|
---|
| 1024 | return NIL_MERGE;
|
---|
| 1025 | }
|
---|
| 1026 | return merg;
|
---|
| 1027 | }
|
---|
| 1028 |
|
---|
| 1029 | /* Read_line () reads a line from the fd from the merg struct. If the read
|
---|
| 1030 | * failed, disabled is incremented and the file is closed. Readings are
|
---|
| 1031 | * done in buf_size bytes.
|
---|
| 1032 | * Lines longer than LINE_SIZE are silently truncated.
|
---|
| 1033 | */
|
---|
| 1034 | int read_line(merg)
|
---|
| 1035 | register MERGE *merg;
|
---|
| 1036 | {
|
---|
| 1037 | register char *ptr = merg->line - 1; /* Ptr buf that will hold line */
|
---|
| 1038 |
|
---|
| 1039 | do {
|
---|
| 1040 | ptr++;
|
---|
| 1041 | if (merg->cnt == merg->read_chars) { /* Read new buffer */
|
---|
| 1042 | if ((merg->read_chars =
|
---|
| 1043 | read(merg->fd, merg->buffer, buf_size)) <= 0) {
|
---|
| 1044 | (void) close(merg->fd); /* OOPS */
|
---|
| 1045 | merg->fd = ERROR;
|
---|
| 1046 | disabled++;
|
---|
| 1047 | return ERROR;
|
---|
| 1048 | }
|
---|
| 1049 | merg->cnt = 0;
|
---|
| 1050 | }
|
---|
| 1051 | *ptr = merg->buffer[merg->cnt++]; /* Assign next char of line */
|
---|
| 1052 | if (ptr - merg->line == LINE_SIZE - 1)
|
---|
| 1053 | *ptr = '\n'; /* Truncate very long lines */
|
---|
| 1054 | } while (*ptr != '\n' && *ptr != '\0');
|
---|
| 1055 |
|
---|
| 1056 | if (*ptr == '\0') /* Add '\n' to last line */
|
---|
| 1057 | *ptr = '\n';
|
---|
| 1058 | *++ptr = '\0'; /* Add '\0' */
|
---|
| 1059 | return OK;
|
---|
| 1060 | }
|
---|
| 1061 |
|
---|
| 1062 | /* Skip_lines () skips all same lines in all the files currently being merged.
|
---|
| 1063 | * It returns a pointer to the merge struct containing the smallest line.
|
---|
| 1064 | */
|
---|
| 1065 | MERGE *skip_lines(smallest, file_cnt)
|
---|
| 1066 | register MERGE *smallest;
|
---|
| 1067 | int file_cnt;
|
---|
| 1068 | {
|
---|
| 1069 | register int i;
|
---|
| 1070 | int ret;
|
---|
| 1071 |
|
---|
| 1072 | if (disabled == file_cnt - 1) /* We've had all */
|
---|
| 1073 | return smallest;
|
---|
| 1074 |
|
---|
| 1075 | for (i = 0; i < file_cnt; i++) {
|
---|
| 1076 | if (merge_f[i].fd == ERROR || smallest == &merge_f[i])
|
---|
| 1077 | continue; /* Don't check same file */
|
---|
| 1078 | while ((ret = compare(merge_f[i].line, smallest->line)) == 0) {
|
---|
| 1079 | if (read_line(&merge_f[i]) == ERROR) break; /* EOF */
|
---|
| 1080 | }
|
---|
| 1081 | if (ret < 0) /* Line wasn't smallest. Try again */
|
---|
| 1082 | return skip_lines(&merge_f[i], file_cnt);
|
---|
| 1083 | }
|
---|
| 1084 | return smallest;
|
---|
| 1085 | }
|
---|
| 1086 |
|
---|
| 1087 | /* Uniq_lines () prints only the uniq lines out of the fd of the merg struct. */
|
---|
| 1088 | void uniq_lines(merg)
|
---|
| 1089 | register MERGE *merg;
|
---|
| 1090 | {
|
---|
| 1091 | char lastline[LINE_SIZE]; /* Buffer to hold last line */
|
---|
| 1092 |
|
---|
| 1093 | for (;;) {
|
---|
| 1094 | put_line(merg->line); /* Print this line */
|
---|
| 1095 | copy(lastline, merg->line); /* and save it */
|
---|
| 1096 | if (read_line(merg) == ERROR) /* Read the next */
|
---|
| 1097 | return;
|
---|
| 1098 | /* Keep reading until lines duffer */
|
---|
| 1099 | while (compare(lastline, merg->line) == SAME)
|
---|
| 1100 | if (read_line(merg) == ERROR) return;
|
---|
| 1101 | }
|
---|
| 1102 |
|
---|
| 1103 | /* NOTREACHED */
|
---|
| 1104 | }
|
---|
| 1105 |
|
---|
| 1106 | /*
|
---|
| 1107 | * Check_file () checks if a file is sorted in order according to the arguments
|
---|
| 1108 | * given in main ().
|
---|
| 1109 | */
|
---|
| 1110 | void check_file(fd, file)
|
---|
| 1111 | int fd;
|
---|
| 1112 | char *file;
|
---|
| 1113 | {
|
---|
| 1114 | register MERGE *merg; /* 1 file only */
|
---|
| 1115 | char lastline[LINE_SIZE]; /* Save last line */
|
---|
| 1116 | register int ret; /* ret status of compare */
|
---|
| 1117 |
|
---|
| 1118 | if (fd == 0) file = "stdin";
|
---|
| 1119 | merg = (MERGE *) mem_top; /* Assign MERGE structure */
|
---|
| 1120 | merg->buffer = mem_top + sizeof(MERGE);
|
---|
| 1121 | merg->line = msbrk(LINE_SIZE);
|
---|
| 1122 | merg->cnt = merg->read_chars = 0;
|
---|
| 1123 | merg->fd = fd;
|
---|
| 1124 | buf_size = MEMORY_SIZE - sizeof(MERGE);
|
---|
| 1125 |
|
---|
| 1126 | if (read_line(merg) == ERROR) /* Read first line */
|
---|
| 1127 | return;
|
---|
| 1128 | copy(lastline, merg->line); /* and save it */
|
---|
| 1129 |
|
---|
| 1130 | for (;;) {
|
---|
| 1131 | if (read_line(merg) == ERROR) /* EOF reached */
|
---|
| 1132 | break;
|
---|
| 1133 | if ((ret = compare(lastline, merg->line)) > 0) {
|
---|
| 1134 | error(FALSE, "Disorder in file ", file);
|
---|
| 1135 | write(2, merg->line, length(merg->line));
|
---|
| 1136 | break;
|
---|
| 1137 | } else if (ret < 0) /* Copy if lines not equal */
|
---|
| 1138 | copy(lastline, merg->line);
|
---|
| 1139 | else if (uniq) {
|
---|
| 1140 | error(FALSE, "Non uniq line in file ", file);
|
---|
| 1141 | write(2, merg->line, length(merg->line));
|
---|
| 1142 | break;
|
---|
| 1143 | }
|
---|
| 1144 | }
|
---|
| 1145 |
|
---|
| 1146 | mbrk(mem_top); /* Reset mem */
|
---|
| 1147 | }
|
---|
| 1148 |
|
---|
| 1149 | /* Length () returns the length of the argument line including the linefeed. */
|
---|
| 1150 | int length(line)
|
---|
| 1151 | register char *line;
|
---|
| 1152 | {
|
---|
| 1153 | register int i = 1; /* Add linefeed */
|
---|
| 1154 |
|
---|
| 1155 | while (*line++ != '\n') i++;
|
---|
| 1156 | return i;
|
---|
| 1157 | }
|
---|
| 1158 |
|
---|
| 1159 | /* Copy () copies the src line into the dest line including linefeed. */
|
---|
| 1160 | void copy(dest, src)
|
---|
| 1161 | register char *dest, *src;
|
---|
| 1162 | {
|
---|
| 1163 | while ((*dest++ = *src++) != '\n');
|
---|
| 1164 | }
|
---|
| 1165 |
|
---|
| 1166 | /* Msbrk() does a sbrk() and checks the return value. */
|
---|
| 1167 | char *msbrk(size)
|
---|
| 1168 | register int size;
|
---|
| 1169 | {
|
---|
| 1170 | register char *address;
|
---|
| 1171 |
|
---|
| 1172 | if ((address = sbrk(size)) == (char *) -1)
|
---|
| 1173 | error(TRUE, "Not enough memory. Use chmem to allocate more", NIL_PTR);
|
---|
| 1174 | return address;
|
---|
| 1175 | }
|
---|
| 1176 |
|
---|
| 1177 | /* Mbrk() does a brk() and checks the return value. */
|
---|
| 1178 | void mbrk(address)
|
---|
| 1179 | char *address;
|
---|
| 1180 | {
|
---|
| 1181 | if (brk(address) == -1) error(TRUE, "Cannot reset memory", NIL_PTR);
|
---|
| 1182 | }
|
---|
| 1183 |
|
---|
| 1184 | void catch(dummy)
|
---|
| 1185 | int dummy; /* to satisfy the prototype */
|
---|
| 1186 | {
|
---|
| 1187 | register int i;
|
---|
| 1188 |
|
---|
| 1189 | signal(SIGINT, SIG_IGN);
|
---|
| 1190 | only_merge = FALSE;
|
---|
| 1191 | for (i = 0; i < 26; i++) (void) unlink(file_name(i));
|
---|
| 1192 | exit(2);
|
---|
| 1193 | }
|
---|