[9] | 1 | /* paste - laminate files Author: David Ihnat */
|
---|
| 2 |
|
---|
| 3 | /* Paste - a recreation of the Unix(Tm) paste(1) command.
|
---|
| 4 | *
|
---|
| 5 | * syntax: paste file1 file2 ... paste -dLIST file1 file2 ... paste -s [-dLIST]
|
---|
| 6 | * file1 file2 ...
|
---|
| 7 | *
|
---|
| 8 | * Copyright (C) 1984 by David M. Ihnat
|
---|
| 9 | *
|
---|
| 10 | * This program is a total rewrite of the Bell Laboratories Unix(Tm) command of
|
---|
| 11 | * the same name, as of System V. It contains no proprietary code, and
|
---|
| 12 | * therefore may be used without violation of any proprietary agreements
|
---|
| 13 | * whatsoever. However, you will notice that the program is copyrighted by
|
---|
| 14 | * me. This is to assure the program does *not* fall into the public domain.
|
---|
| 15 | * Thus, I may specify just what I am now: This program may be freely copied
|
---|
| 16 | * and distributed, provided this notice remains; it may not be sold for
|
---|
| 17 | * profit without express written consent of the author. Please note that I
|
---|
| 18 | * recreated the behavior of the Unix(Tm) 'paste' command as faithfully as
|
---|
| 19 | * possible, with minor exceptions (noted below); however, I haven't run a
|
---|
| 20 | * full set of regression tests. Thus, the user of this program accepts
|
---|
| 21 | * full responsibility for any effects or loss; in particular, the author is
|
---|
| 22 | * not responsible for any losses, explicit or incidental, that may be
|
---|
| 23 | * incurred through use of this program.
|
---|
| 24 | *
|
---|
| 25 | * The changes to the program, with one exception, are transparent to a user
|
---|
| 26 | * familiar with the Unix command of the same name. These changes are:
|
---|
| 27 | *
|
---|
| 28 | * 1) The '-s' option had a bug in the Unix version when used with multiple
|
---|
| 29 | * files. It would repeat each file in a list, i.e., for
|
---|
| 30 | *
|
---|
| 31 | * paste -s file1 file2 file3
|
---|
| 32 | *
|
---|
| 33 | * it would list
|
---|
| 34 | *
|
---|
| 35 | * <file1\n><file1\n><file2\n><file1\n><file2\n><file3\n>
|
---|
| 36 | *
|
---|
| 37 | * I fixed this, and reported the bug to the providers of the command in Unix.
|
---|
| 38 | *
|
---|
| 39 | * 2) The list of valid escape sequences has been expanded to include \b,\f,
|
---|
| 40 | * and \r. (Just because *I* can't imagine why you'd want to use them
|
---|
| 41 | * doesn't mean I should keep them from you.)
|
---|
| 42 | *
|
---|
| 43 | * 3) There is no longer any restriction on line length.
|
---|
| 44 | *
|
---|
| 45 | * I ask that any bugs (and, if possible, fixes) be reported to me when
|
---|
| 46 | * possible. -David Ihnat (312) 784-4544 ihuxx!ignatz
|
---|
| 47 | */
|
---|
| 48 |
|
---|
| 49 | /* Modified to run under MINIX 1.1 by David O. Tinker (416) 978-3636
|
---|
| 50 | * (utgpu!dtinker) Sept. 19, 1987
|
---|
| 51 | */
|
---|
| 52 |
|
---|
| 53 | /* Modified to conform to POSIX 1003.2/Draft10 standard 23rd Sept. 1990
|
---|
| 54 | * Changes:
|
---|
| 55 | * - the arguments can be in any order
|
---|
| 56 | * - removed the ToUpper function
|
---|
| 57 | * by Thomas Brupbacher (tobr@mw.lpc.ethz.ch)
|
---|
| 58 | */
|
---|
| 59 |
|
---|
| 60 | #include <errno.h>
|
---|
| 61 | #include <ctype.h>
|
---|
| 62 | #include <stdlib.h>
|
---|
| 63 | #include <string.h>
|
---|
| 64 | #include <stdio.h>
|
---|
| 65 |
|
---|
| 66 | /* I'd love to use enums, but not everyone has them. Portability, y'know. */
|
---|
| 67 | #define NODELIM 1 /* prerr() type: -d was given without a delimiter list */
|
---|
| 68 | #define USAGE 2 /* prerr() type: bad invocation; prints the usage line */
|
---|
| 69 | #define BADFILE 3 /* prerr() type: a file operand could not be opened */
|
---|
| 70 | #define TOOMANY 4 /* prerr() type: more than _MAXFILES file operands */
|
---|
| 71 |
|
---|
| 72 | #define TAB '\t'
|
---|
| 73 | #define NL '\n'
|
---|
| 74 | #define BS '\b'
|
---|
| 75 | #define FF '\f'
|
---|
| 76 | #define CR '\r'
|
---|
| 77 | #define DEL '\177' /* stored by delimbuild() for the "\0" escape; the paste routines treat it as "emit no delimiter" */
|
---|
| 78 | #define SPACE ' ' /* not referenced in the code visible here */
|
---|
| 79 | #define BACKSLASH '\\'
|
---|
| 80 |
|
---|
| 81 | #define _MAXSZ 512 /* size of docol()'s line buffer */
|
---|
| 82 | #define _MAXFILES 12 /* maximum number of file operands main() accepts */
|
---|
| 83 | #define CLOSED ((FILE *)-1) /* docol() file-table slot whose file reached EOF and was closed */
|
---|
| 84 | #define ENDLIST ((FILE *)-2) /* sentinel stored after the last entry of docol()'s file table */
|
---|
| 85 |
|
---|
| 86 | char *cmdnam;
|
---|
| 87 |
|
---|
| 88 | short int sflag;
|
---|
/* Default delimiter list: a single TAB.  It must be a NUL-terminated string
 * because the paste routines find the end of the list by testing for '\0'
 * after advancing the delimiter pointer; a bare {TAB} array would make them
 * read one byte past the object.
 */
static char default_delims[] = "\t";
| 90 | char *delims; /* the pointer to the delimiters */
|
---|
| 91 | int number_of_delims = 1; /* number of delimiters to use */
|
---|
| 92 |
|
---|
| 93 | _PROTOTYPE(int main, (int argc, char **argv));
|
---|
| 94 | _PROTOTYPE(void docol, (int nfiles, char **fnamptr));
|
---|
| 95 | _PROTOTYPE(void doserial, (int nfiles, char **fnamptr));
|
---|
| 96 | _PROTOTYPE(void delimbuild, (char *strptr));
|
---|
| 97 | _PROTOTYPE(void prerr, (int etype, char *estring));
|
---|
| 98 |
|
---|
| 99 | int main(argc, argv)
|
---|
| 100 | int argc;
|
---|
| 101 | char **argv;
|
---|
| 102 | {
|
---|
| 103 | char **arg_ptr; /* used to save argv, needed for docol() etc */
|
---|
| 104 | int num_files = 0; /* Number of filenames specified on cmd line */
|
---|
| 105 | sflag = 0;
|
---|
| 106 | delims = default_delims; /* use default delimiters */
|
---|
| 107 |
|
---|
| 108 | cmdnam = *argv;
|
---|
| 109 |
|
---|
| 110 | if (argc >= 2) {
|
---|
| 111 |
|
---|
| 112 | /* Skip invocation name */
|
---|
| 113 | argv++;
|
---|
| 114 | argc--;
|
---|
| 115 |
|
---|
| 116 | /* Save argv */
|
---|
| 117 | arg_ptr = argv;
|
---|
| 118 | /* First, parse input options */
|
---|
| 119 |
|
---|
| 120 | while (argc-- > 0) {
|
---|
| 121 | if (argv[0][0] == '-' && argv[0][1] != '\0') {
|
---|
| 122 | switch (argv[0][1]) {
|
---|
| 123 | case 'd':
|
---|
| 124 | /* Delimiter character(s) */
|
---|
| 125 | if (*(++argv) == '\0')
|
---|
| 126 | prerr(NODELIM, "");
|
---|
| 127 | else
|
---|
| 128 | delimbuild(*(argv));
|
---|
| 129 | argc--;
|
---|
| 130 | break;
|
---|
| 131 |
|
---|
| 132 | case 's': sflag++; break;
|
---|
| 133 |
|
---|
| 134 | default: prerr(USAGE, "");
|
---|
| 135 | }
|
---|
| 136 | argv++;
|
---|
| 137 | } else {
|
---|
| 138 | num_files++;
|
---|
| 139 | argv++;
|
---|
| 140 | }
|
---|
| 141 | }
|
---|
| 142 |
|
---|
| 143 | /* If there are more than MAX_FILES files on the command
|
---|
| 144 | * line, exit with error message. */
|
---|
| 145 | if (num_files > _MAXFILES) prerr(TOOMANY, "");
|
---|
| 146 |
|
---|
| 147 | /* If no files specified, simply exit. Otherwise, if not the
|
---|
| 148 | * old '-s' option, process all files. If '-s', then process
|
---|
| 149 | * files one-at-a-time. */
|
---|
| 150 |
|
---|
| 151 | if (!sflag)
|
---|
| 152 | docol(num_files, arg_ptr); /* Column paste */
|
---|
| 153 | else
|
---|
| 154 | doserial(num_files, arg_ptr); /* Serial paste */
|
---|
| 155 |
|
---|
| 156 | exit(0);
|
---|
| 157 | } else
|
---|
| 158 | prerr(USAGE, "");
|
---|
| 159 | return(0);
|
---|
| 160 | }
|
---|
| 161 |
|
---|
| 162 | void docol(nfiles, fnamptr)
|
---|
| 163 | int nfiles;
|
---|
| 164 | char **fnamptr;
|
---|
| 165 | {
|
---|
| 166 | char iobuff[_MAXSZ]; /* i/o buffer for the fgets */
|
---|
| 167 | short int somedone; /* flag for blank field handling */
|
---|
| 168 |
|
---|
| 169 | /* There is a strange case where all files are just ready to be
|
---|
| 170 | * closed, or will on this round. In that case, the string of
|
---|
| 171 | * delimiters must be preserved. delbuf[1] ->delbuf[MAXFILES+1]
|
---|
| 172 | * provides intermediate storage for closed files, if needed;
|
---|
| 173 | * delbuf[0] is the current index.
|
---|
| 174 | */
|
---|
| 175 | char delbuf[_MAXFILES + 2];
|
---|
| 176 |
|
---|
| 177 | FILE *fileptr[_MAXFILES + 1];
|
---|
| 178 |
|
---|
| 179 | int filecnt; /* Set to number of files to process */
|
---|
| 180 | register char *delimptr; /* Cycling delimiter pointer */
|
---|
| 181 | int index; /* Working variable */
|
---|
| 182 | int strend; /* End of string in buffer */
|
---|
| 183 |
|
---|
| 184 | /* Perform column paste. First, attempt to open all files. (This
|
---|
| 185 | * could be expanded to an infinite number of files, but at the
|
---|
| 186 | * (considerable) expense of remembering the file and its current
|
---|
| 187 | * offset, then opening/reading/closing. The commands' utility
|
---|
| 188 | * doesn't warrant the effort; at least, to me...)
|
---|
| 189 | */
|
---|
| 190 |
|
---|
| 191 | for (filecnt = 0; (nfiles > 0); fnamptr++) {
|
---|
| 192 | if ((fnamptr[0][0] == '-') && (fnamptr[0][1] != '\0')) {
|
---|
| 193 | if (fnamptr[0][1] == 'd') fnamptr++;
|
---|
| 194 | } else {
|
---|
| 195 | nfiles--;
|
---|
| 196 | if (fnamptr[0][0] == '-') {
|
---|
| 197 | fileptr[filecnt++] = stdin;
|
---|
| 198 | } else {
|
---|
| 199 | fileptr[filecnt] = fopen(fnamptr[0], "r");
|
---|
| 200 | if (fileptr[filecnt++] == NULL)
|
---|
| 201 | prerr(BADFILE, *fnamptr);
|
---|
| 202 | }
|
---|
| 203 | }
|
---|
| 204 | }
|
---|
| 205 |
|
---|
| 206 | fileptr[filecnt] = ENDLIST; /* End of list. */
|
---|
| 207 |
|
---|
| 208 | /* Have all files. Now, read a line from each file, and output to
|
---|
| 209 | * stdout. Notice that the old 511 character limitation on the line
|
---|
| 210 | * length no longer applies, since this program doesn't do the
|
---|
| 211 | * buffering. Do this until you go through the loop and don't
|
---|
| 212 | * successfully read from any of the files.
|
---|
| 213 | */
|
---|
| 214 | for (; filecnt;) {
|
---|
| 215 | somedone = 0; /* Blank field handling flag */
|
---|
| 216 | delimptr = delims; /* Start at beginning of delim list */
|
---|
| 217 | delbuf[0] = 0; /* No squirreled delims */
|
---|
| 218 |
|
---|
| 219 | for (index = 0; (fileptr[index] != ENDLIST) && filecnt; index++) {
|
---|
| 220 | /* Read a line and immediately output. If it's too
|
---|
| 221 | * big for the buffer, then dump what was read and go
|
---|
| 222 | * back for more.
|
---|
| 223 | *
|
---|
| 224 | * Otherwise, if it is from the last file, then leave
|
---|
| 225 | * the carriage return in place; if not, replace with
|
---|
| 226 | * a delimiter (if any)
|
---|
| 227 | */
|
---|
| 228 |
|
---|
| 229 | strend = 0; /* Set so can easily detect EOF */
|
---|
| 230 |
|
---|
| 231 | if (fileptr[index] != CLOSED)
|
---|
| 232 | while (fgets(iobuff, (_MAXSZ - 1),
|
---|
| 233 | fileptr[index]) != NULL) {
|
---|
| 234 | strend = strlen(iobuff);/* Did the buf fill? */
|
---|
| 235 |
|
---|
| 236 | if (strend == (_MAXSZ - 1)) {
|
---|
| 237 | /* Gosh, what a long line. */
|
---|
| 238 | fputs(iobuff, stdout);
|
---|
| 239 | strend = 0;
|
---|
| 240 | continue;
|
---|
| 241 | }
|
---|
| 242 |
|
---|
| 243 | /* Ok got whole line in buffer. */
|
---|
| 244 | break; /* Out of loop for this file */
|
---|
| 245 | }
|
---|
| 246 |
|
---|
| 247 | /* Ended either on an EOF (well, actually NULL
|
---|
| 248 | * return-- it *could* be some sort of file error,
|
---|
| 249 | * but but if the file was opened successfully, this
|
---|
| 250 | * is unlikely. Besides, error checking on streams
|
---|
| 251 | * doesn't allow us to decide exactly what went
|
---|
| 252 | * wrong, so I'm going to be very Unix-like and
|
---|
| 253 | * ignore it!), or a closed file, or a received line.
|
---|
| 254 | * If an EOF, close the file and mark it in the list.
|
---|
| 255 | * In any case, output the delimiter of choice.
|
---|
| 256 | */
|
---|
| 257 |
|
---|
| 258 | if (!strend) {
|
---|
| 259 | if (fileptr[index] != CLOSED) {
|
---|
| 260 | fclose(fileptr[index]);
|
---|
| 261 | fileptr[index] = CLOSED;
|
---|
| 262 | filecnt--;
|
---|
| 263 | }
|
---|
| 264 |
|
---|
| 265 | /* Is this the end of the whole thing? */
|
---|
| 266 | if ((fileptr[index + 1] == ENDLIST) && !somedone)
|
---|
| 267 | continue; /* EXITS */
|
---|
| 268 |
|
---|
| 269 | /* Ok, some files not closed this line. Last file? */
|
---|
| 270 | if (fileptr[index + 1] == ENDLIST) {
|
---|
| 271 | if (delbuf[0]) {
|
---|
| 272 | fputs(&delbuf[1], stdout);
|
---|
| 273 | delbuf[0] = 0;
|
---|
| 274 | }
|
---|
| 275 | putc((int) NL, stdout);
|
---|
| 276 | continue; /* Next read of files */
|
---|
| 277 | } else {
|
---|
| 278 | /* Closed file; setup delim */
|
---|
| 279 | if (*delimptr != DEL) {
|
---|
| 280 | delbuf[0]++;
|
---|
| 281 | delbuf[delbuf[0]] = *delimptr++;
|
---|
| 282 | delbuf[delbuf[0] + 1] = '\0';
|
---|
| 283 | } else
|
---|
| 284 | delimptr++;
|
---|
| 285 | }
|
---|
| 286 |
|
---|
| 287 | /* Reset end of delimiter string if necessary */
|
---|
| 288 | if (*delimptr == '\0') delimptr = delims;
|
---|
| 289 | } else {
|
---|
| 290 | /* Some data read. */
|
---|
| 291 | somedone++;
|
---|
| 292 |
|
---|
| 293 | /* Any saved delims? */
|
---|
| 294 | if (delbuf[0]) {
|
---|
| 295 | fputs(&delbuf[1], stdout);
|
---|
| 296 | delbuf[0] = 0;
|
---|
| 297 | }
|
---|
| 298 |
|
---|
| 299 | /* If last file, last char will be NL. */
|
---|
| 300 | if (fileptr[index + 1] != ENDLIST) {
|
---|
| 301 | if (*delimptr == DEL) {
|
---|
| 302 | delimptr++;
|
---|
| 303 | iobuff[strend - 1] = '\0';/* No delim*/
|
---|
| 304 | } else
|
---|
| 305 | iobuff[strend - 1] = *delimptr++;
|
---|
| 306 | }
|
---|
| 307 | if (*delimptr == '\0') delimptr = delims;
|
---|
| 308 |
|
---|
| 309 | /* Now dump the buffer */
|
---|
| 310 | fputs(iobuff, stdout);
|
---|
| 311 | fflush(stdout);
|
---|
| 312 | }
|
---|
| 313 | }
|
---|
| 314 | }
|
---|
| 315 | }
|
---|
| 316 |
|
---|
| 317 | void doserial(nfiles, fnamptr)
|
---|
| 318 | int nfiles;
|
---|
| 319 | char **fnamptr;
|
---|
| 320 | {
|
---|
| 321 | /* Do serial paste. Simply scarf characters, performing
|
---|
| 322 | * one-character buffering to facilitate delim processing.
|
---|
| 323 | */
|
---|
| 324 |
|
---|
| 325 | register int charnew, charold;
|
---|
| 326 | register char *delimptr;
|
---|
| 327 |
|
---|
| 328 | register FILE *fileptr;
|
---|
| 329 |
|
---|
| 330 | for (; nfiles != 0; fnamptr++) {
|
---|
| 331 | if ((fnamptr[0][0] == '-') && (fnamptr[0][1] != '\0')) {
|
---|
| 332 | if (fnamptr[0][1] == 'd') fnamptr++;
|
---|
| 333 | } else {
|
---|
| 334 | if (fnamptr[0][0] == '-') {
|
---|
| 335 | fileptr = stdin;
|
---|
| 336 | } else {
|
---|
| 337 | fileptr = fopen(*fnamptr, "r");
|
---|
| 338 |
|
---|
| 339 | if (fileptr == NULL) prerr(BADFILE, *fnamptr);
|
---|
| 340 | }
|
---|
| 341 |
|
---|
| 342 | /* The file is open; just keep taking characters,
|
---|
| 343 | * stashing them in charnew; output charold,
|
---|
| 344 | * converting to the appropriate delimiter character
|
---|
| 345 | * if needful. After the EOF, simply output
|
---|
| 346 | * 'charold' if it's a newline; otherwise, output it
|
---|
| 347 | * and then a newline.
|
---|
| 348 | */
|
---|
| 349 |
|
---|
| 350 | delimptr = delims; /* Set up for delimiter string */
|
---|
| 351 |
|
---|
| 352 | if ((charold = getc(fileptr)) == EOF) {
|
---|
| 353 | /* Empty file! */
|
---|
| 354 | putc(NL, stdout);
|
---|
| 355 | fflush(stdout);
|
---|
| 356 | continue; /* Go on to the next file */
|
---|
| 357 | }
|
---|
| 358 |
|
---|
| 359 | /* Ok, 'charold' is set up. Hit it! */
|
---|
| 360 |
|
---|
| 361 | while ((charnew = getc(fileptr)) != EOF) {
|
---|
| 362 | /* Ok, process the old character */
|
---|
| 363 | if (charold == NL) {
|
---|
| 364 | if (*delimptr != DEL)
|
---|
| 365 | putc((int) *delimptr++, stdout);
|
---|
| 366 |
|
---|
| 367 | /* Reset pointer at end of delimiter string */
|
---|
| 368 | if (*delimptr == '\0') delimptr = delims;
|
---|
| 369 | } else
|
---|
| 370 | putc(charold, stdout);
|
---|
| 371 |
|
---|
| 372 | charold = charnew;
|
---|
| 373 | }
|
---|
| 374 |
|
---|
| 375 | /* Ok, hit EOF. Process that last character */
|
---|
| 376 |
|
---|
| 377 | putc(charold, stdout);
|
---|
| 378 | if ((char) charold != NL) putc(NL, stdout);
|
---|
| 379 | fflush(stdout);
|
---|
| 380 | nfiles--;
|
---|
| 381 | }
|
---|
| 382 | }
|
---|
| 383 | }
|
---|
| 384 |
|
---|
| 385 | void delimbuild(strptr)
|
---|
| 386 | char *strptr;
|
---|
| 387 | {
|
---|
| 388 | /* Process the delimiter string into something that can be used by
|
---|
| 389 | * the routines. This involves, primarily, collapsing the backslash
|
---|
| 390 | * representations of special characters into their actual values,
|
---|
| 391 | * and terminating the string in a manner that the routines can
|
---|
| 392 | * recognize. The set of possible backslash characters has been
|
---|
| 393 | * expanded beyond that recognized by the vanilla Unix(Tm) version.
|
---|
| 394 | */
|
---|
| 395 |
|
---|
| 396 | register char *strout;
|
---|
| 397 |
|
---|
| 398 | delims = strptr; /* delims now points to argv[...] */
|
---|
| 399 | strout = strptr; /* Start at the same place, anyway */
|
---|
| 400 |
|
---|
| 401 | while (*strptr) {
|
---|
| 402 | if (*strptr != '\\') /* Is it an escape character? */
|
---|
| 403 | *strout++ = *strptr++; /* No, just transfer it */
|
---|
| 404 | else {
|
---|
| 405 | strptr++; /* Get past escape character */
|
---|
| 406 |
|
---|
| 407 | switch (*strptr) {
|
---|
| 408 | case '0': *strout++ = DEL; break;
|
---|
| 409 |
|
---|
| 410 | case 't': *strout++ = TAB; break;
|
---|
| 411 |
|
---|
| 412 | case 'n': *strout++ = NL; break;
|
---|
| 413 |
|
---|
| 414 | case 'b': *strout++ = BS; break;
|
---|
| 415 |
|
---|
| 416 | case 'f': *strout++ = FF; break;
|
---|
| 417 |
|
---|
| 418 | case 'r': *strout++ = CR; break;
|
---|
| 419 |
|
---|
| 420 | case '\\':
|
---|
| 421 | *strout++ = BACKSLASH;
|
---|
| 422 | break;
|
---|
| 423 |
|
---|
| 424 | default: *strout++ = *strptr;
|
---|
| 425 | }
|
---|
| 426 |
|
---|
| 427 | strptr++;
|
---|
| 428 | }
|
---|
| 429 |
|
---|
| 430 | }
|
---|
| 431 | *strout = '\0'; /* Heaven forfend that we forget this! */
|
---|
| 432 | }
|
---|
| 433 |
|
---|
| 434 | void prerr(etype, estring)
|
---|
| 435 | int etype;
|
---|
| 436 | char *estring;
|
---|
| 437 | {
|
---|
| 438 | switch (etype) {
|
---|
| 439 | case USAGE:
|
---|
| 440 | fprintf(stderr, "%s : Usage: %s [-s] [-d <delimiters>] file1 file2 ...\n", cmdnam, cmdnam);
|
---|
| 441 | break;
|
---|
| 442 |
|
---|
| 443 | case NODELIM:
|
---|
| 444 | fprintf(stderr, "%s : no delimiters\n", cmdnam);
|
---|
| 445 | break;
|
---|
| 446 |
|
---|
| 447 | case BADFILE:
|
---|
| 448 | fprintf(stderr, "%s : %s : cannot open\n", cmdnam, estring);
|
---|
| 449 | break;
|
---|
| 450 |
|
---|
| 451 | case TOOMANY:
|
---|
| 452 | fprintf(stderr, "%s : too many files\n", cmdnam);
|
---|
| 453 | break;
|
---|
| 454 | }
|
---|
| 455 | exit(1);
|
---|
| 456 | }
|
---|