[9] | 1 | /* grep - search a file for a pattern Author: Norbert Schlenker */
|
---|
| 2 |
|
---|
| 3 | /* Norbert Schlenker (nfs@princeton.edu) 1990-02-08
|
---|
| 4 | * Released into the public domain.
|
---|
| 5 | *
|
---|
| 6 | * Grep searches files for lines containing a pattern, as specified by
|
---|
| 7 | * a regular expression, and prints those lines. It is invoked by:
|
---|
| 8 | * grep [flags] [pattern] [file ...]
|
---|
| 9 | *
|
---|
| 10 | * Flags:
|
---|
| 11 | * -e pattern useful when pattern begins with '-'
|
---|
| 12 | * -c print a count of lines matched
|
---|
| 13 | * -i ignore case
|
---|
| 14 | * -l prints just file names, no lines (quietly overrides -n)
|
---|
| 15 | * -n printed lines are preceded by relative line numbers
|
---|
| 16 | * -s prints errors only (quietly overrides -l and -n)
|
---|
| 17 | * -v prints lines which don't contain the pattern
|
---|
| 18 | *
|
---|
| 19 | * Semantic note:
|
---|
| 20 | * If both -l and -v are specified, grep prints the names of those
|
---|
| 21 | * files which do not contain the pattern *anywhere*.
|
---|
| 22 | *
|
---|
| 23 | * Exit:
|
---|
| 24 | * Grep sets an exit status which can be tested by the caller.
|
---|
| 25 | * Note that these settings are not necessarily compatible with
|
---|
| 26 | * any other version of grep, especially when -v is specified.
|
---|
| 27 | * Possible status values are:
|
---|
| 28 | * 0 if any matches are found
|
---|
| 29 | * 1 if no matches are found
|
---|
| 30 | * 2 if syntax errors are detected or any file cannot be opened
|
---|
| 31 | */
|
---|
| 32 |
|
---|
| 33 |
|
---|
| 34 | /* External interfaces */
|
---|
| 35 | #include <sys/types.h>
|
---|
| 36 | #include <regexp.h> /* Thanks to Henry Spencer */
|
---|
| 37 | #include <stdlib.h>
|
---|
| 38 | #include <string.h>
|
---|
| 39 | #include <stdio.h>
|
---|
| 40 | #include <unistd.h>
|
---|
| 41 |
|
---|
/* Internal constants */
/* These values double as the process exit status and as the per-file
 * result returned by match().
 */
#define MATCH 0 /* exit code: some match somewhere */
#define NO_MATCH 1 /* exit code: no match on any line */
#define FAILURE 2 /* exit code: syntax error or bad file name */

/* Macros */
/* Option letters are recorded in flags[], indexed by their distance from
 * 'a', so both macros expect a lowercase letter as argument.
 */
#define SET_FLAG(c) (flags[(c)-'a'] = 1)
#define FLAG(c) (flags[(c)-'a'] != 0)

/* uppercase() is true only for 'A'..'Z': the unsigned cast makes any value
 * below 'A' wrap around to a large number that fails the comparison.
 * downcase() is only meaningful for a character uppercase() accepted.
 * Both rely on the letters being contiguous in the character set.
 */
#define uppercase(c) (((unsigned) ((c) - 'A')) <= ('Z' - 'A'))
#define downcase(c) ((c) - 'A' + 'a')

/* Private storage */
static char *program; /* program name */
static char flags[26]; /* invocation flags */
static regexp *expression; /* compiled search pattern */
static const char *rerr; /* error message; set by regerror(), reported by main() */

/* External variables. */
extern int optind;
extern char *optarg;

/* Internal interfaces */
/* NOTE(review): _PROTOTYPE is not defined in this file; presumably it is
 * supplied by one of the system headers included above - confirm.
 */
_PROTOTYPE(int main, (int argc, char **argv));
_PROTOTYPE(static int match, (FILE *input, char *label, char *filename));
_PROTOTYPE(static char *get_line, (FILE *input));
_PROTOTYPE(static char *map_nocase, (char *line));
_PROTOTYPE(void regerror , (const char *s ) );
_PROTOTYPE(static void tov8, (char *v8pattern, char *pattern));
|
---|
| 71 |
|
---|
| 72 | int main(argc, argv)
|
---|
| 73 | int argc;
|
---|
| 74 | char *argv[];
|
---|
| 75 | {
|
---|
| 76 | int opt; /* option letter from getopt() */
|
---|
| 77 | int egrep=0; /* using extended regexp operators */
|
---|
| 78 | char *pattern; /* search pattern */
|
---|
| 79 | char *v8pattern; /* v8 regexp search pattern */
|
---|
| 80 | int exit_status = NO_MATCH; /* exit status for our caller */
|
---|
| 81 | int file_status; /* status of search in one file */
|
---|
| 82 | FILE *input; /* input file (if not stdin) */
|
---|
| 83 |
|
---|
| 84 | program = argv[0];
|
---|
| 85 | if (strlen(program)>=5 && strcmp(program+strlen(program)-5,"egrep")==0) egrep=1;
|
---|
| 86 | memset(flags, 0, sizeof(flags));
|
---|
| 87 | pattern = NULL;
|
---|
| 88 |
|
---|
| 89 | /* Process any command line flags. */
|
---|
| 90 | while ((opt = getopt(argc, argv, "e:cilnsv")) != EOF) {
|
---|
| 91 | if (opt == '?')
|
---|
| 92 | exit_status = FAILURE;
|
---|
| 93 | else
|
---|
| 94 | if (opt == 'e')
|
---|
| 95 | pattern = optarg;
|
---|
| 96 | else
|
---|
| 97 | SET_FLAG(opt);
|
---|
| 98 | }
|
---|
| 99 |
|
---|
| 100 | /* Detect a few problems. */
|
---|
| 101 | if ((exit_status == FAILURE) || (optind == argc && pattern == NULL)) {
|
---|
| 102 | fprintf(stderr,"Usage: %s [-cilnsv] [-e] expression [file ...]\n",program);
|
---|
| 103 | exit(FAILURE);
|
---|
| 104 | }
|
---|
| 105 |
|
---|
| 106 | /* Ensure we have a usable pattern. */
|
---|
| 107 | if (pattern == NULL)
|
---|
| 108 | pattern = argv[optind++];
|
---|
| 109 |
|
---|
| 110 | /* Map pattern to lowercase if -i given. */
|
---|
| 111 | if (FLAG('i')) {
|
---|
| 112 | char *p;
|
---|
| 113 | for (p = pattern; *p != '\0'; p++) {
|
---|
| 114 | if (uppercase(*p))
|
---|
| 115 | *p = downcase(*p);
|
---|
| 116 | }
|
---|
| 117 | }
|
---|
| 118 |
|
---|
| 119 | if (!egrep) {
|
---|
| 120 | if ((v8pattern=malloc(2*strlen(pattern)))==(char*)0) {
|
---|
| 121 | fprintf(stderr,"%s: out of memory\n");
|
---|
| 122 | exit(FAILURE);
|
---|
| 123 | }
|
---|
| 124 | tov8(v8pattern,pattern);
|
---|
| 125 | } else v8pattern=pattern;
|
---|
| 126 |
|
---|
| 127 | rerr=(char*)0;
|
---|
| 128 | if ((expression = regcomp(v8pattern)) == NULL) {
|
---|
| 129 | fprintf(stderr,"%s: bad regular expression",program);
|
---|
| 130 | if (rerr) fprintf(stderr," (%s)",rerr);
|
---|
| 131 | fprintf(stderr,"\n");
|
---|
| 132 | exit(FAILURE);
|
---|
| 133 | }
|
---|
| 134 |
|
---|
| 135 | /* Process the files appropriately. */
|
---|
| 136 | if (optind == argc) { /* no file names - find pattern in stdin */
|
---|
| 137 | exit_status = match(stdin, (char *) NULL, "<stdin>");
|
---|
| 138 | }
|
---|
| 139 | else
|
---|
| 140 | if (optind + 1 == argc) { /* one file name - find pattern in it */
|
---|
| 141 | if (strcmp(argv[optind], "-") == 0) {
|
---|
| 142 | exit_status = match(stdin, (char *) NULL, "-");
|
---|
| 143 | } else {
|
---|
| 144 | if ((input = fopen(argv[optind], "r")) == NULL) {
|
---|
| 145 | fprintf(stderr, "%s: couldn't open %s\n",
|
---|
| 146 | program, argv[optind]);
|
---|
| 147 | exit_status = FAILURE;
|
---|
| 148 | }
|
---|
| 149 | else {
|
---|
| 150 | exit_status = match(input, (char *) NULL, argv[optind]);
|
---|
| 151 | }
|
---|
| 152 | }
|
---|
| 153 | }
|
---|
| 154 | else
|
---|
| 155 | while (optind < argc) { /* lots of file names - find pattern in all */
|
---|
| 156 | if (strcmp(argv[optind], "-") == 0) {
|
---|
| 157 | file_status = match(stdin, "-", "-");
|
---|
| 158 | } else {
|
---|
| 159 | if ((input = fopen(argv[optind], "r")) == NULL) {
|
---|
| 160 | fprintf(stderr, "%s: couldn't open %s\n",
|
---|
| 161 | program, argv[optind]);
|
---|
| 162 | exit_status = FAILURE;
|
---|
| 163 | } else {
|
---|
| 164 | file_status = match(input, argv[optind], argv[optind]);
|
---|
| 165 | fclose(input);
|
---|
| 166 | }
|
---|
| 167 | }
|
---|
| 168 | if (exit_status != FAILURE)
|
---|
| 169 | exit_status &= file_status;
|
---|
| 170 | ++optind;
|
---|
| 171 | }
|
---|
| 172 | return(exit_status);
|
---|
| 173 | }
|
---|
| 174 |
|
---|
| 175 |
|
---|
| 176 | /* match - matches the lines of a file with the regular expression.
|
---|
| 177 | * To improve performance when either -s or -l is specified, this
|
---|
| 178 | * function handles those cases specially.
|
---|
| 179 | */
|
---|
| 180 |
|
---|
| 181 | static int match(input, label, filename)
|
---|
| 182 | FILE *input;
|
---|
| 183 | char *label;
|
---|
| 184 | char *filename;
|
---|
| 185 | {
|
---|
| 186 | char *line, *testline; /* pointers to input line */
|
---|
| 187 | long int lineno = 0; /* line number */
|
---|
| 188 | long int matchcount = 0; /* lines matched */
|
---|
| 189 | int status = NO_MATCH; /* summary of what was found in this file */
|
---|
| 190 |
|
---|
| 191 | if (FLAG('s') || FLAG('l')) {
|
---|
| 192 | while ((line = get_line(input)) != NULL) {
|
---|
| 193 | testline = FLAG('i') ? map_nocase(line) : line;
|
---|
| 194 | if (regexec(expression, testline, 1)) {
|
---|
| 195 | status = MATCH;
|
---|
| 196 | break;
|
---|
| 197 | }
|
---|
| 198 | }
|
---|
| 199 | if (FLAG('l'))
|
---|
| 200 | if ((!FLAG('v') && status == MATCH) ||
|
---|
| 201 | ( FLAG('v') && status == NO_MATCH))
|
---|
| 202 | puts(filename);
|
---|
| 203 | return status;
|
---|
| 204 | }
|
---|
| 205 |
|
---|
| 206 | while ((line = get_line(input)) != NULL) {
|
---|
| 207 | ++lineno;
|
---|
| 208 | testline = FLAG('i') ? map_nocase(line) : line;
|
---|
| 209 | if (regexec(expression, testline, 1)) {
|
---|
| 210 | status = MATCH;
|
---|
| 211 | if (!FLAG('v')) {
|
---|
| 212 | if (label != NULL)
|
---|
| 213 | printf("%s:", label);
|
---|
| 214 | if (FLAG('n'))
|
---|
| 215 | printf("%ld:", lineno);
|
---|
| 216 | if (!FLAG('c')) puts(line);
|
---|
| 217 | matchcount++;
|
---|
| 218 | }
|
---|
| 219 | } else {
|
---|
| 220 | if (FLAG('v')) {
|
---|
| 221 | if (label != NULL)
|
---|
| 222 | printf("%s:", label);
|
---|
| 223 | if (FLAG('n'))
|
---|
| 224 | printf("%ld:", lineno);
|
---|
| 225 | if (!FLAG('c')) puts(line);
|
---|
| 226 | matchcount++;
|
---|
| 227 | }
|
---|
| 228 | }
|
---|
| 229 | }
|
---|
| 230 | if (FLAG('c')) printf("%ld\n", matchcount);
|
---|
| 231 | return status;
|
---|
| 232 | }
|
---|
| 233 |
|
---|
| 234 |
|
---|
| 235 | /* get_line - fetch a line from the input file
|
---|
| 236 | * This function reads a line from the input file into a dynamically
|
---|
| 237 | * allocated buffer. If the line is too long for the current buffer,
|
---|
| 238 | * attempts will be made to increase its size to accomodate the line.
|
---|
| 239 | * The trailing newline is stripped before returning to the caller.
|
---|
| 240 | */
|
---|
| 241 |
|
---|
| 242 | #define FIRST_BUFFER (size_t)256 /* first buffer size */
|
---|
| 243 |
|
---|
| 244 | static char *buf = NULL; /* input buffer */
|
---|
| 245 | static size_t buf_size = 0; /* input buffer size */
|
---|
| 246 |
|
---|
| 247 | static char *get_line(input)
|
---|
| 248 | FILE *input;
|
---|
| 249 | {
|
---|
| 250 | int n;
|
---|
| 251 | register char *bp;
|
---|
| 252 | register int c;
|
---|
| 253 | char *new_buf;
|
---|
| 254 | size_t new_size;
|
---|
| 255 |
|
---|
| 256 | if (buf_size == 0) {
|
---|
| 257 | if ((buf = (char *) malloc(FIRST_BUFFER)) == NULL) {
|
---|
| 258 | fprintf(stderr,"%s: not enough memory\n",program);
|
---|
| 259 | exit(FAILURE);
|
---|
| 260 | }
|
---|
| 261 | buf_size = FIRST_BUFFER;
|
---|
| 262 | }
|
---|
| 263 |
|
---|
| 264 | bp = buf;
|
---|
| 265 | n = buf_size;
|
---|
| 266 | while (1) {
|
---|
| 267 | while (--n > 0 && (c = getc(input)) != EOF) {
|
---|
| 268 | if (c == '\n') {
|
---|
| 269 | *bp = '\0';
|
---|
| 270 | return buf;
|
---|
| 271 | }
|
---|
| 272 | *bp++ = c;
|
---|
| 273 | }
|
---|
| 274 | if (c == EOF)
|
---|
| 275 | return (ferror(input) || bp == buf) ? NULL : buf;
|
---|
| 276 | new_size = buf_size << 1;
|
---|
| 277 | if ((new_buf = (char *) realloc(buf, new_size)) == NULL) {
|
---|
| 278 | fprintf(stderr, "%s: line too long - truncated\n", program);
|
---|
| 279 | while ((c = getc(input)) != EOF && c != '\n') ;
|
---|
| 280 | *bp = '\0';
|
---|
| 281 | return buf;
|
---|
| 282 | } else {
|
---|
| 283 | bp = new_buf + (buf_size - 1);
|
---|
| 284 | n = buf_size + 1;
|
---|
| 285 | buf = new_buf;
|
---|
| 286 | buf_size = new_size;
|
---|
| 287 | }
|
---|
| 288 | }
|
---|
| 289 | }
|
---|
| 290 |
|
---|
| 291 |
|
---|
| 292 | /* map_nocase - map a line down to lowercase letters only.
|
---|
| 293 | * bad points: assumes line gotten from get_line.
|
---|
| 294 | * there is more than A-Z you say?
|
---|
| 295 | */
|
---|
| 296 |
|
---|
| 297 | static char *map_nocase(line)
|
---|
| 298 | char *line;
|
---|
| 299 | {
|
---|
| 300 | static char *mapped=(char*)0;
|
---|
| 301 | static size_t map_size = 0;
|
---|
| 302 | char *mp;
|
---|
| 303 |
|
---|
| 304 | if (map_size < buf_size) {
|
---|
| 305 | if ((mapped=realloc(mapped,map_size=buf_size)) == NULL) {
|
---|
| 306 | fprintf(stderr,"%s: not enough memory\n",program);
|
---|
| 307 | exit(FAILURE);
|
---|
| 308 | }
|
---|
| 309 | }
|
---|
| 310 |
|
---|
| 311 | mp = mapped;
|
---|
| 312 | do {
|
---|
| 313 | *mp++ = uppercase(*line) ? downcase(*line) : *line;
|
---|
| 314 | } while (*line++ != '\0');
|
---|
| 315 |
|
---|
| 316 | return mapped;
|
---|
| 317 | }
|
---|
| 318 |
|
---|
/* In basic regular expressions the characters ?, +, |, ( and ) stand for
   themselves, and their backslashed forms are the RE operators.  The v8
   syntax has it the other way round, so translating means exchanging each
   of those characters with its backslashed version.
*/

/* Nonzero if c is one of the characters whose meaning is exchanged. */
static int tov8_swapped(int c)
{
    return c == '?' || c == '+' || c == '|' || c == '(' || c == ')';
}

/* tov8 - translate the basic pattern in 'basic' into v8 syntax in 'v8'.
   The output can need up to twice the input length plus one byte for the
   terminating NUL; the caller provides the space.
*/
static void tov8(char *v8, char *basic)
{
    char *src = basic;
    char *dst = v8;

    while (*src != '\0') {
        if (tov8_swapped(*src)) {
            /* Literal in basic syntax: must be escaped for v8. */
            *dst++ = '\\';
            *dst++ = *src++;
            continue;
        }
        if (*src != '\\') {
            /* Ordinary character: copied unchanged. */
            *dst++ = *src++;
            continue;
        }

        /* *src is a backslash; what follows decides the translation. */
        if (tov8_swapped(src[1])) {
            /* Operator in basic syntax: drop the backslash. */
            *dst++ = src[1];
            src += 2;
        } else if (src[1] == '\0') {
            /* Trailing backslash: copy it and let the loop end. */
            *dst++ = *src++;
        } else {
            /* Any other escape is passed through as a pair. */
            *dst++ = *src++;
            *dst++ = *src++;
        }
    }
    *dst = '\0';
}
|
---|
| 372 |
|
---|
| 373 | /* Regular expression code calls this routine to print errors. */
|
---|
| 374 |
|
---|
| 375 | void regerror(s)
|
---|
| 376 | const char *s;
|
---|
| 377 | {
|
---|
| 378 | rerr=s;
|
---|
| 379 | }
|
---|