1 | /* grep - search a file for a pattern Author: Norbert Schlenker */
|
---|
2 |
|
---|
3 | /* Norbert Schlenker (nfs@princeton.edu) 1990-02-08
|
---|
4 | * Released into the public domain.
|
---|
5 | *
|
---|
6 | * Grep searches files for lines containing a pattern, as specified by
|
---|
7 | * a regular expression, and prints those lines. It is invoked by:
|
---|
8 | * grep [flags] [pattern] [file ...]
|
---|
9 | *
|
---|
10 | * Flags:
|
---|
11 | * -e pattern useful when pattern begins with '-'
|
---|
12 | * -c print a count of lines matched
|
---|
13 | * -i ignore case
|
---|
14 | * -l prints just file names, no lines (quietly overrides -n)
|
---|
15 | * -n printed lines are preceded by relative line numbers
|
---|
16 | * -s prints errors only (quietly overrides -l and -n)
|
---|
17 | * -v prints lines which don't contain the pattern
|
---|
18 | *
|
---|
19 | * Semantic note:
|
---|
20 | * If both -l and -v are specified, grep prints the names of those
|
---|
21 | * files which do not contain the pattern *anywhere*.
|
---|
22 | *
|
---|
23 | * Exit:
|
---|
24 | * Grep sets an exit status which can be tested by the caller.
|
---|
25 | * Note that these settings are not necessarily compatible with
|
---|
26 | * any other version of grep, especially when -v is specified.
|
---|
27 | * Possible status values are:
|
---|
28 | * 0 if any matches are found
|
---|
29 | * 1 if no matches are found
|
---|
30 | * 2 if syntax errors are detected or any file cannot be opened
|
---|
31 | */
|
---|
32 |
|
---|
33 |
|
---|
34 | /* External interfaces */
|
---|
35 | #include <sys/types.h>
|
---|
36 | #include <regexp.h> /* Thanks to Henry Spencer */
|
---|
37 | #include <stdlib.h>
|
---|
38 | #include <string.h>
|
---|
39 | #include <stdio.h>
|
---|
40 | #include <unistd.h>
|
---|
41 |
|
---|
42 | /* Internal constants */
|
---|
43 | #define MATCH 0 /* exit code: some match somewhere */
|
---|
44 | #define NO_MATCH 1 /* exit code: no match on any line */
|
---|
45 | #define FAILURE 2 /* exit code: syntax error or bad file name */
|
---|
46 |
|
---|
47 | /* Macros */
|
---|
48 | #define SET_FLAG(c) (flags[(c)-'a'] = 1)
|
---|
49 | #define FLAG(c) (flags[(c)-'a'] != 0)
|
---|
50 |
|
---|
51 | #define uppercase(c) (((unsigned) ((c) - 'A')) <= ('Z' - 'A'))
|
---|
52 | #define downcase(c) ((c) - 'A' + 'a')
|
---|
53 |
|
---|
54 | /* Private storage */
|
---|
55 | static char *program; /* program name */
|
---|
56 | static char flags[26]; /* invocation flags */
|
---|
57 | static regexp *expression; /* compiled search pattern */
|
---|
58 | static const char *rerr; /* error message */
|
---|
59 |
|
---|
60 | /* External variables. */
|
---|
61 | extern int optind;
|
---|
62 | extern char *optarg;
|
---|
63 |
|
---|
64 | /* Internal interfaces */
|
---|
65 | _PROTOTYPE(int main, (int argc, char **argv));
|
---|
66 | _PROTOTYPE(static int match, (FILE *input, char *label, char *filename));
|
---|
67 | _PROTOTYPE(static char *get_line, (FILE *input));
|
---|
68 | _PROTOTYPE(static char *map_nocase, (char *line));
|
---|
69 | _PROTOTYPE(void regerror , (const char *s ) );
|
---|
70 | _PROTOTYPE(static void tov8, (char *v8pattern, char *pattern));
|
---|
71 |
|
---|
72 | int main(argc, argv)
|
---|
73 | int argc;
|
---|
74 | char *argv[];
|
---|
75 | {
|
---|
76 | int opt; /* option letter from getopt() */
|
---|
77 | int egrep=0; /* using extended regexp operators */
|
---|
78 | char *pattern; /* search pattern */
|
---|
79 | char *v8pattern; /* v8 regexp search pattern */
|
---|
80 | int exit_status = NO_MATCH; /* exit status for our caller */
|
---|
81 | int file_status; /* status of search in one file */
|
---|
82 | FILE *input; /* input file (if not stdin) */
|
---|
83 |
|
---|
84 | program = argv[0];
|
---|
85 | if (strlen(program)>=5 && strcmp(program+strlen(program)-5,"egrep")==0) egrep=1;
|
---|
86 | memset(flags, 0, sizeof(flags));
|
---|
87 | pattern = NULL;
|
---|
88 |
|
---|
89 | /* Process any command line flags. */
|
---|
90 | while ((opt = getopt(argc, argv, "e:cilnsv")) != EOF) {
|
---|
91 | if (opt == '?')
|
---|
92 | exit_status = FAILURE;
|
---|
93 | else
|
---|
94 | if (opt == 'e')
|
---|
95 | pattern = optarg;
|
---|
96 | else
|
---|
97 | SET_FLAG(opt);
|
---|
98 | }
|
---|
99 |
|
---|
100 | /* Detect a few problems. */
|
---|
101 | if ((exit_status == FAILURE) || (optind == argc && pattern == NULL)) {
|
---|
102 | fprintf(stderr,"Usage: %s [-cilnsv] [-e] expression [file ...]\n",program);
|
---|
103 | exit(FAILURE);
|
---|
104 | }
|
---|
105 |
|
---|
106 | /* Ensure we have a usable pattern. */
|
---|
107 | if (pattern == NULL)
|
---|
108 | pattern = argv[optind++];
|
---|
109 |
|
---|
110 | /* Map pattern to lowercase if -i given. */
|
---|
111 | if (FLAG('i')) {
|
---|
112 | char *p;
|
---|
113 | for (p = pattern; *p != '\0'; p++) {
|
---|
114 | if (uppercase(*p))
|
---|
115 | *p = downcase(*p);
|
---|
116 | }
|
---|
117 | }
|
---|
118 |
|
---|
119 | if (!egrep) {
|
---|
120 | if ((v8pattern=malloc(2*strlen(pattern)))==(char*)0) {
|
---|
121 | fprintf(stderr,"%s: out of memory\n");
|
---|
122 | exit(FAILURE);
|
---|
123 | }
|
---|
124 | tov8(v8pattern,pattern);
|
---|
125 | } else v8pattern=pattern;
|
---|
126 |
|
---|
127 | rerr=(char*)0;
|
---|
128 | if ((expression = regcomp(v8pattern)) == NULL) {
|
---|
129 | fprintf(stderr,"%s: bad regular expression",program);
|
---|
130 | if (rerr) fprintf(stderr," (%s)",rerr);
|
---|
131 | fprintf(stderr,"\n");
|
---|
132 | exit(FAILURE);
|
---|
133 | }
|
---|
134 |
|
---|
135 | /* Process the files appropriately. */
|
---|
136 | if (optind == argc) { /* no file names - find pattern in stdin */
|
---|
137 | exit_status = match(stdin, (char *) NULL, "<stdin>");
|
---|
138 | }
|
---|
139 | else
|
---|
140 | if (optind + 1 == argc) { /* one file name - find pattern in it */
|
---|
141 | if (strcmp(argv[optind], "-") == 0) {
|
---|
142 | exit_status = match(stdin, (char *) NULL, "-");
|
---|
143 | } else {
|
---|
144 | if ((input = fopen(argv[optind], "r")) == NULL) {
|
---|
145 | fprintf(stderr, "%s: couldn't open %s\n",
|
---|
146 | program, argv[optind]);
|
---|
147 | exit_status = FAILURE;
|
---|
148 | }
|
---|
149 | else {
|
---|
150 | exit_status = match(input, (char *) NULL, argv[optind]);
|
---|
151 | }
|
---|
152 | }
|
---|
153 | }
|
---|
154 | else
|
---|
155 | while (optind < argc) { /* lots of file names - find pattern in all */
|
---|
156 | if (strcmp(argv[optind], "-") == 0) {
|
---|
157 | file_status = match(stdin, "-", "-");
|
---|
158 | } else {
|
---|
159 | if ((input = fopen(argv[optind], "r")) == NULL) {
|
---|
160 | fprintf(stderr, "%s: couldn't open %s\n",
|
---|
161 | program, argv[optind]);
|
---|
162 | exit_status = FAILURE;
|
---|
163 | } else {
|
---|
164 | file_status = match(input, argv[optind], argv[optind]);
|
---|
165 | fclose(input);
|
---|
166 | }
|
---|
167 | }
|
---|
168 | if (exit_status != FAILURE)
|
---|
169 | exit_status &= file_status;
|
---|
170 | ++optind;
|
---|
171 | }
|
---|
172 | return(exit_status);
|
---|
173 | }
|
---|
174 |
|
---|
175 |
|
---|
176 | /* match - matches the lines of a file with the regular expression.
|
---|
177 | * To improve performance when either -s or -l is specified, this
|
---|
178 | * function handles those cases specially.
|
---|
179 | */
|
---|
180 |
|
---|
181 | static int match(input, label, filename)
|
---|
182 | FILE *input;
|
---|
183 | char *label;
|
---|
184 | char *filename;
|
---|
185 | {
|
---|
186 | char *line, *testline; /* pointers to input line */
|
---|
187 | long int lineno = 0; /* line number */
|
---|
188 | long int matchcount = 0; /* lines matched */
|
---|
189 | int status = NO_MATCH; /* summary of what was found in this file */
|
---|
190 |
|
---|
191 | if (FLAG('s') || FLAG('l')) {
|
---|
192 | while ((line = get_line(input)) != NULL) {
|
---|
193 | testline = FLAG('i') ? map_nocase(line) : line;
|
---|
194 | if (regexec(expression, testline, 1)) {
|
---|
195 | status = MATCH;
|
---|
196 | break;
|
---|
197 | }
|
---|
198 | }
|
---|
199 | if (FLAG('l'))
|
---|
200 | if ((!FLAG('v') && status == MATCH) ||
|
---|
201 | ( FLAG('v') && status == NO_MATCH))
|
---|
202 | puts(filename);
|
---|
203 | return status;
|
---|
204 | }
|
---|
205 |
|
---|
206 | while ((line = get_line(input)) != NULL) {
|
---|
207 | ++lineno;
|
---|
208 | testline = FLAG('i') ? map_nocase(line) : line;
|
---|
209 | if (regexec(expression, testline, 1)) {
|
---|
210 | status = MATCH;
|
---|
211 | if (!FLAG('v')) {
|
---|
212 | if (label != NULL)
|
---|
213 | printf("%s:", label);
|
---|
214 | if (FLAG('n'))
|
---|
215 | printf("%ld:", lineno);
|
---|
216 | if (!FLAG('c')) puts(line);
|
---|
217 | matchcount++;
|
---|
218 | }
|
---|
219 | } else {
|
---|
220 | if (FLAG('v')) {
|
---|
221 | if (label != NULL)
|
---|
222 | printf("%s:", label);
|
---|
223 | if (FLAG('n'))
|
---|
224 | printf("%ld:", lineno);
|
---|
225 | if (!FLAG('c')) puts(line);
|
---|
226 | matchcount++;
|
---|
227 | }
|
---|
228 | }
|
---|
229 | }
|
---|
230 | if (FLAG('c')) printf("%ld\n", matchcount);
|
---|
231 | return status;
|
---|
232 | }
|
---|
233 |
|
---|
234 |
|
---|
235 | /* get_line - fetch a line from the input file
|
---|
236 | * This function reads a line from the input file into a dynamically
|
---|
237 | * allocated buffer. If the line is too long for the current buffer,
|
---|
238 | * attempts will be made to increase its size to accommodate the line.
|
---|
239 | * The trailing newline is stripped before returning to the caller.
|
---|
240 | */
|
---|
241 |
|
---|
242 | #define FIRST_BUFFER (size_t)256 /* first buffer size */
|
---|
243 |
|
---|
244 | static char *buf = NULL; /* input buffer */
|
---|
245 | static size_t buf_size = 0; /* input buffer size */
|
---|
246 |
|
---|
247 | static char *get_line(input)
|
---|
248 | FILE *input;
|
---|
249 | {
|
---|
250 | int n;
|
---|
251 | register char *bp;
|
---|
252 | register int c;
|
---|
253 | char *new_buf;
|
---|
254 | size_t new_size;
|
---|
255 |
|
---|
256 | if (buf_size == 0) {
|
---|
257 | if ((buf = (char *) malloc(FIRST_BUFFER)) == NULL) {
|
---|
258 | fprintf(stderr,"%s: not enough memory\n",program);
|
---|
259 | exit(FAILURE);
|
---|
260 | }
|
---|
261 | buf_size = FIRST_BUFFER;
|
---|
262 | }
|
---|
263 |
|
---|
264 | bp = buf;
|
---|
265 | n = buf_size;
|
---|
266 | while (1) {
|
---|
267 | while (--n > 0 && (c = getc(input)) != EOF) {
|
---|
268 | if (c == '\n') {
|
---|
269 | *bp = '\0';
|
---|
270 | return buf;
|
---|
271 | }
|
---|
272 | *bp++ = c;
|
---|
273 | }
|
---|
274 | if (c == EOF)
|
---|
275 | return (ferror(input) || bp == buf) ? NULL : buf;
|
---|
276 | new_size = buf_size << 1;
|
---|
277 | if ((new_buf = (char *) realloc(buf, new_size)) == NULL) {
|
---|
278 | fprintf(stderr, "%s: line too long - truncated\n", program);
|
---|
279 | while ((c = getc(input)) != EOF && c != '\n') ;
|
---|
280 | *bp = '\0';
|
---|
281 | return buf;
|
---|
282 | } else {
|
---|
283 | bp = new_buf + (buf_size - 1);
|
---|
284 | n = buf_size + 1;
|
---|
285 | buf = new_buf;
|
---|
286 | buf_size = new_size;
|
---|
287 | }
|
---|
288 | }
|
---|
289 | }
|
---|
290 |
|
---|
291 |
|
---|
292 | /* map_nocase - map a line down to lowercase letters only.
|
---|
293 | * bad points: assumes line gotten from get_line.
|
---|
294 | * there is more than A-Z you say?
|
---|
295 | */
|
---|
296 |
|
---|
297 | static char *map_nocase(line)
|
---|
298 | char *line;
|
---|
299 | {
|
---|
300 | static char *mapped=(char*)0;
|
---|
301 | static size_t map_size = 0;
|
---|
302 | char *mp;
|
---|
303 |
|
---|
304 | if (map_size < buf_size) {
|
---|
305 | if ((mapped=realloc(mapped,map_size=buf_size)) == NULL) {
|
---|
306 | fprintf(stderr,"%s: not enough memory\n",program);
|
---|
307 | exit(FAILURE);
|
---|
308 | }
|
---|
309 | }
|
---|
310 |
|
---|
311 | mp = mapped;
|
---|
312 | do {
|
---|
313 | *mp++ = uppercase(*line) ? downcase(*line) : *line;
|
---|
314 | } while (*line++ != '\0');
|
---|
315 |
|
---|
316 | return mapped;
|
---|
317 | }
|
---|
318 |
|
---|
/* tov8 - rewrite a basic regular expression in v8 syntax.
 *
 * In basic regular expressions, the characters ?, +, |, (, and ) are
 * taken literally and their backslashed versions are the RE operators.
 * In v8 regular expressions it is the other way round, so each of those
 * characters is swapped with its backslashed version.  Any other
 * backslash escape, including a lone trailing backslash, is copied
 * unchanged.
 *
 * A bracket expression ([...]) is copied verbatim: backslash has no
 * special meaning inside brackets, so adding or removing one there
 * would change the set of characters matched.  As the first member
 * (after an optional ^) a ']' is an ordinary character.
 *
 *   v8     destination; must have room for 2*strlen(basic)+1 bytes
 *   basic  NUL-terminated source pattern (not modified)
 */
static void tov8(char *v8, char *basic)
{
    while (*basic != '\0') {
        switch (*basic) {
        case '[':
            *v8++ = *basic++;
            if (*basic == '^')
                *v8++ = *basic++;
            if (*basic == ']')
                *v8++ = *basic++;
            while (*basic != '\0' && *basic != ']')
                *v8++ = *basic++;
            if (*basic == ']')
                *v8++ = *basic++;
            break;

        case '?':
        case '+':
        case '|':
        case '(':
        case ')':
            /* Literal in basic syntax: escape it for v8. */
            *v8++ = '\\';
            *v8++ = *basic++;
            break;

        case '\\':
            switch (basic[1]) {
            case '?':
            case '+':
            case '|':
            case '(':
            case ')':
                /* Operator in basic syntax: drop the backslash. */
                *v8++ = basic[1];
                basic += 2;
                break;
            case '\0':
                /* Trailing backslash: copy it on its own. */
                *v8++ = *basic++;
                break;
            default:
                /* Any other escape is kept as is. */
                *v8++ = *basic++;
                *v8++ = *basic++;
                break;
            }
            break;

        default:
            *v8++ = *basic++;
            break;
        }
    }
    *v8 = '\0';
}
|
---|
372 |
|
---|
373 | /* Regular expression code calls this routine to print errors. */
|
---|
374 |
|
---|
375 | void regerror(s)
|
---|
376 | const char *s;
|
---|
377 | {
|
---|
378 | rerr=s;
|
---|
379 | }
|
---|