source: trunk/minix/commands/simple/grep.c@ 15

Last change on this file since 15 was 9, checked in by Mattia Monga, 14 years ago

Minix 3.1.2a

File size: 9.4 KB
Line 
1/* grep - search a file for a pattern Author: Norbert Schlenker */
2
3/* Norbert Schlenker (nfs@princeton.edu) 1990-02-08
4 * Released into the public domain.
5 *
6 * Grep searches files for lines containing a pattern, as specified by
7 * a regular expression, and prints those lines. It is invoked by:
8 * grep [flags] [pattern] [file ...]
9 *
10 * Flags:
11 * -e pattern useful when pattern begins with '-'
12 * -c print a count of lines matched
13 * -i ignore case
14 * -l prints just file names, no lines (quietly overrides -n)
15 * -n printed lines are preceded by relative line numbers
16 * -s prints errors only (quietly overrides -l and -n)
17 * -v prints lines which don't contain the pattern
18 *
19 * Semantic note:
20 * If both -l and -v are specified, grep prints the names of those
21 * files which do not contain the pattern *anywhere*.
22 *
23 * Exit:
24 * Grep sets an exit status which can be tested by the caller.
25 * Note that these settings are not necessarily compatible with
26 * any other version of grep, especially when -v is specified.
27 * Possible status values are:
28 * 0 if any matches are found
29 * 1 if no matches are found
30 * 2 if syntax errors are detected or any file cannot be opened
31 */
32
33
34/* External interfaces */
35#include <sys/types.h>
36#include <regexp.h> /* Thanks to Henry Spencer */
37#include <stdlib.h>
38#include <string.h>
39#include <stdio.h>
40#include <unistd.h>
41
/* Exit codes handed back to the caller. */
#define MATCH           0       /* at least one line matched */
#define NO_MATCH        1       /* no line matched anywhere */
#define FAILURE         2       /* bad usage, bad pattern or unreadable file */

/* Option bookkeeping: one slot in flags[] per lowercase option letter. */
#define SET_FLAG(c)     (flags[(c)-'a'] = 1)
#define FLAG(c)         (flags[(c)-'a'] != 0)

/* ASCII-only case helpers.  uppercase() relies on unsigned wrap-around so
 * that a single comparison covers both ends of the 'A'..'Z' range;
 * downcase() must only be applied to characters uppercase() accepted.
 */
#define uppercase(c)    (((unsigned) ((c) - 'A')) <= ('Z' - 'A'))
#define downcase(c)     ((c) - 'A' + 'a')
53
54/* Private storage */
55static char *program; /* program name */
56static char flags[26]; /* invocation flags */
57static regexp *expression; /* compiled search pattern */
58static const char *rerr; /* error message */
59
60/* External variables. */
61extern int optind;
62extern char *optarg;
63
64/* Internal interfaces */
65_PROTOTYPE(int main, (int argc, char **argv));
66_PROTOTYPE(static int match, (FILE *input, char *label, char *filename));
67_PROTOTYPE(static char *get_line, (FILE *input));
68_PROTOTYPE(static char *map_nocase, (char *line));
69_PROTOTYPE(void regerror , (const char *s ) );
70_PROTOTYPE(static void tov8, (char *v8pattern, char *pattern));
71
72int main(argc, argv)
73int argc;
74char *argv[];
75{
76 int opt; /* option letter from getopt() */
77 int egrep=0; /* using extended regexp operators */
78 char *pattern; /* search pattern */
79 char *v8pattern; /* v8 regexp search pattern */
80 int exit_status = NO_MATCH; /* exit status for our caller */
81 int file_status; /* status of search in one file */
82 FILE *input; /* input file (if not stdin) */
83
84 program = argv[0];
85 if (strlen(program)>=5 && strcmp(program+strlen(program)-5,"egrep")==0) egrep=1;
86 memset(flags, 0, sizeof(flags));
87 pattern = NULL;
88
89/* Process any command line flags. */
90 while ((opt = getopt(argc, argv, "e:cilnsv")) != EOF) {
91 if (opt == '?')
92 exit_status = FAILURE;
93 else
94 if (opt == 'e')
95 pattern = optarg;
96 else
97 SET_FLAG(opt);
98 }
99
100/* Detect a few problems. */
101 if ((exit_status == FAILURE) || (optind == argc && pattern == NULL)) {
102 fprintf(stderr,"Usage: %s [-cilnsv] [-e] expression [file ...]\n",program);
103 exit(FAILURE);
104 }
105
106/* Ensure we have a usable pattern. */
107 if (pattern == NULL)
108 pattern = argv[optind++];
109
110/* Map pattern to lowercase if -i given. */
111 if (FLAG('i')) {
112 char *p;
113 for (p = pattern; *p != '\0'; p++) {
114 if (uppercase(*p))
115 *p = downcase(*p);
116 }
117 }
118
119 if (!egrep) {
120 if ((v8pattern=malloc(2*strlen(pattern)))==(char*)0) {
121 fprintf(stderr,"%s: out of memory\n");
122 exit(FAILURE);
123 }
124 tov8(v8pattern,pattern);
125 } else v8pattern=pattern;
126
127 rerr=(char*)0;
128 if ((expression = regcomp(v8pattern)) == NULL) {
129 fprintf(stderr,"%s: bad regular expression",program);
130 if (rerr) fprintf(stderr," (%s)",rerr);
131 fprintf(stderr,"\n");
132 exit(FAILURE);
133 }
134
135/* Process the files appropriately. */
136 if (optind == argc) { /* no file names - find pattern in stdin */
137 exit_status = match(stdin, (char *) NULL, "<stdin>");
138 }
139 else
140 if (optind + 1 == argc) { /* one file name - find pattern in it */
141 if (strcmp(argv[optind], "-") == 0) {
142 exit_status = match(stdin, (char *) NULL, "-");
143 } else {
144 if ((input = fopen(argv[optind], "r")) == NULL) {
145 fprintf(stderr, "%s: couldn't open %s\n",
146 program, argv[optind]);
147 exit_status = FAILURE;
148 }
149 else {
150 exit_status = match(input, (char *) NULL, argv[optind]);
151 }
152 }
153 }
154 else
155 while (optind < argc) { /* lots of file names - find pattern in all */
156 if (strcmp(argv[optind], "-") == 0) {
157 file_status = match(stdin, "-", "-");
158 } else {
159 if ((input = fopen(argv[optind], "r")) == NULL) {
160 fprintf(stderr, "%s: couldn't open %s\n",
161 program, argv[optind]);
162 exit_status = FAILURE;
163 } else {
164 file_status = match(input, argv[optind], argv[optind]);
165 fclose(input);
166 }
167 }
168 if (exit_status != FAILURE)
169 exit_status &= file_status;
170 ++optind;
171 }
172 return(exit_status);
173}
174
175
176/* match - matches the lines of a file with the regular expression.
177 * To improve performance when either -s or -l is specified, this
178 * function handles those cases specially.
179 */
180
181static int match(input, label, filename)
182FILE *input;
183char *label;
184char *filename;
185{
186 char *line, *testline; /* pointers to input line */
187 long int lineno = 0; /* line number */
188 long int matchcount = 0; /* lines matched */
189 int status = NO_MATCH; /* summary of what was found in this file */
190
191 if (FLAG('s') || FLAG('l')) {
192 while ((line = get_line(input)) != NULL) {
193 testline = FLAG('i') ? map_nocase(line) : line;
194 if (regexec(expression, testline, 1)) {
195 status = MATCH;
196 break;
197 }
198 }
199 if (FLAG('l'))
200 if ((!FLAG('v') && status == MATCH) ||
201 ( FLAG('v') && status == NO_MATCH))
202 puts(filename);
203 return status;
204 }
205
206 while ((line = get_line(input)) != NULL) {
207 ++lineno;
208 testline = FLAG('i') ? map_nocase(line) : line;
209 if (regexec(expression, testline, 1)) {
210 status = MATCH;
211 if (!FLAG('v')) {
212 if (label != NULL)
213 printf("%s:", label);
214 if (FLAG('n'))
215 printf("%ld:", lineno);
216 if (!FLAG('c')) puts(line);
217 matchcount++;
218 }
219 } else {
220 if (FLAG('v')) {
221 if (label != NULL)
222 printf("%s:", label);
223 if (FLAG('n'))
224 printf("%ld:", lineno);
225 if (!FLAG('c')) puts(line);
226 matchcount++;
227 }
228 }
229 }
230 if (FLAG('c')) printf("%ld\n", matchcount);
231 return status;
232}
233
234
235/* get_line - fetch a line from the input file
236 * This function reads a line from the input file into a dynamically
237 * allocated buffer. If the line is too long for the current buffer,
238 * attempts will be made to increase its size to accomodate the line.
239 * The trailing newline is stripped before returning to the caller.
240 */
241
242#define FIRST_BUFFER (size_t)256 /* first buffer size */
243
244static char *buf = NULL; /* input buffer */
245static size_t buf_size = 0; /* input buffer size */
246
247static char *get_line(input)
248FILE *input;
249{
250 int n;
251 register char *bp;
252 register int c;
253 char *new_buf;
254 size_t new_size;
255
256 if (buf_size == 0) {
257 if ((buf = (char *) malloc(FIRST_BUFFER)) == NULL) {
258 fprintf(stderr,"%s: not enough memory\n",program);
259 exit(FAILURE);
260 }
261 buf_size = FIRST_BUFFER;
262 }
263
264 bp = buf;
265 n = buf_size;
266 while (1) {
267 while (--n > 0 && (c = getc(input)) != EOF) {
268 if (c == '\n') {
269 *bp = '\0';
270 return buf;
271 }
272 *bp++ = c;
273 }
274 if (c == EOF)
275 return (ferror(input) || bp == buf) ? NULL : buf;
276 new_size = buf_size << 1;
277 if ((new_buf = (char *) realloc(buf, new_size)) == NULL) {
278 fprintf(stderr, "%s: line too long - truncated\n", program);
279 while ((c = getc(input)) != EOF && c != '\n') ;
280 *bp = '\0';
281 return buf;
282 } else {
283 bp = new_buf + (buf_size - 1);
284 n = buf_size + 1;
285 buf = new_buf;
286 buf_size = new_size;
287 }
288 }
289}
290
291
292/* map_nocase - map a line down to lowercase letters only.
293 * bad points: assumes line gotten from get_line.
294 * there is more than A-Z you say?
295 */
296
297static char *map_nocase(line)
298char *line;
299{
300 static char *mapped=(char*)0;
301 static size_t map_size = 0;
302 char *mp;
303
304 if (map_size < buf_size) {
305 if ((mapped=realloc(mapped,map_size=buf_size)) == NULL) {
306 fprintf(stderr,"%s: not enough memory\n",program);
307 exit(FAILURE);
308 }
309 }
310
311 mp = mapped;
312 do {
313 *mp++ = uppercase(*line) ? downcase(*line) : *line;
314 } while (*line++ != '\0');
315
316 return mapped;
317}
318
/* tov8 - translate a basic regular expression into v8 syntax.
 *
 * In basic regular expressions, the characters ?, +, |, (, and )
 * are taken literally; use the backslashed versions for RE operators.
 * In v8 regular expressions, things are the other way round, so
 * we have to swap those characters and their backslashed versions.
 *
 * Bracket expressions ([...]) are copied through unchanged: backslash is
 * an ordinary character inside brackets, so adding or removing one there
 * would change the set of characters matched.
 *
 *   v8     output buffer; must hold at least 2*strlen(basic)+1 bytes
 *   basic  NUL-terminated basic regular expression (not modified)
 */
static void tov8(char *v8, char *basic)
{
  while (*basic != '\0') {
    switch (*basic) {
    case '[':
      *v8++ = *basic++;
      /* A leading '^' and a ']' in first position belong to the set. */
      if (*basic == '^') *v8++ = *basic++;
      if (*basic == ']') *v8++ = *basic++;
      while (*basic != '\0' && *basic != ']') *v8++ = *basic++;
      if (*basic == ']') *v8++ = *basic++;
      break;

    case '?':
    case '+':
    case '|':
    case '(':
    case ')':
      /* Literal in basic syntax, operator in v8: escape it. */
      *v8++ = '\\';
      *v8++ = *basic++;
      break;

    case '\\':
      switch (basic[1]) {
      case '?':
      case '+':
      case '|':
      case '(':
      case ')':
        /* Operator in basic syntax: drop the backslash. */
        *v8++ = *++basic;
        ++basic;
        break;
      case '\0':
        /* Trailing backslash: copy it and let regcomp() judge it. */
        *v8++ = *basic++;
        break;
      default:
        /* Any other escape means the same in both syntaxes. */
        *v8++ = *basic++;
        *v8++ = *basic++;
        break;
      }
      break;

    default:
      *v8++ = *basic++;
      break;
    }
  }
  *v8 = '\0';
}
372
373/* Regular expression code calls this routine to print errors. */
374
375void regerror(s)
376const char *s;
377{
378 rerr=s;
379}
Note: See TracBrowser for help on using the repository browser.