source: trunk/minix/commands/elvis/ctags.c@ 9

Last change on this file since 9 was 9, checked in by Mattia Monga, 13 years ago

Minix 3.1.2a

File size: 18.0 KB
Line 
1/* ctags.c */
2
3/* This is a reimplementation of the ctags(1) program. It supports ANSI C,
4 * and has heaps o' flags. It is meant to be distributed with elvis.
5 */
6
7#include <stdio.h>
8#include "config.h"
9#ifndef FALSE
10# define FALSE 0
11# define TRUE 1
12#endif
13#ifndef TAGS
14# define TAGS "tags"
15#endif
16#ifndef REFS
17# define REFS "refs"
18#endif
19#ifndef BLKSIZE
20# define BLKSIZE 1024
21#endif
22
23#include "ctype.c" /* yes, that really is the .c file, not the .h one. */
24
25/* -------------------------------------------------------------------------- */
26/* Some global variables */
27
28/* The following boolean variables are set according to command line flags */
29int incl_static; /* -s include static tags */
30int incl_types; /* -t include typedefs and structs */
31int incl_vars; /* -v include variables */
32int make_refs; /* -r generate a "refs" file */
33int append_files; /* -a append to "tags" [and "refs"] files */
34
35/* The following are used for outputting to the "tags" and "refs" files */
36FILE *tags; /* used for writing to the "tags" file */
37FILE *refs; /* used for writing to the "refs" file */
38
39/* -------------------------------------------------------------------------- */
40/* These are used for reading a source file. It keeps track of line numbers */
41char *file_name; /* name of the current file */
42FILE *file_fp; /* stream used for reading the file */
43long file_lnum; /* line number in the current file */
44long file_seek; /* fseek() offset to the start of current line */
45int file_afternl; /* boolean: was previous character a newline? */
46int file_prevch; /* a single character that was ungotten */
47int file_header; /* boolean: is the current file a header file? */
48
49/* This function opens a file, and resets the line counter. If it fails, it
50 * it will display an error message and leave the file_fp set to NULL.
51 */
52void file_open(name)
53 char *name; /* name of file to be opened */
54{
55 /* if another file was already open, then close it */
56 if (file_fp)
57 {
58 fclose(file_fp);
59 }
60
61 /* try to open the file for reading. The file must be opened in
62 * "binary" mode because otherwise fseek() would misbehave under DOS.
63 */
64#if MSDOS || TOS
65 file_fp = fopen(name, "rb");
66#else
67 file_fp = fopen(name, "r");
68#endif
69 if (!file_fp)
70 {
71 perror(name);
72 }
73
74 /* reset the name & line number */
75 file_name = name;
76 file_lnum = 0L;
77 file_seek = 0L;
78 file_afternl = TRUE;
79
80 /* determine whether this is a header file */
81 file_header = FALSE;
82 name += strlen(name) - 2;
83 if (name >= file_name && name[0] == '.' && (name[1] == 'h' || name[1] == 'H'))
84 {
85 file_header = TRUE;
86 }
87}
88
89/* This function reads a single character from the stream. If the *previous*
90 * character was a newline, then it also increments file_lnum and sets
91 * file_offset.
92 */
93int file_getc()
94{
95 int ch;
96
97 /* if there is an ungotten character, then return it. Don't do any
98 * other processing on it, though, because we already did that the
99 * first time it was read.
100 */
101 if (file_prevch)
102 {
103 ch = file_prevch;
104 file_prevch = 0;
105 return ch;
106 }
107
108 /* if previous character was a newline, then we're starting a line */
109 if (file_afternl)
110 {
111 file_afternl = FALSE;
112 file_seek = ftell(file_fp);
113 file_lnum++;
114 }
115
116 /* Get a character. If no file is open, then return EOF */
117 ch = (file_fp ? getc(file_fp) : EOF);
118
119 /* if it is a newline, then remember that fact */
120 if (ch == '\n')
121 {
122 file_afternl = TRUE;
123 }
124
125 /* return the character */
126 return ch;
127}
128
129/* This function ungets a character from the current source file */
130void file_ungetc(ch)
131 int ch; /* character to be ungotten */
132{
133 file_prevch = ch;
134}
135
136/* This function copies the current line out some other fp. It has no effect
137 * on the file_getc() function. During copying, any '\' characters are doubled
138 * and a leading '^' or trailing '$' is also quoted. The newline character is
139 * not copied.
140 *
141 * This is meant to be used when generating a tag line.
142 */
143void file_copyline(seek, fp)
144 long seek; /* where the lines starts in the source file */
145 FILE *fp; /* the output stream to copy it to */
146{
147 long oldseek;/* where the file's pointer was before we messed it up */
148 char ch; /* a single character from the file */
149 char next; /* the next character from this file */
150
151 /* go to the start of the line */
152 oldseek = ftell(file_fp);
153 fseek(file_fp, seek, 0);
154
155 /* if first character is '^', then emit \^ */
156 ch = getc(file_fp);
157 if (ch == '^')
158 {
159 putc('\\', fp);
160 putc('^', fp);
161 ch = getc(file_fp);
162 }
163
164 /* write everything up to, but not including, the newline */
165 while (ch != '\n')
166 {
167 /* preread the next character from this file */
168 next = getc(file_fp);
169
170 /* if character is '\', or a terminal '$', then quote it */
171 if (ch == '\\' || (ch == '$' && next == '\n'))
172 {
173 putc('\\', fp);
174 }
175 putc(ch, fp);
176
177 /* next character... */
178 ch = next;
179 }
180
181 /* seek back to the old position */
182 fseek(file_fp, oldseek, 0);
183}
184
185/* -------------------------------------------------------------------------- */
186/* This section handles preprocessor directives. It strips out all of the
187 * directives, and may emit a tag for #define directives.
188 */
189
190int cpp_afternl; /* boolean: look for '#' character? */
191int cpp_prevch; /* an ungotten character, if any */
192int cpp_refsok; /* boolean: can we echo characters out to "refs"? */
193
194/* This function opens the file & resets variables */
195void cpp_open(name)
196 char *name; /* name of source file to be opened */
197{
198 /* use the lower-level file_open function to open the file */
199 file_open(name);
200
201 /* reset variables */
202 cpp_afternl = TRUE;
203 cpp_refsok = TRUE;
204}
205
206/* This function copies a character from the source file to the "refs" file */
207void cpp_echo(ch)
208 int ch; /* the character to copy */
209{
210 static wasnl;
211
212 /* echo non-EOF chars, unless not making "ref", or echo turned off */
213 if (ch != EOF && make_refs && cpp_refsok && !file_header)
214 {
215 /* try to avoid blank lines */
216 if (ch == '\n')
217 {
218 if (wasnl)
219 {
220 return;
221 }
222 wasnl = TRUE;
223 }
224 else
225 {
226 wasnl = FALSE;
227 }
228
229 /* add the character */
230 putc(ch, refs);
231 }
232}
233
234/* This function returns the next character which isn't part of a directive */
235int cpp_getc()
236{
237 static
238 int ch; /* the next input character */
239 char *scan;
240
241 /* if we have an ungotten character, then return it */
242 if (cpp_prevch)
243 {
244 ch = cpp_prevch;
245 cpp_prevch = 0;
246 return ch;
247 }
248
249 /* Get a character from the file. Return it if not special '#' */
250 ch = file_getc();
251 if (ch == '\n')
252 {
253 cpp_afternl = TRUE;
254 cpp_echo(ch);
255 return ch;
256 }
257 else if (ch != '#' || !cpp_afternl)
258 {
259 /* normal character. Any non-whitespace should turn off afternl */
260 if (ch != ' ' && ch != '\t')
261 {
262 cpp_afternl = FALSE;
263 }
264 cpp_echo(ch);
265 return ch;
266 }
267
268 /* Yikes! We found a directive */
269
270 /* see whether this is a #define line */
271 scan = " define ";
272 while (*scan)
273 {
274 if (*scan == ' ')
275 {
276 /* space character matches any whitespace */
277 do
278 {
279 ch = file_getc();
280 } while (ch == ' ' || ch == '\t');
281 file_ungetc(ch);
282 }
283 else
284 {
285 /* other characters should match exactly */
286 ch = file_getc();
287 if (ch != *scan)
288 {
289 file_ungetc(ch);
290 break;
291 }
292 }
293 scan++;
294 }
295
296 /* is this a #define line? and should we generate a tag for it? */
297 if (!*scan && (file_header || incl_static))
298 {
299 /* if not a header, then this will be a static tag */
300 if (!file_header)
301 {
302 fputs(file_name, tags);
303 putc(':', tags);
304 }
305
306 /* output the tag name */
307 for (ch = file_getc(); isalnum(ch) || ch == '_'; ch = file_getc())
308 {
309 putc(ch, tags);
310 }
311
312 /* output a tab, the filename, another tab, and the line number */
313 fprintf(tags, "\t%s\t%ld\n", file_name, file_lnum);
314 }
315
316 /* skip to the end of the directive -- a newline that isn't preceded
317 * by a '\' character.
318 */
319 while (ch != EOF && ch != '\n')
320 {
321 if (ch == '\\')
322 {
323 ch = file_getc();
324 }
325 ch = file_getc();
326 }
327
328 /* return the newline that we found at the end of the directive */
329 cpp_echo(ch);
330 return ch;
331}
332
333/* This puts a character back into the input queue for the source file */
334cpp_ungetc(ch)
335 int ch; /* a character to be ungotten */
336{
337 cpp_prevch = ch;
338}
339
340
341/* -------------------------------------------------------------------------- */
342/* This is the lexical analyser. It gets characters from the preprocessor,
343 * and gives tokens to the parser. Some special codes are...
344 * (deleted) /*...* / (comments)
345 * (deleted) //...\n (comments)
346 * (deleted) (* (parens used in complex declaration)
347 * (deleted) [...] (array subscript, when ... contains no ])
348 * (deleted) struct (intro to structure declaration)
349 * BODY {...} ('{' can occur anywhere, '}' only at BOW if ... has '{')
350 * ARGS (...{ (args of function, not extern or forward)
351 * ARGS (...); (args of an extern/forward function declaration)
352 * COMMA , (separate declarations that have same scope)
353 * SEMICOLON ; (separate declarations that have different scope)
354 * SEMICOLON =...; (initializer)
355 * TYPEDEF typedef (the "typedef" keyword)
356 * STATIC static (the "static" keyword)
357 * STATIC private (the "static" keyword)
358 * STATIC PRIVATE (the "static" keyword)
359 * NAME [a-z]+ (really any valid name that isn't reserved word)
360 */
361
362/* #define EOF -1 */
363#define DELETED 0
364#define BODY 1
365#define ARGS 2
366#define COMMA 3
367#define SEMICOLON 4
368#define TYPEDEF 5
369#define STATIC 6
370#define EXTERN 7
371#define NAME 8
372
373char lex_name[BLKSIZE]; /* the name of a "NAME" token */
374long lex_seek; /* start of line that contains lex_name */
375
376lex_gettoken()
377{
378 int ch; /* a character from the preprocessor */
379 int next; /* the next character */
380 int token; /* the token that we'll return */
381 int i;
382
383 /* loop until we get a token that isn't "DELETED" */
384 do
385 {
386 /* get the next character */
387 ch = cpp_getc();
388
389 /* process the character */
390 switch (ch)
391 {
392 case ',':
393 token = COMMA;
394 break;
395
396 case ';':
397 token = SEMICOLON;
398 break;
399
400 case '/':
401 /* get the next character */
402 ch = cpp_getc();
403 switch (ch)
404 {
405 case '*': /* start of C comment */
406 ch = cpp_getc();
407 next = cpp_getc();
408 while (next != EOF && (ch != '*' || next != '/'))
409 {
410 ch = next;
411 next = cpp_getc();
412 }
413 break;
414
415 case '/': /* start of a C++ comment */
416 do
417 {
418 ch = cpp_getc();
419 } while (ch != '\n' && ch != EOF);
420 break;
421
422 default: /* some other slash */
423 cpp_ungetc(ch);
424 }
425 token = DELETED;
426 break;
427
428 case '(':
429 ch = cpp_getc();
430 if (ch == '*')
431 {
432 token = DELETED;
433 }
434 else
435 {
436 next = cpp_getc();
437 while (ch != '{' && ch != EOF && (ch != ')' || next != ';'))/*}*/
438 {
439 ch = next;
440 next = cpp_getc();
441 }
442 if (ch == '{')/*}*/
443 {
444 cpp_ungetc(ch);
445 }
446 else if (next == ';')
447 {
448 cpp_ungetc(next);
449 }
450 token = ARGS;
451 }
452 break;
453
454 case '{':/*}*/
455 /* don't send the next characters to "refs" */
456 cpp_refsok = FALSE;
457
458 /* skip ahead to closing '}', or to embedded '{' */
459 do
460 {
461 ch = cpp_getc();
462 } while (ch != '{' && ch != '}' && ch != EOF);
463
464 /* if has embedded '{', then skip to '}' in column 1 */
465 if (ch == '{') /*}*/
466 {
467 ch = cpp_getc();
468 next = cpp_getc();
469 while (ch != EOF && (ch != '\n' || next != '}'))/*{*/
470 {
471 ch = next;
472 next = cpp_getc();
473 }
474 }
475
476 /* resume "refs" processing */
477 cpp_refsok = TRUE;
478 cpp_echo('}');
479
480 token = BODY;
481 break;
482
483 case '[':
484 /* skip to matching ']' */
485 do
486 {
487 ch = cpp_getc();
488 } while (ch != ']' && ch != EOF);
489 token = DELETED;
490 break;
491
492 case '=':
493 /* skip to next ';' */
494 do
495 {
496 ch = cpp_getc();
497
498 /* leave array initializers out of "refs" */
499 if (ch == '{')
500 {
501 cpp_refsok = FALSE;
502 }
503 } while (ch != ';' && ch != EOF);
504
505 /* resume echoing to "refs" */
506 if (!cpp_refsok)
507 {
508 cpp_refsok = TRUE;
509 cpp_echo('}');
510 cpp_echo(';');
511 }
512 token = SEMICOLON;
513 break;
514
515 case EOF:
516 token = EOF;
517 break;
518
519 default:
520 /* is this the start of a name/keyword? */
521 if (isalpha(ch) || ch == '_')
522 {
523 /* collect the whole word */
524 lex_name[0] = ch;
525 for (i = 1, ch = cpp_getc();
526 i < BLKSIZE - 1 && (isalnum(ch) || ch == '_');
527 i++, ch = cpp_getc())
528 {
529 lex_name[i] = ch;
530 }
531 lex_name[i] = '\0';
532 cpp_ungetc(ch);
533
534 /* is it a reserved word? */
535 if (!strcmp(lex_name, "typedef"))
536 {
537 token = TYPEDEF;
538 lex_seek = -1L;
539 }
540 else if (!strcmp(lex_name, "static")
541 || !strcmp(lex_name, "private")
542 || !strcmp(lex_name, "PRIVATE"))
543 {
544 token = STATIC;
545 lex_seek = -1L;
546 }
547 else if (!strcmp(lex_name, "extern")
548 || !strcmp(lex_name, "EXTERN")
549 || !strcmp(lex_name, "FORWARD"))
550 {
551 token = EXTERN;
552 lex_seek = -1L;
553 }
554 else
555 {
556 token = NAME;
557 lex_seek = file_seek;
558 }
559 }
560 else /* not part of a name/keyword */
561 {
562 token = DELETED;
563 }
564
565 } /* end switch(ch) */
566
567 } while (token == DELETED);
568
569 return token;
570}
571
572/* -------------------------------------------------------------------------- */
573/* This is the parser. It locates tag candidates, and then decides whether to
574 * generate a tag for them.
575 */
576
577/* This function generates a tag for the object in lex_name, whose tag line is
578 * located at a given seek offset.
579 */
580void maketag(scope, seek)
581 int scope; /* 0 if global, or STATIC if static */
582 long seek; /* the seek offset of the line */
583{
584 /* output the tagname and filename fields */
585 if (scope == EXTERN)
586 {
587 /* whoa! we should *never* output a tag for "extern" decl */
588 return;
589 }
590 else if (scope == STATIC)
591 {
592 fprintf(tags, "%s:%s\t%s\t", file_name, lex_name, file_name);
593 }
594 else
595 {
596 fprintf(tags, "%s\t%s\t", lex_name, file_name);
597 }
598
599 /* output the target line */
600 putc('/', tags);
601 putc('^', tags);
602 file_copyline(seek, tags);
603 putc('$', tags);
604 putc('/', tags);
605 putc('\n', tags);
606}
607
608
609/* This function parses a source file, adding any tags that it finds */
610void ctags(name)
611 char *name; /* the name of a source file to be checked */
612{
613 int prev; /* the previous token from the source file */
614 int token; /* the current token from the source file */
615 int scope; /* normally 0, but could be a TYPEDEF or STATIC token */
616 int gotname;/* boolean: does lex_name contain a tag candidate? */
617 long tagseek;/* start of line that contains lex_name */
618
619 /* open the file */
620 cpp_open(name);
621
622 /* reset */
623 scope = 0;
624 gotname = FALSE;
625 token = SEMICOLON;
626
627 /* parse until the end of the file */
628 while (prev = token, (token = lex_gettoken()) != EOF)
629 {
630 /* scope keyword? */
631 if (token == TYPEDEF || token == STATIC || token == EXTERN)
632 {
633 scope = token;
634 gotname = FALSE;
635 continue;
636 }
637
638 /* name of a possible tag candidate? */
639 if (token == NAME)
640 {
641 tagseek = file_seek;
642 gotname = TRUE;
643 continue;
644 }
645
646 /* if NAME BODY, without ARGS, then NAME is a struct tag */
647 if (gotname && token == BODY && prev != ARGS)
648 {
649 gotname = FALSE;
650
651 /* ignore if in typedef -- better name is coming soon */
652 if (scope == TYPEDEF)
653 {
654 continue;
655 }
656
657 /* generate a tag, if -t and maybe -s */
658 if (incl_types && (file_header || incl_static))
659 {
660 maketag(file_header ? 0 : STATIC, tagseek);
661 }
662 }
663
664 /* If NAME ARGS BODY, then NAME is a function */
665 if (gotname && prev == ARGS && token == BODY)
666 {
667 gotname = FALSE;
668
669 /* generate a tag, maybe checking -s */
670 if (scope != STATIC || incl_static)
671 {
672 maketag(scope, tagseek);
673 }
674 }
675
676 /* If NAME SEMICOLON or NAME COMMA, then NAME is var/typedef */
677 if (gotname && (token == SEMICOLON || token == COMMA))
678 {
679 gotname = FALSE;
680
681 /* generate a tag, if -v/-t and maybe -s */
682 if (scope == TYPEDEF && incl_types && (file_header || incl_static)
683 || scope == STATIC && incl_vars && incl_static
684 || incl_vars)
685 {
686 /* a TYPEDEF outside of a header is STATIC */
687 if (scope == TYPEDEF && !file_header)
688 {
689 maketag(STATIC, tagseek);
690 }
691 else /* use whatever scope was declared */
692 {
693 maketag(scope, tagseek);
694 }
695 }
696 }
697
698 /* reset after a semicolon or ARGS BODY pair */
699 if (token == SEMICOLON || (prev == ARGS && token == BODY))
700 {
701 scope = 0;
702 gotname = FALSE;
703 }
704 }
705
706 /* The source file will be automatically closed */
707}
708
709/* -------------------------------------------------------------------------- */
710
/* Print the command-line summary on stderr and terminate the program with
 * exit status 2.  This function does not return.
 */
void usage()
{
    static char *lines[] =
    {
        "usage: ctags [flags] filenames...\n",
        "\t-s include static functions\n",
        "\t-t include typedefs\n",
        "\t-v include variable declarations\n",
        "\t-r generate a \"refs\" file, too\n",
        "\t-a append to \"tags\", instead of overwriting\n"
    };
    int n; /* index of the line being printed */

    for (n = 0; n < (int)(sizeof lines / sizeof lines[0]); n++)
    {
        fputs(lines[n], stderr);
    }
    exit(2);
}
721
722
723
724#if AMIGA
725# include "amiwild.c"
726#endif
727
728#if VMS
729# include "vmswild.c"
730#endif
731
732main(argc, argv)
733 int argc;
734 char **argv;
735{
736 int i, j;
737
738#if MSDOS || TOS
739 char **wildexpand();
740 argv = wildexpand(&argc, argv);
741#endif
742
743 /* build the tables used by the ctype macros */
744 _ct_init("");
745
746 /* parse the option flags */
747 for (i = 1; i < argc && argv[i][0] == '-'; i++)
748 {
749 for (j = 1; argv[i][j]; j++)
750 {
751 switch (argv[i][j])
752 {
753 case 's': incl_static = TRUE; break;
754 case 't': incl_types = TRUE; break;
755 case 'v': incl_vars = TRUE; break;
756 case 'r': make_refs = TRUE; break;
757 case 'a': append_files = TRUE; break;
758 default: usage();
759 }
760 }
761 }
762
763 /* There should always be at least one source file named in args */
764 if (i == argc)
765 {
766 usage();
767 }
768
769 /* open the "tags" and maybe "refs" files */
770 tags = fopen(TAGS, append_files ? "a" : "w");
771 if (!tags)
772 {
773 perror(TAGS);
774 exit(3);
775 }
776 if (make_refs)
777 {
778 refs = fopen(REFS, append_files ? "a" : "w");
779 if (!refs)
780 {
781 perror(REFS);
782 exit(4);
783 }
784 }
785
786 /* parse each source file */
787 for (; i < argc; i++)
788 {
789 ctags(argv[i]);
790 }
791
792 /* close "tags" and maybe "refs" */
793 fclose(tags);
794 if (make_refs)
795 {
796 fclose(refs);
797 }
798
799#ifdef SORT
800 /* This is a hack which will sort the tags list. It should
801 * on UNIX and OS-9. You may have trouble with csh. Note
802 * that the tags list only has to be sorted if you intend to
803 * use it with the real vi; elvis permits unsorted tags.
804 */
805# if OSK
806 system("qsort tags >-_tags; -nx; del tags; rename _tags tags");
807# else
808 system("sort tags >_tags$$; mv _tags$$ tags");
809# endif
810#endif
811
812 exit(0);
813 /*NOTREACHED*/
814}
815
816#if MSDOS || TOS
817# define WILDCARD_NO_MAIN
818# include "wildcard.c"
819#endif
Note: See TracBrowser for help on using the repository browser.