source: trunk/minix/commands/pax/pat_rep.c@ 15

Last change on this file since 15 was 9, checked in by Mattia Monga, 14 years ago

Minix 3.1.2a

File size: 28.5 KB
Line 
1/*-
2 * Copyright (c) 1992 Keith Muller.
3 * Copyright (c) 1992, 1993
4 * The Regents of the University of California. All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * Keith Muller of the University of California, San Diego.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33
34#ifndef lint
35#if 0
36static char sccsid[] = "@(#)pat_rep.c 8.2 (Berkeley) 4/18/94";
37#endif
38#endif /* not lint */
39
40#include <sys/types.h>
41#include <sys/stat.h>
42#include <stdio.h>
43#include <string.h>
44#include <unistd.h>
45#include <stdlib.h>
46#include <errno.h>
47#ifdef NET2_REGEX
48#include <regexp.h>
49#else
50#include <regex.h>
51#endif
52#include "pax.h"
53#include "pat_rep.h"
54#include "extern.h"
55
56/*
57 * routines to handle pattern matching, name modification (regular expression
58 * substitution and interactive renames), and destination name modification for
59 * copy (-rw). Both file name and link names are adjusted as required in these
60 * routines.
61 */
62
63#define MAXSUBEXP 10 /* max subexpressions, DO NOT CHANGE */
64static PATTERN *pathead = NULL; /* file pattern match list head */
65static PATTERN *pattail = NULL; /* file pattern match list tail */
66static REPLACE *rephead = NULL; /* replacement string list head */
67static REPLACE *reptail = NULL; /* replacement string list tail */
68
69static int rep_name(char *, int *, int);
70static int tty_rename(ARCHD *);
71static int fix_path(char *, int *, char *, int);
72static int fn_match(char *, char *, char **);
73static char * range_match(char *, int);
74#ifdef NET2_REGEX
75static int resub(regexp *, char *, char *, char *);
76#else
77static int resub(regex_t *, regmatch_t *, char *, char *, char *);
78#endif
79
80/*
81 * rep_add()
82 * parses the -s replacement string; compiles the regular expression
83 * and stores the compiled value and it's replacement string together in
84 * replacement string list. Input to this function is of the form:
85 * /old/new/pg
86 * The first char in the string specifies the delimiter used by this
87 * replacement string. "Old" is a regular expression in "ed" format which
88 * is compiled by regcomp() and is applied to filenames. "new" is the
89 * substitution string; p and g are options flags for printing and global
90 * replacement (over the single filename)
91 * Return:
92 * 0 if a proper replacement string and regular expression was added to
93 * the list of replacement patterns; -1 otherwise.
94 */
95
96int
97rep_add(char *str)
98{
99 char *pt1;
100 char *pt2;
101 REPLACE *rep;
102# ifndef NET2_REGEX
103 int res;
104 char rebuf[BUFSIZ];
105# endif
106
107 /*
108 * throw out the bad parameters
109 */
110 if ((str == NULL) || (*str == '\0')) {
111 paxwarn(1, "Empty replacement string");
112 return(-1);
113 }
114
115 /*
116 * first character in the string specifies what the delimiter is for
117 * this expression
118 */
119 if ((pt1 = strchr(str+1, *str)) == NULL) {
120 paxwarn(1, "Invalid replacement string %s", str);
121 return(-1);
122 }
123
124 /*
125 * allocate space for the node that handles this replacement pattern
126 * and split out the regular expression and try to compile it
127 */
128 if ((rep = (REPLACE *)malloc(sizeof(REPLACE))) == NULL) {
129 paxwarn(1, "Unable to allocate memory for replacement string");
130 return(-1);
131 }
132
133 *pt1 = '\0';
134# ifdef NET2_REGEX
135 if ((rep->rcmp = regcomp(str+1)) == NULL) {
136# else
137 if ((res = regcomp(&(rep->rcmp), str+1, 0)) != 0) {
138 regerror(res, &(rep->rcmp), rebuf, sizeof(rebuf));
139 paxwarn(1, "%s while compiling regular expression %s", rebuf, str);
140# endif
141 (void)free((char *)rep);
142 return(-1);
143 }
144
145 /*
146 * put the delimiter back in case we need an error message and
147 * locate the delimiter at the end of the replacement string
148 * we then point the node at the new substitution string
149 */
150 *pt1++ = *str;
151 if ((pt2 = strchr(pt1, *str)) == NULL) {
152# ifdef NET2_REGEX
153 (void)free((char *)rep->rcmp);
154# else
155 regfree(&(rep->rcmp));
156# endif
157 (void)free((char *)rep);
158 paxwarn(1, "Invalid replacement string %s", str);
159 return(-1);
160 }
161
162 *pt2 = '\0';
163 rep->nstr = pt1;
164 pt1 = pt2++;
165 rep->flgs = 0;
166
167 /*
168 * set the options if any
169 */
170 while (*pt2 != '\0') {
171 switch(*pt2) {
172 case 'g':
173 case 'G':
174 rep->flgs |= GLOB;
175 break;
176 case 'p':
177 case 'P':
178 rep->flgs |= PRNT;
179 break;
180 default:
181# ifdef NET2_REGEX
182 (void)free((char *)rep->rcmp);
183# else
184 regfree(&(rep->rcmp));
185# endif
186 (void)free((char *)rep);
187 *pt1 = *str;
188 paxwarn(1, "Invalid replacement string option %s", str);
189 return(-1);
190 }
191 ++pt2;
192 }
193
194 /*
195 * all done, link it in at the end
196 */
197 rep->fow = NULL;
198 if (rephead == NULL) {
199 reptail = rephead = rep;
200 return(0);
201 }
202 reptail->fow = rep;
203 reptail = rep;
204 return(0);
205}
206
207/*
208 * pat_add()
209 * add a pattern match to the pattern match list. Pattern matches are used
210 * to select which archive members are extracted. (They appear as
211 * arguments to pax in the list and read modes). If no patterns are
212 * supplied to pax, all members in the archive will be selected (and the
213 * pattern match list is empty).
214 * Return:
215 * 0 if the pattern was added to the list, -1 otherwise
216 */
217
218int
219pat_add(char *str, char *chdnam)
220{
221 PATTERN *pt;
222
223 /*
224 * throw out the junk
225 */
226 if ((str == NULL) || (*str == '\0')) {
227 paxwarn(1, "Empty pattern string");
228 return(-1);
229 }
230
231 /*
232 * allocate space for the pattern and store the pattern. the pattern is
233 * part of argv so do not bother to copy it, just point at it. Add the
234 * node to the end of the pattern list
235 */
236 if ((pt = (PATTERN *)malloc(sizeof(PATTERN))) == NULL) {
237 paxwarn(1, "Unable to allocate memory for pattern string");
238 return(-1);
239 }
240
241 pt->pstr = str;
242 pt->pend = NULL;
243 pt->plen = strlen(str);
244 pt->fow = NULL;
245 pt->flgs = 0;
246 pt->chdname = chdnam;
247
248 if (pathead == NULL) {
249 pattail = pathead = pt;
250 return(0);
251 }
252 pattail->fow = pt;
253 pattail = pt;
254 return(0);
255}
256
257/*
258 * pat_chk()
259 * complain if any the user supplied pattern did not result in a match to
260 * a selected archive member.
261 */
262
263void
264pat_chk(void)
265{
266 PATTERN *pt;
267 int wban = 0;
268
269 /*
270 * walk down the list checking the flags to make sure MTCH was set,
271 * if not complain
272 */
273 for (pt = pathead; pt != NULL; pt = pt->fow) {
274 if (pt->flgs & MTCH)
275 continue;
276 if (!wban) {
277 paxwarn(1, "WARNING! These patterns were not matched:");
278 ++wban;
279 }
280 (void)fprintf(stderr, "%s\n", pt->pstr);
281 }
282}
283
284/*
285 * pat_sel()
286 * the archive member which matches a pattern was selected. Mark the
287 * pattern as having selected an archive member. arcn->pat points at the
288 * pattern that was matched. arcn->pat is set in pat_match()
289 *
290 * NOTE: When the -c option is used, we are called when there was no match
291 * by pat_match() (that means we did match before the inverted sense of
292 * the logic). Now this seems really strange at first, but with -c we
293 * need to keep track of those patterns that cause an archive member to NOT
294 * be selected (it found an archive member with a specified pattern)
295 * Return:
296 * 0 if the pattern pointed at by arcn->pat was tagged as creating a
297 * match, -1 otherwise.
298 */
299
300int
301pat_sel(ARCHD *arcn)
302{
303 PATTERN *pt;
304 PATTERN **ppt;
305 int len;
306
307 /*
308 * if no patterns just return
309 */
310 if ((pathead == NULL) || ((pt = arcn->pat) == NULL))
311 return(0);
312
313 /*
314 * when we are NOT limited to a single match per pattern mark the
315 * pattern and return
316 */
317 if (!nflag) {
318 pt->flgs |= MTCH;
319 return(0);
320 }
321
322 /*
323 * we reach this point only when we allow a single selected match per
324 * pattern, if the pattern matches a directory and we do not have -d
325 * (dflag) we are done with this pattern. We may also be handed a file
326 * in the subtree of a directory. in that case when we are operating
327 * with -d, this pattern was already selected and we are done
328 */
329 if (pt->flgs & DIR_MTCH)
330 return(0);
331
332 if (!dflag && ((pt->pend != NULL) || (arcn->type == PAX_DIR))) {
333 /*
334 * ok we matched a directory and we are allowing
335 * subtree matches but because of the -n only its children will
336 * match. This is tagged as a DIR_MTCH type.
337 * WATCH IT, the code assumes that pt->pend points
338 * into arcn->name and arcn->name has not been modified.
339 * If not we will have a big mess. Yup this is another kludge
340 */
341
342 /*
343 * if this was a prefix match, remove trailing part of path
344 * so we can copy it. Future matches will be exact prefix match
345 */
346 if (pt->pend != NULL)
347 *pt->pend = '\0';
348
349 if ((pt->pstr = strdup(arcn->name)) == NULL) {
350 paxwarn(1, "Pattern select out of memory");
351 if (pt->pend != NULL)
352 *pt->pend = '/';
353 pt->pend = NULL;
354 return(-1);
355 }
356
357 /*
358 * put the trailing / back in the source string
359 */
360 if (pt->pend != NULL) {
361 *pt->pend = '/';
362 pt->pend = NULL;
363 }
364 pt->plen = strlen(pt->pstr);
365
366 /*
367 * strip off any trailing /, this should really never happen
368 */
369 len = pt->plen - 1;
370 if (*(pt->pstr + len) == '/') {
371 *(pt->pstr + len) = '\0';
372 pt->plen = len;
373 }
374 pt->flgs = DIR_MTCH | MTCH;
375 arcn->pat = pt;
376 return(0);
377 }
378
379 /*
380 * we are then done with this pattern, so we delete it from the list
381 * because it can never be used for another match.
382 * Seems kind of strange to do for a -c, but the pax spec is really
383 * vague on the interaction of -c -n and -d. We assume that when -c
384 * and the pattern rejects a member (i.e. it matched it) it is done.
385 * In effect we place the order of the flags as having -c last.
386 */
387 pt = pathead;
388 ppt = &pathead;
389 while ((pt != NULL) && (pt != arcn->pat)) {
390 ppt = &(pt->fow);
391 pt = pt->fow;
392 }
393
394 if (pt == NULL) {
395 /*
396 * should never happen....
397 */
398 paxwarn(1, "Pattern list inconsistant");
399 return(-1);
400 }
401 *ppt = pt->fow;
402 (void)free((char *)pt);
403 arcn->pat = NULL;
404 return(0);
405}
406
407/*
408 * pat_match()
409 * see if this archive member matches any supplied pattern, if a match
410 * is found, arcn->pat is set to point at the potential pattern. Later if
411 * this archive member is "selected" we process and mark the pattern as
412 * one which matched a selected archive member (see pat_sel())
413 * Return:
414 * 0 if this archive member should be processed, 1 if it should be
415 * skipped and -1 if we are done with all patterns (and pax should quit
416 * looking for more members)
417 */
418
419int
420pat_match(ARCHD *arcn)
421{
422 PATTERN *pt;
423
424 arcn->pat = NULL;
425
426 /*
427 * if there are no more patterns and we have -n (and not -c) we are
428 * done. otherwise with no patterns to match, matches all
429 */
430 if (pathead == NULL) {
431 if (nflag && !cflag)
432 return(-1);
433 return(0);
434 }
435
436 /*
437 * have to search down the list one at a time looking for a match.
438 */
439 pt = pathead;
440 while (pt != NULL) {
441 /*
442 * check for a file name match unless we have DIR_MTCH set in
443 * this pattern then we want a prefix match
444 */
445 if (pt->flgs & DIR_MTCH) {
446 /*
447 * this pattern was matched before to a directory
448 * as we must have -n set for this (but not -d). We can
449 * only match CHILDREN of that directory so we must use
450 * an exact prefix match (no wildcards).
451 */
452 if ((arcn->name[pt->plen] == '/') &&
453 (strncmp(pt->pstr, arcn->name, pt->plen) == 0))
454 break;
455 } else if (fn_match(pt->pstr, arcn->name, &pt->pend) == 0)
456 break;
457 pt = pt->fow;
458 }
459
460 /*
461 * return the result, remember that cflag (-c) inverts the sense of a
462 * match
463 */
464 if (pt == NULL)
465 return(cflag ? 0 : 1);
466
467 /*
468 * We had a match, now when we invert the sense (-c) we reject this
469 * member. However we have to tag the pattern a being successful, (in a
470 * match, not in selecting an archive member) so we call pat_sel() here.
471 */
472 arcn->pat = pt;
473 if (!cflag)
474 return(0);
475
476 if (pat_sel(arcn) < 0)
477 return(-1);
478 arcn->pat = NULL;
479 return(1);
480}
481
482/*
483 * fn_match()
484 * Return:
485 * 0 if this archive member should be processed, 1 if it should be
486 * skipped and -1 if we are done with all patterns (and pax should quit
487 * looking for more members)
488 * Note: *pend may be changed to show where the prefix ends.
489 */
490
491static int
492fn_match(char *pattern, char *string, char **pend)
493{
494 char c;
495 char test;
496
497 *pend = NULL;
498 for (;;) {
499 switch (c = *pattern++) {
500 case '\0':
501 /*
502 * Ok we found an exact match
503 */
504 if (*string == '\0')
505 return(0);
506
507 /*
508 * Check if it is a prefix match
509 */
510 if ((dflag == 1) || (*string != '/'))
511 return(-1);
512
513 /*
514 * It is a prefix match, remember where the trailing
515 * / is located
516 */
517 *pend = string;
518 return(0);
519 case '?':
520 if ((test = *string++) == '\0')
521 return (-1);
522 break;
523 case '*':
524 c = *pattern;
525 /*
526 * Collapse multiple *'s.
527 */
528 while (c == '*')
529 c = *++pattern;
530
531 /*
532 * Optimized hack for pattern with a * at the end
533 */
534 if (c == '\0')
535 return (0);
536
537 /*
538 * General case, use recursion.
539 */
540 while ((test = *string) != '\0') {
541 if (!fn_match(pattern, string, pend))
542 return (0);
543 ++string;
544 }
545 return (-1);
546 case '[':
547 /*
548 * range match
549 */
550 if (((test = *string++) == '\0') ||
551 ((pattern = range_match(pattern, test)) == NULL))
552 return (-1);
553 break;
554 case '\\':
555 default:
556 if (c != *string++)
557 return (-1);
558 break;
559 }
560 }
561 /* NOTREACHED */
562}
563
/*
 * range_match()
 *	match the character test against a character class. pattern points
 *	just past the opening '['. A leading '!' negates the class; "a-z"
 *	denotes a range unless the '-' is the last character before the ']'.
 * Return:
 *	pointer to the pattern character following the closing ']' when test
 *	is accepted by the class; NULL when it is not, or when the class is
 *	not terminated by a ']'.
 */
static char *
range_match(char *pattern, int test)
{
	int invert = 0;
	int hit = 0;
	char lo;
	char hi;

	if (*pattern == '!') {
		invert = 1;
		++pattern;
	}

	for (;;) {
		lo = *pattern++;
		if (lo == ']')
			break;

		/*
		 * ran off the end of the pattern, the class is malformed
		 */
		if (lo == '\0')
			return (NULL);

		if ((pattern[0] == '-') && (pattern[1] != '\0') &&
		    (pattern[1] != ']')) {
			/*
			 * lo-hi range
			 */
			hi = pattern[1];
			pattern += 2;
			if ((lo <= test) && (test <= hi))
				hit = 1;
		} else if (lo == test) {
			hit = 1;
		}
	}

	if (hit == invert)
		return (NULL);
	return (pattern);
}
592
593/*
594 * mod_name()
595 * modify a selected file name. first attempt to apply replacement string
596 * expressions, then apply interactive file rename. We apply replacement
597 * string expressions to both filenames and file links (if we didn't the
598 * links would point to the wrong place, and we could never be able to
599 * move an archive that has a file link in it). When we rename files
600 * interactively, we store that mapping (old name to user input name) so
601 * if we spot any file links to the old file name in the future, we will
602 * know exactly how to fix the file link.
603 * Return:
604 * 0 continue to process file, 1 skip this file, -1 pax is finished
605 */
606
607int
608mod_name(ARCHD *arcn)
609{
610 int res = 0;
611
612 /*
613 * Strip off leading '/' if appropriate.
614 * Currently, this option is only set for the tar format.
615 */
616 if (rmleadslash && arcn->name[0] == '/') {
617 if (arcn->name[1] == '\0') {
618 arcn->name[0] = '.';
619 } else {
620 (void)memmove(arcn->name, &arcn->name[1],
621 strlen(arcn->name));
622 arcn->nlen--;
623 }
624 if (rmleadslash < 2) {
625 rmleadslash = 2;
626 paxwarn(0, "Removing leading / from absolute path names in the archive");
627 }
628 }
629 if (rmleadslash && arcn->ln_name[0] == '/' &&
630 (arcn->type == PAX_HLK || arcn->type == PAX_HRG)) {
631 if (arcn->ln_name[1] == '\0') {
632 arcn->ln_name[0] = '.';
633 } else {
634 (void)memmove(arcn->ln_name, &arcn->ln_name[1],
635 strlen(arcn->ln_name));
636 arcn->ln_nlen--;
637 }
638 if (rmleadslash < 2) {
639 rmleadslash = 2;
640 paxwarn(0, "Removing leading / from absolute path names in the archive");
641 }
642 }
643
644 /*
645 * IMPORTANT: We have a problem. what do we do with symlinks?
646 * Modifying a hard link name makes sense, as we know the file it
647 * points at should have been seen already in the archive (and if it
648 * wasn't seen because of a read error or a bad archive, we lose
649 * anyway). But there are no such requirements for symlinks. On one
650 * hand the symlink that refers to a file in the archive will have to
651 * be modified to so it will still work at its new location in the
652 * file system. On the other hand a symlink that points elsewhere (and
653 * should continue to do so) should not be modified. There is clearly
654 * no perfect solution here. So we handle them like hardlinks. Clearly
655 * a replacement made by the interactive rename mapping is very likely
656 * to be correct since it applies to a single file and is an exact
657 * match. The regular expression replacements are a little harder to
658 * justify though. We claim that the symlink name is only likely
659 * to be replaced when it points within the file tree being moved and
660 * in that case it should be modified. what we really need to do is to
661 * call an oracle here. :)
662 */
663 if (rephead != NULL) {
664 /*
665 * we have replacement strings, modify the name and the link
666 * name if any.
667 */
668 if ((res = rep_name(arcn->name, &(arcn->nlen), 1)) != 0)
669 return(res);
670
671 if (((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) ||
672 (arcn->type == PAX_HRG)) &&
673 ((res = rep_name(arcn->ln_name, &(arcn->ln_nlen), 0)) != 0))
674 return(res);
675 }
676
677 if (iflag) {
678 /*
679 * perform interactive file rename, then map the link if any
680 */
681 if ((res = tty_rename(arcn)) != 0)
682 return(res);
683 if ((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) ||
684 (arcn->type == PAX_HRG))
685 sub_name(arcn->ln_name, &(arcn->ln_nlen), sizeof(arcn->ln_name));
686 }
687 return(res);
688}
689
690/*
691 * tty_rename()
692 * Prompt the user for a replacement file name. A "." keeps the old name,
693 * a empty line skips the file, and an EOF on reading the tty, will cause
694 * pax to stop processing and exit. Otherwise the file name input, replaces
695 * the old one.
696 * Return:
697 * 0 process this file, 1 skip this file, -1 we need to exit pax
698 */
699
700static int
701tty_rename(ARCHD *arcn)
702{
703 char tmpname[PAXPATHLEN+2];
704 int res;
705
706 /*
707 * prompt user for the replacement name for a file, keep trying until
708 * we get some reasonable input. Archives may have more than one file
709 * on them with the same name (from updates etc). We print verbose info
710 * on the file so the user knows what is up.
711 */
712 tty_prnt("\nATTENTION: %s interactive file rename operation.\n", argv0);
713
714 for (;;) {
715 ls_tty(arcn);
716 tty_prnt("Input new name, or a \".\" to keep the old name, ");
717 tty_prnt("or a \"return\" to skip this file.\n");
718 tty_prnt("Input > ");
719 if (tty_read(tmpname, sizeof(tmpname)) < 0)
720 return(-1);
721 if (strcmp(tmpname, "..") == 0) {
722 tty_prnt("Try again, illegal file name: ..\n");
723 continue;
724 }
725 if (strlen(tmpname) > PAXPATHLEN) {
726 tty_prnt("Try again, file name too long\n");
727 continue;
728 }
729 break;
730 }
731
732 /*
733 * empty file name, skips this file. a "." leaves it alone
734 */
735 if (tmpname[0] == '\0') {
736 tty_prnt("Skipping file.\n");
737 return(1);
738 }
739 if ((tmpname[0] == '.') && (tmpname[1] == '\0')) {
740 tty_prnt("Processing continues, name unchanged.\n");
741 return(0);
742 }
743
744 /*
745 * ok the name changed. We may run into links that point at this
746 * file later. we have to remember where the user sent the file
747 * in order to repair any links.
748 */
749 tty_prnt("Processing continues, name changed to: %s\n", tmpname);
750 res = add_name(arcn->name, arcn->nlen, tmpname);
751 arcn->nlen = l_strncpy(arcn->name, tmpname, sizeof(arcn->name) - 1);
752 arcn->name[arcn->nlen] = '\0';
753 if (res < 0)
754 return(-1);
755 return(0);
756}
757
758/*
759 * set_dest()
760 * fix up the file name and the link name (if any) so this file will land
761 * in the destination directory (used during copy() -rw).
762 * Return:
763 * 0 if ok, -1 if failure (name too long)
764 */
765
766int
767set_dest(ARCHD *arcn, char *dest_dir, int dir_len)
768{
769 if (fix_path(arcn->name, &(arcn->nlen), dest_dir, dir_len) < 0)
770 return(-1);
771
772 /*
773 * It is really hard to deal with symlinks here, we cannot be sure
774 * if the name they point was moved (or will be moved). It is best to
775 * leave them alone.
776 */
777 if ((arcn->type != PAX_HLK) && (arcn->type != PAX_HRG))
778 return(0);
779
780 if (fix_path(arcn->ln_name, &(arcn->ln_nlen), dest_dir, dir_len) < 0)
781 return(-1);
782 return(0);
783}
784
785/*
786 * fix_path
787 * concatenate dir_name and or_name and store the result in or_name (if
788 * it fits). This is one ugly function.
789 * Return:
790 * 0 if ok, -1 if the final name is too long
791 */
792
793static int
794fix_path( char *or_name, int *or_len, char *dir_name, int dir_len)
795{
796 char *src;
797 char *dest;
798 char *start;
799 int len;
800
801 /*
802 * we shift the or_name to the right enough to tack in the dir_name
803 * at the front. We make sure we have enough space for it all before
804 * we start. since dest always ends in a slash, we skip of or_name
805 * if it also starts with one.
806 */
807 start = or_name;
808 src = start + *or_len;
809 dest = src + dir_len;
810 if (*start == '/') {
811 ++start;
812 --dest;
813 }
814 if ((len = dest - or_name) > PAXPATHLEN) {
815 paxwarn(1, "File name %s/%s, too long", dir_name, start);
816 return(-1);
817 }
818 *or_len = len;
819
820 /*
821 * enough space, shift
822 */
823 while (src >= start)
824 *dest-- = *src--;
825 src = dir_name + dir_len - 1;
826
827 /*
828 * splice in the destination directory name
829 */
830 while (src >= dir_name)
831 *dest-- = *src--;
832
833 *(or_name + len) = '\0';
834 return(0);
835}
836
837/*
838 * rep_name()
839 * walk down the list of replacement strings applying each one in order.
840 * when we find one with a successful substitution, we modify the name
841 * as specified. if required, we print the results. if the resulting name
842 * is empty, we will skip this archive member. We use the regexp(3)
843 * routines (regexp() ought to win a prize as having the most cryptic
844 * library function manual page).
845 * --Parameters--
846 * name is the file name we are going to apply the regular expressions to
847 * (and may be modified)
848 * nlen is the length of this name (and is modified to hold the length of
849 * the final string).
850 * prnt is a flag that says whether to print the final result.
851 * Return:
852 * 0 if substitution was successful, 1 if we are to skip the file (the name
853 * ended up empty)
854 */
855
856static int
857rep_name(char *name, int *nlen, int prnt)
858{
859 REPLACE *pt;
860 char *inpt;
861 char *outpt;
862 char *endpt;
863 char *rpt;
864 int found = 0;
865 int res;
866# ifndef NET2_REGEX
867 regmatch_t pm[MAXSUBEXP];
868# endif
869 char nname[PAXPATHLEN+1]; /* final result of all replacements */
870 char buf1[PAXPATHLEN+1]; /* where we work on the name */
871
872 /*
873 * copy the name into buf1, where we will work on it. We need to keep
874 * the orig string around so we can print out the result of the final
875 * replacement. We build up the final result in nname. inpt points at
876 * the string we apply the regular expression to. prnt is used to
877 * suppress printing when we handle replacements on the link field
878 * (the user already saw that substitution go by)
879 */
880 pt = rephead;
881 (void)strcpy(buf1, name);
882 inpt = buf1;
883 outpt = nname;
884 endpt = outpt + PAXPATHLEN;
885
886 /*
887 * try each replacement string in order
888 */
889 while (pt != NULL) {
890 do {
891 /*
892 * check for a successful substitution, if not go to
893 * the next pattern, or cleanup if we were global
894 */
895# ifdef NET2_REGEX
896 if (regexec(pt->rcmp, inpt) == 0)
897# else
898 if (regexec(&(pt->rcmp), inpt, MAXSUBEXP, pm, 0) != 0)
899# endif
900 break;
901
902 /*
903 * ok we found one. We have three parts, the prefix
904 * which did not match, the section that did and the
905 * tail (that also did not match). Copy the prefix to
906 * the final output buffer (watching to make sure we
907 * do not create a string too long).
908 */
909 found = 1;
910# ifdef NET2_REGEX
911 rpt = pt->rcmp->startp[0];
912# else
913 rpt = inpt + pm[0].rm_so;
914# endif
915
916 while ((inpt < rpt) && (outpt < endpt))
917 *outpt++ = *inpt++;
918 if (outpt == endpt)
919 break;
920
921 /*
922 * for the second part (which matched the regular
923 * expression) apply the substitution using the
924 * replacement string and place it the prefix in the
925 * final output. If we have problems, skip it.
926 */
927# ifdef NET2_REGEX
928 if ((res = resub(pt->rcmp,pt->nstr,outpt,endpt)) < 0) {
929# else
930 if ((res = resub(&(pt->rcmp),pm,pt->nstr,outpt,endpt))
931 < 0) {
932# endif
933 if (prnt)
934 paxwarn(1, "Replacement name error %s",
935 name);
936 return(1);
937 }
938 outpt += res;
939
940 /*
941 * we set up to look again starting at the first
942 * character in the tail (of the input string right
943 * after the last character matched by the regular
944 * expression (inpt always points at the first char in
945 * the string to process). If we are not doing a global
946 * substitution, we will use inpt to copy the tail to
947 * the final result. Make sure we do not overrun the
948 * output buffer
949 */
950# ifdef NET2_REGEX
951 inpt = pt->rcmp->endp[0];
952# else
953 inpt += pm[0].rm_eo - pm[0].rm_so;
954# endif
955
956 if ((outpt == endpt) || (*inpt == '\0'))
957 break;
958
959 /*
960 * if the user wants global we keep trying to
961 * substitute until it fails, then we are done.
962 */
963 } while (pt->flgs & GLOB);
964
965 if (found)
966 break;
967
968 /*
969 * a successful substitution did NOT occur, try the next one
970 */
971 pt = pt->fow;
972 }
973
974 if (found) {
975 /*
976 * we had a substitution, copy the last tail piece (if there is
977 * room) to the final result
978 */
979 while ((outpt < endpt) && (*inpt != '\0'))
980 *outpt++ = *inpt++;
981
982 *outpt = '\0';
983 if ((outpt == endpt) && (*inpt != '\0')) {
984 if (prnt)
985 paxwarn(1,"Replacement name too long %s >> %s",
986 name, nname);
987 return(1);
988 }
989
990 /*
991 * inform the user of the result if wanted
992 */
993 if (prnt && (pt->flgs & PRNT)) {
994 if (*nname == '\0')
995 (void)fprintf(stderr,"%s >> <empty string>\n",
996 name);
997 else
998 (void)fprintf(stderr,"%s >> %s\n", name, nname);
999 }
1000
1001 /*
1002 * if empty inform the caller this file is to be skipped
1003 * otherwise copy the new name over the orig name and return
1004 */
1005 if (*nname == '\0')
1006 return(1);
1007 *nlen = l_strncpy(name, nname, PAXPATHLEN + 1);
1008 name[PAXPATHLEN] = '\0';
1009 }
1010 return(0);
1011}
1012
1013#ifdef NET2_REGEX
1014/*
1015 * resub()
1016 * apply the replacement to the matched expression. expand out the old
1017 * style ed(1) subexpression expansion.
1018 * Return:
1019 * -1 if error, or the number of characters added to the destination.
1020 */
1021
1022static int
1023resub(regexp *prog, char *src, char *dest, char *destend)
1024{
1025 char *spt;
1026 char *dpt;
1027 char c;
1028 int no;
1029 int len;
1030
1031 spt = src;
1032 dpt = dest;
1033 while ((dpt < destend) && ((c = *spt++) != '\0')) {
1034 if (c == '&')
1035 no = 0;
1036 else if ((c == '\\') && (*spt >= '0') && (*spt <= '9'))
1037 no = *spt++ - '0';
1038 else {
1039 if ((c == '\\') && ((*spt == '\\') || (*spt == '&')))
1040 c = *spt++;
1041 *dpt++ = c;
1042 continue;
1043 }
1044 if ((prog->startp[no] == NULL) || (prog->endp[no] == NULL) ||
1045 ((len = prog->endp[no] - prog->startp[no]) <= 0))
1046 continue;
1047
1048 /*
1049 * copy the subexpression to the destination.
1050 * fail if we run out of space or the match string is damaged
1051 */
1052 if (len > (destend - dpt))
1053 len = destend - dpt;
1054 if (l_strncpy(dpt, prog->startp[no], len) != len)
1055 return(-1);
1056 dpt += len;
1057 }
1058 return(dpt - dest);
1059}
1060
1061#else
1062
1063/*
1064 * resub()
1065 * apply the replacement to the matched expression. expand out the old
1066 * style ed(1) subexpression expansion.
1067 * Return:
1068 * -1 if error, or the number of characters added to the destination.
1069 */
1070
1071static int
1072resub(regex_t *rp, regmatch_t *pm, char *src, char *dest,
1073 char *destend)
1074{
1075 char *spt;
1076 char *dpt;
1077 char c;
1078 regmatch_t *pmpt;
1079 int len;
1080 int subexcnt;
1081
1082 spt = src;
1083 dpt = dest;
1084 subexcnt = rp->re_nsub;
1085 while ((dpt < destend) && ((c = *spt++) != '\0')) {
1086 /*
1087 * see if we just have an ordinary replacement character
1088 * or we refer to a subexpression.
1089 */
1090 if (c == '&') {
1091 pmpt = pm;
1092 } else if ((c == '\\') && (*spt >= '0') && (*spt <= '9')) {
1093 /*
1094 * make sure there is a subexpression as specified
1095 */
1096 if ((len = *spt++ - '0') > subexcnt)
1097 return(-1);
1098 pmpt = pm + len;
1099 } else {
1100 /*
1101 * Ordinary character, just copy it
1102 */
1103 if ((c == '\\') && ((*spt == '\\') || (*spt == '&')))
1104 c = *spt++;
1105 *dpt++ = c;
1106 continue;
1107 }
1108
1109 /*
1110 * continue if the subexpression is bogus
1111 */
1112 if ((pmpt->rm_so < 0) || (pmpt->rm_eo < 0) ||
1113 ((len = pmpt->rm_eo - pmpt->rm_so) <= 0))
1114 continue;
1115
1116 /*
1117 * copy the subexpression to the destination.
1118 * fail if we run out of space or the match string is damaged
1119 */
1120 if (len > (destend - dpt))
1121 len = destend - dpt;
1122 if (l_strncpy(dpt, src + pmpt->rm_so, len) != len)
1123 return(-1);
1124 dpt += len;
1125 }
1126 return(dpt - dest);
1127}
1128#endif
Note: See TracBrowser for help on using the repository browser.