File: | src/bin/pax/pat_rep.c |
Warning: | line 541, column 12 Although the value stored to 'test' is used in the enclosing expression, the value is never actually read from 'test' |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | /* $OpenBSD: pat_rep.c,v 1.45 2023/06/26 07:10:17 op Exp $ */ |
2 | /* $NetBSD: pat_rep.c,v 1.4 1995/03/21 09:07:33 cgd Exp $ */ |
3 | |
4 | /*- |
5 | * Copyright (c) 1992 Keith Muller. |
6 | * Copyright (c) 1992, 1993 |
7 | * The Regents of the University of California. All rights reserved. |
8 | * |
9 | * This code is derived from software contributed to Berkeley by |
10 | * Keith Muller of the University of California, San Diego. |
11 | * |
12 | * Redistribution and use in source and binary forms, with or without |
13 | * modification, are permitted provided that the following conditions |
14 | * are met: |
15 | * 1. Redistributions of source code must retain the above copyright |
16 | * notice, this list of conditions and the following disclaimer. |
17 | * 2. Redistributions in binary form must reproduce the above copyright |
18 | * notice, this list of conditions and the following disclaimer in the |
19 | * documentation and/or other materials provided with the distribution. |
20 | * 3. Neither the name of the University nor the names of its contributors |
21 | * may be used to endorse or promote products derived from this software |
22 | * without specific prior written permission. |
23 | * |
24 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
25 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
26 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
27 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
28 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
29 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
30 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
31 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
32 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
33 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
34 | * SUCH DAMAGE. |
35 | */ |
36 | |
37 | #include <sys/types.h> |
38 | #include <sys/stat.h> |
39 | #include <regex.h> |
40 | #include <stdio.h> |
41 | #include <stdlib.h> |
42 | #include <string.h> |
43 | |
44 | #include "pax.h" |
45 | #include "extern.h" |
46 | |
/*
 * Data structure for storing one user supplied replacement string (-s).
 * Nodes are chained through ->fow into the list headed by rephead.
 */
typedef struct replace {
	char		*nstr;	/* the new string we will substitute with */
	regex_t		rcmp;	/* compiled regular expression used to match */
	int		flgs;	/* option bits, any combination of: */
#define	PRNT		0x1	/*   set by the 'p'/'P' option letter */
#define	GLOB		0x2	/*   set by the 'g'/'G' option letter */
	struct replace	*fow;	/* pointer to next pattern */
} REPLACE;
58 | |
59 | /* |
60 | * routines to handle pattern matching, name modification (regular expression |
61 | * substitution and interactive renames), and destination name modification for |
62 | * copy (-rw). Both file name and link names are adjusted as required in these |
63 | * routines. |
64 | */ |
65 | |
66 | #define MAXSUBEXP10 10 /* max subexpressions, DO NOT CHANGE */ |
67 | static PATTERN *pathead = NULL((void *)0); /* file pattern match list head */ |
68 | static PATTERN *pattail = NULL((void *)0); /* file pattern match list tail */ |
69 | static REPLACE *rephead = NULL((void *)0); /* replacement string list head */ |
70 | static REPLACE *reptail = NULL((void *)0); /* replacement string list tail */ |
71 | |
72 | static int rep_name(char *, size_t, int *, int); |
73 | static int tty_rename(ARCHD *); |
74 | static int fix_path(char *, int *, char *, int); |
75 | static int fn_match(char *, char *, char **); |
76 | static char * range_match(char *, int); |
77 | static int resub(regex_t *, regmatch_t *, char *, char *, char *, char *); |
78 | |
79 | /* |
80 | * rep_add() |
81 | * parses the -s replacement string; compiles the regular expression |
82 | * and stores the compiled value and its replacement string together in |
83 | * replacement string list. Input to this function is of the form: |
84 | * /old/new/pg |
85 | * The first char in the string specifies the delimiter used by this |
86 | * replacement string. "Old" is a regular expression in "ed" format which |
87 | * is compiled by regcomp() and is applied to filenames. "new" is the |
88 | * substitution string; p and g are options flags for printing and global |
89 | * replacement (over the single filename) |
90 | * Return: |
91 | * 0 if a proper replacement string and regular expression was added to |
92 | * the list of replacement patterns; -1 otherwise. |
93 | */ |
94 | |
95 | int |
96 | rep_add(char *str) |
97 | { |
98 | char *pt1; |
99 | char *pt2; |
100 | REPLACE *rep; |
101 | int res; |
102 | char rebuf[BUFSIZ1024]; |
103 | |
104 | /* |
105 | * throw out the bad parameters |
106 | */ |
107 | if ((str == NULL((void *)0)) || (*str == '\0')) { |
108 | paxwarn(1, "Empty replacement string"); |
109 | return(-1); |
110 | } |
111 | |
112 | /* |
113 | * first character in the string specifies what the delimiter is for |
114 | * this expression |
115 | */ |
116 | for (pt1 = str+1; *pt1; pt1++) { |
117 | if (*pt1 == '\\') { |
118 | pt1++; |
119 | continue; |
120 | } |
121 | if (*pt1 == *str) |
122 | break; |
123 | } |
124 | if (*pt1 == '\0') { |
125 | paxwarn(1, "Invalid replacement string %s", str); |
126 | return(-1); |
127 | } |
128 | |
129 | /* |
130 | * allocate space for the node that handles this replacement pattern |
131 | * and split out the regular expression and try to compile it |
132 | */ |
133 | if ((rep = malloc(sizeof(REPLACE))) == NULL((void *)0)) { |
134 | paxwarn(1, "Unable to allocate memory for replacement string"); |
135 | return(-1); |
136 | } |
137 | |
138 | *pt1 = '\0'; |
139 | if ((res = regcomp(&(rep->rcmp), str+1, 0)) != 0) { |
140 | regerror(res, &(rep->rcmp), rebuf, sizeof(rebuf)); |
141 | paxwarn(1, "%s while compiling regular expression %s", rebuf, str); |
142 | free(rep); |
143 | return(-1); |
144 | } |
145 | |
146 | /* |
147 | * put the delimiter back in case we need an error message and |
148 | * locate the delimiter at the end of the replacement string |
149 | * we then point the node at the new substitution string |
150 | */ |
151 | *pt1++ = *str; |
152 | for (pt2 = pt1; *pt2; pt2++) { |
153 | if (*pt2 == '\\') { |
154 | pt2++; |
155 | continue; |
156 | } |
157 | if (*pt2 == *str) |
158 | break; |
159 | } |
160 | if (*pt2 == '\0') { |
161 | regfree(&(rep->rcmp)); |
162 | free(rep); |
163 | paxwarn(1, "Invalid replacement string %s", str); |
164 | return(-1); |
165 | } |
166 | |
167 | *pt2 = '\0'; |
168 | rep->nstr = pt1; |
169 | pt1 = pt2++; |
170 | rep->flgs = 0; |
171 | |
172 | /* |
173 | * set the options if any |
174 | */ |
175 | while (*pt2 != '\0') { |
176 | switch (*pt2) { |
177 | case 'g': |
178 | case 'G': |
179 | rep->flgs |= GLOB0x2; |
180 | break; |
181 | case 'p': |
182 | case 'P': |
183 | rep->flgs |= PRNT0x1; |
184 | break; |
185 | default: |
186 | regfree(&(rep->rcmp)); |
187 | free(rep); |
188 | *pt1 = *str; |
189 | paxwarn(1, "Invalid replacement string option %s", str); |
190 | return(-1); |
191 | } |
192 | ++pt2; |
193 | } |
194 | |
195 | /* |
196 | * all done, link it in at the end |
197 | */ |
198 | rep->fow = NULL((void *)0); |
199 | if (rephead == NULL((void *)0)) { |
200 | reptail = rephead = rep; |
201 | return(0); |
202 | } |
203 | reptail->fow = rep; |
204 | reptail = rep; |
205 | return(0); |
206 | } |
207 | |
208 | /* |
209 | * pat_add() |
210 | * add a pattern match to the pattern match list. Pattern matches are used |
211 | * to select which archive members are extracted. (They appear as |
212 | * arguments to pax in the list and read modes). If no patterns are |
213 | * supplied to pax, all members in the archive will be selected (and the |
214 | * pattern match list is empty). |
215 | * Return: |
216 | * 0 if the pattern was added to the list, -1 otherwise |
217 | */ |
218 | |
219 | int |
220 | pat_add(char *str, char *chdirname) |
221 | { |
222 | PATTERN *pt; |
223 | |
224 | /* |
225 | * throw out the junk |
226 | */ |
227 | if ((str == NULL((void *)0)) || (*str == '\0')) { |
228 | paxwarn(1, "Empty pattern string"); |
229 | return(-1); |
230 | } |
231 | |
232 | /* |
233 | * allocate space for the pattern and store the pattern. the pattern is |
234 | * part of argv so do not bother to copy it, just point at it. Add the |
235 | * node to the end of the pattern list |
236 | */ |
237 | if ((pt = malloc(sizeof(PATTERN))) == NULL((void *)0)) { |
238 | paxwarn(1, "Unable to allocate memory for pattern string"); |
239 | return(-1); |
240 | } |
241 | |
242 | pt->pstr = str; |
243 | pt->pend = NULL((void *)0); |
244 | pt->plen = strlen(str); |
245 | pt->fow = NULL((void *)0); |
246 | pt->flgs = 0; |
247 | pt->chdname = chdirname; |
248 | |
249 | if (pathead == NULL((void *)0)) { |
250 | pattail = pathead = pt; |
251 | return(0); |
252 | } |
253 | pattail->fow = pt; |
254 | pattail = pt; |
255 | return(0); |
256 | } |
257 | |
258 | /* |
259 | * pat_chk() |
260 | * complain if any of the user supplied patterns did not result in |
261 | * a match to a selected archive member. |
262 | */ |
263 | |
264 | void |
265 | pat_chk(void) |
266 | { |
267 | PATTERN *pt; |
268 | int wban = 0; |
269 | |
270 | /* |
271 | * walk down the list checking the flags to make sure MTCH was set, |
272 | * if not complain |
273 | */ |
274 | for (pt = pathead; pt != NULL((void *)0); pt = pt->fow) { |
275 | if (pt->flgs & MTCH0x1) |
276 | continue; |
277 | if (!wban) { |
278 | paxwarn(1, "WARNING! These patterns were not matched:"); |
279 | ++wban; |
280 | } |
281 | (void)fprintf(stderr(&__sF[2]), "%s\n", pt->pstr); |
282 | } |
283 | } |
284 | |
285 | /* |
286 | * pat_sel() |
287 | * the archive member which matches a pattern was selected. Mark the |
288 | * pattern as having selected an archive member. arcn->pat points at the |
289 | * pattern that was matched. arcn->pat is set in pat_match() |
290 | * |
291 | * NOTE: When the -c option is used, we are called when there was no match |
292 | * by pat_match() (that means we did match before the inverted sense of |
293 | * the logic). Now this seems really strange at first, but with -c we |
294 | * need to keep track of those patterns that cause an archive member to NOT |
295 | * be selected (it found an archive member with a specified pattern) |
296 | * Return: |
297 | * 0 if the pattern pointed at by arcn->pat was tagged as creating a |
298 | * match, -1 otherwise. |
299 | */ |
300 | |
301 | int |
302 | pat_sel(ARCHD *arcn) |
303 | { |
304 | PATTERN *pt; |
305 | PATTERN **ppt; |
306 | size_t len; |
307 | |
308 | /* |
309 | * if no patterns just return |
310 | */ |
311 | if ((pathead == NULL((void *)0)) || ((pt = arcn->pat) == NULL((void *)0))) |
312 | return(0); |
313 | |
314 | /* |
315 | * when we are NOT limited to a single match per pattern mark the |
316 | * pattern and return |
317 | */ |
318 | if (!nflag) { |
319 | pt->flgs |= MTCH0x1; |
320 | return(0); |
321 | } |
322 | |
323 | /* |
324 | * we reach this point only when we allow a single selected match per |
325 | * pattern, if the pattern matches a directory and we do not have -d |
326 | * (dflag) we are done with this pattern. We may also be handed a file |
327 | * in the subtree of a directory. in that case when we are operating |
328 | * with -d, this pattern was already selected and we are done |
329 | */ |
330 | if (pt->flgs & DIR_MTCH0x2) |
331 | return(0); |
332 | |
333 | if (!dflag && ((pt->pend != NULL((void *)0)) || (arcn->type == PAX_DIR1))) { |
334 | /* |
335 | * ok we matched a directory and we are allowing |
336 | * subtree matches but because of the -n only its children will |
337 | * match. This is tagged as a DIR_MTCH type. |
338 | * WATCH IT, the code assumes that pt->pend points |
339 | * into arcn->name and arcn->name has not been modified. |
340 | * If not we will have a big mess. Yup this is another kludge |
341 | */ |
342 | |
343 | /* |
344 | * if this was a prefix match, remove trailing part of path |
345 | * so we can copy it. Future matches will be exact prefix match |
346 | */ |
347 | if (pt->pend != NULL((void *)0)) |
348 | *pt->pend = '\0'; |
349 | |
350 | if ((pt->pstr = strdup(arcn->name)) == NULL((void *)0)) { |
351 | paxwarn(1, "Pattern select out of memory"); |
352 | if (pt->pend != NULL((void *)0)) |
353 | *pt->pend = '/'; |
354 | pt->pend = NULL((void *)0); |
355 | return(-1); |
356 | } |
357 | |
358 | /* |
359 | * put the trailing / back in the source string |
360 | */ |
361 | if (pt->pend != NULL((void *)0)) { |
362 | *pt->pend = '/'; |
363 | pt->pend = NULL((void *)0); |
364 | } |
365 | pt->plen = strlen(pt->pstr); |
366 | |
367 | /* |
368 | * strip off any trailing /, this should really never happen |
369 | */ |
370 | len = pt->plen - 1; |
371 | if (*(pt->pstr + len) == '/') { |
372 | *(pt->pstr + len) = '\0'; |
373 | pt->plen = len; |
374 | } |
375 | pt->flgs = DIR_MTCH0x2 | MTCH0x1; |
376 | arcn->pat = pt; |
377 | return(0); |
378 | } |
379 | |
380 | /* |
381 | * we are then done with this pattern, so we delete it from the list |
382 | * because it can never be used for another match. |
383 | * Seems kind of strange to do for a -c, but the pax spec is really |
384 | * vague on the interaction of -c, -n and -d. We assume that when -c |
385 | * and the pattern rejects a member (i.e. it matched it) it is done. |
386 | * In effect we place the order of the flags as having -c last. |
387 | */ |
388 | pt = pathead; |
389 | ppt = &pathead; |
390 | while ((pt != NULL((void *)0)) && (pt != arcn->pat)) { |
391 | ppt = &(pt->fow); |
392 | pt = pt->fow; |
393 | } |
394 | |
395 | if (pt == NULL((void *)0)) { |
396 | /* |
397 | * should never happen.... |
398 | */ |
399 | paxwarn(1, "Pattern list inconsistent"); |
400 | return(-1); |
401 | } |
402 | *ppt = pt->fow; |
403 | free(pt); |
404 | arcn->pat = NULL((void *)0); |
405 | return(0); |
406 | } |
407 | |
408 | /* |
409 | * pat_match() |
410 | * see if this archive member matches any supplied pattern, if a match |
411 | * is found, arcn->pat is set to point at the potential pattern. Later if |
412 | * this archive member is "selected" we process and mark the pattern as |
413 | * one which matched a selected archive member (see pat_sel()) |
414 | * Return: |
415 | * 0 if this archive member should be processed, 1 if it should be |
416 | * skipped and -1 if we are done with all patterns (and pax should quit |
417 | * looking for more members) |
418 | */ |
419 | |
420 | int |
421 | pat_match(ARCHD *arcn) |
422 | { |
423 | PATTERN *pt; |
424 | |
425 | arcn->pat = NULL((void *)0); |
426 | |
427 | /* |
428 | * if there are no more patterns and we have -n (and not -c) we are |
429 | * done. otherwise with no patterns to match, matches all |
430 | */ |
431 | if (pathead == NULL((void *)0)) { |
432 | if (nflag && !cflag) |
433 | return(-1); |
434 | return(0); |
435 | } |
436 | |
437 | /* |
438 | * have to search down the list one at a time looking for a match. |
439 | */ |
440 | pt = pathead; |
441 | while (pt != NULL((void *)0)) { |
442 | /* |
443 | * check for a file name match unless we have DIR_MTCH set in |
444 | * this pattern then we want a prefix match |
445 | */ |
446 | if (pt->flgs & DIR_MTCH0x2) { |
447 | /* |
448 | * this pattern was matched before to a directory |
449 | * as we must have -n set for this (but not -d). We can |
450 | * only match CHILDREN of that directory so we must use |
451 | * an exact prefix match (no wildcards). |
452 | */ |
453 | if ((arcn->name[pt->plen] == '/') && |
454 | (strncmp(pt->pstr, arcn->name, pt->plen) == 0)) |
455 | break; |
456 | } else if (fn_match(pt->pstr, arcn->name, &pt->pend) == 0) |
457 | break; |
458 | pt = pt->fow; |
459 | } |
460 | |
461 | /* |
462 | * return the result, remember that cflag (-c) inverts the sense of a |
463 | * match |
464 | */ |
465 | if (pt == NULL((void *)0)) |
466 | return(cflag ? 0 : 1); |
467 | |
468 | /* |
469 | * we had a match, now when we invert the sense (-c) we reject this |
470 | * member. However we have to tag the pattern a being successful, (in a |
471 | * match, not in selecting a archive member) so we call pat_sel() here. |
472 | */ |
473 | arcn->pat = pt; |
474 | if (!cflag) |
475 | return(0); |
476 | |
477 | if (pat_sel(arcn) < 0) |
478 | return(-1); |
479 | arcn->pat = NULL((void *)0); |
480 | return(1); |
481 | } |
482 | |
483 | /* |
484 | * fn_match() |
485 | * Return: |
486 | * 0 if this archive member should be processed, 1 if it should be |
487 | * skipped and -1 if we are done with all patterns (and pax should quit |
488 | * looking for more members) |
489 | * Note: *pend may be changed to show where the prefix ends. |
490 | */ |
491 | |
492 | static int |
493 | fn_match(char *pattern, char *string, char **pend) |
494 | { |
495 | char c; |
496 | char test; |
497 | |
498 | *pend = NULL((void *)0); |
499 | for (;;) { |
500 | switch (c = *pattern++) { |
501 | case '\0': |
502 | /* |
503 | * Ok we found an exact match |
504 | */ |
505 | if (*string == '\0') |
506 | return(0); |
507 | |
508 | /* |
509 | * Check if it is a prefix match |
510 | */ |
511 | if ((dflag == 1) || (*string != '/')) |
512 | return(-1); |
513 | |
514 | /* |
515 | * It is a prefix match, remember where the trailing |
516 | * / is located |
517 | */ |
518 | *pend = string; |
519 | return(0); |
520 | case '?': |
521 | if ((test = *string++) == '\0') |
522 | return (-1); |
523 | break; |
524 | case '*': |
525 | c = *pattern; |
526 | /* |
527 | * Collapse multiple *'s. |
528 | */ |
529 | while (c == '*') |
530 | c = *++pattern; |
531 | |
532 | /* |
533 | * Optimized hack for pattern with a * at the end |
534 | */ |
535 | if (c == '\0') |
536 | return (0); |
537 | |
538 | /* |
539 | * General case, use recursion. |
540 | */ |
541 | while ((test = *string) != '\0') { |
Although the value stored to 'test' is used in the enclosing expression, the value is never actually read from 'test' | |
542 | if (!fn_match(pattern, string, pend)) |
543 | return (0); |
544 | ++string; |
545 | } |
546 | return (-1); |
547 | case '[': |
548 | /* |
549 | * range match |
550 | */ |
551 | if (((test = *string++) == '\0') || |
552 | ((pattern = range_match(pattern, test)) == NULL((void *)0))) |
553 | return (-1); |
554 | break; |
555 | case '\\': |
556 | if ((c = *pattern++) == '\0') |
557 | return (-1); |
558 | /* FALLTHROUGH */ |
559 | default: |
560 | if (c != *string++) |
561 | return (-1); |
562 | break; |
563 | } |
564 | } |
565 | /* NOTREACHED */ |
566 | } |
567 | |
/*
 * range_match()
 *	Match the single character test against a bracket expression.
 *	pattern points just past the opening '['. A leading '!' negates the
 *	class. "a-z" is a range, unless the '-' is directly followed by ']'
 *	or by the end of the pattern, in which case both characters are
 *	ordinary members.
 * Return:
 *	pointer to the pattern character after the closing ']' if test is
 *	accepted; NULL if it is rejected or the expression has no closing ']'.
 */
static char *
range_match(char *pattern, int test)
{
	char lo;
	char hi;
	int invert = 0;
	int hit = 0;

	if (*pattern == '!') {
		invert = 1;
		++pattern;
	}

	for (;;) {
		lo = *pattern++;
		if (lo == ']')
			break;

		/*
		 * ran off the end without a closing bracket: illegal pattern
		 */
		if (lo == '\0')
			return (NULL);

		if (*pattern == '-' && (hi = pattern[1]) != '\0' && hi != ']') {
			if (lo <= test && test <= hi)
				hit = 1;
			/* step over the '-' and the upper bound */
			pattern += 2;
			continue;
		}
		if (lo == test)
			hit = 1;
	}

	/*
	 * accepted when exactly one of "was in the set" and "negated" holds
	 */
	if (hit == invert)
		return (NULL);
	return (pattern);
}
596 | |
/*
 * has_dotdot()
 *	Tell whether path contains ".." as a complete path component, i.e.
 *	a ".." that starts at the beginning of path or right after a '/',
 *	and is followed by a '/' or by the end of path.
 * Return:
 *	1 if such a component exists, 0 otherwise.
 */

int
has_dotdot(const char *path)
{
	const char *hit;

	for (hit = strstr(path, ".."); hit != NULL;
	    hit = strstr(hit + 2, "..")) {
		/*
		 * something other than a separator in front of it: this is
		 * only the tail of a longer name
		 */
		if (hit != path && hit[-1] != '/')
			continue;
		if (hit[2] == '/' || hit[2] == '\0')
			return (1);
	}
	return (0);
}
615 | |
616 | /* |
617 | * mod_name() |
618 | * modify a selected file name. first attempt to apply replacement string |
619 | * expressions, then apply interactive file rename. We apply replacement |
620 | * string expressions to both filenames and file links (if we didn't the |
621 | * links would point to the wrong place, and we could never be able to |
622 | * move an archive that has a file link in it). When we rename files |
623 | * interactively, we store that mapping (old name to user input name) so |
624 | * if we spot any file links to the old file name in the future, we will |
625 | * know exactly how to fix the file link. |
626 | * Return: |
627 | * 0 continue to process file, 1 skip this file, -1 pax is finished |
628 | */ |
629 | |
630 | int |
631 | mod_name(ARCHD *arcn) |
632 | { |
633 | int res = 0; |
634 | |
635 | /* |
636 | * Strip off leading '/' if appropriate. |
637 | * Currently, this option is only set for the tar format. |
638 | */ |
639 | while (rmleadslash && arcn->name[0] == '/') { |
640 | if (arcn->name[1] == '\0') { |
641 | arcn->name[0] = '.'; |
642 | } else { |
643 | (void)memmove(arcn->name, &arcn->name[1], |
644 | strlen(arcn->name)); |
645 | arcn->nlen--; |
646 | } |
647 | if (rmleadslash < 2) { |
648 | rmleadslash = 2; |
649 | paxwarn(0, "Removing leading / from absolute path names in the archive"); |
650 | } |
651 | } |
652 | while (rmleadslash && arcn->ln_name[0] == '/' && |
653 | PAX_IS_HARDLINK(arcn->type)((arcn->type) == 8 || (arcn->type) == 9)) { |
654 | if (arcn->ln_name[1] == '\0') { |
655 | arcn->ln_name[0] = '.'; |
656 | } else { |
657 | (void)memmove(arcn->ln_name, &arcn->ln_name[1], |
658 | strlen(arcn->ln_name)); |
659 | arcn->ln_nlen--; |
660 | } |
661 | if (rmleadslash < 2) { |
662 | rmleadslash = 2; |
663 | paxwarn(0, "Removing leading / from absolute path names in the archive"); |
664 | } |
665 | } |
666 | if (rmleadslash) { |
667 | const char *last = NULL((void *)0); |
668 | const char *p = arcn->name; |
669 | |
670 | while ((p = strstr(p, "..")) != NULL((void *)0)) { |
671 | if ((p == arcn->name || p[-1] == '/') && |
672 | (p[2] == '/' || p[2] == '\0')) |
673 | last = p + 2; |
674 | p += 2; |
675 | } |
676 | if (last != NULL((void *)0)) { |
677 | last++; |
678 | paxwarn(1, "Removing leading \"%.*s\"", |
679 | (int)(last - arcn->name), arcn->name); |
680 | arcn->nlen = strlen(last); |
681 | if (arcn->nlen > 0) |
682 | memmove(arcn->name, last, arcn->nlen + 1); |
683 | else { |
684 | arcn->name[0] = '.'; |
685 | arcn->name[1] = '\0'; |
686 | arcn->nlen = 1; |
687 | } |
688 | } |
689 | } |
690 | |
691 | /* |
692 | * IMPORTANT: We have a problem. what do we do with symlinks? |
693 | * Modifying a hard link name makes sense, as we know the file it |
694 | * points at should have been seen already in the archive (and if it |
695 | * wasn't seen because of a read error or a bad archive, we lose |
696 | * anyway). But there are no such requirements for symlinks. On one |
697 | * hand the symlink that refers to a file in the archive will have to |
698 | * be modified to so it will still work at its new location in the |
699 | * file system. On the other hand a symlink that points elsewhere (and |
700 | * should continue to do so) should not be modified. There is clearly |
701 | * no perfect solution here. So we handle them like hardlinks. Clearly |
702 | * a replacement made by the interactive rename mapping is very likely |
703 | * to be correct since it applies to a single file and is an exact |
704 | * match. The regular expression replacements are a little harder to |
705 | * justify though. We claim that the symlink name is only likely |
706 | * to be replaced when it points within the file tree being moved and |
707 | * in that case it should be modified. what we really need to do is to |
708 | * call an oracle here. :) |
709 | */ |
710 | if (rephead != NULL((void *)0)) { |
711 | /* |
712 | * we have replacement strings, modify the name and the link |
713 | * name if any. |
714 | */ |
715 | if ((res = rep_name(arcn->name, sizeof(arcn->name), &(arcn->nlen), 1)) != 0) |
716 | return(res); |
717 | |
718 | if (PAX_IS_LINK(arcn->type)((arcn->type) == 5 || ((arcn->type) == 8 || (arcn->type ) == 9))) { |
719 | if ((res = rep_name(arcn->ln_name, |
720 | sizeof(arcn->ln_name), &(arcn->ln_nlen), 0)) != 0) |
721 | return(res); |
722 | } |
723 | } |
724 | |
725 | if (iflag) { |
726 | /* |
727 | * perform interactive file rename, then map the link if any |
728 | */ |
729 | if ((res = tty_rename(arcn)) != 0) |
730 | return(res); |
731 | if (PAX_IS_LINK(arcn->type)((arcn->type) == 5 || ((arcn->type) == 8 || (arcn->type ) == 9))) |
732 | sub_name(arcn->ln_name, &(arcn->ln_nlen), |
733 | sizeof(arcn->ln_name)); |
734 | } |
735 | return(res); |
736 | } |
737 | |
738 | /* |
739 | * tty_rename() |
740 | * Prompt the user for a replacement file name. A "." keeps the old name, |
741 | * a empty line skips the file, and an EOF on reading the tty, will cause |
742 | * pax to stop processing and exit. Otherwise the file name input, replaces |
743 | * the old one. |
744 | * Return: |
745 | * 0 process this file, 1 skip this file, -1 we need to exit pax |
746 | */ |
747 | |
748 | static int |
749 | tty_rename(ARCHD *arcn) |
750 | { |
751 | char tmpname[PAXPATHLEN3072+2]; |
752 | int res; |
753 | |
754 | /* |
755 | * prompt user for the replacement name for a file, keep trying until |
756 | * we get some reasonable input. Archives may have more than one file |
757 | * on them with the same name (from updates etc). We print verbose info |
758 | * on the file so the user knows what is up. |
759 | */ |
760 | tty_prnt("\nATTENTION: %s interactive file rename operation.\n", argv0); |
761 | |
762 | for (;;) { |
763 | ls_tty(arcn); |
764 | tty_prnt("Input new name, or a \".\" to keep the old name, "); |
765 | tty_prnt("or a \"return\" to skip this file.\n"); |
766 | tty_prnt("Input > "); |
767 | if (tty_read(tmpname, sizeof(tmpname)) < 0) |
768 | return(-1); |
769 | if (strcmp(tmpname, "..") == 0) { |
770 | tty_prnt("Try again, illegal file name: ..\n"); |
771 | continue; |
772 | } |
773 | if (strlen(tmpname) > PAXPATHLEN3072) { |
774 | tty_prnt("Try again, file name too long\n"); |
775 | continue; |
776 | } |
777 | break; |
778 | } |
779 | |
780 | /* |
781 | * empty file name, skips this file. a "." leaves it alone |
782 | */ |
783 | if (tmpname[0] == '\0') { |
784 | tty_prnt("Skipping file.\n"); |
785 | return(1); |
786 | } |
787 | if ((tmpname[0] == '.') && (tmpname[1] == '\0')) { |
788 | tty_prnt("Processing continues, name unchanged.\n"); |
789 | return(0); |
790 | } |
791 | |
792 | /* |
793 | * ok the name changed. We may run into links that point at this |
794 | * file later. we have to remember where the user sent the file |
795 | * in order to repair any links. |
796 | */ |
797 | tty_prnt("Processing continues, name changed to: %s\n", tmpname); |
798 | res = add_name(arcn->name, arcn->nlen, tmpname); |
799 | arcn->nlen = strlcpy(arcn->name, tmpname, sizeof(arcn->name)); |
800 | if ((size_t)arcn->nlen >= sizeof(arcn->name)) |
801 | arcn->nlen = sizeof(arcn->name) - 1; /* XXX truncate? */ |
802 | if (res < 0) |
803 | return(-1); |
804 | return(0); |
805 | } |
806 | |
807 | /* |
808 | * set_dest() |
809 | * fix up the file name and the link name (if any) so this file will land |
810 | * in the destination directory (used during copy() -rw). |
811 | * Return: |
812 | * 0 if ok, -1 if failure (name too long) |
813 | */ |
814 | |
815 | int |
816 | set_dest(ARCHD *arcn, char *dest_dir, int dir_len) |
817 | { |
818 | if (fix_path(arcn->name, &(arcn->nlen), dest_dir, dir_len) < 0) |
819 | return(-1); |
820 | |
821 | /* |
822 | * It is really hard to deal with symlinks here, we cannot be sure |
823 | * if the name they point was moved (or will be moved). It is best to |
824 | * leave them alone. |
825 | */ |
826 | if (!PAX_IS_HARDLINK(arcn->type)((arcn->type) == 8 || (arcn->type) == 9)) |
827 | return(0); |
828 | |
829 | if (fix_path(arcn->ln_name, &(arcn->ln_nlen), dest_dir, dir_len) < 0) |
830 | return(-1); |
831 | return(0); |
832 | } |
833 | |
834 | /* |
835 | * fix_path |
836 | * concatenate dir_name and or_name and store the result in or_name (if |
837 | * it fits). This is one ugly function. |
838 | * Return: |
839 | * 0 if ok, -1 if the final name is too long |
840 | */ |
841 | |
842 | static int |
843 | fix_path(char *or_name, int *or_len, char *dir_name, int dir_len) |
844 | { |
845 | char *src; |
846 | char *dest; |
847 | char *start; |
848 | int len; |
849 | |
850 | /* |
851 | * we shift the or_name to the right enough to tack in the dir_name |
852 | * at the front. We make sure we have enough space for it all before |
853 | * we start. since dest always ends in a slash, we skip of or_name |
854 | * if it also starts with one. |
855 | */ |
856 | start = or_name; |
857 | src = start + *or_len; |
858 | dest = src + dir_len; |
859 | if (*start == '/') { |
860 | ++start; |
861 | --dest; |
862 | } |
863 | if ((len = dest - or_name) > PAXPATHLEN3072) { |
864 | paxwarn(1, "File name %s/%s, too long", dir_name, start); |
865 | return(-1); |
866 | } |
867 | *or_len = len; |
868 | |
869 | /* |
870 | * enough space, shift |
871 | */ |
872 | while (src >= start) |
873 | *dest-- = *src--; |
874 | src = dir_name + dir_len - 1; |
875 | |
876 | /* |
877 | * splice in the destination directory name |
878 | */ |
879 | while (src >= dir_name) |
880 | *dest-- = *src--; |
881 | |
882 | *(or_name + len) = '\0'; |
883 | return(0); |
884 | } |
885 | |
886 | /* |
887 | * rep_name() |
888 | * walk down the list of replacement strings applying each one in order. |
889 | * when we find one with a successful substitution, we modify the name |
890 | * as specified. if required, we print the results. if the resulting name |
891 | * is empty, we will skip this archive member. We use the regexp(3) |
892 | * routines (regexp() ought to win a prize as having the most cryptic |
893 | * library function manual page). |
894 | * --Parameters-- |
895 | * name is the file name we are going to apply the regular expressions to |
896 | * (and may be modified) |
897 | * nsize is the size of the name buffer. |
898 | * nlen is the length of this name (and is modified to hold the length of |
899 | * the final string). |
900 | * prnt is a flag that says whether to print the final result. |
901 | * Return: |
902 | * 0 if substitution was successful, 1 if we are to skip the file (the name |
903 | * ended up empty) |
904 | */ |
905 | |
906 | static int |
907 | rep_name(char *name, size_t nsize, int *nlen, int prnt) |
908 | { |
909 | REPLACE *pt; |
910 | char *inpt; |
911 | char *outpt; |
912 | char *endpt; |
913 | char *rpt; |
914 | int found = 0; |
915 | int res; |
916 | regmatch_t pm[MAXSUBEXP10]; |
917 | char nname[PAXPATHLEN3072+1]; /* final result of all replacements */ |
918 | char buf1[PAXPATHLEN3072+1]; /* where we work on the name */ |
919 | |
920 | /* |
921 | * copy the name into buf1, where we will work on it. We need to keep |
922 | * the orig string around so we can print out the result of the final |
923 | * replacement. We build up the final result in nname. inpt points at |
924 | * the string we apply the regular expression to. prnt is used to |
925 | * suppress printing when we handle replacements on the link field |
926 | * (the user already saw that substitution go by) |
927 | */ |
928 | pt = rephead; |
929 | (void)strlcpy(buf1, name, sizeof(buf1)); |
930 | inpt = buf1; |
931 | outpt = nname; |
932 | endpt = outpt + PAXPATHLEN3072; |
933 | |
934 | /* |
935 | * try each replacement string in order |
936 | */ |
937 | while (pt != NULL((void *)0)) { |
938 | do { |
939 | char *oinpt = inpt; |
940 | /* |
941 | * check for a successful substitution, if not go to |
942 | * the next pattern, or cleanup if we were global |
943 | */ |
944 | if (regexec(&(pt->rcmp), inpt, MAXSUBEXP10, pm, 0) != 0) |
945 | break; |
946 | |
947 | /* |
948 | * ok we found one. We have three parts, the prefix |
949 | * which did not match, the section that did and the |
950 | * tail (that also did not match). Copy the prefix to |
951 | * the final output buffer (watching to make sure we |
952 | * do not create a string too long). |
953 | */ |
954 | found = 1; |
955 | rpt = inpt + pm[0].rm_so; |
956 | |
957 | while ((inpt < rpt) && (outpt < endpt)) |
958 | *outpt++ = *inpt++; |
959 | if (outpt == endpt) |
960 | break; |
961 | |
962 | /* |
963 | * for the second part (which matched the regular |
964 | * expression) apply the substitution using the |
965 | * replacement string and place it the prefix in the |
966 | * final output. If we have problems, skip it. |
967 | */ |
968 | if ((res = resub(&(pt->rcmp),pm,pt->nstr,oinpt,outpt,endpt)) |
969 | < 0) { |
970 | if (prnt) |
971 | paxwarn(1, "Replacement name error %s", |
972 | name); |
973 | return(1); |
974 | } |
975 | outpt += res; |
976 | |
977 | /* |
978 | * we set up to look again starting at the first |
979 | * character in the tail (of the input string right |
980 | * after the last character matched by the regular |
981 | * expression (inpt always points at the first char in |
982 | * the string to process). If we are not doing a global |
983 | * substitution, we will use inpt to copy the tail to |
984 | * the final result. Make sure we do not overrun the |
985 | * output buffer |
986 | */ |
987 | inpt += pm[0].rm_eo - pm[0].rm_so; |
988 | |
989 | if ((outpt == endpt) || (*inpt == '\0')) |
990 | break; |
991 | |
992 | /* |
993 | * if the user wants global we keep trying to |
994 | * substitute until it fails, then we are done. |
995 | */ |
996 | } while (pt->flgs & GLOB0x2); |
997 | |
998 | if (found) |
999 | break; |
1000 | |
1001 | /* |
1002 | * a successful substitution did NOT occur, try the next one |
1003 | */ |
1004 | pt = pt->fow; |
1005 | } |
1006 | |
1007 | if (found) { |
1008 | /* |
1009 | * we had a substitution, copy the last tail piece (if there is |
1010 | * room) to the final result |
1011 | */ |
1012 | while ((outpt < endpt) && (*inpt != '\0')) |
1013 | *outpt++ = *inpt++; |
1014 | |
1015 | *outpt = '\0'; |
1016 | if ((outpt == endpt) && (*inpt != '\0')) { |
1017 | if (prnt) |
1018 | paxwarn(1,"Replacement name too long %s >> %s", |
1019 | name, nname); |
1020 | return(1); |
1021 | } |
1022 | |
1023 | /* |
1024 | * inform the user of the result if wanted |
1025 | */ |
1026 | if (prnt && (pt->flgs & PRNT0x1)) { |
1027 | if (*nname == '\0') |
1028 | (void)fprintf(stderr(&__sF[2]),"%s >> <empty string>\n", |
1029 | name); |
1030 | else |
1031 | (void)fprintf(stderr(&__sF[2]),"%s >> %s\n", name, nname); |
1032 | } |
1033 | |
1034 | /* |
1035 | * if empty inform the caller this file is to be skipped |
1036 | * otherwise copy the new name over the orig name and return |
1037 | */ |
1038 | if (*nname == '\0') |
1039 | return(1); |
1040 | *nlen = strlcpy(name, nname, nsize); |
1041 | } |
1042 | return(0); |
1043 | } |
1044 | |
/*
 * resub()
 *	Expand the replacement template src into the buffer starting at dest.
 *	'&' stands for the whole match (pm[0]); a backslash followed by a
 *	digit N stands for subexpression N (pm[N]); a backslash followed by
 *	any other character yields that character literally. All match
 *	offsets in pm are taken relative to inpt. Nothing is written at or
 *	beyond destend, and the output is NOT NUL terminated here.
 * Return:
 *	-1 if the template names a subexpression the pattern does not have or
 *	a referenced match does not fit in the remaining space, otherwise the
 *	number of characters added to the destination.
 */

static int
resub(regex_t *rp, regmatch_t *pm, char *src, char *inpt, char *dest,
    char *destend)
{
	int nsub = rp->re_nsub;		/* subexpressions in the pattern */
	char *tmpl = src;		/* read cursor in the template */
	char *out = dest;		/* write cursor in the output */

	while (out < destend) {
		regmatch_t *grp;
		int idx;
		int span;
		char ch;

		ch = *tmpl++;
		if (ch == '\0')
			break;

		if (ch == '&') {
			/* whole match */
			grp = &pm[0];
		} else if (ch != '\\') {
			/* plain character, copied as is */
			*out++ = ch;
			continue;
		} else if ((*tmpl >= '0') && (*tmpl <= '9')) {
			/*
			 * numbered back reference, the pattern must actually
			 * have that many subexpressions
			 */
			idx = *tmpl++ - '0';
			if (idx > nsub)
				return(-1);
			grp = &pm[idx];
		} else {
			/*
			 * escaped character: emit what follows the backslash,
			 * or the backslash itself when it ends the template
			 */
			if (*tmpl != '\0')
				ch = *tmpl++;
			*out++ = ch;
			continue;
		}

		/*
		 * a reference that did not take part in the match, or that
		 * matched nothing, adds nothing to the output
		 */
		if (grp->rm_so < 0)
			continue;
		if (grp->rm_eo < 0)
			continue;
		span = grp->rm_eo - grp->rm_so;
		if (span <= 0)
			continue;

		/*
		 * refuse to truncate a matched piece
		 */
		if (span > (destend - out))
			return (-1);
		strncpy(out, inpt + grp->rm_so, span);
		out += span;
	}
	return(out - dest);
}