| File: | src/bin/pax/pat_rep.c |
| Warning: | line 521, column 9 Although the value stored to 'test' is used in the enclosing expression, the value is never actually read from 'test' |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
| 1 | /* $OpenBSD: pat_rep.c,v 1.43 2017/09/16 07:42:34 otto Exp $ */ |
| 2 | /* $NetBSD: pat_rep.c,v 1.4 1995/03/21 09:07:33 cgd Exp $ */ |
| 3 | |
| 4 | /*- |
| 5 | * Copyright (c) 1992 Keith Muller. |
| 6 | * Copyright (c) 1992, 1993 |
| 7 | * The Regents of the University of California. All rights reserved. |
| 8 | * |
| 9 | * This code is derived from software contributed to Berkeley by |
| 10 | * Keith Muller of the University of California, San Diego. |
| 11 | * |
| 12 | * Redistribution and use in source and binary forms, with or without |
| 13 | * modification, are permitted provided that the following conditions |
| 14 | * are met: |
| 15 | * 1. Redistributions of source code must retain the above copyright |
| 16 | * notice, this list of conditions and the following disclaimer. |
| 17 | * 2. Redistributions in binary form must reproduce the above copyright |
| 18 | * notice, this list of conditions and the following disclaimer in the |
| 19 | * documentation and/or other materials provided with the distribution. |
| 20 | * 3. Neither the name of the University nor the names of its contributors |
| 21 | * may be used to endorse or promote products derived from this software |
| 22 | * without specific prior written permission. |
| 23 | * |
| 24 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
| 25 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 26 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 27 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
| 28 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| 29 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
| 30 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| 31 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| 32 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
| 33 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| 34 | * SUCH DAMAGE. |
| 35 | */ |
| 36 | |
| 37 | #include <sys/types.h> |
| 38 | #include <sys/stat.h> |
| 39 | #include <regex.h> |
| 40 | #include <stdio.h> |
| 41 | #include <stdlib.h> |
| 42 | #include <string.h> |
| 43 | |
| 44 | #include "pax.h" |
| 45 | #include "extern.h" |
| 46 | |
/*
 * One user supplied replacement expression (-s).  Nodes are chained through
 * "fow" in the order they were given on the command line.
 */
typedef struct replace {
	char		*nstr;	/* the new string we will substitute with */
	regex_t		rcmp;	/* compiled regular expression used to match */
	int		flgs;	/* print conversions? global in operation? */
#define	PRNT		0x1	/* 'p' option: print the conversion */
#define	GLOB		0x2	/* 'g' option: global over the single name */
	struct replace	*fow;	/* pointer to next pattern */
} REPLACE;
| 58 | |
| 59 | /* |
| 60 | * routines to handle pattern matching, name modification (regular expression |
| 61 | * substitution and interactive renames), and destination name modification for |
| 62 | * copy (-rw). Both file name and link names are adjusted as required in these |
| 63 | * routines. |
| 64 | */ |
| 65 | |
| 66 | #define MAXSUBEXP10 10 /* max subexpressions, DO NOT CHANGE */ |
| 67 | static PATTERN *pathead = NULL((void *)0); /* file pattern match list head */ |
| 68 | static PATTERN *pattail = NULL((void *)0); /* file pattern match list tail */ |
| 69 | static REPLACE *rephead = NULL((void *)0); /* replacement string list head */ |
| 70 | static REPLACE *reptail = NULL((void *)0); /* replacement string list tail */ |
| 71 | |
| 72 | static int rep_name(char *, size_t, int *, int); |
| 73 | static int tty_rename(ARCHD *); |
| 74 | static int fix_path(char *, int *, char *, int); |
| 75 | static int fn_match(char *, char *, char **); |
| 76 | static char * range_match(char *, int); |
| 77 | static int resub(regex_t *, regmatch_t *, char *, char *, char *, char *); |
| 78 | |
| 79 | /* |
| 80 | * rep_add() |
| 81 | * parses the -s replacement string; compiles the regular expression |
| 82 | * and stores the compiled value and it's replacement string together in |
| 83 | * replacement string list. Input to this function is of the form: |
| 84 | * /old/new/pg |
| 85 | * The first char in the string specifies the delimiter used by this |
| 86 | * replacement string. "Old" is a regular expression in "ed" format which |
| 87 | * is compiled by regcomp() and is applied to filenames. "new" is the |
| 88 | * substitution string; p and g are options flags for printing and global |
| 89 | * replacement (over the single filename) |
| 90 | * Return: |
| 91 | * 0 if a proper replacement string and regular expression was added to |
| 92 | * the list of replacement patterns; -1 otherwise. |
| 93 | */ |
| 94 | |
| 95 | int |
| 96 | rep_add(char *str) |
| 97 | { |
| 98 | char *pt1; |
| 99 | char *pt2; |
| 100 | REPLACE *rep; |
| 101 | int res; |
| 102 | char rebuf[BUFSIZ1024]; |
| 103 | |
| 104 | /* |
| 105 | * throw out the bad parameters |
| 106 | */ |
| 107 | if ((str == NULL((void *)0)) || (*str == '\0')) { |
| 108 | paxwarn(1, "Empty replacement string"); |
| 109 | return(-1); |
| 110 | } |
| 111 | |
| 112 | /* |
| 113 | * first character in the string specifies what the delimiter is for |
| 114 | * this expression |
| 115 | */ |
| 116 | for (pt1 = str+1; *pt1; pt1++) { |
| 117 | if (*pt1 == '\\') { |
| 118 | pt1++; |
| 119 | continue; |
| 120 | } |
| 121 | if (*pt1 == *str) |
| 122 | break; |
| 123 | } |
| 124 | if (*pt1 == '\0') { |
| 125 | paxwarn(1, "Invalid replacement string %s", str); |
| 126 | return(-1); |
| 127 | } |
| 128 | |
| 129 | /* |
| 130 | * allocate space for the node that handles this replacement pattern |
| 131 | * and split out the regular expression and try to compile it |
| 132 | */ |
| 133 | if ((rep = malloc(sizeof(REPLACE))) == NULL((void *)0)) { |
| 134 | paxwarn(1, "Unable to allocate memory for replacement string"); |
| 135 | return(-1); |
| 136 | } |
| 137 | |
| 138 | *pt1 = '\0'; |
| 139 | if ((res = regcomp(&(rep->rcmp), str+1, 0)) != 0) { |
| 140 | regerror(res, &(rep->rcmp), rebuf, sizeof(rebuf)); |
| 141 | paxwarn(1, "%s while compiling regular expression %s", rebuf, str); |
| 142 | free(rep); |
| 143 | return(-1); |
| 144 | } |
| 145 | |
| 146 | /* |
| 147 | * put the delimiter back in case we need an error message and |
| 148 | * locate the delimiter at the end of the replacement string |
| 149 | * we then point the node at the new substitution string |
| 150 | */ |
| 151 | *pt1++ = *str; |
| 152 | for (pt2 = pt1; *pt2; pt2++) { |
| 153 | if (*pt2 == '\\') { |
| 154 | pt2++; |
| 155 | continue; |
| 156 | } |
| 157 | if (*pt2 == *str) |
| 158 | break; |
| 159 | } |
| 160 | if (*pt2 == '\0') { |
| 161 | regfree(&(rep->rcmp)); |
| 162 | free(rep); |
| 163 | paxwarn(1, "Invalid replacement string %s", str); |
| 164 | return(-1); |
| 165 | } |
| 166 | |
| 167 | *pt2 = '\0'; |
| 168 | rep->nstr = pt1; |
| 169 | pt1 = pt2++; |
| 170 | rep->flgs = 0; |
| 171 | |
| 172 | /* |
| 173 | * set the options if any |
| 174 | */ |
| 175 | while (*pt2 != '\0') { |
| 176 | switch (*pt2) { |
| 177 | case 'g': |
| 178 | case 'G': |
| 179 | rep->flgs |= GLOB0x2; |
| 180 | break; |
| 181 | case 'p': |
| 182 | case 'P': |
| 183 | rep->flgs |= PRNT0x1; |
| 184 | break; |
| 185 | default: |
| 186 | regfree(&(rep->rcmp)); |
| 187 | free(rep); |
| 188 | *pt1 = *str; |
| 189 | paxwarn(1, "Invalid replacement string option %s", str); |
| 190 | return(-1); |
| 191 | } |
| 192 | ++pt2; |
| 193 | } |
| 194 | |
| 195 | /* |
| 196 | * all done, link it in at the end |
| 197 | */ |
| 198 | rep->fow = NULL((void *)0); |
| 199 | if (rephead == NULL((void *)0)) { |
| 200 | reptail = rephead = rep; |
| 201 | return(0); |
| 202 | } |
| 203 | reptail->fow = rep; |
| 204 | reptail = rep; |
| 205 | return(0); |
| 206 | } |
| 207 | |
| 208 | /* |
| 209 | * pat_add() |
| 210 | * add a pattern match to the pattern match list. Pattern matches are used |
| 211 | * to select which archive members are extracted. (They appear as |
| 212 | * arguments to pax in the list and read modes). If no patterns are |
| 213 | * supplied to pax, all members in the archive will be selected (and the |
| 214 | * pattern match list is empty). |
| 215 | * Return: |
| 216 | * 0 if the pattern was added to the list, -1 otherwise |
| 217 | */ |
| 218 | |
| 219 | int |
| 220 | pat_add(char *str, char *chdirname) |
| 221 | { |
| 222 | PATTERN *pt; |
| 223 | |
| 224 | /* |
| 225 | * throw out the junk |
| 226 | */ |
| 227 | if ((str == NULL((void *)0)) || (*str == '\0')) { |
| 228 | paxwarn(1, "Empty pattern string"); |
| 229 | return(-1); |
| 230 | } |
| 231 | |
| 232 | /* |
| 233 | * allocate space for the pattern and store the pattern. the pattern is |
| 234 | * part of argv so do not bother to copy it, just point at it. Add the |
| 235 | * node to the end of the pattern list |
| 236 | */ |
| 237 | if ((pt = malloc(sizeof(PATTERN))) == NULL((void *)0)) { |
| 238 | paxwarn(1, "Unable to allocate memory for pattern string"); |
| 239 | return(-1); |
| 240 | } |
| 241 | |
| 242 | pt->pstr = str; |
| 243 | pt->pend = NULL((void *)0); |
| 244 | pt->plen = strlen(str); |
| 245 | pt->fow = NULL((void *)0); |
| 246 | pt->flgs = 0; |
| 247 | pt->chdname = chdirname; |
| 248 | |
| 249 | if (pathead == NULL((void *)0)) { |
| 250 | pattail = pathead = pt; |
| 251 | return(0); |
| 252 | } |
| 253 | pattail->fow = pt; |
| 254 | pattail = pt; |
| 255 | return(0); |
| 256 | } |
| 257 | |
| 258 | /* |
| 259 | * pat_chk() |
| 260 | * complain if any the user supplied pattern did not result in a match to |
| 261 | * a selected archive member. |
| 262 | */ |
| 263 | |
| 264 | void |
| 265 | pat_chk(void) |
| 266 | { |
| 267 | PATTERN *pt; |
| 268 | int wban = 0; |
| 269 | |
| 270 | /* |
| 271 | * walk down the list checking the flags to make sure MTCH was set, |
| 272 | * if not complain |
| 273 | */ |
| 274 | for (pt = pathead; pt != NULL((void *)0); pt = pt->fow) { |
| 275 | if (pt->flgs & MTCH0x1) |
| 276 | continue; |
| 277 | if (!wban) { |
| 278 | paxwarn(1, "WARNING! These patterns were not matched:"); |
| 279 | ++wban; |
| 280 | } |
| 281 | (void)fprintf(stderr(&__sF[2]), "%s\n", pt->pstr); |
| 282 | } |
| 283 | } |
| 284 | |
| 285 | /* |
| 286 | * pat_sel() |
| 287 | * the archive member which matches a pattern was selected. Mark the |
| 288 | * pattern as having selected an archive member. arcn->pat points at the |
| 289 | * pattern that was matched. arcn->pat is set in pat_match() |
| 290 | * |
| 291 | * NOTE: When the -c option is used, we are called when there was no match |
| 292 | * by pat_match() (that means we did match before the inverted sense of |
| 293 | * the logic). Now this seems really strange at first, but with -c we |
| 294 | * need to keep track of those patterns that cause an archive member to NOT |
| 295 | * be selected (it found an archive member with a specified pattern) |
| 296 | * Return: |
| 297 | * 0 if the pattern pointed at by arcn->pat was tagged as creating a |
| 298 | * match, -1 otherwise. |
| 299 | */ |
| 300 | |
| 301 | int |
| 302 | pat_sel(ARCHD *arcn) |
| 303 | { |
| 304 | PATTERN *pt; |
| 305 | PATTERN **ppt; |
| 306 | size_t len; |
| 307 | |
| 308 | /* |
| 309 | * if no patterns just return |
| 310 | */ |
| 311 | if ((pathead == NULL((void *)0)) || ((pt = arcn->pat) == NULL((void *)0))) |
| 312 | return(0); |
| 313 | |
| 314 | /* |
| 315 | * when we are NOT limited to a single match per pattern mark the |
| 316 | * pattern and return |
| 317 | */ |
| 318 | if (!nflag) { |
| 319 | pt->flgs |= MTCH0x1; |
| 320 | return(0); |
| 321 | } |
| 322 | |
| 323 | /* |
| 324 | * we reach this point only when we allow a single selected match per |
| 325 | * pattern, if the pattern matches a directory and we do not have -d |
| 326 | * (dflag) we are done with this pattern. We may also be handed a file |
| 327 | * in the subtree of a directory. in that case when we are operating |
| 328 | * with -d, this pattern was already selected and we are done |
| 329 | */ |
| 330 | if (pt->flgs & DIR_MTCH0x2) |
| 331 | return(0); |
| 332 | |
| 333 | if (!dflag && ((pt->pend != NULL((void *)0)) || (arcn->type == PAX_DIR1))) { |
| 334 | /* |
| 335 | * ok we matched a directory and we are allowing |
| 336 | * subtree matches but because of the -n only its children will |
| 337 | * match. This is tagged as a DIR_MTCH type. |
| 338 | * WATCH IT, the code assumes that pt->pend points |
| 339 | * into arcn->name and arcn->name has not been modified. |
| 340 | * If not we will have a big mess. Yup this is another kludge |
| 341 | */ |
| 342 | |
| 343 | /* |
| 344 | * if this was a prefix match, remove trailing part of path |
| 345 | * so we can copy it. Future matches will be exact prefix match |
| 346 | */ |
| 347 | if (pt->pend != NULL((void *)0)) |
| 348 | *pt->pend = '\0'; |
| 349 | |
| 350 | if ((pt->pstr = strdup(arcn->name)) == NULL((void *)0)) { |
| 351 | paxwarn(1, "Pattern select out of memory"); |
| 352 | if (pt->pend != NULL((void *)0)) |
| 353 | *pt->pend = '/'; |
| 354 | pt->pend = NULL((void *)0); |
| 355 | return(-1); |
| 356 | } |
| 357 | |
| 358 | /* |
| 359 | * put the trailing / back in the source string |
| 360 | */ |
| 361 | if (pt->pend != NULL((void *)0)) { |
| 362 | *pt->pend = '/'; |
| 363 | pt->pend = NULL((void *)0); |
| 364 | } |
| 365 | pt->plen = strlen(pt->pstr); |
| 366 | |
| 367 | /* |
| 368 | * strip off any trailing /, this should really never happen |
| 369 | */ |
| 370 | len = pt->plen - 1; |
| 371 | if (*(pt->pstr + len) == '/') { |
| 372 | *(pt->pstr + len) = '\0'; |
| 373 | pt->plen = len; |
| 374 | } |
| 375 | pt->flgs = DIR_MTCH0x2 | MTCH0x1; |
| 376 | arcn->pat = pt; |
| 377 | return(0); |
| 378 | } |
| 379 | |
| 380 | /* |
| 381 | * we are then done with this pattern, so we delete it from the list |
| 382 | * because it can never be used for another match. |
| 383 | * Seems kind of strange to do for a -c, but the pax spec is really |
| 384 | * vague on the interaction of -c, -n and -d. We assume that when -c |
| 385 | * and the pattern rejects a member (i.e. it matched it) it is done. |
| 386 | * In effect we place the order of the flags as having -c last. |
| 387 | */ |
| 388 | pt = pathead; |
| 389 | ppt = &pathead; |
| 390 | while ((pt != NULL((void *)0)) && (pt != arcn->pat)) { |
| 391 | ppt = &(pt->fow); |
| 392 | pt = pt->fow; |
| 393 | } |
| 394 | |
| 395 | if (pt == NULL((void *)0)) { |
| 396 | /* |
| 397 | * should never happen.... |
| 398 | */ |
| 399 | paxwarn(1, "Pattern list inconsistent"); |
| 400 | return(-1); |
| 401 | } |
| 402 | *ppt = pt->fow; |
| 403 | free(pt); |
| 404 | arcn->pat = NULL((void *)0); |
| 405 | return(0); |
| 406 | } |
| 407 | |
| 408 | /* |
| 409 | * pat_match() |
| 410 | * see if this archive member matches any supplied pattern, if a match |
| 411 | * is found, arcn->pat is set to point at the potential pattern. Later if |
| 412 | * this archive member is "selected" we process and mark the pattern as |
| 413 | * one which matched a selected archive member (see pat_sel()) |
| 414 | * Return: |
| 415 | * 0 if this archive member should be processed, 1 if it should be |
| 416 | * skipped and -1 if we are done with all patterns (and pax should quit |
| 417 | * looking for more members) |
| 418 | */ |
| 419 | |
| 420 | int |
| 421 | pat_match(ARCHD *arcn) |
| 422 | { |
| 423 | PATTERN *pt; |
| 424 | |
| 425 | arcn->pat = NULL((void *)0); |
| 426 | |
| 427 | /* |
| 428 | * if there are no more patterns and we have -n (and not -c) we are |
| 429 | * done. otherwise with no patterns to match, matches all |
| 430 | */ |
| 431 | if (pathead == NULL((void *)0)) { |
| 432 | if (nflag && !cflag) |
| 433 | return(-1); |
| 434 | return(0); |
| 435 | } |
| 436 | |
| 437 | /* |
| 438 | * have to search down the list one at a time looking for a match. |
| 439 | */ |
| 440 | pt = pathead; |
| 441 | while (pt != NULL((void *)0)) { |
| 442 | /* |
| 443 | * check for a file name match unless we have DIR_MTCH set in |
| 444 | * this pattern then we want a prefix match |
| 445 | */ |
| 446 | if (pt->flgs & DIR_MTCH0x2) { |
| 447 | /* |
| 448 | * this pattern was matched before to a directory |
| 449 | * as we must have -n set for this (but not -d). We can |
| 450 | * only match CHILDREN of that directory so we must use |
| 451 | * an exact prefix match (no wildcards). |
| 452 | */ |
| 453 | if ((arcn->name[pt->plen] == '/') && |
| 454 | (strncmp(pt->pstr, arcn->name, pt->plen) == 0)) |
| 455 | break; |
| 456 | } else if (fn_match(pt->pstr, arcn->name, &pt->pend) == 0) |
| 457 | break; |
| 458 | pt = pt->fow; |
| 459 | } |
| 460 | |
| 461 | /* |
| 462 | * return the result, remember that cflag (-c) inverts the sense of a |
| 463 | * match |
| 464 | */ |
| 465 | if (pt == NULL((void *)0)) |
| 466 | return(cflag ? 0 : 1); |
| 467 | |
| 468 | /* |
| 469 | * we had a match, now when we invert the sense (-c) we reject this |
| 470 | * member. However we have to tag the pattern a being successful, (in a |
| 471 | * match, not in selecting a archive member) so we call pat_sel() here. |
| 472 | */ |
| 473 | arcn->pat = pt; |
| 474 | if (!cflag) |
| 475 | return(0); |
| 476 | |
| 477 | if (pat_sel(arcn) < 0) |
| 478 | return(-1); |
| 479 | arcn->pat = NULL((void *)0); |
| 480 | return(1); |
| 481 | } |
| 482 | |
| 483 | /* |
| 484 | * fn_match() |
| 485 | * Return: |
| 486 | * 0 if this archive member should be processed, 1 if it should be |
| 487 | * skipped and -1 if we are done with all patterns (and pax should quit |
| 488 | * looking for more members) |
| 489 | * Note: *pend may be changed to show where the prefix ends. |
| 490 | */ |
| 491 | |
| 492 | static int |
| 493 | fn_match(char *pattern, char *string, char **pend) |
| 494 | { |
| 495 | char c; |
| 496 | char test; |
| 497 | |
| 498 | *pend = NULL((void *)0); |
| 499 | for (;;) { |
| 500 | switch (c = *pattern++) { |
| 501 | case '\0': |
| 502 | /* |
| 503 | * Ok we found an exact match |
| 504 | */ |
| 505 | if (*string == '\0') |
| 506 | return(0); |
| 507 | |
| 508 | /* |
| 509 | * Check if it is a prefix match |
| 510 | */ |
| 511 | if ((dflag == 1) || (*string != '/')) |
| 512 | return(-1); |
| 513 | |
| 514 | /* |
| 515 | * It is a prefix match, remember where the trailing |
| 516 | * / is located |
| 517 | */ |
| 518 | *pend = string; |
| 519 | return(0); |
| 520 | case '?': |
| 521 | if ((test = *string++) == '\0') |
Although the value stored to 'test' is used in the enclosing expression, the value is never actually read from 'test' | |
| 522 | return (-1); |
| 523 | break; |
| 524 | case '*': |
| 525 | c = *pattern; |
| 526 | /* |
| 527 | * Collapse multiple *'s. |
| 528 | */ |
| 529 | while (c == '*') |
| 530 | c = *++pattern; |
| 531 | |
| 532 | /* |
| 533 | * Optimized hack for pattern with a * at the end |
| 534 | */ |
| 535 | if (c == '\0') |
| 536 | return (0); |
| 537 | |
| 538 | /* |
| 539 | * General case, use recursion. |
| 540 | */ |
| 541 | while ((test = *string) != '\0') { |
| 542 | if (!fn_match(pattern, string, pend)) |
| 543 | return (0); |
| 544 | ++string; |
| 545 | } |
| 546 | return (-1); |
| 547 | case '[': |
| 548 | /* |
| 549 | * range match |
| 550 | */ |
| 551 | if (((test = *string++) == '\0') || |
| 552 | ((pattern = range_match(pattern, test)) == NULL((void *)0))) |
| 553 | return (-1); |
| 554 | break; |
| 555 | case '\\': |
| 556 | if ((c = *pattern++) == '\0') |
| 557 | return (-1); |
| 558 | /* FALLTHROUGH */ |
| 559 | default: |
| 560 | if (c != *string++) |
| 561 | return (-1); |
| 562 | break; |
| 563 | } |
| 564 | } |
| 565 | /* NOTREACHED */ |
| 566 | } |
| 567 | |
/*
 * range_match()
 *	match one character against the body of a '[...]' set. pattern points
 *	just past the opening '['. A leading '!' negates the set; "a-z" denotes
 *	an inclusive range, unless the '-' is the last character before the
 *	closing ']' (or before the end of the pattern), in which case it is an
 *	ordinary character.
 * Return:
 *	pointer to the pattern character following the closing ']' when test
 *	is accepted by the set; NULL when it is rejected or when the set is
 *	not terminated by a ']'.
 */

static char *
range_match(char *pattern, int test)
{
	char c;
	char c2;
	int negate;
	int ok = 0;

	negate = (*pattern == '!');
	if (negate)
		++pattern;

	for (;;) {
		c = *pattern++;
		if (c == ']')
			break;

		/*
		 * Illegal pattern
		 */
		if (c == '\0')
			return (NULL);

		if ((*pattern == '-') && ((c2 = pattern[1]) != '\0') &&
		    (c2 != ']')) {
			if ((c <= test) && (test <= c2))
				ok = 1;
			pattern += 2;	/* step over "-" and the upper bound */
		} else if (c == test)
			ok = 1;
	}

	/* accepted when "found in the set" and "negated" disagree */
	if (ok == negate)
		return (NULL);
	return (pattern);
}
| 596 | |
/*
 * has_dotdot()
 *	Returns true iff the supplied path contains a ".." component, i.e. a
 *	".." that is bounded on the left by the start of the path or a '/' and
 *	on the right by a '/' or the end of the path.
 */

int
has_dotdot(const char *path)
{
	const char *p;

	for (p = strstr(path, ".."); p != NULL; p = strstr(p + 2, "..")) {
		int at_start = (p == path || p[-1] == '/');
		int at_end = (p[2] == '/' || p[2] == '\0');

		if (at_start && at_end)
			return (1);
	}
	return (0);
}
| 615 | |
| 616 | /* |
| 617 | * mod_name() |
| 618 | * modify a selected file name. first attempt to apply replacement string |
| 619 | * expressions, then apply interactive file rename. We apply replacement |
| 620 | * string expressions to both filenames and file links (if we didn't the |
| 621 | * links would point to the wrong place, and we could never be able to |
| 622 | * move an archive that has a file link in it). When we rename files |
| 623 | * interactively, we store that mapping (old name to user input name) so |
| 624 | * if we spot any file links to the old file name in the future, we will |
| 625 | * know exactly how to fix the file link. |
| 626 | * Return: |
| 627 | * 0 continue to process file, 1 skip this file, -1 pax is finished |
| 628 | */ |
| 629 | |
| 630 | int |
| 631 | mod_name(ARCHD *arcn) |
| 632 | { |
| 633 | int res = 0; |
| 634 | |
| 635 | /* |
| 636 | * Strip off leading '/' if appropriate. |
| 637 | * Currently, this option is only set for the tar format. |
| 638 | */ |
| 639 | while (rmleadslash && arcn->name[0] == '/') { |
| 640 | if (arcn->name[1] == '\0') { |
| 641 | arcn->name[0] = '.'; |
| 642 | } else { |
| 643 | (void)memmove(arcn->name, &arcn->name[1], |
| 644 | strlen(arcn->name)); |
| 645 | arcn->nlen--; |
| 646 | } |
| 647 | if (rmleadslash < 2) { |
| 648 | rmleadslash = 2; |
| 649 | paxwarn(0, "Removing leading / from absolute path names in the archive"); |
| 650 | } |
| 651 | } |
| 652 | while (rmleadslash && arcn->ln_name[0] == '/' && |
| 653 | PAX_IS_HARDLINK(arcn->type)((arcn->type) == 8 || (arcn->type) == 9)) { |
| 654 | if (arcn->ln_name[1] == '\0') { |
| 655 | arcn->ln_name[0] = '.'; |
| 656 | } else { |
| 657 | (void)memmove(arcn->ln_name, &arcn->ln_name[1], |
| 658 | strlen(arcn->ln_name)); |
| 659 | arcn->ln_nlen--; |
| 660 | } |
| 661 | if (rmleadslash < 2) { |
| 662 | rmleadslash = 2; |
| 663 | paxwarn(0, "Removing leading / from absolute path names in the archive"); |
| 664 | } |
| 665 | } |
| 666 | if (rmleadslash) { |
| 667 | const char *last = NULL((void *)0); |
| 668 | const char *p = arcn->name; |
| 669 | |
| 670 | while ((p = strstr(p, "..")) != NULL((void *)0)) { |
| 671 | if ((p == arcn->name || p[-1] == '/') && |
| 672 | (p[2] == '/' || p[2] == '\0')) |
| 673 | last = p + 2; |
| 674 | p += 2; |
| 675 | } |
| 676 | if (last != NULL((void *)0)) { |
| 677 | last++; |
| 678 | paxwarn(1, "Removing leading \"%.*s\"", |
| 679 | (int)(last - arcn->name), arcn->name); |
| 680 | arcn->nlen = strlen(last); |
| 681 | if (arcn->nlen > 0) |
| 682 | memmove(arcn->name, last, arcn->nlen + 1); |
| 683 | else { |
| 684 | arcn->name[0] = '.'; |
| 685 | arcn->name[1] = '\0'; |
| 686 | arcn->nlen = 1; |
| 687 | } |
| 688 | } |
| 689 | } |
| 690 | |
| 691 | /* |
| 692 | * IMPORTANT: We have a problem. what do we do with symlinks? |
| 693 | * Modifying a hard link name makes sense, as we know the file it |
| 694 | * points at should have been seen already in the archive (and if it |
| 695 | * wasn't seen because of a read error or a bad archive, we lose |
| 696 | * anyway). But there are no such requirements for symlinks. On one |
| 697 | * hand the symlink that refers to a file in the archive will have to |
| 698 | * be modified to so it will still work at its new location in the |
| 699 | * file system. On the other hand a symlink that points elsewhere (and |
| 700 | * should continue to do so) should not be modified. There is clearly |
| 701 | * no perfect solution here. So we handle them like hardlinks. Clearly |
| 702 | * a replacement made by the interactive rename mapping is very likely |
| 703 | * to be correct since it applies to a single file and is an exact |
| 704 | * match. The regular expression replacements are a little harder to |
| 705 | * justify though. We claim that the symlink name is only likely |
| 706 | * to be replaced when it points within the file tree being moved and |
| 707 | * in that case it should be modified. what we really need to do is to |
| 708 | * call an oracle here. :) |
| 709 | */ |
| 710 | if (rephead != NULL((void *)0)) { |
| 711 | /* |
| 712 | * we have replacement strings, modify the name and the link |
| 713 | * name if any. |
| 714 | */ |
| 715 | if ((res = rep_name(arcn->name, sizeof(arcn->name), &(arcn->nlen), 1)) != 0) |
| 716 | return(res); |
| 717 | |
| 718 | if (PAX_IS_LINK(arcn->type)((arcn->type) == 5 || ((arcn->type) == 8 || (arcn->type ) == 9))) { |
| 719 | if ((res = rep_name(arcn->ln_name, |
| 720 | sizeof(arcn->ln_name), &(arcn->ln_nlen), 0)) != 0) |
| 721 | return(res); |
| 722 | } |
| 723 | } |
| 724 | |
| 725 | if (iflag) { |
| 726 | /* |
| 727 | * perform interactive file rename, then map the link if any |
| 728 | */ |
| 729 | if ((res = tty_rename(arcn)) != 0) |
| 730 | return(res); |
| 731 | if (PAX_IS_LINK(arcn->type)((arcn->type) == 5 || ((arcn->type) == 8 || (arcn->type ) == 9))) |
| 732 | sub_name(arcn->ln_name, &(arcn->ln_nlen), |
| 733 | sizeof(arcn->ln_name)); |
| 734 | } |
| 735 | return(res); |
| 736 | } |
| 737 | |
| 738 | /* |
| 739 | * tty_rename() |
| 740 | * Prompt the user for a replacement file name. A "." keeps the old name, |
| 741 | * a empty line skips the file, and an EOF on reading the tty, will cause |
| 742 | * pax to stop processing and exit. Otherwise the file name input, replaces |
| 743 | * the old one. |
| 744 | * Return: |
| 745 | * 0 process this file, 1 skip this file, -1 we need to exit pax |
| 746 | */ |
| 747 | |
| 748 | static int |
| 749 | tty_rename(ARCHD *arcn) |
| 750 | { |
| 751 | char tmpname[PAXPATHLEN3072+2]; |
| 752 | int res; |
| 753 | |
| 754 | /* |
| 755 | * prompt user for the replacement name for a file, keep trying until |
| 756 | * we get some reasonable input. Archives may have more than one file |
| 757 | * on them with the same name (from updates etc). We print verbose info |
| 758 | * on the file so the user knows what is up. |
| 759 | */ |
| 760 | tty_prnt("\nATTENTION: %s interactive file rename operation.\n", argv0); |
| 761 | |
| 762 | for (;;) { |
| 763 | ls_tty(arcn); |
| 764 | tty_prnt("Input new name, or a \".\" to keep the old name, "); |
| 765 | tty_prnt("or a \"return\" to skip this file.\n"); |
| 766 | tty_prnt("Input > "); |
| 767 | if (tty_read(tmpname, sizeof(tmpname)) < 0) |
| 768 | return(-1); |
| 769 | if (strcmp(tmpname, "..") == 0) { |
| 770 | tty_prnt("Try again, illegal file name: ..\n"); |
| 771 | continue; |
| 772 | } |
| 773 | if (strlen(tmpname) > PAXPATHLEN3072) { |
| 774 | tty_prnt("Try again, file name too long\n"); |
| 775 | continue; |
| 776 | } |
| 777 | break; |
| 778 | } |
| 779 | |
| 780 | /* |
| 781 | * empty file name, skips this file. a "." leaves it alone |
| 782 | */ |
| 783 | if (tmpname[0] == '\0') { |
| 784 | tty_prnt("Skipping file.\n"); |
| 785 | return(1); |
| 786 | } |
| 787 | if ((tmpname[0] == '.') && (tmpname[1] == '\0')) { |
| 788 | tty_prnt("Processing continues, name unchanged.\n"); |
| 789 | return(0); |
| 790 | } |
| 791 | |
| 792 | /* |
| 793 | * ok the name changed. We may run into links that point at this |
| 794 | * file later. we have to remember where the user sent the file |
| 795 | * in order to repair any links. |
| 796 | */ |
| 797 | tty_prnt("Processing continues, name changed to: %s\n", tmpname); |
| 798 | res = add_name(arcn->name, arcn->nlen, tmpname); |
| 799 | arcn->nlen = strlcpy(arcn->name, tmpname, sizeof(arcn->name)); |
| 800 | if ((size_t)arcn->nlen >= sizeof(arcn->name)) |
| 801 | arcn->nlen = sizeof(arcn->name) - 1; /* XXX truncate? */ |
| 802 | if (res < 0) |
| 803 | return(-1); |
| 804 | return(0); |
| 805 | } |
| 806 | |
| 807 | /* |
| 808 | * set_dest() |
| 809 | * fix up the file name and the link name (if any) so this file will land |
| 810 | * in the destination directory (used during copy() -rw). |
| 811 | * Return: |
| 812 | * 0 if ok, -1 if failure (name too long) |
| 813 | */ |
| 814 | |
| 815 | int |
| 816 | set_dest(ARCHD *arcn, char *dest_dir, int dir_len) |
| 817 | { |
| 818 | if (fix_path(arcn->name, &(arcn->nlen), dest_dir, dir_len) < 0) |
| 819 | return(-1); |
| 820 | |
| 821 | /* |
| 822 | * It is really hard to deal with symlinks here, we cannot be sure |
| 823 | * if the name they point was moved (or will be moved). It is best to |
| 824 | * leave them alone. |
| 825 | */ |
| 826 | if (!PAX_IS_HARDLINK(arcn->type)((arcn->type) == 8 || (arcn->type) == 9)) |
| 827 | return(0); |
| 828 | |
| 829 | if (fix_path(arcn->ln_name, &(arcn->ln_nlen), dest_dir, dir_len) < 0) |
| 830 | return(-1); |
| 831 | return(0); |
| 832 | } |
| 833 | |
| 834 | /* |
| 835 | * fix_path |
| 836 | * concatenate dir_name and or_name and store the result in or_name (if |
| 837 | * it fits). This is one ugly function. |
| 838 | * Return: |
| 839 | * 0 if ok, -1 if the final name is too long |
| 840 | */ |
| 841 | |
| 842 | static int |
| 843 | fix_path(char *or_name, int *or_len, char *dir_name, int dir_len) |
| 844 | { |
| 845 | char *src; |
| 846 | char *dest; |
| 847 | char *start; |
| 848 | int len; |
| 849 | |
| 850 | /* |
| 851 | * we shift the or_name to the right enough to tack in the dir_name |
| 852 | * at the front. We make sure we have enough space for it all before |
| 853 | * we start. since dest always ends in a slash, we skip of or_name |
| 854 | * if it also starts with one. |
| 855 | */ |
| 856 | start = or_name; |
| 857 | src = start + *or_len; |
| 858 | dest = src + dir_len; |
| 859 | if (*start == '/') { |
| 860 | ++start; |
| 861 | --dest; |
| 862 | } |
| 863 | if ((len = dest - or_name) > PAXPATHLEN3072) { |
| 864 | paxwarn(1, "File name %s/%s, too long", dir_name, start); |
| 865 | return(-1); |
| 866 | } |
| 867 | *or_len = len; |
| 868 | |
| 869 | /* |
| 870 | * enough space, shift |
| 871 | */ |
| 872 | while (src >= start) |
| 873 | *dest-- = *src--; |
| 874 | src = dir_name + dir_len - 1; |
| 875 | |
| 876 | /* |
| 877 | * splice in the destination directory name |
| 878 | */ |
| 879 | while (src >= dir_name) |
| 880 | *dest-- = *src--; |
| 881 | |
| 882 | *(or_name + len) = '\0'; |
| 883 | return(0); |
| 884 | } |
| 885 | |
| 886 | /* |
| 887 | * rep_name() |
| 888 | * walk down the list of replacement strings applying each one in order. |
| 889 | * when we find one with a successful substitution, we modify the name |
| 890 | * as specified. if required, we print the results. if the resulting name |
| 891 | * is empty, we will skip this archive member. We use the regexp(3) |
| 892 | * routines (regexp() ought to win a prize as having the most cryptic |
| 893 | * library function manual page). |
| 894 | * --Parameters-- |
| 895 | * name is the file name we are going to apply the regular expressions to |
| 896 | * (and may be modified) |
| 897 | * nsize is the size of the name buffer. |
| 898 | * nlen is the length of this name (and is modified to hold the length of |
| 899 | * the final string). |
| 900 | * prnt is a flag that says whether to print the final result. |
| 901 | * Return: |
| 902 | * 0 if substitution was successful, 1 if we are to skip the file (the name |
| 903 | * ended up empty) |
| 904 | */ |
| 905 | |
| 906 | static int |
| 907 | rep_name(char *name, size_t nsize, int *nlen, int prnt) |
| 908 | { |
| 909 | REPLACE *pt; |
| 910 | char *inpt; |
| 911 | char *outpt; |
| 912 | char *endpt; |
| 913 | char *rpt; |
| 914 | int found = 0; |
| 915 | int res; |
| 916 | regmatch_t pm[MAXSUBEXP10]; |
| 917 | char nname[PAXPATHLEN3072+1]; /* final result of all replacements */ |
| 918 | char buf1[PAXPATHLEN3072+1]; /* where we work on the name */ |
| 919 | |
| 920 | /* |
| 921 | * copy the name into buf1, where we will work on it. We need to keep |
| 922 | * the orig string around so we can print out the result of the final |
| 923 | * replacement. We build up the final result in nname. inpt points at |
| 924 | * the string we apply the regular expression to. prnt is used to |
| 925 | * suppress printing when we handle replacements on the link field |
| 926 | * (the user already saw that substitution go by) |
| 927 | */ |
| 928 | pt = rephead; |
| 929 | (void)strlcpy(buf1, name, sizeof(buf1)); |
| 930 | inpt = buf1; |
| 931 | outpt = nname; |
| 932 | endpt = outpt + PAXPATHLEN3072; |
| 933 | |
| 934 | /* |
| 935 | * try each replacement string in order |
| 936 | */ |
| 937 | while (pt != NULL((void *)0)) { |
| 938 | do { |
| 939 | char *oinpt = inpt; |
| 940 | /* |
| 941 | * check for a successful substitution, if not go to |
| 942 | * the next pattern, or cleanup if we were global |
| 943 | */ |
| 944 | if (regexec(&(pt->rcmp), inpt, MAXSUBEXP10, pm, 0) != 0) |
| 945 | break; |
| 946 | |
| 947 | /* |
| 948 | * ok we found one. We have three parts, the prefix |
| 949 | * which did not match, the section that did and the |
| 950 | * tail (that also did not match). Copy the prefix to |
| 951 | * the final output buffer (watching to make sure we |
| 952 | * do not create a string too long). |
| 953 | */ |
| 954 | found = 1; |
| 955 | rpt = inpt + pm[0].rm_so; |
| 956 | |
| 957 | while ((inpt < rpt) && (outpt < endpt)) |
| 958 | *outpt++ = *inpt++; |
| 959 | if (outpt == endpt) |
| 960 | break; |
| 961 | |
| 962 | /* |
| 963 | * for the second part (which matched the regular |
| 964 | * expression) apply the substitution using the |
| 965 | * replacement string and place it the prefix in the |
| 966 | * final output. If we have problems, skip it. |
| 967 | */ |
| 968 | if ((res = resub(&(pt->rcmp),pm,pt->nstr,oinpt,outpt,endpt)) |
| 969 | < 0) { |
| 970 | if (prnt) |
| 971 | paxwarn(1, "Replacement name error %s", |
| 972 | name); |
| 973 | return(1); |
| 974 | } |
| 975 | outpt += res; |
| 976 | |
| 977 | /* |
| 978 | * we set up to look again starting at the first |
| 979 | * character in the tail (of the input string right |
| 980 | * after the last character matched by the regular |
| 981 | * expression (inpt always points at the first char in |
| 982 | * the string to process). If we are not doing a global |
| 983 | * substitution, we will use inpt to copy the tail to |
| 984 | * the final result. Make sure we do not overrun the |
| 985 | * output buffer |
| 986 | */ |
| 987 | inpt += pm[0].rm_eo - pm[0].rm_so; |
| 988 | |
| 989 | if ((outpt == endpt) || (*inpt == '\0')) |
| 990 | break; |
| 991 | |
| 992 | /* |
| 993 | * if the user wants global we keep trying to |
| 994 | * substitute until it fails, then we are done. |
| 995 | */ |
| 996 | } while (pt->flgs & GLOB0x2); |
| 997 | |
| 998 | if (found) |
| 999 | break; |
| 1000 | |
| 1001 | /* |
| 1002 | * a successful substitution did NOT occur, try the next one |
| 1003 | */ |
| 1004 | pt = pt->fow; |
| 1005 | } |
| 1006 | |
| 1007 | if (found) { |
| 1008 | /* |
| 1009 | * we had a substitution, copy the last tail piece (if there is |
| 1010 | * room) to the final result |
| 1011 | */ |
| 1012 | while ((outpt < endpt) && (*inpt != '\0')) |
| 1013 | *outpt++ = *inpt++; |
| 1014 | |
| 1015 | *outpt = '\0'; |
| 1016 | if ((outpt == endpt) && (*inpt != '\0')) { |
| 1017 | if (prnt) |
| 1018 | paxwarn(1,"Replacement name too long %s >> %s", |
| 1019 | name, nname); |
| 1020 | return(1); |
| 1021 | } |
| 1022 | |
| 1023 | /* |
| 1024 | * inform the user of the result if wanted |
| 1025 | */ |
| 1026 | if (prnt && (pt->flgs & PRNT0x1)) { |
| 1027 | if (*nname == '\0') |
| 1028 | (void)fprintf(stderr(&__sF[2]),"%s >> <empty string>\n", |
| 1029 | name); |
| 1030 | else |
| 1031 | (void)fprintf(stderr(&__sF[2]),"%s >> %s\n", name, nname); |
| 1032 | } |
| 1033 | |
| 1034 | /* |
| 1035 | * if empty inform the caller this file is to be skipped |
| 1036 | * otherwise copy the new name over the orig name and return |
| 1037 | */ |
| 1038 | if (*nname == '\0') |
| 1039 | return(1); |
| 1040 | *nlen = strlcpy(name, nname, nsize); |
| 1041 | } |
| 1042 | return(0); |
| 1043 | } |
| 1044 | |
/*
 * resub()
 *	apply the replacement to the matched expression. expand out the old
 *	style ed(1) subexpression expansion:
 *	  &	the whole matched text (pm[0])
 *	  \N	subexpression N, N a single digit (\0 is the whole match)
 *	  \c	the character c taken literally (e.g. \& or \\)
 *	any other character is copied as is.
 * --Parameters--
 *	rp is the compiled expression (only re_nsub is read, to validate \N)
 *	pm holds the match offsets, relative to inpt
 *	src is the NUL terminated replacement string
 *	inpt is the string the offsets in pm refer to
 *	dest is where the expansion is written (it is NOT NUL terminated here)
 *	destend is one past the last byte of dest that may be written
 * Return:
 *	-1 if error (a \N naming a subexpression the pattern does not have, or
 *	the expansion does not fit before destend), or the number of
 *	characters added to the destination.
 */

static int
resub(regex_t *rp, regmatch_t *pm, char *src, char *inpt, char *dest,
    char *destend)
{
	char *spt = src;
	char *dpt = dest;
	char c;
	regmatch_t *pmpt;
	int len;
	int subexcnt = rp->re_nsub;

	while ((c = *spt++) != '\0') {
		/*
		 * see if we just have an ordinary replacement character
		 * or we refer to a subexpression.
		 */
		if (c == '&') {
			pmpt = pm;
		} else if ((c == '\\') && (*spt >= '0') && (*spt <= '9')) {
			/*
			 * make sure there is a subexpression as specified
			 */
			if ((len = *spt++ - '0') > subexcnt)
				return(-1);
			pmpt = pm + len;
		} else {
			/*
			 * Ordinary character, just copy it. A backslash
			 * quotes the next character; a trailing backslash
			 * stands for itself.
			 */
			if ((c == '\\') && (*spt != '\0'))
				c = *spt++;
			/*
			 * out of room is an error here exactly as it is for
			 * a subexpression below; never hand back a silently
			 * truncated expansion.
			 */
			if (dpt >= destend)
				return(-1);
			*dpt++ = c;
			continue;
		}

		/*
		 * continue if the subexpression is bogus (did not take part
		 * in the match, or matched the empty string)
		 */
		if ((pmpt->rm_so < 0) || (pmpt->rm_eo < 0) ||
		    ((len = pmpt->rm_eo - pmpt->rm_so) <= 0))
			continue;

		/*
		 * copy the subexpression to the destination.
		 * fail if we run out of space
		 */
		if (len > (destend - dpt))
			return(-1);
		memcpy(dpt, inpt + pmpt->rm_so, (size_t)len);
		dpt += len;
	}
	return(dpt - dest);
}