| File: | src/usr.bin/csplit/csplit.c |
| Warning: | line 134, column 2 Value stored to 'argc' is never read |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
| 1 | /* $OpenBSD: csplit.c,v 1.10 2021/07/08 00:38:42 millert Exp $ */ |
| 2 | /* $FreeBSD: src/usr.bin/csplit/csplit.c,v 1.9 2004/03/22 11:15:03 tjr Exp $ */ |
| 3 | |
| 4 | /*- |
| 5 | * Copyright (c) 2002 Tim J. Robbins. |
| 6 | * All rights reserved. |
| 7 | * |
| 8 | * Redistribution and use in source and binary forms, with or without |
| 9 | * modification, are permitted provided that the following conditions |
| 10 | * are met: |
| 11 | * 1. Redistributions of source code must retain the above copyright |
| 12 | * notice, this list of conditions and the following disclaimer. |
| 13 | * 2. Redistributions in binary form must reproduce the above copyright |
| 14 | * notice, this list of conditions and the following disclaimer in the |
| 15 | * documentation and/or other materials provided with the distribution. |
| 16 | * |
| 17 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
| 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
| 21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| 22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
| 23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| 24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| 25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
| 26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| 27 | * SUCH DAMAGE. |
| 28 | */ |
| 29 | |
| 30 | /* |
| 31 | * csplit -- split files based on context |
| 32 | * |
| 33 | * This utility splits its input into numbered output files by line number |
| 34 | * or by a regular expression. Regular expression matches have an optional |
| 35 | * offset with them, allowing the split to occur a specified number of |
| 36 | * lines before or after the match. |
| 37 | * |
| 38 | * To handle negative offsets, we stop reading when the match occurs and |
| 39 | * store the offset that the file should have been split at, then use |
| 40 | * this output file as input until all the "overflowed" lines have been read. |
| 41 | * The file is then closed and truncated to the correct length. |
| 42 | * |
| 43 | * We assume that the output files are seekable (i.e. they cannot be |
| 44 | * symlinks to named pipes or character devices), but make no such |
| 45 | * assumption about the input. |
| 46 | */ |
| 47 | |
| 48 | #include <sys/types.h> |
| 49 | |
| 50 | #include <ctype.h> |
| 51 | #include <err.h> |
| 52 | #include <errno.h> |
| 53 | #include <limits.h> |
| 54 | #include <regex.h> |
| 55 | #include <signal.h> |
| 56 | #include <stdint.h> |
| 57 | #include <stdio.h> |
| 58 | #include <stdlib.h> |
| 59 | #include <string.h> |
| 60 | #include <unistd.h> |
| 61 | |
| 62 | void cleanup(void); |
| 63 | void do_lineno(const char *); |
| 64 | void do_rexp(const char *); |
| 65 | char *get_line(void); |
| 66 | void handlesig(int); |
| 67 | FILE *newfile(void); |
| 68 | void toomuch(FILE *, long); |
| 69 | static void __dead__attribute__((__noreturn__)) usage(void); |
| 70 | |
| 71 | /* |
| 72 | * Command line options |
| 73 | */ |
| 74 | const char *prefix; /* File name prefix */ |
| 75 | long sufflen; /* Number of decimal digits for suffix */ |
| 76 | int sflag; /* Suppress output of file names */ |
| 77 | int kflag; /* Keep output if error occurs */ |
| 78 | |
| 79 | /* |
| 80 | * Other miscellaneous globals (XXX too many) |
| 81 | */ |
| 82 | long lineno; /* Current line number in input file */ |
| 83 | long reps; /* Number of repetitions for this pattern */ |
| 84 | long nfiles; /* Number of files output so far */ |
| 85 | long maxfiles; /* Maximum number of files we can create */ |
| 86 | char currfile[PATH_MAX1024]; /* Current output file */ |
| 87 | const char *infn; /* Name of the input file */ |
| 88 | FILE *infile; /* Input file handle */ |
| 89 | FILE *overfile; /* Overflow file for toomuch() */ |
| 90 | off_t truncofs; /* Offset this file should be truncated at */ |
| 91 | int doclean; /* Should cleanup() remove output? */ |
| 92 | |
| 93 | int |
| 94 | main(int argc, char *argv[]) |
| 95 | { |
| 96 | struct sigaction sa; |
| 97 | long i; |
| 98 | int ch; |
| 99 | const char *expr; |
| 100 | char *ep, *p; |
| 101 | FILE *ofp; |
| 102 | |
| 103 | if (pledge("stdio rpath wpath cpath", NULL((void *)0)) == -1) |
| 104 | err(1, "pledge"); |
| 105 | |
| 106 | kflag = sflag = 0; |
| 107 | prefix = "xx"; |
| 108 | sufflen = 2; |
| 109 | while ((ch = getopt(argc, argv, "f:kn:s")) != -1) { |
| 110 | switch (ch) { |
| 111 | case 'f': |
| 112 | prefix = optarg; |
| 113 | break; |
| 114 | case 'k': |
| 115 | kflag = 1; |
| 116 | break; |
| 117 | case 'n': |
| 118 | errno(*__errno()) = 0; |
| 119 | sufflen = strtol(optarg, &ep, 10); |
| 120 | if (sufflen <= 0 || *ep != '\0' || errno(*__errno()) != 0) |
| 121 | errx(1, "%s: bad suffix length", optarg); |
| 122 | break; |
| 123 | case 's': |
| 124 | sflag = 1; |
| 125 | break; |
| 126 | default: |
| 127 | usage(); |
| 128 | } |
| 129 | } |
| 130 | |
| 131 | if (sufflen + strlen(prefix) >= PATH_MAX1024) |
| 132 | errx(1, "name too long"); |
| 133 | |
| 134 | argc -= optind; |
Value stored to 'argc' is never read | |
| 135 | argv += optind; |
| 136 | |
| 137 | if ((infn = *argv++) == NULL((void *)0)) |
| 138 | usage(); |
| 139 | if (strcmp(infn, "-") == 0) { |
| 140 | infile = stdin(&__sF[0]); |
| 141 | infn = "stdin"; |
| 142 | } else if ((infile = fopen(infn, "r")) == NULL((void *)0)) |
| 143 | err(1, "%s", infn); |
| 144 | |
| 145 | if (!kflag) { |
| 146 | doclean = 1; |
| 147 | atexit(cleanup); |
| 148 | sa.sa_flags = 0; |
| 149 | sa.sa_handler__sigaction_u.__sa_handler = handlesig; |
| 150 | sigemptyset(&sa.sa_mask); |
| 151 | sigaddset(&sa.sa_mask, SIGHUP1); |
| 152 | sigaddset(&sa.sa_mask, SIGINT2); |
| 153 | sigaddset(&sa.sa_mask, SIGTERM15); |
| 154 | sigaction(SIGHUP1, &sa, NULL((void *)0)); |
| 155 | sigaction(SIGINT2, &sa, NULL((void *)0)); |
| 156 | sigaction(SIGTERM15, &sa, NULL((void *)0)); |
| 157 | } |
| 158 | |
| 159 | lineno = 0; |
| 160 | nfiles = 0; |
| 161 | truncofs = 0; |
| 162 | overfile = NULL((void *)0); |
| 163 | |
| 164 | /* Ensure 10^sufflen < LONG_MAX. */ |
| 165 | for (maxfiles = 1, i = 0; i < sufflen; i++) { |
| 166 | if (maxfiles > LONG_MAX9223372036854775807L / 10) |
| 167 | errx(1, "%ld: suffix too long (limit %ld)", |
| 168 | sufflen, i); |
| 169 | maxfiles *= 10; |
| 170 | } |
| 171 | |
| 172 | /* Create files based on supplied patterns. */ |
| 173 | while (nfiles < maxfiles - 1 && (expr = *argv++) != NULL((void *)0)) { |
| 174 | /* Look ahead & see if this pattern has any repetitions. */ |
| 175 | if (*argv != NULL((void *)0) && **argv == '{') { |
| 176 | errno(*__errno()) = 0; |
| 177 | reps = strtol(*argv + 1, &ep, 10); |
| 178 | if (reps < 0 || *ep != '}' || errno(*__errno()) != 0) |
| 179 | errx(1, "%s: bad repetition count", *argv + 1); |
| 180 | argv++; |
| 181 | } else |
| 182 | reps = 0; |
| 183 | |
| 184 | if (*expr == '/' || *expr == '%') { |
| 185 | do { |
| 186 | do_rexp(expr); |
| 187 | } while (reps-- != 0 && nfiles < maxfiles - 1); |
| 188 | } else if (isdigit((unsigned char)*expr)) |
| 189 | do_lineno(expr); |
| 190 | else |
| 191 | errx(1, "%s: unrecognised pattern", expr); |
| 192 | } |
| 193 | |
| 194 | /* Copy the rest into a new file. */ |
| 195 | if (!feof(infile)(!__isthreaded ? (((infile)->_flags & 0x0020) != 0) : ( feof)(infile))) { |
| 196 | ofp = newfile(); |
| 197 | while ((p = get_line()) != NULL((void *)0) && fputs(p, ofp) == 0) |
| 198 | ; |
| 199 | if (!sflag) |
| 200 | printf("%jd\n", (intmax_t)ftello(ofp)); |
| 201 | if (fclose(ofp) != 0) |
| 202 | err(1, "%s", currfile); |
| 203 | } |
| 204 | |
| 205 | toomuch(NULL((void *)0), 0); |
| 206 | doclean = 0; |
| 207 | |
| 208 | return (0); |
| 209 | } |
| 210 | |
| 211 | static void __dead__attribute__((__noreturn__)) |
| 212 | usage(void) |
| 213 | { |
| 214 | extern char *__progname; |
| 215 | |
| 216 | fprintf(stderr(&__sF[2]), |
| 217 | "usage: %s [-ks] [-f prefix] [-n number] file args ...\n", |
| 218 | __progname); |
| 219 | exit(1); |
| 220 | } |
| 221 | |
/*
 * Signal handler installed by main() for SIGHUP, SIGINT and SIGTERM:
 * announce the signal on stderr, remove the output created so far via
 * cleanup(), and exit immediately with status 2.
 */
/* ARGSUSED */
void
handlesig(int sig)
{
	static const char notice[] = "csplit: caught signal, cleaning up\n";

	/* write(2) rather than stdio: we are inside a signal handler. */
	write(STDERR_FILENO, notice, sizeof(notice) - 1);
	cleanup();
	_exit(2);
}
| 232 | |
| 233 | /* Create a new output file. */ |
| 234 | FILE * |
| 235 | newfile(void) |
| 236 | { |
| 237 | FILE *fp; |
| 238 | |
| 239 | if ((size_t)snprintf(currfile, sizeof(currfile), "%s%0*ld", prefix, |
| 240 | (int)sufflen, nfiles) >= sizeof(currfile)) |
| 241 | errc(1, ENAMETOOLONG63, "%s", currfile); |
| 242 | if ((fp = fopen(currfile, "w+")) == NULL((void *)0)) |
| 243 | err(1, "%s", currfile); |
| 244 | nfiles++; |
| 245 | |
| 246 | return (fp); |
| 247 | } |
| 248 | |
| 249 | /* Remove partial output, called before exiting. */ |
| 250 | void |
| 251 | cleanup(void) |
| 252 | { |
| 253 | char fnbuf[PATH_MAX1024]; |
| 254 | long i; |
| 255 | |
| 256 | if (!doclean) |
| 257 | return; |
| 258 | |
| 259 | /* |
| 260 | * NOTE: One cannot portably assume to be able to call snprintf() from |
| 261 | * inside a signal handler. It is, however, safe to do on OpenBSD. |
| 262 | */ |
| 263 | for (i = 0; i < nfiles; i++) { |
| 264 | snprintf(fnbuf, sizeof(fnbuf), "%s%0*ld", prefix, |
| 265 | (int)sufflen, i); |
| 266 | unlink(fnbuf); |
| 267 | } |
| 268 | } |
| 269 | |
| 270 | /* Read a line from the input into a static buffer. */ |
| 271 | char * |
| 272 | get_line(void) |
| 273 | { |
| 274 | static char lbuf[LINE_MAX2048]; |
| 275 | FILE *src; |
| 276 | |
| 277 | src = overfile != NULL((void *)0) ? overfile : infile; |
| 278 | |
| 279 | again: if (fgets(lbuf, sizeof(lbuf), src) == NULL((void *)0)) { |
| 280 | if (src == overfile) { |
| 281 | src = infile; |
| 282 | goto again; |
| 283 | } |
| 284 | return (NULL((void *)0)); |
| 285 | } |
| 286 | if (ferror(src)(!__isthreaded ? (((src)->_flags & 0x0040) != 0) : (ferror )(src))) |
| 287 | err(1, "%s", infn); |
| 288 | lineno++; |
| 289 | |
| 290 | return (lbuf); |
| 291 | } |
| 292 | |
| 293 | /* Conceptually rewind the input (as obtained by get_line()) back `n' lines. */ |
| 294 | void |
| 295 | toomuch(FILE *ofp, long n) |
| 296 | { |
| 297 | char buf[BUFSIZ1024]; |
| 298 | size_t i, nread; |
| 299 | |
| 300 | if (overfile != NULL((void *)0)) { |
| 301 | /* |
| 302 | * Truncate the previous file we overflowed into back to |
| 303 | * the correct length, close it. |
| 304 | */ |
| 305 | if (fflush(overfile) != 0) |
| 306 | err(1, "overflow"); |
| 307 | if (ftruncate(fileno(overfile)(!__isthreaded ? ((overfile)->_file) : (fileno)(overfile)), truncofs) != 0) |
| 308 | err(1, "overflow"); |
| 309 | if (fclose(overfile) != 0) |
| 310 | err(1, "overflow"); |
| 311 | overfile = NULL((void *)0); |
| 312 | } |
| 313 | |
| 314 | if (n == 0) |
| 315 | /* Just tidying up */ |
| 316 | return; |
| 317 | |
| 318 | lineno -= n; |
| 319 | |
| 320 | /* |
| 321 | * Wind the overflow file backwards to `n' lines before the |
| 322 | * current one. |
| 323 | */ |
| 324 | do { |
| 325 | if (ftello(ofp) < (off_t)sizeof(buf)) |
| 326 | rewind(ofp); |
| 327 | else |
| 328 | fseeko(ofp, -(off_t)sizeof(buf), SEEK_CUR1); |
| 329 | if (ferror(ofp)(!__isthreaded ? (((ofp)->_flags & 0x0040) != 0) : (ferror )(ofp))) |
| 330 | errx(1, "%s: can't seek", currfile); |
| 331 | if ((nread = fread(buf, 1, sizeof(buf), ofp)) == 0) |
| 332 | errx(1, "can't read overflowed output"); |
| 333 | if (fseeko(ofp, -(off_t)nread, SEEK_CUR1) != 0) |
| 334 | err(1, "%s", currfile); |
| 335 | for (i = 1; i <= nread; i++) |
| 336 | if (buf[nread - i] == '\n' && n-- == 0) |
| 337 | break; |
| 338 | if (ftello(ofp) == 0) |
| 339 | break; |
| 340 | } while (n > 0); |
| 341 | if (fseeko(ofp, (off_t)(nread - i + 1), SEEK_CUR1) != 0) |
| 342 | err(1, "%s", currfile); |
| 343 | |
| 344 | /* |
| 345 | * get_line() will read from here. Next call will truncate to |
| 346 | * truncofs in this file. |
| 347 | */ |
| 348 | overfile = ofp; |
| 349 | truncofs = ftello(overfile); |
| 350 | } |
| 351 | |
| 352 | /* Handle splits for /regexp/ and %regexp% patterns. */ |
| 353 | void |
| 354 | do_rexp(const char *expr) |
| 355 | { |
| 356 | regex_t cre; |
| 357 | intmax_t nwritten; |
| 358 | long ofs; |
| 359 | int first; |
| 360 | char *ecopy, *ep, *p, *pofs, *re; |
| 361 | FILE *ofp; |
| 362 | |
| 363 | if ((ecopy = strdup(expr)) == NULL((void *)0)) |
| 364 | err(1, "strdup"); |
| 365 | |
| 366 | re = ecopy + 1; |
| 367 | if ((pofs = strrchr(ecopy, *expr)) == NULL((void *)0) || pofs[-1] == '\\') |
| 368 | errx(1, "%s: missing trailing %c", expr, *expr); |
| 369 | *pofs++ = '\0'; |
| 370 | |
| 371 | if (*pofs != '\0') { |
| 372 | errno(*__errno()) = 0; |
| 373 | ofs = strtol(pofs, &ep, 10); |
| 374 | if (*ep != '\0' || errno(*__errno()) != 0) |
| 375 | errx(1, "%s: bad offset", pofs); |
| 376 | } else |
| 377 | ofs = 0; |
| 378 | |
| 379 | if (regcomp(&cre, re, REG_BASIC0000|REG_NOSUB0004) != 0) |
| 380 | errx(1, "%s: bad regular expression", re); |
| 381 | |
| 382 | if (*expr == '/') |
| 383 | /* /regexp/: Save results to a file. */ |
| 384 | ofp = newfile(); |
| 385 | else { |
| 386 | /* %regexp%: Make a temporary file for overflow. */ |
| 387 | if ((ofp = tmpfile()) == NULL((void *)0)) |
| 388 | err(1, "tmpfile"); |
| 389 | } |
| 390 | |
| 391 | /* Read and output lines until we get a match. */ |
| 392 | first = 1; |
| 393 | while ((p = get_line()) != NULL((void *)0)) { |
| 394 | if (fputs(p, ofp) != 0) |
| 395 | break; |
| 396 | if (!first && regexec(&cre, p, 0, NULL((void *)0), 0) == 0) |
| 397 | break; |
| 398 | first = 0; |
| 399 | } |
| 400 | |
| 401 | if (p == NULL((void *)0)) { |
| 402 | toomuch(NULL((void *)0), 0); |
| 403 | errx(1, "%s: no match", re); |
| 404 | } |
| 405 | |
| 406 | if (ofs <= 0) { |
| 407 | /* |
| 408 | * Negative (or zero) offset: throw back any lines we should |
| 409 | * not have read yet. |
| 410 | */ |
| 411 | if (p != NULL((void *)0)) { |
| 412 | toomuch(ofp, -ofs + 1); |
| 413 | nwritten = (intmax_t)truncofs; |
| 414 | } else |
| 415 | nwritten = (intmax_t)ftello(ofp); |
| 416 | } else { |
| 417 | /* |
| 418 | * Positive offset: copy the requested number of lines |
| 419 | * after the match. |
| 420 | */ |
| 421 | while (--ofs > 0 && (p = get_line()) != NULL((void *)0)) |
| 422 | fputs(p, ofp); |
| 423 | toomuch(NULL((void *)0), 0); |
| 424 | nwritten = (intmax_t)ftello(ofp); |
| 425 | if (fclose(ofp) != 0) |
| 426 | err(1, "%s", currfile); |
| 427 | } |
| 428 | |
| 429 | if (!sflag && *expr == '/') |
| 430 | printf("%jd\n", nwritten); |
| 431 | |
| 432 | regfree(&cre); |
| 433 | free(ecopy); |
| 434 | } |
| 435 | |
| 436 | /* Handle splits based on line number. */ |
| 437 | void |
| 438 | do_lineno(const char *expr) |
| 439 | { |
| 440 | long lastline, tgtline; |
| 441 | char *ep, *p; |
| 442 | FILE *ofp; |
| 443 | |
| 444 | errno(*__errno()) = 0; |
| 445 | tgtline = strtol(expr, &ep, 10); |
| 446 | if (tgtline <= 0 || errno(*__errno()) != 0 || *ep != '\0') |
| 447 | errx(1, "%s: bad line number", expr); |
| 448 | lastline = tgtline; |
| 449 | if (lastline <= lineno) |
| 450 | errx(1, "%s: can't go backwards", expr); |
| 451 | |
| 452 | while (nfiles < maxfiles - 1) { |
| 453 | ofp = newfile(); |
| 454 | while (lineno + 1 != lastline) { |
| 455 | if ((p = get_line()) == NULL((void *)0)) |
| 456 | errx(1, "%ld: out of range", lastline); |
| 457 | if (fputs(p, ofp) != 0) |
| 458 | break; |
| 459 | } |
| 460 | if (!sflag) |
| 461 | printf("%jd\n", (intmax_t)ftello(ofp)); |
| 462 | if (fclose(ofp) != 0) |
| 463 | err(1, "%s", currfile); |
| 464 | if (reps-- == 0) |
| 465 | break; |
| 466 | lastline += tgtline; |
| 467 | } |
| 468 | } |