| File: | src/usr.bin/sed/compile.c |
| Warning: | line 701, column 7 Dereference of null pointer (loaded from variable 's') |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
| 1 | /* $OpenBSD: compile.c,v 1.50 2018/12/07 14:45:40 schwarze Exp $ */ | |||
| 2 | ||||
| 3 | /*- | |||
| 4 | * Copyright (c) 1992 Diomidis Spinellis. | |||
| 5 | * Copyright (c) 1992, 1993 | |||
| 6 | * The Regents of the University of California. All rights reserved. | |||
| 7 | * | |||
| 8 | * This code is derived from software contributed to Berkeley by | |||
| 9 | * Diomidis Spinellis of Imperial College, University of London. | |||
| 10 | * | |||
| 11 | * Redistribution and use in source and binary forms, with or without | |||
| 12 | * modification, are permitted provided that the following conditions | |||
| 13 | * are met: | |||
| 14 | * 1. Redistributions of source code must retain the above copyright | |||
| 15 | * notice, this list of conditions and the following disclaimer. | |||
| 16 | * 2. Redistributions in binary form must reproduce the above copyright | |||
| 17 | * notice, this list of conditions and the following disclaimer in the | |||
| 18 | * documentation and/or other materials provided with the distribution. | |||
| 19 | * 3. Neither the name of the University nor the names of its contributors | |||
| 20 | * may be used to endorse or promote products derived from this software | |||
| 21 | * without specific prior written permission. | |||
| 22 | * | |||
| 23 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |||
| 24 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| 25 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| 26 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |||
| 27 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| 28 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |||
| 29 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |||
| 30 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |||
| 31 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |||
| 32 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |||
| 33 | * SUCH DAMAGE. | |||
| 34 | */ | |||
| 35 | ||||
| 36 | #include <sys/types.h> | |||
| 37 | #include <sys/stat.h> | |||
| 38 | ||||
| 39 | #include <ctype.h> | |||
| 40 | #include <errno(*__errno()).h> | |||
| 41 | #include <fcntl.h> | |||
| 42 | #include <limits.h> | |||
| 43 | #include <regex.h> | |||
| 44 | #include <stdio.h> | |||
| 45 | #include <stdlib.h> | |||
| 46 | #include <string.h> | |||
| 47 | ||||
| 48 | #include "defs.h" | |||
| 49 | #include "extern.h" | |||
| 50 | ||||
| 51 | #define LHSZ128 128 | |||
| 52 | #define LHMASK(128 - 1) (LHSZ128 - 1) | |||
| 53 | static struct labhash { | |||
| 54 | struct labhash *lh_next; | |||
| 55 | u_int lh_hash; | |||
| 56 | struct s_command *lh_cmd; | |||
| 57 | int lh_ref; | |||
| 58 | } *labels[LHSZ128]; | |||
| 59 | ||||
/* Forward declarations of the file-local helpers defined below. */
static char	 *compile_addr(char *, struct s_addr *);
static char	 *compile_ccl(char **, char *);
static char	 *compile_delimited(char *, char *);
static char	 *compile_flags(char *, struct s_subst *);
static char	 *compile_re(char *, regex_t **);
static char	 *compile_subst(char *, struct s_subst *);
static char	 *compile_text(void);
static char	 *compile_tr(char *, char **);
static struct s_command
		**compile_stream(struct s_command **);
static char	 *duptoeol(char *, char *, char **);
static void	  enterlabel(struct s_command *);
static struct s_command
		 *findlabel(char *);
static void	  fixuplabel(struct s_command *, struct s_command *);
static void	  uselabel(void);
| 76 | ||||
| 77 | /* | |||
| 78 | * Command specification. This is used to drive the command parser. | |||
| 79 | */ | |||
| 80 | struct s_format { | |||
| 81 | char code; /* Command code */ | |||
| 82 | int naddr; /* Number of address args */ | |||
| 83 | enum e_args args; /* Argument type */ | |||
| 84 | }; | |||
| 85 | ||||
| 86 | static struct s_format cmd_fmts[] = { | |||
| 87 | {'{', 2, GROUP}, | |||
| 88 | {'}', 0, ENDGROUP}, | |||
| 89 | {'a', 1, TEXT}, | |||
| 90 | {'b', 2, BRANCH}, | |||
| 91 | {'c', 2, TEXT}, | |||
| 92 | {'d', 2, EMPTY}, | |||
| 93 | {'D', 2, EMPTY}, | |||
| 94 | {'g', 2, EMPTY}, | |||
| 95 | {'G', 2, EMPTY}, | |||
| 96 | {'h', 2, EMPTY}, | |||
| 97 | {'H', 2, EMPTY}, | |||
| 98 | {'i', 1, TEXT}, | |||
| 99 | {'l', 2, EMPTY}, | |||
| 100 | {'n', 2, EMPTY}, | |||
| 101 | {'N', 2, EMPTY}, | |||
| 102 | {'p', 2, EMPTY}, | |||
| 103 | {'P', 2, EMPTY}, | |||
| 104 | {'q', 1, EMPTY}, | |||
| 105 | {'r', 1, RFILE}, | |||
| 106 | {'s', 2, SUBST}, | |||
| 107 | {'t', 2, BRANCH}, | |||
| 108 | {'w', 2, WFILE}, | |||
| 109 | {'x', 2, EMPTY}, | |||
| 110 | {'y', 2, TR}, | |||
| 111 | {'!', 2, NONSEL}, | |||
| 112 | {':', 0, LABEL}, | |||
| 113 | {'#', 0, COMMENT}, | |||
| 114 | {'=', 1, EMPTY}, | |||
| 115 | {'\0', 0, COMMENT}, | |||
| 116 | }; | |||
| 117 | ||||
/* The compiled program: head of the command list built by compile(). */
struct s_command *prog;
| 120 | ||||
| 121 | /* | |||
| 122 | * Compile the program into prog. | |||
| 123 | * Initialise appends. | |||
| 124 | */ | |||
| 125 | void | |||
| 126 | compile(void) | |||
| 127 | { | |||
| 128 | *compile_stream(&prog) = NULL((void *)0); | |||
| 129 | fixuplabel(prog, NULL((void *)0)); | |||
| 130 | uselabel(); | |||
| 131 | appends = xreallocarray(NULL((void *)0), appendnum, sizeof(struct s_appends)); | |||
| 132 | match = xreallocarray(NULL((void *)0), maxnsub + 1, sizeof(regmatch_t)); | |||
| 133 | } | |||
| 134 | ||||
/*
 * Advance the local variable p past any ASCII whitespace.  A NULL p is
 * left untouched.  Expands to a single statement.
 */
#define EATSPACE() do {							\
	if (p)								\
		while (isascii((unsigned char)*p) &&			\
		    isspace((unsigned char)*p))				\
			p++;						\
} while (0)
| 141 | ||||
| 142 | static struct s_command ** | |||
| 143 | compile_stream(struct s_command **link) | |||
| 144 | { | |||
| 145 | char *p; | |||
| 146 | static char *lbuf; /* To avoid excessive malloc calls */ | |||
| 147 | static size_t bufsize; | |||
| 148 | struct s_command *cmd, *cmd2, *stack; | |||
| 149 | struct s_format *fp; | |||
| 150 | int naddr; /* Number of addresses */ | |||
| 151 | ||||
| 152 | stack = 0; | |||
| 153 | for (;;) { | |||
| ||||
| 154 | if ((p = cu_fgets(&lbuf, &bufsize)) == NULL((void *)0)) { | |||
| 155 | if (stack != 0) | |||
| 156 | error(COMPILE2, "unexpected EOF (pending }'s)"); | |||
| 157 | return (link); | |||
| 158 | } | |||
| 159 | ||||
| 160 | semicolon: EATSPACE()do { if (p) while (isascii((unsigned char)*p) && isspace ((unsigned char)*p)) p++; } while (0); | |||
| 161 | if (*p == '#' || *p == '\0') | |||
| 162 | continue; | |||
| 163 | if (*p == ';') { | |||
| 164 | p++; | |||
| 165 | goto semicolon; | |||
| 166 | } | |||
| 167 | *link = cmd = xmalloc(sizeof(struct s_command)); | |||
| 168 | link = &cmd->next; | |||
| 169 | cmd->nonsel = cmd->inrange = 0; | |||
| 170 | /* First parse the addresses */ | |||
| 171 | naddr = 0; | |||
| 172 | ||||
| 173 | /* Valid characters to start an address */ | |||
| 174 | #define addrchar(c)(strchr("0123456789/\\$", (c))) (strchr("0123456789/\\$", (c))) | |||
| 175 | if (addrchar(*p)(strchr("0123456789/\\$", (*p)))) { | |||
| 176 | naddr++; | |||
| 177 | cmd->a1 = xmalloc(sizeof(struct s_addr)); | |||
| 178 | p = compile_addr(p, cmd->a1); | |||
| 179 | EATSPACE()do { if (p) while (isascii((unsigned char)*p) && isspace ((unsigned char)*p)) p++; } while (0); /* EXTENSION */ | |||
| 180 | if (*p == ',') { | |||
| 181 | p++; | |||
| 182 | EATSPACE()do { if (p) while (isascii((unsigned char)*p) && isspace ((unsigned char)*p)) p++; } while (0); /* EXTENSION */ | |||
| 183 | naddr++; | |||
| 184 | cmd->a2 = xmalloc(sizeof(struct s_addr)); | |||
| 185 | p = compile_addr(p, cmd->a2); | |||
| 186 | EATSPACE()do { if (p) while (isascii((unsigned char)*p) && isspace ((unsigned char)*p)) p++; } while (0); | |||
| 187 | } else { | |||
| 188 | cmd->a2 = 0; | |||
| 189 | } | |||
| 190 | } else { | |||
| 191 | cmd->a1 = cmd->a2 = 0; | |||
| 192 | } | |||
| 193 | ||||
| 194 | nonsel: /* Now parse the command */ | |||
| 195 | if (!*p) | |||
| 196 | error(COMPILE2, "command expected"); | |||
| 197 | cmd->code = *p; | |||
| 198 | for (fp = cmd_fmts; fp->code; fp++) | |||
| 199 | if (fp->code == *p) | |||
| 200 | break; | |||
| 201 | if (!fp->code
| |||
| 202 | error(COMPILE2, "invalid command code %c", *p); | |||
| 203 | if (naddr > fp->naddr) | |||
| 204 | error(COMPILE2, | |||
| 205 | "command %c expects up to %d address(es), found %d", | |||
| 206 | *p, fp->naddr, naddr); | |||
| 207 | switch (fp->args) { | |||
| 208 | case NONSEL: /* ! */ | |||
| 209 | p++; | |||
| 210 | EATSPACE()do { if (p) while (isascii((unsigned char)*p) && isspace ((unsigned char)*p)) p++; } while (0); | |||
| 211 | cmd->nonsel = 1; | |||
| 212 | goto nonsel; | |||
| 213 | case GROUP: /* { */ | |||
| 214 | p++; | |||
| 215 | EATSPACE()do { if (p) while (isascii((unsigned char)*p) && isspace ((unsigned char)*p)) p++; } while (0); | |||
| 216 | cmd->next = stack; | |||
| 217 | stack = cmd; | |||
| 218 | link = &cmd->u.c; | |||
| 219 | if (*p) | |||
| 220 | goto semicolon; | |||
| 221 | break; | |||
| 222 | case ENDGROUP: | |||
| 223 | /* | |||
| 224 | * Short-circuit command processing, since end of | |||
| 225 | * group is really just a noop. | |||
| 226 | */ | |||
| 227 | cmd->nonsel = 1; | |||
| 228 | if (stack == 0) | |||
| 229 | error(COMPILE2, "unexpected }"); | |||
| 230 | cmd2 = stack; | |||
| 231 | stack = cmd2->next; | |||
| 232 | cmd2->next = cmd; | |||
| 233 | /*FALLTHROUGH*/ | |||
| 234 | case EMPTY: /* d D g G h H l n N p P q x = \0 */ | |||
| 235 | p++; | |||
| 236 | EATSPACE()do { if (p) while (isascii((unsigned char)*p) && isspace ((unsigned char)*p)) p++; } while (0); | |||
| 237 | if (*p == ';') { | |||
| 238 | p++; | |||
| 239 | link = &cmd->next; | |||
| 240 | goto semicolon; | |||
| 241 | } | |||
| 242 | if (*p) | |||
| 243 | error(COMPILE2, | |||
| 244 | "extra characters at the end of %c command", cmd->code); | |||
| 245 | break; | |||
| 246 | case TEXT: /* a c i */ | |||
| 247 | p++; | |||
| 248 | EATSPACE()do { if (p) while (isascii((unsigned char)*p) && isspace ((unsigned char)*p)) p++; } while (0); | |||
| 249 | if (*p != '\\') | |||
| 250 | error(COMPILE2, "command %c expects \\ followed by" | |||
| 251 | " text", cmd->code); | |||
| 252 | p++; | |||
| 253 | EATSPACE()do { if (p) while (isascii((unsigned char)*p) && isspace ((unsigned char)*p)) p++; } while (0); | |||
| 254 | if (*p) | |||
| 255 | error(COMPILE2, "extra characters after \\ at the" | |||
| 256 | " end of %c command", cmd->code); | |||
| 257 | cmd->t = compile_text(); | |||
| 258 | break; | |||
| 259 | case COMMENT: /* \0 # */ | |||
| 260 | break; | |||
| 261 | case WFILE: /* w */ | |||
| 262 | p++; | |||
| 263 | EATSPACE()do { if (p) while (isascii((unsigned char)*p) && isspace ((unsigned char)*p)) p++; } while (0); | |||
| 264 | if (*p == '\0') | |||
| 265 | error(COMPILE2, "filename expected"); | |||
| 266 | cmd->t = duptoeol(p, "w command", NULL((void *)0)); | |||
| 267 | if (aflag) { | |||
| 268 | cmd->u.fd = -1; | |||
| 269 | pledge_wpath = 1; | |||
| 270 | } | |||
| 271 | else if ((cmd->u.fd = open(p, | |||
| 272 | O_WRONLY0x0001|O_APPEND0x0008|O_CREAT0x0200|O_TRUNC0x0400, | |||
| 273 | DEFFILEMODE(0000400|0000200|0000040|0000020|0000004|0000002))) == -1) | |||
| 274 | error(FATAL1, "%s: %s", p, strerror(errno(*__errno()))); | |||
| 275 | break; | |||
| 276 | case RFILE: /* r */ | |||
| 277 | pledge_rpath = 1; | |||
| 278 | p++; | |||
| 279 | EATSPACE()do { if (p) while (isascii((unsigned char)*p) && isspace ((unsigned char)*p)) p++; } while (0); | |||
| 280 | if (*p == '\0') | |||
| 281 | error(COMPILE2, "filename expected"); | |||
| 282 | cmd->t = duptoeol(p, "read command", NULL((void *)0)); | |||
| 283 | break; | |||
| 284 | case BRANCH: /* b t */ | |||
| 285 | p++; | |||
| 286 | EATSPACE()do { if (p) while (isascii((unsigned char)*p) && isspace ((unsigned char)*p)) p++; } while (0); | |||
| 287 | if (*p == '\0' || *p == ';') | |||
| 288 | cmd->t = NULL((void *)0); | |||
| 289 | else | |||
| 290 | cmd->t = duptoeol(p, "branch", &p); | |||
| 291 | if (*p == ';') { | |||
| 292 | p++; | |||
| 293 | goto semicolon; | |||
| 294 | } | |||
| 295 | break; | |||
| 296 | case LABEL: /* : */ | |||
| 297 | p++; | |||
| 298 | EATSPACE()do { if (p) while (isascii((unsigned char)*p) && isspace ((unsigned char)*p)) p++; } while (0); | |||
| 299 | cmd->t = duptoeol(p, "label", &p); | |||
| 300 | if (strlen(cmd->t) == 0) | |||
| 301 | error(COMPILE2, "empty label"); | |||
| 302 | enterlabel(cmd); | |||
| 303 | if (*p == ';') { | |||
| 304 | p++; | |||
| 305 | goto semicolon; | |||
| 306 | } | |||
| 307 | break; | |||
| 308 | case SUBST: /* s */ | |||
| 309 | p++; | |||
| 310 | if (*p == '\0' || *p == '\\') | |||
| 311 | error(COMPILE2, "substitute pattern can not be" | |||
| 312 | " delimited by newline or backslash"); | |||
| 313 | cmd->u.s = xmalloc(sizeof(struct s_subst)); | |||
| 314 | p = compile_re(p, &cmd->u.s->re); | |||
| 315 | if (p == NULL((void *)0)) | |||
| 316 | error(COMPILE2, "unterminated substitute pattern"); | |||
| 317 | --p; | |||
| 318 | p = compile_subst(p, cmd->u.s); | |||
| 319 | p = compile_flags(p, cmd->u.s); | |||
| 320 | EATSPACE()do { if (p) while (isascii((unsigned char)*p) && isspace ((unsigned char)*p)) p++; } while (0); | |||
| 321 | if (*p == ';') { | |||
| 322 | p++; | |||
| 323 | link = &cmd->next; | |||
| 324 | goto semicolon; | |||
| 325 | } | |||
| 326 | break; | |||
| 327 | case TR: /* y */ | |||
| 328 | p++; | |||
| 329 | p = compile_tr(p, (char **)&cmd->u.y); | |||
| 330 | EATSPACE()do { if (p) while (isascii((unsigned char)*p) && isspace ((unsigned char)*p)) p++; } while (0); | |||
| 331 | if (*p == ';') { | |||
| 332 | p++; | |||
| 333 | link = &cmd->next; | |||
| 334 | goto semicolon; | |||
| 335 | } | |||
| 336 | if (*p) | |||
| 337 | error(COMPILE2, "extra text at the end of a" | |||
| 338 | " transform command"); | |||
| 339 | break; | |||
| 340 | } | |||
| 341 | } | |||
| 342 | } | |||
| 343 | ||||
| 344 | /* | |||
| 345 | * Get a delimited string. P points to the delimeter of the string; d points | |||
| 346 | * to a buffer area. Newline and delimiter escapes are processed; other | |||
| 347 | * escapes are ignored. | |||
| 348 | * | |||
| 349 | * Returns a pointer to the first character after the final delimiter or NULL | |||
| 350 | * in the case of a non-terminated string. The character array d is filled | |||
| 351 | * with the processed string. | |||
| 352 | */ | |||
| 353 | static char * | |||
| 354 | compile_delimited(char *p, char *d) | |||
| 355 | { | |||
| 356 | char c; | |||
| 357 | ||||
| 358 | c = *p++; | |||
| 359 | if (c == '\0') | |||
| 360 | return (NULL((void *)0)); | |||
| 361 | else if (c == '\\') | |||
| 362 | error(COMPILE2, "\\ can not be used as a string delimiter"); | |||
| 363 | else if (c == '\n') | |||
| 364 | error(COMPILE2, "newline can not be used as a string delimiter"); | |||
| 365 | ||||
| 366 | while (p[0]) { | |||
| 367 | /* Unescaped delimiter: We are done. */ | |||
| 368 | if (p[0] == c) { | |||
| 369 | *d = '\0'; | |||
| 370 | return p + 1; | |||
| 371 | } | |||
| 372 | if (p[0] == '\\') { | |||
| 373 | /* Escaped delimiter: Skip the backslash. */ | |||
| 374 | if (p[1] == c) { | |||
| 375 | p++; | |||
| 376 | } else { | |||
| 377 | /* Backslash-n: Match linefeed. */ | |||
| 378 | if (p[1] == 'n') { | |||
| 379 | *d++ = '\n'; | |||
| 380 | p += 2; | |||
| 381 | /* Other escapes remain unchanged. */ | |||
| 382 | } else { | |||
| 383 | *d++ = *p++; | |||
| 384 | *d++ = *p++; | |||
| 385 | } | |||
| 386 | continue; | |||
| 387 | } | |||
| 388 | } | |||
| 389 | if (p[0] != '[') | |||
| 390 | *d++ = *p++; | |||
| 391 | /* | |||
| 392 | * Bracket expression: | |||
| 393 | * It may contain the delimiter without escaping. | |||
| 394 | */ | |||
| 395 | else if ((d = compile_ccl(&p, d)) == NULL((void *)0)) | |||
| 396 | error(COMPILE2, "unbalanced brackets ([])"); | |||
| 397 | } | |||
| 398 | return NULL((void *)0); | |||
| 399 | } | |||
| 400 | ||||
| 401 | ||||
/*
 * compile_ccl --
 *	Copy one bracket expression from *sp to t.
 *
 * *sp must point at the opening '['.  A leading '^' and a ']' that comes
 * first in the list are copied literally.  Nested "[.", "[:" and "[="
 * constructs are copied verbatim up to their matching ".]", ":]" or "=]".
 * The two-character sequence backslash-n is stored as a single newline.
 *
 * On success *sp is advanced past the closing ']' and the return value
 * points just past the ']' written to t.  If the input ends before the
 * expression is closed, NULL is returned and *sp is left unchanged.
 */
static char *
compile_ccl(char **sp, char *t)
{
	int c, d;
	char *s = *sp;

	*t++ = *s++;			/* the opening '[' */
	if (*s == '^')
		*t++ = *s++;
	if (*s == ']')
		*t++ = *s++;
	/* The loop condition itself stores the current character in *t. */
	for (; *s && (*t = *s) != ']'; s++, t++) {
		if (*s == '[' &&
		    ((d = *(s + 1)) == '.' || d == ':' || d == '=')) {
			/* Copy the class introducer, then scan to "<d>]". */
			*++t = *++s;
			t++;
			s++;
			for (c = *s; (*t = *s) != ']' || c != d; s++, t++)
				if ((c = *s) == '\0')
					return (NULL);
		} else if (*s == '\\' && s[1] == 'n') {
			*t = '\n';
			s++;
		}
	}
	if (*s != ']')
		return (NULL);
	*sp = ++s;
	return (++t);
}
| 431 | ||||
| 432 | /* | |||
| 433 | * Get a regular expression. P points to the delimiter of the regular | |||
| 434 | * expression; repp points to the address of a regexp pointer. Newline | |||
| 435 | * and delimiter escapes are processed; other escapes are ignored. | |||
| 436 | * Returns a pointer to the first character after the final delimiter | |||
| 437 | * or NULL in the case of a non terminated regular expression. The regexp | |||
| 438 | * pointer is set to the compiled regular expression. | |||
| 439 | * Cflags are passed to regcomp. | |||
| 440 | */ | |||
| 441 | static char * | |||
| 442 | compile_re(char *p, regex_t **repp) | |||
| 443 | { | |||
| 444 | int eval; | |||
| 445 | char *re; | |||
| 446 | ||||
| 447 | re = xmalloc(strlen(p) + 1); /* strlen(re) <= strlen(p) */ | |||
| 448 | p = compile_delimited(p, re); | |||
| 449 | if (p && strlen(re) == 0) { | |||
| 450 | *repp = NULL((void *)0); | |||
| 451 | free(re); | |||
| 452 | return (p); | |||
| 453 | } | |||
| 454 | *repp = xmalloc(sizeof(regex_t)); | |||
| 455 | if (p && (eval = regcomp(*repp, re, Eflag ? REG_EXTENDED0001 : 0)) != 0) | |||
| 456 | error(COMPILE2, "RE error: %s", strregerror(eval, *repp)); | |||
| 457 | if (maxnsub < (*repp)->re_nsub) | |||
| 458 | maxnsub = (*repp)->re_nsub; | |||
| 459 | free(re); | |||
| 460 | return (p); | |||
| 461 | } | |||
| 462 | ||||
| 463 | /* | |||
| 464 | * Compile the substitution string of a regular expression and set res to | |||
| 465 | * point to a saved copy of it. Nsub is the number of parenthesized regular | |||
| 466 | * expressions. | |||
| 467 | */ | |||
| 468 | static char * | |||
| 469 | compile_subst(char *p, struct s_subst *s) | |||
| 470 | { | |||
| 471 | static char *lbuf; | |||
| 472 | static size_t bufsize; | |||
| 473 | size_t asize, ref, size; | |||
| 474 | char c, *text, *op, *sp; | |||
| 475 | int sawesc = 0; | |||
| 476 | ||||
| 477 | c = *p++; /* Terminator character */ | |||
| 478 | if (c == '\0') | |||
| 479 | return (NULL((void *)0)); | |||
| 480 | ||||
| 481 | s->maxbref = 0; | |||
| 482 | s->linenum = linenum; | |||
| 483 | text = NULL((void *)0); | |||
| 484 | asize = size = 0; | |||
| 485 | do { | |||
| 486 | size_t len = ROUNDLEN(strlen(p) + 1)(((strlen(p) + 1) + 2048 - 1) & ~(2048 - 1)); | |||
| 487 | if (asize - size < len) { | |||
| 488 | do { | |||
| 489 | asize += len; | |||
| 490 | } while (asize - size < len); | |||
| 491 | text = xrealloc(text, asize); | |||
| 492 | } | |||
| 493 | op = sp = text + size; | |||
| 494 | for (; *p; p++) { | |||
| 495 | if (*p == '\\' || sawesc) { | |||
| 496 | /* | |||
| 497 | * If this is a continuation from the last | |||
| 498 | * buffer, we won't have a character to | |||
| 499 | * skip over. | |||
| 500 | */ | |||
| 501 | if (sawesc) | |||
| 502 | sawesc = 0; | |||
| 503 | else | |||
| 504 | p++; | |||
| 505 | ||||
| 506 | if (*p == '\0') { | |||
| 507 | /* | |||
| 508 | * This escaped character is continued | |||
| 509 | * in the next part of the line. Note | |||
| 510 | * this fact, then cause the loop to | |||
| 511 | * exit w/ normal EOL case and reenter | |||
| 512 | * above with the new buffer. | |||
| 513 | */ | |||
| 514 | sawesc = 1; | |||
| 515 | p--; | |||
| 516 | continue; | |||
| 517 | } else if (strchr("123456789", *p) != NULL((void *)0)) { | |||
| 518 | *sp++ = '\\'; | |||
| 519 | ref = *p - '0'; | |||
| 520 | if (s->re != NULL((void *)0) && | |||
| 521 | ref > s->re->re_nsub) | |||
| 522 | error(COMPILE2, | |||
| 523 | "\\%c not defined in the RE", *p); | |||
| 524 | if (s->maxbref < ref) | |||
| 525 | s->maxbref = ref; | |||
| 526 | } else if (*p == '&' || *p == '\\') | |||
| 527 | *sp++ = '\\'; | |||
| 528 | } else if (*p == c) { | |||
| 529 | p++; | |||
| 530 | *sp++ = '\0'; | |||
| 531 | size += sp - op; | |||
| 532 | s->new = xrealloc(text, size); | |||
| 533 | return (p); | |||
| 534 | } else if (*p == '\n') { | |||
| 535 | error(COMPILE2, | |||
| 536 | "unescaped newline inside substitute pattern"); | |||
| 537 | } | |||
| 538 | *sp++ = *p; | |||
| 539 | } | |||
| 540 | size += sp - op; | |||
| 541 | } while ((p = cu_fgets(&lbuf, &bufsize))); | |||
| 542 | error(COMPILE2, "unterminated substitute in regular expression"); | |||
| 543 | } | |||
| 544 | ||||
| 545 | /* | |||
| 546 | * Compile the flags of the s command | |||
| 547 | */ | |||
| 548 | static char * | |||
| 549 | compile_flags(char *p, struct s_subst *s) | |||
| 550 | { | |||
| 551 | int gn; /* True if we have seen g or n */ | |||
| 552 | long l; | |||
| 553 | ||||
| 554 | s->n = 1; /* Default */ | |||
| 555 | s->p = 0; | |||
| 556 | s->wfile = NULL((void *)0); | |||
| 557 | s->wfd = -1; | |||
| 558 | for (gn = 0;;) { | |||
| 559 | EATSPACE()do { if (p) while (isascii((unsigned char)*p) && isspace ((unsigned char)*p)) p++; } while (0); /* EXTENSION */ | |||
| 560 | switch (*p) { | |||
| 561 | case 'g': | |||
| 562 | if (gn) | |||
| 563 | error(COMPILE2, "more than one number or 'g' in" | |||
| 564 | " substitute flags"); | |||
| 565 | gn = 1; | |||
| 566 | s->n = 0; | |||
| 567 | break; | |||
| 568 | case '\0': | |||
| 569 | case '\n': | |||
| 570 | case ';': | |||
| 571 | return (p); | |||
| 572 | case 'p': | |||
| 573 | s->p = 1; | |||
| 574 | break; | |||
| 575 | case '1': case '2': case '3': | |||
| 576 | case '4': case '5': case '6': | |||
| 577 | case '7': case '8': case '9': | |||
| 578 | if (gn) | |||
| 579 | error(COMPILE2, "more than one number or 'g' in" | |||
| 580 | " substitute flags"); | |||
| 581 | gn = 1; | |||
| 582 | l = strtol(p, &p, 10); | |||
| 583 | if (l <= 0 || l >= INT_MAX2147483647) | |||
| 584 | error(COMPILE2, | |||
| 585 | "number in substitute flags out of range"); | |||
| 586 | s->n = (int)l; | |||
| 587 | continue; | |||
| 588 | case 'w': | |||
| 589 | p++; | |||
| 590 | EATSPACE()do { if (p) while (isascii((unsigned char)*p) && isspace ((unsigned char)*p)) p++; } while (0); | |||
| 591 | if (*p == '\0') | |||
| 592 | error(COMPILE2, "filename expected"); | |||
| 593 | s->wfile = duptoeol(p, "s command w flag", NULL((void *)0)); | |||
| 594 | *p = '\0'; | |||
| 595 | if (aflag) | |||
| 596 | pledge_wpath = 1; | |||
| 597 | else if ((s->wfd = open(s->wfile, | |||
| 598 | O_WRONLY0x0001|O_APPEND0x0008|O_CREAT0x0200|O_TRUNC0x0400, | |||
| 599 | DEFFILEMODE(0000400|0000200|0000040|0000020|0000004|0000002))) == -1) | |||
| 600 | error(FATAL1, "%s: %s", s->wfile, strerror(errno(*__errno()))); | |||
| 601 | return (p); | |||
| 602 | default: | |||
| 603 | error(COMPILE2, | |||
| 604 | "bad flag in substitute command: '%c'", *p); | |||
| 605 | break; | |||
| 606 | } | |||
| 607 | p++; | |||
| 608 | } | |||
| 609 | } | |||
| 610 | ||||
| 611 | /* | |||
| 612 | * Compile a translation set of strings into a lookup table. | |||
| 613 | */ | |||
| 614 | static char * | |||
| 615 | compile_tr(char *old, char **transtab) | |||
| 616 | { | |||
| 617 | int i; | |||
| 618 | char delimiter, check[UCHAR_MAX(127*2 +1) + 1]; | |||
| 619 | char *new, *end; | |||
| 620 | ||||
| 621 | memset(check, 0, sizeof(check)); | |||
| 622 | delimiter = *old; | |||
| 623 | if (delimiter == '\\') | |||
| 624 | error(COMPILE2, "\\ can not be used as a string delimiter"); | |||
| 625 | else if (delimiter == '\n' || delimiter == '\0') | |||
| 626 | error(COMPILE2, "newline can not be used as a string delimiter"); | |||
| 627 | ||||
| 628 | new = old++; | |||
| 629 | do { | |||
| 630 | if ((new = strchr(new + 1, delimiter)) == NULL((void *)0)) | |||
| 631 | error(COMPILE2, "unterminated transform source string"); | |||
| 632 | } while (*(new - 1) == '\\' && *(new -2) != '\\'); | |||
| 633 | *new = '\0'; | |||
| 634 | end = new++; | |||
| 635 | do { | |||
| 636 | if ((end = strchr(end + 1, delimiter)) == NULL((void *)0)) | |||
| 637 | error(COMPILE2, "unterminated transform target string"); | |||
| 638 | } while (*(end -1) == '\\' && *(end -2) != '\\'); | |||
| 639 | *end = '\0'; | |||
| 640 | ||||
| 641 | /* We assume characters are 8 bits */ | |||
| 642 | *transtab = xmalloc(UCHAR_MAX(127*2 +1) + 1); | |||
| 643 | for (i = 0; i <= UCHAR_MAX(127*2 +1); i++) | |||
| 644 | (*transtab)[i] = (char)i; | |||
| 645 | ||||
| 646 | while (*old != '\0' && *new != '\0') { | |||
| 647 | if (*old == '\\') { | |||
| 648 | old++; | |||
| 649 | if (*old == 'n') | |||
| 650 | *old = '\n'; | |||
| 651 | else if (*old != delimiter && *old != '\\') | |||
| 652 | error(COMPILE2, "Unexpected character after " | |||
| 653 | "backslash"); | |||
| 654 | } | |||
| 655 | if (*new == '\\') { | |||
| 656 | new++; | |||
| 657 | if (*new == 'n') | |||
| 658 | *new = '\n'; | |||
| 659 | else if (*new != delimiter && *new != '\\') | |||
| 660 | error(COMPILE2, "Unexpected character after " | |||
| 661 | "backslash"); | |||
| 662 | } | |||
| 663 | if (check[(u_char) *old] == 1) | |||
| 664 | error(COMPILE2, "Repeated character in source string"); | |||
| 665 | check[(u_char) *old] = 1; | |||
| 666 | (*transtab)[(u_char) *old++] = *new++; | |||
| 667 | } | |||
| 668 | if (*old != '\0' || *new != '\0') | |||
| 669 | error(COMPILE2, "transform strings are not the same length"); | |||
| 670 | return end + 1; | |||
| 671 | } | |||
| 672 | ||||
/*
 * Compile the text following an a, c, or i command.
 *
 * Lines are read with cu_fgets() and appended to a growing buffer.  A
 * backslash is dropped and the character after it is kept literally.
 * Reading continues only while a line contains a backslash-newline pair;
 * the first line without one ends the text.
 *
 * Returns the NUL-terminated text in allocated memory.
 */
static char *
compile_text(void)
{
	size_t asize, size, bufsize;
	char *lbuf, *text, *p, *op, *s;
	int esc_nl;

	lbuf = text = NULL;
	asize = size = 0;
	/* Do not hand cu_fgets() the address of an indeterminate size. */
	bufsize = 0;
	while ((p = cu_fgets(&lbuf, &bufsize))) {
		/* Make room for everything the current line can add. */
		size_t len = ROUNDLEN(strlen(p) + 1);
		if (asize - size < len) {
			do {
				asize += len;
			} while (asize - size < len);
			text = xrealloc(text, asize);
		}
		op = s = text + size;
		for (esc_nl = 0; *p != '\0'; p++) {
			if (*p == '\\' && p[1] != '\0' && *++p == '\n')
				esc_nl = 1;
			*s++ = *p;
		}
		size += s - op;
		if (!esc_nl) {
			*s = '\0';
			break;
		}
	}
	free(lbuf);
	/* Trim to the exact size; this also covers the no-input case. */
	text = xrealloc(text, size + 1);
	text[size] = '\0';
	return (text);
}
| 710 | ||||
| 711 | /* | |||
| 712 | * Get an address and return a pointer to the first character after | |||
| 713 | * it. Fill the structure pointed to according to the address. | |||
| 714 | */ | |||
| 715 | static char * | |||
| 716 | compile_addr(char *p, struct s_addr *a) | |||
| 717 | { | |||
| 718 | char *end; | |||
| 719 | ||||
| 720 | switch (*p) { | |||
| 721 | case '\\': /* Context address */ | |||
| 722 | ++p; | |||
| 723 | /* FALLTHROUGH */ | |||
| 724 | case '/': /* Context address */ | |||
| 725 | p = compile_re(p, &a->u.r); | |||
| 726 | if (p == NULL((void *)0)) | |||
| 727 | error(COMPILE2, "unterminated regular expression"); | |||
| 728 | a->type = AT_RE; | |||
| 729 | return (p); | |||
| 730 | ||||
| 731 | case '$': /* Last line */ | |||
| 732 | a->type = AT_LAST; | |||
| 733 | return (p + 1); | |||
| 734 | /* Line number */ | |||
| 735 | case '0': case '1': case '2': case '3': case '4': | |||
| 736 | case '5': case '6': case '7': case '8': case '9': | |||
| 737 | a->type = AT_LINE; | |||
| 738 | a->u.l = strtoul(p, &end, 10); | |||
| 739 | return (end); | |||
| 740 | default: | |||
| 741 | error(COMPILE2, "expected context address"); | |||
| 742 | return (NULL((void *)0)); | |||
| 743 | } | |||
| 744 | } | |||
| 745 | ||||
/*
 * duptoeol --
 *	Return an allocated copy of all the characters up to \n or \0.
 *
 * When semi is not NULL the copy also stops at a ';', the input is left
 * unmodified and *semi is set to the character that stopped the scan.
 * When semi is NULL the input itself is cut at that position by writing
 * a NUL there.  A warning naming ctype is issued if the last character
 * copied is whitespace.
 */
static char *
duptoeol(char *s, char *ctype, char **semi)
{
	size_t len;
	int ws;
	char *start;

	/* ws ends up describing the last character scanned, if any. */
	ws = 0;
	if (semi) {
		for (start = s; *s != '\0' && *s != '\n' && *s != ';'; ++s)
			ws = isspace((unsigned char)*s);
	} else {
		for (start = s; *s != '\0' && *s != '\n'; ++s)
			ws = isspace((unsigned char)*s);
		*s = '\0';
	}
	if (ws)
		warning("whitespace after %s", ctype);
	len = s - start + 1;
	if (semi)
		*semi = s;
	s = xmalloc(len);
	/* strlcpy() copies at most len - 1 bytes and terminates the copy. */
	strlcpy(s, start, len);
	return (s);
}
| 775 | ||||
| 776 | /* | |||
| 777 | * Convert goto label names to addresses, and count a and r commands, in | |||
| 778 | * the given subset of the script. Free the memory used by labels in b | |||
| 779 | * and t commands (but not by :). | |||
| 780 | * | |||
| 781 | * TODO: Remove } nodes | |||
| 782 | */ | |||
| 783 | static void | |||
| 784 | fixuplabel(struct s_command *cp, struct s_command *end) | |||
| 785 | { | |||
| 786 | ||||
| 787 | for (; cp != end; cp = cp->next) | |||
| 788 | switch (cp->code) { | |||
| 789 | case 'a': | |||
| 790 | case 'r': | |||
| 791 | appendnum++; | |||
| 792 | break; | |||
| 793 | case 'b': | |||
| 794 | case 't': | |||
| 795 | /* Resolve branch target. */ | |||
| 796 | if (cp->t == NULL((void *)0)) { | |||
| 797 | cp->u.c = NULL((void *)0); | |||
| 798 | break; | |||
| 799 | } | |||
| 800 | if ((cp->u.c = findlabel(cp->t)) == NULL((void *)0)) | |||
| 801 | error(COMPILE2, "undefined label '%s'", cp->t); | |||
| 802 | free(cp->t); | |||
| 803 | break; | |||
| 804 | case '{': | |||
| 805 | /* Do interior commands. */ | |||
| 806 | fixuplabel(cp->u.c, cp->next); | |||
| 807 | break; | |||
| 808 | } | |||
| 809 | } | |||
| 810 | ||||
| 811 | /* | |||
| 812 | * Associate the given command label for later lookup. | |||
| 813 | */ | |||
| 814 | static void | |||
| 815 | enterlabel(struct s_command *cp) | |||
| 816 | { | |||
| 817 | struct labhash **lhp, *lh; | |||
| 818 | u_char *p; | |||
| 819 | u_int h, c; | |||
| 820 | ||||
| 821 | for (h = 0, p = (u_char *)cp->t; (c = *p) != 0; p++) | |||
| 822 | h = (h << 5) + h + c; | |||
| 823 | lhp = &labels[h & LHMASK(128 - 1)]; | |||
| 824 | for (lh = *lhp; lh != NULL((void *)0); lh = lh->lh_next) | |||
| 825 | if (lh->lh_hash == h && strcmp(cp->t, lh->lh_cmd->t) == 0) | |||
| 826 | error(COMPILE2, "duplicate label '%s'", cp->t); | |||
| 827 | lh = xmalloc(sizeof *lh); | |||
| 828 | lh->lh_next = *lhp; | |||
| 829 | lh->lh_hash = h; | |||
| 830 | lh->lh_cmd = cp; | |||
| 831 | lh->lh_ref = 0; | |||
| 832 | *lhp = lh; | |||
| 833 | } | |||
| 834 | ||||
| 835 | /* | |||
| 836 | * Find the label contained in the command l in the command linked | |||
| 837 | * list cp. L is excluded from the search. Return NULL if not found. | |||
| 838 | */ | |||
| 839 | static struct s_command * | |||
| 840 | findlabel(char *name) | |||
| 841 | { | |||
| 842 | struct labhash *lh; | |||
| 843 | u_char *p; | |||
| 844 | u_int h, c; | |||
| 845 | ||||
| 846 | for (h = 0, p = (u_char *)name; (c = *p) != 0; p++) | |||
| 847 | h = (h << 5) + h + c; | |||
| 848 | for (lh = labels[h & LHMASK(128 - 1)]; lh != NULL((void *)0); lh = lh->lh_next) { | |||
| 849 | if (lh->lh_hash == h && strcmp(name, lh->lh_cmd->t) == 0) { | |||
| 850 | lh->lh_ref = 1; | |||
| 851 | return (lh->lh_cmd); | |||
| 852 | } | |||
| 853 | } | |||
| 854 | return (NULL((void *)0)); | |||
| 855 | } | |||
| 856 | ||||
| 857 | /* | |||
| 858 | * Warn about any unused labels. As a side effect, release the label hash | |||
| 859 | * table space. | |||
| 860 | */ | |||
| 861 | static void | |||
| 862 | uselabel(void) | |||
| 863 | { | |||
| 864 | struct labhash *lh, *next; | |||
| 865 | int i; | |||
| 866 | ||||
| 867 | for (i = 0; i < LHSZ128; i++) { | |||
| 868 | for (lh = labels[i]; lh != NULL((void *)0); lh = next) { | |||
| 869 | next = lh->lh_next; | |||
| 870 | if (!lh->lh_ref) | |||
| 871 | warning("unused label '%s'", | |||
| 872 | lh->lh_cmd->t); | |||
| 873 | free(lh); | |||
| 874 | } | |||
| 875 | } | |||
| 876 | } |