| File: | src/usr.bin/awk/run.c |
| Warning: | line 2648, column 26 Null pointer passed as 1st argument to string length function |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
| 1 | /* $OpenBSD: run.c,v 1.83 2023/11/28 20:54:38 millert Exp $ */ | |||
| 2 | /**************************************************************** | |||
| 3 | Copyright (C) Lucent Technologies 1997 | |||
| 4 | All Rights Reserved | |||
| 5 | ||||
| 6 | Permission to use, copy, modify, and distribute this software and | |||
| 7 | its documentation for any purpose and without fee is hereby | |||
| 8 | granted, provided that the above copyright notice appear in all | |||
| 9 | copies and that both that the copyright notice and this | |||
| 10 | permission notice and warranty disclaimer appear in supporting | |||
| 11 | documentation, and that the name Lucent Technologies or any of | |||
| 12 | its entities not be used in advertising or publicity pertaining | |||
| 13 | to distribution of the software without specific, written prior | |||
| 14 | permission. | |||
| 15 | ||||
| 16 | LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, | |||
| 17 | INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. | |||
| 18 | IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY | |||
| 19 | SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | |||
| 20 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER | |||
| 21 | IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, | |||
| 22 | ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF | |||
| 23 | THIS SOFTWARE. | |||
| 24 | ****************************************************************/ | |||
| 25 | ||||
| 26 | #define DEBUG | |||
| 27 | #include <stdio.h> | |||
| 28 | #include <ctype.h> | |||
| 29 | #include <errno.h> | |||
| 30 | #include <wctype.h> | |||
| 31 | #include <fcntl.h> | |||
| 32 | #include <setjmp.h> | |||
| 33 | #include <limits.h> | |||
| 34 | #include <math.h> | |||
| 35 | #include <string.h> | |||
| 36 | #include <stdlib.h> | |||
| 37 | #include <time.h> | |||
| 38 | #include <sys/types.h> | |||
| 39 | #include <sys/wait.h> | |||
| 40 | #include "awk.h" | |||
| 41 | #include "awkgram.tab.h" | |||
| 42 | ||||
| 43 | ||||
static void stdinit(void);
static void flush_all(void);
static char *wide_char_to_byte_str(int rune, size_t *outlen);

/*
 * tempfree: hand a cell to tfree() only when istemp() says it is a
 * temporary.  The macro form is the one that is compiled; the function
 * under #else is a debugging variant that also warns when an OCELL
 * carries a csub outside the CUNK..CFREE range.
 */
#if 1
#define tempfree(x)	do { if (istemp(x)) tfree(x); } while (/*CONSTCOND*/0)
#else
void tempfree(Cell *p)
{
	if (p->ctype == OCELL && (p->csub < CUNK || p->csub > CFREE)) {
		WARNING("bad csub %d in Cell %d %s",
			p->csub, p->ctype, p->sval);
	}
	if (istemp(p))
		tfree(p);
}
#endif
| 60 | ||||
| 61 | /* do we really need these? */ | |||
| 62 | /* #ifdef _NFILE */ | |||
| 63 | /* #ifndef FOPEN_MAX */ | |||
| 64 | /* #define FOPEN_MAX _NFILE */ | |||
| 65 | /* #endif */ | |||
| 66 | /* #endif */ | |||
| 67 | /* */ | |||
| 68 | /* #ifndef FOPEN_MAX */ | |||
| 69 | /* #define FOPEN_MAX 40 */ /* max number of open files */ | |||
| 70 | /* #endif */ | |||
| 71 | /* */ | |||
| 72 | /* #ifndef RAND_MAX */ | |||
| 73 | /* #define RAND_MAX 32767 */ /* all that ansi guarantees */ | |||
| 74 | /* #endif */ | |||
| 75 | ||||
| 76 | jmp_buf env; | |||
| 77 | extern int pairstack[]; | |||
| 78 | extern Awkfloat srand_seed; | |||
| 79 | ||||
| 80 | Node *winner = NULL((void *)0); /* root of parse tree */ | |||
| 81 | Cell *tmps; /* free temporary cells for execution */ | |||
| 82 | ||||
| 83 | static Cell truecell ={ OBOOL2, BTRUE11, 0, 0, 1.0, NUM01, NULL((void *)0), NULL((void *)0) }; | |||
| 84 | Cell *True = &truecell; | |||
| 85 | static Cell falsecell ={ OBOOL2, BFALSE12, 0, 0, 0.0, NUM01, NULL((void *)0), NULL((void *)0) }; | |||
| 86 | Cell *False = &falsecell; | |||
| 87 | static Cell breakcell ={ OJUMP3, JBREAK23, 0, 0, 0.0, NUM01, NULL((void *)0), NULL((void *)0) }; | |||
| 88 | Cell *jbreak = &breakcell; | |||
| 89 | static Cell contcell ={ OJUMP3, JCONT24, 0, 0, 0.0, NUM01, NULL((void *)0), NULL((void *)0) }; | |||
| 90 | Cell *jcont = &contcell; | |||
| 91 | static Cell nextcell ={ OJUMP3, JNEXT22, 0, 0, 0.0, NUM01, NULL((void *)0), NULL((void *)0) }; | |||
| 92 | Cell *jnext = &nextcell; | |||
| 93 | static Cell nextfilecell ={ OJUMP3, JNEXTFILE26, 0, 0, 0.0, NUM01, NULL((void *)0), NULL((void *)0) }; | |||
| 94 | Cell *jnextfile = &nextfilecell; | |||
| 95 | static Cell exitcell ={ OJUMP3, JEXIT21, 0, 0, 0.0, NUM01, NULL((void *)0), NULL((void *)0) }; | |||
| 96 | Cell *jexit = &exitcell; | |||
| 97 | static Cell retcell ={ OJUMP3, JRET25, 0, 0, 0.0, NUM01, NULL((void *)0), NULL((void *)0) }; | |||
| 98 | Cell *jret = &retcell; | |||
| 99 | static Cell tempcell ={ OCELL1, CTEMP4, 0, EMPTY, 0.0, NUM01|STR02|DONTFREE04, NULL((void *)0), NULL((void *)0) }; | |||
| 100 | ||||
| 101 | Node *curnode = NULL((void *)0); /* the node being executed, for debugging */ | |||
| 102 | ||||
/* buffer memory management */
/*
 * adjbuf: make sure a managed buffer holds at least minlen bytes.
 *
 * pbuf:    address of pointer to buffer being managed
 * psiz:    address of buffer size variable
 * minlen:  minimum length of buffer needed
 * quantum: buffer size quantum (0 means no rounding)
 * pbptr:   address of movable pointer into buffer, or 0 if none
 * whatrtn: name of the calling routine if failure should cause fatal
 *          error; may be NULL, in which case failure is reported by the
 *          return value only
 *
 * return 0 for realloc failure, !=0 for success
 */
int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr,
	const char *whatrtn)
{
	char *tbuf;
	int rminlen, boff;

	if (minlen <= *psiz)
		return 1;	/* current buffer is already large enough */

	rminlen = quantum ? minlen % quantum : 0;
	/* remember where the movable pointer sat so it can be rebased */
	boff = pbptr ? *pbptr - *pbuf : 0;
	/* round up to next multiple of quantum */
	if (rminlen)
		minlen += quantum - rminlen;
	tbuf = (char *) realloc(*pbuf, minlen);
	/* whatrtn is optional, so never hand a null pointer to %s */
	DPRINTF("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", NN(whatrtn), *psiz, minlen, (void*)*pbuf, (void*)tbuf);
	if (tbuf == NULL) {
		if (whatrtn)
			FATAL("out of memory in %s", whatrtn);
		return 0;
	}
	*pbuf = tbuf;
	*psiz = minlen;
	if (pbptr)
		*pbptr = tbuf + boff;
	return 1;
}
| 137 | ||||
| 138 | void run(Node *a) /* execution of parse tree starts here */ | |||
| 139 | { | |||
| 140 | ||||
| 141 | stdinit(); | |||
| 142 | execute(a); | |||
| 143 | closeall(); | |||
| 144 | } | |||
| 145 | ||||
| 146 | Cell *execute(Node *u) /* execute a node of the parse tree */ | |||
| 147 | { | |||
| 148 | Cell *(*proc)(Node **, int); | |||
| 149 | Cell *x; | |||
| 150 | Node *a; | |||
| 151 | ||||
| 152 | if (u == NULL((void *)0)) | |||
| 153 | return(True); | |||
| 154 | for (a = u; ; a = a->nnext) { | |||
| 155 | curnode = a; | |||
| 156 | if (isvalue(a)((a)->ntype == 1)) { | |||
| 157 | x = (Cell *) (a->narg[0]); | |||
| 158 | if (isfld(x)((x)->tval & 0100) && !donefld) | |||
| 159 | fldbld(); | |||
| 160 | else if (isrec(x)((x)->tval & 0200) && !donerec) | |||
| 161 | recbld(); | |||
| 162 | return(x); | |||
| 163 | } | |||
| 164 | if (notlegal(a->nobj)(a->nobj <= 257 || a->nobj >= 352 || proctab[a-> nobj-257] == nullproc)) /* probably a Cell* but too risky to print */ | |||
| 165 | FATAL("illegal statement"); | |||
| 166 | proc = proctab[a->nobj-FIRSTTOKEN257]; | |||
| 167 | x = (*proc)(a->narg, a->nobj); | |||
| 168 | if (isfld(x)((x)->tval & 0100) && !donefld) | |||
| 169 | fldbld(); | |||
| 170 | else if (isrec(x)((x)->tval & 0200) && !donerec) | |||
| 171 | recbld(); | |||
| 172 | if (isexpr(a)((a)->ntype == 3)) | |||
| 173 | return(x); | |||
| 174 | if (isjump(x)((x)->ctype == 3)) | |||
| 175 | return(x); | |||
| 176 | if (a->nnext == NULL((void *)0)) | |||
| 177 | return(x); | |||
| 178 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); | |||
| 179 | } | |||
| 180 | } | |||
| 181 | ||||
| 182 | ||||
| 183 | Cell *program(Node **a, int n) /* execute an awk program */ | |||
| 184 | { /* a[0] = BEGIN, a[1] = body, a[2] = END */ | |||
| 185 | Cell *x; | |||
| 186 | ||||
| 187 | if (setjmp(env) != 0) | |||
| 188 | goto ex; | |||
| 189 | if (a[0]) { /* BEGIN */ | |||
| 190 | x = execute(a[0]); | |||
| 191 | if (isexit(x)((x)->csub == 21)) | |||
| 192 | return(True); | |||
| 193 | if (isjump(x)((x)->ctype == 3)) | |||
| 194 | FATAL("illegal break, continue, next or nextfile from BEGIN"); | |||
| 195 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); | |||
| 196 | } | |||
| 197 | if (a[1] || a[2]) | |||
| 198 | while (getrec(&record, &recsize, true1) > 0) { | |||
| 199 | x = execute(a[1]); | |||
| 200 | if (isexit(x)((x)->csub == 21)) | |||
| 201 | break; | |||
| 202 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); | |||
| 203 | } | |||
| 204 | ex: | |||
| 205 | if (setjmp(env) != 0) /* handles exit within END */ | |||
| 206 | goto ex1; | |||
| 207 | if (a[2]) { /* END */ | |||
| 208 | x = execute(a[2]); | |||
| 209 | if (isbreak(x)((x)->csub == 23) || isnext(x)((x)->csub == 22 || (x)->csub == 26) || iscont(x)((x)->csub == 24)) | |||
| 210 | FATAL("illegal break, continue, next or nextfile from END"); | |||
| 211 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); | |||
| 212 | } | |||
| 213 | ex1: | |||
| 214 | return(True); | |||
| 215 | } | |||
| 216 | ||||
| 217 | struct Frame { /* stack frame for awk function calls */ | |||
| 218 | int nargs; /* number of arguments in this call */ | |||
| 219 | Cell *fcncell; /* pointer to Cell for function */ | |||
| 220 | Cell **args; /* pointer to array of arguments after execute */ | |||
| 221 | Cell *retval; /* return value */ | |||
| 222 | }; | |||
| 223 | ||||
| 224 | #define NARGS50 50 /* max args in a call */ | |||
| 225 | ||||
| 226 | struct Frame *frame = NULL((void *)0); /* base of stack frames; dynamically allocated */ | |||
| 227 | int nframe = 0; /* number of frames allocated */ | |||
| 228 | struct Frame *frp = NULL((void *)0); /* frame pointer. bottom level unused */ | |||
| 229 | ||||
/*
 * call: invoke a user-defined awk function.
 *
 * a[0] evaluates to the function's Cell; a[1] heads the list of argument
 * expressions, linked through nnext.  The parameter n is not used.
 *
 * Steps, in order: evaluate the function cell; allocate the frame stack on
 * first use; count the call arguments (ncall) and read the defined count
 * (ndef) from fcn->fval; evaluate each argument, keeping arrays by
 * reference and copying everything else with copycell(); pad parameters
 * that were not supplied with fresh temp cells; push a frame; execute the
 * body stored in fcn->sval; then release the argument cells and return
 * frp->retval after popping the frame.
 *
 * If the body's result is an exit or next/nextfile jump, that jump cell is
 * returned instead.
 * NOTE(review): that early return happens before the frp-- at the bottom,
 * so the frame is not popped on that path -- confirm this is intended.
 */
| 230 | Cell *call(Node **a, int n) /* function call. very kludgy and fragile */ | |||
| 231 | { | |||
| 232 | static const Cell newcopycell = { OCELL1, CCOPY6, 0, EMPTY, 0.0, NUM01|STR02|DONTFREE04, NULL((void *)0), NULL((void *)0) }; | |||
| 233 | int i, ncall, ndef; | |||
| 234 | int freed = 0; /* handles potential double freeing when fcn & param share a tempcell */ | |||
| 235 | Node *x; | |||
| 236 | Cell *args[NARGS50], *oargs[NARGS50]; /* BUG: fixed size arrays */ | |||
| 237 | Cell *y, *z, *fcn; | |||
| 238 | char *s; | |||
| 239 | ||||
| 240 | fcn = execute(a[0]); /* the function itself */ | |||
| 241 | s = fcn->nval; | |||
| 242 | if (!isfcn(fcn)((fcn)->tval & 040)) | |||
| 243 | FATAL("calling undefined function %s", s); | |||
/* first call ever: allocate the initial batch of 100 frames */
| 244 | if (frame == NULL((void *)0)) { | |||
| 245 | frp = frame = (struct Frame *) calloc(nframe += 100, sizeof(*frame)); | |||
| 246 | if (frame == NULL((void *)0)) | |||
| 247 | FATAL("out of space for stack frames calling %s", s); | |||
| 248 | } | |||
| 249 | for (ncall = 0, x = a[1]; x != NULL((void *)0); x = x->nnext) /* args in call */ | |||
| 250 | ncall++; | |||
| 251 | ndef = (int) fcn->fval; /* args in defn */ | |||
| 252 | DPRINTF("calling %s, %d args (%d in defn), frp=%d\n", s, ncall, ndef, (int) (frp-frame))if (dbg) printf("calling %s, %d args (%d in defn), frp=%d\n", s, ncall, ndef, (int) (frp-frame)); | |||
| 253 | if (ncall > ndef) | |||
| 254 | WARNING("function %s called with %d args, uses only %d", | |||
| 255 | s, ncall, ndef); | |||
/* the sum, not the larger of the two, is checked against the fixed array size */
| 256 | if (ncall + ndef > NARGS50) | |||
| 257 | FATAL("function %s has %d arguments, limit %d", s, ncall+ndef, NARGS50); | |||
/* oargs[] keeps the caller's cells; args[] holds what the callee will see */
| 258 | for (i = 0, x = a[1]; x != NULL((void *)0); i++, x = x->nnext) { /* get call args */ | |||
| 259 | DPRINTF("evaluate args[%d], frp=%d:\n", i, (int) (frp-frame))if (dbg) printf("evaluate args[%d], frp=%d:\n", i, (int) (frp -frame)); | |||
| 260 | y = execute(x); | |||
| 261 | oargs[i] = y; | |||
| 262 | DPRINTF("args[%d]: %s %f <%s>, t=%o\n",if (dbg) printf("args[%d]: %s %f <%s>, t=%o\n", i, ((y-> nval) ? (y->nval) : "(null)"), y->fval, ((y)->tval & 020) ? "(array)" : ((y->sval) ? (y->sval) : "(null)"), y->tval) | |||
| 263 | i, NN(y->nval), y->fval, isarr(y) ? "(array)" : NN(y->sval), y->tval)if (dbg) printf("args[%d]: %s %f <%s>, t=%o\n", i, ((y-> nval) ? (y->nval) : "(null)"), y->fval, ((y)->tval & 020) ? "(array)" : ((y->sval) ? (y->sval) : "(null)"), y->tval); | |||
| 264 | if (isfcn(y)((y)->tval & 040)) | |||
| 265 | FATAL("can't use function %s as argument in %s", y->nval, s); | |||
| 266 | if (isarr(y)((y)->tval & 020)) | |||
| 267 | args[i] = y; /* arrays by ref */ | |||
| 268 | else | |||
| 269 | args[i] = copycell(y); | |||
| 270 | tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0); | |||
| 271 | } | |||
| 272 | for ( ; i < ndef; i++) { /* add null args for ones not provided */ | |||
| 273 | args[i] = gettemp(); | |||
| 274 | *args[i] = newcopycell; | |||
| 275 | } | |||
| 276 | frp++; /* now ok to up frame */ | |||
/* grow the frame stack by 100 entries, then rebase frp into the new block */
| 277 | if (frp >= frame + nframe) { | |||
| 278 | int dfp = frp - frame; /* old index */ | |||
| 279 | frame = (struct Frame *) reallocarray(frame, (nframe += 100), sizeof(*frame)); | |||
| 280 | if (frame == NULL((void *)0)) | |||
| 281 | FATAL("out of space for stack frames in %s", s); | |||
| 282 | frp = frame + dfp; | |||
| 283 | } | |||
| 284 | frp->fcncell = fcn; | |||
/* args is this invocation's local array; the frame only points at it */
| 285 | frp->args = args; | |||
| 286 | frp->nargs = ndef; /* number defined with (excess are locals) */ | |||
| 287 | frp->retval = gettemp(); | |||
| 288 | ||||
| 289 | DPRINTF("start exec of %s, frp=%d\n", s, (int) (frp-frame))if (dbg) printf("start exec of %s, frp=%d\n", s, (int) (frp-frame )); | |||
| 290 | y = execute((Node *)(fcn->sval)); /* execute body */ | |||
| 291 | DPRINTF("finished exec of %s, frp=%d\n", s, (int) (frp-frame))if (dbg) printf("finished exec of %s, frp=%d\n", s, (int) (frp -frame)); | |||
| 292 | ||||
/*
 * Release the ndef parameter cells.  A CCOPY cell that became an array is
 * either torn down (it was a local, i >= ncall) or has its array handed
 * back to the caller's original cell.  Non-array cells are flipped to
 * CTEMP so tempfree() will release them.
 */
| 293 | for (i = 0; i < ndef; i++) { | |||
| 294 | Cell *t = frp->args[i]; | |||
| 295 | if (isarr(t)((t)->tval & 020)) { | |||
| 296 | if (t->csub == CCOPY6) { | |||
| 297 | if (i >= ncall) { | |||
| 298 | freesymtab(t); | |||
| 299 | t->csub = CTEMP4; | |||
| 300 | tempfree(t)do { if (((t)->csub == 4)) tfree(t); } while ( 0); | |||
| 301 | } else { | |||
| 302 | oargs[i]->tval = t->tval; | |||
| 303 | oargs[i]->tval &= ~(STR02|NUM01|DONTFREE04); | |||
| 304 | oargs[i]->sval = t->sval; | |||
| 305 | tempfree(t)do { if (((t)->csub == 4)) tfree(t); } while ( 0); | |||
| 306 | } | |||
| 307 | } | |||
| 308 | } else if (t != y) { /* kludge to prevent freeing twice */ | |||
| 309 | t->csub = CTEMP4; | |||
| 310 | tempfree(t)do { if (((t)->csub == 4)) tfree(t); } while ( 0); | |||
| 311 | } else if (t == y && t->csub == CCOPY6) { | |||
| 312 | t->csub = CTEMP4; | |||
| 313 | tempfree(t)do { if (((t)->csub == 4)) tfree(t); } while ( 0); | |||
| 314 | freed = 1; | |||
| 315 | } | |||
| 316 | } | |||
| 317 | tempfree(fcn)do { if (((fcn)->csub == 4)) tfree(fcn); } while ( 0); | |||
| 318 | if (isexit(y)((y)->csub == 21) || isnext(y)((y)->csub == 22 || (y)->csub == 26)) | |||
| 319 | return y; | |||
| 320 | if (freed == 0) { | |||
| 321 | tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0); /* don't free twice! */ | |||
| 322 | } | |||
| 323 | z = frp->retval; /* return value */ | |||
| 324 | DPRINTF("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval)if (dbg) printf("%s returns %g |%s| %o\n", s, getfval(z), getsval (z), z->tval); | |||
| 325 | frp--; | |||
| 326 | return(z); | |||
| 327 | } | |||
| 328 | ||||
| 329 | Cell *copycell(Cell *x) /* make a copy of a cell in a temp */ | |||
| 330 | { | |||
| 331 | Cell *y; | |||
| 332 | ||||
| 333 | /* copy is not constant or field */ | |||
| 334 | ||||
| 335 | y = gettemp(); | |||
| 336 | y->tval = x->tval & ~(CON010|FLD0100|REC0200); | |||
| 337 | y->csub = CCOPY6; /* prevents freeing until call is over */ | |||
| 338 | y->nval = x->nval; /* BUG? */ | |||
| 339 | if (isstr(x)((x)->tval & 02) /* || x->ctype == OCELL */) { | |||
| 340 | y->sval = tostring(x->sval); | |||
| 341 | y->tval &= ~DONTFREE04; | |||
| 342 | } else | |||
| 343 | y->tval |= DONTFREE04; | |||
| 344 | y->fval = x->fval; | |||
| 345 | return y; | |||
| 346 | } | |||
| 347 | ||||
| 348 | Cell *arg(Node **a, int n) /* nth argument of a function */ | |||
| 349 | { | |||
| 350 | ||||
| 351 | n = ptoi(a[0]); /* argument number, counting from 0 */ | |||
| 352 | DPRINTF("arg(%d), frp->nargs=%d\n", n, frp->nargs)if (dbg) printf("arg(%d), frp->nargs=%d\n", n, frp->nargs ); | |||
| 353 | if (n+1 > frp->nargs) | |||
| 354 | FATAL("argument #%d of function %s was not supplied", | |||
| 355 | n+1, frp->fcncell->nval); | |||
| 356 | return frp->args[n]; | |||
| 357 | } | |||
| 358 | ||||
| 359 | Cell *jump(Node **a, int n) /* break, continue, next, nextfile, return */ | |||
| 360 | { | |||
| 361 | Cell *y; | |||
| 362 | ||||
| 363 | switch (n) { | |||
| 364 | case EXIT297: | |||
| 365 | if (a[0] != NULL((void *)0)) { | |||
| 366 | y = execute(a[0]); | |||
| 367 | errorflag = (int) getfval(y); | |||
| 368 | tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0); | |||
| 369 | } | |||
| 370 | longjmp(env, 1); | |||
| 371 | case RETURN340: | |||
| 372 | if (a[0] != NULL((void *)0)) { | |||
| 373 | y = execute(a[0]); | |||
| 374 | if ((y->tval & (STR02|NUM01)) == (STR02|NUM01)) { | |||
| 375 | setsval(frp->retval, getsval(y)); | |||
| 376 | frp->retval->fval = getfval(y); | |||
| 377 | frp->retval->tval |= NUM01; | |||
| 378 | } | |||
| 379 | else if (y->tval & STR02) | |||
| 380 | setsval(frp->retval, getsval(y)); | |||
| 381 | else if (y->tval & NUM01) | |||
| 382 | setfval(frp->retval, getfval(y)); | |||
| 383 | else /* can't happen */ | |||
| 384 | FATAL("bad type variable %d", y->tval); | |||
| 385 | tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0); | |||
| 386 | } | |||
| 387 | return(jret); | |||
| 388 | case NEXT307: | |||
| 389 | return(jnext); | |||
| 390 | case NEXTFILE308: | |||
| 391 | nextfile(); | |||
| 392 | return(jnextfile); | |||
| 393 | case BREAK292: | |||
| 394 | return(jbreak); | |||
| 395 | case CONTINUE294: | |||
| 396 | return(jcont); | |||
| 397 | default: /* can't happen */ | |||
| 398 | FATAL("illegal jump type %d", n); | |||
| 399 | } | |||
| 400 | return 0; /* not reached */ | |||
| 401 | } | |||
| 402 | ||||
| 403 | Cell *awkgetline(Node **a, int n) /* get next line from specific input */ | |||
| 404 | { /* a[0] is variable, a[1] is operator, a[2] is filename */ | |||
| 405 | Cell *r, *x; | |||
| 406 | extern Cell **fldtab; | |||
| 407 | FILE *fp; | |||
| 408 | char *buf; | |||
| 409 | int bufsize = recsize; | |||
| 410 | int mode; | |||
| 411 | bool_Bool newflag; | |||
| 412 | double result; | |||
| 413 | ||||
| 414 | if ((buf = (char *) malloc(bufsize)) == NULL((void *)0)) | |||
| 415 | FATAL("out of memory in getline"); | |||
| 416 | ||||
| 417 | fflush(stdout(&__sF[1])); /* in case someone is waiting for a prompt */ | |||
| 418 | r = gettemp(); | |||
| 419 | if (a[1] != NULL((void *)0)) { /* getline < file */ | |||
| 420 | x = execute(a[2]); /* filename */ | |||
| 421 | mode = ptoi(a[1]); | |||
| 422 | if (mode == '|') /* input pipe */ | |||
| 423 | mode = LE286; /* arbitrary flag */ | |||
| 424 | fp = openfile(mode, getsval(x), &newflag); | |||
| 425 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); | |||
| 426 | if (fp == NULL((void *)0)) | |||
| 427 | n = -1; | |||
| 428 | else | |||
| 429 | n = readrec(&buf, &bufsize, fp, newflag); | |||
| 430 | if (n <= 0) { | |||
| 431 | ; | |||
| 432 | } else if (a[0] != NULL((void *)0)) { /* getline var <file */ | |||
| 433 | x = execute(a[0]); | |||
| 434 | setsval(x, buf); | |||
| 435 | if (is_number(x->sval, & result)is_valid_number(x->sval, 0, ((void *)0), & result)) { | |||
| 436 | x->fval = result; | |||
| 437 | x->tval |= NUM01; | |||
| 438 | } | |||
| 439 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); | |||
| 440 | } else { /* getline <file */ | |||
| 441 | setsval(fldtab[0], buf); | |||
| 442 | if (is_number(fldtab[0]->sval, & result)is_valid_number(fldtab[0]->sval, 0, ((void *)0), & result )) { | |||
| 443 | fldtab[0]->fval = result; | |||
| 444 | fldtab[0]->tval |= NUM01; | |||
| 445 | } | |||
| 446 | } | |||
| 447 | } else { /* bare getline; use current input */ | |||
| 448 | if (a[0] == NULL((void *)0)) /* getline */ | |||
| 449 | n = getrec(&record, &recsize, true1); | |||
| 450 | else { /* getline var */ | |||
| 451 | n = getrec(&buf, &bufsize, false0); | |||
| 452 | if (n > 0) { | |||
| 453 | x = execute(a[0]); | |||
| 454 | setsval(x, buf); | |||
| 455 | if (is_number(x->sval, & result)is_valid_number(x->sval, 0, ((void *)0), & result)) { | |||
| 456 | x->fval = result; | |||
| 457 | x->tval |= NUM01; | |||
| 458 | } | |||
| 459 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); | |||
| 460 | } | |||
| 461 | } | |||
| 462 | } | |||
| 463 | setfval(r, (Awkfloat) n); | |||
| 464 | free(buf); | |||
| 465 | return r; | |||
| 466 | } | |||
| 467 | ||||
| 468 | Cell *getnf(Node **a, int n) /* get NF */ | |||
| 469 | { | |||
| 470 | if (!donefld) | |||
| 471 | fldbld(); | |||
| 472 | return (Cell *) a[0]; | |||
| 473 | } | |||
| 474 | ||||
| 475 | static char * | |||
| 476 | makearraystring(Node *p, const char *func) | |||
| 477 | { | |||
| 478 | char *buf; | |||
| 479 | int bufsz = recsize; | |||
| 480 | size_t blen; | |||
| 481 | ||||
| 482 | if ((buf = (char *) malloc(bufsz)) == NULL((void *)0)) { | |||
| 483 | FATAL("%s: out of memory", func); | |||
| 484 | } | |||
| 485 | ||||
| 486 | blen = 0; | |||
| 487 | buf[blen] = '\0'; | |||
| 488 | ||||
| 489 | for (; p; p = p->nnext) { | |||
| 490 | Cell *x = execute(p); /* expr */ | |||
| 491 | char *s = getsval(x); | |||
| 492 | size_t seplen = strlen(getsval(subseploc)); | |||
| 493 | size_t nsub = p->nnext ? seplen : 0; | |||
| 494 | size_t slen = strlen(s); | |||
| 495 | size_t tlen = blen + slen + nsub; | |||
| 496 | ||||
| 497 | if (!adjbuf(&buf, &bufsz, tlen + 1, recsize, 0, func)) { | |||
| 498 | FATAL("%s: out of memory %s[%s...]", | |||
| 499 | func, x->nval, buf); | |||
| 500 | } | |||
| 501 | memcpy(buf + blen, s, slen); | |||
| 502 | if (nsub) { | |||
| 503 | memcpy(buf + blen + slen, *SUBSEP, nsub); | |||
| 504 | } | |||
| 505 | buf[tlen] = '\0'; | |||
| 506 | blen = tlen; | |||
| 507 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); | |||
| 508 | } | |||
| 509 | return buf; | |||
| 510 | } | |||
| 511 | ||||
| 512 | Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */ | |||
| 513 | { | |||
| 514 | Cell *x, *z; | |||
| 515 | char *buf; | |||
| 516 | ||||
| 517 | x = execute(a[0]); /* Cell* for symbol table */ | |||
| 518 | buf = makearraystring(a[1], __func__); | |||
| 519 | if (!isarr(x)((x)->tval & 020)) { | |||
| 520 | DPRINTF("making %s into an array\n", NN(x->nval))if (dbg) printf("making %s into an array\n", ((x->nval) ? ( x->nval) : "(null)")); | |||
| 521 | if (freeable(x)( ((x)->tval & (02|04)) == 02 )) | |||
| 522 | xfree(x->sval){ free((void *)(intptr_t)(x->sval)); (x->sval) = ((void *)0); }; | |||
| 523 | x->tval &= ~(STR02|NUM01|DONTFREE04); | |||
| 524 | x->tval |= ARR020; | |||
| 525 | x->sval = (char *) makesymtab(NSYMTAB50); | |||
| 526 | } | |||
| 527 | z = setsymtab(buf, "", 0.0, STR02|NUM01, (Array *) x->sval); | |||
| 528 | z->ctype = OCELL1; | |||
| 529 | z->csub = CVAR2; | |||
| 530 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); | |||
| 531 | free(buf); | |||
| 532 | return(z); | |||
| 533 | } | |||
| 534 | ||||
| 535 | Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */ | |||
| 536 | { | |||
| 537 | Cell *x; | |||
| 538 | ||||
| 539 | x = execute(a[0]); /* Cell* for symbol table */ | |||
| 540 | if (x == symtabloc) { | |||
| 541 | FATAL("cannot delete SYMTAB or its elements"); | |||
| 542 | } | |||
| 543 | if (!isarr(x)((x)->tval & 020)) | |||
| 544 | return True; | |||
| 545 | if (a[1] == NULL((void *)0)) { /* delete the elements, not the table */ | |||
| 546 | freesymtab(x); | |||
| 547 | x->tval &= ~STR02; | |||
| 548 | x->tval |= ARR020; | |||
| 549 | x->sval = (char *) makesymtab(NSYMTAB50); | |||
| 550 | } else { | |||
| 551 | char *buf = makearraystring(a[1], __func__); | |||
| 552 | freeelem(x, buf); | |||
| 553 | free(buf); | |||
| 554 | } | |||
| 555 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); | |||
| 556 | return True; | |||
| 557 | } | |||
| 558 | ||||
| 559 | Cell *intest(Node **a, int n) /* a[0] is index (list), a[1] is symtab */ | |||
| 560 | { | |||
| 561 | Cell *ap, *k; | |||
| 562 | char *buf; | |||
| 563 | ||||
| 564 | ap = execute(a[1]); /* array name */ | |||
| 565 | if (!isarr(ap)((ap)->tval & 020)) { | |||
| 566 | DPRINTF("making %s into an array\n", ap->nval)if (dbg) printf("making %s into an array\n", ap->nval); | |||
| 567 | if (freeable(ap)( ((ap)->tval & (02|04)) == 02 )) | |||
| 568 | xfree(ap->sval){ free((void *)(intptr_t)(ap->sval)); (ap->sval) = ((void *)0); }; | |||
| 569 | ap->tval &= ~(STR02|NUM01|DONTFREE04); | |||
| 570 | ap->tval |= ARR020; | |||
| 571 | ap->sval = (char *) makesymtab(NSYMTAB50); | |||
| 572 | } | |||
| 573 | buf = makearraystring(a[0], __func__); | |||
| 574 | k = lookup(buf, (Array *) ap->sval); | |||
| 575 | tempfree(ap)do { if (((ap)->csub == 4)) tfree(ap); } while ( 0); | |||
| 576 | free(buf); | |||
| 577 | if (k == NULL((void *)0)) | |||
| 578 | return(False); | |||
| 579 | else | |||
| 580 | return(True); | |||
| 581 | } | |||
| 582 | ||||
| 583 | ||||
| 584 | /* ======== utf-8 code ========== */ | |||
| 585 | ||||
| 586 | /* | |||
| 587 | * Awk strings can contain ascii, random 8-bit items (eg Latin-1), | |||
| 588 | * or utf-8. u8_isutf tests whether a string starts with a valid | |||
| 589 | * utf-8 sequence, and returns 0 if not (e.g., high bit set). | |||
| 590 | * u8_nextlen returns length of next valid sequence, which is | |||
| 591 | * 1 for ascii, 2..4 for utf-8, or 1 for high bit non-utf. | |||
| 592 | * u8_strlen returns length of string in valid utf-8 sequences | |||
| 593 | * and/or high-bit bytes. Conversion functions go between byte | |||
| 594 | * number and character number. | |||
| 595 | * | |||
| 596 | * In theory, this behaves the same as before for non-utf8 bytes. | |||
| 597 | * | |||
| 598 | * Limited checking! This is a potential security hole. | |||
| 599 | */ | |||
| 600 | ||||
| 601 | /* is s the beginning of a valid utf-8 string? */ | |||
| 602 | /* return length 1..4 if yes, 0 if no */ | |||
| 603 | int u8_isutf(const char *s) | |||
| 604 | { | |||
| 605 | int n, ret; | |||
| 606 | unsigned char c; | |||
| 607 | ||||
| 608 | c = s[0]; | |||
| 609 | if (c < 128 || awk_mb_cur_max == 1) | |||
| 610 | return 1; /* what if it's 0? */ | |||
| 611 | ||||
| 612 | n = strlen(s); | |||
| 613 | if (n >= 2 && ((c>>5) & 0x7) == 0x6 && (s[1] & 0xC0) == 0x80) { | |||
| 614 | ret = 2; /* 110xxxxx 10xxxxxx */ | |||
| 615 | } else if (n >= 3 && ((c>>4) & 0xF) == 0xE && (s[1] & 0xC0) == 0x80 | |||
| 616 | && (s[2] & 0xC0) == 0x80) { | |||
| 617 | ret = 3; /* 1110xxxx 10xxxxxx 10xxxxxx */ | |||
| 618 | } else if (n >= 4 && ((c>>3) & 0x1F) == 0x1E && (s[1] & 0xC0) == 0x80 | |||
| 619 | && (s[2] & 0xC0) == 0x80 && (s[3] & 0xC0) == 0x80) { | |||
| 620 | ret = 4; /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ | |||
| 621 | } else { | |||
| 622 | ret = 0; | |||
| 623 | } | |||
| 624 | return ret; | |||
| 625 | } | |||
| 626 | ||||
| 627 | /* Convert (prefix of) utf8 string to utf-32 rune. */ | |||
| 628 | /* Sets *rune to the value, returns the length. */ | |||
| 629 | /* No error checking: watch out. */ | |||
| 630 | int u8_rune(int *rune, const char *s) | |||
| 631 | { | |||
| 632 | int n, ret; | |||
| 633 | unsigned char c; | |||
| 634 | ||||
| 635 | c = s[0]; | |||
| 636 | if (c < 128 || awk_mb_cur_max == 1) { | |||
| 637 | *rune = c; | |||
| 638 | return 1; | |||
| 639 | } | |||
| 640 | ||||
| 641 | n = strlen(s); | |||
| 642 | if (n >= 2 && ((c>>5) & 0x7) == 0x6 && (s[1] & 0xC0) == 0x80) { | |||
| 643 | *rune = ((c & 0x1F) << 6) | (s[1] & 0x3F); /* 110xxxxx 10xxxxxx */ | |||
| 644 | ret = 2; | |||
| 645 | } else if (n >= 3 && ((c>>4) & 0xF) == 0xE && (s[1] & 0xC0) == 0x80 | |||
| 646 | && (s[2] & 0xC0) == 0x80) { | |||
| 647 | *rune = ((c & 0xF) << 12) | ((s[1] & 0x3F) << 6) | (s[2] & 0x3F); | |||
| 648 | /* 1110xxxx 10xxxxxx 10xxxxxx */ | |||
| 649 | ret = 3; | |||
| 650 | } else if (n >= 4 && ((c>>3) & 0x1F) == 0x1E && (s[1] & 0xC0) == 0x80 | |||
| 651 | && (s[2] & 0xC0) == 0x80 && (s[3] & 0xC0) == 0x80) { | |||
| 652 | *rune = ((c & 0x7) << 18) | ((s[1] & 0x3F) << 12) | ((s[2] & 0x3F) << 6) | (s[3] & 0x3F); | |||
| 653 | /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ | |||
| 654 | ret = 4; | |||
| 655 | } else { | |||
| 656 | *rune = c; | |||
| 657 | ret = 1; | |||
| 658 | } | |||
| 659 | return ret; /* returns one byte if sequence doesn't look like utf */ | |||
| 660 | } | |||
| 661 | ||||
/* return length of next sequence: 1 for ascii or random, 2..4 for valid utf8 */
int u8_nextlen(const char *s)
{
	int seqlen = u8_isutf(s);

	/* u8_isutf() reports 0 for "not utf-8"; count that as one byte */
	return (seqlen == 0) ? 1 : seqlen;
}
| 672 | ||||
| 673 | /* return number of utf characters or single non-utf bytes */ | |||
| 674 | int u8_strlen(const char *s) | |||
| 675 | { | |||
| 676 | int i, len, n, totlen; | |||
| 677 | unsigned char c; | |||
| 678 | ||||
| 679 | n = strlen(s); | |||
| 680 | totlen = 0; | |||
| 681 | for (i = 0; i < n; i += len) { | |||
| 682 | c = s[i]; | |||
| 683 | if (c < 128 || awk_mb_cur_max == 1) { | |||
| 684 | len = 1; | |||
| 685 | } else { | |||
| 686 | len = u8_nextlen(&s[i]); | |||
| 687 | } | |||
| 688 | totlen++; | |||
| 689 | if (i > n) | |||
| 690 | FATAL("bad utf count [%s] n=%d i=%d\n", s, n, i); | |||
| 691 | } | |||
| 692 | return totlen; | |||
| 693 | } | |||
| 694 | ||||
/* convert utf-8 char number in a string to its byte offset */
/*
 * Returns the byte offset at which character number charnum (counting
 * from 0) starts.  The walk stops at the terminating NUL, so a charnum
 * larger than the number of characters in s yields the byte length of s
 * instead of reading past the end of the string.
 */
int u8_char2byte(const char *s, int charnum)
{
	int n;
	int bytenum = 0;

	while (charnum > 0 && *s != '\0') {
		n = u8_nextlen(s);
		s += n;
		bytenum += n;
		charnum--;
	}
	return bytenum;
}
| 709 | ||||
/* convert byte offset in s to utf-8 char number that starts there */
/*
 * Returns -1 when bytenum lies beyond the end of s.  Otherwise counts one
 * character for every sequence that starts at or before bytenum.
 */
int u8_byte2char(const char *s, int bytenum)
{
	int pos = 0;
	int charnum = 0;	/* BUG: what origin? */
			/* should be 0 to match start==0 which means no match */
	int total = strlen(s);

	if (bytenum > total) {
		return -1;	/* ??? */
	}
	while (pos <= bytenum) {
		pos += u8_nextlen(s+pos);
		charnum++;
	}
	return charnum;
}
| 727 | ||||
/* runetochar() adapted from rune.c in the Plan 9 distribution */

enum
{
	Runeerror = 128, /* from somewhere else */
	Runemax = 0x10FFFF,

	/* number of payload bits carried by each kind of byte */
	Bit1	= 7,
	Bitx	= 6,
	Bit2	= 5,
	Bit3	= 4,
	Bit4	= 3,
	Bit5	= 2,

	/* tag bits: everything in a byte above its payload bits */
	T1	= 0xFF & ~((1 << (Bit1 + 1)) - 1),	/* 0000 0000 */
	Tx	= 0xFF & ~((1 << (Bitx + 1)) - 1),	/* 1000 0000 */
	T2	= 0xFF & ~((1 << (Bit2 + 1)) - 1),	/* 1100 0000 */
	T3	= 0xFF & ~((1 << (Bit3 + 1)) - 1),	/* 1110 0000 */
	T4	= 0xFF & ~((1 << (Bit4 + 1)) - 1),	/* 1111 0000 */
	T5	= 0xFF & ~((1 << (Bit5 + 1)) - 1),	/* 1111 1000 */

	/* largest rune representable in 1, 2, 3 and 4 bytes */
	Rune1	= (1 << (Bit1 + 0 * Bitx)) - 1,	/* 0000 0000 0000 0000 0111 1111 */
	Rune2	= (1 << (Bit2 + 1 * Bitx)) - 1,	/* 0000 0000 0000 0111 1111 1111 */
	Rune3	= (1 << (Bit3 + 2 * Bitx)) - 1,	/* 0000 0000 1111 1111 1111 1111 */
	Rune4	= (1 << (Bit4 + 3 * Bitx)) - 1,	/* 0001 1111 1111 1111 1111 1111 */

	Maskx	= (1 << Bitx) - 1,		/* 0011 1111 */
	Testx	= 0xFF & ~Maskx,		/* 1100 0000 */

};
| 758 | ||||
| 759 | int runetochar(char *str, int c) | |||
| 760 | { | |||
| 761 | /* one character sequence 00000-0007F => 00-7F */ | |||
| 762 | if (c <= Rune1) { | |||
| 763 | str[0] = c; | |||
| 764 | return 1; | |||
| 765 | } | |||
| 766 | ||||
| 767 | /* two character sequence 00080-007FF => T2 Tx */ | |||
| 768 | if (c <= Rune2) { | |||
| 769 | str[0] = T2 | (c >> 1*Bitx); | |||
| 770 | str[1] = Tx | (c & Maskx); | |||
| 771 | return 2; | |||
| 772 | } | |||
| 773 | ||||
| 774 | /* three character sequence 00800-0FFFF => T3 Tx Tx */ | |||
| 775 | if (c > Runemax) | |||
| 776 | c = Runeerror; | |||
| 777 | if (c <= Rune3) { | |||
| 778 | str[0] = T3 | (c >> 2*Bitx); | |||
| 779 | str[1] = Tx | ((c >> 1*Bitx) & Maskx); | |||
| 780 | str[2] = Tx | (c & Maskx); | |||
| 781 | return 3; | |||
| 782 | } | |||
| 783 | ||||
| 784 | /* four character sequence 010000-1FFFFF => T4 Tx Tx Tx */ | |||
| 785 | str[0] = T4 | (c >> 3*Bitx); | |||
| 786 | str[1] = Tx | ((c >> 2*Bitx) & Maskx); | |||
| 787 | str[2] = Tx | ((c >> 1*Bitx) & Maskx); | |||
| 788 | str[3] = Tx | (c & Maskx); | |||
| 789 | return 4; | |||
| 790 | } | |||
| 791 | ||||
| 792 | ||||
| 793 | /* ========== end of utf8 code =========== */ | |||
| 794 | ||||
| 795 | ||||
| 796 | ||||
| 797 | Cell *matchop(Node **a, int n) /* ~ and match() */ | |||
| 798 | { | |||
| 799 | Cell *x, *y; | |||
| 800 | char *s, *t; | |||
| 801 | int i; | |||
| 802 | int cstart, cpatlen, len; | |||
| 803 | fa *pfa; | |||
| 804 | int (*mf)(fa *, const char *) = match, mode = 0; | |||
| 805 | ||||
| 806 | if (n == MATCHFCN306) { | |||
| 807 | mf = pmatch; | |||
| 808 | mode = 1; | |||
| 809 | } | |||
| 810 | x = execute(a[1]); /* a[1] = target text */ | |||
| 811 | s = getsval(x); | |||
| 812 | if (a[0] == NULL((void *)0)) /* a[1] == 0: already-compiled reg expr */ | |||
| 813 | i = (*mf)((fa *) a[2], s); | |||
| 814 | else { | |||
| 815 | y = execute(a[2]); /* a[2] = regular expr */ | |||
| 816 | t = getsval(y); | |||
| 817 | pfa = makedfa(t, mode); | |||
| 818 | i = (*mf)(pfa, s); | |||
| 819 | tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0); | |||
| 820 | } | |||
| 821 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); | |||
| 822 | if (n == MATCHFCN306) { | |||
| 823 | int start = patbeg - s + 1; /* origin 1 */ | |||
| 824 | if (patlen < 0) { | |||
| 825 | start = 0; /* not found */ | |||
| 826 | } else { | |||
| 827 | cstart = u8_byte2char(s, start-1); | |||
| 828 | cpatlen = 0; | |||
| 829 | for (i = 0; i < patlen; i += len) { | |||
| 830 | len = u8_nextlen(patbeg+i); | |||
| 831 | cpatlen++; | |||
| 832 | } | |||
| 833 | ||||
| 834 | start = cstart; | |||
| 835 | patlen = cpatlen; | |||
| 836 | } | |||
| 837 | ||||
| 838 | setfval(rstartloc, (Awkfloat) start); | |||
| 839 | setfval(rlengthloc, (Awkfloat) patlen); | |||
| 840 | x = gettemp(); | |||
| 841 | x->tval = NUM01; | |||
| 842 | x->fval = start; | |||
| 843 | return x; | |||
| 844 | } else if ((n == MATCH265 && i == 1) || (n == NOTMATCH266 && i == 0)) | |||
| 845 | return(True); | |||
| 846 | else | |||
| 847 | return(False); | |||
| 848 | } | |||
| 849 | ||||
| 850 | ||||
| 851 | Cell *boolop(Node **a, int n) /* a[0] || a[1], a[0] && a[1], !a[0] */ | |||
| 852 | { | |||
| 853 | Cell *x, *y; | |||
| 854 | int i; | |||
| 855 | ||||
| 856 | x = execute(a[0]); | |||
| 857 | i = istrue(x)((x)->csub == 11); | |||
| 858 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); | |||
| 859 | switch (n) { | |||
| 860 | case BOR281: | |||
| 861 | if (i) return(True); | |||
| 862 | y = execute(a[1]); | |||
| 863 | i = istrue(y)((y)->csub == 11); | |||
| 864 | tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0); | |||
| 865 | if (i) return(True); | |||
| 866 | else return(False); | |||
| 867 | case AND280: | |||
| 868 | if ( !i ) return(False); | |||
| 869 | y = execute(a[1]); | |||
| 870 | i = istrue(y)((y)->csub == 11); | |||
| 871 | tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0); | |||
| 872 | if (i) return(True); | |||
| 873 | else return(False); | |||
| 874 | case NOT345: | |||
| 875 | if (i) return(False); | |||
| 876 | else return(True); | |||
| 877 | default: /* can't happen */ | |||
| 878 | FATAL("unknown boolean operator %d", n); | |||
| 879 | } | |||
| 880 | return 0; /*NOTREACHED*/ | |||
| 881 | } | |||
| 882 | ||||
| 883 | Cell *relop(Node **a, int n) /* a[0 < a[1], etc. */ | |||
| 884 | { | |||
| 885 | int i; | |||
| 886 | Cell *x, *y; | |||
| 887 | Awkfloat j; | |||
| 888 | bool_Bool x_is_nan, y_is_nan; | |||
| 889 | ||||
| 890 | x = execute(a[0]); | |||
| 891 | y = execute(a[1]); | |||
| 892 | x_is_nan = isnan(x->fval)((sizeof (x->fval) == sizeof (float)) ? __isnanf(x->fval ) : (sizeof (x->fval) == sizeof (double)) ? __isnan(x-> fval) : __isnanl(x->fval)); | |||
| 893 | y_is_nan = isnan(y->fval)((sizeof (y->fval) == sizeof (float)) ? __isnanf(y->fval ) : (sizeof (y->fval) == sizeof (double)) ? __isnan(y-> fval) : __isnanl(y->fval)); | |||
| 894 | if (x->tval&NUM01 && y->tval&NUM01) { | |||
| 895 | if ((x_is_nan || y_is_nan) && n != NE288) | |||
| 896 | return(False); | |||
| 897 | j = x->fval - y->fval; | |||
| 898 | i = j<0? -1: (j>0? 1: 0); | |||
| 899 | } else { | |||
| 900 | i = strcmp(getsval(x), getsval(y)); | |||
| 901 | } | |||
| 902 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); | |||
| 903 | tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0); | |||
| 904 | switch (n) { | |||
| 905 | case LT287: if (i<0) return(True); | |||
| 906 | else return(False); | |||
| 907 | case LE286: if (i<=0) return(True); | |||
| 908 | else return(False); | |||
| 909 | case NE288: if (x_is_nan && y_is_nan) return(True); | |||
| 910 | else if (i!=0) return(True); | |||
| 911 | else return(False); | |||
| 912 | case EQ283: if (i == 0) return(True); | |||
| 913 | else return(False); | |||
| 914 | case GE284: if (i>=0) return(True); | |||
| 915 | else return(False); | |||
| 916 | case GT285: if (i>0) return(True); | |||
| 917 | else return(False); | |||
| 918 | default: /* can't happen */ | |||
| 919 | FATAL("unknown relational operator %d", n); | |||
| 920 | } | |||
| 921 | return 0; /*NOTREACHED*/ | |||
| 922 | } | |||
| 923 | ||||
| 924 | void tfree(Cell *a) /* free a tempcell */ | |||
| 925 | { | |||
| 926 | if (freeable(a)( ((a)->tval & (02|04)) == 02 )) { | |||
| 927 | DPRINTF("freeing %s %s %o\n", NN(a->nval), NN(a->sval), a->tval)if (dbg) printf("freeing %s %s %o\n", ((a->nval) ? (a-> nval) : "(null)"), ((a->sval) ? (a->sval) : "(null)"), a ->tval); | |||
| 928 | xfree(a->sval){ free((void *)(intptr_t)(a->sval)); (a->sval) = ((void *)0); }; | |||
| 929 | } | |||
| 930 | if (a == tmps) | |||
| 931 | FATAL("tempcell list is curdled"); | |||
| 932 | a->cnext = tmps; | |||
| 933 | tmps = a; | |||
| 934 | } | |||
| 935 | ||||
| 936 | Cell *gettemp(void) /* get a tempcell */ | |||
| 937 | { int i; | |||
| 938 | Cell *x; | |||
| 939 | ||||
| 940 | if (!tmps) { | |||
| 941 | tmps = (Cell *) calloc(100, sizeof(*tmps)); | |||
| 942 | if (!tmps) | |||
| 943 | FATAL("out of space for temporaries"); | |||
| 944 | for (i = 1; i < 100; i++) | |||
| 945 | tmps[i-1].cnext = &tmps[i]; | |||
| 946 | tmps[i-1].cnext = NULL((void *)0); | |||
| 947 | } | |||
| 948 | x = tmps; | |||
| 949 | tmps = x->cnext; | |||
| 950 | *x = tempcell; | |||
| 951 | return(x); | |||
| 952 | } | |||
| 953 | ||||
| 954 | Cell *indirect(Node **a, int n) /* $( a[0] ) */ | |||
| 955 | { | |||
| 956 | Awkfloat val; | |||
| 957 | Cell *x; | |||
| 958 | int m; | |||
| 959 | char *s; | |||
| 960 | ||||
| 961 | x = execute(a[0]); | |||
| 962 | val = getfval(x); /* freebsd: defend against super large field numbers */ | |||
| 963 | if ((Awkfloat)INT_MAX0x7fffffff < val) | |||
| 964 | FATAL("trying to access out of range field %s", x->nval); | |||
| 965 | m = (int) val; | |||
| 966 | if (m == 0 && !is_number(s = getsval(x), NULL)is_valid_number(s = getsval(x), 0, ((void *)0), ((void *)0))) /* suspicion! */ | |||
| 967 | FATAL("illegal field $(%s), name \"%s\"", s, x->nval); | |||
| 968 | /* BUG: can x->nval ever be null??? */ | |||
| 969 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); | |||
| 970 | x = fieldadr(m); | |||
| 971 | x->ctype = OCELL1; /* BUG? why are these needed? */ | |||
| 972 | x->csub = CFLD1; | |||
| 973 | return(x); | |||
| 974 | } | |||
| 975 | ||||
| 976 | Cell *substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */ | |||
| 977 | { | |||
| 978 | int k, m, n; | |||
| 979 | int mb, nb; | |||
| 980 | char *s; | |||
| 981 | int temp; | |||
| 982 | Cell *x, *y, *z = NULL((void *)0); | |||
| 983 | ||||
| 984 | x = execute(a[0]); | |||
| 985 | y = execute(a[1]); | |||
| 986 | if (a[2] != NULL((void *)0)) | |||
| 987 | z = execute(a[2]); | |||
| 988 | s = getsval(x); | |||
| 989 | k = u8_strlen(s) + 1; | |||
| 990 | if (k <= 1) { | |||
| 991 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); | |||
| 992 | tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0); | |||
| 993 | if (a[2] != NULL((void *)0)) { | |||
| 994 | tempfree(z)do { if (((z)->csub == 4)) tfree(z); } while ( 0); | |||
| 995 | } | |||
| 996 | x = gettemp(); | |||
| 997 | setsval(x, ""); | |||
| 998 | return(x); | |||
| 999 | } | |||
| 1000 | m = (int) getfval(y); | |||
| 1001 | if (m <= 0) | |||
| 1002 | m = 1; | |||
| 1003 | else if (m > k) | |||
| 1004 | m = k; | |||
| 1005 | tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0); | |||
| 1006 | if (a[2] != NULL((void *)0)) { | |||
| 1007 | n = (int) getfval(z); | |||
| 1008 | tempfree(z)do { if (((z)->csub == 4)) tfree(z); } while ( 0); | |||
| 1009 | } else | |||
| 1010 | n = k - 1; | |||
| 1011 | if (n < 0) | |||
| 1012 | n = 0; | |||
| 1013 | else if (n > k - m) | |||
| 1014 | n = k - m; | |||
| 1015 | /* m is start, n is length from there */ | |||
| 1016 | DPRINTF("substr: m=%d, n=%d, s=%s\n", m, n, s)if (dbg) printf("substr: m=%d, n=%d, s=%s\n", m, n, s); | |||
| 1017 | y = gettemp(); | |||
| 1018 | mb = u8_char2byte(s, m-1); /* byte offset of start char in s */ | |||
| 1019 | nb = u8_char2byte(s, m-1+n); /* byte offset of end+1 char in s */ | |||
| 1020 | ||||
| 1021 | temp = s[nb]; /* with thanks to John Linderman */ | |||
| 1022 | s[nb] = '\0'; | |||
| 1023 | setsval(y, s + mb); | |||
| 1024 | s[nb] = temp; | |||
| 1025 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); | |||
| 1026 | return(y); | |||
| 1027 | } | |||
| 1028 | ||||
| 1029 | Cell *sindex(Node **a, int nnn) /* index(a[0], a[1]) */ | |||
| 1030 | { | |||
| 1031 | Cell *x, *y, *z; | |||
| 1032 | char *s1, *s2, *p1, *p2, *q; | |||
| 1033 | Awkfloat v = 0.0; | |||
| 1034 | ||||
| 1035 | x = execute(a[0]); | |||
| 1036 | s1 = getsval(x); | |||
| 1037 | y = execute(a[1]); | |||
| 1038 | s2 = getsval(y); | |||
| 1039 | ||||
| 1040 | z = gettemp(); | |||
| 1041 | for (p1 = s1; *p1 != '\0'; p1++) { | |||
| 1042 | for (q = p1, p2 = s2; *p2 != '\0' && *q == *p2; q++, p2++) | |||
| 1043 | continue; | |||
| 1044 | if (*p2 == '\0') { | |||
| 1045 | /* v = (Awkfloat) (p1 - s1 + 1); origin 1 */ | |||
| 1046 | ||||
| 1047 | /* should be a function: used in match() as well */ | |||
| 1048 | int i, len; | |||
| 1049 | v = 0; | |||
| 1050 | for (i = 0; i < p1-s1+1; i += len) { | |||
| 1051 | len = u8_nextlen(s1+i); | |||
| 1052 | v++; | |||
| 1053 | } | |||
| 1054 | break; | |||
| 1055 | } | |||
| 1056 | } | |||
| 1057 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); | |||
| 1058 | tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0); | |||
| 1059 | setfval(z, v); | |||
| 1060 | return(z); | |||
| 1061 | } | |||
| 1062 | ||||
/*
 * has_utf8: return 1 if u8_nextlen() reports a length greater than one
 * for any character of s, and 0 otherwise.
 */
int has_utf8(char *s)
{
	while (*s != 0) {
		int len = u8_nextlen(s);

		if (len > 1)
			return 1;
		s += len;
	}
	return 0;
}
| 1074 | ||||
| 1075 | #define MAXNUMSIZE50 50 | |||
| 1076 | ||||
| 1077 | int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like conversions */ | |||
| 1078 | { | |||
| 1079 | char *fmt; | |||
| 1080 | char *p, *t; | |||
| 1081 | const char *os; | |||
| 1082 | Cell *x; | |||
| 1083 | int flag = 0, n; | |||
| 1084 | int fmtwd; /* format width */ | |||
| 1085 | int fmtsz = recsize; | |||
| 1086 | char *buf = *pbuf; | |||
| 1087 | int bufsize = *pbufsize; | |||
| 1088 | #define FMTSZ(a)(fmtsz - ((a) - fmt)) (fmtsz - ((a) - fmt)) | |||
| 1089 | #define BUFSZ(a)(bufsize - ((a) - buf)) (bufsize - ((a) - buf)) | |||
| 1090 | ||||
| 1091 | static bool_Bool first = true1; | |||
| 1092 | static bool_Bool have_a_format = false0; | |||
| 1093 | ||||
| 1094 | if (first) { | |||
| 1095 | char xbuf[100]; | |||
| 1096 | ||||
| 1097 | snprintf(xbuf, sizeof(xbuf), "%a", 42.0); | |||
| 1098 | have_a_format = (strcmp(xbuf, "0x1.5p+5") == 0); | |||
| 1099 | first = false0; | |||
| 1100 | } | |||
| 1101 | ||||
| 1102 | os = s; | |||
| 1103 | p = buf; | |||
| 1104 | if ((fmt = (char *) malloc(fmtsz)) == NULL((void *)0)) | |||
| 1105 | FATAL("out of memory in format()"); | |||
| 1106 | while (*s) { | |||
| 1107 | adjbuf(&buf, &bufsize, MAXNUMSIZE50+1+p-buf, recsize, &p, "format1"); | |||
| 1108 | if (*s != '%') { | |||
| 1109 | *p++ = *s++; | |||
| 1110 | continue; | |||
| 1111 | } | |||
| 1112 | if (*(s+1) == '%') { | |||
| 1113 | *p++ = '%'; | |||
| 1114 | s += 2; | |||
| 1115 | continue; | |||
| 1116 | } | |||
| 1117 | fmtwd = atoi(s+1); | |||
| 1118 | if (fmtwd < 0) | |||
| 1119 | fmtwd = -fmtwd; | |||
| 1120 | adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format2"); | |||
| 1121 | for (t = fmt; (*t++ = *s) != '\0'; s++) { | |||
| 1122 | if (!adjbuf(&fmt, &fmtsz, MAXNUMSIZE50+1+t-fmt, recsize, &t, "format3")) | |||
| 1123 | FATAL("format item %.30s... ran format() out of memory", os); | |||
| 1124 | /* Ignore size specifiers */ | |||
| 1125 | if (strchr("hjLlqtz", *s) != NULL((void *)0)) { /* the ansi panoply */ | |||
| 1126 | t--; | |||
| 1127 | continue; | |||
| 1128 | } | |||
| 1129 | if (isalpha((uschar)*s)) | |||
| 1130 | break; | |||
| 1131 | if (*s == '$') { | |||
| 1132 | FATAL("'$' not permitted in awk formats"); | |||
| 1133 | } | |||
| 1134 | if (*s == '*') { | |||
| 1135 | if (a == NULL((void *)0)) { | |||
| 1136 | FATAL("not enough args in printf(%s)", os); | |||
| 1137 | } | |||
| 1138 | x = execute(a); | |||
| 1139 | a = a->nnext; | |||
| 1140 | snprintf(t - 1, FMTSZ(t - 1)(fmtsz - ((t - 1) - fmt)), | |||
| 1141 | "%d", fmtwd=(int) getfval(x)); | |||
| 1142 | if (fmtwd < 0) | |||
| 1143 | fmtwd = -fmtwd; | |||
| 1144 | adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format"); | |||
| 1145 | t = fmt + strlen(fmt); | |||
| 1146 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); | |||
| 1147 | } | |||
| 1148 | } | |||
| 1149 | *t = '\0'; | |||
| 1150 | if (fmtwd < 0) | |||
| 1151 | fmtwd = -fmtwd; | |||
| 1152 | adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format4"); | |||
| 1153 | switch (*s) { | |||
| 1154 | case 'a': case 'A': | |||
| 1155 | if (have_a_format) | |||
| 1156 | flag = *s; | |||
| 1157 | else | |||
| 1158 | flag = 'f'; | |||
| 1159 | break; | |||
| 1160 | case 'f': case 'e': case 'g': case 'E': case 'G': | |||
| 1161 | flag = 'f'; | |||
| 1162 | break; | |||
| 1163 | case 'd': case 'i': case 'o': case 'x': case 'X': case 'u': | |||
| 1164 | flag = (*s == 'd' || *s == 'i') ? 'd' : 'u'; | |||
| 1165 | *(t-1) = 'j'; | |||
| 1166 | *t = *s; | |||
| 1167 | *++t = '\0'; | |||
| 1168 | break; | |||
| 1169 | case 's': | |||
| 1170 | flag = 's'; | |||
| 1171 | break; | |||
| 1172 | case 'c': | |||
| 1173 | flag = 'c'; | |||
| 1174 | break; | |||
| 1175 | default: | |||
| 1176 | WARNING("weird printf conversion %s", fmt); | |||
| 1177 | flag = '?'; | |||
| 1178 | break; | |||
| 1179 | } | |||
| 1180 | if (a == NULL((void *)0)) | |||
| 1181 | FATAL("not enough args in printf(%s)", os); | |||
| 1182 | x = execute(a); | |||
| 1183 | a = a->nnext; | |||
| 1184 | n = MAXNUMSIZE50; | |||
| 1185 | if (fmtwd > n) | |||
| 1186 | n = fmtwd; | |||
| 1187 | adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format5"); | |||
| 1188 | switch (flag) { | |||
| 1189 | case '?': | |||
| 1190 | snprintf(p, BUFSZ(p)(bufsize - ((p) - buf)), "%s", fmt); /* unknown, so dump it too */ | |||
| 1191 | t = getsval(x); | |||
| 1192 | n = strlen(t); | |||
| 1193 | if (fmtwd > n) | |||
| 1194 | n = fmtwd; | |||
| 1195 | adjbuf(&buf, &bufsize, 1+strlen(p)+n+p-buf, recsize, &p, "format6"); | |||
| 1196 | p += strlen(p); | |||
| 1197 | snprintf(p, BUFSZ(p)(bufsize - ((p) - buf)), "%s", t); | |||
| 1198 | break; | |||
| 1199 | case 'a': | |||
| 1200 | case 'A': | |||
| 1201 | case 'f': snprintf(p, BUFSZ(p)(bufsize - ((p) - buf)), fmt, getfval(x)); break; | |||
| 1202 | case 'd': snprintf(p, BUFSZ(p)(bufsize - ((p) - buf)), fmt, (intmax_t) getfval(x)); break; | |||
| 1203 | case 'u': snprintf(p, BUFSZ(p)(bufsize - ((p) - buf)), fmt, (uintmax_t) getfval(x)); break; | |||
| 1204 | ||||
| 1205 | case 's': { | |||
| 1206 | t = getsval(x); | |||
| 1207 | n = strlen(t); | |||
| 1208 | /* if simple format or no utf-8 in the string, sprintf works */ | |||
| 1209 | if (!has_utf8(t) || strcmp(fmt,"%s") == 0) { | |||
| 1210 | if (fmtwd > n) | |||
| 1211 | n = fmtwd; | |||
| 1212 | if (!adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format7")) | |||
| 1213 | FATAL("huge string/format (%d chars) in printf %.30s..." \ | |||
| 1214 | " ran format() out of memory", n, t); | |||
| 1215 | snprintf(p, BUFSZ(p)(bufsize - ((p) - buf)), fmt, t); | |||
| 1216 | break; | |||
| 1217 | } | |||
| 1218 | ||||
| 1219 | /* get here if string has utf-8 chars and fmt is not plain %s */ | |||
| 1220 | /* "%-w.ps", where -, w and .p are all optional */ | |||
| 1221 | /* '0' before the w is a flag character */ | |||
| 1222 | /* fmt points at % */ | |||
| 1223 | int ljust = 0, wid = 0, prec = n, pad = 0; | |||
| 1224 | char *f = fmt+1; | |||
| 1225 | if (f[0] == '-') { | |||
| 1226 | ljust = 1; | |||
| 1227 | f++; | |||
| 1228 | } | |||
| 1229 | // flags '0' and '+' are recognized but skipped | |||
| 1230 | if (f[0] == '0') { | |||
| 1231 | f++; | |||
| 1232 | if (f[0] == '+') | |||
| 1233 | f++; | |||
| 1234 | } | |||
| 1235 | if (f[0] == '+') { | |||
| 1236 | f++; | |||
| 1237 | if (f[0] == '0') | |||
| 1238 | f++; | |||
| 1239 | } | |||
| 1240 | if (isdigit((uschar)f[0])) { /* there is a wid */ | |||
| 1241 | wid = strtol(f, &f, 10); | |||
| 1242 | } | |||
| 1243 | if (f[0] == '.') { /* there is a .prec */ | |||
| 1244 | prec = strtol(++f, &f, 10); | |||
| 1245 | } | |||
| 1246 | if (prec > u8_strlen(t)) | |||
| 1247 | prec = u8_strlen(t); | |||
| 1248 | pad = wid>prec ? wid - prec : 0; // has to be >= 0 | |||
| 1249 | int i, k, n; | |||
| 1250 | ||||
| 1251 | if (ljust) { // print prec chars from t, then pad blanks | |||
| 1252 | n = u8_char2byte(t, prec); | |||
| 1253 | for (k = 0; k < n; k++) { | |||
| 1254 | //putchar(t[k]); | |||
| 1255 | *p++ = t[k]; | |||
| 1256 | } | |||
| 1257 | for (i = 0; i < pad; i++) { | |||
| 1258 | //printf(" "); | |||
| 1259 | *p++ = ' '; | |||
| 1260 | } | |||
| 1261 | } else { // print pad blanks, then prec chars from t | |||
| 1262 | for (i = 0; i < pad; i++) { | |||
| 1263 | //printf(" "); | |||
| 1264 | *p++ = ' '; | |||
| 1265 | } | |||
| 1266 | n = u8_char2byte(t, prec); | |||
| 1267 | for (k = 0; k < n; k++) { | |||
| 1268 | //putchar(t[k]); | |||
| 1269 | *p++ = t[k]; | |||
| 1270 | } | |||
| 1271 | } | |||
| 1272 | *p = 0; | |||
| 1273 | break; | |||
| 1274 | } | |||
| 1275 | ||||
| 1276 | case 'c': { | |||
| 1277 | /* | |||
| 1278 | * If a numeric value is given, awk should just turn | |||
| 1279 | * it into a character and print it: | |||
| 1280 | * BEGIN { printf("%c\n", 65) } | |||
| 1281 | * prints "A". | |||
| 1282 | * | |||
| 1283 | * But what if the numeric value is > 128 and | |||
| 1284 | * represents a valid Unicode code point?!? We do | |||
| 1285 | * our best to convert it back into UTF-8. If we | |||
| 1286 | * can't, we output the encoding of the Unicode | |||
| 1287 | * "invalid character", 0xFFFD. | |||
| 1288 | */ | |||
| 1289 | if (isnum(x)((x)->tval & 01)) { | |||
| 1290 | int charval = (int) getfval(x); | |||
| 1291 | ||||
| 1292 | if (charval != 0) { | |||
| 1293 | if (charval < 128 || awk_mb_cur_max == 1) | |||
| 1294 | snprintf(p, BUFSZ(p)(bufsize - ((p) - buf)), fmt, charval); | |||
| 1295 | else { | |||
| 1296 | // possible unicode character | |||
| 1297 | size_t count; | |||
| 1298 | char *bs = wide_char_to_byte_str(charval, &count); | |||
| 1299 | ||||
| 1300 | if (bs == NULL((void *)0)) { // invalid character | |||
| 1301 | // use unicode invalid character, 0xFFFD | |||
| 1302 | bs = "\357\277\275"; | |||
| 1303 | count = 3; | |||
| 1304 | } | |||
| 1305 | t = bs; | |||
| 1306 | n = count; | |||
| 1307 | goto format_percent_c; | |||
| 1308 | } | |||
| 1309 | } else { | |||
| 1310 | *p++ = '\0'; /* explicit null byte */ | |||
| 1311 | *p = '\0'; /* next output will start here */ | |||
| 1312 | } | |||
| 1313 | break; | |||
| 1314 | } | |||
| 1315 | t = getsval(x); | |||
| 1316 | n = u8_nextlen(t); | |||
| 1317 | format_percent_c: | |||
| 1318 | if (n < 2) { /* not utf8 */ | |||
| 1319 | snprintf(p, BUFSZ(p)(bufsize - ((p) - buf)), fmt, getsval(x)[0]); | |||
| 1320 | break; | |||
| 1321 | } | |||
| 1322 | ||||
| 1323 | // utf8 character, almost same song and dance as for %s | |||
| 1324 | int ljust = 0, wid = 0, prec = n, pad = 0; | |||
| 1325 | char *f = fmt+1; | |||
| 1326 | if (f[0] == '-') { | |||
| 1327 | ljust = 1; | |||
| 1328 | f++; | |||
| 1329 | } | |||
| 1330 | // flags '0' and '+' are recognized but skipped | |||
| 1331 | if (f[0] == '0') { | |||
| 1332 | f++; | |||
| 1333 | if (f[0] == '+') | |||
| 1334 | f++; | |||
| 1335 | } | |||
| 1336 | if (f[0] == '+') { | |||
| 1337 | f++; | |||
| 1338 | if (f[0] == '0') | |||
| 1339 | f++; | |||
| 1340 | } | |||
| 1341 | if (isdigit((uschar)f[0])) { /* there is a wid */ | |||
| 1342 | wid = strtol(f, &f, 10); | |||
| 1343 | } | |||
| 1344 | if (f[0] == '.') { /* there is a .prec */ | |||
| 1345 | prec = strtol(++f, &f, 10); | |||
| 1346 | } | |||
| 1347 | if (prec > 1) // %c --> only one character | |||
| 1348 | prec = 1; | |||
| 1349 | pad = wid>prec ? wid - prec : 0; // has to be >= 0 | |||
| 1350 | int i; | |||
| 1351 | ||||
| 1352 | if (ljust) { // print one char from t, then pad blanks | |||
| 1353 | for (i = 0; i < n; i++) | |||
| 1354 | *p++ = t[i]; | |||
| 1355 | for (i = 0; i < pad; i++) { | |||
| 1356 | //printf(" "); | |||
| 1357 | *p++ = ' '; | |||
| 1358 | } | |||
| 1359 | } else { // print pad blanks, then prec chars from t | |||
| 1360 | for (i = 0; i < pad; i++) { | |||
| 1361 | //printf(" "); | |||
| 1362 | *p++ = ' '; | |||
| 1363 | } | |||
| 1364 | for (i = 0; i < n; i++) | |||
| 1365 | *p++ = t[i]; | |||
| 1366 | } | |||
| 1367 | *p = 0; | |||
| 1368 | break; | |||
| 1369 | } | |||
| 1370 | default: | |||
| 1371 | FATAL("can't happen: bad conversion %c in format()", flag); | |||
| 1372 | } | |||
| 1373 | ||||
| 1374 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); | |||
| 1375 | p += strlen(p); | |||
| 1376 | s++; | |||
| 1377 | } | |||
| 1378 | *p = '\0'; | |||
| 1379 | free(fmt); | |||
| 1380 | for ( ; a; a = a->nnext) { /* evaluate any remaining args */ | |||
| 1381 | x = execute(a); | |||
| 1382 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); | |||
| 1383 | } | |||
| 1384 | *pbuf = buf; | |||
| 1385 | *pbufsize = bufsize; | |||
| 1386 | return p - buf; | |||
| 1387 | } | |||
| 1388 | ||||
| 1389 | Cell *awksprintf(Node **a, int n) /* sprintf(a[0]) */ | |||
| 1390 | { | |||
| 1391 | Cell *x; | |||
| 1392 | Node *y; | |||
| 1393 | char *buf; | |||
| 1394 | int bufsz=3*recsize; | |||
| 1395 | ||||
| 1396 | if ((buf = (char *) malloc(bufsz)) == NULL((void *)0)) | |||
| 1397 | FATAL("out of memory in awksprintf"); | |||
| 1398 | y = a[0]->nnext; | |||
| 1399 | x = execute(a[0]); | |||
| 1400 | if (format(&buf, &bufsz, getsval(x), y) == -1) | |||
| 1401 | FATAL("sprintf string %.30s... too long. can't happen.", buf); | |||
| 1402 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); | |||
| 1403 | x = gettemp(); | |||
| 1404 | x->sval = buf; | |||
| 1405 | x->tval = STR02; | |||
| 1406 | return(x); | |||
| 1407 | } | |||
| 1408 | ||||
| 1409 | Cell *awkprintf(Node **a, int n) /* printf */ | |||
| 1410 | { /* a[0] is list of args, starting with format string */ | |||
| 1411 | /* a[1] is redirection operator, a[2] is redirection file */ | |||
| 1412 | FILE *fp; | |||
| 1413 | Cell *x; | |||
| 1414 | Node *y; | |||
| 1415 | char *buf; | |||
| 1416 | int len; | |||
| 1417 | int bufsz=3*recsize; | |||
| 1418 | ||||
| 1419 | if ((buf = (char *) malloc(bufsz)) == NULL((void *)0)) | |||
| 1420 | FATAL("out of memory in awkprintf"); | |||
| 1421 | y = a[0]->nnext; | |||
| 1422 | x = execute(a[0]); | |||
| 1423 | if ((len = format(&buf, &bufsz, getsval(x), y)) == -1) | |||
| 1424 | FATAL("printf string %.30s... too long. can't happen.", buf); | |||
| 1425 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); | |||
| 1426 | if (a[1] == NULL((void *)0)) { | |||
| 1427 | /* fputs(buf, stdout); */ | |||
| 1428 | fwrite(buf, len, 1, stdout(&__sF[1])); | |||
| 1429 | if (ferror(stdout)(!__isthreaded ? ((((&__sF[1]))->_flags & 0x0040) != 0) : (ferror)((&__sF[1])))) | |||
| 1430 | FATAL("write error on stdout"); | |||
| 1431 | } else { | |||
| 1432 | fp = redirect(ptoi(a[1]), a[2]); | |||
| 1433 | /* fputs(buf, fp); */ | |||
| 1434 | fwrite(buf, len, 1, fp); | |||
| 1435 | fflush(fp); | |||
| 1436 | if (ferror(fp)(!__isthreaded ? (((fp)->_flags & 0x0040) != 0) : (ferror )(fp))) | |||
| 1437 | FATAL("write error on %s", filename(fp)); | |||
| 1438 | } | |||
| 1439 | free(buf); | |||
| 1440 | return(True); | |||
| 1441 | } | |||
| 1442 | ||||
| 1443 | Cell *arith(Node **a, int n) /* a[0] + a[1], etc. also -a[0] */ | |||
| 1444 | { | |||
| 1445 | Awkfloat i, j = 0; | |||
| 1446 | double v; | |||
| 1447 | Cell *x, *y, *z; | |||
| 1448 | ||||
| 1449 | x = execute(a[0]); | |||
| 1450 | i = getfval(x); | |||
| 1451 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); | |||
| 1452 | if (n != UMINUS346 && n != UPLUS347) { | |||
| 1453 | y = execute(a[1]); | |||
| 1454 | j = getfval(y); | |||
| 1455 | tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0); | |||
| 1456 | } | |||
| 1457 | z = gettemp(); | |||
| 1458 | switch (n) { | |||
| 1459 | case ADD309: | |||
| 1460 | i += j; | |||
| 1461 | break; | |||
| 1462 | case MINUS310: | |||
| 1463 | i -= j; | |||
| 1464 | break; | |||
| 1465 | case MULT311: | |||
| 1466 | i *= j; | |||
| 1467 | break; | |||
| 1468 | case DIVIDE312: | |||
| 1469 | if (j == 0) | |||
| 1470 | FATAL("division by zero"); | |||
| 1471 | i /= j; | |||
| 1472 | break; | |||
| 1473 | case MOD313: | |||
| 1474 | if (j == 0) | |||
| 1475 | FATAL("division by zero in mod"); | |||
| 1476 | modf(i/j, &v); | |||
| 1477 | i = i - j * v; | |||
| 1478 | break; | |||
| 1479 | case UMINUS346: | |||
| 1480 | i = -i; | |||
| 1481 | break; | |||
| 1482 | case UPLUS347: /* handled by getfval(), above */ | |||
| 1483 | break; | |||
| 1484 | case POWER348: | |||
| 1485 | if (j >= 0 && modf(j, &v) == 0.0) /* pos integer exponent */ | |||
| 1486 | i = ipow(i, (int) j); | |||
| 1487 | else { | |||
| 1488 | errno(*__errno()) = 0; | |||
| 1489 | i = errcheck(pow(i, j), "pow"); | |||
| 1490 | } | |||
| 1491 | break; | |||
| 1492 | default: /* can't happen */ | |||
| 1493 | FATAL("illegal arithmetic operator %d", n); | |||
| 1494 | } | |||
| 1495 | setfval(z, i); | |||
| 1496 | return(z); | |||
| 1497 | } | |||
| 1498 | ||||
/*
 * ipow: x raised to the integer power n by repeated squaring.
 * Returns 1 for any n <= 0.
 *
 * The bits of n are consumed from the most significant downward, which
 * performs the multiplications in the same order as the recursive
 * formulation (square the partial result, then multiply by x when the
 * bit is set), so the floating-point result is the same.
 */
double ipow(double x, int n)
{
	double result = 1;
	unsigned int bits, mask;

	if (n <= 0)
		return 1;

	bits = (unsigned int) n;
	/* locate the highest set bit of n */
	mask = 1;
	while (mask <= bits / 2)
		mask <<= 1;

	for (; mask != 0; mask >>= 1) {
		if (bits & mask)
			result = x * result * result;
		else
			result = result * result;
	}
	return result;
}
| 1511 | ||||
| 1512 | Cell *incrdecr(Node **a, int n) /* a[0]++, etc. */ | |||
| 1513 | { | |||
| 1514 | Cell *x, *z; | |||
| 1515 | int k; | |||
| 1516 | Awkfloat xf; | |||
| 1517 | ||||
| 1518 | x = execute(a[0]); | |||
| 1519 | xf = getfval(x); | |||
| 1520 | k = (n == PREINCR329 || n == POSTINCR328) ? 1 : -1; | |||
| 1521 | if (n == PREINCR329 || n == PREDECR331) { | |||
| 1522 | setfval(x, xf + k); | |||
| 1523 | return(x); | |||
| 1524 | } | |||
| 1525 | z = gettemp(); | |||
| 1526 | setfval(z, xf); | |||
| 1527 | setfval(x, xf + k); | |||
| 1528 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); | |||
| 1529 | return(z); | |||
| 1530 | } | |||
| 1531 | ||||
| 1532 | Cell *assign(Node **a, int n) /* a[0] = a[1], a[0] += a[1], etc. */ | |||
| 1533 | { /* this is subtle; don't muck with it. */ | |||
| 1534 | Cell *x, *y; | |||
| 1535 | Awkfloat xf, yf; | |||
| 1536 | double v; | |||
| 1537 | ||||
| 1538 | y = execute(a[1]); | |||
| 1539 | x = execute(a[0]); | |||
| 1540 | if (n == ASSIGN314) { /* ordinary assignment */ | |||
| 1541 | if (x == y && !(x->tval & (FLD0100|REC0200)) && x != nfloc) | |||
| 1542 | ; /* self-assignment: leave alone unless it's a field or NF */ | |||
| 1543 | else if ((y->tval & (STR02|NUM01)) == (STR02|NUM01)) { | |||
| 1544 | yf = getfval(y); | |||
| 1545 | setsval(x, getsval(y)); | |||
| 1546 | x->fval = yf; | |||
| 1547 | x->tval |= NUM01; | |||
| 1548 | } | |||
| 1549 | else if (isstr(y)((y)->tval & 02)) | |||
| 1550 | setsval(x, getsval(y)); | |||
| 1551 | else if (isnum(y)((y)->tval & 01)) | |||
| 1552 | setfval(x, getfval(y)); | |||
| 1553 | else | |||
| 1554 | funnyvar(y, "read value of"); | |||
| 1555 | tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0); | |||
| 1556 | return(x); | |||
| 1557 | } | |||
| 1558 | xf = getfval(x); | |||
| 1559 | yf = getfval(y); | |||
| 1560 | switch (n) { | |||
| 1561 | case ADDEQ316: | |||
| 1562 | xf += yf; | |||
| 1563 | break; | |||
| 1564 | case SUBEQ317: | |||
| 1565 | xf -= yf; | |||
| 1566 | break; | |||
| 1567 | case MULTEQ318: | |||
| 1568 | xf *= yf; | |||
| 1569 | break; | |||
| 1570 | case DIVEQ319: | |||
| 1571 | if (yf == 0) | |||
| 1572 | FATAL("division by zero in /="); | |||
| 1573 | xf /= yf; | |||
| 1574 | break; | |||
| 1575 | case MODEQ320: | |||
| 1576 | if (yf == 0) | |||
| 1577 | FATAL("division by zero in %%="); | |||
| 1578 | modf(xf/yf, &v); | |||
| 1579 | xf = xf - yf * v; | |||
| 1580 | break; | |||
| 1581 | case POWEQ321: | |||
| 1582 | if (yf >= 0 && modf(yf, &v) == 0.0) /* pos integer exponent */ | |||
| 1583 | xf = ipow(xf, (int) yf); | |||
| 1584 | else { | |||
| 1585 | errno(*__errno()) = 0; | |||
| 1586 | xf = errcheck(pow(xf, yf), "pow"); | |||
| 1587 | } | |||
| 1588 | break; | |||
| 1589 | default: | |||
| 1590 | FATAL("illegal assignment operator %d", n); | |||
| 1591 | break; | |||
| 1592 | } | |||
| 1593 | tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0); | |||
| 1594 | setfval(x, xf); | |||
| 1595 | return(x); | |||
| 1596 | } | |||
| 1597 | ||||
| 1598 | Cell *cat(Node **a, int q) /* a[0] cat a[1] */ | |||
| 1599 | { | |||
| 1600 | Cell *x, *y, *z; | |||
| 1601 | int n1, n2; | |||
| 1602 | char *s = NULL((void *)0); | |||
| 1603 | int ssz = 0; | |||
| 1604 | ||||
| 1605 | x = execute(a[0]); | |||
| 1606 | n1 = strlen(getsval(x)); | |||
| 1607 | adjbuf(&s, &ssz, n1 + 1, recsize, 0, "cat1"); | |||
| 1608 | memcpy(s, x->sval, n1); | |||
| 1609 | ||||
| 1610 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); | |||
| 1611 | ||||
| 1612 | y = execute(a[1]); | |||
| 1613 | n2 = strlen(getsval(y)); | |||
| 1614 | adjbuf(&s, &ssz, n1 + n2 + 1, recsize, 0, "cat2"); | |||
| 1615 | memcpy(s + n1, y->sval, n2); | |||
| 1616 | s[n1 + n2] = '\0'; | |||
| 1617 | ||||
| 1618 | tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0); | |||
| 1619 | ||||
| 1620 | z = gettemp(); | |||
| 1621 | z->sval = s; | |||
| 1622 | z->tval = STR02; | |||
| 1623 | ||||
| 1624 | return(z); | |||
| 1625 | } | |||
| 1626 | ||||
| 1627 | Cell *pastat(Node **a, int n) /* a[0] { a[1] } */ | |||
| 1628 | { | |||
| 1629 | Cell *x; | |||
| 1630 | ||||
| 1631 | if (a[0] == NULL((void *)0)) | |||
| 1632 | x = execute(a[1]); | |||
| 1633 | else { | |||
| 1634 | x = execute(a[0]); | |||
| 1635 | if (istrue(x)((x)->csub == 11)) { | |||
| 1636 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); | |||
| 1637 | x = execute(a[1]); | |||
| 1638 | } | |||
| 1639 | } | |||
| 1640 | return x; | |||
| 1641 | } | |||
| 1642 | ||||
| 1643 | Cell *dopa2(Node **a, int n) /* a[0], a[1] { a[2] } */ | |||
| 1644 | { | |||
| 1645 | Cell *x; | |||
| 1646 | int pair; | |||
| 1647 | ||||
| 1648 | pair = ptoi(a[3]); | |||
| 1649 | if (pairstack[pair] == 0) { | |||
| 1650 | x = execute(a[0]); | |||
| 1651 | if (istrue(x)((x)->csub == 11)) | |||
| 1652 | pairstack[pair] = 1; | |||
| 1653 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); | |||
| 1654 | } | |||
| 1655 | if (pairstack[pair] == 1) { | |||
| 1656 | x = execute(a[1]); | |||
| 1657 | if (istrue(x)((x)->csub == 11)) | |||
| 1658 | pairstack[pair] = 0; | |||
| 1659 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); | |||
| 1660 | x = execute(a[2]); | |||
| 1661 | return(x); | |||
| 1662 | } | |||
| 1663 | return(False); | |||
| 1664 | } | |||
| 1665 | ||||
| 1666 | Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ | |||
| 1667 | { | |||
| 1668 | Cell *x = NULL((void *)0), *y, *ap; | |||
| 1669 | const char *s, *origs, *t; | |||
| 1670 | const char *fs = NULL((void *)0); | |||
| 1671 | char *origfs = NULL((void *)0); | |||
| 1672 | int sep; | |||
| 1673 | char temp, num[50]; | |||
| 1674 | int j, n, tempstat, arg3type; | |||
| 1675 | double result; | |||
| 1676 | ||||
| 1677 | y = execute(a[0]); /* source string */ | |||
| 1678 | origs = s = strdup(getsval(y)); | |||
| 1679 | if (s == NULL((void *)0)) | |||
| 1680 | FATAL("out of space in split"); | |||
| 1681 | tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0); | |||
| 1682 | arg3type = ptoi(a[3]); | |||
| 1683 | if (a[2] == NULL((void *)0)) { /* BUG: CSV should override implicit fs but not explicit */ | |||
| 1684 | fs = getsval(fsloc); | |||
| 1685 | } else if (arg3type == STRING337) { /* split(str,arr,"string") */ | |||
| 1686 | x = execute(a[2]); | |||
| 1687 | fs = origfs = strdup(getsval(x)); | |||
| 1688 | if (fs == NULL((void *)0)) | |||
| 1689 | FATAL("out of space in split"); | |||
| 1690 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); | |||
| 1691 | } else if (arg3type == REGEXPR338) { | |||
| 1692 | fs = "(regexpr)"; /* split(str,arr,/regexpr/) */ | |||
| 1693 | } else { | |||
| 1694 | FATAL("illegal type of split"); | |||
| 1695 | } | |||
| 1696 | sep = *fs; | |||
| 1697 | ap = execute(a[1]); /* array name */ | |||
| 1698 | /* BUG 7/26/22: this appears not to reset array: see C1/asplit */ | |||
| 1699 | freesymtab(ap); | |||
| 1700 | DPRINTF("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs)if (dbg) printf("split: s=|%s|, a=%s, sep=|%s|\n", s, ((ap-> nval) ? (ap->nval) : "(null)"), fs); | |||
| 1701 | ap->tval &= ~STR02; | |||
| 1702 | ap->tval |= ARR020; | |||
| 1703 | ap->sval = (char *) makesymtab(NSYMTAB50); | |||
| 1704 | ||||
| 1705 | n = 0; | |||
| 1706 | if (arg3type == REGEXPR338 && strlen((char*)((fa*)a[2])->restr) == 0) { | |||
| 1707 | /* split(s, a, //); have to arrange that it looks like empty sep */ | |||
| 1708 | arg3type = 0; | |||
| 1709 | fs = ""; | |||
| 1710 | sep = 0; | |||
| 1711 | } | |||
| 1712 | if (*s != '\0' && (strlen(fs) > 1 || arg3type == REGEXPR338)) { /* reg expr */ | |||
| 1713 | fa *pfa; | |||
| 1714 | if (arg3type == REGEXPR338) { /* it's ready already */ | |||
| 1715 | pfa = (fa *) a[2]; | |||
| 1716 | } else { | |||
| 1717 | pfa = makedfa(fs, 1); | |||
| 1718 | } | |||
| 1719 | if (nematch(pfa,s)) { | |||
| 1720 | tempstat = pfa->initstat; | |||
| 1721 | pfa->initstat = 2; | |||
| 1722 | do { | |||
| 1723 | n++; | |||
| 1724 | snprintf(num, sizeof(num), "%d", n); | |||
| 1725 | temp = *patbeg; | |||
| 1726 | setptr(patbeg, '\0')(*(char *)(intptr_t)(patbeg)) = ('\0'); | |||
| 1727 | if (is_number(s, & result)is_valid_number(s, 0, ((void *)0), & result)) | |||
| 1728 | setsymtab(num, s, result, STR02|NUM01, (Array *) ap->sval); | |||
| 1729 | else | |||
| 1730 | setsymtab(num, s, 0.0, STR02, (Array *) ap->sval); | |||
| 1731 | setptr(patbeg, temp)(*(char *)(intptr_t)(patbeg)) = (temp); | |||
| 1732 | s = patbeg + patlen; | |||
| 1733 | if (*(patbeg+patlen-1) == '\0' || *s == '\0') { | |||
| 1734 | n++; | |||
| 1735 | snprintf(num, sizeof(num), "%d", n); | |||
| 1736 | setsymtab(num, "", 0.0, STR02, (Array *) ap->sval); | |||
| 1737 | pfa->initstat = tempstat; | |||
| 1738 | goto spdone; | |||
| 1739 | } | |||
| 1740 | } while (nematch(pfa,s)); | |||
| 1741 | pfa->initstat = tempstat; /* bwk: has to be here to reset */ | |||
| 1742 | /* cf gsub and refldbld */ | |||
| 1743 | } | |||
| 1744 | n++; | |||
| 1745 | snprintf(num, sizeof(num), "%d", n); | |||
| 1746 | if (is_number(s, & result)is_valid_number(s, 0, ((void *)0), & result)) | |||
| 1747 | setsymtab(num, s, result, STR02|NUM01, (Array *) ap->sval); | |||
| 1748 | else | |||
| 1749 | setsymtab(num, s, 0.0, STR02, (Array *) ap->sval); | |||
| 1750 | spdone: | |||
| 1751 | pfa = NULL((void *)0); | |||
| 1752 | ||||
| 1753 | } else if (a[2] == NULL((void *)0) && CSV) { /* CSV only if no explicit separator */ | |||
| 1754 | char *newt = (char *) malloc(strlen(s)); /* for building new string; reuse for each field */ | |||
| 1755 | for (;;) { | |||
| 1756 | char *fr = newt; | |||
| 1757 | n++; | |||
| 1758 | if (*s == '"' ) { /* start of "..." */ | |||
| 1759 | for (s++ ; *s != '\0'; ) { | |||
| 1760 | if (*s == '"' && s[1] != '\0' && s[1] == '"') { | |||
| 1761 | s += 2; /* doubled quote */ | |||
| 1762 | *fr++ = '"'; | |||
| 1763 | } else if (*s == '"' && (s[1] == '\0' || s[1] == ',')) { | |||
| 1764 | s++; /* skip over closing quote */ | |||
| 1765 | break; | |||
| 1766 | } else { | |||
| 1767 | *fr++ = *s++; | |||
| 1768 | } | |||
| 1769 | } | |||
| 1770 | *fr++ = 0; | |||
| 1771 | } else { /* unquoted field */ | |||
| 1772 | while (*s != ',' && *s != '\0') | |||
| 1773 | *fr++ = *s++; | |||
| 1774 | *fr++ = 0; | |||
| 1775 | } | |||
| 1776 | snprintf(num, sizeof(num), "%d", n); | |||
| 1777 | if (is_number(newt, &result)is_valid_number(newt, 0, ((void *)0), &result)) | |||
| 1778 | setsymtab(num, newt, result, STR02|NUM01, (Array *) ap->sval); | |||
| 1779 | else | |||
| 1780 | setsymtab(num, newt, 0.0, STR02, (Array *) ap->sval); | |||
| 1781 | if (*s++ == '\0') | |||
| 1782 | break; | |||
| 1783 | } | |||
| 1784 | free(newt); | |||
| 1785 | ||||
| 1786 | } else if (!CSV && sep == ' ') { /* usual case: split on white space */ | |||
| 1787 | for (n = 0; ; ) { | |||
| 1788 | #define ISWS(c)((c) == ' ' || (c) == '\t' || (c) == '\n') ((c) == ' ' || (c) == '\t' || (c) == '\n') | |||
| 1789 | while (ISWS(*s)((*s) == ' ' || (*s) == '\t' || (*s) == '\n')) | |||
| 1790 | s++; | |||
| 1791 | if (*s == '\0') | |||
| 1792 | break; | |||
| 1793 | n++; | |||
| 1794 | t = s; | |||
| 1795 | do | |||
| 1796 | s++; | |||
| 1797 | while (*s != '\0' && !ISWS(*s)((*s) == ' ' || (*s) == '\t' || (*s) == '\n')); | |||
| 1798 | temp = *s; | |||
| 1799 | setptr(s, '\0')(*(char *)(intptr_t)(s)) = ('\0'); | |||
| 1800 | snprintf(num, sizeof(num), "%d", n); | |||
| 1801 | if (is_number(t, & result)is_valid_number(t, 0, ((void *)0), & result)) | |||
| 1802 | setsymtab(num, t, result, STR02|NUM01, (Array *) ap->sval); | |||
| 1803 | else | |||
| 1804 | setsymtab(num, t, 0.0, STR02, (Array *) ap->sval); | |||
| 1805 | setptr(s, temp)(*(char *)(intptr_t)(s)) = (temp); | |||
| 1806 | if (*s != '\0') | |||
| 1807 | s++; | |||
| 1808 | } | |||
| 1809 | ||||
| 1810 | } else if (sep == 0) { /* new: split(s, a, "") => 1 char/elem */ | |||
| 1811 | for (n = 0; *s != '\0'; s += u8_nextlen(s)) { | |||
| 1812 | char buf[10]; | |||
| 1813 | n++; | |||
| 1814 | snprintf(num, sizeof(num), "%d", n); | |||
| 1815 | ||||
| 1816 | for (j = 0; j < u8_nextlen(s); j++) { | |||
| 1817 | buf[j] = s[j]; | |||
| 1818 | } | |||
| 1819 | buf[j] = '\0'; | |||
| 1820 | ||||
| 1821 | if (isdigit((uschar)buf[0])) | |||
| 1822 | setsymtab(num, buf, atof(buf), STR02|NUM01, (Array *) ap->sval); | |||
| 1823 | else | |||
| 1824 | setsymtab(num, buf, 0.0, STR02, (Array *) ap->sval); | |||
| 1825 | } | |||
| 1826 | ||||
| 1827 | } else if (*s != '\0') { /* some random single character */ | |||
| 1828 | for (;;) { | |||
| 1829 | n++; | |||
| 1830 | t = s; | |||
| 1831 | while (*s != sep && *s != '\n' && *s != '\0') | |||
| 1832 | s++; | |||
| 1833 | temp = *s; | |||
| 1834 | setptr(s, '\0')(*(char *)(intptr_t)(s)) = ('\0'); | |||
| 1835 | snprintf(num, sizeof(num), "%d", n); | |||
| 1836 | if (is_number(t, & result)is_valid_number(t, 0, ((void *)0), & result)) | |||
| 1837 | setsymtab(num, t, result, STR02|NUM01, (Array *) ap->sval); | |||
| 1838 | else | |||
| 1839 | setsymtab(num, t, 0.0, STR02, (Array *) ap->sval); | |||
| 1840 | setptr(s, temp)(*(char *)(intptr_t)(s)) = (temp); | |||
| 1841 | if (*s++ == '\0') | |||
| 1842 | break; | |||
| 1843 | } | |||
| 1844 | } | |||
| 1845 | tempfree(ap)do { if (((ap)->csub == 4)) tfree(ap); } while ( 0); | |||
| 1846 | xfree(origs){ free((void *)(intptr_t)(origs)); (origs) = ((void *)0); }; | |||
| 1847 | xfree(origfs){ free((void *)(intptr_t)(origfs)); (origfs) = ((void *)0); }; | |||
| 1848 | x = gettemp(); | |||
| 1849 | x->tval = NUM01; | |||
| 1850 | x->fval = n; | |||
| 1851 | return(x); | |||
| 1852 | } | |||
| 1853 | ||||
| 1854 | Cell *condexpr(Node **a, int n) /* a[0] ? a[1] : a[2] */ | |||
| 1855 | { | |||
| 1856 | Cell *x; | |||
| 1857 | ||||
| 1858 | x = execute(a[0]); | |||
| 1859 | if (istrue(x)((x)->csub == 11)) { | |||
| 1860 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); | |||
| 1861 | x = execute(a[1]); | |||
| 1862 | } else { | |||
| 1863 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); | |||
| 1864 | x = execute(a[2]); | |||
| 1865 | } | |||
| 1866 | return(x); | |||
| 1867 | } | |||
| 1868 | ||||
| 1869 | Cell *ifstat(Node **a, int n) /* if (a[0]) a[1]; else a[2] */ | |||
| 1870 | { | |||
| 1871 | Cell *x; | |||
| 1872 | ||||
| 1873 | x = execute(a[0]); | |||
| 1874 | if (istrue(x)((x)->csub == 11)) { | |||
| 1875 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); | |||
| 1876 | x = execute(a[1]); | |||
| 1877 | } else if (a[2] != NULL((void *)0)) { | |||
| 1878 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); | |||
| 1879 | x = execute(a[2]); | |||
| 1880 | } | |||
| 1881 | return(x); | |||
| 1882 | } | |||
| 1883 | ||||
| 1884 | Cell *whilestat(Node **a, int n) /* while (a[0]) a[1] */ | |||
| 1885 | { | |||
| 1886 | Cell *x; | |||
| 1887 | ||||
| 1888 | for (;;) { | |||
| 1889 | x = execute(a[0]); | |||
| 1890 | if (!istrue(x)((x)->csub == 11)) | |||
| 1891 | return(x); | |||
| 1892 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); | |||
| 1893 | x = execute(a[1]); | |||
| 1894 | if (isbreak(x)((x)->csub == 23)) { | |||
| 1895 | x = True; | |||
| 1896 | return(x); | |||
| 1897 | } | |||
| 1898 | if (isnext(x)((x)->csub == 22 || (x)->csub == 26) || isexit(x)((x)->csub == 21) || isret(x)((x)->csub == 25)) | |||
| 1899 | return(x); | |||
| 1900 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); | |||
| 1901 | } | |||
| 1902 | } | |||
| 1903 | ||||
| 1904 | Cell *dostat(Node **a, int n) /* do a[0]; while(a[1]) */ | |||
| 1905 | { | |||
| 1906 | Cell *x; | |||
| 1907 | ||||
| 1908 | for (;;) { | |||
| 1909 | x = execute(a[0]); | |||
| 1910 | if (isbreak(x)((x)->csub == 23)) | |||
| 1911 | return True; | |||
| 1912 | if (isnext(x)((x)->csub == 22 || (x)->csub == 26) || isexit(x)((x)->csub == 21) || isret(x)((x)->csub == 25)) | |||
| 1913 | return(x); | |||
| 1914 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); | |||
| 1915 | x = execute(a[1]); | |||
| 1916 | if (!istrue(x)((x)->csub == 11)) | |||
| 1917 | return(x); | |||
| 1918 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); | |||
| 1919 | } | |||
| 1920 | } | |||
| 1921 | ||||
| 1922 | Cell *forstat(Node **a, int n) /* for (a[0]; a[1]; a[2]) a[3] */ | |||
| 1923 | { | |||
| 1924 | Cell *x; | |||
| 1925 | ||||
| 1926 | x = execute(a[0]); | |||
| 1927 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); | |||
| 1928 | for (;;) { | |||
| 1929 | if (a[1]!=NULL((void *)0)) { | |||
| 1930 | x = execute(a[1]); | |||
| 1931 | if (!istrue(x)((x)->csub == 11)) return(x); | |||
| 1932 | else tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); | |||
| 1933 | } | |||
| 1934 | x = execute(a[3]); | |||
| 1935 | if (isbreak(x)((x)->csub == 23)) /* turn off break */ | |||
| 1936 | return True; | |||
| 1937 | if (isnext(x)((x)->csub == 22 || (x)->csub == 26) || isexit(x)((x)->csub == 21) || isret(x)((x)->csub == 25)) | |||
| 1938 | return(x); | |||
| 1939 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); | |||
| 1940 | x = execute(a[2]); | |||
| 1941 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); | |||
| 1942 | } | |||
| 1943 | } | |||
| 1944 | ||||
| 1945 | Cell *instat(Node **a, int n) /* for (a[0] in a[1]) a[2] */ | |||
| 1946 | { | |||
| 1947 | Cell *x, *vp, *arrayp, *cp, *ncp; | |||
| 1948 | Array *tp; | |||
| 1949 | int i; | |||
| 1950 | ||||
| 1951 | vp = execute(a[0]); | |||
| 1952 | arrayp = execute(a[1]); | |||
| 1953 | if (!isarr(arrayp)((arrayp)->tval & 020)) { | |||
| 1954 | return True; | |||
| 1955 | } | |||
| 1956 | tp = (Array *) arrayp->sval; | |||
| 1957 | tempfree(arrayp)do { if (((arrayp)->csub == 4)) tfree(arrayp); } while ( 0 ); | |||
| 1958 | for (i = 0; i < tp->size; i++) { /* this routine knows too much */ | |||
| 1959 | for (cp = tp->tab[i]; cp != NULL((void *)0); cp = ncp) { | |||
| 1960 | setsval(vp, cp->nval); | |||
| 1961 | ncp = cp->cnext; | |||
| 1962 | x = execute(a[2]); | |||
| 1963 | if (isbreak(x)((x)->csub == 23)) { | |||
| 1964 | tempfree(vp)do { if (((vp)->csub == 4)) tfree(vp); } while ( 0); | |||
| 1965 | return True; | |||
| 1966 | } | |||
| 1967 | if (isnext(x)((x)->csub == 22 || (x)->csub == 26) || isexit(x)((x)->csub == 21) || isret(x)((x)->csub == 25)) { | |||
| 1968 | tempfree(vp)do { if (((vp)->csub == 4)) tfree(vp); } while ( 0); | |||
| 1969 | return(x); | |||
| 1970 | } | |||
| 1971 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); | |||
| 1972 | } | |||
| 1973 | } | |||
| 1974 | return True; | |||
| 1975 | } | |||
| 1976 | ||||
| 1977 | static char *nawk_convert(const char *s, int (*fun_c)(int), | |||
| 1978 | wint_t (*fun_wc)(wint_t)) | |||
| 1979 | { | |||
| 1980 | char *buf = NULL((void *)0); | |||
| 1981 | char *pbuf = NULL((void *)0); | |||
| 1982 | const char *ps = NULL((void *)0); | |||
| 1983 | size_t n = 0; | |||
| 1984 | wchar_t wc; | |||
| 1985 | const size_t sz = awk_mb_cur_max; | |||
| 1986 | int unused; | |||
| 1987 | ||||
| 1988 | if (sz == 1) { | |||
| 1989 | buf = tostring(s); | |||
| 1990 | ||||
| 1991 | for (pbuf = buf; *pbuf; pbuf++) | |||
| 1992 | *pbuf = fun_c((uschar)*pbuf); | |||
| 1993 | ||||
| 1994 | return buf; | |||
| 1995 | } else { | |||
| 1996 | /* upper/lower character may be shorter/longer */ | |||
| 1997 | buf = tostringN(s, strlen(s) * sz + 1); | |||
| 1998 | ||||
| 1999 | (void) mbtowc(NULL((void *)0), NULL((void *)0), 0); /* reset internal state */ | |||
| 2000 | /* | |||
| 2001 | * Reset internal state here too. | |||
| 2002 | * Assign result to avoid a compiler warning. (Casting to void | |||
| 2003 | * doesn't work.) | |||
| 2004 | * Increment said variable to avoid a different warning. | |||
| 2005 | */ | |||
| 2006 | unused = wctomb(NULL((void *)0), L'\0'); | |||
| 2007 | unused++; | |||
| 2008 | ||||
| 2009 | ps = s; | |||
| 2010 | pbuf = buf; | |||
| 2011 | while (n = mbtowc(&wc, ps, sz), | |||
| 2012 | n > 0 && n != (size_t)-1 && n != (size_t)-2) | |||
| 2013 | { | |||
| 2014 | ps += n; | |||
| 2015 | ||||
| 2016 | n = wctomb(pbuf, fun_wc(wc)); | |||
| 2017 | if (n == (size_t)-1) | |||
| 2018 | FATAL("illegal wide character %s", s); | |||
| 2019 | ||||
| 2020 | pbuf += n; | |||
| 2021 | } | |||
| 2022 | ||||
| 2023 | *pbuf = '\0'; | |||
| 2024 | ||||
| 2025 | if (n) | |||
| 2026 | FATAL("illegal byte sequence %s", s); | |||
| 2027 | ||||
| 2028 | return buf; | |||
| 2029 | } | |||
| 2030 | } | |||
| 2031 | ||||
#ifdef __DJGPP__
/*
 * Stand-ins for towupper()/towlower(), compiled only when __DJGPP__ is
 * defined.  Values in 0..255 go through the byte-oriented
 * toupper()/tolower(); everything else is returned unchanged.
 */
static wint_t towupper(wint_t wc)
{
	return (wc >= 0 && wc < 256) ? toupper(wc & 0xFF) : wc;
}

static wint_t towlower(wint_t wc)
{
	return (wc >= 0 && wc < 256) ? tolower(wc & 0xFF) : wc;
}
#endif
| 2049 | ||||
/* Upper-case copy of s; the caller frees the returned string. */
static char *nawk_toupper(const char *s)
{
	char *converted;

	converted = nawk_convert(s, toupper, towupper);
	return converted;
}
| 2054 | ||||
/* Lower-case copy of s; the caller frees the returned string. */
static char *nawk_tolower(const char *s)
{
	char *converted;

	converted = nawk_convert(s, tolower, towlower);
	return converted;
}
| 2059 | ||||
| 2060 | Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg list */ | |||
| 2061 | { | |||
| 2062 | Cell *x, *y; | |||
| 2063 | Awkfloat u; | |||
| 2064 | int t, sz; | |||
| 2065 | Awkfloat tmp; | |||
| 2066 | char *buf, *fmt; | |||
| 2067 | Node *nextarg; | |||
| 2068 | FILE *fp; | |||
| 2069 | int status = 0; | |||
| 2070 | time_t tv; | |||
| 2071 | struct tm *tm, tmbuf; | |||
| 2072 | int estatus = 0; | |||
| 2073 | ||||
| 2074 | t = ptoi(a[0]); | |||
| 2075 | x = execute(a[1]); | |||
| 2076 | nextarg = a[1]->nnext; | |||
| 2077 | switch (t) { | |||
| 2078 | case FLENGTH1: | |||
| 2079 | if (isarr(x)((x)->tval & 020)) | |||
| 2080 | u = ((Array *) x->sval)->nelem; /* GROT. should be function*/ | |||
| 2081 | else | |||
| 2082 | u = u8_strlen(getsval(x)); | |||
| 2083 | break; | |||
| 2084 | case FLOG4: | |||
| 2085 | errno(*__errno()) = 0; | |||
| 2086 | u = errcheck(log(getfval(x)), "log"); | |||
| 2087 | break; | |||
| 2088 | case FINT5: | |||
| 2089 | modf(getfval(x), &u); break; | |||
| 2090 | case FEXP3: | |||
| 2091 | errno(*__errno()) = 0; | |||
| 2092 | u = errcheck(exp(getfval(x)), "exp"); | |||
| 2093 | break; | |||
| 2094 | case FSQRT2: | |||
| 2095 | errno(*__errno()) = 0; | |||
| 2096 | u = errcheck(sqrt(getfval(x)), "sqrt"); | |||
| 2097 | break; | |||
| 2098 | case FSIN9: | |||
| 2099 | u = sin(getfval(x)); break; | |||
| 2100 | case FCOS10: | |||
| 2101 | u = cos(getfval(x)); break; | |||
| 2102 | case FATAN11: | |||
| 2103 | if (nextarg == NULL((void *)0)) { | |||
| 2104 | WARNING("atan2 requires two arguments; returning 1.0"); | |||
| 2105 | u = 1.0; | |||
| 2106 | } else { | |||
| 2107 | y = execute(a[1]->nnext); | |||
| 2108 | u = atan2(getfval(x), getfval(y)); | |||
| 2109 | tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0); | |||
| 2110 | nextarg = nextarg->nnext; | |||
| 2111 | } | |||
| 2112 | break; | |||
| 2113 | case FCOMPL18: | |||
| 2114 | u = ~((int)getfval(x)); | |||
| 2115 | break; | |||
| 2116 | case FAND15: | |||
| 2117 | if (nextarg == 0) { | |||
| 2118 | WARNING("and requires two arguments; returning 0"); | |||
| 2119 | u = 0; | |||
| 2120 | break; | |||
| 2121 | } | |||
| 2122 | y = execute(a[1]->nnext); | |||
| 2123 | u = ((int)getfval(x)) & ((int)getfval(y)); | |||
| 2124 | tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0); | |||
| 2125 | nextarg = nextarg->nnext; | |||
| 2126 | break; | |||
| 2127 | case FFOR16: | |||
| 2128 | if (nextarg == 0) { | |||
| 2129 | WARNING("or requires two arguments; returning 0"); | |||
| 2130 | u = 0; | |||
| 2131 | break; | |||
| 2132 | } | |||
| 2133 | y = execute(a[1]->nnext); | |||
| 2134 | u = ((int)getfval(x)) | ((int)getfval(y)); | |||
| 2135 | tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0); | |||
| 2136 | nextarg = nextarg->nnext; | |||
| 2137 | break; | |||
| 2138 | case FXOR17: | |||
| 2139 | if (nextarg == 0) { | |||
| 2140 | WARNING("xor requires two arguments; returning 0"); | |||
| 2141 | u = 0; | |||
| 2142 | break; | |||
| 2143 | } | |||
| 2144 | y = execute(a[1]->nnext); | |||
| 2145 | u = ((int)getfval(x)) ^ ((int)getfval(y)); | |||
| 2146 | tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0); | |||
| 2147 | nextarg = nextarg->nnext; | |||
| 2148 | break; | |||
| 2149 | case FLSHIFT19: | |||
| 2150 | if (nextarg == 0) { | |||
| 2151 | WARNING("lshift requires two arguments; returning 0"); | |||
| 2152 | u = 0; | |||
| 2153 | break; | |||
| 2154 | } | |||
| 2155 | y = execute(a[1]->nnext); | |||
| 2156 | u = ((int)getfval(x)) << ((int)getfval(y)); | |||
| 2157 | tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0); | |||
| 2158 | nextarg = nextarg->nnext; | |||
| 2159 | break; | |||
| 2160 | case FRSHIFT20: | |||
| 2161 | if (nextarg == 0) { | |||
| 2162 | WARNING("rshift requires two arguments; returning 0"); | |||
| 2163 | u = 0; | |||
| 2164 | break; | |||
| 2165 | } | |||
| 2166 | y = execute(a[1]->nnext); | |||
| 2167 | u = ((int)getfval(x)) >> ((int)getfval(y)); | |||
| 2168 | tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0); | |||
| 2169 | nextarg = nextarg->nnext; | |||
| 2170 | break; | |||
| 2171 | case FSYSTEM6: | |||
| 2172 | fflush(stdout(&__sF[1])); /* in case something is buffered already */ | |||
| 2173 | estatus = status = system(getsval(x)); | |||
| 2174 | if (status != -1) { | |||
| 2175 | if (WIFEXITED(status)(((status) & 0177) == 0)) { | |||
| 2176 | estatus = WEXITSTATUS(status)(int)(((unsigned)(status) >> 8) & 0xff); | |||
| 2177 | } else if (WIFSIGNALED(status)(((status) & 0177) != 0177 && ((status) & 0177 ) != 0)) { | |||
| 2178 | estatus = WTERMSIG(status)(((status) & 0177)) + 256; | |||
| 2179 | #ifdef WCOREDUMP | |||
| 2180 | if (WCOREDUMP(status)((status) & 0200)) | |||
| 2181 | estatus += 256; | |||
| 2182 | #endif | |||
| 2183 | } else /* something else?!? */ | |||
| 2184 | estatus = 0; | |||
| 2185 | } | |||
| 2186 | /* else estatus was set to -1 */ | |||
| 2187 | u = estatus; | |||
| 2188 | break; | |||
| 2189 | case FRAND7: | |||
| 2190 | /* random() returns numbers in [0..2^31-1] | |||
| 2191 | * in order to get a number in [0, 1), divide it by 2^31 | |||
| 2192 | */ | |||
| 2193 | u = (Awkfloat) random() / (0x7fffffffL + 0x1UL); | |||
| 2194 | break; | |||
| 2195 | case FSRAND8: | |||
| 2196 | if (isrec(x)((x)->tval & 0200)) { /* no argument provided */ | |||
| 2197 | u = time(NULL((void *)0)); | |||
| 2198 | tmp = u; | |||
| 2199 | srandom((unsigned int) u); | |||
| 2200 | } else { | |||
| 2201 | u = getfval(x); | |||
| 2202 | tmp = u; | |||
| 2203 | srandom_deterministic((unsigned int) u); | |||
| 2204 | } | |||
| 2205 | u = srand_seed; | |||
| 2206 | srand_seed = tmp; | |||
| 2207 | break; | |||
| 2208 | case FTOUPPER12: | |||
| 2209 | case FTOLOWER13: | |||
| 2210 | if (t == FTOUPPER12) | |||
| 2211 | buf = nawk_toupper(getsval(x)); | |||
| 2212 | else | |||
| 2213 | buf = nawk_tolower(getsval(x)); | |||
| 2214 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); | |||
| 2215 | x = gettemp(); | |||
| 2216 | setsval(x, buf); | |||
| 2217 | free(buf); | |||
| 2218 | return x; | |||
| 2219 | case FFLUSH14: | |||
| 2220 | if (isrec(x)((x)->tval & 0200) || strlen(getsval(x)) == 0) { | |||
| 2221 | flush_all(); /* fflush() or fflush("") -> all */ | |||
| 2222 | u = 0; | |||
| 2223 | } else if ((fp = openfile(FFLUSH14, getsval(x), NULL((void *)0))) == NULL((void *)0)) | |||
| 2224 | u = EOF(-1); | |||
| 2225 | else | |||
| 2226 | u = fflush(fp); | |||
| 2227 | break; | |||
| 2228 | case FMKTIME23: | |||
| 2229 | memset(&tmbuf, 0, sizeof(tmbuf)); | |||
| 2230 | tm = &tmbuf; | |||
| 2231 | t = sscanf(getsval(x), "%d %d %d %d %d %d %d", | |||
| 2232 | &tm->tm_year, &tm->tm_mon, &tm->tm_mday, &tm->tm_hour, | |||
| 2233 | &tm->tm_min, &tm->tm_sec, &tm->tm_isdst); | |||
| 2234 | switch (t) { | |||
| 2235 | case 6: | |||
| 2236 | tm->tm_isdst = -1; /* let mktime figure it out */ | |||
| 2237 | /* FALLTHROUGH */ | |||
| 2238 | case 7: | |||
| 2239 | tm->tm_year -= 1900; | |||
| 2240 | tm->tm_mon--; | |||
| 2241 | u = mktime(tm); | |||
| 2242 | break; | |||
| 2243 | default: | |||
| 2244 | u = -1; | |||
| 2245 | break; | |||
| 2246 | } | |||
| 2247 | break; | |||
| 2248 | case FSYSTIME21: | |||
| 2249 | u = time((time_t *) 0); | |||
| 2250 | break; | |||
| 2251 | case FSTRFTIME22: | |||
| 2252 | /* strftime([format [,timestamp]]) */ | |||
| 2253 | if (nextarg) { | |||
| 2254 | y = execute(nextarg); | |||
| 2255 | nextarg = nextarg->nnext; | |||
| 2256 | tv = (time_t) getfval(y); | |||
| 2257 | tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0); | |||
| 2258 | } else | |||
| 2259 | tv = time((time_t *) 0); | |||
| 2260 | tm = localtime(&tv); | |||
| 2261 | if (tm == NULL((void *)0)) | |||
| 2262 | FATAL("bad time %ld", (long)tv); | |||
| 2263 | ||||
| 2264 | if (isrec(x)((x)->tval & 0200)) { | |||
| 2265 | /* format argument not provided, use default */ | |||
| 2266 | fmt = tostring("%a %b %d %H:%M:%S %Z %Y"); | |||
| 2267 | } else | |||
| 2268 | fmt = tostring(getsval(x)); | |||
| 2269 | ||||
| 2270 | sz = 32; | |||
| 2271 | buf = NULL((void *)0); | |||
| 2272 | do { | |||
| 2273 | if ((buf = (char *) reallocarray(buf, 2, sz)) == NULL((void *)0)) | |||
| 2274 | FATAL("out of memory in strftime"); | |||
| 2275 | sz *= 2; | |||
| 2276 | } while (strftime(buf, sz, fmt, tm) == 0 && fmt[0] != '\0'); | |||
| 2277 | ||||
| 2278 | y = gettemp(); | |||
| 2279 | setsval(y, buf); | |||
| 2280 | free(fmt); | |||
| 2281 | free(buf); | |||
| 2282 | ||||
| 2283 | return y; | |||
| 2284 | default: /* can't happen */ | |||
| 2285 | FATAL("illegal function type %d", t); | |||
| 2286 | break; | |||
| 2287 | } | |||
| 2288 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); | |||
| 2289 | x = gettemp(); | |||
| 2290 | setfval(x, u); | |||
| 2291 | if (nextarg != NULL((void *)0)) { | |||
| 2292 | WARNING("warning: function has too many arguments"); | |||
| 2293 | for ( ; nextarg; nextarg = nextarg->nnext) { | |||
| 2294 | y = execute(nextarg); | |||
| 2295 | tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0); | |||
| 2296 | } | |||
| 2297 | } | |||
| 2298 | return(x); | |||
| 2299 | } | |||
| 2300 | ||||
| 2301 | Cell *printstat(Node **a, int n) /* print a[0] */ | |||
| 2302 | { | |||
| 2303 | Node *x; | |||
| 2304 | Cell *y; | |||
| 2305 | FILE *fp; | |||
| 2306 | ||||
| 2307 | if (a[1] == NULL((void *)0)) /* a[1] is redirection operator, a[2] is file */ | |||
| 2308 | fp = stdout(&__sF[1]); | |||
| 2309 | else | |||
| 2310 | fp = redirect(ptoi(a[1]), a[2]); | |||
| 2311 | for (x = a[0]; x != NULL((void *)0); x = x->nnext) { | |||
| 2312 | y = execute(x); | |||
| 2313 | fputs(getpssval(y), fp); | |||
| 2314 | tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0); | |||
| 2315 | if (x->nnext == NULL((void *)0)) | |||
| 2316 | fputs(getsval(orsloc), fp); | |||
| 2317 | else | |||
| 2318 | fputs(getsval(ofsloc), fp); | |||
| 2319 | } | |||
| 2320 | if (a[1] != NULL((void *)0)) | |||
| 2321 | fflush(fp); | |||
| 2322 | if (ferror(fp)(!__isthreaded ? (((fp)->_flags & 0x0040) != 0) : (ferror )(fp))) | |||
| 2323 | FATAL("write error on %s", filename(fp)); | |||
| 2324 | return(True); | |||
| 2325 | } | |||
| 2326 | ||||
| 2327 | Cell *nullproc(Node **a, int n) | |||
| 2328 | { | |||
| 2329 | return 0; | |||
| 2330 | } | |||
| 2331 | ||||
| 2332 | ||||
| 2333 | FILE *redirect(int a, Node *b) /* set up all i/o redirections */ | |||
| 2334 | { | |||
| 2335 | FILE *fp; | |||
| 2336 | Cell *x; | |||
| 2337 | char *fname; | |||
| 2338 | ||||
| 2339 | x = execute(b); | |||
| 2340 | fname = getsval(x); | |||
| 2341 | fp = openfile(a, fname, NULL((void *)0)); | |||
| 2342 | if (fp == NULL((void *)0)) | |||
| 2343 | FATAL("can't open file %s", fname); | |||
| 2344 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); | |||
| 2345 | return fp; | |||
| 2346 | } | |||
| 2347 | ||||
/* One entry in the table of open streams. */
struct files {
	FILE		*fp;	/* the stream; NULL marks a free slot */
	const char	*fname;	/* name it was opened under */
	int		 mode;	/* '|', 'a', 'w' => LE/LT, GT */
};

/* The table itself and its current capacity. */
struct files *files;
size_t nfiles;
| 2355 | ||||
| 2356 | static void stdinit(void) /* in case stdin, etc., are not constants */ | |||
| 2357 | { | |||
| 2358 | nfiles = FOPEN_MAX20; | |||
| 2359 | files = (struct files *) calloc(nfiles, sizeof(*files)); | |||
| 2360 | if (files == NULL((void *)0)) | |||
| 2361 | FATAL("can't allocate file memory for %zu files", nfiles); | |||
| 2362 | files[0].fp = stdin(&__sF[0]); | |||
| 2363 | files[0].fname = tostring("/dev/stdin"); | |||
| 2364 | files[0].mode = LT287; | |||
| 2365 | files[1].fp = stdout(&__sF[1]); | |||
| 2366 | files[1].fname = tostring("/dev/stdout"); | |||
| 2367 | files[1].mode = GT285; | |||
| 2368 | files[2].fp = stderr(&__sF[2]); | |||
| 2369 | files[2].fname = tostring("/dev/stderr"); | |||
| 2370 | files[2].mode = GT285; | |||
| 2371 | } | |||
| 2372 | ||||
| 2373 | FILE *openfile(int a, const char *us, bool_Bool *pnewflag) | |||
| 2374 | { | |||
| 2375 | const char *s = us; | |||
| 2376 | size_t i; | |||
| 2377 | int m; | |||
| 2378 | FILE *fp = NULL((void *)0); | |||
| 2379 | ||||
| 2380 | if (*s == '\0') | |||
| 2381 | FATAL("null file name in print or getline"); | |||
| 2382 | for (i = 0; i < nfiles; i++) | |||
| 2383 | if (files[i].fname && strcmp(s, files[i].fname) == 0 && | |||
| 2384 | (a == files[i].mode || (a==APPEND282 && files[i].mode==GT285) || | |||
| 2385 | a == FFLUSH14)) { | |||
| 2386 | if (pnewflag) | |||
| 2387 | *pnewflag = false0; | |||
| 2388 | return files[i].fp; | |||
| 2389 | } | |||
| 2390 | if (a == FFLUSH14) /* didn't find it, so don't create it! */ | |||
| 2391 | return NULL((void *)0); | |||
| 2392 | ||||
| 2393 | for (i = 0; i < nfiles; i++) | |||
| 2394 | if (files[i].fp == NULL((void *)0)) | |||
| 2395 | break; | |||
| 2396 | if (i >= nfiles) { | |||
| 2397 | struct files *nf; | |||
| 2398 | size_t nnf = nfiles + FOPEN_MAX20; | |||
| 2399 | nf = (struct files *) reallocarray(files, nnf, sizeof(*nf)); | |||
| 2400 | if (nf == NULL((void *)0)) | |||
| 2401 | FATAL("cannot grow files for %s and %zu files", s, nnf); | |||
| 2402 | memset(&nf[nfiles], 0, FOPEN_MAX20 * sizeof(*nf)); | |||
| 2403 | nfiles = nnf; | |||
| 2404 | files = nf; | |||
| 2405 | } | |||
| 2406 | fflush(stdout(&__sF[1])); /* force a semblance of order */ | |||
| 2407 | m = a; | |||
| 2408 | if (a == GT285) { | |||
| 2409 | fp = fopen(s, "w"); | |||
| 2410 | } else if (a == APPEND282) { | |||
| 2411 | fp = fopen(s, "a"); | |||
| 2412 | m = GT285; /* so can mix > and >> */ | |||
| 2413 | } else if (a == '|') { /* output pipe */ | |||
| 2414 | fp = popen(s, "w"); | |||
| 2415 | } else if (a == LE286) { /* input pipe */ | |||
| 2416 | fp = popen(s, "r"); | |||
| 2417 | } else if (a == LT287) { /* getline <file */ | |||
| 2418 | fp = strcmp(s, "-") == 0 ? stdin(&__sF[0]) : fopen(s, "r"); /* "-" is stdin */ | |||
| 2419 | } else /* can't happen */ | |||
| 2420 | FATAL("illegal redirection %d", a); | |||
| 2421 | if (fp != NULL((void *)0)) { | |||
| 2422 | files[i].fname = tostring(s); | |||
| 2423 | files[i].fp = fp; | |||
| 2424 | files[i].mode = m; | |||
| 2425 | if (pnewflag) | |||
| 2426 | *pnewflag = true1; | |||
| 2427 | if (fp != stdin(&__sF[0]) && fp != stdout(&__sF[1]) && fp != stderr(&__sF[2])) | |||
| 2428 | (void) fcntl(fileno(fp)(!__isthreaded ? ((fp)->_file) : (fileno)(fp)), F_SETFD2, FD_CLOEXEC1); | |||
| 2429 | } | |||
| 2430 | return fp; | |||
| 2431 | } | |||
| 2432 | ||||
| 2433 | const char *filename(FILE *fp) | |||
| 2434 | { | |||
| 2435 | size_t i; | |||
| 2436 | ||||
| 2437 | for (i = 0; i < nfiles; i++) | |||
| 2438 | if (fp == files[i].fp) | |||
| 2439 | return files[i].fname; | |||
| 2440 | return "???"; | |||
| 2441 | } | |||
| 2442 | ||||
| 2443 | Cell *closefile(Node **a, int n) | |||
| 2444 | { | |||
| 2445 | Cell *x; | |||
| 2446 | size_t i; | |||
| 2447 | bool_Bool stat; | |||
| 2448 | ||||
| 2449 | x = execute(a[0]); | |||
| 2450 | getsval(x); | |||
| 2451 | stat = true1; | |||
| 2452 | for (i = 0; i < nfiles; i++) { | |||
| 2453 | if (!files[i].fname || strcmp(x->sval, files[i].fname) != 0) | |||
| 2454 | continue; | |||
| 2455 | if (files[i].mode == GT285 || files[i].mode == '|') | |||
| 2456 | fflush(files[i].fp); | |||
| 2457 | if (ferror(files[i].fp)(!__isthreaded ? (((files[i].fp)->_flags & 0x0040) != 0 ) : (ferror)(files[i].fp))) { | |||
| 2458 | if ((files[i].mode == GT285 && files[i].fp != stderr(&__sF[2])) | |||
| 2459 | || files[i].mode == '|') | |||
| 2460 | FATAL("write error on %s", files[i].fname); | |||
| 2461 | else | |||
| 2462 | WARNING("i/o error occurred on %s", files[i].fname); | |||
| 2463 | } | |||
| 2464 | if (files[i].fp == stdin(&__sF[0]) || files[i].fp == stdout(&__sF[1]) || | |||
| 2465 | files[i].fp == stderr(&__sF[2])) | |||
| 2466 | stat = freopen("/dev/null", "r+", files[i].fp) == NULL((void *)0); | |||
| 2467 | else if (files[i].mode == '|' || files[i].mode == LE286) | |||
| 2468 | stat = pclose(files[i].fp) == -1; | |||
| 2469 | else | |||
| 2470 | stat = fclose(files[i].fp) == EOF(-1); | |||
| 2471 | if (stat) | |||
| 2472 | WARNING("i/o error occurred closing %s", files[i].fname); | |||
| 2473 | xfree(files[i].fname){ free((void *)(intptr_t)(files[i].fname)); (files[i].fname) = ((void *)0); }; | |||
| 2474 | files[i].fname = NULL((void *)0); /* watch out for ref thru this */ | |||
| 2475 | files[i].fp = NULL((void *)0); | |||
| 2476 | break; | |||
| 2477 | } | |||
| 2478 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); | |||
| 2479 | x = gettemp(); | |||
| 2480 | setfval(x, (Awkfloat) (stat ? -1 : 0)); | |||
| 2481 | return(x); | |||
| 2482 | } | |||
| 2483 | ||||
| 2484 | void closeall(void) | |||
| 2485 | { | |||
| 2486 | size_t i; | |||
| 2487 | bool_Bool stat = false0; | |||
| 2488 | ||||
| 2489 | for (i = 0; i < nfiles; i++) { | |||
| 2490 | if (! files[i].fp) | |||
| 2491 | continue; | |||
| 2492 | if (files[i].mode == GT285 || files[i].mode == '|') | |||
| 2493 | fflush(files[i].fp); | |||
| 2494 | if (ferror(files[i].fp)(!__isthreaded ? (((files[i].fp)->_flags & 0x0040) != 0 ) : (ferror)(files[i].fp))) { | |||
| 2495 | if ((files[i].mode == GT285 && files[i].fp != stderr(&__sF[2])) | |||
| 2496 | || files[i].mode == '|') | |||
| 2497 | FATAL("write error on %s", files[i].fname); | |||
| 2498 | else | |||
| 2499 | WARNING("i/o error occurred on %s", files[i].fname); | |||
| 2500 | } | |||
| 2501 | if (files[i].fp == stdin(&__sF[0]) || files[i].fp == stdout(&__sF[1]) || | |||
| 2502 | files[i].fp == stderr(&__sF[2])) | |||
| 2503 | continue; | |||
| 2504 | if (files[i].mode == '|' || files[i].mode == LE286) | |||
| 2505 | stat = pclose(files[i].fp) == -1; | |||
| 2506 | else | |||
| 2507 | stat = fclose(files[i].fp) == EOF(-1); | |||
| 2508 | if (stat) | |||
| 2509 | WARNING("i/o error occurred while closing %s", files[i].fname); | |||
| 2510 | } | |||
| 2511 | } | |||
| 2512 | ||||
| 2513 | static void flush_all(void) | |||
| 2514 | { | |||
| 2515 | size_t i; | |||
| 2516 | ||||
| 2517 | for (i = 0; i < nfiles; i++) | |||
| 2518 | if (files[i].fp) | |||
| 2519 | fflush(files[i].fp); | |||
| 2520 | } | |||
| 2521 | ||||
| 2522 | void backsub(char **pb_ptr, const char **sptr_ptr); | |||
| 2523 | ||||
| 2524 | Cell *dosub(Node **a, int subop) /* sub and gsub */ | |||
| 2525 | { | |||
| 2526 | fa *pfa; | |||
| 2527 | int tempstat; | |||
| 2528 | char *repl; | |||
| 2529 | Cell *x; | |||
| 2530 | ||||
| 2531 | char *buf = NULL((void *)0); | |||
| 2532 | char *pb = NULL((void *)0); | |||
| 2533 | int bufsz = recsize; | |||
| 2534 | ||||
| 2535 | const char *r, *s; | |||
| 2536 | const char *start; | |||
| 2537 | const char *noempty = NULL((void *)0); /* empty match disallowed here */ | |||
| 2538 | size_t m = 0; /* match count */ | |||
| 2539 | size_t whichm; /* which match to select, 0 = global */ | |||
| 2540 | int mtype; /* match type */ | |||
| 2541 | ||||
| 2542 | if (a[0] == NULL((void *)0)) { /* 0 => a[1] is already-compiled regexpr */ | |||
| ||||
| 2543 | pfa = (fa *) a[1]; | |||
| 2544 | } else { | |||
| 2545 | x = execute(a[1]); | |||
| 2546 | pfa = makedfa(getsval(x), 1); | |||
| 2547 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); | |||
| 2548 | } | |||
| 2549 | ||||
| 2550 | x = execute(a[2]); /* replacement string */ | |||
| 2551 | repl = tostring(getsval(x)); | |||
| 2552 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); | |||
| 2553 | ||||
| 2554 | switch (subop) { | |||
| 2555 | case SUB301: | |||
| 2556 | whichm = 1; | |||
| 2557 | x = execute(a[3]); /* source string */ | |||
| 2558 | break; | |||
| 2559 | case GSUB302: | |||
| 2560 | whichm = 0; | |||
| 2561 | x = execute(a[3]); /* source string */ | |||
| 2562 | break; | |||
| 2563 | default: | |||
| 2564 | FATAL("dosub: unrecognized subop: %d", subop); | |||
| 2565 | } | |||
| 2566 | ||||
| 2567 | start = getsval(x); | |||
| 2568 | while (pmatch(pfa, start)) { | |||
| 2569 | if (buf
| |||
| 2570 | if ((pb = buf = malloc(bufsz)) == NULL((void *)0)) | |||
| 2571 | FATAL("out of memory in dosub"); | |||
| 2572 | tempstat = pfa->initstat; | |||
| 2573 | pfa->initstat = 2; | |||
| 2574 | } | |||
| 2575 | ||||
| 2576 | /* match types */ | |||
| 2577 | #define MT_IGNORE 0 /* unselected or invalid */ | |||
| 2578 | #define MT_INSERT 1 /* selected, empty */ | |||
| 2579 | #define MT_REPLACE 2 /* selected, not empty */ | |||
| 2580 | ||||
| 2581 | /* an empty match just after replacement is invalid */ | |||
| 2582 | ||||
| 2583 | if (patbeg == noempty && patlen == 0) { | |||
| 2584 | mtype = MT_IGNORE; /* invalid, not counted */ | |||
| 2585 | } else if (whichm == ++m || whichm == 0) { | |||
| 2586 | mtype = patlen
| |||
| 2587 | } else { | |||
| 2588 | mtype = MT_IGNORE; /* unselected, but counted */ | |||
| 2589 | } | |||
| 2590 | ||||
| 2591 | /* leading text: */ | |||
| 2592 | if (patbeg > start) { | |||
| 2593 | adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - start), | |||
| 2594 | recsize, &pb, "dosub"); | |||
| 2595 | s = start; | |||
| 2596 | while (s < patbeg) | |||
| 2597 | *pb++ = *s++; | |||
| 2598 | } | |||
| 2599 | ||||
| 2600 | if (mtype
| |||
| 2601 | goto matching_text; /* skip replacement text */ | |||
| 2602 | ||||
| 2603 | r = repl; | |||
| 2604 | while (*r != 0) { | |||
| 2605 | adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "dosub"); | |||
| 2606 | if (*r == '\\') { | |||
| 2607 | backsub(&pb, &r); | |||
| 2608 | } else if (*r == '&') { | |||
| 2609 | r++; | |||
| 2610 | adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, | |||
| 2611 | &pb, "dosub"); | |||
| 2612 | for (s = patbeg; s < patbeg+patlen; ) | |||
| 2613 | *pb++ = *s++; | |||
| 2614 | } else { | |||
| 2615 | *pb++ = *r++; | |||
| 2616 | } | |||
| 2617 | } | |||
| 2618 | ||||
| 2619 | matching_text: | |||
| 2620 | if (mtype
| |||
| 2621 | goto next_search; /* skip matching text */ | |||
| 2622 | ||||
| 2623 | if (patlen == 0) | |||
| 2624 | patlen = u8_nextlen(patbeg); | |||
| 2625 | adjbuf(&buf, &bufsz, (pb-buf) + patlen, recsize, &pb, "dosub"); | |||
| 2626 | s = patbeg; | |||
| 2627 | while (s < patbeg + patlen) | |||
| 2628 | *pb++ = *s++; | |||
| 2629 | ||||
| 2630 | next_search: | |||
| 2631 | start = patbeg + patlen; | |||
| 2632 | if (m
| |||
| 2633 | break; | |||
| 2634 | if (mtype == MT_REPLACE) | |||
| 2635 | noempty = start; | |||
| 2636 | ||||
| 2637 | #undef MT_IGNORE | |||
| 2638 | #undef MT_INSERT | |||
| 2639 | #undef MT_REPLACE | |||
| 2640 | } | |||
| 2641 | ||||
| 2642 | xfree(repl){ free((void *)(intptr_t)(repl)); (repl) = ((void *)0); }; | |||
| 2643 | ||||
| 2644 | if (buf
| |||
| 2645 | pfa->initstat = tempstat; | |||
| 2646 | ||||
| 2647 | /* trailing text */ | |||
| 2648 | adjbuf(&buf, &bufsz, 1+strlen(start)+pb-buf, 0, &pb, "dosub"); | |||
| ||||
| 2649 | while ((*pb++ = *start++) != '\0') | |||
| 2650 | ; | |||
| 2651 | ||||
| 2652 | setsval(x, buf); | |||
| 2653 | free(buf); | |||
| 2654 | } | |||
| 2655 | ||||
| 2656 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); | |||
| 2657 | x = gettemp(); | |||
| 2658 | x->tval = NUM01; | |||
| 2659 | x->fval = m; | |||
| 2660 | return x; | |||
| 2661 | } | |||
| 2662 | ||||
| 2663 | Cell *gensub(Node **a, int nnn) /* global selective substitute */ | |||
| 2664 | /* XXX incomplete - doesn't support backreferences \0 ... \9 */ | |||
| 2665 | { | |||
| 2666 | Cell *x, *y, *res, *h; | |||
| 2667 | char *rptr; | |||
| 2668 | const char *sptr; | |||
| 2669 | char *buf, *pb; | |||
| 2670 | const char *t, *q; | |||
| 2671 | fa *pfa; | |||
| 2672 | int mflag, tempstat, num, whichm; | |||
| 2673 | int bufsz = recsize; | |||
| 2674 | ||||
| 2675 | if ((buf = malloc(bufsz)) == NULL((void *)0)) | |||
| 2676 | FATAL("out of memory in gensub"); | |||
| 2677 | mflag = 0; /* if mflag == 0, can replace empty string */ | |||
| 2678 | num = 0; | |||
| 2679 | x = execute(a[4]); /* source string */ | |||
| 2680 | t = getsval(x); | |||
| 2681 | res = copycell(x); /* target string - initially copy of source */ | |||
| 2682 | res->csub = CTEMP4; /* result values are temporary */ | |||
| 2683 | if (a[0] == 0) /* 0 => a[1] is already-compiled regexpr */ | |||
| 2684 | pfa = (fa *) a[1]; /* regular expression */ | |||
| 2685 | else { | |||
| 2686 | y = execute(a[1]); | |||
| 2687 | pfa = makedfa(getsval(y), 1); | |||
| 2688 | tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0); | |||
| 2689 | } | |||
| 2690 | y = execute(a[2]); /* replacement string */ | |||
| 2691 | h = execute(a[3]); /* which matches should be replaced */ | |||
| 2692 | sptr = getsval(h); | |||
| 2693 | if (sptr[0] == 'g' || sptr[0] == 'G') | |||
| 2694 | whichm = -1; | |||
| 2695 | else { | |||
| 2696 | /* | |||
| 2697 | * The specified number is index of replacement, starting | |||
| 2698 | * from 1. GNU awk treats index lower than 0 same as | |||
| 2699 | * 1, we do same for compatibility. | |||
| 2700 | */ | |||
| 2701 | whichm = (int) getfval(h) - 1; | |||
| 2702 | if (whichm < 0) | |||
| 2703 | whichm = 0; | |||
| 2704 | } | |||
| 2705 | tempfree(h)do { if (((h)->csub == 4)) tfree(h); } while ( 0); | |||
| 2706 | ||||
| 2707 | if (pmatch(pfa, t)) { | |||
| 2708 | char *sl; | |||
| 2709 | ||||
| 2710 | tempstat = pfa->initstat; | |||
| 2711 | pfa->initstat = 2; | |||
| 2712 | pb = buf; | |||
| 2713 | rptr = getsval(y); | |||
| 2714 | /* | |||
| 2715 | * XXX if there are any backreferences in subst string, | |||
| 2716 | * complain now. | |||
| 2717 | */ | |||
| 2718 | for (sl = rptr; (sl = strchr(sl, '\\')) && sl[1]; sl++) { | |||
| 2719 | if (strchr("0123456789", sl[1])) { | |||
| 2720 | FATAL("gensub doesn't support backreferences (subst \"%s\")", rptr); | |||
| 2721 | } | |||
| 2722 | } | |||
| 2723 | ||||
| 2724 | do { | |||
| 2725 | if (whichm >= 0 && whichm != num) { | |||
| 2726 | num++; | |||
| 2727 | adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - t) + patlen, recsize, &pb, "gensub"); | |||
| 2728 | ||||
| 2729 | /* copy the part of string up to and including | |||
| 2730 | * match to output buffer */ | |||
| 2731 | while (t < patbeg + patlen) | |||
| 2732 | *pb++ = *t++; | |||
| 2733 | continue; | |||
| 2734 | } | |||
| 2735 | ||||
| 2736 | if (patlen == 0 && *patbeg != 0) { /* matched empty string */ | |||
| 2737 | if (mflag == 0) { /* can replace empty */ | |||
| 2738 | num++; | |||
| 2739 | sptr = rptr; | |||
| 2740 | while (*sptr != 0) { | |||
| 2741 | adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub"); | |||
| 2742 | if (*sptr == '\\') { | |||
| 2743 | backsub(&pb, &sptr); | |||
| 2744 | } else if (*sptr == '&') { | |||
| 2745 | sptr++; | |||
| 2746 | adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub"); | |||
| 2747 | for (q = patbeg; q < patbeg+patlen; ) | |||
| 2748 | *pb++ = *q++; | |||
| 2749 | } else | |||
| 2750 | *pb++ = *sptr++; | |||
| 2751 | } | |||
| 2752 | } | |||
| 2753 | if (*t == 0) /* at end */ | |||
| 2754 | goto done; | |||
| 2755 | adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gensub"); | |||
| 2756 | *pb++ = *t++; | |||
| 2757 | if (pb > buf + bufsz) /* BUG: not sure of this test */ | |||
| 2758 | FATAL("gensub result0 %.30s too big; can't happen", buf); | |||
| 2759 | mflag = 0; | |||
| 2760 | } | |||
| 2761 | else { /* matched nonempty string */ | |||
| 2762 | num++; | |||
| 2763 | sptr = t; | |||
| 2764 | adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gensub"); | |||
| 2765 | while (sptr < patbeg) | |||
| 2766 | *pb++ = *sptr++; | |||
| 2767 | sptr = rptr; | |||
| 2768 | while (*sptr != 0) { | |||
| 2769 | adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub"); | |||
| 2770 | if (*sptr == '\\') { | |||
| 2771 | backsub(&pb, &sptr); | |||
| 2772 | } else if (*sptr == '&') { | |||
| 2773 | sptr++; | |||
| 2774 | adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub"); | |||
| 2775 | for (q = patbeg; q < patbeg+patlen; ) | |||
| 2776 | *pb++ = *q++; | |||
| 2777 | } else | |||
| 2778 | *pb++ = *sptr++; | |||
| 2779 | } | |||
| 2780 | t = patbeg + patlen; | |||
| 2781 | if (patlen == 0 || *t == 0 || *(t-1) == 0) | |||
| 2782 | goto done; | |||
| 2783 | if (pb > buf + bufsz) | |||
| 2784 | FATAL("gensub result1 %.30s too big; can't happen", buf); | |||
| 2785 | mflag = 1; | |||
| 2786 | } | |||
| 2787 | } while (pmatch(pfa,t)); | |||
| 2788 | sptr = t; | |||
| 2789 | adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gensub"); | |||
| 2790 | while ((*pb++ = *sptr++) != 0) | |||
| 2791 | ; | |||
| 2792 | done: if (pb > buf + bufsz) | |||
| 2793 | FATAL("gensub result2 %.30s too big; can't happen", buf); | |||
| 2794 | *pb = '\0'; | |||
| 2795 | setsval(res, buf); | |||
| 2796 | pfa->initstat = tempstat; | |||
| 2797 | } | |||
| 2798 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); | |||
| 2799 | tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0); | |||
| 2800 | free(buf); | |||
| 2801 | return(res); | |||
| 2802 | } | |||
| 2803 | ||||
| 2804 | void backsub(char **pb_ptr, const char **sptr_ptr) /* handle \\& variations */ | |||
| 2805 | { /* sptr[0] == '\\' */ | |||
| 2806 | char *pb = *pb_ptr; | |||
| 2807 | const char *sptr = *sptr_ptr; | |||
| 2808 | ||||
| 2809 | if (sptr[1] == '\\') { | |||
| 2810 | if (sptr[2] == '\\' && sptr[3] == '&') { /* \\\& -> \& */ | |||
| 2811 | *pb++ = '\\'; | |||
| 2812 | *pb++ = '&'; | |||
| 2813 | sptr += 4; | |||
| 2814 | } else if (sptr[2] == '&') { /* \\& -> \ + matched */ | |||
| 2815 | *pb++ = '\\'; | |||
| 2816 | sptr += 2; | |||
| 2817 | } else if (do_posix) { /* \\x -> \x */ | |||
| 2818 | sptr++; | |||
| 2819 | *pb++ = *sptr++; | |||
| 2820 | } else { /* \\x -> \\x */ | |||
| 2821 | *pb++ = *sptr++; | |||
| 2822 | *pb++ = *sptr++; | |||
| 2823 | } | |||
| 2824 | } else if (sptr[1] == '&') { /* literal & */ | |||
| 2825 | sptr++; | |||
| 2826 | *pb++ = *sptr++; | |||
| 2827 | } else /* literal \ */ | |||
| 2828 | *pb++ = *sptr++; | |||
| 2829 | ||||
| 2830 | *pb_ptr = pb; | |||
| 2831 | *sptr_ptr = sptr; | |||
| 2832 | } | |||
| 2833 | ||||
/*
 * wide_char_to_byte_str - encode a code point as a UTF-8 byte string.
 *
 *   rune    code point; values outside 0 .. 0x10FFFF are rejected
 *   outlen  receives the number of encoded bytes (1 to 4), not counting
 *           the terminator; it is not written when rune is rejected
 *
 * Returns a pointer to a NUL-terminated static buffer that is
 * overwritten by the next call, or NULL when rune is out of range.
 */
static char *wide_char_to_byte_str(int rune, size_t *outlen)
{
	static char buf[5];
	size_t used;

	if (rune < 0 || rune > 0x10FFFF)
		return NULL;

	memset(buf, 0, sizeof(buf));

	if (rune < 0x80) {
		/* 0xxxxxxx */
		buf[0] = rune;
		used = 1;
	} else if (rune < 0x800) {
		/* 110xxxxx 10xxxxxx */
		buf[0] = 0xC0 | (rune >> 6);
		buf[1] = 0x80 | (rune & 0x3F);
		used = 2;
	} else if (rune < 0x10000) {
		/* 1110xxxx 10xxxxxx 10xxxxxx */
		buf[0] = 0xE0 | (rune >> 12);
		buf[1] = 0x80 | ((rune >> 6) & 0x3F);
		buf[2] = 0x80 | (rune & 0x3F);
		used = 3;
	} else {
		/* 0x00010000 - 0x10FFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
		buf[0] = 0xF0 | (rune >> 18);
		buf[1] = 0x80 | ((rune >> 12) & 0x3F);
		buf[2] = 0x80 | ((rune >> 6) & 0x3F);
		buf[3] = 0x80 | (rune & 0x3F);
		used = 4;
	}

	buf[used] = '\0';
	*outlen = used;

	return buf;
}