Bug Summary

File:src/lib/libc/regex/regcomp.c
Warning:line 1334, column 11
Assigned value is garbage or undefined

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple amd64-unknown-openbsd7.0 -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name regcomp.c -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 1 -fhalf-no-semantic-interposition -mframe-pointer=all -relaxed-aliasing -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -target-feature +retpoline-indirect-calls -target-feature +retpoline-indirect-branches -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/usr/src/lib/libc/obj -resource-dir /usr/local/lib/clang/13.0.0 -include namespace.h -I /usr/src/lib/libc/include -I /usr/src/lib/libc/hidden -D __LIBC__ -D APIWARN -D YP -I /usr/src/lib/libc/yp -I /usr/src/lib/libc -I /usr/src/lib/libc/gdtoa -I /usr/src/lib/libc/arch/amd64/gdtoa -D INFNAN_CHECK -D MULTIPLE_THREADS -D NO_FENV_H -D USE_LOCALE -I /usr/src/lib/libc -I /usr/src/lib/libc/citrus -D RESOLVSORT -D FLOATING_POINT -D PRINTF_WIDE_CHAR -D SCANF_WIDE_CHAR -D FUTEX -D PIC -internal-isystem /usr/local/lib/clang/13.0.0/include -internal-externc-isystem /usr/include -O2 -fdebug-compilation-dir=/usr/src/lib/libc/obj -ferror-limit 19 -fwrapv -D_RET_PROTECTOR -ret-protector -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-valloc -fno-builtin-free -fno-builtin-strdup -fno-builtin-strndup -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/ben/Projects/vmm/scan-build/2022-01-12-194120-40624-1 -x c /usr/src/lib/libc/regex/regcomp.c
1/* $OpenBSD: regcomp.c,v 1.43 2021/01/03 17:07:57 tb Exp $ */
2/*-
3 * Copyright (c) 1992, 1993, 1994 Henry Spencer.
4 * Copyright (c) 1992, 1993, 1994
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Henry Spencer.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 * @(#)regcomp.c 8.5 (Berkeley) 3/20/94
35 */
36
37#include <sys/types.h>
38#include <stdio.h>
39#include <string.h>
40#include <ctype.h>
41#include <limits.h>
42#include <stdlib.h>
43#include <regex.h>
44
45#include "utils.h"
46#include "regex2.h"
47
48#include "cclass.h"
49#include "cname.h"
50
51/*
52 * parse structure, passed up and down to avoid global variables and
53 * other clumsinesses
54 */
55struct parse {
56 const char *next; /* next character in RE */
57 const char *end; /* end of string (-> NUL normally) */
58 int error; /* has an error been seen? */
59 sop *strip; /* malloced strip */
60 sopno ssize; /* malloced strip size (allocated) */
61 sopno slen; /* malloced strip length (used) */
62 int ncsalloc; /* number of csets allocated */
63 struct re_guts *g;
64# define NPAREN10 10 /* we need to remember () 1-9 for back refs */
65 sopno pbegin[NPAREN10]; /* -> ( ([0] unused) */
66 sopno pend[NPAREN10]; /* -> ) ([0] unused) */
67};
68
69static void p_ere(struct parse *, int);
70static void p_ere_exp(struct parse *);
71static void p_str(struct parse *);
72static void p_bre(struct parse *, int, int);
73static int p_simp_re(struct parse *, int);
74static int p_count(struct parse *);
75static void p_bracket(struct parse *);
76static void p_b_term(struct parse *, cset *);
77static void p_b_cclass(struct parse *, cset *);
78static void p_b_eclass(struct parse *, cset *);
79static char p_b_symbol(struct parse *);
80static char p_b_coll_elem(struct parse *, int);
81static char othercase(int);
82static void bothcases(struct parse *, int);
83static void ordinary(struct parse *, int);
84static void backslash(struct parse *, int);
85static void nonnewline(struct parse *);
86static void repeat(struct parse *, sopno, int, int);
87static void seterr(struct parse *, int);
88static cset *allocset(struct parse *);
89static void freeset(struct parse *, cset *);
90static int freezeset(struct parse *, cset *);
91static int firstch(struct parse *, cset *);
92static int nch(struct parse *, cset *);
93static sopno dupl(struct parse *, sopno, sopno);
94static void doemit(struct parse *, sop, size_t);
95static void doinsert(struct parse *, sop, size_t, sopno);
96static void dofwd(struct parse *, sopno, sop);
97static int enlarge(struct parse *, sopno);
98static void stripsnug(struct parse *, struct re_guts *);
99static void findmust(struct parse *, struct re_guts *);
100static sopno pluscount(struct parse *, struct re_guts *);
101
102static char nuls[10]; /* place to point scanner in event of error */
103
104/*
105 * macros for use with parse structure
106 * BEWARE: these know that the parse structure is named `p' !!!
107 */
108#define PEEK()(*p->next) (*p->next)
109#define PEEK2()(*(p->next+1)) (*(p->next+1))
110#define MORE()(p->end - p->next > 0) (p->end - p->next > 0)
111#define MORE2()(p->end - p->next > 1) (p->end - p->next > 1)
112#define SEE(c)((p->end - p->next > 0) && (*p->next) == (
c))
(MORE()(p->end - p->next > 0) && PEEK()(*p->next) == (c))
113#define SEETWO(a, b)((p->end - p->next > 1) && (*p->next) == (
a) && (*(p->next+1)) == (b))
(MORE2()(p->end - p->next > 1) && PEEK()(*p->next) == (a) && PEEK2()(*(p->next+1)) == (b))
114#define EAT(c)((((p->end - p->next > 0) && (*p->next) ==
(c))) ? ((p->next++), 1) : 0)
((SEE(c)((p->end - p->next > 0) && (*p->next) == (
c))
) ? (NEXT()(p->next++), 1) : 0)
115#define EATTWO(a, b)((((p->end - p->next > 1) && (*p->next) ==
(a) && (*(p->next+1)) == (b))) ? ((p->next += 2
), 1) : 0)
((SEETWO(a, b)((p->end - p->next > 1) && (*p->next) == (
a) && (*(p->next+1)) == (b))
) ? (NEXT2()(p->next += 2), 1) : 0)
116#define NEXT()(p->next++) (p->next++)
117#define NEXT2()(p->next += 2) (p->next += 2)
118#define NEXTn(n)(p->next += (n)) (p->next += (n))
119#define GETNEXT()(*p->next++) (*p->next++)
120#define SETERROR(e)seterr(p, (e)) seterr(p, (e))
121#define REQUIRE(co, e)do { if (!(co)) seterr(p, (e)); } while (0) do { if (!(co)) SETERROR(e)seterr(p, (e)); } while (0)
122#define EMIT(op, sopnd)doemit(p, (sop)(op), (size_t)(sopnd)) doemit(p, (sop)(op), (size_t)(sopnd))
123#define INSERT(op, pos)doinsert(p, (sop)(op), (p->slen)-(pos)+1, pos) doinsert(p, (sop)(op), HERE()(p->slen)-(pos)+1, pos)
124#define AHEAD(pos)dofwd(p, pos, (p->slen)-(pos)) dofwd(p, pos, HERE()(p->slen)-(pos))
125#define ASTERN(sop, pos)doemit(p, (sop)(sop), (size_t)((p->slen)-pos)) EMIT(sop, HERE()-pos)doemit(p, (sop)(sop), (size_t)((p->slen)-pos))
126#define HERE()(p->slen) (p->slen)
127#define THERE()(p->slen - 1) (p->slen - 1)
128#define THERETHERE()(p->slen - 2) (p->slen - 2)
129#define DROP(n)(p->slen -= (n)) (p->slen -= (n))
130
131#ifndef NDEBUG
132static int never0 = 0; /* for use in asserts; shuts lint up */
133#else
134#define never0 0 /* some <assert.h>s have bugs too */
135#endif
136
137/*
138 - regcomp - interface for parser and compilation
139 */
140int /* 0 success, otherwise REG_something */
141regcomp(regex_t *preg, const char *pattern, int cflags)
142{
143 struct parse pa;
144 struct re_guts *g;
145 struct parse *p = &pa;
146 int i;
147 size_t len;
148#ifdef REDEBUG
149# define GOODFLAGS(f)((f)&~0200) (f)
150#else
151# define GOODFLAGS(f)((f)&~0200) ((f)&~REG_DUMP0200)
152#endif
153
154 cflags = GOODFLAGS(cflags)((cflags)&~0200);
155 if ((cflags&REG_EXTENDED0001) && (cflags&REG_NOSPEC0020))
156 return(REG_INVARG16);
157
158 if (cflags&REG_PEND0040) {
159 if (preg->re_endp < pattern)
160 return(REG_INVARG16);
161 len = preg->re_endp - pattern;
162 } else
163 len = strlen((char *)pattern);
164
165 /* do the mallocs early so failure handling is easy */
166 g = malloc(sizeof(struct re_guts));
167 if (g == NULL((void *)0))
168 return(REG_ESPACE12);
169 p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */
170 p->strip = reallocarray(NULL((void *)0), p->ssize, sizeof(sop));
171 p->slen = 0;
172 if (p->strip == NULL((void *)0)) {
173 free(g);
174 return(REG_ESPACE12);
175 }
176
177 /* set things up */
178 p->g = g;
179 p->next = pattern;
180 p->end = p->next + len;
181 p->error = 0;
182 p->ncsalloc = 0;
183 for (i = 0; i < NPAREN10; i++) {
184 p->pbegin[i] = 0;
185 p->pend[i] = 0;
186 }
187 g->csetsize = NC(127 - (-127 -1) + 1);
188 g->sets = NULL((void *)0);
189 g->setbits = NULL((void *)0);
190 g->ncsets = 0;
191 g->cflags = cflags;
192 g->iflags = 0;
193 g->nbol = 0;
194 g->neol = 0;
195 g->must = NULL((void *)0);
196 g->mlen = 0;
197 g->nsub = 0;
198 g->backrefs = 0;
199
200 /* do it */
201 EMIT(OEND, 0)doemit(p, (sop)((1LU<<((unsigned)27))), (size_t)(0));
202 g->firststate = THERE()(p->slen - 1);
203 if (cflags&REG_EXTENDED0001)
204 p_ere(p, OUT(127 +1));
205 else if (cflags&REG_NOSPEC0020)
206 p_str(p);
207 else
208 p_bre(p, OUT(127 +1), OUT(127 +1));
209 EMIT(OEND, 0)doemit(p, (sop)((1LU<<((unsigned)27))), (size_t)(0));
210 g->laststate = THERE()(p->slen - 1);
211
212 /* tidy up loose ends and fill things in */
213 stripsnug(p, g);
214 findmust(p, g);
215 g->nplus = pluscount(p, g);
216 g->magic = MAGIC2((('R'^0200)<<8)|'E');
217 preg->re_nsub = g->nsub;
218 preg->re_g = g;
219 preg->re_magic = MAGIC1((('r'^0200)<<8) | 'e');
220#ifndef REDEBUG
221 /* not debugging, so can't rely on the assert() in regexec() */
222 if (g->iflags&BAD04)
223 SETERROR(REG_ASSERT)seterr(p, (15));
224#endif
225
226 /* win or lose, we're done */
227 if (p->error != 0) /* lose */
228 regfree(preg);
229 return(p->error);
230}
231
232/*
233 - p_ere - ERE parser top level, concatenation and alternation
234 */
235static void
236p_ere(struct parse *p, int stop) /* character this ERE should end at */
237{
238 char c;
239 sopno prevback;
240 sopno prevfwd;
241 sopno conc;
242 int first = 1; /* is this the first alternative? */
243
244 for (;;) {
245 /* do a bunch of concatenated expressions */
246 conc = HERE()(p->slen);
247 while (MORE()(p->end - p->next > 0) && (c = PEEK()(*p->next)) != '|' && c != stop)
248 p_ere_exp(p);
249 REQUIRE(HERE() != conc, REG_EMPTY)do { if (!((p->slen) != conc)) seterr(p, (14)); } while (0
)
; /* require nonempty */
250
251 if (!EAT('|')((((p->end - p->next > 0) && (*p->next) ==
('|'))) ? ((p->next++), 1) : 0)
)
252 break; /* NOTE BREAK OUT */
253
254 if (first) {
255 INSERT(OCH_, conc)doinsert(p, (sop)((15LU<<((unsigned)27))), (p->slen)
-(conc)+1, conc)
; /* offset is wrong */
256 prevfwd = conc;
257 prevback = conc;
258 first = 0;
259 }
260 ASTERN(OOR1, prevback)doemit(p, (sop)((16LU<<((unsigned)27))), (size_t)((p->
slen)-prevback))
;
261 prevback = THERE()(p->slen - 1);
262 AHEAD(prevfwd)dofwd(p, prevfwd, (p->slen)-(prevfwd)); /* fix previous offset */
263 prevfwd = HERE()(p->slen);
264 EMIT(OOR2, 0)doemit(p, (sop)((17LU<<((unsigned)27))), (size_t)(0)); /* offset is very wrong */
265 }
266
267 if (!first) { /* tail-end fixups */
268 AHEAD(prevfwd)dofwd(p, prevfwd, (p->slen)-(prevfwd));
269 ASTERN(O_CH, prevback)doemit(p, (sop)((18LU<<((unsigned)27))), (size_t)((p->
slen)-prevback))
;
270 }
271
272 assert(!MORE() || SEE(stop))((void)0);
273}
274
275/*
276 - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op
277 */
278static void
279p_ere_exp(struct parse *p)
280{
281 char c;
282 sopno pos;
283 int count;
284 int count2;
285 sopno subno;
286 int wascaret = 0;
287
288 assert(MORE())((void)0); /* caller should have ensured this */
289 c = GETNEXT()(*p->next++);
290
291 pos = HERE()(p->slen);
292 switch (c) {
293 case '(':
294 REQUIRE(MORE(), REG_EPAREN)do { if (!((p->end - p->next > 0))) seterr(p, (8)); }
while (0)
;
295 p->g->nsub++;
296 subno = p->g->nsub;
297 if (subno < NPAREN10)
298 p->pbegin[subno] = HERE()(p->slen);
299 EMIT(OLPAREN, subno)doemit(p, (sop)((13LU<<((unsigned)27))), (size_t)(subno
))
;
300 if (!SEE(')')((p->end - p->next > 0) && (*p->next) == (
')'))
)
301 p_ere(p, ')');
302 if (subno < NPAREN10) {
303 p->pend[subno] = HERE()(p->slen);
304 assert(p->pend[subno] != 0)((void)0);
305 }
306 EMIT(ORPAREN, subno)doemit(p, (sop)((14LU<<((unsigned)27))), (size_t)(subno
))
;
307 REQUIRE(MORE() && GETNEXT() == ')', REG_EPAREN)do { if (!((p->end - p->next > 0) && (*p->
next++) == ')')) seterr(p, (8)); } while (0)
;
308 break;
309 case '^':
310 EMIT(OBOL, 0)doemit(p, (sop)((3LU<<((unsigned)27))), (size_t)(0));
311 p->g->iflags |= USEBOL01;
312 p->g->nbol++;
313 wascaret = 1;
314 break;
315 case '$':
316 EMIT(OEOL, 0)doemit(p, (sop)((4LU<<((unsigned)27))), (size_t)(0));
317 p->g->iflags |= USEEOL02;
318 p->g->neol++;
319 break;
320 case '|':
321 SETERROR(REG_EMPTY)seterr(p, (14));
322 break;
323 case '*':
324 case '+':
325 case '?':
326 SETERROR(REG_BADRPT)seterr(p, (13));
327 break;
328 case '.':
329 if (p->g->cflags&REG_NEWLINE0010)
330 nonnewline(p);
331 else
332 EMIT(OANY, 0)doemit(p, (sop)((5LU<<((unsigned)27))), (size_t)(0));
333 break;
334 case '[':
335 p_bracket(p);
336 break;
337 case '\\':
338 REQUIRE(MORE(), REG_EESCAPE)do { if (!((p->end - p->next > 0))) seterr(p, (5)); }
while (0)
;
339 c = GETNEXT()(*p->next++);
340 backslash(p, c);
341 break;
342 case '{': /* okay as ordinary except if digit follows */
343 REQUIRE(!MORE() || !isdigit((uch)PEEK()), REG_BADRPT)do { if (!(!(p->end - p->next > 0) || !isdigit((uch)
(*p->next)))) seterr(p, (13)); } while (0)
;
344 /* FALLTHROUGH */
345 default:
346 if (p->error != 0)
347 return;
348 ordinary(p, c);
349 break;
350 }
351
352 if (!MORE()(p->end - p->next > 0))
353 return;
354 c = PEEK()(*p->next);
355 /* we call { a repetition if followed by a digit */
356 if (!( c == '*' || c == '+' || c == '?' ||
357 (c == '{' && MORE2()(p->end - p->next > 1) && isdigit((uch)PEEK2()(*(p->next+1)))) ))
358 return; /* no repetition, we're done */
359 NEXT()(p->next++);
360
361 REQUIRE(!wascaret, REG_BADRPT)do { if (!(!wascaret)) seterr(p, (13)); } while (0);
362 switch (c) {
363 case '*': /* implemented as +? */
364 /* this case does not require the (y|) trick, noKLUDGE */
365 INSERT(OPLUS_, pos)doinsert(p, (sop)((9LU<<((unsigned)27))), (p->slen)-
(pos)+1, pos)
;
366 ASTERN(O_PLUS, pos)doemit(p, (sop)((10LU<<((unsigned)27))), (size_t)((p->
slen)-pos))
;
367 INSERT(OQUEST_, pos)doinsert(p, (sop)((11LU<<((unsigned)27))), (p->slen)
-(pos)+1, pos)
;
368 ASTERN(O_QUEST, pos)doemit(p, (sop)((12LU<<((unsigned)27))), (size_t)((p->
slen)-pos))
;
369 break;
370 case '+':
371 INSERT(OPLUS_, pos)doinsert(p, (sop)((9LU<<((unsigned)27))), (p->slen)-
(pos)+1, pos)
;
372 ASTERN(O_PLUS, pos)doemit(p, (sop)((10LU<<((unsigned)27))), (size_t)((p->
slen)-pos))
;
373 break;
374 case '?':
375 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
376 INSERT(OCH_, pos)doinsert(p, (sop)((15LU<<((unsigned)27))), (p->slen)
-(pos)+1, pos)
; /* offset slightly wrong */
377 ASTERN(OOR1, pos)doemit(p, (sop)((16LU<<((unsigned)27))), (size_t)((p->
slen)-pos))
; /* this one's right */
378 AHEAD(pos)dofwd(p, pos, (p->slen)-(pos)); /* fix the OCH_ */
379 EMIT(OOR2, 0)doemit(p, (sop)((17LU<<((unsigned)27))), (size_t)(0)); /* offset very wrong... */
380 AHEAD(THERE())dofwd(p, (p->slen - 1), (p->slen)-((p->slen - 1))); /* ...so fix it */
381 ASTERN(O_CH, THERETHERE())doemit(p, (sop)((18LU<<((unsigned)27))), (size_t)((p->
slen)-(p->slen - 2)))
;
382 break;
383 case '{':
384 count = p_count(p);
385 if (EAT(',')((((p->end - p->next > 0) && (*p->next) ==
(','))) ? ((p->next++), 1) : 0)
) {
386 if (isdigit((uch)PEEK()(*p->next))) {
387 count2 = p_count(p);
388 REQUIRE(count <= count2, REG_BADBR)do { if (!(count <= count2)) seterr(p, (10)); } while (0);
389 } else /* single number with comma */
390 count2 = INFINITY(255 + 1);
391 } else /* just a single number */
392 count2 = count;
393 repeat(p, pos, count, count2);
394 if (!EAT('}')((((p->end - p->next > 0) && (*p->next) ==
('}'))) ? ((p->next++), 1) : 0)
) { /* error heuristics */
395 while (MORE()(p->end - p->next > 0) && PEEK()(*p->next) != '}')
396 NEXT()(p->next++);
397 REQUIRE(MORE(), REG_EBRACE)do { if (!((p->end - p->next > 0))) seterr(p, (9)); }
while (0)
;
398 SETERROR(REG_BADBR)seterr(p, (10));
399 }
400 break;
401 }
402
403 if (!MORE()(p->end - p->next > 0))
404 return;
405 c = PEEK()(*p->next);
406 if (!( c == '*' || c == '+' || c == '?' ||
407 (c == '{' && MORE2()(p->end - p->next > 1) && isdigit((uch)PEEK2()(*(p->next+1)))) ) )
408 return;
409 SETERROR(REG_BADRPT)seterr(p, (13));
410}
411
412/*
413 - p_str - string (no metacharacters) "parser"
414 */
415static void
416p_str(struct parse *p)
417{
418 REQUIRE(MORE(), REG_EMPTY)do { if (!((p->end - p->next > 0))) seterr(p, (14));
} while (0)
;
419 while (MORE()(p->end - p->next > 0))
420 ordinary(p, GETNEXT()(*p->next++));
421}
422
423/*
424 - p_bre - BRE parser top level, anchoring and concatenation
425 * Giving end1 as OUT essentially eliminates the end1/end2 check.
426 *
427 * This implementation is a bit of a kludge, in that a trailing $ is first
428 * taken as an ordinary character and then revised to be an anchor. The
429 * only undesirable side effect is that '$' gets included as a character
430 * category in such cases. This is fairly harmless; not worth fixing.
431 * The amount of lookahead needed to avoid this kludge is excessive.
432 */
433static void
434p_bre(struct parse *p,
435 int end1, /* first terminating character */
436 int end2) /* second terminating character */
437{
438 sopno start = HERE()(p->slen);
439 int first = 1; /* first subexpression? */
440 int wasdollar = 0;
441
442 if (EAT('^')((((p->end - p->next > 0) && (*p->next) ==
('^'))) ? ((p->next++), 1) : 0)
) {
443 EMIT(OBOL, 0)doemit(p, (sop)((3LU<<((unsigned)27))), (size_t)(0));
444 p->g->iflags |= USEBOL01;
445 p->g->nbol++;
446 }
447 while (MORE()(p->end - p->next > 0) && !SEETWO(end1, end2)((p->end - p->next > 1) && (*p->next) == (
end1) && (*(p->next+1)) == (end2))
) {
448 wasdollar = p_simp_re(p, first);
449 first = 0;
450 }
451 if (wasdollar) { /* oops, that was a trailing anchor */
452 DROP(1)(p->slen -= (1));
453 EMIT(OEOL, 0)doemit(p, (sop)((4LU<<((unsigned)27))), (size_t)(0));
454 p->g->iflags |= USEEOL02;
455 p->g->neol++;
456 }
457
458 REQUIRE(HERE() != start, REG_EMPTY)do { if (!((p->slen) != start)) seterr(p, (14)); } while (
0)
; /* require nonempty */
459}
460
461/*
462 - p_simp_re - parse a simple RE, an atom possibly followed by a repetition
463 */
464static int /* was the simple RE an unbackslashed $? */
465p_simp_re(struct parse *p,
466 int starordinary) /* is a leading * an ordinary character? */
467{
468 int c;
469 int count;
470 int count2;
471 sopno pos;
472 int i;
473 sopno subno;
474# define BACKSL(1<<8) (1<<CHAR_BIT8)
475
476 pos = HERE()(p->slen); /* repetion op, if any, covers from here */
477
478 assert(MORE())((void)0); /* caller should have ensured this */
479 c = GETNEXT()(*p->next++);
480 if (c == '\\') {
481 REQUIRE(MORE(), REG_EESCAPE)do { if (!((p->end - p->next > 0))) seterr(p, (5)); }
while (0)
;
482 c = BACKSL(1<<8) | GETNEXT()(*p->next++);
483 }
484 switch (c) {
485 case '.':
486 if (p->g->cflags&REG_NEWLINE0010)
487 nonnewline(p);
488 else
489 EMIT(OANY, 0)doemit(p, (sop)((5LU<<((unsigned)27))), (size_t)(0));
490 break;
491 case '[':
492 p_bracket(p);
493 break;
494 case BACKSL(1<<8)|'<':
495 EMIT(OBOW, 0)doemit(p, (sop)((19LU<<((unsigned)27))), (size_t)(0));
496 break;
497 case BACKSL(1<<8)|'>':
498 EMIT(OEOW, 0)doemit(p, (sop)((20LU<<((unsigned)27))), (size_t)(0));
499 break;
500 case BACKSL(1<<8)|'{':
501 SETERROR(REG_BADRPT)seterr(p, (13));
502 break;
503 case BACKSL(1<<8)|'(':
504 p->g->nsub++;
505 subno = p->g->nsub;
506 if (subno < NPAREN10)
507 p->pbegin[subno] = HERE()(p->slen);
508 EMIT(OLPAREN, subno)doemit(p, (sop)((13LU<<((unsigned)27))), (size_t)(subno
))
;
509 /* the MORE here is an error heuristic */
510 if (MORE()(p->end - p->next > 0) && !SEETWO('\\', ')')((p->end - p->next > 1) && (*p->next) == (
'\\') && (*(p->next+1)) == (')'))
)
511 p_bre(p, '\\', ')');
512 if (subno < NPAREN10) {
513 p->pend[subno] = HERE()(p->slen);
514 assert(p->pend[subno] != 0)((void)0);
515 }
516 EMIT(ORPAREN, subno)doemit(p, (sop)((14LU<<((unsigned)27))), (size_t)(subno
))
;
517 REQUIRE(EATTWO('\\', ')'), REG_EPAREN)do { if (!(((((p->end - p->next > 1) && (*p->
next) == ('\\') && (*(p->next+1)) == (')'))) ? ((p
->next += 2), 1) : 0))) seterr(p, (8)); } while (0)
;
518 break;
519 case BACKSL(1<<8)|')': /* should not get here -- must be user */
520 case BACKSL(1<<8)|'}':
521 SETERROR(REG_EPAREN)seterr(p, (8));
522 break;
523 case BACKSL(1<<8)|'1':
524 case BACKSL(1<<8)|'2':
525 case BACKSL(1<<8)|'3':
526 case BACKSL(1<<8)|'4':
527 case BACKSL(1<<8)|'5':
528 case BACKSL(1<<8)|'6':
529 case BACKSL(1<<8)|'7':
530 case BACKSL(1<<8)|'8':
531 case BACKSL(1<<8)|'9':
532 i = (c&~BACKSL(1<<8)) - '0';
533 assert(i < NPAREN)((void)0);
534 if (p->pend[i] != 0) {
535 assert(i <= p->g->nsub)((void)0);
536 EMIT(OBACK_, i)doemit(p, (sop)((7LU<<((unsigned)27))), (size_t)(i));
537 assert(p->pbegin[i] != 0)((void)0);
538 assert(OP(p->strip[p->pbegin[i]]) == OLPAREN)((void)0);
539 assert(OP(p->strip[p->pend[i]]) == ORPAREN)((void)0);
540 (void) dupl(p, p->pbegin[i]+1, p->pend[i]);
541 EMIT(O_BACK, i)doemit(p, (sop)((8LU<<((unsigned)27))), (size_t)(i));
542 } else
543 SETERROR(REG_ESUBREG)seterr(p, (6));
544 p->g->backrefs = 1;
545 break;
546 case '*':
547 REQUIRE(starordinary, REG_BADRPT)do { if (!(starordinary)) seterr(p, (13)); } while (0);
548 /* FALLTHROUGH */
549 default:
550 if (p->error != 0)
551 return(0); /* Definitely not $... */
552 ordinary(p, (char)c);
553 break;
554 }
555
556 if (EAT('*')((((p->end - p->next > 0) && (*p->next) ==
('*'))) ? ((p->next++), 1) : 0)
) { /* implemented as +? */
557 /* this case does not require the (y|) trick, noKLUDGE */
558 INSERT(OPLUS_, pos)doinsert(p, (sop)((9LU<<((unsigned)27))), (p->slen)-
(pos)+1, pos)
;
559 ASTERN(O_PLUS, pos)doemit(p, (sop)((10LU<<((unsigned)27))), (size_t)((p->
slen)-pos))
;
560 INSERT(OQUEST_, pos)doinsert(p, (sop)((11LU<<((unsigned)27))), (p->slen)
-(pos)+1, pos)
;
561 ASTERN(O_QUEST, pos)doemit(p, (sop)((12LU<<((unsigned)27))), (size_t)((p->
slen)-pos))
;
562 } else if (EATTWO('\\', '{')((((p->end - p->next > 1) && (*p->next) ==
('\\') && (*(p->next+1)) == ('{'))) ? ((p->next
+= 2), 1) : 0)
) {
563 count = p_count(p);
564 if (EAT(',')((((p->end - p->next > 0) && (*p->next) ==
(','))) ? ((p->next++), 1) : 0)
) {
565 if (MORE()(p->end - p->next > 0) && isdigit((uch)PEEK()(*p->next))) {
566 count2 = p_count(p);
567 REQUIRE(count <= count2, REG_BADBR)do { if (!(count <= count2)) seterr(p, (10)); } while (0);
568 } else /* single number with comma */
569 count2 = INFINITY(255 + 1);
570 } else /* just a single number */
571 count2 = count;
572 repeat(p, pos, count, count2);
573 if (!EATTWO('\\', '}')((((p->end - p->next > 1) && (*p->next) ==
('\\') && (*(p->next+1)) == ('}'))) ? ((p->next
+= 2), 1) : 0)
) { /* error heuristics */
574 while (MORE()(p->end - p->next > 0) && !SEETWO('\\', '}')((p->end - p->next > 1) && (*p->next) == (
'\\') && (*(p->next+1)) == ('}'))
)
575 NEXT()(p->next++);
576 REQUIRE(MORE(), REG_EBRACE)do { if (!((p->end - p->next > 0))) seterr(p, (9)); }
while (0)
;
577 SETERROR(REG_BADBR)seterr(p, (10));
578 }
579 } else if (c == '$') /* $ (but not \$) ends it */
580 return(1);
581
582 return(0);
583}
584
585/*
586 - p_count - parse a repetition count
587 */
588static int /* the value */
589p_count(struct parse *p)
590{
591 int count = 0;
592 int ndigits = 0;
593
594 while (MORE()(p->end - p->next > 0) && isdigit((uch)PEEK()(*p->next)) && count <= DUPMAX255) {
595 count = count*10 + (GETNEXT()(*p->next++) - '0');
596 ndigits++;
597 }
598
599 REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR)do { if (!(ndigits > 0 && count <= 255)) seterr
(p, (10)); } while (0)
;
600 return(count);
601}
602
603/*
604 - p_bracket - parse a bracketed character list
605 *
606 * Note a significant property of this code: if the allocset() did SETERROR,
607 * no set operations are done.
608 */
609static void
610p_bracket(struct parse *p)
611{
612 cset *cs;
613 int invert = 0;
614
615 /* Dept of Truly Sickening Special-Case Kludges */
616 if (p->end - p->next > 5) {
617 if (strncmp(p->next, "[:<:]]", 6) == 0) {
618 EMIT(OBOW, 0)doemit(p, (sop)((19LU<<((unsigned)27))), (size_t)(0));
619 NEXTn(6)(p->next += (6));
620 return;
621 }
622 if (strncmp(p->next, "[:>:]]", 6) == 0) {
623 EMIT(OEOW, 0)doemit(p, (sop)((20LU<<((unsigned)27))), (size_t)(0));
624 NEXTn(6)(p->next += (6));
625 return;
626 }
627 }
628
629 if ((cs = allocset(p)) == NULL((void *)0)) {
630 /* allocset did set error status in p */
631 return;
632 }
633
634 if (EAT('^')((((p->end - p->next > 0) && (*p->next) ==
('^'))) ? ((p->next++), 1) : 0)
)
635 invert++; /* make note to invert set at end */
636 if (EAT(']')((((p->end - p->next > 0) && (*p->next) ==
(']'))) ? ((p->next++), 1) : 0)
)
637 CHadd(cs, ']');
638 else if (EAT('-')((((p->end - p->next > 0) && (*p->next) ==
('-'))) ? ((p->next++), 1) : 0)
)
639 CHadd(cs, '-');
640 while (MORE()(p->end - p->next > 0) && PEEK()(*p->next) != ']' && !SEETWO('-', ']')((p->end - p->next > 1) && (*p->next) == (
'-') && (*(p->next+1)) == (']'))
)
641 p_b_term(p, cs);
642 if (EAT('-')((((p->end - p->next > 0) && (*p->next) ==
('-'))) ? ((p->next++), 1) : 0)
)
643 CHadd(cs, '-');
644 REQUIRE(MORE() && GETNEXT() == ']', REG_EBRACK)do { if (!((p->end - p->next > 0) && (*p->
next++) == ']')) seterr(p, (7)); } while (0)
;
645
646 if (p->error != 0) { /* don't mess things up further */
647 freeset(p, cs);
648 return;
649 }
650
651 if (p->g->cflags&REG_ICASE0002) {
652 int i;
653 int ci;
654
655 for (i = p->g->csetsize - 1; i >= 0; i--)
656 if (CHIN(cs, i) && isalpha(i)) {
657 ci = othercase(i);
658 if (ci != i)
659 CHadd(cs, ci);
660 }
661 }
662 if (invert) {
663 int i;
664
665 for (i = p->g->csetsize - 1; i >= 0; i--)
666 if (CHIN(cs, i))
667 CHsub(cs, i);
668 else
669 CHadd(cs, i);
670 if (p->g->cflags&REG_NEWLINE0010)
671 CHsub(cs, '\n');
672 }
673
674 if (nch(p, cs) == 1) { /* optimize singleton sets */
675 ordinary(p, firstch(p, cs));
676 freeset(p, cs);
677 } else
678 EMIT(OANYOF, freezeset(p, cs))doemit(p, (sop)((6LU<<((unsigned)27))), (size_t)(freezeset
(p, cs)))
;
679}
680
681/*
682 - p_b_term - parse one term of a bracketed character list
683 */
684static void
685p_b_term(struct parse *p, cset *cs)
686{
687 char c;
688 char start, finish;
689 int i;
690
691 /* classify what we've got */
692 switch ((MORE()(p->end - p->next > 0)) ? PEEK()(*p->next) : '\0') {
693 case '[':
694 c = (MORE2()(p->end - p->next > 1)) ? PEEK2()(*(p->next+1)) : '\0';
695 break;
696 case '-':
697 SETERROR(REG_ERANGE)seterr(p, (11));
698 return; /* NOTE RETURN */
699 break;
700 default:
701 c = '\0';
702 break;
703 }
704
705 switch (c) {
706 case ':': /* character class */
707 NEXT2()(p->next += 2);
708 REQUIRE(MORE(), REG_EBRACK)do { if (!((p->end - p->next > 0))) seterr(p, (7)); }
while (0)
;
709 c = PEEK()(*p->next);
710 REQUIRE(c != '-' && c != ']', REG_ECTYPE)do { if (!(c != '-' && c != ']')) seterr(p, (4)); } while
(0)
;
711 p_b_cclass(p, cs);
712 REQUIRE(MORE(), REG_EBRACK)do { if (!((p->end - p->next > 0))) seterr(p, (7)); }
while (0)
;
713 REQUIRE(EATTWO(':', ']'), REG_ECTYPE)do { if (!(((((p->end - p->next > 1) && (*p->
next) == (':') && (*(p->next+1)) == (']'))) ? ((p->
next += 2), 1) : 0))) seterr(p, (4)); } while (0)
;
714 break;
715 case '=': /* equivalence class */
716 NEXT2()(p->next += 2);
717 REQUIRE(MORE(), REG_EBRACK)do { if (!((p->end - p->next > 0))) seterr(p, (7)); }
while (0)
;
718 c = PEEK()(*p->next);
719 REQUIRE(c != '-' && c != ']', REG_ECOLLATE)do { if (!(c != '-' && c != ']')) seterr(p, (3)); } while
(0)
;
720 p_b_eclass(p, cs);
721 REQUIRE(MORE(), REG_EBRACK)do { if (!((p->end - p->next > 0))) seterr(p, (7)); }
while (0)
;
722 REQUIRE(EATTWO('=', ']'), REG_ECOLLATE)do { if (!(((((p->end - p->next > 1) && (*p->
next) == ('=') && (*(p->next+1)) == (']'))) ? ((p->
next += 2), 1) : 0))) seterr(p, (3)); } while (0)
;
723 break;
724 default: /* symbol, ordinary character, or range */
725/* xxx revision needed for multichar stuff */
726 start = p_b_symbol(p);
727 if (SEE('-')((p->end - p->next > 0) && (*p->next) == (
'-'))
&& MORE2()(p->end - p->next > 1) && PEEK2()(*(p->next+1)) != ']') {
728 /* range */
729 NEXT()(p->next++);
730 if (EAT('-')((((p->end - p->next > 0) && (*p->next) ==
('-'))) ? ((p->next++), 1) : 0)
)
731 finish = '-';
732 else
733 finish = p_b_symbol(p);
734 } else
735 finish = start;
736/* xxx what about signed chars here... */
737 REQUIRE(start <= finish, REG_ERANGE)do { if (!(start <= finish)) seterr(p, (11)); } while (0);
738 for (i = start; i <= finish; i++)
739 CHadd(cs, i);
740 break;
741 }
742}
743
744/*
745 - p_b_cclass - parse a character-class name and deal with it
746 */
747static void
748p_b_cclass(struct parse *p, cset *cs)
749{
750 const char *sp = p->next;
751 const struct cclass *cp;
752 size_t len;
753 const char *u;
754 char c;
755
756 while (MORE()(p->end - p->next > 0) && isalpha((uch)PEEK()(*p->next)))
757 NEXT()(p->next++);
758 len = p->next - sp;
759 for (cp = cclasses; cp->name != NULL((void *)0); cp++)
760 if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
761 break;
762 if (cp->name == NULL((void *)0)) {
763 /* oops, didn't find it */
764 SETERROR(REG_ECTYPE)seterr(p, (4));
765 return;
766 }
767
768 u = cp->chars;
769 while ((c = *u++) != '\0')
770 CHadd(cs, c);
771}
772
773/*
774 - p_b_eclass - parse an equivalence-class name and deal with it
775 *
776 * This implementation is incomplete. xxx
777 */
778static void
779p_b_eclass(struct parse *p, cset *cs)
780{
781 char c;
782
783 c = p_b_coll_elem(p, '=');
784 CHadd(cs, c);
785}
786
787/*
788 - p_b_symbol - parse a character or [..]ed multicharacter collating symbol
789 */
790static char /* value of symbol */
791p_b_symbol(struct parse *p)
792{
793 char value;
794
795 REQUIRE(MORE(), REG_EBRACK)do { if (!((p->end - p->next > 0))) seterr(p, (7)); }
while (0)
;
796 if (!EATTWO('[', '.')((((p->end - p->next > 1) && (*p->next) ==
('[') && (*(p->next+1)) == ('.'))) ? ((p->next
+= 2), 1) : 0)
)
797 return(GETNEXT()(*p->next++));
798
799 /* collating symbol */
800 value = p_b_coll_elem(p, '.');
801 REQUIRE(EATTWO('.', ']'), REG_ECOLLATE)do { if (!(((((p->end - p->next > 1) && (*p->
next) == ('.') && (*(p->next+1)) == (']'))) ? ((p->
next += 2), 1) : 0))) seterr(p, (3)); } while (0)
;
802 return(value);
803}
804
805/*
806 - p_b_coll_elem - parse a collating-element name and look it up
807 */
808static char /* value of collating element */
809p_b_coll_elem(struct parse *p,
810 int endc) /* name ended by endc,']' */
811{
812 const char *sp = p->next;
813 const struct cname *cp;
814 size_t len;
815
816 while (MORE()(p->end - p->next > 0) && !SEETWO(endc, ']')((p->end - p->next > 1) && (*p->next) == (
endc) && (*(p->next+1)) == (']'))
)
817 NEXT()(p->next++);
818 if (!MORE()(p->end - p->next > 0)) {
819 SETERROR(REG_EBRACK)seterr(p, (7));
820 return(0);
821 }
822 len = p->next - sp;
823 for (cp = cnames; cp->name != NULL((void *)0); cp++)
824 if (strncmp(cp->name, sp, len) == 0 && strlen(cp->name) == len)
825 return(cp->code); /* known name */
826 if (len == 1)
827 return(*sp); /* single character */
828 SETERROR(REG_ECOLLATE)seterr(p, (3)); /* neither */
829 return(0);
830}
831
832/*
833 - othercase - return the case counterpart of an alphabetic
834 */
835static char /* if no counterpart, return ch */
836othercase(int ch)
837{
838 ch = (uch)ch;
839 assert(isalpha(ch))((void)0);
840 if (isupper(ch))
841 return ((uch)tolower(ch));
842 else if (islower(ch))
843 return ((uch)toupper(ch));
844 else /* peculiar, but could happen */
845 return(ch);
846}
847
848/*
849 - bothcases - emit a dualcase version of a two-case character
850 *
851 * Boy, is this implementation ever a kludge...
852 */
853static void
854bothcases(struct parse *p, int ch)
855{
856 const char *oldnext = p->next;
857 const char *oldend = p->end;
858 char bracket[3];
859
860 ch = (uch)ch;
861 assert(othercase(ch) != ch)((void)0); /* p_bracket() would recurse */
862 p->next = bracket;
863 p->end = bracket+2;
864 bracket[0] = ch;
865 bracket[1] = ']';
866 bracket[2] = '\0';
867 p_bracket(p);
868 assert(p->next == bracket+2)((void)0);
869 p->next = oldnext;
870 p->end = oldend;
871}
872
873/*
874 - ordinary - emit an ordinary character
875 */
876static void
877ordinary(struct parse *p, int ch)
878{
879 if ((p->g->cflags&REG_ICASE0002) && isalpha((uch)ch) && othercase(ch) != ch)
880 bothcases(p, ch);
881 else
882 EMIT(OCHAR, (uch)ch)doemit(p, (sop)((2LU<<((unsigned)27))), (size_t)((uch)ch
))
;
883}
884
885/*
886 * do something magic with this character, but only if it's extra magic
887 */
888static void
889backslash(struct parse *p, int ch)
890{
891 switch (ch) {
892 case '<':
893 EMIT(OBOW, 0)doemit(p, (sop)((19LU<<((unsigned)27))), (size_t)(0));
894 break;
895 case '>':
896 EMIT(OEOW, 0)doemit(p, (sop)((20LU<<((unsigned)27))), (size_t)(0));
897 break;
898 default:
899 ordinary(p, ch);
900 break;
901 }
902}
903
904/*
905 - nonnewline - emit REG_NEWLINE version of OANY
906 *
907 * Boy, is this implementation ever a kludge...
908 */
909static void
910nonnewline(struct parse *p)
911{
912 const char *oldnext = p->next;
913 const char *oldend = p->end;
914 static const char bracket[4] = { '^', '\n', ']', '\0' };
915
916 p->next = bracket;
917 p->end = bracket+3;
918 p_bracket(p);
919 assert(p->next == bracket+3)((void)0);
920 p->next = oldnext;
921 p->end = oldend;
922}
923
924/*
925 - repeat - generate code for a bounded repetition, recursively if needed
926 */
927static void
928repeat(struct parse *p,
929 sopno start, /* operand from here to end of strip */
930 int from, /* repeated from this number */
931 int to) /* to this number of times (maybe INFINITY) */
932{
933 sopno finish = HERE()(p->slen);
934# define N2 2
935# define INF3 3
936# define REP(f, t)((f)*8 + (t)) ((f)*8 + (t))
937# define MAP(n)(((n) <= 1) ? (n) : ((n) == (255 + 1)) ? 3 : 2) (((n) <= 1) ? (n) : ((n) == INFINITY(255 + 1)) ? INF3 : N2)
938 sopno copy;
939
940 if (p->error != 0) /* head off possible runaway recursion */
941 return;
942
943 assert(from <= to)((void)0);
944
945 switch (REP(MAP(from), MAP(to))(((((from) <= 1) ? (from) : ((from) == (255 + 1)) ? 3 : 2)
)*8 + ((((to) <= 1) ? (to) : ((to) == (255 + 1)) ? 3 : 2))
)
) {
946 case REP(0, 0)((0)*8 + (0)): /* must be user doing this */
947 DROP(finish-start)(p->slen -= (finish-start)); /* drop the operand */
948 break;
949 case REP(0, 1)((0)*8 + (1)): /* as x{1,1}? */
950 case REP(0, N)((0)*8 + (2)): /* as x{1,n}? */
951 case REP(0, INF)((0)*8 + (3)): /* as x{1,}? */
952 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
953 INSERT(OCH_, start)doinsert(p, (sop)((15LU<<((unsigned)27))), (p->slen)
-(start)+1, start)
; /* offset is wrong... */
954 repeat(p, start+1, 1, to);
955 ASTERN(OOR1, start)doemit(p, (sop)((16LU<<((unsigned)27))), (size_t)((p->
slen)-start))
;
956 AHEAD(start)dofwd(p, start, (p->slen)-(start)); /* ... fix it */
957 EMIT(OOR2, 0)doemit(p, (sop)((17LU<<((unsigned)27))), (size_t)(0));
958 AHEAD(THERE())dofwd(p, (p->slen - 1), (p->slen)-((p->slen - 1)));
959 ASTERN(O_CH, THERETHERE())doemit(p, (sop)((18LU<<((unsigned)27))), (size_t)((p->
slen)-(p->slen - 2)))
;
960 break;
961 case REP(1, 1)((1)*8 + (1)): /* trivial case */
962 /* done */
963 break;
964 case REP(1, N)((1)*8 + (2)): /* as x?x{1,n-1} */
965 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
966 INSERT(OCH_, start)doinsert(p, (sop)((15LU<<((unsigned)27))), (p->slen)
-(start)+1, start)
;
967 ASTERN(OOR1, start)doemit(p, (sop)((16LU<<((unsigned)27))), (size_t)((p->
slen)-start))
;
968 AHEAD(start)dofwd(p, start, (p->slen)-(start));
969 EMIT(OOR2, 0)doemit(p, (sop)((17LU<<((unsigned)27))), (size_t)(0)); /* offset very wrong... */
970 AHEAD(THERE())dofwd(p, (p->slen - 1), (p->slen)-((p->slen - 1))); /* ...so fix it */
971 ASTERN(O_CH, THERETHERE())doemit(p, (sop)((18LU<<((unsigned)27))), (size_t)((p->
slen)-(p->slen - 2)))
;
972 copy = dupl(p, start+1, finish+1);
973 assert(copy == finish+4)((void)0);
974 repeat(p, copy, 1, to-1);
975 break;
976 case REP(1, INF)((1)*8 + (3)): /* as x+ */
977 INSERT(OPLUS_, start)doinsert(p, (sop)((9LU<<((unsigned)27))), (p->slen)-
(start)+1, start)
;
978 ASTERN(O_PLUS, start)doemit(p, (sop)((10LU<<((unsigned)27))), (size_t)((p->
slen)-start))
;
979 break;
980 case REP(N, N)((2)*8 + (2)): /* as xx{m-1,n-1} */
981 copy = dupl(p, start, finish);
982 repeat(p, copy, from-1, to-1);
983 break;
984 case REP(N, INF)((2)*8 + (3)): /* as xx{n-1,INF} */
985 copy = dupl(p, start, finish);
986 repeat(p, copy, from-1, to);
987 break;
988 default: /* "can't happen" */
989 SETERROR(REG_ASSERT)seterr(p, (15)); /* just in case */
990 break;
991 }
992}
993
994/*
995 - seterr - set an error condition
996 */
997static void
998seterr(struct parse *p, int e)
999{
1000 if (p->error == 0) /* keep earliest error condition */
1001 p->error = e;
1002 p->next = nuls; /* try to bring things to a halt */
1003 p->end = nuls;
1004}
1005
1006/*
1007 - allocset - allocate a set of characters for []
1008 */
1009static cset *
1010allocset(struct parse *p)
1011{
1012 int no = p->g->ncsets++;
1013 size_t nc;
1014 size_t nbytes;
1015 cset *cs;
1016 size_t css = (size_t)p->g->csetsize;
1017 int i;
1018
1019 if (no >= p->ncsalloc) { /* need another column of space */
1020 void *ptr;
1021
1022 p->ncsalloc += CHAR_BIT8;
1023 nc = p->ncsalloc;
1024 assert(nc % CHAR_BIT == 0)((void)0);
1025
1026 ptr = reallocarray(p->g->sets, nc, sizeof(cset));
1027 if (ptr == NULL((void *)0))
1028 goto nomem;
1029 p->g->sets = ptr;
1030
1031 ptr = reallocarray(p->g->setbits, nc / CHAR_BIT8, css);
1032 if (ptr == NULL((void *)0))
1033 goto nomem;
1034 nbytes = (nc / CHAR_BIT8) * css;
1035 p->g->setbits = ptr;
1036
1037 for (i = 0; i < no; i++)
1038 p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT8);
1039
1040 (void) memset((char *)p->g->setbits + (nbytes - css), 0, css);
1041 }
1042 /* XXX should not happen */
1043 if (p->g->sets == NULL((void *)0) || p->g->setbits == NULL((void *)0))
1044 goto nomem;
1045
1046 cs = &p->g->sets[no];
1047 cs->ptr = p->g->setbits + css*((no)/CHAR_BIT8);
1048 cs->mask = 1 << ((no) % CHAR_BIT8);
1049 cs->hash = 0;
1050
1051 return(cs);
1052nomem:
1053 free(p->g->sets);
1054 p->g->sets = NULL((void *)0);
1055 free(p->g->setbits);
1056 p->g->setbits = NULL((void *)0);
1057
1058 SETERROR(REG_ESPACE)seterr(p, (12));
1059 /* caller's responsibility not to do set ops */
1060 return(NULL((void *)0));
1061}
1062
1063/*
1064 - freeset - free a now-unused set
1065 */
1066static void
1067freeset(struct parse *p, cset *cs)
1068{
1069 int i;
1070 cset *top = &p->g->sets[p->g->ncsets];
1071 size_t css = (size_t)p->g->csetsize;
1072
1073 for (i = 0; i < css; i++)
1074 CHsub(cs, i);
1075 if (cs == top-1) /* recover only the easy case */
1076 p->g->ncsets--;
1077}
1078
1079/*
1080 - freezeset - final processing on a set of characters
1081 *
1082 * The main task here is merging identical sets. This is usually a waste
1083 * of time (although the hash code minimizes the overhead), but can win
1084 * big if REG_ICASE is being used. REG_ICASE, by the way, is why the hash
1085 * is done using addition rather than xor -- all ASCII [aA] sets xor to
1086 * the same value!
1087 */
1088static int /* set number */
1089freezeset(struct parse *p, cset *cs)
1090{
1091 uch h = cs->hash;
1092 int i;
1093 cset *top = &p->g->sets[p->g->ncsets];
1094 cset *cs2;
1095 size_t css = (size_t)p->g->csetsize;
1096
1097 /* look for an earlier one which is the same */
1098 for (cs2 = &p->g->sets[0]; cs2 < top; cs2++)
1099 if (cs2->hash == h && cs2 != cs) {
1100 /* maybe */
1101 for (i = 0; i < css; i++)
1102 if (CHIN(cs2, i) != CHIN(cs, i))
1103 break; /* no */
1104 if (i == css)
1105 break; /* yes */
1106 }
1107
1108 if (cs2 < top) { /* found one */
1109 freeset(p, cs);
1110 cs = cs2;
1111 }
1112
1113 return((int)(cs - p->g->sets));
1114}
1115
1116/*
1117 - firstch - return first character in a set (which must have at least one)
1118 */
1119static int /* character; there is no "none" value */
1120firstch(struct parse *p, cset *cs)
1121{
1122 int i;
1123 size_t css = (size_t)p->g->csetsize;
1124
1125 for (i = 0; i < css; i++)
1126 if (CHIN(cs, i))
1127 return((char)i);
1128 assert(never)((void)0);
1129 return(0); /* arbitrary */
1130}
1131
1132/*
1133 - nch - number of characters in a set
1134 */
1135static int
1136nch(struct parse *p, cset *cs)
1137{
1138 int i;
1139 size_t css = (size_t)p->g->csetsize;
1140 int n = 0;
1141
1142 for (i = 0; i < css; i++)
1143 if (CHIN(cs, i))
1144 n++;
1145 return(n);
1146}
1147
1148/*
1149 - dupl - emit a duplicate of a bunch of sops
1150 */
1151static sopno /* start of duplicate */
1152dupl(struct parse *p,
1153 sopno start, /* from here */
1154 sopno finish) /* to this less one */
1155{
1156 sopno ret = HERE()(p->slen);
1157 sopno len = finish - start;
1158
1159 assert(finish >= start)((void)0);
1160 if (len == 0)
1161 return(ret);
1162 if (!enlarge(p, p->ssize + len)) /* this many unexpected additions */
1163 return(ret);
1164 (void) memcpy(p->strip + p->slen, p->strip + start, len * sizeof(sop));
1165 p->slen += len;
1166 return(ret);
1167}
1168
1169/*
1170 - doemit - emit a strip operator
1171 *
1172 * It might seem better to implement this as a macro with a function as
1173 * hard-case backup, but it's just too big and messy unless there are
1174 * some changes to the data structures. Maybe later.
1175 */
1176static void
1177doemit(struct parse *p, sop op, size_t opnd)
1178{
1179 /* avoid making error situations worse */
1180 if (p->error != 0)
1181 return;
1182
1183 /* deal with oversize operands ("can't happen", more or less) */
1184 assert(opnd < 1<<OPSHIFT)((void)0);
1185
1186 /* deal with undersized strip */
1187 if (p->slen >= p->ssize)
1188 if (!enlarge(p, (p->ssize+1) / 2 * 3)) /* +50% */
1189 return;
1190
1191 /* finally, it's all reduced to the easy case */
1192 p->strip[p->slen++] = SOP(op, opnd)((op)|(opnd));
1193}
1194
1195/*
1196 - doinsert - insert a sop into the strip
1197 */
1198static void
1199doinsert(struct parse *p, sop op, size_t opnd, sopno pos)
1200{
1201 sopno sn;
1202 sop s;
1203 int i;
1204
1205 /* avoid making error situations worse */
1206 if (p->error != 0)
1207 return;
1208
1209 sn = HERE()(p->slen);
1210 EMIT(op, opnd)doemit(p, (sop)(op), (size_t)(opnd)); /* do checks, ensure space */
1211 assert(HERE() == sn+1)((void)0);
1212 s = p->strip[sn];
1213
1214 /* adjust paren pointers */
1215 assert(pos > 0)((void)0);
1216 for (i = 1; i < NPAREN10; i++) {
1217 if (p->pbegin[i] >= pos) {
1218 p->pbegin[i]++;
1219 }
1220 if (p->pend[i] >= pos) {
1221 p->pend[i]++;
1222 }
1223 }
1224
1225 memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos],
1226 (HERE()(p->slen)-pos-1)*sizeof(sop));
1227 p->strip[pos] = s;
1228}
1229
1230/*
1231 - dofwd - complete a forward reference
1232 */
1233static void
1234dofwd(struct parse *p, sopno pos, sop value)
1235{
1236 /* avoid making error situations worse */
1237 if (p->error != 0)
1238 return;
1239
1240 assert(value < 1<<OPSHIFT)((void)0);
1241 p->strip[pos] = OP(p->strip[pos])((p->strip[pos])&0xf8000000LU) | value;
1242}
1243
1244/*
1245 - enlarge - enlarge the strip
1246 */
1247static int
1248enlarge(struct parse *p, sopno size)
1249{
1250 sop *sp;
1251
1252 if (p->ssize >= size)
1253 return 1;
1254
1255 sp = reallocarray(p->strip, size, sizeof(sop));
1256 if (sp == NULL((void *)0)) {
1257 SETERROR(REG_ESPACE)seterr(p, (12));
1258 return 0;
1259 }
1260 p->strip = sp;
1261 p->ssize = size;
1262 return 1;
1263}
1264
1265/*
1266 - stripsnug - compact the strip
1267 */
1268static void
1269stripsnug(struct parse *p, struct re_guts *g)
1270{
1271 g->nstates = p->slen;
1272 g->strip = reallocarray(p->strip, p->slen, sizeof(sop));
1273 if (g->strip == NULL((void *)0)) {
1274 SETERROR(REG_ESPACE)seterr(p, (12));
1275 g->strip = p->strip;
1276 }
1277}
1278
1279/*
1280 - findmust - fill in must and mlen with longest mandatory literal string
1281 *
1282 * This algorithm could do fancy things like analyzing the operands of |
1283 * for common subsequences. Someday. This code is simple and finds most
1284 * of the interesting cases.
1285 *
1286 * Note that must and mlen got initialized during setup.
1287 */
1288static void
1289findmust(struct parse *p, struct re_guts *g)
1290{
1291 sop *scan;
1292 sop *start; /* start initialized in the default case, after that */
1293 sop *newstart; /* newstart was initialized in the OCHAR case */
1
'newstart' declared without an initial value
1294 sopno newlen;
1295 sop s;
1296 char *cp;
1297 sopno i;
1298
1299 /* avoid making error situations worse */
1300 if (p->error != 0)
2
Assuming field 'error' is equal to 0
3
Taking false branch
1301 return;
1302
1303 /* find the longest OCHAR sequence in strip */
1304 newlen = 0;
1305 scan = g->strip + 1;
1306 do {
1307 s = *scan++;
1308 switch (OP(s)((s)&0xf8000000LU)) {
4
Control jumps to the 'default' case at line 1332
1309 case OCHAR(2LU<<((unsigned)27)): /* sequence member */
1310 if (newlen == 0) /* new sequence */
1311 newstart = scan - 1;
1312 newlen++;
1313 break;
1314 case OPLUS_(9LU<<((unsigned)27)): /* things that don't break one */
1315 case OLPAREN(13LU<<((unsigned)27)):
1316 case ORPAREN(14LU<<((unsigned)27)):
1317 break;
1318 case OQUEST_(11LU<<((unsigned)27)): /* things that must be skipped */
1319 case OCH_(15LU<<((unsigned)27)):
1320 scan--;
1321 do {
1322 scan += OPND(s)((s)&0x07ffffffLU);
1323 s = *scan;
1324 /* assert() interferes w debug printouts */
1325 if (OP(s)((s)&0xf8000000LU) != O_QUEST(12LU<<((unsigned)27)) && OP(s)((s)&0xf8000000LU) != O_CH(18LU<<((unsigned)27)) &&
1326 OP(s)((s)&0xf8000000LU) != OOR2(17LU<<((unsigned)27))) {
1327 g->iflags |= BAD04;
1328 return;
1329 }
1330 } while (OP(s)((s)&0xf8000000LU) != O_QUEST(12LU<<((unsigned)27)) && OP(s)((s)&0xf8000000LU) != O_CH(18LU<<((unsigned)27)));
1331 /* fallthrough */
1332 default: /* things that break a sequence */
1333 if (newlen > g->mlen) { /* ends one */
5
Assuming 'newlen' is > field 'mlen'
6
Taking true branch
1334 start = newstart;
7
Assigned value is garbage or undefined
1335 g->mlen = newlen;
1336 }
1337 newlen = 0;
1338 break;
1339 }
1340 } while (OP(s)((s)&0xf8000000LU) != OEND(1LU<<((unsigned)27)));
1341
1342 if (g->mlen == 0) /* there isn't one */
1343 return;
1344
1345 /* turn it into a character string */
1346 g->must = malloc((size_t)g->mlen + 1);
1347 if (g->must == NULL((void *)0)) { /* argh; just forget it */
1348 g->mlen = 0;
1349 return;
1350 }
1351 cp = g->must;
1352 scan = start;
1353 for (i = g->mlen; i > 0; i--) {
1354 while (OP(s = *scan++)((s = *scan++)&0xf8000000LU) != OCHAR(2LU<<((unsigned)27)))
1355 continue;
1356 assert(cp < g->must + g->mlen)((void)0);
1357 *cp++ = (char)OPND(s)((s)&0x07ffffffLU);
1358 }
1359 assert(cp == g->must + g->mlen)((void)0);
1360 *cp = '\0'; /* just on general principles */
1361}
1362
1363/*
1364 - pluscount - count + nesting
1365 */
1366static sopno /* nesting depth */
1367pluscount(struct parse *p, struct re_guts *g)
1368{
1369 sop *scan;
1370 sop s;
1371 sopno plusnest = 0;
1372 sopno maxnest = 0;
1373
1374 if (p->error != 0)
1375 return(0); /* there may not be an OEND */
1376
1377 scan = g->strip + 1;
1378 do {
1379 s = *scan++;
1380 switch (OP(s)((s)&0xf8000000LU)) {
1381 case OPLUS_(9LU<<((unsigned)27)):
1382 plusnest++;
1383 break;
1384 case O_PLUS(10LU<<((unsigned)27)):
1385 if (plusnest > maxnest)
1386 maxnest = plusnest;
1387 plusnest--;
1388 break;
1389 }
1390 } while (OP(s)((s)&0xf8000000LU) != OEND(1LU<<((unsigned)27)));
1391 if (plusnest != 0)
1392 g->iflags |= BAD04;
1393 return(maxnest);
1394}