File: | src/lib/libedit/tokenizer.c |
Warning: | line 203, column 2 Value stored to 'ptr' is never read |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | /* $OpenBSD: tokenizer.c,v 1.21 2016/04/11 21:17:29 schwarze Exp $ */ |
2 | /* $NetBSD: tokenizer.c,v 1.28 2016/04/11 18:56:31 christos Exp $ */ |
3 | |
4 | /*- |
5 | * Copyright (c) 1992, 1993 |
6 | * The Regents of the University of California. All rights reserved. |
7 | * |
8 | * This code is derived from software contributed to Berkeley by |
9 | * Christos Zoulas of Cornell University. |
10 | * |
11 | * Redistribution and use in source and binary forms, with or without |
12 | * modification, are permitted provided that the following conditions |
13 | * are met: |
14 | * 1. Redistributions of source code must retain the above copyright |
15 | * notice, this list of conditions and the following disclaimer. |
16 | * 2. Redistributions in binary form must reproduce the above copyright |
17 | * notice, this list of conditions and the following disclaimer in the |
18 | * documentation and/or other materials provided with the distribution. |
19 | * 3. Neither the name of the University nor the names of its contributors |
20 | * may be used to endorse or promote products derived from this software |
21 | * without specific prior written permission. |
22 | * |
23 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
24 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
25 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
26 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
27 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
28 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
29 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
30 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
31 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
32 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
33 | * SUCH DAMAGE. |
34 | */ |
35 | |
36 | #include "config.h" |
37 | |
38 | /* We build this file twice, once as NARROW, once as WIDE. */ |
/*
 * tokenizer.c: Bourne shell like tokenizer
 */
42 | #include <stdlib.h> |
43 | #include <string.h> |
44 | |
45 | #include "histedit.h" |
46 | |
/* Quoting state of the tokenizer between characters. */
typedef enum {
	Q_none,		/* not inside any quoting */
	Q_single,	/* inside '...' */
	Q_double,	/* inside "..." */
	Q_one,		/* backslash seen outside quotes: next char is literal */
	Q_doubleone	/* backslash seen inside "...": next char is escaped */
} quote_t;
50 | |
/* Bits stored in the tokenizer's flags field. */
#define TOK_KEEP	1	/* emit the current word even if it is empty */
#define TOK_EAT		2	/* an escaped newline was just swallowed */

#define WINCR		20	/* word buffer growth step, in characters */
#define AINCR		10	/* argument vector growth step, in entries */

#define IFS		STR("\t \n")	/* default field separators */

/*
 * Character-width abstraction: the same source is compiled once with
 * NARROWCHAR defined (char, tok_*) and once without (wchar_t, tok_w*).
 */
#ifdef NARROWCHAR
#define Char			char
#define FUN(prefix, rest)	prefix ## _ ## rest
#define TYPE(type)		type
#define STR(x)			x
#define Strchr(s, c)		strchr(s, c)
#define tok_strdup(s)		strdup(s)
#else
#define Char			wchar_t
#define FUN(prefix, rest)	prefix ## _w ## rest
#define TYPE(type)		type ## W
#define STR(x)			L ## x
#define Strchr(s, c)		wcschr(s, c)
#define tok_strdup(s)		wcsdup(s)
#endif
74 | |
75 | struct TYPE(tokenizer)tokenizer { |
76 | Charchar *ifs; /* In field separator */ |
77 | int argc, amax; /* Current and maximum number of args */ |
78 | Charchar **argv; /* Argument list */ |
79 | Charchar *wptr, *wmax; /* Space and limit on the word buffer */ |
80 | Charchar *wstart; /* Beginning of next word */ |
81 | Charchar *wspace; /* Space of word buffer */ |
82 | quote_t quote; /* Quoting state */ |
83 | int flags; /* flags; */ |
84 | }; |
85 | |
86 | |
87 | static void FUN(tok,finish)tok_finish(TYPE(Tokenizer)Tokenizer *); |
88 | |
89 | |
90 | /* FUN(tok,finish)(): |
91 | * Finish a word in the tokenizer. |
92 | */ |
93 | static void |
94 | FUN(tok,finish)tok_finish(TYPE(Tokenizer)Tokenizer *tok) |
95 | { |
96 | |
97 | *tok->wptr = '\0'; |
98 | if ((tok->flags & TOK_KEEP1) || tok->wptr != tok->wstart) { |
99 | tok->argv[tok->argc++] = tok->wstart; |
100 | tok->argv[tok->argc] = NULL((void *)0); |
101 | tok->wstart = ++tok->wptr; |
102 | } |
103 | tok->flags &= ~TOK_KEEP1; |
104 | } |
105 | |
106 | |
107 | /* FUN(tok,init)(): |
108 | * Initialize the tokenizer |
109 | */ |
110 | TYPE(Tokenizer)Tokenizer * |
111 | FUN(tok,init)tok_init(const Charchar *ifs) |
112 | { |
113 | TYPE(Tokenizer)Tokenizer *tok = malloc(sizeof(TYPE(Tokenizer)Tokenizer)); |
114 | |
115 | if (tok == NULL((void *)0)) |
116 | return NULL((void *)0); |
117 | tok->ifs = tok_strdup(ifs ? ifs : IFS)strdup(ifs ? ifs : "\t \n"); |
118 | if (tok->ifs == NULL((void *)0)) { |
119 | free(tok); |
120 | return NULL((void *)0); |
121 | } |
122 | tok->argc = 0; |
123 | tok->amax = AINCR10; |
124 | tok->argv = reallocarray(NULL((void *)0), tok->amax, sizeof(*tok->argv)); |
125 | if (tok->argv == NULL((void *)0)) { |
126 | free(tok->ifs); |
127 | free(tok); |
128 | return NULL((void *)0); |
129 | } |
130 | tok->argv[0] = NULL((void *)0); |
131 | tok->wspace = reallocarray(NULL((void *)0), WINCR20, sizeof(*tok->wspace)); |
132 | if (tok->wspace == NULL((void *)0)) { |
133 | free(tok->argv); |
134 | free(tok->ifs); |
135 | free(tok); |
136 | return NULL((void *)0); |
137 | } |
138 | tok->wmax = tok->wspace + WINCR20; |
139 | tok->wstart = tok->wspace; |
140 | tok->wptr = tok->wspace; |
141 | tok->flags = 0; |
142 | tok->quote = Q_none; |
143 | |
144 | return tok; |
145 | } |
146 | |
147 | |
148 | /* FUN(tok,reset)(): |
149 | * Reset the tokenizer |
150 | */ |
151 | void |
152 | FUN(tok,reset)tok_reset(TYPE(Tokenizer)Tokenizer *tok) |
153 | { |
154 | |
155 | tok->argc = 0; |
156 | tok->wstart = tok->wspace; |
157 | tok->wptr = tok->wspace; |
158 | tok->flags = 0; |
159 | tok->quote = Q_none; |
160 | } |
161 | |
162 | |
163 | /* FUN(tok,end)(): |
164 | * Clean up |
165 | */ |
166 | void |
167 | FUN(tok,end)tok_end(TYPE(Tokenizer)Tokenizer *tok) |
168 | { |
169 | |
170 | free(tok->ifs); |
171 | free(tok->wspace); |
172 | free(tok->argv); |
173 | free(tok); |
174 | } |
175 | |
176 | |
177 | |
178 | /* FUN(tok,line)(): |
179 | * Bourne shell (sh(1)) like tokenizing |
180 | * Arguments: |
181 | * tok current tokenizer state (setup with FUN(tok,init)()) |
182 | * line line to parse |
183 | * Returns: |
184 | * -1 Internal error |
185 | * 3 Quoted return |
186 | * 2 Unmatched double quote |
187 | * 1 Unmatched single quote |
188 | * 0 Ok |
189 | * Modifies (if return value is 0): |
190 | * argc number of arguments |
191 | * argv argument array |
192 | * cursorc if !NULL, argv element containing cursor |
193 | * cursorv if !NULL, offset in argv[cursorc] of cursor |
194 | */ |
195 | int |
196 | FUN(tok,line)tok_line(TYPE(Tokenizer)Tokenizer *tok, const TYPE(LineInfo)LineInfo *line, |
197 | int *argc, const Charchar ***argv, int *cursorc, int *cursoro) |
198 | { |
199 | const Charchar *ptr; |
200 | int cc, co; |
201 | |
202 | cc = co = -1; |
203 | ptr = line->buffer; |
Value stored to 'ptr' is never read | |
204 | for (ptr = line->buffer; ;ptr++) { |
205 | if (ptr >= line->lastchar) |
206 | ptr = STR("")""; |
207 | if (ptr == line->cursor) { |
208 | cc = tok->argc; |
209 | co = (int)(tok->wptr - tok->wstart); |
210 | } |
211 | switch (*ptr) { |
212 | case '\'': |
213 | tok->flags |= TOK_KEEP1; |
214 | tok->flags &= ~TOK_EAT2; |
215 | switch (tok->quote) { |
216 | case Q_none: |
217 | tok->quote = Q_single; /* Enter single quote |
218 | * mode */ |
219 | break; |
220 | |
221 | case Q_single: /* Exit single quote mode */ |
222 | tok->quote = Q_none; |
223 | break; |
224 | |
225 | case Q_one: /* Quote this ' */ |
226 | tok->quote = Q_none; |
227 | *tok->wptr++ = *ptr; |
228 | break; |
229 | |
230 | case Q_double: /* Stay in double quote mode */ |
231 | *tok->wptr++ = *ptr; |
232 | break; |
233 | |
234 | case Q_doubleone: /* Quote this ' */ |
235 | tok->quote = Q_double; |
236 | *tok->wptr++ = *ptr; |
237 | break; |
238 | |
239 | default: |
240 | return -1; |
241 | } |
242 | break; |
243 | |
244 | case '"': |
245 | tok->flags &= ~TOK_EAT2; |
246 | tok->flags |= TOK_KEEP1; |
247 | switch (tok->quote) { |
248 | case Q_none: /* Enter double quote mode */ |
249 | tok->quote = Q_double; |
250 | break; |
251 | |
252 | case Q_double: /* Exit double quote mode */ |
253 | tok->quote = Q_none; |
254 | break; |
255 | |
256 | case Q_one: /* Quote this " */ |
257 | tok->quote = Q_none; |
258 | *tok->wptr++ = *ptr; |
259 | break; |
260 | |
261 | case Q_single: /* Stay in single quote mode */ |
262 | *tok->wptr++ = *ptr; |
263 | break; |
264 | |
265 | case Q_doubleone: /* Quote this " */ |
266 | tok->quote = Q_double; |
267 | *tok->wptr++ = *ptr; |
268 | break; |
269 | |
270 | default: |
271 | return -1; |
272 | } |
273 | break; |
274 | |
275 | case '\\': |
276 | tok->flags |= TOK_KEEP1; |
277 | tok->flags &= ~TOK_EAT2; |
278 | switch (tok->quote) { |
279 | case Q_none: /* Quote next character */ |
280 | tok->quote = Q_one; |
281 | break; |
282 | |
283 | case Q_double: /* Quote next character */ |
284 | tok->quote = Q_doubleone; |
285 | break; |
286 | |
287 | case Q_one: /* Quote this, restore state */ |
288 | *tok->wptr++ = *ptr; |
289 | tok->quote = Q_none; |
290 | break; |
291 | |
292 | case Q_single: /* Stay in single quote mode */ |
293 | *tok->wptr++ = *ptr; |
294 | break; |
295 | |
296 | case Q_doubleone: /* Quote this \ */ |
297 | tok->quote = Q_double; |
298 | *tok->wptr++ = *ptr; |
299 | break; |
300 | |
301 | default: |
302 | return -1; |
303 | } |
304 | break; |
305 | |
306 | case '\n': |
307 | tok->flags &= ~TOK_EAT2; |
308 | switch (tok->quote) { |
309 | case Q_none: |
310 | goto tok_line_outok; |
311 | |
312 | case Q_single: |
313 | case Q_double: |
314 | *tok->wptr++ = *ptr; /* Add the return */ |
315 | break; |
316 | |
317 | case Q_doubleone: /* Back to double, eat the '\n' */ |
318 | tok->flags |= TOK_EAT2; |
319 | tok->quote = Q_double; |
320 | break; |
321 | |
322 | case Q_one: /* No quote, more eat the '\n' */ |
323 | tok->flags |= TOK_EAT2; |
324 | tok->quote = Q_none; |
325 | break; |
326 | |
327 | default: |
328 | return 0; |
329 | } |
330 | break; |
331 | |
332 | case '\0': |
333 | switch (tok->quote) { |
334 | case Q_none: |
335 | /* Finish word and return */ |
336 | if (tok->flags & TOK_EAT2) { |
337 | tok->flags &= ~TOK_EAT2; |
338 | return 3; |
339 | } |
340 | goto tok_line_outok; |
341 | |
342 | case Q_single: |
343 | return 1; |
344 | |
345 | case Q_double: |
346 | return 2; |
347 | |
348 | case Q_doubleone: |
349 | tok->quote = Q_double; |
350 | *tok->wptr++ = *ptr; |
351 | break; |
352 | |
353 | case Q_one: |
354 | tok->quote = Q_none; |
355 | *tok->wptr++ = *ptr; |
356 | break; |
357 | |
358 | default: |
359 | return -1; |
360 | } |
361 | break; |
362 | |
363 | default: |
364 | tok->flags &= ~TOK_EAT2; |
365 | switch (tok->quote) { |
366 | case Q_none: |
367 | if (Strchr(tok->ifs, *ptr)strchr(tok->ifs, *ptr) != NULL((void *)0)) |
368 | FUN(tok,finish)tok_finish(tok); |
369 | else |
370 | *tok->wptr++ = *ptr; |
371 | break; |
372 | |
373 | case Q_single: |
374 | case Q_double: |
375 | *tok->wptr++ = *ptr; |
376 | break; |
377 | |
378 | |
379 | case Q_doubleone: |
380 | *tok->wptr++ = '\\'; |
381 | tok->quote = Q_double; |
382 | *tok->wptr++ = *ptr; |
383 | break; |
384 | |
385 | case Q_one: |
386 | tok->quote = Q_none; |
387 | *tok->wptr++ = *ptr; |
388 | break; |
389 | |
390 | default: |
391 | return -1; |
392 | |
393 | } |
394 | break; |
395 | } |
396 | |
397 | if (tok->wptr >= tok->wmax - 4) { |
398 | size_t size = tok->wmax - tok->wspace + WINCR20; |
399 | Charchar *s = reallocarray(tok->wspace, size, sizeof(*s)); |
400 | if (s == NULL((void *)0)) |
401 | return -1; |
402 | |
403 | if (s != tok->wspace) { |
404 | int i; |
405 | for (i = 0; i < tok->argc; i++) { |
406 | tok->argv[i] = |
407 | (tok->argv[i] - tok->wspace) + s; |
408 | } |
409 | tok->wptr = (tok->wptr - tok->wspace) + s; |
410 | tok->wstart = (tok->wstart - tok->wspace) + s; |
411 | tok->wspace = s; |
412 | } |
413 | tok->wmax = s + size; |
414 | } |
415 | if (tok->argc >= tok->amax - 4) { |
416 | Charchar **p; |
417 | tok->amax += AINCR10; |
418 | p = reallocarray(tok->argv, tok->amax, sizeof(*p)); |
419 | if (p == NULL((void *)0)) { |
420 | tok->amax -= AINCR10; |
421 | return -1; |
422 | } |
423 | tok->argv = p; |
424 | } |
425 | } |
426 | tok_line_outok: |
427 | if (cc == -1 && co == -1) { |
428 | cc = tok->argc; |
429 | co = (int)(tok->wptr - tok->wstart); |
430 | } |
431 | if (cursorc != NULL((void *)0)) |
432 | *cursorc = cc; |
433 | if (cursoro != NULL((void *)0)) |
434 | *cursoro = co; |
435 | FUN(tok,finish)tok_finish(tok); |
436 | *argv = (const Charchar **)tok->argv; |
437 | *argc = tok->argc; |
438 | return 0; |
439 | } |
440 | |
441 | /* FUN(tok,str)(): |
442 | * Simpler version of tok_line, taking a NUL terminated line |
443 | * and splitting into words, ignoring cursor state. |
444 | */ |
445 | int |
446 | FUN(tok,str)tok_str(TYPE(Tokenizer)Tokenizer *tok, const Charchar *line, int *argc, |
447 | const Charchar ***argv) |
448 | { |
449 | TYPE(LineInfo)LineInfo li; |
450 | |
451 | memset(&li, 0, sizeof(li)); |
452 | li.buffer = line; |
453 | li.cursor = li.lastchar = Strchr(line, '\0')strchr(line, '\0'); |
454 | return FUN(tok,line)tok_line(tok, &li, argc, argv, NULL((void *)0), NULL((void *)0)); |
455 | } |