| File: | src/gnu/usr.bin/texinfo/info/search.c |
| Warning: | line 422, column 7 Array access (from variable 'string') results in a null pointer dereference |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
| 1 | /* search.c -- searching large bodies of text. | |||
| 2 | $Id: search.c,v 1.1.1.5 2006/07/17 16:03:43 espie Exp $ | |||
| 3 | ||||
| 4 | Copyright (C) 1993, 1997, 1998, 2002, 2004 Free Software Foundation, Inc. | |||
| 5 | ||||
| 6 | This program is free software; you can redistribute it and/or modify | |||
| 7 | it under the terms of the GNU General Public License as published by | |||
| 8 | the Free Software Foundation; either version 2, or (at your option) | |||
| 9 | any later version. | |||
| 10 | ||||
| 11 | This program is distributed in the hope that it will be useful, | |||
| 12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |||
| 14 | GNU General Public License for more details. | |||
| 15 | ||||
| 16 | You should have received a copy of the GNU General Public License | |||
| 17 | along with this program; if not, write to the Free Software | |||
| 18 | Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | |||
| 19 | ||||
| 20 | Written by Brian Fox (bfox@ai.mit.edu). */ | |||
| 21 | ||||
| 22 | #include "info.h" | |||
| 23 | ||||
| 24 | #include "search.h" | |||
| 25 | #include "nodes.h" | |||
| 26 | ||||
| 27 | /* The search functions take two arguments: | |||
| 28 | ||||
| 29 | 1) a string to search for, and | |||
| 30 | ||||
| 31 | 2) a pointer to a SEARCH_BINDING which contains the buffer, start, | |||
| 32 | and end of the search. | |||
| 33 | ||||
| 34 | They return a long, which is the offset from the start of the buffer | |||
| 35 | at which the match was found. An offset of -1 indicates failure. */ | |||
| 36 | ||||
| 37 | /* A function which makes a binding with buffer and bounds. */ | |||
| 38 | SEARCH_BINDING * | |||
| 39 | make_binding (char *buffer, long int start, long int end) | |||
| 40 | { | |||
| 41 | SEARCH_BINDING *binding; | |||
| 42 | ||||
| 43 | binding = (SEARCH_BINDING *)xmalloc (sizeof (SEARCH_BINDING)); | |||
| 44 | binding->buffer = buffer; | |||
| 45 | binding->start = start; | |||
| 46 | binding->end = end; | |||
| 47 | binding->flags = 0; | |||
| 48 | ||||
| 49 | return (binding); | |||
| 50 | } | |||
| 51 | ||||
| 52 | /* Make a copy of BINDING without duplicating the data. */ | |||
| 53 | SEARCH_BINDING * | |||
| 54 | copy_binding (SEARCH_BINDING *binding) | |||
| 55 | { | |||
| 56 | SEARCH_BINDING *copy; | |||
| 57 | ||||
| 58 | copy = make_binding (binding->buffer, binding->start, binding->end); | |||
| 59 | copy->flags = binding->flags; | |||
| 60 | return (copy); | |||
| 61 | } | |||
| 62 | ||||
| 63 | ||||
| 64 | /* **************************************************************** */ | |||
| 65 | /* */ | |||
| 66 | /* The Actual Searching Functions */ | |||
| 67 | /* */ | |||
| 68 | /* **************************************************************** */ | |||
| 69 | ||||
| 70 | /* Search forwards or backwards for the text delimited by BINDING. | |||
| 71 | The search is forwards if BINDING->start is greater than BINDING->end. */ | |||
| 72 | long | |||
| 73 | search (char *string, SEARCH_BINDING *binding) | |||
| 74 | { | |||
| 75 | long result; | |||
| 76 | ||||
| 77 | /* If the search is backwards, then search backwards, otherwise forwards. */ | |||
| 78 | if (binding->start > binding->end) | |||
| 79 | result = search_backward (string, binding); | |||
| 80 | else | |||
| 81 | result = search_forward (string, binding); | |||
| 82 | ||||
| 83 | return (result); | |||
| 84 | } | |||
| 85 | ||||
| 86 | /* Search forwards for STRING through the text delimited in BINDING. */ | |||
| 87 | long | |||
| 88 | search_forward (char *string, SEARCH_BINDING *binding) | |||
| 89 | { | |||
| 90 | register int c, i, len; | |||
| 91 | register char *buff, *end; | |||
| 92 | char *alternate = (char *)NULL((void *)0); | |||
| 93 | ||||
| 94 | len = strlen (string); | |||
| 95 | ||||
| 96 | /* We match characters in the search buffer against STRING and ALTERNATE. | |||
| 97 | ALTERNATE is a case reversed version of STRING; this is cheaper than | |||
| 98 | case folding each character before comparison. Alternate is only | |||
| 99 | used if the case folding bit is turned on in the passed BINDING. */ | |||
| 100 | ||||
| 101 | if (binding->flags & S_FoldCase0x01) | |||
| 102 | { | |||
| 103 | alternate = xstrdup (string); | |||
| 104 | ||||
| 105 | for (i = 0; i < len; i++) | |||
| 106 | { | |||
| 107 | if (islower (alternate[i])) | |||
| 108 | alternate[i] = toupper (alternate[i]); | |||
| 109 | else if (isupper (alternate[i])) | |||
| 110 | alternate[i] = tolower (alternate[i]); | |||
| 111 | } | |||
| 112 | } | |||
| 113 | ||||
| 114 | buff = binding->buffer + binding->start; | |||
| 115 | end = binding->buffer + binding->end + 1; | |||
| 116 | ||||
| 117 | while (buff < (end - len)) | |||
| 118 | { | |||
| 119 | for (i = 0; i < len; i++) | |||
| 120 | { | |||
| 121 | c = buff[i]; | |||
| 122 | ||||
| 123 | if ((c != string[i]) && (!alternate || c != alternate[i])) | |||
| 124 | break; | |||
| 125 | } | |||
| 126 | ||||
| 127 | if (!string[i]) | |||
| 128 | { | |||
| 129 | if (alternate) | |||
| 130 | free (alternate); | |||
| 131 | if (binding->flags & S_SkipDest0x02) | |||
| 132 | buff += len; | |||
| 133 | return ((long) (buff - binding->buffer)); | |||
| 134 | } | |||
| 135 | ||||
| 136 | buff++; | |||
| 137 | } | |||
| 138 | ||||
| 139 | if (alternate) | |||
| 140 | free (alternate); | |||
| 141 | ||||
| 142 | return ((long) -1); | |||
| 143 | } | |||
| 144 | ||||
| 145 | /* Search for STRING backwards through the text delimited in BINDING. */ | |||
| 146 | long | |||
| 147 | search_backward (char *input_string, SEARCH_BINDING *binding) | |||
| 148 | { | |||
| 149 | register int c, i, len; | |||
| 150 | register char *buff, *end; | |||
| 151 | char *string; | |||
| 152 | char *alternate = (char *)NULL((void *)0); | |||
| 153 | ||||
| 154 | len = strlen (input_string); | |||
| 155 | ||||
| 156 | /* Reverse the characters in the search string. */ | |||
| 157 | string = (char *)xmalloc (1 + len); | |||
| 158 | for (c = 0, i = len - 1; input_string[c]; c++, i--) | |||
| 159 | string[i] = input_string[c]; | |||
| 160 | ||||
| 161 | string[c] = '\0'; | |||
| 162 | ||||
| 163 | /* We match characters in the search buffer against STRING and ALTERNATE. | |||
| 164 | ALTERNATE is a case reversed version of STRING; this is cheaper than | |||
| 165 | case folding each character before comparison. ALTERNATE is only | |||
| 166 | used if the case folding bit is turned on in the passed BINDING. */ | |||
| 167 | ||||
| 168 | if (binding->flags & S_FoldCase0x01) | |||
| 169 | { | |||
| 170 | alternate = xstrdup (string); | |||
| 171 | ||||
| 172 | for (i = 0; i < len; i++) | |||
| 173 | { | |||
| 174 | if (islower (alternate[i])) | |||
| 175 | alternate[i] = toupper (alternate[i]); | |||
| 176 | else if (isupper (alternate[i])) | |||
| 177 | alternate[i] = tolower (alternate[i]); | |||
| 178 | } | |||
| 179 | } | |||
| 180 | ||||
| 181 | buff = binding->buffer + binding->start - 1; | |||
| 182 | end = binding->buffer + binding->end; | |||
| 183 | ||||
| 184 | while (buff > (end + len)) | |||
| 185 | { | |||
| 186 | for (i = 0; i < len; i++) | |||
| 187 | { | |||
| 188 | c = *(buff - i); | |||
| 189 | ||||
| 190 | if (c != string[i] && (!alternate || c != alternate[i])) | |||
| 191 | break; | |||
| 192 | } | |||
| 193 | ||||
| 194 | if (!string[i]) | |||
| 195 | { | |||
| 196 | free (string); | |||
| 197 | if (alternate) | |||
| 198 | free (alternate); | |||
| 199 | ||||
| 200 | if (binding->flags & S_SkipDest0x02) | |||
| 201 | buff -= len; | |||
| 202 | return ((long) (1 + (buff - binding->buffer))); | |||
| 203 | } | |||
| 204 | ||||
| 205 | buff--; | |||
| 206 | } | |||
| 207 | ||||
| 208 | free (string); | |||
| 209 | if (alternate) | |||
| 210 | free (alternate); | |||
| 211 | ||||
| 212 | return ((long) -1); | |||
| 213 | } | |||
| 214 | ||||
| 215 | /* Find STRING in LINE, returning the offset of the end of the string. | |||
| 216 | Return an offset of -1 if STRING does not appear in LINE. The search | |||
| 217 | is bound by the end of the line (i.e., either NEWLINE or 0). */ | |||
| 218 | int | |||
| 219 | string_in_line (char *string, char *line) | |||
| 220 | { | |||
| 221 | register int end; | |||
| 222 | SEARCH_BINDING binding; | |||
| 223 | ||||
| 224 | /* Find the end of the line. */ | |||
| 225 | for (end = 0; line[end] && line[end] != '\n'; end++); | |||
| 226 | ||||
| 227 | /* Search for STRING within these confines. */ | |||
| 228 | binding.buffer = line; | |||
| 229 | binding.start = 0; | |||
| 230 | binding.end = end; | |||
| 231 | binding.flags = S_FoldCase0x01 | S_SkipDest0x02; | |||
| 232 | ||||
| 233 | return (search_forward (string, &binding)); | |||
| 234 | } | |||
| 235 | ||||
| 236 | /* Return non-zero if STRING is the first text to appear at BINDING. */ | |||
| 237 | int | |||
| 238 | looking_at (char *string, SEARCH_BINDING *binding) | |||
| 239 | { | |||
| 240 | long search_end; | |||
| 241 | ||||
| 242 | search_end = search (string, binding); | |||
| 243 | ||||
| 244 | /* If the string was not found, SEARCH_END is -1. If the string was found, | |||
| 245 | but not right away, SEARCH_END is != binding->start. Otherwise, the | |||
| 246 | string was found at binding->start. */ | |||
| 247 | return (search_end == binding->start); | |||
| 248 | } | |||
| 249 | ||||
| 250 | /* **************************************************************** */ | |||
| 251 | /* */ | |||
| 252 | /* Small String Searches */ | |||
| 253 | /* */ | |||
| 254 | /* **************************************************************** */ | |||
| 255 | ||||
| 256 | /* Function names that start with "skip" are passed a string, and return | |||
| 257 | an offset from the start of that string. Function names that start | |||
| 258 | with "find" are passed a SEARCH_BINDING, and return an absolute position | |||
| 259 | marker of the item being searched for. "Find" functions return a value | |||
| 260 | of -1 if the item being looked for couldn't be found. */ | |||
| 261 | ||||
| 262 | /* Return the index of the first non-whitespace character in STRING. */ | |||
| 263 | int | |||
| 264 | skip_whitespace (char *string) | |||
| 265 | { | |||
| 266 | register int i; | |||
| 267 | ||||
| 268 | for (i = 0; string && whitespace (string[i])((string[i] == ' ') || (string[i] == '\t')); i++); | |||
| 269 | return (i); | |||
| 270 | } | |||
| 271 | ||||
| 272 | /* Return the index of the first non-whitespace or newline character in | |||
| 273 | STRING. */ | |||
| 274 | int | |||
| 275 | skip_whitespace_and_newlines (char *string) | |||
| 276 | { | |||
| 277 | register int i; | |||
| 278 | ||||
| 279 | for (i = 0; string && whitespace_or_newline (string[i])(((string[i] == ' ') || (string[i] == '\t')) || (string[i] == '\n')); i++); | |||
| 280 | return (i); | |||
| 281 | } | |||
| 282 | ||||
| 283 | /* Return the index of the first whitespace character in STRING. */ | |||
| 284 | int | |||
| 285 | skip_non_whitespace (char *string) | |||
| 286 | { | |||
| 287 | register int i; | |||
| 288 | ||||
| 289 | for (i = 0; string && string[i] && !whitespace (string[i])((string[i] == ' ') || (string[i] == '\t')); i++); | |||
| 290 | return (i); | |||
| 291 | } | |||
| 292 | ||||
| 293 | /* Return the index of the first non-node character in STRING. Note that | |||
| 294 | this function contains quite a bit of hair to ignore periods in some | |||
| 295 | special cases. This is because we here at GNU ship some info files which | |||
| 296 | contain nodenames that contain periods. No such nodename can start with | |||
| 297 | a period, or continue with whitespace, newline, or ')' immediately following | |||
| 298 | the period. If second argument NEWLINES_OKAY is non-zero, newlines should | |||
| 299 | be skipped while parsing out the nodename specification. */ | |||
| 300 | int | |||
| 301 | skip_node_characters (char *string, int newlines_okay) | |||
| 302 | { | |||
| 303 | register int c, i = 0; | |||
| 304 | int paren_seen = 0; | |||
| 305 | int paren = 0; | |||
| 306 | ||||
| 307 | /* Handle special case. This is when another function has parsed out the | |||
| 308 | filename component of the node name, and we just want to parse out the | |||
| 309 | nodename proper. In that case, a period at the start of the nodename | |||
| 310 | indicates an empty nodename. */ | |||
| 311 | if (string && *string == '.') | |||
| 312 | return (0); | |||
| 313 | ||||
| 314 | if (string && *string == '(') | |||
| 315 | { | |||
| 316 | paren++; | |||
| 317 | paren_seen++; | |||
| 318 | i++; | |||
| 319 | } | |||
| 320 | ||||
| 321 | for (; string && (c = string[i]); i++) | |||
| 322 | { | |||
| 323 | if (paren) | |||
| 324 | { | |||
| 325 | if (c == '(') | |||
| 326 | paren++; | |||
| 327 | else if (c == ')') | |||
| 328 | paren--; | |||
| 329 | ||||
| 330 | continue; | |||
| 331 | } | |||
| 332 | ||||
| 333 | /* If the character following the close paren is a space or period, | |||
| 334 | then this node name has no more characters associated with it. */ | |||
| 335 | if (c == '\t' || | |||
| 336 | c == ',' || | |||
| 337 | c == INFO_TAGSEP'\177' || | |||
| 338 | ((!newlines_okay) && (c == '\n')) || | |||
| 339 | ((paren_seen && string[i - 1] == ')') && | |||
| 340 | (c == ' ' || c == '.')) || | |||
| 341 | (c == '.' && | |||
| 342 | ( | |||
| 343 | #if 0 | |||
| 344 | /* This test causes a node name ending in a period, like `This.', not to | |||
| 345 | be found. The trailing . is stripped. This occurs in the jargon | |||
| 346 | file (`I see no X here.' is a node name). */ | |||
| 347 | (!string[i + 1]) || | |||
| 348 | #endif | |||
| 349 | (whitespace_or_newline (string[i + 1])(((string[i + 1] == ' ') || (string[i + 1] == '\t')) || (string [i + 1] == '\n'))) || | |||
| 350 | (string[i + 1] == ')')))) | |||
| 351 | break; | |||
| 352 | } | |||
| 353 | return (i); | |||
| 354 | } | |||
| 355 | ||||
| 356 | ||||
| 357 | /* **************************************************************** */ | |||
| 358 | /* */ | |||
| 359 | /* Searching FILE_BUFFER's */ | |||
| 360 | /* */ | |||
| 361 | /* **************************************************************** */ | |||
| 362 | ||||
| 363 | /* Return the absolute position of the first occurence of a node separator in | |||
| 364 | BINDING-buffer. The search starts at BINDING->start. Return -1 if no node | |||
| 365 | separator was found. */ | |||
| 366 | long | |||
| 367 | find_node_separator (SEARCH_BINDING *binding) | |||
| 368 | { | |||
| 369 | register long i; | |||
| 370 | char *body; | |||
| 371 | ||||
| 372 | body = binding->buffer; | |||
| 373 | ||||
| 374 | /* A node is started by [^L]^_[^L]\n. That is to say, the C-l's are | |||
| 375 | optional, but the DELETE and NEWLINE are not. This separator holds | |||
| 376 | true for all separated elements in an Info file, including the tags | |||
| 377 | table (if present) and the indirect tags table (if present). */ | |||
| 378 | for (i = binding->start; i < binding->end - 1; i++) | |||
| 379 | if (((body[i] == INFO_FF'\014' && body[i + 1] == INFO_COOKIE'\037') && | |||
| 380 | (body[i + 2] == '\n' || | |||
| 381 | (body[i + 2] == INFO_FF'\014' && body[i + 3] == '\n'))) || | |||
| 382 | ((body[i] == INFO_COOKIE'\037') && | |||
| 383 | (body[i + 1] == '\n' || | |||
| 384 | (body[i + 1] == INFO_FF'\014' && body[i + 2] == '\n')))) | |||
| 385 | return (i); | |||
| 386 | return (-1); | |||
| 387 | } | |||
| 388 | ||||
| 389 | /* Return the length of the node separator characters that BODY is | |||
| 390 | currently pointing at. */ | |||
| 391 | int | |||
| 392 | skip_node_separator (char *body) | |||
| 393 | { | |||
| 394 | register int i; | |||
| 395 | ||||
| 396 | i = 0; | |||
| 397 | ||||
| 398 | if (body[i] == INFO_FF'\014') | |||
| 399 | i++; | |||
| 400 | ||||
| 401 | if (body[i++] != INFO_COOKIE'\037') | |||
| 402 | return (0); | |||
| 403 | ||||
| 404 | if (body[i] == INFO_FF'\014') | |||
| 405 | i++; | |||
| 406 | ||||
| 407 | if (body[i++] != '\n') | |||
| 408 | return (0); | |||
| 409 | ||||
| 410 | return (i); | |||
| 411 | } | |||
| 412 | ||||
| 413 | /* Return the number of characters from STRING to the start of | |||
| 414 | the next line. */ | |||
| 415 | int | |||
| 416 | skip_line (char *string) | |||
| 417 | { | |||
| 418 | register int i; | |||
| 419 | ||||
| 420 | for (i = 0; string && string[i] && string[i] != '\n'; i++); | |||
| ||||
| 421 | ||||
| 422 | if (string[i] == '\n') | |||
| ||||
| 423 | i++; | |||
| 424 | ||||
| 425 | return (i); | |||
| 426 | } | |||
| 427 | ||||
| 428 | /* Return the absolute position of the beginning of a tags table in this | |||
| 429 | binding starting the search at binding->start. */ | |||
| 430 | long | |||
| 431 | find_tags_table (SEARCH_BINDING *binding) | |||
| 432 | { | |||
| 433 | SEARCH_BINDING tmp_search; | |||
| 434 | long position; | |||
| 435 | ||||
| 436 | tmp_search.buffer = binding->buffer; | |||
| 437 | tmp_search.start = binding->start; | |||
| 438 | tmp_search.end = binding->end; | |||
| 439 | tmp_search.flags = S_FoldCase0x01; | |||
| 440 | ||||
| 441 | while ((position = find_node_separator (&tmp_search)) != -1 ) | |||
| 442 | { | |||
| 443 | tmp_search.start = position; | |||
| 444 | tmp_search.start += skip_node_separator (tmp_search.buffer | |||
| 445 | + tmp_search.start); | |||
| 446 | ||||
| 447 | if (looking_at (TAGS_TABLE_BEG_LABEL"Tag Table:\n", &tmp_search)) | |||
| 448 | return (position); | |||
| 449 | } | |||
| 450 | return (-1); | |||
| 451 | } | |||
| 452 | ||||
| 453 | /* Return the absolute position of the node named NODENAME in BINDING. | |||
| 454 | This is a brute force search, and we wish to avoid it when possible. | |||
| 455 | This function is called when a tag (indirect or otherwise) doesn't | |||
| 456 | really point to the right node. It returns the absolute position of | |||
| 457 | the separator preceding the node. */ | |||
| 458 | long | |||
| 459 | find_node_in_binding (char *nodename, SEARCH_BINDING *binding) | |||
| 460 | { | |||
| 461 | long position; | |||
| 462 | int offset, namelen; | |||
| 463 | SEARCH_BINDING tmp_search; | |||
| 464 | ||||
| 465 | namelen = strlen (nodename); | |||
| 466 | ||||
| 467 | tmp_search.buffer = binding->buffer; | |||
| 468 | tmp_search.start = binding->start; | |||
| 469 | tmp_search.end = binding->end; | |||
| 470 | tmp_search.flags = 0; | |||
| 471 | ||||
| 472 | while ((position = find_node_separator (&tmp_search)) != -1) | |||
| 473 | { | |||
| 474 | tmp_search.start = position; | |||
| 475 | tmp_search.start += skip_node_separator | |||
| 476 | (tmp_search.buffer + tmp_search.start); | |||
| 477 | ||||
| 478 | offset = string_in_line | |||
| 479 | (INFO_NODE_LABEL"Node:", tmp_search.buffer + tmp_search.start); | |||
| 480 | ||||
| 481 | if (offset == -1) | |||
| 482 | continue; | |||
| 483 | ||||
| 484 | tmp_search.start += offset; | |||
| 485 | tmp_search.start += skip_whitespace (tmp_search.buffer + tmp_search.start); | |||
| 486 | offset = skip_node_characters | |||
| 487 | (tmp_search.buffer + tmp_search.start, DONT_SKIP_NEWLINES0); | |||
| 488 | ||||
| 489 | /* Notice that this is an exact match. You cannot grovel through | |||
| 490 | the buffer with this function looking for random nodes. */ | |||
| 491 | if ((offset == namelen) && | |||
| 492 | (tmp_search.buffer[tmp_search.start] == nodename[0]) && | |||
| 493 | (strncmp (tmp_search.buffer + tmp_search.start, nodename, offset) == 0)) | |||
| 494 | return (position); | |||
| 495 | } | |||
| 496 | return (-1); | |||
| 497 | } |