File: | src/gnu/usr.bin/texinfo/info/search.c |
Warning: | line 422, column 7 Array access (from variable 'string') results in a null pointer dereference |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | /* search.c -- searching large bodies of text. | |||
2 | $Id: search.c,v 1.1.1.5 2006/07/17 16:03:43 espie Exp $ | |||
3 | ||||
4 | Copyright (C) 1993, 1997, 1998, 2002, 2004 Free Software Foundation, Inc. | |||
5 | ||||
6 | This program is free software; you can redistribute it and/or modify | |||
7 | it under the terms of the GNU General Public License as published by | |||
8 | the Free Software Foundation; either version 2, or (at your option) | |||
9 | any later version. | |||
10 | ||||
11 | This program is distributed in the hope that it will be useful, | |||
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |||
14 | GNU General Public License for more details. | |||
15 | ||||
16 | You should have received a copy of the GNU General Public License | |||
17 | along with this program; if not, write to the Free Software | |||
18 | Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | |||
19 | ||||
20 | Written by Brian Fox (bfox@ai.mit.edu). */ | |||
21 | ||||
22 | #include "info.h" | |||
23 | ||||
24 | #include "search.h" | |||
25 | #include "nodes.h" | |||
26 | ||||
27 | /* The search functions take two arguments: | |||
28 | ||||
29 | 1) a string to search for, and | |||
30 | ||||
31 | 2) a pointer to a SEARCH_BINDING which contains the buffer, start, | |||
32 | and end of the search. | |||
33 | ||||
34 | They return a long, which is the offset from the start of the buffer | |||
35 | at which the match was found. An offset of -1 indicates failure. */ | |||
36 | ||||
37 | /* A function which makes a binding with buffer and bounds. */ | |||
38 | SEARCH_BINDING * | |||
39 | make_binding (char *buffer, long int start, long int end) | |||
40 | { | |||
41 | SEARCH_BINDING *binding; | |||
42 | ||||
43 | binding = (SEARCH_BINDING *)xmalloc (sizeof (SEARCH_BINDING)); | |||
44 | binding->buffer = buffer; | |||
45 | binding->start = start; | |||
46 | binding->end = end; | |||
47 | binding->flags = 0; | |||
48 | ||||
49 | return (binding); | |||
50 | } | |||
51 | ||||
52 | /* Make a copy of BINDING without duplicating the data. */ | |||
53 | SEARCH_BINDING * | |||
54 | copy_binding (SEARCH_BINDING *binding) | |||
55 | { | |||
56 | SEARCH_BINDING *copy; | |||
57 | ||||
58 | copy = make_binding (binding->buffer, binding->start, binding->end); | |||
59 | copy->flags = binding->flags; | |||
60 | return (copy); | |||
61 | } | |||
62 | ||||
63 | ||||
64 | /* **************************************************************** */ | |||
65 | /* */ | |||
66 | /* The Actual Searching Functions */ | |||
67 | /* */ | |||
68 | /* **************************************************************** */ | |||
69 | ||||
70 | /* Search forwards or backwards for the text delimited by BINDING. | |||
71 | The search is forwards if BINDING->start is greater than BINDING->end. */ | |||
72 | long | |||
73 | search (char *string, SEARCH_BINDING *binding) | |||
74 | { | |||
75 | long result; | |||
76 | ||||
77 | /* If the search is backwards, then search backwards, otherwise forwards. */ | |||
78 | if (binding->start > binding->end) | |||
79 | result = search_backward (string, binding); | |||
80 | else | |||
81 | result = search_forward (string, binding); | |||
82 | ||||
83 | return (result); | |||
84 | } | |||
85 | ||||
86 | /* Search forwards for STRING through the text delimited in BINDING. */ | |||
87 | long | |||
88 | search_forward (char *string, SEARCH_BINDING *binding) | |||
89 | { | |||
90 | register int c, i, len; | |||
91 | register char *buff, *end; | |||
92 | char *alternate = (char *)NULL((void *)0); | |||
93 | ||||
94 | len = strlen (string); | |||
95 | ||||
96 | /* We match characters in the search buffer against STRING and ALTERNATE. | |||
97 | ALTERNATE is a case reversed version of STRING; this is cheaper than | |||
98 | case folding each character before comparison. Alternate is only | |||
99 | used if the case folding bit is turned on in the passed BINDING. */ | |||
100 | ||||
101 | if (binding->flags & S_FoldCase0x01) | |||
102 | { | |||
103 | alternate = xstrdup (string); | |||
104 | ||||
105 | for (i = 0; i < len; i++) | |||
106 | { | |||
107 | if (islower (alternate[i])) | |||
108 | alternate[i] = toupper (alternate[i]); | |||
109 | else if (isupper (alternate[i])) | |||
110 | alternate[i] = tolower (alternate[i]); | |||
111 | } | |||
112 | } | |||
113 | ||||
114 | buff = binding->buffer + binding->start; | |||
115 | end = binding->buffer + binding->end + 1; | |||
116 | ||||
117 | while (buff < (end - len)) | |||
118 | { | |||
119 | for (i = 0; i < len; i++) | |||
120 | { | |||
121 | c = buff[i]; | |||
122 | ||||
123 | if ((c != string[i]) && (!alternate || c != alternate[i])) | |||
124 | break; | |||
125 | } | |||
126 | ||||
127 | if (!string[i]) | |||
128 | { | |||
129 | if (alternate) | |||
130 | free (alternate); | |||
131 | if (binding->flags & S_SkipDest0x02) | |||
132 | buff += len; | |||
133 | return ((long) (buff - binding->buffer)); | |||
134 | } | |||
135 | ||||
136 | buff++; | |||
137 | } | |||
138 | ||||
139 | if (alternate) | |||
140 | free (alternate); | |||
141 | ||||
142 | return ((long) -1); | |||
143 | } | |||
144 | ||||
145 | /* Search for STRING backwards through the text delimited in BINDING. */ | |||
146 | long | |||
147 | search_backward (char *input_string, SEARCH_BINDING *binding) | |||
148 | { | |||
149 | register int c, i, len; | |||
150 | register char *buff, *end; | |||
151 | char *string; | |||
152 | char *alternate = (char *)NULL((void *)0); | |||
153 | ||||
154 | len = strlen (input_string); | |||
155 | ||||
156 | /* Reverse the characters in the search string. */ | |||
157 | string = (char *)xmalloc (1 + len); | |||
158 | for (c = 0, i = len - 1; input_string[c]; c++, i--) | |||
159 | string[i] = input_string[c]; | |||
160 | ||||
161 | string[c] = '\0'; | |||
162 | ||||
163 | /* We match characters in the search buffer against STRING and ALTERNATE. | |||
164 | ALTERNATE is a case reversed version of STRING; this is cheaper than | |||
165 | case folding each character before comparison. ALTERNATE is only | |||
166 | used if the case folding bit is turned on in the passed BINDING. */ | |||
167 | ||||
168 | if (binding->flags & S_FoldCase0x01) | |||
169 | { | |||
170 | alternate = xstrdup (string); | |||
171 | ||||
172 | for (i = 0; i < len; i++) | |||
173 | { | |||
174 | if (islower (alternate[i])) | |||
175 | alternate[i] = toupper (alternate[i]); | |||
176 | else if (isupper (alternate[i])) | |||
177 | alternate[i] = tolower (alternate[i]); | |||
178 | } | |||
179 | } | |||
180 | ||||
181 | buff = binding->buffer + binding->start - 1; | |||
182 | end = binding->buffer + binding->end; | |||
183 | ||||
184 | while (buff > (end + len)) | |||
185 | { | |||
186 | for (i = 0; i < len; i++) | |||
187 | { | |||
188 | c = *(buff - i); | |||
189 | ||||
190 | if (c != string[i] && (!alternate || c != alternate[i])) | |||
191 | break; | |||
192 | } | |||
193 | ||||
194 | if (!string[i]) | |||
195 | { | |||
196 | free (string); | |||
197 | if (alternate) | |||
198 | free (alternate); | |||
199 | ||||
200 | if (binding->flags & S_SkipDest0x02) | |||
201 | buff -= len; | |||
202 | return ((long) (1 + (buff - binding->buffer))); | |||
203 | } | |||
204 | ||||
205 | buff--; | |||
206 | } | |||
207 | ||||
208 | free (string); | |||
209 | if (alternate) | |||
210 | free (alternate); | |||
211 | ||||
212 | return ((long) -1); | |||
213 | } | |||
214 | ||||
215 | /* Find STRING in LINE, returning the offset of the end of the string. | |||
216 | Return an offset of -1 if STRING does not appear in LINE. The search | |||
217 | is bound by the end of the line (i.e., either NEWLINE or 0). */ | |||
218 | int | |||
219 | string_in_line (char *string, char *line) | |||
220 | { | |||
221 | register int end; | |||
222 | SEARCH_BINDING binding; | |||
223 | ||||
224 | /* Find the end of the line. */ | |||
225 | for (end = 0; line[end] && line[end] != '\n'; end++); | |||
226 | ||||
227 | /* Search for STRING within these confines. */ | |||
228 | binding.buffer = line; | |||
229 | binding.start = 0; | |||
230 | binding.end = end; | |||
231 | binding.flags = S_FoldCase0x01 | S_SkipDest0x02; | |||
232 | ||||
233 | return (search_forward (string, &binding)); | |||
234 | } | |||
235 | ||||
236 | /* Return non-zero if STRING is the first text to appear at BINDING. */ | |||
237 | int | |||
238 | looking_at (char *string, SEARCH_BINDING *binding) | |||
239 | { | |||
240 | long search_end; | |||
241 | ||||
242 | search_end = search (string, binding); | |||
243 | ||||
244 | /* If the string was not found, SEARCH_END is -1. If the string was found, | |||
245 | but not right away, SEARCH_END is != binding->start. Otherwise, the | |||
246 | string was found at binding->start. */ | |||
247 | return (search_end == binding->start); | |||
248 | } | |||
249 | ||||
250 | /* **************************************************************** */ | |||
251 | /* */ | |||
252 | /* Small String Searches */ | |||
253 | /* */ | |||
254 | /* **************************************************************** */ | |||
255 | ||||
256 | /* Function names that start with "skip" are passed a string, and return | |||
257 | an offset from the start of that string. Function names that start | |||
258 | with "find" are passed a SEARCH_BINDING, and return an absolute position | |||
259 | marker of the item being searched for. "Find" functions return a value | |||
260 | of -1 if the item being looked for couldn't be found. */ | |||
261 | ||||
262 | /* Return the index of the first non-whitespace character in STRING. */ | |||
263 | int | |||
264 | skip_whitespace (char *string) | |||
265 | { | |||
266 | register int i; | |||
267 | ||||
268 | for (i = 0; string && whitespace (string[i])((string[i] == ' ') || (string[i] == '\t')); i++); | |||
269 | return (i); | |||
270 | } | |||
271 | ||||
272 | /* Return the index of the first non-whitespace or newline character in | |||
273 | STRING. */ | |||
274 | int | |||
275 | skip_whitespace_and_newlines (char *string) | |||
276 | { | |||
277 | register int i; | |||
278 | ||||
279 | for (i = 0; string && whitespace_or_newline (string[i])(((string[i] == ' ') || (string[i] == '\t')) || (string[i] == '\n')); i++); | |||
280 | return (i); | |||
281 | } | |||
282 | ||||
283 | /* Return the index of the first whitespace character in STRING. */ | |||
284 | int | |||
285 | skip_non_whitespace (char *string) | |||
286 | { | |||
287 | register int i; | |||
288 | ||||
289 | for (i = 0; string && string[i] && !whitespace (string[i])((string[i] == ' ') || (string[i] == '\t')); i++); | |||
290 | return (i); | |||
291 | } | |||
292 | ||||
293 | /* Return the index of the first non-node character in STRING. Note that | |||
294 | this function contains quite a bit of hair to ignore periods in some | |||
295 | special cases. This is because we here at GNU ship some info files which | |||
296 | contain nodenames that contain periods. No such nodename can start with | |||
297 | a period, or continue with whitespace, newline, or ')' immediately following | |||
298 | the period. If second argument NEWLINES_OKAY is non-zero, newlines should | |||
299 | be skipped while parsing out the nodename specification. */ | |||
300 | int | |||
301 | skip_node_characters (char *string, int newlines_okay) | |||
302 | { | |||
303 | register int c, i = 0; | |||
304 | int paren_seen = 0; | |||
305 | int paren = 0; | |||
306 | ||||
307 | /* Handle special case. This is when another function has parsed out the | |||
308 | filename component of the node name, and we just want to parse out the | |||
309 | nodename proper. In that case, a period at the start of the nodename | |||
310 | indicates an empty nodename. */ | |||
311 | if (string && *string == '.') | |||
312 | return (0); | |||
313 | ||||
314 | if (string && *string == '(') | |||
315 | { | |||
316 | paren++; | |||
317 | paren_seen++; | |||
318 | i++; | |||
319 | } | |||
320 | ||||
321 | for (; string && (c = string[i]); i++) | |||
322 | { | |||
323 | if (paren) | |||
324 | { | |||
325 | if (c == '(') | |||
326 | paren++; | |||
327 | else if (c == ')') | |||
328 | paren--; | |||
329 | ||||
330 | continue; | |||
331 | } | |||
332 | ||||
333 | /* If the character following the close paren is a space or period, | |||
334 | then this node name has no more characters associated with it. */ | |||
335 | if (c == '\t' || | |||
336 | c == ',' || | |||
337 | c == INFO_TAGSEP'\177' || | |||
338 | ((!newlines_okay) && (c == '\n')) || | |||
339 | ((paren_seen && string[i - 1] == ')') && | |||
340 | (c == ' ' || c == '.')) || | |||
341 | (c == '.' && | |||
342 | ( | |||
343 | #if 0 | |||
344 | /* This test causes a node name ending in a period, like `This.', not to | |||
345 | be found. The trailing . is stripped. This occurs in the jargon | |||
346 | file (`I see no X here.' is a node name). */ | |||
347 | (!string[i + 1]) || | |||
348 | #endif | |||
349 | (whitespace_or_newline (string[i + 1])(((string[i + 1] == ' ') || (string[i + 1] == '\t')) || (string [i + 1] == '\n'))) || | |||
350 | (string[i + 1] == ')')))) | |||
351 | break; | |||
352 | } | |||
353 | return (i); | |||
354 | } | |||
355 | ||||
356 | ||||
357 | /* **************************************************************** */ | |||
358 | /* */ | |||
359 | /* Searching FILE_BUFFER's */ | |||
360 | /* */ | |||
361 | /* **************************************************************** */ | |||
362 | ||||
363 | /* Return the absolute position of the first occurence of a node separator in | |||
364 | BINDING-buffer. The search starts at BINDING->start. Return -1 if no node | |||
365 | separator was found. */ | |||
366 | long | |||
367 | find_node_separator (SEARCH_BINDING *binding) | |||
368 | { | |||
369 | register long i; | |||
370 | char *body; | |||
371 | ||||
372 | body = binding->buffer; | |||
373 | ||||
374 | /* A node is started by [^L]^_[^L]\n. That is to say, the C-l's are | |||
375 | optional, but the DELETE and NEWLINE are not. This separator holds | |||
376 | true for all separated elements in an Info file, including the tags | |||
377 | table (if present) and the indirect tags table (if present). */ | |||
378 | for (i = binding->start; i < binding->end - 1; i++) | |||
379 | if (((body[i] == INFO_FF'\014' && body[i + 1] == INFO_COOKIE'\037') && | |||
380 | (body[i + 2] == '\n' || | |||
381 | (body[i + 2] == INFO_FF'\014' && body[i + 3] == '\n'))) || | |||
382 | ((body[i] == INFO_COOKIE'\037') && | |||
383 | (body[i + 1] == '\n' || | |||
384 | (body[i + 1] == INFO_FF'\014' && body[i + 2] == '\n')))) | |||
385 | return (i); | |||
386 | return (-1); | |||
387 | } | |||
388 | ||||
389 | /* Return the length of the node separator characters that BODY is | |||
390 | currently pointing at. */ | |||
391 | int | |||
392 | skip_node_separator (char *body) | |||
393 | { | |||
394 | register int i; | |||
395 | ||||
396 | i = 0; | |||
397 | ||||
398 | if (body[i] == INFO_FF'\014') | |||
399 | i++; | |||
400 | ||||
401 | if (body[i++] != INFO_COOKIE'\037') | |||
402 | return (0); | |||
403 | ||||
404 | if (body[i] == INFO_FF'\014') | |||
405 | i++; | |||
406 | ||||
407 | if (body[i++] != '\n') | |||
408 | return (0); | |||
409 | ||||
410 | return (i); | |||
411 | } | |||
412 | ||||
413 | /* Return the number of characters from STRING to the start of | |||
414 | the next line. */ | |||
415 | int | |||
416 | skip_line (char *string) | |||
417 | { | |||
418 | register int i; | |||
419 | ||||
420 | for (i = 0; string && string[i] && string[i] != '\n'; i++); | |||
| ||||
421 | ||||
422 | if (string[i] == '\n') | |||
| ||||
423 | i++; | |||
424 | ||||
425 | return (i); | |||
426 | } | |||
427 | ||||
428 | /* Return the absolute position of the beginning of a tags table in this | |||
429 | binding starting the search at binding->start. */ | |||
430 | long | |||
431 | find_tags_table (SEARCH_BINDING *binding) | |||
432 | { | |||
433 | SEARCH_BINDING tmp_search; | |||
434 | long position; | |||
435 | ||||
436 | tmp_search.buffer = binding->buffer; | |||
437 | tmp_search.start = binding->start; | |||
438 | tmp_search.end = binding->end; | |||
439 | tmp_search.flags = S_FoldCase0x01; | |||
440 | ||||
441 | while ((position = find_node_separator (&tmp_search)) != -1 ) | |||
442 | { | |||
443 | tmp_search.start = position; | |||
444 | tmp_search.start += skip_node_separator (tmp_search.buffer | |||
445 | + tmp_search.start); | |||
446 | ||||
447 | if (looking_at (TAGS_TABLE_BEG_LABEL"Tag Table:\n", &tmp_search)) | |||
448 | return (position); | |||
449 | } | |||
450 | return (-1); | |||
451 | } | |||
452 | ||||
453 | /* Return the absolute position of the node named NODENAME in BINDING. | |||
454 | This is a brute force search, and we wish to avoid it when possible. | |||
455 | This function is called when a tag (indirect or otherwise) doesn't | |||
456 | really point to the right node. It returns the absolute position of | |||
457 | the separator preceding the node. */ | |||
458 | long | |||
459 | find_node_in_binding (char *nodename, SEARCH_BINDING *binding) | |||
460 | { | |||
461 | long position; | |||
462 | int offset, namelen; | |||
463 | SEARCH_BINDING tmp_search; | |||
464 | ||||
465 | namelen = strlen (nodename); | |||
466 | ||||
467 | tmp_search.buffer = binding->buffer; | |||
468 | tmp_search.start = binding->start; | |||
469 | tmp_search.end = binding->end; | |||
470 | tmp_search.flags = 0; | |||
471 | ||||
472 | while ((position = find_node_separator (&tmp_search)) != -1) | |||
473 | { | |||
474 | tmp_search.start = position; | |||
475 | tmp_search.start += skip_node_separator | |||
476 | (tmp_search.buffer + tmp_search.start); | |||
477 | ||||
478 | offset = string_in_line | |||
479 | (INFO_NODE_LABEL"Node:", tmp_search.buffer + tmp_search.start); | |||
480 | ||||
481 | if (offset == -1) | |||
482 | continue; | |||
483 | ||||
484 | tmp_search.start += offset; | |||
485 | tmp_search.start += skip_whitespace (tmp_search.buffer + tmp_search.start); | |||
486 | offset = skip_node_characters | |||
487 | (tmp_search.buffer + tmp_search.start, DONT_SKIP_NEWLINES0); | |||
488 | ||||
489 | /* Notice that this is an exact match. You cannot grovel through | |||
490 | the buffer with this function looking for random nodes. */ | |||
491 | if ((offset == namelen) && | |||
492 | (tmp_search.buffer[tmp_search.start] == nodename[0]) && | |||
493 | (strncmp (tmp_search.buffer + tmp_search.start, nodename, offset) == 0)) | |||
494 | return (position); | |||
495 | } | |||
496 | return (-1); | |||
497 | } |