File: | src/gnu/usr.bin/texinfo/util/texindex.c |
Warning: | line 339, column 11 Dereference of null pointer (loaded from variable 'arg') |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | /* texindex -- sort TeX index dribble output into an actual index. | |||
2 | $Id: texindex.c,v 1.6 2015/11/14 23:06:06 deraadt Exp $ | |||
3 | ||||
4 | Copyright (C) 1987, 1991, 1992, 1996, 1997, 1998, 1999, 2000, 2001, | |||
5 | 2002, 2003, 2004 Free Software Foundation, Inc. | |||
6 | ||||
7 | This program is free software; you can redistribute it and/or modify | |||
8 | it under the terms of the GNU General Public License as published by | |||
9 | the Free Software Foundation; either version 2, or (at your option) | |||
10 | any later version. | |||
11 | ||||
12 | This program is distributed in the hope that it will be useful, | |||
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |||
15 | GNU General Public License for more details. | |||
16 | ||||
17 | You should have received a copy of the GNU General Public License | |||
18 | along with this program; if not, write to the Free Software | |||
19 | Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307. */ | |||
20 | ||||
21 | #include "system.h" | |||
22 | #include <getopt.h> | |||
23 | ||||
24 | static char *program_name = "texindex"; | |||
25 | ||||
26 | #if defined (emacs) | |||
27 | # include "../src/config.h" | |||
28 | /* Some s/os.h files redefine these. */ | |||
29 | # undef read | |||
30 | # undef close | |||
31 | # undef write | |||
32 | # undef open | |||
33 | #endif | |||
34 | ||||
35 | #if !defined (HAVE_MEMSET1) | |||
36 | #undef memset | |||
37 | #define memset(ptr, ignore, count) bzero (ptr, count) | |||
38 | #endif | |||
39 | ||||
40 | char *mktemp (char *); | |||
41 | ||||
42 | #if !defined (SEEK_SET0) | |||
43 | # define SEEK_SET0 0 | |||
44 | # define SEEK_CUR1 1 | |||
45 | # define SEEK_END2 2 | |||
46 | #endif /* !SEEK_SET */ | |||
47 | ||||
48 | struct linebuffer; | |||
49 | ||||
50 | /* When sorting in core, this structure describes one line | |||
51 | and the position and length of its first keyfield. */ | |||
52 | struct lineinfo | |||
53 | { | |||
54 | char *text; /* The actual text of the line. */ | |||
55 | union { | |||
56 | char *text; /* The start of the key (for textual comparison). */ | |||
57 | long number; /* The numeric value (for numeric comparison). */ | |||
58 | } key; | |||
59 | long keylen; /* Length of KEY field. */ | |||
60 | }; | |||
61 | ||||
62 | /* This structure describes a field to use as a sort key. */ | |||
63 | struct keyfield | |||
64 | { | |||
65 | int startwords; /* Number of words to skip. */ | |||
66 | int startchars; /* Number of additional chars to skip. */ | |||
67 | int endwords; /* Number of words to ignore at end. */ | |||
68 | int endchars; /* Ditto for characters of last word. */ | |||
69 | char ignore_blanks; /* Non-zero means ignore spaces and tabs. */ | |||
70 | char fold_case; /* Non-zero means case doesn't matter. */ | |||
71 | char reverse; /* Non-zero means compare in reverse order. */ | |||
72 | char numeric; /* Non-zeros means field is ASCII numeric. */ | |||
73 | char positional; /* Sort according to file position. */ | |||
74 | char braced; /* Count balanced-braced groupings as fields. */ | |||
75 | }; | |||
76 | ||||
77 | /* Vector of keyfields to use. */ | |||
78 | struct keyfield keyfields[3]; | |||
79 | ||||
80 | /* Number of keyfields stored in that vector. */ | |||
81 | int num_keyfields = 3; | |||
82 | ||||
83 | /* Vector of input file names, terminated with a null pointer. */ | |||
84 | char **infiles; | |||
85 | ||||
86 | /* Vector of corresponding output file names, or NULL, meaning default it | |||
87 | (add an `s' to the end). */ | |||
88 | char **outfiles; | |||
89 | ||||
90 | /* Length of `infiles'. */ | |||
91 | int num_infiles; | |||
92 | ||||
93 | /* Pointer to the array of pointers to lines being sorted. */ | |||
94 | char **linearray; | |||
95 | ||||
96 | /* The allocated length of `linearray'. */ | |||
97 | long nlines; | |||
98 | ||||
99 | /* Directory to use for temporary files. On Unix, it ends with a slash. */ | |||
100 | char *tempdir; | |||
101 | ||||
102 | /* Number of last temporary file. */ | |||
103 | int tempcount; | |||
104 | ||||
105 | /* Number of last temporary file already deleted. | |||
106 | Temporary files are deleted by `flush_tempfiles' in order of creation. */ | |||
107 | int last_deleted_tempcount; | |||
108 | ||||
109 | /* During in-core sort, this points to the base of the data block | |||
110 | which contains all the lines of data. */ | |||
111 | char *text_base; | |||
112 | ||||
113 | /* Initially 0; changed to 1 if we want initials in this index. */ | |||
114 | int need_initials; | |||
115 | ||||
116 | /* Remembers the first initial letter seen in this index, so we can | |||
117 | determine whether we need initials in the sorted form. */ | |||
118 | char first_initial; | |||
119 | ||||
120 | /* Additional command switches .*/ | |||
121 | ||||
122 | /* Nonzero means do not delete tempfiles -- for debugging. */ | |||
123 | int keep_tempfiles; | |||
124 | ||||
125 | /* Forward declarations of functions in this file. */ | |||
126 | void decode_command (int argc, char **argv); | |||
127 | void sort_in_core (char *infile, int total, char *outfile); | |||
128 | void sort_offline (char *infile, off_t total, char *outfile); | |||
129 | char **parsefile (char *filename, char **nextline, char *data, long int size); | |||
130 | char *find_field (struct keyfield *keyfield, char *str, long int *lengthptr); | |||
131 | char *find_pos (char *str, int words, int chars, int ignore_blanks); | |||
132 | long find_value (char *start, long int length); | |||
133 | char *find_braced_pos (char *str, int words, int chars, int ignore_blanks); | |||
134 | char *find_braced_end (char *str); | |||
135 | void writelines (char **linearray, int nlines, FILE *ostream); | |||
136 | int compare_field (struct keyfield *keyfield, char *start1, | |||
137 | long int length1, long int pos1, char *start2, | |||
138 | long int length2, long int pos2); | |||
139 | int compare_full (const void *, const void *); | |||
140 | long readline (struct linebuffer *linebuffer, FILE *stream); | |||
141 | int merge_files (char **infiles, int nfiles, char *outfile); | |||
142 | int merge_direct (char **infiles, int nfiles, char *outfile); | |||
143 | void pfatal_with_name (const char *name); | |||
144 | void fatal (const char *format, const char *arg); | |||
145 | void error (const char *format, const char *arg); | |||
146 | void *xmalloc (), *xrealloc (); | |||
147 | char *concat (char *s1, char *s2); | |||
148 | void flush_tempfiles (int to_count); | |||
149 | ||||
150 | #define MAX_IN_CORE_SORT500000 500000 | |||
151 | ||||
152 | int | |||
153 | main (int argc, char **argv) | |||
154 | { | |||
155 | int i; | |||
156 | ||||
157 | tempcount = 0; | |||
158 | last_deleted_tempcount = 0; | |||
159 | ||||
160 | #ifdef HAVE_SETLOCALE1 | |||
161 | /* Set locale via LC_ALL. */ | |||
162 | setlocale (LC_ALL0, ""); | |||
163 | #endif | |||
164 | ||||
165 | if (pledge ("stdio rpath wpath cpath tmppath", NULL((void *)0)) == -1) | |||
| ||||
166 | pfatal_with_name ("pledge"); | |||
167 | ||||
168 | /* Set the text message domain. */ | |||
169 | bindtextdomain (PACKAGE, LOCALEDIR)((const char *) ("/usr/share/locale")); | |||
170 | textdomain (PACKAGE)((const char *) ("texinfo")); | |||
171 | ||||
172 | /* In case we write to a redirected stdout that fails. */ | |||
173 | /* not ready atexit (close_stdout); */ | |||
174 | ||||
175 | /* Describe the kind of sorting to do. */ | |||
176 | /* The first keyfield uses the first braced field and folds case. */ | |||
177 | keyfields[0].braced = 1; | |||
178 | keyfields[0].fold_case = 1; | |||
179 | keyfields[0].endwords = -1; | |||
180 | keyfields[0].endchars = -1; | |||
181 | ||||
182 | /* The second keyfield uses the second braced field, numerically. */ | |||
183 | keyfields[1].braced = 1; | |||
184 | keyfields[1].numeric = 1; | |||
185 | keyfields[1].startwords = 1; | |||
186 | keyfields[1].endwords = -1; | |||
187 | keyfields[1].endchars = -1; | |||
188 | ||||
189 | /* The third keyfield (which is ignored while discarding duplicates) | |||
190 | compares the whole line. */ | |||
191 | keyfields[2].endwords = -1; | |||
192 | keyfields[2].endchars = -1; | |||
193 | ||||
194 | decode_command (argc, argv); | |||
195 | ||||
196 | /* Process input files completely, one by one. */ | |||
197 | ||||
198 | for (i = 0; i < num_infiles; i++) | |||
199 | { | |||
200 | int desc; | |||
201 | off_t ptr; | |||
202 | char *outfile; | |||
203 | struct stat instat; | |||
204 | ||||
205 | desc = open (infiles[i], O_RDONLY0x0000, 0); | |||
206 | if (desc < 0) | |||
207 | pfatal_with_name (infiles[i]); | |||
208 | ||||
209 | if (stat (infiles[i], &instat)) | |||
210 | pfatal_with_name (infiles[i]); | |||
211 | if (S_ISDIR (instat.st_mode)((instat.st_mode & 0170000) == 0040000)) | |||
212 | { | |||
213 | #ifdef EISDIR21 | |||
214 | errno(*__errno()) = EISDIR21; | |||
215 | #endif | |||
216 | pfatal_with_name (infiles[i]); | |||
217 | } | |||
218 | ||||
219 | lseek (desc, (off_t) 0, SEEK_END2); | |||
220 | ptr = (off_t) lseek (desc, (off_t) 0, SEEK_CUR1); | |||
221 | ||||
222 | close (desc); | |||
223 | ||||
224 | outfile = outfiles[i]; | |||
225 | if (!outfile) | |||
226 | outfile = concat (infiles[i], "s"); | |||
227 | ||||
228 | need_initials = 0; | |||
229 | first_initial = '\0'; | |||
230 | ||||
231 | if (ptr < MAX_IN_CORE_SORT500000) | |||
232 | /* Sort a small amount of data. */ | |||
233 | sort_in_core (infiles[i], (int)ptr, outfile); | |||
234 | else | |||
235 | sort_offline (infiles[i], ptr, outfile); | |||
236 | } | |||
237 | ||||
238 | flush_tempfiles (tempcount); | |||
239 | xexit (0); | |||
240 | return 0; /* Avoid bogus warnings. */ | |||
241 | } | |||
242 | ||||
243 | typedef struct | |||
244 | { | |||
245 | char *long_name; | |||
246 | char *short_name; | |||
247 | int *variable_ref; | |||
248 | int variable_value; | |||
249 | char *arg_name; | |||
250 | char *doc_string; | |||
251 | } TEXINDEX_OPTION; | |||
252 | ||||
253 | TEXINDEX_OPTION texindex_options[] = { | |||
254 | { "--help", "-h", (int *)NULL((void *)0), 0, (char *)NULL((void *)0), | |||
255 | N_("display this help and exit")("display this help and exit") }, | |||
256 | { "--keep", "-k", &keep_tempfiles, 1, (char *)NULL((void *)0), | |||
257 | N_("keep temporary files around after processing")("keep temporary files around after processing") }, | |||
258 | { "--no-keep", 0, &keep_tempfiles, 0, (char *)NULL((void *)0), | |||
259 | N_("do not keep temporary files around after processing (default)")("do not keep temporary files around after processing (default)" ) }, | |||
260 | { "--output", "-o", (int *)NULL((void *)0), 0, "FILE", | |||
261 | N_("send output to FILE")("send output to FILE") }, | |||
262 | { "--version", (char *)NULL((void *)0), (int *)NULL((void *)0), 0, (char *)NULL((void *)0), | |||
263 | N_("display version information and exit")("display version information and exit") }, | |||
264 | { (char *)NULL((void *)0), (char *)NULL((void *)0), (int *)NULL((void *)0), 0, (char *)NULL((void *)0) } | |||
265 | }; | |||
266 | ||||
267 | void | |||
268 | usage (int result_value) | |||
269 | { | |||
270 | register int i; | |||
271 | FILE *f = result_value ? stderr(&__sF[2]) : stdout(&__sF[1]); | |||
272 | ||||
273 | fprintf (f, _("Usage: %s [OPTION]... FILE...\n")((const char *) ("Usage: %s [OPTION]... FILE...\n")), program_name); | |||
274 | fprintf (f, _("Generate a sorted index for each TeX output FILE.\n")((const char *) ("Generate a sorted index for each TeX output FILE.\n" ))); | |||
275 | /* Avoid trigraph nonsense. */ | |||
276 | fprintf (f, | |||
277 | _("Usually FILE... is specified as `foo.%c%c\' for a document `foo.texi'.\n")((const char *) ("Usually FILE... is specified as `foo.%c%c\' for a document `foo.texi'.\n" )), | |||
278 | '?', '?'); /* avoid trigraph in cat-id-tbl.c */ | |||
279 | fprintf (f, _("\nOptions:\n")((const char *) ("\nOptions:\n"))); | |||
280 | ||||
281 | for (i = 0; texindex_options[i].long_name; i++) | |||
282 | { | |||
283 | putc (' ', f)(!__isthreaded ? __sputc(' ', f) : (putc)(' ', f)); | |||
284 | ||||
285 | if (texindex_options[i].short_name) | |||
286 | fprintf (f, "%s, ", texindex_options[i].short_name); | |||
287 | ||||
288 | fprintf (f, "%s %s", | |||
289 | texindex_options[i].long_name, | |||
290 | texindex_options[i].arg_name | |||
291 | ? texindex_options[i].arg_name : ""); | |||
292 | ||||
293 | fprintf (f, "\t%s\n", _(texindex_options[i].doc_string)((const char *) (texindex_options[i].doc_string))); | |||
294 | } | |||
295 | fputs (_("\n\((const char *) ("\nEmail bug reports to bug-texinfo@gnu.org,\ngeneral questions and discussion to help-texinfo@gnu.org.\nTexinfo home page: http://www.gnu.org/software/texinfo/" )) | |||
296 | Email bug reports to bug-texinfo@gnu.org,\n\((const char *) ("\nEmail bug reports to bug-texinfo@gnu.org,\ngeneral questions and discussion to help-texinfo@gnu.org.\nTexinfo home page: http://www.gnu.org/software/texinfo/" )) | |||
297 | general questions and discussion to help-texinfo@gnu.org.\n\((const char *) ("\nEmail bug reports to bug-texinfo@gnu.org,\ngeneral questions and discussion to help-texinfo@gnu.org.\nTexinfo home page: http://www.gnu.org/software/texinfo/" )) | |||
298 | Texinfo home page: http://www.gnu.org/software/texinfo/")((const char *) ("\nEmail bug reports to bug-texinfo@gnu.org,\ngeneral questions and discussion to help-texinfo@gnu.org.\nTexinfo home page: http://www.gnu.org/software/texinfo/" )), f); | |||
299 | fputs ("\n", f); | |||
300 | ||||
301 | xexit (result_value); | |||
302 | } | |||
303 | ||||
304 | /* Decode the command line arguments to set the parameter variables | |||
305 | and set up the vector of keyfields and the vector of input files. */ | |||
306 | ||||
307 | void | |||
308 | decode_command (int argc, char **argv) | |||
309 | { | |||
310 | int arg_index = 1; | |||
311 | char **ip; | |||
312 | char **op; | |||
313 | ||||
314 | /* Store default values into parameter variables. */ | |||
315 | ||||
316 | tempdir = getenv ("TMPDIR"); | |||
317 | if (tempdir == NULL((void *)0)) | |||
318 | tempdir = getenv ("TEMP"); | |||
319 | if (tempdir == NULL((void *)0)) | |||
320 | tempdir = getenv ("TMP"); | |||
321 | if (tempdir == NULL((void *)0)) | |||
322 | tempdir = DEFAULT_TMPDIR"/tmp/"; | |||
323 | else | |||
324 | tempdir = concat (tempdir, "/"); | |||
325 | ||||
326 | keep_tempfiles = 0; | |||
327 | ||||
328 | /* Allocate ARGC input files, which must be enough. */ | |||
329 | ||||
330 | infiles = (char **) xmalloc (argc * sizeof (char *)); | |||
331 | outfiles = (char **) xmalloc (argc * sizeof (char *)); | |||
332 | ip = infiles; | |||
333 | op = outfiles; | |||
334 | ||||
335 | while (arg_index < argc) | |||
336 | { | |||
337 | char *arg = argv[arg_index++]; | |||
338 | ||||
339 | if (*arg == '-') | |||
| ||||
340 | { | |||
341 | if (strcmp (arg, "--version") == 0) | |||
342 | { | |||
343 | printf ("texindex (GNU %s) %s\n", PACKAGE"texinfo", VERSION"4.8"); | |||
344 | puts (""); | |||
345 | puts ("Copyright (C) 2004 Free Software Foundation, Inc."); | |||
346 | printf (_("There is NO warranty. You may redistribute this software\n\((const char *) ("There is NO warranty. You may redistribute this software\nunder the terms of the GNU General Public License.\nFor more information about these matters, see the files named COPYING.\n" )) | |||
347 | under the terms of the GNU General Public License.\n\((const char *) ("There is NO warranty. You may redistribute this software\nunder the terms of the GNU General Public License.\nFor more information about these matters, see the files named COPYING.\n" )) | |||
348 | For more information about these matters, see the files named COPYING.\n")((const char *) ("There is NO warranty. You may redistribute this software\nunder the terms of the GNU General Public License.\nFor more information about these matters, see the files named COPYING.\n" ))); | |||
349 | xexit (0); | |||
350 | } | |||
351 | else if ((strcmp (arg, "--keep") == 0) || | |||
352 | (strcmp (arg, "-k") == 0)) | |||
353 | { | |||
354 | keep_tempfiles = 1; | |||
355 | } | |||
356 | else if ((strcmp (arg, "--help") == 0) || | |||
357 | (strcmp (arg, "-h") == 0)) | |||
358 | { | |||
359 | usage (0); | |||
360 | } | |||
361 | else if ((strcmp (arg, "--output") == 0) || | |||
362 | (strcmp (arg, "-o") == 0)) | |||
363 | { | |||
364 | if (argv[arg_index] != (char *)NULL((void *)0)) | |||
365 | { | |||
366 | arg_index++; | |||
367 | if (op > outfiles) | |||
368 | *(op - 1) = argv[arg_index]; | |||
369 | } | |||
370 | else | |||
371 | usage (1); | |||
372 | } | |||
373 | else | |||
374 | usage (1); | |||
375 | } | |||
376 | else | |||
377 | { | |||
378 | *ip++ = arg; | |||
379 | *op++ = (char *)NULL((void *)0); | |||
380 | } | |||
381 | } | |||
382 | ||||
383 | /* Record number of keyfields and terminate list of filenames. */ | |||
384 | num_infiles = ip - infiles; | |||
385 | *ip = (char *)NULL((void *)0); | |||
386 | if (num_infiles == 0) | |||
387 | usage (1); | |||
388 | } | |||
389 | ||||
390 | /* Return a name for temporary file COUNT. */ | |||
391 | ||||
392 | static char * | |||
393 | maketempname (int count) | |||
394 | { | |||
395 | static char *tempbase = NULL((void *)0); | |||
396 | char tempsuffix[10]; | |||
397 | char *name; | |||
398 | int fd; | |||
399 | ||||
400 | if (!tempbase) | |||
401 | { | |||
402 | int fd; | |||
403 | tempbase = concat (tempdir, "txidxXXXXXX"); | |||
404 | ||||
405 | fd = mkstemp (tempbase); | |||
406 | if (fd == -1) | |||
407 | pfatal_with_name (tempbase); | |||
408 | } | |||
409 | ||||
410 | sprintf (tempsuffix, ".%d", count); | |||
411 | name = concat (tempbase, tempsuffix); | |||
412 | ||||
413 | fd = open (name, O_CREAT0x0200|O_EXCL0x0800|O_WRONLY0x0001, 0666); | |||
414 | if (fd == -1) | |||
415 | return NULL((void *)0); | |||
416 | else | |||
417 | { | |||
418 | close(fd); | |||
419 | return name; | |||
420 | } | |||
421 | } | |||
422 | ||||
423 | ||||
424 | /* Delete all temporary files up to TO_COUNT. */ | |||
425 | ||||
426 | void | |||
427 | flush_tempfiles (int to_count) | |||
428 | { | |||
429 | if (keep_tempfiles) | |||
430 | return; | |||
431 | while (last_deleted_tempcount < to_count) | |||
432 | unlink (maketempname (++last_deleted_tempcount)); | |||
433 | } | |||
434 | ||||
435 | ||||
436 | /* Compare LINE1 and LINE2 according to the specified set of keyfields. */ | |||
437 | ||||
438 | int | |||
439 | compare_full (const void *p1, const void *p2) | |||
440 | { | |||
441 | char **line1 = (char **) p1; | |||
442 | char **line2 = (char **) p2; | |||
443 | int i; | |||
444 | ||||
445 | /* Compare using the first keyfield; | |||
446 | if that does not distinguish the lines, try the second keyfield; | |||
447 | and so on. */ | |||
448 | ||||
449 | for (i = 0; i < num_keyfields; i++) | |||
450 | { | |||
451 | long length1, length2; | |||
452 | char *start1 = find_field (&keyfields[i], *line1, &length1); | |||
453 | char *start2 = find_field (&keyfields[i], *line2, &length2); | |||
454 | int tem = compare_field (&keyfields[i], start1, length1, | |||
455 | *line1 - text_base, | |||
456 | start2, length2, *line2 - text_base); | |||
457 | if (tem) | |||
458 | { | |||
459 | if (keyfields[i].reverse) | |||
460 | return -tem; | |||
461 | return tem; | |||
462 | } | |||
463 | } | |||
464 | ||||
465 | return 0; /* Lines match exactly. */ | |||
466 | } | |||
467 | ||||
468 | /* Compare LINE1 and LINE2, described by structures | |||
469 | in which the first keyfield is identified in advance. | |||
470 | For positional sorting, assumes that the order of the lines in core | |||
471 | reflects their nominal order. */ | |||
472 | int | |||
473 | compare_prepared (const void *p1, const void *p2) | |||
474 | { | |||
475 | struct lineinfo *line1 = (struct lineinfo *) p1; | |||
476 | struct lineinfo *line2 = (struct lineinfo *) p2; | |||
477 | int i; | |||
478 | int tem; | |||
479 | char *text1, *text2; | |||
480 | ||||
481 | /* Compare using the first keyfield, which has been found for us already. */ | |||
482 | if (keyfields->positional) | |||
483 | { | |||
484 | if (line1->text - text_base > line2->text - text_base) | |||
485 | tem = 1; | |||
486 | else | |||
487 | tem = -1; | |||
488 | } | |||
489 | else if (keyfields->numeric) | |||
490 | tem = line1->key.number - line2->key.number; | |||
491 | else | |||
492 | tem = compare_field (keyfields, line1->key.text, line1->keylen, 0, | |||
493 | line2->key.text, line2->keylen, 0); | |||
494 | if (tem) | |||
495 | { | |||
496 | if (keyfields->reverse) | |||
497 | return -tem; | |||
498 | return tem; | |||
499 | } | |||
500 | ||||
501 | text1 = line1->text; | |||
502 | text2 = line2->text; | |||
503 | ||||
504 | /* Compare using the second keyfield; | |||
505 | if that does not distinguish the lines, try the third keyfield; | |||
506 | and so on. */ | |||
507 | ||||
508 | for (i = 1; i < num_keyfields; i++) | |||
509 | { | |||
510 | long length1, length2; | |||
511 | char *start1 = find_field (&keyfields[i], text1, &length1); | |||
512 | char *start2 = find_field (&keyfields[i], text2, &length2); | |||
513 | int tem = compare_field (&keyfields[i], start1, length1, | |||
514 | text1 - text_base, | |||
515 | start2, length2, text2 - text_base); | |||
516 | if (tem) | |||
517 | { | |||
518 | if (keyfields[i].reverse) | |||
519 | return -tem; | |||
520 | return tem; | |||
521 | } | |||
522 | } | |||
523 | ||||
524 | return 0; /* Lines match exactly. */ | |||
525 | } | |||
526 | ||||
527 | /* Like compare_full but more general. | |||
528 | You can pass any strings, and you can say how many keyfields to use. | |||
529 | POS1 and POS2 should indicate the nominal positional ordering of | |||
530 | the two lines in the input. */ | |||
531 | ||||
532 | int | |||
533 | compare_general (char *str1, char *str2, long int pos1, long int pos2, int use_keyfields) | |||
534 | { | |||
535 | int i; | |||
536 | ||||
537 | /* Compare using the first keyfield; | |||
538 | if that does not distinguish the lines, try the second keyfield; | |||
539 | and so on. */ | |||
540 | ||||
541 | for (i = 0; i < use_keyfields; i++) | |||
542 | { | |||
543 | long length1, length2; | |||
544 | char *start1 = find_field (&keyfields[i], str1, &length1); | |||
545 | char *start2 = find_field (&keyfields[i], str2, &length2); | |||
546 | int tem = compare_field (&keyfields[i], start1, length1, pos1, | |||
547 | start2, length2, pos2); | |||
548 | if (tem) | |||
549 | { | |||
550 | if (keyfields[i].reverse) | |||
551 | return -tem; | |||
552 | return tem; | |||
553 | } | |||
554 | } | |||
555 | ||||
556 | return 0; /* Lines match exactly. */ | |||
557 | } | |||
558 | ||||
559 | /* Find the start and length of a field in STR according to KEYFIELD. | |||
560 | A pointer to the starting character is returned, and the length | |||
561 | is stored into the int that LENGTHPTR points to. */ | |||
562 | ||||
563 | char * | |||
564 | find_field (struct keyfield *keyfield, char *str, long int *lengthptr) | |||
565 | { | |||
566 | char *start; | |||
567 | char *end; | |||
568 | char *(*fun) (); | |||
569 | ||||
570 | if (keyfield->braced) | |||
571 | fun = find_braced_pos; | |||
572 | else | |||
573 | fun = find_pos; | |||
574 | ||||
575 | start = (*fun) (str, keyfield->startwords, keyfield->startchars, | |||
576 | keyfield->ignore_blanks); | |||
577 | if (keyfield->endwords < 0) | |||
578 | { | |||
579 | if (keyfield->braced) | |||
580 | end = find_braced_end (start); | |||
581 | else | |||
582 | { | |||
583 | end = start; | |||
584 | while (*end && *end != '\n') | |||
585 | end++; | |||
586 | } | |||
587 | } | |||
588 | else | |||
589 | { | |||
590 | end = (*fun) (str, keyfield->endwords, keyfield->endchars, 0); | |||
591 | if (end - str < start - str) | |||
592 | end = start; | |||
593 | } | |||
594 | *lengthptr = end - start; | |||
595 | return start; | |||
596 | } | |||
597 | ||||
598 | /* Return a pointer to a specified place within STR, | |||
599 | skipping (from the beginning) WORDS words and then CHARS chars. | |||
600 | If IGNORE_BLANKS is nonzero, we skip all blanks | |||
601 | after finding the specified word. */ | |||
602 | ||||
603 | char * | |||
604 | find_pos (char *str, int words, int chars, int ignore_blanks) | |||
605 | { | |||
606 | int i; | |||
607 | char *p = str; | |||
608 | ||||
609 | for (i = 0; i < words; i++) | |||
610 | { | |||
611 | char c; | |||
612 | /* Find next bunch of nonblanks and skip them. */ | |||
613 | while ((c = *p) == ' ' || c == '\t') | |||
614 | p++; | |||
615 | while ((c = *p) && c != '\n' && !(c == ' ' || c == '\t')) | |||
616 | p++; | |||
617 | if (!*p || *p == '\n') | |||
618 | return p; | |||
619 | } | |||
620 | ||||
621 | while (*p == ' ' || *p == '\t') | |||
622 | p++; | |||
623 | ||||
624 | for (i = 0; i < chars; i++) | |||
625 | { | |||
626 | if (!*p || *p == '\n') | |||
627 | break; | |||
628 | p++; | |||
629 | } | |||
630 | return p; | |||
631 | } | |||
632 | ||||
633 | /* Like find_pos but assumes that each field is surrounded by braces | |||
634 | and that braces within fields are balanced. */ | |||
635 | ||||
636 | char * | |||
637 | find_braced_pos (char *str, int words, int chars, int ignore_blanks) | |||
638 | { | |||
639 | int i; | |||
640 | int bracelevel; | |||
641 | char *p = str; | |||
642 | char c; | |||
643 | ||||
644 | for (i = 0; i < words; i++) | |||
645 | { | |||
646 | bracelevel = 1; | |||
647 | while ((c = *p++) != '{' && c != '\n' && c) | |||
648 | /* Do nothing. */ ; | |||
649 | if (c != '{') | |||
650 | return p - 1; | |||
651 | while (bracelevel) | |||
652 | { | |||
653 | c = *p++; | |||
654 | if (c == '{') | |||
655 | bracelevel++; | |||
656 | if (c == '}') | |||
657 | bracelevel--; | |||
658 | if (c == 0 || c == '\n') | |||
659 | return p - 1; | |||
660 | } | |||
661 | } | |||
662 | ||||
663 | while ((c = *p++) != '{' && c != '\n' && c) | |||
664 | /* Do nothing. */ ; | |||
665 | ||||
666 | if (c != '{') | |||
667 | return p - 1; | |||
668 | ||||
669 | if (ignore_blanks) | |||
670 | while ((c = *p) == ' ' || c == '\t') | |||
671 | p++; | |||
672 | ||||
673 | for (i = 0; i < chars; i++) | |||
674 | { | |||
675 | if (!*p || *p == '\n') | |||
676 | break; | |||
677 | p++; | |||
678 | } | |||
679 | return p; | |||
680 | } | |||
681 | ||||
682 | /* Find the end of the balanced-brace field which starts at STR. | |||
683 | The position returned is just before the closing brace. */ | |||
684 | ||||
685 | char * | |||
686 | find_braced_end (char *str) | |||
687 | { | |||
688 | int bracelevel; | |||
689 | char *p = str; | |||
690 | char c; | |||
691 | ||||
692 | bracelevel = 1; | |||
693 | while (bracelevel) | |||
694 | { | |||
695 | c = *p++; | |||
696 | if (c == '{') | |||
697 | bracelevel++; | |||
698 | if (c == '}') | |||
699 | bracelevel--; | |||
700 | if (c == 0 || c == '\n') | |||
701 | return p - 1; | |||
702 | } | |||
703 | return p - 1; | |||
704 | } | |||
705 | ||||
706 | long | |||
707 | find_value (char *start, long int length) | |||
708 | { | |||
709 | while (length != 0L) | |||
710 | { | |||
711 | if (isdigit (*start)) | |||
712 | return atol (start); | |||
713 | length--; | |||
714 | start++; | |||
715 | } | |||
716 | return 0l; | |||
717 | } | |||
718 | ||||
719 | /* Vector used to translate characters for comparison. | |||
720 | This is how we make all alphanumerics follow all else, | |||
721 | and ignore case in the first sorting. */ | |||
722 | int char_order[256]; | |||
723 | ||||
724 | void | |||
725 | init_char_order (void) | |||
726 | { | |||
727 | int i; | |||
728 | for (i = 1; i < 256; i++) | |||
729 | char_order[i] = i; | |||
730 | ||||
731 | for (i = '0'; i <= '9'; i++) | |||
732 | char_order[i] += 512; | |||
733 | ||||
734 | for (i = 'a'; i <= 'z'; i++) | |||
735 | { | |||
736 | char_order[i] = 512 + i; | |||
737 | char_order[i + 'A' - 'a'] = 512 + i; | |||
738 | } | |||
739 | } | |||
740 | ||||
741 | /* Compare two fields (each specified as a start pointer and a character count) | |||
742 | according to KEYFIELD. | |||
743 | The sign of the value reports the relation between the fields. */ | |||
744 | ||||
745 | int | |||
746 | compare_field (struct keyfield *keyfield, char *start1, long int length1, | |||
747 | long int pos1, char *start2, long int length2, long int pos2) | |||
748 | { | |||
749 | if (keyfields->positional) | |||
750 | { | |||
751 | if (pos1 > pos2) | |||
752 | return 1; | |||
753 | else | |||
754 | return -1; | |||
755 | } | |||
756 | if (keyfield->numeric) | |||
757 | { | |||
758 | long value = find_value (start1, length1) - find_value (start2, length2); | |||
759 | if (value > 0) | |||
760 | return 1; | |||
761 | if (value < 0) | |||
762 | return -1; | |||
763 | return 0; | |||
764 | } | |||
765 | else | |||
766 | { | |||
767 | char *p1 = start1; | |||
768 | char *p2 = start2; | |||
769 | char *e1 = start1 + length1; | |||
770 | char *e2 = start2 + length2; | |||
771 | ||||
772 | while (1) | |||
773 | { | |||
774 | int c1, c2; | |||
775 | ||||
776 | if (p1 == e1) | |||
777 | c1 = 0; | |||
778 | else | |||
779 | c1 = *p1++; | |||
780 | if (p2 == e2) | |||
781 | c2 = 0; | |||
782 | else | |||
783 | c2 = *p2++; | |||
784 | ||||
785 | if (char_order[c1] != char_order[c2]) | |||
786 | return char_order[c1] - char_order[c2]; | |||
787 | if (!c1) | |||
788 | break; | |||
789 | } | |||
790 | ||||
791 | /* Strings are equal except possibly for case. */ | |||
792 | p1 = start1; | |||
793 | p2 = start2; | |||
794 | while (1) | |||
795 | { | |||
796 | int c1, c2; | |||
797 | ||||
798 | if (p1 == e1) | |||
799 | c1 = 0; | |||
800 | else | |||
801 | c1 = *p1++; | |||
802 | if (p2 == e2) | |||
803 | c2 = 0; | |||
804 | else | |||
805 | c2 = *p2++; | |||
806 | ||||
807 | if (c1 != c2) | |||
808 | /* Reverse sign here so upper case comes out last. */ | |||
809 | return c2 - c1; | |||
810 | if (!c1) | |||
811 | break; | |||
812 | } | |||
813 | ||||
814 | return 0; | |||
815 | } | |||
816 | } | |||
817 | ||||
818 | /* A `struct linebuffer' is a structure which holds a line of text. | |||
819 | `readline' reads a line from a stream into a linebuffer | |||
820 | and works regardless of the length of the line. */ | |||
821 | ||||
822 | struct linebuffer | |||
823 | { | |||
824 | long size; | |||
825 | char *buffer; | |||
826 | }; | |||
827 | ||||
828 | /* Initialize LINEBUFFER for use. */ | |||
829 | ||||
830 | void | |||
831 | initbuffer (struct linebuffer *linebuffer) | |||
832 | { | |||
833 | linebuffer->size = 200; | |||
834 | linebuffer->buffer = (char *) xmalloc (200); | |||
835 | } | |||
836 | ||||
837 | /* Read a line of text from STREAM into LINEBUFFER. | |||
838 | Return the length of the line. */ | |||
839 | ||||
840 | long | |||
841 | readline (struct linebuffer *linebuffer, FILE *stream) | |||
842 | { | |||
843 | char *buffer = linebuffer->buffer; | |||
844 | char *p = linebuffer->buffer; | |||
845 | char *end = p + linebuffer->size; | |||
846 | ||||
847 | while (1) | |||
848 | { | |||
849 | int c = getc (stream)(!__isthreaded ? (--(stream)->_r < 0 ? __srget(stream) : (int)(*(stream)->_p++)) : (getc)(stream)); | |||
850 | if (p == end) | |||
851 | { | |||
852 | buffer = (char *) xrealloc (buffer, linebuffer->size *= 2); | |||
853 | p += buffer - linebuffer->buffer; | |||
854 | end += buffer - linebuffer->buffer; | |||
855 | linebuffer->buffer = buffer; | |||
856 | } | |||
857 | if (c < 0 || c == '\n') | |||
858 | { | |||
859 | *p = 0; | |||
860 | break; | |||
861 | } | |||
862 | *p++ = c; | |||
863 | } | |||
864 | ||||
865 | return p - buffer; | |||
866 | } | |||
867 | ||||
868 | /* Sort an input file too big to sort in core. */ | |||
869 | ||||
870 | void | |||
871 | sort_offline (char *infile, off_t total, char *outfile) | |||
872 | { | |||
873 | /* More than enough. */ | |||
874 | int ntemps = 2 * (total + MAX_IN_CORE_SORT500000 - 1) / MAX_IN_CORE_SORT500000; | |||
875 | char **tempfiles = (char **) xmalloc (ntemps * sizeof (char *)); | |||
876 | FILE *istream = fopen (infile, "r"); | |||
877 | int i; | |||
878 | struct linebuffer lb; | |||
879 | long linelength; | |||
880 | int failure = 0; | |||
881 | ||||
882 | initbuffer (&lb); | |||
883 | ||||
884 | /* Read in one line of input data. */ | |||
885 | ||||
886 | linelength = readline (&lb, istream); | |||
887 | ||||
888 | if (lb.buffer[0] != '\\' && lb.buffer[0] != '@') | |||
889 | { | |||
890 | error (_("%s: not a texinfo index file")((const char *) ("%s: not a texinfo index file")), infile); | |||
891 | return; | |||
892 | } | |||
893 | ||||
894 | /* Split up the input into `ntemps' temporary files, or maybe fewer, | |||
895 | and put the new files' names into `tempfiles' */ | |||
896 | ||||
897 | for (i = 0; i < ntemps; i++) | |||
898 | { | |||
899 | char *outname = maketempname (++tempcount); | |||
900 | FILE *ostream; | |||
901 | long tempsize = 0; | |||
902 | ||||
903 | if (!outname) | |||
904 | pfatal_with_name("temporary file"); | |||
905 | ostream = fopen (outname, "w"); | |||
906 | if (!outname || !ostream) | |||
907 | pfatal_with_name (outname); | |||
908 | tempfiles[i] = outname; | |||
909 | ||||
910 | /* Copy lines into this temp file as long as it does not make file | |||
911 | "too big" or until there are no more lines. */ | |||
912 | ||||
913 | while (tempsize + linelength + 1 <= MAX_IN_CORE_SORT500000) | |||
914 | { | |||
915 | tempsize += linelength + 1; | |||
916 | fputs (lb.buffer, ostream); | |||
917 | putc ('\n', ostream)(!__isthreaded ? __sputc('\n', ostream) : (putc)('\n', ostream )); | |||
918 | ||||
919 | /* Read another line of input data. */ | |||
920 | ||||
921 | linelength = readline (&lb, istream); | |||
922 | if (!linelength && feof (istream)(!__isthreaded ? (((istream)->_flags & 0x0020) != 0) : (feof)(istream))) | |||
923 | break; | |||
924 | ||||
925 | if (lb.buffer[0] != '\\' && lb.buffer[0] != '@') | |||
926 | { | |||
927 | error (_("%s: not a texinfo index file")((const char *) ("%s: not a texinfo index file")), infile); | |||
928 | failure = 1; | |||
929 | goto fail; | |||
930 | } | |||
931 | } | |||
932 | fclose (ostream); | |||
933 | if (feof (istream)(!__isthreaded ? (((istream)->_flags & 0x0020) != 0) : (feof)(istream))) | |||
934 | break; | |||
935 | } | |||
936 | ||||
937 | free (lb.buffer); | |||
938 | ||||
939 | fail: | |||
940 | /* Record number of temp files we actually needed. */ | |||
941 | ||||
942 | ntemps = i; | |||
943 | ||||
944 | /* Sort each tempfile into another tempfile. | |||
945 | Delete the first set of tempfiles and put the names of the second | |||
946 | into `tempfiles'. */ | |||
947 | ||||
948 | for (i = 0; i < ntemps; i++) | |||
949 | { | |||
950 | char *newtemp = maketempname (++tempcount); | |||
951 | sort_in_core (tempfiles[i], MAX_IN_CORE_SORT500000, newtemp); | |||
952 | if (!keep_tempfiles) | |||
953 | unlink (tempfiles[i]); | |||
954 | tempfiles[i] = newtemp; | |||
955 | } | |||
956 | ||||
957 | if (failure) | |||
958 | return; | |||
959 | ||||
960 | /* Merge the tempfiles together and indexify. */ | |||
961 | ||||
962 | merge_files (tempfiles, ntemps, outfile); | |||
963 | } | |||
964 | ||||
965 | /* Sort INFILE, whose size is TOTAL, | |||
966 | assuming that is small enough to be done in-core, | |||
967 | then indexify it and send the output to OUTFILE (or to stdout). */ | |||
968 | ||||
969 | void | |||
970 | sort_in_core (char *infile, int total, char *outfile) | |||
971 | { | |||
972 | char **nextline; | |||
973 | char *data = (char *) xmalloc (total + 1); | |||
974 | char *file_data; | |||
975 | long file_size; | |||
976 | int i; | |||
977 | FILE *ostream = stdout(&__sF[1]); | |||
978 | struct lineinfo *lineinfo; | |||
979 | ||||
980 | /* Read the contents of the file into the moby array `data'. */ | |||
981 | ||||
982 | int desc = open (infile, O_RDONLY0x0000, 0); | |||
983 | ||||
984 | if (desc < 0) | |||
985 | fatal (_("failure reopening %s")((const char *) ("failure reopening %s")), infile); | |||
986 | for (file_size = 0;;) | |||
987 | { | |||
988 | i = read (desc, data + file_size, total - file_size); | |||
989 | if (i <= 0) | |||
990 | break; | |||
991 | file_size += i; | |||
992 | } | |||
993 | file_data = data; | |||
994 | data[file_size] = 0; | |||
995 | ||||
996 | close (desc); | |||
997 | ||||
998 | if (file_size > 0 && data[0] != '\\' && data[0] != '@') | |||
999 | { | |||
1000 | error (_("%s: not a texinfo index file")((const char *) ("%s: not a texinfo index file")), infile); | |||
1001 | return; | |||
1002 | } | |||
1003 | ||||
1004 | init_char_order (); | |||
1005 | ||||
1006 | /* Sort routines want to know this address. */ | |||
1007 | ||||
1008 | text_base = data; | |||
1009 | ||||
1010 | /* Create the array of pointers to lines, with a default size | |||
1011 | frequently enough. */ | |||
1012 | ||||
1013 | nlines = total / 50; | |||
1014 | if (!nlines) | |||
1015 | nlines = 2; | |||
1016 | linearray = (char **) xmalloc (nlines * sizeof (char *)); | |||
1017 | ||||
1018 | /* `nextline' points to the next free slot in this array. | |||
1019 | `nlines' is the allocated size. */ | |||
1020 | ||||
1021 | nextline = linearray; | |||
1022 | ||||
1023 | /* Parse the input file's data, and make entries for the lines. */ | |||
1024 | ||||
1025 | nextline = parsefile (infile, nextline, file_data, file_size); | |||
1026 | if (nextline == 0) | |||
1027 | { | |||
1028 | error (_("%s: not a texinfo index file")((const char *) ("%s: not a texinfo index file")), infile); | |||
1029 | return; | |||
1030 | } | |||
1031 | ||||
1032 | /* Sort the lines. */ | |||
1033 | ||||
1034 | /* If we have enough space, find the first keyfield of each line in advance. | |||
1035 | Make a `struct lineinfo' for each line, which records the keyfield | |||
1036 | as well as the line, and sort them. */ | |||
1037 | ||||
1038 | lineinfo = malloc ((nextline - linearray) * sizeof (struct lineinfo)); | |||
1039 | ||||
1040 | if (lineinfo) | |||
1041 | { | |||
1042 | struct lineinfo *lp; | |||
1043 | char **p; | |||
1044 | ||||
1045 | for (lp = lineinfo, p = linearray; p != nextline; lp++, p++) | |||
1046 | { | |||
1047 | lp->text = *p; | |||
1048 | lp->key.text = find_field (keyfields, *p, &lp->keylen); | |||
1049 | if (keyfields->numeric) | |||
1050 | lp->key.number = find_value (lp->key.text, lp->keylen); | |||
1051 | } | |||
1052 | ||||
1053 | qsort (lineinfo, nextline - linearray, sizeof (struct lineinfo), | |||
1054 | compare_prepared); | |||
1055 | ||||
1056 | for (lp = lineinfo, p = linearray; p != nextline; lp++, p++) | |||
1057 | *p = lp->text; | |||
1058 | ||||
1059 | free (lineinfo); | |||
1060 | } | |||
1061 | else | |||
1062 | qsort (linearray, nextline - linearray, sizeof (char *), compare_full); | |||
1063 | ||||
1064 | /* Open the output file. */ | |||
1065 | ||||
1066 | if (outfile) | |||
1067 | { | |||
1068 | ostream = fopen (outfile, "w"); | |||
1069 | if (!ostream) | |||
1070 | pfatal_with_name (outfile); | |||
1071 | } | |||
1072 | ||||
1073 | writelines (linearray, nextline - linearray, ostream); | |||
1074 | if (outfile) | |||
1075 | fclose (ostream); | |||
1076 | ||||
1077 | free (linearray); | |||
1078 | free (data); | |||
1079 | } | |||
1080 | ||||
1081 | /* Parse an input string in core into lines. | |||
1082 | DATA is the input string, and SIZE is its length. | |||
1083 | Data goes in LINEARRAY starting at NEXTLINE. | |||
1084 | The value returned is the first entry in LINEARRAY still unused. | |||
1085 | Value 0 means input file contents are invalid. */ | |||
1086 | ||||
1087 | char ** | |||
1088 | parsefile (char *filename, char **nextline, char *data, long int size) | |||
1089 | { | |||
1090 | char *p, *end; | |||
1091 | char **line = nextline; | |||
1092 | ||||
1093 | p = data; | |||
1094 | end = p + size; | |||
1095 | *end = 0; | |||
1096 | ||||
1097 | while (p != end) | |||
1098 | { | |||
1099 | if (p[0] != '\\' && p[0] != '@') | |||
1100 | return 0; | |||
1101 | ||||
1102 | *line = p; | |||
1103 | ||||
1104 | /* Find the first letter of the first field of this line. If it | |||
1105 | is different from the first letter of the first field of the | |||
1106 | first line, we need initial headers in the output index. */ | |||
1107 | while (*p && *p != '{') | |||
1108 | p++; | |||
1109 | if (p == end) | |||
1110 | return 0; | |||
1111 | p++; | |||
1112 | if (first_initial) | |||
1113 | { | |||
1114 | if (first_initial != toupper (*p)) | |||
1115 | need_initials = 1; | |||
1116 | } | |||
1117 | else | |||
1118 | first_initial = toupper (*p); | |||
1119 | ||||
1120 | while (*p && *p != '\n') | |||
1121 | p++; | |||
1122 | if (p != end) | |||
1123 | p++; | |||
1124 | ||||
1125 | line++; | |||
1126 | if (line == linearray + nlines) | |||
1127 | { | |||
1128 | char **old = linearray; | |||
1129 | linearray = xrealloc (linearray, sizeof (char *) * (nlines *= 4)); | |||
1130 | line += linearray - old; | |||
1131 | } | |||
1132 | } | |||
1133 | ||||
1134 | return line; | |||
1135 | } | |||
1136 | ||||
1137 | /* Indexification is a filter applied to the sorted lines | |||
1138 | as they are being written to the output file. | |||
1139 | Multiple entries for the same name, with different page numbers, | |||
1140 | get combined into a single entry with multiple page numbers. | |||
1141 | The first braced field, which is used for sorting, is discarded. | |||
1142 | However, its first character is examined, folded to lower case, | |||
1143 | and if it is different from that in the previous line fed to us | |||
1144 | a \initial line is written with one argument, the new initial. | |||
1145 | ||||
1146 | If an entry has four braced fields, then the second and third | |||
1147 | constitute primary and secondary names. | |||
1148 | In this case, each change of primary name | |||
1149 | generates a \primary line which contains only the primary name, | |||
1150 | and in between these are \secondary lines which contain | |||
1151 | just a secondary name and page numbers. */ | |||
1152 | ||||
1153 | /* The last primary name we wrote a \primary entry for. | |||
1154 | If only one level of indexing is being done, this is the last name seen. */ | |||
1155 | char *lastprimary; | |||
1156 | /* Length of storage allocated for lastprimary. */ | |||
1157 | int lastprimarylength; | |||
1158 | ||||
1159 | /* Similar, for the secondary name. */ | |||
1160 | char *lastsecondary; | |||
1161 | int lastsecondarylength; | |||
1162 | ||||
1163 | /* Zero if we are not in the middle of writing an entry. | |||
1164 | One if we have written the beginning of an entry but have not | |||
1165 | yet written any page numbers into it. | |||
1166 | Greater than one if we have written the beginning of an entry | |||
1167 | plus at least one page number. */ | |||
1168 | int pending; | |||
1169 | ||||
1170 | /* The initial (for sorting purposes) of the last primary entry written. | |||
1171 | When this changes, a \initial {c} line is written */ | |||
1172 | ||||
1173 | char *lastinitial; | |||
1174 | ||||
1175 | int lastinitiallength; | |||
1176 | ||||
1177 | /* When we need a string of length 1 for the value of lastinitial, | |||
1178 | store it here. */ | |||
1179 | ||||
1180 | char lastinitial1[2]; | |||
1181 | ||||
1182 | /* Initialize static storage for writing an index. */ | |||
1183 | ||||
1184 | void | |||
1185 | init_index (void) | |||
1186 | { | |||
1187 | pending = 0; | |||
1188 | lastinitial = lastinitial1; | |||
1189 | lastinitial1[0] = 0; | |||
1190 | lastinitial1[1] = 0; | |||
1191 | lastinitiallength = 0; | |||
1192 | lastprimarylength = 100; | |||
1193 | lastprimary = (char *) xmalloc (lastprimarylength + 1); | |||
1194 | memset (lastprimary, '\0', lastprimarylength + 1); | |||
1195 | lastsecondarylength = 100; | |||
1196 | lastsecondary = (char *) xmalloc (lastsecondarylength + 1); | |||
1197 | memset (lastsecondary, '\0', lastsecondarylength + 1); | |||
1198 | } | |||
1199 | ||||
1200 | /* Indexify. Merge entries for the same name, | |||
1201 | insert headers for each initial character, etc. */ | |||
1202 | ||||
1203 | void | |||
1204 | indexify (char *line, FILE *ostream) | |||
1205 | { | |||
1206 | char *primary, *secondary, *pagenumber; | |||
1207 | int primarylength, secondarylength = 0, pagelength; | |||
1208 | int nosecondary; | |||
1209 | int initiallength; | |||
1210 | char *initial; | |||
1211 | char initial1[2]; | |||
1212 | register char *p; | |||
1213 | ||||
1214 | /* First, analyze the parts of the entry fed to us this time. */ | |||
1215 | ||||
1216 | p = find_braced_pos (line, 0, 0, 0); | |||
1217 | if (*p == '{') | |||
1218 | { | |||
1219 | initial = p; | |||
1220 | /* Get length of inner pair of braces starting at `p', | |||
1221 | including that inner pair of braces. */ | |||
1222 | initiallength = find_braced_end (p + 1) + 1 - p; | |||
1223 | } | |||
1224 | else | |||
1225 | { | |||
1226 | initial = initial1; | |||
1227 | initial1[0] = toupper (*p); | |||
1228 | initial1[1] = 0; | |||
1229 | initiallength = 1; | |||
1230 | } | |||
1231 | ||||
1232 | pagenumber = find_braced_pos (line, 1, 0, 0); | |||
1233 | pagelength = find_braced_end (pagenumber) - pagenumber; | |||
1234 | if (pagelength == 0) | |||
1235 | fatal (_("No page number in %s")((const char *) ("No page number in %s")), line); | |||
1236 | ||||
1237 | primary = find_braced_pos (line, 2, 0, 0); | |||
1238 | primarylength = find_braced_end (primary) - primary; | |||
1239 | ||||
1240 | secondary = find_braced_pos (line, 3, 0, 0); | |||
1241 | nosecondary = !*secondary; | |||
1242 | if (!nosecondary) | |||
1243 | secondarylength = find_braced_end (secondary) - secondary; | |||
1244 | ||||
1245 | /* If the primary is different from before, make a new primary entry. */ | |||
1246 | if (strncmp (primary, lastprimary, primarylength)) | |||
1247 | { | |||
1248 | /* Close off current secondary entry first, if one is open. */ | |||
1249 | if (pending) | |||
1250 | { | |||
1251 | fputs ("}\n", ostream); | |||
1252 | pending = 0; | |||
1253 | } | |||
1254 | ||||
1255 | /* If this primary has a different initial, include an entry for | |||
1256 | the initial. */ | |||
1257 | if (need_initials && | |||
1258 | (initiallength != lastinitiallength || | |||
1259 | strncmp (initial, lastinitial, initiallength))) | |||
1260 | { | |||
1261 | fprintf (ostream, "\\initial {"); | |||
1262 | fwrite (initial, 1, initiallength, ostream); | |||
1263 | fputs ("}\n", ostream); | |||
1264 | if (initial == initial1) | |||
1265 | { | |||
1266 | lastinitial = lastinitial1; | |||
1267 | *lastinitial1 = *initial1; | |||
1268 | } | |||
1269 | else | |||
1270 | { | |||
1271 | lastinitial = initial; | |||
1272 | } | |||
1273 | lastinitiallength = initiallength; | |||
1274 | } | |||
1275 | ||||
1276 | /* Make the entry for the primary. */ | |||
1277 | if (nosecondary) | |||
1278 | fputs ("\\entry {", ostream); | |||
1279 | else | |||
1280 | fputs ("\\primary {", ostream); | |||
1281 | fwrite (primary, primarylength, 1, ostream); | |||
1282 | if (nosecondary) | |||
1283 | { | |||
1284 | fputs ("}{", ostream); | |||
1285 | pending = 1; | |||
1286 | } | |||
1287 | else | |||
1288 | fputs ("}\n", ostream); | |||
1289 | ||||
1290 | /* Record name of most recent primary. */ | |||
1291 | if (lastprimarylength < primarylength) | |||
1292 | { | |||
1293 | lastprimarylength = primarylength + 100; | |||
1294 | lastprimary = (char *) xrealloc (lastprimary, | |||
1295 | 1 + lastprimarylength); | |||
1296 | } | |||
1297 | strncpy (lastprimary, primary, primarylength); | |||
1298 | lastprimary[primarylength] = 0; | |||
1299 | ||||
1300 | /* There is no current secondary within this primary, now. */ | |||
1301 | lastsecondary[0] = 0; | |||
1302 | } | |||
1303 | ||||
1304 | /* Should not have an entry with no subtopic following one with a | |||
1305 | subtopic. */ | |||
1306 | ||||
1307 | if (nosecondary && *lastsecondary) | |||
1308 | error (_("entry %s follows an entry with a secondary name")((const char *) ("entry %s follows an entry with a secondary name" )), line); | |||
1309 | ||||
1310 | /* Start a new secondary entry if necessary. */ | |||
1311 | if (!nosecondary && strncmp (secondary, lastsecondary, secondarylength)) | |||
1312 | { | |||
1313 | if (pending) | |||
1314 | { | |||
1315 | fputs ("}\n", ostream); | |||
1316 | pending = 0; | |||
1317 | } | |||
1318 | ||||
1319 | /* Write the entry for the secondary. */ | |||
1320 | fputs ("\\secondary {", ostream); | |||
1321 | fwrite (secondary, secondarylength, 1, ostream); | |||
1322 | fputs ("}{", ostream); | |||
1323 | pending = 1; | |||
1324 | ||||
1325 | /* Record name of most recent secondary. */ | |||
1326 | if (lastsecondarylength < secondarylength) | |||
1327 | { | |||
1328 | lastsecondarylength = secondarylength + 100; | |||
1329 | lastsecondary = (char *) xrealloc (lastsecondary, | |||
1330 | 1 + lastsecondarylength); | |||
1331 | } | |||
1332 | strncpy (lastsecondary, secondary, secondarylength); | |||
1333 | lastsecondary[secondarylength] = 0; | |||
1334 | } | |||
1335 | ||||
1336 | /* Here to add one more page number to the current entry. */ | |||
1337 | if (pending++ != 1) | |||
1338 | fputs (", ", ostream); /* Punctuate first, if this is not the first. */ | |||
1339 | fwrite (pagenumber, pagelength, 1, ostream); | |||
1340 | } | |||
1341 | ||||
1342 | /* Close out any unfinished output entry. */ | |||
1343 | ||||
1344 | void | |||
1345 | finish_index (FILE *ostream) | |||
1346 | { | |||
1347 | if (pending) | |||
1348 | fputs ("}\n", ostream); | |||
1349 | free (lastprimary); | |||
1350 | free (lastsecondary); | |||
1351 | } | |||
1352 | ||||
1353 | /* Copy the lines in the sorted order. | |||
1354 | Each line is copied out of the input file it was found in. */ | |||
1355 | ||||
1356 | void | |||
1357 | writelines (char **linearray, int nlines, FILE *ostream) | |||
1358 | { | |||
1359 | char **stop_line = linearray + nlines; | |||
1360 | char **next_line; | |||
1361 | ||||
1362 | init_index (); | |||
1363 | ||||
1364 | /* Output the text of the lines, and free the buffer space. */ | |||
1365 | ||||
1366 | for (next_line = linearray; next_line != stop_line; next_line++) | |||
1367 | { | |||
1368 | /* If -u was specified, output the line only if distinct from | |||
1369 | previous one. */ | |||
1370 | if (next_line == linearray | |||
1371 | /* Compare previous line with this one, using only the | |||
1372 | explicitly specd keyfields. */ | |||
1373 | || compare_general (*(next_line - 1), *next_line, 0L, 0L, | |||
1374 | num_keyfields - 1)) | |||
1375 | { | |||
1376 | char *p = *next_line; | |||
1377 | char c; | |||
1378 | ||||
1379 | while ((c = *p++) && c != '\n') | |||
1380 | /* Do nothing. */ ; | |||
1381 | *(p - 1) = 0; | |||
1382 | indexify (*next_line, ostream); | |||
1383 | } | |||
1384 | } | |||
1385 | ||||
1386 | finish_index (ostream); | |||
1387 | } | |||
1388 | ||||
1389 | /* Assume (and optionally verify) that each input file is sorted; | |||
1390 | merge them and output the result. | |||
1391 | Returns nonzero if any input file fails to be sorted. | |||
1392 | ||||
1393 | This is the high-level interface that can handle an unlimited | |||
1394 | number of files. */ | |||
1395 | ||||
1396 | #define MAX_DIRECT_MERGE10 10 | |||
1397 | ||||
1398 | int | |||
1399 | merge_files (char **infiles, int nfiles, char *outfile) | |||
1400 | { | |||
1401 | char **tempfiles; | |||
1402 | int ntemps; | |||
1403 | int i; | |||
1404 | int value = 0; | |||
1405 | int start_tempcount = tempcount; | |||
1406 | ||||
1407 | if (nfiles <= MAX_DIRECT_MERGE10) | |||
1408 | return merge_direct (infiles, nfiles, outfile); | |||
1409 | ||||
1410 | /* Merge groups of MAX_DIRECT_MERGE input files at a time, | |||
1411 | making a temporary file to hold each group's result. */ | |||
1412 | ||||
1413 | ntemps = (nfiles + MAX_DIRECT_MERGE10 - 1) / MAX_DIRECT_MERGE10; | |||
1414 | tempfiles = (char **) xmalloc (ntemps * sizeof (char *)); | |||
1415 | for (i = 0; i < ntemps; i++) | |||
1416 | { | |||
1417 | int nf = MAX_DIRECT_MERGE10; | |||
1418 | if (i + 1 == ntemps) | |||
1419 | nf = nfiles - i * MAX_DIRECT_MERGE10; | |||
1420 | tempfiles[i] = maketempname (++tempcount); | |||
1421 | if (!tempfiles[i]) | |||
1422 | pfatal_with_name("temp file"); | |||
1423 | value |= merge_direct (&infiles[i * MAX_DIRECT_MERGE10], nf, tempfiles[i]); | |||
1424 | } | |||
1425 | ||||
1426 | /* All temporary files that existed before are no longer needed | |||
1427 | since their contents have been merged into our new tempfiles. | |||
1428 | So delete them. */ | |||
1429 | flush_tempfiles (start_tempcount); | |||
1430 | ||||
1431 | /* Now merge the temporary files we created. */ | |||
1432 | ||||
1433 | merge_files (tempfiles, ntemps, outfile); | |||
1434 | ||||
1435 | free (tempfiles); | |||
1436 | ||||
1437 | return value; | |||
1438 | } | |||
1439 | ||||
1440 | /* Assume (and optionally verify) that each input file is sorted; | |||
1441 | merge them and output the result. | |||
1442 | Returns nonzero if any input file fails to be sorted. | |||
1443 | ||||
1444 | This version of merging will not work if the number of | |||
1445 | input files gets too high. Higher level functions | |||
1446 | use it only with a bounded number of input files. */ | |||
1447 | ||||
1448 | int | |||
1449 | merge_direct (char **infiles, int nfiles, char *outfile) | |||
1450 | { | |||
1451 | struct linebuffer *lb1, *lb2; | |||
1452 | struct linebuffer **thisline, **prevline; | |||
1453 | FILE **streams; | |||
1454 | int i; | |||
1455 | int nleft; | |||
1456 | int lossage = 0; | |||
1457 | int *file_lossage; | |||
1458 | struct linebuffer *prev_out = 0; | |||
1459 | FILE *ostream = stdout(&__sF[1]); | |||
1460 | ||||
1461 | if (outfile) | |||
1462 | { | |||
1463 | ostream = fopen (outfile, "w"); | |||
1464 | } | |||
1465 | if (!ostream) | |||
1466 | pfatal_with_name (outfile); | |||
1467 | ||||
1468 | init_index (); | |||
1469 | ||||
1470 | if (nfiles == 0) | |||
1471 | { | |||
1472 | if (outfile) | |||
1473 | fclose (ostream); | |||
1474 | return 0; | |||
1475 | } | |||
1476 | ||||
1477 | /* For each file, make two line buffers. Also, for each file, there | |||
1478 | is an element of `thisline' which points at any time to one of the | |||
1479 | file's two buffers, and an element of `prevline' which points to | |||
1480 | the other buffer. `thisline' is supposed to point to the next | |||
1481 | available line from the file, while `prevline' holds the last file | |||
1482 | line used, which is remembered so that we can verify that the file | |||
1483 | is properly sorted. */ | |||
1484 | ||||
1485 | /* lb1 and lb2 contain one buffer each per file. */ | |||
1486 | lb1 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer)); | |||
1487 | lb2 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer)); | |||
1488 | ||||
1489 | /* thisline[i] points to the linebuffer holding the next available | |||
1490 | line in file i, or is zero if there are no lines left in that file. */ | |||
1491 | thisline = (struct linebuffer **) | |||
1492 | xmalloc (nfiles * sizeof (struct linebuffer *)); | |||
1493 | /* prevline[i] points to the linebuffer holding the last used line | |||
1494 | from file i. This is just for verifying that file i is properly | |||
1495 | sorted. */ | |||
1496 | prevline = (struct linebuffer **) | |||
1497 | xmalloc (nfiles * sizeof (struct linebuffer *)); | |||
1498 | /* streams[i] holds the input stream for file i. */ | |||
1499 | streams = (FILE **) xmalloc (nfiles * sizeof (FILE *)); | |||
1500 | /* file_lossage[i] is nonzero if we already know file i is not | |||
1501 | properly sorted. */ | |||
1502 | file_lossage = (int *) xmalloc (nfiles * sizeof (int)); | |||
1503 | ||||
1504 | /* Allocate and initialize all that storage. */ | |||
1505 | ||||
1506 | for (i = 0; i < nfiles; i++) | |||
1507 | { | |||
1508 | initbuffer (&lb1[i]); | |||
1509 | initbuffer (&lb2[i]); | |||
1510 | thisline[i] = &lb1[i]; | |||
1511 | prevline[i] = &lb2[i]; | |||
1512 | file_lossage[i] = 0; | |||
1513 | streams[i] = fopen (infiles[i], "r"); | |||
1514 | if (!streams[i]) | |||
1515 | pfatal_with_name (infiles[i]); | |||
1516 | ||||
1517 | readline (thisline[i], streams[i]); | |||
1518 | } | |||
1519 | ||||
1520 | /* Keep count of number of files not at eof. */ | |||
1521 | nleft = nfiles; | |||
1522 | ||||
1523 | while (nleft) | |||
1524 | { | |||
1525 | struct linebuffer *best = 0; | |||
1526 | struct linebuffer *exch; | |||
1527 | int bestfile = -1; | |||
1528 | int i; | |||
1529 | ||||
1530 | /* Look at the next avail line of each file; choose the least one. */ | |||
1531 | ||||
1532 | for (i = 0; i < nfiles; i++) | |||
1533 | { | |||
1534 | if (thisline[i] && | |||
1535 | (!best || | |||
1536 | 0 < compare_general (best->buffer, thisline[i]->buffer, | |||
1537 | (long) bestfile, (long) i, num_keyfields))) | |||
1538 | { | |||
1539 | best = thisline[i]; | |||
1540 | bestfile = i; | |||
1541 | } | |||
1542 | } | |||
1543 | ||||
1544 | /* Output that line, unless it matches the previous one and we | |||
1545 | don't want duplicates. */ | |||
1546 | ||||
1547 | if (!(prev_out && | |||
1548 | !compare_general (prev_out->buffer, | |||
1549 | best->buffer, 0L, 1L, num_keyfields - 1))) | |||
1550 | indexify (best->buffer, ostream); | |||
1551 | prev_out = best; | |||
1552 | ||||
1553 | /* Now make the line the previous of its file, and fetch a new | |||
1554 | line from that file. */ | |||
1555 | ||||
1556 | exch = prevline[bestfile]; | |||
1557 | prevline[bestfile] = thisline[bestfile]; | |||
1558 | thisline[bestfile] = exch; | |||
1559 | ||||
1560 | while (1) | |||
1561 | { | |||
1562 | /* If the file has no more, mark it empty. */ | |||
1563 | ||||
1564 | if (feof (streams[bestfile])(!__isthreaded ? (((streams[bestfile])->_flags & 0x0020 ) != 0) : (feof)(streams[bestfile]))) | |||
1565 | { | |||
1566 | thisline[bestfile] = 0; | |||
1567 | /* Update the number of files still not empty. */ | |||
1568 | nleft--; | |||
1569 | break; | |||
1570 | } | |||
1571 | readline (thisline[bestfile], streams[bestfile]); | |||
1572 | if (thisline[bestfile]->buffer[0] || !feof (streams[bestfile])(!__isthreaded ? (((streams[bestfile])->_flags & 0x0020 ) != 0) : (feof)(streams[bestfile]))) | |||
1573 | break; | |||
1574 | } | |||
1575 | } | |||
1576 | ||||
1577 | finish_index (ostream); | |||
1578 | ||||
1579 | /* Free all storage and close all input streams. */ | |||
1580 | ||||
1581 | for (i = 0; i < nfiles; i++) | |||
1582 | { | |||
1583 | fclose (streams[i]); | |||
1584 | free (lb1[i].buffer); | |||
1585 | free (lb2[i].buffer); | |||
1586 | } | |||
1587 | free (file_lossage); | |||
1588 | free (lb1); | |||
1589 | free (lb2); | |||
1590 | free (thisline); | |||
1591 | free (prevline); | |||
1592 | free (streams); | |||
1593 | ||||
1594 | if (outfile) | |||
1595 | fclose (ostream); | |||
1596 | ||||
1597 | return lossage; | |||
1598 | } | |||
1599 | ||||
1600 | /* Print error message and exit. */ | |||
1601 | ||||
1602 | void | |||
1603 | fatal (const char *format, const char *arg) | |||
1604 | { | |||
1605 | error (format, arg); | |||
1606 | xexit (1); | |||
1607 | } | |||
1608 | ||||
1609 | /* Print error message. FORMAT is printf control string, ARG is arg for it. */ | |||
1610 | void | |||
1611 | error (const char *format, const char *arg) | |||
1612 | { | |||
1613 | printf ("%s: ", program_name); | |||
1614 | printf (format, arg); | |||
1615 | if (format[strlen (format) -1] != '\n') | |||
1616 | printf ("\n"); | |||
1617 | } | |||
1618 | ||||
1619 | void | |||
1620 | perror_with_name (const char *name) | |||
1621 | { | |||
1622 | fprintf (stderr(&__sF[2]), "%s: ", program_name); | |||
1623 | perror (name); | |||
1624 | } | |||
1625 | ||||
1626 | void | |||
1627 | pfatal_with_name (const char *name) | |||
1628 | { | |||
1629 | perror_with_name (name); | |||
1630 | xexit (1); | |||
1631 | } | |||
1632 | ||||
1633 | ||||
1634 | /* Return a newly-allocated string concatenating S1 and S2. */ | |||
1635 | ||||
1636 | char * | |||
1637 | concat (char *s1, char *s2) | |||
1638 | { | |||
1639 | int len1 = strlen (s1), len2 = strlen (s2); | |||
1640 | char *result = (char *) xmalloc (len1 + len2 + 1); | |||
1641 | ||||
1642 | strcpy (result, s1); | |||
1643 | strcpy (result + len1, s2); | |||
1644 | *(result + len1 + len2) = 0; | |||
1645 | ||||
1646 | return result; | |||
1647 | } |