Bug Summary

File:src/gnu/usr.bin/texinfo/util/texindex.c
Warning:line 339, column 11
Dereference of null pointer (loaded from variable 'arg')

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple amd64-unknown-openbsd7.0 -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name texindex.c -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 1 -pic-is-pie -mframe-pointer=all -relaxed-aliasing -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -target-feature +retpoline-indirect-calls -target-feature +retpoline-indirect-branches -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/usr/src/gnu/usr.bin/texinfo/obj/util -resource-dir /usr/local/lib/clang/13.0.0 -D HAVE_CONFIG_H -I . -I /usr/src/gnu/usr.bin/texinfo/util -I .. -I /usr/src/gnu/usr.bin/texinfo/lib -I ../intl -D LOCALEDIR="/usr/share/locale" -internal-isystem /usr/local/lib/clang/13.0.0/include -internal-externc-isystem /usr/include -O2 -fdebug-compilation-dir=/usr/src/gnu/usr.bin/texinfo/obj/util -ferror-limit 19 -fwrapv -D_RET_PROTECTOR -ret-protector -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-valloc -fno-builtin-free -fno-builtin-strdup -fno-builtin-strndup -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/ben/Projects/vmm/scan-build/2022-01-12-194120-40624-1 -x c /usr/src/gnu/usr.bin/texinfo/util/texindex.c
1/* texindex -- sort TeX index dribble output into an actual index.
2 $Id: texindex.c,v 1.6 2015/11/14 23:06:06 deraadt Exp $
3
4 Copyright (C) 1987, 1991, 1992, 1996, 1997, 1998, 1999, 2000, 2001,
5 2002, 2003, 2004 Free Software Foundation, Inc.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307. */
20
21#include "system.h"
22#include <getopt.h>
23
24static char *program_name = "texindex";
25
26#if defined (emacs)
27# include "../src/config.h"
28/* Some s/os.h files redefine these. */
29# undef read
30# undef close
31# undef write
32# undef open
33#endif
34
35#if !defined (HAVE_MEMSET1)
36#undef memset
37#define memset(ptr, ignore, count) bzero (ptr, count)
38#endif
39
40char *mktemp (char *);
41
42#if !defined (SEEK_SET0)
43# define SEEK_SET0 0
44# define SEEK_CUR1 1
45# define SEEK_END2 2
46#endif /* !SEEK_SET */
47
48struct linebuffer;
49
50/* When sorting in core, this structure describes one line
51 and the position and length of its first keyfield. */
52struct lineinfo
53{
54 char *text; /* The actual text of the line. */
55 union {
56 char *text; /* The start of the key (for textual comparison). */
57 long number; /* The numeric value (for numeric comparison). */
58 } key;
59 long keylen; /* Length of KEY field. */
60};
61
62/* This structure describes a field to use as a sort key. */
63struct keyfield
64{
65 int startwords; /* Number of words to skip. */
66 int startchars; /* Number of additional chars to skip. */
67 int endwords; /* Number of words to ignore at end. */
68 int endchars; /* Ditto for characters of last word. */
69 char ignore_blanks; /* Non-zero means ignore spaces and tabs. */
70 char fold_case; /* Non-zero means case doesn't matter. */
71 char reverse; /* Non-zero means compare in reverse order. */
72 char numeric; /* Non-zeros means field is ASCII numeric. */
73 char positional; /* Sort according to file position. */
74 char braced; /* Count balanced-braced groupings as fields. */
75};
76
77/* Vector of keyfields to use. */
78struct keyfield keyfields[3];
79
80/* Number of keyfields stored in that vector. */
81int num_keyfields = 3;
82
83/* Vector of input file names, terminated with a null pointer. */
84char **infiles;
85
86/* Vector of corresponding output file names, or NULL, meaning default it
87 (add an `s' to the end). */
88char **outfiles;
89
90/* Length of `infiles'. */
91int num_infiles;
92
93/* Pointer to the array of pointers to lines being sorted. */
94char **linearray;
95
96/* The allocated length of `linearray'. */
97long nlines;
98
99/* Directory to use for temporary files. On Unix, it ends with a slash. */
100char *tempdir;
101
102/* Number of last temporary file. */
103int tempcount;
104
105/* Number of last temporary file already deleted.
106 Temporary files are deleted by `flush_tempfiles' in order of creation. */
107int last_deleted_tempcount;
108
109/* During in-core sort, this points to the base of the data block
110 which contains all the lines of data. */
111char *text_base;
112
113/* Initially 0; changed to 1 if we want initials in this index. */
114int need_initials;
115
116/* Remembers the first initial letter seen in this index, so we can
117 determine whether we need initials in the sorted form. */
118char first_initial;
119
120/* Additional command switches .*/
121
122/* Nonzero means do not delete tempfiles -- for debugging. */
123int keep_tempfiles;
124
125/* Forward declarations of functions in this file. */
126void decode_command (int argc, char **argv);
127void sort_in_core (char *infile, int total, char *outfile);
128void sort_offline (char *infile, off_t total, char *outfile);
129char **parsefile (char *filename, char **nextline, char *data, long int size);
130char *find_field (struct keyfield *keyfield, char *str, long int *lengthptr);
131char *find_pos (char *str, int words, int chars, int ignore_blanks);
132long find_value (char *start, long int length);
133char *find_braced_pos (char *str, int words, int chars, int ignore_blanks);
134char *find_braced_end (char *str);
135void writelines (char **linearray, int nlines, FILE *ostream);
136int compare_field (struct keyfield *keyfield, char *start1,
137 long int length1, long int pos1, char *start2,
138 long int length2, long int pos2);
139int compare_full (const void *, const void *);
140long readline (struct linebuffer *linebuffer, FILE *stream);
141int merge_files (char **infiles, int nfiles, char *outfile);
142int merge_direct (char **infiles, int nfiles, char *outfile);
143void pfatal_with_name (const char *name);
144void fatal (const char *format, const char *arg);
145void error (const char *format, const char *arg);
146void *xmalloc (), *xrealloc ();
147char *concat (char *s1, char *s2);
148void flush_tempfiles (int to_count);
149
150#define MAX_IN_CORE_SORT500000 500000
151
152int
153main (int argc, char **argv)
154{
155 int i;
156
157 tempcount = 0;
158 last_deleted_tempcount = 0;
159
160#ifdef HAVE_SETLOCALE1
161 /* Set locale via LC_ALL. */
162 setlocale (LC_ALL0, "");
163#endif
164
165 if (pledge ("stdio rpath wpath cpath tmppath", NULL((void *)0)) == -1)
1
Assuming the condition is false
2
Taking false branch
166 pfatal_with_name ("pledge");
167
168 /* Set the text message domain. */
169 bindtextdomain (PACKAGE, LOCALEDIR)((const char *) ("/usr/share/locale"));
170 textdomain (PACKAGE)((const char *) ("texinfo"));
171
172 /* In case we write to a redirected stdout that fails. */
173 /* not ready atexit (close_stdout); */
174
175 /* Describe the kind of sorting to do. */
176 /* The first keyfield uses the first braced field and folds case. */
177 keyfields[0].braced = 1;
178 keyfields[0].fold_case = 1;
179 keyfields[0].endwords = -1;
180 keyfields[0].endchars = -1;
181
182 /* The second keyfield uses the second braced field, numerically. */
183 keyfields[1].braced = 1;
184 keyfields[1].numeric = 1;
185 keyfields[1].startwords = 1;
186 keyfields[1].endwords = -1;
187 keyfields[1].endchars = -1;
188
189 /* The third keyfield (which is ignored while discarding duplicates)
190 compares the whole line. */
191 keyfields[2].endwords = -1;
192 keyfields[2].endchars = -1;
193
194 decode_command (argc, argv);
3
Calling 'decode_command'
195
196 /* Process input files completely, one by one. */
197
198 for (i = 0; i < num_infiles; i++)
199 {
200 int desc;
201 off_t ptr;
202 char *outfile;
203 struct stat instat;
204
205 desc = open (infiles[i], O_RDONLY0x0000, 0);
206 if (desc < 0)
207 pfatal_with_name (infiles[i]);
208
209 if (stat (infiles[i], &instat))
210 pfatal_with_name (infiles[i]);
211 if (S_ISDIR (instat.st_mode)((instat.st_mode & 0170000) == 0040000))
212 {
213#ifdef EISDIR21
214 errno(*__errno()) = EISDIR21;
215#endif
216 pfatal_with_name (infiles[i]);
217 }
218
219 lseek (desc, (off_t) 0, SEEK_END2);
220 ptr = (off_t) lseek (desc, (off_t) 0, SEEK_CUR1);
221
222 close (desc);
223
224 outfile = outfiles[i];
225 if (!outfile)
226 outfile = concat (infiles[i], "s");
227
228 need_initials = 0;
229 first_initial = '\0';
230
231 if (ptr < MAX_IN_CORE_SORT500000)
232 /* Sort a small amount of data. */
233 sort_in_core (infiles[i], (int)ptr, outfile);
234 else
235 sort_offline (infiles[i], ptr, outfile);
236 }
237
238 flush_tempfiles (tempcount);
239 xexit (0);
240 return 0; /* Avoid bogus warnings. */
241}
242
243typedef struct
244{
245 char *long_name;
246 char *short_name;
247 int *variable_ref;
248 int variable_value;
249 char *arg_name;
250 char *doc_string;
251} TEXINDEX_OPTION;
252
253TEXINDEX_OPTION texindex_options[] = {
254 { "--help", "-h", (int *)NULL((void *)0), 0, (char *)NULL((void *)0),
255 N_("display this help and exit")("display this help and exit") },
256 { "--keep", "-k", &keep_tempfiles, 1, (char *)NULL((void *)0),
257 N_("keep temporary files around after processing")("keep temporary files around after processing") },
258 { "--no-keep", 0, &keep_tempfiles, 0, (char *)NULL((void *)0),
259 N_("do not keep temporary files around after processing (default)")("do not keep temporary files around after processing (default)"
)
},
260 { "--output", "-o", (int *)NULL((void *)0), 0, "FILE",
261 N_("send output to FILE")("send output to FILE") },
262 { "--version", (char *)NULL((void *)0), (int *)NULL((void *)0), 0, (char *)NULL((void *)0),
263 N_("display version information and exit")("display version information and exit") },
264 { (char *)NULL((void *)0), (char *)NULL((void *)0), (int *)NULL((void *)0), 0, (char *)NULL((void *)0) }
265};
266
267void
268usage (int result_value)
269{
270 register int i;
271 FILE *f = result_value ? stderr(&__sF[2]) : stdout(&__sF[1]);
272
273 fprintf (f, _("Usage: %s [OPTION]... FILE...\n")((const char *) ("Usage: %s [OPTION]... FILE...\n")), program_name);
274 fprintf (f, _("Generate a sorted index for each TeX output FILE.\n")((const char *) ("Generate a sorted index for each TeX output FILE.\n"
))
);
275 /* Avoid trigraph nonsense. */
276 fprintf (f,
277_("Usually FILE... is specified as `foo.%c%c\' for a document `foo.texi'.\n")((const char *) ("Usually FILE... is specified as `foo.%c%c\' for a document `foo.texi'.\n"
))
,
278 '?', '?'); /* avoid trigraph in cat-id-tbl.c */
279 fprintf (f, _("\nOptions:\n")((const char *) ("\nOptions:\n")));
280
281 for (i = 0; texindex_options[i].long_name; i++)
282 {
283 putc (' ', f)(!__isthreaded ? __sputc(' ', f) : (putc)(' ', f));
284
285 if (texindex_options[i].short_name)
286 fprintf (f, "%s, ", texindex_options[i].short_name);
287
288 fprintf (f, "%s %s",
289 texindex_options[i].long_name,
290 texindex_options[i].arg_name
291 ? texindex_options[i].arg_name : "");
292
293 fprintf (f, "\t%s\n", _(texindex_options[i].doc_string)((const char *) (texindex_options[i].doc_string)));
294 }
295 fputs (_("\n\((const char *) ("\nEmail bug reports to bug-texinfo@gnu.org,\ngeneral questions and discussion to help-texinfo@gnu.org.\nTexinfo home page: http://www.gnu.org/software/texinfo/"
))
296Email bug reports to bug-texinfo@gnu.org,\n\((const char *) ("\nEmail bug reports to bug-texinfo@gnu.org,\ngeneral questions and discussion to help-texinfo@gnu.org.\nTexinfo home page: http://www.gnu.org/software/texinfo/"
))
297general questions and discussion to help-texinfo@gnu.org.\n\((const char *) ("\nEmail bug reports to bug-texinfo@gnu.org,\ngeneral questions and discussion to help-texinfo@gnu.org.\nTexinfo home page: http://www.gnu.org/software/texinfo/"
))
298Texinfo home page: http://www.gnu.org/software/texinfo/")((const char *) ("\nEmail bug reports to bug-texinfo@gnu.org,\ngeneral questions and discussion to help-texinfo@gnu.org.\nTexinfo home page: http://www.gnu.org/software/texinfo/"
))
, f);
299 fputs ("\n", f);
300
301 xexit (result_value);
302}
303
304/* Decode the command line arguments to set the parameter variables
305 and set up the vector of keyfields and the vector of input files. */
306
307void
308decode_command (int argc, char **argv)
309{
310 int arg_index = 1;
311 char **ip;
312 char **op;
313
314 /* Store default values into parameter variables. */
315
316 tempdir = getenv ("TMPDIR");
317 if (tempdir == NULL((void *)0))
4
Assuming 'tempdir' is equal to NULL
5
Taking true branch
318 tempdir = getenv ("TEMP");
319 if (tempdir == NULL((void *)0))
6
Assuming 'tempdir' is equal to NULL
7
Taking true branch
320 tempdir = getenv ("TMP");
321 if (tempdir == NULL((void *)0))
8
Assuming 'tempdir' is equal to NULL
9
Taking true branch
322 tempdir = DEFAULT_TMPDIR"/tmp/";
323 else
324 tempdir = concat (tempdir, "/");
325
326 keep_tempfiles = 0;
327
328 /* Allocate ARGC input files, which must be enough. */
329
330 infiles = (char **) xmalloc (argc * sizeof (char *));
331 outfiles = (char **) xmalloc (argc * sizeof (char *));
332 ip = infiles;
333 op = outfiles;
334
335 while (arg_index < argc)
10
Assuming 'arg_index' is < 'argc'
11
Loop condition is true. Entering loop body
24
Assuming 'arg_index' is < 'argc'
25
Loop condition is true. Entering loop body
336 {
337 char *arg = argv[arg_index++];
26
'arg' initialized to a null pointer value
338
339 if (*arg == '-')
12
Assuming the condition is true
13
Taking true branch
27
Dereference of null pointer (loaded from variable 'arg')
340 {
341 if (strcmp (arg, "--version") == 0)
14
Assuming the condition is false
15
Taking false branch
342 {
343 printf ("texindex (GNU %s) %s\n", PACKAGE"texinfo", VERSION"4.8");
344 puts ("");
345 puts ("Copyright (C) 2004 Free Software Foundation, Inc.");
346 printf (_("There is NO warranty. You may redistribute this software\n\((const char *) ("There is NO warranty. You may redistribute this software\nunder the terms of the GNU General Public License.\nFor more information about these matters, see the files named COPYING.\n"
))
347under the terms of the GNU General Public License.\n\((const char *) ("There is NO warranty. You may redistribute this software\nunder the terms of the GNU General Public License.\nFor more information about these matters, see the files named COPYING.\n"
))
348For more information about these matters, see the files named COPYING.\n")((const char *) ("There is NO warranty. You may redistribute this software\nunder the terms of the GNU General Public License.\nFor more information about these matters, see the files named COPYING.\n"
))
);
349 xexit (0);
350 }
351 else if ((strcmp (arg, "--keep") == 0) ||
16
Assuming the condition is false
18
Taking false branch
352 (strcmp (arg, "-k") == 0))
17
Assuming the condition is false
353 {
354 keep_tempfiles = 1;
355 }
356 else if ((strcmp (arg, "--help") == 0) ||
19
Assuming the condition is false
21
Taking false branch
357 (strcmp (arg, "-h") == 0))
20
Assuming the condition is false
358 {
359 usage (0);
360 }
361 else if ((strcmp (arg, "--output") == 0) ||
362 (strcmp (arg, "-o") == 0))
363 {
364 if (argv[arg_index] != (char *)NULL((void *)0))
22
Assuming pointer value is null
23
Taking false branch
365 {
366 arg_index++;
367 if (op > outfiles)
368 *(op - 1) = argv[arg_index];
369 }
370 else
371 usage (1);
372 }
373 else
374 usage (1);
375 }
376 else
377 {
378 *ip++ = arg;
379 *op++ = (char *)NULL((void *)0);
380 }
381 }
382
383 /* Record number of keyfields and terminate list of filenames. */
384 num_infiles = ip - infiles;
385 *ip = (char *)NULL((void *)0);
386 if (num_infiles == 0)
387 usage (1);
388}
389
390/* Return a name for temporary file COUNT. */
391
392static char *
393maketempname (int count)
394{
395 static char *tempbase = NULL((void *)0);
396 char tempsuffix[10];
397 char *name;
398 int fd;
399
400 if (!tempbase)
401 {
402 int fd;
403 tempbase = concat (tempdir, "txidxXXXXXX");
404
405 fd = mkstemp (tempbase);
406 if (fd == -1)
407 pfatal_with_name (tempbase);
408 }
409
410 sprintf (tempsuffix, ".%d", count);
411 name = concat (tempbase, tempsuffix);
412
413 fd = open (name, O_CREAT0x0200|O_EXCL0x0800|O_WRONLY0x0001, 0666);
414 if (fd == -1)
415 return NULL((void *)0);
416 else
417 {
418 close(fd);
419 return name;
420 }
421}
422
423
424/* Delete all temporary files up to TO_COUNT. */
425
426void
427flush_tempfiles (int to_count)
428{
429 if (keep_tempfiles)
430 return;
431 while (last_deleted_tempcount < to_count)
432 unlink (maketempname (++last_deleted_tempcount));
433}
434
435
436/* Compare LINE1 and LINE2 according to the specified set of keyfields. */
437
438int
439compare_full (const void *p1, const void *p2)
440{
441 char **line1 = (char **) p1;
442 char **line2 = (char **) p2;
443 int i;
444
445 /* Compare using the first keyfield;
446 if that does not distinguish the lines, try the second keyfield;
447 and so on. */
448
449 for (i = 0; i < num_keyfields; i++)
450 {
451 long length1, length2;
452 char *start1 = find_field (&keyfields[i], *line1, &length1);
453 char *start2 = find_field (&keyfields[i], *line2, &length2);
454 int tem = compare_field (&keyfields[i], start1, length1,
455 *line1 - text_base,
456 start2, length2, *line2 - text_base);
457 if (tem)
458 {
459 if (keyfields[i].reverse)
460 return -tem;
461 return tem;
462 }
463 }
464
465 return 0; /* Lines match exactly. */
466}
467
468/* Compare LINE1 and LINE2, described by structures
469 in which the first keyfield is identified in advance.
470 For positional sorting, assumes that the order of the lines in core
471 reflects their nominal order. */
472int
473compare_prepared (const void *p1, const void *p2)
474{
475 struct lineinfo *line1 = (struct lineinfo *) p1;
476 struct lineinfo *line2 = (struct lineinfo *) p2;
477 int i;
478 int tem;
479 char *text1, *text2;
480
481 /* Compare using the first keyfield, which has been found for us already. */
482 if (keyfields->positional)
483 {
484 if (line1->text - text_base > line2->text - text_base)
485 tem = 1;
486 else
487 tem = -1;
488 }
489 else if (keyfields->numeric)
490 tem = line1->key.number - line2->key.number;
491 else
492 tem = compare_field (keyfields, line1->key.text, line1->keylen, 0,
493 line2->key.text, line2->keylen, 0);
494 if (tem)
495 {
496 if (keyfields->reverse)
497 return -tem;
498 return tem;
499 }
500
501 text1 = line1->text;
502 text2 = line2->text;
503
504 /* Compare using the second keyfield;
505 if that does not distinguish the lines, try the third keyfield;
506 and so on. */
507
508 for (i = 1; i < num_keyfields; i++)
509 {
510 long length1, length2;
511 char *start1 = find_field (&keyfields[i], text1, &length1);
512 char *start2 = find_field (&keyfields[i], text2, &length2);
513 int tem = compare_field (&keyfields[i], start1, length1,
514 text1 - text_base,
515 start2, length2, text2 - text_base);
516 if (tem)
517 {
518 if (keyfields[i].reverse)
519 return -tem;
520 return tem;
521 }
522 }
523
524 return 0; /* Lines match exactly. */
525}
526
527/* Like compare_full but more general.
528 You can pass any strings, and you can say how many keyfields to use.
529 POS1 and POS2 should indicate the nominal positional ordering of
530 the two lines in the input. */
531
532int
533compare_general (char *str1, char *str2, long int pos1, long int pos2, int use_keyfields)
534{
535 int i;
536
537 /* Compare using the first keyfield;
538 if that does not distinguish the lines, try the second keyfield;
539 and so on. */
540
541 for (i = 0; i < use_keyfields; i++)
542 {
543 long length1, length2;
544 char *start1 = find_field (&keyfields[i], str1, &length1);
545 char *start2 = find_field (&keyfields[i], str2, &length2);
546 int tem = compare_field (&keyfields[i], start1, length1, pos1,
547 start2, length2, pos2);
548 if (tem)
549 {
550 if (keyfields[i].reverse)
551 return -tem;
552 return tem;
553 }
554 }
555
556 return 0; /* Lines match exactly. */
557}
558
559/* Find the start and length of a field in STR according to KEYFIELD.
560 A pointer to the starting character is returned, and the length
561 is stored into the int that LENGTHPTR points to. */
562
563char *
564find_field (struct keyfield *keyfield, char *str, long int *lengthptr)
565{
566 char *start;
567 char *end;
568 char *(*fun) ();
569
570 if (keyfield->braced)
571 fun = find_braced_pos;
572 else
573 fun = find_pos;
574
575 start = (*fun) (str, keyfield->startwords, keyfield->startchars,
576 keyfield->ignore_blanks);
577 if (keyfield->endwords < 0)
578 {
579 if (keyfield->braced)
580 end = find_braced_end (start);
581 else
582 {
583 end = start;
584 while (*end && *end != '\n')
585 end++;
586 }
587 }
588 else
589 {
590 end = (*fun) (str, keyfield->endwords, keyfield->endchars, 0);
591 if (end - str < start - str)
592 end = start;
593 }
594 *lengthptr = end - start;
595 return start;
596}
597
598/* Return a pointer to a specified place within STR,
599 skipping (from the beginning) WORDS words and then CHARS chars.
600 If IGNORE_BLANKS is nonzero, we skip all blanks
601 after finding the specified word. */
602
603char *
604find_pos (char *str, int words, int chars, int ignore_blanks)
605{
606 int i;
607 char *p = str;
608
609 for (i = 0; i < words; i++)
610 {
611 char c;
612 /* Find next bunch of nonblanks and skip them. */
613 while ((c = *p) == ' ' || c == '\t')
614 p++;
615 while ((c = *p) && c != '\n' && !(c == ' ' || c == '\t'))
616 p++;
617 if (!*p || *p == '\n')
618 return p;
619 }
620
621 while (*p == ' ' || *p == '\t')
622 p++;
623
624 for (i = 0; i < chars; i++)
625 {
626 if (!*p || *p == '\n')
627 break;
628 p++;
629 }
630 return p;
631}
632
633/* Like find_pos but assumes that each field is surrounded by braces
634 and that braces within fields are balanced. */
635
636char *
637find_braced_pos (char *str, int words, int chars, int ignore_blanks)
638{
639 int i;
640 int bracelevel;
641 char *p = str;
642 char c;
643
644 for (i = 0; i < words; i++)
645 {
646 bracelevel = 1;
647 while ((c = *p++) != '{' && c != '\n' && c)
648 /* Do nothing. */ ;
649 if (c != '{')
650 return p - 1;
651 while (bracelevel)
652 {
653 c = *p++;
654 if (c == '{')
655 bracelevel++;
656 if (c == '}')
657 bracelevel--;
658 if (c == 0 || c == '\n')
659 return p - 1;
660 }
661 }
662
663 while ((c = *p++) != '{' && c != '\n' && c)
664 /* Do nothing. */ ;
665
666 if (c != '{')
667 return p - 1;
668
669 if (ignore_blanks)
670 while ((c = *p) == ' ' || c == '\t')
671 p++;
672
673 for (i = 0; i < chars; i++)
674 {
675 if (!*p || *p == '\n')
676 break;
677 p++;
678 }
679 return p;
680}
681
682/* Find the end of the balanced-brace field which starts at STR.
683 The position returned is just before the closing brace. */
684
685char *
686find_braced_end (char *str)
687{
688 int bracelevel;
689 char *p = str;
690 char c;
691
692 bracelevel = 1;
693 while (bracelevel)
694 {
695 c = *p++;
696 if (c == '{')
697 bracelevel++;
698 if (c == '}')
699 bracelevel--;
700 if (c == 0 || c == '\n')
701 return p - 1;
702 }
703 return p - 1;
704}
705
706long
707find_value (char *start, long int length)
708{
709 while (length != 0L)
710 {
711 if (isdigit (*start))
712 return atol (start);
713 length--;
714 start++;
715 }
716 return 0l;
717}
718
719/* Vector used to translate characters for comparison.
720 This is how we make all alphanumerics follow all else,
721 and ignore case in the first sorting. */
722int char_order[256];
723
724void
725init_char_order (void)
726{
727 int i;
728 for (i = 1; i < 256; i++)
729 char_order[i] = i;
730
731 for (i = '0'; i <= '9'; i++)
732 char_order[i] += 512;
733
734 for (i = 'a'; i <= 'z'; i++)
735 {
736 char_order[i] = 512 + i;
737 char_order[i + 'A' - 'a'] = 512 + i;
738 }
739}
740
741/* Compare two fields (each specified as a start pointer and a character count)
742 according to KEYFIELD.
743 The sign of the value reports the relation between the fields. */
744
745int
746compare_field (struct keyfield *keyfield, char *start1, long int length1,
747 long int pos1, char *start2, long int length2, long int pos2)
748{
749 if (keyfields->positional)
750 {
751 if (pos1 > pos2)
752 return 1;
753 else
754 return -1;
755 }
756 if (keyfield->numeric)
757 {
758 long value = find_value (start1, length1) - find_value (start2, length2);
759 if (value > 0)
760 return 1;
761 if (value < 0)
762 return -1;
763 return 0;
764 }
765 else
766 {
767 char *p1 = start1;
768 char *p2 = start2;
769 char *e1 = start1 + length1;
770 char *e2 = start2 + length2;
771
772 while (1)
773 {
774 int c1, c2;
775
776 if (p1 == e1)
777 c1 = 0;
778 else
779 c1 = *p1++;
780 if (p2 == e2)
781 c2 = 0;
782 else
783 c2 = *p2++;
784
785 if (char_order[c1] != char_order[c2])
786 return char_order[c1] - char_order[c2];
787 if (!c1)
788 break;
789 }
790
791 /* Strings are equal except possibly for case. */
792 p1 = start1;
793 p2 = start2;
794 while (1)
795 {
796 int c1, c2;
797
798 if (p1 == e1)
799 c1 = 0;
800 else
801 c1 = *p1++;
802 if (p2 == e2)
803 c2 = 0;
804 else
805 c2 = *p2++;
806
807 if (c1 != c2)
808 /* Reverse sign here so upper case comes out last. */
809 return c2 - c1;
810 if (!c1)
811 break;
812 }
813
814 return 0;
815 }
816}
817
818/* A `struct linebuffer' is a structure which holds a line of text.
819 `readline' reads a line from a stream into a linebuffer
820 and works regardless of the length of the line. */
821
822struct linebuffer
823{
824 long size;
825 char *buffer;
826};
827
828/* Initialize LINEBUFFER for use. */
829
830void
831initbuffer (struct linebuffer *linebuffer)
832{
833 linebuffer->size = 200;
834 linebuffer->buffer = (char *) xmalloc (200);
835}
836
837/* Read a line of text from STREAM into LINEBUFFER.
838 Return the length of the line. */
839
840long
841readline (struct linebuffer *linebuffer, FILE *stream)
842{
843 char *buffer = linebuffer->buffer;
844 char *p = linebuffer->buffer;
845 char *end = p + linebuffer->size;
846
847 while (1)
848 {
849 int c = getc (stream)(!__isthreaded ? (--(stream)->_r < 0 ? __srget(stream) :
(int)(*(stream)->_p++)) : (getc)(stream))
;
850 if (p == end)
851 {
852 buffer = (char *) xrealloc (buffer, linebuffer->size *= 2);
853 p += buffer - linebuffer->buffer;
854 end += buffer - linebuffer->buffer;
855 linebuffer->buffer = buffer;
856 }
857 if (c < 0 || c == '\n')
858 {
859 *p = 0;
860 break;
861 }
862 *p++ = c;
863 }
864
865 return p - buffer;
866}
867
868/* Sort an input file too big to sort in core. */
869
870void
871sort_offline (char *infile, off_t total, char *outfile)
872{
873 /* More than enough. */
874 int ntemps = 2 * (total + MAX_IN_CORE_SORT500000 - 1) / MAX_IN_CORE_SORT500000;
875 char **tempfiles = (char **) xmalloc (ntemps * sizeof (char *));
876 FILE *istream = fopen (infile, "r");
877 int i;
878 struct linebuffer lb;
879 long linelength;
880 int failure = 0;
881
882 initbuffer (&lb);
883
884 /* Read in one line of input data. */
885
886 linelength = readline (&lb, istream);
887
888 if (lb.buffer[0] != '\\' && lb.buffer[0] != '@')
889 {
890 error (_("%s: not a texinfo index file")((const char *) ("%s: not a texinfo index file")), infile);
891 return;
892 }
893
894 /* Split up the input into `ntemps' temporary files, or maybe fewer,
895 and put the new files' names into `tempfiles' */
896
897 for (i = 0; i < ntemps; i++)
898 {
899 char *outname = maketempname (++tempcount);
900 FILE *ostream;
901 long tempsize = 0;
902
903 if (!outname)
904 pfatal_with_name("temporary file");
905 ostream = fopen (outname, "w");
906 if (!outname || !ostream)
907 pfatal_with_name (outname);
908 tempfiles[i] = outname;
909
910 /* Copy lines into this temp file as long as it does not make file
911 "too big" or until there are no more lines. */
912
913 while (tempsize + linelength + 1 <= MAX_IN_CORE_SORT500000)
914 {
915 tempsize += linelength + 1;
916 fputs (lb.buffer, ostream);
917 putc ('\n', ostream)(!__isthreaded ? __sputc('\n', ostream) : (putc)('\n', ostream
))
;
918
919 /* Read another line of input data. */
920
921 linelength = readline (&lb, istream);
922 if (!linelength && feof (istream)(!__isthreaded ? (((istream)->_flags & 0x0020) != 0) :
(feof)(istream))
)
923 break;
924
925 if (lb.buffer[0] != '\\' && lb.buffer[0] != '@')
926 {
927 error (_("%s: not a texinfo index file")((const char *) ("%s: not a texinfo index file")), infile);
928 failure = 1;
929 goto fail;
930 }
931 }
932 fclose (ostream);
933 if (feof (istream)(!__isthreaded ? (((istream)->_flags & 0x0020) != 0) :
(feof)(istream))
)
934 break;
935 }
936
937 free (lb.buffer);
938
939fail:
940 /* Record number of temp files we actually needed. */
941
942 ntemps = i;
943
944 /* Sort each tempfile into another tempfile.
945 Delete the first set of tempfiles and put the names of the second
946 into `tempfiles'. */
947
948 for (i = 0; i < ntemps; i++)
949 {
950 char *newtemp = maketempname (++tempcount);
951 sort_in_core (tempfiles[i], MAX_IN_CORE_SORT500000, newtemp);
952 if (!keep_tempfiles)
953 unlink (tempfiles[i]);
954 tempfiles[i] = newtemp;
955 }
956
957 if (failure)
958 return;
959
960 /* Merge the tempfiles together and indexify. */
961
962 merge_files (tempfiles, ntemps, outfile);
963}
964
965/* Sort INFILE, whose size is TOTAL,
966 assuming that is small enough to be done in-core,
967 then indexify it and send the output to OUTFILE (or to stdout). */
968
969void
970sort_in_core (char *infile, int total, char *outfile)
971{
972 char **nextline;
973 char *data = (char *) xmalloc (total + 1);
974 char *file_data;
975 long file_size;
976 int i;
977 FILE *ostream = stdout(&__sF[1]);
978 struct lineinfo *lineinfo;
979
980 /* Read the contents of the file into the moby array `data'. */
981
982 int desc = open (infile, O_RDONLY0x0000, 0);
983
984 if (desc < 0)
985 fatal (_("failure reopening %s")((const char *) ("failure reopening %s")), infile);
986 for (file_size = 0;;)
987 {
988 i = read (desc, data + file_size, total - file_size);
989 if (i <= 0)
990 break;
991 file_size += i;
992 }
993 file_data = data;
994 data[file_size] = 0;
995
996 close (desc);
997
998 if (file_size > 0 && data[0] != '\\' && data[0] != '@')
999 {
1000 error (_("%s: not a texinfo index file")((const char *) ("%s: not a texinfo index file")), infile);
1001 return;
1002 }
1003
1004 init_char_order ();
1005
1006 /* Sort routines want to know this address. */
1007
1008 text_base = data;
1009
1010 /* Create the array of pointers to lines, with a default size
1011 frequently enough. */
1012
1013 nlines = total / 50;
1014 if (!nlines)
1015 nlines = 2;
1016 linearray = (char **) xmalloc (nlines * sizeof (char *));
1017
1018 /* `nextline' points to the next free slot in this array.
1019 `nlines' is the allocated size. */
1020
1021 nextline = linearray;
1022
1023 /* Parse the input file's data, and make entries for the lines. */
1024
1025 nextline = parsefile (infile, nextline, file_data, file_size);
1026 if (nextline == 0)
1027 {
1028 error (_("%s: not a texinfo index file")((const char *) ("%s: not a texinfo index file")), infile);
1029 return;
1030 }
1031
1032 /* Sort the lines. */
1033
1034 /* If we have enough space, find the first keyfield of each line in advance.
1035 Make a `struct lineinfo' for each line, which records the keyfield
1036 as well as the line, and sort them. */
1037
1038 lineinfo = malloc ((nextline - linearray) * sizeof (struct lineinfo));
1039
1040 if (lineinfo)
1041 {
1042 struct lineinfo *lp;
1043 char **p;
1044
1045 for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
1046 {
1047 lp->text = *p;
1048 lp->key.text = find_field (keyfields, *p, &lp->keylen);
1049 if (keyfields->numeric)
1050 lp->key.number = find_value (lp->key.text, lp->keylen);
1051 }
1052
1053 qsort (lineinfo, nextline - linearray, sizeof (struct lineinfo),
1054 compare_prepared);
1055
1056 for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
1057 *p = lp->text;
1058
1059 free (lineinfo);
1060 }
1061 else
1062 qsort (linearray, nextline - linearray, sizeof (char *), compare_full);
1063
1064 /* Open the output file. */
1065
1066 if (outfile)
1067 {
1068 ostream = fopen (outfile, "w");
1069 if (!ostream)
1070 pfatal_with_name (outfile);
1071 }
1072
1073 writelines (linearray, nextline - linearray, ostream);
1074 if (outfile)
1075 fclose (ostream);
1076
1077 free (linearray);
1078 free (data);
1079}
1080
1081/* Parse an input string in core into lines.
1082 DATA is the input string, and SIZE is its length.
1083 Data goes in LINEARRAY starting at NEXTLINE.
1084 The value returned is the first entry in LINEARRAY still unused.
1085 Value 0 means input file contents are invalid. */
1086
1087char **
1088parsefile (char *filename, char **nextline, char *data, long int size)
1089{
1090 char *p, *end;
1091 char **line = nextline;
1092
1093 p = data;
1094 end = p + size;
1095 *end = 0;
1096
1097 while (p != end)
1098 {
1099 if (p[0] != '\\' && p[0] != '@')
1100 return 0;
1101
1102 *line = p;
1103
1104 /* Find the first letter of the first field of this line. If it
1105 is different from the first letter of the first field of the
1106 first line, we need initial headers in the output index. */
1107 while (*p && *p != '{')
1108 p++;
1109 if (p == end)
1110 return 0;
1111 p++;
1112 if (first_initial)
1113 {
1114 if (first_initial != toupper (*p))
1115 need_initials = 1;
1116 }
1117 else
1118 first_initial = toupper (*p);
1119
1120 while (*p && *p != '\n')
1121 p++;
1122 if (p != end)
1123 p++;
1124
1125 line++;
1126 if (line == linearray + nlines)
1127 {
1128 char **old = linearray;
1129 linearray = xrealloc (linearray, sizeof (char *) * (nlines *= 4));
1130 line += linearray - old;
1131 }
1132 }
1133
1134 return line;
1135}
1136
1137/* Indexification is a filter applied to the sorted lines
1138 as they are being written to the output file.
1139 Multiple entries for the same name, with different page numbers,
1140 get combined into a single entry with multiple page numbers.
1141 The first braced field, which is used for sorting, is discarded.
1142 However, its first character is examined, folded to lower case,
1143 and if it is different from that in the previous line fed to us
1144 a \initial line is written with one argument, the new initial.
1145
1146 If an entry has four braced fields, then the second and third
1147 constitute primary and secondary names.
1148 In this case, each change of primary name
1149 generates a \primary line which contains only the primary name,
1150 and in between these are \secondary lines which contain
1151 just a secondary name and page numbers. */
1152
1153/* The last primary name we wrote a \primary entry for.
1154 If only one level of indexing is being done, this is the last name seen. */
1155char *lastprimary;
1156/* Length of storage allocated for lastprimary. */
1157int lastprimarylength;
1158
1159/* Similar, for the secondary name. */
1160char *lastsecondary;
1161int lastsecondarylength;
1162
1163/* Zero if we are not in the middle of writing an entry.
1164 One if we have written the beginning of an entry but have not
1165 yet written any page numbers into it.
1166 Greater than one if we have written the beginning of an entry
1167 plus at least one page number. */
1168int pending;
1169
1170/* The initial (for sorting purposes) of the last primary entry written.
1171 When this changes, a \initial {c} line is written */
1172
1173char *lastinitial;
1174
1175int lastinitiallength;
1176
1177/* When we need a string of length 1 for the value of lastinitial,
1178 store it here. */
1179
1180char lastinitial1[2];
1181
1182/* Initialize static storage for writing an index. */
1183
1184void
1185init_index (void)
1186{
1187 pending = 0;
1188 lastinitial = lastinitial1;
1189 lastinitial1[0] = 0;
1190 lastinitial1[1] = 0;
1191 lastinitiallength = 0;
1192 lastprimarylength = 100;
1193 lastprimary = (char *) xmalloc (lastprimarylength + 1);
1194 memset (lastprimary, '\0', lastprimarylength + 1);
1195 lastsecondarylength = 100;
1196 lastsecondary = (char *) xmalloc (lastsecondarylength + 1);
1197 memset (lastsecondary, '\0', lastsecondarylength + 1);
1198}
1199
1200/* Indexify. Merge entries for the same name,
1201 insert headers for each initial character, etc. */
1202
1203void
1204indexify (char *line, FILE *ostream)
1205{
1206 char *primary, *secondary, *pagenumber;
1207 int primarylength, secondarylength = 0, pagelength;
1208 int nosecondary;
1209 int initiallength;
1210 char *initial;
1211 char initial1[2];
1212 register char *p;
1213
1214 /* First, analyze the parts of the entry fed to us this time. */
1215
1216 p = find_braced_pos (line, 0, 0, 0);
1217 if (*p == '{')
1218 {
1219 initial = p;
1220 /* Get length of inner pair of braces starting at `p',
1221 including that inner pair of braces. */
1222 initiallength = find_braced_end (p + 1) + 1 - p;
1223 }
1224 else
1225 {
1226 initial = initial1;
1227 initial1[0] = toupper (*p);
1228 initial1[1] = 0;
1229 initiallength = 1;
1230 }
1231
1232 pagenumber = find_braced_pos (line, 1, 0, 0);
1233 pagelength = find_braced_end (pagenumber) - pagenumber;
1234 if (pagelength == 0)
1235 fatal (_("No page number in %s")((const char *) ("No page number in %s")), line);
1236
1237 primary = find_braced_pos (line, 2, 0, 0);
1238 primarylength = find_braced_end (primary) - primary;
1239
1240 secondary = find_braced_pos (line, 3, 0, 0);
1241 nosecondary = !*secondary;
1242 if (!nosecondary)
1243 secondarylength = find_braced_end (secondary) - secondary;
1244
1245 /* If the primary is different from before, make a new primary entry. */
1246 if (strncmp (primary, lastprimary, primarylength))
1247 {
1248 /* Close off current secondary entry first, if one is open. */
1249 if (pending)
1250 {
1251 fputs ("}\n", ostream);
1252 pending = 0;
1253 }
1254
1255 /* If this primary has a different initial, include an entry for
1256 the initial. */
1257 if (need_initials &&
1258 (initiallength != lastinitiallength ||
1259 strncmp (initial, lastinitial, initiallength)))
1260 {
1261 fprintf (ostream, "\\initial {");
1262 fwrite (initial, 1, initiallength, ostream);
1263 fputs ("}\n", ostream);
1264 if (initial == initial1)
1265 {
1266 lastinitial = lastinitial1;
1267 *lastinitial1 = *initial1;
1268 }
1269 else
1270 {
1271 lastinitial = initial;
1272 }
1273 lastinitiallength = initiallength;
1274 }
1275
1276 /* Make the entry for the primary. */
1277 if (nosecondary)
1278 fputs ("\\entry {", ostream);
1279 else
1280 fputs ("\\primary {", ostream);
1281 fwrite (primary, primarylength, 1, ostream);
1282 if (nosecondary)
1283 {
1284 fputs ("}{", ostream);
1285 pending = 1;
1286 }
1287 else
1288 fputs ("}\n", ostream);
1289
1290 /* Record name of most recent primary. */
1291 if (lastprimarylength < primarylength)
1292 {
1293 lastprimarylength = primarylength + 100;
1294 lastprimary = (char *) xrealloc (lastprimary,
1295 1 + lastprimarylength);
1296 }
1297 strncpy (lastprimary, primary, primarylength);
1298 lastprimary[primarylength] = 0;
1299
1300 /* There is no current secondary within this primary, now. */
1301 lastsecondary[0] = 0;
1302 }
1303
1304 /* Should not have an entry with no subtopic following one with a
1305 subtopic. */
1306
1307 if (nosecondary && *lastsecondary)
1308 error (_("entry %s follows an entry with a secondary name")((const char *) ("entry %s follows an entry with a secondary name"
))
, line);
1309
1310 /* Start a new secondary entry if necessary. */
1311 if (!nosecondary && strncmp (secondary, lastsecondary, secondarylength))
1312 {
1313 if (pending)
1314 {
1315 fputs ("}\n", ostream);
1316 pending = 0;
1317 }
1318
1319 /* Write the entry for the secondary. */
1320 fputs ("\\secondary {", ostream);
1321 fwrite (secondary, secondarylength, 1, ostream);
1322 fputs ("}{", ostream);
1323 pending = 1;
1324
1325 /* Record name of most recent secondary. */
1326 if (lastsecondarylength < secondarylength)
1327 {
1328 lastsecondarylength = secondarylength + 100;
1329 lastsecondary = (char *) xrealloc (lastsecondary,
1330 1 + lastsecondarylength);
1331 }
1332 strncpy (lastsecondary, secondary, secondarylength);
1333 lastsecondary[secondarylength] = 0;
1334 }
1335
1336 /* Here to add one more page number to the current entry. */
1337 if (pending++ != 1)
1338 fputs (", ", ostream); /* Punctuate first, if this is not the first. */
1339 fwrite (pagenumber, pagelength, 1, ostream);
1340}
1341
1342/* Close out any unfinished output entry. */
1343
1344void
1345finish_index (FILE *ostream)
1346{
1347 if (pending)
1348 fputs ("}\n", ostream);
1349 free (lastprimary);
1350 free (lastsecondary);
1351}
1352
1353/* Copy the lines in the sorted order.
1354 Each line is copied out of the input file it was found in. */
1355
1356void
1357writelines (char **linearray, int nlines, FILE *ostream)
1358{
1359 char **stop_line = linearray + nlines;
1360 char **next_line;
1361
1362 init_index ();
1363
1364 /* Output the text of the lines, and free the buffer space. */
1365
1366 for (next_line = linearray; next_line != stop_line; next_line++)
1367 {
1368 /* If -u was specified, output the line only if distinct from
1369 previous one. */
1370 if (next_line == linearray
1371 /* Compare previous line with this one, using only the
1372 explicitly specd keyfields. */
1373 || compare_general (*(next_line - 1), *next_line, 0L, 0L,
1374 num_keyfields - 1))
1375 {
1376 char *p = *next_line;
1377 char c;
1378
1379 while ((c = *p++) && c != '\n')
1380 /* Do nothing. */ ;
1381 *(p - 1) = 0;
1382 indexify (*next_line, ostream);
1383 }
1384 }
1385
1386 finish_index (ostream);
1387}
1388
1389/* Assume (and optionally verify) that each input file is sorted;
1390 merge them and output the result.
1391 Returns nonzero if any input file fails to be sorted.
1392
1393 This is the high-level interface that can handle an unlimited
1394 number of files. */
1395
1396#define MAX_DIRECT_MERGE10 10
1397
1398int
1399merge_files (char **infiles, int nfiles, char *outfile)
1400{
1401 char **tempfiles;
1402 int ntemps;
1403 int i;
1404 int value = 0;
1405 int start_tempcount = tempcount;
1406
1407 if (nfiles <= MAX_DIRECT_MERGE10)
1408 return merge_direct (infiles, nfiles, outfile);
1409
1410 /* Merge groups of MAX_DIRECT_MERGE input files at a time,
1411 making a temporary file to hold each group's result. */
1412
1413 ntemps = (nfiles + MAX_DIRECT_MERGE10 - 1) / MAX_DIRECT_MERGE10;
1414 tempfiles = (char **) xmalloc (ntemps * sizeof (char *));
1415 for (i = 0; i < ntemps; i++)
1416 {
1417 int nf = MAX_DIRECT_MERGE10;
1418 if (i + 1 == ntemps)
1419 nf = nfiles - i * MAX_DIRECT_MERGE10;
1420 tempfiles[i] = maketempname (++tempcount);
1421 if (!tempfiles[i])
1422 pfatal_with_name("temp file");
1423 value |= merge_direct (&infiles[i * MAX_DIRECT_MERGE10], nf, tempfiles[i]);
1424 }
1425
1426 /* All temporary files that existed before are no longer needed
1427 since their contents have been merged into our new tempfiles.
1428 So delete them. */
1429 flush_tempfiles (start_tempcount);
1430
1431 /* Now merge the temporary files we created. */
1432
1433 merge_files (tempfiles, ntemps, outfile);
1434
1435 free (tempfiles);
1436
1437 return value;
1438}
1439
1440/* Assume (and optionally verify) that each input file is sorted;
1441 merge them and output the result.
1442 Returns nonzero if any input file fails to be sorted.
1443
1444 This version of merging will not work if the number of
1445 input files gets too high. Higher level functions
1446 use it only with a bounded number of input files. */
1447
1448int
1449merge_direct (char **infiles, int nfiles, char *outfile)
1450{
1451 struct linebuffer *lb1, *lb2;
1452 struct linebuffer **thisline, **prevline;
1453 FILE **streams;
1454 int i;
1455 int nleft;
1456 int lossage = 0;
1457 int *file_lossage;
1458 struct linebuffer *prev_out = 0;
1459 FILE *ostream = stdout(&__sF[1]);
1460
1461 if (outfile)
1462 {
1463 ostream = fopen (outfile, "w");
1464 }
1465 if (!ostream)
1466 pfatal_with_name (outfile);
1467
1468 init_index ();
1469
1470 if (nfiles == 0)
1471 {
1472 if (outfile)
1473 fclose (ostream);
1474 return 0;
1475 }
1476
1477 /* For each file, make two line buffers. Also, for each file, there
1478 is an element of `thisline' which points at any time to one of the
1479 file's two buffers, and an element of `prevline' which points to
1480 the other buffer. `thisline' is supposed to point to the next
1481 available line from the file, while `prevline' holds the last file
1482 line used, which is remembered so that we can verify that the file
1483 is properly sorted. */
1484
1485 /* lb1 and lb2 contain one buffer each per file. */
1486 lb1 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer));
1487 lb2 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer));
1488
1489 /* thisline[i] points to the linebuffer holding the next available
1490 line in file i, or is zero if there are no lines left in that file. */
1491 thisline = (struct linebuffer **)
1492 xmalloc (nfiles * sizeof (struct linebuffer *));
1493 /* prevline[i] points to the linebuffer holding the last used line
1494 from file i. This is just for verifying that file i is properly
1495 sorted. */
1496 prevline = (struct linebuffer **)
1497 xmalloc (nfiles * sizeof (struct linebuffer *));
1498 /* streams[i] holds the input stream for file i. */
1499 streams = (FILE **) xmalloc (nfiles * sizeof (FILE *));
1500 /* file_lossage[i] is nonzero if we already know file i is not
1501 properly sorted. */
1502 file_lossage = (int *) xmalloc (nfiles * sizeof (int));
1503
1504 /* Allocate and initialize all that storage. */
1505
1506 for (i = 0; i < nfiles; i++)
1507 {
1508 initbuffer (&lb1[i]);
1509 initbuffer (&lb2[i]);
1510 thisline[i] = &lb1[i];
1511 prevline[i] = &lb2[i];
1512 file_lossage[i] = 0;
1513 streams[i] = fopen (infiles[i], "r");
1514 if (!streams[i])
1515 pfatal_with_name (infiles[i]);
1516
1517 readline (thisline[i], streams[i]);
1518 }
1519
1520 /* Keep count of number of files not at eof. */
1521 nleft = nfiles;
1522
1523 while (nleft)
1524 {
1525 struct linebuffer *best = 0;
1526 struct linebuffer *exch;
1527 int bestfile = -1;
1528 int i;
1529
1530 /* Look at the next avail line of each file; choose the least one. */
1531
1532 for (i = 0; i < nfiles; i++)
1533 {
1534 if (thisline[i] &&
1535 (!best ||
1536 0 < compare_general (best->buffer, thisline[i]->buffer,
1537 (long) bestfile, (long) i, num_keyfields)))
1538 {
1539 best = thisline[i];
1540 bestfile = i;
1541 }
1542 }
1543
1544 /* Output that line, unless it matches the previous one and we
1545 don't want duplicates. */
1546
1547 if (!(prev_out &&
1548 !compare_general (prev_out->buffer,
1549 best->buffer, 0L, 1L, num_keyfields - 1)))
1550 indexify (best->buffer, ostream);
1551 prev_out = best;
1552
1553 /* Now make the line the previous of its file, and fetch a new
1554 line from that file. */
1555
1556 exch = prevline[bestfile];
1557 prevline[bestfile] = thisline[bestfile];
1558 thisline[bestfile] = exch;
1559
1560 while (1)
1561 {
1562 /* If the file has no more, mark it empty. */
1563
1564 if (feof (streams[bestfile])(!__isthreaded ? (((streams[bestfile])->_flags & 0x0020
) != 0) : (feof)(streams[bestfile]))
)
1565 {
1566 thisline[bestfile] = 0;
1567 /* Update the number of files still not empty. */
1568 nleft--;
1569 break;
1570 }
1571 readline (thisline[bestfile], streams[bestfile]);
1572 if (thisline[bestfile]->buffer[0] || !feof (streams[bestfile])(!__isthreaded ? (((streams[bestfile])->_flags & 0x0020
) != 0) : (feof)(streams[bestfile]))
)
1573 break;
1574 }
1575 }
1576
1577 finish_index (ostream);
1578
1579 /* Free all storage and close all input streams. */
1580
1581 for (i = 0; i < nfiles; i++)
1582 {
1583 fclose (streams[i]);
1584 free (lb1[i].buffer);
1585 free (lb2[i].buffer);
1586 }
1587 free (file_lossage);
1588 free (lb1);
1589 free (lb2);
1590 free (thisline);
1591 free (prevline);
1592 free (streams);
1593
1594 if (outfile)
1595 fclose (ostream);
1596
1597 return lossage;
1598}
1599
1600/* Print error message and exit. */
1601
1602void
1603fatal (const char *format, const char *arg)
1604{
1605 error (format, arg);
1606 xexit (1);
1607}
1608
1609/* Print error message. FORMAT is printf control string, ARG is arg for it. */
1610void
1611error (const char *format, const char *arg)
1612{
1613 printf ("%s: ", program_name);
1614 printf (format, arg);
1615 if (format[strlen (format) -1] != '\n')
1616 printf ("\n");
1617}
1618
1619void
1620perror_with_name (const char *name)
1621{
1622 fprintf (stderr(&__sF[2]), "%s: ", program_name);
1623 perror (name);
1624}
1625
1626void
1627pfatal_with_name (const char *name)
1628{
1629 perror_with_name (name);
1630 xexit (1);
1631}
1632
1633
1634/* Return a newly-allocated string concatenating S1 and S2. */
1635
1636char *
1637concat (char *s1, char *s2)
1638{
1639 int len1 = strlen (s1), len2 = strlen (s2);
1640 char *result = (char *) xmalloc (len1 + len2 + 1);
1641
1642 strcpy (result, s1);
1643 strcpy (result + len1, s2);
1644 *(result + len1 + len2) = 0;
1645
1646 return result;
1647}