File: | src/usr.bin/sort/sort.c |
Warning: | line 178, column 10 Although the value stored to 'linelen' is used in the enclosing expression, the value is never actually read from 'linelen' |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | /* $OpenBSD: sort.c,v 1.90 2019/06/28 13:35:03 deraadt Exp $ */ |
2 | |
3 | /*- |
4 | * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org> |
5 | * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com> |
6 | * All rights reserved. |
7 | * |
8 | * Redistribution and use in source and binary forms, with or without |
9 | * modification, are permitted provided that the following conditions |
10 | * are met: |
11 | * 1. Redistributions of source code must retain the above copyright |
12 | * notice, this list of conditions and the following disclaimer. |
13 | * 2. Redistributions in binary form must reproduce the above copyright |
14 | * notice, this list of conditions and the following disclaimer in the |
15 | * documentation and/or other materials provided with the distribution. |
16 | * |
17 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
27 | * SUCH DAMAGE. |
28 | */ |
29 | |
30 | #include <sys/resource.h> |
31 | #include <sys/stat.h> |
32 | #include <sys/sysctl.h> |
33 | #include <sys/types.h> |
34 | |
35 | #include <err.h> |
36 | #include <errno(*__errno()).h> |
37 | #include <getopt.h> |
38 | #include <limits.h> |
39 | #include <md5.h> |
40 | #include <regex.h> |
41 | #include <signal.h> |
42 | #include <stdbool.h> |
43 | #include <stdint.h> |
44 | #include <stdio.h> |
45 | #include <stdlib.h> |
46 | #include <string.h> |
47 | #include <unistd.h> |
48 | #include <wchar.h> |
49 | #include <wctype.h> |
50 | |
51 | #include "coll.h" |
52 | #include "file.h" |
53 | #include "sort.h" |
54 | |
/*
 * Short-option string handed to getopt_long(3) in main().
 *
 * Unless GNUSORT_COMPATIBILITY is defined the string starts with "+",
 * which asks getopt_long(3) not to permute argv and to stop at the
 * first non-option argument.
 */
#ifdef GNUSORT_COMPATIBILITY
# define PERMUTE ""
#else
# define PERMUTE "+"
#endif
#define OPTIONS PERMUTE "bCcdfgHhik:Mmno:RrS:st:T:uVz"
61 | |
62 | static bool_Bool need_random; |
63 | static const char *random_source; |
64 | |
65 | MD5_CTX md5_ctx; |
66 | |
67 | struct sort_opts sort_opts_vals; |
68 | |
69 | bool_Bool debug_sort; |
70 | bool_Bool need_hint; |
71 | |
72 | static struct sort_mods default_sort_mods_object; |
73 | struct sort_mods * const default_sort_mods = &default_sort_mods_object; |
74 | |
75 | /* |
76 | * Arguments from file (when file0-from option is used: |
77 | */ |
78 | static size_t argc_from_file0 = (size_t)-1; |
79 | static char **argv_from_file0; |
80 | |
/*
 * Placeholder symbols for options which have no single-character
 * equivalent.  Numbering starts just above CHAR_MAX so that none of
 * these values can collide with a short option character returned by
 * getopt_long(3).
 */
enum {
	SORT_OPT = CHAR_MAX + 1,
	HELP_OPT,
	FF_OPT,
	BS_OPT,
	VERSION_OPT,
	DEBUG_OPT,
	RANDOMSOURCE_OPT,
	COMPRESSPROGRAM_OPT,
	QSORT_OPT,
	HEAPSORT_OPT,
	RADIXSORT_OPT,
	MMAP_OPT
};
98 | |
/*
 * Sort-type flags of which at most one may be given; the table is
 * consulted by check_mutually_exclusive_flags().
 */
#define NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS 6
static const char mutually_exclusive_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = { 'M', 'n', 'g', 'R', 'h', 'V' };
101 | |
102 | static const struct option long_options[] = { |
103 | { "batch-size", required_argument1, NULL((void *)0), BS_OPT }, |
104 | { "buffer-size", required_argument1, NULL((void *)0), 'S' }, |
105 | { "check", optional_argument2, NULL((void *)0), 'c' }, |
106 | { "check=silent|quiet", optional_argument2, NULL((void *)0), 'C' }, |
107 | { "compress-program", required_argument1, NULL((void *)0), COMPRESSPROGRAM_OPT }, |
108 | { "debug", no_argument0, NULL((void *)0), DEBUG_OPT }, |
109 | { "dictionary-order", no_argument0, NULL((void *)0), 'd' }, |
110 | { "field-separator", required_argument1, NULL((void *)0), 't' }, |
111 | { "files0-from", required_argument1, NULL((void *)0), FF_OPT }, |
112 | { "general-numeric-sort", no_argument0, NULL((void *)0), 'g' }, |
113 | { "heapsort", no_argument0, NULL((void *)0), HEAPSORT_OPT }, |
114 | { "help", no_argument0, NULL((void *)0), HELP_OPT }, |
115 | { "human-numeric-sort", no_argument0, NULL((void *)0), 'h' }, |
116 | { "ignore-leading-blanks", no_argument0, NULL((void *)0), 'b' }, |
117 | { "ignore-case", no_argument0, NULL((void *)0), 'f' }, |
118 | { "ignore-nonprinting", no_argument0, NULL((void *)0), 'i' }, |
119 | { "key", required_argument1, NULL((void *)0), 'k' }, |
120 | { "merge", no_argument0, NULL((void *)0), 'm' }, |
121 | { "mergesort", no_argument0, NULL((void *)0), 'H' }, |
122 | { "mmap", no_argument0, NULL((void *)0), MMAP_OPT }, |
123 | { "month-sort", no_argument0, NULL((void *)0), 'M' }, |
124 | { "numeric-sort", no_argument0, NULL((void *)0), 'n' }, |
125 | { "output", required_argument1, NULL((void *)0), 'o' }, |
126 | { "qsort", no_argument0, NULL((void *)0), QSORT_OPT }, |
127 | { "radixsort", no_argument0, NULL((void *)0), RADIXSORT_OPT }, |
128 | { "random-sort", no_argument0, NULL((void *)0), 'R' }, |
129 | { "random-source", required_argument1, NULL((void *)0), RANDOMSOURCE_OPT }, |
130 | { "reverse", no_argument0, NULL((void *)0), 'r' }, |
131 | { "sort", required_argument1, NULL((void *)0), SORT_OPT }, |
132 | { "stable", no_argument0, NULL((void *)0), 's' }, |
133 | { "temporary-directory", required_argument1, NULL((void *)0), 'T' }, |
134 | { "unique", no_argument0, NULL((void *)0), 'u' }, |
135 | { "version", no_argument0, NULL((void *)0), VERSION_OPT }, |
136 | { "version-sort", no_argument0, NULL((void *)0), 'V' }, |
137 | { "zero-terminated", no_argument0, NULL((void *)0), 'z' }, |
138 | { NULL((void *)0), no_argument0, NULL((void *)0), 0 } |
139 | }; |
140 | |
141 | /* |
142 | * Check where sort modifier is present |
143 | */ |
144 | static bool_Bool |
145 | sort_modifier_empty(struct sort_mods *sm) |
146 | { |
147 | return !(sm->Mflag || sm->Vflag || sm->nflag || sm->gflag || |
148 | sm->rflag || sm->Rflag || sm->hflag || sm->dflag || sm->fflag); |
149 | } |
150 | |
151 | /* |
152 | * Print out usage text. |
153 | */ |
154 | static __dead__attribute__((__noreturn__)) void |
155 | usage(int exit_val) |
156 | { |
157 | fprintf(exit_val ? stderr(&__sF[2]) : stdout(&__sF[1]), |
158 | "usage: %s [-bCcdfgHhiMmnRrsuVz] [-k field1[,field2]] [-o output] " |
159 | "[-S size]\n\t[-T dir] [-t char] [file ...]\n", getprogname()); |
160 | exit(exit_val); |
161 | } |
162 | |
163 | /* |
164 | * Read input file names from a file (file0-from option). |
165 | */ |
166 | static void |
167 | read_fns_from_file0(const char *fn) |
168 | { |
169 | FILE *f; |
170 | char *line = NULL((void *)0); |
171 | size_t linesize = 0; |
172 | ssize_t linelen; |
173 | |
174 | f = fopen(fn, "r"); |
175 | if (f == NULL((void *)0)) |
176 | err(2, "%s", fn); |
177 | |
178 | while ((linelen = getdelim(&line, &linesize, '\0', f)) != -1) { |
Although the value stored to 'linelen' is used in the enclosing expression, the value is never actually read from 'linelen' | |
179 | if (*line != '\0') { |
180 | if (argc_from_file0 == (size_t)-1) |
181 | argc_from_file0 = 0; |
182 | ++argc_from_file0; |
183 | argv_from_file0 = sort_reallocarray(argv_from_file0, |
184 | argc_from_file0, sizeof(char *)); |
185 | argv_from_file0[argc_from_file0 - 1] = line; |
186 | } else { |
187 | free(line); |
188 | } |
189 | line = NULL((void *)0); |
190 | linesize = 0; |
191 | } |
192 | if (ferror(f)(!__isthreaded ? (((f)->_flags & 0x0040) != 0) : (ferror )(f))) |
193 | err(2, "%s: getdelim", fn); |
194 | |
195 | closefile(f, fn); |
196 | } |
197 | |
198 | /* |
199 | * Check how much RAM is available for the sort. |
200 | */ |
201 | static void |
202 | set_hw_params(void) |
203 | { |
204 | unsigned long long free_memory; |
205 | long long user_memory; |
206 | struct rlimit rl; |
207 | size_t len; |
208 | int mib[] = { CTL_HW6, HW_USERMEM6420 }; |
209 | |
210 | /* Get total user (non-kernel) memory. */ |
211 | len = sizeof(user_memory); |
212 | if (sysctl(mib, 2, &user_memory, &len, NULL((void *)0), 0) == -1) |
213 | user_memory = -1; |
214 | |
215 | /* Increase our data size to the max */ |
216 | if (getrlimit(RLIMIT_DATA2, &rl) == 0) { |
217 | free_memory = (unsigned long long)rl.rlim_cur; |
218 | rl.rlim_cur = rl.rlim_max; |
219 | if (setrlimit(RLIMIT_DATA2, &rl) == 0) { |
220 | free_memory = (unsigned long long)rl.rlim_max; |
221 | } else { |
222 | warn("Can't set resource limit to max data size"); |
223 | } |
224 | } else { |
225 | free_memory = 1000000; |
226 | warn("Can't get resource limit for data size"); |
227 | } |
228 | |
229 | /* We prefer to use temp files rather than swap space. */ |
230 | if (user_memory != -1 && free_memory > user_memory) |
231 | free_memory = user_memory; |
232 | |
233 | available_free_memory = free_memory / 2; |
234 | } |
235 | |
236 | /* |
237 | * Set directory temporary files. |
238 | */ |
239 | static void |
240 | set_tmpdir(void) |
241 | { |
242 | if (!issetugid()) { |
243 | char *td; |
244 | |
245 | td = getenv("TMPDIR"); |
246 | if (td != NULL((void *)0)) |
247 | tmpdir = td; |
248 | } |
249 | } |
250 | |
251 | /* |
252 | * Parse -S option. |
253 | */ |
254 | static unsigned long long |
255 | parse_memory_buffer_value(const char *value) |
256 | { |
257 | char *endptr; |
258 | unsigned long long membuf; |
259 | |
260 | membuf = strtoll(value, &endptr, 10); |
261 | if (endptr == value || (long long)membuf < 0 || |
262 | (errno(*__errno()) == ERANGE34 && membuf == LLONG_MAX9223372036854775807LL)) |
263 | goto invalid; |
264 | |
265 | switch (*endptr) { |
266 | case 'Y': |
267 | if (membuf > ULLONG_MAX(9223372036854775807LL*2ULL+1ULL) / 1024) |
268 | goto invalid; |
269 | membuf *= 1024; |
270 | /* FALLTHROUGH */ |
271 | case 'Z': |
272 | if (membuf > ULLONG_MAX(9223372036854775807LL*2ULL+1ULL) / 1024) |
273 | goto invalid; |
274 | membuf *= 1024; |
275 | /* FALLTHROUGH */ |
276 | case 'E': |
277 | if (membuf > ULLONG_MAX(9223372036854775807LL*2ULL+1ULL) / 1024) |
278 | goto invalid; |
279 | membuf *= 1024; |
280 | /* FALLTHROUGH */ |
281 | case 'P': |
282 | if (membuf > ULLONG_MAX(9223372036854775807LL*2ULL+1ULL) / 1024) |
283 | goto invalid; |
284 | membuf *= 1024; |
285 | /* FALLTHROUGH */ |
286 | case 'T': |
287 | if (membuf > ULLONG_MAX(9223372036854775807LL*2ULL+1ULL) / 1024) |
288 | goto invalid; |
289 | membuf *= 1024; |
290 | /* FALLTHROUGH */ |
291 | case 'G': |
292 | if (membuf > ULLONG_MAX(9223372036854775807LL*2ULL+1ULL) / 1024) |
293 | goto invalid; |
294 | membuf *= 1024; |
295 | /* FALLTHROUGH */ |
296 | case 'M': |
297 | if (membuf > ULLONG_MAX(9223372036854775807LL*2ULL+1ULL) / 1024) |
298 | goto invalid; |
299 | membuf *= 1024; |
300 | /* FALLTHROUGH */ |
301 | case '\0': |
302 | case 'K': |
303 | if (membuf > ULLONG_MAX(9223372036854775807LL*2ULL+1ULL) / 1024) |
304 | goto invalid; |
305 | membuf *= 1024; |
306 | /* FALLTHROUGH */ |
307 | case 'b': |
308 | break; |
309 | case '%': |
310 | if (available_free_memory != 0 && |
311 | membuf > ULLONG_MAX(9223372036854775807LL*2ULL+1ULL) / available_free_memory) |
312 | goto invalid; |
313 | membuf = (available_free_memory * membuf) / |
314 | 100; |
315 | break; |
316 | default: |
317 | warnc(EINVAL22, "%s", optarg); |
318 | membuf = available_free_memory; |
319 | } |
320 | if (membuf > SIZE_MAX0xffffffffffffffffUL) |
321 | goto invalid; |
322 | return membuf; |
323 | invalid: |
324 | errx(2, "invalid memory buffer size: %s", value); |
325 | } |
326 | |
327 | /* |
328 | * Signal handler that clears the temporary files. |
329 | */ |
330 | static void |
331 | sig_handler(int sig __unused__attribute__((__unused__))) |
332 | { |
333 | clear_tmp_files(); |
334 | _exit(2); |
335 | } |
336 | |
337 | /* |
338 | * Set signal handler on panic signals. |
339 | */ |
340 | static void |
341 | set_signal_handler(void) |
342 | { |
343 | struct sigaction sa; |
344 | int i, signals[] = {SIGTERM15, SIGHUP1, SIGINT2, SIGUSR130, SIGUSR231, |
345 | SIGPIPE13, SIGXCPU24, SIGXFSZ25, 0}; |
346 | |
347 | memset(&sa, 0, sizeof(sa)); |
348 | sigfillset(&sa.sa_mask); |
349 | sa.sa_flags = SA_RESTART0x0002; |
350 | sa.sa_handler__sigaction_u.__sa_handler = sig_handler; |
351 | |
352 | for (i = 0; signals[i] != 0; i++) { |
353 | if (sigaction(signals[i], &sa, NULL((void *)0)) == -1) { |
354 | warn("sigaction(%s)", strsignal(signals[i])); |
355 | continue; |
356 | } |
357 | } |
358 | } |
359 | |
/*
 * Report an unrecognised feature name and exit with status 2.
 * "what" is the offending text as typed by the user.
 */
static void
unknown(const char *what)
{
	errx(2, "Unknown feature: %s", what);
}
368 | |
369 | /* |
370 | * Check whether contradictory input options are used. |
371 | */ |
372 | static void |
373 | check_mutually_exclusive_flags(char c, bool_Bool *mef_flags) |
374 | { |
375 | int i, fo_index, mec; |
376 | bool_Bool found_others, found_this; |
377 | |
378 | found_others = found_this = false0; |
379 | fo_index = 0; |
380 | |
381 | for (i = 0; i < NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS6; i++) { |
382 | mec = mutually_exclusive_flags[i]; |
383 | |
384 | if (mec != c) { |
385 | if (mef_flags[i]) { |
386 | if (found_this) { |
387 | errx(2, |
388 | "%c:%c: mutually exclusive flags", |
389 | c, mec); |
390 | } |
391 | found_others = true1; |
392 | fo_index = i; |
393 | } |
394 | } else { |
395 | if (found_others) { |
396 | errx(2, |
397 | "%c:%c: mutually exclusive flags", |
398 | c, mutually_exclusive_flags[fo_index]); |
399 | } |
400 | mef_flags[i] = true1; |
401 | found_this = true1; |
402 | } |
403 | } |
404 | } |
405 | |
406 | /* |
407 | * Initialise sort opts data. |
408 | */ |
409 | static void |
410 | set_sort_opts(void) |
411 | { |
412 | memset(&default_sort_mods_object, 0, |
413 | sizeof(default_sort_mods_object)); |
414 | memset(&sort_opts_vals, 0, sizeof(sort_opts_vals)); |
415 | default_sort_mods_object.func = |
416 | get_sort_func(&default_sort_mods_object); |
417 | } |
418 | |
419 | /* |
420 | * Set a sort modifier on a sort modifiers object. |
421 | */ |
422 | static bool_Bool |
423 | set_sort_modifier(struct sort_mods *sm, int c) |
424 | { |
425 | switch (c) { |
426 | case 'b': |
427 | sm->bflag = true1; |
428 | break; |
429 | case 'd': |
430 | sm->dflag = true1; |
431 | break; |
432 | case 'f': |
433 | sm->fflag = true1; |
434 | break; |
435 | case 'g': |
436 | sm->gflag = true1; |
437 | need_hint = true1; |
438 | break; |
439 | case 'i': |
440 | sm->iflag = true1; |
441 | break; |
442 | case 'R': |
443 | sm->Rflag = true1; |
444 | need_random = true1; |
445 | break; |
446 | case 'M': |
447 | initialise_months(); |
448 | sm->Mflag = true1; |
449 | need_hint = true1; |
450 | break; |
451 | case 'n': |
452 | sm->nflag = true1; |
453 | need_hint = true1; |
454 | break; |
455 | case 'r': |
456 | sm->rflag = true1; |
457 | break; |
458 | case 'V': |
459 | sm->Vflag = true1; |
460 | break; |
461 | case 'h': |
462 | sm->hflag = true1; |
463 | need_hint = true1; |
464 | break; |
465 | default: |
466 | return false0; |
467 | } |
468 | sort_opts_vals.complex_sort = true1; |
469 | sm->func = get_sort_func(sm); |
470 | |
471 | return true1; |
472 | } |
473 | |
474 | /* |
475 | * Parse POS in -k option. |
476 | */ |
477 | static int |
478 | parse_pos(const char *s, struct key_specs *ks, bool_Bool *mef_flags, bool_Bool second) |
479 | { |
480 | regmatch_t pmatch[4]; |
481 | regex_t re; |
482 | char *c, *f; |
483 | const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([bdfirMngRhV]+)?$"; |
484 | size_t len, nmatch; |
485 | int ret; |
486 | |
487 | ret = -1; |
488 | nmatch = 4; |
489 | c = f = NULL((void *)0); |
490 | |
491 | if (regcomp(&re, sregexp, REG_EXTENDED0001) != 0) |
492 | return -1; |
493 | |
494 | if (regexec(&re, s, nmatch, pmatch, 0) != 0) |
495 | goto end; |
496 | |
497 | if (pmatch[0].rm_eo <= pmatch[0].rm_so) |
498 | goto end; |
499 | |
500 | if (pmatch[1].rm_eo <= pmatch[1].rm_so) |
501 | goto end; |
502 | |
503 | len = pmatch[1].rm_eo - pmatch[1].rm_so; |
504 | |
505 | f = sort_malloc(len + 1); |
506 | memcpy(f, s + pmatch[1].rm_so, len); |
507 | f[len] = '\0'; |
508 | |
509 | if (second) { |
510 | errno(*__errno()) = 0; |
511 | ks->f2 = (size_t)strtoul(f, NULL((void *)0), 10); |
512 | if (errno(*__errno()) != 0) |
513 | goto end; |
514 | if (ks->f2 == 0) { |
515 | warn("0 field in key specs"); |
516 | goto end; |
517 | } |
518 | } else { |
519 | errno(*__errno()) = 0; |
520 | ks->f1 = (size_t)strtoul(f, NULL((void *)0), 10); |
521 | if (errno(*__errno()) != 0) |
522 | goto end; |
523 | if (ks->f1 == 0) { |
524 | warn("0 field in key specs"); |
525 | goto end; |
526 | } |
527 | } |
528 | |
529 | if (pmatch[2].rm_eo > pmatch[2].rm_so) { |
530 | len = pmatch[2].rm_eo - pmatch[2].rm_so - 1; |
531 | |
532 | c = sort_malloc(len + 1); |
533 | memcpy(c, s + pmatch[2].rm_so + 1, len); |
534 | c[len] = '\0'; |
535 | |
536 | if (second) { |
537 | errno(*__errno()) = 0; |
538 | ks->c2 = (size_t)strtoul(c, NULL((void *)0), 10); |
539 | if (errno(*__errno()) != 0) |
540 | goto end; |
541 | } else { |
542 | errno(*__errno()) = 0; |
543 | ks->c1 = (size_t)strtoul(c, NULL((void *)0), 10); |
544 | if (errno(*__errno()) != 0) |
545 | goto end; |
546 | if (ks->c1 == 0) { |
547 | warn("0 column in key specs"); |
548 | goto end; |
549 | } |
550 | } |
551 | } else { |
552 | if (second) |
553 | ks->c2 = 0; |
554 | else |
555 | ks->c1 = 1; |
556 | } |
557 | |
558 | if (pmatch[3].rm_eo > pmatch[3].rm_so) { |
559 | regoff_t i = 0; |
560 | |
561 | for (i = pmatch[3].rm_so; i < pmatch[3].rm_eo; i++) { |
562 | check_mutually_exclusive_flags(s[i], mef_flags); |
563 | if (s[i] == 'b') { |
564 | if (second) |
565 | ks->pos2b = true1; |
566 | else |
567 | ks->pos1b = true1; |
568 | } else if (!set_sort_modifier(&(ks->sm), s[i])) |
569 | goto end; |
570 | } |
571 | } |
572 | |
573 | ret = 0; |
574 | |
575 | end: |
576 | sort_free(c); |
577 | sort_free(f); |
578 | regfree(&re); |
579 | |
580 | return ret; |
581 | } |
582 | |
583 | /* |
584 | * Parse -k option value. |
585 | */ |
586 | static int |
587 | parse_k(const char *s, struct key_specs *ks) |
588 | { |
589 | int ret = -1; |
590 | bool_Bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS6] = |
591 | { false0, false0, false0, false0, false0, false0 }; |
592 | |
593 | if (*s != '\0') { |
594 | char *sptr; |
595 | |
596 | sptr = strchr(s, ','); |
597 | if (sptr) { |
598 | size_t size1; |
599 | char *pos1, *pos2; |
600 | |
601 | size1 = sptr - s; |
602 | |
603 | if (size1 < 1) |
604 | return -1; |
605 | |
606 | pos1 = sort_malloc(size1 + 1); |
607 | memcpy(pos1, s, size1); |
608 | pos1[size1] = '\0'; |
609 | |
610 | ret = parse_pos(pos1, ks, mef_flags, false0); |
611 | |
612 | sort_free(pos1); |
613 | if (ret < 0) |
614 | return ret; |
615 | |
616 | pos2 = sort_strdup(sptr + 1); |
617 | ret = parse_pos(pos2, ks, mef_flags, true1); |
618 | sort_free(pos2); |
619 | } else |
620 | ret = parse_pos(s, ks, mef_flags, false0); |
621 | } |
622 | |
623 | return ret; |
624 | } |
625 | |
/*
 * Parse one POS of the obsolete +POS / -POS syntax:
 * FIELD[.COLUMN][LETTERS].
 *
 * On success *nf receives the field number, *nc the column number (0
 * when absent) and, when letters are present, they are copied to sopts
 * as a NUL-terminated string.  sopts is left untouched when there are
 * no letters.  Returns 0 on success and -1 when s does not have the
 * expected form.  Exits with status 2 when a number is out of range or
 * the letters do not fit in sopts_size bytes.
 */
static int
parse_pos_obs(const char *s, size_t *nf, size_t *nc, char *sopts, size_t sopts_size)
{
	const char *pattern = "^([0-9]+)(\\.[0-9]+)?([A-Za-z]+)?$";
	regmatch_t m[4];
	regex_t re;
	char *fieldstr = NULL, *colstr = NULL;
	size_t n;
	int rv = -1;

	*nc = *nf = 0;

	if (regcomp(&re, pattern, REG_EXTENDED) != 0)
		return -1;

	if (regexec(&re, s, 4, m, 0) != 0)
		goto done;
	if (m[0].rm_eo <= m[0].rm_so)
		goto done;
	if (m[1].rm_eo <= m[1].rm_so)
		goto done;

	/* Field number. */
	n = m[1].rm_eo - m[1].rm_so;
	fieldstr = sort_malloc(n + 1);
	memcpy(fieldstr, s + m[1].rm_so, n);
	fieldstr[n] = '\0';

	errno = 0;
	*nf = (size_t)strtoul(fieldstr, NULL, 10);
	if (errno != 0)
		errx(2, "Invalid key position");

	/* Optional column number; the leading dot is skipped. */
	if (m[2].rm_eo > m[2].rm_so) {
		n = m[2].rm_eo - m[2].rm_so - 1;
		colstr = sort_malloc(n + 1);
		memcpy(colstr, s + m[2].rm_so + 1, n);
		colstr[n] = '\0';

		errno = 0;
		*nc = (size_t)strtoul(colstr, NULL, 10);
		if (errno != 0)
			errx(2, "Invalid key position");
	}

	/* Optional trailing letters, handed back verbatim. */
	if (m[3].rm_eo > m[3].rm_so) {
		n = m[3].rm_eo - m[3].rm_so;
		if (n >= sopts_size)
			errx(2, "Invalid key position");
		memcpy(sopts, s + m[3].rm_so, n);
		sopts[n] = '\0';
	}

	rv = 0;

done:
	sort_free(colstr);
	sort_free(fieldstr);
	regfree(&re);

	return rv;
}
699 | |
/*
 * "Translate" the obsolete +POS1 [-POS2] syntax into the -kPOS1[,POS2]
 * syntax, rewriting argv in place.
 *
 * Every argument starting with '+' that parse_pos_obs() accepts is
 * replaced by a newly allocated "-k..." string; field and column of the
 * start position are incremented by one.  If the following argument
 * starts with '-' and also parses, it is merged into the same key
 * (its field is incremented only when its column is non-zero), removed
 * from argv, and *argc is decremented.
 */
static void
fix_obsolete_keys(int *argc, char **argv)
{
	char key[129];
	int i;

	for (i = 1; i < *argc; i++) {
		const char *start = argv[i];
		char sopts1[128], sopts2[128];
		size_t c1 = 0, f1 = 0, c2 = 0, f2 = 0;
		bool have_end = false;
		int k;

		if (start[0] != '+')
			continue;

		sopts1[0] = 0;
		if (parse_pos_obs(start + 1, &f1, &c1, sopts1,
		    sizeof(sopts1)) < 0)
			continue;

		f1 += 1;
		c1 += 1;

		/* Look for a matching -POS2 right behind +POS1. */
		if (i + 1 < *argc && argv[i + 1][0] == '-') {
			sopts2[0] = 0;
			have_end = parse_pos_obs(argv[i + 1] + 1, &f2, &c2,
			    sopts2, sizeof(sopts2)) >= 0;
		}

		if (!have_end) {
			snprintf(key, sizeof(key), "-k%zu.%zu%s",
			    f1, c1, sopts1);
			argv[i] = sort_strdup(key);
			continue;
		}

		if (c2 > 0)
			f2 += 1;
		snprintf(key, sizeof(key),
		    "-k%zu.%zu%s,%zu.%zu%s",
		    f1, c1, sopts1, f2,
		    c2, sopts2);
		argv[i] = sort_strdup(key);

		/* Close the gap left by the consumed -POS2 argument. */
		for (k = i + 1; k + 1 < *argc; k++)
			argv[k] = argv[k + 1];
		*argc -= 1;
	}
}
758 | |
759 | /* |
760 | * Set random seed |
761 | */ |
762 | static void |
763 | set_random_seed(void) |
764 | { |
765 | if (!need_random) |
766 | return; |
767 | |
768 | MD5Init(&md5_ctx); |
769 | if (random_source != NULL((void *)0)) { |
770 | unsigned char buf[BUFSIZ1024]; |
771 | size_t nr; |
772 | FILE *fp; |
773 | |
774 | if ((fp = fopen(random_source, "r")) == NULL((void *)0)) |
775 | err(2, "%s", random_source); |
776 | while ((nr = fread(buf, 1, sizeof(buf), fp)) != 0) |
777 | MD5Update(&md5_ctx, buf, nr); |
778 | if (ferror(fp)(!__isthreaded ? (((fp)->_flags & 0x0040) != 0) : (ferror )(fp))) |
779 | err(2, "%s", random_source); |
780 | fclose(fp); |
781 | } else { |
782 | unsigned char rsd[1024]; |
783 | |
784 | arc4random_buf(rsd, sizeof(rsd)); |
785 | MD5Update(&md5_ctx, rsd, sizeof(rsd)); |
786 | } |
787 | } |
788 | |
789 | /* |
790 | * Main function. |
791 | */ |
792 | int |
793 | main(int argc, char *argv[]) |
794 | { |
795 | char *outfile, *real_outfile, *sflag; |
796 | int c; |
797 | size_t i; |
798 | struct sort_mods *sm = &default_sort_mods_object; |
799 | bool_Bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS6] = |
800 | { false0, false0, false0, false0, false0, false0 }; |
801 | |
802 | set_hw_params(); |
803 | |
804 | if (pledge("stdio rpath wpath cpath fattr chown proc exec", NULL((void *)0)) == -1) |
805 | err(2, "pledge"); |
806 | |
807 | outfile = "-"; |
808 | real_outfile = NULL((void *)0); |
809 | sflag = NULL((void *)0); |
810 | |
811 | init_tmp_files(); |
812 | |
813 | set_signal_handler(); |
814 | |
815 | atexit(clear_tmp_files); |
816 | |
817 | set_tmpdir(); |
818 | set_sort_opts(); |
819 | |
820 | fix_obsolete_keys(&argc, argv); |
821 | |
822 | while (((c = getopt_long(argc, argv, OPTIONS"+""bCcdfgHhik:Mmno:RrS:st:T:uVz", long_options, NULL((void *)0))) |
823 | != -1)) { |
824 | |
825 | check_mutually_exclusive_flags(c, mef_flags); |
826 | |
827 | if (!set_sort_modifier(sm, c)) { |
828 | switch (c) { |
829 | case 'c': |
830 | sort_opts_vals.cflag = true1; |
831 | if (optarg) { |
832 | if (!strcmp(optarg, "diagnose-first")) |
833 | ; |
834 | else if (!strcmp(optarg, "silent") || |
835 | !strcmp(optarg, "quiet")) |
836 | sort_opts_vals.csilentflag = true1; |
837 | else if (*optarg) |
838 | unknown(optarg); |
839 | } |
840 | break; |
841 | case 'C': |
842 | sort_opts_vals.cflag = true1; |
843 | sort_opts_vals.csilentflag = true1; |
844 | break; |
845 | case 'k': |
846 | { |
847 | sort_opts_vals.complex_sort = true1; |
848 | sort_opts_vals.kflag = true1; |
849 | |
850 | keys = sort_reallocarray(keys, keys_num + 1, |
851 | sizeof(struct key_specs)); |
852 | memset(&(keys[keys_num]), 0, |
853 | sizeof(struct key_specs)); |
854 | #ifndef GNUSORT_COMPATIBILITY |
855 | keys[keys_num].pos1b = default_sort_mods->bflag; |
856 | keys[keys_num].pos2b = default_sort_mods->bflag; |
857 | #endif |
858 | |
859 | if (parse_k(optarg, &(keys[keys_num++])) < 0) |
860 | errc(2, EINVAL22, "-k %s", optarg); |
861 | |
862 | break; |
863 | } |
864 | case 'm': |
865 | sort_opts_vals.mflag = true1; |
866 | break; |
867 | case 'o': |
868 | outfile = optarg; |
869 | break; |
870 | case 's': |
871 | sort_opts_vals.sflag = true1; |
872 | break; |
873 | case 'S': |
874 | sflag = optarg; |
875 | break; |
876 | case 'T': |
877 | tmpdir = optarg; |
878 | break; |
879 | case 't': |
880 | while (strlen(optarg) > 1) { |
881 | if (optarg[0] != '\\') { |
882 | errc(2, EINVAL22, "%s", optarg); |
883 | } |
884 | optarg += 1; |
885 | if (*optarg == '0') { |
886 | *optarg = 0; |
887 | break; |
888 | } |
889 | } |
890 | sort_opts_vals.tflag = true1; |
891 | sort_opts_vals.field_sep = btowc(optarg[0]); |
892 | if (sort_opts_vals.field_sep == WEOF((wint_t)-1)) { |
893 | errno(*__errno()) = EINVAL22; |
894 | err(2, NULL((void *)0)); |
895 | } |
896 | break; |
897 | case 'u': |
898 | sort_opts_vals.uflag = true1; |
899 | /* stable sort for the correct unique val */ |
900 | sort_opts_vals.sflag = true1; |
901 | break; |
902 | case 'z': |
903 | sort_opts_vals.zflag = true1; |
904 | break; |
905 | case SORT_OPT: |
906 | if (!strcmp(optarg, "general-numeric")) |
907 | set_sort_modifier(sm, 'g'); |
908 | else if (!strcmp(optarg, "human-numeric")) |
909 | set_sort_modifier(sm, 'h'); |
910 | else if (!strcmp(optarg, "numeric")) |
911 | set_sort_modifier(sm, 'n'); |
912 | else if (!strcmp(optarg, "month")) |
913 | set_sort_modifier(sm, 'M'); |
914 | else if (!strcmp(optarg, "random")) |
915 | set_sort_modifier(sm, 'R'); |
916 | else |
917 | unknown(optarg); |
918 | break; |
919 | case QSORT_OPT: |
920 | sort_opts_vals.sort_method = SORT_QSORT1; |
921 | break; |
922 | case 'H': |
923 | sort_opts_vals.sort_method = SORT_MERGESORT2; |
924 | break; |
925 | case MMAP_OPT: |
926 | use_mmap = true1; |
927 | break; |
928 | case HEAPSORT_OPT: |
929 | sort_opts_vals.sort_method = SORT_HEAPSORT3; |
930 | break; |
931 | case RADIXSORT_OPT: |
932 | sort_opts_vals.sort_method = SORT_RADIXSORT4; |
933 | break; |
934 | case RANDOMSOURCE_OPT: |
935 | random_source = optarg; |
936 | break; |
937 | case COMPRESSPROGRAM_OPT: |
938 | compress_program = optarg; |
939 | break; |
940 | case FF_OPT: |
941 | read_fns_from_file0(optarg); |
942 | break; |
943 | case BS_OPT: |
944 | { |
945 | const char *errstr; |
946 | |
947 | max_open_files = strtonum(optarg, 2, |
948 | UINT_MAX(2147483647 *2U +1U) - 1, &errstr) + 1; |
949 | if (errstr != NULL((void *)0)) |
950 | errx(2, "--batch-size argument is %s", |
951 | errstr); |
952 | break; |
953 | } |
954 | case VERSION_OPT: |
955 | printf("%s\n", VERSION"2.3-OpenBSD"); |
956 | exit(EXIT_SUCCESS0); |
957 | /* NOTREACHED */ |
958 | break; |
959 | case DEBUG_OPT: |
960 | debug_sort = true1; |
961 | break; |
962 | case HELP_OPT: |
963 | usage(0); |
964 | /* NOTREACHED */ |
965 | break; |
966 | default: |
967 | usage(2); |
968 | /* NOTREACHED */ |
969 | } |
970 | } |
971 | } |
972 | argc -= optind; |
973 | argv += optind; |
974 | |
975 | if (compress_program == NULL((void *)0)) { |
976 | if (pledge("stdio rpath wpath cpath fattr chown", NULL((void *)0)) == -1) |
977 | err(2, "pledge"); |
978 | } |
979 | |
980 | #ifndef GNUSORT_COMPATIBILITY |
981 | if (argc > 2 && strcmp(argv[argc - 2], "-o") == 0) { |
982 | outfile = argv[argc - 1]; |
983 | argc -= 2; |
984 | } |
985 | #endif |
986 | |
987 | if (argv_from_file0) { |
988 | argc = argc_from_file0; |
989 | argv = argv_from_file0; |
990 | } |
991 | |
992 | if (sort_opts_vals.cflag) { |
993 | if (argc > 1) |
994 | errx(2, "only one input file is allowed with the -%c flag", |
995 | sort_opts_vals.csilentflag ? 'C' : 'c'); |
996 | |
997 | if (argc == 0 || strcmp(argv[0], "-") == 0) { |
998 | if (compress_program) { |
999 | if (pledge("stdio proc exec", NULL((void *)0)) == -1) |
1000 | err(2, "pledge"); |
1001 | } else { |
1002 | if (pledge("stdio", NULL((void *)0)) == -1) |
1003 | err(2, "pledge"); |
1004 | } |
1005 | } else { |
1006 | if (compress_program) { |
1007 | if (pledge("stdio rpath proc exec", NULL((void *)0)) == -1) |
1008 | err(2, "pledge"); |
1009 | } else { |
1010 | if (pledge("stdio rpath", NULL((void *)0)) == -1) |
1011 | err(2, "pledge"); |
1012 | } |
1013 | } |
1014 | } else { |
1015 | /* Case when the outfile equals one of the input files: */ |
1016 | if (strcmp(outfile, "-") != 0) { |
1017 | struct stat sb; |
1018 | int fd, i; |
1019 | |
1020 | for (i = 0; i < argc; ++i) { |
1021 | if (strcmp(argv[i], outfile) == 0) { |
1022 | if (stat(outfile, &sb) == -1) |
1023 | err(2, "%s", outfile); |
1024 | if (access(outfile, W_OK0x02) == -1) |
1025 | err(2, "%s", outfile); |
1026 | real_outfile = outfile; |
1027 | sort_asprintf(&outfile, "%s.XXXXXXXXXX", |
1028 | real_outfile); |
1029 | if ((fd = mkstemp(outfile)) == -1) |
1030 | err(2, "%s", outfile); |
1031 | (void)fchown(fd, sb.st_uid, sb.st_gid); |
1032 | if (fchmod(fd, sb.st_mode & ACCESSPERMS(0000700|0000070|0000007)) == -1) |
1033 | err(2, "%s", outfile); |
1034 | close(fd); |
1035 | tmp_file_atexit(outfile); |
1036 | break; |
1037 | } |
1038 | } |
1039 | } |
1040 | |
1041 | if (compress_program) { |
1042 | if (pledge("stdio rpath wpath cpath proc exec", NULL((void *)0)) == -1) |
1043 | err(2, "pledge"); |
1044 | } else { |
1045 | if (pledge("stdio rpath wpath cpath", NULL((void *)0)) == -1) |
1046 | err(2, "pledge"); |
1047 | } |
1048 | } |
1049 | |
1050 | if (sflag != NULL((void *)0)) |
1051 | available_free_memory = parse_memory_buffer_value(sflag); |
1052 | |
1053 | if (keys_num == 0) { |
1054 | keys_num = 1; |
1055 | keys = sort_reallocarray(keys, 1, sizeof(struct key_specs)); |
1056 | memset(&(keys[0]), 0, sizeof(struct key_specs)); |
1057 | keys[0].c1 = 1; |
1058 | #ifdef GNUSORT_COMPATIBILITY |
1059 | keys[0].pos1b = sm->bflag; |
1060 | keys[0].pos2b = sm->bflag; |
1061 | #endif |
1062 | memcpy(&(keys[0].sm), sm, sizeof(struct sort_mods)); |
1063 | } |
1064 | |
1065 | for (i = 0; i < keys_num; i++) { |
1066 | struct key_specs *ks; |
1067 | |
1068 | ks = &(keys[i]); |
1069 | |
1070 | if (sort_modifier_empty(&(ks->sm))) { |
1071 | #ifdef GNUSORT_COMPATIBILITY |
1072 | if (!(ks->pos1b) && !(ks->pos2b)) { |
1073 | ks->pos1b = sm->bflag; |
1074 | ks->pos2b = sm->bflag; |
1075 | } |
1076 | #endif |
1077 | memcpy(&(ks->sm), sm, sizeof(struct sort_mods)); |
1078 | } |
1079 | |
1080 | ks->sm.func = get_sort_func(&(ks->sm)); |
1081 | } |
1082 | |
1083 | if (debug_sort) |
1084 | printf("Memory to be used for sorting: %llu\n", |
1085 | available_free_memory); |
1086 | |
1087 | if (sort_opts_vals.cflag) |
1088 | return check(argc ? *argv : "-"); |
1089 | |
1090 | set_random_seed(); |
1091 | |
1092 | if (!sort_opts_vals.mflag) { |
1093 | struct file_list fl; |
1094 | struct sort_list list; |
1095 | |
1096 | sort_list_init(&list); |
1097 | file_list_init(&fl, true1); |
1098 | |
1099 | if (argc < 1) |
1100 | procfile("-", &list, &fl); |
1101 | else { |
1102 | while (argc > 0) { |
1103 | procfile(*argv, &list, &fl); |
1104 | --argc; |
1105 | ++argv; |
1106 | } |
1107 | } |
1108 | |
1109 | if (fl.count < 1) |
1110 | sort_list_to_file(&list, outfile); |
1111 | else { |
1112 | if (list.count > 0) { |
1113 | char *flast = new_tmp_file_name(); |
1114 | |
1115 | sort_list_to_file(&list, flast); |
1116 | file_list_add(&fl, flast, false0); |
1117 | } |
1118 | merge_files(&fl, outfile); |
1119 | } |
1120 | |
1121 | file_list_clean(&fl); |
1122 | |
1123 | /* |
1124 | * We are about to exit the program, so we can ignore |
1125 | * the clean-up for speed |
1126 | * |
1127 | * sort_list_clean(&list); |
1128 | */ |
1129 | |
1130 | } else { |
1131 | struct file_list fl; |
1132 | |
1133 | file_list_init(&fl, false0); |
1134 | if (argc < 1) |
1135 | file_list_add(&fl, "-", true1); |
1136 | else |
1137 | file_list_populate(&fl, argc, argv, true1); |
1138 | merge_files(&fl, outfile); |
1139 | file_list_clean(&fl); |
1140 | } |
1141 | |
1142 | if (real_outfile) { |
1143 | if (rename(outfile, real_outfile) == -1) |
1144 | err(2, "%s", real_outfile); |
1145 | sort_free(outfile); |
1146 | } |
1147 | |
1148 | return 0; |
1149 | } |