File: | src/usr.bin/csplit/csplit.c |
Warning: | line 134, column 2 Value stored to 'argc' is never read |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | /* $OpenBSD: csplit.c,v 1.10 2021/07/08 00:38:42 millert Exp $ */ |
2 | /* $FreeBSD: src/usr.bin/csplit/csplit.c,v 1.9 2004/03/22 11:15:03 tjr Exp $ */ |
3 | |
4 | /*- |
5 | * Copyright (c) 2002 Tim J. Robbins. |
6 | * All rights reserved. |
7 | * |
8 | * Redistribution and use in source and binary forms, with or without |
9 | * modification, are permitted provided that the following conditions |
10 | * are met: |
11 | * 1. Redistributions of source code must retain the above copyright |
12 | * notice, this list of conditions and the following disclaimer. |
13 | * 2. Redistributions in binary form must reproduce the above copyright |
14 | * notice, this list of conditions and the following disclaimer in the |
15 | * documentation and/or other materials provided with the distribution. |
16 | * |
17 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
27 | * SUCH DAMAGE. |
28 | */ |
29 | |
30 | /* |
31 | * csplit -- split files based on context |
32 | * |
33 | * This utility splits its input into numbered output files by line number |
34 | * or by a regular expression. Regular expression matches have an optional |
35 | * offset with them, allowing the split to occur a specified number of |
36 | * lines before or after the match. |
37 | * |
38 | * To handle negative offsets, we stop reading when the match occurs and |
39 | * store the offset that the file should have been split at, then use |
40 | * this output file as input until all the "overflowed" lines have been read. |
41 | * The file is then closed and truncated to the correct length. |
42 | * |
43 | * We assume that the output files are seekable (i.e. they cannot be |
44 | * symlinks to named pipes or character devices), but make no such |
45 | * assumption about the input. |
46 | */ |
47 | |
#include <sys/types.h>

#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <limits.h>
#include <regex.h>
#include <signal.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
61 | |
62 | void cleanup(void); |
63 | void do_lineno(const char *); |
64 | void do_rexp(const char *); |
65 | char *get_line(void); |
66 | void handlesig(int); |
67 | FILE *newfile(void); |
68 | void toomuch(FILE *, long); |
69 | static void __dead__attribute__((__noreturn__)) usage(void); |
70 | |
71 | /* |
72 | * Command line options |
73 | */ |
74 | const char *prefix; /* File name prefix */ |
75 | long sufflen; /* Number of decimal digits for suffix */ |
76 | int sflag; /* Suppress output of file names */ |
77 | int kflag; /* Keep output if error occurs */ |
78 | |
79 | /* |
80 | * Other miscellaneous globals (XXX too many) |
81 | */ |
82 | long lineno; /* Current line number in input file */ |
83 | long reps; /* Number of repetitions for this pattern */ |
84 | long nfiles; /* Number of files output so far */ |
85 | long maxfiles; /* Maximum number of files we can create */ |
86 | char currfile[PATH_MAX1024]; /* Current output file */ |
87 | const char *infn; /* Name of the input file */ |
88 | FILE *infile; /* Input file handle */ |
89 | FILE *overfile; /* Overflow file for toomuch() */ |
90 | off_t truncofs; /* Offset this file should be truncated at */ |
91 | int doclean; /* Should cleanup() remove output? */ |
92 | |
93 | int |
94 | main(int argc, char *argv[]) |
95 | { |
96 | struct sigaction sa; |
97 | long i; |
98 | int ch; |
99 | const char *expr; |
100 | char *ep, *p; |
101 | FILE *ofp; |
102 | |
103 | if (pledge("stdio rpath wpath cpath", NULL((void *)0)) == -1) |
104 | err(1, "pledge"); |
105 | |
106 | kflag = sflag = 0; |
107 | prefix = "xx"; |
108 | sufflen = 2; |
109 | while ((ch = getopt(argc, argv, "f:kn:s")) != -1) { |
110 | switch (ch) { |
111 | case 'f': |
112 | prefix = optarg; |
113 | break; |
114 | case 'k': |
115 | kflag = 1; |
116 | break; |
117 | case 'n': |
118 | errno(*__errno()) = 0; |
119 | sufflen = strtol(optarg, &ep, 10); |
120 | if (sufflen <= 0 || *ep != '\0' || errno(*__errno()) != 0) |
121 | errx(1, "%s: bad suffix length", optarg); |
122 | break; |
123 | case 's': |
124 | sflag = 1; |
125 | break; |
126 | default: |
127 | usage(); |
128 | } |
129 | } |
130 | |
131 | if (sufflen + strlen(prefix) >= PATH_MAX1024) |
132 | errx(1, "name too long"); |
133 | |
134 | argc -= optind; |
Value stored to 'argc' is never read | |
135 | argv += optind; |
136 | |
137 | if ((infn = *argv++) == NULL((void *)0)) |
138 | usage(); |
139 | if (strcmp(infn, "-") == 0) { |
140 | infile = stdin(&__sF[0]); |
141 | infn = "stdin"; |
142 | } else if ((infile = fopen(infn, "r")) == NULL((void *)0)) |
143 | err(1, "%s", infn); |
144 | |
145 | if (!kflag) { |
146 | doclean = 1; |
147 | atexit(cleanup); |
148 | sa.sa_flags = 0; |
149 | sa.sa_handler__sigaction_u.__sa_handler = handlesig; |
150 | sigemptyset(&sa.sa_mask); |
151 | sigaddset(&sa.sa_mask, SIGHUP1); |
152 | sigaddset(&sa.sa_mask, SIGINT2); |
153 | sigaddset(&sa.sa_mask, SIGTERM15); |
154 | sigaction(SIGHUP1, &sa, NULL((void *)0)); |
155 | sigaction(SIGINT2, &sa, NULL((void *)0)); |
156 | sigaction(SIGTERM15, &sa, NULL((void *)0)); |
157 | } |
158 | |
159 | lineno = 0; |
160 | nfiles = 0; |
161 | truncofs = 0; |
162 | overfile = NULL((void *)0); |
163 | |
164 | /* Ensure 10^sufflen < LONG_MAX. */ |
165 | for (maxfiles = 1, i = 0; i < sufflen; i++) { |
166 | if (maxfiles > LONG_MAX9223372036854775807L / 10) |
167 | errx(1, "%ld: suffix too long (limit %ld)", |
168 | sufflen, i); |
169 | maxfiles *= 10; |
170 | } |
171 | |
172 | /* Create files based on supplied patterns. */ |
173 | while (nfiles < maxfiles - 1 && (expr = *argv++) != NULL((void *)0)) { |
174 | /* Look ahead & see if this pattern has any repetitions. */ |
175 | if (*argv != NULL((void *)0) && **argv == '{') { |
176 | errno(*__errno()) = 0; |
177 | reps = strtol(*argv + 1, &ep, 10); |
178 | if (reps < 0 || *ep != '}' || errno(*__errno()) != 0) |
179 | errx(1, "%s: bad repetition count", *argv + 1); |
180 | argv++; |
181 | } else |
182 | reps = 0; |
183 | |
184 | if (*expr == '/' || *expr == '%') { |
185 | do { |
186 | do_rexp(expr); |
187 | } while (reps-- != 0 && nfiles < maxfiles - 1); |
188 | } else if (isdigit((unsigned char)*expr)) |
189 | do_lineno(expr); |
190 | else |
191 | errx(1, "%s: unrecognised pattern", expr); |
192 | } |
193 | |
194 | /* Copy the rest into a new file. */ |
195 | if (!feof(infile)(!__isthreaded ? (((infile)->_flags & 0x0020) != 0) : ( feof)(infile))) { |
196 | ofp = newfile(); |
197 | while ((p = get_line()) != NULL((void *)0) && fputs(p, ofp) == 0) |
198 | ; |
199 | if (!sflag) |
200 | printf("%jd\n", (intmax_t)ftello(ofp)); |
201 | if (fclose(ofp) != 0) |
202 | err(1, "%s", currfile); |
203 | } |
204 | |
205 | toomuch(NULL((void *)0), 0); |
206 | doclean = 0; |
207 | |
208 | return (0); |
209 | } |
210 | |
211 | static void __dead__attribute__((__noreturn__)) |
212 | usage(void) |
213 | { |
214 | extern char *__progname; |
215 | |
216 | fprintf(stderr(&__sF[2]), |
217 | "usage: %s [-ks] [-f prefix] [-n number] file args ...\n", |
218 | __progname); |
219 | exit(1); |
220 | } |
221 | |
/*
 * Signal handler installed by main() for HUP, INT and TERM: announce the
 * signal, remove the output created so far and exit with status 2.
 */
/* ARGSUSED */
void
handlesig(int sig)
{
	static const char notice[] = "csplit: caught signal, cleaning up\n";
	const size_t len = sizeof(notice) - 1;	/* omit the trailing NUL */

	write(STDERR_FILENO, notice, len);
	cleanup();
	_exit(2);
}
232 | |
233 | /* Create a new output file. */ |
234 | FILE * |
235 | newfile(void) |
236 | { |
237 | FILE *fp; |
238 | |
239 | if ((size_t)snprintf(currfile, sizeof(currfile), "%s%0*ld", prefix, |
240 | (int)sufflen, nfiles) >= sizeof(currfile)) |
241 | errc(1, ENAMETOOLONG63, "%s", currfile); |
242 | if ((fp = fopen(currfile, "w+")) == NULL((void *)0)) |
243 | err(1, "%s", currfile); |
244 | nfiles++; |
245 | |
246 | return (fp); |
247 | } |
248 | |
249 | /* Remove partial output, called before exiting. */ |
250 | void |
251 | cleanup(void) |
252 | { |
253 | char fnbuf[PATH_MAX1024]; |
254 | long i; |
255 | |
256 | if (!doclean) |
257 | return; |
258 | |
259 | /* |
260 | * NOTE: One cannot portably assume to be able to call snprintf() from |
261 | * inside a signal handler. It is, however, safe to do on OpenBSD. |
262 | */ |
263 | for (i = 0; i < nfiles; i++) { |
264 | snprintf(fnbuf, sizeof(fnbuf), "%s%0*ld", prefix, |
265 | (int)sufflen, i); |
266 | unlink(fnbuf); |
267 | } |
268 | } |
269 | |
270 | /* Read a line from the input into a static buffer. */ |
271 | char * |
272 | get_line(void) |
273 | { |
274 | static char lbuf[LINE_MAX2048]; |
275 | FILE *src; |
276 | |
277 | src = overfile != NULL((void *)0) ? overfile : infile; |
278 | |
279 | again: if (fgets(lbuf, sizeof(lbuf), src) == NULL((void *)0)) { |
280 | if (src == overfile) { |
281 | src = infile; |
282 | goto again; |
283 | } |
284 | return (NULL((void *)0)); |
285 | } |
286 | if (ferror(src)(!__isthreaded ? (((src)->_flags & 0x0040) != 0) : (ferror )(src))) |
287 | err(1, "%s", infn); |
288 | lineno++; |
289 | |
290 | return (lbuf); |
291 | } |
292 | |
293 | /* Conceptually rewind the input (as obtained by get_line()) back `n' lines. */ |
294 | void |
295 | toomuch(FILE *ofp, long n) |
296 | { |
297 | char buf[BUFSIZ1024]; |
298 | size_t i, nread; |
299 | |
300 | if (overfile != NULL((void *)0)) { |
301 | /* |
302 | * Truncate the previous file we overflowed into back to |
303 | * the correct length, close it. |
304 | */ |
305 | if (fflush(overfile) != 0) |
306 | err(1, "overflow"); |
307 | if (ftruncate(fileno(overfile)(!__isthreaded ? ((overfile)->_file) : (fileno)(overfile)), truncofs) != 0) |
308 | err(1, "overflow"); |
309 | if (fclose(overfile) != 0) |
310 | err(1, "overflow"); |
311 | overfile = NULL((void *)0); |
312 | } |
313 | |
314 | if (n == 0) |
315 | /* Just tidying up */ |
316 | return; |
317 | |
318 | lineno -= n; |
319 | |
320 | /* |
321 | * Wind the overflow file backwards to `n' lines before the |
322 | * current one. |
323 | */ |
324 | do { |
325 | if (ftello(ofp) < (off_t)sizeof(buf)) |
326 | rewind(ofp); |
327 | else |
328 | fseeko(ofp, -(off_t)sizeof(buf), SEEK_CUR1); |
329 | if (ferror(ofp)(!__isthreaded ? (((ofp)->_flags & 0x0040) != 0) : (ferror )(ofp))) |
330 | errx(1, "%s: can't seek", currfile); |
331 | if ((nread = fread(buf, 1, sizeof(buf), ofp)) == 0) |
332 | errx(1, "can't read overflowed output"); |
333 | if (fseeko(ofp, -(off_t)nread, SEEK_CUR1) != 0) |
334 | err(1, "%s", currfile); |
335 | for (i = 1; i <= nread; i++) |
336 | if (buf[nread - i] == '\n' && n-- == 0) |
337 | break; |
338 | if (ftello(ofp) == 0) |
339 | break; |
340 | } while (n > 0); |
341 | if (fseeko(ofp, (off_t)(nread - i + 1), SEEK_CUR1) != 0) |
342 | err(1, "%s", currfile); |
343 | |
344 | /* |
345 | * get_line() will read from here. Next call will truncate to |
346 | * truncofs in this file. |
347 | */ |
348 | overfile = ofp; |
349 | truncofs = ftello(overfile); |
350 | } |
351 | |
352 | /* Handle splits for /regexp/ and %regexp% patterns. */ |
353 | void |
354 | do_rexp(const char *expr) |
355 | { |
356 | regex_t cre; |
357 | intmax_t nwritten; |
358 | long ofs; |
359 | int first; |
360 | char *ecopy, *ep, *p, *pofs, *re; |
361 | FILE *ofp; |
362 | |
363 | if ((ecopy = strdup(expr)) == NULL((void *)0)) |
364 | err(1, "strdup"); |
365 | |
366 | re = ecopy + 1; |
367 | if ((pofs = strrchr(ecopy, *expr)) == NULL((void *)0) || pofs[-1] == '\\') |
368 | errx(1, "%s: missing trailing %c", expr, *expr); |
369 | *pofs++ = '\0'; |
370 | |
371 | if (*pofs != '\0') { |
372 | errno(*__errno()) = 0; |
373 | ofs = strtol(pofs, &ep, 10); |
374 | if (*ep != '\0' || errno(*__errno()) != 0) |
375 | errx(1, "%s: bad offset", pofs); |
376 | } else |
377 | ofs = 0; |
378 | |
379 | if (regcomp(&cre, re, REG_BASIC0000|REG_NOSUB0004) != 0) |
380 | errx(1, "%s: bad regular expression", re); |
381 | |
382 | if (*expr == '/') |
383 | /* /regexp/: Save results to a file. */ |
384 | ofp = newfile(); |
385 | else { |
386 | /* %regexp%: Make a temporary file for overflow. */ |
387 | if ((ofp = tmpfile()) == NULL((void *)0)) |
388 | err(1, "tmpfile"); |
389 | } |
390 | |
391 | /* Read and output lines until we get a match. */ |
392 | first = 1; |
393 | while ((p = get_line()) != NULL((void *)0)) { |
394 | if (fputs(p, ofp) != 0) |
395 | break; |
396 | if (!first && regexec(&cre, p, 0, NULL((void *)0), 0) == 0) |
397 | break; |
398 | first = 0; |
399 | } |
400 | |
401 | if (p == NULL((void *)0)) { |
402 | toomuch(NULL((void *)0), 0); |
403 | errx(1, "%s: no match", re); |
404 | } |
405 | |
406 | if (ofs <= 0) { |
407 | /* |
408 | * Negative (or zero) offset: throw back any lines we should |
409 | * not have read yet. |
410 | */ |
411 | if (p != NULL((void *)0)) { |
412 | toomuch(ofp, -ofs + 1); |
413 | nwritten = (intmax_t)truncofs; |
414 | } else |
415 | nwritten = (intmax_t)ftello(ofp); |
416 | } else { |
417 | /* |
418 | * Positive offset: copy the requested number of lines |
419 | * after the match. |
420 | */ |
421 | while (--ofs > 0 && (p = get_line()) != NULL((void *)0)) |
422 | fputs(p, ofp); |
423 | toomuch(NULL((void *)0), 0); |
424 | nwritten = (intmax_t)ftello(ofp); |
425 | if (fclose(ofp) != 0) |
426 | err(1, "%s", currfile); |
427 | } |
428 | |
429 | if (!sflag && *expr == '/') |
430 | printf("%jd\n", nwritten); |
431 | |
432 | regfree(&cre); |
433 | free(ecopy); |
434 | } |
435 | |
436 | /* Handle splits based on line number. */ |
437 | void |
438 | do_lineno(const char *expr) |
439 | { |
440 | long lastline, tgtline; |
441 | char *ep, *p; |
442 | FILE *ofp; |
443 | |
444 | errno(*__errno()) = 0; |
445 | tgtline = strtol(expr, &ep, 10); |
446 | if (tgtline <= 0 || errno(*__errno()) != 0 || *ep != '\0') |
447 | errx(1, "%s: bad line number", expr); |
448 | lastline = tgtline; |
449 | if (lastline <= lineno) |
450 | errx(1, "%s: can't go backwards", expr); |
451 | |
452 | while (nfiles < maxfiles - 1) { |
453 | ofp = newfile(); |
454 | while (lineno + 1 != lastline) { |
455 | if ((p = get_line()) == NULL((void *)0)) |
456 | errx(1, "%ld: out of range", lastline); |
457 | if (fputs(p, ofp) != 0) |
458 | break; |
459 | } |
460 | if (!sflag) |
461 | printf("%jd\n", (intmax_t)ftello(ofp)); |
462 | if (fclose(ofp) != 0) |
463 | err(1, "%s", currfile); |
464 | if (reps-- == 0) |
465 | break; |
466 | lastline += tgtline; |
467 | } |
468 | } |