File: | src/usr.bin/sed/compile.c |
Warning: | line 697, column 9 Dereference of null pointer |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | /* $OpenBSD: compile.c,v 1.50 2018/12/07 14:45:40 schwarze Exp $ */ | |||
2 | ||||
3 | /*- | |||
4 | * Copyright (c) 1992 Diomidis Spinellis. | |||
5 | * Copyright (c) 1992, 1993 | |||
6 | * The Regents of the University of California. All rights reserved. | |||
7 | * | |||
8 | * This code is derived from software contributed to Berkeley by | |||
9 | * Diomidis Spinellis of Imperial College, University of London. | |||
10 | * | |||
11 | * Redistribution and use in source and binary forms, with or without | |||
12 | * modification, are permitted provided that the following conditions | |||
13 | * are met: | |||
14 | * 1. Redistributions of source code must retain the above copyright | |||
15 | * notice, this list of conditions and the following disclaimer. | |||
16 | * 2. Redistributions in binary form must reproduce the above copyright | |||
17 | * notice, this list of conditions and the following disclaimer in the | |||
18 | * documentation and/or other materials provided with the distribution. | |||
19 | * 3. Neither the name of the University nor the names of its contributors | |||
20 | * may be used to endorse or promote products derived from this software | |||
21 | * without specific prior written permission. | |||
22 | * | |||
23 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |||
24 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
25 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
26 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |||
27 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
28 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |||
29 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |||
30 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |||
31 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |||
32 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |||
33 | * SUCH DAMAGE. | |||
34 | */ | |||
35 | ||||
36 | #include <sys/types.h> | |||
37 | #include <sys/stat.h> | |||
38 | ||||
39 | #include <ctype.h> | |||
40 | #include <errno(*__errno()).h> | |||
41 | #include <fcntl.h> | |||
42 | #include <limits.h> | |||
43 | #include <regex.h> | |||
44 | #include <stdio.h> | |||
45 | #include <stdlib.h> | |||
46 | #include <string.h> | |||
47 | ||||
48 | #include "defs.h" | |||
49 | #include "extern.h" | |||
50 | ||||
51 | #define LHSZ128 128 | |||
52 | #define LHMASK(128 - 1) (LHSZ128 - 1) | |||
53 | static struct labhash { | |||
54 | struct labhash *lh_next; | |||
55 | u_int lh_hash; | |||
56 | struct s_command *lh_cmd; | |||
57 | int lh_ref; | |||
58 | } *labels[LHSZ128]; | |||
59 | ||||
60 | static char *compile_addr(char *, struct s_addr *); | |||
61 | static char *compile_ccl(char **, char *); | |||
62 | static char *compile_delimited(char *, char *); | |||
63 | static char *compile_flags(char *, struct s_subst *); | |||
64 | static char *compile_re(char *, regex_t **); | |||
65 | static char *compile_subst(char *, struct s_subst *); | |||
66 | static char *compile_text(void); | |||
67 | static char *compile_tr(char *, char **); | |||
68 | static struct s_command | |||
69 | **compile_stream(struct s_command **); | |||
70 | static char *duptoeol(char *, char *, char **); | |||
71 | static void enterlabel(struct s_command *); | |||
72 | static struct s_command | |||
73 | *findlabel(char *); | |||
74 | static void fixuplabel(struct s_command *, struct s_command *); | |||
75 | static void uselabel(void); | |||
76 | ||||
77 | /* | |||
78 | * Command specification. This is used to drive the command parser. | |||
79 | */ | |||
80 | struct s_format { | |||
81 | char code; /* Command code */ | |||
82 | int naddr; /* Number of address args */ | |||
83 | enum e_args args; /* Argument type */ | |||
84 | }; | |||
85 | ||||
86 | static struct s_format cmd_fmts[] = { | |||
87 | {'{', 2, GROUP}, | |||
88 | {'}', 0, ENDGROUP}, | |||
89 | {'a', 1, TEXT}, | |||
90 | {'b', 2, BRANCH}, | |||
91 | {'c', 2, TEXT}, | |||
92 | {'d', 2, EMPTY}, | |||
93 | {'D', 2, EMPTY}, | |||
94 | {'g', 2, EMPTY}, | |||
95 | {'G', 2, EMPTY}, | |||
96 | {'h', 2, EMPTY}, | |||
97 | {'H', 2, EMPTY}, | |||
98 | {'i', 1, TEXT}, | |||
99 | {'l', 2, EMPTY}, | |||
100 | {'n', 2, EMPTY}, | |||
101 | {'N', 2, EMPTY}, | |||
102 | {'p', 2, EMPTY}, | |||
103 | {'P', 2, EMPTY}, | |||
104 | {'q', 1, EMPTY}, | |||
105 | {'r', 1, RFILE}, | |||
106 | {'s', 2, SUBST}, | |||
107 | {'t', 2, BRANCH}, | |||
108 | {'w', 2, WFILE}, | |||
109 | {'x', 2, EMPTY}, | |||
110 | {'y', 2, TR}, | |||
111 | {'!', 2, NONSEL}, | |||
112 | {':', 0, LABEL}, | |||
113 | {'#', 0, COMMENT}, | |||
114 | {'=', 1, EMPTY}, | |||
115 | {'\0', 0, COMMENT}, | |||
116 | }; | |||
117 | ||||
118 | /* The compiled program. */ | |||
119 | struct s_command *prog; | |||
120 | ||||
121 | /* | |||
122 | * Compile the program into prog. | |||
123 | * Initialise appends. | |||
124 | */ | |||
125 | void | |||
126 | compile(void) | |||
127 | { | |||
128 | *compile_stream(&prog) = NULL((void *)0); | |||
129 | fixuplabel(prog, NULL((void *)0)); | |||
130 | uselabel(); | |||
131 | appends = xreallocarray(NULL((void *)0), appendnum, sizeof(struct s_appends)); | |||
132 | match = xreallocarray(NULL((void *)0), maxnsub + 1, sizeof(regmatch_t)); | |||
133 | } | |||
134 | ||||
/*
 * Advance the local variable "p" past ASCII whitespace.  A NULL "p" is
 * left untouched.  The macro relies on a char pointer named "p" being in
 * scope at the point of use.
 */
#define	EATSPACE() do {							\
	while (p != NULL && isascii((unsigned char)*p) &&		\
	    isspace((unsigned char)*p))					\
		p++;							\
} while (0)
141 | ||||
142 | static struct s_command ** | |||
143 | compile_stream(struct s_command **link) | |||
144 | { | |||
145 | char *p; | |||
146 | static char *lbuf; /* To avoid excessive malloc calls */ | |||
147 | static size_t bufsize; | |||
148 | struct s_command *cmd, *cmd2, *stack; | |||
149 | struct s_format *fp; | |||
150 | int naddr; /* Number of addresses */ | |||
151 | ||||
152 | stack = 0; | |||
153 | for (;;) { | |||
| ||||
154 | if ((p = cu_fgets(&lbuf, &bufsize)) == NULL((void *)0)) { | |||
155 | if (stack != 0) | |||
156 | error(COMPILE2, "unexpected EOF (pending }'s)"); | |||
157 | return (link); | |||
158 | } | |||
159 | ||||
160 | semicolon: EATSPACE()do { if (p) while (isascii((unsigned char)*p) && isspace ((unsigned char)*p)) p++; } while (0); | |||
161 | if (*p == '#' || *p == '\0') | |||
162 | continue; | |||
163 | if (*p == ';') { | |||
164 | p++; | |||
165 | goto semicolon; | |||
166 | } | |||
167 | *link = cmd = xmalloc(sizeof(struct s_command)); | |||
168 | link = &cmd->next; | |||
169 | cmd->nonsel = cmd->inrange = 0; | |||
170 | /* First parse the addresses */ | |||
171 | naddr = 0; | |||
172 | ||||
173 | /* Valid characters to start an address */ | |||
174 | #define addrchar(c)(strchr("0123456789/\\$", (c))) (strchr("0123456789/\\$", (c))) | |||
175 | if (addrchar(*p)(strchr("0123456789/\\$", (*p)))) { | |||
176 | naddr++; | |||
177 | cmd->a1 = xmalloc(sizeof(struct s_addr)); | |||
178 | p = compile_addr(p, cmd->a1); | |||
179 | EATSPACE()do { if (p) while (isascii((unsigned char)*p) && isspace ((unsigned char)*p)) p++; } while (0); /* EXTENSION */ | |||
180 | if (*p == ',') { | |||
181 | p++; | |||
182 | EATSPACE()do { if (p) while (isascii((unsigned char)*p) && isspace ((unsigned char)*p)) p++; } while (0); /* EXTENSION */ | |||
183 | naddr++; | |||
184 | cmd->a2 = xmalloc(sizeof(struct s_addr)); | |||
185 | p = compile_addr(p, cmd->a2); | |||
186 | EATSPACE()do { if (p) while (isascii((unsigned char)*p) && isspace ((unsigned char)*p)) p++; } while (0); | |||
187 | } else { | |||
188 | cmd->a2 = 0; | |||
189 | } | |||
190 | } else { | |||
191 | cmd->a1 = cmd->a2 = 0; | |||
192 | } | |||
193 | ||||
194 | nonsel: /* Now parse the command */ | |||
195 | if (!*p) | |||
196 | error(COMPILE2, "command expected"); | |||
197 | cmd->code = *p; | |||
198 | for (fp = cmd_fmts; fp->code; fp++) | |||
199 | if (fp->code == *p) | |||
200 | break; | |||
201 | if (!fp->code
| |||
202 | error(COMPILE2, "invalid command code %c", *p); | |||
203 | if (naddr > fp->naddr) | |||
204 | error(COMPILE2, | |||
205 | "command %c expects up to %d address(es), found %d", | |||
206 | *p, fp->naddr, naddr); | |||
207 | switch (fp->args) { | |||
208 | case NONSEL: /* ! */ | |||
209 | p++; | |||
210 | EATSPACE()do { if (p) while (isascii((unsigned char)*p) && isspace ((unsigned char)*p)) p++; } while (0); | |||
211 | cmd->nonsel = 1; | |||
212 | goto nonsel; | |||
213 | case GROUP: /* { */ | |||
214 | p++; | |||
215 | EATSPACE()do { if (p) while (isascii((unsigned char)*p) && isspace ((unsigned char)*p)) p++; } while (0); | |||
216 | cmd->next = stack; | |||
217 | stack = cmd; | |||
218 | link = &cmd->u.c; | |||
219 | if (*p) | |||
220 | goto semicolon; | |||
221 | break; | |||
222 | case ENDGROUP: | |||
223 | /* | |||
224 | * Short-circuit command processing, since end of | |||
225 | * group is really just a noop. | |||
226 | */ | |||
227 | cmd->nonsel = 1; | |||
228 | if (stack == 0) | |||
229 | error(COMPILE2, "unexpected }"); | |||
230 | cmd2 = stack; | |||
231 | stack = cmd2->next; | |||
232 | cmd2->next = cmd; | |||
233 | /*FALLTHROUGH*/ | |||
234 | case EMPTY: /* d D g G h H l n N p P q x = \0 */ | |||
235 | p++; | |||
236 | EATSPACE()do { if (p) while (isascii((unsigned char)*p) && isspace ((unsigned char)*p)) p++; } while (0); | |||
237 | if (*p == ';') { | |||
238 | p++; | |||
239 | link = &cmd->next; | |||
240 | goto semicolon; | |||
241 | } | |||
242 | if (*p) | |||
243 | error(COMPILE2, | |||
244 | "extra characters at the end of %c command", cmd->code); | |||
245 | break; | |||
246 | case TEXT: /* a c i */ | |||
247 | p++; | |||
248 | EATSPACE()do { if (p) while (isascii((unsigned char)*p) && isspace ((unsigned char)*p)) p++; } while (0); | |||
249 | if (*p != '\\') | |||
250 | error(COMPILE2, "command %c expects \\ followed by" | |||
251 | " text", cmd->code); | |||
252 | p++; | |||
253 | EATSPACE()do { if (p) while (isascii((unsigned char)*p) && isspace ((unsigned char)*p)) p++; } while (0); | |||
254 | if (*p) | |||
255 | error(COMPILE2, "extra characters after \\ at the" | |||
256 | " end of %c command", cmd->code); | |||
257 | cmd->t = compile_text(); | |||
258 | break; | |||
259 | case COMMENT: /* \0 # */ | |||
260 | break; | |||
261 | case WFILE: /* w */ | |||
262 | p++; | |||
263 | EATSPACE()do { if (p) while (isascii((unsigned char)*p) && isspace ((unsigned char)*p)) p++; } while (0); | |||
264 | if (*p == '\0') | |||
265 | error(COMPILE2, "filename expected"); | |||
266 | cmd->t = duptoeol(p, "w command", NULL((void *)0)); | |||
267 | if (aflag) { | |||
268 | cmd->u.fd = -1; | |||
269 | pledge_wpath = 1; | |||
270 | } | |||
271 | else if ((cmd->u.fd = open(p, | |||
272 | O_WRONLY0x0001|O_APPEND0x0008|O_CREAT0x0200|O_TRUNC0x0400, | |||
273 | DEFFILEMODE(0000400|0000200|0000040|0000020|0000004|0000002))) == -1) | |||
274 | error(FATAL1, "%s: %s", p, strerror(errno(*__errno()))); | |||
275 | break; | |||
276 | case RFILE: /* r */ | |||
277 | pledge_rpath = 1; | |||
278 | p++; | |||
279 | EATSPACE()do { if (p) while (isascii((unsigned char)*p) && isspace ((unsigned char)*p)) p++; } while (0); | |||
280 | if (*p == '\0') | |||
281 | error(COMPILE2, "filename expected"); | |||
282 | cmd->t = duptoeol(p, "read command", NULL((void *)0)); | |||
283 | break; | |||
284 | case BRANCH: /* b t */ | |||
285 | p++; | |||
286 | EATSPACE()do { if (p) while (isascii((unsigned char)*p) && isspace ((unsigned char)*p)) p++; } while (0); | |||
287 | if (*p == '\0' || *p == ';') | |||
288 | cmd->t = NULL((void *)0); | |||
289 | else | |||
290 | cmd->t = duptoeol(p, "branch", &p); | |||
291 | if (*p == ';') { | |||
292 | p++; | |||
293 | goto semicolon; | |||
294 | } | |||
295 | break; | |||
296 | case LABEL: /* : */ | |||
297 | p++; | |||
298 | EATSPACE()do { if (p) while (isascii((unsigned char)*p) && isspace ((unsigned char)*p)) p++; } while (0); | |||
299 | cmd->t = duptoeol(p, "label", &p); | |||
300 | if (strlen(cmd->t) == 0) | |||
301 | error(COMPILE2, "empty label"); | |||
302 | enterlabel(cmd); | |||
303 | if (*p == ';') { | |||
304 | p++; | |||
305 | goto semicolon; | |||
306 | } | |||
307 | break; | |||
308 | case SUBST: /* s */ | |||
309 | p++; | |||
310 | if (*p == '\0' || *p == '\\') | |||
311 | error(COMPILE2, "substitute pattern can not be" | |||
312 | " delimited by newline or backslash"); | |||
313 | cmd->u.s = xmalloc(sizeof(struct s_subst)); | |||
314 | p = compile_re(p, &cmd->u.s->re); | |||
315 | if (p == NULL((void *)0)) | |||
316 | error(COMPILE2, "unterminated substitute pattern"); | |||
317 | --p; | |||
318 | p = compile_subst(p, cmd->u.s); | |||
319 | p = compile_flags(p, cmd->u.s); | |||
320 | EATSPACE()do { if (p) while (isascii((unsigned char)*p) && isspace ((unsigned char)*p)) p++; } while (0); | |||
321 | if (*p == ';') { | |||
322 | p++; | |||
323 | link = &cmd->next; | |||
324 | goto semicolon; | |||
325 | } | |||
326 | break; | |||
327 | case TR: /* y */ | |||
328 | p++; | |||
329 | p = compile_tr(p, (char **)&cmd->u.y); | |||
330 | EATSPACE()do { if (p) while (isascii((unsigned char)*p) && isspace ((unsigned char)*p)) p++; } while (0); | |||
331 | if (*p == ';') { | |||
332 | p++; | |||
333 | link = &cmd->next; | |||
334 | goto semicolon; | |||
335 | } | |||
336 | if (*p) | |||
337 | error(COMPILE2, "extra text at the end of a" | |||
338 | " transform command"); | |||
339 | break; | |||
340 | } | |||
341 | } | |||
342 | } | |||
343 | ||||
344 | /* | |||
345 | * Get a delimited string. P points to the delimeter of the string; d points | |||
346 | * to a buffer area. Newline and delimiter escapes are processed; other | |||
347 | * escapes are ignored. | |||
348 | * | |||
349 | * Returns a pointer to the first character after the final delimiter or NULL | |||
350 | * in the case of a non-terminated string. The character array d is filled | |||
351 | * with the processed string. | |||
352 | */ | |||
353 | static char * | |||
354 | compile_delimited(char *p, char *d) | |||
355 | { | |||
356 | char c; | |||
357 | ||||
358 | c = *p++; | |||
359 | if (c == '\0') | |||
360 | return (NULL((void *)0)); | |||
361 | else if (c == '\\') | |||
362 | error(COMPILE2, "\\ can not be used as a string delimiter"); | |||
363 | else if (c == '\n') | |||
364 | error(COMPILE2, "newline can not be used as a string delimiter"); | |||
365 | ||||
366 | while (p[0]) { | |||
367 | /* Unescaped delimiter: We are done. */ | |||
368 | if (p[0] == c) { | |||
369 | *d = '\0'; | |||
370 | return p + 1; | |||
371 | } | |||
372 | if (p[0] == '\\') { | |||
373 | /* Escaped delimiter: Skip the backslash. */ | |||
374 | if (p[1] == c) { | |||
375 | p++; | |||
376 | } else { | |||
377 | /* Backslash-n: Match linefeed. */ | |||
378 | if (p[1] == 'n') { | |||
379 | *d++ = '\n'; | |||
380 | p += 2; | |||
381 | /* Other escapes remain unchanged. */ | |||
382 | } else { | |||
383 | *d++ = *p++; | |||
384 | *d++ = *p++; | |||
385 | } | |||
386 | continue; | |||
387 | } | |||
388 | } | |||
389 | if (p[0] != '[') | |||
390 | *d++ = *p++; | |||
391 | /* | |||
392 | * Bracket expression: | |||
393 | * It may contain the delimiter without escaping. | |||
394 | */ | |||
395 | else if ((d = compile_ccl(&p, d)) == NULL((void *)0)) | |||
396 | error(COMPILE2, "unbalanced brackets ([])"); | |||
397 | } | |||
398 | return NULL((void *)0); | |||
399 | } | |||
400 | ||||
401 | ||||
/*
 * compile_ccl: copy one bracket expression.
 *
 * *sp points to the opening '['; t is the output position.  A leading
 * '^' and a ']' in first position are copied literally.  Inside the
 * expression, "[.", "[:" and "[=" sub-expressions are copied through
 * their matching ".]", ":]" or "=]", and backslash-n is replaced by a
 * newline character.
 *
 * On success *sp is advanced past the closing ']' and the output
 * position following the copied ']' is returned.  If the input ends
 * before the expression is closed, NULL is returned and *sp is left
 * unchanged.
 */
static char *
compile_ccl(char **sp, char *t)
{
	char *in = *sp;
	int prev, kind;

	/* Opening bracket, then the optional '^' and literal ']'. */
	*t++ = *in++;
	if (*in == '^')
		*t++ = *in++;
	if (*in == ']')
		*t++ = *in++;

	while (*in != '\0') {
		*t = *in;
		if (*in == ']')
			break;
		if (*in == '[' &&
		    (in[1] == '.' || in[1] == ':' || in[1] == '=')) {
			kind = in[1];
			/* Copy the '.', ':' or '=' that follows the '['. */
			in++;
			t++;
			*t = *in;
			in++;
			t++;
			/*
			 * Copy until a ']' that is immediately preceded
			 * by the same kind character.
			 */
			prev = *in;
			for (;;) {
				*t = *in;
				if (*in == ']' && prev == kind)
					break;
				prev = *in;
				if (prev == '\0')
					return NULL;
				in++;
				t++;
			}
		} else if (*in == '\\' && in[1] == 'n') {
			*t = '\n';
			in++;
		}
		in++;
		t++;
	}

	if (*in != ']')
		return (NULL);
	*sp = in + 1;
	return (t + 1);
}
431 | ||||
432 | /* | |||
433 | * Get a regular expression. P points to the delimiter of the regular | |||
434 | * expression; repp points to the address of a regexp pointer. Newline | |||
435 | * and delimiter escapes are processed; other escapes are ignored. | |||
436 | * Returns a pointer to the first character after the final delimiter | |||
437 | * or NULL in the case of a non terminated regular expression. The regexp | |||
438 | * pointer is set to the compiled regular expression. | |||
439 | * Cflags are passed to regcomp. | |||
440 | */ | |||
441 | static char * | |||
442 | compile_re(char *p, regex_t **repp) | |||
443 | { | |||
444 | int eval; | |||
445 | char *re; | |||
446 | ||||
447 | re = xmalloc(strlen(p) + 1); /* strlen(re) <= strlen(p) */ | |||
448 | p = compile_delimited(p, re); | |||
449 | if (p && strlen(re) == 0) { | |||
450 | *repp = NULL((void *)0); | |||
451 | free(re); | |||
452 | return (p); | |||
453 | } | |||
454 | *repp = xmalloc(sizeof(regex_t)); | |||
455 | if (p && (eval = regcomp(*repp, re, Eflag ? REG_EXTENDED0001 : 0)) != 0) | |||
456 | error(COMPILE2, "RE error: %s", strregerror(eval, *repp)); | |||
457 | if (maxnsub < (*repp)->re_nsub) | |||
458 | maxnsub = (*repp)->re_nsub; | |||
459 | free(re); | |||
460 | return (p); | |||
461 | } | |||
462 | ||||
463 | /* | |||
464 | * Compile the substitution string of a regular expression and set res to | |||
465 | * point to a saved copy of it. Nsub is the number of parenthesized regular | |||
466 | * expressions. | |||
467 | */ | |||
468 | static char * | |||
469 | compile_subst(char *p, struct s_subst *s) | |||
470 | { | |||
471 | static char *lbuf; | |||
472 | static size_t bufsize; | |||
473 | size_t asize, ref, size; | |||
474 | char c, *text, *op, *sp; | |||
475 | int sawesc = 0; | |||
476 | ||||
477 | c = *p++; /* Terminator character */ | |||
478 | if (c == '\0') | |||
479 | return (NULL((void *)0)); | |||
480 | ||||
481 | s->maxbref = 0; | |||
482 | s->linenum = linenum; | |||
483 | text = NULL((void *)0); | |||
484 | asize = size = 0; | |||
485 | do { | |||
486 | size_t len = ROUNDLEN(strlen(p) + 1)(((strlen(p) + 1) + 2048 - 1) & ~(2048 - 1)); | |||
487 | if (asize - size < len) { | |||
488 | do { | |||
489 | asize += len; | |||
490 | } while (asize - size < len); | |||
491 | text = xrealloc(text, asize); | |||
492 | } | |||
493 | op = sp = text + size; | |||
494 | for (; *p; p++) { | |||
495 | if (*p == '\\' || sawesc) { | |||
496 | /* | |||
497 | * If this is a continuation from the last | |||
498 | * buffer, we won't have a character to | |||
499 | * skip over. | |||
500 | */ | |||
501 | if (sawesc) | |||
502 | sawesc = 0; | |||
503 | else | |||
504 | p++; | |||
505 | ||||
506 | if (*p == '\0') { | |||
507 | /* | |||
508 | * This escaped character is continued | |||
509 | * in the next part of the line. Note | |||
510 | * this fact, then cause the loop to | |||
511 | * exit w/ normal EOL case and reenter | |||
512 | * above with the new buffer. | |||
513 | */ | |||
514 | sawesc = 1; | |||
515 | p--; | |||
516 | continue; | |||
517 | } else if (strchr("123456789", *p) != NULL((void *)0)) { | |||
518 | *sp++ = '\\'; | |||
519 | ref = *p - '0'; | |||
520 | if (s->re != NULL((void *)0) && | |||
521 | ref > s->re->re_nsub) | |||
522 | error(COMPILE2, | |||
523 | "\\%c not defined in the RE", *p); | |||
524 | if (s->maxbref < ref) | |||
525 | s->maxbref = ref; | |||
526 | } else if (*p == '&' || *p == '\\') | |||
527 | *sp++ = '\\'; | |||
528 | } else if (*p == c) { | |||
529 | p++; | |||
530 | *sp++ = '\0'; | |||
531 | size += sp - op; | |||
532 | s->new = xrealloc(text, size); | |||
533 | return (p); | |||
534 | } else if (*p == '\n') { | |||
535 | error(COMPILE2, | |||
536 | "unescaped newline inside substitute pattern"); | |||
537 | } | |||
538 | *sp++ = *p; | |||
539 | } | |||
540 | size += sp - op; | |||
541 | } while ((p = cu_fgets(&lbuf, &bufsize))); | |||
542 | error(COMPILE2, "unterminated substitute in regular expression"); | |||
543 | } | |||
544 | ||||
545 | /* | |||
546 | * Compile the flags of the s command | |||
547 | */ | |||
548 | static char * | |||
549 | compile_flags(char *p, struct s_subst *s) | |||
550 | { | |||
551 | int gn; /* True if we have seen g or n */ | |||
552 | long l; | |||
553 | ||||
554 | s->n = 1; /* Default */ | |||
555 | s->p = 0; | |||
556 | s->wfile = NULL((void *)0); | |||
557 | s->wfd = -1; | |||
558 | for (gn = 0;;) { | |||
559 | EATSPACE()do { if (p) while (isascii((unsigned char)*p) && isspace ((unsigned char)*p)) p++; } while (0); /* EXTENSION */ | |||
560 | switch (*p) { | |||
561 | case 'g': | |||
562 | if (gn) | |||
563 | error(COMPILE2, "more than one number or 'g' in" | |||
564 | " substitute flags"); | |||
565 | gn = 1; | |||
566 | s->n = 0; | |||
567 | break; | |||
568 | case '\0': | |||
569 | case '\n': | |||
570 | case ';': | |||
571 | return (p); | |||
572 | case 'p': | |||
573 | s->p = 1; | |||
574 | break; | |||
575 | case '1': case '2': case '3': | |||
576 | case '4': case '5': case '6': | |||
577 | case '7': case '8': case '9': | |||
578 | if (gn) | |||
579 | error(COMPILE2, "more than one number or 'g' in" | |||
580 | " substitute flags"); | |||
581 | gn = 1; | |||
582 | l = strtol(p, &p, 10); | |||
583 | if (l <= 0 || l >= INT_MAX2147483647) | |||
584 | error(COMPILE2, | |||
585 | "number in substitute flags out of range"); | |||
586 | s->n = (int)l; | |||
587 | continue; | |||
588 | case 'w': | |||
589 | p++; | |||
590 | EATSPACE()do { if (p) while (isascii((unsigned char)*p) && isspace ((unsigned char)*p)) p++; } while (0); | |||
591 | if (*p == '\0') | |||
592 | error(COMPILE2, "filename expected"); | |||
593 | s->wfile = duptoeol(p, "s command w flag", NULL((void *)0)); | |||
594 | *p = '\0'; | |||
595 | if (aflag) | |||
596 | pledge_wpath = 1; | |||
597 | else if ((s->wfd = open(s->wfile, | |||
598 | O_WRONLY0x0001|O_APPEND0x0008|O_CREAT0x0200|O_TRUNC0x0400, | |||
599 | DEFFILEMODE(0000400|0000200|0000040|0000020|0000004|0000002))) == -1) | |||
600 | error(FATAL1, "%s: %s", s->wfile, strerror(errno(*__errno()))); | |||
601 | return (p); | |||
602 | default: | |||
603 | error(COMPILE2, | |||
604 | "bad flag in substitute command: '%c'", *p); | |||
605 | break; | |||
606 | } | |||
607 | p++; | |||
608 | } | |||
609 | } | |||
610 | ||||
611 | /* | |||
612 | * Compile a translation set of strings into a lookup table. | |||
613 | */ | |||
614 | static char * | |||
615 | compile_tr(char *old, char **transtab) | |||
616 | { | |||
617 | int i; | |||
618 | char delimiter, check[UCHAR_MAX(127*2 +1) + 1]; | |||
619 | char *new, *end; | |||
620 | ||||
621 | memset(check, 0, sizeof(check)); | |||
622 | delimiter = *old; | |||
623 | if (delimiter == '\\') | |||
624 | error(COMPILE2, "\\ can not be used as a string delimiter"); | |||
625 | else if (delimiter == '\n' || delimiter == '\0') | |||
626 | error(COMPILE2, "newline can not be used as a string delimiter"); | |||
627 | ||||
628 | new = old++; | |||
629 | do { | |||
630 | if ((new = strchr(new + 1, delimiter)) == NULL((void *)0)) | |||
631 | error(COMPILE2, "unterminated transform source string"); | |||
632 | } while (*(new - 1) == '\\' && *(new -2) != '\\'); | |||
633 | *new = '\0'; | |||
634 | end = new++; | |||
635 | do { | |||
636 | if ((end = strchr(end + 1, delimiter)) == NULL((void *)0)) | |||
637 | error(COMPILE2, "unterminated transform target string"); | |||
638 | } while (*(end -1) == '\\' && *(end -2) != '\\'); | |||
639 | *end = '\0'; | |||
640 | ||||
641 | /* We assume characters are 8 bits */ | |||
642 | *transtab = xmalloc(UCHAR_MAX(127*2 +1) + 1); | |||
643 | for (i = 0; i <= UCHAR_MAX(127*2 +1); i++) | |||
644 | (*transtab)[i] = (char)i; | |||
645 | ||||
646 | while (*old != '\0' && *new != '\0') { | |||
647 | if (*old == '\\') { | |||
648 | old++; | |||
649 | if (*old == 'n') | |||
650 | *old = '\n'; | |||
651 | else if (*old != delimiter && *old != '\\') | |||
652 | error(COMPILE2, "Unexpected character after " | |||
653 | "backslash"); | |||
654 | } | |||
655 | if (*new == '\\') { | |||
656 | new++; | |||
657 | if (*new == 'n') | |||
658 | *new = '\n'; | |||
659 | else if (*new != delimiter && *new != '\\') | |||
660 | error(COMPILE2, "Unexpected character after " | |||
661 | "backslash"); | |||
662 | } | |||
663 | if (check[(u_char) *old] == 1) | |||
664 | error(COMPILE2, "Repeated character in source string"); | |||
665 | check[(u_char) *old] = 1; | |||
666 | (*transtab)[(u_char) *old++] = *new++; | |||
667 | } | |||
668 | if (*old != '\0' || *new != '\0') | |||
669 | error(COMPILE2, "transform strings are not the same length"); | |||
670 | return end + 1; | |||
671 | } | |||
672 | ||||
/*
 * Compile the text following an a, c, or i command.
 *
 * Lines are read with cu_fgets() and appended to a growing buffer.  A
 * backslash is dropped and the character after it is kept; when that
 * character is a newline, the text continues on the next input line.
 * Reading stops after the first line that contains no escaped newline,
 * or at end of input.
 *
 * Returns a newly allocated, NUL-terminated string (empty if no line
 * could be read).
 */
static char *
compile_text(void)
{
	size_t asize, size, bufsize;
	char *lbuf, *text, *p, *op, *s;
	int esc_nl;

	lbuf = text = NULL;
	/*
	 * bufsize is handed to cu_fgets() by address together with the
	 * NULL lbuf; start it at zero so the pair is consistent.
	 */
	asize = size = bufsize = 0;
	while ((p = cu_fgets(&lbuf, &bufsize))) {
		size_t len = ROUNDLEN(strlen(p) + 1);
		/*
		 * On the first pass asize - size is 0 and len is at least
		 * 1, so text is always allocated before it is written.
		 */
		if (asize - size < len) {
			do {
				asize += len;
			} while (asize - size < len);
			text = xrealloc(text, asize);
		}
		op = s = text + size;
		for (esc_nl = 0; *p != '\0'; p++) {
			/* Drop the backslash, keep the escaped character. */
			if (*p == '\\' && p[1] != '\0' && *++p == '\n')
				esc_nl = 1;
			*s++ = *p;
		}
		size += s - op;
		if (!esc_nl) {
			*s = '\0';
			break;
		}
	}
	free(lbuf);
	/* Trim to the exact size; this also covers the no-input case. */
	text = xrealloc(text, size + 1);
	text[size] = '\0';
	return (text);
}
710 | ||||
711 | /* | |||
712 | * Get an address and return a pointer to the first character after | |||
713 | * it. Fill the structure pointed to according to the address. | |||
714 | */ | |||
715 | static char * | |||
716 | compile_addr(char *p, struct s_addr *a) | |||
717 | { | |||
718 | char *end; | |||
719 | ||||
720 | switch (*p) { | |||
721 | case '\\': /* Context address */ | |||
722 | ++p; | |||
723 | /* FALLTHROUGH */ | |||
724 | case '/': /* Context address */ | |||
725 | p = compile_re(p, &a->u.r); | |||
726 | if (p == NULL((void *)0)) | |||
727 | error(COMPILE2, "unterminated regular expression"); | |||
728 | a->type = AT_RE; | |||
729 | return (p); | |||
730 | ||||
731 | case '$': /* Last line */ | |||
732 | a->type = AT_LAST; | |||
733 | return (p + 1); | |||
734 | /* Line number */ | |||
735 | case '0': case '1': case '2': case '3': case '4': | |||
736 | case '5': case '6': case '7': case '8': case '9': | |||
737 | a->type = AT_LINE; | |||
738 | a->u.l = strtoul(p, &end, 10); | |||
739 | return (end); | |||
740 | default: | |||
741 | error(COMPILE2, "expected context address"); | |||
742 | return (NULL((void *)0)); | |||
743 | } | |||
744 | } | |||
745 | ||||
/*
 * duptoeol --
 *	Return a newly allocated copy of the characters of s up to, but
 *	not including, the first newline or NUL.
 *
 *	When semi is non-NULL a ';' also ends the copy, s itself is left
 *	unmodified and *semi is set to the character that ended the copy.
 *	When semi is NULL the input is terminated in place at that
 *	character.
 *
 *	A warning naming ctype is issued if the last copied character is
 *	whitespace.
 */
static char *
duptoeol(char *s, char *ctype, char **semi)
{
	char *first, *copy;
	size_t n;
	int trailing_ws;

	first = s;
	trailing_ws = 0;
	while (*s != '\0' && *s != '\n' && (semi == NULL || *s != ';')) {
		/* Only the value for the final character survives. */
		trailing_ws = isspace((unsigned char)*s);
		s++;
	}
	if (semi == NULL)
		*s = '\0';

	if (trailing_ws)
		warning("whitespace after %s", ctype);

	n = s - first + 1;
	if (semi != NULL)
		*semi = s;

	copy = xmalloc(n);
	strlcpy(copy, first, n);
	return (copy);
}
775 | ||||
776 | /* | |||
777 | * Convert goto label names to addresses, and count a and r commands, in | |||
778 | * the given subset of the script. Free the memory used by labels in b | |||
779 | * and t commands (but not by :). | |||
780 | * | |||
781 | * TODO: Remove } nodes | |||
782 | */ | |||
783 | static void | |||
784 | fixuplabel(struct s_command *cp, struct s_command *end) | |||
785 | { | |||
786 | ||||
787 | for (; cp != end; cp = cp->next) | |||
788 | switch (cp->code) { | |||
789 | case 'a': | |||
790 | case 'r': | |||
791 | appendnum++; | |||
792 | break; | |||
793 | case 'b': | |||
794 | case 't': | |||
795 | /* Resolve branch target. */ | |||
796 | if (cp->t == NULL((void *)0)) { | |||
797 | cp->u.c = NULL((void *)0); | |||
798 | break; | |||
799 | } | |||
800 | if ((cp->u.c = findlabel(cp->t)) == NULL((void *)0)) | |||
801 | error(COMPILE2, "undefined label '%s'", cp->t); | |||
802 | free(cp->t); | |||
803 | break; | |||
804 | case '{': | |||
805 | /* Do interior commands. */ | |||
806 | fixuplabel(cp->u.c, cp->next); | |||
807 | break; | |||
808 | } | |||
809 | } | |||
810 | ||||
811 | /* | |||
812 | * Associate the given command label for later lookup. | |||
813 | */ | |||
814 | static void | |||
815 | enterlabel(struct s_command *cp) | |||
816 | { | |||
817 | struct labhash **lhp, *lh; | |||
818 | u_char *p; | |||
819 | u_int h, c; | |||
820 | ||||
821 | for (h = 0, p = (u_char *)cp->t; (c = *p) != 0; p++) | |||
822 | h = (h << 5) + h + c; | |||
823 | lhp = &labels[h & LHMASK(128 - 1)]; | |||
824 | for (lh = *lhp; lh != NULL((void *)0); lh = lh->lh_next) | |||
825 | if (lh->lh_hash == h && strcmp(cp->t, lh->lh_cmd->t) == 0) | |||
826 | error(COMPILE2, "duplicate label '%s'", cp->t); | |||
827 | lh = xmalloc(sizeof *lh); | |||
828 | lh->lh_next = *lhp; | |||
829 | lh->lh_hash = h; | |||
830 | lh->lh_cmd = cp; | |||
831 | lh->lh_ref = 0; | |||
832 | *lhp = lh; | |||
833 | } | |||
834 | ||||
835 | /* | |||
836 | * Find the label contained in the command l in the command linked | |||
837 | * list cp. L is excluded from the search. Return NULL if not found. | |||
838 | */ | |||
839 | static struct s_command * | |||
840 | findlabel(char *name) | |||
841 | { | |||
842 | struct labhash *lh; | |||
843 | u_char *p; | |||
844 | u_int h, c; | |||
845 | ||||
846 | for (h = 0, p = (u_char *)name; (c = *p) != 0; p++) | |||
847 | h = (h << 5) + h + c; | |||
848 | for (lh = labels[h & LHMASK(128 - 1)]; lh != NULL((void *)0); lh = lh->lh_next) { | |||
849 | if (lh->lh_hash == h && strcmp(name, lh->lh_cmd->t) == 0) { | |||
850 | lh->lh_ref = 1; | |||
851 | return (lh->lh_cmd); | |||
852 | } | |||
853 | } | |||
854 | return (NULL((void *)0)); | |||
855 | } | |||
856 | ||||
857 | /* | |||
858 | * Warn about any unused labels. As a side effect, release the label hash | |||
859 | * table space. | |||
860 | */ | |||
861 | static void | |||
862 | uselabel(void) | |||
863 | { | |||
864 | struct labhash *lh, *next; | |||
865 | int i; | |||
866 | ||||
867 | for (i = 0; i < LHSZ128; i++) { | |||
868 | for (lh = labels[i]; lh != NULL((void *)0); lh = next) { | |||
869 | next = lh->lh_next; | |||
870 | if (!lh->lh_ref) | |||
871 | warning("unused label '%s'", | |||
872 | lh->lh_cmd->t); | |||
873 | free(lh); | |||
874 | } | |||
875 | } | |||
876 | } |