File: | src/usr.bin/awk/run.c |
Warning: | line 1067, column 7 Value stored to 'n' is never read |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | /* $OpenBSD: run.c,v 1.83 2023/11/28 20:54:38 millert Exp $ */ |
2 | /**************************************************************** |
3 | Copyright (C) Lucent Technologies 1997 |
4 | All Rights Reserved |
5 | |
6 | Permission to use, copy, modify, and distribute this software and |
7 | its documentation for any purpose and without fee is hereby |
8 | granted, provided that the above copyright notice appear in all |
9 | copies and that both that the copyright notice and this |
10 | permission notice and warranty disclaimer appear in supporting |
11 | documentation, and that the name Lucent Technologies or any of |
12 | its entities not be used in advertising or publicity pertaining |
13 | to distribution of the software without specific, written prior |
14 | permission. |
15 | |
16 | LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, |
17 | INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. |
18 | IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY |
19 | SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
20 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER |
21 | IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, |
22 | ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF |
23 | THIS SOFTWARE. |
24 | ****************************************************************/ |
25 | |
26 | #define DEBUG |
27 | #include <stdio.h> |
28 | #include <ctype.h> |
29 | #include <errno(*__errno()).h> |
30 | #include <wctype.h> |
31 | #include <fcntl.h> |
32 | #include <setjmp.h> |
33 | #include <limits.h> |
34 | #include <math.h> |
35 | #include <string.h> |
36 | #include <stdlib.h> |
37 | #include <time.h> |
38 | #include <sys/types.h> |
39 | #include <sys/wait.h> |
40 | #include "awk.h" |
41 | #include "awkgram.tab.h" |
42 | |
43 | |
44 | static void stdinit(void); |
45 | static void flush_all(void); |
46 | static char *wide_char_to_byte_str(int rune, size_t *outlen); |
47 | |
/*
 * tempfree(x) calls tfree(x) only when istemp(x) holds.  The macro is the
 * variant that is compiled; the function under #else does the same thing
 * and additionally warns when an OCELL cell carries a csub outside
 * CUNK..CFREE.
 */
#if 1
#define	tempfree(x)	do { if (istemp(x)) tfree(x); } while (/*CONSTCOND*/0)
#else
void tempfree(Cell *p)
{
	if (p->ctype == OCELL && (p->csub < CUNK || p->csub > CFREE))
		WARNING("bad csub %d in Cell %d %s", p->csub, p->ctype, p->sval);
	if (istemp(p))
		tfree(p);
}
#endif
60 | |
61 | /* do we really need these? */ |
62 | /* #ifdef _NFILE */ |
63 | /* #ifndef FOPEN_MAX */ |
64 | /* #define FOPEN_MAX _NFILE */ |
65 | /* #endif */ |
66 | /* #endif */ |
67 | /* */ |
68 | /* #ifndef FOPEN_MAX */ |
69 | /* #define FOPEN_MAX 40 */ /* max number of open files */ |
70 | /* #endif */ |
71 | /* */ |
72 | /* #ifndef RAND_MAX */ |
73 | /* #define RAND_MAX 32767 */ /* all that ansi guarantees */ |
74 | /* #endif */ |
75 | |
76 | jmp_buf env; |
77 | extern int pairstack[]; |
78 | extern Awkfloat srand_seed; |
79 | |
80 | Node *winner = NULL((void *)0); /* root of parse tree */ |
81 | Cell *tmps; /* free temporary cells for execution */ |
82 | |
83 | static Cell truecell ={ OBOOL2, BTRUE11, 0, 0, 1.0, NUM01, NULL((void *)0), NULL((void *)0) }; |
84 | Cell *True = &truecell; |
85 | static Cell falsecell ={ OBOOL2, BFALSE12, 0, 0, 0.0, NUM01, NULL((void *)0), NULL((void *)0) }; |
86 | Cell *False = &falsecell; |
87 | static Cell breakcell ={ OJUMP3, JBREAK23, 0, 0, 0.0, NUM01, NULL((void *)0), NULL((void *)0) }; |
88 | Cell *jbreak = &breakcell; |
89 | static Cell contcell ={ OJUMP3, JCONT24, 0, 0, 0.0, NUM01, NULL((void *)0), NULL((void *)0) }; |
90 | Cell *jcont = &contcell; |
91 | static Cell nextcell ={ OJUMP3, JNEXT22, 0, 0, 0.0, NUM01, NULL((void *)0), NULL((void *)0) }; |
92 | Cell *jnext = &nextcell; |
93 | static Cell nextfilecell ={ OJUMP3, JNEXTFILE26, 0, 0, 0.0, NUM01, NULL((void *)0), NULL((void *)0) }; |
94 | Cell *jnextfile = &nextfilecell; |
95 | static Cell exitcell ={ OJUMP3, JEXIT21, 0, 0, 0.0, NUM01, NULL((void *)0), NULL((void *)0) }; |
96 | Cell *jexit = &exitcell; |
97 | static Cell retcell ={ OJUMP3, JRET25, 0, 0, 0.0, NUM01, NULL((void *)0), NULL((void *)0) }; |
98 | Cell *jret = &retcell; |
99 | static Cell tempcell ={ OCELL1, CTEMP4, 0, EMPTY, 0.0, NUM01|STR02|DONTFREE04, NULL((void *)0), NULL((void *)0) }; |
100 | |
101 | Node *curnode = NULL((void *)0); /* the node being executed, for debugging */ |
102 | |
103 | /* buffer memory management */ |
/*
 * adjbuf - make sure a managed buffer can hold at least minlen bytes.
 *
 *	pbuf	address of the pointer to the buffer being managed
 *	psiz	address of the variable holding the buffer size
 *	minlen	minimum number of bytes needed
 *	quantum	size granularity; when nonzero the new size is rounded up
 *		to the next multiple of it
 *	pbptr	address of a movable pointer into the buffer, or 0 if none;
 *		it is re-pointed at the same offset in the new storage
 *	whatrtn	name of the calling routine; when non-NULL an allocation
 *		failure is reported through FATAL()
 *
 * Returns 0 on realloc failure, nonzero on success.  When the buffer is
 * already big enough nothing is modified.
 */
int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr,
	const char *whatrtn)
{
	char *grown;
	int excess, offset;

	if (minlen <= *psiz)
		return 1;

	excess = quantum ? minlen % quantum : 0;
	offset = pbptr ? *pbptr - *pbuf : 0;
	if (excess != 0)
		minlen += quantum - excess;	/* round up to next multiple of quantum */

	grown = (char *) realloc(*pbuf, minlen);
	DPRINTF("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", whatrtn, *psiz, minlen, (void*)*pbuf, (void*)grown);
	if (grown == NULL) {
		if (whatrtn)
			FATAL("out of memory in %s", whatrtn);
		return 0;
	}

	*pbuf = grown;
	*psiz = minlen;
	if (pbptr)
		*pbptr = grown + offset;
	return 1;
}
137 | |
138 | void run(Node *a) /* execution of parse tree starts here */ |
139 | { |
140 | |
141 | stdinit(); |
142 | execute(a); |
143 | closeall(); |
144 | } |
145 | |
146 | Cell *execute(Node *u) /* execute a node of the parse tree */ |
147 | { |
148 | Cell *(*proc)(Node **, int); |
149 | Cell *x; |
150 | Node *a; |
151 | |
152 | if (u == NULL((void *)0)) |
153 | return(True); |
154 | for (a = u; ; a = a->nnext) { |
155 | curnode = a; |
156 | if (isvalue(a)((a)->ntype == 1)) { |
157 | x = (Cell *) (a->narg[0]); |
158 | if (isfld(x)((x)->tval & 0100) && !donefld) |
159 | fldbld(); |
160 | else if (isrec(x)((x)->tval & 0200) && !donerec) |
161 | recbld(); |
162 | return(x); |
163 | } |
164 | if (notlegal(a->nobj)(a->nobj <= 257 || a->nobj >= 352 || proctab[a-> nobj-257] == nullproc)) /* probably a Cell* but too risky to print */ |
165 | FATAL("illegal statement"); |
166 | proc = proctab[a->nobj-FIRSTTOKEN257]; |
167 | x = (*proc)(a->narg, a->nobj); |
168 | if (isfld(x)((x)->tval & 0100) && !donefld) |
169 | fldbld(); |
170 | else if (isrec(x)((x)->tval & 0200) && !donerec) |
171 | recbld(); |
172 | if (isexpr(a)((a)->ntype == 3)) |
173 | return(x); |
174 | if (isjump(x)((x)->ctype == 3)) |
175 | return(x); |
176 | if (a->nnext == NULL((void *)0)) |
177 | return(x); |
178 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); |
179 | } |
180 | } |
181 | |
182 | |
183 | Cell *program(Node **a, int n) /* execute an awk program */ |
184 | { /* a[0] = BEGIN, a[1] = body, a[2] = END */ |
185 | Cell *x; |
186 | |
187 | if (setjmp(env) != 0) |
188 | goto ex; |
189 | if (a[0]) { /* BEGIN */ |
190 | x = execute(a[0]); |
191 | if (isexit(x)((x)->csub == 21)) |
192 | return(True); |
193 | if (isjump(x)((x)->ctype == 3)) |
194 | FATAL("illegal break, continue, next or nextfile from BEGIN"); |
195 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); |
196 | } |
197 | if (a[1] || a[2]) |
198 | while (getrec(&record, &recsize, true1) > 0) { |
199 | x = execute(a[1]); |
200 | if (isexit(x)((x)->csub == 21)) |
201 | break; |
202 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); |
203 | } |
204 | ex: |
205 | if (setjmp(env) != 0) /* handles exit within END */ |
206 | goto ex1; |
207 | if (a[2]) { /* END */ |
208 | x = execute(a[2]); |
209 | if (isbreak(x)((x)->csub == 23) || isnext(x)((x)->csub == 22 || (x)->csub == 26) || iscont(x)((x)->csub == 24)) |
210 | FATAL("illegal break, continue, next or nextfile from END"); |
211 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); |
212 | } |
213 | ex1: |
214 | return(True); |
215 | } |
216 | |
217 | struct Frame { /* stack frame for awk function calls */ |
218 | int nargs; /* number of arguments in this call */ |
219 | Cell *fcncell; /* pointer to Cell for function */ |
220 | Cell **args; /* pointer to array of arguments after execute */ |
221 | Cell *retval; /* return value */ |
222 | }; |
223 | |
224 | #define NARGS50 50 /* max args in a call */ |
225 | |
226 | struct Frame *frame = NULL((void *)0); /* base of stack frames; dynamically allocated */ |
227 | int nframe = 0; /* number of frames allocated */ |
228 | struct Frame *frp = NULL((void *)0); /* frame pointer. bottom level unused */ |
229 | |
230 | Cell *call(Node **a, int n) /* function call. very kludgy and fragile */ |
231 | { |
232 | static const Cell newcopycell = { OCELL1, CCOPY6, 0, EMPTY, 0.0, NUM01|STR02|DONTFREE04, NULL((void *)0), NULL((void *)0) }; |
233 | int i, ncall, ndef; |
234 | int freed = 0; /* handles potential double freeing when fcn & param share a tempcell */ |
235 | Node *x; |
236 | Cell *args[NARGS50], *oargs[NARGS50]; /* BUG: fixed size arrays */ |
237 | Cell *y, *z, *fcn; |
238 | char *s; |
239 | |
240 | fcn = execute(a[0]); /* the function itself */ |
241 | s = fcn->nval; |
242 | if (!isfcn(fcn)((fcn)->tval & 040)) |
243 | FATAL("calling undefined function %s", s); |
244 | if (frame == NULL((void *)0)) { |
245 | frp = frame = (struct Frame *) calloc(nframe += 100, sizeof(*frame)); |
246 | if (frame == NULL((void *)0)) |
247 | FATAL("out of space for stack frames calling %s", s); |
248 | } |
249 | for (ncall = 0, x = a[1]; x != NULL((void *)0); x = x->nnext) /* args in call */ |
250 | ncall++; |
251 | ndef = (int) fcn->fval; /* args in defn */ |
252 | DPRINTF("calling %s, %d args (%d in defn), frp=%d\n", s, ncall, ndef, (int) (frp-frame))if (dbg) printf("calling %s, %d args (%d in defn), frp=%d\n", s, ncall, ndef, (int) (frp-frame)); |
253 | if (ncall > ndef) |
254 | WARNING("function %s called with %d args, uses only %d", |
255 | s, ncall, ndef); |
256 | if (ncall + ndef > NARGS50) |
257 | FATAL("function %s has %d arguments, limit %d", s, ncall+ndef, NARGS50); |
258 | for (i = 0, x = a[1]; x != NULL((void *)0); i++, x = x->nnext) { /* get call args */ |
259 | DPRINTF("evaluate args[%d], frp=%d:\n", i, (int) (frp-frame))if (dbg) printf("evaluate args[%d], frp=%d:\n", i, (int) (frp -frame)); |
260 | y = execute(x); |
261 | oargs[i] = y; |
262 | DPRINTF("args[%d]: %s %f <%s>, t=%o\n",if (dbg) printf("args[%d]: %s %f <%s>, t=%o\n", i, ((y-> nval) ? (y->nval) : "(null)"), y->fval, ((y)->tval & 020) ? "(array)" : ((y->sval) ? (y->sval) : "(null)"), y->tval) |
263 | i, NN(y->nval), y->fval, isarr(y) ? "(array)" : NN(y->sval), y->tval)if (dbg) printf("args[%d]: %s %f <%s>, t=%o\n", i, ((y-> nval) ? (y->nval) : "(null)"), y->fval, ((y)->tval & 020) ? "(array)" : ((y->sval) ? (y->sval) : "(null)"), y->tval); |
264 | if (isfcn(y)((y)->tval & 040)) |
265 | FATAL("can't use function %s as argument in %s", y->nval, s); |
266 | if (isarr(y)((y)->tval & 020)) |
267 | args[i] = y; /* arrays by ref */ |
268 | else |
269 | args[i] = copycell(y); |
270 | tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0); |
271 | } |
272 | for ( ; i < ndef; i++) { /* add null args for ones not provided */ |
273 | args[i] = gettemp(); |
274 | *args[i] = newcopycell; |
275 | } |
276 | frp++; /* now ok to up frame */ |
277 | if (frp >= frame + nframe) { |
278 | int dfp = frp - frame; /* old index */ |
279 | frame = (struct Frame *) reallocarray(frame, (nframe += 100), sizeof(*frame)); |
280 | if (frame == NULL((void *)0)) |
281 | FATAL("out of space for stack frames in %s", s); |
282 | frp = frame + dfp; |
283 | } |
284 | frp->fcncell = fcn; |
285 | frp->args = args; |
286 | frp->nargs = ndef; /* number defined with (excess are locals) */ |
287 | frp->retval = gettemp(); |
288 | |
289 | DPRINTF("start exec of %s, frp=%d\n", s, (int) (frp-frame))if (dbg) printf("start exec of %s, frp=%d\n", s, (int) (frp-frame )); |
290 | y = execute((Node *)(fcn->sval)); /* execute body */ |
291 | DPRINTF("finished exec of %s, frp=%d\n", s, (int) (frp-frame))if (dbg) printf("finished exec of %s, frp=%d\n", s, (int) (frp -frame)); |
292 | |
293 | for (i = 0; i < ndef; i++) { |
294 | Cell *t = frp->args[i]; |
295 | if (isarr(t)((t)->tval & 020)) { |
296 | if (t->csub == CCOPY6) { |
297 | if (i >= ncall) { |
298 | freesymtab(t); |
299 | t->csub = CTEMP4; |
300 | tempfree(t)do { if (((t)->csub == 4)) tfree(t); } while ( 0); |
301 | } else { |
302 | oargs[i]->tval = t->tval; |
303 | oargs[i]->tval &= ~(STR02|NUM01|DONTFREE04); |
304 | oargs[i]->sval = t->sval; |
305 | tempfree(t)do { if (((t)->csub == 4)) tfree(t); } while ( 0); |
306 | } |
307 | } |
308 | } else if (t != y) { /* kludge to prevent freeing twice */ |
309 | t->csub = CTEMP4; |
310 | tempfree(t)do { if (((t)->csub == 4)) tfree(t); } while ( 0); |
311 | } else if (t == y && t->csub == CCOPY6) { |
312 | t->csub = CTEMP4; |
313 | tempfree(t)do { if (((t)->csub == 4)) tfree(t); } while ( 0); |
314 | freed = 1; |
315 | } |
316 | } |
317 | tempfree(fcn)do { if (((fcn)->csub == 4)) tfree(fcn); } while ( 0); |
318 | if (isexit(y)((y)->csub == 21) || isnext(y)((y)->csub == 22 || (y)->csub == 26)) |
319 | return y; |
320 | if (freed == 0) { |
321 | tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0); /* don't free twice! */ |
322 | } |
323 | z = frp->retval; /* return value */ |
324 | DPRINTF("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval)if (dbg) printf("%s returns %g |%s| %o\n", s, getfval(z), getsval (z), z->tval); |
325 | frp--; |
326 | return(z); |
327 | } |
328 | |
329 | Cell *copycell(Cell *x) /* make a copy of a cell in a temp */ |
330 | { |
331 | Cell *y; |
332 | |
333 | /* copy is not constant or field */ |
334 | |
335 | y = gettemp(); |
336 | y->tval = x->tval & ~(CON010|FLD0100|REC0200); |
337 | y->csub = CCOPY6; /* prevents freeing until call is over */ |
338 | y->nval = x->nval; /* BUG? */ |
339 | if (isstr(x)((x)->tval & 02) /* || x->ctype == OCELL */) { |
340 | y->sval = tostring(x->sval); |
341 | y->tval &= ~DONTFREE04; |
342 | } else |
343 | y->tval |= DONTFREE04; |
344 | y->fval = x->fval; |
345 | return y; |
346 | } |
347 | |
348 | Cell *arg(Node **a, int n) /* nth argument of a function */ |
349 | { |
350 | |
351 | n = ptoi(a[0]); /* argument number, counting from 0 */ |
352 | DPRINTF("arg(%d), frp->nargs=%d\n", n, frp->nargs)if (dbg) printf("arg(%d), frp->nargs=%d\n", n, frp->nargs ); |
353 | if (n+1 > frp->nargs) |
354 | FATAL("argument #%d of function %s was not supplied", |
355 | n+1, frp->fcncell->nval); |
356 | return frp->args[n]; |
357 | } |
358 | |
359 | Cell *jump(Node **a, int n) /* break, continue, next, nextfile, return */ |
360 | { |
361 | Cell *y; |
362 | |
363 | switch (n) { |
364 | case EXIT297: |
365 | if (a[0] != NULL((void *)0)) { |
366 | y = execute(a[0]); |
367 | errorflag = (int) getfval(y); |
368 | tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0); |
369 | } |
370 | longjmp(env, 1); |
371 | case RETURN340: |
372 | if (a[0] != NULL((void *)0)) { |
373 | y = execute(a[0]); |
374 | if ((y->tval & (STR02|NUM01)) == (STR02|NUM01)) { |
375 | setsval(frp->retval, getsval(y)); |
376 | frp->retval->fval = getfval(y); |
377 | frp->retval->tval |= NUM01; |
378 | } |
379 | else if (y->tval & STR02) |
380 | setsval(frp->retval, getsval(y)); |
381 | else if (y->tval & NUM01) |
382 | setfval(frp->retval, getfval(y)); |
383 | else /* can't happen */ |
384 | FATAL("bad type variable %d", y->tval); |
385 | tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0); |
386 | } |
387 | return(jret); |
388 | case NEXT307: |
389 | return(jnext); |
390 | case NEXTFILE308: |
391 | nextfile(); |
392 | return(jnextfile); |
393 | case BREAK292: |
394 | return(jbreak); |
395 | case CONTINUE294: |
396 | return(jcont); |
397 | default: /* can't happen */ |
398 | FATAL("illegal jump type %d", n); |
399 | } |
400 | return 0; /* not reached */ |
401 | } |
402 | |
403 | Cell *awkgetline(Node **a, int n) /* get next line from specific input */ |
404 | { /* a[0] is variable, a[1] is operator, a[2] is filename */ |
405 | Cell *r, *x; |
406 | extern Cell **fldtab; |
407 | FILE *fp; |
408 | char *buf; |
409 | int bufsize = recsize; |
410 | int mode; |
411 | bool_Bool newflag; |
412 | double result; |
413 | |
414 | if ((buf = (char *) malloc(bufsize)) == NULL((void *)0)) |
415 | FATAL("out of memory in getline"); |
416 | |
417 | fflush(stdout(&__sF[1])); /* in case someone is waiting for a prompt */ |
418 | r = gettemp(); |
419 | if (a[1] != NULL((void *)0)) { /* getline < file */ |
420 | x = execute(a[2]); /* filename */ |
421 | mode = ptoi(a[1]); |
422 | if (mode == '|') /* input pipe */ |
423 | mode = LE286; /* arbitrary flag */ |
424 | fp = openfile(mode, getsval(x), &newflag); |
425 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); |
426 | if (fp == NULL((void *)0)) |
427 | n = -1; |
428 | else |
429 | n = readrec(&buf, &bufsize, fp, newflag); |
430 | if (n <= 0) { |
431 | ; |
432 | } else if (a[0] != NULL((void *)0)) { /* getline var <file */ |
433 | x = execute(a[0]); |
434 | setsval(x, buf); |
435 | if (is_number(x->sval, & result)is_valid_number(x->sval, 0, ((void *)0), & result)) { |
436 | x->fval = result; |
437 | x->tval |= NUM01; |
438 | } |
439 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); |
440 | } else { /* getline <file */ |
441 | setsval(fldtab[0], buf); |
442 | if (is_number(fldtab[0]->sval, & result)is_valid_number(fldtab[0]->sval, 0, ((void *)0), & result )) { |
443 | fldtab[0]->fval = result; |
444 | fldtab[0]->tval |= NUM01; |
445 | } |
446 | } |
447 | } else { /* bare getline; use current input */ |
448 | if (a[0] == NULL((void *)0)) /* getline */ |
449 | n = getrec(&record, &recsize, true1); |
450 | else { /* getline var */ |
451 | n = getrec(&buf, &bufsize, false0); |
452 | if (n > 0) { |
453 | x = execute(a[0]); |
454 | setsval(x, buf); |
455 | if (is_number(x->sval, & result)is_valid_number(x->sval, 0, ((void *)0), & result)) { |
456 | x->fval = result; |
457 | x->tval |= NUM01; |
458 | } |
459 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); |
460 | } |
461 | } |
462 | } |
463 | setfval(r, (Awkfloat) n); |
464 | free(buf); |
465 | return r; |
466 | } |
467 | |
468 | Cell *getnf(Node **a, int n) /* get NF */ |
469 | { |
470 | if (!donefld) |
471 | fldbld(); |
472 | return (Cell *) a[0]; |
473 | } |
474 | |
475 | static char * |
476 | makearraystring(Node *p, const char *func) |
477 | { |
478 | char *buf; |
479 | int bufsz = recsize; |
480 | size_t blen; |
481 | |
482 | if ((buf = (char *) malloc(bufsz)) == NULL((void *)0)) { |
483 | FATAL("%s: out of memory", func); |
484 | } |
485 | |
486 | blen = 0; |
487 | buf[blen] = '\0'; |
488 | |
489 | for (; p; p = p->nnext) { |
490 | Cell *x = execute(p); /* expr */ |
491 | char *s = getsval(x); |
492 | size_t seplen = strlen(getsval(subseploc)); |
493 | size_t nsub = p->nnext ? seplen : 0; |
494 | size_t slen = strlen(s); |
495 | size_t tlen = blen + slen + nsub; |
496 | |
497 | if (!adjbuf(&buf, &bufsz, tlen + 1, recsize, 0, func)) { |
498 | FATAL("%s: out of memory %s[%s...]", |
499 | func, x->nval, buf); |
500 | } |
501 | memcpy(buf + blen, s, slen); |
502 | if (nsub) { |
503 | memcpy(buf + blen + slen, *SUBSEP, nsub); |
504 | } |
505 | buf[tlen] = '\0'; |
506 | blen = tlen; |
507 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); |
508 | } |
509 | return buf; |
510 | } |
511 | |
512 | Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */ |
513 | { |
514 | Cell *x, *z; |
515 | char *buf; |
516 | |
517 | x = execute(a[0]); /* Cell* for symbol table */ |
518 | buf = makearraystring(a[1], __func__); |
519 | if (!isarr(x)((x)->tval & 020)) { |
520 | DPRINTF("making %s into an array\n", NN(x->nval))if (dbg) printf("making %s into an array\n", ((x->nval) ? ( x->nval) : "(null)")); |
521 | if (freeable(x)( ((x)->tval & (02|04)) == 02 )) |
522 | xfree(x->sval){ free((void *)(intptr_t)(x->sval)); (x->sval) = ((void *)0); }; |
523 | x->tval &= ~(STR02|NUM01|DONTFREE04); |
524 | x->tval |= ARR020; |
525 | x->sval = (char *) makesymtab(NSYMTAB50); |
526 | } |
527 | z = setsymtab(buf, "", 0.0, STR02|NUM01, (Array *) x->sval); |
528 | z->ctype = OCELL1; |
529 | z->csub = CVAR2; |
530 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); |
531 | free(buf); |
532 | return(z); |
533 | } |
534 | |
535 | Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */ |
536 | { |
537 | Cell *x; |
538 | |
539 | x = execute(a[0]); /* Cell* for symbol table */ |
540 | if (x == symtabloc) { |
541 | FATAL("cannot delete SYMTAB or its elements"); |
542 | } |
543 | if (!isarr(x)((x)->tval & 020)) |
544 | return True; |
545 | if (a[1] == NULL((void *)0)) { /* delete the elements, not the table */ |
546 | freesymtab(x); |
547 | x->tval &= ~STR02; |
548 | x->tval |= ARR020; |
549 | x->sval = (char *) makesymtab(NSYMTAB50); |
550 | } else { |
551 | char *buf = makearraystring(a[1], __func__); |
552 | freeelem(x, buf); |
553 | free(buf); |
554 | } |
555 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); |
556 | return True; |
557 | } |
558 | |
559 | Cell *intest(Node **a, int n) /* a[0] is index (list), a[1] is symtab */ |
560 | { |
561 | Cell *ap, *k; |
562 | char *buf; |
563 | |
564 | ap = execute(a[1]); /* array name */ |
565 | if (!isarr(ap)((ap)->tval & 020)) { |
566 | DPRINTF("making %s into an array\n", ap->nval)if (dbg) printf("making %s into an array\n", ap->nval); |
567 | if (freeable(ap)( ((ap)->tval & (02|04)) == 02 )) |
568 | xfree(ap->sval){ free((void *)(intptr_t)(ap->sval)); (ap->sval) = ((void *)0); }; |
569 | ap->tval &= ~(STR02|NUM01|DONTFREE04); |
570 | ap->tval |= ARR020; |
571 | ap->sval = (char *) makesymtab(NSYMTAB50); |
572 | } |
573 | buf = makearraystring(a[0], __func__); |
574 | k = lookup(buf, (Array *) ap->sval); |
575 | tempfree(ap)do { if (((ap)->csub == 4)) tfree(ap); } while ( 0); |
576 | free(buf); |
577 | if (k == NULL((void *)0)) |
578 | return(False); |
579 | else |
580 | return(True); |
581 | } |
582 | |
583 | |
584 | /* ======== utf-8 code ========== */ |
585 | |
586 | /* |
587 | * Awk strings can contain ascii, random 8-bit items (eg Latin-1), |
588 | * or utf-8. u8_isutf tests whether a string starts with a valid |
589 | * utf-8 sequence, and returns 0 if not (e.g., high bit set). |
590 | * u8_nextlen returns length of next valid sequence, which is |
591 | * 1 for ascii, 2..4 for utf-8, or 1 for high bit non-utf. |
592 | * u8_strlen returns length of string in valid utf-8 sequences |
593 | * and/or high-bit bytes. Conversion functions go between byte |
594 | * number and character number. |
595 | * |
596 | * In theory, this behaves the same as before for non-utf8 bytes. |
597 | * |
598 | * Limited checking! This is a potential security hole. |
599 | */ |
600 | |
601 | /* is s the beginning of a valid utf-8 string? */ |
602 | /* return length 1..4 if yes, 0 if no */ |
603 | int u8_isutf(const char *s) |
604 | { |
605 | int n, ret; |
606 | unsigned char c; |
607 | |
608 | c = s[0]; |
609 | if (c < 128 || awk_mb_cur_max == 1) |
610 | return 1; /* what if it's 0? */ |
611 | |
612 | n = strlen(s); |
613 | if (n >= 2 && ((c>>5) & 0x7) == 0x6 && (s[1] & 0xC0) == 0x80) { |
614 | ret = 2; /* 110xxxxx 10xxxxxx */ |
615 | } else if (n >= 3 && ((c>>4) & 0xF) == 0xE && (s[1] & 0xC0) == 0x80 |
616 | && (s[2] & 0xC0) == 0x80) { |
617 | ret = 3; /* 1110xxxx 10xxxxxx 10xxxxxx */ |
618 | } else if (n >= 4 && ((c>>3) & 0x1F) == 0x1E && (s[1] & 0xC0) == 0x80 |
619 | && (s[2] & 0xC0) == 0x80 && (s[3] & 0xC0) == 0x80) { |
620 | ret = 4; /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ |
621 | } else { |
622 | ret = 0; |
623 | } |
624 | return ret; |
625 | } |
626 | |
627 | /* Convert (prefix of) utf8 string to utf-32 rune. */ |
628 | /* Sets *rune to the value, returns the length. */ |
629 | /* No error checking: watch out. */ |
630 | int u8_rune(int *rune, const char *s) |
631 | { |
632 | int n, ret; |
633 | unsigned char c; |
634 | |
635 | c = s[0]; |
636 | if (c < 128 || awk_mb_cur_max == 1) { |
637 | *rune = c; |
638 | return 1; |
639 | } |
640 | |
641 | n = strlen(s); |
642 | if (n >= 2 && ((c>>5) & 0x7) == 0x6 && (s[1] & 0xC0) == 0x80) { |
643 | *rune = ((c & 0x1F) << 6) | (s[1] & 0x3F); /* 110xxxxx 10xxxxxx */ |
644 | ret = 2; |
645 | } else if (n >= 3 && ((c>>4) & 0xF) == 0xE && (s[1] & 0xC0) == 0x80 |
646 | && (s[2] & 0xC0) == 0x80) { |
647 | *rune = ((c & 0xF) << 12) | ((s[1] & 0x3F) << 6) | (s[2] & 0x3F); |
648 | /* 1110xxxx 10xxxxxx 10xxxxxx */ |
649 | ret = 3; |
650 | } else if (n >= 4 && ((c>>3) & 0x1F) == 0x1E && (s[1] & 0xC0) == 0x80 |
651 | && (s[2] & 0xC0) == 0x80 && (s[3] & 0xC0) == 0x80) { |
652 | *rune = ((c & 0x7) << 18) | ((s[1] & 0x3F) << 12) | ((s[2] & 0x3F) << 6) | (s[3] & 0x3F); |
653 | /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ |
654 | ret = 4; |
655 | } else { |
656 | *rune = c; |
657 | ret = 1; |
658 | } |
659 | return ret; /* returns one byte if sequence doesn't look like utf */ |
660 | } |
661 | |
/*
 * u8_nextlen - length of the next sequence in s: 1 for ascii or random
 * bytes, 2..4 for valid utf8.  Unlike u8_isutf() it never returns 0.
 */
int u8_nextlen(const char *s)
{
	int len = u8_isutf(s);

	return (len != 0) ? len : 1;
}
672 | |
/*
 * u8_strlen - return number of utf characters or single non-utf bytes in s.
 *
 * u8_nextlen() already yields 1 for ascii bytes and whenever
 * awk_mb_cur_max is 1, so no separate fast path is needed here.
 *
 * The overrun check runs after the index has been advanced; testing it
 * before the advance (while i < n still holds) could never trigger.
 */
int u8_strlen(const char *s)
{
	int i, n, count;

	n = strlen(s);
	count = 0;
	i = 0;
	while (i < n) {
		i += u8_nextlen(&s[i]);
		if (i > n)
			FATAL("bad utf count [%s] n=%d i=%d\n", s, n, i);
		count++;
	}
	return count;
}
694 | |
/*
 * u8_char2byte - convert utf-8 char number in a string to its byte offset.
 *
 * Returns the number of bytes occupied by the first charnum characters
 * of s.  The walk stops at the terminating NUL, so a charnum larger than
 * the number of characters in s yields the byte length of s instead of
 * reading beyond the end of the string.  A charnum <= 0 yields 0.
 */
int u8_char2byte(const char *s, int charnum)
{
	int step;
	int bytenum = 0;

	for (; charnum > 0 && *s != '\0'; charnum--) {
		step = u8_nextlen(s);
		s += step;
		bytenum += step;
	}
	return bytenum;
}
709 | |
/*
 * u8_byte2char - convert byte offset in s to utf-8 char number that
 * starts there.
 *
 * The count is origin 1: byte offset 0 maps to character 1.  An offset
 * beyond the end of the string yields -1 (???), a negative offset 0.
 */
int u8_byte2char(const char *s, int bytenum)
{
	int pos;
	int slen = strlen(s);
	int charnum = 0;	/* BUG: what origin? */
			/* should be 0 to match start==0 which means no match */

	if (bytenum > slen)
		return -1;	/* ??? */

	pos = 0;
	while (pos <= bytenum) {
		pos += u8_nextlen(s + pos);
		charnum++;
	}
	return charnum;
}
727 | |
728 | /* runetochar() adapted from rune.c in the Plan 9 distribution */ |
729 | |
/* Bit-layout constants for the utf-8 encoder below. */
enum
{
	Runeerror	= 128,		/* from somewhere else */
	Runemax		= 0x10FFFF,

	/* payload bits carried by the lead byte of an N-byte sequence */
	Bit1	= 7,
	Bitx	= 6,			/* payload bits in a continuation byte */
	Bit2	= 5,
	Bit3	= 4,
	Bit4	= 3,
	Bit5	= 2,

	/* tag bits for each kind of byte */
	T1	= ((1<<(Bit1+1))-1) ^ 0xFF,	/* 0000 0000 */
	Tx	= ((1<<(Bitx+1))-1) ^ 0xFF,	/* 1000 0000 */
	T2	= ((1<<(Bit2+1))-1) ^ 0xFF,	/* 1100 0000 */
	T3	= ((1<<(Bit3+1))-1) ^ 0xFF,	/* 1110 0000 */
	T4	= ((1<<(Bit4+1))-1) ^ 0xFF,	/* 1111 0000 */
	T5	= ((1<<(Bit5+1))-1) ^ 0xFF,	/* 1111 1000 */

	/* largest value representable in an N-byte sequence */
	Rune1	= (1<<(Bit1+0*Bitx))-1,		/* 0000 0000 0000 0000 0111 1111 */
	Rune2	= (1<<(Bit2+1*Bitx))-1,		/* 0000 0000 0000 0111 1111 1111 */
	Rune3	= (1<<(Bit3+2*Bitx))-1,		/* 0000 0000 1111 1111 1111 1111 */
	Rune4	= (1<<(Bit4+3*Bitx))-1,		/* 0001 1111 1111 1111 1111 1111 */

	Maskx	= (1<<Bitx)-1,			/* 0011 1111 */
	Testx	= Maskx ^ 0xFF,			/* 1100 0000 */

};

/*
 * runetochar - encode rune c as utf-8 into str.
 *
 * Writes 1 to 4 bytes (no terminating NUL) and returns how many were
 * written; str must have room for 4.  A value above Runemax is replaced
 * by Runeerror before being encoded in the three-byte form.
 */
int runetochar(char *str, int c)
{
	char *out = str;

	/* one character sequence 00000-0007F => 00-7F */
	if (c <= Rune1) {
		*out++ = c;
		return out - str;
	}

	/* two character sequence 00080-007FF => T2 Tx */
	if (c <= Rune2) {
		*out++ = T2 | (c >> 1*Bitx);
		*out++ = Tx | (c & Maskx);
		return out - str;
	}

	if (c > Runemax)
		c = Runeerror;

	if (c <= Rune3) {
		/* three character sequence 00800-0FFFF => T3 Tx Tx */
		*out++ = T3 | (c >> 2*Bitx);
	} else {
		/* four character sequence 010000-1FFFFF => T4 Tx Tx Tx */
		*out++ = T4 | (c >> 3*Bitx);
		*out++ = Tx | ((c >> 2*Bitx) & Maskx);
	}
	*out++ = Tx | ((c >> 1*Bitx) & Maskx);
	*out++ = Tx | (c & Maskx);
	return out - str;
}
791 | |
792 | |
793 | /* ========== end of utf8 code =========== */ |
794 | |
795 | |
796 | |
797 | Cell *matchop(Node **a, int n) /* ~ and match() */ |
798 | { |
799 | Cell *x, *y; |
800 | char *s, *t; |
801 | int i; |
802 | int cstart, cpatlen, len; |
803 | fa *pfa; |
804 | int (*mf)(fa *, const char *) = match, mode = 0; |
805 | |
806 | if (n == MATCHFCN306) { |
807 | mf = pmatch; |
808 | mode = 1; |
809 | } |
810 | x = execute(a[1]); /* a[1] = target text */ |
811 | s = getsval(x); |
812 | if (a[0] == NULL((void *)0)) /* a[1] == 0: already-compiled reg expr */ |
813 | i = (*mf)((fa *) a[2], s); |
814 | else { |
815 | y = execute(a[2]); /* a[2] = regular expr */ |
816 | t = getsval(y); |
817 | pfa = makedfa(t, mode); |
818 | i = (*mf)(pfa, s); |
819 | tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0); |
820 | } |
821 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); |
822 | if (n == MATCHFCN306) { |
823 | int start = patbeg - s + 1; /* origin 1 */ |
824 | if (patlen < 0) { |
825 | start = 0; /* not found */ |
826 | } else { |
827 | cstart = u8_byte2char(s, start-1); |
828 | cpatlen = 0; |
829 | for (i = 0; i < patlen; i += len) { |
830 | len = u8_nextlen(patbeg+i); |
831 | cpatlen++; |
832 | } |
833 | |
834 | start = cstart; |
835 | patlen = cpatlen; |
836 | } |
837 | |
838 | setfval(rstartloc, (Awkfloat) start); |
839 | setfval(rlengthloc, (Awkfloat) patlen); |
840 | x = gettemp(); |
841 | x->tval = NUM01; |
842 | x->fval = start; |
843 | return x; |
844 | } else if ((n == MATCH265 && i == 1) || (n == NOTMATCH266 && i == 0)) |
845 | return(True); |
846 | else |
847 | return(False); |
848 | } |
849 | |
850 | |
851 | Cell *boolop(Node **a, int n) /* a[0] || a[1], a[0] && a[1], !a[0] */ |
852 | { |
853 | Cell *x, *y; |
854 | int i; |
855 | |
856 | x = execute(a[0]); |
857 | i = istrue(x)((x)->csub == 11); |
858 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); |
859 | switch (n) { |
860 | case BOR281: |
861 | if (i) return(True); |
862 | y = execute(a[1]); |
863 | i = istrue(y)((y)->csub == 11); |
864 | tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0); |
865 | if (i) return(True); |
866 | else return(False); |
867 | case AND280: |
868 | if ( !i ) return(False); |
869 | y = execute(a[1]); |
870 | i = istrue(y)((y)->csub == 11); |
871 | tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0); |
872 | if (i) return(True); |
873 | else return(False); |
874 | case NOT345: |
875 | if (i) return(False); |
876 | else return(True); |
877 | default: /* can't happen */ |
878 | FATAL("unknown boolean operator %d", n); |
879 | } |
880 | return 0; /*NOTREACHED*/ |
881 | } |
882 | |
883 | Cell *relop(Node **a, int n) /* a[0 < a[1], etc. */ |
884 | { |
885 | int i; |
886 | Cell *x, *y; |
887 | Awkfloat j; |
888 | bool_Bool x_is_nan, y_is_nan; |
889 | |
890 | x = execute(a[0]); |
891 | y = execute(a[1]); |
892 | x_is_nan = isnan(x->fval)((sizeof (x->fval) == sizeof (float)) ? __isnanf(x->fval ) : (sizeof (x->fval) == sizeof (double)) ? __isnan(x-> fval) : __isnanl(x->fval)); |
893 | y_is_nan = isnan(y->fval)((sizeof (y->fval) == sizeof (float)) ? __isnanf(y->fval ) : (sizeof (y->fval) == sizeof (double)) ? __isnan(y-> fval) : __isnanl(y->fval)); |
894 | if (x->tval&NUM01 && y->tval&NUM01) { |
895 | if ((x_is_nan || y_is_nan) && n != NE288) |
896 | return(False); |
897 | j = x->fval - y->fval; |
898 | i = j<0? -1: (j>0? 1: 0); |
899 | } else { |
900 | i = strcmp(getsval(x), getsval(y)); |
901 | } |
902 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); |
903 | tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0); |
904 | switch (n) { |
905 | case LT287: if (i<0) return(True); |
906 | else return(False); |
907 | case LE286: if (i<=0) return(True); |
908 | else return(False); |
909 | case NE288: if (x_is_nan && y_is_nan) return(True); |
910 | else if (i!=0) return(True); |
911 | else return(False); |
912 | case EQ283: if (i == 0) return(True); |
913 | else return(False); |
914 | case GE284: if (i>=0) return(True); |
915 | else return(False); |
916 | case GT285: if (i>0) return(True); |
917 | else return(False); |
918 | default: /* can't happen */ |
919 | FATAL("unknown relational operator %d", n); |
920 | } |
921 | return 0; /*NOTREACHED*/ |
922 | } |
923 | |
924 | void tfree(Cell *a) /* free a tempcell */ |
925 | { |
926 | if (freeable(a)( ((a)->tval & (02|04)) == 02 )) { |
927 | DPRINTF("freeing %s %s %o\n", NN(a->nval), NN(a->sval), a->tval)if (dbg) printf("freeing %s %s %o\n", ((a->nval) ? (a-> nval) : "(null)"), ((a->sval) ? (a->sval) : "(null)"), a ->tval); |
928 | xfree(a->sval){ free((void *)(intptr_t)(a->sval)); (a->sval) = ((void *)0); }; |
929 | } |
930 | if (a == tmps) |
931 | FATAL("tempcell list is curdled"); |
932 | a->cnext = tmps; |
933 | tmps = a; |
934 | } |
935 | |
936 | Cell *gettemp(void) /* get a tempcell */ |
937 | { int i; |
938 | Cell *x; |
939 | |
940 | if (!tmps) { |
941 | tmps = (Cell *) calloc(100, sizeof(*tmps)); |
942 | if (!tmps) |
943 | FATAL("out of space for temporaries"); |
944 | for (i = 1; i < 100; i++) |
945 | tmps[i-1].cnext = &tmps[i]; |
946 | tmps[i-1].cnext = NULL((void *)0); |
947 | } |
948 | x = tmps; |
949 | tmps = x->cnext; |
950 | *x = tempcell; |
951 | return(x); |
952 | } |
953 | |
954 | Cell *indirect(Node **a, int n) /* $( a[0] ) */ |
955 | { |
956 | Awkfloat val; |
957 | Cell *x; |
958 | int m; |
959 | char *s; |
960 | |
961 | x = execute(a[0]); |
962 | val = getfval(x); /* freebsd: defend against super large field numbers */ |
963 | if ((Awkfloat)INT_MAX0x7fffffff < val) |
964 | FATAL("trying to access out of range field %s", x->nval); |
965 | m = (int) val; |
966 | if (m == 0 && !is_number(s = getsval(x), NULL)is_valid_number(s = getsval(x), 0, ((void *)0), ((void *)0))) /* suspicion! */ |
967 | FATAL("illegal field $(%s), name \"%s\"", s, x->nval); |
968 | /* BUG: can x->nval ever be null??? */ |
969 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); |
970 | x = fieldadr(m); |
971 | x->ctype = OCELL1; /* BUG? why are these needed? */ |
972 | x->csub = CFLD1; |
973 | return(x); |
974 | } |
975 | |
976 | Cell *substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */ |
977 | { |
978 | int k, m, n; |
979 | int mb, nb; |
980 | char *s; |
981 | int temp; |
982 | Cell *x, *y, *z = NULL((void *)0); |
983 | |
984 | x = execute(a[0]); |
985 | y = execute(a[1]); |
986 | if (a[2] != NULL((void *)0)) |
987 | z = execute(a[2]); |
988 | s = getsval(x); |
989 | k = u8_strlen(s) + 1; |
990 | if (k <= 1) { |
991 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); |
992 | tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0); |
993 | if (a[2] != NULL((void *)0)) { |
994 | tempfree(z)do { if (((z)->csub == 4)) tfree(z); } while ( 0); |
995 | } |
996 | x = gettemp(); |
997 | setsval(x, ""); |
998 | return(x); |
999 | } |
1000 | m = (int) getfval(y); |
1001 | if (m <= 0) |
1002 | m = 1; |
1003 | else if (m > k) |
1004 | m = k; |
1005 | tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0); |
1006 | if (a[2] != NULL((void *)0)) { |
1007 | n = (int) getfval(z); |
1008 | tempfree(z)do { if (((z)->csub == 4)) tfree(z); } while ( 0); |
1009 | } else |
1010 | n = k - 1; |
1011 | if (n < 0) |
1012 | n = 0; |
1013 | else if (n > k - m) |
1014 | n = k - m; |
1015 | /* m is start, n is length from there */ |
1016 | DPRINTF("substr: m=%d, n=%d, s=%s\n", m, n, s)if (dbg) printf("substr: m=%d, n=%d, s=%s\n", m, n, s); |
1017 | y = gettemp(); |
1018 | mb = u8_char2byte(s, m-1); /* byte offset of start char in s */ |
1019 | nb = u8_char2byte(s, m-1+n); /* byte offset of end+1 char in s */ |
1020 | |
1021 | temp = s[nb]; /* with thanks to John Linderman */ |
1022 | s[nb] = '\0'; |
1023 | setsval(y, s + mb); |
1024 | s[nb] = temp; |
1025 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); |
1026 | return(y); |
1027 | } |
1028 | |
1029 | Cell *sindex(Node **a, int nnn) /* index(a[0], a[1]) */ |
1030 | { |
1031 | Cell *x, *y, *z; |
1032 | char *s1, *s2, *p1, *p2, *q; |
1033 | Awkfloat v = 0.0; |
1034 | |
1035 | x = execute(a[0]); |
1036 | s1 = getsval(x); |
1037 | y = execute(a[1]); |
1038 | s2 = getsval(y); |
1039 | |
1040 | z = gettemp(); |
1041 | for (p1 = s1; *p1 != '\0'; p1++) { |
1042 | for (q = p1, p2 = s2; *p2 != '\0' && *q == *p2; q++, p2++) |
1043 | continue; |
1044 | if (*p2 == '\0') { |
1045 | /* v = (Awkfloat) (p1 - s1 + 1); origin 1 */ |
1046 | |
1047 | /* should be a function: used in match() as well */ |
1048 | int i, len; |
1049 | v = 0; |
1050 | for (i = 0; i < p1-s1+1; i += len) { |
1051 | len = u8_nextlen(s1+i); |
1052 | v++; |
1053 | } |
1054 | break; |
1055 | } |
1056 | } |
1057 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); |
1058 | tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0); |
1059 | setfval(z, v); |
1060 | return(z); |
1061 | } |
1062 | |
/*
 * has_utf8: return 1 if s contains any character for which u8_nextlen()
 * reports a length of 2 bytes or more, otherwise 0.
 */
int has_utf8(char *s)	/* return 1 if s contains any utf-8 (2 bytes or more) character */
{
	while (*s != '\0') {
		int n = u8_nextlen(s);

		if (n > 1)
			return 1;
		s += n;
	}
	return 0;
}
1074 | |
1075 | #define MAXNUMSIZE50 50 |
1076 | |
1077 | int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like conversions */ |
1078 | { |
1079 | char *fmt; |
1080 | char *p, *t; |
1081 | const char *os; |
1082 | Cell *x; |
1083 | int flag = 0, n; |
1084 | int fmtwd; /* format width */ |
1085 | int fmtsz = recsize; |
1086 | char *buf = *pbuf; |
1087 | int bufsize = *pbufsize; |
1088 | #define FMTSZ(a)(fmtsz - ((a) - fmt)) (fmtsz - ((a) - fmt)) |
1089 | #define BUFSZ(a)(bufsize - ((a) - buf)) (bufsize - ((a) - buf)) |
1090 | |
1091 | static bool_Bool first = true1; |
1092 | static bool_Bool have_a_format = false0; |
1093 | |
1094 | if (first) { |
1095 | char xbuf[100]; |
1096 | |
1097 | snprintf(xbuf, sizeof(xbuf), "%a", 42.0); |
1098 | have_a_format = (strcmp(xbuf, "0x1.5p+5") == 0); |
1099 | first = false0; |
1100 | } |
1101 | |
1102 | os = s; |
1103 | p = buf; |
1104 | if ((fmt = (char *) malloc(fmtsz)) == NULL((void *)0)) |
1105 | FATAL("out of memory in format()"); |
1106 | while (*s) { |
1107 | adjbuf(&buf, &bufsize, MAXNUMSIZE50+1+p-buf, recsize, &p, "format1"); |
1108 | if (*s != '%') { |
1109 | *p++ = *s++; |
1110 | continue; |
1111 | } |
1112 | if (*(s+1) == '%') { |
1113 | *p++ = '%'; |
1114 | s += 2; |
1115 | continue; |
1116 | } |
1117 | fmtwd = atoi(s+1); |
1118 | if (fmtwd < 0) |
1119 | fmtwd = -fmtwd; |
1120 | adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format2"); |
1121 | for (t = fmt; (*t++ = *s) != '\0'; s++) { |
1122 | if (!adjbuf(&fmt, &fmtsz, MAXNUMSIZE50+1+t-fmt, recsize, &t, "format3")) |
1123 | FATAL("format item %.30s... ran format() out of memory", os); |
1124 | /* Ignore size specifiers */ |
1125 | if (strchr("hjLlqtz", *s) != NULL((void *)0)) { /* the ansi panoply */ |
1126 | t--; |
1127 | continue; |
1128 | } |
1129 | if (isalpha((uschar)*s)) |
1130 | break; |
1131 | if (*s == '$') { |
1132 | FATAL("'$' not permitted in awk formats"); |
1133 | } |
1134 | if (*s == '*') { |
1135 | if (a == NULL((void *)0)) { |
1136 | FATAL("not enough args in printf(%s)", os); |
1137 | } |
1138 | x = execute(a); |
1139 | a = a->nnext; |
1140 | snprintf(t - 1, FMTSZ(t - 1)(fmtsz - ((t - 1) - fmt)), |
1141 | "%d", fmtwd=(int) getfval(x)); |
1142 | if (fmtwd < 0) |
1143 | fmtwd = -fmtwd; |
1144 | adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format"); |
1145 | t = fmt + strlen(fmt); |
1146 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); |
1147 | } |
1148 | } |
1149 | *t = '\0'; |
1150 | if (fmtwd < 0) |
1151 | fmtwd = -fmtwd; |
1152 | adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format4"); |
1153 | switch (*s) { |
1154 | case 'a': case 'A': |
1155 | if (have_a_format) |
1156 | flag = *s; |
1157 | else |
1158 | flag = 'f'; |
1159 | break; |
1160 | case 'f': case 'e': case 'g': case 'E': case 'G': |
1161 | flag = 'f'; |
1162 | break; |
1163 | case 'd': case 'i': case 'o': case 'x': case 'X': case 'u': |
1164 | flag = (*s == 'd' || *s == 'i') ? 'd' : 'u'; |
1165 | *(t-1) = 'j'; |
1166 | *t = *s; |
1167 | *++t = '\0'; |
1168 | break; |
1169 | case 's': |
1170 | flag = 's'; |
1171 | break; |
1172 | case 'c': |
1173 | flag = 'c'; |
1174 | break; |
1175 | default: |
1176 | WARNING("weird printf conversion %s", fmt); |
1177 | flag = '?'; |
1178 | break; |
1179 | } |
1180 | if (a == NULL((void *)0)) |
1181 | FATAL("not enough args in printf(%s)", os); |
1182 | x = execute(a); |
1183 | a = a->nnext; |
1184 | n = MAXNUMSIZE50; |
1185 | if (fmtwd > n) |
1186 | n = fmtwd; |
1187 | adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format5"); |
1188 | switch (flag) { |
1189 | case '?': |
1190 | snprintf(p, BUFSZ(p)(bufsize - ((p) - buf)), "%s", fmt); /* unknown, so dump it too */ |
1191 | t = getsval(x); |
1192 | n = strlen(t); |
1193 | if (fmtwd > n) |
1194 | n = fmtwd; |
1195 | adjbuf(&buf, &bufsize, 1+strlen(p)+n+p-buf, recsize, &p, "format6"); |
1196 | p += strlen(p); |
1197 | snprintf(p, BUFSZ(p)(bufsize - ((p) - buf)), "%s", t); |
1198 | break; |
1199 | case 'a': |
1200 | case 'A': |
1201 | case 'f': snprintf(p, BUFSZ(p)(bufsize - ((p) - buf)), fmt, getfval(x)); break; |
1202 | case 'd': snprintf(p, BUFSZ(p)(bufsize - ((p) - buf)), fmt, (intmax_t) getfval(x)); break; |
1203 | case 'u': snprintf(p, BUFSZ(p)(bufsize - ((p) - buf)), fmt, (uintmax_t) getfval(x)); break; |
1204 | |
1205 | case 's': { |
1206 | t = getsval(x); |
1207 | n = strlen(t); |
1208 | /* if simple format or no utf-8 in the string, sprintf works */ |
1209 | if (!has_utf8(t) || strcmp(fmt,"%s") == 0) { |
1210 | if (fmtwd > n) |
1211 | n = fmtwd; |
1212 | if (!adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format7")) |
1213 | FATAL("huge string/format (%d chars) in printf %.30s..." \ |
1214 | " ran format() out of memory", n, t); |
1215 | snprintf(p, BUFSZ(p)(bufsize - ((p) - buf)), fmt, t); |
1216 | break; |
1217 | } |
1218 | |
1219 | /* get here if string has utf-8 chars and fmt is not plain %s */ |
1220 | /* "%-w.ps", where -, w and .p are all optional */ |
1221 | /* '0' before the w is a flag character */ |
1222 | /* fmt points at % */ |
1223 | int ljust = 0, wid = 0, prec = n, pad = 0; |
1224 | char *f = fmt+1; |
1225 | if (f[0] == '-') { |
1226 | ljust = 1; |
1227 | f++; |
1228 | } |
1229 | // flags '0' and '+' are recognized but skipped |
1230 | if (f[0] == '0') { |
1231 | f++; |
1232 | if (f[0] == '+') |
1233 | f++; |
1234 | } |
1235 | if (f[0] == '+') { |
1236 | f++; |
1237 | if (f[0] == '0') |
1238 | f++; |
1239 | } |
1240 | if (isdigit((uschar)f[0])) { /* there is a wid */ |
1241 | wid = strtol(f, &f, 10); |
1242 | } |
1243 | if (f[0] == '.') { /* there is a .prec */ |
1244 | prec = strtol(++f, &f, 10); |
1245 | } |
1246 | if (prec > u8_strlen(t)) |
1247 | prec = u8_strlen(t); |
1248 | pad = wid>prec ? wid - prec : 0; // has to be >= 0 |
1249 | int i, k, n; |
1250 | |
1251 | if (ljust) { // print prec chars from t, then pad blanks |
1252 | n = u8_char2byte(t, prec); |
1253 | for (k = 0; k < n; k++) { |
1254 | //putchar(t[k]); |
1255 | *p++ = t[k]; |
1256 | } |
1257 | for (i = 0; i < pad; i++) { |
1258 | //printf(" "); |
1259 | *p++ = ' '; |
1260 | } |
1261 | } else { // print pad blanks, then prec chars from t |
1262 | for (i = 0; i < pad; i++) { |
1263 | //printf(" "); |
1264 | *p++ = ' '; |
1265 | } |
1266 | n = u8_char2byte(t, prec); |
1267 | for (k = 0; k < n; k++) { |
1268 | //putchar(t[k]); |
1269 | *p++ = t[k]; |
1270 | } |
1271 | } |
1272 | *p = 0; |
1273 | break; |
1274 | } |
1275 | |
1276 | case 'c': { |
1277 | /* |
1278 | * If a numeric value is given, awk should just turn |
1279 | * it into a character and print it: |
1280 | * BEGIN { printf("%c\n", 65) } |
1281 | * prints "A". |
1282 | * |
1283 | * But what if the numeric value is > 128 and |
1284 | * represents a valid Unicode code point?!? We do |
1285 | * our best to convert it back into UTF-8. If we |
1286 | * can't, we output the encoding of the Unicode |
1287 | * "invalid character", 0xFFFD. |
1288 | */ |
1289 | if (isnum(x)((x)->tval & 01)) { |
1290 | int charval = (int) getfval(x); |
1291 | |
1292 | if (charval != 0) { |
1293 | if (charval < 128 || awk_mb_cur_max == 1) |
1294 | snprintf(p, BUFSZ(p)(bufsize - ((p) - buf)), fmt, charval); |
1295 | else { |
1296 | // possible unicode character |
1297 | size_t count; |
1298 | char *bs = wide_char_to_byte_str(charval, &count); |
1299 | |
1300 | if (bs == NULL((void *)0)) { // invalid character |
1301 | // use unicode invalid character, 0xFFFD |
1302 | bs = "\357\277\275"; |
1303 | count = 3; |
1304 | } |
1305 | t = bs; |
1306 | n = count; |
1307 | goto format_percent_c; |
1308 | } |
1309 | } else { |
1310 | *p++ = '\0'; /* explicit null byte */ |
1311 | *p = '\0'; /* next output will start here */ |
1312 | } |
1313 | break; |
1314 | } |
1315 | t = getsval(x); |
1316 | n = u8_nextlen(t); |
1317 | format_percent_c: |
1318 | if (n < 2) { /* not utf8 */ |
1319 | snprintf(p, BUFSZ(p)(bufsize - ((p) - buf)), fmt, getsval(x)[0]); |
1320 | break; |
1321 | } |
1322 | |
1323 | // utf8 character, almost same song and dance as for %s |
1324 | int ljust = 0, wid = 0, prec = n, pad = 0; |
1325 | char *f = fmt+1; |
1326 | if (f[0] == '-') { |
1327 | ljust = 1; |
1328 | f++; |
1329 | } |
1330 | // flags '0' and '+' are recognized but skipped |
1331 | if (f[0] == '0') { |
1332 | f++; |
1333 | if (f[0] == '+') |
1334 | f++; |
1335 | } |
1336 | if (f[0] == '+') { |
1337 | f++; |
1338 | if (f[0] == '0') |
1339 | f++; |
1340 | } |
1341 | if (isdigit((uschar)f[0])) { /* there is a wid */ |
1342 | wid = strtol(f, &f, 10); |
1343 | } |
1344 | if (f[0] == '.') { /* there is a .prec */ |
1345 | prec = strtol(++f, &f, 10); |
1346 | } |
1347 | if (prec > 1) // %c --> only one character |
1348 | prec = 1; |
1349 | pad = wid>prec ? wid - prec : 0; // has to be >= 0 |
1350 | int i; |
1351 | |
1352 | if (ljust) { // print one char from t, then pad blanks |
1353 | for (i = 0; i < n; i++) |
1354 | *p++ = t[i]; |
1355 | for (i = 0; i < pad; i++) { |
1356 | //printf(" "); |
1357 | *p++ = ' '; |
1358 | } |
1359 | } else { // print pad blanks, then prec chars from t |
1360 | for (i = 0; i < pad; i++) { |
1361 | //printf(" "); |
1362 | *p++ = ' '; |
1363 | } |
1364 | for (i = 0; i < n; i++) |
1365 | *p++ = t[i]; |
1366 | } |
1367 | *p = 0; |
1368 | break; |
1369 | } |
1370 | default: |
1371 | FATAL("can't happen: bad conversion %c in format()", flag); |
1372 | } |
1373 | |
1374 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); |
1375 | p += strlen(p); |
1376 | s++; |
1377 | } |
1378 | *p = '\0'; |
1379 | free(fmt); |
1380 | for ( ; a; a = a->nnext) { /* evaluate any remaining args */ |
1381 | x = execute(a); |
1382 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); |
1383 | } |
1384 | *pbuf = buf; |
1385 | *pbufsize = bufsize; |
1386 | return p - buf; |
1387 | } |
1388 | |
1389 | Cell *awksprintf(Node **a, int n) /* sprintf(a[0]) */ |
1390 | { |
1391 | Cell *x; |
1392 | Node *y; |
1393 | char *buf; |
1394 | int bufsz=3*recsize; |
1395 | |
1396 | if ((buf = (char *) malloc(bufsz)) == NULL((void *)0)) |
1397 | FATAL("out of memory in awksprintf"); |
1398 | y = a[0]->nnext; |
1399 | x = execute(a[0]); |
1400 | if (format(&buf, &bufsz, getsval(x), y) == -1) |
1401 | FATAL("sprintf string %.30s... too long. can't happen.", buf); |
1402 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); |
1403 | x = gettemp(); |
1404 | x->sval = buf; |
1405 | x->tval = STR02; |
1406 | return(x); |
1407 | } |
1408 | |
1409 | Cell *awkprintf(Node **a, int n) /* printf */ |
1410 | { /* a[0] is list of args, starting with format string */ |
1411 | /* a[1] is redirection operator, a[2] is redirection file */ |
1412 | FILE *fp; |
1413 | Cell *x; |
1414 | Node *y; |
1415 | char *buf; |
1416 | int len; |
1417 | int bufsz=3*recsize; |
1418 | |
1419 | if ((buf = (char *) malloc(bufsz)) == NULL((void *)0)) |
1420 | FATAL("out of memory in awkprintf"); |
1421 | y = a[0]->nnext; |
1422 | x = execute(a[0]); |
1423 | if ((len = format(&buf, &bufsz, getsval(x), y)) == -1) |
1424 | FATAL("printf string %.30s... too long. can't happen.", buf); |
1425 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); |
1426 | if (a[1] == NULL((void *)0)) { |
1427 | /* fputs(buf, stdout); */ |
1428 | fwrite(buf, len, 1, stdout(&__sF[1])); |
1429 | if (ferror(stdout)(!__isthreaded ? ((((&__sF[1]))->_flags & 0x0040) != 0) : (ferror)((&__sF[1])))) |
1430 | FATAL("write error on stdout"); |
1431 | } else { |
1432 | fp = redirect(ptoi(a[1]), a[2]); |
1433 | /* fputs(buf, fp); */ |
1434 | fwrite(buf, len, 1, fp); |
1435 | fflush(fp); |
1436 | if (ferror(fp)(!__isthreaded ? (((fp)->_flags & 0x0040) != 0) : (ferror )(fp))) |
1437 | FATAL("write error on %s", filename(fp)); |
1438 | } |
1439 | free(buf); |
1440 | return(True); |
1441 | } |
1442 | |
1443 | Cell *arith(Node **a, int n) /* a[0] + a[1], etc. also -a[0] */ |
1444 | { |
1445 | Awkfloat i, j = 0; |
1446 | double v; |
1447 | Cell *x, *y, *z; |
1448 | |
1449 | x = execute(a[0]); |
1450 | i = getfval(x); |
1451 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); |
1452 | if (n != UMINUS346 && n != UPLUS347) { |
1453 | y = execute(a[1]); |
1454 | j = getfval(y); |
1455 | tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0); |
1456 | } |
1457 | z = gettemp(); |
1458 | switch (n) { |
1459 | case ADD309: |
1460 | i += j; |
1461 | break; |
1462 | case MINUS310: |
1463 | i -= j; |
1464 | break; |
1465 | case MULT311: |
1466 | i *= j; |
1467 | break; |
1468 | case DIVIDE312: |
1469 | if (j == 0) |
1470 | FATAL("division by zero"); |
1471 | i /= j; |
1472 | break; |
1473 | case MOD313: |
1474 | if (j == 0) |
1475 | FATAL("division by zero in mod"); |
1476 | modf(i/j, &v); |
1477 | i = i - j * v; |
1478 | break; |
1479 | case UMINUS346: |
1480 | i = -i; |
1481 | break; |
1482 | case UPLUS347: /* handled by getfval(), above */ |
1483 | break; |
1484 | case POWER348: |
1485 | if (j >= 0 && modf(j, &v) == 0.0) /* pos integer exponent */ |
1486 | i = ipow(i, (int) j); |
1487 | else { |
1488 | errno(*__errno()) = 0; |
1489 | i = errcheck(pow(i, j), "pow"); |
1490 | } |
1491 | break; |
1492 | default: /* can't happen */ |
1493 | FATAL("illegal arithmetic operator %d", n); |
1494 | } |
1495 | setfval(z, i); |
1496 | return(z); |
1497 | } |
1498 | |
/*
 * ipow: raise x to the integer power n by repeated squaring.
 * Any n <= 0 yields 1.
 */
double ipow(double x, int n)	/* x**n.  ought to be done by pow, but isn't always */
{
	double half;

	if (n <= 0)
		return 1;

	/* x**n is (x**(n/2))**2, times one more x when n is odd */
	half = ipow(x, n/2);
	return (n % 2 == 0) ? half * half : x * half * half;
}
1511 | |
1512 | Cell *incrdecr(Node **a, int n) /* a[0]++, etc. */ |
1513 | { |
1514 | Cell *x, *z; |
1515 | int k; |
1516 | Awkfloat xf; |
1517 | |
1518 | x = execute(a[0]); |
1519 | xf = getfval(x); |
1520 | k = (n == PREINCR329 || n == POSTINCR328) ? 1 : -1; |
1521 | if (n == PREINCR329 || n == PREDECR331) { |
1522 | setfval(x, xf + k); |
1523 | return(x); |
1524 | } |
1525 | z = gettemp(); |
1526 | setfval(z, xf); |
1527 | setfval(x, xf + k); |
1528 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); |
1529 | return(z); |
1530 | } |
1531 | |
1532 | Cell *assign(Node **a, int n) /* a[0] = a[1], a[0] += a[1], etc. */ |
1533 | { /* this is subtle; don't muck with it. */ |
1534 | Cell *x, *y; |
1535 | Awkfloat xf, yf; |
1536 | double v; |
1537 | |
1538 | y = execute(a[1]); |
1539 | x = execute(a[0]); |
1540 | if (n == ASSIGN314) { /* ordinary assignment */ |
1541 | if (x == y && !(x->tval & (FLD0100|REC0200)) && x != nfloc) |
1542 | ; /* self-assignment: leave alone unless it's a field or NF */ |
1543 | else if ((y->tval & (STR02|NUM01)) == (STR02|NUM01)) { |
1544 | yf = getfval(y); |
1545 | setsval(x, getsval(y)); |
1546 | x->fval = yf; |
1547 | x->tval |= NUM01; |
1548 | } |
1549 | else if (isstr(y)((y)->tval & 02)) |
1550 | setsval(x, getsval(y)); |
1551 | else if (isnum(y)((y)->tval & 01)) |
1552 | setfval(x, getfval(y)); |
1553 | else |
1554 | funnyvar(y, "read value of"); |
1555 | tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0); |
1556 | return(x); |
1557 | } |
1558 | xf = getfval(x); |
1559 | yf = getfval(y); |
1560 | switch (n) { |
1561 | case ADDEQ316: |
1562 | xf += yf; |
1563 | break; |
1564 | case SUBEQ317: |
1565 | xf -= yf; |
1566 | break; |
1567 | case MULTEQ318: |
1568 | xf *= yf; |
1569 | break; |
1570 | case DIVEQ319: |
1571 | if (yf == 0) |
1572 | FATAL("division by zero in /="); |
1573 | xf /= yf; |
1574 | break; |
1575 | case MODEQ320: |
1576 | if (yf == 0) |
1577 | FATAL("division by zero in %%="); |
1578 | modf(xf/yf, &v); |
1579 | xf = xf - yf * v; |
1580 | break; |
1581 | case POWEQ321: |
1582 | if (yf >= 0 && modf(yf, &v) == 0.0) /* pos integer exponent */ |
1583 | xf = ipow(xf, (int) yf); |
1584 | else { |
1585 | errno(*__errno()) = 0; |
1586 | xf = errcheck(pow(xf, yf), "pow"); |
1587 | } |
1588 | break; |
1589 | default: |
1590 | FATAL("illegal assignment operator %d", n); |
1591 | break; |
1592 | } |
1593 | tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0); |
1594 | setfval(x, xf); |
1595 | return(x); |
1596 | } |
1597 | |
1598 | Cell *cat(Node **a, int q) /* a[0] cat a[1] */ |
1599 | { |
1600 | Cell *x, *y, *z; |
1601 | int n1, n2; |
1602 | char *s = NULL((void *)0); |
1603 | int ssz = 0; |
1604 | |
1605 | x = execute(a[0]); |
1606 | n1 = strlen(getsval(x)); |
1607 | adjbuf(&s, &ssz, n1 + 1, recsize, 0, "cat1"); |
1608 | memcpy(s, x->sval, n1); |
1609 | |
1610 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); |
1611 | |
1612 | y = execute(a[1]); |
1613 | n2 = strlen(getsval(y)); |
1614 | adjbuf(&s, &ssz, n1 + n2 + 1, recsize, 0, "cat2"); |
1615 | memcpy(s + n1, y->sval, n2); |
1616 | s[n1 + n2] = '\0'; |
1617 | |
1618 | tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0); |
1619 | |
1620 | z = gettemp(); |
1621 | z->sval = s; |
1622 | z->tval = STR02; |
1623 | |
1624 | return(z); |
1625 | } |
1626 | |
1627 | Cell *pastat(Node **a, int n) /* a[0] { a[1] } */ |
1628 | { |
1629 | Cell *x; |
1630 | |
1631 | if (a[0] == NULL((void *)0)) |
1632 | x = execute(a[1]); |
1633 | else { |
1634 | x = execute(a[0]); |
1635 | if (istrue(x)((x)->csub == 11)) { |
1636 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); |
1637 | x = execute(a[1]); |
1638 | } |
1639 | } |
1640 | return x; |
1641 | } |
1642 | |
1643 | Cell *dopa2(Node **a, int n) /* a[0], a[1] { a[2] } */ |
1644 | { |
1645 | Cell *x; |
1646 | int pair; |
1647 | |
1648 | pair = ptoi(a[3]); |
1649 | if (pairstack[pair] == 0) { |
1650 | x = execute(a[0]); |
1651 | if (istrue(x)((x)->csub == 11)) |
1652 | pairstack[pair] = 1; |
1653 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); |
1654 | } |
1655 | if (pairstack[pair] == 1) { |
1656 | x = execute(a[1]); |
1657 | if (istrue(x)((x)->csub == 11)) |
1658 | pairstack[pair] = 0; |
1659 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); |
1660 | x = execute(a[2]); |
1661 | return(x); |
1662 | } |
1663 | return(False); |
1664 | } |
1665 | |
1666 | Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ |
1667 | { |
1668 | Cell *x = NULL((void *)0), *y, *ap; |
1669 | const char *s, *origs, *t; |
1670 | const char *fs = NULL((void *)0); |
1671 | char *origfs = NULL((void *)0); |
1672 | int sep; |
1673 | char temp, num[50]; |
1674 | int j, n, tempstat, arg3type; |
1675 | double result; |
1676 | |
1677 | y = execute(a[0]); /* source string */ |
1678 | origs = s = strdup(getsval(y)); |
1679 | if (s == NULL((void *)0)) |
1680 | FATAL("out of space in split"); |
1681 | tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0); |
1682 | arg3type = ptoi(a[3]); |
1683 | if (a[2] == NULL((void *)0)) { /* BUG: CSV should override implicit fs but not explicit */ |
1684 | fs = getsval(fsloc); |
1685 | } else if (arg3type == STRING337) { /* split(str,arr,"string") */ |
1686 | x = execute(a[2]); |
1687 | fs = origfs = strdup(getsval(x)); |
1688 | if (fs == NULL((void *)0)) |
1689 | FATAL("out of space in split"); |
1690 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); |
1691 | } else if (arg3type == REGEXPR338) { |
1692 | fs = "(regexpr)"; /* split(str,arr,/regexpr/) */ |
1693 | } else { |
1694 | FATAL("illegal type of split"); |
1695 | } |
1696 | sep = *fs; |
1697 | ap = execute(a[1]); /* array name */ |
1698 | /* BUG 7/26/22: this appears not to reset array: see C1/asplit */ |
1699 | freesymtab(ap); |
1700 | DPRINTF("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs)if (dbg) printf("split: s=|%s|, a=%s, sep=|%s|\n", s, ((ap-> nval) ? (ap->nval) : "(null)"), fs); |
1701 | ap->tval &= ~STR02; |
1702 | ap->tval |= ARR020; |
1703 | ap->sval = (char *) makesymtab(NSYMTAB50); |
1704 | |
1705 | n = 0; |
1706 | if (arg3type == REGEXPR338 && strlen((char*)((fa*)a[2])->restr) == 0) { |
1707 | /* split(s, a, //); have to arrange that it looks like empty sep */ |
1708 | arg3type = 0; |
1709 | fs = ""; |
1710 | sep = 0; |
1711 | } |
1712 | if (*s != '\0' && (strlen(fs) > 1 || arg3type == REGEXPR338)) { /* reg expr */ |
1713 | fa *pfa; |
1714 | if (arg3type == REGEXPR338) { /* it's ready already */ |
1715 | pfa = (fa *) a[2]; |
1716 | } else { |
1717 | pfa = makedfa(fs, 1); |
1718 | } |
1719 | if (nematch(pfa,s)) { |
1720 | tempstat = pfa->initstat; |
1721 | pfa->initstat = 2; |
1722 | do { |
1723 | n++; |
1724 | snprintf(num, sizeof(num), "%d", n); |
1725 | temp = *patbeg; |
1726 | setptr(patbeg, '\0')(*(char *)(intptr_t)(patbeg)) = ('\0'); |
1727 | if (is_number(s, & result)is_valid_number(s, 0, ((void *)0), & result)) |
1728 | setsymtab(num, s, result, STR02|NUM01, (Array *) ap->sval); |
1729 | else |
1730 | setsymtab(num, s, 0.0, STR02, (Array *) ap->sval); |
1731 | setptr(patbeg, temp)(*(char *)(intptr_t)(patbeg)) = (temp); |
1732 | s = patbeg + patlen; |
1733 | if (*(patbeg+patlen-1) == '\0' || *s == '\0') { |
1734 | n++; |
1735 | snprintf(num, sizeof(num), "%d", n); |
1736 | setsymtab(num, "", 0.0, STR02, (Array *) ap->sval); |
1737 | pfa->initstat = tempstat; |
1738 | goto spdone; |
1739 | } |
1740 | } while (nematch(pfa,s)); |
1741 | pfa->initstat = tempstat; /* bwk: has to be here to reset */ |
1742 | /* cf gsub and refldbld */ |
1743 | } |
1744 | n++; |
1745 | snprintf(num, sizeof(num), "%d", n); |
1746 | if (is_number(s, & result)is_valid_number(s, 0, ((void *)0), & result)) |
1747 | setsymtab(num, s, result, STR02|NUM01, (Array *) ap->sval); |
1748 | else |
1749 | setsymtab(num, s, 0.0, STR02, (Array *) ap->sval); |
1750 | spdone: |
1751 | pfa = NULL((void *)0); |
1752 | |
1753 | } else if (a[2] == NULL((void *)0) && CSV) { /* CSV only if no explicit separator */ |
1754 | char *newt = (char *) malloc(strlen(s)); /* for building new string; reuse for each field */ |
1755 | for (;;) { |
1756 | char *fr = newt; |
1757 | n++; |
1758 | if (*s == '"' ) { /* start of "..." */ |
1759 | for (s++ ; *s != '\0'; ) { |
1760 | if (*s == '"' && s[1] != '\0' && s[1] == '"') { |
1761 | s += 2; /* doubled quote */ |
1762 | *fr++ = '"'; |
1763 | } else if (*s == '"' && (s[1] == '\0' || s[1] == ',')) { |
1764 | s++; /* skip over closing quote */ |
1765 | break; |
1766 | } else { |
1767 | *fr++ = *s++; |
1768 | } |
1769 | } |
1770 | *fr++ = 0; |
1771 | } else { /* unquoted field */ |
1772 | while (*s != ',' && *s != '\0') |
1773 | *fr++ = *s++; |
1774 | *fr++ = 0; |
1775 | } |
1776 | snprintf(num, sizeof(num), "%d", n); |
1777 | if (is_number(newt, &result)is_valid_number(newt, 0, ((void *)0), &result)) |
1778 | setsymtab(num, newt, result, STR02|NUM01, (Array *) ap->sval); |
1779 | else |
1780 | setsymtab(num, newt, 0.0, STR02, (Array *) ap->sval); |
1781 | if (*s++ == '\0') |
1782 | break; |
1783 | } |
1784 | free(newt); |
1785 | |
1786 | } else if (!CSV && sep == ' ') { /* usual case: split on white space */ |
1787 | for (n = 0; ; ) { |
1788 | #define ISWS(c)((c) == ' ' || (c) == '\t' || (c) == '\n') ((c) == ' ' || (c) == '\t' || (c) == '\n') |
1789 | while (ISWS(*s)((*s) == ' ' || (*s) == '\t' || (*s) == '\n')) |
1790 | s++; |
1791 | if (*s == '\0') |
1792 | break; |
1793 | n++; |
1794 | t = s; |
1795 | do |
1796 | s++; |
1797 | while (*s != '\0' && !ISWS(*s)((*s) == ' ' || (*s) == '\t' || (*s) == '\n')); |
1798 | temp = *s; |
1799 | setptr(s, '\0')(*(char *)(intptr_t)(s)) = ('\0'); |
1800 | snprintf(num, sizeof(num), "%d", n); |
1801 | if (is_number(t, & result)is_valid_number(t, 0, ((void *)0), & result)) |
1802 | setsymtab(num, t, result, STR02|NUM01, (Array *) ap->sval); |
1803 | else |
1804 | setsymtab(num, t, 0.0, STR02, (Array *) ap->sval); |
1805 | setptr(s, temp)(*(char *)(intptr_t)(s)) = (temp); |
1806 | if (*s != '\0') |
1807 | s++; |
1808 | } |
1809 | |
1810 | } else if (sep == 0) { /* new: split(s, a, "") => 1 char/elem */ |
1811 | for (n = 0; *s != '\0'; s += u8_nextlen(s)) { |
1812 | char buf[10]; |
1813 | n++; |
1814 | snprintf(num, sizeof(num), "%d", n); |
1815 | |
1816 | for (j = 0; j < u8_nextlen(s); j++) { |
1817 | buf[j] = s[j]; |
1818 | } |
1819 | buf[j] = '\0'; |
1820 | |
1821 | if (isdigit((uschar)buf[0])) |
1822 | setsymtab(num, buf, atof(buf), STR02|NUM01, (Array *) ap->sval); |
1823 | else |
1824 | setsymtab(num, buf, 0.0, STR02, (Array *) ap->sval); |
1825 | } |
1826 | |
1827 | } else if (*s != '\0') { /* some random single character */ |
1828 | for (;;) { |
1829 | n++; |
1830 | t = s; |
1831 | while (*s != sep && *s != '\n' && *s != '\0') |
1832 | s++; |
1833 | temp = *s; |
1834 | setptr(s, '\0')(*(char *)(intptr_t)(s)) = ('\0'); |
1835 | snprintf(num, sizeof(num), "%d", n); |
1836 | if (is_number(t, & result)is_valid_number(t, 0, ((void *)0), & result)) |
1837 | setsymtab(num, t, result, STR02|NUM01, (Array *) ap->sval); |
1838 | else |
1839 | setsymtab(num, t, 0.0, STR02, (Array *) ap->sval); |
1840 | setptr(s, temp)(*(char *)(intptr_t)(s)) = (temp); |
1841 | if (*s++ == '\0') |
1842 | break; |
1843 | } |
1844 | } |
1845 | tempfree(ap)do { if (((ap)->csub == 4)) tfree(ap); } while ( 0); |
1846 | xfree(origs){ free((void *)(intptr_t)(origs)); (origs) = ((void *)0); }; |
1847 | xfree(origfs){ free((void *)(intptr_t)(origfs)); (origfs) = ((void *)0); }; |
1848 | x = gettemp(); |
1849 | x->tval = NUM01; |
1850 | x->fval = n; |
1851 | return(x); |
1852 | } |
1853 | |
1854 | Cell *condexpr(Node **a, int n) /* a[0] ? a[1] : a[2] */ |
1855 | { |
1856 | Cell *x; |
1857 | |
1858 | x = execute(a[0]); |
1859 | if (istrue(x)((x)->csub == 11)) { |
1860 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); |
1861 | x = execute(a[1]); |
1862 | } else { |
1863 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); |
1864 | x = execute(a[2]); |
1865 | } |
1866 | return(x); |
1867 | } |
1868 | |
1869 | Cell *ifstat(Node **a, int n) /* if (a[0]) a[1]; else a[2] */ |
1870 | { |
1871 | Cell *x; |
1872 | |
1873 | x = execute(a[0]); |
1874 | if (istrue(x)((x)->csub == 11)) { |
1875 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); |
1876 | x = execute(a[1]); |
1877 | } else if (a[2] != NULL((void *)0)) { |
1878 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); |
1879 | x = execute(a[2]); |
1880 | } |
1881 | return(x); |
1882 | } |
1883 | |
1884 | Cell *whilestat(Node **a, int n) /* while (a[0]) a[1] */ |
1885 | { |
1886 | Cell *x; |
1887 | |
1888 | for (;;) { |
1889 | x = execute(a[0]); |
1890 | if (!istrue(x)((x)->csub == 11)) |
1891 | return(x); |
1892 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); |
1893 | x = execute(a[1]); |
1894 | if (isbreak(x)((x)->csub == 23)) { |
1895 | x = True; |
1896 | return(x); |
1897 | } |
1898 | if (isnext(x)((x)->csub == 22 || (x)->csub == 26) || isexit(x)((x)->csub == 21) || isret(x)((x)->csub == 25)) |
1899 | return(x); |
1900 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); |
1901 | } |
1902 | } |
1903 | |
1904 | Cell *dostat(Node **a, int n) /* do a[0]; while(a[1]) */ |
1905 | { |
1906 | Cell *x; |
1907 | |
1908 | for (;;) { |
1909 | x = execute(a[0]); |
1910 | if (isbreak(x)((x)->csub == 23)) |
1911 | return True; |
1912 | if (isnext(x)((x)->csub == 22 || (x)->csub == 26) || isexit(x)((x)->csub == 21) || isret(x)((x)->csub == 25)) |
1913 | return(x); |
1914 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); |
1915 | x = execute(a[1]); |
1916 | if (!istrue(x)((x)->csub == 11)) |
1917 | return(x); |
1918 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); |
1919 | } |
1920 | } |
1921 | |
1922 | Cell *forstat(Node **a, int n) /* for (a[0]; a[1]; a[2]) a[3] */ |
1923 | { |
1924 | Cell *x; |
1925 | |
1926 | x = execute(a[0]); |
1927 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); |
1928 | for (;;) { |
1929 | if (a[1]!=NULL((void *)0)) { |
1930 | x = execute(a[1]); |
1931 | if (!istrue(x)((x)->csub == 11)) return(x); |
1932 | else tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); |
1933 | } |
1934 | x = execute(a[3]); |
1935 | if (isbreak(x)((x)->csub == 23)) /* turn off break */ |
1936 | return True; |
1937 | if (isnext(x)((x)->csub == 22 || (x)->csub == 26) || isexit(x)((x)->csub == 21) || isret(x)((x)->csub == 25)) |
1938 | return(x); |
1939 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); |
1940 | x = execute(a[2]); |
1941 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); |
1942 | } |
1943 | } |
1944 | |
1945 | Cell *instat(Node **a, int n) /* for (a[0] in a[1]) a[2] */ |
1946 | { |
1947 | Cell *x, *vp, *arrayp, *cp, *ncp; |
1948 | Array *tp; |
1949 | int i; |
1950 | |
1951 | vp = execute(a[0]); |
1952 | arrayp = execute(a[1]); |
1953 | if (!isarr(arrayp)((arrayp)->tval & 020)) { |
1954 | return True; |
1955 | } |
1956 | tp = (Array *) arrayp->sval; |
1957 | tempfree(arrayp)do { if (((arrayp)->csub == 4)) tfree(arrayp); } while ( 0 ); |
1958 | for (i = 0; i < tp->size; i++) { /* this routine knows too much */ |
1959 | for (cp = tp->tab[i]; cp != NULL((void *)0); cp = ncp) { |
1960 | setsval(vp, cp->nval); |
1961 | ncp = cp->cnext; |
1962 | x = execute(a[2]); |
1963 | if (isbreak(x)((x)->csub == 23)) { |
1964 | tempfree(vp)do { if (((vp)->csub == 4)) tfree(vp); } while ( 0); |
1965 | return True; |
1966 | } |
1967 | if (isnext(x)((x)->csub == 22 || (x)->csub == 26) || isexit(x)((x)->csub == 21) || isret(x)((x)->csub == 25)) { |
1968 | tempfree(vp)do { if (((vp)->csub == 4)) tfree(vp); } while ( 0); |
1969 | return(x); |
1970 | } |
1971 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); |
1972 | } |
1973 | } |
1974 | return True; |
1975 | } |
1976 | |
1977 | static char *nawk_convert(const char *s, int (*fun_c)(int), |
1978 | wint_t (*fun_wc)(wint_t)) |
1979 | { |
1980 | char *buf = NULL((void *)0); |
1981 | char *pbuf = NULL((void *)0); |
1982 | const char *ps = NULL((void *)0); |
1983 | size_t n = 0; |
1984 | wchar_t wc; |
1985 | const size_t sz = awk_mb_cur_max; |
1986 | int unused; |
1987 | |
1988 | if (sz == 1) { |
1989 | buf = tostring(s); |
1990 | |
1991 | for (pbuf = buf; *pbuf; pbuf++) |
1992 | *pbuf = fun_c((uschar)*pbuf); |
1993 | |
1994 | return buf; |
1995 | } else { |
1996 | /* upper/lower character may be shorter/longer */ |
1997 | buf = tostringN(s, strlen(s) * sz + 1); |
1998 | |
1999 | (void) mbtowc(NULL((void *)0), NULL((void *)0), 0); /* reset internal state */ |
2000 | /* |
2001 | * Reset internal state here too. |
2002 | * Assign result to avoid a compiler warning. (Casting to void |
2003 | * doesn't work.) |
2004 | * Increment said variable to avoid a different warning. |
2005 | */ |
2006 | unused = wctomb(NULL((void *)0), L'\0'); |
2007 | unused++; |
2008 | |
2009 | ps = s; |
2010 | pbuf = buf; |
2011 | while (n = mbtowc(&wc, ps, sz), |
2012 | n > 0 && n != (size_t)-1 && n != (size_t)-2) |
2013 | { |
2014 | ps += n; |
2015 | |
2016 | n = wctomb(pbuf, fun_wc(wc)); |
2017 | if (n == (size_t)-1) |
2018 | FATAL("illegal wide character %s", s); |
2019 | |
2020 | pbuf += n; |
2021 | } |
2022 | |
2023 | *pbuf = '\0'; |
2024 | |
2025 | if (n) |
2026 | FATAL("illegal byte sequence %s", s); |
2027 | |
2028 | return buf; |
2029 | } |
2030 | } |
2031 | |
#ifdef __DJGPP__
/*
 * Fallback wide-character case mappers, compiled only for DJGPP.
 * Values in [0, 255] are mapped through the byte-oriented <ctype.h>
 * routines; everything else is returned unchanged.
 */
static wint_t towupper(wint_t wc)
{
	return (wc >= 0 && wc < 256) ? toupper(wc & 0xFF) : wc;
}

static wint_t towlower(wint_t wc)
{
	return (wc >= 0 && wc < 256) ? tolower(wc & 0xFF) : wc;
}
#endif
2049 | |
/*
 * nawk_toupper: upper-case copy of s, produced by nawk_convert() with
 * toupper()/towupper() as the per-character mappers.
 */
static char *nawk_toupper(const char *s)
{
	char *converted = nawk_convert(s, toupper, towupper);

	return converted;
}
2054 | |
/*
 * nawk_tolower: lower-case copy of s, produced by nawk_convert() with
 * tolower()/towlower() as the per-character mappers.
 */
static char *nawk_tolower(const char *s)
{
	char *converted = nawk_convert(s, tolower, towlower);

	return converted;
}
2059 | |
2060 | Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg list */ |
2061 | { |
2062 | Cell *x, *y; |
2063 | Awkfloat u; |
2064 | int t, sz; |
2065 | Awkfloat tmp; |
2066 | char *buf, *fmt; |
2067 | Node *nextarg; |
2068 | FILE *fp; |
2069 | int status = 0; |
2070 | time_t tv; |
2071 | struct tm *tm, tmbuf; |
2072 | int estatus = 0; |
2073 | |
2074 | t = ptoi(a[0]); |
2075 | x = execute(a[1]); |
2076 | nextarg = a[1]->nnext; |
2077 | switch (t) { |
2078 | case FLENGTH1: |
2079 | if (isarr(x)((x)->tval & 020)) |
2080 | u = ((Array *) x->sval)->nelem; /* GROT. should be function*/ |
2081 | else |
2082 | u = u8_strlen(getsval(x)); |
2083 | break; |
2084 | case FLOG4: |
2085 | errno(*__errno()) = 0; |
2086 | u = errcheck(log(getfval(x)), "log"); |
2087 | break; |
2088 | case FINT5: |
2089 | modf(getfval(x), &u); break; |
2090 | case FEXP3: |
2091 | errno(*__errno()) = 0; |
2092 | u = errcheck(exp(getfval(x)), "exp"); |
2093 | break; |
2094 | case FSQRT2: |
2095 | errno(*__errno()) = 0; |
2096 | u = errcheck(sqrt(getfval(x)), "sqrt"); |
2097 | break; |
2098 | case FSIN9: |
2099 | u = sin(getfval(x)); break; |
2100 | case FCOS10: |
2101 | u = cos(getfval(x)); break; |
2102 | case FATAN11: |
2103 | if (nextarg == NULL((void *)0)) { |
2104 | WARNING("atan2 requires two arguments; returning 1.0"); |
2105 | u = 1.0; |
2106 | } else { |
2107 | y = execute(a[1]->nnext); |
2108 | u = atan2(getfval(x), getfval(y)); |
2109 | tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0); |
2110 | nextarg = nextarg->nnext; |
2111 | } |
2112 | break; |
2113 | case FCOMPL18: |
2114 | u = ~((int)getfval(x)); |
2115 | break; |
2116 | case FAND15: |
2117 | if (nextarg == 0) { |
2118 | WARNING("and requires two arguments; returning 0"); |
2119 | u = 0; |
2120 | break; |
2121 | } |
2122 | y = execute(a[1]->nnext); |
2123 | u = ((int)getfval(x)) & ((int)getfval(y)); |
2124 | tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0); |
2125 | nextarg = nextarg->nnext; |
2126 | break; |
2127 | case FFOR16: |
2128 | if (nextarg == 0) { |
2129 | WARNING("or requires two arguments; returning 0"); |
2130 | u = 0; |
2131 | break; |
2132 | } |
2133 | y = execute(a[1]->nnext); |
2134 | u = ((int)getfval(x)) | ((int)getfval(y)); |
2135 | tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0); |
2136 | nextarg = nextarg->nnext; |
2137 | break; |
2138 | case FXOR17: |
2139 | if (nextarg == 0) { |
2140 | WARNING("xor requires two arguments; returning 0"); |
2141 | u = 0; |
2142 | break; |
2143 | } |
2144 | y = execute(a[1]->nnext); |
2145 | u = ((int)getfval(x)) ^ ((int)getfval(y)); |
2146 | tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0); |
2147 | nextarg = nextarg->nnext; |
2148 | break; |
2149 | case FLSHIFT19: |
2150 | if (nextarg == 0) { |
2151 | WARNING("lshift requires two arguments; returning 0"); |
2152 | u = 0; |
2153 | break; |
2154 | } |
2155 | y = execute(a[1]->nnext); |
2156 | u = ((int)getfval(x)) << ((int)getfval(y)); |
2157 | tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0); |
2158 | nextarg = nextarg->nnext; |
2159 | break; |
2160 | case FRSHIFT20: |
2161 | if (nextarg == 0) { |
2162 | WARNING("rshift requires two arguments; returning 0"); |
2163 | u = 0; |
2164 | break; |
2165 | } |
2166 | y = execute(a[1]->nnext); |
2167 | u = ((int)getfval(x)) >> ((int)getfval(y)); |
2168 | tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0); |
2169 | nextarg = nextarg->nnext; |
2170 | break; |
2171 | case FSYSTEM6: |
2172 | fflush(stdout(&__sF[1])); /* in case something is buffered already */ |
2173 | estatus = status = system(getsval(x)); |
2174 | if (status != -1) { |
2175 | if (WIFEXITED(status)(((status) & 0177) == 0)) { |
2176 | estatus = WEXITSTATUS(status)(int)(((unsigned)(status) >> 8) & 0xff); |
2177 | } else if (WIFSIGNALED(status)(((status) & 0177) != 0177 && ((status) & 0177 ) != 0)) { |
2178 | estatus = WTERMSIG(status)(((status) & 0177)) + 256; |
2179 | #ifdef WCOREDUMP |
2180 | if (WCOREDUMP(status)((status) & 0200)) |
2181 | estatus += 256; |
2182 | #endif |
2183 | } else /* something else?!? */ |
2184 | estatus = 0; |
2185 | } |
2186 | /* else estatus was set to -1 */ |
2187 | u = estatus; |
2188 | break; |
2189 | case FRAND7: |
2190 | /* random() returns numbers in [0..2^31-1] |
2191 | * in order to get a number in [0, 1), divide it by 2^31 |
2192 | */ |
2193 | u = (Awkfloat) random() / (0x7fffffffL + 0x1UL); |
2194 | break; |
2195 | case FSRAND8: |
2196 | if (isrec(x)((x)->tval & 0200)) { /* no argument provided */ |
2197 | u = time(NULL((void *)0)); |
2198 | tmp = u; |
2199 | srandom((unsigned int) u); |
2200 | } else { |
2201 | u = getfval(x); |
2202 | tmp = u; |
2203 | srandom_deterministic((unsigned int) u); |
2204 | } |
2205 | u = srand_seed; |
2206 | srand_seed = tmp; |
2207 | break; |
2208 | case FTOUPPER12: |
2209 | case FTOLOWER13: |
2210 | if (t == FTOUPPER12) |
2211 | buf = nawk_toupper(getsval(x)); |
2212 | else |
2213 | buf = nawk_tolower(getsval(x)); |
2214 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); |
2215 | x = gettemp(); |
2216 | setsval(x, buf); |
2217 | free(buf); |
2218 | return x; |
2219 | case FFLUSH14: |
2220 | if (isrec(x)((x)->tval & 0200) || strlen(getsval(x)) == 0) { |
2221 | flush_all(); /* fflush() or fflush("") -> all */ |
2222 | u = 0; |
2223 | } else if ((fp = openfile(FFLUSH14, getsval(x), NULL((void *)0))) == NULL((void *)0)) |
2224 | u = EOF(-1); |
2225 | else |
2226 | u = fflush(fp); |
2227 | break; |
2228 | case FMKTIME23: |
2229 | memset(&tmbuf, 0, sizeof(tmbuf)); |
2230 | tm = &tmbuf; |
2231 | t = sscanf(getsval(x), "%d %d %d %d %d %d %d", |
2232 | &tm->tm_year, &tm->tm_mon, &tm->tm_mday, &tm->tm_hour, |
2233 | &tm->tm_min, &tm->tm_sec, &tm->tm_isdst); |
2234 | switch (t) { |
2235 | case 6: |
2236 | tm->tm_isdst = -1; /* let mktime figure it out */ |
2237 | /* FALLTHROUGH */ |
2238 | case 7: |
2239 | tm->tm_year -= 1900; |
2240 | tm->tm_mon--; |
2241 | u = mktime(tm); |
2242 | break; |
2243 | default: |
2244 | u = -1; |
2245 | break; |
2246 | } |
2247 | break; |
2248 | case FSYSTIME21: |
2249 | u = time((time_t *) 0); |
2250 | break; |
2251 | case FSTRFTIME22: |
2252 | /* strftime([format [,timestamp]]) */ |
2253 | if (nextarg) { |
2254 | y = execute(nextarg); |
2255 | nextarg = nextarg->nnext; |
2256 | tv = (time_t) getfval(y); |
2257 | tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0); |
2258 | } else |
2259 | tv = time((time_t *) 0); |
2260 | tm = localtime(&tv); |
2261 | if (tm == NULL((void *)0)) |
2262 | FATAL("bad time %ld", (long)tv); |
2263 | |
2264 | if (isrec(x)((x)->tval & 0200)) { |
2265 | /* format argument not provided, use default */ |
2266 | fmt = tostring("%a %b %d %H:%M:%S %Z %Y"); |
2267 | } else |
2268 | fmt = tostring(getsval(x)); |
2269 | |
2270 | sz = 32; |
2271 | buf = NULL((void *)0); |
2272 | do { |
2273 | if ((buf = (char *) reallocarray(buf, 2, sz)) == NULL((void *)0)) |
2274 | FATAL("out of memory in strftime"); |
2275 | sz *= 2; |
2276 | } while (strftime(buf, sz, fmt, tm) == 0 && fmt[0] != '\0'); |
2277 | |
2278 | y = gettemp(); |
2279 | setsval(y, buf); |
2280 | free(fmt); |
2281 | free(buf); |
2282 | |
2283 | return y; |
2284 | default: /* can't happen */ |
2285 | FATAL("illegal function type %d", t); |
2286 | break; |
2287 | } |
2288 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); |
2289 | x = gettemp(); |
2290 | setfval(x, u); |
2291 | if (nextarg != NULL((void *)0)) { |
2292 | WARNING("warning: function has too many arguments"); |
2293 | for ( ; nextarg; nextarg = nextarg->nnext) { |
2294 | y = execute(nextarg); |
2295 | tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0); |
2296 | } |
2297 | } |
2298 | return(x); |
2299 | } |
2300 | |
2301 | Cell *printstat(Node **a, int n) /* print a[0] */ |
2302 | { |
2303 | Node *x; |
2304 | Cell *y; |
2305 | FILE *fp; |
2306 | |
2307 | if (a[1] == NULL((void *)0)) /* a[1] is redirection operator, a[2] is file */ |
2308 | fp = stdout(&__sF[1]); |
2309 | else |
2310 | fp = redirect(ptoi(a[1]), a[2]); |
2311 | for (x = a[0]; x != NULL((void *)0); x = x->nnext) { |
2312 | y = execute(x); |
2313 | fputs(getpssval(y), fp); |
2314 | tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0); |
2315 | if (x->nnext == NULL((void *)0)) |
2316 | fputs(getsval(orsloc), fp); |
2317 | else |
2318 | fputs(getsval(ofsloc), fp); |
2319 | } |
2320 | if (a[1] != NULL((void *)0)) |
2321 | fflush(fp); |
2322 | if (ferror(fp)(!__isthreaded ? (((fp)->_flags & 0x0040) != 0) : (ferror )(fp))) |
2323 | FATAL("write error on %s", filename(fp)); |
2324 | return(True); |
2325 | } |
2326 | |
2327 | Cell *nullproc(Node **a, int n) |
2328 | { |
2329 | return 0; |
2330 | } |
2331 | |
2332 | |
2333 | FILE *redirect(int a, Node *b) /* set up all i/o redirections */ |
2334 | { |
2335 | FILE *fp; |
2336 | Cell *x; |
2337 | char *fname; |
2338 | |
2339 | x = execute(b); |
2340 | fname = getsval(x); |
2341 | fp = openfile(a, fname, NULL((void *)0)); |
2342 | if (fp == NULL((void *)0)) |
2343 | FATAL("can't open file %s", fname); |
2344 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); |
2345 | return fp; |
2346 | } |
2347 | |
/* One entry of the table of streams awk currently has open. */
struct files {
	FILE		*fp;	/* stream, NULL when the slot is free */
	const char	*fname;	/* name the stream was opened under */
	int		 mode;	/* '|', 'a', 'w' => LE/LT, GT */
};

/* Table of open streams and the number of slots allocated in it. */
struct files *files;

size_t nfiles;
2355 | |
2356 | static void stdinit(void) /* in case stdin, etc., are not constants */ |
2357 | { |
2358 | nfiles = FOPEN_MAX20; |
2359 | files = (struct files *) calloc(nfiles, sizeof(*files)); |
2360 | if (files == NULL((void *)0)) |
2361 | FATAL("can't allocate file memory for %zu files", nfiles); |
2362 | files[0].fp = stdin(&__sF[0]); |
2363 | files[0].fname = tostring("/dev/stdin"); |
2364 | files[0].mode = LT287; |
2365 | files[1].fp = stdout(&__sF[1]); |
2366 | files[1].fname = tostring("/dev/stdout"); |
2367 | files[1].mode = GT285; |
2368 | files[2].fp = stderr(&__sF[2]); |
2369 | files[2].fname = tostring("/dev/stderr"); |
2370 | files[2].mode = GT285; |
2371 | } |
2372 | |
2373 | FILE *openfile(int a, const char *us, bool_Bool *pnewflag) |
2374 | { |
2375 | const char *s = us; |
2376 | size_t i; |
2377 | int m; |
2378 | FILE *fp = NULL((void *)0); |
2379 | |
2380 | if (*s == '\0') |
2381 | FATAL("null file name in print or getline"); |
2382 | for (i = 0; i < nfiles; i++) |
2383 | if (files[i].fname && strcmp(s, files[i].fname) == 0 && |
2384 | (a == files[i].mode || (a==APPEND282 && files[i].mode==GT285) || |
2385 | a == FFLUSH14)) { |
2386 | if (pnewflag) |
2387 | *pnewflag = false0; |
2388 | return files[i].fp; |
2389 | } |
2390 | if (a == FFLUSH14) /* didn't find it, so don't create it! */ |
2391 | return NULL((void *)0); |
2392 | |
2393 | for (i = 0; i < nfiles; i++) |
2394 | if (files[i].fp == NULL((void *)0)) |
2395 | break; |
2396 | if (i >= nfiles) { |
2397 | struct files *nf; |
2398 | size_t nnf = nfiles + FOPEN_MAX20; |
2399 | nf = (struct files *) reallocarray(files, nnf, sizeof(*nf)); |
2400 | if (nf == NULL((void *)0)) |
2401 | FATAL("cannot grow files for %s and %zu files", s, nnf); |
2402 | memset(&nf[nfiles], 0, FOPEN_MAX20 * sizeof(*nf)); |
2403 | nfiles = nnf; |
2404 | files = nf; |
2405 | } |
2406 | fflush(stdout(&__sF[1])); /* force a semblance of order */ |
2407 | m = a; |
2408 | if (a == GT285) { |
2409 | fp = fopen(s, "w"); |
2410 | } else if (a == APPEND282) { |
2411 | fp = fopen(s, "a"); |
2412 | m = GT285; /* so can mix > and >> */ |
2413 | } else if (a == '|') { /* output pipe */ |
2414 | fp = popen(s, "w"); |
2415 | } else if (a == LE286) { /* input pipe */ |
2416 | fp = popen(s, "r"); |
2417 | } else if (a == LT287) { /* getline <file */ |
2418 | fp = strcmp(s, "-") == 0 ? stdin(&__sF[0]) : fopen(s, "r"); /* "-" is stdin */ |
2419 | } else /* can't happen */ |
2420 | FATAL("illegal redirection %d", a); |
2421 | if (fp != NULL((void *)0)) { |
2422 | files[i].fname = tostring(s); |
2423 | files[i].fp = fp; |
2424 | files[i].mode = m; |
2425 | if (pnewflag) |
2426 | *pnewflag = true1; |
2427 | if (fp != stdin(&__sF[0]) && fp != stdout(&__sF[1]) && fp != stderr(&__sF[2])) |
2428 | (void) fcntl(fileno(fp)(!__isthreaded ? ((fp)->_file) : (fileno)(fp)), F_SETFD2, FD_CLOEXEC1); |
2429 | } |
2430 | return fp; |
2431 | } |
2432 | |
2433 | const char *filename(FILE *fp) |
2434 | { |
2435 | size_t i; |
2436 | |
2437 | for (i = 0; i < nfiles; i++) |
2438 | if (fp == files[i].fp) |
2439 | return files[i].fname; |
2440 | return "???"; |
2441 | } |
2442 | |
2443 | Cell *closefile(Node **a, int n) |
2444 | { |
2445 | Cell *x; |
2446 | size_t i; |
2447 | bool_Bool stat; |
2448 | |
2449 | x = execute(a[0]); |
2450 | getsval(x); |
2451 | stat = true1; |
2452 | for (i = 0; i < nfiles; i++) { |
2453 | if (!files[i].fname || strcmp(x->sval, files[i].fname) != 0) |
2454 | continue; |
2455 | if (files[i].mode == GT285 || files[i].mode == '|') |
2456 | fflush(files[i].fp); |
2457 | if (ferror(files[i].fp)(!__isthreaded ? (((files[i].fp)->_flags & 0x0040) != 0 ) : (ferror)(files[i].fp))) { |
2458 | if ((files[i].mode == GT285 && files[i].fp != stderr(&__sF[2])) |
2459 | || files[i].mode == '|') |
2460 | FATAL("write error on %s", files[i].fname); |
2461 | else |
2462 | WARNING("i/o error occurred on %s", files[i].fname); |
2463 | } |
2464 | if (files[i].fp == stdin(&__sF[0]) || files[i].fp == stdout(&__sF[1]) || |
2465 | files[i].fp == stderr(&__sF[2])) |
2466 | stat = freopen("/dev/null", "r+", files[i].fp) == NULL((void *)0); |
2467 | else if (files[i].mode == '|' || files[i].mode == LE286) |
2468 | stat = pclose(files[i].fp) == -1; |
2469 | else |
2470 | stat = fclose(files[i].fp) == EOF(-1); |
2471 | if (stat) |
2472 | WARNING("i/o error occurred closing %s", files[i].fname); |
2473 | xfree(files[i].fname){ free((void *)(intptr_t)(files[i].fname)); (files[i].fname) = ((void *)0); }; |
2474 | files[i].fname = NULL((void *)0); /* watch out for ref thru this */ |
2475 | files[i].fp = NULL((void *)0); |
2476 | break; |
2477 | } |
2478 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); |
2479 | x = gettemp(); |
2480 | setfval(x, (Awkfloat) (stat ? -1 : 0)); |
2481 | return(x); |
2482 | } |
2483 | |
2484 | void closeall(void) |
2485 | { |
2486 | size_t i; |
2487 | bool_Bool stat = false0; |
2488 | |
2489 | for (i = 0; i < nfiles; i++) { |
2490 | if (! files[i].fp) |
2491 | continue; |
2492 | if (files[i].mode == GT285 || files[i].mode == '|') |
2493 | fflush(files[i].fp); |
2494 | if (ferror(files[i].fp)(!__isthreaded ? (((files[i].fp)->_flags & 0x0040) != 0 ) : (ferror)(files[i].fp))) { |
2495 | if ((files[i].mode == GT285 && files[i].fp != stderr(&__sF[2])) |
2496 | || files[i].mode == '|') |
2497 | FATAL("write error on %s", files[i].fname); |
2498 | else |
2499 | WARNING("i/o error occurred on %s", files[i].fname); |
2500 | } |
2501 | if (files[i].fp == stdin(&__sF[0]) || files[i].fp == stdout(&__sF[1]) || |
2502 | files[i].fp == stderr(&__sF[2])) |
2503 | continue; |
2504 | if (files[i].mode == '|' || files[i].mode == LE286) |
2505 | stat = pclose(files[i].fp) == -1; |
2506 | else |
2507 | stat = fclose(files[i].fp) == EOF(-1); |
2508 | if (stat) |
2509 | WARNING("i/o error occurred while closing %s", files[i].fname); |
2510 | } |
2511 | } |
2512 | |
2513 | static void flush_all(void) |
2514 | { |
2515 | size_t i; |
2516 | |
2517 | for (i = 0; i < nfiles; i++) |
2518 | if (files[i].fp) |
2519 | fflush(files[i].fp); |
2520 | } |
2521 | |
2522 | void backsub(char **pb_ptr, const char **sptr_ptr); |
2523 | |
/*
 * dosub: implement sub() and gsub().
 *
 * a[0] == NULL means a[1] is an already-compiled regular expression;
 * otherwise a[1] is executed and compiled with makedfa().  a[2] is the
 * replacement string and a[3] the source string, which receives the
 * result through setsval() if at least one match was found.
 * subop selects the behaviour: SUB replaces only the first match,
 * GSUB replaces every match.
 *
 * Returns a temporary numeric cell holding the match counter m.
 *
 * NOTE(review): patbeg and patlen are not defined in this block; they
 * appear to describe the most recent pmatch() result - confirm.
 */
2524 | Cell *dosub(Node **a, int subop) /* sub and gsub */ |
2525 | { |
2526 | fa *pfa; |
2527 | int tempstat; |
2528 | char *repl; |
2529 | Cell *x; |
2530 | |
2531 | char *buf = NULL((void *)0); |
2532 | char *pb = NULL((void *)0); |
2533 | int bufsz = recsize; |
2534 | |
2535 | const char *r, *s; |
2536 | const char *start; |
2537 | const char *noempty = NULL((void *)0); /* empty match disallowed here */ |
2538 | size_t m = 0; /* match count */ |
2539 | size_t whichm; /* which match to select, 0 = global */ |
2540 | int mtype; /* match type */ |
2541 | |
2542 | if (a[0] == NULL((void *)0)) { /* 0 => a[1] is already-compiled regexpr */ |
2543 | pfa = (fa *) a[1]; |
2544 | } else { |
2545 | x = execute(a[1]); |
2546 | pfa = makedfa(getsval(x), 1); |
2547 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); |
2548 | } |
2549 | |
	/* the replacement is copied so it outlives the cell it came from */
2550 | x = execute(a[2]); /* replacement string */ |
2551 | repl = tostring(getsval(x)); |
2552 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); |
2553 | |
2554 | switch (subop) { |
2555 | case SUB301: |
2556 | whichm = 1; |
2557 | x = execute(a[3]); /* source string */ |
2558 | break; |
2559 | case GSUB302: |
2560 | whichm = 0; |
2561 | x = execute(a[3]); /* source string */ |
2562 | break; |
2563 | default: |
2564 | FATAL("dosub: unrecognized subop: %d", subop); |
2565 | } |
2566 | |
2567 | start = getsval(x); |
2568 | while (pmatch(pfa, start)) { |
	/* the output buffer is allocated lazily, on the first match only; */
	/* the automaton's initstat is saved here and restored after the loop */
2569 | if (buf == NULL((void *)0)) { |
2570 | if ((pb = buf = malloc(bufsz)) == NULL((void *)0)) |
2571 | FATAL("out of memory in dosub"); |
2572 | tempstat = pfa->initstat; |
2573 | pfa->initstat = 2; |
2574 | } |
2575 | |
2576 | /* match types */ |
2577 | #define MT_IGNORE 0 /* unselected or invalid */ |
2578 | #define MT_INSERT 1 /* selected, empty */ |
2579 | #define MT_REPLACE 2 /* selected, not empty */ |
2580 | |
2581 | /* an empty match just after replacement is invalid */ |
2582 | |
2583 | if (patbeg == noempty && patlen == 0) { |
2584 | mtype = MT_IGNORE; /* invalid, not counted */ |
2585 | } else if (whichm == ++m || whichm == 0) { |
2586 | mtype = patlen ? MT_REPLACE : MT_INSERT; |
2587 | } else { |
2588 | mtype = MT_IGNORE; /* unselected, but counted */ |
2589 | } |
2590 | |
2591 | /* leading text: */ |
2592 | if (patbeg > start) { |
2593 | adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - start), |
2594 | recsize, &pb, "dosub"); |
2595 | s = start; |
2596 | while (s < patbeg) |
2597 | *pb++ = *s++; |
2598 | } |
2599 | |
2600 | if (mtype == MT_IGNORE) |
2601 | goto matching_text; /* skip replacement text */ |
2602 | |
	/* emit the replacement: '\\' sequences go through backsub(), */
	/* an unescaped '&' stands for the matched text */
2603 | r = repl; |
2604 | while (*r != 0) { |
2605 | adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "dosub"); |
2606 | if (*r == '\\') { |
2607 | backsub(&pb, &r); |
2608 | } else if (*r == '&') { |
2609 | r++; |
2610 | adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, |
2611 | &pb, "dosub"); |
2612 | for (s = patbeg; s < patbeg+patlen; ) |
2613 | *pb++ = *s++; |
2614 | } else { |
2615 | *pb++ = *r++; |
2616 | } |
2617 | } |
2618 | |
2619 | matching_text: |
2620 | if (mtype == MT_REPLACE || *patbeg == '\0') |
2621 | goto next_search; /* skip matching text */ |
2622 | |
	/* for an empty match, copy one character (u8_nextlen bytes) so the */
	/* next search starts further along */
2623 | if (patlen == 0) |
2624 | patlen = u8_nextlen(patbeg); |
2625 | adjbuf(&buf, &bufsz, (pb-buf) + patlen, recsize, &pb, "dosub"); |
2626 | s = patbeg; |
2627 | while (s < patbeg + patlen) |
2628 | *pb++ = *s++; |
2629 | |
2630 | next_search: |
2631 | start = patbeg + patlen; |
2632 | if (m == whichm || *patbeg == '\0') |
2633 | break; |
2634 | if (mtype == MT_REPLACE) |
2635 | noempty = start; |
2636 | |
2637 | #undef MT_IGNORE |
2638 | #undef MT_INSERT |
2639 | #undef MT_REPLACE |
2640 | } |
2641 | |
2642 | xfree(repl){ free((void *)(intptr_t)(repl)); (repl) = ((void *)0); }; |
2643 | |
	/* buf is non-NULL only if at least one match was found */
2644 | if (buf != NULL((void *)0)) { |
2645 | pfa->initstat = tempstat; |
2646 | |
2647 | /* trailing text */ |
2648 | adjbuf(&buf, &bufsz, 1+strlen(start)+pb-buf, 0, &pb, "dosub"); |
2649 | while ((*pb++ = *start++) != '\0') |
2650 | ; |
2651 | |
2652 | setsval(x, buf); |
2653 | free(buf); |
2654 | } |
2655 | |
2656 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); |
2657 | x = gettemp(); |
2658 | x->tval = NUM01; |
2659 | x->fval = m; |
2660 | return x; |
2661 | } |
2662 | |
/*
 * gensub: global selective substitute.
 *
 * a[0] == 0 means a[1] is an already-compiled regular expression;
 * otherwise a[1] is executed and compiled with makedfa().  a[2] is the
 * replacement string, a[3] selects which match to replace and a[4] is
 * the source string.  A selector starting with 'g' or 'G' replaces
 * every match; otherwise its numeric value is the 1-based index of the
 * match to replace, with values below 1 treated as 1.
 *
 * The source is not modified.  The result is a temporary cell that
 * starts as a copy of the source and receives the substituted text if
 * the pattern matched at all.  A replacement string containing a
 * backslash followed by a digit is fatal, since backreferences are not
 * supported.
 *
 * NOTE(review): patbeg and patlen are not defined in this block; they
 * appear to describe the most recent pmatch() result - confirm.
 */
2663 | Cell *gensub(Node **a, int nnn) /* global selective substitute */ |
2664 | /* XXX incomplete - doesn't support backreferences \0 ... \9 */ |
2665 | { |
2666 | Cell *x, *y, *res, *h; |
2667 | char *rptr; |
2668 | const char *sptr; |
2669 | char *buf, *pb; |
2670 | const char *t, *q; |
2671 | fa *pfa; |
2672 | int mflag, tempstat, num, whichm; |
2673 | int bufsz = recsize; |
2674 | |
2675 | if ((buf = malloc(bufsz)) == NULL((void *)0)) |
2676 | FATAL("out of memory in gensub"); |
2677 | mflag = 0; /* if mflag == 0, can replace empty string */ |
2678 | num = 0; |
2679 | x = execute(a[4]); /* source string */ |
2680 | t = getsval(x); |
2681 | res = copycell(x); /* target string - initially copy of source */ |
2682 | res->csub = CTEMP4; /* result values are temporary */ |
2683 | if (a[0] == 0) /* 0 => a[1] is already-compiled regexpr */ |
2684 | pfa = (fa *) a[1]; /* regular expression */ |
2685 | else { |
2686 | y = execute(a[1]); |
2687 | pfa = makedfa(getsval(y), 1); |
2688 | tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0); |
2689 | } |
2690 | y = execute(a[2]); /* replacement string */ |
2691 | h = execute(a[3]); /* which matches should be replaced */ |
2692 | sptr = getsval(h); |
	/* whichm: -1 means replace all matches, otherwise a 0-based match index */
2693 | if (sptr[0] == 'g' || sptr[0] == 'G') |
2694 | whichm = -1; |
2695 | else { |
2696 | /* |
2697 | * The specified number is index of replacement, starting |
2698 | * from 1. GNU awk treats index lower than 0 same as |
2699 | * 1, we do same for compatibility. |
2700 | */ |
2701 | whichm = (int) getfval(h) - 1; |
2702 | if (whichm < 0) |
2703 | whichm = 0; |
2704 | } |
2705 | tempfree(h)do { if (((h)->csub == 4)) tfree(h); } while ( 0); |
2706 | |
2707 | if (pmatch(pfa, t)) { |
2708 | char *sl; |
2709 | |
	/* the automaton's initstat is saved here and restored at "done" */
2710 | tempstat = pfa->initstat; |
2711 | pfa->initstat = 2; |
2712 | pb = buf; |
2713 | rptr = getsval(y); |
2714 | /* |
2715 | * XXX if there are any backreferences in subst string, |
2716 | * complain now. |
2717 | */ |
2718 | for (sl = rptr; (sl = strchr(sl, '\\')) && sl[1]; sl++) { |
2719 | if (strchr("0123456789", sl[1])) { |
2720 | FATAL("gensub doesn't support backreferences (subst \"%s\")", rptr); |
2721 | } |
2722 | } |
2723 | |
2724 | do { |
	/* a match other than the selected one is counted and copied verbatim */
2725 | if (whichm >= 0 && whichm != num) { |
2726 | num++; |
2727 | adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - t) + patlen, recsize, &pb, "gensub"); |
2728 | |
2729 | /* copy the part of string up to and including |
2730 | * match to output buffer */ |
2731 | while (t < patbeg + patlen) |
2732 | *pb++ = *t++; |
2733 | continue; |
2734 | } |
2735 | |
2736 | if (patlen == 0 && *patbeg != 0) { /* matched empty string */ |
2737 | if (mflag == 0) { /* can replace empty */ |
2738 | num++; |
2739 | sptr = rptr; |
2740 | while (*sptr != 0) { |
2741 | adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub"); |
2742 | if (*sptr == '\\') { |
2743 | backsub(&pb, &sptr); |
2744 | } else if (*sptr == '&') { |
2745 | sptr++; |
2746 | adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub"); |
2747 | for (q = patbeg; q < patbeg+patlen; ) |
2748 | *pb++ = *q++; |
2749 | } else |
2750 | *pb++ = *sptr++; |
2751 | } |
2752 | } |
2753 | if (*t == 0) /* at end */ |
2754 | goto done; |
	/* copy one source byte so the next search starts further along */
2755 | adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gensub"); |
2756 | *pb++ = *t++; |
2757 | if (pb > buf + bufsz) /* BUG: not sure of this test */ |
2758 | FATAL("gensub result0 %.30s too big; can't happen", buf); |
2759 | mflag = 0; |
2760 | } |
2761 | else { /* matched nonempty string */ |
2762 | num++; |
	/* copy the text before the match, then the expanded replacement */
2763 | sptr = t; |
2764 | adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gensub"); |
2765 | while (sptr < patbeg) |
2766 | *pb++ = *sptr++; |
2767 | sptr = rptr; |
2768 | while (*sptr != 0) { |
2769 | adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub"); |
2770 | if (*sptr == '\\') { |
2771 | backsub(&pb, &sptr); |
2772 | } else if (*sptr == '&') { |
2773 | sptr++; |
2774 | adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub"); |
2775 | for (q = patbeg; q < patbeg+patlen; ) |
2776 | *pb++ = *q++; |
2777 | } else |
2778 | *pb++ = *sptr++; |
2779 | } |
2780 | t = patbeg + patlen; |
2781 | if (patlen == 0 || *t == 0 || *(t-1) == 0) |
2782 | goto done; |
2783 | if (pb > buf + bufsz) |
2784 | FATAL("gensub result1 %.30s too big; can't happen", buf); |
2785 | mflag = 1; |
2786 | } |
2787 | } while (pmatch(pfa,t)); |
	/* no further match: append the remainder of the source, including its NUL */
2788 | sptr = t; |
2789 | adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gensub"); |
2790 | while ((*pb++ = *sptr++) != 0) |
2791 | ; |
2792 | done: if (pb > buf + bufsz) |
2793 | FATAL("gensub result2 %.30s too big; can't happen", buf); |
2794 | *pb = '\0'; |
2795 | setsval(res, buf); |
2796 | pfa->initstat = tempstat; |
2797 | } |
2798 | tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0); |
2799 | tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0); |
2800 | free(buf); |
2801 | return(res); |
2802 | } |
2803 | |
2804 | void backsub(char **pb_ptr, const char **sptr_ptr) /* handle \\& variations */ |
2805 | { /* sptr[0] == '\\' */ |
2806 | char *pb = *pb_ptr; |
2807 | const char *sptr = *sptr_ptr; |
2808 | |
2809 | if (sptr[1] == '\\') { |
2810 | if (sptr[2] == '\\' && sptr[3] == '&') { /* \\\& -> \& */ |
2811 | *pb++ = '\\'; |
2812 | *pb++ = '&'; |
2813 | sptr += 4; |
2814 | } else if (sptr[2] == '&') { /* \\& -> \ + matched */ |
2815 | *pb++ = '\\'; |
2816 | sptr += 2; |
2817 | } else if (do_posix) { /* \\x -> \x */ |
2818 | sptr++; |
2819 | *pb++ = *sptr++; |
2820 | } else { /* \\x -> \\x */ |
2821 | *pb++ = *sptr++; |
2822 | *pb++ = *sptr++; |
2823 | } |
2824 | } else if (sptr[1] == '&') { /* literal & */ |
2825 | sptr++; |
2826 | *pb++ = *sptr++; |
2827 | } else /* literal \ */ |
2828 | *pb++ = *sptr++; |
2829 | |
2830 | *pb_ptr = pb; |
2831 | *sptr_ptr = sptr; |
2832 | } |
2833 | |
/*
 * wide_char_to_byte_str: encode the Unicode code point `rune` as UTF-8.
 *
 * Returns a pointer to a NUL-terminated static buffer that is
 * overwritten by the next call, and stores the number of encoded bytes
 * (excluding the NUL) in *outlen.
 *
 * Returns NULL, leaving *outlen untouched, when rune is not a Unicode
 * scalar value: negative, above 0x10FFFF, or a UTF-16 surrogate in
 * 0xD800..0xDFFF.  Surrogates have no valid UTF-8 encoding, so encoding
 * them would emit an ill-formed byte sequence.
 */
static char *wide_char_to_byte_str(int rune, size_t *outlen)
{
	static char buf[5];
	int len;

	if (rune < 0 || rune > 0x10FFFF)
		return NULL;
	if (rune >= 0xD800 && rune <= 0xDFFF)	/* UTF-16 surrogates */
		return NULL;

	memset(buf, 0, sizeof(buf));

	len = 0;
	if (rune <= 0x0000007F) {
		buf[len++] = rune;
	} else if (rune <= 0x000007FF) {
		// 110xxxxx 10xxxxxx
		buf[len++] = 0xC0 | (rune >> 6);
		buf[len++] = 0x80 | (rune & 0x3F);
	} else if (rune <= 0x0000FFFF) {
		// 1110xxxx 10xxxxxx 10xxxxxx
		buf[len++] = 0xE0 | (rune >> 12);
		buf[len++] = 0x80 | ((rune >> 6) & 0x3F);
		buf[len++] = 0x80 | (rune & 0x3F);
	} else {
		// 0x00010000 - 0x10FFFF
		// 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
		buf[len++] = 0xF0 | (rune >> 18);
		buf[len++] = 0x80 | ((rune >> 12) & 0x3F);
		buf[len++] = 0x80 | ((rune >> 6) & 0x3F);
		buf[len++] = 0x80 | (rune & 0x3F);
	}

	*outlen = len;
	buf[len] = '\0';

	return buf;
}