Bug Summary

File:src/usr.bin/awk/run.c
Warning:line 124, column 3
Use of memory after it is freed

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple amd64-unknown-openbsd7.4 -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name run.c -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 1 -pic-is-pie -mframe-pointer=all -relaxed-aliasing -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -target-feature +retpoline-indirect-calls -target-feature +retpoline-indirect-branches -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/usr/src/usr.bin/awk/obj -resource-dir /usr/local/llvm16/lib/clang/16 -I . -I /usr/src/usr.bin/awk -D HAS_ISBLANK -D NDEBUG -internal-isystem /usr/local/llvm16/lib/clang/16/include -internal-externc-isystem /usr/include -O2 -fdebug-compilation-dir=/usr/src/usr.bin/awk/obj -ferror-limit 19 -fwrapv -D_RET_PROTECTOR -ret-protector -fcf-protection=branch -fno-jump-tables -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-valloc -fno-builtin-free -fno-builtin-strdup -fno-builtin-strndup -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/ben/Projects/scan/2024-01-11-140451-98009-1 -x c /usr/src/usr.bin/awk/run.c
1/* $OpenBSD: run.c,v 1.83 2023/11/28 20:54:38 millert Exp $ */
2/****************************************************************
3Copyright (C) Lucent Technologies 1997
4All Rights Reserved
5
6Permission to use, copy, modify, and distribute this software and
7its documentation for any purpose and without fee is hereby
8granted, provided that the above copyright notice appear in all
9copies and that both that the copyright notice and this
10permission notice and warranty disclaimer appear in supporting
11documentation, and that the name Lucent Technologies or any of
12its entities not be used in advertising or publicity pertaining
13to distribution of the software without specific, written prior
14permission.
15
16LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
18IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
19SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
20WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
21IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
22ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
23THIS SOFTWARE.
24****************************************************************/
25
26#define DEBUG
27#include <stdio.h>
28#include <ctype.h>
29#include <errno(*__errno()).h>
30#include <wctype.h>
31#include <fcntl.h>
32#include <setjmp.h>
33#include <limits.h>
34#include <math.h>
35#include <string.h>
36#include <stdlib.h>
37#include <time.h>
38#include <sys/types.h>
39#include <sys/wait.h>
40#include "awk.h"
41#include "awkgram.tab.h"
42
43
44static void stdinit(void);
45static void flush_all(void);
46static char *wide_char_to_byte_str(int rune, size_t *outlen);
47
48#if 1
49#define tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0) do { if (istemp(x)((x)->csub == 4)) tfree(x); } while (/*CONSTCOND*/0)
50#else
51void tempfree(Cell *p)do { if (((Cell *p)->csub == 4)) tfree(Cell *p); } while (
0)
{
52 if (p->ctype == OCELL1 && (p->csub < CUNK0 || p->csub > CFREE7)) {
53 WARNING("bad csub %d in Cell %d %s",
54 p->csub, p->ctype, p->sval);
55 }
56 if (istemp(p)((p)->csub == 4))
57 tfree(p);
58}
59#endif
60
61/* do we really need these? */
62/* #ifdef _NFILE */
63/* #ifndef FOPEN_MAX */
64/* #define FOPEN_MAX _NFILE */
65/* #endif */
66/* #endif */
67/* */
68/* #ifndef FOPEN_MAX */
69/* #define FOPEN_MAX 40 */ /* max number of open files */
70/* #endif */
71/* */
72/* #ifndef RAND_MAX */
73/* #define RAND_MAX 32767 */ /* all that ansi guarantees */
74/* #endif */
75
76jmp_buf env;
77extern int pairstack[];
78extern Awkfloat srand_seed;
79
80Node *winner = NULL((void *)0); /* root of parse tree */
81Cell *tmps; /* free temporary cells for execution */
82
83static Cell truecell ={ OBOOL2, BTRUE11, 0, 0, 1.0, NUM01, NULL((void *)0), NULL((void *)0) };
84Cell *True = &truecell;
85static Cell falsecell ={ OBOOL2, BFALSE12, 0, 0, 0.0, NUM01, NULL((void *)0), NULL((void *)0) };
86Cell *False = &falsecell;
87static Cell breakcell ={ OJUMP3, JBREAK23, 0, 0, 0.0, NUM01, NULL((void *)0), NULL((void *)0) };
88Cell *jbreak = &breakcell;
89static Cell contcell ={ OJUMP3, JCONT24, 0, 0, 0.0, NUM01, NULL((void *)0), NULL((void *)0) };
90Cell *jcont = &contcell;
91static Cell nextcell ={ OJUMP3, JNEXT22, 0, 0, 0.0, NUM01, NULL((void *)0), NULL((void *)0) };
92Cell *jnext = &nextcell;
93static Cell nextfilecell ={ OJUMP3, JNEXTFILE26, 0, 0, 0.0, NUM01, NULL((void *)0), NULL((void *)0) };
94Cell *jnextfile = &nextfilecell;
95static Cell exitcell ={ OJUMP3, JEXIT21, 0, 0, 0.0, NUM01, NULL((void *)0), NULL((void *)0) };
96Cell *jexit = &exitcell;
97static Cell retcell ={ OJUMP3, JRET25, 0, 0, 0.0, NUM01, NULL((void *)0), NULL((void *)0) };
98Cell *jret = &retcell;
99static Cell tempcell ={ OCELL1, CTEMP4, 0, EMPTY, 0.0, NUM01|STR02|DONTFREE04, NULL((void *)0), NULL((void *)0) };
100
101Node *curnode = NULL((void *)0); /* the node being executed, for debugging */
102
103/* buffer memory management */
104int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr,
105 const char *whatrtn)
106/* pbuf: address of pointer to buffer being managed
107 * psiz: address of buffer size variable
108 * minlen: minimum length of buffer needed
109 * quantum: buffer size quantum
110 * pbptr: address of movable pointer into buffer, or 0 if none
111 * whatrtn: name of the calling routine if failure should cause fatal error
112 *
113 * return 0 for realloc failure, !=0 for success
114 */
115{
116 if (minlen > *psiz) {
23
Assuming the condition is true
24
Taking true branch
117 char *tbuf;
118 int rminlen = quantum ? minlen % quantum : 0;
25
Assuming 'quantum' is 0
26
'?' condition is false
119 int boff = pbptr
26.1
'pbptr' is non-null
? *pbptr - *pbuf : 0;
27
'?' condition is true
120 /* round up to next multiple of quantum */
121 if (rminlen
27.1
'rminlen' is 0
)
28
Taking false branch
122 minlen += quantum - rminlen;
123 tbuf = (char *) realloc(*pbuf, minlen);
29
Memory is released
124 DPRINTF("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", whatrtn, *psiz, minlen, (void*)*pbuf, (void*)tbuf)if (dbg) printf("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", whatrtn
, *psiz, minlen, (void*)*pbuf, (void*)tbuf)
;
30
Assuming 'dbg' is not equal to 0
31
Taking true branch
32
Use of memory after it is freed
125 if (tbuf == NULL((void *)0)) {
126 if (whatrtn)
127 FATAL("out of memory in %s", whatrtn);
128 return 0;
129 }
130 *pbuf = tbuf;
131 *psiz = minlen;
132 if (pbptr)
133 *pbptr = tbuf + boff;
134 }
135 return 1;
136}
137
138void run(Node *a) /* execution of parse tree starts here */
139{
140
141 stdinit();
142 execute(a);
143 closeall();
144}
145
146Cell *execute(Node *u) /* execute a node of the parse tree */
147{
148 Cell *(*proc)(Node **, int);
149 Cell *x;
150 Node *a;
151
152 if (u == NULL((void *)0))
153 return(True);
154 for (a = u; ; a = a->nnext) {
155 curnode = a;
156 if (isvalue(a)((a)->ntype == 1)) {
157 x = (Cell *) (a->narg[0]);
158 if (isfld(x)((x)->tval & 0100) && !donefld)
159 fldbld();
160 else if (isrec(x)((x)->tval & 0200) && !donerec)
161 recbld();
162 return(x);
163 }
164 if (notlegal(a->nobj)(a->nobj <= 257 || a->nobj >= 352 || proctab[a->
nobj-257] == nullproc)
) /* probably a Cell* but too risky to print */
165 FATAL("illegal statement");
166 proc = proctab[a->nobj-FIRSTTOKEN257];
167 x = (*proc)(a->narg, a->nobj);
168 if (isfld(x)((x)->tval & 0100) && !donefld)
169 fldbld();
170 else if (isrec(x)((x)->tval & 0200) && !donerec)
171 recbld();
172 if (isexpr(a)((a)->ntype == 3))
173 return(x);
174 if (isjump(x)((x)->ctype == 3))
175 return(x);
176 if (a->nnext == NULL((void *)0))
177 return(x);
178 tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0);
179 }
180}
181
182
183Cell *program(Node **a, int n) /* execute an awk program */
184{ /* a[0] = BEGIN, a[1] = body, a[2] = END */
185 Cell *x;
186
187 if (setjmp(env) != 0)
188 goto ex;
189 if (a[0]) { /* BEGIN */
190 x = execute(a[0]);
191 if (isexit(x)((x)->csub == 21))
192 return(True);
193 if (isjump(x)((x)->ctype == 3))
194 FATAL("illegal break, continue, next or nextfile from BEGIN");
195 tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0);
196 }
197 if (a[1] || a[2])
198 while (getrec(&record, &recsize, true1) > 0) {
199 x = execute(a[1]);
200 if (isexit(x)((x)->csub == 21))
201 break;
202 tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0);
203 }
204 ex:
205 if (setjmp(env) != 0) /* handles exit within END */
206 goto ex1;
207 if (a[2]) { /* END */
208 x = execute(a[2]);
209 if (isbreak(x)((x)->csub == 23) || isnext(x)((x)->csub == 22 || (x)->csub == 26) || iscont(x)((x)->csub == 24))
210 FATAL("illegal break, continue, next or nextfile from END");
211 tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0);
212 }
213 ex1:
214 return(True);
215}
216
217struct Frame { /* stack frame for awk function calls */
218 int nargs; /* number of arguments in this call */
219 Cell *fcncell; /* pointer to Cell for function */
220 Cell **args; /* pointer to array of arguments after execute */
221 Cell *retval; /* return value */
222};
223
224#define NARGS50 50 /* max args in a call */
225
226struct Frame *frame = NULL((void *)0); /* base of stack frames; dynamically allocated */
227int nframe = 0; /* number of frames allocated */
228struct Frame *frp = NULL((void *)0); /* frame pointer. bottom level unused */
229
230Cell *call(Node **a, int n) /* function call. very kludgy and fragile */
231{
232 static const Cell newcopycell = { OCELL1, CCOPY6, 0, EMPTY, 0.0, NUM01|STR02|DONTFREE04, NULL((void *)0), NULL((void *)0) };
233 int i, ncall, ndef;
234 int freed = 0; /* handles potential double freeing when fcn & param share a tempcell */
235 Node *x;
236 Cell *args[NARGS50], *oargs[NARGS50]; /* BUG: fixed size arrays */
237 Cell *y, *z, *fcn;
238 char *s;
239
240 fcn = execute(a[0]); /* the function itself */
241 s = fcn->nval;
242 if (!isfcn(fcn)((fcn)->tval & 040))
243 FATAL("calling undefined function %s", s);
244 if (frame == NULL((void *)0)) {
245 frp = frame = (struct Frame *) calloc(nframe += 100, sizeof(*frame));
246 if (frame == NULL((void *)0))
247 FATAL("out of space for stack frames calling %s", s);
248 }
249 for (ncall = 0, x = a[1]; x != NULL((void *)0); x = x->nnext) /* args in call */
250 ncall++;
251 ndef = (int) fcn->fval; /* args in defn */
252 DPRINTF("calling %s, %d args (%d in defn), frp=%d\n", s, ncall, ndef, (int) (frp-frame))if (dbg) printf("calling %s, %d args (%d in defn), frp=%d\n",
s, ncall, ndef, (int) (frp-frame))
;
253 if (ncall > ndef)
254 WARNING("function %s called with %d args, uses only %d",
255 s, ncall, ndef);
256 if (ncall + ndef > NARGS50)
257 FATAL("function %s has %d arguments, limit %d", s, ncall+ndef, NARGS50);
258 for (i = 0, x = a[1]; x != NULL((void *)0); i++, x = x->nnext) { /* get call args */
259 DPRINTF("evaluate args[%d], frp=%d:\n", i, (int) (frp-frame))if (dbg) printf("evaluate args[%d], frp=%d:\n", i, (int) (frp
-frame))
;
260 y = execute(x);
261 oargs[i] = y;
262 DPRINTF("args[%d]: %s %f <%s>, t=%o\n",if (dbg) printf("args[%d]: %s %f <%s>, t=%o\n", i, ((y->
nval) ? (y->nval) : "(null)"), y->fval, ((y)->tval &
020) ? "(array)" : ((y->sval) ? (y->sval) : "(null)"),
y->tval)
263 i, NN(y->nval), y->fval, isarr(y) ? "(array)" : NN(y->sval), y->tval)if (dbg) printf("args[%d]: %s %f <%s>, t=%o\n", i, ((y->
nval) ? (y->nval) : "(null)"), y->fval, ((y)->tval &
020) ? "(array)" : ((y->sval) ? (y->sval) : "(null)"),
y->tval)
;
264 if (isfcn(y)((y)->tval & 040))
265 FATAL("can't use function %s as argument in %s", y->nval, s);
266 if (isarr(y)((y)->tval & 020))
267 args[i] = y; /* arrays by ref */
268 else
269 args[i] = copycell(y);
270 tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0);
271 }
272 for ( ; i < ndef; i++) { /* add null args for ones not provided */
273 args[i] = gettemp();
274 *args[i] = newcopycell;
275 }
276 frp++; /* now ok to up frame */
277 if (frp >= frame + nframe) {
278 int dfp = frp - frame; /* old index */
279 frame = (struct Frame *) reallocarray(frame, (nframe += 100), sizeof(*frame));
280 if (frame == NULL((void *)0))
281 FATAL("out of space for stack frames in %s", s);
282 frp = frame + dfp;
283 }
284 frp->fcncell = fcn;
285 frp->args = args;
286 frp->nargs = ndef; /* number defined with (excess are locals) */
287 frp->retval = gettemp();
288
289 DPRINTF("start exec of %s, frp=%d\n", s, (int) (frp-frame))if (dbg) printf("start exec of %s, frp=%d\n", s, (int) (frp-frame
))
;
290 y = execute((Node *)(fcn->sval)); /* execute body */
291 DPRINTF("finished exec of %s, frp=%d\n", s, (int) (frp-frame))if (dbg) printf("finished exec of %s, frp=%d\n", s, (int) (frp
-frame))
;
292
293 for (i = 0; i < ndef; i++) {
294 Cell *t = frp->args[i];
295 if (isarr(t)((t)->tval & 020)) {
296 if (t->csub == CCOPY6) {
297 if (i >= ncall) {
298 freesymtab(t);
299 t->csub = CTEMP4;
300 tempfree(t)do { if (((t)->csub == 4)) tfree(t); } while ( 0);
301 } else {
302 oargs[i]->tval = t->tval;
303 oargs[i]->tval &= ~(STR02|NUM01|DONTFREE04);
304 oargs[i]->sval = t->sval;
305 tempfree(t)do { if (((t)->csub == 4)) tfree(t); } while ( 0);
306 }
307 }
308 } else if (t != y) { /* kludge to prevent freeing twice */
309 t->csub = CTEMP4;
310 tempfree(t)do { if (((t)->csub == 4)) tfree(t); } while ( 0);
311 } else if (t == y && t->csub == CCOPY6) {
312 t->csub = CTEMP4;
313 tempfree(t)do { if (((t)->csub == 4)) tfree(t); } while ( 0);
314 freed = 1;
315 }
316 }
317 tempfree(fcn)do { if (((fcn)->csub == 4)) tfree(fcn); } while ( 0);
318 if (isexit(y)((y)->csub == 21) || isnext(y)((y)->csub == 22 || (y)->csub == 26))
319 return y;
320 if (freed == 0) {
321 tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0); /* don't free twice! */
322 }
323 z = frp->retval; /* return value */
324 DPRINTF("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval)if (dbg) printf("%s returns %g |%s| %o\n", s, getfval(z), getsval
(z), z->tval)
;
325 frp--;
326 return(z);
327}
328
329Cell *copycell(Cell *x) /* make a copy of a cell in a temp */
330{
331 Cell *y;
332
333 /* copy is not constant or field */
334
335 y = gettemp();
336 y->tval = x->tval & ~(CON010|FLD0100|REC0200);
337 y->csub = CCOPY6; /* prevents freeing until call is over */
338 y->nval = x->nval; /* BUG? */
339 if (isstr(x)((x)->tval & 02) /* || x->ctype == OCELL */) {
340 y->sval = tostring(x->sval);
341 y->tval &= ~DONTFREE04;
342 } else
343 y->tval |= DONTFREE04;
344 y->fval = x->fval;
345 return y;
346}
347
348Cell *arg(Node **a, int n) /* nth argument of a function */
349{
350
351 n = ptoi(a[0]); /* argument number, counting from 0 */
352 DPRINTF("arg(%d), frp->nargs=%d\n", n, frp->nargs)if (dbg) printf("arg(%d), frp->nargs=%d\n", n, frp->nargs
)
;
353 if (n+1 > frp->nargs)
354 FATAL("argument #%d of function %s was not supplied",
355 n+1, frp->fcncell->nval);
356 return frp->args[n];
357}
358
359Cell *jump(Node **a, int n) /* break, continue, next, nextfile, return */
360{
361 Cell *y;
362
363 switch (n) {
364 case EXIT297:
365 if (a[0] != NULL((void *)0)) {
366 y = execute(a[0]);
367 errorflag = (int) getfval(y);
368 tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0);
369 }
370 longjmp(env, 1);
371 case RETURN340:
372 if (a[0] != NULL((void *)0)) {
373 y = execute(a[0]);
374 if ((y->tval & (STR02|NUM01)) == (STR02|NUM01)) {
375 setsval(frp->retval, getsval(y));
376 frp->retval->fval = getfval(y);
377 frp->retval->tval |= NUM01;
378 }
379 else if (y->tval & STR02)
380 setsval(frp->retval, getsval(y));
381 else if (y->tval & NUM01)
382 setfval(frp->retval, getfval(y));
383 else /* can't happen */
384 FATAL("bad type variable %d", y->tval);
385 tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0);
386 }
387 return(jret);
388 case NEXT307:
389 return(jnext);
390 case NEXTFILE308:
391 nextfile();
392 return(jnextfile);
393 case BREAK292:
394 return(jbreak);
395 case CONTINUE294:
396 return(jcont);
397 default: /* can't happen */
398 FATAL("illegal jump type %d", n);
399 }
400 return 0; /* not reached */
401}
402
403Cell *awkgetline(Node **a, int n) /* get next line from specific input */
404{ /* a[0] is variable, a[1] is operator, a[2] is filename */
405 Cell *r, *x;
406 extern Cell **fldtab;
407 FILE *fp;
408 char *buf;
409 int bufsize = recsize;
410 int mode;
411 bool_Bool newflag;
412 double result;
413
414 if ((buf = (char *) malloc(bufsize)) == NULL((void *)0))
415 FATAL("out of memory in getline");
416
417 fflush(stdout(&__sF[1])); /* in case someone is waiting for a prompt */
418 r = gettemp();
419 if (a[1] != NULL((void *)0)) { /* getline < file */
420 x = execute(a[2]); /* filename */
421 mode = ptoi(a[1]);
422 if (mode == '|') /* input pipe */
423 mode = LE286; /* arbitrary flag */
424 fp = openfile(mode, getsval(x), &newflag);
425 tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0);
426 if (fp == NULL((void *)0))
427 n = -1;
428 else
429 n = readrec(&buf, &bufsize, fp, newflag);
430 if (n <= 0) {
431 ;
432 } else if (a[0] != NULL((void *)0)) { /* getline var <file */
433 x = execute(a[0]);
434 setsval(x, buf);
435 if (is_number(x->sval, & result)is_valid_number(x->sval, 0, ((void *)0), & result)) {
436 x->fval = result;
437 x->tval |= NUM01;
438 }
439 tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0);
440 } else { /* getline <file */
441 setsval(fldtab[0], buf);
442 if (is_number(fldtab[0]->sval, & result)is_valid_number(fldtab[0]->sval, 0, ((void *)0), & result
)
) {
443 fldtab[0]->fval = result;
444 fldtab[0]->tval |= NUM01;
445 }
446 }
447 } else { /* bare getline; use current input */
448 if (a[0] == NULL((void *)0)) /* getline */
449 n = getrec(&record, &recsize, true1);
450 else { /* getline var */
451 n = getrec(&buf, &bufsize, false0);
452 if (n > 0) {
453 x = execute(a[0]);
454 setsval(x, buf);
455 if (is_number(x->sval, & result)is_valid_number(x->sval, 0, ((void *)0), & result)) {
456 x->fval = result;
457 x->tval |= NUM01;
458 }
459 tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0);
460 }
461 }
462 }
463 setfval(r, (Awkfloat) n);
464 free(buf);
465 return r;
466}
467
468Cell *getnf(Node **a, int n) /* get NF */
469{
470 if (!donefld)
471 fldbld();
472 return (Cell *) a[0];
473}
474
475static char *
476makearraystring(Node *p, const char *func)
477{
478 char *buf;
479 int bufsz = recsize;
480 size_t blen;
481
482 if ((buf = (char *) malloc(bufsz)) == NULL((void *)0)) {
483 FATAL("%s: out of memory", func);
484 }
485
486 blen = 0;
487 buf[blen] = '\0';
488
489 for (; p; p = p->nnext) {
490 Cell *x = execute(p); /* expr */
491 char *s = getsval(x);
492 size_t seplen = strlen(getsval(subseploc));
493 size_t nsub = p->nnext ? seplen : 0;
494 size_t slen = strlen(s);
495 size_t tlen = blen + slen + nsub;
496
497 if (!adjbuf(&buf, &bufsz, tlen + 1, recsize, 0, func)) {
498 FATAL("%s: out of memory %s[%s...]",
499 func, x->nval, buf);
500 }
501 memcpy(buf + blen, s, slen);
502 if (nsub) {
503 memcpy(buf + blen + slen, *SUBSEP, nsub);
504 }
505 buf[tlen] = '\0';
506 blen = tlen;
507 tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0);
508 }
509 return buf;
510}
511
512Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */
513{
514 Cell *x, *z;
515 char *buf;
516
517 x = execute(a[0]); /* Cell* for symbol table */
518 buf = makearraystring(a[1], __func__);
519 if (!isarr(x)((x)->tval & 020)) {
520 DPRINTF("making %s into an array\n", NN(x->nval))if (dbg) printf("making %s into an array\n", ((x->nval) ? (
x->nval) : "(null)"))
;
521 if (freeable(x)( ((x)->tval & (02|04)) == 02 ))
522 xfree(x->sval){ free((void *)(intptr_t)(x->sval)); (x->sval) = ((void
*)0); }
;
523 x->tval &= ~(STR02|NUM01|DONTFREE04);
524 x->tval |= ARR020;
525 x->sval = (char *) makesymtab(NSYMTAB50);
526 }
527 z = setsymtab(buf, "", 0.0, STR02|NUM01, (Array *) x->sval);
528 z->ctype = OCELL1;
529 z->csub = CVAR2;
530 tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0);
531 free(buf);
532 return(z);
533}
534
535Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */
536{
537 Cell *x;
538
539 x = execute(a[0]); /* Cell* for symbol table */
540 if (x == symtabloc) {
541 FATAL("cannot delete SYMTAB or its elements");
542 }
543 if (!isarr(x)((x)->tval & 020))
544 return True;
545 if (a[1] == NULL((void *)0)) { /* delete the elements, not the table */
546 freesymtab(x);
547 x->tval &= ~STR02;
548 x->tval |= ARR020;
549 x->sval = (char *) makesymtab(NSYMTAB50);
550 } else {
551 char *buf = makearraystring(a[1], __func__);
552 freeelem(x, buf);
553 free(buf);
554 }
555 tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0);
556 return True;
557}
558
559Cell *intest(Node **a, int n) /* a[0] is index (list), a[1] is symtab */
560{
561 Cell *ap, *k;
562 char *buf;
563
564 ap = execute(a[1]); /* array name */
565 if (!isarr(ap)((ap)->tval & 020)) {
566 DPRINTF("making %s into an array\n", ap->nval)if (dbg) printf("making %s into an array\n", ap->nval);
567 if (freeable(ap)( ((ap)->tval & (02|04)) == 02 ))
568 xfree(ap->sval){ free((void *)(intptr_t)(ap->sval)); (ap->sval) = ((void
*)0); }
;
569 ap->tval &= ~(STR02|NUM01|DONTFREE04);
570 ap->tval |= ARR020;
571 ap->sval = (char *) makesymtab(NSYMTAB50);
572 }
573 buf = makearraystring(a[0], __func__);
574 k = lookup(buf, (Array *) ap->sval);
575 tempfree(ap)do { if (((ap)->csub == 4)) tfree(ap); } while ( 0);
576 free(buf);
577 if (k == NULL((void *)0))
578 return(False);
579 else
580 return(True);
581}
582
583
584/* ======== utf-8 code ========== */
585
586/*
587 * Awk strings can contain ascii, random 8-bit items (eg Latin-1),
588 * or utf-8. u8_isutf tests whether a string starts with a valid
589 * utf-8 sequence, and returns 0 if not (e.g., high bit set).
590 * u8_nextlen returns length of next valid sequence, which is
591 * 1 for ascii, 2..4 for utf-8, or 1 for high bit non-utf.
592 * u8_strlen returns length of string in valid utf-8 sequences
593 * and/or high-bit bytes. Conversion functions go between byte
594 * number and character number.
595 *
596 * In theory, this behaves the same as before for non-utf8 bytes.
597 *
598 * Limited checking! This is a potential security hole.
599 */
600
601/* is s the beginning of a valid utf-8 string? */
602/* return length 1..4 if yes, 0 if no */
603int u8_isutf(const char *s)
604{
605 int n, ret;
606 unsigned char c;
607
608 c = s[0];
609 if (c < 128 || awk_mb_cur_max == 1)
610 return 1; /* what if it's 0? */
611
612 n = strlen(s);
613 if (n >= 2 && ((c>>5) & 0x7) == 0x6 && (s[1] & 0xC0) == 0x80) {
614 ret = 2; /* 110xxxxx 10xxxxxx */
615 } else if (n >= 3 && ((c>>4) & 0xF) == 0xE && (s[1] & 0xC0) == 0x80
616 && (s[2] & 0xC0) == 0x80) {
617 ret = 3; /* 1110xxxx 10xxxxxx 10xxxxxx */
618 } else if (n >= 4 && ((c>>3) & 0x1F) == 0x1E && (s[1] & 0xC0) == 0x80
619 && (s[2] & 0xC0) == 0x80 && (s[3] & 0xC0) == 0x80) {
620 ret = 4; /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
621 } else {
622 ret = 0;
623 }
624 return ret;
625}
626
627/* Convert (prefix of) utf8 string to utf-32 rune. */
628/* Sets *rune to the value, returns the length. */
629/* No error checking: watch out. */
630int u8_rune(int *rune, const char *s)
631{
632 int n, ret;
633 unsigned char c;
634
635 c = s[0];
636 if (c < 128 || awk_mb_cur_max == 1) {
637 *rune = c;
638 return 1;
639 }
640
641 n = strlen(s);
642 if (n >= 2 && ((c>>5) & 0x7) == 0x6 && (s[1] & 0xC0) == 0x80) {
643 *rune = ((c & 0x1F) << 6) | (s[1] & 0x3F); /* 110xxxxx 10xxxxxx */
644 ret = 2;
645 } else if (n >= 3 && ((c>>4) & 0xF) == 0xE && (s[1] & 0xC0) == 0x80
646 && (s[2] & 0xC0) == 0x80) {
647 *rune = ((c & 0xF) << 12) | ((s[1] & 0x3F) << 6) | (s[2] & 0x3F);
648 /* 1110xxxx 10xxxxxx 10xxxxxx */
649 ret = 3;
650 } else if (n >= 4 && ((c>>3) & 0x1F) == 0x1E && (s[1] & 0xC0) == 0x80
651 && (s[2] & 0xC0) == 0x80 && (s[3] & 0xC0) == 0x80) {
652 *rune = ((c & 0x7) << 18) | ((s[1] & 0x3F) << 12) | ((s[2] & 0x3F) << 6) | (s[3] & 0x3F);
653 /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
654 ret = 4;
655 } else {
656 *rune = c;
657 ret = 1;
658 }
659 return ret; /* returns one byte if sequence doesn't look like utf */
660}
661
662/* return length of next sequence: 1 for ascii or random, 2..4 for valid utf8 */
663int u8_nextlen(const char *s)
664{
665 int len;
666
667 len = u8_isutf(s);
668 if (len == 0)
669 len = 1;
670 return len;
671}
672
673/* return number of utf characters or single non-utf bytes */
674int u8_strlen(const char *s)
675{
676 int i, len, n, totlen;
677 unsigned char c;
678
679 n = strlen(s);
680 totlen = 0;
681 for (i = 0; i < n; i += len) {
682 c = s[i];
683 if (c < 128 || awk_mb_cur_max == 1) {
684 len = 1;
685 } else {
686 len = u8_nextlen(&s[i]);
687 }
688 totlen++;
689 if (i > n)
690 FATAL("bad utf count [%s] n=%d i=%d\n", s, n, i);
691 }
692 return totlen;
693}
694
695/* convert utf-8 char number in a string to its byte offset */
696int u8_char2byte(const char *s, int charnum)
697{
698 int n;
699 int bytenum = 0;
700
701 while (charnum > 0) {
702 n = u8_nextlen(s);
703 s += n;
704 bytenum += n;
705 charnum--;
706 }
707 return bytenum;
708}
709
710/* convert byte offset in s to utf-8 char number that starts there */
711int u8_byte2char(const char *s, int bytenum)
712{
713 int i, len, b;
714 int charnum = 0; /* BUG: what origin? */
715 /* should be 0 to match start==0 which means no match */
716
717 b = strlen(s);
718 if (bytenum > b) {
719 return -1; /* ??? */
720 }
721 for (i = 0; i <= bytenum; i += len) {
722 len = u8_nextlen(s+i);
723 charnum++;
724 }
725 return charnum;
726}
727
728/* runetochar() adapted from rune.c in the Plan 9 distributione */
729
730enum
731{
732 Runeerror = 128, /* from somewhere else */
733 Runemax = 0x10FFFF,
734
735 Bit1 = 7,
736 Bitx = 6,
737 Bit2 = 5,
738 Bit3 = 4,
739 Bit4 = 3,
740 Bit5 = 2,
741
742 T1 = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */
743 Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */
744 T2 = ((1<<(Bit2+1))-1) ^ 0xFF, /* 1100 0000 */
745 T3 = ((1<<(Bit3+1))-1) ^ 0xFF, /* 1110 0000 */
746 T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */
747 T5 = ((1<<(Bit5+1))-1) ^ 0xFF, /* 1111 1000 */
748
749 Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0000 0000 0111 1111 */
750 Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0000 0000 0111 1111 1111 */
751 Rune3 = (1<<(Bit3+2*Bitx))-1, /* 0000 0000 1111 1111 1111 1111 */
752 Rune4 = (1<<(Bit4+3*Bitx))-1, /* 0011 1111 1111 1111 1111 1111 */
753
754 Maskx = (1<<Bitx)-1, /* 0011 1111 */
755 Testx = Maskx ^ 0xFF, /* 1100 0000 */
756
757};
758
759int runetochar(char *str, int c)
760{
761 /* one character sequence 00000-0007F => 00-7F */
762 if (c <= Rune1) {
763 str[0] = c;
764 return 1;
765 }
766
767 /* two character sequence 00080-007FF => T2 Tx */
768 if (c <= Rune2) {
769 str[0] = T2 | (c >> 1*Bitx);
770 str[1] = Tx | (c & Maskx);
771 return 2;
772 }
773
774 /* three character sequence 00800-0FFFF => T3 Tx Tx */
775 if (c > Runemax)
776 c = Runeerror;
777 if (c <= Rune3) {
778 str[0] = T3 | (c >> 2*Bitx);
779 str[1] = Tx | ((c >> 1*Bitx) & Maskx);
780 str[2] = Tx | (c & Maskx);
781 return 3;
782 }
783
784 /* four character sequence 010000-1FFFFF => T4 Tx Tx Tx */
785 str[0] = T4 | (c >> 3*Bitx);
786 str[1] = Tx | ((c >> 2*Bitx) & Maskx);
787 str[2] = Tx | ((c >> 1*Bitx) & Maskx);
788 str[3] = Tx | (c & Maskx);
789 return 4;
790}
791
792
793/* ========== end of utf8 code =========== */
794
795
796
797Cell *matchop(Node **a, int n) /* ~ and match() */
798{
799 Cell *x, *y;
800 char *s, *t;
801 int i;
802 int cstart, cpatlen, len;
803 fa *pfa;
804 int (*mf)(fa *, const char *) = match, mode = 0;
805
806 if (n == MATCHFCN306) {
807 mf = pmatch;
808 mode = 1;
809 }
810 x = execute(a[1]); /* a[1] = target text */
811 s = getsval(x);
812 if (a[0] == NULL((void *)0)) /* a[1] == 0: already-compiled reg expr */
813 i = (*mf)((fa *) a[2], s);
814 else {
815 y = execute(a[2]); /* a[2] = regular expr */
816 t = getsval(y);
817 pfa = makedfa(t, mode);
818 i = (*mf)(pfa, s);
819 tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0);
820 }
821 tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0);
822 if (n == MATCHFCN306) {
823 int start = patbeg - s + 1; /* origin 1 */
824 if (patlen < 0) {
825 start = 0; /* not found */
826 } else {
827 cstart = u8_byte2char(s, start-1);
828 cpatlen = 0;
829 for (i = 0; i < patlen; i += len) {
830 len = u8_nextlen(patbeg+i);
831 cpatlen++;
832 }
833
834 start = cstart;
835 patlen = cpatlen;
836 }
837
838 setfval(rstartloc, (Awkfloat) start);
839 setfval(rlengthloc, (Awkfloat) patlen);
840 x = gettemp();
841 x->tval = NUM01;
842 x->fval = start;
843 return x;
844 } else if ((n == MATCH265 && i == 1) || (n == NOTMATCH266 && i == 0))
845 return(True);
846 else
847 return(False);
848}
849
850
851Cell *boolop(Node **a, int n) /* a[0] || a[1], a[0] && a[1], !a[0] */
852{
853 Cell *x, *y;
854 int i;
855
856 x = execute(a[0]);
857 i = istrue(x)((x)->csub == 11);
858 tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0);
859 switch (n) {
860 case BOR281:
861 if (i) return(True);
862 y = execute(a[1]);
863 i = istrue(y)((y)->csub == 11);
864 tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0);
865 if (i) return(True);
866 else return(False);
867 case AND280:
868 if ( !i ) return(False);
869 y = execute(a[1]);
870 i = istrue(y)((y)->csub == 11);
871 tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0);
872 if (i) return(True);
873 else return(False);
874 case NOT345:
875 if (i) return(False);
876 else return(True);
877 default: /* can't happen */
878 FATAL("unknown boolean operator %d", n);
879 }
880 return 0; /*NOTREACHED*/
881}
882
883Cell *relop(Node **a, int n) /* a[0 < a[1], etc. */
884{
885 int i;
886 Cell *x, *y;
887 Awkfloat j;
888 bool_Bool x_is_nan, y_is_nan;
889
890 x = execute(a[0]);
891 y = execute(a[1]);
892 x_is_nan = isnan(x->fval)((sizeof (x->fval) == sizeof (float)) ? __isnanf(x->fval
) : (sizeof (x->fval) == sizeof (double)) ? __isnan(x->
fval) : __isnanl(x->fval))
;
893 y_is_nan = isnan(y->fval)((sizeof (y->fval) == sizeof (float)) ? __isnanf(y->fval
) : (sizeof (y->fval) == sizeof (double)) ? __isnan(y->
fval) : __isnanl(y->fval))
;
894 if (x->tval&NUM01 && y->tval&NUM01) {
895 if ((x_is_nan || y_is_nan) && n != NE288)
896 return(False);
897 j = x->fval - y->fval;
898 i = j<0? -1: (j>0? 1: 0);
899 } else {
900 i = strcmp(getsval(x), getsval(y));
901 }
902 tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0);
903 tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0);
904 switch (n) {
905 case LT287: if (i<0) return(True);
906 else return(False);
907 case LE286: if (i<=0) return(True);
908 else return(False);
909 case NE288: if (x_is_nan && y_is_nan) return(True);
910 else if (i!=0) return(True);
911 else return(False);
912 case EQ283: if (i == 0) return(True);
913 else return(False);
914 case GE284: if (i>=0) return(True);
915 else return(False);
916 case GT285: if (i>0) return(True);
917 else return(False);
918 default: /* can't happen */
919 FATAL("unknown relational operator %d", n);
920 }
921 return 0; /*NOTREACHED*/
922}
923
924void tfree(Cell *a) /* free a tempcell */
925{
926 if (freeable(a)( ((a)->tval & (02|04)) == 02 )) {
927 DPRINTF("freeing %s %s %o\n", NN(a->nval), NN(a->sval), a->tval)if (dbg) printf("freeing %s %s %o\n", ((a->nval) ? (a->
nval) : "(null)"), ((a->sval) ? (a->sval) : "(null)"), a
->tval)
;
928 xfree(a->sval){ free((void *)(intptr_t)(a->sval)); (a->sval) = ((void
*)0); }
;
929 }
930 if (a == tmps)
931 FATAL("tempcell list is curdled");
932 a->cnext = tmps;
933 tmps = a;
934}
935
936Cell *gettemp(void) /* get a tempcell */
937{ int i;
938 Cell *x;
939
940 if (!tmps) {
941 tmps = (Cell *) calloc(100, sizeof(*tmps));
942 if (!tmps)
943 FATAL("out of space for temporaries");
944 for (i = 1; i < 100; i++)
945 tmps[i-1].cnext = &tmps[i];
946 tmps[i-1].cnext = NULL((void *)0);
947 }
948 x = tmps;
949 tmps = x->cnext;
950 *x = tempcell;
951 return(x);
952}
953
954Cell *indirect(Node **a, int n) /* $( a[0] ) */
955{
956 Awkfloat val;
957 Cell *x;
958 int m;
959 char *s;
960
961 x = execute(a[0]);
962 val = getfval(x); /* freebsd: defend against super large field numbers */
963 if ((Awkfloat)INT_MAX0x7fffffff < val)
964 FATAL("trying to access out of range field %s", x->nval);
965 m = (int) val;
966 if (m == 0 && !is_number(s = getsval(x), NULL)is_valid_number(s = getsval(x), 0, ((void *)0), ((void *)0))) /* suspicion! */
967 FATAL("illegal field $(%s), name \"%s\"", s, x->nval);
968 /* BUG: can x->nval ever be null??? */
969 tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0);
970 x = fieldadr(m);
971 x->ctype = OCELL1; /* BUG? why are these needed? */
972 x->csub = CFLD1;
973 return(x);
974}
975
976Cell *substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */
977{
978 int k, m, n;
979 int mb, nb;
980 char *s;
981 int temp;
982 Cell *x, *y, *z = NULL((void *)0);
983
984 x = execute(a[0]);
985 y = execute(a[1]);
986 if (a[2] != NULL((void *)0))
987 z = execute(a[2]);
988 s = getsval(x);
989 k = u8_strlen(s) + 1;
990 if (k <= 1) {
991 tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0);
992 tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0);
993 if (a[2] != NULL((void *)0)) {
994 tempfree(z)do { if (((z)->csub == 4)) tfree(z); } while ( 0);
995 }
996 x = gettemp();
997 setsval(x, "");
998 return(x);
999 }
1000 m = (int) getfval(y);
1001 if (m <= 0)
1002 m = 1;
1003 else if (m > k)
1004 m = k;
1005 tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0);
1006 if (a[2] != NULL((void *)0)) {
1007 n = (int) getfval(z);
1008 tempfree(z)do { if (((z)->csub == 4)) tfree(z); } while ( 0);
1009 } else
1010 n = k - 1;
1011 if (n < 0)
1012 n = 0;
1013 else if (n > k - m)
1014 n = k - m;
1015 /* m is start, n is length from there */
1016 DPRINTF("substr: m=%d, n=%d, s=%s\n", m, n, s)if (dbg) printf("substr: m=%d, n=%d, s=%s\n", m, n, s);
1017 y = gettemp();
1018 mb = u8_char2byte(s, m-1); /* byte offset of start char in s */
1019 nb = u8_char2byte(s, m-1+n); /* byte offset of end+1 char in s */
1020
1021 temp = s[nb]; /* with thanks to John Linderman */
1022 s[nb] = '\0';
1023 setsval(y, s + mb);
1024 s[nb] = temp;
1025 tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0);
1026 return(y);
1027}
1028
1029Cell *sindex(Node **a, int nnn) /* index(a[0], a[1]) */
1030{
1031 Cell *x, *y, *z;
1032 char *s1, *s2, *p1, *p2, *q;
1033 Awkfloat v = 0.0;
1034
1035 x = execute(a[0]);
1036 s1 = getsval(x);
1037 y = execute(a[1]);
1038 s2 = getsval(y);
1039
1040 z = gettemp();
1041 for (p1 = s1; *p1 != '\0'; p1++) {
1042 for (q = p1, p2 = s2; *p2 != '\0' && *q == *p2; q++, p2++)
1043 continue;
1044 if (*p2 == '\0') {
1045 /* v = (Awkfloat) (p1 - s1 + 1); origin 1 */
1046
1047 /* should be a function: used in match() as well */
1048 int i, len;
1049 v = 0;
1050 for (i = 0; i < p1-s1+1; i += len) {
1051 len = u8_nextlen(s1+i);
1052 v++;
1053 }
1054 break;
1055 }
1056 }
1057 tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0);
1058 tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0);
1059 setfval(z, v);
1060 return(z);
1061}
1062
1063int has_utf8(char *s) /* return 1 if s contains any utf-8 (2 bytes or more) character */
1064{
1065 int n;
1066
1067 for (n = 0; *s != 0; s += n) {
1068 n = u8_nextlen(s);
1069 if (n > 1)
1070 return 1;
1071 }
1072 return 0;
1073}
1074
1075#define MAXNUMSIZE50 50
1076
1077int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like conversions */
1078{
1079 char *fmt;
1080 char *p, *t;
1081 const char *os;
1082 Cell *x;
1083 int flag = 0, n;
1084 int fmtwd; /* format width */
1085 int fmtsz = recsize;
1086 char *buf = *pbuf;
1087 int bufsize = *pbufsize;
1088#define FMTSZ(a)(fmtsz - ((a) - fmt)) (fmtsz - ((a) - fmt))
1089#define BUFSZ(a)(bufsize - ((a) - buf)) (bufsize - ((a) - buf))
1090
1091 static bool_Bool first = true1;
1092 static bool_Bool have_a_format = false0;
1093
1094 if (first) {
1095 char xbuf[100];
1096
1097 snprintf(xbuf, sizeof(xbuf), "%a", 42.0);
1098 have_a_format = (strcmp(xbuf, "0x1.5p+5") == 0);
1099 first = false0;
1100 }
1101
1102 os = s;
1103 p = buf;
1104 if ((fmt = (char *) malloc(fmtsz)) == NULL((void *)0))
1105 FATAL("out of memory in format()");
1106 while (*s) {
1107 adjbuf(&buf, &bufsize, MAXNUMSIZE50+1+p-buf, recsize, &p, "format1");
1108 if (*s != '%') {
1109 *p++ = *s++;
1110 continue;
1111 }
1112 if (*(s+1) == '%') {
1113 *p++ = '%';
1114 s += 2;
1115 continue;
1116 }
1117 fmtwd = atoi(s+1);
1118 if (fmtwd < 0)
1119 fmtwd = -fmtwd;
1120 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format2");
1121 for (t = fmt; (*t++ = *s) != '\0'; s++) {
1122 if (!adjbuf(&fmt, &fmtsz, MAXNUMSIZE50+1+t-fmt, recsize, &t, "format3"))
1123 FATAL("format item %.30s... ran format() out of memory", os);
1124 /* Ignore size specifiers */
1125 if (strchr("hjLlqtz", *s) != NULL((void *)0)) { /* the ansi panoply */
1126 t--;
1127 continue;
1128 }
1129 if (isalpha((uschar)*s))
1130 break;
1131 if (*s == '$') {
1132 FATAL("'$' not permitted in awk formats");
1133 }
1134 if (*s == '*') {
1135 if (a == NULL((void *)0)) {
1136 FATAL("not enough args in printf(%s)", os);
1137 }
1138 x = execute(a);
1139 a = a->nnext;
1140 snprintf(t - 1, FMTSZ(t - 1)(fmtsz - ((t - 1) - fmt)),
1141 "%d", fmtwd=(int) getfval(x));
1142 if (fmtwd < 0)
1143 fmtwd = -fmtwd;
1144 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format");
1145 t = fmt + strlen(fmt);
1146 tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0);
1147 }
1148 }
1149 *t = '\0';
1150 if (fmtwd < 0)
1151 fmtwd = -fmtwd;
1152 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format4");
1153 switch (*s) {
1154 case 'a': case 'A':
1155 if (have_a_format)
1156 flag = *s;
1157 else
1158 flag = 'f';
1159 break;
1160 case 'f': case 'e': case 'g': case 'E': case 'G':
1161 flag = 'f';
1162 break;
1163 case 'd': case 'i': case 'o': case 'x': case 'X': case 'u':
1164 flag = (*s == 'd' || *s == 'i') ? 'd' : 'u';
1165 *(t-1) = 'j';
1166 *t = *s;
1167 *++t = '\0';
1168 break;
1169 case 's':
1170 flag = 's';
1171 break;
1172 case 'c':
1173 flag = 'c';
1174 break;
1175 default:
1176 WARNING("weird printf conversion %s", fmt);
1177 flag = '?';
1178 break;
1179 }
1180 if (a == NULL((void *)0))
1181 FATAL("not enough args in printf(%s)", os);
1182 x = execute(a);
1183 a = a->nnext;
1184 n = MAXNUMSIZE50;
1185 if (fmtwd > n)
1186 n = fmtwd;
1187 adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format5");
1188 switch (flag) {
1189 case '?':
1190 snprintf(p, BUFSZ(p)(bufsize - ((p) - buf)), "%s", fmt); /* unknown, so dump it too */
1191 t = getsval(x);
1192 n = strlen(t);
1193 if (fmtwd > n)
1194 n = fmtwd;
1195 adjbuf(&buf, &bufsize, 1+strlen(p)+n+p-buf, recsize, &p, "format6");
1196 p += strlen(p);
1197 snprintf(p, BUFSZ(p)(bufsize - ((p) - buf)), "%s", t);
1198 break;
1199 case 'a':
1200 case 'A':
1201 case 'f': snprintf(p, BUFSZ(p)(bufsize - ((p) - buf)), fmt, getfval(x)); break;
1202 case 'd': snprintf(p, BUFSZ(p)(bufsize - ((p) - buf)), fmt, (intmax_t) getfval(x)); break;
1203 case 'u': snprintf(p, BUFSZ(p)(bufsize - ((p) - buf)), fmt, (uintmax_t) getfval(x)); break;
1204
1205 case 's': {
1206 t = getsval(x);
1207 n = strlen(t);
1208 /* if simple format or no utf-8 in the string, sprintf works */
1209 if (!has_utf8(t) || strcmp(fmt,"%s") == 0) {
1210 if (fmtwd > n)
1211 n = fmtwd;
1212 if (!adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format7"))
1213 FATAL("huge string/format (%d chars) in printf %.30s..." \
1214 " ran format() out of memory", n, t);
1215 snprintf(p, BUFSZ(p)(bufsize - ((p) - buf)), fmt, t);
1216 break;
1217 }
1218
1219 /* get here if string has utf-8 chars and fmt is not plain %s */
1220 /* "%-w.ps", where -, w and .p are all optional */
1221 /* '0' before the w is a flag character */
1222 /* fmt points at % */
1223 int ljust = 0, wid = 0, prec = n, pad = 0;
1224 char *f = fmt+1;
1225 if (f[0] == '-') {
1226 ljust = 1;
1227 f++;
1228 }
1229 // flags '0' and '+' are recognized but skipped
1230 if (f[0] == '0') {
1231 f++;
1232 if (f[0] == '+')
1233 f++;
1234 }
1235 if (f[0] == '+') {
1236 f++;
1237 if (f[0] == '0')
1238 f++;
1239 }
1240 if (isdigit((uschar)f[0])) { /* there is a wid */
1241 wid = strtol(f, &f, 10);
1242 }
1243 if (f[0] == '.') { /* there is a .prec */
1244 prec = strtol(++f, &f, 10);
1245 }
1246 if (prec > u8_strlen(t))
1247 prec = u8_strlen(t);
1248 pad = wid>prec ? wid - prec : 0; // has to be >= 0
1249 int i, k, n;
1250
1251 if (ljust) { // print prec chars from t, then pad blanks
1252 n = u8_char2byte(t, prec);
1253 for (k = 0; k < n; k++) {
1254 //putchar(t[k]);
1255 *p++ = t[k];
1256 }
1257 for (i = 0; i < pad; i++) {
1258 //printf(" ");
1259 *p++ = ' ';
1260 }
1261 } else { // print pad blanks, then prec chars from t
1262 for (i = 0; i < pad; i++) {
1263 //printf(" ");
1264 *p++ = ' ';
1265 }
1266 n = u8_char2byte(t, prec);
1267 for (k = 0; k < n; k++) {
1268 //putchar(t[k]);
1269 *p++ = t[k];
1270 }
1271 }
1272 *p = 0;
1273 break;
1274 }
1275
1276 case 'c': {
1277 /*
1278 * If a numeric value is given, awk should just turn
1279 * it into a character and print it:
1280 * BEGIN { printf("%c\n", 65) }
1281 * prints "A".
1282 *
1283 * But what if the numeric value is > 128 and
1284 * represents a valid Unicode code point?!? We do
1285 * our best to convert it back into UTF-8. If we
1286 * can't, we output the encoding of the Unicode
1287 * "invalid character", 0xFFFD.
1288 */
1289 if (isnum(x)((x)->tval & 01)) {
1290 int charval = (int) getfval(x);
1291
1292 if (charval != 0) {
1293 if (charval < 128 || awk_mb_cur_max == 1)
1294 snprintf(p, BUFSZ(p)(bufsize - ((p) - buf)), fmt, charval);
1295 else {
1296 // possible unicode character
1297 size_t count;
1298 char *bs = wide_char_to_byte_str(charval, &count);
1299
1300 if (bs == NULL((void *)0)) { // invalid character
1301 // use unicode invalid character, 0xFFFD
1302 bs = "\357\277\275";
1303 count = 3;
1304 }
1305 t = bs;
1306 n = count;
1307 goto format_percent_c;
1308 }
1309 } else {
1310 *p++ = '\0'; /* explicit null byte */
1311 *p = '\0'; /* next output will start here */
1312 }
1313 break;
1314 }
1315 t = getsval(x);
1316 n = u8_nextlen(t);
1317 format_percent_c:
1318 if (n < 2) { /* not utf8 */
1319 snprintf(p, BUFSZ(p)(bufsize - ((p) - buf)), fmt, getsval(x)[0]);
1320 break;
1321 }
1322
1323 // utf8 character, almost same song and dance as for %s
1324 int ljust = 0, wid = 0, prec = n, pad = 0;
1325 char *f = fmt+1;
1326 if (f[0] == '-') {
1327 ljust = 1;
1328 f++;
1329 }
1330 // flags '0' and '+' are recognized but skipped
1331 if (f[0] == '0') {
1332 f++;
1333 if (f[0] == '+')
1334 f++;
1335 }
1336 if (f[0] == '+') {
1337 f++;
1338 if (f[0] == '0')
1339 f++;
1340 }
1341 if (isdigit((uschar)f[0])) { /* there is a wid */
1342 wid = strtol(f, &f, 10);
1343 }
1344 if (f[0] == '.') { /* there is a .prec */
1345 prec = strtol(++f, &f, 10);
1346 }
1347 if (prec > 1) // %c --> only one character
1348 prec = 1;
1349 pad = wid>prec ? wid - prec : 0; // has to be >= 0
1350 int i;
1351
1352 if (ljust) { // print one char from t, then pad blanks
1353 for (i = 0; i < n; i++)
1354 *p++ = t[i];
1355 for (i = 0; i < pad; i++) {
1356 //printf(" ");
1357 *p++ = ' ';
1358 }
1359 } else { // print pad blanks, then prec chars from t
1360 for (i = 0; i < pad; i++) {
1361 //printf(" ");
1362 *p++ = ' ';
1363 }
1364 for (i = 0; i < n; i++)
1365 *p++ = t[i];
1366 }
1367 *p = 0;
1368 break;
1369 }
1370 default:
1371 FATAL("can't happen: bad conversion %c in format()", flag);
1372 }
1373
1374 tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0);
1375 p += strlen(p);
1376 s++;
1377 }
1378 *p = '\0';
1379 free(fmt);
1380 for ( ; a; a = a->nnext) { /* evaluate any remaining args */
1381 x = execute(a);
1382 tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0);
1383 }
1384 *pbuf = buf;
1385 *pbufsize = bufsize;
1386 return p - buf;
1387}
1388
1389Cell *awksprintf(Node **a, int n) /* sprintf(a[0]) */
1390{
1391 Cell *x;
1392 Node *y;
1393 char *buf;
1394 int bufsz=3*recsize;
1395
1396 if ((buf = (char *) malloc(bufsz)) == NULL((void *)0))
1397 FATAL("out of memory in awksprintf");
1398 y = a[0]->nnext;
1399 x = execute(a[0]);
1400 if (format(&buf, &bufsz, getsval(x), y) == -1)
1401 FATAL("sprintf string %.30s... too long. can't happen.", buf);
1402 tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0);
1403 x = gettemp();
1404 x->sval = buf;
1405 x->tval = STR02;
1406 return(x);
1407}
1408
1409Cell *awkprintf(Node **a, int n) /* printf */
1410{ /* a[0] is list of args, starting with format string */
1411 /* a[1] is redirection operator, a[2] is redirection file */
1412 FILE *fp;
1413 Cell *x;
1414 Node *y;
1415 char *buf;
1416 int len;
1417 int bufsz=3*recsize;
1418
1419 if ((buf = (char *) malloc(bufsz)) == NULL((void *)0))
1420 FATAL("out of memory in awkprintf");
1421 y = a[0]->nnext;
1422 x = execute(a[0]);
1423 if ((len = format(&buf, &bufsz, getsval(x), y)) == -1)
1424 FATAL("printf string %.30s... too long. can't happen.", buf);
1425 tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0);
1426 if (a[1] == NULL((void *)0)) {
1427 /* fputs(buf, stdout); */
1428 fwrite(buf, len, 1, stdout(&__sF[1]));
1429 if (ferror(stdout)(!__isthreaded ? ((((&__sF[1]))->_flags & 0x0040) !=
0) : (ferror)((&__sF[1])))
)
1430 FATAL("write error on stdout");
1431 } else {
1432 fp = redirect(ptoi(a[1]), a[2]);
1433 /* fputs(buf, fp); */
1434 fwrite(buf, len, 1, fp);
1435 fflush(fp);
1436 if (ferror(fp)(!__isthreaded ? (((fp)->_flags & 0x0040) != 0) : (ferror
)(fp))
)
1437 FATAL("write error on %s", filename(fp));
1438 }
1439 free(buf);
1440 return(True);
1441}
1442
1443Cell *arith(Node **a, int n) /* a[0] + a[1], etc. also -a[0] */
1444{
1445 Awkfloat i, j = 0;
1446 double v;
1447 Cell *x, *y, *z;
1448
1449 x = execute(a[0]);
1450 i = getfval(x);
1451 tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0);
1452 if (n != UMINUS346 && n != UPLUS347) {
1453 y = execute(a[1]);
1454 j = getfval(y);
1455 tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0);
1456 }
1457 z = gettemp();
1458 switch (n) {
1459 case ADD309:
1460 i += j;
1461 break;
1462 case MINUS310:
1463 i -= j;
1464 break;
1465 case MULT311:
1466 i *= j;
1467 break;
1468 case DIVIDE312:
1469 if (j == 0)
1470 FATAL("division by zero");
1471 i /= j;
1472 break;
1473 case MOD313:
1474 if (j == 0)
1475 FATAL("division by zero in mod");
1476 modf(i/j, &v);
1477 i = i - j * v;
1478 break;
1479 case UMINUS346:
1480 i = -i;
1481 break;
1482 case UPLUS347: /* handled by getfval(), above */
1483 break;
1484 case POWER348:
1485 if (j >= 0 && modf(j, &v) == 0.0) /* pos integer exponent */
1486 i = ipow(i, (int) j);
1487 else {
1488 errno(*__errno()) = 0;
1489 i = errcheck(pow(i, j), "pow");
1490 }
1491 break;
1492 default: /* can't happen */
1493 FATAL("illegal arithmetic operator %d", n);
1494 }
1495 setfval(z, i);
1496 return(z);
1497}
1498
1499double ipow(double x, int n) /* x**n. ought to be done by pow, but isn't always */
1500{
1501 double v;
1502
1503 if (n <= 0)
1504 return 1;
1505 v = ipow(x, n/2);
1506 if (n % 2 == 0)
1507 return v * v;
1508 else
1509 return x * v * v;
1510}
1511
1512Cell *incrdecr(Node **a, int n) /* a[0]++, etc. */
1513{
1514 Cell *x, *z;
1515 int k;
1516 Awkfloat xf;
1517
1518 x = execute(a[0]);
1519 xf = getfval(x);
1520 k = (n == PREINCR329 || n == POSTINCR328) ? 1 : -1;
1521 if (n == PREINCR329 || n == PREDECR331) {
1522 setfval(x, xf + k);
1523 return(x);
1524 }
1525 z = gettemp();
1526 setfval(z, xf);
1527 setfval(x, xf + k);
1528 tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0);
1529 return(z);
1530}
1531
1532Cell *assign(Node **a, int n) /* a[0] = a[1], a[0] += a[1], etc. */
1533{ /* this is subtle; don't muck with it. */
1534 Cell *x, *y;
1535 Awkfloat xf, yf;
1536 double v;
1537
1538 y = execute(a[1]);
1539 x = execute(a[0]);
1540 if (n == ASSIGN314) { /* ordinary assignment */
1541 if (x == y && !(x->tval & (FLD0100|REC0200)) && x != nfloc)
1542 ; /* self-assignment: leave alone unless it's a field or NF */
1543 else if ((y->tval & (STR02|NUM01)) == (STR02|NUM01)) {
1544 yf = getfval(y);
1545 setsval(x, getsval(y));
1546 x->fval = yf;
1547 x->tval |= NUM01;
1548 }
1549 else if (isstr(y)((y)->tval & 02))
1550 setsval(x, getsval(y));
1551 else if (isnum(y)((y)->tval & 01))
1552 setfval(x, getfval(y));
1553 else
1554 funnyvar(y, "read value of");
1555 tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0);
1556 return(x);
1557 }
1558 xf = getfval(x);
1559 yf = getfval(y);
1560 switch (n) {
1561 case ADDEQ316:
1562 xf += yf;
1563 break;
1564 case SUBEQ317:
1565 xf -= yf;
1566 break;
1567 case MULTEQ318:
1568 xf *= yf;
1569 break;
1570 case DIVEQ319:
1571 if (yf == 0)
1572 FATAL("division by zero in /=");
1573 xf /= yf;
1574 break;
1575 case MODEQ320:
1576 if (yf == 0)
1577 FATAL("division by zero in %%=");
1578 modf(xf/yf, &v);
1579 xf = xf - yf * v;
1580 break;
1581 case POWEQ321:
1582 if (yf >= 0 && modf(yf, &v) == 0.0) /* pos integer exponent */
1583 xf = ipow(xf, (int) yf);
1584 else {
1585 errno(*__errno()) = 0;
1586 xf = errcheck(pow(xf, yf), "pow");
1587 }
1588 break;
1589 default:
1590 FATAL("illegal assignment operator %d", n);
1591 break;
1592 }
1593 tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0);
1594 setfval(x, xf);
1595 return(x);
1596}
1597
1598Cell *cat(Node **a, int q) /* a[0] cat a[1] */
1599{
1600 Cell *x, *y, *z;
1601 int n1, n2;
1602 char *s = NULL((void *)0);
1603 int ssz = 0;
1604
1605 x = execute(a[0]);
1606 n1 = strlen(getsval(x));
1607 adjbuf(&s, &ssz, n1 + 1, recsize, 0, "cat1");
1608 memcpy(s, x->sval, n1);
1609
1610 tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0);
1611
1612 y = execute(a[1]);
1613 n2 = strlen(getsval(y));
1614 adjbuf(&s, &ssz, n1 + n2 + 1, recsize, 0, "cat2");
1615 memcpy(s + n1, y->sval, n2);
1616 s[n1 + n2] = '\0';
1617
1618 tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0);
1619
1620 z = gettemp();
1621 z->sval = s;
1622 z->tval = STR02;
1623
1624 return(z);
1625}
1626
1627Cell *pastat(Node **a, int n) /* a[0] { a[1] } */
1628{
1629 Cell *x;
1630
1631 if (a[0] == NULL((void *)0))
1632 x = execute(a[1]);
1633 else {
1634 x = execute(a[0]);
1635 if (istrue(x)((x)->csub == 11)) {
1636 tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0);
1637 x = execute(a[1]);
1638 }
1639 }
1640 return x;
1641}
1642
1643Cell *dopa2(Node **a, int n) /* a[0], a[1] { a[2] } */
1644{
1645 Cell *x;
1646 int pair;
1647
1648 pair = ptoi(a[3]);
1649 if (pairstack[pair] == 0) {
1650 x = execute(a[0]);
1651 if (istrue(x)((x)->csub == 11))
1652 pairstack[pair] = 1;
1653 tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0);
1654 }
1655 if (pairstack[pair] == 1) {
1656 x = execute(a[1]);
1657 if (istrue(x)((x)->csub == 11))
1658 pairstack[pair] = 0;
1659 tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0);
1660 x = execute(a[2]);
1661 return(x);
1662 }
1663 return(False);
1664}
1665
1666Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */
1667{
1668 Cell *x = NULL((void *)0), *y, *ap;
1669 const char *s, *origs, *t;
1670 const char *fs = NULL((void *)0);
1671 char *origfs = NULL((void *)0);
1672 int sep;
1673 char temp, num[50];
1674 int j, n, tempstat, arg3type;
1675 double result;
1676
1677 y = execute(a[0]); /* source string */
1678 origs = s = strdup(getsval(y));
1679 if (s == NULL((void *)0))
1680 FATAL("out of space in split");
1681 tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0);
1682 arg3type = ptoi(a[3]);
1683 if (a[2] == NULL((void *)0)) { /* BUG: CSV should override implicit fs but not explicit */
1684 fs = getsval(fsloc);
1685 } else if (arg3type == STRING337) { /* split(str,arr,"string") */
1686 x = execute(a[2]);
1687 fs = origfs = strdup(getsval(x));
1688 if (fs == NULL((void *)0))
1689 FATAL("out of space in split");
1690 tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0);
1691 } else if (arg3type == REGEXPR338) {
1692 fs = "(regexpr)"; /* split(str,arr,/regexpr/) */
1693 } else {
1694 FATAL("illegal type of split");
1695 }
1696 sep = *fs;
1697 ap = execute(a[1]); /* array name */
1698 /* BUG 7/26/22: this appears not to reset array: see C1/asplit */
1699 freesymtab(ap);
1700 DPRINTF("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs)if (dbg) printf("split: s=|%s|, a=%s, sep=|%s|\n", s, ((ap->
nval) ? (ap->nval) : "(null)"), fs)
;
1701 ap->tval &= ~STR02;
1702 ap->tval |= ARR020;
1703 ap->sval = (char *) makesymtab(NSYMTAB50);
1704
1705 n = 0;
1706 if (arg3type == REGEXPR338 && strlen((char*)((fa*)a[2])->restr) == 0) {
1707 /* split(s, a, //); have to arrange that it looks like empty sep */
1708 arg3type = 0;
1709 fs = "";
1710 sep = 0;
1711 }
1712 if (*s != '\0' && (strlen(fs) > 1 || arg3type == REGEXPR338)) { /* reg expr */
1713 fa *pfa;
1714 if (arg3type == REGEXPR338) { /* it's ready already */
1715 pfa = (fa *) a[2];
1716 } else {
1717 pfa = makedfa(fs, 1);
1718 }
1719 if (nematch(pfa,s)) {
1720 tempstat = pfa->initstat;
1721 pfa->initstat = 2;
1722 do {
1723 n++;
1724 snprintf(num, sizeof(num), "%d", n);
1725 temp = *patbeg;
1726 setptr(patbeg, '\0')(*(char *)(intptr_t)(patbeg)) = ('\0');
1727 if (is_number(s, & result)is_valid_number(s, 0, ((void *)0), & result))
1728 setsymtab(num, s, result, STR02|NUM01, (Array *) ap->sval);
1729 else
1730 setsymtab(num, s, 0.0, STR02, (Array *) ap->sval);
1731 setptr(patbeg, temp)(*(char *)(intptr_t)(patbeg)) = (temp);
1732 s = patbeg + patlen;
1733 if (*(patbeg+patlen-1) == '\0' || *s == '\0') {
1734 n++;
1735 snprintf(num, sizeof(num), "%d", n);
1736 setsymtab(num, "", 0.0, STR02, (Array *) ap->sval);
1737 pfa->initstat = tempstat;
1738 goto spdone;
1739 }
1740 } while (nematch(pfa,s));
1741 pfa->initstat = tempstat; /* bwk: has to be here to reset */
1742 /* cf gsub and refldbld */
1743 }
1744 n++;
1745 snprintf(num, sizeof(num), "%d", n);
1746 if (is_number(s, & result)is_valid_number(s, 0, ((void *)0), & result))
1747 setsymtab(num, s, result, STR02|NUM01, (Array *) ap->sval);
1748 else
1749 setsymtab(num, s, 0.0, STR02, (Array *) ap->sval);
1750 spdone:
1751 pfa = NULL((void *)0);
1752
1753 } else if (a[2] == NULL((void *)0) && CSV) { /* CSV only if no explicit separator */
1754 char *newt = (char *) malloc(strlen(s)); /* for building new string; reuse for each field */
1755 for (;;) {
1756 char *fr = newt;
1757 n++;
1758 if (*s == '"' ) { /* start of "..." */
1759 for (s++ ; *s != '\0'; ) {
1760 if (*s == '"' && s[1] != '\0' && s[1] == '"') {
1761 s += 2; /* doubled quote */
1762 *fr++ = '"';
1763 } else if (*s == '"' && (s[1] == '\0' || s[1] == ',')) {
1764 s++; /* skip over closing quote */
1765 break;
1766 } else {
1767 *fr++ = *s++;
1768 }
1769 }
1770 *fr++ = 0;
1771 } else { /* unquoted field */
1772 while (*s != ',' && *s != '\0')
1773 *fr++ = *s++;
1774 *fr++ = 0;
1775 }
1776 snprintf(num, sizeof(num), "%d", n);
1777 if (is_number(newt, &result)is_valid_number(newt, 0, ((void *)0), &result))
1778 setsymtab(num, newt, result, STR02|NUM01, (Array *) ap->sval);
1779 else
1780 setsymtab(num, newt, 0.0, STR02, (Array *) ap->sval);
1781 if (*s++ == '\0')
1782 break;
1783 }
1784 free(newt);
1785
1786 } else if (!CSV && sep == ' ') { /* usual case: split on white space */
1787 for (n = 0; ; ) {
1788#define ISWS(c)((c) == ' ' || (c) == '\t' || (c) == '\n') ((c) == ' ' || (c) == '\t' || (c) == '\n')
1789 while (ISWS(*s)((*s) == ' ' || (*s) == '\t' || (*s) == '\n'))
1790 s++;
1791 if (*s == '\0')
1792 break;
1793 n++;
1794 t = s;
1795 do
1796 s++;
1797 while (*s != '\0' && !ISWS(*s)((*s) == ' ' || (*s) == '\t' || (*s) == '\n'));
1798 temp = *s;
1799 setptr(s, '\0')(*(char *)(intptr_t)(s)) = ('\0');
1800 snprintf(num, sizeof(num), "%d", n);
1801 if (is_number(t, & result)is_valid_number(t, 0, ((void *)0), & result))
1802 setsymtab(num, t, result, STR02|NUM01, (Array *) ap->sval);
1803 else
1804 setsymtab(num, t, 0.0, STR02, (Array *) ap->sval);
1805 setptr(s, temp)(*(char *)(intptr_t)(s)) = (temp);
1806 if (*s != '\0')
1807 s++;
1808 }
1809
1810 } else if (sep == 0) { /* new: split(s, a, "") => 1 char/elem */
1811 for (n = 0; *s != '\0'; s += u8_nextlen(s)) {
1812 char buf[10];
1813 n++;
1814 snprintf(num, sizeof(num), "%d", n);
1815
1816 for (j = 0; j < u8_nextlen(s); j++) {
1817 buf[j] = s[j];
1818 }
1819 buf[j] = '\0';
1820
1821 if (isdigit((uschar)buf[0]))
1822 setsymtab(num, buf, atof(buf), STR02|NUM01, (Array *) ap->sval);
1823 else
1824 setsymtab(num, buf, 0.0, STR02, (Array *) ap->sval);
1825 }
1826
1827 } else if (*s != '\0') { /* some random single character */
1828 for (;;) {
1829 n++;
1830 t = s;
1831 while (*s != sep && *s != '\n' && *s != '\0')
1832 s++;
1833 temp = *s;
1834 setptr(s, '\0')(*(char *)(intptr_t)(s)) = ('\0');
1835 snprintf(num, sizeof(num), "%d", n);
1836 if (is_number(t, & result)is_valid_number(t, 0, ((void *)0), & result))
1837 setsymtab(num, t, result, STR02|NUM01, (Array *) ap->sval);
1838 else
1839 setsymtab(num, t, 0.0, STR02, (Array *) ap->sval);
1840 setptr(s, temp)(*(char *)(intptr_t)(s)) = (temp);
1841 if (*s++ == '\0')
1842 break;
1843 }
1844 }
1845 tempfree(ap)do { if (((ap)->csub == 4)) tfree(ap); } while ( 0);
1846 xfree(origs){ free((void *)(intptr_t)(origs)); (origs) = ((void *)0); };
1847 xfree(origfs){ free((void *)(intptr_t)(origfs)); (origfs) = ((void *)0); };
1848 x = gettemp();
1849 x->tval = NUM01;
1850 x->fval = n;
1851 return(x);
1852}
1853
1854Cell *condexpr(Node **a, int n) /* a[0] ? a[1] : a[2] */
1855{
1856 Cell *x;
1857
1858 x = execute(a[0]);
1859 if (istrue(x)((x)->csub == 11)) {
1860 tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0);
1861 x = execute(a[1]);
1862 } else {
1863 tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0);
1864 x = execute(a[2]);
1865 }
1866 return(x);
1867}
1868
1869Cell *ifstat(Node **a, int n) /* if (a[0]) a[1]; else a[2] */
1870{
1871 Cell *x;
1872
1873 x = execute(a[0]);
1874 if (istrue(x)((x)->csub == 11)) {
1875 tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0);
1876 x = execute(a[1]);
1877 } else if (a[2] != NULL((void *)0)) {
1878 tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0);
1879 x = execute(a[2]);
1880 }
1881 return(x);
1882}
1883
1884Cell *whilestat(Node **a, int n) /* while (a[0]) a[1] */
1885{
1886 Cell *x;
1887
1888 for (;;) {
1889 x = execute(a[0]);
1890 if (!istrue(x)((x)->csub == 11))
1891 return(x);
1892 tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0);
1893 x = execute(a[1]);
1894 if (isbreak(x)((x)->csub == 23)) {
1895 x = True;
1896 return(x);
1897 }
1898 if (isnext(x)((x)->csub == 22 || (x)->csub == 26) || isexit(x)((x)->csub == 21) || isret(x)((x)->csub == 25))
1899 return(x);
1900 tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0);
1901 }
1902}
1903
1904Cell *dostat(Node **a, int n) /* do a[0]; while(a[1]) */
1905{
1906 Cell *x;
1907
1908 for (;;) {
1909 x = execute(a[0]);
1910 if (isbreak(x)((x)->csub == 23))
1911 return True;
1912 if (isnext(x)((x)->csub == 22 || (x)->csub == 26) || isexit(x)((x)->csub == 21) || isret(x)((x)->csub == 25))
1913 return(x);
1914 tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0);
1915 x = execute(a[1]);
1916 if (!istrue(x)((x)->csub == 11))
1917 return(x);
1918 tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0);
1919 }
1920}
1921
1922Cell *forstat(Node **a, int n) /* for (a[0]; a[1]; a[2]) a[3] */
1923{
1924 Cell *x;
1925
1926 x = execute(a[0]);
1927 tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0);
1928 for (;;) {
1929 if (a[1]!=NULL((void *)0)) {
1930 x = execute(a[1]);
1931 if (!istrue(x)((x)->csub == 11)) return(x);
1932 else tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0);
1933 }
1934 x = execute(a[3]);
1935 if (isbreak(x)((x)->csub == 23)) /* turn off break */
1936 return True;
1937 if (isnext(x)((x)->csub == 22 || (x)->csub == 26) || isexit(x)((x)->csub == 21) || isret(x)((x)->csub == 25))
1938 return(x);
1939 tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0);
1940 x = execute(a[2]);
1941 tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0);
1942 }
1943}
1944
1945Cell *instat(Node **a, int n) /* for (a[0] in a[1]) a[2] */
1946{
1947 Cell *x, *vp, *arrayp, *cp, *ncp;
1948 Array *tp;
1949 int i;
1950
1951 vp = execute(a[0]);
1952 arrayp = execute(a[1]);
1953 if (!isarr(arrayp)((arrayp)->tval & 020)) {
1954 return True;
1955 }
1956 tp = (Array *) arrayp->sval;
1957 tempfree(arrayp)do { if (((arrayp)->csub == 4)) tfree(arrayp); } while ( 0
)
;
1958 for (i = 0; i < tp->size; i++) { /* this routine knows too much */
1959 for (cp = tp->tab[i]; cp != NULL((void *)0); cp = ncp) {
1960 setsval(vp, cp->nval);
1961 ncp = cp->cnext;
1962 x = execute(a[2]);
1963 if (isbreak(x)((x)->csub == 23)) {
1964 tempfree(vp)do { if (((vp)->csub == 4)) tfree(vp); } while ( 0);
1965 return True;
1966 }
1967 if (isnext(x)((x)->csub == 22 || (x)->csub == 26) || isexit(x)((x)->csub == 21) || isret(x)((x)->csub == 25)) {
1968 tempfree(vp)do { if (((vp)->csub == 4)) tfree(vp); } while ( 0);
1969 return(x);
1970 }
1971 tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0);
1972 }
1973 }
1974 return True;
1975}
1976
1977static char *nawk_convert(const char *s, int (*fun_c)(int),
1978 wint_t (*fun_wc)(wint_t))
1979{
1980 char *buf = NULL((void *)0);
1981 char *pbuf = NULL((void *)0);
1982 const char *ps = NULL((void *)0);
1983 size_t n = 0;
1984 wchar_t wc;
1985 const size_t sz = awk_mb_cur_max;
1986 int unused;
1987
1988 if (sz == 1) {
1989 buf = tostring(s);
1990
1991 for (pbuf = buf; *pbuf; pbuf++)
1992 *pbuf = fun_c((uschar)*pbuf);
1993
1994 return buf;
1995 } else {
1996 /* upper/lower character may be shorter/longer */
1997 buf = tostringN(s, strlen(s) * sz + 1);
1998
1999 (void) mbtowc(NULL((void *)0), NULL((void *)0), 0); /* reset internal state */
2000 /*
2001 * Reset internal state here too.
2002 * Assign result to avoid a compiler warning. (Casting to void
2003 * doesn't work.)
2004 * Increment said variable to avoid a different warning.
2005 */
2006 unused = wctomb(NULL((void *)0), L'\0');
2007 unused++;
2008
2009 ps = s;
2010 pbuf = buf;
2011 while (n = mbtowc(&wc, ps, sz),
2012 n > 0 && n != (size_t)-1 && n != (size_t)-2)
2013 {
2014 ps += n;
2015
2016 n = wctomb(pbuf, fun_wc(wc));
2017 if (n == (size_t)-1)
2018 FATAL("illegal wide character %s", s);
2019
2020 pbuf += n;
2021 }
2022
2023 *pbuf = '\0';
2024
2025 if (n)
2026 FATAL("illegal byte sequence %s", s);
2027
2028 return buf;
2029 }
2030}
2031
2032#ifdef __DJGPP__
2033static wint_t towupper(wint_t wc)
2034{
2035 if (wc >= 0 && wc < 256)
2036 return toupper(wc & 0xFF);
2037
2038 return wc;
2039}
2040
2041static wint_t towlower(wint_t wc)
2042{
2043 if (wc >= 0 && wc < 256)
2044 return tolower(wc & 0xFF);
2045
2046 return wc;
2047}
2048#endif
2049
2050static char *nawk_toupper(const char *s)
2051{
2052 return nawk_convert(s, toupper, towupper);
2053}
2054
2055static char *nawk_tolower(const char *s)
2056{
2057 return nawk_convert(s, tolower, towlower);
2058}
2059
2060Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg list */
2061{
2062 Cell *x, *y;
2063 Awkfloat u;
2064 int t, sz;
2065 Awkfloat tmp;
2066 char *buf, *fmt;
2067 Node *nextarg;
2068 FILE *fp;
2069 int status = 0;
2070 time_t tv;
2071 struct tm *tm, tmbuf;
2072 int estatus = 0;
2073
2074 t = ptoi(a[0]);
2075 x = execute(a[1]);
2076 nextarg = a[1]->nnext;
2077 switch (t) {
2078 case FLENGTH1:
2079 if (isarr(x)((x)->tval & 020))
2080 u = ((Array *) x->sval)->nelem; /* GROT. should be function*/
2081 else
2082 u = u8_strlen(getsval(x));
2083 break;
2084 case FLOG4:
2085 errno(*__errno()) = 0;
2086 u = errcheck(log(getfval(x)), "log");
2087 break;
2088 case FINT5:
2089 modf(getfval(x), &u); break;
2090 case FEXP3:
2091 errno(*__errno()) = 0;
2092 u = errcheck(exp(getfval(x)), "exp");
2093 break;
2094 case FSQRT2:
2095 errno(*__errno()) = 0;
2096 u = errcheck(sqrt(getfval(x)), "sqrt");
2097 break;
2098 case FSIN9:
2099 u = sin(getfval(x)); break;
2100 case FCOS10:
2101 u = cos(getfval(x)); break;
2102 case FATAN11:
2103 if (nextarg == NULL((void *)0)) {
2104 WARNING("atan2 requires two arguments; returning 1.0");
2105 u = 1.0;
2106 } else {
2107 y = execute(a[1]->nnext);
2108 u = atan2(getfval(x), getfval(y));
2109 tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0);
2110 nextarg = nextarg->nnext;
2111 }
2112 break;
2113 case FCOMPL18:
2114 u = ~((int)getfval(x));
2115 break;
2116 case FAND15:
2117 if (nextarg == 0) {
2118 WARNING("and requires two arguments; returning 0");
2119 u = 0;
2120 break;
2121 }
2122 y = execute(a[1]->nnext);
2123 u = ((int)getfval(x)) & ((int)getfval(y));
2124 tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0);
2125 nextarg = nextarg->nnext;
2126 break;
2127 case FFOR16:
2128 if (nextarg == 0) {
2129 WARNING("or requires two arguments; returning 0");
2130 u = 0;
2131 break;
2132 }
2133 y = execute(a[1]->nnext);
2134 u = ((int)getfval(x)) | ((int)getfval(y));
2135 tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0);
2136 nextarg = nextarg->nnext;
2137 break;
2138 case FXOR17:
2139 if (nextarg == 0) {
2140 WARNING("xor requires two arguments; returning 0");
2141 u = 0;
2142 break;
2143 }
2144 y = execute(a[1]->nnext);
2145 u = ((int)getfval(x)) ^ ((int)getfval(y));
2146 tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0);
2147 nextarg = nextarg->nnext;
2148 break;
2149 case FLSHIFT19:
2150 if (nextarg == 0) {
2151 WARNING("lshift requires two arguments; returning 0");
2152 u = 0;
2153 break;
2154 }
2155 y = execute(a[1]->nnext);
2156 u = ((int)getfval(x)) << ((int)getfval(y));
2157 tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0);
2158 nextarg = nextarg->nnext;
2159 break;
2160 case FRSHIFT20:
2161 if (nextarg == 0) {
2162 WARNING("rshift requires two arguments; returning 0");
2163 u = 0;
2164 break;
2165 }
2166 y = execute(a[1]->nnext);
2167 u = ((int)getfval(x)) >> ((int)getfval(y));
2168 tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0);
2169 nextarg = nextarg->nnext;
2170 break;
2171 case FSYSTEM6:
2172 fflush(stdout(&__sF[1])); /* in case something is buffered already */
2173 estatus = status = system(getsval(x));
2174 if (status != -1) {
2175 if (WIFEXITED(status)(((status) & 0177) == 0)) {
2176 estatus = WEXITSTATUS(status)(int)(((unsigned)(status) >> 8) & 0xff);
2177 } else if (WIFSIGNALED(status)(((status) & 0177) != 0177 && ((status) & 0177
) != 0)
) {
2178 estatus = WTERMSIG(status)(((status) & 0177)) + 256;
2179#ifdef WCOREDUMP
2180 if (WCOREDUMP(status)((status) & 0200))
2181 estatus += 256;
2182#endif
2183 } else /* something else?!? */
2184 estatus = 0;
2185 }
2186 /* else estatus was set to -1 */
2187 u = estatus;
2188 break;
2189 case FRAND7:
2190 /* random() returns numbers in [0..2^31-1]
2191 * in order to get a number in [0, 1), divide it by 2^31
2192 */
2193 u = (Awkfloat) random() / (0x7fffffffL + 0x1UL);
2194 break;
2195 case FSRAND8:
2196 if (isrec(x)((x)->tval & 0200)) { /* no argument provided */
2197 u = time(NULL((void *)0));
2198 tmp = u;
2199 srandom((unsigned int) u);
2200 } else {
2201 u = getfval(x);
2202 tmp = u;
2203 srandom_deterministic((unsigned int) u);
2204 }
2205 u = srand_seed;
2206 srand_seed = tmp;
2207 break;
2208 case FTOUPPER12:
2209 case FTOLOWER13:
2210 if (t == FTOUPPER12)
2211 buf = nawk_toupper(getsval(x));
2212 else
2213 buf = nawk_tolower(getsval(x));
2214 tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0);
2215 x = gettemp();
2216 setsval(x, buf);
2217 free(buf);
2218 return x;
2219 case FFLUSH14:
2220 if (isrec(x)((x)->tval & 0200) || strlen(getsval(x)) == 0) {
2221 flush_all(); /* fflush() or fflush("") -> all */
2222 u = 0;
2223 } else if ((fp = openfile(FFLUSH14, getsval(x), NULL((void *)0))) == NULL((void *)0))
2224 u = EOF(-1);
2225 else
2226 u = fflush(fp);
2227 break;
2228 case FMKTIME23:
2229 memset(&tmbuf, 0, sizeof(tmbuf));
2230 tm = &tmbuf;
2231 t = sscanf(getsval(x), "%d %d %d %d %d %d %d",
2232 &tm->tm_year, &tm->tm_mon, &tm->tm_mday, &tm->tm_hour,
2233 &tm->tm_min, &tm->tm_sec, &tm->tm_isdst);
2234 switch (t) {
2235 case 6:
2236 tm->tm_isdst = -1; /* let mktime figure it out */
2237 /* FALLTHROUGH */
2238 case 7:
2239 tm->tm_year -= 1900;
2240 tm->tm_mon--;
2241 u = mktime(tm);
2242 break;
2243 default:
2244 u = -1;
2245 break;
2246 }
2247 break;
2248 case FSYSTIME21:
2249 u = time((time_t *) 0);
2250 break;
2251 case FSTRFTIME22:
2252 /* strftime([format [,timestamp]]) */
2253 if (nextarg) {
2254 y = execute(nextarg);
2255 nextarg = nextarg->nnext;
2256 tv = (time_t) getfval(y);
2257 tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0);
2258 } else
2259 tv = time((time_t *) 0);
2260 tm = localtime(&tv);
2261 if (tm == NULL((void *)0))
2262 FATAL("bad time %ld", (long)tv);
2263
2264 if (isrec(x)((x)->tval & 0200)) {
2265 /* format argument not provided, use default */
2266 fmt = tostring("%a %b %d %H:%M:%S %Z %Y");
2267 } else
2268 fmt = tostring(getsval(x));
2269
2270 sz = 32;
2271 buf = NULL((void *)0);
2272 do {
2273 if ((buf = (char *) reallocarray(buf, 2, sz)) == NULL((void *)0))
2274 FATAL("out of memory in strftime");
2275 sz *= 2;
2276 } while (strftime(buf, sz, fmt, tm) == 0 && fmt[0] != '\0');
2277
2278 y = gettemp();
2279 setsval(y, buf);
2280 free(fmt);
2281 free(buf);
2282
2283 return y;
2284 default: /* can't happen */
2285 FATAL("illegal function type %d", t);
2286 break;
2287 }
2288 tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0);
2289 x = gettemp();
2290 setfval(x, u);
2291 if (nextarg != NULL((void *)0)) {
2292 WARNING("warning: function has too many arguments");
2293 for ( ; nextarg; nextarg = nextarg->nnext) {
2294 y = execute(nextarg);
2295 tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0);
2296 }
2297 }
2298 return(x);
2299}
2300
2301Cell *printstat(Node **a, int n) /* print a[0] */
2302{
2303 Node *x;
2304 Cell *y;
2305 FILE *fp;
2306
2307 if (a[1] == NULL((void *)0)) /* a[1] is redirection operator, a[2] is file */
2308 fp = stdout(&__sF[1]);
2309 else
2310 fp = redirect(ptoi(a[1]), a[2]);
2311 for (x = a[0]; x != NULL((void *)0); x = x->nnext) {
2312 y = execute(x);
2313 fputs(getpssval(y), fp);
2314 tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0);
2315 if (x->nnext == NULL((void *)0))
2316 fputs(getsval(orsloc), fp);
2317 else
2318 fputs(getsval(ofsloc), fp);
2319 }
2320 if (a[1] != NULL((void *)0))
2321 fflush(fp);
2322 if (ferror(fp)(!__isthreaded ? (((fp)->_flags & 0x0040) != 0) : (ferror
)(fp))
)
2323 FATAL("write error on %s", filename(fp));
2324 return(True);
2325}
2326
2327Cell *nullproc(Node **a, int n)
2328{
2329 return 0;
2330}
2331
2332
2333FILE *redirect(int a, Node *b) /* set up all i/o redirections */
2334{
2335 FILE *fp;
2336 Cell *x;
2337 char *fname;
2338
2339 x = execute(b);
2340 fname = getsval(x);
2341 fp = openfile(a, fname, NULL((void *)0));
2342 if (fp == NULL((void *)0))
2343 FATAL("can't open file %s", fname);
2344 tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0);
2345 return fp;
2346}
2347
2348struct files {
2349 FILE *fp;
2350 const char *fname;
2351 int mode; /* '|', 'a', 'w' => LE/LT, GT */
2352} *files;
2353
2354size_t nfiles;
2355
2356static void stdinit(void) /* in case stdin, etc., are not constants */
2357{
2358 nfiles = FOPEN_MAX20;
2359 files = (struct files *) calloc(nfiles, sizeof(*files));
2360 if (files == NULL((void *)0))
2361 FATAL("can't allocate file memory for %zu files", nfiles);
2362 files[0].fp = stdin(&__sF[0]);
2363 files[0].fname = tostring("/dev/stdin");
2364 files[0].mode = LT287;
2365 files[1].fp = stdout(&__sF[1]);
2366 files[1].fname = tostring("/dev/stdout");
2367 files[1].mode = GT285;
2368 files[2].fp = stderr(&__sF[2]);
2369 files[2].fname = tostring("/dev/stderr");
2370 files[2].mode = GT285;
2371}
2372
2373FILE *openfile(int a, const char *us, bool_Bool *pnewflag)
2374{
2375 const char *s = us;
2376 size_t i;
2377 int m;
2378 FILE *fp = NULL((void *)0);
2379
2380 if (*s == '\0')
2381 FATAL("null file name in print or getline");
2382 for (i = 0; i < nfiles; i++)
2383 if (files[i].fname && strcmp(s, files[i].fname) == 0 &&
2384 (a == files[i].mode || (a==APPEND282 && files[i].mode==GT285) ||
2385 a == FFLUSH14)) {
2386 if (pnewflag)
2387 *pnewflag = false0;
2388 return files[i].fp;
2389 }
2390 if (a == FFLUSH14) /* didn't find it, so don't create it! */
2391 return NULL((void *)0);
2392
2393 for (i = 0; i < nfiles; i++)
2394 if (files[i].fp == NULL((void *)0))
2395 break;
2396 if (i >= nfiles) {
2397 struct files *nf;
2398 size_t nnf = nfiles + FOPEN_MAX20;
2399 nf = (struct files *) reallocarray(files, nnf, sizeof(*nf));
2400 if (nf == NULL((void *)0))
2401 FATAL("cannot grow files for %s and %zu files", s, nnf);
2402 memset(&nf[nfiles], 0, FOPEN_MAX20 * sizeof(*nf));
2403 nfiles = nnf;
2404 files = nf;
2405 }
2406 fflush(stdout(&__sF[1])); /* force a semblance of order */
2407 m = a;
2408 if (a == GT285) {
2409 fp = fopen(s, "w");
2410 } else if (a == APPEND282) {
2411 fp = fopen(s, "a");
2412 m = GT285; /* so can mix > and >> */
2413 } else if (a == '|') { /* output pipe */
2414 fp = popen(s, "w");
2415 } else if (a == LE286) { /* input pipe */
2416 fp = popen(s, "r");
2417 } else if (a == LT287) { /* getline <file */
2418 fp = strcmp(s, "-") == 0 ? stdin(&__sF[0]) : fopen(s, "r"); /* "-" is stdin */
2419 } else /* can't happen */
2420 FATAL("illegal redirection %d", a);
2421 if (fp != NULL((void *)0)) {
2422 files[i].fname = tostring(s);
2423 files[i].fp = fp;
2424 files[i].mode = m;
2425 if (pnewflag)
2426 *pnewflag = true1;
2427 if (fp != stdin(&__sF[0]) && fp != stdout(&__sF[1]) && fp != stderr(&__sF[2]))
2428 (void) fcntl(fileno(fp)(!__isthreaded ? ((fp)->_file) : (fileno)(fp)), F_SETFD2, FD_CLOEXEC1);
2429 }
2430 return fp;
2431}
2432
2433const char *filename(FILE *fp)
2434{
2435 size_t i;
2436
2437 for (i = 0; i < nfiles; i++)
2438 if (fp == files[i].fp)
2439 return files[i].fname;
2440 return "???";
2441}
2442
2443Cell *closefile(Node **a, int n)
2444{
2445 Cell *x;
2446 size_t i;
2447 bool_Bool stat;
2448
2449 x = execute(a[0]);
2450 getsval(x);
2451 stat = true1;
2452 for (i = 0; i < nfiles; i++) {
2453 if (!files[i].fname || strcmp(x->sval, files[i].fname) != 0)
2454 continue;
2455 if (files[i].mode == GT285 || files[i].mode == '|')
2456 fflush(files[i].fp);
2457 if (ferror(files[i].fp)(!__isthreaded ? (((files[i].fp)->_flags & 0x0040) != 0
) : (ferror)(files[i].fp))
) {
2458 if ((files[i].mode == GT285 && files[i].fp != stderr(&__sF[2]))
2459 || files[i].mode == '|')
2460 FATAL("write error on %s", files[i].fname);
2461 else
2462 WARNING("i/o error occurred on %s", files[i].fname);
2463 }
2464 if (files[i].fp == stdin(&__sF[0]) || files[i].fp == stdout(&__sF[1]) ||
2465 files[i].fp == stderr(&__sF[2]))
2466 stat = freopen("/dev/null", "r+", files[i].fp) == NULL((void *)0);
2467 else if (files[i].mode == '|' || files[i].mode == LE286)
2468 stat = pclose(files[i].fp) == -1;
2469 else
2470 stat = fclose(files[i].fp) == EOF(-1);
2471 if (stat)
2472 WARNING("i/o error occurred closing %s", files[i].fname);
2473 xfree(files[i].fname){ free((void *)(intptr_t)(files[i].fname)); (files[i].fname) =
((void *)0); }
;
2474 files[i].fname = NULL((void *)0); /* watch out for ref thru this */
2475 files[i].fp = NULL((void *)0);
2476 break;
2477 }
2478 tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0);
2479 x = gettemp();
2480 setfval(x, (Awkfloat) (stat ? -1 : 0));
2481 return(x);
2482}
2483
2484void closeall(void)
2485{
2486 size_t i;
2487 bool_Bool stat = false0;
2488
2489 for (i = 0; i < nfiles; i++) {
2490 if (! files[i].fp)
2491 continue;
2492 if (files[i].mode == GT285 || files[i].mode == '|')
2493 fflush(files[i].fp);
2494 if (ferror(files[i].fp)(!__isthreaded ? (((files[i].fp)->_flags & 0x0040) != 0
) : (ferror)(files[i].fp))
) {
2495 if ((files[i].mode == GT285 && files[i].fp != stderr(&__sF[2]))
2496 || files[i].mode == '|')
2497 FATAL("write error on %s", files[i].fname);
2498 else
2499 WARNING("i/o error occurred on %s", files[i].fname);
2500 }
2501 if (files[i].fp == stdin(&__sF[0]) || files[i].fp == stdout(&__sF[1]) ||
2502 files[i].fp == stderr(&__sF[2]))
2503 continue;
2504 if (files[i].mode == '|' || files[i].mode == LE286)
2505 stat = pclose(files[i].fp) == -1;
2506 else
2507 stat = fclose(files[i].fp) == EOF(-1);
2508 if (stat)
2509 WARNING("i/o error occurred while closing %s", files[i].fname);
2510 }
2511}
2512
2513static void flush_all(void)
2514{
2515 size_t i;
2516
2517 for (i = 0; i < nfiles; i++)
2518 if (files[i].fp)
2519 fflush(files[i].fp);
2520}
2521
2522void backsub(char **pb_ptr, const char **sptr_ptr);
2523
2524Cell *dosub(Node **a, int subop) /* sub and gsub */
2525{
2526 fa *pfa;
2527 int tempstat;
2528 char *repl;
2529 Cell *x;
2530
2531 char *buf = NULL((void *)0);
2532 char *pb = NULL((void *)0);
2533 int bufsz = recsize;
2534
2535 const char *r, *s;
2536 const char *start;
2537 const char *noempty = NULL((void *)0); /* empty match disallowed here */
2538 size_t m = 0; /* match count */
2539 size_t whichm; /* which match to select, 0 = global */
2540 int mtype; /* match type */
2541
2542 if (a[0] == NULL((void *)0)) { /* 0 => a[1] is already-compiled regexpr */
2543 pfa = (fa *) a[1];
2544 } else {
2545 x = execute(a[1]);
2546 pfa = makedfa(getsval(x), 1);
2547 tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0);
2548 }
2549
2550 x = execute(a[2]); /* replacement string */
2551 repl = tostring(getsval(x));
2552 tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0);
2553
2554 switch (subop) {
2555 case SUB301:
2556 whichm = 1;
2557 x = execute(a[3]); /* source string */
2558 break;
2559 case GSUB302:
2560 whichm = 0;
2561 x = execute(a[3]); /* source string */
2562 break;
2563 default:
2564 FATAL("dosub: unrecognized subop: %d", subop);
2565 }
2566
2567 start = getsval(x);
2568 while (pmatch(pfa, start)) {
2569 if (buf == NULL((void *)0)) {
2570 if ((pb = buf = malloc(bufsz)) == NULL((void *)0))
2571 FATAL("out of memory in dosub");
2572 tempstat = pfa->initstat;
2573 pfa->initstat = 2;
2574 }
2575
2576 /* match types */
2577 #define MT_IGNORE 0 /* unselected or invalid */
2578 #define MT_INSERT 1 /* selected, empty */
2579 #define MT_REPLACE 2 /* selected, not empty */
2580
2581 /* an empty match just after replacement is invalid */
2582
2583 if (patbeg == noempty && patlen == 0) {
2584 mtype = MT_IGNORE; /* invalid, not counted */
2585 } else if (whichm == ++m || whichm == 0) {
2586 mtype = patlen ? MT_REPLACE : MT_INSERT;
2587 } else {
2588 mtype = MT_IGNORE; /* unselected, but counted */
2589 }
2590
2591 /* leading text: */
2592 if (patbeg > start) {
2593 adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - start),
2594 recsize, &pb, "dosub");
2595 s = start;
2596 while (s < patbeg)
2597 *pb++ = *s++;
2598 }
2599
2600 if (mtype == MT_IGNORE)
2601 goto matching_text; /* skip replacement text */
2602
2603 r = repl;
2604 while (*r != 0) {
2605 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "dosub");
2606 if (*r == '\\') {
2607 backsub(&pb, &r);
2608 } else if (*r == '&') {
2609 r++;
2610 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize,
2611 &pb, "dosub");
2612 for (s = patbeg; s < patbeg+patlen; )
2613 *pb++ = *s++;
2614 } else {
2615 *pb++ = *r++;
2616 }
2617 }
2618
2619matching_text:
2620 if (mtype == MT_REPLACE || *patbeg == '\0')
2621 goto next_search; /* skip matching text */
2622
2623 if (patlen == 0)
2624 patlen = u8_nextlen(patbeg);
2625 adjbuf(&buf, &bufsz, (pb-buf) + patlen, recsize, &pb, "dosub");
2626 s = patbeg;
2627 while (s < patbeg + patlen)
2628 *pb++ = *s++;
2629
2630next_search:
2631 start = patbeg + patlen;
2632 if (m == whichm || *patbeg == '\0')
2633 break;
2634 if (mtype == MT_REPLACE)
2635 noempty = start;
2636
2637 #undef MT_IGNORE
2638 #undef MT_INSERT
2639 #undef MT_REPLACE
2640 }
2641
2642 xfree(repl){ free((void *)(intptr_t)(repl)); (repl) = ((void *)0); };
2643
2644 if (buf != NULL((void *)0)) {
2645 pfa->initstat = tempstat;
2646
2647 /* trailing text */
2648 adjbuf(&buf, &bufsz, 1+strlen(start)+pb-buf, 0, &pb, "dosub");
2649 while ((*pb++ = *start++) != '\0')
2650 ;
2651
2652 setsval(x, buf);
2653 free(buf);
2654 }
2655
2656 tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0);
2657 x = gettemp();
2658 x->tval = NUM01;
2659 x->fval = m;
2660 return x;
2661}
2662
2663Cell *gensub(Node **a, int nnn) /* global selective substitute */
2664 /* XXX incomplete - doesn't support backreferences \0 ... \9 */
2665{
2666 Cell *x, *y, *res, *h;
2667 char *rptr;
2668 const char *sptr;
2669 char *buf, *pb;
2670 const char *t, *q;
2671 fa *pfa;
2672 int mflag, tempstat, num, whichm;
2673 int bufsz = recsize;
2674
2675 if ((buf = malloc(bufsz)) == NULL((void *)0))
1
Memory is allocated
2
Assuming the condition is false
3
Taking false branch
2676 FATAL("out of memory in gensub");
2677 mflag = 0; /* if mflag == 0, can replace empty string */
2678 num = 0;
2679 x = execute(a[4]); /* source string */
2680 t = getsval(x);
2681 res = copycell(x); /* target string - initially copy of source */
2682 res->csub = CTEMP4; /* result values are temporary */
2683 if (a[0] == 0) /* 0 => a[1] is already-compiled regexpr */
4
Assuming the condition is false
5
Taking false branch
2684 pfa = (fa *) a[1]; /* regular expression */
2685 else {
2686 y = execute(a[1]);
2687 pfa = makedfa(getsval(y), 1);
2688 tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0);
6
Assuming field 'csub' is not equal to 4
7
Taking false branch
8
Loop condition is false. Exiting loop
2689 }
2690 y = execute(a[2]); /* replacement string */
2691 h = execute(a[3]); /* which matches should be replaced */
2692 sptr = getsval(h);
2693 if (sptr[0] == 'g' || sptr[0] == 'G')
9
Assuming the condition is false
10
Assuming the condition is false
11
Taking false branch
2694 whichm = -1;
2695 else {
2696 /*
2697 * The specified number is index of replacement, starting
2698 * from 1. GNU awk treats index lower than 0 same as
2699 * 1, we do same for compatibility.
2700 */
2701 whichm = (int) getfval(h) - 1;
2702 if (whichm < 0)
12
Assuming 'whichm' is >= 0
13
Taking false branch
2703 whichm = 0;
2704 }
2705 tempfree(h)do { if (((h)->csub == 4)) tfree(h); } while ( 0);
14
Assuming field 'csub' is not equal to 4
15
Taking false branch
16
Loop condition is false. Exiting loop
2706
2707 if (pmatch(pfa, t)) {
17
Assuming the condition is true
18
Taking true branch
2708 char *sl;
2709
2710 tempstat = pfa->initstat;
2711 pfa->initstat = 2;
2712 pb = buf;
2713 rptr = getsval(y);
2714 /*
2715 * XXX if there are any backreferences in subst string,
2716 * complain now.
2717 */
2718 for (sl = rptr; (sl = strchr(sl, '\\')) && sl[1]; sl++) {
19
Assuming 'sl' is null
2719 if (strchr("0123456789", sl[1])) {
2720 FATAL("gensub doesn't support backreferences (subst \"%s\")", rptr);
2721 }
2722 }
2723
2724 do {
2725 if (whichm
19.1
'whichm' is >= 0
>= 0 && whichm != num) {
20
Assuming 'whichm' is not equal to 'num'
21
Taking true branch
2726 num++;
2727 adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - t) + patlen, recsize, &pb, "gensub");
22
Calling 'adjbuf'
2728
2729 /* copy the part of string up to and including
2730 * match to output buffer */
2731 while (t < patbeg + patlen)
2732 *pb++ = *t++;
2733 continue;
2734 }
2735
2736 if (patlen == 0 && *patbeg != 0) { /* matched empty string */
2737 if (mflag == 0) { /* can replace empty */
2738 num++;
2739 sptr = rptr;
2740 while (*sptr != 0) {
2741 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub");
2742 if (*sptr == '\\') {
2743 backsub(&pb, &sptr);
2744 } else if (*sptr == '&') {
2745 sptr++;
2746 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub");
2747 for (q = patbeg; q < patbeg+patlen; )
2748 *pb++ = *q++;
2749 } else
2750 *pb++ = *sptr++;
2751 }
2752 }
2753 if (*t == 0) /* at end */
2754 goto done;
2755 adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gensub");
2756 *pb++ = *t++;
2757 if (pb > buf + bufsz) /* BUG: not sure of this test */
2758 FATAL("gensub result0 %.30s too big; can't happen", buf);
2759 mflag = 0;
2760 }
2761 else { /* matched nonempty string */
2762 num++;
2763 sptr = t;
2764 adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gensub");
2765 while (sptr < patbeg)
2766 *pb++ = *sptr++;
2767 sptr = rptr;
2768 while (*sptr != 0) {
2769 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub");
2770 if (*sptr == '\\') {
2771 backsub(&pb, &sptr);
2772 } else if (*sptr == '&') {
2773 sptr++;
2774 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub");
2775 for (q = patbeg; q < patbeg+patlen; )
2776 *pb++ = *q++;
2777 } else
2778 *pb++ = *sptr++;
2779 }
2780 t = patbeg + patlen;
2781 if (patlen == 0 || *t == 0 || *(t-1) == 0)
2782 goto done;
2783 if (pb > buf + bufsz)
2784 FATAL("gensub result1 %.30s too big; can't happen", buf);
2785 mflag = 1;
2786 }
2787 } while (pmatch(pfa,t));
2788 sptr = t;
2789 adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gensub");
2790 while ((*pb++ = *sptr++) != 0)
2791 ;
2792 done: if (pb > buf + bufsz)
2793 FATAL("gensub result2 %.30s too big; can't happen", buf);
2794 *pb = '\0';
2795 setsval(res, buf);
2796 pfa->initstat = tempstat;
2797 }
2798 tempfree(x)do { if (((x)->csub == 4)) tfree(x); } while ( 0);
2799 tempfree(y)do { if (((y)->csub == 4)) tfree(y); } while ( 0);
2800 free(buf);
2801 return(res);
2802}
2803
2804void backsub(char **pb_ptr, const char **sptr_ptr) /* handle \\& variations */
2805{ /* sptr[0] == '\\' */
2806 char *pb = *pb_ptr;
2807 const char *sptr = *sptr_ptr;
2808
2809 if (sptr[1] == '\\') {
2810 if (sptr[2] == '\\' && sptr[3] == '&') { /* \\\& -> \& */
2811 *pb++ = '\\';
2812 *pb++ = '&';
2813 sptr += 4;
2814 } else if (sptr[2] == '&') { /* \\& -> \ + matched */
2815 *pb++ = '\\';
2816 sptr += 2;
2817 } else if (do_posix) { /* \\x -> \x */
2818 sptr++;
2819 *pb++ = *sptr++;
2820 } else { /* \\x -> \\x */
2821 *pb++ = *sptr++;
2822 *pb++ = *sptr++;
2823 }
2824 } else if (sptr[1] == '&') { /* literal & */
2825 sptr++;
2826 *pb++ = *sptr++;
2827 } else /* literal \ */
2828 *pb++ = *sptr++;
2829
2830 *pb_ptr = pb;
2831 *sptr_ptr = sptr;
2832}
2833
2834static char *wide_char_to_byte_str(int rune, size_t *outlen)
2835{
2836 static char buf[5];
2837 int len;
2838
2839 if (rune < 0 || rune > 0x10FFFF)
2840 return NULL((void *)0);
2841
2842 memset(buf, 0, sizeof(buf));
2843
2844 len = 0;
2845 if (rune <= 0x0000007F) {
2846 buf[len++] = rune;
2847 } else if (rune <= 0x000007FF) {
2848 // 110xxxxx 10xxxxxx
2849 buf[len++] = 0xC0 | (rune >> 6);
2850 buf[len++] = 0x80 | (rune & 0x3F);
2851 } else if (rune <= 0x0000FFFF) {
2852 // 1110xxxx 10xxxxxx 10xxxxxx
2853 buf[len++] = 0xE0 | (rune >> 12);
2854 buf[len++] = 0x80 | ((rune >> 6) & 0x3F);
2855 buf[len++] = 0x80 | (rune & 0x3F);
2856
2857 } else {
2858 // 0x00010000 - 0x10FFFF
2859 // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
2860 buf[len++] = 0xF0 | (rune >> 18);
2861 buf[len++] = 0x80 | ((rune >> 12) & 0x3F);
2862 buf[len++] = 0x80 | ((rune >> 6) & 0x3F);
2863 buf[len++] = 0x80 | (rune & 0x3F);
2864 }
2865
2866 *outlen = len;
2867 buf[len++] = '\0';
2868
2869 return buf;
2870}