Bug Summary

File: src/usr.bin/vi/build/../ex/ex_subst.c
Warning: line 1097, column 9
Dereference of null pointer

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple amd64-unknown-openbsd7.4 -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name ex_subst.c -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 1 -pic-is-pie -mframe-pointer=all -relaxed-aliasing -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -target-feature +retpoline-indirect-calls -target-feature +retpoline-indirect-branches -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/usr/src/usr.bin/vi/build/obj -resource-dir /usr/local/llvm16/lib/clang/16 -I /usr/src/usr.bin/vi/build -I /usr/src/usr.bin/vi/build/../include -I . -internal-isystem /usr/local/llvm16/lib/clang/16/include -internal-externc-isystem /usr/include -O2 -fdebug-compilation-dir=/usr/src/usr.bin/vi/build/obj -ferror-limit 19 -fwrapv -D_RET_PROTECTOR -ret-protector -fcf-protection=branch -fno-jump-tables -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-valloc -fno-builtin-free -fno-builtin-strdup -fno-builtin-strndup -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/ben/Projects/scan/2024-01-11-140451-98009-1 -x c /usr/src/usr.bin/vi/build/../ex/ex_subst.c
1/* $OpenBSD: ex_subst.c,v 1.31 2023/06/23 15:06:45 millert Exp $ */
2
3/*-
4 * Copyright (c) 1992, 1993, 1994
5 * The Regents of the University of California. All rights reserved.
6 * Copyright (c) 1992, 1993, 1994, 1995, 1996
7 * Keith Bostic. All rights reserved.
8 *
9 * See the LICENSE file for redistribution information.
10 */
11
12#include "config.h"
13
14#include <sys/queue.h>
15#include <sys/time.h>
16
17#include <bitstring.h>
18#include <ctype.h>
19#include <errno(*__errno()).h>
20#include <limits.h>
21#include <stdio.h>
22#include <stdlib.h>
23#include <string.h>
24#include <unistd.h>
25
26#include "../common/common.h"
27#include "../vi/vi.h"
28
/*
 * MAXIMUM --
 *	Yield the larger of two values.  Both arguments are evaluated more
 *	than once, so do not pass expressions with side effects.
 */
#define	MAXIMUM(a, b)	(((a) > (b)) ? (a) : (b))

/* Flag bits passed to s() in its `flags' argument. */
#define	SUB_FIRST	0x01		/* The 'r' flag isn't reasonable. */
#define	SUB_MUSTSETR	0x02		/* The 'r' flag is required. */
33
34static int re_conv(SCR *, char **, size_t *, int *);
35static int re_sub(SCR *, char *, char **, size_t *, size_t *, regmatch_t [10]);
36static int re_tag_conv(SCR *, char **, size_t *, int *);
37static int s(SCR *, EXCMD *, char *, regex_t *, u_int);
38
39/*
40 * ex_s --
41 * [line [,line]] s[ubstitute] [[/;]pat[/;]/repl[/;] [cgr] [count] [#lp]]
42 *
43 * Substitute on lines matching a pattern.
44 *
45 * PUBLIC: int ex_s(SCR *, EXCMD *);
46 */
47int
48ex_s(SCR *sp, EXCMD *cmdp)
49{
50 regex_t *re;
51 size_t blen, len;
52 u_int flags;
53 int delim;
54 char *bp, *ptrn, *rep, *p, *t;
55
56 /*
57 * Skip leading white space.
58 *
59 * !!!
60 * Historic vi allowed any non-alphanumeric to serve as the
61 * substitution command delimiter.
62 *
63 * !!!
64 * If the arguments are empty, it's the same as &, i.e. we
65 * repeat the last substitution.
66 */
67 if (cmdp->argc == 0)
68 goto subagain;
69 for (p = cmdp->argv[0]->bp,
70 len = cmdp->argv[0]->len; len > 0; --len, ++p) {
71 if (!isblank(*p))
72 break;
73 }
74 if (len == 0)
75subagain: return (ex_subagain(sp, cmdp));
76
77 delim = *p++;
78 if (isalnum(delim) || delim == '\\')
79 return (s(sp, cmdp, p, &sp->subre_c, SUB_MUSTSETR0x02));
80
81 /*
82 * !!!
83 * The full-blown substitute command reset the remembered
84 * state of the 'c' and 'g' suffices.
85 */
86 sp->c_suffix = sp->g_suffix = 0;
87
88 /*
89 * Get the pattern string, toss escaping characters.
90 *
91 * !!!
92 * Historic vi accepted any of the following forms:
93 *
94 * :s/abc/def/ change "abc" to "def"
95 * :s/abc/def change "abc" to "def"
96 * :s/abc/ delete "abc"
97 * :s/abc delete "abc"
98 *
99 * QUOTING NOTE:
100 *
101 * Only toss an escaping character if it escapes a delimiter.
102 * This means that "s/A/\\\\f" replaces "A" with "\\f". It
103 * would be nice to be more regular, i.e. for each layer of
104 * escaping a single escaping character is removed, but that's
105 * not how the historic vi worked.
106 */
107 for (ptrn = t = p;;) {
108 if (p[0] == '\0' || p[0] == delim) {
109 if (p[0] == delim)
110 ++p;
111 /*
112 * !!!
113 * Nul terminate the pattern string -- it's passed
114 * to regcomp which doesn't understand anything else.
115 */
116 *t = '\0';
117 break;
118 }
119 if (p[0] == '\\') {
120 if (p[1] == delim)
121 ++p;
122 else if (p[1] == '\\')
123 *t++ = *p++;
124 }
125 *t++ = *p++;
126 }
127
128 /*
129 * If the pattern string is empty, use the last RE (not just the
130 * last substitution RE).
131 */
132 if (*ptrn == '\0') {
133 if (sp->re == NULL((void *)0)) {
134 ex_emsg(sp, NULL((void *)0), EXM_NOPREVRE);
135 return (1);
136 }
137
138 /* Re-compile the RE if necessary. */
139 if (!F_ISSET(sp, SC_RE_SEARCH)(((sp)->flags) & ((0x00400000))) && re_compile(sp,
140 sp->re, sp->re_len, NULL((void *)0), NULL((void *)0), &sp->re_c, RE_C_SEARCH0x0002))
141 return (1);
142 flags = 0;
143 } else {
144 /*
145 * !!!
146 * Compile the RE. Historic practice is that substitutes set
147 * the search direction as well as both substitute and search
148 * RE's. We compile the RE twice, as we don't want to bother
149 * ref counting the pattern string and (opaque) structure.
150 */
151 if (re_compile(sp, ptrn, t - ptrn,
152 &sp->re, &sp->re_len, &sp->re_c, RE_C_SEARCH0x0002))
153 return (1);
154 if (re_compile(sp, ptrn, t - ptrn,
155 &sp->subre, &sp->subre_len, &sp->subre_c, RE_C_SUBST0x0008))
156 return (1);
157
158 flags = SUB_FIRST0x01;
159 sp->searchdir = FORWARD;
160 }
161 re = &sp->re_c;
162
163 /*
164 * Get the replacement string.
165 *
166 * The special character & (\& if O_MAGIC not set) matches the
167 * entire RE. No handling of & is required here, it's done by
168 * re_sub().
169 *
170 * The special character ~ (\~ if O_MAGIC not set) inserts the
171 * previous replacement string into this replacement string.
172 * Count ~'s to figure out how much space we need. We could
173 * special case nonexistent last patterns or whether or not
174 * O_MAGIC is set, but it's probably not worth the effort.
175 *
176 * QUOTING NOTE:
177 *
178 * Only toss an escaping character if it escapes a delimiter or
179 * if O_MAGIC is set and it escapes a tilde.
180 *
181 * !!!
182 * If the entire replacement pattern is "%", then use the last
183 * replacement pattern. This semantic was added to vi in System
184 * V and then percolated elsewhere, presumably around the time
185 * that it was added to their version of ed(1).
186 */
187 if (p[0] == '\0' || p[0] == delim) {
188 if (p[0] == delim)
189 ++p;
190 free(sp->repl);
191 sp->repl = NULL((void *)0);
192 sp->repl_len = 0;
193 } else if (p[0] == '%' && (p[1] == '\0' || p[1] == delim))
194 p += p[1] == delim ? 2 : 1;
195 else {
196 for (rep = p, len = 0;
197 p[0] != '\0' && p[0] != delim; ++p, ++len)
198 if (p[0] == '~')
199 len += sp->repl_len;
200 GET_SPACE_RET(sp, bp, blen, len){ GS *L__gp = (sp) == ((void *)0) ? ((void *)0) : (sp)->gp
; if (L__gp == ((void *)0) || (((L__gp)->flags) & ((0x0100
)))) { (bp) = ((void *)0); (blen) = 0; { void *L__bincp; if (
((len)) > ((blen))) { if ((L__bincp = binc(((sp)), ((bp)),
&((blen)), ((len)))) == ((void *)0)) return (1); ((bp)) =
L__bincp; } }; } else { { void *L__bincp; if (((len)) > (
L__gp->tmp_blen)) { if ((L__bincp = binc(((sp)), (L__gp->
tmp_bp), &(L__gp->tmp_blen), ((len)))) == ((void *)0))
return (1); (L__gp->tmp_bp) = L__bincp; } }; (bp) = L__gp
->tmp_bp; (blen) = L__gp->tmp_blen; (((L__gp)->flags
) |= ((0x0100))); } }
;
201 for (t = bp, len = 0, p = rep;;) {
202 if (p[0] == '\0' || p[0] == delim) {
203 if (p[0] == delim)
204 ++p;
205 break;
206 }
207 if (p[0] == '\\') {
208 if (p[1] == delim)
209 ++p;
210 else if (p[1] == '\\') {
211 *t++ = *p++;
212 ++len;
213 } else if (p[1] == '~') {
214 ++p;
215 if (!O_ISSET(sp, O_MAGIC)((((&(((sp)))->opts[(((O_MAGIC)))])->flags) & (
(0x01))) ? (((sp)))->gp->opts[(((sp)))->opts[(((O_MAGIC
)))].o_cur.val].o_cur.val : (((sp)))->opts[(((O_MAGIC)))].
o_cur.val)
)
216 goto tilde;
217 }
218 } else if (p[0] == '~' && O_ISSET(sp, O_MAGIC)((((&(((sp)))->opts[(((O_MAGIC)))])->flags) & (
(0x01))) ? (((sp)))->gp->opts[(((sp)))->opts[(((O_MAGIC
)))].o_cur.val].o_cur.val : (((sp)))->opts[(((O_MAGIC)))].
o_cur.val)
) {
219tilde: ++p;
220 memcpy(t, sp->repl, sp->repl_len);
221 t += sp->repl_len;
222 len += sp->repl_len;
223 continue;
224 }
225 *t++ = *p++;
226 ++len;
227 }
228 if ((sp->repl_len = len) != 0) {
229 free(sp->repl);
230 if ((sp->repl = malloc(len)) == NULL((void *)0)) {
231 msgq(sp, M_SYSERR, NULL((void *)0));
232 FREE_SPACE(sp, bp, blen){ GS *L__gp = (sp) == ((void *)0) ? ((void *)0) : (sp)->gp
; if (L__gp != ((void *)0) && (bp) == L__gp->tmp_bp
) (((L__gp)->flags) &= ~((0x0100))); else free(bp); }
;
233 return (1);
234 }
235 memcpy(sp->repl, bp, len);
236 }
237 FREE_SPACE(sp, bp, blen){ GS *L__gp = (sp) == ((void *)0) ? ((void *)0) : (sp)->gp
; if (L__gp != ((void *)0) && (bp) == L__gp->tmp_bp
) (((L__gp)->flags) &= ~((0x0100))); else free(bp); }
;
238 }
239 return (s(sp, cmdp, p, re, flags));
240}
241
242/*
243 * ex_subagain --
244 * [line [,line]] & [cgr] [count] [#lp]]
245 *
246 * Substitute using the last substitute RE and replacement pattern.
247 *
248 * PUBLIC: int ex_subagain(SCR *, EXCMD *);
249 */
250int
251ex_subagain(SCR *sp, EXCMD *cmdp)
252{
253 if (sp->subre == NULL((void *)0)) {
254 ex_emsg(sp, NULL((void *)0), EXM_NOPREVRE);
255 return (1);
256 }
257 if (!F_ISSET(sp, SC_RE_SUBST)(((sp)->flags) & ((0x00800000))) && re_compile(sp,
258 sp->subre, sp->subre_len, NULL((void *)0), NULL((void *)0), &sp->subre_c, RE_C_SUBST0x0008))
259 return (1);
260 return (s(sp,
261 cmdp, cmdp->argc ? cmdp->argv[0]->bp : NULL((void *)0), &sp->subre_c, 0));
262}
263
264/*
265 * ex_subtilde --
266 * [line [,line]] ~ [cgr] [count] [#lp]]
267 *
268 * Substitute using the last RE and last substitute replacement pattern.
269 *
270 * PUBLIC: int ex_subtilde(SCR *, EXCMD *);
271 */
272int
273ex_subtilde(SCR *sp, EXCMD *cmdp)
274{
275 if (sp->re == NULL((void *)0)) {
276 ex_emsg(sp, NULL((void *)0), EXM_NOPREVRE);
277 return (1);
278 }
279 if (!F_ISSET(sp, SC_RE_SEARCH)(((sp)->flags) & ((0x00400000))) && re_compile(sp,
280 sp->re, sp->re_len, NULL((void *)0), NULL((void *)0), &sp->re_c, RE_C_SEARCH0x0002))
281 return (1);
282 return (s(sp,
283 cmdp, cmdp->argc ? cmdp->argv[0]->bp : NULL((void *)0), &sp->re_c, 0));
284}
285
286/*
287 * s --
288 * Do the substitution. This stuff is *really* tricky. There are lots of
289 * special cases, and general nastiness. Don't mess with it unless you're
290 * pretty confident.
291 *
292 * The nasty part of the substitution is what happens when the replacement
293 * string contains newlines. It's a bit tricky -- consider the information
294 * that has to be retained for "s/f\(o\)o/^M\1^M\1/". The solution here is
295 * to build a set of newline offsets which we use to break the line up later,
296 * when the replacement is done. Don't change it unless you're *damned*
297 * confident.
298 */
/*
 * NEEDNEWLINE --
 *	Make sure sp->newl (the array of newline offsets) has room for at
 *	least one more entry, growing it by 25 slots when it is full.
 *
 *	REALLOCARRAY reports the error, frees the old array and leaves
 *	sp->newl NULL on failure.  In that case both the capacity and the
 *	entry count are reset before returning 1 from the enclosing function:
 *	a stale, non-zero newl_cnt would otherwise make later code index the
 *	now-NULL array.
 *
 *	Expands to a bare block containing `return (1)', so it may only be
 *	used inside a function returning int.
 */
#define	NEEDNEWLINE(sp) {						\
	if ((sp)->newl_len == (sp)->newl_cnt) {				\
		(sp)->newl_len += 25;					\
		REALLOCARRAY((sp), (sp)->newl,				\
		    (sp)->newl_len, sizeof(size_t));			\
		if ((sp)->newl == NULL) {				\
			(sp)->newl_len = 0;				\
			(sp)->newl_cnt = 0;				\
			return (1);					\
		}							\
	}								\
}
310
/*
 * BUILD --
 *	Append `len' bytes starting at `l' to the build buffer.
 *
 *	Operates on the caller's locals: lb (buffer), lbclen (bytes in use)
 *	and lblen (bytes allocated).  When the buffer is too small it grows
 *	by the larger of the needed size and 256 bytes; if REALLOC leaves lb
 *	NULL, lbclen is zeroed and the enclosing function returns 1.
 *
 *	Expands to a bare block (callers invoke it without a trailing
 *	semicolon), and `len' is evaluated several times.
 */
#define	BUILD(sp, l, len) {						\
	if (lblen < lbclen + (len)) {					\
		lblen += MAXIMUM(lbclen + (len), 256);			\
		REALLOC((sp), lb, lblen);				\
		if (lb == NULL) {					\
			lbclen = 0;					\
			return (1);					\
		}							\
	}								\
	memcpy(lb + lbclen, (l), (len));				\
	lbclen += (len);						\
}
323
/*
 * NEEDSP --
 *	Make sure the build buffer can take `len' more bytes.
 *
 *	Operates on the caller's locals lb, lbclen and lblen, growing the
 *	buffer the same way BUILD does.  Because growing may move the buffer,
 *	the caller's write pointer `pnt' is re-aimed at lb + lbclen whenever
 *	a reallocation happens.  If REALLOC leaves lb NULL, lbclen is zeroed
 *	and the enclosing function returns 1.
 */
#define	NEEDSP(sp, len, pnt) {						\
	if (lblen < lbclen + (len)) {					\
		lblen += MAXIMUM(lbclen + (len), 256);			\
		REALLOC((sp), lb, lblen);				\
		if (lb == NULL) {					\
			lbclen = 0;					\
			return (1);					\
		}							\
		(pnt) = lb + lbclen;					\
	}								\
}
335
336static int
337s(SCR *sp, EXCMD *cmdp, char *s, regex_t *re, u_int flags)
338{
339 EVENT ev;
340 MARK from, to;
341 TEXTH tiq;
342 recno_t elno, lno, slno;
343 regmatch_t match[10];
344 size_t blen, cnt, last, lbclen, lblen, len, llen;
345 size_t offset, saved_offset, scno;
346 int lflag, nflag, pflag, rflag;
347 int didsub, do_eol_match, eflags, nempty, eval;
348 int linechanged, matched, quit, rval;
349 unsigned long ul;
350 char *bp, *lb;
351
352 NEEDFILE(sp, cmdp){ if ((sp)->ep == ((void *)0)) { ex_emsg((sp), (cmdp)->
cmd->name, EXM_NOFILEYET); return (1); } }
;
353
354 slno = sp->lno;
355 scno = sp->cno;
356
357 /*
358 * !!!
359 * Historically, the 'g' and 'c' suffices were always toggled as flags,
360 * so ":s/A/B/" was the same as ":s/A/B/ccgg". If O_EDCOMPATIBLE was
361 * not set, they were initialized to 0 for all substitute commands. If
362 * O_EDCOMPATIBLE was set, they were initialized to 0 only if the user
363 * specified substitute/replacement patterns (see ex_s()).
364 */
365 if (!O_ISSET(sp, O_EDCOMPATIBLE)((((&(((sp)))->opts[(((O_EDCOMPATIBLE)))])->flags) &
((0x01))) ? (((sp)))->gp->opts[(((sp)))->opts[(((O_EDCOMPATIBLE
)))].o_cur.val].o_cur.val : (((sp)))->opts[(((O_EDCOMPATIBLE
)))].o_cur.val)
)
366 sp->c_suffix = sp->g_suffix = 0;
367
368 /*
369 * Historic vi permitted the '#', 'l' and 'p' options in vi mode, but
370 * it only displayed the last change. I'd disallow them, but they are
371 * useful in combination with the [v]global commands. In the current
372 * model the problem is combining them with the 'c' flag -- the screen
373 * would have to flip back and forth between the confirm screen and the
374 * ex print screen, which would be pretty awful. We do display all
375 * changes, though, for what that's worth.
376 *
377 * !!!
378 * Historic vi was fairly strict about the order of "options", the
379 * count, and "flags". I'm somewhat fuzzy on the difference between
380 * options and flags, anyway, so this is a simpler approach, and we
381 * just take it them in whatever order the user gives them. (The ex
382 * usage statement doesn't reflect this.)
383 */
384 lflag = nflag = pflag = rflag = 0;
385 if (s == NULL((void *)0))
386 goto noargs;
387 for (lno = OOBLNO0; *s != '\0'; ++s)
388 switch (*s) {
389 case ' ':
390 case '\t':
391 continue;
392 case '+':
393 ++cmdp->flagoff;
394 break;
395 case '-':
396 --cmdp->flagoff;
397 break;
398 case '0': case '1': case '2': case '3': case '4':
399 case '5': case '6': case '7': case '8': case '9':
400 if (lno != OOBLNO0)
401 goto usage;
402 errno(*__errno()) = 0;
403 if ((ul = strtoul(s, &s, 10)) >= UINT_MAX0xffffffffU)
404 errno(*__errno()) = ERANGE34;
405 if (*s == '\0') /* Loop increment correction. */
406 --s;
407 if (errno(*__errno()) == ERANGE34) {
408 if (ul >= UINT_MAX0xffffffffU)
409 msgq(sp, M_ERR, "Count overflow");
410 else
411 msgq(sp, M_SYSERR, NULL((void *)0));
412 return (1);
413 }
414 lno = (recno_t)ul;
415 /*
416 * In historic vi, the count was inclusive from the
417 * second address.
418 */
419 cmdp->addr1.lno = cmdp->addr2.lno;
420 cmdp->addr2.lno += lno - 1;
421 if (!db_exist(sp, cmdp->addr2.lno) &&
422 db_last(sp, &cmdp->addr2.lno))
423 return (1);
424 break;
425 case '#':
426 nflag = 1;
427 break;
428 case 'c':
429 sp->c_suffix = !sp->c_suffix;
430
431 /* Ex text structure initialization. */
432 if (F_ISSET(sp, SC_EX)(((sp)->flags) & ((0x00000001)))) {
433 memset(&tiq, 0, sizeof(TEXTH));
434 TAILQ_INIT(&tiq)do { (&tiq)->tqh_first = ((void *)0); (&tiq)->tqh_last
= &(&tiq)->tqh_first; } while (0)
;
435 }
436 break;
437 case 'g':
438 sp->g_suffix = !sp->g_suffix;
439 break;
440 case 'l':
441 lflag = 1;
442 break;
443 case 'p':
444 pflag = 1;
445 break;
446 case 'r':
447 if (LF_ISSET(SUB_FIRST)((flags) & ((0x01)))) {
448 msgq(sp, M_ERR,
449 "Regular expression specified; r flag meaningless");
450 return (1);
451 }
452 if (!F_ISSET(sp, SC_RE_SEARCH)(((sp)->flags) & ((0x00400000)))) {
453 ex_emsg(sp, NULL((void *)0), EXM_NOPREVRE);
454 return (1);
455 }
456 rflag = 1;
457 re = &sp->re_c;
458 break;
459 default:
460 goto usage;
461 }
462
463 if (*s != '\0' || (!rflag && LF_ISSET(SUB_MUSTSETR)((flags) & ((0x02))))) {
464usage: ex_emsg(sp, cmdp->cmd->usage, EXM_USAGE);
465 return (1);
466 }
467
468noargs: if (F_ISSET(sp, SC_VI)(((sp)->flags) & ((0x00000002))) && sp->c_suffix && (lflag || nflag || pflag)) {
469 msgq(sp, M_ERR,
470"The #, l and p flags may not be combined with the c flag in vi mode");
471 return (1);
472 }
473
474 /*
475 * bp: if interactive, line cache
476 * blen: if interactive, line cache length
477 * lb: build buffer pointer.
478 * lbclen: current length of built buffer.
479 * lblen; length of build buffer.
480 */
481 bp = lb = NULL((void *)0);
482 blen = lbclen = lblen = 0;
483
484 /* For each line... */
485 for (matched = quit = 0, lno = cmdp->addr1.lno,
486 elno = cmdp->addr2.lno; !quit && lno <= elno; ++lno) {
487
488 /* Someone's unhappy, time to stop. */
489 if (INTERRUPTED(sp)(((((sp)->gp)->flags) & ((0x0004))) || (!v_event_get
((sp), ((void *)0), 0, 0x001) && ((((sp)->gp)->
flags) & ((0x0004)))))
)
490 break;
491
492 /* Get the line. */
493 if (db_get(sp, lno, DBG_FATAL0x001, &s, &llen))
494 goto err;
495
496 /*
497 * Make a local copy if doing confirmation -- when calling
498 * the confirm routine we're likely to lose the cached copy.
499 */
500 if (sp->c_suffix) {
501 if (bp == NULL((void *)0)) {
502 GET_SPACE_RET(sp, bp, blen, llen){ GS *L__gp = (sp) == ((void *)0) ? ((void *)0) : (sp)->gp
; if (L__gp == ((void *)0) || (((L__gp)->flags) & ((0x0100
)))) { (bp) = ((void *)0); (blen) = 0; { void *L__bincp; if (
((llen)) > ((blen))) { if ((L__bincp = binc(((sp)), ((bp))
, &((blen)), ((llen)))) == ((void *)0)) return (1); ((bp)
) = L__bincp; } }; } else { { void *L__bincp; if (((llen)) >
(L__gp->tmp_blen)) { if ((L__bincp = binc(((sp)), (L__gp->
tmp_bp), &(L__gp->tmp_blen), ((llen)))) == ((void *)0)
) return (1); (L__gp->tmp_bp) = L__bincp; } }; (bp) = L__gp
->tmp_bp; (blen) = L__gp->tmp_blen; (((L__gp)->flags
) |= ((0x0100))); } }
;
503 } else
504 ADD_SPACE_RET(sp, bp, blen, llen){ GS *L__gp = (sp) == ((void *)0) ? ((void *)0) : (sp)->gp
; if (L__gp != ((void *)0) && (bp) == L__gp->tmp_bp
) { (((L__gp)->flags) &= ~((0x0100))); { void *L__bincp
; if (((llen)) > (L__gp->tmp_blen)) { if ((L__bincp = binc
(((sp)), (L__gp->tmp_bp), &(L__gp->tmp_blen), ((llen
)))) == ((void *)0)) return (1); (L__gp->tmp_bp) = L__bincp
; } }; (bp) = L__gp->tmp_bp; (blen) = L__gp->tmp_blen; (
((L__gp)->flags) |= ((0x0100))); } else { void *L__bincp; if
(((llen)) > ((blen))) { if ((L__bincp = binc(((sp)), ((bp
)), &((blen)), ((llen)))) == ((void *)0)) return (1); ((bp
)) = L__bincp; } }; }
;
505 memcpy(bp, s, llen);
506 s = bp;
507 }
508
509 /* Start searching from the beginning. */
510 offset = 0;
511 len = llen;
512
513 /* Reset the build buffer offset. */
514 lbclen = 0;
515
516 /* Reset empty match test variable. */
517 nempty = -1;
518
519 /*
520 * We don't want to have to do a setline if the line didn't
521 * change -- keep track of whether or not this line changed.
522 * If doing confirmations, don't want to keep setting the
523 * line if change is refused -- keep track of substitutions.
524 */
525 didsub = linechanged = 0;
526
527 /* New line, do an EOL match. */
528 do_eol_match = 1;
529
530 /* It's not nul terminated, but we pretend it is. */
531 eflags = REG_STARTEND00004;
532
533 /* The search area is from s + offset to the EOL. */
534nextmatch: match[0].rm_so = offset;
535 match[0].rm_eo = llen;
536
537 /* Get the next match. */
538 eval = regexec(re, (char *)s, 10, match, eflags);
539
540 /*
541 * There wasn't a match or if there was an error, deal with
542 * it. If there was a previous match in this line, resolve
543 * the changes into the database. Otherwise, just move on.
544 */
545 if (eval == REG_NOMATCH1)
546 goto endmatch;
547 if (eval != 0) {
548 re_error(sp, eval, re);
549 goto err;
550 }
551 matched = 1;
552
553 /* Only the first search can match an anchored expression. */
554 eflags |= REG_NOTBOL00001;
555
556 /*
557 * !!!
558 * It's possible to match 0-length strings -- for example, the
559 * command s;a*;X;, when matched against the string "aabb" will
560 * result in "XbXbX", i.e. the matches are "aa", the space
561 * between the b's and the space between the b's and the end of
562 * the string. There is a similar space between the beginning
563 * of the string and the a's. The rule that we use (because vi
564 * historically used it) is that any 0-length match, occurring
565 * immediately after a match, is ignored. Otherwise, the above
566 * example would have resulted in "XXbXbX". Another example is
567 * incorrectly using " *" to replace groups of spaces with one
568 * space.
569 *
570 * If the match is empty and at the same place as the end of the
571 * previous match, ignore the match and move forward. If
572 * there's no more characters in the string, we were
573 * attempting to match after the last character, so quit.
574 */
575 if (match[0].rm_so == nempty && match[0].rm_eo == nempty) {
576 nempty = -1;
577 if (len == 0)
578 goto endmatch;
579 BUILD(sp, s + offset, 1){ if (lbclen + (1) > lblen) { lblen += (((lbclen + (1)) >
(256)) ? (lbclen + (1)) : (256)); { void *tmpp; if (((tmpp) =
(realloc((lb), (lblen)))) == ((void *)0)) { msgq(((sp)), M_SYSERR
, ((void *)0)); free(lb); } lb = tmpp; }; if (lb == ((void *)
0)) { lbclen = 0; return (1); } } memcpy(lb + lbclen, (s + offset
), (1)); lbclen += (1); }
580 ++offset;
581 --len;
582 goto nextmatch;
583 }
584
585 /* Confirm change. */
586 if (sp->c_suffix) {
587 /*
588 * Set the cursor position for confirmation. Note,
589 * if we matched on a '$', the cursor may be past
590 * the end of line.
591 */
592 from.lno = to.lno = lno;
593 from.cno = match[0].rm_so;
594 to.cno = match[0].rm_eo;
595 /*
596 * Both ex and vi have to correct for a change before
597 * the first character in the line.
598 */
599 if (llen == 0)
600 from.cno = to.cno = 0;
601 if (F_ISSET(sp, SC_VI)(((sp)->flags) & ((0x00000002)))) {
602 /*
603 * Only vi has to correct for a change after
604 * the last character in the line.
605 *
606 * XXX
607 * It would be nice to change the vi code so
608 * that we could display a cursor past EOL.
609 */
610 if (to.cno >= llen)
611 to.cno = llen - 1;
612 if (from.cno >= llen)
613 from.cno = llen - 1;
614
615 sp->lno = from.lno;
616 sp->cno = from.cno;
617 if (vs_refresh(sp, 1))
618 goto err;
619
620 vs_update(sp, "Confirm change? [n]", NULL((void *)0));
621
622 if (v_event_get(sp, &ev, 0, 0))
623 goto err;
624 switch (ev.e_event) {
625 case E_CHARACTER:
626 break;
627 case E_EOF:
628 case E_ERR:
629 case E_INTERRUPT:
630 goto lquit;
631 default:
632 v_event_err(sp, &ev);
633 goto lquit;
634 }
635 } else {
636 const int flags =
637 O_ISSET(sp, O_NUMBER)((((&(((sp)))->opts[(((O_NUMBER)))])->flags) & (
(0x01))) ? (((sp)))->gp->opts[(((sp)))->opts[(((O_NUMBER
)))].o_cur.val].o_cur.val : (((sp)))->opts[(((O_NUMBER)))]
.o_cur.val)
? E_C_HASH0x00200 : 0;
638 if (ex_print(sp, cmdp, &from, &to, flags) ||
639 ex_scprint(sp, &from, &to))
640 goto lquit;
641 if (ex_txt(sp, &tiq, 0, TXT_CR0x00000800))
642 goto err;
643 ev.e_c_u_event._e_ch.c = TAILQ_FIRST(&tiq)((&tiq)->tqh_first)->lb[0];
644 }
645
646 switch (ev.e_c_u_event._e_ch.c) {
647 case CH_YES'y':
648 break;
649 default:
650 case CH_NO'n':
651 didsub = 0;
652 BUILD(sp, s + offset, match[0].rm_eo - offset){ if (lbclen + (match[0].rm_eo - offset) > lblen) { lblen +=
(((lbclen + (match[0].rm_eo - offset)) > (256)) ? (lbclen
+ (match[0].rm_eo - offset)) : (256)); { void *tmpp; if (((tmpp
) = (realloc((lb), (lblen)))) == ((void *)0)) { msgq(((sp)), M_SYSERR
, ((void *)0)); free(lb); } lb = tmpp; }; if (lb == ((void *)
0)) { lbclen = 0; return (1); } } memcpy(lb + lbclen, (s + offset
), (match[0].rm_eo - offset)); lbclen += (match[0].rm_eo - offset
); }
;
653 goto skip;
654 case CH_QUIT'q':
655 /* Set the quit/interrupted flags. */
656lquit: quit = 1;
657 F_SET(sp->gp, G_INTERRUPTED)(((sp->gp)->flags) |= ((0x0004)));
658
659 /*
660 * Resolve any changes, then return to (and
661 * exit from) the main loop.
662 */
663 goto endmatch;
664 }
665 }
666
667 /*
668 * Set the cursor to the last position changed, converting
669 * from 1-based to 0-based.
670 */
671 sp->lno = lno;
672 sp->cno = match[0].rm_so;
673
674 /* Copy the bytes before the match into the build buffer. */
675 BUILD(sp, s + offset, match[0].rm_so - offset){ if (lbclen + (match[0].rm_so - offset) > lblen) { lblen +=
(((lbclen + (match[0].rm_so - offset)) > (256)) ? (lbclen
+ (match[0].rm_so - offset)) : (256)); { void *tmpp; if (((tmpp
) = (realloc((lb), (lblen)))) == ((void *)0)) { msgq(((sp)), M_SYSERR
, ((void *)0)); free(lb); } lb = tmpp; }; if (lb == ((void *)
0)) { lbclen = 0; return (1); } } memcpy(lb + lbclen, (s + offset
), (match[0].rm_so - offset)); lbclen += (match[0].rm_so - offset
); }
;
676
677 /* Substitute the matching bytes. */
678 didsub = 1;
679 if (re_sub(sp, s, &lb, &lbclen, &lblen, match))
680 goto err;
681
682 /* Set the change flag so we know this line was modified. */
683 linechanged = 1;
684
685 /* Move past the matched bytes. */
686skip: offset = match[0].rm_eo;
687 len = llen - match[0].rm_eo;
688
689 /* A match cannot be followed by an empty pattern. */
690 nempty = match[0].rm_eo;
691
692 /*
693 * If doing a global change with confirmation, we have to
694 * update the screen. The basic idea is to store the line
695 * so the screen update routines can find it, and restart.
696 */
697 if (didsub && sp->c_suffix && sp->g_suffix) {
698 /*
699 * The new search offset will be the end of the
700 * modified line.
701 */
702 saved_offset = lbclen;
703
704 /* Copy the rest of the line. */
705 if (len)
706 BUILD(sp, s + offset, len){ if (lbclen + (len) > lblen) { lblen += (((lbclen + (len)
) > (256)) ? (lbclen + (len)) : (256)); { void *tmpp; if (
((tmpp) = (realloc((lb), (lblen)))) == ((void *)0)) { msgq(((
sp)), M_SYSERR, ((void *)0)); free(lb); } lb = tmpp; }; if (lb
== ((void *)0)) { lbclen = 0; return (1); } } memcpy(lb + lbclen
, (s + offset), (len)); lbclen += (len); }
707
708 /* Set the new offset. */
709 offset = saved_offset;
710
711 /* Store inserted lines, adjusting the build buffer. */
712 last = 0;
713 if (sp->newl_cnt) {
714 for (cnt = 0;
715 cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) {
716 if (db_insert(sp, lno,
717 lb + last, sp->newl[cnt] - last))
718 goto err;
719 last = sp->newl[cnt] + 1;
720 ++sp->rptlines[L_ADDED0];
721 }
722 lbclen -= last;
723 offset -= last;
724 sp->newl_cnt = 0;
725 }
726
727 /* Store and retrieve the line. */
728 if (db_set(sp, lno, lb + last, lbclen))
729 goto err;
730 if (db_get(sp, lno, DBG_FATAL0x001, &s, &llen))
731 goto err;
732 ADD_SPACE_RET(sp, bp, blen, llen){ GS *L__gp = (sp) == ((void *)0) ? ((void *)0) : (sp)->gp
; if (L__gp != ((void *)0) && (bp) == L__gp->tmp_bp
) { (((L__gp)->flags) &= ~((0x0100))); { void *L__bincp
; if (((llen)) > (L__gp->tmp_blen)) { if ((L__bincp = binc
(((sp)), (L__gp->tmp_bp), &(L__gp->tmp_blen), ((llen
)))) == ((void *)0)) return (1); (L__gp->tmp_bp) = L__bincp
; } }; (bp) = L__gp->tmp_bp; (blen) = L__gp->tmp_blen; (
((L__gp)->flags) |= ((0x0100))); } else { void *L__bincp; if
(((llen)) > ((blen))) { if ((L__bincp = binc(((sp)), ((bp
)), &((blen)), ((llen)))) == ((void *)0)) return (1); ((bp
)) = L__bincp; } }; }
733 memcpy(bp, s, llen);
734 s = bp;
735 len = llen - offset;
736
737 /* Restart the build. */
738 lbclen = 0;
739 BUILD(sp, s, offset){ if (lbclen + (offset) > lblen) { lblen += (((lbclen + (offset
)) > (256)) ? (lbclen + (offset)) : (256)); { void *tmpp; if
(((tmpp) = (realloc((lb), (lblen)))) == ((void *)0)) { msgq(
((sp)), M_SYSERR, ((void *)0)); free(lb); } lb = tmpp; }; if (
lb == ((void *)0)) { lbclen = 0; return (1); } } memcpy(lb + lbclen
, (s), (offset)); lbclen += (offset); }
;
740
741 /*
742 * If we haven't already done the after-the-string
743 * match, do one. Set REG_NOTEOL so the '$' pattern
744 * only matches once.
745 */
746 if (!do_eol_match)
747 goto endmatch;
748 if (offset == len) {
749 do_eol_match = 0;
750 eflags |= REG_NOTEOL00002;
751 }
752 goto nextmatch;
753 }
754
755 /*
756 * If it's a global:
757 *
758 * If at the end of the string, do a test for the after
759 * the string match. Set REG_NOTEOL so the '$' pattern
760 * only matches once.
761 */
762 if (sp->g_suffix && do_eol_match) {
763 if (len == 0) {
764 do_eol_match = 0;
765 eflags |= REG_NOTEOL00002;
766 }
767 goto nextmatch;
768 }
769
770endmatch: if (!linechanged)
771 continue;
772
773 /* Copy any remaining bytes into the build buffer. */
774 if (len)
775 BUILD(sp, s + offset, len){ if (lbclen + (len) > lblen) { lblen += (((lbclen + (len)
) > (256)) ? (lbclen + (len)) : (256)); { void *tmpp; if (
((tmpp) = (realloc((lb), (lblen)))) == ((void *)0)) { msgq(((
sp)), M_SYSERR, ((void *)0)); free(lb); } lb = tmpp; }; if (lb
== ((void *)0)) { lbclen = 0; return (1); } } memcpy(lb + lbclen
, (s + offset), (len)); lbclen += (len); }
776
777 /* Store inserted lines, adjusting the build buffer. */
778 last = 0;
779 if (sp->newl_cnt) {
780 for (cnt = 0;
781 cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) {
782 if (db_insert(sp,
783 lno, lb + last, sp->newl[cnt] - last))
784 goto err;
785 last = sp->newl[cnt] + 1;
786 ++sp->rptlines[L_ADDED0];
787 }
788 lbclen -= last;
789 sp->newl_cnt = 0;
790 }
791
792 /* Store the changed line. */
793 if (db_set(sp, lno, lb + last, lbclen))
794 goto err;
795
796 /* Update changed line counter. */
797 if (sp->rptlchange != lno) {
798 sp->rptlchange = lno;
799 ++sp->rptlines[L_CHANGED1];
800 }
801
802 /*
803 * !!!
804 * Display as necessary. Historic practice is to only
805 * display the last line of a line split into multiple
806 * lines.
807 */
808 if (lflag || nflag || pflag) {
809 from.lno = to.lno = lno;
810 from.cno = to.cno = 0;
811 if (lflag)
812 (void)ex_print(sp, cmdp, &from, &to, E_C_LIST0x00400);
813 if (nflag)
814 (void)ex_print(sp, cmdp, &from, &to, E_C_HASH0x00200);
815 if (pflag)
816 (void)ex_print(sp, cmdp, &from, &to, E_C_PRINT0x01000);
817 }
818 }
819
820 /*
821 * !!!
822 * Historically, vi attempted to leave the cursor at the same place if
823 * the substitution was done at the current cursor position. Otherwise
824 * it moved it to the first non-blank of the last line changed. There
825 * were some problems: for example, :s/$/foo/ with the cursor on the
826 * last character of the line left the cursor on the last character, or
827 * the & command with multiple occurrences of the matching string in the
828 * line usually left the cursor in a fairly random position.
829 *
830 * We try to do the same thing, with the exception that if the user is
831 * doing substitution with confirmation, we move to the last line about
832 * which the user was consulted, as opposed to the last line that they
833 * actually changed. This prevents a screen flash if the user doesn't
834 * change many of the possible lines.
835 */
836 if (!sp->c_suffix && (sp->lno != slno || sp->cno != scno)) {
837 sp->cno = 0;
838 (void)nonblank(sp, sp->lno, &sp->cno);
839 }
840
841 /*
842 * If not in a global command, and nothing matched, say so.
843 * Else, if none of the lines displayed, put something up.
844 */
845 rval = 0;
846 if (!matched) {
847 if (!F_ISSET(sp, SC_EX_GLOBAL)(((sp)->flags) & ((0x00020000)))) {
848 msgq(sp, M_ERR, "No match found");
849 goto err;
850 }
851 } else if (!lflag && !nflag && !pflag)
852 F_SET(cmdp, E_AUTOPRINT)(((cmdp)->flags) |= ((0x00000040)));
853
854 if (0) {
855err: rval = 1;
856 }
857
858 if (bp != NULL((void *)0))
859 FREE_SPACE(sp, bp, blen){ GS *L__gp = (sp) == ((void *)0) ? ((void *)0) : (sp)->gp
; if (L__gp != ((void *)0) && (bp) == L__gp->tmp_bp
) (((L__gp)->flags) &= ~((0x0100))); else free(bp); }
;
860 free(lb);
861 return (rval);
862}
863
864/*
865 * re_compile --
866 * Compile the RE.
867 *
868 * PUBLIC: int re_compile(SCR *,
869 * PUBLIC: char *, size_t, char **, size_t *, regex_t *, u_int);
870 */
871int
872re_compile(SCR *sp, char *ptrn, size_t plen, char **ptrnp, size_t *lenp,
873 regex_t *rep, u_int flags)
874{
875 size_t len;
876 int reflags, replaced, rval;
877 char *p;
878
879 /* Set RE flags. */
880 reflags = 0;
881 if (!LF_ISSET(RE_C_TAG)((flags) & ((0x0010)))) {
882 if (O_ISSET(sp, O_EXTENDED)((((&(((sp)))->opts[(((O_EXTENDED)))])->flags) &
((0x01))) ? (((sp)))->gp->opts[(((sp)))->opts[(((O_EXTENDED
)))].o_cur.val].o_cur.val : (((sp)))->opts[(((O_EXTENDED))
)].o_cur.val)
)
883 reflags |= REG_EXTENDED0001;
884 if (O_ISSET(sp, O_IGNORECASE)((((&(((sp)))->opts[(((O_IGNORECASE)))])->flags) &
((0x01))) ? (((sp)))->gp->opts[(((sp)))->opts[(((O_IGNORECASE
)))].o_cur.val].o_cur.val : (((sp)))->opts[(((O_IGNORECASE
)))].o_cur.val)
)
885 reflags |= REG_ICASE0002;
886 if (O_ISSET(sp, O_ICLOWER)((((&(((sp)))->opts[(((O_ICLOWER)))])->flags) &
((0x01))) ? (((sp)))->gp->opts[(((sp)))->opts[(((O_ICLOWER
)))].o_cur.val].o_cur.val : (((sp)))->opts[(((O_ICLOWER)))
].o_cur.val)
) {
887 for (p = ptrn, len = plen; len > 0; ++p, --len)
888 if (isupper(*p))
889 break;
890 if (len == 0)
891 reflags |= REG_ICASE0002;
892 }
893 }
894
895 /* If we're replacing a saved value, clear the old one. */
896 if (LF_ISSET(RE_C_SEARCH)((flags) & ((0x0002))) && F_ISSET(sp, SC_RE_SEARCH)(((sp)->flags) & ((0x00400000)))) {
897 regfree(&sp->re_c);
898 F_CLR(sp, SC_RE_SEARCH)(((sp)->flags) &= ~((0x00400000)));
899 }
900 if (LF_ISSET(RE_C_SUBST)((flags) & ((0x0008))) && F_ISSET(sp, SC_RE_SUBST)(((sp)->flags) & ((0x00800000)))) {
901 regfree(&sp->subre_c);
902 F_CLR(sp, SC_RE_SUBST)(((sp)->flags) &= ~((0x00800000)));
903 }
904
905 /*
906 * If we're saving the string, it's a pattern we haven't seen before,
907 * so convert the vi-style RE's to POSIX 1003.2 RE's. Save a copy for
908 * later recompilation. Free any previously saved value.
909 */
910 if (ptrnp != NULL((void *)0)) {
911 if (LF_ISSET(RE_C_TAG)((flags) & ((0x0010)))) {
912 if (re_tag_conv(sp, &ptrn, &plen, &replaced))
913 return (1);
914 } else
915 if (re_conv(sp, &ptrn, &plen, &replaced))
916 return (1);
917
918 /* Discard previous pattern. */
919 free(*ptrnp);
920 *ptrnp = NULL((void *)0);
921 if (lenp != NULL((void *)0))
922 *lenp = plen;
923
924 /*
925 * Copy the string into allocated memory.
926 *
927 * XXX
928 * Regcomp isn't 8-bit clean, so the pattern is nul-terminated
929 * for now. There's just no other solution.
930 */
931 MALLOC(sp, *ptrnp, plen + 1){ if (((*ptrnp) = malloc(plen + 1)) == ((void *)0)) msgq((sp)
, M_SYSERR, ((void *)0)); }
;
932 if (*ptrnp != NULL((void *)0)) {
933 memcpy(*ptrnp, ptrn, plen);
934 (*ptrnp)[plen] = '\0';
935 }
936
937 /* Free up conversion-routine-allocated memory. */
938 if (replaced)
939 FREE_SPACE(sp, ptrn, 0){ GS *L__gp = (sp) == ((void *)0) ? ((void *)0) : (sp)->gp
; if (L__gp != ((void *)0) && (ptrn) == L__gp->tmp_bp
) (((L__gp)->flags) &= ~((0x0100))); else free(ptrn); }
;
940
941 if (*ptrnp == NULL((void *)0))
942 return (1);
943
944 ptrn = *ptrnp;
945 }
946
947 /*
948 * XXX
949 * Regcomp isn't 8-bit clean, so we just lost if the pattern
950 * contained a nul. Bummer!
951 */
952 if ((rval = regcomp(rep, ptrn, /* plen, */ reflags)) != 0) {
953 if (!LF_ISSET(RE_C_SILENT)((flags) & ((0x0004))))
954 re_error(sp, rval, rep);
955 return (1);
956 }
957
958 if (LF_ISSET(RE_C_SEARCH)((flags) & ((0x0002))))
959 F_SET(sp, SC_RE_SEARCH)(((sp)->flags) |= ((0x00400000)));
960 if (LF_ISSET(RE_C_SUBST)((flags) & ((0x0008))))
961 F_SET(sp, SC_RE_SUBST)(((sp)->flags) |= ((0x00800000)));
962
963 return (0);
964}
965
966/*
967 * re_conv --
968 * Convert vi's regular expressions into something that the
969 * the POSIX 1003.2 RE functions can handle.
970 *
971 * There are two conversions we make to make vi's RE's (specifically
972 * the global, search, and substitute patterns) work with POSIX RE's.
973 * We assume that \<ptrn\> does "word" searches, which is non-standard
974 * but supported by most regexp libraries..
975 *
976 * 1: If O_MAGIC is not set, strip backslashes from the magic character
977 * set (.[*~) that have them, and add them to the ones that don't.
978 * 2: If O_MAGIC is not set, the string "\~" is replaced with the text
979 * from the last substitute command's replacement string. If O_MAGIC
980 * is set, it's the string "~".
981 *
982 * !!!/XXX
983 * This doesn't exactly match the historic behavior of vi because we do
984 * the ~ substitution before calling the RE engine, so magic characters
985 * in the replacement string will be expanded by the RE engine, and they
986 * weren't historically. It's a bug.
987 */
988static int
989re_conv(SCR *sp, char **ptrnp, size_t *plenp, int *replacedp)
990{
991 size_t blen, len, needlen;
992 int magic;
993 char *bp, *p, *t;
994
995 /*
996 * First pass through, we figure out how much space we'll need.
997 * We do it in two passes, on the grounds that most of the time
998 * the user is doing a search and won't have magic characters.
999 * That way we can skip most of the memory allocation and copies.
1000 */
1001 magic = 0;
1002 for (p = *ptrnp, len = *plenp, needlen = 0; len > 0; ++p, --len)
1
Assuming 'len' is > 0
2
Loop condition is true. Entering loop body
9
Assuming 'len' is > 0
10
Loop condition is true. Entering loop body
15
Assuming 'len' is <= 0
16
Loop condition is false. Execution continues on line 1047
1003 switch (*p) {
3
Control jumps to 'case 46:' at line 1034
11
Control jumps to 'case 126:' at line 1028
1004 case '\\':
1005 if (len > 1) {
1006 --len;
1007 switch (*++p) {
1008 case '~':
1009 if (!O_ISSET(sp, O_MAGIC)((((&(((sp)))->opts[(((O_MAGIC)))])->flags) & (
(0x01))) ? (((sp)))->gp->opts[(((sp)))->opts[(((O_MAGIC
)))].o_cur.val].o_cur.val : (((sp)))->opts[(((O_MAGIC)))].
o_cur.val)
) {
1010 magic = 1;
1011 needlen += sp->repl_len;
1012 }
1013 break;
1014 case '.':
1015 case '[':
1016 case '*':
1017 if (!O_ISSET(sp, O_MAGIC)((((&(((sp)))->opts[(((O_MAGIC)))])->flags) & (
(0x01))) ? (((sp)))->gp->opts[(((sp)))->opts[(((O_MAGIC
)))].o_cur.val].o_cur.val : (((sp)))->opts[(((O_MAGIC)))].
o_cur.val)
) {
1018 magic = 1;
1019 needlen += 1;
1020 }
1021 break;
1022 default:
1023 needlen += 2;
1024 }
1025 } else
1026 needlen += 1;
1027 break;
1028 case '~':
1029 if (O_ISSET(sp, O_MAGIC)((((&(((sp)))->opts[(((O_MAGIC)))])->flags) & (
(0x01))) ? (((sp)))->gp->opts[(((sp)))->opts[(((O_MAGIC
)))].o_cur.val].o_cur.val : (((sp)))->opts[(((O_MAGIC)))].
o_cur.val)
) {
12
'?' condition is false
13
Taking true branch
1030 magic = 1;
1031 needlen += sp->repl_len;
1032 }
1033 break;
14
Execution continues on line 1002
1034 case '.':
1035 case '[':
1036 case '*':
1037 if (!O_ISSET(sp, O_MAGIC)((((&(((sp)))->opts[(((O_MAGIC)))])->flags) & (
(0x01))) ? (((sp)))->gp->opts[(((sp)))->opts[(((O_MAGIC
)))].o_cur.val].o_cur.val : (((sp)))->opts[(((O_MAGIC)))].
o_cur.val)
) {
4
Assuming the condition is false
5
'?' condition is false
6
Assuming the condition is false
7
Taking false branch
1038 magic = 1;
1039 needlen += 2;
1040 }
1041 break;
8
Execution continues on line 1002
1042 default:
1043 needlen += 1;
1044 break;
1045 }
1046
1047 if (!magic
16.1
'magic' is 1
) {
17
Taking false branch
1048 *replacedp = 0;
1049 return (0);
1050 }
1051
1052 /* Get enough memory to hold the final pattern. */
1053 *replacedp = 1;
1054 GET_SPACE_RET(sp, bp, blen, needlen){ GS *L__gp = (sp) == ((void *)0) ? ((void *)0) : (sp)->gp
; if (L__gp == ((void *)0) || (((L__gp)->flags) & ((0x0100
)))) { (bp) = ((void *)0); (blen) = 0; { void *L__bincp; if (
((needlen)) > ((blen))) { if ((L__bincp = binc(((sp)), ((bp
)), &((blen)), ((needlen)))) == ((void *)0)) return (1); (
(bp)) = L__bincp; } }; } else { { void *L__bincp; if (((needlen
)) > (L__gp->tmp_blen)) { if ((L__bincp = binc(((sp)), (
L__gp->tmp_bp), &(L__gp->tmp_blen), ((needlen)))) ==
((void *)0)) return (1); (L__gp->tmp_bp) = L__bincp; } };
(bp) = L__gp->tmp_bp; (blen) = L__gp->tmp_blen; (((L__gp
)->flags) |= ((0x0100))); } }
;
18
'?' condition is false
19
Assuming 'L__gp' is equal to null
20
Assuming 'needlen' is <= 'blen'
21
Taking false branch
1055
1056 for (p = *ptrnp, len = *plenp, t = bp; len > 0; ++p, --len)
22
Loop condition is true. Entering loop body
1057 switch (*p) {
23
Control jumps to 'case 46:' at line 1092
1058 case '\\':
1059 if (len > 1) {
1060 --len;
1061 switch (*++p) {
1062 case '~':
1063 if (O_ISSET(sp, O_MAGIC)((((&(((sp)))->opts[(((O_MAGIC)))])->flags) & (
(0x01))) ? (((sp)))->gp->opts[(((sp)))->opts[(((O_MAGIC
)))].o_cur.val].o_cur.val : (((sp)))->opts[(((O_MAGIC)))].
o_cur.val)
)
1064 *t++ = '~';
1065 else {
1066 memcpy(t,
1067 sp->repl, sp->repl_len);
1068 t += sp->repl_len;
1069 }
1070 break;
1071 case '.':
1072 case '[':
1073 case '*':
1074 if (O_ISSET(sp, O_MAGIC)((((&(((sp)))->opts[(((O_MAGIC)))])->flags) & (
(0x01))) ? (((sp)))->gp->opts[(((sp)))->opts[(((O_MAGIC
)))].o_cur.val].o_cur.val : (((sp)))->opts[(((O_MAGIC)))].
o_cur.val)
)
1075 *t++ = '\\';
1076 *t++ = *p;
1077 break;
1078 default:
1079 *t++ = '\\';
1080 *t++ = *p;
1081 }
1082 } else
1083 *t++ = '\\';
1084 break;
1085 case '~':
1086 if (O_ISSET(sp, O_MAGIC)((((&(((sp)))->opts[(((O_MAGIC)))])->flags) & (
(0x01))) ? (((sp)))->gp->opts[(((sp)))->opts[(((O_MAGIC
)))].o_cur.val].o_cur.val : (((sp)))->opts[(((O_MAGIC)))].
o_cur.val)
) {
1087 memcpy(t, sp->repl, sp->repl_len);
1088 t += sp->repl_len;
1089 } else
1090 *t++ = '~';
1091 break;
1092 case '.':
1093 case '[':
1094 case '*':
1095 if (!O_ISSET(sp, O_MAGIC)((((&(((sp)))->opts[(((O_MAGIC)))])->flags) & (
(0x01))) ? (((sp)))->gp->opts[(((sp)))->opts[(((O_MAGIC
)))].o_cur.val].o_cur.val : (((sp)))->opts[(((O_MAGIC)))].
o_cur.val)
)
24
'?' condition is false
25
Taking false branch
1096 *t++ = '\\';
1097 *t++ = *p;
26
Null pointer value stored to 't'
27
Dereference of null pointer
1098 break;
1099 default:
1100 *t++ = *p;
1101 break;
1102 }
1103
1104 *ptrnp = bp;
1105 *plenp = t - bp;
1106 return (0);
1107}
1108
1109/*
1110 * re_tag_conv --
1111 * Convert a tags search path into something that the POSIX
1112 * 1003.2 RE functions can handle.
1113 */
1114static int
1115re_tag_conv(SCR *sp, char **ptrnp, size_t *plenp, int *replacedp)
1116{
1117 size_t blen, len;
1118 int lastdollar;
1119 char *bp, *p, *t;
1120
1121 len = *plenp;
1122
1123 /* Max memory usage is 2 times the length of the string. */
1124 *replacedp = 1;
1125 GET_SPACE_RET(sp, bp, blen, len * 2){ GS *L__gp = (sp) == ((void *)0) ? ((void *)0) : (sp)->gp
; if (L__gp == ((void *)0) || (((L__gp)->flags) & ((0x0100
)))) { (bp) = ((void *)0); (blen) = 0; { void *L__bincp; if (
((len * 2)) > ((blen))) { if ((L__bincp = binc(((sp)), ((bp
)), &((blen)), ((len * 2)))) == ((void *)0)) return (1); (
(bp)) = L__bincp; } }; } else { { void *L__bincp; if (((len *
2)) > (L__gp->tmp_blen)) { if ((L__bincp = binc(((sp))
, (L__gp->tmp_bp), &(L__gp->tmp_blen), ((len * 2)))
) == ((void *)0)) return (1); (L__gp->tmp_bp) = L__bincp; }
}; (bp) = L__gp->tmp_bp; (blen) = L__gp->tmp_blen; (((
L__gp)->flags) |= ((0x0100))); } }
;
1126
1127 p = *ptrnp;
1128 t = bp;
1129
1130 /* If the last character is a '/' or '?', we just strip it. */
1131 if (len > 0 && (p[len - 1] == '/' || p[len - 1] == '?'))
1132 --len;
1133
1134 /* If the next-to-last or last character is a '$', it's magic. */
1135 if (len > 0 && p[len - 1] == '$') {
1136 --len;
1137 lastdollar = 1;
1138 } else
1139 lastdollar = 0;
1140
1141 /* If the first character is a '/' or '?', we just strip it. */
1142 if (len > 0 && (p[0] == '/' || p[0] == '?')) {
1143 ++p;
1144 --len;
1145 }
1146
1147 /* If the first or second character is a '^', it's magic. */
1148 if (p[0] == '^') {
1149 *t++ = *p++;
1150 --len;
1151 }
1152
1153 /*
1154 * Escape every other magic character we can find, meanwhile stripping
1155 * the backslashes ctags inserts when escaping the search delimiter
1156 * characters.
1157 */
1158 for (; len > 0; --len) {
1159 if (p[0] == '\\' && (p[1] == '/' || p[1] == '?')) {
1160 ++p;
1161 --len;
1162 } else if (strchr("^.[]$*", p[0]))
1163 *t++ = '\\';
1164 *t++ = *p++;
1165 if (len == 0)
1166 break;
1167 }
1168 if (lastdollar)
1169 *t++ = '$';
1170
1171 *ptrnp = bp;
1172 *plenp = t - bp;
1173 return (0);
1174}
1175
1176/*
1177 * re_error --
1178 * Report a regular expression error.
1179 *
1180 * PUBLIC: void re_error(SCR *, int, regex_t *);
1181 */
1182void
1183re_error(SCR *sp, int errcode, regex_t *preg)
1184{
1185 size_t s;
1186 char *oe;
1187
1188 s = regerror(errcode, preg, "", 0);
1189 if ((oe = malloc(s)) == NULL((void *)0))
1190 msgq(sp, M_SYSERR, NULL((void *)0));
1191 else {
1192 (void)regerror(errcode, preg, oe, s);
1193 msgq(sp, M_ERR, "RE error: %s", oe);
1194 free(oe);
1195 }
1196}
1197
1198/*
1199 * re_sub --
1200 * Do the substitution for a regular expression.
1201 */
1202static int
1203re_sub(SCR *sp, char *ip, char **lbp, size_t *lbclenp, size_t *lblenp,
1204 regmatch_t match[10])
1205{
1206 enum { C_NOTSET, C_LOWER, C_ONELOWER, C_ONEUPPER, C_UPPER } conv;
1207 size_t lbclen, lblen; /* Local copies. */
1208 size_t mlen; /* Match length. */
1209 size_t rpl; /* Remaining replacement length. */
1210 char *rp; /* Replacement pointer. */
1211 int ch;
1212 int no; /* Match replacement offset. */
1213 char *p, *t; /* Buffer pointers. */
1214 char *lb; /* Local copies. */
1215
1216 lb = *lbp; /* Get local copies. */
1217 lbclen = *lbclenp;
1218 lblen = *lblenp;
1219
1220 /*
1221 * QUOTING NOTE:
1222 *
1223 * There are some special sequences that vi provides in the
1224 * replacement patterns.
1225 * & string the RE matched (\& if nomagic set)
1226 * \# n-th regular subexpression
1227 * \E end \U, \L conversion
1228 * \e end \U, \L conversion
1229 * \l convert the next character to lower-case
1230 * \L convert to lower-case, until \E, \e, or end of replacement
1231 * \u convert the next character to upper-case
1232 * \U convert to upper-case, until \E, \e, or end of replacement
1233 *
1234 * Otherwise, since this is the lowest level of replacement, discard
1235 * all escaping characters. This (hopefully) matches historic practice.
1236 */
1237#define OUTCH(ch, nltrans){ CHAR_T __ch = (ch); u_int __value = ((unsigned char)(__ch) <=
254 ? (sp)->gp->special_key[(unsigned char)(__ch)] : (
unsigned char)(__ch) > (sp)->gp->max_special ? 0 : v_key_val
((sp),(__ch))); if ((nltrans) && (__value == K_CR || __value
== K_NL)) { { if ((sp)->newl_len == (sp)->newl_cnt) { (
sp)->newl_len += 25; { void *tmpp; if (((tmpp) = (reallocarray
(((sp)->newl), ((sp)->newl_len), (sizeof(size_t))))) ==
((void *)0)) { msgq(((sp)), M_SYSERR, ((void *)0)); free((sp
)->newl); } (sp)->newl = tmpp; }; if ((sp)->newl == (
(void *)0)) { (sp)->newl_len = 0; return (1); } } }; sp->
newl[sp->newl_cnt++] = lbclen; } else if (conv != C_NOTSET
) { switch (conv) { case C_ONELOWER: conv = C_NOTSET; case C_LOWER
: if (isupper(__ch)) __ch = tolower(__ch); break; case C_ONEUPPER
: conv = C_NOTSET; case C_UPPER: if (islower(__ch)) __ch = toupper
(__ch); break; default: abort(); } } { if (lbclen + (1) > lblen
) { lblen += (((lbclen + (1)) > (256)) ? (lbclen + (1)) : (
256)); { void *tmpp; if (((tmpp) = (realloc((lb), (lblen)))) ==
((void *)0)) { msgq(((sp)), M_SYSERR, ((void *)0)); free(lb)
; } lb = tmpp; }; if (lb == ((void *)0)) { lbclen = 0; return
(1); } (p) = lb + lbclen; } }; *p++ = __ch; ++lbclen; }
{ \
1238 CHAR_T __ch = (ch); \
1239 u_int __value = KEY_VAL(sp, __ch)((unsigned char)(__ch) <= 254 ? (sp)->gp->special_key
[(unsigned char)(__ch)] : (unsigned char)(__ch) > (sp)->
gp->max_special ? 0 : v_key_val((sp),(__ch)))
; \
1240 if ((nltrans) && (__value == K_CR || __value == K_NL)) { \
1241 NEEDNEWLINE(sp){ if ((sp)->newl_len == (sp)->newl_cnt) { (sp)->newl_len
+= 25; { void *tmpp; if (((tmpp) = (reallocarray(((sp)->newl
), ((sp)->newl_len), (sizeof(size_t))))) == ((void *)0)) {
msgq(((sp)), M_SYSERR, ((void *)0)); free((sp)->newl); } (
sp)->newl = tmpp; }; if ((sp)->newl == ((void *)0)) { (
sp)->newl_len = 0; return (1); } } }
; \
1242 sp->newl[sp->newl_cnt++] = lbclen; \
1243 } else if (conv != C_NOTSET) { \
1244 switch (conv) { \
1245 case C_ONELOWER: \
1246 conv = C_NOTSET; \
1247 /* FALLTHROUGH */ \
1248 case C_LOWER: \
1249 if (isupper(__ch)) \
1250 __ch = tolower(__ch); \
1251 break; \
1252 case C_ONEUPPER: \
1253 conv = C_NOTSET; \
1254 /* FALLTHROUGH */ \
1255 case C_UPPER: \
1256 if (islower(__ch)) \
1257 __ch = toupper(__ch); \
1258 break; \
1259 default: \
1260 abort(); \
1261 } \
1262 } \
1263 NEEDSP(sp, 1, p){ if (lbclen + (1) > lblen) { lblen += (((lbclen + (1)) >
(256)) ? (lbclen + (1)) : (256)); { void *tmpp; if (((tmpp) =
(realloc((lb), (lblen)))) == ((void *)0)) { msgq(((sp)), M_SYSERR
, ((void *)0)); free(lb); } lb = tmpp; }; if (lb == ((void *)
0)) { lbclen = 0; return (1); } (p) = lb + lbclen; } }
; \
1264 *p++ = __ch; \
1265 ++lbclen; \
1266}
1267 conv = C_NOTSET;
1268 for (rp = sp->repl, rpl = sp->repl_len, p = lb + lbclen; rpl--;) {
1269 switch (ch = *rp++) {
1270 case '&':
1271 if (O_ISSET(sp, O_MAGIC)((((&(((sp)))->opts[(((O_MAGIC)))])->flags) & (
(0x01))) ? (((sp)))->gp->opts[(((sp)))->opts[(((O_MAGIC
)))].o_cur.val].o_cur.val : (((sp)))->opts[(((O_MAGIC)))].
o_cur.val)
) {
1272 no = 0;
1273 goto subzero;
1274 }
1275 break;
1276 case '\\':
1277 if (rpl == 0)
1278 break;
1279 --rpl;
1280 switch (ch = *rp) {
1281 case '&':
1282 ++rp;
1283 if (!O_ISSET(sp, O_MAGIC)((((&(((sp)))->opts[(((O_MAGIC)))])->flags) & (
(0x01))) ? (((sp)))->gp->opts[(((sp)))->opts[(((O_MAGIC
)))].o_cur.val].o_cur.val : (((sp)))->opts[(((O_MAGIC)))].
o_cur.val)
) {
1284 no = 0;
1285 goto subzero;
1286 }
1287 break;
1288 case '0': case '1': case '2': case '3': case '4':
1289 case '5': case '6': case '7': case '8': case '9':
1290 no = *rp++ - '0';
1291subzero: if (match[no].rm_so == -1 ||
1292 match[no].rm_eo == -1)
1293 break;
1294 mlen = match[no].rm_eo - match[no].rm_so;
1295 for (t = ip + match[no].rm_so; mlen--; ++t)
1296 OUTCH(*t, 0){ CHAR_T __ch = (*t); u_int __value = ((unsigned char)(__ch) <=
254 ? (sp)->gp->special_key[(unsigned char)(__ch)] : (
unsigned char)(__ch) > (sp)->gp->max_special ? 0 : v_key_val
((sp),(__ch))); if ((0) && (__value == K_CR || __value
== K_NL)) { { if ((sp)->newl_len == (sp)->newl_cnt) { (
sp)->newl_len += 25; { void *tmpp; if (((tmpp) = (reallocarray
(((sp)->newl), ((sp)->newl_len), (sizeof(size_t))))) ==
((void *)0)) { msgq(((sp)), M_SYSERR, ((void *)0)); free((sp
)->newl); } (sp)->newl = tmpp; }; if ((sp)->newl == (
(void *)0)) { (sp)->newl_len = 0; return (1); } } }; sp->
newl[sp->newl_cnt++] = lbclen; } else if (conv != C_NOTSET
) { switch (conv) { case C_ONELOWER: conv = C_NOTSET; case C_LOWER
: if (isupper(__ch)) __ch = tolower(__ch); break; case C_ONEUPPER
: conv = C_NOTSET; case C_UPPER: if (islower(__ch)) __ch = toupper
(__ch); break; default: abort(); } } { if (lbclen + (1) > lblen
) { lblen += (((lbclen + (1)) > (256)) ? (lbclen + (1)) : (
256)); { void *tmpp; if (((tmpp) = (realloc((lb), (lblen)))) ==
((void *)0)) { msgq(((sp)), M_SYSERR, ((void *)0)); free(lb)
; } lb = tmpp; }; if (lb == ((void *)0)) { lbclen = 0; return
(1); } (p) = lb + lbclen; } }; *p++ = __ch; ++lbclen; }
;
1297 continue;
1298 case 'e':
1299 case 'E':
1300 ++rp;
1301 conv = C_NOTSET;
1302 continue;
1303 case 'l':
1304 ++rp;
1305 conv = C_ONELOWER;
1306 continue;
1307 case 'L':
1308 ++rp;
1309 conv = C_LOWER;
1310 continue;
1311 case 'u':
1312 ++rp;
1313 conv = C_ONEUPPER;
1314 continue;
1315 case 'U':
1316 ++rp;
1317 conv = C_UPPER;
1318 continue;
1319 default:
1320 ++rp;
1321 break;
1322 }
1323 }
1324 OUTCH(ch, 1){ CHAR_T __ch = (ch); u_int __value = ((unsigned char)(__ch) <=
254 ? (sp)->gp->special_key[(unsigned char)(__ch)] : (
unsigned char)(__ch) > (sp)->gp->max_special ? 0 : v_key_val
((sp),(__ch))); if ((1) && (__value == K_CR || __value
== K_NL)) { { if ((sp)->newl_len == (sp)->newl_cnt) { (
sp)->newl_len += 25; { void *tmpp; if (((tmpp) = (reallocarray
(((sp)->newl), ((sp)->newl_len), (sizeof(size_t))))) ==
((void *)0)) { msgq(((sp)), M_SYSERR, ((void *)0)); free((sp
)->newl); } (sp)->newl = tmpp; }; if ((sp)->newl == (
(void *)0)) { (sp)->newl_len = 0; return (1); } } }; sp->
newl[sp->newl_cnt++] = lbclen; } else if (conv != C_NOTSET
) { switch (conv) { case C_ONELOWER: conv = C_NOTSET; case C_LOWER
: if (isupper(__ch)) __ch = tolower(__ch); break; case C_ONEUPPER
: conv = C_NOTSET; case C_UPPER: if (islower(__ch)) __ch = toupper
(__ch); break; default: abort(); } } { if (lbclen + (1) > lblen
) { lblen += (((lbclen + (1)) > (256)) ? (lbclen + (1)) : (
256)); { void *tmpp; if (((tmpp) = (realloc((lb), (lblen)))) ==
((void *)0)) { msgq(((sp)), M_SYSERR, ((void *)0)); free(lb)
; } lb = tmpp; }; if (lb == ((void *)0)) { lbclen = 0; return
(1); } (p) = lb + lbclen; } }; *p++ = __ch; ++lbclen; }
;
1325 }
1326
1327 *lbp = lb; /* Update caller's information. */
1328 *lbclenp = lbclen;
1329 *lblenp = lblen;
1330 return (0);
1331}