Bug Summary

File:src/usr.bin/vi/build/../ex/ex_subst.c
Warning:line 1324, column 3
Potential leak of memory pointed to by 'lb'

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple amd64-unknown-openbsd7.4 -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name ex_subst.c -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 1 -pic-is-pie -mframe-pointer=all -relaxed-aliasing -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -target-feature +retpoline-indirect-calls -target-feature +retpoline-indirect-branches -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/usr/src/usr.bin/vi/build/obj -resource-dir /usr/local/llvm16/lib/clang/16 -I /usr/src/usr.bin/vi/build -I /usr/src/usr.bin/vi/build/../include -I . -internal-isystem /usr/local/llvm16/lib/clang/16/include -internal-externc-isystem /usr/include -O2 -fdebug-compilation-dir=/usr/src/usr.bin/vi/build/obj -ferror-limit 19 -fwrapv -D_RET_PROTECTOR -ret-protector -fcf-protection=branch -fno-jump-tables -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-valloc -fno-builtin-free -fno-builtin-strdup -fno-builtin-strndup -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/ben/Projects/scan/2024-01-11-140451-98009-1 -x c /usr/src/usr.bin/vi/build/../ex/ex_subst.c
1/* $OpenBSD: ex_subst.c,v 1.31 2023/06/23 15:06:45 millert Exp $ */
2
3/*-
4 * Copyright (c) 1992, 1993, 1994
5 * The Regents of the University of California. All rights reserved.
6 * Copyright (c) 1992, 1993, 1994, 1995, 1996
7 * Keith Bostic. All rights reserved.
8 *
9 * See the LICENSE file for redistribution information.
10 */
11
12#include "config.h"
13
14#include <sys/queue.h>
15#include <sys/time.h>
16
17#include <bitstring.h>
18#include <ctype.h>
19#include <errno.h>
20#include <limits.h>
21#include <stdio.h>
22#include <stdlib.h>
23#include <string.h>
24#include <unistd.h>
25
26#include "../common/common.h"
27#include "../vi/vi.h"
28
/* Larger of two values.  Each argument may be evaluated more than once. */
#define	MAXIMUM(a, b)	(((a) > (b)) ? (a) : (b))

/* Flag bits passed to s() in its `flags' argument. */
#define	SUB_FIRST	0x01		/* The 'r' flag isn't reasonable. */
#define	SUB_MUSTSETR	0x02		/* The 'r' flag is required. */
33
34static int re_conv(SCR *, char **, size_t *, int *);
35static int re_sub(SCR *, char *, char **, size_t *, size_t *, regmatch_t [10]);
36static int re_tag_conv(SCR *, char **, size_t *, int *);
37static int s(SCR *, EXCMD *, char *, regex_t *, u_int);
38
39/*
40 * ex_s --
41 * [line [,line]] s[ubstitute] [[/;]pat[/;]/repl[/;] [cgr] [count] [#lp]]
42 *
43 * Substitute on lines matching a pattern.
44 *
45 * PUBLIC: int ex_s(SCR *, EXCMD *);
46 */
47int
48ex_s(SCR *sp, EXCMD *cmdp)
49{
50 regex_t *re;
51 size_t blen, len;
52 u_int flags;
53 int delim;
54 char *bp, *ptrn, *rep, *p, *t;
55
56 /*
57 * Skip leading white space.
58 *
59 * !!!
60 * Historic vi allowed any non-alphanumeric to serve as the
61 * substitution command delimiter.
62 *
63 * !!!
64 * If the arguments are empty, it's the same as &, i.e. we
65 * repeat the last substitution.
66 */
67 if (cmdp->argc == 0)
68 goto subagain;
69 for (p = cmdp->argv[0]->bp,
70 len = cmdp->argv[0]->len; len > 0; --len, ++p) {
71 if (!isblank(*p))
72 break;
73 }
74 if (len == 0)
75subagain: return (ex_subagain(sp, cmdp));
76
77 delim = *p++;
78 if (isalnum(delim) || delim == '\\')
79 return (s(sp, cmdp, p, &sp->subre_c, SUB_MUSTSETR0x02));
80
81 /*
82 * !!!
83 * The full-blown substitute command reset the remembered
84 * state of the 'c' and 'g' suffices.
85 */
86 sp->c_suffix = sp->g_suffix = 0;
87
88 /*
89 * Get the pattern string, toss escaping characters.
90 *
91 * !!!
92 * Historic vi accepted any of the following forms:
93 *
94 * :s/abc/def/ change "abc" to "def"
95 * :s/abc/def change "abc" to "def"
96 * :s/abc/ delete "abc"
97 * :s/abc delete "abc"
98 *
99 * QUOTING NOTE:
100 *
101 * Only toss an escaping character if it escapes a delimiter.
102 * This means that "s/A/\\\\f" replaces "A" with "\\f". It
103 * would be nice to be more regular, i.e. for each layer of
104 * escaping a single escaping character is removed, but that's
105 * not how the historic vi worked.
106 */
107 for (ptrn = t = p;;) {
108 if (p[0] == '\0' || p[0] == delim) {
109 if (p[0] == delim)
110 ++p;
111 /*
112 * !!!
113 * Nul terminate the pattern string -- it's passed
114 * to regcomp which doesn't understand anything else.
115 */
116 *t = '\0';
117 break;
118 }
119 if (p[0] == '\\') {
120 if (p[1] == delim)
121 ++p;
122 else if (p[1] == '\\')
123 *t++ = *p++;
124 }
125 *t++ = *p++;
126 }
127
128 /*
129 * If the pattern string is empty, use the last RE (not just the
130 * last substitution RE).
131 */
132 if (*ptrn == '\0') {
133 if (sp->re == NULL((void *)0)) {
134 ex_emsg(sp, NULL((void *)0), EXM_NOPREVRE);
135 return (1);
136 }
137
138 /* Re-compile the RE if necessary. */
139 if (!F_ISSET(sp, SC_RE_SEARCH)(((sp)->flags) & ((0x00400000))) && re_compile(sp,
140 sp->re, sp->re_len, NULL((void *)0), NULL((void *)0), &sp->re_c, RE_C_SEARCH0x0002))
141 return (1);
142 flags = 0;
143 } else {
144 /*
145 * !!!
146 * Compile the RE. Historic practice is that substitutes set
147 * the search direction as well as both substitute and search
148 * RE's. We compile the RE twice, as we don't want to bother
149 * ref counting the pattern string and (opaque) structure.
150 */
151 if (re_compile(sp, ptrn, t - ptrn,
152 &sp->re, &sp->re_len, &sp->re_c, RE_C_SEARCH0x0002))
153 return (1);
154 if (re_compile(sp, ptrn, t - ptrn,
155 &sp->subre, &sp->subre_len, &sp->subre_c, RE_C_SUBST0x0008))
156 return (1);
157
158 flags = SUB_FIRST0x01;
159 sp->searchdir = FORWARD;
160 }
161 re = &sp->re_c;
162
163 /*
164 * Get the replacement string.
165 *
166 * The special character & (\& if O_MAGIC not set) matches the
167 * entire RE. No handling of & is required here, it's done by
168 * re_sub().
169 *
170 * The special character ~ (\~ if O_MAGIC not set) inserts the
171 * previous replacement string into this replacement string.
172 * Count ~'s to figure out how much space we need. We could
173 * special case nonexistent last patterns or whether or not
174 * O_MAGIC is set, but it's probably not worth the effort.
175 *
176 * QUOTING NOTE:
177 *
178 * Only toss an escaping character if it escapes a delimiter or
179 * if O_MAGIC is set and it escapes a tilde.
180 *
181 * !!!
182 * If the entire replacement pattern is "%", then use the last
183 * replacement pattern. This semantic was added to vi in System
184 * V and then percolated elsewhere, presumably around the time
185 * that it was added to their version of ed(1).
186 */
187 if (p[0] == '\0' || p[0] == delim) {
188 if (p[0] == delim)
189 ++p;
190 free(sp->repl);
191 sp->repl = NULL((void *)0);
192 sp->repl_len = 0;
193 } else if (p[0] == '%' && (p[1] == '\0' || p[1] == delim))
194 p += p[1] == delim ? 2 : 1;
195 else {
196 for (rep = p, len = 0;
197 p[0] != '\0' && p[0] != delim; ++p, ++len)
198 if (p[0] == '~')
199 len += sp->repl_len;
200 GET_SPACE_RET(sp, bp, blen, len){ GS *L__gp = (sp) == ((void *)0) ? ((void *)0) : (sp)->gp
; if (L__gp == ((void *)0) || (((L__gp)->flags) & ((0x0100
)))) { (bp) = ((void *)0); (blen) = 0; { void *L__bincp; if (
((len)) > ((blen))) { if ((L__bincp = binc(((sp)), ((bp)),
&((blen)), ((len)))) == ((void *)0)) return (1); ((bp)) =
L__bincp; } }; } else { { void *L__bincp; if (((len)) > (
L__gp->tmp_blen)) { if ((L__bincp = binc(((sp)), (L__gp->
tmp_bp), &(L__gp->tmp_blen), ((len)))) == ((void *)0))
return (1); (L__gp->tmp_bp) = L__bincp; } }; (bp) = L__gp
->tmp_bp; (blen) = L__gp->tmp_blen; (((L__gp)->flags
) |= ((0x0100))); } }
;
201 for (t = bp, len = 0, p = rep;;) {
202 if (p[0] == '\0' || p[0] == delim) {
203 if (p[0] == delim)
204 ++p;
205 break;
206 }
207 if (p[0] == '\\') {
208 if (p[1] == delim)
209 ++p;
210 else if (p[1] == '\\') {
211 *t++ = *p++;
212 ++len;
213 } else if (p[1] == '~') {
214 ++p;
215 if (!O_ISSET(sp, O_MAGIC)((((&(((sp)))->opts[(((O_MAGIC)))])->flags) & (
(0x01))) ? (((sp)))->gp->opts[(((sp)))->opts[(((O_MAGIC
)))].o_cur.val].o_cur.val : (((sp)))->opts[(((O_MAGIC)))].
o_cur.val)
)
216 goto tilde;
217 }
218 } else if (p[0] == '~' && O_ISSET(sp, O_MAGIC)((((&(((sp)))->opts[(((O_MAGIC)))])->flags) & (
(0x01))) ? (((sp)))->gp->opts[(((sp)))->opts[(((O_MAGIC
)))].o_cur.val].o_cur.val : (((sp)))->opts[(((O_MAGIC)))].
o_cur.val)
) {
219tilde: ++p;
220 memcpy(t, sp->repl, sp->repl_len);
221 t += sp->repl_len;
222 len += sp->repl_len;
223 continue;
224 }
225 *t++ = *p++;
226 ++len;
227 }
228 if ((sp->repl_len = len) != 0) {
229 free(sp->repl);
230 if ((sp->repl = malloc(len)) == NULL((void *)0)) {
231 msgq(sp, M_SYSERR, NULL((void *)0));
232 FREE_SPACE(sp, bp, blen){ GS *L__gp = (sp) == ((void *)0) ? ((void *)0) : (sp)->gp
; if (L__gp != ((void *)0) && (bp) == L__gp->tmp_bp
) (((L__gp)->flags) &= ~((0x0100))); else free(bp); }
;
233 return (1);
234 }
235 memcpy(sp->repl, bp, len);
236 }
237 FREE_SPACE(sp, bp, blen){ GS *L__gp = (sp) == ((void *)0) ? ((void *)0) : (sp)->gp
; if (L__gp != ((void *)0) && (bp) == L__gp->tmp_bp
) (((L__gp)->flags) &= ~((0x0100))); else free(bp); }
;
238 }
239 return (s(sp, cmdp, p, re, flags));
240}
241
242/*
243 * ex_subagain --
244 * [line [,line]] & [cgr] [count] [#lp]]
245 *
246 * Substitute using the last substitute RE and replacement pattern.
247 *
248 * PUBLIC: int ex_subagain(SCR *, EXCMD *);
249 */
250int
251ex_subagain(SCR *sp, EXCMD *cmdp)
252{
253 if (sp->subre == NULL((void *)0)) {
254 ex_emsg(sp, NULL((void *)0), EXM_NOPREVRE);
255 return (1);
256 }
257 if (!F_ISSET(sp, SC_RE_SUBST)(((sp)->flags) & ((0x00800000))) && re_compile(sp,
258 sp->subre, sp->subre_len, NULL((void *)0), NULL((void *)0), &sp->subre_c, RE_C_SUBST0x0008))
259 return (1);
260 return (s(sp,
261 cmdp, cmdp->argc ? cmdp->argv[0]->bp : NULL((void *)0), &sp->subre_c, 0));
262}
263
264/*
265 * ex_subtilde --
266 * [line [,line]] ~ [cgr] [count] [#lp]]
267 *
268 * Substitute using the last RE and last substitute replacement pattern.
269 *
270 * PUBLIC: int ex_subtilde(SCR *, EXCMD *);
271 */
272int
273ex_subtilde(SCR *sp, EXCMD *cmdp)
274{
275 if (sp->re == NULL((void *)0)) {
276 ex_emsg(sp, NULL((void *)0), EXM_NOPREVRE);
277 return (1);
278 }
279 if (!F_ISSET(sp, SC_RE_SEARCH)(((sp)->flags) & ((0x00400000))) && re_compile(sp,
280 sp->re, sp->re_len, NULL((void *)0), NULL((void *)0), &sp->re_c, RE_C_SEARCH0x0002))
281 return (1);
282 return (s(sp,
283 cmdp, cmdp->argc ? cmdp->argv[0]->bp : NULL((void *)0), &sp->re_c, 0));
284}
285
286/*
287 * s --
288 * Do the substitution. This stuff is *really* tricky. There are lots of
289 * special cases, and general nastiness. Don't mess with it unless you're
290 * pretty confident.
291 *
292 * The nasty part of the substitution is what happens when the replacement
293 * string contains newlines. It's a bit tricky -- consider the information
294 * that has to be retained for "s/f\(o\)o/^M\1^M\1/". The solution here is
295 * to build a set of newline offsets which we use to break the line up later,
296 * when the replacement is done. Don't change it unless you're *damned*
297 * confident.
298 */
/*
 * NEEDNEWLINE --
 *	Make sure the (sp)->newl offset array has room for one more entry,
 *	growing it 25 slots at a time once newl_cnt catches up with newl_len.
 *
 *	If the array pointer is NULL after the REALLOCARRAY step, newl_len
 *	is reset to 0 and the macro executes "return (1)" in the ENCLOSING
 *	function.
 *
 *	Deliberately a plain brace block rather than do { } while (0).
 */
#define	NEEDNEWLINE(sp) {						\
	if ((sp)->newl_cnt == (sp)->newl_len) {				\
		(sp)->newl_len += 25;					\
		REALLOCARRAY((sp), (sp)->newl,				\
		    (sp)->newl_len, sizeof(size_t));			\
		if ((sp)->newl == NULL) {				\
			(sp)->newl_len = 0;				\
			return (1);					\
		}							\
	}								\
}
310
/*
 * BUILD --
 *	Append len bytes starting at l to the build buffer.
 *
 *	Uses the caller's locals lb (buffer), lbclen (bytes used) and lblen
 *	(bytes allocated).  When the append would not fit, lblen is grown by
 *	the larger of (lbclen + len) and 256 and the buffer is reallocated.
 *	If lb is NULL after the REALLOC step, lbclen is reset to 0 and the
 *	macro executes "return (1)" in the ENCLOSING function.
 *
 *	Deliberately a plain brace block rather than do { } while (0): some
 *	call sites invoke it without a trailing semicolon.
 */
#define	BUILD(sp, l, len) {						\
	if (lblen < lbclen + (len)) {					\
		lblen += MAXIMUM(lbclen + (len), 256);			\
		REALLOC((sp), lb, lblen);				\
		if (lb == NULL) {					\
			lbclen = 0;					\
			return (1);					\
		}							\
	}								\
	memcpy(lb + lbclen, (l), (len));				\
	lbclen += (len);						\
}
323
/*
 * NEEDSP --
 *	Make sure the build buffer can take len more bytes.
 *
 *	Uses the caller's locals lb, lbclen and lblen with the same growth
 *	rule as BUILD (grow by the larger of lbclen + len and 256).  Only
 *	when the buffer is reallocated is pnt re-pointed at lb + lbclen,
 *	since the old address may no longer be valid.  If lb is NULL after
 *	the REALLOC step, lbclen is reset to 0 and the macro executes
 *	"return (1)" in the ENCLOSING function.
 *
 *	Deliberately a plain brace block rather than do { } while (0).
 */
#define	NEEDSP(sp, len, pnt) {						\
	if (lblen < lbclen + (len)) {					\
		lblen += MAXIMUM(lbclen + (len), 256);			\
		REALLOC((sp), lb, lblen);				\
		if (lb == NULL) {					\
			lbclen = 0;					\
			return (1);					\
		}							\
		(pnt) = lb + lbclen;					\
	}								\
}
335
336static int
337s(SCR *sp, EXCMD *cmdp, char *s, regex_t *re, u_int flags)
338{
339 EVENT ev;
340 MARK from, to;
341 TEXTH tiq;
342 recno_t elno, lno, slno;
343 regmatch_t match[10];
344 size_t blen, cnt, last, lbclen, lblen, len, llen;
345 size_t offset, saved_offset, scno;
346 int lflag, nflag, pflag, rflag;
347 int didsub, do_eol_match, eflags, nempty, eval;
348 int linechanged, matched, quit, rval;
349 unsigned long ul;
350 char *bp, *lb;
351
352 NEEDFILE(sp, cmdp){ if ((sp)->ep == ((void *)0)) { ex_emsg((sp), (cmdp)->
cmd->name, EXM_NOFILEYET); return (1); } }
;
353
354 slno = sp->lno;
355 scno = sp->cno;
356
357 /*
358 * !!!
359 * Historically, the 'g' and 'c' suffices were always toggled as flags,
360 * so ":s/A/B/" was the same as ":s/A/B/ccgg". If O_EDCOMPATIBLE was
361 * not set, they were initialized to 0 for all substitute commands. If
362 * O_EDCOMPATIBLE was set, they were initialized to 0 only if the user
363 * specified substitute/replacement patterns (see ex_s()).
364 */
365 if (!O_ISSET(sp, O_EDCOMPATIBLE)((((&(((sp)))->opts[(((O_EDCOMPATIBLE)))])->flags) &
((0x01))) ? (((sp)))->gp->opts[(((sp)))->opts[(((O_EDCOMPATIBLE
)))].o_cur.val].o_cur.val : (((sp)))->opts[(((O_EDCOMPATIBLE
)))].o_cur.val)
)
366 sp->c_suffix = sp->g_suffix = 0;
367
368 /*
369 * Historic vi permitted the '#', 'l' and 'p' options in vi mode, but
370 * it only displayed the last change. I'd disallow them, but they are
371 * useful in combination with the [v]global commands. In the current
372 * model the problem is combining them with the 'c' flag -- the screen
373 * would have to flip back and forth between the confirm screen and the
374 * ex print screen, which would be pretty awful. We do display all
375 * changes, though, for what that's worth.
376 *
377 * !!!
378 * Historic vi was fairly strict about the order of "options", the
379 * count, and "flags". I'm somewhat fuzzy on the difference between
380 * options and flags, anyway, so this is a simpler approach, and we
381 * just take it them in whatever order the user gives them. (The ex
382 * usage statement doesn't reflect this.)
383 */
384 lflag = nflag = pflag = rflag = 0;
385 if (s == NULL((void *)0))
386 goto noargs;
387 for (lno = OOBLNO0; *s != '\0'; ++s)
388 switch (*s) {
389 case ' ':
390 case '\t':
391 continue;
392 case '+':
393 ++cmdp->flagoff;
394 break;
395 case '-':
396 --cmdp->flagoff;
397 break;
398 case '0': case '1': case '2': case '3': case '4':
399 case '5': case '6': case '7': case '8': case '9':
400 if (lno != OOBLNO0)
401 goto usage;
402 errno(*__errno()) = 0;
403 if ((ul = strtoul(s, &s, 10)) >= UINT_MAX0xffffffffU)
404 errno(*__errno()) = ERANGE34;
405 if (*s == '\0') /* Loop increment correction. */
406 --s;
407 if (errno(*__errno()) == ERANGE34) {
408 if (ul >= UINT_MAX0xffffffffU)
409 msgq(sp, M_ERR, "Count overflow");
410 else
411 msgq(sp, M_SYSERR, NULL((void *)0));
412 return (1);
413 }
414 lno = (recno_t)ul;
415 /*
416 * In historic vi, the count was inclusive from the
417 * second address.
418 */
419 cmdp->addr1.lno = cmdp->addr2.lno;
420 cmdp->addr2.lno += lno - 1;
421 if (!db_exist(sp, cmdp->addr2.lno) &&
422 db_last(sp, &cmdp->addr2.lno))
423 return (1);
424 break;
425 case '#':
426 nflag = 1;
427 break;
428 case 'c':
429 sp->c_suffix = !sp->c_suffix;
430
431 /* Ex text structure initialization. */
432 if (F_ISSET(sp, SC_EX)(((sp)->flags) & ((0x00000001)))) {
433 memset(&tiq, 0, sizeof(TEXTH));
434 TAILQ_INIT(&tiq)do { (&tiq)->tqh_first = ((void *)0); (&tiq)->tqh_last
= &(&tiq)->tqh_first; } while (0)
;
435 }
436 break;
437 case 'g':
438 sp->g_suffix = !sp->g_suffix;
439 break;
440 case 'l':
441 lflag = 1;
442 break;
443 case 'p':
444 pflag = 1;
445 break;
446 case 'r':
447 if (LF_ISSET(SUB_FIRST)((flags) & ((0x01)))) {
448 msgq(sp, M_ERR,
449 "Regular expression specified; r flag meaningless");
450 return (1);
451 }
452 if (!F_ISSET(sp, SC_RE_SEARCH)(((sp)->flags) & ((0x00400000)))) {
453 ex_emsg(sp, NULL((void *)0), EXM_NOPREVRE);
454 return (1);
455 }
456 rflag = 1;
457 re = &sp->re_c;
458 break;
459 default:
460 goto usage;
461 }
462
463 if (*s != '\0' || (!rflag && LF_ISSET(SUB_MUSTSETR)((flags) & ((0x02))))) {
464usage: ex_emsg(sp, cmdp->cmd->usage, EXM_USAGE);
465 return (1);
466 }
467
468noargs: if (F_ISSET(sp, SC_VI)(((sp)->flags) & ((0x00000002))) && sp->c_suffix && (lflag || nflag || pflag)) {
469 msgq(sp, M_ERR,
470"The #, l and p flags may not be combined with the c flag in vi mode");
471 return (1);
472 }
473
474 /*
475 * bp: if interactive, line cache
476 * blen: if interactive, line cache length
477 * lb: build buffer pointer.
478 * lbclen: current length of built buffer.
479 * lblen; length of build buffer.
480 */
481 bp = lb = NULL((void *)0);
482 blen = lbclen = lblen = 0;
483
484 /* For each line... */
485 for (matched = quit = 0, lno = cmdp->addr1.lno,
486 elno = cmdp->addr2.lno; !quit && lno <= elno; ++lno) {
487
488 /* Someone's unhappy, time to stop. */
489 if (INTERRUPTED(sp)(((((sp)->gp)->flags) & ((0x0004))) || (!v_event_get
((sp), ((void *)0), 0, 0x001) && ((((sp)->gp)->
flags) & ((0x0004)))))
)
490 break;
491
492 /* Get the line. */
493 if (db_get(sp, lno, DBG_FATAL0x001, &s, &llen))
494 goto err;
495
496 /*
497 * Make a local copy if doing confirmation -- when calling
498 * the confirm routine we're likely to lose the cached copy.
499 */
500 if (sp->c_suffix) {
501 if (bp == NULL((void *)0)) {
502 GET_SPACE_RET(sp, bp, blen, llen){ GS *L__gp = (sp) == ((void *)0) ? ((void *)0) : (sp)->gp
; if (L__gp == ((void *)0) || (((L__gp)->flags) & ((0x0100
)))) { (bp) = ((void *)0); (blen) = 0; { void *L__bincp; if (
((llen)) > ((blen))) { if ((L__bincp = binc(((sp)), ((bp))
, &((blen)), ((llen)))) == ((void *)0)) return (1); ((bp)
) = L__bincp; } }; } else { { void *L__bincp; if (((llen)) >
(L__gp->tmp_blen)) { if ((L__bincp = binc(((sp)), (L__gp->
tmp_bp), &(L__gp->tmp_blen), ((llen)))) == ((void *)0)
) return (1); (L__gp->tmp_bp) = L__bincp; } }; (bp) = L__gp
->tmp_bp; (blen) = L__gp->tmp_blen; (((L__gp)->flags
) |= ((0x0100))); } }
;
503 } else
504 ADD_SPACE_RET(sp, bp, blen, llen){ GS *L__gp = (sp) == ((void *)0) ? ((void *)0) : (sp)->gp
; if (L__gp != ((void *)0) && (bp) == L__gp->tmp_bp
) { (((L__gp)->flags) &= ~((0x0100))); { void *L__bincp
; if (((llen)) > (L__gp->tmp_blen)) { if ((L__bincp = binc
(((sp)), (L__gp->tmp_bp), &(L__gp->tmp_blen), ((llen
)))) == ((void *)0)) return (1); (L__gp->tmp_bp) = L__bincp
; } }; (bp) = L__gp->tmp_bp; (blen) = L__gp->tmp_blen; (
((L__gp)->flags) |= ((0x0100))); } else { void *L__bincp; if
(((llen)) > ((blen))) { if ((L__bincp = binc(((sp)), ((bp
)), &((blen)), ((llen)))) == ((void *)0)) return (1); ((bp
)) = L__bincp; } }; }
;
505 memcpy(bp, s, llen);
506 s = bp;
507 }
508
509 /* Start searching from the beginning. */
510 offset = 0;
511 len = llen;
512
513 /* Reset the build buffer offset. */
514 lbclen = 0;
515
516 /* Reset empty match test variable. */
517 nempty = -1;
518
519 /*
520 * We don't want to have to do a setline if the line didn't
521 * change -- keep track of whether or not this line changed.
522 * If doing confirmations, don't want to keep setting the
523 * line if change is refused -- keep track of substitutions.
524 */
525 didsub = linechanged = 0;
526
527 /* New line, do an EOL match. */
528 do_eol_match = 1;
529
530 /* It's not nul terminated, but we pretend it is. */
531 eflags = REG_STARTEND00004;
532
533 /* The search area is from s + offset to the EOL. */
534nextmatch: match[0].rm_so = offset;
535 match[0].rm_eo = llen;
536
537 /* Get the next match. */
538 eval = regexec(re, (char *)s, 10, match, eflags);
539
540 /*
541 * There wasn't a match or if there was an error, deal with
542 * it. If there was a previous match in this line, resolve
543 * the changes into the database. Otherwise, just move on.
544 */
545 if (eval == REG_NOMATCH1)
546 goto endmatch;
547 if (eval != 0) {
548 re_error(sp, eval, re);
549 goto err;
550 }
551 matched = 1;
552
553 /* Only the first search can match an anchored expression. */
554 eflags |= REG_NOTBOL00001;
555
556 /*
557 * !!!
558 * It's possible to match 0-length strings -- for example, the
559 * command s;a*;X;, when matched against the string "aabb" will
560 * result in "XbXbX", i.e. the matches are "aa", the space
561 * between the b's and the space between the b's and the end of
562 * the string. There is a similar space between the beginning
563 * of the string and the a's. The rule that we use (because vi
564 * historically used it) is that any 0-length match, occurring
565 * immediately after a match, is ignored. Otherwise, the above
566 * example would have resulted in "XXbXbX". Another example is
567 * incorrectly using " *" to replace groups of spaces with one
568 * space.
569 *
570 * If the match is empty and at the same place as the end of the
571 * previous match, ignore the match and move forward. If
572 * there's no more characters in the string, we were
573 * attempting to match after the last character, so quit.
574 */
575 if (match[0].rm_so == nempty && match[0].rm_eo == nempty) {
576 nempty = -1;
577 if (len == 0)
578 goto endmatch;
579 BUILD(sp, s + offset, 1){ if (lbclen + (1) > lblen) { lblen += (((lbclen + (1)) >
(256)) ? (lbclen + (1)) : (256)); { void *tmpp; if (((tmpp) =
(realloc((lb), (lblen)))) == ((void *)0)) { msgq(((sp)), M_SYSERR
, ((void *)0)); free(lb); } lb = tmpp; }; if (lb == ((void *)
0)) { lbclen = 0; return (1); } } memcpy(lb + lbclen, (s + offset
), (1)); lbclen += (1); }
580 ++offset;
581 --len;
582 goto nextmatch;
583 }
584
585 /* Confirm change. */
586 if (sp->c_suffix) {
587 /*
588 * Set the cursor position for confirmation. Note,
589 * if we matched on a '$', the cursor may be past
590 * the end of line.
591 */
592 from.lno = to.lno = lno;
593 from.cno = match[0].rm_so;
594 to.cno = match[0].rm_eo;
595 /*
596 * Both ex and vi have to correct for a change before
597 * the first character in the line.
598 */
599 if (llen == 0)
600 from.cno = to.cno = 0;
601 if (F_ISSET(sp, SC_VI)(((sp)->flags) & ((0x00000002)))) {
602 /*
603 * Only vi has to correct for a change after
604 * the last character in the line.
605 *
606 * XXX
607 * It would be nice to change the vi code so
608 * that we could display a cursor past EOL.
609 */
610 if (to.cno >= llen)
611 to.cno = llen - 1;
612 if (from.cno >= llen)
613 from.cno = llen - 1;
614
615 sp->lno = from.lno;
616 sp->cno = from.cno;
617 if (vs_refresh(sp, 1))
618 goto err;
619
620 vs_update(sp, "Confirm change? [n]", NULL((void *)0));
621
622 if (v_event_get(sp, &ev, 0, 0))
623 goto err;
624 switch (ev.e_event) {
625 case E_CHARACTER:
626 break;
627 case E_EOF:
628 case E_ERR:
629 case E_INTERRUPT:
630 goto lquit;
631 default:
632 v_event_err(sp, &ev);
633 goto lquit;
634 }
635 } else {
636 const int flags =
637 O_ISSET(sp, O_NUMBER)((((&(((sp)))->opts[(((O_NUMBER)))])->flags) & (
(0x01))) ? (((sp)))->gp->opts[(((sp)))->opts[(((O_NUMBER
)))].o_cur.val].o_cur.val : (((sp)))->opts[(((O_NUMBER)))]
.o_cur.val)
? E_C_HASH0x00200 : 0;
638 if (ex_print(sp, cmdp, &from, &to, flags) ||
639 ex_scprint(sp, &from, &to))
640 goto lquit;
641 if (ex_txt(sp, &tiq, 0, TXT_CR0x00000800))
642 goto err;
643 ev.e_c_u_event._e_ch.c = TAILQ_FIRST(&tiq)((&tiq)->tqh_first)->lb[0];
644 }
645
646 switch (ev.e_c_u_event._e_ch.c) {
647 case CH_YES'y':
648 break;
649 default:
650 case CH_NO'n':
651 didsub = 0;
652 BUILD(sp, s + offset, match[0].rm_eo - offset){ if (lbclen + (match[0].rm_eo - offset) > lblen) { lblen +=
(((lbclen + (match[0].rm_eo - offset)) > (256)) ? (lbclen
+ (match[0].rm_eo - offset)) : (256)); { void *tmpp; if (((tmpp
) = (realloc((lb), (lblen)))) == ((void *)0)) { msgq(((sp)), M_SYSERR
, ((void *)0)); free(lb); } lb = tmpp; }; if (lb == ((void *)
0)) { lbclen = 0; return (1); } } memcpy(lb + lbclen, (s + offset
), (match[0].rm_eo - offset)); lbclen += (match[0].rm_eo - offset
); }
;
653 goto skip;
654 case CH_QUIT'q':
655 /* Set the quit/interrupted flags. */
656lquit: quit = 1;
657 F_SET(sp->gp, G_INTERRUPTED)(((sp->gp)->flags) |= ((0x0004)));
658
659 /*
660 * Resolve any changes, then return to (and
661 * exit from) the main loop.
662 */
663 goto endmatch;
664 }
665 }
666
667 /*
668 * Set the cursor to the last position changed, converting
669 * from 1-based to 0-based.
670 */
671 sp->lno = lno;
672 sp->cno = match[0].rm_so;
673
674 /* Copy the bytes before the match into the build buffer. */
675 BUILD(sp, s + offset, match[0].rm_so - offset){ if (lbclen + (match[0].rm_so - offset) > lblen) { lblen +=
(((lbclen + (match[0].rm_so - offset)) > (256)) ? (lbclen
+ (match[0].rm_so - offset)) : (256)); { void *tmpp; if (((tmpp
) = (realloc((lb), (lblen)))) == ((void *)0)) { msgq(((sp)), M_SYSERR
, ((void *)0)); free(lb); } lb = tmpp; }; if (lb == ((void *)
0)) { lbclen = 0; return (1); } } memcpy(lb + lbclen, (s + offset
), (match[0].rm_so - offset)); lbclen += (match[0].rm_so - offset
); }
;
676
677 /* Substitute the matching bytes. */
678 didsub = 1;
679 if (re_sub(sp, s, &lb, &lbclen, &lblen, match))
680 goto err;
681
682 /* Set the change flag so we know this line was modified. */
683 linechanged = 1;
684
685 /* Move past the matched bytes. */
686skip: offset = match[0].rm_eo;
687 len = llen - match[0].rm_eo;
688
689 /* A match cannot be followed by an empty pattern. */
690 nempty = match[0].rm_eo;
691
692 /*
693 * If doing a global change with confirmation, we have to
694 * update the screen. The basic idea is to store the line
695 * so the screen update routines can find it, and restart.
696 */
697 if (didsub && sp->c_suffix && sp->g_suffix) {
698 /*
699 * The new search offset will be the end of the
700 * modified line.
701 */
702 saved_offset = lbclen;
703
704 /* Copy the rest of the line. */
705 if (len)
706 BUILD(sp, s + offset, len){ if (lbclen + (len) > lblen) { lblen += (((lbclen + (len)
) > (256)) ? (lbclen + (len)) : (256)); { void *tmpp; if (
((tmpp) = (realloc((lb), (lblen)))) == ((void *)0)) { msgq(((
sp)), M_SYSERR, ((void *)0)); free(lb); } lb = tmpp; }; if (lb
== ((void *)0)) { lbclen = 0; return (1); } } memcpy(lb + lbclen
, (s + offset), (len)); lbclen += (len); }
707
708 /* Set the new offset. */
709 offset = saved_offset;
710
711 /* Store inserted lines, adjusting the build buffer. */
712 last = 0;
713 if (sp->newl_cnt) {
714 for (cnt = 0;
715 cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) {
716 if (db_insert(sp, lno,
717 lb + last, sp->newl[cnt] - last))
718 goto err;
719 last = sp->newl[cnt] + 1;
720 ++sp->rptlines[L_ADDED0];
721 }
722 lbclen -= last;
723 offset -= last;
724 sp->newl_cnt = 0;
725 }
726
727 /* Store and retrieve the line. */
728 if (db_set(sp, lno, lb + last, lbclen))
729 goto err;
730 if (db_get(sp, lno, DBG_FATAL0x001, &s, &llen))
731 goto err;
732 ADD_SPACE_RET(sp, bp, blen, llen){ GS *L__gp = (sp) == ((void *)0) ? ((void *)0) : (sp)->gp
; if (L__gp != ((void *)0) && (bp) == L__gp->tmp_bp
) { (((L__gp)->flags) &= ~((0x0100))); { void *L__bincp
; if (((llen)) > (L__gp->tmp_blen)) { if ((L__bincp = binc
(((sp)), (L__gp->tmp_bp), &(L__gp->tmp_blen), ((llen
)))) == ((void *)0)) return (1); (L__gp->tmp_bp) = L__bincp
; } }; (bp) = L__gp->tmp_bp; (blen) = L__gp->tmp_blen; (
((L__gp)->flags) |= ((0x0100))); } else { void *L__bincp; if
(((llen)) > ((blen))) { if ((L__bincp = binc(((sp)), ((bp
)), &((blen)), ((llen)))) == ((void *)0)) return (1); ((bp
)) = L__bincp; } }; }
733 memcpy(bp, s, llen);
734 s = bp;
735 len = llen - offset;
736
737 /* Restart the build. */
738 lbclen = 0;
739 BUILD(sp, s, offset){ if (lbclen + (offset) > lblen) { lblen += (((lbclen + (offset
)) > (256)) ? (lbclen + (offset)) : (256)); { void *tmpp; if
(((tmpp) = (realloc((lb), (lblen)))) == ((void *)0)) { msgq(
((sp)), M_SYSERR, ((void *)0)); free(lb); } lb = tmpp; }; if (
lb == ((void *)0)) { lbclen = 0; return (1); } } memcpy(lb + lbclen
, (s), (offset)); lbclen += (offset); }
;
740
741 /*
742 * If we haven't already done the after-the-string
743 * match, do one. Set REG_NOTEOL so the '$' pattern
744 * only matches once.
745 */
746 if (!do_eol_match)
747 goto endmatch;
748 if (offset == len) {
749 do_eol_match = 0;
750 eflags |= REG_NOTEOL00002;
751 }
752 goto nextmatch;
753 }
754
755 /*
756 * If it's a global:
757 *
758 * If at the end of the string, do a test for the after
759 * the string match. Set REG_NOTEOL so the '$' pattern
760 * only matches once.
761 */
762 if (sp->g_suffix && do_eol_match) {
763 if (len == 0) {
764 do_eol_match = 0;
765 eflags |= REG_NOTEOL00002;
766 }
767 goto nextmatch;
768 }
769
770endmatch: if (!linechanged)
771 continue;
772
773 /* Copy any remaining bytes into the build buffer. */
774 if (len)
775 BUILD(sp, s + offset, len){ if (lbclen + (len) > lblen) { lblen += (((lbclen + (len)
) > (256)) ? (lbclen + (len)) : (256)); { void *tmpp; if (
((tmpp) = (realloc((lb), (lblen)))) == ((void *)0)) { msgq(((
sp)), M_SYSERR, ((void *)0)); free(lb); } lb = tmpp; }; if (lb
== ((void *)0)) { lbclen = 0; return (1); } } memcpy(lb + lbclen
, (s + offset), (len)); lbclen += (len); }
776
777 /* Store inserted lines, adjusting the build buffer. */
778 last = 0;
779 if (sp->newl_cnt) {
780 for (cnt = 0;
781 cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) {
782 if (db_insert(sp,
783 lno, lb + last, sp->newl[cnt] - last))
784 goto err;
785 last = sp->newl[cnt] + 1;
786 ++sp->rptlines[L_ADDED0];
787 }
788 lbclen -= last;
789 sp->newl_cnt = 0;
790 }
791
792 /* Store the changed line. */
793 if (db_set(sp, lno, lb + last, lbclen))
794 goto err;
795
796 /* Update changed line counter. */
797 if (sp->rptlchange != lno) {
798 sp->rptlchange = lno;
799 ++sp->rptlines[L_CHANGED1];
800 }
801
802 /*
803 * !!!
804 * Display as necessary. Historic practice is to only
805 * display the last line of a line split into multiple
806 * lines.
807 */
808 if (lflag || nflag || pflag) {
809 from.lno = to.lno = lno;
810 from.cno = to.cno = 0;
811 if (lflag)
812 (void)ex_print(sp, cmdp, &from, &to, E_C_LIST0x00400);
813 if (nflag)
814 (void)ex_print(sp, cmdp, &from, &to, E_C_HASH0x00200);
815 if (pflag)
816 (void)ex_print(sp, cmdp, &from, &to, E_C_PRINT0x01000);
817 }
818 }
819
820 /*
821 * !!!
822 * Historically, vi attempted to leave the cursor at the same place if
823 * the substitution was done at the current cursor position. Otherwise
824 * it moved it to the first non-blank of the last line changed. There
825 * were some problems: for example, :s/$/foo/ with the cursor on the
826 * last character of the line left the cursor on the last character, or
827 * the & command with multiple occurrences of the matching string in the
828 * line usually left the cursor in a fairly random position.
829 *
830 * We try to do the same thing, with the exception that if the user is
831 * doing substitution with confirmation, we move to the last line about
832 * which the user was consulted, as opposed to the last line that they
833 * actually changed. This prevents a screen flash if the user doesn't
834 * change many of the possible lines.
835 */
836 if (!sp->c_suffix && (sp->lno != slno || sp->cno != scno)) {
837 sp->cno = 0;
838 (void)nonblank(sp, sp->lno, &sp->cno);
839 }
840
841 /*
842 * If not in a global command, and nothing matched, say so.
843 * Else, if none of the lines displayed, put something up.
844 */
845 rval = 0;
846 if (!matched) {
847 if (!F_ISSET(sp, SC_EX_GLOBAL)(((sp)->flags) & ((0x00020000)))) {
848 msgq(sp, M_ERR, "No match found");
849 goto err;
850 }
851 } else if (!lflag && !nflag && !pflag)
852 F_SET(cmdp, E_AUTOPRINT)(((cmdp)->flags) |= ((0x00000040)));
853
854 if (0) {
855err: rval = 1;
856 }
857
858 if (bp != NULL((void *)0))
859 FREE_SPACE(sp, bp, blen){ GS *L__gp = (sp) == ((void *)0) ? ((void *)0) : (sp)->gp
; if (L__gp != ((void *)0) && (bp) == L__gp->tmp_bp
) (((L__gp)->flags) &= ~((0x0100))); else free(bp); }
;
860 free(lb);
861 return (rval);
862}
863
864/*
865 * re_compile --
866 * Compile the RE.
867 *
868 * PUBLIC: int re_compile(SCR *,
869 * PUBLIC: char *, size_t, char **, size_t *, regex_t *, u_int);
870 */
871int
872re_compile(SCR *sp, char *ptrn, size_t plen, char **ptrnp, size_t *lenp,
873 regex_t *rep, u_int flags)
874{
875 size_t len;
876 int reflags, replaced, rval;
877 char *p;
878
879 /* Set RE flags. */
880 reflags = 0;
881 if (!LF_ISSET(RE_C_TAG)((flags) & ((0x0010)))) {
882 if (O_ISSET(sp, O_EXTENDED)((((&(((sp)))->opts[(((O_EXTENDED)))])->flags) &
((0x01))) ? (((sp)))->gp->opts[(((sp)))->opts[(((O_EXTENDED
)))].o_cur.val].o_cur.val : (((sp)))->opts[(((O_EXTENDED))
)].o_cur.val)
)
883 reflags |= REG_EXTENDED0001;
884 if (O_ISSET(sp, O_IGNORECASE)((((&(((sp)))->opts[(((O_IGNORECASE)))])->flags) &
((0x01))) ? (((sp)))->gp->opts[(((sp)))->opts[(((O_IGNORECASE
)))].o_cur.val].o_cur.val : (((sp)))->opts[(((O_IGNORECASE
)))].o_cur.val)
)
885 reflags |= REG_ICASE0002;
886 if (O_ISSET(sp, O_ICLOWER)((((&(((sp)))->opts[(((O_ICLOWER)))])->flags) &
((0x01))) ? (((sp)))->gp->opts[(((sp)))->opts[(((O_ICLOWER
)))].o_cur.val].o_cur.val : (((sp)))->opts[(((O_ICLOWER)))
].o_cur.val)
) {
887 for (p = ptrn, len = plen; len > 0; ++p, --len)
888 if (isupper(*p))
889 break;
890 if (len == 0)
891 reflags |= REG_ICASE0002;
892 }
893 }
894
895 /* If we're replacing a saved value, clear the old one. */
896 if (LF_ISSET(RE_C_SEARCH)((flags) & ((0x0002))) && F_ISSET(sp, SC_RE_SEARCH)(((sp)->flags) & ((0x00400000)))) {
897 regfree(&sp->re_c);
898 F_CLR(sp, SC_RE_SEARCH)(((sp)->flags) &= ~((0x00400000)));
899 }
900 if (LF_ISSET(RE_C_SUBST)((flags) & ((0x0008))) && F_ISSET(sp, SC_RE_SUBST)(((sp)->flags) & ((0x00800000)))) {
901 regfree(&sp->subre_c);
902 F_CLR(sp, SC_RE_SUBST)(((sp)->flags) &= ~((0x00800000)));
903 }
904
905 /*
906 * If we're saving the string, it's a pattern we haven't seen before,
907 * so convert the vi-style RE's to POSIX 1003.2 RE's. Save a copy for
908 * later recompilation. Free any previously saved value.
909 */
910 if (ptrnp != NULL((void *)0)) {
911 if (LF_ISSET(RE_C_TAG)((flags) & ((0x0010)))) {
912 if (re_tag_conv(sp, &ptrn, &plen, &replaced))
913 return (1);
914 } else
915 if (re_conv(sp, &ptrn, &plen, &replaced))
916 return (1);
917
918 /* Discard previous pattern. */
919 free(*ptrnp);
920 *ptrnp = NULL((void *)0);
921 if (lenp != NULL((void *)0))
922 *lenp = plen;
923
924 /*
925 * Copy the string into allocated memory.
926 *
927 * XXX
928 * Regcomp isn't 8-bit clean, so the pattern is nul-terminated
929 * for now. There's just no other solution.
930 */
931 MALLOC(sp, *ptrnp, plen + 1){ if (((*ptrnp) = malloc(plen + 1)) == ((void *)0)) msgq((sp)
, M_SYSERR, ((void *)0)); }
;
932 if (*ptrnp != NULL((void *)0)) {
933 memcpy(*ptrnp, ptrn, plen);
934 (*ptrnp)[plen] = '\0';
935 }
936
937 /* Free up conversion-routine-allocated memory. */
938 if (replaced)
939 FREE_SPACE(sp, ptrn, 0){ GS *L__gp = (sp) == ((void *)0) ? ((void *)0) : (sp)->gp
; if (L__gp != ((void *)0) && (ptrn) == L__gp->tmp_bp
) (((L__gp)->flags) &= ~((0x0100))); else free(ptrn); }
;
940
941 if (*ptrnp == NULL((void *)0))
942 return (1);
943
944 ptrn = *ptrnp;
945 }
946
947 /*
948 * XXX
949 * Regcomp isn't 8-bit clean, so we just lost if the pattern
950 * contained a nul. Bummer!
951 */
952 if ((rval = regcomp(rep, ptrn, /* plen, */ reflags)) != 0) {
953 if (!LF_ISSET(RE_C_SILENT)((flags) & ((0x0004))))
954 re_error(sp, rval, rep);
955 return (1);
956 }
957
958 if (LF_ISSET(RE_C_SEARCH)((flags) & ((0x0002))))
959 F_SET(sp, SC_RE_SEARCH)(((sp)->flags) |= ((0x00400000)));
960 if (LF_ISSET(RE_C_SUBST)((flags) & ((0x0008))))
961 F_SET(sp, SC_RE_SUBST)(((sp)->flags) |= ((0x00800000)));
962
963 return (0);
964}
965
966/*
967 * re_conv --
968 * Convert vi's regular expressions into something that the
969 * the POSIX 1003.2 RE functions can handle.
970 *
971 * There are two conversions we make to make vi's RE's (specifically
972 * the global, search, and substitute patterns) work with POSIX RE's.
973 * We assume that \<ptrn\> does "word" searches, which is non-standard
974 * but supported by most regexp libraries..
975 *
976 * 1: If O_MAGIC is not set, strip backslashes from the magic character
977 * set (.[*~) that have them, and add them to the ones that don't.
978 * 2: If O_MAGIC is not set, the string "\~" is replaced with the text
979 * from the last substitute command's replacement string. If O_MAGIC
980 * is set, it's the string "~".
981 *
982 * !!!/XXX
983 * This doesn't exactly match the historic behavior of vi because we do
984 * the ~ substitution before calling the RE engine, so magic characters
985 * in the replacement string will be expanded by the RE engine, and they
986 * weren't historically. It's a bug.
987 */
988static int
989re_conv(SCR *sp, char **ptrnp, size_t *plenp, int *replacedp)
990{
991 size_t blen, len, needlen;
992 int magic;
993 char *bp, *p, *t;
994
995 /*
996 * First pass through, we figure out how much space we'll need.
997 * We do it in two passes, on the grounds that most of the time
998 * the user is doing a search and won't have magic characters.
999 * That way we can skip most of the memory allocation and copies.
1000 */
1001 magic = 0;
1002 for (p = *ptrnp, len = *plenp, needlen = 0; len > 0; ++p, --len)
1003 switch (*p) {
1004 case '\\':
1005 if (len > 1) {
1006 --len;
1007 switch (*++p) {
1008 case '~':
1009 if (!O_ISSET(sp, O_MAGIC)((((&(((sp)))->opts[(((O_MAGIC)))])->flags) & (
(0x01))) ? (((sp)))->gp->opts[(((sp)))->opts[(((O_MAGIC
)))].o_cur.val].o_cur.val : (((sp)))->opts[(((O_MAGIC)))].
o_cur.val)
) {
1010 magic = 1;
1011 needlen += sp->repl_len;
1012 }
1013 break;
1014 case '.':
1015 case '[':
1016 case '*':
1017 if (!O_ISSET(sp, O_MAGIC)((((&(((sp)))->opts[(((O_MAGIC)))])->flags) & (
(0x01))) ? (((sp)))->gp->opts[(((sp)))->opts[(((O_MAGIC
)))].o_cur.val].o_cur.val : (((sp)))->opts[(((O_MAGIC)))].
o_cur.val)
) {
1018 magic = 1;
1019 needlen += 1;
1020 }
1021 break;
1022 default:
1023 needlen += 2;
1024 }
1025 } else
1026 needlen += 1;
1027 break;
1028 case '~':
1029 if (O_ISSET(sp, O_MAGIC)((((&(((sp)))->opts[(((O_MAGIC)))])->flags) & (
(0x01))) ? (((sp)))->gp->opts[(((sp)))->opts[(((O_MAGIC
)))].o_cur.val].o_cur.val : (((sp)))->opts[(((O_MAGIC)))].
o_cur.val)
) {
1030 magic = 1;
1031 needlen += sp->repl_len;
1032 }
1033 break;
1034 case '.':
1035 case '[':
1036 case '*':
1037 if (!O_ISSET(sp, O_MAGIC)((((&(((sp)))->opts[(((O_MAGIC)))])->flags) & (
(0x01))) ? (((sp)))->gp->opts[(((sp)))->opts[(((O_MAGIC
)))].o_cur.val].o_cur.val : (((sp)))->opts[(((O_MAGIC)))].
o_cur.val)
) {
1038 magic = 1;
1039 needlen += 2;
1040 }
1041 break;
1042 default:
1043 needlen += 1;
1044 break;
1045 }
1046
1047 if (!magic) {
1048 *replacedp = 0;
1049 return (0);
1050 }
1051
1052 /* Get enough memory to hold the final pattern. */
1053 *replacedp = 1;
1054 GET_SPACE_RET(sp, bp, blen, needlen){ GS *L__gp = (sp) == ((void *)0) ? ((void *)0) : (sp)->gp
; if (L__gp == ((void *)0) || (((L__gp)->flags) & ((0x0100
)))) { (bp) = ((void *)0); (blen) = 0; { void *L__bincp; if (
((needlen)) > ((blen))) { if ((L__bincp = binc(((sp)), ((bp
)), &((blen)), ((needlen)))) == ((void *)0)) return (1); (
(bp)) = L__bincp; } }; } else { { void *L__bincp; if (((needlen
)) > (L__gp->tmp_blen)) { if ((L__bincp = binc(((sp)), (
L__gp->tmp_bp), &(L__gp->tmp_blen), ((needlen)))) ==
((void *)0)) return (1); (L__gp->tmp_bp) = L__bincp; } };
(bp) = L__gp->tmp_bp; (blen) = L__gp->tmp_blen; (((L__gp
)->flags) |= ((0x0100))); } }
;
1055
1056 for (p = *ptrnp, len = *plenp, t = bp; len > 0; ++p, --len)
1057 switch (*p) {
1058 case '\\':
1059 if (len > 1) {
1060 --len;
1061 switch (*++p) {
1062 case '~':
1063 if (O_ISSET(sp, O_MAGIC)((((&(((sp)))->opts[(((O_MAGIC)))])->flags) & (
(0x01))) ? (((sp)))->gp->opts[(((sp)))->opts[(((O_MAGIC
)))].o_cur.val].o_cur.val : (((sp)))->opts[(((O_MAGIC)))].
o_cur.val)
)
1064 *t++ = '~';
1065 else {
1066 memcpy(t,
1067 sp->repl, sp->repl_len);
1068 t += sp->repl_len;
1069 }
1070 break;
1071 case '.':
1072 case '[':
1073 case '*':
1074 if (O_ISSET(sp, O_MAGIC)((((&(((sp)))->opts[(((O_MAGIC)))])->flags) & (
(0x01))) ? (((sp)))->gp->opts[(((sp)))->opts[(((O_MAGIC
)))].o_cur.val].o_cur.val : (((sp)))->opts[(((O_MAGIC)))].
o_cur.val)
)
1075 *t++ = '\\';
1076 *t++ = *p;
1077 break;
1078 default:
1079 *t++ = '\\';
1080 *t++ = *p;
1081 }
1082 } else
1083 *t++ = '\\';
1084 break;
1085 case '~':
1086 if (O_ISSET(sp, O_MAGIC)((((&(((sp)))->opts[(((O_MAGIC)))])->flags) & (
(0x01))) ? (((sp)))->gp->opts[(((sp)))->opts[(((O_MAGIC
)))].o_cur.val].o_cur.val : (((sp)))->opts[(((O_MAGIC)))].
o_cur.val)
) {
1087 memcpy(t, sp->repl, sp->repl_len);
1088 t += sp->repl_len;
1089 } else
1090 *t++ = '~';
1091 break;
1092 case '.':
1093 case '[':
1094 case '*':
1095 if (!O_ISSET(sp, O_MAGIC)((((&(((sp)))->opts[(((O_MAGIC)))])->flags) & (
(0x01))) ? (((sp)))->gp->opts[(((sp)))->opts[(((O_MAGIC
)))].o_cur.val].o_cur.val : (((sp)))->opts[(((O_MAGIC)))].
o_cur.val)
)
1096 *t++ = '\\';
1097 *t++ = *p;
1098 break;
1099 default:
1100 *t++ = *p;
1101 break;
1102 }
1103
1104 *ptrnp = bp;
1105 *plenp = t - bp;
1106 return (0);
1107}
1108
1109/*
1110 * re_tag_conv --
1111 * Convert a tags search path into something that the POSIX
1112 * 1003.2 RE functions can handle.
1113 */
1114static int
1115re_tag_conv(SCR *sp, char **ptrnp, size_t *plenp, int *replacedp)
1116{
1117 size_t blen, len;
1118 int lastdollar;
1119 char *bp, *p, *t;
1120
1121 len = *plenp;
1122
1123 /* Max memory usage is 2 times the length of the string. */
1124 *replacedp = 1;
1125 GET_SPACE_RET(sp, bp, blen, len * 2){ GS *L__gp = (sp) == ((void *)0) ? ((void *)0) : (sp)->gp
; if (L__gp == ((void *)0) || (((L__gp)->flags) & ((0x0100
)))) { (bp) = ((void *)0); (blen) = 0; { void *L__bincp; if (
((len * 2)) > ((blen))) { if ((L__bincp = binc(((sp)), ((bp
)), &((blen)), ((len * 2)))) == ((void *)0)) return (1); (
(bp)) = L__bincp; } }; } else { { void *L__bincp; if (((len *
2)) > (L__gp->tmp_blen)) { if ((L__bincp = binc(((sp))
, (L__gp->tmp_bp), &(L__gp->tmp_blen), ((len * 2)))
) == ((void *)0)) return (1); (L__gp->tmp_bp) = L__bincp; }
}; (bp) = L__gp->tmp_bp; (blen) = L__gp->tmp_blen; (((
L__gp)->flags) |= ((0x0100))); } }
;
1126
1127 p = *ptrnp;
1128 t = bp;
1129
1130 /* If the last character is a '/' or '?', we just strip it. */
1131 if (len > 0 && (p[len - 1] == '/' || p[len - 1] == '?'))
1132 --len;
1133
1134 /* If the next-to-last or last character is a '$', it's magic. */
1135 if (len > 0 && p[len - 1] == '$') {
1136 --len;
1137 lastdollar = 1;
1138 } else
1139 lastdollar = 0;
1140
1141 /* If the first character is a '/' or '?', we just strip it. */
1142 if (len > 0 && (p[0] == '/' || p[0] == '?')) {
1143 ++p;
1144 --len;
1145 }
1146
1147 /* If the first or second character is a '^', it's magic. */
1148 if (p[0] == '^') {
1149 *t++ = *p++;
1150 --len;
1151 }
1152
1153 /*
1154 * Escape every other magic character we can find, meanwhile stripping
1155 * the backslashes ctags inserts when escaping the search delimiter
1156 * characters.
1157 */
1158 for (; len > 0; --len) {
1159 if (p[0] == '\\' && (p[1] == '/' || p[1] == '?')) {
1160 ++p;
1161 --len;
1162 } else if (strchr("^.[]$*", p[0]))
1163 *t++ = '\\';
1164 *t++ = *p++;
1165 if (len == 0)
1166 break;
1167 }
1168 if (lastdollar)
1169 *t++ = '$';
1170
1171 *ptrnp = bp;
1172 *plenp = t - bp;
1173 return (0);
1174}
1175
1176/*
1177 * re_error --
1178 * Report a regular expression error.
1179 *
1180 * PUBLIC: void re_error(SCR *, int, regex_t *);
1181 */
1182void
1183re_error(SCR *sp, int errcode, regex_t *preg)
1184{
1185 size_t s;
1186 char *oe;
1187
1188 s = regerror(errcode, preg, "", 0);
1189 if ((oe = malloc(s)) == NULL((void *)0))
1190 msgq(sp, M_SYSERR, NULL((void *)0));
1191 else {
1192 (void)regerror(errcode, preg, oe, s);
1193 msgq(sp, M_ERR, "RE error: %s", oe);
1194 free(oe);
1195 }
1196}
1197
1198/*
1199 * re_sub --
1200 * Do the substitution for a regular expression.
1201 */
1202static int
1203re_sub(SCR *sp, char *ip, char **lbp, size_t *lbclenp, size_t *lblenp,
1204 regmatch_t match[10])
1205{
1206 enum { C_NOTSET, C_LOWER, C_ONELOWER, C_ONEUPPER, C_UPPER } conv;
1207 size_t lbclen, lblen; /* Local copies. */
1208 size_t mlen; /* Match length. */
1209 size_t rpl; /* Remaining replacement length. */
1210 char *rp; /* Replacement pointer. */
1211 int ch;
1212 int no; /* Match replacement offset. */
1213 char *p, *t; /* Buffer pointers. */
1214 char *lb; /* Local copies. */
1215
1216 lb = *lbp; /* Get local copies. */
1217 lbclen = *lbclenp;
1218 lblen = *lblenp;
1219
1220 /*
1221 * QUOTING NOTE:
1222 *
1223 * There are some special sequences that vi provides in the
1224 * replacement patterns.
1225 * & string the RE matched (\& if nomagic set)
1226 * \# n-th regular subexpression
1227 * \E end \U, \L conversion
1228 * \e end \U, \L conversion
1229 * \l convert the next character to lower-case
1230 * \L convert to lower-case, until \E, \e, or end of replacement
1231 * \u convert the next character to upper-case
1232 * \U convert to upper-case, until \E, \e, or end of replacement
1233 *
1234 * Otherwise, since this is the lowest level of replacement, discard
1235 * all escaping characters. This (hopefully) matches historic practice.
1236 */
1237#define OUTCH(ch, nltrans){ CHAR_T __ch = (ch); u_int __value = ((unsigned char)(__ch) <=
254 ? (sp)->gp->special_key[(unsigned char)(__ch)] : (
unsigned char)(__ch) > (sp)->gp->max_special ? 0 : v_key_val
((sp),(__ch))); if ((nltrans) && (__value == K_CR || __value
== K_NL)) { { if ((sp)->newl_len == (sp)->newl_cnt) { (
sp)->newl_len += 25; { void *tmpp; if (((tmpp) = (reallocarray
(((sp)->newl), ((sp)->newl_len), (sizeof(size_t))))) ==
((void *)0)) { msgq(((sp)), M_SYSERR, ((void *)0)); free((sp
)->newl); } (sp)->newl = tmpp; }; if ((sp)->newl == (
(void *)0)) { (sp)->newl_len = 0; return (1); } } }; sp->
newl[sp->newl_cnt++] = lbclen; } else if (conv != C_NOTSET
) { switch (conv) { case C_ONELOWER: conv = C_NOTSET; case C_LOWER
: if (isupper(__ch)) __ch = tolower(__ch); break; case C_ONEUPPER
: conv = C_NOTSET; case C_UPPER: if (islower(__ch)) __ch = toupper
(__ch); break; default: abort(); } } { if (lbclen + (1) > lblen
) { lblen += (((lbclen + (1)) > (256)) ? (lbclen + (1)) : (
256)); { void *tmpp; if (((tmpp) = (realloc((lb), (lblen)))) ==
((void *)0)) { msgq(((sp)), M_SYSERR, ((void *)0)); free(lb)
; } lb = tmpp; }; if (lb == ((void *)0)) { lbclen = 0; return
(1); } (p) = lb + lbclen; } }; *p++ = __ch; ++lbclen; }
{ \
1238 CHAR_T __ch = (ch); \
1239 u_int __value = KEY_VAL(sp, __ch)((unsigned char)(__ch) <= 254 ? (sp)->gp->special_key
[(unsigned char)(__ch)] : (unsigned char)(__ch) > (sp)->
gp->max_special ? 0 : v_key_val((sp),(__ch)))
; \
1240 if ((nltrans) && (__value == K_CR || __value == K_NL)) { \
1241 NEEDNEWLINE(sp){ if ((sp)->newl_len == (sp)->newl_cnt) { (sp)->newl_len
+= 25; { void *tmpp; if (((tmpp) = (reallocarray(((sp)->newl
), ((sp)->newl_len), (sizeof(size_t))))) == ((void *)0)) {
msgq(((sp)), M_SYSERR, ((void *)0)); free((sp)->newl); } (
sp)->newl = tmpp; }; if ((sp)->newl == ((void *)0)) { (
sp)->newl_len = 0; return (1); } } }
; \
1242 sp->newl[sp->newl_cnt++] = lbclen; \
1243 } else if (conv != C_NOTSET) { \
1244 switch (conv) { \
1245 case C_ONELOWER: \
1246 conv = C_NOTSET; \
1247 /* FALLTHROUGH */ \
1248 case C_LOWER: \
1249 if (isupper(__ch)) \
1250 __ch = tolower(__ch); \
1251 break; \
1252 case C_ONEUPPER: \
1253 conv = C_NOTSET; \
1254 /* FALLTHROUGH */ \
1255 case C_UPPER: \
1256 if (islower(__ch)) \
1257 __ch = toupper(__ch); \
1258 break; \
1259 default: \
1260 abort(); \
1261 } \
1262 } \
1263 NEEDSP(sp, 1, p){ if (lbclen + (1) > lblen) { lblen += (((lbclen + (1)) >
(256)) ? (lbclen + (1)) : (256)); { void *tmpp; if (((tmpp) =
(realloc((lb), (lblen)))) == ((void *)0)) { msgq(((sp)), M_SYSERR
, ((void *)0)); free(lb); } lb = tmpp; }; if (lb == ((void *)
0)) { lbclen = 0; return (1); } (p) = lb + lbclen; } }
; \
1264 *p++ = __ch; \
1265 ++lbclen; \
1266}
1267 conv = C_NOTSET;
1268 for (rp = sp->repl, rpl = sp->repl_len, p = lb + lbclen; rpl--;) {
1
Loop condition is true. Entering loop body
16
Loop condition is true. Entering loop body
1269 switch (ch = *rp++) {
2
'Default' branch taken. Execution continues on line 1324
17
'Default' branch taken. Execution continues on line 1324
1270 case '&':
1271 if (O_ISSET(sp, O_MAGIC)((((&(((sp)))->opts[(((O_MAGIC)))])->flags) & (
(0x01))) ? (((sp)))->gp->opts[(((sp)))->opts[(((O_MAGIC
)))].o_cur.val].o_cur.val : (((sp)))->opts[(((O_MAGIC)))].
o_cur.val)
) {
1272 no = 0;
1273 goto subzero;
1274 }
1275 break;
1276 case '\\':
1277 if (rpl == 0)
1278 break;
1279 --rpl;
1280 switch (ch = *rp) {
1281 case '&':
1282 ++rp;
1283 if (!O_ISSET(sp, O_MAGIC)((((&(((sp)))->opts[(((O_MAGIC)))])->flags) & (
(0x01))) ? (((sp)))->gp->opts[(((sp)))->opts[(((O_MAGIC
)))].o_cur.val].o_cur.val : (((sp)))->opts[(((O_MAGIC)))].
o_cur.val)
) {
1284 no = 0;
1285 goto subzero;
1286 }
1287 break;
1288 case '0': case '1': case '2': case '3': case '4':
1289 case '5': case '6': case '7': case '8': case '9':
1290 no = *rp++ - '0';
1291subzero: if (match[no].rm_so == -1 ||
1292 match[no].rm_eo == -1)
1293 break;
1294 mlen = match[no].rm_eo - match[no].rm_so;
1295 for (t = ip + match[no].rm_so; mlen--; ++t)
1296 OUTCH(*t, 0){ CHAR_T __ch = (*t); u_int __value = ((unsigned char)(__ch) <=
254 ? (sp)->gp->special_key[(unsigned char)(__ch)] : (
unsigned char)(__ch) > (sp)->gp->max_special ? 0 : v_key_val
((sp),(__ch))); if ((0) && (__value == K_CR || __value
== K_NL)) { { if ((sp)->newl_len == (sp)->newl_cnt) { (
sp)->newl_len += 25; { void *tmpp; if (((tmpp) = (reallocarray
(((sp)->newl), ((sp)->newl_len), (sizeof(size_t))))) ==
((void *)0)) { msgq(((sp)), M_SYSERR, ((void *)0)); free((sp
)->newl); } (sp)->newl = tmpp; }; if ((sp)->newl == (
(void *)0)) { (sp)->newl_len = 0; return (1); } } }; sp->
newl[sp->newl_cnt++] = lbclen; } else if (conv != C_NOTSET
) { switch (conv) { case C_ONELOWER: conv = C_NOTSET; case C_LOWER
: if (isupper(__ch)) __ch = tolower(__ch); break; case C_ONEUPPER
: conv = C_NOTSET; case C_UPPER: if (islower(__ch)) __ch = toupper
(__ch); break; default: abort(); } } { if (lbclen + (1) > lblen
) { lblen += (((lbclen + (1)) > (256)) ? (lbclen + (1)) : (
256)); { void *tmpp; if (((tmpp) = (realloc((lb), (lblen)))) ==
((void *)0)) { msgq(((sp)), M_SYSERR, ((void *)0)); free(lb)
; } lb = tmpp; }; if (lb == ((void *)0)) { lbclen = 0; return
(1); } (p) = lb + lbclen; } }; *p++ = __ch; ++lbclen; }
;
1297 continue;
1298 case 'e':
1299 case 'E':
1300 ++rp;
1301 conv = C_NOTSET;
1302 continue;
1303 case 'l':
1304 ++rp;
1305 conv = C_ONELOWER;
1306 continue;
1307 case 'L':
1308 ++rp;
1309 conv = C_LOWER;
1310 continue;
1311 case 'u':
1312 ++rp;
1313 conv = C_ONEUPPER;
1314 continue;
1315 case 'U':
1316 ++rp;
1317 conv = C_UPPER;
1318 continue;
1319 default:
1320 ++rp;
1321 break;
1322 }
1323 }
1324 OUTCH(ch, 1){ CHAR_T __ch = (ch); u_int __value = ((unsigned char)(__ch) <=
254 ? (sp)->gp->special_key[(unsigned char)(__ch)] : (
unsigned char)(__ch) > (sp)->gp->max_special ? 0 : v_key_val
((sp),(__ch))); if ((1) && (__value == K_CR || __value
== K_NL)) { { if ((sp)->newl_len == (sp)->newl_cnt) { (
sp)->newl_len += 25; { void *tmpp; if (((tmpp) = (reallocarray
(((sp)->newl), ((sp)->newl_len), (sizeof(size_t))))) ==
((void *)0)) { msgq(((sp)), M_SYSERR, ((void *)0)); free((sp
)->newl); } (sp)->newl = tmpp; }; if ((sp)->newl == (
(void *)0)) { (sp)->newl_len = 0; return (1); } } }; sp->
newl[sp->newl_cnt++] = lbclen; } else if (conv != C_NOTSET
) { switch (conv) { case C_ONELOWER: conv = C_NOTSET; case C_LOWER
: if (isupper(__ch)) __ch = tolower(__ch); break; case C_ONEUPPER
: conv = C_NOTSET; case C_UPPER: if (islower(__ch)) __ch = toupper
(__ch); break; default: abort(); } } { if (lbclen + (1) > lblen
) { lblen += (((lbclen + (1)) > (256)) ? (lbclen + (1)) : (
256)); { void *tmpp; if (((tmpp) = (realloc((lb), (lblen)))) ==
((void *)0)) { msgq(((sp)), M_SYSERR, ((void *)0)); free(lb)
; } lb = tmpp; }; if (lb == ((void *)0)) { lbclen = 0; return
(1); } (p) = lb + lbclen; } }; *p++ = __ch; ++lbclen; }
;
3
'?' condition is true
4
Assuming '__value' is not equal to K_CR
5
Assuming '__value' is not equal to K_NL
6
Taking false branch
7
Taking false branch
8
Assuming the condition is true
9
Taking true branch
10
Assuming the condition is false
11
'?' condition is false
12
Memory is allocated
13
Assuming the condition is false
14
Taking false branch
15
Taking false branch
18
'?' condition is true
19
Assuming '__value' is equal to K_CR
20
Assuming field 'newl_len' is equal to field 'newl_cnt'
21
Taking true branch
22
Assuming the condition is true
23
Taking true branch
24
Taking true branch
25
Potential leak of memory pointed to by 'lb'
1325 }
1326
1327 *lbp = lb; /* Update caller's information. */
1328 *lbclenp = lbclen;
1329 *lblenp = lblen;
1330 return (0);
1331}