Bug Summary

File: src/usr.bin/mandoc/html.c
Warning: line 300, column 10
Access to field 'next' results in a dereference of a null pointer (loaded from variable 'this')

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple amd64-unknown-openbsd7.0 -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name html.c -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 1 -pic-is-pie -mframe-pointer=all -relaxed-aliasing -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -target-feature +retpoline-indirect-calls -target-feature +retpoline-indirect-branches -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/usr/src/usr.bin/mandoc/obj -resource-dir /usr/local/lib/clang/13.0.0 -internal-isystem /usr/local/lib/clang/13.0.0/include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -fdebug-compilation-dir=/usr/src/usr.bin/mandoc/obj -ferror-limit 19 -fwrapv -D_RET_PROTECTOR -ret-protector -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-valloc -fno-builtin-free -fno-builtin-strdup -fno-builtin-strndup -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/ben/Projects/vmm/scan-build/2022-01-12-194120-40624-1 -x c /usr/src/usr.bin/mandoc/html.c
1/* $OpenBSD: html.c,v 1.146 2021/09/09 14:45:18 schwarze Exp $ */
2/*
3 * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2011-2015, 2017-2021 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 *
18 * Common functions for mandoc(1) HTML formatters.
19 * For use by individual formatters and by the main program.
20 */
21#include <sys/types.h>
22#include <sys/stat.h>
23
24#include <assert.h>
25#include <ctype.h>
26#include <stdarg.h>
27#include <stddef.h>
28#include <stdio.h>
29#include <stdint.h>
30#include <stdlib.h>
31#include <string.h>
32#include <unistd.h>
33
34#include "mandoc_aux.h"
35#include "mandoc_ohash.h"
36#include "mandoc.h"
37#include "roff.h"
38#include "out.h"
39#include "html.h"
40#include "manconf.h"
41#include "main.h"
42
/*
 * Static description of one HTML element: the name printed inside
 * the tag and a bit mask of HTML_* flags that the output functions
 * consult when opening and closing the element.
 */
struct	htmldata {
	const char	 *name;		/* Element name as it appears in tags. */
	int		  flags;	/* Bitwise OR of the HTML_* flags below. */
#define	HTML_INPHRASE	 (1 << 0)  /* Can appear in phrasing context. */
#define	HTML_TOPHRASE	 (1 << 1)  /* Establishes phrasing context. */
#define	HTML_NOSTACK	 (1 << 2)  /* Does not have an end tag. */
#define	HTML_NLBEFORE	 (1 << 3)  /* Output line break before opening. */
#define	HTML_NLBEGIN	 (1 << 4)  /* Output line break after opening. */
#define	HTML_NLEND	 (1 << 5)  /* Output line break before closing. */
#define	HTML_NLAFTER	 (1 << 6)  /* Output line break after closing. */
#define	HTML_NLAROUND	 (HTML_NLBEFORE | HTML_NLAFTER)
#define	HTML_NLINSIDE	 (HTML_NLBEGIN | HTML_NLEND)
#define	HTML_NLALL	 (HTML_NLAROUND | HTML_NLINSIDE)
#define	HTML_INDENT	 (1 << 7)  /* Indent content by two spaces. */
#define	HTML_NOINDENT	 (1 << 8)  /* Exception: never indent content. */
};
59
60static const struct htmldata htmltags[TAG_MAX] = {
61 {"html", HTML_NLALL(((1 << 3) | (1 << 6)) | ((1 << 4) | (1 <<
5)))
},
62 {"head", HTML_NLALL(((1 << 3) | (1 << 6)) | ((1 << 4) | (1 <<
5)))
| HTML_INDENT(1 << 7)},
63 {"meta", HTML_NOSTACK(1 << 2) | HTML_NLALL(((1 << 3) | (1 << 6)) | ((1 << 4) | (1 <<
5)))
},
64 {"link", HTML_NOSTACK(1 << 2) | HTML_NLALL(((1 << 3) | (1 << 6)) | ((1 << 4) | (1 <<
5)))
},
65 {"style", HTML_NLALL(((1 << 3) | (1 << 6)) | ((1 << 4) | (1 <<
5)))
| HTML_INDENT(1 << 7)},
66 {"title", HTML_NLAROUND((1 << 3) | (1 << 6))},
67 {"body", HTML_NLALL(((1 << 3) | (1 << 6)) | ((1 << 4) | (1 <<
5)))
},
68 {"div", HTML_NLAROUND((1 << 3) | (1 << 6))},
69 {"section", HTML_NLALL(((1 << 3) | (1 << 6)) | ((1 << 4) | (1 <<
5)))
},
70 {"table", HTML_NLALL(((1 << 3) | (1 << 6)) | ((1 << 4) | (1 <<
5)))
| HTML_INDENT(1 << 7)},
71 {"tr", HTML_NLALL(((1 << 3) | (1 << 6)) | ((1 << 4) | (1 <<
5)))
| HTML_INDENT(1 << 7)},
72 {"td", HTML_NLAROUND((1 << 3) | (1 << 6))},
73 {"li", HTML_NLAROUND((1 << 3) | (1 << 6)) | HTML_INDENT(1 << 7)},
74 {"ul", HTML_NLALL(((1 << 3) | (1 << 6)) | ((1 << 4) | (1 <<
5)))
| HTML_INDENT(1 << 7)},
75 {"ol", HTML_NLALL(((1 << 3) | (1 << 6)) | ((1 << 4) | (1 <<
5)))
| HTML_INDENT(1 << 7)},
76 {"dl", HTML_NLALL(((1 << 3) | (1 << 6)) | ((1 << 4) | (1 <<
5)))
| HTML_INDENT(1 << 7)},
77 {"dt", HTML_NLAROUND((1 << 3) | (1 << 6))},
78 {"dd", HTML_NLAROUND((1 << 3) | (1 << 6)) | HTML_INDENT(1 << 7)},
79 {"h1", HTML_TOPHRASE(1 << 1) | HTML_NLAROUND((1 << 3) | (1 << 6))},
80 {"h2", HTML_TOPHRASE(1 << 1) | HTML_NLAROUND((1 << 3) | (1 << 6))},
81 {"p", HTML_TOPHRASE(1 << 1) | HTML_NLAROUND((1 << 3) | (1 << 6)) | HTML_INDENT(1 << 7)},
82 {"pre", HTML_TOPHRASE(1 << 1) | HTML_NLAROUND((1 << 3) | (1 << 6)) | HTML_NOINDENT(1 << 8)},
83 {"a", HTML_INPHRASE(1 << 0) | HTML_TOPHRASE(1 << 1)},
84 {"b", HTML_INPHRASE(1 << 0) | HTML_TOPHRASE(1 << 1)},
85 {"cite", HTML_INPHRASE(1 << 0) | HTML_TOPHRASE(1 << 1)},
86 {"code", HTML_INPHRASE(1 << 0) | HTML_TOPHRASE(1 << 1)},
87 {"i", HTML_INPHRASE(1 << 0) | HTML_TOPHRASE(1 << 1)},
88 {"small", HTML_INPHRASE(1 << 0) | HTML_TOPHRASE(1 << 1)},
89 {"span", HTML_INPHRASE(1 << 0) | HTML_TOPHRASE(1 << 1)},
90 {"var", HTML_INPHRASE(1 << 0) | HTML_TOPHRASE(1 << 1)},
91 {"br", HTML_INPHRASE(1 << 0) | HTML_NOSTACK(1 << 2) | HTML_NLALL(((1 << 3) | (1 << 6)) | ((1 << 4) | (1 <<
5)))
},
92 {"hr", HTML_INPHRASE(1 << 0) | HTML_NOSTACK(1 << 2)},
93 {"mark", HTML_INPHRASE(1 << 0) },
94 {"math", HTML_INPHRASE(1 << 0) | HTML_NLALL(((1 << 3) | (1 << 6)) | ((1 << 4) | (1 <<
5)))
| HTML_INDENT(1 << 7)},
95 {"mrow", 0},
96 {"mi", 0},
97 {"mn", 0},
98 {"mo", 0},
99 {"msup", 0},
100 {"msub", 0},
101 {"msubsup", 0},
102 {"mfrac", 0},
103 {"msqrt", 0},
104 {"mfenced", 0},
105 {"mtable", 0},
106 {"mtr", 0},
107 {"mtd", 0},
108 {"munderover", 0},
109 {"munder", 0},
110 {"mover", 0},
111};
112
113/* Avoid duplicate HTML id= attributes. */
114
115struct id_entry {
116 int ord; /* Ordinal number of the latest occurrence. */
117 char id[]; /* The id= attribute without any ordinal suffix. */
118};
119static struct ohash id_unique;
120
/* State management. */
static	void	 html_reset_internal(struct html *);

/* Low level output functions. */
static	void	 print_byte(struct html *, char);
static	void	 print_word(struct html *, const char *);
static	void	 print_endword(struct html *);
static	void	 print_indent(struct html *);

/* Element, attribute, and text output helpers. */
static	void	 print_ctag(struct html *, struct tag *);
static	int	 print_escape(struct html *, char);
static	int	 print_encode(struct html *, const char *, const char *, int);
static	void	 print_href(struct html *, const char *, const char *, int);
static	void	 print_metaf(struct html *);
132
133
134void *
135html_alloc(const struct manoutput *outopts)
136{
137 struct html *h;
138
139 h = mandoc_calloc(1, sizeof(struct html));
140
141 h->tag = NULL((void*)0);
142 h->metac = h->metal = ESCAPE_FONTROMAN;
143 h->style = outopts->style;
144 if ((h->base_man1 = outopts->man) == NULL((void*)0))
145 h->base_man2 = NULL((void*)0);
146 else if ((h->base_man2 = strchr(h->base_man1, ';')) != NULL((void*)0))
147 *h->base_man2++ = '\0';
148 h->base_includes = outopts->includes;
149 if (outopts->fragment)
150 h->oflags |= HTML_FRAGMENT(1 << 0);
151 if (outopts->toc)
152 h->oflags |= HTML_TOC(1 << 1);
153
154 mandoc_ohash_init(&id_unique, 4, offsetof(struct id_entry, id)__builtin_offsetof(struct id_entry, id));
155
156 return h;
157}
158
159static void
160html_reset_internal(struct html *h)
161{
162 struct tag *tag;
163 struct id_entry *entry;
164 unsigned int slot;
165
166 while ((tag = h->tag) != NULL((void*)0)) {
167 h->tag = tag->next;
168 free(tag);
169 }
170 entry = ohash_first(&id_unique, &slot);
171 while (entry != NULL((void*)0)) {
172 free(entry);
173 entry = ohash_next(&id_unique, &slot);
174 }
175 ohash_delete(&id_unique);
176}
177
178void
179html_reset(void *p)
180{
181 html_reset_internal(p);
182 mandoc_ohash_init(&id_unique, 4, offsetof(struct id_entry, id)__builtin_offsetof(struct id_entry, id));
183}
184
/*
 * Discard per-document state and free the formatter state itself.
 * The pointer must not be used afterwards.
 */
void
html_free(void *p)
{
	struct html	*h = p;

	html_reset_internal(h);
	free(h);
}
191
192void
193print_gen_head(struct html *h)
194{
195 struct tag *t;
196
197 print_otag(h, TAG_META, "?", "charset", "utf-8");
198 print_otag(h, TAG_META, "??", "name", "viewport",
199 "content", "width=device-width, initial-scale=1.0");
200 if (h->style != NULL((void*)0)) {
201 print_otag(h, TAG_LINK, "?h??", "rel", "stylesheet",
202 h->style, "type", "text/css", "media", "all");
203 return;
204 }
205
206 /*
207 * Print a minimal embedded style sheet.
208 */
209
210 t = print_otag(h, TAG_STYLE, "");
211 print_text(h, "table.head, table.foot { width: 100%; }");
212 print_endline(h);
213 print_text(h, "td.head-rtitle, td.foot-os { text-align: right; }");
214 print_endline(h);
215 print_text(h, "td.head-vol { text-align: center; }");
216 print_endline(h);
217 print_text(h, ".Nd, .Bf, .Op { display: inline; }");
218 print_endline(h);
219 print_text(h, ".Pa, .Ad { font-style: italic; }");
220 print_endline(h);
221 print_text(h, ".Ms { font-weight: bold; }");
222 print_endline(h);
223 print_text(h, ".Bl-diag ");
224 print_byte(h, '>');
225 print_text(h, " dt { font-weight: bold; }");
226 print_endline(h);
227 print_text(h, "code.Nm, .Fl, .Cm, .Ic, code.In, .Fd, .Fn, .Cd "
228 "{ font-weight: bold; font-family: inherit; }");
229 print_tagq(h, t);
230}
231
232int
233html_setfont(struct html *h, enum mandoc_esc font)
234{
235 switch (font) {
236 case ESCAPE_FONTPREV:
237 font = h->metal;
238 break;
239 case ESCAPE_FONTITALIC:
240 case ESCAPE_FONTBOLD:
241 case ESCAPE_FONTBI:
242 case ESCAPE_FONTROMAN:
243 case ESCAPE_FONTCR:
244 case ESCAPE_FONTCB:
245 case ESCAPE_FONTCI:
246 break;
247 case ESCAPE_FONT:
248 font = ESCAPE_FONTROMAN;
249 break;
250 default:
251 return 0;
252 }
253 h->metal = h->metac;
254 h->metac = font;
255 return 1;
256}
257
258static void
259print_metaf(struct html *h)
260{
261 if (h->metaf) {
262 print_tagq(h, h->metaf);
263 h->metaf = NULL((void*)0);
264 }
265 switch (h->metac) {
266 case ESCAPE_FONTITALIC:
267 h->metaf = print_otag(h, TAG_I, "");
268 break;
269 case ESCAPE_FONTBOLD:
270 h->metaf = print_otag(h, TAG_B, "");
271 break;
272 case ESCAPE_FONTBI:
273 h->metaf = print_otag(h, TAG_B, "");
274 print_otag(h, TAG_I, "");
275 break;
276 case ESCAPE_FONTCR:
277 h->metaf = print_otag(h, TAG_SPAN, "c", "Li");
278 break;
279 case ESCAPE_FONTCB:
280 h->metaf = print_otag(h, TAG_SPAN, "c", "Li");
281 print_otag(h, TAG_B, "");
282 break;
283 case ESCAPE_FONTCI:
284 h->metaf = print_otag(h, TAG_SPAN, "c", "Li");
285 print_otag(h, TAG_I, "");
286 break;
287 default:
288 break;
289 }
290}
291
292void
293html_close_paragraph(struct html *h)
294{
295 struct tag *this, *next;
296 int flags;
297
298 this = h->tag;
8
Null pointer value stored to 'this'
299 for (;;) {
9
Loop condition is true. Entering loop body
300 next = this->next;
10
Access to field 'next' results in a dereference of a null pointer (loaded from variable 'this')
301 flags = htmltags[this->tag].flags;
302 if (flags & (HTML_INPHRASE(1 << 0) | HTML_TOPHRASE(1 << 1)))
303 print_ctag(h, this);
304 if ((flags & HTML_INPHRASE(1 << 0)) == 0)
305 break;
306 this = next;
307 }
308}
309
310/*
311 * ROFF_nf switches to no-fill mode, ROFF_fi to fill mode.
312 * TOKEN_NONE does not switch. The old mode is returned.
313 */
314enum roff_tok
315html_fillmode(struct html *h, enum roff_tok want)
316{
317 struct tag *t;
318 enum roff_tok had;
319
320 for (t = h->tag; t != NULL((void*)0); t = t->next)
1
Assuming 't' is equal to NULL
2
Loop condition is false. Execution continues on line 324
321 if (t->tag == TAG_PRE)
322 break;
323
324 had = t
2.1
't' is equal to NULL
== NULL((void*)0) ? ROFF_fi : ROFF_nf;
3
'?' condition is true
325
326 if (want != had) {
4
Assuming 'want' is not equal to 'had'
5
Taking true branch
327 switch (want) {
6
Control jumps to 'case ROFF_nf:' at line 331
328 case ROFF_fi:
329 print_tagq(h, t);
330 break;
331 case ROFF_nf:
332 html_close_paragraph(h);
7
Calling 'html_close_paragraph'
333 print_otag(h, TAG_PRE, "");
334 break;
335 case TOKEN_NONE:
336 break;
337 default:
338 abort();
339 }
340 }
341 return had;
342}
343
344/*
345 * Allocate a string to be used for the "id=" attribute of an HTML
346 * element and/or as a segment identifier for a URI in an <a> element.
347 * The function may fail and return NULL if the node lacks text data
348 * to create the attribute from.
349 * The caller is responsible for free(3)ing the returned string.
350 *
351 * If the "unique" argument is non-zero, the "id_unique" ohash table
352 * is used for de-duplication. If the "unique" argument is 1,
353 * it is the first time the function is called for this tag and
354 * location, so if an ordinal suffix is needed, it is incremented.
355 * If the "unique" argument is 2, it is the second time the function
356 * is called for this tag and location, so the ordinal suffix
357 * remains unchanged.
358 */
359char *
360html_make_id(const struct roff_node *n, int unique)
361{
362 const struct roff_node *nch;
363 struct id_entry *entry;
364 char *buf, *cp;
365 size_t len;
366 unsigned int slot;
367
368 if (n->tag != NULL((void*)0))
369 buf = mandoc_strdup(n->tag);
370 else {
371 switch (n->tok) {
372 case MDOC_Sh:
373 case MDOC_Ss:
374 case MDOC_Sx:
375 case MAN_SH:
376 case MAN_SS:
377 for (nch = n->child; nch != NULL((void*)0); nch = nch->next)
378 if (nch->type != ROFFT_TEXT)
379 return NULL((void*)0);
380 buf = NULL((void*)0);
381 deroff(&buf, n);
382 if (buf == NULL((void*)0))
383 return NULL((void*)0);
384 break;
385 default:
386 if (n->child == NULL((void*)0) || n->child->type != ROFFT_TEXT)
387 return NULL((void*)0);
388 buf = mandoc_strdup(n->child->string);
389 break;
390 }
391 }
392
393 /*
394 * In ID attributes, only use ASCII characters that are
395 * permitted in URL-fragment strings according to the
396 * explicit list at:
397 * https://url.spec.whatwg.org/#url-fragment-string
398 * In addition, reserve '~' for ordinal suffixes.
399 */
400
401 for (cp = buf; *cp != '\0'; cp++)
402 if (isalnum((unsigned char)*cp) == 0 &&
403 strchr("!$&'()*+,-./:;=?@_", *cp) == NULL((void*)0))
404 *cp = '_';
405
406 if (unique == 0)
407 return buf;
408
409 /* Avoid duplicate HTML id= attributes. */
410
411 slot = ohash_qlookup(&id_unique, buf);
412 if ((entry = ohash_find(&id_unique, slot)) == NULL((void*)0)) {
413 len = strlen(buf) + 1;
414 entry = mandoc_malloc(sizeof(*entry) + len);
415 entry->ord = 1;
416 memcpy(entry->id, buf, len);
417 ohash_insert(&id_unique, slot, entry);
418 } else if (unique == 1)
419 entry->ord++;
420
421 if (entry->ord > 1) {
422 cp = buf;
423 mandoc_asprintf(&buf, "%s~%d", cp, entry->ord);
424 free(cp);
425 }
426 return buf;
427}
428
429static int
430print_escape(struct html *h, char c)
431{
432
433 switch (c) {
434 case '<':
435 print_word(h, "&lt;");
436 break;
437 case '>':
438 print_word(h, "&gt;");
439 break;
440 case '&':
441 print_word(h, "&amp;");
442 break;
443 case '"':
444 print_word(h, "&quot;");
445 break;
446 case ASCII_NBRSP31:
447 print_word(h, "&nbsp;");
448 break;
449 case ASCII_HYPH30:
450 print_byte(h, '-');
451 break;
452 case ASCII_BREAK29:
453 break;
454 default:
455 return 0;
456 }
457 return 1;
458}
459
460static int
461print_encode(struct html *h, const char *p, const char *pend, int norecurse)
462{
463 char numbuf[16];
464 const char *seq;
465 size_t sz;
466 int c, len, breakline, nospace;
467 enum mandoc_esc esc;
468 static const char rejs[10] = { ' ', '\\', '<', '>', '&', '"',
469 ASCII_NBRSP31, ASCII_HYPH30, ASCII_BREAK29, '\0' };
470
471 if (pend == NULL((void*)0))
472 pend = strchr(p, '\0');
473
474 breakline = 0;
475 nospace = 0;
476
477 while (p < pend) {
478 if (HTML_SKIPCHAR(1 << 6) & h->flags && '\\' != *p) {
479 h->flags &= ~HTML_SKIPCHAR(1 << 6);
480 p++;
481 continue;
482 }
483
484 for (sz = strcspn(p, rejs); sz-- && p < pend; p++)
485 print_byte(h, *p);
486
487 if (breakline &&
488 (p >= pend || *p == ' ' || *p == ASCII_NBRSP31)) {
489 print_otag(h, TAG_BR, "");
490 breakline = 0;
491 while (p < pend && (*p == ' ' || *p == ASCII_NBRSP31))
492 p++;
493 continue;
494 }
495
496 if (p >= pend)
497 break;
498
499 if (*p == ' ') {
500 print_endword(h);
501 p++;
502 continue;
503 }
504
505 if (print_escape(h, *p++))
506 continue;
507
508 esc = mandoc_escape(&p, &seq, &len);
509 switch (esc) {
510 case ESCAPE_FONT:
511 case ESCAPE_FONTPREV:
512 case ESCAPE_FONTBOLD:
513 case ESCAPE_FONTITALIC:
514 case ESCAPE_FONTBI:
515 case ESCAPE_FONTROMAN:
516 case ESCAPE_FONTCR:
517 case ESCAPE_FONTCB:
518 case ESCAPE_FONTCI:
519 if (0 == norecurse) {
520 h->flags |= HTML_NOSPACE(1 << 0);
521 if (html_setfont(h, esc))
522 print_metaf(h);
523 h->flags &= ~HTML_NOSPACE(1 << 0);
524 }
525 continue;
526 case ESCAPE_SKIPCHAR:
527 h->flags |= HTML_SKIPCHAR(1 << 6);
528 continue;
529 case ESCAPE_ERROR:
530 continue;
531 default:
532 break;
533 }
534
535 if (h->flags & HTML_SKIPCHAR(1 << 6)) {
536 h->flags &= ~HTML_SKIPCHAR(1 << 6);
537 continue;
538 }
539
540 switch (esc) {
541 case ESCAPE_UNICODE:
542 /* Skip past "u" header. */
543 c = mchars_num2uc(seq + 1, len - 1);
544 break;
545 case ESCAPE_NUMBERED:
546 c = mchars_num2char(seq, len);
547 if (c < 0)
548 continue;
549 break;
550 case ESCAPE_SPECIAL:
551 c = mchars_spec2cp(seq, len);
552 if (c <= 0)
553 continue;
554 break;
555 case ESCAPE_UNDEF:
556 c = *seq;
557 break;
558 case ESCAPE_DEVICE:
559 print_word(h, "html");
560 continue;
561 case ESCAPE_BREAK:
562 breakline = 1;
563 continue;
564 case ESCAPE_NOSPACE:
565 if ('\0' == *p)
566 nospace = 1;
567 continue;
568 case ESCAPE_OVERSTRIKE:
569 if (len == 0)
570 continue;
571 c = seq[len - 1];
572 break;
573 default:
574 continue;
575 }
576 if ((c < 0x20 && c != 0x09) ||
577 (c > 0x7E && c < 0xA0))
578 c = 0xFFFD;
579 if (c > 0x7E) {
580 (void)snprintf(numbuf, sizeof(numbuf), "&#x%.4X;", c);
581 print_word(h, numbuf);
582 } else if (print_escape(h, c) == 0)
583 print_byte(h, c);
584 }
585
586 return nospace;
587}
588
589static void
590print_href(struct html *h, const char *name, const char *sec, int man)
591{
592 struct stat sb;
593 const char *p, *pp;
594 char *filename;
595
596 if (man) {
597 pp = h->base_man1;
598 if (h->base_man2 != NULL((void*)0)) {
599 mandoc_asprintf(&filename, "%s.%s", name, sec);
600 if (stat(filename, &sb) == -1)
601 pp = h->base_man2;
602 free(filename);
603 }
604 } else
605 pp = h->base_includes;
606
607 while ((p = strchr(pp, '%')) != NULL((void*)0)) {
608 print_encode(h, pp, p, 1);
609 if (man && p[1] == 'S') {
610 if (sec == NULL((void*)0))
611 print_byte(h, '1');
612 else
613 print_encode(h, sec, NULL((void*)0), 1);
614 } else if ((man && p[1] == 'N') ||
615 (man == 0 && p[1] == 'I'))
616 print_encode(h, name, NULL((void*)0), 1);
617 else
618 print_encode(h, p, p + 2, 1);
619 pp = p + 2;
620 }
621 if (*pp != '\0')
622 print_encode(h, pp, NULL((void*)0), 1);
623}
624
625struct tag *
626print_otag(struct html *h, enum htmltag tag, const char *fmt, ...)
627{
628 va_list ap;
629 struct tag *t;
630 const char *attr;
631 char *arg1, *arg2;
632 int style_written, tflags;
633
634 tflags = htmltags[tag].flags;
635
636 /* Flow content is not allowed in phrasing context. */
637
638 if ((tflags & HTML_INPHRASE(1 << 0)) == 0) {
639 for (t = h->tag; t != NULL((void*)0); t = t->next) {
640 if (t->closed)
641 continue;
642 assert((htmltags[t->tag].flags & HTML_TOPHRASE) == 0)(((htmltags[t->tag].flags & (1 << 1)) == 0) ? (void
)0 : __assert2("/usr/src/usr.bin/mandoc/html.c", 642, __func__
, "(htmltags[t->tag].flags & HTML_TOPHRASE) == 0"))
;
643 break;
644 }
645
646 /*
647 * Always wrap phrasing elements in a paragraph
648 * unless already contained in some flow container;
649 * never put them directly into a section.
650 */
651
652 } else if (tflags & HTML_TOPHRASE(1 << 1) && h->tag->tag == TAG_SECTION)
653 print_otag(h, TAG_P, "c", "Pp");
654
655 /* Push this tag onto the stack of open scopes. */
656
657 if ((tflags & HTML_NOSTACK(1 << 2)) == 0) {
658 t = mandoc_malloc(sizeof(struct tag));
659 t->tag = tag;
660 t->next = h->tag;
661 t->refcnt = 0;
662 t->closed = 0;
663 h->tag = t;
664 } else
665 t = NULL((void*)0);
666
667 if (tflags & HTML_NLBEFORE(1 << 3))
668 print_endline(h);
669 if (h->col == 0)
670 print_indent(h);
671 else if ((h->flags & HTML_NOSPACE(1 << 0)) == 0) {
672 if (h->flags & HTML_KEEP(1 << 2))
673 print_word(h, "&#x00A0;");
674 else {
675 if (h->flags & HTML_PREKEEP(1 << 3))
676 h->flags |= HTML_KEEP(1 << 2);
677 print_endword(h);
678 }
679 }
680
681 if ( ! (h->flags & HTML_NONOSPACE(1 << 4)))
682 h->flags &= ~HTML_NOSPACE(1 << 0);
683 else
684 h->flags |= HTML_NOSPACE(1 << 0);
685
686 /* Print out the tag name and attributes. */
687
688 print_byte(h, '<');
689 print_word(h, htmltags[tag].name);
690
691 va_start(ap, fmt)__builtin_va_start(ap, fmt);
692
693 while (*fmt != '\0' && *fmt != 's') {
694
695 /* Parse attributes and arguments. */
696
697 arg1 = va_arg(ap, char *)__builtin_va_arg(ap, char *);
698 arg2 = NULL((void*)0);
699 switch (*fmt++) {
700 case 'c':
701 attr = "class";
702 break;
703 case 'h':
704 attr = "href";
705 break;
706 case 'i':
707 attr = "id";
708 break;
709 case '?':
710 attr = arg1;
711 arg1 = va_arg(ap, char *)__builtin_va_arg(ap, char *);
712 break;
713 default:
714 abort();
715 }
716 if (*fmt == 'M')
717 arg2 = va_arg(ap, char *)__builtin_va_arg(ap, char *);
718 if (arg1 == NULL((void*)0))
719 continue;
720
721 /* Print the attributes. */
722
723 print_byte(h, ' ');
724 print_word(h, attr);
725 print_byte(h, '=');
726 print_byte(h, '"');
727 switch (*fmt) {
728 case 'I':
729 print_href(h, arg1, NULL((void*)0), 0);
730 fmt++;
731 break;
732 case 'M':
733 print_href(h, arg1, arg2, 1);
734 fmt++;
735 break;
736 case 'R':
737 print_byte(h, '#');
738 print_encode(h, arg1, NULL((void*)0), 1);
739 fmt++;
740 break;
741 default:
742 print_encode(h, arg1, NULL((void*)0), 1);
743 break;
744 }
745 print_byte(h, '"');
746 }
747
748 style_written = 0;
749 while (*fmt++ == 's') {
750 arg1 = va_arg(ap, char *)__builtin_va_arg(ap, char *);
751 arg2 = va_arg(ap, char *)__builtin_va_arg(ap, char *);
752 if (arg2 == NULL((void*)0))
753 continue;
754 print_byte(h, ' ');
755 if (style_written == 0) {
756 print_word(h, "style=\"");
757 style_written = 1;
758 }
759 print_word(h, arg1);
760 print_byte(h, ':');
761 print_byte(h, ' ');
762 print_word(h, arg2);
763 print_byte(h, ';');
764 }
765 if (style_written)
766 print_byte(h, '"');
767
768 va_end(ap)__builtin_va_end(ap);
769
770 /* Accommodate for "well-formed" singleton escaping. */
771
772 if (htmltags[tag].flags & HTML_NOSTACK(1 << 2))
773 print_byte(h, '/');
774
775 print_byte(h, '>');
776
777 if (tflags & HTML_NLBEGIN(1 << 4))
778 print_endline(h);
779 else
780 h->flags |= HTML_NOSPACE(1 << 0);
781
782 if (tflags & HTML_INDENT(1 << 7))
783 h->indent++;
784 if (tflags & HTML_NOINDENT(1 << 8))
785 h->noindent++;
786
787 return t;
788}
789
790/*
791 * Print an element with an optional "id=" attribute.
792 * If the element has phrasing content and an "id=" attribute,
793 * also add a permalink: outside if it can be in phrasing context,
794 * inside otherwise.
795 */
796struct tag *
797print_otag_id(struct html *h, enum htmltag elemtype, const char *cattr,
798 struct roff_node *n)
799{
800 struct roff_node *nch;
801 struct tag *ret, *t;
802 char *id, *href;
803
804 ret = NULL((void*)0);
805 id = href = NULL((void*)0);
806 if (n->flags & NODE_ID(1 << 11))
807 id = html_make_id(n, 1);
808 if (n->flags & NODE_HREF(1 << 12))
809 href = id == NULL((void*)0) ? html_make_id(n, 2) : id;
810 if (href != NULL((void*)0) && htmltags[elemtype].flags & HTML_INPHRASE(1 << 0))
811 ret = print_otag(h, TAG_A, "chR", "permalink", href);
812 t = print_otag(h, elemtype, "ci", cattr, id);
813 if (ret == NULL((void*)0)) {
814 ret = t;
815 if (href != NULL((void*)0) && (nch = n->child) != NULL((void*)0)) {
816 /* man(7) is safe, it tags phrasing content only. */
817 if (n->tok > MDOC_MAX ||
818 htmltags[elemtype].flags & HTML_TOPHRASE(1 << 1))
819 nch = NULL((void*)0);
820 else /* For mdoc(7), beware of nested blocks. */
821 while (nch != NULL((void*)0) && nch->type == ROFFT_TEXT)
822 nch = nch->next;
823 if (nch == NULL((void*)0))
824 print_otag(h, TAG_A, "chR", "permalink", href);
825 }
826 }
827 free(id);
828 if (id == NULL((void*)0))
829 free(href);
830 return ret;
831}
832
833static void
834print_ctag(struct html *h, struct tag *tag)
835{
836 int tflags;
837
838 if (tag->closed == 0) {
839 tag->closed = 1;
840 if (tag == h->metaf)
841 h->metaf = NULL((void*)0);
842 if (tag == h->tblt)
843 h->tblt = NULL((void*)0);
844
845 tflags = htmltags[tag->tag].flags;
846 if (tflags & HTML_INDENT(1 << 7))
847 h->indent--;
848 if (tflags & HTML_NOINDENT(1 << 8))
849 h->noindent--;
850 if (tflags & HTML_NLEND(1 << 5))
851 print_endline(h);
852 print_indent(h);
853 print_byte(h, '<');
854 print_byte(h, '/');
855 print_word(h, htmltags[tag->tag].name);
856 print_byte(h, '>');
857 if (tflags & HTML_NLAFTER(1 << 6))
858 print_endline(h);
859 }
860 if (tag->refcnt == 0) {
861 h->tag = tag->next;
862 free(tag);
863 }
864}
865
/*
 * Print the document type declaration on a line of its own.
 */
void
print_gen_decls(struct html *h)
{
	static const char doctype[] = "<!DOCTYPE html>";

	print_word(h, doctype);
	print_endline(h);
}
872
873void
874print_gen_comment(struct html *h, struct roff_node *n)
875{
876 int wantblank;
877
878 print_word(h, "<!-- This is an automatically generated file."
879 " Do not edit.");
880 h->indent = 1;
881 wantblank = 0;
882 while (n != NULL((void*)0) && n->type == ROFFT_COMMENT) {
883 if (strstr(n->string, "-->") == NULL((void*)0) &&
884 (wantblank || *n->string != '\0')) {
885 print_endline(h);
886 print_indent(h);
887 print_word(h, n->string);
888 wantblank = *n->string != '\0';
889 }
890 n = n->next;
891 }
892 if (wantblank)
893 print_endline(h);
894 print_word(h, " -->");
895 print_endline(h);
896 h->indent = 0;
897}
898
/*
 * Print text that is not associated with a node,
 * and hence without any permalink.
 */
void
print_text(struct html *h, const char *word)
{
	struct roff_node	*none = NULL;

	print_tagged_text(h, word, none);
}
904
905void
906print_tagged_text(struct html *h, const char *word, struct roff_node *n)
907{
908 struct tag *t;
909 char *href;
910
911 /*
912 * Always wrap text in a paragraph unless already contained in
913 * some flow container; never put it directly into a section.
914 */
915
916 if (h->tag->tag == TAG_SECTION)
917 print_otag(h, TAG_P, "c", "Pp");
918
919 /* Output whitespace before this text? */
920
921 if (h->col && (h->flags & HTML_NOSPACE(1 << 0)) == 0) {
922 if ( ! (HTML_KEEP(1 << 2) & h->flags)) {
923 if (HTML_PREKEEP(1 << 3) & h->flags)
924 h->flags |= HTML_KEEP(1 << 2);
925 print_endword(h);
926 } else
927 print_word(h, "&#x00A0;");
928 }
929
930 /*
931 * Optionally switch fonts, optionally write a permalink, then
932 * print the text, optionally surrounded by HTML whitespace.
933 */
934
935 assert(h->metaf == NULL)((h->metaf == ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.bin/mandoc/html.c"
, 935, __func__, "h->metaf == NULL"))
;
936 print_metaf(h);
937 print_indent(h);
938
939 if (n != NULL((void*)0) && (href = html_make_id(n, 2)) != NULL((void*)0)) {
940 t = print_otag(h, TAG_A, "chR", "permalink", href);
941 free(href);
942 } else
943 t = NULL((void*)0);
944
945 if ( ! print_encode(h, word, NULL((void*)0), 0)) {
946 if ( ! (h->flags & HTML_NONOSPACE(1 << 4)))
947 h->flags &= ~HTML_NOSPACE(1 << 0);
948 h->flags &= ~HTML_NONEWLINE(1 << 9);
949 } else
950 h->flags |= HTML_NOSPACE(1 << 0) | HTML_NONEWLINE(1 << 9);
951
952 if (h->metaf != NULL((void*)0)) {
953 print_tagq(h, h->metaf);
954 h->metaf = NULL((void*)0);
955 } else if (t != NULL((void*)0))
956 print_tagq(h, t);
957
958 h->flags &= ~HTML_IGNDELIM(1 << 1);
959}
960
961void
962print_tagq(struct html *h, const struct tag *until)
963{
964 struct tag *this, *next;
965
966 for (this = h->tag; this != NULL((void*)0); this = next) {
967 next = this == until ? NULL((void*)0) : this->next;
968 print_ctag(h, this);
969 }
970}
971
972/*
973 * Close out all open elements up to but excluding suntil.
974 * Note that a paragraph just inside stays open together with it
975 * because paragraphs include subsequent phrasing content.
976 */
977void
978print_stagq(struct html *h, const struct tag *suntil)
979{
980 struct tag *this, *next;
981
982 for (this = h->tag; this != NULL((void*)0); this = next) {
983 next = this->next;
984 if (this == suntil || (next == suntil &&
985 (this->tag == TAG_P || this->tag == TAG_PRE)))
986 break;
987 print_ctag(h, this);
988 }
989}
990
991
992/***********************************************************************
993 * Low level output functions.
994 * They implement line breaking using a short static buffer.
995 ***********************************************************************/
996
997/*
998 * Buffer one HTML output byte.
999 * If the buffer is full, flush and deactivate it and start a new line.
1000 * If the buffer is inactive, print directly.
1001 */
1002static void
1003print_byte(struct html *h, char c)
1004{
1005 if ((h->flags & HTML_BUFFER(1 << 10)) == 0) {
1006 putchar(c)(!__isthreaded ? __sputc(c, (&__sF[1])) : (putc)(c, (&
__sF[1])))
;
1007 h->col++;
1008 return;
1009 }
1010
1011 if (h->col + h->bufcol < sizeof(h->buf)) {
1012 h->buf[h->bufcol++] = c;
1013 return;
1014 }
1015
1016 putchar('\n')(!__isthreaded ? __sputc('\n', (&__sF[1])) : (putc)('\n',
(&__sF[1])))
;
1017 h->col = 0;
1018 print_indent(h);
1019 putchar(' ')(!__isthreaded ? __sputc(' ', (&__sF[1])) : (putc)(' ', (
&__sF[1])))
;
1020 putchar(' ')(!__isthreaded ? __sputc(' ', (&__sF[1])) : (putc)(' ', (
&__sF[1])))
;
1021 fwrite(h->buf, h->bufcol, 1, stdout(&__sF[1]));
1022 putchar(c)(!__isthreaded ? __sputc(c, (&__sF[1])) : (putc)(c, (&
__sF[1])))
;
1023 h->col = (h->indent + 1) * 2 + h->bufcol + 1;
1024 h->bufcol = 0;
1025 h->flags &= ~HTML_BUFFER(1 << 10);
1026}
1027
1028/*
1029 * If something was printed on the current output line, end it.
1030 * Not to be called right after print_indent().
1031 */
1032void
1033print_endline(struct html *h)
1034{
1035 if (h->col == 0)
1036 return;
1037
1038 if (h->bufcol) {
1039 putchar(' ')(!__isthreaded ? __sputc(' ', (&__sF[1])) : (putc)(' ', (
&__sF[1])))
;
1040 fwrite(h->buf, h->bufcol, 1, stdout(&__sF[1]));
1041 h->bufcol = 0;
1042 }
1043 putchar('\n')(!__isthreaded ? __sputc('\n', (&__sF[1])) : (putc)('\n',
(&__sF[1])))
;
1044 h->col = 0;
1045 h->flags |= HTML_NOSPACE(1 << 0);
1046 h->flags &= ~HTML_BUFFER(1 << 10);
1047}
1048
1049/*
1050 * Flush the HTML output buffer.
1051 * If it is inactive, activate it.
1052 */
1053static void
1054print_endword(struct html *h)
1055{
1056 if (h->noindent) {
1057 print_byte(h, ' ');
1058 return;
1059 }
1060
1061 if ((h->flags & HTML_BUFFER(1 << 10)) == 0) {
1062 h->col++;
1063 h->flags |= HTML_BUFFER(1 << 10);
1064 } else if (h->bufcol) {
1065 putchar(' ')(!__isthreaded ? __sputc(' ', (&__sF[1])) : (putc)(' ', (
&__sF[1])))
;
1066 fwrite(h->buf, h->bufcol, 1, stdout(&__sF[1]));
1067 h->col += h->bufcol + 1;
1068 }
1069 h->bufcol = 0;
1070}
1071
1072/*
1073 * If at the beginning of a new output line,
1074 * perform indentation and mark the line as containing output.
1075 * Make sure to really produce some output right afterwards,
1076 * but do not use print_otag() for producing it.
1077 */
1078static void
1079print_indent(struct html *h)
1080{
1081 size_t i;
1082
1083 if (h->col || h->noindent)
1084 return;
1085
1086 h->col = h->indent * 2;
1087 for (i = 0; i < h->col; i++)
1088 putchar(' ')(!__isthreaded ? __sputc(' ', (&__sF[1])) : (putc)(' ', (
&__sF[1])))
;
1089}
1090
/*
 * Print or buffer some characters
 * depending on the current HTML output buffer state.
 * The string is passed on byte by byte without any escaping.
 */
static void
print_word(struct html *h, const char *cp)
{
	for (; *cp != '\0'; cp++)
		print_byte(h, *cp);
}