Bug Summary

File:src/usr.bin/mandoc/html.c
Warning:line 303, column 10
Access to field 'next' results in a dereference of a null pointer (loaded from variable 'this')

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple amd64-unknown-openbsd7.4 -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name html.c -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 1 -pic-is-pie -mframe-pointer=all -relaxed-aliasing -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -target-feature +retpoline-indirect-calls -target-feature +retpoline-indirect-branches -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/usr/src/usr.bin/mandoc/obj -resource-dir /usr/local/llvm16/lib/clang/16 -internal-isystem /usr/local/llvm16/lib/clang/16/include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -fdebug-compilation-dir=/usr/src/usr.bin/mandoc/obj -ferror-limit 19 -fwrapv -D_RET_PROTECTOR -ret-protector -fcf-protection=branch -fno-jump-tables -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-valloc -fno-builtin-free -fno-builtin-strdup -fno-builtin-strndup -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/ben/Projects/scan/2024-01-11-140451-98009-1 -x c /usr/src/usr.bin/mandoc/html.c
/* $OpenBSD: html.c,v 1.150 2022/08/09 11:21:50 schwarze Exp $ */
/*
 * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
 * Copyright (c) 2011-2015, 2017-2021 Ingo Schwarze <schwarze@openbsd.org>
 * Copyright (c) 2022 Anna Vyalkova <cyber@sysrq.in>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *
 * Common functions for mandoc(1) HTML formatters.
 * For use by individual formatters and by the main program.
 */
22#include <sys/types.h>
23#include <sys/stat.h>
24
25#include <assert.h>
26#include <ctype.h>
27#include <stdarg.h>
28#include <stddef.h>
29#include <stdio.h>
30#include <stdint.h>
31#include <stdlib.h>
32#include <string.h>
33#include <unistd.h>
34
35#include "mandoc_aux.h"
36#include "mandoc_ohash.h"
37#include "mandoc.h"
38#include "roff.h"
39#include "out.h"
40#include "html.h"
41#include "manconf.h"
42#include "main.h"
43
/*
 * Static description of one HTML element type:
 * the element name and a set of HTML_* flags describing
 * where it may appear and how line breaks and indentation
 * are handled around it.
 */
struct htmldata {
	const char	 *name;		/* Element name printed inside tags. */
	int		  flags;	/* Bitwise OR of the flags below. */
#define	HTML_INPHRASE	 (1 << 0)  /* Can appear in phrasing context. */
#define	HTML_TOPHRASE	 (1 << 1)  /* Establishes phrasing context. */
#define	HTML_NOSTACK	 (1 << 2)  /* Does not have an end tag. */
#define	HTML_NLBEFORE	 (1 << 3)  /* Output line break before opening. */
#define	HTML_NLBEGIN	 (1 << 4)  /* Output line break after opening. */
#define	HTML_NLEND	 (1 << 5)  /* Output line break before closing. */
#define	HTML_NLAFTER	 (1 << 6)  /* Output line break after closing. */
#define	HTML_NLAROUND	 (HTML_NLBEFORE | HTML_NLAFTER)
#define	HTML_NLINSIDE	 (HTML_NLBEGIN | HTML_NLEND)
#define	HTML_NLALL	 (HTML_NLAROUND | HTML_NLINSIDE)
#define	HTML_INDENT	 (1 << 7)  /* Indent content by two spaces. */
#define	HTML_NOINDENT	 (1 << 8)  /* Exception: never indent content. */
};
60
61static const struct htmldata htmltags[TAG_MAX] = {
62 {"html", HTML_NLALL(((1 << 3) | (1 << 6)) | ((1 << 4) | (1 <<
5)))
},
63 {"head", HTML_NLALL(((1 << 3) | (1 << 6)) | ((1 << 4) | (1 <<
5)))
| HTML_INDENT(1 << 7)},
64 {"meta", HTML_NOSTACK(1 << 2) | HTML_NLALL(((1 << 3) | (1 << 6)) | ((1 << 4) | (1 <<
5)))
},
65 {"link", HTML_NOSTACK(1 << 2) | HTML_NLALL(((1 << 3) | (1 << 6)) | ((1 << 4) | (1 <<
5)))
},
66 {"style", HTML_NLALL(((1 << 3) | (1 << 6)) | ((1 << 4) | (1 <<
5)))
| HTML_INDENT(1 << 7)},
67 {"title", HTML_NLAROUND((1 << 3) | (1 << 6))},
68 {"body", HTML_NLALL(((1 << 3) | (1 << 6)) | ((1 << 4) | (1 <<
5)))
},
69 {"main", HTML_NLALL(((1 << 3) | (1 << 6)) | ((1 << 4) | (1 <<
5)))
},
70 {"div", HTML_NLAROUND((1 << 3) | (1 << 6))},
71 {"section", HTML_NLALL(((1 << 3) | (1 << 6)) | ((1 << 4) | (1 <<
5)))
},
72 {"nav", HTML_NLALL(((1 << 3) | (1 << 6)) | ((1 << 4) | (1 <<
5)))
},
73 {"table", HTML_NLALL(((1 << 3) | (1 << 6)) | ((1 << 4) | (1 <<
5)))
| HTML_INDENT(1 << 7)},
74 {"tr", HTML_NLALL(((1 << 3) | (1 << 6)) | ((1 << 4) | (1 <<
5)))
| HTML_INDENT(1 << 7)},
75 {"td", HTML_NLAROUND((1 << 3) | (1 << 6))},
76 {"li", HTML_NLAROUND((1 << 3) | (1 << 6)) | HTML_INDENT(1 << 7)},
77 {"ul", HTML_NLALL(((1 << 3) | (1 << 6)) | ((1 << 4) | (1 <<
5)))
| HTML_INDENT(1 << 7)},
78 {"ol", HTML_NLALL(((1 << 3) | (1 << 6)) | ((1 << 4) | (1 <<
5)))
| HTML_INDENT(1 << 7)},
79 {"dl", HTML_NLALL(((1 << 3) | (1 << 6)) | ((1 << 4) | (1 <<
5)))
| HTML_INDENT(1 << 7)},
80 {"dt", HTML_NLAROUND((1 << 3) | (1 << 6))},
81 {"dd", HTML_NLAROUND((1 << 3) | (1 << 6)) | HTML_INDENT(1 << 7)},
82 {"h2", HTML_TOPHRASE(1 << 1) | HTML_NLAROUND((1 << 3) | (1 << 6))},
83 {"h3", HTML_TOPHRASE(1 << 1) | HTML_NLAROUND((1 << 3) | (1 << 6))},
84 {"p", HTML_TOPHRASE(1 << 1) | HTML_NLAROUND((1 << 3) | (1 << 6)) | HTML_INDENT(1 << 7)},
85 {"pre", HTML_TOPHRASE(1 << 1) | HTML_NLAROUND((1 << 3) | (1 << 6)) | HTML_NOINDENT(1 << 8)},
86 {"a", HTML_INPHRASE(1 << 0) | HTML_TOPHRASE(1 << 1)},
87 {"b", HTML_INPHRASE(1 << 0) | HTML_TOPHRASE(1 << 1)},
88 {"cite", HTML_INPHRASE(1 << 0) | HTML_TOPHRASE(1 << 1)},
89 {"code", HTML_INPHRASE(1 << 0) | HTML_TOPHRASE(1 << 1)},
90 {"i", HTML_INPHRASE(1 << 0) | HTML_TOPHRASE(1 << 1)},
91 {"small", HTML_INPHRASE(1 << 0) | HTML_TOPHRASE(1 << 1)},
92 {"span", HTML_INPHRASE(1 << 0) | HTML_TOPHRASE(1 << 1)},
93 {"var", HTML_INPHRASE(1 << 0) | HTML_TOPHRASE(1 << 1)},
94 {"br", HTML_INPHRASE(1 << 0) | HTML_NOSTACK(1 << 2) | HTML_NLALL(((1 << 3) | (1 << 6)) | ((1 << 4) | (1 <<
5)))
},
95 {"hr", HTML_INPHRASE(1 << 0) | HTML_NOSTACK(1 << 2)},
96 {"mark", HTML_INPHRASE(1 << 0) },
97 {"math", HTML_INPHRASE(1 << 0) | HTML_NLALL(((1 << 3) | (1 << 6)) | ((1 << 4) | (1 <<
5)))
| HTML_INDENT(1 << 7)},
98 {"mrow", 0},
99 {"mi", 0},
100 {"mn", 0},
101 {"mo", 0},
102 {"msup", 0},
103 {"msub", 0},
104 {"msubsup", 0},
105 {"mfrac", 0},
106 {"msqrt", 0},
107 {"mfenced", 0},
108 {"mtable", 0},
109 {"mtr", 0},
110 {"mtd", 0},
111 {"munderover", 0},
112 {"munder", 0},
113 {"mover", 0},
114};
115
116/* Avoid duplicate HTML id= attributes. */
117
118struct id_entry {
119 int ord; /* Ordinal number of the latest occurrence. */
120 char id[]; /* The id= attribute without any ordinal suffix. */
121};
122static struct ohash id_unique;
123
/* Forward declarations of the file-local helpers defined below. */
static	void	 html_reset_internal(struct html *);
static	void	 print_byte(struct html *, char);
static	void	 print_endword(struct html *);
static	void	 print_indent(struct html *);
static	void	 print_word(struct html *, const char *);

static	void	 print_ctag(struct html *, struct tag *);
static	int	 print_escape(struct html *, char);
static	int	 print_encode(struct html *, const char *, const char *, int);
static	void	 print_href(struct html *, const char *, const char *, int);
static	void	 print_metaf(struct html *);
135
136
137void *
138html_alloc(const struct manoutput *outopts)
139{
140 struct html *h;
141
142 h = mandoc_calloc(1, sizeof(struct html));
143
144 h->tag = NULL((void *)0);
145 h->metac = h->metal = ESCAPE_FONTROMAN;
146 h->style = outopts->style;
147 if ((h->base_man1 = outopts->man) == NULL((void *)0))
148 h->base_man2 = NULL((void *)0);
149 else if ((h->base_man2 = strchr(h->base_man1, ';')) != NULL((void *)0))
150 *h->base_man2++ = '\0';
151 h->base_includes = outopts->includes;
152 if (outopts->fragment)
153 h->oflags |= HTML_FRAGMENT(1 << 0);
154 if (outopts->toc)
155 h->oflags |= HTML_TOC(1 << 1);
156
157 mandoc_ohash_init(&id_unique, 4, offsetof(struct id_entry, id)__builtin_offsetof(struct id_entry, id));
158
159 return h;
160}
161
162static void
163html_reset_internal(struct html *h)
164{
165 struct tag *tag;
166 struct id_entry *entry;
167 unsigned int slot;
168
169 while ((tag = h->tag) != NULL((void *)0)) {
170 h->tag = tag->next;
171 free(tag);
172 }
173 entry = ohash_first(&id_unique, &slot);
174 while (entry != NULL((void *)0)) {
175 free(entry);
176 entry = ohash_next(&id_unique, &slot);
177 }
178 ohash_delete(&id_unique);
179}
180
181void
182html_reset(void *p)
183{
184 html_reset_internal(p);
185 mandoc_ohash_init(&id_unique, 4, offsetof(struct id_entry, id)__builtin_offsetof(struct id_entry, id));
186}
187
/*
 * Release the formatter state p including its tag stack
 * and the contents of the id_unique table.
 */
void
html_free(void *p)
{
	html_reset_internal(p);
	free(p);
}
194
195void
196print_gen_head(struct html *h)
197{
198 struct tag *t;
199
200 print_otag(h, TAG_META, "?", "charset", "utf-8");
201 print_otag(h, TAG_META, "??", "name", "viewport",
202 "content", "width=device-width, initial-scale=1.0");
203 if (h->style != NULL((void *)0)) {
204 print_otag(h, TAG_LINK, "?h??", "rel", "stylesheet",
205 h->style, "type", "text/css", "media", "all");
206 return;
207 }
208
209 /*
210 * Print a minimal embedded style sheet.
211 */
212
213 t = print_otag(h, TAG_STYLE, "");
214 print_text(h, "table.head, table.foot { width: 100%; }");
215 print_endline(h);
216 print_text(h, "td.head-rtitle, td.foot-os { text-align: right; }");
217 print_endline(h);
218 print_text(h, "td.head-vol { text-align: center; }");
219 print_endline(h);
220 print_text(h, ".Nd, .Bf, .Op { display: inline; }");
221 print_endline(h);
222 print_text(h, ".Pa, .Ad { font-style: italic; }");
223 print_endline(h);
224 print_text(h, ".Ms { font-weight: bold; }");
225 print_endline(h);
226 print_text(h, ".Bl-diag ");
227 print_byte(h, '>');
228 print_text(h, " dt { font-weight: bold; }");
229 print_endline(h);
230 print_text(h, "code.Nm, .Fl, .Cm, .Ic, code.In, .Fd, .Fn, .Cd "
231 "{ font-weight: bold; font-family: inherit; }");
232 print_tagq(h, t);
233}
234
235int
236html_setfont(struct html *h, enum mandoc_esc font)
237{
238 switch (font) {
239 case ESCAPE_FONTPREV:
240 font = h->metal;
241 break;
242 case ESCAPE_FONTITALIC:
243 case ESCAPE_FONTBOLD:
244 case ESCAPE_FONTBI:
245 case ESCAPE_FONTROMAN:
246 case ESCAPE_FONTCR:
247 case ESCAPE_FONTCB:
248 case ESCAPE_FONTCI:
249 break;
250 case ESCAPE_FONT:
251 font = ESCAPE_FONTROMAN;
252 break;
253 default:
254 return 0;
255 }
256 h->metal = h->metac;
257 h->metac = font;
258 return 1;
259}
260
261static void
262print_metaf(struct html *h)
263{
264 if (h->metaf) {
265 print_tagq(h, h->metaf);
266 h->metaf = NULL((void *)0);
267 }
268 switch (h->metac) {
269 case ESCAPE_FONTITALIC:
270 h->metaf = print_otag(h, TAG_I, "");
271 break;
272 case ESCAPE_FONTBOLD:
273 h->metaf = print_otag(h, TAG_B, "");
274 break;
275 case ESCAPE_FONTBI:
276 h->metaf = print_otag(h, TAG_B, "");
277 print_otag(h, TAG_I, "");
278 break;
279 case ESCAPE_FONTCR:
280 h->metaf = print_otag(h, TAG_SPAN, "c", "Li");
281 break;
282 case ESCAPE_FONTCB:
283 h->metaf = print_otag(h, TAG_SPAN, "c", "Li");
284 print_otag(h, TAG_B, "");
285 break;
286 case ESCAPE_FONTCI:
287 h->metaf = print_otag(h, TAG_SPAN, "c", "Li");
288 print_otag(h, TAG_I, "");
289 break;
290 default:
291 break;
292 }
293}
294
295void
296html_close_paragraph(struct html *h)
297{
298 struct tag *this, *next;
299 int flags;
300
301 this = h->tag;
8
Null pointer value stored to 'this'
302 for (;;) {
9
Loop condition is true. Entering loop body
303 next = this->next;
10
Access to field 'next' results in a dereference of a null pointer (loaded from variable 'this')
304 flags = htmltags[this->tag].flags;
305 if (flags & (HTML_INPHRASE(1 << 0) | HTML_TOPHRASE(1 << 1)))
306 print_ctag(h, this);
307 if ((flags & HTML_INPHRASE(1 << 0)) == 0)
308 break;
309 this = next;
310 }
311}
312
313/*
314 * ROFF_nf switches to no-fill mode, ROFF_fi to fill mode.
315 * TOKEN_NONE does not switch. The old mode is returned.
316 */
317enum roff_tok
318html_fillmode(struct html *h, enum roff_tok want)
319{
320 struct tag *t;
321 enum roff_tok had;
322
323 for (t = h->tag; t != NULL((void *)0); t = t->next)
1
Assuming 't' is equal to NULL
324 if (t->tag == TAG_PRE)
325 break;
326
327 had = t
2.1
't' is equal to NULL
== NULL((void *)0) ? ROFF_fi : ROFF_nf;
2
Loop condition is false. Execution continues on line 327
3
'?' condition is true
328
329 if (want != had) {
4
Assuming 'want' is not equal to 'had'
5
Taking true branch
330 switch (want) {
6
Control jumps to 'case ROFF_nf:' at line 334
331 case ROFF_fi:
332 print_tagq(h, t);
333 break;
334 case ROFF_nf:
335 html_close_paragraph(h);
7
Calling 'html_close_paragraph'
336 print_otag(h, TAG_PRE, "");
337 break;
338 case TOKEN_NONE:
339 break;
340 default:
341 abort();
342 }
343 }
344 return had;
345}
346
347/*
348 * Allocate a string to be used for the "id=" attribute of an HTML
349 * element and/or as a segment identifier for a URI in an <a> element.
350 * The function may fail and return NULL if the node lacks text data
351 * to create the attribute from.
352 * The caller is responsible for free(3)ing the returned string.
353 *
354 * If the "unique" argument is non-zero, the "id_unique" ohash table
355 * is used for de-duplication. If the "unique" argument is 1,
356 * it is the first time the function is called for this tag and
357 * location, so if an ordinal suffix is needed, it is incremented.
358 * If the "unique" argument is 2, it is the second time the function
359 * is called for this tag and location, so the ordinal suffix
360 * remains unchanged.
361 */
362char *
363html_make_id(const struct roff_node *n, int unique)
364{
365 const struct roff_node *nch;
366 struct id_entry *entry;
367 char *buf, *cp;
368 size_t len;
369 unsigned int slot;
370
371 if (n->tag != NULL((void *)0))
372 buf = mandoc_strdup(n->tag);
373 else {
374 switch (n->tok) {
375 case MDOC_Sh:
376 case MDOC_Ss:
377 case MDOC_Sx:
378 case MAN_SH:
379 case MAN_SS:
380 for (nch = n->child; nch != NULL((void *)0); nch = nch->next)
381 if (nch->type != ROFFT_TEXT)
382 return NULL((void *)0);
383 buf = NULL((void *)0);
384 deroff(&buf, n);
385 if (buf == NULL((void *)0))
386 return NULL((void *)0);
387 break;
388 default:
389 if (n->child == NULL((void *)0) || n->child->type != ROFFT_TEXT)
390 return NULL((void *)0);
391 buf = mandoc_strdup(n->child->string);
392 break;
393 }
394 }
395
396 /*
397 * In ID attributes, only use ASCII characters that are
398 * permitted in URL-fragment strings according to the
399 * explicit list at:
400 * https://url.spec.whatwg.org/#url-fragment-string
401 * In addition, reserve '~' for ordinal suffixes.
402 */
403
404 for (cp = buf; *cp != '\0'; cp++) {
405 if (*cp == ASCII_HYPH28)
406 *cp = '-';
407 else if (isalnum((unsigned char)*cp) == 0 &&
408 strchr("!$&'()*+,-./:;=?@_", *cp) == NULL((void *)0))
409 *cp = '_';
410 }
411
412 if (unique == 0)
413 return buf;
414
415 /* Avoid duplicate HTML id= attributes. */
416
417 slot = ohash_qlookup(&id_unique, buf);
418 if ((entry = ohash_find(&id_unique, slot)) == NULL((void *)0)) {
419 len = strlen(buf) + 1;
420 entry = mandoc_malloc(sizeof(*entry) + len);
421 entry->ord = 1;
422 memcpy(entry->id, buf, len);
423 ohash_insert(&id_unique, slot, entry);
424 } else if (unique == 1)
425 entry->ord++;
426
427 if (entry->ord > 1) {
428 cp = buf;
429 mandoc_asprintf(&buf, "%s~%d", cp, entry->ord);
430 free(cp);
431 }
432 return buf;
433}
434
435static int
436print_escape(struct html *h, char c)
437{
438
439 switch (c) {
440 case '<':
441 print_word(h, "&lt;");
442 break;
443 case '>':
444 print_word(h, "&gt;");
445 break;
446 case '&':
447 print_word(h, "&amp;");
448 break;
449 case '"':
450 print_word(h, "&quot;");
451 break;
452 case ASCII_NBRSP31:
453 print_word(h, "&nbsp;");
454 break;
455 case ASCII_HYPH28:
456 print_byte(h, '-');
457 break;
458 case ASCII_BREAK29:
459 break;
460 default:
461 return 0;
462 }
463 return 1;
464}
465
466static int
467print_encode(struct html *h, const char *p, const char *pend, int norecurse)
468{
469 char numbuf[16];
470 const char *seq;
471 size_t sz;
472 int c, len, breakline, nospace;
473 enum mandoc_esc esc;
474 static const char rejs[10] = { ' ', '\\', '<', '>', '&', '"',
475 ASCII_NBRSP31, ASCII_HYPH28, ASCII_BREAK29, '\0' };
476
477 if (pend == NULL((void *)0))
478 pend = strchr(p, '\0');
479
480 breakline = 0;
481 nospace = 0;
482
483 while (p < pend) {
484 if (HTML_SKIPCHAR(1 << 6) & h->flags && '\\' != *p) {
485 h->flags &= ~HTML_SKIPCHAR(1 << 6);
486 p++;
487 continue;
488 }
489
490 for (sz = strcspn(p, rejs); sz-- && p < pend; p++)
491 print_byte(h, *p);
492
493 if (breakline &&
494 (p >= pend || *p == ' ' || *p == ASCII_NBRSP31)) {
495 print_otag(h, TAG_BR, "");
496 breakline = 0;
497 while (p < pend && (*p == ' ' || *p == ASCII_NBRSP31))
498 p++;
499 continue;
500 }
501
502 if (p >= pend)
503 break;
504
505 if (*p == ' ') {
506 print_endword(h);
507 p++;
508 continue;
509 }
510
511 if (print_escape(h, *p++))
512 continue;
513
514 esc = mandoc_escape(&p, &seq, &len);
515 switch (esc) {
516 case ESCAPE_FONT:
517 case ESCAPE_FONTPREV:
518 case ESCAPE_FONTBOLD:
519 case ESCAPE_FONTITALIC:
520 case ESCAPE_FONTBI:
521 case ESCAPE_FONTROMAN:
522 case ESCAPE_FONTCR:
523 case ESCAPE_FONTCB:
524 case ESCAPE_FONTCI:
525 if (0 == norecurse) {
526 h->flags |= HTML_NOSPACE(1 << 0);
527 if (html_setfont(h, esc))
528 print_metaf(h);
529 h->flags &= ~HTML_NOSPACE(1 << 0);
530 }
531 continue;
532 case ESCAPE_SKIPCHAR:
533 h->flags |= HTML_SKIPCHAR(1 << 6);
534 continue;
535 case ESCAPE_ERROR:
536 continue;
537 default:
538 break;
539 }
540
541 if (h->flags & HTML_SKIPCHAR(1 << 6)) {
542 h->flags &= ~HTML_SKIPCHAR(1 << 6);
543 continue;
544 }
545
546 switch (esc) {
547 case ESCAPE_UNICODE:
548 /* Skip past "u" header. */
549 c = mchars_num2uc(seq + 1, len - 1);
550 break;
551 case ESCAPE_NUMBERED:
552 c = mchars_num2char(seq, len);
553 if (c < 0)
554 continue;
555 break;
556 case ESCAPE_SPECIAL:
557 c = mchars_spec2cp(seq, len);
558 if (c <= 0)
559 continue;
560 break;
561 case ESCAPE_UNDEF:
562 c = *seq;
563 break;
564 case ESCAPE_DEVICE:
565 print_word(h, "html");
566 continue;
567 case ESCAPE_BREAK:
568 breakline = 1;
569 continue;
570 case ESCAPE_NOSPACE:
571 if ('\0' == *p)
572 nospace = 1;
573 continue;
574 case ESCAPE_OVERSTRIKE:
575 if (len == 0)
576 continue;
577 c = seq[len - 1];
578 break;
579 default:
580 continue;
581 }
582 if ((c < 0x20 && c != 0x09) ||
583 (c > 0x7E && c < 0xA0))
584 c = 0xFFFD;
585 if (c > 0x7E) {
586 (void)snprintf(numbuf, sizeof(numbuf), "&#x%.4X;", c);
587 print_word(h, numbuf);
588 } else if (print_escape(h, c) == 0)
589 print_byte(h, c);
590 }
591
592 return nospace;
593}
594
595static void
596print_href(struct html *h, const char *name, const char *sec, int man)
597{
598 struct stat sb;
599 const char *p, *pp;
600 char *filename;
601
602 if (man) {
603 pp = h->base_man1;
604 if (h->base_man2 != NULL((void *)0)) {
605 mandoc_asprintf(&filename, "%s.%s", name, sec);
606 if (stat(filename, &sb) == -1)
607 pp = h->base_man2;
608 free(filename);
609 }
610 } else
611 pp = h->base_includes;
612
613 while ((p = strchr(pp, '%')) != NULL((void *)0)) {
614 print_encode(h, pp, p, 1);
615 if (man && p[1] == 'S') {
616 if (sec == NULL((void *)0))
617 print_byte(h, '1');
618 else
619 print_encode(h, sec, NULL((void *)0), 1);
620 } else if ((man && p[1] == 'N') ||
621 (man == 0 && p[1] == 'I'))
622 print_encode(h, name, NULL((void *)0), 1);
623 else
624 print_encode(h, p, p + 2, 1);
625 pp = p + 2;
626 }
627 if (*pp != '\0')
628 print_encode(h, pp, NULL((void *)0), 1);
629}
630
631struct tag *
632print_otag(struct html *h, enum htmltag tag, const char *fmt, ...)
633{
634 va_list ap;
635 struct tag *t;
636 const char *attr;
637 char *arg1, *arg2;
638 int style_written, tflags;
639
640 tflags = htmltags[tag].flags;
641
642 /* Flow content is not allowed in phrasing context. */
643
644 if ((tflags & HTML_INPHRASE(1 << 0)) == 0) {
645 for (t = h->tag; t != NULL((void *)0); t = t->next) {
646 if (t->closed)
647 continue;
648 assert((htmltags[t->tag].flags & HTML_TOPHRASE) == 0)(((htmltags[t->tag].flags & (1 << 1)) == 0) ? (void
)0 : __assert2("/usr/src/usr.bin/mandoc/html.c", 648, __func__
, "(htmltags[t->tag].flags & HTML_TOPHRASE) == 0"))
;
649 break;
650 }
651
652 /*
653 * Always wrap phrasing elements in a paragraph
654 * unless already contained in some flow container;
655 * never put them directly into a section.
656 */
657
658 } else if (tflags & HTML_TOPHRASE(1 << 1) && h->tag->tag == TAG_SECTION)
659 print_otag(h, TAG_P, "c", "Pp");
660
661 /* Push this tag onto the stack of open scopes. */
662
663 if ((tflags & HTML_NOSTACK(1 << 2)) == 0) {
664 t = mandoc_malloc(sizeof(struct tag));
665 t->tag = tag;
666 t->next = h->tag;
667 t->refcnt = 0;
668 t->closed = 0;
669 h->tag = t;
670 } else
671 t = NULL((void *)0);
672
673 if (tflags & HTML_NLBEFORE(1 << 3))
674 print_endline(h);
675 if (h->col == 0)
676 print_indent(h);
677 else if ((h->flags & HTML_NOSPACE(1 << 0)) == 0) {
678 if (h->flags & HTML_KEEP(1 << 2))
679 print_word(h, "&#x00A0;");
680 else {
681 if (h->flags & HTML_PREKEEP(1 << 3))
682 h->flags |= HTML_KEEP(1 << 2);
683 print_endword(h);
684 }
685 }
686
687 if ( ! (h->flags & HTML_NONOSPACE(1 << 4)))
688 h->flags &= ~HTML_NOSPACE(1 << 0);
689 else
690 h->flags |= HTML_NOSPACE(1 << 0);
691
692 /* Print out the tag name and attributes. */
693
694 print_byte(h, '<');
695 print_word(h, htmltags[tag].name);
696
697 va_start(ap, fmt)__builtin_va_start((ap), fmt);
698
699 while (*fmt != '\0' && *fmt != 's') {
700
701 /* Parse attributes and arguments. */
702
703 arg1 = va_arg(ap, char *)__builtin_va_arg((ap), char *);
704 arg2 = NULL((void *)0);
705 switch (*fmt++) {
706 case 'c':
707 attr = "class";
708 break;
709 case 'h':
710 attr = "href";
711 break;
712 case 'i':
713 attr = "id";
714 break;
715 case 'r':
716 attr = "role";
717 break;
718 case '?':
719 attr = arg1;
720 arg1 = va_arg(ap, char *)__builtin_va_arg((ap), char *);
721 break;
722 default:
723 abort();
724 }
725 if (*fmt == 'M')
726 arg2 = va_arg(ap, char *)__builtin_va_arg((ap), char *);
727 if (arg1 == NULL((void *)0))
728 continue;
729
730 /* Print the attributes. */
731
732 print_byte(h, ' ');
733 print_word(h, attr);
734 print_byte(h, '=');
735 print_byte(h, '"');
736 switch (*fmt) {
737 case 'I':
738 print_href(h, arg1, NULL((void *)0), 0);
739 fmt++;
740 break;
741 case 'M':
742 print_href(h, arg1, arg2, 1);
743 fmt++;
744 break;
745 case 'R':
746 print_byte(h, '#');
747 print_encode(h, arg1, NULL((void *)0), 1);
748 fmt++;
749 break;
750 default:
751 print_encode(h, arg1, NULL((void *)0), 1);
752 break;
753 }
754 print_byte(h, '"');
755 }
756
757 style_written = 0;
758 while (*fmt++ == 's') {
759 arg1 = va_arg(ap, char *)__builtin_va_arg((ap), char *);
760 arg2 = va_arg(ap, char *)__builtin_va_arg((ap), char *);
761 if (arg2 == NULL((void *)0))
762 continue;
763 print_byte(h, ' ');
764 if (style_written == 0) {
765 print_word(h, "style=\"");
766 style_written = 1;
767 }
768 print_word(h, arg1);
769 print_byte(h, ':');
770 print_byte(h, ' ');
771 print_word(h, arg2);
772 print_byte(h, ';');
773 }
774 if (style_written)
775 print_byte(h, '"');
776
777 va_end(ap)__builtin_va_end((ap));
778
779 /* Accommodate for "well-formed" singleton escaping. */
780
781 if (htmltags[tag].flags & HTML_NOSTACK(1 << 2))
782 print_byte(h, '/');
783
784 print_byte(h, '>');
785
786 if (tflags & HTML_NLBEGIN(1 << 4))
787 print_endline(h);
788 else
789 h->flags |= HTML_NOSPACE(1 << 0);
790
791 if (tflags & HTML_INDENT(1 << 7))
792 h->indent++;
793 if (tflags & HTML_NOINDENT(1 << 8))
794 h->noindent++;
795
796 return t;
797}
798
799/*
800 * Print an element with an optional "id=" attribute.
801 * If the element has phrasing content and an "id=" attribute,
802 * also add a permalink: outside if it can be in phrasing context,
803 * inside otherwise.
804 */
805struct tag *
806print_otag_id(struct html *h, enum htmltag elemtype, const char *cattr,
807 struct roff_node *n)
808{
809 struct roff_node *nch;
810 struct tag *ret, *t;
811 char *id, *href;
812
813 ret = NULL((void *)0);
814 id = href = NULL((void *)0);
815 if (n->flags & NODE_ID(1 << 11))
816 id = html_make_id(n, 1);
817 if (n->flags & NODE_HREF(1 << 12))
818 href = id == NULL((void *)0) ? html_make_id(n, 2) : id;
819 if (href != NULL((void *)0) && htmltags[elemtype].flags & HTML_INPHRASE(1 << 0))
820 ret = print_otag(h, TAG_A, "chR", "permalink", href);
821 t = print_otag(h, elemtype, "ci", cattr, id);
822 if (ret == NULL((void *)0)) {
823 ret = t;
824 if (href != NULL((void *)0) && (nch = n->child) != NULL((void *)0)) {
825 /* man(7) is safe, it tags phrasing content only. */
826 if (n->tok > MDOC_MAX ||
827 htmltags[elemtype].flags & HTML_TOPHRASE(1 << 1))
828 nch = NULL((void *)0);
829 else /* For mdoc(7), beware of nested blocks. */
830 while (nch != NULL((void *)0) && nch->type == ROFFT_TEXT)
831 nch = nch->next;
832 if (nch == NULL((void *)0))
833 print_otag(h, TAG_A, "chR", "permalink", href);
834 }
835 }
836 free(id);
837 if (id == NULL((void *)0))
838 free(href);
839 return ret;
840}
841
842static void
843print_ctag(struct html *h, struct tag *tag)
844{
845 int tflags;
846
847 if (tag->closed == 0) {
848 tag->closed = 1;
849 if (tag == h->metaf)
850 h->metaf = NULL((void *)0);
851 if (tag == h->tblt)
852 h->tblt = NULL((void *)0);
853
854 tflags = htmltags[tag->tag].flags;
855 if (tflags & HTML_INDENT(1 << 7))
856 h->indent--;
857 if (tflags & HTML_NOINDENT(1 << 8))
858 h->noindent--;
859 if (tflags & HTML_NLEND(1 << 5))
860 print_endline(h);
861 print_indent(h);
862 print_byte(h, '<');
863 print_byte(h, '/');
864 print_word(h, htmltags[tag->tag].name);
865 print_byte(h, '>');
866 if (tflags & HTML_NLAFTER(1 << 6))
867 print_endline(h);
868 }
869 if (tag->refcnt == 0) {
870 h->tag = tag->next;
871 free(tag);
872 }
873}
874
/*
 * Print the HTML document type declaration on a line of its own.
 */
void
print_gen_decls(struct html *h)
{
	print_word(h, "<!DOCTYPE html>");
	print_endline(h);
}
881
882void
883print_gen_comment(struct html *h, struct roff_node *n)
884{
885 int wantblank;
886
887 print_word(h, "<!-- This is an automatically generated file."
888 " Do not edit.");
889 h->indent = 1;
890 wantblank = 0;
891 while (n != NULL((void *)0) && n->type == ROFFT_COMMENT) {
892 if (strstr(n->string, "-->") == NULL((void *)0) &&
893 (wantblank || *n->string != '\0')) {
894 print_endline(h);
895 print_indent(h);
896 print_word(h, n->string);
897 wantblank = *n->string != '\0';
898 }
899 n = n->next;
900 }
901 if (wantblank)
902 print_endline(h);
903 print_word(h, " -->");
904 print_endline(h);
905 h->indent = 0;
906}
907
/*
 * Print a word of text without a permalink;
 * see print_tagged_text() for details.
 */
void
print_text(struct html *h, const char *word)
{
	print_tagged_text(h, word, NULL);
}
913
914void
915print_tagged_text(struct html *h, const char *word, struct roff_node *n)
916{
917 struct tag *t;
918 char *href;
919
920 /*
921 * Always wrap text in a paragraph unless already contained in
922 * some flow container; never put it directly into a section.
923 */
924
925 if (h->tag->tag == TAG_SECTION)
926 print_otag(h, TAG_P, "c", "Pp");
927
928 /* Output whitespace before this text? */
929
930 if (h->col && (h->flags & HTML_NOSPACE(1 << 0)) == 0) {
931 if ( ! (HTML_KEEP(1 << 2) & h->flags)) {
932 if (HTML_PREKEEP(1 << 3) & h->flags)
933 h->flags |= HTML_KEEP(1 << 2);
934 print_endword(h);
935 } else
936 print_word(h, "&#x00A0;");
937 }
938
939 /*
940 * Optionally switch fonts, optionally write a permalink, then
941 * print the text, optionally surrounded by HTML whitespace.
942 */
943
944 assert(h->metaf == NULL)((h->metaf == ((void *)0)) ? (void)0 : __assert2("/usr/src/usr.bin/mandoc/html.c"
, 944, __func__, "h->metaf == NULL"))
;
945 print_metaf(h);
946 print_indent(h);
947
948 if (n != NULL((void *)0) && (href = html_make_id(n, 2)) != NULL((void *)0)) {
949 t = print_otag(h, TAG_A, "chR", "permalink", href);
950 free(href);
951 } else
952 t = NULL((void *)0);
953
954 if ( ! print_encode(h, word, NULL((void *)0), 0)) {
955 if ( ! (h->flags & HTML_NONOSPACE(1 << 4)))
956 h->flags &= ~HTML_NOSPACE(1 << 0);
957 h->flags &= ~HTML_NONEWLINE(1 << 9);
958 } else
959 h->flags |= HTML_NOSPACE(1 << 0) | HTML_NONEWLINE(1 << 9);
960
961 if (h->metaf != NULL((void *)0)) {
962 print_tagq(h, h->metaf);
963 h->metaf = NULL((void *)0);
964 } else if (t != NULL((void *)0))
965 print_tagq(h, t);
966
967 h->flags &= ~HTML_IGNDELIM(1 << 1);
968}
969
970void
971print_tagq(struct html *h, const struct tag *until)
972{
973 struct tag *this, *next;
974
975 for (this = h->tag; this != NULL((void *)0); this = next) {
976 next = this == until ? NULL((void *)0) : this->next;
977 print_ctag(h, this);
978 }
979}
980
981/*
982 * Close out all open elements up to but excluding suntil.
983 * Note that a paragraph just inside stays open together with it
984 * because paragraphs include subsequent phrasing content.
985 */
986void
987print_stagq(struct html *h, const struct tag *suntil)
988{
989 struct tag *this, *next;
990
991 for (this = h->tag; this != NULL((void *)0); this = next) {
992 next = this->next;
993 if (this == suntil || (next == suntil &&
994 (this->tag == TAG_P || this->tag == TAG_PRE)))
995 break;
996 print_ctag(h, this);
997 }
998}
999
1000
/***********************************************************************
 * Low level output functions.
 * They implement line breaking using a short static buffer.
 ***********************************************************************/
1005
1006/*
1007 * Buffer one HTML output byte.
1008 * If the buffer is full, flush and deactivate it and start a new line.
1009 * If the buffer is inactive, print directly.
1010 */
1011static void
1012print_byte(struct html *h, char c)
1013{
1014 if ((h->flags & HTML_BUFFER(1 << 10)) == 0) {
1015 putchar(c)(!__isthreaded ? __sputc(c, (&__sF[1])) : (putc)(c, (&
__sF[1])))
;
1016 h->col++;
1017 return;
1018 }
1019
1020 if (h->col + h->bufcol < sizeof(h->buf)) {
1021 h->buf[h->bufcol++] = c;
1022 return;
1023 }
1024
1025 putchar('\n')(!__isthreaded ? __sputc('\n', (&__sF[1])) : (putc)('\n',
(&__sF[1])))
;
1026 h->col = 0;
1027 print_indent(h);
1028 putchar(' ')(!__isthreaded ? __sputc(' ', (&__sF[1])) : (putc)(' ', (
&__sF[1])))
;
1029 putchar(' ')(!__isthreaded ? __sputc(' ', (&__sF[1])) : (putc)(' ', (
&__sF[1])))
;
1030 fwrite(h->buf, h->bufcol, 1, stdout(&__sF[1]));
1031 putchar(c)(!__isthreaded ? __sputc(c, (&__sF[1])) : (putc)(c, (&
__sF[1])))
;
1032 h->col = (h->indent + 1) * 2 + h->bufcol + 1;
1033 h->bufcol = 0;
1034 h->flags &= ~HTML_BUFFER(1 << 10);
1035}
1036
1037/*
1038 * If something was printed on the current output line, end it.
1039 * Not to be called right after print_indent().
1040 */
1041void
1042print_endline(struct html *h)
1043{
1044 if (h->col == 0)
1045 return;
1046
1047 if (h->bufcol) {
1048 putchar(' ')(!__isthreaded ? __sputc(' ', (&__sF[1])) : (putc)(' ', (
&__sF[1])))
;
1049 fwrite(h->buf, h->bufcol, 1, stdout(&__sF[1]));
1050 h->bufcol = 0;
1051 }
1052 putchar('\n')(!__isthreaded ? __sputc('\n', (&__sF[1])) : (putc)('\n',
(&__sF[1])))
;
1053 h->col = 0;
1054 h->flags |= HTML_NOSPACE(1 << 0);
1055 h->flags &= ~HTML_BUFFER(1 << 10);
1056}
1057
1058/*
1059 * Flush the HTML output buffer.
1060 * If it is inactive, activate it.
1061 */
1062static void
1063print_endword(struct html *h)
1064{
1065 if (h->noindent) {
1066 print_byte(h, ' ');
1067 return;
1068 }
1069
1070 if ((h->flags & HTML_BUFFER(1 << 10)) == 0) {
1071 h->col++;
1072 h->flags |= HTML_BUFFER(1 << 10);
1073 } else if (h->bufcol) {
1074 putchar(' ')(!__isthreaded ? __sputc(' ', (&__sF[1])) : (putc)(' ', (
&__sF[1])))
;
1075 fwrite(h->buf, h->bufcol, 1, stdout(&__sF[1]));
1076 h->col += h->bufcol + 1;
1077 }
1078 h->bufcol = 0;
1079}
1080
1081/*
1082 * If at the beginning of a new output line,
1083 * perform indentation and mark the line as containing output.
1084 * Make sure to really produce some output right afterwards,
1085 * but do not use print_otag() for producing it.
1086 */
1087static void
1088print_indent(struct html *h)
1089{
1090 size_t i;
1091
1092 if (h->col || h->noindent)
1093 return;
1094
1095 h->col = h->indent * 2;
1096 for (i = 0; i < h->col; i++)
1097 putchar(' ')(!__isthreaded ? __sputc(' ', (&__sF[1])) : (putc)(' ', (
&__sF[1])))
;
1098}
1099
/*
 * Print or buffer some characters
 * depending on the current HTML output buffer state.
 * The string is passed to print_byte() one byte at a time
 * without any escaping.
 */
static void
print_word(struct html *h, const char *cp)
{
	while (*cp != '\0')
		print_byte(h, *cp++);
}
1109}