clang -cc1 -cc1 -triple amd64-unknown-openbsd7.0 -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name YAMLParser.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model static -mframe-pointer=all -relaxed-aliasing -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/usr/src/gnu/usr.bin/clang/libLLVMSupport/obj -resource-dir /usr/local/lib/clang/13.0.0 -I /usr/src/gnu/usr.bin/clang/libLLVMSupport/../../../llvm/llvm/include/llvm/ADT -I /usr/src/gnu/usr.bin/clang/libLLVMSupport/../../../llvm/llvm/include/llvm/Support -I /usr/src/gnu/usr.bin/clang/libLLVMSupport/../../../llvm/llvm/include -I /usr/src/gnu/usr.bin/clang/libLLVMSupport/../include -I /usr/src/gnu/usr.bin/clang/libLLVMSupport/obj -I /usr/src/gnu/usr.bin/clang/libLLVMSupport/obj/../include -D NDEBUG -D __STDC_LIMIT_MACROS -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D LLVM_PREFIX="/usr" -internal-isystem /usr/include/c++/v1 -internal-isystem /usr/local/lib/clang/13.0.0/include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/usr/src/gnu/usr.bin/clang/libLLVMSupport/obj -ferror-limit 19 -fvisibility-inlines-hidden -fwrapv -stack-protector 2 -fno-rtti -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-valloc -fno-builtin-free -fno-builtin-strdup -fno-builtin-strndup -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/ben/Projects/vmm/scan-build/2022-01-12-194120-40624-1 -x c++ /usr/src/gnu/usr.bin/clang/libLLVMSupport/../../../llvm/llvm/lib/Support/YAMLParser.cpp
1 | |
2 | |
3 | |
4 | |
5 | |
6 | |
7 | |
8 | |
9 | |
10 | |
11 | |
12 | |
13 | #include "llvm/Support/YAMLParser.h" |
14 | #include "llvm/ADT/AllocatorList.h" |
15 | #include "llvm/ADT/ArrayRef.h" |
16 | #include "llvm/ADT/None.h" |
17 | #include "llvm/ADT/STLExtras.h" |
18 | #include "llvm/ADT/SmallString.h" |
19 | #include "llvm/ADT/SmallVector.h" |
20 | #include "llvm/ADT/StringExtras.h" |
21 | #include "llvm/ADT/StringRef.h" |
22 | #include "llvm/ADT/Twine.h" |
23 | #include "llvm/Support/Compiler.h" |
24 | #include "llvm/Support/ErrorHandling.h" |
25 | #include "llvm/Support/MemoryBuffer.h" |
26 | #include "llvm/Support/SMLoc.h" |
27 | #include "llvm/Support/SourceMgr.h" |
28 | #include "llvm/Support/Unicode.h" |
29 | #include "llvm/Support/raw_ostream.h" |
30 | #include <algorithm> |
31 | #include <cassert> |
32 | #include <cstddef> |
33 | #include <cstdint> |
34 | #include <map> |
35 | #include <memory> |
36 | #include <string> |
37 | #include <system_error> |
38 | #include <utility> |
39 | |
40 | using namespace llvm; |
41 | using namespace yaml; |
42 | |
43 | enum UnicodeEncodingForm { |
44 | UEF_UTF32_LE, |
45 | UEF_UTF32_BE, |
46 | UEF_UTF16_LE, |
47 | UEF_UTF16_BE, |
48 | UEF_UTF8, |
49 | UEF_Unknown |
50 | }; |
51 | |
52 | |
53 | |
54 | using EncodingInfo = std::pair<UnicodeEncodingForm, unsigned>; |
55 | |
56 | |
57 | |
58 | |
59 | |
60 | |
61 | |
62 | static EncodingInfo getUnicodeEncoding(StringRef Input) { |
63 | if (Input.empty()) |
64 | return std::make_pair(UEF_Unknown, 0); |
65 | |
66 | switch (uint8_t(Input[0])) { |
67 | case 0x00: |
68 | if (Input.size() >= 4) { |
69 | if ( Input[1] == 0 |
70 | && uint8_t(Input[2]) == 0xFE |
71 | && uint8_t(Input[3]) == 0xFF) |
72 | return std::make_pair(UEF_UTF32_BE, 4); |
73 | if (Input[1] == 0 && Input[2] == 0 && Input[3] != 0) |
74 | return std::make_pair(UEF_UTF32_BE, 0); |
75 | } |
76 | |
77 | if (Input.size() >= 2 && Input[1] != 0) |
78 | return std::make_pair(UEF_UTF16_BE, 0); |
79 | return std::make_pair(UEF_Unknown, 0); |
80 | case 0xFF: |
81 | if ( Input.size() >= 4 |
82 | && uint8_t(Input[1]) == 0xFE |
83 | && Input[2] == 0 |
84 | && Input[3] == 0) |
85 | return std::make_pair(UEF_UTF32_LE, 4); |
86 | |
87 | if (Input.size() >= 2 && uint8_t(Input[1]) == 0xFE) |
88 | return std::make_pair(UEF_UTF16_LE, 2); |
89 | return std::make_pair(UEF_Unknown, 0); |
90 | case 0xFE: |
91 | if (Input.size() >= 2 && uint8_t(Input[1]) == 0xFF) |
92 | return std::make_pair(UEF_UTF16_BE, 2); |
93 | return std::make_pair(UEF_Unknown, 0); |
94 | case 0xEF: |
95 | if ( Input.size() >= 3 |
96 | && uint8_t(Input[1]) == 0xBB |
97 | && uint8_t(Input[2]) == 0xBF) |
98 | return std::make_pair(UEF_UTF8, 3); |
99 | return std::make_pair(UEF_Unknown, 0); |
100 | } |
101 | |
102 | |
103 | if (Input.size() >= 4 && Input[1] == 0 && Input[2] == 0 && Input[3] == 0) |
104 | return std::make_pair(UEF_UTF32_LE, 0); |
105 | |
106 | if (Input.size() >= 2 && Input[1] == 0) |
107 | return std::make_pair(UEF_UTF16_LE, 0); |
108 | |
109 | return std::make_pair(UEF_UTF8, 0); |
110 | } |
111 | |
112 | |
113 | void Node::anchor() {} |
114 | void NullNode::anchor() {} |
115 | void ScalarNode::anchor() {} |
116 | void BlockScalarNode::anchor() {} |
117 | void KeyValueNode::anchor() {} |
118 | void MappingNode::anchor() {} |
119 | void SequenceNode::anchor() {} |
120 | void AliasNode::anchor() {} |
121 | |
122 | namespace llvm { |
123 | namespace yaml { |
124 | |
125 | |
126 | struct Token { |
127 | enum TokenKind { |
128 | TK_Error, |
129 | TK_StreamStart, |
130 | TK_StreamEnd, |
131 | TK_VersionDirective, |
132 | TK_TagDirective, |
133 | TK_DocumentStart, |
134 | TK_DocumentEnd, |
135 | TK_BlockEntry, |
136 | TK_BlockEnd, |
137 | TK_BlockSequenceStart, |
138 | TK_BlockMappingStart, |
139 | TK_FlowEntry, |
140 | TK_FlowSequenceStart, |
141 | TK_FlowSequenceEnd, |
142 | TK_FlowMappingStart, |
143 | TK_FlowMappingEnd, |
144 | TK_Key, |
145 | TK_Value, |
146 | TK_Scalar, |
147 | TK_BlockScalar, |
148 | TK_Alias, |
149 | TK_Anchor, |
150 | TK_Tag |
151 | } Kind = TK_Error; |
152 | |
153 | |
154 | |
155 | StringRef Range; |
156 | |
157 | |
158 | std::string Value; |
159 | |
160 | Token() = default; |
161 | }; |
162 | |
163 | } |
164 | } |
165 | |
166 | using TokenQueueT = BumpPtrList<Token>; |
167 | |
168 | namespace { |
169 | |
170 | |
171 | |
172 | |
173 | |
174 | |
175 | |
176 | |
177 | |
178 | |
179 | struct SimpleKey { |
180 | TokenQueueT::iterator Tok; |
181 | unsigned Column = 0; |
182 | unsigned Line = 0; |
183 | unsigned FlowLevel = 0; |
184 | bool IsRequired = false; |
185 | |
186 | bool operator ==(const SimpleKey &Other) { |
187 | return Tok == Other.Tok; |
188 | } |
189 | }; |
190 | |
191 | } |
192 | |
193 | |
194 | |
195 | |
196 | using UTF8Decoded = std::pair<uint32_t, unsigned>; |
197 | |
198 | static UTF8Decoded decodeUTF8(StringRef Range) { |
199 | StringRef::iterator Position= Range.begin(); |
200 | StringRef::iterator End = Range.end(); |
201 | |
202 | |
203 | if (Position < End && (*Position & 0x80) == 0) { |
204 | return std::make_pair(*Position, 1); |
205 | } |
206 | |
207 | |
208 | if (Position + 1 < End && ((*Position & 0xE0) == 0xC0) && |
209 | ((*(Position + 1) & 0xC0) == 0x80)) { |
210 | uint32_t codepoint = ((*Position & 0x1F) << 6) | |
211 | (*(Position + 1) & 0x3F); |
212 | if (codepoint >= 0x80) |
213 | return std::make_pair(codepoint, 2); |
214 | } |
215 | |
216 | |
217 | if (Position + 2 < End && ((*Position & 0xF0) == 0xE0) && |
218 | ((*(Position + 1) & 0xC0) == 0x80) && |
219 | ((*(Position + 2) & 0xC0) == 0x80)) { |
220 | uint32_t codepoint = ((*Position & 0x0F) << 12) | |
221 | ((*(Position + 1) & 0x3F) << 6) | |
222 | (*(Position + 2) & 0x3F); |
223 | |
224 | |
225 | if (codepoint >= 0x800 && |
226 | (codepoint < 0xD800 || codepoint > 0xDFFF)) |
227 | return std::make_pair(codepoint, 3); |
228 | } |
229 | |
230 | |
231 | if (Position + 3 < End && ((*Position & 0xF8) == 0xF0) && |
232 | ((*(Position + 1) & 0xC0) == 0x80) && |
233 | ((*(Position + 2) & 0xC0) == 0x80) && |
234 | ((*(Position + 3) & 0xC0) == 0x80)) { |
235 | uint32_t codepoint = ((*Position & 0x07) << 18) | |
236 | ((*(Position + 1) & 0x3F) << 12) | |
237 | ((*(Position + 2) & 0x3F) << 6) | |
238 | (*(Position + 3) & 0x3F); |
239 | if (codepoint >= 0x10000 && codepoint <= 0x10FFFF) |
240 | return std::make_pair(codepoint, 4); |
241 | } |
242 | return std::make_pair(0, 0); |
243 | } |
244 | |
245 | namespace llvm { |
246 | namespace yaml { |
247 | |
248 | |
249 | class Scanner { |
250 | public: |
251 | Scanner(StringRef Input, SourceMgr &SM, bool ShowColors = true, |
252 | std::error_code *EC = nullptr); |
253 | Scanner(MemoryBufferRef Buffer, SourceMgr &SM_, bool ShowColors = true, |
254 | std::error_code *EC = nullptr); |
255 | |
256 | |
257 | Token &peekNext(); |
258 | |
259 | |
260 | Token getNext(); |
261 | |
262 | void printError(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Message, |
263 | ArrayRef<SMRange> Ranges = None) { |
264 | SM.PrintMessage(Loc, Kind, Message, Ranges, None, ShowColors); |
265 | } |
266 | |
267 | void setError(const Twine &Message, StringRef::iterator Position) { |
268 | if (Position >= End) |
269 | Position = End - 1; |
270 | |
271 | |
272 | if (EC) |
273 | *EC = make_error_code(std::errc::invalid_argument); |
274 | |
275 | |
276 | |
277 | if (!Failed) |
278 | printError(SMLoc::getFromPointer(Position), SourceMgr::DK_Error, Message); |
279 | Failed = true; |
280 | } |
281 | |
282 | |
283 | bool failed() { |
284 | return Failed; |
285 | } |
286 | |
287 | private: |
288 | void init(MemoryBufferRef Buffer); |
289 | |
290 | StringRef currentInput() { |
291 | return StringRef(Current, End - Current); |
292 | } |
293 | |
294 | |
295 | |
296 | |
297 | |
298 | |
299 | |
300 | UTF8Decoded decodeUTF8(StringRef::iterator Position) { |
301 | return ::decodeUTF8(StringRef(Position, End - Position)); |
302 | } |
303 | |
304 | |
305 | |
306 | |
307 | |
308 | |
309 | |
310 | |
311 | |
312 | |
313 | |
314 | |
315 | |
316 | |
317 | |
318 | |
319 | |
320 | |
321 | |
322 | |
323 | |
324 | |
325 | |
326 | |
327 | |
328 | |
329 | |
330 | |
331 | StringRef::iterator skip_nb_char(StringRef::iterator Position); |
332 | |
333 | |
334 | |
335 | |
336 | |
337 | |
338 | |
339 | StringRef::iterator skip_b_break(StringRef::iterator Position); |
340 | |
341 | |
342 | |
343 | |
344 | |
345 | |
346 | |
347 | StringRef::iterator skip_s_space(StringRef::iterator Position); |
348 | |
349 | |
350 | |
351 | |
352 | |
353 | |
354 | |
355 | StringRef::iterator skip_s_white(StringRef::iterator Position); |
356 | |
357 | |
358 | |
359 | |
360 | |
361 | |
362 | |
363 | StringRef::iterator skip_ns_char(StringRef::iterator Position); |
364 | |
365 | using SkipWhileFunc = StringRef::iterator (Scanner::*)(StringRef::iterator); |
366 | |
367 | |
368 | |
369 | |
370 | |
371 | |
372 | StringRef::iterator skip_while( SkipWhileFunc Func |
373 | , StringRef::iterator Position); |
374 | |
375 | |
376 | |
377 | void advanceWhile(SkipWhileFunc Func); |
378 | |
379 | |
380 | |
381 | |
382 | void scan_ns_uri_char(); |
383 | |
384 | |
385 | |
386 | |
387 | bool consume(uint32_t Expected); |
388 | |
389 | |
390 | void skip(uint32_t Distance); |
391 | |
392 | |
393 | |
394 | bool isBlankOrBreak(StringRef::iterator Position); |
395 | |
396 | |
397 | |
398 | |
399 | bool consumeLineBreakIfPresent(); |
400 | |
401 | |
402 | void saveSimpleKeyCandidate( TokenQueueT::iterator Tok |
403 | , unsigned AtColumn |
404 | , bool IsRequired); |
405 | |
406 | |
407 | |
408 | |
409 | |
410 | void removeStaleSimpleKeyCandidates(); |
411 | |
412 | |
413 | void removeSimpleKeyCandidatesOnFlowLevel(unsigned Level); |
414 | |
415 | |
416 | |
417 | bool unrollIndent(int ToColumn); |
418 | |
419 | |
420 | |
421 | bool rollIndent( int ToColumn |
422 | , Token::TokenKind Kind |
423 | , TokenQueueT::iterator InsertPoint); |
424 | |
425 | |
426 | |
427 | void skipComment(); |
428 | |
429 | |
430 | void scanToNextToken(); |
431 | |
432 | |
433 | bool scanStreamStart(); |
434 | |
435 | |
436 | bool scanStreamEnd(); |
437 | |
438 | |
439 | bool scanDirective(); |
440 | |
441 | |
442 | bool scanDocumentIndicator(bool IsStart); |
443 | |
444 | |
445 | bool scanFlowCollectionStart(bool IsSequence); |
446 | |
447 | |
448 | bool scanFlowCollectionEnd(bool IsSequence); |
449 | |
450 | |
451 | bool scanFlowEntry(); |
452 | |
453 | |
454 | bool scanBlockEntry(); |
455 | |
456 | |
457 | bool scanKey(); |
458 | |
459 | |
460 | bool scanValue(); |
461 | |
462 | |
463 | bool scanFlowScalar(bool IsDoubleQuoted); |
464 | |
465 | |
466 | bool scanPlainScalar(); |
467 | |
468 | |
469 | bool scanAliasOrAnchor(bool IsAlias); |
470 | |
471 | |
472 | bool scanBlockScalar(bool IsLiteral); |
473 | |
474 | |
475 | char scanBlockChompingIndicator(); |
476 | |
477 | |
478 | unsigned scanBlockIndentationIndicator(); |
479 | |
480 | |
481 | |
482 | |
483 | bool scanBlockScalarHeader(char &ChompingIndicator, unsigned &IndentIndicator, |
484 | bool &IsDone); |
485 | |
486 | |
487 | |
488 | |
489 | bool findBlockScalarIndent(unsigned &BlockIndent, unsigned BlockExitIndent, |
490 | unsigned &LineBreaks, bool &IsDone); |
491 | |
492 | |
493 | |
494 | |
495 | bool scanBlockScalarIndent(unsigned BlockIndent, unsigned BlockExitIndent, |
496 | bool &IsDone); |
497 | |
498 | |
499 | bool scanTag(); |
500 | |
501 | |
502 | bool fetchMoreTokens(); |
503 | |
504 | |
505 | SourceMgr &SM; |
506 | |
507 | |
508 | MemoryBufferRef InputBuffer; |
509 | |
510 | |
511 | StringRef::iterator Current; |
512 | |
513 | |
514 | StringRef::iterator End; |
515 | |
516 | |
517 | int Indent; |
518 | |
519 | |
520 | unsigned Column; |
521 | |
522 | |
523 | unsigned Line; |
524 | |
525 | |
526 | unsigned FlowLevel; |
527 | |
528 | |
529 | bool IsStartOfStream; |
530 | |
531 | |
532 | bool IsSimpleKeyAllowed; |
533 | |
534 | |
535 | bool Failed; |
536 | |
537 | |
538 | bool ShowColors; |
539 | |
540 | |
541 | |
542 | |
543 | TokenQueueT TokenQueue; |
544 | |
545 | |
546 | SmallVector<int, 4> Indents; |
547 | |
548 | |
549 | SmallVector<SimpleKey, 4> SimpleKeys; |
550 | |
551 | std::error_code *EC; |
552 | }; |
553 | |
554 | } |
555 | } |
556 | |
557 | |
558 | static void encodeUTF8( uint32_t UnicodeScalarValue |
559 | , SmallVectorImpl<char> &Result) { |
560 | if (UnicodeScalarValue <= 0x7F) { |
561 | Result.push_back(UnicodeScalarValue & 0x7F); |
562 | } else if (UnicodeScalarValue <= 0x7FF) { |
563 | uint8_t FirstByte = 0xC0 | ((UnicodeScalarValue & 0x7C0) >> 6); |
564 | uint8_t SecondByte = 0x80 | (UnicodeScalarValue & 0x3F); |
565 | Result.push_back(FirstByte); |
566 | Result.push_back(SecondByte); |
567 | } else if (UnicodeScalarValue <= 0xFFFF) { |
568 | uint8_t FirstByte = 0xE0 | ((UnicodeScalarValue & 0xF000) >> 12); |
569 | uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6); |
570 | uint8_t ThirdByte = 0x80 | (UnicodeScalarValue & 0x3F); |
571 | Result.push_back(FirstByte); |
572 | Result.push_back(SecondByte); |
573 | Result.push_back(ThirdByte); |
574 | } else if (UnicodeScalarValue <= 0x10FFFF) { |
575 | uint8_t FirstByte = 0xF0 | ((UnicodeScalarValue & 0x1F0000) >> 18); |
576 | uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0x3F000) >> 12); |
577 | uint8_t ThirdByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6); |
578 | uint8_t FourthByte = 0x80 | (UnicodeScalarValue & 0x3F); |
579 | Result.push_back(FirstByte); |
580 | Result.push_back(SecondByte); |
581 | Result.push_back(ThirdByte); |
582 | Result.push_back(FourthByte); |
583 | } |
584 | } |
585 | |
586 | bool yaml::dumpTokens(StringRef Input, raw_ostream &OS) { |
587 | SourceMgr SM; |
588 | Scanner scanner(Input, SM); |
589 | while (true) { |
590 | Token T = scanner.getNext(); |
591 | switch (T.Kind) { |
592 | case Token::TK_StreamStart: |
593 | OS << "Stream-Start: "; |
594 | break; |
595 | case Token::TK_StreamEnd: |
596 | OS << "Stream-End: "; |
597 | break; |
598 | case Token::TK_VersionDirective: |
599 | OS << "Version-Directive: "; |
600 | break; |
601 | case Token::TK_TagDirective: |
602 | OS << "Tag-Directive: "; |
603 | break; |
604 | case Token::TK_DocumentStart: |
605 | OS << "Document-Start: "; |
606 | break; |
607 | case Token::TK_DocumentEnd: |
608 | OS << "Document-End: "; |
609 | break; |
610 | case Token::TK_BlockEntry: |
611 | OS << "Block-Entry: "; |
612 | break; |
613 | case Token::TK_BlockEnd: |
614 | OS << "Block-End: "; |
615 | break; |
616 | case Token::TK_BlockSequenceStart: |
617 | OS << "Block-Sequence-Start: "; |
618 | break; |
619 | case Token::TK_BlockMappingStart: |
620 | OS << "Block-Mapping-Start: "; |
621 | break; |
622 | case Token::TK_FlowEntry: |
623 | OS << "Flow-Entry: "; |
624 | break; |
625 | case Token::TK_FlowSequenceStart: |
626 | OS << "Flow-Sequence-Start: "; |
627 | break; |
628 | case Token::TK_FlowSequenceEnd: |
629 | OS << "Flow-Sequence-End: "; |
630 | break; |
631 | case Token::TK_FlowMappingStart: |
632 | OS << "Flow-Mapping-Start: "; |
633 | break; |
634 | case Token::TK_FlowMappingEnd: |
635 | OS << "Flow-Mapping-End: "; |
636 | break; |
637 | case Token::TK_Key: |
638 | OS << "Key: "; |
639 | break; |
640 | case Token::TK_Value: |
641 | OS << "Value: "; |
642 | break; |
643 | case Token::TK_Scalar: |
644 | OS << "Scalar: "; |
645 | break; |
646 | case Token::TK_BlockScalar: |
647 | OS << "Block Scalar: "; |
648 | break; |
649 | case Token::TK_Alias: |
650 | OS << "Alias: "; |
651 | break; |
652 | case Token::TK_Anchor: |
653 | OS << "Anchor: "; |
654 | break; |
655 | case Token::TK_Tag: |
656 | OS << "Tag: "; |
657 | break; |
658 | case Token::TK_Error: |
659 | break; |
660 | } |
661 | OS << T.Range << "\n"; |
662 | if (T.Kind == Token::TK_StreamEnd) |
663 | break; |
664 | else if (T.Kind == Token::TK_Error) |
665 | return false; |
666 | } |
667 | return true; |
668 | } |
669 | |
670 | bool yaml::scanTokens(StringRef Input) { |
671 | SourceMgr SM; |
672 | Scanner scanner(Input, SM); |
673 | while (true) { |
674 | Token T = scanner.getNext(); |
675 | if (T.Kind == Token::TK_StreamEnd) |
676 | break; |
677 | else if (T.Kind == Token::TK_Error) |
678 | return false; |
679 | } |
680 | return true; |
681 | } |
682 | |
683 | std::string yaml::escape(StringRef Input, bool EscapePrintable) { |
684 | std::string EscapedInput; |
685 | for (StringRef::iterator i = Input.begin(), e = Input.end(); i != e; ++i) { |
686 | if (*i == '\\') |
687 | EscapedInput += "\\\\"; |
688 | else if (*i == '"') |
689 | EscapedInput += "\\\""; |
690 | else if (*i == 0) |
691 | EscapedInput += "\\0"; |
692 | else if (*i == 0x07) |
693 | EscapedInput += "\\a"; |
694 | else if (*i == 0x08) |
695 | EscapedInput += "\\b"; |
696 | else if (*i == 0x09) |
697 | EscapedInput += "\\t"; |
698 | else if (*i == 0x0A) |
699 | EscapedInput += "\\n"; |
700 | else if (*i == 0x0B) |
701 | EscapedInput += "\\v"; |
702 | else if (*i == 0x0C) |
703 | EscapedInput += "\\f"; |
704 | else if (*i == 0x0D) |
705 | EscapedInput += "\\r"; |
706 | else if (*i == 0x1B) |
707 | EscapedInput += "\\e"; |
708 | else if ((unsigned char)*i < 0x20) { |
709 | std::string HexStr = utohexstr(*i); |
710 | EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr; |
711 | } else if (*i & 0x80) { |
712 | UTF8Decoded UnicodeScalarValue |
713 | = decodeUTF8(StringRef(i, Input.end() - i)); |
714 | if (UnicodeScalarValue.second == 0) { |
715 | |
716 | SmallString<4> Val; |
717 | encodeUTF8(0xFFFD, Val); |
718 | llvm::append_range(EscapedInput, Val); |
719 | |
720 | return EscapedInput; |
721 | } |
722 | if (UnicodeScalarValue.first == 0x85) |
723 | EscapedInput += "\\N"; |
724 | else if (UnicodeScalarValue.first == 0xA0) |
725 | EscapedInput += "\\_"; |
726 | else if (UnicodeScalarValue.first == 0x2028) |
727 | EscapedInput += "\\L"; |
728 | else if (UnicodeScalarValue.first == 0x2029) |
729 | EscapedInput += "\\P"; |
730 | else if (!EscapePrintable && |
731 | sys::unicode::isPrintable(UnicodeScalarValue.first)) |
732 | EscapedInput += StringRef(i, UnicodeScalarValue.second); |
733 | else { |
734 | std::string HexStr = utohexstr(UnicodeScalarValue.first); |
735 | if (HexStr.size() <= 2) |
736 | EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr; |
737 | else if (HexStr.size() <= 4) |
738 | EscapedInput += "\\u" + std::string(4 - HexStr.size(), '0') + HexStr; |
739 | else if (HexStr.size() <= 8) |
740 | EscapedInput += "\\U" + std::string(8 - HexStr.size(), '0') + HexStr; |
741 | } |
742 | i += UnicodeScalarValue.second - 1; |
743 | } else |
744 | EscapedInput.push_back(*i); |
745 | } |
746 | return EscapedInput; |
747 | } |
748 | |
749 | llvm::Optional<bool> yaml::parseBool(StringRef S) { |
750 | switch (S.size()) { |
751 | case 1: |
752 | switch (S.front()) { |
753 | case 'y': |
754 | case 'Y': |
755 | return true; |
756 | case 'n': |
757 | case 'N': |
758 | return false; |
759 | default: |
760 | return None; |
761 | } |
762 | case 2: |
763 | switch (S.front()) { |
764 | case 'O': |
765 | if (S[1] == 'N') |
766 | return true; |
767 | LLVM_FALLTHROUGH; |
768 | case 'o': |
769 | if (S[1] == 'n') |
770 | return true; |
771 | return None; |
772 | case 'N': |
773 | if (S[1] == 'O') |
774 | return false; |
775 | LLVM_FALLTHROUGH; |
776 | case 'n': |
777 | if (S[1] == 'o') |
778 | return false; |
779 | return None; |
780 | default: |
781 | return None; |
782 | } |
783 | case 3: |
784 | switch (S.front()) { |
785 | case 'O': |
786 | if (S.drop_front() == "FF") |
787 | return false; |
788 | LLVM_FALLTHROUGH; |
789 | case 'o': |
790 | if (S.drop_front() == "ff") |
791 | return false; |
792 | return None; |
793 | case 'Y': |
794 | if (S.drop_front() == "ES") |
795 | return true; |
796 | LLVM_FALLTHROUGH; |
797 | case 'y': |
798 | if (S.drop_front() == "es") |
799 | return true; |
800 | return None; |
801 | default: |
802 | return None; |
803 | } |
804 | case 4: |
805 | switch (S.front()) { |
806 | case 'T': |
807 | if (S.drop_front() == "RUE") |
808 | return true; |
809 | LLVM_FALLTHROUGH; |
810 | case 't': |
811 | if (S.drop_front() == "rue") |
812 | return true; |
813 | return None; |
814 | default: |
815 | return None; |
816 | } |
817 | case 5: |
818 | switch (S.front()) { |
819 | case 'F': |
820 | if (S.drop_front() == "ALSE") |
821 | return false; |
822 | LLVM_FALLTHROUGH; |
823 | case 'f': |
824 | if (S.drop_front() == "alse") |
825 | return false; |
826 | return None; |
827 | default: |
828 | return None; |
829 | } |
830 | default: |
831 | return None; |
832 | } |
833 | } |
834 | |
835 | Scanner::Scanner(StringRef Input, SourceMgr &sm, bool ShowColors, |
836 | std::error_code *EC) |
837 | : SM(sm), ShowColors(ShowColors), EC(EC) { |
838 | init(MemoryBufferRef(Input, "YAML")); |
839 | } |
840 | |
841 | Scanner::Scanner(MemoryBufferRef Buffer, SourceMgr &SM_, bool ShowColors, |
842 | std::error_code *EC) |
843 | : SM(SM_), ShowColors(ShowColors), EC(EC) { |
844 | init(Buffer); |
845 | } |
846 | |
847 | void Scanner::init(MemoryBufferRef Buffer) { |
848 | InputBuffer = Buffer; |
849 | Current = InputBuffer.getBufferStart(); |
850 | End = InputBuffer.getBufferEnd(); |
851 | Indent = -1; |
852 | Column = 0; |
853 | Line = 0; |
854 | FlowLevel = 0; |
855 | IsStartOfStream = true; |
856 | IsSimpleKeyAllowed = true; |
857 | Failed = false; |
858 | std::unique_ptr<MemoryBuffer> InputBufferOwner = |
859 | MemoryBuffer::getMemBuffer(Buffer, false); |
860 | SM.AddNewSourceBuffer(std::move(InputBufferOwner), SMLoc()); |
861 | } |
862 | |
863 | Token &Scanner::peekNext() { |
864 | |
865 | |
866 | bool NeedMore = false; |
867 | while (true) { |
868 | if (TokenQueue.empty() || NeedMore) { |
869 | if (!fetchMoreTokens()) { |
870 | TokenQueue.clear(); |
871 | SimpleKeys.clear(); |
872 | TokenQueue.push_back(Token()); |
873 | return TokenQueue.front(); |
874 | } |
875 | } |
876 | assert(!TokenQueue.empty() && |
877 | "fetchMoreTokens lied about getting tokens!"); |
878 | |
879 | removeStaleSimpleKeyCandidates(); |
880 | SimpleKey SK; |
881 | SK.Tok = TokenQueue.begin(); |
882 | if (!is_contained(SimpleKeys, SK)) |
883 | break; |
884 | else |
885 | NeedMore = true; |
886 | } |
887 | return TokenQueue.front(); |
888 | } |
889 | |
890 | Token Scanner::getNext() { |
891 | Token Ret = peekNext(); |
892 | |
893 | if (!TokenQueue.empty()) |
894 | TokenQueue.pop_front(); |
895 | |
896 | |
897 | |
898 | if (TokenQueue.empty()) |
899 | TokenQueue.resetAlloc(); |
900 | |
901 | return Ret; |
902 | } |
903 | |
904 | StringRef::iterator Scanner::skip_nb_char(StringRef::iterator Position) { |
905 | if (Position == End) |
906 | return Position; |
907 | |
908 | if ( *Position == 0x09 |
909 | || (*Position >= 0x20 && *Position <= 0x7E)) |
910 | return Position + 1; |
911 | |
912 | |
913 | if (uint8_t(*Position) & 0x80) { |
914 | UTF8Decoded u8d = decodeUTF8(Position); |
915 | if ( u8d.second != 0 |
916 | && u8d.first != 0xFEFF |
917 | && ( u8d.first == 0x85 |
918 | || ( u8d.first >= 0xA0 |
919 | && u8d.first <= 0xD7FF) |
920 | || ( u8d.first >= 0xE000 |
921 | && u8d.first <= 0xFFFD) |
922 | || ( u8d.first >= 0x10000 |
923 | && u8d.first <= 0x10FFFF))) |
924 | return Position + u8d.second; |
925 | } |
926 | return Position; |
927 | } |
928 | |
929 | StringRef::iterator Scanner::skip_b_break(StringRef::iterator Position) { |
930 | if (Position == End) |
931 | return Position; |
932 | if (*Position == 0x0D) { |
933 | if (Position + 1 != End && *(Position + 1) == 0x0A) |
934 | return Position + 2; |
935 | return Position + 1; |
936 | } |
937 | |
938 | if (*Position == 0x0A) |
939 | return Position + 1; |
940 | return Position; |
941 | } |
942 | |
943 | StringRef::iterator Scanner::skip_s_space(StringRef::iterator Position) { |
944 | if (Position == End) |
945 | return Position; |
946 | if (*Position == ' ') |
947 | return Position + 1; |
948 | return Position; |
949 | } |
950 | |
951 | StringRef::iterator Scanner::skip_s_white(StringRef::iterator Position) { |
952 | if (Position == End) |
953 | return Position; |
954 | if (*Position == ' ' || *Position == '\t') |
955 | return Position + 1; |
956 | return Position; |
957 | } |
958 | |
959 | StringRef::iterator Scanner::skip_ns_char(StringRef::iterator Position) { |
960 | if (Position == End) |
961 | return Position; |
962 | if (*Position == ' ' || *Position == '\t') |
963 | return Position; |
964 | return skip_nb_char(Position); |
965 | } |
966 | |
967 | StringRef::iterator Scanner::skip_while( SkipWhileFunc Func |
968 | , StringRef::iterator Position) { |
969 | while (true) { |
970 | StringRef::iterator i = (this->*Func)(Position); |
971 | if (i == Position) |
972 | break; |
973 | Position = i; |
974 | } |
975 | return Position; |
976 | } |
977 | |
978 | void Scanner::advanceWhile(SkipWhileFunc Func) { |
979 | auto Final = skip_while(Func, Current); |
980 | Column += Final - Current; |
981 | Current = Final; |
982 | } |
983 | |
984 | static bool is_ns_hex_digit(const char C) { return isAlnum(C); } |
985 | |
986 | static bool is_ns_word_char(const char C) { return C == '-' || isAlpha(C); } |
987 | |
988 | void Scanner::scan_ns_uri_char() { |
989 | while (true) { |
990 | if (Current == End) |
991 | break; |
992 | if (( *Current == '%' |
993 | && Current + 2 < End |
994 | && is_ns_hex_digit(*(Current + 1)) |
995 | && is_ns_hex_digit(*(Current + 2))) |
996 | || is_ns_word_char(*Current) |
997 | || StringRef(Current, 1).find_first_of("#;/?:@&=+$,_.!~*'()[]") |
998 | != StringRef::npos) { |
999 | ++Current; |
1000 | ++Column; |
1001 | } else |
1002 | break; |
1003 | } |
1004 | } |
1005 | |
1006 | bool Scanner::consume(uint32_t Expected) { |
1007 | if (Expected >= 0x80) { |
1008 | setError("Cannot consume non-ascii characters", Current); |
1009 | return false; |
1010 | } |
1011 | if (Current == End) |
1012 | return false; |
1013 | if (uint8_t(*Current) >= 0x80) { |
1014 | setError("Cannot consume non-ascii characters", Current); |
1015 | return false; |
1016 | } |
1017 | if (uint8_t(*Current) == Expected) { |
1018 | ++Current; |
1019 | ++Column; |
1020 | return true; |
1021 | } |
1022 | return false; |
1023 | } |
1024 | |
1025 | void Scanner::skip(uint32_t Distance) { |
1026 | Current += Distance; |
1027 | Column += Distance; |
1028 | assert(Current <= End && "Skipped past the end"); |
1029 | } |
1030 | |
1031 | bool Scanner::isBlankOrBreak(StringRef::iterator Position) { |
1032 | if (Position == End) |
1033 | return false; |
1034 | return *Position == ' ' || *Position == '\t' || *Position == '\r' || |
1035 | *Position == '\n'; |
1036 | } |
1037 | |
1038 | bool Scanner::consumeLineBreakIfPresent() { |
1039 | auto Next = skip_b_break(Current); |
1040 | if (Next == Current) |
1041 | return false; |
1042 | Column = 0; |
1043 | ++Line; |
1044 | Current = Next; |
1045 | return true; |
1046 | } |
1047 | |
1048 | void Scanner::saveSimpleKeyCandidate( TokenQueueT::iterator Tok |
1049 | , unsigned AtColumn |
1050 | , bool IsRequired) { |
1051 | if (IsSimpleKeyAllowed) { |
1052 | SimpleKey SK; |
1053 | SK.Tok = Tok; |
1054 | SK.Line = Line; |
1055 | SK.Column = AtColumn; |
1056 | SK.IsRequired = IsRequired; |
1057 | SK.FlowLevel = FlowLevel; |
1058 | SimpleKeys.push_back(SK); |
1059 | } |
1060 | } |
1061 | |
1062 | void Scanner::removeStaleSimpleKeyCandidates() { |
1063 | for (SmallVectorImpl<SimpleKey>::iterator i = SimpleKeys.begin(); |
1064 | i != SimpleKeys.end();) { |
1065 | if (i->Line != Line || i->Column + 1024 < Column) { |
1066 | if (i->IsRequired) |
1067 | setError( "Could not find expected : for simple key" |
1068 | , i->Tok->Range.begin()); |
1069 | i = SimpleKeys.erase(i); |
1070 | } else |
1071 | ++i; |
1072 | } |
1073 | } |
1074 | |
1075 | void Scanner::removeSimpleKeyCandidatesOnFlowLevel(unsigned Level) { |
1076 | if (!SimpleKeys.empty() && (SimpleKeys.end() - 1)->FlowLevel == Level) |
1077 | SimpleKeys.pop_back(); |
1078 | } |
1079 | |
1080 | bool Scanner::unrollIndent(int ToColumn) { |
1081 | Token T; |
1082 | |
1083 | if (FlowLevel != 0) |
1084 | return true; |
1085 | |
1086 | while (Indent > ToColumn) { |
1087 | T.Kind = Token::TK_BlockEnd; |
1088 | T.Range = StringRef(Current, 1); |
1089 | TokenQueue.push_back(T); |
1090 | Indent = Indents.pop_back_val(); |
1091 | } |
1092 | |
1093 | return true; |
1094 | } |
1095 | |
1096 | bool Scanner::rollIndent( int ToColumn |
1097 | , Token::TokenKind Kind |
1098 | , TokenQueueT::iterator InsertPoint) { |
1099 | if (FlowLevel) |
1100 | return true; |
1101 | if (Indent < ToColumn) { |
1102 | Indents.push_back(Indent); |
1103 | Indent = ToColumn; |
1104 | |
1105 | Token T; |
1106 | T.Kind = Kind; |
1107 | T.Range = StringRef(Current, 0); |
1108 | TokenQueue.insert(InsertPoint, T); |
1109 | } |
1110 | return true; |
1111 | } |
1112 | |
1113 | void Scanner::skipComment() { |
1114 | if (Current == End || *Current != '#') |
1115 | return; |
1116 | while (true) { |
1117 | |
1118 | |
1119 | StringRef::iterator I = skip_nb_char(Current); |
1120 | if (I == Current) |
1121 | break; |
1122 | Current = I; |
1123 | ++Column; |
1124 | } |
1125 | } |
1126 | |
1127 | void Scanner::scanToNextToken() { |
1128 | while (true) { |
1129 | while (Current != End && (*Current == ' ' || *Current == '\t')) { |
1130 | skip(1); |
1131 | } |
1132 | |
1133 | skipComment(); |
1134 | |
1135 | |
1136 | StringRef::iterator i = skip_b_break(Current); |
1137 | if (i == Current) |
1138 | break; |
1139 | Current = i; |
1140 | ++Line; |
1141 | Column = 0; |
1142 | |
1143 | if (!FlowLevel) |
1144 | IsSimpleKeyAllowed = true; |
1145 | } |
1146 | } |
1147 | |
1148 | bool Scanner::scanStreamStart() { |
1149 | IsStartOfStream = false; |
1150 | |
1151 | EncodingInfo EI = getUnicodeEncoding(currentInput()); |
1152 | |
1153 | Token T; |
1154 | T.Kind = Token::TK_StreamStart; |
1155 | T.Range = StringRef(Current, EI.second); |
1156 | TokenQueue.push_back(T); |
1157 | Current += EI.second; |
1158 | return true; |
1159 | } |
1160 | |
1161 | bool Scanner::scanStreamEnd() { |
1162 | |
1163 | if (Column != 0) { |
1164 | Column = 0; |
1165 | ++Line; |
1166 | } |
1167 | |
1168 | unrollIndent(-1); |
1169 | SimpleKeys.clear(); |
1170 | IsSimpleKeyAllowed = false; |
1171 | |
1172 | Token T; |
1173 | T.Kind = Token::TK_StreamEnd; |
1174 | T.Range = StringRef(Current, 0); |
1175 | TokenQueue.push_back(T); |
1176 | return true; |
1177 | } |
1178 | |
1179 | bool Scanner::scanDirective() { |
1180 | |
1181 | unrollIndent(-1); |
1182 | SimpleKeys.clear(); |
1183 | IsSimpleKeyAllowed = false; |
1184 | |
1185 | StringRef::iterator Start = Current; |
1186 | consume('%'); |
1187 | StringRef::iterator NameStart = Current; |
1188 | Current = skip_while(&Scanner::skip_ns_char, Current); |
1189 | StringRef Name(NameStart, Current - NameStart); |
1190 | Current = skip_while(&Scanner::skip_s_white, Current); |
1191 | |
1192 | Token T; |
1193 | if (Name == "YAML") { |
1194 | Current = skip_while(&Scanner::skip_ns_char, Current); |
1195 | T.Kind = Token::TK_VersionDirective; |
1196 | T.Range = StringRef(Start, Current - Start); |
1197 | TokenQueue.push_back(T); |
1198 | return true; |
1199 | } else if(Name == "TAG") { |
1200 | Current = skip_while(&Scanner::skip_ns_char, Current); |
1201 | Current = skip_while(&Scanner::skip_s_white, Current); |
1202 | Current = skip_while(&Scanner::skip_ns_char, Current); |
1203 | T.Kind = Token::TK_TagDirective; |
1204 | T.Range = StringRef(Start, Current - Start); |
1205 | TokenQueue.push_back(T); |
1206 | return true; |
1207 | } |
1208 | return false; |
1209 | } |
1210 | |
1211 | bool Scanner::scanDocumentIndicator(bool IsStart) { |
1212 | unrollIndent(-1); |
1213 | SimpleKeys.clear(); |
1214 | IsSimpleKeyAllowed = false; |
1215 | |
1216 | Token T; |
1217 | T.Kind = IsStart ? Token::TK_DocumentStart : Token::TK_DocumentEnd; |
1218 | T.Range = StringRef(Current, 3); |
1219 | skip(3); |
1220 | TokenQueue.push_back(T); |
1221 | return true; |
1222 | } |
1223 | |
1224 | bool Scanner::scanFlowCollectionStart(bool IsSequence) { |
1225 | Token T; |
1226 | T.Kind = IsSequence ? Token::TK_FlowSequenceStart |
1227 | : Token::TK_FlowMappingStart; |
1228 | T.Range = StringRef(Current, 1); |
1229 | skip(1); |
1230 | TokenQueue.push_back(T); |
1231 | |
1232 | |
1233 | saveSimpleKeyCandidate(--TokenQueue.end(), Column - 1, false); |
1234 | |
1235 | |
1236 | IsSimpleKeyAllowed = true; |
1237 | ++FlowLevel; |
1238 | return true; |
1239 | } |
1240 | |
1241 | bool Scanner::scanFlowCollectionEnd(bool IsSequence) { |
1242 | removeSimpleKeyCandidatesOnFlowLevel(FlowLevel); |
1243 | IsSimpleKeyAllowed = false; |
1244 | Token T; |
1245 | T.Kind = IsSequence ? Token::TK_FlowSequenceEnd |
1246 | : Token::TK_FlowMappingEnd; |
1247 | T.Range = StringRef(Current, 1); |
1248 | skip(1); |
1249 | TokenQueue.push_back(T); |
1250 | if (FlowLevel) |
1251 | --FlowLevel; |
1252 | return true; |
1253 | } |
1254 | |
1255 | bool Scanner::scanFlowEntry() { |
1256 | removeSimpleKeyCandidatesOnFlowLevel(FlowLevel); |
1257 | IsSimpleKeyAllowed = true; |
1258 | Token T; |
1259 | T.Kind = Token::TK_FlowEntry; |
1260 | T.Range = StringRef(Current, 1); |
1261 | skip(1); |
1262 | TokenQueue.push_back(T); |
1263 | return true; |
1264 | } |
1265 | |
1266 | bool Scanner::scanBlockEntry() { |
1267 | rollIndent(Column, Token::TK_BlockSequenceStart, TokenQueue.end()); |
1268 | removeSimpleKeyCandidatesOnFlowLevel(FlowLevel); |
1269 | IsSimpleKeyAllowed = true; |
1270 | Token T; |
1271 | T.Kind = Token::TK_BlockEntry; |
1272 | T.Range = StringRef(Current, 1); |
1273 | skip(1); |
1274 | TokenQueue.push_back(T); |
1275 | return true; |
1276 | } |
1277 | |
1278 | bool Scanner::scanKey() { |
1279 | if (!FlowLevel) |
1280 | rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end()); |
1281 | |
1282 | removeSimpleKeyCandidatesOnFlowLevel(FlowLevel); |
1283 | IsSimpleKeyAllowed = !FlowLevel; |
1284 | |
1285 | Token T; |
1286 | T.Kind = Token::TK_Key; |
1287 | T.Range = StringRef(Current, 1); |
1288 | skip(1); |
1289 | TokenQueue.push_back(T); |
1290 | return true; |
1291 | } |
1292 | |
1293 | bool Scanner::scanValue() { |
1294 | |
1295 | |
1296 | if (!SimpleKeys.empty()) { |
1297 | SimpleKey SK = SimpleKeys.pop_back_val(); |
1298 | Token T; |
1299 | T.Kind = Token::TK_Key; |
1300 | T.Range = SK.Tok->Range; |
1301 | TokenQueueT::iterator i, e; |
1302 | for (i = TokenQueue.begin(), e = TokenQueue.end(); i != e; ++i) { |
1303 | if (i == SK.Tok) |
1304 | break; |
1305 | } |
1306 | if (i == e) { |
1307 | Failed = true; |
1308 | return false; |
1309 | } |
1310 | i = TokenQueue.insert(i, T); |
1311 | |
1312 | |
1313 | rollIndent(SK.Column, Token::TK_BlockMappingStart, i); |
1314 | |
1315 | IsSimpleKeyAllowed = false; |
1316 | } else { |
1317 | if (!FlowLevel) |
1318 | rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end()); |
1319 | IsSimpleKeyAllowed = !FlowLevel; |
1320 | } |
1321 | |
1322 | Token T; |
1323 | T.Kind = Token::TK_Value; |
1324 | T.Range = StringRef(Current, 1); |
1325 | skip(1); |
1326 | TokenQueue.push_back(T); |
1327 | return true; |
1328 | } |
1329 | |
1330 | |
1331 | |
1332 | LLVM_ATTRIBUTE_NOINLINE static bool |
1333 | wasEscaped(StringRef::iterator First, StringRef::iterator Position); |
1334 | |
1335 | |
1336 | |
1337 | static bool wasEscaped(StringRef::iterator First, |
1338 | StringRef::iterator Position) { |
1339 | assert(Position - 1 >= First); |
1340 | StringRef::iterator I = Position - 1; |
1341 | |
1342 | |
1343 | while (I >= First && *I == '\\') --I; |
1344 | |
1345 | |
1346 | return (Position - 1 - I) % 2 == 1; |
1347 | } |
1348 | |
1349 | bool Scanner::scanFlowScalar(bool IsDoubleQuoted) { |
1350 | StringRef::iterator Start = Current; |
1351 | unsigned ColStart = Column; |
1352 | if (IsDoubleQuoted) { |
1353 | do { |
1354 | ++Current; |
1355 | while (Current != End && *Current != '"') |
1356 | ++Current; |
1357 | |
1358 | |
1359 | } while ( Current != End |
1360 | && *(Current - 1) == '\\' |
1361 | && wasEscaped(Start + 1, Current)); |
1362 | } else { |
1363 | skip(1); |
1364 | while (Current != End) { |
1365 | |
1366 | if (Current + 1 < End && *Current == '\'' && *(Current + 1) == '\'') { |
1367 | skip(2); |
1368 | continue; |
1369 | } else if (*Current == '\'') |
1370 | break; |
1371 | StringRef::iterator i = skip_nb_char(Current); |
1372 | if (i == Current) { |
1373 | i = skip_b_break(Current); |
1374 | if (i == Current) |
1375 | break; |
1376 | Current = i; |
1377 | Column = 0; |
1378 | ++Line; |
1379 | } else { |
1380 | if (i == End) |
1381 | break; |
1382 | Current = i; |
1383 | ++Column; |
1384 | } |
1385 | } |
1386 | } |
1387 | |
1388 | if (Current == End) { |
1389 | setError("Expected quote at end of scalar", Current); |
1390 | return false; |
1391 | } |
1392 | |
1393 | skip(1); |
1394 | Token T; |
1395 | T.Kind = Token::TK_Scalar; |
1396 | T.Range = StringRef(Start, Current - Start); |
1397 | TokenQueue.push_back(T); |
1398 | |
1399 | saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false); |
1400 | |
1401 | IsSimpleKeyAllowed = false; |
1402 | |
1403 | return true; |
1404 | } |
1405 | |
1406 | bool Scanner::scanPlainScalar() { |
1407 | StringRef::iterator Start = Current; |
1408 | unsigned ColStart = Column; |
1409 | unsigned LeadingBlanks = 0; |
1410 | assert(Indent >= -1 && "Indent must be >= -1 !"); |
1411 | unsigned indent = static_cast<unsigned>(Indent + 1); |
1412 | while (Current != End) { |
1413 | if (*Current == '#') |
1414 | break; |
1415 | |
1416 | while (Current != End && !isBlankOrBreak(Current)) { |
1417 | if (FlowLevel && *Current == ':' && |
1418 | (Current + 1 == End || |
1419 | !(isBlankOrBreak(Current + 1) || *(Current + 1) == ','))) { |
1420 | setError("Found unexpected ':' while scanning a plain scalar", Current); |
1421 | return false; |
1422 | } |
1423 | |
1424 | |
1425 | if ( (*Current == ':' && isBlankOrBreak(Current + 1)) |
1426 | || ( FlowLevel |
1427 | && (StringRef(Current, 1).find_first_of(",:?[]{}") |
1428 | != StringRef::npos))) |
1429 | break; |
1430 | |
1431 | StringRef::iterator i = skip_nb_char(Current); |
1432 | if (i == Current) |
1433 | break; |
1434 | Current = i; |
1435 | ++Column; |
1436 | } |
1437 | |
1438 | |
1439 | if (!isBlankOrBreak(Current)) |
1440 | break; |
1441 | |
1442 | |
1443 | StringRef::iterator Tmp = Current; |
1444 | while (isBlankOrBreak(Tmp)) { |
1445 | StringRef::iterator i = skip_s_white(Tmp); |
1446 | if (i != Tmp) { |
1447 | if (LeadingBlanks && (Column < indent) && *Tmp == '\t') { |
1448 | setError("Found invalid tab character in indentation", Tmp); |
1449 | return false; |
1450 | } |
1451 | Tmp = i; |
1452 | ++Column; |
1453 | } else { |
1454 | i = skip_b_break(Tmp); |
1455 | if (!LeadingBlanks) |
1456 | LeadingBlanks = 1; |
1457 | Tmp = i; |
1458 | Column = 0; |
1459 | ++Line; |
1460 | } |
1461 | } |
1462 | |
1463 | if (!FlowLevel && Column < indent) |
1464 | break; |
1465 | |
1466 | Current = Tmp; |
1467 | } |
1468 | if (Start == Current) { |
1469 | setError("Got empty plain scalar", Start); |
1470 | return false; |
1471 | } |
1472 | Token T; |
1473 | T.Kind = Token::TK_Scalar; |
1474 | T.Range = StringRef(Start, Current - Start); |
1475 | TokenQueue.push_back(T); |
1476 | |
1477 | |
1478 | saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false); |
1479 | |
1480 | IsSimpleKeyAllowed = false; |
1481 | |
1482 | return true; |
1483 | } |
1484 | |
1485 | bool Scanner::scanAliasOrAnchor(bool IsAlias) { |
1486 | StringRef::iterator Start = Current; |
1487 | unsigned ColStart = Column; |
1488 | skip(1); |
1489 | while (Current != End) { |
1490 | if ( *Current == '[' || *Current == ']' |
1491 | || *Current == '{' || *Current == '}' |
1492 | || *Current == ',' |
1493 | || *Current == ':') |
1494 | break; |
1495 | StringRef::iterator i = skip_ns_char(Current); |
1496 | if (i == Current) |
1497 | break; |
1498 | Current = i; |
1499 | ++Column; |
1500 | } |
1501 | |
1502 | if (Start + 1 == Current) { |
1503 | setError("Got empty alias or anchor", Start); |
1504 | return false; |
1505 | } |
1506 | |
1507 | Token T; |
1508 | T.Kind = IsAlias ? Token::TK_Alias : Token::TK_Anchor; |
1509 | T.Range = StringRef(Start, Current - Start); |
1510 | TokenQueue.push_back(T); |
1511 | |
1512 | |
1513 | saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false); |
1514 | |
1515 | IsSimpleKeyAllowed = false; |
1516 | |
1517 | return true; |
1518 | } |
1519 | |
1520 | char Scanner::scanBlockChompingIndicator() { |
1521 | char Indicator = ' '; |
1522 | if (Current != End && (*Current == '+' || *Current == '-')) { |
1523 | Indicator = *Current; |
1524 | skip(1); |
1525 | } |
1526 | return Indicator; |
1527 | } |
1528 | |
1529 | |
1530 | |
1531 | |
1532 | |
1533 | static unsigned getChompedLineBreaks(char ChompingIndicator, |
1534 | unsigned LineBreaks, StringRef Str) { |
1535 | if (ChompingIndicator == '-') |
1536 | return 0; |
1537 | if (ChompingIndicator == '+') |
1538 | return LineBreaks; |
1539 | |
1540 | return Str.empty() ? 0 : 1; |
1541 | } |
1542 | |
1543 | unsigned Scanner::scanBlockIndentationIndicator() { |
1544 | unsigned Indent = 0; |
1545 | if (Current != End && (*Current >= '1' && *Current <= '9')) { |
1546 | Indent = unsigned(*Current - '0'); |
1547 | skip(1); |
1548 | } |
1549 | return Indent; |
1550 | } |
1551 | |
1552 | bool Scanner::scanBlockScalarHeader(char &ChompingIndicator, |
1553 | unsigned &IndentIndicator, bool &IsDone) { |
1554 | auto Start = Current; |
1555 | |
1556 | ChompingIndicator = scanBlockChompingIndicator(); |
1557 | IndentIndicator = scanBlockIndentationIndicator(); |
1558 | |
1559 | if (ChompingIndicator == ' ') |
1560 | ChompingIndicator = scanBlockChompingIndicator(); |
1561 | Current = skip_while(&Scanner::skip_s_white, Current); |
1562 | skipComment(); |
1563 | |
1564 | if (Current == End) { |
1565 | Token T; |
1566 | T.Kind = Token::TK_BlockScalar; |
1567 | T.Range = StringRef(Start, Current - Start); |
1568 | TokenQueue.push_back(T); |
1569 | IsDone = true; |
1570 | return true; |
1571 | } |
1572 | |
1573 | if (!consumeLineBreakIfPresent()) { |
1574 | setError("Expected a line break after block scalar header", Current); |
1575 | return false; |
1576 | } |
1577 | return true; |
1578 | } |
1579 | |
1580 | bool Scanner::findBlockScalarIndent(unsigned &BlockIndent, |
1581 | unsigned BlockExitIndent, |
1582 | unsigned &LineBreaks, bool &IsDone) { |
1583 | unsigned MaxAllSpaceLineCharacters = 0; |
1584 | StringRef::iterator LongestAllSpaceLine; |
| 1 | 'LongestAllSpaceLine' declared without an initial value | |
|
1585 | |
1586 | while (true) { |
| 2 | | Loop condition is true. Entering loop body | |
|
1587 | advanceWhile(&Scanner::skip_s_space); |
1588 | if (skip_nb_char(Current) != Current) { |
| |
1589 | |
1590 | if (Column <= BlockExitIndent) { |
| 4 | | Assuming 'BlockExitIndent' is < field 'Column' | |
|
| |
1591 | IsDone = true; |
1592 | return true; |
1593 | } |
1594 | |
1595 | BlockIndent = Column; |
1596 | if (MaxAllSpaceLineCharacters > BlockIndent) { |
| 6 | | Assuming 'MaxAllSpaceLineCharacters' is > 'BlockIndent' | |
|
| |
1597 | setError( |
| 8 | | 2nd function call argument is an uninitialized value |
|
1598 | "Leading all-spaces line must be smaller than the block indent", |
1599 | LongestAllSpaceLine); |
1600 | return false; |
1601 | } |
1602 | return true; |
1603 | } |
1604 | if (skip_b_break(Current) != Current && |
1605 | Column > MaxAllSpaceLineCharacters) { |
1606 | |
1607 | |
1608 | MaxAllSpaceLineCharacters = Column; |
1609 | LongestAllSpaceLine = Current; |
1610 | } |
1611 | |
1612 | |
1613 | if (Current == End) { |
1614 | IsDone = true; |
1615 | return true; |
1616 | } |
1617 | |
1618 | if (!consumeLineBreakIfPresent()) { |
1619 | IsDone = true; |
1620 | return true; |
1621 | } |
1622 | ++LineBreaks; |
1623 | } |
1624 | return true; |
1625 | } |
1626 | |
1627 | bool Scanner::scanBlockScalarIndent(unsigned BlockIndent, |
1628 | unsigned BlockExitIndent, bool &IsDone) { |
1629 | |
1630 | while (Column < BlockIndent) { |
1631 | auto I = skip_s_space(Current); |
1632 | if (I == Current) |
1633 | break; |
1634 | Current = I; |
1635 | ++Column; |
1636 | } |
1637 | |
1638 | if (skip_nb_char(Current) == Current) |
1639 | return true; |
1640 | |
1641 | if (Column <= BlockExitIndent) { |
1642 | IsDone = true; |
1643 | return true; |
1644 | } |
1645 | |
1646 | if (Column < BlockIndent) { |
1647 | if (Current != End && *Current == '#') { |
1648 | IsDone = true; |
1649 | return true; |
1650 | } |
1651 | setError("A text line is less indented than the block scalar", Current); |
1652 | return false; |
1653 | } |
1654 | return true; |
1655 | } |
1656 | |
1657 | bool Scanner::scanBlockScalar(bool IsLiteral) { |
1658 | |
1659 | assert(*Current == '|' || *Current == '>'); |
1660 | skip(1); |
1661 | |
1662 | char ChompingIndicator; |
1663 | unsigned BlockIndent; |
1664 | bool IsDone = false; |
1665 | if (!scanBlockScalarHeader(ChompingIndicator, BlockIndent, IsDone)) |
1666 | return false; |
1667 | if (IsDone) |
1668 | return true; |
1669 | |
1670 | auto Start = Current; |
1671 | unsigned BlockExitIndent = Indent < 0 ? 0 : (unsigned)Indent; |
1672 | unsigned LineBreaks = 0; |
1673 | if (BlockIndent == 0) { |
1674 | if (!findBlockScalarIndent(BlockIndent, BlockExitIndent, LineBreaks, |
1675 | IsDone)) |
1676 | return false; |
1677 | } |
1678 | |
1679 | |
1680 | SmallString<256> Str; |
1681 | while (!IsDone) { |
1682 | if (!scanBlockScalarIndent(BlockIndent, BlockExitIndent, IsDone)) |
1683 | return false; |
1684 | if (IsDone) |
1685 | break; |
1686 | |
1687 | |
1688 | auto LineStart = Current; |
1689 | advanceWhile(&Scanner::skip_nb_char); |
1690 | if (LineStart != Current) { |
1691 | Str.append(LineBreaks, '\n'); |
1692 | Str.append(StringRef(LineStart, Current - LineStart)); |
1693 | LineBreaks = 0; |
1694 | } |
1695 | |
1696 | |
1697 | if (Current == End) |
1698 | break; |
1699 | |
1700 | if (!consumeLineBreakIfPresent()) |
1701 | break; |
1702 | ++LineBreaks; |
1703 | } |
1704 | |
1705 | if (Current == End && !LineBreaks) |
1706 | |
1707 | LineBreaks = 1; |
1708 | Str.append(getChompedLineBreaks(ChompingIndicator, LineBreaks, Str), '\n'); |
1709 | |
1710 | |
1711 | if (!FlowLevel) |
1712 | IsSimpleKeyAllowed = true; |
1713 | |
1714 | Token T; |
1715 | T.Kind = Token::TK_BlockScalar; |
1716 | T.Range = StringRef(Start, Current - Start); |
1717 | T.Value = std::string(Str); |
1718 | TokenQueue.push_back(T); |
1719 | return true; |
1720 | } |
1721 | |
1722 | bool Scanner::scanTag() { |
1723 | StringRef::iterator Start = Current; |
1724 | unsigned ColStart = Column; |
1725 | skip(1); |
1726 | if (Current == End || isBlankOrBreak(Current)); |
1727 | else if (*Current == '<') { |
1728 | skip(1); |
1729 | scan_ns_uri_char(); |
1730 | if (!consume('>')) |
1731 | return false; |
1732 | } else { |
1733 | |
1734 | Current = skip_while(&Scanner::skip_ns_char, Current); |
1735 | } |
1736 | |
1737 | Token T; |
1738 | T.Kind = Token::TK_Tag; |
1739 | T.Range = StringRef(Start, Current - Start); |
1740 | TokenQueue.push_back(T); |
1741 | |
1742 | |
1743 | saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false); |
1744 | |
1745 | IsSimpleKeyAllowed = false; |
1746 | |
1747 | return true; |
1748 | } |
1749 | |
1750 | bool Scanner::fetchMoreTokens() { |
1751 | if (IsStartOfStream) |
1752 | return scanStreamStart(); |
1753 | |
1754 | scanToNextToken(); |
1755 | |
1756 | if (Current == End) |
1757 | return scanStreamEnd(); |
1758 | |
1759 | removeStaleSimpleKeyCandidates(); |
1760 | |
1761 | unrollIndent(Column); |
1762 | |
1763 | if (Column == 0 && *Current == '%') |
1764 | return scanDirective(); |
1765 | |
1766 | if (Column == 0 && Current + 4 <= End |
1767 | && *Current == '-' |
1768 | && *(Current + 1) == '-' |
1769 | && *(Current + 2) == '-' |
1770 | && (Current + 3 == End || isBlankOrBreak(Current + 3))) |
1771 | return scanDocumentIndicator(true); |
1772 | |
1773 | if (Column == 0 && Current + 4 <= End |
1774 | && *Current == '.' |
1775 | && *(Current + 1) == '.' |
1776 | && *(Current + 2) == '.' |
1777 | && (Current + 3 == End || isBlankOrBreak(Current + 3))) |
1778 | return scanDocumentIndicator(false); |
1779 | |
1780 | if (*Current == '[') |
1781 | return scanFlowCollectionStart(true); |
1782 | |
1783 | if (*Current == '{') |
1784 | return scanFlowCollectionStart(false); |
1785 | |
1786 | if (*Current == ']') |
1787 | return scanFlowCollectionEnd(true); |
1788 | |
1789 | if (*Current == '}') |
1790 | return scanFlowCollectionEnd(false); |
1791 | |
1792 | if (*Current == ',') |
1793 | return scanFlowEntry(); |
1794 | |
1795 | if (*Current == '-' && isBlankOrBreak(Current + 1)) |
1796 | return scanBlockEntry(); |
1797 | |
1798 | if (*Current == '?' && (FlowLevel || isBlankOrBreak(Current + 1))) |
1799 | return scanKey(); |
1800 | |
1801 | if (*Current == ':' && (FlowLevel || isBlankOrBreak(Current + 1))) |
1802 | return scanValue(); |
1803 | |
1804 | if (*Current == '*') |
1805 | return scanAliasOrAnchor(true); |
1806 | |
1807 | if (*Current == '&') |
1808 | return scanAliasOrAnchor(false); |
1809 | |
1810 | if (*Current == '!') |
1811 | return scanTag(); |
1812 | |
1813 | if (*Current == '|' && !FlowLevel) |
1814 | return scanBlockScalar(true); |
1815 | |
1816 | if (*Current == '>' && !FlowLevel) |
1817 | return scanBlockScalar(false); |
1818 | |
1819 | if (*Current == '\'') |
1820 | return scanFlowScalar(false); |
1821 | |
1822 | if (*Current == '"') |
1823 | return scanFlowScalar(true); |
1824 | |
1825 | |
1826 | StringRef FirstChar(Current, 1); |
1827 | if (!(isBlankOrBreak(Current) |
1828 | || FirstChar.find_first_of("-?:,[]{}#&*!|>'\"%@`") != StringRef::npos) |
1829 | || (*Current == '-' && !isBlankOrBreak(Current + 1)) |
1830 | || (!FlowLevel && (*Current == '?' || *Current == ':') |
1831 | && isBlankOrBreak(Current + 1)) |
1832 | || (!FlowLevel && *Current == ':' |
1833 | && Current + 2 < End |
1834 | && *(Current + 1) == ':' |
1835 | && !isBlankOrBreak(Current + 2))) |
1836 | return scanPlainScalar(); |
1837 | |
1838 | setError("Unrecognized character while tokenizing.", Current); |
1839 | return false; |
1840 | } |
1841 | |
1842 | Stream::Stream(StringRef Input, SourceMgr &SM, bool ShowColors, |
1843 | std::error_code *EC) |
1844 | : scanner(new Scanner(Input, SM, ShowColors, EC)), CurrentDoc() {} |
1845 | |
1846 | Stream::Stream(MemoryBufferRef InputBuffer, SourceMgr &SM, bool ShowColors, |
1847 | std::error_code *EC) |
1848 | : scanner(new Scanner(InputBuffer, SM, ShowColors, EC)), CurrentDoc() {} |
1849 | |
1850 | Stream::~Stream() = default; |
1851 | |
1852 | bool Stream::failed() { return scanner->failed(); } |
1853 | |
1854 | void Stream::printError(Node *N, const Twine &Msg, SourceMgr::DiagKind Kind) { |
1855 | printError(N ? N->getSourceRange() : SMRange(), Msg, Kind); |
1856 | } |
1857 | |
1858 | void Stream::printError(const SMRange &Range, const Twine &Msg, |
1859 | SourceMgr::DiagKind Kind) { |
1860 | scanner->printError(Range.Start, Kind, Msg, Range); |
1861 | } |
1862 | |
1863 | document_iterator Stream::begin() { |
1864 | if (CurrentDoc) |
1865 | report_fatal_error("Can only iterate over the stream once"); |
1866 | |
1867 | |
1868 | scanner->getNext(); |
1869 | |
1870 | CurrentDoc.reset(new Document(*this)); |
1871 | return document_iterator(CurrentDoc); |
1872 | } |
1873 | |
1874 | document_iterator Stream::end() { |
1875 | return document_iterator(); |
1876 | } |
1877 | |
1878 | void Stream::skip() { |
1879 | for (document_iterator i = begin(), e = end(); i != e; ++i) |
1880 | i->skip(); |
1881 | } |
1882 | |
1883 | Node::Node(unsigned int Type, std::unique_ptr<Document> &D, StringRef A, |
1884 | StringRef T) |
1885 | : Doc(D), TypeID(Type), Anchor(A), Tag(T) { |
1886 | SMLoc Start = SMLoc::getFromPointer(peekNext().Range.begin()); |
1887 | SourceRange = SMRange(Start, Start); |
1888 | } |
1889 | |
1890 | std::string Node::getVerbatimTag() const { |
1891 | StringRef Raw = getRawTag(); |
1892 | if (!Raw.empty() && Raw != "!") { |
1893 | std::string Ret; |
1894 | if (Raw.find_last_of('!') == 0) { |
1895 | Ret = std::string(Doc->getTagMap().find("!")->second); |
1896 | Ret += Raw.substr(1); |
1897 | return Ret; |
1898 | } else if (Raw.startswith("!!")) { |
1899 | Ret = std::string(Doc->getTagMap().find("!!")->second); |
1900 | Ret += Raw.substr(2); |
1901 | return Ret; |
1902 | } else { |
1903 | StringRef TagHandle = Raw.substr(0, Raw.find_last_of('!') + 1); |
1904 | std::map<StringRef, StringRef>::const_iterator It = |
1905 | Doc->getTagMap().find(TagHandle); |
1906 | if (It != Doc->getTagMap().end()) |
1907 | Ret = std::string(It->second); |
1908 | else { |
1909 | Token T; |
1910 | T.Kind = Token::TK_Tag; |
1911 | T.Range = TagHandle; |
1912 | setError(Twine("Unknown tag handle ") + TagHandle, T); |
1913 | } |
1914 | Ret += Raw.substr(Raw.find_last_of('!') + 1); |
1915 | return Ret; |
1916 | } |
1917 | } |
1918 | |
1919 | switch (getType()) { |
1920 | case NK_Null: |
1921 | return "tag:yaml.org,2002:null"; |
1922 | case NK_Scalar: |
1923 | case NK_BlockScalar: |
1924 | |
1925 | return "tag:yaml.org,2002:str"; |
1926 | case NK_Mapping: |
1927 | return "tag:yaml.org,2002:map"; |
1928 | case NK_Sequence: |
1929 | return "tag:yaml.org,2002:seq"; |
1930 | } |
1931 | |
1932 | return ""; |
1933 | } |
1934 | |
1935 | Token &Node::peekNext() { |
1936 | return Doc->peekNext(); |
1937 | } |
1938 | |
1939 | Token Node::getNext() { |
1940 | return Doc->getNext(); |
1941 | } |
1942 | |
1943 | Node *Node::parseBlockNode() { |
1944 | return Doc->parseBlockNode(); |
1945 | } |
1946 | |
1947 | BumpPtrAllocator &Node::getAllocator() { |
1948 | return Doc->NodeAllocator; |
1949 | } |
1950 | |
1951 | void Node::setError(const Twine &Msg, Token &Tok) const { |
1952 | Doc->setError(Msg, Tok); |
1953 | } |
1954 | |
1955 | bool Node::failed() const { |
1956 | return Doc->failed(); |
1957 | } |
1958 | |
1959 | StringRef ScalarNode::getValue(SmallVectorImpl<char> &Storage) const { |
1960 | |
1961 | if (Value[0] == '"') { |
1962 | |
1963 | StringRef UnquotedValue = Value.substr(1, Value.size() - 2); |
1964 | |
1965 | StringRef::size_type i = UnquotedValue.find_first_of("\\\r\n"); |
1966 | if (i != StringRef::npos) |
1967 | return unescapeDoubleQuoted(UnquotedValue, i, Storage); |
1968 | return UnquotedValue; |
1969 | } else if (Value[0] == '\'') { |
1970 | |
1971 | StringRef UnquotedValue = Value.substr(1, Value.size() - 2); |
1972 | StringRef::size_type i = UnquotedValue.find('\''); |
1973 | if (i != StringRef::npos) { |
1974 | |
1975 | Storage.clear(); |
1976 | Storage.reserve(UnquotedValue.size()); |
1977 | for (; i != StringRef::npos; i = UnquotedValue.find('\'')) { |
1978 | StringRef Valid(UnquotedValue.begin(), i); |
1979 | llvm::append_range(Storage, Valid); |
1980 | Storage.push_back('\''); |
1981 | UnquotedValue = UnquotedValue.substr(i + 2); |
1982 | } |
1983 | llvm::append_range(Storage, UnquotedValue); |
1984 | return StringRef(Storage.begin(), Storage.size()); |
1985 | } |
1986 | return UnquotedValue; |
1987 | } |
1988 | |
1989 | return Value.rtrim(' '); |
1990 | } |
1991 | |
1992 | StringRef ScalarNode::unescapeDoubleQuoted( StringRef UnquotedValue |
1993 | , StringRef::size_type i |
1994 | , SmallVectorImpl<char> &Storage) |
1995 | const { |
1996 | |
1997 | Storage.clear(); |
1998 | Storage.reserve(UnquotedValue.size()); |
1999 | for (; i != StringRef::npos; i = UnquotedValue.find_first_of("\\\r\n")) { |
2000 | |
2001 | StringRef Valid(UnquotedValue.begin(), i); |
2002 | llvm::append_range(Storage, Valid); |
2003 | |
2004 | UnquotedValue = UnquotedValue.substr(i); |
2005 | |
2006 | assert(!UnquotedValue.empty() && "Can't be empty!"); |
2007 | |
2008 | |
2009 | switch (UnquotedValue[0]) { |
2010 | case '\r': |
2011 | case '\n': |
2012 | Storage.push_back('\n'); |
2013 | if ( UnquotedValue.size() > 1 |
2014 | && (UnquotedValue[1] == '\r' || UnquotedValue[1] == '\n')) |
2015 | UnquotedValue = UnquotedValue.substr(1); |
2016 | UnquotedValue = UnquotedValue.substr(1); |
2017 | break; |
2018 | default: |
2019 | if (UnquotedValue.size() == 1) { |
2020 | Token T; |
2021 | T.Range = StringRef(UnquotedValue.begin(), 1); |
2022 | setError("Unrecognized escape code", T); |
2023 | return ""; |
2024 | } |
2025 | UnquotedValue = UnquotedValue.substr(1); |
2026 | switch (UnquotedValue[0]) { |
2027 | default: { |
2028 | Token T; |
2029 | T.Range = StringRef(UnquotedValue.begin(), 1); |
2030 | setError("Unrecognized escape code", T); |
2031 | return ""; |
2032 | } |
2033 | case '\r': |
2034 | case '\n': |
2035 | |
2036 | if ( UnquotedValue.size() > 1 |
2037 | && (UnquotedValue[1] == '\r' || UnquotedValue[1] == '\n')) |
2038 | UnquotedValue = UnquotedValue.substr(1); |
2039 | |
2040 | |
2041 | break; |
2042 | case '0': |
2043 | Storage.push_back(0x00); |
2044 | break; |
2045 | case 'a': |
2046 | Storage.push_back(0x07); |
2047 | break; |
2048 | case 'b': |
2049 | Storage.push_back(0x08); |
2050 | break; |
2051 | case 't': |
2052 | case 0x09: |
2053 | Storage.push_back(0x09); |
2054 | break; |
2055 | case 'n': |
2056 | Storage.push_back(0x0A); |
2057 | break; |
2058 | case 'v': |
2059 | Storage.push_back(0x0B); |
2060 | break; |
2061 | case 'f': |
2062 | Storage.push_back(0x0C); |
2063 | break; |
2064 | case 'r': |
2065 | Storage.push_back(0x0D); |
2066 | break; |
2067 | case 'e': |
2068 | Storage.push_back(0x1B); |
2069 | break; |
2070 | case ' ': |
2071 | Storage.push_back(0x20); |
2072 | break; |
2073 | case '"': |
2074 | Storage.push_back(0x22); |
2075 | break; |
2076 | case '/': |
2077 | Storage.push_back(0x2F); |
2078 | break; |
2079 | case '\\': |
2080 | Storage.push_back(0x5C); |
2081 | break; |
2082 | case 'N': |
2083 | encodeUTF8(0x85, Storage); |
2084 | break; |
2085 | case '_': |
2086 | encodeUTF8(0xA0, Storage); |
2087 | break; |
2088 | case 'L': |
2089 | encodeUTF8(0x2028, Storage); |
2090 | break; |
2091 | case 'P': |
2092 | encodeUTF8(0x2029, Storage); |
2093 | break; |
2094 | case 'x': { |
2095 | if (UnquotedValue.size() < 3) |
2096 | |
2097 | break; |
2098 | unsigned int UnicodeScalarValue; |
2099 | if (UnquotedValue.substr(1, 2).getAsInteger(16, UnicodeScalarValue)) |
2100 | |
2101 | UnicodeScalarValue = 0xFFFD; |
2102 | encodeUTF8(UnicodeScalarValue, Storage); |
2103 | UnquotedValue = UnquotedValue.substr(2); |
2104 | break; |
2105 | } |
2106 | case 'u': { |
2107 | if (UnquotedValue.size() < 5) |
2108 | |
2109 | break; |
2110 | unsigned int UnicodeScalarValue; |
2111 | if (UnquotedValue.substr(1, 4).getAsInteger(16, UnicodeScalarValue)) |
2112 | |
2113 | UnicodeScalarValue = 0xFFFD; |
2114 | encodeUTF8(UnicodeScalarValue, Storage); |
2115 | UnquotedValue = UnquotedValue.substr(4); |
2116 | break; |
2117 | } |
2118 | case 'U': { |
2119 | if (UnquotedValue.size() < 9) |
2120 | |
2121 | break; |
2122 | unsigned int UnicodeScalarValue; |
2123 | if (UnquotedValue.substr(1, 8).getAsInteger(16, UnicodeScalarValue)) |
2124 | |
2125 | UnicodeScalarValue = 0xFFFD; |
2126 | encodeUTF8(UnicodeScalarValue, Storage); |
2127 | UnquotedValue = UnquotedValue.substr(8); |
2128 | break; |
2129 | } |
2130 | } |
2131 | UnquotedValue = UnquotedValue.substr(1); |
2132 | } |
2133 | } |
2134 | llvm::append_range(Storage, UnquotedValue); |
2135 | return StringRef(Storage.begin(), Storage.size()); |
2136 | } |
2137 | |
2138 | Node *KeyValueNode::getKey() { |
2139 | if (Key) |
2140 | return Key; |
2141 | |
2142 | { |
2143 | Token &t = peekNext(); |
2144 | if ( t.Kind == Token::TK_BlockEnd |
2145 | || t.Kind == Token::TK_Value |
2146 | || t.Kind == Token::TK_Error) { |
2147 | return Key = new (getAllocator()) NullNode(Doc); |
2148 | } |
2149 | if (t.Kind == Token::TK_Key) |
2150 | getNext(); |
2151 | } |
2152 | |
2153 | |
2154 | Token &t = peekNext(); |
2155 | if (t.Kind == Token::TK_BlockEnd || t.Kind == Token::TK_Value) { |
2156 | return Key = new (getAllocator()) NullNode(Doc); |
2157 | } |
2158 | |
2159 | |
2160 | return Key = parseBlockNode(); |
2161 | } |
2162 | |
2163 | Node *KeyValueNode::getValue() { |
2164 | if (Value) |
2165 | return Value; |
2166 | |
2167 | if (Node* Key = getKey()) |
2168 | Key->skip(); |
2169 | else { |
2170 | setError("Null key in Key Value.", peekNext()); |
2171 | return Value = new (getAllocator()) NullNode(Doc); |
2172 | } |
2173 | |
2174 | if (failed()) |
2175 | return Value = new (getAllocator()) NullNode(Doc); |
2176 | |
2177 | |
2178 | { |
2179 | Token &t = peekNext(); |
2180 | if ( t.Kind == Token::TK_BlockEnd |
2181 | || t.Kind == Token::TK_FlowMappingEnd |
2182 | || t.Kind == Token::TK_Key |
2183 | || t.Kind == Token::TK_FlowEntry |
2184 | || t.Kind == Token::TK_Error) { |
2185 | return Value = new (getAllocator()) NullNode(Doc); |
2186 | } |
2187 | |
2188 | if (t.Kind != Token::TK_Value) { |
2189 | setError("Unexpected token in Key Value.", t); |
2190 | return Value = new (getAllocator()) NullNode(Doc); |
2191 | } |
2192 | getNext(); |
2193 | } |
2194 | |
2195 | |
2196 | Token &t = peekNext(); |
2197 | if (t.Kind == Token::TK_BlockEnd || t.Kind == Token::TK_Key) { |
2198 | return Value = new (getAllocator()) NullNode(Doc); |
2199 | } |
2200 | |
2201 | |
2202 | return Value = parseBlockNode(); |
2203 | } |
2204 | |
2205 | void MappingNode::increment() { |
2206 | if (failed()) { |
2207 | IsAtEnd = true; |
2208 | CurrentEntry = nullptr; |
2209 | return; |
2210 | } |
2211 | if (CurrentEntry) { |
2212 | CurrentEntry->skip(); |
2213 | if (Type == MT_Inline) { |
2214 | IsAtEnd = true; |
2215 | CurrentEntry = nullptr; |
2216 | return; |
2217 | } |
2218 | } |
2219 | Token T = peekNext(); |
2220 | if (T.Kind == Token::TK_Key || T.Kind == Token::TK_Scalar) { |
2221 | |
2222 | CurrentEntry = new (getAllocator()) KeyValueNode(Doc); |
2223 | } else if (Type == MT_Block) { |
2224 | switch (T.Kind) { |
2225 | case Token::TK_BlockEnd: |
2226 | getNext(); |
2227 | IsAtEnd = true; |
2228 | CurrentEntry = nullptr; |
2229 | break; |
2230 | default: |
2231 | setError("Unexpected token. Expected Key or Block End", T); |
2232 | LLVM_FALLTHROUGH; |
2233 | case Token::TK_Error: |
2234 | IsAtEnd = true; |
2235 | CurrentEntry = nullptr; |
2236 | } |
2237 | } else { |
2238 | switch (T.Kind) { |
2239 | case Token::TK_FlowEntry: |
2240 | |
2241 | getNext(); |
2242 | return increment(); |
2243 | case Token::TK_FlowMappingEnd: |
2244 | getNext(); |
2245 | LLVM_FALLTHROUGH; |
2246 | case Token::TK_Error: |
2247 | |
2248 | IsAtEnd = true; |
2249 | CurrentEntry = nullptr; |
2250 | break; |
2251 | default: |
2252 | setError( "Unexpected token. Expected Key, Flow Entry, or Flow " |
2253 | "Mapping End." |
2254 | , T); |
2255 | IsAtEnd = true; |
2256 | CurrentEntry = nullptr; |
2257 | } |
2258 | } |
2259 | } |
2260 | |
2261 | void SequenceNode::increment() { |
2262 | if (failed()) { |
2263 | IsAtEnd = true; |
2264 | CurrentEntry = nullptr; |
2265 | return; |
2266 | } |
2267 | if (CurrentEntry) |
2268 | CurrentEntry->skip(); |
2269 | Token T = peekNext(); |
2270 | if (SeqType == ST_Block) { |
2271 | switch (T.Kind) { |
2272 | case Token::TK_BlockEntry: |
2273 | getNext(); |
2274 | CurrentEntry = parseBlockNode(); |
2275 | if (!CurrentEntry) { |
2276 | IsAtEnd = true; |
2277 | CurrentEntry = nullptr; |
2278 | } |
2279 | break; |
2280 | case Token::TK_BlockEnd: |
2281 | getNext(); |
2282 | IsAtEnd = true; |
2283 | CurrentEntry = nullptr; |
2284 | break; |
2285 | default: |
2286 | setError( "Unexpected token. Expected Block Entry or Block End." |
2287 | , T); |
2288 | LLVM_FALLTHROUGH; |
2289 | case Token::TK_Error: |
2290 | IsAtEnd = true; |
2291 | CurrentEntry = nullptr; |
2292 | } |
2293 | } else if (SeqType == ST_Indentless) { |
2294 | switch (T.Kind) { |
2295 | case Token::TK_BlockEntry: |
2296 | getNext(); |
2297 | CurrentEntry = parseBlockNode(); |
2298 | if (!CurrentEntry) { |
2299 | IsAtEnd = true; |
2300 | CurrentEntry = nullptr; |
2301 | } |
2302 | break; |
2303 | default: |
2304 | case Token::TK_Error: |
2305 | IsAtEnd = true; |
2306 | CurrentEntry = nullptr; |
2307 | } |
2308 | } else if (SeqType == ST_Flow) { |
2309 | switch (T.Kind) { |
2310 | case Token::TK_FlowEntry: |
2311 | |
2312 | getNext(); |
2313 | WasPreviousTokenFlowEntry = true; |
2314 | return increment(); |
2315 | case Token::TK_FlowSequenceEnd: |
2316 | getNext(); |
2317 | LLVM_FALLTHROUGH; |
2318 | case Token::TK_Error: |
2319 | |
2320 | IsAtEnd = true; |
2321 | CurrentEntry = nullptr; |
2322 | break; |
2323 | case Token::TK_StreamEnd: |
2324 | case Token::TK_DocumentEnd: |
2325 | case Token::TK_DocumentStart: |
2326 | setError("Could not find closing ]!", T); |
2327 | |
2328 | IsAtEnd = true; |
2329 | CurrentEntry = nullptr; |
2330 | break; |
2331 | default: |
2332 | if (!WasPreviousTokenFlowEntry) { |
2333 | setError("Expected , between entries!", T); |
2334 | IsAtEnd = true; |
2335 | CurrentEntry = nullptr; |
2336 | break; |
2337 | } |
2338 | |
2339 | CurrentEntry = parseBlockNode(); |
2340 | if (!CurrentEntry) { |
2341 | IsAtEnd = true; |
2342 | } |
2343 | WasPreviousTokenFlowEntry = false; |
2344 | break; |
2345 | } |
2346 | } |
2347 | } |
2348 | |
2349 | Document::Document(Stream &S) : stream(S), Root(nullptr) { |
2350 | |
2351 | TagMap["!"] = "!"; |
2352 | TagMap["!!"] = "tag:yaml.org,2002:"; |
2353 | |
2354 | if (parseDirectives()) |
2355 | expectToken(Token::TK_DocumentStart); |
2356 | Token &T = peekNext(); |
2357 | if (T.Kind == Token::TK_DocumentStart) |
2358 | getNext(); |
2359 | } |
2360 | |
2361 | bool Document::skip() { |
2362 | if (stream.scanner->failed()) |
2363 | return false; |
2364 | if (!Root && !getRoot()) |
2365 | return false; |
2366 | Root->skip(); |
2367 | Token &T = peekNext(); |
2368 | if (T.Kind == Token::TK_StreamEnd) |
2369 | return false; |
2370 | if (T.Kind == Token::TK_DocumentEnd) { |
2371 | getNext(); |
2372 | return skip(); |
2373 | } |
2374 | return true; |
2375 | } |
2376 | |
2377 | Token &Document::peekNext() { |
2378 | return stream.scanner->peekNext(); |
2379 | } |
2380 | |
2381 | Token Document::getNext() { |
2382 | return stream.scanner->getNext(); |
2383 | } |
2384 | |
2385 | void Document::setError(const Twine &Message, Token &Location) const { |
2386 | stream.scanner->setError(Message, Location.Range.begin()); |
2387 | } |
2388 | |
2389 | bool Document::failed() const { |
2390 | return stream.scanner->failed(); |
2391 | } |
2392 | |
2393 | Node *Document::parseBlockNode() { |
2394 | Token T = peekNext(); |
2395 | |
2396 | Token AnchorInfo; |
2397 | Token TagInfo; |
2398 | parse_property: |
2399 | switch (T.Kind) { |
2400 | case Token::TK_Alias: |
2401 | getNext(); |
2402 | return new (NodeAllocator) AliasNode(stream.CurrentDoc, T.Range.substr(1)); |
2403 | case Token::TK_Anchor: |
2404 | if (AnchorInfo.Kind == Token::TK_Anchor) { |
2405 | setError("Already encountered an anchor for this node!", T); |
2406 | return nullptr; |
2407 | } |
2408 | AnchorInfo = getNext(); |
2409 | T = peekNext(); |
2410 | goto parse_property; |
2411 | case Token::TK_Tag: |
2412 | if (TagInfo.Kind == Token::TK_Tag) { |
2413 | setError("Already encountered a tag for this node!", T); |
2414 | return nullptr; |
2415 | } |
2416 | TagInfo = getNext(); |
2417 | T = peekNext(); |
2418 | goto parse_property; |
2419 | default: |
2420 | break; |
2421 | } |
2422 | |
2423 | switch (T.Kind) { |
2424 | case Token::TK_BlockEntry: |
2425 | |
2426 | |
2427 | |
2428 | return new (NodeAllocator) SequenceNode( stream.CurrentDoc |
2429 | , AnchorInfo.Range.substr(1) |
2430 | , TagInfo.Range |
2431 | , SequenceNode::ST_Indentless); |
2432 | case Token::TK_BlockSequenceStart: |
2433 | getNext(); |
2434 | return new (NodeAllocator) |
2435 | SequenceNode( stream.CurrentDoc |
2436 | , AnchorInfo.Range.substr(1) |
2437 | , TagInfo.Range |
2438 | , SequenceNode::ST_Block); |
2439 | case Token::TK_BlockMappingStart: |
2440 | getNext(); |
2441 | return new (NodeAllocator) |
2442 | MappingNode( stream.CurrentDoc |
2443 | , AnchorInfo.Range.substr(1) |
2444 | , TagInfo.Range |
2445 | , MappingNode::MT_Block); |
2446 | case Token::TK_FlowSequenceStart: |
2447 | getNext(); |
2448 | return new (NodeAllocator) |
2449 | SequenceNode( stream.CurrentDoc |
2450 | , AnchorInfo.Range.substr(1) |
2451 | , TagInfo.Range |
2452 | , SequenceNode::ST_Flow); |
2453 | case Token::TK_FlowMappingStart: |
2454 | getNext(); |
2455 | return new (NodeAllocator) |
2456 | MappingNode( stream.CurrentDoc |
2457 | , AnchorInfo.Range.substr(1) |
2458 | , TagInfo.Range |
2459 | , MappingNode::MT_Flow); |
2460 | case Token::TK_Scalar: |
2461 | getNext(); |
2462 | return new (NodeAllocator) |
2463 | ScalarNode( stream.CurrentDoc |
2464 | , AnchorInfo.Range.substr(1) |
2465 | , TagInfo.Range |
2466 | , T.Range); |
2467 | case Token::TK_BlockScalar: { |
2468 | getNext(); |
2469 | StringRef NullTerminatedStr(T.Value.c_str(), T.Value.length() + 1); |
2470 | StringRef StrCopy = NullTerminatedStr.copy(NodeAllocator).drop_back(); |
2471 | return new (NodeAllocator) |
2472 | BlockScalarNode(stream.CurrentDoc, AnchorInfo.Range.substr(1), |
2473 | TagInfo.Range, StrCopy, T.Range); |
2474 | } |
2475 | case Token::TK_Key: |
2476 | |
2477 | return new (NodeAllocator) |
2478 | MappingNode( stream.CurrentDoc |
2479 | , AnchorInfo.Range.substr(1) |
2480 | , TagInfo.Range |
2481 | , MappingNode::MT_Inline); |
2482 | case Token::TK_DocumentStart: |
2483 | case Token::TK_DocumentEnd: |
2484 | case Token::TK_StreamEnd: |
2485 | default: |
2486 | |
2487 | |
2488 | return new (NodeAllocator) NullNode(stream.CurrentDoc); |
2489 | case Token::TK_FlowMappingEnd: |
2490 | case Token::TK_FlowSequenceEnd: |
2491 | case Token::TK_FlowEntry: { |
2492 | if (Root && (isa<MappingNode>(Root) || isa<SequenceNode>(Root))) |
2493 | return new (NodeAllocator) NullNode(stream.CurrentDoc); |
2494 | |
2495 | setError("Unexpected token", T); |
2496 | return nullptr; |
2497 | } |
2498 | case Token::TK_Error: |
2499 | return nullptr; |
2500 | } |
2501 | llvm_unreachable("Control flow shouldn't reach here."); |
2502 | return nullptr; |
2503 | } |
2504 | |
2505 | bool Document::parseDirectives() { |
2506 | bool isDirective = false; |
2507 | while (true) { |
2508 | Token T = peekNext(); |
2509 | if (T.Kind == Token::TK_TagDirective) { |
2510 | parseTAGDirective(); |
2511 | isDirective = true; |
2512 | } else if (T.Kind == Token::TK_VersionDirective) { |
2513 | parseYAMLDirective(); |
2514 | isDirective = true; |
2515 | } else |
2516 | break; |
2517 | } |
2518 | return isDirective; |
2519 | } |
2520 | |
2521 | void Document::parseYAMLDirective() { |
2522 | getNext(); |
2523 | } |
2524 | |
2525 | void Document::parseTAGDirective() { |
2526 | Token Tag = getNext(); |
2527 | StringRef T = Tag.Range; |
2528 | |
2529 | T = T.substr(T.find_first_of(" \t")).ltrim(" \t"); |
2530 | std::size_t HandleEnd = T.find_first_of(" \t"); |
2531 | StringRef TagHandle = T.substr(0, HandleEnd); |
2532 | StringRef TagPrefix = T.substr(HandleEnd).ltrim(" \t"); |
2533 | TagMap[TagHandle] = TagPrefix; |
2534 | } |
2535 | |
2536 | bool Document::expectToken(int TK) { |
2537 | Token T = getNext(); |
2538 | if (T.Kind != TK) { |
2539 | setError("Unexpected token", T); |
2540 | return false; |
2541 | } |
2542 | return true; |
2543 | } |