//===- Preprocessor.cpp - C Language Family Preprocessor Implementation ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the Preprocessor interface.
//
//===----------------------------------------------------------------------===//
//
// Options to support:
//   -H       - Print the name of each header file used.
//   -d[DNI] - Dump various things.
//   -fworking-directory - #line's with preprocessor's working dir.
//   -fpreprocessed
//   -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD
//   -W*
//   -w
//
// Messages to emit:
//   "Multiple include guards may be useful for:\n"
//
//===----------------------------------------------------------------------===//
27#include "clang/Lex/Preprocessor.h"
28#include "clang/Basic/Builtins.h"
29#include "clang/Basic/FileManager.h"
30#include "clang/Basic/FileSystemStatCache.h"
31#include "clang/Basic/IdentifierTable.h"
32#include "clang/Basic/LLVM.h"
33#include "clang/Basic/LangOptions.h"
34#include "clang/Basic/Module.h"
35#include "clang/Basic/SourceLocation.h"
36#include "clang/Basic/SourceManager.h"
37#include "clang/Basic/TargetInfo.h"
38#include "clang/Lex/CodeCompletionHandler.h"
39#include "clang/Lex/ExternalPreprocessorSource.h"
40#include "clang/Lex/HeaderSearch.h"
41#include "clang/Lex/LexDiagnostic.h"
42#include "clang/Lex/Lexer.h"
43#include "clang/Lex/LiteralSupport.h"
44#include "clang/Lex/MacroArgs.h"
45#include "clang/Lex/MacroInfo.h"
46#include "clang/Lex/ModuleLoader.h"
47#include "clang/Lex/Pragma.h"
48#include "clang/Lex/PreprocessingRecord.h"
49#include "clang/Lex/PreprocessorLexer.h"
50#include "clang/Lex/PreprocessorOptions.h"
51#include "clang/Lex/ScratchBuffer.h"
52#include "clang/Lex/Token.h"
53#include "clang/Lex/TokenLexer.h"
54#include "llvm/ADT/APInt.h"
55#include "llvm/ADT/ArrayRef.h"
56#include "llvm/ADT/DenseMap.h"
57#include "llvm/ADT/STLExtras.h"
58#include "llvm/ADT/SmallString.h"
59#include "llvm/ADT/SmallVector.h"
60#include "llvm/ADT/StringRef.h"
61#include "llvm/ADT/StringSwitch.h"
62#include "llvm/Support/Capacity.h"
63#include "llvm/Support/ErrorHandling.h"
64#include "llvm/Support/MemoryBuffer.h"
65#include "llvm/Support/raw_ostream.h"
66#include <algorithm>
67#include <cassert>
68#include <memory>
69#include <string>
70#include <utility>
71#include <vector>
73using namespace clang;
77ExternalPreprocessorSource::~ExternalPreprocessorSource() = default;
79Preprocessor::Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts,
80 DiagnosticsEngine &diags, LangOptions &opts,
81 SourceManager &SM, HeaderSearch &Headers,
82 ModuleLoader &TheModuleLoader,
83 IdentifierInfoLookup *IILookup, bool OwnsHeaders,
84 TranslationUnitKind TUKind)
85 : PPOpts(std::move(PPOpts)), Diags(&diags), LangOpts(opts),
86 FileMgr(Headers.getFileMgr()), SourceMgr(SM),
87 ScratchBuf(new ScratchBuffer(SourceMgr)), HeaderInfo(Headers),
88 TheModuleLoader(TheModuleLoader), ExternalSource(nullptr),
89 // As the language options may have not been loaded yet (when
90 // deserializing an ASTUnit), adding keywords to the identifier table is
91 // deferred to Preprocessor::Initialize().
92 Identifiers(IILookup), PragmaHandlers(new PragmaNamespace(StringRef())),
93 TUKind(TUKind), SkipMainFilePreamble(0, true),
94 CurSubmoduleState(&NullSubmoduleState) {
95 OwnsHeaderSearch = OwnsHeaders;
97 // Default to discarding comments.
98 KeepComments = false;
99 KeepMacroComments = false;
100 SuppressIncludeNotFoundError = false;
102 // Macro expansion is enabled.
103 DisableMacroExpansion = false;
104 MacroExpansionInDirectivesOverride = false;
105 InMacroArgs = false;
106 ArgMacro = nullptr;
107 InMacroArgPreExpansion = false;
108 NumCachedTokenLexers = 0;
109 PragmasEnabled = true;
110 ParsingIfOrElifDirective = false;
111 PreprocessedOutput = false;
113 // We haven't read anything from the external source.
114 ReadMacrosFromExternalSource = false;
116 BuiltinInfo = std::make_unique<Builtin::Context>();
118 // "Poison" __VA_ARGS__, __VA_OPT__ which can only appear in the expansion of
119 // a macro. They get unpoisoned where it is allowed.
120 (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned();
121 SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use);
122 (Ident__VA_OPT__ = getIdentifierInfo("__VA_OPT__"))->setIsPoisoned();
123 SetPoisonReason(Ident__VA_OPT__,diag::ext_pp_bad_vaopt_use);
125 // Initialize the pragma handlers.
126 RegisterBuiltinPragmas();
128 // Initialize builtin macros like __LINE__ and friends.
129 RegisterBuiltinMacros();
131 if(LangOpts.Borland) {
132 Ident__exception_info = getIdentifierInfo("_exception_info");
133 Ident___exception_info = getIdentifierInfo("__exception_info");
134 Ident_GetExceptionInfo = getIdentifierInfo("GetExceptionInformation");
135 Ident__exception_code = getIdentifierInfo("_exception_code");
136 Ident___exception_code = getIdentifierInfo("__exception_code");
137 Ident_GetExceptionCode = getIdentifierInfo("GetExceptionCode");
138 Ident__abnormal_termination = getIdentifierInfo("_abnormal_termination");
139 Ident___abnormal_termination = getIdentifierInfo("__abnormal_termination");
140 Ident_AbnormalTermination = getIdentifierInfo("AbnormalTermination");
141 } else {
142 Ident__exception_info = Ident__exception_code = nullptr;
143 Ident__abnormal_termination = Ident___exception_info = nullptr;
144 Ident___exception_code = Ident___abnormal_termination = nullptr;
145 Ident_GetExceptionInfo = Ident_GetExceptionCode = nullptr;
146 Ident_AbnormalTermination = nullptr;
147 }
149 // If using a PCH where a #pragma hdrstop is expected, start skipping tokens.
150 if (usingPCHWithPragmaHdrStop())
151 SkippingUntilPragmaHdrStop = true;
153 // If using a PCH with a through header, start skipping tokens.
154 if (!this->PPOpts->PCHThroughHeader.empty() &&
155 !this->PPOpts->ImplicitPCHInclude.empty())
156 SkippingUntilPCHThroughHeader = true;
158 if (this->PPOpts->GeneratePreamble)
159 PreambleConditionalStack.startRecording();
161 ExcludedConditionalDirectiveSkipMappings =
162 this->PPOpts->ExcludedConditionalDirectiveSkipMappings;
163 if (ExcludedConditionalDirectiveSkipMappings)
164 ExcludedConditionalDirectiveSkipMappings->clear();
166 MaxTokens = LangOpts.MaxTokens;
169Preprocessor::~Preprocessor() {
170 assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!")((void)0);
172 IncludeMacroStack.clear();
174 // Destroy any macro definitions.
175 while (MacroInfoChain *I = MIChainHead) {
176 MIChainHead = I->Next;
177 I->~MacroInfoChain();
178 }
180 // Free any cached macro expanders.
181 // This populates MacroArgCache, so all TokenLexers need to be destroyed
182 // before the code below that frees up the MacroArgCache list.
183 std::fill(TokenLexerCache, TokenLexerCache + NumCachedTokenLexers, nullptr);
184 CurTokenLexer.reset();
186 // Free any cached MacroArgs.
187 for (MacroArgs *ArgList = MacroArgCache; ArgList;)
188 ArgList = ArgList->deallocate();
190 // Delete the header search info, if we own it.
191 if (OwnsHeaderSearch)
192 delete &HeaderInfo;
195void Preprocessor::Initialize(const TargetInfo &Target,
196 const TargetInfo *AuxTarget) {
197 assert((!this->Target || this->Target == &Target) &&((void)0)
198 "Invalid override of target information")((void)0);
199 this->Target = &Target;
201 assert((!this->AuxTarget || this->AuxTarget == AuxTarget) &&((void)0)
202 "Invalid override of aux target information.")((void)0);
203 this->AuxTarget = AuxTarget;
205 // Initialize information about built-ins.
206 BuiltinInfo->InitializeTarget(Target, AuxTarget);
207 HeaderInfo.setTarget(Target);
209 // Populate the identifier table with info about keywords for the current language.
210 Identifiers.AddKeywords(LangOpts);
213void Preprocessor::InitializeForModelFile() {
214 NumEnteredSourceFiles = 0;
216 // Reset pragmas
217 PragmaHandlersBackup = std::move(PragmaHandlers);
218 PragmaHandlers = std::make_unique<PragmaNamespace>(StringRef());
219 RegisterBuiltinPragmas();
221 // Reset PredefinesFileID
222 PredefinesFileID = FileID();
225void Preprocessor::FinalizeForModelFile() {
226 NumEnteredSourceFiles = 1;
228 PragmaHandlers = std::move(PragmaHandlersBackup);
231void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
232 llvm::errs() << tok::getTokenName(Tok.getKind()) << " '"
233 << getSpelling(Tok) << "'";
235 if (!DumpFlags) return;
237 llvm::errs() << "\t";
238 if (Tok.isAtStartOfLine())
239 llvm::errs() << " [StartOfLine]";
240 if (Tok.hasLeadingSpace())
241 llvm::errs() << " [LeadingSpace]";
242 if (Tok.isExpandDisabled())
243 llvm::errs() << " [ExpandDisabled]";
244 if (Tok.needsCleaning()) {
245 const char *Start = SourceMgr.getCharacterData(Tok.getLocation());
246 llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength())
247 << "']";
248 }
250 llvm::errs() << "\tLoc=<";
251 DumpLocation(Tok.getLocation());
252 llvm::errs() << ">";
255void Preprocessor::DumpLocation(SourceLocation Loc) const {
256 Loc.print(llvm::errs(), SourceMgr);
259void Preprocessor::DumpMacro(const MacroInfo &MI) const {
260 llvm::errs() << "MACRO: ";
261 for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) {
262 DumpToken(MI.getReplacementToken(i));
263 llvm::errs() << " ";
264 }
265 llvm::errs() << "\n";
268void Preprocessor::PrintStats() {
269 llvm::errs() << "\n*** Preprocessor Stats:\n";
270 llvm::errs() << NumDirectives << " directives found:\n";
271 llvm::errs() << " " << NumDefined << " #define.\n";
272 llvm::errs() << " " << NumUndefined << " #undef.\n";
273 llvm::errs() << " #include/#include_next/#import:\n";
274 llvm::errs() << " " << NumEnteredSourceFiles << " source files entered.\n";
275 llvm::errs() << " " << MaxIncludeStackDepth << " max include stack depth\n";
276 llvm::errs() << " " << NumIf << " #if/#ifndef/#ifdef.\n";
277 llvm::errs() << " " << NumElse << " #else/#elif/#elifdef/#elifndef.\n";
278 llvm::errs() << " " << NumEndif << " #endif.\n";
279 llvm::errs() << " " << NumPragma << " #pragma.\n";
280 llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n";
282 llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/"
283 << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, "
284 << NumFastMacroExpanded << " on the fast path.\n";
285 llvm::errs() << (NumFastTokenPaste+NumTokenPaste)
286 << " token paste (##) operations performed, "
287 << NumFastTokenPaste << " on the fast path.\n";
289 llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total";
291 llvm::errs() << "\n BumpPtr: " << BP.getTotalMemory();
292 llvm::errs() << "\n Macro Expanded Tokens: "
293 << llvm::capacity_in_bytes(MacroExpandedTokens);
294 llvm::errs() << "\n Predefines Buffer: " << Predefines.capacity();
295 // FIXME: List information for all submodules.
296 llvm::errs() << "\n Macros: "
297 << llvm::capacity_in_bytes(CurSubmoduleState->Macros);
298 llvm::errs() << "\n #pragma push_macro Info: "
299 << llvm::capacity_in_bytes(PragmaPushMacroInfo);
300 llvm::errs() << "\n Poison Reasons: "
301 << llvm::capacity_in_bytes(PoisonReasons);
302 llvm::errs() << "\n Comment Handlers: "
303 << llvm::capacity_in_bytes(CommentHandlers) << "\n";
307Preprocessor::macro_begin(bool IncludeExternalMacros) const {
308 if (IncludeExternalMacros && ExternalSource &&
309 !ReadMacrosFromExternalSource) {
310 ReadMacrosFromExternalSource = true;
311 ExternalSource->ReadDefinedMacros();
312 }
314 // Make sure we cover all macros in visible modules.
315 for (const ModuleMacro &Macro : ModuleMacros)
316 CurSubmoduleState->Macros.insert(std::make_pair(Macro.II, MacroState()));
318 return CurSubmoduleState->Macros.begin();
321size_t Preprocessor::getTotalMemory() const {
322 return BP.getTotalMemory()
323 + llvm::capacity_in_bytes(MacroExpandedTokens)
324 + Predefines.capacity() /* Predefines buffer. */
325 // FIXME: Include sizes from all submodules, and include MacroInfo sizes,
326 // and ModuleMacros.
327 + llvm::capacity_in_bytes(CurSubmoduleState->Macros)
328 + llvm::capacity_in_bytes(PragmaPushMacroInfo)
329 + llvm::capacity_in_bytes(PoisonReasons)
330 + llvm::capacity_in_bytes(CommentHandlers);
334Preprocessor::macro_end(bool IncludeExternalMacros) const {
335 if (IncludeExternalMacros && ExternalSource &&
336 !ReadMacrosFromExternalSource) {
337 ReadMacrosFromExternalSource = true;
338 ExternalSource->ReadDefinedMacros();
339 }
341 return CurSubmoduleState->Macros.end();
344/// Compares macro tokens with a specified token value sequence.
345static bool MacroDefinitionEquals(const MacroInfo *MI,
346 ArrayRef<TokenValue> Tokens) {
347 return Tokens.size() == MI->getNumTokens() &&
348 std::equal(Tokens.begin(), Tokens.end(), MI->tokens_begin());
351StringRef Preprocessor::getLastMacroWithSpelling(
352 SourceLocation Loc,
353 ArrayRef<TokenValue> Tokens) const {
354 SourceLocation BestLocation;
355 StringRef BestSpelling;
356 for (Preprocessor::macro_iterator I = macro_begin(), E = macro_end();
357 I != E; ++I) {
358 const MacroDirective::DefInfo
359 Def = I->second.findDirectiveAtLoc(Loc, SourceMgr);
360 if (!Def || !Def.getMacroInfo())
361 continue;
362 if (!Def.getMacroInfo()->isObjectLike())
363 continue;
364 if (!MacroDefinitionEquals(Def.getMacroInfo(), Tokens))
365 continue;
366 SourceLocation Location = Def.getLocation();
367 // Choose the macro defined latest.
368 if (BestLocation.isInvalid() ||
369 (Location.isValid() &&
370 SourceMgr.isBeforeInTranslationUnit(BestLocation, Location))) {
371 BestLocation = Location;
372 BestSpelling = I->first->getName();
373 }
374 }
375 return BestSpelling;
378void Preprocessor::recomputeCurLexerKind() {
379 if (CurLexer)
380 CurLexerKind = CLK_Lexer;
381 else if (CurTokenLexer)
382 CurLexerKind = CLK_TokenLexer;
383 else
384 CurLexerKind = CLK_CachingLexer;
387bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File,
388 unsigned CompleteLine,
389 unsigned CompleteColumn) {
390 assert(File)((void)0);
391 assert(CompleteLine && CompleteColumn && "Starts from 1:1")((void)0);
392 assert(!CodeCompletionFile && "Already set")((void)0);
394 // Load the actual file's contents.
395 Optional<llvm::MemoryBufferRef> Buffer =
396 SourceMgr.getMemoryBufferForFileOrNone(File);
397 if (!Buffer)
398 return true;
400 // Find the byte position of the truncation point.
401 const char *Position = Buffer->getBufferStart();
402 for (unsigned Line = 1; Line < CompleteLine; ++Line) {
403 for (; *Position; ++Position) {
404 if (*Position != '\r' && *Position != '\n')
405 continue;
407 // Eat \r\n or \n\r as a single line.
408 if ((Position[1] == '\r' || Position[1] == '\n') &&
409 Position[0] != Position[1])
410 ++Position;
411 ++Position;
412 break;
413 }
414 }
416 Position += CompleteColumn - 1;
418 // If pointing inside the preamble, adjust the position at the beginning of
419 // the file after the preamble.
420 if (SkipMainFilePreamble.first &&
421 SourceMgr.getFileEntryForID(SourceMgr.getMainFileID()) == File) {
422 if (Position - Buffer->getBufferStart() < SkipMainFilePreamble.first)
423 Position = Buffer->getBufferStart() + SkipMainFilePreamble.first;
424 }
426 if (Position > Buffer->getBufferEnd())
427 Position = Buffer->getBufferEnd();
429 CodeCompletionFile = File;
430 CodeCompletionOffset = Position - Buffer->getBufferStart();
432 auto NewBuffer = llvm::WritableMemoryBuffer::getNewUninitMemBuffer(
433 Buffer->getBufferSize() + 1, Buffer->getBufferIdentifier());
434 char *NewBuf = NewBuffer->getBufferStart();
435 char *NewPos = std::copy(Buffer->getBufferStart(), Position, NewBuf);
436 *NewPos = '\0';
437 std::copy(Position, Buffer->getBufferEnd(), NewPos+1);
438 SourceMgr.overrideFileContents(File, std::move(NewBuffer));
440 return false;
443void Preprocessor::CodeCompleteIncludedFile(llvm::StringRef Dir,
444 bool IsAngled) {
445 setCodeCompletionReached();
446 if (CodeComplete)
447 CodeComplete->CodeCompleteIncludedFile(Dir, IsAngled);
450void Preprocessor::CodeCompleteNaturalLanguage() {
451 setCodeCompletionReached();
452 if (CodeComplete)
453 CodeComplete->CodeCompleteNaturalLanguage();
456/// getSpelling - This method is used to get the spelling of a token into a
457/// SmallVector. Note that the returned StringRef may not point to the
458/// supplied buffer if a copy can be avoided.
459StringRef Preprocessor::getSpelling(const Token &Tok,
460 SmallVectorImpl<char> &Buffer,
461 bool *Invalid) const {
462 // NOTE: this has to be checked *before* testing for an IdentifierInfo.
463 if (Tok.isNot(tok::raw_identifier) && !Tok.hasUCN()) {
464 // Try the fast path.
465 if (const IdentifierInfo *II = Tok.getIdentifierInfo())
466 return II->getName();
467 }
469 // Resize the buffer if we need to copy into it.
470 if (Tok.needsCleaning())
471 Buffer.resize(Tok.getLength());
473 const char *Ptr = Buffer.data();
474 unsigned Len = getSpelling(Tok, Ptr, Invalid);
475 return StringRef(Ptr, Len);
478/// CreateString - Plop the specified string into a scratch buffer and return a
479/// location for it. If specified, the source location provides a source
480/// location for the token.
481void Preprocessor::CreateString(StringRef Str, Token &Tok,
482 SourceLocation ExpansionLocStart,
483 SourceLocation ExpansionLocEnd) {
484 Tok.setLength(Str.size());
486 const char *DestPtr;
487 SourceLocation Loc = ScratchBuf->getToken(Str.data(), Str.size(), DestPtr);
489 if (ExpansionLocStart.isValid())
490 Loc = SourceMgr.createExpansionLoc(Loc, ExpansionLocStart,
491 ExpansionLocEnd, Str.size());
492 Tok.setLocation(Loc);
494 // If this is a raw identifier or a literal token, set the pointer data.
495 if (Tok.is(tok::raw_identifier))
496 Tok.setRawIdentifierData(DestPtr);
497 else if (Tok.isLiteral())
498 Tok.setLiteralData(DestPtr);
501SourceLocation Preprocessor::SplitToken(SourceLocation Loc, unsigned Length) {
502 auto &SM = getSourceManager();
503 SourceLocation SpellingLoc = SM.getSpellingLoc(Loc);
504 std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(SpellingLoc);
505 bool Invalid = false;
506 StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid);
507 if (Invalid)
508 return SourceLocation();
510 // FIXME: We could consider re-using spelling for tokens we see repeatedly.
511 const char *DestPtr;
512 SourceLocation Spelling =
513 ScratchBuf->getToken(Buffer.data() + LocInfo.second, Length, DestPtr);
514 return SM.createTokenSplitLoc(Spelling, Loc, Loc.getLocWithOffset(Length));
517Module *Preprocessor::getCurrentModule() {
518 if (!getLangOpts().isCompilingModule())
519 return nullptr;
521 return getHeaderSearchInfo().lookupModule(getLangOpts().CurrentModule);
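//===----------------------------------------------------------------------===//
// Preprocessor Initialization Methods
//===----------------------------------------------------------------------===//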
528/// EnterMainSourceFile - Enter the specified FileID as the main source file,
529/// which implicitly adds the builtin defines etc.
530void Preprocessor::EnterMainSourceFile() {
531 // We do not allow the preprocessor to reenter the main file. Doing so will
532 // cause FileID's to accumulate information from both runs (e.g. #line
533 // information) and predefined macros aren't guaranteed to be set properly.
534 assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!")((void)0);
535 FileID MainFileID = SourceMgr.getMainFileID();
537 // If MainFileID is loaded it means we loaded an AST file, no need to enter
538 // a main file.
539 if (!SourceMgr.isLoadedFileID(MainFileID)) {
540 // Enter the main file source buffer.
541 EnterSourceFile(MainFileID, nullptr, SourceLocation());
543 // If we've been asked to skip bytes in the main file (e.g., as part of a
544 // precompiled preamble), do so now.
545 if (SkipMainFilePreamble.first > 0)
546 CurLexer->SetByteOffset(SkipMainFilePreamble.first,
547 SkipMainFilePreamble.second);
549 // Tell the header info that the main file was entered. If the file is later
550 // #imported, it won't be re-entered.
551 if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID))
552 HeaderInfo.IncrementIncludeCount(FE);
553 }
555 // Preprocess Predefines to populate the initial preprocessor state.
556 std::unique_ptr<llvm::MemoryBuffer> SB =
557 llvm::MemoryBuffer::getMemBufferCopy(Predefines, "<built-in>");
558 assert(SB && "Cannot create predefined source buffer")((void)0);
559 FileID FID = SourceMgr.createFileID(std::move(SB));
560 assert(FID.isValid() && "Could not create FileID for predefines?")((void)0);
561 setPredefinesFileID(FID);
563 // Start parsing the predefines.
564 EnterSourceFile(FID, nullptr, SourceLocation());
566 if (!PPOpts->PCHThroughHeader.empty()) {
567 // Lookup and save the FileID for the through header. If it isn't found
568 // in the search path, it's a fatal error.
569 const DirectoryLookup *CurDir;
570 Optional<FileEntryRef> File = LookupFile(
571 SourceLocation(), PPOpts->PCHThroughHeader,
572 /*isAngled=*/false, /*FromDir=*/nullptr, /*FromFile=*/nullptr, CurDir,
573 /*SearchPath=*/nullptr, /*RelativePath=*/nullptr,
574 /*SuggestedModule=*/nullptr, /*IsMapped=*/nullptr,
575 /*IsFrameworkFound=*/nullptr);
576 if (!File) {
577 Diag(SourceLocation(), diag::err_pp_through_header_not_found)
578 << PPOpts->PCHThroughHeader;
579 return;
580 }
581 setPCHThroughHeaderFileID(
582 SourceMgr.createFileID(*File, SourceLocation(), SrcMgr::C_User));
583 }
585 // Skip tokens from the Predefines and if needed the main file.
586 if ((usingPCHWithThroughHeader() && SkippingUntilPCHThroughHeader) ||
587 (usingPCHWithPragmaHdrStop() && SkippingUntilPragmaHdrStop))
588 SkipTokensWhileUsingPCH();
591void Preprocessor::setPCHThroughHeaderFileID(FileID FID) {
592 assert(PCHThroughHeaderFileID.isInvalid() &&((void)0)
593 "PCHThroughHeaderFileID already set!")((void)0);
594 PCHThroughHeaderFileID = FID;
597bool Preprocessor::isPCHThroughHeader(const FileEntry *FE) {
598 assert(PCHThroughHeaderFileID.isValid() &&((void)0)
599 "Invalid PCH through header FileID")((void)0);
600 return FE == SourceMgr.getFileEntryForID(PCHThroughHeaderFileID);
603bool Preprocessor::creatingPCHWithThroughHeader() {
604 return TUKind == TU_Prefix && !PPOpts->PCHThroughHeader.empty() &&
605 PCHThroughHeaderFileID.isValid();
608bool Preprocessor::usingPCHWithThroughHeader() {
609 return TUKind != TU_Prefix && !PPOpts->PCHThroughHeader.empty() &&
610 PCHThroughHeaderFileID.isValid();
613bool Preprocessor::creatingPCHWithPragmaHdrStop() {
614 return TUKind == TU_Prefix && PPOpts->PCHWithHdrStop;
617bool Preprocessor::usingPCHWithPragmaHdrStop() {
618 return TUKind != TU_Prefix && PPOpts->PCHWithHdrStop;
621/// Skip tokens until after the #include of the through header or
622/// until after a #pragma hdrstop is seen. Tokens in the predefines file
623/// and the main file may be skipped. If the end of the predefines file
624/// is reached, skipping continues into the main file. If the end of the
625/// main file is reached, it's a fatal error.
626void Preprocessor::SkipTokensWhileUsingPCH() {
627 bool ReachedMainFileEOF = false;
628 bool UsingPCHThroughHeader = SkippingUntilPCHThroughHeader;
629 bool UsingPragmaHdrStop = SkippingUntilPragmaHdrStop;
630 Token Tok;
631 while (true) {
632 bool InPredefines =
633 (CurLexer && CurLexer->getFileID() == getPredefinesFileID());
634 switch (CurLexerKind) {
635 case CLK_Lexer:
636 CurLexer->Lex(Tok);
637 break;
638 case CLK_TokenLexer:
639 CurTokenLexer->Lex(Tok);
640 break;
641 case CLK_CachingLexer:
642 CachingLex(Tok);
643 break;
644 case CLK_LexAfterModuleImport:
645 LexAfterModuleImport(Tok);
646 break;
647 }
648 if (Tok.is(tok::eof) && !InPredefines) {
649 ReachedMainFileEOF = true;
650 break;
651 }
652 if (UsingPCHThroughHeader && !SkippingUntilPCHThroughHeader)
653 break;
654 if (UsingPragmaHdrStop && !SkippingUntilPragmaHdrStop)
655 break;
656 }
657 if (ReachedMainFileEOF) {
658 if (UsingPCHThroughHeader)
659 Diag(SourceLocation(), diag::err_pp_through_header_not_seen)
660 << PPOpts->PCHThroughHeader << 1;
661 else if (!PPOpts->PCHWithHdrStopCreate)
662 Diag(SourceLocation(), diag::err_pp_pragma_hdrstop_not_seen);
663 }
666void Preprocessor::replayPreambleConditionalStack() {
667 // Restore the conditional stack from the preamble, if there is one.
668 if (PreambleConditionalStack.isReplaying()) {
669 assert(CurPPLexer &&((void)0)
670 "CurPPLexer is null when calling replayPreambleConditionalStack.")((void)0);
671 CurPPLexer->setConditionalLevels(PreambleConditionalStack.getStack());
672 PreambleConditionalStack.doneReplaying();
673 if (PreambleConditionalStack.reachedEOFWhileSkipping())
674 SkipExcludedConditionalBlock(
675 PreambleConditionalStack.SkipInfo->HashTokenLoc,
676 PreambleConditionalStack.SkipInfo->IfTokenLoc,
677 PreambleConditionalStack.SkipInfo->FoundNonSkipPortion,
678 PreambleConditionalStack.SkipInfo->FoundElse,
679 PreambleConditionalStack.SkipInfo->ElseLoc);
680 }
683void Preprocessor::EndSourceFile() {
684 // Notify the client that we reached the end of the source file.
685 if (Callbacks)
686 Callbacks->EndOfMainFile();
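//===----------------------------------------------------------------------===//
// Lexer Event Handling.
//===----------------------------------------------------------------------===//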
693/// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the
694/// identifier information for the token and install it into the token,
695/// updating the token kind accordingly.
696IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const {
697 assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!")((void)0);
699 // Look up this token, see if it is a macro, or if it is a language keyword.
700 IdentifierInfo *II;
701 if (!Identifier.needsCleaning() && !Identifier.hasUCN()) {
702 // No cleaning needed, just use the characters from the lexed buffer.
703 II = getIdentifierInfo(Identifier.getRawIdentifier());
704 } else {
705 // Cleaning needed, alloca a buffer, clean into it, then use the buffer.
706 SmallString<64> IdentifierBuffer;
707 StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer);
709 if (Identifier.hasUCN()) {
710 SmallString<64> UCNIdentifierBuffer;
711 expandUCNs(UCNIdentifierBuffer, CleanedStr);
712 II = getIdentifierInfo(UCNIdentifierBuffer);
713 } else {
714 II = getIdentifierInfo(CleanedStr);
715 }
716 }
718 // Update the token info (identifier info and appropriate token kind).
719 Identifier.setIdentifierInfo(II);
720 if (getLangOpts().MSVCCompat && II->isCPlusPlusOperatorKeyword() &&
721 getSourceManager().isInSystemHeader(Identifier.getLocation()))
722 Identifier.setKind(tok::identifier);
723 else
724 Identifier.setKind(II->getTokenID());
726 return II;
729void Preprocessor::SetPoisonReason(IdentifierInfo *II, unsigned DiagID) {
730 PoisonReasons[II] = DiagID;
733void Preprocessor::PoisonSEHIdentifiers(bool Poison) {
734 assert(Ident__exception_code && Ident__exception_info)((void)0);
735 assert(Ident___exception_code && Ident___exception_info)((void)0);
736 Ident__exception_code->setIsPoisoned(Poison);
737 Ident___exception_code->setIsPoisoned(Poison);
738 Ident_GetExceptionCode->setIsPoisoned(Poison);
739 Ident__exception_info->setIsPoisoned(Poison);
740 Ident___exception_info->setIsPoisoned(Poison);
741 Ident_GetExceptionInfo->setIsPoisoned(Poison);
742 Ident__abnormal_termination->setIsPoisoned(Poison);
743 Ident___abnormal_termination->setIsPoisoned(Poison);
744 Ident_AbnormalTermination->setIsPoisoned(Poison);
747void Preprocessor::HandlePoisonedIdentifier(Token & Identifier) {
748 assert(Identifier.getIdentifierInfo() &&((void)0)
749 "Can't handle identifiers without identifier info!")((void)0);
750 llvm::DenseMap<IdentifierInfo*,unsigned>::const_iterator it =
751 PoisonReasons.find(Identifier.getIdentifierInfo());
752 if(it == PoisonReasons.end())
753 Diag(Identifier, diag::err_pp_used_poisoned_id);
754 else
755 Diag(Identifier,it->second) << Identifier.getIdentifierInfo();
758/// Returns a diagnostic message kind for reporting a future keyword as
759/// appropriate for the identifier and specified language.
760static diag::kind getFutureCompatDiagKind(const IdentifierInfo &II,
761 const LangOptions &LangOpts) {
762 assert(II.isFutureCompatKeyword() && "diagnostic should not be needed")((void)0);
764 if (LangOpts.CPlusPlus)
765 return llvm::StringSwitch<diag::kind>(II.getName())
766#define CXX11_KEYWORD(NAME, FLAGS) \
767 .Case(#NAME, diag::warn_cxx11_keyword)
768#define CXX20_KEYWORD(NAME, FLAGS) \
769 .Case(#NAME, diag::warn_cxx20_keyword)
770#include "clang/Basic/TokenKinds.def"
771 // char8_t is not modeled as a CXX20_KEYWORD because it's not
772 // unconditionally enabled in C++20 mode. (It can be disabled
773 // by -fno-char8_t.)
774 .Case("char8_t", diag::warn_cxx20_keyword)
775 ;
777 llvm_unreachable(__builtin_unreachable()
778 "Keyword not known to come from a newer Standard or proposed Standard")__builtin_unreachable();
781void Preprocessor::updateOutOfDateIdentifier(IdentifierInfo &II) const {
782 assert(II.isOutOfDate() && "not out of date")((void)0);
783 getExternalSource()->updateOutOfDateIdentifier(II);
786/// HandleIdentifier - This callback is invoked when the lexer reads an
787/// identifier. This callback looks up the identifier in the map and/or
788/// potentially macro expands it or turns it into a named token (like 'for').
790/// Note that callers of this method are guarded by checking the
791/// IdentifierInfo's 'isHandleIdentifierCase' bit. If this method changes, the
792/// IdentifierInfo methods that compute these properties will need to change to
793/// match.
794bool Preprocessor::HandleIdentifier(Token &Identifier) {
795 assert(Identifier.getIdentifierInfo() &&((void)0)
796 "Can't handle identifiers without identifier info!")((void)0);
798 IdentifierInfo &II = *Identifier.getIdentifierInfo();
800 // If the information about this identifier is out of date, update it from
801 // the external source.
802 // We have to treat __VA_ARGS__ in a special way, since it gets
803 // serialized with isPoisoned = true, but our preprocessor may have
804 // unpoisoned it if we're defining a C99 macro.
805 if (II.isOutOfDate()) {
Assuming the condition is false
Taking false branch
806 bool CurrentIsPoisoned = false;
807 const bool IsSpecialVariadicMacro =
808 &II == Ident__VA_ARGS__ || &II == Ident__VA_OPT__;
809 if (IsSpecialVariadicMacro)
810 CurrentIsPoisoned = II.isPoisoned();
812 updateOutOfDateIdentifier(II);
813 Identifier.setKind(II.getTokenID());
815 if (IsSpecialVariadicMacro)
816 II.setIsPoisoned(CurrentIsPoisoned);
817 }
819 // If this identifier was poisoned, and if it was not produced from a macro
820 // expansion, emit an error.
821 if (II.isPoisoned() && CurPPLexer) {
Assuming the condition is false
822 HandlePoisonedIdentifier(Identifier);
823 }
825 // If this is a macro to be expanded, do it.
826 if (MacroDefinition MD = getMacroDefinition(&II)) {
Calling 'Preprocessor::getMacroDefinition'
827 auto *MI = MD.getMacroInfo();
828 assert(MI && "macro definition with no macro info?")((void)0);
829 if (!DisableMacroExpansion) {
830 if (!Identifier.isExpandDisabled() && MI->isEnabled()) {
831 // C99 6.10.3p10: If the preprocessing token immediately after the
832 // macro name isn't a '(', this macro should not be expanded.
833 if (!MI->isFunctionLike() || isNextPPTokenLParen())
834 return HandleMacroExpandedIdentifier(Identifier, MD);
835 } else {
836 // C99 says that a disabled macro may never again be
837 // expanded, even if it's in a context where it could be expanded in the
838 // future.
839 Identifier.setFlag(Token::DisableExpand);
840 if (MI->isObjectLike() || isNextPPTokenLParen())
841 Diag(Identifier, diag::pp_disabled_macro_expansion);
842 }
843 }
844 }
846 // If this identifier is a keyword in a newer Standard or proposed Standard,
847 // produce a warning. Don't warn if we're not considering macro expansion,
848 // since this identifier might be the name of a macro.
849 // FIXME: This warning is disabled in cases where it shouldn't be, like
850 // "#define constexpr constexpr", "int constexpr;"
851 if (II.isFutureCompatKeyword() && !DisableMacroExpansion) {
852 Diag(Identifier, getFutureCompatDiagKind(II, getLangOpts()))
853 << II.getName();
854 // Don't diagnose this keyword again in this translation unit.
855 II.setIsFutureCompatKeyword(false);
856 }
858 // If this is an extension token, diagnose its use.
859 // We avoid diagnosing tokens that originate from macro definitions.
860 // FIXME: This warning is disabled in cases where it shouldn't be,
861 // like "#define TY typeof", "TY(1) x".
862 if (II.isExtensionToken() && !DisableMacroExpansion)
863 Diag(Identifier, diag::ext_token_used);
865 // If this is the 'import' contextual keyword following an '@', note
866 // that the next token indicates a module name.
867 //
868 // Note that we do not treat 'import' as a contextual
869 // keyword when we're in a caching lexer, because caching lexers only get
870 // used in contexts where import declarations are disallowed.
871 //
872 // Likewise if this is the C++ Modules TS import keyword.
873 if (((LastTokenWasAt && II.isModulesImport()) ||
874 Identifier.is(tok::kw_import)) &&
875 !InMacroArgs && !DisableMacroExpansion &&
876 (getLangOpts().Modules || getLangOpts().DebuggerSupport) &&
877 CurLexerKind != CLK_CachingLexer) {
878 ModuleImportLoc = Identifier.getLocation();
879 ModuleImportPath.clear();
880 ModuleImportExpectsIdentifier = true;
881 CurLexerKind = CLK_LexAfterModuleImport;
882 }
883 return true;
886void Preprocessor::Lex(Token &Result) {
887 ++LexLevel;
889 // We loop here until a lex function returns a token; this avoids recursion.
890 bool ReturnedToken;
891 do {
892 switch (CurLexerKind) {
893 case CLK_Lexer:
894 ReturnedToken = CurLexer->Lex(Result);
895 break;
896 case CLK_TokenLexer:
897 ReturnedToken = CurTokenLexer->Lex(Result);
898 break;
899 case CLK_CachingLexer:
900 CachingLex(Result);
901 ReturnedToken = true;
902 break;
903 case CLK_LexAfterModuleImport:
904 ReturnedToken = LexAfterModuleImport(Result);
905 break;
906 }
907 } while (!ReturnedToken);
909 if (Result.is(tok::unknown) && TheModuleLoader.HadFatalFailure)
910 return;
912 if (Result.is(tok::code_completion) && Result.getIdentifierInfo()) {
913 // Remember the identifier before code completion token.
914 setCodeCompletionIdentifierInfo(Result.getIdentifierInfo());
915 setCodeCompletionTokenRange(Result.getLocation(), Result.getEndLoc());
916 // Set IdenfitierInfo to null to avoid confusing code that handles both
917 // identifiers and completion tokens.
918 Result.setIdentifierInfo(nullptr);
919 }
921 // Update ImportSeqState to track our position within a C++20 import-seq
922 // if this token is being produced as a result of phase 4 of translation.
923 if (getLangOpts().CPlusPlusModules && LexLevel == 1 &&
924 !Result.getFlag(Token::IsReinjected)) {
925 switch (Result.getKind()) {
926 case tok::l_paren: case tok::l_square: case tok::l_brace:
927 ImportSeqState.handleOpenBracket();
928 break;
929 case tok::r_paren: case tok::r_square:
930 ImportSeqState.handleCloseBracket();
931 break;
932 case tok::r_brace:
933 ImportSeqState.handleCloseBrace();
934 break;
935 case tok::semi:
936 ImportSeqState.handleSemi();
937 break;
938 case tok::header_name:
939 case tok::annot_header_unit:
940 ImportSeqState.handleHeaderName();
941 break;
942 case tok::kw_export:
943 ImportSeqState.handleExport();
944 break;
945 case tok::identifier:
946 if (Result.getIdentifierInfo()->isModulesImport()) {
947 ImportSeqState.handleImport();
948 if (ImportSeqState.afterImportSeq()) {
949 ModuleImportLoc = Result.getLocation();
950 ModuleImportPath.clear();
951 ModuleImportExpectsIdentifier = true;
952 CurLexerKind = CLK_LexAfterModuleImport;
953 }
954 break;
955 }
956 LLVM_FALLTHROUGH[[gnu::fallthrough]];
957 default:
958 ImportSeqState.handleMisc();
959 break;
960 }
961 }
963 LastTokenWasAt = Result.is(tok::at);
964 --LexLevel;
966 if ((LexLevel == 0 || PreprocessToken) &&
967 !Result.getFlag(Token::IsReinjected)) {
968 if (LexLevel == 0)
969 ++TokenCount;
970 if (OnToken)
971 OnToken(Result);
972 }
975/// Lex a header-name token (including one formed from header-name-tokens if
976/// \p AllowConcatenation is \c true).
978/// \param FilenameTok Filled in with the next token. On success, this will
979/// be either a header_name token. On failure, it will be whatever other
980/// token was found instead.
981/// \param AllowMacroExpansion If \c true, allow the header name to be formed
982/// by macro expansion (concatenating tokens as necessary if the first
983/// token is a '<').
984/// \return \c true if we reached EOD or EOF while looking for a > token in
985/// a concatenated header name and diagnosed it. \c false otherwise.
986bool Preprocessor::LexHeaderName(Token &FilenameTok, bool AllowMacroExpansion) {
987 // Lex using header-name tokenization rules if tokens are being lexed from
988 // a file. Just grab a token normally if we're in a macro expansion.
989 if (CurPPLexer)
990 CurPPLexer->LexIncludeFilename(FilenameTok);
991 else
992 Lex(FilenameTok);
994 // This could be a <foo/bar.h> file coming from a macro expansion. In this
995 // case, glue the tokens together into an angle_string_literal token.
996 SmallString<128> FilenameBuffer;
997 if (FilenameTok.is(tok::less) && AllowMacroExpansion) {
998 bool StartOfLine = FilenameTok.isAtStartOfLine();
999 bool LeadingSpace = FilenameTok.hasLeadingSpace();
1000 bool LeadingEmptyMacro = FilenameTok.hasLeadingEmptyMacro();
1002 SourceLocation Start = FilenameTok.getLocation();
1003 SourceLocation End;
1004 FilenameBuffer.push_back('<');
1006 // Consume tokens until we find a '>'.
1007 // FIXME: A header-name could be formed starting or ending with an
1008 // alternative token. It's not clear whether that's ill-formed in all
1009 // cases.
1010 while (FilenameTok.isNot(tok::greater)) {
1011 Lex(FilenameTok);
1012 if (FilenameTok.isOneOf(tok::eod, tok::eof)) {
1013 Diag(FilenameTok.getLocation(), diag::err_expected) << tok::greater;
1014 Diag(Start, diag::note_matching) << tok::less;
1015 return true;
1016 }
1018 End = FilenameTok.getLocation();
1020 // FIXME: Provide code completion for #includes.
1021 if (FilenameTok.is(tok::code_completion)) {
1022 setCodeCompletionReached();
1023 Lex(FilenameTok);
1024 continue;
1025 }
1027 // Append the spelling of this token to the buffer. If there was a space
1028 // before it, add it now.
1029 if (FilenameTok.hasLeadingSpace())
1030 FilenameBuffer.push_back(' ');
1032 // Get the spelling of the token, directly into FilenameBuffer if
1033 // possible.
1034 size_t PreAppendSize = FilenameBuffer.size();
1035 FilenameBuffer.resize(PreAppendSize + FilenameTok.getLength());
1037 const char *BufPtr = &FilenameBuffer[PreAppendSize];
1038 unsigned ActualLen = getSpelling(FilenameTok, BufPtr);
1040 // If the token was spelled somewhere else, copy it into FilenameBuffer.
1041 if (BufPtr != &FilenameBuffer[PreAppendSize])
1042 memcpy(&FilenameBuffer[PreAppendSize], BufPtr, ActualLen);
1044 // Resize FilenameBuffer to the correct size.
1045 if (FilenameTok.getLength() != ActualLen)
1046 FilenameBuffer.resize(PreAppendSize + ActualLen);
1047 }
1049 FilenameTok.startToken();
1050 FilenameTok.setKind(tok::header_name);
1051 FilenameTok.setFlagValue(Token::StartOfLine, StartOfLine);
1052 FilenameTok.setFlagValue(Token::LeadingSpace, LeadingSpace);
1053 FilenameTok.setFlagValue(Token::LeadingEmptyMacro, LeadingEmptyMacro);
1054 CreateString(FilenameBuffer, FilenameTok, Start, End);
1055 } else if (FilenameTok.is(tok::string_literal) && AllowMacroExpansion) {
1056 // Convert a string-literal token of the form " h-char-sequence "
1057 // (produced by macro expansion) into a header-name token.
1058 //
1059 // The rules for header-names don't quite match the rules for
1060 // string-literals, but all the places where they differ result in
1061 // undefined behavior, so we can and do treat them the same.
1062 //
1063 // A string-literal with a prefix or suffix is not translated into a
1064 // header-name. This could theoretically be observable via the C++20
1065 // context-sensitive header-name formation rules.
1066 StringRef Str = getSpelling(FilenameTok, FilenameBuffer);
1067 if (Str.size() >= 2 && Str.front() == '"' && Str.back() == '"')
1068 FilenameTok.setKind(tok::header_name);
1069 }
1071 return false;
1074/// Collect the tokens of a C++20 pp-import-suffix.
1075void Preprocessor::CollectPpImportSuffix(SmallVectorImpl<Token> &Toks) {
1076 // FIXME: For error recovery, consider recognizing attribute syntax here
1077 // and terminating / diagnosing a missing semicolon if we find anything
1078 // else? (Can we leave that to the parser?)
1079 unsigned BracketDepth = 0;
1080 while (true) {
1081 Toks.emplace_back();
1082 Lex(Toks.back());
1084 switch (Toks.back().getKind()) {
1085 case tok::l_paren: case tok::l_square: case tok::l_brace:
1086 ++BracketDepth;
1087 break;
1089 case tok::r_paren: case tok::r_square: case tok::r_brace:
1090 if (BracketDepth == 0)
1091 return;
1092 --BracketDepth;
1093 break;
1095 case tok::semi:
1096 if (BracketDepth == 0)
1097 return;
1098 break;
1100 case tok::eof:
1101 return;
1103 default:
1104 break;
1105 }
1106 }
1110/// Lex a token following the 'import' contextual keyword.
1112/// pp-import: [C++20]
1113/// import header-name pp-import-suffix[opt] ;
1114/// import header-name-tokens pp-import-suffix[opt] ;
1115/// [ObjC] @ import module-name ;
1116/// [Clang] import module-name ;
1118/// header-name-tokens:
1119/// string-literal
1120/// < [any sequence of preprocessing-tokens other than >] >
1122/// module-name:
1123/// module-name-qualifier[opt] identifier
1125/// module-name-qualifier
1126/// module-name-qualifier[opt] identifier .
1128/// We respond to a pp-import by importing macros from the named module.
1129bool Preprocessor::LexAfterModuleImport(Token &Result) {
1130 // Figure out what kind of lexer we actually have.
1131 recomputeCurLexerKind();
1133 // Lex the next token. The header-name lexing rules are used at the start of
1134 // a pp-import.
1135 //
1136 // For now, we only support header-name imports in C++20 mode.
1137 // FIXME: Should we allow this in all language modes that support an import
1138 // declaration as an extension?
1139 if (ModuleImportPath.empty() && getLangOpts().CPlusPlusModules) {
1140 if (LexHeaderName(Result))
1141 return true;
1142 } else {
1143 Lex(Result);
1144 }
1146 // Allocate a holding buffer for a sequence of tokens and introduce it into
1147 // the token stream.
1148 auto EnterTokens = [this](ArrayRef<Token> Toks) {
1149 auto ToksCopy = std::make_unique<Token[]>(Toks.size());
1150 std::copy(Toks.begin(), Toks.end(), ToksCopy.get());
1151 EnterTokenStream(std::move(ToksCopy), Toks.size(),
1152 /*DisableMacroExpansion*/ true, /*IsReinject*/ false);
1153 };
1155 // Check for a header-name.
1156 SmallVector<Token, 32> Suffix;
1157 if (Result.is(tok::header_name)) {
1158 // Enter the header-name token into the token stream; a Lex action cannot
1159 // both return a token and cache tokens (doing so would corrupt the token
1160 // cache if the call to Lex comes from CachingLex / PeekAhead).
1161 Suffix.push_back(Result);
1163 // Consume the pp-import-suffix and expand any macros in it now. We'll add
1164 // it back into the token stream later.
1165 CollectPpImportSuffix(Suffix);
1166 if (Suffix.back().isNot(tok::semi)) {
1167 // This is not a pp-import after all.
1168 EnterTokens(Suffix);
1169 return false;
1170 }
1172 // C++2a [cpp.module]p1:
1173 // The ';' preprocessing-token terminating a pp-import shall not have
1174 // been produced by macro replacement.
1175 SourceLocation SemiLoc = Suffix.back().getLocation();
1176 if (SemiLoc.isMacroID())
1177 Diag(SemiLoc, diag::err_header_import_semi_in_macro);
1179 // Reconstitute the import token.
1180 Token ImportTok;
1181 ImportTok.startToken();
1182 ImportTok.setKind(tok::kw_import);
1183 ImportTok.setLocation(ModuleImportLoc);
1184 ImportTok.setIdentifierInfo(getIdentifierInfo("import"));
1185 ImportTok.setLength(6);
1187 auto Action = HandleHeaderIncludeOrImport(
1188 /*HashLoc*/ SourceLocation(), ImportTok, Suffix.front(), SemiLoc);
1189 switch (Action.Kind) {
1190 case ImportAction::None:
1191 break;
1193 case ImportAction::ModuleBegin:
1194 // Let the parser know we're textually entering the module.
1195 Suffix.emplace_back();
1196 Suffix.back().startToken();
1197 Suffix.back().setKind(tok::annot_module_begin);
1198 Suffix.back().setLocation(SemiLoc);
1199 Suffix.back().setAnnotationEndLoc(SemiLoc);
1200 Suffix.back().setAnnotationValue(Action.ModuleForHeader);
1201 LLVM_FALLTHROUGH[[gnu::fallthrough]];
1203 case ImportAction::ModuleImport:
1204 case ImportAction::SkippedModuleImport:
1205 // We chose to import (or textually enter) the file. Convert the
1206 // header-name token into a header unit annotation token.
1207 Suffix[0].setKind(tok::annot_header_unit);
1208 Suffix[0].setAnnotationEndLoc(Suffix[0].getLocation());
1209 Suffix[0].setAnnotationValue(Action.ModuleForHeader);
1210 // FIXME: Call the moduleImport callback?
1211 break;
1212 case ImportAction::Failure:
1213 assert(TheModuleLoader.HadFatalFailure &&((void)0)
1214 "This should be an early exit only to a fatal error")((void)0);
1215 Result.setKind(tok::eof);
1216 CurLexer->cutOffLexing();
1217 EnterTokens(Suffix);
1218 return true;
1219 }
1221 EnterTokens(Suffix);
1222 return false;
1223 }
1225 // The token sequence
1226 //
1227 // import identifier (. identifier)*
1228 //
1229 // indicates a module import directive. We already saw the 'import'
1230 // contextual keyword, so now we're looking for the identifiers.
1231 if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) {
1232 // We expected to see an identifier here, and we did; continue handling
1233 // identifiers.
1234 ModuleImportPath.push_back(std::make_pair(Result.getIdentifierInfo(),
1235 Result.getLocation()));
1236 ModuleImportExpectsIdentifier = false;
1237 CurLexerKind = CLK_LexAfterModuleImport;
1238 return true;
1239 }
1241 // If we're expecting a '.' or a ';', and we got a '.', then wait until we
1242 // see the next identifier. (We can also see a '[[' that begins an
1243 // attribute-specifier-seq here under the C++ Modules TS.)
1244 if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) {
1245 ModuleImportExpectsIdentifier = true;
1246 CurLexerKind = CLK_LexAfterModuleImport;
1247 return true;
1248 }
1250 // If we didn't recognize a module name at all, this is not a (valid) import.
1251 if (ModuleImportPath.empty() || Result.is(tok::eof))
1252 return true;
1254 // Consume the pp-import-suffix and expand any macros in it now, if we're not
1255 // at the semicolon already.
1256 SourceLocation SemiLoc = Result.getLocation();
1257 if (Result.isNot(tok::semi)) {
1258 Suffix.push_back(Result);
1259 CollectPpImportSuffix(Suffix);
1260 if (Suffix.back().isNot(tok::semi)) {
1261 // This is not an import after all.
1262 EnterTokens(Suffix);
1263 return false;
1264 }
1265 SemiLoc = Suffix.back().getLocation();
1266 }
1268 // Under the Modules TS, the dot is just part of the module name, and not
1269 // a real hierarchy separator. Flatten such module names now.
1270 //
1271 // FIXME: Is this the right level to be performing this transformation?
1272 std::string FlatModuleName;
1273 if (getLangOpts().ModulesTS || getLangOpts().CPlusPlusModules) {
1274 for (auto &Piece : ModuleImportPath) {
1275 if (!FlatModuleName.empty())
1276 FlatModuleName += ".";
1277 FlatModuleName += Piece.first->getName();
1278 }
1279 SourceLocation FirstPathLoc = ModuleImportPath[0].second;
1280 ModuleImportPath.clear();
1281 ModuleImportPath.push_back(
1282 std::make_pair(getIdentifierInfo(FlatModuleName), FirstPathLoc));
1283 }
1285 Module *Imported = nullptr;
1286 if (getLangOpts().Modules) {
1287 Imported = TheModuleLoader.loadModule(ModuleImportLoc,
1288 ModuleImportPath,
1289 Module::Hidden,
1290 /*IsInclusionDirective=*/false);
1291 if (Imported)
1292 makeModuleVisible(Imported, SemiLoc);
1293 }
1294 if (Callbacks)
1295 Callbacks->moduleImport(ModuleImportLoc, ModuleImportPath, Imported);
1297 if (!Suffix.empty()) {
1298 EnterTokens(Suffix);
1299 return false;
1300 }
1301 return true;
1304void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc) {
1305 CurSubmoduleState->VisibleModules.setVisible(
1306 M, Loc, [](Module *) {},
1307 [&](ArrayRef<Module *> Path, Module *Conflict, StringRef Message) {
1308 // FIXME: Include the path in the diagnostic.
1309 // FIXME: Include the import location for the conflicting module.
1310 Diag(ModuleImportLoc, diag::warn_module_conflict)
1311 << Path[0]->getFullModuleName()
1312 << Conflict->getFullModuleName()
1313 << Message;
1314 });
1316 // Add this module to the imports list of the currently-built submodule.
1317 if (!BuildingSubmoduleStack.empty() && M != BuildingSubmoduleStack.back().M)
1318 BuildingSubmoduleStack.back().M->Imports.insert(M);
1321bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String,
1322 const char *DiagnosticTag,
1323 bool AllowMacroExpansion) {
1324 // We need at least one string literal.
1325 if (Result.isNot(tok::string_literal)) {
1326 Diag(Result, diag::err_expected_string_literal)
1327 << /*Source='in...'*/0 << DiagnosticTag;
1328 return false;
1329 }
1331 // Lex string literal tokens, optionally with macro expansion.
1332 SmallVector<Token, 4> StrToks;
1333 do {
1334 StrToks.push_back(Result);
1336 if (Result.hasUDSuffix())
1337 Diag(Result, diag::err_invalid_string_udl);
1339 if (AllowMacroExpansion)
1340 Lex(Result);
1341 else
1342 LexUnexpandedToken(Result);
1343 } while (Result.is(tok::string_literal));
1345 // Concatenate and parse the strings.
1346 StringLiteralParser Literal(StrToks, *this);
1347 assert(Literal.isAscii() && "Didn't allow wide strings in")((void)0);
1349 if (Literal.hadError)
1350 return false;
1352 if (Literal.Pascal) {
1353 Diag(StrToks[0].getLocation(), diag::err_expected_string_literal)
1354 << /*Source='in...'*/0 << DiagnosticTag;
1355 return false;
1356 }
1358 String = std::string(Literal.GetString());
1359 return true;
1362bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) {
1363 assert(Tok.is(tok::numeric_constant))((void)0);
1364 SmallString<8> IntegerBuffer;
1365 bool NumberInvalid = false;
1366 StringRef Spelling = getSpelling(Tok, IntegerBuffer, &NumberInvalid);
1367 if (NumberInvalid)
1368 return false;
1369 NumericLiteralParser Literal(Spelling, Tok.getLocation(), getSourceManager(),
1370 getLangOpts(), getTargetInfo(),
1371 getDiagnostics());
1372 if (Literal.hadError || !Literal.isIntegerLiteral() || Literal.hasUDSuffix())
1373 return false;
1374 llvm::APInt APVal(64, 0);
1375 if (Literal.GetIntegerValue(APVal))
1376 return false;
1377 Lex(Tok);
1378 Value = APVal.getLimitedValue();
1379 return true;
1382void Preprocessor::addCommentHandler(CommentHandler *Handler) {
1383 assert(Handler && "NULL comment handler")((void)0);
1384 assert(llvm::find(CommentHandlers, Handler) == CommentHandlers.end() &&((void)0)
1385 "Comment handler already registered")((void)0);
1386 CommentHandlers.push_back(Handler);
1389void Preprocessor::removeCommentHandler(CommentHandler *Handler) {
1390 std::vector<CommentHandler *>::iterator Pos =
1391 llvm::find(CommentHandlers, Handler);
1392 assert(Pos != CommentHandlers.end() && "Comment handler not registered")((void)0);
1393 CommentHandlers.erase(Pos);
1396bool Preprocessor::HandleComment(Token &result, SourceRange Comment) {
1397 bool AnyPendingTokens = false;
1398 for (std::vector<CommentHandler *>::iterator H = CommentHandlers.begin(),
1399 HEnd = CommentHandlers.end();
1400 H != HEnd; ++H) {
1401 if ((*H)->HandleComment(*this, Comment))
1402 AnyPendingTokens = true;
1403 }
1404 if (!AnyPendingTokens || getCommentRetentionState())
1405 return false;
1406 Lex(result);
1407 return true;
1410ModuleLoader::~ModuleLoader() = default;
1412CommentHandler::~CommentHandler() = default;
1414EmptylineHandler::~EmptylineHandler() = default;
1416CodeCompletionHandler::~CodeCompletionHandler() = default;
1418void Preprocessor::createPreprocessingRecord() {
1419 if (Record)
1420 return;
1422 Record = new PreprocessingRecord(getSourceManager());
1423 addPPCallbacks(std::unique_ptr<PPCallbacks>(Record));


1//===- Preprocessor.h - C Language Family Preprocessor ----------*- C++ -*-===//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
9/// \file
10/// Defines the clang::Preprocessor interface.
17#include "clang/Basic/Diagnostic.h"
18#include "clang/Basic/IdentifierTable.h"
19#include "clang/Basic/LLVM.h"
20#include "clang/Basic/LangOptions.h"
21#include "clang/Basic/Module.h"
22#include "clang/Basic/SourceLocation.h"
23#include "clang/Basic/SourceManager.h"
24#include "clang/Basic/TokenKinds.h"
25#include "clang/Lex/Lexer.h"
26#include "clang/Lex/MacroInfo.h"
27#include "clang/Lex/ModuleLoader.h"
28#include "clang/Lex/ModuleMap.h"
29#include "clang/Lex/PPCallbacks.h"
30#include "clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h"
31#include "clang/Lex/Token.h"
32#include "clang/Lex/TokenLexer.h"
33#include "llvm/ADT/ArrayRef.h"
34#include "llvm/ADT/DenseMap.h"
35#include "llvm/ADT/FoldingSet.h"
36#include "llvm/ADT/FunctionExtras.h"
37#include "llvm/ADT/None.h"
38#include "llvm/ADT/Optional.h"
39#include "llvm/ADT/PointerUnion.h"
40#include "llvm/ADT/STLExtras.h"
41#include "llvm/ADT/SmallPtrSet.h"
42#include "llvm/ADT/SmallVector.h"
43#include "llvm/ADT/StringRef.h"
44#include "llvm/ADT/TinyPtrVector.h"
45#include "llvm/ADT/iterator_range.h"
46#include "llvm/Support/Allocator.h"
47#include "llvm/Support/Casting.h"
48#include "llvm/Support/Registry.h"
49#include <cassert>
50#include <cstddef>
51#include <cstdint>
52#include <map>
53#include <memory>
54#include <string>
55#include <utility>
56#include <vector>
58namespace llvm {
60template<unsigned InternalLen> class SmallString;
62} // namespace llvm
64namespace clang {
66class CodeCompletionHandler;
67class CommentHandler;
68class DirectoryEntry;
69class DirectoryLookup;
70class EmptylineHandler;
71class ExternalPreprocessorSource;
72class FileEntry;
73class FileManager;
74class HeaderSearch;
75class MacroArgs;
76class PragmaHandler;
77class PragmaNamespace;
78class PreprocessingRecord;
79class PreprocessorLexer;
80class PreprocessorOptions;
81class ScratchBuffer;
82class TargetInfo;
84namespace Builtin {
85class Context;
88/// Stores token information for comparing actual tokens with
89/// predefined values. Only handles simple tokens and identifiers.
90class TokenValue {
91 tok::TokenKind Kind;
92 IdentifierInfo *II;
95 TokenValue(tok::TokenKind Kind) : Kind(Kind), II(nullptr) {
96 assert(Kind != tok::raw_identifier && "Raw identifiers are not supported.")((void)0);
97 assert(Kind != tok::identifier &&((void)0)
98 "Identifiers should be created by TokenValue(IdentifierInfo *)")((void)0);
99 assert(!tok::isLiteral(Kind) && "Literals are not supported.")((void)0);
100 assert(!tok::isAnnotation(Kind) && "Annotations are not supported.")((void)0);
101 }
103 TokenValue(IdentifierInfo *II) : Kind(tok::identifier), II(II) {}
105 bool operator==(const Token &Tok) const {
106 return Tok.getKind() == Kind &&
107 (!II || II == Tok.getIdentifierInfo());
108 }
/// Context in which macro name is used.
enum MacroUse {
  // other than #define or #undef
  MU_Other  = 0,

  // macro name specified in #define
  MU_Define = 1,

  // macro name specified in #undef
  MU_Undef  = 2
};
123/// Engages in a tight little dance with the lexer to efficiently
124/// preprocess tokens.
126/// Lexers know only about tokens within a single source file, and don't
127/// know anything about preprocessor-level issues like the \#include stack,
128/// token expansion, etc.
129class Preprocessor {
130 friend class VAOptDefinitionContext;
131 friend class VariadicMacroScopeGuard;
133 llvm::unique_function<void(const clang::Token &)> OnToken;
134 std::shared_ptr<PreprocessorOptions> PPOpts;
135 DiagnosticsEngine *Diags;
136 LangOptions &LangOpts;
137 const TargetInfo *Target = nullptr;
138 const TargetInfo *AuxTarget = nullptr;
139 FileManager &FileMgr;
140 SourceManager &SourceMgr;
141 std::unique_ptr<ScratchBuffer> ScratchBuf;
142 HeaderSearch &HeaderInfo;
143 ModuleLoader &TheModuleLoader;
145 /// External source of macros.
146 ExternalPreprocessorSource *ExternalSource;
148 /// A BumpPtrAllocator object used to quickly allocate and release
149 /// objects internal to the Preprocessor.
150 llvm::BumpPtrAllocator BP;
152 /// Identifiers for builtin macros and other builtins.
153 IdentifierInfo *Ident__LINE__, *Ident__FILE__; // __LINE__, __FILE__
154 IdentifierInfo *Ident__DATE__, *Ident__TIME__; // __DATE__, __TIME__
155 IdentifierInfo *Ident__INCLUDE_LEVEL__; // __INCLUDE_LEVEL__
156 IdentifierInfo *Ident__BASE_FILE__; // __BASE_FILE__
157 IdentifierInfo *Ident__FILE_NAME__; // __FILE_NAME__
158 IdentifierInfo *Ident__TIMESTAMP__; // __TIMESTAMP__
159 IdentifierInfo *Ident__COUNTER__; // __COUNTER__
160 IdentifierInfo *Ident_Pragma, *Ident__pragma; // _Pragma, __pragma
161 IdentifierInfo *Ident__identifier; // __identifier
162 IdentifierInfo *Ident__VA_ARGS__; // __VA_ARGS__
163 IdentifierInfo *Ident__VA_OPT__; // __VA_OPT__
164 IdentifierInfo *Ident__has_feature; // __has_feature
165 IdentifierInfo *Ident__has_extension; // __has_extension
166 IdentifierInfo *Ident__has_builtin; // __has_builtin
167 IdentifierInfo *Ident__has_attribute; // __has_attribute
168 IdentifierInfo *Ident__has_include; // __has_include
169 IdentifierInfo *Ident__has_include_next; // __has_include_next
170 IdentifierInfo *Ident__has_warning; // __has_warning
171 IdentifierInfo *Ident__is_identifier; // __is_identifier
172 IdentifierInfo *Ident__building_module; // __building_module
173 IdentifierInfo *Ident__MODULE__; // __MODULE__
174 IdentifierInfo *Ident__has_cpp_attribute; // __has_cpp_attribute
175 IdentifierInfo *Ident__has_c_attribute; // __has_c_attribute
176 IdentifierInfo *Ident__has_declspec; // __has_declspec_attribute
177 IdentifierInfo *Ident__is_target_arch; // __is_target_arch
178 IdentifierInfo *Ident__is_target_vendor; // __is_target_vendor
179 IdentifierInfo *Ident__is_target_os; // __is_target_os
180 IdentifierInfo *Ident__is_target_environment; // __is_target_environment
182 // Weak, only valid (and set) while InMacroArgs is true.
183 Token* ArgMacro;
185 SourceLocation DATELoc, TIMELoc;
187 // Next __COUNTER__ value, starts at 0.
188 unsigned CounterValue = 0;
190 enum {
191 /// Maximum depth of \#includes.
192 MaxAllowedIncludeStackDepth = 200
193 };
195 // State that is set before the preprocessor begins.
196 bool KeepComments : 1;
197 bool KeepMacroComments : 1;
198 bool SuppressIncludeNotFoundError : 1;
200 // State that changes while the preprocessor runs:
201 bool InMacroArgs : 1; // True if parsing fn macro invocation args.
203 /// Whether the preprocessor owns the header search object.
204 bool OwnsHeaderSearch : 1;
206 /// True if macro expansion is disabled.
207 bool DisableMacroExpansion : 1;
209 /// Temporarily disables DisableMacroExpansion (i.e. enables expansion)
210 /// when parsing preprocessor directives.
211 bool MacroExpansionInDirectivesOverride : 1;
213 class ResetMacroExpansionHelper;
215 /// Whether we have already loaded macros from the external source.
216 mutable bool ReadMacrosFromExternalSource : 1;
218 /// True if pragmas are enabled.
219 bool PragmasEnabled : 1;
221 /// True if the current build action is a preprocessing action.
222 bool PreprocessedOutput : 1;
224 /// True if we are currently preprocessing a #if or #elif directive
225 bool ParsingIfOrElifDirective;
227 /// True if we are pre-expanding macro arguments.
228 bool InMacroArgPreExpansion;
230 /// Mapping/lookup information for all identifiers in
231 /// the program, including program keywords.
232 mutable IdentifierTable Identifiers;
234 /// This table contains all the selectors in the program.
235 ///
236 /// Unlike IdentifierTable above, this table *isn't* populated by the
237 /// preprocessor. It is declared/expanded here because its role/lifetime is
238 /// conceptually similar to the IdentifierTable. In addition, the current
239 /// control flow (in clang::ParseAST()), make it convenient to put here.
240 ///
241 /// FIXME: Make sure the lifetime of Identifiers/Selectors *isn't* tied to
242 /// the lifetime of the preprocessor.
243 SelectorTable Selectors;
245 /// Information about builtins.
246 std::unique_ptr<Builtin::Context> BuiltinInfo;
248 /// Tracks all of the pragmas that the client registered
249 /// with this preprocessor.
250 std::unique_ptr<PragmaNamespace> PragmaHandlers;
252 /// Pragma handlers of the original source is stored here during the
253 /// parsing of a model file.
254 std::unique_ptr<PragmaNamespace> PragmaHandlersBackup;
256 /// Tracks all of the comment handlers that the client registered
257 /// with this preprocessor.
258 std::vector<CommentHandler *> CommentHandlers;
260 /// Empty line handler.
261 EmptylineHandler *Emptyline = nullptr;
263 /// True if we want to ignore EOF token and continue later on (thus
264 /// avoid tearing the Lexer and etc. down).
265 bool IncrementalProcessing = false;
268 /// The kind of translation unit we are processing.
269 const TranslationUnitKind TUKind;
272 /// The code-completion handler.
273 CodeCompletionHandler *CodeComplete = nullptr;
275 /// The file that we're performing code-completion for, if any.
276 const FileEntry *CodeCompletionFile = nullptr;
278 /// The offset in file for the code-completion point.
279 unsigned CodeCompletionOffset = 0;
281 /// The location for the code-completion point. This gets instantiated
282 /// when the CodeCompletionFile gets \#include'ed for preprocessing.
283 SourceLocation CodeCompletionLoc;
285 /// The start location for the file of the code-completion point.
286 ///
287 /// This gets instantiated when the CodeCompletionFile gets \#include'ed
288 /// for preprocessing.
289 SourceLocation CodeCompletionFileLoc;
291 /// The source location of the \c import contextual keyword we just
292 /// lexed, if any.
293 SourceLocation ModuleImportLoc;
295 /// The module import path that we're currently processing.
296 SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> ModuleImportPath;
298 /// Whether the last token we lexed was an '@'.
299 bool LastTokenWasAt = false;
/// A position within a C++20 import-seq.
class ImportSeq {
public:
  enum State : int {
    // Positive values represent a number of unclosed brackets.
    AtTopLevel = 0,
    AfterTopLevelTokenSeq = -1,
    AfterExport = -2,
    AfterImportSeq = -3,
  };

  // Intentionally implicit: the enclosing class copy-initializes an ImportSeq
  // from a State value.
  ImportSeq(State S) : S(S) {}

  /// Saw any kind of open bracket.
  void handleOpenBracket() {
    // Any non-positive state counts as depth 0 before the increment.
    S = static_cast<State>(std::max<int>(S, 0) + 1);
  }
  /// Saw any kind of close bracket other than '}'.
  void handleCloseBracket() {
    // Clamp so that an unmatched close bracket lands on AtTopLevel.
    S = static_cast<State>(std::max<int>(S, 1) - 1);
  }
  /// Saw a close brace.
  void handleCloseBrace() {
    handleCloseBracket();
    if (S == AtTopLevel && !AfterHeaderName)
      S = AfterTopLevelTokenSeq;
  }
  /// Saw a semicolon.
  void handleSemi() {
    if (atTopLevel()) {
      S = AfterTopLevelTokenSeq;
      AfterHeaderName = false;
    }
  }

  /// Saw an 'export' identifier.
  void handleExport() {
    if (S == AfterTopLevelTokenSeq)
      S = AfterExport;
    else if (S <= 0)
      S = AtTopLevel;
  }
  /// Saw an 'import' identifier.
  void handleImport() {
    if (S == AfterTopLevelTokenSeq || S == AfterExport)
      S = AfterImportSeq;
    else if (S <= 0)
      S = AtTopLevel;
  }

  /// Saw a 'header-name' token; do not recognize any more 'import' tokens
  /// until we reach a top-level semicolon.
  void handleHeaderName() {
    if (S == AfterImportSeq)
      AfterHeaderName = true;
    handleMisc();
  }

  /// Saw any other token.
  void handleMisc() {
    if (S <= 0)
      S = AtTopLevel;
  }

  /// \return \c true if there are no unclosed brackets.
  bool atTopLevel() const { return S <= 0; }
  /// \return \c true if the last token completed an import-seq prefix.
  bool afterImportSeq() const { return S == AfterImportSeq; }

private:
  State S;
  /// Whether we're in the pp-import-suffix following the header-name in a
  /// pp-import. If so, a close-brace is not sufficient to end the
  /// top-level-token-seq of an import-seq.
  bool AfterHeaderName = false;
};
376 /// Our current position within a C++20 import-seq.
377 ImportSeq ImportSeqState = ImportSeq::AfterTopLevelTokenSeq;
379 /// Whether the module import expects an identifier next. Otherwise,
380 /// it expects a '.' or ';'.
381 bool ModuleImportExpectsIdentifier = false;
383 /// The identifier and source location of the currently-active
384 /// \#pragma clang arc_cf_code_audited begin.
385 std::pair<IdentifierInfo *, SourceLocation> PragmaARCCFCodeAuditedInfo;
387 /// The source location of the currently-active
388 /// \#pragma clang assume_nonnull begin.
389 SourceLocation PragmaAssumeNonNullLoc;
391 /// True if we hit the code-completion point.
392 bool CodeCompletionReached = false;
394 /// The code completion token containing the information
395 /// on the stem that is to be code completed.
396 IdentifierInfo *CodeCompletionII = nullptr;
398 /// Range for the code completion token.
399 SourceRange CodeCompletionTokenRange;
401 /// The directory that the main file should be considered to occupy,
402 /// if it does not correspond to a real file (as happens when building a
403 /// module).
404 const DirectoryEntry *MainFileDir = nullptr;
406 /// The number of bytes that we will initially skip when entering the
407 /// main file, along with a flag that indicates whether skipping this number
408 /// of bytes will place the lexer at the start of a line.
409 ///
410 /// This is used when loading a precompiled preamble.
411 std::pair<int, bool> SkipMainFilePreamble;
/// Whether we hit an error due to reaching the maximum allowed include
/// depth. Lets us avoid hitting the same error over and over again.
415 bool HasReachedMaxIncludeDepth = false;
417 /// The number of currently-active calls to Lex.
418 ///
419 /// Lex is reentrant, and asking for an (end-of-phase-4) token can often
420 /// require asking for multiple additional tokens. This counter makes it
421 /// possible for Lex to detect whether it's producing a token for the end
422 /// of phase 4 of translation or for some other situation.
423 unsigned LexLevel = 0;
425 /// The number of (LexLevel 0) preprocessor tokens.
426 unsigned TokenCount = 0;
428 /// Preprocess every token regardless of LexLevel.
429 bool PreprocessToken = false;
431 /// The maximum number of (LexLevel 0) tokens before issuing a -Wmax-tokens
432 /// warning, or zero for unlimited.
433 unsigned MaxTokens = 0;
434 SourceLocation MaxTokensOverrideLoc;
437 struct PreambleSkipInfo {
438 SourceLocation HashTokenLoc;
439 SourceLocation IfTokenLoc;
440 bool FoundNonSkipPortion;
441 bool FoundElse;
442 SourceLocation ElseLoc;
444 PreambleSkipInfo(SourceLocation HashTokenLoc, SourceLocation IfTokenLoc,
445 bool FoundNonSkipPortion, bool FoundElse,
446 SourceLocation ElseLoc)
447 : HashTokenLoc(HashTokenLoc), IfTokenLoc(IfTokenLoc),
448 FoundNonSkipPortion(FoundNonSkipPortion), FoundElse(FoundElse),
449 ElseLoc(ElseLoc) {}
450 };
453 friend class ASTReader;
454 friend class MacroArgs;
456 class PreambleConditionalStackStore {
457 enum State {
458 Off = 0,
459 Recording = 1,
460 Replaying = 2,
461 };
463 public:
464 PreambleConditionalStackStore() = default;
466 void startRecording() { ConditionalStackState = Recording; }
467 void startReplaying() { ConditionalStackState = Replaying; }
468 bool isRecording() const { return ConditionalStackState == Recording; }
469 bool isReplaying() const { return ConditionalStackState == Replaying; }
471 ArrayRef<PPConditionalInfo> getStack() const {
472 return ConditionalStack;
473 }
475 void doneReplaying() {
476 ConditionalStack.clear();
477 ConditionalStackState = Off;
478 }
480 void setStack(ArrayRef<PPConditionalInfo> s) {
481 if (!isRecording() && !isReplaying())
482 return;
483 ConditionalStack.clear();
484 ConditionalStack.append(s.begin(), s.end());
485 }
487 bool hasRecordedPreamble() const { return !ConditionalStack.empty(); }
489 bool reachedEOFWhileSkipping() const { return SkipInfo.hasValue(); }
491 void clearSkipInfo() { SkipInfo.reset(); }
493 llvm::Optional<PreambleSkipInfo> SkipInfo;
495 private:
496 SmallVector<PPConditionalInfo, 4> ConditionalStack;
497 State ConditionalStackState = Off;
498 } PreambleConditionalStack;
500 /// The current top of the stack that we're lexing from if
501 /// not expanding a macro and we are lexing directly from source code.
502 ///
503 /// Only one of CurLexer, or CurTokenLexer will be non-null.
504 std::unique_ptr<Lexer> CurLexer;
/// The current top of the stack that we're lexing from
/// if not expanding a macro.
508 ///
509 /// This is an alias for CurLexer.
510 PreprocessorLexer *CurPPLexer = nullptr;
512 /// Used to find the current FileEntry, if CurLexer is non-null
513 /// and if applicable.
514 ///
515 /// This allows us to implement \#include_next and find directory-specific
516 /// properties.
517 const DirectoryLookup *CurDirLookup = nullptr;
519 /// The current macro we are expanding, if we are expanding a macro.
520 ///
521 /// One of CurLexer and CurTokenLexer must be null.
522 std::unique_ptr<TokenLexer> CurTokenLexer;
524 /// The kind of lexer we're currently working with.
525 enum CurLexerKind {
526 CLK_Lexer,
527 CLK_TokenLexer,
528 CLK_CachingLexer,
529 CLK_LexAfterModuleImport
530 } CurLexerKind = CLK_Lexer;
532 /// If the current lexer is for a submodule that is being built, this
533 /// is that submodule.
534 Module *CurLexerSubmodule = nullptr;
536 /// Keeps track of the stack of files currently
537 /// \#included, and macros currently being expanded from, not counting
538 /// CurLexer/CurTokenLexer.
539 struct IncludeStackInfo {
540 enum CurLexerKind CurLexerKind;
541 Module *TheSubmodule;
542 std::unique_ptr<Lexer> TheLexer;
543 PreprocessorLexer *ThePPLexer;
544 std::unique_ptr<TokenLexer> TheTokenLexer;
545 const DirectoryLookup *TheDirLookup;
547 // The following constructors are completely useless copies of the default
548 // versions, only needed to pacify MSVC.
549 IncludeStackInfo(enum CurLexerKind CurLexerKind, Module *TheSubmodule,
550 std::unique_ptr<Lexer> &&TheLexer,
551 PreprocessorLexer *ThePPLexer,
552 std::unique_ptr<TokenLexer> &&TheTokenLexer,
553 const DirectoryLookup *TheDirLookup)
554 : CurLexerKind(std::move(CurLexerKind)),
555 TheSubmodule(std::move(TheSubmodule)), TheLexer(std::move(TheLexer)),
556 ThePPLexer(std::move(ThePPLexer)),
557 TheTokenLexer(std::move(TheTokenLexer)),
558 TheDirLookup(std::move(TheDirLookup)) {}
559 };
560 std::vector<IncludeStackInfo> IncludeMacroStack;
562 /// Actions invoked when some preprocessor activity is
563 /// encountered (e.g. a file is \#included, etc).
564 std::unique_ptr<PPCallbacks> Callbacks;
566 struct MacroExpandsInfo {
567 Token Tok;
568 MacroDefinition MD;
569 SourceRange Range;
571 MacroExpandsInfo(Token Tok, MacroDefinition MD, SourceRange Range)
572 : Tok(Tok), MD(MD), Range(Range) {}
573 };
574 SmallVector<MacroExpandsInfo, 2> DelayedMacroExpandsCallbacks;
576 /// Information about a name that has been used to define a module macro.
577 struct ModuleMacroInfo {
578 /// The most recent macro directive for this identifier.
579 MacroDirective *MD;
581 /// The active module macros for this identifier.
582 llvm::TinyPtrVector<ModuleMacro *> ActiveModuleMacros;
584 /// The generation number at which we last updated ActiveModuleMacros.
585 /// \see Preprocessor::VisibleModules.
586 unsigned ActiveModuleMacrosGeneration = 0;
588 /// Whether this macro name is ambiguous.
589 bool IsAmbiguous = false;
591 /// The module macros that are overridden by this macro.
592 llvm::TinyPtrVector<ModuleMacro *> OverriddenMacros;
594 ModuleMacroInfo(MacroDirective *MD) : MD(MD) {}
595 };
597 /// The state of a macro for an identifier.
598 class MacroState {
599 mutable llvm::PointerUnion<MacroDirective *, ModuleMacroInfo *> State;
601 ModuleMacroInfo *getModuleInfo(Preprocessor &PP,
602 const IdentifierInfo *II) const {
603 if (II->isOutOfDate())
Assuming the condition is false
Taking false branch
604 PP.updateOutOfDateIdentifier(const_cast<IdentifierInfo&>(*II));
605 // FIXME: Find a spare bit on IdentifierInfo and store a
606 // HasModuleMacros flag.
607 if (!II->hasMacroDefinition() ||
Assuming the condition is false
Taking false branch
608 (!PP.getLangOpts().Modules &&
Assuming field 'Modules' is not equal to 0
609 !PP.getLangOpts().ModulesLocalVisibility) ||
610 !PP.CurSubmoduleState->VisibleModules.getGeneration())
Assuming the condition is false
611 return nullptr;
613 auto *Info = State.dyn_cast<ModuleMacroInfo*>();
614 if (!Info
'Info' is null
'Info' is null
'Info' is null
'Info' is null
) {
Taking true branch
615 Info = new (PP.getPreprocessorAllocator())
Calling 'operator new<llvm::MallocAllocator, 4096UL, 4096UL, 128UL>'
616 ModuleMacroInfo(State.get<MacroDirective *>());
617 State = Info;
618 }
620 if (PP.CurSubmoduleState->VisibleModules.getGeneration() !=
621 Info->ActiveModuleMacrosGeneration)
622 PP.updateModuleMacroInfo(II, *Info);
623 return Info;
624 }
626 public:
627 MacroState() : MacroState(nullptr) {}
628 MacroState(MacroDirective *MD) : State(MD) {}
630 MacroState(MacroState &&O) noexcept : State(O.State) {
631 O.State = (MacroDirective *)nullptr;
632 }
634 MacroState &operator=(MacroState &&O) noexcept {
635 auto S = O.State;
636 O.State = (MacroDirective *)nullptr;
637 State = S;
638 return *this;
639 }
641 ~MacroState() {
642 if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
643 Info->~ModuleMacroInfo();
644 }
646 MacroDirective *getLatest() const {
647 if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
648 return Info->MD;
649 return State.get<MacroDirective*>();
650 }
652 void setLatest(MacroDirective *MD) {
653 if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
654 Info->MD = MD;
655 else
656 State = MD;
657 }
659 bool isAmbiguous(Preprocessor &PP, const IdentifierInfo *II) const {
660 auto *Info = getModuleInfo(PP, II);
661 return Info ? Info->IsAmbiguous : false;
662 }
664 ArrayRef<ModuleMacro *>
665 getActiveModuleMacros(Preprocessor &PP, const IdentifierInfo *II) const {
666 if (auto *Info = getModuleInfo(PP, II))
Calling 'MacroState::getModuleInfo'
667 return Info->ActiveModuleMacros;
668 return None;
669 }
671 MacroDirective::DefInfo findDirectiveAtLoc(SourceLocation Loc,
672 SourceManager &SourceMgr) const {
673 // FIXME: Incorporate module macros into the result of this.
674 if (auto *Latest = getLatest())
675 return Latest->findDirectiveAtLoc(Loc, SourceMgr);
676 return {};
677 }
679 void overrideActiveModuleMacros(Preprocessor &PP, IdentifierInfo *II) {
680 if (auto *Info = getModuleInfo(PP, II)) {
681 Info->OverriddenMacros.insert(Info->OverriddenMacros.end(),
682 Info->ActiveModuleMacros.begin(),
683 Info->ActiveModuleMacros.end());
684 Info->ActiveModuleMacros.clear();
685 Info->IsAmbiguous = false;
686 }
687 }
689 ArrayRef<ModuleMacro*> getOverriddenMacros() const {
690 if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
691 return Info->OverriddenMacros;
692 return None;
693 }
695 void setOverriddenMacros(Preprocessor &PP,
696 ArrayRef<ModuleMacro *> Overrides) {
697 auto *Info = State.dyn_cast<ModuleMacroInfo*>();
698 if (!Info) {
699 if (Overrides.empty())
700 return;
701 Info = new (PP.getPreprocessorAllocator())
702 ModuleMacroInfo(State.get<MacroDirective *>());
703 State = Info;
704 }
705 Info->OverriddenMacros.clear();
706 Info->OverriddenMacros.insert(Info->OverriddenMacros.end(),
707 Overrides.begin(), Overrides.end());
708 Info->ActiveModuleMacrosGeneration = 0;
709 }
710 };
712 /// For each IdentifierInfo that was associated with a macro, we
713 /// keep a mapping to the history of all macro definitions and #undefs in
714 /// the reverse order (the latest one is in the head of the list).
715 ///
716 /// This mapping lives within the \p CurSubmoduleState.
717 using MacroMap = llvm::DenseMap<const IdentifierInfo *, MacroState>;
719 struct SubmoduleState;
721 /// Information about a submodule that we're currently building.
722 struct BuildingSubmoduleInfo {
723 /// The module that we are building.
724 Module *M;
726 /// The location at which the module was included.
727 SourceLocation ImportLoc;
729 /// Whether we entered this submodule via a pragma.
730 bool IsPragma;
732 /// The previous SubmoduleState.
733 SubmoduleState *OuterSubmoduleState;
735 /// The number of pending module macro names when we started building this.
736 unsigned OuterPendingModuleMacroNames;
738 BuildingSubmoduleInfo(Module *M, SourceLocation ImportLoc, bool IsPragma,
739 SubmoduleState *OuterSubmoduleState,
740 unsigned OuterPendingModuleMacroNames)
741 : M(M), ImportLoc(ImportLoc), IsPragma(IsPragma),
742 OuterSubmoduleState(OuterSubmoduleState),
743 OuterPendingModuleMacroNames(OuterPendingModuleMacroNames) {}
744 };
745 SmallVector<BuildingSubmoduleInfo, 8> BuildingSubmoduleStack;
747 /// Information about a submodule's preprocessor state.
748 struct SubmoduleState {
749 /// The macros for the submodule.
750 MacroMap Macros;
752 /// The set of modules that are visible within the submodule.
753 VisibleModuleSet VisibleModules;
755 // FIXME: CounterValue?
756 // FIXME: PragmaPushMacroInfo?
757 };
758 std::map<Module *, SubmoduleState> Submodules;
760 /// The preprocessor state for preprocessing outside of any submodule.
761 SubmoduleState NullSubmoduleState;
763 /// The current submodule state. Will be \p NullSubmoduleState if we're not
764 /// in a submodule.
765 SubmoduleState *CurSubmoduleState;
767 /// The set of known macros exported from modules.
768 llvm::FoldingSet<ModuleMacro> ModuleMacros;
770 /// The names of potential module macros that we've not yet processed.
771 llvm::SmallVector<const IdentifierInfo *, 32> PendingModuleMacroNames;
773 /// The list of module macros, for each identifier, that are not overridden by
774 /// any other module macro.
775 llvm::DenseMap<const IdentifierInfo *, llvm::TinyPtrVector<ModuleMacro *>>
776 LeafModuleMacros;
/// Macros that we want to warn about because they are still unused at the
/// end of the translation unit.
780 ///
781 /// We store just their SourceLocations instead of
782 /// something like MacroInfo*. The benefit of this is that when we are
783 /// deserializing from PCH, we don't need to deserialize identifier & macros
784 /// just so that we can report that they are unused, we just warn using
785 /// the SourceLocations of this set (that will be filled by the ASTReader).
786 using WarnUnusedMacroLocsTy = llvm::SmallDenseSet<SourceLocation, 32>;
787 WarnUnusedMacroLocsTy WarnUnusedMacroLocs;
789 /// A "freelist" of MacroArg objects that can be
790 /// reused for quick allocation.
791 MacroArgs *MacroArgCache = nullptr;
793 /// For each IdentifierInfo used in a \#pragma push_macro directive,
794 /// we keep a MacroInfo stack used to restore the previous macro value.
795 llvm::DenseMap<IdentifierInfo *, std::vector<MacroInfo *>>
796 PragmaPushMacroInfo;
798 // Various statistics we track for performance analysis.
799 unsigned NumDirectives = 0;
800 unsigned NumDefined = 0;
801 unsigned NumUndefined = 0;
802 unsigned NumPragma = 0;
803 unsigned NumIf = 0;
804 unsigned NumElse = 0;
805 unsigned NumEndif = 0;
806 unsigned NumEnteredSourceFiles = 0;
807 unsigned MaxIncludeStackDepth = 0;
808 unsigned NumMacroExpanded = 0;
809 unsigned NumFnMacroExpanded = 0;
810 unsigned NumBuiltinMacroExpanded = 0;
811 unsigned NumFastMacroExpanded = 0;
812 unsigned NumTokenPaste = 0;
813 unsigned NumFastTokenPaste = 0;
814 unsigned NumSkipped = 0;
816 /// The predefined macros that preprocessor should use from the
817 /// command line etc.
818 std::string Predefines;
820 /// The file ID for the preprocessor predefines.
821 FileID PredefinesFileID;
823 /// The file ID for the PCH through header.
824 FileID PCHThroughHeaderFileID;
826 /// Whether tokens are being skipped until a #pragma hdrstop is seen.
827 bool SkippingUntilPragmaHdrStop = false;
829 /// Whether tokens are being skipped until the through header is seen.
830 bool SkippingUntilPCHThroughHeader = false;
832 /// \{
833 /// Cache of macro expanders to reduce malloc traffic.
834 enum { TokenLexerCacheSize = 8 };
835 unsigned NumCachedTokenLexers;
836 std::unique_ptr<TokenLexer> TokenLexerCache[TokenLexerCacheSize];
837 /// \}
/// Keeps macro expanded tokens for TokenLexers.
///
/// Works like a stack; a TokenLexer adds the macro expanded tokens that it is
/// going to lex to the cache, and when it finishes the tokens are removed
/// from the end of the cache.
844 SmallVector<Token, 16> MacroExpandedTokens;
845 std::vector<std::pair<TokenLexer *, size_t>> MacroExpandingLexersStack;
847 /// A record of the macro definitions and expansions that
848 /// occurred during preprocessing.
849 ///
850 /// This is an optional side structure that can be enabled with
851 /// \c createPreprocessingRecord() prior to preprocessing.
852 PreprocessingRecord *Record = nullptr;
854 /// Cached tokens state.
855 using CachedTokensTy = SmallVector<Token, 1>;
857 /// Cached tokens are stored here when we do backtracking or
858 /// lookahead. They are "lexed" by the CachingLex() method.
859 CachedTokensTy CachedTokens;
861 /// The position of the cached token that CachingLex() should
862 /// "lex" next.
863 ///
864 /// If it points beyond the CachedTokens vector, it means that a normal
865 /// Lex() should be invoked.
866 CachedTokensTy::size_type CachedLexPos = 0;
868 /// Stack of backtrack positions, allowing nested backtracks.
869 ///
870 /// The EnableBacktrackAtThisPos() method pushes a position to
871 /// indicate where CachedLexPos should be set when the BackTrack() method is
872 /// invoked (at which point the last position is popped).
873 std::vector<CachedTokensTy::size_type> BacktrackPositions;
875 struct MacroInfoChain {
876 MacroInfo MI;
877 MacroInfoChain *Next;
878 };
880 /// MacroInfos are managed as a chain for easy disposal. This is the head
881 /// of that list.
882 MacroInfoChain *MIChainHead = nullptr;
884 void updateOutOfDateIdentifier(IdentifierInfo &II) const;
887 Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts,
888 DiagnosticsEngine &diags, LangOptions &opts, SourceManager &SM,
889 HeaderSearch &Headers, ModuleLoader &TheModuleLoader,
890 IdentifierInfoLookup *IILookup = nullptr,
891 bool OwnsHeaderSearch = false,
892 TranslationUnitKind TUKind = TU_Complete);
894 ~Preprocessor();
896 /// Initialize the preprocessor using information about the target.
897 ///
898 /// \param Target is owned by the caller and must remain valid for the
899 /// lifetime of the preprocessor.
900 /// \param AuxTarget is owned by the caller and must remain valid for
901 /// the lifetime of the preprocessor.
902 void Initialize(const TargetInfo &Target,
903 const TargetInfo *AuxTarget = nullptr);
905 /// Initialize the preprocessor to parse a model file
906 ///
/// To parse model files the preprocessor of the original source is reused to
/// preserve the identifier table. However, to avoid some duplicate
/// information in the preprocessor, some cleanup is needed before it is used
/// to parse model files. This method does that cleanup.
911 void InitializeForModelFile();
913 /// Cleanup after model file parsing
914 void FinalizeForModelFile();
916 /// Retrieve the preprocessor options used to initialize this
917 /// preprocessor.
918 PreprocessorOptions &getPreprocessorOpts() const { return *PPOpts; }
920 DiagnosticsEngine &getDiagnostics() const { return *Diags; }
921 void setDiagnostics(DiagnosticsEngine &D) { Diags = &D; }
923 const LangOptions &getLangOpts() const { return LangOpts; }
924 const TargetInfo &getTargetInfo() const { return *Target; }
925 const TargetInfo *getAuxTargetInfo() const { return AuxTarget; }
926 FileManager &getFileManager() const { return FileMgr; }
927 SourceManager &getSourceManager() const { return SourceMgr; }
928 HeaderSearch &getHeaderSearchInfo() const { return HeaderInfo; }
930 IdentifierTable &getIdentifierTable() { return Identifiers; }
931 const IdentifierTable &getIdentifierTable() const { return Identifiers; }
932 SelectorTable &getSelectorTable() { return Selectors; }
933 Builtin::Context &getBuiltinInfo() { return *BuiltinInfo; }
934 llvm::BumpPtrAllocator &getPreprocessorAllocator() { return BP; }
936 void setExternalSource(ExternalPreprocessorSource *Source) {
937 ExternalSource = Source;
938 }
940 ExternalPreprocessorSource *getExternalSource() const {
941 return ExternalSource;
942 }
944 /// Retrieve the module loader associated with this preprocessor.
945 ModuleLoader &getModuleLoader() const { return TheModuleLoader; }
947 bool hadModuleLoaderFatalFailure() const {
948 return TheModuleLoader.HadFatalFailure;
949 }
951 /// Retrieve the number of Directives that have been processed by the
952 /// Preprocessor.
953 unsigned getNumDirectives() const {
954 return NumDirectives;
955 }
957 /// True if we are currently preprocessing a #if or #elif directive
958 bool isParsingIfOrElifDirective() const {
959 return ParsingIfOrElifDirective;
960 }
962 /// Control whether the preprocessor retains comments in output.
963 void SetCommentRetentionState(bool KeepComments, bool KeepMacroComments) {
964 this->KeepComments = KeepComments | KeepMacroComments;
965 this->KeepMacroComments = KeepMacroComments;
966 }
968 bool getCommentRetentionState() const { return KeepComments; }
970 void setPragmasEnabled(bool Enabled) { PragmasEnabled = Enabled; }
971 bool getPragmasEnabled() const { return PragmasEnabled; }
973 void SetSuppressIncludeNotFoundError(bool Suppress) {
974 SuppressIncludeNotFoundError = Suppress;
975 }
977 bool GetSuppressIncludeNotFoundError() {
978 return SuppressIncludeNotFoundError;
979 }
981 /// Sets whether the preprocessor is responsible for producing output or if
982 /// it is producing tokens to be consumed by Parse and Sema.
983 void setPreprocessedOutput(bool IsPreprocessedOutput) {
984 PreprocessedOutput = IsPreprocessedOutput;
985 }
987 /// Returns true if the preprocessor is responsible for generating output,
988 /// false if it is producing tokens to be consumed by Parse and Sema.
989 bool isPreprocessedOutput() const { return PreprocessedOutput; }
991 /// Return true if we are lexing directly from the specified lexer.
992 bool isCurrentLexer(const PreprocessorLexer *L) const {
993 return CurPPLexer == L;
994 }
996 /// Return the current lexer being lexed from.
997 ///
998 /// Note that this ignores any potentially active macro expansions and _Pragma
999 /// expansions going on at the time.
1000 PreprocessorLexer *getCurrentLexer() const { return CurPPLexer; }
1002 /// Return the current file lexer being lexed from.
1003 ///
1004 /// Note that this ignores any potentially active macro expansions and _Pragma
1005 /// expansions going on at the time.
1006 PreprocessorLexer *getCurrentFileLexer() const;
1008 /// Return the submodule owning the file being lexed. This may not be
1009 /// the current module if we have changed modules since entering the file.
1010 Module *getCurrentLexerSubmodule() const { return CurLexerSubmodule; }
1012 /// Returns the FileID for the preprocessor predefines.
1013 FileID getPredefinesFileID() const { return PredefinesFileID; }
1015 /// \{
1016 /// Accessors for preprocessor callbacks.
1017 ///
1018 /// Note that this class takes ownership of any PPCallbacks object given to
1019 /// it.
1020 PPCallbacks *getPPCallbacks() const { return Callbacks.get(); }
1021 void addPPCallbacks(std::unique_ptr<PPCallbacks> C) {
1022 if (Callbacks)
1023 C = std::make_unique<PPChainedCallbacks>(std::move(C),
1024 std::move(Callbacks));
1025 Callbacks = std::move(C);
1026 }
1027 /// \}
1029 /// Get the number of tokens processed so far.
1030 unsigned getTokenCount() const { return TokenCount; }
1032 /// Get the max number of tokens before issuing a -Wmax-tokens warning.
1033 unsigned getMaxTokens() const { return MaxTokens; }
1035 void overrideMaxTokens(unsigned Value, SourceLocation Loc) {
1036 MaxTokens = Value;
1037 MaxTokensOverrideLoc = Loc;
1038 };
1040 SourceLocation getMaxTokensOverrideLoc() const { return MaxTokensOverrideLoc; }
1042 /// Register a function that would be called on each token in the final
1043 /// expanded token stream.
1044 /// This also reports annotation tokens produced by the parser.
1045 void setTokenWatcher(llvm::unique_function<void(const clang::Token &)> F) {
1046 OnToken = std::move(F);
1047 }
1049 void setPreprocessToken(bool Preprocess) { PreprocessToken = Preprocess; }
1051 bool isMacroDefined(StringRef Id) {
1052 return isMacroDefined(&Identifiers.get(Id));
1053 }
1054 bool isMacroDefined(const IdentifierInfo *II) {
1055 return II->hasMacroDefinition() &&
1056 (!getLangOpts().Modules || (bool)getMacroDefinition(II));
1057 }
1059 /// Determine whether II is defined as a macro within the module M,
1060 /// if that is a module that we've already preprocessed. Does not check for
1061 /// macros imported into M.
1062 bool isMacroDefinedInLocalModule(const IdentifierInfo *II, Module *M) {
1063 if (!II->hasMacroDefinition())
1064 return false;
1065 auto I = Submodules.find(M);
1066 if (I == Submodules.end())
1067 return false;
1068 auto J = I->second.Macros.find(II);
1069 if (J == I->second.Macros.end())
1070 return false;
1071 auto *MD = J->second.getLatest();
1072 return MD && MD->isDefined();
1073 }
1075 MacroDefinition getMacroDefinition(const IdentifierInfo *II) {
1076 if (!II->hasMacroDefinition())
Assuming the condition is false
Taking false branch
1077 return {};
1079 MacroState &S = CurSubmoduleState->Macros[II];
1080 auto *MD = S.getLatest();
1081 while (MD && isa<VisibilityMacroDirective>(MD))
Assuming 'MD' is null
1082 MD = MD->getPrevious();
1083 return MacroDefinition(dyn_cast_or_null<DefMacroDirective>(MD),
Assuming null pointer is passed into cast
1084 S.getActiveModuleMacros(*this, II),
Calling 'MacroState::getActiveModuleMacros'
1085 S.isAmbiguous(*this, II));
1086 }
1088 MacroDefinition getMacroDefinitionAtLoc(const IdentifierInfo *II,
1089 SourceLocation Loc) {
1090 if (!II->hadMacroDefinition())
1091 return {};
1093 MacroState &S = CurSubmoduleState->Macros[II];
1094 MacroDirective::DefInfo DI;
1095 if (auto *MD = S.getLatest())
1096 DI = MD->findDirectiveAtLoc(Loc, getSourceManager());
1097 // FIXME: Compute the set of active module macros at the specified location.
1098 return MacroDefinition(DI.getDirective(),
1099 S.getActiveModuleMacros(*this, II),
1100 S.isAmbiguous(*this, II));
1101 }
1103 /// Given an identifier, return its latest non-imported MacroDirective
1104 /// if it is \#define'd and not \#undef'd, or null if it isn't \#define'd.
1105 MacroDirective *getLocalMacroDirective(const IdentifierInfo *II) const {
1106 if (!II->hasMacroDefinition())
1107 return nullptr;
1109 auto *MD = getLocalMacroDirectiveHistory(II);
1110 if (!MD || MD->getDefinition().isUndefined())
1111 return nullptr;
1113 return MD;
1114 }
1116 const MacroInfo *getMacroInfo(const IdentifierInfo *II) const {
1117 return const_cast<Preprocessor*>(this)->getMacroInfo(II);
1118 }
1120 MacroInfo *getMacroInfo(const IdentifierInfo *II) {
1121 if (!II->hasMacroDefinition())
1122 return nullptr;
1123 if (auto MD = getMacroDefinition(II))
1124 return MD.getMacroInfo();
1125 return nullptr;
1126 }
1128 /// Given an identifier, return the latest non-imported macro
1129 /// directive for that identifier.
1130 ///
1131 /// One can iterate over all previous macro directives from the most recent
1132 /// one.
1133 MacroDirective *getLocalMacroDirectiveHistory(const IdentifierInfo *II) const;
1135 /// Add a directive to the macro directive history for this identifier.
1136 void appendMacroDirective(IdentifierInfo *II, MacroDirective *MD);
1137 DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II, MacroInfo *MI,
1138 SourceLocation Loc) {
1139 DefMacroDirective *MD = AllocateDefMacroDirective(MI, Loc);
1140 appendMacroDirective(II, MD);
1141 return MD;
1142 }
1143 DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II,
1144 MacroInfo *MI) {
1145 return appendDefMacroDirective(II, MI, MI->getDefinitionLoc());
1146 }
1148 /// Set a MacroDirective that was loaded from a PCH file.
1149 void setLoadedMacroDirective(IdentifierInfo *II, MacroDirective *ED,
1150 MacroDirective *MD);
1152 /// Register an exported macro for a module and identifier.
1153 ModuleMacro *addModuleMacro(Module *Mod, IdentifierInfo *II, MacroInfo *Macro,
1154 ArrayRef<ModuleMacro *> Overrides, bool &IsNew);
1155 ModuleMacro *getModuleMacro(Module *Mod, const IdentifierInfo *II);
1157 /// Get the list of leaf (non-overridden) module macros for a name.
1158 ArrayRef<ModuleMacro*> getLeafModuleMacros(const IdentifierInfo *II) const {
1159 if (II->isOutOfDate())
1160 updateOutOfDateIdentifier(const_cast<IdentifierInfo&>(*II));
1161 auto I = LeafModuleMacros.find(II);
1162 if (I != LeafModuleMacros.end())
1163 return I->second;
1164 return None;
1165 }
1167 /// Get the list of submodules that we're currently building.
1168 ArrayRef<BuildingSubmoduleInfo> getBuildingSubmodules() const {
1169 return BuildingSubmoduleStack;
1170 }
1172 /// \{
1173 /// Iterators for the macro history table. Currently defined macros have
1174 /// IdentifierInfo::hasMacroDefinition() set and an empty
1175 /// MacroInfo::getUndefLoc() at the head of the list.
1176 using macro_iterator = MacroMap::const_iterator;
1178 macro_iterator macro_begin(bool IncludeExternalMacros = true) const;
1179 macro_iterator macro_end(bool IncludeExternalMacros = true) const;
1181 llvm::iterator_range<macro_iterator>
1182 macros(bool IncludeExternalMacros = true) const {
1183 macro_iterator begin = macro_begin(IncludeExternalMacros);
1184 macro_iterator end = macro_end(IncludeExternalMacros);
1185 return llvm::make_range(begin, end);
1186 }
1188 /// \}
1190 /// Return the name of the macro defined before \p Loc that has
1191 /// spelling \p Tokens. If there are multiple macros with same spelling,
1192 /// return the last one defined.
1193 StringRef getLastMacroWithSpelling(SourceLocation Loc,
1194 ArrayRef<TokenValue> Tokens) const;
1196 const std::string &getPredefines() const { return Predefines; }
1198 /// Set the predefines for this Preprocessor.
1199 ///
1200 /// These predefines are automatically injected when parsing the main file.
1201 void setPredefines(const char *P) { Predefines = P; }
1202 void setPredefines(StringRef P) { Predefines = std::string(P); }
1204 /// Return information about the specified preprocessor
1205 /// identifier token.
1206 IdentifierInfo *getIdentifierInfo(StringRef Name) const {
1207 return &Identifiers.get(Name);
1208 }
1210 /// Add the specified pragma handler to this preprocessor.
1211 ///
1212 /// If \p Namespace is non-null, then it is a token required to exist on the
1213 /// pragma line before the pragma string starts, e.g. "STDC" or "GCC".
1214 void AddPragmaHandler(StringRef Namespace, PragmaHandler *Handler);
1215 void AddPragmaHandler(PragmaHandler *Handler) {
1216 AddPragmaHandler(StringRef(), Handler);
1217 }
1219 /// Remove the specific pragma handler from this preprocessor.
1220 ///
1221 /// If \p Namespace is non-null, then it should be the namespace that
1222 /// \p Handler was added to. It is an error to remove a handler that
1223 /// has not been registered.
1224 void RemovePragmaHandler(StringRef Namespace, PragmaHandler *Handler);
1225 void RemovePragmaHandler(PragmaHandler *Handler) {
1226 RemovePragmaHandler(StringRef(), Handler);
1227 }
1229 /// Install empty handlers for all pragmas (making them ignored).
1230 void IgnorePragmas();
1232 /// Set empty line handler.
1233 void setEmptylineHandler(EmptylineHandler *Handler) { Emptyline = Handler; }
1235 EmptylineHandler *getEmptylineHandler() const { return Emptyline; }
1237 /// Add the specified comment handler to the preprocessor.
1238 void addCommentHandler(CommentHandler *Handler);
1240 /// Remove the specified comment handler.
1241 ///
1242 /// It is an error to remove a handler that has not been registered.
1243 void removeCommentHandler(CommentHandler *Handler);
1245 /// Set the code completion handler to the given object.
1246 void setCodeCompletionHandler(CodeCompletionHandler &Handler) {
1247 CodeComplete = &Handler;
1248 }
1250 /// Retrieve the current code-completion handler.
1251 CodeCompletionHandler *getCodeCompletionHandler() const {
1252 return CodeComplete;
1253 }
1255 /// Clear out the code completion handler.
1256 void clearCodeCompletionHandler() {
1257 CodeComplete = nullptr;
1258 }
1260 /// Hook used by the lexer to invoke the "included file" code
1261 /// completion point.
1262 void CodeCompleteIncludedFile(llvm::StringRef Dir, bool IsAngled);
1264 /// Hook used by the lexer to invoke the "natural language" code
1265 /// completion point.
1266 void CodeCompleteNaturalLanguage();
1268 /// Set the code completion token for filtering purposes.
1269 void setCodeCompletionIdentifierInfo(IdentifierInfo *Filter) {
1270 CodeCompletionII = Filter;
1271 }
1273 /// Set the code completion token range for detecting replacement range later
1274 /// on.
1275 void setCodeCompletionTokenRange(const SourceLocation Start,
1276 const SourceLocation End) {
1277 CodeCompletionTokenRange = {Start, End};
1278 }
1279 SourceRange getCodeCompletionTokenRange() const {
1280 return CodeCompletionTokenRange;
1281 }
1283 /// Get the code completion token for filtering purposes.
1284 StringRef getCodeCompletionFilter() {
1285 if (CodeCompletionII)
1286 return CodeCompletionII->getName();
1287 return {};
1288 }
1290 /// Retrieve the preprocessing record, or NULL if there is no
1291 /// preprocessing record.
1292 PreprocessingRecord *getPreprocessingRecord() const { return Record; }
1294 /// Create a new preprocessing record, which will keep track of
1295 /// all macro expansions, macro definitions, etc.
1296 void createPreprocessingRecord();
1298 /// Returns true if the FileEntry is the PCH through header.
1299 bool isPCHThroughHeader(const FileEntry *FE);
1301 /// True if creating a PCH with a through header.
1302 bool creatingPCHWithThroughHeader();
1304 /// True if using a PCH with a through header.
1305 bool usingPCHWithThroughHeader();
1307 /// True if creating a PCH with a #pragma hdrstop.
1308 bool creatingPCHWithPragmaHdrStop();
1310 /// True if using a PCH with a #pragma hdrstop.
1311 bool usingPCHWithPragmaHdrStop();
1313 /// Skip tokens until after the #include of the through header or
1314 /// until after a #pragma hdrstop.
1315 void SkipTokensWhileUsingPCH();
1317 /// Process directives while skipping until the through header or
1318 /// #pragma hdrstop is found.
1319 void HandleSkippedDirectiveWhileUsingPCH(Token &Result,
1320 SourceLocation HashLoc);
1322 /// Enter the specified FileID as the main source file,
1323 /// which implicitly adds the builtin defines etc.
1324 void EnterMainSourceFile();
1326 /// Inform the preprocessor callbacks that processing is complete.
1327 void EndSourceFile();
1329 /// Add a source file to the top of the include stack and
1330 /// start lexing tokens from it instead of the current buffer.
1331 ///
1332 /// Emits a diagnostic, doesn't enter the file, and returns true on error.
1333 bool EnterSourceFile(FileID FID, const DirectoryLookup *Dir,
1334 SourceLocation Loc);
1336 /// Add a Macro to the top of the include stack and start lexing
1337 /// tokens from it instead of the current buffer.
1338 ///
1339 /// \param Args specifies the tokens input to a function-like macro.
1340 /// \param ILEnd specifies the location of the ')' for a function-like macro
1341 /// or the identifier for an object-like macro.
1342 void EnterMacro(Token &Tok, SourceLocation ILEnd, MacroInfo *Macro,
1343 MacroArgs *Args);
1346 /// Add a "macro" context to the top of the include stack,
1347 /// which will cause the lexer to start returning the specified tokens.
1348 ///
1349 /// If \p DisableMacroExpansion is true, tokens lexed from the token stream
1350 /// will not be subject to further macro expansion. Otherwise, these tokens
1351 /// will be re-macro-expanded when/if expansion is enabled.
1352 ///
1353 /// If \p OwnsTokens is false, this method assumes that the specified stream
1354 /// of tokens has a permanent owner somewhere, so they do not need to be
1355 /// copied. If it is true, it assumes the array of tokens is allocated with
1356 /// \c new[] and the Preprocessor will delete[] it.
1357 ///
1358 /// If \p IsReinject the resulting tokens will have Token::IsReinjected flag
1359 /// set, see the flag documentation for details.
1360 void EnterTokenStream(const Token *Toks, unsigned NumToks,
1361 bool DisableMacroExpansion, bool OwnsTokens,
1362 bool IsReinject);
1365 void EnterTokenStream(std::unique_ptr<Token[]> Toks, unsigned NumToks,
1366 bool DisableMacroExpansion, bool IsReinject) {
1367 EnterTokenStream(Toks.release(), NumToks, DisableMacroExpansion, true,
1368 IsReinject);
1369 }
1371 void EnterTokenStream(ArrayRef<Token> Toks, bool DisableMacroExpansion,
1372 bool IsReinject) {
1373 EnterTokenStream(Toks.data(), Toks.size(), DisableMacroExpansion, false,
1374 IsReinject);
1375 }
1377 /// Pop the current lexer/macro exp off the top of the lexer stack.
1378 ///
1379 /// This should only be used in situations where the current state of the
1380 /// top-of-stack lexer is known.
1381 void RemoveTopOfLexerStack();
1383 /// From the point that this method is called, and until
1384 /// CommitBacktrackedTokens() or Backtrack() is called, the Preprocessor
1385 /// keeps track of the lexed tokens so that a subsequent Backtrack() call will
1386 /// make the Preprocessor re-lex the same tokens.
1387 ///
1388 /// Nested backtracks are allowed, meaning that EnableBacktrackAtThisPos can
1389 /// be called multiple times and CommitBacktrackedTokens/Backtrack calls will
1390 /// be combined with the EnableBacktrackAtThisPos calls in reverse order.
1391 ///
1392 /// NOTE: *DO NOT* forget to call either CommitBacktrackedTokens or Backtrack
1393 /// at some point after EnableBacktrackAtThisPos. If you don't, caching of
1394 /// tokens will continue indefinitely.
1395 ///
1396 void EnableBacktrackAtThisPos();
1398 /// Disable the last EnableBacktrackAtThisPos call.
1399 void CommitBacktrackedTokens();
1401 /// Make Preprocessor re-lex the tokens that were lexed since
1402 /// EnableBacktrackAtThisPos() was previously called.
1403 void Backtrack();
1405 /// True if EnableBacktrackAtThisPos() was called and
1406 /// caching of tokens is on.
1407 bool isBacktrackEnabled() const { return !BacktrackPositions.empty(); }
1409 /// Lex the next token for this preprocessor.
1410 void Lex(Token &Result);
1412 /// Lex a token, forming a header-name token if possible.
1413 bool LexHeaderName(Token &Result, bool AllowMacroExpansion = true);
1415 bool LexAfterModuleImport(Token &Result);
1416 void CollectPpImportSuffix(SmallVectorImpl<Token> &Toks);
1418 void makeModuleVisible(Module *M, SourceLocation Loc);
1420 SourceLocation getModuleImportLoc(Module *M) const {
1421 return CurSubmoduleState->VisibleModules.getImportLoc(M);
1422 }
1424 /// Lex a string literal, which may be the concatenation of multiple
1425 /// string literals and may even come from macro expansion.
1426 /// \returns true on success, false if a error diagnostic has been generated.
1427 bool LexStringLiteral(Token &Result, std::string &String,
1428 const char *DiagnosticTag, bool AllowMacroExpansion) {
1429 if (AllowMacroExpansion)
1430 Lex(Result);
1431 else
1432 LexUnexpandedToken(Result);
1433 return FinishLexStringLiteral(Result, String, DiagnosticTag,
1434 AllowMacroExpansion);
1435 }
1437 /// Complete the lexing of a string literal where the first token has
1438 /// already been lexed (see LexStringLiteral).
1439 bool FinishLexStringLiteral(Token &Result, std::string &String,
1440 const char *DiagnosticTag,
1441 bool AllowMacroExpansion);
1443 /// Lex a token. If it's a comment, keep lexing until we get
1444 /// something not a comment.
1445 ///
1446 /// This is useful in -E -C mode where comments would foul up preprocessor
1447 /// directive handling.
1448 void LexNonComment(Token &Result) {
1449 do
1450 Lex(Result);
1451 while (Result.getKind() == tok::comment);
1452 }
1454 /// Just like Lex, but disables macro expansion of identifier tokens.
1455 void LexUnexpandedToken(Token &Result) {
1456 // Disable macro expansion.
1457 bool OldVal = DisableMacroExpansion;
1458 DisableMacroExpansion = true;
1459 // Lex the token.
1460 Lex(Result);
1462 // Reenable it.
1463 DisableMacroExpansion = OldVal;
1464 }
1466 /// Like LexNonComment, but this disables macro expansion of
1467 /// identifier tokens.
1468 void LexUnexpandedNonComment(Token &Result) {
1469 do
1470 LexUnexpandedToken(Result);
1471 while (Result.getKind() == tok::comment);
1472 }
1474 /// Parses a simple integer literal to get its numeric value. Floating
1475 /// point literals and user defined literals are rejected. Used primarily to
1476 /// handle pragmas that accept integer arguments.
1477 bool parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value);
1479 /// Disables macro expansion everywhere except for preprocessor directives.
1480 void SetMacroExpansionOnlyInDirectives() {
1481 DisableMacroExpansion = true;
1482 MacroExpansionInDirectivesOverride = true;
1483 }
1485 /// Peeks ahead N tokens and returns that token without consuming any
1486 /// tokens.
1487 ///
1488 /// LookAhead(0) returns the next token that would be returned by Lex(),
1489 /// LookAhead(1) returns the token after it, etc. This returns normal
1490 /// tokens after phase 5. As such, it is equivalent to using
1491 /// 'Lex', not 'LexUnexpandedToken'.
1492 const Token &LookAhead(unsigned N) {
1493 assert(LexLevel == 0 && "cannot use lookahead while lexing")((void)0);
1494 if (CachedLexPos + N < CachedTokens.size())
1495 return CachedTokens[CachedLexPos+N];
1496 else
1497 return PeekAhead(N+1);
1498 }
1500 /// When backtracking is enabled and tokens are cached,
1501 /// this allows to revert a specific number of tokens.
1502 ///
1503 /// Note that the number of tokens being reverted should be up to the last
1504 /// backtrack position, not more.
1505 void RevertCachedTokens(unsigned N) {
1506 assert(isBacktrackEnabled() &&((void)0)
1507 "Should only be called when tokens are cached for backtracking")((void)0);
1508 assert(signed(CachedLexPos) - signed(N) >= signed(BacktrackPositions.back())((void)0)
1509 && "Should revert tokens up to the last backtrack position, not more")((void)0);
1510 assert(signed(CachedLexPos) - signed(N) >= 0 &&((void)0)
1511 "Corrupted backtrack positions ?")((void)0);
1512 CachedLexPos -= N;
1513 }
1515 /// Enters a token in the token stream to be lexed next.
1516 ///
1517 /// If BackTrack() is called afterwards, the token will remain at the
1518 /// insertion point.
1519 /// If \p IsReinject is true, resulting token will have Token::IsReinjected
1520 /// flag set. See the flag documentation for details.
1521 void EnterToken(const Token &Tok, bool IsReinject) {
1522 if (LexLevel) {
1523 // It's not correct in general to enter caching lex mode while in the
1524 // middle of a nested lexing action.
1525 auto TokCopy = std::make_unique<Token[]>(1);
1526 TokCopy[0] = Tok;
1527 EnterTokenStream(std::move(TokCopy), 1, true, IsReinject);
1528 } else {
1529 EnterCachingLexMode();
1530 assert(IsReinject && "new tokens in the middle of cached stream")((void)0);
1531 CachedTokens.insert(CachedTokens.begin()+CachedLexPos, Tok);
1532 }
1533 }
1535 /// We notify the Preprocessor that if it is caching tokens (because
1536 /// backtrack is enabled) it should replace the most recent cached tokens
1537 /// with the given annotation token. This function has no effect if
1538 /// backtracking is not enabled.
1539 ///
1540 /// Note that the use of this function is just for optimization, so that the
1541 /// cached tokens doesn't get re-parsed and re-resolved after a backtrack is
1542 /// invoked.
1543 void AnnotateCachedTokens(const Token &Tok) {
1544 assert(Tok.isAnnotation() && "Expected annotation token")((void)0);
1545 if (CachedLexPos != 0 && isBacktrackEnabled())
1546 AnnotatePreviousCachedTokens(Tok);
1547 }
1549 /// Get the location of the last cached token, suitable for setting the end
1550 /// location of an annotation token.
1551 SourceLocation getLastCachedTokenLocation() const {
1552 assert(CachedLexPos != 0)((void)0);
1553 return CachedTokens[CachedLexPos-1].getLastLoc();
1554 }
1556 /// Whether \p Tok is the most recent token (`CachedLexPos - 1`) in
1557 /// CachedTokens.
1558 bool IsPreviousCachedToken(const Token &Tok) const;
1560 /// Replace token in `CachedLexPos - 1` in CachedTokens by the tokens
1561 /// in \p NewToks.
1562 ///
1563 /// Useful when a token needs to be split into smaller ones and the most
1564 /// recent token in CachedTokens must be updated to reflect that.
1565 void ReplacePreviousCachedToken(ArrayRef<Token> NewToks);
1567 /// Replace the last token with an annotation token.
1568 ///
1569 /// Like AnnotateCachedTokens(), this routine replaces an
1570 /// already-parsed (and resolved) token with an annotation
1571 /// token. However, this routine only replaces the last token with
1572 /// the annotation token; it does not affect any other cached
1573 /// tokens. This function has no effect if backtracking is not
1574 /// enabled.
1575 void ReplaceLastTokenWithAnnotation(const Token &Tok) {
1576 assert(Tok.isAnnotation() && "Expected annotation token")((void)0);
1577 if (CachedLexPos != 0 && isBacktrackEnabled())
1578 CachedTokens[CachedLexPos-1] = Tok;
1579 }
1581 /// Enter an annotation token into the token stream.
1582 void EnterAnnotationToken(SourceRange Range, tok::TokenKind Kind,
1583 void *AnnotationVal);
1585 /// Determine whether it's possible for a future call to Lex to produce an
1586 /// annotation token created by a previous call to EnterAnnotationToken.
1587 bool mightHavePendingAnnotationTokens() {
1588 return CurLexerKind != CLK_Lexer;
1589 }
1591 /// Update the current token to represent the provided
1592 /// identifier, in order to cache an action performed by typo correction.
1593 void TypoCorrectToken(const Token &Tok) {
1594 assert(Tok.getIdentifierInfo() && "Expected identifier token")((void)0);
1595 if (CachedLexPos != 0 && isBacktrackEnabled())
1596 CachedTokens[CachedLexPos-1] = Tok;
1597 }
1599 /// Recompute the current lexer kind based on the CurLexer/
1600 /// CurTokenLexer pointers.
1601 void recomputeCurLexerKind();
1603 /// Returns true if incremental processing is enabled
1604 bool isIncrementalProcessingEnabled() const { return IncrementalProcessing; }
1606 /// Enables the incremental processing
1607 void enableIncrementalProcessing(bool value = true) {
1608 IncrementalProcessing = value;
1609 }
1611 /// Specify the point at which code-completion will be performed.
1612 ///
1613 /// \param File the file in which code completion should occur. If
1614 /// this file is included multiple times, code-completion will
1615 /// perform completion the first time it is included. If NULL, this
1616 /// function clears out the code-completion point.
1617 ///
1618 /// \param Line the line at which code completion should occur
1619 /// (1-based).
1620 ///
1621 /// \param Column the column at which code completion should occur
1622 /// (1-based).
1623 ///
1624 /// \returns true if an error occurred, false otherwise.
1625 bool SetCodeCompletionPoint(const FileEntry *File,
1626 unsigned Line, unsigned Column);
1628 /// Determine if we are performing code completion.
1629 bool isCodeCompletionEnabled() const { return CodeCompletionFile != nullptr; }
1631 /// Returns the location of the code-completion point.
1632 ///
1633 /// Returns an invalid location if code-completion is not enabled or the file
1634 /// containing the code-completion point has not been lexed yet.
1635 SourceLocation getCodeCompletionLoc() const { return CodeCompletionLoc; }
1637 /// Returns the start location of the file of code-completion point.
1638 ///
1639 /// Returns an invalid location if code-completion is not enabled or the file
1640 /// containing the code-completion point has not been lexed yet.
1641 SourceLocation getCodeCompletionFileLoc() const {
1642 return CodeCompletionFileLoc;
1643 }
1645 /// Returns true if code-completion is enabled and we have hit the
1646 /// code-completion point.
1647 bool isCodeCompletionReached() const { return CodeCompletionReached; }
1649 /// Note that we hit the code-completion point.
1650 void setCodeCompletionReached() {
1651 assert(isCodeCompletionEnabled() && "Code-completion not enabled!")((void)0);
1652 CodeCompletionReached = true;
1653 // Silence any diagnostics that occur after we hit the code-completion.
1654 getDiagnostics().setSuppressAllDiagnostics(true);
1655 }
1657 /// The location of the currently-active \#pragma clang
1658 /// arc_cf_code_audited begin.
1659 ///
1660 /// Returns an invalid location if there is no such pragma active.
1661 std::pair<IdentifierInfo *, SourceLocation>
1662 getPragmaARCCFCodeAuditedInfo() const {
1663 return PragmaARCCFCodeAuditedInfo;
1664 }
1666 /// Set the location of the currently-active \#pragma clang
1667 /// arc_cf_code_audited begin. An invalid location ends the pragma.
1668 void setPragmaARCCFCodeAuditedInfo(IdentifierInfo *Ident,
1669 SourceLocation Loc) {
1670 PragmaARCCFCodeAuditedInfo = {Ident, Loc};
1671 }
1673 /// The location of the currently-active \#pragma clang
1674 /// assume_nonnull begin.
1675 ///
1676 /// Returns an invalid location if there is no such pragma active.
1677 SourceLocation getPragmaAssumeNonNullLoc() const {
1678 return PragmaAssumeNonNullLoc;
1679 }
1681 /// Set the location of the currently-active \#pragma clang
1682 /// assume_nonnull begin. An invalid location ends the pragma.
1683 void setPragmaAssumeNonNullLoc(SourceLocation Loc) {
1684 PragmaAssumeNonNullLoc = Loc;
1685 }
1687 /// Set the directory in which the main file should be considered
1688 /// to have been found, if it is not a real file.
1689 void setMainFileDir(const DirectoryEntry *Dir) {
1690 MainFileDir = Dir;
1691 }
1693 /// Instruct the preprocessor to skip part of the main source file.
1694 ///
1695 /// \param Bytes The number of bytes in the preamble to skip.
1696 ///
1697 /// \param StartOfLine Whether skipping these bytes puts the lexer at the
1698 /// start of a line.
1699 void setSkipMainFilePreamble(unsigned Bytes, bool StartOfLine) {
1700 SkipMainFilePreamble.first = Bytes;
1701 SkipMainFilePreamble.second = StartOfLine;
1702 }
1704 /// Forwarding function for diagnostics. This emits a diagnostic at
1705 /// the specified Token's location, translating the token's start
1706 /// position in the current buffer into a SourcePosition object for rendering.
1707 DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const {
1708 return Diags->Report(Loc, DiagID);
1709 }
1711 DiagnosticBuilder Diag(const Token &Tok, unsigned DiagID) const {
1712 return Diags->Report(Tok.getLocation(), DiagID);
1713 }
1715 /// Return the 'spelling' of the token at the given
1716 /// location; does not go up to the spelling location or down to the
1717 /// expansion location.
1718 ///
1719 /// \param buffer A buffer which will be used only if the token requires
1720 /// "cleaning", e.g. if it contains trigraphs or escaped newlines
1721 /// \param invalid If non-null, will be set \c true if an error occurs.
1722 StringRef getSpelling(SourceLocation loc,
1723 SmallVectorImpl<char> &buffer,
1724 bool *invalid = nullptr) const {
1725 return Lexer::getSpelling(loc, buffer, SourceMgr, LangOpts, invalid);
1726 }
1728 /// Return the 'spelling' of the Tok token.
1729 ///
1730 /// The spelling of a token is the characters used to represent the token in
1731 /// the source file after trigraph expansion and escaped-newline folding. In
1732 /// particular, this wants to get the true, uncanonicalized, spelling of
1733 /// things like digraphs, UCNs, etc.
1734 ///
1735 /// \param Invalid If non-null, will be set \c true if an error occurs.
1736 std::string getSpelling(const Token &Tok, bool *Invalid = nullptr) const {
1737 return Lexer::getSpelling(Tok, SourceMgr, LangOpts, Invalid);
1738 }
1740 /// Get the spelling of a token into a preallocated buffer, instead
1741 /// of as an std::string.
1742 ///
1743 /// The caller is required to allocate enough space for the token, which is
1744 /// guaranteed to be at least Tok.getLength() bytes long. The length of the
1745 /// actual result is returned.
1746 ///
1747 /// Note that this method may do two possible things: it may either fill in
1748 /// the buffer specified with characters, or it may *change the input pointer*
1749 /// to point to a constant buffer with the data already in it (avoiding a
1750 /// copy). The caller is not allowed to modify the returned buffer pointer
1751 /// if an internal buffer is returned.
1752 unsigned getSpelling(const Token &Tok, const char *&Buffer,
1753 bool *Invalid = nullptr) const {
1754 return Lexer::getSpelling(Tok, Buffer, SourceMgr, LangOpts, Invalid);
1755 }
1757 /// Get the spelling of a token into a SmallVector.
1758 ///
1759 /// Note that the returned StringRef may not point to the
1760 /// supplied buffer if a copy can be avoided.
1761 StringRef getSpelling(const Token &Tok,
1762 SmallVectorImpl<char> &Buffer,
1763 bool *Invalid = nullptr) const;
1765 /// Relex the token at the specified location.
1766 /// \returns true if there was a failure, false on success.
1767 bool getRawToken(SourceLocation Loc, Token &Result,
1768 bool IgnoreWhiteSpace = false) {
1769 return Lexer::getRawToken(Loc, Result, SourceMgr, LangOpts, IgnoreWhiteSpace);
1770 }
1772 /// Given a Token \p Tok that is a numeric constant with length 1,
1773 /// return the character.
1774 char
1775 getSpellingOfSingleCharacterNumericConstant(const Token &Tok,
1776 bool *Invalid = nullptr) const {
1777 assert(Tok.is(tok::numeric_constant) &&((void)0)
1778 Tok.getLength() == 1 && "Called on unsupported token")((void)0);
1779 assert(!Tok.needsCleaning() && "Token can't need cleaning with length 1")((void)0);
1781 // If the token is carrying a literal data pointer, just use it.
1782 if (const char *D = Tok.getLiteralData())
1783 return *D;
1785 // Otherwise, fall back on getCharacterData, which is slower, but always
1786 // works.
1787 return *SourceMgr.getCharacterData(Tok.getLocation(), Invalid);
1788 }
1790 /// Retrieve the name of the immediate macro expansion.
1791 ///
1792 /// This routine starts from a source location, and finds the name of the
1793 /// macro responsible for its immediate expansion. It looks through any
1794 /// intervening macro argument expansions to compute this. It returns a
1795 /// StringRef that refers to the SourceManager-owned buffer of the source
1796 /// where that macro name is spelled. Thus, the result shouldn't out-live
1797 /// the SourceManager.
1798 StringRef getImmediateMacroName(SourceLocation Loc) {
1799 return Lexer::getImmediateMacroName(Loc, SourceMgr, getLangOpts());
1800 }
1802 /// Plop the specified string into a scratch buffer and set the
1803 /// specified token's location and length to it.
1804 ///
1805 /// If specified, the source location provides a location of the expansion
1806 /// point of the token.
1807 void CreateString(StringRef Str, Token &Tok,
1808 SourceLocation ExpansionLocStart = SourceLocation(),
1809 SourceLocation ExpansionLocEnd = SourceLocation());
1811 /// Split the first Length characters out of the token starting at TokLoc
1812 /// and return a location pointing to the split token. Re-lexing from the
1813 /// split token will return the split token rather than the original.
1814 SourceLocation SplitToken(SourceLocation TokLoc, unsigned Length);
1816 /// Computes the source location just past the end of the
1817 /// token at this source location.
1818 ///
1819 /// This routine can be used to produce a source location that
1820 /// points just past the end of the token referenced by \p Loc, and
1821 /// is generally used when a diagnostic needs to point just after a
1822 /// token where it expected something different that it received. If
1823 /// the returned source location would not be meaningful (e.g., if
1824 /// it points into a macro), this routine returns an invalid
1825 /// source location.
1826 ///
1827 /// \param Offset an offset from the end of the token, where the source
1828 /// location should refer to. The default offset (0) produces a source
1829 /// location pointing just past the end of the token; an offset of 1 produces
1830 /// a source location pointing to the last character in the token, etc.
1831 SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset = 0) {
1832 return Lexer::getLocForEndOfToken(Loc, Offset, SourceMgr, LangOpts);
1833 }
1835 /// Returns true if the given MacroID location points at the first
1836 /// token of the macro expansion.
1837 ///
1838 /// \param MacroBegin If non-null and function returns true, it is set to
1839 /// begin location of the macro.
1840 bool isAtStartOfMacroExpansion(SourceLocation loc,
1841 SourceLocation *MacroBegin = nullptr) const {
1842 return Lexer::isAtStartOfMacroExpansion(loc, SourceMgr, LangOpts,
1843 MacroBegin);
1844 }
1846 /// Returns true if the given MacroID location points at the last
1847 /// token of the macro expansion.
1848 ///
1849 /// \param MacroEnd If non-null and function returns true, it is set to
1850 /// end location of the macro.
1851 bool isAtEndOfMacroExpansion(SourceLocation loc,
1852 SourceLocation *MacroEnd = nullptr) const {
1853 return Lexer::isAtEndOfMacroExpansion(loc, SourceMgr, LangOpts, MacroEnd);
1854 }
1856 /// Print the token to stderr, used for debugging.
1857 void DumpToken(const Token &Tok, bool DumpFlags = false) const;
1858 void DumpLocation(SourceLocation Loc) const;
1859 void DumpMacro(const MacroInfo &MI) const;
1860 void dumpMacroInfo(const IdentifierInfo *II);
1862 /// Given a location that specifies the start of a
1863 /// token, return a new location that specifies a character within the token.
1864 SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart,
1865 unsigned Char) const {
1866 return Lexer::AdvanceToTokenCharacter(TokStart, Char, SourceMgr, LangOpts);
1867 }
1869 /// Increment the counters for the number of token paste operations
1870 /// performed.
1871 ///
1872 /// If fast was specified, this is a 'fast paste' case we handled.
1873 void IncrementPasteCounter(bool isFast) {
1874 if (isFast)
1875 ++NumFastTokenPaste;
1876 else
1877 ++NumTokenPaste;
1878 }
1880 void PrintStats();
1882 size_t getTotalMemory() const;
1884 /// When the macro expander pastes together a comment (/##/) in Microsoft
1885 /// mode, this method handles updating the current state, returning the
1886 /// token on the next source line.
1887 void HandleMicrosoftCommentPaste(Token &Tok);
1889 //===--------------------------------------------------------------------===//
1890 // Preprocessor callback methods. These are invoked by a lexer as various
1891 // directives and events are found.
1893 /// Given a tok::raw_identifier token, look up the
1894 /// identifier information for the token and install it into the token,
1895 /// updating the token kind accordingly.
1896 IdentifierInfo *LookUpIdentifierInfo(Token &Identifier) const;
1899 llvm::DenseMap<IdentifierInfo*,unsigned> PoisonReasons;
1902 /// Specifies the reason for poisoning an identifier.
1903 ///
1904 /// If that identifier is accessed while poisoned, then this reason will be
1905 /// used instead of the default "poisoned" diagnostic.
1906 void SetPoisonReason(IdentifierInfo *II, unsigned DiagID);
1908 /// Display reason for poisoned identifier.
1909 void HandlePoisonedIdentifier(Token & Identifier);
1911 void MaybeHandlePoisonedIdentifier(Token & Identifier) {
1912 if(IdentifierInfo * II = Identifier.getIdentifierInfo()) {
1913 if(II->isPoisoned()) {
1914 HandlePoisonedIdentifier(Identifier);
1915 }
1916 }
1917 }
1920 /// Identifiers used for SEH handling in Borland. These are only
1921 /// allowed in particular circumstances
1922 // __except block
1923 IdentifierInfo *Ident__exception_code,
1924 *Ident___exception_code,
1925 *Ident_GetExceptionCode;
1926 // __except filter expression
1927 IdentifierInfo *Ident__exception_info,
1928 *Ident___exception_info,
1929 *Ident_GetExceptionInfo;
1930 // __finally
1931 IdentifierInfo *Ident__abnormal_termination,
1932 *Ident___abnormal_termination,
1933 *Ident_AbnormalTermination;
1935 const char *getCurLexerEndPos();
1936 void diagnoseMissingHeaderInUmbrellaDir(const Module &Mod);
1939 void PoisonSEHIdentifiers(bool Poison = true); // Borland
1941 /// Callback invoked when the lexer reads an identifier and has
1942 /// filled in the tokens IdentifierInfo member.
1943 ///
1944 /// This callback potentially macro expands it or turns it into a named
1945 /// token (like 'for').
1946 ///
1947 /// \returns true if we actually computed a token, false if we need to
1948 /// lex again.
1949 bool HandleIdentifier(Token &Identifier);
1951 /// Callback invoked when the lexer hits the end of the current file.
1952 ///
1953 /// This either returns the EOF token and returns true, or
1954 /// pops a level off the include stack and returns false, at which point the
1955 /// client should call lex again.
1956 bool HandleEndOfFile(Token &Result, SourceLocation Loc,
1957 bool isEndOfMacro = false);
1959 /// Callback invoked when the current TokenLexer hits the end of its
1960 /// token stream.
1961 bool HandleEndOfTokenLexer(Token &Result);
1963 /// Callback invoked when the lexer sees a # token at the start of a
1964 /// line.
1965 ///
1966 /// This consumes the directive, modifies the lexer/preprocessor state, and
1967 /// advances the lexer(s) so that the next token read is the correct one.
1968 void HandleDirective(Token &Result);
1970 /// Ensure that the next token is a tok::eod token.
1971 ///
1972 /// If not, emit a diagnostic and consume up until the eod.
1973 /// If \p EnableMacros is true, then we consider macros that expand to zero
1974 /// tokens as being ok.
1975 ///
1976 /// \return The location of the end of the directive (the terminating
1977 /// newline).
1978 SourceLocation CheckEndOfDirective(const char *DirType,
1979 bool EnableMacros = false);
1981 /// Read and discard all tokens remaining on the current line until
1982 /// the tok::eod token is found. Returns the range of the skipped tokens.
1983 SourceRange DiscardUntilEndOfDirective();
1985 /// Returns true if the preprocessor has seen a use of
1986 /// __DATE__ or __TIME__ in the file so far.
1987 bool SawDateOrTime() const {
1988 return DATELoc != SourceLocation() || TIMELoc != SourceLocation();
1989 }
1990 unsigned getCounterValue() const { return CounterValue; }
1991 void setCounterValue(unsigned V) { CounterValue = V; }
1993 /// Retrieves the module that we're currently building, if any.
1994 Module *getCurrentModule();
1996 /// Allocate a new MacroInfo object with the provided SourceLocation.
1997 MacroInfo *AllocateMacroInfo(SourceLocation L);
1999 /// Turn the specified lexer token into a fully checked and spelled
2000 /// filename, e.g. as an operand of \#include.
2001 ///
2002 /// The caller is expected to provide a buffer that is large enough to hold
2003 /// the spelling of the filename, but is also expected to handle the case
2004 /// when this method decides to use a different buffer.
2005 ///
2006 /// \returns true if the input filename was in <>'s or false if it was
2007 /// in ""'s.
2008 bool GetIncludeFilenameSpelling(SourceLocation Loc,StringRef &Buffer);
2010 /// Given a "foo" or \<foo> reference, look up the indicated file.
2011 ///
2012 /// Returns None on failure. \p isAngled indicates whether the file
2013 /// reference is for system \#include's or not (i.e. using <> instead of "").
2014 Optional<FileEntryRef>
2015 LookupFile(SourceLocation FilenameLoc, StringRef Filename, bool isAngled,
2016 const DirectoryLookup *FromDir, const FileEntry *FromFile,
2017 const DirectoryLookup *&CurDir, SmallVectorImpl<char> *SearchPath,
2018 SmallVectorImpl<char> *RelativePath,
2019 ModuleMap::KnownHeader *SuggestedModule, bool *IsMapped,
2020 bool *IsFrameworkFound, bool SkipCache = false);
2022 /// Get the DirectoryLookup structure used to find the current
2023 /// FileEntry, if CurLexer is non-null and if applicable.
2024 ///
2025 /// This allows us to implement \#include_next and find directory-specific
2026 /// properties.
2027 const DirectoryLookup *GetCurDirLookup() { return CurDirLookup; }
2029 /// Return true if we're in the top-level file, not in a \#include.
2030 bool isInPrimaryFile() const;
2032 /// Lex an on-off-switch (C99 6.10.6p2) and verify that it is
2033 /// followed by EOD. Return true if the token is not a valid on-off-switch.
2034 bool LexOnOffSwitch(tok::OnOffSwitch &Result);
2036 bool CheckMacroName(Token &MacroNameTok, MacroUse isDefineUndef,
2037 bool *ShadowFlag = nullptr);
2039 void EnterSubmodule(Module *M, SourceLocation ImportLoc, bool ForPragma);
2040 Module *LeaveSubmodule(bool ForPragma);
2043 friend void TokenLexer::ExpandFunctionArguments();
2045 void PushIncludeMacroStack() {
2046 assert(CurLexerKind != CLK_CachingLexer && "cannot push a caching lexer")((void)0);
2047 IncludeMacroStack.emplace_back(CurLexerKind, CurLexerSubmodule,
2048 std::move(CurLexer), CurPPLexer,
2049 std::move(CurTokenLexer), CurDirLookup);
2050 CurPPLexer = nullptr;
2051 }
2053 void PopIncludeMacroStack() {
2054 CurLexer = std::move(IncludeMacroStack.back().TheLexer);
2055 CurPPLexer = IncludeMacroStack.back().ThePPLexer;
2056 CurTokenLexer = std::move(IncludeMacroStack.back().TheTokenLexer);
2057 CurDirLookup = IncludeMacroStack.back().TheDirLookup;
2058 CurLexerSubmodule = IncludeMacroStack.back().TheSubmodule;
2059 CurLexerKind = IncludeMacroStack.back().CurLexerKind;
2060 IncludeMacroStack.pop_back();
2061 }
2063 void PropagateLineStartLeadingSpaceInfo(Token &Result);
2065 /// Determine whether we need to create module macros for #defines in the
2066 /// current context.
2067 bool needModuleMacros() const;
2069 /// Update the set of active module macros and ambiguity flag for a module
2070 /// macro name.
2071 void updateModuleMacroInfo(const IdentifierInfo *II, ModuleMacroInfo &Info);
2073 DefMacroDirective *AllocateDefMacroDirective(MacroInfo *MI,
2074 SourceLocation Loc);
2075 UndefMacroDirective *AllocateUndefMacroDirective(SourceLocation UndefLoc);
2076 VisibilityMacroDirective *AllocateVisibilityMacroDirective(SourceLocation Loc,
2077 bool isPublic);
2079 /// Lex and validate a macro name, which occurs after a
2080 /// \#define or \#undef.
2081 ///
2082 /// \param MacroNameTok Token that represents the name defined or undefined.
2083 /// \param IsDefineUndef Kind of preprocessor directive.
2084 /// \param ShadowFlag Points to flag that is set if macro name shadows
2085 /// a keyword.
2086 ///
2087 /// This emits a diagnostic, sets the token kind to eod,
2088 /// and discards the rest of the macro line if the macro name is invalid.
2089 void ReadMacroName(Token &MacroNameTok, MacroUse IsDefineUndef = MU_Other,
2090 bool *ShadowFlag = nullptr);
2092 /// ReadOptionalMacroParameterListAndBody - This consumes all (i.e. the
2093 /// entire line) of the macro's tokens and adds them to MacroInfo, and while
2094 /// doing so performs certain validity checks including (but not limited to):
2095 /// - # (stringization) is followed by a macro parameter
2096 /// \param MacroNameTok - Token that represents the macro name
2097 /// \param ImmediatelyAfterHeaderGuard - Macro follows an #ifdef header guard
2098 ///
2099 /// Either returns a pointer to a MacroInfo object OR emits a diagnostic and
2100 /// returns a nullptr if an invalid sequence of tokens is encountered.
2101 MacroInfo *ReadOptionalMacroParameterListAndBody(
2102 const Token &MacroNameTok, bool ImmediatelyAfterHeaderGuard);
2104 /// The ( starting an argument list of a macro definition has just been read.
2105 /// Lex the rest of the parameters and the closing ), updating \p MI with
2106 /// what we learn and saving in \p LastTok the last token read.
2107 /// Return true if an error occurs parsing the arg list.
2108 bool ReadMacroParameterList(MacroInfo *MI, Token& LastTok);
2110 /// We just read a \#if or related directive and decided that the
2111 /// subsequent tokens are in the \#if'd out portion of the
2112 /// file. Lex the rest of the file, until we see an \#endif. If \p
2113 /// FoundNonSkipPortion is true, then we have already emitted code for part of
2114 /// this \#if directive, so \#else/\#elif blocks should never be entered. If
2115 /// \p FoundElse is false, then \#else directives are ok, if not, then we have
2116 /// already seen one so a \#else directive is a duplicate. When this returns,
2117 /// the caller can lex the first valid token.
2118 void SkipExcludedConditionalBlock(SourceLocation HashTokenLoc,
2119 SourceLocation IfTokenLoc,
2120 bool FoundNonSkipPortion, bool FoundElse,
2121 SourceLocation ElseLoc = SourceLocation());
2123 /// Information about the result for evaluating an expression for a
2124 /// preprocessor directive.
2125 struct DirectiveEvalResult {
2126 /// Whether the expression was evaluated as true or not.
2127 bool Conditional;
2129 /// True if the expression contained identifiers that were undefined.
2130 bool IncludedUndefinedIds;
2132 /// The source range for the expression.
2133 SourceRange ExprRange;
2134 };
2136 /// Evaluate an integer constant expression that may occur after a
2137 /// \#if or \#elif directive and return a \p DirectiveEvalResult object.
2138 ///
2139 /// If the expression is equivalent to "!defined(X)" return X in IfNDefMacro.
2140 DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro);
2142 /// Install the standard preprocessor pragmas:
2143 /// \#pragma GCC poison/system_header/dependency and \#pragma once.
2144 void RegisterBuiltinPragmas();
2146 /// Register builtin macros such as __LINE__ with the identifier table.
2147 void RegisterBuiltinMacros();
2149 /// If an identifier token is read that is to be expanded as a macro, handle
2150 /// it and return the next token as 'Tok'. If we lexed a token, return true;
2151 /// otherwise the caller should lex again.
2152 bool HandleMacroExpandedIdentifier(Token &Identifier, const MacroDefinition &MD);
2154 /// Cache macro expanded tokens for TokenLexers.
2155 ///
2156 /// Works like a stack; a TokenLexer adds the macro expanded tokens that it is
2157 /// going to lex to the cache, and when it finishes the tokens are removed
2158 /// from the end of the cache.
2159 Token *cacheMacroExpandedTokens(TokenLexer *tokLexer,
2160 ArrayRef<Token> tokens);
2162 void removeCachedMacroExpandedTokensOfLastLexer();
2164 /// Determine whether the next preprocessor token to be
2165 /// lexed is a '('. If so, consume the token and return true, if not, this
2166 /// method should have no observable side-effect on the lexed tokens.
2167 bool isNextPPTokenLParen();
2169 /// After reading "MACRO(", this method is invoked to read all of the formal
2170 /// arguments specified for the macro invocation. Returns null on error.
2171 MacroArgs *ReadMacroCallArgumentList(Token &MacroName, MacroInfo *MI,
2172 SourceLocation &MacroEnd);
2174 /// If an identifier token is read that is to be expanded
2175 /// as a builtin macro, handle it and return the next token as 'Tok'.
2176 void ExpandBuiltinMacro(Token &Tok);
2178 /// Read a \c _Pragma directive, slice it up, process it, then
2179 /// return the first token after the directive.
2180 /// This assumes that the \c _Pragma token has just been read into \p Tok.
2181 void Handle_Pragma(Token &Tok);
2183 /// Like Handle_Pragma except the pragma text is not enclosed within
2184 /// a string literal.
2185 void HandleMicrosoft__pragma(Token &Tok);
2187 /// Add a lexer to the top of the include stack and
2188 /// start lexing tokens from it instead of the current buffer.
2189 void EnterSourceFileWithLexer(Lexer *TheLexer, const DirectoryLookup *Dir);
2191 /// Set the FileID for the preprocessor predefines.
2192 void setPredefinesFileID(FileID FID) {
2193 assert(PredefinesFileID.isInvalid() && "PredefinesFileID already set!")((void)0);
2194 PredefinesFileID = FID;
2195 }
2197 /// Set the FileID for the PCH through header.
2198 void setPCHThroughHeaderFileID(FileID FID);
2200 /// Returns true if we are lexing from a file and not a
2201 /// pragma or a macro.
2202 static bool IsFileLexer(const Lexer* L, const PreprocessorLexer* P) {
2203 return L ? !L->isPragmaLexer() : P != nullptr;
2204 }
2206 static bool IsFileLexer(const IncludeStackInfo& I) {
2207 return IsFileLexer(I.TheLexer.get(), I.ThePPLexer);
2208 }
2210 bool IsFileLexer() const {
2211 return IsFileLexer(CurLexer.get(), CurPPLexer);
2212 }
2214 //===--------------------------------------------------------------------===//
2215 // Caching stuff.
2216 void CachingLex(Token &Result);
2218 bool InCachingLexMode() const {
2219 // If the Lexer pointers are 0 and IncludeMacroStack is empty, it means
2220 // that we are past EOF, not that we are in CachingLex mode.
2221 return !CurPPLexer && !CurTokenLexer && !IncludeMacroStack.empty();
2222 }
2224 void EnterCachingLexMode();
2225 void EnterCachingLexModeUnchecked();
2227 void ExitCachingLexMode() {
2228 if (InCachingLexMode())
2229 RemoveTopOfLexerStack();
2230 }
2232 const Token &PeekAhead(unsigned N);
2233 void AnnotatePreviousCachedTokens(const Token &Tok);
2235 //===--------------------------------------------------------------------===//
2236 /// Handle*Directive - implement the various preprocessor directives. These
2237 /// should side-effect the current preprocessor object so that the next call
2238 /// to Lex() will return the appropriate token next.
2239 void HandleLineDirective();
2240 void HandleDigitDirective(Token &Tok);
2241 void HandleUserDiagnosticDirective(Token &Tok, bool isWarning);
2242 void HandleIdentSCCSDirective(Token &Tok);
2243 void HandleMacroPublicDirective(Token &Tok);
2244 void HandleMacroPrivateDirective();
2246 /// An additional notification that can be produced by a header inclusion or
2247 /// import to tell the parser what happened.
2248 struct ImportAction {
2249 enum ActionKind {
2250 None,
2251 ModuleBegin,
2252 ModuleImport,
2253 SkippedModuleImport,
2254 Failure,
2255 } Kind;
2256 Module *ModuleForHeader = nullptr;
2258 ImportAction(ActionKind AK, Module *Mod = nullptr)
2259 : Kind(AK), ModuleForHeader(Mod) {
2260 assert((AK == None || Mod || AK == Failure) &&((void)0)
2261 "no module for module action")((void)0);
2262 }
2263 };
2265 Optional<FileEntryRef> LookupHeaderIncludeOrImport(
2266 const DirectoryLookup *&CurDir, StringRef &Filename,
2267 SourceLocation FilenameLoc, CharSourceRange FilenameRange,
2268 const Token &FilenameTok, bool &IsFrameworkFound, bool IsImportDecl,
2269 bool &IsMapped, const DirectoryLookup *LookupFrom,
2270 const FileEntry *LookupFromFile, StringRef &LookupFilename,
2271 SmallVectorImpl<char> &RelativePath, SmallVectorImpl<char> &SearchPath,
2272 ModuleMap::KnownHeader &SuggestedModule, bool isAngled);
2274 // File inclusion.
2275 void HandleIncludeDirective(SourceLocation HashLoc, Token &Tok,
2276 const DirectoryLookup *LookupFrom = nullptr,
2277 const FileEntry *LookupFromFile = nullptr);
2278 ImportAction
2279 HandleHeaderIncludeOrImport(SourceLocation HashLoc, Token &IncludeTok,
2280 Token &FilenameTok, SourceLocation EndLoc,
2281 const DirectoryLookup *LookupFrom = nullptr,
2282 const FileEntry *LookupFromFile = nullptr);
2283 void HandleIncludeNextDirective(SourceLocation HashLoc, Token &Tok);
2284 void HandleIncludeMacrosDirective(SourceLocation HashLoc, Token &Tok);
2285 void HandleImportDirective(SourceLocation HashLoc, Token &Tok);
2286 void HandleMicrosoftImportDirective(Token &Tok);
2289 /// Check that the given module is available, producing a diagnostic if not.
2290 /// \return \c true if the check failed (because the module is not available).
2291 /// \c false if the module appears to be usable.
2292 static bool checkModuleIsAvailable(const LangOptions &LangOpts,
2293 const TargetInfo &TargetInfo,
2294 DiagnosticsEngine &Diags, Module *M);
2296 // Module inclusion testing.
2297 /// Find the module that owns the source or header file that
2298 /// \p Loc points to. If the location is in a file that was included
2299 /// into a module, or is outside any module, returns nullptr.
2300 Module *getModuleForLocation(SourceLocation Loc);
2302 /// We want to produce a diagnostic at location IncLoc concerning an
2303 /// unreachable effect at location MLoc (eg, where a desired entity was
2304 /// declared or defined). Determine whether the right way to make MLoc
2305 /// reachable is by #include, and if so, what header should be included.
2306 ///
2307 /// This is not necessarily fast, and might load unexpected module maps, so
2308 /// should only be called by code that intends to produce an error.
2309 ///
2310 /// \param IncLoc The location at which the missing effect was detected.
2311 /// \param MLoc A location within an unimported module at which the desired
2312 /// effect occurred.
2313 /// \return A file that can be #included to provide the desired effect. Null
2314 /// if no such file could be determined or if a #include is not
2315 /// appropriate (eg, if a module should be imported instead).
2316 const FileEntry *getHeaderToIncludeForDiagnostics(SourceLocation IncLoc,
2317 SourceLocation MLoc);
2319 bool isRecordingPreamble() const {
2320 return PreambleConditionalStack.isRecording();
2321 }
2323 bool hasRecordedPreamble() const {
2324 return PreambleConditionalStack.hasRecordedPreamble();
2325 }
2327 ArrayRef<PPConditionalInfo> getPreambleConditionalStack() const {
2328 return PreambleConditionalStack.getStack();
2329 }
2331 void setRecordedPreambleConditionalStack(ArrayRef<PPConditionalInfo> s) {
2332 PreambleConditionalStack.setStack(s);
2333 }
2335 void setReplayablePreambleConditionalStack(ArrayRef<PPConditionalInfo> s,
2336 llvm::Optional<PreambleSkipInfo> SkipInfo) {
2337 PreambleConditionalStack.startReplaying();
2338 PreambleConditionalStack.setStack(s);
2339 PreambleConditionalStack.SkipInfo = SkipInfo;
2340 }
2342 llvm::Optional<PreambleSkipInfo> getPreambleSkipInfo() const {
2343 return PreambleConditionalStack.SkipInfo;
2344 }
2347 /// After processing predefined file, initialize the conditional stack from
2348 /// the preamble.
2349 void replayPreambleConditionalStack();
2351 // Macro handling.
2352 void HandleDefineDirective(Token &Tok, bool ImmediatelyAfterHeaderGuard);
2353 void HandleUndefDirective();
2355 // Conditional Inclusion.
2356 void HandleIfdefDirective(Token &Result, const Token &HashToken,
2357 bool isIfndef, bool ReadAnyTokensBeforeDirective);
2358 void HandleIfDirective(Token &IfToken, const Token &HashToken,
2359 bool ReadAnyTokensBeforeDirective);
2360 void HandleEndifDirective(Token &EndifToken);
2361 void HandleElseDirective(Token &Result, const Token &HashToken);
2362 void HandleElifFamilyDirective(Token &ElifToken, const Token &HashToken,
2363 tok::PPKeywordKind Kind);
2365 // Pragmas.
2366 void HandlePragmaDirective(PragmaIntroducer Introducer);
2367 void ResolvePragmaIncludeInstead(SourceLocation Location) const;
2370 void HandlePragmaOnce(Token &OnceTok);
2371 void HandlePragmaMark(Token &MarkTok);
2372 void HandlePragmaPoison();
2373 void HandlePragmaSystemHeader(Token &SysHeaderTok);
2374 void HandlePragmaIncludeInstead(Token &Tok);
2375 void HandlePragmaDependency(Token &DependencyTok);
2376 void HandlePragmaPushMacro(Token &Tok);
2377 void HandlePragmaPopMacro(Token &Tok);
2378 void HandlePragmaIncludeAlias(Token &Tok);
2379 void HandlePragmaModuleBuild(Token &Tok);
2380 void HandlePragmaHdrstop(Token &Tok);
2381 IdentifierInfo *ParsePragmaPushOrPopMacro(Token &Tok);
2383 // Return true and store the first token only if any CommentHandler
2384 // has inserted some tokens and getCommentRetentionState() is false.
2385 bool HandleComment(Token &result, SourceRange Comment);
2387 /// A macro is used, update information about macros that need unused
2388 /// warnings.
2389 void markMacroAsUsed(MacroInfo *MI);
2392 Optional<unsigned>
2393 getSkippedRangeForExcludedConditionalBlock(SourceLocation HashLoc);
2395 /// Contains the currently active skipped range mappings for skipping excluded
2396 /// conditional directives.
2397 ExcludedPreprocessorDirectiveSkipMapping
2398 *ExcludedConditionalDirectiveSkipMappings;
2401/// Abstract base class that describes a handler that will receive
2402/// source ranges for each of the comments encountered in the source file.
2403class CommentHandler {
2405 virtual ~CommentHandler();
2407 // The handler shall return true if it has pushed any tokens
2408 // to be read using e.g. EnterToken or EnterTokenStream.
2409 virtual bool HandleComment(Preprocessor &PP, SourceRange Comment) = 0;
2412/// Abstract base class that describes a handler that will receive
2413/// source ranges for empty lines encountered in the source file.
2414class EmptylineHandler {
2416 virtual ~EmptylineHandler();
2418 // The handler handles empty lines.
2419 virtual void HandleEmptyline(SourceRange Range) = 0;
2422/// Registry of pragma handlers added by plugins
2423using PragmaHandlerRegistry = llvm::Registry<PragmaHandler>;
2425} // namespace clang


1//===- Allocator.h - Simple memory allocation abstraction -------*- C++ -*-===//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8/// \file
10/// This file defines the BumpPtrAllocator interface. BumpPtrAllocator conforms
11/// to the LLVM "Allocator" concept and is similar to MallocAllocator, but
12/// objects cannot be deallocated. Their lifetime is tied to the lifetime of the
13/// allocator.
20#include "llvm/ADT/Optional.h"
21#include "llvm/ADT/SmallVector.h"
22#include "llvm/Support/Alignment.h"
23#include "llvm/Support/AllocatorBase.h"
24#include "llvm/Support/Compiler.h"
25#include "llvm/Support/ErrorHandling.h"
26#include "llvm/Support/MathExtras.h"
27#include "llvm/Support/MemAlloc.h"
28#include <algorithm>
29#include <cassert>
30#include <cstddef>
31#include <cstdint>
32#include <cstdlib>
33#include <iterator>
34#include <type_traits>
35#include <utility>
37namespace llvm {
39namespace detail {
41// We call out to an external function to actually print the message as the
42// printing code uses Allocator.h in its implementation.
43void printBumpPtrAllocatorStats(unsigned NumSlabs, size_t BytesAllocated,
44 size_t TotalMemory);
46} // end namespace detail
48/// Allocate memory in an ever growing pool, as if by bump-pointer.
50/// This isn't strictly a bump-pointer allocator as it uses backing slabs of
51/// memory rather than relying on a boundless contiguous heap. However, it has
52/// bump-pointer semantics in that it is a monotonically growing pool of memory
53/// where every allocation is found by merely allocating the next N bytes in
54/// the slab, or the next N bytes in the next slab.
56/// Note that this also has a threshold for forcing allocations above a certain
57/// size into their own slab.
59/// The BumpPtrAllocatorImpl template defaults to using a MallocAllocator
60/// object, which wraps malloc, to allocate memory, but it can be changed to
61/// use a custom allocator.
63/// The GrowthDelay specifies after how many allocated slabs the allocator
64/// increases the size of the slabs.
65template <typename AllocatorT = MallocAllocator, size_t SlabSize = 4096,
66 size_t SizeThreshold = SlabSize, size_t GrowthDelay = 128>
67class BumpPtrAllocatorImpl
68 : public AllocatorBase<BumpPtrAllocatorImpl<AllocatorT, SlabSize,
69 SizeThreshold, GrowthDelay>>,
70 private AllocatorT {
72 static_assert(SizeThreshold <= SlabSize,
73 "The SizeThreshold must be at most the SlabSize to ensure "
74 "that objects larger than a slab go into their own memory "
75 "allocation.");
76 static_assert(GrowthDelay > 0,
77 "GrowthDelay must be at least 1 which already increases the"
78 "slab size after each allocated slab.");
80 BumpPtrAllocatorImpl() = default;
82 template <typename T>
83 BumpPtrAllocatorImpl(T &&Allocator)
84 : AllocatorT(std::forward<T &&>(Allocator)) {}
86 // Manually implement a move constructor as we must clear the old allocator's
87 // slabs as a matter of correctness.
88 BumpPtrAllocatorImpl(BumpPtrAllocatorImpl &&Old)
89 : AllocatorT(static_cast<AllocatorT &&>(Old)), CurPtr(Old.CurPtr),
90 End(Old.End), Slabs(std::move(Old.Slabs)),
91 CustomSizedSlabs(std::move(Old.CustomSizedSlabs)),
92 BytesAllocated(Old.BytesAllocated), RedZoneSize(Old.RedZoneSize) {
93 Old.CurPtr = Old.End = nullptr;
94 Old.BytesAllocated = 0;
95 Old.Slabs.clear();
96 Old.CustomSizedSlabs.clear();
97 }
99 ~BumpPtrAllocatorImpl() {
100 DeallocateSlabs(Slabs.begin(), Slabs.end());
101 DeallocateCustomSizedSlabs();
102 }
104 BumpPtrAllocatorImpl &operator=(BumpPtrAllocatorImpl &&RHS) {
105 DeallocateSlabs(Slabs.begin(), Slabs.end());
106 DeallocateCustomSizedSlabs();
108 CurPtr = RHS.CurPtr;
109 End = RHS.End;
110 BytesAllocated = RHS.BytesAllocated;
111 RedZoneSize = RHS.RedZoneSize;
112 Slabs = std::move(RHS.Slabs);
113 CustomSizedSlabs = std::move(RHS.CustomSizedSlabs);
114 AllocatorT::operator=(static_cast<AllocatorT &&>(RHS));
116 RHS.CurPtr = RHS.End = nullptr;
117 RHS.BytesAllocated = 0;
118 RHS.Slabs.clear();
119 RHS.CustomSizedSlabs.clear();
120 return *this;
121 }
123 /// Deallocate all but the first slab and reset the current pointer
124 /// to the beginning of it, freeing all memory allocated so far.
125 void Reset() {
126 // Deallocate all but the first slab, and deallocate all custom-sized slabs.
127 DeallocateCustomSizedSlabs();
128 CustomSizedSlabs.clear();
130 if (Slabs.empty())
131 return;
133 // Reset the state.
134 BytesAllocated = 0;
135 CurPtr = (char *)Slabs.front();
136 End = CurPtr + SlabSize;
138 __asan_poison_memory_region(*Slabs.begin(), computeSlabSize(0));
139 DeallocateSlabs(std::next(Slabs.begin()), Slabs.end());
140 Slabs.erase(std::next(Slabs.begin()), Slabs.end());
141 }
143 /// Allocate space at the specified alignment.
144 LLVM_ATTRIBUTE_RETURNS_NONNULL__attribute__((returns_nonnull)) LLVM_ATTRIBUTE_RETURNS_NOALIAS__attribute__((__malloc__)) void *
145 Allocate(size_t Size, Align Alignment) {
146 // Keep track of how many bytes we've allocated.
147 BytesAllocated += Size;
149 size_t Adjustment = offsetToAlignedAddr(CurPtr, Alignment);
Calling 'offsetToAlignedAddr'
150 assert(Adjustment + Size >= Size && "Adjustment + Size must not overflow")((void)0);
152 size_t SizeToAllocate = Size;
154 // Add trailing bytes as a "red zone" under ASan.
155 SizeToAllocate += RedZoneSize;
158 // Check if we have enough space.
159 if (Adjustment + SizeToAllocate <= size_t(End - CurPtr)) {
160 char *AlignedPtr = CurPtr + Adjustment;
161 CurPtr = AlignedPtr + SizeToAllocate;
162 // Update the allocation point of this memory block in MemorySanitizer.
163 // Without this, MemorySanitizer messages for values originated from here
164 // will point to the allocation of the entire slab.
165 __msan_allocated_memory(AlignedPtr, Size);
166 // Similarly, tell ASan about this space.
167 __asan_unpoison_memory_region(AlignedPtr, Size);
168 return AlignedPtr;
169 }
171 // If Size is really big, allocate a separate slab for it.
172 size_t PaddedSize = SizeToAllocate + Alignment.value() - 1;
173 if (PaddedSize > SizeThreshold) {
174 void *NewSlab =
175 AllocatorT::Allocate(PaddedSize, alignof(std::max_align_t));
176 // We own the new slab and don't want anyone reading anyting other than
177 // pieces returned from this method. So poison the whole slab.
178 __asan_poison_memory_region(NewSlab, PaddedSize);
179 CustomSizedSlabs.push_back(std::make_pair(NewSlab, PaddedSize));
181 uintptr_t AlignedAddr = alignAddr(NewSlab, Alignment);
182 assert(AlignedAddr + Size <= (uintptr_t)NewSlab + PaddedSize)((void)0);
183 char *AlignedPtr = (char*)AlignedAddr;
184 __msan_allocated_memory(AlignedPtr, Size);
185 __asan_unpoison_memory_region(AlignedPtr, Size);
186 return AlignedPtr;
187 }
189 // Otherwise, start a new slab and try again.
190 StartNewSlab();
191 uintptr_t AlignedAddr = alignAddr(CurPtr, Alignment);
192 assert(AlignedAddr + SizeToAllocate <= (uintptr_t)End &&((void)0)
193 "Unable to allocate memory!")((void)0);
194 char *AlignedPtr = (char*)AlignedAddr;
195 CurPtr = AlignedPtr + SizeToAllocate;
196 __msan_allocated_memory(AlignedPtr, Size);
197 __asan_unpoison_memory_region(AlignedPtr, Size);
198 return AlignedPtr;
199 }
201 inline LLVM_ATTRIBUTE_RETURNS_NONNULL__attribute__((returns_nonnull)) LLVM_ATTRIBUTE_RETURNS_NOALIAS__attribute__((__malloc__)) void *
202 Allocate(size_t Size, size_t Alignment) {
203 assert(Alignment > 0 && "0-byte alignment is not allowed. Use 1 instead.")((void)0);
204 return Allocate(Size, Align(Alignment));
Calling 'BumpPtrAllocatorImpl::Allocate'
205 }
207 // Pull in base class overloads.
208 using AllocatorBase<BumpPtrAllocatorImpl>::Allocate;
210 // Bump pointer allocators are expected to never free their storage; and
211 // clients expect pointers to remain valid for non-dereferencing uses even
212 // after deallocation.
213 void Deallocate(const void *Ptr, size_t Size, size_t /*Alignment*/) {
214 __asan_poison_memory_region(Ptr, Size);
215 }
217 // Pull in base class overloads.
218 using AllocatorBase<BumpPtrAllocatorImpl>::Deallocate;
220 size_t GetNumSlabs() const { return Slabs.size() + CustomSizedSlabs.size(); }
222 /// \return An index uniquely and reproducibly identifying
223 /// an input pointer \p Ptr in the given allocator.
224 /// The returned value is negative iff the object is inside a custom-size
225 /// slab.
226 /// Returns an empty optional if the pointer is not found in the allocator.
227 llvm::Optional<int64_t> identifyObject(const void *Ptr) {
228 const char *P = static_cast<const char *>(Ptr);
229 int64_t InSlabIdx = 0;
230 for (size_t Idx = 0, E = Slabs.size(); Idx < E; Idx++) {
231 const char *S = static_cast<const char *>(Slabs[Idx]);
232 if (P >= S && P < S + computeSlabSize(Idx))
233 return InSlabIdx + static_cast<int64_t>(P - S);
234 InSlabIdx += static_cast<int64_t>(computeSlabSize(Idx));
235 }
237 // Use negative index to denote custom sized slabs.
238 int64_t InCustomSizedSlabIdx = -1;
239 for (size_t Idx = 0, E = CustomSizedSlabs.size(); Idx < E; Idx++) {
240 const char *S = static_cast<const char *>(CustomSizedSlabs[Idx].first);
241 size_t Size = CustomSizedSlabs[Idx].second;
242 if (P >= S && P < S + Size)
243 return InCustomSizedSlabIdx - static_cast<int64_t>(P - S);
244 InCustomSizedSlabIdx -= static_cast<int64_t>(Size);
245 }
246 return None;
247 }
249 /// A wrapper around identifyObject that additionally asserts that
250 /// the object is indeed within the allocator.
251 /// \return An index uniquely and reproducibly identifying
252 /// an input pointer \p Ptr in the given allocator.
253 int64_t identifyKnownObject(const void *Ptr) {
254 Optional<int64_t> Out = identifyObject(Ptr);
255 assert(Out && "Wrong allocator used")((void)0);
256 return *Out;
257 }
259 /// A wrapper around identifyKnownObject. Accepts type information
260 /// about the object and produces a smaller identifier by relying on
261 /// the alignment information. Note that sub-classes may have different
262 /// alignment, so the most base class should be passed as template parameter
263 /// in order to obtain correct results. For that reason automatic template
264 /// parameter deduction is disabled.
265 /// \return An index uniquely and reproducibly identifying
266 /// an input pointer \p Ptr in the given allocator. This identifier is
267 /// different from the ones produced by identifyObject and
268 /// identifyAlignedObject.
269 template <typename T>
270 int64_t identifyKnownAlignedObject(const void *Ptr) {
271 int64_t Out = identifyKnownObject(Ptr);
272 assert(Out % alignof(T) == 0 && "Wrong alignment information")((void)0);
273 return Out / alignof(T);
274 }
276 size_t getTotalMemory() const {
277 size_t TotalMemory = 0;
278 for (auto I = Slabs.begin(), E = Slabs.end(); I != E; ++I)
279 TotalMemory += computeSlabSize(std::distance(Slabs.begin(), I));
280 for (auto &PtrAndSize : CustomSizedSlabs)
281 TotalMemory += PtrAndSize.second;
282 return TotalMemory;
283 }
285 size_t getBytesAllocated() const { return BytesAllocated; }
287 void setRedZoneSize(size_t NewSize) {
288 RedZoneSize = NewSize;
289 }
291 void PrintStats() const {
292 detail::printBumpPtrAllocatorStats(Slabs.size(), BytesAllocated,
293 getTotalMemory());
294 }
297 /// The current pointer into the current slab.
298 ///
299 /// This points to the next free byte in the slab.
300 char *CurPtr = nullptr;
302 /// The end of the current slab.
303 char *End = nullptr;
305 /// The slabs allocated so far.
306 SmallVector<void *, 4> Slabs;
308 /// Custom-sized slabs allocated for too-large allocation requests.
309 SmallVector<std::pair<void *, size_t>, 0> CustomSizedSlabs;
311 /// How many bytes we've allocated.
312 ///
313 /// Used so that we can compute how much space was wasted.
314 size_t BytesAllocated = 0;
316 /// The number of bytes to put between allocations when running under
317 /// a sanitizer.
318 size_t RedZoneSize = 1;
320 static size_t computeSlabSize(unsigned SlabIdx) {
321 // Scale the actual allocated slab size based on the number of slabs
322 // allocated. Every GrowthDelay slabs allocated, we double
323 // the allocated size to reduce allocation frequency, but saturate at
324 // multiplying the slab size by 2^30.
325 return SlabSize *
326 ((size_t)1 << std::min<size_t>(30, SlabIdx / GrowthDelay));
327 }
329 /// Allocate a new slab and move the bump pointers over into the new
330 /// slab, modifying CurPtr and End.
331 void StartNewSlab() {
332 size_t AllocatedSlabSize = computeSlabSize(Slabs.size());
334 void *NewSlab =
335 AllocatorT::Allocate(AllocatedSlabSize, alignof(std::max_align_t));
336 // We own the new slab and don't want anyone reading anything other than
337 // pieces returned from this method. So poison the whole slab.
338 __asan_poison_memory_region(NewSlab, AllocatedSlabSize);
340 Slabs.push_back(NewSlab);
341 CurPtr = (char *)(NewSlab);
342 End = ((char *)NewSlab) + AllocatedSlabSize;
343 }
345 /// Deallocate a sequence of slabs.
346 void DeallocateSlabs(SmallVectorImpl<void *>::iterator I,
347 SmallVectorImpl<void *>::iterator E) {
348 for (; I != E; ++I) {
349 size_t AllocatedSlabSize =
350 computeSlabSize(std::distance(Slabs.begin(), I));
351 AllocatorT::Deallocate(*I, AllocatedSlabSize, alignof(std::max_align_t));
352 }
353 }
355 /// Deallocate all memory for custom sized slabs.
356 void DeallocateCustomSizedSlabs() {
357 for (auto &PtrAndSize : CustomSizedSlabs) {
358 void *Ptr = PtrAndSize.first;
359 size_t Size = PtrAndSize.second;
360 AllocatorT::Deallocate(Ptr, Size, alignof(std::max_align_t));
361 }
362 }
364 template <typename T> friend class SpecificBumpPtrAllocator;
367/// The standard BumpPtrAllocator which just uses the default template
368/// parameters.
369typedef BumpPtrAllocatorImpl<> BumpPtrAllocator;
371/// A BumpPtrAllocator that allows only elements of a specific type to be
372/// allocated.
374/// This allows calling the destructor in DestroyAll() and when the allocator is
375/// destroyed.
376template <typename T> class SpecificBumpPtrAllocator {
377 BumpPtrAllocator Allocator;
380 SpecificBumpPtrAllocator() {
381 // Because SpecificBumpPtrAllocator walks the memory to call destructors,
382 // it can't have red zones between allocations.
383 Allocator.setRedZoneSize(0);
384 }
385 SpecificBumpPtrAllocator(SpecificBumpPtrAllocator &&Old)
386 : Allocator(std::move(Old.Allocator)) {}
387 ~SpecificBumpPtrAllocator() { DestroyAll(); }
389 SpecificBumpPtrAllocator &operator=(SpecificBumpPtrAllocator &&RHS) {
390 Allocator = std::move(RHS.Allocator);
391 return *this;
392 }
394 /// Call the destructor of each allocated object and deallocate all but the
395 /// first slab and reset the current pointer to the beginning of it, freeing
396 /// all memory allocated so far.
397 void DestroyAll() {
398 auto DestroyElements = [](char *Begin, char *End) {
399 assert(Begin == (char *)alignAddr(Begin, Align::Of<T>()))((void)0);
400 for (char *Ptr = Begin; Ptr + sizeof(T) <= End; Ptr += sizeof(T))
401 reinterpret_cast<T *>(Ptr)->~T();
402 };
404 for (auto I = Allocator.Slabs.begin(), E = Allocator.Slabs.end(); I != E;
405 ++I) {
406 size_t AllocatedSlabSize = BumpPtrAllocator::computeSlabSize(
407 std::distance(Allocator.Slabs.begin(), I));
408 char *Begin = (char *)alignAddr(*I, Align::Of<T>());
409 char *End = *I == Allocator.Slabs.back() ? Allocator.CurPtr
410 : (char *)*I + AllocatedSlabSize;
412 DestroyElements(Begin, End);
413 }
415 for (auto &PtrAndSize : Allocator.CustomSizedSlabs) {
416 void *Ptr = PtrAndSize.first;
417 size_t Size = PtrAndSize.second;
418 DestroyElements((char *)alignAddr(Ptr, Align::Of<T>()),
419 (char *)Ptr + Size);
420 }
422 Allocator.Reset();
423 }
425 /// Allocate space for an array of objects without constructing them.
426 T *Allocate(size_t num = 1) { return Allocator.Allocate<T>(num); }
429} // end namespace llvm
/// Overload of operator new that takes a BumpPtrAllocatorImpl and obtains
/// \p Size bytes from it via Allocator.Allocate().
///
/// The alignment passed to Allocate() is the smaller of
/// llvm::NextPowerOf2(Size) and alignof(std::max_align_t).
431template <typename AllocatorT, size_t SlabSize, size_t SizeThreshold,
432 size_t GrowthDelay>
433void *
434operator new(size_t Size,
435 llvm::BumpPtrAllocatorImpl<AllocatorT, SlabSize, SizeThreshold,
436 GrowthDelay> &Allocator) {
437 return Allocator.Allocate(Size, std::min((size_t)llvm::NextPowerOf2(Size),
Calling 'BumpPtrAllocatorImpl::Allocate'
438 alignof(std::max_align_t)));
441template <typename AllocatorT, size_t SlabSize, size_t SizeThreshold,
442 size_t GrowthDelay>
443void operator delete(void *,
444 llvm::BumpPtrAllocatorImpl<AllocatorT, SlabSize,
445 SizeThreshold, GrowthDelay> &) {


1//===-- llvm/Support/Alignment.h - Useful alignment functions ---*- C++ -*-===//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
9// This file contains types to represent alignments.
10// They are instrumented to guarantee some invariants are preserved and prevent
11// invalid manipulations.
13// - Align represents an alignment in bytes, it is always set and always a valid
14// power of two, its minimum value is 1 which means no alignment requirements.
16// - MaybeAlign is an optional type, it may be undefined or set. When it's set
17// you can get the underlying Align type by using the getValue() method.
24#include "llvm/ADT/Optional.h"
25#include "llvm/Support/MathExtras.h"
26#include <cassert>
27#ifndef NDEBUG1
28#include <string>
29#endif // NDEBUG
31namespace llvm {
// Asserts that `decl` is strictly greater than zero. The stringized
// expression followed by " should be defined" is the assertion message.
33#define ALIGN_CHECK_ISPOSITIVE(decl) \
34 assert(decl > 0 && (#decl " should be defined"))((void)0)
36/// This struct is a compact representation of a valid (non-zero power of two)
37/// alignment.
38/// It is suitable for use as static global constants.
39struct Align {
// The alignment is stored as its base-2 logarithm; value() turns it back into
// a byte count with a left shift.
41 uint8_t ShiftValue = 0; /// The log2 of the required alignment.
42 /// ShiftValue is less than 64 by construction.
// Friends that work directly on ShiftValue: Log2 and the Align/Align
// comparison operators read it, encode() reads it, and decodeMaybeAlign()
// writes it.
44 friend struct MaybeAlign;
45 friend unsigned Log2(Align);
46 friend bool operator==(Align Lhs, Align Rhs);
47 friend bool operator!=(Align Lhs, Align Rhs);
48 friend bool operator<=(Align Lhs, Align Rhs);
49 friend bool operator>=(Align Lhs, Align Rhs);
50 friend bool operator<(Align Lhs, Align Rhs);
51 friend bool operator>(Align Lhs, Align Rhs);
52 friend unsigned encode(struct MaybeAlign A);
53 friend struct MaybeAlign decodeMaybeAlign(unsigned Value);
55 /// A trivial type to allow construction of constexpr Align.
56 /// This is currently needed to workaround a bug in GCC 5.3 which prevents
57 /// definition of constexpr assign operators.
58 /// https://stackoverflow.com/questions/46756288/explicitly-defaulted-function-cannot-be-declared-as-constexpr-because-the-implic
59 /// FIXME: Remove this, make all assign operators constexpr and introduce user
60 /// defined literals when we don't have to support GCC 5.3 anymore.
61 /// https://llvm.org/docs/GettingStarted.html#getting-a-modern-host-c-toolchain
62 struct LogValue {
// Holds an already-computed log2; copied verbatim into ShiftValue by the
// Align(LogValue) constructor.
63 uint8_t Log;
64 };
67 /// Default is byte-aligned.
68 constexpr Align() = default;
69 /// Do not perform checks in case of copy/move construct/assign, because the
70 /// checks have been performed when building `Other`.
71 constexpr Align(const Align &Other) = default;
72 constexpr Align(Align &&Other) = default;
73 Align &operator=(const Align &Other) = default;
74 Align &operator=(Align &&Other) = default;
/// Builds an Align from a byte count. \p Value is asserted to be non-zero
/// and a power of two; ShiftValue is set to Log2_64(Value).
76 explicit Align(uint64_t Value) {
77 assert(Value > 0 && "Value must not be 0")((void)0);
78 assert(llvm::isPowerOf2_64(Value) && "Alignment is not a power of 2")((void)0);
79 ShiftValue = Log2_64(Value);
80 assert(ShiftValue < 64 && "Broken invariant")((void)0);
81 }
83 /// This is a hole in the type system and should not be abused.
84 /// Needed to interact with C for instance.
// Returns the alignment in bytes, computed as 1 << ShiftValue.
// NOTE(review): the shift is undefined once ShiftValue >= 64. The analyzer
// note below reports a path with ShiftValue == 255; presumably that requires
// the "less than 64" invariant to be broken first (decodeMaybeAlign() stores
// into ShiftValue without a range check) -- confirm.
85 uint64_t value() const { return uint64_t(1) << ShiftValue; }
The result of the left shift is undefined due to shifting by '255', which is greater or equal to the width of type 'uint64_t'
87 /// Allow constructions of constexpr Align.
// Yields a LogValue holding CTLog2<kValue>() narrowed to uint8_t.
88 template <size_t kValue> constexpr static LogValue Constant() {
89 return LogValue{static_cast<uint8_t>(CTLog2<kValue>())};
90 }
92 /// Allow constructions of constexpr Align from types.
93 /// Compile time equivalent to Align(alignof(T)).
94 template <typename T> constexpr static LogValue Of() {
95 return Constant<std::alignment_of<T>::value>();
96 }
98 /// Constexpr constructor from LogValue type.
// No checks are performed here: CA.Log is stored as-is.
99 constexpr Align(LogValue CA) : ShiftValue(CA.Log) {}
102/// Treats the value 0 as a 1, so Align is always at least 1.
103inline Align assumeAligned(uint64_t Value) {
104 return Value ? Align(Value) : Align();
107/// This struct is a compact representation of a valid (power of two) or
108/// undefined (0) alignment.
109struct MaybeAlign : public llvm::Optional<Align> {
// Shorthand for the Optional<Align> base; used below to inherit its
// constructors.
111 using UP = llvm::Optional<Align>;
114 /// Default is undefined.
115 MaybeAlign() = default;
116 /// Do not perform checks in case of copy/move construct/assign, because the
117 /// checks have been performed when building `Other`.
118 MaybeAlign(const MaybeAlign &Other) = default;
119 MaybeAlign &operator=(const MaybeAlign &Other) = default;
120 MaybeAlign(MaybeAlign &&Other) = default;
121 MaybeAlign &operator=(MaybeAlign &&Other) = default;
123 /// Use llvm::Optional<Align> constructor.
124 using UP::UP;
/// Builds from a raw byte count. \p Value is asserted to be 0 or a power of
/// two; 0 leaves the alignment undefined, any other value is emplaced.
126 explicit MaybeAlign(uint64_t Value) {
127 assert((Value == 0 || llvm::isPowerOf2_64(Value)) &&((void)0)
128 "Alignment is neither 0 nor a power of 2")((void)0);
129 if (Value)
130 emplace(Value);
131 }
133 /// For convenience, returns a valid alignment or 1 if undefined.
// An undefined alignment falls back to a default-constructed Align.
134 Align valueOrOne() const { return hasValue() ? getValue() : Align(); }
137/// Checks that SizeInBytes is a multiple of the alignment.
138inline bool isAligned(Align Lhs, uint64_t SizeInBytes) {
139 return SizeInBytes % Lhs.value() == 0;
142/// Checks that Addr is a multiple of the alignment.
143inline bool isAddrAligned(Align Lhs, const void *Addr) {
144 return isAligned(Lhs, reinterpret_cast<uintptr_t>(Addr));
147/// Returns a multiple of A needed to store `Size` bytes.
148inline uint64_t alignTo(uint64_t Size, Align A) {
149 const uint64_t Value = A.value();
Calling 'Align::value'
150 // The following line is equivalent to `(Size + Value - 1) / Value * Value`.
152 // The division followed by a multiplication can be thought of as a right
153 // shift followed by a left shift which zeros out the extra bits produced in
154 // the bump; `~(Value - 1)` is a mask where all those bits being zeroed out
155 // are just zero.
157 // Most compilers can generate this code but the pattern may be missed when
158 // multiple functions gets inlined.
159 return (Size + Value - 1) & ~(Value - 1U);
162/// If non-zero \p Skew is specified, the return value will be a minimal integer
163/// that is greater than or equal to \p Size and equal to \p A * N + \p Skew for
164/// some integer N. If \p Skew is larger than \p A, its value is adjusted to '\p
165/// Skew mod \p A'.
167/// Examples:
168/// \code
169/// alignTo(5, Align(8), 7) = 7
170/// alignTo(17, Align(8), 1) = 17
171/// alignTo(~0LL, Align(8), 3) = 3
172/// \endcode
173inline uint64_t alignTo(uint64_t Size, Align A, uint64_t Skew) {
174 const uint64_t Value = A.value();
175 Skew %= Value;
176 return ((Size + Value - 1 - Skew) & ~(Value - 1U)) + Skew;
179/// Returns a multiple of A needed to store `Size` bytes.
180/// Returns `Size` if current alignment is undefined.
181inline uint64_t alignTo(uint64_t Size, MaybeAlign A) {
182 return A ? alignTo(Size, A.getValue()) : Size;
185/// Aligns `Addr` to `Alignment` bytes, rounding up.
186inline uintptr_t alignAddr(const void *Addr, Align Alignment) {
187 uintptr_t ArithAddr = reinterpret_cast<uintptr_t>(Addr);
188 assert(static_cast<uintptr_t>(ArithAddr + Alignment.value() - 1) >=((void)0)
189 ArithAddr &&((void)0)
190 "Overflow")((void)0);
191 return alignTo(ArithAddr, Alignment);
194/// Returns the offset to the next integer (mod 2**64) that is greater than
195/// or equal to \p Value and is a multiple of \p Align.
196inline uint64_t offsetToAlignment(uint64_t Value, Align Alignment) {
197 return alignTo(Value, Alignment) - Value;
The value 255 is assigned to 'A.ShiftValue'
Calling 'alignTo'
200/// Returns the necessary adjustment for aligning `Addr` to `Alignment`
201/// bytes, rounding up.
202inline uint64_t offsetToAlignedAddr(const void *Addr, Align Alignment) {
203 return offsetToAlignment(reinterpret_cast<uintptr_t>(Addr), Alignment);
Calling 'offsetToAlignment'
206/// Returns the log2 of the alignment.
207inline unsigned Log2(Align A) { return A.ShiftValue; }
209/// Returns the alignment that satisfies both alignments.
210/// Same semantic as MinAlign.
211inline Align commonAlignment(Align A, Align B) { return std::min(A, B); }
213/// Returns the alignment that satisfies both alignments.
214/// Same semantic as MinAlign.
215inline Align commonAlignment(Align A, uint64_t Offset) {
216 return Align(MinAlign(A.value(), Offset));
219/// Returns the alignment that satisfies both alignments.
220/// Same semantic as MinAlign.
221inline MaybeAlign commonAlignment(MaybeAlign A, MaybeAlign B) {
222 return A && B ? commonAlignment(*A, *B) : A ? A : B;
225/// Returns the alignment that satisfies both alignments.
226/// Same semantic as MinAlign.
227inline MaybeAlign commonAlignment(MaybeAlign A, uint64_t Offset) {
228 return MaybeAlign(MinAlign((*A).value(), Offset));
231/// Returns a representation of the alignment that encodes undefined as 0.
232inline unsigned encode(MaybeAlign A) { return A ? A->ShiftValue + 1 : 0; }
234/// Dual operation of the encode function above.
235inline MaybeAlign decodeMaybeAlign(unsigned Value) {
236 if (Value == 0)
237 return MaybeAlign();
238 Align Out;
239 Out.ShiftValue = Value - 1;
240 return Out;
243/// Returns a representation of the alignment, the encoded value is positive by
244/// definition.
245inline unsigned encode(Align A) { return encode(MaybeAlign(A)); }
247/// Comparisons between Align and scalars. Rhs must be positive.
248inline bool operator==(Align Lhs, uint64_t Rhs) {
250 return Lhs.value() == Rhs;
252inline bool operator!=(Align Lhs, uint64_t Rhs) {
254 return Lhs.value() != Rhs;
256inline bool operator<=(Align Lhs, uint64_t Rhs) {
258 return Lhs.value() <= Rhs;
260inline bool operator>=(Align Lhs, uint64_t Rhs) {
262 return Lhs.value() >= Rhs;
264inline bool operator<(Align Lhs, uint64_t Rhs) {
266 return Lhs.value() < Rhs;
268inline bool operator>(Align Lhs, uint64_t Rhs) {
270 return Lhs.value() > Rhs;
273/// Comparisons between MaybeAlign and scalars.
274inline bool operator==(MaybeAlign Lhs, uint64_t Rhs) {
275 return Lhs ? (*Lhs).value() == Rhs : Rhs == 0;
277inline bool operator!=(MaybeAlign Lhs, uint64_t Rhs) {
278 return Lhs ? (*Lhs).value() != Rhs : Rhs != 0;
281/// Comparisons operators between Align.
282inline bool operator==(Align Lhs, Align Rhs) {
283 return Lhs.ShiftValue == Rhs.ShiftValue;
285inline bool operator!=(Align Lhs, Align Rhs) {
286 return Lhs.ShiftValue != Rhs.ShiftValue;
288inline bool operator<=(Align Lhs, Align Rhs) {
289 return Lhs.ShiftValue <= Rhs.ShiftValue;
291inline bool operator>=(Align Lhs, Align Rhs) {
292 return Lhs.ShiftValue >= Rhs.ShiftValue;
294inline bool operator<(Align Lhs, Align Rhs) {
295 return Lhs.ShiftValue < Rhs.ShiftValue;
297inline bool operator>(Align Lhs, Align Rhs) {
298 return Lhs.ShiftValue > Rhs.ShiftValue;
301// Don't allow relational comparisons with MaybeAlign.
// Every ordering operator with a MaybeAlign on either side is declared as
// deleted, so such a comparison fails to compile.
// Align on the left, MaybeAlign on the right.
302bool operator<=(Align Lhs, MaybeAlign Rhs) = delete;
303bool operator>=(Align Lhs, MaybeAlign Rhs) = delete;
304bool operator<(Align Lhs, MaybeAlign Rhs) = delete;
305bool operator>(Align Lhs, MaybeAlign Rhs) = delete;
// MaybeAlign on the left, Align on the right.
307bool operator<=(MaybeAlign Lhs, Align Rhs) = delete;
308bool operator>=(MaybeAlign Lhs, Align Rhs) = delete;
309bool operator<(MaybeAlign Lhs, Align Rhs) = delete;
310bool operator>(MaybeAlign Lhs, Align Rhs) = delete;
// MaybeAlign on both sides.
312bool operator<=(MaybeAlign Lhs, MaybeAlign Rhs) = delete;
313bool operator>=(MaybeAlign Lhs, MaybeAlign Rhs) = delete;
314bool operator<(MaybeAlign Lhs, MaybeAlign Rhs) = delete;
315bool operator>(MaybeAlign Lhs, MaybeAlign Rhs) = delete;
317inline Align operator*(Align Lhs, uint64_t Rhs) {
318 assert(Rhs > 0 && "Rhs must be positive")((void)0);
319 return Align(Lhs.value() * Rhs);
322inline MaybeAlign operator*(MaybeAlign Lhs, uint64_t Rhs) {
323 assert(Rhs > 0 && "Rhs must be positive")((void)0);
324 return Lhs ? Lhs.getValue() * Rhs : MaybeAlign();
327inline Align operator/(Align Lhs, uint64_t Divisor) {
328 assert(llvm::isPowerOf2_64(Divisor) &&((void)0)
329 "Divisor must be positive and a power of 2")((void)0);
330 assert(Lhs != 1 && "Can't halve byte alignment")((void)0);
331 return Align(Lhs.value() / Divisor);
334inline MaybeAlign operator/(MaybeAlign Lhs, uint64_t Divisor) {
335 assert(llvm::isPowerOf2_64(Divisor) &&((void)0)
336 "Divisor must be positive and a power of 2")((void)0);
337 return Lhs ? Lhs.getValue() / Divisor : MaybeAlign();
340inline Align max(MaybeAlign Lhs, Align Rhs) {
341 return Lhs && *Lhs > Rhs ? *Lhs : Rhs;
344inline Align max(Align Lhs, MaybeAlign Rhs) {
345 return Rhs && *Rhs > Lhs ? *Rhs : Lhs;
348#ifndef NDEBUG1
349// For usage in LLVM_DEBUG macros.
350inline std::string DebugStr(const Align &A) {
351 return std::to_string(A.value());
353// For usage in LLVM_DEBUG macros.
354inline std::string DebugStr(const MaybeAlign &MA) {
355 if (MA)
356 return std::to_string(MA->value());
357 return "None";
359#endif // NDEBUG
363} // namespace llvm