[llvm][mustache] Support setting delimiters in templates#159187
Merged
Conversation
This was referenced Sep 16, 2025
Contributor
Author
This was referenced Sep 16, 2025
Member
|
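@llvm/pr-subscribers-llvm-support Author: Paul Kirth (ilovepi) Changes: The base mustache spec allows setting custom delimiters, which slightly change parsing of partials. This patch implements that feature by adding a new token type, and changing the tokenizer's behavior to allow setting custom delimiters. Full diff: https://github.com/llvm/llvm-project/pull/159187.diff 3 Files Affected: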
diff --git a/llvm/lib/Support/Mustache.cpp b/llvm/lib/Support/Mustache.cpp
index 9c71d6a510056..43ce6adbba41a 100644
--- a/llvm/lib/Support/Mustache.cpp
+++ b/llvm/lib/Support/Mustache.cpp
@@ -7,9 +7,14 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/Mustache.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+
+#include <cctype>
#include <sstream>
+#define DEBUG_TYPE "mustache"
+
using namespace llvm;
using namespace llvm::mustache;
@@ -62,6 +67,7 @@ class Token {
InvertSectionOpen,
UnescapeVariable,
Comment,
+ SetDelimiter,
};
Token(std::string Str)
@@ -102,6 +108,8 @@ class Token {
return Type::Partial;
case '&':
return Type::UnescapeVariable;
+ case '=':
+ return Type::SetDelimiter;
default:
return Type::Variable;
}
@@ -189,14 +197,14 @@ class ASTNode {
};
// A wrapper for arena allocator for ASTNodes
-AstPtr createRootNode(llvm::StringMap<AstPtr> &Partials,
+static AstPtr createRootNode(llvm::StringMap<AstPtr> &Partials,
llvm::StringMap<Lambda> &Lambdas,
llvm::StringMap<SectionLambda> &SectionLambdas,
EscapeMap &Escapes) {
return std::make_unique<ASTNode>(Partials, Lambdas, SectionLambdas, Escapes);
}
-AstPtr createNode(ASTNode::Type T, Accessor A, ASTNode *Parent,
+static AstPtr createNode(ASTNode::Type T, Accessor A, ASTNode *Parent,
llvm::StringMap<AstPtr> &Partials,
llvm::StringMap<Lambda> &Lambdas,
llvm::StringMap<SectionLambda> &SectionLambdas,
@@ -205,7 +213,7 @@ AstPtr createNode(ASTNode::Type T, Accessor A, ASTNode *Parent,
SectionLambdas, Escapes);
}
-AstPtr createTextNode(std::string Body, ASTNode *Parent,
+static AstPtr createTextNode(std::string Body, ASTNode *Parent,
llvm::StringMap<AstPtr> &Partials,
llvm::StringMap<Lambda> &Lambdas,
llvm::StringMap<SectionLambda> &SectionLambdas,
@@ -226,7 +234,7 @@ AstPtr createTextNode(std::string Body, ASTNode *Parent,
// and the current token is the second token.
// For example:
// "{{#Section}}"
-bool hasTextBehind(size_t Idx, const ArrayRef<Token> &Tokens) {
+static bool hasTextBehind(size_t Idx, const ArrayRef<Token> &Tokens) {
if (Idx == 0)
return true;
@@ -242,7 +250,7 @@ bool hasTextBehind(size_t Idx, const ArrayRef<Token> &Tokens) {
// Function to check if there's no meaningful text ahead.
// We determine if a token has text ahead if the left of previous
// token does not start with a newline.
-bool hasTextAhead(size_t Idx, const ArrayRef<Token> &Tokens) {
+static bool hasTextAhead(size_t Idx, const ArrayRef<Token> &Tokens) {
if (Idx >= Tokens.size() - 1)
return true;
@@ -255,11 +263,11 @@ bool hasTextAhead(size_t Idx, const ArrayRef<Token> &Tokens) {
return !TokenBody.starts_with("\r\n") && !TokenBody.starts_with("\n");
}
-bool requiresCleanUp(Token::Type T) {
+static bool requiresCleanUp(Token::Type T) {
// We must clean up all the tokens that could contain child nodes.
return T == Token::Type::SectionOpen || T == Token::Type::InvertSectionOpen ||
T == Token::Type::SectionClose || T == Token::Type::Comment ||
- T == Token::Type::Partial;
+ T == Token::Type::Partial || T == Token::Type::SetDelimiter;
}
// Adjust next token body if there is no text ahead.
@@ -268,7 +276,7 @@ bool requiresCleanUp(Token::Type T) {
// "{{! Comment }} \nLine 2"
// would be considered as no text ahead and should be rendered as
// " Line 2"
-void stripTokenAhead(SmallVectorImpl<Token> &Tokens, size_t Idx) {
+static void stripTokenAhead(SmallVectorImpl<Token> &Tokens, size_t Idx) {
Token &NextToken = Tokens[Idx + 1];
StringRef NextTokenBody = NextToken.TokenBody;
// Cut off the leading newline which could be \n or \r\n.
@@ -286,7 +294,7 @@ void stripTokenAhead(SmallVectorImpl<Token> &Tokens, size_t Idx) {
// "A"
// The exception for this is partial tag which requires us to
// keep track of the indentation once it's rendered.
-void stripTokenBefore(SmallVectorImpl<Token> &Tokens, size_t Idx,
+static void stripTokenBefore(SmallVectorImpl<Token> &Tokens, size_t Idx,
Token &CurrentToken, Token::Type CurrentType) {
Token &PrevToken = Tokens[Idx - 1];
StringRef PrevTokenBody = PrevToken.TokenBody;
@@ -296,57 +304,129 @@ void stripTokenBefore(SmallVectorImpl<Token> &Tokens, size_t Idx,
CurrentToken.setIndentation(Indentation);
}
+// Describes a single mustache tag located inside a template string.
+// A default-constructed Tag (TagKind == Kind::None) means "no tag found".
+// Content and FullMatch are non-owning views, so they are only valid while
+// the string they were sliced from is alive.
+struct Tag {
+  // Which delimiter pair enclosed the tag.
+  enum class Kind {
+    None,   // No tag was found.
+    Normal, // Enclosed by the currently active delimiters, e.g. {{...}}
+    Triple, // Enclosed by the triple mustache, i.e. {{{...}}}
+  };
+
+  // Kind of tag that was matched.
+  Kind TagKind{Kind::None};
+  // Text strictly between the opening and closing delimiters.
+  StringRef Content;
+  // The whole tag, opening and closing delimiters included.
+  StringRef FullMatch;
+  // Offset of the first character of the opening delimiter.
+  size_t StartPosition{StringRef::npos};
+};
+
+// Locates the first tag in Template at or after StartPos.
+//
+// Both the active delimiter pair (Open/Close) and the fixed triple mustache
+// pair ({{{ / }}}) are searched for. Whichever opener occurs first wins, and
+// the triple mustache is preferred when both start at the same offset.
+//
+// Returns a Tag with TagKind == Kind::None when no opener is found, or when
+// the chosen opener has no matching closer later in the template.
+static Tag findNextTag(StringRef Template, size_t StartPos,
+                       const SmallString<8> &Open,
+                       const SmallString<8> &Close) {
+  const StringLiteral TripleOpen("{{{");
+  const StringLiteral TripleClose("}}}");
+
+  // Builds the Tag whose opener sits at OpenerPos. Yields an empty Tag if
+  // the closer never appears after the opener.
+  auto BuildTag = [&](Tag::Kind K, size_t OpenerPos, StringRef Opener,
+                      StringRef Closer) {
+    Tag Found;
+    const size_t BodyBegin = OpenerPos + Opener.size();
+    const size_t CloserPos = Template.find(Closer, BodyBegin);
+    if (CloserPos == StringRef::npos)
+      return Found;
+
+    Found.TagKind = K;
+    Found.StartPosition = OpenerPos;
+    Found.Content = Template.substr(BodyBegin, CloserPos - BodyBegin);
+    Found.FullMatch =
+        Template.substr(OpenerPos, (CloserPos + Closer.size()) - OpenerPos);
+    return Found;
+  };
+
+  const size_t NormalPos = Template.find(Open, StartPos);
+  const size_t TriplePos = Template.find(TripleOpen, StartPos);
+
+  // A triple opener that starts no later than the normal one takes priority.
+  const bool TripleComesFirst =
+      TriplePos != StringRef::npos &&
+      (NormalPos == StringRef::npos || TriplePos <= NormalPos);
+  if (TripleComesFirst)
+    return BuildTag(Tag::Kind::Triple, TriplePos, TripleOpen, TripleClose);
+
+  if (NormalPos != StringRef::npos)
+    return BuildTag(Tag::Kind::Normal, NormalPos, Open, Close);
+
+  return Tag();
+}
+
+// Turns one tag found by findNextTag() into a Token appended to Tokens.
+//
+// - A triple mustache tag is emitted as an unescaped-variable token: its
+//   content is prefixed with '&' and the token is tagged '&'.
+// - A tag whose trimmed content starts with '=' is a Set Delimiter tag. A
+//   token tagged '=' is emitted and Open/Close are updated so that later
+//   calls to findNextTag() use the new delimiters.
+// - Any other tag is emitted tagged with the first non-whitespace character
+//   of its content (' ' when the content is empty or only whitespace).
+//
+// \param T      The tag to convert.
+// \param Tokens Output list; exactly one token is appended per call.
+// \param Open   Active opening delimiter; rewritten by a valid Set
+//               Delimiter tag.
+// \param Close  Active closing delimiter; rewritten by a valid Set
+//               Delimiter tag.
+static void processTag(const Tag &T, SmallVectorImpl<Token> &Tokens,
+                       SmallString<8> &Open, SmallString<8> &Close) {
+  LLVM_DEBUG(dbgs() << " Found tag: \"" << T.FullMatch << "\", Content: \""
+                    << T.Content << "\"\n");
+  if (T.TagKind == Tag::Kind::Triple) {
+    Tokens.emplace_back(T.FullMatch.str(), "&" + T.Content.str(), '&');
+    LLVM_DEBUG(dbgs() << " Created UnescapeVariable token.\n");
+    return;
+  }
+  StringRef Interpolated = T.Content;
+  // Trim once and test the trimmed view: content such as " " is non-empty
+  // but trims to nothing, and front() on an empty StringRef is invalid.
+  StringRef Trimmed = Interpolated.trim();
+  std::string RawBody = T.FullMatch.str();
+  if (!Trimmed.starts_with("=")) {
+    char Front = Trimmed.empty() ? ' ' : Trimmed.front();
+    Tokens.emplace_back(RawBody, Interpolated.str(), Front);
+    LLVM_DEBUG(dbgs() << " Created tag token of type '" << Front << "'\n");
+    return;
+  }
+  // The token is emitted even when the spec below is rejected, so the tag
+  // itself is consumed and never treated as text.
+  Tokens.emplace_back(RawBody, Interpolated.str(), '=');
+  StringRef DelimSpec = Trimmed.drop_front(1);
+  DelimSpec = DelimSpec.take_until([](char C) { return C == '='; });
+  DelimSpec = DelimSpec.trim();
+
+  auto [NewOpen, NewClose] = DelimSpec.split(' ');
+  // The pair may be separated by several spaces; split() only consumes the
+  // first one, so drop the remainder from the closing delimiter.
+  StringRef TrimmedClose = NewClose.trim();
+
+  // An empty delimiter would make findNextTag() match a zero-length tag at
+  // the current position, so the tokenizer would never advance. Keep the
+  // current delimiters when the spec does not name both of them.
+  if (NewOpen.empty() || TrimmedClose.empty()) {
+    LLVM_DEBUG(dbgs() << " Ignoring malformed Set Delimiter tag. Keeping Open='"
+                      << Open << "', Close='" << Close << "'\n");
+    return;
+  }
+
+  Open = NewOpen;
+  Close = TrimmedClose;
+
+  LLVM_DEBUG(dbgs() << " Found Set Delimiter tag. NewOpen='" << Open
+                    << "', NewClose='" << Close << "'\n");
+}
+
// Simple tokenizer that splits the template into tokens.
// The mustache spec allows {{{ }}} to unescape variables; such a tag is
// tokenized the same way as {{& variable}}.
// A Set Delimiter tag (e.g. {{=<% %>=}}) changes the delimiters that are
// used to find tags in the remainder of the template.
-SmallVector<Token> tokenize(StringRef Template) {
+static SmallVector<Token> tokenize(StringRef Template) {
+ LLVM_DEBUG(dbgs() << "Tokenizing template: \"" << Template << "\"\n");
SmallVector<Token> Tokens;
- StringLiteral Open("{{");
- StringLiteral Close("}}");
- StringLiteral TripleOpen("{{{");
- StringLiteral TripleClose("}}}");
+ SmallString<8> Open("{{");
+ SmallString<8> Close("}}");
size_t Start = 0;
- size_t DelimiterStart = Template.find(Open);
- if (DelimiterStart == StringRef::npos) {
- Tokens.emplace_back(Template.str());
- return Tokens;
- }
- while (DelimiterStart != StringRef::npos) {
- if (DelimiterStart != Start)
- Tokens.emplace_back(Template.substr(Start, DelimiterStart - Start).str());
-
- if (Template.substr(DelimiterStart).starts_with(TripleOpen)) {
- size_t DelimiterEnd = Template.find(TripleClose, DelimiterStart);
- if (DelimiterEnd == StringRef::npos)
- break;
- size_t BodyStart = DelimiterStart + TripleOpen.size();
- std::string Body =
- Template.substr(BodyStart, DelimiterEnd - BodyStart).str();
- std::string RawBody =
- Template.substr(DelimiterStart, DelimiterEnd - DelimiterStart + 3)
- .str();
- Tokens.emplace_back(RawBody, "&" + Body, '&');
- Start = DelimiterEnd + TripleClose.size();
- } else {
- size_t DelimiterEnd = Template.find(Close, DelimiterStart);
- if (DelimiterEnd == StringRef::npos)
- break;
-
- // Extract the Interpolated variable without delimiters.
- size_t InterpolatedStart = DelimiterStart + Open.size();
- size_t InterpolatedEnd = DelimiterEnd - DelimiterStart - Close.size();
- std::string Interpolated =
- Template.substr(InterpolatedStart, InterpolatedEnd).str();
- std::string RawBody = Open.str() + Interpolated + Close.str();
- Tokens.emplace_back(RawBody, Interpolated, Interpolated[0]);
- Start = DelimiterEnd + Close.size();
+
+ while (Start < Template.size()) {
+ LLVM_DEBUG(dbgs() << "Loop start. Start=" << Start << ", Open='" << Open
+ << "', Close='" << Close << "'\n");
+ Tag T = findNextTag(Template, Start, Open, Close);
+
+ if (T.TagKind == Tag::Kind::None) {
+ // No more tags, the rest is text.
+ Tokens.emplace_back(Template.substr(Start).str());
+ LLVM_DEBUG(dbgs() << " No more tags. Created final Text token: \""
+ << Template.substr(Start) << "\"\n");
+ break;
+ }
+
+ // Add the text before the tag.
+ if (T.StartPosition > Start) {
+ StringRef Text = Template.substr(Start, T.StartPosition - Start);
+ Tokens.emplace_back(Text.str());
+ LLVM_DEBUG(dbgs() << " Created Text token: \"" << Text << "\"\n");
}
- DelimiterStart = Template.find(Open, Start);
- }
- if (Start < Template.size())
- Tokens.emplace_back(Template.substr(Start).str());
+ processTag(T, Tokens, Open, Close);
+
+ // Move past the tag.
+ Start = T.StartPosition + T.FullMatch.size();
+ }
// Fix up white spaces for:
// - open sections
@@ -388,6 +468,7 @@ SmallVector<Token> tokenize(StringRef Template) {
if ((!HasTextBehind && !HasTextAhead) || (!HasTextBehind && Idx == LastIdx))
stripTokenBefore(Tokens, Idx, CurrentToken, CurrentType);
}
+ LLVM_DEBUG(dbgs() << "Tokenizing finished.\n");
return Tokens;
}
@@ -551,13 +632,14 @@ void Parser::parseMustache(ASTNode *Parent, llvm::StringMap<AstPtr> &Partials,
break;
}
case Token::Type::Comment:
+ case Token::Type::SetDelimiter:
break;
case Token::Type::SectionClose:
return;
}
}
}
-void toMustacheString(const json::Value &Data, raw_ostream &OS) {
+static void toMustacheString(const json::Value &Data, raw_ostream &OS) {
switch (Data.kind()) {
case json::Value::Null:
return;
@@ -590,6 +672,8 @@ void toMustacheString(const json::Value &Data, raw_ostream &OS) {
}
void ASTNode::render(const json::Value &CurrentCtx, raw_ostream &OS) {
+ if (Ty != Root && Ty != Text && AccessorValue.empty())
+ return;
// Set the parent context to the incoming context so that we
// can walk up the context tree correctly in findContext().
ParentContext = &CurrentCtx;
@@ -789,3 +873,5 @@ Template &Template::operator=(Template &&Other) noexcept {
return *this;
}
} // namespace llvm::mustache
+
+#undef DEBUG_TYPE
diff --git a/llvm/unittests/Support/MustacheTest.cpp b/llvm/unittests/Support/MustacheTest.cpp
index f613fde072cde..addf0355c4d0a 100644
--- a/llvm/unittests/Support/MustacheTest.cpp
+++ b/llvm/unittests/Support/MustacheTest.cpp
@@ -1335,7 +1335,7 @@ TEST(MustacheDelimiters, PairBehavior) {
std::string Out;
raw_string_ostream OS(Out);
T.render(D, OS);
- EXPECT_NE("(Hey!)", Out);
+ EXPECT_EQ("(Hey!)", Out);
}
TEST(MustacheDelimiters, SpecialCharacters) {
@@ -1344,7 +1344,7 @@ TEST(MustacheDelimiters, SpecialCharacters) {
std::string Out;
raw_string_ostream OS(Out);
T.render(D, OS);
- EXPECT_NE("(It worked!)", Out);
+ EXPECT_EQ("(It worked!)", Out);
}
TEST(MustacheDelimiters, Sections) {
@@ -1354,7 +1354,7 @@ TEST(MustacheDelimiters, Sections) {
std::string Out;
raw_string_ostream OS(Out);
T.render(D, OS);
- EXPECT_NE("[\n I got interpolated.\n |data|\n\n {{data}}\n I got "
+ EXPECT_EQ("[\n I got interpolated.\n |data|\n\n {{data}}\n I got "
"interpolated.\n]\n",
Out);
}
@@ -1366,7 +1366,7 @@ TEST(MustacheDelimiters, InvertedSections) {
std::string Out;
raw_string_ostream OS(Out);
T.render(D, OS);
- EXPECT_NE("[\n I got interpolated.\n |data|\n\n {{data}}\n I got "
+ EXPECT_EQ("[\n I got interpolated.\n |data|\n\n {{data}}\n I got "
"interpolated.\n]\n",
Out);
}
@@ -1378,7 +1378,7 @@ TEST(MustacheDelimiters, PartialInheritence) {
std::string Out;
raw_string_ostream OS(Out);
T.render(D, OS);
- EXPECT_NE("[ .yes. ]\n[ .yes. ]\n", Out);
+ EXPECT_EQ("[ .yes. ]\n[ .yes. ]\n", Out);
}
TEST(MustacheDelimiters, PostPartialBehavior) {
@@ -1388,7 +1388,7 @@ TEST(MustacheDelimiters, PostPartialBehavior) {
std::string Out;
raw_string_ostream OS(Out);
T.render(D, OS);
- EXPECT_NE("[ .yes. .yes. ]\n[ .yes. .|value|. ]\n", Out);
+ EXPECT_EQ("[ .yes. .yes. ]\n[ .yes. .|value|. ]\n", Out);
}
TEST(MustacheDelimiters, SurroundingWhitespace) {
@@ -1415,7 +1415,7 @@ TEST(MustacheDelimiters, StandaloneTag) {
std::string Out;
raw_string_ostream OS(Out);
T.render(D, OS);
- EXPECT_NE("Begin.\nEnd.\n", Out);
+ EXPECT_EQ("Begin.\nEnd.\n", Out);
}
TEST(MustacheDelimiters, IndentedStandaloneTag) {
@@ -1424,7 +1424,7 @@ TEST(MustacheDelimiters, IndentedStandaloneTag) {
std::string Out;
raw_string_ostream OS(Out);
T.render(D, OS);
- EXPECT_NE("Begin.\nEnd.\n", Out);
+ EXPECT_EQ("Begin.\nEnd.\n", Out);
}
TEST(MustacheDelimiters, StandaloneLineEndings) {
@@ -1433,7 +1433,7 @@ TEST(MustacheDelimiters, StandaloneLineEndings) {
std::string Out;
raw_string_ostream OS(Out);
T.render(D, OS);
- EXPECT_NE("|\r\n|", Out);
+ EXPECT_EQ("|\r\n|", Out);
}
TEST(MustacheDelimiters, StandaloneWithoutPreviousLine) {
@@ -1442,7 +1442,7 @@ TEST(MustacheDelimiters, StandaloneWithoutPreviousLine) {
std::string Out;
raw_string_ostream OS(Out);
T.render(D, OS);
- EXPECT_NE("=", Out);
+ EXPECT_EQ("=", Out);
}
TEST(MustacheDelimiters, StandaloneWithoutNewline) {
@@ -1451,7 +1451,7 @@ TEST(MustacheDelimiters, StandaloneWithoutNewline) {
std::string Out;
raw_string_ostream OS(Out);
T.render(D, OS);
- EXPECT_NE("=\n", Out);
+ EXPECT_EQ("=\n", Out);
}
TEST(MustacheDelimiters, PairwithPadding) {
@@ -1462,4 +1462,3 @@ TEST(MustacheDelimiters, PairwithPadding) {
T.render(D, OS);
EXPECT_EQ("||", Out);
}
-
diff --git a/llvm/utils/llvm-test-mustache-spec/llvm-test-mustache-spec.cpp b/llvm/utils/llvm-test-mustache-spec/llvm-test-mustache-spec.cpp
index ea1395b2646f6..bdcef376547fb 100644
--- a/llvm/utils/llvm-test-mustache-spec/llvm-test-mustache-spec.cpp
+++ b/llvm/utils/llvm-test-mustache-spec/llvm-test-mustache-spec.cpp
@@ -54,20 +54,6 @@ static int NumXFail = 0;
static int NumSuccess = 0;
static const StringMap<StringSet<>> XFailTestNames = {{
- {"delimiters.json",
- {
- "Pair Behavior",
- "Special Characters",
- "Sections",
- "Inverted Sections",
- "Partial Inheritence",
- "Post-Partial Behavior",
- "Standalone Tag",
- "Indented Standalone Tag",
- "Standalone Line Endings",
- "Standalone Without Previous Line",
- "Standalone Without Newline",
- }},
{"~dynamic-names.json",
{
"Basic Behavior - Partial",
@@ -113,7 +99,6 @@ static const StringMap<StringSet<>> XFailTestNames = {{
"Block reindentation",
"Intrinsic indentation",
"Nested block reindentation",
-
}},
{"~lambdas.json",
{
@@ -126,7 +111,6 @@ static const StringMap<StringSet<>> XFailTestNames = {{
"Section - Expansion",
"Section - Alternate Delimiters",
"Section - Multiple Calls",
-
}},
{"partials.json", {"Standalone Indentation"}},
}};
|
This was referenced Sep 16, 2025
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
97fa89f to
fbd8894
Compare
293e89e to
c3b7fae
Compare
7cea784 to
630801a
Compare
6ba6cd7 to
78a29c3
Compare
630801a to
794217c
Compare
evelez7
approved these changes
Sep 29, 2025
78a29c3 to
dc36a30
Compare
794217c to
87331c5
Compare
Base automatically changed from
users/ilovepi/mustache-delimiter-test
to
main
September 29, 2025 18:19
87331c5 to
bd4ebfd
Compare
Contributor
Author
The base mustache spec allows setting custom delimiters, which slightly change parsing of partials. This patch implements that feature by adding a new token type, and changing the tokenizer's behavior to allow setting custom delimiters.
bd4ebfd to
9ff3fba
Compare
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/162/builds/32168 Here is the relevant piece of the build log for the reference |
mahesh-attarde
pushed a commit
to mahesh-attarde/llvm-project
that referenced
this pull request
Oct 3, 2025
The base mustache spec allows setting custom delimiters, which slightly change parsing of partials. This patch implements that feature by adding a new token type, and changing the tokenizer's behavior to allow setting custom delimiters.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.This suggestion is invalid because no changes were made to the code.Suggestions cannot be applied while the pull request is closed.Suggestions cannot be applied while viewing a subset of changes.Only one suggestion per line can be applied in a batch.Add this suggestion to a batch that can be applied as a single commit.Applying suggestions on deleted lines is not supported.You must change the existing code in this line in order to create a valid suggestion.Outdated suggestions cannot be applied.This suggestion has been applied or marked resolved.Suggestions cannot be applied from pending reviews.Suggestions cannot be applied on multi-line comments.Suggestions cannot be applied while the pull request is queued to merge.Suggestion cannot be applied right now. Please check back later.

The base mustache spec allows setting custom delimiters, which slightly
change parsing of partials. This patch implements that feature by adding
a new token type, and changing the tokenizer's behavior to allow setting
custom delimiters.