about | summary | refs | log | tree | commit | diff | stats
path: root/lib/Format
diff options
context:
space:
mode:
author: Dimitry Andric <dim@FreeBSD.org> 2017-12-18 20:11:37 +0000
committer: Dimitry Andric <dim@FreeBSD.org> 2017-12-18 20:11:37 +0000
commit: 461a67fa15370a9ec88f8f8a240bf7c123bb2029 (patch)
tree: 6942083d7d56bba40ec790a453ca58ad3baf6832 /lib/Format
parent: 75c3240472ba6ac2669ee72ca67eb72d4e2851fc (diff)
download: src-461a67fa15370a9ec88f8f8a240bf7c123bb2029.tar.gz
download: src-461a67fa15370a9ec88f8f8a240bf7c123bb2029.zip
Vendor import of clang trunk r321017 (tag: vendor/clang/clang-trunk-r321017)
Notes
Notes: svn path=/vendor/clang/dist/; revision=326941 svn path=/vendor/clang/clang-trunk-r321017/; revision=326942; tag=vendor/clang/clang-trunk-r321017
Diffstat (limited to 'lib/Format')
-rw-r--r-- lib/Format/BreakableToken.cpp 542
-rw-r--r-- lib/Format/BreakableToken.h 317
-rw-r--r-- lib/Format/ContinuationIndenter.cpp 747
-rw-r--r-- lib/Format/ContinuationIndenter.h 75
-rw-r--r-- lib/Format/Format.cpp 195
-rw-r--r-- lib/Format/FormatInternal.h 83
-rw-r--r-- lib/Format/FormatToken.cpp 6
-rw-r--r-- lib/Format/FormatToken.h 167
-rw-r--r-- lib/Format/FormatTokenLexer.cpp 88
-rw-r--r-- lib/Format/FormatTokenLexer.h 4
-rw-r--r-- lib/Format/NamespaceEndCommentsFixer.cpp 10
-rw-r--r-- lib/Format/NamespaceEndCommentsFixer.h 2
-rw-r--r-- lib/Format/SortJavaScriptImports.cpp 16
-rw-r--r-- lib/Format/TokenAnalyzer.cpp 40
-rw-r--r-- lib/Format/TokenAnalyzer.h 47
-rw-r--r-- lib/Format/TokenAnnotator.cpp 265
-rw-r--r-- lib/Format/TokenAnnotator.h 5
-rw-r--r-- lib/Format/UnwrappedLineFormatter.cpp 259
-rw-r--r-- lib/Format/UnwrappedLineFormatter.h 11
-rw-r--r-- lib/Format/UnwrappedLineParser.cpp 320
-rw-r--r-- lib/Format/UnwrappedLineParser.h 43
-rw-r--r-- lib/Format/UsingDeclarationsSorter.cpp 91
-rw-r--r-- lib/Format/UsingDeclarationsSorter.h 2
-rw-r--r-- lib/Format/WhitespaceManager.cpp 33
-rw-r--r-- lib/Format/WhitespaceManager.h 8
25 files changed, 2288 insertions, 1088 deletions
diff --git a/lib/Format/BreakableToken.cpp b/lib/Format/BreakableToken.cpp
index 3c9df62f80dc..4735ab3564f0 100644
--- a/lib/Format/BreakableToken.cpp
+++ b/lib/Format/BreakableToken.cpp
@@ -40,9 +40,15 @@ static bool IsBlank(char C) {
}
}
-static StringRef getLineCommentIndentPrefix(StringRef Comment) {
- static const char *const KnownPrefixes[] = {
- "///<", "//!<", "///", "//", "//!"};
+static StringRef getLineCommentIndentPrefix(StringRef Comment,
+ const FormatStyle &Style) {
+ static const char *const KnownCStylePrefixes[] = {"///<", "//!<", "///", "//",
+ "//!"};
+ static const char *const KnownTextProtoPrefixes[] = {"//", "#"};
+ ArrayRef<const char *> KnownPrefixes(KnownCStylePrefixes);
+ if (Style.Language == FormatStyle::LK_TextProto)
+ KnownPrefixes = KnownTextProtoPrefixes;
+
StringRef LongestPrefix;
for (StringRef KnownPrefix : KnownPrefixes) {
if (Comment.startswith(KnownPrefix)) {
@@ -61,6 +67,8 @@ static BreakableToken::Split getCommentSplit(StringRef Text,
unsigned ColumnLimit,
unsigned TabWidth,
encoding::Encoding Encoding) {
+ DEBUG(llvm::dbgs() << "Comment split: \"" << Text << ", " << ColumnLimit
+ << "\", Content start: " << ContentStartColumn << "\n");
if (ColumnLimit <= ContentStartColumn + 1)
return BreakableToken::Split(StringRef::npos, 0);
@@ -165,7 +173,7 @@ bool switchesFormatting(const FormatToken &Token) {
}
unsigned
-BreakableToken::getLineLengthAfterCompression(unsigned RemainingTokenColumns,
+BreakableToken::getLengthAfterCompression(unsigned RemainingTokenColumns,
Split Split) const {
// Example: consider the content
// lala lala
@@ -175,58 +183,64 @@ BreakableToken::getLineLengthAfterCompression(unsigned RemainingTokenColumns,
// We compute the number of columns when the split is compressed into a single
// space, like:
// lala lala
+ //
+ // FIXME: Correctly measure the length of whitespace in Split.second so it
+ // works with tabs.
return RemainingTokenColumns + 1 - Split.second;
}
-unsigned BreakableSingleLineToken::getLineCount() const { return 1; }
+unsigned BreakableStringLiteral::getLineCount() const { return 1; }
+
+unsigned BreakableStringLiteral::getRangeLength(unsigned LineIndex,
+ unsigned Offset,
+ StringRef::size_type Length,
+ unsigned StartColumn) const {
+ llvm_unreachable("Getting the length of a part of the string literal "
+ "indicates that the code tries to reflow it.");
+}
+
+unsigned
+BreakableStringLiteral::getRemainingLength(unsigned LineIndex, unsigned Offset,
+ unsigned StartColumn) const {
+ return UnbreakableTailLength + Postfix.size() +
+ encoding::columnWidthWithTabs(Line.substr(Offset, StringRef::npos),
+ StartColumn, Style.TabWidth, Encoding);
+}
-unsigned BreakableSingleLineToken::getLineLengthAfterSplit(
- unsigned LineIndex, unsigned TailOffset,
- StringRef::size_type Length) const {
- return StartColumn + Prefix.size() + Postfix.size() +
- encoding::columnWidthWithTabs(Line.substr(TailOffset, Length),
- StartColumn + Prefix.size(),
- Style.TabWidth, Encoding);
+unsigned BreakableStringLiteral::getContentStartColumn(unsigned LineIndex,
+ bool Break) const {
+ return StartColumn + Prefix.size();
}
-BreakableSingleLineToken::BreakableSingleLineToken(
+BreakableStringLiteral::BreakableStringLiteral(
const FormatToken &Tok, unsigned StartColumn, StringRef Prefix,
StringRef Postfix, bool InPPDirective, encoding::Encoding Encoding,
const FormatStyle &Style)
: BreakableToken(Tok, InPPDirective, Encoding, Style),
- StartColumn(StartColumn), Prefix(Prefix), Postfix(Postfix) {
+ StartColumn(StartColumn), Prefix(Prefix), Postfix(Postfix),
+ UnbreakableTailLength(Tok.UnbreakableTailLength) {
assert(Tok.TokenText.startswith(Prefix) && Tok.TokenText.endswith(Postfix));
Line = Tok.TokenText.substr(
Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size());
}
-BreakableStringLiteral::BreakableStringLiteral(
- const FormatToken &Tok, unsigned StartColumn, StringRef Prefix,
- StringRef Postfix, bool InPPDirective, encoding::Encoding Encoding,
- const FormatStyle &Style)
- : BreakableSingleLineToken(Tok, StartColumn, Prefix, Postfix, InPPDirective,
- Encoding, Style) {}
-
-BreakableToken::Split
-BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset,
- unsigned ColumnLimit,
- llvm::Regex &CommentPragmasRegex) const {
- return getStringSplit(Line.substr(TailOffset),
- StartColumn + Prefix.size() + Postfix.size(),
- ColumnLimit, Style.TabWidth, Encoding);
+BreakableToken::Split BreakableStringLiteral::getSplit(
+ unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit,
+ unsigned ContentStartColumn, llvm::Regex &CommentPragmasRegex) const {
+ return getStringSplit(Line.substr(TailOffset), ContentStartColumn,
+ ColumnLimit - Postfix.size(), Style.TabWidth, Encoding);
}
void BreakableStringLiteral::insertBreak(unsigned LineIndex,
unsigned TailOffset, Split Split,
- WhitespaceManager &Whitespaces) {
+ WhitespaceManager &Whitespaces) const {
Whitespaces.replaceWhitespaceInToken(
Tok, Prefix.size() + TailOffset + Split.first, Split.second, Postfix,
Prefix, InPPDirective, 1, StartColumn);
}
BreakableComment::BreakableComment(const FormatToken &Token,
- unsigned StartColumn,
- bool InPPDirective,
+ unsigned StartColumn, bool InPPDirective,
encoding::Encoding Encoding,
const FormatStyle &Style)
: BreakableToken(Token, InPPDirective, Encoding, Style),
@@ -236,19 +250,19 @@ unsigned BreakableComment::getLineCount() const { return Lines.size(); }
BreakableToken::Split
BreakableComment::getSplit(unsigned LineIndex, unsigned TailOffset,
- unsigned ColumnLimit,
+ unsigned ColumnLimit, unsigned ContentStartColumn,
llvm::Regex &CommentPragmasRegex) const {
// Don't break lines matching the comment pragmas regex.
if (CommentPragmasRegex.match(Content[LineIndex]))
return Split(StringRef::npos, 0);
return getCommentSplit(Content[LineIndex].substr(TailOffset),
- getContentStartColumn(LineIndex, TailOffset),
- ColumnLimit, Style.TabWidth, Encoding);
+ ContentStartColumn, ColumnLimit, Style.TabWidth,
+ Encoding);
}
-void BreakableComment::compressWhitespace(unsigned LineIndex,
- unsigned TailOffset, Split Split,
- WhitespaceManager &Whitespaces) {
+void BreakableComment::compressWhitespace(
+ unsigned LineIndex, unsigned TailOffset, Split Split,
+ WhitespaceManager &Whitespaces) const {
StringRef Text = Content[LineIndex].substr(TailOffset);
// Text is relative to the content line, but Whitespaces operates relative to
// the start of the corresponding token, so compute the start of the Split
@@ -262,44 +276,6 @@ void BreakableComment::compressWhitespace(unsigned LineIndex,
/*InPPDirective=*/false, /*Newlines=*/0, /*Spaces=*/1);
}
-BreakableToken::Split
-BreakableComment::getReflowSplit(StringRef Text, StringRef ReflowPrefix,
- unsigned PreviousEndColumn,
- unsigned ColumnLimit) const {
- unsigned ReflowStartColumn = PreviousEndColumn + ReflowPrefix.size();
- StringRef TrimmedText = Text.rtrim(Blanks);
- // This is the width of the resulting line in case the full line of Text gets
- // reflown up starting at ReflowStartColumn.
- unsigned FullWidth = ReflowStartColumn + encoding::columnWidthWithTabs(
- TrimmedText, ReflowStartColumn,
- Style.TabWidth, Encoding);
- // If the full line fits up, we return a reflow split after it,
- // otherwise we compute the largest piece of text that fits after
- // ReflowStartColumn.
- Split ReflowSplit =
- FullWidth <= ColumnLimit
- ? Split(TrimmedText.size(), Text.size() - TrimmedText.size())
- : getCommentSplit(Text, ReflowStartColumn, ColumnLimit,
- Style.TabWidth, Encoding);
-
- // We need to be extra careful here, because while it's OK to keep a long line
- // if it can't be broken into smaller pieces (like when the first word of a
- // long line is longer than the column limit), it's not OK to reflow that long
- // word up. So we recompute the size of the previous line after reflowing and
- // only return the reflow split if that's under the line limit.
- if (ReflowSplit.first != StringRef::npos &&
- // Check if the width of the newly reflown line is under the limit.
- PreviousEndColumn + ReflowPrefix.size() +
- encoding::columnWidthWithTabs(Text.substr(0, ReflowSplit.first),
- PreviousEndColumn +
- ReflowPrefix.size(),
- Style.TabWidth, Encoding) <=
- ColumnLimit) {
- return ReflowSplit;
- }
- return Split(StringRef::npos, 0);
-}
-
const FormatToken &BreakableComment::tokenAt(unsigned LineIndex) const {
return Tokens[LineIndex] ? *Tokens[LineIndex] : Tok;
}
@@ -309,7 +285,7 @@ static bool mayReflowContent(StringRef Content) {
// Lines starting with '@' commonly have special meaning.
// Lines starting with '-', '-#', '+' or '*' are bulleted/numbered lists.
static const SmallVector<StringRef, 8> kSpecialMeaningPrefixes = {
- "@", "TODO", "FIXME", "XXX", "-# ", "- ", "+ ", "* " };
+ "@", "TODO", "FIXME", "XXX", "-# ", "- ", "+ ", "* "};
bool hasSpecialMeaningPrefix = false;
for (StringRef Prefix : kSpecialMeaningPrefixes) {
if (Content.startswith(Prefix)) {
@@ -322,8 +298,8 @@ static bool mayReflowContent(StringRef Content) {
// To avoid issues if a line starts with a number which is actually the end
// of a previous line, we only consider numbers with up to 2 digits.
static llvm::Regex kNumberedListRegexp = llvm::Regex("^[1-9][0-9]?\\. ");
- hasSpecialMeaningPrefix = hasSpecialMeaningPrefix ||
- kNumberedListRegexp.match(Content);
+ hasSpecialMeaningPrefix =
+ hasSpecialMeaningPrefix || kNumberedListRegexp.match(Content);
// Simple heuristic for what to reflow: content should contain at least two
// characters and either the first or second character must be
@@ -339,7 +315,9 @@ BreakableBlockComment::BreakableBlockComment(
const FormatToken &Token, unsigned StartColumn,
unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective,
encoding::Encoding Encoding, const FormatStyle &Style)
- : BreakableComment(Token, StartColumn, InPPDirective, Encoding, Style) {
+ : BreakableComment(Token, StartColumn, InPPDirective, Encoding, Style),
+ DelimitersOnNewline(false),
+ UnbreakableTailLength(Token.UnbreakableTailLength) {
assert(Tok.is(TT_BlockComment) &&
"block comment section must start with a block comment");
@@ -384,8 +362,7 @@ BreakableBlockComment::BreakableBlockComment(
// If the last line is empty, the closing "*/" will have a star.
if (i + 1 == e && Content[i].empty())
break;
- if (!Content[i].empty() && i + 1 != e &&
- Decoration.startswith(Content[i]))
+ if (!Content[i].empty() && i + 1 != e && Decoration.startswith(Content[i]))
continue;
while (!Content[i].startswith(Decoration))
Decoration = Decoration.substr(0, Decoration.size() - 1);
@@ -427,11 +404,30 @@ BreakableBlockComment::BreakableBlockComment(
IndentAtLineBreak =
std::min<int>(IndentAtLineBreak, std::max(0, ContentColumn[i]));
}
- IndentAtLineBreak =
- std::max<unsigned>(IndentAtLineBreak, Decoration.size());
+ IndentAtLineBreak = std::max<unsigned>(IndentAtLineBreak, Decoration.size());
+
+ // Detect a multiline jsdoc comment and set DelimitersOnNewline in that case.
+ if (Style.Language == FormatStyle::LK_JavaScript ||
+ Style.Language == FormatStyle::LK_Java) {
+ if ((Lines[0] == "*" || Lines[0].startswith("* ")) && Lines.size() > 1) {
+ // This is a multiline jsdoc comment.
+ DelimitersOnNewline = true;
+ } else if (Lines[0].startswith("* ") && Lines.size() == 1) {
+ // Detect a long single-line comment, like:
+ // /** long long long */
+ // Below, '2' is the width of '*/'.
+ unsigned EndColumn =
+ ContentColumn[0] +
+ encoding::columnWidthWithTabs(Lines[0], ContentColumn[0],
+ Style.TabWidth, Encoding) +
+ 2;
+ DelimitersOnNewline = EndColumn > Style.ColumnLimit;
+ }
+ }
DEBUG({
llvm::dbgs() << "IndentAtLineBreak " << IndentAtLineBreak << "\n";
+ llvm::dbgs() << "DelimitersOnNewline " << DelimitersOnNewline << "\n";
for (size_t i = 0; i < Lines.size(); ++i) {
llvm::dbgs() << i << " |" << Content[i] << "| "
<< "CC=" << ContentColumn[i] << "| "
@@ -477,30 +473,45 @@ void BreakableBlockComment::adjustWhitespace(unsigned LineIndex,
IndentDelta;
}
-unsigned BreakableBlockComment::getLineLengthAfterSplit(
- unsigned LineIndex, unsigned TailOffset,
- StringRef::size_type Length) const {
- unsigned ContentStartColumn = getContentStartColumn(LineIndex, TailOffset);
+unsigned BreakableBlockComment::getRangeLength(unsigned LineIndex,
+ unsigned Offset,
+ StringRef::size_type Length,
+ unsigned StartColumn) const {
unsigned LineLength =
- ContentStartColumn + encoding::columnWidthWithTabs(
- Content[LineIndex].substr(TailOffset, Length),
- ContentStartColumn, Style.TabWidth, Encoding);
+ encoding::columnWidthWithTabs(Content[LineIndex].substr(Offset, Length),
+ StartColumn, Style.TabWidth, Encoding);
+ // FIXME: This should go into getRemainingLength instead, but we currently
+ // break tests when putting it there. Investigate how to fix those tests.
// The last line gets a "*/" postfix.
if (LineIndex + 1 == Lines.size()) {
LineLength += 2;
// We never need a decoration when breaking just the trailing "*/" postfix.
// Note that checking that Length == 0 is not enough, since Length could
// also be StringRef::npos.
- if (Content[LineIndex].substr(TailOffset, Length).empty()) {
+ if (Content[LineIndex].substr(Offset, StringRef::npos).empty()) {
LineLength -= Decoration.size();
}
}
return LineLength;
}
+unsigned BreakableBlockComment::getRemainingLength(unsigned LineIndex,
+ unsigned Offset,
+ unsigned StartColumn) const {
+ return UnbreakableTailLength +
+ getRangeLength(LineIndex, Offset, StringRef::npos, StartColumn);
+}
+
+unsigned BreakableBlockComment::getContentStartColumn(unsigned LineIndex,
+ bool Break) const {
+ if (Break)
+ return IndentAtLineBreak;
+ return std::max(0, ContentColumn[LineIndex]);
+}
+
void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
Split Split,
- WhitespaceManager &Whitespaces) {
+ WhitespaceManager &Whitespaces) const {
StringRef Text = Content[LineIndex].substr(TailOffset);
StringRef Prefix = Decoration;
// We need this to account for the case when we have a decoration "* " for all
@@ -526,97 +537,55 @@ void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
/*Spaces=*/LocalIndentAtLineBreak - Prefix.size());
}
-BreakableToken::Split BreakableBlockComment::getSplitBefore(
- unsigned LineIndex,
- unsigned PreviousEndColumn,
- unsigned ColumnLimit,
- llvm::Regex &CommentPragmasRegex) const {
+BreakableToken::Split
+BreakableBlockComment::getReflowSplit(unsigned LineIndex,
+ llvm::Regex &CommentPragmasRegex) const {
if (!mayReflow(LineIndex, CommentPragmasRegex))
return Split(StringRef::npos, 0);
- StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks);
- return getReflowSplit(TrimmedContent, ReflowPrefix, PreviousEndColumn,
- ColumnLimit);
-}
-
-unsigned BreakableBlockComment::getReflownColumn(
- StringRef Content,
- unsigned LineIndex,
- unsigned PreviousEndColumn) const {
- unsigned StartColumn = PreviousEndColumn + ReflowPrefix.size();
- // If this is the last line, it will carry around its '*/' postfix.
- unsigned PostfixLength = (LineIndex + 1 == Lines.size() ? 2 : 0);
- // The line is composed of previous text, reflow prefix, reflown text and
- // postfix.
- unsigned ReflownColumn =
- StartColumn + encoding::columnWidthWithTabs(Content, StartColumn,
- Style.TabWidth, Encoding) +
- PostfixLength;
- return ReflownColumn;
-}
-
-unsigned BreakableBlockComment::getLineLengthAfterSplitBefore(
- unsigned LineIndex, unsigned TailOffset,
- unsigned PreviousEndColumn,
- unsigned ColumnLimit,
- Split SplitBefore) const {
- if (SplitBefore.first == StringRef::npos ||
- // Block comment line contents contain the trailing whitespace after the
- // decoration, so the need of left trim. Note that this behavior is
- // consistent with the breaking of block comments where the indentation of
- // a broken line is uniform across all the lines of the block comment.
- SplitBefore.first + SplitBefore.second <
- Content[LineIndex].ltrim().size()) {
- // A piece of line, not the whole, gets reflown.
- return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos);
- } else {
- // The whole line gets reflown, need to check if we need to insert a break
- // for the postfix or not.
- StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks);
- unsigned ReflownColumn =
- getReflownColumn(TrimmedContent, LineIndex, PreviousEndColumn);
- if (ReflownColumn <= ColumnLimit) {
- return ReflownColumn;
- }
- return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos);
- }
+
+ size_t Trimmed = Content[LineIndex].find_first_not_of(Blanks);
+ return Split(0, Trimmed != StringRef::npos ? Trimmed : 0);
+}
+
+bool BreakableBlockComment::introducesBreakBeforeToken() const {
+ // A break is introduced when we want delimiters on newline.
+ return DelimitersOnNewline &&
+ Lines[0].substr(1).find_first_not_of(Blanks) != StringRef::npos;
}
-void BreakableBlockComment::replaceWhitespaceBefore(
- unsigned LineIndex, unsigned PreviousEndColumn, unsigned ColumnLimit,
- Split SplitBefore, WhitespaceManager &Whitespaces) {
- if (LineIndex == 0) return;
+
+void BreakableBlockComment::reflow(unsigned LineIndex,
+ WhitespaceManager &Whitespaces) const {
StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks);
- if (SplitBefore.first != StringRef::npos) {
- // Here we need to reflow.
- assert(Tokens[LineIndex - 1] == Tokens[LineIndex] &&
- "Reflowing whitespace within a token");
- // This is the offset of the end of the last line relative to the start of
- // the token text in the token.
- unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() +
- Content[LineIndex - 1].size() -
- tokenAt(LineIndex).TokenText.data();
- unsigned WhitespaceLength = TrimmedContent.data() -
- tokenAt(LineIndex).TokenText.data() -
- WhitespaceOffsetInToken;
- Whitespaces.replaceWhitespaceInToken(
- tokenAt(LineIndex), WhitespaceOffsetInToken,
- /*ReplaceChars=*/WhitespaceLength, /*PreviousPostfix=*/"",
- /*CurrentPrefix=*/ReflowPrefix, InPPDirective, /*Newlines=*/0,
- /*Spaces=*/0);
- // Check if we need to also insert a break at the whitespace range.
- // For this we first adapt the reflow split relative to the beginning of the
- // content.
- // Note that we don't need a penalty for this break, since it doesn't change
- // the total number of lines.
- Split BreakSplit = SplitBefore;
- BreakSplit.first += TrimmedContent.data() - Content[LineIndex].data();
- unsigned ReflownColumn =
- getReflownColumn(TrimmedContent, LineIndex, PreviousEndColumn);
- if (ReflownColumn > ColumnLimit) {
- insertBreak(LineIndex, 0, BreakSplit, Whitespaces);
+ // Here we need to reflow.
+ assert(Tokens[LineIndex - 1] == Tokens[LineIndex] &&
+ "Reflowing whitespace within a token");
+ // This is the offset of the end of the last line relative to the start of
+ // the token text in the token.
+ unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() +
+ Content[LineIndex - 1].size() -
+ tokenAt(LineIndex).TokenText.data();
+ unsigned WhitespaceLength = TrimmedContent.data() -
+ tokenAt(LineIndex).TokenText.data() -
+ WhitespaceOffsetInToken;
+ Whitespaces.replaceWhitespaceInToken(
+ tokenAt(LineIndex), WhitespaceOffsetInToken,
+ /*ReplaceChars=*/WhitespaceLength, /*PreviousPostfix=*/"",
+ /*CurrentPrefix=*/ReflowPrefix, InPPDirective, /*Newlines=*/0,
+ /*Spaces=*/0);
+}
+
+void BreakableBlockComment::adaptStartOfLine(
+ unsigned LineIndex, WhitespaceManager &Whitespaces) const {
+ if (LineIndex == 0) {
+ if (DelimitersOnNewline) {
+ // Since we're breaking at index 1 below, the break position and the
+ // break length are the same.
+ size_t BreakLength = Lines[0].substr(1).find_first_not_of(Blanks);
+ if (BreakLength != StringRef::npos)
+ insertBreak(LineIndex, 0, Split(1, BreakLength), Whitespaces);
}
return;
}
-
// Here no reflow with the previous line will happen.
// Fix the decoration of the line at LineIndex.
StringRef Prefix = Decoration;
@@ -651,6 +620,20 @@ void BreakableBlockComment::replaceWhitespaceBefore(
InPPDirective, /*Newlines=*/1, ContentColumn[LineIndex] - Prefix.size());
}
+BreakableToken::Split
+BreakableBlockComment::getSplitAfterLastLine(unsigned TailOffset) const {
+ if (DelimitersOnNewline) {
+ // Replace the trailing whitespace of the last line with a newline.
+ // In case the last line is empty, the ending '*/' is already on its own
+ // line.
+ StringRef Line = Content.back().substr(TailOffset);
+ StringRef TrimmedLine = Line.rtrim(Blanks);
+ if (!TrimmedLine.empty())
+ return Split(TrimmedLine.size(), Line.size() - TrimmedLine.size());
+ }
+ return Split(StringRef::npos, 0);
+}
+
bool BreakableBlockComment::mayReflow(unsigned LineIndex,
llvm::Regex &CommentPragmasRegex) const {
// Content[LineIndex] may exclude the indent after the '*' decoration. In that
@@ -664,15 +647,6 @@ bool BreakableBlockComment::mayReflow(unsigned LineIndex,
!switchesFormatting(tokenAt(LineIndex));
}
-unsigned
-BreakableBlockComment::getContentStartColumn(unsigned LineIndex,
- unsigned TailOffset) const {
- // If we break, we always break at the predefined indent.
- if (TailOffset != 0)
- return IndentAtLineBreak;
- return std::max(0, ContentColumn[LineIndex]);
-}
-
BreakableLineCommentSection::BreakableLineCommentSection(
const FormatToken &Token, unsigned StartColumn,
unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective,
@@ -686,7 +660,8 @@ BreakableLineCommentSection::BreakableLineCommentSection(
CurrentTok = CurrentTok->Next) {
LastLineTok = LineTok;
StringRef TokenText(CurrentTok->TokenText);
- assert(TokenText.startswith("//"));
+ assert((TokenText.startswith("//") || TokenText.startswith("#")) &&
+ "unsupported line comment prefix, '//' and '#' are supported");
size_t FirstLineIndex = Lines.size();
TokenText.split(Lines, "\n");
Content.resize(Lines.size());
@@ -696,11 +671,13 @@ BreakableLineCommentSection::BreakableLineCommentSection(
Prefix.resize(Lines.size());
OriginalPrefix.resize(Lines.size());
for (size_t i = FirstLineIndex, e = Lines.size(); i < e; ++i) {
+ Lines[i] = Lines[i].ltrim(Blanks);
// We need to trim the blanks in case this is not the first line in a
// multiline comment. Then the indent is included in Lines[i].
StringRef IndentPrefix =
- getLineCommentIndentPrefix(Lines[i].ltrim(Blanks));
- assert(IndentPrefix.startswith("//"));
+ getLineCommentIndentPrefix(Lines[i].ltrim(Blanks), Style);
+ assert((TokenText.startswith("//") || TokenText.startswith("#")) &&
+ "unsupported line comment prefix, '//' and '#' are supported");
OriginalPrefix[i] = Prefix[i] = IndentPrefix;
if (Lines[i].size() > Prefix[i].size() &&
isAlphanumeric(Lines[i][Prefix[i].size()])) {
@@ -714,22 +691,20 @@ BreakableLineCommentSection::BreakableLineCommentSection(
Prefix[i] = "///< ";
else if (Prefix[i] == "//!<")
Prefix[i] = "//!< ";
+ else if (Prefix[i] == "#" &&
+ Style.Language == FormatStyle::LK_TextProto)
+ Prefix[i] = "# ";
}
Tokens[i] = LineTok;
Content[i] = Lines[i].substr(IndentPrefix.size());
OriginalContentColumn[i] =
- StartColumn +
- encoding::columnWidthWithTabs(OriginalPrefix[i],
- StartColumn,
- Style.TabWidth,
- Encoding);
+ StartColumn + encoding::columnWidthWithTabs(OriginalPrefix[i],
+ StartColumn,
+ Style.TabWidth, Encoding);
ContentColumn[i] =
- StartColumn +
- encoding::columnWidthWithTabs(Prefix[i],
- StartColumn,
- Style.TabWidth,
- Encoding);
+ StartColumn + encoding::columnWidthWithTabs(Prefix[i], StartColumn,
+ Style.TabWidth, Encoding);
// Calculate the end of the non-whitespace text in this line.
size_t EndOfLine = Content[i].find_last_not_of(Blanks);
@@ -760,20 +735,25 @@ BreakableLineCommentSection::BreakableLineCommentSection(
}
}
-unsigned BreakableLineCommentSection::getLineLengthAfterSplit(
- unsigned LineIndex, unsigned TailOffset,
- StringRef::size_type Length) const {
- unsigned ContentStartColumn =
- (TailOffset == 0 ? ContentColumn[LineIndex]
- : OriginalContentColumn[LineIndex]);
- return ContentStartColumn + encoding::columnWidthWithTabs(
- Content[LineIndex].substr(TailOffset, Length),
- ContentStartColumn, Style.TabWidth, Encoding);
+unsigned
+BreakableLineCommentSection::getRangeLength(unsigned LineIndex, unsigned Offset,
+ StringRef::size_type Length,
+ unsigned StartColumn) const {
+ return encoding::columnWidthWithTabs(
+ Content[LineIndex].substr(Offset, Length), StartColumn, Style.TabWidth,
+ Encoding);
+}
+
+unsigned BreakableLineCommentSection::getContentStartColumn(unsigned LineIndex,
+ bool Break) const {
+ if (Break)
+ return OriginalContentColumn[LineIndex];
+ return ContentColumn[LineIndex];
}
-void BreakableLineCommentSection::insertBreak(unsigned LineIndex,
- unsigned TailOffset, Split Split,
- WhitespaceManager &Whitespaces) {
+void BreakableLineCommentSection::insertBreak(
+ unsigned LineIndex, unsigned TailOffset, Split Split,
+ WhitespaceManager &Whitespaces) const {
StringRef Text = Content[LineIndex].substr(TailOffset);
// Compute the offset of the split relative to the beginning of the token
// text.
@@ -792,37 +772,42 @@ void BreakableLineCommentSection::insertBreak(unsigned LineIndex,
/*Spaces=*/IndentAtLineBreak - Prefix[LineIndex].size());
}
-BreakableComment::Split BreakableLineCommentSection::getSplitBefore(
- unsigned LineIndex, unsigned PreviousEndColumn, unsigned ColumnLimit,
- llvm::Regex &CommentPragmasRegex) const {
+BreakableComment::Split BreakableLineCommentSection::getReflowSplit(
+ unsigned LineIndex, llvm::Regex &CommentPragmasRegex) const {
if (!mayReflow(LineIndex, CommentPragmasRegex))
return Split(StringRef::npos, 0);
- return getReflowSplit(Content[LineIndex], ReflowPrefix, PreviousEndColumn,
- ColumnLimit);
-}
-
-unsigned BreakableLineCommentSection::getLineLengthAfterSplitBefore(
- unsigned LineIndex, unsigned TailOffset,
- unsigned PreviousEndColumn,
- unsigned ColumnLimit,
- Split SplitBefore) const {
- if (SplitBefore.first == StringRef::npos ||
- SplitBefore.first + SplitBefore.second < Content[LineIndex].size()) {
- // A piece of line, not the whole line, gets reflown.
- return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos);
- } else {
- // The whole line gets reflown.
- unsigned StartColumn = PreviousEndColumn + ReflowPrefix.size();
- return StartColumn + encoding::columnWidthWithTabs(Content[LineIndex],
- StartColumn,
- Style.TabWidth,
- Encoding);
- }
-}
-void BreakableLineCommentSection::replaceWhitespaceBefore(
- unsigned LineIndex, unsigned PreviousEndColumn, unsigned ColumnLimit,
- Split SplitBefore, WhitespaceManager &Whitespaces) {
+ size_t Trimmed = Content[LineIndex].find_first_not_of(Blanks);
+
+ // In a line comment section each line is a separate token; thus, after a
+ // split we replace all whitespace before the current line comment token
+ // (which does not need to be included in the split), plus the start of the
+ // line up to where the content starts.
+ return Split(0, Trimmed != StringRef::npos ? Trimmed : 0);
+}
+
+void BreakableLineCommentSection::reflow(unsigned LineIndex,
+ WhitespaceManager &Whitespaces) const {
+ // Reflow happens between tokens. Replace the whitespace between the
+ // tokens by the empty string.
+ Whitespaces.replaceWhitespace(
+ *Tokens[LineIndex], /*Newlines=*/0, /*Spaces=*/0,
+ /*StartOfTokenColumn=*/StartColumn, /*InPPDirective=*/false);
+ // Replace the indent and prefix of the token with the reflow prefix.
+ unsigned WhitespaceLength =
+ Content[LineIndex].data() - tokenAt(LineIndex).TokenText.data();
+ Whitespaces.replaceWhitespaceInToken(*Tokens[LineIndex],
+ /*Offset=*/0,
+ /*ReplaceChars=*/WhitespaceLength,
+ /*PreviousPostfix=*/"",
+ /*CurrentPrefix=*/ReflowPrefix,
+ /*InPPDirective=*/false,
+ /*Newlines=*/0,
+ /*Spaces=*/0);
+}
+
+void BreakableLineCommentSection::adaptStartOfLine(
+ unsigned LineIndex, WhitespaceManager &Whitespaces) const {
// If this is the first line of a token, we need to inform Whitespace Manager
// about it: either adapt the whitespace range preceding it, or mark it as an
// untouchable token.
@@ -830,44 +815,25 @@ void BreakableLineCommentSection::replaceWhitespaceBefore(
// // line 1 \
// // line 2
if (LineIndex > 0 && Tokens[LineIndex] != Tokens[LineIndex - 1]) {
- if (SplitBefore.first != StringRef::npos) {
- // Reflow happens between tokens. Replace the whitespace between the
- // tokens by the empty string.
- Whitespaces.replaceWhitespace(
- *Tokens[LineIndex], /*Newlines=*/0, /*Spaces=*/0,
- /*StartOfTokenColumn=*/StartColumn, /*InPPDirective=*/false);
- // Replace the indent and prefix of the token with the reflow prefix.
- unsigned WhitespaceLength =
- Content[LineIndex].data() - tokenAt(LineIndex).TokenText.data();
- Whitespaces.replaceWhitespaceInToken(*Tokens[LineIndex],
- /*Offset=*/0,
- /*ReplaceChars=*/WhitespaceLength,
- /*PreviousPostfix=*/"",
- /*CurrentPrefix=*/ReflowPrefix,
- /*InPPDirective=*/false,
- /*Newlines=*/0,
- /*Spaces=*/0);
- } else {
- // This is the first line for the current token, but no reflow with the
- // previous token is necessary. However, we still may need to adjust the
- // start column. Note that ContentColumn[LineIndex] is the expected
- // content column after a possible update to the prefix, hence the prefix
- // length change is included.
- unsigned LineColumn =
- ContentColumn[LineIndex] -
- (Content[LineIndex].data() - Lines[LineIndex].data()) +
- (OriginalPrefix[LineIndex].size() - Prefix[LineIndex].size());
-
- // We always want to create a replacement instead of adding an untouchable
- // token, even if LineColumn is the same as the original column of the
- // token. This is because WhitespaceManager doesn't align trailing
- // comments if they are untouchable.
- Whitespaces.replaceWhitespace(*Tokens[LineIndex],
- /*Newlines=*/1,
- /*Spaces=*/LineColumn,
- /*StartOfTokenColumn=*/LineColumn,
- /*InPPDirective=*/false);
- }
+ // This is the first line for the current token, but no reflow with the
+ // previous token is necessary. However, we still may need to adjust the
+ // start column. Note that ContentColumn[LineIndex] is the expected
+ // content column after a possible update to the prefix, hence the prefix
+ // length change is included.
+ unsigned LineColumn =
+ ContentColumn[LineIndex] -
+ (Content[LineIndex].data() - Lines[LineIndex].data()) +
+ (OriginalPrefix[LineIndex].size() - Prefix[LineIndex].size());
+
+ // We always want to create a replacement instead of adding an untouchable
+ // token, even if LineColumn is the same as the original column of the
+ // token. This is because WhitespaceManager doesn't align trailing
+ // comments if they are untouchable.
+ Whitespaces.replaceWhitespace(*Tokens[LineIndex],
+ /*Newlines=*/1,
+ /*Spaces=*/LineColumn,
+ /*StartOfTokenColumn=*/LineColumn,
+ /*InPPDirective=*/false);
}
if (OriginalPrefix[LineIndex] != Prefix[LineIndex]) {
// Adjust the prefix if necessary.
@@ -880,16 +846,9 @@ void BreakableLineCommentSection::replaceWhitespaceBefore(
tokenAt(LineIndex), OriginalPrefix[LineIndex].size(), 0, "", "",
/*InPPDirective=*/false, /*Newlines=*/0, /*Spaces=*/1);
}
- // Add a break after a reflow split has been introduced, if necessary.
- // Note that this break doesn't need to be penalized, since it doesn't change
- // the number of lines.
- if (SplitBefore.first != StringRef::npos &&
- SplitBefore.first + SplitBefore.second < Content[LineIndex].size()) {
- insertBreak(LineIndex, 0, SplitBefore, Whitespaces);
- }
}
-void BreakableLineCommentSection::updateNextToken(LineState& State) const {
+void BreakableLineCommentSection::updateNextToken(LineState &State) const {
if (LastLineTok) {
State.NextToken = LastLineTok->Next;
}
@@ -903,20 +862,17 @@ bool BreakableLineCommentSection::mayReflow(
if (Lines[LineIndex].startswith("//")) {
IndentContent = Lines[LineIndex].substr(2);
}
+ // FIXME: Decide whether we want to reflow non-regular indents:
+ // Currently, we only reflow when the OriginalPrefix[LineIndex] matches the
+ // OriginalPrefix[LineIndex-1]. That means we don't reflow
+ // // text that protrudes
+ // // into text with different indent
+ // We do reflow in that case in block comments.
return LineIndex > 0 && !CommentPragmasRegex.match(IndentContent) &&
mayReflowContent(Content[LineIndex]) && !Tok.Finalized &&
!switchesFormatting(tokenAt(LineIndex)) &&
OriginalPrefix[LineIndex] == OriginalPrefix[LineIndex - 1];
}
-unsigned
-BreakableLineCommentSection::getContentStartColumn(unsigned LineIndex,
- unsigned TailOffset) const {
- if (TailOffset != 0) {
- return OriginalContentColumn[LineIndex];
- }
- return ContentColumn[LineIndex];
-}
-
} // namespace format
} // namespace clang
diff --git a/lib/Format/BreakableToken.h b/lib/Format/BreakableToken.h
index e642a538e21c..8ef26ef464da 100644
--- a/lib/Format/BreakableToken.h
+++ b/lib/Format/BreakableToken.h
@@ -33,19 +33,32 @@ bool switchesFormatting(const FormatToken &Token);
struct FormatStyle;
-/// \brief Base class for strategies on how to break tokens.
+/// \brief Base class for tokens / ranges of tokens that can allow breaking
+/// within the tokens - for example, to avoid whitespace beyond the column
+/// limit, or to reflow text.
///
-/// This is organised around the concept of a \c Split, which is a whitespace
-/// range that signifies a position of the content of a token where a
-/// reformatting might be done. Operating with splits is divided into 3
-/// operations:
+/// Generally, a breakable token consists of logical lines, addressed by a line
+/// index. For example, in a sequence of line comments, each line comment is its
+/// own logical line; similarly, for a block comment, each line in the block
+/// comment is on its own logical line.
+///
+/// There are two methods to compute the layout of the token:
+/// - getRangeLength measures the number of columns needed for a range of text
+/// within a logical line, and
+/// - getContentStartColumn returns the start column at which we want the
+/// content of a logical line to start (potentially after introducing a line
+/// break).
+///
+/// The mechanism to adapt the layout of the breakable token is organised
+/// around the concept of a \c Split, which is a whitespace range that signifies
+/// a position of the content of a token where a reformatting might be done.
+///
+/// Operating with splits is divided into two operations:
/// - getSplit, for finding a split starting at a position,
-/// - getLineLengthAfterSplit, for calculating the size in columns of the rest
-/// of the content after a split has been used for breaking, and
/// - insertBreak, for executing the split using a whitespace manager.
///
/// There is a pair of operations that are used to compress a long whitespace
-/// range with a single space if that will bring the line lenght under the
+/// range with a single space if that will bring the line length under the
/// column limit:
/// - getLineLengthAfterCompression, for calculating the size in columns of the
/// line after a whitespace range has been compressed, and
@@ -56,16 +69,23 @@ struct FormatStyle;
/// For tokens where the whitespace before each line needs to be also
/// reformatted, for example for tokens supporting reflow, there are analogous
/// operations that might be executed before the main line breaking occurs:
-/// - getSplitBefore, for finding a split such that the content preceding it
+/// - getReflowSplit, for finding a split such that the content preceding it
/// needs to be specially reflown,
-/// - getLineLengthAfterSplitBefore, for calculating the line length in columns
-/// of the remainder of the content after the beginning of the content has
-/// been reformatted, and
-/// - replaceWhitespaceBefore, for executing the reflow using a whitespace
+/// - reflow, for executing the split using a whitespace manager,
+/// - introducesBreakBeforeToken, for checking if reformatting the beginning
+/// of the content introduces a line break before it,
+/// - adaptStartOfLine, for adjusting the start of a line using a whitespace
/// manager.
///
-/// FIXME: The interface seems set in stone, so we might want to just pull the
-/// strategy into the class, instead of controlling it from the outside.
+/// For tokens that require the whitespace after the last line to be
+/// reformatted, for example in multiline jsdoc comments that require the
+/// trailing '*/' to be on a line of its own, there are analogous operations
+/// that might be executed after the last line has been reformatted:
+/// - getSplitAfterLastLine, for finding a split after the last line that needs
+/// to be reflown,
+/// - replaceWhitespaceAfterLastLine, for executing the reflow using a
+/// whitespace manager.
+///
class BreakableToken {
public:
/// \brief Contains starting character index and length of split.
@@ -76,73 +96,122 @@ public:
/// \brief Returns the number of lines in this token in the original code.
virtual unsigned getLineCount() const = 0;
- /// \brief Returns the number of columns required to format the piece of line
- /// at \p LineIndex, from byte offset \p TailOffset with length \p Length.
+ /// \brief Returns the number of columns required to format the text in the
+ /// byte range [\p Offset, \p Offset \c + \p Length).
+ ///
+ /// \p Offset is the byte offset from the start of the content of the line
+ /// at \p LineIndex.
+ ///
+ /// \p StartColumn is the column at which the text starts in the formatted
+ /// file, needed to compute tab stops correctly.
+ virtual unsigned getRangeLength(unsigned LineIndex, unsigned Offset,
+ StringRef::size_type Length,
+ unsigned StartColumn) const = 0;
+
+ /// \brief Returns the number of columns required to format the text following
+ /// the byte \p Offset in the line \p LineIndex, including potentially
+ /// unbreakable sequences of tokens following after the end of the token.
+ ///
+ /// \p Offset is the byte offset from the start of the content of the line
+ /// at \p LineIndex.
+ ///
+ /// \p StartColumn is the column at which the text starts in the formatted
+ /// file, needed to compute tab stops correctly.
///
- /// Note that previous breaks are not taken into account. \p TailOffset is
- /// always specified from the start of the (original) line.
- /// \p Length can be set to StringRef::npos, which means "to the end of line".
- virtual unsigned
- getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset,
- StringRef::size_type Length) const = 0;
+ /// For breakable tokens that never use extra space at the end of a line, this
+ /// is equivalent to getRangeLength with a Length of StringRef::npos.
+ virtual unsigned getRemainingLength(unsigned LineIndex, unsigned Offset,
+ unsigned StartColumn) const {
+ return getRangeLength(LineIndex, Offset, StringRef::npos, StartColumn);
+ }
+
+ /// \brief Returns the column at which content in line \p LineIndex starts,
+ /// assuming no reflow.
+ ///
+ /// If \p Break is true, returns the column at which the line should start
+ /// after the line break.
+ /// If \p Break is false, returns the column at which the line itself will
+ /// start.
+ virtual unsigned getContentStartColumn(unsigned LineIndex,
+ bool Break) const = 0;
/// \brief Returns a range (offset, length) at which to break the line at
/// \p LineIndex, if previously broken at \p TailOffset. If possible, do not
- /// violate \p ColumnLimit.
+ /// violate \p ColumnLimit, assuming the text starting at \p TailOffset in
+ /// the token is formatted starting at ContentStartColumn in the reformatted
+ /// file.
virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
- unsigned ColumnLimit,
+ unsigned ColumnLimit, unsigned ContentStartColumn,
llvm::Regex &CommentPragmasRegex) const = 0;
/// \brief Emits the previously retrieved \p Split via \p Whitespaces.
virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
- WhitespaceManager &Whitespaces) = 0;
+ WhitespaceManager &Whitespaces) const = 0;
- /// \brief Returns the number of columns required to format the piece of line
- /// at \p LineIndex, from byte offset \p TailOffset after the whitespace range
- /// \p Split has been compressed into a single space.
- unsigned getLineLengthAfterCompression(unsigned RemainingTokenColumns,
- Split Split) const;
+ /// \brief Returns the number of columns needed to format
+ /// \p RemainingTokenColumns, assuming that Split is within the range measured
+ /// by \p RemainingTokenColumns, and that the whitespace in Split is reduced
+ /// to a single space.
+ unsigned getLengthAfterCompression(unsigned RemainingTokenColumns,
+ Split Split) const;
/// \brief Replaces the whitespace range described by \p Split with a single
/// space.
virtual void compressWhitespace(unsigned LineIndex, unsigned TailOffset,
Split Split,
- WhitespaceManager &Whitespaces) = 0;
+ WhitespaceManager &Whitespaces) const = 0;
- /// \brief Returns a whitespace range (offset, length) of the content at
- /// \p LineIndex such that the content preceding this range needs to be
- /// reformatted before any breaks are made to this line.
+ /// \brief Returns whether the token supports reflowing text.
+ virtual bool supportsReflow() const { return false; }
+
+ /// \brief Returns a whitespace range (offset, length) of the content at \p
+ /// LineIndex such that the content of that line is reflown to the end of the
+ /// previous one.
///
- /// \p PreviousEndColumn is the end column of the previous line after
- /// formatting.
+ /// Returning (StringRef::npos, 0) indicates reflowing is not possible.
///
- /// A result having offset == StringRef::npos means that no piece of the line
- /// needs to be reformatted before any breaks are made.
- virtual Split getSplitBefore(unsigned LineIndex, unsigned PreviousEndColumn,
- unsigned ColumnLimit,
+ /// The range will include any whitespace preceding the specified line's
+ /// content.
+ ///
+ /// If the split is not contained within one token, for example when reflowing
+ /// line comments, returns (0, <length>).
+ virtual Split getReflowSplit(unsigned LineIndex,
llvm::Regex &CommentPragmasRegex) const {
return Split(StringRef::npos, 0);
}
- /// \brief Returns the number of columns required to format the piece of line
- /// at \p LineIndex after the content preceding the whitespace range specified
- /// \p SplitBefore has been reformatted, but before any breaks are made to
- /// this line.
- virtual unsigned getLineLengthAfterSplitBefore(unsigned LineIndex,
- unsigned TailOffset,
- unsigned PreviousEndColumn,
- unsigned ColumnLimit,
- Split SplitBefore) const {
- return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos);
+ /// \brief Reflows the current line into the end of the previous one.
+ virtual void reflow(unsigned LineIndex,
+ WhitespaceManager &Whitespaces) const {}
+
+ /// \brief Returns whether there will be a line break at the start of the
+ /// token.
+ virtual bool introducesBreakBeforeToken() const {
+ return false;
}
/// \brief Replaces the whitespace between \p LineIndex-1 and \p LineIndex.
- /// Performs a reformatting of the content at \p LineIndex preceding the
- /// whitespace range \p SplitBefore.
- virtual void replaceWhitespaceBefore(unsigned LineIndex,
- unsigned PreviousEndColumn,
- unsigned ColumnLimit, Split SplitBefore,
- WhitespaceManager &Whitespaces) {}
+ virtual void adaptStartOfLine(unsigned LineIndex,
+ WhitespaceManager &Whitespaces) const {}
+
+ /// \brief Returns a whitespace range (offset, length) of the content at
+ /// the last line that needs to be reformatted after the last line has been
+ /// reformatted.
+ ///
+ /// A result having offset == StringRef::npos means that no reformat is
+ /// necessary.
+ virtual Split getSplitAfterLastLine(unsigned TailOffset) const {
+ return Split(StringRef::npos, 0);
+ }
+
+ /// \brief Replaces the whitespace from \p SplitAfterLastLine on the last line
+ /// with a line break, after the last line has been formatted.
+ void replaceWhitespaceAfterLastLine(unsigned TailOffset,
+ Split SplitAfterLastLine,
+ WhitespaceManager &Whitespaces) const {
+ insertBreak(getLineCount() - 1, TailOffset, SplitAfterLastLine,
+ Whitespaces);
+ }
/// \brief Updates the next token of \p State to the next token after this
/// one. This can be used when this token manages a set of underlying tokens
@@ -161,32 +230,7 @@ protected:
const FormatStyle &Style;
};
-/// \brief Base class for single line tokens that can be broken.
-///
-/// \c getSplit() needs to be implemented by child classes.
-class BreakableSingleLineToken : public BreakableToken {
-public:
- unsigned getLineCount() const override;
- unsigned getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset,
- StringRef::size_type Length) const override;
-
-protected:
- BreakableSingleLineToken(const FormatToken &Tok, unsigned StartColumn,
- StringRef Prefix, StringRef Postfix,
- bool InPPDirective, encoding::Encoding Encoding,
- const FormatStyle &Style);
-
- // The column in which the token starts.
- unsigned StartColumn;
- // The prefix a line needs after a break in the token.
- StringRef Prefix;
- // The postfix a line needs before introducing a break.
- StringRef Postfix;
- // The token text excluding the prefix and postfix.
- StringRef Line;
-};
-
-class BreakableStringLiteral : public BreakableSingleLineToken {
+class BreakableStringLiteral : public BreakableToken {
public:
/// \brief Creates a breakable token for a single line string literal.
///
@@ -198,11 +242,32 @@ public:
const FormatStyle &Style);
Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit,
+ unsigned ReflowColumn,
llvm::Regex &CommentPragmasRegex) const override;
void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
- WhitespaceManager &Whitespaces) override;
+ WhitespaceManager &Whitespaces) const override;
void compressWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split,
- WhitespaceManager &Whitespaces) override {}
+ WhitespaceManager &Whitespaces) const override {}
+ unsigned getLineCount() const override;
+ unsigned getRangeLength(unsigned LineIndex, unsigned Offset,
+ StringRef::size_type Length,
+ unsigned StartColumn) const override;
+ unsigned getRemainingLength(unsigned LineIndex, unsigned Offset,
+ unsigned StartColumn) const override;
+ unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override;
+
+protected:
+ // The column in which the token starts.
+ unsigned StartColumn;
+ // The prefix a line needs after a break in the token.
+ StringRef Prefix;
+ // The postfix a line needs before introducing a break.
+ StringRef Postfix;
+ // The token text excluding the prefix and postfix.
+ StringRef Line;
+ // Length of the sequence of tokens after this string literal that cannot
+ // contain line breaks.
+ unsigned UnbreakableTailLength;
};
class BreakableComment : public BreakableToken {
@@ -216,21 +281,15 @@ protected:
const FormatStyle &Style);
public:
+ bool supportsReflow() const override { return true; }
unsigned getLineCount() const override;
Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit,
+ unsigned ReflowColumn,
llvm::Regex &CommentPragmasRegex) const override;
void compressWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split,
- WhitespaceManager &Whitespaces) override;
+ WhitespaceManager &Whitespaces) const override;
protected:
- virtual unsigned getContentStartColumn(unsigned LineIndex,
- unsigned TailOffset) const = 0;
-
- // Returns a split that divides Text into a left and right parts, such that
- // the left part is suitable for reflowing after PreviousEndColumn.
- Split getReflowSplit(StringRef Text, StringRef ReflowPrefix,
- unsigned PreviousEndColumn, unsigned ColumnLimit) const;
-
// Returns the token containing the line at LineIndex.
const FormatToken &tokenAt(unsigned LineIndex) const;
@@ -289,21 +348,23 @@ public:
bool InPPDirective, encoding::Encoding Encoding,
const FormatStyle &Style);
- unsigned getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset,
- StringRef::size_type Length) const override;
+ unsigned getRangeLength(unsigned LineIndex, unsigned Offset,
+ StringRef::size_type Length,
+ unsigned StartColumn) const override;
+ unsigned getRemainingLength(unsigned LineIndex, unsigned Offset,
+ unsigned StartColumn) const override;
+ unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override;
void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
- WhitespaceManager &Whitespaces) override;
- Split getSplitBefore(unsigned LineIndex, unsigned PreviousEndColumn,
- unsigned ColumnLimit,
+ WhitespaceManager &Whitespaces) const override;
+ Split getReflowSplit(unsigned LineIndex,
llvm::Regex &CommentPragmasRegex) const override;
- unsigned getLineLengthAfterSplitBefore(unsigned LineIndex,
- unsigned TailOffset,
- unsigned PreviousEndColumn,
- unsigned ColumnLimit,
- Split SplitBefore) const override;
- void replaceWhitespaceBefore(unsigned LineIndex, unsigned PreviousEndColumn,
- unsigned ColumnLimit, Split SplitBefore,
- WhitespaceManager &Whitespaces) override;
+ void reflow(unsigned LineIndex,
+ WhitespaceManager &Whitespaces) const override;
+ bool introducesBreakBeforeToken() const override;
+ void adaptStartOfLine(unsigned LineIndex,
+ WhitespaceManager &Whitespaces) const override;
+ Split getSplitAfterLastLine(unsigned TailOffset) const override;
+
bool mayReflow(unsigned LineIndex,
llvm::Regex &CommentPragmasRegex) const override;
@@ -318,14 +379,6 @@ private:
// considered part of the text).
void adjustWhitespace(unsigned LineIndex, int IndentDelta);
- // Computes the end column if the full Content from LineIndex gets reflown
- // after PreviousEndColumn.
- unsigned getReflownColumn(StringRef Content, unsigned LineIndex,
- unsigned PreviousEndColumn) const;
-
- unsigned getContentStartColumn(unsigned LineIndex,
- unsigned TailOffset) const override;
-
// The column at which the text of a broken line should start.
// Note that an optional decoration would go before that column.
// IndentAtLineBreak is a uniform position for all lines in a block comment,
@@ -348,6 +401,14 @@ private:
// If this block comment has decorations, this is the column of the start of
// the decorations.
unsigned DecorationColumn;
+
+ // If true, make sure that the opening '/**' and the closing '*/' end on a
+ // line of their own. Styles like jsdoc require this for multiline comments.
+ bool DelimitersOnNewline;
+
+ // Length of the sequence of tokens after this block comment that cannot
+ // contain line breaks.
+ unsigned UnbreakableTailLength;
};
class BreakableLineCommentSection : public BreakableComment {
@@ -357,29 +418,23 @@ public:
bool InPPDirective, encoding::Encoding Encoding,
const FormatStyle &Style);
- unsigned getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset,
- StringRef::size_type Length) const override;
+ unsigned getRangeLength(unsigned LineIndex, unsigned Offset,
+ StringRef::size_type Length,
+ unsigned StartColumn) const override;
+ unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override;
void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
- WhitespaceManager &Whitespaces) override;
- Split getSplitBefore(unsigned LineIndex, unsigned PreviousEndColumn,
- unsigned ColumnLimit,
+ WhitespaceManager &Whitespaces) const override;
+ Split getReflowSplit(unsigned LineIndex,
llvm::Regex &CommentPragmasRegex) const override;
- unsigned getLineLengthAfterSplitBefore(unsigned LineIndex,
- unsigned TailOffset,
- unsigned PreviousEndColumn,
- unsigned ColumnLimit,
- Split SplitBefore) const override;
- void replaceWhitespaceBefore(unsigned LineIndex, unsigned PreviousEndColumn,
- unsigned ColumnLimit, Split SplitBefore,
- WhitespaceManager &Whitespaces) override;
+ void reflow(unsigned LineIndex,
+ WhitespaceManager &Whitespaces) const override;
+ void adaptStartOfLine(unsigned LineIndex,
+ WhitespaceManager &Whitespaces) const override;
void updateNextToken(LineState &State) const override;
bool mayReflow(unsigned LineIndex,
llvm::Regex &CommentPragmasRegex) const override;
private:
- unsigned getContentStartColumn(unsigned LineIndex,
- unsigned TailOffset) const override;
-
// OriginalPrefix[i] contains the original prefix of line i, including
// trailing whitespace before the start of the content. The indentation
// preceding the prefix is not included.
diff --git a/lib/Format/ContinuationIndenter.cpp b/lib/Format/ContinuationIndenter.cpp
index 3bf1cd8f7c13..a3d38b244c5c 100644
--- a/lib/Format/ContinuationIndenter.cpp
+++ b/lib/Format/ContinuationIndenter.cpp
@@ -12,8 +12,9 @@
///
//===----------------------------------------------------------------------===//
-#include "BreakableToken.h"
#include "ContinuationIndenter.h"
+#include "BreakableToken.h"
+#include "FormatInternal.h"
#include "WhitespaceManager.h"
#include "clang/Basic/OperatorPrecedence.h"
#include "clang/Basic/SourceManager.h"
@@ -76,6 +77,53 @@ static bool opensProtoMessageField(const FormatToken &LessTok,
(LessTok.Previous && LessTok.Previous->is(tok::equal))));
}
+// Returns the delimiter of a raw string literal, or None if TokenText is not
+// the text of a raw string literal. The delimiter could be the empty string.
+// For example, the delimiter of R"deli(cont)deli" is deli.
+static llvm::Optional<StringRef> getRawStringDelimiter(StringRef TokenText) {
+ if (TokenText.size() < 5 // The smallest raw string possible is 'R"()"'.
+ || !TokenText.startswith("R\"") || !TokenText.endswith("\""))
+ return None;
+
+ // A raw string starts with 'R"<delimiter>(' and delimiter is ascii and has
+ // size at most 16 by the standard, so the first '(' must be among the first
+ // 19 bytes.
+ size_t LParenPos = TokenText.substr(0, 19).find_first_of('(');
+ if (LParenPos == StringRef::npos)
+ return None;
+ StringRef Delimiter = TokenText.substr(2, LParenPos - 2);
+
+ // Check that the string ends in ')Delimiter"'.
+ size_t RParenPos = TokenText.size() - Delimiter.size() - 2;
+ if (TokenText[RParenPos] != ')')
+ return None;
+ if (!TokenText.substr(RParenPos + 1).startswith(Delimiter))
+ return None;
+ return Delimiter;
+}
+
+RawStringFormatStyleManager::RawStringFormatStyleManager(
+ const FormatStyle &CodeStyle) {
+ for (const auto &RawStringFormat : CodeStyle.RawStringFormats) {
+ FormatStyle Style;
+ if (!getPredefinedStyle(RawStringFormat.BasedOnStyle,
+ RawStringFormat.Language, &Style)) {
+ Style = getLLVMStyle();
+ Style.Language = RawStringFormat.Language;
+ }
+ Style.ColumnLimit = CodeStyle.ColumnLimit;
+ DelimiterStyle.insert({RawStringFormat.Delimiter, Style});
+ }
+}
+
+llvm::Optional<FormatStyle>
+RawStringFormatStyleManager::get(StringRef Delimiter) const {
+ auto It = DelimiterStyle.find(Delimiter);
+ if (It == DelimiterStyle.end())
+ return None;
+ return It->second;
+}
+
ContinuationIndenter::ContinuationIndenter(const FormatStyle &Style,
const AdditionalKeywords &Keywords,
const SourceManager &SourceMgr,
@@ -85,20 +133,32 @@ ContinuationIndenter::ContinuationIndenter(const FormatStyle &Style,
: Style(Style), Keywords(Keywords), SourceMgr(SourceMgr),
Whitespaces(Whitespaces), Encoding(Encoding),
BinPackInconclusiveFunctions(BinPackInconclusiveFunctions),
- CommentPragmasRegex(Style.CommentPragmas) {}
+ CommentPragmasRegex(Style.CommentPragmas), RawStringFormats(Style) {}
LineState ContinuationIndenter::getInitialState(unsigned FirstIndent,
+ unsigned FirstStartColumn,
const AnnotatedLine *Line,
bool DryRun) {
LineState State;
State.FirstIndent = FirstIndent;
- State.Column = FirstIndent;
+ if (FirstStartColumn && Line->First->NewlinesBefore == 0)
+ State.Column = FirstStartColumn;
+ else
+ State.Column = FirstIndent;
+ // With preprocessor directive indentation, the line starts on column 0
+ // since it's indented after the hash, but FirstIndent is set to the
+ // preprocessor indent.
+ if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash &&
+ (Line->Type == LT_PreprocessorDirective ||
+ Line->Type == LT_ImportStatement))
+ State.Column = 0;
State.Line = Line;
State.NextToken = Line->First;
State.Stack.push_back(ParenState(FirstIndent, FirstIndent,
/*AvoidBinPacking=*/false,
/*NoLineBreak=*/false));
State.LineContainsContinuedForLoopSection = false;
+ State.NoContinuation = false;
State.StartOfStringLiteral = 0;
State.StartOfLineLevel = 0;
State.LowestLevelOnLine = 0;
@@ -120,9 +180,8 @@ bool ContinuationIndenter::canBreak(const LineState &State) {
const FormatToken &Current = *State.NextToken;
const FormatToken &Previous = *Current.Previous;
assert(&Previous == Current.Previous);
- if (!Current.CanBreakBefore &&
- !(State.Stack.back().BreakBeforeClosingBrace &&
- Current.closesBlockOrBlockTypeList(Style)))
+ if (!Current.CanBreakBefore && !(State.Stack.back().BreakBeforeClosingBrace &&
+ Current.closesBlockOrBlockTypeList(Style)))
return false;
// The opening "{" of a braced list has to be on the same line as the first
// element if it is nested in another braced init list or function call.
@@ -264,7 +323,8 @@ bool ContinuationIndenter::mustBreak(const LineState &State) {
// We need special cases for ">>" which we have split into two ">" while
// lexing in order to make template parsing easier.
bool IsComparison = (Previous.getPrecedence() == prec::Relational ||
- Previous.getPrecedence() == prec::Equality) &&
+ Previous.getPrecedence() == prec::Equality ||
+ Previous.getPrecedence() == prec::Spaceship) &&
Previous.Previous &&
Previous.Previous->isNot(TT_BinaryOperator); // For >>.
bool LHSIsBinaryExpr =
@@ -316,6 +376,12 @@ bool ContinuationIndenter::mustBreak(const LineState &State) {
Previous.TokenText == "\'\\n\'"))))
return true;
+ if (Previous.is(TT_BlockComment) && Previous.IsMultiline)
+ return true;
+
+ if (State.NoContinuation)
+ return true;
+
return false;
}
@@ -325,6 +391,8 @@ unsigned ContinuationIndenter::addTokenToState(LineState &State, bool Newline,
const FormatToken &Current = *State.NextToken;
assert(!State.Stack.empty());
+ State.NoContinuation = false;
+
if ((Current.is(TT_ImplicitStringLiteral) &&
(Current.Previous->Tok.getIdentifierInfo() == nullptr ||
Current.Previous->Tok.getIdentifierInfo()->getPPKeywordID() ==
@@ -376,9 +444,25 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun,
unsigned Spaces = Current.SpacesRequiredBefore + ExtraSpaces;
+ // Indent preprocessor directives after the hash if required.
+ int PPColumnCorrection = 0;
+ if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash &&
+ Previous.is(tok::hash) && State.FirstIndent > 0 &&
+ (State.Line->Type == LT_PreprocessorDirective ||
+ State.Line->Type == LT_ImportStatement)) {
+ Spaces += State.FirstIndent;
+
+ // For preprocessor indent with tabs, State.Column will be 1 because of the
+ // hash. This causes second-level indents onward to have an extra space
+ // after the tabs. We avoid this misalignment by subtracting 1 from the
+ // column value passed to replaceWhitespace().
+ if (Style.UseTab != FormatStyle::UT_Never)
+ PPColumnCorrection = -1;
+ }
+
if (!DryRun)
Whitespaces.replaceWhitespace(Current, /*Newlines=*/0, Spaces,
- State.Column + Spaces);
+ State.Column + Spaces + PPColumnCorrection);
// If "BreakBeforeInheritanceComma" mode, don't break within the inheritance
// declaration unless there is multiple inheritance.
@@ -405,9 +489,8 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun,
if (Style.AlignAfterOpenBracket == FormatStyle::BAS_AlwaysBreak &&
Previous.isOneOf(tok::l_paren, TT_TemplateOpener, tok::l_square) &&
State.Column > getNewLineColumn(State) &&
- (!Previous.Previous ||
- !Previous.Previous->isOneOf(tok::kw_for, tok::kw_while,
- tok::kw_switch)) &&
+ (!Previous.Previous || !Previous.Previous->isOneOf(
+ tok::kw_for, tok::kw_while, tok::kw_switch)) &&
// Don't do this for simple (no expressions) one-argument function calls
// as that feels like needlessly wasting whitespace, e.g.:
//
@@ -454,7 +537,8 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun,
(P->is(TT_ConditionalExpr) && P->is(tok::colon))) &&
!P->isOneOf(TT_OverloadedOperator, TT_CtorInitializerComma) &&
P->getPrecedence() != prec::Assignment &&
- P->getPrecedence() != prec::Relational) {
+ P->getPrecedence() != prec::Relational &&
+ P->getPrecedence() != prec::Spaceship) {
bool BreakBeforeOperator =
P->MustBreakBefore || P->is(tok::lessless) ||
(P->is(TT_BinaryOperator) &&
@@ -619,8 +703,18 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State,
State.Stack.back().BreakBeforeParameter = false;
if (!DryRun) {
+ unsigned MaxEmptyLinesToKeep = Style.MaxEmptyLinesToKeep + 1;
+ if (Current.is(tok::r_brace) && Current.MatchingParen &&
+ // Only strip trailing empty lines for l_braces that have children, i.e.
+ // for function expressions (lambdas, arrows, etc).
+ !Current.MatchingParen->Children.empty()) {
+ // Lambdas and arrow functions are expressions, thus their r_brace is not
+ // on its own line, and thus not covered by UnwrappedLineFormatter's logic
+ // about removing empty lines on closing blocks. Special case them here.
+ MaxEmptyLinesToKeep = 1;
+ }
unsigned Newlines = std::max(
- 1u, std::min(Current.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1));
+ 1u, std::min(Current.NewlinesBefore, MaxEmptyLinesToKeep));
bool ContinuePPDirective =
State.Line->InPPDirective && State.Line->Type != LT_ImportStatement;
Whitespaces.replaceWhitespace(Current, Newlines, State.Column, State.Column,
@@ -661,9 +755,7 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State,
// before the corresponding } or ].
if (PreviousNonComment &&
(PreviousNonComment->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) ||
- opensProtoMessageField(*PreviousNonComment, Style) ||
- (PreviousNonComment->is(TT_TemplateString) &&
- PreviousNonComment->opensScope())))
+ opensProtoMessageField(*PreviousNonComment, Style)))
State.Stack.back().BreakBeforeClosingBrace = true;
if (State.Stack.back().AvoidBinPacking) {
@@ -731,7 +823,10 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) {
if (NextNonComment->is(TT_TemplateString) && NextNonComment->closesScope())
return State.Stack[State.Stack.size() - 2].LastSpace;
if (Current.is(tok::identifier) && Current.Next &&
- Current.Next->is(TT_DictLiteral))
+ (Current.Next->is(TT_DictLiteral) ||
+ ((Style.Language == FormatStyle::LK_Proto ||
+ Style.Language == FormatStyle::LK_TextProto) &&
+ Current.Next->isOneOf(TT_TemplateOpener, tok::l_brace))))
return State.Stack.back().Indent;
if (NextNonComment->is(TT_ObjCStringLiteral) &&
State.StartOfStringLiteral != 0)
@@ -871,8 +966,10 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State,
// Next(...)
// ^ line up here.
State.Stack.back().Indent =
- State.Column + (Style.BreakConstructorInitializers ==
- FormatStyle::BCIS_BeforeComma ? 0 : 2);
+ State.Column +
+ (Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma
+ ? 0
+ : 2);
State.Stack.back().NestedBlockIndent = State.Stack.back().Indent;
if (Style.ConstructorInitializerAllOnOneLineOrOnePerLine)
State.Stack.back().AvoidBinPacking = true;
@@ -884,7 +981,7 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State,
State.FirstIndent + Style.ConstructorInitializerIndentWidth;
State.Stack.back().NestedBlockIndent = State.Stack.back().Indent;
if (Style.ConstructorInitializerAllOnOneLineOrOnePerLine)
- State.Stack.back().AvoidBinPacking = true;
+ State.Stack.back().AvoidBinPacking = true;
}
if (Current.is(TT_InheritanceColon))
State.Stack.back().Indent =
@@ -912,8 +1009,9 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State,
State.Stack[i].NoLineBreak = true;
State.Stack[State.Stack.size() - 2].NestedBlockInlined = false;
}
- if (Previous && (Previous->isOneOf(tok::l_paren, tok::comma, tok::colon) ||
- Previous->isOneOf(TT_BinaryOperator, TT_ConditionalExpr)) &&
+ if (Previous &&
+ (Previous->isOneOf(tok::l_paren, tok::comma, tok::colon) ||
+ Previous->isOneOf(TT_BinaryOperator, TT_ConditionalExpr)) &&
!Previous->isOneOf(TT_DictLiteral, TT_ObjCMethodExpr)) {
State.Stack.back().NestedBlockInlined =
!Newline &&
@@ -922,13 +1020,8 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State,
moveStatePastFakeLParens(State, Newline);
moveStatePastScopeCloser(State);
- if (Current.is(TT_TemplateString) && Current.opensScope())
- State.Stack.back().LastSpace =
- (Current.IsMultiline ? Current.LastLineColumnWidth
- : State.Column + Current.ColumnWidth) -
- strlen("${");
- bool CanBreakProtrudingToken = !State.Stack.back().NoLineBreak &&
- !State.Stack.back().NoLineBreakInOperand;
+ bool AllowBreak = !State.Stack.back().NoLineBreak &&
+ !State.Stack.back().NoLineBreakInOperand;
moveStatePastScopeOpener(State, Newline);
moveStatePastFakeRParens(State);
@@ -942,13 +1035,9 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State,
State.Column += Current.ColumnWidth;
State.NextToken = State.NextToken->Next;
- unsigned Penalty = 0;
- if (CanBreakProtrudingToken)
- Penalty = breakProtrudingToken(Current, State, DryRun);
- if (State.Column > getColumnLimit(State)) {
- unsigned ExcessCharacters = State.Column - getColumnLimit(State);
- Penalty += Style.PenaltyExcessCharacter * ExcessCharacters;
- }
+
+ unsigned Penalty =
+ handleEndOfLine(Current, State, DryRun, AllowBreak);
if (Current.Role)
Current.Role->formatFromToken(State, this, DryRun);
@@ -1072,14 +1161,13 @@ void ContinuationIndenter::moveStatePastScopeOpener(LineState &State,
bool EndsInComma = Current.MatchingParen &&
Current.MatchingParen->Previous &&
Current.MatchingParen->Previous->is(tok::comma);
- AvoidBinPacking =
- EndsInComma || Current.is(TT_DictLiteral) ||
- Style.Language == FormatStyle::LK_Proto ||
- Style.Language == FormatStyle::LK_TextProto ||
- !Style.BinPackArguments ||
- (NextNoComment &&
- NextNoComment->isOneOf(TT_DesignatedInitializerPeriod,
- TT_DesignatedInitializerLSquare));
+ AvoidBinPacking = EndsInComma || Current.is(TT_DictLiteral) ||
+ Style.Language == FormatStyle::LK_Proto ||
+ Style.Language == FormatStyle::LK_TextProto ||
+ !Style.BinPackArguments ||
+ (NextNoComment &&
+ NextNoComment->isOneOf(TT_DesignatedInitializerPeriod,
+ TT_DesignatedInitializerLSquare));
BreakBeforeParameter = EndsInComma;
if (Current.ParameterCount > 1)
NestedBlockIndent = std::max(NestedBlockIndent, State.Column + 1);
@@ -1098,18 +1186,6 @@ void ContinuationIndenter::moveStatePastScopeOpener(LineState &State,
LastSpace = std::max(LastSpace, State.Stack.back().Indent);
}
- // JavaScript template strings are special as we always want to indent
- // nested expressions relative to the ${}. Otherwise, this can create quite
- // a mess.
- if (Current.is(TT_TemplateString)) {
- unsigned Column = Current.IsMultiline
- ? Current.LastLineColumnWidth
- : State.Column + Current.ColumnWidth;
- NewIndent = Column;
- LastSpace = Column;
- NestedBlockIndent = Column;
- }
-
bool EndsInComma =
Current.MatchingParen &&
Current.MatchingParen->getPreviousNonComment() &&
@@ -1200,11 +1276,93 @@ void ContinuationIndenter::moveStateToNewBlock(LineState &State) {
State.Stack.back().BreakBeforeParameter = true;
}
-unsigned ContinuationIndenter::addMultilineToken(const FormatToken &Current,
- LineState &State) {
- if (!Current.IsMultiline)
+static unsigned getLastLineEndColumn(StringRef Text, unsigned StartColumn,
+ unsigned TabWidth,
+ encoding::Encoding Encoding) {
+ size_t LastNewlinePos = Text.find_last_of("\n");
+ if (LastNewlinePos == StringRef::npos) {
+ return StartColumn +
+ encoding::columnWidthWithTabs(Text, StartColumn, TabWidth, Encoding);
+ } else {
+ return encoding::columnWidthWithTabs(Text.substr(LastNewlinePos),
+ /*StartColumn=*/0, TabWidth, Encoding);
+ }
+}
+
+unsigned ContinuationIndenter::reformatRawStringLiteral(
+ const FormatToken &Current, LineState &State,
+ const FormatStyle &RawStringStyle, bool DryRun) {
+ unsigned StartColumn = State.Column - Current.ColumnWidth;
+ auto Delimiter = *getRawStringDelimiter(Current.TokenText);
+ // The text of a raw string is between the leading 'R"delimiter(' and the
+ // trailing 'delimiter)"'.
+ unsigned PrefixSize = 3 + Delimiter.size();
+ unsigned SuffixSize = 2 + Delimiter.size();
+
+ // The first start column is the column the raw text starts.
+ unsigned FirstStartColumn = StartColumn + PrefixSize;
+
+ // The next start column is the intended indentation a line break inside
+ // the raw string at level 0. It is determined by the following rules:
+ // - if the content starts on newline, it is one level more than the current
+ // indent, and
+ // - if the content does not start on a newline, it is the first start
+ // column.
+ // These rules have the advantage that the formatted content both does not
+ // violate the rectangle rule and visually flows within the surrounding
+ // source.
+ bool ContentStartsOnNewline = Current.TokenText[PrefixSize] == '\n';
+ unsigned NextStartColumn = ContentStartsOnNewline
+ ? State.Stack.back().Indent + Style.IndentWidth
+ : FirstStartColumn;
+
+ // The last start column is the column the raw string suffix starts if it is
+ // put on a newline.
+ // The last start column is the intended indentation of the raw string postfix
+ // if it is put on a newline. It is determined by the following rules:
+ // - if the raw string prefix starts on a newline, it is the column where
+ // that raw string prefix starts, and
+ // - if the raw string prefix does not start on a newline, it is the current
+ // indent.
+ unsigned LastStartColumn = Current.NewlinesBefore
+ ? FirstStartColumn - PrefixSize
+ : State.Stack.back().Indent;
+
+ std::string RawText =
+ Current.TokenText.substr(PrefixSize).drop_back(SuffixSize);
+
+ std::pair<tooling::Replacements, unsigned> Fixes = internal::reformat(
+ RawStringStyle, RawText, {tooling::Range(0, RawText.size())},
+ FirstStartColumn, NextStartColumn, LastStartColumn, "<stdin>",
+ /*Status=*/nullptr);
+
+ auto NewCode = applyAllReplacements(RawText, Fixes.first);
+ tooling::Replacements NoFixes;
+ if (!NewCode) {
+ State.Column += Current.ColumnWidth;
return 0;
+ }
+ if (!DryRun) {
+ SourceLocation OriginLoc =
+ Current.Tok.getLocation().getLocWithOffset(PrefixSize);
+ for (const tooling::Replacement &Fix : Fixes.first) {
+ auto Err = Whitespaces.addReplacement(tooling::Replacement(
+ SourceMgr, OriginLoc.getLocWithOffset(Fix.getOffset()),
+ Fix.getLength(), Fix.getReplacementText()));
+ if (Err) {
+ llvm::errs() << "Failed to reformat raw string: "
+ << llvm::toString(std::move(Err)) << "\n";
+ }
+ }
+ }
+ unsigned RawLastLineEndColumn = getLastLineEndColumn(
+ *NewCode, FirstStartColumn, Style.TabWidth, Encoding);
+ State.Column = RawLastLineEndColumn + SuffixSize;
+ return Fixes.second;
+}
+unsigned ContinuationIndenter::addMultilineToken(const FormatToken &Current,
+ LineState &State) {
// Break before further function parameters on all levels.
for (unsigned i = 0, e = State.Stack.size(); i != e; ++i)
State.Stack[i].BreakBeforeParameter = true;
@@ -1219,33 +1377,85 @@ unsigned ContinuationIndenter::addMultilineToken(const FormatToken &Current,
return 0;
}
-unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
- LineState &State,
- bool DryRun) {
- // Don't break multi-line tokens other than block comments. Instead, just
- // update the state.
- if (Current.isNot(TT_BlockComment) && Current.IsMultiline)
- return addMultilineToken(Current, State);
-
- // Don't break implicit string literals or import statements.
- if (Current.is(TT_ImplicitStringLiteral) ||
- State.Line->Type == LT_ImportStatement)
- return 0;
+unsigned ContinuationIndenter::handleEndOfLine(const FormatToken &Current,
+ LineState &State, bool DryRun,
+ bool AllowBreak) {
+ unsigned Penalty = 0;
+ // Compute the raw string style to use in case this is a raw string literal
+ // that can be reformatted.
+ auto RawStringStyle = getRawStringStyle(Current, State);
+ if (RawStringStyle) {
+ Penalty = reformatRawStringLiteral(Current, State, *RawStringStyle, DryRun);
+ } else if (Current.IsMultiline && Current.isNot(TT_BlockComment)) {
+ // Don't break multi-line tokens other than block comments and raw string
+ // literals. Instead, just update the state.
+ Penalty = addMultilineToken(Current, State);
+ } else if (State.Line->Type != LT_ImportStatement) {
+ // We generally don't break import statements.
+ LineState OriginalState = State;
+
+ // Whether we force the reflowing algorithm to stay strictly within the
+ // column limit.
+ bool Strict = false;
+ // Whether the first non-strict attempt at reflowing did intentionally
+ // exceed the column limit.
+ bool Exceeded = false;
+ std::tie(Penalty, Exceeded) = breakProtrudingToken(
+ Current, State, AllowBreak, /*DryRun=*/true, Strict);
+ if (Exceeded) {
+ // If non-strict reflowing exceeds the column limit, try whether strict
+ // reflowing leads to an overall lower penalty.
+ LineState StrictState = OriginalState;
+ unsigned StrictPenalty =
+ breakProtrudingToken(Current, StrictState, AllowBreak,
+ /*DryRun=*/true, /*Strict=*/true)
+ .first;
+ Strict = StrictPenalty <= Penalty;
+ if (Strict) {
+ Penalty = StrictPenalty;
+ State = StrictState;
+ }
+ }
+ if (!DryRun) {
+ // If we're not in dry-run mode, apply the changes with the decision on
+ // strictness made above.
+ breakProtrudingToken(Current, OriginalState, AllowBreak, /*DryRun=*/false,
+ Strict);
+ }
+ }
+ if (State.Column > getColumnLimit(State)) {
+ unsigned ExcessCharacters = State.Column - getColumnLimit(State);
+ Penalty += Style.PenaltyExcessCharacter * ExcessCharacters;
+ }
+ return Penalty;
+}
- if (!Current.isStringLiteral() && !Current.is(tok::comment))
- return 0;
+llvm::Optional<FormatStyle>
+ContinuationIndenter::getRawStringStyle(const FormatToken &Current,
+ const LineState &State) {
+ if (!Current.isStringLiteral())
+ return None;
+ auto Delimiter = getRawStringDelimiter(Current.TokenText);
+ if (!Delimiter)
+ return None;
+ auto RawStringStyle = RawStringFormats.get(*Delimiter);
+ if (!RawStringStyle)
+ return None;
+ RawStringStyle->ColumnLimit = getColumnLimit(State);
+ return RawStringStyle;
+}
- std::unique_ptr<BreakableToken> Token;
+std::unique_ptr<BreakableToken> ContinuationIndenter::createBreakableToken(
+ const FormatToken &Current, LineState &State, bool AllowBreak) {
unsigned StartColumn = State.Column - Current.ColumnWidth;
- unsigned ColumnLimit = getColumnLimit(State);
-
if (Current.isStringLiteral()) {
// FIXME: String literal breaking is currently disabled for Java and JS, as
// it requires strings to be merged using "+" which we don't support.
if (Style.Language == FormatStyle::LK_Java ||
Style.Language == FormatStyle::LK_JavaScript ||
- !Style.BreakStringLiterals)
- return 0;
+ !Style.BreakStringLiterals ||
+ !AllowBreak)
+ return nullptr;
// Don't break string literals inside preprocessor directives (except for
// #define directives, as their contents are stored in separate lines and
@@ -1253,11 +1463,11 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
// This way we avoid breaking code with line directives and unknown
// preprocessor directives that contain long string literals.
if (State.Line->Type == LT_PreprocessorDirective)
- return 0;
+ return nullptr;
// Exempts unterminated string literals from line breaking. The user will
// likely want to terminate the string before any line breaking is done.
if (Current.IsUnterminatedLiteral)
- return 0;
+ return nullptr;
StringRef Text = Current.TokenText;
StringRef Prefix;
@@ -1272,114 +1482,359 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
Text.startswith(Prefix = "u8\"") ||
Text.startswith(Prefix = "L\""))) ||
(Text.startswith(Prefix = "_T(\"") && Text.endswith(Postfix = "\")"))) {
- Token.reset(new BreakableStringLiteral(Current, StartColumn, Prefix,
- Postfix, State.Line->InPPDirective,
- Encoding, Style));
- } else {
- return 0;
+ return llvm::make_unique<BreakableStringLiteral>(
+ Current, StartColumn, Prefix, Postfix, State.Line->InPPDirective,
+ Encoding, Style);
}
} else if (Current.is(TT_BlockComment)) {
- if (!Current.isTrailingComment() || !Style.ReflowComments ||
+ if (!Style.ReflowComments ||
// If a comment token switches formatting, like
// /* clang-format on */, we don't want to break it further,
// but we may still want to adjust its indentation.
- switchesFormatting(Current))
- return addMultilineToken(Current, State);
- Token.reset(new BreakableBlockComment(
+ switchesFormatting(Current)) {
+ return nullptr;
+ }
+ return llvm::make_unique<BreakableBlockComment>(
Current, StartColumn, Current.OriginalColumn, !Current.Previous,
- State.Line->InPPDirective, Encoding, Style));
+ State.Line->InPPDirective, Encoding, Style);
} else if (Current.is(TT_LineComment) &&
(Current.Previous == nullptr ||
Current.Previous->isNot(TT_ImplicitStringLiteral))) {
if (!Style.ReflowComments ||
CommentPragmasRegex.match(Current.TokenText.substr(2)) ||
switchesFormatting(Current))
- return 0;
- Token.reset(new BreakableLineCommentSection(
+ return nullptr;
+ return llvm::make_unique<BreakableLineCommentSection>(
Current, StartColumn, Current.OriginalColumn, !Current.Previous,
- /*InPPDirective=*/false, Encoding, Style));
+ /*InPPDirective=*/false, Encoding, Style);
+ }
+ return nullptr;
+}
+
+std::pair<unsigned, bool>
+ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
+ LineState &State, bool AllowBreak,
+ bool DryRun, bool Strict) {
+ std::unique_ptr<const BreakableToken> Token =
+ createBreakableToken(Current, State, AllowBreak);
+ if (!Token)
+ return {0, false};
+ assert(Token->getLineCount() > 0);
+ unsigned ColumnLimit = getColumnLimit(State);
+ if (Current.is(TT_LineComment)) {
// We don't insert backslashes when breaking line comments.
ColumnLimit = Style.ColumnLimit;
- } else {
- return 0;
}
if (Current.UnbreakableTailLength >= ColumnLimit)
- return 0;
-
- unsigned RemainingSpace = ColumnLimit - Current.UnbreakableTailLength;
- bool BreakInserted = false;
+ return {0, false};
+ // ColumnWidth was already accounted into State.Column before calling
+ // breakProtrudingToken.
+ unsigned StartColumn = State.Column - Current.ColumnWidth;
+ unsigned NewBreakPenalty = Current.isStringLiteral()
+ ? Style.PenaltyBreakString
+ : Style.PenaltyBreakComment;
+ // Stores whether we intentionally decide to let a line exceed the column
+ // limit.
+ bool Exceeded = false;
+ // Stores whether we introduce a break anywhere in the token.
+ bool BreakInserted = Token->introducesBreakBeforeToken();
+ // Store whether we inserted a new line break at the end of the previous
+ // logical line.
+ bool NewBreakBefore = false;
// We use a conservative reflowing strategy. Reflow starts after a line is
// broken or the corresponding whitespace compressed. Reflow ends as soon as a
// line that doesn't get reflown with the previous line is reached.
- bool ReflowInProgress = false;
+ bool Reflow = false;
+ // Keep track of where we are in the token:
+ // Where we are in the content of the current logical line.
+ unsigned TailOffset = 0;
+ // The column number we're currently at.
+ unsigned ContentStartColumn =
+ Token->getContentStartColumn(0, /*Break=*/false);
+ // The number of columns left in the current logical line after TailOffset.
+ unsigned RemainingTokenColumns =
+ Token->getRemainingLength(0, TailOffset, ContentStartColumn);
+ // Adapt the start of the token, for example indent.
+ if (!DryRun)
+ Token->adaptStartOfLine(0, Whitespaces);
+
unsigned Penalty = 0;
- unsigned RemainingTokenColumns = 0;
+ DEBUG(llvm::dbgs() << "Breaking protruding token at column " << StartColumn
+ << ".\n");
for (unsigned LineIndex = 0, EndIndex = Token->getLineCount();
LineIndex != EndIndex; ++LineIndex) {
- BreakableToken::Split SplitBefore(StringRef::npos, 0);
- if (ReflowInProgress) {
- SplitBefore = Token->getSplitBefore(LineIndex, RemainingTokenColumns,
- RemainingSpace, CommentPragmasRegex);
- }
- ReflowInProgress = SplitBefore.first != StringRef::npos;
- unsigned TailOffset =
- ReflowInProgress ? (SplitBefore.first + SplitBefore.second) : 0;
- if (!DryRun)
- Token->replaceWhitespaceBefore(LineIndex, RemainingTokenColumns,
- RemainingSpace, SplitBefore, Whitespaces);
- RemainingTokenColumns = Token->getLineLengthAfterSplitBefore(
- LineIndex, TailOffset, RemainingTokenColumns, ColumnLimit, SplitBefore);
- while (RemainingTokenColumns > RemainingSpace) {
- BreakableToken::Split Split = Token->getSplit(
- LineIndex, TailOffset, ColumnLimit, CommentPragmasRegex);
+ DEBUG(llvm::dbgs() << " Line: " << LineIndex << " (Reflow: " << Reflow
+ << ")\n");
+ NewBreakBefore = false;
+ // If we did reflow the previous line, we'll try reflowing again. Otherwise
+ // we'll start reflowing if the current line is broken or whitespace is
+ // compressed.
+ bool TryReflow = Reflow;
+ // Break the current token until we can fit the rest of the line.
+ while (ContentStartColumn + RemainingTokenColumns > ColumnLimit) {
+ DEBUG(llvm::dbgs() << " Over limit, need: "
+ << (ContentStartColumn + RemainingTokenColumns)
+ << ", space: " << ColumnLimit
+ << ", reflown prefix: " << ContentStartColumn
+ << ", offset in line: " << TailOffset << "\n");
+ // If the current token doesn't fit, find the latest possible split in the
+ // current line so that breaking at it will be under the column limit.
+ // FIXME: Use the earliest possible split while reflowing to correctly
+ // compress whitespace within a line.
+ BreakableToken::Split Split =
+ Token->getSplit(LineIndex, TailOffset, ColumnLimit,
+ ContentStartColumn, CommentPragmasRegex);
if (Split.first == StringRef::npos) {
- // The last line's penalty is handled in addNextStateToQueue().
+ // No break opportunity - update the penalty and continue with the next
+ // logical line.
if (LineIndex < EndIndex - 1)
+ // The last line's penalty is handled in addNextStateToQueue().
Penalty += Style.PenaltyExcessCharacter *
- (RemainingTokenColumns - RemainingSpace);
+ (ContentStartColumn + RemainingTokenColumns - ColumnLimit);
+ DEBUG(llvm::dbgs() << " No break opportunity.\n");
break;
}
assert(Split.first != 0);
- // Check if compressing the whitespace range will bring the line length
- // under the limit. If that is the case, we perform whitespace compression
- // instead of inserting a line break.
- unsigned RemainingTokenColumnsAfterCompression =
- Token->getLineLengthAfterCompression(RemainingTokenColumns, Split);
- if (RemainingTokenColumnsAfterCompression <= RemainingSpace) {
- RemainingTokenColumns = RemainingTokenColumnsAfterCompression;
- ReflowInProgress = true;
- if (!DryRun)
- Token->compressWhitespace(LineIndex, TailOffset, Split, Whitespaces);
- break;
+ if (Token->supportsReflow()) {
+ // Check whether the next natural split point after the current one can
+ // still fit the line, either because we can compress away whitespace,
+ // or because the penalty the excess characters introduce is lower than
+ // the break penalty.
+ // We only do this for tokens that support reflowing, and thus allow us
+ // to change the whitespace arbitrarily (e.g. comments).
+ // Other tokens, like string literals, can be broken on arbitrary
+ // positions.
+
+ // First, compute the columns from TailOffset to the next possible split
+ // position.
+ // For example:
+ // ColumnLimit: |
+ // // Some text that breaks
+ // ^ tail offset
+ // ^-- split
+ // ^-------- to split columns
+ // ^--- next split
+ // ^--------------- to next split columns
+ unsigned ToSplitColumns = Token->getRangeLength(
+ LineIndex, TailOffset, Split.first, ContentStartColumn);
+ DEBUG(llvm::dbgs() << " ToSplit: " << ToSplitColumns << "\n");
+
+ BreakableToken::Split NextSplit = Token->getSplit(
+ LineIndex, TailOffset + Split.first + Split.second, ColumnLimit,
+ ContentStartColumn + ToSplitColumns + 1, CommentPragmasRegex);
+ // Compute the columns necessary to fit the next non-breakable sequence
+ // into the current line.
+ unsigned ToNextSplitColumns = 0;
+ if (NextSplit.first == StringRef::npos) {
+ ToNextSplitColumns = Token->getRemainingLength(LineIndex, TailOffset,
+ ContentStartColumn);
+ } else {
+ ToNextSplitColumns = Token->getRangeLength(
+ LineIndex, TailOffset,
+ Split.first + Split.second + NextSplit.first, ContentStartColumn);
+ }
+ // Compress the whitespace between the break and the start of the next
+ // unbreakable sequence.
+ ToNextSplitColumns =
+ Token->getLengthAfterCompression(ToNextSplitColumns, Split);
+ DEBUG(llvm::dbgs() << " ContentStartColumn: " << ContentStartColumn
+ << "\n");
+ DEBUG(llvm::dbgs() << " ToNextSplit: " << ToNextSplitColumns << "\n");
+ // If the whitespace compression makes us fit, continue on the current
+ // line.
+ bool ContinueOnLine =
+ ContentStartColumn + ToNextSplitColumns <= ColumnLimit;
+ unsigned ExcessCharactersPenalty = 0;
+ if (!ContinueOnLine && !Strict) {
+ // Similarly, if the excess characters' penalty is lower than the
+ // penalty of introducing a new break, continue on the current line.
+ ExcessCharactersPenalty =
+ (ContentStartColumn + ToNextSplitColumns - ColumnLimit) *
+ Style.PenaltyExcessCharacter;
+ DEBUG(llvm::dbgs()
+ << " Penalty excess: " << ExcessCharactersPenalty
+ << "\n break : " << NewBreakPenalty << "\n");
+ if (ExcessCharactersPenalty < NewBreakPenalty) {
+ Exceeded = true;
+ ContinueOnLine = true;
+ }
+ }
+ if (ContinueOnLine) {
+ DEBUG(llvm::dbgs() << " Continuing on line...\n");
+ // The current line fits after compressing the whitespace - reflow
+ // the next line into it if possible.
+ TryReflow = true;
+ if (!DryRun)
+ Token->compressWhitespace(LineIndex, TailOffset, Split,
+ Whitespaces);
+ // When we continue on the same line, leave one space between content.
+ ContentStartColumn += ToSplitColumns + 1;
+ Penalty += ExcessCharactersPenalty;
+ TailOffset += Split.first + Split.second;
+ RemainingTokenColumns = Token->getRemainingLength(
+ LineIndex, TailOffset, ContentStartColumn);
+ continue;
+ }
}
-
- unsigned NewRemainingTokenColumns = Token->getLineLengthAfterSplit(
- LineIndex, TailOffset + Split.first + Split.second, StringRef::npos);
+ DEBUG(llvm::dbgs() << " Breaking...\n");
+ ContentStartColumn =
+ Token->getContentStartColumn(LineIndex, /*Break=*/true);
+ unsigned NewRemainingTokenColumns = Token->getRemainingLength(
+ LineIndex, TailOffset + Split.first + Split.second,
+ ContentStartColumn);
// When breaking before a tab character, it may be moved by a few columns,
// but will still be expanded to the next tab stop, so we don't save any
// columns.
- if (NewRemainingTokenColumns == RemainingTokenColumns)
+ if (NewRemainingTokenColumns == RemainingTokenColumns) {
+ // FIXME: Do we need to adjust the penalty?
break;
-
+ }
assert(NewRemainingTokenColumns < RemainingTokenColumns);
+
+ DEBUG(llvm::dbgs() << " Breaking at: " << TailOffset + Split.first
+ << ", " << Split.second << "\n");
if (!DryRun)
Token->insertBreak(LineIndex, TailOffset, Split, Whitespaces);
- Penalty += Current.SplitPenalty;
- unsigned ColumnsUsed =
- Token->getLineLengthAfterSplit(LineIndex, TailOffset, Split.first);
- if (ColumnsUsed > ColumnLimit) {
- Penalty += Style.PenaltyExcessCharacter * (ColumnsUsed - ColumnLimit);
- }
+
+ Penalty += NewBreakPenalty;
TailOffset += Split.first + Split.second;
RemainingTokenColumns = NewRemainingTokenColumns;
- ReflowInProgress = true;
BreakInserted = true;
+ NewBreakBefore = true;
}
+ // In case there's another line, prepare the state for the start of the next
+ // line.
+ if (LineIndex + 1 != EndIndex) {
+ unsigned NextLineIndex = LineIndex + 1;
+ if (NewBreakBefore)
+ // After breaking a line, try to reflow the next line into the current
+ // one once RemainingTokenColumns fits.
+ TryReflow = true;
+ if (TryReflow) {
+ // We decided that we want to try reflowing the next line into the
+ // current one.
+ // We will now adjust the state as if the reflow is successful (in
+ // preparation for the next line), and see whether that works. If we
+ // decide that we cannot reflow, we will later reset the state to the
+ // start of the next line.
+ Reflow = false;
+ // As we did not continue breaking the line, RemainingTokenColumns is
+ // known to fit after ContentStartColumn. Adapt ContentStartColumn to
+ // the position at which we want to format the next line if we do
+ // actually reflow.
+ // When we reflow, we need to add a space between the end of the current
+ // line and the next line's start column.
+ ContentStartColumn += RemainingTokenColumns + 1;
+ // Get the split that we need to reflow next logical line into the end
+ // of the current one; the split will include any leading whitespace of
+ // the next logical line.
+ BreakableToken::Split SplitBeforeNext =
+ Token->getReflowSplit(NextLineIndex, CommentPragmasRegex);
+ DEBUG(llvm::dbgs() << " Size of reflown text: " << ContentStartColumn
+ << "\n Potential reflow split: ");
+ if (SplitBeforeNext.first != StringRef::npos) {
+ DEBUG(llvm::dbgs() << SplitBeforeNext.first << ", "
+ << SplitBeforeNext.second << "\n");
+ TailOffset = SplitBeforeNext.first + SplitBeforeNext.second;
+ // If the rest of the next line fits into the current line below the
+ // column limit, we can safely reflow.
+ RemainingTokenColumns = Token->getRemainingLength(
+ NextLineIndex, TailOffset, ContentStartColumn);
+ Reflow = true;
+ if (ContentStartColumn + RemainingTokenColumns > ColumnLimit) {
+ DEBUG(llvm::dbgs() << " Over limit after reflow, need: "
+ << (ContentStartColumn + RemainingTokenColumns)
+ << ", space: " << ColumnLimit
+ << ", reflown prefix: " << ContentStartColumn
+ << ", offset in line: " << TailOffset << "\n");
+ // If the whole next line does not fit, try to find a point in
+ // the next line at which we can break so that attaching the part
+ // of the next line to that break point onto the current line is
+ // below the column limit.
+ BreakableToken::Split Split =
+ Token->getSplit(NextLineIndex, TailOffset, ColumnLimit,
+ ContentStartColumn, CommentPragmasRegex);
+ if (Split.first == StringRef::npos) {
+ DEBUG(llvm::dbgs() << " Did not find later break\n");
+ Reflow = false;
+ } else {
+ // Check whether the first split point gets us below the column
+ // limit. Note that we will execute this split below as part of
+ // the normal token breaking and reflow logic within the line.
+ unsigned ToSplitColumns = Token->getRangeLength(
+ NextLineIndex, TailOffset, Split.first, ContentStartColumn);
+ if (ContentStartColumn + ToSplitColumns > ColumnLimit) {
+ DEBUG(llvm::dbgs() << " Next split protrudes, need: "
+ << (ContentStartColumn + ToSplitColumns)
+ << ", space: " << ColumnLimit);
+ unsigned ExcessCharactersPenalty =
+ (ContentStartColumn + ToSplitColumns - ColumnLimit) *
+ Style.PenaltyExcessCharacter;
+ if (NewBreakPenalty < ExcessCharactersPenalty) {
+ Reflow = false;
+ }
+ }
+ }
+ }
+ } else {
+ DEBUG(llvm::dbgs() << "not found.\n");
+ }
+ }
+ if (!Reflow) {
+ // If we didn't reflow into the next line, the only space to consider is
+ // the next logical line. Reset our state to match the start of the next
+ // line.
+ TailOffset = 0;
+ ContentStartColumn =
+ Token->getContentStartColumn(NextLineIndex, /*Break=*/false);
+ RemainingTokenColumns = Token->getRemainingLength(
+ NextLineIndex, TailOffset, ContentStartColumn);
+ // Adapt the start of the token, for example indent.
+ if (!DryRun)
+ Token->adaptStartOfLine(NextLineIndex, Whitespaces);
+ } else {
+ // If we found a reflow split and have added a new break before the next
+ // line, we are going to remove the line break at the start of the next
+ // logical line. For example, here we'll add a new line break after
+ // 'text', and subsequently delete the line break between 'that' and
+ // 'reflows'.
+ // // some text that
+ // // reflows
+ // ->
+ // // some text
+ // // that reflows
+ // When adding the line break, we also added the penalty for it, so we
+ // need to subtract that penalty again when we remove the line break due
+ // to reflowing.
+ if (NewBreakBefore) {
+ assert(Penalty >= NewBreakPenalty);
+ Penalty -= NewBreakPenalty;
+ }
+ if (!DryRun)
+ Token->reflow(NextLineIndex, Whitespaces);
+ }
+ }
+ }
+
+ BreakableToken::Split SplitAfterLastLine =
+ Token->getSplitAfterLastLine(TailOffset);
+ if (SplitAfterLastLine.first != StringRef::npos) {
+ DEBUG(llvm::dbgs() << "Replacing whitespace after last line.\n");
+ if (!DryRun)
+ Token->replaceWhitespaceAfterLastLine(TailOffset, SplitAfterLastLine,
+ Whitespaces);
+ ContentStartColumn =
+ Token->getContentStartColumn(Token->getLineCount() - 1, /*Break=*/true);
+ RemainingTokenColumns = Token->getRemainingLength(
+ Token->getLineCount() - 1,
+ TailOffset + SplitAfterLastLine.first + SplitAfterLastLine.second,
+ ContentStartColumn);
}
- State.Column = RemainingTokenColumns;
+ State.Column = ContentStartColumn + RemainingTokenColumns -
+ Current.UnbreakableTailLength;
if (BreakInserted) {
// If we break the token inside a parameter list, we need to break before
@@ -1390,15 +1845,15 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
State.Stack[i].BreakBeforeParameter = true;
}
- Penalty += Current.isStringLiteral() ? Style.PenaltyBreakString
- : Style.PenaltyBreakComment;
+ if (Current.is(TT_BlockComment))
+ State.NoContinuation = true;
State.Stack.back().LastSpace = StartColumn;
}
Token->updateNextToken(State);
- return Penalty;
+ return {Penalty, Exceeded};
}
unsigned ContinuationIndenter::getColumnLimit(const LineState &State) const {
diff --git a/lib/Format/ContinuationIndenter.h b/lib/Format/ContinuationIndenter.h
index 9a06aa6f6267..ded7bfab4267 100644
--- a/lib/Format/ContinuationIndenter.h
+++ b/lib/Format/ContinuationIndenter.h
@@ -20,6 +20,8 @@
#include "FormatToken.h"
#include "clang/Format/Format.h"
#include "llvm/Support/Regex.h"
+#include <map>
+#include <tuple>
namespace clang {
class SourceManager;
@@ -27,11 +29,21 @@ class SourceManager;
namespace format {
class AnnotatedLine;
+class BreakableToken;
struct FormatToken;
struct LineState;
struct ParenState;
+struct RawStringFormatStyleManager;
class WhitespaceManager;
+struct RawStringFormatStyleManager {
+ llvm::StringMap<FormatStyle> DelimiterStyle;
+
+ RawStringFormatStyleManager(const FormatStyle &CodeStyle);
+
+ llvm::Optional<FormatStyle> get(StringRef Delimiter) const;
+};
+
class ContinuationIndenter {
public:
/// \brief Constructs a \c ContinuationIndenter to format \p Line starting in
@@ -44,9 +56,11 @@ public:
bool BinPackInconclusiveFunctions);
/// \brief Get the initial state, i.e. the state after placing \p Line's
- /// first token at \p FirstIndent.
- LineState getInitialState(unsigned FirstIndent, const AnnotatedLine *Line,
- bool DryRun);
+ /// first token at \p FirstIndent. When reformatting a fragment of code, as in
+ /// the case of formatting inside raw string literals, \p FirstStartColumn is
+ /// the column at which the state of the parent formatter is.
+ LineState getInitialState(unsigned FirstIndent, unsigned FirstStartColumn,
+ const AnnotatedLine *Line, bool DryRun);
// FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a
// better home.
@@ -88,17 +102,52 @@ private:
/// \brief Update 'State' with the next token opening a nested block.
void moveStateToNewBlock(LineState &State);
+ /// \brief Reformats a raw string literal.
+ ///
+ /// \returns An extra penalty induced by reformatting the token.
+ unsigned reformatRawStringLiteral(const FormatToken &Current,
+ LineState &State,
+ const FormatStyle &RawStringStyle,
+ bool DryRun);
+
+ /// \brief If the current token is at the end of the current line, handle
+ /// the transition to the next line.
+ unsigned handleEndOfLine(const FormatToken &Current, LineState &State,
+ bool DryRun, bool AllowBreak);
+
+ /// \brief If \p Current is a raw string that is configured to be reformatted,
+ /// return the style to be used.
+ llvm::Optional<FormatStyle> getRawStringStyle(const FormatToken &Current,
+ const LineState &State);
+
/// \brief If the current token sticks out over the end of the line, break
/// it if possible.
///
- /// \returns An extra penalty if a token was broken, otherwise 0.
+ /// \returns A pair (penalty, exceeded), where penalty is the extra penalty
+ /// when tokens are broken or lines exceed the column limit, and exceeded
+ /// indicates whether the algorithm purposefully left lines exceeding the
+ /// column limit.
///
- /// The returned penalty will cover the cost of the additional line breaks and
- /// column limit violation in all lines except for the last one. The penalty
- /// for the column limit violation in the last line (and in single line
- /// tokens) is handled in \c addNextStateToQueue.
- unsigned breakProtrudingToken(const FormatToken &Current, LineState &State,
- bool DryRun);
+ /// The returned penalty will cover the cost of the additional line breaks
+ /// and column limit violation in all lines except for the last one. The
+ /// penalty for the column limit violation in the last line (and in single
+ /// line tokens) is handled in \c addNextStateToQueue.
+ ///
+ /// \p Strict indicates whether reflowing is allowed to leave characters
+ /// protruding the column limit; if true, lines will be split strictly within
+ /// the column limit where possible; if false, words are allowed to protrude
+ /// over the column limit as long as the penalty is less than the penalty
+ /// of a break.
+ std::pair<unsigned, bool> breakProtrudingToken(const FormatToken &Current,
+ LineState &State,
+ bool AllowBreak, bool DryRun,
+ bool Strict);
+
+ /// \brief Returns the \c BreakableToken starting at \p Current, or nullptr
+ /// if the current token cannot be broken.
+ std::unique_ptr<BreakableToken>
+ createBreakableToken(const FormatToken &Current, LineState &State,
+ bool AllowBreak);
/// \brief Appends the next token to \p State and updates information
/// necessary for indentation.
@@ -143,6 +192,7 @@ private:
encoding::Encoding Encoding;
bool BinPackInconclusiveFunctions;
llvm::Regex CommentPragmasRegex;
+ const RawStringFormatStyleManager RawStringFormats;
};
struct ParenState {
@@ -318,6 +368,9 @@ struct LineState {
/// \brief \c true if this line contains a continued for-loop section.
bool LineContainsContinuedForLoopSection;
+ /// \brief \c true if \p NextToken should not continue this line.
+ bool NoContinuation;
+
/// \brief The \c NestingLevel at the start of this line.
unsigned StartOfLineLevel;
@@ -364,6 +417,8 @@ struct LineState {
if (LineContainsContinuedForLoopSection !=
Other.LineContainsContinuedForLoopSection)
return LineContainsContinuedForLoopSection;
+ if (NoContinuation != Other.NoContinuation)
+ return NoContinuation;
if (StartOfLineLevel != Other.StartOfLineLevel)
return StartOfLineLevel < Other.StartOfLineLevel;
if (LowestLevelOnLine != Other.LowestLevelOnLine)
diff --git a/lib/Format/Format.cpp b/lib/Format/Format.cpp
index 6fe5be2c815d..217c6729ee39 100644
--- a/lib/Format/Format.cpp
+++ b/lib/Format/Format.cpp
@@ -16,6 +16,7 @@
#include "clang/Format/Format.h"
#include "AffectedRangeManager.h"
#include "ContinuationIndenter.h"
+#include "FormatInternal.h"
#include "FormatTokenLexer.h"
#include "NamespaceEndCommentsFixer.h"
#include "SortJavaScriptImports.h"
@@ -45,6 +46,7 @@
using clang::format::FormatStyle;
LLVM_YAML_IS_SEQUENCE_VECTOR(clang::format::FormatStyle::IncludeCategory)
+LLVM_YAML_IS_SEQUENCE_VECTOR(clang::format::FormatStyle::RawStringFormat)
namespace llvm {
namespace yaml {
@@ -125,8 +127,10 @@ template <> struct ScalarEnumerationTraits<FormatStyle::BraceBreakingStyle> {
}
};
-template <> struct ScalarEnumerationTraits<FormatStyle::BreakConstructorInitializersStyle> {
- static void enumeration(IO &IO, FormatStyle::BreakConstructorInitializersStyle &Value) {
+template <>
+struct ScalarEnumerationTraits<FormatStyle::BreakConstructorInitializersStyle> {
+ static void
+ enumeration(IO &IO, FormatStyle::BreakConstructorInitializersStyle &Value) {
IO.enumCase(Value, "BeforeColon", FormatStyle::BCIS_BeforeColon);
IO.enumCase(Value, "BeforeComma", FormatStyle::BCIS_BeforeComma);
IO.enumCase(Value, "AfterColon", FormatStyle::BCIS_AfterColon);
@@ -134,6 +138,14 @@ template <> struct ScalarEnumerationTraits<FormatStyle::BreakConstructorInitiali
};
template <>
+struct ScalarEnumerationTraits<FormatStyle::PPDirectiveIndentStyle> {
+ static void enumeration(IO &IO, FormatStyle::PPDirectiveIndentStyle &Value) {
+ IO.enumCase(Value, "None", FormatStyle::PPDIS_None);
+ IO.enumCase(Value, "AfterHash", FormatStyle::PPDIS_AfterHash);
+ }
+};
+
+template <>
struct ScalarEnumerationTraits<FormatStyle::ReturnTypeBreakingStyle> {
static void enumeration(IO &IO, FormatStyle::ReturnTypeBreakingStyle &Value) {
IO.enumCase(Value, "None", FormatStyle::RTBS_None);
@@ -181,8 +193,10 @@ template <> struct ScalarEnumerationTraits<FormatStyle::BracketAlignmentStyle> {
}
};
-template <> struct ScalarEnumerationTraits<FormatStyle::EscapedNewlineAlignmentStyle> {
- static void enumeration(IO &IO, FormatStyle::EscapedNewlineAlignmentStyle &Value) {
+template <>
+struct ScalarEnumerationTraits<FormatStyle::EscapedNewlineAlignmentStyle> {
+ static void enumeration(IO &IO,
+ FormatStyle::EscapedNewlineAlignmentStyle &Value) {
IO.enumCase(Value, "DontAlign", FormatStyle::ENAS_DontAlign);
IO.enumCase(Value, "Left", FormatStyle::ENAS_Left);
IO.enumCase(Value, "Right", FormatStyle::ENAS_Right);
@@ -347,9 +361,11 @@ template <> struct MappingTraits<FormatStyle> {
Style.ExperimentalAutoDetectBinPacking);
IO.mapOptional("FixNamespaceComments", Style.FixNamespaceComments);
IO.mapOptional("ForEachMacros", Style.ForEachMacros);
+ IO.mapOptional("IncludeBlocks", Style.IncludeBlocks);
IO.mapOptional("IncludeCategories", Style.IncludeCategories);
IO.mapOptional("IncludeIsMainRegex", Style.IncludeIsMainRegex);
IO.mapOptional("IndentCaseLabels", Style.IndentCaseLabels);
+ IO.mapOptional("IndentPPDirectives", Style.IndentPPDirectives);
IO.mapOptional("IndentWidth", Style.IndentWidth);
IO.mapOptional("IndentWrappedFunctionNames",
Style.IndentWrappedFunctionNames);
@@ -365,8 +381,7 @@ template <> struct MappingTraits<FormatStyle> {
IO.mapOptional("ObjCSpaceAfterProperty", Style.ObjCSpaceAfterProperty);
IO.mapOptional("ObjCSpaceBeforeProtocolList",
Style.ObjCSpaceBeforeProtocolList);
- IO.mapOptional("PenaltyBreakAssignment",
- Style.PenaltyBreakAssignment);
+ IO.mapOptional("PenaltyBreakAssignment", Style.PenaltyBreakAssignment);
IO.mapOptional("PenaltyBreakBeforeFirstCallParameter",
Style.PenaltyBreakBeforeFirstCallParameter);
IO.mapOptional("PenaltyBreakComment", Style.PenaltyBreakComment);
@@ -377,11 +392,13 @@ template <> struct MappingTraits<FormatStyle> {
IO.mapOptional("PenaltyReturnTypeOnItsOwnLine",
Style.PenaltyReturnTypeOnItsOwnLine);
IO.mapOptional("PointerAlignment", Style.PointerAlignment);
+ IO.mapOptional("RawStringFormats", Style.RawStringFormats);
IO.mapOptional("ReflowComments", Style.ReflowComments);
IO.mapOptional("SortIncludes", Style.SortIncludes);
IO.mapOptional("SortUsingDeclarations", Style.SortUsingDeclarations);
IO.mapOptional("SpaceAfterCStyleCast", Style.SpaceAfterCStyleCast);
- IO.mapOptional("SpaceAfterTemplateKeyword", Style.SpaceAfterTemplateKeyword);
+ IO.mapOptional("SpaceAfterTemplateKeyword",
+ Style.SpaceAfterTemplateKeyword);
IO.mapOptional("SpaceBeforeAssignmentOperators",
Style.SpaceBeforeAssignmentOperators);
IO.mapOptional("SpaceBeforeParens", Style.SpaceBeforeParens);
@@ -411,6 +428,7 @@ template <> struct MappingTraits<FormatStyle::BraceWrappingFlags> {
IO.mapOptional("AfterObjCDeclaration", Wrapping.AfterObjCDeclaration);
IO.mapOptional("AfterStruct", Wrapping.AfterStruct);
IO.mapOptional("AfterUnion", Wrapping.AfterUnion);
+ IO.mapOptional("AfterExternBlock", Wrapping.AfterExternBlock);
IO.mapOptional("BeforeCatch", Wrapping.BeforeCatch);
IO.mapOptional("BeforeElse", Wrapping.BeforeElse);
IO.mapOptional("IndentBraces", Wrapping.IndentBraces);
@@ -427,6 +445,22 @@ template <> struct MappingTraits<FormatStyle::IncludeCategory> {
}
};
+template <> struct ScalarEnumerationTraits<FormatStyle::IncludeBlocksStyle> {
+ static void enumeration(IO &IO, FormatStyle::IncludeBlocksStyle &Value) {
+ IO.enumCase(Value, "Preserve", FormatStyle::IBS_Preserve);
+ IO.enumCase(Value, "Merge", FormatStyle::IBS_Merge);
+ IO.enumCase(Value, "Regroup", FormatStyle::IBS_Regroup);
+ }
+};
+
+template <> struct MappingTraits<FormatStyle::RawStringFormat> {
+ static void mapping(IO &IO, FormatStyle::RawStringFormat &Format) {
+ IO.mapOptional("Delimiter", Format.Delimiter);
+ IO.mapOptional("Language", Format.Language);
+ IO.mapOptional("BasedOnStyle", Format.BasedOnStyle);
+ }
+};
+
// Allows reading a vector<FormatStyle> while keeping default values.
// IO.getContext() should contain a pointer to the FormatStyle structure, that
// will be used to get default values for missing keys.
@@ -441,7 +475,7 @@ template <> struct DocumentListTraits<std::vector<FormatStyle>> {
if (Index >= Seq.size()) {
assert(Index == Seq.size());
FormatStyle Template;
- if (Seq.size() > 0 && Seq[0].Language == FormatStyle::LK_None) {
+ if (!Seq.empty() && Seq[0].Language == FormatStyle::LK_None) {
Template = Seq[0];
} else {
Template = *((const FormatStyle *)IO.getContext());
@@ -491,9 +525,9 @@ static FormatStyle expandPresets(const FormatStyle &Style) {
if (Style.BreakBeforeBraces == FormatStyle::BS_Custom)
return Style;
FormatStyle Expanded = Style;
- Expanded.BraceWrapping = {false, false, false, false, false, false,
- false, false, false, false, false, true,
- true, true};
+ Expanded.BraceWrapping = {false, false, false, false, false,
+ false, false, false, false, false,
+ false, false, true, true, true};
switch (Style.BreakBeforeBraces) {
case FormatStyle::BS_Linux:
Expanded.BraceWrapping.AfterClass = true;
@@ -506,6 +540,7 @@ static FormatStyle expandPresets(const FormatStyle &Style) {
Expanded.BraceWrapping.AfterFunction = true;
Expanded.BraceWrapping.AfterStruct = true;
Expanded.BraceWrapping.AfterUnion = true;
+ Expanded.BraceWrapping.AfterExternBlock = true;
Expanded.BraceWrapping.SplitEmptyFunction = true;
Expanded.BraceWrapping.SplitEmptyRecord = false;
break;
@@ -522,13 +557,13 @@ static FormatStyle expandPresets(const FormatStyle &Style) {
Expanded.BraceWrapping.AfterNamespace = true;
Expanded.BraceWrapping.AfterObjCDeclaration = true;
Expanded.BraceWrapping.AfterStruct = true;
+ Expanded.BraceWrapping.AfterExternBlock = true;
Expanded.BraceWrapping.BeforeCatch = true;
Expanded.BraceWrapping.BeforeElse = true;
break;
case FormatStyle::BS_GNU:
- Expanded.BraceWrapping = {true, true, true, true, true, true,
- true, true, true, true, true, true,
- true, true};
+ Expanded.BraceWrapping = {true, true, true, true, true, true, true, true,
+ true, true, true, true, true, true, true};
break;
case FormatStyle::BS_WebKit:
Expanded.BraceWrapping.AfterFunction = true;
@@ -564,9 +599,9 @@ FormatStyle getLLVMStyle() {
LLVMStyle.BreakBeforeBinaryOperators = FormatStyle::BOS_None;
LLVMStyle.BreakBeforeTernaryOperators = true;
LLVMStyle.BreakBeforeBraces = FormatStyle::BS_Attach;
- LLVMStyle.BraceWrapping = {false, false, false, false, false, false,
- false, false, false, false, false, true,
- true, true};
+ LLVMStyle.BraceWrapping = {false, false, false, false, false,
+ false, false, false, false, false,
+ false, false, true, true, true};
LLVMStyle.BreakAfterJavaFieldAnnotations = false;
LLVMStyle.BreakConstructorInitializers = FormatStyle::BCIS_BeforeColon;
LLVMStyle.BreakBeforeInheritanceComma = false;
@@ -588,7 +623,9 @@ FormatStyle getLLVMStyle() {
{"^(<|\"(gtest|gmock|isl|json)/)", 3},
{".*", 1}};
LLVMStyle.IncludeIsMainRegex = "(Test)?$";
+ LLVMStyle.IncludeBlocks = FormatStyle::IBS_Preserve;
LLVMStyle.IndentCaseLabels = false;
+ LLVMStyle.IndentPPDirectives = FormatStyle::PPDIS_None;
LLVMStyle.IndentWrappedFunctionNames = false;
LLVMStyle.IndentWidth = 2;
LLVMStyle.JavaScriptQuotes = FormatStyle::JSQS_Leave;
@@ -604,6 +641,7 @@ FormatStyle getLLVMStyle() {
LLVMStyle.SpacesBeforeTrailingComments = 1;
LLVMStyle.Standard = FormatStyle::LS_Cpp11;
LLVMStyle.UseTab = FormatStyle::UT_Never;
+ LLVMStyle.RawStringFormats = {{"pb", FormatStyle::LK_TextProto, "google"}};
LLVMStyle.ReflowComments = true;
LLVMStyle.SpacesInParentheses = false;
LLVMStyle.SpacesInSquareBrackets = false;
@@ -649,7 +687,8 @@ FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) {
GoogleStyle.AlwaysBreakTemplateDeclarations = true;
GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true;
GoogleStyle.DerivePointerAlignment = true;
- GoogleStyle.IncludeCategories = {{"^<.*\\.h>", 1}, {"^<.*", 2}, {".*", 3}};
+ GoogleStyle.IncludeCategories = {
+ {"^<ext/.*\\.h>", 2}, {"^<.*\\.h>", 1}, {"^<.*", 2}, {".*", 3}};
GoogleStyle.IncludeIsMainRegex = "([-_](test|unittest))?$";
GoogleStyle.IndentCaseLabels = true;
GoogleStyle.KeepEmptyLinesAtTheStartOfBlocks = false;
@@ -725,8 +764,7 @@ FormatStyle getMozillaStyle() {
FormatStyle MozillaStyle = getLLVMStyle();
MozillaStyle.AllowAllParametersOfDeclarationOnNextLine = false;
MozillaStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline;
- MozillaStyle.AlwaysBreakAfterReturnType =
- FormatStyle::RTBS_TopLevel;
+ MozillaStyle.AlwaysBreakAfterReturnType = FormatStyle::RTBS_TopLevel;
MozillaStyle.AlwaysBreakAfterDefinitionReturnType =
FormatStyle::DRTBS_TopLevel;
MozillaStyle.AlwaysBreakTemplateDeclarations = true;
@@ -879,7 +917,7 @@ public:
JavaScriptRequoter(const Environment &Env, const FormatStyle &Style)
: TokenAnalyzer(Env, Style) {}
- tooling::Replacements
+ std::pair<tooling::Replacements, unsigned>
analyze(TokenAnnotator &Annotator,
SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
FormatTokenLexer &Tokens) override {
@@ -887,7 +925,7 @@ public:
AnnotatedLines.end());
tooling::Replacements Result;
requoteJSStringLiteral(AnnotatedLines, Result);
- return Result;
+ return {Result, 0};
}
private:
@@ -968,7 +1006,7 @@ public:
FormattingAttemptStatus *Status)
: TokenAnalyzer(Env, Style), Status(Status) {}
- tooling::Replacements
+ std::pair<tooling::Replacements, unsigned>
analyze(TokenAnnotator &Annotator,
SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
FormatTokenLexer &Tokens) override {
@@ -987,17 +1025,23 @@ public:
ContinuationIndenter Indenter(Style, Tokens.getKeywords(),
Env.getSourceManager(), Whitespaces, Encoding,
BinPackInconclusiveFunctions);
- UnwrappedLineFormatter(&Indenter, &Whitespaces, Style, Tokens.getKeywords(),
- Env.getSourceManager(), Status)
- .format(AnnotatedLines);
+ unsigned Penalty =
+ UnwrappedLineFormatter(&Indenter, &Whitespaces, Style,
+ Tokens.getKeywords(), Env.getSourceManager(),
+ Status)
+ .format(AnnotatedLines, /*DryRun=*/false,
+ /*AdditionalIndent=*/0,
+ /*FixBadIndentation=*/false,
+ /*FirstStartColumn=*/Env.getFirstStartColumn(),
+ /*NextStartColumn=*/Env.getNextStartColumn(),
+ /*LastStartColumn=*/Env.getLastStartColumn());
for (const auto &R : Whitespaces.generateReplacements())
if (Result.add(R))
- return Result;
- return Result;
+ return std::make_pair(Result, 0);
+ return std::make_pair(Result, Penalty);
}
private:
-
static bool inputUsesCRLF(StringRef Text) {
return Text.count('\r') * 2 > Text.count('\n');
}
@@ -1082,7 +1126,7 @@ public:
DeletedTokens(FormatTokenLess(Env.getSourceManager())) {}
// FIXME: eliminate unused parameters.
- tooling::Replacements
+ std::pair<tooling::Replacements, unsigned>
analyze(TokenAnnotator &Annotator,
SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
FormatTokenLexer &Tokens) override {
@@ -1110,7 +1154,7 @@ public:
}
}
- return generateFixes();
+ return {generateFixes(), 0};
}
private:
@@ -1386,19 +1430,27 @@ static void sortCppIncludes(const FormatStyle &Style,
}),
Indices.end());
+ int CurrentCategory = Includes.front().Category;
+
// If the #includes are out of order, we generate a single replacement fixing
// the entire block. Otherwise, no replacement is generated.
if (Indices.size() == Includes.size() &&
- std::is_sorted(Indices.begin(), Indices.end()))
+ std::is_sorted(Indices.begin(), Indices.end()) &&
+ Style.IncludeBlocks == FormatStyle::IBS_Preserve)
return;
std::string result;
for (unsigned Index : Indices) {
- if (!result.empty())
+ if (!result.empty()) {
result += "\n";
+ if (Style.IncludeBlocks == FormatStyle::IBS_Regroup &&
+ CurrentCategory != Includes[Index].Category)
+ result += "\n";
+ }
result += Includes[Index].Text;
if (Cursor && CursorIndex == Index)
*Cursor = IncludesBeginOffset + result.size() - CursorToEOLOffset;
+ CurrentCategory = Includes[Index].Category;
}
auto Err = Replaces.add(tooling::Replacement(
@@ -1506,6 +1558,10 @@ tooling::Replacements sortCppIncludes(const FormatStyle &Style, StringRef Code,
else if (Trimmed == "// clang-format on")
FormattingOff = false;
+ const bool EmptyLineSkipped =
+ Trimmed.empty() && (Style.IncludeBlocks == FormatStyle::IBS_Merge ||
+ Style.IncludeBlocks == FormatStyle::IBS_Regroup);
+
if (!FormattingOff && !Line.endswith("\\")) {
if (IncludeRegex.match(Line, &Matches)) {
StringRef IncludeName = Matches[2];
@@ -1515,7 +1571,7 @@ tooling::Replacements sortCppIncludes(const FormatStyle &Style, StringRef Code,
if (Category == 0)
MainIncludeFound = true;
IncludesInBlock.push_back({IncludeName, Line, Prev, Category});
- } else if (!IncludesInBlock.empty()) {
+ } else if (!IncludesInBlock.empty() && !EmptyLineSkipped) {
sortCppIncludes(Style, IncludesInBlock, Ranges, FileName, Replaces,
Cursor);
IncludesInBlock.clear();
@@ -1539,12 +1595,16 @@ bool isMpegTS(StringRef Code) {
return Code.size() > 188 && Code[0] == 0x47 && Code[188] == 0x47;
}
+bool isLikelyXml(StringRef Code) { return Code.ltrim().startswith("<"); }
+
tooling::Replacements sortIncludes(const FormatStyle &Style, StringRef Code,
ArrayRef<tooling::Range> Ranges,
StringRef FileName, unsigned *Cursor) {
tooling::Replacements Replaces;
if (!Style.SortIncludes)
return Replaces;
+ if (isLikelyXml(Code))
+ return Replaces;
if (Style.Language == FormatStyle::LanguageKind::LK_JavaScript &&
isMpegTS(Code))
return Replaces;
@@ -1887,17 +1947,22 @@ cleanupAroundReplacements(StringRef Code, const tooling::Replacements &Replaces,
return processReplacements(Cleanup, Code, NewReplaces, Style);
}
-tooling::Replacements reformat(const FormatStyle &Style, StringRef Code,
- ArrayRef<tooling::Range> Ranges,
- StringRef FileName,
- FormattingAttemptStatus *Status) {
+namespace internal {
+std::pair<tooling::Replacements, unsigned>
+reformat(const FormatStyle &Style, StringRef Code,
+ ArrayRef<tooling::Range> Ranges, unsigned FirstStartColumn,
+ unsigned NextStartColumn, unsigned LastStartColumn, StringRef FileName,
+ FormattingAttemptStatus *Status) {
FormatStyle Expanded = expandPresets(Style);
if (Expanded.DisableFormat)
- return tooling::Replacements();
+ return {tooling::Replacements(), 0};
+ if (isLikelyXml(Code))
+ return {tooling::Replacements(), 0};
if (Expanded.Language == FormatStyle::LK_JavaScript && isMpegTS(Code))
- return tooling::Replacements();
+ return {tooling::Replacements(), 0};
- typedef std::function<tooling::Replacements(const Environment &)>
+ typedef std::function<std::pair<tooling::Replacements, unsigned>(
+ const Environment &)>
AnalyzerPass;
SmallVector<AnalyzerPass, 4> Passes;
@@ -1923,26 +1988,42 @@ tooling::Replacements reformat(const FormatStyle &Style, StringRef Code,
return Formatter(Env, Expanded, Status).process();
});
- std::unique_ptr<Environment> Env =
- Environment::CreateVirtualEnvironment(Code, FileName, Ranges);
+ std::unique_ptr<Environment> Env = Environment::CreateVirtualEnvironment(
+ Code, FileName, Ranges, FirstStartColumn, NextStartColumn,
+ LastStartColumn);
llvm::Optional<std::string> CurrentCode = None;
tooling::Replacements Fixes;
+ unsigned Penalty = 0;
for (size_t I = 0, E = Passes.size(); I < E; ++I) {
- tooling::Replacements PassFixes = Passes[I](*Env);
+ std::pair<tooling::Replacements, unsigned> PassFixes = Passes[I](*Env);
auto NewCode = applyAllReplacements(
- CurrentCode ? StringRef(*CurrentCode) : Code, PassFixes);
+ CurrentCode ? StringRef(*CurrentCode) : Code, PassFixes.first);
if (NewCode) {
- Fixes = Fixes.merge(PassFixes);
+ Fixes = Fixes.merge(PassFixes.first);
+ Penalty += PassFixes.second;
if (I + 1 < E) {
CurrentCode = std::move(*NewCode);
Env = Environment::CreateVirtualEnvironment(
*CurrentCode, FileName,
- tooling::calculateRangesAfterReplacements(Fixes, Ranges));
+ tooling::calculateRangesAfterReplacements(Fixes, Ranges),
+ FirstStartColumn, NextStartColumn, LastStartColumn);
}
}
}
- return Fixes;
+ return {Fixes, Penalty};
+}
+} // namespace internal
+
+tooling::Replacements reformat(const FormatStyle &Style, StringRef Code,
+ ArrayRef<tooling::Range> Ranges,
+ StringRef FileName,
+ FormattingAttemptStatus *Status) {
+ return internal::reformat(Style, Code, Ranges,
+ /*FirstStartColumn=*/0,
+ /*NextStartColumn=*/0,
+ /*LastStartColumn=*/0, FileName, Status)
+ .first;
}
tooling::Replacements cleanup(const FormatStyle &Style, StringRef Code,
@@ -1954,7 +2035,7 @@ tooling::Replacements cleanup(const FormatStyle &Style, StringRef Code,
std::unique_ptr<Environment> Env =
Environment::CreateVirtualEnvironment(Code, FileName, Ranges);
Cleaner Clean(*Env, Style);
- return Clean.process();
+ return Clean.process().first;
}
tooling::Replacements reformat(const FormatStyle &Style, StringRef Code,
@@ -1974,7 +2055,7 @@ tooling::Replacements fixNamespaceEndComments(const FormatStyle &Style,
std::unique_ptr<Environment> Env =
Environment::CreateVirtualEnvironment(Code, FileName, Ranges);
NamespaceEndCommentsFixer Fix(*Env, Style);
- return Fix.process();
+ return Fix.process().first;
}
tooling::Replacements sortUsingDeclarations(const FormatStyle &Style,
@@ -1984,7 +2065,7 @@ tooling::Replacements sortUsingDeclarations(const FormatStyle &Style,
std::unique_ptr<Environment> Env =
Environment::CreateVirtualEnvironment(Code, FileName, Ranges);
UsingDeclarationsSorter Sorter(*Env, Style);
- return Sorter.process();
+ return Sorter.process().first;
}
LangOptions getFormattingLangOpts(const FormatStyle &Style) {
@@ -1992,7 +2073,8 @@ LangOptions getFormattingLangOpts(const FormatStyle &Style) {
LangOpts.CPlusPlus = 1;
LangOpts.CPlusPlus11 = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1;
LangOpts.CPlusPlus14 = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1;
- LangOpts.CPlusPlus1z = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1;
+ LangOpts.CPlusPlus17 = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1;
+ LangOpts.CPlusPlus2a = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1;
LangOpts.LineComment = 1;
bool AlternativeOperators = Style.isCpp();
LangOpts.CXXOperatorNames = AlternativeOperators ? 1 : 0;
@@ -2025,6 +2107,11 @@ static FormatStyle::LanguageKind getLanguageByFileName(StringRef FileName) {
if (FileName.endswith_lower(".proto") ||
FileName.endswith_lower(".protodevel"))
return FormatStyle::LK_Proto;
+ if (FileName.endswith_lower(".textpb") ||
+ FileName.endswith_lower(".pb.txt") ||
+ FileName.endswith_lower(".textproto") ||
+ FileName.endswith_lower(".asciipb"))
+ return FormatStyle::LK_TextProto;
if (FileName.endswith_lower(".td"))
return FormatStyle::LK_TableGen;
return FormatStyle::LK_Cpp;
@@ -2043,7 +2130,9 @@ llvm::Expected<FormatStyle> getStyle(StringRef StyleName, StringRef FileName,
// should be improved over time and probably be done on tokens, not on the
// bare content of the file.
if (Style.Language == FormatStyle::LK_Cpp && FileName.endswith(".h") &&
- (Code.contains("\n- (") || Code.contains("\n+ (")))
+ (Code.contains("\n- (") || Code.contains("\n+ (") ||
+ Code.contains("\n@end\n") || Code.contains("\n@end ") ||
+ Code.endswith("@end")))
Style.Language = FormatStyle::LK_ObjC;
FormatStyle FallbackStyle = getNoStyle();
diff --git a/lib/Format/FormatInternal.h b/lib/Format/FormatInternal.h
new file mode 100644
index 000000000000..3984158467b3
--- /dev/null
+++ b/lib/Format/FormatInternal.h
@@ -0,0 +1,83 @@
+//===--- FormatInternal.h - Format C++ code ---------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file declares Format APIs to be used internally by the
+/// formatting library implementation.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_FORMAT_FORMATINTERNAL_H
+#define LLVM_CLANG_LIB_FORMAT_FORMATINTERNAL_H
+
+#include "BreakableToken.h"
+#include "clang/Tooling/Core/Lookup.h"
+#include <utility>
+
+namespace clang {
+namespace format {
+namespace internal {
+
+/// \brief Reformats the given \p Ranges in the code fragment \p Code.
+///
+/// A fragment of code could conceptually be surrounded by other code that might
+/// constrain how that fragment is laid out.
+/// For example, consider the fragment of code between 'R"(' and ')"',
+/// exclusive, in the following code:
+///
+/// void outer(int x) {
+/// string inner = R"(name: data
+/// ^ FirstStartColumn
+/// value: {
+/// x: 1
+/// ^ NextStartColumn
+/// }
+/// )";
+/// ^ LastStartColumn
+/// }
+///
+/// The outer code can influence the inner fragment as follows:
+/// * \p FirstStartColumn specifies the column at which \p Code starts.
+/// * \p NextStartColumn specifies the additional indent dictated by the
+/// surrounding code. It is applied to the rest of the lines of \p Code.
+/// * \p LastStartColumn specifies the column at which the last line of
+/// \p Code should end, in case the last line is an empty line.
+///
+/// In the case where the last line of the fragment contains content,
+/// the fragment ends at the end of that content and \p LastStartColumn is
+/// not taken into account, for example in:
+///
+/// void block() {
+/// string inner = R"(name: value)";
+/// }
+///
+/// Each range is extended on either end to its next bigger logic unit, i.e.
+/// everything that might influence its formatting or might be influenced by its
+/// formatting.
+///
+/// Returns a pair P, where:
+/// * P.first are the ``Replacements`` necessary to make all \p Ranges comply
+/// with \p Style.
+/// * P.second is the penalty induced by formatting the fragment \p Code.
+/// If the formatting of the fragment doesn't have a notion of penalty,
+/// returns 0.
+///
+/// If ``Status`` is non-null, its value will be populated with the status of
+/// this formatting attempt. See \c FormattingAttemptStatus.
+std::pair<tooling::Replacements, unsigned>
+reformat(const FormatStyle &Style, StringRef Code,
+ ArrayRef<tooling::Range> Ranges, unsigned FirstStartColumn,
+ unsigned NextStartColumn, unsigned LastStartColumn, StringRef FileName,
+ FormattingAttemptStatus *Status);
+
+} // namespace internal
+} // namespace format
+} // namespace clang
+
+#endif
diff --git a/lib/Format/FormatToken.cpp b/lib/Format/FormatToken.cpp
index ba5bf03a6346..10ac392abbf2 100644
--- a/lib/Format/FormatToken.cpp
+++ b/lib/Format/FormatToken.cpp
@@ -25,10 +25,9 @@ namespace format {
const char *getTokenTypeName(TokenType Type) {
static const char *const TokNames[] = {
#define TYPE(X) #X,
-LIST_TOKEN_TYPES
+ LIST_TOKEN_TYPES
#undef TYPE
- nullptr
- };
+ nullptr};
if (Type < NUM_TOKEN_TYPES)
return TokNames[Type];
@@ -52,6 +51,7 @@ bool FormatToken::isSimpleTypeSpecifier() const {
case tok::kw_half:
case tok::kw_float:
case tok::kw_double:
+ case tok::kw__Float16:
case tok::kw___float128:
case tok::kw_wchar_t:
case tok::kw_bool:
diff --git a/lib/Format/FormatToken.h b/lib/Format/FormatToken.h
index a60361a8e5fa..3dc0ab0e7cca 100644
--- a/lib/Format/FormatToken.h
+++ b/lib/Format/FormatToken.h
@@ -26,78 +26,79 @@
namespace clang {
namespace format {
-#define LIST_TOKEN_TYPES \
- TYPE(ArrayInitializerLSquare) \
- TYPE(ArraySubscriptLSquare) \
- TYPE(AttributeParen) \
- TYPE(BinaryOperator) \
- TYPE(BitFieldColon) \
- TYPE(BlockComment) \
- TYPE(CastRParen) \
- TYPE(ConditionalExpr) \
- TYPE(ConflictAlternative) \
- TYPE(ConflictEnd) \
- TYPE(ConflictStart) \
- TYPE(CtorInitializerColon) \
- TYPE(CtorInitializerComma) \
- TYPE(DesignatedInitializerLSquare) \
- TYPE(DesignatedInitializerPeriod) \
- TYPE(DictLiteral) \
- TYPE(ForEachMacro) \
- TYPE(FunctionAnnotationRParen) \
- TYPE(FunctionDeclarationName) \
- TYPE(FunctionLBrace) \
- TYPE(FunctionTypeLParen) \
- TYPE(ImplicitStringLiteral) \
- TYPE(InheritanceColon) \
- TYPE(InheritanceComma) \
- TYPE(InlineASMBrace) \
- TYPE(InlineASMColon) \
- TYPE(JavaAnnotation) \
- TYPE(JsComputedPropertyName) \
- TYPE(JsExponentiation) \
- TYPE(JsExponentiationEqual) \
- TYPE(JsFatArrow) \
- TYPE(JsNonNullAssertion) \
- TYPE(JsTypeColon) \
- TYPE(JsTypeOperator) \
- TYPE(JsTypeOptionalQuestion) \
- TYPE(LambdaArrow) \
- TYPE(LambdaLSquare) \
- TYPE(LeadingJavaAnnotation) \
- TYPE(LineComment) \
- TYPE(MacroBlockBegin) \
- TYPE(MacroBlockEnd) \
- TYPE(ObjCBlockLBrace) \
- TYPE(ObjCBlockLParen) \
- TYPE(ObjCDecl) \
- TYPE(ObjCForIn) \
- TYPE(ObjCMethodExpr) \
- TYPE(ObjCMethodSpecifier) \
- TYPE(ObjCProperty) \
- TYPE(ObjCStringLiteral) \
- TYPE(OverloadedOperator) \
- TYPE(OverloadedOperatorLParen) \
- TYPE(PointerOrReference) \
- TYPE(PureVirtualSpecifier) \
- TYPE(RangeBasedForLoopColon) \
- TYPE(RegexLiteral) \
- TYPE(SelectorName) \
- TYPE(StartOfName) \
- TYPE(TemplateCloser) \
- TYPE(TemplateOpener) \
- TYPE(TemplateString) \
- TYPE(TrailingAnnotation) \
- TYPE(TrailingReturnArrow) \
- TYPE(TrailingUnaryOperator) \
- TYPE(UnaryOperator) \
+#define LIST_TOKEN_TYPES \
+ TYPE(ArrayInitializerLSquare) \
+ TYPE(ArraySubscriptLSquare) \
+ TYPE(AttributeParen) \
+ TYPE(BinaryOperator) \
+ TYPE(BitFieldColon) \
+ TYPE(BlockComment) \
+ TYPE(CastRParen) \
+ TYPE(ConditionalExpr) \
+ TYPE(ConflictAlternative) \
+ TYPE(ConflictEnd) \
+ TYPE(ConflictStart) \
+ TYPE(CtorInitializerColon) \
+ TYPE(CtorInitializerComma) \
+ TYPE(DesignatedInitializerLSquare) \
+ TYPE(DesignatedInitializerPeriod) \
+ TYPE(DictLiteral) \
+ TYPE(ForEachMacro) \
+ TYPE(FunctionAnnotationRParen) \
+ TYPE(FunctionDeclarationName) \
+ TYPE(FunctionLBrace) \
+ TYPE(FunctionTypeLParen) \
+ TYPE(ImplicitStringLiteral) \
+ TYPE(InheritanceColon) \
+ TYPE(InheritanceComma) \
+ TYPE(InlineASMBrace) \
+ TYPE(InlineASMColon) \
+ TYPE(JavaAnnotation) \
+ TYPE(JsComputedPropertyName) \
+ TYPE(JsExponentiation) \
+ TYPE(JsExponentiationEqual) \
+ TYPE(JsFatArrow) \
+ TYPE(JsNonNullAssertion) \
+ TYPE(JsTypeColon) \
+ TYPE(JsTypeOperator) \
+ TYPE(JsTypeOptionalQuestion) \
+ TYPE(LambdaArrow) \
+ TYPE(LambdaLSquare) \
+ TYPE(LeadingJavaAnnotation) \
+ TYPE(LineComment) \
+ TYPE(MacroBlockBegin) \
+ TYPE(MacroBlockEnd) \
+ TYPE(ObjCBlockLBrace) \
+ TYPE(ObjCBlockLParen) \
+ TYPE(ObjCDecl) \
+ TYPE(ObjCForIn) \
+ TYPE(ObjCMethodExpr) \
+ TYPE(ObjCMethodSpecifier) \
+ TYPE(ObjCProperty) \
+ TYPE(ObjCStringLiteral) \
+ TYPE(OverloadedOperator) \
+ TYPE(OverloadedOperatorLParen) \
+ TYPE(PointerOrReference) \
+ TYPE(PureVirtualSpecifier) \
+ TYPE(RangeBasedForLoopColon) \
+ TYPE(RegexLiteral) \
+ TYPE(SelectorName) \
+ TYPE(StartOfName) \
+ TYPE(StructuredBindingLSquare) \
+ TYPE(TemplateCloser) \
+ TYPE(TemplateOpener) \
+ TYPE(TemplateString) \
+ TYPE(TrailingAnnotation) \
+ TYPE(TrailingReturnArrow) \
+ TYPE(TrailingUnaryOperator) \
+ TYPE(UnaryOperator) \
TYPE(Unknown)
enum TokenType {
#define TYPE(X) TT_##X,
-LIST_TOKEN_TYPES
+ LIST_TOKEN_TYPES
#undef TYPE
- NUM_TOKEN_TYPES
+ NUM_TOKEN_TYPES
};
/// \brief Determines the name of a token type.
@@ -340,10 +341,11 @@ struct FormatToken {
bool isSimpleTypeSpecifier() const;
bool isObjCAccessSpecifier() const {
- return is(tok::at) && Next && (Next->isObjCAtKeyword(tok::objc_public) ||
- Next->isObjCAtKeyword(tok::objc_protected) ||
- Next->isObjCAtKeyword(tok::objc_package) ||
- Next->isObjCAtKeyword(tok::objc_private));
+ return is(tok::at) && Next &&
+ (Next->isObjCAtKeyword(tok::objc_public) ||
+ Next->isObjCAtKeyword(tok::objc_protected) ||
+ Next->isObjCAtKeyword(tok::objc_package) ||
+ Next->isObjCAtKeyword(tok::objc_private));
}
/// \brief Returns whether \p Tok is ([{ or a template opening <.
@@ -471,6 +473,19 @@ struct FormatToken {
Style.Language == FormatStyle::LK_TextProto));
}
+ /// \brief Returns whether the token is the left square bracket of a C++
+ /// structured binding declaration.
+ bool isCppStructuredBinding(const FormatStyle &Style) const {
+ if (!Style.isCpp() || isNot(tok::l_square))
+ return false;
+ const FormatToken *T = this;
+ do {
+ T = T->getPreviousNonComment();
+ } while (T && T->isOneOf(tok::kw_const, tok::kw_volatile, tok::amp,
+ tok::ampamp));
+ return T && T->is(tok::kw_auto);
+ }
+
/// \brief Same as opensBlockOrBlockTypeList, but for the closing token.
bool closesBlockOrBlockTypeList(const FormatStyle &Style) const {
if (is(TT_TemplateString) && closesScope())
@@ -503,15 +518,13 @@ private:
return is(K1) && Next && Next->startsSequenceInternal(Tokens...);
}
- template <typename A>
- bool startsSequenceInternal(A K1) const {
+ template <typename A> bool startsSequenceInternal(A K1) const {
if (is(tok::comment) && Next)
return Next->startsSequenceInternal(K1);
return is(K1);
}
- template <typename A, typename... Ts>
- bool endsSequenceInternal(A K1) const {
+ template <typename A, typename... Ts> bool endsSequenceInternal(A K1) const {
if (is(tok::comment) && Previous)
return Previous->endsSequenceInternal(K1);
return is(K1);
@@ -644,6 +657,7 @@ struct AdditionalKeywords {
kw_readonly = &IdentTable.get("readonly");
kw_set = &IdentTable.get("set");
kw_type = &IdentTable.get("type");
+ kw_typeof = &IdentTable.get("typeof");
kw_var = &IdentTable.get("var");
kw_yield = &IdentTable.get("yield");
@@ -680,7 +694,7 @@ struct AdditionalKeywords {
JsExtraKeywords = std::unordered_set<IdentifierInfo *>(
{kw_as, kw_async, kw_await, kw_declare, kw_finally, kw_from,
kw_function, kw_get, kw_import, kw_is, kw_let, kw_module, kw_readonly,
- kw_set, kw_type, kw_var, kw_yield,
+ kw_set, kw_type, kw_typeof, kw_var, kw_yield,
// Keywords from the Java section.
kw_abstract, kw_extends, kw_implements, kw_instanceof, kw_interface});
}
@@ -714,6 +728,7 @@ struct AdditionalKeywords {
IdentifierInfo *kw_readonly;
IdentifierInfo *kw_set;
IdentifierInfo *kw_type;
+ IdentifierInfo *kw_typeof;
IdentifierInfo *kw_var;
IdentifierInfo *kw_yield;
diff --git a/lib/Format/FormatTokenLexer.cpp b/lib/Format/FormatTokenLexer.cpp
index 45c3ae1afe5f..199d2974c5c7 100644
--- a/lib/Format/FormatTokenLexer.cpp
+++ b/lib/Format/FormatTokenLexer.cpp
@@ -24,10 +24,10 @@ namespace clang {
namespace format {
FormatTokenLexer::FormatTokenLexer(const SourceManager &SourceMgr, FileID ID,
- const FormatStyle &Style,
+ unsigned Column, const FormatStyle &Style,
encoding::Encoding Encoding)
: FormatTok(nullptr), IsFirstToken(true), StateStack({LexerState::NORMAL}),
- Column(0), TrailingWhitespace(0), SourceMgr(SourceMgr), ID(ID),
+ Column(Column), TrailingWhitespace(0), SourceMgr(SourceMgr), ID(ID),
Style(Style), IdentTable(getFormattingLangOpts(Style)),
Keywords(IdentTable), Encoding(Encoding), FirstInLineIndex(0),
FormattingDisabled(false), MacroBlockBeginRegex(Style.MacroBlockBegin),
@@ -50,6 +50,8 @@ ArrayRef<FormatToken *> FormatTokenLexer::lex() {
tryParseJSRegexLiteral();
handleTemplateStrings();
}
+ if (Style.Language == FormatStyle::LK_TextProto)
+ tryParsePythonComment();
tryMergePreviousTokens();
if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline)
FirstInLineIndex = Tokens.size() - 1;
@@ -96,14 +98,8 @@ void FormatTokenLexer::tryMergePreviousTokens() {
}
if (Style.Language == FormatStyle::LK_Java) {
- static const tok::TokenKind JavaRightLogicalShift[] = {tok::greater,
- tok::greater,
- tok::greater};
- static const tok::TokenKind JavaRightLogicalShiftAssign[] = {tok::greater,
- tok::greater,
- tok::greaterequal};
- if (tryMergeTokens(JavaRightLogicalShift, TT_BinaryOperator))
- return;
+ static const tok::TokenKind JavaRightLogicalShiftAssign[] = {
+ tok::greater, tok::greater, tok::greaterequal};
if (tryMergeTokens(JavaRightLogicalShiftAssign, TT_BinaryOperator))
return;
}
@@ -162,9 +158,8 @@ bool FormatTokenLexer::tryMergeTokens(ArrayRef<tok::TokenKind> Kinds,
return false;
unsigned AddLength = 0;
for (unsigned i = 1; i < Kinds.size(); ++i) {
- if (!First[i]->is(Kinds[i]) ||
- First[i]->WhitespaceRange.getBegin() !=
- First[i]->WhitespaceRange.getEnd())
+ if (!First[i]->is(Kinds[i]) || First[i]->WhitespaceRange.getBegin() !=
+ First[i]->WhitespaceRange.getEnd())
return false;
AddLength += First[i]->TokenText.size();
}
@@ -337,6 +332,27 @@ void FormatTokenLexer::handleTemplateStrings() {
resetLexer(SourceMgr.getFileOffset(loc));
}
+void FormatTokenLexer::tryParsePythonComment() {
+ FormatToken *HashToken = Tokens.back();
+ if (HashToken->isNot(tok::hash))
+ return;
+ // Turn the remainder of this line into a comment.
+ const char *CommentBegin =
+ Lex->getBufferLocation() - HashToken->TokenText.size(); // at "#"
+ size_t From = CommentBegin - Lex->getBuffer().begin();
+ size_t To = Lex->getBuffer().find_first_of('\n', From);
+ if (To == StringRef::npos)
+ To = Lex->getBuffer().size();
+ size_t Len = To - From;
+ HashToken->Type = TT_LineComment;
+ HashToken->Tok.setKind(tok::comment);
+ HashToken->TokenText = Lex->getBuffer().substr(From, Len);
+ SourceLocation Loc = To < Lex->getBuffer().size()
+ ? Lex->getSourceLocation(CommentBegin + Len)
+ : SourceMgr.getLocForEndOfFile(ID);
+ resetLexer(SourceMgr.getFileOffset(Loc));
+}
+
bool FormatTokenLexer::tryMerge_TMacro() {
if (Tokens.size() < 4)
return false;
@@ -529,17 +545,53 @@ FormatToken *FormatTokenLexer::getNextToken() {
readRawToken(*FormatTok);
}
+ // JavaScript and Java do not allow escaping the end of the line with a
+ // backslash. Backslashes are syntax errors in plain source, but can occur in
+ // comments. When a single line comment ends with a \, it'll cause the next
+ // line of code to be lexed as a comment, breaking formatting. The code below
+ // finds comments that contain a backslash followed by a line break, truncates
+ // the comment token at the backslash, and resets the lexer to restart behind
+ // the backslash.
+ if ((Style.Language == FormatStyle::LK_JavaScript ||
+ Style.Language == FormatStyle::LK_Java) &&
+ FormatTok->is(tok::comment) && FormatTok->TokenText.startswith("//")) {
+ size_t BackslashPos = FormatTok->TokenText.find('\\');
+ while (BackslashPos != StringRef::npos) {
+ if (BackslashPos + 1 < FormatTok->TokenText.size() &&
+ FormatTok->TokenText[BackslashPos + 1] == '\n') {
+ const char *Offset = Lex->getBufferLocation();
+ Offset -= FormatTok->TokenText.size();
+ Offset += BackslashPos + 1;
+ resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset)));
+ FormatTok->TokenText = FormatTok->TokenText.substr(0, BackslashPos + 1);
+ FormatTok->ColumnWidth = encoding::columnWidthWithTabs(
+ FormatTok->TokenText, FormatTok->OriginalColumn, Style.TabWidth,
+ Encoding);
+ break;
+ }
+ BackslashPos = FormatTok->TokenText.find('\\', BackslashPos + 1);
+ }
+ }
+
// In case the token starts with escaped newlines, we want to
// take them into account as whitespace - this pattern is quite frequent
// in macro definitions.
// FIXME: Add a more explicit test.
- while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\' &&
- FormatTok->TokenText[1] == '\n') {
+ while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\') {
+ unsigned SkippedWhitespace = 0;
+ if (FormatTok->TokenText.size() > 2 &&
+ (FormatTok->TokenText[1] == '\r' && FormatTok->TokenText[2] == '\n'))
+ SkippedWhitespace = 3;
+ else if (FormatTok->TokenText[1] == '\n')
+ SkippedWhitespace = 2;
+ else
+ break;
+
++FormatTok->NewlinesBefore;
- WhitespaceLength += 2;
- FormatTok->LastNewlineOffset = 2;
+ WhitespaceLength += SkippedWhitespace;
+ FormatTok->LastNewlineOffset = SkippedWhitespace;
Column = 0;
- FormatTok->TokenText = FormatTok->TokenText.substr(2);
+ FormatTok->TokenText = FormatTok->TokenText.substr(SkippedWhitespace);
}
FormatTok->WhitespaceRange = SourceRange(
diff --git a/lib/Format/FormatTokenLexer.h b/lib/Format/FormatTokenLexer.h
index bf10f09cd11e..59dc2a752f1f 100644
--- a/lib/Format/FormatTokenLexer.h
+++ b/lib/Format/FormatTokenLexer.h
@@ -36,7 +36,7 @@ enum LexerState {
class FormatTokenLexer {
public:
- FormatTokenLexer(const SourceManager &SourceMgr, FileID ID,
+ FormatTokenLexer(const SourceManager &SourceMgr, FileID ID, unsigned Column,
const FormatStyle &Style, encoding::Encoding Encoding);
ArrayRef<FormatToken *> lex();
@@ -73,6 +73,8 @@ private:
// nested template parts by balancing curly braces.
void handleTemplateStrings();
+ void tryParsePythonComment();
+
bool tryMerge_TMacro();
bool tryMergeConflictMarkers();
diff --git a/lib/Format/NamespaceEndCommentsFixer.cpp b/lib/Format/NamespaceEndCommentsFixer.cpp
index 85b70b8c0a76..df99bb2e1381 100644
--- a/lib/Format/NamespaceEndCommentsFixer.cpp
+++ b/lib/Format/NamespaceEndCommentsFixer.cpp
@@ -118,6 +118,12 @@ getNamespaceToken(const AnnotatedLine *line,
return nullptr;
assert(StartLineIndex < AnnotatedLines.size());
const FormatToken *NamespaceTok = AnnotatedLines[StartLineIndex]->First;
+ if (NamespaceTok->is(tok::l_brace)) {
+ // "namespace" keyword can be on the line preceding '{', e.g. in styles
+ // where BraceWrapping.AfterNamespace is true.
+ if (StartLineIndex > 0)
+ NamespaceTok = AnnotatedLines[StartLineIndex - 1]->First;
+ }
// Detect "(inline)? namespace" in the beginning of a line.
if (NamespaceTok->is(tok::kw_inline))
NamespaceTok = NamespaceTok->getNextNonComment();
@@ -131,7 +137,7 @@ NamespaceEndCommentsFixer::NamespaceEndCommentsFixer(const Environment &Env,
const FormatStyle &Style)
: TokenAnalyzer(Env, Style) {}
-tooling::Replacements NamespaceEndCommentsFixer::analyze(
+std::pair<tooling::Replacements, unsigned> NamespaceEndCommentsFixer::analyze(
TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
FormatTokenLexer &Tokens) {
const SourceManager &SourceMgr = Env.getSourceManager();
@@ -200,7 +206,7 @@ tooling::Replacements NamespaceEndCommentsFixer::analyze(
}
StartLineIndex = SIZE_MAX;
}
- return Fixes;
+ return {Fixes, 0};
}
} // namespace format
diff --git a/lib/Format/NamespaceEndCommentsFixer.h b/lib/Format/NamespaceEndCommentsFixer.h
index 7790668a2e82..4779f0d27c92 100644
--- a/lib/Format/NamespaceEndCommentsFixer.h
+++ b/lib/Format/NamespaceEndCommentsFixer.h
@@ -25,7 +25,7 @@ class NamespaceEndCommentsFixer : public TokenAnalyzer {
public:
NamespaceEndCommentsFixer(const Environment &Env, const FormatStyle &Style);
- tooling::Replacements
+ std::pair<tooling::Replacements, unsigned>
analyze(TokenAnnotator &Annotator,
SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
FormatTokenLexer &Tokens) override;
diff --git a/lib/Format/SortJavaScriptImports.cpp b/lib/Format/SortJavaScriptImports.cpp
index e73695ca8477..d0b979e100d5 100644
--- a/lib/Format/SortJavaScriptImports.cpp
+++ b/lib/Format/SortJavaScriptImports.cpp
@@ -123,7 +123,7 @@ public:
: TokenAnalyzer(Env, Style),
FileContents(Env.getSourceManager().getBufferData(Env.getFileID())) {}
- tooling::Replacements
+ std::pair<tooling::Replacements, unsigned>
analyze(TokenAnnotator &Annotator,
SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
FormatTokenLexer &Tokens) override {
@@ -138,7 +138,7 @@ public:
parseModuleReferences(Keywords, AnnotatedLines);
if (References.empty())
- return Result;
+ return {Result, 0};
SmallVector<unsigned, 16> Indices;
for (unsigned i = 0, e = References.size(); i != e; ++i)
@@ -168,7 +168,7 @@ public:
}
if (ReferencesInOrder && SymbolsInOrder)
- return Result;
+ return {Result, 0};
SourceRange InsertionPoint = References[0].Range;
InsertionPoint.setEnd(References[References.size() - 1].Range.getEnd());
@@ -202,7 +202,7 @@ public:
assert(false);
}
- return Result;
+ return {Result, 0};
}
private:
@@ -277,7 +277,7 @@ private:
// Parses module references in the given lines. Returns the module references,
// and a pointer to the first "main code" line if that is adjacent to the
// affected lines of module references, nullptr otherwise.
- std::pair<SmallVector<JsModuleReference, 16>, AnnotatedLine*>
+ std::pair<SmallVector<JsModuleReference, 16>, AnnotatedLine *>
parseModuleReferences(const AdditionalKeywords &Keywords,
SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
SmallVector<JsModuleReference, 16> References;
@@ -413,7 +413,7 @@ private:
nextToken();
if (Current->is(tok::r_brace))
break;
- if (Current->isNot(tok::identifier))
+ if (!Current->isOneOf(tok::identifier, tok::kw_default))
return false;
JsImportedSymbol Symbol;
@@ -425,7 +425,7 @@ private:
if (Current->is(Keywords.kw_as)) {
nextToken();
- if (Current->isNot(tok::identifier))
+ if (!Current->isOneOf(tok::identifier, tok::kw_default))
return false;
Symbol.Alias = Current->TokenText;
nextToken();
@@ -449,7 +449,7 @@ tooling::Replacements sortJavaScriptImports(const FormatStyle &Style,
std::unique_ptr<Environment> Env =
Environment::CreateVirtualEnvironment(Code, FileName, Ranges);
JavaScriptImportSorter Sorter(*Env, Style);
- return Sorter.process();
+ return Sorter.process().first;
}
} // end namespace format
diff --git a/lib/Format/TokenAnalyzer.cpp b/lib/Format/TokenAnalyzer.cpp
index f2e4e8ef0819..d1dfb1fea32b 100644
--- a/lib/Format/TokenAnalyzer.cpp
+++ b/lib/Format/TokenAnalyzer.cpp
@@ -38,7 +38,10 @@ namespace format {
// Code.
std::unique_ptr<Environment>
Environment::CreateVirtualEnvironment(StringRef Code, StringRef FileName,
- ArrayRef<tooling::Range> Ranges) {
+ ArrayRef<tooling::Range> Ranges,
+ unsigned FirstStartColumn,
+ unsigned NextStartColumn,
+ unsigned LastStartColumn) {
// This is referenced by `FileMgr` and will be released by `FileMgr` when it
// is deleted.
IntrusiveRefCntPtr<vfs::InMemoryFileSystem> InMemoryFileSystem(
@@ -57,8 +60,9 @@ Environment::CreateVirtualEnvironment(StringRef Code, StringRef FileName,
std::unique_ptr<SourceManager> VirtualSM(
new SourceManager(*Diagnostics, *FileMgr));
InMemoryFileSystem->addFile(
- FileName, 0, llvm::MemoryBuffer::getMemBuffer(
- Code, FileName, /*RequiresNullTerminator=*/false));
+ FileName, 0,
+ llvm::MemoryBuffer::getMemBuffer(Code, FileName,
+ /*RequiresNullTerminator=*/false));
FileID ID = VirtualSM->createFileID(FileMgr->getFile(FileName),
SourceLocation(), clang::SrcMgr::C_User);
assert(ID.isValid());
@@ -69,9 +73,9 @@ Environment::CreateVirtualEnvironment(StringRef Code, StringRef FileName,
SourceLocation End = Start.getLocWithOffset(Range.getLength());
CharRanges.push_back(CharSourceRange::getCharRange(Start, End));
}
- return llvm::make_unique<Environment>(ID, std::move(FileMgr),
- std::move(VirtualSM),
- std::move(Diagnostics), CharRanges);
+ return llvm::make_unique<Environment>(
+ ID, std::move(FileMgr), std::move(VirtualSM), std::move(Diagnostics),
+ CharRanges, FirstStartColumn, NextStartColumn, LastStartColumn);
}
TokenAnalyzer::TokenAnalyzer(const Environment &Env, const FormatStyle &Style)
@@ -88,14 +92,16 @@ TokenAnalyzer::TokenAnalyzer(const Environment &Env, const FormatStyle &Style)
<< "\n");
}
-tooling::Replacements TokenAnalyzer::process() {
+std::pair<tooling::Replacements, unsigned> TokenAnalyzer::process() {
tooling::Replacements Result;
- FormatTokenLexer Tokens(Env.getSourceManager(), Env.getFileID(), Style,
- Encoding);
+ FormatTokenLexer Tokens(Env.getSourceManager(), Env.getFileID(),
+ Env.getFirstStartColumn(), Style, Encoding);
- UnwrappedLineParser Parser(Style, Tokens.getKeywords(), Tokens.lex(), *this);
+ UnwrappedLineParser Parser(Style, Tokens.getKeywords(),
+ Env.getFirstStartColumn(), Tokens.lex(), *this);
Parser.parse();
assert(UnwrappedLines.rbegin()->empty());
+ unsigned Penalty = 0;
for (unsigned Run = 0, RunE = UnwrappedLines.size(); Run + 1 != RunE; ++Run) {
DEBUG(llvm::dbgs() << "Run " << Run << "...\n");
SmallVector<AnnotatedLine *, 16> AnnotatedLines;
@@ -106,13 +112,13 @@ tooling::Replacements TokenAnalyzer::process() {
Annotator.annotate(*AnnotatedLines.back());
}
- tooling::Replacements RunResult =
+ std::pair<tooling::Replacements, unsigned> RunResult =
analyze(Annotator, AnnotatedLines, Tokens);
DEBUG({
llvm::dbgs() << "Replacements for run " << Run << ":\n";
- for (tooling::Replacements::const_iterator I = RunResult.begin(),
- E = RunResult.end();
+ for (tooling::Replacements::const_iterator I = RunResult.first.begin(),
+ E = RunResult.first.end();
I != E; ++I) {
llvm::dbgs() << I->toString() << "\n";
}
@@ -120,17 +126,19 @@ tooling::Replacements TokenAnalyzer::process() {
for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
delete AnnotatedLines[i];
}
- for (const auto &R : RunResult) {
+
+ Penalty += RunResult.second;
+ for (const auto &R : RunResult.first) {
auto Err = Result.add(R);
// FIXME: better error handling here. For now, simply return an empty
// Replacements to indicate failure.
if (Err) {
llvm::errs() << llvm::toString(std::move(Err)) << "\n";
- return tooling::Replacements();
+ return {tooling::Replacements(), 0};
}
}
}
- return Result;
+ return {Result, Penalty};
}
void TokenAnalyzer::consumeUnwrappedLine(const UnwrappedLine &TheLine) {
diff --git a/lib/Format/TokenAnalyzer.h b/lib/Format/TokenAnalyzer.h
index 78a3d1bc8d9e..96ea00b25ba1 100644
--- a/lib/Format/TokenAnalyzer.h
+++ b/lib/Format/TokenAnalyzer.h
@@ -37,21 +37,37 @@ namespace format {
class Environment {
public:
Environment(SourceManager &SM, FileID ID, ArrayRef<CharSourceRange> Ranges)
- : ID(ID), CharRanges(Ranges.begin(), Ranges.end()), SM(SM) {}
+ : ID(ID), CharRanges(Ranges.begin(), Ranges.end()), SM(SM),
+ FirstStartColumn(0),
+ NextStartColumn(0),
+ LastStartColumn(0) {}
Environment(FileID ID, std::unique_ptr<FileManager> FileMgr,
std::unique_ptr<SourceManager> VirtualSM,
std::unique_ptr<DiagnosticsEngine> Diagnostics,
- const std::vector<CharSourceRange> &CharRanges)
+ const std::vector<CharSourceRange> &CharRanges,
+ unsigned FirstStartColumn,
+ unsigned NextStartColumn,
+ unsigned LastStartColumn)
: ID(ID), CharRanges(CharRanges.begin(), CharRanges.end()),
- SM(*VirtualSM), FileMgr(std::move(FileMgr)),
+ SM(*VirtualSM),
+ FirstStartColumn(FirstStartColumn),
+ NextStartColumn(NextStartColumn),
+ LastStartColumn(LastStartColumn),
+ FileMgr(std::move(FileMgr)),
VirtualSM(std::move(VirtualSM)), Diagnostics(std::move(Diagnostics)) {}
- // This sets up an virtual file system with file \p FileName containing \p
- // Code.
+ // This sets up a virtual file system with file \p FileName containing the
+ // fragment \p Code. Assumes that \p Code starts at \p FirstStartColumn,
+ // that the next lines of \p Code should start at \p NextStartColumn, and
+ // that \p Code should end at \p LastStartColumn if it ends in a newline.
+ // See also the documentation of clang::format::internal::reformat.
static std::unique_ptr<Environment>
CreateVirtualEnvironment(StringRef Code, StringRef FileName,
- ArrayRef<tooling::Range> Ranges);
+ ArrayRef<tooling::Range> Ranges,
+ unsigned FirstStartColumn = 0,
+ unsigned NextStartColumn = 0,
+ unsigned LastStartColumn = 0);
FileID getFileID() const { return ID; }
@@ -59,10 +75,25 @@ public:
const SourceManager &getSourceManager() const { return SM; }
+ // Returns the column at which the fragment of code managed by this
+ // environment starts.
+ unsigned getFirstStartColumn() const { return FirstStartColumn; }
+
+ // Returns the column at which subsequent lines of the fragment of code
+ // managed by this environment should start.
+ unsigned getNextStartColumn() const { return NextStartColumn; }
+
+ // Returns the column at which the fragment of code managed by this
+ // environment should end if it ends in a newline.
+ unsigned getLastStartColumn() const { return LastStartColumn; }
+
private:
FileID ID;
SmallVector<CharSourceRange, 8> CharRanges;
SourceManager &SM;
+ unsigned FirstStartColumn;
+ unsigned NextStartColumn;
+ unsigned LastStartColumn;
// The order of these fields are important - they should be in the same order
// as they are created in `CreateVirtualEnvironment` so that they can be
@@ -76,10 +107,10 @@ class TokenAnalyzer : public UnwrappedLineConsumer {
public:
TokenAnalyzer(const Environment &Env, const FormatStyle &Style);
- tooling::Replacements process();
+ std::pair<tooling::Replacements, unsigned> process();
protected:
- virtual tooling::Replacements
+ virtual std::pair<tooling::Replacements, unsigned>
analyze(TokenAnnotator &Annotator,
SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
FormatTokenLexer &Tokens) = 0;
diff --git a/lib/Format/TokenAnnotator.cpp b/lib/Format/TokenAnnotator.cpp
index 46ea06b880ed..298c72b002f8 100644
--- a/lib/Format/TokenAnnotator.cpp
+++ b/lib/Format/TokenAnnotator.cpp
@@ -47,7 +47,7 @@ private:
if (NonTemplateLess.count(CurrentToken->Previous))
return false;
- const FormatToken& Previous = *CurrentToken->Previous;
+ const FormatToken &Previous = *CurrentToken->Previous; // The '<'.
if (Previous.Previous) {
if (Previous.Previous->Tok.isLiteral())
return false;
@@ -152,11 +152,11 @@ private:
// export type X = (...);
Contexts.back().IsExpression = false;
} else if (Left->Previous &&
- (Left->Previous->isOneOf(tok::kw_static_assert, tok::kw_decltype,
- tok::kw_if, tok::kw_while, tok::l_paren,
- tok::comma) ||
- Left->Previous->endsSequence(tok::kw_constexpr, tok::kw_if) ||
- Left->Previous->is(TT_BinaryOperator))) {
+ (Left->Previous->isOneOf(tok::kw_static_assert, tok::kw_decltype,
+ tok::kw_if, tok::kw_while, tok::l_paren,
+ tok::comma) ||
+ Left->Previous->endsSequence(tok::kw_constexpr, tok::kw_if) ||
+ Left->Previous->is(TT_BinaryOperator))) {
// static_assert, if and while usually contain expressions.
Contexts.back().IsExpression = true;
} else if (Style.Language == FormatStyle::LK_JavaScript && Left->Previous &&
@@ -325,8 +325,7 @@ private:
// In C++, this can happen either in array of templates (foo<int>[10])
// or when array is a nested template type (unique_ptr<type1<type2>[]>).
bool CppArrayTemplates =
- Style.isCpp() && Parent &&
- Parent->is(TT_TemplateCloser) &&
+ Style.isCpp() && Parent && Parent->is(TT_TemplateCloser) &&
(Contexts.back().CanBeExpression || Contexts.back().IsExpression ||
Contexts.back().InTemplateArgument);
@@ -343,7 +342,9 @@ private:
bool ColonFound = false;
unsigned BindingIncrease = 1;
- if (Left->is(TT_Unknown)) {
+ if (Left->isCppStructuredBinding(Style)) {
+ Left->Type = TT_StructuredBindingLSquare;
+ } else if (Left->is(TT_Unknown)) {
if (StartsObjCMethodExpr) {
Left->Type = TT_ObjCMethodExpr;
} else if (Style.Language == FormatStyle::LK_JavaScript && Parent &&
@@ -372,6 +373,10 @@ private:
ScopedContextCreator ContextCreator(*this, tok::l_square, BindingIncrease);
Contexts.back().IsExpression = true;
+ if (Style.Language == FormatStyle::LK_JavaScript && Parent &&
+ Parent->is(TT_JsTypeColon))
+ Contexts.back().IsExpression = false;
+
Contexts.back().ColonIsObjCMethodExpr = StartsObjCMethodExpr;
while (CurrentToken) {
@@ -439,6 +444,9 @@ private:
Contexts.back().ColonIsDictLiteral = true;
if (Left->BlockKind == BK_BracedInit)
Contexts.back().IsExpression = true;
+ if (Style.Language == FormatStyle::LK_JavaScript && Left->Previous &&
+ Left->Previous->is(TT_JsTypeColon))
+ Contexts.back().IsExpression = false;
while (CurrentToken) {
if (CurrentToken->is(tok::r_brace)) {
@@ -452,6 +460,8 @@ private:
updateParameterCount(Left, CurrentToken);
if (CurrentToken->isOneOf(tok::colon, tok::l_brace, tok::less)) {
FormatToken *Previous = CurrentToken->getPreviousNonComment();
+ if (Previous->is(TT_JsTypeOptionalQuestion))
+ Previous = Previous->getPreviousNonComment();
if (((CurrentToken->is(tok::colon) &&
(!Contexts.back().ColonIsDictLiteral || !Style.isCpp())) ||
Style.Language == FormatStyle::LK_Proto ||
@@ -531,8 +541,11 @@ private:
!Line.First->isOneOf(tok::kw_enum, tok::kw_case)) ||
Contexts.back().ContextKind == tok::l_paren || // function params
Contexts.back().ContextKind == tok::l_square || // array type
+ (!Contexts.back().IsExpression &&
+ Contexts.back().ContextKind == tok::l_brace) || // object type
(Contexts.size() == 1 &&
Line.MustBeDeclaration)) { // method/property declaration
+ Contexts.back().IsExpression = false;
Tok->Type = TT_JsTypeColon;
break;
}
@@ -593,7 +606,8 @@ private:
break;
case tok::kw_if:
case tok::kw_while:
- if (Tok->is(tok::kw_if) && CurrentToken && CurrentToken->is(tok::kw_constexpr))
+ if (Tok->is(tok::kw_if) && CurrentToken &&
+ CurrentToken->is(tok::kw_constexpr))
next();
if (CurrentToken && CurrentToken->is(tok::l_paren)) {
next();
@@ -603,7 +617,9 @@ private:
break;
case tok::kw_for:
if (Style.Language == FormatStyle::LK_JavaScript) {
- if (Tok->Previous && Tok->Previous->is(tok::period))
+ // x.for and {for: ...}
+ if ((Tok->Previous && Tok->Previous->is(tok::period)) ||
+ (Tok->Next && Tok->Next->is(tok::colon)))
break;
// JS' for await ( ...
if (CurrentToken && CurrentToken->is(Keywords.kw_await))
@@ -619,8 +635,7 @@ private:
// marks the first l_paren as a OverloadedOperatorLParen. Here, we make
// the first two parens OverloadedOperators and the second l_paren an
// OverloadedOperatorLParen.
- if (Tok->Previous &&
- Tok->Previous->is(tok::r_paren) &&
+ if (Tok->Previous && Tok->Previous->is(tok::r_paren) &&
Tok->Previous->MatchingParen &&
Tok->Previous->MatchingParen->is(TT_OverloadedOperatorLParen)) {
Tok->Previous->Type = TT_OverloadedOperator;
@@ -643,7 +658,7 @@ private:
break;
case tok::l_brace:
if (Style.Language == FormatStyle::LK_TextProto) {
- FormatToken *Previous =Tok->getPreviousNonComment();
+ FormatToken *Previous = Tok->getPreviousNonComment();
if (Previous && Previous->Type != TT_DictLiteral)
Previous->Type = TT_SelectorName;
}
@@ -683,7 +698,8 @@ private:
CurrentToken->Type = TT_PointerOrReference;
consumeToken();
if (CurrentToken &&
- CurrentToken->Previous->isOneOf(TT_BinaryOperator, tok::comma))
+ CurrentToken->Previous->isOneOf(TT_BinaryOperator, TT_UnaryOperator,
+ tok::comma))
CurrentToken->Previous->Type = TT_OverloadedOperator;
}
if (CurrentToken) {
@@ -740,8 +756,8 @@ private:
void parseIncludeDirective() {
if (CurrentToken && CurrentToken->is(tok::less)) {
- next();
- while (CurrentToken) {
+ next();
+ while (CurrentToken) {
// Mark tokens up to the trailing line comments as implicit string
// literals.
if (CurrentToken->isNot(tok::comment) &&
@@ -781,9 +797,9 @@ private:
void parseHasInclude() {
if (!CurrentToken || !CurrentToken->is(tok::l_paren))
return;
- next(); // '('
+ next(); // '('
parseIncludeDirective();
- next(); // ')'
+ next(); // ')'
}
LineType parsePreprocessorDirective() {
@@ -842,7 +858,7 @@ private:
if (Tok->is(tok::l_paren))
parseParens();
else if (Tok->isOneOf(Keywords.kw___has_include,
- Keywords.kw___has_include_next))
+ Keywords.kw___has_include_next))
parseHasInclude();
}
return Type;
@@ -855,7 +871,7 @@ public:
return parsePreprocessorDirective();
// Directly allow to 'import <string-literal>' to support protocol buffer
- // definitions (code.google.com/p/protobuf) or missing "#" (either way we
+ // definitions (github.com/google/protobuf) or missing "#" (either way we
// should not break the line).
IdentifierInfo *Info = CurrentToken->Tok.getIdentifierInfo();
if ((Style.Language == FormatStyle::LK_Java &&
@@ -933,11 +949,11 @@ private:
// FIXME: Closure-library specific stuff should not be hard-coded but be
// configurable.
return Tok.TokenText == "goog" && Tok.Next && Tok.Next->is(tok::period) &&
- Tok.Next->Next && (Tok.Next->Next->TokenText == "module" ||
- Tok.Next->Next->TokenText == "provide" ||
- Tok.Next->Next->TokenText == "require" ||
- Tok.Next->Next->TokenText == "setTestOnly" ||
- Tok.Next->Next->TokenText == "forwardDeclare") &&
+ Tok.Next->Next &&
+ (Tok.Next->Next->TokenText == "module" ||
+ Tok.Next->Next->TokenText == "provide" ||
+ Tok.Next->Next->TokenText == "require" ||
+ Tok.Next->Next->TokenText == "forwardDeclare") &&
Tok.Next->Next->Next && Tok.Next->Next->Next->is(tok::l_paren);
}
@@ -1054,8 +1070,7 @@ private:
Current.Previous->is(TT_CtorInitializerColon)) {
Contexts.back().IsExpression = true;
Contexts.back().InCtorInitializer = true;
- } else if (Current.Previous &&
- Current.Previous->is(TT_InheritanceColon)) {
+ } else if (Current.Previous && Current.Previous->is(TT_InheritanceColon)) {
Contexts.back().InInheritanceList = true;
} else if (Current.isOneOf(tok::r_paren, tok::greater, tok::comma)) {
for (FormatToken *Previous = Current.Previous;
@@ -1104,6 +1119,11 @@ private:
(!Line.MightBeFunctionDecl || Current.NestingLevel != 0)) {
Contexts.back().FirstStartOfName = &Current;
Current.Type = TT_StartOfName;
+ } else if (Current.is(tok::semi)) {
+ // Reset FirstStartOfName after finding a semicolon so that a for loop
+ // with multiple increment statements is not confused with a for loop
+ // having multiple variable declarations.
+ Contexts.back().FirstStartOfName = nullptr;
} else if (Current.isOneOf(tok::kw_auto, tok::kw___auto_type)) {
AutoFound = true;
} else if (Current.is(tok::arrow) &&
@@ -1113,10 +1133,10 @@ private:
Current.NestingLevel == 0) {
Current.Type = TT_TrailingReturnArrow;
} else if (Current.isOneOf(tok::star, tok::amp, tok::ampamp)) {
- Current.Type =
- determineStarAmpUsage(Current, Contexts.back().CanBeExpression &&
- Contexts.back().IsExpression,
- Contexts.back().InTemplateArgument);
+ Current.Type = determineStarAmpUsage(Current,
+ Contexts.back().CanBeExpression &&
+ Contexts.back().IsExpression,
+ Contexts.back().InTemplateArgument);
} else if (Current.isOneOf(tok::minus, tok::plus, tok::caret)) {
Current.Type = determinePlusMinusCaretUsage(Current);
if (Current.is(TT_UnaryOperator) && Current.is(tok::caret))
@@ -1396,11 +1416,13 @@ private:
if (NextToken->isOneOf(tok::comma, tok::semi))
return TT_PointerOrReference;
- if (PrevToken->is(tok::r_paren) && PrevToken->MatchingParen &&
- PrevToken->MatchingParen->Previous &&
- PrevToken->MatchingParen->Previous->isOneOf(tok::kw_typeof,
- tok::kw_decltype))
- return TT_PointerOrReference;
+ if (PrevToken->is(tok::r_paren) && PrevToken->MatchingParen) {
+ FormatToken *TokenBeforeMatchingParen =
+ PrevToken->MatchingParen->getPreviousNonComment();
+ if (TokenBeforeMatchingParen &&
+ TokenBeforeMatchingParen->isOneOf(tok::kw_typeof, tok::kw_decltype))
+ return TT_PointerOrReference;
+ }
if (PrevToken->Tok.isLiteral() ||
PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::kw_true,
@@ -1589,7 +1611,7 @@ private:
if (Current->is(TT_ConditionalExpr))
return prec::Conditional;
if (NextNonComment && Current->is(TT_SelectorName) &&
- (NextNonComment->is(TT_DictLiteral) ||
+ (NextNonComment->isOneOf(TT_DictLiteral, TT_JsTypeColon) ||
((Style.Language == FormatStyle::LK_Proto ||
Style.Language == FormatStyle::LK_TextProto) &&
NextNonComment->is(tok::less))))
@@ -1643,17 +1665,15 @@ private:
/// \brief Parse unary operator expressions and surround them with fake
/// parentheses if appropriate.
void parseUnaryOperator() {
- if (!Current || Current->isNot(TT_UnaryOperator)) {
- parse(PrecedenceArrowAndPeriod);
- return;
+ llvm::SmallVector<FormatToken *, 2> Tokens;
+ while (Current && Current->is(TT_UnaryOperator)) {
+ Tokens.push_back(Current);
+ next();
}
-
- FormatToken *Start = Current;
- next();
- parseUnaryOperator();
-
- // The actual precedence doesn't matter.
- addFakeParenthesis(Start, prec::Unknown);
+ parse(PrecedenceArrowAndPeriod);
+ for (FormatToken *Token : llvm::reverse(Tokens))
+ // The actual precedence doesn't matter.
+ addFakeParenthesis(Token, prec::Unknown);
}
void parseConditionalExpr() {
@@ -1722,7 +1742,7 @@ void TokenAnnotator::setCommentLineLevels(
static unsigned maxNestingDepth(const AnnotatedLine &Line) {
unsigned Result = 0;
- for (const auto* Tok = Line.First; Tok != nullptr; Tok = Tok->Next)
+ for (const auto *Tok = Line.First; Tok != nullptr; Tok = Tok->Next)
Result = std::max(Result, Tok->NestingLevel);
return Result;
}
@@ -1764,7 +1784,7 @@ void TokenAnnotator::annotate(AnnotatedLine &Line) {
// function declaration.
static bool isFunctionDeclarationName(const FormatToken &Current,
const AnnotatedLine &Line) {
- auto skipOperatorName = [](const FormatToken* Next) -> const FormatToken* {
+ auto skipOperatorName = [](const FormatToken *Next) -> const FormatToken * {
for (; Next; Next = Next->Next) {
if (Next->is(TT_OverloadedOperatorLParen))
return Next;
@@ -1772,8 +1792,8 @@ static bool isFunctionDeclarationName(const FormatToken &Current,
continue;
if (Next->isOneOf(tok::kw_new, tok::kw_delete)) {
// For 'new[]' and 'delete[]'.
- if (Next->Next && Next->Next->is(tok::l_square) &&
- Next->Next->Next && Next->Next->Next->is(tok::r_square))
+ if (Next->Next && Next->Next->is(tok::l_square) && Next->Next->Next &&
+ Next->Next->Next->is(tok::r_square))
Next = Next->Next->Next;
continue;
}
@@ -1872,7 +1892,8 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) {
}
Line.First->TotalLength =
- Line.First->IsMultiline ? Style.ColumnLimit : Line.First->ColumnWidth;
+ Line.First->IsMultiline ? Style.ColumnLimit
+ : Line.FirstStartColumn + Line.First->ColumnWidth;
FormatToken *Current = Line.First->Next;
bool InFunctionDecl = Line.MightBeFunctionDecl;
while (Current) {
@@ -2005,6 +2026,9 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
if ((Left.is(TT_TemplateString) && Left.TokenText.endswith("${")) ||
(Right.is(TT_TemplateString) && Right.TokenText.startswith("}")))
return 100;
+ // Prefer breaking call chains (".foo") over empty "{}", "[]" or "()".
+ if (Left.opensScope() && Right.closesScope())
+ return 200;
}
if (Right.is(tok::identifier) && Right.Next && Right.Next->is(TT_DictLiteral))
@@ -2049,7 +2073,8 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
if (Left.is(tok::comment))
return 1000;
- if (Left.isOneOf(TT_RangeBasedForLoopColon, TT_InheritanceColon, TT_CtorInitializerColon))
+ if (Left.isOneOf(TT_RangeBasedForLoopColon, TT_InheritanceColon,
+ TT_CtorInitializerColon))
return 2;
if (Right.isMemberAccess()) {
@@ -2107,8 +2132,8 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign)
return 100;
if (Left.is(tok::l_paren) && Left.Previous &&
- (Left.Previous->isOneOf(tok::kw_if, tok::kw_for)
- || Left.Previous->endsSequence(tok::kw_constexpr, tok::kw_if)))
+ (Left.Previous->isOneOf(tok::kw_if, tok::kw_for) ||
+ Left.Previous->endsSequence(tok::kw_constexpr, tok::kw_if)))
return 1000;
if (Left.is(tok::equal) && InFunctionDecl)
return 110;
@@ -2128,7 +2153,7 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
if (Left.isOneOf(tok::plus, tok::comma) && Left.Previous &&
Left.Previous->isLabelString() &&
(Left.NextOperator || Left.OperatorIndex != 0))
- return 45;
+ return 50;
if (Right.is(tok::plus) && Left.isLabelString() &&
(Right.NextOperator || Right.OperatorIndex != 0))
return 25;
@@ -2162,6 +2187,8 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
const FormatToken &Right) {
if (Left.is(tok::kw_return) && Right.isNot(tok::semi))
return true;
+ if (Left.is(Keywords.kw_assert) && Style.Language == FormatStyle::LK_Java)
+ return true;
if (Style.ObjCSpaceAfterProperty && Line.Type == LT_ObjCProperty &&
Left.Tok.getObjCKeywordID() == tok::objc_property)
return true;
@@ -2178,8 +2205,8 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
: Style.SpacesInParentheses;
if (Right.isOneOf(tok::semi, tok::comma))
return false;
- if (Right.is(tok::less) &&
- Line.Type == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList)
+ if (Right.is(tok::less) && Line.Type == LT_ObjCDecl &&
+ Style.ObjCSpaceBeforeProtocolList)
return true;
if (Right.is(tok::less) && Left.is(tok::kw_template))
return Style.SpaceAfterTemplateKeyword;
@@ -2201,15 +2228,23 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
Left.Previous->is(tok::kw_case));
if (Left.is(tok::l_square) && Right.is(tok::amp))
return false;
- if (Right.is(TT_PointerOrReference))
- return (Left.is(tok::r_paren) && Line.MightBeFunctionDecl) ||
- (Left.Tok.isLiteral() || (Left.is(tok::kw_const) && Left.Previous &&
- Left.Previous->is(tok::r_paren)) ||
+ if (Right.is(TT_PointerOrReference)) {
+ if (Left.is(tok::r_paren) && Line.MightBeFunctionDecl) {
+ if (!Left.MatchingParen)
+ return true;
+ FormatToken *TokenBeforeMatchingParen =
+ Left.MatchingParen->getPreviousNonComment();
+ if (!TokenBeforeMatchingParen ||
+ !TokenBeforeMatchingParen->isOneOf(tok::kw_typeof, tok::kw_decltype))
+ return true;
+ }
+ return (Left.Tok.isLiteral() ||
(!Left.isOneOf(TT_PointerOrReference, tok::l_paren) &&
(Style.PointerAlignment != FormatStyle::PAS_Left ||
(Line.IsMultiVariableDeclStmt &&
(Left.NestingLevel == 0 ||
(Left.NestingLevel == 1 && Line.First->is(tok::kw_for)))))));
+ }
if (Right.is(TT_FunctionTypeLParen) && Left.isNot(tok::l_paren) &&
(!Left.is(TT_PointerOrReference) ||
(Style.PointerAlignment != FormatStyle::PAS_Right &&
@@ -2231,17 +2266,20 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
if (Left.is(tok::l_square))
return (Left.is(TT_ArrayInitializerLSquare) &&
Style.SpacesInContainerLiterals && Right.isNot(tok::r_square)) ||
- (Left.is(TT_ArraySubscriptLSquare) && Style.SpacesInSquareBrackets &&
- Right.isNot(tok::r_square));
+ (Left.isOneOf(TT_ArraySubscriptLSquare,
+ TT_StructuredBindingLSquare) &&
+ Style.SpacesInSquareBrackets && Right.isNot(tok::r_square));
if (Right.is(tok::r_square))
return Right.MatchingParen &&
((Style.SpacesInContainerLiterals &&
Right.MatchingParen->is(TT_ArrayInitializerLSquare)) ||
(Style.SpacesInSquareBrackets &&
- Right.MatchingParen->is(TT_ArraySubscriptLSquare)));
+ Right.MatchingParen->isOneOf(TT_ArraySubscriptLSquare,
+ TT_StructuredBindingLSquare)));
if (Right.is(tok::l_square) &&
!Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare,
- TT_DesignatedInitializerLSquare) &&
+ TT_DesignatedInitializerLSquare,
+ TT_StructuredBindingLSquare) &&
!Left.isOneOf(tok::numeric_constant, TT_DictLiteral))
return false;
if (Left.is(tok::l_brace) && Right.is(tok::r_brace))
@@ -2287,7 +2325,7 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
if (Left.is(TT_TemplateCloser) && Left.MatchingParen &&
Left.MatchingParen->Previous &&
Left.MatchingParen->Previous->is(tok::period))
- // A.<B>DoSomething();
+ // A.<B<C<...>>>DoSomething();
return false;
if (Left.is(TT_TemplateCloser) && Right.is(tok::l_square))
return false;
@@ -2317,8 +2355,8 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
if (Left.is(TT_JsFatArrow))
return true;
// for await ( ...
- if (Right.is(tok::l_paren) && Left.is(Keywords.kw_await) &&
- Left.Previous && Left.Previous->is(tok::kw_for))
+ if (Right.is(tok::l_paren) && Left.is(Keywords.kw_await) && Left.Previous &&
+ Left.Previous->is(tok::kw_for))
return true;
if (Left.is(Keywords.kw_async) && Right.is(tok::l_paren) &&
Right.MatchingParen) {
@@ -2341,18 +2379,31 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
Left.isOneOf(Keywords.kw_function, Keywords.kw_yield))
return false;
if (Right.isOneOf(tok::l_brace, tok::l_square) &&
- Left.isOneOf(Keywords.kw_function, Keywords.kw_yield))
+ Left.isOneOf(Keywords.kw_function, Keywords.kw_yield,
+ Keywords.kw_extends, Keywords.kw_implements))
return true;
- // JS methods can use some keywords as names (e.g. `delete()`).
- if (Right.is(tok::l_paren) && Line.MustBeDeclaration &&
- Left.Tok.getIdentifierInfo())
- return false;
+ if (Right.is(tok::l_paren)) {
+ // JS methods can use some keywords as names (e.g. `delete()`).
+ if (Line.MustBeDeclaration && Left.Tok.getIdentifierInfo())
+ return false;
+ // Valid JS method names can include keywords, e.g. `foo.delete()` or
+ // `bar.instanceof()`. Recognize call positions by preceding period.
+ if (Left.Previous && Left.Previous->is(tok::period) &&
+ Left.Tok.getIdentifierInfo())
+ return false;
+ // Additional unary JavaScript operators that need a space after.
+ if (Left.isOneOf(tok::kw_throw, Keywords.kw_await, Keywords.kw_typeof,
+ tok::kw_void))
+ return true;
+ }
if ((Left.isOneOf(Keywords.kw_let, Keywords.kw_var, Keywords.kw_in,
tok::kw_const) ||
// "of" is only a keyword if it appears after another identifier
- // (e.g. as "const x of y" in a for loop).
+ // (e.g. as "const x of y" in a for loop), or after a destructuring
+ // operation (const [x, y] of z, const {a, b} of c).
(Left.is(Keywords.kw_of) && Left.Previous &&
- Left.Previous->Tok.getIdentifierInfo())) &&
+ (Left.Previous->Tok.getIdentifierInfo() ||
+ Left.Previous->isOneOf(tok::r_square, tok::r_brace)))) &&
(!Left.Previous || !Left.Previous->is(tok::period)))
return true;
if (Left.isOneOf(tok::kw_for, Keywords.kw_as) && Left.Previous &&
@@ -2384,8 +2435,9 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
return false;
if (Right.is(TT_JsNonNullAssertion))
return false;
- if (Left.is(TT_JsNonNullAssertion) && Right.is(Keywords.kw_as))
- return true; // "x! as string"
+ if (Left.is(TT_JsNonNullAssertion) &&
+ Right.isOneOf(Keywords.kw_as, Keywords.kw_in))
+ return true; // "x! as string", "x! in y"
} else if (Style.Language == FormatStyle::LK_Java) {
if (Left.is(tok::r_square) && Right.is(tok::l_brace))
return true;
@@ -2464,9 +2516,18 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
return (Left.is(TT_TemplateOpener) &&
Style.Standard == FormatStyle::LS_Cpp03) ||
!(Left.isOneOf(tok::l_paren, tok::r_paren, tok::l_square,
- tok::kw___super, TT_TemplateCloser, TT_TemplateOpener));
+ tok::kw___super, TT_TemplateCloser,
+ TT_TemplateOpener));
if ((Left.is(TT_TemplateOpener)) != (Right.is(TT_TemplateCloser)))
return Style.SpacesInAngles;
+ // Space before TT_StructuredBindingLSquare.
+ if (Right.is(TT_StructuredBindingLSquare))
+ return !Left.isOneOf(tok::amp, tok::ampamp) ||
+ Style.PointerAlignment != FormatStyle::PAS_Right;
+ // Space before & or && following a TT_StructuredBindingLSquare.
+ if (Right.Next && Right.Next->is(TT_StructuredBindingLSquare) &&
+ Right.isOneOf(tok::amp, tok::ampamp))
+ return Style.PointerAlignment != FormatStyle::PAS_Left;
if ((Right.is(TT_BinaryOperator) && !Left.is(tok::l_paren)) ||
(Left.isOneOf(TT_BinaryOperator, TT_ConditionalExpr) &&
!Right.is(tok::r_paren)))
@@ -2516,7 +2577,9 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
return true;
if (Left.is(tok::l_brace) && Line.Level == 0 &&
(Line.startsWith(tok::kw_enum) ||
- Line.startsWith(tok::kw_export, tok::kw_enum)))
+ Line.startsWith(tok::kw_const, tok::kw_enum) ||
+ Line.startsWith(tok::kw_export, tok::kw_enum) ||
+ Line.startsWith(tok::kw_export, tok::kw_const, tok::kw_enum)))
// JavaScript top-level enum key/value pairs are put on separate lines
// instead of bin-packing.
return true;
@@ -2587,19 +2650,16 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
!Style.ConstructorInitializerAllOnOneLineOrOnePerLine)
return true;
// Break only if we have multiple inheritance.
- if (Style.BreakBeforeInheritanceComma &&
- Right.is(TT_InheritanceComma))
- return true;
+ if (Style.BreakBeforeInheritanceComma && Right.is(TT_InheritanceComma))
+ return true;
if (Right.is(tok::string_literal) && Right.TokenText.startswith("R\""))
// Raw string literals are special wrt. line breaks. The author has made a
// deliberate choice and might have aligned the contents of the string
// literal accordingly. Thus, we try keep existing line breaks.
return Right.NewlinesBefore > 0;
if ((Right.Previous->is(tok::l_brace) ||
- (Right.Previous->is(tok::less) &&
- Right.Previous->Previous &&
- Right.Previous->Previous->is(tok::equal))
- ) &&
+ (Right.Previous->is(tok::less) && Right.Previous->Previous &&
+ Right.Previous->Previous->is(tok::equal))) &&
Right.NestingLevel == 1 && Style.Language == FormatStyle::LK_Proto) {
// Don't put enums or option definitions onto single lines in protocol
// buffers.
@@ -2609,6 +2669,8 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
return Right.HasUnescapedNewline;
if (isAllmanBrace(Left) || isAllmanBrace(Right))
return (Line.startsWith(tok::kw_enum) && Style.BraceWrapping.AfterEnum) ||
+ (Line.startsWith(tok::kw_typedef, tok::kw_enum) &&
+ Style.BraceWrapping.AfterEnum) ||
(Line.startsWith(tok::kw_class) && Style.BraceWrapping.AfterClass) ||
(Line.startsWith(tok::kw_struct) && Style.BraceWrapping.AfterStruct);
if (Left.is(TT_ObjCBlockLBrace) && !Style.AllowShortBlocksOnASingleLine)
@@ -2639,13 +2701,16 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
} else if (Style.Language == FormatStyle::LK_JavaScript) {
const FormatToken *NonComment = Right.getPreviousNonComment();
if (NonComment &&
- NonComment->isOneOf(tok::kw_return, tok::kw_continue, tok::kw_break,
- tok::kw_throw, Keywords.kw_interface,
- Keywords.kw_type, tok::kw_static, tok::kw_public,
- tok::kw_private, tok::kw_protected,
- Keywords.kw_readonly, Keywords.kw_abstract,
- Keywords.kw_get, Keywords.kw_set))
+ NonComment->isOneOf(
+ tok::kw_return, Keywords.kw_yield, tok::kw_continue, tok::kw_break,
+ tok::kw_throw, Keywords.kw_interface, Keywords.kw_type,
+ tok::kw_static, tok::kw_public, tok::kw_private, tok::kw_protected,
+ Keywords.kw_readonly, Keywords.kw_abstract, Keywords.kw_get,
+ Keywords.kw_set, Keywords.kw_async, Keywords.kw_await))
return false; // Otherwise automatic semicolon insertion would trigger.
+ if (Left.Tok.getIdentifierInfo() &&
+ Right.startsSequence(tok::l_square, tok::r_square))
+ return false; // breaking in "foo[]" creates illegal TS type syntax.
if (Left.is(TT_JsFatArrow) && Right.is(tok::l_brace))
return false;
if (Left.is(TT_JsTypeColon))
@@ -2702,8 +2767,7 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
// list.
return Left.BlockKind == BK_BracedInit ||
(Left.is(TT_CtorInitializerColon) &&
- Style.BreakConstructorInitializers ==
- FormatStyle::BCIS_AfterColon);
+ Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon);
if (Left.is(tok::question) && Right.is(tok::colon))
return false;
if (Right.is(TT_ConditionalExpr) || Right.is(tok::question))
@@ -2820,7 +2884,7 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
}
void TokenAnnotator::printDebugInfo(const AnnotatedLine &Line) {
- llvm::errs() << "AnnotatedTokens:\n";
+ llvm::errs() << "AnnotatedTokens(L=" << Line.Level << "):\n";
const FormatToken *Tok = Line.First;
while (Tok) {
llvm::errs() << " M=" << Tok->MustBreakBefore
@@ -2828,10 +2892,9 @@ void TokenAnnotator::printDebugInfo(const AnnotatedLine &Line) {
<< " T=" << getTokenTypeName(Tok->Type)
<< " S=" << Tok->SpacesRequiredBefore
<< " B=" << Tok->BlockParameterCount
- << " BK=" << Tok->BlockKind
- << " P=" << Tok->SplitPenalty << " Name=" << Tok->Tok.getName()
- << " L=" << Tok->TotalLength << " PPK=" << Tok->PackingKind
- << " FakeLParens=";
+ << " BK=" << Tok->BlockKind << " P=" << Tok->SplitPenalty
+ << " Name=" << Tok->Tok.getName() << " L=" << Tok->TotalLength
+ << " PPK=" << Tok->PackingKind << " FakeLParens=";
for (unsigned i = 0, e = Tok->FakeLParens.size(); i != e; ++i)
llvm::errs() << Tok->FakeLParens[i] << "/";
llvm::errs() << " FakeRParens=" << Tok->FakeRParens;
diff --git a/lib/Format/TokenAnnotator.h b/lib/Format/TokenAnnotator.h
index 805509533bf9..04a18d45b82e 100644
--- a/lib/Format/TokenAnnotator.h
+++ b/lib/Format/TokenAnnotator.h
@@ -43,7 +43,8 @@ public:
InPPDirective(Line.InPPDirective),
MustBeDeclaration(Line.MustBeDeclaration), MightBeFunctionDecl(false),
IsMultiVariableDeclStmt(false), Affected(false),
- LeadingEmptyLinesAffected(false), ChildrenAffected(false) {
+ LeadingEmptyLinesAffected(false), ChildrenAffected(false),
+ FirstStartColumn(Line.FirstStartColumn) {
assert(!Line.Tokens.empty());
// Calculate Next and Previous for all tokens. Note that we must overwrite
@@ -127,6 +128,8 @@ public:
/// \c True if one of this line's children intersects with an input range.
bool ChildrenAffected;
+ unsigned FirstStartColumn;
+
private:
// Disallow copying.
AnnotatedLine(const AnnotatedLine &) = delete;
diff --git a/lib/Format/UnwrappedLineFormatter.cpp b/lib/Format/UnwrappedLineFormatter.cpp
index 2005a2822924..60dc1a7169d1 100644
--- a/lib/Format/UnwrappedLineFormatter.cpp
+++ b/lib/Format/UnwrappedLineFormatter.cpp
@@ -164,8 +164,7 @@ public:
return nullptr;
const AnnotatedLine *Current = *Next;
IndentTracker.nextLine(*Current);
- unsigned MergedLines =
- tryFitMultipleLinesInOne(IndentTracker, Next, End);
+ unsigned MergedLines = tryFitMultipleLinesInOne(IndentTracker, Next, End);
if (MergedLines > 0 && Style.ColumnLimit == 0)
// Disallow line merging if there is a break at the start of one of the
// input lines.
@@ -228,14 +227,16 @@ private:
if (Tok && Tok->getNamespaceToken())
return !Style.BraceWrapping.SplitEmptyNamespace && EmptyBlock
- ? tryMergeSimpleBlock(I, E, Limit) : 0;
+ ? tryMergeSimpleBlock(I, E, Limit)
+ : 0;
if (Tok && Tok->is(tok::kw_typedef))
Tok = Tok->getNextNonComment();
if (Tok && Tok->isOneOf(tok::kw_class, tok::kw_struct, tok::kw_union,
- Keywords.kw_interface))
+ tok::kw_extern, Keywords.kw_interface))
return !Style.BraceWrapping.SplitEmptyRecord && EmptyBlock
- ? tryMergeSimpleBlock(I, E, Limit) : 0;
+ ? tryMergeSimpleBlock(I, E, Limit)
+ : 0;
}
// FIXME: TheLine->Level != 0 might or might not be the right check to do.
@@ -279,15 +280,43 @@ private:
}
}
+ // Try to merge a function block with left brace unwrapped
if (TheLine->Last->is(TT_FunctionLBrace) &&
TheLine->First != TheLine->Last) {
return MergeShortFunctions ? tryMergeSimpleBlock(I, E, Limit) : 0;
}
+ // Try to merge a control statement block with left brace unwrapped
+ if (TheLine->Last->is(tok::l_brace) && TheLine->First != TheLine->Last &&
+ TheLine->First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_for)) {
+ return Style.AllowShortBlocksOnASingleLine
+ ? tryMergeSimpleBlock(I, E, Limit)
+ : 0;
+ }
+ // Try to merge a control statement block with left brace wrapped
+ if (I[1]->First->is(tok::l_brace) &&
+ TheLine->First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_for)) {
+ return Style.BraceWrapping.AfterControlStatement
+ ? tryMergeSimpleBlock(I, E, Limit)
+ : 0;
+ }
+ // Try to merge either empty or one-line block if it is preceded by control
+ // statement token
+ if (TheLine->First->is(tok::l_brace) && TheLine->First == TheLine->Last &&
+ I != AnnotatedLines.begin() &&
+ I[-1]->First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_for)) {
+ return Style.AllowShortBlocksOnASingleLine
+ ? tryMergeSimpleBlock(I - 1, E, Limit)
+ : 0;
+ }
+ // Try to merge a block with left brace wrapped that wasn't yet covered
if (TheLine->Last->is(tok::l_brace)) {
- return !Style.BraceWrapping.AfterFunction
+ return !Style.BraceWrapping.AfterFunction ||
+ (I[1]->First->is(tok::r_brace) &&
+ !Style.BraceWrapping.SplitEmptyRecord)
? tryMergeSimpleBlock(I, E, Limit)
: 0;
}
+ // Try to merge a function block with left brace wrapped
if (I[1]->First->is(TT_FunctionLBrace) &&
Style.BraceWrapping.AfterFunction) {
if (I[1]->Last->is(TT_LineComment))
@@ -382,7 +411,9 @@ private:
return 0;
unsigned NumStmts = 0;
unsigned Length = 0;
+ bool EndsWithComment = false;
bool InPPDirective = I[0]->InPPDirective;
+ const unsigned Level = I[0]->Level;
for (; NumStmts < 3; ++NumStmts) {
if (I + 1 + NumStmts == E)
break;
@@ -392,9 +423,26 @@ private:
if (Line->First->isOneOf(tok::kw_case, tok::kw_default, tok::r_brace))
break;
if (Line->First->isOneOf(tok::kw_if, tok::kw_for, tok::kw_switch,
- tok::kw_while, tok::comment) ||
- Line->Last->is(tok::comment))
+ tok::kw_while) ||
+ EndsWithComment)
return 0;
+ if (Line->First->is(tok::comment)) {
+ if (Level != Line->Level)
+ return 0;
+ SmallVectorImpl<AnnotatedLine *>::const_iterator J = I + 2 + NumStmts;
+ for (; J != E; ++J) {
+ Line = *J;
+ if (Line->InPPDirective != InPPDirective)
+ break;
+ if (Line->First->isOneOf(tok::kw_case, tok::kw_default, tok::r_brace))
+ break;
+ if (Line->First->isNot(tok::comment) || Level != Line->Level)
+ return 0;
+ }
+ break;
+ }
+ if (Line->Last->is(tok::comment))
+ EndsWithComment = true;
Length += I[1 + NumStmts]->Last->TotalLength + 1; // 1 for the space.
}
if (NumStmts == 0 || NumStmts == 3 || Length > Limit)
@@ -425,11 +473,27 @@ private:
tok::kw_for, tok::r_brace, Keywords.kw___except)) {
if (!Style.AllowShortBlocksOnASingleLine)
return 0;
+ // Don't merge when we can't, except for the case when
+ // the control statement block is empty.
if (!Style.AllowShortIfStatementsOnASingleLine &&
- Line.startsWith(tok::kw_if))
+ Line.startsWith(tok::kw_if) &&
+ !Style.BraceWrapping.AfterControlStatement &&
+ !I[1]->First->is(tok::r_brace))
+ return 0;
+ if (!Style.AllowShortIfStatementsOnASingleLine &&
+ Line.startsWith(tok::kw_if) &&
+ Style.BraceWrapping.AfterControlStatement && I + 2 != E &&
+ !I[2]->First->is(tok::r_brace))
+ return 0;
+ if (!Style.AllowShortLoopsOnASingleLine &&
+ Line.First->isOneOf(tok::kw_while, tok::kw_do, tok::kw_for) &&
+ !Style.BraceWrapping.AfterControlStatement &&
+ !I[1]->First->is(tok::r_brace))
return 0;
if (!Style.AllowShortLoopsOnASingleLine &&
- Line.First->isOneOf(tok::kw_while, tok::kw_do, tok::kw_for))
+ Line.First->isOneOf(tok::kw_while, tok::kw_do, tok::kw_for) &&
+ Style.BraceWrapping.AfterControlStatement && I + 2 != E &&
+ !I[2]->First->is(tok::r_brace))
return 0;
// FIXME: Consider an option to allow short exception handling clauses on
// a single line.
@@ -441,52 +505,78 @@ private:
return 0;
}
- FormatToken *Tok = I[1]->First;
- if (Tok->is(tok::r_brace) && !Tok->MustBreakBefore &&
- (Tok->getNextNonComment() == nullptr ||
- Tok->getNextNonComment()->is(tok::semi))) {
- // We merge empty blocks even if the line exceeds the column limit.
- Tok->SpacesRequiredBefore = 0;
- Tok->CanBreakBefore = true;
- return 1;
- } else if (Limit != 0 && !Line.startsWith(tok::kw_namespace) &&
- !startsExternCBlock(Line)) {
- // We don't merge short records.
- FormatToken *RecordTok =
- Line.First->is(tok::kw_typedef) ? Line.First->Next : Line.First;
- if (RecordTok &&
- RecordTok->isOneOf(tok::kw_class, tok::kw_union, tok::kw_struct,
- Keywords.kw_interface))
- return 0;
+ if (Line.Last->is(tok::l_brace)) {
+ FormatToken *Tok = I[1]->First;
+ if (Tok->is(tok::r_brace) && !Tok->MustBreakBefore &&
+ (Tok->getNextNonComment() == nullptr ||
+ Tok->getNextNonComment()->is(tok::semi))) {
+ // We merge empty blocks even if the line exceeds the column limit.
+ Tok->SpacesRequiredBefore = 0;
+ Tok->CanBreakBefore = true;
+ return 1;
+ } else if (Limit != 0 && !Line.startsWith(tok::kw_namespace) &&
+ !startsExternCBlock(Line)) {
+ // We don't merge short records.
+ FormatToken *RecordTok = Line.First;
+ // Skip record modifiers.
+ while (RecordTok->Next &&
+ RecordTok->isOneOf(tok::kw_typedef, tok::kw_export,
+ Keywords.kw_declare, Keywords.kw_abstract,
+ tok::kw_default))
+ RecordTok = RecordTok->Next;
+ if (RecordTok &&
+ RecordTok->isOneOf(tok::kw_class, tok::kw_union, tok::kw_struct,
+ Keywords.kw_interface))
+ return 0;
- // Check that we still have three lines and they fit into the limit.
- if (I + 2 == E || I[2]->Type == LT_Invalid)
- return 0;
- Limit = limitConsideringMacros(I + 2, E, Limit);
+ // Check that we still have three lines and they fit into the limit.
+ if (I + 2 == E || I[2]->Type == LT_Invalid)
+ return 0;
+ Limit = limitConsideringMacros(I + 2, E, Limit);
- if (!nextTwoLinesFitInto(I, Limit))
- return 0;
+ if (!nextTwoLinesFitInto(I, Limit))
+ return 0;
- // Second, check that the next line does not contain any braces - if it
- // does, readability declines when putting it into a single line.
- if (I[1]->Last->is(TT_LineComment))
- return 0;
- do {
- if (Tok->is(tok::l_brace) && Tok->BlockKind != BK_BracedInit)
+ // Second, check that the next line does not contain any braces - if it
+ // does, readability declines when putting it into a single line.
+ if (I[1]->Last->is(TT_LineComment))
+ return 0;
+ do {
+ if (Tok->is(tok::l_brace) && Tok->BlockKind != BK_BracedInit)
+ return 0;
+ Tok = Tok->Next;
+ } while (Tok);
+
+ // Last, check that the third line starts with a closing brace.
+ Tok = I[2]->First;
+ if (Tok->isNot(tok::r_brace))
return 0;
- Tok = Tok->Next;
- } while (Tok);
- // Last, check that the third line starts with a closing brace.
- Tok = I[2]->First;
- if (Tok->isNot(tok::r_brace))
- return 0;
+ // Don't merge "if (a) { .. } else {".
+ if (Tok->Next && Tok->Next->is(tok::kw_else))
+ return 0;
- // Don't merge "if (a) { .. } else {".
- if (Tok->Next && Tok->Next->is(tok::kw_else))
+ return 2;
+ }
+ } else if (I[1]->First->is(tok::l_brace)) {
+ if (I[1]->Last->is(TT_LineComment))
return 0;
- return 2;
+ // Check for Limit <= 2 to account for the " {".
+ if (Limit <= 2 || (Style.ColumnLimit == 0 && containsMustBreak(*I)))
+ return 0;
+ Limit -= 2;
+ unsigned MergedLines = 0;
+ if (Style.AllowShortBlocksOnASingleLine ||
+ (I[1]->First == I[1]->Last && I + 2 != E &&
+ I[2]->First->is(tok::r_brace))) {
+ MergedLines = tryMergeSimpleBlock(I + 1, E, Limit);
+ // If we managed to merge the block, count the statement header, which
+ // is on a separate line.
+ if (MergedLines > 0)
+ ++MergedLines;
+ }
+ return MergedLines;
}
return 0;
}
@@ -574,7 +664,9 @@ public:
/// \brief Formats an \c AnnotatedLine and returns the penalty.
///
/// If \p DryRun is \c false, directly applies the changes.
- virtual unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent,
+ virtual unsigned formatLine(const AnnotatedLine &Line,
+ unsigned FirstIndent,
+ unsigned FirstStartColumn,
bool DryRun) = 0;
protected:
@@ -645,7 +737,8 @@ protected:
*Child->First, /*Newlines=*/0, /*Spaces=*/1,
/*StartOfTokenColumn=*/State.Column, State.Line->InPPDirective);
}
- Penalty += formatLine(*Child, State.Column + 1, DryRun);
+ Penalty +=
+ formatLine(*Child, State.Column + 1, /*FirstStartColumn=*/0, DryRun);
State.Column += 1 + Child->Last->TotalLength;
return true;
@@ -671,10 +764,10 @@ public:
/// \brief Formats the line, simply keeping all of the input's line breaking
/// decisions.
unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent,
- bool DryRun) override {
+ unsigned FirstStartColumn, bool DryRun) override {
assert(!DryRun);
- LineState State =
- Indenter->getInitialState(FirstIndent, &Line, /*DryRun=*/false);
+ LineState State = Indenter->getInitialState(FirstIndent, FirstStartColumn,
+ &Line, /*DryRun=*/false);
while (State.NextToken) {
bool Newline =
Indenter->mustBreak(State) ||
@@ -697,9 +790,10 @@ public:
/// \brief Puts all tokens into a single line.
unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent,
- bool DryRun) override {
+ unsigned FirstStartColumn, bool DryRun) override {
unsigned Penalty = 0;
- LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun);
+ LineState State =
+ Indenter->getInitialState(FirstIndent, FirstStartColumn, &Line, DryRun);
while (State.NextToken) {
formatChildren(State, /*Newline=*/false, DryRun, Penalty);
Indenter->addTokenToState(
@@ -721,8 +815,9 @@ public:
/// \brief Formats the line by finding the best line breaks with line lengths
/// below the column limit.
unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent,
- bool DryRun) override {
- LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun);
+ unsigned FirstStartColumn, bool DryRun) override {
+ LineState State =
+ Indenter->getInitialState(FirstIndent, FirstStartColumn, &Line, DryRun);
// If the ObjC method declaration does not fit on a line, we should format
// it with one arg per line.
@@ -763,7 +858,8 @@ private:
/// \brief The BFS queue type.
typedef std::priority_queue<QueueItem, std::vector<QueueItem>,
- std::greater<QueueItem>> QueueType;
+ std::greater<QueueItem>>
+ QueueType;
/// \brief Analyze the entire solution space starting from \p InitialState.
///
@@ -888,7 +984,10 @@ private:
unsigned
UnwrappedLineFormatter::format(const SmallVectorImpl<AnnotatedLine *> &Lines,
bool DryRun, int AdditionalIndent,
- bool FixBadIndentation) {
+ bool FixBadIndentation,
+ unsigned FirstStartColumn,
+ unsigned NextStartColumn,
+ unsigned LastStartColumn) {
LineJoiner Joiner(Style, Keywords, Lines);
// Try to look up already computed penalty in DryRun-mode.
@@ -908,9 +1007,10 @@ UnwrappedLineFormatter::format(const SmallVectorImpl<AnnotatedLine *> &Lines,
// The minimum level of consecutive lines that have been formatted.
unsigned RangeMinLevel = UINT_MAX;
+ bool FirstLine = true;
for (const AnnotatedLine *Line =
Joiner.getNextMergedLine(DryRun, IndentTracker);
- Line; Line = NextLine) {
+ Line; Line = NextLine, FirstLine = false) {
const AnnotatedLine &TheLine = *Line;
unsigned Indent = IndentTracker.getIndent();
@@ -934,8 +1034,12 @@ UnwrappedLineFormatter::format(const SmallVectorImpl<AnnotatedLine *> &Lines,
}
if (ShouldFormat && TheLine.Type != LT_Invalid) {
- if (!DryRun)
- formatFirstToken(TheLine, PreviousLine, Indent);
+ if (!DryRun) {
+ bool LastLine = Line->First->is(tok::eof);
+ formatFirstToken(TheLine, PreviousLine,
+ Indent,
+ LastLine ? LastStartColumn : NextStartColumn + Indent);
+ }
NextLine = Joiner.getNextMergedLine(DryRun, IndentTracker);
unsigned ColumnLimit = getColumnLimit(TheLine.InPPDirective, NextLine);
@@ -944,16 +1048,18 @@ UnwrappedLineFormatter::format(const SmallVectorImpl<AnnotatedLine *> &Lines,
(TheLine.Type == LT_ImportStatement &&
(Style.Language != FormatStyle::LK_JavaScript ||
!Style.JavaScriptWrapImports));
-
if (Style.ColumnLimit == 0)
NoColumnLimitLineFormatter(Indenter, Whitespaces, Style, this)
- .formatLine(TheLine, Indent, DryRun);
+ .formatLine(TheLine, NextStartColumn + Indent,
+ FirstLine ? FirstStartColumn : 0, DryRun);
else if (FitsIntoOneLine)
Penalty += NoLineBreakFormatter(Indenter, Whitespaces, Style, this)
- .formatLine(TheLine, Indent, DryRun);
+ .formatLine(TheLine, NextStartColumn + Indent,
+ FirstLine ? FirstStartColumn : 0, DryRun);
else
Penalty += OptimizingLineFormatter(Indenter, Whitespaces, Style, this)
- .formatLine(TheLine, Indent, DryRun);
+ .formatLine(TheLine, NextStartColumn + Indent,
+ FirstLine ? FirstStartColumn : 0, DryRun);
RangeMinLevel = std::min(RangeMinLevel, TheLine.Level);
} else {
// If no token in the current line is affected, we still need to format
@@ -976,6 +1082,7 @@ UnwrappedLineFormatter::format(const SmallVectorImpl<AnnotatedLine *> &Lines,
// Format the first token.
if (ReformatLeadingWhitespace)
formatFirstToken(TheLine, PreviousLine,
+ TheLine.First->OriginalColumn,
TheLine.First->OriginalColumn);
else
Whitespaces->addUntouchableToken(*TheLine.First,
@@ -998,12 +1105,14 @@ UnwrappedLineFormatter::format(const SmallVectorImpl<AnnotatedLine *> &Lines,
void UnwrappedLineFormatter::formatFirstToken(const AnnotatedLine &Line,
const AnnotatedLine *PreviousLine,
- unsigned Indent) {
- FormatToken& RootToken = *Line.First;
+ unsigned Indent,
+ unsigned NewlineIndent) {
+ FormatToken &RootToken = *Line.First;
if (RootToken.is(tok::eof)) {
unsigned Newlines = std::min(RootToken.NewlinesBefore, 1u);
- Whitespaces->replaceWhitespace(RootToken, Newlines, /*Spaces=*/0,
- /*StartOfTokenColumn=*/0);
+ unsigned TokenIndent = Newlines ? NewlineIndent : 0;
+ Whitespaces->replaceWhitespace(RootToken, Newlines, TokenIndent,
+ TokenIndent);
return;
}
unsigned Newlines =
@@ -1013,6 +1122,9 @@ void UnwrappedLineFormatter::formatFirstToken(const AnnotatedLine &Line,
(!RootToken.Next ||
(RootToken.Next->is(tok::semi) && !RootToken.Next->Next)))
Newlines = std::min(Newlines, 1u);
+ // Remove empty lines at the start of nested blocks (lambdas/arrow functions)
+ if (PreviousLine == nullptr && Line.Level > 0)
+ Newlines = std::min(Newlines, 1u);
if (Newlines == 0 && !RootToken.IsFirst)
Newlines = 1;
if (RootToken.IsFirst && !RootToken.HasUnescapedNewline)
@@ -1035,6 +1147,13 @@ void UnwrappedLineFormatter::formatFirstToken(const AnnotatedLine &Line,
(!PreviousLine->InPPDirective || !RootToken.HasUnescapedNewline))
Newlines = std::min(1u, Newlines);
+ if (Newlines)
+ Indent = NewlineIndent;
+
+ // Preprocessor directives get indented after the hash, if indented.
+ if (Line.Type == LT_PreprocessorDirective || Line.Type == LT_ImportStatement)
+ Indent = 0;
+
Whitespaces->replaceWhitespace(RootToken, Newlines, Indent, Indent,
Line.InPPDirective &&
!RootToken.HasUnescapedNewline);
diff --git a/lib/Format/UnwrappedLineFormatter.h b/lib/Format/UnwrappedLineFormatter.h
index 55f0d1cac689..6432ca83a4c9 100644
--- a/lib/Format/UnwrappedLineFormatter.h
+++ b/lib/Format/UnwrappedLineFormatter.h
@@ -35,19 +35,22 @@ public:
const SourceManager &SourceMgr,
FormattingAttemptStatus *Status)
: Indenter(Indenter), Whitespaces(Whitespaces), Style(Style),
- Keywords(Keywords), SourceMgr(SourceMgr),
- Status(Status) {}
+ Keywords(Keywords), SourceMgr(SourceMgr), Status(Status) {}
/// \brief Format the current block and return the penalty.
unsigned format(const SmallVectorImpl<AnnotatedLine *> &Lines,
bool DryRun = false, int AdditionalIndent = 0,
- bool FixBadIndentation = false);
+ bool FixBadIndentation = false,
+ unsigned FirstStartColumn = 0,
+ unsigned NextStartColumn = 0,
+ unsigned LastStartColumn = 0);
private:
/// \brief Add a new line and the required indent before the first Token
/// of the \c UnwrappedLine if there was no structural parsing error.
void formatFirstToken(const AnnotatedLine &Line,
- const AnnotatedLine *PreviousLine, unsigned Indent);
+ const AnnotatedLine *PreviousLine, unsigned Indent,
+ unsigned NewlineIndent);
/// \brief Returns the column limit for a line, taking into account whether we
/// need an escaped newline due to a continued preprocessor directive.
diff --git a/lib/Format/UnwrappedLineParser.cpp b/lib/Format/UnwrappedLineParser.cpp
index faac5a371c26..b8608dcac9c7 100644
--- a/lib/Format/UnwrappedLineParser.cpp
+++ b/lib/Format/UnwrappedLineParser.cpp
@@ -18,6 +18,8 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+
#define DEBUG_TYPE "format-parser"
namespace clang {
@@ -56,8 +58,7 @@ private:
};
static bool isLineComment(const FormatToken &FormatTok) {
- return FormatTok.is(tok::comment) &&
- FormatTok.TokenText.startswith("//");
+ return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
}
// Checks if \p FormatTok is a line comment that continues the line comment
@@ -226,15 +227,21 @@ private:
UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
const AdditionalKeywords &Keywords,
+ unsigned FirstStartColumn,
ArrayRef<FormatToken *> Tokens,
UnwrappedLineConsumer &Callback)
: Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
CurrentLines(&Lines), Style(Style), Keywords(Keywords),
CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
- Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1) {}
+ Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
+ IfNdefCondition(nullptr), FoundIncludeGuardStart(false),
+ IncludeGuardRejected(false), FirstStartColumn(FirstStartColumn) {}
void UnwrappedLineParser::reset() {
PPBranchLevel = -1;
+ IfNdefCondition = nullptr;
+ FoundIncludeGuardStart = false;
+ IncludeGuardRejected = false;
Line.reset(new UnwrappedLine);
CommentsBeforeNextToken.clear();
FormatTok = nullptr;
@@ -243,10 +250,12 @@ void UnwrappedLineParser::reset() {
CurrentLines = &Lines;
DeclarationScopeStack.clear();
PPStack.clear();
+ Line->FirstStartColumn = FirstStartColumn;
}
void UnwrappedLineParser::parse() {
IndexedTokenSource TokenSource(AllTokens);
+ Line->FirstStartColumn = FirstStartColumn;
do {
DEBUG(llvm::dbgs() << "----\n");
reset();
@@ -326,6 +335,12 @@ void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
break;
case tok::kw_default:
case tok::kw_case:
+ if (Style.Language == FormatStyle::LK_JavaScript &&
+ Line->MustBeDeclaration) {
+ // A 'case: string' style field declaration.
+ parseStructuralElement();
+ break;
+ }
if (!SwitchLabelEncountered &&
(Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
++Line->Level;
@@ -346,7 +361,7 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
// definitions, too.
unsigned StoredPosition = Tokens->getPosition();
FormatToken *Tok = FormatTok;
- const FormatToken *PrevTok = getPreviousToken();
+ const FormatToken *PrevTok = Tok->Previous;
// Keep a stack of positions of lbrace tokens. We will
// update information about whether an lbrace starts a
// braced init list or a different block during the loop.
@@ -364,13 +379,16 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
switch (Tok->Tok.getKind()) {
case tok::l_brace:
if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) {
- if (PrevTok->is(tok::colon))
- // A colon indicates this code is in a type, or a braced list
- // following a label in an object literal ({a: {b: 1}}). The code
- // below could be confused by semicolons between the individual
- // members in a type member list, which would normally trigger
- // BK_Block. In both cases, this must be parsed as an inline braced
- // init.
+ if (PrevTok->isOneOf(tok::colon, tok::less))
+ // A ':' indicates this code is in a type, or a braced list
+ // following a label in an object literal ({a: {b: 1}}).
+ // A '<' could be an object used in a comparison, but that is nonsense
+ // code (can never return true), so more likely it is a generic type
+ // argument (`X<{a: string; b: number}>`).
+ // The code below could be confused by semicolons between the
+ // individual members in a type member list, which would normally
+ // trigger BK_Block. In both cases, this must be parsed as an inline
+ // braced init.
Tok->BlockKind = BK_BracedInit;
else if (PrevTok->is(tok::r_paren))
// `) { }` can only occur in function or method declarations in JS.
@@ -452,6 +470,21 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
FormatTok = Tokens->setPosition(StoredPosition);
}
+template <class T>
+static inline void hash_combine(std::size_t &seed, const T &v) {
+ std::hash<T> hasher;
+ seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
+}
+
+size_t UnwrappedLineParser::computePPHash() const {
+ size_t h = 0;
+ for (const auto &i : PPStack) {
+ hash_combine(h, size_t(i.Kind));
+ hash_combine(h, i.Line);
+ }
+ return h;
+}
+
void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
bool MunchSemi) {
assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
@@ -459,16 +492,21 @@ void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
FormatTok->BlockKind = BK_Block;
+ size_t PPStartHash = computePPHash();
+
unsigned InitialLevel = Line->Level;
- nextToken();
+ nextToken(/*LevelDifference=*/AddLevel ? 1 : 0);
if (MacroBlock && FormatTok->is(tok::l_paren))
parseParens();
+ size_t NbPreprocessorDirectives =
+ CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
addUnwrappedLine();
- size_t OpeningLineIndex = CurrentLines->empty()
- ? (UnwrappedLine::kInvalidIndex)
- : (CurrentLines->size() - 1);
+ size_t OpeningLineIndex =
+ CurrentLines->empty()
+ ? (UnwrappedLine::kInvalidIndex)
+ : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
MustBeDeclaration);
@@ -486,7 +524,10 @@ void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
return;
}
- nextToken(); // Munch the closing brace.
+ size_t PPEndHash = computePPHash();
+
+ // Munch the closing brace.
+ nextToken(/*LevelDifference=*/AddLevel ? -1 : 0);
if (MacroBlock && FormatTok->is(tok::l_paren))
parseParens();
@@ -494,11 +535,14 @@ void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
if (MunchSemi && FormatTok->Tok.is(tok::semi))
nextToken();
Line->Level = InitialLevel;
- Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
- if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
- // Update the opening line to add the forward reference as well
- (*CurrentLines)[OpeningLineIndex].MatchingOpeningBlockLineIndex =
- CurrentLines->size() - 1;
+
+ if (PPStartHash == PPEndHash) {
+ Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
+ if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
+ // Update the opening line to add the forward reference as well
+ (*CurrentLines)[OpeningLineIndex].MatchingOpeningBlockLineIndex =
+ CurrentLines->size() - 1;
+ }
}
}
@@ -555,9 +599,8 @@ void UnwrappedLineParser::parseChildBlock() {
FormatTok->BlockKind = BK_Block;
nextToken();
{
- bool SkipIndent =
- (Style.Language == FormatStyle::LK_JavaScript &&
- (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
+ bool SkipIndent = (Style.Language == FormatStyle::LK_JavaScript &&
+ (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
ScopedLineState LineState(*this);
ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
/*MustBeDeclaration=*/false);
@@ -606,10 +649,15 @@ void UnwrappedLineParser::parsePPDirective() {
}
void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
- if (Unreachable || (!PPStack.empty() && PPStack.back() == PP_Unreachable))
- PPStack.push_back(PP_Unreachable);
+ size_t Line = CurrentLines->size();
+ if (CurrentLines == &PreprocessorDirectives)
+ Line += Lines.size();
+
+ if (Unreachable ||
+ (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
+ PPStack.push_back({PP_Unreachable, Line});
else
- PPStack.push_back(PP_Conditional);
+ PPStack.push_back({PP_Conditional, Line});
}
void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
@@ -643,7 +691,7 @@ void UnwrappedLineParser::conditionalCompilationEnd() {
}
}
// Guard against #endif's without #if.
- if (PPBranchLevel > 0)
+ if (PPBranchLevel > -1)
--PPBranchLevel;
if (!PPChainBranchIndex.empty())
PPChainBranchIndex.pop();
@@ -660,12 +708,35 @@ void UnwrappedLineParser::parsePPIf(bool IfDef) {
if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
Unreachable = true;
conditionalCompilationStart(Unreachable);
+ FormatToken *IfCondition = FormatTok;
+ // If there's an #ifndef and the only lines before it are comments, it could
+ // be an include guard.
+ bool MaybeIncludeGuard = IfNDef;
+ if (!IncludeGuardRejected && !FoundIncludeGuardStart && MaybeIncludeGuard) {
+ for (auto &Line : Lines) {
+ if (!Line.Tokens.front().Tok->is(tok::comment)) {
+ MaybeIncludeGuard = false;
+ IncludeGuardRejected = true;
+ break;
+ }
+ }
+ }
+ --PPBranchLevel;
parsePPUnknown();
+ ++PPBranchLevel;
+ if (!IncludeGuardRejected && !FoundIncludeGuardStart && MaybeIncludeGuard)
+ IfNdefCondition = IfCondition;
}
void UnwrappedLineParser::parsePPElse() {
+ // If a potential include guard has an #else, it's not an include guard.
+ if (FoundIncludeGuardStart && PPBranchLevel == 0)
+ FoundIncludeGuardStart = false;
conditionalCompilationAlternative();
+ if (PPBranchLevel > -1)
+ --PPBranchLevel;
parsePPUnknown();
+ ++PPBranchLevel;
}
void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
@@ -673,6 +744,16 @@ void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
void UnwrappedLineParser::parsePPEndIf() {
conditionalCompilationEnd();
parsePPUnknown();
+ // If the #endif of a potential include guard is the last thing in the file,
+ // then we count it as a real include guard and subtract one from every
+ // preprocessor indent.
+ unsigned TokenPosition = Tokens->getPosition();
+ FormatToken *PeekNext = AllTokens[TokenPosition];
+ if (FoundIncludeGuardStart && PPBranchLevel == -1 && PeekNext->is(tok::eof) &&
+ Style.IndentPPDirectives != FormatStyle::PPDIS_None)
+ for (auto &Line : Lines)
+ if (Line.InPPDirective && Line.Level > 0)
+ --Line.Level;
}
void UnwrappedLineParser::parsePPDefine() {
@@ -682,14 +763,26 @@ void UnwrappedLineParser::parsePPDefine() {
parsePPUnknown();
return;
}
+ if (IfNdefCondition && IfNdefCondition->TokenText == FormatTok->TokenText) {
+ FoundIncludeGuardStart = true;
+ for (auto &Line : Lines) {
+ if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
+ FoundIncludeGuardStart = false;
+ break;
+ }
+ }
+ }
+ IfNdefCondition = nullptr;
nextToken();
if (FormatTok->Tok.getKind() == tok::l_paren &&
FormatTok->WhitespaceRange.getBegin() ==
FormatTok->WhitespaceRange.getEnd()) {
parseParens();
}
+ if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash)
+ Line->Level += PPBranchLevel + 1;
addUnwrappedLine();
- Line->Level = 1;
+ ++Line->Level;
// Errors during a preprocessor directive can only affect the layout of the
// preprocessor directive, and thus we ignore them. An alternative approach
@@ -703,7 +796,10 @@ void UnwrappedLineParser::parsePPUnknown() {
do {
nextToken();
} while (!eof());
+ if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash)
+ Line->Level += PPBranchLevel + 1;
addUnwrappedLine();
+ IfNdefCondition = nullptr;
}
// Here we blacklist certain tokens that are not usually the first token in an
@@ -746,8 +842,8 @@ static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
Keywords.kw_let, Keywords.kw_var, tok::kw_const,
Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
- Keywords.kw_instanceof, Keywords.kw_interface,
- Keywords.kw_throws, Keywords.kw_from));
+ Keywords.kw_instanceof, Keywords.kw_interface, Keywords.kw_throws,
+ Keywords.kw_from));
}
static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
@@ -800,11 +896,14 @@ void UnwrappedLineParser::readTokenWithJavaScriptASI() {
bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
bool PreviousStartsTemplateExpr =
Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
- if (PreviousMustBeValue && Line && Line->Tokens.size() > 1) {
- // If the token before the previous one is an '@', the previous token is an
- // annotation and can precede another identifier/value.
- const FormatToken *PrePrevious = std::prev(Line->Tokens.end(), 2)->Tok;
- if (PrePrevious->is(tok::at))
+ if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
+ // If the line contains an '@' sign, the previous token might be an
+ // annotation, which can precede another identifier/value.
+ bool HasAt = std::find_if(Line->Tokens.begin(), Line->Tokens.end(),
+ [](UnwrappedLineNode &LineNode) {
+ return LineNode.Tok->is(tok::at);
+ }) != Line->Tokens.end();
+ if (HasAt)
return;
}
if (Next->is(tok::exclaim) && PreviousMustBeValue)
@@ -817,7 +916,8 @@ void UnwrappedLineParser::readTokenWithJavaScriptASI() {
Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
tok::minusminus)))
return addUnwrappedLine();
- if (PreviousMustBeValue && isJSDeclOrStmt(Keywords, Next))
+ if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
+ isJSDeclOrStmt(Keywords, Next))
return addUnwrappedLine();
}
@@ -922,13 +1022,22 @@ void UnwrappedLineParser::parseStructuralElement() {
parseDoWhile();
return;
case tok::kw_switch:
+ if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
+ // 'switch: string' field declaration.
+ break;
parseSwitch();
return;
case tok::kw_default:
+ if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
+ // 'default: string' field declaration.
+ break;
nextToken();
parseLabel();
return;
case tok::kw_case:
+ if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
+ // 'case: string' field declaration.
+ break;
parseCaseLabel();
return;
case tok::kw_try:
@@ -940,7 +1049,12 @@ void UnwrappedLineParser::parseStructuralElement() {
if (FormatTok->Tok.is(tok::string_literal)) {
nextToken();
if (FormatTok->Tok.is(tok::l_brace)) {
- parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
+ if (Style.BraceWrapping.AfterExternBlock) {
+ addUnwrappedLine();
+ parseBlock(/*MustBeDeclaration=*/true);
+ } else {
+ parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
+ }
addUnwrappedLine();
return;
}
@@ -996,7 +1110,7 @@ void UnwrappedLineParser::parseStructuralElement() {
break;
}
do {
- const FormatToken *Previous = getPreviousToken();
+ const FormatToken *Previous = FormatTok->Previous;
switch (FormatTok->Tok.getKind()) {
case tok::at:
nextToken();
@@ -1186,7 +1300,7 @@ void UnwrappedLineParser::parseStructuralElement() {
nextToken();
parseBracedList();
} else if (Style.Language == FormatStyle::LK_Proto &&
- FormatTok->Tok.is(tok::less)) {
+ FormatTok->Tok.is(tok::less)) {
nextToken();
parseBracedList(/*ContinueOnSemicolons=*/false,
/*ClosingBraceKind=*/tok::greater);
@@ -1210,14 +1324,6 @@ bool UnwrappedLineParser::tryToParseLambda() {
nextToken();
return false;
}
- const FormatToken* Previous = getPreviousToken();
- if (Previous &&
- (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
- tok::kw_delete) ||
- Previous->closesScope() || Previous->isSimpleTypeSpecifier())) {
- nextToken();
- return false;
- }
assert(FormatTok->is(tok::l_square));
FormatToken &LSquare = *FormatTok;
if (!tryToParseLambdaIntroducer())
@@ -1260,49 +1366,18 @@ bool UnwrappedLineParser::tryToParseLambda() {
}
bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
- nextToken();
- if (FormatTok->is(tok::equal)) {
- nextToken();
- if (FormatTok->is(tok::r_square)) {
- nextToken();
- return true;
- }
- if (FormatTok->isNot(tok::comma))
- return false;
- nextToken();
- } else if (FormatTok->is(tok::amp)) {
- nextToken();
- if (FormatTok->is(tok::r_square)) {
- nextToken();
- return true;
- }
- if (!FormatTok->isOneOf(tok::comma, tok::identifier)) {
- return false;
- }
- if (FormatTok->is(tok::comma))
- nextToken();
- } else if (FormatTok->is(tok::r_square)) {
+ const FormatToken *Previous = FormatTok->Previous;
+ if (Previous &&
+ (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
+ tok::kw_delete) ||
+ FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() ||
+ Previous->isSimpleTypeSpecifier())) {
nextToken();
- return true;
+ return false;
}
- do {
- if (FormatTok->is(tok::amp))
- nextToken();
- if (!FormatTok->isOneOf(tok::identifier, tok::kw_this))
- return false;
- nextToken();
- if (FormatTok->is(tok::ellipsis))
- nextToken();
- if (FormatTok->is(tok::comma)) {
- nextToken();
- } else if (FormatTok->is(tok::r_square)) {
- nextToken();
- return true;
- } else {
- return false;
- }
- } while (!eof());
- return false;
+ nextToken();
+ parseSquare(/*LambdaIntroducer=*/true);
+ return true;
}
void UnwrappedLineParser::tryToParseJSFunction() {
@@ -1419,6 +1494,15 @@ bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
nextToken();
parseBracedList();
break;
+ case tok::less:
+ if (Style.Language == FormatStyle::LK_Proto) {
+ nextToken();
+ parseBracedList(/*ContinueOnSemicolons=*/false,
+ /*ClosingBraceKind=*/tok::greater);
+ } else {
+ nextToken();
+ }
+ break;
case tok::semi:
// JavaScript (or more precisely TypeScript) can have semicolons in braced
// lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
@@ -1495,10 +1579,12 @@ void UnwrappedLineParser::parseParens() {
} while (!eof());
}
-void UnwrappedLineParser::parseSquare() {
- assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
- if (tryToParseLambda())
- return;
+void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
+ if (!LambdaIntroducer) {
+ assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
+ if (tryToParseLambda())
+ return;
+ }
do {
switch (FormatTok->Tok.getKind()) {
case tok::l_paren:
@@ -1939,6 +2025,17 @@ void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
((Style.Language == FormatStyle::LK_Java ||
Style.Language == FormatStyle::LK_JavaScript) &&
FormatTok->isOneOf(tok::period, tok::comma))) {
+ if (Style.Language == FormatStyle::LK_JavaScript &&
+ FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
+ // JavaScript/TypeScript supports inline object types in
+ // extends/implements positions:
+ // class Foo implements {bar: number} { }
+ nextToken();
+ if (FormatTok->is(tok::l_brace)) {
+ tryToParseBracedList();
+ continue;
+ }
+ }
bool IsNonMacroIdentifier =
FormatTok->is(tok::identifier) &&
FormatTok->TokenText != FormatTok->TokenText.upper();
@@ -2090,7 +2187,7 @@ void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
while (!eof()) {
if (FormatTok->is(tok::semi))
return;
- if (Line->Tokens.size() == 0) {
+ if (Line->Tokens.empty()) {
// Common issue: Automatic Semicolon Insertion wrapped the line, so the
// import statement should terminate.
return;
@@ -2107,14 +2204,15 @@ void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
StringRef Prefix = "") {
- llvm::dbgs() << Prefix << "Line(" << Line.Level << ")"
+ llvm::dbgs() << Prefix << "Line(" << Line.Level
+ << ", FSC=" << Line.FirstStartColumn << ")"
<< (Line.InPPDirective ? " MACRO" : "") << ": ";
for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
E = Line.Tokens.end();
I != E; ++I) {
llvm::dbgs() << I->Tok->Tok.getName() << "["
- << "T=" << I->Tok->Type
- << ", OC=" << I->Tok->OriginalColumn << "] ";
+ << "T=" << I->Tok->Type << ", OC=" << I->Tok->OriginalColumn
+ << "] ";
}
for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
E = Line.Tokens.end();
@@ -2140,12 +2238,15 @@ void UnwrappedLineParser::addUnwrappedLine() {
CurrentLines->push_back(std::move(*Line));
Line->Tokens.clear();
Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
+ Line->FirstStartColumn = 0;
if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
CurrentLines->append(
std::make_move_iterator(PreprocessorDirectives.begin()),
std::make_move_iterator(PreprocessorDirectives.end()));
PreprocessorDirectives.clear();
}
+ // Disconnect the current token from the last token on the previous line.
+ FormatTok->Previous = nullptr;
}
bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
@@ -2287,23 +2388,17 @@ void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
CommentsBeforeNextToken.clear();
}
-void UnwrappedLineParser::nextToken() {
+void UnwrappedLineParser::nextToken(int LevelDifference) {
if (eof())
return;
flushComments(isOnNewLine(*FormatTok));
pushToken(FormatTok);
+ FormatToken *Previous = FormatTok;
if (Style.Language != FormatStyle::LK_JavaScript)
- readToken();
+ readToken(LevelDifference);
else
readTokenWithJavaScriptASI();
-}
-
-const FormatToken *UnwrappedLineParser::getPreviousToken() {
- // FIXME: This is a dirty way to access the previous token. Find a better
- // solution.
- if (!Line || Line->Tokens.empty())
- return nullptr;
- return Line->Tokens.back().Tok;
+ FormatTok->Previous = Previous;
}
void UnwrappedLineParser::distributeComments(
@@ -2343,8 +2438,7 @@ void UnwrappedLineParser::distributeComments(
}
for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
FormatToken *FormatTok = Comments[i];
- if (HasTrailAlignedWithNextToken &&
- i == StartOfTrailAlignedWithNextToken) {
+ if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
FormatTok->ContinuesLineCommentSection = false;
} else {
FormatTok->ContinuesLineCommentSection =
@@ -2362,7 +2456,7 @@ void UnwrappedLineParser::distributeComments(
}
}
-void UnwrappedLineParser::readToken() {
+void UnwrappedLineParser::readToken(int LevelDifference) {
SmallVector<FormatToken *, 1> Comments;
do {
FormatTok = Tokens->getNextToken();
@@ -2375,6 +2469,10 @@ void UnwrappedLineParser::readToken() {
// directives only after that unwrapped line was finished later.
bool SwitchToPreprocessorLines = !Line->Tokens.empty();
ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
+ assert((LevelDifference >= 0 ||
+ static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
+ "LevelDifference makes Line->Level negative");
+ Line->Level += LevelDifference;
// Comments stored before the preprocessor directive need to be output
// before the preprocessor directive, at the same level as the
// preprocessor directive, as we consider them to apply to the directive.
@@ -2395,7 +2493,7 @@ void UnwrappedLineParser::readToken() {
FormatTok->MustBreakBefore = true;
}
- if (!PPStack.empty() && (PPStack.back() == PP_Unreachable) &&
+ if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
!Line->InPPDirective) {
continue;
}
diff --git a/lib/Format/UnwrappedLineParser.h b/lib/Format/UnwrappedLineParser.h
index a2aa2f006728..1d8ccabbd0f8 100644
--- a/lib/Format/UnwrappedLineParser.h
+++ b/lib/Format/UnwrappedLineParser.h
@@ -56,6 +56,8 @@ struct UnwrappedLine {
size_t MatchingOpeningBlockLineIndex;
static const size_t kInvalidIndex = -1;
+
+ unsigned FirstStartColumn = 0;
};
class UnwrappedLineConsumer {
@@ -71,6 +73,7 @@ class UnwrappedLineParser {
public:
UnwrappedLineParser(const FormatStyle &Style,
const AdditionalKeywords &Keywords,
+ unsigned FirstStartColumn,
ArrayRef<FormatToken *> Tokens,
UnwrappedLineConsumer &Callback);
@@ -96,7 +99,7 @@ private:
bool parseBracedList(bool ContinueOnSemicolons = false,
tok::TokenKind ClosingBraceKind = tok::r_brace);
void parseParens();
- void parseSquare();
+ void parseSquare(bool LambdaIntroducer = false);
void parseIfThenElse();
void parseTryCatch();
void parseForOrWhileLoop();
@@ -123,9 +126,12 @@ private:
void tryToParseJSFunction();
void addUnwrappedLine();
bool eof() const;
- void nextToken();
- const FormatToken *getPreviousToken();
- void readToken();
+ // LevelDifference is the difference of levels after and before the current
+ // token. For example:
+ // - if the token is '{' and opens a block, LevelDifference is 1.
+ // - if the token is '}' and closes a block, LevelDifference is -1.
+ void nextToken(int LevelDifference = 0);
+ void readToken(int LevelDifference = 0);
// Decides which comment tokens should be added to the current line and which
// should be added as comments before the next token.
@@ -156,6 +162,11 @@ private:
bool isOnNewLine(const FormatToken &FormatTok);
+ // Compute hash of the current preprocessor branch.
+ // This is used to identify the different branches, and thus track whether a
+ // block opens and closes in the same branch.
+ size_t computePPHash() const;
+
// FIXME: We are constantly running into bugs where Line.Level is incorrectly
// subtracted from beyond 0. Introduce a method to subtract from Line.Level
// and use that everywhere in the Parser.
@@ -174,7 +185,7 @@ private:
// Preprocessor directives are parsed out-of-order from other unwrapped lines.
// Thus, we need to keep a list of preprocessor directives to be reported
- // after an unwarpped line that has been started was finished.
+ // after an unwrapped line that has been started was finished.
SmallVector<UnwrappedLine, 4> PreprocessorDirectives;
// New unwrapped lines are added via CurrentLines.
@@ -207,8 +218,14 @@ private:
PP_Unreachable // #if 0 or a conditional preprocessor block inside #if 0
};
+ struct PPBranch {
+ PPBranch(PPBranchKind Kind, size_t Line) : Kind(Kind), Line(Line) {}
+ PPBranchKind Kind;
+ size_t Line;
+ };
+
// Keeps a stack of currently active preprocessor branching directives.
- SmallVector<PPBranchKind, 16> PPStack;
+ SmallVector<PPBranch, 16> PPStack;
// The \c UnwrappedLineParser re-parses the code for each combination
// of preprocessor branches that can be taken.
@@ -231,6 +248,15 @@ private:
// sequence.
std::stack<int> PPChainBranchIndex;
+ // Contains the #ifndef condition for a potential include guard.
+ FormatToken *IfNdefCondition;
+ bool FoundIncludeGuardStart;
+ bool IncludeGuardRejected;
+ // Contains the first start column where the source begins. This is zero for
+ // normal source code and may be nonzero when formatting a code fragment that
+ // does not start at the beginning of the file.
+ unsigned FirstStartColumn;
+
friend class ScopedLineState;
friend class CompoundStatementIndenter;
};
@@ -243,8 +269,9 @@ struct UnwrappedLineNode {
SmallVector<UnwrappedLine, 0> Children;
};
-inline UnwrappedLine::UnwrappedLine() : Level(0), InPPDirective(false),
- MustBeDeclaration(false), MatchingOpeningBlockLineIndex(kInvalidIndex) {}
+inline UnwrappedLine::UnwrappedLine()
+ : Level(0), InPPDirective(false), MustBeDeclaration(false),
+ MatchingOpeningBlockLineIndex(kInvalidIndex) {}
} // end namespace format
} // end namespace clang
diff --git a/lib/Format/UsingDeclarationsSorter.cpp b/lib/Format/UsingDeclarationsSorter.cpp
index fb4f59fbc9bc..ef0c7a7d5a45 100644
--- a/lib/Format/UsingDeclarationsSorter.cpp
+++ b/lib/Format/UsingDeclarationsSorter.cpp
@@ -26,6 +26,45 @@ namespace format {
namespace {
+// The order of using declarations is defined as follows:
+// Split the strings by "::" and discard any initial empty strings. The last
+// element of each list is a non-namespace name; all others are namespace
+// names. Sort the lists of names lexicographically, where the sort order of
+// individual names is that all non-namespace names come before all namespace
+// names, and within those groups, names are in case-insensitive lexicographic
+// order.
+int compareLabels(StringRef A, StringRef B) {
+ SmallVector<StringRef, 2> NamesA;
+ A.split(NamesA, "::", /*MaxSplit=*/-1, /*KeepEmpty=*/false);
+ SmallVector<StringRef, 2> NamesB;
+ B.split(NamesB, "::", /*MaxSplit=*/-1, /*KeepEmpty=*/false);
+ size_t SizeA = NamesA.size();
+ size_t SizeB = NamesB.size();
+ for (size_t I = 0, E = std::min(SizeA, SizeB); I < E; ++I) {
+ if (I + 1 == SizeA) {
+ // I is the last index of NamesA and NamesA[I] is a non-namespace name.
+
+ // Non-namespace names come before all namespace names.
+ if (SizeB > SizeA)
+ return -1;
+
+ // Two names within a group compare case-insensitively.
+ return NamesA[I].compare_lower(NamesB[I]);
+ }
+
+ // I is the last index of NamesB and NamesB[I] is a non-namespace name.
+ // Non-namespace names come before all namespace names.
+ if (I + 1 == SizeB)
+ return 1;
+
+ // Two namespace names within a group compare case-insensitively.
+ int C = NamesA[I].compare_lower(NamesB[I]);
+ if (C != 0)
+ return C;
+ }
+ return 0;
+}
+
struct UsingDeclaration {
const AnnotatedLine *Line;
std::string Label;
@@ -34,7 +73,7 @@ struct UsingDeclaration {
: Line(Line), Label(Label) {}
bool operator<(const UsingDeclaration &Other) const {
- return Label < Other.Label;
+ return compareLabels(Label, Other.Label) < 0;
}
};
@@ -76,10 +115,42 @@ std::string computeUsingDeclarationLabel(const FormatToken *UsingTok) {
void endUsingDeclarationBlock(
SmallVectorImpl<UsingDeclaration> *UsingDeclarations,
const SourceManager &SourceMgr, tooling::Replacements *Fixes) {
+ bool BlockAffected = false;
+ for (const UsingDeclaration &Declaration : *UsingDeclarations) {
+ if (Declaration.Line->Affected) {
+ BlockAffected = true;
+ break;
+ }
+ }
+ if (!BlockAffected) {
+ UsingDeclarations->clear();
+ return;
+ }
SmallVector<UsingDeclaration, 4> SortedUsingDeclarations(
UsingDeclarations->begin(), UsingDeclarations->end());
- std::sort(SortedUsingDeclarations.begin(), SortedUsingDeclarations.end());
+ std::stable_sort(SortedUsingDeclarations.begin(),
+ SortedUsingDeclarations.end());
+ SortedUsingDeclarations.erase(
+ std::unique(SortedUsingDeclarations.begin(),
+ SortedUsingDeclarations.end(),
+ [](const UsingDeclaration &a, const UsingDeclaration &b) {
+ return a.Label == b.Label;
+ }),
+ SortedUsingDeclarations.end());
for (size_t I = 0, E = UsingDeclarations->size(); I < E; ++I) {
+ if (I >= SortedUsingDeclarations.size()) {
+ // This using declaration has been deduplicated, delete it.
+ auto Begin =
+ (*UsingDeclarations)[I].Line->First->WhitespaceRange.getBegin();
+ auto End = (*UsingDeclarations)[I].Line->Last->Tok.getEndLoc();
+ auto Range = CharSourceRange::getCharRange(Begin, End);
+ auto Err = Fixes->add(tooling::Replacement(SourceMgr, Range, ""));
+ if (Err) {
+ llvm::errs() << "Error while sorting using declarations: "
+ << llvm::toString(std::move(Err)) << "\n";
+ }
+ continue;
+ }
if ((*UsingDeclarations)[I].Line == SortedUsingDeclarations[I].Line)
continue;
auto Begin = (*UsingDeclarations)[I].Line->First->Tok.getLocation();
@@ -112,7 +183,7 @@ UsingDeclarationsSorter::UsingDeclarationsSorter(const Environment &Env,
const FormatStyle &Style)
: TokenAnalyzer(Env, Style) {}
-tooling::Replacements UsingDeclarationsSorter::analyze(
+std::pair<tooling::Replacements, unsigned> UsingDeclarationsSorter::analyze(
TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
FormatTokenLexer &Tokens) {
const SourceManager &SourceMgr = Env.getSourceManager();
@@ -121,15 +192,17 @@ tooling::Replacements UsingDeclarationsSorter::analyze(
tooling::Replacements Fixes;
SmallVector<UsingDeclaration, 4> UsingDeclarations;
for (size_t I = 0, E = AnnotatedLines.size(); I != E; ++I) {
- if (!AnnotatedLines[I]->Affected || AnnotatedLines[I]->InPPDirective ||
- !AnnotatedLines[I]->startsWith(tok::kw_using) ||
- AnnotatedLines[I]->First->Finalized) {
+ const auto *FirstTok = AnnotatedLines[I]->First;
+ if (AnnotatedLines[I]->InPPDirective ||
+ !AnnotatedLines[I]->startsWith(tok::kw_using) || FirstTok->Finalized) {
endUsingDeclarationBlock(&UsingDeclarations, SourceMgr, &Fixes);
continue;
}
- if (AnnotatedLines[I]->First->NewlinesBefore > 1)
+ if (FirstTok->NewlinesBefore > 1)
endUsingDeclarationBlock(&UsingDeclarations, SourceMgr, &Fixes);
- std::string Label = computeUsingDeclarationLabel(AnnotatedLines[I]->First);
+ const auto *UsingTok =
+ FirstTok->is(tok::comment) ? FirstTok->getNextNonComment() : FirstTok;
+ std::string Label = computeUsingDeclarationLabel(UsingTok);
if (Label.empty()) {
endUsingDeclarationBlock(&UsingDeclarations, SourceMgr, &Fixes);
continue;
@@ -137,7 +210,7 @@ tooling::Replacements UsingDeclarationsSorter::analyze(
UsingDeclarations.push_back(UsingDeclaration(AnnotatedLines[I], Label));
}
endUsingDeclarationBlock(&UsingDeclarations, SourceMgr, &Fixes);
- return Fixes;
+ return {Fixes, 0};
}
} // namespace format
diff --git a/lib/Format/UsingDeclarationsSorter.h b/lib/Format/UsingDeclarationsSorter.h
index f7d5f97e3a2a..6f137712d841 100644
--- a/lib/Format/UsingDeclarationsSorter.h
+++ b/lib/Format/UsingDeclarationsSorter.h
@@ -25,7 +25,7 @@ class UsingDeclarationsSorter : public TokenAnalyzer {
public:
UsingDeclarationsSorter(const Environment &Env, const FormatStyle &Style);
- tooling::Replacements
+ std::pair<tooling::Replacements, unsigned>
analyze(TokenAnnotator &Annotator,
SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
FormatTokenLexer &Tokens) override;
diff --git a/lib/Format/WhitespaceManager.cpp b/lib/Format/WhitespaceManager.cpp
index 377ec3a681b6..a5477a996327 100644
--- a/lib/Format/WhitespaceManager.cpp
+++ b/lib/Format/WhitespaceManager.cpp
@@ -67,6 +67,11 @@ void WhitespaceManager::addUntouchableToken(const FormatToken &Tok,
/*IsInsideToken=*/false));
}
+llvm::Error
+WhitespaceManager::addReplacement(const tooling::Replacement &Replacement) {
+ return Replaces.add(Replacement);
+}
+
void WhitespaceManager::replaceWhitespaceInToken(
const FormatToken &Tok, unsigned Offset, unsigned ReplaceChars,
StringRef PreviousPostfix, StringRef CurrentPrefix, bool InPPDirective,
@@ -166,15 +171,15 @@ void WhitespaceManager::calculateLineBreakInformation() {
// BreakableLineCommentSection does comment reflow changes and here is
// the aligning of trailing comments. Consider the case where we reflow
// the second line up in this example:
- //
+ //
// // line 1
// // line 2
- //
+ //
// That amounts to 2 changes by BreakableLineCommentSection:
// - the first, delimited by (), for the whitespace between the tokens,
// - and second, delimited by [], for the whitespace at the beginning
// of the second token:
- //
+ //
// // line 1(
// )[// ]line 2
//
@@ -608,8 +613,9 @@ void WhitespaceManager::generateChanges() {
if (C.CreateReplacement) {
std::string ReplacementText = C.PreviousLinePostfix;
if (C.ContinuesPPDirective)
- appendNewlineText(ReplacementText, C.NewlinesBefore,
- C.PreviousEndOfTokenColumn, C.EscapedNewlineColumn);
+ appendEscapedNewlineText(ReplacementText, C.NewlinesBefore,
+ C.PreviousEndOfTokenColumn,
+ C.EscapedNewlineColumn);
else
appendNewlineText(ReplacementText, C.NewlinesBefore);
appendIndentText(ReplacementText, C.Tok->IndentLevel,
@@ -621,8 +627,7 @@ void WhitespaceManager::generateChanges() {
}
}
-void WhitespaceManager::storeReplacement(SourceRange Range,
- StringRef Text) {
+void WhitespaceManager::storeReplacement(SourceRange Range, StringRef Text) {
unsigned WhitespaceLength = SourceMgr.getFileOffset(Range.getEnd()) -
SourceMgr.getFileOffset(Range.getBegin());
// Don't create a replacement, if it does not change anything.
@@ -645,16 +650,16 @@ void WhitespaceManager::appendNewlineText(std::string &Text,
Text.append(UseCRLF ? "\r\n" : "\n");
}
-void WhitespaceManager::appendNewlineText(std::string &Text, unsigned Newlines,
- unsigned PreviousEndOfTokenColumn,
- unsigned EscapedNewlineColumn) {
+void WhitespaceManager::appendEscapedNewlineText(
+ std::string &Text, unsigned Newlines, unsigned PreviousEndOfTokenColumn,
+ unsigned EscapedNewlineColumn) {
if (Newlines > 0) {
- unsigned Offset =
- std::min<int>(EscapedNewlineColumn - 2, PreviousEndOfTokenColumn);
+ unsigned Spaces =
+ std::max<int>(1, EscapedNewlineColumn - PreviousEndOfTokenColumn - 1);
for (unsigned i = 0; i < Newlines; ++i) {
- Text.append(EscapedNewlineColumn - Offset - 1, ' ');
+ Text.append(Spaces, ' ');
Text.append(UseCRLF ? "\\\r\n" : "\\\n");
- Offset = 0;
+ Spaces = std::max<int>(0, EscapedNewlineColumn - 1);
}
}
}
diff --git a/lib/Format/WhitespaceManager.h b/lib/Format/WhitespaceManager.h
index 4e78ab43abaf..af20dc5616a7 100644
--- a/lib/Format/WhitespaceManager.h
+++ b/lib/Format/WhitespaceManager.h
@@ -57,6 +57,8 @@ public:
/// was not called.
void addUntouchableToken(const FormatToken &Tok, bool InPPDirective);
+ llvm::Error addReplacement(const tooling::Replacement &Replacement);
+
/// \brief Inserts or replaces whitespace in the middle of a token.
///
/// Inserts \p PreviousPostfix, \p Newlines, \p Spaces and \p CurrentPrefix
@@ -194,9 +196,9 @@ private:
/// \brief Stores \p Text as the replacement for the whitespace in \p Range.
void storeReplacement(SourceRange Range, StringRef Text);
void appendNewlineText(std::string &Text, unsigned Newlines);
- void appendNewlineText(std::string &Text, unsigned Newlines,
- unsigned PreviousEndOfTokenColumn,
- unsigned EscapedNewlineColumn);
+ void appendEscapedNewlineText(std::string &Text, unsigned Newlines,
+ unsigned PreviousEndOfTokenColumn,
+ unsigned EscapedNewlineColumn);
void appendIndentText(std::string &Text, unsigned IndentLevel,
unsigned Spaces, unsigned WhitespaceStartColumn);