aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Format
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2013-12-22 00:07:40 +0000
committerDimitry Andric <dim@FreeBSD.org>2013-12-22 00:07:40 +0000
commitbfef399519ca9b8a4b4c6b563253bad7e0eeffe0 (patch)
treedf8df0b0067b381eab470a3b8f28d14a552a6340 /lib/Format
parent6a0372513edbc473b538d2f724efac50405d6fef (diff)
downloadsrc-bfef399519ca9b8a4b4c6b563253bad7e0eeffe0.tar.gz
src-bfef399519ca9b8a4b4c6b563253bad7e0eeffe0.zip
Vendor import of clang release_34 branch r197841 (effectively, 3.4 RC3):vendor/clang/clang-release_34-r197841
Notes
Notes: svn path=/vendor/clang/dist/; revision=259701 svn path=/vendor/clang/clang-release_34-r197841/; revision=259703; tag=vendor/clang/clang-release_34-r197841
Diffstat (limited to 'lib/Format')
-rw-r--r--lib/Format/BreakableToken.cpp504
-rw-r--r--lib/Format/BreakableToken.h349
-rw-r--r--lib/Format/CMakeLists.txt2
-rw-r--r--lib/Format/ContinuationIndenter.cpp884
-rw-r--r--lib/Format/ContinuationIndenter.h327
-rw-r--r--lib/Format/Encoding.h144
-rw-r--r--lib/Format/Format.cpp2231
-rw-r--r--lib/Format/FormatToken.cpp204
-rw-r--r--lib/Format/FormatToken.h452
-rw-r--r--lib/Format/TokenAnnotator.cpp1096
-rw-r--r--lib/Format/TokenAnnotator.h257
-rw-r--r--lib/Format/UnwrappedLineParser.cpp862
-rw-r--r--lib/Format/UnwrappedLineParser.h163
-rw-r--r--lib/Format/WhitespaceManager.cpp413
-rw-r--r--lib/Format/WhitespaceManager.h182
15 files changed, 5600 insertions, 2470 deletions
diff --git a/lib/Format/BreakableToken.cpp b/lib/Format/BreakableToken.cpp
index 3e2e0ce7cf3d..d720ce990b52 100644
--- a/lib/Format/BreakableToken.cpp
+++ b/lib/Format/BreakableToken.cpp
@@ -13,166 +13,432 @@
///
//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "format-token-breaker"
+
#include "BreakableToken.h"
+#include "clang/Basic/CharInfo.h"
+#include "clang/Format/Format.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
#include <algorithm>
namespace clang {
namespace format {
-BreakableToken::Split BreakableComment::getSplit(unsigned LineIndex,
- unsigned TailOffset,
- unsigned ColumnLimit) const {
- StringRef Text = getLine(LineIndex).substr(TailOffset);
- unsigned ContentStartColumn = getContentStartColumn(LineIndex, TailOffset);
+static const char *const Blanks = " \t\v\f\r";
+static bool IsBlank(char C) {
+ switch (C) {
+ case ' ':
+ case '\t':
+ case '\v':
+ case '\f':
+ case '\r':
+ return true;
+ default:
+ return false;
+ }
+}
+
+static BreakableToken::Split getCommentSplit(StringRef Text,
+ unsigned ContentStartColumn,
+ unsigned ColumnLimit,
+ unsigned TabWidth,
+ encoding::Encoding Encoding) {
if (ColumnLimit <= ContentStartColumn + 1)
- return Split(StringRef::npos, 0);
+ return BreakableToken::Split(StringRef::npos, 0);
unsigned MaxSplit = ColumnLimit - ContentStartColumn + 1;
- StringRef::size_type SpaceOffset = Text.rfind(' ', MaxSplit);
+ unsigned MaxSplitBytes = 0;
+
+ for (unsigned NumChars = 0;
+ NumChars < MaxSplit && MaxSplitBytes < Text.size();) {
+ unsigned BytesInChar =
+ encoding::getCodePointNumBytes(Text[MaxSplitBytes], Encoding);
+ NumChars +=
+ encoding::columnWidthWithTabs(Text.substr(MaxSplitBytes, BytesInChar),
+ ContentStartColumn, TabWidth, Encoding);
+ MaxSplitBytes += BytesInChar;
+ }
+
+ StringRef::size_type SpaceOffset = Text.find_last_of(Blanks, MaxSplitBytes);
if (SpaceOffset == StringRef::npos ||
- Text.find_last_not_of(' ', SpaceOffset) == StringRef::npos) {
- SpaceOffset = Text.find(' ', MaxSplit);
+ // Don't break at leading whitespace.
+ Text.find_last_not_of(Blanks, SpaceOffset) == StringRef::npos) {
+ // Make sure that we don't break at leading whitespace that
+ // reaches past MaxSplit.
+ StringRef::size_type FirstNonWhitespace = Text.find_first_not_of(Blanks);
+ if (FirstNonWhitespace == StringRef::npos)
+ // If the comment is only whitespace, we cannot split.
+ return BreakableToken::Split(StringRef::npos, 0);
+ SpaceOffset = Text.find_first_of(
+ Blanks, std::max<unsigned>(MaxSplitBytes, FirstNonWhitespace));
}
if (SpaceOffset != StringRef::npos && SpaceOffset != 0) {
- StringRef BeforeCut = Text.substr(0, SpaceOffset).rtrim();
- StringRef AfterCut = Text.substr(SpaceOffset).ltrim();
+ StringRef BeforeCut = Text.substr(0, SpaceOffset).rtrim(Blanks);
+ StringRef AfterCut = Text.substr(SpaceOffset).ltrim(Blanks);
return BreakableToken::Split(BeforeCut.size(),
AfterCut.begin() - BeforeCut.end());
}
return BreakableToken::Split(StringRef::npos, 0);
}
-void BreakableComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
- Split Split, bool InPPDirective,
- WhitespaceManager &Whitespaces) {
- StringRef Text = getLine(LineIndex).substr(TailOffset);
- StringRef AdditionalPrefix = Decoration;
- if (Text.size() == Split.first + Split.second) {
- // For all but the last line handle trailing space in trimLine.
- if (LineIndex < Lines.size() - 1)
- return;
- // For the last line we need to break before "*/", but not to add "* ".
- AdditionalPrefix = "";
+static BreakableToken::Split getStringSplit(StringRef Text,
+ unsigned UsedColumns,
+ unsigned ColumnLimit,
+ unsigned TabWidth,
+ encoding::Encoding Encoding) {
+ // FIXME: Reduce unit test case.
+ if (Text.empty())
+ return BreakableToken::Split(StringRef::npos, 0);
+ if (ColumnLimit <= UsedColumns)
+ return BreakableToken::Split(StringRef::npos, 0);
+ unsigned MaxSplit = std::min<unsigned>(
+ ColumnLimit - UsedColumns,
+ encoding::columnWidthWithTabs(Text, UsedColumns, TabWidth, Encoding) - 1);
+ StringRef::size_type SpaceOffset = 0;
+ StringRef::size_type SlashOffset = 0;
+ StringRef::size_type WordStartOffset = 0;
+ StringRef::size_type SplitPoint = 0;
+ for (unsigned Chars = 0;;) {
+ unsigned Advance;
+ if (Text[0] == '\\') {
+ Advance = encoding::getEscapeSequenceLength(Text);
+ Chars += Advance;
+ } else {
+ Advance = encoding::getCodePointNumBytes(Text[0], Encoding);
+ Chars += encoding::columnWidthWithTabs(
+ Text.substr(0, Advance), UsedColumns + Chars, TabWidth, Encoding);
+ }
+
+ if (Chars > MaxSplit)
+ break;
+
+ if (IsBlank(Text[0]))
+ SpaceOffset = SplitPoint;
+ if (Text[0] == '/')
+ SlashOffset = SplitPoint;
+ if (Advance == 1 && !isAlphanumeric(Text[0]))
+ WordStartOffset = SplitPoint;
+
+ SplitPoint += Advance;
+ Text = Text.substr(Advance);
}
- unsigned WhitespaceStartColumn =
- getContentStartColumn(LineIndex, TailOffset) + Split.first;
- unsigned BreakOffset = Text.data() - TokenText.data() + Split.first;
- unsigned CharsToRemove = Split.second;
- Whitespaces.breakToken(Tok, BreakOffset, CharsToRemove, "", AdditionalPrefix,
- InPPDirective, IndentAtLineBreak,
- WhitespaceStartColumn);
+ if (SpaceOffset != 0)
+ return BreakableToken::Split(SpaceOffset + 1, 0);
+ if (SlashOffset != 0)
+ return BreakableToken::Split(SlashOffset + 1, 0);
+ if (WordStartOffset != 0)
+ return BreakableToken::Split(WordStartOffset + 1, 0);
+ if (SplitPoint != 0)
+ return BreakableToken::Split(SplitPoint, 0);
+ return BreakableToken::Split(StringRef::npos, 0);
}
-BreakableBlockComment::BreakableBlockComment(const SourceManager &SourceMgr,
- const AnnotatedToken &Token,
- unsigned StartColumn)
- : BreakableComment(SourceMgr, Token.FormatTok, StartColumn + 2) {
- assert(TokenText.startswith("/*") && TokenText.endswith("*/"));
+unsigned BreakableSingleLineToken::getLineCount() const { return 1; }
+
+unsigned BreakableSingleLineToken::getLineLengthAfterSplit(
+ unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const {
+ return StartColumn + Prefix.size() + Postfix.size() +
+ encoding::columnWidthWithTabs(Line.substr(Offset, Length),
+ StartColumn + Prefix.size(),
+ Style.TabWidth, Encoding);
+}
+
+BreakableSingleLineToken::BreakableSingleLineToken(
+ const FormatToken &Tok, unsigned IndentLevel, unsigned StartColumn,
+ StringRef Prefix, StringRef Postfix, bool InPPDirective,
+ encoding::Encoding Encoding, const FormatStyle &Style)
+ : BreakableToken(Tok, IndentLevel, InPPDirective, Encoding, Style),
+ StartColumn(StartColumn), Prefix(Prefix), Postfix(Postfix) {
+ assert(Tok.TokenText.startswith(Prefix) && Tok.TokenText.endswith(Postfix));
+ Line = Tok.TokenText.substr(
+ Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size());
+}
+
+BreakableStringLiteral::BreakableStringLiteral(
+ const FormatToken &Tok, unsigned IndentLevel, unsigned StartColumn,
+ StringRef Prefix, StringRef Postfix, bool InPPDirective,
+ encoding::Encoding Encoding, const FormatStyle &Style)
+ : BreakableSingleLineToken(Tok, IndentLevel, StartColumn, Prefix, Postfix,
+ InPPDirective, Encoding, Style) {}
+
+BreakableToken::Split
+BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset,
+ unsigned ColumnLimit) const {
+ return getStringSplit(Line.substr(TailOffset),
+ StartColumn + Prefix.size() + Postfix.size(),
+ ColumnLimit, Style.TabWidth, Encoding);
+}
+
+void BreakableStringLiteral::insertBreak(unsigned LineIndex,
+ unsigned TailOffset, Split Split,
+ WhitespaceManager &Whitespaces) {
+ Whitespaces.replaceWhitespaceInToken(
+ Tok, Prefix.size() + TailOffset + Split.first, Split.second, Postfix,
+ Prefix, InPPDirective, 1, IndentLevel, StartColumn);
+}
+
+static StringRef getLineCommentPrefix(StringRef Comment) {
+ static const char *const KnownPrefixes[] = { "/// ", "///", "// ", "//" };
+ for (size_t i = 0, e = llvm::array_lengthof(KnownPrefixes); i != e; ++i)
+ if (Comment.startswith(KnownPrefixes[i]))
+ return KnownPrefixes[i];
+ return "";
+}
+
+BreakableLineComment::BreakableLineComment(
+ const FormatToken &Token, unsigned IndentLevel, unsigned StartColumn,
+ bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style)
+ : BreakableSingleLineToken(Token, IndentLevel, StartColumn,
+ getLineCommentPrefix(Token.TokenText), "",
+ InPPDirective, Encoding, Style) {
+ OriginalPrefix = Prefix;
+ if (Token.TokenText.size() > Prefix.size() &&
+ isAlphanumeric(Token.TokenText[Prefix.size()])) {
+ if (Prefix == "//")
+ Prefix = "// ";
+ else if (Prefix == "///")
+ Prefix = "/// ";
+ }
+}
+
+BreakableToken::Split
+BreakableLineComment::getSplit(unsigned LineIndex, unsigned TailOffset,
+ unsigned ColumnLimit) const {
+ return getCommentSplit(Line.substr(TailOffset), StartColumn + Prefix.size(),
+ ColumnLimit, Style.TabWidth, Encoding);
+}
+
+void BreakableLineComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
+ Split Split,
+ WhitespaceManager &Whitespaces) {
+ Whitespaces.replaceWhitespaceInToken(
+ Tok, OriginalPrefix.size() + TailOffset + Split.first, Split.second,
+ Postfix, Prefix, InPPDirective, /*Newlines=*/1, IndentLevel, StartColumn);
+}
+
+void BreakableLineComment::replaceWhitespace(unsigned LineIndex,
+ unsigned TailOffset, Split Split,
+ WhitespaceManager &Whitespaces) {
+ Whitespaces.replaceWhitespaceInToken(
+ Tok, OriginalPrefix.size() + TailOffset + Split.first, Split.second, "",
+ "", /*InPPDirective=*/false, /*Newlines=*/0, /*IndentLevel=*/0,
+ /*Spaces=*/1);
+}
- OriginalStartColumn =
- SourceMgr.getSpellingColumnNumber(Tok.getStartOfNonWhitespace()) - 1;
+void
+BreakableLineComment::replaceWhitespaceBefore(unsigned LineIndex,
+ WhitespaceManager &Whitespaces) {
+ if (OriginalPrefix != Prefix) {
+ Whitespaces.replaceWhitespaceInToken(Tok, OriginalPrefix.size(), 0, "", "",
+ /*InPPDirective=*/false,
+ /*Newlines=*/0, /*IndentLevel=*/0,
+ /*Spaces=*/1);
+ }
+}
+BreakableBlockComment::BreakableBlockComment(
+ const FormatToken &Token, unsigned IndentLevel, unsigned StartColumn,
+ unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective,
+ encoding::Encoding Encoding, const FormatStyle &Style)
+ : BreakableToken(Token, IndentLevel, InPPDirective, Encoding, Style) {
+ StringRef TokenText(Token.TokenText);
+ assert(TokenText.startswith("/*") && TokenText.endswith("*/"));
TokenText.substr(2, TokenText.size() - 4).split(Lines, "\n");
- bool NeedsStar = true;
- CommonPrefixLength = UINT_MAX;
- if (Lines.size() == 1) {
- if (Token.Parent == 0) {
- // Standalone block comments will be aligned and prefixed with *s.
- CommonPrefixLength = OriginalStartColumn + 1;
- } else {
- // Trailing comments can start on arbitrary column, and available
- // horizontal space can be too small to align consecutive lines with
- // the first one. We could, probably, align them to current
- // indentation level, but now we just wrap them without indentation
- // and stars.
- CommonPrefixLength = 0;
- NeedsStar = false;
- }
- } else {
- for (size_t i = 1; i < Lines.size(); ++i) {
- size_t FirstNonWhitespace = Lines[i].find_first_not_of(" ");
- if (FirstNonWhitespace != StringRef::npos) {
- NeedsStar = NeedsStar && (Lines[i][FirstNonWhitespace] == '*');
- CommonPrefixLength =
- std::min<unsigned>(CommonPrefixLength, FirstNonWhitespace);
+ int IndentDelta = StartColumn - OriginalStartColumn;
+ LeadingWhitespace.resize(Lines.size());
+ StartOfLineColumn.resize(Lines.size());
+ StartOfLineColumn[0] = StartColumn + 2;
+ for (size_t i = 1; i < Lines.size(); ++i)
+ adjustWhitespace(i, IndentDelta);
+
+ Decoration = "* ";
+ if (Lines.size() == 1 && !FirstInLine) {
+ // Comments for which FirstInLine is false can start on arbitrary column,
+ // and available horizontal space can be too small to align consecutive
+ // lines with the first one.
+ // FIXME: We could, probably, align them to current indentation level, but
+ // now we just wrap them without stars.
+ Decoration = "";
+ }
+ for (size_t i = 1, e = Lines.size(); i < e && !Decoration.empty(); ++i) {
+ // If the last line is empty, the closing "*/" will have a star.
+ if (i + 1 == e && Lines[i].empty())
+ break;
+ while (!Lines[i].startswith(Decoration))
+ Decoration = Decoration.substr(0, Decoration.size() - 1);
+ }
+
+ LastLineNeedsDecoration = true;
+ IndentAtLineBreak = StartOfLineColumn[0] + 1;
+ for (size_t i = 1; i < Lines.size(); ++i) {
+ if (Lines[i].empty()) {
+ if (i + 1 == Lines.size()) {
+ // Empty last line means that we already have a star as a part of the
+ // trailing */. We also need to preserve whitespace, so that */ is
+ // correctly indented.
+ LastLineNeedsDecoration = false;
+ } else if (Decoration.empty()) {
+ // For all other lines, set the start column to 0 if they're empty, so
+ // we do not insert trailing whitespace anywhere.
+ StartOfLineColumn[i] = 0;
}
+ continue;
}
+ // The first line already excludes the star.
+ // For all other lines, adjust the line to exclude the star and
+ // (optionally) the first whitespace.
+ StartOfLineColumn[i] += Decoration.size();
+ Lines[i] = Lines[i].substr(Decoration.size());
+ LeadingWhitespace[i] += Decoration.size();
+ IndentAtLineBreak = std::min<int>(IndentAtLineBreak, StartOfLineColumn[i]);
}
- if (CommonPrefixLength == UINT_MAX)
- CommonPrefixLength = 0;
+ IndentAtLineBreak = std::max<unsigned>(IndentAtLineBreak, Decoration.size());
+ DEBUG({
+ llvm::dbgs() << "IndentAtLineBreak " << IndentAtLineBreak << "\n";
+ for (size_t i = 0; i < Lines.size(); ++i) {
+ llvm::dbgs() << i << " |" << Lines[i] << "| " << LeadingWhitespace[i]
+ << "\n";
+ }
+ });
+}
+
+void BreakableBlockComment::adjustWhitespace(unsigned LineIndex,
+ int IndentDelta) {
+ // When in a preprocessor directive, the trailing backslash in a block comment
+ // is not needed, but can serve a purpose of uniformity with necessary escaped
+ // newlines outside the comment. In this case we remove it here before
+ // trimming the trailing whitespace. The backslash will be re-added later when
+ // inserting a line break.
+ size_t EndOfPreviousLine = Lines[LineIndex - 1].size();
+ if (InPPDirective && Lines[LineIndex - 1].endswith("\\"))
+ --EndOfPreviousLine;
- Decoration = NeedsStar ? "* " : "";
+ // Calculate the end of the non-whitespace text in the previous line.
+ EndOfPreviousLine =
+ Lines[LineIndex - 1].find_last_not_of(Blanks, EndOfPreviousLine);
+ if (EndOfPreviousLine == StringRef::npos)
+ EndOfPreviousLine = 0;
+ else
+ ++EndOfPreviousLine;
+ // Calculate the start of the non-whitespace text in the current line.
+ size_t StartOfLine = Lines[LineIndex].find_first_not_of(Blanks);
+ if (StartOfLine == StringRef::npos)
+ StartOfLine = Lines[LineIndex].size();
- IndentAtLineBreak =
- std::max<int>(StartColumn - OriginalStartColumn + CommonPrefixLength, 0);
+ StringRef Whitespace = Lines[LineIndex].substr(0, StartOfLine);
+ // Adjust Lines to only contain relevant text.
+ Lines[LineIndex - 1] = Lines[LineIndex - 1].substr(0, EndOfPreviousLine);
+ Lines[LineIndex] = Lines[LineIndex].substr(StartOfLine);
+ // Adjust LeadingWhitespace to attribute all whitespace between the lines
+ // to the current line.
+ LeadingWhitespace[LineIndex] =
+ Lines[LineIndex].begin() - Lines[LineIndex - 1].end();
+
+ // Adjust the start column uniformly across all lines.
+ StartOfLineColumn[LineIndex] = std::max<int>(
+ 0,
+ encoding::columnWidthWithTabs(Whitespace, 0, Style.TabWidth, Encoding) +
+ IndentDelta);
}
-void BreakableBlockComment::alignLines(WhitespaceManager &Whitespaces) {
- SourceLocation TokenLoc = Tok.getStartOfNonWhitespace();
- int IndentDelta = (StartColumn - 2) - OriginalStartColumn;
- if (IndentDelta > 0) {
- std::string WhiteSpace(IndentDelta, ' ');
- for (size_t i = 1; i < Lines.size(); ++i) {
- Whitespaces.addReplacement(
- TokenLoc.getLocWithOffset(Lines[i].data() - TokenText.data()), 0,
- WhiteSpace);
- }
- } else if (IndentDelta < 0) {
- std::string WhiteSpace(-IndentDelta, ' ');
- // Check that the line is indented enough.
- for (size_t i = 1; i < Lines.size(); ++i) {
- if (!Lines[i].startswith(WhiteSpace))
- return;
- }
- for (size_t i = 1; i < Lines.size(); ++i) {
- Whitespaces.addReplacement(
- TokenLoc.getLocWithOffset(Lines[i].data() - TokenText.data()),
- -IndentDelta, "");
- }
+unsigned BreakableBlockComment::getLineCount() const { return Lines.size(); }
+
+unsigned BreakableBlockComment::getLineLengthAfterSplit(
+ unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const {
+ unsigned ContentStartColumn = getContentStartColumn(LineIndex, Offset);
+ return ContentStartColumn +
+ encoding::columnWidthWithTabs(Lines[LineIndex].substr(Offset, Length),
+ ContentStartColumn, Style.TabWidth,
+ Encoding) +
+ // The last line gets a "*/" postfix.
+ (LineIndex + 1 == Lines.size() ? 2 : 0);
+}
+
+BreakableToken::Split
+BreakableBlockComment::getSplit(unsigned LineIndex, unsigned TailOffset,
+ unsigned ColumnLimit) const {
+ return getCommentSplit(Lines[LineIndex].substr(TailOffset),
+ getContentStartColumn(LineIndex, TailOffset),
+ ColumnLimit, Style.TabWidth, Encoding);
+}
+
+void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
+ Split Split,
+ WhitespaceManager &Whitespaces) {
+ StringRef Text = Lines[LineIndex].substr(TailOffset);
+ StringRef Prefix = Decoration;
+ if (LineIndex + 1 == Lines.size() &&
+ Text.size() == Split.first + Split.second) {
+ // For the last line we need to break before "*/", but not to add "* ".
+ Prefix = "";
}
- for (unsigned i = 1; i < Lines.size(); ++i)
- Lines[i] = Lines[i].substr(CommonPrefixLength + Decoration.size());
+ unsigned BreakOffsetInToken =
+ Text.data() - Tok.TokenText.data() + Split.first;
+ unsigned CharsToRemove = Split.second;
+ assert(IndentAtLineBreak >= Decoration.size());
+ Whitespaces.replaceWhitespaceInToken(
+ Tok, BreakOffsetInToken, CharsToRemove, "", Prefix, InPPDirective, 1,
+ IndentLevel, IndentAtLineBreak - Decoration.size());
}
-void BreakableBlockComment::trimLine(unsigned LineIndex, unsigned TailOffset,
- unsigned InPPDirective,
- WhitespaceManager &Whitespaces) {
- if (LineIndex == Lines.size() - 1)
- return;
+void BreakableBlockComment::replaceWhitespace(unsigned LineIndex,
+ unsigned TailOffset, Split Split,
+ WhitespaceManager &Whitespaces) {
StringRef Text = Lines[LineIndex].substr(TailOffset);
- if (!Text.endswith(" ") && !InPPDirective)
+ unsigned BreakOffsetInToken =
+ Text.data() - Tok.TokenText.data() + Split.first;
+ unsigned CharsToRemove = Split.second;
+ Whitespaces.replaceWhitespaceInToken(
+ Tok, BreakOffsetInToken, CharsToRemove, "", "", /*InPPDirective=*/false,
+ /*Newlines=*/0, /*IndentLevel=*/0, /*Spaces=*/1);
+}
+
+void
+BreakableBlockComment::replaceWhitespaceBefore(unsigned LineIndex,
+ WhitespaceManager &Whitespaces) {
+ if (LineIndex == 0)
return;
+ StringRef Prefix = Decoration;
+ if (Lines[LineIndex].empty()) {
+ if (LineIndex + 1 == Lines.size()) {
+ if (!LastLineNeedsDecoration) {
+ // If the last line was empty, we don't need a prefix, as the */ will
+ // line up with the decoration (if it exists).
+ Prefix = "";
+ }
+ } else if (!Decoration.empty()) {
+ // For other empty lines, if we do have a decoration, adapt it to not
+ // contain a trailing whitespace.
+ Prefix = Prefix.substr(0, 1);
+ }
+ } else {
+ if (StartOfLineColumn[LineIndex] == 1) {
+ // This line starts immediately after the decorating *.
+ Prefix = Prefix.substr(0, 1);
+ }
+ }
- StringRef TrimmedLine = Text.rtrim();
- unsigned WhitespaceStartColumn =
- getLineLengthAfterSplit(LineIndex, TailOffset);
- unsigned BreakOffset = TrimmedLine.end() - TokenText.data();
- unsigned CharsToRemove = Text.size() - TrimmedLine.size() + 1;
- Whitespaces.breakToken(Tok, BreakOffset, CharsToRemove, "", "", InPPDirective,
- 0, WhitespaceStartColumn);
-}
-
-BreakableLineComment::BreakableLineComment(const SourceManager &SourceMgr,
- const AnnotatedToken &Token,
- unsigned StartColumn)
- : BreakableComment(SourceMgr, Token.FormatTok, StartColumn) {
- assert(TokenText.startswith("//"));
- Decoration = getLineCommentPrefix(TokenText);
- Lines.push_back(TokenText.substr(Decoration.size()));
- IndentAtLineBreak = StartColumn;
- this->StartColumn += Decoration.size(); // Start column of the contents.
-}
-
-StringRef BreakableLineComment::getLineCommentPrefix(StringRef Comment) {
- const char *KnownPrefixes[] = { "/// ", "///", "// ", "//" };
- for (size_t i = 0; i < llvm::array_lengthof(KnownPrefixes); ++i)
- if (Comment.startswith(KnownPrefixes[i]))
- return KnownPrefixes[i];
- return "";
+ unsigned WhitespaceOffsetInToken = Lines[LineIndex].data() -
+ Tok.TokenText.data() -
+ LeadingWhitespace[LineIndex];
+ assert(StartOfLineColumn[LineIndex] >= Prefix.size());
+ Whitespaces.replaceWhitespaceInToken(
+ Tok, WhitespaceOffsetInToken, LeadingWhitespace[LineIndex], "", Prefix,
+ InPPDirective, 1, IndentLevel,
+ StartOfLineColumn[LineIndex] - Prefix.size());
+}
+
+unsigned
+BreakableBlockComment::getContentStartColumn(unsigned LineIndex,
+ unsigned TailOffset) const {
+ // If we break, we always break at the predefined indent.
+ if (TailOffset != 0)
+ return IndentAtLineBreak;
+ return StartOfLineColumn[LineIndex];
}
} // namespace format
diff --git a/lib/Format/BreakableToken.h b/lib/Format/BreakableToken.h
index c1303183d312..b965190d54de 100644
--- a/lib/Format/BreakableToken.h
+++ b/lib/Format/BreakableToken.h
@@ -17,6 +17,7 @@
#ifndef LLVM_CLANG_FORMAT_BREAKABLETOKEN_H
#define LLVM_CLANG_FORMAT_BREAKABLETOKEN_H
+#include "Encoding.h"
#include "TokenAnnotator.h"
#include "WhitespaceManager.h"
#include <utility>
@@ -24,214 +25,218 @@
namespace clang {
namespace format {
+struct FormatStyle;
+
+/// \brief Base class for strategies on how to break tokens.
+///
+/// FIXME: The interface seems set in stone, so we might want to just pull the
+/// strategy into the class, instead of controlling it from the outside.
class BreakableToken {
public:
- BreakableToken(const SourceManager &SourceMgr, const FormatToken &Tok,
- unsigned StartColumn)
- : Tok(Tok), StartColumn(StartColumn),
- TokenText(SourceMgr.getCharacterData(Tok.getStartOfNonWhitespace()),
- Tok.TokenLength) {}
+ /// \brief Contains starting character index and length of split.
+ typedef std::pair<StringRef::size_type, unsigned> Split;
+
virtual ~BreakableToken() {}
+
+ /// \brief Returns the number of lines in this token in the original code.
virtual unsigned getLineCount() const = 0;
- virtual unsigned getLineSize(unsigned Index) const = 0;
- virtual unsigned getLineLengthAfterSplit(unsigned LineIndex,
- unsigned TailOffset) const = 0;
- // Contains starting character index and length of split.
- typedef std::pair<StringRef::size_type, unsigned> Split;
+ /// \brief Returns the number of columns required to format the piece of line
+ /// at \p LineIndex, from byte offset \p Offset with length \p Length.
+ ///
+ /// Note that previous breaks are not taken into account. \p Offset is always
+ /// specified from the start of the (original) line.
+ /// \p Length can be set to StringRef::npos, which means "to the end of line".
+ virtual unsigned
+ getLineLengthAfterSplit(unsigned LineIndex, unsigned Offset,
+ StringRef::size_type Length) const = 0;
+
+ /// \brief Returns a range (offset, length) at which to break the line at
+ /// \p LineIndex, if previously broken at \p TailOffset. If possible, do not
+ /// violate \p ColumnLimit.
virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
unsigned ColumnLimit) const = 0;
+
+ /// \brief Emits the previously retrieved \p Split via \p Whitespaces.
virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
- bool InPPDirective,
WhitespaceManager &Whitespaces) = 0;
- virtual void trimLine(unsigned LineIndex, unsigned TailOffset,
- unsigned InPPDirective,
- WhitespaceManager &Whitespaces) {}
+
+ /// \brief Replaces the whitespace range described by \p Split with a single
+ /// space.
+ virtual void replaceWhitespace(unsigned LineIndex, unsigned TailOffset,
+ Split Split,
+ WhitespaceManager &Whitespaces) = 0;
+
+ /// \brief Replaces the whitespace between \p LineIndex-1 and \p LineIndex.
+ virtual void replaceWhitespaceBefore(unsigned LineIndex,
+ WhitespaceManager &Whitespaces) {}
+
protected:
+ BreakableToken(const FormatToken &Tok, unsigned IndentLevel,
+ bool InPPDirective, encoding::Encoding Encoding,
+ const FormatStyle &Style)
+ : Tok(Tok), IndentLevel(IndentLevel), InPPDirective(InPPDirective),
+ Encoding(Encoding), Style(Style) {}
+
const FormatToken &Tok;
- unsigned StartColumn;
- StringRef TokenText;
+ const unsigned IndentLevel;
+ const bool InPPDirective;
+ const encoding::Encoding Encoding;
+ const FormatStyle &Style;
};
-class BreakableStringLiteral : public BreakableToken {
+/// \brief Base class for single line tokens that can be broken.
+///
+/// \c getSplit() needs to be implemented by child classes.
+class BreakableSingleLineToken : public BreakableToken {
public:
- BreakableStringLiteral(const SourceManager &SourceMgr, const FormatToken &Tok,
- unsigned StartColumn)
- : BreakableToken(SourceMgr, Tok, StartColumn) {
- assert(TokenText.startswith("\"") && TokenText.endswith("\""));
- }
+ virtual unsigned getLineCount() const;
+ virtual unsigned getLineLengthAfterSplit(unsigned LineIndex,
+ unsigned TailOffset,
+ StringRef::size_type Length) const;
- virtual unsigned getLineCount() const { return 1; }
+protected:
+ BreakableSingleLineToken(const FormatToken &Tok, unsigned IndentLevel,
+ unsigned StartColumn, StringRef Prefix,
+ StringRef Postfix, bool InPPDirective,
+ encoding::Encoding Encoding,
+ const FormatStyle &Style);
- virtual unsigned getLineSize(unsigned Index) const {
- return Tok.TokenLength - 2; // Should be in sync with getLine
- }
+ // The column in which the token starts.
+ unsigned StartColumn;
+ // The prefix a line needs after a break in the token.
+ StringRef Prefix;
+ // The postfix a line needs before introducing a break.
+ StringRef Postfix;
+ // The token text excluding the prefix and postfix.
+ StringRef Line;
+};
- virtual unsigned getLineLengthAfterSplit(unsigned LineIndex,
- unsigned TailOffset) const {
- return getDecorationLength() + getLine().size() - TailOffset;
- }
+class BreakableStringLiteral : public BreakableSingleLineToken {
+public:
+ /// \brief Creates a breakable token for a single line string literal.
+ ///
+ /// \p StartColumn specifies the column in which the token will start
+ /// after formatting.
+ BreakableStringLiteral(const FormatToken &Tok, unsigned IndentLevel,
+ unsigned StartColumn, StringRef Prefix,
+ StringRef Postfix, bool InPPDirective,
+ encoding::Encoding Encoding, const FormatStyle &Style);
virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
- unsigned ColumnLimit) const {
- StringRef Text = getLine().substr(TailOffset);
- if (ColumnLimit <= getDecorationLength())
- return Split(StringRef::npos, 0);
- unsigned MaxSplit = ColumnLimit - getDecorationLength();
- assert(MaxSplit < Text.size());
- StringRef::size_type SpaceOffset = Text.rfind(' ', MaxSplit);
- if (SpaceOffset != StringRef::npos && SpaceOffset != 0)
- return Split(SpaceOffset + 1, 0);
- StringRef::size_type SlashOffset = Text.rfind('/', MaxSplit);
- if (SlashOffset != StringRef::npos && SlashOffset != 0)
- return Split(SlashOffset + 1, 0);
- StringRef::size_type SplitPoint = getStartOfCharacter(Text, MaxSplit);
- if (SplitPoint != StringRef::npos && SplitPoint > 1)
- // Do not split at 0.
- return Split(SplitPoint, 0);
- return Split(StringRef::npos, 0);
- }
-
+ unsigned ColumnLimit) const;
virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
- bool InPPDirective, WhitespaceManager &Whitespaces) {
- unsigned WhitespaceStartColumn = StartColumn + Split.first + 2;
- Whitespaces.breakToken(Tok, 1 + TailOffset + Split.first, Split.second,
- "\"", "\"", InPPDirective, StartColumn,
- WhitespaceStartColumn);
- }
-
-private:
- StringRef getLine() const {
- // Get string without quotes.
- // FIXME: Handle string prefixes.
- return TokenText.substr(1, TokenText.size() - 2);
- }
-
- unsigned getDecorationLength() const { return StartColumn + 2; }
-
- static StringRef::size_type getStartOfCharacter(StringRef Text,
- StringRef::size_type Offset) {
- StringRef::size_type NextEscape = Text.find('\\');
- while (NextEscape != StringRef::npos && NextEscape < Offset) {
- StringRef::size_type SequenceLength =
- getEscapeSequenceLength(Text.substr(NextEscape));
- if (Offset < NextEscape + SequenceLength)
- return NextEscape;
- NextEscape = Text.find('\\', NextEscape + SequenceLength);
- }
- return Offset;
- }
-
- static unsigned getEscapeSequenceLength(StringRef Text) {
- assert(Text[0] == '\\');
- if (Text.size() < 2)
- return 1;
-
- switch (Text[1]) {
- case 'u':
- return 6;
- case 'U':
- return 10;
- case 'x':
- return getHexLength(Text);
- default:
- if (Text[1] >= '0' && Text[1] <= '7')
- return getOctalLength(Text);
- return 2;
- }
- }
-
- static unsigned getHexLength(StringRef Text) {
- unsigned I = 2; // Point after '\x'.
- while (I < Text.size() && ((Text[I] >= '0' && Text[I] <= '9') ||
- (Text[I] >= 'a' && Text[I] <= 'f') ||
- (Text[I] >= 'A' && Text[I] <= 'F'))) {
- ++I;
- }
- return I;
- }
-
- static unsigned getOctalLength(StringRef Text) {
- unsigned I = 1;
- while (I < Text.size() && I < 4 && (Text[I] >= '0' && Text[I] <= '7')) {
- ++I;
- }
- return I;
- }
-
+ WhitespaceManager &Whitespaces);
+ virtual void replaceWhitespace(unsigned LineIndex, unsigned TailOffset,
+ Split Split,
+ WhitespaceManager &Whitespaces) {}
};
-class BreakableComment : public BreakableToken {
+/// \brief Breakable token for a line comment; derives from
+/// BreakableSingleLineToken.
+class BreakableLineComment : public BreakableSingleLineToken {
public:
- virtual unsigned getLineSize(unsigned Index) const {
- return getLine(Index).size();
- }
-
- virtual unsigned getLineCount() const { return Lines.size(); }
-
- virtual unsigned getLineLengthAfterSplit(unsigned LineIndex,
- unsigned TailOffset) const {
- return getContentStartColumn(LineIndex, TailOffset) +
- getLine(LineIndex).size() - TailOffset;
- }
+ /// \brief Creates a breakable token for a line comment.
+ ///
+ /// \p StartColumn specifies the column in which the comment will start
+ /// after formatting.
+ BreakableLineComment(const FormatToken &Token, unsigned IndentLevel,
+ unsigned StartColumn, bool InPPDirective,
+ encoding::Encoding Encoding, const FormatStyle &Style);
virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
unsigned ColumnLimit) const;
virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
- bool InPPDirective, WhitespaceManager &Whitespaces);
-
-protected:
- BreakableComment(const SourceManager &SourceMgr, const FormatToken &Tok,
- unsigned StartColumn)
- : BreakableToken(SourceMgr, Tok, StartColumn) {}
-
- // Get comment lines without /* */, common prefix and trailing whitespace.
- // Last line is not trimmed, as it is terminated by */, so its trailing
- // whitespace is not really trailing.
- StringRef getLine(unsigned Index) const {
- return Index < Lines.size() - 1 ? Lines[Index].rtrim() : Lines[Index];
- }
-
- unsigned getContentStartColumn(unsigned LineIndex,
- unsigned TailOffset) const {
- return (TailOffset == 0 && LineIndex == 0)
- ? StartColumn
- : IndentAtLineBreak + Decoration.size();
- }
+ WhitespaceManager &Whitespaces);
+ virtual void replaceWhitespace(unsigned LineIndex, unsigned TailOffset,
+ Split Split,
+ WhitespaceManager &Whitespaces);
+ virtual void replaceWhitespaceBefore(unsigned LineIndex,
+ WhitespaceManager &Whitespaces);
- unsigned IndentAtLineBreak;
- StringRef Decoration;
- SmallVector<StringRef, 16> Lines;
+private:
+ // The prefix without an additional space if one was added.
+ StringRef OriginalPrefix;
};
-class BreakableBlockComment : public BreakableComment {
+/// \brief Breakable token for a block comment; derives from BreakableToken.
+class BreakableBlockComment : public BreakableToken {
public:
- BreakableBlockComment(const SourceManager &SourceMgr,
- const AnnotatedToken &Token, unsigned StartColumn);
-
- void alignLines(WhitespaceManager &Whitespaces);
-
+ /// \brief Creates a breakable token for a block comment.
+ ///
+ /// \p StartColumn specifies the column in which the comment will start
+ /// after formatting, while \p OriginalStartColumn specifies in which
+ /// column the comment started before formatting.
+ /// If the comment starts a line after formatting, set \p FirstInLine to true.
+ BreakableBlockComment(const FormatToken &Token, unsigned IndentLevel,
+ unsigned StartColumn, unsigned OriginalStartColumn,
+ bool FirstInLine, bool InPPDirective,
+ encoding::Encoding Encoding, const FormatStyle &Style);
+
+ virtual unsigned getLineCount() const;
virtual unsigned getLineLengthAfterSplit(unsigned LineIndex,
- unsigned TailOffset) const {
- return BreakableComment::getLineLengthAfterSplit(LineIndex, TailOffset) +
- (LineIndex + 1 < Lines.size() ? 0 : 2);
- }
-
- virtual void trimLine(unsigned LineIndex, unsigned TailOffset,
- unsigned InPPDirective, WhitespaceManager &Whitespaces);
+ unsigned TailOffset,
+ StringRef::size_type Length) const;
+ virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
+ unsigned ColumnLimit) const;
+ virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
+ WhitespaceManager &Whitespaces);
+ virtual void replaceWhitespace(unsigned LineIndex, unsigned TailOffset,
+ Split Split,
+ WhitespaceManager &Whitespaces);
+ virtual void replaceWhitespaceBefore(unsigned LineIndex,
+ WhitespaceManager &Whitespaces);
private:
- unsigned OriginalStartColumn;
- unsigned CommonPrefixLength;
-};
+ // Rearranges the whitespace between Lines[LineIndex-1] and Lines[LineIndex],
+ // so that all whitespace between the lines is accounted to Lines[LineIndex]
+ // as leading whitespace:
+ // - Lines[LineIndex] points to the text after that whitespace
+ // - Lines[LineIndex-1] shrinks by its trailing whitespace
+ // - LeadingWhitespace[LineIndex] is updated with the complete whitespace
+ // between the end of the text of Lines[LineIndex-1] and Lines[LineIndex]
+ //
+ // Sets StartOfLineColumn to the intended column in which the text at
+ // Lines[LineIndex] starts (note that the decoration, if present, is not
+ // considered part of the text).
+ void adjustWhitespace(unsigned LineIndex, int IndentDelta);
+
+ // Returns the column at which the text in line LineIndex starts, when broken
+ // at TailOffset. Note that the decoration (if present) is not considered part
+ // of the text.
+ unsigned getContentStartColumn(unsigned LineIndex, unsigned TailOffset) const;
+
+ // Contains the text of the lines of the block comment, excluding the leading
+ // /* in the first line and trailing */ in the last line, and excluding all
+ // trailing whitespace between the lines. Note that the decoration (if
+ // present) is also not considered part of the text.
+ SmallVector<StringRef, 16> Lines;
-class BreakableLineComment : public BreakableComment {
-public:
- BreakableLineComment(const SourceManager &SourceMgr,
- const AnnotatedToken &Token, unsigned StartColumn);
+ // LeadingWhitespace[i] is the number of characters regarded as whitespace in
+ // front of Lines[i]. Note that this can include "* " sequences, which we
+ // regard as whitespace when all lines have a "*" prefix.
+ SmallVector<unsigned, 16> LeadingWhitespace;
+
+ // StartOfLineColumn[i] is the target column at which Lines[i] should be.
+ // Note that this excludes a leading "* " or "*" in case all lines have
+ // a "*" prefix.
+ SmallVector<unsigned, 16> StartOfLineColumn;
+
+ // The column at which the text of a broken line should start.
+ // Note that an optional decoration would go before that column.
+ // IndentAtLineBreak is a uniform position for all lines in a block comment,
+ // regardless of their relative position.
+ // FIXME: Revisit the decision to do this; the main reason was to support
+ // patterns like
+ // /**************//**
+ // * Comment
+ // We could also support such patterns by special casing the first line
+ // instead.
+ unsigned IndentAtLineBreak;
-private:
- static StringRef getLineCommentPrefix(StringRef Comment);
+ // This is to distinguish between the case when the last line was empty and
+ // the case when it started with a decoration ("*" or "* ").
+ bool LastLineNeedsDecoration;
+
+ // Either "* " if all lines begin with a "*", or empty.
+ StringRef Decoration;
};
} // namespace format
diff --git a/lib/Format/CMakeLists.txt b/lib/Format/CMakeLists.txt
index 560e38b4bfaa..e3ef5bd21ee9 100644
--- a/lib/Format/CMakeLists.txt
+++ b/lib/Format/CMakeLists.txt
@@ -2,7 +2,9 @@ set(LLVM_LINK_COMPONENTS support)
add_clang_library(clangFormat
BreakableToken.cpp
+ ContinuationIndenter.cpp
Format.cpp
+ FormatToken.cpp
TokenAnnotator.cpp
UnwrappedLineParser.cpp
WhitespaceManager.cpp
diff --git a/lib/Format/ContinuationIndenter.cpp b/lib/Format/ContinuationIndenter.cpp
new file mode 100644
index 000000000000..971acc2b7a3c
--- /dev/null
+++ b/lib/Format/ContinuationIndenter.cpp
@@ -0,0 +1,884 @@
+//===--- ContinuationIndenter.cpp - Format C++ code -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements the continuation indenter.
+///
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "format-formatter"
+
+#include "BreakableToken.h"
+#include "ContinuationIndenter.h"
+#include "WhitespaceManager.h"
+#include "clang/Basic/OperatorPrecedence.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Format/Format.h"
+#include "llvm/Support/Debug.h"
+#include <string>
+
+namespace clang {
+namespace format {
+
+// Computes the length from \c Tok up to the first position after its matching
+// closer -- ), ], } or > -- at which a line break is possible. Yields 0 when
+// \c Tok has no matching token.
+static unsigned getLengthToMatchingParen(const FormatToken &Tok) {
+  FormatToken *Last = Tok.MatchingParen;
+  if (!Last)
+    return 0;
+  // Extend the range over every following token that cannot be preceded by a
+  // line break.
+  while (FormatToken *Following = Last->Next) {
+    if (Following->CanBreakBefore)
+      break;
+    Last = Following;
+  }
+  return Last->TotalLength - Tok.TotalLength + 1;
+}
+
+// Tells whether \c Tok is a member access ("." or "->") that directly follows
+// a scope-closing token, i.e. whether it begins the next segment of a builder
+// type call.
+static bool startsSegmentOfBuilderTypeCall(const FormatToken &Tok) {
+  if (!Tok.isMemberAccess())
+    return false;
+  const FormatToken *Before = Tok.Previous;
+  return Before && Before->closesScope();
+}
+
+// Tells whether \c Current is the first token of a new parameter. With
+// Style.BreakConstructorInitializersBeforeComma, the constructor initializer
+// comma itself starts the parameter; otherwise it is the token after a comma
+// (trailing comments excluded).
+static bool startsNextParameter(const FormatToken &Current,
+                                const FormatStyle &Style) {
+  const bool BreakBeforeCtorComma =
+      Style.BreakConstructorInitializersBeforeComma;
+  if (BreakBeforeCtorComma && Current.Type == TT_CtorInitializerComma)
+    return true;
+
+  const FormatToken &Prev = *Current.Previous;
+  if (!Prev.is(tok::comma) || Current.isTrailingComment())
+    return false;
+  // A constructor initializer comma only ends a parameter when we do not
+  // break in front of it.
+  return !(BreakBeforeCtorComma && Prev.Type == TT_CtorInitializerComma);
+}
+
+// Constructs the indenter by storing the given style, source manager,
+// whitespace manager, encoding and bin-packing flag in the members of the
+// same name. The constructor body is empty, so nothing else happens here.
+ContinuationIndenter::ContinuationIndenter(const FormatStyle &Style,
+ SourceManager &SourceMgr,
+ WhitespaceManager &Whitespaces,
+ encoding::Encoding Encoding,
+ bool BinPackInconclusiveFunctions)
+ : Style(Style), SourceMgr(SourceMgr), Whitespaces(Whitespaces),
+ Encoding(Encoding),
+ BinPackInconclusiveFunctions(BinPackInconclusiveFunctions) {}
+
+// Builds the LineState for the start of \p Line: column and first indent are
+// \p FirstIndent, the stack holds a single ParenState, and the first token of
+// the line has already been consumed.
+LineState ContinuationIndenter::getInitialState(unsigned FirstIndent,
+                                                const AnnotatedLine *Line,
+                                                bool DryRun) {
+  LineState Initial;
+
+  // Position within the line.
+  Initial.Line = Line;
+  Initial.NextToken = Line->First;
+  Initial.FirstIndent = FirstIndent;
+  Initial.Column = FirstIndent;
+
+  // Nesting information: we start outside of any parentheses.
+  Initial.ParenLevel = 0;
+  Initial.StartOfLineLevel = Initial.ParenLevel;
+  Initial.LowestLevelOnLine = Initial.ParenLevel;
+
+  // Remaining flags and markers.
+  Initial.StartOfStringLiteral = 0;
+  Initial.LineContainsContinuedForLoopSection = false;
+  Initial.IgnoreStackForComparison = false;
+
+  ParenState Outermost(FirstIndent, Line->Level, FirstIndent,
+                       /*AvoidBinPacking=*/false,
+                       /*NoLineBreak=*/false);
+  Initial.Stack.push_back(Outermost);
+
+  // The first token has already been indented and thus consumed.
+  moveStateToNextToken(Initial, DryRun, /*Newline=*/false);
+  return Initial;
+}
+
+// Tells whether a line break may be inserted before State.NextToken.
+bool ContinuationIndenter::canBreak(const LineState &State) {
+  const FormatToken &Current = *State.NextToken;
+  const FormatToken &Previous = *Current.Previous;
+  assert(&Previous == Current.Previous);
+  const ParenState &Top = State.Stack.back();
+
+  // There has to be a regular break opportunity, or the token has to close a
+  // block-type list on a level that is flagged BreakBeforeClosingBrace.
+  if (!(Current.CanBreakBefore ||
+        (Top.BreakBeforeClosingBrace && Current.closesBlockTypeList(Style))))
+    return false;
+
+  // The opening "{" of a braced list has to be on the same line as the first
+  // element if it is nested in another braced init list or function call.
+  const bool PreviousOpensNestedBracedInit =
+      Previous.is(tok::l_brace) && Previous.Type != TT_DictLiteral &&
+      Previous.BlockKind == BK_BracedInit && Previous.Previous &&
+      Previous.Previous->isOneOf(tok::l_brace, tok::l_paren, tok::comma);
+  if (PreviousOpensNestedBracedInit && !Current.MustBreakBefore)
+    return false;
+
+  // This prevents breaks like:
+  //   ...
+  //   SomeParameter, OtherParameter).DoSomething(
+  //   ...
+  // As they hide "DoSomething" and are generally bad for readability.
+  if (Previous.opensScope() && State.LowestLevelOnLine < State.StartOfLineLevel)
+    return false;
+
+  if (Current.isMemberAccess() && Top.ContainsUnwrappedBuilder)
+    return false;
+
+  return !Top.NoLineBreak;
+}
+
+// Returns \c true if a line break is required before State.NextToken. The
+// checks below are tried in order; the first one that matches returns
+// \c true, and \c false is returned only if none of them applies.
+bool ContinuationIndenter::mustBreak(const LineState &State) {
+ const FormatToken &Current = *State.NextToken;
+ const FormatToken &Previous = *Current.Previous;
+ if (Current.MustBreakBefore || Current.Type == TT_InlineASMColon)
+ return true;
+ if (State.Stack.back().BreakBeforeClosingBrace &&
+ Current.closesBlockTypeList(Style))
+ return true;
+ if (Previous.is(tok::semi) && State.LineContainsContinuedForLoopSection)
+ return true;
+ // If BreakBeforeParameter is set on the current level, break before each
+ // new parameter, after each ';' and at ternary operators (before or after
+ // them, depending on Style.BreakBeforeTernaryOperators) -- but never in
+ // front of a trailing comment or a closing ')' or '}'.
+ if ((startsNextParameter(Current, Style) || Previous.is(tok::semi) ||
+ (Style.BreakBeforeTernaryOperators &&
+ (Current.is(tok::question) || (Current.Type == TT_ConditionalExpr &&
+ Previous.isNot(tok::question)))) ||
+ (!Style.BreakBeforeTernaryOperators &&
+ (Previous.is(tok::question) || Previous.Type == TT_ConditionalExpr))) &&
+ State.Stack.back().BreakBeforeParameter && !Current.isTrailingComment() &&
+ !Current.isOneOf(tok::r_paren, tok::r_brace))
+ return true;
+ if (Style.AlwaysBreakBeforeMultilineStrings &&
+ State.Column > State.Stack.back().Indent && // Breaking saves columns.
+ !Previous.isOneOf(tok::kw_return, tok::lessless, tok::at) &&
+ Previous.Type != TT_InlineASMColon && NextIsMultilineString(State))
+ return true;
+ // Break after the '{' of a dict literal or the '[' of an array initializer
+ // if everything up to the matching closer exceeds the column limit.
+ if (((Previous.Type == TT_DictLiteral && Previous.is(tok::l_brace)) ||
+ Previous.Type == TT_ArrayInitializerLSquare) &&
+ getLengthToMatchingParen(Previous) + State.Column > getColumnLimit(State))
+ return true;
+
+ if (!Style.BreakBeforeBinaryOperators) {
+ // If we need to break somewhere inside the LHS of a binary expression, we
+ // should also break after the operator. Otherwise, the formatting would
+ // hide the operator precedence, e.g. in:
+ // if (aaaaaaaaaaaaaa ==
+ // bbbbbbbbbbbbbb && c) {..
+ // For comparisons, we only apply this rule, if the LHS is a binary
+ // expression itself as otherwise, the line breaks seem superfluous.
+ // We need special cases for ">>" which we have split into two ">" while
+ // lexing in order to make template parsing easier.
+ //
+ // FIXME: We'll need something similar for styles that break before binary
+ // operators.
+ bool IsComparison = (Previous.getPrecedence() == prec::Relational ||
+ Previous.getPrecedence() == prec::Equality) &&
+ Previous.Previous &&
+ Previous.Previous->Type != TT_BinaryOperator; // For >>.
+ bool LHSIsBinaryExpr =
+ Previous.Previous && Previous.Previous->EndsBinaryExpression;
+ if (Previous.Type == TT_BinaryOperator &&
+ (!IsComparison || LHSIsBinaryExpr) &&
+ Current.Type != TT_BinaryOperator && // For >>.
+ !Current.isTrailingComment() &&
+ !Previous.isOneOf(tok::lessless, tok::question) &&
+ Previous.getPrecedence() != prec::Assignment &&
+ State.Stack.back().BreakBeforeParameter)
+ return true;
+ }
+
+ // Same as above, but for the first "<<" operator.
+ if (Current.is(tok::lessless) && State.Stack.back().BreakBeforeParameter &&
+ State.Stack.back().FirstLessLess == 0)
+ return true;
+
+ // FIXME: Comparing LongestObjCSelectorName to 0 is a hacky way of finding
+ // out whether it is the first parameter. Clean this up.
+ if (Current.Type == TT_ObjCSelectorName &&
+ Current.LongestObjCSelectorName == 0 &&
+ State.Stack.back().BreakBeforeParameter)
+ return true;
+ // Always break before a constructor initializer colon and, at paren level
+ // 0, after a token that closes a template declaration (unless only a
+ // trailing comment follows).
+ if ((Current.Type == TT_CtorInitializerColon ||
+ (Previous.ClosesTemplateDeclaration && State.ParenLevel == 0 &&
+ !Current.isTrailingComment())))
+ return true;
+
+ if ((Current.Type == TT_StartOfName || Current.is(tok::kw_operator)) &&
+ State.Line->MightBeFunctionDecl &&
+ State.Stack.back().BreakBeforeParameter && State.ParenLevel == 0)
+ return true;
+ if (startsSegmentOfBuilderTypeCall(Current) &&
+ (State.Stack.back().CallContinuation != 0 ||
+ (State.Stack.back().BreakBeforeParameter &&
+ State.Stack.back().ContainsUnwrappedBuilder)))
+ return true;
+ return false;
+}
+
+// Appends State.NextToken to \p State, either on a new line (\p Newline) or
+// on the current one with \p ExtraSpaces additional spaces, and returns the
+// penalty this incurs. Whitespace changes are only emitted via the called
+// helpers and only if \p DryRun is false.
+unsigned ContinuationIndenter::addTokenToState(LineState &State, bool Newline,
+                                               bool DryRun,
+                                               unsigned ExtraSpaces) {
+  const FormatToken &Current = *State.NextToken;
+
+  // With an empty stack, or for an implicit string literal whose previous
+  // token is not a preprocessor keyword, the token keeps its original
+  // whitespace and is merely skipped over.
+  bool KeepOriginalWhitespace = State.Stack.size() == 0;
+  if (!KeepOriginalWhitespace && Current.Type == TT_ImplicitStringLiteral) {
+    if (Current.Previous->Tok.getIdentifierInfo() == NULL)
+      KeepOriginalWhitespace = true;
+    else
+      KeepOriginalWhitespace =
+          Current.Previous->Tok.getIdentifierInfo()->getPPKeywordID() ==
+          tok::pp_not_keyword;
+  }
+
+  if (KeepOriginalWhitespace) {
+    // FIXME: Is this correct?
+    const unsigned WhitespaceEndColumn = SourceMgr.getSpellingColumnNumber(
+        State.NextToken->WhitespaceRange.getEnd());
+    const unsigned WhitespaceBeginColumn = SourceMgr.getSpellingColumnNumber(
+        State.NextToken->WhitespaceRange.getBegin());
+    int WhitespaceLength = WhitespaceEndColumn - WhitespaceBeginColumn;
+    State.Column += WhitespaceLength + State.NextToken->ColumnWidth;
+    State.NextToken = State.NextToken->Next;
+    return 0;
+  }
+
+  if (!Newline) {
+    addTokenOnCurrentLine(State, DryRun, ExtraSpaces);
+    return moveStateToNextToken(State, DryRun, /*Newline=*/false);
+  }
+
+  const unsigned BreakPenalty = addTokenOnNewLine(State, DryRun);
+  return moveStateToNextToken(State, DryRun, /*Newline=*/true) + BreakPenalty;
+}
+
+// Places State.NextToken on the current line, preceded by its required
+// spaces plus \p ExtraSpaces, and updates State.Column and the top
+// ParenState accordingly. The whitespace replacement is only emitted if
+// \p DryRun is false. State.NextToken is not advanced in this function.
+void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun,
+ unsigned ExtraSpaces) {
+ FormatToken &Current = *State.NextToken;
+ const FormatToken &Previous = *State.NextToken->Previous;
+ // Record VariablePos at the first '=' seen on this level, either at paren
+ // level 0 or anywhere in a line starting with 'for'.
+ if (Current.is(tok::equal) &&
+ (State.Line->First->is(tok::kw_for) || State.ParenLevel == 0) &&
+ State.Stack.back().VariablePos == 0) {
+ State.Stack.back().VariablePos = State.Column;
+ // Move over * and & if they are bound to the variable name.
+ const FormatToken *Tok = &Previous;
+ while (Tok && State.Stack.back().VariablePos >= Tok->ColumnWidth) {
+ State.Stack.back().VariablePos -= Tok->ColumnWidth;
+ if (Tok->SpacesRequiredBefore != 0)
+ break;
+ Tok = Tok->Previous;
+ }
+ if (Previous.PartOfMultiVariableDeclStmt)
+ State.Stack.back().LastSpace = State.Stack.back().VariablePos;
+ }
+
+ unsigned Spaces = Current.SpacesRequiredBefore + ExtraSpaces;
+
+ if (!DryRun)
+ Whitespaces.replaceWhitespace(Current, /*Newlines=*/0, /*IndentLevel=*/0,
+ Spaces, State.Column + Spaces);
+
+ // For the first ObjC selector name on this level, set ColonPos to the
+ // larger of "Indent + LongestObjCSelectorName" and the end of this name.
+ if (Current.Type == TT_ObjCSelectorName && State.Stack.back().ColonPos == 0) {
+ if (State.Stack.back().Indent + Current.LongestObjCSelectorName >
+ State.Column + Spaces + Current.ColumnWidth)
+ State.Stack.back().ColonPos =
+ State.Stack.back().Indent + Current.LongestObjCSelectorName;
+ else
+ State.Stack.back().ColonPos = State.Column + Spaces + Current.ColumnWidth;
+ }
+
+ if (Previous.opensScope() && Previous.Type != TT_ObjCMethodExpr &&
+ Current.Type != TT_LineComment)
+ State.Stack.back().Indent = State.Column + Spaces;
+ if (State.Stack.back().AvoidBinPacking && startsNextParameter(Current, Style))
+ State.Stack.back().NoLineBreak = true;
+ if (startsSegmentOfBuilderTypeCall(Current))
+ State.Stack.back().ContainsUnwrappedBuilder = true;
+
+ State.Column += Spaces;
+ // From here on, State.Column is the column at which Current starts. The
+ // chain below picks at most one rule for updating LastSpace (and Indent).
+ if (Current.is(tok::l_paren) && Previous.isOneOf(tok::kw_if, tok::kw_for))
+ // Treat the condition inside an if as if it was a second function
+ // parameter, i.e. let nested calls have a continuation indent.
+ State.Stack.back().LastSpace = State.Column + 1; // 1 is length of "(".
+ else if (Previous.is(tok::comma) || Previous.Type == TT_ObjCMethodExpr)
+ State.Stack.back().LastSpace = State.Column;
+ else if ((Previous.Type == TT_BinaryOperator ||
+ Previous.Type == TT_ConditionalExpr ||
+ Previous.Type == TT_UnaryOperator ||
+ Previous.Type == TT_CtorInitializerColon) &&
+ (Previous.getPrecedence() != prec::Assignment ||
+ Current.StartsBinaryExpression))
+ // Always indent relative to the RHS of the expression unless this is a
+ // simple assignment without binary expression on the RHS. Also indent
+ // relative to unary operators and the colons of constructor initializers.
+ State.Stack.back().LastSpace = State.Column;
+ else if (Previous.Type == TT_InheritanceColon) {
+ State.Stack.back().Indent = State.Column;
+ State.Stack.back().LastSpace = State.Column;
+ } else if (Previous.opensScope()) {
+ // If a function has a trailing call, indent all parameters from the
+ // opening parenthesis. This avoids confusing indents like:
+ // OuterFunction(InnerFunctionCall( // break
+ // ParameterToInnerFunction)) // break
+ // .SecondInnerFunctionCall();
+ bool HasTrailingCall = false;
+ if (Previous.MatchingParen) {
+ const FormatToken *Next = Previous.MatchingParen->getNextNonComment();
+ HasTrailingCall = Next && Next->isMemberAccess();
+ }
+ if (HasTrailingCall &&
+ State.Stack[State.Stack.size() - 2].CallContinuation == 0)
+ State.Stack.back().LastSpace = State.Column;
+ }
+}
+
+// Places State.NextToken at the start of a new line: determines the column
+// at which the token starts, records the break in the ParenState stack and,
+// unless \p DryRun is set, emits the whitespace replacement. Returns the
+// penalty incurred by this line break. State.NextToken is not advanced here.
+unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State,
+ bool DryRun) {
+ FormatToken &Current = *State.NextToken;
+ const FormatToken &Previous = *State.NextToken->Previous;
+ // If we are continuing an expression, we want to use the continuation indent.
+ unsigned ContinuationIndent =
+ std::max(State.Stack.back().LastSpace, State.Stack.back().Indent) +
+ Style.ContinuationIndentWidth;
+ // Extra penalty that needs to be added because of the way certain line
+ // breaks are chosen.
+ unsigned Penalty = 0;
+
+ // May be null if only comments precede Current on this line.
+ const FormatToken *PreviousNonComment =
+ State.NextToken->getPreviousNonComment();
+ // The first line break on any ParenLevel causes an extra penalty in order
+ // to prefer similar line breaks.
+ if (!State.Stack.back().ContainsLineBreak)
+ Penalty += 15;
+ State.Stack.back().ContainsLineBreak = true;
+
+ Penalty += State.NextToken->SplitPenalty;
+
+ // Breaking before the first "<<" is generally not desirable if the LHS is
+ // short.
+ if (Current.is(tok::lessless) && State.Stack.back().FirstLessLess == 0 &&
+ State.Column <= Style.ColumnLimit / 2)
+ Penalty += Style.PenaltyBreakFirstLessLess;
+
+ if (Current.is(tok::l_brace) && Current.BlockKind == BK_Block) {
+ State.Column = State.FirstIndent;
+ } else if (Current.isOneOf(tok::r_brace, tok::r_square)) {
+ if (Current.closesBlockTypeList(Style) ||
+ (Current.MatchingParen &&
+ Current.MatchingParen->BlockKind == BK_BracedInit))
+ State.Column = State.Stack[State.Stack.size() - 2].LastSpace;
+ else
+ State.Column = State.FirstIndent;
+ } else if (Current.is(tok::string_literal) &&
+ State.StartOfStringLiteral != 0) {
+ State.Column = State.StartOfStringLiteral;
+ State.Stack.back().BreakBeforeParameter = true;
+ } else if (Current.is(tok::lessless) &&
+ State.Stack.back().FirstLessLess != 0) {
+ State.Column = State.Stack.back().FirstLessLess;
+ } else if (Current.isMemberAccess()) {
+ if (State.Stack.back().CallContinuation == 0) {
+ State.Column = ContinuationIndent;
+ State.Stack.back().CallContinuation = State.Column;
+ } else {
+ State.Column = State.Stack.back().CallContinuation;
+ }
+ } else if (State.Stack.back().QuestionColumn != 0 &&
+ (Current.Type == TT_ConditionalExpr ||
+ Previous.Type == TT_ConditionalExpr)) {
+ State.Column = State.Stack.back().QuestionColumn;
+ } else if (Previous.is(tok::comma) && State.Stack.back().VariablePos != 0) {
+ State.Column = State.Stack.back().VariablePos;
+ } else if ((PreviousNonComment &&
+ PreviousNonComment->ClosesTemplateDeclaration) ||
+ ((Current.Type == TT_StartOfName ||
+ Current.is(tok::kw_operator)) &&
+ State.ParenLevel == 0 &&
+ (!Style.IndentFunctionDeclarationAfterType ||
+ State.Line->StartsDefinition))) {
+ State.Column = State.Stack.back().Indent;
+ } else if (Current.Type == TT_ObjCSelectorName) {
+ if (State.Stack.back().ColonPos == 0) {
+ State.Stack.back().ColonPos =
+ State.Stack.back().Indent + Current.LongestObjCSelectorName;
+ State.Column = State.Stack.back().ColonPos - Current.ColumnWidth;
+ } else if (State.Stack.back().ColonPos > Current.ColumnWidth) {
+ State.Column = State.Stack.back().ColonPos - Current.ColumnWidth;
+ } else {
+ State.Column = State.Stack.back().Indent;
+ State.Stack.back().ColonPos = State.Column + Current.ColumnWidth;
+ }
+ } else if (Current.Type == TT_ArraySubscriptLSquare) {
+ if (State.Stack.back().StartOfArraySubscripts != 0)
+ State.Column = State.Stack.back().StartOfArraySubscripts;
+ else
+ State.Column = ContinuationIndent;
+ } else if (Current.Type == TT_StartOfName ||
+ Previous.isOneOf(tok::coloncolon, tok::equal) ||
+ Previous.Type == TT_ObjCMethodExpr) {
+ State.Column = ContinuationIndent;
+ } else if (Current.Type == TT_CtorInitializerColon) {
+ State.Column = State.FirstIndent + Style.ConstructorInitializerIndentWidth;
+ } else if (Current.Type == TT_CtorInitializerComma) {
+ State.Column = State.Stack.back().Indent;
+ } else {
+ State.Column = State.Stack.back().Indent;
+ // Ensure that we fall back to the continuation indent width instead of just
+ // flushing continuations left. PreviousNonComment is null if only comments
+ // precede Current, so it has to be checked before it is dereferenced.
+ if (State.Column == State.FirstIndent && PreviousNonComment &&
+ PreviousNonComment->isNot(tok::r_brace))
+ State.Column += Style.ContinuationIndentWidth;
+ }
+
+ if ((Previous.isOneOf(tok::comma, tok::semi) &&
+ !State.Stack.back().AvoidBinPacking) ||
+ Previous.Type == TT_BinaryOperator)
+ State.Stack.back().BreakBeforeParameter = false;
+ if (Previous.Type == TT_TemplateCloser && State.ParenLevel == 0)
+ State.Stack.back().BreakBeforeParameter = false;
+ if (Current.is(tok::question) ||
+ (PreviousNonComment && PreviousNonComment->is(tok::question)))
+ State.Stack.back().BreakBeforeParameter = true;
+
+ if (!DryRun) {
+ unsigned Newlines = 1;
+ if (Current.is(tok::comment))
+ Newlines = std::max(Newlines, std::min(Current.NewlinesBefore,
+ Style.MaxEmptyLinesToKeep + 1));
+ Whitespaces.replaceWhitespace(Current, Newlines,
+ State.Stack.back().IndentLevel, State.Column,
+ State.Column, State.Line->InPPDirective);
+ }
+
+ if (!Current.isTrailingComment())
+ State.Stack.back().LastSpace = State.Column;
+ if (Current.isMemberAccess())
+ State.Stack.back().LastSpace += Current.ColumnWidth;
+ State.StartOfLineLevel = State.ParenLevel;
+ State.LowestLevelOnLine = State.ParenLevel;
+
+ // Any break on this level means that the parent level has been broken
+ // and we need to avoid bin packing there.
+ for (unsigned i = 0, e = State.Stack.size() - 1; i != e; ++i) {
+ State.Stack[i].BreakBeforeParameter = true;
+ }
+ if (PreviousNonComment &&
+ !PreviousNonComment->isOneOf(tok::comma, tok::semi) &&
+ PreviousNonComment->Type != TT_TemplateCloser &&
+ PreviousNonComment->Type != TT_BinaryOperator &&
+ Current.Type != TT_BinaryOperator &&
+ !PreviousNonComment->opensScope())
+ State.Stack.back().BreakBeforeParameter = true;
+
+ // If we break after { or the [ of an array initializer, we should also break
+ // before the corresponding } or ].
+ if (Previous.is(tok::l_brace) || Previous.Type == TT_ArrayInitializerLSquare)
+ State.Stack.back().BreakBeforeClosingBrace = true;
+
+ if (State.Stack.back().AvoidBinPacking) {
+ // If we are breaking after '(', '{', '<', this is not bin packing
+ // unless AllowAllParametersOfDeclarationOnNextLine is false.
+ if (!(Previous.isOneOf(tok::l_paren, tok::l_brace) ||
+ Previous.Type == TT_BinaryOperator) ||
+ (!Style.AllowAllParametersOfDeclarationOnNextLine &&
+ State.Line->MustBeDeclaration))
+ State.Stack.back().BreakBeforeParameter = true;
+ }
+
+ return Penalty;
+}
+
+unsigned ContinuationIndenter::moveStateToNextToken(LineState &State,
+ bool DryRun, bool Newline) {
+ const FormatToken &Current = *State.NextToken;
+ assert(State.Stack.size());
+
+ if (Current.Type == TT_InheritanceColon)
+ State.Stack.back().AvoidBinPacking = true;
+ if (Current.is(tok::lessless) && State.Stack.back().FirstLessLess == 0)
+ State.Stack.back().FirstLessLess = State.Column;
+ if (Current.Type == TT_ArraySubscriptLSquare &&
+ State.Stack.back().StartOfArraySubscripts == 0)
+ State.Stack.back().StartOfArraySubscripts = State.Column;
+ if ((Current.is(tok::question) && Style.BreakBeforeTernaryOperators) ||
+ (Current.getPreviousNonComment() && Current.isNot(tok::colon) &&
+ Current.getPreviousNonComment()->is(tok::question) &&
+ !Style.BreakBeforeTernaryOperators))
+ State.Stack.back().QuestionColumn = State.Column;
+ if (!Current.opensScope() && !Current.closesScope())
+ State.LowestLevelOnLine =
+ std::min(State.LowestLevelOnLine, State.ParenLevel);
+ if (Current.isMemberAccess())
+ State.Stack.back().StartOfFunctionCall =
+ Current.LastInChainOfCalls ? 0 : State.Column + Current.ColumnWidth;
+ if (Current.Type == TT_CtorInitializerColon) {
+ // Indent 2 from the column, so:
+ // SomeClass::SomeClass()
+ // : First(...), ...
+ // Next(...)
+ // ^ line up here.
+ State.Stack.back().Indent =
+ State.Column + (Style.BreakConstructorInitializersBeforeComma ? 0 : 2);
+ if (Style.ConstructorInitializerAllOnOneLineOrOnePerLine)
+ State.Stack.back().AvoidBinPacking = true;
+ State.Stack.back().BreakBeforeParameter = false;
+ }
+
+ // In ObjC method declaration we align on the ":" of parameters, but we need
+ // to ensure that we indent parameters on subsequent lines by at least our
+ // continuation indent width.
+ if (Current.Type == TT_ObjCMethodSpecifier)
+ State.Stack.back().Indent += Style.ContinuationIndentWidth;
+
+ // Insert scopes created by fake parenthesis.
+ const FormatToken *Previous = Current.getPreviousNonComment();
+ // Don't add extra indentation for the first fake parenthesis after
+ // 'return', assignments or opening <({[. The indentation for these cases
+ // is special cased.
+ bool SkipFirstExtraIndent =
+ (Previous && (Previous->opensScope() || Previous->is(tok::kw_return) ||
+ Previous->getPrecedence() == prec::Assignment ||
+ Previous->Type == TT_ObjCMethodExpr));
+ for (SmallVectorImpl<prec::Level>::const_reverse_iterator
+ I = Current.FakeLParens.rbegin(),
+ E = Current.FakeLParens.rend();
+ I != E; ++I) {
+ ParenState NewParenState = State.Stack.back();
+ NewParenState.ContainsLineBreak = false;
+
+ // Indent from 'LastSpace' unless this is the fake parenthesis encapsulating a
+ // builder type call after 'return'. If such a call is line-wrapped, we
+ // commonly just want to indent from the start of the line.
+ if (!Previous || Previous->isNot(tok::kw_return) || *I > 0)
+ NewParenState.Indent =
+ std::max(std::max(State.Column, NewParenState.Indent),
+ State.Stack.back().LastSpace);
+
+ // Do not indent relative to the fake parentheses inserted for "." or "->".
+ // This is a special case to make the following two statements consistent:
+ // OuterFunction(InnerFunctionCall( // break
+ // ParameterToInnerFunction));
+ // OuterFunction(SomeObject.InnerFunctionCall( // break
+ // ParameterToInnerFunction));
+ if (*I > prec::Unknown)
+ NewParenState.LastSpace = std::max(NewParenState.LastSpace, State.Column);
+
+ // Always indent conditional expressions. Never indent expression where
+ // the 'operator' is ',', ';' or an assignment (i.e. *I <=
+ // prec::Assignment) as those have different indentation rules. Indent
+ // other expression, unless the indentation needs to be skipped.
+ if (*I == prec::Conditional ||
+ (!SkipFirstExtraIndent && *I > prec::Assignment &&
+ !Style.BreakBeforeBinaryOperators))
+ NewParenState.Indent += Style.ContinuationIndentWidth;
+ if ((Previous && !Previous->opensScope()) || *I > prec::Comma)
+ NewParenState.BreakBeforeParameter = false;
+ State.Stack.push_back(NewParenState);
+ SkipFirstExtraIndent = false;
+ }
+
+ // If we encounter an opening (, [, { or <, we add a level to our stacks to
+ // prepare for the following tokens.
+ if (Current.opensScope()) {
+ unsigned NewIndent;
+ unsigned NewIndentLevel = State.Stack.back().IndentLevel;
+ bool AvoidBinPacking;
+ bool BreakBeforeParameter = false;
+ if (Current.is(tok::l_brace) ||
+ Current.Type == TT_ArrayInitializerLSquare) {
+ if (Current.MatchingParen && Current.BlockKind == BK_Block) {
+ // If this is an l_brace starting a nested block, we pretend (wrt. to
+ // indentation) that we already consumed the corresponding r_brace.
+ // Thus, we remove all ParenStates caused by fake parentheses that end
+ // at the r_brace. The net effect of this is that we don't indent
+ // relative to the l_brace, if the nested block is the last parameter of
+ // a function. For example, this formats:
+ //
+ // SomeFunction(a, [] {
+ // f(); // break
+ // });
+ //
+ // instead of:
+ // SomeFunction(a, [] {
+ // f(); // break
+ // });
+ for (unsigned i = 0; i != Current.MatchingParen->FakeRParens; ++i)
+ State.Stack.pop_back();
+ NewIndent = State.Stack.back().LastSpace + Style.IndentWidth;
+ ++NewIndentLevel;
+ BreakBeforeParameter = true;
+ } else {
+ NewIndent = State.Stack.back().LastSpace;
+ if (Current.opensBlockTypeList(Style)) {
+ NewIndent += Style.IndentWidth;
+ ++NewIndentLevel;
+ } else {
+ NewIndent += Style.ContinuationIndentWidth;
+ }
+ }
+ const FormatToken *NextNoComment = Current.getNextNonComment();
+ AvoidBinPacking = Current.BlockKind == BK_Block ||
+ Current.Type == TT_ArrayInitializerLSquare ||
+ Current.Type == TT_DictLiteral ||
+ (NextNoComment &&
+ NextNoComment->Type == TT_DesignatedInitializerPeriod);
+ } else {
+ NewIndent = Style.ContinuationIndentWidth +
+ std::max(State.Stack.back().LastSpace,
+ State.Stack.back().StartOfFunctionCall);
+ AvoidBinPacking = !Style.BinPackParameters ||
+ (Style.ExperimentalAutoDetectBinPacking &&
+ (Current.PackingKind == PPK_OnePerLine ||
+ (!BinPackInconclusiveFunctions &&
+ Current.PackingKind == PPK_Inconclusive)));
+ // If this '[' opens an ObjC call, determine whether all parameters fit
+ // into one line and put one per line if they don't.
+ if (Current.Type == TT_ObjCMethodExpr &&
+ getLengthToMatchingParen(Current) + State.Column >
+ getColumnLimit(State))
+ BreakBeforeParameter = true;
+ }
+
+ bool NoLineBreak = State.Stack.back().NoLineBreak ||
+ (Current.Type == TT_TemplateOpener &&
+ State.Stack.back().ContainsUnwrappedBuilder);
+ State.Stack.push_back(ParenState(NewIndent, NewIndentLevel,
+ State.Stack.back().LastSpace,
+ AvoidBinPacking, NoLineBreak));
+ State.Stack.back().BreakBeforeParameter = BreakBeforeParameter;
+ ++State.ParenLevel;
+ }
+
+ // If we encounter a closing ), ], } or >, we can remove a level from our
+ // stacks.
+ if (State.Stack.size() > 1 &&
+ (Current.isOneOf(tok::r_paren, tok::r_square) ||
+ (Current.is(tok::r_brace) && State.NextToken != State.Line->First) ||
+ State.NextToken->Type == TT_TemplateCloser)) {
+ State.Stack.pop_back();
+ --State.ParenLevel;
+ }
+ if (Current.is(tok::r_square)) {
+ // If this ends the array subscript expr, reset the corresponding value.
+ const FormatToken *NextNonComment = Current.getNextNonComment();
+ if (NextNonComment && NextNonComment->isNot(tok::l_square))
+ State.Stack.back().StartOfArraySubscripts = 0;
+ }
+
+ // Remove scopes created by fake parenthesis.
+ if (Current.isNot(tok::r_brace) ||
+ (Current.MatchingParen && Current.MatchingParen->BlockKind != BK_Block)) {
+ // Don't remove FakeRParens attached to r_braces that surround nested blocks
+ // as they will have been removed early (see above).
+ for (unsigned i = 0, e = Current.FakeRParens; i != e; ++i) {
+ unsigned VariablePos = State.Stack.back().VariablePos;
+ State.Stack.pop_back();
+ State.Stack.back().VariablePos = VariablePos;
+ }
+ }
+
+ if (Current.is(tok::string_literal) && State.StartOfStringLiteral == 0) {
+ State.StartOfStringLiteral = State.Column;
+ } else if (!Current.isOneOf(tok::comment, tok::identifier, tok::hash,
+ tok::string_literal)) {
+ State.StartOfStringLiteral = 0;
+ }
+
+ State.Column += Current.ColumnWidth;
+ State.NextToken = State.NextToken->Next;
+ unsigned Penalty = breakProtrudingToken(Current, State, DryRun);
+ if (State.Column > getColumnLimit(State)) {
+ unsigned ExcessCharacters = State.Column - getColumnLimit(State);
+ Penalty += Style.PenaltyExcessCharacter * ExcessCharacters;
+ }
+
+ // If the previous has a special role, let it consume tokens as appropriate.
+ // It is necessary to start at the previous token for the only implemented
+ // role (comma separated list). That way, the decision whether or not to break
+ // after the "{" is already done and both options are tried and evaluated.
+ // FIXME: This is ugly, find a better way.
+ if (Previous && Previous->Role)
+ Penalty += Previous->Role->format(State, this, DryRun);
+
+ return Penalty;
+}
+
+unsigned ContinuationIndenter::addMultilineToken(const FormatToken &Current,
+                                                 LineState &State) {
+  // A token spanning several lines forces a break before every subsequent
+  // function parameter, on each level of the stack.
+  for (std::vector<ParenState>::iterator Level = State.Stack.begin(),
+                                         End = State.Stack.end();
+       Level != End; ++Level)
+    Level->BreakBeforeParameter = true;
+
+  // Only the first and the last line of the token can be laid out here; the
+  // lines in between cost the same whatever we do, so they are ignored.
+  const unsigned FirstLineEnd = State.Column;
+  State.Column = Current.LastLineColumnWidth;
+
+  const unsigned Limit = getColumnLimit(State);
+  if (FirstLineEnd <= Limit)
+    return 0;
+  return Style.PenaltyExcessCharacter * (FirstLineEnd - Limit);
+}
+
+// Splits a raw string literal into its fixed prefix and postfix.
+//
+// On success \p Prefix covers everything up to and including the opening
+// '(' (e.g. R"delim( ) and \p Postfix covers the closing )delim" sequence,
+// and the function returns true. Returns false if \p Text does not start
+// with one of the raw-string introducers, has no '(', is too short to hold
+// both parts, or does not end in the matching )delim" sequence. \p Prefix
+// (and possibly \p Postfix) may have been overwritten even when false is
+// returned.
+static bool getRawStringLiteralPrefixPostfix(StringRef Text,
+                                             StringRef &Prefix,
+                                             StringRef &Postfix) {
+  if (Text.startswith(Prefix = "R\"") || Text.startswith(Prefix = "uR\"") ||
+      Text.startswith(Prefix = "UR\"") || Text.startswith(Prefix = "u8R\"") ||
+      Text.startswith(Prefix = "LR\"")) {
+    size_t ParenPos = Text.find('(');
+    if (ParenPos != StringRef::npos) {
+      StringRef Delimiter =
+          Text.substr(Prefix.size(), ParenPos - Prefix.size());
+      // The postfix is ')' + delimiter + '"'.
+      const size_t PostfixSize = Delimiter.size() + 2;
+      // Without this guard the subtraction below wraps around for short
+      // input, substr() clamps to an empty string and front()/back() are
+      // then called on it. It also rejects a postfix that would overlap the
+      // prefix.
+      if (Text.size() < ParenPos + 1 + PostfixSize)
+        return false;
+      Prefix = Text.substr(0, ParenPos + 1);
+      Postfix = Text.substr(Text.size() - PostfixSize);
+      return Postfix.front() == ')' && Postfix.back() == '"' &&
+             Postfix.substr(1).startswith(Delimiter);
+    }
+  }
+  return false;
+}
+
+// Breaks \p Current (a string literal or a comment) where it sticks out past
+// the column limit and returns the penalty for doing so.
+//
+// Multi-line tokens other than block comments are only accounted for via
+// addMultilineToken(). When \p DryRun is false the whitespace changes are
+// handed to the WhitespaceManager. On return State.Column is the width of the
+// last resulting line of the token (for tokens that were actually examined).
+unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
+                                                    LineState &State,
+                                                    bool DryRun) {
+  // Don't break multi-line tokens other than block comments. Instead, just
+  // update the state.
+  if (Current.Type != TT_BlockComment && Current.IsMultiline)
+    return addMultilineToken(Current, State);
+
+  // Don't break implicit string literals.
+  if (Current.Type == TT_ImplicitStringLiteral)
+    return 0;
+
+  // Computed once and reused below, including for the break penalty, so that
+  // narrow, wide and UTF string literals are all treated as strings.
+  const bool IsStringLiteral = Current.isOneOf(
+      tok::string_literal, tok::wide_string_literal, tok::utf8_string_literal,
+      tok::utf16_string_literal, tok::utf32_string_literal);
+  if (!IsStringLiteral && Current.isNot(tok::comment))
+    return 0;
+
+  llvm::OwningPtr<BreakableToken> Token;
+  unsigned StartColumn = State.Column - Current.ColumnWidth;
+  unsigned ColumnLimit = getColumnLimit(State);
+
+  if (IsStringLiteral) {
+    // Don't break string literals inside preprocessor directives (except for
+    // #define directives, as their contents are stored in separate lines and
+    // are not affected by this check).
+    // This way we avoid breaking code with line directives and unknown
+    // preprocessor directives that contain long string literals.
+    if (State.Line->Type == LT_PreprocessorDirective)
+      return 0;
+    // Exempts unterminated string literals from line breaking. The user will
+    // likely want to terminate the string before any line breaking is done.
+    if (Current.IsUnterminatedLiteral)
+      return 0;
+
+    StringRef Text = Current.TokenText;
+    StringRef Prefix;
+    StringRef Postfix;
+    // FIXME: Handle whitespace between '_T', '(', '"..."', and ')'.
+    // FIXME: Store Prefix and Suffix (or PrefixLength and SuffixLength to
+    // reduce the overhead) for each FormatToken, which is a string, so that we
+    // don't run multiple checks here on the hot path.
+    if ((Text.endswith(Postfix = "\"") &&
+         (Text.startswith(Prefix = "\"") || Text.startswith(Prefix = "u\"") ||
+          Text.startswith(Prefix = "U\"") || Text.startswith(Prefix = "u8\"") ||
+          Text.startswith(Prefix = "L\""))) ||
+        (Text.startswith(Prefix = "_T(\"") && Text.endswith(Postfix = "\")")) ||
+        getRawStringLiteralPrefixPostfix(Text, Prefix, Postfix)) {
+      Token.reset(new BreakableStringLiteral(
+          Current, State.Line->Level, StartColumn, Prefix, Postfix,
+          State.Line->InPPDirective, Encoding, Style));
+    } else {
+      return 0;
+    }
+  } else if (Current.Type == TT_BlockComment && Current.isTrailingComment()) {
+    Token.reset(new BreakableBlockComment(
+        Current, State.Line->Level, StartColumn, Current.OriginalColumn,
+        !Current.Previous, State.Line->InPPDirective, Encoding, Style));
+  } else if (Current.Type == TT_LineComment &&
+             (Current.Previous == NULL ||
+              Current.Previous->Type != TT_ImplicitStringLiteral)) {
+    Token.reset(new BreakableLineComment(Current, State.Line->Level,
+                                         StartColumn, /*InPPDirective=*/false,
+                                         Encoding, Style));
+    // We don't insert backslashes when breaking line comments.
+    ColumnLimit = Style.ColumnLimit;
+  } else {
+    return 0;
+  }
+  // Nothing can be gained if the unbreakable tail alone fills the line.
+  if (Current.UnbreakableTailLength >= ColumnLimit)
+    return 0;
+
+  unsigned RemainingSpace = ColumnLimit - Current.UnbreakableTailLength;
+  bool BreakInserted = false;
+  unsigned Penalty = 0;
+  unsigned RemainingTokenColumns = 0;
+  for (unsigned LineIndex = 0, EndIndex = Token->getLineCount();
+       LineIndex != EndIndex; ++LineIndex) {
+    if (!DryRun)
+      Token->replaceWhitespaceBefore(LineIndex, Whitespaces);
+    unsigned TailOffset = 0;
+    RemainingTokenColumns =
+        Token->getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos);
+    // Keep splitting this line until what is left fits.
+    while (RemainingTokenColumns > RemainingSpace) {
+      BreakableToken::Split Split =
+          Token->getSplit(LineIndex, TailOffset, ColumnLimit);
+      if (Split.first == StringRef::npos) {
+        // The last line's penalty is handled in addNextStateToQueue().
+        if (LineIndex < EndIndex - 1)
+          Penalty += Style.PenaltyExcessCharacter *
+                     (RemainingTokenColumns - RemainingSpace);
+        break;
+      }
+      assert(Split.first != 0);
+      unsigned NewRemainingTokenColumns = Token->getLineLengthAfterSplit(
+          LineIndex, TailOffset + Split.first + Split.second, StringRef::npos);
+
+      // We can remove extra whitespace instead of breaking the line.
+      if (RemainingTokenColumns + 1 - Split.second <= RemainingSpace) {
+        RemainingTokenColumns = 0;
+        if (!DryRun)
+          Token->replaceWhitespace(LineIndex, TailOffset, Split, Whitespaces);
+        break;
+      }
+
+      assert(NewRemainingTokenColumns < RemainingTokenColumns);
+      if (!DryRun)
+        Token->insertBreak(LineIndex, TailOffset, Split, Whitespaces);
+      Penalty += Current.SplitPenalty;
+      unsigned ColumnsUsed =
+          Token->getLineLengthAfterSplit(LineIndex, TailOffset, Split.first);
+      if (ColumnsUsed > ColumnLimit) {
+        Penalty += Style.PenaltyExcessCharacter * (ColumnsUsed - ColumnLimit);
+      }
+      TailOffset += Split.first + Split.second;
+      RemainingTokenColumns = NewRemainingTokenColumns;
+      BreakInserted = true;
+    }
+  }
+
+  State.Column = RemainingTokenColumns;
+
+  if (BreakInserted) {
+    // If we break the token inside a parameter list, we need to break before
+    // the next parameter on all levels, so that the next parameter is clearly
+    // visible. Line comments already introduce a break.
+    if (Current.Type != TT_LineComment) {
+      for (unsigned i = 0, e = State.Stack.size(); i != e; ++i)
+        State.Stack[i].BreakBeforeParameter = true;
+    }
+
+    // Charge the string penalty for every kind of string literal; checking
+    // only tok::string_literal would bill wide and UTF literals as comments.
+    Penalty += IsStringLiteral ? Style.PenaltyBreakString
+                               : Style.PenaltyBreakComment;
+
+    State.Stack.back().LastSpace = StartColumn;
+  }
+  return Penalty;
+}
+
+unsigned ContinuationIndenter::getColumnLimit(const LineState &State) const {
+  // Inside a preprocessor directive two columns are set aside for the
+  // trailing " \" continuation.
+  if (State.Line->InPPDirective)
+    return Style.ColumnLimit - 2;
+  return Style.ColumnLimit;
+}
+
+bool ContinuationIndenter::NextIsMultilineString(const LineState &State) {
+  const FormatToken &Tok = *State.NextToken;
+  // Only tok::string_literal tokens qualify, and raw string literals are
+  // never treated as "multiline" for AlwaysBreakBeforeMultilineStrings.
+  if (Tok.isNot(tok::string_literal) || Tok.TokenText.startswith("R\""))
+    return false;
+  if (Tok.IsMultiline)
+    return true;
+  // Implicit concatenation: another string literal follows directly.
+  const FormatToken *Following = Tok.getNextNonComment();
+  if (Following && Following->is(tok::string_literal))
+    return true;
+  // Otherwise the string is multiline only if it will have to be split.
+  return State.Column + Tok.ColumnWidth + Tok.UnbreakableTailLength >
+         Style.ColumnLimit;
+}
+
+} // namespace format
+} // namespace clang
diff --git a/lib/Format/ContinuationIndenter.h b/lib/Format/ContinuationIndenter.h
new file mode 100644
index 000000000000..b31756583389
--- /dev/null
+++ b/lib/Format/ContinuationIndenter.h
@@ -0,0 +1,327 @@
+//===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements an indenter that manages the indentation of
+/// continuations.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_FORMAT_CONTINUATION_INDENTER_H
+#define LLVM_CLANG_FORMAT_CONTINUATION_INDENTER_H
+
+#include "Encoding.h"
+#include "clang/Format/Format.h"
+
+namespace clang {
+class SourceManager;
+
+namespace format {
+
+class AnnotatedLine;
+struct FormatToken;
+struct LineState;
+struct ParenState;
+class WhitespaceManager;
+
+/// \brief Places the tokens of a line one at a time into a \c LineState.
+///
+/// Each placement updates the state (current column, stack of
+/// \c ParenState) and yields a penalty value; string literals and comments
+/// that protrude past the column limit may be broken along the way.
+class ContinuationIndenter {
+public:
+ /// \brief Constructs a \c ContinuationIndenter to format \p Line starting in
+ /// column \p FirstIndent.
+ ContinuationIndenter(const FormatStyle &Style, SourceManager &SourceMgr,
+ WhitespaceManager &Whitespaces,
+ encoding::Encoding Encoding,
+ bool BinPackInconclusiveFunctions);
+
+ /// \brief Get the initial state, i.e. the state after placing \p Line's
+ /// first token at \p FirstIndent.
+ LineState getInitialState(unsigned FirstIndent, const AnnotatedLine *Line,
+ bool DryRun);
+
+ // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a
+ // better home.
+ /// \brief Returns \c true, if a line break after \p State is allowed.
+ bool canBreak(const LineState &State);
+
+ /// \brief Returns \c true, if a line break after \p State is mandatory.
+ bool mustBreak(const LineState &State);
+
+ /// \brief Appends the next token to \p State and updates information
+ /// necessary for indentation.
+ ///
+ /// Puts the token on the current line if \p Newline is \c false and adds a
+ /// line break and necessary indentation otherwise.
+ ///
+ /// If \p DryRun is \c false, also creates and stores the required
+ /// \c Replacement.
+ unsigned addTokenToState(LineState &State, bool Newline, bool DryRun,
+ unsigned ExtraSpaces = 0);
+
+ /// \brief Get the column limit for this line. This is the style's column
+ /// limit, potentially reduced for preprocessor definitions.
+ unsigned getColumnLimit(const LineState &State) const;
+
+private:
+ /// \brief Mark the next token as consumed in \p State and modify its stacks
+ /// accordingly.
+ unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline);
+
+ /// \brief If the current token sticks out over the end of the line, break
+ /// it if possible.
+ ///
+ /// \returns An extra penalty if a token was broken, otherwise 0.
+ ///
+ /// The returned penalty will cover the cost of the additional line breaks and
+ /// column limit violation in all lines except for the last one. The penalty
+ /// for the column limit violation in the last line (and in single line
+ /// tokens) is handled in \c addNextStateToQueue.
+ unsigned breakProtrudingToken(const FormatToken &Current, LineState &State,
+ bool DryRun);
+
+ /// \brief Appends the next token to \p State and updates information
+ /// necessary for indentation.
+ ///
+ /// Puts the token on the current line.
+ ///
+ /// If \p DryRun is \c false, also creates and stores the required
+ /// \c Replacement.
+ void addTokenOnCurrentLine(LineState &State, bool DryRun,
+ unsigned ExtraSpaces);
+
+ /// \brief Appends the next token to \p State and updates information
+ /// necessary for indentation.
+ ///
+ /// Adds a line break and necessary indentation.
+ ///
+ /// If \p DryRun is \c false, also creates and stores the required
+ /// \c Replacement.
+ unsigned addTokenOnNewLine(LineState &State, bool DryRun);
+
+ /// \brief Adds a multiline token to the \p State.
+ ///
+ /// \returns Extra penalty for the first line of the literal: last line is
+ /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't
+ /// matter, as we don't change them.
+ unsigned addMultilineToken(const FormatToken &Current, LineState &State);
+
+ /// \brief Returns \c true if the next token starts a multiline string
+ /// literal.
+ ///
+ /// This includes implicitly concatenated strings, strings that will be broken
+ /// by clang-format and string literals with escaped newlines.
+ bool NextIsMultilineString(const LineState &State);
+
+ /// \brief The style in effect; held by value, not by reference.
+ FormatStyle Style;
+ /// \brief Source manager supplied at construction (held by reference).
+ SourceManager &SourceMgr;
+ /// \brief Receives whitespace changes and token breaks when a call is not
+ /// a dry run (held by reference).
+ WhitespaceManager &Whitespaces;
+ /// \brief Text encoding handed to the breakable-token helpers.
+ encoding::Encoding Encoding;
+ /// \brief Consulted when a token's packing kind is inconclusive and
+ /// \c ExperimentalAutoDetectBinPacking is on: if \c false, bin packing is
+ /// avoided for that token.
+ bool BinPackInconclusiveFunctions;
+};
+
+/// \brief Per-level formatting state; one entry of \c LineState::Stack.
+struct ParenState {
+  /// \brief Creates a level with the given indentation and packing settings;
+  /// every other member starts out as 0 or \c false.
+  ParenState(unsigned Indent, unsigned IndentLevel, unsigned LastSpace,
+             bool AvoidBinPacking, bool NoLineBreak)
+      : Indent(Indent),
+        IndentLevel(IndentLevel),
+        LastSpace(LastSpace),
+        FirstLessLess(0),
+        BreakBeforeClosingBrace(false),
+        QuestionColumn(0),
+        AvoidBinPacking(AvoidBinPacking),
+        BreakBeforeParameter(false),
+        NoLineBreak(NoLineBreak),
+        ColonPos(0),
+        StartOfFunctionCall(0),
+        StartOfArraySubscripts(0),
+        NestedNameSpecifierContinuation(0),
+        CallContinuation(0),
+        VariablePos(0),
+        ContainsLineBreak(false),
+        ContainsUnwrappedBuilder(false) {}
+
+  /// \brief Column to which content on this parenthesis level is indented.
+  unsigned Indent;
+
+  /// \brief Number of indentation levels of the block.
+  unsigned IndentLevel;
+
+  /// \brief Column of the last space on this level.
+  ///
+  /// Used e.g. to break like:
+  ///   functionCall(Parameter, otherCall(
+  ///                               OtherParameter));
+  unsigned LastSpace;
+
+  /// \brief Column of the first "<<" operator seen on this level, used to
+  /// align further "<<" operators. 0 if none has been seen yet.
+  unsigned FirstLessLess;
+
+  /// \brief Whether a newline has to precede the block's closing brace.
+  ///
+  /// A newline before the closing brace is only wanted if there was also one
+  /// after the opening brace.
+  bool BreakBeforeClosingBrace;
+
+  /// \brief Column of the \c ? of a conditional expression.
+  unsigned QuestionColumn;
+
+  /// \brief Avoid bin packing (several parameters/elements spread over
+  /// several lines) in this context.
+  bool AvoidBinPacking;
+
+  /// \brief Break after the next comma (or after every comma in this context
+  /// if \c AvoidBinPacking is \c true).
+  bool BreakBeforeParameter;
+
+  /// \brief A line break in this context would violate a formatting rule.
+  bool NoLineBreak;
+
+  /// \brief Column of the colon in an ObjC method declaration/call.
+  unsigned ColonPos;
+
+  /// \brief Start of the most recent function in a builder-type call.
+  unsigned StartOfFunctionCall;
+
+  /// \brief Start of array subscript expressions, so they can be aligned.
+  unsigned StartOfArraySubscripts;
+
+  /// \brief Start column of the second line of a nested name specifier that
+  /// was broken over several lines; 0 otherwise.
+  unsigned NestedNameSpecifierContinuation;
+
+  /// \brief Start column of the second line of a call expression that was
+  /// broken over several lines; 0 otherwise.
+  unsigned CallContinuation;
+
+  /// \brief Column of the first variable name in a variable declaration,
+  /// used to align further variables if necessary.
+  unsigned VariablePos;
+
+  /// \brief \c true once this \c ParenState contains a line break.
+  ///
+  /// The first break inside a \c ParenState costs extra so that clang-format
+  /// prefers similar breaks, i.e. breaks within the same parenthesis.
+  bool ContainsLineBreak;
+
+  /// \brief \c true if several segments of a builder-type call sit on one
+  /// line within this \c ParenState.
+  bool ContainsUnwrappedBuilder;
+
+  /// \brief Lexicographic ordering over the members listed below, in that
+  /// order. \c IndentLevel and \c NestedNameSpecifierContinuation do not take
+  /// part. For the first four flags the object with the flag set sorts first;
+  /// for the last two the object with the flag cleared sorts first.
+  bool operator<(const ParenState &Other) const {
+    return Indent != Other.Indent ? Indent < Other.Indent
+         : LastSpace != Other.LastSpace ? LastSpace < Other.LastSpace
+         : FirstLessLess != Other.FirstLessLess
+             ? FirstLessLess < Other.FirstLessLess
+         : BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace
+             ? BreakBeforeClosingBrace
+         : QuestionColumn != Other.QuestionColumn
+             ? QuestionColumn < Other.QuestionColumn
+         : AvoidBinPacking != Other.AvoidBinPacking ? AvoidBinPacking
+         : BreakBeforeParameter != Other.BreakBeforeParameter
+             ? BreakBeforeParameter
+         : NoLineBreak != Other.NoLineBreak ? NoLineBreak
+         : ColonPos != Other.ColonPos ? ColonPos < Other.ColonPos
+         : StartOfFunctionCall != Other.StartOfFunctionCall
+             ? StartOfFunctionCall < Other.StartOfFunctionCall
+         : StartOfArraySubscripts != Other.StartOfArraySubscripts
+             ? StartOfArraySubscripts < Other.StartOfArraySubscripts
+         : CallContinuation != Other.CallContinuation
+             ? CallContinuation < Other.CallContinuation
+         : VariablePos != Other.VariablePos ? VariablePos < Other.VariablePos
+         : ContainsLineBreak != Other.ContainsLineBreak ? !ContainsLineBreak
+         : ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder
+             ? !ContainsUnwrappedBuilder
+             : false;
+  }
+};
+
+/// \brief The current state while indenting an unwrapped line.
+///
+/// The indenter tries out different combinations, so this is copied by value.
+struct LineState {
+  /// \brief Number of columns used so far on the current line.
+  unsigned Column;
+
+  /// \brief The token to be formatted next.
+  FormatToken *NextToken;
+
+  /// \brief \c true if this line contains a continued for-loop section.
+  bool LineContainsContinuedForLoopSection;
+
+  /// \brief Nesting depth inside (), [], <> and {}.
+  unsigned ParenLevel;
+
+  /// \brief The \c ParenLevel at the start of this line.
+  unsigned StartOfLineLevel;
+
+  /// \brief The lowest \c ParenLevel on the current line.
+  unsigned LowestLevelOnLine;
+
+  /// \brief Start column of the string literal if we are inside a sequence
+  /// of string literals, 0 otherwise.
+  unsigned StartOfStringLiteral;
+
+  /// \brief Stack of properties that apply to the parenthesis levels.
+  std::vector<ParenState> Stack;
+
+  /// \brief Leave the stack of \c ParenStates out of state comparison.
+  ///
+  /// For long, deeply nested unwrapped lines the current algorithm may not
+  /// find the best formatting within reasonable time and memory. With this
+  /// flag set, some combinations are effectively not analyzed. Those rarely
+  /// contain the optimum: accepting a higher penalty early would have to
+  /// produce different values in the \c ParenState stack (in an otherwise
+  /// identical state), and these would in turn have to avoid a significant
+  /// amount of penalty later.
+  ///
+  /// FIXME: Come up with a better algorithm instead.
+  bool IgnoreStackForComparison;
+
+  /// \brief The indent of the first token.
+  unsigned FirstIndent;
+
+  /// \brief The line being formatted.
+  ///
+  /// Not needed for memoization because it doesn't change.
+  const AnnotatedLine *Line;
+
+  /// \brief Ordering that allows \c LineState to be used as a \c map key.
+  ///
+  /// The scalar members are compared first; the stack is compared last, and
+  /// only if neither side has \c IgnoreStackForComparison set.
+  bool operator<(const LineState &Other) const {
+    return NextToken != Other.NextToken ? NextToken < Other.NextToken
+         : Column != Other.Column ? Column < Other.Column
+         : LineContainsContinuedForLoopSection !=
+                 Other.LineContainsContinuedForLoopSection
+             ? LineContainsContinuedForLoopSection
+         : ParenLevel != Other.ParenLevel ? ParenLevel < Other.ParenLevel
+         : StartOfLineLevel != Other.StartOfLineLevel
+             ? StartOfLineLevel < Other.StartOfLineLevel
+         : LowestLevelOnLine != Other.LowestLevelOnLine
+             ? LowestLevelOnLine < Other.LowestLevelOnLine
+         : StartOfStringLiteral != Other.StartOfStringLiteral
+             ? StartOfStringLiteral < Other.StartOfStringLiteral
+         : (IgnoreStackForComparison || Other.IgnoreStackForComparison)
+             ? false
+             : Stack < Other.Stack;
+  }
+};
+
+} // end namespace format
+} // end namespace clang
+
+#endif // LLVM_CLANG_FORMAT_CONTINUATION_INDENTER_H
diff --git a/lib/Format/Encoding.h b/lib/Format/Encoding.h
new file mode 100644
index 000000000000..356334d5376f
--- /dev/null
+++ b/lib/Format/Encoding.h
@@ -0,0 +1,144 @@
+//===--- Encoding.h - Format C++ code -------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Contains functions for text encoding manipulation. Supports UTF-8,
+/// 8-bit encodings and escape sequences in C++ string literals.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_FORMAT_ENCODING_H
+#define LLVM_CLANG_FORMAT_ENCODING_H
+
+#include "clang/Basic/LLVM.h"
+#include "llvm/Support/ConvertUTF.h"
+#include "llvm/Support/Unicode.h"
+
+namespace clang {
+namespace format {
+namespace encoding {
+
+/// \brief The text encodings distinguished by the helpers in this file.
+enum Encoding {
+ Encoding_UTF8,
+ Encoding_Unknown // We treat all other encodings as 8-bit encodings.
+};
+
+/// \brief Classifies \p Text as UTF-8 if the whole of it is a legal UTF-8
+/// byte sequence, and as an unknown 8-bit encoding otherwise.
+inline Encoding detectEncoding(StringRef Text) {
+  const UTF8 *Cursor = reinterpret_cast<const UTF8 *>(Text.begin());
+  const UTF8 *const Limit = reinterpret_cast<const UTF8 *>(Text.end());
+  return ::isLegalUTF8String(&Cursor, Limit) ? Encoding_UTF8
+                                             : Encoding_Unknown;
+}
+
+/// \brief Counts the code points in \p Text, taking the length of each
+/// sequence from its first byte via \c getNumBytesForUTF8.
+inline unsigned getCodePointCountUTF8(StringRef Text) {
+  unsigned Count = 0;
+  const size_t Size = Text.size();
+  size_t Offset = 0;
+  while (Offset < Size) {
+    Offset += getNumBytesForUTF8(Text[Offset]);
+    ++Count;
+  }
+  return Count;
+}
+
+/// \brief Returns how many code points \p Text holds when read in the given
+/// \p Encoding; for anything but UTF-8 that is simply its size in bytes.
+inline unsigned getCodePointCount(StringRef Text, Encoding Encoding) {
+  if (Encoding == Encoding_UTF8)
+    return getCodePointCountUTF8(Text);
+  return Text.size();
+}
+
+/// \brief Number of columns needed to display \p Text on a generic
+/// Unicode-capable terminal, with \p Text interpreted in \p Encoding.
+///
+/// Falls back to the byte count for non-UTF-8 text and whenever the UTF-8
+/// width computation reports a negative value.
+inline unsigned columnWidth(StringRef Text, Encoding Encoding) {
+  if (Encoding != Encoding_UTF8)
+    return Text.size();
+  const int Width = llvm::sys::unicode::columnWidthUTF8(Text);
+  if (Width < 0)
+    return Text.size();
+  return Width;
+}
+
+/// \brief Returns the number of columns required to display the \p Text,
+/// starting from the \p StartColumn on a terminal with the \p TabWidth. The
+/// text is assumed to use the specified \p Encoding.
+///
+/// Each tab advances to the next multiple of \p TabWidth, counted from
+/// \p StartColumn. A \p TabWidth of 0 makes tabs contribute no width.
+inline unsigned columnWidthWithTabs(StringRef Text, unsigned StartColumn,
+                                    unsigned TabWidth, Encoding Encoding) {
+  unsigned TotalWidth = 0;
+  StringRef Tail = Text;
+  for (;;) {
+    StringRef::size_type TabPos = Tail.find('\t');
+    if (TabPos == StringRef::npos)
+      return TotalWidth + columnWidth(Tail, Encoding);
+    // columnWidth() returns an unsigned value, so no sign check is needed.
+    TotalWidth += columnWidth(Tail.substr(0, TabPos), Encoding);
+    // TabWidth is a user-configurable style option; guard the modulo so a
+    // value of 0 cannot cause a division by zero.
+    if (TabWidth)
+      TotalWidth += TabWidth - (TotalWidth + StartColumn) % TabWidth;
+    Tail = Tail.substr(TabPos + 1);
+  }
+}
+
+/// \brief Length in bytes of the sequence that encodes one code point and
+/// begins with \p FirstChar; always 1 for encodings other than UTF-8.
+inline unsigned getCodePointNumBytes(char FirstChar, Encoding Encoding) {
+  if (Encoding == Encoding_UTF8)
+    return getNumBytesForUTF8(FirstChar);
+  return 1;
+}
+
+/// \brief \c true for the characters '0' through '7'.
+inline bool isOctDigit(char c) {
+  return c >= '0' && c <= '7';
+}
+
+/// \brief \c true for '0'-'9', 'a'-'f' and 'A'-'F'.
+inline bool isHexDigit(char c) {
+  if (c >= '0' && c <= '9')
+    return true;
+  if (c >= 'a' && c <= 'f')
+    return true;
+  return c >= 'A' && c <= 'F';
+}
+
+/// \brief Length of the escape sequence at the start of \p Text.
+///
+/// \p Text must begin with the backslash of the sequence and run to the end
+/// of the C++ string literal. A lone backslash yields 1; \\u yields 6 and
+/// \\U yields 10 regardless of what follows; \\x extends over all following
+/// hex digits; an octal escape covers up to three octal digits; everything
+/// else counts as 2.
+inline unsigned getEscapeSequenceLength(StringRef Text) {
+  assert(Text[0] == '\\');
+  const size_t Size = Text.size();
+  if (Size < 2)
+    return 1;
+
+  const char Kind = Text[1];
+  if (Kind == 'u')
+    return 6;
+  if (Kind == 'U')
+    return 10;
+  if (Kind == 'x') {
+    unsigned End = 2; // First position after '\x'.
+    for (; End < Size && isHexDigit(Text[End]); ++End) {
+    }
+    return End;
+  }
+  if (isOctDigit(Kind)) {
+    unsigned End = 1; // Position of the first octal digit.
+    for (; End < Size && End < 4 && isOctDigit(Text[End]); ++End) {
+    }
+    return End;
+  }
+  return 2;
+}
+
+} // namespace encoding
+} // namespace format
+} // namespace clang
+
+#endif // LLVM_CLANG_FORMAT_ENCODING_H
diff --git a/lib/Format/Format.cpp b/lib/Format/Format.cpp
index a0557f781824..01c122ecc7bf 100644
--- a/lib/Format/Format.cpp
+++ b/lib/Format/Format.cpp
@@ -15,43 +15,219 @@
#define DEBUG_TYPE "format-formatter"
-#include "BreakableToken.h"
+#include "ContinuationIndenter.h"
#include "TokenAnnotator.h"
#include "UnwrappedLineParser.h"
#include "WhitespaceManager.h"
#include "clang/Basic/Diagnostic.h"
-#include "clang/Basic/OperatorPrecedence.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Format/Format.h"
-#include "clang/Frontend/TextDiagnosticPrinter.h"
#include "clang/Lex/Lexer.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/YAMLTraits.h"
+#include "llvm/Support/Path.h"
#include <queue>
#include <string>
+namespace llvm {
+namespace yaml {
+template <>
+struct ScalarEnumerationTraits<clang::format::FormatStyle::LanguageStandard> {
+  // Registers the YAML spellings of FormatStyle::LanguageStandard. Each of
+  // LS_Cpp03 and LS_Cpp11 has two spellings; registration order is kept.
+  static void enumeration(IO &IO,
+                          clang::format::FormatStyle::LanguageStandard &Value) {
+    typedef clang::format::FormatStyle FS;
+    IO.enumCase(Value, "Cpp03", FS::LS_Cpp03);
+    IO.enumCase(Value, "C++03", FS::LS_Cpp03);
+    IO.enumCase(Value, "Cpp11", FS::LS_Cpp11);
+    IO.enumCase(Value, "C++11", FS::LS_Cpp11);
+    IO.enumCase(Value, "Auto", FS::LS_Auto);
+  }
+};
+
+template <>
+struct ScalarEnumerationTraits<clang::format::FormatStyle::UseTabStyle> {
+  // Registers the YAML spellings of FormatStyle::UseTabStyle. "false" and
+  // "true" are additional spellings of UT_Never and UT_Always.
+  static void enumeration(IO &IO,
+                          clang::format::FormatStyle::UseTabStyle &Value) {
+    typedef clang::format::FormatStyle FS;
+    IO.enumCase(Value, "Never", FS::UT_Never);
+    IO.enumCase(Value, "false", FS::UT_Never);
+    IO.enumCase(Value, "Always", FS::UT_Always);
+    IO.enumCase(Value, "true", FS::UT_Always);
+    IO.enumCase(Value, "ForIndentation", FS::UT_ForIndentation);
+  }
+};
+
+template <>
+struct ScalarEnumerationTraits<clang::format::FormatStyle::BraceBreakingStyle> {
+  // Registers the YAML spellings of FormatStyle::BraceBreakingStyle.
+  static void
+  enumeration(IO &IO, clang::format::FormatStyle::BraceBreakingStyle &Value) {
+    typedef clang::format::FormatStyle FS;
+    IO.enumCase(Value, "Attach", FS::BS_Attach);
+    IO.enumCase(Value, "Linux", FS::BS_Linux);
+    IO.enumCase(Value, "Stroustrup", FS::BS_Stroustrup);
+    IO.enumCase(Value, "Allman", FS::BS_Allman);
+  }
+};
+
+template <>
+struct ScalarEnumerationTraits<
+    clang::format::FormatStyle::NamespaceIndentationKind> {
+  // Registers the YAML spellings of FormatStyle::NamespaceIndentationKind.
+  static void
+  enumeration(IO &IO,
+              clang::format::FormatStyle::NamespaceIndentationKind &Value) {
+    typedef clang::format::FormatStyle FS;
+    IO.enumCase(Value, "None", FS::NI_None);
+    IO.enumCase(Value, "Inner", FS::NI_Inner);
+    IO.enumCase(Value, "All", FS::NI_All);
+  }
+};
+
+// YAML mapping for clang::format::FormatStyle, used both for reading and for
+// writing a style.
+template <> struct MappingTraits<clang::format::FormatStyle> {
+ // Maps every style option of \p Style to a YAML key of the same name.
+ // All keys are registered with mapOptional.
+ static void mapping(llvm::yaml::IO &IO, clang::format::FormatStyle &Style) {
+ if (IO.outputting()) {
+ // Writing: if the style is identical to one of the predefined styles,
+ // record that style's name under the key "# BasedOnStyle".
+ // NOTE(review): the leading '#' presumably makes the entry read as a
+ // YAML comment in the output - confirm.
+ StringRef StylesArray[] = { "LLVM", "Google", "Chromium",
+ "Mozilla", "WebKit" };
+ ArrayRef<StringRef> Styles(StylesArray);
+ for (size_t i = 0, e = Styles.size(); i < e; ++i) {
+ StringRef StyleName(Styles[i]);
+ clang::format::FormatStyle PredefinedStyle;
+ if (clang::format::getPredefinedStyle(StyleName, &PredefinedStyle) &&
+ Style == PredefinedStyle) {
+ IO.mapOptional("# BasedOnStyle", StyleName);
+ break;
+ }
+ }
+ } else {
+ // Reading: a non-empty BasedOnStyle first resets Style to the named
+ // predefined style. An unknown name is reported through IO.setError and
+ // no further keys are mapped.
+ StringRef BasedOnStyle;
+ IO.mapOptional("BasedOnStyle", BasedOnStyle);
+ if (!BasedOnStyle.empty())
+ if (!clang::format::getPredefinedStyle(BasedOnStyle, &Style)) {
+ IO.setError(Twine("Unknown value for BasedOnStyle: ", BasedOnStyle));
+ return;
+ }
+ }
+
+ // The individual options follow, one key per FormatStyle member.
+ IO.mapOptional("AccessModifierOffset", Style.AccessModifierOffset);
+ IO.mapOptional("ConstructorInitializerIndentWidth",
+ Style.ConstructorInitializerIndentWidth);
+ IO.mapOptional("AlignEscapedNewlinesLeft", Style.AlignEscapedNewlinesLeft);
+ IO.mapOptional("AlignTrailingComments", Style.AlignTrailingComments);
+ IO.mapOptional("AllowAllParametersOfDeclarationOnNextLine",
+ Style.AllowAllParametersOfDeclarationOnNextLine);
+ IO.mapOptional("AllowShortIfStatementsOnASingleLine",
+ Style.AllowShortIfStatementsOnASingleLine);
+ IO.mapOptional("AllowShortLoopsOnASingleLine",
+ Style.AllowShortLoopsOnASingleLine);
+ IO.mapOptional("AlwaysBreakTemplateDeclarations",
+ Style.AlwaysBreakTemplateDeclarations);
+ IO.mapOptional("AlwaysBreakBeforeMultilineStrings",
+ Style.AlwaysBreakBeforeMultilineStrings);
+ IO.mapOptional("BreakBeforeBinaryOperators",
+ Style.BreakBeforeBinaryOperators);
+ IO.mapOptional("BreakBeforeTernaryOperators",
+ Style.BreakBeforeTernaryOperators);
+ IO.mapOptional("BreakConstructorInitializersBeforeComma",
+ Style.BreakConstructorInitializersBeforeComma);
+ IO.mapOptional("BinPackParameters", Style.BinPackParameters);
+ IO.mapOptional("ColumnLimit", Style.ColumnLimit);
+ IO.mapOptional("ConstructorInitializerAllOnOneLineOrOnePerLine",
+ Style.ConstructorInitializerAllOnOneLineOrOnePerLine);
+ IO.mapOptional("DerivePointerBinding", Style.DerivePointerBinding);
+ IO.mapOptional("ExperimentalAutoDetectBinPacking",
+ Style.ExperimentalAutoDetectBinPacking);
+ IO.mapOptional("IndentCaseLabels", Style.IndentCaseLabels);
+ IO.mapOptional("MaxEmptyLinesToKeep", Style.MaxEmptyLinesToKeep);
+ IO.mapOptional("NamespaceIndentation", Style.NamespaceIndentation);
+ IO.mapOptional("ObjCSpaceBeforeProtocolList",
+ Style.ObjCSpaceBeforeProtocolList);
+ IO.mapOptional("PenaltyBreakBeforeFirstCallParameter",
+ Style.PenaltyBreakBeforeFirstCallParameter);
+ IO.mapOptional("PenaltyBreakComment", Style.PenaltyBreakComment);
+ IO.mapOptional("PenaltyBreakString", Style.PenaltyBreakString);
+ IO.mapOptional("PenaltyBreakFirstLessLess",
+ Style.PenaltyBreakFirstLessLess);
+ IO.mapOptional("PenaltyExcessCharacter", Style.PenaltyExcessCharacter);
+ IO.mapOptional("PenaltyReturnTypeOnItsOwnLine",
+ Style.PenaltyReturnTypeOnItsOwnLine);
+ IO.mapOptional("PointerBindsToType", Style.PointerBindsToType);
+ IO.mapOptional("SpacesBeforeTrailingComments",
+ Style.SpacesBeforeTrailingComments);
+ IO.mapOptional("Cpp11BracedListStyle", Style.Cpp11BracedListStyle);
+ IO.mapOptional("Standard", Style.Standard);
+ IO.mapOptional("IndentWidth", Style.IndentWidth);
+ IO.mapOptional("TabWidth", Style.TabWidth);
+ IO.mapOptional("UseTab", Style.UseTab);
+ IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces);
+ IO.mapOptional("IndentFunctionDeclarationAfterType",
+ Style.IndentFunctionDeclarationAfterType);
+ IO.mapOptional("SpacesInParentheses", Style.SpacesInParentheses);
+ IO.mapOptional("SpacesInAngles", Style.SpacesInAngles);
+ IO.mapOptional("SpaceInEmptyParentheses", Style.SpaceInEmptyParentheses);
+ IO.mapOptional("SpacesInCStyleCastParentheses",
+ Style.SpacesInCStyleCastParentheses);
+ IO.mapOptional("SpaceAfterControlStatementKeyword",
+ Style.SpaceAfterControlStatementKeyword);
+ IO.mapOptional("SpaceBeforeAssignmentOperators",
+ Style.SpaceBeforeAssignmentOperators);
+ IO.mapOptional("ContinuationIndentWidth", Style.ContinuationIndentWidth);
+ }
+};
+}
+}
+
namespace clang {
namespace format {
+/// \brief Assigns the penalty values that the LLVM and Google presets share.
+///
+/// PenaltyReturnTypeOnItsOwnLine and PenaltyBreakBeforeFirstCallParameter are
+/// not touched here; the style getters set those after calling this function.
+void setDefaultPenalties(FormatStyle &Style) {
+ // Listed from the largest penalty down to the smallest.
+ Style.PenaltyExcessCharacter = 1000000;
+ Style.PenaltyBreakString = 1000;
+ Style.PenaltyBreakFirstLessLess = 120;
+ Style.PenaltyBreakComment = 60;
+}
+
FormatStyle getLLVMStyle() {
+ // Builds the LLVM preset field by field: 80 columns, 2-space indent,
+ // attached braces, no tabs, C++03.
FormatStyle LLVMStyle;
LLVMStyle.AccessModifierOffset = -2;
LLVMStyle.AlignEscapedNewlinesLeft = false;
+ LLVMStyle.AlignTrailingComments = true;
LLVMStyle.AllowAllParametersOfDeclarationOnNextLine = true;
LLVMStyle.AllowShortIfStatementsOnASingleLine = false;
+ LLVMStyle.AllowShortLoopsOnASingleLine = false;
+ LLVMStyle.AlwaysBreakBeforeMultilineStrings = false;
+ LLVMStyle.AlwaysBreakTemplateDeclarations = false;
LLVMStyle.BinPackParameters = true;
+ LLVMStyle.BreakBeforeBinaryOperators = false;
+ LLVMStyle.BreakBeforeTernaryOperators = true;
+ LLVMStyle.BreakBeforeBraces = FormatStyle::BS_Attach;
+ LLVMStyle.BreakConstructorInitializersBeforeComma = false;
LLVMStyle.ColumnLimit = 80;
LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false;
+ LLVMStyle.ConstructorInitializerIndentWidth = 4;
+ LLVMStyle.Cpp11BracedListStyle = false;
LLVMStyle.DerivePointerBinding = false;
+ LLVMStyle.ExperimentalAutoDetectBinPacking = false;
LLVMStyle.IndentCaseLabels = false;
+ LLVMStyle.IndentFunctionDeclarationAfterType = false;
+ LLVMStyle.IndentWidth = 2;
+ LLVMStyle.TabWidth = 8;
LLVMStyle.MaxEmptyLinesToKeep = 1;
+ LLVMStyle.NamespaceIndentation = FormatStyle::NI_None;
LLVMStyle.ObjCSpaceBeforeProtocolList = true;
- LLVMStyle.PenaltyExcessCharacter = 1000000;
- LLVMStyle.PenaltyReturnTypeOnItsOwnLine = 75;
LLVMStyle.PointerBindsToType = false;
LLVMStyle.SpacesBeforeTrailingComments = 1;
LLVMStyle.Standard = FormatStyle::LS_Cpp03;
+ LLVMStyle.UseTab = FormatStyle::UT_Never;
+ LLVMStyle.SpacesInParentheses = false;
+ LLVMStyle.SpaceInEmptyParentheses = false;
+ LLVMStyle.SpacesInCStyleCastParentheses = false;
+ LLVMStyle.SpaceAfterControlStatementKeyword = true;
+ LLVMStyle.SpaceBeforeAssignmentOperators = true;
+ LLVMStyle.ContinuationIndentWidth = 4;
+ LLVMStyle.SpacesInAngles = false;
+
+ // Shared penalty defaults first, then the two penalties this preset sets
+ // itself (the Google preset assigns different values to the same two).
+ setDefaultPenalties(LLVMStyle);
+ LLVMStyle.PenaltyReturnTypeOnItsOwnLine = 60;
+ LLVMStyle.PenaltyBreakBeforeFirstCallParameter = 19;
+
return LLVMStyle;
}
@@ -59,20 +235,46 @@ FormatStyle getGoogleStyle() {
FormatStyle GoogleStyle;
GoogleStyle.AccessModifierOffset = -1;
GoogleStyle.AlignEscapedNewlinesLeft = true;
+ GoogleStyle.AlignTrailingComments = true;
GoogleStyle.AllowAllParametersOfDeclarationOnNextLine = true;
GoogleStyle.AllowShortIfStatementsOnASingleLine = true;
+ GoogleStyle.AllowShortLoopsOnASingleLine = true;
+ GoogleStyle.AlwaysBreakBeforeMultilineStrings = true;
+ GoogleStyle.AlwaysBreakTemplateDeclarations = true;
GoogleStyle.BinPackParameters = true;
+ GoogleStyle.BreakBeforeBinaryOperators = false;
+ GoogleStyle.BreakBeforeTernaryOperators = true;
+ GoogleStyle.BreakBeforeBraces = FormatStyle::BS_Attach;
+ GoogleStyle.BreakConstructorInitializersBeforeComma = false;
GoogleStyle.ColumnLimit = 80;
GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true;
+ GoogleStyle.ConstructorInitializerIndentWidth = 4;
+ GoogleStyle.Cpp11BracedListStyle = true;
GoogleStyle.DerivePointerBinding = true;
+ GoogleStyle.ExperimentalAutoDetectBinPacking = false;
GoogleStyle.IndentCaseLabels = true;
+ GoogleStyle.IndentFunctionDeclarationAfterType = true;
+ GoogleStyle.IndentWidth = 2;
+ GoogleStyle.TabWidth = 8;
GoogleStyle.MaxEmptyLinesToKeep = 1;
+ GoogleStyle.NamespaceIndentation = FormatStyle::NI_None;
GoogleStyle.ObjCSpaceBeforeProtocolList = false;
- GoogleStyle.PenaltyExcessCharacter = 1000000;
- GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 200;
GoogleStyle.PointerBindsToType = true;
GoogleStyle.SpacesBeforeTrailingComments = 2;
GoogleStyle.Standard = FormatStyle::LS_Auto;
+ GoogleStyle.UseTab = FormatStyle::UT_Never;
+ GoogleStyle.SpacesInParentheses = false;
+ GoogleStyle.SpaceInEmptyParentheses = false;
+ GoogleStyle.SpacesInCStyleCastParentheses = false;
+ GoogleStyle.SpaceAfterControlStatementKeyword = true;
+ GoogleStyle.SpaceBeforeAssignmentOperators = true;
+ GoogleStyle.ContinuationIndentWidth = 4;
+ GoogleStyle.SpacesInAngles = false;
+
+ setDefaultPenalties(GoogleStyle);
+ GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 200;
+ GoogleStyle.PenaltyBreakBeforeFirstCallParameter = 1;
+
return GoogleStyle;
}
@@ -80,9 +282,10 @@ FormatStyle getChromiumStyle() {
FormatStyle ChromiumStyle = getGoogleStyle();
ChromiumStyle.AllowAllParametersOfDeclarationOnNextLine = false;
ChromiumStyle.AllowShortIfStatementsOnASingleLine = false;
+ ChromiumStyle.AllowShortLoopsOnASingleLine = false;
ChromiumStyle.BinPackParameters = false;
- ChromiumStyle.Standard = FormatStyle::LS_Cpp03;
ChromiumStyle.DerivePointerBinding = false;
+ ChromiumStyle.Standard = FormatStyle::LS_Cpp03;
return ChromiumStyle;
}
@@ -98,614 +301,376 @@ FormatStyle getMozillaStyle() {
return MozillaStyle;
}
-// Returns the length of everything up to the first possible line break after
-// the ), ], } or > matching \c Tok.
-static unsigned getLengthToMatchingParen(const AnnotatedToken &Tok) {
- if (Tok.MatchingParen == NULL)
- return 0;
- AnnotatedToken *End = Tok.MatchingParen;
- while (!End->Children.empty() && !End->Children[0].CanBreakBefore) {
- End = &End->Children[0];
- }
- return End->TotalLength - Tok.TotalLength + 1;
+/// \brief Returns the WebKit preset, expressed as overrides on the LLVM preset.
+FormatStyle getWebKitStyle() {
+ FormatStyle WebKit = getLLVMStyle();
+ // Widths and indentation. A ColumnLimit of 0 is treated as "no limit" by
+ // the line joiner in this file.
+ WebKit.ColumnLimit = 0;
+ WebKit.IndentWidth = 4;
+ WebKit.AccessModifierOffset = -4;
+ WebKit.NamespaceIndentation = FormatStyle::NI_Inner;
+ // Line-breaking preferences.
+ WebKit.BreakBeforeBinaryOperators = true;
+ WebKit.BreakBeforeBraces = FormatStyle::BS_Stroustrup;
+ WebKit.BreakConstructorInitializersBeforeComma = true;
+ // Remaining overrides.
+ WebKit.AlignTrailingComments = false;
+ WebKit.PointerBindsToType = true;
+ return WebKit;
}
-class UnwrappedLineFormatter {
-public:
- UnwrappedLineFormatter(const FormatStyle &Style, SourceManager &SourceMgr,
- const AnnotatedLine &Line, unsigned FirstIndent,
- const AnnotatedToken &RootToken,
- WhitespaceManager &Whitespaces)
- : Style(Style), SourceMgr(SourceMgr), Line(Line),
- FirstIndent(FirstIndent), RootToken(RootToken),
- Whitespaces(Whitespaces), Count(0) {}
-
- /// \brief Formats an \c UnwrappedLine.
- ///
- /// \returns The column after the last token in the last line of the
- /// \c UnwrappedLine.
- unsigned format(const AnnotatedLine *NextLine) {
- // Initialize state dependent on indent.
- LineState State;
- State.Column = FirstIndent;
- State.NextToken = &RootToken;
- State.Stack.push_back(
- ParenState(FirstIndent, FirstIndent, !Style.BinPackParameters,
- /*NoLineBreak=*/ false));
- State.LineContainsContinuedForLoopSection = false;
- State.ParenLevel = 0;
- State.StartOfStringLiteral = 0;
- State.StartOfLineLevel = State.ParenLevel;
-
- // The first token has already been indented and thus consumed.
- moveStateToNextToken(State, /*DryRun=*/ false);
-
- // If everything fits on a single line, just put it there.
- unsigned ColumnLimit = Style.ColumnLimit;
- if (NextLine && NextLine->InPPDirective &&
- !NextLine->First.FormatTok.HasUnescapedNewline)
- ColumnLimit = getColumnLimit();
- if (Line.Last->TotalLength <= ColumnLimit - FirstIndent) {
- while (State.NextToken != NULL) {
- addTokenToState(false, false, State);
- }
- return State.Column;
- }
-
- // If the ObjC method declaration does not fit on a line, we should format
- // it with one arg per line.
- if (Line.Type == LT_ObjCMethodDecl)
- State.Stack.back().BreakBeforeParameter = true;
+/// \brief Looks up a built-in style by name, ignoring case.
+///
+/// On a match the preset is stored in \p Style and \c true is returned;
+/// otherwise \p Style is left untouched and \c false is returned.
+bool getPredefinedStyle(StringRef Name, FormatStyle *Style) {
+ if (Name.equals_lower("llvm")) {
+ *Style = getLLVMStyle();
+ return true;
+ }
+ if (Name.equals_lower("chromium")) {
+ *Style = getChromiumStyle();
+ return true;
+ }
+ if (Name.equals_lower("mozilla")) {
+ *Style = getMozillaStyle();
+ return true;
+ }
+ if (Name.equals_lower("google")) {
+ *Style = getGoogleStyle();
+ return true;
+ }
+ if (Name.equals_lower("webkit")) {
+ *Style = getWebKitStyle();
+ return true;
+ }
- // Find best solution in solution space.
- return analyzeSolutionSpace(State);
- }
+ // No built-in style carries this name.
+ return false;
+}
-private:
- void DebugTokenState(const AnnotatedToken &AnnotatedTok) {
- const Token &Tok = AnnotatedTok.FormatTok.Tok;
- llvm::errs() << StringRef(SourceMgr.getCharacterData(Tok.getLocation()),
- Tok.getLength());
- llvm::errs();
- }
+/// \brief Parses the YAML configuration in \p Text into \p Style.
+///
+/// Empty or whitespace-only input is rejected with \c invalid_argument before
+/// the YAML parser is created; otherwise the parser's error state is returned.
+llvm::error_code parseConfiguration(StringRef Text, FormatStyle *Style) {
+ const bool HasContent = !Text.trim().empty();
+ if (HasContent) {
+ llvm::yaml::Input Parser(Text);
+ Parser >> *Style;
+ return Parser.error();
+ }
+ return llvm::make_error_code(llvm::errc::invalid_argument);
+}
- struct ParenState {
- ParenState(unsigned Indent, unsigned LastSpace, bool AvoidBinPacking,
- bool NoLineBreak)
- : Indent(Indent), LastSpace(LastSpace), FirstLessLess(0),
- BreakBeforeClosingBrace(false), QuestionColumn(0),
- AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false),
- NoLineBreak(NoLineBreak), ColonPos(0), StartOfFunctionCall(0),
- NestedNameSpecifierContinuation(0), CallContinuation(0),
- VariablePos(0) {}
-
- /// \brief The position to which a specific parenthesis level needs to be
- /// indented.
- unsigned Indent;
-
- /// \brief The position of the last space on each level.
- ///
- /// Used e.g. to break like:
- /// functionCall(Parameter, otherCall(
- /// OtherParameter));
- unsigned LastSpace;
-
- /// \brief The position the first "<<" operator encountered on each level.
- ///
- /// Used to align "<<" operators. 0 if no such operator has been encountered
- /// on a level.
- unsigned FirstLessLess;
-
- /// \brief Whether a newline needs to be inserted before the block's closing
- /// brace.
- ///
- /// We only want to insert a newline before the closing brace if there also
- /// was a newline after the beginning left brace.
- bool BreakBeforeClosingBrace;
-
- /// \brief The column of a \c ? in a conditional expression;
- unsigned QuestionColumn;
-
- /// \brief Avoid bin packing, i.e. multiple parameters/elements on multiple
- /// lines, in this context.
- bool AvoidBinPacking;
-
- /// \brief Break after the next comma (or all the commas in this context if
- /// \c AvoidBinPacking is \c true).
- bool BreakBeforeParameter;
-
- /// \brief Line breaking in this context would break a formatting rule.
- bool NoLineBreak;
-
- /// \brief The position of the colon in an ObjC method declaration/call.
- unsigned ColonPos;
-
- /// \brief The start of the most recent function in a builder-type call.
- unsigned StartOfFunctionCall;
-
- /// \brief If a nested name specifier was broken over multiple lines, this
- /// contains the start column of the second line. Otherwise 0.
- unsigned NestedNameSpecifierContinuation;
-
- /// \brief If a call expression was broken over multiple lines, this
- /// contains the start column of the second line. Otherwise 0.
- unsigned CallContinuation;
-
- /// \brief The column of the first variable name in a variable declaration.
- ///
- /// Used to align further variables if necessary.
- unsigned VariablePos;
-
- bool operator<(const ParenState &Other) const {
- if (Indent != Other.Indent)
- return Indent < Other.Indent;
- if (LastSpace != Other.LastSpace)
- return LastSpace < Other.LastSpace;
- if (FirstLessLess != Other.FirstLessLess)
- return FirstLessLess < Other.FirstLessLess;
- if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace)
- return BreakBeforeClosingBrace;
- if (QuestionColumn != Other.QuestionColumn)
- return QuestionColumn < Other.QuestionColumn;
- if (AvoidBinPacking != Other.AvoidBinPacking)
- return AvoidBinPacking;
- if (BreakBeforeParameter != Other.BreakBeforeParameter)
- return BreakBeforeParameter;
- if (NoLineBreak != Other.NoLineBreak)
- return NoLineBreak;
- if (ColonPos != Other.ColonPos)
- return ColonPos < Other.ColonPos;
- if (StartOfFunctionCall != Other.StartOfFunctionCall)
- return StartOfFunctionCall < Other.StartOfFunctionCall;
- if (NestedNameSpecifierContinuation !=
- Other.NestedNameSpecifierContinuation)
- return NestedNameSpecifierContinuation <
- Other.NestedNameSpecifierContinuation;
- if (CallContinuation != Other.CallContinuation)
- return CallContinuation < Other.CallContinuation;
- if (VariablePos != Other.VariablePos)
- return VariablePos < Other.VariablePos;
- return false;
- }
- };
+/// \brief Serializes \p Style to its YAML text form.
+std::string configurationAsText(const FormatStyle &Style) {
+ // The YAML mapping is shared between reading and writing and therefore
+ // needs a mutable object, so a local copy of the const argument is written.
+ FormatStyle Copy = Style;
+ std::string Buffer;
+ llvm::raw_string_ostream OS(Buffer);
+ llvm::yaml::Output Writer(OS);
+ Writer << Copy;
+ return OS.str();
+}
- /// \brief The current state when indenting a unwrapped line.
- ///
- /// As the indenting tries different combinations this is copied by value.
- struct LineState {
- /// \brief The number of used columns in the current line.
- unsigned Column;
-
- /// \brief The token that needs to be next formatted.
- const AnnotatedToken *NextToken;
-
- /// \brief \c true if this line contains a continued for-loop section.
- bool LineContainsContinuedForLoopSection;
-
- /// \brief The level of nesting inside (), [], <> and {}.
- unsigned ParenLevel;
-
- /// \brief The \c ParenLevel at the start of this line.
- unsigned StartOfLineLevel;
-
- /// \brief The start column of the string literal, if we're in a string
- /// literal sequence, 0 otherwise.
- unsigned StartOfStringLiteral;
-
- /// \brief A stack keeping track of properties applying to parenthesis
- /// levels.
- std::vector<ParenState> Stack;
-
- /// \brief Comparison operator to be able to used \c LineState in \c map.
- bool operator<(const LineState &Other) const {
- if (NextToken != Other.NextToken)
- return NextToken < Other.NextToken;
- if (Column != Other.Column)
- return Column < Other.Column;
- if (LineContainsContinuedForLoopSection !=
- Other.LineContainsContinuedForLoopSection)
- return LineContainsContinuedForLoopSection;
- if (ParenLevel != Other.ParenLevel)
- return ParenLevel < Other.ParenLevel;
- if (StartOfLineLevel != Other.StartOfLineLevel)
- return StartOfLineLevel < Other.StartOfLineLevel;
- if (StartOfStringLiteral != Other.StartOfStringLiteral)
- return StartOfStringLiteral < Other.StartOfStringLiteral;
- return Stack < Other.Stack;
- }
- };
+namespace {
- /// \brief Appends the next token to \p State and updates information
- /// necessary for indentation.
- ///
- /// Puts the token on the current line if \p Newline is \c true and adds a
- /// line break and necessary indentation otherwise.
- ///
- /// If \p DryRun is \c false, also creates and stores the required
- /// \c Replacement.
- unsigned addTokenToState(bool Newline, bool DryRun, LineState &State) {
- const AnnotatedToken &Current = *State.NextToken;
- const AnnotatedToken &Previous = *State.NextToken->Parent;
-
- if (State.Stack.size() == 0 || Current.Type == TT_ImplicitStringLiteral) {
- State.Column += State.NextToken->FormatTok.WhiteSpaceLength +
- State.NextToken->FormatTok.TokenLength;
- if (State.NextToken->Children.empty())
- State.NextToken = NULL;
- else
- State.NextToken = &State.NextToken->Children[0];
- return 0;
+class NoColumnLimitFormatter {
+public:
+ /// \brief Stores \p Indenter, which format() uses for every break decision
+ /// and for placing each token.
+ NoColumnLimitFormatter(ContinuationIndenter *Indenter) : Indenter(Indenter) {}
+
+ /// \brief Formats \p Line with initial indent \p FirstIndent, keeping the
+ /// input's line breaking decisions.
+ ///
+ /// A break is emitted where the indenter requires one, or where the input
+ /// already had a newline before the token and the indenter allows a break.
+ void format(unsigned FirstIndent, const AnnotatedLine *Line) {
+ for (LineState Cursor =
+ Indenter->getInitialState(FirstIndent, Line, /*DryRun=*/false);
+ Cursor.NextToken != NULL;) {
+ bool BreakHere = Indenter->mustBreak(Cursor);
+ if (!BreakHere)
+ BreakHere = Indenter->canBreak(Cursor) &&
+ Cursor.NextToken->NewlinesBefore > 0;
+ Indenter->addTokenToState(Cursor, BreakHere, /*DryRun=*/false);
+ }
+ }
- // If we are continuing an expression, we want to indent an extra 4 spaces.
- unsigned ContinuationIndent =
- std::max(State.Stack.back().LastSpace, State.Stack.back().Indent) + 4;
- if (Newline) {
- unsigned WhitespaceStartColumn = State.Column;
- if (Current.is(tok::r_brace)) {
- State.Column = Line.Level * 2;
- } else if (Current.is(tok::string_literal) &&
- State.StartOfStringLiteral != 0) {
- State.Column = State.StartOfStringLiteral;
- State.Stack.back().BreakBeforeParameter = true;
- } else if (Current.is(tok::lessless) &&
- State.Stack.back().FirstLessLess != 0) {
- State.Column = State.Stack.back().FirstLessLess;
- } else if (Previous.is(tok::coloncolon)) {
- if (State.Stack.back().NestedNameSpecifierContinuation == 0) {
- State.Column = ContinuationIndent;
- State.Stack.back().NestedNameSpecifierContinuation = State.Column;
- } else {
- State.Column = State.Stack.back().NestedNameSpecifierContinuation;
- }
- } else if (Current.isOneOf(tok::period, tok::arrow)) {
- if (State.Stack.back().CallContinuation == 0) {
- State.Column = ContinuationIndent;
- State.Stack.back().CallContinuation = State.Column;
- } else {
- State.Column = State.Stack.back().CallContinuation;
- }
- } else if (Current.Type == TT_ConditionalExpr) {
- State.Column = State.Stack.back().QuestionColumn;
- } else if (Previous.is(tok::comma) &&
- State.Stack.back().VariablePos != 0) {
- State.Column = State.Stack.back().VariablePos;
- } else if (Previous.ClosesTemplateDeclaration ||
- (Current.Type == TT_StartOfName && State.ParenLevel == 0 &&
- Line.StartsDefinition)) {
- State.Column = State.Stack.back().Indent;
- } else if (Current.Type == TT_ObjCSelectorName) {
- if (State.Stack.back().ColonPos > Current.FormatTok.TokenLength) {
- State.Column =
- State.Stack.back().ColonPos - Current.FormatTok.TokenLength;
- } else {
- State.Column = State.Stack.back().Indent;
- State.Stack.back().ColonPos =
- State.Column + Current.FormatTok.TokenLength;
- }
- } else if (Current.Type == TT_StartOfName || Previous.is(tok::equal) ||
- Previous.Type == TT_ObjCMethodExpr) {
- State.Column = ContinuationIndent;
- } else {
- State.Column = State.Stack.back().Indent;
- // Ensure that we fall back to indenting 4 spaces instead of just
- // flushing continuations left.
- if (State.Column == FirstIndent)
- State.Column += 4;
- }
-
- if (Current.is(tok::question))
- State.Stack.back().BreakBeforeParameter = true;
- if (Previous.isOneOf(tok::comma, tok::semi) &&
- !State.Stack.back().AvoidBinPacking)
- State.Stack.back().BreakBeforeParameter = false;
-
- if (!DryRun) {
- unsigned NewLines = 1;
- if (Current.Type == TT_LineComment)
- NewLines =
- std::max(NewLines, std::min(Current.FormatTok.NewlinesBefore,
- Style.MaxEmptyLinesToKeep + 1));
- if (!Line.InPPDirective)
- Whitespaces.replaceWhitespace(Current, NewLines, State.Column,
- WhitespaceStartColumn);
- else
- Whitespaces.replacePPWhitespace(Current, NewLines, State.Column,
- WhitespaceStartColumn);
- }
+private:
+ ContinuationIndenter *Indenter;
+};
- State.Stack.back().LastSpace = State.Column;
- State.StartOfLineLevel = State.ParenLevel;
+class LineJoiner {
+public:
+ /// \brief Binds the joiner to \p Style. The style is held by reference, so
+ /// it has to stay alive for as long as the joiner is used.
+ LineJoiner(const FormatStyle &Style) : Style(Style) {}
- // Any break on this level means that the parent level has been broken
- // and we need to avoid bin packing there.
- for (unsigned i = 0, e = State.Stack.size() - 1; i != e; ++i) {
- State.Stack[i].BreakBeforeParameter = true;
- }
- const AnnotatedToken *TokenBefore = Current.getPreviousNoneComment();
- if (TokenBefore && !TokenBefore->isOneOf(tok::comma, tok::semi) &&
- !TokenBefore->opensScope())
- State.Stack.back().BreakBeforeParameter = true;
-
- // If we break after {, we should also break before the corresponding }.
- if (Previous.is(tok::l_brace))
- State.Stack.back().BreakBeforeClosingBrace = true;
-
- if (State.Stack.back().AvoidBinPacking) {
- // If we are breaking after '(', '{', '<', this is not bin packing
- // unless AllowAllParametersOfDeclarationOnNextLine is false.
- if ((Previous.isNot(tok::l_paren) && Previous.isNot(tok::l_brace)) ||
- (!Style.AllowAllParametersOfDeclarationOnNextLine &&
- Line.MustBeDeclaration))
- State.Stack.back().BreakBeforeParameter = true;
- }
- } else {
- if (Current.is(tok::equal) &&
- (RootToken.is(tok::kw_for) || State.ParenLevel == 0) &&
- State.Stack.back().VariablePos == 0) {
- State.Stack.back().VariablePos = State.Column;
- // Move over * and & if they are bound to the variable name.
- const AnnotatedToken *Tok = &Previous;
- while (Tok &&
- State.Stack.back().VariablePos >= Tok->FormatTok.TokenLength) {
- State.Stack.back().VariablePos -= Tok->FormatTok.TokenLength;
- if (Tok->SpacesRequiredBefore != 0)
- break;
- Tok = Tok->Parent;
- }
- if (Previous.PartOfMultiVariableDeclStmt)
- State.Stack.back().LastSpace = State.Stack.back().VariablePos;
- }
+ /// \brief Calculates how many lines can be merged into 1 starting at \p I.
+ ///
+ /// \param Indent Indentation of the line at \p I.
+ /// \param I Iterator to the line that would absorb the following lines.
+ /// \param E End of the line sequence.
+ /// \returns The number of following lines to join onto the line at \p I
+ /// (0 if nothing should be merged).
+ unsigned
+ tryFitMultipleLinesInOne(unsigned Indent,
+ SmallVectorImpl<AnnotatedLine *>::const_iterator &I,
+ SmallVectorImpl<AnnotatedLine *>::const_iterator E) {
+ // We can never merge stuff if there are trailing line comments.
+ AnnotatedLine *TheLine = *I;
+ if (TheLine->Last->Type == TT_LineComment)
+ return 0;
- unsigned Spaces = State.NextToken->SpacesRequiredBefore;
+ // A ColumnLimit of 0 means "no limit" (see the computation of 'Limit'
+ // below), so the indent can only exceed the limit when one is set.
+ if (Style.ColumnLimit > 0 && Indent > Style.ColumnLimit)
+ return 0;
- if (!DryRun)
- Whitespaces.replaceWhitespace(Current, 0, Spaces, State.Column);
+ unsigned Limit =
+ Style.ColumnLimit == 0 ? UINT_MAX : Style.ColumnLimit - Indent;
+ // If we already exceed the column limit, we set 'Limit' to 0. The different
+ // tryMerge..() functions can then decide whether to still do merging.
+ Limit = TheLine->Last->TotalLength > Limit
+ ? 0
+ : Limit - TheLine->Last->TotalLength;
- if (Current.Type == TT_ObjCSelectorName &&
- State.Stack.back().ColonPos == 0) {
- if (State.Stack.back().Indent + Current.LongestObjCSelectorName >
- State.Column + Spaces + Current.FormatTok.TokenLength)
- State.Stack.back().ColonPos =
- State.Stack.back().Indent + Current.LongestObjCSelectorName;
- else
- State.Stack.back().ColonPos =
- State.Column + Spaces + Current.FormatTok.TokenLength;
- }
+ // Nothing to merge with if this is the last line or the next one is invalid.
+ if (I + 1 == E || I[1]->Type == LT_Invalid)
+ return 0;
- if (Previous.opensScope() && Previous.Type != TT_ObjCMethodExpr &&
- Current.Type != TT_LineComment)
- State.Stack.back().Indent = State.Column + Spaces;
- if (Previous.is(tok::comma) && !Current.isTrailingComment() &&
- State.Stack.back().AvoidBinPacking)
- State.Stack.back().NoLineBreak = true;
-
- State.Column += Spaces;
- if (Current.is(tok::l_paren) && Previous.isOneOf(tok::kw_if, tok::kw_for))
- // Treat the condition inside an if as if it was a second function
- // parameter, i.e. let nested calls have an indent of 4.
- State.Stack.back().LastSpace = State.Column + 1; // 1 is length of "(".
- else if (Previous.is(tok::comma))
- State.Stack.back().LastSpace = State.Column;
- else if ((Previous.Type == TT_BinaryOperator ||
- Previous.Type == TT_ConditionalExpr ||
- Previous.Type == TT_CtorInitializerColon) &&
- getPrecedence(Previous) != prec::Assignment)
- State.Stack.back().LastSpace = State.Column;
- else if (Previous.Type == TT_InheritanceColon)
- State.Stack.back().Indent = State.Column;
- else if (Previous.opensScope() && Previous.ParameterCount > 1)
- // If this function has multiple parameters, indent nested calls from
- // the start of the first parameter.
- State.Stack.back().LastSpace = State.Column;
+ // Dispatch on the shape of the current line; each helper returns the number
+ // of following lines to merge.
+ if (TheLine->Last->is(tok::l_brace)) {
+ return tryMergeSimpleBlock(I, E, Limit);
+ } else if (Style.AllowShortIfStatementsOnASingleLine &&
+ TheLine->First->is(tok::kw_if)) {
+ return tryMergeSimpleControlStatement(I, E, Limit);
+ } else if (Style.AllowShortLoopsOnASingleLine &&
+ TheLine->First->isOneOf(tok::kw_for, tok::kw_while)) {
+ return tryMergeSimpleControlStatement(I, E, Limit);
+ } else if (TheLine->InPPDirective && (TheLine->First->HasUnescapedNewline ||
+ TheLine->First->IsFirst)) {
+ return tryMergeSimplePPDirective(I, E, Limit);
}
+ return 0;
+ }
- return moveStateToNextToken(State, DryRun);
+private:
+ /// \brief Decides whether the line after \p I can be joined onto the
+ /// preprocessor directive line at \p I.
+ ///
+ /// \returns 1 if the next line should be merged, 0 otherwise.
+ unsigned
+ tryMergeSimplePPDirective(SmallVectorImpl<AnnotatedLine *>::const_iterator &I,
+ SmallVectorImpl<AnnotatedLine *>::const_iterator E,
+ unsigned Limit) {
+ // No room left on the current line.
+ if (Limit == 0)
+ return 0;
+ // The next line has to be inside a directive and must not start after an
+ // unescaped newline.
+ const AnnotatedLine *Next = I[1];
+ const bool NextContinues =
+ Next->InPPDirective && !Next->First->HasUnescapedNewline;
+ if (!NextContinues)
+ return 0;
+ // Give up if a third line satisfies the same condition.
+ const bool HasThird = I + 2 != E;
+ if (HasThird && I[2]->InPPDirective && !I[2]->First->HasUnescapedNewline)
+ return 0;
+ // The next line plus one extra column (presumably a separating space) has
+ // to fit into the remaining room.
+ return 1 + Next->Last->TotalLength > Limit ? 0 : 1;
}
- /// \brief Mark the next token as consumed in \p State and modify its stacks
- /// accordingly.
- unsigned moveStateToNextToken(LineState &State, bool DryRun) {
- const AnnotatedToken &Current = *State.NextToken;
- assert(State.Stack.size());
-
- if (Current.Type == TT_InheritanceColon)
- State.Stack.back().AvoidBinPacking = true;
- if (Current.is(tok::lessless) && State.Stack.back().FirstLessLess == 0)
- State.Stack.back().FirstLessLess = State.Column;
- if (Current.is(tok::question))
- State.Stack.back().QuestionColumn = State.Column;
- if (Current.isOneOf(tok::period, tok::arrow) &&
- Line.Type == LT_BuilderTypeCall && State.ParenLevel == 0)
- State.Stack.back().StartOfFunctionCall =
- Current.LastInChainOfCalls ? 0 : State.Column;
- if (Current.Type == TT_CtorInitializerColon) {
- State.Stack.back().Indent = State.Column + 2;
- if (Style.ConstructorInitializerAllOnOneLineOrOnePerLine)
- State.Stack.back().AvoidBinPacking = true;
- State.Stack.back().BreakBeforeParameter = false;
- }
+ /// \brief Decides whether the line following a control statement header
+ /// (the line at \p I) can be pulled up onto the header's line.
+ ///
+ /// \returns 1 if the next line should be merged, 0 otherwise.
+ unsigned tryMergeSimpleControlStatement(
+ SmallVectorImpl<AnnotatedLine *>::const_iterator &I,
+ SmallVectorImpl<AnnotatedLine *>::const_iterator E, unsigned Limit) {
+ if (Limit == 0)
+ return 0;
+ AnnotatedLine &Header = **I;
+ AnnotatedLine &Body = *I[1];
+ FormatToken *BodyStart = Body.First;
+ // Under Allman braces a line starting with '{' is never merged.
+ if (BodyStart->is(tok::l_brace) &&
+ Style.BreakBeforeBraces == FormatStyle::BS_Allman)
+ return 0;
+ // Both lines must agree on being inside a preprocessor directive, and a
+ // directive line starting after an unescaped newline is not merged.
+ if (Body.InPPDirective != Header.InPPDirective)
+ return 0;
+ if (Body.InPPDirective && BodyStart->HasUnescapedNewline)
+ return 0;
+ // The header has to end with a closing parenthesis.
+ if (Header.Last->isNot(tok::r_paren))
+ return 0;
+ if (1 + Body.Last->TotalLength > Limit)
+ return 0;
+ // Do not pull up a line that starts with ';', 'if', 'for', 'while' or a
+ // line comment.
+ if (BodyStart->Type == TT_LineComment ||
+ BodyStart->isOneOf(tok::semi, tok::kw_if, tok::kw_for, tok::kw_while))
+ return 0;
+ // Only inline simple if's (no nested if or else).
+ if (Header.First->is(tok::kw_if) && I + 2 != E &&
+ I[2]->First->is(tok::kw_else))
+ return 0;
+ return 1;
}
- // If return returns a binary expression, align after it.
- if (Current.is(tok::kw_return) && !Current.FakeLParens.empty())
- State.Stack.back().LastSpace = State.Column + 7;
-
- // In ObjC method declaration we align on the ":" of parameters, but we need
- // to ensure that we indent parameters on subsequent lines by at least 4.
- if (Current.Type == TT_ObjCMethodSpecifier)
- State.Stack.back().Indent += 4;
-
- // Insert scopes created by fake parenthesis.
- const AnnotatedToken *Previous = Current.getPreviousNoneComment();
- // Don't add extra indentation for the first fake parenthesis after
- // 'return', assignements or opening <({[. The indentation for these cases
- // is special cased.
- bool SkipFirstExtraIndent =
- Current.is(tok::kw_return) ||
- (Previous && (Previous->opensScope() ||
- getPrecedence(*Previous) == prec::Assignment));
- for (SmallVector<prec::Level, 4>::const_reverse_iterator
- I = Current.FakeLParens.rbegin(),
- E = Current.FakeLParens.rend();
- I != E; ++I) {
- ParenState NewParenState = State.Stack.back();
- NewParenState.Indent =
- std::max(std::max(State.Column, NewParenState.Indent),
- State.Stack.back().LastSpace);
-
- // Always indent conditional expressions. Never indent expression where
- // the 'operator' is ',', ';' or an assignment (i.e. *I <=
- // prec::Assignment) as those have different indentation rules. Indent
- // other expression, unless the indentation needs to be skipped.
- if (*I == prec::Conditional ||
- (!SkipFirstExtraIndent && *I > prec::Assignment))
- NewParenState.Indent += 4;
- if (Previous && !Previous->opensScope())
- NewParenState.BreakBeforeParameter = false;
- State.Stack.push_back(NewParenState);
- SkipFirstExtraIndent = false;
- }
+ /// \brief Tries to fold a short braced block onto the line that opens it.
+ ///
+ /// \returns 1 when the next line starts with the closing brace (empty
+ /// block), 2 when a one-line body and its closing brace fit into \p Limit,
+ /// and 0 when nothing should be merged.
+ unsigned
+ tryMergeSimpleBlock(SmallVectorImpl<AnnotatedLine *>::const_iterator &I,
+ SmallVectorImpl<AnnotatedLine *>::const_iterator E,
+ unsigned Limit) {
+ // No merging if the brace already is on the next line.
+ if (Style.BreakBeforeBraces != FormatStyle::BS_Attach)
+ return 0;
- // If we encounter an opening (, [, { or <, we add a level to our stacks to
- // prepare for the following tokens.
- if (Current.opensScope()) {
- unsigned NewIndent;
- bool AvoidBinPacking;
- if (Current.is(tok::l_brace)) {
- NewIndent = 2 + State.Stack.back().LastSpace;
- AvoidBinPacking = false;
- } else {
- NewIndent = 4 + std::max(State.Stack.back().LastSpace,
- State.Stack.back().StartOfFunctionCall);
- AvoidBinPacking = !Style.BinPackParameters;
- }
- State.Stack.push_back(
- ParenState(NewIndent, State.Stack.back().LastSpace, AvoidBinPacking,
- State.Stack.back().NoLineBreak));
-
- if (Current.NoMoreTokensOnLevel && Current.FakeLParens.empty()) {
- // This parenthesis was the last token possibly making use of Indent and
- // LastSpace of the next higher ParenLevel. Thus, erase them to acieve
- // better memoization results.
- State.Stack[State.Stack.size() - 2].Indent = 0;
- State.Stack[State.Stack.size() - 2].LastSpace = 0;
- }
+ // First, check that the current line allows merging. This is the case if
+ // we're not in a control flow statement and the last token is an opening
+ // brace.
+ AnnotatedLine &Line = **I;
+ if (Line.First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_do, tok::r_brace,
+ tok::kw_else, tok::kw_try, tok::kw_catch,
+ tok::kw_for,
+ // This gets rid of all ObjC @ keywords and methods.
+ tok::at, tok::minus, tok::plus))
+ return 0;
- ++State.ParenLevel;
- }
+ FormatToken *Tok = I[1]->First;
+ if (Tok->is(tok::r_brace) && !Tok->MustBreakBefore &&
+ (Tok->getNextNonComment() == NULL ||
+ Tok->getNextNonComment()->is(tok::semi))) {
+ // We merge empty blocks even if the line exceeds the column limit.
+ // Note that this branch modifies the closing brace token in place.
+ Tok->SpacesRequiredBefore = 0;
+ Tok->CanBreakBefore = true;
+ return 1;
+ } else if (Limit != 0 && Line.First->isNot(tok::kw_namespace)) {
+ // Check that we still have three lines and they fit into the limit.
+ if (I + 2 == E || I[2]->Type == LT_Invalid)
+ return 0;
- // If this '[' opens an ObjC call, determine whether all parameters fit into
- // one line and put one per line if they don't.
- if (Current.is(tok::l_square) && Current.Type == TT_ObjCMethodExpr &&
- Current.MatchingParen != NULL) {
- if (getLengthToMatchingParen(Current) + State.Column > getColumnLimit())
- State.Stack.back().BreakBeforeParameter = true;
- }
+ if (!nextTwoLinesFitInto(I, Limit))
+ return 0;
- // If we encounter a closing ), ], } or >, we can remove a level from our
- // stacks.
- if (Current.isOneOf(tok::r_paren, tok::r_square) ||
- (Current.is(tok::r_brace) && State.NextToken != &RootToken) ||
- State.NextToken->Type == TT_TemplateCloser) {
- State.Stack.pop_back();
- --State.ParenLevel;
- }
+ // Second, check that the next line does not contain any braces - if it
+ // does, readability declines when putting it into a single line.
+ if (I[1]->Last->Type == TT_LineComment || Tok->MustBreakBefore)
+ return 0;
+ // Walk every token of the body line (Tok still points at its first token).
+ do {
+ if (Tok->isOneOf(tok::l_brace, tok::r_brace))
+ return 0;
+ Tok = Tok->Next;
+ } while (Tok != NULL);
- // Remove scopes created by fake parenthesis.
- for (unsigned i = 0, e = Current.FakeRParens; i != e; ++i) {
- unsigned VariablePos = State.Stack.back().VariablePos;
- State.Stack.pop_back();
- State.Stack.back().VariablePos = VariablePos;
- }
+ // Last, check that the third line contains a single closing brace.
+ Tok = I[2]->First;
+ if (Tok->getNextNonComment() != NULL || Tok->isNot(tok::r_brace) ||
+ Tok->MustBreakBefore)
+ return 0;
- if (Current.is(tok::string_literal)) {
- State.StartOfStringLiteral = State.Column;
- } else if (Current.isNot(tok::comment)) {
- State.StartOfStringLiteral = 0;
+ return 2;
}
+ return 0;
+ }
- State.Column += Current.FormatTok.TokenLength;
+ /// \brief Returns whether the two lines after \p I, each counted with one
+ /// extra column, fit into \p Limit columns.
+ bool nextTwoLinesFitInto(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
+ unsigned Limit) {
+ const AnnotatedLine *Second = I[1];
+ const AnnotatedLine *Third = I[2];
+ return Limit >= Second->Last->TotalLength + Third->Last->TotalLength + 2;
+ }
- if (State.NextToken->Children.empty())
- State.NextToken = NULL;
- else
- State.NextToken = &State.NextToken->Children[0];
+ const FormatStyle &Style;
+};
- return breakProtrudingToken(Current, State, DryRun);
- }
+class UnwrappedLineFormatter {
+public:
+ UnwrappedLineFormatter(SourceManager &SourceMgr,
+ SmallVectorImpl<CharSourceRange> &Ranges,
+ ContinuationIndenter *Indenter,
+ WhitespaceManager *Whitespaces,
+ const FormatStyle &Style)
+ : SourceMgr(SourceMgr), Ranges(Ranges), Indenter(Indenter),
+ Whitespaces(Whitespaces), Style(Style), Joiner(Style) {}
+
+ unsigned format(const SmallVectorImpl<AnnotatedLine *> &Lines, bool DryRun,
+ int AdditionalIndent = 0) {
+ assert(!Lines.empty());
+ unsigned Penalty = 0;
+ std::vector<int> IndentForLevel;
+ for (unsigned i = 0, e = Lines[0]->Level; i != e; ++i)
+ IndentForLevel.push_back(Style.IndentWidth * i + AdditionalIndent);
+ bool PreviousLineWasTouched = false;
+ const AnnotatedLine *PreviousLine = NULL;
+ bool FormatPPDirective = false;
+ for (SmallVectorImpl<AnnotatedLine *>::const_iterator I = Lines.begin(),
+ E = Lines.end();
+ I != E; ++I) {
+ const AnnotatedLine &TheLine = **I;
+ const FormatToken *FirstTok = TheLine.First;
+ int Offset = getIndentOffset(*FirstTok);
+
+ // Check whether this line is part of a formatted preprocessor directive.
+ if (FirstTok->HasUnescapedNewline)
+ FormatPPDirective = false;
+ if (!FormatPPDirective && TheLine.InPPDirective &&
+ (touchesLine(TheLine) || touchesPPDirective(I + 1, E)))
+ FormatPPDirective = true;
+
+ // Determine indent and try to merge multiple unwrapped lines.
+ while (IndentForLevel.size() <= TheLine.Level)
+ IndentForLevel.push_back(-1);
+ IndentForLevel.resize(TheLine.Level + 1);
+ unsigned Indent = getIndent(IndentForLevel, TheLine.Level);
+ if (static_cast<int>(Indent) + Offset >= 0)
+ Indent += Offset;
+ unsigned MergedLines = Joiner.tryFitMultipleLinesInOne(Indent, I, E);
+ if (!DryRun) {
+ for (unsigned i = 0; i < MergedLines; ++i) {
+ join(*I[i], *I[i + 1]);
+ }
+ }
+ I += MergedLines;
+
+ bool WasMoved = PreviousLineWasTouched && FirstTok->NewlinesBefore == 0;
+ if (TheLine.First->is(tok::eof)) {
+ if (PreviousLineWasTouched && !DryRun) {
+ unsigned Newlines = std::min(FirstTok->NewlinesBefore, 1u);
+ Whitespaces->replaceWhitespace(*TheLine.First, Newlines,
+ /*IndentLevel=*/0, /*Spaces=*/0,
+ /*TargetColumn=*/0);
+ }
+ } else if (TheLine.Type != LT_Invalid &&
+ (WasMoved || FormatPPDirective || touchesLine(TheLine))) {
+ unsigned LevelIndent =
+ getIndent(IndentForLevel, TheLine.Level);
+ if (FirstTok->WhitespaceRange.isValid()) {
+ if (!DryRun)
+ formatFirstToken(*TheLine.First, PreviousLine, TheLine.Level,
+ Indent, TheLine.InPPDirective);
+ } else {
+ Indent = LevelIndent = FirstTok->OriginalColumn;
+ }
- /// \brief If the current token sticks out over the end of the line, break
- /// it if possible.
- unsigned breakProtrudingToken(const AnnotatedToken &Current, LineState &State,
- bool DryRun) {
- llvm::OwningPtr<BreakableToken> Token;
- unsigned StartColumn = State.Column - Current.FormatTok.TokenLength;
- if (Current.is(tok::string_literal)) {
- // Only break up default narrow strings.
- const char *LiteralData = SourceMgr.getCharacterData(
- Current.FormatTok.getStartOfNonWhitespace());
- if (!LiteralData || *LiteralData != '"')
- return 0;
+ // If everything fits on a single line, just put it there.
+ unsigned ColumnLimit = Style.ColumnLimit;
+ if (I + 1 != E) {
+ AnnotatedLine *NextLine = I[1];
+ if (NextLine->InPPDirective && !NextLine->First->HasUnescapedNewline)
+ ColumnLimit = getColumnLimit(TheLine.InPPDirective);
+ }
- Token.reset(new BreakableStringLiteral(SourceMgr, Current.FormatTok,
- StartColumn));
- } else if (Current.Type == TT_BlockComment) {
- BreakableBlockComment *BBC =
- new BreakableBlockComment(SourceMgr, Current, StartColumn);
- if (!DryRun)
- BBC->alignLines(Whitespaces);
- Token.reset(BBC);
- } else if (Current.Type == TT_LineComment &&
- (Current.Parent == NULL ||
- Current.Parent->Type != TT_ImplicitStringLiteral)) {
- Token.reset(new BreakableLineComment(SourceMgr, Current, StartColumn));
- } else {
- return 0;
- }
+ if (TheLine.Last->TotalLength + Indent <= ColumnLimit) {
+ LineState State = Indenter->getInitialState(Indent, &TheLine, DryRun);
+ while (State.NextToken != NULL)
+ Indenter->addTokenToState(State, /*Newline=*/false, DryRun);
+ } else if (Style.ColumnLimit == 0) {
+ NoColumnLimitFormatter Formatter(Indenter);
+ if (!DryRun)
+ Formatter.format(Indent, &TheLine);
+ } else {
+ Penalty += format(TheLine, Indent, DryRun);
+ }
- bool BreakInserted = false;
- unsigned Penalty = 0;
- for (unsigned LineIndex = 0; LineIndex < Token->getLineCount();
- ++LineIndex) {
- unsigned TailOffset = 0;
- unsigned RemainingLength =
- Token->getLineLengthAfterSplit(LineIndex, TailOffset);
- while (RemainingLength > getColumnLimit()) {
- BreakableToken::Split Split =
- Token->getSplit(LineIndex, TailOffset, getColumnLimit());
- if (Split.first == StringRef::npos)
- break;
- assert(Split.first != 0);
- unsigned NewRemainingLength = Token->getLineLengthAfterSplit(
- LineIndex, TailOffset + Split.first + Split.second);
- if (NewRemainingLength >= RemainingLength)
- break;
- if (!DryRun) {
- Token->insertBreak(LineIndex, TailOffset, Split, Line.InPPDirective,
- Whitespaces);
+ IndentForLevel[TheLine.Level] = LevelIndent;
+ PreviousLineWasTouched = true;
+ } else {
+ // Format the first token if necessary, and notify the WhitespaceManager
+ // about the unchanged whitespace.
+ for (FormatToken *Tok = TheLine.First; Tok != NULL; Tok = Tok->Next) {
+ if (Tok == TheLine.First &&
+ (Tok->NewlinesBefore > 0 || Tok->IsFirst)) {
+ unsigned LevelIndent = Tok->OriginalColumn;
+ if (!DryRun) {
+ // Remove trailing whitespace of the previous line if it was
+ // touched.
+ if (PreviousLineWasTouched || touchesEmptyLineBefore(TheLine)) {
+ formatFirstToken(*Tok, PreviousLine, TheLine.Level, LevelIndent,
+ TheLine.InPPDirective);
+ } else {
+ Whitespaces->addUntouchableToken(*Tok, TheLine.InPPDirective);
+ }
+ }
+
+ if (static_cast<int>(LevelIndent) - Offset >= 0)
+ LevelIndent -= Offset;
+ if (Tok->isNot(tok::comment))
+ IndentForLevel[TheLine.Level] = LevelIndent;
+ } else if (!DryRun) {
+ Whitespaces->addUntouchableToken(*Tok, TheLine.InPPDirective);
+ }
}
- TailOffset += Split.first + Split.second;
- RemainingLength = NewRemainingLength;
- Penalty += Style.PenaltyExcessCharacter;
- BreakInserted = true;
+ // If we did not reformat this unwrapped line, the column at the end of
+ // the last token is unchanged - thus, we can calculate the end of the
+ // last token.
+ PreviousLineWasTouched = false;
}
- State.Column = RemainingLength;
if (!DryRun) {
- Token->trimLine(LineIndex, TailOffset, Line.InPPDirective, Whitespaces);
+ for (FormatToken *Tok = TheLine.First; Tok != NULL; Tok = Tok->Next) {
+ Tok->Finalized = true;
+ }
}
- }
-
- if (BreakInserted) {
- for (unsigned i = 0, e = State.Stack.size(); i != e; ++i)
- State.Stack[i].BreakBeforeParameter = true;
- State.Stack.back().LastSpace = StartColumn;
+ PreviousLine = *I;
}
return Penalty;
}
- unsigned getColumnLimit() {
- // In preprocessor directives reserve two chars for trailing " \"
- return Style.ColumnLimit - (Line.InPPDirective ? 2 : 0);
+private:
+ /// \brief Formats an \c AnnotatedLine and returns the penalty.
+ ///
+ /// If \p DryRun is \c false, directly applies the changes.
+ unsigned format(const AnnotatedLine &Line, unsigned FirstIndent,
+ bool DryRun) {
+ LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun);
+
+ // If the ObjC method declaration does not fit on a line, we should format
+ // it with one arg per line.
+ if (State.Line->Type == LT_ObjCMethodDecl)
+ State.Stack.back().BreakBeforeParameter = true;
+
+ // Find best solution in solution space.
+ return analyzeSolutionSpace(State, DryRun);
}
/// \brief An edge in the solution space from \c Previous->State to \c State,
@@ -733,69 +698,206 @@ private:
typedef std::priority_queue<QueueItem, std::vector<QueueItem>,
std::greater<QueueItem> > QueueType;
+ /// \brief Get the offset of the line relatively to the level.
+ ///
+ /// For example, 'public:' labels in classes are offset by 1 or 2
+ /// characters to the left from their level.
+ int getIndentOffset(const FormatToken &RootToken) {
+ if (RootToken.isAccessSpecifier(false) || RootToken.isObjCAccessSpecifier())
+ return Style.AccessModifierOffset;
+ return 0;
+ }
+
+ /// \brief Add a new line and the required indent before the first Token
+ /// of the \c UnwrappedLine if there was no structural parsing error.
+ void formatFirstToken(FormatToken &RootToken,
+ const AnnotatedLine *PreviousLine, unsigned IndentLevel,
+ unsigned Indent, bool InPPDirective) {
+ unsigned Newlines =
+ std::min(RootToken.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1);
+ // Remove empty lines before "}" where applicable.
+ if (RootToken.is(tok::r_brace) &&
+ (!RootToken.Next ||
+ (RootToken.Next->is(tok::semi) && !RootToken.Next->Next)))
+ Newlines = std::min(Newlines, 1u);
+ if (Newlines == 0 && !RootToken.IsFirst)
+ Newlines = 1;
+
+ // Insert extra new line before access specifiers.
+ if (PreviousLine && PreviousLine->Last->isOneOf(tok::semi, tok::r_brace) &&
+ RootToken.isAccessSpecifier() && RootToken.NewlinesBefore == 1)
+ ++Newlines;
+
+ // Remove empty lines after access specifiers.
+ if (PreviousLine && PreviousLine->First->isAccessSpecifier())
+ Newlines = std::min(1u, Newlines);
+
+ Whitespaces->replaceWhitespace(
+ RootToken, Newlines, IndentLevel, Indent, Indent,
+ InPPDirective && !RootToken.HasUnescapedNewline);
+ }
+
+ /// \brief Get the indent of \p Level from \p IndentForLevel.
+ ///
+ /// \p IndentForLevel must contain the indent for the level \c l
+ /// at \p IndentForLevel[l], or a value < 0 if the indent for
+ /// that level is unknown.
+ unsigned getIndent(const std::vector<int> IndentForLevel, unsigned Level) {
+ if (IndentForLevel[Level] != -1)
+ return IndentForLevel[Level];
+ if (Level == 0)
+ return 0;
+ return getIndent(IndentForLevel, Level - 1) + Style.IndentWidth;
+ }
+
+ void join(AnnotatedLine &A, const AnnotatedLine &B) {
+ assert(!A.Last->Next);
+ assert(!B.First->Previous);
+ A.Last->Next = B.First;
+ B.First->Previous = A.Last;
+ B.First->CanBreakBefore = true;
+ unsigned LengthA = A.Last->TotalLength + B.First->SpacesRequiredBefore;
+ for (FormatToken *Tok = B.First; Tok; Tok = Tok->Next) {
+ Tok->TotalLength += LengthA;
+ A.Last = Tok;
+ }
+ }
+
+ unsigned getColumnLimit(bool InPPDirective) const {
+ // In preprocessor directives reserve two chars for trailing " \"
+ return Style.ColumnLimit - (InPPDirective ? 2 : 0);
+ }
+
+ bool touchesRanges(const CharSourceRange &Range) {
+ for (SmallVectorImpl<CharSourceRange>::const_iterator I = Ranges.begin(),
+ E = Ranges.end();
+ I != E; ++I) {
+ if (!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(), I->getBegin()) &&
+ !SourceMgr.isBeforeInTranslationUnit(I->getEnd(), Range.getBegin()))
+ return true;
+ }
+ return false;
+ }
+
+ bool touchesLine(const AnnotatedLine &TheLine) {
+ const FormatToken *First = TheLine.First;
+ const FormatToken *Last = TheLine.Last;
+ CharSourceRange LineRange = CharSourceRange::getCharRange(
+ First->WhitespaceRange.getBegin().getLocWithOffset(
+ First->LastNewlineOffset),
+ Last->getStartOfNonWhitespace().getLocWithOffset(
+ Last->TokenText.size() - 1));
+ return touchesRanges(LineRange);
+ }
+
+ bool touchesPPDirective(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
+ SmallVectorImpl<AnnotatedLine *>::const_iterator E) {
+ for (; I != E; ++I) {
+ if ((*I)->First->HasUnescapedNewline)
+ return false;
+ if (touchesLine(**I))
+ return true;
+ }
+ return false;
+ }
+
+ bool touchesEmptyLineBefore(const AnnotatedLine &TheLine) {
+ const FormatToken *First = TheLine.First;
+ CharSourceRange LineRange = CharSourceRange::getCharRange(
+ First->WhitespaceRange.getBegin(),
+ First->WhitespaceRange.getBegin().getLocWithOffset(
+ First->LastNewlineOffset));
+ return touchesRanges(LineRange);
+ }
+
/// \brief Analyze the entire solution space starting from \p InitialState.
///
/// This implements a variant of Dijkstra's algorithm on the graph that spans
/// the solution space (\c LineStates are the nodes). The algorithm tries to
/// find the shortest path (the one with lowest penalty) from \p InitialState
- /// to a state where all tokens are placed.
- unsigned analyzeSolutionSpace(LineState &InitialState) {
+ /// to a state where all tokens are placed. Returns the penalty.
+ ///
+ /// If \p DryRun is \c false, directly applies the changes.
+ unsigned analyzeSolutionSpace(LineState &InitialState, bool DryRun = false) {
std::set<LineState> Seen;
+ // Increasing count of \c StateNode items we have created. This is used to
+ // create a deterministic order independent of the container.
+ unsigned Count = 0;
+ QueueType Queue;
+
// Insert start element into queue.
StateNode *Node =
new (Allocator.Allocate()) StateNode(InitialState, false, NULL);
Queue.push(QueueItem(OrderedPenalty(0, Count), Node));
++Count;
+ unsigned Penalty = 0;
+
// While not empty, take first element and follow edges.
while (!Queue.empty()) {
- unsigned Penalty = Queue.top().first.first;
+ Penalty = Queue.top().first.first;
StateNode *Node = Queue.top().second;
if (Node->State.NextToken == NULL) {
- DEBUG(llvm::errs() << "\n---\nPenalty for line: " << Penalty << "\n");
+ DEBUG(llvm::dbgs() << "\n---\nPenalty for line: " << Penalty << "\n");
break;
}
Queue.pop();
+ // Cut off the analysis of certain solutions if the analysis gets too
+ // complex. See description of IgnoreStackForComparison.
+ if (Count > 10000)
+ Node->State.IgnoreStackForComparison = true;
+
if (!Seen.insert(Node->State).second)
// State already examined with lower penalty.
continue;
- addNextStateToQueue(Penalty, Node, /*NewLine=*/ false);
- addNextStateToQueue(Penalty, Node, /*NewLine=*/ true);
+ FormatDecision LastFormat = Node->State.NextToken->Decision;
+ if (LastFormat == FD_Unformatted || LastFormat == FD_Continue)
+ addNextStateToQueue(Penalty, Node, /*NewLine=*/false, &Count, &Queue);
+ if (LastFormat == FD_Unformatted || LastFormat == FD_Break)
+ addNextStateToQueue(Penalty, Node, /*NewLine=*/true, &Count, &Queue);
}
- if (Queue.empty())
+ if (Queue.empty()) {
// We were unable to find a solution, do nothing.
// FIXME: Add diagnostic?
+ DEBUG(llvm::dbgs() << "Could not find a solution.\n");
return 0;
+ }
// Reconstruct the solution.
- reconstructPath(InitialState, Queue.top().second);
- DEBUG(llvm::errs() << "---\n");
+ if (!DryRun)
+ reconstructPath(InitialState, Queue.top().second);
+
+ DEBUG(llvm::dbgs() << "Total number of analyzed states: " << Count << "\n");
+ DEBUG(llvm::dbgs() << "---\n");
- // Return the column after the last token of the solution.
- return Queue.top().second->State.Column;
+ return Penalty;
}
void reconstructPath(LineState &State, StateNode *Current) {
- // FIXME: This recursive implementation limits the possible number
- // of tokens per line if compiled into a binary with small stack space.
- // To become more independent of stack frame limitations we would need
- // to also change the TokenAnnotator.
- if (Current->Previous == NULL)
- return;
- reconstructPath(State, Current->Previous);
- DEBUG({
- if (Current->NewLine) {
- llvm::errs()
- << "Penalty for splitting before "
- << Current->Previous->State.NextToken->FormatTok.Tok.getName()
- << ": " << Current->Previous->State.NextToken->SplitPenalty << "\n";
- }
- });
- addTokenToState(Current->NewLine, false, State);
+ std::deque<StateNode *> Path;
+ // We do not need a break before the initial token.
+ while (Current->Previous) {
+ Path.push_front(Current);
+ Current = Current->Previous;
+ }
+ for (std::deque<StateNode *>::iterator I = Path.begin(), E = Path.end();
+ I != E; ++I) {
+ unsigned Penalty = 0;
+ formatChildren(State, (*I)->NewLine, /*DryRun=*/false, Penalty);
+ Penalty += Indenter->addTokenToState(State, (*I)->NewLine, false);
+
+ DEBUG({
+ if ((*I)->NewLine) {
+ llvm::dbgs() << "Penalty for placing "
+ << (*I)->Previous->State.NextToken->Tok.getName() << ": "
+ << Penalty << "\n";
+ }
+ });
+ }
}
/// \brief Add the following state to the analysis queue \c Queue.
@@ -803,331 +905,415 @@ private:
/// Assume the current state is \p PreviousNode and has been reached with a
/// penalty of \p Penalty. Insert a line break if \p NewLine is \c true.
void addNextStateToQueue(unsigned Penalty, StateNode *PreviousNode,
- bool NewLine) {
- if (NewLine && !canBreak(PreviousNode->State))
+ bool NewLine, unsigned *Count, QueueType *Queue) {
+ if (NewLine && !Indenter->canBreak(PreviousNode->State))
return;
- if (!NewLine && mustBreak(PreviousNode->State))
+ if (!NewLine && Indenter->mustBreak(PreviousNode->State))
return;
- if (NewLine)
- Penalty += PreviousNode->State.NextToken->SplitPenalty;
StateNode *Node = new (Allocator.Allocate())
StateNode(PreviousNode->State, NewLine, PreviousNode);
- Penalty += addTokenToState(NewLine, true, Node->State);
- if (Node->State.Column > getColumnLimit()) {
- unsigned ExcessCharacters = Node->State.Column - getColumnLimit();
- Penalty += Style.PenaltyExcessCharacter * ExcessCharacters;
- }
+ if (!formatChildren(Node->State, NewLine, /*DryRun=*/true, Penalty))
+ return;
- Queue.push(QueueItem(OrderedPenalty(Penalty, Count), Node));
- ++Count;
- }
+ Penalty += Indenter->addTokenToState(Node->State, NewLine, true);
- /// \brief Returns \c true, if a line break after \p State is allowed.
- bool canBreak(const LineState &State) {
- if (!State.NextToken->CanBreakBefore &&
- !(State.NextToken->is(tok::r_brace) &&
- State.Stack.back().BreakBeforeClosingBrace))
- return false;
- return !State.Stack.back().NoLineBreak;
+ Queue->push(QueueItem(OrderedPenalty(Penalty, *Count), Node));
+ ++(*Count);
}
- /// \brief Returns \c true, if a line break after \p State is mandatory.
- bool mustBreak(const LineState &State) {
- if (State.NextToken->MustBreakBefore)
- return true;
- if (State.NextToken->is(tok::r_brace) &&
- State.Stack.back().BreakBeforeClosingBrace)
- return true;
- if (State.NextToken->Parent->is(tok::semi) &&
- State.LineContainsContinuedForLoopSection)
- return true;
- if ((State.NextToken->Parent->isOneOf(tok::comma, tok::semi) ||
- State.NextToken->is(tok::question) ||
- State.NextToken->Type == TT_ConditionalExpr) &&
- State.Stack.back().BreakBeforeParameter &&
- !State.NextToken->isTrailingComment() &&
- State.NextToken->isNot(tok::r_paren) &&
- State.NextToken->isNot(tok::r_brace))
- return true;
- // FIXME: Comparing LongestObjCSelectorName to 0 is a hacky way of finding
- // out whether it is the first parameter. Clean this up.
- if (State.NextToken->Type == TT_ObjCSelectorName &&
- State.NextToken->LongestObjCSelectorName == 0 &&
- State.Stack.back().BreakBeforeParameter)
- return true;
- if ((State.NextToken->Type == TT_CtorInitializerColon ||
- (State.NextToken->Parent->ClosesTemplateDeclaration &&
- State.ParenLevel == 0)))
- return true;
- if (State.NextToken->Type == TT_InlineASMColon)
+ /// \brief If the \p State's next token is an r_brace closing a nested block,
+ /// format the nested block before it.
+ ///
+ /// Returns \c true if all children could be placed successfully and adapts
+ /// \p Penalty as well as \p State. If \p DryRun is false, also directly
+ /// creates changes using \c Whitespaces.
+ ///
+ /// The crucial idea here is that children always get formatted upon
+ /// encountering the closing brace right after the nested block. Now, if we
+ /// are currently trying to keep the "}" on the same line (i.e. \p NewLine is
+ /// \c false), the entire block has to be kept on the same line (which is only
+ /// possible if it fits on the line, only contains a single statement, etc.
+ ///
+ /// If \p NewLine is true, we format the nested block on separate lines, i.e.
+ /// break after the "{", format all lines with correct indentation and the put
+ /// the closing "}" on yet another new line.
+ ///
+ /// This enables us to keep the simple structure of the
+ /// \c UnwrappedLineFormatter, where we only have two options for each token:
+ /// break or don't break.
+ bool formatChildren(LineState &State, bool NewLine, bool DryRun,
+ unsigned &Penalty) {
+ FormatToken &Previous = *State.NextToken->Previous;
+ const FormatToken *LBrace = State.NextToken->getPreviousNonComment();
+ if (!LBrace || LBrace->isNot(tok::l_brace) ||
+ LBrace->BlockKind != BK_Block || Previous.Children.size() == 0)
+ // The previous token does not open a block. Nothing to do. We don't
+ // assert so that we can simply call this function for all tokens.
return true;
- // This prevents breaks like:
- // ...
- // SomeParameter, OtherParameter).DoSomething(
- // ...
- // As they hide "DoSomething" and generally bad for readability.
- if (State.NextToken->isOneOf(tok::period, tok::arrow) &&
- getRemainingLength(State) + State.Column > getColumnLimit() &&
- State.ParenLevel < State.StartOfLineLevel)
+
+ if (NewLine) {
+ int AdditionalIndent = State.Stack.back().Indent -
+ Previous.Children[0]->Level * Style.IndentWidth;
+ Penalty += format(Previous.Children, DryRun, AdditionalIndent);
return true;
- return false;
- }
+ }
- // Returns the total number of columns required for the remaining tokens.
- unsigned getRemainingLength(const LineState &State) {
- if (State.NextToken && State.NextToken->Parent)
- return Line.Last->TotalLength - State.NextToken->Parent->TotalLength;
- return 0;
+ // Cannot merge multiple statements into a single line.
+ if (Previous.Children.size() > 1)
+ return false;
+
+ // We can't put the closing "}" on a line with a trailing comment.
+ if (Previous.Children[0]->Last->isTrailingComment())
+ return false;
+
+ if (!DryRun) {
+ Whitespaces->replaceWhitespace(
+ *Previous.Children[0]->First,
+ /*Newlines=*/0, /*IndentLevel=*/0, /*Spaces=*/1,
+ /*StartOfTokenColumn=*/State.Column, State.Line->InPPDirective);
+ }
+ Penalty += format(*Previous.Children[0], State.Column + 1, DryRun);
+
+ State.Column += 1 + Previous.Children[0]->Last->TotalLength;
+ return true;
}
- FormatStyle Style;
SourceManager &SourceMgr;
- const AnnotatedLine &Line;
- const unsigned FirstIndent;
- const AnnotatedToken &RootToken;
- WhitespaceManager &Whitespaces;
+ SmallVectorImpl<CharSourceRange> &Ranges;
+ ContinuationIndenter *Indenter;
+ WhitespaceManager *Whitespaces;
+ FormatStyle Style;
+ LineJoiner Joiner;
llvm::SpecificBumpPtrAllocator<StateNode> Allocator;
- QueueType Queue;
- // Increasing count of \c StateNode items we have created. This is used
- // to create a deterministic order independent of the container.
- unsigned Count;
};
-class LexerBasedFormatTokenSource : public FormatTokenSource {
+class FormatTokenLexer {
public:
- LexerBasedFormatTokenSource(Lexer &Lex, SourceManager &SourceMgr)
- : GreaterStashed(false), Lex(Lex), SourceMgr(SourceMgr),
- IdentTable(Lex.getLangOpts()) {
+ FormatTokenLexer(Lexer &Lex, SourceManager &SourceMgr, FormatStyle &Style,
+ encoding::Encoding Encoding)
+ : FormatTok(NULL), IsFirstToken(true), GreaterStashed(false), Column(0),
+ TrailingWhitespace(0), Lex(Lex), SourceMgr(SourceMgr), Style(Style),
+ IdentTable(getFormattingLangOpts()), Encoding(Encoding) {
Lex.SetKeepWhitespaceMode(true);
}
- virtual FormatToken getNextToken() {
+ ArrayRef<FormatToken *> lex() {
+ assert(Tokens.empty());
+ do {
+ Tokens.push_back(getNextToken());
+ maybeJoinPreviousTokens();
+ } while (Tokens.back()->Tok.isNot(tok::eof));
+ return Tokens;
+ }
+
+ IdentifierTable &getIdentTable() { return IdentTable; }
+
+private:
+ void maybeJoinPreviousTokens() {
+ if (Tokens.size() < 4)
+ return;
+ FormatToken *Last = Tokens.back();
+ if (!Last->is(tok::r_paren))
+ return;
+
+ FormatToken *String = Tokens[Tokens.size() - 2];
+ if (!String->is(tok::string_literal) || String->IsMultiline)
+ return;
+
+ if (!Tokens[Tokens.size() - 3]->is(tok::l_paren))
+ return;
+
+ FormatToken *Macro = Tokens[Tokens.size() - 4];
+ if (Macro->TokenText != "_T")
+ return;
+
+ const char *Start = Macro->TokenText.data();
+ const char *End = Last->TokenText.data() + Last->TokenText.size();
+ String->TokenText = StringRef(Start, End - Start);
+ String->IsFirst = Macro->IsFirst;
+ String->LastNewlineOffset = Macro->LastNewlineOffset;
+ String->WhitespaceRange = Macro->WhitespaceRange;
+ String->OriginalColumn = Macro->OriginalColumn;
+ String->ColumnWidth = encoding::columnWidthWithTabs(
+ String->TokenText, String->OriginalColumn, Style.TabWidth, Encoding);
+
+ Tokens.pop_back();
+ Tokens.pop_back();
+ Tokens.pop_back();
+ Tokens.back() = String;
+ }
+
+ FormatToken *getNextToken() {
if (GreaterStashed) {
- FormatTok.NewlinesBefore = 0;
- FormatTok.WhiteSpaceStart =
- FormatTok.Tok.getLocation().getLocWithOffset(1);
- FormatTok.WhiteSpaceLength = 0;
+ // Create a synthesized second '>' token.
+ // FIXME: Increment Column and set OriginalColumn.
+ Token Greater = FormatTok->Tok;
+ FormatTok = new (Allocator.Allocate()) FormatToken;
+ FormatTok->Tok = Greater;
+ SourceLocation GreaterLocation =
+ FormatTok->Tok.getLocation().getLocWithOffset(1);
+ FormatTok->WhitespaceRange =
+ SourceRange(GreaterLocation, GreaterLocation);
+ FormatTok->TokenText = ">";
+ FormatTok->ColumnWidth = 1;
GreaterStashed = false;
return FormatTok;
}
- FormatTok = FormatToken();
- Lex.LexFromRawLexer(FormatTok.Tok);
- StringRef Text = rawTokenText(FormatTok.Tok);
- FormatTok.WhiteSpaceStart = FormatTok.Tok.getLocation();
- if (SourceMgr.getFileOffset(FormatTok.WhiteSpaceStart) == 0)
- FormatTok.IsFirst = true;
+ FormatTok = new (Allocator.Allocate()) FormatToken;
+ readRawToken(*FormatTok);
+ SourceLocation WhitespaceStart =
+ FormatTok->Tok.getLocation().getLocWithOffset(-TrailingWhitespace);
+ FormatTok->IsFirst = IsFirstToken;
+ IsFirstToken = false;
// Consume and record whitespace until we find a significant token.
- while (FormatTok.Tok.is(tok::unknown)) {
- unsigned Newlines = Text.count('\n');
- if (Newlines > 0)
- FormatTok.LastNewlineOffset =
- FormatTok.WhiteSpaceLength + Text.rfind('\n') + 1;
- unsigned EscapedNewlines = Text.count("\\\n");
- FormatTok.NewlinesBefore += Newlines;
- FormatTok.HasUnescapedNewline |= EscapedNewlines != Newlines;
- FormatTok.WhiteSpaceLength += FormatTok.Tok.getLength();
-
- if (FormatTok.Tok.is(tok::eof))
- return FormatTok;
- Lex.LexFromRawLexer(FormatTok.Tok);
- Text = rawTokenText(FormatTok.Tok);
- }
+ unsigned WhitespaceLength = TrailingWhitespace;
+ while (FormatTok->Tok.is(tok::unknown)) {
+ for (int i = 0, e = FormatTok->TokenText.size(); i != e; ++i) {
+ switch (FormatTok->TokenText[i]) {
+ case '\n':
+ ++FormatTok->NewlinesBefore;
+ // FIXME: This is technically incorrect, as it could also
+ // be a literal backslash at the end of the line.
+ if (i == 0 || (FormatTok->TokenText[i - 1] != '\\' &&
+ (FormatTok->TokenText[i - 1] != '\r' || i == 1 ||
+ FormatTok->TokenText[i - 2] != '\\')))
+ FormatTok->HasUnescapedNewline = true;
+ FormatTok->LastNewlineOffset = WhitespaceLength + i + 1;
+ Column = 0;
+ break;
+ case '\r':
+ case '\f':
+ case '\v':
+ Column = 0;
+ break;
+ case ' ':
+ ++Column;
+ break;
+ case '\t':
+ Column += Style.TabWidth - Column % Style.TabWidth;
+ break;
+ case '\\':
+ ++Column;
+ if (i + 1 == e || (FormatTok->TokenText[i + 1] != '\r' &&
+ FormatTok->TokenText[i + 1] != '\n'))
+ FormatTok->Type = TT_ImplicitStringLiteral;
+ break;
+ default:
+ FormatTok->Type = TT_ImplicitStringLiteral;
+ ++Column;
+ break;
+ }
+ }
- // Now FormatTok is the next non-whitespace token.
- FormatTok.TokenLength = Text.size();
+ if (FormatTok->Type == TT_ImplicitStringLiteral)
+ break;
+ WhitespaceLength += FormatTok->Tok.getLength();
- if (FormatTok.Tok.is(tok::comment)) {
- FormatTok.TrailingWhiteSpaceLength = Text.size() - Text.rtrim().size();
- FormatTok.TokenLength -= FormatTok.TrailingWhiteSpaceLength;
+ readRawToken(*FormatTok);
}
// In case the token starts with escaped newlines, we want to
// take them into account as whitespace - this pattern is quite frequent
// in macro definitions.
- // FIXME: What do we want to do with other escaped spaces, and escaped
- // spaces or newlines in the middle of tokens?
// FIXME: Add a more explicit test.
- unsigned i = 0;
- while (i + 1 < Text.size() && Text[i] == '\\' && Text[i + 1] == '\n') {
- // FIXME: ++FormatTok.NewlinesBefore is missing...
- FormatTok.WhiteSpaceLength += 2;
- FormatTok.TokenLength -= 2;
- i += 2;
+ while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\' &&
+ FormatTok->TokenText[1] == '\n') {
+ // FIXME: ++FormatTok->NewlinesBefore is missing...
+ WhitespaceLength += 2;
+ Column = 0;
+ FormatTok->TokenText = FormatTok->TokenText.substr(2);
}
- if (FormatTok.Tok.is(tok::raw_identifier)) {
- IdentifierInfo &Info = IdentTable.get(Text);
- FormatTok.Tok.setIdentifierInfo(&Info);
- FormatTok.Tok.setKind(Info.getTokenID());
+ FormatTok->WhitespaceRange = SourceRange(
+ WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));
+
+ FormatTok->OriginalColumn = Column;
+
+ TrailingWhitespace = 0;
+ if (FormatTok->Tok.is(tok::comment)) {
+ // FIXME: Add the trimmed whitespace to Column.
+ StringRef UntrimmedText = FormatTok->TokenText;
+ FormatTok->TokenText = FormatTok->TokenText.rtrim(" \t\v\f");
+ TrailingWhitespace = UntrimmedText.size() - FormatTok->TokenText.size();
+ } else if (FormatTok->Tok.is(tok::raw_identifier)) {
+ IdentifierInfo &Info = IdentTable.get(FormatTok->TokenText);
+ FormatTok->Tok.setIdentifierInfo(&Info);
+ FormatTok->Tok.setKind(Info.getTokenID());
+ } else if (FormatTok->Tok.is(tok::greatergreater)) {
+ FormatTok->Tok.setKind(tok::greater);
+ FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
+ GreaterStashed = true;
}
- if (FormatTok.Tok.is(tok::greatergreater)) {
- FormatTok.Tok.setKind(tok::greater);
- FormatTok.TokenLength = 1;
- GreaterStashed = true;
+ // Now FormatTok is the next non-whitespace token.
+
+ StringRef Text = FormatTok->TokenText;
+ size_t FirstNewlinePos = Text.find('\n');
+ if (FirstNewlinePos == StringRef::npos) {
+ // FIXME: ColumnWidth actually depends on the start column, we need to
+ // take this into account when the token is moved.
+ FormatTok->ColumnWidth =
+ encoding::columnWidthWithTabs(Text, Column, Style.TabWidth, Encoding);
+ Column += FormatTok->ColumnWidth;
+ } else {
+ FormatTok->IsMultiline = true;
+ // FIXME: ColumnWidth actually depends on the start column, we need to
+ // take this into account when the token is moved.
+ FormatTok->ColumnWidth = encoding::columnWidthWithTabs(
+ Text.substr(0, FirstNewlinePos), Column, Style.TabWidth, Encoding);
+
+ // The last line of the token always starts in column 0.
+ // Thus, the length can be precomputed even in the presence of tabs.
+ FormatTok->LastLineColumnWidth = encoding::columnWidthWithTabs(
+ Text.substr(Text.find_last_of('\n') + 1), 0, Style.TabWidth,
+ Encoding);
+ Column = FormatTok->LastLineColumnWidth;
}
return FormatTok;
}
- IdentifierTable &getIdentTable() { return IdentTable; }
-
-private:
- FormatToken FormatTok;
+ FormatToken *FormatTok;
+ bool IsFirstToken;
bool GreaterStashed;
+ unsigned Column;
+ unsigned TrailingWhitespace;
Lexer &Lex;
SourceManager &SourceMgr;
+ FormatStyle &Style;
IdentifierTable IdentTable;
-
- /// Returns the text of \c FormatTok.
- StringRef rawTokenText(Token &Tok) {
- return StringRef(SourceMgr.getCharacterData(Tok.getLocation()),
- Tok.getLength());
+ encoding::Encoding Encoding;
+ llvm::SpecificBumpPtrAllocator<FormatToken> Allocator;
+ SmallVector<FormatToken *, 16> Tokens;
+
+ void readRawToken(FormatToken &Tok) {
+ Lex.LexFromRawLexer(Tok.Tok);
+ Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()),
+ Tok.Tok.getLength());
+ // For formatting, treat unterminated string literals like normal string
+ // literals.
+ if (Tok.is(tok::unknown) && !Tok.TokenText.empty() &&
+ Tok.TokenText[0] == '"') {
+ Tok.Tok.setKind(tok::string_literal);
+ Tok.IsUnterminatedLiteral = true;
+ }
}
};
class Formatter : public UnwrappedLineConsumer {
public:
- Formatter(DiagnosticsEngine &Diag, const FormatStyle &Style, Lexer &Lex,
- SourceManager &SourceMgr,
+ Formatter(const FormatStyle &Style, Lexer &Lex, SourceManager &SourceMgr,
const std::vector<CharSourceRange> &Ranges)
- : Diag(Diag), Style(Style), Lex(Lex), SourceMgr(SourceMgr),
- Whitespaces(SourceMgr, Style), Ranges(Ranges) {}
-
- virtual ~Formatter() {}
+ : Style(Style), Lex(Lex), SourceMgr(SourceMgr),
+ Whitespaces(SourceMgr, Style, inputUsesCRLF(Lex.getBuffer())),
+ Ranges(Ranges.begin(), Ranges.end()), UnwrappedLines(1),
+ Encoding(encoding::detectEncoding(Lex.getBuffer())) {
+ DEBUG(llvm::dbgs() << "File encoding: "
+ << (Encoding == encoding::Encoding_UTF8 ? "UTF8"
+ : "unknown")
+ << "\n");
+ }
tooling::Replacements format() {
- LexerBasedFormatTokenSource Tokens(Lex, SourceMgr);
- UnwrappedLineParser Parser(Diag, Style, Tokens, *this);
+ tooling::Replacements Result;
+ FormatTokenLexer Tokens(Lex, SourceMgr, Style, Encoding);
+
+ UnwrappedLineParser Parser(Style, Tokens.lex(), *this);
bool StructuralError = Parser.parse();
- unsigned PreviousEndOfLineColumn = 0;
- TokenAnnotator Annotator(Style, SourceMgr, Lex,
- Tokens.getIdentTable().get("in"));
- for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
- Annotator.annotate(AnnotatedLines[i]);
+ assert(UnwrappedLines.rbegin()->empty());
+ for (unsigned Run = 0, RunE = UnwrappedLines.size(); Run + 1 != RunE;
+ ++Run) {
+ DEBUG(llvm::dbgs() << "Run " << Run << "...\n");
+ SmallVector<AnnotatedLine *, 16> AnnotatedLines;
+ for (unsigned i = 0, e = UnwrappedLines[Run].size(); i != e; ++i) {
+ AnnotatedLines.push_back(new AnnotatedLine(UnwrappedLines[Run][i]));
+ }
+ tooling::Replacements RunResult =
+ format(AnnotatedLines, StructuralError, Tokens);
+ DEBUG({
+ llvm::dbgs() << "Replacements for run " << Run << ":\n";
+ for (tooling::Replacements::iterator I = RunResult.begin(),
+ E = RunResult.end();
+ I != E; ++I) {
+ llvm::dbgs() << I->toString() << "\n";
+ }
+ });
+ for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
+ delete AnnotatedLines[i];
+ }
+ Result.insert(RunResult.begin(), RunResult.end());
+ Whitespaces.reset();
}
- deriveLocalStyle();
+ return Result;
+ }
+
+ tooling::Replacements format(SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
+ bool StructuralError, FormatTokenLexer &Tokens) {
+ TokenAnnotator Annotator(Style, Tokens.getIdentTable().get("in"));
for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
- Annotator.calculateFormattingInformation(AnnotatedLines[i]);
+ Annotator.annotate(*AnnotatedLines[i]);
}
-
- // Adapt level to the next line if this is a comment.
- // FIXME: Can/should this be done in the UnwrappedLineParser?
- const AnnotatedLine *NextNoneCommentLine = NULL;
- for (unsigned i = AnnotatedLines.size() - 1; i > 0; --i) {
- if (NextNoneCommentLine && AnnotatedLines[i].First.is(tok::comment) &&
- AnnotatedLines[i].First.Children.empty())
- AnnotatedLines[i].Level = NextNoneCommentLine->Level;
- else
- NextNoneCommentLine =
- AnnotatedLines[i].First.isNot(tok::r_brace) ? &AnnotatedLines[i]
- : NULL;
+ deriveLocalStyle(AnnotatedLines);
+ for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
+ Annotator.calculateFormattingInformation(*AnnotatedLines[i]);
}
- std::vector<int> IndentForLevel;
- bool PreviousLineWasTouched = false;
- const AnnotatedToken *PreviousLineLastToken = 0;
- for (std::vector<AnnotatedLine>::iterator I = AnnotatedLines.begin(),
- E = AnnotatedLines.end();
- I != E; ++I) {
- const AnnotatedLine &TheLine = *I;
- const FormatToken &FirstTok = TheLine.First.FormatTok;
- int Offset = getIndentOffset(TheLine.First);
- while (IndentForLevel.size() <= TheLine.Level)
- IndentForLevel.push_back(-1);
- IndentForLevel.resize(TheLine.Level + 1);
- bool WasMoved = PreviousLineWasTouched && FirstTok.NewlinesBefore == 0;
- if (TheLine.First.is(tok::eof)) {
- if (PreviousLineWasTouched) {
- unsigned NewLines = std::min(FirstTok.NewlinesBefore, 1u);
- Whitespaces.replaceWhitespace(TheLine.First, NewLines, /*Indent*/ 0,
- /*WhitespaceStartColumn*/ 0);
- }
- } else if (TheLine.Type != LT_Invalid &&
- (WasMoved || touchesLine(TheLine))) {
- unsigned LevelIndent = getIndent(IndentForLevel, TheLine.Level);
- unsigned Indent = LevelIndent;
- if (static_cast<int>(Indent) + Offset >= 0)
- Indent += Offset;
- if (FirstTok.WhiteSpaceStart.isValid() &&
- // Insert a break even if there is a structural error in case where
- // we break apart a line consisting of multiple unwrapped lines.
- (FirstTok.NewlinesBefore == 0 || !StructuralError)) {
- formatFirstToken(TheLine.First, PreviousLineLastToken, Indent,
- TheLine.InPPDirective, PreviousEndOfLineColumn);
- } else {
- Indent = LevelIndent =
- SourceMgr.getSpellingColumnNumber(FirstTok.Tok.getLocation()) - 1;
- }
- tryFitMultipleLinesInOne(Indent, I, E);
- UnwrappedLineFormatter Formatter(Style, SourceMgr, TheLine, Indent,
- TheLine.First, Whitespaces);
- PreviousEndOfLineColumn =
- Formatter.format(I + 1 != E ? &*(I + 1) : NULL);
- IndentForLevel[TheLine.Level] = LevelIndent;
- PreviousLineWasTouched = true;
- } else {
- if (FirstTok.NewlinesBefore > 0 || FirstTok.IsFirst) {
- unsigned Indent =
- SourceMgr.getSpellingColumnNumber(FirstTok.Tok.getLocation()) - 1;
- unsigned LevelIndent = Indent;
- if (static_cast<int>(LevelIndent) - Offset >= 0)
- LevelIndent -= Offset;
- if (TheLine.First.isNot(tok::comment))
- IndentForLevel[TheLine.Level] = LevelIndent;
-
- // Remove trailing whitespace of the previous line if it was touched.
- if (PreviousLineWasTouched || touchesEmptyLineBefore(TheLine))
- formatFirstToken(TheLine.First, PreviousLineLastToken, Indent,
- TheLine.InPPDirective, PreviousEndOfLineColumn);
- }
- // If we did not reformat this unwrapped line, the column at the end of
- // the last token is unchanged - thus, we can calculate the end of the
- // last token.
- SourceLocation LastLoc = TheLine.Last->FormatTok.Tok.getLocation();
- PreviousEndOfLineColumn =
- SourceMgr.getSpellingColumnNumber(LastLoc) +
- Lex.MeasureTokenLength(LastLoc, SourceMgr, Lex.getLangOpts()) - 1;
- PreviousLineWasTouched = false;
- if (TheLine.Last->is(tok::comment))
- Whitespaces.addUntouchableComment(SourceMgr.getSpellingColumnNumber(
- TheLine.Last->FormatTok.Tok.getLocation()) - 1);
- else
- Whitespaces.alignComments();
- }
- PreviousLineLastToken = I->Last;
- }
+ Annotator.setCommentLineLevels(AnnotatedLines);
+ ContinuationIndenter Indenter(Style, SourceMgr, Whitespaces, Encoding,
+ BinPackInconclusiveFunctions);
+ UnwrappedLineFormatter Formatter(SourceMgr, Ranges, &Indenter, &Whitespaces,
+ Style);
+ Formatter.format(AnnotatedLines, /*DryRun=*/false);
return Whitespaces.generateReplacements();
}
private:
- void deriveLocalStyle() {
+ static bool inputUsesCRLF(StringRef Text) {
+ return Text.count('\r') * 2 > Text.count('\n');
+ }
+
+ void
+ deriveLocalStyle(const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
unsigned CountBoundToVariable = 0;
unsigned CountBoundToType = 0;
bool HasCpp03IncompatibleFormat = false;
+ bool HasBinPackedFunction = false;
+ bool HasOnePerLineFunction = false;
for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
- if (AnnotatedLines[i].First.Children.empty())
+ if (!AnnotatedLines[i]->First->Next)
continue;
- AnnotatedToken *Tok = &AnnotatedLines[i].First.Children[0];
- while (!Tok->Children.empty()) {
+ FormatToken *Tok = AnnotatedLines[i]->First->Next;
+ while (Tok->Next) {
if (Tok->Type == TT_PointerOrReference) {
- bool SpacesBefore = Tok->FormatTok.WhiteSpaceLength > 0;
- bool SpacesAfter = Tok->Children[0].FormatTok.WhiteSpaceLength > 0;
+ bool SpacesBefore =
+ Tok->WhitespaceRange.getBegin() != Tok->WhitespaceRange.getEnd();
+ bool SpacesAfter = Tok->Next->WhitespaceRange.getBegin() !=
+ Tok->Next->WhitespaceRange.getEnd();
if (SpacesBefore && !SpacesAfter)
++CountBoundToVariable;
else if (!SpacesBefore && SpacesAfter)
++CountBoundToType;
}
- if (Tok->Type == TT_TemplateCloser &&
- Tok->Parent->Type == TT_TemplateCloser &&
- Tok->FormatTok.WhiteSpaceLength == 0)
- HasCpp03IncompatibleFormat = true;
- Tok = &Tok->Children[0];
+ if (Tok->WhitespaceRange.getBegin() == Tok->WhitespaceRange.getEnd()) {
+ if (Tok->is(tok::coloncolon) &&
+ Tok->Previous->Type == TT_TemplateOpener)
+ HasCpp03IncompatibleFormat = true;
+ if (Tok->Type == TT_TemplateCloser &&
+ Tok->Previous->Type == TT_TemplateCloser)
+ HasCpp03IncompatibleFormat = true;
+ }
+
+ if (Tok->PackingKind == PPK_BinPacked)
+ HasBinPackedFunction = true;
+ if (Tok->PackingKind == PPK_OnePerLine)
+ HasOnePerLineFunction = true;
+
+ Tok = Tok->Next;
}
}
if (Style.DerivePointerBinding) {
@@ -1140,259 +1326,69 @@ private:
Style.Standard = HasCpp03IncompatibleFormat ? FormatStyle::LS_Cpp11
: FormatStyle::LS_Cpp03;
}
- }
-
- /// \brief Get the indent of \p Level from \p IndentForLevel.
- ///
- /// \p IndentForLevel must contain the indent for the level \c l
- /// at \p IndentForLevel[l], or a value < 0 if the indent for
- /// that level is unknown.
- unsigned getIndent(const std::vector<int> IndentForLevel, unsigned Level) {
- if (IndentForLevel[Level] != -1)
- return IndentForLevel[Level];
- if (Level == 0)
- return 0;
- return getIndent(IndentForLevel, Level - 1) + 2;
- }
-
- /// \brief Get the offset of the line relatively to the level.
- ///
- /// For example, 'public:' labels in classes are offset by 1 or 2
- /// characters to the left from their level.
- int getIndentOffset(const AnnotatedToken &RootToken) {
- if (RootToken.isAccessSpecifier(false) || RootToken.isObjCAccessSpecifier())
- return Style.AccessModifierOffset;
- return 0;
- }
-
- /// \brief Tries to merge lines into one.
- ///
- /// This will change \c Line and \c AnnotatedLine to contain the merged line,
- /// if possible; note that \c I will be incremented when lines are merged.
- ///
- /// Returns whether the resulting \c Line can fit in a single line.
- void tryFitMultipleLinesInOne(unsigned Indent,
- std::vector<AnnotatedLine>::iterator &I,
- std::vector<AnnotatedLine>::iterator E) {
- // We can never merge stuff if there are trailing line comments.
- if (I->Last->Type == TT_LineComment)
- return;
-
- unsigned Limit = Style.ColumnLimit - Indent;
- // If we already exceed the column limit, we set 'Limit' to 0. The different
- // tryMerge..() functions can then decide whether to still do merging.
- Limit = I->Last->TotalLength > Limit ? 0 : Limit - I->Last->TotalLength;
-
- if (I + 1 == E || (I + 1)->Type == LT_Invalid)
- return;
-
- if (I->Last->is(tok::l_brace)) {
- tryMergeSimpleBlock(I, E, Limit);
- } else if (I->First.is(tok::kw_if)) {
- tryMergeSimpleIf(I, E, Limit);
- } else if (I->InPPDirective && (I->First.FormatTok.HasUnescapedNewline ||
- I->First.FormatTok.IsFirst)) {
- tryMergeSimplePPDirective(I, E, Limit);
- }
- return;
- }
-
- void tryMergeSimplePPDirective(std::vector<AnnotatedLine>::iterator &I,
- std::vector<AnnotatedLine>::iterator E,
- unsigned Limit) {
- if (Limit == 0)
- return;
- AnnotatedLine &Line = *I;
- if (!(I + 1)->InPPDirective || (I + 1)->First.FormatTok.HasUnescapedNewline)
- return;
- if (I + 2 != E && (I + 2)->InPPDirective &&
- !(I + 2)->First.FormatTok.HasUnescapedNewline)
- return;
- if (1 + (I + 1)->Last->TotalLength > Limit)
- return;
- join(Line, *(++I));
- }
-
- void tryMergeSimpleIf(std::vector<AnnotatedLine>::iterator &I,
- std::vector<AnnotatedLine>::iterator E,
- unsigned Limit) {
- if (Limit == 0)
- return;
- if (!Style.AllowShortIfStatementsOnASingleLine)
- return;
- if ((I + 1)->InPPDirective != I->InPPDirective ||
- ((I + 1)->InPPDirective &&
- (I + 1)->First.FormatTok.HasUnescapedNewline))
- return;
- AnnotatedLine &Line = *I;
- if (Line.Last->isNot(tok::r_paren))
- return;
- if (1 + (I + 1)->Last->TotalLength > Limit)
- return;
- if ((I + 1)->First.is(tok::kw_if) || (I + 1)->First.Type == TT_LineComment)
- return;
- // Only inline simple if's (no nested if or else).
- if (I + 2 != E && (I + 2)->First.is(tok::kw_else))
- return;
- join(Line, *(++I));
- }
-
- void tryMergeSimpleBlock(std::vector<AnnotatedLine>::iterator &I,
- std::vector<AnnotatedLine>::iterator E,
- unsigned Limit) {
- // First, check that the current line allows merging. This is the case if
- // we're not in a control flow statement and the last token is an opening
- // brace.
- AnnotatedLine &Line = *I;
- if (Line.First.isOneOf(tok::kw_if, tok::kw_while, tok::kw_do, tok::r_brace,
- tok::kw_else, tok::kw_try, tok::kw_catch,
- tok::kw_for,
- // This gets rid of all ObjC @ keywords and methods.
- tok::at, tok::minus, tok::plus))
- return;
-
- AnnotatedToken *Tok = &(I + 1)->First;
- if (Tok->Children.empty() && Tok->is(tok::r_brace) &&
- !Tok->MustBreakBefore) {
- // We merge empty blocks even if the line exceeds the column limit.
- Tok->SpacesRequiredBefore = 0;
- Tok->CanBreakBefore = true;
- join(Line, *(I + 1));
- I += 1;
- } else if (Limit != 0) {
- // Check that we still have three lines and they fit into the limit.
- if (I + 2 == E || (I + 2)->Type == LT_Invalid ||
- !nextTwoLinesFitInto(I, Limit))
- return;
-
- // Second, check that the next line does not contain any braces - if it
- // does, readability declines when putting it into a single line.
- if ((I + 1)->Last->Type == TT_LineComment || Tok->MustBreakBefore)
- return;
- do {
- if (Tok->isOneOf(tok::l_brace, tok::r_brace))
- return;
- Tok = Tok->Children.empty() ? NULL : &Tok->Children.back();
- } while (Tok != NULL);
-
- // Last, check that the third line contains a single closing brace.
- Tok = &(I + 2)->First;
- if (!Tok->Children.empty() || Tok->isNot(tok::r_brace) ||
- Tok->MustBreakBefore)
- return;
-
- join(Line, *(I + 1));
- join(Line, *(I + 2));
- I += 2;
- }
- }
-
- bool nextTwoLinesFitInto(std::vector<AnnotatedLine>::iterator I,
- unsigned Limit) {
- return 1 + (I + 1)->Last->TotalLength + 1 + (I + 2)->Last->TotalLength <=
- Limit;
- }
-
- void join(AnnotatedLine &A, const AnnotatedLine &B) {
- unsigned LengthA = A.Last->TotalLength + B.First.SpacesRequiredBefore;
- A.Last->Children.push_back(B.First);
- while (!A.Last->Children.empty()) {
- A.Last->Children[0].Parent = A.Last;
- A.Last->Children[0].TotalLength += LengthA;
- A.Last = &A.Last->Children[0];
- }
- }
-
- bool touchesRanges(const CharSourceRange &Range) {
- for (unsigned i = 0, e = Ranges.size(); i != e; ++i) {
- if (!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(),
- Ranges[i].getBegin()) &&
- !SourceMgr.isBeforeInTranslationUnit(Ranges[i].getEnd(),
- Range.getBegin()))
- return true;
- }
- return false;
- }
-
- bool touchesLine(const AnnotatedLine &TheLine) {
- const FormatToken *First = &TheLine.First.FormatTok;
- const FormatToken *Last = &TheLine.Last->FormatTok;
- CharSourceRange LineRange = CharSourceRange::getTokenRange(
- First->WhiteSpaceStart.getLocWithOffset(First->LastNewlineOffset),
- Last->Tok.getLocation());
- return touchesRanges(LineRange);
- }
-
- bool touchesEmptyLineBefore(const AnnotatedLine &TheLine) {
- const FormatToken *First = &TheLine.First.FormatTok;
- CharSourceRange LineRange = CharSourceRange::getCharRange(
- First->WhiteSpaceStart,
- First->WhiteSpaceStart.getLocWithOffset(First->LastNewlineOffset));
- return touchesRanges(LineRange);
+ BinPackInconclusiveFunctions =
+ HasBinPackedFunction || !HasOnePerLineFunction;
}
virtual void consumeUnwrappedLine(const UnwrappedLine &TheLine) {
- AnnotatedLines.push_back(AnnotatedLine(TheLine));
+ assert(!UnwrappedLines.empty());
+ UnwrappedLines.back().push_back(TheLine);
}
- /// \brief Add a new line and the required indent before the first Token
- /// of the \c UnwrappedLine if there was no structural parsing error.
- /// Returns the indent level of the \c UnwrappedLine.
- void formatFirstToken(const AnnotatedToken &RootToken,
- const AnnotatedToken *PreviousToken, unsigned Indent,
- bool InPPDirective, unsigned PreviousEndOfLineColumn) {
- const FormatToken &Tok = RootToken.FormatTok;
-
- unsigned Newlines =
- std::min(Tok.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1);
- if (Newlines == 0 && !Tok.IsFirst)
- Newlines = 1;
-
- if (!InPPDirective || Tok.HasUnescapedNewline) {
- // Insert extra new line before access specifiers.
- if (PreviousToken && PreviousToken->isOneOf(tok::semi, tok::r_brace) &&
- RootToken.isAccessSpecifier() && Tok.NewlinesBefore == 1)
- ++Newlines;
-
- Whitespaces.replaceWhitespace(RootToken, Newlines, Indent, 0);
- } else {
- Whitespaces.replacePPWhitespace(RootToken, Newlines, Indent,
- PreviousEndOfLineColumn);
- }
+ virtual void finishRun() {
+ UnwrappedLines.push_back(SmallVector<UnwrappedLine, 16>());
}
- DiagnosticsEngine &Diag;
FormatStyle Style;
Lexer &Lex;
SourceManager &SourceMgr;
WhitespaceManager Whitespaces;
- std::vector<CharSourceRange> Ranges;
- std::vector<AnnotatedLine> AnnotatedLines;
+ SmallVector<CharSourceRange, 8> Ranges;
+ SmallVector<SmallVector<UnwrappedLine, 16>, 2> UnwrappedLines;
+
+ encoding::Encoding Encoding;
+ bool BinPackInconclusiveFunctions;
};
+} // end anonymous namespace
+
tooling::Replacements reformat(const FormatStyle &Style, Lexer &Lex,
SourceManager &SourceMgr,
- std::vector<CharSourceRange> Ranges,
- DiagnosticConsumer *DiagClient) {
- IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts = new DiagnosticOptions();
- OwningPtr<DiagnosticConsumer> DiagPrinter;
- if (DiagClient == 0) {
- DiagPrinter.reset(new TextDiagnosticPrinter(llvm::errs(), &*DiagOpts));
- DiagPrinter->BeginSourceFile(Lex.getLangOpts(), Lex.getPP());
- DiagClient = DiagPrinter.get();
- }
- DiagnosticsEngine Diagnostics(
- IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs()), &*DiagOpts,
- DiagClient, false);
- Diagnostics.setSourceManager(&SourceMgr);
- Formatter formatter(Diagnostics, Style, Lex, SourceMgr, Ranges);
+ std::vector<CharSourceRange> Ranges) {
+ Formatter formatter(Style, Lex, SourceMgr, Ranges);
return formatter.format();
}
-LangOptions getFormattingLangOpts() {
+tooling::Replacements reformat(const FormatStyle &Style, StringRef Code,
+ std::vector<tooling::Range> Ranges,
+ StringRef FileName) {
+ FileManager Files((FileSystemOptions()));
+ DiagnosticsEngine Diagnostics(
+ IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs),
+ new DiagnosticOptions);
+ SourceManager SourceMgr(Diagnostics, Files);
+ llvm::MemoryBuffer *Buf = llvm::MemoryBuffer::getMemBuffer(Code, FileName);
+ const clang::FileEntry *Entry =
+ Files.getVirtualFile(FileName, Buf->getBufferSize(), 0);
+ SourceMgr.overrideFileContents(Entry, Buf);
+ FileID ID =
+ SourceMgr.createFileID(Entry, SourceLocation(), clang::SrcMgr::C_User);
+ Lexer Lex(ID, SourceMgr.getBuffer(ID), SourceMgr,
+ getFormattingLangOpts(Style.Standard));
+ SourceLocation StartOfFile = SourceMgr.getLocForStartOfFile(ID);
+ std::vector<CharSourceRange> CharRanges;
+ for (unsigned i = 0, e = Ranges.size(); i != e; ++i) {
+ SourceLocation Start = StartOfFile.getLocWithOffset(Ranges[i].getOffset());
+ SourceLocation End = Start.getLocWithOffset(Ranges[i].getLength());
+ CharRanges.push_back(CharSourceRange::getCharRange(Start, End));
+ }
+ return reformat(Style, Lex, SourceMgr, CharRanges);
+}
+
+LangOptions getFormattingLangOpts(FormatStyle::LanguageStandard Standard) {
LangOptions LangOpts;
LangOpts.CPlusPlus = 1;
- LangOpts.CPlusPlus11 = 1;
+ LangOpts.CPlusPlus11 = Standard == FormatStyle::LS_Cpp03 ? 0 : 1;
LangOpts.LineComment = 1;
LangOpts.Bool = 1;
LangOpts.ObjC1 = 1;
@@ -1400,5 +1396,82 @@ LangOptions getFormattingLangOpts() {
return LangOpts;
}
+const char *StyleOptionHelpDescription =
+ "Coding style, currently supports:\n"
+ " LLVM, Google, Chromium, Mozilla, WebKit.\n"
+ "Use -style=file to load style configuration from\n"
+ ".clang-format file located in one of the parent\n"
+ "directories of the source file (or current\n"
+ "directory for stdin).\n"
+ "Use -style=\"{key: value, ...}\" to set specific\n"
+ "parameters, e.g.:\n"
+ " -style=\"{BasedOnStyle: llvm, IndentWidth: 8}\"";
+
+FormatStyle getStyle(StringRef StyleName, StringRef FileName) {
+ // Fallback style in case the rest of this function can't determine a style.
+ StringRef FallbackStyle = "LLVM";
+ FormatStyle Style;
+ getPredefinedStyle(FallbackStyle, &Style);
+
+ if (StyleName.startswith("{")) {
+ // Parse YAML/JSON style from the command line.
+ if (llvm::error_code ec = parseConfiguration(StyleName, &Style)) {
+ llvm::errs() << "Error parsing -style: " << ec.message() << ", using "
+ << FallbackStyle << " style\n";
+ }
+ return Style;
+ }
+
+ if (!StyleName.equals_lower("file")) {
+ if (!getPredefinedStyle(StyleName, &Style))
+ llvm::errs() << "Invalid value for -style, using " << FallbackStyle
+ << " style\n";
+ return Style;
+ }
+
+ SmallString<128> Path(FileName);
+ llvm::sys::fs::make_absolute(Path);
+ for (StringRef Directory = Path; !Directory.empty();
+ Directory = llvm::sys::path::parent_path(Directory)) {
+ if (!llvm::sys::fs::is_directory(Directory))
+ continue;
+ SmallString<128> ConfigFile(Directory);
+
+ llvm::sys::path::append(ConfigFile, ".clang-format");
+ DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n");
+ bool IsFile = false;
+ // Ignore errors from is_regular_file: we only need to know if we can read
+ // the file or not.
+ llvm::sys::fs::is_regular_file(Twine(ConfigFile), IsFile);
+
+ if (!IsFile) {
+ // Try _clang-format too, since dotfiles are not commonly used on Windows.
+ ConfigFile = Directory;
+ llvm::sys::path::append(ConfigFile, "_clang-format");
+ DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n");
+ llvm::sys::fs::is_regular_file(Twine(ConfigFile), IsFile);
+ }
+
+ if (IsFile) {
+ OwningPtr<llvm::MemoryBuffer> Text;
+ if (llvm::error_code ec =
+ llvm::MemoryBuffer::getFile(ConfigFile.c_str(), Text)) {
+ llvm::errs() << ec.message() << "\n";
+ continue;
+ }
+ if (llvm::error_code ec = parseConfiguration(Text->getBuffer(), &Style)) {
+ llvm::errs() << "Error reading " << ConfigFile << ": " << ec.message()
+ << "\n";
+ continue;
+ }
+ DEBUG(llvm::dbgs() << "Using configuration file " << ConfigFile << "\n");
+ return Style;
+ }
+ }
+ llvm::errs() << "Can't find usable .clang-format, using " << FallbackStyle
+ << " style\n";
+ return Style;
+}
+
} // namespace format
} // namespace clang
diff --git a/lib/Format/FormatToken.cpp b/lib/Format/FormatToken.cpp
new file mode 100644
index 000000000000..8ac704a3bb6d
--- /dev/null
+++ b/lib/Format/FormatToken.cpp
@@ -0,0 +1,204 @@
+//===--- FormatToken.cpp - Format C++ code --------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements specific functions of \c FormatTokens and their
+/// roles.
+///
+//===----------------------------------------------------------------------===//
+
+#include "FormatToken.h"
+#include "ContinuationIndenter.h"
+#include "clang/Format/Format.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Debug.h"
+
+namespace clang {
+namespace format {
+
+TokenRole::~TokenRole() {}
+
+void TokenRole::precomputeFormattingInfos(const FormatToken *Token) {}
+
+unsigned CommaSeparatedList::format(LineState &State,
+ ContinuationIndenter *Indenter,
+ bool DryRun) {
+ if (!State.NextToken->Previous || !State.NextToken->Previous->Previous ||
+ Commas.size() <= 2)
+ return 0;
+
+ // Ensure that we start on the opening brace.
+ const FormatToken *LBrace = State.NextToken->Previous->Previous;
+ if (LBrace->isNot(tok::l_brace) ||
+ LBrace->BlockKind == BK_Block ||
+ LBrace->Type == TT_DictLiteral ||
+ LBrace->Next->Type == TT_DesignatedInitializerPeriod)
+ return 0;
+
+ // Calculate the number of code points we have to format this list. As the
+ // first token is already placed, we have to subtract it.
+ unsigned RemainingCodePoints = Style.ColumnLimit - State.Column +
+ State.NextToken->Previous->ColumnWidth;
+
+ // Find the best ColumnFormat, i.e. the best number of columns to use.
+ const ColumnFormat *Format = getColumnFormat(RemainingCodePoints);
+ if (!Format)
+ return 0;
+
+ // Format the entire list.
+ unsigned Penalty = 0;
+ unsigned Column = 0;
+ unsigned Item = 0;
+ while (State.NextToken != LBrace->MatchingParen) {
+ bool NewLine = false;
+ unsigned ExtraSpaces = 0;
+
+ // If the previous token was one of our commas, we are now on the next item.
+ if (Item < Commas.size() && State.NextToken->Previous == Commas[Item]) {
+ if (!State.NextToken->isTrailingComment()) {
+ ExtraSpaces += Format->ColumnSizes[Column] - ItemLengths[Item];
+ ++Column;
+ }
+ ++Item;
+ }
+
+ if (Column == Format->Columns || State.NextToken->MustBreakBefore) {
+ Column = 0;
+ NewLine = true;
+ }
+
+ // Place token using the continuation indenter and store the penalty.
+ Penalty += Indenter->addTokenToState(State, NewLine, DryRun, ExtraSpaces);
+ }
+ return Penalty;
+}
+
+// Returns the length in code points between Begin and End (both included),
+// assuming that the entire sequence is put on a single line.
+static unsigned CodePointsBetween(const FormatToken *Begin,
+ const FormatToken *End) {
+ assert(End->TotalLength >= Begin->TotalLength);
+ return End->TotalLength - Begin->TotalLength + Begin->ColumnWidth;
+}
+
+void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) {
+ // FIXME: At some point we might want to do this for other lists, too.
+ if (!Token->MatchingParen || Token->isNot(tok::l_brace))
+ return;
+
+ FormatToken *ItemBegin = Token->Next;
+ SmallVector<bool, 8> MustBreakBeforeItem;
+
+ // The length of each item if it is put at the end of the line. This includes
+ // trailing comments which are otherwise ignored for column alignment.
+ SmallVector<unsigned, 8> EndOfLineItemLength;
+
+ bool HasNestedBracedList = false;
+ for (unsigned i = 0, e = Commas.size() + 1; i != e; ++i) {
+ // Skip comments on their own line.
+ while (ItemBegin->HasUnescapedNewline && ItemBegin->isTrailingComment())
+ ItemBegin = ItemBegin->Next;
+
+ MustBreakBeforeItem.push_back(ItemBegin->MustBreakBefore);
+ if (ItemBegin->is(tok::l_brace))
+ HasNestedBracedList = true;
+ const FormatToken *ItemEnd = NULL;
+ if (i == Commas.size()) {
+ ItemEnd = Token->MatchingParen;
+ const FormatToken *NonCommentEnd = ItemEnd->getPreviousNonComment();
+ ItemLengths.push_back(CodePointsBetween(ItemBegin, NonCommentEnd));
+ if (Style.Cpp11BracedListStyle) {
+ // In Cpp11 braced list style, the } and possibly other subsequent
+ // tokens will need to stay on a line with the last element.
+ while (ItemEnd->Next && !ItemEnd->Next->CanBreakBefore)
+ ItemEnd = ItemEnd->Next;
+ } else {
+ // In other braced list styles, the "}" can be wrapped to a new line.
+ ItemEnd = Token->MatchingParen->Previous;
+ }
+ } else {
+ ItemEnd = Commas[i];
+ // The comma is counted as part of the item when calculating the length.
+ ItemLengths.push_back(CodePointsBetween(ItemBegin, ItemEnd));
+ // Consume trailing comments so they are included in EndOfLineItemLength.
+ if (ItemEnd->Next && !ItemEnd->Next->HasUnescapedNewline &&
+ ItemEnd->Next->isTrailingComment())
+ ItemEnd = ItemEnd->Next;
+ }
+ EndOfLineItemLength.push_back(CodePointsBetween(ItemBegin, ItemEnd));
+ // If there is a trailing comma in the list, the next item will start at the
+ // closing brace. Don't create an extra item for this.
+ if (ItemEnd->getNextNonComment() == Token->MatchingParen)
+ break;
+ ItemBegin = ItemEnd->Next;
+ }
+
+ // We can never place more than ColumnLimit / 3 items in a row (because of the
+ // spaces and the comma).
+ for (unsigned Columns = 1; Columns <= Style.ColumnLimit / 3; ++Columns) {
+ ColumnFormat Format;
+ Format.Columns = Columns;
+ Format.ColumnSizes.resize(Columns);
+ Format.LineCount = 1;
+ bool HasRowWithSufficientColumns = false;
+ unsigned Column = 0;
+ for (unsigned i = 0, e = ItemLengths.size(); i != e; ++i) {
+ assert(i < MustBreakBeforeItem.size());
+ if (MustBreakBeforeItem[i] || Column == Columns) {
+ ++Format.LineCount;
+ Column = 0;
+ }
+ if (Column == Columns - 1)
+ HasRowWithSufficientColumns = true;
+ unsigned length =
+ (Column == Columns - 1) ? EndOfLineItemLength[i] : ItemLengths[i];
+ Format.ColumnSizes[Column] =
+ std::max(Format.ColumnSizes[Column], length);
+ ++Column;
+ }
+ // If all rows are terminated early (e.g. by trailing comments), we don't
+ // need to look further.
+ if (!HasRowWithSufficientColumns)
+ break;
+ Format.TotalWidth = Columns - 1; // Width of the N-1 spaces.
+ for (unsigned i = 0; i < Columns; ++i) {
+ Format.TotalWidth += Format.ColumnSizes[i];
+ }
+
+ // Ignore layouts that are bound to violate the column limit.
+ if (Format.TotalWidth > Style.ColumnLimit)
+ continue;
+
+ // If this braced list has a nested braced list, we format it either with one
+ // element per line or with all elements on one line.
+ if (HasNestedBracedList && Columns > 1 && Format.LineCount > 1)
+ continue;
+
+ Formats.push_back(Format);
+ }
+}
+
+const CommaSeparatedList::ColumnFormat *
+CommaSeparatedList::getColumnFormat(unsigned RemainingCharacters) const {
+ const ColumnFormat *BestFormat = NULL;
+ for (SmallVector<ColumnFormat, 4>::const_reverse_iterator
+ I = Formats.rbegin(),
+ E = Formats.rend();
+ I != E; ++I) {
+ if (I->TotalWidth <= RemainingCharacters) {
+ if (BestFormat && I->LineCount > BestFormat->LineCount)
+ break;
+ BestFormat = &*I;
+ }
+ }
+ return BestFormat;
+}
+
+} // namespace format
+} // namespace clang
diff --git a/lib/Format/FormatToken.h b/lib/Format/FormatToken.h
new file mode 100644
index 000000000000..2145ee28dcc0
--- /dev/null
+++ b/lib/Format/FormatToken.h
@@ -0,0 +1,452 @@
+//===--- FormatToken.h - Format C++ code ------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file contains the declaration of the FormatToken, a wrapper
+/// around Token with additional information related to formatting.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_FORMAT_FORMAT_TOKEN_H
+#define LLVM_CLANG_FORMAT_FORMAT_TOKEN_H
+
+#include "clang/Basic/OperatorPrecedence.h"
+#include "clang/Format/Format.h"
+#include "clang/Lex/Lexer.h"
+#include "llvm/ADT/OwningPtr.h"
+
+namespace clang {
+namespace format {
+
+enum TokenType {
+ TT_ArrayInitializerLSquare,
+ TT_ArraySubscriptLSquare,
+ TT_BinaryOperator,
+ TT_BitFieldColon,
+ TT_BlockComment,
+ TT_CastRParen,
+ TT_ConditionalExpr,
+ TT_CtorInitializerColon,
+ TT_CtorInitializerComma,
+ TT_DesignatedInitializerPeriod,
+ TT_DictLiteral,
+ TT_ImplicitStringLiteral,
+ TT_InlineASMColon,
+ TT_InheritanceColon,
+ TT_FunctionTypeLParen,
+ TT_LambdaLSquare,
+ TT_LineComment,
+ TT_ObjCBlockLParen,
+ TT_ObjCDecl,
+ TT_ObjCForIn,
+ TT_ObjCMethodExpr,
+ TT_ObjCMethodSpecifier,
+ TT_ObjCProperty,
+ TT_ObjCSelectorName,
+ TT_OverloadedOperator,
+ TT_OverloadedOperatorLParen,
+ TT_PointerOrReference,
+ TT_PureVirtualSpecifier,
+ TT_RangeBasedForLoopColon,
+ TT_StartOfName,
+ TT_TemplateCloser,
+ TT_TemplateOpener,
+ TT_TrailingReturnArrow,
+ TT_TrailingUnaryOperator,
+ TT_UnaryOperator,
+ TT_Unknown // Initial value of FormatToken::Type.
+};
+
+// Represents what type of block a set of braces opens.
+enum BraceBlockKind {
+ BK_Unknown, // Initial value of FormatToken::BlockKind.
+ BK_Block,
+ BK_BracedInit
+};
+
+// The packing kind of a function's parameters.
+enum ParameterPackingKind {
+ PPK_BinPacked, // Several lines; some comma is followed by more on its line.
+ PPK_OnePerLine, // Several lines; no comma is followed by more on its line.
+ PPK_Inconclusive // Initial value; kept when the list has no line break.
+};
+
+enum FormatDecision {
+ FD_Unformatted, // Initial value of FormatToken::Decision.
+ FD_Continue,
+ FD_Break
+};
+
+class TokenRole;
+class AnnotatedLine;
+
+/// \brief A wrapper around a \c Token storing information about the
+/// whitespace characters preceding it.
+struct FormatToken {
+ FormatToken() // Initializes every integer, bool, enum and pointer member.
+ : NewlinesBefore(0), HasUnescapedNewline(false), LastNewlineOffset(0),
+ ColumnWidth(0), LastLineColumnWidth(0), IsMultiline(false),
+ IsFirst(false), MustBreakBefore(false), IsUnterminatedLiteral(false),
+ BlockKind(BK_Unknown), Type(TT_Unknown), SpacesRequiredBefore(0),
+ CanBreakBefore(false), ClosesTemplateDeclaration(false),
+ ParameterCount(0), PackingKind(PPK_Inconclusive), TotalLength(0),
+ OriginalColumn(0), UnbreakableTailLength(0), BindingStrength(0),
+ SplitPenalty(0), LongestObjCSelectorName(0), FakeRParens(0),
+ StartsBinaryExpression(false), EndsBinaryExpression(false),
+ LastInChainOfCalls(false), PartOfMultiVariableDeclStmt(false),
+ MatchingParen(NULL), Previous(NULL), Next(NULL),
+ Decision(FD_Unformatted), Finalized(false) {}
+
+ /// \brief The \c Token.
+ Token Tok;
+
+ /// \brief The number of newlines immediately before the \c Token.
+ ///
+ /// This can be used to determine what the user wrote in the original code
+ /// and thereby e.g. leave an empty line between two function definitions.
+ unsigned NewlinesBefore;
+
+ /// \brief Whether there is at least one unescaped newline before the \c
+ /// Token.
+ bool HasUnescapedNewline;
+
+ /// \brief The range of the whitespace immediately preceding the \c Token.
+ SourceRange WhitespaceRange;
+
+ /// \brief The offset just past the last '\n' in this token's leading
+ /// whitespace (relative to \c WhiteSpaceStart). 0 if there is no '\n'.
+ unsigned LastNewlineOffset;
+
+ /// \brief The width of the non-whitespace parts of the token (or its first
+ /// line for multi-line tokens) in columns.
+ /// We need this to correctly measure number of columns a token spans.
+ unsigned ColumnWidth;
+
+ /// \brief Contains the width in columns of the last line of a multi-line
+ /// token.
+ unsigned LastLineColumnWidth;
+
+ /// \brief Whether the token text contains newlines (escaped or not).
+ bool IsMultiline;
+
+ /// \brief Indicates that this is the first token.
+ bool IsFirst;
+
+ /// \brief Whether there must be a line break before this token.
+ ///
+ /// This happens for example when a preprocessor directive ended directly
+ /// before the token.
+ bool MustBreakBefore;
+
+ /// \brief Returns actual token start location without leading escaped
+ /// newlines and whitespace.
+ ///
+ /// This can be different to Tok.getLocation(), which includes leading escaped
+ /// newlines.
+ SourceLocation getStartOfNonWhitespace() const {
+ return WhitespaceRange.getEnd();
+ }
+
+ /// \brief The raw text of the token.
+ ///
+ /// Contains the raw token text without leading whitespace and without leading
+ /// escaped newlines.
+ StringRef TokenText;
+
+ /// \brief Set to \c true if this token is an unterminated literal.
+ bool IsUnterminatedLiteral;
+
+ /// \brief Contains the kind of block if this token is a brace.
+ BraceBlockKind BlockKind;
+
+ TokenType Type;
+
+ /// \brief The number of spaces that should be inserted before this token.
+ unsigned SpacesRequiredBefore;
+
+ /// \brief \c true if it is allowed to break before this token.
+ bool CanBreakBefore;
+
+ bool ClosesTemplateDeclaration;
+
+ /// \brief Number of parameters, if this is "(", "[" or "<".
+ ///
+ /// The constructor sets this to 0. During annotation it becomes 1 at the
+ /// first non-comment token inside the brackets and grows by 1 for every
+ /// comma (see updateParameterCount() in TokenAnnotator.cpp).
+ unsigned ParameterCount;
+
+ /// \brief A token can have a special role that can carry extra information
+ /// about the token's formatting.
+ llvm::OwningPtr<TokenRole> Role;
+
+ /// \brief If this is an opening parenthesis, how are the parameters packed?
+ ParameterPackingKind PackingKind;
+
+ /// \brief The total length of the unwrapped line up to and including this
+ /// token.
+ unsigned TotalLength;
+
+ /// \brief The original 0-based column of this token, including expanded tabs.
+ /// The configured TabWidth is used as tab width.
+ unsigned OriginalColumn;
+
+ /// \brief The length of following tokens until the next natural split point,
+ /// or the next token that can be broken.
+ unsigned UnbreakableTailLength;
+
+ // FIXME: Come up with a 'cleaner' concept.
+ /// \brief The binding strength of a token. This is a combined value of
+ /// operator precedence, parenthesis nesting, etc.
+ unsigned BindingStrength;
+
+ /// \brief Penalty for inserting a line break before this token.
+ unsigned SplitPenalty;
+
+ /// \brief If this is the first ObjC selector name in an ObjC method
+ /// definition or call, this contains the length of the longest name.
+ unsigned LongestObjCSelectorName;
+
+ /// \brief Stores the number of required fake parentheses and the
+ /// corresponding operator precedence.
+ ///
+ /// If multiple fake parentheses start at a token, this vector stores them in
+ /// reverse order, i.e. inner fake parenthesis first.
+ SmallVector<prec::Level, 4> FakeLParens;
+ /// \brief Insert this many fake ) after this token for correct indentation.
+ unsigned FakeRParens;
+
+ /// \brief \c true if this token starts a binary expression, i.e. has at least
+ /// one fake l_paren with a precedence greater than prec::Unknown.
+ bool StartsBinaryExpression;
+ /// \brief \c true if this token ends a binary expression.
+ bool EndsBinaryExpression;
+
+ /// \brief Is this the last "." or "->" in a builder-type call?
+ bool LastInChainOfCalls;
+
+ /// \brief Is this token part of a \c DeclStmt defining multiple variables?
+ ///
+ /// Only set if \c Type == \c TT_StartOfName.
+ bool PartOfMultiVariableDeclStmt;
+
+ bool is(tok::TokenKind Kind) const { return Tok.is(Kind); }
+
+ bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const {
+ return is(K1) || is(K2);
+ }
+
+ bool isOneOf(tok::TokenKind K1, tok::TokenKind K2, tok::TokenKind K3) const {
+ return is(K1) || is(K2) || is(K3);
+ }
+
+ bool isOneOf(tok::TokenKind K1, tok::TokenKind K2, tok::TokenKind K3,
+ tok::TokenKind K4, tok::TokenKind K5 = tok::NUM_TOKENS,
+ tok::TokenKind K6 = tok::NUM_TOKENS,
+ tok::TokenKind K7 = tok::NUM_TOKENS,
+ tok::TokenKind K8 = tok::NUM_TOKENS,
+ tok::TokenKind K9 = tok::NUM_TOKENS,
+ tok::TokenKind K10 = tok::NUM_TOKENS,
+ tok::TokenKind K11 = tok::NUM_TOKENS,
+ tok::TokenKind K12 = tok::NUM_TOKENS) const {
+ return is(K1) || is(K2) || is(K3) || is(K4) || is(K5) || is(K6) || is(K7) ||
+ is(K8) || is(K9) || is(K10) || is(K11) || is(K12);
+ }
+
+ bool isNot(tok::TokenKind Kind) const { return Tok.isNot(Kind); }
+
+ bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const {
+ return Tok.isObjCAtKeyword(Kind);
+ }
+
+ bool isAccessSpecifier(bool ColonRequired = true) const {
+ return isOneOf(tok::kw_public, tok::kw_protected, tok::kw_private) &&
+ (!ColonRequired || (Next && Next->is(tok::colon)));
+ }
+
+ bool isObjCAccessSpecifier() const {
+ return is(tok::at) && Next && (Next->isObjCAtKeyword(tok::objc_public) ||
+ Next->isObjCAtKeyword(tok::objc_protected) ||
+ Next->isObjCAtKeyword(tok::objc_package) ||
+ Next->isObjCAtKeyword(tok::objc_private));
+ }
+
+ /// \brief Returns whether \p Tok is ([{ or a template opening <.
+ bool opensScope() const {
+ return isOneOf(tok::l_paren, tok::l_brace, tok::l_square) ||
+ Type == TT_TemplateOpener;
+ }
+ /// \brief Returns whether \p Tok is )]} or a template closing >.
+ bool closesScope() const {
+ return isOneOf(tok::r_paren, tok::r_brace, tok::r_square) ||
+ Type == TT_TemplateCloser;
+ }
+
+ /// \brief Returns \c true if this is a "." or "->" accessing a member.
+ bool isMemberAccess() const {
+ return isOneOf(tok::arrow, tok::period) &&
+ Type != TT_DesignatedInitializerPeriod;
+ }
+
+ bool isUnaryOperator() const {
+ switch (Tok.getKind()) {
+ case tok::plus:
+ case tok::plusplus:
+ case tok::minus:
+ case tok::minusminus:
+ case tok::exclaim:
+ case tok::tilde:
+ case tok::kw_sizeof:
+ case tok::kw_alignof:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ bool isBinaryOperator() const {
+ // Comma is a binary operator, but does not behave as such wrt. formatting.
+ return getPrecedence() > prec::Comma;
+ }
+
+ bool isTrailingComment() const {
+ return is(tok::comment) && (!Next || Next->NewlinesBefore > 0);
+ }
+
+ prec::Level getPrecedence() const {
+ return getBinOpPrecedence(Tok.getKind(), true, true);
+ }
+
+ /// \brief Returns the previous token ignoring comments.
+ FormatToken *getPreviousNonComment() const {
+ FormatToken *Tok = Previous;
+ while (Tok != NULL && Tok->is(tok::comment))
+ Tok = Tok->Previous;
+ return Tok;
+ }
+
+ /// \brief Returns the next token ignoring comments.
+ const FormatToken *getNextNonComment() const {
+ const FormatToken *Tok = Next;
+ while (Tok != NULL && Tok->is(tok::comment))
+ Tok = Tok->Next;
+ return Tok;
+ }
+
+ /// \brief Returns \c true if this token starts a block-type list, i.e. a
+ /// list that should be indented with a block indent.
+ bool opensBlockTypeList(const FormatStyle &Style) const {
+ return Type == TT_ArrayInitializerLSquare ||
+ (is(tok::l_brace) &&
+ (BlockKind == BK_Block || Type == TT_DictLiteral ||
+ !Style.Cpp11BracedListStyle));
+ }
+
+ /// \brief Same as opensBlockTypeList, but for the closing token.
+ bool closesBlockTypeList(const FormatStyle &Style) const {
+ return MatchingParen && MatchingParen->opensBlockTypeList(Style);
+ }
+
+ FormatToken *MatchingParen;
+
+ FormatToken *Previous;
+ FormatToken *Next;
+
+ SmallVector<AnnotatedLine *, 1> Children;
+
+ /// \brief Stores the formatting decision for the token once it was made.
+ FormatDecision Decision;
+
+ /// \brief If \c true, this token has been fully formatted (indented and
+ /// potentially re-formatted inside), and we do not allow further formatting
+ /// changes.
+ bool Finalized;
+
+private:
+ // Disallow copying.
+ FormatToken(const FormatToken &) LLVM_DELETED_FUNCTION;
+ void operator=(const FormatToken &) LLVM_DELETED_FUNCTION;
+};
+
+class ContinuationIndenter;
+struct LineState;
+
+class TokenRole {
+public:
+ TokenRole(const FormatStyle &Style) : Style(Style) {}
+ virtual ~TokenRole();
+
+ /// \brief After the \c TokenAnnotator has finished annotating all the tokens,
+ /// this function precomputes required information for formatting.
+ virtual void precomputeFormattingInfos(const FormatToken *Token);
+
+ /// \brief Apply the special formatting that the given role demands.
+ ///
+ /// Continues formatting from \p State leaving indentation to \p Indenter and
+ /// returns the total penalty that this formatting incurs.
+ virtual unsigned format(LineState &State, ContinuationIndenter *Indenter,
+ bool DryRun) {
+ return 0; // Base implementation: no special formatting, no penalty.
+ }
+
+ /// \brief Notifies the \c Role that a comma was found.
+ virtual void CommaFound(const FormatToken *Token) {} // Ignored by the base.
+
+protected:
+ const FormatStyle &Style; // Reference, not a copy: must outlive this role.
+};
+
+class CommaSeparatedList : public TokenRole {
+public:
+ CommaSeparatedList(const FormatStyle &Style) : TokenRole(Style) {}
+
+ virtual void precomputeFormattingInfos(const FormatToken *Token);
+
+ virtual unsigned format(LineState &State, ContinuationIndenter *Indenter,
+ bool DryRun);
+
+ /// \brief Adds \p Token as the next comma to the \c CommaSeparated list.
+ virtual void CommaFound(const FormatToken *Token) { Commas.push_back(Token); }
+
+private:
+ /// \brief A struct that holds information on how to format a given list with
+ /// a specific number of columns.
+ struct ColumnFormat {
+ /// \brief The number of columns to use.
+ unsigned Columns;
+
+ /// \brief The total width in characters, with one space between columns.
+ unsigned TotalWidth;
+
+ /// \brief The number of lines required for this format.
+ unsigned LineCount;
+
+ /// \brief The size of each column in characters.
+ SmallVector<unsigned, 8> ColumnSizes;
+ };
+
+ /// \brief Calculate which \c ColumnFormat fits best into
+ /// \p RemainingCharacters. Returns NULL if no precomputed format fits.
+ const ColumnFormat *getColumnFormat(unsigned RemainingCharacters) const;
+
+ /// \brief The ordered \c FormatTokens making up the commas of this list.
+ SmallVector<const FormatToken *, 8> Commas;
+
+ /// \brief The length of each of the list's items in characters including the
+ /// trailing comma.
+ SmallVector<unsigned, 8> ItemLengths;
+
+ /// \brief Precomputed formats that can be used for this list.
+ SmallVector<ColumnFormat, 4> Formats;
+};
+
+} // namespace format
+} // namespace clang
+
+#endif // LLVM_CLANG_FORMAT_FORMAT_TOKEN_H
diff --git a/lib/Format/TokenAnnotator.cpp b/lib/Format/TokenAnnotator.cpp
index 17abb01d181d..074e1d78454b 100644
--- a/lib/Format/TokenAnnotator.cpp
+++ b/lib/Format/TokenAnnotator.cpp
@@ -15,61 +15,12 @@
#include "TokenAnnotator.h"
#include "clang/Basic/SourceManager.h"
-#include "clang/Lex/Lexer.h"
#include "llvm/Support/Debug.h"
namespace clang {
namespace format {
-bool AnnotatedToken::isUnaryOperator() const {
- switch (FormatTok.Tok.getKind()) {
- case tok::plus:
- case tok::plusplus:
- case tok::minus:
- case tok::minusminus:
- case tok::exclaim:
- case tok::tilde:
- case tok::kw_sizeof:
- case tok::kw_alignof:
- return true;
- default:
- return false;
- }
-}
-
-bool AnnotatedToken::isBinaryOperator() const {
- // Comma is a binary operator, but does not behave as such wrt. formatting.
- return getPrecedence(*this) > prec::Comma;
-}
-
-bool AnnotatedToken::isTrailingComment() const {
- return is(tok::comment) &&
- (Children.empty() || Children[0].FormatTok.NewlinesBefore > 0);
-}
-
-AnnotatedToken *AnnotatedToken::getPreviousNoneComment() const {
- AnnotatedToken *Tok = Parent;
- while (Tok != NULL && Tok->is(tok::comment))
- Tok = Tok->Parent;
- return Tok;
-}
-
-const AnnotatedToken *AnnotatedToken::getNextNoneComment() const {
- const AnnotatedToken *Tok = Children.empty() ? NULL : &Children[0];
- while (Tok != NULL && Tok->is(tok::comment))
- Tok = Tok->Children.empty() ? NULL : &Tok->Children[0];
- return Tok;
-}
-
-bool AnnotatedToken::closesScope() const {
- return isOneOf(tok::r_paren, tok::r_brace, tok::r_square) ||
- Type == TT_TemplateCloser;
-}
-
-bool AnnotatedToken::opensScope() const {
- return isOneOf(tok::l_paren, tok::l_brace, tok::l_square) ||
- Type == TT_TemplateOpener;
-}
+namespace {
/// \brief A parser that gathers additional information about tokens.
///
@@ -78,11 +29,11 @@ bool AnnotatedToken::opensScope() const {
/// into template parameter lists.
class AnnotatingParser {
public:
- AnnotatingParser(SourceManager &SourceMgr, Lexer &Lex, AnnotatedLine &Line,
+ AnnotatingParser(const FormatStyle &Style, AnnotatedLine &Line,
IdentifierInfo &Ident_in)
- : SourceMgr(SourceMgr), Lex(Lex), Line(Line), CurrentToken(&Line.First),
- KeywordVirtualFound(false), NameFound(false), Ident_in(Ident_in) {
- Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/ false));
+ : Style(Style), Line(Line), CurrentToken(Line.First),
+ KeywordVirtualFound(false), AutoFound(false), Ident_in(Ident_in) {
+ Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/false));
}
private:
@@ -90,7 +41,7 @@ private:
if (CurrentToken == NULL)
return false;
ScopedContextCreator ContextCreator(*this, tok::less, 10);
- AnnotatedToken *Left = CurrentToken->Parent;
+ FormatToken *Left = CurrentToken->Previous;
Contexts.back().IsExpression = false;
while (CurrentToken != NULL) {
if (CurrentToken->is(tok::greater)) {
@@ -101,8 +52,18 @@ private:
return true;
}
if (CurrentToken->isOneOf(tok::r_paren, tok::r_square, tok::r_brace,
- tok::pipepipe, tok::ampamp, tok::question,
- tok::colon))
+ tok::question, tok::colon))
+ return false;
+ // If a && or || is found and interpreted as a binary operator, this set
+ // of angles is likely part of something like "a < b && c > d". If the
+ // angles are inside an expression, the ||/&& might also be a binary
+ // operator that was misinterpreted because we are parsing template
+ // parameters.
+ // FIXME: This is getting out of hand, write a decent parser.
+ if (CurrentToken->Previous->isOneOf(tok::pipepipe, tok::ampamp) &&
+ (CurrentToken->Previous->Type == TT_BinaryOperator ||
+ Contexts[Contexts.size() - 2].IsExpression) &&
+ Line.First->isNot(tok::kw_template))
return false;
updateParameterCount(Left, CurrentToken);
if (!consumeToken())
@@ -121,42 +82,66 @@ private:
Contexts.size() == 2 && Contexts[0].ColonIsForRangeExpr;
bool StartsObjCMethodExpr = false;
- AnnotatedToken *Left = CurrentToken->Parent;
+ FormatToken *Left = CurrentToken->Previous;
if (CurrentToken->is(tok::caret)) {
// ^( starts a block.
Left->Type = TT_ObjCBlockLParen;
- } else if (AnnotatedToken *MaybeSel = Left->Parent) {
+ } else if (FormatToken *MaybeSel = Left->Previous) {
// @selector( starts a selector.
- if (MaybeSel->isObjCAtKeyword(tok::objc_selector) && MaybeSel->Parent &&
- MaybeSel->Parent->is(tok::at)) {
+ if (MaybeSel->isObjCAtKeyword(tok::objc_selector) && MaybeSel->Previous &&
+ MaybeSel->Previous->is(tok::at)) {
StartsObjCMethodExpr = true;
}
}
+ if (Left->Previous && Left->Previous->isOneOf(tok::kw_static_assert,
+ tok::kw_if, tok::kw_while)) {
+ // static_assert, if and while usually contain expressions.
+ Contexts.back().IsExpression = true;
+ } else if (Left->Previous && Left->Previous->is(tok::r_square) &&
+ Left->Previous->MatchingParen &&
+ Left->Previous->MatchingParen->Type == TT_LambdaLSquare) {
+ // This is a parameter list of a lambda expression.
+ Contexts.back().IsExpression = false;
+ }
+
if (StartsObjCMethodExpr) {
Contexts.back().ColonIsObjCMethodExpr = true;
Left->Type = TT_ObjCMethodExpr;
}
+ bool MightBeFunctionType = CurrentToken->is(tok::star);
+ bool HasMultipleLines = false;
+ bool HasMultipleParametersOnALine = false;
while (CurrentToken != NULL) {
// LookForDecls is set when "if (" has been seen. Check for
// 'identifier' '*' 'identifier' followed by not '=' -- this
// '*' has to be a binary operator but determineStarAmpUsage() will
// categorize it as an unary operator, so set the right type here.
- if (LookForDecls && !CurrentToken->Children.empty()) {
- AnnotatedToken &Prev = *CurrentToken->Parent;
- AnnotatedToken &Next = CurrentToken->Children[0];
- if (Prev.Parent->is(tok::identifier) &&
- Prev.isOneOf(tok::star, tok::amp, tok::ampamp) &&
- CurrentToken->is(tok::identifier) && Next.isNot(tok::equal)) {
- Prev.Type = TT_BinaryOperator;
- LookForDecls = false;
+ if (LookForDecls && CurrentToken->Next) {
+ FormatToken *Prev = CurrentToken->getPreviousNonComment();
+ if (Prev) {
+ FormatToken *PrevPrev = Prev->getPreviousNonComment();
+ FormatToken *Next = CurrentToken->Next;
+ if (PrevPrev && PrevPrev->is(tok::identifier) &&
+ Prev->isOneOf(tok::star, tok::amp, tok::ampamp) &&
+ CurrentToken->is(tok::identifier) && Next->isNot(tok::equal)) {
+ Prev->Type = TT_BinaryOperator;
+ LookForDecls = false;
+ }
}
}
+ if (CurrentToken->Previous->Type == TT_PointerOrReference &&
+ CurrentToken->Previous->Previous->isOneOf(tok::l_paren,
+ tok::coloncolon))
+ MightBeFunctionType = true;
if (CurrentToken->is(tok::r_paren)) {
- if (CurrentToken->Parent->closesScope())
- CurrentToken->Parent->MatchingParen->NoMoreTokensOnLevel = true;
+ if (MightBeFunctionType && CurrentToken->Next &&
+ (CurrentToken->Next->is(tok::l_paren) ||
+ (CurrentToken->Next->is(tok::l_square) &&
+ !Contexts.back().IsExpression)))
+ Left->Type = TT_FunctionTypeLParen;
Left->MatchingParen = CurrentToken;
CurrentToken->MatchingParen = Left;
@@ -168,14 +153,27 @@ private:
}
}
+ if (!HasMultipleLines)
+ Left->PackingKind = PPK_Inconclusive;
+ else if (HasMultipleParametersOnALine)
+ Left->PackingKind = PPK_BinPacked;
+ else
+ Left->PackingKind = PPK_OnePerLine;
+
next();
return true;
}
if (CurrentToken->isOneOf(tok::r_square, tok::r_brace))
return false;
updateParameterCount(Left, CurrentToken);
+ if (CurrentToken->is(tok::comma) && CurrentToken->Next &&
+ !CurrentToken->Next->HasUnescapedNewline &&
+ !CurrentToken->Next->isTrailingComment())
+ HasMultipleParametersOnALine = true;
if (!consumeToken())
return false;
+ if (CurrentToken && CurrentToken->HasUnescapedNewline)
+ HasMultipleLines = true;
}
return false;
}
@@ -184,34 +182,35 @@ private:
if (!CurrentToken)
return false;
- // A '[' could be an index subscript (after an indentifier or after
+ // A '[' could be an index subscript (after an identifier or after
// ')' or ']'), it could be the start of an Objective-C method
// expression, or it could be the start of an Objective-C array literal.
- AnnotatedToken *Left = CurrentToken->Parent;
- AnnotatedToken *Parent = Left->getPreviousNoneComment();
+ FormatToken *Left = CurrentToken->Previous;
+ FormatToken *Parent = Left->getPreviousNonComment();
bool StartsObjCMethodExpr =
- Contexts.back().CanBeExpression &&
+ Contexts.back().CanBeExpression && Left->Type != TT_LambdaLSquare &&
(!Parent || Parent->isOneOf(tok::colon, tok::l_square, tok::l_paren,
tok::kw_return, tok::kw_throw) ||
Parent->isUnaryOperator() || Parent->Type == TT_ObjCForIn ||
Parent->Type == TT_CastRParen ||
- getBinOpPrecedence(Parent->FormatTok.Tok.getKind(), true, true) >
- prec::Unknown);
+ getBinOpPrecedence(Parent->Tok.getKind(), true, true) > prec::Unknown);
ScopedContextCreator ContextCreator(*this, tok::l_square, 10);
Contexts.back().IsExpression = true;
- bool StartsObjCArrayLiteral = Parent && Parent->is(tok::at);
+ bool ColonFound = false;
if (StartsObjCMethodExpr) {
Contexts.back().ColonIsObjCMethodExpr = true;
Left->Type = TT_ObjCMethodExpr;
- } else if (StartsObjCArrayLiteral) {
- Left->Type = TT_ObjCArrayLiteral;
+ } else if (Parent && Parent->is(tok::at)) {
+ Left->Type = TT_ArrayInitializerLSquare;
+ } else if (Left->Type == TT_Unknown) {
+ Left->Type = TT_ArraySubscriptLSquare;
}
while (CurrentToken != NULL) {
if (CurrentToken->is(tok::r_square)) {
- if (!CurrentToken->Children.empty() &&
- CurrentToken->Children[0].is(tok::l_paren)) {
+ if (CurrentToken->Next && CurrentToken->Next->is(tok::l_paren) &&
+ Left->Type == TT_ObjCMethodExpr) {
// An ObjC method call is rarely followed by an open parenthesis.
// FIXME: Do we incorrectly label ":" with this?
StartsObjCMethodExpr = false;
@@ -224,8 +223,6 @@ private:
// binary operator.
if (Parent != NULL && Parent->Type == TT_PointerOrReference)
Parent->Type = TT_BinaryOperator;
- } else if (StartsObjCArrayLiteral) {
- CurrentToken->Type = TT_ObjCArrayLiteral;
}
Left->MatchingParen = CurrentToken;
CurrentToken->MatchingParen = Left;
@@ -237,6 +234,12 @@ private:
}
if (CurrentToken->isOneOf(tok::r_paren, tok::r_brace))
return false;
+ if (CurrentToken->is(tok::colon))
+ ColonFound = true;
+ if (CurrentToken->is(tok::comma) &&
+ (Left->Type == TT_ArraySubscriptLSquare ||
+ (Left->Type == TT_ObjCMethodExpr && !ColonFound)))
+ Left->Type = TT_ArrayInitializerLSquare;
updateParameterCount(Left, CurrentToken);
if (!consumeToken())
return false;
@@ -246,8 +249,10 @@ private:
bool parseBrace() {
if (CurrentToken != NULL) {
+ FormatToken *Left = CurrentToken->Previous;
ScopedContextCreator ContextCreator(*this, tok::l_brace, 1);
- AnnotatedToken *Left = CurrentToken->Parent;
+ Contexts.back().ColonIsDictLiteral = true;
+
while (CurrentToken != NULL) {
if (CurrentToken->is(tok::r_brace)) {
Left->MatchingParen = CurrentToken;
@@ -258,6 +263,8 @@ private:
if (CurrentToken->isOneOf(tok::r_paren, tok::r_square))
return false;
updateParameterCount(Left, CurrentToken);
+ if (CurrentToken->is(tok::colon))
+ Left->Type = TT_DictLiteral;
if (!consumeToken())
return false;
}
@@ -267,11 +274,15 @@ private:
return true;
}
- void updateParameterCount(AnnotatedToken *Left, AnnotatedToken *Current) {
- if (Current->is(tok::comma))
+ void updateParameterCount(FormatToken *Left, FormatToken *Current) {
+ if (Current->is(tok::comma)) {
++Left->ParameterCount;
- else if (Left->ParameterCount == 0 && Current->isNot(tok::comment))
+ if (!Left->Role)
+ Left->Role.reset(new CommaSeparatedList(Style));
+ Left->Role->CommaFound(Current);
+ } else if (Left->ParameterCount == 0 && Current->isNot(tok::comment)) {
Left->ParameterCount = 1;
+ }
}
bool parseConditional() {
@@ -294,40 +305,45 @@ private:
if (!parseAngle())
return false;
if (CurrentToken != NULL)
- CurrentToken->Parent->ClosesTemplateDeclaration = true;
+ CurrentToken->Previous->ClosesTemplateDeclaration = true;
return true;
}
return false;
}
bool consumeToken() {
- AnnotatedToken *Tok = CurrentToken;
+ FormatToken *Tok = CurrentToken;
next();
- switch (Tok->FormatTok.Tok.getKind()) {
+ switch (Tok->Tok.getKind()) {
case tok::plus:
case tok::minus:
- if (Tok->Parent == NULL && Line.MustBeDeclaration)
+ if (Tok->Previous == NULL && Line.MustBeDeclaration)
Tok->Type = TT_ObjCMethodSpecifier;
break;
case tok::colon:
- if (Tok->Parent == NULL)
+ if (Tok->Previous == NULL)
return false;
// Colons from ?: are handled in parseConditional().
- if (Tok->Parent->is(tok::r_paren) && Contexts.size() == 1) {
+ if (Tok->Previous->is(tok::r_paren) && Contexts.size() == 1) {
Tok->Type = TT_CtorInitializerColon;
+ } else if (Contexts.back().ColonIsDictLiteral) {
+ Tok->Type = TT_DictLiteral;
} else if (Contexts.back().ColonIsObjCMethodExpr ||
- Line.First.Type == TT_ObjCMethodSpecifier) {
+ Line.First->Type == TT_ObjCMethodSpecifier) {
Tok->Type = TT_ObjCMethodExpr;
- Tok->Parent->Type = TT_ObjCSelectorName;
- if (Tok->Parent->FormatTok.TokenLength >
- Contexts.back().LongestObjCSelectorName)
- Contexts.back().LongestObjCSelectorName =
- Tok->Parent->FormatTok.TokenLength;
+ Tok->Previous->Type = TT_ObjCSelectorName;
+ if (Tok->Previous->ColumnWidth >
+ Contexts.back().LongestObjCSelectorName) {
+ Contexts.back().LongestObjCSelectorName = Tok->Previous->ColumnWidth;
+ }
if (Contexts.back().FirstObjCSelectorName == NULL)
- Contexts.back().FirstObjCSelectorName = Tok->Parent;
+ Contexts.back().FirstObjCSelectorName = Tok->Previous;
} else if (Contexts.back().ColonIsForRangeExpr) {
Tok->Type = TT_RangeBasedForLoopColon;
- } else if (Contexts.size() == 1) {
+ } else if (CurrentToken != NULL &&
+ CurrentToken->is(tok::numeric_constant)) {
+ Tok->Type = TT_BitFieldColon;
+ } else if (Contexts.size() == 1 && Line.First->isNot(tok::kw_enum)) {
Tok->Type = TT_InheritanceColon;
} else if (Contexts.back().ContextKind == tok::l_paren) {
Tok->Type = TT_InlineASMColon;
@@ -337,7 +353,7 @@ private:
case tok::kw_while:
if (CurrentToken != NULL && CurrentToken->is(tok::l_paren)) {
next();
- if (!parseParens(/*LookForDecls=*/ true))
+ if (!parseParens(/*LookForDecls=*/true))
return false;
}
break;
@@ -350,7 +366,8 @@ private:
case tok::l_paren:
if (!parseParens())
return false;
- if (Line.MustBeDeclaration && NameFound && !Contexts.back().IsExpression)
+ if (Line.MustBeDeclaration && Contexts.size() == 1 &&
+ !Contexts.back().IsExpression)
Line.MightBeFunctionDecl = true;
break;
case tok::l_square:
@@ -362,7 +379,7 @@ private:
return false;
break;
case tok::less:
- if (parseAngle())
+ if (Tok->Previous && !Tok->Previous->Tok.isLiteral() && parseAngle())
Tok->Type = TT_TemplateOpener;
else {
Tok->Type = TT_BinaryOperator;
@@ -375,20 +392,26 @@ private:
return false;
case tok::r_brace:
// Lines can start with '}'.
- if (Tok->Parent != NULL)
+ if (Tok->Previous != NULL)
return false;
break;
case tok::greater:
Tok->Type = TT_BinaryOperator;
break;
case tok::kw_operator:
- while (CurrentToken && CurrentToken->isNot(tok::l_paren)) {
+ while (CurrentToken &&
+ !CurrentToken->isOneOf(tok::l_paren, tok::semi, tok::r_paren)) {
if (CurrentToken->isOneOf(tok::star, tok::amp))
CurrentToken->Type = TT_PointerOrReference;
consumeToken();
+ if (CurrentToken && CurrentToken->Previous->Type == TT_BinaryOperator)
+ CurrentToken->Previous->Type = TT_OverloadedOperator;
}
- if (CurrentToken)
+ if (CurrentToken) {
CurrentToken->Type = TT_OverloadedOperatorLParen;
+ if (CurrentToken->Previous->Type == TT_BinaryOperator)
+ CurrentToken->Previous->Type = TT_OverloadedOperator;
+ }
break;
case tok::question:
parseConditional();
@@ -397,13 +420,15 @@ private:
parseTemplateDeclaration();
break;
case tok::identifier:
- if (Line.First.is(tok::kw_for) &&
- Tok->FormatTok.Tok.getIdentifierInfo() == &Ident_in)
+ if (Line.First->is(tok::kw_for) &&
+ Tok->Tok.getIdentifierInfo() == &Ident_in)
Tok->Type = TT_ObjCForIn;
break;
case tok::comma:
if (Contexts.back().FirstStartOfName)
Contexts.back().FirstStartOfName->PartOfMultiVariableDeclStmt = true;
+ if (Contexts.back().InCtorInitializer)
+ Tok->Type = TT_CtorInitializerComma;
break;
default:
break;
@@ -416,8 +441,7 @@ private:
if (CurrentToken != NULL && CurrentToken->is(tok::less)) {
next();
while (CurrentToken != NULL) {
- if (CurrentToken->isNot(tok::comment) ||
- !CurrentToken->Children.empty())
+ if (CurrentToken->isNot(tok::comment) || CurrentToken->Next)
CurrentToken->Type = TT_ImplicitStringLiteral;
next();
}
@@ -447,11 +471,15 @@ private:
next();
if (CurrentToken == NULL)
return;
+ if (CurrentToken->Tok.is(tok::numeric_constant)) {
+ CurrentToken->SpacesRequiredBefore = 1;
+ return;
+ }
// Hashes in the middle of a line can lead to any strange token
// sequence.
- if (CurrentToken->FormatTok.Tok.getIdentifierInfo() == NULL)
+ if (CurrentToken->Tok.getIdentifierInfo() == NULL)
return;
- switch (CurrentToken->FormatTok.Tok.getIdentifierInfo()->getPPKeywordID()) {
+ switch (CurrentToken->Tok.getIdentifierInfo()->getPPKeywordID()) {
case tok::pp_include:
case tok::pp_import:
parseIncludeDirective();
@@ -473,9 +501,6 @@ private:
public:
LineType parseLine() {
- int PeriodsAndArrows = 0;
- AnnotatedToken *LastPeriodOrArrow = NULL;
- bool CanBeBuilderTypeStmt = true;
if (CurrentToken->is(tok::hash)) {
parsePreprocessorDirective();
return LT_PreprocessorDirective;
@@ -483,27 +508,13 @@ public:
while (CurrentToken != NULL) {
if (CurrentToken->is(tok::kw_virtual))
KeywordVirtualFound = true;
- if (CurrentToken->isOneOf(tok::period, tok::arrow)) {
- ++PeriodsAndArrows;
- LastPeriodOrArrow = CurrentToken;
- }
- AnnotatedToken *TheToken = CurrentToken;
if (!consumeToken())
return LT_Invalid;
- if (getPrecedence(*TheToken) > prec::Assignment &&
- TheToken->Type == TT_BinaryOperator)
- CanBeBuilderTypeStmt = false;
}
if (KeywordVirtualFound)
return LT_VirtualFunctionDecl;
- // Assume a builder-type call if there are 2 or more "." and "->".
- if (PeriodsAndArrows >= 2 && CanBeBuilderTypeStmt) {
- LastPeriodOrArrow->LastInChainOfCalls = true;
- return LT_BuilderTypeCall;
- }
-
- if (Line.First.Type == TT_ObjCMethodSpecifier) {
+ if (Line.First->Type == TT_ObjCMethodSpecifier) {
if (Contexts.back().FirstObjCSelectorName != NULL)
Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
Contexts.back().LongestObjCSelectorName;
@@ -520,15 +531,20 @@ private:
CurrentToken->BindingStrength = Contexts.back().BindingStrength;
}
- if (CurrentToken != NULL && !CurrentToken->Children.empty())
- CurrentToken = &CurrentToken->Children[0];
- else
- CurrentToken = NULL;
-
- // Reset token type in case we have already looked at it and then recovered
- // from an error (e.g. failure to find the matching >).
if (CurrentToken != NULL)
- CurrentToken->Type = TT_Unknown;
+ CurrentToken = CurrentToken->Next;
+
+ if (CurrentToken != NULL) {
+ // Reset token type in case we have already looked at it and then
+ // recovered from an error (e.g. failure to find the matching >).
+ if (CurrentToken->Type != TT_LambdaLSquare &&
+ CurrentToken->Type != TT_ImplicitStringLiteral)
+ CurrentToken->Type = TT_Unknown;
+ if (CurrentToken->Role)
+ CurrentToken->Role.reset(NULL);
+ CurrentToken->FakeLParens.clear();
+ CurrentToken->FakeRParens = 0;
+ }
}
/// \brief A struct to hold information valid in a specific context, e.g.
@@ -538,19 +554,22 @@ private:
bool IsExpression)
: ContextKind(ContextKind), BindingStrength(BindingStrength),
LongestObjCSelectorName(0), ColonIsForRangeExpr(false),
- ColonIsObjCMethodExpr(false), FirstObjCSelectorName(NULL),
- FirstStartOfName(NULL), IsExpression(IsExpression),
- CanBeExpression(true) {}
+ ColonIsDictLiteral(false), ColonIsObjCMethodExpr(false),
+ FirstObjCSelectorName(NULL), FirstStartOfName(NULL),
+ IsExpression(IsExpression), CanBeExpression(true),
+ InCtorInitializer(false) {}
tok::TokenKind ContextKind;
unsigned BindingStrength;
unsigned LongestObjCSelectorName;
bool ColonIsForRangeExpr;
+ bool ColonIsDictLiteral;
bool ColonIsObjCMethodExpr;
- AnnotatedToken *FirstObjCSelectorName;
- AnnotatedToken *FirstStartOfName;
+ FormatToken *FirstObjCSelectorName;
+ FormatToken *FirstStartOfName;
bool IsExpression;
bool CanBeExpression;
+ bool InCtorInitializer;
};
/// \brief Puts a new \c Context onto the stack \c Contexts for the lifetime
@@ -561,21 +580,22 @@ private:
ScopedContextCreator(AnnotatingParser &P, tok::TokenKind ContextKind,
unsigned Increase)
: P(P) {
- P.Contexts.push_back(
- Context(ContextKind, P.Contexts.back().BindingStrength + Increase,
- P.Contexts.back().IsExpression));
+ P.Contexts.push_back(Context(ContextKind,
+ P.Contexts.back().BindingStrength + Increase,
+ P.Contexts.back().IsExpression));
}
~ScopedContextCreator() { P.Contexts.pop_back(); }
};
- void determineTokenType(AnnotatedToken &Current) {
- if (getPrecedence(Current) == prec::Assignment &&
- (!Current.Parent || Current.Parent->isNot(tok::kw_operator))) {
+ void determineTokenType(FormatToken &Current) {
+ if (Current.getPrecedence() == prec::Assignment &&
+ !Line.First->isOneOf(tok::kw_template, tok::kw_using) &&
+ (!Current.Previous || Current.Previous->isNot(tok::kw_operator))) {
Contexts.back().IsExpression = true;
- for (AnnotatedToken *Previous = Current.Parent;
- Previous && Previous->isNot(tok::comma);
- Previous = Previous->Parent) {
+ for (FormatToken *Previous = Current.Previous;
+ Previous && !Previous->isOneOf(tok::comma, tok::semi);
+ Previous = Previous->Previous) {
if (Previous->is(tok::r_square))
Previous = Previous->MatchingParen;
if (Previous->Type == TT_BinaryOperator &&
@@ -585,69 +605,93 @@ private:
}
} else if (Current.isOneOf(tok::kw_return, tok::kw_throw) ||
(Current.is(tok::l_paren) && !Line.MustBeDeclaration &&
- (!Current.Parent || Current.Parent->isNot(tok::kw_for)))) {
+ !Line.InPPDirective &&
+ (!Current.Previous ||
+ !Current.Previous->isOneOf(tok::kw_for, tok::kw_catch)))) {
Contexts.back().IsExpression = true;
} else if (Current.isOneOf(tok::r_paren, tok::greater, tok::comma)) {
- for (AnnotatedToken *Previous = Current.Parent;
+ for (FormatToken *Previous = Current.Previous;
Previous && Previous->isOneOf(tok::star, tok::amp);
- Previous = Previous->Parent)
+ Previous = Previous->Previous)
Previous->Type = TT_PointerOrReference;
- } else if (Current.Parent &&
- Current.Parent->Type == TT_CtorInitializerColon) {
+ } else if (Current.Previous &&
+ Current.Previous->Type == TT_CtorInitializerColon) {
Contexts.back().IsExpression = true;
+ Contexts.back().InCtorInitializer = true;
} else if (Current.is(tok::kw_new)) {
Contexts.back().CanBeExpression = false;
- } else if (Current.is(tok::semi)) {
+ } else if (Current.is(tok::semi) || Current.is(tok::exclaim)) {
// This should be the condition or increment in a for-loop.
Contexts.back().IsExpression = true;
}
if (Current.Type == TT_Unknown) {
- if (Current.Parent && Current.is(tok::identifier) &&
- ((Current.Parent->is(tok::identifier) &&
- Current.Parent->FormatTok.Tok.getIdentifierInfo()
- ->getPPKeywordID() == tok::pp_not_keyword) ||
- isSimpleTypeSpecifier(*Current.Parent) ||
- Current.Parent->Type == TT_PointerOrReference ||
- Current.Parent->Type == TT_TemplateCloser)) {
+ // Line.MightBeFunctionDecl can only be true after the parentheses of a
+ // function declaration have been found. In this case, 'Current' is a
+ // trailing token of this declaration and thus cannot be a name.
+ if (isStartOfName(Current) && !Line.MightBeFunctionDecl) {
Contexts.back().FirstStartOfName = &Current;
Current.Type = TT_StartOfName;
- NameFound = true;
+ } else if (Current.is(tok::kw_auto)) {
+ AutoFound = true;
+ } else if (Current.is(tok::arrow) && AutoFound &&
+ Line.MustBeDeclaration) {
+ Current.Type = TT_TrailingReturnArrow;
} else if (Current.isOneOf(tok::star, tok::amp, tok::ampamp)) {
Current.Type =
- determineStarAmpUsage(Current, Contexts.back().IsExpression);
+ determineStarAmpUsage(Current, Contexts.back().CanBeExpression &&
+ Contexts.back().IsExpression);
} else if (Current.isOneOf(tok::minus, tok::plus, tok::caret)) {
Current.Type = determinePlusMinusCaretUsage(Current);
} else if (Current.isOneOf(tok::minusminus, tok::plusplus)) {
Current.Type = determineIncrementUsage(Current);
} else if (Current.is(tok::exclaim)) {
Current.Type = TT_UnaryOperator;
- } else if (Current.isBinaryOperator()) {
+ } else if (Current.isBinaryOperator() &&
+ (!Current.Previous ||
+ Current.Previous->isNot(tok::l_square))) {
Current.Type = TT_BinaryOperator;
} else if (Current.is(tok::comment)) {
- std::string Data(Lexer::getSpelling(Current.FormatTok.Tok, SourceMgr,
- Lex.getLangOpts()));
- if (StringRef(Data).startswith("//"))
+ if (Current.TokenText.startswith("//"))
Current.Type = TT_LineComment;
else
Current.Type = TT_BlockComment;
} else if (Current.is(tok::r_paren)) {
- bool ParensNotExpr = !Current.Parent ||
- Current.Parent->Type == TT_PointerOrReference ||
- Current.Parent->Type == TT_TemplateCloser;
+ FormatToken *LeftOfParens = NULL;
+ if (Current.MatchingParen)
+ LeftOfParens = Current.MatchingParen->getPreviousNonComment();
+ bool IsCast = false;
+ bool ParensAreEmpty = Current.Previous == Current.MatchingParen;
+ bool ParensAreType = !Current.Previous ||
+ Current.Previous->Type == TT_PointerOrReference ||
+ Current.Previous->Type == TT_TemplateCloser ||
+ isSimpleTypeSpecifier(*Current.Previous);
bool ParensCouldEndDecl =
- !Current.Children.empty() &&
- Current.Children[0].isOneOf(tok::equal, tok::semi, tok::l_brace);
+ Current.Next &&
+ Current.Next->isOneOf(tok::equal, tok::semi, tok::l_brace);
bool IsSizeOfOrAlignOf =
- Current.MatchingParen && Current.MatchingParen->Parent &&
- Current.MatchingParen->Parent->isOneOf(tok::kw_sizeof,
- tok::kw_alignof);
- if (ParensNotExpr && !ParensCouldEndDecl && !IsSizeOfOrAlignOf &&
- Contexts.back().IsExpression)
- // FIXME: We need to get smarter and understand more cases of casts.
+ LeftOfParens &&
+ LeftOfParens->isOneOf(tok::kw_sizeof, tok::kw_alignof);
+ if (ParensAreType && !ParensCouldEndDecl && !IsSizeOfOrAlignOf &&
+ (Contexts.back().IsExpression ||
+ (Current.Next && Current.Next->isBinaryOperator())))
+ IsCast = true;
+ if (Current.Next && Current.Next->isNot(tok::string_literal) &&
+ (Current.Next->Tok.isLiteral() ||
+ Current.Next->isOneOf(tok::kw_sizeof, tok::kw_alignof)))
+ IsCast = true;
+ // If there is an identifier after the (), it is likely a cast, unless
+ // there is also an identifier before the ().
+ if (LeftOfParens && (LeftOfParens->Tok.getIdentifierInfo() == NULL ||
+ LeftOfParens->is(tok::kw_return)) &&
+ LeftOfParens->Type != TT_OverloadedOperator &&
+ LeftOfParens->Type != TT_TemplateCloser && Current.Next &&
+ Current.Next->is(tok::identifier))
+ IsCast = true;
+ if (IsCast && !ParensAreEmpty)
Current.Type = TT_CastRParen;
- } else if (Current.is(tok::at) && Current.Children.size()) {
- switch (Current.Children[0].FormatTok.Tok.getObjCKeywordID()) {
+ } else if (Current.is(tok::at) && Current.Next) {
+ switch (Current.Next->Tok.getObjCKeywordID()) {
case tok::objc_interface:
case tok::objc_implementation:
case tok::objc_protocol:
@@ -659,27 +703,63 @@ private:
default:
break;
}
+ } else if (Current.is(tok::period)) {
+ FormatToken *PreviousNoComment = Current.getPreviousNonComment();
+ if (PreviousNoComment &&
+ PreviousNoComment->isOneOf(tok::comma, tok::l_brace))
+ Current.Type = TT_DesignatedInitializerPeriod;
}
}
}
+ /// \brief Take a guess at whether \p Tok starts a name of a function or
+ /// variable declaration.
+ ///
+ /// This is a heuristic based on whether \p Tok is an identifier following
+ /// something that is likely a type.
+ bool isStartOfName(const FormatToken &Tok) {
+ if (Tok.isNot(tok::identifier) || Tok.Previous == NULL)
+ return false;
+
+ // Skip "const" as it does not have an influence on whether this is a name.
+ FormatToken *PreviousNotConst = Tok.Previous;
+ while (PreviousNotConst != NULL && PreviousNotConst->is(tok::kw_const))
+ PreviousNotConst = PreviousNotConst->Previous;
+
+ if (PreviousNotConst == NULL)
+ return false;
+
+ bool IsPPKeyword = PreviousNotConst->is(tok::identifier) &&
+ PreviousNotConst->Previous &&
+ PreviousNotConst->Previous->is(tok::hash);
+
+ if (PreviousNotConst->Type == TT_TemplateCloser)
+ return PreviousNotConst && PreviousNotConst->MatchingParen &&
+ PreviousNotConst->MatchingParen->Previous &&
+ PreviousNotConst->MatchingParen->Previous->isNot(tok::kw_template);
+
+ return (!IsPPKeyword && PreviousNotConst->is(tok::identifier)) ||
+ PreviousNotConst->Type == TT_PointerOrReference ||
+ isSimpleTypeSpecifier(*PreviousNotConst);
+ }
+
/// \brief Return the type of the given token assuming it is * or &.
- TokenType
- determineStarAmpUsage(const AnnotatedToken &Tok, bool IsExpression) {
- const AnnotatedToken *PrevToken = Tok.getPreviousNoneComment();
+ TokenType determineStarAmpUsage(const FormatToken &Tok, bool IsExpression) {
+ const FormatToken *PrevToken = Tok.getPreviousNonComment();
if (PrevToken == NULL)
return TT_UnaryOperator;
- const AnnotatedToken *NextToken = Tok.getNextNoneComment();
+ const FormatToken *NextToken = Tok.getNextNonComment();
if (NextToken == NULL)
return TT_Unknown;
- if (PrevToken->is(tok::l_paren) && !IsExpression)
+ if (PrevToken->is(tok::coloncolon) ||
+ (PrevToken->is(tok::l_paren) && !IsExpression))
return TT_PointerOrReference;
if (PrevToken->isOneOf(tok::l_paren, tok::l_square, tok::l_brace,
tok::comma, tok::semi, tok::kw_return, tok::colon,
- tok::equal) ||
+ tok::equal, tok::kw_delete, tok::kw_sizeof) ||
PrevToken->Type == TT_BinaryOperator ||
PrevToken->Type == TT_UnaryOperator || PrevToken->Type == TT_CastRParen)
return TT_UnaryOperator;
@@ -687,9 +767,14 @@ private:
if (NextToken->is(tok::l_square))
return TT_PointerOrReference;
- if (PrevToken->FormatTok.Tok.isLiteral() ||
+ if (PrevToken->is(tok::r_paren) && PrevToken->MatchingParen &&
+ PrevToken->MatchingParen->Previous &&
+ PrevToken->MatchingParen->Previous->is(tok::kw_typeof))
+ return TT_PointerOrReference;
+
+ if (PrevToken->Tok.isLiteral() ||
PrevToken->isOneOf(tok::r_paren, tok::r_square) ||
- NextToken->FormatTok.Tok.isLiteral() || NextToken->isUnaryOperator())
+ NextToken->Tok.isLiteral() || NextToken->isUnaryOperator())
return TT_BinaryOperator;
// It is very unlikely that we are going to find a pointer or reference type
@@ -700,9 +785,9 @@ private:
return TT_PointerOrReference;
}
- TokenType determinePlusMinusCaretUsage(const AnnotatedToken &Tok) {
- const AnnotatedToken *PrevToken = Tok.getPreviousNoneComment();
- if (PrevToken == NULL)
+ TokenType determinePlusMinusCaretUsage(const FormatToken &Tok) {
+ const FormatToken *PrevToken = Tok.getPreviousNonComment();
+ if (PrevToken == NULL || PrevToken->Type == TT_CastRParen)
return TT_UnaryOperator;
// Use heuristics to recognize unary operators.
@@ -720,9 +805,9 @@ private:
}
/// \brief Determine whether ++/-- are pre- or post-increments/-decrements.
- TokenType determineIncrementUsage(const AnnotatedToken &Tok) {
- const AnnotatedToken *PrevToken = Tok.getPreviousNoneComment();
- if (PrevToken == NULL)
+ TokenType determineIncrementUsage(const FormatToken &Tok) {
+ const FormatToken *PrevToken = Tok.getPreviousNonComment();
+ if (PrevToken == NULL || PrevToken->Type == TT_CastRParen)
return TT_UnaryOperator;
if (PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::identifier))
return TT_TrailingUnaryOperator;
@@ -733,8 +818,8 @@ private:
// FIXME: This is copy&pasted from Sema. Put it in a common place and remove
// duplication.
/// \brief Determine whether the token kind starts a simple-type-specifier.
- bool isSimpleTypeSpecifier(const AnnotatedToken &Tok) const {
- switch (Tok.FormatTok.Tok.getKind()) {
+ bool isSimpleTypeSpecifier(const FormatToken &Tok) const {
+ switch (Tok.Tok.getKind()) {
case tok::kw_short:
case tok::kw_long:
case tok::kw___int64:
@@ -750,71 +835,90 @@ private:
case tok::kw_wchar_t:
case tok::kw_bool:
case tok::kw___underlying_type:
- return true;
case tok::annot_typename:
case tok::kw_char16_t:
case tok::kw_char32_t:
case tok::kw_typeof:
case tok::kw_decltype:
- return Lex.getLangOpts().CPlusPlus;
+ return true;
default:
- break;
+ return false;
}
- return false;
}
SmallVector<Context, 8> Contexts;
- SourceManager &SourceMgr;
- Lexer &Lex;
+ const FormatStyle &Style;
AnnotatedLine &Line;
- AnnotatedToken *CurrentToken;
+ FormatToken *CurrentToken;
bool KeywordVirtualFound;
- bool NameFound;
+ bool AutoFound;
IdentifierInfo &Ident_in;
};
+static int PrecedenceUnaryOperator = prec::PointerToMember + 1;
+static int PrecedenceArrowAndPeriod = prec::PointerToMember + 2;
+
/// \brief Parses binary expressions by inserting fake parenthesis based on
/// operator precedence.
class ExpressionParser {
public:
- ExpressionParser(AnnotatedLine &Line) : Current(&Line.First) {}
+ ExpressionParser(AnnotatedLine &Line) : Current(Line.First) {
+ // Skip leading "}", e.g. in "} else if (...) {".
+ if (Current->is(tok::r_brace))
+ next();
+ }
/// \brief Parse expressions with the given operatore precedence.
void parse(int Precedence = 0) {
- if (Precedence > prec::PointerToMember || Current == NULL)
+ // Skip 'return' and ObjC selector colons as they are not part of a binary
+ // expression.
+ while (Current &&
+ (Current->is(tok::kw_return) ||
+ (Current->is(tok::colon) && Current->Type == TT_ObjCMethodExpr)))
+ next();
+
+ if (Current == NULL || Precedence > PrecedenceArrowAndPeriod)
return;
- // Eagerly consume trailing comments.
- while (Current && Current->isTrailingComment()) {
- next();
+ // Conditional expressions need to be parsed separately for proper nesting.
+ if (Precedence == prec::Conditional) {
+ parseConditionalExpr();
+ return;
}
- AnnotatedToken *Start = Current;
- bool OperatorFound = false;
+ // Parse unary operators, which all have a higher precedence than binary
+ // operators.
+ if (Precedence == PrecedenceUnaryOperator) {
+ parseUnaryOperator();
+ return;
+ }
+
+ FormatToken *Start = Current;
+ FormatToken *LatestOperator = NULL;
while (Current) {
// Consume operators with higher precedence.
parse(Precedence + 1);
- int CurrentPrecedence = 0;
- if (Current) {
- if (Current->Type == TT_ConditionalExpr)
- CurrentPrecedence = 1 + (int) prec::Conditional;
- else if (Current->is(tok::semi) || Current->Type == TT_InlineASMColon)
- CurrentPrecedence = 1;
- else if (Current->Type == TT_BinaryOperator || Current->is(tok::comma))
- CurrentPrecedence = 1 + (int) getPrecedence(*Current);
- }
+ int CurrentPrecedence = getCurrentPrecedence();
+
+ if (Current && Current->Type == TT_ObjCSelectorName &&
+ Precedence == CurrentPrecedence)
+ Start = Current;
// At the end of the line or when an operator with higher precedence is
// found, insert fake parenthesis and return.
if (Current == NULL || Current->closesScope() ||
- (CurrentPrecedence != 0 && CurrentPrecedence < Precedence)) {
- if (OperatorFound) {
- Start->FakeLParens.push_back(prec::Level(Precedence - 1));
- if (Current)
- ++Current->Parent->FakeRParens;
+ (CurrentPrecedence != -1 && CurrentPrecedence < Precedence)) {
+ if (LatestOperator) {
+ if (Precedence == PrecedenceArrowAndPeriod) {
+ LatestOperator->LastInChainOfCalls = true;
+ // Call expressions don't have a binary operator precedence.
+ addFakeParenthesis(Start, prec::Unknown);
+ } else {
+ addFakeParenthesis(Start, prec::Level(Precedence));
+ }
}
return;
}
@@ -829,7 +933,7 @@ public:
} else {
// Operator found.
if (CurrentPrecedence == Precedence)
- OperatorFound = true;
+ LatestOperator = Current;
next();
}
@@ -837,16 +941,99 @@ public:
}
private:
+ /// \brief Gets the precedence (+1) of the given token for binary operators
+ /// and other tokens that we treat like binary operators.
+ int getCurrentPrecedence() {
+ if (Current) {
+ if (Current->Type == TT_ConditionalExpr)
+ return prec::Conditional;
+ else if (Current->is(tok::semi) || Current->Type == TT_InlineASMColon ||
+ Current->Type == TT_ObjCSelectorName)
+ return 0;
+ else if (Current->Type == TT_BinaryOperator || Current->is(tok::comma))
+ return Current->getPrecedence();
+ else if (Current->isOneOf(tok::period, tok::arrow))
+ return PrecedenceArrowAndPeriod;
+ }
+ return -1;
+ }
+
+ void addFakeParenthesis(FormatToken *Start, prec::Level Precedence) {
+ Start->FakeLParens.push_back(Precedence);
+ if (Precedence > prec::Unknown)
+ Start->StartsBinaryExpression = true;
+ if (Current) {
+ ++Current->Previous->FakeRParens;
+ if (Precedence > prec::Unknown)
+ Current->Previous->EndsBinaryExpression = true;
+ }
+ }
+
+ /// \brief Parse unary operator expressions and surround them with fake
+ /// parentheses if appropriate.
+ void parseUnaryOperator() {
+ if (Current == NULL || Current->Type != TT_UnaryOperator) {
+ parse(PrecedenceArrowAndPeriod);
+ return;
+ }
+
+ FormatToken *Start = Current;
+ next();
+ parseUnaryOperator();
+
+ // The actual precedence doesn't matter.
+ addFakeParenthesis(Start, prec::Unknown);
+ }
+
+ void parseConditionalExpr() {
+ FormatToken *Start = Current;
+ parse(prec::LogicalOr);
+ if (!Current || !Current->is(tok::question))
+ return;
+ next();
+ parse(prec::LogicalOr);
+ if (!Current || Current->Type != TT_ConditionalExpr)
+ return;
+ next();
+ parseConditionalExpr();
+ addFakeParenthesis(Start, prec::Conditional);
+ }
+
void next() {
- if (Current != NULL)
- Current = Current->Children.empty() ? NULL : &Current->Children[0];
+ if (Current)
+ Current = Current->Next;
+ while (Current && Current->isTrailingComment())
+ Current = Current->Next;
}
- AnnotatedToken *Current;
+ FormatToken *Current;
};
+} // end anonymous namespace
+
+void
+TokenAnnotator::setCommentLineLevels(SmallVectorImpl<AnnotatedLine *> &Lines) {
+ const AnnotatedLine *NextNonCommentLine = NULL;
+ for (SmallVectorImpl<AnnotatedLine *>::reverse_iterator I = Lines.rbegin(),
+ E = Lines.rend();
+ I != E; ++I) {
+ if (NextNonCommentLine && (*I)->First->is(tok::comment) &&
+ (*I)->First->Next == NULL)
+ (*I)->Level = NextNonCommentLine->Level;
+ else
+ NextNonCommentLine = (*I)->First->isNot(tok::r_brace) ? (*I) : NULL;
+
+ setCommentLineLevels((*I)->Children);
+ }
+}
+
void TokenAnnotator::annotate(AnnotatedLine &Line) {
- AnnotatingParser Parser(SourceMgr, Lex, Line, Ident_in);
+ for (SmallVectorImpl<AnnotatedLine *>::iterator I = Line.Children.begin(),
+ E = Line.Children.end();
+ I != E; ++I) {
+ annotate(**I);
+ }
+ AnnotatingParser Parser(Style, Line, Ident_in);
Line.Type = Parser.parseLine();
if (Line.Type == LT_Invalid)
return;
@@ -854,84 +1041,114 @@ void TokenAnnotator::annotate(AnnotatedLine &Line) {
ExpressionParser ExprParser(Line);
ExprParser.parse();
- if (Line.First.Type == TT_ObjCMethodSpecifier)
+ if (Line.First->Type == TT_ObjCMethodSpecifier)
Line.Type = LT_ObjCMethodDecl;
- else if (Line.First.Type == TT_ObjCDecl)
+ else if (Line.First->Type == TT_ObjCDecl)
Line.Type = LT_ObjCDecl;
- else if (Line.First.Type == TT_ObjCProperty)
+ else if (Line.First->Type == TT_ObjCProperty)
Line.Type = LT_ObjCProperty;
- Line.First.SpacesRequiredBefore = 1;
- Line.First.MustBreakBefore = Line.First.FormatTok.MustBreakBefore;
- Line.First.CanBreakBefore = Line.First.MustBreakBefore;
-
- Line.First.TotalLength = Line.First.FormatTok.TokenLength;
+ Line.First->SpacesRequiredBefore = 1;
+ Line.First->CanBreakBefore = Line.First->MustBreakBefore;
}
void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) {
- if (Line.First.Children.empty())
+ Line.First->TotalLength =
+ Line.First->IsMultiline ? Style.ColumnLimit : Line.First->ColumnWidth;
+ if (!Line.First->Next)
return;
- AnnotatedToken *Current = &Line.First.Children[0];
+ FormatToken *Current = Line.First->Next;
+ bool InFunctionDecl = Line.MightBeFunctionDecl;
while (Current != NULL) {
if (Current->Type == TT_LineComment)
Current->SpacesRequiredBefore = Style.SpacesBeforeTrailingComments;
- else
- Current->SpacesRequiredBefore =
- spaceRequiredBefore(Line, *Current) ? 1 : 0;
-
- if (Current->FormatTok.MustBreakBefore) {
- Current->MustBreakBefore = true;
- } else if (Current->Type == TT_LineComment) {
- Current->MustBreakBefore = Current->FormatTok.NewlinesBefore > 0;
- } else if (Current->Parent->isTrailingComment() ||
- (Current->is(tok::string_literal) &&
- Current->Parent->is(tok::string_literal))) {
- Current->MustBreakBefore = true;
- } else if (Current->is(tok::lessless) && !Current->Children.empty() &&
- Current->Parent->is(tok::string_literal) &&
- Current->Children[0].is(tok::string_literal)) {
- Current->MustBreakBefore = true;
- } else {
- Current->MustBreakBefore = false;
- }
+ else if (Current->SpacesRequiredBefore == 0 &&
+ spaceRequiredBefore(Line, *Current))
+ Current->SpacesRequiredBefore = 1;
+
+ Current->MustBreakBefore =
+ Current->MustBreakBefore || mustBreakBefore(Line, *Current);
+
Current->CanBreakBefore =
Current->MustBreakBefore || canBreakBefore(Line, *Current);
- if (Current->MustBreakBefore)
- Current->TotalLength = Current->Parent->TotalLength + Style.ColumnLimit;
+ if (Current->MustBreakBefore || !Current->Children.empty() ||
+ Current->IsMultiline)
+ Current->TotalLength = Current->Previous->TotalLength + Style.ColumnLimit;
else
- Current->TotalLength =
- Current->Parent->TotalLength + Current->FormatTok.TokenLength +
- Current->SpacesRequiredBefore;
+ Current->TotalLength = Current->Previous->TotalLength +
+ Current->ColumnWidth +
+ Current->SpacesRequiredBefore;
+
+ if (Current->Type == TT_CtorInitializerColon)
+ InFunctionDecl = false;
+
// FIXME: Only calculate this if CanBreakBefore is true once static
// initializers etc. are sorted out.
// FIXME: Move magic numbers to a better place.
- Current->SplitPenalty =
- 20 * Current->BindingStrength + splitPenalty(Line, *Current);
+ Current->SplitPenalty = 20 * Current->BindingStrength +
+ splitPenalty(Line, *Current, InFunctionDecl);
- Current = Current->Children.empty() ? NULL : &Current->Children[0];
+ Current = Current->Next;
}
- DEBUG({
- printDebugInfo(Line);
- });
+ calculateUnbreakableTailLengths(Line);
+ for (Current = Line.First; Current != NULL; Current = Current->Next) {
+ if (Current->Role)
+ Current->Role->precomputeFormattingInfos(Current);
+ }
+
+ DEBUG({ printDebugInfo(Line); });
+
+ for (SmallVectorImpl<AnnotatedLine *>::iterator I = Line.Children.begin(),
+ E = Line.Children.end();
+ I != E; ++I) {
+ calculateFormattingInformation(**I);
+ }
+}
+
+void TokenAnnotator::calculateUnbreakableTailLengths(AnnotatedLine &Line) {
+ unsigned UnbreakableTailLength = 0;
+ FormatToken *Current = Line.Last;
+ while (Current != NULL) {
+ Current->UnbreakableTailLength = UnbreakableTailLength;
+ if (Current->CanBreakBefore ||
+ Current->isOneOf(tok::comment, tok::string_literal)) {
+ UnbreakableTailLength = 0;
+ } else {
+ UnbreakableTailLength +=
+ Current->ColumnWidth + Current->SpacesRequiredBefore;
+ }
+ Current = Current->Previous;
+ }
}
unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
- const AnnotatedToken &Tok) {
- const AnnotatedToken &Left = *Tok.Parent;
- const AnnotatedToken &Right = Tok;
+ const FormatToken &Tok,
+ bool InFunctionDecl) {
+ const FormatToken &Left = *Tok.Previous;
+ const FormatToken &Right = Tok;
- if (Right.Type == TT_StartOfName) {
- if (Line.First.is(tok::kw_for) && Right.PartOfMultiVariableDeclStmt)
+ if (Left.is(tok::semi))
+ return 0;
+ if (Left.is(tok::comma))
+ return 1;
+ if (Right.is(tok::l_square))
+ return 150;
+
+ if (Right.Type == TT_StartOfName || Right.is(tok::kw_operator)) {
+ if (Line.First->is(tok::kw_for) && Right.PartOfMultiVariableDeclStmt)
return 3;
- else if (Line.MightBeFunctionDecl && Right.BindingStrength == 1)
+ if (Left.Type == TT_StartOfName)
+ return 20;
+ if (InFunctionDecl && Right.BindingStrength == 1)
// FIXME: Clean up hack of using BindingStrength to find top-level names.
return Style.PenaltyReturnTypeOnItsOwnLine;
- else
- return 200;
+ return 200;
}
if (Left.is(tok::equal) && Right.is(tok::l_brace))
return 150;
+ if (Left.Type == TT_CastRParen)
+ return 100;
if (Left.is(tok::coloncolon))
return 500;
if (Left.isOneOf(tok::kw_class, tok::kw_struct))
@@ -941,50 +1158,53 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
Left.Type == TT_InheritanceColon)
return 2;
- if (Right.isOneOf(tok::arrow, tok::period)) {
- if (Line.Type == LT_BuilderTypeCall)
- return prec::PointerToMember;
+ if (Right.isMemberAccess()) {
if (Left.isOneOf(tok::r_paren, tok::r_square) && Left.MatchingParen &&
Left.MatchingParen->ParameterCount > 0)
return 20; // Should be smaller than breaking at a nested comma.
return 150;
}
+ // Breaking before a trailing 'const' or not-function-like annotation is bad.
+ if (Left.is(tok::r_paren) && Line.Type != LT_ObjCProperty &&
+ (Right.is(tok::kw_const) || (Right.is(tok::identifier) && Right.Next &&
+ Right.Next->isNot(tok::l_paren))))
+ return 100;
+
// In for-loops, prefer breaking at ',' and ';'.
- if (Line.First.is(tok::kw_for) && Left.is(tok::equal))
+ if (Line.First->is(tok::kw_for) && Left.is(tok::equal))
return 4;
- if (Left.is(tok::semi))
- return 0;
- if (Left.is(tok::comma))
- return 1;
-
// In Objective-C method expressions, prefer breaking before "param:" over
// breaking after it.
if (Right.Type == TT_ObjCSelectorName)
return 0;
if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr)
- return 20;
+ return 50;
- if (Left.is(tok::l_paren) && Line.MightBeFunctionDecl)
+ if (Left.is(tok::l_paren) && InFunctionDecl)
return 100;
if (Left.opensScope())
- return Left.ParameterCount > 1 ? prec::Comma : 20;
+ return Left.ParameterCount > 1 ? Style.PenaltyBreakBeforeFirstCallParameter
+ : 19;
if (Right.is(tok::lessless)) {
if (Left.is(tok::string_literal)) {
- StringRef Content = StringRef(Left.FormatTok.Tok.getLiteralData(),
- Left.FormatTok.TokenLength);
- Content = Content.drop_back(1).drop_front(1).trim();
+ StringRef Content = Left.TokenText;
+ if (Content.startswith("\""))
+ Content = Content.drop_front(1);
+ if (Content.endswith("\""))
+ Content = Content.drop_back(1);
+ Content = Content.trim();
if (Content.size() > 1 &&
(Content.back() == ':' || Content.back() == '='))
- return 100;
+ return 25;
}
- return prec::Shift;
+ return 1; // Breaking at a << is really cheap.
}
if (Left.Type == TT_ConditionalExpr)
return prec::Conditional;
- prec::Level Level = getPrecedence(Left);
+ prec::Level Level = Left.getPrecedence();
if (Level != prec::Unknown)
return Level;
@@ -993,13 +1213,23 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
}
bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
- const AnnotatedToken &Left,
- const AnnotatedToken &Right) {
+ const FormatToken &Left,
+ const FormatToken &Right) {
if (Right.is(tok::hashhash))
return Left.is(tok::hash);
if (Left.isOneOf(tok::hashhash, tok::hash))
return Right.is(tok::hash);
- if (Right.isOneOf(tok::r_paren, tok::semi, tok::comma))
+ if (Left.is(tok::l_paren) && Right.is(tok::r_paren))
+ return Style.SpaceInEmptyParentheses;
+ if (Left.is(tok::l_paren) || Right.is(tok::r_paren))
+ return (Right.Type == TT_CastRParen ||
+ (Left.MatchingParen && Left.MatchingParen->Type == TT_CastRParen))
+ ? Style.SpacesInCStyleCastParentheses
+ : Style.SpacesInParentheses;
+ if (Style.SpacesInAngles &&
+ ((Left.Type == TT_TemplateOpener) != (Right.Type == TT_TemplateCloser)))
+ return true;
+ if (Right.isOneOf(tok::semi, tok::comma))
return false;
if (Right.is(tok::less) &&
(Left.is(tok::kw_template) ||
@@ -1017,186 +1247,282 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
if (Left.is(tok::coloncolon))
return false;
if (Right.is(tok::coloncolon))
- return !Left.isOneOf(tok::identifier, tok::greater, tok::l_paren);
+ return (Left.is(tok::less) && Style.Standard == FormatStyle::LS_Cpp03) ||
+ !Left.isOneOf(tok::identifier, tok::greater, tok::l_paren,
+ tok::r_paren, tok::less);
if (Left.is(tok::less) || Right.isOneOf(tok::greater, tok::less))
return false;
+ if (Right.is(tok::ellipsis))
+ return Left.Tok.isLiteral();
+ if (Left.is(tok::l_square) && Right.is(tok::amp))
+ return false;
if (Right.Type == TT_PointerOrReference)
- return Left.FormatTok.Tok.isLiteral() ||
+ return Left.Tok.isLiteral() ||
((Left.Type != TT_PointerOrReference) && Left.isNot(tok::l_paren) &&
!Style.PointerBindsToType);
+ if (Right.Type == TT_FunctionTypeLParen && Left.isNot(tok::l_paren) &&
+ (Left.Type != TT_PointerOrReference || Style.PointerBindsToType))
+ return true;
if (Left.Type == TT_PointerOrReference)
- return Right.FormatTok.Tok.isLiteral() ||
+ return Right.Tok.isLiteral() || Right.Type == TT_BlockComment ||
((Right.Type != TT_PointerOrReference) &&
Right.isNot(tok::l_paren) && Style.PointerBindsToType &&
- Left.Parent && Left.Parent->isNot(tok::l_paren));
+ Left.Previous &&
+ !Left.Previous->isOneOf(tok::l_paren, tok::coloncolon));
if (Right.is(tok::star) && Left.is(tok::l_paren))
return false;
if (Left.is(tok::l_square))
- return Left.Type == TT_ObjCArrayLiteral && Right.isNot(tok::r_square);
+ return Left.Type == TT_ArrayInitializerLSquare &&
+ Right.isNot(tok::r_square);
if (Right.is(tok::r_square))
- return Right.Type == TT_ObjCArrayLiteral;
- if (Right.is(tok::l_square) && Right.Type != TT_ObjCMethodExpr)
- return false;
- if (Left.is(tok::period) || Right.is(tok::period))
+ return Right.MatchingParen &&
+ Right.MatchingParen->Type == TT_ArrayInitializerLSquare;
+ if (Right.is(tok::l_square) && Right.Type != TT_ObjCMethodExpr &&
+ Right.Type != TT_LambdaLSquare && Left.isNot(tok::numeric_constant))
return false;
if (Left.is(tok::colon))
return Left.Type != TT_ObjCMethodExpr;
if (Right.is(tok::colon))
- return Right.Type != TT_ObjCMethodExpr;
- if (Left.is(tok::l_paren))
- return false;
+ return Right.Type != TT_ObjCMethodExpr && !Left.is(tok::question);
if (Right.is(tok::l_paren)) {
+ if (Left.is(tok::r_paren) && Left.MatchingParen &&
+ Left.MatchingParen->Previous &&
+ Left.MatchingParen->Previous->is(tok::kw___attribute))
+ return true;
return Line.Type == LT_ObjCDecl ||
- Left.isOneOf(tok::kw_if, tok::kw_for, tok::kw_while, tok::kw_switch,
- tok::kw_return, tok::kw_catch, tok::kw_new,
- tok::kw_delete, tok::semi);
+ Left.isOneOf(tok::kw_return, tok::kw_new, tok::kw_delete,
+ tok::semi) ||
+ (Style.SpaceAfterControlStatementKeyword &&
+ Left.isOneOf(tok::kw_if, tok::kw_for, tok::kw_while, tok::kw_switch,
+ tok::kw_catch));
}
- if (Left.is(tok::at) &&
- Right.FormatTok.Tok.getObjCKeywordID() != tok::objc_not_keyword)
+ if (Left.is(tok::at) && Right.Tok.getObjCKeywordID() != tok::objc_not_keyword)
return false;
if (Left.is(tok::l_brace) && Right.is(tok::r_brace))
+ return !Left.Children.empty(); // No spaces in "{}".
+ if (Left.is(tok::l_brace) || Right.is(tok::r_brace))
+ return !Style.Cpp11BracedListStyle;
+ if (Right.Type == TT_UnaryOperator)
+ return !Left.isOneOf(tok::l_paren, tok::l_square, tok::at) &&
+ (Left.isNot(tok::colon) || Left.Type != TT_ObjCMethodExpr);
+ if (Left.isOneOf(tok::identifier, tok::greater, tok::r_square) &&
+ Right.is(tok::l_brace) && Right.getNextNonComment() &&
+ Right.BlockKind != BK_Block)
return false;
- if (Right.is(tok::ellipsis))
+ if (Left.is(tok::period) || Right.is(tok::period))
+ return false;
+ if (Left.Type == TT_BlockComment && Left.TokenText.endswith("=*/"))
+ return false;
+ if (Right.is(tok::hash) && Left.is(tok::identifier) && Left.TokenText == "L")
return false;
return true;
}
bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
- const AnnotatedToken &Tok) {
- if (Tok.FormatTok.Tok.getIdentifierInfo() &&
- Tok.Parent->FormatTok.Tok.getIdentifierInfo())
+ const FormatToken &Tok) {
+ if (Tok.Tok.getIdentifierInfo() && Tok.Previous->Tok.getIdentifierInfo())
return true; // Never ever merge two identifiers.
+ if (Tok.Previous->Type == TT_ImplicitStringLiteral)
+ return Tok.WhitespaceRange.getBegin() != Tok.WhitespaceRange.getEnd();
if (Line.Type == LT_ObjCMethodDecl) {
- if (Tok.Parent->Type == TT_ObjCMethodSpecifier)
+ if (Tok.Previous->Type == TT_ObjCMethodSpecifier)
return true;
- if (Tok.Parent->is(tok::r_paren) && Tok.is(tok::identifier))
+ if (Tok.Previous->is(tok::r_paren) && Tok.is(tok::identifier))
// Don't space between ')' and <id>
return false;
}
if (Line.Type == LT_ObjCProperty &&
- (Tok.is(tok::equal) || Tok.Parent->is(tok::equal)))
+ (Tok.is(tok::equal) || Tok.Previous->is(tok::equal)))
return false;
- if (Tok.Parent->is(tok::comma))
+ if (Tok.Type == TT_TrailingReturnArrow ||
+ Tok.Previous->Type == TT_TrailingReturnArrow)
+ return true;
+ if (Tok.Previous->is(tok::comma))
return true;
if (Tok.is(tok::comma))
return false;
if (Tok.Type == TT_CtorInitializerColon || Tok.Type == TT_ObjCBlockLParen)
return true;
- if (Tok.Parent->FormatTok.Tok.is(tok::kw_operator))
- return false;
+ if (Tok.Previous->Tok.is(tok::kw_operator))
+ return Tok.is(tok::coloncolon);
if (Tok.Type == TT_OverloadedOperatorLParen)
return false;
if (Tok.is(tok::colon))
- return !Line.First.isOneOf(tok::kw_case, tok::kw_default) &&
- Tok.getNextNoneComment() != NULL && Tok.Type != TT_ObjCMethodExpr;
- if (Tok.is(tok::l_paren) && !Tok.Children.empty() &&
- Tok.Children[0].Type == TT_PointerOrReference &&
- !Tok.Children[0].Children.empty() &&
- Tok.Children[0].Children[0].isNot(tok::r_paren) &&
- Tok.Parent->isNot(tok::l_paren) &&
- (Tok.Parent->Type != TT_PointerOrReference || Style.PointerBindsToType))
- return true;
- if (Tok.Parent->Type == TT_UnaryOperator || Tok.Parent->Type == TT_CastRParen)
+ return !Line.First->isOneOf(tok::kw_case, tok::kw_default) &&
+ Tok.getNextNonComment() != NULL && Tok.Type != TT_ObjCMethodExpr &&
+ !Tok.Previous->is(tok::question);
+ if (Tok.Previous->Type == TT_UnaryOperator ||
+ Tok.Previous->Type == TT_CastRParen)
return false;
- if (Tok.Type == TT_UnaryOperator)
- return !Tok.Parent->isOneOf(tok::l_paren, tok::l_square, tok::at) &&
- (Tok.Parent->isNot(tok::colon) ||
- Tok.Parent->Type != TT_ObjCMethodExpr);
- if (Tok.Parent->is(tok::greater) && Tok.is(tok::greater)) {
+ if (Tok.Previous->is(tok::greater) && Tok.is(tok::greater)) {
return Tok.Type == TT_TemplateCloser &&
- Tok.Parent->Type == TT_TemplateCloser &&
- Style.Standard != FormatStyle::LS_Cpp11;
+ Tok.Previous->Type == TT_TemplateCloser &&
+ (Style.Standard != FormatStyle::LS_Cpp11 || Style.SpacesInAngles);
}
if (Tok.isOneOf(tok::arrowstar, tok::periodstar) ||
- Tok.Parent->isOneOf(tok::arrowstar, tok::periodstar))
+ Tok.Previous->isOneOf(tok::arrowstar, tok::periodstar))
+ return false;
+ if (!Style.SpaceBeforeAssignmentOperators &&
+ Tok.getPrecedence() == prec::Assignment)
return false;
- if (Tok.Type == TT_BinaryOperator || Tok.Parent->Type == TT_BinaryOperator)
+ if ((Tok.Type == TT_BinaryOperator && !Tok.Previous->is(tok::l_paren)) ||
+ Tok.Previous->Type == TT_BinaryOperator)
return true;
- if (Tok.Parent->Type == TT_TemplateCloser && Tok.is(tok::l_paren))
+ if (Tok.Previous->Type == TT_TemplateCloser && Tok.is(tok::l_paren))
return false;
- if (Tok.is(tok::less) && Line.First.is(tok::hash))
+ if (Tok.is(tok::less) && Tok.Previous->isNot(tok::l_paren) &&
+ Line.First->is(tok::hash))
return true;
if (Tok.Type == TT_TrailingUnaryOperator)
return false;
- return spaceRequiredBetween(Line, *Tok.Parent, Tok);
+ return spaceRequiredBetween(Line, *Tok.Previous, Tok);
+}
+
+bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
+ const FormatToken &Right) {
+ if (Right.is(tok::comment)) {
+ return Right.NewlinesBefore > 0;
+ } else if (Right.Previous->isTrailingComment() ||
+ (Right.is(tok::string_literal) &&
+ Right.Previous->is(tok::string_literal))) {
+ return true;
+ } else if (Right.Previous->IsUnterminatedLiteral) {
+ return true;
+ } else if (Right.is(tok::lessless) && Right.Next &&
+ Right.Previous->is(tok::string_literal) &&
+ Right.Next->is(tok::string_literal)) {
+ return true;
+ } else if (Right.Previous->ClosesTemplateDeclaration &&
+ Right.Previous->MatchingParen &&
+ Right.Previous->MatchingParen->BindingStrength == 1 &&
+ Style.AlwaysBreakTemplateDeclarations) {
+ // FIXME: Fix horrible hack of using BindingStrength to find top-level <>.
+ return true;
+ } else if (Right.Type == TT_CtorInitializerComma &&
+ Style.BreakConstructorInitializersBeforeComma &&
+ !Style.ConstructorInitializerAllOnOneLineOrOnePerLine) {
+ return true;
+ } else if (Right.Previous->BlockKind == BK_Block &&
+ Right.Previous->isNot(tok::r_brace) && Right.isNot(tok::r_brace)) {
+ return true;
+ } else if (Right.is(tok::l_brace) && (Right.BlockKind == BK_Block)) {
+ return Style.BreakBeforeBraces == FormatStyle::BS_Allman;
+ }
+ return false;
}
bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
- const AnnotatedToken &Right) {
- const AnnotatedToken &Left = *Right.Parent;
- if (Right.Type == TT_StartOfName)
+ const FormatToken &Right) {
+ const FormatToken &Left = *Right.Previous;
+ if (Right.Type == TT_StartOfName || Right.is(tok::kw_operator))
return true;
- if (Right.is(tok::colon) && Right.Type == TT_ObjCMethodExpr)
+ if (Right.isTrailingComment())
+ // We rely on MustBreakBefore being set correctly here as we should not
+ // change the "binding" behavior of a comment.
return false;
- if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr)
+ if (Left.is(tok::question) && Right.is(tok::colon))
+ return false;
+ if (Right.Type == TT_ConditionalExpr || Right.is(tok::question))
+ return Style.BreakBeforeTernaryOperators;
+ if (Left.Type == TT_ConditionalExpr || Left.is(tok::question))
+ return !Style.BreakBeforeTernaryOperators;
+ if (Right.is(tok::colon) &&
+ (Right.Type == TT_DictLiteral || Right.Type == TT_ObjCMethodExpr))
+ return false;
+ if (Left.is(tok::colon) &&
+ (Left.Type == TT_DictLiteral || Left.Type == TT_ObjCMethodExpr))
return true;
if (Right.Type == TT_ObjCSelectorName)
return true;
- if (Left.ClosesTemplateDeclaration)
+ if (Left.is(tok::r_paren) && Line.Type == LT_ObjCProperty)
return true;
- if (Right.Type == TT_ConditionalExpr || Right.is(tok::question))
+ if (Left.ClosesTemplateDeclaration)
return true;
if (Right.Type == TT_RangeBasedForLoopColon ||
- Right.Type == TT_OverloadedOperatorLParen)
+ Right.Type == TT_OverloadedOperatorLParen ||
+ Right.Type == TT_OverloadedOperator)
return false;
if (Left.Type == TT_RangeBasedForLoopColon)
return true;
if (Right.Type == TT_RangeBasedForLoopColon)
return false;
if (Left.Type == TT_PointerOrReference || Left.Type == TT_TemplateCloser ||
- Left.Type == TT_UnaryOperator || Left.Type == TT_ConditionalExpr ||
- Left.isOneOf(tok::question, tok::kw_operator))
+ Left.Type == TT_UnaryOperator || Left.is(tok::kw_operator))
return false;
if (Left.is(tok::equal) && Line.Type == LT_VirtualFunctionDecl)
return false;
- if (Left.is(tok::l_paren) && Right.is(tok::l_paren) && Left.Parent &&
- Left.Parent->is(tok::kw___attribute))
+ if (Left.Previous) {
+ if (Left.is(tok::l_paren) && Right.is(tok::l_paren) &&
+ Left.Previous->is(tok::kw___attribute))
+ return false;
+ if (Left.is(tok::l_paren) && (Left.Previous->Type == TT_BinaryOperator ||
+ Left.Previous->Type == TT_CastRParen))
+ return false;
+ }
+ if (Right.Type == TT_ImplicitStringLiteral)
return false;
- if (Right.Type == TT_LineComment)
- // We rely on MustBreakBefore being set correctly here as we should not
- // change the "binding" behavior of a comment.
+ if (Right.is(tok::r_paren) || Right.Type == TT_TemplateCloser)
return false;
+ // We only break before r_brace if there was a corresponding break before
+ // the l_brace, which is tracked by BreakBeforeClosingBrace.
+ if (Right.is(tok::r_brace))
+ return Right.MatchingParen && Right.MatchingParen->BlockKind == BK_Block;
+
// Allow breaking after a trailing 'const', e.g. after a method declaration,
// unless it is follow by ';', '{' or '='.
- if (Left.is(tok::kw_const) && Left.Parent != NULL &&
- Left.Parent->is(tok::r_paren))
+ if (Left.is(tok::kw_const) && Left.Previous != NULL &&
+ Left.Previous->is(tok::r_paren))
return !Right.isOneOf(tok::l_brace, tok::semi, tok::equal);
if (Right.is(tok::kw___attribute))
return true;
- // We only break before r_brace if there was a corresponding break before
- // the l_brace, which is tracked by BreakBeforeClosingBrace.
- if (Right.isOneOf(tok::r_brace, tok::r_paren, tok::greater))
- return false;
if (Left.is(tok::identifier) && Right.is(tok::string_literal))
return true;
- return (Left.isBinaryOperator() && Left.isNot(tok::lessless)) ||
+
+ if (Left.Type == TT_CtorInitializerComma &&
+ Style.BreakConstructorInitializersBeforeComma)
+ return false;
+ if (Right.Type == TT_CtorInitializerComma &&
+ Style.BreakConstructorInitializersBeforeComma)
+ return true;
+ if (Right.isBinaryOperator() && Style.BreakBeforeBinaryOperators)
+ return true;
+ if (Left.is(tok::greater) && Right.is(tok::greater) &&
+ Left.Type != TT_TemplateCloser)
+ return false;
+ if (Left.Type == TT_ArrayInitializerLSquare)
+ return true;
+ return (Left.isBinaryOperator() && Left.isNot(tok::lessless) &&
+ !Style.BreakBeforeBinaryOperators) ||
Left.isOneOf(tok::comma, tok::coloncolon, tok::semi, tok::l_brace,
tok::kw_class, tok::kw_struct) ||
- Right.isOneOf(tok::lessless, tok::arrow, tok::period, tok::colon) ||
- (Left.is(tok::r_paren) && Left.Type != TT_CastRParen &&
- Right.isOneOf(tok::identifier, tok::kw___attribute)) ||
- (Left.is(tok::l_paren) && !Right.is(tok::r_paren)) ||
- (Left.is(tok::l_square) && !Right.is(tok::r_square));
+ Right.isOneOf(tok::lessless, tok::arrow, tok::period, tok::colon,
+ tok::l_square, tok::at) ||
+ (Left.is(tok::r_paren) &&
+ Right.isOneOf(tok::identifier, tok::kw_const, tok::kw___attribute)) ||
+ (Left.is(tok::l_paren) && !Right.is(tok::r_paren));
}
void TokenAnnotator::printDebugInfo(const AnnotatedLine &Line) {
llvm::errs() << "AnnotatedTokens:\n";
- const AnnotatedToken *Tok = &Line.First;
+ const FormatToken *Tok = Line.First;
while (Tok) {
llvm::errs() << " M=" << Tok->MustBreakBefore
<< " C=" << Tok->CanBreakBefore << " T=" << Tok->Type
<< " S=" << Tok->SpacesRequiredBefore
- << " P=" << Tok->SplitPenalty
- << " Name=" << Tok->FormatTok.Tok.getName() << " FakeLParens=";
+ << " P=" << Tok->SplitPenalty << " Name=" << Tok->Tok.getName()
+ << " L=" << Tok->TotalLength << " PPK=" << Tok->PackingKind
+ << " FakeLParens=";
for (unsigned i = 0, e = Tok->FakeLParens.size(); i != e; ++i)
llvm::errs() << Tok->FakeLParens[i] << "/";
llvm::errs() << " FakeRParens=" << Tok->FakeRParens << "\n";
- Tok = Tok->Children.empty() ? NULL : &Tok->Children[0];
+ if (Tok->Next == NULL)
+ assert(Tok == Line.Last);
+ Tok = Tok->Next;
}
llvm::errs() << "----\n";
}
diff --git a/lib/Format/TokenAnnotator.h b/lib/Format/TokenAnnotator.h
index b364082391f8..aa49b2a5c078 100644
--- a/lib/Format/TokenAnnotator.h
+++ b/lib/Format/TokenAnnotator.h
@@ -17,50 +17,17 @@
#define LLVM_CLANG_FORMAT_TOKEN_ANNOTATOR_H
#include "UnwrappedLineParser.h"
-#include "clang/Basic/OperatorPrecedence.h"
#include "clang/Format/Format.h"
#include <string>
namespace clang {
-class Lexer;
class SourceManager;
namespace format {
-enum TokenType {
- TT_BinaryOperator,
- TT_BlockComment,
- TT_CastRParen,
- TT_ConditionalExpr,
- TT_CtorInitializerColon,
- TT_ImplicitStringLiteral,
- TT_InlineASMColon,
- TT_InheritanceColon,
- TT_LineComment,
- TT_ObjCArrayLiteral,
- TT_ObjCBlockLParen,
- TT_ObjCDecl,
- TT_ObjCForIn,
- TT_ObjCMethodExpr,
- TT_ObjCMethodSpecifier,
- TT_ObjCProperty,
- TT_ObjCSelectorName,
- TT_OverloadedOperatorLParen,
- TT_PointerOrReference,
- TT_PureVirtualSpecifier,
- TT_RangeBasedForLoopColon,
- TT_StartOfName,
- TT_TemplateCloser,
- TT_TemplateOpener,
- TT_TrailingUnaryOperator,
- TT_UnaryOperator,
- TT_Unknown
-};
-
enum LineType {
LT_Invalid,
LT_Other,
- LT_BuilderTypeCall,
LT_PreprocessorDirective,
LT_VirtualFunctionDecl,
LT_ObjCDecl, // An @interface, @implementation, or @protocol line.
@@ -68,175 +35,50 @@ enum LineType {
LT_ObjCProperty // An @property line.
};
-class AnnotatedToken {
-public:
- explicit AnnotatedToken(const FormatToken &FormatTok)
- : FormatTok(FormatTok), Type(TT_Unknown), SpacesRequiredBefore(0),
- CanBreakBefore(false), MustBreakBefore(false),
- ClosesTemplateDeclaration(false), MatchingParen(NULL),
- ParameterCount(0), BindingStrength(0), SplitPenalty(0),
- LongestObjCSelectorName(0), Parent(NULL),
- FakeRParens(0), LastInChainOfCalls(false),
- PartOfMultiVariableDeclStmt(false), NoMoreTokensOnLevel(false) {}
-
- bool is(tok::TokenKind Kind) const { return FormatTok.Tok.is(Kind); }
-
- bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const {
- return is(K1) || is(K2);
- }
-
- bool isOneOf(tok::TokenKind K1, tok::TokenKind K2, tok::TokenKind K3) const {
- return is(K1) || is(K2) || is(K3);
- }
-
- bool isOneOf(
- tok::TokenKind K1, tok::TokenKind K2, tok::TokenKind K3,
- tok::TokenKind K4, tok::TokenKind K5 = tok::NUM_TOKENS,
- tok::TokenKind K6 = tok::NUM_TOKENS, tok::TokenKind K7 = tok::NUM_TOKENS,
- tok::TokenKind K8 = tok::NUM_TOKENS, tok::TokenKind K9 = tok::NUM_TOKENS,
- tok::TokenKind K10 = tok::NUM_TOKENS,
- tok::TokenKind K11 = tok::NUM_TOKENS,
- tok::TokenKind K12 = tok::NUM_TOKENS) const {
- return is(K1) || is(K2) || is(K3) || is(K4) || is(K5) || is(K6) || is(K7) ||
- is(K8) || is(K9) || is(K10) || is(K11) || is(K12);
- }
-
- bool isNot(tok::TokenKind Kind) const { return FormatTok.Tok.isNot(Kind); }
-
- bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const {
- return FormatTok.Tok.isObjCAtKeyword(Kind);
- }
-
- bool isAccessSpecifier(bool ColonRequired = true) const {
- return isOneOf(tok::kw_public, tok::kw_protected, tok::kw_private) &&
- (!ColonRequired ||
- (!Children.empty() && Children[0].is(tok::colon)));
- }
-
- bool isObjCAccessSpecifier() const {
- return is(tok::at) && !Children.empty() &&
- (Children[0].isObjCAtKeyword(tok::objc_public) ||
- Children[0].isObjCAtKeyword(tok::objc_protected) ||
- Children[0].isObjCAtKeyword(tok::objc_package) ||
- Children[0].isObjCAtKeyword(tok::objc_private));
- }
-
- /// \brief Returns whether \p Tok is ([{ or a template opening <.
- bool opensScope() const;
- /// \brief Returns whether \p Tok is )]} or a template opening >.
- bool closesScope() const;
-
- bool isUnaryOperator() const;
- bool isBinaryOperator() const;
- bool isTrailingComment() const;
-
- FormatToken FormatTok;
-
- TokenType Type;
-
- unsigned SpacesRequiredBefore;
- bool CanBreakBefore;
- bool MustBreakBefore;
-
- bool ClosesTemplateDeclaration;
-
- AnnotatedToken *MatchingParen;
-
- /// \brief Number of parameters, if this is "(", "[" or "<".
- ///
- /// This is initialized to 1 as we don't need to distinguish functions with
- /// 0 parameters from functions with 1 parameter. Thus, we can simply count
- /// the number of commas.
- unsigned ParameterCount;
-
- /// \brief The total length of the line up to and including this token.
- unsigned TotalLength;
-
- // FIXME: Come up with a 'cleaner' concept.
- /// \brief The binding strength of a token. This is a combined value of
- /// operator precedence, parenthesis nesting, etc.
- unsigned BindingStrength;
-
- /// \brief Penalty for inserting a line break before this token.
- unsigned SplitPenalty;
-
- /// \brief If this is the first ObjC selector name in an ObjC method
- /// definition or call, this contains the length of the longest name.
- unsigned LongestObjCSelectorName;
-
- std::vector<AnnotatedToken> Children;
- AnnotatedToken *Parent;
-
- /// \brief Stores the number of required fake parentheses and the
- /// corresponding operator precedence.
- ///
- /// If multiple fake parentheses start at a token, this vector stores them in
- /// reverse order, i.e. inner fake parenthesis first.
- SmallVector<prec::Level, 4> FakeLParens;
- /// \brief Insert this many fake ) after this token for correct indentation.
- unsigned FakeRParens;
-
- /// \brief Is this the last "." or "->" in a builder-type call?
- bool LastInChainOfCalls;
-
- /// \brief Is this token part of a \c DeclStmt defining multiple variables?
- ///
- /// Only set if \c Type == \c TT_StartOfName.
- bool PartOfMultiVariableDeclStmt;
-
- /// \brief Set to \c true for "("-tokens if this is the last token other than
- /// ")" in the next higher parenthesis level.
- ///
- /// If this is \c true, no more formatting decisions have to be made on the
- /// next higher parenthesis level, enabling optimizations.
- ///
- /// Example:
- /// \code
- /// aaaaaa(aaaaaa());
- /// ^ // Set to true for this parenthesis.
- /// \endcode
- bool NoMoreTokensOnLevel;
-
- /// \brief Returns the previous token ignoring comments.
- AnnotatedToken *getPreviousNoneComment() const;
-
- /// \brief Returns the next token ignoring comments.
- const AnnotatedToken *getNextNoneComment() const;
-};
-
class AnnotatedLine {
public:
AnnotatedLine(const UnwrappedLine &Line)
- : First(Line.Tokens.front()), Level(Line.Level),
+ : First(Line.Tokens.front().Tok), Level(Line.Level),
InPPDirective(Line.InPPDirective),
MustBeDeclaration(Line.MustBeDeclaration), MightBeFunctionDecl(false),
StartsDefinition(false) {
assert(!Line.Tokens.empty());
- AnnotatedToken *Current = &First;
- for (std::list<FormatToken>::const_iterator I = ++Line.Tokens.begin(),
- E = Line.Tokens.end();
+
+ // Calculate Next and Previous for all tokens. Note that we must overwrite
+ // Next and Previous for every token, as previous formatting runs might have
+ // left them in a different state.
+ First->Previous = NULL;
+ FormatToken *Current = First;
+ for (std::list<UnwrappedLineNode>::const_iterator I = ++Line.Tokens.begin(),
+ E = Line.Tokens.end();
I != E; ++I) {
- Current->Children.push_back(AnnotatedToken(*I));
- Current->Children[0].Parent = Current;
- Current = &Current->Children[0];
+ const UnwrappedLineNode &Node = *I;
+ Current->Next = I->Tok;
+ I->Tok->Previous = Current;
+ Current = Current->Next;
+ Current->Children.clear();
+ for (SmallVectorImpl<UnwrappedLine>::const_iterator
+ I = Node.Children.begin(),
+ E = Node.Children.end();
+ I != E; ++I) {
+ Children.push_back(new AnnotatedLine(*I));
+ Current->Children.push_back(Children.back());
+ }
}
Last = Current;
+ Last->Next = NULL;
}
- AnnotatedLine(const AnnotatedLine &Other)
- : First(Other.First), Type(Other.Type), Level(Other.Level),
- InPPDirective(Other.InPPDirective),
- MustBeDeclaration(Other.MustBeDeclaration),
- MightBeFunctionDecl(Other.MightBeFunctionDecl),
- StartsDefinition(Other.StartsDefinition) {
- Last = &First;
- while (!Last->Children.empty()) {
- Last->Children[0].Parent = Last;
- Last = &Last->Children[0];
+
+ ~AnnotatedLine() {
+ for (unsigned i = 0, e = Children.size(); i != e; ++i) {
+ delete Children[i];
}
}
- AnnotatedToken First;
- AnnotatedToken *Last;
+ FormatToken *First;
+ FormatToken *Last;
+
+ SmallVector<AnnotatedLine *, 0> Children;
LineType Type;
unsigned Level;
@@ -244,42 +86,47 @@ public:
bool MustBeDeclaration;
bool MightBeFunctionDecl;
bool StartsDefinition;
-};
-inline prec::Level getPrecedence(const AnnotatedToken &Tok) {
- return getBinOpPrecedence(Tok.FormatTok.Tok.getKind(), true, true);
-}
+private:
+ // Disallow copying.
+ AnnotatedLine(const AnnotatedLine &) LLVM_DELETED_FUNCTION;
+ void operator=(const AnnotatedLine &) LLVM_DELETED_FUNCTION;
+};
/// \brief Determines extra information about the tokens comprising an
/// \c UnwrappedLine.
class TokenAnnotator {
public:
- TokenAnnotator(const FormatStyle &Style, SourceManager &SourceMgr, Lexer &Lex,
- IdentifierInfo &Ident_in)
- : Style(Style), SourceMgr(SourceMgr), Lex(Lex), Ident_in(Ident_in) {
- }
+ TokenAnnotator(const FormatStyle &Style, IdentifierInfo &Ident_in)
+ : Style(Style), Ident_in(Ident_in) {}
+
+ /// \brief Adapts the indent levels of comment lines to the indent of the
+ /// subsequent line.
+ // FIXME: Can/should this be done in the UnwrappedLineParser?
+ void setCommentLineLevels(SmallVectorImpl<AnnotatedLine *> &Lines);
void annotate(AnnotatedLine &Line);
void calculateFormattingInformation(AnnotatedLine &Line);
private:
/// \brief Calculate the penalty for splitting before \c Tok.
- unsigned splitPenalty(const AnnotatedLine &Line, const AnnotatedToken &Tok);
+ unsigned splitPenalty(const AnnotatedLine &Line, const FormatToken &Tok,
+ bool InFunctionDecl);
+
+ bool spaceRequiredBetween(const AnnotatedLine &Line, const FormatToken &Left,
+ const FormatToken &Right);
- bool spaceRequiredBetween(const AnnotatedLine &Line,
- const AnnotatedToken &Left,
- const AnnotatedToken &Right);
+ bool spaceRequiredBefore(const AnnotatedLine &Line, const FormatToken &Tok);
- bool spaceRequiredBefore(const AnnotatedLine &Line,
- const AnnotatedToken &Tok);
+ bool mustBreakBefore(const AnnotatedLine &Line, const FormatToken &Right);
- bool canBreakBefore(const AnnotatedLine &Line, const AnnotatedToken &Right);
+ bool canBreakBefore(const AnnotatedLine &Line, const FormatToken &Right);
void printDebugInfo(const AnnotatedLine &Line);
+ void calculateUnbreakableTailLengths(AnnotatedLine &Line);
+
const FormatStyle &Style;
- SourceManager &SourceMgr;
- Lexer &Lex;
// Contextual keywords:
IdentifierInfo &Ident_in;
diff --git a/lib/Format/UnwrappedLineParser.cpp b/lib/Format/UnwrappedLineParser.cpp
index 722af5d2b763..e0b090f6abc9 100644
--- a/lib/Format/UnwrappedLineParser.cpp
+++ b/lib/Format/UnwrappedLineParser.cpp
@@ -16,12 +16,22 @@
#define DEBUG_TYPE "format-parser"
#include "UnwrappedLineParser.h"
-#include "clang/Basic/Diagnostic.h"
#include "llvm/Support/Debug.h"
namespace clang {
namespace format {
+class FormatTokenSource {
+public:
+ virtual ~FormatTokenSource() {}
+ virtual FormatToken *getNextToken() = 0;
+
+ virtual unsigned getPosition() = 0;
+ virtual FormatToken *setPosition(unsigned Position) = 0;
+};
+
+namespace {
+
class ScopedDeclarationState {
public:
ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
@@ -37,6 +47,7 @@ public:
else
Line.MustBeDeclaration = true;
}
+
private:
UnwrappedLine &Line;
std::vector<bool> &Stack;
@@ -45,11 +56,11 @@ private:
class ScopedMacroState : public FormatTokenSource {
public:
ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
- FormatToken &ResetToken, bool &StructuralError)
+ FormatToken *&ResetToken, bool &StructuralError)
: Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
StructuralError(StructuralError),
- PreviousStructuralError(StructuralError) {
+ PreviousStructuralError(StructuralError), Token(NULL) {
TokenSource = this;
Line.Level = 0;
Line.InPPDirective = true;
@@ -63,44 +74,60 @@ public:
StructuralError = PreviousStructuralError;
}
- virtual FormatToken getNextToken() {
+ virtual FormatToken *getNextToken() {
// The \c UnwrappedLineParser guards against this by never calling
// \c getNextToken() after it has encountered the first eof token.
assert(!eof());
Token = PreviousTokenSource->getNextToken();
if (eof())
- return createEOF();
+ return getFakeEOF();
return Token;
}
-private:
- bool eof() { return Token.HasUnescapedNewline; }
+ virtual unsigned getPosition() { return PreviousTokenSource->getPosition(); }
- FormatToken createEOF() {
- FormatToken FormatTok;
- FormatTok.Tok.startToken();
- FormatTok.Tok.setKind(tok::eof);
- return FormatTok;
+ virtual FormatToken *setPosition(unsigned Position) {
+ Token = PreviousTokenSource->setPosition(Position);
+ return Token;
+ }
+
+private:
+ bool eof() { return Token && Token->HasUnescapedNewline; }
+
+ FormatToken *getFakeEOF() {
+ static bool EOFInitialized = false;
+ static FormatToken FormatTok;
+ if (!EOFInitialized) {
+ FormatTok.Tok.startToken();
+ FormatTok.Tok.setKind(tok::eof);
+ EOFInitialized = true;
+ }
+ return &FormatTok;
}
UnwrappedLine &Line;
FormatTokenSource *&TokenSource;
- FormatToken &ResetToken;
+ FormatToken *&ResetToken;
unsigned PreviousLineLevel;
FormatTokenSource *PreviousTokenSource;
bool &StructuralError;
bool PreviousStructuralError;
- FormatToken Token;
+ FormatToken *Token;
};
+} // end anonymous namespace
+
class ScopedLineState {
public:
ScopedLineState(UnwrappedLineParser &Parser,
bool SwitchToPreprocessorLines = false)
- : Parser(Parser), SwitchToPreprocessorLines(SwitchToPreprocessorLines) {
+ : Parser(Parser) {
+ OriginalLines = Parser.CurrentLines;
if (SwitchToPreprocessorLines)
Parser.CurrentLines = &Parser.PreprocessorDirectives;
+ else if (!Parser.Line->Tokens.empty())
+ Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
PreBlockLine = Parser.Line.take();
Parser.Line.reset(new UnwrappedLine());
Parser.Line->Level = PreBlockLine->Level;
@@ -113,37 +140,102 @@ public:
}
assert(Parser.Line->Tokens.empty());
Parser.Line.reset(PreBlockLine);
- Parser.MustBreakBeforeNextToken = true;
- if (SwitchToPreprocessorLines)
- Parser.CurrentLines = &Parser.Lines;
+ if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
+ Parser.MustBreakBeforeNextToken = true;
+ Parser.CurrentLines = OriginalLines;
}
private:
UnwrappedLineParser &Parser;
- const bool SwitchToPreprocessorLines;
UnwrappedLine *PreBlockLine;
+ SmallVectorImpl<UnwrappedLine> *OriginalLines;
};
-UnwrappedLineParser::UnwrappedLineParser(
- clang::DiagnosticsEngine &Diag, const FormatStyle &Style,
- FormatTokenSource &Tokens, UnwrappedLineConsumer &Callback)
+namespace {
+
+class IndexedTokenSource : public FormatTokenSource {
+public:
+ IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
+ : Tokens(Tokens), Position(-1) {}
+
+ virtual FormatToken *getNextToken() {
+ ++Position;
+ return Tokens[Position];
+ }
+
+ virtual unsigned getPosition() {
+ assert(Position >= 0);
+ return Position;
+ }
+
+ virtual FormatToken *setPosition(unsigned P) {
+ Position = P;
+ return Tokens[Position];
+ }
+
+ void reset() { Position = -1; }
+
+private:
+ ArrayRef<FormatToken *> Tokens;
+ int Position;
+};
+
+} // end anonymous namespace
+
+UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
+ ArrayRef<FormatToken *> Tokens,
+ UnwrappedLineConsumer &Callback)
: Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
- CurrentLines(&Lines), StructuralError(false), Diag(Diag), Style(Style),
- Tokens(&Tokens), Callback(Callback) {}
+ CurrentLines(&Lines), StructuralError(false), Style(Style), Tokens(NULL),
+ Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1) {}
+
+void UnwrappedLineParser::reset() {
+ PPBranchLevel = -1;
+ Line.reset(new UnwrappedLine);
+ CommentsBeforeNextToken.clear();
+ FormatTok = NULL;
+ MustBreakBeforeNextToken = false;
+ PreprocessorDirectives.clear();
+ CurrentLines = &Lines;
+ DeclarationScopeStack.clear();
+ StructuralError = false;
+ PPStack.clear();
+}
bool UnwrappedLineParser::parse() {
- DEBUG(llvm::dbgs() << "----\n");
- readToken();
- parseFile();
- for (std::vector<UnwrappedLine>::iterator I = Lines.begin(), E = Lines.end();
- I != E; ++I) {
- Callback.consumeUnwrappedLine(*I);
- }
+ IndexedTokenSource TokenSource(AllTokens);
+ do {
+ DEBUG(llvm::dbgs() << "----\n");
+ reset();
+ Tokens = &TokenSource;
+ TokenSource.reset();
+
+ readToken();
+ parseFile();
+ // Create line with eof token.
+ pushToken(FormatTok);
+ addUnwrappedLine();
+
+ for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
+ E = Lines.end();
+ I != E; ++I) {
+ Callback.consumeUnwrappedLine(*I);
+ }
+ Callback.finishRun();
+ Lines.clear();
+ while (!PPLevelBranchIndex.empty() &&
+ PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
+ PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
+ PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
+ }
+ if (!PPLevelBranchIndex.empty()) {
+ ++PPLevelBranchIndex.back();
+ assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
+ assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
+ }
+ } while (!PPLevelBranchIndex.empty());
- // Create line with eof token.
- pushToken(FormatTok);
- Callback.consumeUnwrappedLine(*Line);
return StructuralError;
}
@@ -151,15 +243,16 @@ void UnwrappedLineParser::parseFile() {
ScopedDeclarationState DeclarationState(
*Line, DeclarationScopeStack,
/*MustBeDeclaration=*/ !Line->InPPDirective);
- parseLevel(/*HasOpeningBrace=*/ false);
+ parseLevel(/*HasOpeningBrace=*/false);
// Make sure to format the remaining tokens.
flushComments(true);
addUnwrappedLine();
}
void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
+ bool SwitchLabelEncountered = false;
do {
- switch (FormatTok.Tok.getKind()) {
+ switch (FormatTok->Tok.getKind()) {
case tok::comment:
nextToken();
addUnwrappedLine();
@@ -167,19 +260,24 @@ void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
case tok::l_brace:
// FIXME: Add parameter whether this can happen - if this happens, we must
// be in a non-declaration context.
- parseBlock(/*MustBeDeclaration=*/ false);
+ parseBlock(/*MustBeDeclaration=*/false);
addUnwrappedLine();
break;
case tok::r_brace:
if (HasOpeningBrace)
return;
- Diag.Report(FormatTok.Tok.getLocation(),
- Diag.getCustomDiagID(clang::DiagnosticsEngine::Error,
- "unexpected '}'"));
StructuralError = true;
nextToken();
addUnwrappedLine();
break;
+ case tok::kw_default:
+ case tok::kw_case:
+ if (!SwitchLabelEncountered &&
+ (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
+ ++Line->Level;
+ SwitchLabelEncountered = true;
+ parseStructuralElement();
+ break;
default:
parseStructuralElement();
break;
@@ -187,41 +285,150 @@ void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
} while (!eof());
}
-void UnwrappedLineParser::parseBlock(bool MustBeDeclaration,
- unsigned AddLevels) {
- assert(FormatTok.Tok.is(tok::l_brace) && "'{' expected");
+void UnwrappedLineParser::calculateBraceTypes() {
+ // We'll parse forward through the tokens until we hit
+ // a closing brace or eof - note that getNextToken() will
+ // parse macros, so this will magically work inside macro
+ // definitions, too.
+ unsigned StoredPosition = Tokens->getPosition();
+ unsigned Position = StoredPosition;
+ FormatToken *Tok = FormatTok;
+ // Keep a stack of positions of lbrace tokens. We will
+ // update information about whether an lbrace starts a
+ // braced init list or a different block during the loop.
+ SmallVector<FormatToken *, 8> LBraceStack;
+ assert(Tok->Tok.is(tok::l_brace));
+ do {
+ // Get next none-comment token.
+ FormatToken *NextTok;
+ unsigned ReadTokens = 0;
+ do {
+ NextTok = Tokens->getNextToken();
+ ++ReadTokens;
+ } while (NextTok->is(tok::comment));
+
+ switch (Tok->Tok.getKind()) {
+ case tok::l_brace:
+ LBraceStack.push_back(Tok);
+ break;
+ case tok::r_brace:
+ if (!LBraceStack.empty()) {
+ if (LBraceStack.back()->BlockKind == BK_Unknown) {
+ // If there is a comma, semicolon or right paren after the closing
+ // brace, we assume this is a braced initializer list. Note that
+ // regardless how we mark inner braces here, we will overwrite the
+ // BlockKind later if we parse a braced list (where all blocks inside
+ // are by default braced lists), or when we explicitly detect blocks
+ // (for example while parsing lambdas).
+ //
+ // We exclude + and - as they can be ObjC visibility modifiers.
+ if (NextTok->isOneOf(tok::comma, tok::semi, tok::r_paren,
+ tok::r_square, tok::l_brace, tok::colon) ||
+ (NextTok->isBinaryOperator() &&
+ !NextTok->isOneOf(tok::plus, tok::minus))) {
+ Tok->BlockKind = BK_BracedInit;
+ LBraceStack.back()->BlockKind = BK_BracedInit;
+ } else {
+ Tok->BlockKind = BK_Block;
+ LBraceStack.back()->BlockKind = BK_Block;
+ }
+ }
+ LBraceStack.pop_back();
+ }
+ break;
+ case tok::semi:
+ case tok::kw_if:
+ case tok::kw_while:
+ case tok::kw_for:
+ case tok::kw_switch:
+ case tok::kw_try:
+ if (!LBraceStack.empty())
+ LBraceStack.back()->BlockKind = BK_Block;
+ break;
+ default:
+ break;
+ }
+ Tok = NextTok;
+ Position += ReadTokens;
+ } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
+ // Assume other blocks for all unclosed opening braces.
+ for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
+ if (LBraceStack[i]->BlockKind == BK_Unknown)
+ LBraceStack[i]->BlockKind = BK_Block;
+ }
+
+ FormatTok = Tokens->setPosition(StoredPosition);
+}
+
+void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
+ bool MunchSemi) {
+ assert(FormatTok->Tok.is(tok::l_brace) && "'{' expected");
+ unsigned InitialLevel = Line->Level;
nextToken();
addUnwrappedLine();
ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
MustBeDeclaration);
- Line->Level += AddLevels;
- parseLevel(/*HasOpeningBrace=*/ true);
+ if (AddLevel)
+ ++Line->Level;
+ parseLevel(/*HasOpeningBrace=*/true);
- if (!FormatTok.Tok.is(tok::r_brace)) {
- Line->Level -= AddLevels;
+ if (!FormatTok->Tok.is(tok::r_brace)) {
+ Line->Level = InitialLevel;
StructuralError = true;
return;
}
nextToken(); // Munch the closing brace.
- Line->Level -= AddLevels;
+ if (MunchSemi && FormatTok->Tok.is(tok::semi))
+ nextToken();
+ Line->Level = InitialLevel;
+}
+
+void UnwrappedLineParser::parseChildBlock() {
+ FormatTok->BlockKind = BK_Block;
+ nextToken();
+ {
+ ScopedLineState LineState(*this);
+ ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
+ /*MustBeDeclaration=*/false);
+ Line->Level += 1;
+ parseLevel(/*HasOpeningBrace=*/true);
+ Line->Level -= 1;
+ }
+ nextToken();
}
void UnwrappedLineParser::parsePPDirective() {
- assert(FormatTok.Tok.is(tok::hash) && "'#' expected");
+ assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
ScopedMacroState MacroState(*Line, Tokens, FormatTok, StructuralError);
nextToken();
- if (FormatTok.Tok.getIdentifierInfo() == NULL) {
+ if (FormatTok->Tok.getIdentifierInfo() == NULL) {
parsePPUnknown();
return;
}
- switch (FormatTok.Tok.getIdentifierInfo()->getPPKeywordID()) {
+ switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
case tok::pp_define:
parsePPDefine();
+ return;
+ case tok::pp_if:
+ parsePPIf(/*IfDef=*/false);
+ break;
+ case tok::pp_ifdef:
+ case tok::pp_ifndef:
+ parsePPIf(/*IfDef=*/true);
+ break;
+ case tok::pp_else:
+ parsePPElse();
+ break;
+ case tok::pp_elif:
+ parsePPElIf();
+ break;
+ case tok::pp_endif:
+ parsePPEndIf();
break;
default:
parsePPUnknown();
@@ -229,16 +436,77 @@ void UnwrappedLineParser::parsePPDirective() {
}
}
+// Pushes the state for a newly entered preprocessor branch onto PPStack.
+// A branch nested inside an unreachable branch is itself unreachable;
+// otherwise it is recorded as an ordinary conditional.
+void UnwrappedLineParser::pushPPConditional() {
+  const bool EnclosingIsUnreachable =
+      !PPStack.empty() && PPStack.back() == PP_Unreachable;
+  PPStack.push_back(EnclosingIsUnreachable ? PP_Unreachable : PP_Conditional);
+}
+
+// Handles '#if' (IfDef == false) and '#ifdef' / '#ifndef' (IfDef == true).
+//
+// Opens a new conditional nesting level, starts a fresh branch chain for it
+// and records on PPStack whether the first branch is reachable. The rest of
+// the directive is consumed by parsePPUnknown().
+void UnwrappedLineParser::parsePPIf(bool IfDef) {
+  ++PPBranchLevel;
+  const int KnownLevels = (int)PPLevelBranchIndex.size();
+  assert(PPBranchLevel >= 0 && PPBranchLevel <= KnownLevels);
+  // First time this nesting depth is seen: create its bookkeeping slots.
+  if (PPBranchLevel == KnownLevels) {
+    PPLevelBranchIndex.push_back(0);
+    PPLevelBranchCount.push_back(0);
+  }
+  PPChainBranchIndex.push(0);
+  nextToken();
+
+  // The condition counts as literally false when its first token is the
+  // literal "0" or the keyword 'false'. getLiteralData() is only queried
+  // after isLiteral() succeeded.
+  const bool IsZeroLiteral =
+      FormatTok->Tok.isLiteral() &&
+      StringRef(FormatTok->Tok.getLiteralData(), FormatTok->Tok.getLength()) ==
+          "0";
+  const bool IsLiteralFalse =
+      IsZeroLiteral || FormatTok->Tok.is(tok::kw_false);
+
+  // The first branch is skipped for '#if 0' / '#if false', or when a branch
+  // index other than the first is recorded for this level.
+  const bool FirstBranchUnreachable =
+      (!IfDef && IsLiteralFalse) || PPLevelBranchIndex[PPBranchLevel] > 0;
+  if (FirstBranchUnreachable)
+    PPStack.push_back(PP_Unreachable);
+  else
+    pushPPConditional();
+  parsePPUnknown();
+}
+
+// Handles '#else' (and, through parsePPElIf(), '#elif').
+//
+// Leaves the previous branch of the current conditional, advances the branch
+// counter of the current chain and records on PPStack whether the branch now
+// being entered is reachable. Empty stacks (directive without an opening
+// '#if') are tolerated.
+void UnwrappedLineParser::parsePPElse() {
+  if (!PPStack.empty())
+    PPStack.pop_back();
+  assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
+
+  const bool InsideChain = !PPChainBranchIndex.empty();
+  if (InsideChain)
+    ++PPChainBranchIndex.top();
+
+  // The new branch is unreachable when it is not the branch index recorded
+  // for this nesting level.
+  const bool NotTheRecordedBranch =
+      PPBranchLevel >= 0 && InsideChain &&
+      PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top();
+  if (NotTheRecordedBranch)
+    PPStack.push_back(PP_Unreachable);
+  else
+    pushPPConditional();
+  parsePPUnknown();
+}
+
+// Handles '#elif'. The condition is not evaluated; the directive is treated
+// exactly like '#else'.
+void UnwrappedLineParser::parsePPElIf() {
+  parsePPElse();
+}
+
+// Handles '#endif'.
+//
+// Records how many branches the conditional chain being closed had (keeping
+// the maximum seen for this nesting level), then leaves the nesting level and
+// drops the chain and reachability entries that belong to it. The rest of the
+// directive is consumed by parsePPUnknown().
+void UnwrappedLineParser::parsePPEndIf() {
+  assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
+  if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
+    if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
+      PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
+    }
+  }
+  // Guard against an '#endif' without a matching '#if'. parsePPIf()
+  // increments PPBranchLevel and then requires it to be >= 0, so the level
+  // must never drop below -1 here; otherwise the next '#if' trips that
+  // assertion (or, with assertions disabled, indexes PPLevelBranchIndex with
+  // a negative value).
+  if (PPBranchLevel > -1)
+    --PPBranchLevel;
+  if (!PPChainBranchIndex.empty())
+    PPChainBranchIndex.pop();
+  if (!PPStack.empty())
+    PPStack.pop_back();
+  parsePPUnknown();
+}
+
void UnwrappedLineParser::parsePPDefine() {
nextToken();
- if (FormatTok.Tok.getKind() != tok::identifier) {
+ if (FormatTok->Tok.getKind() != tok::identifier) {
parsePPUnknown();
return;
}
nextToken();
- if (FormatTok.Tok.getKind() == tok::l_paren &&
- FormatTok.WhiteSpaceLength == 0) {
+ if (FormatTok->Tok.getKind() == tok::l_paren &&
+ FormatTok->WhitespaceRange.getBegin() ==
+ FormatTok->WhitespaceRange.getEnd()) {
parseParens();
}
addUnwrappedLine();
@@ -287,15 +555,15 @@ bool tokenCanStartNewLine(clang::Token Tok) {
}
void UnwrappedLineParser::parseStructuralElement() {
- assert(!FormatTok.Tok.is(tok::l_brace));
- switch (FormatTok.Tok.getKind()) {
+ assert(!FormatTok->Tok.is(tok::l_brace));
+ switch (FormatTok->Tok.getKind()) {
case tok::at:
nextToken();
- if (FormatTok.Tok.is(tok::l_brace)) {
+ if (FormatTok->Tok.is(tok::l_brace)) {
parseBracedList();
break;
}
- switch (FormatTok.Tok.getObjCKeywordID()) {
+ switch (FormatTok->Tok.getObjCKeywordID()) {
case tok::objc_public:
case tok::objc_protected:
case tok::objc_package:
@@ -322,7 +590,7 @@ void UnwrappedLineParser::parseStructuralElement() {
return;
case tok::kw_inline:
nextToken();
- if (FormatTok.Tok.is(tok::kw_namespace)) {
+ if (FormatTok->Tok.is(tok::kw_namespace)) {
parseNamespace();
return;
}
@@ -357,10 +625,10 @@ void UnwrappedLineParser::parseStructuralElement() {
return;
case tok::kw_extern:
nextToken();
- if (FormatTok.Tok.is(tok::string_literal)) {
+ if (FormatTok->Tok.is(tok::string_literal)) {
nextToken();
- if (FormatTok.Tok.is(tok::l_brace)) {
- parseBlock(/*MustBeDeclaration=*/ true, 0);
+ if (FormatTok->Tok.is(tok::l_brace)) {
+ parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
addUnwrappedLine();
return;
}
@@ -371,10 +639,10 @@ void UnwrappedLineParser::parseStructuralElement() {
break;
}
do {
- switch (FormatTok.Tok.getKind()) {
+ switch (FormatTok->Tok.getKind()) {
case tok::at:
nextToken();
- if (FormatTok.Tok.is(tok::l_brace))
+ if (FormatTok->Tok.is(tok::l_brace))
parseBracedList();
break;
case tok::kw_enum:
@@ -397,38 +665,63 @@ void UnwrappedLineParser::parseStructuralElement() {
case tok::l_paren:
parseParens();
break;
+ case tok::caret:
+ nextToken();
+ if (FormatTok->is(tok::l_brace)) {
+ parseChildBlock();
+ }
+ break;
case tok::l_brace:
- // A block outside of parentheses must be the last part of a
- // structural element.
- // FIXME: Figure out cases where this is not true, and add projections for
- // them (the one we know is missing are lambdas).
- parseBlock(/*MustBeDeclaration=*/ false);
- addUnwrappedLine();
- return;
- case tok::identifier:
+ if (!tryToParseBracedList()) {
+ // A block outside of parentheses must be the last part of a
+ // structural element.
+ // FIXME: Figure out cases where this is not true, and add projections
+ // for them (the one we know is missing are lambdas).
+ if (Style.BreakBeforeBraces == FormatStyle::BS_Linux ||
+ Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup ||
+ Style.BreakBeforeBraces == FormatStyle::BS_Allman)
+ addUnwrappedLine();
+ parseBlock(/*MustBeDeclaration=*/false);
+ addUnwrappedLine();
+ return;
+ }
+ // Otherwise this was a braced init list, and the structural
+ // element continues.
+ break;
+ case tok::identifier: {
+ StringRef Text = FormatTok->TokenText;
nextToken();
if (Line->Tokens.size() == 1) {
- if (FormatTok.Tok.is(tok::colon)) {
+ if (FormatTok->Tok.is(tok::colon)) {
parseLabel();
return;
}
// Recognize function-like macro usages without trailing semicolon.
- if (FormatTok.Tok.is(tok::l_paren)) {
+ if (FormatTok->Tok.is(tok::l_paren)) {
parseParens();
- if (FormatTok.HasUnescapedNewline &&
- tokenCanStartNewLine(FormatTok.Tok)) {
+ if (FormatTok->HasUnescapedNewline &&
+ tokenCanStartNewLine(FormatTok->Tok)) {
addUnwrappedLine();
return;
}
+ } else if (FormatTok->HasUnescapedNewline && Text.size() >= 5 &&
+ Text == Text.upper()) {
+ // Recognize free-standing macros like Q_OBJECT.
+ addUnwrappedLine();
+ return;
}
}
break;
+ }
case tok::equal:
nextToken();
- if (FormatTok.Tok.is(tok::l_brace)) {
+ if (FormatTok->Tok.is(tok::l_brace)) {
parseBracedList();
}
break;
+ case tok::l_square:
+ tryToParseLambda();
+ break;
default:
nextToken();
break;
@@ -436,52 +729,146 @@ void UnwrappedLineParser::parseStructuralElement() {
} while (!eof());
}
-void UnwrappedLineParser::parseBracedList() {
+// Called with the current token at '['. Tries to parse a lambda expression:
+// introducer, then any mix of parenthesized groups, identifiers and
+// 'mutable', then a '{...}' body parsed as a child block. On success the '['
+// token is tagged TT_LambdaLSquare. If the tokens do not fit this shape the
+// function returns early and leaves whatever was consumed so far in the
+// current line.
+void UnwrappedLineParser::tryToParseLambda() {
+  // FIXME: This is a dirty way to access the previous token. Find a better
+  // solution.
+  // A '[' directly after an identifier or 'operator' is not treated as a
+  // lambda; just consume it.
+  const bool FollowsNameOrOperator =
+      !Line->Tokens.empty() &&
+      Line->Tokens.back().Tok->isOneOf(tok::identifier, tok::kw_operator);
+  if (FollowsNameOrOperator) {
+    nextToken();
+    return;
+  }
+
+  assert(FormatTok->is(tok::l_square));
+  // Keep a handle on the '[' so it can be tagged once the body is found.
+  FormatToken &OpeningSquare = *FormatTok;
+  if (!tryToParseLambdaIntroducer())
+    return;
+
+  // Skip everything allowed between the introducer and the body; any other
+  // token means this is not a lambda we understand.
+  while (FormatTok->isNot(tok::l_brace)) {
+    if (FormatTok->is(tok::l_paren)) {
+      parseParens();
+    } else if (FormatTok->isOneOf(tok::identifier, tok::kw_mutable)) {
+      nextToken();
+    } else {
+      return;
+    }
+  }
+
+  OpeningSquare.Type = TT_LambdaLSquare;
+  parseChildBlock();
+}
+
+// Consumes the current token (the caller asserts it is '[') and then tries to
+// consume a lambda introducer up to and including the closing ']'.
+//
+// Accepted shapes: '[]', '[=]', '[&]', an optional leading '=' or '&' default
+// followed by a comma, and a comma-separated list of captures, each being an
+// identifier or 'this' with an optional '&' in front.
+//
+// Returns true if a complete introducer was consumed. Returns false on the
+// first token that does not fit (tokens consumed up to that point stay
+// consumed) or when the end of input is reached.
+bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
+  nextToken();
+
+  // '[]'
+  if (FormatTok->is(tok::r_square)) {
+    nextToken();
+    return true;
+  }
+
+  // Leading capture default: '=' or '&'.
+  if (FormatTok->isOneOf(tok::equal, tok::amp)) {
+    const bool StartsWithAmp = FormatTok->is(tok::amp);
+    nextToken();
+    // '[=]' or '[&]'
+    if (FormatTok->is(tok::r_square)) {
+      nextToken();
+      return true;
+    }
+    if (FormatTok->is(tok::comma)) {
+      nextToken();
+    } else if (!(StartsWithAmp && FormatTok->is(tok::identifier))) {
+      // After '=' only ',' may follow. After '&' an identifier is also fine:
+      // the '&' then belonged to the first capture, which the loop below
+      // picks up without consuming anything here.
+      return false;
+    }
+  }
+
+  // Capture list: ['&'] (identifier | 'this') { ',' ... } ']'
+  do {
+    if (FormatTok->is(tok::amp))
+      nextToken();
+    if (!FormatTok->isOneOf(tok::identifier, tok::kw_this))
+      return false;
+    nextToken();
+    if (FormatTok->is(tok::r_square)) {
+      nextToken();
+      return true;
+    }
+    if (FormatTok->isNot(tok::comma))
+      return false;
+    nextToken();
+  } while (!eof());
+  return false;
+}
+
+// Decides whether the current token (expected to be a '{') opens a braced
+// list rather than a block, computing the brace kinds first if they are not
+// known yet. A braced list is parsed right away.
+//
+// Returns true if a braced list was parsed, false if the brace opens a block
+// (in which case nothing is consumed).
+bool UnwrappedLineParser::tryToParseBracedList() {
+  if (FormatTok->BlockKind == BK_Unknown)
+    calculateBraceTypes();
+  assert(FormatTok->BlockKind != BK_Unknown);
+
+  // Decide before parsing: parseBracedList() moves FormatTok forward.
+  const bool OpensBracedList = FormatTok->BlockKind != BK_Block;
+  if (OpensBracedList)
+    parseBracedList();
+  return OpensBracedList;
+}
+
+bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) {
+ bool HasError = false;
nextToken();
// FIXME: Once we have an expression parser in the UnwrappedLineParser,
// replace this by using parseAssigmentExpression() inside.
- bool StartOfExpression = true;
do {
// FIXME: When we start to support lambdas, we'll want to parse them away
// here, otherwise our bail-out scenarios below break. The better solution
// might be to just implement a more or less complete expression parser.
- switch (FormatTok.Tok.getKind()) {
- case tok::l_brace:
- if (!StartOfExpression) {
- // Probably a missing closing brace. Bail out.
- addUnwrappedLine();
- return;
+ switch (FormatTok->Tok.getKind()) {
+ case tok::caret:
+ nextToken();
+ if (FormatTok->is(tok::l_brace)) {
+ parseChildBlock();
}
+ break;
+ case tok::l_square:
+ tryToParseLambda();
+ break;
+ case tok::l_brace:
+ // Assume there are no blocks inside a braced init list apart
+ // from the ones we explicitly parse out (like lambdas).
+ FormatTok->BlockKind = BK_BracedInit;
parseBracedList();
- StartOfExpression = false;
break;
case tok::r_brace:
nextToken();
- return;
+ return !HasError;
case tok::semi:
- // Probably a missing closing brace. Bail out.
- return;
+ HasError = true;
+ if (!ContinueOnSemicolons)
+ return !HasError;
+ nextToken();
+ break;
case tok::comma:
nextToken();
- StartOfExpression = true;
break;
default:
nextToken();
- StartOfExpression = false;
break;
}
} while (!eof());
+ return false;
}
void UnwrappedLineParser::parseReturn() {
nextToken();
do {
- switch (FormatTok.Tok.getKind()) {
+ switch (FormatTok->Tok.getKind()) {
case tok::l_brace:
parseBracedList();
- if (FormatTok.Tok.isNot(tok::semi)) {
+ if (FormatTok->Tok.isNot(tok::semi)) {
// Assume missing ';'.
addUnwrappedLine();
return;
@@ -498,6 +885,9 @@ void UnwrappedLineParser::parseReturn() {
nextToken();
addUnwrappedLine();
return;
+ case tok::l_square:
+ tryToParseLambda();
+ break;
default:
nextToken();
break;
@@ -506,29 +896,31 @@ void UnwrappedLineParser::parseReturn() {
}
void UnwrappedLineParser::parseParens() {
- assert(FormatTok.Tok.is(tok::l_paren) && "'(' expected.");
+ assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
nextToken();
do {
- switch (FormatTok.Tok.getKind()) {
+ switch (FormatTok->Tok.getKind()) {
case tok::l_paren:
parseParens();
break;
case tok::r_paren:
nextToken();
return;
+ case tok::r_brace:
+ // A "}" inside parenthesis is an error if there wasn't a matching "{".
+ return;
+ case tok::l_square:
+ tryToParseLambda();
+ break;
case tok::l_brace: {
- nextToken();
- ScopedLineState LineState(*this);
- ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
- /*MustBeDeclaration=*/ false);
- Line->Level += 1;
- parseLevel(/*HasOpeningBrace=*/ true);
- Line->Level -= 1;
+ if (!tryToParseBracedList()) {
+ parseChildBlock();
+ }
break;
}
case tok::at:
nextToken();
- if (FormatTok.Tok.is(tok::l_brace))
+ if (FormatTok->Tok.is(tok::l_brace))
parseBracedList();
break;
default:
@@ -539,26 +931,33 @@ void UnwrappedLineParser::parseParens() {
}
void UnwrappedLineParser::parseIfThenElse() {
- assert(FormatTok.Tok.is(tok::kw_if) && "'if' expected");
+ assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
nextToken();
- if (FormatTok.Tok.is(tok::l_paren))
+ if (FormatTok->Tok.is(tok::l_paren))
parseParens();
bool NeedsUnwrappedLine = false;
- if (FormatTok.Tok.is(tok::l_brace)) {
- parseBlock(/*MustBeDeclaration=*/ false);
- NeedsUnwrappedLine = true;
+ if (FormatTok->Tok.is(tok::l_brace)) {
+ if (Style.BreakBeforeBraces == FormatStyle::BS_Allman)
+ addUnwrappedLine();
+ parseBlock(/*MustBeDeclaration=*/false);
+ if (Style.BreakBeforeBraces == FormatStyle::BS_Allman)
+ addUnwrappedLine();
+ else
+ NeedsUnwrappedLine = true;
} else {
addUnwrappedLine();
++Line->Level;
parseStructuralElement();
--Line->Level;
}
- if (FormatTok.Tok.is(tok::kw_else)) {
+ if (FormatTok->Tok.is(tok::kw_else)) {
nextToken();
- if (FormatTok.Tok.is(tok::l_brace)) {
- parseBlock(/*MustBeDeclaration=*/ false);
+ if (FormatTok->Tok.is(tok::l_brace)) {
+ if (Style.BreakBeforeBraces == FormatStyle::BS_Allman)
+ addUnwrappedLine();
+ parseBlock(/*MustBeDeclaration=*/false);
addUnwrappedLine();
- } else if (FormatTok.Tok.is(tok::kw_if)) {
+ } else if (FormatTok->Tok.is(tok::kw_if)) {
parseIfThenElse();
} else {
addUnwrappedLine();
@@ -572,15 +971,22 @@ void UnwrappedLineParser::parseIfThenElse() {
}
void UnwrappedLineParser::parseNamespace() {
- assert(FormatTok.Tok.is(tok::kw_namespace) && "'namespace' expected");
+ assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected");
nextToken();
- if (FormatTok.Tok.is(tok::identifier))
+ if (FormatTok->Tok.is(tok::identifier))
nextToken();
- if (FormatTok.Tok.is(tok::l_brace)) {
- parseBlock(/*MustBeDeclaration=*/ true, 0);
+ if (FormatTok->Tok.is(tok::l_brace)) {
+ if (Style.BreakBeforeBraces == FormatStyle::BS_Linux ||
+ Style.BreakBeforeBraces == FormatStyle::BS_Allman)
+ addUnwrappedLine();
+
+ bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
+ (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
+ DeclarationScopeStack.size() > 1);
+ parseBlock(/*MustBeDeclaration=*/true, AddLevel);
// Munch the semicolon after a namespace. This is more common than one would
// think. Puttin the semicolon into its own line is very ugly.
- if (FormatTok.Tok.is(tok::semi))
+ if (FormatTok->Tok.is(tok::semi))
nextToken();
addUnwrappedLine();
}
@@ -588,13 +994,15 @@ void UnwrappedLineParser::parseNamespace() {
}
void UnwrappedLineParser::parseForOrWhileLoop() {
- assert((FormatTok.Tok.is(tok::kw_for) || FormatTok.Tok.is(tok::kw_while)) &&
+ assert((FormatTok->Tok.is(tok::kw_for) || FormatTok->Tok.is(tok::kw_while)) &&
"'for' or 'while' expected");
nextToken();
- if (FormatTok.Tok.is(tok::l_paren))
+ if (FormatTok->Tok.is(tok::l_paren))
parseParens();
- if (FormatTok.Tok.is(tok::l_brace)) {
- parseBlock(/*MustBeDeclaration=*/ false);
+ if (FormatTok->Tok.is(tok::l_brace)) {
+ if (Style.BreakBeforeBraces == FormatStyle::BS_Allman)
+ addUnwrappedLine();
+ parseBlock(/*MustBeDeclaration=*/false);
addUnwrappedLine();
} else {
addUnwrappedLine();
@@ -605,10 +1013,12 @@ void UnwrappedLineParser::parseForOrWhileLoop() {
}
void UnwrappedLineParser::parseDoWhile() {
- assert(FormatTok.Tok.is(tok::kw_do) && "'do' expected");
+ assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
nextToken();
- if (FormatTok.Tok.is(tok::l_brace)) {
- parseBlock(/*MustBeDeclaration=*/ false);
+ if (FormatTok->Tok.is(tok::l_brace)) {
+ if (Style.BreakBeforeBraces == FormatStyle::BS_Allman)
+ addUnwrappedLine();
+ parseBlock(/*MustBeDeclaration=*/false);
} else {
addUnwrappedLine();
++Line->Level;
@@ -617,7 +1027,7 @@ void UnwrappedLineParser::parseDoWhile() {
}
// FIXME: Add error handling.
- if (!FormatTok.Tok.is(tok::kw_while)) {
+ if (!FormatTok->Tok.is(tok::kw_while)) {
addUnwrappedLine();
return;
}
@@ -627,90 +1037,84 @@ void UnwrappedLineParser::parseDoWhile() {
}
void UnwrappedLineParser::parseLabel() {
- if (FormatTok.Tok.isNot(tok::colon))
- return;
nextToken();
unsigned OldLineLevel = Line->Level;
if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
--Line->Level;
- if (CommentsBeforeNextToken.empty() && FormatTok.Tok.is(tok::l_brace)) {
- parseBlock(/*MustBeDeclaration=*/ false);
- if (FormatTok.Tok.is(tok::kw_break))
- parseStructuralElement(); // "break;" after "}" goes on the same line.
+ if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) {
+ if (Style.BreakBeforeBraces == FormatStyle::BS_Allman)
+ addUnwrappedLine();
+ parseBlock(/*MustBeDeclaration=*/false);
+ if (FormatTok->Tok.is(tok::kw_break)) {
+ // "break;" after "}" on its own line only for BS_Allman
+ if (Style.BreakBeforeBraces == FormatStyle::BS_Allman)
+ addUnwrappedLine();
+ parseStructuralElement();
+ }
}
addUnwrappedLine();
Line->Level = OldLineLevel;
}
void UnwrappedLineParser::parseCaseLabel() {
- assert(FormatTok.Tok.is(tok::kw_case) && "'case' expected");
+ assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
// FIXME: fix handling of complex expressions here.
do {
nextToken();
- } while (!eof() && !FormatTok.Tok.is(tok::colon));
+ } while (!eof() && !FormatTok->Tok.is(tok::colon));
parseLabel();
}
void UnwrappedLineParser::parseSwitch() {
- assert(FormatTok.Tok.is(tok::kw_switch) && "'switch' expected");
+ assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
nextToken();
- if (FormatTok.Tok.is(tok::l_paren))
+ if (FormatTok->Tok.is(tok::l_paren))
parseParens();
- if (FormatTok.Tok.is(tok::l_brace)) {
- parseBlock(/*MustBeDeclaration=*/ false, Style.IndentCaseLabels ? 2 : 1);
+ if (FormatTok->Tok.is(tok::l_brace)) {
+ if (Style.BreakBeforeBraces == FormatStyle::BS_Allman)
+ addUnwrappedLine();
+ parseBlock(/*MustBeDeclaration=*/false);
addUnwrappedLine();
} else {
addUnwrappedLine();
- Line->Level += (Style.IndentCaseLabels ? 2 : 1);
+ ++Line->Level;
parseStructuralElement();
- Line->Level -= (Style.IndentCaseLabels ? 2 : 1);
+ --Line->Level;
}
}
void UnwrappedLineParser::parseAccessSpecifier() {
nextToken();
// Otherwise, we don't know what it is, and we'd better keep the next token.
- if (FormatTok.Tok.is(tok::colon))
+ if (FormatTok->Tok.is(tok::colon))
nextToken();
addUnwrappedLine();
}
void UnwrappedLineParser::parseEnum() {
nextToken();
- if (FormatTok.Tok.is(tok::identifier) ||
- FormatTok.Tok.is(tok::kw___attribute) ||
- FormatTok.Tok.is(tok::kw___declspec)) {
+ // Eat up enum class ...
+ if (FormatTok->Tok.is(tok::kw_class) ||
+ FormatTok->Tok.is(tok::kw_struct))
+ nextToken();
+ while (FormatTok->Tok.getIdentifierInfo() ||
+ FormatTok->isOneOf(tok::colon, tok::coloncolon)) {
nextToken();
// We can have macros or attributes in between 'enum' and the enum name.
- if (FormatTok.Tok.is(tok::l_paren)) {
+ if (FormatTok->Tok.is(tok::l_paren)) {
parseParens();
}
- if (FormatTok.Tok.is(tok::identifier))
+ if (FormatTok->Tok.is(tok::identifier))
nextToken();
}
- if (FormatTok.Tok.is(tok::l_brace)) {
- nextToken();
- addUnwrappedLine();
- ++Line->Level;
- do {
- switch (FormatTok.Tok.getKind()) {
- case tok::l_paren:
- parseParens();
- break;
- case tok::r_brace:
- addUnwrappedLine();
- nextToken();
- --Line->Level;
- return;
- case tok::comma:
+ if (FormatTok->Tok.is(tok::l_brace)) {
+ FormatTok->BlockKind = BK_Block;
+ bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
+ if (HasError) {
+ if (FormatTok->is(tok::semi))
nextToken();
- addUnwrappedLine();
- break;
- default:
- nextToken();
- break;
- }
- } while (!eof());
+ addUnwrappedLine();
+ }
}
// We fall through to parsing a structural element afterwards, so that in
// enum A {} n, m;
@@ -719,18 +1123,20 @@ void UnwrappedLineParser::parseEnum() {
void UnwrappedLineParser::parseRecord() {
nextToken();
- if (FormatTok.Tok.is(tok::identifier) ||
- FormatTok.Tok.is(tok::kw___attribute) ||
- FormatTok.Tok.is(tok::kw___declspec)) {
+ if (FormatTok->Tok.is(tok::identifier) ||
+ FormatTok->Tok.is(tok::kw___attribute) ||
+ FormatTok->Tok.is(tok::kw___declspec) ||
+ FormatTok->Tok.is(tok::kw_alignas)) {
nextToken();
// We can have macros or attributes in between 'class' and the class name.
- if (FormatTok.Tok.is(tok::l_paren)) {
+ if (FormatTok->Tok.is(tok::l_paren)) {
parseParens();
}
// The actual identifier can be a nested name specifier, and in macros
// it is often token-pasted.
- while (FormatTok.Tok.is(tok::identifier) ||
- FormatTok.Tok.is(tok::coloncolon) || FormatTok.Tok.is(tok::hashhash))
+ while (FormatTok->Tok.is(tok::identifier) ||
+ FormatTok->Tok.is(tok::coloncolon) ||
+ FormatTok->Tok.is(tok::hashhash))
nextToken();
// Note that parsing away template declarations here leads to incorrectly
@@ -743,37 +1149,49 @@ void UnwrappedLineParser::parseRecord() {
// and thus rule out the record production in case there is no template
// (this would still leave us with an ambiguity between template function
// and class declarations).
- if (FormatTok.Tok.is(tok::colon) || FormatTok.Tok.is(tok::less)) {
- while (!eof() && FormatTok.Tok.isNot(tok::l_brace)) {
- if (FormatTok.Tok.is(tok::semi))
+ if (FormatTok->Tok.is(tok::colon) || FormatTok->Tok.is(tok::less)) {
+ while (!eof() && FormatTok->Tok.isNot(tok::l_brace)) {
+ if (FormatTok->Tok.is(tok::semi))
return;
nextToken();
}
}
}
- if (FormatTok.Tok.is(tok::l_brace))
- parseBlock(/*MustBeDeclaration=*/ true);
+ if (FormatTok->Tok.is(tok::l_brace)) {
+ if (Style.BreakBeforeBraces == FormatStyle::BS_Linux ||
+ Style.BreakBeforeBraces == FormatStyle::BS_Allman)
+ addUnwrappedLine();
+
+ parseBlock(/*MustBeDeclaration=*/true, /*Addlevel=*/true,
+ /*MunchSemi=*/false);
+ }
// We fall through to parsing a structural element afterwards, so
// class A {} n, m;
// will end up in one unwrapped line.
}
void UnwrappedLineParser::parseObjCProtocolList() {
- assert(FormatTok.Tok.is(tok::less) && "'<' expected.");
+ assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
do
nextToken();
- while (!eof() && FormatTok.Tok.isNot(tok::greater));
+ while (!eof() && FormatTok->Tok.isNot(tok::greater));
nextToken(); // Skip '>'.
}
void UnwrappedLineParser::parseObjCUntilAtEnd() {
do {
- if (FormatTok.Tok.isObjCAtKeyword(tok::objc_end)) {
+ if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
nextToken();
addUnwrappedLine();
break;
}
- parseStructuralElement();
+ if (FormatTok->is(tok::l_brace)) {
+ parseBlock(/*MustBeDeclaration=*/false);
+ // In ObjC interfaces, nothing should be following the "}".
+ addUnwrappedLine();
+ } else {
+ parseStructuralElement();
+ }
} while (!eof());
}
@@ -782,19 +1200,19 @@ void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
nextToken(); // interface name
// @interface can be followed by either a base class, or a category.
- if (FormatTok.Tok.is(tok::colon)) {
+ if (FormatTok->Tok.is(tok::colon)) {
nextToken();
nextToken(); // base class name
- } else if (FormatTok.Tok.is(tok::l_paren))
+ } else if (FormatTok->Tok.is(tok::l_paren))
// Skip category, if present.
parseParens();
- if (FormatTok.Tok.is(tok::less))
+ if (FormatTok->Tok.is(tok::less))
parseObjCProtocolList();
// If instance variables are present, keep the '{' on the first line too.
- if (FormatTok.Tok.is(tok::l_brace))
- parseBlock(/*MustBeDeclaration=*/ true);
+ if (FormatTok->Tok.is(tok::l_brace))
+ parseBlock(/*MustBeDeclaration=*/true);
// With instance variables, this puts '}' on its own line. Without instance
// variables, this ends the @interface line.
@@ -807,11 +1225,11 @@ void UnwrappedLineParser::parseObjCProtocol() {
nextToken();
nextToken(); // protocol name
- if (FormatTok.Tok.is(tok::less))
+ if (FormatTok->Tok.is(tok::less))
parseObjCProtocolList();
// Check for protocol declaration.
- if (FormatTok.Tok.is(tok::semi)) {
+ if (FormatTok->Tok.is(tok::semi)) {
nextToken();
return addUnwrappedLine();
}
@@ -820,24 +1238,40 @@ void UnwrappedLineParser::parseObjCProtocol() {
parseObjCUntilAtEnd();
}
+// Debug helper: writes one unwrapped line to llvm::dbgs().
+//
+// Output is Prefix, the line's level, a " MACRO" marker for lines inside a
+// preprocessor directive, then every token as "name[type] ". Afterwards the
+// child lines attached to each token are printed recursively, each prefixed
+// with "\nChild: ". A final newline ends the output.
+LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
+                                                 StringRef Prefix = "") {
+  typedef std::list<UnwrappedLineNode>::const_iterator NodeIterator;
+  typedef SmallVectorImpl<UnwrappedLine>::const_iterator ChildIterator;
+
+  llvm::dbgs() << Prefix << "Line(" << Line.Level << ")";
+  llvm::dbgs() << (Line.InPPDirective ? " MACRO" : "") << ": ";
+
+  // First pass: the tokens of this line.
+  const NodeIterator NodesEnd = Line.Tokens.end();
+  for (NodeIterator Node = Line.Tokens.begin(); Node != NodesEnd; ++Node)
+    llvm::dbgs() << Node->Tok->Tok.getName() << "[" << Node->Tok->Type << "] ";
+
+  // Second pass: the child lines hanging off each token.
+  for (NodeIterator Node = Line.Tokens.begin(); Node != NodesEnd; ++Node) {
+    const ChildIterator ChildrenEnd = Node->Children.end();
+    for (ChildIterator Child = Node->Children.begin(); Child != ChildrenEnd;
+         ++Child)
+      printDebugInfo(*Child, "\nChild: ");
+  }
+  llvm::dbgs() << "\n";
+}
+
void UnwrappedLineParser::addUnwrappedLine() {
if (Line->Tokens.empty())
return;
DEBUG({
- llvm::dbgs() << "Line(" << Line->Level << ")"
- << (Line->InPPDirective ? " MACRO" : "") << ": ";
- for (std::list<FormatToken>::iterator I = Line->Tokens.begin(),
- E = Line->Tokens.end();
- I != E; ++I) {
- llvm::dbgs() << I->Tok.getName() << " ";
-
- }
- llvm::dbgs() << "\n";
+ if (CurrentLines == &Lines)
+ printDebugInfo(*Line);
});
CurrentLines->push_back(*Line);
Line->Tokens.clear();
if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
- for (std::vector<UnwrappedLine>::iterator
+ for (SmallVectorImpl<UnwrappedLine>::iterator
I = PreprocessorDirectives.begin(),
E = PreprocessorDirectives.end();
I != E; ++I) {
@@ -847,15 +1281,15 @@ void UnwrappedLineParser::addUnwrappedLine() {
}
}
-bool UnwrappedLineParser::eof() const { return FormatTok.Tok.is(tok::eof); }
+bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
bool JustComments = Line->Tokens.empty();
- for (SmallVectorImpl<FormatToken>::const_iterator
+ for (SmallVectorImpl<FormatToken *>::const_iterator
I = CommentsBeforeNextToken.begin(),
E = CommentsBeforeNextToken.end();
I != E; ++I) {
- if (I->NewlinesBefore && JustComments) {
+ if ((*I)->NewlinesBefore && JustComments) {
addUnwrappedLine();
}
pushToken(*I);
@@ -869,7 +1303,7 @@ void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
void UnwrappedLineParser::nextToken() {
if (eof())
return;
- flushComments(FormatTok.NewlinesBefore > 0);
+ flushComments(FormatTok->NewlinesBefore > 0);
pushToken(FormatTok);
readToken();
}
@@ -878,8 +1312,8 @@ void UnwrappedLineParser::readToken() {
bool CommentsInCurrentLine = true;
do {
FormatTok = Tokens->getNextToken();
- while (!Line->InPPDirective && FormatTok.Tok.is(tok::hash) &&
- (FormatTok.HasUnescapedNewline || FormatTok.IsFirst)) {
+ while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
+ (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
// If there is an unfinished unwrapped line, we flush the preprocessor
// directives only after that unwrapped line was finished later.
bool SwitchToPreprocessorLines =
@@ -888,12 +1322,18 @@ void UnwrappedLineParser::readToken() {
// Comments stored before the preprocessor directive need to be output
// before the preprocessor directive, at the same level as the
// preprocessor directive, as we consider them to apply to the directive.
- flushComments(FormatTok.NewlinesBefore > 0);
+ flushComments(FormatTok->NewlinesBefore > 0);
parsePPDirective();
}
- if (!FormatTok.Tok.is(tok::comment))
+
+ if (!PPStack.empty() && (PPStack.back() == PP_Unreachable) &&
+ !Line->InPPDirective) {
+ continue;
+ }
+
+ if (!FormatTok->Tok.is(tok::comment))
return;
- if (FormatTok.NewlinesBefore > 0 || FormatTok.IsFirst) {
+ if (FormatTok->NewlinesBefore > 0 || FormatTok->IsFirst) {
CommentsInCurrentLine = false;
}
if (CommentsInCurrentLine) {
@@ -904,10 +1344,10 @@ void UnwrappedLineParser::readToken() {
} while (!eof());
}
-void UnwrappedLineParser::pushToken(const FormatToken &Tok) {
- Line->Tokens.push_back(Tok);
+void UnwrappedLineParser::pushToken(FormatToken *Tok) {
+ Line->Tokens.push_back(UnwrappedLineNode(Tok));
if (MustBreakBeforeNextToken) {
- Line->Tokens.back().MustBreakBefore = true;
+ Line->Tokens.back().Tok->MustBreakBefore = true;
MustBreakBeforeNextToken = false;
}
}
diff --git a/lib/Format/UnwrappedLineParser.h b/lib/Format/UnwrappedLineParser.h
index 0c618e24d44e..f1f4e57a20b3 100644
--- a/lib/Format/UnwrappedLineParser.h
+++ b/lib/Format/UnwrappedLineParser.h
@@ -17,78 +17,14 @@
#define LLVM_CLANG_FORMAT_UNWRAPPED_LINE_PARSER_H
#include "clang/Basic/IdentifierTable.h"
-#include "clang/Basic/SourceManager.h"
#include "clang/Format/Format.h"
-#include "clang/Lex/Lexer.h"
+#include "FormatToken.h"
#include <list>
namespace clang {
-
-class DiagnosticsEngine;
-
namespace format {
-/// \brief A wrapper around a \c Token storing information about the
-/// whitespace characters preceeding it.
-struct FormatToken {
- FormatToken()
- : NewlinesBefore(0), HasUnescapedNewline(false), WhiteSpaceLength(0),
- LastNewlineOffset(0), TokenLength(0), IsFirst(false),
- MustBreakBefore(false), TrailingWhiteSpaceLength(0) {}
-
- /// \brief The \c Token.
- Token Tok;
-
- /// \brief The number of newlines immediately before the \c Token.
- ///
- /// This can be used to determine what the user wrote in the original code
- /// and thereby e.g. leave an empty line between two function definitions.
- unsigned NewlinesBefore;
-
- /// \brief Whether there is at least one unescaped newline before the \c
- /// Token.
- bool HasUnescapedNewline;
-
- /// \brief The location of the start of the whitespace immediately preceeding
- /// the \c Token.
- ///
- /// Used together with \c WhiteSpaceLength to create a \c Replacement.
- SourceLocation WhiteSpaceStart;
-
- /// \brief The length in characters of the whitespace immediately preceeding
- /// the \c Token.
- unsigned WhiteSpaceLength;
-
- /// \brief The offset just past the last '\n' in this token's leading
- /// whitespace (relative to \c WhiteSpaceStart). 0 if there is no '\n'.
- unsigned LastNewlineOffset;
-
- /// \brief The length of the non-whitespace parts of the token. This is
- /// necessary because we need to handle escaped newlines that are stored
- /// with the token.
- unsigned TokenLength;
-
- /// \brief Indicates that this is the first token.
- bool IsFirst;
-
- /// \brief Whether there must be a line break before this token.
- ///
- /// This happens for example when a preprocessor directive ended directly
- /// before the token.
- bool MustBreakBefore;
-
- /// \brief Number of characters of trailing whitespace.
- unsigned TrailingWhiteSpaceLength;
-
- /// \brief Returns actual token start location without leading escaped
- /// newlines and whitespace.
- ///
- /// This can be different to Tok.getLocation(), which includes leading escaped
- /// newlines.
- SourceLocation getStartOfNonWhitespace() const {
- return WhiteSpaceStart.getLocWithOffset(WhiteSpaceLength);
- }
-};
+struct UnwrappedLineNode;
/// \brief An unwrapped line is a sequence of \c Token, that we would like to
/// put on a single line if there was no column limit.
@@ -97,12 +33,11 @@ struct FormatToken {
/// \c UnwrappedLineFormatter. The key property is that changing the formatting
/// within an unwrapped line does not affect any other unwrapped lines.
struct UnwrappedLine {
- UnwrappedLine() : Level(0), InPPDirective(false), MustBeDeclaration(false) {
- }
+ UnwrappedLine();
// FIXME: Don't use std::list here.
/// \brief The \c Tokens comprising this \c UnwrappedLine.
- std::list<FormatToken> Tokens;
+ std::list<UnwrappedLineNode> Tokens;
/// \brief The indent level of the \c UnwrappedLine.
unsigned Level;
@@ -115,36 +50,38 @@ struct UnwrappedLine {
class UnwrappedLineConsumer {
public:
- virtual ~UnwrappedLineConsumer() {
- }
+ virtual ~UnwrappedLineConsumer() {}
virtual void consumeUnwrappedLine(const UnwrappedLine &Line) = 0;
+ virtual void finishRun() = 0;
};
-class FormatTokenSource {
-public:
- virtual ~FormatTokenSource() {
- }
- virtual FormatToken getNextToken() = 0;
-};
+class FormatTokenSource;
class UnwrappedLineParser {
public:
- UnwrappedLineParser(clang::DiagnosticsEngine &Diag, const FormatStyle &Style,
- FormatTokenSource &Tokens,
+ UnwrappedLineParser(const FormatStyle &Style, ArrayRef<FormatToken *> Tokens,
UnwrappedLineConsumer &Callback);
/// Returns true in case of a structural error.
bool parse();
private:
+ void reset();
void parseFile();
void parseLevel(bool HasOpeningBrace);
- void parseBlock(bool MustBeDeclaration, unsigned AddLevels = 1);
+ void parseBlock(bool MustBeDeclaration, bool AddLevel = true,
+ bool MunchSemi = true);
+ void parseChildBlock();
void parsePPDirective();
void parsePPDefine();
+ void parsePPIf(bool IfDef);
+ void parsePPElIf();
+ void parsePPElse();
+ void parsePPEndIf();
void parsePPUnknown();
void parseStructuralElement();
- void parseBracedList();
+ bool tryToParseBracedList();
+ bool parseBracedList(bool ContinueOnSemicolons = false);
void parseReturn();
void parseParens();
void parseIfThenElse();
@@ -161,12 +98,16 @@ private:
void parseObjCUntilAtEnd();
void parseObjCInterfaceOrImplementation();
void parseObjCProtocol();
+ void tryToParseLambda();
+ bool tryToParseLambdaIntroducer();
void addUnwrappedLine();
bool eof() const;
void nextToken();
void readToken();
void flushComments(bool NewlineBeforeNext);
- void pushToken(const FormatToken &Tok);
+ void pushToken(FormatToken *Tok);
+ void calculateBraceTypes();
+ void pushPPConditional();
// FIXME: We are constantly running into bugs where Line.Level is incorrectly
// subtracted from beyond 0. Introduce a method to subtract from Line.Level
@@ -177,23 +118,23 @@ private:
// line as the previous token, or not. If not, they belong to the next token.
// Since the next token might already be in a new unwrapped line, we need to
// store the comments belonging to that token.
- SmallVector<FormatToken, 1> CommentsBeforeNextToken;
- FormatToken FormatTok;
+ SmallVector<FormatToken *, 1> CommentsBeforeNextToken;
+ FormatToken *FormatTok;
bool MustBreakBeforeNextToken;
// The parsed lines. Only added to through \c CurrentLines.
- std::vector<UnwrappedLine> Lines;
+ SmallVector<UnwrappedLine, 8> Lines;
// Preprocessor directives are parsed out-of-order from other unwrapped lines.
// Thus, we need to keep a list of preprocessor directives to be reported
// after an unwarpped line that has been started was finished.
- std::vector<UnwrappedLine> PreprocessorDirectives;
+ SmallVector<UnwrappedLine, 4> PreprocessorDirectives;
// New unwrapped lines are added via CurrentLines.
// Usually points to \c &Lines. While parsing a preprocessor directive when
// there is an unfinished previous unwrapped line, will point to
// \c &PreprocessorDirectives.
- std::vector<UnwrappedLine> *CurrentLines;
+ SmallVectorImpl<UnwrappedLine> *CurrentLines;
// We store for each line whether it must be a declaration depending on
// whether we are in a compound statement or not.
@@ -203,14 +144,60 @@ private:
// indentation levels.
bool StructuralError;
- clang::DiagnosticsEngine &Diag;
const FormatStyle &Style;
FormatTokenSource *Tokens;
UnwrappedLineConsumer &Callback;
+ // FIXME: This is a temporary measure until we have reworked the ownership
+ // of the format tokens. The goal is to have the actual tokens created and
+ // owned outside of and handed into the UnwrappedLineParser.
+ ArrayRef<FormatToken *> AllTokens;
+
+ // Represents preprocessor branch type, so we can find matching
+ // #if/#else/#endif directives.
+ enum PPBranchKind {
+ PP_Conditional, // Any #if, #ifdef, #ifndef, #elif, block outside #if 0
+ PP_Unreachable // #if 0 or a conditional preprocessor block inside #if 0
+ };
+
+ // Keeps a stack of currently active preprocessor branching directives.
+ SmallVector<PPBranchKind, 16> PPStack;
+
+ // The \c UnwrappedLineParser re-parses the code for each combination
+ // of preprocessor branches that can be taken.
+ // To that end, we take the same branch (#if, #else, or one of the #elif
+ // branches) for each nesting level of preprocessor branches.
+ // \c PPBranchLevel stores the current nesting level of preprocessor
+ // branches during one pass over the code.
+ int PPBranchLevel;
+
+ // Contains the current branch (#if, #else or one of the #elif branches)
+ // for each nesting level.
+ SmallVector<int, 8> PPLevelBranchIndex;
+
+ // Contains the maximum number of branches at each nesting level.
+ SmallVector<int, 8> PPLevelBranchCount;
+
+ // Contains the number of branches per nesting level we are currently
+ // in while parsing a preprocessor branch sequence.
+ // This is used to update PPLevelBranchCount at the end of a branch
+ // sequence.
+ std::stack<int> PPChainBranchIndex;
+
friend class ScopedLineState;
};
+struct UnwrappedLineNode {
+ UnwrappedLineNode() : Tok(NULL) {}
+ UnwrappedLineNode(FormatToken *Tok) : Tok(Tok) {}
+
+ FormatToken *Tok;
+ SmallVector<UnwrappedLine, 0> Children;
+};
+
+inline UnwrappedLine::UnwrappedLine()
+ : Level(0), InPPDirective(false), MustBeDeclaration(false) {}
+
} // end namespace format
} // end namespace clang
diff --git a/lib/Format/WhitespaceManager.cpp b/lib/Format/WhitespaceManager.cpp
index a75c592bfeda..26a8d41e8741 100644
--- a/lib/Format/WhitespaceManager.cpp
+++ b/lib/Format/WhitespaceManager.cpp
@@ -18,193 +18,302 @@
namespace clang {
namespace format {
-void WhitespaceManager::replaceWhitespace(const AnnotatedToken &Tok,
- unsigned NewLines, unsigned Spaces,
- unsigned WhitespaceStartColumn) {
- if (NewLines > 0)
- alignEscapedNewlines();
-
- // 2+ newlines mean an empty line separating logic scopes.
- if (NewLines >= 2)
- alignComments();
-
- // Align line comments if they are trailing or if they continue other
- // trailing comments.
- if (Tok.isTrailingComment()) {
- SourceLocation TokenEndLoc = Tok.FormatTok.getStartOfNonWhitespace()
- .getLocWithOffset(Tok.FormatTok.TokenLength);
- // Remove the comment's trailing whitespace.
- if (Tok.FormatTok.TrailingWhiteSpaceLength != 0)
- Replaces.insert(tooling::Replacement(
- SourceMgr, TokenEndLoc, Tok.FormatTok.TrailingWhiteSpaceLength, ""));
-
- bool LineExceedsColumnLimit =
- Spaces + WhitespaceStartColumn + Tok.FormatTok.TokenLength >
- Style.ColumnLimit;
- // Align comment with other comments.
- if ((Tok.Parent != NULL || !Comments.empty()) &&
- !LineExceedsColumnLimit) {
- unsigned MinColumn =
- NewLines > 0 ? Spaces : WhitespaceStartColumn + Spaces;
- unsigned MaxColumn = Style.ColumnLimit - Tok.FormatTok.TokenLength;
- Comments.push_back(StoredToken(
- Tok.FormatTok.WhiteSpaceStart, Tok.FormatTok.WhiteSpaceLength,
- MinColumn, MaxColumn, NewLines, Spaces));
- return;
- }
- }
-
- // If this line does not have a trailing comment, align the stored comments.
- if (Tok.Children.empty() && !Tok.isTrailingComment())
- alignComments();
-
- storeReplacement(Tok.FormatTok.WhiteSpaceStart,
- Tok.FormatTok.WhiteSpaceLength,
- getNewLineText(NewLines, Spaces));
-}
-
-void WhitespaceManager::replacePPWhitespace(const AnnotatedToken &Tok,
- unsigned NewLines, unsigned Spaces,
- unsigned WhitespaceStartColumn) {
- if (NewLines == 0) {
- replaceWhitespace(Tok, NewLines, Spaces, WhitespaceStartColumn);
- } else {
- // The earliest position for "\" is 2 after the last token.
- unsigned MinColumn = WhitespaceStartColumn + 2;
- unsigned MaxColumn = Style.ColumnLimit;
- EscapedNewlines.push_back(StoredToken(
- Tok.FormatTok.WhiteSpaceStart, Tok.FormatTok.WhiteSpaceLength,
- MinColumn, MaxColumn, NewLines, Spaces));
- }
+bool
+WhitespaceManager::Change::IsBeforeInFile::operator()(const Change &C1,
+ const Change &C2) const {
+ return SourceMgr.isBeforeInTranslationUnit(
+ C1.OriginalWhitespaceRange.getBegin(),
+ C2.OriginalWhitespaceRange.getBegin());
}
-void WhitespaceManager::breakToken(const FormatToken &Tok, unsigned Offset,
- unsigned ReplaceChars, StringRef Prefix,
- StringRef Postfix, bool InPPDirective,
- unsigned Spaces,
- unsigned WhitespaceStartColumn) {
- SourceLocation Location =
- Tok.getStartOfNonWhitespace().getLocWithOffset(Offset);
- if (InPPDirective) {
- // The earliest position for "\" is 2 after the last token.
- unsigned MinColumn = WhitespaceStartColumn + 2;
- unsigned MaxColumn = Style.ColumnLimit;
- StoredToken StoredTok = StoredToken(Location, ReplaceChars, MinColumn,
- MaxColumn, /*NewLines=*/ 1, Spaces);
- StoredTok.Prefix = Prefix;
- StoredTok.Postfix = Postfix;
- EscapedNewlines.push_back(StoredTok);
- } else {
- std::string ReplacementText =
- (Prefix + getNewLineText(1, Spaces) + Postfix).str();
- Replaces.insert(tooling::Replacement(SourceMgr, Location, ReplaceChars,
- ReplacementText));
- }
+WhitespaceManager::Change::Change(
+ bool CreateReplacement, const SourceRange &OriginalWhitespaceRange,
+ unsigned IndentLevel, unsigned Spaces, unsigned StartOfTokenColumn,
+ unsigned NewlinesBefore, StringRef PreviousLinePostfix,
+ StringRef CurrentLinePrefix, tok::TokenKind Kind, bool ContinuesPPDirective)
+ : CreateReplacement(CreateReplacement),
+ OriginalWhitespaceRange(OriginalWhitespaceRange),
+ StartOfTokenColumn(StartOfTokenColumn), NewlinesBefore(NewlinesBefore),
+ PreviousLinePostfix(PreviousLinePostfix),
+ CurrentLinePrefix(CurrentLinePrefix), Kind(Kind),
+ ContinuesPPDirective(ContinuesPPDirective), IndentLevel(IndentLevel),
+ Spaces(Spaces) {}
+
+void WhitespaceManager::reset() {
+ Changes.clear();
+ Replaces.clear();
}
-const tooling::Replacements &WhitespaceManager::generateReplacements() {
- alignComments();
- alignEscapedNewlines();
- return Replaces;
+void WhitespaceManager::replaceWhitespace(FormatToken &Tok, unsigned Newlines,
+ unsigned IndentLevel, unsigned Spaces,
+ unsigned StartOfTokenColumn,
+ bool InPPDirective) {
+ if (Tok.Finalized)
+ return;
+ Tok.Decision = (Newlines > 0) ? FD_Break : FD_Continue;
+ Changes.push_back(Change(true, Tok.WhitespaceRange, IndentLevel, Spaces,
+ StartOfTokenColumn, Newlines, "", "",
+ Tok.Tok.getKind(), InPPDirective && !Tok.IsFirst));
}
-void WhitespaceManager::addReplacement(const SourceLocation &SourceLoc,
- unsigned ReplaceChars, StringRef Text) {
- Replaces.insert(
- tooling::Replacement(SourceMgr, SourceLoc, ReplaceChars, Text));
+void WhitespaceManager::addUntouchableToken(const FormatToken &Tok,
+ bool InPPDirective) {
+ if (Tok.Finalized)
+ return;
+ Changes.push_back(Change(false, Tok.WhitespaceRange, /*IndentLevel=*/0,
+ /*Spaces=*/0, Tok.OriginalColumn, Tok.NewlinesBefore,
+ "", "", Tok.Tok.getKind(),
+ InPPDirective && !Tok.IsFirst));
}
-void WhitespaceManager::addUntouchableComment(unsigned Column) {
- StoredToken Tok = StoredToken(SourceLocation(), 0, Column, Column, 0, 0);
- Tok.Untouchable = true;
- Comments.push_back(Tok);
+void WhitespaceManager::replaceWhitespaceInToken(
+ const FormatToken &Tok, unsigned Offset, unsigned ReplaceChars,
+ StringRef PreviousPostfix, StringRef CurrentPrefix, bool InPPDirective,
+ unsigned Newlines, unsigned IndentLevel, unsigned Spaces) {
+ if (Tok.Finalized)
+ return;
+ Changes.push_back(Change(
+ true, SourceRange(Tok.getStartOfNonWhitespace().getLocWithOffset(Offset),
+ Tok.getStartOfNonWhitespace().getLocWithOffset(
+ Offset + ReplaceChars)),
+ IndentLevel, Spaces, Spaces, Newlines, PreviousPostfix, CurrentPrefix,
+ // If we don't add a newline this change doesn't start a comment. Thus,
+ // when we align line comments, we don't need to treat this change as one.
+ // FIXME: We still need to take this change into account to properly
+ // calculate the new length of the comment and to calculate the changes
+ // for which to do the alignment when aligning comments.
+ Tok.Type == TT_LineComment && Newlines > 0 ? tok::comment : tok::unknown,
+ InPPDirective && !Tok.IsFirst));
}
-std::string WhitespaceManager::getNewLineText(unsigned NewLines,
- unsigned Spaces) {
- return std::string(NewLines, '\n') + std::string(Spaces, ' ');
+const tooling::Replacements &WhitespaceManager::generateReplacements() {
+ if (Changes.empty())
+ return Replaces;
+
+ std::sort(Changes.begin(), Changes.end(), Change::IsBeforeInFile(SourceMgr));
+ calculateLineBreakInformation();
+ alignTrailingComments();
+ alignEscapedNewlines();
+ generateChanges();
+
+ return Replaces;
}
-std::string WhitespaceManager::getNewLineText(unsigned NewLines,
- unsigned Spaces,
- unsigned WhitespaceStartColumn,
- unsigned EscapedNewlineColumn) {
- std::string NewLineText;
- if (NewLines > 0) {
- unsigned Offset =
- std::min<int>(EscapedNewlineColumn - 1, WhitespaceStartColumn);
- for (unsigned i = 0; i < NewLines; ++i) {
- NewLineText += std::string(EscapedNewlineColumn - Offset - 1, ' ');
- NewLineText += "\\\n";
- Offset = 0;
- }
+void WhitespaceManager::calculateLineBreakInformation() {
+ Changes[0].PreviousEndOfTokenColumn = 0;
+ for (unsigned i = 1, e = Changes.size(); i != e; ++i) {
+ unsigned OriginalWhitespaceStart =
+ SourceMgr.getFileOffset(Changes[i].OriginalWhitespaceRange.getBegin());
+ unsigned PreviousOriginalWhitespaceEnd = SourceMgr.getFileOffset(
+ Changes[i - 1].OriginalWhitespaceRange.getEnd());
+ Changes[i - 1].TokenLength = OriginalWhitespaceStart -
+ PreviousOriginalWhitespaceEnd +
+ Changes[i].PreviousLinePostfix.size() +
+ Changes[i - 1].CurrentLinePrefix.size();
+
+ Changes[i].PreviousEndOfTokenColumn =
+ Changes[i - 1].StartOfTokenColumn + Changes[i - 1].TokenLength;
+
+ Changes[i - 1].IsTrailingComment =
+ (Changes[i].NewlinesBefore > 0 || Changes[i].Kind == tok::eof) &&
+ Changes[i - 1].Kind == tok::comment;
}
- return NewLineText + std::string(Spaces, ' ');
+ // FIXME: The last token is currently not always an eof token; in those
+ // cases, setting TokenLength of the last token to 0 is wrong.
+ Changes.back().TokenLength = 0;
+ Changes.back().IsTrailingComment = Changes.back().Kind == tok::comment;
}
-void WhitespaceManager::alignComments() {
+void WhitespaceManager::alignTrailingComments() {
unsigned MinColumn = 0;
unsigned MaxColumn = UINT_MAX;
- token_iterator Start = Comments.begin();
- for (token_iterator I = Start, E = Comments.end(); I != E; ++I) {
- if (I->MinColumn > MaxColumn || I->MaxColumn < MinColumn) {
- alignComments(Start, I, MinColumn);
- MinColumn = I->MinColumn;
- MaxColumn = I->MaxColumn;
- Start = I;
- } else {
- MinColumn = std::max(MinColumn, I->MinColumn);
- MaxColumn = std::min(MaxColumn, I->MaxColumn);
+ unsigned StartOfSequence = 0;
+ bool BreakBeforeNext = false;
+ unsigned Newlines = 0;
+ for (unsigned i = 0, e = Changes.size(); i != e; ++i) {
+ unsigned ChangeMinColumn = Changes[i].StartOfTokenColumn;
+ // FIXME: Correctly handle ChangeMaxColumn in PP directives.
+ unsigned ChangeMaxColumn = Style.ColumnLimit - Changes[i].TokenLength;
+ Newlines += Changes[i].NewlinesBefore;
+ if (Changes[i].IsTrailingComment) {
+ // If this comment follows a } in column 0, it probably documents the
+ // closing of a namespace and we don't want to align it.
+ bool FollowsRBraceInColumn0 = i > 0 && Changes[i].NewlinesBefore == 0 &&
+ Changes[i - 1].Kind == tok::r_brace &&
+ Changes[i - 1].StartOfTokenColumn == 0;
+ bool WasAlignedWithStartOfNextLine = false;
+ if (Changes[i].NewlinesBefore == 1) { // A comment on its own line.
+ for (unsigned j = i + 1; j != e; ++j) {
+ if (Changes[j].Kind != tok::comment) { // Skip over comments.
+ // The start of the next token was previously aligned with the
+ // start of this comment.
+ WasAlignedWithStartOfNextLine =
+ (SourceMgr.getSpellingColumnNumber(
+ Changes[i].OriginalWhitespaceRange.getEnd()) ==
+ SourceMgr.getSpellingColumnNumber(
+ Changes[j].OriginalWhitespaceRange.getEnd()));
+ break;
+ }
+ }
+ }
+ if (!Style.AlignTrailingComments || FollowsRBraceInColumn0) {
+ alignTrailingComments(StartOfSequence, i, MinColumn);
+ MinColumn = ChangeMinColumn;
+ MaxColumn = ChangeMinColumn;
+ StartOfSequence = i;
+ } else if (BreakBeforeNext || Newlines > 1 ||
+ (ChangeMinColumn > MaxColumn || ChangeMaxColumn < MinColumn) ||
+ // Break the comment sequence if the previous line did not end
+ // in a trailing comment.
+ (Changes[i].NewlinesBefore == 1 && i > 0 &&
+ !Changes[i - 1].IsTrailingComment) ||
+ WasAlignedWithStartOfNextLine) {
+ alignTrailingComments(StartOfSequence, i, MinColumn);
+ MinColumn = ChangeMinColumn;
+ MaxColumn = ChangeMaxColumn;
+ StartOfSequence = i;
+ } else {
+ MinColumn = std::max(MinColumn, ChangeMinColumn);
+ MaxColumn = std::min(MaxColumn, ChangeMaxColumn);
+ }
+ BreakBeforeNext =
+ (i == 0) || (Changes[i].NewlinesBefore > 1) ||
+ // Never start a sequence with a comment at the beginning of
+ // the line.
+ (Changes[i].NewlinesBefore == 1 && StartOfSequence == i);
+ Newlines = 0;
}
}
- alignComments(Start, Comments.end(), MinColumn);
- Comments.clear();
+ alignTrailingComments(StartOfSequence, Changes.size(), MinColumn);
}
-void WhitespaceManager::alignComments(token_iterator I, token_iterator E,
- unsigned Column) {
- while (I != E) {
- if (!I->Untouchable) {
- unsigned Spaces = I->Spaces + Column - I->MinColumn;
- storeReplacement(I->ReplacementLoc, I->ReplacementLength,
- getNewLineText(I->NewLines, Spaces));
+void WhitespaceManager::alignTrailingComments(unsigned Start, unsigned End,
+ unsigned Column) {
+ for (unsigned i = Start; i != End; ++i) {
+ if (Changes[i].IsTrailingComment) {
+ assert(Column >= Changes[i].StartOfTokenColumn);
+ Changes[i].Spaces += Column - Changes[i].StartOfTokenColumn;
+ Changes[i].StartOfTokenColumn = Column;
}
- ++I;
}
}
void WhitespaceManager::alignEscapedNewlines() {
- unsigned MinColumn;
- if (Style.AlignEscapedNewlinesLeft) {
- MinColumn = 0;
- for (token_iterator I = EscapedNewlines.begin(), E = EscapedNewlines.end();
- I != E; ++I) {
- if (I->MinColumn > MinColumn)
- MinColumn = I->MinColumn;
+ unsigned MaxEndOfLine =
+ Style.AlignEscapedNewlinesLeft ? 0 : Style.ColumnLimit;
+ unsigned StartOfMacro = 0;
+ for (unsigned i = 1, e = Changes.size(); i < e; ++i) {
+ Change &C = Changes[i];
+ if (C.NewlinesBefore > 0) {
+ if (C.ContinuesPPDirective) {
+ MaxEndOfLine = std::max(C.PreviousEndOfTokenColumn + 2, MaxEndOfLine);
+ } else {
+ alignEscapedNewlines(StartOfMacro + 1, i, MaxEndOfLine);
+ MaxEndOfLine = Style.AlignEscapedNewlinesLeft ? 0 : Style.ColumnLimit;
+ StartOfMacro = i;
+ }
}
- } else {
- MinColumn = Style.ColumnLimit;
}
+ alignEscapedNewlines(StartOfMacro + 1, Changes.size(), MaxEndOfLine);
+}
- for (token_iterator I = EscapedNewlines.begin(), E = EscapedNewlines.end();
- I != E; ++I) {
- // I->MinColumn - 2 is the end of the previous token (i.e. the
- // WhitespaceStartColumn).
- storeReplacement(
- I->ReplacementLoc, I->ReplacementLength,
- I->Prefix + getNewLineText(I->NewLines, I->Spaces, I->MinColumn - 2,
- MinColumn) + I->Postfix);
+void WhitespaceManager::alignEscapedNewlines(unsigned Start, unsigned End,
+ unsigned Column) {
+ for (unsigned i = Start; i < End; ++i) {
+ Change &C = Changes[i];
+ if (C.NewlinesBefore > 0) {
+ assert(C.ContinuesPPDirective);
+ if (C.PreviousEndOfTokenColumn + 1 > Column)
+ C.EscapedNewlineColumn = 0;
+ else
+ C.EscapedNewlineColumn = Column;
+ }
+ }
+}
+void WhitespaceManager::generateChanges() {
+ for (unsigned i = 0, e = Changes.size(); i != e; ++i) {
+ const Change &C = Changes[i];
+ if (C.CreateReplacement) {
+ std::string ReplacementText = C.PreviousLinePostfix;
+ if (C.ContinuesPPDirective)
+ appendNewlineText(ReplacementText, C.NewlinesBefore,
+ C.PreviousEndOfTokenColumn, C.EscapedNewlineColumn);
+ else
+ appendNewlineText(ReplacementText, C.NewlinesBefore);
+ appendIndentText(ReplacementText, C.IndentLevel, C.Spaces,
+ C.StartOfTokenColumn - C.Spaces);
+ ReplacementText.append(C.CurrentLinePrefix);
+ storeReplacement(C.OriginalWhitespaceRange, ReplacementText);
+ }
}
- EscapedNewlines.clear();
}
-void WhitespaceManager::storeReplacement(SourceLocation Loc, unsigned Length,
- const std::string Text) {
+void WhitespaceManager::storeReplacement(const SourceRange &Range,
+ StringRef Text) {
+ unsigned WhitespaceLength = SourceMgr.getFileOffset(Range.getEnd()) -
+ SourceMgr.getFileOffset(Range.getBegin());
// Don't create a replacement, if it does not change anything.
- if (StringRef(SourceMgr.getCharacterData(Loc), Length) == Text)
+ if (StringRef(SourceMgr.getCharacterData(Range.getBegin()),
+ WhitespaceLength) == Text)
return;
- Replaces.insert(tooling::Replacement(SourceMgr, Loc, Length, Text));
+ Replaces.insert(tooling::Replacement(
+ SourceMgr, CharSourceRange::getCharRange(Range), Text));
+}
+
+void WhitespaceManager::appendNewlineText(std::string &Text,
+ unsigned Newlines) {
+ for (unsigned i = 0; i < Newlines; ++i)
+ Text.append(UseCRLF ? "\r\n" : "\n");
+}
+
+void WhitespaceManager::appendNewlineText(std::string &Text, unsigned Newlines,
+ unsigned PreviousEndOfTokenColumn,
+ unsigned EscapedNewlineColumn) {
+ if (Newlines > 0) {
+ unsigned Offset =
+ std::min<int>(EscapedNewlineColumn - 1, PreviousEndOfTokenColumn);
+ for (unsigned i = 0; i < Newlines; ++i) {
+ Text.append(std::string(EscapedNewlineColumn - Offset - 1, ' '));
+ Text.append(UseCRLF ? "\\\r\n" : "\\\n");
+ Offset = 0;
+ }
+ }
+}
+
+void WhitespaceManager::appendIndentText(std::string &Text,
+ unsigned IndentLevel, unsigned Spaces,
+ unsigned WhitespaceStartColumn) {
+ switch (Style.UseTab) {
+ case FormatStyle::UT_Never:
+ Text.append(std::string(Spaces, ' '));
+ break;
+ case FormatStyle::UT_Always: {
+ unsigned FirstTabWidth =
+ Style.TabWidth - WhitespaceStartColumn % Style.TabWidth;
+ // Indent with tabs only when there's at least one full tab.
+ if (FirstTabWidth + Style.TabWidth <= Spaces) {
+ Spaces -= FirstTabWidth;
+ Text.append("\t");
+ }
+ Text.append(std::string(Spaces / Style.TabWidth, '\t'));
+ Text.append(std::string(Spaces % Style.TabWidth, ' '));
+ break;
+ }
+ case FormatStyle::UT_ForIndentation:
+ if (WhitespaceStartColumn == 0) {
+ unsigned Indentation = IndentLevel * Style.IndentWidth;
+ // This happens, e.g. when a line in a block comment is indented less than
+ // the first one.
+ if (Indentation > Spaces)
+ Indentation = Spaces;
+ unsigned Tabs = Indentation / Style.TabWidth;
+ Text.append(std::string(Tabs, '\t'));
+ Spaces -= Tabs * Style.TabWidth;
+ }
+ Text.append(std::string(Spaces, ' '));
+ break;
+ }
}
} // namespace format
diff --git a/lib/Format/WhitespaceManager.h b/lib/Format/WhitespaceManager.h
index 5f3dc55edacc..ae6202395f6b 100644
--- a/lib/Format/WhitespaceManager.h
+++ b/lib/Format/WhitespaceManager.h
@@ -28,89 +28,153 @@ namespace format {
///
/// This includes special handling for certain constructs, e.g. the alignment of
/// trailing line comments.
+///
+/// To guarantee correctness of alignment operations, the \c WhitespaceManager
+/// must be informed about every token in the source file; for each token, there
+/// must be exactly one call to either \c replaceWhitespace or
+/// \c addUntouchableToken.
+///
+/// There may be multiple calls to \c replaceWhitespaceInToken for a given
+/// token.
class WhitespaceManager {
public:
- WhitespaceManager(SourceManager &SourceMgr, const FormatStyle &Style)
- : SourceMgr(SourceMgr), Style(Style) {}
+ WhitespaceManager(SourceManager &SourceMgr, const FormatStyle &Style,
+ bool UseCRLF)
+ : SourceMgr(SourceMgr), Style(Style), UseCRLF(UseCRLF) {}
+
+ /// \brief Prepares the \c WhitespaceManager for another run.
+ void reset();
/// \brief Replaces the whitespace in front of \p Tok. Only call once for
/// each \c AnnotatedToken.
- void replaceWhitespace(const AnnotatedToken &Tok, unsigned NewLines,
- unsigned Spaces, unsigned WhitespaceStartColumn);
+ void replaceWhitespace(FormatToken &Tok, unsigned Newlines,
+ unsigned IndentLevel, unsigned Spaces,
+ unsigned StartOfTokenColumn,
+ bool InPPDirective = false);
- /// \brief Like \c replaceWhitespace, but additionally adds right-aligned
- /// backslashes to escape newlines inside a preprocessor directive.
+ /// \brief Adds information about an unchangeable token's whitespace.
///
- /// This function and \c replaceWhitespace have the same behavior if
- /// \c Newlines == 0.
- void replacePPWhitespace(const AnnotatedToken &Tok, unsigned NewLines,
- unsigned Spaces, unsigned WhitespaceStartColumn);
+ /// Needs to be called for every token for which \c replaceWhitespace
+ /// was not called.
+ void addUntouchableToken(const FormatToken &Tok, bool InPPDirective);
- /// \brief Inserts a line break into the middle of a token.
+ /// \brief Inserts or replaces whitespace in the middle of a token.
///
- /// Will break at \p Offset inside \p Tok, putting \p Prefix before the line
- /// break and \p Postfix before the rest of the token starts in the next line.
+ /// Inserts \p PreviousPostfix, \p Newlines, \p Spaces and \p CurrentPrefix
+ /// (in this order) at \p Offset inside \p Tok, replacing \p ReplaceChars
+ /// characters.
///
- /// \p InPPDirective, \p Spaces, \p WhitespaceStartColumn and \p Style are
- /// used to generate the correct line break.
- void breakToken(const FormatToken &Tok, unsigned Offset,
- unsigned ReplaceChars, StringRef Prefix, StringRef Postfix,
- bool InPPDirective, unsigned Spaces,
- unsigned WhitespaceStartColumn);
+ /// When \p InPPDirective is true, escaped newlines are inserted. \p Spaces is
+ /// used to align backslashes correctly.
+ void replaceWhitespaceInToken(const FormatToken &Tok, unsigned Offset,
+ unsigned ReplaceChars,
+ StringRef PreviousPostfix,
+ StringRef CurrentPrefix, bool InPPDirective,
+ unsigned Newlines, unsigned IndentLevel,
+ unsigned Spaces);
/// \brief Returns all the \c Replacements created during formatting.
const tooling::Replacements &generateReplacements();
- void addReplacement(const SourceLocation &SourceLoc, unsigned ReplaceChars,
- StringRef Text);
+private:
+ /// \brief Represents a change before a token, a break inside a token,
+ /// or the layout of an unchanged token (or whitespace within).
+ struct Change {
+ /// \brief Functor to sort changes in original source order.
+ class IsBeforeInFile {
+ public:
+ IsBeforeInFile(const SourceManager &SourceMgr) : SourceMgr(SourceMgr) {}
+ bool operator()(const Change &C1, const Change &C2) const;
+
+ private:
+ const SourceManager &SourceMgr;
+ };
+
+ Change() {}
+
+ /// \brief Creates a \c Change.
+ ///
+ /// The generated \c Change will replace the characters at
+ /// \p OriginalWhitespaceRange with a concatenation of
+ /// \p PreviousLinePostfix, \p NewlinesBefore line breaks, \p Spaces spaces
+ /// and \p CurrentLinePrefix.
+ ///
+ /// \p StartOfTokenColumn and \p ContinuesPPDirective will be used to lay
+ /// out trailing comments and escaped newlines.
+ Change(bool CreateReplacement, const SourceRange &OriginalWhitespaceRange,
+ unsigned IndentLevel, unsigned Spaces, unsigned StartOfTokenColumn,
+ unsigned NewlinesBefore, StringRef PreviousLinePostfix,
+ StringRef CurrentLinePrefix, tok::TokenKind Kind,
+ bool ContinuesPPDirective);
+
+ bool CreateReplacement;
+ // Changes might be in the middle of a token, so we cannot just keep the
+ // FormatToken around to query its information.
+ SourceRange OriginalWhitespaceRange;
+ unsigned StartOfTokenColumn;
+ unsigned NewlinesBefore;
+ std::string PreviousLinePostfix;
+ std::string CurrentLinePrefix;
+ // The kind of the token whose whitespace this change replaces, or in which
+ // this change inserts whitespace.
+ // FIXME: Currently this is not set correctly for breaks inside comments, as
+ // the \c BreakableToken is still doing its own alignment.
+ tok::TokenKind Kind;
+ bool ContinuesPPDirective;
+
+ // The number of nested blocks the token is in. This is used to add tabs
+ // only for the indentation, and not for alignment, when
+ // UseTab = UT_ForIndentation.
+ unsigned IndentLevel;
+
+ // The number of spaces in front of the token or broken part of the token.
+ // This will be adapted when aligning tokens.
+ unsigned Spaces;
+
+ // \c IsTrailingComment, \c TokenLength and \c PreviousEndOfTokenColumn
+ // will be calculated in \c calculateLineBreakInformation;
+ // \c EscapedNewlineColumn will be set in \c alignEscapedNewlines.
+ bool IsTrailingComment;
+ unsigned TokenLength;
+ unsigned PreviousEndOfTokenColumn;
+ unsigned EscapedNewlineColumn;
+ };
+
+ /// \brief Calculate \c IsTrailingComment and \c TokenLength for the last
+ /// tokens or token parts in a line and \c PreviousEndOfTokenColumn for the
+ /// first tokens or token parts in a line.
+ void calculateLineBreakInformation();
+
+ /// \brief Align trailing comments over all \c Changes.
+ void alignTrailingComments();
- void addUntouchableComment(unsigned Column);
+ /// \brief Align trailing comments from change \p Start to change \p End at
+ /// the specified \p Column.
+ void alignTrailingComments(unsigned Start, unsigned End, unsigned Column);
- /// \brief Try to align all stashed comments.
- void alignComments();
- /// \brief Try to align all stashed escaped newlines.
+ /// \brief Align escaped newlines over all \c Changes.
void alignEscapedNewlines();
-private:
- std::string getNewLineText(unsigned NewLines, unsigned Spaces);
-
- std::string getNewLineText(unsigned NewLines, unsigned Spaces,
- unsigned WhitespaceStartColumn,
- unsigned EscapedNewlineColumn);
-
- /// \brief Structure to store tokens for later layout and alignment.
- struct StoredToken {
- StoredToken(SourceLocation ReplacementLoc, unsigned ReplacementLength,
- unsigned MinColumn, unsigned MaxColumn, unsigned NewLines,
- unsigned Spaces)
- : ReplacementLoc(ReplacementLoc), ReplacementLength(ReplacementLength),
- MinColumn(MinColumn), MaxColumn(MaxColumn), NewLines(NewLines),
- Spaces(Spaces), Untouchable(false) {}
- SourceLocation ReplacementLoc;
- unsigned ReplacementLength;
- unsigned MinColumn;
- unsigned MaxColumn;
- unsigned NewLines;
- unsigned Spaces;
- bool Untouchable;
- std::string Prefix;
- std::string Postfix;
- };
- SmallVector<StoredToken, 16> Comments;
- SmallVector<StoredToken, 16> EscapedNewlines;
- typedef SmallVector<StoredToken, 16>::iterator token_iterator;
+ /// \brief Align escaped newlines from change \p Start to change \p End at
+ /// the specified \p Column.
+ void alignEscapedNewlines(unsigned Start, unsigned End, unsigned Column);
- /// \brief Put all the comments between \p I and \p E into \p Column.
- void alignComments(token_iterator I, token_iterator E, unsigned Column);
+ /// \brief Fill \c Replaces with the replacements for all effective changes.
+ void generateChanges();
- /// \brief Stores \p Text as the replacement for the whitespace in front of
- /// \p Tok.
- void storeReplacement(SourceLocation Loc, unsigned Length,
- const std::string Text);
+ /// \brief Stores \p Text as the replacement for the whitespace in \p Range.
+ void storeReplacement(const SourceRange &Range, StringRef Text);
+ void appendNewlineText(std::string &Text, unsigned Newlines);
+ void appendNewlineText(std::string &Text, unsigned Newlines,
+ unsigned PreviousEndOfTokenColumn,
+ unsigned EscapedNewlineColumn);
+ void appendIndentText(std::string &Text, unsigned IndentLevel,
+ unsigned Spaces, unsigned WhitespaceStartColumn);
+ SmallVector<Change, 16> Changes;
SourceManager &SourceMgr;
tooling::Replacements Replaces;
const FormatStyle &Style;
+ bool UseCRLF;
};
} // namespace format