diff options
2316 files changed, 146869 insertions, 60669 deletions
diff --git a/.arcconfig b/.arcconfig new file mode 100644 index 000000000000..7f45342a433a --- /dev/null +++ b/.arcconfig @@ -0,0 +1,4 @@ +{ + "project_id" : "clang", + "conduit_uri" : "http://llvm-reviews.chandlerc.com/" +} diff --git a/.gitignore b/.gitignore index 6be9976262a8..6c34e37f4cb6 100644 --- a/.gitignore +++ b/.gitignore @@ -30,3 +30,6 @@ cscope.out #==============================================================================# # Clang extra user tools, which is tracked independently (clang-tools-extra). tools/extra +# Sphinx build products +docs/_build +docs/analyzer/_build diff --git a/CMakeLists.txt b/CMakeLists.txt index 53d4165caec3..6efcd4a7bda8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -66,6 +66,11 @@ if( CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR ) set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib ) set( CLANG_BUILT_STANDALONE 1 ) + + find_package(LibXml2) + if (LIBXML2_FOUND) + set(CLANG_HAVE_LIBXML 1) + endif () endif() set(CLANG_RESOURCE_DIR "" CACHE STRING @@ -133,16 +138,17 @@ configure_file( # Add appropriate flags for GCC if (LLVM_COMPILER_IS_GCC_COMPATIBLE) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-common -Woverloaded-virtual -Wcast-qual -fno-strict-aliasing -pedantic -Wno-long-long -Wall -W -Wno-unused-parameter -Wwrite-strings") + + check_cxx_compiler_flag("-Werror -Wnested-anon-types" CXX_SUPPORTS_NO_NESTED_ANON_TYPES_FLAG) + if( CXX_SUPPORTS_NO_NESTED_ANON_TYPES_FLAG ) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-nested-anon-types" ) + endif() endif () if (APPLE) set(CMAKE_MODULE_LINKER_FLAGS "-Wl,-flat_namespace -Wl,-undefined -Wl,suppress") endif () -# libxml2 is an optional dependency, required only to run validation -# tests on XML output. 
-find_package(LibXml2) - configure_file( ${CLANG_SOURCE_DIR}/include/clang/Config/config.h.cmake ${CLANG_BINARY_DIR}/include/clang/Config/config.h) @@ -253,6 +259,9 @@ install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include/ add_definitions( -D_GNU_SOURCE ) +# FIXME: They should be options. +add_definitions(-DCLANG_ENABLE_ARCMT -DCLANG_ENABLE_REWRITER -DCLANG_ENABLE_STATIC_ANALYZER) + # Clang version information set(CLANG_EXECUTABLE_VERSION "${CLANG_VERSION_MAJOR}.${CLANG_VERSION_MINOR}" CACHE STRING @@ -272,13 +281,15 @@ add_subdirectory(runtime) option(CLANG_BUILD_EXAMPLES "Build CLANG example programs by default." OFF) add_subdirectory(examples) +option(CLANG_INCLUDE_TESTS + "Generate build targets for the Clang unit tests." + ${LLVM_INCLUDE_TESTS}) + # TODO: docs. add_subdirectory(test) -if( LLVM_INCLUDE_TESTS ) - if( NOT CLANG_BUILT_STANDALONE ) - add_subdirectory(unittests) - endif() +if( CLANG_INCLUDE_TESTS ) + add_subdirectory(unittests) endif() # Workaround for MSVS10 to avoid the Dialog Hell diff --git a/CODE_OWNERS.TXT b/CODE_OWNERS.TXT new file mode 100644 index 000000000000..13c0a9bde665 --- /dev/null +++ b/CODE_OWNERS.TXT @@ -0,0 +1,40 @@ +This file is a list of the people responsible for ensuring that patches for a +particular part of Clang are reviewed, either by themself or by someone else. +They are also the gatekeepers for their part of Clang, with the final word on +what goes in or not. + +The list is sorted by surname and formatted to allow easy grepping and +beautification by scripts. The fields are: name (N), email (E), web-address +(W), PGP key ID and fingerprint (P), description (D), and snail-mail address +(S). 
+ +N: Chandler Carruth +E: chandlerc@gmail.com +E: chandlerc@google.com +D: CMake, library layering + +N: Eric Christopher +E: echristo@gmail.com +D: Debug Information, autotools/configure/make build, inline assembly + +N: Doug Gregor +D: All parts of Clang not covered by someone else + +N: Anton Korobeynikov +E: anton@korobeynikov.info +D: Exception handling, Windows codegen, ARM EABI + +N: Ted Kremenek +D: Clang Static Analyzer + +N: John McCall +E: rjmccall@apple.com +D: Clang LLVM IR generation + +N: Chad Rosier +E: mcrosier@apple.com +D: MS-inline asm, and the compiler driver + +N: Richard Smith +E: richard@metafoo.co.uk +D: Clang Semantic Analysis (tools/clang/lib/Sema/* tools/clang/include/clang/Sema/*) diff --git a/INSTALL.txt b/INSTALL.txt index e8e320962bb4..bd2f4fe37096 100644 --- a/INSTALL.txt +++ b/INSTALL.txt @@ -44,6 +44,6 @@ From inside the Clang build directory, run 'make install' to install the Clang compiler and header files into the prefix directory selected when LLVM was configured. -The Clang compiler is available as 'clang' and supports a gcc like command line +The Clang compiler is available as 'clang' and 'clang++'. It supports a gcc like command line interface. See the man page for clang (installed into $prefix/share/man/man1) for more information. diff --git a/LICENSE.TXT b/LICENSE.TXT index 6c224f84c5bb..e31223a486aa 100644 --- a/LICENSE.TXT +++ b/LICENSE.TXT @@ -4,7 +4,7 @@ LLVM Release License University of Illinois/NCSA Open Source License -Copyright (c) 2007-2012 University of Illinois at Urbana-Champaign. +Copyright (c) 2007-2013 University of Illinois at Urbana-Champaign. All rights reserved. 
Developed by: diff --git a/NOTES.txt b/NOTES.txt index 1c89d685729b..107ec5ad48c5 100644 --- a/NOTES.txt +++ b/NOTES.txt @@ -2,9 +2,6 @@ // Random Notes //===---------------------------------------------------------------------===// -C90/C99/C++ Comparisons: -http://david.tribble.com/text/cdiffs.htm - //===---------------------------------------------------------------------===// To time GCC preprocessing speed without output, use: diff --git a/bindings/python/clang/cindex.py b/bindings/python/clang/cindex.py index 5e162c0e8349..70f4f36a2cfd 100644 --- a/bindings/python/clang/cindex.py +++ b/bindings/python/clang/cindex.py @@ -1271,6 +1271,17 @@ class Cursor(Structure): # created. return self._tu + @property + def referenced(self): + """ + For a cursor that is a reference, returns a cursor + representing the entity that it references. + """ + if not hasattr(self, '_referenced'): + self._referenced = conf.lib.clang_getCursorReferenced(self) + + return self._referenced + def get_arguments(self): """Return an iterator for accessing the arguments of this cursor.""" num_args = conf.lib.clang_Cursor_getNumArguments(self) @@ -1634,6 +1645,33 @@ class _CXUnsavedFile(Structure): """Helper for passing unsaved file arguments.""" _fields_ = [("name", c_char_p), ("contents", c_char_p), ('length', c_ulong)] +# Function calls through the Python interface are rather slow. Fortunately, +# for most symbols, we do not need to perform a function call. Their spelling +# never changes and is consequently provided by this spelling cache. 
+SpellingCache = { + # 0: CompletionChunk.Kind("Optional"), + # 1: CompletionChunk.Kind("TypedText"), + # 2: CompletionChunk.Kind("Text"), + # 3: CompletionChunk.Kind("Placeholder"), + # 4: CompletionChunk.Kind("Informative"), + # 5 : CompletionChunk.Kind("CurrentParameter"), + 6: '(', # CompletionChunk.Kind("LeftParen"), + 7: ')', # CompletionChunk.Kind("RightParen"), + 8: '[', # CompletionChunk.Kind("LeftBracket"), + 9: ']', # CompletionChunk.Kind("RightBracket"), + 10: '{', # CompletionChunk.Kind("LeftBrace"), + 11: '}', # CompletionChunk.Kind("RightBrace"), + 12: '<', # CompletionChunk.Kind("LeftAngle"), + 13: '>', # CompletionChunk.Kind("RightAngle"), + 14: ', ', # CompletionChunk.Kind("Comma"), + # 15: CompletionChunk.Kind("ResultType"), + 16: ':', # CompletionChunk.Kind("Colon"), + 17: ';', # CompletionChunk.Kind("SemiColon"), + 18: '=', # CompletionChunk.Kind("Equal"), + 19: ' ', # CompletionChunk.Kind("HorizontalSpace"), + # 20: CompletionChunk.Kind("VerticalSpace") +} + class CompletionChunk: class Kind: def __init__(self, name): @@ -1648,18 +1686,30 @@ class CompletionChunk: def __init__(self, completionString, key): self.cs = completionString self.key = key + self.__kindNumberCache = -1 def __repr__(self): return "{'" + self.spelling + "', " + str(self.kind) + "}" @CachedProperty def spelling(self): + if self.__kindNumber in SpellingCache: + return SpellingCache[self.__kindNumber] return conf.lib.clang_getCompletionChunkText(self.cs, self.key).spelling + # We do not use @CachedProperty here, as the manual implementation is + # apparently still significantly faster. Please profile carefully if you + # would like to add CachedProperty back. 
+ @property + def __kindNumber(self): + if self.__kindNumberCache == -1: + self.__kindNumberCache = \ + conf.lib.clang_getCompletionChunkKind(self.cs, self.key) + return self.__kindNumberCache + @CachedProperty def kind(self): - res = conf.lib.clang_getCompletionChunkKind(self.cs, self.key) - return completionChunkKindMap[res] + return completionChunkKindMap[self.__kindNumber] @CachedProperty def string(self): @@ -1672,19 +1722,19 @@ class CompletionChunk: None def isKindOptional(self): - return self.kind == completionChunkKindMap[0] + return self.__kindNumber == 0 def isKindTypedText(self): - return self.kind == completionChunkKindMap[1] + return self.__kindNumber == 1 def isKindPlaceHolder(self): - return self.kind == completionChunkKindMap[3] + return self.__kindNumber == 3 def isKindInformative(self): - return self.kind == completionChunkKindMap[4] + return self.__kindNumber == 4 def isKindResultType(self): - return self.kind == completionChunkKindMap[15] + return self.__kindNumber == 15 completionChunkKindMap = { 0: CompletionChunk.Kind("Optional"), @@ -1965,7 +2015,7 @@ class TranslationUnit(ClangObject): len(args), unsaved_array, len(unsaved_files), options) - if ptr is None: + if not ptr: raise TranslationUnitLoadError("Error parsing translation unit.") return cls(ptr, index=index) @@ -1987,7 +2037,7 @@ class TranslationUnit(ClangObject): index = Index.create() ptr = conf.lib.clang_createTranslationUnit(index, filename) - if ptr is None: + if not ptr: raise TranslationUnitLoadError(filename) return cls(ptr=ptr, index=index) @@ -3046,13 +3096,13 @@ class Config: Config.library_path = path @staticmethod - def set_library_file(file): - """Set the exact location of libclang from""" + def set_library_file(filename): + """Set the exact location of libclang""" if Config.loaded: raise Exception("library file must be set before before using " \ "any other functionalities in libclang.") - Config.library_file = path + Config.library_file = filename @staticmethod def 
set_compatibility_check(check_status): diff --git a/bindings/python/tests/cindex/test_cursor.py b/bindings/python/tests/cindex/test_cursor.py index edb209b52b96..a27525cfe553 100644 --- a/bindings/python/tests/cindex/test_cursor.py +++ b/bindings/python/tests/cindex/test_cursor.py @@ -250,3 +250,12 @@ def test_get_arguments(): assert len(arguments) == 2 assert arguments[0].spelling == "i" assert arguments[1].spelling == "j" + +def test_referenced(): + tu = get_tu('void foo(); void bar() { foo(); }') + foo = get_cursor(tu, 'foo') + bar = get_cursor(tu, 'bar') + for c in bar.get_children(): + if c.kind == CursorKind.CALL_EXPR: + assert c.referenced.spelling == foo.spelling + break diff --git a/bindings/python/tests/cindex/test_translation_unit.py b/bindings/python/tests/cindex/test_translation_unit.py index c91f126097ac..f77998e52457 100644 --- a/bindings/python/tests/cindex/test_translation_unit.py +++ b/bindings/python/tests/cindex/test_translation_unit.py @@ -8,6 +8,7 @@ from clang.cindex import Index from clang.cindex import SourceLocation from clang.cindex import SourceRange from clang.cindex import TranslationUnitSaveError +from clang.cindex import TranslationUnitLoadError from clang.cindex import TranslationUnit from .util import get_cursor from .util import get_tu @@ -239,3 +240,19 @@ def test_get_tokens_gc(): del tokens gc.collect() gc.collect() # Just in case. 
+ +def test_fail_from_source(): + path = os.path.join(kInputsDir, 'non-existent.cpp') + try: + tu = TranslationUnit.from_source(path) + except TranslationUnitLoadError: + tu = None + assert tu == None + +def test_fail_from_ast_file(): + path = os.path.join(kInputsDir, 'non-existent.ast') + try: + tu = TranslationUnit.from_ast_file(path) + except TranslationUnitLoadError: + tu = None + assert tu == None diff --git a/bindings/xml/comment-xml-schema.rng b/bindings/xml/comment-xml-schema.rng index d98f405cf9e7..22371dfed1e4 100644 --- a/bindings/xml/comment-xml-schema.rng +++ b/bindings/xml/comment-xml-schema.rng @@ -25,6 +25,9 @@ <ref name="USR" /> </optional> <optional> + <ref name="Headerfile" /> + </optional> + <optional> <ref name="Declaration" /> </optional> <optional> @@ -74,6 +77,9 @@ </optional> <!-- TODO: Add exception specification. --> <optional> + <ref name="Headerfile" /> + </optional> + <optional> <ref name="Declaration" /> </optional> <optional> @@ -121,6 +127,9 @@ <ref name="USR" /> </optional> <optional> + <ref name="Headerfile" /> + </optional> + <optional> <ref name="Declaration" /> </optional> <optional> @@ -153,6 +162,9 @@ <ref name="USR" /> </optional> <optional> + <ref name="Headerfile" /> + </optional> + <optional> <ref name="Declaration" /> </optional> <optional> @@ -186,6 +198,9 @@ <ref name="USR" /> </optional> <optional> + <ref name="Headerfile" /> + </optional> + <optional> <ref name="Declaration" /> </optional> <optional> @@ -219,6 +234,9 @@ <ref name="USR" /> </optional> <optional> + <ref name="Headerfile" /> + </optional> + <optional> <ref name="Declaration" /> </optional> <optional> @@ -252,6 +270,9 @@ <ref name="USR" /> </optional> <optional> + <ref name="Headerfile" /> + </optional> + <optional> <ref name="Declaration" /> </optional> <optional> @@ -329,6 +350,14 @@ </element> </define> + <define name="Headerfile"> + <element name="Headerfile"> + <oneOrMore> + <ref name="TextBlockContent" /> + </oneOrMore> + </element> + </define> + 
<define name="Discussion"> <element name="Discussion"> <zeroOrMore> @@ -409,7 +438,7 @@ <define name="Availability"> <element name="Availability"> <attribute name="distribution"> - <data type="string" /> + <data type="string" /> </attribute> <optional> <element name="IntroducedInVersion"> @@ -470,6 +499,30 @@ <define name="TextBlockContent"> <choice> <element name="Para"> + <optional> + <attribute name="kind"> + <choice> + <value>attention</value> + <value>author</value> + <value>authors</value> + <value>bug</value> + <value>copyright</value> + <value>date</value> + <value>invariant</value> + <value>note</value> + <value>post</value> + <value>pre</value> + <value>remark</value> + <value>remarks</value> + <value>sa</value> + <value>see</value> + <value>since</value> + <value>todo</value> + <value>version</value> + <value>warning</value> + </choice> + </attribute> + </optional> <zeroOrMore> <ref name="TextInlineContent" /> </zeroOrMore> diff --git a/docs/AddressSanitizer.html b/docs/AddressSanitizer.html deleted file mode 100644 index 397eafc2d51b..000000000000 --- a/docs/AddressSanitizer.html +++ /dev/null @@ -1,171 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" - "http://www.w3.org/TR/html4/strict.dtd"> -<!-- Material used from: HTML 4.01 specs: http://www.w3.org/TR/html401/ --> -<html> -<head> - <META http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"> - <title>AddressSanitizer, a fast memory error detector</title> - <link type="text/css" rel="stylesheet" href="../menu.css"> - <link type="text/css" rel="stylesheet" href="../content.css"> - <style type="text/css"> - td { - vertical-align: top; - } - </style> -</head> -<body> - -<!--#include virtual="../menu.html.incl"--> - -<div id="content"> - -<h1>AddressSanitizer</h1> -<ul> - <li> <a href="#intro">Introduction</a> - <li> <a href="#howtobuild">How to Build</a> - <li> <a href="#usage">Usage</a> - <ul><li> <a href="#has_feature">__has_feature(address_sanitizer)</a></ul> - <ul><li> <a 
href="#no_address_safety_analysis"> - __attribute__((no_address_safety_analysis))</a></ul> - <li> <a href="#platforms">Supported Platforms</a> - <li> <a href="#limitations">Limitations</a> - <li> <a href="#status">Current Status</a> - <li> <a href="#moreinfo">More Information</a> -</ul> - -<h2 id="intro">Introduction</h2> -AddressSanitizer is a fast memory error detector. -It consists of a compiler instrumentation module and a run-time library. -The tool can detect the following types of bugs: -<ul> <li> Out-of-bounds accesses to heap, stack and globals - <li> Use-after-free - <li> Use-after-return (to some extent) - <li> Double-free, invalid free -</ul> -Typical slowdown introduced by AddressSanitizer is <b>2x</b>. - -<h2 id="howtobuild">How to build</h2> -Follow the <a href="../get_started.html">clang build instructions</a>. -CMake build is supported.<BR> - -<h2 id="usage">Usage</h2> -Simply compile and link your program with <tt>-fsanitize=address</tt> flag. <BR> -The AddressSanitizer run-time library should be linked to the final executable, -so make sure to use <tt>clang</tt> (not <tt>ld</tt>) for the final link step.<BR> -When linking shared libraries, the AddressSanitizer run-time is not linked, -so <tt>-Wl,-z,defs</tt> may cause link errors (don't use it with AddressSanitizer). <BR> - -To get a reasonable performance add <tt>-O1</tt> or higher. <BR> -To get nicer stack traces in error messages add -<tt>-fno-omit-frame-pointer</tt>. <BR> -To get perfect stack traces you may need to disable inlining (just use <tt>-O1</tt>) and tail call -elimination (<tt>-fno-optimize-sibling-calls</tt>). 
- -<pre> -% cat example_UseAfterFree.cc -int main(int argc, char **argv) { - int *array = new int[100]; - delete [] array; - return array[argc]; // BOOM -} -</pre> - -<pre> -# Compile and link -% clang -O1 -g -fsanitize=address -fno-omit-frame-pointer example_UseAfterFree.cc -</pre> -OR -<pre> -# Compile -% clang -O1 -g -fsanitize=address -fno-omit-frame-pointer -c example_UseAfterFree.cc -# Link -% clang -g -fsanitize=address example_UseAfterFree.o -</pre> - -If a bug is detected, the program will print an error message to stderr and exit with a -non-zero exit code. -Currently, AddressSanitizer does not symbolize its output, so you may need to use a -separate script to symbolize the result offline (this will be fixed in future). -<pre> -% ./a.out 2> log -% projects/compiler-rt/lib/asan/scripts/asan_symbolize.py / < log | c++filt -==9442== ERROR: AddressSanitizer heap-use-after-free on address 0x7f7ddab8c084 at pc 0x403c8c bp 0x7fff87fb82d0 sp 0x7fff87fb82c8 -READ of size 4 at 0x7f7ddab8c084 thread T0 - #0 0x403c8c in main example_UseAfterFree.cc:4 - #1 0x7f7ddabcac4d in __libc_start_main ??:0 -0x7f7ddab8c084 is located 4 bytes inside of 400-byte region [0x7f7ddab8c080,0x7f7ddab8c210) -freed by thread T0 here: - #0 0x404704 in operator delete[](void*) ??:0 - #1 0x403c53 in main example_UseAfterFree.cc:4 - #2 0x7f7ddabcac4d in __libc_start_main ??:0 -previously allocated by thread T0 here: - #0 0x404544 in operator new[](unsigned long) ??:0 - #1 0x403c43 in main example_UseAfterFree.cc:2 - #2 0x7f7ddabcac4d in __libc_start_main ??:0 -==9442== ABORTING -</pre> - -AddressSanitizer exits on the first detected error. This is by design. -One reason: it makes the generated code smaller and faster (both by ~5%). -Another reason: this makes fixing bugs unavoidable. With Valgrind, it is often -the case that users treat Valgrind warnings as false positives -(which they are not) and don't fix them. 
- - -<h3 id="has_feature">__has_feature(address_sanitizer)</h3> -In some cases one may need to execute different code depending on whether -AddressSanitizer is enabled. -<a href="LanguageExtensions.html#__has_feature_extension">__has_feature</a> -can be used for this purpose. -<pre> -#if defined(__has_feature) -# if __has_feature(address_sanitizer) - code that builds only under AddressSanitizer -# endif -#endif -</pre> - -<h3 id="no_address_safety_analysis">__attribute__((no_address_safety_analysis))</h3> -Some code should not be instrumented by AddressSanitizer. -One may use the function attribute -<a href="LanguageExtensions.html#address_sanitizer"> - <tt>no_address_safety_analysis</tt></a> -to disable instrumentation of a particular function. -This attribute may not be supported by other compilers, so we suggest to -use it together with <tt>__has_feature(address_sanitizer)</tt>. -Note: currently, this attribute will be lost if the function is inlined. - -<h2 id="platforms">Supported Platforms</h2> -AddressSanitizer is supported on -<ul><li>Linux i386/x86_64 (tested on Ubuntu 10.04 and 12.04). -<li>MacOS 10.6, 10.7 and 10.8 (i386/x86_64). -</ul> -Support for Linux ARM (and Android ARM) is in progress -(it may work, but is not guaranteed too). - - -<h2 id="limitations">Limitations</h2> -<ul> -<li> AddressSanitizer uses more real memory than a native run. -Exact overhead depends on the allocations sizes. The smaller the -allocations you make the bigger the overhead is. -<li> AddressSanitizer uses more stack memory. We have seen up to 3x increase. -<li> On 64-bit platforms AddressSanitizer maps (but not reserves) -16+ Terabytes of virtual address space. -This means that tools like <tt>ulimit</tt> may not work as usually expected. -<li> Static linking is not supported. -</ul> - - -<h2 id="status">Current Status</h2> -AddressSanitizer is fully functional on supported platforms starting from LLVM 3.1. 
-The test suite is integrated into CMake build and can be run with -<tt>make check-asan</tt> command. - -<h2 id="moreinfo">More Information</h2> -<a href="http://code.google.com/p/address-sanitizer/">http://code.google.com/p/address-sanitizer</a>. - - -</div> -</body> -</html> diff --git a/docs/AddressSanitizer.rst b/docs/AddressSanitizer.rst new file mode 100644 index 000000000000..89e864450009 --- /dev/null +++ b/docs/AddressSanitizer.rst @@ -0,0 +1,163 @@ +================ +AddressSanitizer +================ + +.. contents:: + :local: + +Introduction +============ + +AddressSanitizer is a fast memory error detector. It consists of a compiler +instrumentation module and a run-time library. The tool can detect the +following types of bugs: + +* Out-of-bounds accesses to heap, stack and globals +* Use-after-free +* Use-after-return (to some extent) +* Double-free, invalid free + +Typical slowdown introduced by AddressSanitizer is **2x**. + +How to build +============ + +Follow the `clang build instructions <../get_started.html>`_. CMake build is +supported. + +Usage +===== + +Simply compile and link your program with ``-fsanitize=address`` flag. The +AddressSanitizer run-time library should be linked to the final executable, so +make sure to use ``clang`` (not ``ld``) for the final link step. When linking +shared libraries, the AddressSanitizer run-time is not linked, so +``-Wl,-z,defs`` may cause link errors (don't use it with AddressSanitizer). To +get a reasonable performance add ``-O1`` or higher. To get nicer stack traces +in error messages add ``-fno-omit-frame-pointer``. To get perfect stack traces +you may need to disable inlining (just use ``-O1``) and tail call elimination +(``-fno-optimize-sibling-calls``). + +.. 
code-block:: console + + % cat example_UseAfterFree.cc + int main(int argc, char **argv) { + int *array = new int[100]; + delete [] array; + return array[argc]; // BOOM + } + + # Compile and link + % clang -O1 -g -fsanitize=address -fno-omit-frame-pointer example_UseAfterFree.cc + +or: + +.. code-block:: console + + # Compile + % clang -O1 -g -fsanitize=address -fno-omit-frame-pointer -c example_UseAfterFree.cc + # Link + % clang -g -fsanitize=address example_UseAfterFree.o + +If a bug is detected, the program will print an error message to stderr and +exit with a non-zero exit code. Currently, AddressSanitizer does not symbolize +its output, so you may need to use a separate script to symbolize the result +offline (this will be fixed in future). + +.. code-block:: console + + % ./a.out 2> log + % projects/compiler-rt/lib/asan/scripts/asan_symbolize.py / < log | c++filt + ==9442== ERROR: AddressSanitizer heap-use-after-free on address 0x7f7ddab8c084 at pc 0x403c8c bp 0x7fff87fb82d0 sp 0x7fff87fb82c8 + READ of size 4 at 0x7f7ddab8c084 thread T0 + #0 0x403c8c in main example_UseAfterFree.cc:4 + #1 0x7f7ddabcac4d in __libc_start_main ??:0 + 0x7f7ddab8c084 is located 4 bytes inside of 400-byte region [0x7f7ddab8c080,0x7f7ddab8c210) + freed by thread T0 here: + #0 0x404704 in operator delete[](void*) ??:0 + #1 0x403c53 in main example_UseAfterFree.cc:4 + #2 0x7f7ddabcac4d in __libc_start_main ??:0 + previously allocated by thread T0 here: + #0 0x404544 in operator new[](unsigned long) ??:0 + #1 0x403c43 in main example_UseAfterFree.cc:2 + #2 0x7f7ddabcac4d in __libc_start_main ??:0 + ==9442== ABORTING + +AddressSanitizer exits on the first detected error. This is by design. +One reason: it makes the generated code smaller and faster (both by +~5%). Another reason: this makes fixing bugs unavoidable. With Valgrind, +it is often the case that users treat Valgrind warnings as false +positives (which they are not) and don't fix them. 
+ +``__has_feature(address_sanitizer)`` +------------------------------------ + +In some cases one may need to execute different code depending on whether +AddressSanitizer is enabled. +:ref:`\_\_has\_feature <langext-__has_feature-__has_extension>` can be used for +this purpose. + +.. code-block:: c + + #if defined(__has_feature) + # if __has_feature(address_sanitizer) + // code that builds only under AddressSanitizer + # endif + #endif + +``__attribute__((no_sanitize_address))`` +----------------------------------------------- + +Some code should not be instrumented by AddressSanitizer. One may use the +function attribute +:ref:`no_sanitize_address <langext-address_sanitizer>` +(or a deprecated synonym ``no_address_safety_analysis``) +to disable instrumentation of a particular function. This attribute may not be +supported by other compilers, so we suggest using it together with +``__has_feature(address_sanitizer)``. Note: currently, this attribute will be +lost if the function is inlined. + +Initialization order checking +----------------------------- + +AddressSanitizer can optionally detect dynamic initialization order problems, +when initialization of globals defined in one translation unit uses +globals defined in another translation unit. To enable this check at runtime, +you should set environment variable +``ASAN_OPTIONS=check_initialization_order=1``. + +Supported Platforms +=================== + +AddressSanitizer is supported on + +* Linux i386/x86\_64 (tested on Ubuntu 10.04 and 12.04); +* MacOS 10.6, 10.7 and 10.8 (i386/x86\_64). + +Support for Linux ARM (and Android ARM) is in progress (it may work, but +is not guaranteed to). + +Limitations +=========== + +* AddressSanitizer uses more real memory than a native run. Exact overhead + depends on the allocation sizes. The smaller the allocations you make the + bigger the overhead is. +* AddressSanitizer uses more stack memory. We have seen up to 3x increase. 
+* On 64-bit platforms AddressSanitizer maps (but not reserves) 16+ Terabytes of + virtual address space. This means that tools like ``ulimit`` may not work as + usually expected. +* Static linking is not supported. + +Current Status +============== + +AddressSanitizer is fully functional on supported platforms starting from LLVM +3.1. The test suite is integrated into CMake build and can be run with ``make +check-asan`` command. + +More Information +================ + +`http://code.google.com/p/address-sanitizer <http://code.google.com/p/address-sanitizer/>`_ + diff --git a/docs/AnalyzerRegions.html b/docs/AnalyzerRegions.html deleted file mode 100644 index f9d333792045..000000000000 --- a/docs/AnalyzerRegions.html +++ /dev/null @@ -1,260 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" - "http://www.w3.org/TR/html4/strict.dtd"> -<html> -<head> -<title>Static Analyzer Design Document: Memory Regions</title> -</head> -<body> - -<h1>Static Analyzer Design Document: Memory Regions</h1> - -<h3>Authors</h3> - -<p>Ted Kremenek, <tt>kremenek at apple</tt><br> -Zhongxing Xu, <tt>xuzhongzhing at gmail</tt></p> - -<h2 id="intro">Introduction</h2> - -<p>The path-sensitive analysis engine in libAnalysis employs an extensible API -for abstractly modeling the memory of an analyzed program. This API employs the -concept of "memory regions" to abstractly model chunks of program memory such as -program variables and dynamically allocated memory such as those returned from -'malloc' and 'alloca'. Regions are hierarchical, with subregions modeling -subtyping relationships, field and array offsets into larger chunks of memory, -and so on.</p> - -<p>The region API consists of two components:</p> - -<ul> <li>A taxonomy and representation of regions themselves within the analyzer -engine. The primary definitions and interfaces are described in <tt><a -href="http://clang.llvm.org/doxygen/MemRegion_8h-source.html">MemRegion.h</a></tt>. 
-At the root of the region hierarchy is the class <tt>MemRegion</tt> with -specific subclasses refining the region concept for variables, heap allocated -memory, and so forth.</li> <li>The modeling of binding of values to regions. For -example, modeling the value stored to a local variable <tt>x</tt> consists of -recording the binding between the region for <tt>x</tt> (which represents the -raw memory associated with <tt>x</tt>) and the value stored to <tt>x</tt>. This -binding relationship is captured with the notion of "symbolic -stores."</li> </ul> - -<p>Symbolic stores, which can be thought of as representing the relation -<tt>regions -> values</tt>, are implemented by subclasses of the -<tt>StoreManager</tt> class (<tt><a -href="http://clang.llvm.org/doxygen/Store_8h-source.html">Store.h</a></tt>). A -particular StoreManager implementation has complete flexibility concerning the -following: - -<ul> -<li><em>How</em> to model the binding between regions and values</li> -<li><em>What</em> bindings are recorded -</ul> - -<p>Together, both points allow different StoreManagers to tradeoff between -different levels of analysis precision and scalability concerning the reasoning -of program memory. Meanwhile, the core path-sensitive engine makes no -assumptions about either points, and queries a StoreManager about the bindings -to a memory region through a generic interface that all StoreManagers share. If -a particular StoreManager cannot reason about the potential bindings of a given -memory region (e.g., '<tt>BasicStoreManager</tt>' does not reason about fields -of structures) then the StoreManager can simply return 'unknown' (represented by -'<tt>UnknownVal</tt>') for a particular region-binding. 
This separation of -concerns not only isolates the core analysis engine from the details of -reasoning about program memory but also facilities the option of a client of the -path-sensitive engine to easily swap in different StoreManager implementations -that internally reason about program memory in very different ways.</p> - -<p>The rest of this document is divided into two parts. We first discuss region -taxonomy and the semantics of regions. We then discuss the StoreManager -interface, and details of how the currently available StoreManager classes -implement region bindings.</p> - -<h2 id="regions">Memory Regions and Region Taxonomy</h2> - -<h3>Pointers</h3> - -<p>Before talking about the memory regions, we would talk about the pointers -since memory regions are essentially used to represent pointer values.</p> - -<p>The pointer is a type of values. Pointer values have two semantic aspects. -One is its physical value, which is an address or location. The other is the -type of the memory object residing in the address.</p> - -<p>Memory regions are designed to abstract these two properties of the pointer. -The physical value of a pointer is represented by MemRegion pointers. The rvalue -type of the region corresponds to the type of the pointee object.</p> - -<p>One complication is that we could have different view regions on the same -memory chunk. They represent the same memory location, but have different -abstract location, i.e., MemRegion pointers. Thus we need to canonicalize the -abstract locations to get a unique abstract location for one physical -location.</p> - -<p>Furthermore, these different view regions may or may not represent memory -objects of different types. 
Some different types are semantically the same, -for example, 'struct s' and 'my_type' are the same type.</p> - -<pre> -struct s; -typedef struct s my_type; -</pre> - -<p>But <tt>char</tt> and <tt>int</tt> are not the same type in the code below:</p> - -<pre> -void *p; -int *q = (int*) p; -char *r = (char*) p; -</pre> - -<p>Thus we need to canonicalize the MemRegion which is used in binding and -retrieving.</p> - -<h3>Regions</h3> -<p>Region is the entity used to model pointer values. A Region has the following -properties:</p> - -<ul> -<li>Kind</li> - -<li>ObjectType: the type of the object residing on the region.</li> - -<li>LocationType: the type of the pointer value that the region corresponds to. - Usually this is the pointer to the ObjectType. But sometimes we want to cache - this type explicitly, for example, for a CodeTextRegion.</li> - -<li>StartLocation</li> - -<li>EndLocation</li> -</ul> - -<h3>Symbolic Regions</h3> - -<p>A symbolic region is a map of the concept of symbolic values into the domain -of regions. It is the way that we represent symbolic pointers. Whenever a -symbolic pointer value is needed, a symbolic region is created to represent -it.</p> - -<p>A symbolic region has no type. It wraps a SymbolData. But sometimes we have -type information associated with a symbolic region. For this case, a -TypedViewRegion is created to layer the type information on top of the symbolic -region. The reason we do not carry type information with the symbolic region is -that the symbolic regions can have no type. 
To be consistent, we don't let them -to carry type information.</p> - -<p>Like a symbolic pointer, a symbolic region may be NULL, has unknown extent, -and represents a generic chunk of memory.</p> - -<p><em><b>NOTE</b>: We plan not to use loc::SymbolVal in RegionStore and remove it - gradually.</em></p> - -<p>Symbolic regions get their rvalue types through the following ways:</p> - -<ul> -<li>Through the parameter or global variable that points to it, e.g.: -<pre> -void f(struct s* p) { - ... -} -</pre> - -<p>The symbolic region pointed to by <tt>p</tt> has type <tt>struct -s</tt>.</p></li> - -<li>Through explicit or implicit casts, e.g.: -<pre> -void f(void* p) { - struct s* q = (struct s*) p; - ... -} -</pre> -</li> -</ul> - -<p>We attach the type information to the symbolic region lazily. For the first -case above, we create the <tt>TypedViewRegion</tt> only when the pointer is -actually used to access the pointee memory object, that is when the element or -field region is created. For the cast case, the <tt>TypedViewRegion</tt> is -created when visiting the <tt>CastExpr</tt>.</p> - |