diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2015-03-24 21:31:36 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2015-03-24 21:31:36 +0000 |
commit | fb911942f1434f3d1750f83f25f5e42c80e60638 (patch) | |
tree | 1678c4a4f0182e4029a86d135aa4a1b7d09e3c41 | |
download | src-fb911942f1434f3d1750f83f25f5e42c80e60638.tar.gz src-fb911942f1434f3d1750f83f25f5e42c80e60638.zip |
Vendor import of lld trunk r233088:vendor/lld/lld-trunk-r233088
Notes
Notes:
svn path=/vendor/lld/dist/; revision=280461
svn path=/vendor/lld/lld-trunk-r233088/; revision=280462; tag=vendor/lld/lld-trunk-r233088
1131 files changed, 106976 insertions, 0 deletions
diff --git a/.arcconfig b/.arcconfig new file mode 100644 index 000000000000..787b339a9f20 --- /dev/null +++ b/.arcconfig @@ -0,0 +1,4 @@ +{ + "project_id" : "lld", + "conduit_uri" : "http://reviews.llvm.org/" +} diff --git a/.clang-format b/.clang-format new file mode 100644 index 000000000000..9b3aa8b7213b --- /dev/null +++ b/.clang-format @@ -0,0 +1 @@ +BasedOnStyle: LLVM diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000000..0a288ee8ce96 --- /dev/null +++ b/.gitignore @@ -0,0 +1,24 @@ +#==============================================================================# +# This file specifies intentionally untracked files that git should ignore. +# See: http://www.kernel.org/pub/software/scm/git/docs/gitignore.html +#==============================================================================# + +#==============================================================================# +# File extensions to be ignored anywhere in the tree. +#==============================================================================# +# Temp files created by most text editors. +*~ +# Merge files created by git. +*.orig +# Byte compiled python modules. +*.pyc +# vim swap files +.*.swp +# Mac OS X Finder layout info +.DS_Store + +#==============================================================================# +# Directories to be ignored. +#==============================================================================# +# Sphinx build files. +docs/_build diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 000000000000..30ef47a692d2 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,98 @@ +set(LLD_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) +set(LLD_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) + +# Compute the LLD version from the LLVM version. +string(REGEX MATCH "[0-9]+\\.[0-9]+(\\.[0-9]+)?" LLD_VERSION + ${PACKAGE_VERSION}) +message(STATUS "LLD version: ${LLD_VERSION}") + +string(REGEX REPLACE "([0-9]+)\\.[0-9]+(\\.[0-9]+)?" 
"\\1" LLD_VERSION_MAJOR + ${LLD_VERSION}) +string(REGEX REPLACE "[0-9]+\\.([0-9]+)(\\.[0-9]+)?" "\\1" LLD_VERSION_MINOR + ${LLD_VERSION}) + +# Determine LLD revision and repository. +# TODO: Figure out a way to get the revision and the repository on windows. +if ( NOT CMAKE_SYSTEM_NAME MATCHES "Windows" ) + execute_process(COMMAND ${CMAKE_SOURCE_DIR}/utils/GetSourceVersion ${LLD_SOURCE_DIR} + OUTPUT_VARIABLE LLD_REVISION) + + execute_process(COMMAND ${CMAKE_SOURCE_DIR}/utils/GetRepositoryPath ${LLD_SOURCE_DIR} + OUTPUT_VARIABLE LLD_REPOSITORY) + if ( LLD_REPOSITORY ) + # Replace newline characters with spaces + string(REGEX REPLACE "(\r?\n)+" " " LLD_REPOSITORY ${LLD_REPOSITORY}) + # Remove leading spaces + STRING(REGEX REPLACE "^[ \t\r\n]+" "" LLD_REPOSITORY "${LLD_REPOSITORY}" ) + # Remove trailing spaces + string(REGEX REPLACE "(\ )+$" "" LLD_REPOSITORY ${LLD_REPOSITORY}) + endif() + + if ( LLD_REVISION ) + # Replace newline characters with spaces + string(REGEX REPLACE "(\r?\n)+" " " LLD_REVISION ${LLD_REVISION}) + # Remove leading spaces + STRING(REGEX REPLACE "^[ \t\r\n]+" "" LLD_REVISION "${LLD_REVISION}" ) + # Remove trailing spaces + string(REGEX REPLACE "(\ )+$" "" LLD_REVISION ${LLD_REVISION}) + endif() +endif () + +# Configure the Version.inc file. +configure_file( + ${CMAKE_CURRENT_SOURCE_DIR}/include/lld/Config/Version.inc.in + ${CMAKE_CURRENT_BINARY_DIR}/include/lld/Config/Version.inc) + + +if (CMAKE_SOURCE_DIR STREQUAL CMAKE_BINARY_DIR) + message(FATAL_ERROR "In-source builds are not allowed. CMake would overwrite " +"the makefiles distributed with LLVM. Please create a directory and run cmake " +"from there, passing the path to this source directory as the last argument. " +"This process created the file `CMakeCache.txt' and the directory " +"`CMakeFiles'. Please delete them.") +endif() + +list (APPEND CMAKE_MODULE_PATH "${LLD_SOURCE_DIR}/cmake/modules") + +option(LLD_USE_VTUNE + "Enable VTune user task tracking." 
+ OFF) +if (LLD_USE_VTUNE) + find_package(VTune) + if (VTUNE_FOUND) + include_directories(${VTune_INCLUDE_DIRS}) + list(APPEND LLVM_COMMON_LIBS ${VTune_LIBRARIES}) + add_definitions(-DLLD_HAS_VTUNE) + endif() +endif() + + +if (MSVC) + add_definitions(-wd4530) # Suppress 'warning C4530: C++ exception handler used, but unwind semantics are not enabled.' + add_definitions(-wd4062) # Suppress 'warning C4062: enumerator X in switch of enum Y is not handled' from system header. +endif() + +include_directories(BEFORE + ${CMAKE_CURRENT_BINARY_DIR}/include + ${CMAKE_CURRENT_SOURCE_DIR}/include + ) + +if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY) + install(DIRECTORY include/ + DESTINATION include + FILES_MATCHING + PATTERN "*.h" + PATTERN ".svn" EXCLUDE + ) +endif() + +add_subdirectory(lib) +add_subdirectory(tools) + +add_subdirectory(test) + +if (LLVM_INCLUDE_TESTS) + add_subdirectory(unittests) +endif() + +add_subdirectory(docs) diff --git a/LICENSE.TXT b/LICENSE.TXT new file mode 100644 index 000000000000..bcb83b211422 --- /dev/null +++ b/LICENSE.TXT @@ -0,0 +1,62 @@ +============================================================================== +lld License +============================================================================== +University of Illinois/NCSA +Open Source License + +Copyright (c) 2011-2015 by the contributors listed in CREDITS.TXT +All rights reserved. 
+ +Developed by: + + LLVM Team + + University of Illinois at Urbana-Champaign + + http://llvm.org + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal with +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimers. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimers in the + documentation and/or other materials provided with the distribution. + + * Neither the names of the LLVM Team, University of Illinois at + Urbana-Champaign, nor the names of its contributors may be used to + endorse or promote products derived from this Software without specific + prior written permission. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE +SOFTWARE. + +============================================================================== +The lld software contains code written by third parties. Such software will +have its own individual LICENSE.TXT file in the directory in which it appears. +This file will describe the copyrights, license, and restrictions which apply +to that code. 
+ +The disclaimer of warranty in the University of Illinois Open Source License +applies to all code in the lld Distribution, and nothing in any of the +other licenses gives permission to use the names of the LLVM Team or the +University of Illinois to endorse or promote products derived from this +Software. + +The following pieces of software have additional or alternate copyrights, +licenses, and/or restrictions: + +Program Directory +------- --------- +<none yet> diff --git a/Makefile b/Makefile new file mode 100644 index 000000000000..e1b6a678fc23 --- /dev/null +++ b/Makefile @@ -0,0 +1,86 @@ +##===- Makefile --------------------------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +# If LLD_LEVEL is not set, then we are the top-level Makefile. Otherwise, we +# are being included from a subdirectory makefile. + +ifndef LLD_LEVEL + +IS_TOP_LEVEL := 1 +LLD_LEVEL := . +DIRS := include lib tools unittests + +PARALLEL_DIRS := + +endif + +ifeq ($(MAKECMDGOALS),libs-only) + DIRS := $(filter-out tools docs, $(DIRS)) + OPTIONAL_DIRS := +endif +ifeq ($(BUILD_LLD_ONLY),YES) + DIRS := $(filter-out docs unittests, $(DIRS)) + OPTIONAL_DIRS := +endif + +### +# Common Makefile code, shared by all lld Makefiles. + +# Set LLVM source root level. +LEVEL := $(LLD_LEVEL)/../.. + +# Include LLVM common makefile. +include $(LEVEL)/Makefile.common + +ifneq ($(ENABLE_DOCS),1) + DIRS := $(filter-out docs, $(DIRS)) +endif + +CPP.Flags += -I$(PROJ_SRC_DIR)/$(LLD_LEVEL)/include +CPP.Flags += -I$(PROJ_OBJ_DIR)/$(LLD_LEVEL)/include + +### +# lld Top Level specific stuff. + +ifeq ($(IS_TOP_LEVEL),1) + +ifneq ($(PROJ_SRC_ROOT),$(PROJ_OBJ_ROOT)) +$(RecursiveTargets):: + $(Verb) for dir in test unittests; do \ + if [ -f $(PROJ_SRC_DIR)/$${dir}/Makefile ] && [ ! 
-f $${dir}/Makefile ]; then \ + $(MKDIR) $${dir}; \ + $(CP) $(PROJ_SRC_DIR)/$${dir}/Makefile $${dir}/Makefile; \ + fi \ + done +endif + +test:: + @ $(MAKE) -C test + +report:: + @ $(MAKE) -C test report + +clean:: + @ $(MAKE) -C test clean + +libs-only: all + +tags:: + $(Verb) etags `find . -type f -name '*.h' -or -name '*.cpp' | \ + grep -v /lib/Headers | grep -v /test/` + +cscope.files: + find tools lib include -name '*.cpp' \ + -or -name '*.def' \ + -or -name '*.td' \ + -or -name '*.h' > cscope.files + +.PHONY: test report clean cscope.files + +endif diff --git a/README.md b/README.md new file mode 100644 index 000000000000..dc05cdea0a12 --- /dev/null +++ b/README.md @@ -0,0 +1,10 @@ + +LLVM Linker (lld) +============================== + +This directory and its subdirectories contain source code for the LLVM Linker, a +modular cross platform linker which is built as part of the LLVM compiler +infrastructure project. + +lld is open source software. You may freely distribute it under the terms of +the license agreement found in LICENSE.txt. diff --git a/cmake/modules/FindVTune.cmake b/cmake/modules/FindVTune.cmake new file mode 100644 index 000000000000..bd0cbe9a38cb --- /dev/null +++ b/cmake/modules/FindVTune.cmake @@ -0,0 +1,31 @@ +# - Find VTune ittnotify. +# Defines: +# VTune_FOUND +# VTune_INCLUDE_DIRS +# VTune_LIBRARIES + +set(dirs + "$ENV{VTUNE_AMPLIFIER_XE_2013_DIR}/" + "C:/Program Files (x86)/Intel/VTune Amplifier XE 2013/" + "$ENV{VTUNE_AMPLIFIER_XE_2011_DIR}/" + "C:/Program Files (x86)/Intel/VTune Amplifier XE 2011/" + ) + +find_path(VTune_INCLUDE_DIRS ittnotify.h + PATHS ${dirs} + PATH_SUFFIXES include) + +if (CMAKE_SIZEOF_VOID_P MATCHES "8") + set(vtune_lib_dir lib64) +else() + set(vtune_lib_dir lib32) +endif() + +find_library(VTune_LIBRARIES libittnotify + HINTS "${VTune_INCLUDE_DIRS}/.." 
+ PATHS ${dirs} + PATH_SUFFIXES ${vtune_lib_dir}) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args( + VTune DEFAULT_MSG VTune_LIBRARIES VTune_INCLUDE_DIRS) diff --git a/docs/C++11.rst b/docs/C++11.rst new file mode 100644 index 000000000000..0c4391e7b037 --- /dev/null +++ b/docs/C++11.rst @@ -0,0 +1,9 @@ +C++11 +===== + +Originally, LLD was developed in C++11 unlike the rest of LLVM. Now, all of +LLVM, LLD, and Clang are developed using C++11. See the `LLVM Coding +Standards`_ for details on the precise subset of C++11 supported by the various +host compilers. + +.. _LLVM Coding Standards: http://llvm.org/docs/CodingStandards.html diff --git a/docs/CMakeLists.txt b/docs/CMakeLists.txt new file mode 100644 index 000000000000..d4f3b058efb7 --- /dev/null +++ b/docs/CMakeLists.txt @@ -0,0 +1,8 @@ +if (LLVM_ENABLE_SPHINX) + if (SPHINX_FOUND) + include(AddSphinxTarget) + if (${SPHINX_OUTPUT_HTML}) + add_sphinx_target(html lld) + endif() + endif() +endif() diff --git a/docs/Driver.rst b/docs/Driver.rst new file mode 100644 index 000000000000..5f2d946d36bd --- /dev/null +++ b/docs/Driver.rst @@ -0,0 +1,79 @@ +====== +Driver +====== + +.. contents:: + :local: + +Introduction +============ + +This document describes the lld driver. The purpose of this document is to +describe both the motivation and design goals for the driver, as well as details +of the internal implementation. + +Overview +======== + +The lld driver is designed to support a number of different command line +interfaces. The main interfaces we plan to support are binutils' ld, Apple's +ld, and Microsoft's link.exe. + +Flavors +------- + +Each of these different interfaces is referred to as a flavor. There is also an +extra flavor "core" which is used to exercise the core functionality of the +linker in the test suite. + +* gnu +* darwin +* link +* core + +Selecting a Flavor +^^^^^^^^^^^^^^^^^^ + +There are two different ways to tell lld which flavor to be. 
They are checked in +order, so the second overrides the first. The first is to symlink :program:`lld` +as :program:`lld-{flavor}` or just :program:`{flavor}`. You can also specify +it as the first command line argument using ``-flavor``:: + + $ lld -flavor gnu + +There is a shortcut for ``-flavor core`` as ``-core``. + + +Adding an Option to an existing Flavor +====================================== + +#. Add the option to the desired :file:`lib/Driver/{flavor}Options.td`. + +#. Add to :cpp:class:`lld::FlavorLinkingContext` a getter and setter method + for the option. + +#. Modify :cpp:func:`lld::FlavorDriver::parse` in + :file:`lib/Driver/{Flavor}Driver.cpp` to call the targetInfo setter + corresponding to the option. + +#. Modify {Flavor}Reader and {Flavor}Writer to use the new targetInfo option. + + +Adding a Flavor +=============== + +#. Add an entry for the flavor in :file:`include/lld/Driver/Driver.h` to + :cpp:class:`lld::UniversalDriver::Flavor`. + +#. Add an entry in :file:`lib/Driver/UniversalDriver.cpp` to + :cpp:func:`lld::Driver::strToFlavor` and + :cpp:func:`lld::UniversalDriver::link`. + This allows the flavor to be selected via symlink and :option:`-flavor`. + +#. Add a tablegen file called :file:`lib/Driver/{flavor}Options.td` that + describes the options. If the options are a superset of another driver, that + driver's td file can simply be included. The :file:`{flavor}Options.td` file + must also be added to :file:`lib/Driver/CMakeLists.txt`. + +#. Add a ``{flavor}Driver`` as a subclass of :cpp:class:`lld::Driver` + in :file:`lib/Driver/{flavor}Driver.cpp`. diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 000000000000..4c147eb11137 --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,155 @@ +# Makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = sphinx-build +PAPER = +BUILDDIR = _build + +# Internal variables. 
+PAPEROPT_a4 = -D latex_paper_size=a4 +PAPEROPT_letter = -D latex_paper_size=letter +ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . +# the i18n builder cannot share the environment and doctrees with the others +I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . + +.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext + +all: html + +help: + @echo "Please use \`make <target>' where <target> is one of" + @echo " html to make standalone HTML files" + @echo " dirhtml to make HTML files named index.html in directories" + @echo " singlehtml to make a single large HTML file" + @echo " pickle to make pickle files" + @echo " json to make JSON files" + @echo " htmlhelp to make HTML files and a HTML help project" + @echo " qthelp to make HTML files and a qthelp project" + @echo " devhelp to make HTML files and a Devhelp project" + @echo " epub to make an epub" + @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" + @echo " latexpdf to make LaTeX files and run them through pdflatex" + @echo " text to make text files" + @echo " man to make manual pages" + @echo " texinfo to make Texinfo files" + @echo " info to make Texinfo files and run them through makeinfo" + @echo " gettext to make PO message catalogs" + @echo " changes to make an overview of all changed/added/deprecated items" + @echo " linkcheck to check all external links for integrity" + @echo " doctest to run all doctests embedded in the documentation (if enabled)" + +clean: + -rm -rf $(BUILDDIR)/* + +html: + $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." + +dirhtml: + $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 
+ +singlehtml: + $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml + @echo + @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." + +pickle: + $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle + @echo + @echo "Build finished; now you can process the pickle files." + +json: + $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json + @echo + @echo "Build finished; now you can process the JSON files." + +htmlhelp: + $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp + @echo + @echo "Build finished; now you can run HTML Help Workshop with the" \ + ".hhp project file in $(BUILDDIR)/htmlhelp." + +qthelp: + $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp + @echo + @echo "Build finished; now you can run "qcollectiongenerator" with the" \ + ".qhcp project file in $(BUILDDIR)/qthelp, like this:" + @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/lld.qhcp" + @echo "To view the help file:" + @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/lld.qhc" + +devhelp: + $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp + @echo + @echo "Build finished." + @echo "To view the help file:" + @echo "# mkdir -p $$HOME/.local/share/devhelp/lld" + @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/lld" + @echo "# devhelp" + +epub: + $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub + @echo + @echo "Build finished. The epub file is in $(BUILDDIR)/epub." + +latex: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo + @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." + @echo "Run \`make' in that directory to run these through (pdf)latex" \ + "(use \`make latexpdf' here to do that automatically)." + +latexpdf: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through pdflatex..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 
+ +text: + $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text + @echo + @echo "Build finished. The text files are in $(BUILDDIR)/text." + +man: + $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man + @echo + @echo "Build finished. The manual pages are in $(BUILDDIR)/man." + +texinfo: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo + @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." + @echo "Run \`make' in that directory to run these through makeinfo" \ + "(use \`make info' here to do that automatically)." + +info: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo "Running Texinfo files through makeinfo..." + make -C $(BUILDDIR)/texinfo info + @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." + +gettext: + $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale + @echo + @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." + +changes: + $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes + @echo + @echo "The overview file is in $(BUILDDIR)/changes." + +linkcheck: + $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck + @echo + @echo "Link check complete; look for any errors in the above output " \ + "or in $(BUILDDIR)/linkcheck/output.txt." + +doctest: + $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest + @echo "Testing of doctests in the sources finished, look at the " \ + "results in $(BUILDDIR)/doctest/output.txt." diff --git a/docs/README.txt b/docs/README.txt new file mode 100644 index 000000000000..eb09a2d2b7ea --- /dev/null +++ b/docs/README.txt @@ -0,0 +1,12 @@ +lld Documentation +================= + +The lld documentation is written using the Sphinx documentation generator. It is +currently tested with Sphinx 1.1.3. + +We currently use the 'nature' theme and a Beaker inspired structure. 
+ +To rebuild documents into html: + + [/lld/docs]> make html + diff --git a/docs/Readers.rst b/docs/Readers.rst new file mode 100644 index 000000000000..e00406b8c4ce --- /dev/null +++ b/docs/Readers.rst @@ -0,0 +1,172 @@ +.. _Readers: + +Developing lld Readers +====================== + +Introduction +------------ + +The purpose of a "Reader" is to take an object file in a particular format +and create an `lld::File`:cpp:class: (which is a graph of Atoms) +representing the object file. A Reader inherits from +`lld::Reader`:cpp:class: which lives in +:file:`include/lld/Core/Reader.h` and +:file:`lib/Core/Reader.cpp`. + +The Reader infrastructure for an object format ``Foo`` requires the +following pieces in order to fit into lld: + +:file:`include/lld/ReaderWriter/ReaderFoo.h` + + .. cpp:class:: ReaderOptionsFoo : public ReaderOptions + + This Options class is the only way to configure how the Reader will + parse any file into an `lld::Reader`:cpp:class: object. This class + should be declared in the `lld`:cpp:class: namespace. + + .. cpp:function:: Reader *createReaderFoo(ReaderOptionsFoo &reader) + + This factory function configures and create the Reader. This function + should be declared in the `lld`:cpp:class: namespace. + +:file:`lib/ReaderWriter/Foo/ReaderFoo.cpp` + + .. cpp:class:: ReaderFoo : public Reader + + This is the concrete Reader class which can be called to parse + object files. It should be declared in an anonymous namespace or + if there is shared code with the `lld::WriterFoo`:cpp:class: you + can make a nested namespace (e.g. `lld::foo`:cpp:class:). + +You may have noticed that :cpp:class:`ReaderFoo` is not declared in the +``.h`` file. An important design aspect of lld is that all Readers are +created *only* through an object-format-specific +:cpp:func:`createReaderFoo` factory function. The creation of the Reader is +parametrized through a :cpp:class:`ReaderOptionsFoo` class. 
This options +class is the one-and-only way to control how the Reader operates when +parsing an input file into an Atom graph. For instance, you may want the +Reader to only accept certain architectures. The options class can be +instantiated from command line options or be programmatically configured. + +Where to start +-------------- + +The lld project already has a skeleton of source code for Readers for +``ELF``, ``PECOFF``, ``MachO``, and lld's native Atom graph format +(both binary ``Native`` and ``YAML`` representations). If your file format +is a variant of one of those, you should modify the existing Reader to +support your variant. This is done by customizing the Options +class for the Reader and making appropriate changes to the ``.cpp`` file to +interpret those options and act accordingly. + +If your object file format is not a variant of any existing Reader, you'll need +to create a new Reader subclass with the organization described above. + +Readers are factories +--------------------- + +The linker will usually only instantiate your Reader once. That one Reader will +have its loadFile() method called many times with different input files. +To support multithreaded linking, the Reader may be parsing multiple input +files in parallel. Therefore, there should be no parsing state in your Reader +object. Any parsing state should be in ivars of your File subclass or in +some temporary object. + +The key method to implement in a reader is:: + + virtual error_code loadFile(LinkerInput &input, + std::vector<std::unique_ptr<File>> &result); + +It takes a memory buffer (which contains the contents of the object file +being read) and returns an instantiated lld::File object which is +a collection of Atoms. The result is a vector of File pointers (instead of +simply a File pointer) because some file formats allow multiple object +"files" to be encoded in one file system file. + + +Memory Ownership +---------------- + +Atoms are always owned by their File object. 
During core linking when Atoms +are coalesced or stripped away, core linking does not delete them. +Core linking just removes those unused Atoms from its internal list. +The destructor of a File object is responsible for deleting all Atoms it +owns, and if ownership of the MemoryBuffer was passed to it, the File +destructor needs to delete that too. + +Making Atoms +------------ + +The internal model of lld is purely Atom based. But most object files do not +have an explicit concept of Atoms, instead most have "sections". The way +to think of this is that a section is just a list of Atoms with common +attributes. + +The first step in parsing section-based object files is to cleave each +section into a list of Atoms. The technique may vary by section type. For +code sections (e.g. .text), there are usually symbols at the start of each +function. Those symbol addresses are the points at which the section is +cleaved into discrete Atoms. Some file formats (like ELF) also include the +length of each symbol in the symbol table. Otherwise, the length of each +Atom is calculated to run to the start of the next symbol or the end of the +section. + +Other section types can be implicitly cleaved. For instance c-string literals +or unwind info (e.g. .eh_frame) can be cleaved by having the Reader look at +the content of the section. It is important to cleave sections into Atoms +to remove false dependencies. For instance the .eh_frame section often +has no symbols, but contains "pointers" to the functions for which it +has unwind info. If the .eh_frame section was not cleaved (but left as one +big Atom), there would always be a reference (from the eh_frame Atom) to +each function. So the linker would be unable to coalesce or dead strip +away the function atoms. + +The lld Atom model also requires that a reference to an undefined symbol be +modeled as a Reference to an UndefinedAtom. 
So the Reader also needs to +create an UndefinedAtom for each undefined symbol in the object file. + +Once all Atoms have been created, the second step is to create References +(recall that Atoms are "nodes" and References are "edges"). Most References +are created by looking at the "relocation records" in the object file. If +a function contains a call to "malloc", there is usually a relocation record +specifying the address in the section and the symbol table index. Your +Reader will need to convert the address to an Atom and offset and the symbol +table index into a target Atom. If "malloc" is not defined in the object file, +the target Atom of the Reference will be an UndefinedAtom. + + +Performance +----------- +Once you have the above working to parse an object file into Atoms and +References, you'll want to look at performance. Some techniques that can +help performance are: + +* Use llvm::BumpPtrAllocator or pre-allocate one big vector<Reference> and then + just have each atom point to its subrange of References in that vector. + This can be faster than allocating each Reference as a separate object. +* Pre-scan the symbol table and determine how many atoms are in each section + then allocate space for all the Atom objects at once. +* Don't copy symbol names or section content to each Atom, instead use + StringRef and ArrayRef in each Atom to point to its name and content in the + MemoryBuffer. + + +Testing +------- + +We are still working on infrastructure to test Readers. The issue is that +you don't want to check in binary files to the test suite. And the tools +for creating your object file from assembly source may not be available on +every OS. + +We are investigating a way to use YAML to describe the section, symbols, +and content of a file. Then have some code which will write out an object +file from that YAML description. 
+ +Once that is in place, you can write test cases that contain section/symbols +YAML and is run through the linker to produce Atom/References based YAML which +is then run through FileCheck to verify the Atoms and References are as +expected. + + + diff --git a/docs/_static/favicon.ico b/docs/_static/favicon.ico Binary files differnew file mode 100644 index 000000000000..724ad6e12dd4 --- /dev/null +++ b/docs/_static/favicon.ico diff --git a/docs/_templates/indexsidebar.html b/docs/_templates/indexsidebar.html new file mode 100644 index 000000000000..61968f22d5c5 --- /dev/null +++ b/docs/_templates/indexsidebar.html @@ -0,0 +1,4 @@ +<h3>Bugs</h3> + +<p>lld bugs should be reported at the + LLVM <a href="http://llvm.org/bugs">Bugzilla</a>.</p> diff --git a/docs/_templates/layout.html b/docs/_templates/layout.html new file mode 100644 index 000000000000..519a24bce63a --- /dev/null +++ b/docs/_templates/layout.html @@ -0,0 +1,12 @@ +{% extends "!layout.html" %} + +{% block extrahead %} +<style type="text/css"> + table.right { float: right; margin-left: 20px; } + table.right td { border: 1px solid #ccc; } +</style> +{% endblock %} + +{% block rootrellink %} + <li><a href="{{ pathto('index') }}">lld Home</a> | </li> +{% endblock %} diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 000000000000..99866e1bd1e1 --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,254 @@ +# -*- coding: utf-8 -*- +# +# lld documentation build configuration file. +# +# This file is execfile()d with the current directory set to its containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +import sys, os + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. 
If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +#sys.path.insert(0, os.path.abspath('.')) + +# -- General configuration ----------------------------------------------------- + +# If your documentation needs a minimal Sphinx version, state it here. +#needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be extensions +# coming with Sphinx (named 'sphinx.ext.*') or your custom ones. +extensions = ['sphinx.ext.intersphinx', 'sphinx.ext.todo'] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix of source filenames. +source_suffix = '.rst' + +# The encoding of source files. +#source_encoding = 'utf-8-sig' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = u'lld' +copyright = u'2011-2014, LLVM Project' + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +version = '3.2' +# The full version, including alpha/beta/rc tags. +release = '3.2' + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +#language = None + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +#today = '' +# Else, today_fmt is used as the format for a strftime call. +today_fmt = '%Y-%m-%d' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +exclude_patterns = ['_build'] + +# The reST default role (used for this markup: `text`) to use for all documents. +#default_role = None + +# If true, '()' will be appended to :func: etc. cross-reference text. 
+#add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +#add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. +show_authors = True + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'friendly' + +# A list of ignored prefixes for module index sorting. +#modindex_common_prefix = [] + + +# -- Options for HTML output --------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +html_theme = 'llvm-theme' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +#html_theme_options = {} + +# Add any paths that contain custom themes here, relative to this directory. +html_theme_path = ["."] + +# The name for this set of Sphinx documents. If None, it defaults to +# "<project> v<release> documentation". +#html_title = None + +# A shorter title for the navigation bar. Default is the same as html_title. +#html_short_title = None + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +#html_logo = None + +# If given, this must be the name of an image file (path relative to the +# configuration directory) that is the favicon of the docs. Modern browsers use +# this as icon for tabs, windows and bookmarks. It should be a Windows-style +# icon file (.ico), which is 16x16 or 32x32 pixels large. Default: None. The +# image file will be copied to the _static directory of the output HTML, but +# only if the file does not already exist there. +html_favicon = '_static/favicon.ico' + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. 
They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, +# using the given strftime format. +html_last_updated_fmt = '%Y-%m-%d' + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +#html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +html_sidebars = {'index': 'indexsidebar.html'} + +# Additional templates that should be rendered to pages, maps page names to +# template names. +# html_additional_pages = {'index': 'index.html'} + +# If false, no module index is generated. +#html_domain_indices = True + +# If false, no index is generated. +#html_use_index = True + +# If true, the index is split into individual pages for each letter. +#html_split_index = False + +# If true, links to the reST sources are added to the pages. +html_show_sourcelink = True + +# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. +#html_show_sphinx = True + +# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. +#html_show_copyright = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a <link> tag referring to it. The value of this option must be the +# base URL from which the finished HTML is served. +#html_use_opensearch = '' + +# This is the file name suffix for HTML files (e.g. ".xhtml"). +#html_file_suffix = None + +# Output file base name for HTML help builder. +htmlhelp_basename = 'llddoc' + + +# -- Options for LaTeX output -------------------------------------------------- + +latex_elements = { +# The paper size ('letterpaper' or 'a4paper'). +#'papersize': 'letterpaper', + +# The font size ('10pt', '11pt' or '12pt'). +#'pointsize': '10pt', + +# Additional stuff for the LaTeX preamble. 
+#'preamble': '', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, author, documentclass [howto/manual]). +latex_documents = [ + ('contents', 'lld.tex', u'lld Documentation', + u'LLVM project', 'manual'), +] + +# The name of an image file (relative to this directory) to place at the top of +# the title page. +#latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. +#latex_use_parts = False + +# If true, show page references after internal links. +#latex_show_pagerefs = False + +# If true, show URL addresses after external links. +#latex_show_urls = False + +# Documents to append as an appendix to all manuals. +#latex_appendices = [] + +# If false, no module index is generated. +#latex_domain_indices = True + + +# -- Options for manual page output -------------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + ('contents', 'lld', u'lld Documentation', + [u'LLVM project'], 1) +] + +# If true, show URL addresses after external links. +#man_show_urls = False + + +# -- Options for Texinfo output ------------------------------------------------ + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + ('contents', 'lld', u'lld Documentation', + u'LLVM project', 'lld', 'One line description of project.', + 'Miscellaneous'), +] + +# Documents to append as an appendix to all manuals. +#texinfo_appendices = [] + +# If false, no module index is generated. +#texinfo_domain_indices = True + +# How to display URL addresses: 'footnote', 'no', or 'inline'. +#texinfo_show_urls = 'footnote' + + +# FIXME: Define intersphinx configration. 
+intersphinx_mapping = {} + + +# -- Options for extensions ---------------------------------------------------- + +# Enable this if you want TODOs to show up in the generated documentation. +todo_include_todos = True diff --git a/docs/design.rst b/docs/design.rst new file mode 100644 index 000000000000..06d356527f58 --- /dev/null +++ b/docs/design.rst @@ -0,0 +1,500 @@ +.. _design: + +Linker Design +============= + +Introduction +------------ + +lld is a new generation of linker. It is not "section" based like traditional +linkers which mostly just interlace sections from multiple object files into the +output file. Instead, lld is based on "Atoms". Traditional section based +linkers work well for simple linking, but their model makes advanced linking +features difficult to implement. Features like dead code stripping, reordering +functions for locality, and C++ coalescing require the linker to work at a finer +grain. + +An atom is an indivisible chunk of code or data. An atom has a set of +attributes, such as: name, scope, content-type, alignment, etc. An atom also +has a list of References. A Reference contains: a kind, an optional offset, an +optional addend, and an optional target atom. + +The Atom model allows the linker to use standard graph theory models for linking +data structures. Each atom is a node, and each Reference is an edge. The +feature of dead code stripping is implemented by following edges to mark all +live atoms, and then deleting the non-live atoms. + + +Atom Model +---------- + +An atom is an indivisible chunk of code or data. Typically each user written +function or global variable is an atom. In addition, the compiler may emit +other atoms, such as for literal c-strings or floating point constants, or for +runtime data structures like dwarf unwind info or pointers to initializers. + +A simple "hello world" object file would be modeled like this: + +.. 
image:: hello.png + +There are three atoms: main, a proxy for printf, and an anonymous atom +containing the c-string literal "hello world". The Atom "main" has two +references. One is the call site for the call to printf, and the other is a +reference for the instruction that loads the address of the c-string literal. + +There are only four different types of atoms: + + * DefinedAtom + 95% of all atoms. This is a chunk of code or data + + * UndefinedAtom + This is a place holder in object files for a reference to some atom + outside the translation unit. During core linking it is usually replaced + by (coalesced into) another Atom. + + * SharedLibraryAtom + If a required symbol name turns out to be defined in a dynamic shared + library (and not some object file), a SharedLibraryAtom is the + placeholder Atom used to represent that fact. + + It is similar to an UndefinedAtom, but it also tracks information + about the associated shared library. + + * AbsoluteAtom + This is for embedded support where some stuff is implemented in ROM at + some fixed address. This atom has no content. It is just an address + that the Writer needs to fix up any references to point to. + + +File Model +---------- + +The linker views the input files as basically containers of Atoms and +References, and just a few attributes of their own. The linker works with three +kinds of files: object files, static libraries, and dynamic shared libraries. +Each kind of file has a reader object which presents the file in the model +expected by the linker. + +Object File +~~~~~~~~~~~ + +An object file is just a container of atoms. When linking an object file, a +reader is instantiated which parses the object file and instantiates a set of +atoms representing all content in the .o file. The linker adds all those atoms +to a master graph. + +Static Library (Archive) +~~~~~~~~~~~~~~~~~~~~~~~~ + +This is the traditional unix static archive which is just a collection of object +files with a "table of contents". 
When linking with a static library, by default +nothing is added to the master graph of atoms. Instead, after merging all +atoms from object files into a master graph, if any "undefined" atoms are left +remaining in the master graph, the linker reads the table of contents for each +static library to see if any have the needed definitions. If so, the set of +atoms from the specified object file in the static library is added to the +master graph of atoms. + +Dynamic Library (Shared Object) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Dynamic libraries are different than object files and static libraries in that +they don't directly add any content. Their purpose is to check at build time +that the remaining undefined references can be resolved at runtime, and provide +a list of dynamic libraries (SO_NEEDED) that will be needed at runtime. The way +this is modeled in the linker is that a dynamic library contributes no atoms to +the initial graph of atoms. Instead, (like static libraries) if there are +"undefined" atoms in the master graph of all atoms, then each dynamic library is +checked to see if it exports the required symbol. If so, a "shared library" atom is +instantiated by the reader which the linker uses to replace the +"undefined" atom. + +Linking Steps +------------- + +Through the use of abstract Atoms, the core of linking is architecture +independent and file format independent. All command line parsing is factored +out into a separate "options" abstraction which enables the linker to be driven +with different command line sets. + +The overall steps in linking are: + + #. Command line processing + + #. Parsing input files + + #. Resolving + + #. Passes/Optimizations + + #. Generate output file + +The Resolving and Passes steps are done purely on the master graph of atoms, so +they have no notion of file formats such as mach-o or ELF. + + +Input Files +~~~~~~~~~~~ + +Existing developer tools use different file formats for object files. 
+A goal of lld is to be file format independent. This is done +through a plug-in model for reading object files. The lld::Reader is the base +class for all object file readers. A Reader follows the factory method pattern. +A Reader instantiates an lld::File object (which is a graph of Atoms) from a +given object file (on disk or in-memory). + +Every Reader subclass defines its own "options" class (for instance the mach-o +Reader defines the class ReaderOptionsMachO). This options class is the +one-and-only way to control how the Reader operates when parsing an input file +into an Atom graph. For instance, you may want the Reader to only accept +certain architectures. The options class can be instantiated from command +line options, or it can be subclassed and the ivars programmatically set. + +ELF Section Groups +~~~~~~~~~~~~~~~~~~ +Reference : `ELF Section Groups <http://mentorembedded.github.io/cxx-abi/abi/prop-72-comdat.html>`_ + +C++ has many situations where the compiler may need to emit code or data, +but may not be able to identify a unique compilation unit where it should be +emitted. The approach chosen by the C++ ABI group to deal with this problem is +to allow the compiler to emit the required information in multiple compilation +units, in a form which allows the linker to remove all but one copy. This is +essentially the feature called COMDAT in several existing implementations. + +The COMDAT sections in ELF are modeled by using '.group' sections in the input +files. Each '.group' section is associated with a signature. The '.group' +section has a list of members that are part of the '.group' which the linker +selects to appear in the input file (whichever .group section appeared first +in the link). References to any of the '.group' members can also appear from +outside the '.group'. + +In lld the '.group' sections with COMDAT are identified by contentType( +typeGroupComdat). 
The '.group' members are identified by using +**kindGroupChild** references. + +The point to be noted here is that the 'group child' members would need to be emitted +in the output file **iff** the group was selected by the resolver. + +This is modeled in lld by removing the 'group child' members from the +definedAtom List. + +Any reference to the group-child from **outside the group** is referenced using +an 'undefined' atom. + +Resolving +~~~~~~~~~ + +The resolving step takes all the atoms' graphs from each object file and +combines them into one master object graph. Unfortunately, it is not as simple +as appending the atom list from each file into one big list. There are many +cases where atoms need to be coalesced. That is, two or more atoms need to be +coalesced into one atom. This is necessary to support: C language "tentative +definitions", C++ weak symbols for templates and inlines defined in headers, +replacing undefined atoms with actual definition atoms, and for merging copies +of constants like c-strings and floating point constants. + +The linker supports coalescing by-name and by-content. By-name is used for +tentative definitions and weak symbols. By-content is used for constant data +that can be merged. + +The resolving process maintains some global linking "state", including a "symbol +table" which is a map from llvm::StringRef to lld::Atom*. With these data +structures, the linker iterates all atoms in all input files. For each atom, it +checks if the atom is named and has a global or hidden scope. If so, the atom +is added to the symbol table map. If there already is a matching atom in that +table, that means the current atom needs to be coalesced with the found atom, or +it is a multiple definition error. + +When all initial input file atoms have been processed by the resolver, a scan is +made to see if there are any undefined atoms in the graph. 
If there are, the +linker scans all libraries (both static and dynamic) looking for definitions to +replace the undefined atoms. It is an error if any undefined atoms are left +remaining. + +Dead code stripping (if requested) is done at the end of resolving. The linker +does a simple mark-and-sweep. It starts with "root" atoms (like "main" in a main +executable) and follows each reference and marks each Atom that it visits as +"live". When done, all atoms not marked "live" are removed. + +The result of the Resolving phase is the creation of an lld::File object. The +goal is that the lld::File model is **the** internal representation +throughout the linker. The file readers parse (mach-o, ELF, COFF) into an +lld::File. The file writers (mach-o, ELF, COFF) take an lld::File and produce +their file kind, and every Pass only operates on an lld::File. This is not only +a simpler, consistent model, but it enables the state of the linker to be dumped +at any point in the link for testing purposes. + + +Passes +~~~~~~ + +The Passes step is an open ended set of routines that each get a chance to +modify or enhance the current lld::File object. Some example Passes are: + + * stub (PLT) generation + + * GOT instantiation + + * order_file optimization + + * branch island generation + + * branch shim generation + + * Objective-C optimizations (Darwin specific) + + * TLV instantiation (Darwin specific) + + * DTrace probe processing (Darwin specific) + + * compact unwind encoding (Darwin specific) + + +Some of these passes are specific to Darwin's runtime environments. But many of +the passes are applicable to any OS (such as generating branch islands for out of +range branch instructions). + +The general structure of a pass is to iterate through the atoms in the current +lld::File object, inspecting each atom and doing something. For instance, the +stub pass looks for call sites to shared library atoms (e.g. call to printf). 
+It then instantiates a "stub" atom (PLT entry) and a "lazy pointer" atom for +each proxy atom needed, and these new atoms are added to the current lld::File +object. Next, all the noted call sites to shared library atoms have their +References altered to point to the stub atom instead of the shared library atom. + + +Generate Output File +~~~~~~~~~~~~~~~~~~~~ + +Once the passes are done, the output file writer is given current lld::File +object. The writer's job is to create the executable content file wrapper and +place the content of the atoms into it. + +lld uses a plug-in model for writing output files. All concrete writers (e.g. +ELF, mach-o, etc) are subclasses of the lld::Writer class. + +Unlike the Reader class which has just one method to instantiate an lld::File, +the Writer class has multiple methods. The crucial method is to generate the +output file, but there are also methods which allow the Writer to contribute +Atoms to the resolver and specify passes to run. + +An example of contributing +atoms is that if the Writer knows a main executable is being linked and such +an executable requires a specially named entry point (e.g. "_main"), the Writer +can add an UndefinedAtom with that special name to the resolver. This will +cause the resolver to issue an error if that symbol is not defined. + +Sometimes a Writer supports lazily created symbols, such as names for the start +of sections. To support this, the Writer can create a File object which vends +no initial atoms, but does lazily supply atoms by name as needed. + +Every Writer subclass defines its own "options" class (for instance the mach-o +Writer defines the class WriterOptionsMachO). This options class is the +one-and-only way to control how the Writer operates when producing an output +file from an Atom graph. For instance, you may want the Writer to optimize +the output for certain OS versions, or strip local symbols, etc. 
The options +class can be instantiated from command line options, or it can be subclassed +and the ivars programmatically set. + + +lld::File representations +------------------------- + +Just as LLVM has three representations of its IR model, lld has three +representations of its File/Atom/Reference model: + + * In memory, abstract C++ classes (lld::Atom, lld::Reference, and lld::File). + + * textual (in YAML) + + * binary format ("native") + +Binary File Format +~~~~~~~~~~~~~~~~~~ + +In theory, lld::File objects could be written to disk in an existing Object File +format standard (e.g. ELF). Instead we choose to define a new binary file +format. There are two main reasons for this: fidelity and performance. In order +for lld to work as a linker on all platforms, its internal model must be rich +enough to model all CPU and OS linking features. But if we choose an existing +Object File format as the lld binary format, that means an ongoing need to +retrofit each platform specific feature needed from alternate platforms into the +existing Object File format. Having our own "native" binary format side steps +that issue. We still need to be able to binary encode all the features, but +once the in-memory model can represent the feature, it is straightforward to +binary encode it. + +The reason to use a binary file format at all, instead of a textual file format, +is speed. You want the binary format to be as fast as possible to read into the +in-memory model. Given that we control the in-memory model and the binary +format, the obvious way to make reading super fast is to make the file format be +basically just an array of atoms. The reader just mmaps in the file and looks +at the header to see how many atoms there are and instantiates that many atom +objects with the atom attribute information coming from that array. The trick +is designing this in a way that can be extended as the Atom model evolves and new +attributes are added. 
+ +The native object file format starts with a header that lists how many "chunks" +are in the file. A chunk is an array of "ivar data". The native file reader +instantiates an array of Atom objects (with one large malloc call). Each atom +contains just a pointer to its vtable and a pointer to its ivar data. All +methods on lld::Atom are virtual, so all the method implementations return +values based on the ivar data to which it has a pointer. If a new linking +feature is added which requires a change to the lld::Atom model, a new native +reader class (e.g. version 2) is defined which knows how to read the new feature +information from the new ivar data. The old reader class (e.g. version 1) is +updated to do its best to model (the lack of the new feature) given the old ivar +data in existing native object files. + +With this model for the native file format, files can be read and turned +into the in-memory graph of lld::Atoms with just a few memory allocations. +And the format can easily adapt over time to new features. + +The binary file format follows the ReaderWriter patterns used in lld. The lld +library comes with the classes: ReaderNative and WriterNative. So, switching +between file formats is as easy as switching which Reader subclass is used. + + +Textual representations in YAML +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In designing a textual format we want something easy for humans to read and easy +for the linker to parse. Since an atom has lots of attributes most of which are +usually just the default, we should define default values for every attribute so +that those can be omitted from the text representation. 
Here are the atoms for a +simple hello world program expressed in YAML:: + + target-triple: x86_64-apple-darwin11 + + atoms: + - name: _main + scope: global + type: code + content: [ 55, 48, 89, e5, 48, 8d, 3d, 00, 00, 00, 00, 30, c0, e8, 00, 00, + 00, 00, 31, c0, 5d, c3 ] + fixups: + - offset: 07 + kind: pcrel32 + target: 2 + - offset: 0E + kind: call32 + target: _fprintf + + - type: c-string + content: [ 73, 5A, 00 ] + + ... + +The biggest use for the textual format will be writing test cases. Writing test +cases in C is problematic because the compiler may vary its output over time for +its own optimization reasons which may inadvertently disable or break the linker +feature trying to be tested. By writing test cases in the linker's own textual +format, we can exactly specify every attribute of every atom and thus target +specific linker logic. + +The textual/YAML format follows the ReaderWriter patterns used in lld. The lld +library comes with the classes: ReaderYAML and WriterYAML. + + +Testing +------- + +The lld project contains a test suite which is being built up as new code is +added to lld. All new lld functionality should have tests added to the test +suite. The test suite is `lit <http://llvm.org/cmds/lit.html/>`_ driven. Each +test is a text file with comments telling lit how to run the test and check the +result. To facilitate testing, the lld project builds a tool called lld-core. +This tool reads a YAML file (default from stdin), parses it into one or more +lld::File objects in memory and then feeds those lld::File objects to the +resolver phase. The output of the resolver is written as a native object file. +It is then read back in using the native object file reader and then passed to the +YAML writer. This round-about path means that all three representations +(in-memory, binary, and text) are exercised, and any new feature has to work in +all the representations to pass the test. 
+ + +Resolver testing +~~~~~~~~~~~~~~~~ + +Basic testing is the "core linking" or resolving phase. That is where the +linker merges object files. All test cases are written in YAML. One feature of +YAML is that it allows multiple "documents" to be encoded in one YAML stream. +That means one text file can appear to the linker as multiple .o files - the +normal case for the linker. + +Here is a simple example of a core linking test case. It checks that an +undefined atom from one file will be replaced by a definition from another +file:: + + # RUN: lld-core %s | FileCheck %s + + # + # Test that undefined atoms are replaced with defined atoms. + # + + --- + atoms: + - name: foo + definition: undefined + --- + atoms: + - name: foo + scope: global + type: code + ... + + # CHECK: name: foo + # CHECK: scope: global + # CHECK: type: code + # CHECK-NOT: name: foo + # CHECK: ... + + +Passes testing +~~~~~~~~~~~~~~ + +Since Passes just operate on an lld::File object, the lld-core tool has the +option to run a particular pass (after resolving). Thus, you can write a YAML +test case with carefully crafted input to exercise areas of a Pass and then check +the resulting lld::File object as represented in YAML. + + +Design Issues +------------- + +There are a number of open issues in the design of lld. The plan is to wait and +make these design decisions when we need to. + + +Debug Info +~~~~~~~~~~ + +Currently, the lld model says nothing about debug info. But the most popular +debug format is DWARF and there is some impedance mismatch with the lld model +and DWARF. In lld there are just Atoms and only Atoms that need to be in a +special section at runtime have an associated section. Also, Atoms do not have +addresses. The way DWARF is spec'ed, different parts of DWARF are supposed to go +into specially named sections and the DWARF references function code by address. 
+ +CPU and OS specific functionality +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Currently, lld has an abstract "Platform" that deals with any CPU or OS specific +differences in linking. We just keep adding virtual methods to the base +Platform class as we find linking areas that might need customization. At some +point we'll need to structure this better. + + +File Attributes +~~~~~~~~~~~~~~~ + +Currently, lld::File just has a path and a way to iterate its atoms. We will +need to add more attributes on a File. For example, some equivalent to the +target triple. There is also a number of cached or computed attributes that +could make various Passes more efficient. For instance, on Darwin there are a +number of Objective-C optimizations that can be done by a Pass. But it would +improve the plain C case if the Objective-C optimization Pass did not have to +scan all atoms looking for any Objective-C data structures. This could be done +if the lld::File object had an attribute that said if the file had any +Objective-C data in it. The Resolving phase would then be required to "merge" +that attribute as object files are added. diff --git a/docs/development.rst b/docs/development.rst new file mode 100644 index 000000000000..918e1778b801 --- /dev/null +++ b/docs/development.rst @@ -0,0 +1,48 @@ +.. _development: + +Development +=========== + +lld is developed as part of the `LLVM <http://llvm.org>`_ project. + +Using C++11 in lld +------------------ + +:doc:`C++11`. + +Creating a Reader +----------------- + +See the :ref:`Creating a Reader <Readers>` guide. + + +Modifying the Driver +-------------------- + +See :doc:`Driver`. + + +Debugging +--------- + +You can run lld with ``-mllvm -debug`` command line options to enable debugging +printouts. If you want to enable debug information for some specific pass, you +can run it with ``-mllvm '-debug-only=<pass>'``, where pass is a name used in +the ``DEBUG_WITH_TYPE()`` macro. 
+ + + +Documentation +------------- + +The project documentation is written in reStructuredText and generated using the +`Sphinx <http://sphinx.pocoo.org/>`_ documentation generator. For more +information on writing documentation for the project, see the +:ref:`sphinx_intro`. + +.. toctree:: + :hidden: + + C++11 + Readers + Driver diff --git a/docs/getting_started.rst b/docs/getting_started.rst new file mode 100644 index 000000000000..986a406c1cb7 --- /dev/null +++ b/docs/getting_started.rst @@ -0,0 +1,106 @@ +.. _getting_started: + +Getting Started: Building and Running lld +========================================= + +This page gives you the shortest path to checking out and building lld. If you +run into problems, please file bugs in the `LLVM Bugzilla`__ + +__ http://llvm.org/bugs/ + +Building lld +------------ + +On Unix-like Systems +~~~~~~~~~~~~~~~~~~~~ + +1. Get the required tools. + + * `CMake 2.8`_\+. + * make (or any build system CMake supports). + * `Clang 3.1`_\+ or GCC 4.7+ (C++11 support is required). + + * If using Clang, you will also need `libc++`_. + * `Python 2.4`_\+ (not 3.x) for running tests. + +.. _CMake 2.8: http://www.cmake.org/cmake/resources/software.html +.. _Clang 3.1: http://clang.llvm.org/ +.. _libc++: http://libcxx.llvm.org/ +.. _Python 2.4: http://python.org/download/ + +2. Check out LLVM:: + + $ cd path/to/llvm-project + $ svn co http://llvm.org/svn/llvm-project/llvm/trunk llvm + +3. Check out lld:: + + $ cd llvm/tools + $ svn co http://llvm.org/svn/llvm-project/lld/trunk lld + + * lld can also be checked out to ``path/to/llvm-project`` and built as an external + project. + +4. 
Build LLVM and lld:: + + $ cd path/to/llvm-build/llvm (out of source build required) + $ cmake -G "Unix Makefiles" path/to/llvm-project/llvm + $ make + + * If you want to build with clang and it is not the default compiler or + it is installed in an alternate location, you'll need to tell the cmake tool + the location of the C and C++ compiler via CMAKE_C_COMPILER and + CMAKE_CXX_COMPILER. For example:: + + $ cmake -DCMAKE_CXX_COMPILER=/path/to/clang++ -DCMAKE_C_COMPILER=/path/to/clang ... + +5. Test:: + + $ make lld-test + +Using Visual Studio +~~~~~~~~~~~~~~~~~~~ + +#. Get the required tools. + + * `CMake 2.8`_\+. + * `Visual Studio 11 (2012) or later`_ (required for C++11 support) + * `Python 2.4`_\+ (not 3.x) for running tests. + +.. _CMake 2.8: http://www.cmake.org/cmake/resources/software.html +.. _Visual Studio 11 (2012) or later: http://www.microsoft.com/visualstudio/11/en-us +.. _Python 2.4: http://python.org/download/ + +#. Check out LLVM:: + + $ cd path/to/llvm-project + $ svn co http://llvm.org/svn/llvm-project/llvm/trunk llvm + +#. Check out lld:: + + $ cd llvm/tools + $ svn co http://llvm.org/svn/llvm-project/lld/trunk lld + + * lld can also be checked out to ``path/to/llvm-project`` and built as an external + project. + +#. Generate Visual Studio project files:: + + $ cd path/to/llvm-build/llvm (out of source build required) + $ cmake -G "Visual Studio 11" path/to/llvm-project/llvm + +#. Build + + * Open LLVM.sln in Visual Studio. + * Build the ``ALL_BUILD`` target. + +#. Test + + * Build the ``lld-test`` target. + +More Information +~~~~~~~~~~~~~~~~ + +For more information on using CMake see the `LLVM CMake guide`_. + +.. 
_LLVM CMake guide: http://llvm.org/docs/CMake.html diff --git a/docs/hello.png b/docs/hello.png Binary files differnew file mode 100644 index 000000000000..70df111f1abd --- /dev/null +++ b/docs/hello.png diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 000000000000..7a87ad8d0583 --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,88 @@ +.. _index: + +lld - The LLVM Linker +===================== + +lld is a new set of modular code for creating linker tools. + +* End-User Features: + + * Compatible with existing linker options + * Reads standard Object Files (e.g. ELF, Mach-O, PE/COFF) + * Writes standard Executable Files (e.g. ELF, Mach-O, PE) + * Fast link times + * Minimal memory use + * Remove clang's reliance on "the system linker" + * Uses the LLVM `"UIUC" BSD-Style license`__. + +* Applications: + + * Modular design + * Support cross linking + * Easy to add new CPU support + * Can be built as static tool or library + +* Design and Implementation: + + * Extensive unit tests + * Internal linker model can be dumped/read to textual format + * Internal linker model can be dumped/read to a new native format + * Native format designed to be fast to read and write + * Additional linking features can be plugged in as "passes" + * OS specific and CPU specific code factored out + +Why a new linker? +----------------- + +The fact that clang relies on whatever linker tool you happen to have installed +means that clang has been very conservative adopting features which require a +recent linker. + +In the same way that the MC layer of LLVM has removed clang's reliance on the +system assembler tool, the lld project will remove clang's reliance on the +system linker tool. + + +Current Status +-------------- + +lld can self host on x86-64 FreeBSD and Linux and x86 Windows. + +All SingleSource tests in test-suite pass on x86-64 Linux. + +All SingleSource and MultiSource tests in the LLVM test-suite +pass on MIPS 32-bit little-endian Linux. 
+ +Source +------ + +lld is available in the LLVM SVN repository:: + + svn co http://llvm.org/svn/llvm-project/lld/trunk lld + +lld is also available via the read-only git mirror:: + + git clone http://llvm.org/git/lld.git + +Put it in llvm's tools/ directory, rerun cmake, then build target lld. + +Contents +-------- + +.. toctree:: + :maxdepth: 2 + + design + getting_started + development + windows_support + open_projects + sphinx_intro + +Indices and tables +------------------ + +* :ref:`genindex` +* :ref:`search` + +__ http://llvm.org/docs/DeveloperPolicy.html#license diff --git a/docs/llvm-theme/layout.html b/docs/llvm-theme/layout.html new file mode 100644 index 000000000000..0cd0918eac2a --- /dev/null +++ b/docs/llvm-theme/layout.html @@ -0,0 +1,22 @@ +{# + sphinxdoc/layout.html + ~~~~~~~~~~~~~~~~~~~~~ + + Sphinx layout template for the sphinxdoc theme. + + :copyright: Copyright 2007-2010 by the Sphinx team, see AUTHORS. + :license: BSD, see LICENSE for details. +#} +{% extends "basic/layout.html" %} + +{% block relbar1 %} +<div class="logo"> +<a href="{{ pathto('index') }}"><img src="{{ +pathto("_static/logo.png", 1) }}" alt="LLVM Documentation"/></a> +</div> +{{ super() }} +{% endblock %} + +{# put the sidebar before the body #} +{% block sidebar1 %}{{ sidebar() }}{% endblock %} +{% block sidebar2 %}{% endblock %} diff --git a/docs/llvm-theme/static/contents.png b/docs/llvm-theme/static/contents.png Binary files differnew file mode 100644 index 000000000000..7fb82154a174 --- /dev/null +++ b/docs/llvm-theme/static/contents.png diff --git a/docs/llvm-theme/static/llvm.css b/docs/llvm-theme/static/llvm.css new file mode 100644 index 000000000000..32802bb6a2d0 --- /dev/null +++ b/docs/llvm-theme/static/llvm.css @@ -0,0 +1,345 @@ +/* + * sphinxdoc.css_t + * ~~~~~~~~~~~~~~~ + * + * Sphinx stylesheet -- sphinxdoc theme. Originally created by + * Armin Ronacher for Werkzeug. + * + * :copyright: Copyright 2007-2010 by the Sphinx team, see AUTHORS. 
+ * :license: BSD, see LICENSE for details. + * + */ + +@import url("basic.css"); + +/* -- page layout ----------------------------------------------------------- */ + +body { + font-family: 'Lucida Grande', 'Lucida Sans Unicode', 'Geneva', + 'Verdana', sans-serif; + font-size: 14px; + letter-spacing: -0.01em; + line-height: 150%; + text-align: center; + background-color: #BFD1D4; + color: black; + padding: 0; + border: 1px solid #aaa; + + margin: 0px 80px 0px 80px; + min-width: 740px; +} + +div.logo { + background-color: white; + text-align: left; + padding: 10px 10px 15px 15px; +} + +div.document { + background-color: white; + text-align: left; + background-image: url(contents.png); + background-repeat: repeat-x; +} + +div.bodywrapper { + margin: 0 240px 0 0; + border-right: 1px solid #ccc; +} + +div.body { + margin: 0; + padding: 0.5em 20px 20px 20px; +} + +div.related { + font-size: 1em; +} + +div.related ul { + background-image: url(navigation.png); + height: 2em; + border-top: 1px solid #ddd; + border-bottom: 1px solid #ddd; +} + +div.related ul li { + margin: 0; + padding: 0; + height: 2em; + float: left; +} + +div.related ul li.right { + float: right; + margin-right: 5px; +} + +div.related ul li a { + margin: 0; + padding: 0 5px 0 5px; + line-height: 1.75em; + color: #EE9816; +} + +div.related ul li a:hover { + color: #3CA8E7; +} + +div.sphinxsidebarwrapper { + padding: 0; +} + +div.sphinxsidebar { + margin: 0; + padding: 0.5em 15px 15px 0; + width: 210px; + float: right; + font-size: 1em; + text-align: left; +} + +div.sphinxsidebar h3, div.sphinxsidebar h4 { + margin: 1em 0 0.5em 0; + font-size: 1em; + padding: 0.1em 0 0.1em 0.5em; + color: white; + border: 1px solid #86989B; + background-color: #AFC1C4; +} + +div.sphinxsidebar h3 a { + color: white; +} + +div.sphinxsidebar ul { + padding-left: 1.5em; + margin-top: 7px; + padding: 0; + line-height: 130%; +} + +div.sphinxsidebar ul ul { + margin-left: 20px; +} + +div.footer { + background-color: #E3EFF1; + 
color: #86989B; + padding: 3px 8px 3px 0; + clear: both; + font-size: 0.8em; + text-align: right; +} + +div.footer a { + color: #86989B; + text-decoration: underline; +} + +/* -- body styles ----------------------------------------------------------- */ + +p { + margin: 0.8em 0 0.5em 0; +} + +a { + color: #CA7900; + text-decoration: none; +} + +a:hover { + color: #2491CF; +} + +div.body a { + text-decoration: underline; +} + +h1 { + margin: 0; + padding: 0.7em 0 0.3em 0; + font-size: 1.5em; + color: #11557C; +} + +h2 { + margin: 1.3em 0 0.2em 0; + font-size: 1.35em; + padding: 0; +} + +h3 { + margin: 1em 0 -0.3em 0; + font-size: 1.2em; +} + +div.body h1 a, div.body h2 a, div.body h3 a, div.body h4 a, div.body h5 a, div.body h6 a { + color: black!important; +} + +h1 a.anchor, h2 a.anchor, h3 a.anchor, h4 a.anchor, h5 a.anchor, h6 a.anchor { + display: none; + margin: 0 0 0 0.3em; + padding: 0 0.2em 0 0.2em; + color: #aaa!important; +} + +h1:hover a.anchor, h2:hover a.anchor, h3:hover a.anchor, h4:hover a.anchor, +h5:hover a.anchor, h6:hover a.anchor { + display: inline; +} + +h1 a.anchor:hover, h2 a.anchor:hover, h3 a.anchor:hover, h4 a.anchor:hover, +h5 a.anchor:hover, h6 a.anchor:hover { + color: #777; + background-color: #eee; +} + +a.headerlink { + color: #c60f0f!important; + font-size: 1em; + margin-left: 6px; + padding: 0 4px 0 4px; + text-decoration: none!important; +} + +a.headerlink:hover { + background-color: #ccc; + color: white!important; +} + +cite, code, tt { + font-family: 'Consolas', 'Deja Vu Sans Mono', + 'Bitstream Vera Sans Mono', monospace; + font-size: 0.95em; + letter-spacing: 0.01em; +} + +tt { + background-color: #f2f2f2; + border-bottom: 1px solid #ddd; + color: #333; +} + +tt.descname, tt.descclassname, tt.xref { + border: 0; +} + +hr { + border: 1px solid #abc; + margin: 2em; +} + +a tt { + border: 0; + color: #CA7900; +} + +a tt:hover { + color: #2491CF; +} + +pre { + font-family: 'Consolas', 'Deja Vu Sans Mono', + 'Bitstream Vera Sans 
Mono', monospace; + font-size: 0.95em; + letter-spacing: 0.015em; + line-height: 120%; + padding: 0.5em; + border: 1px solid #ccc; + background-color: #f8f8f8; +} + +pre a { + color: inherit; + text-decoration: underline; +} + +td.linenos pre { + padding: 0.5em 0; +} + +div.quotebar { + background-color: #f8f8f8; + max-width: 250px; + float: right; + padding: 2px 7px; + border: 1px solid #ccc; +} + +div.topic { + background-color: #f8f8f8; +} + +table { + border-collapse: collapse; + margin: 0 -0.5em 0 -0.5em; +} + +table td, table th { + padding: 0.2em 0.5em 0.2em 0.5em; +} + +div.admonition, div.warning { + font-size: 0.9em; + margin: 1em 0 1em 0; + border: 1px solid #86989B; + background-color: #f7f7f7; + padding: 0; +} + +div.admonition p, div.warning p { + margin: 0.5em 1em 0.5em 1em; + padding: 0; +} + +div.admonition pre, div.warning pre { + margin: 0.4em 1em 0.4em 1em; +} + +div.admonition p.admonition-title, +div.warning p.admonition-title { + margin: 0; + padding: 0.1em 0 0.1em 0.5em; + color: white; + border-bottom: 1px solid #86989B; + font-weight: bold; + background-color: #AFC1C4; +} + +div.warning { + border: 1px solid #940000; +} + +div.warning p.admonition-title { + background-color: #CF0000; + border-bottom-color: #940000; +} + +div.admonition ul, div.admonition ol, +div.warning ul, div.warning ol { + margin: 0.1em 0.5em 0.5em 3em; + padding: 0; +} + +div.versioninfo { + margin: 1em 0 0 0; + border: 1px solid #ccc; + background-color: #DDEAF0; + padding: 8px; + line-height: 1.3em; + font-size: 0.9em; +} + +.viewcode-back { + font-family: 'Lucida Grande', 'Lucida Sans Unicode', 'Geneva', + 'Verdana', sans-serif; +} + +div.viewcode-block:target { + background-color: #f4debf; + border-top: 1px solid #ac9; + border-bottom: 1px solid #ac9; +} diff --git a/docs/llvm-theme/static/logo.png b/docs/llvm-theme/static/logo.png Binary files differnew file mode 100644 index 000000000000..4fc899028dc6 --- /dev/null +++ b/docs/llvm-theme/static/logo.png diff 
--git a/docs/llvm-theme/static/navigation.png b/docs/llvm-theme/static/navigation.png Binary files differnew file mode 100644 index 000000000000..1081dc1439fb --- /dev/null +++ b/docs/llvm-theme/static/navigation.png diff --git a/docs/llvm-theme/theme.conf b/docs/llvm-theme/theme.conf new file mode 100644 index 000000000000..330fc92ffa18 --- /dev/null +++ b/docs/llvm-theme/theme.conf @@ -0,0 +1,4 @@ +[theme] +inherit = basic +stylesheet = llvm.css +pygments_style = friendly diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 000000000000..8471252d709f --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,190 @@ +@ECHO OFF + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set BUILDDIR=_build +set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . +set I18NSPHINXOPTS=%SPHINXOPTS% . +if NOT "%PAPER%" == "" ( + set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% + set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% +) + +if "%1" == "" goto help + +if "%1" == "help" ( + :help + echo.Please use `make ^<target^>` where ^<target^> is one of + echo. html to make standalone HTML files + echo. dirhtml to make HTML files named index.html in directories + echo. singlehtml to make a single large HTML file + echo. pickle to make pickle files + echo. json to make JSON files + echo. htmlhelp to make HTML files and a HTML help project + echo. qthelp to make HTML files and a qthelp project + echo. devhelp to make HTML files and a Devhelp project + echo. epub to make an epub + echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter + echo. text to make text files + echo. man to make manual pages + echo. texinfo to make Texinfo files + echo. gettext to make PO message catalogs + echo. changes to make an overview over all changed/added/deprecated items + echo. linkcheck to check all external links for integrity + echo. 
doctest to run all doctests embedded in the documentation if enabled + goto end +) + +if "%1" == "clean" ( + for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i + del /q /s %BUILDDIR%\* + goto end +) + +if "%1" == "html" ( + %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/html. + goto end +) + +if "%1" == "dirhtml" ( + %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. + goto end +) + +if "%1" == "singlehtml" ( + %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. + goto end +) + +if "%1" == "pickle" ( + %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can process the pickle files. + goto end +) + +if "%1" == "json" ( + %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can process the JSON files. + goto end +) + +if "%1" == "htmlhelp" ( + %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can run HTML Help Workshop with the ^ +.hhp project file in %BUILDDIR%/htmlhelp. + goto end +) + +if "%1" == "qthelp" ( + %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can run "qcollectiongenerator" with the ^ +.qhcp project file in %BUILDDIR%/qthelp, like this: + echo.^> qcollectiongenerator %BUILDDIR%\qthelp\lld.qhcp + echo.To view the help file: + echo.^> assistant -collectionFile %BUILDDIR%\qthelp\lld.ghc + goto end +) + +if "%1" == "devhelp" ( + %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. 
+ goto end +) + +if "%1" == "epub" ( + %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The epub file is in %BUILDDIR%/epub. + goto end +) + +if "%1" == "latex" ( + %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. + goto end +) + +if "%1" == "text" ( + %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The text files are in %BUILDDIR%/text. + goto end +) + +if "%1" == "man" ( + %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The manual pages are in %BUILDDIR%/man. + goto end +) + +if "%1" == "texinfo" ( + %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. + goto end +) + +if "%1" == "gettext" ( + %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The message catalogs are in %BUILDDIR%/locale. + goto end +) + +if "%1" == "changes" ( + %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes + if errorlevel 1 exit /b 1 + echo. + echo.The overview file is in %BUILDDIR%/changes. + goto end +) + +if "%1" == "linkcheck" ( + %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck + if errorlevel 1 exit /b 1 + echo. + echo.Link check complete; look for any errors in the above output ^ +or in %BUILDDIR%/linkcheck/output.txt. + goto end +) + +if "%1" == "doctest" ( + %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest + if errorlevel 1 exit /b 1 + echo. + echo.Testing of doctests in the sources finished, look at the ^ +results in %BUILDDIR%/doctest/output.txt. 
+ goto end +) + +:end diff --git a/docs/open_projects.rst b/docs/open_projects.rst new file mode 100644 index 000000000000..eb146c8b7542 --- /dev/null +++ b/docs/open_projects.rst @@ -0,0 +1,17 @@ +.. _open_projects: + +Open Projects +============= + +.. include:: ../include/lld/Core/TODO.txt +.. include:: ../lib/Core/TODO.txt +.. include:: ../lib/Driver/TODO.rst +.. include:: ../lib/ReaderWriter/ELF/X86_64/TODO.rst +.. include:: ../lib/ReaderWriter/ELF/AArch64/TODO.rst +.. include:: ../lib/ReaderWriter/ELF/ARM/TODO.rst +.. include:: ../tools/lld/TODO.txt + +Documentation TODOs +~~~~~~~~~~~~~~~~~~~ + +.. todolist:: diff --git a/docs/sphinx_intro.rst b/docs/sphinx_intro.rst new file mode 100644 index 000000000000..6845bc812e78 --- /dev/null +++ b/docs/sphinx_intro.rst @@ -0,0 +1,147 @@ +.. _sphinx_intro: + +Sphinx Introduction for LLVM Developers +======================================= + +This document is intended as a short and simple introduction to the Sphinx +documentation generation system for LLVM developers. + +Quickstart +---------- + +To get started writing documentation, you will need to: + + 1. Have the Sphinx tools :ref:`installed <installing_sphinx>`. + + 2. Understand how to :ref:`build the documentation + <building_the_documentation>`. + + 3. Start :ref:`writing documentation <writing_documentation>`! + +.. _installing_sphinx: + +Installing Sphinx +~~~~~~~~~~~~~~~~~ + +You should be able to install Sphinx using the standard Python package +installation tool ``easy_install``, as follows:: + + $ sudo easy_install sphinx + Searching for sphinx + Reading http://pypi.python.org/simple/sphinx/ + Reading http://sphinx.pocoo.org/ + Best match: Sphinx 1.1.3 + ... more lines here .. + +If you do not have root access (or otherwise want to avoid installing Sphinx in +system directories) see the section on :ref:`installing_sphinx_in_a_venv` . 
+ +If you do not have the ``easy_install`` tool on your system, you should be able +to install it using: + + Linux + Use your distribution's standard package management tool to install it, + i.e., ``apt-get install easy_install`` or ``yum install easy_install``. + + Mac OS X + All modern Mac OS X systems come with ``easy_install`` as part of the base + system. + + Windows + See the `setuptools <http://pypi.python.org/pypi/setuptools>`_ package web + page for instructions. + + +.. _building_the_documentation: + +Building the documentation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In order to build the documentation, all you should need to do is change to the +``docs`` directory and invoke make as follows:: + + $ cd path/to/project/docs + $ make html + +Note that on Windows there is a ``make.bat`` command in the docs directory which +supplies the same interface as the ``Makefile``. + +That command will invoke ``sphinx-build`` with the appropriate options for the +project, and generate the HTML documentation in a ``_build`` subdirectory. You +can browse it starting from the index page by visiting +``_build/html/index.html``. + +Sphinx supports a wide variety of generation formats (including LaTeX, man +pages, and plain text). The ``Makefile`` includes a number of convenience +targets for invoking ``sphinx-build`` appropriately, the common ones are: + + make html + Generate the HTML output. + + make latexpdf + Generate LaTeX documentation and convert to a PDF. + + make man + Generate man pages. + + +.. _writing_documentation: + +Writing documentation +~~~~~~~~~~~~~~~~~~~~~ + +The documentation itself is written in the reStructuredText (ReST) format, and Sphinx +defines additional tags to support features like cross-referencing. + +The ReST format itself is organized around documents mostly being readable +plaintext documents. You should generally be able to write new documentation +easily just by following the style of the existing documentation. 
+ +If you want to understand the formatting of the documents more, the best place +to start is Sphinx's own `ReST Primer <http://sphinx.pocoo.org/rest.html>`_. + + +Learning More +------------- + +If you want to learn more about the Sphinx system, the best place to start is +the Sphinx documentation itself, available `here +<http://sphinx.pocoo.org/contents.html>`_. + + +.. _installing_sphinx_in_a_venv: + +Installing Sphinx in a Virtual Environment +------------------------------------------ + +Most Python developers prefer to work with tools inside a *virtualenv* (virtual +environment) instance, which functions as an application sandbox. This avoids +polluting your system installation with different packages used by various +projects (and ensures that dependencies for different packages don't conflict +with one another). Of course, you need to first have the virtualenv software +itself which generally would be installed at the system level:: + + $ sudo easy_install virtualenv + +but after that you no longer need to install additional packages in the system +directories. + +Once you have the *virtualenv* tool itself installed, you can create a +virtualenv for Sphinx using:: + + $ virtualenv ~/my-sphinx-install + New python executable in /Users/dummy/my-sphinx-install/bin/python + Installing setuptools............done. + Installing pip...............done. + + $ ~/my-sphinx-install/bin/easy_install sphinx + ... install messages here ... + +and from now on you can "activate" the *virtualenv* using:: + + $ source ~/my-sphinx-install/bin/activate + +which will change your PATH to ensure the sphinx-build tool from inside the +virtual environment will be used. See the `virtualenv website +<http://www.virtualenv.org/en/latest/index.html>`_ for more information on using +virtual environments. 
diff --git a/docs/windows_support.rst b/docs/windows_support.rst new file mode 100644 index 000000000000..d9906a72ea1e --- /dev/null +++ b/docs/windows_support.rst @@ -0,0 +1,118 @@ +.. raw:: html + + <style type="text/css"> + .none { background-color: #FFCCCC } + .partial { background-color: #FFFF99 } + .good { background-color: #CCFF99 } + </style> + +.. role:: none +.. role:: partial +.. role:: good + +=============== +Windows support +=============== + +LLD has some experimental Windows support. When invoked as ``link.exe`` or with +``-flavor link``, the driver for Windows operating system is used to parse +command line options, and it drives further linking processes. LLD accepts +almost all command line options that the linker shipped with Microsoft Visual +C++ (link.exe) supports. + +The current status is that LLD can link itself on Windows x86 using Visual C++ +2012 or 2013 as the compiler. + +Development status +================== + +Driver + :good:`Mostly done`. Some exotic command line options that are not usually + used for application development, such as ``/DRIVER``, are not supported. + Options for Windows 8 app store are not recognized either + (e.g. ``/APPCONTAINER``). + +Linking against DLL + :good:`Done`. LLD can read import libraries needed to link against DLL. Both + export-by-name and export-by-ordinal are supported. + +Linking against static library + :good:`Done`. The format of static library (.lib) on Windows is actually the + same as on Unix (.a). LLD can read it. + +Creating DLL + :good:`Done`. LLD creates a DLL if ``/DLL`` option is given. Exported + functions can be specified either via command line (``/EXPORT``) or via + module-definition file (.def). Both export-by-name and export-by-ordinal are + supported. LLD uses Microsoft ``lib.exe`` tool to create an import library + file. + +Windows resource files support + :good:`Done`. If an ``.rc`` file is given, LLD converts the file to a COFF + file using some external commands and links it. 
Specifically, ``rc.exe`` is + used to compile a resource file (.rc) to a compiled resource (.res) + file. ``cvtres.exe`` is then used to convert a compiled resource file to a + COFF object file section. Both tools are shipped with MSVC. + +Safe Structured Exception Handler (SEH) + :good:`Done` for x86. :partial:`Work in progress` for x64. + +Module-definition file + :partial:`Partially done`. LLD currently recognizes these directives: + ``EXPORTS``, ``HEAPSIZE``, ``STACKSIZE``, ``NAME``, and ``VERSION``. + +x64 (x86-64) + :partial:`Work in progress`. LLD can create PE32+ executable but the generated + file does not work unless source object files are very simple because of the + lack of SEH handler table. + +Debug info + :none:`No progress has been made`. Microsoft linker can interpret the CodeGen + debug info (old-style debug info) and PDB to emit a .pdb file. LLD supports + neither. + + +Building LLD +============ + +Using Visual Studio IDE/MSBuild +------------------------------- + +1. Check out LLVM and LLD from the LLVM SVN repository (or Git mirror), +#. run ``cmake -G "Visual Studio 12" <llvm-source-dir>`` from VS command prompt, +#. open LLVM.sln with Visual Studio, and +#. build ``lld`` target in ``lld executables`` folder + +Alternatively, you can use msbuild if you don't like to work in an IDE:: + + msbuild LLVM.sln /m /target:"lld executables\lld" + +MSBuild.exe had been shipped as a component of the .NET framework, but since +2013 it's part of Visual Studio. You can find it at "C:\\Program Files +(x86)\\msbuild". + +You can build LLD as a 64 bit application. To do that, open VS2013 x64 command +prompt and run cmake for "Visual Studio 12 Win64" target. + +Using Ninja +----------- + +1. Check out LLVM and LLD from the LLVM SVN repository (or Git mirror), +#. run ``cmake -G ninja <llvm-source-dir>`` from VS command prompt, +#. 
run ``ninja lld`` + +Known issues +============ + +Note that LLD is still in early stage in development, so there are still many +bugs. Here is a list of notable bugs. + +* Symbol name resolution from library files sometimes fails. On Windows, the + order of library files in command line does not matter, but LLD sometimes + fails to simulate the semantics. A workaround for it is to explicitly add + library files to command line with ``/DEFAULTLIB``. + +* Subsystem inference is not very reliable. Linker is supposed to set + ``subsystem`` field in the PE/COFF header according to entry function name, + but LLD sometimes ended up with ``unknown`` subsystem type. You need to give + ``/SUBSYSTEM`` option if it fails to infer it. diff --git a/include/Makefile b/include/Makefile new file mode 100644 index 000000000000..d8903356d9fb --- /dev/null +++ b/include/Makefile @@ -0,0 +1,4 @@ +LLD_LEVEL := .. +DIRS := lld + +include $(LLD_LEVEL)/Makefile diff --git a/include/lld/Config/Makefile b/include/lld/Config/Makefile new file mode 100644 index 000000000000..e2139220e3df --- /dev/null +++ b/include/lld/Config/Makefile @@ -0,0 +1,32 @@ +LLD_LEVEL := ../../.. + +BUILT_SOURCES = Version.inc + +TABLEGEN_INC_FILES_COMMON = 1 + +include $(LLD_LEVEL)/Makefile + +# Compute the lld version from the LLVM version, unless specified explicitly. +ifndef LLD_VERSION +LLD_VERSION := $(subst svn,,$(LLVMVersion)) +LLD_VERSION := $(subst rc,,$(LLD_VERSION)) +endif + +LLD_VERSION_COMPONENTS := $(subst ., ,$(LLD_VERSION)) +LLD_VERSION_MAJOR := $(word 1,$(LLD_VERSION_COMPONENTS)) +LLD_VERSION_MINOR := $(word 2,$(LLD_VERSION_COMPONENTS)) + +LLD_REVISION := $(strip \ + $(shell $(LLVM_SRC_ROOT)/utils/GetSourceVersion $(LLVM_SRC_ROOT)/tools/lld)) + +LLD_REPOSITORY := $(strip \ + $(shell $(LLVM_SRC_ROOT)/utils/GetRepositoryPath $(LLVM_SRC_ROOT)/tools/lld)) + +$(ObjDir)/Version.inc.tmp : Version.inc.in Makefile $(LLVM_OBJ_ROOT)/Makefile.config $(ObjDir)/.dir + $(Echo) "Updating LLD version info." 
+ $(Verb)sed -e "s#@LLD_VERSION@#$(LLD_VERSION)#g" \ + -e "s#@LLD_VERSION_MAJOR@#$(LLD_VERSION_MAJOR)#g" \ + -e "s#@LLD_VERSION_MINOR@#$(LLD_VERSION_MINOR)#g" \ + -e "s#@LLD_REVISION@#$(LLD_REVISION)#g" \ + -e "s#@LLD_REPOSITORY@#$(LLD_REPOSITORY)#g" \ + $< > $@ diff --git a/include/lld/Config/Version.h b/include/lld/Config/Version.h new file mode 100644 index 000000000000..41433c1175ef --- /dev/null +++ b/include/lld/Config/Version.h @@ -0,0 +1,51 @@ +//===- lld/Config/Version.h - LLD Version Number ----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief Defines version macros and version-related utility functions +/// for lld. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLD_VERSION_H +#define LLD_VERSION_H + +#include "lld/Config/Version.inc" +#include "llvm/ADT/StringRef.h" +#include <string> + +/// \brief Helper macro for LLD_VERSION_STRING. +#define LLD_MAKE_VERSION_STRING2(X) #X + +/// \brief Helper macro for LLD_VERSION_STRING. +#define LLD_MAKE_VERSION_STRING(X, Y) LLD_MAKE_VERSION_STRING2(X.Y) + +/// \brief A string that describes the lld version number, e.g., "1.0". +#define LLD_VERSION_STRING \ + LLD_MAKE_VERSION_STRING(LLD_VERSION_MAJOR, LLD_VERSION_MINOR) + +namespace lld { +/// \brief Retrieves the repository path (e.g., Subversion path) that +/// identifies the particular lld branch, tag, or trunk from which this +/// lld was built. +llvm::StringRef getLLDRepositoryPath(); + +/// \brief Retrieves the repository revision number (or identifer) from which +/// this lld was built. 
+llvm::StringRef getLLDRevision(); + +/// \brief Retrieves the full repository version that is an amalgamation of +/// the information in getLLDRepositoryPath() and getLLDRevision(). +std::string getLLDRepositoryVersion(); + +/// \brief Retrieves a string representing the complete lld version. +llvm::StringRef getLLDVersion(); +} + +#endif // LLD_VERSION_H diff --git a/include/lld/Config/Version.inc.in b/include/lld/Config/Version.inc.in new file mode 100644 index 000000000000..c893a56686c0 --- /dev/null +++ b/include/lld/Config/Version.inc.in @@ -0,0 +1,5 @@ +#define LLD_VERSION @LLD_VERSION@ +#define LLD_VERSION_MAJOR @LLD_VERSION_MAJOR@ +#define LLD_VERSION_MINOR @LLD_VERSION_MINOR@ +#define LLD_REVISION_STRING "@LLD_REVISION@" +#define LLD_REPOSITORY_STRING "@LLD_REPOSITORY@" diff --git a/include/lld/Core/AbsoluteAtom.h b/include/lld/Core/AbsoluteAtom.h new file mode 100644 index 000000000000..ed25297cea81 --- /dev/null +++ b/include/lld/Core/AbsoluteAtom.h @@ -0,0 +1,43 @@ +//===- Core/AbsoluteAtom.h - An absolute Atom -----------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CORE_ABSOLUTE_ATOM_H +#define LLD_CORE_ABSOLUTE_ATOM_H + +#include "lld/Core/Atom.h" + +namespace lld { + +/// An AbsoluteAtom has no content. +/// It exists to represent content at fixed addresses in memory. +class AbsoluteAtom : public Atom { +public: + + virtual uint64_t value() const = 0; + + /// scope - The visibility of this atom to other atoms. C static functions + /// have scope scopeTranslationUnit. Regular C functions have scope + /// scopeGlobal. Functions compiled with visibility=hidden have scope + /// scopeLinkageUnit so they can be see by other atoms being linked but not + /// by the OS loader. 
+ virtual Scope scope() const = 0; + + static bool classof(const Atom *a) { + return a->definition() == definitionAbsolute; + } + + static bool classof(const AbsoluteAtom *) { return true; } + +protected: + AbsoluteAtom() : Atom(definitionAbsolute) {} +}; + +} // namespace lld + +#endif // LLD_CORE_ABSOLUTE_ATOM_H diff --git a/include/lld/Core/Alias.h b/include/lld/Core/Alias.h new file mode 100644 index 000000000000..610022525ecb --- /dev/null +++ b/include/lld/Core/Alias.h @@ -0,0 +1,102 @@ +//===- lld/Core/Alias.h - Alias atoms -------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief Provide alias atoms. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CORE_ALIAS_H +#define LLD_CORE_ALIAS_H + +#include "lld/Core/LLVM.h" +#include "lld/Core/Simple.h" +#include "llvm/ADT/Optional.h" +#include <string> + +namespace lld { + +// An AliasAtom is a zero-size atom representing an alias for other atom. It has +// a LayoutAfter reference to the target atom, so that this atom and the target +// atom will be laid out at the same location in the final result. Initially +// the target atom is an undefined atom. Resolver will replace it with a defined +// one. +// +// It does not have attributes itself. Most member function calls are forwarded +// to the target atom. 
+class AliasAtom : public SimpleDefinedAtom { +public: + AliasAtom(const File &file, StringRef name) + : SimpleDefinedAtom(file), _target(nullptr), _name(name), + _merge(DefinedAtom::mergeNo), _deadStrip(DefinedAtom::deadStripNormal) { + } + + StringRef name() const override { return _name; } + uint64_t size() const override { return 0; } + ArrayRef<uint8_t> rawContent() const override { return ArrayRef<uint8_t>(); } + + Scope scope() const override { + getTarget(); + return _target ? _target->scope() : scopeLinkageUnit; + } + + Merge merge() const override { + if (_merge.hasValue()) + return _merge.getValue(); + getTarget(); + return _target ? _target->merge() : mergeNo; + } + + void setMerge(Merge val) { _merge = val; } + + ContentType contentType() const override { + getTarget(); + return _target ? _target->contentType() : typeUnknown; + } + + Interposable interposable() const override { + getTarget(); + return _target ? _target->interposable() : interposeNo; + } + + SectionChoice sectionChoice() const override { + getTarget(); + return _target ? _target->sectionChoice() : sectionBasedOnContent; + } + + StringRef customSectionName() const override { + getTarget(); + return _target ? 
_target->customSectionName() : StringRef(""); + } + + DeadStripKind deadStrip() const override { return _deadStrip; } + void setDeadStrip(DeadStripKind val) { _deadStrip = val; } + +private: + void getTarget() const { + if (_target) + return; + for (const Reference *r : *this) { + if (r->kindNamespace() == lld::Reference::KindNamespace::all && + r->kindValue() == lld::Reference::kindLayoutAfter) { + _target = dyn_cast<DefinedAtom>(r->target()); + return; + } + } + } + + mutable const DefinedAtom *_target; + std::string _name; + llvm::Optional<Merge> _merge; + DeadStripKind _deadStrip; +}; + +} // end namespace lld + +#endif diff --git a/include/lld/Core/ArchiveLibraryFile.h b/include/lld/Core/ArchiveLibraryFile.h new file mode 100644 index 000000000000..ff379d4f3ecf --- /dev/null +++ b/include/lld/Core/ArchiveLibraryFile.h @@ -0,0 +1,60 @@ +//===- Core/ArchiveLibraryFile.h - Models static library ------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CORE_ARCHIVE_LIBRARY_FILE_H +#define LLD_CORE_ARCHIVE_LIBRARY_FILE_H + +#include "lld/Core/File.h" +#include "lld/Core/Parallel.h" +#include <set> + +namespace lld { + +/// +/// The ArchiveLibraryFile subclass of File is used to represent unix +/// static library archives. These libraries provide no atoms to the +/// initial set of atoms linked. Instead, when the Resolver will query +/// ArchiveLibraryFile instances for specific symbols names using the +/// find() method. If the archive contains an object file which has a +/// DefinedAtom whose scope is not translationUnit, then that entire +/// object file File is returned. 
+/// +class ArchiveLibraryFile : public File { +public: + static bool classof(const File *f) { + return f->kind() == kindArchiveLibrary; + } + + /// Check if any member of the archive contains an Atom with the + /// specified name and return the File object for that member, or nullptr. + virtual File *find(StringRef name, bool dataSymbolOnly) = 0; + + virtual std::error_code + parseAllMembers(std::vector<std::unique_ptr<File>> &result) = 0; + + // Parses a member file containing a given symbol, so that when you + // need the file find() can return that immediately. Calling this function + // has no side effect other than pre-instantiating a file. Calling this + // function doesn't affect correctness. + virtual void preload(TaskGroup &group, StringRef symbolName) {} + + /// Returns a set of all defined symbols in the archive, i.e. all + /// resolvable symbol using this file. + virtual std::set<StringRef> getDefinedSymbols() { + return std::set<StringRef>(); + } + +protected: + /// only subclasses of ArchiveLibraryFile can be instantiated + ArchiveLibraryFile(StringRef path) : File(path, kindArchiveLibrary) {} +}; + +} // namespace lld + +#endif // LLD_CORE_ARCHIVE_LIBRARY_FILE_H diff --git a/include/lld/Core/Atom.h b/include/lld/Core/Atom.h new file mode 100644 index 000000000000..27fdde022ba7 --- /dev/null +++ b/include/lld/Core/Atom.h @@ -0,0 +1,76 @@ +//===- Core/Atom.h - A node in linking graph ------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CORE_ATOM_H +#define LLD_CORE_ATOM_H + +#include "lld/Core/LLVM.h" + +namespace lld { + +class File; + +/// +/// The linker has a Graph Theory model of linking. An object file is seen +/// as a set of Atoms with References to other Atoms. Each Atom is a node +/// and each Reference is an edge. 
An Atom can be a DefinedAtom which has +/// content or an UndefinedAtom which is a placeholder and represents an +/// undefined symbol (extern declaration). +/// +class Atom { +public: + /// Whether this atom is defined or a proxy for an undefined symbol + enum Definition { + definitionRegular, ///< Normal C/C++ function or global variable. + definitionAbsolute, ///< Asm-only (foo = 10). Not tied to any content. + definitionUndefined, ///< Only in .o files to model reference to undef. + definitionSharedLibrary ///< Only in shared libraries to model export. + }; + + /// The scope in which this atom is accessible to other atoms. + enum Scope { + scopeTranslationUnit, ///< Accessible only to atoms in the same translation + /// unit (e.g. a C static). + scopeLinkageUnit, ///< Accessible to atoms being linked but not visible + /// to runtime loader (e.g. visibility=hidden). + scopeGlobal ///< Accessible to all atoms and visible to runtime + /// loader (e.g. visibility=default). + }; + + + /// file - returns the File that produced/owns this Atom + virtual const File& file() const = 0; + + /// name - The name of the atom. For a function atom, it is the (mangled) + /// name of the function. + virtual StringRef name() const = 0; + + /// definition - Whether this atom is a definition or represents an undefined + /// symbol. + Definition definition() const { return _definition; } + + static bool classof(const Atom *a) { return true; } + +protected: + /// Atom is an abstract base class. Only subclasses can access constructor. + explicit Atom(Definition def) : _definition(def) {} + + /// The memory for Atom objects is always managed by the owning File + /// object. Therefore, no one but the owning File object should call + /// delete on an Atom. In fact, some File objects may bulk allocate + /// an array of Atoms, so they cannot be individually deleted by anyone. 
+ virtual ~Atom() {} + +private: + Definition _definition; +}; + +} // namespace lld + +#endif // LLD_CORE_ATOM_H diff --git a/include/lld/Core/DefinedAtom.h b/include/lld/Core/DefinedAtom.h new file mode 100644 index 000000000000..86d880c659b4 --- /dev/null +++ b/include/lld/Core/DefinedAtom.h @@ -0,0 +1,368 @@ +//===- Core/DefinedAtom.h - An Atom with content --------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CORE_DEFINED_ATOM_H +#define LLD_CORE_DEFINED_ATOM_H + +#include "lld/Core/Atom.h" +#include "lld/Core/LLVM.h" + +namespace lld { +class File; +class Reference; + +/// \brief The fundamental unit of linking. +/// +/// A C function or global variable is an atom. An atom has content and +/// attributes. The content of a function atom is the instructions that +/// implement the function. The content of a global variable atom is its +/// initial bytes. +/// +/// Here are some example attribute sets for common atoms. 
If a particular +/// attribute is not listed, the default values are: definition=regular, +/// sectionChoice=basedOnContent, scope=translationUnit, merge=no, +/// deadStrip=normal, interposable=no +/// +/// C function: void foo() {} <br> +/// name=foo, type=code, perm=r_x, scope=global +/// +/// C static function: static void func() {} <br> +/// name=func, type=code, perm=r_x +/// +/// C global variable: int count = 1; <br> +/// name=count, type=data, perm=rw_, scope=global +/// +/// C tentative definition: int bar; <br> +/// name=bar, type=zerofill, perm=rw_, scope=global, +/// merge=asTentative, interposable=yesAndRuntimeWeak +/// +/// Uninitialized C static variable: static int stuff; <br> +/// name=stuff, type=zerofill, perm=rw_ +/// +/// Weak C function: __attribute__((weak)) void foo() {} <br> +/// name=foo, type=code, perm=r_x, scope=global, merge=asWeak +/// +/// Hidden C function: __attribute__((visibility("hidden"))) void foo() {}<br> +/// name=foo, type=code, perm=r_x, scope=linkageUnit +/// +/// No-dead-strip function: __attribute__((used)) void foo() {} <br> +/// name=foo, type=code, perm=r_x, scope=global, deadStrip=never +/// +/// Non-inlined C++ inline method: inline void Foo::doit() {} <br> +/// name=_ZN3Foo4doitEv, type=code, perm=r_x, scope=global, +/// merge=asWeak +/// +/// Non-inlined C++ inline method whose address is taken: +/// inline void Foo::doit() {} <br> +/// name=_ZN3Foo4doitEv, type=code, perm=r_x, scope=global, +/// merge=asWeakAndAddressUsed +/// +/// literal c-string: "hello" <br> +/// name="" type=cstring, perm=r__, scope=linkageUnit +/// +/// literal double: 1.234 <br> +/// name="" type=literal8, perm=r__, scope=linkageUnit +/// +/// constant: { 1,2,3 } <br> +/// name="" type=constant, perm=r__, scope=linkageUnit +/// +/// Pointer to initializer function: <br> +/// name="" type=initializer, perm=rw_l, +/// sectionChoice=customRequired +/// +/// C function placed in custom section: __attribute__((section("__foo"))) +/// void 
foo() {} <br> +/// name=foo, type=code, perm=r_x, scope=global, +/// sectionChoice=customRequired, customSectionName=__foo +/// +class DefinedAtom : public Atom { +public: + enum Interposable { + interposeNo, // linker can directly bind uses of this atom + interposeYes, // linker must indirect (through GOT) uses + interposeYesAndRuntimeWeak // must indirect and mark symbol weak in final + // linked image + }; + + enum Merge { + mergeNo, // Another atom with same name is error + mergeAsTentative, // Is ANSI C tentative definition, can be coalesced + mergeAsWeak, // Is C++ inline definition that was not inlined, + // but address was not taken, so atom can be hidden + // by linker + mergeAsWeakAndAddressUsed, // Is C++ inline definition whose + // address was taken. + mergeSameNameAndSize, // Another atom with different size is error + mergeByLargestSection, // Choose an atom whose section is the largest. + mergeByContent, // Merge with other constants with same content. + }; + + enum ContentType { + typeUnknown, // for use with definitionUndefined + typeCode, // executable code + typeResolver, // function which returns address of target + typeBranchIsland, // linker created for large binaries + typeBranchShim, // linker created to switch thumb mode + typeStub, // linker created for calling external function + typeStubHelper, // linker created for initial stub binding + typeConstant, // a read-only constant + typeCString, // a zero terminated UTF8 C string + typeUTF16String, // a zero terminated UTF16 string + typeCFI, // a FDE or CIE from dwarf unwind info + typeLSDA, // extra unwinding info + typeLiteral4, // a four-byte read-only constant + typeLiteral8, // an eight-byte read-only constant + typeLiteral16, // a sixteen-byte read-only constant + typeData, // read-write data + typeDataFast, // allow data to be quickly accessed + typeZeroFill, // zero-fill data + typeZeroFillFast, // allow zero-fill data to be quickly accessed + typeConstData, // read-only 
data after dynamic linker is done + typeObjC1Class, // ObjC1 class [Darwin] + typeLazyPointer, // pointer through which a stub jumps + typeLazyDylibPointer, // pointer through which a stub jumps [Darwin] + typeCFString, // NS/CFString object [Darwin] + typeGOT, // pointer to external symbol + typeInitializerPtr, // pointer to initializer function + typeTerminatorPtr, // pointer to terminator function + typeCStringPtr, // pointer to UTF8 C string [Darwin] + typeObjCClassPtr, // pointer to ObjC class [Darwin] + typeObjC2CategoryList, // pointers to ObjC category [Darwin] + typeDTraceDOF, // runtime data for Dtrace [Darwin] + typeInterposingTuples, // tuples of interposing info for dyld [Darwin] + typeTempLTO, // temporary atom for bitcode reader + typeCompactUnwindInfo, // runtime data for unwinder [Darwin] + typeProcessedUnwindInfo,// compressed compact unwind info [Darwin] + typeThunkTLV, // thunk used to access a TLV [Darwin] + typeTLVInitialData, // initial data for a TLV [Darwin] + typeTLVInitialZeroFill, // TLV initial zero fill data [Darwin] + typeTLVInitializerPtr, // pointer to thread local initializer [Darwin] + typeMachHeader, // atom representing mach_header [Darwin] + typeThreadZeroFill, // Uninitialized thread local data(TBSS) [ELF] + typeThreadData, // Initialized thread local data(TDATA) [ELF] + typeRONote, // Identifies readonly note sections [ELF] + typeRWNote, // Identifies readwrite note sections [ELF] + typeNoAlloc, // Identifies non allocatable sections [ELF] + typeGroupComdat, // Identifies a section group [ELF, COFF] + typeGnuLinkOnce, // Identifies a gnu.linkonce section [ELF] + }; + + // Permission bits for atoms and segments. The order of these values are + // important, because the layout pass may sort atoms by permission if other + // attributes are the same. 
+ enum ContentPermissions { + perm___ = 0, // mapped as unaccessible + permR__ = 8, // mapped read-only + permRW_ = 8 + 2, // mapped readable and writable + permRW_L = 8 + 2 + 1, // initially mapped r/w, then made read-only + // loader writable + permR_X = 8 + 4, // mapped readable and executable + permRWX = 8 + 2 + 4, // mapped readable and writable and executable + permUnknown = 16 // unknown or invalid permissions + }; + + enum SectionChoice { + sectionBasedOnContent, // linker infers final section based on content + sectionCustomPreferred, // linker may place in specific section + sectionCustomRequired // linker must place in specific section + }; + + enum DeadStripKind { + deadStripNormal, // linker may dead strip this atom + deadStripNever, // linker must never dead strip this atom + deadStripAlways // linker must remove this atom if unused + }; + + enum DynamicExport { + /// \brief The linker may or may not export this atom dynamically depending + /// on the output type and other context of the link. + dynamicExportNormal, + /// \brief The linker will always export this atom dynamically. + dynamicExportAlways, + }; + + // Attributes describe a code model used by the atom. + enum CodeModel { + codeNA, // no specific code model + codeMipsPIC, // PIC function in a PIC / non-PIC mixed file + codeMipsMicro, // microMIPS instruction encoding + codeMipsMicroPIC, // microMIPS instruction encoding + PIC + codeMips16, // MIPS-16 instruction encoding + codeARMThumb, // ARM Thumb instruction set + }; + + struct Alignment { + Alignment(int p2, int m = 0) + : powerOf2(p2) + , modulus(m) {} + + uint16_t powerOf2; + uint16_t modulus; + + bool operator==(const Alignment &rhs) const { + return (powerOf2 == rhs.powerOf2) && (modulus == rhs.modulus); + } + }; + + /// \brief returns a value for the order of this Atom within its file. + /// + /// This is used by the linker to order the layout of Atoms so that the + /// resulting image is stable and reproducible. 
+ /// + /// Note that this should not be confused with ordinals of exported symbols in + /// Windows DLLs. In Windows terminology, ordinals are symbols' export table + /// indices (small integers) which can be used instead of symbol names to + /// refer to items in a DLL. + virtual uint64_t ordinal() const = 0; + + /// \brief the number of bytes of space this atom's content will occupy in the + /// final linked image. + /// + /// For a function atom, it is the number of bytes of code in the function. + virtual uint64_t size() const = 0; + + /// \brief The size of the section from which the atom is instantiated. + /// + /// Merge::mergeByLargestSection is defined in terms of section size + /// and not in terms of atom size, so we need this function separate + /// from size(). + virtual uint64_t sectionSize() const { return 0; } + + /// \brief The visibility of this atom to other atoms. + /// + /// C static functions have scope scopeTranslationUnit. Regular C functions + /// have scope scopeGlobal. Functions compiled with visibility=hidden have + /// scope scopeLinkageUnit so they can be seen by other atoms being linked but + /// not by the OS loader. + virtual Scope scope() const = 0; + + /// \brief Whether the linker should use direct or indirect access to this + /// atom. + virtual Interposable interposable() const = 0; + + /// \brief how the linker should handle if multiple atoms have the same name. + virtual Merge merge() const = 0; + + /// \brief The type of this atom, such as code or data. + virtual ContentType contentType() const = 0; + + /// \brief The alignment constraints on how this atom must be laid out in the + /// final linked image (e.g. 16-byte aligned). + virtual Alignment alignment() const = 0; + + /// \brief Whether this atom must be in a specially named section in the final + /// linked image, or if the linker can infer the section based on the + /// contentType(). 
+ virtual SectionChoice sectionChoice() const = 0; + + /// \brief If sectionChoice() != sectionBasedOnContent, then this return the + /// name of the section the atom should be placed into. + virtual StringRef customSectionName() const = 0; + + /// \brief constraints on whether the linker may dead strip away this atom. + virtual DeadStripKind deadStrip() const = 0; + + /// \brief Under which conditions should this atom be dynamically exported. + virtual DynamicExport dynamicExport() const { + return dynamicExportNormal; + } + + /// \brief Code model used by the atom. + virtual CodeModel codeModel() const { return codeNA; } + + /// \brief Returns the OS memory protections required for this atom's content + /// at runtime. + /// + /// A function atom is R_X, a global variable is RW_, and a read-only constant + /// is R__. + virtual ContentPermissions permissions() const; + + /// \brief returns a reference to the raw (unrelocated) bytes of this Atom's + /// content. + virtual ArrayRef<uint8_t> rawContent() const = 0; + + /// This class abstracts iterating over the sequence of References + /// in an Atom. Concrete instances of DefinedAtom must implement + /// the derefIterator() and incrementIterator() methods. + class reference_iterator { + public: + reference_iterator(const DefinedAtom &a, const void *it) + : _atom(a), _it(it) { } + + const Reference *operator*() const { + return _atom.derefIterator(_it); + } + + const Reference *operator->() const { + return _atom.derefIterator(_it); + } + + bool operator!=(const reference_iterator &other) const { + return _it != other._it; + } + + reference_iterator &operator++() { + _atom.incrementIterator(_it); + return *this; + } + private: + const DefinedAtom &_atom; + const void *_it; + }; + + /// \brief Returns an iterator to the beginning of this Atom's References. + virtual reference_iterator begin() const = 0; + + /// \brief Returns an iterator to the end of this Atom's References. 
+ virtual reference_iterator end() const = 0; + + static bool classof(const Atom *a) { + return a->definition() == definitionRegular; + } + + /// Utility for deriving permissions from content type + static ContentPermissions permissions(ContentType type); + + /// Utility function to check if the atom occupies file space + bool occupiesDiskSpace() const { + ContentType atomContentType = contentType(); + return !(atomContentType == DefinedAtom::typeZeroFill || + atomContentType == DefinedAtom::typeZeroFillFast || + atomContentType == DefinedAtom::typeTLVInitialZeroFill || + atomContentType == DefinedAtom::typeThreadZeroFill); + } + + /// Utility function to check if the atom belongs to a group section + /// that represents section groups or .gnu.linkonce sections. + bool isGroupParent() const { + ContentType atomContentType = contentType(); + return (atomContentType == DefinedAtom::typeGroupComdat || + atomContentType == DefinedAtom::typeGnuLinkOnce); + } + + // Returns true if lhs should be placed before rhs in the final output. + static bool compareByPosition(const DefinedAtom *lhs, + const DefinedAtom *rhs); + +protected: + // DefinedAtom is an abstract base class. Only subclasses can access + // constructor. + DefinedAtom() : Atom(definitionRegular) { } + + /// \brief Returns a pointer to the Reference object that the abstract + /// iterator "points" to. + virtual const Reference *derefIterator(const void *iter) const = 0; + + /// \brief Adjusts the abstract iterator to "point" to the next Reference + /// object for this Atom. 
+ virtual void incrementIterator(const void *&iter) const = 0; +}; +} // end namespace lld + +#endif diff --git a/include/lld/Core/Error.h b/include/lld/Core/Error.h new file mode 100644 index 000000000000..7caa25018f40 --- /dev/null +++ b/include/lld/Core/Error.h @@ -0,0 +1,82 @@ +//===- Error.h - system_error extensions for lld ----------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This declares a new error_category for the lld library. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CORE_ERROR_H +#define LLD_CORE_ERROR_H + +#include "lld/Core/LLVM.h" +#include <system_error> + +namespace lld { + +const std::error_category &native_reader_category(); + +enum class NativeReaderError { + success = 0, + unknown_file_format, + file_too_short, + file_malformed, + unknown_chunk_type, + memory_error, + conflicting_target_machine, +}; + +inline std::error_code make_error_code(NativeReaderError e) { + return std::error_code(static_cast<int>(e), native_reader_category()); +} + +const std::error_category &YamlReaderCategory(); + +enum class YamlReaderError { + success = 0, + unknown_keyword, + illegal_value +}; + +inline std::error_code make_error_code(YamlReaderError e) { + return std::error_code(static_cast<int>(e), YamlReaderCategory()); +} + +const std::error_category &LinkerScriptReaderCategory(); + +enum class LinkerScriptReaderError { + success = 0, + parse_error, + unknown_symbol_in_expr, + unrecognized_function_in_expr +}; + +inline std::error_code make_error_code(LinkerScriptReaderError e) { + return std::error_code(static_cast<int>(e), LinkerScriptReaderCategory()); +} + +/// Creates an error_code object that has associated with it an arbitrary +/// error messsage. 
The value() of the error_code will always be non-zero +/// but its value is meaningless. The messsage() will be (a copy of) the +/// supplied error string. +/// Note: Once ErrorOr<> is updated to work with errors other than error_code, +/// this can be updated to return some other kind of error. +std::error_code make_dynamic_error_code(StringRef msg); +std::error_code make_dynamic_error_code(const Twine &msg); + +} // end namespace lld + +namespace std { +template <> +struct is_error_code_enum<lld::NativeReaderError> : std::true_type {}; +template <> struct is_error_code_enum<lld::YamlReaderError> : std::true_type {}; +template <> +struct is_error_code_enum<lld::LinkerScriptReaderError> : std::true_type {}; +} + +#endif diff --git a/include/lld/Core/File.h b/include/lld/Core/File.h new file mode 100644 index 000000000000..25b177ec879c --- /dev/null +++ b/include/lld/Core/File.h @@ -0,0 +1,324 @@ +//===- Core/File.h - A Container of Atoms ---------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CORE_FILE_H +#define LLD_CORE_FILE_H + +#include "lld/Core/AbsoluteAtom.h" +#include "lld/Core/DefinedAtom.h" +#include "lld/Core/SharedLibraryAtom.h" +#include "lld/Core/UndefinedAtom.h" +#include "lld/Core/range.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/ErrorHandling.h" +#include <functional> +#include <memory> +#include <mutex> +#include <vector> + +namespace lld { + +class LinkingContext; + +/// Every Atom is owned by some File. A common scenario is for a single +/// object file (.o) to be parsed by some reader and produce a single +/// File object that represents the content of that object file. +/// +/// To iterate through the Atoms in a File there are four methods that +/// return collections. 
For instance to iterate through all the DefinedAtoms +/// in a File object use: +/// for (const DefinedAtoms *atom : file->defined()) { +/// } +/// +/// The Atom objects in a File are owned by the File object. The Atom objects +/// are destroyed when the File object is destroyed. +class File { +public: + virtual ~File(); + + /// \brief Kinds of files that are supported. + enum Kind { + kindObject, ///< object file (.o) + kindSharedLibrary, ///< shared library (.so) + kindArchiveLibrary ///< archive (.a) + }; + + /// \brief Returns file kind. Need for dyn_cast<> on File objects. + Kind kind() const { + return _kind; + } + + /// This returns the path to the file which was used to create this object + /// (e.g. "/tmp/foo.o"). If the file is a member of an archive file, the + /// returned string includes the archive file name. + StringRef path() const { + if (_archivePath.empty()) + return _path; + if (_archiveMemberPath.empty()) + _archiveMemberPath = (_archivePath + "(" + _path + ")").str(); + return _archiveMemberPath; + } + + /// Returns the path of the archive file name if this file is instantiated + /// from an archive file. Otherwise returns the empty string. + StringRef archivePath() const { return _archivePath; } + void setArchivePath(StringRef path) { _archivePath = path; } + + /// Returns the path name of this file. It doesn't include archive file name. + StringRef memberPath() const { return _path; } + + /// Returns the command line order of the file. + uint64_t ordinal() const { + assert(_ordinal != UINT64_MAX); + return _ordinal; + } + + /// Returns true/false depending on whether an ordinal has been set. + bool hasOrdinal() const { return (_ordinal != UINT64_MAX); } + + /// Sets the command line order of the file. + void setOrdinal(uint64_t ordinal) const { _ordinal = ordinal; } + + template <typename T> class atom_iterator; // forward reference + + /// For allocating any objects owned by this File. 
+ llvm::BumpPtrAllocator &allocator() const { + return _allocator; + } + + /// \brief For use iterating over DefinedAtoms in this File. + typedef atom_iterator<DefinedAtom> defined_iterator; + + /// \brief For use iterating over UndefinedAtoms in this File. + typedef atom_iterator<UndefinedAtom> undefined_iterator; + + /// \brief For use iterating over SharedLibraryAtoms in this File. + typedef atom_iterator<SharedLibraryAtom> shared_library_iterator; + + /// \brief For use iterating over AbsoluteAtoms in this File. + typedef atom_iterator<AbsoluteAtom> absolute_iterator; + + /// \brief Different object file readers may instantiate and manage atoms with + /// different data structures. This class is a collection abstraction. + /// Each concrete File instance must implement these atom_collection + /// methods to enable clients to iterate over the File's atoms. + template <typename T> + class atom_collection { + public: + virtual ~atom_collection() { } + virtual atom_iterator<T> begin() const = 0; + virtual atom_iterator<T> end() const = 0; + virtual const T *deref(const void *it) const = 0; + virtual void next(const void *&it) const = 0; + virtual uint64_t size() const = 0; + bool empty() const { return size() == 0; } + }; + + /// \brief The class is the iterator type used to iterate through a File's + /// Atoms. This iterator delegates the work to the associated atom_collection + /// object. There are four kinds of Atoms, so this iterator is templated on + /// the four base Atom kinds. 
+ template <typename T> + class atom_iterator : public std::iterator<std::forward_iterator_tag, T> { + public: + atom_iterator(const atom_collection<T> &c, const void *it) + : _collection(&c), _it(it) { } + + const T *operator*() const { + return _collection->deref(_it); + } + const T *operator->() const { + return _collection->deref(_it); + } + + friend bool operator==(const atom_iterator<T> &lhs, const atom_iterator<T> &rhs) { + return lhs._it == rhs._it; + } + + friend bool operator!=(const atom_iterator<T> &lhs, const atom_iterator<T> &rhs) { + return !(lhs == rhs); + } + + atom_iterator<T> &operator++() { + _collection->next(_it); + return *this; + } + private: + const atom_collection<T> *_collection; + const void *_it; + }; + + /// \brief Must be implemented to return the atom_collection object for + /// all DefinedAtoms in this File. + virtual const atom_collection<DefinedAtom> &defined() const = 0; + + /// \brief Must be implemented to return the atom_collection object for + /// all UndefinedAtomw in this File. + virtual const atom_collection<UndefinedAtom> &undefined() const = 0; + + /// \brief Must be implemented to return the atom_collection object for + /// all SharedLibraryAtoms in this File. + virtual const atom_collection<SharedLibraryAtom> &sharedLibrary() const = 0; + + /// \brief Must be implemented to return the atom_collection object for + /// all AbsoluteAtoms in this File. + virtual const atom_collection<AbsoluteAtom> &absolute() const = 0; + + /// \brief If a file is parsed using a different method than doParse(), + /// one must use this method to set the last error status, so that + /// doParse will not be called twice. Only YAML reader uses this + /// (because YAML reader does not read blobs but structured data). + void setLastError(std::error_code err) { _lastError = err; } + + std::error_code parse(); + + // This function is called just before the core linker tries to use + // a file. 
Currently the PECOFF reader uses this to trigger the + // driver to parse .drectve section (which contains command line options). + // If you want to do something having side effects, don't do that in + // doParse() because a file could be pre-loaded speculatively. + // Use this hook instead. + virtual void beforeLink() {} + + // Usually each file owns a std::unique_ptr<MemoryBuffer>. + // However, there's one special case. If a file is an archive file, + // the archive file and its children all shares the same memory buffer. + // This method is used by the ArchiveFile to give its children + // co-ownership of the buffer. + void setSharedMemoryBuffer(std::shared_ptr<MemoryBuffer> mb) { + _sharedMemoryBuffer = mb; + } + +protected: + /// \brief only subclasses of File can be instantiated + File(StringRef p, Kind kind) + : _path(p), _kind(kind), _ordinal(UINT64_MAX) {} + + /// \brief Subclasses should override this method to parse the + /// memory buffer passed to this file's constructor. + virtual std::error_code doParse() { return std::error_code(); } + + /// \brief This is a convenience class for File subclasses which manage their + /// atoms as a simple std::vector<>. + template <typename T> + class atom_collection_vector : public atom_collection<T> { + public: + atom_iterator<T> begin() const override { + auto *it = _atoms.empty() ? nullptr + : reinterpret_cast<const void *>(_atoms.data()); + return atom_iterator<T>(*this, it); + } + + atom_iterator<T> end() const override { + auto *it = _atoms.empty() ? 
nullptr : reinterpret_cast<const void *>( + _atoms.data() + _atoms.size()); + return atom_iterator<T>(*this, it); + } + + const T *deref(const void *it) const override { + return *reinterpret_cast<const T *const *>(it); + } + + void next(const void *&it) const override { + const T *const *p = reinterpret_cast<const T *const *>(it); + ++p; + it = reinterpret_cast<const void*>(p); + } + + uint64_t size() const override { return _atoms.size(); } + + std::vector<const T *> _atoms; + }; + + /// \brief This is a convenience class for File subclasses which need to + /// return an empty collection. + template <typename T> + class atom_collection_empty : public atom_collection<T> { + public: + atom_iterator<T> begin() const override { + return atom_iterator<T>(*this, nullptr); + } + atom_iterator<T> end() const override { + return atom_iterator<T>(*this, nullptr); + } + const T *deref(const void *it) const override { + llvm_unreachable("empty collection should never be accessed"); + } + void next(const void *&it) const override {} + uint64_t size() const override { return 0; } + }; + + static atom_collection_empty<DefinedAtom> _noDefinedAtoms; + static atom_collection_empty<UndefinedAtom> _noUndefinedAtoms; + static atom_collection_empty<SharedLibraryAtom> _noSharedLibraryAtoms; + static atom_collection_empty<AbsoluteAtom> _noAbsoluteAtoms; + mutable llvm::BumpPtrAllocator _allocator; + +private: + StringRef _path; + std::string _archivePath; + mutable std::string _archiveMemberPath; + Kind _kind; + mutable uint64_t _ordinal; + std::shared_ptr<MemoryBuffer> _sharedMemoryBuffer; + llvm::Optional<std::error_code> _lastError; + std::mutex _parseMutex; +}; + +/// \brief A mutable File. +class MutableFile : public File { +public: + /// \brief Add an atom to the file. Invalidates iterators for all returned + /// containters. 
+ virtual void addAtom(const Atom&) = 0; + + typedef range<std::vector<const DefinedAtom *>::iterator> DefinedAtomRange; + virtual DefinedAtomRange definedAtoms() = 0; + + virtual void + removeDefinedAtomsIf(std::function<bool(const DefinedAtom *)> pred) = 0; + +protected: + /// \brief only subclasses of MutableFile can be instantiated + MutableFile(StringRef p) : File(p, kindObject) {} +}; + +/// An ErrorFile represents a file that doesn't exist. +/// If you try to parse a file which doesn't exist, an instance of this +/// class will be returned. That's parse method always returns an error. +/// This is useful to delay erroring on non-existent files, so that we +/// can do unit testing a driver using non-existing file paths. +class ErrorFile : public File { +public: + ErrorFile(StringRef path, std::error_code ec) + : File(path, kindObject), _ec(ec) {} + + std::error_code doParse() override { return _ec; } + + const atom_collection<DefinedAtom> &defined() const override { + llvm_unreachable("internal error"); + } + const atom_collection<UndefinedAtom> &undefined() const override { + llvm_unreachable("internal error"); + } + const atom_collection<SharedLibraryAtom> &sharedLibrary() const override { + llvm_unreachable("internal error"); + } + const atom_collection<AbsoluteAtom> &absolute() const override { + llvm_unreachable("internal error"); + } + +private: + std::error_code _ec; +}; + +} // end namespace lld + +#endif diff --git a/include/lld/Core/Instrumentation.h b/include/lld/Core/Instrumentation.h new file mode 100644 index 000000000000..162375905e17 --- /dev/null +++ b/include/lld/Core/Instrumentation.h @@ -0,0 +1,132 @@ +//===- include/Core/Instrumentation.h - Instrumentation API ---------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief Provide an Instrumentation API that optionally uses VTune interfaces. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CORE_INSTRUMENTATION_H +#define LLD_CORE_INSTRUMENTATION_H + +#include "llvm/Support/Compiler.h" +#include <utility> + +#ifdef LLD_HAS_VTUNE +# include <ittnotify.h> +#endif + +namespace lld { +#ifdef LLD_HAS_VTUNE +/// \brief A unique global scope for instrumentation data. +/// +/// Domains last for the lifetime of the application and cannot be destroyed. +/// Multiple Domains created with the same name represent the same domain. +class Domain { + __itt_domain *_domain; + +public: + explicit Domain(const char *name) : _domain(__itt_domain_createA(name)) {} + + operator __itt_domain *() const { return _domain; } + __itt_domain *operator->() const { return _domain; } +}; + +/// \brief A global reference to a string constant. +/// +/// These are uniqued by the ITT runtime and cannot be deleted. They are not +/// specific to a domain. +/// +/// Prefer reusing a single StringHandle over passing a ntbs when the same +/// string will be used often. +class StringHandle { + __itt_string_handle *_handle; + +public: + StringHandle(const char *name) : _handle(__itt_string_handle_createA(name)) {} + + operator __itt_string_handle *() const { return _handle; } +}; + +/// \brief A task on a single thread. Nests within other tasks. +/// +/// Each thread has its own task stack and tasks nest recursively on that stack. +/// A task cannot transfer threads. +/// +/// SBRM is used to ensure task starts and ends are ballanced. The lifetime of +/// a task is either the lifetime of this object, or until end is called. 
+class ScopedTask { + __itt_domain *_domain; + + ScopedTask(const ScopedTask &) = delete; + ScopedTask &operator=(const ScopedTask &) = delete; + +public: + /// \brief Create a task in Domain \p d named \p s. + ScopedTask(const Domain &d, const StringHandle &s) : _domain(d) { + __itt_task_begin(d, __itt_null, __itt_null, s); + } + + ScopedTask(ScopedTask &&other) { + *this = std::move(other); + } + + ScopedTask &operator=(ScopedTask &&other) { + _domain = other._domain; + other._domain = nullptr; + return *this; + } + + /// \brief Prematurely end this task. + void end() { + if (_domain) + __itt_task_end(_domain); + _domain = nullptr; + } + + ~ScopedTask() { end(); } +}; + +/// \brief A specific point in time. Allows metadata to be associated. +class Marker { +public: + Marker(const Domain &d, const StringHandle &s) { + __itt_marker(d, __itt_null, s, __itt_scope_global); + } +}; +#else +class Domain { +public: + Domain(const char *name) {} +}; + +class StringHandle { +public: + StringHandle(const char *name) {} +}; + +class ScopedTask { +public: + ScopedTask(const Domain &d, const StringHandle &s) {} + void end() {} +}; + +class Marker { +public: + Marker(const Domain &d, const StringHandle &s) {} +}; +#endif + +inline const Domain &getDefaultDomain() { + static Domain domain("org.llvm.lld"); + return domain; +} +} // end namespace lld. + +#endif diff --git a/include/lld/Core/LLVM.h b/include/lld/Core/LLVM.h new file mode 100644 index 000000000000..1bc1173bd48b --- /dev/null +++ b/include/lld/Core/LLVM.h @@ -0,0 +1,75 @@ +//===--- LLVM.h - Import various common LLVM datatypes ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file forward declares and imports various common LLVM datatypes that +// lld wants to use unqualified. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CORE_LLVM_H +#define LLD_CORE_LLVM_H + +// This should be the only #include, force #includes of all the others on +// clients. +#include "llvm/ADT/Hashing.h" +#include "llvm/Support/Casting.h" +#include <utility> + +namespace llvm { + // ADT's. + class StringRef; + class Twine; + class MemoryBuffer; + template<typename T> class ArrayRef; + template<unsigned InternalLen> class SmallString; + template<typename T, unsigned N> class SmallVector; + template<typename T> class SmallVectorImpl; + + template<typename T> + struct SaveAndRestore; + + template<typename T> + class ErrorOr; + + class raw_ostream; + // TODO: DenseMap, ... +} + +namespace lld { + // Casting operators. + using llvm::isa; + using llvm::cast; + using llvm::dyn_cast; + using llvm::dyn_cast_or_null; + using llvm::cast_or_null; + + // ADT's. + using llvm::StringRef; + using llvm::Twine; + using llvm::MemoryBuffer; + using llvm::ArrayRef; + using llvm::SmallString; + using llvm::SmallVector; + using llvm::SmallVectorImpl; + using llvm::SaveAndRestore; + using llvm::ErrorOr; + + using llvm::raw_ostream; +} // end namespace lld. + +namespace std { +template <> struct hash<llvm::StringRef> { +public: + size_t operator()(const llvm::StringRef &s) const { + return llvm::hash_value(s); + } +}; +} + +#endif diff --git a/include/lld/Core/LinkingContext.h b/include/lld/Core/LinkingContext.h new file mode 100644 index 000000000000..81a3b4b4eb71 --- /dev/null +++ b/include/lld/Core/LinkingContext.h @@ -0,0 +1,364 @@ +//===- lld/Core/LinkingContext.h - Linker Target Info Interface -----------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CORE_LINKING_CONTEXT_H +#define LLD_CORE_LINKING_CONTEXT_H + +#include "lld/Core/Error.h" +#include "lld/Core/LLVM.h" +#include "lld/Core/Node.h" +#include "lld/Core/Parallel.h" +#include "lld/Core/Reference.h" +#include "lld/Core/range.h" +#include "lld/Core/Reader.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/raw_ostream.h" +#include <string> +#include <vector> + +namespace lld { +class PassManager; +class File; +class Writer; +class Node; +class SharedLibraryFile; + +/// \brief The LinkingContext class encapsulates "what and how" to link. +/// +/// The base class LinkingContext contains the options needed by core linking. +/// Subclasses of LinkingContext have additional options needed by specific +/// Writers. For example, ELFLinkingContext has methods that supplies +/// options to the ELF Writer and ELF Passes. +class LinkingContext { +public: + /// \brief The types of output file that the linker creates. + enum class OutputFileType : uint8_t { + Default, // The default output type for this target + YAML, // The output type is set to YAML + Native // The output file format is Native (Atoms) + }; + + virtual ~LinkingContext(); + + /// \name Methods needed by core linking + /// @{ + + /// Name of symbol linker should use as "entry point" to program, + /// usually "main" or "start". + virtual StringRef entrySymbolName() const { return _entrySymbolName; } + + /// Whether core linking should remove Atoms not reachable by following + /// References from the entry point Atom or from all global scope Atoms + /// if globalsAreDeadStripRoots() is true. + bool deadStrip() const { return _deadStrip; } + + /// Only used if deadStrip() returns true. Means all global scope Atoms + /// should be marked live (along with all Atoms they reference). Usually + /// this method returns false for main executables, but true for dynamic + /// shared libraries. 
+ bool globalsAreDeadStripRoots() const { return _globalsAreDeadStripRoots; }; + + /// Only used if deadStrip() returns true. This method returns the names + /// of DefinedAtoms that should be marked live (along with all Atoms they + /// reference). Only Atoms with scope scopeLinkageUnit or scopeGlobal can + /// be kept live using this method. + const std::vector<StringRef> &deadStripRoots() const { + return _deadStripRoots; + } + + /// Add the given symbol name to the dead strip root set. Only used if + /// deadStrip() returns true. + void addDeadStripRoot(StringRef symbolName) { + assert(!symbolName.empty() && "Empty symbol cannot be a dead strip root"); + _deadStripRoots.push_back(symbolName); + } + + /// Archive files (aka static libraries) are normally lazily loaded. That is, + /// object files within an archive are only loaded and linked in, if the + /// object file contains a DefinedAtom which will replace an existing + /// UndefinedAtom. If this method returns true, core linking will also look + /// for archive members to replace existing tentative definitions in addition + /// to replacing undefines. Note: a "tentative definition" (also called a + /// "common" symbols) is a C (but not C++) concept. They are modeled in lld + /// as a DefinedAtom with merge() of mergeAsTentative. + bool searchArchivesToOverrideTentativeDefinitions() const { + return _searchArchivesToOverrideTentativeDefinitions; + } + + /// Normally core linking will turn a tentative definition into a real + /// definition if not replaced by a real DefinedAtom from some object file. + /// If this method returns true, core linking will search all supplied + /// dynamic shared libraries for symbol names that match remaining tentative + /// definitions. If any are found, the corresponding tentative definition + /// atom is replaced with SharedLibraryAtom. 
+ bool searchSharedLibrariesToOverrideTentativeDefinitions() const { + return _searchSharedLibrariesToOverrideTentativeDefinitions; + } + + /// Normally, every UndefinedAtom must be replaced by a DefinedAtom or a + /// SharedLibraryAtom for the link to be successful. This method controls + /// whether core linking prints out a list of remaining UndefinedAtoms. + /// + /// \todo This should be a method core linking calls with a list of the + /// UndefinedAtoms so that different drivers can format the error message + /// as needed. + bool printRemainingUndefines() const { return _printRemainingUndefines; } + + /// Normally, every UndefinedAtom must be replaced by a DefinedAtom or a + /// SharedLibraryAtom for the link to be successful. This method controls + /// whether core linking considers remaining undefines to be an error. + bool allowRemainingUndefines() const { return _allowRemainingUndefines; } + + /// In the lld model, a SharedLibraryAtom is a proxy atom for something + /// that will be found in a dynamic shared library when the program runs. + /// A SharedLibraryAtom optionally contains the name of the shared library + /// in which to find the symbol name at runtime. Core linking may merge + /// two SharedLibraryAtom with the same name. If this method returns true, + /// when merging core linking will also verify that they both have the same + /// loadName() and if not print a warning. + /// + /// \todo This should be a method core linking calls so that drivers can + /// format the warning as needed. + bool warnIfCoalesableAtomsHaveDifferentLoadName() const { + return _warnIfCoalesableAtomsHaveDifferentLoadName; + } + + /// In C/C++ you can mark a function's prototype with + /// __attribute__((weak_import)) or __attribute__((weak)) to say the function + /// may not be available at runtime and/or build time and in which case its + /// address will evaluate to NULL. In lld this is modeled using the + /// UndefinedAtom::canBeNull() method. 
During core linking, UndefinedAtom + /// with the same name are automatically merged. If this method returns + /// true, core link also verfies that the canBeNull() value for merged + /// UndefinedAtoms are the same and warns if not. + /// + /// \todo This should be a method core linking calls so that drivers can + /// format the warning as needed. + bool warnIfCoalesableAtomsHaveDifferentCanBeNull() const { + return _warnIfCoalesableAtomsHaveDifferentCanBeNull; + } + + /// Normally, every UndefinedAtom must be replaced by a DefinedAtom or a + /// SharedLibraryAtom for the link to be successful. This method controls + /// whether core linking considers remaining undefines from the shared library + /// to be an error. + bool allowShlibUndefines() const { return _allowShlibUndefines; } + + /// If true, core linking will write the path to each input file to stdout + /// (i.e. llvm::outs()) as it is used. This is used to implement the -t + /// linker option. + /// + /// \todo This should be a method core linking calls so that drivers can + /// format the line as needed. + bool logInputFiles() const { return _logInputFiles; } + + /// Parts of LLVM use global variables which are bound to command line + /// options (see llvm::cl::Options). This method returns "command line" + /// options which are used to configure LLVM's command line settings. + /// For instance the -debug-only XXX option can be used to dynamically + /// trace different parts of LLVM and lld. + const std::vector<const char *> &llvmOptions() const { return _llvmOptions; } + + /// \name Methods used by Drivers to configure TargetInfo + /// @{ + void setOutputPath(StringRef str) { _outputPath = str; } + + // Set the entry symbol name. You may also need to call addDeadStripRoot() for + // the symbol if your platform supports dead-stripping, so that the symbol + // will not be removed from the output. 
+ void setEntrySymbolName(StringRef name) { + _entrySymbolName = name; + } + + void setDeadStripping(bool enable) { _deadStrip = enable; } + void setAllowDuplicates(bool enable) { _allowDuplicates = enable; } + void setGlobalsAreDeadStripRoots(bool v) { _globalsAreDeadStripRoots = v; } + void setSearchArchivesToOverrideTentativeDefinitions(bool search) { + _searchArchivesToOverrideTentativeDefinitions = search; + } + void setSearchSharedLibrariesToOverrideTentativeDefinitions(bool search) { + _searchSharedLibrariesToOverrideTentativeDefinitions = search; + } + void setWarnIfCoalesableAtomsHaveDifferentCanBeNull(bool warn) { + _warnIfCoalesableAtomsHaveDifferentCanBeNull = warn; + } + void setWarnIfCoalesableAtomsHaveDifferentLoadName(bool warn) { + _warnIfCoalesableAtomsHaveDifferentLoadName = warn; + } + void setPrintRemainingUndefines(bool print) { + _printRemainingUndefines = print; + } + void setAllowRemainingUndefines(bool allow) { + _allowRemainingUndefines = allow; + } + void setAllowShlibUndefines(bool allow) { _allowShlibUndefines = allow; } + void setLogInputFiles(bool log) { _logInputFiles = log; } + + // Returns true if multiple definitions should not be treated as a + // fatal error. + bool getAllowDuplicates() const { return _allowDuplicates; } + + void appendLLVMOption(const char *opt) { _llvmOptions.push_back(opt); } + + void addAlias(StringRef from, StringRef to) { _aliasSymbols[from] = to; } + const std::map<std::string, std::string> &getAliases() const { + return _aliasSymbols; + } + + std::vector<std::unique_ptr<Node>> &getNodes() { return _nodes; } + const std::vector<std::unique_ptr<Node>> &getNodes() const { return _nodes; } + + /// Notify the LinkingContext when the symbol table found a name collision. + /// The useNew parameter specifies which the symbol table plans to keep, + /// but that can be changed by the LinkingContext. This is also an + /// opportunity for flavor specific processing. 
+ virtual void notifySymbolTableCoalesce(const Atom *existingAtom, + const Atom *newAtom, bool &useNew) {} + + /// This method adds undefined symbols specified by the -u option to + /// the list of undefined symbols known to the linker. This option essentially + /// forces an undefined symbol to be created. You may also need to call + /// addDeadStripRoot() for the symbol if your platform supports dead + /// stripping, so that the symbol will not be removed from the output. + void addInitialUndefinedSymbol(StringRef symbolName) { + _initialUndefinedSymbols.push_back(symbolName); + } + + /// Iterators for symbols that appear on the command line. + typedef std::vector<StringRef> StringRefVector; + typedef StringRefVector::iterator StringRefVectorIter; + typedef StringRefVector::const_iterator StringRefVectorConstIter; + + /// Create linker internal files containing atoms for the linker to include + /// during link. Flavors can override this function in their LinkingContext + /// to add more internal files. These internal files are positioned before + /// the actual input files. + virtual void createInternalFiles(std::vector<std::unique_ptr<File> > &) const; + + /// Return the list of undefined symbols that are specified in the + /// linker command line, using the -u option. + range<const StringRef *> initialUndefinedSymbols() const { + return _initialUndefinedSymbols; + } + + /// After all set* methods are called, the Driver calls this method + /// to validate that there are no missing options or invalid combinations + /// of options. If there is a problem, a description of the problem + /// is written to the supplied stream. + /// + /// \returns true if there is an error with the current settings. + bool validate(raw_ostream &diagnostics); + + /// Formats symbol name for use in error messages.
+ virtual std::string demangle(StringRef symbolName) const { + return symbolName; + } + + /// @} + /// \name Methods used by Driver::link() + /// @{ + + /// Returns the file system path to which the linked output should be written. + /// + /// \todo To support in-memory linking, we need an abstraction that allows + /// the linker to write to an in-memory buffer. + StringRef outputPath() const { return _outputPath; } + + /// Set the various output file types that the linker would + /// create + bool setOutputFileType(StringRef outputFileType) { + if (outputFileType.equals_lower("yaml")) + _outputFileType = OutputFileType::YAML; + else if (outputFileType.equals_lower("native")) + _outputFileType = OutputFileType::YAML; + else + return false; + return true; + } + + /// Returns the output file type that that the linker needs to create. + OutputFileType outputFileType() const { return _outputFileType; } + + /// Accessor for Register object embedded in LinkingContext. + const Registry ®istry() const { return _registry; } + Registry ®istry() { return _registry; } + + /// This method is called by core linking to give the Writer a chance + /// to add file format specific "files" to set of files to be linked. This is + /// how file format specific atoms can be added to the link. + virtual bool createImplicitFiles(std::vector<std::unique_ptr<File> > &); + + /// This method is called by core linking to build the list of Passes to be + /// run on the merged/linked graph of all input files. + virtual void addPasses(PassManager &pm); + + /// Calls through to the writeFile() method on the specified Writer. + /// + /// \param linkedFile This is the merged/linked graph of all input file Atoms. + virtual std::error_code writeFile(const File &linkedFile) const; + + /// Return the next ordinal and Increment it. + virtual uint64_t getNextOrdinalAndIncrement() const { return _nextOrdinal++; } + + // This function is called just before the Resolver kicks in. 
+ // Derived classes may use it to change the list of input files. + virtual void finalizeInputFiles() {} + + TaskGroup &getTaskGroup() { return _taskGroup; } + + /// @} +protected: + LinkingContext(); // Must be subclassed + + /// Abstract method to lazily instantiate the Writer. + virtual Writer &writer() const = 0; + + /// Method to create an internal file for the entry symbol + virtual std::unique_ptr<File> createEntrySymbolFile() const; + std::unique_ptr<File> createEntrySymbolFile(StringRef filename) const; + + /// Method to create an internal file for an undefined symbol + virtual std::unique_ptr<File> createUndefinedSymbolFile() const; + std::unique_ptr<File> createUndefinedSymbolFile(StringRef filename) const; + + /// Method to create an internal file for alias symbols + std::unique_ptr<File> createAliasSymbolFile() const; + + StringRef _outputPath; + StringRef _entrySymbolName; + bool _deadStrip; + bool _allowDuplicates; + bool _globalsAreDeadStripRoots; + bool _searchArchivesToOverrideTentativeDefinitions; + bool _searchSharedLibrariesToOverrideTentativeDefinitions; + bool _warnIfCoalesableAtomsHaveDifferentCanBeNull; + bool _warnIfCoalesableAtomsHaveDifferentLoadName; + bool _printRemainingUndefines; + bool _allowRemainingUndefines; + bool _logInputFiles; + bool _allowShlibUndefines; + OutputFileType _outputFileType; + std::vector<StringRef> _deadStripRoots; + std::map<std::string, std::string> _aliasSymbols; + std::vector<const char *> _llvmOptions; + StringRefVector _initialUndefinedSymbols; + std::vector<std::unique_ptr<Node>> _nodes; + mutable llvm::BumpPtrAllocator _allocator; + mutable uint64_t _nextOrdinal; + Registry _registry; + +private: + /// Validate the subclass bits. Only called by validate. 
+ virtual bool validateImpl(raw_ostream &diagnostics) = 0; + TaskGroup _taskGroup; +}; + +} // end namespace lld + +#endif diff --git a/include/lld/Core/Node.h b/include/lld/Core/Node.h new file mode 100644 index 000000000000..cd38fbd4a482 --- /dev/null +++ b/include/lld/Core/Node.h @@ -0,0 +1,78 @@ +//===- lld/Core/Node.h - Input file class ---------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// +/// The classes in this file represents inputs to the linker. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CORE_NODE_H +#define LLD_CORE_NODE_H + +#include "lld/Core/File.h" +#include "llvm/Option/ArgList.h" +#include <memory> +#include <vector> + +namespace lld { + +// A Node represents a FileNode or other type of Node. In the latter case, +// the node contains meta information about the input file list. +// Currently only GroupEnd node is defined as a meta node. +class Node { +public: + enum class Kind { File, GroupEnd }; + explicit Node(Kind type) : _kind(type) {} + virtual ~Node() {} + virtual Kind kind() const { return _kind; } + +private: + Kind _kind; +}; + +// This is a marker for --end-group. getSize() returns the number of +// files between the corresponding --start-group and this marker. +class GroupEnd : public Node { +public: + explicit GroupEnd(int size) : Node(Kind::GroupEnd), _size(size) {} + + int getSize() const { return _size; } + + static bool classof(const Node *a) { + return a->kind() == Kind::GroupEnd; + } + +private: + int _size; +}; + +// A container of File. 
+class FileNode : public Node { +public: + explicit FileNode(std::unique_ptr<File> f) + : Node(Node::Kind::File), _file(std::move(f)), _asNeeded(false) {} + + static bool classof(const Node *a) { + return a->kind() == Node::Kind::File; + } + + File *getFile() { return _file.get(); } + + void setAsNeeded(bool val) { _asNeeded = val; } + bool asNeeded() const { return _asNeeded; } + +protected: + std::unique_ptr<File> _file; + bool _asNeeded; +}; + +} // namespace lld + +#endif // LLD_CORE_NODE_H diff --git a/include/lld/Core/Parallel.h b/include/lld/Core/Parallel.h new file mode 100644 index 000000000000..65176ac2b04d --- /dev/null +++ b/include/lld/Core/Parallel.h @@ -0,0 +1,309 @@ +//===- lld/Core/Parallel.h - Parallel utilities ---------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CORE_PARALLEL_H +#define LLD_CORE_PARALLEL_H + +#include "lld/Core/Instrumentation.h" +#include "lld/Core/LLVM.h" +#include "lld/Core/range.h" +#include "llvm/Support/MathExtras.h" + +#ifdef _MSC_VER +// concrt.h depends on eh.h for __uncaught_exception declaration +// even if we disable exceptions. +#include <eh.h> +#endif + +#include <algorithm> +#include <atomic> +#include <condition_variable> +#include <mutex> +#include <thread> +#include <stack> + +#ifdef _MSC_VER +#include <concrt.h> +#include <ppl.h> +#endif + +namespace lld { +/// \brief Allows one or more threads to wait on a potentially unknown number of +/// events. +/// +/// A latch starts at \p count. inc() increments this, and dec() decrements it. +/// All calls to sync() will block while the count is not 0. +/// +/// Calling dec() on a Latch with a count of 0 has undefined behavior.
class Latch {
  uint32_t _count;
  mutable std::mutex _condMut;
  mutable std::condition_variable _cond;

public:
  explicit Latch(uint32_t count = 0) : _count(count) {}

  // Destruction blocks until the count has dropped to zero.
  ~Latch() { sync(); }

  void inc() {
    std::unique_lock<std::mutex> lock(_condMut);
    ++_count;
  }

  void dec() {
    std::unique_lock<std::mutex> lock(_condMut);
    if (--_count == 0)
      _cond.notify_all();
  }

  void sync() const {
    std::unique_lock<std::mutex> lock(_condMut);
    _cond.wait(lock, [&] { return _count == 0; });
  }
};

/// \brief An implementation of future. std::future and std::promise in
/// old libstdc++ have a threading bug; there is a small chance that a
/// call of future::get throws an exception in the normal use case.
/// We want to use our own future implementation until we drop support
/// of old versions of libstdc++.
/// https://gcc.gnu.org/ml/gcc-patches/2014-05/msg01389.html
template<typename T> class Future {
public:
  Future() : _hasValue(false) {}

  /// Store the value and wake every thread blocked in get().
  /// Must be called at most once.
  void set(T &&val) {
    {
      std::unique_lock<std::mutex> lock(_mutex);
      // Checked under the lock so the read of _hasValue cannot race with
      // another writer.
      assert(!_hasValue);
      // val is a named rvalue reference, i.e. an lvalue: move explicitly so
      // the value is not copied and move-only types are supported.
      _val = std::move(val);
      _hasValue = true;
    }
    _cond.notify_all();
  }

  /// Block until set() has been called, then return a reference to the
  /// stored value.
  T &get() {
    std::unique_lock<std::mutex> lock(_mutex);
    // The predicate form returns immediately if the value is already there.
    _cond.wait(lock, [&] { return _hasValue; });
    return _val;
  }

private:
  T _val;
  bool _hasValue;
  std::mutex _mutex;
  std::condition_variable _cond;
};

/// \brief An abstract class that takes closures and runs them asynchronously.
class Executor {
public:
  virtual ~Executor() {}
  virtual void add(std::function<void()> func) = 0;
};

/// \brief An implementation of an Executor that runs closures on a thread pool
/// in LIFO (last in, first out) order.
class ThreadPoolExecutor : public Executor {
public:
  /// \param threadCount number of worker threads. A value of 0 (which
  /// std::thread::hardware_concurrency() is allowed to return) is treated
  /// as 1.
  explicit ThreadPoolExecutor(unsigned threadCount =
                                  std::thread::hardware_concurrency())
      : _stop(false), _done(threadCount == 0 ? 1 : threadCount) {
    // One worker always runs on the spawning thread below, so the latch must
    // count at least one; otherwise its dec() would underflow the latch.
    if (threadCount == 0)
      threadCount = 1;
    // Spawn all but one of the threads in another thread as spawning threads
    // can take a while.
    std::thread([&, threadCount] {
      for (std::size_t i = 1; i < threadCount; ++i) {
        std::thread([=] {
          work();
        }).detach();
      }
      work();
    }).detach();
  }

  ~ThreadPoolExecutor() {
    std::unique_lock<std::mutex> lock(_mutex);
    _stop = true;
    lock.unlock();
    _cond.notify_all();
    // Wait for ~Latch.
  }

  void add(std::function<void()> f) override {
    std::unique_lock<std::mutex> lock(_mutex);
    _workStack.push(std::move(f));
    lock.unlock();
    _cond.notify_one();
  }

private:
  /// Worker loop: pop and run tasks until the executor is stopped, then
  /// signal the latch that this worker has left the loop.
  void work() {
    while (true) {
      std::unique_lock<std::mutex> lock(_mutex);
      _cond.wait(lock, [&] {
        return _stop || !_workStack.empty();
      });
      if (_stop)
        break;
      auto task = std::move(_workStack.top());
      _workStack.pop();
      // Run the task without holding the queue lock.
      lock.unlock();
      task();
    }
    _done.dec();
  }

  std::atomic<bool> _stop;
  std::stack<std::function<void()>> _workStack;
  std::mutex _mutex;
  std::condition_variable _cond;
  Latch _done;
};

#ifdef _MSC_VER
/// \brief An Executor that runs tasks via ConcRT.
class ConcRTExecutor : public Executor {
  struct Taskish {
    Taskish(std::function<void()> task) : _task(task) {}

    std::function<void()> _task;

    static void run(void *p) {
      Taskish *self = static_cast<Taskish *>(p);
      self->_task();
      // The object was created with placement new, so its destructor has to
      // be run by hand before the raw storage is released; otherwise the
      // std::function member is never destroyed.
      self->~Taskish();
      concurrency::Free(self);
    }
  };

public:
  void add(std::function<void()> func) override {
    Concurrency::CurrentScheduler::ScheduleTask(Taskish::run,
        new (concurrency::Alloc(sizeof(Taskish))) Taskish(func));
  }
};

inline Executor *getDefaultExecutor() {
  static ConcRTExecutor exec;
  return &exec;
}
#else
inline Executor *getDefaultExecutor() {
  static ThreadPoolExecutor exec;
  return &exec;
}
#endif

/// \brief Allows launching a number of tasks and waiting for them to finish
/// either explicitly via sync() or implicitly on destruction.
class TaskGroup {
  // Counts tasks that were spawned but have not finished yet.
  Latch _latch;

public:
  /// Run \p f on the default executor. The wrapper closure refers back to
  /// this TaskGroup's latch, so the group must stay alive until the task has
  /// finished.
  void spawn(std::function<void()> f) {
    _latch.inc();
    getDefaultExecutor()->add([&, f] {
      f();
      _latch.dec();
    });
  }

  /// Block until every spawned task has completed.
  void sync() const { _latch.sync(); }
};

#ifdef _MSC_VER
// Use ppl parallel_sort on Windows.
template <class RandomAccessIterator, class Comp>
void parallel_sort(
    RandomAccessIterator start, RandomAccessIterator end,
    const Comp &comp = std::less<
        typename std::iterator_traits<RandomAccessIterator>::value_type>()) {
  concurrency::parallel_sort(start, end, comp);
}
#else
namespace detail {
// Ranges shorter than this are sorted sequentially with std::sort.
const ptrdiff_t minParallelSize = 1024;

/// \brief Inclusive median.
///
/// Returns an iterator to the median (under \p comp) of the first, middle and
/// last elements of [start, end). The range must not be empty, since
/// *(end - 1) is read.
template <class RandomAccessIterator, class Comp>
RandomAccessIterator medianOf3(RandomAccessIterator start,
                               RandomAccessIterator end, const Comp &comp) {
  RandomAccessIterator mid = start + (std::distance(start, end) / 2);
  return comp(*start, *(end - 1))
         ? (comp(*mid, *(end - 1)) ? (comp(*start, *mid) ? mid : start)
                                   : end - 1)
         : (comp(*mid, *start) ? (comp(*(end - 1), *mid) ? mid : end - 1)
                               : start);
}

/// Quick sort that hands the left partition to \p tg as a task and recurses
/// into the right partition on the calling thread. \p depth is decremented at
/// each level; when it reaches 0, or the range is shorter than
/// minParallelSize, the range is sorted with std::sort instead.
template <class RandomAccessIterator, class Comp>
void parallel_quick_sort(RandomAccessIterator start, RandomAccessIterator end,
                         const Comp &comp, TaskGroup &tg, size_t depth) {
  // Do a sequential sort for small inputs.
  if (std::distance(start, end) < detail::minParallelSize || depth == 0) {
    std::sort(start, end, comp);
    return;
  }

  // Partition.
  auto pivot = medianOf3(start, end, comp);
  // Move pivot to end.
  std::swap(*(end - 1), *pivot);
  pivot = std::partition(start, end - 1, [&comp, end](decltype(*start) v) {
    return comp(v, *(end - 1));
  });
  // Move pivot to middle of partition.
  std::swap(*pivot, *(end - 1));

  // Recurse.
  // comp and tg are captured by reference; the iterators and depth by value.
  tg.spawn([=, &comp, &tg] {
    parallel_quick_sort(start, pivot, comp, tg, depth - 1);
  });
  parallel_quick_sort(pivot + 1, end, comp, tg, depth - 1);
}
}

/// Sort [start, end) with \p comp using parallel_quick_sort. The local
/// TaskGroup is destroyed on return, which waits for all spawned tasks.
template <class RandomAccessIterator, class Comp>
void parallel_sort(
    RandomAccessIterator start, RandomAccessIterator end,
    const Comp &comp = std::less<
        typename std::iterator_traits<RandomAccessIterator>::value_type>()) {
  TaskGroup tg;
  detail::parallel_quick_sort(start, end, comp, tg,
                              llvm::Log2_64(std::distance(start, end)) + 1);
}
#endif

/// Convenience overload for pointer ranges: sorts with std::less<T>.
template <class T> void parallel_sort(T *start, T *end) {
  parallel_sort(start, end, std::less<T>());
}

#ifdef _MSC_VER
// Use ppl parallel_for_each on Windows.
template <class Iterator, class Func>
void parallel_for_each(Iterator begin, Iterator end, Func func) {
  concurrency::parallel_for_each(begin, end, func);
}
#else
/// Apply \p func to every element of [begin, end). Full chunks of 1024
/// elements are spawned as tasks; the remaining tail is processed on the
/// calling thread.
template <class Iterator, class Func>
void parallel_for_each(Iterator begin, Iterator end, Func func) {
  TaskGroup tg;
  ptrdiff_t taskSize = 1024;
  while (taskSize <= std::distance(begin, end)) {
    // func is captured by reference; tg is destroyed (and therefore waits for
    // the tasks) before this function returns.
    tg.spawn([=, &func] { std::for_each(begin, begin + taskSize, func); });
    begin += taskSize;
  }
  std::for_each(begin, end, func);
}
#endif
} // end namespace lld

#endif
diff --git a/include/lld/Core/Pass.h b/include/lld/Core/Pass.h new file mode 100644 index 000000000000..7a9d2453f482 --- /dev/null +++ b/include/lld/Core/Pass.h @@ -0,0 +1,46 @@
//===------ Core/Pass.h - Base class for linker passes --------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CORE_PASS_H +#define LLD_CORE_PASS_H + +#include "lld/Core/Atom.h" +#include "lld/Core/File.h" +#include "lld/Core/Reference.h" +#include "lld/Core/range.h" +#include <vector> + +namespace lld { +class MutableFile; + +/// Once the core linking is done (which resolves references, coalesces atoms +/// and produces a complete Atom graph), the linker runs a series of passes +/// on the Atom graph. The graph is modeled as a File, which means the pass +/// has access to all the atoms and to File level attributes. Each pass does +/// a particular transformation to the Atom graph or to the File attributes. +/// +/// This is the abstract base class for all passes. A Pass does its +/// actual work in it perform() method. It can iterator over Atoms in the +/// graph using the *begin()/*end() atom iterator of the File. It can add +/// new Atoms to the graph using the File's addAtom() method. +class Pass { +public: + virtual ~Pass() { } + + /// Do the actual work of the Pass. + virtual void perform(std::unique_ptr<MutableFile> &mergedFile) = 0; + +protected: + // Only subclassess can be instantiated. + Pass() { } +}; + +} // namespace lld + +#endif // LLD_CORE_PASS_H diff --git a/include/lld/Core/PassManager.h b/include/lld/Core/PassManager.h new file mode 100644 index 000000000000..65fc4d806ceb --- /dev/null +++ b/include/lld/Core/PassManager.h @@ -0,0 +1,46 @@ +//===- lld/Core/PassManager.h - Manage linker passes ----------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CORE_PASS_MANAGER_H +#define LLD_CORE_PASS_MANAGER_H + +#include "lld/Core/LLVM.h" +#include "lld/Core/Pass.h" +#include <memory> +#include <vector> + +namespace lld { +class MutableFile; +class Pass; + +/// \brief Owns and runs a collection of passes. +/// +/// This class is currently just a container for passes and a way to run them. +/// +/// In the future this should handle timing pass runs, running parallel passes, +/// and validate/satisfy pass dependencies. +class PassManager { +public: + void add(std::unique_ptr<Pass> pass) { + _passes.push_back(std::move(pass)); + } + + std::error_code runOnFile(std::unique_ptr<MutableFile> &file) { + for (std::unique_ptr<Pass> &pass : _passes) + pass->perform(file); + return std::error_code(); + } + +private: + /// \brief Passes in the order they should run. + std::vector<std::unique_ptr<Pass>> _passes; +}; +} // end namespace lld + +#endif diff --git a/include/lld/Core/Reader.h b/include/lld/Core/Reader.h new file mode 100644 index 000000000000..ac90c5a7e85c --- /dev/null +++ b/include/lld/Core/Reader.h @@ -0,0 +1,169 @@ +//===- lld/Core/Reader.h - Abstract File Format Reading Interface ---------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
//
//===----------------------------------------------------------------------===//

#ifndef LLD_CORE_READER_H
#define LLD_CORE_READER_H

#include "lld/Core/LLVM.h"
#include "lld/Core/Reference.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/YAMLTraits.h"
#include <functional>
#include <memory>
#include <vector>

// NOTE(review): this using-declaration sits at global scope in a header, so
// every file that includes Reader.h sees an unqualified file_magic.
using llvm::sys::fs::file_magic;

namespace llvm {
namespace yaml {
class IO;
}
}

namespace lld {
class ELFLinkingContext;
class File;
class LinkingContext;
class PECOFFLinkingContext;
class TargetHandlerBase;
class MachOLinkingContext;

/// \brief An abstract class for reading object files, library files, and
/// executable files.
///
/// Each file format (e.g. ELF, mach-o, PECOFF, native, etc.) has a concrete
/// subclass of Reader.
class Reader {
public:
  virtual ~Reader() {}

  /// Sniffs the file to determine if this Reader can parse it.
  /// The method is called with:
  /// 1) the file_magic enumeration returned by identify_magic()
  /// 2) the file extension (e.g. ".obj")
  /// 3) the whole file content buffer if the above is not enough.
  virtual bool canParse(file_magic magic, StringRef fileExtension,
                        const MemoryBuffer &mb) const = 0;

  /// \brief Parse a supplied buffer (already filled with the contents of a
  /// file) and create a File object.
  /// The resulting File object takes ownership of the MemoryBuffer.
  /// Parsed files are returned through \p result.
  virtual std::error_code
  loadFile(std::unique_ptr<MemoryBuffer> mb, const class Registry &,
           std::vector<std::unique_ptr<File>> &result) const = 0;
};


/// \brief An abstract class for handling alternate yaml representations
/// of object files.
///
/// The YAML syntax allows "tags" which are used to specify the type of
/// the YAML node. In lld, top level YAML documents can be in many YAML
/// representations (e.g. mach-o encoded as yaml, etc). A tag is used to
/// specify which representation is used in the following YAML document.
/// To work, there must be a YamlIOTaggedDocumentHandler registered that
/// handles each tag type.
class YamlIOTaggedDocumentHandler {
public:
  virtual ~YamlIOTaggedDocumentHandler();

  /// This method is called on each registered YamlIOTaggedDocumentHandler
  /// until one returns true. If the subclass handles tag type !xyz, then
  /// this method should call io.mapTag("!xyz") to see if that is the current
  /// document type, and if so, process the rest of the document using
  /// YAML I/O, then convert the result into an lld::File* and return it.
  virtual bool handledDocTag(llvm::yaml::IO &io, const lld::File *&f) const = 0;
};


/// A registry to hold the list of currently registered Readers and
/// tables which map Reference kind values to strings.
/// The linker does not directly invoke Readers. Instead, it registers
/// Readers based on its configuration and command line options, then calls
/// the Registry object to parse files.
class Registry {
public:
  Registry();

  /// Walk the list of registered Readers and find one that can parse the
  /// supplied file and parse it.
  std::error_code loadFile(std::unique_ptr<MemoryBuffer> mb,
                           std::vector<std::unique_ptr<File>> &result) const;

  /// Walk the list of registered kind tables to convert a Reference Kind
  /// name to a value.
  bool referenceKindFromString(StringRef inputStr, Reference::KindNamespace &ns,
                               Reference::KindArch &a,
                               Reference::KindValue &value) const;

  /// Walk the list of registered kind tables to convert a Reference Kind
  /// value to a string.
  bool referenceKindToString(Reference::KindNamespace ns, Reference::KindArch a,
                             Reference::KindValue value, StringRef &) const;

  /// Walk the list of registered tag handlers and have the one that handles
  /// the current document type process the yaml into an lld::File*.
  bool handleTaggedDoc(llvm::yaml::IO &io, const lld::File *&file) const;

  // These methods are called to dynamically add support for various file
  // formats. The methods are also implemented in the appropriate lib*.a
  // library, so that the code for handling a format is only linked in, if this
  // method is used. Any options that a Reader might need must be passed
  // as parameters to the addSupport*() method.
  void addSupportArchives(bool logLoading);
  void addSupportYamlFiles();
  void addSupportNativeObjects();
  void addSupportCOFFObjects(PECOFFLinkingContext &);
  void addSupportCOFFImportLibraries(PECOFFLinkingContext &);
  void addSupportMachOObjects(MachOLinkingContext &);
  void addSupportELFObjects(ELFLinkingContext &);
  void addSupportELFDynamicSharedObjects(ELFLinkingContext &);

  /// To convert between kind values and names, the registry walks the list
  /// of registered kind tables. Each table is a zero terminated array of
  /// KindStrings elements.
  struct KindStrings {
    Reference::KindValue value;
    StringRef name;
  };

  /// A Reference Kind value is a tuple of <namespace, arch, value>. All
  /// entries in a conversion table have the same <namespace, arch>. The
  /// array then contains the value/name pairs.
  void addKindTable(Reference::KindNamespace ns, Reference::KindArch arch,
                    const KindStrings array[]);


private:
  /// One registered kind table together with the <namespace, arch> it
  /// applies to.
  struct KindEntry {
    Reference::KindNamespace ns;
    Reference::KindArch arch;
    const KindStrings *array;
  };

  void add(std::unique_ptr<Reader>);
  void add(std::unique_ptr<YamlIOTaggedDocumentHandler>);

  std::vector<std::unique_ptr<Reader>> _readers;
  std::vector<std::unique_ptr<YamlIOTaggedDocumentHandler>> _yamlHandlers;
  std::vector<KindEntry> _kindEntries;
};

// Utilities for building a KindString table. For instance:
//   static const Registry::KindStrings table[] = {
//     LLD_KIND_STRING_ENTRY(R_VAX_ADDR16),
//     LLD_KIND_STRING_ENTRY(R_VAX_DATA16),
//     LLD_KIND_STRING_END
//   };
#define LLD_KIND_STRING_ENTRY(name) { name, #name }
#define LLD_KIND_STRING_END { 0, "" }

} // end namespace lld

#endif
diff --git a/include/lld/Core/Reference.h b/include/lld/Core/Reference.h new file mode 100644 index 000000000000..7a804c31e182 --- /dev/null +++ b/include/lld/Core/Reference.h @@ -0,0 +1,125 @@
//===- Core/Reference.h - A Reference to Another Atom ---------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#ifndef LLD_CORE_REFERENCES_H
#define LLD_CORE_REFERENCES_H

#include "lld/Core/LLVM.h"
#include "llvm/ADT/StringSwitch.h"

namespace lld {
class Atom;

///
/// The linker has a Graph Theory model of linking. An object file is seen
/// as a set of Atoms with References to other Atoms. Each Atom is a node
/// and each Reference is an edge.
///
/// For example if a function contains a call site to "malloc" 40 bytes into
/// the Atom, then the function Atom will have a Reference of: offsetInAtom=40,
/// kind=callsite, target=malloc, addend=0.
///
/// Besides supporting traditional "relocations", References are also used for
/// grouping atoms (group comdat), forcing layout (one atom must follow
/// another), marking data-in-code (jump tables or ARM constants), etc.
///
/// The "kind" of a reference is a tuple of <namespace, arch, value>. This
/// enables us to re-use existing relocation types defined for various
/// file formats and architectures. For instance, in ELF the relocation type 10
/// means R_X86_64_32 for x86_64, and R_386_GOTPC for i386. For PE/COFF
/// relocation 10 means IMAGE_REL_AMD64_SECTION.
///
/// References and atoms form a directed graph. The dead-stripping pass
/// traverses them starting from dead-strip root atoms to garbage collect
/// unreachable ones.
///
/// References of any kind are considered as directed edges. In addition to
/// that, references of some kinds are considered as bidirected edges.
class Reference {
public:
  /// Which universe defines the kindValue().
  enum class KindNamespace {
    all = 0,
    testing = 1,
    ELF = 2,
    COFF = 3,
    mach_o = 4,
  };

  // The namespace is stored as a uint8_t and converted on access.
  KindNamespace kindNamespace() const { return (KindNamespace)_kindNamespace; }
  void setKindNamespace(KindNamespace ns) { _kindNamespace = (uint8_t)ns; }

  // Which architecture the kind value is for.
  enum class KindArch { all, AArch64, ARM, Hexagon, Mips, x86, x86_64 };

  // The architecture is stored as a uint8_t and converted on access.
  KindArch kindArch() const { return (KindArch)_kindArch; }
  void setKindArch(KindArch a) { _kindArch = (uint8_t)a; }

  typedef uint16_t KindValue;

  KindValue kindValue() const { return _kindValue; }

  /// setKindValue() is needed because during linking, some optimizations may
  /// change the codegen and hence the reference kind.
  void setKindValue(KindValue value) {
    _kindValue = value;
  }

  /// KindValues used with KindNamespace::all and KindArch::all.
  enum {
    // kindLayoutAfter is treated as a bidirected edge by the dead-stripping
    // pass.
    kindLayoutAfter = 1,
    // kindGroupChild is treated as a bidirected edge too.
    kindGroupChild,
    kindAssociate,
  };

  // A value to be added to the value of a target
  typedef int64_t Addend;

  /// If the reference is a fixup in the Atom, then this returns the
  /// byte offset into the Atom's content to do the fix up.
  virtual uint64_t offsetInAtom() const = 0;

  /// Returns the atom this reference refers to.
  virtual const Atom *target() const = 0;

  /// During linking, the linker may merge graphs which coalesces some nodes
  /// (i.e. Atoms). To switch the target of a reference, this method is called.
  virtual void setTarget(const Atom *) = 0;

  /// Some relocations require a symbol and a value (e.g. foo + 4).
  virtual Addend addend() const = 0;

  /// During linking, some optimizations may change addend value.
  virtual void setAddend(Addend) = 0;

  /// Returns target specific attributes of the reference.
  /// The default implementation returns 0.
  virtual uint32_t tag() const { return 0; }

protected:
  /// Reference is an abstract base class. Only subclasses can use constructor.
  Reference(KindNamespace ns, KindArch a, KindValue value)
      : _kindValue(value), _kindNamespace((uint8_t)ns), _kindArch((uint8_t)a) {}

  /// The memory for Reference objects is always managed by the owning File
  /// object. Therefore, no one but the owning File object should call
  /// delete on a Reference. In fact, some File objects may bulk allocate
  /// an array of References, so they cannot be individually deleted by anyone.
  virtual ~Reference() {}

  KindValue _kindValue;
  uint8_t _kindNamespace;
  uint8_t _kindArch;
};

} // namespace lld

#endif // LLD_CORE_REFERENCES_H
diff --git a/include/lld/Core/Resolver.h b/include/lld/Core/Resolver.h new file mode 100644 index 000000000000..e16c07b839fa --- /dev/null +++ b/include/lld/Core/Resolver.h @@ -0,0 +1,119 @@
//===- Core/Resolver.h - Resolves Atom References -------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CORE_RESOLVER_H +#define LLD_CORE_RESOLVER_H + +#include "lld/Core/ArchiveLibraryFile.h" +#include "lld/Core/File.h" +#include "lld/Core/SharedLibraryFile.h" +#include "lld/Core/Simple.h" +#include "lld/Core/SymbolTable.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include <set> +#include <unordered_map> +#include <unordered_set> +#include <vector> + +namespace lld { + +class Atom; +class LinkingContext; + +/// \brief The Resolver is responsible for merging all input object files +/// and producing a merged graph. +class Resolver { +public: + Resolver(LinkingContext &ctx) + : _ctx(ctx), _symbolTable(ctx), _result(new MergedFile()), + _fileIndex(0) {} + + // InputFiles::Handler methods + void doDefinedAtom(const DefinedAtom&); + bool doUndefinedAtom(const UndefinedAtom &); + void doSharedLibraryAtom(const SharedLibraryAtom &); + void doAbsoluteAtom(const AbsoluteAtom &); + + // Handle files, this adds atoms from the current file thats + // being processed by the resolver + bool handleFile(File &); + + // Handle an archive library file. + bool handleArchiveFile(File &); + + // Handle a shared library file. + void handleSharedLibrary(File &); + + /// @brief do work of merging and resolving and return list + bool resolve(); + + std::unique_ptr<MutableFile> resultFile() { return std::move(_result); } + +private: + typedef std::function<void(StringRef, bool)> UndefCallback; + + bool undefinesAdded(int begin, int end); + File *getFile(int &index); + + /// \brief Add section group/.gnu.linkonce if it does not exist previously. 
+ void maybeAddSectionGroupOrGnuLinkOnce(const DefinedAtom &atom); + + /// \brief The main function that iterates over the files to resolve + void updatePreloadArchiveMap(); + bool resolveUndefines(); + void updateReferences(); + void deadStripOptimize(); + bool checkUndefines(); + void removeCoalescedAwayAtoms(); + void checkDylibSymbolCollisions(); + void forEachUndefines(File &file, bool searchForOverrides, UndefCallback callback); + + void markLive(const Atom *atom); + void addAtoms(const std::vector<const DefinedAtom *>&); + void maybePreloadArchiveMember(StringRef sym); + + class MergedFile : public SimpleFile { + public: + MergedFile() : SimpleFile("<linker-internal>") {} + void addAtoms(std::vector<const Atom*>& atoms); + }; + + LinkingContext &_ctx; + SymbolTable _symbolTable; + std::vector<const Atom *> _atoms; + std::set<const Atom *> _deadStripRoots; + llvm::DenseSet<const Atom *> _liveAtoms; + llvm::DenseSet<const Atom *> _deadAtoms; + std::unique_ptr<MergedFile> _result; + std::unordered_multimap<const Atom *, const Atom *> _reverseRef; + + // --start-group and --end-group + std::vector<File *> _files; + std::map<File *, bool> _newUndefinesAdded; + size_t _fileIndex; + + // Preloading + llvm::StringMap<ArchiveLibraryFile *> _archiveMap; + llvm::DenseSet<ArchiveLibraryFile *> _archiveSeen; + + // List of undefined symbols. + std::vector<StringRef> _undefines; + + // Start position in _undefines for each archive/shared library file. + // Symbols from index 0 to the start position are already searched before. + // Searching them again would never succeed. When we look for undefined + // symbols from an archive/shared library file, start from its start + // position to save time. 
+ std::map<File *, size_t> _undefineIndex; +}; + +} // namespace lld + +#endif // LLD_CORE_RESOLVER_H diff --git a/include/lld/Core/STDExtras.h b/include/lld/Core/STDExtras.h new file mode 100644 index 000000000000..4a6183891844 --- /dev/null +++ b/include/lld/Core/STDExtras.h @@ -0,0 +1,29 @@ +//===- lld/Core/STDExtra.h - Helpers for the stdlib -----------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CORE_STD_EXTRA_H +#define LLD_CORE_STD_EXTRA_H + +namespace lld { +/// \brief Deleter for smart pointers that only calls the destructor. Memory is +/// managed elsewhere. A common use of this is for things allocated with a +/// BumpPtrAllocator. +template <class T> +struct destruct_delete { + void operator ()(T *ptr) { + ptr->~T(); + } +}; + +template <class T> +using unique_bump_ptr = std::unique_ptr<T, destruct_delete<T>>; + +} // end namespace lld + +#endif diff --git a/include/lld/Core/SharedLibraryAtom.h b/include/lld/Core/SharedLibraryAtom.h new file mode 100644 index 000000000000..1b0c37c41138 --- /dev/null +++ b/include/lld/Core/SharedLibraryAtom.h @@ -0,0 +1,53 @@ +//===- Core/SharedLibraryAtom.h - A Shared Library Atom -------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CORE_SHARED_LIBRARY_ATOM_H +#define LLD_CORE_SHARED_LIBRARY_ATOM_H + +#include "lld/Core/Atom.h" + +namespace lld { + +/// A SharedLibraryAtom has no content. +/// It exists to represent a symbol which will be bound at runtime. 
class SharedLibraryAtom : public Atom {
public:
  enum class Type : uint32_t {
    Unknown,
    Code,
    Data,
  };

  /// Returns shared library name used to load it at runtime.
  /// On linux that is the DT_NEEDED name.
  /// On Darwin it is the LC_DYLIB_LOAD dylib name.
  /// On Windows it is the DLL name that is referred to from the .idata
  /// section.
  virtual StringRef loadName() const = 0;

  /// Returns if shared library symbol can be missing at runtime and if
  /// so the loader should silently resolve address of symbol to be nullptr.
  virtual bool canBeNullAtRuntime() const = 0;

  virtual Type type() const = 0;

  virtual uint64_t size() const = 0;

  /// LLVM-style RTTI support: true when \p a is a shared library atom.
  static bool classof(const Atom *a) {
    return a->definition() == definitionSharedLibrary;
  }

  static inline bool classof(const SharedLibraryAtom *) { return true; }

protected:
  SharedLibraryAtom() : Atom(definitionSharedLibrary) {}
};

} // namespace lld

#endif // LLD_CORE_SHARED_LIBRARY_ATOM_H
diff --git a/include/lld/Core/SharedLibraryFile.h b/include/lld/Core/SharedLibraryFile.h new file mode 100644 index 000000000000..2f84624287d8 --- /dev/null +++ b/include/lld/Core/SharedLibraryFile.h @@ -0,0 +1,65 @@
//===- Core/SharedLibraryFile.h - Models shared libraries as Atoms --------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#ifndef LLD_CORE_SHARED_LIBRARY_FILE_H
#define LLD_CORE_SHARED_LIBRARY_FILE_H

#include "lld/Core/File.h"

namespace lld {

///
/// The SharedLibraryFile subclass of File is used to represent dynamic
/// shared libraries being linked against.
///
class SharedLibraryFile : public File {
public:
  /// LLVM-style RTTI support: true when \p f is a shared library file.
  static bool classof(const File *f) {
    return f->kind() == kindSharedLibrary;
  }

  /// Check if the shared library exports a symbol with the specified name.
  /// If so, return a SharedLibraryAtom which represents that exported
  /// symbol. Otherwise return nullptr.
  virtual const SharedLibraryAtom *exports(StringRef name,
                                           bool dataSymbolOnly) const = 0;

  // Returns DSO name. It's the soname (ELF), the install name (MachO) or
  // the import name (Windows).
  virtual StringRef getDSOName() const = 0;

  // The four accessors below expose the per-kind atom collections stored in
  // this object.
  const atom_collection<DefinedAtom> &defined() const override {
    return _definedAtoms;
  }

  const atom_collection<UndefinedAtom> &undefined() const override {
    return _undefinedAtoms;
  }

  const atom_collection<SharedLibraryAtom> &sharedLibrary() const override {
    return _sharedLibraryAtoms;
  }

  const atom_collection<AbsoluteAtom> &absolute() const override {
    return _absoluteAtoms;
  }

protected:
  /// only subclasses of SharedLibraryFile can be instantiated
  explicit SharedLibraryFile(StringRef path) : File(path, kindSharedLibrary) {}

  atom_collection_vector<DefinedAtom> _definedAtoms;
  atom_collection_vector<UndefinedAtom> _undefinedAtoms;
  atom_collection_vector<SharedLibraryAtom> _sharedLibraryAtoms;
  atom_collection_vector<AbsoluteAtom> _absoluteAtoms;
};

} // namespace lld

#endif // LLD_CORE_SHARED_LIBRARY_FILE_H
diff --git a/include/lld/Core/Simple.h b/include/lld/Core/Simple.h new file mode 100644 index 000000000000..71d0c0702301 --- /dev/null +++ b/include/lld/Core/Simple.h @@ -0,0 +1,341 @@
//===- lld/Core/Simple.h - Simple implementations of Atom and File --------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Provide simple implementations for Atoms and File.
///
//===----------------------------------------------------------------------===//

#ifndef LLD_CORE_SIMPLE_H
#define LLD_CORE_SIMPLE_H

#include "lld/Core/DefinedAtom.h"
#include "lld/Core/File.h"
#include "lld/Core/ArchiveLibraryFile.h"
#include "lld/Core/LinkingContext.h"
#include "lld/Core/Reference.h"
#include "lld/Core/UndefinedAtom.h"
#include "llvm/ADT/ilist.h"
#include "llvm/ADT/ilist_node.h"

namespace lld {

/// A MutableFile that keeps its atoms in four per-kind vectors.
class SimpleFile : public MutableFile {
public:
  SimpleFile(StringRef path) : MutableFile(path) {}

  /// Append \p atom to the collection matching its dynamic kind. An atom that
  /// is none of the four known kinds hits llvm_unreachable.
  void addAtom(const Atom &atom) override {
    if (auto *defAtom = dyn_cast<DefinedAtom>(&atom)) {
      _definedAtoms._atoms.push_back(defAtom);
    } else if (auto *undefAtom = dyn_cast<UndefinedAtom>(&atom)) {
      _undefinedAtoms._atoms.push_back(undefAtom);
    } else if (auto *shlibAtom = dyn_cast<SharedLibraryAtom>(&atom)) {
      _sharedLibraryAtoms._atoms.push_back(shlibAtom);
    } else if (auto *absAtom = dyn_cast<AbsoluteAtom>(&atom)) {
      _absoluteAtoms._atoms.push_back(absAtom);
    } else {
      llvm_unreachable("atom has unknown definition kind");
    }
  }

  /// Erase every defined atom for which \p pred returns true
  /// (remove_if followed by erase).
  void
  removeDefinedAtomsIf(std::function<bool(const DefinedAtom *)> pred) override {
    auto &atoms = _definedAtoms._atoms;
    auto newEnd = std::remove_if(atoms.begin(), atoms.end(), pred);
    atoms.erase(newEnd, atoms.end());
  }

  const atom_collection<DefinedAtom> &defined() const override {
    return _definedAtoms;
  }

  const atom_collection<UndefinedAtom> &undefined() const override {
    return _undefinedAtoms;
  }

  const atom_collection<SharedLibraryAtom> &sharedLibrary() const override {
    return _sharedLibraryAtoms;
  }

  const atom_collection<AbsoluteAtom> &absolute() const override {
    return _absoluteAtoms;
  }

  DefinedAtomRange definedAtoms() override {
    return make_range(_definedAtoms._atoms);
  }

private:
  atom_collection_vector<DefinedAtom> _definedAtoms;
  atom_collection_vector<UndefinedAtom> _undefinedAtoms;
  atom_collection_vector<SharedLibraryAtom> _sharedLibraryAtoms;
  atom_collection_vector<AbsoluteAtom> _absoluteAtoms;
};

/// \brief Archive library file that may be used as a virtual container
/// for symbols that should be added dynamically in response to
/// call to find() method.
class SimpleArchiveLibraryFile : public ArchiveLibraryFile {
public:
  SimpleArchiveLibraryFile(StringRef filename)
      : ArchiveLibraryFile(filename) {}

  const atom_collection<DefinedAtom> &defined() const override {
    return _definedAtoms;
  }

  const atom_collection<UndefinedAtom> &undefined() const override {
    return _undefinedAtoms;
  }

  const atom_collection<SharedLibraryAtom> &sharedLibrary() const override {
    return _sharedLibraryAtoms;
  }

  const atom_collection<AbsoluteAtom> &absolute() const override {
    return _absoluteAtoms;
  }

  /// Base implementation: never finds anything and returns nullptr.
  File *find(StringRef sym, bool dataSymbolOnly) override {
    // For descendants:
    // do some checks here and return dynamically generated files with atoms.
    return nullptr;
  }

  /// Base implementation: adds nothing to \p result and reports success.
  std::error_code
  parseAllMembers(std::vector<std::unique_ptr<File>> &result) override {
    return std::error_code();
  }

private:
  atom_collection_vector<DefinedAtom> _definedAtoms;
  atom_collection_vector<UndefinedAtom> _undefinedAtoms;
  atom_collection_vector<SharedLibraryAtom> _sharedLibraryAtoms;
  atom_collection_vector<AbsoluteAtom> _absoluteAtoms;
};

/// A Reference that stores its target, offset and addend directly, plus
/// next/prev pointers so that it can be linked into a list.
class SimpleReference : public Reference {
public:
  SimpleReference(Reference::KindNamespace ns, Reference::KindArch arch,
                  Reference::KindValue value, uint64_t off, const Atom *t,
                  Reference::Addend a)
      : Reference(ns, arch, value), _target(t), _offsetInAtom(off), _addend(a),
        _next(nullptr), _prev(nullptr) {
  }
  /// Default constructor: kind <all, all, 0>, null target, zero offset and
  /// addend.
  SimpleReference()
      : Reference(Reference::KindNamespace::all, Reference::KindArch::all, 0),
        _target(nullptr), _offsetInAtom(0), _addend(0), _next(nullptr),
        _prev(nullptr) {
  }

  uint64_t offsetInAtom() const override { return _offsetInAtom; }

  /// Returns the target atom; asserts that one has been set.
  const Atom *target() const override {
    assert(_target);
    return _target;
  }

  Addend addend() const override { return _addend; }
  void setAddend(Addend a) override { _addend = a; }
  void setTarget(const Atom *newAtom) override { _target = newAtom; }
  SimpleReference *getNext() const { return _next; }
  SimpleReference *getPrev() const { return _prev; }
  void setNext(SimpleReference *n) { _next = n; }
  void setPrev(SimpleReference *p) { _prev = p; }

private:
  const Atom *_target;
  uint64_t _offsetInAtom;
  Addend _addend;
  SimpleReference *_next;
  SimpleReference *_prev;
};

}

// ilist will lazily create a sentinel (so end() can return a node past the
// end of the list). We need this trait so that the sentinel is allocated
// via the BumpPtrAllocator.
+namespace llvm { +template<> +struct ilist_sentinel_traits<lld::SimpleReference> { + + ilist_sentinel_traits() : _allocator(nullptr) { } + + void setAllocator(llvm::BumpPtrAllocator *alloc) { + _allocator = alloc; + } + + lld::SimpleReference *createSentinel() const { + return new (*_allocator) lld::SimpleReference(); + } + + static void destroySentinel(lld::SimpleReference*) {} + + static lld::SimpleReference *provideInitialHead() { return nullptr; } + + lld::SimpleReference *ensureHead(lld::SimpleReference *&head) const { + if (!head) { + head = createSentinel(); + noteHead(head, head); + ilist_traits<lld::SimpleReference>::setNext(head, nullptr); + return head; + } + return ilist_traits<lld::SimpleReference>::getPrev(head); + } + + void noteHead(lld::SimpleReference *newHead, + lld::SimpleReference *sentinel) const { + ilist_traits<lld::SimpleReference>::setPrev(newHead, sentinel); + } + +private: + mutable llvm::BumpPtrAllocator *_allocator; +}; +} + +namespace lld { + +class SimpleDefinedAtom : public DefinedAtom { +public: + explicit SimpleDefinedAtom(const File &f) : _file(f) { + static uint32_t lastOrdinal = 0; + _ordinal = lastOrdinal++; + _references.setAllocator(&f.allocator()); + } + + const File &file() const override { return _file; } + + StringRef name() const override { return StringRef(); } + + uint64_t ordinal() const override { return _ordinal; } + + Scope scope() const override { return DefinedAtom::scopeLinkageUnit; } + + Interposable interposable() const override { + return DefinedAtom::interposeNo; + } + + Merge merge() const override { return DefinedAtom::mergeNo; } + + Alignment alignment() const override { return Alignment(0, 0); } + + SectionChoice sectionChoice() const override { + return DefinedAtom::sectionBasedOnContent; + } + + StringRef customSectionName() const override { return StringRef(); } + DeadStripKind deadStrip() const override { + return DefinedAtom::deadStripNormal; + } + + DefinedAtom::reference_iterator begin() const 
override { + const void *it = reinterpret_cast<const void *>(&*_references.begin()); + return reference_iterator(*this, it); + } + + DefinedAtom::reference_iterator end() const override { + const void *it = reinterpret_cast<const void *>(&*_references.end()); + return reference_iterator(*this, it); + } + + const Reference *derefIterator(const void *it) const override { + return reinterpret_cast<const Reference*>(it); + } + + void incrementIterator(const void *&it) const override { + const SimpleReference* node = reinterpret_cast<const SimpleReference*>(it); + const SimpleReference* next = node->getNext(); + it = reinterpret_cast<const void*>(next); + } + + void addReference(Reference::KindNamespace ns, Reference::KindArch arch, + Reference::KindValue kindValue, uint64_t off, + const Atom *target, Reference::Addend a) { + assert(target && "trying to create reference to nothing"); + auto node = new (_file.allocator()) + SimpleReference(ns, arch, kindValue, off, target, a); + _references.push_back(node); + } + + /// Sort references in a canonical order (by offset, then by kind). + void sortReferences() const { + // Cannot sort a linked list, so move elements into a temporary vector, + // sort the vector, then reconstruct the list. 
+ llvm::SmallVector<SimpleReference *, 16> elements; + for (SimpleReference &node : _references) { + elements.push_back(&node); + } + std::sort(elements.begin(), elements.end(), + [] (const SimpleReference *lhs, const SimpleReference *rhs) -> bool { + uint64_t lhsOffset = lhs->offsetInAtom(); + uint64_t rhsOffset = rhs->offsetInAtom(); + if (rhsOffset != lhsOffset) + return (lhsOffset < rhsOffset); + if (rhs->kindNamespace() != lhs->kindNamespace()) + return (lhs->kindNamespace() < rhs->kindNamespace()); + if (rhs->kindArch() != lhs->kindArch()) + return (lhs->kindArch() < rhs->kindArch()); + return (lhs->kindValue() < rhs->kindValue()); + }); + _references.clearAndLeakNodesUnsafely(); + for (SimpleReference *node : elements) { + _references.push_back(node); + } + } + void setOrdinal(uint64_t ord) { _ordinal = ord; } + +private: + typedef llvm::ilist<SimpleReference> RefList; + + const File &_file; + uint64_t _ordinal; + mutable RefList _references; +}; + +class SimpleUndefinedAtom : public UndefinedAtom { +public: + SimpleUndefinedAtom(const File &f, StringRef name) : _file(f), _name(name) { + assert(!name.empty() && "UndefinedAtoms must have a name"); + } + + /// file - returns the File that produced/owns this Atom + const File &file() const override { return _file; } + + /// name - The name of the atom. For a function atom, it is the (mangled) + /// name of the function. 
+ StringRef name() const override { return _name; } + + CanBeNull canBeNull() const override { return UndefinedAtom::canBeNullNever; } + +private: + const File &_file; + StringRef _name; +}; + +class SimpleAbsoluteAtom : public AbsoluteAtom { +public: + SimpleAbsoluteAtom(const File &f, StringRef name, Scope s, uint64_t value) + : _file(f), _name(name), _scope(s), _value(value) {} + + const File &file() const override { return _file; } + StringRef name() const override { return _name; } + uint64_t value() const override { return _value; } + Scope scope() const override { return _scope; } + +private: + const File &_file; + StringRef _name; + Scope _scope; + uint64_t _value; +}; + +} // end namespace lld + +#endif diff --git a/include/lld/Core/SymbolTable.h b/include/lld/Core/SymbolTable.h new file mode 100644 index 000000000000..683ed65e3635 --- /dev/null +++ b/include/lld/Core/SymbolTable.h @@ -0,0 +1,117 @@ +//===- Core/SymbolTable.h - Main Symbol Table -----------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CORE_SYMBOL_TABLE_H +#define LLD_CORE_SYMBOL_TABLE_H + +#include "lld/Core/LLVM.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/StringExtras.h" +#include <cstring> +#include <map> +#include <vector> + +namespace lld { + +class AbsoluteAtom; +class Atom; +class DefinedAtom; +class LinkingContext; +class ResolverOptions; +class SharedLibraryAtom; +class UndefinedAtom; + +/// \brief The SymbolTable class is responsible for coalescing atoms. +/// +/// All atoms coalescable by-name or by-content should be added. +/// The method replacement() can be used to find the replacement atom +/// if an atom has been coalesced away. 
+class SymbolTable { +public: + explicit SymbolTable(LinkingContext &); + + /// @brief add atom to symbol table + bool add(const DefinedAtom &); + + /// @brief add atom to symbol table + bool add(const UndefinedAtom &); + + /// @brief add atom to symbol table + bool add(const SharedLibraryAtom &); + + /// @brief add atom to symbol table + bool add(const AbsoluteAtom &); + + /// @brief checks if name is in symbol table and if so atom is not + /// UndefinedAtom + bool isDefined(StringRef sym); + + /// @brief returns atom in symbol table for specified name (or nullptr) + const Atom *findByName(StringRef sym); + + /// @brief returns vector of remaining UndefinedAtoms + std::vector<const UndefinedAtom *> undefines(); + + /// returns vector of tentative definitions + std::vector<StringRef> tentativeDefinitions(); + + /// @brief add atom to replacement table + void addReplacement(const Atom *replaced, const Atom *replacement); + + /// @brief if atom has been coalesced away, return replacement, else return atom + const Atom *replacement(const Atom *); + + /// @brief if atom has been coalesced away, return true + bool isCoalescedAway(const Atom *); + + /// @brief Find a group atom. + const Atom *findGroup(StringRef name); + + /// @brief Add a group atom and returns true/false depending on whether the + /// previously existed. 
+ bool addGroup(const DefinedAtom &da); + +private: + typedef llvm::DenseMap<const Atom *, const Atom *> AtomToAtom; + + struct StringRefMappingInfo { + static StringRef getEmptyKey() { return StringRef(); } + static StringRef getTombstoneKey() { return StringRef(" ", 1); } + static unsigned getHashValue(StringRef const val) { + return llvm::HashString(val); + } + static bool isEqual(StringRef const lhs, StringRef const rhs) { + return lhs.equals(rhs); + } + }; + typedef llvm::DenseMap<StringRef, const Atom *, + StringRefMappingInfo> NameToAtom; + + struct AtomMappingInfo { + static const DefinedAtom * getEmptyKey() { return nullptr; } + static const DefinedAtom * getTombstoneKey() { return (DefinedAtom*)(-1); } + static unsigned getHashValue(const DefinedAtom * const Val); + static bool isEqual(const DefinedAtom * const LHS, + const DefinedAtom * const RHS); + }; + typedef llvm::DenseSet<const DefinedAtom*, AtomMappingInfo> AtomContentSet; + + bool addByName(const Atom &); + bool addByContent(const DefinedAtom &); + + LinkingContext &_context; + AtomToAtom _replacedAtoms; + NameToAtom _nameTable; + NameToAtom _groupTable; + AtomContentSet _contentTable; +}; + +} // namespace lld + +#endif // LLD_CORE_SYMBOL_TABLE_H diff --git a/include/lld/Core/TODO.txt b/include/lld/Core/TODO.txt new file mode 100644 index 000000000000..8888c763ef65 --- /dev/null +++ b/include/lld/Core/TODO.txt @@ -0,0 +1,17 @@ +include/lld/Core +~~~~~~~~~~~~~~~~ + +* The native/yaml reader/writer interfaces should be changed to return + an explanatory string if there is an error. The existing error_code + abstraction only works for returning low level OS errors. It does not + work for describing formatting issues. + +* We need to design a diagnostics interface. It would be nice to share code + with Clang_ where possible. + +* We need to add more attributes to File. In particular, we need cpu + and OS information (like target triples). 
We should also provide explicit + support for `LLVM IR module flags metadata`__. + +.. __: http://llvm.org/docs/LangRef.html#module_flags +.. _Clang: http://clang.llvm.org/docs/InternalsManual.html#Diagnostics diff --git a/include/lld/Core/UndefinedAtom.h b/include/lld/Core/UndefinedAtom.h new file mode 100644 index 000000000000..7a835a4ebaa8 --- /dev/null +++ b/include/lld/Core/UndefinedAtom.h @@ -0,0 +1,74 @@ +//===- Core/UndefinedAtom.h - An Undefined Atom ---------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CORE_UNDEFINED_ATOM_H +#define LLD_CORE_UNDEFINED_ATOM_H + +#include "lld/Core/Atom.h" + +namespace lld { + +/// An UndefinedAtom has no content. +/// It exists as a placeholder for a future atom. +class UndefinedAtom : public Atom { +public: + /// Whether this undefined symbol needs to be resolved, + /// or whether it can just evaluate to nullptr. + /// This concept is often called "weak", but that term + /// is overloaded to mean other things too. + enum CanBeNull { + /// Normal symbols must be resolved at build time + canBeNullNever, + + /// This symbol can be missing at runtime and will evaluate to nullptr. + /// That is, the static linker still must find a definition (usually + /// in some shared library), but at runtime, the dynamic loader + /// will allow the symbol to be missing and resolved to nullptr. + /// + /// On Darwin this is generated using a function prototype with + /// __attribute__((weak_import)). + /// On Linux this is generated using a function prototype with + /// __attribute__((weak)). + /// On Windows this feature is not supported. + canBeNullAtRuntime, + + /// This symbol can be missing at build time. + /// That is, the static linker will not error if a definition for + /// this symbol is not found at build time. 
Instead, the linker + /// will build an executable that lets the dynamic loader find the + /// symbol at runtime. + /// This feature is not supported on Darwin nor Windows. + /// On linux this is generated using a function prototype with + /// __attribute__((weak)). + canBeNullAtBuildtime + }; + + virtual CanBeNull canBeNull() const = 0; + + static bool classof(const Atom *a) { + return a->definition() == definitionUndefined; + } + + static bool classof(const UndefinedAtom *) { return true; } + + /// Returns an undefined atom if this undefined symbol has a synonym. This is + /// mainly used in COFF. In COFF, an unresolved external symbol can have up to + /// one optional name (sym2) in addition to its regular name (sym1). If a + /// definition of sym1 exists, sym1 is resolved normally. Otherwise, all + /// references to sym1 refer to sym2 instead. In that case sym2 must be + /// resolved, or link will fail. + virtual const UndefinedAtom *fallback() const { return nullptr; } + +protected: + UndefinedAtom() : Atom(definitionUndefined) {} +}; + +} // namespace lld + +#endif // LLD_CORE_UNDEFINED_ATOM_H diff --git a/include/lld/Core/Writer.h b/include/lld/Core/Writer.h new file mode 100644 index 000000000000..94c75d8d019f --- /dev/null +++ b/include/lld/Core/Writer.h @@ -0,0 +1,52 @@ +//===- lld/Core/Writer.h - Abstract File Format Interface -----------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CORE_WRITER_H +#define LLD_CORE_WRITER_H + +#include "lld/Core/LLVM.h" +#include <memory> +#include <vector> + +namespace lld { +class File; +class ELFLinkingContext; +class MachOLinkingContext; +class PECOFFLinkingContext; +class LinkingContext; +class TargetHandlerBase; + +/// \brief The Writer is an abstract class for writing object files, shared +/// library files, and executable files. Each file format (e.g. ELF, mach-o, +/// PECOFF, native, etc) have a concrete subclass of Writer. +class Writer { +public: + virtual ~Writer(); + + /// \brief Write a file from the supplied File object + virtual std::error_code writeFile(const File &linkedFile, StringRef path) = 0; + + /// \brief This method is called by Core Linking to give the Writer a chance + /// to add file format specific "files" to set of files to be linked. This is + /// how file format specific atoms can be added to the link. + virtual bool createImplicitFiles(std::vector<std::unique_ptr<File> > &); + +protected: + // only concrete subclasses can be instantiated + Writer(); +}; + +std::unique_ptr<Writer> createWriterELF(TargetHandlerBase *handler); +std::unique_ptr<Writer> createWriterMachO(const MachOLinkingContext &); +std::unique_ptr<Writer> createWriterPECOFF(const PECOFFLinkingContext &); +std::unique_ptr<Writer> createWriterNative(); +std::unique_ptr<Writer> createWriterYAML(const LinkingContext &); +} // end namespace lld + +#endif diff --git a/include/lld/Core/range.h b/include/lld/Core/range.h new file mode 100644 index 000000000000..614c9672955c --- /dev/null +++ b/include/lld/Core/range.h @@ -0,0 +1,738 @@ +//===-- lld/Core/range.h - Iterator ranges ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief Iterator range type based on c++1y range proposal. +/// +/// See http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2012/n3350.html +/// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CORE_RANGE_H +#define LLD_CORE_RANGE_H + +#include "llvm/Support/Compiler.h" +#include <array> +#include <cassert> +#include <iterator> +#include <string> +#include <type_traits> +#include <utility> +#include <vector> + +namespace lld { +// Nothing in this namespace is part of the exported interface. +namespace detail { +using std::begin; +using std::end; +/// Used as the result type of undefined functions. +struct undefined {}; + +template <typename R> class begin_result { + template <typename T> static auto check(T &&t) -> decltype(begin(t)); + static undefined check(...); +public: + typedef decltype(check(std::declval<R>())) type; +}; + +template <typename R> class end_result { + template <typename T> static auto check(T &&t) -> decltype(end(t)); + static undefined check(...); +public: + typedef decltype(check(std::declval<R>())) type; +}; + +// Things that begin and end work on, in compatible ways, are +// ranges. [stmt.ranged] +template <typename R> +struct is_range : std::is_same<typename detail::begin_result<R>::type, + typename detail::end_result<R>::type> {}; + +// This currently requires specialization and doesn't work for +// detecting \c range<>s or iterators. We should add +// \c contiguous_iterator_tag to fix that. 
// Primary template: nothing is treated as contiguous unless one of the
// specializations below matches.
template <typename R> struct is_contiguous_range : std::false_type {};

// Top-level const and reference qualifiers are peeled off and the question
// is asked again about the underlying type.
template <typename R>
struct is_contiguous_range<const R> : is_contiguous_range<R> {};
template <typename R>
struct is_contiguous_range<R &> : is_contiguous_range<R> {};
template <typename R>
struct is_contiguous_range<R &&> : is_contiguous_range<R> {};

// Built-in arrays. The const-array form has its own specialization;
// presumably this keeps `const T[N]` from being ambiguous between the
// `const R` and `T[N]` patterns above.
template <typename T, size_t N>
struct is_contiguous_range<T[N]> : std::true_type {};
template <typename T, size_t N>
struct is_contiguous_range<const T[N]> : std::true_type {};

// Standard library types this header marks as contiguous.
template <typename T, size_t N>
struct is_contiguous_range<std::array<T, N> > : std::true_type {};
template <typename T, typename Allocator>
struct is_contiguous_range<std::vector<T, Allocator> > : std::true_type {};
template <typename charT, typename traits, typename Allocator>
struct is_contiguous_range<std::basic_string<charT, traits, Allocator> >
    : std::true_type {};

// Removes cv qualifiers from all levels of a multi-level pointer
// type, not just the type level.
+template <typename T> struct remove_all_cv_ptr { + typedef T type; +}; +template <typename T> struct remove_all_cv_ptr<T *> { + typedef typename remove_all_cv_ptr<T>::type *type; +}; +template <typename T> struct remove_all_cv_ptr<const T> { + typedef typename remove_all_cv_ptr<T>::type type; +}; +template <typename T> struct remove_all_cv_ptr<volatile T> { + typedef typename remove_all_cv_ptr<T>::type type; +}; +template <typename T> struct remove_all_cv_ptr<const volatile T> { + typedef typename remove_all_cv_ptr<T>::type type; +}; + +template <typename From, typename To> +struct conversion_preserves_array_indexing : std::false_type {}; + +template <typename FromVal, typename ToVal> +struct conversion_preserves_array_indexing<FromVal *, + ToVal *> : std::integral_constant< + bool, std::is_convertible<FromVal *, ToVal *>::value && + std::is_same<typename remove_all_cv_ptr<FromVal>::type, + typename remove_all_cv_ptr<ToVal>::type>::value> {}; + +template <typename T> +LLVM_CONSTEXPR auto adl_begin(T &&t) -> decltype(begin(t)) { + return begin(std::forward<T>(t)); +} + +template <typename T> LLVM_CONSTEXPR auto adl_end(T &&t) -> decltype(end(t)) { + return end(std::forward<T>(t)); +} +} // end namespace detail + +/// A \c std::range<Iterator> represents a half-open iterator range +/// built from two iterators, \c 'begin', and \c 'end'. If \c end is +/// not reachable from \c begin, the behavior is undefined. +/// +/// The mutability of elements of the range is controlled by the +/// Iterator argument. Instantiate +/// <code>range<<var>Foo</var>::iterator></code> or +/// <code>range<<var>T</var>*></code>, or call +/// <code>make_range(<var>non_const_container</var>)</code>, and you +/// get a mutable range. Instantiate +/// <code>range<<var>Foo</var>::const_iterator></code> or +/// <code>range<const <var>T</var>*></code>, or call +/// <code>make_range(<var>const_container</var>)</code>, and you get a +/// constant range. 
+/// +/// \todo Inherit from std::pair<Iterator, Iterator>? +/// +/// \todo This interface contains some functions that could be +/// provided as free algorithms rather than member functions, and all +/// of the <code>pop_*()</code> functions could be replaced by \c +/// slice() at the cost of some extra iterator copies. This makes +/// them more awkward to use, but makes it easier for users to write +/// their own types that follow the same interface. On the other hand, +/// a \c range_facade could be provided to help users write new +/// ranges, and it could provide the members. Such functions are +/// marked with a note in their documentation. (Of course, all of +/// these member functions could be provided as free functions using +/// the iterator access methods, but one goal here is to allow people +/// to program without touching iterators at all.) +template <typename Iterator> class range { + Iterator begin_, end_; +public: + /// \name types + /// @{ + + /// The iterator category of \c Iterator. + /// \todo Consider defining range categories. If they don't add + /// anything over the corresponding iterator categories, then + /// they're probably not worth defining. + typedef typename std::iterator_traits< + Iterator>::iterator_category iterator_category; + /// The type of elements of the range. Not cv-qualified. + typedef typename std::iterator_traits<Iterator>::value_type value_type; + /// The type of the size of the range and offsets within the range. + typedef typename std::iterator_traits< + Iterator>::difference_type difference_type; + /// The return type of element access methods: \c front(), \c back(), etc. + typedef typename std::iterator_traits<Iterator>::reference reference; + typedef typename std::iterator_traits<Iterator>::pointer pointer; + /// @} + + /// \name constructors + /// @{ + + /// Creates a range of default-constructed (<em>not</em> + /// value-initialized) iterators. For most \c Iterator types, this + /// will be an invalid range. 
+ range() : begin_(), end_() {} + + /// \pre \c end is reachable from \c begin. + /// \post <code>this->begin() == begin && this->end() == end</code> + LLVM_CONSTEXPR range(Iterator begin, Iterator end) + : begin_(begin), end_(end) {} + + /// \par Participates in overload resolution if: + /// - \c Iterator is not a pointer type, + /// - \c begin(r) and \c end(r) return the same type, and + /// - that type is convertible to \c Iterator. + /// + /// \todo std::begin and std::end are overloaded between T& and + /// const T&, which means that if a container has only a non-const + /// begin or end method, then it's ill-formed to pass an rvalue to + /// the free function. To avoid that problem, we don't use + /// std::forward<> here, so begin() and end() are always called with + /// an lvalue. Another option would be to insist that rvalue + /// arguments to range() must have const begin() and end() methods. + template <typename R> LLVM_CONSTEXPR range( + R &&r, + typename std::enable_if< + !std::is_pointer<Iterator>::value && + detail::is_range<R>::value && + std::is_convertible<typename detail::begin_result<R>::type, + Iterator>::value>::type* = 0) + : begin_(detail::adl_begin(r)), end_(detail::adl_end(r)) {} + + /// This constructor creates a \c range<T*> from any range with + /// contiguous iterators. Because dereferencing a past-the-end + /// iterator can be undefined behavior, empty ranges get initialized + /// with \c nullptr rather than \c &*begin(). + /// + /// \par Participates in overload resolution if: + /// - \c Iterator is a pointer type \c T*, + /// - \c begin(r) and \c end(r) return the same type, + /// - elements \c i of that type satisfy the invariant + /// <code>&*(i + N) == (&*i) + N</code>, and + /// - The result of <code>&*begin()</code> is convertible to \c T* + /// using only qualification conversions [conv.qual] (since + /// pointer conversions stop the pointer from pointing to an + /// array element). 
+ /// + /// \todo The <code>&*(i + N) == (&*i) + N</code> invariant is + /// currently impossible to check for user-defined types. We need a + /// \c contiguous_iterator_tag to let users assert it. + template <typename R> LLVM_CONSTEXPR range( + R &&r, + typename std::enable_if< + std::is_pointer<Iterator>::value && + detail::is_contiguous_range<R>::value + // MSVC returns false for this in this context, but not if we lift it out of the + // constructor. +#ifndef _MSC_VER + && detail::conversion_preserves_array_indexing< + decltype(&*detail::adl_begin(r)), Iterator>::value +#endif + >::type* = 0) + : begin_((detail::adl_begin(r) == detail::adl_end(r) && + !std::is_pointer<decltype(detail::adl_begin(r))>::value) + // For non-pointers, &*begin(r) is only defined behavior + // if there's an element there. Otherwise, use nullptr + // since the user can't dereference it anyway. This _is_ + // detectable. + ? nullptr : &*detail::adl_begin(r)), + end_(begin_ + (detail::adl_end(r) - detail::adl_begin(r))) {} + + /// @} + + /// \name iterator access + /// @{ + LLVM_CONSTEXPR Iterator begin() const { return begin_; } + LLVM_CONSTEXPR Iterator end() const { return end_; } + /// @} + + /// \name element access + /// @{ + + /// \par Complexity: + /// O(1) + /// \pre \c !empty() + /// \returns a reference to the element at the front of the range. + LLVM_CONSTEXPR reference front() const { return *begin(); } + + /// \par Ill-formed unless: + /// \c iterator_category is convertible to \c + /// std::bidirectional_iterator_tag. + /// + /// \par Complexity: + /// O(2) (Involves copying and decrementing an iterator, so not + /// quite as cheap as \c front()) + /// + /// \pre \c !empty() + /// \returns a reference to the element at the front of the range. 
+ LLVM_CONSTEXPR reference back() const { + static_assert( + std::is_convertible<iterator_category, + std::bidirectional_iterator_tag>::value, + "Can only retrieve the last element of a bidirectional range."); + using std::prev; + return *prev(end()); + } + + /// This method is drawn from scripting language indexing. It + /// indexes std::forward from the beginning of the range if the argument + /// is positive, or backwards from the end of the array if the + /// argument is negative. + /// + /// \par Ill-formed unless: + /// \c iterator_category is convertible to \c + /// std::random_access_iterator_tag. + /// + /// \par Complexity: + /// O(1) + /// + /// \pre <code>abs(index) < size() || index == -size()</code> + /// + /// \returns if <code>index >= 0</code>, a reference to the + /// <code>index</code>'th element in the range. Otherwise, a + /// reference to the <code>size()+index</code>'th element. + LLVM_CONSTEXPR reference operator[](difference_type index) const { + static_assert(std::is_convertible<iterator_category, + std::random_access_iterator_tag>::value, + "Can only index into a random-access range."); + // Less readable construction for constexpr support. + return index < 0 ? end()[index] + : begin()[index]; + } + /// @} + + /// \name size + /// @{ + + /// \par Complexity: + /// O(1) + /// \returns \c true if the range contains no elements. + LLVM_CONSTEXPR bool empty() const { return begin() == end(); } + + /// \par Ill-formed unless: + /// \c iterator_category is convertible to + /// \c std::forward_iterator_tag. + /// + /// \par Complexity: + /// O(1) if \c iterator_category is convertible to \c + /// std::random_access_iterator_tag. O(<code>size()</code>) + /// otherwise. + /// + /// \returns the number of times \c pop_front() can be called before + /// \c empty() becomes true. 
+ LLVM_CONSTEXPR difference_type size() const { + static_assert(std::is_convertible<iterator_category, + std::forward_iterator_tag>::value, + "Calling size on an input range would destroy the range."); + return dispatch_size(iterator_category()); + } + /// @} + + /// \name traversal from the beginning of the range + /// @{ + + /// Advances the beginning of the range by one element. + /// \pre \c !empty() + void pop_front() { ++begin_; } + + /// Advances the beginning of the range by \c n elements. + /// + /// \par Complexity: + /// O(1) if \c iterator_category is convertible to \c + /// std::random_access_iterator_tag, O(<code>n</code>) otherwise. + /// + /// \pre <code>n >= 0</code>, and there must be at least \c n + /// elements in the range. + void pop_front(difference_type n) { advance(begin_, n); } + + /// Advances the beginning of the range by at most \c n elements, + /// stopping if the range becomes empty. A negative argument causes + /// no change. + /// + /// \par Complexity: + /// O(1) if \c iterator_category is convertible to \c + /// std::random_access_iterator_tag, O(<code>min(n, + /// <var>#-elements-in-range</var>)</code>) otherwise. + /// + /// \note Could be provided as a free function with little-to-no + /// loss in efficiency. + void pop_front_upto(difference_type n) { + advance_upto(begin_, std::max<difference_type>(0, n), end_, + iterator_category()); + } + + /// @} + + /// \name traversal from the end of the range + /// @{ + + /// Moves the end of the range earlier by one element. + /// + /// \par Ill-formed unless: + /// \c iterator_category is convertible to + /// \c std::bidirectional_iterator_tag. + /// + /// \par Complexity: + /// O(1) + /// + /// \pre \c !empty() + void pop_back() { + static_assert(std::is_convertible<iterator_category, + std::bidirectional_iterator_tag>::value, + "Can only access the end of a bidirectional range."); + --end_; + } + + /// Moves the end of the range earlier by \c n elements. 
+ /// + /// \par Ill-formed unless: + /// \c iterator_category is convertible to + /// \c std::bidirectional_iterator_tag. + /// + /// \par Complexity: + /// O(1) if \c iterator_category is convertible to \c + /// std::random_access_iterator_tag, O(<code>n</code>) otherwise. + /// + /// \pre <code>n >= 0</code>, and there must be at least \c n + /// elements in the range. + void pop_back(difference_type n) { + static_assert(std::is_convertible<iterator_category, + std::bidirectional_iterator_tag>::value, + "Can only access the end of a bidirectional range."); + advance(end_, -n); + } + + /// Moves the end of the range earlier by <code>min(n, + /// size())</code> elements. A negative argument causes no change. + /// + /// \par Ill-formed unless: + /// \c iterator_category is convertible to + /// \c std::bidirectional_iterator_tag. + /// + /// \par Complexity: + /// O(1) if \c iterator_category is convertible to \c + /// std::random_access_iterator_tag, O(<code>min(n, + /// <var>#-elements-in-range</var>)</code>) otherwise. + /// + /// \note Could be provided as a free function with little-to-no + /// loss in efficiency. + void pop_back_upto(difference_type n) { + static_assert(std::is_convertible<iterator_category, + std::bidirectional_iterator_tag>::value, + "Can only access the end of a bidirectional range."); + advance_upto(end_, -std::max<difference_type>(0, n), begin_, + iterator_category()); + } + + /// @} + + /// \name creating derived ranges + /// @{ + + /// Divides the range into two pieces at \c index, where a positive + /// \c index represents an offset from the beginning of the range + /// and a negative \c index represents an offset from the end. + /// <code>range[index]</code> is the first element in the second + /// piece. If <code>index >= size()</code>, the second piece + /// will be empty. If <code>index < -size()</code>, the first + /// piece will be empty. 
+ /// + /// \par Ill-formed unless: + /// \c iterator_category is convertible to + /// \c std::forward_iterator_tag. + /// + /// \par Complexity: + /// - If \c iterator_category is convertible to \c + /// std::random_access_iterator_tag: O(1) + /// - Otherwise, if \c iterator_category is convertible to \c + /// std::bidirectional_iterator_tag, \c abs(index) iterator increments + /// or decrements + /// - Otherwise, if <code>index >= 0</code>, \c index iterator + /// increments + /// - Otherwise, <code>size() + (size() + index)</code> + /// iterator increments. + /// + /// \returns a pair of adjacent ranges. + /// + /// \post + /// - <code>result.first.size() == min(index, this->size())</code> + /// - <code>result.first.end() == result.second.begin()</code> + /// - <code>result.first.size() + result.second.size()</code> <code>== + /// this->size()</code> + /// + /// \todo split() could take an arbitrary number of indices and + /// return an <code>N+1</code>-element \c tuple<>. This is tricky to + /// implement with negative indices in the optimal number of + /// increments or decrements for a bidirectional iterator, but it + /// should be possible. Do we want it? + std::pair<range, range> split(difference_type index) const { + static_assert( + std::is_convertible<iterator_category, + std::forward_iterator_tag>::value, + "Calling split on a non-std::forward range would return a useless " + "first result."); + if (index >= 0) { + range second = *this; + second.pop_front_upto(index); + return make_pair(range(begin(), second.begin()), second); + } else { + return dispatch_split_neg(index, iterator_category()); + } + } + + /// \returns A sub-range from \c start to \c stop (not including \c + /// stop, as usual). \c start and \c stop are interpreted as for + /// <code>operator[]</code>, with negative values offsetting from + /// the end of the range. Omitting the \c stop argument makes the + /// sub-range continue to the end of the original range. 
Positive + /// arguments saturate to the end of the range, and negative + /// arguments saturate to the beginning. If \c stop is before \c + /// start, returns an empty range beginning and ending at \c start. + /// + /// \par Ill-formed unless: + /// \c iterator_category is convertible to + /// \c std::forward_iterator_tag. + /// + /// \par Complexity: + /// - If \c iterator_category is convertible to \c + /// std::random_access_iterator_tag: O(1) + /// - Otherwise, if \c iterator_category is convertible to \c + /// std::bidirectional_iterator_tag, at most <code>min(abs(start), + /// size()) + min(abs(stop), size())</code> iterator + /// increments or decrements + /// - Otherwise, if <code>start >= 0 && stop >= 0</code>, + /// <code>max(start, stop)</code> iterator increments + /// - Otherwise, <code>size() + max(start', stop')</code> + /// iterator increments, where \c start' and \c stop' are the + /// offsets of the elements \c start and \c stop refer to. + /// + /// \note \c slice(start) should be implemented with a different + /// overload, rather than defaulting \c stop to + /// <code>numeric_limits<difference_type>::max()</code>, because + /// using a default would force non-random-access ranges to use an + /// O(<code>size()</code>) algorithm to compute the end rather + /// than the O(1) they're capable of. 
+ range slice(difference_type start, difference_type stop) const { + static_assert( + std::is_convertible<iterator_category, + std::forward_iterator_tag>::value, + "Calling slice on a non-std::forward range would destroy the original " + "range."); + return dispatch_slice(start, stop, iterator_category()); + } + + range slice(difference_type start) const { + static_assert( + std::is_convertible<iterator_category, + std::forward_iterator_tag>::value, + "Calling slice on a non-std::forward range would destroy the original " + "range."); + return split(start).second; + } + + /// @} + +private: + // advance_upto: should be added to <algorithm>, but I'll use it as + // a helper function here. + // + // These return the number of increments that weren't applied + // because we ran into 'limit' (or 0 if we didn't run into limit). + static difference_type advance_upto(Iterator &it, difference_type n, + Iterator limit, std::input_iterator_tag) { + if (n < 0) + return 0; + while (it != limit && n > 0) { + ++it; + --n; + } + return n; + } + + static difference_type advance_upto(Iterator &it, difference_type n, + Iterator limit, + std::bidirectional_iterator_tag) { + if (n < 0) { + while (it != limit && n < 0) { + --it; + ++n; + } + } else { + while (it != limit && n > 0) { + ++it; + --n; + } + } + return n; + } + + static difference_type advance_upto(Iterator &it, difference_type n, + Iterator limit, + std::random_access_iterator_tag) { + difference_type distance = limit - it; + if (distance < 0) + assert(n <= 0); + else if (distance > 0) + assert(n >= 0); + + if (abs(distance) > abs(n)) { + it += n; + return 0; + } else { + it = limit; + return n - distance; + } + } + + // Dispatch functions. 
+ difference_type dispatch_size(std::forward_iterator_tag) const { + return std::distance(begin(), end()); + } + + LLVM_CONSTEXPR difference_type dispatch_size( + std::random_access_iterator_tag) const { + return end() - begin(); + } + + std::pair<range, range> dispatch_split_neg(difference_type index, + std::forward_iterator_tag) const { + assert(index < 0); + difference_type size = this->size(); + return split(std::max<difference_type>(0, size + index)); + } + + std::pair<range, range> dispatch_split_neg( + difference_type index, std::bidirectional_iterator_tag) const { + assert(index < 0); + range first = *this; + first.pop_back_upto(-index); + return make_pair(first, range(first.end(), end())); + } + + range dispatch_slice(difference_type start, difference_type stop, + std::forward_iterator_tag) const { + if (start < 0 || stop < 0) { + difference_type size = this->size(); + if (start < 0) + start = std::max<difference_type>(0, size + start); + if (stop < 0) + stop = size + stop; // Possibly negative; will be fixed in 2 lines. 
+ } + stop = std::max<difference_type>(start, stop); + + Iterator first = begin(); + advance_upto(first, start, end(), iterator_category()); + Iterator last = first; + advance_upto(last, stop - start, end(), iterator_category()); + return range(first, last); + } + + range dispatch_slice(const difference_type start, const difference_type stop, + std::bidirectional_iterator_tag) const { + Iterator first; + if (start < 0) { + first = end(); + advance_upto(first, start, begin(), iterator_category()); + } else { + first = begin(); + advance_upto(first, start, end(), iterator_category()); + } + Iterator last; + if (stop < 0) { + last = end(); + advance_upto(last, stop, first, iterator_category()); + } else { + if (start >= 0) { + last = first; + if (stop > start) + advance_upto(last, stop - start, end(), iterator_category()); + } else { + // Complicated: 'start' walked from the end of the sequence, + // but 'stop' needs to walk from the beginning. + Iterator dummy = begin(); + // Walk up to 'stop' increments from begin(), stopping when we + // get to 'first', and capturing the remaining number of + // increments. + difference_type increments_past_start = + advance_upto(dummy, stop, first, iterator_category()); + if (increments_past_start == 0) { + // If this is 0, then stop was before start. + last = first; + } else { + // Otherwise, count that many spaces beyond first. 
+ last = first; + advance_upto(last, increments_past_start, end(), iterator_category()); + } + } + } + return range(first, last); + } + + range dispatch_slice(difference_type start, difference_type stop, + std::random_access_iterator_tag) const { + const difference_type size = this->size(); + if (start < 0) + start = size + start; + if (start < 0) + start = 0; + if (start > size) + start = size; + + if (stop < 0) + stop = size + stop; + if (stop < start) + stop = start; + if (stop > size) + stop = size; + + return range(begin() + start, begin() + stop); + } +}; + +/// \name deducing constructor wrappers +/// \relates std::range +/// \xmlonly <nonmember/> \endxmlonly +/// +/// These functions do the same thing as the constructor with the same +/// signature. They just allow users to avoid writing the iterator +/// type. +/// @{ + +/// \todo I'd like to define a \c make_range taking a single iterator +/// argument representing the beginning of a range that ends with a +/// default-constructed \c Iterator. This would help with using +/// iterators like \c istream_iterator. However, using just \c +/// make_range() could be confusing and lead to people writing +/// incorrect ranges of more common iterators. Is there a better name? +template <typename Iterator> +LLVM_CONSTEXPR range<Iterator> make_range(Iterator begin, Iterator end) { + return range<Iterator>(begin, end); +} + +/// \par Participates in overload resolution if: +/// \c begin(r) and \c end(r) return the same type. +template <typename Range> LLVM_CONSTEXPR auto make_range( + Range &&r, + typename std::enable_if<detail::is_range<Range>::value>::type* = 0) + -> range<decltype(detail::adl_begin(r))> { + return range<decltype(detail::adl_begin(r))>(r); +} + +/// \par Participates in overload resolution if: +/// - \c begin(r) and \c end(r) return the same type, +/// - that type satisfies the invariant that <code>&*(i + N) == +/// (&*i) + N</code>, and +/// - \c &*begin(r) has a pointer type. 
+template <typename Range> LLVM_CONSTEXPR auto make_ptr_range( + Range &&r, + typename std::enable_if< + detail::is_contiguous_range<Range>::value && + std::is_pointer<decltype(&*detail::adl_begin(r))>::value>::type* = 0) + -> range<decltype(&*detail::adl_begin(r))> { + return range<decltype(&*detail::adl_begin(r))>(r); +} +/// @} +} // end namespace lld + +#endif diff --git a/include/lld/Driver/Driver.h b/include/lld/Driver/Driver.h new file mode 100644 index 000000000000..300d2356d050 --- /dev/null +++ b/include/lld/Driver/Driver.h @@ -0,0 +1,162 @@ +//===- lld/Driver/Driver.h - Linker Driver Emulator -----------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// +/// Interface for Drivers which convert command line arguments into +/// LinkingContext objects, then perform the link. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLD_DRIVER_DRIVER_H +#define LLD_DRIVER_DRIVER_H + +#include "lld/Core/LLVM.h" +#include "lld/Core/Node.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Support/raw_ostream.h" +#include <memory> +#include <set> +#include <vector> + +namespace lld { +class LinkingContext; +class CoreLinkingContext; +class MachOLinkingContext; +class PECOFFLinkingContext; +class ELFLinkingContext; + +typedef std::vector<std::unique_ptr<File>> FileVector; + +FileVector makeErrorFile(StringRef path, std::error_code ec); +FileVector parseMemberFiles(FileVector &files); +FileVector loadFile(LinkingContext &ctx, StringRef path, bool wholeArchive); + +/// Base class for all Drivers. 
+class Driver { +protected: + + /// Performs link using specified options + static bool link(LinkingContext &context, + raw_ostream &diag = llvm::errs()); + +private: + Driver() = delete; +}; + +/// Driver for "universal" lld tool which can mimic any linker command line +/// parsing once it figures out which command line flavor to use. +class UniversalDriver : public Driver { +public: + /// Determine flavor and pass control to Driver for that flavor. + static bool link(int argc, const char *argv[], + raw_ostream &diag = llvm::errs()); + +private: + UniversalDriver() = delete; +}; + +/// Driver for gnu/binutil 'ld' command line options. +class GnuLdDriver : public Driver { +public: + /// Parses command line arguments same as gnu/binutils ld and performs link. + /// Returns true iff an error occurred. + static bool linkELF(int argc, const char *argv[], + raw_ostream &diag = llvm::errs()); + + /// Uses gnu/binutils style ld command line options to fill in options struct. + /// Returns true iff there was an error. + static bool parse(int argc, const char *argv[], + std::unique_ptr<ELFLinkingContext> &context, + raw_ostream &diag = llvm::errs()); + + /// Parses a given memory buffer as a linker script and evaluate that. + /// Public function for testing. + static std::error_code evalLinkerScript(ELFLinkingContext &ctx, + std::unique_ptr<MemoryBuffer> mb, + raw_ostream &diag, bool nostdlib); + + /// A factory method to create an instance of ELFLinkingContext. + static std::unique_ptr<ELFLinkingContext> + createELFLinkingContext(llvm::Triple triple); + +private: + static llvm::Triple getDefaultTarget(const char *progName); + static bool applyEmulation(llvm::Triple &triple, + llvm::opt::InputArgList &args, + raw_ostream &diag); + static void addPlatformSearchDirs(ELFLinkingContext &ctx, + llvm::Triple &triple, + llvm::Triple &baseTriple); + + GnuLdDriver() = delete; +}; + +/// Driver for darwin/ld64 'ld' command line options. 
+class DarwinLdDriver : public Driver { +public: + /// Parses command line arguments same as darwin's ld and performs link. + /// Returns true iff there was an error. + static bool linkMachO(int argc, const char *argv[], + raw_ostream &diag = llvm::errs()); + + /// Uses darwin style ld command line options to update LinkingContext object. + /// Returns true iff there was an error. + static bool parse(int argc, const char *argv[], MachOLinkingContext &info, + raw_ostream &diag = llvm::errs()); + +private: + DarwinLdDriver() = delete; +}; + +/// Driver for Windows 'link.exe' command line options +class WinLinkDriver : public Driver { +public: + /// Parses command line arguments same as Windows link.exe and performs link. + /// Returns true iff there was an error. + static bool linkPECOFF(int argc, const char *argv[], + raw_ostream &diag = llvm::errs()); + + /// Uses Windows style link command line options to fill in options struct. + /// Returns true iff there was an error. + static bool parse(int argc, const char *argv[], PECOFFLinkingContext &info, + raw_ostream &diag = llvm::errs(), + bool isDirective = false); + + // Same as parse(), but restricted to the context of directives. + static bool parseDirectives(int argc, const char *argv[], + PECOFFLinkingContext &info, + raw_ostream &diag = llvm::errs()) { + return parse(argc, argv, info, diag, true); + } + +private: + WinLinkDriver() = delete; +}; + +/// Driver for lld unit tests +class CoreDriver : public Driver { +public: + /// Parses command line arguments same as lld-core and performs link. + /// Returns true iff there was an error. + static bool link(int argc, const char *argv[], + raw_ostream &diag = llvm::errs()); + + /// Uses lld-core command line options to fill in options struct. + /// Returns true iff there was an error. 
+ static bool parse(int argc, const char *argv[], CoreLinkingContext &info, + raw_ostream &diag = llvm::errs()); + +private: + CoreDriver() = delete; +}; + +} // end namespace lld + +#endif diff --git a/include/lld/Driver/WinLinkModuleDef.h b/include/lld/Driver/WinLinkModuleDef.h new file mode 100644 index 000000000000..68c9a4bfef70 --- /dev/null +++ b/include/lld/Driver/WinLinkModuleDef.h @@ -0,0 +1,200 @@ +//===- lld/Driver/WinLinkModuleDef.h --------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief Windows module definition file parser. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLD_DRIVER_WIN_LINK_MODULE_DEF_H +#define LLD_DRIVER_WIN_LINK_MODULE_DEF_H + +#include "lld/Core/LLVM.h" +#include "lld/ReaderWriter/PECOFFLinkingContext.h" +#include "llvm/ADT/Optional.h" +#include "llvm/Support/Allocator.h" +#include <vector> + +namespace lld { +namespace moduledef { + +enum class Kind { + unknown, + eof, + identifier, + comma, + equal, + kw_base, + kw_data, + kw_exports, + kw_heapsize, + kw_library, + kw_name, + kw_noname, + kw_private, + kw_stacksize, + kw_version, +}; + +class Token { +public: + Token() : _kind(Kind::unknown) {} + Token(Kind kind, StringRef range) : _kind(kind), _range(range) {} + + Kind _kind; + StringRef _range; +}; + +class Lexer { +public: + explicit Lexer(std::unique_ptr<MemoryBuffer> mb) : _buffer(mb->getBuffer()) { + _sourceManager.AddNewSourceBuffer(std::move(mb), llvm::SMLoc()); + } + + Token lex(); + const llvm::SourceMgr &getSourceMgr() const { return _sourceManager; } + +private: + StringRef _buffer; + llvm::SourceMgr _sourceManager; +}; + +class Directive { +public: + enum class Kind { exports, heapsize, library, name, stacksize, version }; + 
+ Kind getKind() const { return _kind; } + virtual ~Directive() {} + +protected: + explicit Directive(Kind k) : _kind(k) {} + +private: + Kind _kind; +}; + +class Exports : public Directive { +public: + explicit Exports(const std::vector<PECOFFLinkingContext::ExportDesc> &exports) + : Directive(Kind::exports), _exports(exports) {} + + static bool classof(const Directive *dir) { + return dir->getKind() == Kind::exports; + } + + const std::vector<PECOFFLinkingContext::ExportDesc> &getExports() const { + return _exports; + } + +private: + const std::vector<PECOFFLinkingContext::ExportDesc> _exports; +}; + +template <Directive::Kind kind> +class MemorySize : public Directive { +public: + MemorySize(uint64_t reserve, uint64_t commit) + : Directive(kind), _reserve(reserve), _commit(commit) {} + + static bool classof(const Directive *dir) { + return dir->getKind() == kind; + } + + uint64_t getReserve() const { return _reserve; } + uint64_t getCommit() const { return _commit; } + +private: + const uint64_t _reserve; + const uint64_t _commit; +}; + +typedef MemorySize<Directive::Kind::heapsize> Heapsize; +typedef MemorySize<Directive::Kind::stacksize> Stacksize; + +class Name : public Directive { +public: + Name(StringRef outputPath, uint64_t baseaddr) + : Directive(Kind::name), _outputPath(outputPath), _baseaddr(baseaddr) {} + + static bool classof(const Directive *dir) { + return dir->getKind() == Kind::name; + } + + StringRef getOutputPath() const { return _outputPath; } + uint64_t getBaseAddress() const { return _baseaddr; } + +private: + const std::string _outputPath; + const uint64_t _baseaddr; +}; + +class Library : public Directive { +public: + Library(StringRef name, uint64_t baseaddr) + : Directive(Kind::library), _name(name), _baseaddr(baseaddr) {} + + static bool classof(const Directive *dir) { + return dir->getKind() == Kind::library; + } + + StringRef getName() const { return _name; } + uint64_t getBaseAddress() const { return _baseaddr; } + +private: + const 
std::string _name; + const uint64_t _baseaddr; +}; + +class Version : public Directive { +public: + Version(int major, int minor) + : Directive(Kind::version), _major(major), _minor(minor) {} + + static bool classof(const Directive *dir) { + return dir->getKind() == Kind::version; + } + + int getMajorVersion() const { return _major; } + int getMinorVersion() const { return _minor; } + +private: + const int _major; + const int _minor; +}; + +class Parser { +public: + Parser(Lexer &lex, llvm::BumpPtrAllocator &alloc) + : _lex(lex), _alloc(alloc) {} + + bool parse(std::vector<Directive *> &ret); + +private: + void consumeToken(); + bool consumeTokenAsInt(uint64_t &result); + bool expectAndConsume(Kind kind, Twine msg); + + void ungetToken(); + void error(const Token &tok, Twine msg); + + bool parseOne(Directive *&dir); + bool parseExport(PECOFFLinkingContext::ExportDesc &result); + bool parseMemorySize(uint64_t &reserve, uint64_t &commit); + bool parseName(std::string &outfile, uint64_t &baseaddr); + bool parseVersion(int &major, int &minor); + + Lexer &_lex; + llvm::BumpPtrAllocator &_alloc; + Token _tok; + std::vector<Token> _tokBuf; +}; +} +} + +#endif diff --git a/include/lld/Makefile b/include/lld/Makefile new file mode 100644 index 000000000000..5bfb8910313e --- /dev/null +++ b/include/lld/Makefile @@ -0,0 +1,44 @@ +LLD_LEVEL := ../.. +DIRS := Config + +include $(LLD_LEVEL)/Makefile + +install-local:: + $(Echo) Installing lld include files + $(Verb) $(MKDIR) $(DESTDIR)$(PROJ_includedir) + $(Verb) if test -d "$(PROJ_SRC_DIR)" ; then \ + cd $(PROJ_SRC_DIR)/.. && \ + for hdr in `find lld -type f \ + '(' -name LICENSE.TXT \ + -o -name '*.def' \ + -o -name '*.h' \ + -o -name '*.inc' \ + ')' -print \ + | grep -v CVS | grep -v .svn | grep -v .dir` ; do \ + instdir=$(DESTDIR)`dirname "$(PROJ_includedir)/$$hdr"` ; \ + if test \! 
-d "$$instdir" ; then \ + $(EchoCmd) Making install directory $$instdir ; \ + $(MKDIR) $$instdir ;\ + fi ; \ + $(DataInstall) $$hdr $(DESTDIR)$(PROJ_includedir)/$$hdr ; \ + done ; \ + fi +ifneq ($(PROJ_SRC_ROOT),$(PROJ_OBJ_ROOT)) + $(Verb) if test -d "$(PROJ_OBJ_ROOT)/tools/lld/include/lld" ; then \ + cd $(PROJ_OBJ_ROOT)/tools/lld/include && \ + for hdr in `find lld -type f \ + '(' -name LICENSE.TXT \ + -o -name '*.def' \ + -o -name '*.h' \ + -o -name '*.inc' \ + ')' -print \ + | grep -v CVS | grep -v .tmp | grep -v .dir` ; do \ + instdir=$(DESTDIR)`dirname "$(PROJ_includedir)/$$hdr"` ; \ + if test \! -d "$$instdir" ; then \ + $(EchoCmd) Making install directory $$instdir ; \ + $(MKDIR) $$instdir ;\ + fi ; \ + $(DataInstall) $$hdr $(DESTDIR)$(PROJ_includedir)/$$hdr ; \ + done ; \ + fi +endif diff --git a/include/lld/ReaderWriter/AtomLayout.h b/include/lld/ReaderWriter/AtomLayout.h new file mode 100644 index 000000000000..ad4cd0607b88 --- /dev/null +++ b/include/lld/ReaderWriter/AtomLayout.h @@ -0,0 +1,39 @@ +//===- include/lld/ReaderWriter/AtomLayout.h ------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_ATOM_LAYOUT_H +#define LLD_READER_WRITER_ATOM_LAYOUT_H + +namespace lld { +class Atom; + +/// AtomLayouts are used by a writer to manage physical positions of atoms. +/// AtomLayout has two positions; one is file offset, and the other is the +/// address when loaded into memory. +/// +/// Construction of AtomLayouts is usually a multi-pass process. When an atom +/// is appended to a section, we don't know the starting address of the +/// section. Thus, we have no choice but to store the offset from the +/// beginning of the section as AtomLayout values. 
After all sections starting +/// address are fixed, AtomLayout is revisited to get the offsets updated by +/// adding the starting addresses of the section. +struct AtomLayout { + AtomLayout(const Atom *a, uint64_t fileOff, uint64_t virAddr) + : _atom(a), _fileOffset(fileOff), _virtualAddr(virAddr) {} + + AtomLayout() : _atom(nullptr), _fileOffset(0), _virtualAddr(0) {} + + const Atom *_atom; + uint64_t _fileOffset; + uint64_t _virtualAddr; +}; + +} + +#endif diff --git a/include/lld/ReaderWriter/CoreLinkingContext.h b/include/lld/ReaderWriter/CoreLinkingContext.h new file mode 100644 index 000000000000..d597ca46ddc7 --- /dev/null +++ b/include/lld/ReaderWriter/CoreLinkingContext.h @@ -0,0 +1,47 @@ +//===- lld/ReaderWriter/CoreLinkingContext.h ------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_CORE_LINKER_CONTEXT_H +#define LLD_READER_WRITER_CORE_LINKER_CONTEXT_H + +#include "lld/Core/LinkingContext.h" +#include "lld/Core/Reader.h" +#include "lld/Core/Writer.h" +#include "llvm/Support/ErrorHandling.h" + +namespace lld { + +class CoreLinkingContext : public LinkingContext { +public: + CoreLinkingContext(); + + enum { + TEST_RELOC_CALL32 = 1, + TEST_RELOC_PCREL32 = 2, + TEST_RELOC_GOT_LOAD32 = 3, + TEST_RELOC_GOT_USE32 = 4, + TEST_RELOC_LEA32_WAS_GOT = 5, + }; + + bool validateImpl(raw_ostream &diagnostics) override; + void addPasses(PassManager &pm) override; + + void addPassNamed(StringRef name) { _passNames.push_back(name); } + +protected: + Writer &writer() const override; + +private: + std::unique_ptr<Writer> _writer; + std::vector<StringRef> _passNames; +}; + +} // end namespace lld + +#endif diff --git a/include/lld/ReaderWriter/ELFLinkingContext.h b/include/lld/ReaderWriter/ELFLinkingContext.h new file mode 100644 index 
000000000000..d1cd3d9f3d6b --- /dev/null +++ b/include/lld/ReaderWriter/ELFLinkingContext.h @@ -0,0 +1,362 @@ +//===- lld/ReaderWriter/ELFLinkingContext.h -------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_ELF_LINKER_CONTEXT_H +#define LLD_READER_WRITER_ELF_LINKER_CONTEXT_H + +#include "lld/Core/LinkingContext.h" +#include "lld/Core/Pass.h" +#include "lld/Core/PassManager.h" +#include "lld/Core/STDExtras.h" +#include "lld/Core/range.h" +#include "lld/Core/Reader.h" +#include "lld/Core/Writer.h" +#include "lld/ReaderWriter/LinkerScript.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Object/ELF.h" +#include "llvm/Support/ELF.h" +#include <map> +#include <memory> +#include <set> + +namespace lld { +class DefinedAtom; +class Reference; +class File; + +namespace elf { +template <typename ELFT> class TargetHandler; +} + +class TargetHandlerBase { +public: + virtual ~TargetHandlerBase() {} + virtual void registerRelocationNames(Registry &) = 0; + + virtual std::unique_ptr<Reader> getObjReader() = 0; + + virtual std::unique_ptr<Reader> getDSOReader() = 0; + + virtual std::unique_ptr<Writer> getWriter() = 0; +}; + +class ELFLinkingContext : public LinkingContext { +public: + /// \brief The type of ELF executable that the linker + /// creates. + enum class OutputMagic : uint8_t { + DEFAULT, // The default mode, no specific magic set + NMAGIC, // Disallow shared libraries and don't align sections + // PageAlign Data, Mark Text Segment/Data segment RW + OMAGIC // Disallow shared libraries and don't align sections, + // Mark Text Segment/Data segment RW + }; + + llvm::Triple getTriple() const { return _triple; } + + // Page size. 
+ virtual uint64_t getPageSize() const { + if (_maxPageSize) + return *_maxPageSize; + return 0x1000; + } + virtual void setMaxPageSize(uint64_t pagesize) { + _maxPageSize = pagesize; + } + OutputMagic getOutputMagic() const { return _outputMagic; } + uint16_t getOutputELFType() const { return _outputELFType; } + uint16_t getOutputMachine() const; + bool mergeCommonStrings() const { return _mergeCommonStrings; } + virtual uint64_t getBaseAddress() const { return _baseAddress; } + virtual void setBaseAddress(uint64_t address) { _baseAddress = address; } + + void notifySymbolTableCoalesce(const Atom *existingAtom, const Atom *newAtom, + bool &useNew) override; + + /// This controls if undefined atoms need to be created for undefines that are + /// present in a SharedLibrary. If this option is set, undefined atoms are + /// created for every undefined symbol that are present in the dynamic table + /// in the shared library + bool useShlibUndefines() const { return _useShlibUndefines; } + /// @} + + /// \brief Does this relocation belong in the dynamic relocation table? + /// + /// This table is evaluated at loadtime by the dynamic loader and is + /// referenced by the DT_RELA{,ENT,SZ} entries in the dynamic table. + /// Relocations that return true will be added to the dynamic relocation + /// table. + virtual bool isDynamicRelocation(const Reference &) const { return false; } + + /// \brief Is this a copy relocation? + /// + /// If this is a copy relocation, its target must be an ObjectAtom. We must + /// include in DT_NEEDED the name of the library where this object came from. + virtual bool isCopyRelocation(const Reference &) const { + return false; + } + + bool validateImpl(raw_ostream &diagnostics) override; + + /// \brief Does the linker allow dynamic libraries to be linked with? 
+ /// This is true when the output mode of the executable is set to be + /// having NMAGIC/OMAGIC + virtual bool allowLinkWithDynamicLibraries() const { + if (_outputMagic == OutputMagic::NMAGIC || + _outputMagic == OutputMagic::OMAGIC || _noAllowDynamicLibraries) + return false; + return true; + } + + /// \brief Use Elf_Rela format to output relocation tables. + virtual bool isRelaOutputFormat() const { return true; } + + /// \brief Does this relocation belong in the dynamic plt relocation table? + /// + /// This table holds all of the relocations used for delayed symbol binding. + /// It will be evaluated at load time if LD_BIND_NOW is set. It is referenced + /// by the DT_{JMPREL,PLTRELSZ} entries in the dynamic table. + /// Relocations that return true will be added to the dynamic plt relocation + /// table. + virtual bool isPLTRelocation(const Reference &) const { return false; } + + /// \brief The path to the dynamic interpreter + virtual StringRef getDefaultInterpreter() const { + return "/lib64/ld-linux-x86-64.so.2"; + } + + /// \brief The dynamic linker path set by the --dynamic-linker option + virtual StringRef getInterpreter() const { + if (_dynamicLinkerArg) + return _dynamicLinkerPath; + return getDefaultInterpreter(); + } + + /// \brief Does the output have dynamic sections. + virtual bool isDynamic() const; + + /// \brief Are we creating a shared library? 
+ virtual bool isDynamicLibrary() const { + return _outputELFType == llvm::ELF::ET_DYN; + } + + /// \brief Is the relocation a relative relocation + virtual bool isRelativeReloc(const Reference &r) const; + + template <typename ELFT> + lld::elf::TargetHandler<ELFT> &getTargetHandler() const { + assert(_targetHandler && "Got null TargetHandler!"); + return static_cast<lld::elf::TargetHandler<ELFT> &>(*_targetHandler.get()); + } + + TargetHandlerBase *targetHandler() const { return _targetHandler.get(); } + void addPasses(PassManager &pm) override; + + void setTriple(llvm::Triple trip) { _triple = trip; } + void setNoInhibitExec(bool v) { _noInhibitExec = v; } + void setExportDynamic(bool v) { _exportDynamic = v; } + void setIsStaticExecutable(bool v) { _isStaticExecutable = v; } + void setMergeCommonStrings(bool v) { _mergeCommonStrings = v; } + void setUseShlibUndefines(bool use) { _useShlibUndefines = use; } + void setOutputELFType(uint32_t type) { _outputELFType = type; } + + bool shouldExportDynamic() const { return _exportDynamic; } + + void createInternalFiles(std::vector<std::unique_ptr<File>> &) const override; + + void finalizeInputFiles() override; + + /// \brief Set the dynamic linker path + void setInterpreter(StringRef dynamicLinker) { + _dynamicLinkerArg = true; + _dynamicLinkerPath = dynamicLinker; + } + + /// \brief Set NMAGIC output kind when the linker specifies --nmagic + /// or -n in the command line + /// Set OMAGIC output kind when the linker specifies --omagic + /// or -N in the command line + virtual void setOutputMagic(OutputMagic magic) { _outputMagic = magic; } + + /// \brief Disallow dynamic libraries during linking + virtual void setNoAllowDynamicLibraries() { _noAllowDynamicLibraries = true; } + + /// Searches directories for a match on the input File + ErrorOr<StringRef> searchLibrary(StringRef libName) const; + + /// \brief Searches directories for a match on the input file. 
+ /// If \p fileName is an absolute path and \p isSysRooted is true, check + /// the file under sysroot directory. If \p fileName is a relative path + /// and is not in the current directory, search the file through library + /// search directories. + ErrorOr<StringRef> searchFile(StringRef fileName, bool isSysRooted) const; + + /// Get the entry symbol name + StringRef entrySymbolName() const override; + + /// \brief Set new initializer function + void setInitFunction(StringRef name) { _initFunction = name; } + + /// \brief Return an initializer function name. + /// Either default "_init" or configured by the -init command line option. + StringRef initFunction() const { return _initFunction; } + + /// \brief Set new finalizer function + void setFiniFunction(StringRef name) { _finiFunction = name; } + + /// \brief Return a finalizer function name. + /// Either default "_fini" or configured by the -fini command line option. + StringRef finiFunction() const { return _finiFunction; } + + /// Add an absolute symbol. Used for --defsym. + void addInitialAbsoluteSymbol(StringRef name, uint64_t addr) { + _absoluteSymbols[name] = addr; + } + + void setSharedObjectName(StringRef soname) { + _soname = soname; + } + + StringRef sharedObjectName() const { return _soname; } + + StringRef getSysroot() const { return _sysrootPath; } + + /// \brief Set path to the system root + void setSysroot(StringRef path) { + _sysrootPath = path; + } + + void addRpath(StringRef path) { + _rpathList.push_back(path); + } + + range<const StringRef *> getRpathList() const { + return _rpathList; + } + + void addRpathLink(StringRef path) { + _rpathLinkList.push_back(path); + } + + range<const StringRef *> getRpathLinkList() const { + return _rpathLinkList; + } + + const std::map<std::string, uint64_t> &getAbsoluteSymbols() const { + return _absoluteSymbols; + } + + /// \brief Helper function to allocate strings. 
+ StringRef allocateString(StringRef ref) const { + char *x = _allocator.Allocate<char>(ref.size() + 1); + memcpy(x, ref.data(), ref.size()); + x[ref.size()] = '\0'; + return x; + } + + // add search path to list. + virtual bool addSearchPath(StringRef ref) { + _inputSearchPaths.push_back(ref); + return true; + } + + // Retrieve search path list. + StringRefVector getSearchPaths() { return _inputSearchPaths; }; + + // By default, the linker would merge sections that are read only with + // segments that have read and execute permissions. When the user specifies a + // flag --rosegment, a separate segment needs to be created. + bool mergeRODataToTextSegment() const { return _mergeRODataToTextSegment; } + + void setCreateSeparateROSegment() { _mergeRODataToTextSegment = false; } + + bool isDynamicallyExportedSymbol(StringRef name) const { + return _dynamicallyExportedSymbols.count(name) != 0; + } + + /// \brief Demangle symbols. + std::string demangle(StringRef symbolName) const override; + bool demangleSymbols() const { return _demangle; } + void setDemangleSymbols(bool d) { _demangle = d; } + + /// \brief Align segments. + bool alignSegments() const { return _alignSegments; } + void setAlignSegments(bool align) { _alignSegments = align; } + + /// \brief Strip symbols. + bool stripSymbols() const { return _stripSymbols; } + void setStripSymbols(bool strip) { _stripSymbols = strip; } + + /// \brief Collect statistics. + bool collectStats() const { return _collectStats; } + void setCollectStats(bool s) { _collectStats = s; } + + // --wrap option. 
+ void addWrapForSymbol(StringRef sym) { _wrapCalls.insert(sym); } + + const llvm::StringSet<> &wrapCalls() const { return _wrapCalls; } + + void setUndefinesResolver(std::unique_ptr<File> resolver); + + script::Sema &linkerScriptSema() { return _linkerScriptSema; } + const script::Sema &linkerScriptSema() const { return _linkerScriptSema; } + +private: + ELFLinkingContext() = delete; + +protected: + ELFLinkingContext(llvm::Triple, std::unique_ptr<TargetHandlerBase>); + + Writer &writer() const override; + + /// Method to create a internal file for an undefined symbol + std::unique_ptr<File> createUndefinedSymbolFile() const override; + + uint16_t _outputELFType; // e.g ET_EXEC + llvm::Triple _triple; + std::unique_ptr<TargetHandlerBase> _targetHandler; + uint64_t _baseAddress; + bool _isStaticExecutable; + bool _noInhibitExec; + bool _exportDynamic; + bool _mergeCommonStrings; + bool _useShlibUndefines; + bool _dynamicLinkerArg; + bool _noAllowDynamicLibraries; + bool _mergeRODataToTextSegment; + bool _demangle; + bool _stripSymbols; + bool _alignSegments; + bool _nostdlib; + bool _collectStats; + llvm::Optional<uint64_t> _maxPageSize; + + OutputMagic _outputMagic; + StringRefVector _inputSearchPaths; + std::unique_ptr<Writer> _writer; + StringRef _dynamicLinkerPath; + StringRef _initFunction; + StringRef _finiFunction; + StringRef _sysrootPath; + StringRef _soname; + StringRefVector _rpathList; + StringRefVector _rpathLinkList; + llvm::StringSet<> _wrapCalls; + std::map<std::string, uint64_t> _absoluteSymbols; + llvm::StringSet<> _dynamicallyExportedSymbols; + std::unique_ptr<File> _resolver; + + // The linker script semantic object, which owns all script ASTs, is stored + // in the current linking context via _linkerScriptSema. 
+ script::Sema _linkerScriptSema; +}; +} // end namespace lld + +#endif diff --git a/include/lld/ReaderWriter/ELFTargets.h b/include/lld/ReaderWriter/ELFTargets.h new file mode 100644 index 000000000000..3d00339818e2 --- /dev/null +++ b/include/lld/ReaderWriter/ELFTargets.h @@ -0,0 +1,38 @@ +//===- lld/ReaderWriter/ELFTargets.h --------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_ELF_TARGETS_H +#define LLD_READER_WRITER_ELF_TARGETS_H + +#include "ELFLinkingContext.h" + +namespace lld { +namespace elf { + +#define LLVM_TARGET(TargetName) \ + class TargetName##LinkingContext final : public ELFLinkingContext { \ + public: \ + static std::unique_ptr<ELFLinkingContext> create(llvm::Triple); \ + }; + +// FIXME: #include "llvm/Config/Targets.def" +LLVM_TARGET(AArch64) +LLVM_TARGET(ARM) +LLVM_TARGET(Hexagon) +LLVM_TARGET(Mips) +LLVM_TARGET(X86) +LLVM_TARGET(Example) +LLVM_TARGET(X86_64) + +#undef LLVM_TARGET + +} // end namespace elf +} // end namespace lld + +#endif diff --git a/include/lld/ReaderWriter/LinkerScript.h b/include/lld/ReaderWriter/LinkerScript.h new file mode 100644 index 000000000000..ae8d18d830c6 --- /dev/null +++ b/include/lld/ReaderWriter/LinkerScript.h @@ -0,0 +1,1396 @@ +//===- ReaderWriter/LinkerScript.h ----------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief Linker script parser. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_LINKER_SCRIPT_H +#define LLD_READER_WRITER_LINKER_SCRIPT_H + +#include "lld/Core/Error.h" +#include "lld/Core/LLVM.h" +#include "lld/Core/range.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/raw_ostream.h" +#include <memory> +#include <system_error> +#include <unordered_map> +#include <vector> + +namespace lld { +namespace script { +class Token { +public: + enum Kind { + unknown, + eof, + exclaim, + exclaimequal, + amp, + ampequal, + l_paren, + r_paren, + star, + starequal, + plus, + plusequal, + comma, + minus, + minusequal, + slash, + slashequal, + number, + colon, + semicolon, + less, + lessequal, + lessless, + lesslessequal, + equal, + equalequal, + greater, + greaterequal, + greatergreater, + greatergreaterequal, + question, + identifier, + libname, + kw_align, + kw_align_with_input, + kw_as_needed, + kw_at, + kw_discard, + kw_entry, + kw_exclude_file, + kw_extern, + kw_group, + kw_hidden, + kw_input, + kw_keep, + kw_length, + kw_memory, + kw_origin, + kw_provide, + kw_provide_hidden, + kw_only_if_ro, + kw_only_if_rw, + kw_output, + kw_output_arch, + kw_output_format, + kw_overlay, + kw_search_dir, + kw_sections, + kw_sort_by_alignment, + kw_sort_by_init_priority, + kw_sort_by_name, + kw_sort_none, + kw_subalign, + l_brace, + pipe, + pipeequal, + r_brace, + tilde + }; + + Token() : _kind(unknown) {} + Token(StringRef range, Kind kind) : _range(range), _kind(kind) {} + + void dump(raw_ostream &os) const; + + StringRef _range; + Kind _kind; +}; + +class Lexer { +public: + explicit Lexer(std::unique_ptr<MemoryBuffer> mb) : _buffer(mb->getBuffer()) { + 
_sourceManager.AddNewSourceBuffer(std::move(mb), llvm::SMLoc()); + } + + void lex(Token &tok); + + const llvm::SourceMgr &getSourceMgr() const { return _sourceManager; } + +private: + bool canStartNumber(char c) const; + bool canContinueNumber(char c) const; + bool canStartName(char c) const; + bool canContinueName(char c) const; + void skipWhitespace(); + + Token _current; + /// \brief The current buffer state. + StringRef _buffer; + // Lexer owns the input files. + llvm::SourceMgr _sourceManager; +}; + +/// All linker scripts commands derive from this class. High-level, sections and +/// output section commands are all subclasses of this class. +/// Examples: +/// +/// OUTPUT_FORMAT("elf64-x86-64") /* A linker script command */ +/// OUTPUT_ARCH(i386:x86-64) /* Another command */ +/// ENTRY(_start) /* Another command */ +/// +/// SECTIONS /* Another command */ +/// { +/// .interp : { /* A sections-command */ +/// *(.interp) /* An output-section-command */ +/// } +/// } +/// +class Command { +public: + enum class Kind { + Entry, + Extern, + Group, + Input, + InputSectionsCmd, + InputSectionName, + Memory, + Output, + OutputArch, + OutputFormat, + OutputSectionDescription, + Overlay, + SearchDir, + Sections, + SortedGroup, + SymbolAssignment, + }; + + Kind getKind() const { return _kind; } + inline llvm::BumpPtrAllocator &getAllocator() const; + + virtual void dump(raw_ostream &os) const = 0; + + virtual ~Command() {} + +protected: + Command(class Parser &ctx, Kind k) : _ctx(ctx), _kind(k) {} + +private: + Parser &_ctx; + Kind _kind; +}; + +class Output : public Command { +public: + Output(Parser &ctx, StringRef outputFileName) + : Command(ctx, Kind::Output), _outputFileName(outputFileName) {} + + static bool classof(const Command *c) { return c->getKind() == Kind::Output; } + + void dump(raw_ostream &os) const override { + os << "OUTPUT(" << _outputFileName << ")\n"; + } + + StringRef getOutputFileName() const { return _outputFileName; } + +private: + StringRef 
_outputFileName; +}; + +class OutputFormat : public Command { +public: + OutputFormat(Parser &ctx, const SmallVectorImpl<StringRef> &formats) + : Command(ctx, Kind::OutputFormat) { + size_t numFormats = formats.size(); + StringRef *formatsStart = getAllocator().Allocate<StringRef>(numFormats); + std::copy(std::begin(formats), std::end(formats), formatsStart); + _formats = llvm::makeArrayRef(formatsStart, numFormats); + } + + static bool classof(const Command *c) { + return c->getKind() == Kind::OutputFormat; + } + + void dump(raw_ostream &os) const override { + os << "OUTPUT_FORMAT("; + bool first = true; + for (StringRef format : _formats) { + if (!first) + os << ","; + first = false; + os << "\"" << format << "\""; + } + os << ")\n"; + } + + llvm::ArrayRef<StringRef> getFormats() { return _formats; } + +private: + llvm::ArrayRef<StringRef> _formats; +}; + +class OutputArch : public Command { +public: + OutputArch(Parser &ctx, StringRef arch) + : Command(ctx, Kind::OutputArch), _arch(arch) {} + + static bool classof(const Command *c) { + return c->getKind() == Kind::OutputArch; + } + + void dump(raw_ostream &os) const override { + os << "OUTPUT_ARCH(" << getArch() << ")\n"; + } + + StringRef getArch() const { return _arch; } + +private: + StringRef _arch; +}; + +struct Path { + StringRef _path; + bool _asNeeded; + bool _isDashlPrefix; + + Path() : _asNeeded(false), _isDashlPrefix(false) {} + Path(StringRef path, bool asNeeded = false, bool isLib = false) + : _path(path), _asNeeded(asNeeded), _isDashlPrefix(isLib) {} +}; + +template<Command::Kind K> +class PathList : public Command { +public: + PathList(Parser &ctx, StringRef name, const SmallVectorImpl<Path> &paths) + : Command(ctx, K), _name(name) { + size_t numPaths = paths.size(); + Path *pathsStart = getAllocator().template Allocate<Path>(numPaths); + std::copy(std::begin(paths), std::end(paths), pathsStart); + _paths = llvm::makeArrayRef(pathsStart, numPaths); + } + + static bool classof(const Command *c) { 
return c->getKind() == K; } + + void dump(raw_ostream &os) const override { + os << _name << "("; + bool first = true; + for (const Path &path : getPaths()) { + if (!first) + os << " "; + first = false; + if (path._asNeeded) + os << "AS_NEEDED("; + if (path._isDashlPrefix) + os << "-l"; + os << path._path; + if (path._asNeeded) + os << ")"; + } + os << ")\n"; + } + + llvm::ArrayRef<Path> getPaths() const { return _paths; } + +private: + StringRef _name; + llvm::ArrayRef<Path> _paths; +}; + +class Group : public PathList<Command::Kind::Group> { +public: + template <class RangeT> + Group(Parser &ctx, RangeT range) + : PathList(ctx, "GROUP", std::move(range)) {} +}; + +class Input : public PathList<Command::Kind::Input> { +public: + template <class RangeT> + Input(Parser &ctx, RangeT range) + : PathList(ctx, "INPUT", std::move(range)) {} +}; + +class Entry : public Command { +public: + Entry(Parser &ctx, StringRef entryName) + : Command(ctx, Kind::Entry), _entryName(entryName) {} + + static bool classof(const Command *c) { return c->getKind() == Kind::Entry; } + + void dump(raw_ostream &os) const override { + os << "ENTRY(" << _entryName << ")\n"; + } + + StringRef getEntryName() const { return _entryName; } + +private: + StringRef _entryName; +}; + +class SearchDir : public Command { +public: + SearchDir(Parser &ctx, StringRef searchPath) + : Command(ctx, Kind::SearchDir), _searchPath(searchPath) {} + + static bool classof(const Command *c) { + return c->getKind() == Kind::SearchDir; + } + + void dump(raw_ostream &os) const override { + os << "SEARCH_DIR(\"" << _searchPath << "\")\n"; + } + + StringRef getSearchPath() const { return _searchPath; } + +private: + StringRef _searchPath; +}; + +/// Superclass for expression nodes. Linker scripts accept C-like expressions in +/// many places, such as when defining the value of a symbol or the address of +/// an output section. 
+/// Example: +/// +/// SECTIONS { +/// my_symbol = 1 + 1 * 2; +/// | | ^~~~> Constant : Expression +/// | | ^~~~> Constant : Expression +/// | | ^~~~> BinOp : Expression +/// ^~~~> Constant : Expression +/// ^~~~> BinOp : Expression (the top-level Expression node) +/// } +/// +class Expression { +public: + // The symbol table does not need to own its string keys and the use of StringMap + // here is an overkill. + typedef llvm::StringMap<int64_t, llvm::BumpPtrAllocator> SymbolTableTy; + + enum class Kind { Constant, Symbol, FunctionCall, Unary, BinOp, + TernaryConditional }; + Kind getKind() const { return _kind; } + inline llvm::BumpPtrAllocator &getAllocator() const; + virtual void dump(raw_ostream &os) const = 0; + virtual ErrorOr<int64_t> evalExpr(SymbolTableTy &symbolTable) const = 0; + virtual ~Expression() {} + +protected: + Expression(class Parser &ctx, Kind k) : _ctx(ctx), _kind(k) {} + +private: + Parser &_ctx; + Kind _kind; +}; + +/// A constant value is stored as unsigned because it represents absolute +/// values. We represent negative numbers by composing the unary '-' operator +/// with a constant. 
+class Constant : public Expression { +public: + Constant(Parser &ctx, uint64_t num) + : Expression(ctx, Kind::Constant), _num(num) {} + void dump(raw_ostream &os) const override; + + static bool classof(const Expression *c) { + return c->getKind() == Kind::Constant; + } + + ErrorOr<int64_t> evalExpr(SymbolTableTy &symbolTable) const override; + +private: + uint64_t _num; +}; + +class Symbol : public Expression { +public: + Symbol(Parser &ctx, StringRef name) + : Expression(ctx, Kind::Symbol), _name(name) {} + void dump(raw_ostream &os) const override; + + static bool classof(const Expression *c) { + return c->getKind() == Kind::Symbol; + } + + ErrorOr<int64_t> evalExpr(SymbolTableTy &symbolTable) const override; + +private: + StringRef _name; +}; + +class FunctionCall : public Expression { +public: + FunctionCall(Parser &ctx, StringRef name, + const SmallVectorImpl<const Expression *> &args) + : Expression(ctx, Kind::FunctionCall), _name(name) { + size_t numArgs = args.size(); + const Expression **argsStart = + getAllocator().Allocate<const Expression *>(numArgs); + std::copy(std::begin(args), std::end(args), argsStart); + _args = llvm::makeArrayRef(argsStart, numArgs); + } + + void dump(raw_ostream &os) const override; + + static bool classof(const Expression *c) { + return c->getKind() == Kind::FunctionCall; + } + + ErrorOr<int64_t> evalExpr(SymbolTableTy &symbolTable) const override; + +private: + StringRef _name; + llvm::ArrayRef<const Expression *> _args; +}; + +class Unary : public Expression { +public: + enum Operation { + Minus, + Not + }; + + Unary(Parser &ctx, Operation op, const Expression *child) + : Expression(ctx, Kind::Unary), _op(op), _child(child) {} + void dump(raw_ostream &os) const override; + + static bool classof(const Expression *c) { + return c->getKind() == Kind::Unary; + } + + ErrorOr<int64_t> evalExpr(SymbolTableTy &symbolTable) const override; + +private: + Operation _op; + const Expression *_child; +}; + +class BinOp : public 
Expression { +public: + enum Operation { + And, + CompareDifferent, + CompareEqual, + CompareGreater, + CompareGreaterEqual, + CompareLess, + CompareLessEqual, + Div, + Mul, + Or, + Shl, + Shr, + Sub, + Sum + }; + + BinOp(Parser &ctx, const Expression *lhs, Operation op, const Expression *rhs) + : Expression(ctx, Kind::BinOp), _op(op), _lhs(lhs), _rhs(rhs) {} + + void dump(raw_ostream &os) const override; + + static bool classof(const Expression *c) { + return c->getKind() == Kind::BinOp; + } + + ErrorOr<int64_t> evalExpr(SymbolTableTy &symbolTable) const override; + +private: + Operation _op; + const Expression *_lhs; + const Expression *_rhs; +}; + +/// Operands of the ternary operator can be any expression, similar to the other +/// operations, including another ternary operator. To disambiguate the parse +/// tree, note that ternary conditionals have precedence 13 and, different from +/// other operators, associates right-to-left. For example: +/// +/// i = i > 3 ? i < 5 ? 1 : 2 : 0; +/// +/// will have the following parse tree: +/// +/// i = ((i > 3) ? ((i < 5) ? 1 : 2) : 0); +/// +/// The '>' binds tigher because it has precedence 6. When faced with two "?" +/// ternary operators back-to-back, the parser prioritized the rightmost one. 
+/// +class TernaryConditional : public Expression { +public: + TernaryConditional(Parser &ctx, const Expression *conditional, + const Expression *trueExpr, const Expression *falseExpr) + : Expression(ctx, Kind::TernaryConditional), _conditional(conditional), + _trueExpr(trueExpr), _falseExpr(falseExpr) {} + + void dump(raw_ostream &os) const override; + + static bool classof(const Expression *c) { + return c->getKind() == Kind::TernaryConditional; + } + + ErrorOr<int64_t> evalExpr(SymbolTableTy &symbolTable) const override; + +private: + const Expression *_conditional; + const Expression *_trueExpr; + const Expression *_falseExpr; +}; + +/// Symbol assignments of the form "symbolname = <expression>" may occur either +/// as sections-commands or as output-section-commands. +/// Example: +/// +/// SECTIONS { +/// mysymbol = . /* SymbolAssignment as a sections-command */ +/// .data : { +/// othersymbol = . /* SymbolAssignment as an output-section-command */ +/// } +///} +/// +class SymbolAssignment : public Command { +public: + enum AssignmentKind { Simple, Sum, Sub, Mul, Div, Shl, Shr, And, Or }; + enum AssignmentVisibility { Default, Hidden, Provide, ProvideHidden }; + + SymbolAssignment(Parser &ctx, StringRef name, const Expression *expr, + AssignmentKind kind, AssignmentVisibility visibility) + : Command(ctx, Kind::SymbolAssignment), _expression(expr), _symbol(name), + _assignmentKind(Simple), _assignmentVisibility(visibility) {} + + static bool classof(const Command *c) { + return c->getKind() == Kind::SymbolAssignment; + } + + void dump(raw_ostream &os) const override; + const Expression *expr() const { return _expression; } + StringRef symbol() const { return _symbol; } + AssignmentKind assignmentKind() const { return _assignmentKind; } + AssignmentVisibility assignmentVisibility() const { + return _assignmentVisibility; + } + +private: + const Expression *_expression; + StringRef _symbol; + AssignmentKind _assignmentKind; + AssignmentVisibility 
_assignmentVisibility; +}; + +/// Encodes how to sort file names or section names that are expanded from +/// wildcard operators. This typically occurs in constructs such as +/// SECTIONS { .data : SORT_BY_NAME(*)(*) }}, where the order of the expanded +/// names is important to determine which sections go first. +enum class WildcardSortMode { + NA, + ByAlignment, + ByAlignmentAndName, + ByInitPriority, + ByName, + ByNameAndAlignment, + None +}; + +/// Represents either a single input section name or a group of sorted input +/// section names. They specify which sections to map to a given output section. +/// Example: +/// +/// SECTIONS { +/// .x: { *(.text) } +/// /* ^~~~^ InputSectionName : InputSection */ +/// .y: { *(SORT(.text*)) } +/// /* ^~~~~~~~~~~^ InputSectionSortedGroup : InputSection */ +/// } +class InputSection : public Command { +public: + static bool classof(const Command *c) { + return c->getKind() == Kind::InputSectionName || + c->getKind() == Kind::SortedGroup; + } + +protected: + InputSection(Parser &ctx, Kind k) : Command(ctx, k) {} +}; + +class InputSectionName : public InputSection { +public: + InputSectionName(Parser &ctx, StringRef name, bool excludeFile) + : InputSection(ctx, Kind::InputSectionName), _name(name), + _excludeFile(excludeFile) {} + + void dump(raw_ostream &os) const override; + + static bool classof(const Command *c) { + return c->getKind() == Kind::InputSectionName; + } + bool hasExcludeFile() const { return _excludeFile; } + StringRef name() const { return _name; } + +private: + StringRef _name; + bool _excludeFile; +}; + +class InputSectionSortedGroup : public InputSection { +public: + typedef llvm::ArrayRef<const InputSection *>::const_iterator const_iterator; + + InputSectionSortedGroup(Parser &ctx, WildcardSortMode sort, + const SmallVectorImpl<const InputSection *> §ions) + : InputSection(ctx, Kind::SortedGroup), _sortMode(sort) { + size_t numSections = sections.size(); + const InputSection **sectionsStart = + 
getAllocator().Allocate<const InputSection *>(numSections); + std::copy(std::begin(sections), std::end(sections), sectionsStart); + _sections = llvm::makeArrayRef(sectionsStart, numSections); + } + + void dump(raw_ostream &os) const override; + WildcardSortMode sortMode() const { return _sortMode; } + + static bool classof(const Command *c) { + return c->getKind() == Kind::SortedGroup; + } + + const_iterator begin() const { return _sections.begin(); } + const_iterator end() const { return _sections.end(); } + +private: + WildcardSortMode _sortMode; + llvm::ArrayRef<const InputSection *> _sections; +}; + +/// An output-section-command that maps a series of sections inside a given +/// file-archive pair to an output section. +/// Example: +/// +/// SECTIONS { +/// .x: { *(.text) } +/// /* ^~~~~~~^ InputSectionsCmd */ +/// .y: { w:z(SORT(.text*)) } +/// /* ^~~~~~~~~~~~~~~~^ InputSectionsCmd */ +/// } +class InputSectionsCmd : public Command { +public: + typedef llvm::ArrayRef<const InputSection *>::const_iterator const_iterator; + typedef std::vector<const InputSection *> VectorTy; + + InputSectionsCmd(Parser &ctx, StringRef memberName, StringRef archiveName, + bool keep, WildcardSortMode fileSortMode, + WildcardSortMode archiveSortMode, + const SmallVectorImpl<const InputSection *> §ions) + : Command(ctx, Kind::InputSectionsCmd), _memberName(memberName), + _archiveName(archiveName), _keep(keep), _fileSortMode(fileSortMode), + _archiveSortMode(archiveSortMode) { + size_t numSections = sections.size(); + const InputSection **sectionsStart = + getAllocator().Allocate<const InputSection *>(numSections); + std::copy(std::begin(sections), std::end(sections), sectionsStart); + _sections = llvm::makeArrayRef(sectionsStart, numSections); + } + + void dump(raw_ostream &os) const override; + + static bool classof(const Command *c) { + return c->getKind() == Kind::InputSectionsCmd; + } + + StringRef memberName() const { return _memberName; } + StringRef archiveName() const { 
return _archiveName; } + const_iterator begin() const { return _sections.begin(); } + const_iterator end() const { return _sections.end(); } + WildcardSortMode archiveSortMode() const { return _archiveSortMode; } + WildcardSortMode fileSortMode() const { return _fileSortMode; } + +private: + StringRef _memberName; + StringRef _archiveName; + bool _keep; + WildcardSortMode _fileSortMode; + WildcardSortMode _archiveSortMode; + llvm::ArrayRef<const InputSection *> _sections; +}; + +/// A sections-command to specify which input sections and symbols compose a +/// given output section. +/// Example: +/// +/// SECTIONS { +/// .x: { *(.text) ; symbol = .; } +/// /*^~~~~~~~~~~~~~~~~~~~~~~~~~~~~^ OutputSectionDescription */ +/// .y: { w:z(SORT(.text*)) } +/// /*^~~~~~~~~~~~~~~~~~~~~~~~^ OutputSectionDescription */ +/// .a 0x10000 : ONLY_IF_RW { *(.data*) ; *:libc.a(SORT(*)); } +/// /*^~~~~~~~~~~~~ OutputSectionDescription ~~~~~~~~~~~~~~~~~^ */ +/// } +class OutputSectionDescription : public Command { +public: + enum Constraint { C_None, C_OnlyIfRO, C_OnlyIfRW }; + + typedef llvm::ArrayRef<const Command *>::const_iterator const_iterator; + + OutputSectionDescription( + Parser &ctx, StringRef sectionName, const Expression *address, + const Expression *align, const Expression *subAlign, const Expression *at, + const Expression *fillExpr, StringRef fillStream, bool alignWithInput, + bool discard, Constraint constraint, + const SmallVectorImpl<const Command *> &outputSectionCommands) + : Command(ctx, Kind::OutputSectionDescription), _sectionName(sectionName), + _address(address), _align(align), _subAlign(subAlign), _at(at), + _fillExpr(fillExpr), _fillStream(fillStream), + _alignWithInput(alignWithInput), _discard(discard), + _constraint(constraint) { + size_t numCommands = outputSectionCommands.size(); + const Command **commandsStart = + getAllocator().Allocate<const Command *>(numCommands); + std::copy(std::begin(outputSectionCommands), + std::end(outputSectionCommands), 
commandsStart); + _outputSectionCommands = llvm::makeArrayRef(commandsStart, numCommands); + } + + static bool classof(const Command *c) { + return c->getKind() == Kind::OutputSectionDescription; + } + + void dump(raw_ostream &os) const override; + + const_iterator begin() const { return _outputSectionCommands.begin(); } + const_iterator end() const { return _outputSectionCommands.end(); } + StringRef name() const { return _sectionName; } + +private: + StringRef _sectionName; + const Expression *_address; + const Expression *_align; + const Expression *_subAlign; + const Expression *_at; + const Expression *_fillExpr; + StringRef _fillStream; + bool _alignWithInput; + bool _discard; + Constraint _constraint; + llvm::ArrayRef<const Command *> _outputSectionCommands; +}; + +/// Represents an Overlay structure as documented in +/// https://sourceware.org/binutils/docs/ld/Overlay-Description.html#Overlay-Description +class Overlay : public Command { +public: + Overlay(Parser &ctx) : Command(ctx, Kind::Overlay) {} + + static bool classof(const Command *c) { + return c->getKind() == Kind::Overlay; + } + + void dump(raw_ostream &os) const override { os << "Overlay description\n"; } +}; + +/// Represents all the contents of the SECTIONS {} construct. 
+class Sections : public Command { +public: + typedef llvm::ArrayRef<const Command *>::const_iterator const_iterator; + + Sections(Parser &ctx, + const SmallVectorImpl<const Command *> §ionsCommands) + : Command(ctx, Kind::Sections) { + size_t numCommands = sectionsCommands.size(); + const Command **commandsStart = + getAllocator().Allocate<const Command *>(numCommands); + std::copy(std::begin(sectionsCommands), std::end(sectionsCommands), + commandsStart); + _sectionsCommands = llvm::makeArrayRef(commandsStart, numCommands); + } + + static bool classof(const Command *c) { + return c->getKind() == Kind::Sections; + } + + void dump(raw_ostream &os) const override; + const_iterator begin() const { return _sectionsCommands.begin(); } + const_iterator end() const { return _sectionsCommands.end(); } + +private: + llvm::ArrayRef<const Command *> _sectionsCommands; +}; + +/// Represents a single memory block definition in a MEMORY {} command. +class MemoryBlock { +public: + MemoryBlock(StringRef name, StringRef attr, + const Expression *origin, const Expression *length) + : _name(name), _attr(attr), _origin(origin), _length(length) {} + + void dump(raw_ostream &os) const; + +private: + StringRef _name; + StringRef _attr; + const Expression *_origin; + const Expression *_length; +}; + +/// Represents all the contents of the MEMORY {} command. +class Memory : public Command { +public: + Memory(Parser &ctx, + const SmallVectorImpl<const MemoryBlock *> &blocks) + : Command(ctx, Kind::Memory) { + size_t numBlocks = blocks.size(); + const MemoryBlock **blocksStart = + getAllocator().Allocate<const MemoryBlock *>(numBlocks); + std::copy(std::begin(blocks), std::end(blocks), blocksStart); + _blocks = llvm::makeArrayRef(blocksStart, numBlocks); + } + + static bool classof(const Command *c) { + return c->getKind() == Kind::Memory; + } + + void dump(raw_ostream &os) const override; + +private: + llvm::ArrayRef<const MemoryBlock *> _blocks; +}; + +/// Represents an extern command. 
+class Extern : public Command { +public: + typedef llvm::ArrayRef<StringRef>::const_iterator const_iterator; + + Extern(Parser &ctx, + const SmallVectorImpl<StringRef> &symbols) + : Command(ctx, Kind::Extern) { + size_t numSymbols = symbols.size(); + StringRef *symbolsStart = + getAllocator().Allocate<StringRef>(numSymbols); + std::copy(std::begin(symbols), std::end(symbols), symbolsStart); + _symbols = llvm::makeArrayRef(symbolsStart, numSymbols); + } + + static bool classof(const Command *c) { + return c->getKind() == Kind::Extern; + } + + void dump(raw_ostream &os) const override; + const_iterator begin() const { return _symbols.begin(); } + const_iterator end() const { return _symbols.end(); } + +private: + llvm::ArrayRef<StringRef> _symbols; +}; + +/// Stores the parse tree of a linker script. +class LinkerScript { +public: + void dump(raw_ostream &os) const { + for (const Command *c : _commands) { + c->dump(os); + if (isa<SymbolAssignment>(c)) + os << "\n"; + } + } + + std::vector<const Command *> _commands; +}; + +/// Recognizes syntactic constructs of a linker script using a predictive +/// parser/recursive descent implementation. +/// +/// Based on the linker script documentation available at +/// https://sourceware.org/binutils/docs/ld/Scripts.html +class Parser { +public: + explicit Parser(std::unique_ptr<MemoryBuffer> mb) + : _lex(std::move(mb)), _peekAvailable(false) {} + + /// Let's not allow copying of Parser class because it would be expensive + /// to update all the AST pointers to a new buffer. + Parser(const Parser &instance) = delete; + + /// Lex and parse the current memory buffer to create a linker script AST. + std::error_code parse(); + + /// Returns a reference to the top level node of the linker script AST. + LinkerScript *get() { return &_script; } + + /// Returns a reference to the underlying allocator. 
+ llvm::BumpPtrAllocator &getAllocator() { return _alloc; } + +private: + /// Advances to the next token, either asking the Lexer to lex the next token + /// or obtaining it from the look ahead buffer. + void consumeToken() { + // First check if the look ahead buffer cached the next token + if (_peekAvailable) { + _tok = _bufferedToken; + _peekAvailable = false; + return; + } + _lex.lex(_tok); + } + + /// Returns the token that succeeds the current one without consuming the + /// current token. This operation will lex an additional token and store it in + /// a private buffer. + const Token &peek() { + if (_peekAvailable) + return _bufferedToken; + + _lex.lex(_bufferedToken); + _peekAvailable = true; + return _bufferedToken; + } + + void error(const Token &tok, Twine msg) { + _lex.getSourceMgr().PrintMessage( + llvm::SMLoc::getFromPointer(tok._range.data()), + llvm::SourceMgr::DK_Error, msg); + } + + bool expectAndConsume(Token::Kind kind, Twine msg) { + if (_tok._kind != kind) { + error(_tok, msg); + return false; + } + consumeToken(); + return true; + } + + bool isNextToken(Token::Kind kind) { return (_tok._kind == kind); } + + // Recursive descent parsing member functions + // All of these functions consumes tokens and return an AST object, + // represented by the Command superclass. However, note that not all AST + // objects derive from Command. For nodes of C-like expressions, used in + // linker scripts, the superclass is Expression. For nodes that represent + // input sections that map to an output section, the superclass is + // InputSection. 
+ // + // Example mapping common constructs to AST nodes: + // + // SECTIONS { /* Parsed to Sections class */ + // my_symbol = 1 + 1; /* Parsed to SymbolAssignment class */ + // /* ^~~> Parsed to Expression class */ + // .data : { *(.data) } /* Parsed to OutputSectionDescription class */ + // /* ^~~> Parsed to InputSectionName class */ + // /* ^~~~~> Parsed to InputSectionsCmd class */ + // } + + // ==== Expression parsing member functions ==== + + /// Parse "identifier(param [, param]...)" + /// + /// Example: + /// + /// SECTIONS { + /// my_symbol = 0x1000 | ALIGN(other_symbol); + /// /* ^~~~> parseFunctionCall() */ + /// } + const Expression *parseFunctionCall(); + + /// Ensures that the current token is an expression operand. If it is not, + /// issues an error to the user and returns false. + bool expectExprOperand(); + + /// Parse operands of an expression, such as function calls, identifiers, + /// literal numbers or unary operators. + /// + /// Example: + /// + /// SECTIONS { + /// my_symbol = 0x1000 | ALIGN(other_symbol); + /// ^~~~> parseExprOperand() + /// } + const Expression *parseExprOperand(); + + // As a reference to the precedence of C operators, consult + // http://en.cppreference.com/w/c/language/operator_precedence + + /// Parse either a single expression operand and return, or parse an entire + /// expression if its top-level node has a lower or equal precedence than the + /// indicated. + const Expression *parseExpression(unsigned precedence = 13); + + /// Parse an operator and its rhs operand, assuming that the lhs was already + /// consumed. Keep parsing subsequent operator-operand pairs that do not + /// exceed maxPrecedence. + /// * lhs points to the left-hand-side operand of this operator + /// * maxPrecedence has the maximum operator precedence level that this parse + /// function is allowed to consume. 
+ const Expression *parseOperatorOperandLoop(const Expression *lhs, + unsigned maxPrecedence); + + /// Parse ternary conditionals such as "(condition) ? true : false;". This + /// operator has precedence level 13 and associates right-to-left. + const Expression *parseTernaryCondOp(const Expression *lhs); + + // ==== High-level commands parsing ==== + + /// Parse the OUTPUT linker script command. + /// Example: + /// OUTPUT(/path/to/file) + /// ^~~~> parseOutput() + /// + Output *parseOutput(); + + /// Parse the OUTPUT_FORMAT linker script command. + /// Example: + /// + /// OUTPUT_FORMAT(elf64-x86-64,elf64-x86-64,elf64-x86-64) + /// ^~~~> parseOutputFormat() + /// + OutputFormat *parseOutputFormat(); + + /// Parse the OUTPUT_ARCH linker script command. + /// Example: + /// + /// OUTPUT_ARCH(i386:x86-64) + /// ^~~~> parseOutputArch() + /// + OutputArch *parseOutputArch(); + + /// Parse the INPUT or GROUP linker script command. + /// Example: + /// + /// GROUP ( /lib/x86_64-linux-gnu/libc.so.6 + /// /usr/lib/x86_64-linux-gnu/libc_nonshared.a + /// AS_NEEDED ( /lib/x86_64-linux-gnu/ld-linux-x86-64.so.2 ) + /// -lm -l:libgcc.a ) + /// + template<class T> T *parsePathList(); + /// NOTE(review): presumably parses an AS_NEEDED ( ... ) group such as the one + /// nested in the example above, appending to paths and returning false on + /// failure -- confirm against the definition. + bool parseAsNeeded(SmallVectorImpl<Path> &paths); + + /// Parse the ENTRY linker script command. + /// Example: + /// + /// ENTRY(init) + /// ^~~~> parseEntry() + /// + Entry *parseEntry(); + + /// Parse the SEARCH_DIR linker script command. + /// Example: + /// + /// SEARCH_DIR("/usr/x86_64-linux-gnu/lib64"); + /// ^~~~> parseSearchDir() + /// + SearchDir *parseSearchDir(); + + /// Parse "symbol = expression" commands that live inside the + /// SECTIONS directive. + /// Example: + /// + /// SECTIONS { + /// my_symbol = 1 + 1; + /// ^~~~> parseExpression() + /// ^~~~> parseSymbolAssignment() + /// } + /// + const SymbolAssignment *parseSymbolAssignment(); + + /// Parse "EXCLUDE_FILE" used inside the listing of input section names.
+ /// Example: + /// + /// SECTIONS { + /// .data : { *(EXCLUDE_FILE (*crtend.o *otherfile.o) .ctors) } + /// ^~~~> parseExcludeFile() + /// } + /// + ErrorOr<InputSectionsCmd::VectorTy> parseExcludeFile(); + + /// Helper to parse SORT_BY_NAME(, SORT_BY_ALIGNMENT( and SORT_NONE(, + /// possibly nested. Returns the number of Token::r_paren tokens that need + /// to be consumed, while sortMode is updated with the parsed sort + /// criteria. + /// Example: + /// + /// SORT_BY_NAME(SORT_BY_ALIGNMENT(*)) + /// ^~~~ parseSortDirectives() ~~^ + /// Returns 2, finishes with sortMode = WildcardSortMode::ByNameAndAlignment + /// + int parseSortDirectives(WildcardSortMode &sortMode); + + /// Parse a group of input section names that are sorted via SORT* directives. + /// Example: + /// SORT_BY_NAME(SORT_BY_ALIGNMENT(*data *bss)) + const InputSection *parseSortedInputSections(); + + /// Parse input section description statements. + /// Example: + /// + /// SECTIONS { + /// .mysection : { crt.o(.data* .bss SORT_BY_NAME(name*)) } + /// ^~~~ parseInputSectionsCmd() + /// } + const InputSectionsCmd *parseInputSectionsCmd(); + + /// Parse output section description statements. + /// Example: + /// + /// SECTIONS { + /// .data : { crt.o(.data* .bss SORT_BY_NAME(name*)) } + /// ^~~~ parseOutputSectionDescription() + /// } + const OutputSectionDescription *parseOutputSectionDescription(); + + /// Stub for parsing overlay commands. Currently unimplemented. + const Overlay *parseOverlay(); + + /// Parse the SECTIONS linker script command. + /// Example: + /// + /// SECTIONS { + /// ^~~~ parseSections() + /// . = 0x100000; + /// .data : { *(.data) } + /// } + /// + Sections *parseSections(); + + /// Parse the MEMORY linker script command. + /// Example: + /// + /// MEMORY { + /// ^~~~ parseMemory() + /// ram (rwx) : ORIGIN = 0x20000000, LENGTH = 96K + /// rom (rx) : ORIGIN = 0x0, LENGTH = 256K + /// } + /// + Memory *parseMemory(); + + /// Parse the EXTERN linker script command.
+ /// Example: + /// + /// EXTERN(symbol symbol ...) + /// ^~~~> parseExtern() + /// + Extern *parseExtern(); + +private: + // Owns all of the linker script AST nodes + llvm::BumpPtrAllocator _alloc; + + // The top-level/entry-point linker script AST node + LinkerScript _script; + + // Lexer that produces the tokens read by consumeToken() and peek() + Lexer _lex; + + // Current token being analyzed + Token _tok; + + // Annotate whether we buffered the next token to allow peeking + bool _peekAvailable; + // The buffered look-ahead token; only meaningful while _peekAvailable is true + Token _bufferedToken; +}; + +/// script::Sema traverses all parsed linker script structures and populates +/// internal data structures to be able to answer the following questions: +/// +/// * According to the linker script, which input section goes first in the +/// output file layout, input section A or input section B? +/// +/// * What is the name of the output section that input section A should be +/// mapped to? +/// +/// * Which linker script expressions should be calculated before emitting +/// a given section? +/// +/// * How to evaluate a given linker script expression? +/// +class Sema { +public: + /// From the linker script point of view, this class represents the minimum + /// set of information to uniquely identify an input section. + struct SectionKey { + StringRef archivePath; + StringRef memberPath; + StringRef sectionName; + }; + + Sema(); + + /// We can parse several linker scripts via command line whose ASTs are stored + /// here via addLinkerScript(). + void addLinkerScript(std::unique_ptr<Parser> script) { + _scripts.push_back(std::move(script)); + } + + /// Returns the parsers stored so far via addLinkerScript(). + const std::vector<std::unique_ptr<Parser>> &getLinkerScripts() { + return _scripts; + } + + /// Prepare our data structures according to the linker scripts currently in + /// our control (control given via addLinkerScript()). Called once all linker + /// scripts have been parsed. + void perform(); + + /// Answer if we have layout commands (section mapping rules).
If we don't, + /// the output file writer can assume there is no linker script special rule + /// to handle. + bool hasLayoutCommands() const { return _layoutCommands.size() > 0; } + + /// Return true if this section has a mapping rule in the linker script + bool hasMapping(const SectionKey &key) const { |