diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2017-01-02 19:19:15 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2017-01-02 19:19:15 +0000 |
commit | d93e1dfac8711cfed1a9d9cd1876a788b83945cd (patch) | |
tree | 5896fa6c02a262a6148b215487e545d937de58b7 | |
parent | 8d43286d630f9224de07809ea253e83ebb9cdee6 (diff) | |
download | src-d93e1dfac8711cfed1a9d9cd1876a788b83945cd.tar.gz src-d93e1dfac8711cfed1a9d9cd1876a788b83945cd.zip |
Vendor import of lld trunk r290819: vendor/lld/lld-trunk-r290819
Notes
Notes:
svn path=/vendor/lld/dist/; revision=311125
svn path=/vendor/lld/lld-trunk-r290819/; revision=311126; tag=vendor/lld/lld-trunk-r290819
806 files changed, 27950 insertions, 11087 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 46ca748f8fac..23cef2e9fc67 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,3 +1,54 @@ +# Check if lld is built as a standalone project. +if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) + project(lld) + cmake_minimum_required(VERSION 3.4.3) + + set(CMAKE_INCLUDE_CURRENT_DIR ON) + set(LLD_BUILT_STANDALONE TRUE) + + find_program(LLVM_CONFIG_PATH "llvm-config" DOC "Path to llvm-config binary") + if(NOT LLVM_CONFIG_PATH) + message(FATAL_ERROR "llvm-config not found: specify LLVM_CONFIG_PATH") + endif() + + execute_process(COMMAND "${LLVM_CONFIG_PATH}" "--obj-root" "--includedir" + RESULT_VARIABLE HAD_ERROR + OUTPUT_VARIABLE LLVM_CONFIG_OUTPUT + OUTPUT_STRIP_TRAILING_WHITESPACE) + if(HAD_ERROR) + message(FATAL_ERROR "llvm-config failed with status ${HAD_ERROR}") + endif() + + string(REGEX REPLACE "[ \t]*[\r\n]+[ \t]*" ";" LLVM_CONFIG_OUTPUT "${LLVM_CONFIG_OUTPUT}") + + list(GET LLVM_CONFIG_OUTPUT 0 OBJ_ROOT) + list(GET LLVM_CONFIG_OUTPUT 1 MAIN_INCLUDE_DIR) + + set(LLVM_OBJ_ROOT ${OBJ_ROOT} CACHE PATH "path to LLVM build tree") + set(LLVM_MAIN_INCLUDE_DIR ${MAIN_INCLUDE_DIR} CACHE PATH "path to llvm/include") + + file(TO_CMAKE_PATH ${LLVM_OBJ_ROOT} LLVM_BINARY_DIR) + set(LLVM_CMAKE_PATH "${LLVM_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX}/cmake/llvm") + + if(NOT EXISTS "${LLVM_CMAKE_PATH}/LLVMConfig.cmake") + message(FATAL_ERROR "LLVMConfig.cmake not found") + endif() + include("${LLVM_CMAKE_PATH}/LLVMConfig.cmake") + + list(APPEND CMAKE_MODULE_PATH "${LLVM_CMAKE_PATH}") + + set(PACKAGE_VERSION "${LLVM_PACKAGE_VERSION}") + include_directories("${LLVM_BINARY_DIR}/include" ${LLVM_INCLUDE_DIRS}) + link_directories(${LLVM_LIBRARY_DIRS}) + + set(LLVM_RUNTIME_OUTPUT_INTDIR ${CMAKE_BINARY_DIR}/${CMAKE_CFG_INTDIR}/bin) + find_program(LLVM_TABLEGEN_EXE "llvm-tblgen" ${LLVM_TOOLS_BINARY_DIR} NO_DEFAULT_PATH) + + include(AddLLVM) + include(TableGen) + include(HandleLLVMOptions) +endif() + set(LLD_SOURCE_DIR 
${CMAKE_CURRENT_SOURCE_DIR}) set(LLD_INCLUDE_DIR ${LLD_SOURCE_DIR}/include ) set(LLD_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) @@ -55,6 +106,8 @@ endif() list (APPEND CMAKE_MODULE_PATH "${LLD_SOURCE_DIR}/cmake/modules") +include(AddLLD) + option(LLD_USE_VTUNE "Enable VTune user task tracking." OFF) @@ -67,6 +120,8 @@ if (LLD_USE_VTUNE) endif() endif() +option(LLD_BUILD_TOOLS + "Build the lld tools. If OFF, just generate build targets." ON) if (MSVC) add_definitions(-wd4530) # Suppress 'warning C4530: C++ exception handler used, but unwind semantics are not enabled.' @@ -87,12 +142,6 @@ if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY) ) endif() -macro(add_lld_library name) - add_llvm_library(${name} ${ARGN}) - set_target_properties(${name} PROPERTIES FOLDER "lld libraries") -endmacro(add_lld_library) - - add_subdirectory(lib) add_subdirectory(tools/lld) diff --git a/COFF/CMakeLists.txt b/COFF/CMakeLists.txt index ad5b6fda1693..70a33b9fdd81 100644 --- a/COFF/CMakeLists.txt +++ b/COFF/CMakeLists.txt @@ -2,6 +2,10 @@ set(LLVM_TARGET_DEFINITIONS Options.td) tablegen(LLVM Options.inc -gen-opt-parser-defs) add_public_tablegen_target(COFFOptionsTableGen) +if(NOT LLD_BUILT_STANDALONE) + set(tablegen_deps intrinsics_gen) +endif() + add_lld_library(lldCOFF Chunks.cpp DLL.cpp @@ -14,6 +18,7 @@ add_lld_library(lldCOFF MarkLive.cpp ModuleDef.cpp PDB.cpp + Strings.cpp SymbolTable.cpp Symbols.cpp Writer.cpp @@ -21,6 +26,9 @@ add_lld_library(lldCOFF LINK_COMPONENTS ${LLVM_TARGETS_TO_BUILD} Core + DebugInfoCodeView + DebugInfoMSF + DebugInfoPDB LTO LibDriver Object @@ -30,7 +38,11 @@ add_lld_library(lldCOFF Option Support - LINK_LIBS ${PTHREAD_LIB} - ) + LINK_LIBS + lldCore + ${PTHREAD_LIB} -add_dependencies(lldCOFF COFFOptionsTableGen intrinsics_gen) + DEPENDS + COFFOptionsTableGen + ${tablegen_deps} + ) diff --git a/COFF/Chunks.cpp b/COFF/Chunks.cpp index 1c1b18176aa2..7f0dfa92ec10 100644 --- a/COFF/Chunks.cpp +++ b/COFF/Chunks.cpp @@ -28,7 +28,7 @@ namespace lld { namespace coff { 
SectionChunk::SectionChunk(ObjectFile *F, const coff_section *H) - : Chunk(SectionKind), Repl(this), File(F), Header(H), + : Chunk(SectionKind), Repl(this), Header(H), File(F), Relocs(File->getCOFFObj()->getRelocations(Header)), NumRelocs(std::distance(Relocs.begin(), Relocs.end())) { // Initialize SectionName. @@ -81,11 +81,23 @@ void SectionChunk::applyRelX86(uint8_t *Off, uint16_t Type, Defined *Sym, } static void applyMOV(uint8_t *Off, uint16_t V) { - or16(Off, ((V & 0x800) >> 1) | ((V >> 12) & 0xf)); - or16(Off + 2, ((V & 0x700) << 4) | (V & 0xff)); + write16le(Off, (read16le(Off) & 0xfbf0) | ((V & 0x800) >> 1) | ((V >> 12) & 0xf)); + write16le(Off + 2, (read16le(Off + 2) & 0x8f00) | ((V & 0x700) << 4) | (V & 0xff)); +} + +static uint16_t readMOV(uint8_t *Off) { + uint16_t Opcode1 = read16le(Off); + uint16_t Opcode2 = read16le(Off + 2); + uint16_t Imm = (Opcode2 & 0x00ff) | ((Opcode2 >> 4) & 0x0700); + Imm |= ((Opcode1 << 1) & 0x0800) | ((Opcode1 & 0x000f) << 12); + return Imm; } static void applyMOV32T(uint8_t *Off, uint32_t V) { + uint16_t ImmW = readMOV(Off); // read MOVW operand + uint16_t ImmT = readMOV(Off + 4); // read MOVT operand + uint32_t Imm = ImmW | (ImmT << 16); + V += Imm; // add the immediate offset applyMOV(Off, V); // set MOVW operand applyMOV(Off + 4, V >> 16); // set MOVT operand } @@ -99,11 +111,14 @@ static void applyBranch20T(uint8_t *Off, int32_t V) { } static void applyBranch24T(uint8_t *Off, int32_t V) { + if (!isInt<25>(V)) + fatal("relocation out of range"); uint32_t S = V < 0 ? 1 : 0; uint32_t J1 = ((~V >> 23) & 1) ^ S; uint32_t J2 = ((~V >> 22) & 1) ^ S; or16(Off, (S << 10) | ((V >> 12) & 0x3ff)); - or16(Off + 2, (J1 << 13) | (J2 << 11) | ((V >> 1) & 0x7ff)); + // Clear out the J1 and J2 bits which may be set. 
+ write16le(Off + 2, (read16le(Off + 2) & 0xd000) | (J1 << 13) | (J2 << 11) | ((V >> 1) & 0x7ff)); } void SectionChunk::applyRelARM(uint8_t *Off, uint16_t Type, Defined *Sym, @@ -119,6 +134,7 @@ void SectionChunk::applyRelARM(uint8_t *Off, uint16_t Type, Defined *Sym, case IMAGE_REL_ARM_BRANCH20T: applyBranch20T(Off, S - P - 4); break; case IMAGE_REL_ARM_BRANCH24T: applyBranch24T(Off, S - P - 4); break; case IMAGE_REL_ARM_BLX23T: applyBranch24T(Off, S - P - 4); break; + case IMAGE_REL_ARM_SECREL: add32(Off, Sym->getSecrel()); break; default: fatal("unsupported relocation type"); } @@ -134,7 +150,7 @@ void SectionChunk::writeTo(uint8_t *Buf) const { // Apply relocations. for (const coff_relocation &Rel : Relocs) { uint8_t *Off = Buf + OutputSectionOff + Rel.VirtualAddress; - SymbolBody *Body = File->getSymbolBody(Rel.SymbolTableIndex)->repl(); + SymbolBody *Body = File->getSymbolBody(Rel.SymbolTableIndex); Defined *Sym = cast<Defined>(Body); uint64_t P = RVA + Rel.VirtualAddress; switch (Config->Machine) { @@ -187,7 +203,7 @@ void SectionChunk::getBaserels(std::vector<Baserel> *Res) { uint8_t Ty = getBaserelType(Rel); if (Ty == IMAGE_REL_BASED_ABSOLUTE) continue; - SymbolBody *Body = File->getSymbolBody(Rel.SymbolTableIndex)->repl(); + SymbolBody *Body = File->getSymbolBody(Rel.SymbolTableIndex); if (isa<DefinedAbsolute>(Body)) continue; Res->emplace_back(RVA + Rel.VirtualAddress, Ty); @@ -210,7 +226,7 @@ void SectionChunk::printDiscardedMessage() const { // Removed by dead-stripping. If it's removed by ICF, ICF already // printed out the name, so don't repeat that here. if (Sym && this == Repl) - llvm::outs() << "Discarded " << Sym->getName() << "\n"; + outs() << "Discarded " << Sym->getName() << "\n"; } StringRef SectionChunk::getDebugName() { @@ -233,7 +249,7 @@ void SectionChunk::replace(SectionChunk *Other) { CommonChunk::CommonChunk(const COFFSymbolRef S) : Sym(S) { // Common symbols are aligned on natural boundaries up to 32 bytes. 
// This is what MSVC link.exe does. - Align = std::min(uint64_t(32), NextPowerOf2(Sym.getValue())); + Align = std::min(uint64_t(32), PowerOf2Ceil(Sym.getValue())); } uint32_t CommonChunk::getPermissions() const { diff --git a/COFF/Chunks.h b/COFF/Chunks.h index cd0e2e69ef5d..59e36b84c9b0 100644 --- a/COFF/Chunks.h +++ b/COFF/Chunks.h @@ -17,7 +17,6 @@ #include "llvm/ADT/iterator.h" #include "llvm/ADT/iterator_range.h" #include "llvm/Object/COFF.h" -#include <atomic> #include <utility> #include <vector> @@ -29,7 +28,6 @@ using llvm::object::COFFSymbolRef; using llvm::object::SectionRef; using llvm::object::coff_relocation; using llvm::object::coff_section; -using llvm::sys::fs::file_magic; class Baserel; class Defined; @@ -187,11 +185,12 @@ public: // Auxiliary Format 5: Section Definitions. Used for ICF. uint32_t Checksum = 0; + const coff_section *Header; + private: // A file this chunk was created from. ObjectFile *File; - const coff_section *Header; StringRef SectionName; std::vector<SectionChunk *> AssocChildren; llvm::iterator_range<const coff_relocation *> Relocs; @@ -202,7 +201,7 @@ private: // Used for ICF (Identical COMDAT Folding) void replace(SectionChunk *Other); - std::atomic<uint64_t> GroupID = { 0 }; + uint32_t Color[2] = {0, 0}; // Sym points to a section symbol if this is a COMDAT chunk. DefinedRegular *Sym = nullptr; diff --git a/COFF/Config.h b/COFF/Config.h index a5472e937fa1..0fa3338aa28c 100644 --- a/COFF/Config.h +++ b/COFF/Config.h @@ -26,7 +26,8 @@ using llvm::StringRef; class DefinedAbsolute; class DefinedRelative; class StringChunk; -class Undefined; +struct Symbol; +class SymbolBody; // Short aliases. 
static const auto AMD64 = llvm::COFF::IMAGE_FILE_MACHINE_AMD64; @@ -37,7 +38,7 @@ static const auto I386 = llvm::COFF::IMAGE_FILE_MACHINE_I386; struct Export { StringRef Name; // N in /export:N or /export:E=N StringRef ExtName; // E in /export:E=N - Undefined *Sym = nullptr; + SymbolBody *Sym = nullptr; uint16_t Ordinal = 0; bool Noname = false; bool Data = false; @@ -61,6 +62,13 @@ struct Export { } }; +enum class DebugType { + None = 0x0, + CV = 0x1, /// CodeView + PData = 0x2, /// Procedure Data + Fixup = 0x4, /// Relocation Table +}; + // Global configuration. struct Configuration { enum ManifestKind { SideBySide, Embed, No }; @@ -69,7 +77,7 @@ struct Configuration { llvm::COFF::MachineTypes Machine = IMAGE_FILE_MACHINE_UNKNOWN; bool Verbose = false; WindowsSubsystem Subsystem = llvm::COFF::IMAGE_SUBSYSTEM_UNKNOWN; - Undefined *Entry = nullptr; + SymbolBody *Entry = nullptr; bool NoEntry = false; std::string OutputFile; bool DoGC = true; @@ -78,9 +86,11 @@ struct Configuration { bool Force = false; bool Debug = false; bool WriteSymtab = true; + unsigned DebugTypes = static_cast<unsigned>(DebugType::None); + StringRef PDBPath; // Symbols in this set are considered as live by the garbage collector. - std::set<Undefined *> GCRoot; + std::set<SymbolBody *> GCRoot; std::set<StringRef> NoDefaultLibs; bool NoDefaultLibAll = false; @@ -91,11 +101,11 @@ struct Configuration { std::vector<Export> Exports; std::set<std::string> DelayLoads; std::map<std::string, int> DLLOrder; - Undefined *DelayLoadHelper = nullptr; + SymbolBody *DelayLoadHelper = nullptr; // Used for SafeSEH. - DefinedRelative *SEHTable = nullptr; - DefinedAbsolute *SEHCount = nullptr; + Symbol *SEHTable = nullptr; + Symbol *SEHCount = nullptr; // Used for /opt:lldlto=N unsigned LTOOptLevel = 2; @@ -141,6 +151,10 @@ struct Configuration { bool TerminalServerAware = true; bool LargeAddressAware = false; bool HighEntropyVA = false; + + // This is for debugging. 
+ bool DebugPdb = false; + bool DumpPdb = false; }; extern Configuration *Config; diff --git a/COFF/DLL.cpp b/COFF/DLL.cpp index 9ac370c11d59..f93dc5cde44c 100644 --- a/COFF/DLL.cpp +++ b/COFF/DLL.cpp @@ -324,7 +324,7 @@ public: if (E.ForwardChunk) { write32le(P, E.ForwardChunk->getRVA()); } else { - write32le(P, cast<Defined>(E.Sym->repl())->getRVA()); + write32le(P, cast<Defined>(E.Sym)->getRVA()); } } } diff --git a/COFF/Driver.cpp b/COFF/Driver.cpp index bb6a60e4fc4c..dc3a00ba55ed 100644 --- a/COFF/Driver.cpp +++ b/COFF/Driver.cpp @@ -7,15 +7,17 @@ // //===----------------------------------------------------------------------===// -#include "Config.h" #include "Driver.h" +#include "Config.h" #include "Error.h" #include "InputFiles.h" +#include "Memory.h" #include "SymbolTable.h" #include "Symbols.h" #include "Writer.h" #include "lld/Driver/Driver.h" #include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/LibDriver/LibDriver.h" #include "llvm/Option/Arg.h" #include "llvm/Option/ArgList.h" @@ -28,6 +30,13 @@ #include <algorithm> #include <memory> +#ifdef _MSC_VER +// <future> depends on <eh.h> for __uncaught_exception. +#include <eh.h> +#endif + +#include <future> + using namespace llvm; using namespace llvm::COFF; using llvm::sys::Process; @@ -41,11 +50,13 @@ namespace coff { Configuration *Config; LinkerDriver *Driver; -bool link(llvm::ArrayRef<const char *> Args) { - Configuration C; - LinkerDriver D; - Config = &C; - Driver = &D; +BumpPtrAllocator BAlloc; +StringSaver Saver{BAlloc}; +std::vector<SpecificAllocBase *> SpecificAllocBase::Instances; + +bool link(ArrayRef<const char *> Args) { + Config = make<Configuration>(); + Driver = make<LinkerDriver>(); Driver->link(Args); return true; } @@ -58,26 +69,123 @@ static std::string getOutputPath(StringRef Path) { return (S.substr(0, S.rfind('.')) + E).str(); } -// Opens a file. Path has to be resolved already. -// Newly created memory buffers are owned by this driver. 
-MemoryBufferRef LinkerDriver::openFile(StringRef Path) { - std::unique_ptr<MemoryBuffer> MB = - check(MemoryBuffer::getFile(Path), "could not open " + Path); - MemoryBufferRef MBRef = MB->getMemBufferRef(); - OwningMBs.push_back(std::move(MB)); // take ownership +// ErrorOr is not default constructible, so it cannot be used as the type +// parameter of a future. +// FIXME: We could open the file in createFutureForFile and avoid needing to +// return an error here, but for the moment that would cost us a file descriptor +// (a limited resource on Windows) for the duration that the future is pending. +typedef std::pair<std::unique_ptr<MemoryBuffer>, std::error_code> MBErrPair; + +// Create a std::future that opens and maps a file using the best strategy for +// the host platform. +static std::future<MBErrPair> createFutureForFile(std::string Path) { +#if LLVM_ON_WIN32 + // On Windows, file I/O is relatively slow so it is best to do this + // asynchronously. + auto Strategy = std::launch::async; +#else + auto Strategy = std::launch::deferred; +#endif + return std::async(Strategy, [=]() { + auto MBOrErr = MemoryBuffer::getFile(Path); + if (!MBOrErr) + return MBErrPair{nullptr, MBOrErr.getError()}; + return MBErrPair{std::move(*MBOrErr), std::error_code()}; + }); +} + +MemoryBufferRef LinkerDriver::takeBuffer(std::unique_ptr<MemoryBuffer> MB) { + MemoryBufferRef MBRef = *MB; + OwningMBs.push_back(std::move(MB)); + + if (Driver->Cpio) + Driver->Cpio->append(relativeToRoot(MBRef.getBufferIdentifier()), + MBRef.getBuffer()); + return MBRef; } -static std::unique_ptr<InputFile> createFile(MemoryBufferRef MB) { +void LinkerDriver::addBuffer(std::unique_ptr<MemoryBuffer> MB) { + MemoryBufferRef MBRef = takeBuffer(std::move(MB)); + // File type is detected by contents, not by file extension. 
- file_magic Magic = identify_magic(MB.getBuffer()); + file_magic Magic = identify_magic(MBRef.getBuffer()); + if (Magic == file_magic::windows_resource) { + Resources.push_back(MBRef); + return; + } + + FilePaths.push_back(MBRef.getBufferIdentifier()); if (Magic == file_magic::archive) - return std::unique_ptr<InputFile>(new ArchiveFile(MB)); + return Symtab.addFile(make<ArchiveFile>(MBRef)); if (Magic == file_magic::bitcode) - return std::unique_ptr<InputFile>(new BitcodeFile(MB)); + return Symtab.addFile(make<BitcodeFile>(MBRef)); + if (Magic == file_magic::coff_cl_gl_object) + fatal(MBRef.getBufferIdentifier() + ": is not a native COFF file. " + "Recompile without /GL"); + Symtab.addFile(make<ObjectFile>(MBRef)); +} + +void LinkerDriver::enqueuePath(StringRef Path) { + auto Future = + std::make_shared<std::future<MBErrPair>>(createFutureForFile(Path)); + std::string PathStr = Path; + enqueueTask([=]() { + auto MBOrErr = Future->get(); + if (MBOrErr.second) + fatal(MBOrErr.second, "could not open " + PathStr); + Driver->addBuffer(std::move(MBOrErr.first)); + }); + if (Config->OutputFile == "") - Config->OutputFile = getOutputPath(MB.getBufferIdentifier()); - return std::unique_ptr<InputFile>(new ObjectFile(MB)); + Config->OutputFile = getOutputPath(Path); +} + +void LinkerDriver::addArchiveBuffer(MemoryBufferRef MB, StringRef SymName, + StringRef ParentName) { + file_magic Magic = identify_magic(MB.getBuffer()); + if (Magic == file_magic::coff_import_library) { + Symtab.addFile(make<ImportFile>(MB)); + return; + } + + InputFile *Obj; + if (Magic == file_magic::coff_object) + Obj = make<ObjectFile>(MB); + else if (Magic == file_magic::bitcode) + Obj = make<BitcodeFile>(MB); + else + fatal("unknown file type: " + MB.getBufferIdentifier()); + + Obj->ParentName = ParentName; + Symtab.addFile(Obj); + if (Config->Verbose) + outs() << "Loaded " << toString(Obj) << " for " << SymName << "\n"; +} + +void LinkerDriver::enqueueArchiveMember(const Archive::Child &C, + 
StringRef SymName, + StringRef ParentName) { + if (!C.getParent()->isThin()) { + MemoryBufferRef MB = check( + C.getMemoryBufferRef(), + "could not get the buffer for the member defining symbol " + SymName); + enqueueTask([=]() { Driver->addArchiveBuffer(MB, SymName, ParentName); }); + return; + } + + auto Future = std::make_shared<std::future<MBErrPair>>(createFutureForFile( + check(C.getFullName(), + "could not get the filename for the member defining symbol " + + SymName))); + enqueueTask([=]() { + auto MBOrErr = Future->get(); + if (MBOrErr.second) + fatal(MBOrErr.second, + "could not get the buffer for the member defining " + SymName); + Driver->addArchiveBuffer(takeBuffer(std::move(MBOrErr.first)), SymName, + ParentName); + }); } static bool isDecorated(StringRef Sym) { @@ -87,7 +195,7 @@ static bool isDecorated(StringRef Sym) { // Parses .drectve section contents and returns a list of files // specified by /defaultlib. void LinkerDriver::parseDirectives(StringRef S) { - llvm::opt::InputArgList Args = Parser.parse(S); + opt::InputArgList Args = Parser.parse(S); for (auto *Arg : Args) { switch (Arg->getOption().getID()) { @@ -95,10 +203,8 @@ void LinkerDriver::parseDirectives(StringRef S) { parseAlternateName(Arg->getValue()); break; case OPT_defaultlib: - if (Optional<StringRef> Path = findLib(Arg->getValue())) { - MemoryBufferRef MB = openFile(*Path); - Symtab.addFile(createFile(MB)); - } + if (Optional<StringRef> Path = findLib(Arg->getValue())) + enqueuePath(*Path); break; case OPT_export: { Export E = parseExport(Arg->getValue()); @@ -135,19 +241,19 @@ void LinkerDriver::parseDirectives(StringRef S) { // Find file from search paths. You can omit ".obj", this function takes // care of that. Note that the returned path is not guaranteed to exist. 
StringRef LinkerDriver::doFindFile(StringRef Filename) { - bool hasPathSep = (Filename.find_first_of("/\\") != StringRef::npos); - if (hasPathSep) + bool HasPathSep = (Filename.find_first_of("/\\") != StringRef::npos); + if (HasPathSep) return Filename; - bool hasExt = (Filename.find('.') != StringRef::npos); + bool HasExt = (Filename.find('.') != StringRef::npos); for (StringRef Dir : SearchPaths) { SmallString<128> Path = Dir; - llvm::sys::path::append(Path, Filename); - if (llvm::sys::fs::exists(Path.str())) - return Alloc.save(Path.str()); - if (!hasExt) { + sys::path::append(Path, Filename); + if (sys::fs::exists(Path.str())) + return Saver.save(Path.str()); + if (!HasExt) { Path.append(".obj"); - if (llvm::sys::fs::exists(Path.str())) - return Alloc.save(Path.str()); + if (sys::fs::exists(Path.str())) + return Saver.save(Path.str()); } } return Filename; @@ -166,9 +272,9 @@ Optional<StringRef> LinkerDriver::findFile(StringRef Filename) { // Find library file from search path. StringRef LinkerDriver::doFindLib(StringRef Filename) { // Add ".lib" to Filename if that has no file extension. 
- bool hasExt = (Filename.find('.') != StringRef::npos); - if (!hasExt) - Filename = Alloc.save(Filename + ".lib"); + bool HasExt = (Filename.find('.') != StringRef::npos); + if (!HasExt) + Filename = Saver.save(Filename + ".lib"); return doFindFile(Filename); } @@ -178,11 +284,12 @@ StringRef LinkerDriver::doFindLib(StringRef Filename) { Optional<StringRef> LinkerDriver::findLib(StringRef Filename) { if (Config->NoDefaultLibAll) return None; + if (!VisitedLibs.insert(Filename.lower()).second) + return None; StringRef Path = doFindLib(Filename); if (Config->NoDefaultLibs.count(Path)) return None; - bool Seen = !VisitedFiles.insert(Path.lower()).second; - if (Seen) + if (!VisitedFiles.insert(Path.lower()).second) return None; return Path; } @@ -192,7 +299,7 @@ void LinkerDriver::addLibSearchPaths() { Optional<std::string> EnvOpt = Process::GetEnv("LIB"); if (!EnvOpt.hasValue()) return; - StringRef Env = Alloc.save(*EnvOpt); + StringRef Env = Saver.save(*EnvOpt); while (!Env.empty()) { StringRef Path; std::tie(Path, Env) = Env.split(';'); @@ -200,17 +307,17 @@ void LinkerDriver::addLibSearchPaths() { } } -Undefined *LinkerDriver::addUndefined(StringRef Name) { - Undefined *U = Symtab.addUndefined(Name); - Config->GCRoot.insert(U); - return U; +SymbolBody *LinkerDriver::addUndefined(StringRef Name) { + SymbolBody *B = Symtab.addUndefined(Name); + Config->GCRoot.insert(B); + return B; } // Symbol names are mangled by appending "_" prefix on x86. 
StringRef LinkerDriver::mangle(StringRef Sym) { assert(Config->Machine != IMAGE_FILE_MACHINE_UNKNOWN); if (Config->Machine == I386) - return Alloc.save("_" + Sym); + return Saver.save("_" + Sym); return Sym; } @@ -225,7 +332,7 @@ StringRef LinkerDriver::findDefaultEntry() { }; for (auto E : Entries) { StringRef Entry = Symtab.findMangle(mangle(E[0])); - if (!Entry.empty() && !isa<Undefined>(Symtab.find(Entry)->Body)) + if (!Entry.empty() && !isa<Undefined>(Symtab.find(Entry)->body())) return mangle(E[1]); } return ""; @@ -247,7 +354,83 @@ static uint64_t getDefaultImageBase() { return Config->DLL ? 0x10000000 : 0x400000; } -void LinkerDriver::link(llvm::ArrayRef<const char *> ArgsArr) { +static std::string createResponseFile(const opt::InputArgList &Args, + ArrayRef<StringRef> FilePaths, + ArrayRef<StringRef> SearchPaths) { + SmallString<0> Data; + raw_svector_ostream OS(Data); + + for (auto *Arg : Args) { + switch (Arg->getOption().getID()) { + case OPT_linkrepro: + case OPT_INPUT: + case OPT_defaultlib: + case OPT_libpath: + break; + default: + OS << stringize(Arg) << "\n"; + } + } + + for (StringRef Path : SearchPaths) { + std::string RelPath = relativeToRoot(Path); + OS << "/libpath:" << quote(RelPath) << "\n"; + } + + for (StringRef Path : FilePaths) + OS << quote(relativeToRoot(Path)) << "\n"; + + return Data.str(); +} + +static unsigned getDefaultDebugType(const opt::InputArgList &Args) { + unsigned DebugTypes = static_cast<unsigned>(DebugType::CV); + if (Args.hasArg(OPT_driver)) + DebugTypes |= static_cast<unsigned>(DebugType::PData); + if (Args.hasArg(OPT_profile)) + DebugTypes |= static_cast<unsigned>(DebugType::Fixup); + return DebugTypes; +} + +static unsigned parseDebugType(StringRef Arg) { + SmallVector<StringRef, 3> Types; + Arg.split(Types, ',', /*KeepEmpty=*/false); + + unsigned DebugTypes = static_cast<unsigned>(DebugType::None); + for (StringRef Type : Types) + DebugTypes |= StringSwitch<unsigned>(Type.lower()) + .Case("cv", 
static_cast<unsigned>(DebugType::CV)) + .Case("pdata", static_cast<unsigned>(DebugType::PData)) + .Case("fixup", static_cast<unsigned>(DebugType::Fixup)); + return DebugTypes; +} + +static std::string getMapFile(const opt::InputArgList &Args) { + auto *Arg = Args.getLastArg(OPT_lldmap, OPT_lldmap_file); + if (!Arg) + return ""; + if (Arg->getOption().getID() == OPT_lldmap_file) + return Arg->getValue(); + + assert(Arg->getOption().getID() == OPT_lldmap); + StringRef OutFile = Config->OutputFile; + return (OutFile.substr(0, OutFile.rfind('.')) + ".map").str(); +} + +void LinkerDriver::enqueueTask(std::function<void()> Task) { + TaskQueue.push_back(std::move(Task)); +} + +bool LinkerDriver::run() { + bool DidWork = !TaskQueue.empty(); + while (!TaskQueue.empty()) { + TaskQueue.front()(); + TaskQueue.pop_front(); + } + return DidWork; +} + +void LinkerDriver::link(ArrayRef<const char *> ArgsArr) { // If the first command line argument is "/lib", link.exe acts like lib.exe. // We call our own implementation of lib.exe that understands bitcode files. if (ArgsArr.size() > 1 && StringRef(ArgsArr[1]).equals_lower("/lib")) { @@ -257,15 +440,15 @@ void LinkerDriver::link(llvm::ArrayRef<const char *> ArgsArr) { } // Needed for LTO. - llvm::InitializeAllTargetInfos(); - llvm::InitializeAllTargets(); - llvm::InitializeAllTargetMCs(); - llvm::InitializeAllAsmParsers(); - llvm::InitializeAllAsmPrinters(); - llvm::InitializeAllDisassemblers(); + InitializeAllTargetInfos(); + InitializeAllTargets(); + InitializeAllTargetMCs(); + InitializeAllAsmParsers(); + InitializeAllAsmPrinters(); + InitializeAllDisassemblers(); // Parse command line options. 
- llvm::opt::InputArgList Args = Parser.parseLINK(ArgsArr.slice(1)); + opt::InputArgList Args = Parser.parseLINK(ArgsArr.slice(1)); // Handle /help if (Args.hasArg(OPT_help)) { @@ -273,6 +456,17 @@ void LinkerDriver::link(llvm::ArrayRef<const char *> ArgsArr) { return; } + if (auto *Arg = Args.getLastArg(OPT_linkrepro)) { + SmallString<64> Path = StringRef(Arg->getValue()); + sys::path::append(Path, "repro"); + ErrorOr<CpioFile *> F = CpioFile::create(Path); + if (F) + Cpio.reset(*F); + else + errs() << "/linkrepro: failed to open " << Path + << ".cpio: " << F.getError().message() << '\n'; + } + if (Args.filtered_begin(OPT_INPUT) == Args.filtered_end()) fatal("no input files"); @@ -295,8 +489,17 @@ void LinkerDriver::link(llvm::ArrayRef<const char *> ArgsArr) { Config->Force = true; // Handle /debug - if (Args.hasArg(OPT_debug)) + if (Args.hasArg(OPT_debug)) { Config->Debug = true; + Config->DebugTypes = + Args.hasArg(OPT_debugtype) + ? parseDebugType(Args.getLastArg(OPT_debugtype)->getValue()) + : getDefaultDebugType(Args); + } + + // Create a dummy PDB file to satisfy build sytem rules. + if (auto *Arg = Args.getLastArg(OPT_pdb)) + Config->PDBPath = Arg->getValue(); // Handle /noentry if (Args.hasArg(OPT_noentry)) { @@ -447,72 +650,43 @@ void LinkerDriver::link(llvm::ArrayRef<const char *> ArgsArr) { Config->TerminalServerAware = false; if (Args.hasArg(OPT_nosymtab)) Config->WriteSymtab = false; + Config->DumpPdb = Args.hasArg(OPT_dumppdb); + Config->DebugPdb = Args.hasArg(OPT_debugpdb); // Create a list of input files. Files can be given as arguments // for /defaultlib option. 
- std::vector<StringRef> Paths; std::vector<MemoryBufferRef> MBs; for (auto *Arg : Args.filtered(OPT_INPUT)) if (Optional<StringRef> Path = findFile(Arg->getValue())) - Paths.push_back(*Path); + enqueuePath(*Path); for (auto *Arg : Args.filtered(OPT_defaultlib)) if (Optional<StringRef> Path = findLib(Arg->getValue())) - Paths.push_back(*Path); - for (StringRef Path : Paths) - MBs.push_back(openFile(Path)); + enqueuePath(*Path); // Windows specific -- Create a resource file containing a manifest file. - if (Config->Manifest == Configuration::Embed) { - std::unique_ptr<MemoryBuffer> MB = createManifestRes(); - MBs.push_back(MB->getMemBufferRef()); - OwningMBs.push_back(std::move(MB)); // take ownership - } + if (Config->Manifest == Configuration::Embed) + addBuffer(createManifestRes()); - // Windows specific -- Input files can be Windows resource files (.res files). - // We invoke cvtres.exe to convert resource files to a regular COFF file - // then link the result file normally. - std::vector<MemoryBufferRef> Resources; - auto NotResource = [](MemoryBufferRef MB) { - return identify_magic(MB.getBuffer()) != file_magic::windows_resource; - }; - auto It = std::stable_partition(MBs.begin(), MBs.end(), NotResource); - if (It != MBs.end()) { - Resources.insert(Resources.end(), It, MBs.end()); - MBs.erase(It, MBs.end()); - } + // Read all input files given via the command line. + run(); - // Read all input files given via the command line. Note that step() - // doesn't read files that are specified by directive sections. - for (MemoryBufferRef MB : MBs) - Symtab.addFile(createFile(MB)); - Symtab.step(); - - // Determine machine type and check if all object files are - // for the same CPU type. Note that this needs to be done before - // any call to mangle(). 
- for (std::unique_ptr<InputFile> &File : Symtab.getFiles()) { - MachineTypes MT = File->getMachineType(); - if (MT == IMAGE_FILE_MACHINE_UNKNOWN) - continue; - if (Config->Machine == IMAGE_FILE_MACHINE_UNKNOWN) { - Config->Machine = MT; - continue; - } - if (Config->Machine != MT) - fatal(File->getShortName() + ": machine type " + machineToStr(MT) + - " conflicts with " + machineToStr(Config->Machine)); - } + // We should have inferred a machine type by now from the input files, but if + // not we assume x64. if (Config->Machine == IMAGE_FILE_MACHINE_UNKNOWN) { - llvm::errs() << "warning: /machine is not specified. x64 is assumed.\n"; + errs() << "warning: /machine is not specified. x64 is assumed.\n"; Config->Machine = AMD64; } - // Windows specific -- Convert Windows resource files to a COFF file. - if (!Resources.empty()) { - std::unique_ptr<MemoryBuffer> MB = convertResToCOFF(Resources); - Symtab.addFile(createFile(MB->getMemBufferRef())); - OwningMBs.push_back(std::move(MB)); // take ownership - } + // Windows specific -- Input files can be Windows resource files (.res files). + // We invoke cvtres.exe to convert resource files to a regular COFF file + // then link the result file normally. 
+ if (!Resources.empty()) + addBuffer(convertResToCOFF(Resources)); + + if (Cpio) + Cpio->append("response.txt", + createResponseFile(Args, FilePaths, + ArrayRef<StringRef>(SearchPaths).slice(1))); // Handle /largeaddressaware if (Config->is64() || Args.hasArg(OPT_largeaddressaware)) @@ -537,7 +711,7 @@ void LinkerDriver::link(llvm::ArrayRef<const char *> ArgsArr) { fatal("entry point must be defined"); Config->Entry = addUndefined(S); if (Config->Verbose) - llvm::outs() << "Entry name inferred: " << S << "\n"; + outs() << "Entry name inferred: " << S << "\n"; } // Handle /export @@ -545,18 +719,19 @@ void LinkerDriver::link(llvm::ArrayRef<const char *> ArgsArr) { Export E = parseExport(Arg->getValue()); if (Config->Machine == I386) { if (!isDecorated(E.Name)) - E.Name = Alloc.save("_" + E.Name); + E.Name = Saver.save("_" + E.Name); if (!E.ExtName.empty() && !isDecorated(E.ExtName)) - E.ExtName = Alloc.save("_" + E.ExtName); + E.ExtName = Saver.save("_" + E.ExtName); } Config->Exports.push_back(E); } // Handle /def if (auto *Arg = Args.getLastArg(OPT_deffile)) { - MemoryBufferRef MB = openFile(Arg->getValue()); // parseModuleDefs mutates Config object. - parseModuleDefs(MB, &Alloc); + parseModuleDefs( + takeBuffer(check(MemoryBuffer::getFile(Arg->getValue()), + Twine("could not open ") + Arg->getValue()))); } // Handle /delayload @@ -585,14 +760,10 @@ void LinkerDriver::link(llvm::ArrayRef<const char *> ArgsArr) { Symtab.addAbsolute(mangle("__guard_fids_count"), 0); Symtab.addAbsolute(mangle("__guard_flags"), 0x100); - // Read as much files as we can from directives sections. - Symtab.run(); - - // Resolve auxiliary symbols until we get a convergence. - // (Trying to resolve a symbol may trigger a Lazy symbol to load a new file. - // A new file may contain a directive section to add new command line options. - // That's why we have to repeat until converge.) 
- for (;;) { + // This code may add new undefined symbols to the link, which may enqueue more + // symbol resolution tasks, so we need to continue executing tasks until we + // converge. + do { // Windows specific -- if entry point is not found, // search for its mangled names. if (Config->Entry) @@ -615,7 +786,7 @@ void LinkerDriver::link(llvm::ArrayRef<const char *> ArgsArr) { Symbol *Sym = Symtab.find(From); if (!Sym) continue; - if (auto *U = dyn_cast<Undefined>(Sym->Body)) + if (auto *U = dyn_cast<Undefined>(Sym->body())) if (!U->WeakAlias) U->WeakAlias = Symtab.addUndefined(To); } @@ -623,18 +794,15 @@ void LinkerDriver::link(llvm::ArrayRef<const char *> ArgsArr) { // Windows specific -- if __load_config_used can be resolved, resolve it. if (Symtab.findUnderscore("_load_config_used")) addUndefined(mangle("_load_config_used")); - - if (Symtab.queueEmpty()) - break; - Symtab.run(); - } + } while (run()); // Do LTO by compiling bitcode input files to a set of native COFF files then // link those files. Symtab.addCombinedLTOObjects(); + run(); // Make sure we have resolved all symbols. - Symtab.reportRemainingUndefines(/*Resolve=*/true); + Symtab.reportRemainingUndefines(); // Windows specific -- if no /subsystem is given, we need to infer // that from entry point name. @@ -662,10 +830,6 @@ void LinkerDriver::link(llvm::ArrayRef<const char *> ArgsArr) { if (Config->Manifest == Configuration::SideBySide) createSideBySideManifest(); - // Create a dummy PDB file to satisfy build sytem rules. - if (auto *Arg = Args.getLastArg(OPT_pdb)) - createPDB(Arg->getValue()); - // Identify unreferenced COMDAT sections. if (Config->DoGC) markLive(Symtab.getChunks()); @@ -679,13 +843,15 @@ void LinkerDriver::link(llvm::ArrayRef<const char *> ArgsArr) { // Create a symbol map file containing symbol VAs and their names // to help debugging. 
- if (auto *Arg = Args.getLastArg(OPT_lldmap)) { + std::string MapFile = getMapFile(Args); + if (!MapFile.empty()) { std::error_code EC; - llvm::raw_fd_ostream Out(Arg->getValue(), EC, OpenFlags::F_Text); + raw_fd_ostream Out(MapFile, EC, OpenFlags::F_Text); if (EC) - fatal(EC, "could not create the symbol map"); + fatal(EC, "could not create the symbol map " + MapFile); Symtab.printMap(Out); } + // Call exit to avoid calling destructors. exit(0); } diff --git a/COFF/Driver.h b/COFF/Driver.h index 23969ee802fb..e8114640edec 100644 --- a/COFF/Driver.h +++ b/COFF/Driver.h @@ -13,12 +13,13 @@ #include "Config.h" #include "SymbolTable.h" #include "lld/Core/LLVM.h" +#include "lld/Core/Reproduce.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Object/Archive.h" #include "llvm/Object/COFF.h" #include "llvm/Option/Arg.h" #include "llvm/Option/ArgList.h" -#include "llvm/Support/StringSaver.h" #include <memory> #include <set> #include <vector> @@ -42,7 +43,6 @@ void doICF(const std::vector<Chunk *> &Chunks); class ArgParser { public: - ArgParser() : Alloc(AllocAux) {} // Parses command line options. llvm::opt::InputArgList parse(llvm::ArrayRef<const char *> Args); @@ -56,25 +56,26 @@ private: std::vector<const char *> tokenize(StringRef S); std::vector<const char *> replaceResponseFiles(std::vector<const char *>); - - llvm::BumpPtrAllocator AllocAux; - llvm::StringSaver Alloc; }; class LinkerDriver { public: - LinkerDriver() : Alloc(AllocAux) {} + LinkerDriver() { coff::Symtab = &Symtab; } void link(llvm::ArrayRef<const char *> Args); // Used by the resolver to parse .drectve section contents. void parseDirectives(StringRef S); + // Used by ArchiveFile to enqueue members. 
+ void enqueueArchiveMember(const Archive::Child &C, StringRef SymName, + StringRef ParentName); + private: - llvm::BumpPtrAllocator AllocAux; - llvm::StringSaver Alloc; ArgParser Parser; SymbolTable Symtab; + std::unique_ptr<CpioFile> Cpio; // for /linkrepro + // Opens a file. Path has to be resolved already. MemoryBufferRef openFile(StringRef Path); @@ -90,8 +91,9 @@ private: // Library search path. The first element is always "" (current directory). std::vector<StringRef> SearchPaths; std::set<std::string> VisitedFiles; + std::set<std::string> VisitedLibs; - Undefined *addUndefined(StringRef Sym); + SymbolBody *addUndefined(StringRef Sym); StringRef mangle(StringRef Sym); // Windows specific -- "main" is not the only main function in Windows. @@ -104,12 +106,26 @@ private: StringRef findDefaultEntry(); WindowsSubsystem inferSubsystem(); + MemoryBufferRef takeBuffer(std::unique_ptr<MemoryBuffer> MB); + void addBuffer(std::unique_ptr<MemoryBuffer> MB); + void addArchiveBuffer(MemoryBufferRef MBRef, StringRef SymName, + StringRef ParentName); + + void enqueuePath(StringRef Path); + + void enqueueTask(std::function<void()> Task); + bool run(); + // Driver is the owner of all opened files. // InputFiles have MemoryBufferRefs to them. std::vector<std::unique_ptr<MemoryBuffer>> OwningMBs; + + std::list<std::function<void()>> TaskQueue; + std::vector<StringRef> FilePaths; + std::vector<MemoryBufferRef> Resources; }; -void parseModuleDefs(MemoryBufferRef MB, llvm::StringSaver *Alloc); +void parseModuleDefs(MemoryBufferRef MB); void writeImportLibrary(); // Functions below this line are defined in DriverUtils.cpp. 
@@ -161,8 +177,6 @@ void checkFailIfMismatch(StringRef Arg); std::unique_ptr<MemoryBuffer> convertResToCOFF(const std::vector<MemoryBufferRef> &MBs); -void createPDB(StringRef Path); - // Create enum with OPT_xxx values for each option in Options.td enum { OPT_INVALID = 0, diff --git a/COFF/DriverUtils.cpp b/COFF/DriverUtils.cpp index 5d7dc2bc65af..14dd004f1c04 100644 --- a/COFF/DriverUtils.cpp +++ b/COFF/DriverUtils.cpp @@ -16,6 +16,7 @@ #include "Config.h" #include "Driver.h" #include "Error.h" +#include "Memory.h" #include "Symbols.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringSwitch.h" @@ -43,29 +44,29 @@ namespace { class Executor { public: explicit Executor(StringRef S) : Saver(Alloc), Prog(Saver.save(S)) {} - void add(StringRef S) { Args.push_back(Saver.save(S)); } - void add(std::string &S) { Args.push_back(Saver.save(S)); } - void add(Twine S) { Args.push_back(Saver.save(S)); } - void add(const char *S) { Args.push_back(Saver.save(S)); } + void add(StringRef S) { Args.push_back(Saver.save(S).data()); } + void add(std::string &S) { Args.push_back(Saver.save(S).data()); } + void add(Twine S) { Args.push_back(Saver.save(S).data()); } + void add(const char *S) { Args.push_back(Saver.save(S).data()); } void run() { - ErrorOr<std::string> ExeOrErr = llvm::sys::findProgramByName(Prog); + ErrorOr<std::string> ExeOrErr = sys::findProgramByName(Prog); if (auto EC = ExeOrErr.getError()) fatal(EC, "unable to find " + Prog + " in PATH: "); - const char *Exe = Saver.save(*ExeOrErr); + const char *Exe = Saver.save(*ExeOrErr).data(); Args.insert(Args.begin(), Exe); Args.push_back(nullptr); - if (llvm::sys::ExecuteAndWait(Args[0], Args.data()) != 0) { + if (sys::ExecuteAndWait(Args[0], Args.data()) != 0) { for (const char *S : Args) if (S) - llvm::errs() << S << " "; + errs() << S << " "; fatal("ExecuteAndWait failed"); } } private: - llvm::BumpPtrAllocator Alloc; - llvm::StringSaver Saver; + BumpPtrAllocator Alloc; + StringSaver Saver; StringRef Prog; 
std::vector<const char *> Args; }; @@ -75,10 +76,8 @@ private: // Returns /machine's value. MachineTypes getMachineType(StringRef S) { MachineTypes MT = StringSwitch<MachineTypes>(S.lower()) - .Case("x64", AMD64) - .Case("amd64", AMD64) - .Case("x86", I386) - .Case("i386", I386) + .Cases("x64", "amd64", AMD64) + .Cases("x86", "i386", I386) .Case("arm", ARMNT) .Default(IMAGE_FILE_MACHINE_UNKNOWN); if (MT != IMAGE_FILE_MACHINE_UNKNOWN) @@ -168,8 +167,8 @@ void parseMerge(StringRef S) { if (!Inserted) { StringRef Existing = Pair.first->second; if (Existing != To) - llvm::errs() << "warning: " << S << ": already merged into " - << Existing << "\n"; + errs() << "warning: " << S << ": already merged into " << Existing + << "\n"; } } @@ -279,18 +278,54 @@ static void quoteAndPrint(raw_ostream &Out, StringRef S) { } } +// An RAII temporary file class that automatically removes a temporary file. +namespace { +class TemporaryFile { +public: + TemporaryFile(StringRef Prefix, StringRef Extn) { + SmallString<128> S; + if (auto EC = sys::fs::createTemporaryFile("lld-" + Prefix, Extn, S)) + fatal(EC, "cannot create a temporary file"); + Path = S.str(); + } + + TemporaryFile(TemporaryFile &&Obj) { + std::swap(Path, Obj.Path); + } + + ~TemporaryFile() { + if (Path.empty()) + return; + if (sys::fs::remove(Path)) + fatal("failed to remove " + Path); + } + + // Returns a memory buffer of this temporary file. + // Note that this function does not leave the file open, + // so it is safe to remove the file immediately after this function + // is called (you cannot remove an opened file on Windows.) + std::unique_ptr<MemoryBuffer> getMemoryBuffer() { + // IsVolatileSize=true forces MemoryBuffer to not use mmap(). + return check(MemoryBuffer::getFile(Path, /*FileSize=*/-1, + /*RequiresNullTerminator=*/false, + /*IsVolatileSize=*/true), + "could not open " + Path); + } + + std::string Path; +}; +} + // Create the default manifest file as a temporary file. 
-static std::string createDefaultXml() { +TemporaryFile createDefaultXml() { // Create a temporary file. - SmallString<128> Path; - if (auto EC = sys::fs::createTemporaryFile("tmp", "manifest", Path)) - fatal(EC, "cannot create a temporary file"); + TemporaryFile File("defaultxml", "manifest"); // Open the temporary file for writing. std::error_code EC; - llvm::raw_fd_ostream OS(Path, EC, sys::fs::F_Text); + raw_fd_ostream OS(File.Path, EC, sys::fs::F_Text); if (EC) - fatal(EC, "failed to open " + Path); + fatal(EC, "failed to open " + File.Path); // Emit the XML. Note that we do *not* verify that the XML attributes are // syntactically correct. This is intentional for link.exe compatibility. @@ -316,56 +351,48 @@ static std::string createDefaultXml() { } OS << "</assembly>\n"; OS.close(); - return StringRef(Path); + return File; } static std::string readFile(StringRef Path) { std::unique_ptr<MemoryBuffer> MB = check(MemoryBuffer::getFile(Path), "could not open " + Path); - std::unique_ptr<MemoryBuffer> Buf(std::move(MB)); - return Buf->getBuffer(); + return MB->getBuffer(); } static std::string createManifestXml() { // Create the default manifest file. - std::string Path1 = createDefaultXml(); + TemporaryFile File1 = createDefaultXml(); if (Config->ManifestInput.empty()) - return readFile(Path1); + return readFile(File1.Path); // If manifest files are supplied by the user using /MANIFESTINPUT // option, we need to merge them with the default manifest. 
- SmallString<128> Path2; - if (auto EC = sys::fs::createTemporaryFile("tmp", "manifest", Path2)) - fatal(EC, "cannot create a temporary file"); - FileRemover Remover1(Path1); - FileRemover Remover2(Path2); + TemporaryFile File2("user", "manifest"); Executor E("mt.exe"); E.add("/manifest"); - E.add(Path1); + E.add(File1.Path); for (StringRef Filename : Config->ManifestInput) { E.add("/manifest"); E.add(Filename); } E.add("/nologo"); - E.add("/out:" + StringRef(Path2)); + E.add("/out:" + StringRef(File2.Path)); E.run(); - return readFile(Path2); + return readFile(File2.Path); } // Create a resource file containing a manifest XML. std::unique_ptr<MemoryBuffer> createManifestRes() { // Create a temporary file for the resource script file. - SmallString<128> RCPath; - if (auto EC = sys::fs::createTemporaryFile("tmp", "rc", RCPath)) - fatal(EC, "cannot create a temporary file"); - FileRemover RCRemover(RCPath); + TemporaryFile RCFile("manifest", "rc"); // Open the temporary file for writing. std::error_code EC; - llvm::raw_fd_ostream Out(RCPath, EC, sys::fs::F_Text); + raw_fd_ostream Out(RCFile.Path, EC, sys::fs::F_Text); if (EC) - fatal(EC, "failed to open " + RCPath); + fatal(EC, "failed to open " + RCFile.Path); // Write resource script to the RC file. Out << "#define LANG_ENGLISH 9\n" @@ -379,17 +406,15 @@ std::unique_ptr<MemoryBuffer> createManifestRes() { Out.close(); // Create output resource file. 
- SmallString<128> ResPath; - if (auto EC = sys::fs::createTemporaryFile("tmp", "res", ResPath)) - fatal(EC, "cannot create a temporary file"); + TemporaryFile ResFile("output-resource", "res"); Executor E("rc.exe"); E.add("/fo"); - E.add(ResPath.str()); + E.add(ResFile.Path); E.add("/nologo"); - E.add(RCPath.str()); + E.add(RCFile.Path); E.run(); - return check(MemoryBuffer::getFile(ResPath), "could not open " + ResPath); + return ResFile.getMemoryBuffer(); } void createSideBySideManifest() { @@ -397,7 +422,7 @@ void createSideBySideManifest() { if (Path == "") Path = Config->OutputFile + ".manifest"; std::error_code EC; - llvm::raw_fd_ostream Out(Path, EC, llvm::sys::fs::F_Text); + raw_fd_ostream Out(Path, EC, sys::fs::F_Text); if (EC) fatal(EC, "failed to create manifest"); Out << createManifestXml(); @@ -485,12 +510,14 @@ void fixupExports() { } for (Export &E : Config->Exports) { + SymbolBody *Sym = E.Sym; if (!E.ForwardTo.empty()) { E.SymbolName = E.Name; - } else if (Undefined *U = cast_or_null<Undefined>(E.Sym->WeakAlias)) { - E.SymbolName = U->getName(); } else { - E.SymbolName = E.Sym->getName(); + if (auto *U = dyn_cast<Undefined>(Sym)) + if (U->WeakAlias) + Sym = U->WeakAlias; + E.SymbolName = Sym->getName(); } } @@ -515,7 +542,7 @@ void fixupExports() { Export *Existing = Pair.first->second; if (E == *Existing || E.Name != Existing->Name) continue; - llvm::errs() << "warning: duplicate /export option: " << E.Name << "\n"; + errs() << "warning: duplicate /export option: " << E.Name << "\n"; } Config->Exports = std::move(V); @@ -555,20 +582,39 @@ void checkFailIfMismatch(StringRef Arg) { std::unique_ptr<MemoryBuffer> convertResToCOFF(const std::vector<MemoryBufferRef> &MBs) { // Create an output file path. - SmallString<128> Path; - if (auto EC = llvm::sys::fs::createTemporaryFile("resource", "obj", Path)) - fatal(EC, "could not create temporary file"); + TemporaryFile File("resource-file", "obj"); // Execute cvtres.exe. 
Executor E("cvtres.exe"); E.add("/machine:" + machineToStr(Config->Machine)); E.add("/readonly"); E.add("/nologo"); - E.add("/out:" + Path); - for (MemoryBufferRef MB : MBs) - E.add(MB.getBufferIdentifier()); + E.add("/out:" + Twine(File.Path)); + + // We must create new files because the memory buffers we have may have no + // underlying file still existing on the disk. + // It happens if it was created from a TemporaryFile, which usually delete + // the file just after creating the MemoryBuffer. + std::vector<TemporaryFile> ResFiles; + ResFiles.reserve(MBs.size()); + for (MemoryBufferRef MB : MBs) { + // We store the temporary file in a vector to avoid deletion + // before running cvtres + ResFiles.emplace_back("resource-file", "res"); + TemporaryFile& ResFile = ResFiles.back(); + // Write the content of the resource in a temporary file + std::error_code EC; + raw_fd_ostream OS(ResFile.Path, EC, sys::fs::F_None); + if (EC) + fatal(EC, "failed to open " + ResFile.Path); + OS << MB.getBuffer(); + OS.close(); + + E.add(ResFile.Path); + } + E.run(); - return check(MemoryBuffer::getFile(Path), "could not open " + Path); + return File.getMemoryBuffer(); } // Create OptTable @@ -595,7 +641,7 @@ public: }; // Parses a given list of options. -llvm::opt::InputArgList ArgParser::parse(ArrayRef<const char *> ArgsArr) { +opt::InputArgList ArgParser::parse(ArrayRef<const char *> ArgsArr) { // First, replace respnose files (@<file>-style options). std::vector<const char *> Argv = replaceResponseFiles(ArgsArr); @@ -603,28 +649,28 @@ llvm::opt::InputArgList ArgParser::parse(ArrayRef<const char *> ArgsArr) { COFFOptTable Table; unsigned MissingIndex; unsigned MissingCount; - llvm::opt::InputArgList Args = - Table.ParseArgs(Argv, MissingIndex, MissingCount); + opt::InputArgList Args = Table.ParseArgs(Argv, MissingIndex, MissingCount); // Print the real command line if response files are expanded. 
if (Args.hasArg(OPT_verbose) && ArgsArr.size() != Argv.size()) { - llvm::outs() << "Command line:"; + outs() << "Command line:"; for (const char *S : Argv) - llvm::outs() << " " << S; - llvm::outs() << "\n"; + outs() << " " << S; + outs() << "\n"; } if (MissingCount) - fatal("missing arg value for \"" + Twine(Args.getArgString(MissingIndex)) + - "\", expected " + Twine(MissingCount) + - (MissingCount == 1 ? " argument." : " arguments.")); + fatal(Twine(Args.getArgString(MissingIndex)) + ": missing argument"); for (auto *Arg : Args.filtered(OPT_UNKNOWN)) - llvm::errs() << "ignoring unknown argument: " << Arg->getSpelling() << "\n"; + errs() << "ignoring unknown argument: " << Arg->getSpelling() << "\n"; return Args; } -llvm::opt::InputArgList ArgParser::parseLINK(ArrayRef<const char *> Args) { - // Concatenate LINK env and given arguments and parse them. +// link.exe has an interesting feature. If LINK environment exists, +// its contents are handled as a command line string. So you can pass +// extra arguments using the environment variable. +opt::InputArgList ArgParser::parseLINK(ArrayRef<const char *> Args) { + // Concatenate LINK env and command line arguments, and then parse them. 
Optional<std::string> Env = Process::GetEnv("LINK"); if (!Env) return parse(Args); @@ -635,8 +681,7 @@ llvm::opt::InputArgList ArgParser::parseLINK(ArrayRef<const char *> Args) { std::vector<const char *> ArgParser::tokenize(StringRef S) { SmallVector<const char *, 16> Tokens; - StringSaver Saver(AllocAux); - llvm::cl::TokenizeWindowsCommandLine(S, Saver, Tokens); + cl::TokenizeWindowsCommandLine(S, Saver, Tokens); return std::vector<const char *>(Tokens.begin(), Tokens.end()); } @@ -645,14 +690,13 @@ std::vector<const char *> ArgParser::tokenize(StringRef S) { std::vector<const char *> ArgParser::replaceResponseFiles(std::vector<const char *> Argv) { SmallVector<const char *, 256> Tokens(Argv.data(), Argv.data() + Argv.size()); - StringSaver Saver(AllocAux); ExpandResponseFiles(Saver, TokenizeWindowsCommandLine, Tokens); return std::vector<const char *>(Tokens.begin(), Tokens.end()); } void printHelp(const char *Argv0) { COFFOptTable Table; - Table.PrintHelp(llvm::outs(), Argv0, "LLVM Linker", false); + Table.PrintHelp(outs(), Argv0, "LLVM Linker", false); } } // namespace coff diff --git a/COFF/Error.cpp b/COFF/Error.cpp index 602a8544ce2b..b2bd557413df 100644 --- a/COFF/Error.cpp +++ b/COFF/Error.cpp @@ -11,14 +11,31 @@ #include "llvm/ADT/Twine.h" #include "llvm/Support/Error.h" +#include "llvm/Support/Process.h" #include "llvm/Support/raw_ostream.h" +#if !defined(_MSC_VER) && !defined(__MINGW32__) +#include <unistd.h> +#endif + +using namespace llvm; + namespace lld { namespace coff { void fatal(const Twine &Msg) { - llvm::errs() << Msg << "\n"; - exit(1); + if (sys::Process::StandardErrHasColors()) { + errs().changeColor(raw_ostream::RED, /*bold=*/true); + errs() << "error: "; + errs().resetColor(); + } else { + errs() << "error: "; + } + errs() << Msg << "\n"; + + outs().flush(); + errs().flush(); + _exit(1); } void fatal(std::error_code EC, const Twine &Msg) { diff --git a/COFF/Error.h b/COFF/Error.h index c9f64c662580..47549327db2b 100644 --- a/COFF/Error.h 
+++ b/COFF/Error.h @@ -32,6 +32,23 @@ template <class T> T check(Expected<T> E, const Twine &Prefix) { return std::move(*E); } +template <class T> T check(ErrorOr<T> EO) { + if (!EO) + fatal(EO.getError().message()); + return std::move(*EO); +} + +template <class T> T check(Expected<T> E) { + if (!E) { + std::string Buf; + llvm::raw_string_ostream OS(Buf); + logAllUnhandledErrors(E.takeError(), OS, ""); + OS.flush(); + fatal(Buf); + } + return std::move(*E); +} + } // namespace coff } // namespace lld diff --git a/COFF/ICF.cpp b/COFF/ICF.cpp index a2c5a90334d0..196fbe2610ea 100644 --- a/COFF/ICF.cpp +++ b/COFF/ICF.cpp @@ -7,43 +7,19 @@ // //===----------------------------------------------------------------------===// // -// Identical COMDAT Folding is a feature to merge COMDAT sections not by -// name (which is regular COMDAT handling) but by contents. If two COMDAT -// sections have the same data, relocations, attributes, etc., then the two -// are considered identical and merged by the linker. This optimization -// makes outputs smaller. +// ICF is short for Identical Code Folding. That is a size optimization to +// identify and merge two or more read-only sections (typically functions) +// that happened to have the same contents. It usually reduces output size +// by a few percent. // -// ICF is theoretically a problem of reducing graphs by merging as many -// identical subgraphs as possible, if we consider sections as vertices and -// relocations as edges. This may be a bit more complicated problem than you -// might think. The order of processing sections matters since merging two -// sections can make other sections, whose relocations now point to the same -// section, mergeable. Graphs may contain cycles, which is common in COFF. -// We need a sophisticated algorithm to do this properly and efficiently. +// On Windows, ICF is enabled by default. // -// What we do in this file is this. We split sections into groups. 
Sections -// in the same group are considered identical. -// -// First, all sections are grouped by their "constant" values. Constant -// values are values that are never changed by ICF, such as section contents, -// section name, number of relocations, type and offset of each relocation, -// etc. Because we do not care about some relocation targets in this step, -// two sections in the same group may not be identical, but at least two -// sections in different groups can never be identical. -// -// Then, we try to split each group by relocation targets. Relocations are -// considered identical if and only if the relocation targets are in the -// same group. Splitting a group may make more groups to be splittable, -// because two relocations that were previously considered identical might -// now point to different groups. We repeat this step until the convergence -// is obtained. -// -// This algorithm is so-called "optimistic" algorithm described in -// http://research.google.com/pubs/pub36912.html. +// See ELF/ICF.cpp for the details about the algortihm. 
// //===----------------------------------------------------------------------===// #include "Chunks.h" +#include "Error.h" #include "Symbols.h" #include "lld/Core/Parallel.h" #include "llvm/ADT/Hashing.h" @@ -58,29 +34,34 @@ using namespace llvm; namespace lld { namespace coff { -typedef std::vector<SectionChunk *>::iterator ChunkIterator; -typedef bool (*Comparator)(const SectionChunk *, const SectionChunk *); - class ICF { public: void run(const std::vector<Chunk *> &V); private: - static uint64_t getHash(SectionChunk *C); - static bool equalsConstant(const SectionChunk *A, const SectionChunk *B); - static bool equalsVariable(const SectionChunk *A, const SectionChunk *B); - bool forEachGroup(std::vector<SectionChunk *> &Chunks, Comparator Eq); - bool segregate(ChunkIterator Begin, ChunkIterator End, Comparator Eq); + void segregate(size_t Begin, size_t End, bool Constant); - std::atomic<uint64_t> NextID = { 1 }; -}; + bool equalsConstant(const SectionChunk *A, const SectionChunk *B); + bool equalsVariable(const SectionChunk *A, const SectionChunk *B); -// Entry point to ICF. -void doICF(const std::vector<Chunk *> &Chunks) { - ICF().run(Chunks); -} + uint32_t getHash(SectionChunk *C); + bool isEligible(SectionChunk *C); + + size_t findBoundary(size_t Begin, size_t End); + + void forEachColorRange(size_t Begin, size_t End, + std::function<void(size_t, size_t)> Fn); + + void forEachColor(std::function<void(size_t, size_t)> Fn); + + std::vector<SectionChunk *> Chunks; + int Cnt = 0; + std::atomic<uint32_t> NextId = {1}; + std::atomic<bool> Repeat = {false}; +}; -uint64_t ICF::getHash(SectionChunk *C) { +// Returns a hash value for S. 
+uint32_t ICF::getHash(SectionChunk *C) { return hash_combine(C->getPermissions(), hash_value(C->SectionName), C->NumRelocs, @@ -89,16 +70,44 @@ uint64_t ICF::getHash(SectionChunk *C) { C->Checksum); } -bool ICF::equalsConstant(const SectionChunk *A, const SectionChunk *B) { - if (A->AssocChildren.size() != B->AssocChildren.size() || - A->NumRelocs != B->NumRelocs) { - return false; +// Returns true if section S is subject of ICF. +bool ICF::isEligible(SectionChunk *C) { + bool Global = C->Sym && C->Sym->isExternal(); + bool Writable = C->getPermissions() & llvm::COFF::IMAGE_SCN_MEM_WRITE; + return C->isCOMDAT() && C->isLive() && Global && !Writable; +} + +// Split a range into smaller ranges by recoloring sections +void ICF::segregate(size_t Begin, size_t End, bool Constant) { + while (Begin < End) { + // Divide [Begin, End) into two. Let Mid be the start index of the + // second group. + auto Bound = std::stable_partition( + Chunks.begin() + Begin + 1, Chunks.begin() + End, [&](SectionChunk *S) { + if (Constant) + return equalsConstant(Chunks[Begin], S); + return equalsVariable(Chunks[Begin], S); + }); + size_t Mid = Bound - Chunks.begin(); + + // Split [Begin, End) into [Begin, Mid) and [Mid, End). + uint32_t Id = NextId++; + for (size_t I = Begin; I < Mid; ++I) + Chunks[I]->Color[(Cnt + 1) % 2] = Id; + + // If we created a group, we need to iterate the main loop again. + if (Mid != End) + Repeat = true; + + Begin = Mid; } +} - // Compare associative sections. - for (size_t I = 0, E = A->AssocChildren.size(); I != E; ++I) - if (A->AssocChildren[I]->GroupID != B->AssocChildren[I]->GroupID) - return false; +// Compare "non-moving" part of two sections, namely everything +// except relocation targets. +bool ICF::equalsConstant(const SectionChunk *A, const SectionChunk *B) { + if (A->NumRelocs != B->NumRelocs) + return false; // Compare relocations. 
auto Eq = [&](const coff_relocation &R1, const coff_relocation &R2) { @@ -106,14 +115,14 @@ bool ICF::equalsConstant(const SectionChunk *A, const SectionChunk *B) { R1.VirtualAddress != R2.VirtualAddress) { return false; } - SymbolBody *B1 = A->File->getSymbolBody(R1.SymbolTableIndex)->repl(); - SymbolBody *B2 = B->File->getSymbolBody(R2.SymbolTableIndex)->repl(); + SymbolBody *B1 = A->File->getSymbolBody(R1.SymbolTableIndex); + SymbolBody *B2 = B->File->getSymbolBody(R2.SymbolTableIndex); if (B1 == B2) return true; if (auto *D1 = dyn_cast<DefinedRegular>(B1)) if (auto *D2 = dyn_cast<DefinedRegular>(B2)) return D1->getValue() == D2->getValue() && - D1->getChunk()->GroupID == D2->getChunk()->GroupID; + D1->getChunk()->Color[Cnt % 2] == D2->getChunk()->Color[Cnt % 2]; return false; }; if (!std::equal(A->Relocs.begin(), A->Relocs.end(), B->Relocs.begin(), Eq)) @@ -128,54 +137,57 @@ bool ICF::equalsConstant(const SectionChunk *A, const SectionChunk *B) { A->getContents() == B->getContents(); } +// Compare "moving" part of two sections, namely relocation targets. bool ICF::equalsVariable(const SectionChunk *A, const SectionChunk *B) { - // Compare associative sections. - for (size_t I = 0, E = A->AssocChildren.size(); I != E; ++I) - if (A->AssocChildren[I]->GroupID != B->AssocChildren[I]->GroupID) - return false; - // Compare relocations. 
auto Eq = [&](const coff_relocation &R1, const coff_relocation &R2) { - SymbolBody *B1 = A->File->getSymbolBody(R1.SymbolTableIndex)->repl(); - SymbolBody *B2 = B->File->getSymbolBody(R2.SymbolTableIndex)->repl(); + SymbolBody *B1 = A->File->getSymbolBody(R1.SymbolTableIndex); + SymbolBody *B2 = B->File->getSymbolBody(R2.SymbolTableIndex); if (B1 == B2) return true; if (auto *D1 = dyn_cast<DefinedRegular>(B1)) if (auto *D2 = dyn_cast<DefinedRegular>(B2)) - return D1->getChunk()->GroupID == D2->getChunk()->GroupID; + return D1->getChunk()->Color[Cnt % 2] == D2->getChunk()->Color[Cnt % 2]; return false; }; return std::equal(A->Relocs.begin(), A->Relocs.end(), B->Relocs.begin(), Eq); } -bool ICF::segregate(ChunkIterator Begin, ChunkIterator End, Comparator Eq) { - bool R = false; - for (auto It = Begin;;) { - SectionChunk *Head = *It; - auto Bound = std::partition(It + 1, End, [&](SectionChunk *SC) { - return Eq(Head, SC); - }); - if (Bound == End) - return R; - uint64_t ID = NextID++; - std::for_each(It, Bound, [&](SectionChunk *SC) { SC->GroupID = ID; }); - It = Bound; - R = true; +size_t ICF::findBoundary(size_t Begin, size_t End) { + for (size_t I = Begin + 1; I < End; ++I) + if (Chunks[Begin]->Color[Cnt % 2] != Chunks[I]->Color[Cnt % 2]) + return I; + return End; +} + +void ICF::forEachColorRange(size_t Begin, size_t End, + std::function<void(size_t, size_t)> Fn) { + if (Begin > 0) + Begin = findBoundary(Begin - 1, End); + + while (Begin < End) { + size_t Mid = findBoundary(Begin, Chunks.size()); + Fn(Begin, Mid); + Begin = Mid; } } -bool ICF::forEachGroup(std::vector<SectionChunk *> &Chunks, Comparator Eq) { - bool R = false; - for (auto It = Chunks.begin(), End = Chunks.end(); It != End;) { - SectionChunk *Head = *It; - auto Bound = std::find_if(It + 1, End, [&](SectionChunk *SC) { - return SC->GroupID != Head->GroupID; - }); - if (segregate(It, Bound, Eq)) - R = true; - It = Bound; +// Call Fn on each color group. 
+void ICF::forEachColor(std::function<void(size_t, size_t)> Fn) { + // If the number of sections are too small to use threading, + // call Fn sequentially. + if (Chunks.size() < 1024) { + forEachColorRange(0, Chunks.size(), Fn); + return; } - return R; + + // Split sections into 256 shards and call Fn in parallel. + size_t NumShards = 256; + size_t Step = Chunks.size() / NumShards; + parallel_for(size_t(0), NumShards, [&](size_t I) { + forEachColorRange(I * Step, (I + 1) * Step, Fn); + }); + forEachColorRange(Step * NumShards, Chunks.size(), Fn); } // Merge identical COMDAT sections. @@ -183,62 +195,62 @@ bool ICF::forEachGroup(std::vector<SectionChunk *> &Chunks, Comparator Eq) { // contents and relocations are all the same. void ICF::run(const std::vector<Chunk *> &Vec) { // Collect only mergeable sections and group by hash value. - parallel_for_each(Vec.begin(), Vec.end(), [&](Chunk *C) { - if (auto *SC = dyn_cast<SectionChunk>(C)) { - bool Global = SC->Sym && SC->Sym->isExternal(); - bool Writable = SC->getPermissions() & llvm::COFF::IMAGE_SCN_MEM_WRITE; - if (SC->isCOMDAT() && SC->isLive() && Global && !Writable) - SC->GroupID = getHash(SC) | (uint64_t(1) << 63); - } - }); - std::vector<SectionChunk *> Chunks; for (Chunk *C : Vec) { - if (auto *SC = dyn_cast<SectionChunk>(C)) { - if (SC->GroupID) { - Chunks.push_back(SC); - } else { - SC->GroupID = NextID++; - } + auto *SC = dyn_cast<SectionChunk>(C); + if (!SC) + continue; + + if (isEligible(SC)) { + // Set MSB to 1 to avoid collisions with non-hash colors. + SC->Color[0] = getHash(SC) | (1 << 31); + Chunks.push_back(SC); + } else { + SC->Color[0] = NextId++; } } + if (Chunks.empty()) + return; + // From now on, sections in Chunks are ordered so that sections in // the same group are consecutive in the vector. - std::sort(Chunks.begin(), Chunks.end(), - [](SectionChunk *A, SectionChunk *B) { - return A->GroupID < B->GroupID; - }); - - // Split groups until we get a convergence. 
- int Cnt = 1; - forEachGroup(Chunks, equalsConstant); - - for (;;) { - if (!forEachGroup(Chunks, equalsVariable)) - break; + std::stable_sort(Chunks.begin(), Chunks.end(), + [](SectionChunk *A, SectionChunk *B) { + return A->Color[0] < B->Color[0]; + }); + + // Compare static contents and assign unique IDs for each static content. + forEachColor([&](size_t Begin, size_t End) { segregate(Begin, End, true); }); + ++Cnt; + + // Split groups by comparing relocations until convergence is obtained. + do { + Repeat = false; + forEachColor( + [&](size_t Begin, size_t End) { segregate(Begin, End, false); }); ++Cnt; - } + } while (Repeat); + if (Config->Verbose) - llvm::outs() << "\nICF needed " << Cnt << " iterations.\n"; - - // Merge sections in the same group. - for (auto It = Chunks.begin(), End = Chunks.end(); It != End;) { - SectionChunk *Head = *It++; - auto Bound = std::find_if(It, End, [&](SectionChunk *SC) { - return Head->GroupID != SC->GroupID; - }); - if (It == Bound) - continue; + outs() << "\nICF needed " << Cnt << " iterations\n"; + + // Merge sections in the same colors. + forEachColor([&](size_t Begin, size_t End) { + if (End - Begin == 1) + return; + if (Config->Verbose) - llvm::outs() << "Selected " << Head->getDebugName() << "\n"; - while (It != Bound) { - SectionChunk *SC = *It++; + outs() << "Selected " << Chunks[Begin]->getDebugName() << "\n"; + for (size_t I = Begin + 1; I < End; ++I) { if (Config->Verbose) - llvm::outs() << " Removed " << SC->getDebugName() << "\n"; - Head->replace(SC); + outs() << " Removed " << Chunks[I]->getDebugName() << "\n"; + Chunks[Begin]->replace(Chunks[I]); } - } + }); } +// Entry point to ICF. 
+void doICF(const std::vector<Chunk *> &Chunks) { ICF().run(Chunks); } + } // namespace coff } // namespace lld diff --git a/COFF/InputFiles.cpp b/COFF/InputFiles.cpp index ff26826371fa..0a97c2185f89 100644 --- a/COFF/InputFiles.cpp +++ b/COFF/InputFiles.cpp @@ -7,11 +7,15 @@ // //===----------------------------------------------------------------------===// +#include "InputFiles.h" #include "Chunks.h" #include "Config.h" +#include "Driver.h" #include "Error.h" -#include "InputFiles.h" +#include "Memory.h" +#include "SymbolTable.h" #include "Symbols.h" +#include "llvm-c/lto.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" @@ -26,88 +30,58 @@ #include "llvm/Support/ErrorOr.h" #include "llvm/Support/FileSystem.h" #include "llvm/Target/TargetOptions.h" -#include "llvm-c/lto.h" #include <cstring> #include <system_error> #include <utility> +using namespace llvm; using namespace llvm::COFF; using namespace llvm::object; using namespace llvm::support::endian; using llvm::Triple; using llvm::support::ulittle32_t; +using llvm::sys::fs::file_magic; +using llvm::sys::fs::identify_magic; namespace lld { namespace coff { -int InputFile::NextIndex = 0; -llvm::LLVMContext BitcodeFile::Context; - -// Returns the last element of a path, which is supposed to be a filename. -static StringRef getBasename(StringRef Path) { - size_t Pos = Path.find_last_of("\\/"); - if (Pos == StringRef::npos) - return Path; - return Path.substr(Pos + 1); -} +LLVMContext BitcodeFile::Context; -// Returns a string in the format of "foo.obj" or "foo.obj(bar.lib)". -std::string InputFile::getShortName() { - if (ParentName == "") - return getName().lower(); - std::string Res = (getBasename(ParentName) + "(" + - getBasename(getName()) + ")").str(); - return StringRef(Res).lower(); -} +ArchiveFile::ArchiveFile(MemoryBufferRef M) : InputFile(ArchiveKind, M) {} void ArchiveFile::parse() { // Parse a MemoryBufferRef as an archive file. 
- File = check(Archive::create(MB), "failed to parse static library"); - - // Allocate a buffer for Lazy objects. - size_t NumSyms = File->getNumberOfSymbols(); - LazySymbols.reserve(NumSyms); + File = check(Archive::create(MB), toString(this)); // Read the symbol table to construct Lazy objects. for (const Archive::Symbol &Sym : File->symbols()) - LazySymbols.emplace_back(this, Sym); - - // Seen is a map from member files to boolean values. Initially - // all members are mapped to false, which indicates all these files - // are not read yet. - Error Err; - for (auto &Child : File->children(Err)) - Seen[Child.getChildOffset()].clear(); - if (Err) - fatal(Err, "failed to parse static library"); + Symtab->addLazy(this, Sym); } // Returns a buffer pointing to a member file containing a given symbol. -// This function is thread-safe. -MemoryBufferRef ArchiveFile::getMember(const Archive::Symbol *Sym) { +void ArchiveFile::addMember(const Archive::Symbol *Sym) { const Archive::Child &C = check(Sym->getMember(), "could not get the member for symbol " + Sym->getName()); // Return an empty buffer if we have already returned the same buffer. - if (Seen[C.getChildOffset()].test_and_set()) - return MemoryBufferRef(); - return check(C.getMemoryBufferRef(), - "could not get the buffer for the member defining symbol " + - Sym->getName()); + if (!Seen.insert(C.getChildOffset()).second) + return; + + Driver->enqueueArchiveMember(C, Sym->getName(), getName()); } void ObjectFile::parse() { // Parse a memory buffer as a COFF file. - std::unique_ptr<Binary> Bin = - check(createBinary(MB), "failed to parse object file"); + std::unique_ptr<Binary> Bin = check(createBinary(MB), toString(this)); if (auto *Obj = dyn_cast<COFFObjectFile>(Bin.get())) { Bin.release(); COFFObj.reset(Obj); } else { - fatal(getName() + " is not a COFF file"); + fatal(toString(this) + " is not a COFF file"); } // Read section and symbol tables. 
@@ -137,13 +111,28 @@ void ObjectFile::initializeChunks() { Directives = std::string((const char *)Data.data(), Data.size()); continue; } - // Skip non-DWARF debug info. MSVC linker converts the sections into - // a PDB file, but we don't support that. - if (Name == ".debug" || Name.startswith(".debug$")) - continue; - // We want to preserve DWARF debug sections only when /debug is on. + + // Object files may have DWARF debug info or MS CodeView debug info + // (or both). + // + // DWARF sections don't need any special handling from the perspective + // of the linker; they are just a data section containing relocations. + // We can just link them to complete debug info. + // + // CodeView needs a linker support. We need to interpret and debug + // info, and then write it to a separate .pdb file. + + // Ignore debug info unless /debug is given. if (!Config->Debug && Name.startswith(".debug")) continue; + + // CodeView sections are stored to a different vector because they are + // not linked in the regular manner. + if (Name == ".debug" || Name.startswith(".debug$")) { + DebugChunks.push_back(new (Alloc) SectionChunk(this, Sec)); + continue; + } + if (Sec->Characteristics & llvm::COFF::IMAGE_SCN_LNK_REMOVE) continue; auto *C = new (Alloc) SectionChunk(this, Sec); @@ -156,12 +145,14 @@ void ObjectFile::initializeSymbols() { uint32_t NumSymbols = COFFObj->getNumberOfSymbols(); SymbolBodies.reserve(NumSymbols); SparseSymbolBodies.resize(NumSymbols); - llvm::SmallVector<std::pair<Undefined *, uint32_t>, 8> WeakAliases; + SmallVector<std::pair<SymbolBody *, uint32_t>, 8> WeakAliases; int32_t LastSectionNumber = 0; for (uint32_t I = 0; I < NumSymbols; ++I) { // Get a COFFSymbolRef object. 
- COFFSymbolRef Sym = - check(COFFObj->getSymbol(I), "broken object file: " + getName()); + ErrorOr<COFFSymbolRef> SymOrErr = COFFObj->getSymbol(I); + if (!SymOrErr) + fatal(SymOrErr.getError(), "broken object file: " + toString(this)); + COFFSymbolRef Sym = *SymOrErr; const void *AuxP = nullptr; if (Sym.getNumberOfAuxSymbols()) @@ -175,7 +166,7 @@ void ObjectFile::initializeSymbols() { Body = createUndefined(Sym); uint32_t TagIndex = static_cast<const coff_aux_weak_external *>(AuxP)->TagIndex; - WeakAliases.emplace_back((Undefined *)Body, TagIndex); + WeakAliases.emplace_back(Body, TagIndex); } else { Body = createDefined(Sym, AuxP, IsFirst); } @@ -186,23 +177,30 @@ void ObjectFile::initializeSymbols() { I += Sym.getNumberOfAuxSymbols(); LastSectionNumber = Sym.getSectionNumber(); } - for (auto WeakAlias : WeakAliases) - WeakAlias.first->WeakAlias = SparseSymbolBodies[WeakAlias.second]; + for (auto WeakAlias : WeakAliases) { + auto *U = dyn_cast<Undefined>(WeakAlias.first); + if (!U) + continue; + // Report an error if two undefined symbols have different weak aliases. 
+ if (U->WeakAlias && U->WeakAlias != SparseSymbolBodies[WeakAlias.second]) + Symtab->reportDuplicate(U->symbol(), this); + U->WeakAlias = SparseSymbolBodies[WeakAlias.second]; + } } -Undefined *ObjectFile::createUndefined(COFFSymbolRef Sym) { +SymbolBody *ObjectFile::createUndefined(COFFSymbolRef Sym) { StringRef Name; COFFObj->getSymbolName(Sym, Name); - return new (Alloc) Undefined(Name); + return Symtab->addUndefined(Name, this, Sym.isWeakExternal())->body(); } -Defined *ObjectFile::createDefined(COFFSymbolRef Sym, const void *AuxP, - bool IsFirst) { +SymbolBody *ObjectFile::createDefined(COFFSymbolRef Sym, const void *AuxP, + bool IsFirst) { StringRef Name; if (Sym.isCommon()) { auto *C = new (Alloc) CommonChunk(Sym); Chunks.push_back(C); - return new (Alloc) DefinedCommon(this, Sym, C); + return Symtab->addCommon(this, Sym, C)->body(); } if (Sym.isAbsolute()) { COFFObj->getSymbolName(Sym, Name); @@ -215,7 +213,10 @@ Defined *ObjectFile::createDefined(COFFSymbolRef Sym, const void *AuxP, SEHCompat = true; return nullptr; } - return new (Alloc) DefinedAbsolute(Name, Sym); + if (Sym.isExternal()) + return Symtab->addAbsolute(Name, Sym)->body(); + else + return new (Alloc) DefinedAbsolute(Name, Sym); } int32_t SectionNumber = Sym.getSectionNumber(); if (SectionNumber == llvm::COFF::IMAGE_SYM_DEBUG) @@ -223,12 +224,12 @@ Defined *ObjectFile::createDefined(COFFSymbolRef Sym, const void *AuxP, // Reserved sections numbers don't have contents. if (llvm::COFF::isReservedSectionNumber(SectionNumber)) - fatal("broken object file: " + getName()); + fatal("broken object file: " + toString(this)); // This symbol references a section which is not present in the section // header. if ((uint32_t)SectionNumber >= SparseChunks.size()) - fatal("broken object file: " + getName()); + fatal("broken object file: " + toString(this)); // Nothing else to do without a section chunk. 
auto *SC = cast_or_null<SectionChunk>(SparseChunks[SectionNumber]); @@ -245,7 +246,11 @@ Defined *ObjectFile::createDefined(COFFSymbolRef Sym, const void *AuxP, SC->Checksum = Aux->CheckSum; } - auto *B = new (Alloc) DefinedRegular(this, Sym, SC); + DefinedRegular *B; + if (Sym.isExternal()) + B = cast<DefinedRegular>(Symtab->addRegular(this, Sym, SC)->body()); + else + B = new (Alloc) DefinedRegular(this, Sym, SC); if (SC->isCOMDAT() && Sym.getValue() == 0 && !AuxP) SC->setSymbol(B); @@ -307,28 +312,29 @@ void ImportFile::parse() { ExtName = ExtName.substr(0, ExtName.find('@')); break; } - ImpSym = new (Alloc) DefinedImportData(DLLName, ImpName, ExtName, Hdr); - SymbolBodies.push_back(ImpSym); + + this->Hdr = Hdr; + ExternalName = ExtName; + + ImpSym = cast<DefinedImportData>( + Symtab->addImportData(ImpName, this)->body()); // If type is function, we need to create a thunk which jump to an // address pointed by the __imp_ symbol. (This allows you to call // DLL functions just like regular non-DLL functions.) if (Hdr->getType() != llvm::COFF::IMPORT_CODE) return; - ThunkSym = new (Alloc) DefinedImportThunk(Name, ImpSym, Hdr->Machine); - SymbolBodies.push_back(ThunkSym); + ThunkSym = cast<DefinedImportThunk>( + Symtab->addImportThunk(Name, ImpSym, Hdr->Machine)->body()); } void BitcodeFile::parse() { - // Usually parse() is thread-safe, but bitcode file is an exception. 
- std::lock_guard<std::mutex> Lock(Mu); - Context.enableDebugTypeODRUniquing(); ErrorOr<std::unique_ptr<LTOModule>> ModOrErr = LTOModule::createFromBuffer( Context, MB.getBufferStart(), MB.getBufferSize(), llvm::TargetOptions()); M = check(std::move(ModOrErr), "could not create LTO module"); - llvm::StringSaver Saver(Alloc); + StringSaver Saver(Alloc); for (unsigned I = 0, E = M->getSymbolCount(); I != E; ++I) { lto_symbol_attributes Attrs = M->getSymbolAttributes(I); if ((Attrs & LTO_SYMBOL_SCOPE_MASK) == LTO_SYMBOL_SCOPE_INTERNAL) @@ -337,15 +343,15 @@ void BitcodeFile::parse() { StringRef SymName = Saver.save(M->getSymbolName(I)); int SymbolDef = Attrs & LTO_SYMBOL_DEFINITION_MASK; if (SymbolDef == LTO_SYMBOL_DEFINITION_UNDEFINED) { - SymbolBodies.push_back(new (Alloc) Undefined(SymName)); + SymbolBodies.push_back(Symtab->addUndefined(SymName, this, false)->body()); } else { bool Replaceable = (SymbolDef == LTO_SYMBOL_DEFINITION_TENTATIVE || // common (Attrs & LTO_SYMBOL_COMDAT) || // comdat (SymbolDef == LTO_SYMBOL_DEFINITION_WEAK && // weak external (Attrs & LTO_SYMBOL_ALIAS))); - SymbolBodies.push_back(new (Alloc) DefinedBitcode(this, SymName, - Replaceable)); + SymbolBodies.push_back( + Symtab->addBitcode(this, SymName, Replaceable)->body()); } } @@ -367,7 +373,26 @@ MachineTypes BitcodeFile::getMachineType() { } } -std::mutex BitcodeFile::Mu; +// Returns the last element of a path, which is supposed to be a filename. +static StringRef getBasename(StringRef Path) { + size_t Pos = Path.find_last_of("\\/"); + if (Pos == StringRef::npos) + return Path; + return Path.substr(Pos + 1); +} + +// Returns a string in the format of "foo.obj" or "foo.obj(bar.lib)". 
+std::string toString(InputFile *File) { + if (!File) + return "(internal)"; + if (File->ParentName.empty()) + return File->getName().lower(); + + std::string Res = + (getBasename(File->ParentName) + "(" + getBasename(File->getName()) + ")") + .str(); + return StringRef(Res).lower(); +} } // namespace coff } // namespace lld diff --git a/COFF/InputFiles.h b/COFF/InputFiles.h index 0ec01b5075f9..498a1743e985 100644 --- a/COFF/InputFiles.h +++ b/COFF/InputFiles.h @@ -12,13 +12,13 @@ #include "lld/Core/LLVM.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/IR/LLVMContext.h" #include "llvm/LTO/legacy/LTOModule.h" #include "llvm/Object/Archive.h" #include "llvm/Object/COFF.h" #include "llvm/Support/StringSaver.h" #include <memory> -#include <mutex> #include <set> #include <vector> @@ -31,6 +31,7 @@ using llvm::COFF::MachineTypes; using llvm::object::Archive; using llvm::object::COFFObjectFile; using llvm::object::COFFSymbolRef; +using llvm::object::coff_import_header; using llvm::object::coff_section; class Chunk; @@ -38,6 +39,8 @@ class Defined; class DefinedImportData; class DefinedImportThunk; class Lazy; +class SectionChunk; +struct Symbol; class SymbolBody; class Undefined; @@ -51,67 +54,44 @@ public: // Returns the filename. StringRef getName() { return MB.getBufferIdentifier(); } - // Returns symbols defined by this file. - virtual std::vector<SymbolBody *> &getSymbols() = 0; - // Reads a file (the constructor doesn't do that). virtual void parse() = 0; // Returns the CPU type this file was compiled to. virtual MachineTypes getMachineType() { return IMAGE_FILE_MACHINE_UNKNOWN; } - // Returns a short, human-friendly filename. If this is a member of - // an archive file, a returned value includes parent's filename. - // Used for logging or debugging. - std::string getShortName(); - - // Sets a parent filename if this file is created from an archive. 
- void setParentName(StringRef N) { ParentName = N; } + // An archive file name if this file is created from an archive. + StringRef ParentName; // Returns .drectve section contents if exist. StringRef getDirectives() { return StringRef(Directives).trim(); } - // Each file has a unique index. The index number is used to - // resolve ties in symbol resolution. - int Index; - static int NextIndex; - protected: - InputFile(Kind K, MemoryBufferRef M) - : Index(NextIndex++), MB(M), FileKind(K) {} + InputFile(Kind K, MemoryBufferRef M) : MB(M), FileKind(K) {} MemoryBufferRef MB; std::string Directives; private: const Kind FileKind; - StringRef ParentName; }; // .lib or .a file. class ArchiveFile : public InputFile { public: - explicit ArchiveFile(MemoryBufferRef M) : InputFile(ArchiveKind, M) {} + explicit ArchiveFile(MemoryBufferRef M); static bool classof(const InputFile *F) { return F->kind() == ArchiveKind; } void parse() override; - // Returns a memory buffer for a given symbol. An empty memory buffer - // is returned if we have already returned the same memory buffer. - // (So that we don't instantiate same members more than once.) - MemoryBufferRef getMember(const Archive::Symbol *Sym); - - llvm::MutableArrayRef<Lazy> getLazySymbols() { return LazySymbols; } - - // All symbols returned by ArchiveFiles are of Lazy type. - std::vector<SymbolBody *> &getSymbols() override { - llvm_unreachable("internal fatal"); - } + // Enqueues an archive member load for the given symbol. If we've already + // enqueued a load for the same archive member, this function does nothing, + // which ensures that we don't load the same member more than once. + void addMember(const Archive::Symbol *Sym); private: std::unique_ptr<Archive> File; std::string Filename; - std::vector<Lazy> LazySymbols; - std::map<uint64_t, std::atomic_flag> Seen; + llvm::DenseSet<uint64_t> Seen; }; // .obj or .o file. This may be a member of an archive file. 
@@ -122,7 +102,8 @@ public: void parse() override; MachineTypes getMachineType() override; std::vector<Chunk *> &getChunks() { return Chunks; } - std::vector<SymbolBody *> &getSymbols() override { return SymbolBodies; } + std::vector<SectionChunk *> &getDebugChunks() { return DebugChunks; } + std::vector<SymbolBody *> &getSymbols() { return SymbolBodies; } // Returns a SymbolBody object for the SymbolIndex'th symbol in the // underlying object file. @@ -146,8 +127,8 @@ private: void initializeSymbols(); void initializeSEH(); - Defined *createDefined(COFFSymbolRef Sym, const void *Aux, bool IsFirst); - Undefined *createUndefined(COFFSymbolRef Sym); + SymbolBody *createDefined(COFFSymbolRef Sym, const void *Aux, bool IsFirst); + SymbolBody *createUndefined(COFFSymbolRef Sym); std::unique_ptr<COFFObjectFile> COFFObj; llvm::BumpPtrAllocator Alloc; @@ -157,6 +138,9 @@ private: // chunks and non-section chunks for common symbols. std::vector<Chunk *> Chunks; + // CodeView debug info sections. + std::vector<SectionChunk *> DebugChunks; + // This vector contains the same chunks as Chunks, but they are // indexed such that you can get a SectionChunk by section index. // Nonexistent section indices are filled with null pointers. @@ -182,7 +166,6 @@ public: explicit ImportFile(MemoryBufferRef M) : InputFile(ImportKind, M), StringAlloc(StringAllocAux) {} static bool classof(const InputFile *F) { return F->kind() == ImportKind; } - std::vector<SymbolBody *> &getSymbols() override { return SymbolBodies; } DefinedImportData *ImpSym = nullptr; DefinedImportThunk *ThunkSym = nullptr; @@ -191,10 +174,14 @@ public: private: void parse() override; - std::vector<SymbolBody *> SymbolBodies; llvm::BumpPtrAllocator Alloc; llvm::BumpPtrAllocator StringAllocAux; llvm::StringSaver StringAlloc; + +public: + StringRef ExternalName; + const coff_import_header *Hdr; + Chunk *Location = nullptr; }; // Used for LTO. 
@@ -202,7 +189,7 @@ class BitcodeFile : public InputFile { public: explicit BitcodeFile(MemoryBufferRef M) : InputFile(BitcodeKind, M) {} static bool classof(const InputFile *F) { return F->kind() == BitcodeKind; } - std::vector<SymbolBody *> &getSymbols() override { return SymbolBodies; } + std::vector<SymbolBody *> &getSymbols() { return SymbolBodies; } MachineTypes getMachineType() override; std::unique_ptr<LTOModule> takeModule() { return std::move(M); } @@ -214,9 +201,10 @@ private: std::vector<SymbolBody *> SymbolBodies; llvm::BumpPtrAllocator Alloc; std::unique_ptr<LTOModule> M; - static std::mutex Mu; }; +std::string toString(InputFile *File); + } // namespace coff } // namespace lld diff --git a/COFF/Librarian.cpp b/COFF/Librarian.cpp index 25fb4a87b3eb..4c597fad7345 100644 --- a/COFF/Librarian.cpp +++ b/COFF/Librarian.cpp @@ -54,7 +54,7 @@ static uint16_t getImgRelRelocation() { } } -template <class T> void append(std::vector<uint8_t> &B, const T &Data) { +template <class T> static void append(std::vector<uint8_t> &B, const T &Data) { size_t S = B.size(); B.resize(S + sizeof(T)); memcpy(&B[S], &Data, sizeof(T)); @@ -352,15 +352,16 @@ ObjectFactory::createNullImportDescriptor(std::vector<uint8_t> &Buffer) { NewArchiveMember ObjectFactory::createNullThunk(std::vector<uint8_t> &Buffer) { static const uint32_t NumberOfSections = 2; static const uint32_t NumberOfSymbols = 1; + uint32_t VASize = is32bit() ? 4 : 8; // COFF Header coff_file_header Header{ u16(Config->Machine), u16(NumberOfSections), u32(0), u32(sizeof(Header) + (NumberOfSections * sizeof(coff_section)) + // .idata$5 - sizeof(export_address_table_entry) + + VASize + // .idata$4 - sizeof(export_address_table_entry)), + VASize), u32(NumberOfSymbols), u16(0), u16(is32bit() ? 
IMAGE_FILE_32BIT_MACHINE : 0), }; @@ -371,36 +372,40 @@ NewArchiveMember ObjectFactory::createNullThunk(std::vector<uint8_t> &Buffer) { {{'.', 'i', 'd', 'a', 't', 'a', '$', '5'}, u32(0), u32(0), - u32(sizeof(export_address_table_entry)), + u32(VASize), u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section)), u32(0), u32(0), u16(0), u16(0), - u32(IMAGE_SCN_ALIGN_4BYTES | IMAGE_SCN_CNT_INITIALIZED_DATA | - IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_WRITE)}, + u32((is32bit() ? IMAGE_SCN_ALIGN_4BYTES : IMAGE_SCN_ALIGN_8BYTES) | + IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ | + IMAGE_SCN_MEM_WRITE)}, {{'.', 'i', 'd', 'a', 't', 'a', '$', '4'}, u32(0), u32(0), - u32(sizeof(export_address_table_entry)), + u32(VASize), u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section) + - sizeof(export_address_table_entry)), + VASize), u32(0), u32(0), u16(0), u16(0), - u32(IMAGE_SCN_ALIGN_4BYTES | IMAGE_SCN_CNT_INITIALIZED_DATA | - IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_WRITE)}, + u32((is32bit() ? 
IMAGE_SCN_ALIGN_4BYTES : IMAGE_SCN_ALIGN_8BYTES) | + IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ | + IMAGE_SCN_MEM_WRITE)}, }; append(Buffer, SectionTable); - // .idata$5 - static const export_address_table_entry ILT{u32(0)}; - append(Buffer, ILT); + // .idata$5, ILT + append(Buffer, u32(0)); + if (!is32bit()) + append(Buffer, u32(0)); - // .idata$4 - static const export_address_table_entry IAT{u32(0)}; - append(Buffer, IAT); + // .idata$4, IAT + append(Buffer, u32(0)); + if (!is32bit()) + append(Buffer, u32(0)); // Symbol Table coff_symbol16 SymbolTable[NumberOfSymbols] = { @@ -458,7 +463,7 @@ void lld::coff::writeImportLibrary() { std::vector<NewArchiveMember> Members; std::string Path = getImplibPath(); - std::string DLLName = llvm::sys::path::filename(Config->OutputFile); + std::string DLLName = sys::path::filename(Config->OutputFile); ObjectFactory OF(DLLName); std::vector<uint8_t> ImportDescriptor; diff --git a/COFF/MarkLive.cpp b/COFF/MarkLive.cpp index 0870986ad81a..0156d238b672 100644 --- a/COFF/MarkLive.cpp +++ b/COFF/MarkLive.cpp @@ -38,8 +38,8 @@ void markLive(const std::vector<Chunk *> &Chunks) { }; // Add GC root chunks. - for (Undefined *U : Config->GCRoot) - if (auto *D = dyn_cast<DefinedRegular>(U->repl())) + for (SymbolBody *B : Config->GCRoot) + if (auto *D = dyn_cast<DefinedRegular>(B)) Enqueue(D->getChunk()); while (!Worklist.empty()) { @@ -48,7 +48,7 @@ void markLive(const std::vector<Chunk *> &Chunks) { // Mark all symbols listed in the relocation table for this section. for (SymbolBody *S : SC->symbols()) - if (auto *D = dyn_cast<DefinedRegular>(S->repl())) + if (auto *D = dyn_cast<DefinedRegular>(S)) Enqueue(D->getChunk()); // Mark associative sections if any. 
diff --git a/COFF/Memory.h b/COFF/Memory.h new file mode 100644 index 000000000000..526f11344a09 --- /dev/null +++ b/COFF/Memory.h @@ -0,0 +1,52 @@ +//===- Memory.h -------------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// See ELF/Memory.h +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_COFF_MEMORY_H +#define LLD_COFF_MEMORY_H + +#include "llvm/Support/Allocator.h" +#include "llvm/Support/StringSaver.h" +#include <vector> + +namespace lld { +namespace coff { + +extern llvm::BumpPtrAllocator BAlloc; +extern llvm::StringSaver Saver; + +struct SpecificAllocBase { + SpecificAllocBase() { Instances.push_back(this); } + virtual ~SpecificAllocBase() = default; + virtual void reset() = 0; + static std::vector<SpecificAllocBase *> Instances; +}; + +template <class T> struct SpecificAlloc : public SpecificAllocBase { + void reset() override { Alloc.DestroyAll(); } + llvm::SpecificBumpPtrAllocator<T> Alloc; +}; + +template <typename T, typename... U> T *make(U &&... 
Args) { + static SpecificAlloc<T> Alloc; + return new (Alloc.Alloc.Allocate()) T(std::forward<U>(Args)...); +} + +inline void freeArena() { + for (SpecificAllocBase *Alloc : SpecificAllocBase::Instances) + Alloc->reset(); + BAlloc.Reset(); +} +} +} + +#endif diff --git a/COFF/ModuleDef.cpp b/COFF/ModuleDef.cpp index 5e393f45d184..a273b6f535db 100644 --- a/COFF/ModuleDef.cpp +++ b/COFF/ModuleDef.cpp @@ -18,6 +18,7 @@ #include "Config.h" #include "Error.h" +#include "Memory.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/StringSaver.h" @@ -113,7 +114,7 @@ private: class Parser { public: - explicit Parser(StringRef S, StringSaver *A) : Lex(S), Alloc(A) {} + explicit Parser(StringRef S) : Lex(S) {} void parse() { do { @@ -197,9 +198,9 @@ private: if (Config->Machine == I386) { if (!isDecorated(E.Name)) - E.Name = Alloc->save("_" + E.Name); + E.Name = Saver.save("_" + E.Name); if (!E.ExtName.empty() && !isDecorated(E.ExtName)) - E.ExtName = Alloc->save("_" + E.ExtName); + E.ExtName = Saver.save("_" + E.ExtName); } for (;;) { @@ -278,14 +279,11 @@ private: Lexer Lex; Token Tok; std::vector<Token> Stack; - StringSaver *Alloc; }; } // anonymous namespace -void parseModuleDefs(MemoryBufferRef MB, StringSaver *Alloc) { - Parser(MB.getBuffer(), Alloc).parse(); -} +void parseModuleDefs(MemoryBufferRef MB) { Parser(MB.getBuffer()).parse(); } } // namespace coff } // namespace lld diff --git a/COFF/Options.td b/COFF/Options.td index e5c9c5b4635b..9dfbcc8e188c 100644 --- a/COFF/Options.td +++ b/COFF/Options.td @@ -27,6 +27,7 @@ def failifmismatch : P<"failifmismatch", "">; def heap : P<"heap", "Size of the heap">; def implib : P<"implib", "Import library name">; def libpath : P<"libpath", "Additional library search path">; +def linkrepro : P<"linkrepro", "Dump linker invocation and input files for debugging">; def machine : P<"machine", "Specify target platform">; def merge : P<"merge", "Combine sections">; def mllvm : P<"mllvm", 
"Options to pass to LLVM">; @@ -61,7 +62,9 @@ def deffile : Joined<["/", "-"], "def:">, HelpText<"Use module-definition file">; def debug : F<"debug">, HelpText<"Embed a symbol table in the image">; +def debugtype : P<"debugtype", "Debug Info Options">; def dll : F<"dll">, HelpText<"Create a DLL">; +def driver : P<"driver", "Generate a Windows NT Kernel Mode Driver">; def nodefaultlib_all : F<"nodefaultlib">; def noentry : F<"noentry">; def profile : F<"profile">; @@ -91,7 +94,10 @@ def help_q : Flag<["/?", "-?"], "">, Alias<help>; def nosymtab : F<"nosymtab">; // Flags for debugging -def lldmap : Joined<["/", "-"], "lldmap:">; +def debugpdb : F<"debugpdb">; +def dumppdb : Joined<["/", "-"], "dumppdb">; +def lldmap : F<"lldmap">; +def lldmap_file : Joined<["/", "-"], "lldmap:">; //============================================================================== // The flags below do nothing. They are defined only for link.exe compatibility. diff --git a/COFF/PDB.cpp b/COFF/PDB.cpp index 7606ccc680d3..56d5a3651143 100644 --- a/COFF/PDB.cpp +++ b/COFF/PDB.cpp @@ -7,55 +7,187 @@ // //===----------------------------------------------------------------------===// -#include "Driver.h" +#include "PDB.h" +#include "Chunks.h" +#include "Config.h" #include "Error.h" +#include "SymbolTable.h" #include "Symbols.h" +#include "llvm/DebugInfo/CodeView/SymbolDumper.h" +#include "llvm/DebugInfo/CodeView/TypeDumper.h" +#include "llvm/DebugInfo/MSF/ByteStream.h" +#include "llvm/DebugInfo/MSF/MSFBuilder.h" +#include "llvm/DebugInfo/MSF/MSFCommon.h" +#include "llvm/DebugInfo/PDB/Raw/DbiStream.h" +#include "llvm/DebugInfo/PDB/Raw/DbiStreamBuilder.h" +#include "llvm/DebugInfo/PDB/Raw/InfoStream.h" +#include "llvm/DebugInfo/PDB/Raw/InfoStreamBuilder.h" +#include "llvm/DebugInfo/PDB/Raw/PDBFile.h" +#include "llvm/DebugInfo/PDB/Raw/PDBFileBuilder.h" +#include "llvm/DebugInfo/PDB/Raw/TpiStream.h" +#include "llvm/DebugInfo/PDB/Raw/TpiStreamBuilder.h" +#include "llvm/Object/COFF.h" #include 
"llvm/Support/Endian.h" #include "llvm/Support/FileOutputBuffer.h" +#include "llvm/Support/ScopedPrinter.h" #include <memory> +using namespace lld; +using namespace lld::coff; using namespace llvm; +using namespace llvm::codeview; using namespace llvm::support; using namespace llvm::support::endian; -const int PageSize = 4096; -const uint8_t Magic[32] = "Microsoft C/C++ MSF 7.00\r\n\032DS\0\0"; - -namespace { -struct PDBHeader { - uint8_t Magic[32]; - ulittle32_t PageSize; - ulittle32_t FpmPage; - ulittle32_t PageCount; - ulittle32_t RootSize; - ulittle32_t Reserved; - ulittle32_t RootPointer; -}; -} - -void lld::coff::createPDB(StringRef Path) { - // Create a file. - size_t FileSize = PageSize * 3; - ErrorOr<std::unique_ptr<FileOutputBuffer>> BufferOrErr = - FileOutputBuffer::create(Path, FileSize); - if (auto EC = BufferOrErr.getError()) - fatal(EC, "failed to open " + Path); - std::unique_ptr<FileOutputBuffer> Buffer = std::move(*BufferOrErr); - - // Write the file header. - uint8_t *Buf = Buffer->getBufferStart(); - auto *Hdr = reinterpret_cast<PDBHeader *>(Buf); - memcpy(Hdr->Magic, Magic, sizeof(Magic)); - Hdr->PageSize = PageSize; - // I don't know what FpmPage field means, but it must not be 0. - Hdr->FpmPage = 1; - Hdr->PageCount = FileSize / PageSize; - // Root directory is empty, containing only the length field. - Hdr->RootSize = 4; - // Root directory is on page 1. - Hdr->RootPointer = 1; - - // Write the root directory. Root stream is on page 2. - write32le(Buf + PageSize, 2); - Buffer->commit(); +using llvm::object::coff_section; + +static ExitOnError ExitOnErr; + +// Returns a list of all SectionChunks. 
+static std::vector<coff_section> getInputSections(SymbolTable *Symtab) { + std::vector<coff_section> V; + for (Chunk *C : Symtab->getChunks()) + if (auto *SC = dyn_cast<SectionChunk>(C)) + V.push_back(*SC->Header); + return V; +} + +static SectionChunk *findByName(std::vector<SectionChunk *> &Sections, + StringRef Name) { + for (SectionChunk *C : Sections) + if (C->getSectionName() == Name) + return C; + return nullptr; +} + +static ArrayRef<uint8_t> getDebugT(ObjectFile *File) { + SectionChunk *Sec = findByName(File->getDebugChunks(), ".debug$T"); + if (!Sec) + return {}; + + // First 4 bytes are section magic. + ArrayRef<uint8_t> Data = Sec->getContents(); + if (Data.size() < 4) + fatal(".debug$T too short"); + if (read32le(Data.data()) != COFF::DEBUG_SECTION_MAGIC) + fatal(".debug$T has an invalid magic"); + return Data.slice(4); +} + +static void dumpDebugT(ScopedPrinter &W, ObjectFile *File) { + ArrayRef<uint8_t> Data = getDebugT(File); + if (Data.empty()) + return; + + msf::ByteStream Stream(Data); + CVTypeDumper TypeDumper(&W, false); + if (auto EC = TypeDumper.dump(Data)) + fatal(EC, "CVTypeDumper::dump failed"); +} + +static void dumpDebugS(ScopedPrinter &W, ObjectFile *File) { + SectionChunk *Sec = findByName(File->getDebugChunks(), ".debug$S"); + if (!Sec) + return; + + msf::ByteStream Stream(Sec->getContents()); + CVSymbolArray Symbols; + msf::StreamReader Reader(Stream); + if (auto EC = Reader.readArray(Symbols, Reader.getLength())) + fatal(EC, "StreamReader.readArray<CVSymbolArray> failed"); + + CVTypeDumper TypeDumper(&W, false); + CVSymbolDumper SymbolDumper(W, TypeDumper, nullptr, false); + if (auto EC = SymbolDumper.dump(Symbols)) + fatal(EC, "CVSymbolDumper::dump failed"); +} + +// Dump CodeView debug info. This is for debugging. 
+static void dumpCodeView(SymbolTable *Symtab) { + ScopedPrinter W(outs()); + + for (ObjectFile *File : Symtab->ObjectFiles) { + dumpDebugT(W, File); + dumpDebugS(W, File); + } +} + +static void addTypeInfo(SymbolTable *Symtab, + pdb::TpiStreamBuilder &TpiBuilder) { + for (ObjectFile *File : Symtab->ObjectFiles) { + ArrayRef<uint8_t> Data = getDebugT(File); + if (Data.empty()) + continue; + + msf::ByteStream Stream(Data); + codeview::CVTypeArray Records; + msf::StreamReader Reader(Stream); + if (auto EC = Reader.readArray(Records, Reader.getLength())) + fatal(EC, "Reader.readArray failed"); + for (const codeview::CVType &Rec : Records) + TpiBuilder.addTypeRecord(Rec); + } +} + +// Creates a PDB file. +void coff::createPDB(StringRef Path, SymbolTable *Symtab, + ArrayRef<uint8_t> SectionTable) { + if (Config->DumpPdb) + dumpCodeView(Symtab); + + BumpPtrAllocator Alloc; + pdb::PDBFileBuilder Builder(Alloc); + ExitOnErr(Builder.initialize(4096)); // 4096 is blocksize + + // Create streams in MSF for predefined streams, namely + // PDB, TPI, DBI and IPI. + for (int I = 0; I < (int)pdb::kSpecialStreamCount; ++I) + ExitOnErr(Builder.getMsfBuilder().addStream(0)); + + // Add an Info stream. + auto &InfoBuilder = Builder.getInfoBuilder(); + InfoBuilder.setAge(1); + + // Should be a random number, 0 for now. + InfoBuilder.setGuid({}); + + // Should be the current time, but set 0 for reproducibilty. + InfoBuilder.setSignature(0); + InfoBuilder.setVersion(pdb::PdbRaw_ImplVer::PdbImplVC70); + + // Add an empty DPI stream. + auto &DbiBuilder = Builder.getDbiBuilder(); + DbiBuilder.setVersionHeader(pdb::PdbDbiV110); + + // Add an empty TPI stream. + auto &TpiBuilder = Builder.getTpiBuilder(); + TpiBuilder.setVersionHeader(pdb::PdbTpiV80); + if (Config->DebugPdb) + addTypeInfo(Symtab, TpiBuilder); + + // Add an empty IPI stream. + auto &IpiBuilder = Builder.getIpiBuilder(); + IpiBuilder.setVersionHeader(pdb::PdbTpiV80); + + // Add Section Contributions. 
+ std::vector<pdb::SectionContrib> Contribs = + pdb::DbiStreamBuilder::createSectionContribs(getInputSections(Symtab)); + DbiBuilder.setSectionContribs(Contribs); + + // Add Section Map stream. + ArrayRef<object::coff_section> Sections = { + (const object::coff_section *)SectionTable.data(), + SectionTable.size() / sizeof(object::coff_section)}; + std::vector<pdb::SecMapEntry> SectionMap = + pdb::DbiStreamBuilder::createSectionMap(Sections); + DbiBuilder.setSectionMap(SectionMap); + + ExitOnErr(DbiBuilder.addModuleInfo("", "* Linker *")); + + // Add COFF section header stream. + ExitOnErr( + DbiBuilder.addDbgStream(pdb::DbgHeaderType::SectionHdr, SectionTable)); + + // Write to a file. + ExitOnErr(Builder.commit(Path)); } diff --git a/COFF/PDB.h b/COFF/PDB.h new file mode 100644 index 000000000000..091e90fa1ef1 --- /dev/null +++ b/COFF/PDB.h @@ -0,0 +1,25 @@ +//===- PDB.h ----------------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_COFF_PDB_H +#define LLD_COFF_PDB_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" + +namespace lld { +namespace coff { +class SymbolTable; + +void createPDB(llvm::StringRef Path, SymbolTable *Symtab, + llvm::ArrayRef<uint8_t> SectionTable); +} +} + +#endif diff --git a/COFF/Strings.cpp b/COFF/Strings.cpp new file mode 100644 index 000000000000..d0558413f673 --- /dev/null +++ b/COFF/Strings.cpp @@ -0,0 +1,30 @@ +//===- Strings.cpp -------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "Strings.h" + +#if defined(_MSC_VER) +#include <Windows.h> +#include <DbgHelp.h> +#pragma comment(lib, "dbghelp.lib") +#endif + +using namespace lld; +using namespace lld::coff; +using namespace llvm; + +Optional<std::string> coff::demangle(StringRef S) { +#if defined(_MSC_VER) + char Buf[4096]; + if (S.startswith("?")) + if (size_t Len = UnDecorateSymbolName(S.str().c_str(), Buf, sizeof(Buf), 0)) + return std::string(Buf, Len); +#endif + return None; +} diff --git a/COFF/Strings.h b/COFF/Strings.h new file mode 100644 index 000000000000..1f85f3e2da5c --- /dev/null +++ b/COFF/Strings.h @@ -0,0 +1,23 @@ +//===- Strings.h ------------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_COFF_STRINGS_H +#define LLD_COFF_STRINGS_H + +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringRef.h" +#include <string> + +namespace lld { +namespace coff { +llvm::Optional<std::string> demangle(llvm::StringRef S); +} +} + +#endif diff --git a/COFF/SymbolTable.cpp b/COFF/SymbolTable.cpp index df9da4c36650..9cc0b75c1510 100644 --- a/COFF/SymbolTable.cpp +++ b/COFF/SymbolTable.cpp @@ -7,12 +7,12 @@ // //===----------------------------------------------------------------------===// +#include "SymbolTable.h" #include "Config.h" #include "Driver.h" #include "Error.h" -#include "SymbolTable.h" +#include "Memory.h" #include "Symbols.h" -#include "lld/Core/Parallel.h" #include "llvm/IR/LLVMContext.h" #include "llvm/LTO/legacy/LTOCodeGenerator.h" #include "llvm/Support/Debug.h" @@ -24,222 +24,265 @@ using namespace llvm; namespace lld { namespace coff { -void SymbolTable::addFile(std::unique_ptr<InputFile> FileP) { -#if LLVM_ENABLE_THREADS - 
std::launch Policy = std::launch::async; -#else - std::launch Policy = std::launch::deferred; -#endif +SymbolTable *Symtab; - InputFile *File = FileP.get(); - Files.push_back(std::move(FileP)); - if (auto *F = dyn_cast<ArchiveFile>(File)) { - ArchiveQueue.push_back( - std::async(Policy, [=]() { F->parse(); return F; })); - return; +void SymbolTable::addFile(InputFile *File) { + if (Config->Verbose) + outs() << "Reading " << toString(File) << "\n"; + File->parse(); + + MachineTypes MT = File->getMachineType(); + if (Config->Machine == IMAGE_FILE_MACHINE_UNKNOWN) { + Config->Machine = MT; + } else if (MT != IMAGE_FILE_MACHINE_UNKNOWN && Config->Machine != MT) { + fatal(toString(File) + ": machine type " + machineToStr(MT) + + " conflicts with " + machineToStr(Config->Machine)); } - ObjectQueue.push_back( - std::async(Policy, [=]() { File->parse(); return File; })); + if (auto *F = dyn_cast<ObjectFile>(File)) { ObjectFiles.push_back(F); } else if (auto *F = dyn_cast<BitcodeFile>(File)) { BitcodeFiles.push_back(F); - } else { - ImportFiles.push_back(cast<ImportFile>(File)); + } else if (auto *F = dyn_cast<ImportFile>(File)) { + ImportFiles.push_back(F); } -} -void SymbolTable::step() { - if (queueEmpty()) + StringRef S = File->getDirectives(); + if (S.empty()) return; - readObjects(); - readArchives(); -} -void SymbolTable::run() { - while (!queueEmpty()) - step(); -} - -void SymbolTable::readArchives() { - if (ArchiveQueue.empty()) - return; - - // Add lazy symbols to the symbol table. Lazy symbols that conflict - // with existing undefined symbols are accumulated in LazySyms. 
- std::vector<Symbol *> LazySyms; - for (std::future<ArchiveFile *> &Future : ArchiveQueue) { - ArchiveFile *File = Future.get(); - if (Config->Verbose) - llvm::outs() << "Reading " << File->getShortName() << "\n"; - for (Lazy &Sym : File->getLazySymbols()) - addLazy(&Sym, &LazySyms); - } - ArchiveQueue.clear(); - - // Add archive member files to ObjectQueue that should resolve - // existing undefined symbols. - for (Symbol *Sym : LazySyms) - addMemberFile(cast<Lazy>(Sym->Body)); -} - -void SymbolTable::readObjects() { - if (ObjectQueue.empty()) - return; - - // Add defined and undefined symbols to the symbol table. - std::vector<StringRef> Directives; - for (size_t I = 0; I < ObjectQueue.size(); ++I) { - InputFile *File = ObjectQueue[I].get(); - if (Config->Verbose) - llvm::outs() << "Reading " << File->getShortName() << "\n"; - // Adding symbols may add more files to ObjectQueue - // (but not to ArchiveQueue). - for (SymbolBody *Sym : File->getSymbols()) - if (Sym->isExternal()) - addSymbol(Sym); - StringRef S = File->getDirectives(); - if (!S.empty()) { - Directives.push_back(S); - if (Config->Verbose) - llvm::outs() << "Directives: " << File->getShortName() - << ": " << S << "\n"; - } - } - ObjectQueue.clear(); - - // Parse directive sections. This may add files to - // ArchiveQueue and ObjectQueue. 
- for (StringRef S : Directives) - Driver->parseDirectives(S); -} - -bool SymbolTable::queueEmpty() { - return ArchiveQueue.empty() && ObjectQueue.empty(); + if (Config->Verbose) + outs() << "Directives: " << toString(File) << ": " << S << "\n"; + Driver->parseDirectives(S); } -void SymbolTable::reportRemainingUndefines(bool Resolve) { - llvm::SmallPtrSet<SymbolBody *, 8> Undefs; +void SymbolTable::reportRemainingUndefines() { + SmallPtrSet<SymbolBody *, 8> Undefs; for (auto &I : Symtab) { Symbol *Sym = I.second; - auto *Undef = dyn_cast<Undefined>(Sym->Body); + auto *Undef = dyn_cast<Undefined>(Sym->body()); if (!Undef) continue; + if (!Sym->IsUsedInRegularObj) + continue; StringRef Name = Undef->getName(); // A weak alias may have been resolved, so check for that. if (Defined *D = Undef->getWeakAlias()) { - if (Resolve) - Sym->Body = D; + // We resolve weak aliases by replacing the alias's SymbolBody with the + // target's SymbolBody. This causes all SymbolBody pointers referring to + // the old symbol to instead refer to the new symbol. However, we can't + // just blindly copy sizeof(Symbol::Body) bytes from D to Sym->Body + // because D may be an internal symbol, and internal symbols are stored as + // "unparented" SymbolBodies. For that reason we need to check which type + // of symbol we are dealing with and copy the correct number of bytes. + if (isa<DefinedRegular>(D)) + memcpy(Sym->Body.buffer, D, sizeof(DefinedRegular)); + else if (isa<DefinedAbsolute>(D)) + memcpy(Sym->Body.buffer, D, sizeof(DefinedAbsolute)); + else + // No other internal symbols are possible. + Sym->Body = D->symbol()->Body; continue; } // If we can resolve a symbol by removing __imp_ prefix, do that. // This odd rule is for compatibility with MSVC linker. 
if (Name.startswith("__imp_")) { Symbol *Imp = find(Name.substr(strlen("__imp_"))); - if (Imp && isa<Defined>(Imp->Body)) { - if (!Resolve) - continue; - auto *D = cast<Defined>(Imp->Body); - auto *S = new (Alloc) DefinedLocalImport(Name, D); - LocalImportChunks.push_back(S->getChunk()); - Sym->Body = S; + if (Imp && isa<Defined>(Imp->body())) { + auto *D = cast<Defined>(Imp->body()); + replaceBody<DefinedLocalImport>(Sym, Name, D); + LocalImportChunks.push_back( + cast<DefinedLocalImport>(Sym->body())->getChunk()); continue; } } // Remaining undefined symbols are not fatal if /force is specified. // They are replaced with dummy defined symbols. - if (Config->Force && Resolve) - Sym->Body = new (Alloc) DefinedAbsolute(Name, 0); - Undefs.insert(Sym->Body); + if (Config->Force) + replaceBody<DefinedAbsolute>(Sym, Name, 0); + Undefs.insert(Sym->body()); } if (Undefs.empty()) return; - for (Undefined *U : Config->GCRoot) - if (Undefs.count(U->repl())) - llvm::errs() << "<root>: undefined symbol: " << U->getName() << "\n"; - for (std::unique_ptr<InputFile> &File : Files) - if (!isa<ArchiveFile>(File.get())) - for (SymbolBody *Sym : File->getSymbols()) - if (Undefs.count(Sym->repl())) - llvm::errs() << File->getShortName() << ": undefined symbol: " - << Sym->getName() << "\n"; + for (SymbolBody *B : Config->GCRoot) + if (Undefs.count(B)) + errs() << "<root>: undefined symbol: " << B->getName() << "\n"; + for (ObjectFile *File : ObjectFiles) + for (SymbolBody *Sym : File->getSymbols()) + if (Undefs.count(Sym)) + errs() << toString(File) << ": undefined symbol: " << Sym->getName() + << "\n"; if (!Config->Force) fatal("link failed"); } -void SymbolTable::addLazy(Lazy *New, std::vector<Symbol *> *Accum) { - Symbol *Sym = insert(New); - if (Sym->Body == New) - return; - SymbolBody *Existing = Sym->Body; - if (isa<Defined>(Existing)) - return; - if (Lazy *L = dyn_cast<Lazy>(Existing)) - if (L->getFileIndex() < New->getFileIndex()) - return; - Sym->Body = New; - 
New->setBackref(Sym); - if (isa<Undefined>(Existing)) - Accum->push_back(Sym); +std::pair<Symbol *, bool> SymbolTable::insert(StringRef Name) { + Symbol *&Sym = Symtab[CachedHashStringRef(Name)]; + if (Sym) + return {Sym, false}; + Sym = make<Symbol>(); + Sym->IsUsedInRegularObj = false; + Sym->PendingArchiveLoad = false; + return {Sym, true}; } -void SymbolTable::addSymbol(SymbolBody *New) { - // Find an existing symbol or create and insert a new one. - assert(isa<Defined>(New) || isa<Undefined>(New)); - Symbol *Sym = insert(New); - if (Sym->Body == New) - return; - SymbolBody *Existing = Sym->Body; - - // If we have an undefined symbol and a lazy symbol, - // let the lazy symbol to read a member file. - if (auto *L = dyn_cast<Lazy>(Existing)) { - // Undefined symbols with weak aliases need not to be resolved, - // since they would be replaced with weak aliases if they remain - // undefined. - if (auto *U = dyn_cast<Undefined>(New)) { - if (!U->WeakAlias) { - addMemberFile(L); - return; - } +Symbol *SymbolTable::addUndefined(StringRef Name, InputFile *F, + bool IsWeakAlias) { + Symbol *S; + bool WasInserted; + std::tie(S, WasInserted) = insert(Name); + if (!F || !isa<BitcodeFile>(F)) + S->IsUsedInRegularObj = true; + if (WasInserted || (isa<Lazy>(S->body()) && IsWeakAlias)) { + replaceBody<Undefined>(S, Name); + return S; + } + if (auto *L = dyn_cast<Lazy>(S->body())) { + if (!S->PendingArchiveLoad) { + S->PendingArchiveLoad = true; + L->File->addMember(&L->Sym); } - Sym->Body = New; + } + return S; +} + +void SymbolTable::addLazy(ArchiveFile *F, const Archive::Symbol Sym) { + StringRef Name = Sym.getName(); + Symbol *S; + bool WasInserted; + std::tie(S, WasInserted) = insert(Name); + if (WasInserted) { + replaceBody<Lazy>(S, F, Sym); return; } + auto *U = dyn_cast<Undefined>(S->body()); + if (!U || U->WeakAlias || S->PendingArchiveLoad) + return; + S->PendingArchiveLoad = true; + F->addMember(&Sym); +} + +void SymbolTable::reportDuplicate(Symbol *Existing, 
InputFile *NewFile) { + fatal("duplicate symbol: " + toString(*Existing->body()) + " in " + + toString(Existing->body()->getFile()) + " and in " + + (NewFile ? toString(NewFile) : "(internal)")); +} - // compare() returns -1, 0, or 1 if the lhs symbol is less preferable, - // equivalent (conflicting), or more preferable, respectively. - int Comp = Existing->compare(New); - if (Comp == 0) - fatal("duplicate symbol: " + Existing->getDebugName() + " and " + - New->getDebugName()); - if (Comp < 0) - Sym->Body = New; +Symbol *SymbolTable::addAbsolute(StringRef N, COFFSymbolRef Sym) { + Symbol *S; + bool WasInserted; + std::tie(S, WasInserted) = insert(N); + S->IsUsedInRegularObj = true; + if (WasInserted || isa<Undefined>(S->body()) || isa<Lazy>(S->body())) + replaceBody<DefinedAbsolute>(S, N, Sym); + else if (!isa<DefinedCOFF>(S->body())) + reportDuplicate(S, nullptr); + return S; } -Symbol *SymbolTable::insert(SymbolBody *New) { - Symbol *&Sym = Symtab[New->getName()]; - if (Sym) { - New->setBackref(Sym); - return Sym; +Symbol *SymbolTable::addAbsolute(StringRef N, uint64_t VA) { + Symbol *S; + bool WasInserted; + std::tie(S, WasInserted) = insert(N); + S->IsUsedInRegularObj = true; + if (WasInserted || isa<Undefined>(S->body()) || isa<Lazy>(S->body())) + replaceBody<DefinedAbsolute>(S, N, VA); + else if (!isa<DefinedCOFF>(S->body())) + reportDuplicate(S, nullptr); + return S; +} + +Symbol *SymbolTable::addRelative(StringRef N, uint64_t VA) { + Symbol *S; + bool WasInserted; + std::tie(S, WasInserted) = insert(N); + S->IsUsedInRegularObj = true; + if (WasInserted || isa<Undefined>(S->body()) || isa<Lazy>(S->body())) + replaceBody<DefinedRelative>(S, N, VA); + else if (!isa<DefinedCOFF>(S->body())) + reportDuplicate(S, nullptr); + return S; +} + +Symbol *SymbolTable::addRegular(ObjectFile *F, COFFSymbolRef Sym, + SectionChunk *C) { + StringRef Name; + F->getCOFFObj()->getSymbolName(Sym, Name); + Symbol *S; + bool WasInserted; + std::tie(S, WasInserted) = insert(Name); 
+ S->IsUsedInRegularObj = true; + if (WasInserted || isa<Undefined>(S->body()) || isa<Lazy>(S->body())) + replaceBody<DefinedRegular>(S, F, Sym, C); + else if (auto *R = dyn_cast<DefinedRegular>(S->body())) { + if (!C->isCOMDAT() || !R->isCOMDAT()) + reportDuplicate(S, F); + } else if (auto *B = dyn_cast<DefinedBitcode>(S->body())) { + if (B->IsReplaceable) + replaceBody<DefinedRegular>(S, F, Sym, C); + else if (!C->isCOMDAT()) + reportDuplicate(S, F); + } else + replaceBody<DefinedRegular>(S, F, Sym, C); + return S; +} + +Symbol *SymbolTable::addBitcode(BitcodeFile *F, StringRef N, bool IsReplaceable) { + Symbol *S; + bool WasInserted; + std::tie(S, WasInserted) = insert(N); + if (WasInserted || isa<Undefined>(S->body()) || isa<Lazy>(S->body())) { + replaceBody<DefinedBitcode>(S, F, N, IsReplaceable); + return S; } - Sym = new (Alloc) Symbol(New); - New->setBackref(Sym); - return Sym; + if (isa<DefinedCommon>(S->body())) + return S; + if (IsReplaceable) + if (isa<DefinedRegular>(S->body()) || isa<DefinedBitcode>(S->body())) + return S; + reportDuplicate(S, F); + return S; } -// Reads an archive member file pointed by a given symbol. -void SymbolTable::addMemberFile(Lazy *Body) { - std::unique_ptr<InputFile> File = Body->getMember(); +Symbol *SymbolTable::addCommon(ObjectFile *F, COFFSymbolRef Sym, + CommonChunk *C) { + StringRef Name; + F->getCOFFObj()->getSymbolName(Sym, Name); + Symbol *S; + bool WasInserted; + std::tie(S, WasInserted) = insert(Name); + S->IsUsedInRegularObj = true; + if (WasInserted || !isa<DefinedCOFF>(S->body())) + replaceBody<DefinedCommon>(S, F, Sym, C); + else if (auto *DC = dyn_cast<DefinedCommon>(S->body())) + if (Sym.getValue() > DC->getSize()) + replaceBody<DefinedCommon>(S, F, Sym, C); + return S; +} - // getMember returns an empty buffer if the member was already - // read from the library. 
- if (!File) - return; - if (Config->Verbose) - llvm::outs() << "Loaded " << File->getShortName() << " for " - << Body->getName() << "\n"; - addFile(std::move(File)); +Symbol *SymbolTable::addImportData(StringRef N, ImportFile *F) { + Symbol *S; + bool WasInserted; + std::tie(S, WasInserted) = insert(N); + S->IsUsedInRegularObj = true; + if (WasInserted || isa<Undefined>(S->body()) || isa<Lazy>(S->body())) + replaceBody<DefinedImportData>(S, N, F); + else if (!isa<DefinedCOFF>(S->body())) + reportDuplicate(S, nullptr); + return S; +} + +Symbol *SymbolTable::addImportThunk(StringRef Name, DefinedImportData *ID, + uint16_t Machine) { + Symbol *S; + bool WasInserted; + std::tie(S, WasInserted) = insert(Name); + S->IsUsedInRegularObj = true; + if (WasInserted || isa<Undefined>(S->body()) || isa<Lazy>(S->body())) + replaceBody<DefinedImportThunk>(S, Name, ID, Machine); + else if (!isa<DefinedCOFF>(S->body())) + reportDuplicate(S, nullptr); + return S; } std::vector<Chunk *> SymbolTable::getChunks() { @@ -252,7 +295,7 @@ std::vector<Chunk *> SymbolTable::getChunks() { } Symbol *SymbolTable::find(StringRef Name) { - auto It = Symtab.find(Name); + auto It = Symtab.find(CachedHashStringRef(Name)); if (It == Symtab.end()) return nullptr; return It->second; @@ -266,7 +309,7 @@ Symbol *SymbolTable::findUnderscore(StringRef Name) { StringRef SymbolTable::findByPrefix(StringRef Prefix) { for (auto Pair : Symtab) { - StringRef Name = Pair.first; + StringRef Name = Pair.first.val(); if (Name.startswith(Prefix)) return Name; } @@ -275,7 +318,7 @@ StringRef SymbolTable::findByPrefix(StringRef Prefix) { StringRef SymbolTable::findMangle(StringRef Name) { if (Symbol *Sym = find(Name)) - if (!isa<Undefined>(Sym->Body)) + if (!isa<Undefined>(Sym->body())) return Name; if (Config->Machine != I386) return findByPrefix(("?" + Name + "@@Y").str()); @@ -289,39 +332,22 @@ StringRef SymbolTable::findMangle(StringRef Name) { return findByPrefix(("?" 
+ Name.substr(1) + "@@Y").str()); } -void SymbolTable::mangleMaybe(Undefined *U) { - if (U->WeakAlias) - return; - if (!isa<Undefined>(U->repl())) +void SymbolTable::mangleMaybe(SymbolBody *B) { + auto *U = dyn_cast<Undefined>(B); + if (!U || U->WeakAlias) return; StringRef Alias = findMangle(U->getName()); if (!Alias.empty()) U->WeakAlias = addUndefined(Alias); } -Undefined *SymbolTable::addUndefined(StringRef Name) { - auto *New = new (Alloc) Undefined(Name); - addSymbol(New); - if (auto *U = dyn_cast<Undefined>(New->repl())) - return U; - return New; -} - -DefinedRelative *SymbolTable::addRelative(StringRef Name, uint64_t VA) { - auto *New = new (Alloc) DefinedRelative(Name, VA); - addSymbol(New); - return New; -} - -DefinedAbsolute *SymbolTable::addAbsolute(StringRef Name, uint64_t VA) { - auto *New = new (Alloc) DefinedAbsolute(Name, VA); - addSymbol(New); - return New; +SymbolBody *SymbolTable::addUndefined(StringRef Name) { + return addUndefined(Name, nullptr, false)->body(); } void SymbolTable::printMap(llvm::raw_ostream &OS) { for (ObjectFile *File : ObjectFiles) { - OS << File->getShortName() << ":\n"; + OS << toString(File) << ":\n"; for (SymbolBody *Body : File->getSymbols()) if (auto *R = dyn_cast<DefinedRegular>(Body)) if (R->getChunk()->isLive()) @@ -330,84 +356,32 @@ void SymbolTable::printMap(llvm::raw_ostream &OS) { } } -void SymbolTable::addCombinedLTOObject(ObjectFile *Obj) { - for (SymbolBody *Body : Obj->getSymbols()) { - if (!Body->isExternal()) - continue; - // We should not see any new undefined symbols at this point, but we'll - // diagnose them later in reportRemainingUndefines(). - StringRef Name = Body->getName(); - Symbol *Sym = insert(Body); - SymbolBody *Existing = Sym->Body; - - if (Existing == Body) - continue; - - if (isa<DefinedBitcode>(Existing)) { - Sym->Body = Body; - continue; - } - if (auto *L = dyn_cast<Lazy>(Existing)) { - // We may see new references to runtime library symbols such as __chkstk - // here. 
These symbols must be wholly defined in non-bitcode files. - addMemberFile(L); - continue; - } - - int Comp = Existing->compare(Body); - if (Comp == 0) - fatal("LTO: unexpected duplicate symbol: " + Name); - if (Comp < 0) - Sym->Body = Body; - } -} - void SymbolTable::addCombinedLTOObjects() { if (BitcodeFiles.empty()) return; - // Diagnose any undefined symbols early, but do not resolve weak externals, - // as resolution breaks the invariant that each Symbol points to a unique - // SymbolBody, which we rely on to replace DefinedBitcode symbols correctly. - reportRemainingUndefines(/*Resolve=*/false); - // Create an object file and add it to the symbol table by replacing any // DefinedBitcode symbols with the definitions in the object file. LTOCodeGenerator CG(BitcodeFile::Context); CG.setOptLevel(Config->LTOOptLevel); - std::vector<ObjectFile *> Objs = createLTOObjects(&CG); - - for (ObjectFile *Obj : Objs) - addCombinedLTOObject(Obj); - - size_t NumBitcodeFiles = BitcodeFiles.size(); - run(); - if (BitcodeFiles.size() != NumBitcodeFiles) - fatal("LTO: late loaded symbol created new bitcode reference"); + for (ObjectFile *Obj : createLTOObjects(&CG)) + Obj->parse(); } // Combine and compile bitcode files and then return the result // as a vector of regular COFF object files. std::vector<ObjectFile *> SymbolTable::createLTOObjects(LTOCodeGenerator *CG) { - // All symbols referenced by non-bitcode objects must be preserved. - for (ObjectFile *File : ObjectFiles) - for (SymbolBody *Body : File->getSymbols()) - if (auto *S = dyn_cast<DefinedBitcode>(Body->repl())) - CG->addMustPreserveSymbol(S->getName()); - - // Likewise for bitcode symbols which we initially resolved to non-bitcode. + // All symbols referenced by non-bitcode objects, including GC roots, must be + // preserved. We must also replace bitcode symbols with undefined symbols so + // that they may be replaced with real definitions without conflicting. 
for (BitcodeFile *File : BitcodeFiles) - for (SymbolBody *Body : File->getSymbols()) - if (isa<DefinedBitcode>(Body) && !isa<DefinedBitcode>(Body->repl())) + for (SymbolBody *Body : File->getSymbols()) { + if (!isa<DefinedBitcode>(Body)) + continue; + if (Body->symbol()->IsUsedInRegularObj) CG->addMustPreserveSymbol(Body->getName()); - - // Likewise for other symbols that must be preserved. - for (Undefined *U : Config->GCRoot) { - if (auto *S = dyn_cast<DefinedBitcode>(U->repl())) - CG->addMustPreserveSymbol(S->getName()); - else if (auto *S = dyn_cast_or_null<DefinedBitcode>(U->getWeakAlias())) - CG->addMustPreserveSymbol(S->getName()); - } + replaceBody<Undefined>(Body->symbol(), Body->getName()); + } CG->setModule(BitcodeFiles[0]->takeModule()); for (unsigned I = 1, E = BitcodeFiles.size(); I != E; ++I) @@ -434,10 +408,8 @@ std::vector<ObjectFile *> SymbolTable::createLTOObjects(LTOCodeGenerator *CG) { std::vector<ObjectFile *> ObjFiles; for (SmallString<0> &Obj : Objs) { - auto *ObjFile = new ObjectFile(MemoryBufferRef(Obj, "<LTO object>")); - Files.emplace_back(ObjFile); + auto *ObjFile = make<ObjectFile>(MemoryBufferRef(Obj, "<LTO object>")); ObjectFiles.push_back(ObjFile); - ObjFile->parse(); ObjFiles.push_back(ObjFile); } diff --git a/COFF/SymbolTable.h b/COFF/SymbolTable.h index 8bf4387cdfff..703821f2e124 100644 --- a/COFF/SymbolTable.h +++ b/COFF/SymbolTable.h @@ -11,18 +11,12 @@ #define LLD_COFF_SYMBOL_TABLE_H #include "InputFiles.h" +#include "llvm/ADT/CachedHashString.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseMapInfo.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/raw_ostream.h" -#ifdef _MSC_VER -// <future> depends on <eh.h> for __uncaught_exception. 
-#include <eh.h> -#endif - -#include <future> - namespace llvm { struct LTOCodeGenerator; } @@ -31,8 +25,12 @@ namespace lld { namespace coff { class Chunk; +class CommonChunk; class Defined; +class DefinedAbsolute; +class DefinedRelative; class Lazy; +class SectionChunk; class SymbolBody; struct Symbol; @@ -45,18 +43,17 @@ struct Symbol; // conflicts. For example, obviously, a defined symbol is better than // an undefined symbol. Or, if there's a conflict between a lazy and a // undefined, it'll read an archive member to read a real definition -// to replace the lazy symbol. The logic is implemented in resolve(). +// to replace the lazy symbol. The logic is implemented in the +// add*() functions, which are called by input files as they are parsed. +// There is one add* function per symbol type. class SymbolTable { public: - void addFile(std::unique_ptr<InputFile> File); - std::vector<std::unique_ptr<InputFile>> &getFiles() { return Files; } - void step(); - void run(); - bool queueEmpty(); + void addFile(InputFile *File); - // Print an error message on undefined symbols. If Resolve is true, try to - // resolve any undefined symbols and update the symbol table accordingly. - void reportRemainingUndefines(bool Resolve); + // Try to resolve any undefined symbols and update the symbol table + // accordingly, then print an error message for any remaining undefined + // symbols. + void reportRemainingUndefines(); // Returns a list of chunks of selected symbols. std::vector<Chunk *> getChunks(); @@ -69,7 +66,7 @@ public: // mangled symbol. This function tries to find a mangled name // for U from the symbol table, and if found, set the symbol as // a weak alias for U. - void mangleMaybe(Undefined *U); + void mangleMaybe(SymbolBody *B); StringRef findMangle(StringRef Name); // Print a layout map to OS. @@ -88,37 +85,44 @@ public: std::vector<ObjectFile *> ObjectFiles; // Creates an Undefined symbol for a given name. 
- Undefined *addUndefined(StringRef Name); - DefinedRelative *addRelative(StringRef Name, uint64_t VA); - DefinedAbsolute *addAbsolute(StringRef Name, uint64_t VA); + SymbolBody *addUndefined(StringRef Name); + + Symbol *addRelative(StringRef N, uint64_t VA); + Symbol *addAbsolute(StringRef N, uint64_t VA); + + Symbol *addUndefined(StringRef Name, InputFile *F, bool IsWeakAlias); + void addLazy(ArchiveFile *F, const Archive::Symbol Sym); + Symbol *addAbsolute(StringRef N, COFFSymbolRef S); + Symbol *addRegular(ObjectFile *F, COFFSymbolRef S, SectionChunk *C); + Symbol *addBitcode(BitcodeFile *F, StringRef N, bool IsReplaceable); + Symbol *addCommon(ObjectFile *F, COFFSymbolRef S, CommonChunk *C); + Symbol *addImportData(StringRef N, ImportFile *F); + Symbol *addImportThunk(StringRef Name, DefinedImportData *S, + uint16_t Machine); + + void reportDuplicate(Symbol *Existing, InputFile *NewFile); // A list of chunks which to be added to .rdata. std::vector<Chunk *> LocalImportChunks; private: - void readArchives(); + void readArchive(); void readObjects(); - void addSymbol(SymbolBody *New); - void addLazy(Lazy *New, std::vector<Symbol *> *Accum); - Symbol *insert(SymbolBody *New); + std::pair<Symbol *, bool> insert(StringRef Name); StringRef findByPrefix(StringRef Prefix); - void addMemberFile(Lazy *Body); void addCombinedLTOObject(ObjectFile *Obj); std::vector<ObjectFile *> createLTOObjects(llvm::LTOCodeGenerator *CG); - llvm::DenseMap<StringRef, Symbol *> Symtab; - - std::vector<std::unique_ptr<InputFile>> Files; - std::vector<std::future<ArchiveFile *>> ArchiveQueue; - std::vector<std::future<InputFile *>> ObjectQueue; + llvm::DenseMap<llvm::CachedHashStringRef, Symbol *> Symtab; std::vector<BitcodeFile *> BitcodeFiles; std::vector<SmallString<0>> Objs; - llvm::BumpPtrAllocator Alloc; }; +extern SymbolTable *Symtab; + } // namespace coff } // namespace lld diff --git a/COFF/Symbols.cpp b/COFF/Symbols.cpp index 6e2db6631ce7..6de85d581f49 100644 --- 
a/COFF/Symbols.cpp +++ b/COFF/Symbols.cpp @@ -7,16 +7,17 @@ // //===----------------------------------------------------------------------===// +#include "Symbols.h" #include "Error.h" #include "InputFiles.h" -#include "Symbols.h" +#include "Memory.h" +#include "Strings.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +using namespace llvm; using namespace llvm::object; -using llvm::sys::fs::identify_magic; -using llvm::sys::fs::file_magic; namespace lld { namespace coff { @@ -36,130 +37,14 @@ StringRef SymbolBody::getName() { return Name; } -// Returns 1, 0 or -1 if this symbol should take precedence -// over the Other, tie or lose, respectively. -int SymbolBody::compare(SymbolBody *Other) { - Kind LK = kind(), RK = Other->kind(); - - // Normalize so that the smaller kind is on the left. - if (LK > RK) - return -Other->compare(this); - - // First handle comparisons between two different kinds. - if (LK != RK) { - if (RK > LastDefinedKind) { - if (LK == LazyKind && cast<Undefined>(Other)->WeakAlias) - return -1; - - // The LHS is either defined or lazy and so it wins. - assert((LK <= LastDefinedKind || LK == LazyKind) && "Bad kind!"); - return 1; - } - - // Bitcode has special complexities. - if (RK == DefinedBitcodeKind) { - auto *RHS = cast<DefinedBitcode>(Other); - - switch (LK) { - case DefinedCommonKind: - return 1; - - case DefinedRegularKind: - // As an approximation, regular symbols win over bitcode symbols, - // but we definitely have a conflict if the regular symbol is not - // replaceable and neither is the bitcode symbol. We do not - // replicate the rest of the symbol resolution logic here; symbol - // resolution will be done accurately after lowering bitcode symbols - // to regular symbols in addCombinedLTOObject(). - if (cast<DefinedRegular>(this)->isCOMDAT() || RHS->IsReplaceable) - return 1; - - // Fallthrough to the default of a tie otherwise. 
- default: - return 0; - } - } - - // Either of the object file kind will trump a higher kind. - if (LK <= LastDefinedCOFFKind) - return 1; - - // The remaining kind pairs are ties amongst defined symbols. - return 0; - } - - // Now handle the case where the kinds are the same. - switch (LK) { - case DefinedRegularKind: { - auto *LHS = cast<DefinedRegular>(this); - auto *RHS = cast<DefinedRegular>(Other); - if (LHS->isCOMDAT() && RHS->isCOMDAT()) - return LHS->getFileIndex() < RHS->getFileIndex() ? 1 : -1; - return 0; - } - - case DefinedCommonKind: { - auto *LHS = cast<DefinedCommon>(this); - auto *RHS = cast<DefinedCommon>(Other); - if (LHS->getSize() == RHS->getSize()) - return LHS->getFileIndex() < RHS->getFileIndex() ? 1 : -1; - return LHS->getSize() > RHS->getSize() ? 1 : -1; - } - - case DefinedBitcodeKind: { - auto *LHS = cast<DefinedBitcode>(this); - auto *RHS = cast<DefinedBitcode>(Other); - // If both are non-replaceable, we have a tie. - if (!LHS->IsReplaceable && !RHS->IsReplaceable) - return 0; - - // Non-replaceable symbols win, but even two replaceable symboles don't - // tie. If both symbols are replaceable, choice is arbitrary. - if (RHS->IsReplaceable && LHS->IsReplaceable) - return uintptr_t(LHS) < uintptr_t(RHS) ? 1 : -1; - return LHS->IsReplaceable ? -1 : 1; - } - - case LazyKind: { - // Don't tie, pick the earliest. - auto *LHS = cast<Lazy>(this); - auto *RHS = cast<Lazy>(Other); - return LHS->getFileIndex() < RHS->getFileIndex() ? 1 : -1; - } - - case UndefinedKind: { - auto *LHS = cast<Undefined>(this); - auto *RHS = cast<Undefined>(Other); - // Tie if both undefined symbols have different weak aliases. - if (LHS->WeakAlias && RHS->WeakAlias) { - if (LHS->WeakAlias->getName() != RHS->WeakAlias->getName()) - return 0; - return uintptr_t(LHS) < uintptr_t(RHS) ? 1 : -1; - } - return LHS->WeakAlias ? 
1 : -1; - } - - case DefinedLocalImportKind: - case DefinedImportThunkKind: - case DefinedImportDataKind: - case DefinedAbsoluteKind: - case DefinedRelativeKind: - // These all simply tie. - return 0; - } - llvm_unreachable("unknown symbol kind"); -} - -std::string SymbolBody::getDebugName() { - std::string N = getName().str(); - if (auto *D = dyn_cast<DefinedCOFF>(this)) { - N += " "; - N += D->File->getShortName(); - } else if (auto *D = dyn_cast<DefinedBitcode>(this)) { - N += " "; - N += D->File->getShortName(); - } - return N; +InputFile *SymbolBody::getFile() { + if (auto *Sym = dyn_cast<DefinedCOFF>(this)) + return Sym->File; + if (auto *Sym = dyn_cast<DefinedBitcode>(this)) + return Sym->File; + if (auto *Sym = dyn_cast<Lazy>(this)) + return Sym->File; + return nullptr; } COFFSymbolRef DefinedCOFF::getCOFFSymbol() { @@ -174,44 +59,27 @@ DefinedImportThunk::DefinedImportThunk(StringRef Name, DefinedImportData *S, uint16_t Machine) : Defined(DefinedImportThunkKind, Name) { switch (Machine) { - case AMD64: Data.reset(new ImportThunkChunkX64(S)); return; - case I386: Data.reset(new ImportThunkChunkX86(S)); return; - case ARMNT: Data.reset(new ImportThunkChunkARM(S)); return; + case AMD64: Data = make<ImportThunkChunkX64>(S); return; + case I386: Data = make<ImportThunkChunkX86>(S); return; + case ARMNT: Data = make<ImportThunkChunkARM>(S); return; default: llvm_unreachable("unknown machine type"); } } -std::unique_ptr<InputFile> Lazy::getMember() { - MemoryBufferRef MBRef = File->getMember(&Sym); - - // getMember returns an empty buffer if the member was already - // read from the library. 
- if (MBRef.getBuffer().empty()) - return std::unique_ptr<InputFile>(nullptr); - - file_magic Magic = identify_magic(MBRef.getBuffer()); - if (Magic == file_magic::coff_import_library) - return std::unique_ptr<InputFile>(new ImportFile(MBRef)); - - std::unique_ptr<InputFile> Obj; - if (Magic == file_magic::coff_object) - Obj.reset(new ObjectFile(MBRef)); - else if (Magic == file_magic::bitcode) - Obj.reset(new BitcodeFile(MBRef)); - else - fatal("unknown file type: " + File->getName()); - - Obj->setParentName(File->getName()); - return Obj; -} - Defined *Undefined::getWeakAlias() { // A weak alias may be a weak alias to another symbol, so check recursively. for (SymbolBody *A = WeakAlias; A; A = cast<Undefined>(A)->WeakAlias) - if (auto *D = dyn_cast<Defined>(A->repl())) + if (auto *D = dyn_cast<Defined>(A)) return D; return nullptr; } +// Returns a symbol name for an error message. +std::string toString(SymbolBody &B) { + if (Optional<std::string> S = demangle(B.getName())) + return ("\"" + *S + "\" (" + B.getName() + ")").str(); + return B.getName(); +} + } // namespace coff } // namespace lld diff --git a/COFF/Symbols.h b/COFF/Symbols.h index f96c1fb3cc1d..bc9ad4aa8aff 100644 --- a/COFF/Symbols.h +++ b/COFF/Symbols.h @@ -12,6 +12,7 @@ #include "Chunks.h" #include "Config.h" +#include "Memory.h" #include "lld/Core/LLVM.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/Object/Archive.h" @@ -32,15 +33,8 @@ class ArchiveFile; class BitcodeFile; class InputFile; class ObjectFile; -class SymbolBody; - -// A real symbol object, SymbolBody, is usually accessed indirectly -// through a Symbol. There's always one Symbol for each symbol name. -// The resolver updates SymbolBody pointers as it resolves symbols. -struct Symbol { - explicit Symbol(SymbolBody *P) : Body(P) {} - SymbolBody *Body; -}; +struct Symbol; +class SymbolTable; // The base class for real symbol classes. class SymbolBody { @@ -75,28 +69,19 @@ public: // Returns the symbol name. 
StringRef getName(); - // A SymbolBody has a backreference to a Symbol. Originally they are - // doubly-linked. A backreference will never change. But the pointer - // in the Symbol may be mutated by the resolver. If you have a - // pointer P to a SymbolBody and are not sure whether the resolver - // has chosen the object among other objects having the same name, - // you can access P->Backref->Body to get the resolver's result. - void setBackref(Symbol *P) { Backref = P; } - SymbolBody *repl() { return Backref ? Backref->Body : this; } - - // Decides which symbol should "win" in the symbol table, this or - // the Other. Returns 1 if this wins, -1 if the Other wins, or 0 if - // they are duplicate (conflicting) symbols. - int compare(SymbolBody *Other); + // Returns the file from which this symbol was created. + InputFile *getFile(); - // Returns a name of this symbol including source file name. - // Used only for debugging and logging. - std::string getDebugName(); + Symbol *symbol(); + const Symbol *symbol() const { + return const_cast<SymbolBody *>(this)->symbol(); + } protected: + friend SymbolTable; explicit SymbolBody(Kind K, StringRef N = "") : SymbolKind(K), IsExternal(true), IsCOMDAT(false), - IsReplaceable(false), Name(N) {} + IsReplaceable(false), WrittenToSymtab(false), Name(N) {} const unsigned SymbolKind : 8; unsigned IsExternal : 1; @@ -107,8 +92,12 @@ protected: // This bit is used by the \c DefinedBitcode subclass. unsigned IsReplaceable : 1; +public: + // This bit is used by Writer::createSymbolAndStringTable(). 
+ unsigned WrittenToSymtab : 1; + +protected: StringRef Name; - Symbol *Backref = nullptr; }; // The base class for any defined symbols, including absolute symbols, @@ -149,12 +138,13 @@ public: return S->kind() <= LastDefinedCOFFKind; } - int getFileIndex() { return File->Index; } + ObjectFile *getFile() { return File; } COFFSymbolRef getCOFFSymbol(); -protected: ObjectFile *File; + +protected: const coff_symbol_generic *Sym; }; @@ -194,7 +184,7 @@ public: uint64_t getRVA() { return Data->getRVA(); } private: - friend SymbolBody; + friend SymbolTable; uint64_t getSize() { return Sym->Value; } CommonChunk *Data; }; @@ -253,14 +243,12 @@ public: static bool classof(const SymbolBody *S) { return S->kind() == LazyKind; } - // Returns an object file for this symbol, or a nullptr if the file - // was already returned. - std::unique_ptr<InputFile> getMember(); + ArchiveFile *File; - int getFileIndex() { return File->Index; } +private: + friend SymbolTable; private: - ArchiveFile *File; const Archive::Symbol Sym; }; @@ -293,26 +281,22 @@ public: // table in an output. The former has "__imp_" prefix. 
class DefinedImportData : public Defined { public: - DefinedImportData(StringRef D, StringRef N, StringRef E, - const coff_import_header *H) - : Defined(DefinedImportDataKind, N), DLLName(D), ExternalName(E), Hdr(H) { + DefinedImportData(StringRef N, ImportFile *F) + : Defined(DefinedImportDataKind, N), File(F) { } static bool classof(const SymbolBody *S) { return S->kind() == DefinedImportDataKind; } - uint64_t getRVA() { return Location->getRVA(); } - StringRef getDLLName() { return DLLName; } - StringRef getExternalName() { return ExternalName; } - void setLocation(Chunk *AddressTable) { Location = AddressTable; } - uint16_t getOrdinal() { return Hdr->OrdinalHint; } + uint64_t getRVA() { return File->Location->getRVA(); } + StringRef getDLLName() { return File->DLLName; } + StringRef getExternalName() { return File->ExternalName; } + void setLocation(Chunk *AddressTable) { File->Location = AddressTable; } + uint16_t getOrdinal() { return File->Hdr->OrdinalHint; } private: - StringRef DLLName; - StringRef ExternalName; - const coff_import_header *Hdr; - Chunk *Location = nullptr; + ImportFile *File; }; // This class represents a symbol for a jump table entry which jumps @@ -329,10 +313,10 @@ public: } uint64_t getRVA() { return Data->getRVA(); } - Chunk *getChunk() { return Data.get(); } + Chunk *getChunk() { return Data; } private: - std::unique_ptr<Chunk> Data; + Chunk *Data; }; // If you have a symbol "__imp_foo" in your object file, a symbol name @@ -343,17 +327,17 @@ private: class DefinedLocalImport : public Defined { public: DefinedLocalImport(StringRef N, Defined *S) - : Defined(DefinedLocalImportKind, N), Data(S) {} + : Defined(DefinedLocalImportKind, N), Data(make<LocalImportChunk>(S)) {} static bool classof(const SymbolBody *S) { return S->kind() == DefinedLocalImportKind; } - uint64_t getRVA() { return Data.getRVA(); } - Chunk *getChunk() { return &Data; } + uint64_t getRVA() { return Data->getRVA(); } + Chunk *getChunk() { return Data; } private: - 
LocalImportChunk Data; + LocalImportChunk *Data; }; class DefinedBitcode : public Defined { @@ -361,6 +345,11 @@ class DefinedBitcode : public Defined { public: DefinedBitcode(BitcodeFile *F, StringRef N, bool IsReplaceable) : Defined(DefinedBitcodeKind, N), File(F) { + // IsReplaceable tracks whether the bitcode symbol may be replaced with some + // other (defined, common or bitcode) symbol. This is the case for common, + // comdat and weak external symbols. We try to replace bitcode symbols with + // "real" symbols (see SymbolTable::add{Regular,Bitcode}), and resolve the + // result against the real symbol from the combined LTO object. this->IsReplaceable = IsReplaceable; } @@ -368,7 +357,6 @@ public: return S->kind() == DefinedBitcodeKind; } -private: BitcodeFile *File; }; @@ -397,6 +385,52 @@ inline uint64_t Defined::getRVA() { llvm_unreachable("unknown symbol kind"); } +// A real symbol object, SymbolBody, is usually stored within a Symbol. There's +// always one Symbol for each symbol name. The resolver updates the SymbolBody +// stored in the Body field of this object as it resolves symbols. Symbol also +// holds computed properties of symbol names. +struct Symbol { + // True if this symbol was referenced by a regular (non-bitcode) object. + unsigned IsUsedInRegularObj : 1; + + // True if we've seen both a lazy and an undefined symbol with this symbol + // name, which means that we have enqueued an archive member load and should + // not load any more archive members to resolve the same symbol. + unsigned PendingArchiveLoad : 1; + + // This field is used to store the Symbol's SymbolBody. This instantiation of + // AlignedCharArrayUnion gives us a struct with a char array field that is + // large and aligned enough to store any derived class of SymbolBody. 
+ llvm::AlignedCharArrayUnion<DefinedRegular, DefinedCommon, DefinedAbsolute, + DefinedRelative, Lazy, Undefined, + DefinedImportData, DefinedImportThunk, + DefinedLocalImport, DefinedBitcode> + Body; + + SymbolBody *body() { + return reinterpret_cast<SymbolBody *>(Body.buffer); + } + const SymbolBody *body() const { return const_cast<Symbol *>(this)->body(); } +}; + +template <typename T, typename... ArgT> +void replaceBody(Symbol *S, ArgT &&... Arg) { + static_assert(sizeof(T) <= sizeof(S->Body), "Body too small"); + static_assert(alignof(T) <= alignof(decltype(S->Body)), + "Body not aligned enough"); + assert(static_cast<SymbolBody *>(static_cast<T *>(nullptr)) == nullptr && + "Not a SymbolBody"); + new (S->Body.buffer) T(std::forward<ArgT>(Arg)...); +} + +inline Symbol *SymbolBody::symbol() { + assert(isExternal()); + return reinterpret_cast<Symbol *>(reinterpret_cast<char *>(this) - + offsetof(Symbol, Body)); +} + +std::string toString(SymbolBody &B); + } // namespace coff } // namespace lld diff --git a/COFF/Writer.cpp b/COFF/Writer.cpp index d8077df95701..3e69aebbb424 100644 --- a/COFF/Writer.cpp +++ b/COFF/Writer.cpp @@ -7,13 +7,15 @@ // //===----------------------------------------------------------------------===// +#include "Writer.h" #include "Config.h" #include "DLL.h" #include "Error.h" #include "InputFiles.h" +#include "Memory.h" +#include "PDB.h" #include "SymbolTable.h" #include "Symbols.h" -#include "Writer.h" #include "lld/Core/Parallel.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" @@ -21,6 +23,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/Endian.h" #include "llvm/Support/FileOutputBuffer.h" +#include "llvm/Support/RandomNumberGenerator.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> #include <cstdio> @@ -42,6 +45,61 @@ static const int DOSStubSize = 64; static const int NumberfOfDataDirectory = 16; namespace { + +class DebugDirectoryChunk : public Chunk { +public: + DebugDirectoryChunk(const 
std::vector<std::unique_ptr<Chunk>> &R) + : Records(R) {} + + size_t getSize() const override { + return Records.size() * sizeof(debug_directory); + } + + void writeTo(uint8_t *B) const override { + auto *D = reinterpret_cast<debug_directory *>(B + OutputSectionOff); + + for (const std::unique_ptr<Chunk> &Record : Records) { + D->Characteristics = 0; + D->TimeDateStamp = 0; + D->MajorVersion = 0; + D->MinorVersion = 0; + D->Type = COFF::IMAGE_DEBUG_TYPE_CODEVIEW; + D->SizeOfData = Record->getSize(); + D->AddressOfRawData = Record->getRVA(); + // TODO(compnerd) get the file offset + D->PointerToRawData = 0; + + ++D; + } + } + +private: + const std::vector<std::unique_ptr<Chunk>> &Records; +}; + +class CVDebugRecordChunk : public Chunk { + size_t getSize() const override { + return sizeof(codeview::DebugInfo) + Config->PDBPath.size() + 1; + } + + void writeTo(uint8_t *B) const override { + // Save off the DebugInfo entry to backfill the file signature (build id) + // in Writer::writeBuildId + DI = reinterpret_cast<codeview::DebugInfo *>(B + OutputSectionOff); + + DI->Signature.CVSignature = OMF::Signature::PDB70; + + // variable sized field (PDB Path) + auto *P = reinterpret_cast<char *>(B + OutputSectionOff + sizeof(*DI)); + if (!Config->PDBPath.empty()) + memcpy(P, Config->PDBPath.data(), Config->PDBPath.size()); + P[Config->PDBPath.size()] = '\0'; + } + +public: + mutable codeview::DebugInfo *DI = nullptr; +}; + // The writer writes a SymbolTable result to a file. 
class Writer { public: @@ -62,6 +120,7 @@ private: void setSectionPermissions(); void writeSections(); void sortExceptionTable(); + void writeBuildId(); void applyRelocations(); llvm::Optional<coff_symbol16> createSymbol(Defined *D); @@ -76,9 +135,7 @@ private: std::map<StringRef, std::vector<DefinedImportData *>> binImports(); SymbolTable *Symtab; - std::unique_ptr<llvm::FileOutputBuffer> Buffer; - llvm::SpecificBumpPtrAllocator<OutputSection> CAlloc; - llvm::SpecificBumpPtrAllocator<BaserelChunk> BAlloc; + std::unique_ptr<FileOutputBuffer> Buffer; std::vector<OutputSection *> OutputSections; std::vector<char> Strtab; std::vector<llvm::object::coff_symbol16> OutputSymtab; @@ -87,6 +144,11 @@ private: EdataContents Edata; std::unique_ptr<SEHTableChunk> SEHTable; + std::unique_ptr<Chunk> DebugDirectory; + std::vector<std::unique_ptr<Chunk>> DebugRecords; + CVDebugRecordChunk *BuildId = nullptr; + ArrayRef<uint8_t> SectionTable; + uint64_t FileSize; uint32_t PointerToSymbolTable = 0; uint64_t SizeOfImage; @@ -239,6 +301,11 @@ void Writer::run() { fixSafeSEHSymbols(); writeSections(); sortExceptionTable(); + writeBuildId(); + + if (!Config->PDBPath.empty()) + createPDB(Config->PDBPath, Symtab, SectionTable); + if (auto EC = Buffer->commit()) fatal(EC, "failed to write the output file"); } @@ -274,7 +341,7 @@ void Writer::createSections() { StringRef Name = getOutputSection(Pair.first); OutputSection *&Sec = Sections[Name]; if (!Sec) { - Sec = new (CAlloc.Allocate()) OutputSection(Name); + Sec = make<OutputSection>(Name); OutputSections.push_back(Sec); } std::vector<Chunk *> &Chunks = Pair.second; @@ -286,25 +353,46 @@ void Writer::createSections() { } void Writer::createMiscChunks() { + OutputSection *RData = createSection(".rdata"); + // Create thunks for locally-dllimported symbols. 
if (!Symtab->LocalImportChunks.empty()) { - OutputSection *Sec = createSection(".rdata"); for (Chunk *C : Symtab->LocalImportChunks) - Sec->addChunk(C); + RData->addChunk(C); + } + + // Create Debug Information Chunks + if (Config->Debug) { + DebugDirectory = llvm::make_unique<DebugDirectoryChunk>(DebugRecords); + + // TODO(compnerd) create a coffgrp entry if DebugType::CV is not enabled + if (Config->DebugTypes & static_cast<unsigned>(coff::DebugType::CV)) { + auto Chunk = llvm::make_unique<CVDebugRecordChunk>(); + + BuildId = Chunk.get(); + DebugRecords.push_back(std::move(Chunk)); + } + + RData->addChunk(DebugDirectory.get()); + for (const std::unique_ptr<Chunk> &C : DebugRecords) + RData->addChunk(C.get()); } // Create SEH table. x86-only. if (Config->Machine != I386) return; + std::set<Defined *> Handlers; + for (lld::coff::ObjectFile *File : Symtab->ObjectFiles) { if (!File->SEHCompat) return; for (SymbolBody *B : File->SEHandlers) - Handlers.insert(cast<Defined>(B->repl())); + Handlers.insert(cast<Defined>(B)); } + SEHTable.reset(new SEHTableChunk(Handlers)); - createSection(".rdata")->addChunk(SEHTable.get()); + RData->addChunk(SEHTable.get()); } // Create .idata section for the DLL-imported symbol table. @@ -340,7 +428,7 @@ void Writer::createImportTables() { Sec->addChunk(C); } if (!DelayIdata.empty()) { - Defined *Helper = cast<Defined>(Config->DelayLoadHelper->repl()); + Defined *Helper = cast<Defined>(Config->DelayLoadHelper); DelayIdata.create(Helper); OutputSection *Sec = createSection(".didat"); for (Chunk *C : DelayIdata.getChunks()) @@ -383,6 +471,10 @@ size_t Writer::addEntryToStringTable(StringRef Str) { } Optional<coff_symbol16> Writer::createSymbol(Defined *Def) { + // Relative symbols are unrepresentable in a COFF symbol table. 
+ if (isa<DefinedRelative>(Def)) + return None; + if (auto *D = dyn_cast<DefinedRegular>(Def)) if (!D->getChunk()->isLive()) return None; @@ -409,7 +501,6 @@ Optional<coff_symbol16> Writer::createSymbol(Defined *Def) { switch (Def->kind()) { case SymbolBody::DefinedAbsoluteKind: - case SymbolBody::DefinedRelativeKind: Sym.Value = Def->getRVA(); Sym.SectionNumber = IMAGE_SYM_ABSOLUTE; break; @@ -445,13 +536,11 @@ void Writer::createSymbolAndStringTable() { for (lld::coff::ObjectFile *File : Symtab->ObjectFiles) for (SymbolBody *B : File->getSymbols()) if (auto *D = dyn_cast<Defined>(B)) - if (Optional<coff_symbol16> Sym = createSymbol(D)) - OutputSymtab.push_back(*Sym); - - for (ImportFile *File : Symtab->ImportFiles) - for (SymbolBody *B : File->getSymbols()) - if (Optional<coff_symbol16> Sym = createSymbol(cast<Defined>(B))) - OutputSymtab.push_back(*Sym); + if (!D->WrittenToSymtab) { + D->WrittenToSymtab = true; + if (Optional<coff_symbol16> Sym = createSymbol(D)) + OutputSymtab.push_back(*Sym); + } OutputSection *LastSection = OutputSections.back(); // We position the symbol table to be adjacent to the end of the last section. @@ -542,7 +631,7 @@ template <typename PEHeaderTy> void Writer::writeHeader() { PE->SizeOfImage = SizeOfImage; PE->SizeOfHeaders = SizeOfHeaders; if (!Config->NoEntry) { - Defined *Entry = cast<Defined>(Config->Entry->repl()); + Defined *Entry = cast<Defined>(Config->Entry); PE->AddressOfEntryPoint = Entry->getRVA(); // Pointer to thumb code must have the LSB set, so adjust it. 
if (Config->Machine == ARMNT) @@ -584,33 +673,32 @@ template <typename PEHeaderTy> void Writer::writeHeader() { Dir[IAT].RelativeVirtualAddress = Idata.getIATRVA(); Dir[IAT].Size = Idata.getIATSize(); } - if (!DelayIdata.empty()) { - Dir[DELAY_IMPORT_DESCRIPTOR].RelativeVirtualAddress = - DelayIdata.getDirRVA(); - Dir[DELAY_IMPORT_DESCRIPTOR].Size = DelayIdata.getDirSize(); - } if (OutputSection *Sec = findSection(".rsrc")) { Dir[RESOURCE_TABLE].RelativeVirtualAddress = Sec->getRVA(); Dir[RESOURCE_TABLE].Size = Sec->getVirtualSize(); } - if (OutputSection *Sec = findSection(".reloc")) { - Dir[BASE_RELOCATION_TABLE].RelativeVirtualAddress = Sec->getRVA(); - Dir[BASE_RELOCATION_TABLE].Size = Sec->getVirtualSize(); - } if (OutputSection *Sec = findSection(".pdata")) { Dir[EXCEPTION_TABLE].RelativeVirtualAddress = Sec->getRVA(); Dir[EXCEPTION_TABLE].Size = Sec->getVirtualSize(); } + if (OutputSection *Sec = findSection(".reloc")) { + Dir[BASE_RELOCATION_TABLE].RelativeVirtualAddress = Sec->getRVA(); + Dir[BASE_RELOCATION_TABLE].Size = Sec->getVirtualSize(); + } if (Symbol *Sym = Symtab->findUnderscore("_tls_used")) { - if (Defined *B = dyn_cast<Defined>(Sym->Body)) { + if (Defined *B = dyn_cast<Defined>(Sym->body())) { Dir[TLS_TABLE].RelativeVirtualAddress = B->getRVA(); Dir[TLS_TABLE].Size = Config->is64() ? 
sizeof(object::coff_tls_directory64) : sizeof(object::coff_tls_directory32); } } + if (Config->Debug) { + Dir[DEBUG_DIRECTORY].RelativeVirtualAddress = DebugDirectory->getRVA(); + Dir[DEBUG_DIRECTORY].Size = DebugDirectory->getSize(); + } if (Symbol *Sym = Symtab->findUnderscore("_load_config_used")) { - if (auto *B = dyn_cast<DefinedRegular>(Sym->Body)) { + if (auto *B = dyn_cast<DefinedRegular>(Sym->body())) { SectionChunk *SC = B->getChunk(); assert(B->getRVA() >= SC->getRVA()); uint64_t OffsetInChunk = B->getRVA() - SC->getRVA(); @@ -626,12 +714,19 @@ template <typename PEHeaderTy> void Writer::writeHeader() { Dir[LOAD_CONFIG_TABLE].Size = LoadConfigSize; } } + if (!DelayIdata.empty()) { + Dir[DELAY_IMPORT_DESCRIPTOR].RelativeVirtualAddress = + DelayIdata.getDirRVA(); + Dir[DELAY_IMPORT_DESCRIPTOR].Size = DelayIdata.getDirSize(); + } // Write section table for (OutputSection *Sec : OutputSections) { Sec->writeHeaderTo(Buf); Buf += sizeof(coff_section); } + SectionTable = ArrayRef<uint8_t>( + Buf - OutputSections.size() * sizeof(coff_section), Buf); if (OutputSymtab.empty()) return; @@ -660,8 +755,10 @@ void Writer::openFile(StringRef Path) { void Writer::fixSafeSEHSymbols() { if (!SEHTable) return; - Config->SEHTable->setRVA(SEHTable->getRVA()); - Config->SEHCount->setVA(SEHTable->getSize() / 4); + if (auto *T = dyn_cast<DefinedRelative>(Config->SEHTable->body())) + T->setRVA(SEHTable->getRVA()); + if (auto *C = dyn_cast<DefinedAbsolute>(Config->SEHCount->body())) + C->setVA(SEHTable->getSize() / 4); } // Handles /section options to allow users to overwrite @@ -715,6 +812,30 @@ void Writer::sortExceptionTable() { errs() << "warning: don't know how to handle .pdata.\n"; } +// Backfill the CVSignature in a PDB70 Debug Record. This backfilling allows us +// to get reproducible builds. +void Writer::writeBuildId() { + // There is nothing to backfill if BuildId was not setup. 
+ if (BuildId == nullptr) + return; + + MD5 Hash; + MD5::MD5Result Res; + + Hash.update(ArrayRef<uint8_t>{Buffer->getBufferStart(), + Buffer->getBufferEnd()}); + Hash.final(Res); + + assert(BuildId->DI->Signature.CVSignature == OMF::Signature::PDB70 && + "only PDB 7.0 is supported"); + assert(sizeof(Res) == sizeof(BuildId->DI->PDB70.Signature) && + "signature size mismatch"); + memcpy(BuildId->DI->PDB70.Signature, Res, + sizeof(codeview::PDB70DebugInfo::Signature)); + // TODO(compnerd) track the Age + BuildId->DI->PDB70.Age = 1; +} + OutputSection *Writer::findSection(StringRef Name) { for (OutputSection *Sec : OutputSections) if (Sec->getName() == Name) @@ -744,16 +865,13 @@ OutputSection *Writer::createSection(StringRef Name) { uint32_t Perms = StringSwitch<uint32_t>(Name) .Case(".bss", BSS | R | W) .Case(".data", DATA | R | W) - .Case(".didat", DATA | R) - .Case(".edata", DATA | R) - .Case(".idata", DATA | R) - .Case(".rdata", DATA | R) + .Cases(".didat", ".edata", ".idata", ".rdata", DATA | R) .Case(".reloc", DATA | DISCARDABLE | R) .Case(".text", CODE | R | X) .Default(0); if (!Perms) llvm_unreachable("unknown section name"); - auto Sec = new (CAlloc.Allocate()) OutputSection(Name); + auto Sec = make<OutputSection>(Name); Sec->addPermissions(Perms); OutputSections.push_back(Sec); return Sec; @@ -784,13 +902,11 @@ void Writer::addBaserelBlocks(OutputSection *Dest, std::vector<Baserel> &V) { uint32_t P = V[J].RVA & Mask; if (P == Page) continue; - BaserelChunk *Buf = BAlloc.Allocate(); - Dest->addChunk(new (Buf) BaserelChunk(Page, &V[I], &V[0] + J)); + Dest->addChunk(make<BaserelChunk>(Page, &V[I], &V[0] + J)); I = J; Page = P; } if (I == J) return; - BaserelChunk *Buf = BAlloc.Allocate(); - Dest->addChunk(new (Buf) BaserelChunk(Page, &V[I], &V[0] + J)); + Dest->addChunk(make<BaserelChunk>(Page, &V[I], &V[0] + J)); } diff --git a/COFF/Writer.h b/COFF/Writer.h index 0473315ae50a..0d26090177d8 100644 --- a/COFF/Writer.h +++ b/COFF/Writer.h @@ -14,9 +14,7 @@ 
namespace lld { namespace coff { - -class Chunk; -class OutputSection; +class SymbolTable; void writeResult(SymbolTable *T); diff --git a/ELF/CMakeLists.txt b/ELF/CMakeLists.txt index a1b65adc7400..2e9d2b941fd9 100644 --- a/ELF/CMakeLists.txt +++ b/ELF/CMakeLists.txt @@ -2,24 +2,30 @@ set(LLVM_TARGET_DEFINITIONS Options.td) tablegen(LLVM Options.inc -gen-opt-parser-defs) add_public_tablegen_target(ELFOptionsTableGen) +if(NOT LLD_BUILT_STANDALONE) + set(tablegen_deps intrinsics_gen) +endif() + add_lld_library(lldELF Driver.cpp DriverUtils.cpp EhFrame.cpp Error.cpp + GdbIndex.cpp ICF.cpp InputFiles.cpp InputSection.cpp LTO.cpp LinkerScript.cpp MarkLive.cpp + Mips.cpp OutputSections.cpp Relocations.cpp ScriptParser.cpp Strings.cpp - SymbolListFile.cpp SymbolTable.cpp Symbols.cpp + SyntheticSections.cpp Target.cpp Thunks.cpp Writer.cpp @@ -31,6 +37,8 @@ add_lld_library(lldELF BitWriter Codegen Core + DebugInfoDWARF + Demangle IPO Linker LTO @@ -44,7 +52,10 @@ add_lld_library(lldELF LINK_LIBS lldConfig + lldCore ${PTHREAD_LIB} - ) -add_dependencies(lldELF intrinsics_gen ELFOptionsTableGen) + DEPENDS + ELFOptionsTableGen + ${tablegen_deps} + ) diff --git a/ELF/Config.h b/ELF/Config.h index 2ccd95e88775..b828cdb25047 100644 --- a/ELF/Config.h +++ b/ELF/Config.h @@ -12,6 +12,7 @@ #include "llvm/ADT/MapVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" #include "llvm/Support/ELF.h" #include <vector> @@ -30,21 +31,36 @@ enum ELFKind { ELF64BEKind }; -enum class BuildIdKind { None, Fnv1, Md5, Sha1, Hexstring }; +// For --build-id. +enum class BuildIdKind { None, Fast, Md5, Sha1, Hexstring, Uuid }; -enum class UnresolvedPolicy { NoUndef, Error, Warn, Ignore }; +// For --discard-{all,locals,none} and --retain-symbols-file. +enum class DiscardPolicy { Default, All, Locals, RetainFile, None }; + +// For --strip-{all,debug}. +enum class StripPolicy { None, All, Debug }; + +// For --unresolved-symbols. 
+enum class UnresolvedPolicy { NoUndef, ReportError, Warn, Ignore }; + +// For --sort-section and linkerscript sorting rules. +enum class SortSectionPolicy { Default, None, Alignment, Name, Priority }; + +// For --target2 +enum class Target2Policy { Abs, Rel, GotRel }; struct SymbolVersion { llvm::StringRef Name; bool IsExternCpp; + bool HasWildcard; }; // This struct contains symbols version definition that // can be found in version script if it is used for link. struct VersionDefinition { - VersionDefinition(llvm::StringRef Name, size_t Id) : Name(Name), Id(Id) {} + VersionDefinition(llvm::StringRef Name, uint16_t Id) : Name(Name), Id(Id) {} llvm::StringRef Name; - size_t Id; + uint16_t Id; std::vector<SymbolVersion> Globals; size_t NameOff; // Offset in string table. }; @@ -54,75 +70,92 @@ struct VersionDefinition { // and such fields have the same name as the corresponding options. // Most fields are initialized by the driver. struct Configuration { - Symbol *EntrySym = nullptr; InputFile *FirstElf = nullptr; + uint8_t OSABI = 0; + llvm::StringMap<uint64_t> SectionStartMap; llvm::StringRef DynamicLinker; llvm::StringRef Entry; llvm::StringRef Emulation; llvm::StringRef Fini; llvm::StringRef Init; - llvm::StringRef LtoAAPipeline; - llvm::StringRef LtoNewPmPasses; + llvm::StringRef LTOAAPipeline; + llvm::StringRef LTONewPmPasses; llvm::StringRef OutputFile; llvm::StringRef SoName; llvm::StringRef Sysroot; + llvm::StringSet<> RetainSymbolsFile; std::string RPath; std::vector<VersionDefinition> VersionDefinitions; - std::vector<llvm::StringRef> DynamicList; + std::vector<llvm::StringRef> AuxiliaryList; std::vector<llvm::StringRef> SearchPaths; + std::vector<llvm::StringRef> SymbolOrderingFile; std::vector<llvm::StringRef> Undefined; std::vector<SymbolVersion> VersionScriptGlobals; + std::vector<SymbolVersion> VersionScriptLocals; std::vector<uint8_t> BuildIdVector; bool AllowMultipleDefinition; bool AsNeeded = false; bool Bsymbolic; bool BsymbolicFunctions; + bool 
ColorDiagnostics = false; bool Demangle = true; bool DisableVerify; - bool DiscardAll; - bool DiscardLocals; - bool DiscardNone; bool EhFrameHdr; bool EnableNewDtags; bool ExportDynamic; bool FatalWarnings; bool GcSections; + bool GdbIndex; bool GnuHash = false; bool ICF; bool Mips64EL = false; + bool MipsN32Abi = false; bool NoGnuUnique; bool NoUndefinedVersion; + bool Nostdlib; + bool OFormatBinary; + bool OMagic; bool Pic; bool Pie; bool PrintGcSections; bool Rela; bool Relocatable; bool SaveTemps; + bool SingleRoRx; bool Shared; bool Static = false; - bool StripAll; - bool StripDebug; bool SysvHash = true; + bool Target1Rel; bool Threads; bool Trace; bool Verbose; bool WarnCommon; + bool WarnMissingEntry; bool ZCombreloc; - bool ZExecStack; + bool ZExecstack; bool ZNodelete; bool ZNow; bool ZOrigin; bool ZRelro; + bool ExitEarly; + bool ZWxneeded; + DiscardPolicy Discard; + SortSectionPolicy SortSection; + StripPolicy Strip = StripPolicy::None; UnresolvedPolicy UnresolvedSymbols; + Target2Policy Target2 = Target2Policy::GotRel; BuildIdKind BuildId = BuildIdKind::None; ELFKind EKind = ELFNoneKind; uint16_t DefaultSymbolVersion = llvm::ELF::VER_NDX_GLOBAL; uint16_t EMachine = llvm::ELF::EM_NONE; - uint64_t EntryAddr = -1; + uint64_t ErrorLimit = 20; uint64_t ImageBase; - unsigned LtoJobs; - unsigned LtoO; + uint64_t MaxPageSize; + uint64_t ZStackSize; + unsigned LTOPartitions; + unsigned LTOO; unsigned Optimize; + unsigned ThinLTOJobs; }; // The only instance of Configuration struct. 
diff --git a/ELF/Driver.cpp b/ELF/Driver.cpp index c6ca2639236f..a11dbc7cc47f 100644 --- a/ELF/Driver.cpp +++ b/ELF/Driver.cpp @@ -14,14 +14,17 @@ #include "InputFiles.h" #include "InputSection.h" #include "LinkerScript.h" +#include "Memory.h" #include "Strings.h" -#include "SymbolListFile.h" #include "SymbolTable.h" #include "Target.h" +#include "Threads.h" #include "Writer.h" +#include "lld/Config/Version.h" #include "lld/Driver/Driver.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/TargetSelect.h" #include "llvm/Support/raw_ostream.h" #include <cstdlib> @@ -38,48 +41,59 @@ using namespace lld::elf; Configuration *elf::Config; LinkerDriver *elf::Driver; -bool elf::link(ArrayRef<const char *> Args, raw_ostream &Error) { - HasError = false; +BumpPtrAllocator elf::BAlloc; +StringSaver elf::Saver{BAlloc}; +std::vector<SpecificAllocBase *> elf::SpecificAllocBase::Instances; + +bool elf::link(ArrayRef<const char *> Args, bool CanExitEarly, + raw_ostream &Error) { + ErrorCount = 0; ErrorOS = &Error; + Argv0 = Args[0]; - Configuration C; - LinkerDriver D; - ScriptConfiguration SC; - Config = &C; - Driver = &D; - ScriptConfig = &SC; + Config = make<Configuration>(); + Driver = make<LinkerDriver>(); + ScriptConfig = make<ScriptConfiguration>(); - Driver->main(Args); - return !HasError; + Driver->main(Args, CanExitEarly); + freeArena(); + return !ErrorCount; } // Parses a linker -m option. 
-static std::pair<ELFKind, uint16_t> parseEmulation(StringRef S) { - if (S.endswith("_fbsd")) +static std::tuple<ELFKind, uint16_t, uint8_t> parseEmulation(StringRef Emul) { + uint8_t OSABI = 0; + StringRef S = Emul; + if (S.endswith("_fbsd")) { S = S.drop_back(5); + OSABI = ELFOSABI_FREEBSD; + } std::pair<ELFKind, uint16_t> Ret = StringSwitch<std::pair<ELFKind, uint16_t>>(S) - .Case("aarch64linux", {ELF64LEKind, EM_AARCH64}) + .Cases("aarch64elf", "aarch64linux", {ELF64LEKind, EM_AARCH64}) .Case("armelf_linux_eabi", {ELF32LEKind, EM_ARM}) .Case("elf32_x86_64", {ELF32LEKind, EM_X86_64}) .Case("elf32btsmip", {ELF32BEKind, EM_MIPS}) .Case("elf32ltsmip", {ELF32LEKind, EM_MIPS}) + .Case("elf32btsmipn32", {ELF32BEKind, EM_MIPS}) + .Case("elf32ltsmipn32", {ELF32LEKind, EM_MIPS}) .Case("elf32ppc", {ELF32BEKind, EM_PPC}) .Case("elf64btsmip", {ELF64BEKind, EM_MIPS}) .Case("elf64ltsmip", {ELF64LEKind, EM_MIPS}) .Case("elf64ppc", {ELF64BEKind, EM_PPC64}) + .Cases("elf_amd64", "elf_x86_64", {ELF64LEKind, EM_X86_64}) .Case("elf_i386", {ELF32LEKind, EM_386}) - .Case("elf_x86_64", {ELF64LEKind, EM_X86_64}) + .Case("elf_iamcu", {ELF32LEKind, EM_IAMCU}) .Default({ELFNoneKind, EM_NONE}); if (Ret.first == ELFNoneKind) { if (S == "i386pe" || S == "i386pep" || S == "thumb2pe") - error("Windows targets are not supported on the ELF frontend: " + S); + error("Windows targets are not supported on the ELF frontend: " + Emul); else - error("unknown emulation: " + S); + error("unknown emulation: " + Emul); } - return Ret; + return std::make_tuple(Ret.first, Ret.second, OSABI); } // Returns slices of MB by parsing MB as an archive file. 
@@ -87,25 +101,28 @@ static std::pair<ELFKind, uint16_t> parseEmulation(StringRef S) { std::vector<MemoryBufferRef> LinkerDriver::getArchiveMembers(MemoryBufferRef MB) { std::unique_ptr<Archive> File = - check(Archive::create(MB), "failed to parse archive"); + check(Archive::create(MB), + MB.getBufferIdentifier() + ": failed to parse archive"); std::vector<MemoryBufferRef> V; - Error Err; + Error Err = Error::success(); for (const ErrorOr<Archive::Child> &COrErr : File->children(Err)) { - Archive::Child C = check(COrErr, "could not get the child of the archive " + - File->getFileName()); + Archive::Child C = + check(COrErr, MB.getBufferIdentifier() + + ": could not get the child of the archive"); MemoryBufferRef MBRef = check(C.getMemoryBufferRef(), - "could not get the buffer for a child of the archive " + - File->getFileName()); + MB.getBufferIdentifier() + + ": could not get the buffer for a child of the archive"); V.push_back(MBRef); } if (Err) - Error(Err); + fatal(MB.getBufferIdentifier() + ": Archive::children failed: " + + toString(std::move(Err))); // Take ownership of memory buffers created for members of thin archives. for (std::unique_ptr<MemoryBuffer> &MB : File->takeThinBuffers()) - OwningMBs.push_back(std::move(MB)); + make<std::unique_ptr<MemoryBuffer>>(std::move(MB)); return V; } @@ -114,25 +131,28 @@ LinkerDriver::getArchiveMembers(MemoryBufferRef MB) { // Newly created memory buffers are owned by this driver. 
void LinkerDriver::addFile(StringRef Path) { using namespace sys::fs; - if (Config->Verbose) - outs() << Path << "\n"; Optional<MemoryBufferRef> Buffer = readFile(Path); if (!Buffer.hasValue()) return; MemoryBufferRef MBRef = *Buffer; + if (InBinary) { + Files.push_back(make<BinaryFile>(MBRef)); + return; + } + switch (identify_magic(MBRef.getBuffer())) { case file_magic::unknown: readLinkerScript(MBRef); return; case file_magic::archive: - if (WholeArchive) { + if (InWholeArchive) { for (MemoryBufferRef MB : getArchiveMembers(MBRef)) Files.push_back(createObjectFile(MB, Path)); return; } - Files.push_back(make_unique<ArchiveFile>(MBRef)); + Files.push_back(make<ArchiveFile>(MBRef)); return; case file_magic::elf_shared_object: if (Config->Relocatable) { @@ -143,13 +163,16 @@ void LinkerDriver::addFile(StringRef Path) { return; default: if (InLib) - Files.push_back(make_unique<LazyObjectFile>(MBRef)); + Files.push_back(make<LazyObjectFile>(MBRef)); else Files.push_back(createObjectFile(MBRef)); } } Optional<MemoryBufferRef> LinkerDriver::readFile(StringRef Path) { + if (Config->Verbose) + outs() << Path << "\n"; + auto MBOrErr = MemoryBuffer::getFile(Path); if (auto EC = MBOrErr.getError()) { error(EC, "cannot open " + Path); @@ -157,7 +180,7 @@ Optional<MemoryBufferRef> LinkerDriver::readFile(StringRef Path) { } std::unique_ptr<MemoryBuffer> &MB = *MBOrErr; MemoryBufferRef MBRef = MB->getMemBufferRef(); - OwningMBs.push_back(std::move(MB)); // take MB ownership + make<std::unique_ptr<MemoryBuffer>>(std::move(MB)); // take MB ownership if (Cpio) Cpio->append(relativeToRoot(Path), MBRef.getBuffer()); @@ -167,11 +190,10 @@ Optional<MemoryBufferRef> LinkerDriver::readFile(StringRef Path) { // Add a given library by searching it from input search paths. 
void LinkerDriver::addLibrary(StringRef Name) { - std::string Path = searchLibrary(Name); - if (Path.empty()) - error("unable to find library -l" + Name); + if (Optional<std::string> Path = searchLibrary(Name)) + addFile(*Path); else - addFile(Path); + error("unable to find library -l" + Name); } // This function is called on startup. We need this for LTO since @@ -184,12 +206,6 @@ static void initLLVM(opt::InputArgList &Args) { InitializeAllAsmPrinters(); InitializeAllAsmParsers(); - // This is a flag to discard all but GlobalValue names. - // We want to enable it by default because it saves memory. - // Disable it only when a developer option (-save-temps) is given. - Driver->Context.setDiscardValueNames(!Config->SaveTemps); - Driver->Context.enableDebugTypeODRUniquing(); - // Parse and evaluate -mllvm options. std::vector<const char *> V; V.push_back("lld (LLVM option parsing)"); @@ -206,9 +222,6 @@ static void checkOptions(opt::InputArgList &Args) { if (Config->EMachine == EM_MIPS && Config->GnuHash) error("the .gnu.hash section is not compatible with the MIPS target."); - if (Config->EMachine == EM_AMDGPU && !Config->Entry.empty()) - error("-e option is not valid for AMDGPU."); - if (Config->Pie && Config->Shared) error("-shared and -pie may not be used together"); @@ -224,8 +237,8 @@ static void checkOptions(opt::InputArgList &Args) { } } -static StringRef -getString(opt::InputArgList &Args, unsigned Key, StringRef Default = "") { +static StringRef getString(opt::InputArgList &Args, unsigned Key, + StringRef Default = "") { if (auto *Arg = Args.getLastArg(Key)) return Arg->getValue(); return Default; @@ -254,33 +267,64 @@ static bool hasZOption(opt::InputArgList &Args, StringRef Key) { return false; } -void LinkerDriver::main(ArrayRef<const char *> ArgsArr) { +static uint64_t getZOptionValue(opt::InputArgList &Args, StringRef Key, + uint64_t Default) { + for (auto *Arg : Args.filtered(OPT_z)) { + StringRef Value = Arg->getValue(); + size_t Pos = 
Value.find("="); + if (Pos != StringRef::npos && Key == Value.substr(0, Pos)) { + Value = Value.substr(Pos + 1); + uint64_t Result; + if (Value.getAsInteger(0, Result)) + error("invalid " + Key + ": " + Value); + return Result; + } + } + return Default; +} + +void LinkerDriver::main(ArrayRef<const char *> ArgsArr, bool CanExitEarly) { ELFOptTable Parser; opt::InputArgList Args = Parser.parse(ArgsArr.slice(1)); + + // Interpret this flag early because error() depends on them. + Config->ErrorLimit = getInteger(Args, OPT_error_limit, 20); + + // Handle -help if (Args.hasArg(OPT_help)) { printHelp(ArgsArr[0]); return; } - if (Args.hasArg(OPT_version)) { - outs() << getVersionString(); + + // GNU linkers disagree here. Though both -version and -v are mentioned + // in help to print the version information, GNU ld just normally exits, + // while gold can continue linking. We are compatible with ld.bfd here. + if (Args.hasArg(OPT_version) || Args.hasArg(OPT_v)) + outs() << getLLDVersion() << "\n"; + if (Args.hasArg(OPT_version)) return; - } + + Config->ExitEarly = CanExitEarly && !Args.hasArg(OPT_full_shutdown); if (const char *Path = getReproduceOption(Args)) { // Note that --reproduce is a debug option so you can ignore it // if you are trying to understand the whole picture of the code. 
- Cpio.reset(CpioFile::create(Path)); - if (Cpio) { + ErrorOr<CpioFile *> F = CpioFile::create(Path); + if (F) { + Cpio.reset(*F); Cpio->append("response.txt", createResponseFile(Args)); - Cpio->append("version.txt", getVersionString()); - } + Cpio->append("version.txt", getLLDVersion() + "\n"); + } else + error(F.getError(), + Twine("--reproduce: failed to open ") + Path + ".cpio"); } readConfigs(Args); initLLVM(Args); createFiles(Args); + inferMachineType(); checkOptions(Args); - if (HasError) + if (ErrorCount) return; switch (Config->EKind) { @@ -297,7 +341,7 @@ void LinkerDriver::main(ArrayRef<const char *> ArgsArr) { link<ELF64BE>(Args); return; default: - error("-m or at least a .o file required"); + llvm_unreachable("unknown Config->EKind"); } } @@ -314,10 +358,115 @@ static UnresolvedPolicy getUnresolvedSymbolOption(opt::InputArgList &Args) { if (S == "ignore-all" || S == "ignore-in-object-files") return UnresolvedPolicy::Ignore; if (S == "ignore-in-shared-libs" || S == "report-all") - return UnresolvedPolicy::Error; + return UnresolvedPolicy::ReportError; error("unknown --unresolved-symbols value: " + S); } - return UnresolvedPolicy::Error; + return UnresolvedPolicy::ReportError; +} + +static Target2Policy getTarget2Option(opt::InputArgList &Args) { + if (auto *Arg = Args.getLastArg(OPT_target2)) { + StringRef S = Arg->getValue(); + if (S == "rel") + return Target2Policy::Rel; + if (S == "abs") + return Target2Policy::Abs; + if (S == "got-rel") + return Target2Policy::GotRel; + error("unknown --target2 option: " + S); + } + return Target2Policy::GotRel; +} + +static bool isOutputFormatBinary(opt::InputArgList &Args) { + if (auto *Arg = Args.getLastArg(OPT_oformat)) { + StringRef S = Arg->getValue(); + if (S == "binary") + return true; + error("unknown --oformat value: " + S); + } + return false; +} + +static bool getArg(opt::InputArgList &Args, unsigned K1, unsigned K2, + bool Default) { + if (auto *Arg = Args.getLastArg(K1, K2)) + return 
Arg->getOption().getID() == K1; + return Default; +} + +static DiscardPolicy getDiscardOption(opt::InputArgList &Args) { + if (Config->Relocatable) + return DiscardPolicy::None; + auto *Arg = + Args.getLastArg(OPT_discard_all, OPT_discard_locals, OPT_discard_none); + if (!Arg) + return DiscardPolicy::Default; + if (Arg->getOption().getID() == OPT_discard_all) + return DiscardPolicy::All; + if (Arg->getOption().getID() == OPT_discard_locals) + return DiscardPolicy::Locals; + return DiscardPolicy::None; +} + +static StripPolicy getStripOption(opt::InputArgList &Args) { + if (auto *Arg = Args.getLastArg(OPT_strip_all, OPT_strip_debug)) { + if (Arg->getOption().getID() == OPT_strip_all) + return StripPolicy::All; + return StripPolicy::Debug; + } + return StripPolicy::None; +} + +static uint64_t parseSectionAddress(StringRef S, opt::Arg *Arg) { + uint64_t VA = 0; + if (S.startswith("0x")) + S = S.drop_front(2); + if (S.getAsInteger(16, VA)) + error("invalid argument: " + stringize(Arg)); + return VA; +} + +static StringMap<uint64_t> getSectionStartMap(opt::InputArgList &Args) { + StringMap<uint64_t> Ret; + for (auto *Arg : Args.filtered(OPT_section_start)) { + StringRef Name; + StringRef Addr; + std::tie(Name, Addr) = StringRef(Arg->getValue()).split('='); + Ret[Name] = parseSectionAddress(Addr, Arg); + } + + if (auto *Arg = Args.getLastArg(OPT_Ttext)) + Ret[".text"] = parseSectionAddress(Arg->getValue(), Arg); + if (auto *Arg = Args.getLastArg(OPT_Tdata)) + Ret[".data"] = parseSectionAddress(Arg->getValue(), Arg); + if (auto *Arg = Args.getLastArg(OPT_Tbss)) + Ret[".bss"] = parseSectionAddress(Arg->getValue(), Arg); + return Ret; +} + +static SortSectionPolicy getSortKind(opt::InputArgList &Args) { + StringRef S = getString(Args, OPT_sort_section); + if (S == "alignment") + return SortSectionPolicy::Alignment; + if (S == "name") + return SortSectionPolicy::Name; + if (!S.empty()) + error("unknown --sort-section rule: " + S); + return SortSectionPolicy::Default; +} + 
+static std::vector<StringRef> getLines(MemoryBufferRef MB) { + SmallVector<StringRef, 0> Arr; + MB.getBuffer().split(Arr, '\n'); + + std::vector<StringRef> Ret; + for (StringRef S : Arr) { + S = S.trim(); + if (!S.empty()) + Ret.push_back(S); + } + return Ret; } // Initializes Config members by the command line options. @@ -334,34 +483,37 @@ void LinkerDriver::readConfigs(opt::InputArgList &Args) { if (auto *Arg = Args.getLastArg(OPT_m)) { // Parse ELF{32,64}{LE,BE} and CPU type. StringRef S = Arg->getValue(); - std::tie(Config->EKind, Config->EMachine) = parseEmulation(S); + std::tie(Config->EKind, Config->EMachine, Config->OSABI) = + parseEmulation(S); + Config->MipsN32Abi = (S == "elf32btsmipn32" || S == "elf32ltsmipn32"); Config->Emulation = S; } Config->AllowMultipleDefinition = Args.hasArg(OPT_allow_multiple_definition); Config->Bsymbolic = Args.hasArg(OPT_Bsymbolic); Config->BsymbolicFunctions = Args.hasArg(OPT_Bsymbolic_functions); - Config->Demangle = !Args.hasArg(OPT_no_demangle); + Config->Demangle = getArg(Args, OPT_demangle, OPT_no_demangle, true); Config->DisableVerify = Args.hasArg(OPT_disable_verify); - Config->DiscardAll = Args.hasArg(OPT_discard_all); - Config->DiscardLocals = Args.hasArg(OPT_discard_locals); - Config->DiscardNone = Args.hasArg(OPT_discard_none); Config->EhFrameHdr = Args.hasArg(OPT_eh_frame_hdr); Config->EnableNewDtags = !Args.hasArg(OPT_disable_new_dtags); Config->ExportDynamic = Args.hasArg(OPT_export_dynamic); Config->FatalWarnings = Args.hasArg(OPT_fatal_warnings); - Config->GcSections = Args.hasArg(OPT_gc_sections); + Config->GcSections = getArg(Args, OPT_gc_sections, OPT_no_gc_sections, false); + Config->GdbIndex = Args.hasArg(OPT_gdb_index); Config->ICF = Args.hasArg(OPT_icf); Config->NoGnuUnique = Args.hasArg(OPT_no_gnu_unique); Config->NoUndefinedVersion = Args.hasArg(OPT_no_undefined_version); - Config->Pie = Args.hasArg(OPT_pie); + Config->Nostdlib = Args.hasArg(OPT_nostdlib); + Config->OMagic = 
Args.hasArg(OPT_omagic); + Config->Pie = getArg(Args, OPT_pie, OPT_nopie, false); Config->PrintGcSections = Args.hasArg(OPT_print_gc_sections); Config->Relocatable = Args.hasArg(OPT_relocatable); + Config->Discard = getDiscardOption(Args); Config->SaveTemps = Args.hasArg(OPT_save_temps); + Config->SingleRoRx = Args.hasArg(OPT_no_rosegment); Config->Shared = Args.hasArg(OPT_shared); - Config->StripAll = Args.hasArg(OPT_strip_all); - Config->StripDebug = Args.hasArg(OPT_strip_debug); - Config->Threads = Args.hasArg(OPT_threads); + Config->Target1Rel = getArg(Args, OPT_target1_rel, OPT_target1_abs, false); + Config->Threads = getArg(Args, OPT_threads, OPT_no_threads, true); Config->Trace = Args.hasArg(OPT_trace); Config->Verbose = Args.hasArg(OPT_verbose); Config->WarnCommon = Args.hasArg(OPT_warn_common); @@ -370,33 +522,47 @@ void LinkerDriver::readConfigs(opt::InputArgList &Args) { Config->Entry = getString(Args, OPT_entry); Config->Fini = getString(Args, OPT_fini, "_fini"); Config->Init = getString(Args, OPT_init, "_init"); - Config->LtoAAPipeline = getString(Args, OPT_lto_aa_pipeline); - Config->LtoNewPmPasses = getString(Args, OPT_lto_newpm_passes); + Config->LTOAAPipeline = getString(Args, OPT_lto_aa_pipeline); + Config->LTONewPmPasses = getString(Args, OPT_lto_newpm_passes); Config->OutputFile = getString(Args, OPT_o); Config->SoName = getString(Args, OPT_soname); Config->Sysroot = getString(Args, OPT_sysroot); Config->Optimize = getInteger(Args, OPT_O, 1); - Config->LtoO = getInteger(Args, OPT_lto_O, 2); - if (Config->LtoO > 3) + Config->LTOO = getInteger(Args, OPT_lto_O, 2); + if (Config->LTOO > 3) error("invalid optimization level for LTO: " + getString(Args, OPT_lto_O)); - Config->LtoJobs = getInteger(Args, OPT_lto_jobs, 1); - if (Config->LtoJobs == 0) - error("number of threads must be > 0"); + Config->LTOPartitions = getInteger(Args, OPT_lto_partitions, 1); + if (Config->LTOPartitions == 0) + error("--lto-partitions: number of threads must be > 0"); + 
Config->ThinLTOJobs = getInteger(Args, OPT_thinlto_jobs, -1u); + if (Config->ThinLTOJobs == 0) + error("--thinlto-jobs: number of threads must be > 0"); Config->ZCombreloc = !hasZOption(Args, "nocombreloc"); - Config->ZExecStack = hasZOption(Args, "execstack"); + Config->ZExecstack = hasZOption(Args, "execstack"); Config->ZNodelete = hasZOption(Args, "nodelete"); Config->ZNow = hasZOption(Args, "now"); Config->ZOrigin = hasZOption(Args, "origin"); Config->ZRelro = !hasZOption(Args, "norelro"); + Config->ZStackSize = getZOptionValue(Args, "stack-size", -1); + Config->ZWxneeded = hasZOption(Args, "wxneeded"); - if (Config->Relocatable) - Config->StripAll = false; + Config->OFormatBinary = isOutputFormatBinary(Args); + Config->SectionStartMap = getSectionStartMap(Args); + Config->SortSection = getSortKind(Args); + Config->Target2 = getTarget2Option(Args); + Config->UnresolvedSymbols = getUnresolvedSymbolOption(Args); + + // --omagic is an option to create old-fashioned executables in which + // .text segments are writable. Today, the option is still in use to + // create special-purpose programs such as boot loaders. It doesn't + // make sense to create PT_GNU_RELRO for such executables. + if (Config->OMagic) + Config->ZRelro = false; - // --strip-all implies --strip-debug. - if (Config->StripAll) - Config->StripDebug = true; + if (!Config->Relocatable) + Config->Strip = getStripOption(Args); // Config->Pic is true if we are generating position-independent code. Config->Pic = Config->Pie || Config->Shared; @@ -414,13 +580,15 @@ void LinkerDriver::readConfigs(opt::InputArgList &Args) { // Parse --build-id or --build-id=<style>. 
if (Args.hasArg(OPT_build_id)) - Config->BuildId = BuildIdKind::Fnv1; + Config->BuildId = BuildIdKind::Fast; if (auto *Arg = Args.getLastArg(OPT_build_id_eq)) { StringRef S = Arg->getValue(); if (S == "md5") { Config->BuildId = BuildIdKind::Md5; - } else if (S == "sha1") { + } else if (S == "sha1" || S == "tree") { Config->BuildId = BuildIdKind::Sha1; + } else if (S == "uuid") { + Config->BuildId = BuildIdKind::Uuid; } else if (S == "none") { Config->BuildId = BuildIdKind::None; } else if (S.startswith("0x")) { @@ -431,21 +599,58 @@ void LinkerDriver::readConfigs(opt::InputArgList &Args) { } } + for (auto *Arg : Args.filtered(OPT_auxiliary)) + Config->AuxiliaryList.push_back(Arg->getValue()); + if (!Config->Shared && !Config->AuxiliaryList.empty()) + error("-f may not be used without -shared"); + for (auto *Arg : Args.filtered(OPT_undefined)) Config->Undefined.push_back(Arg->getValue()); - Config->UnresolvedSymbols = getUnresolvedSymbolOption(Args); - if (auto *Arg = Args.getLastArg(OPT_dynamic_list)) if (Optional<MemoryBufferRef> Buffer = readFile(Arg->getValue())) - parseDynamicList(*Buffer); + readDynamicList(*Buffer); + + if (auto *Arg = Args.getLastArg(OPT_symbol_ordering_file)) + if (Optional<MemoryBufferRef> Buffer = readFile(Arg->getValue())) + Config->SymbolOrderingFile = getLines(*Buffer); + + // If --retain-symbol-file is used, we'll retail only the symbols listed in + // the file and discard all others. 
+ if (auto *Arg = Args.getLastArg(OPT_retain_symbols_file)) { + Config->Discard = DiscardPolicy::RetainFile; + if (Optional<MemoryBufferRef> Buffer = readFile(Arg->getValue())) + for (StringRef S : getLines(*Buffer)) + Config->RetainSymbolsFile.insert(S); + } for (auto *Arg : Args.filtered(OPT_export_dynamic_symbol)) - Config->DynamicList.push_back(Arg->getValue()); + Config->VersionScriptGlobals.push_back( + {Arg->getValue(), /*IsExternCpp*/ false, /*HasWildcard*/ false}); + + // Dynamic lists are a simplified linker script that doesn't need the + // "global:" and implicitly ends with a "local:*". Set the variables needed to + // simulate that. + if (Args.hasArg(OPT_dynamic_list) || Args.hasArg(OPT_export_dynamic_symbol)) { + Config->ExportDynamic = true; + if (!Config->Shared) + Config->DefaultSymbolVersion = VER_NDX_LOCAL; + } if (auto *Arg = Args.getLastArg(OPT_version_script)) if (Optional<MemoryBufferRef> Buffer = readFile(Arg->getValue())) - parseVersionScript(*Buffer); + readVersionScript(*Buffer); +} + +// Returns a value of "-format" option. 
+static bool getBinaryOption(StringRef S) { + if (S == "binary") + return true; + if (S == "elf" || S == "default") + return false; + error("unknown -format value: " + S + + " (supported formats: elf, default, binary)"); + return false; } void LinkerDriver::createFiles(opt::InputArgList &Args) { @@ -454,14 +659,20 @@ void LinkerDriver::createFiles(opt::InputArgList &Args) { case OPT_l: addLibrary(Arg->getValue()); break; - case OPT_alias_script_T: case OPT_INPUT: - case OPT_script: addFile(Arg->getValue()); break; + case OPT_alias_script_T: + case OPT_script: + if (Optional<MemoryBufferRef> MB = readFile(Arg->getValue())) + readLinkerScript(*MB); + break; case OPT_as_needed: Config->AsNeeded = true; break; + case OPT_format: + InBinary = getBinaryOption(Arg->getValue()); + break; case OPT_no_as_needed: Config->AsNeeded = false; break; @@ -472,10 +683,10 @@ void LinkerDriver::createFiles(opt::InputArgList &Args) { Config->Static = false; break; case OPT_whole_archive: - WholeArchive = true; + InWholeArchive = true; break; case OPT_no_whole_archive: - WholeArchive = false; + InWholeArchive = false; break; case OPT_start_lib: InLib = true; @@ -486,19 +697,55 @@ void LinkerDriver::createFiles(opt::InputArgList &Args) { } } - if (Files.empty() && !HasError) - error("no input files."); + if (Files.empty() && ErrorCount == 0) + error("no input files"); +} - // If -m <machine_type> was not given, infer it from object files. - if (Config->EKind == ELFNoneKind) { - for (std::unique_ptr<InputFile> &F : Files) { - if (F->EKind == ELFNoneKind) - continue; - Config->EKind = F->EKind; - Config->EMachine = F->EMachine; - break; - } +// If -m <machine_type> was not given, infer it from object files. 
+void LinkerDriver::inferMachineType() { + if (Config->EKind != ELFNoneKind) + return; + + for (InputFile *F : Files) { + if (F->EKind == ELFNoneKind) + continue; + Config->EKind = F->EKind; + Config->EMachine = F->EMachine; + Config->OSABI = F->OSABI; + Config->MipsN32Abi = Config->EMachine == EM_MIPS && isMipsN32Abi(F); + return; } + error("target emulation unknown: -m or at least one .o file required"); +} + +// Parse -z max-page-size=<value>. The default value is defined by +// each target. +static uint64_t getMaxPageSize(opt::InputArgList &Args) { + uint64_t Val = + getZOptionValue(Args, "max-page-size", Target->DefaultMaxPageSize); + if (!isPowerOf2_64(Val)) + error("max-page-size: value isn't a power of 2"); + return Val; +} + +// Parses -image-base option. +static uint64_t getImageBase(opt::InputArgList &Args) { + // Use default if no -image-base option is given. + // Because we are using "Target" here, this function + // has to be called after the variable is initialized. + auto *Arg = Args.getLastArg(OPT_image_base); + if (!Arg) + return Config->Pic ? 0 : Target->DefaultImageBase; + + StringRef S = Arg->getValue(); + uint64_t V; + if (S.getAsInteger(0, V)) { + error("-image-base: number expected, but got " + S); + return 0; + } + if ((V % Config->MaxPageSize) != 0) + warn("-image-base: address isn't multiple of page size: " + S); + return V; } // Do actual linking. 
Note that when this function is called, @@ -506,66 +753,70 @@ void LinkerDriver::createFiles(opt::InputArgList &Args) { template <class ELFT> void LinkerDriver::link(opt::InputArgList &Args) { SymbolTable<ELFT> Symtab; elf::Symtab<ELFT>::X = &Symtab; + Target = createTarget(); + ScriptBase = Script<ELFT>::X = make<LinkerScript<ELFT>>(); - std::unique_ptr<TargetInfo> TI(createTarget()); - Target = TI.get(); - LinkerScript<ELFT> LS; - Script<ELFT>::X = &LS; - - Config->Rela = ELFT::Is64Bits || Config->EMachine == EM_X86_64; + Config->Rela = + ELFT::Is64Bits || Config->EMachine == EM_X86_64 || Config->MipsN32Abi; Config->Mips64EL = (Config->EMachine == EM_MIPS && Config->EKind == ELF64LEKind); - - // Add entry symbol. Note that AMDGPU binaries have no entry points. - if (Config->Entry.empty() && !Config->Shared && !Config->Relocatable && - Config->EMachine != EM_AMDGPU) - Config->Entry = (Config->EMachine == EM_MIPS) ? "__start" : "_start"; + Config->MaxPageSize = getMaxPageSize(Args); + Config->ImageBase = getImageBase(Args); // Default output filename is "a.out" by the Unix tradition. if (Config->OutputFile.empty()) Config->OutputFile = "a.out"; + // Use default entry point name if no name was given via the command + // line nor linker scripts. For some reason, MIPS entry point name is + // different from others. + Config->WarnMissingEntry = + (!Config->Entry.empty() || (!Config->Shared && !Config->Relocatable)); + if (Config->Entry.empty() && !Config->Relocatable) + Config->Entry = (Config->EMachine == EM_MIPS) ? "__start" : "_start"; + // Handle --trace-symbol. for (auto *Arg : Args.filtered(OPT_trace_symbol)) Symtab.trace(Arg->getValue()); - // Set either EntryAddr (if S is a number) or EntrySym (otherwise). - if (!Config->Entry.empty()) { - StringRef S = Config->Entry; - if (S.getAsInteger(0, Config->EntryAddr)) - Config->EntrySym = Symtab.addUndefined(S); - } + // Add all files to the symbol table. 
This will add almost all + // symbols that we need to the symbol table. + for (InputFile *F : Files) + Symtab.addFile(F); - // Initialize Config->ImageBase. - if (auto *Arg = Args.getLastArg(OPT_image_base)) { - StringRef S = Arg->getValue(); - if (S.getAsInteger(0, Config->ImageBase)) - error(Arg->getSpelling() + ": number expected, but got " + S); - else if ((Config->ImageBase % Target->PageSize) != 0) - warning(Arg->getSpelling() + ": address isn't multiple of page size"); - } else { - Config->ImageBase = Config->Pic ? 0 : Target->DefaultImageBase; - } + // If an entry symbol is in a static archive, pull out that file now + // to complete the symbol table. After this, no new names except a + // few linker-synthesized ones will be added to the symbol table. + if (Symtab.find(Config->Entry)) + Symtab.addUndefined(Config->Entry); - for (std::unique_ptr<InputFile> &F : Files) - Symtab.addFile(std::move(F)); - if (HasError) - return; // There were duplicate symbols or incompatible files + // Return if there were name resolution errors. + if (ErrorCount) + return; Symtab.scanUndefinedFlags(); Symtab.scanShlibUndefined(); - Symtab.scanDynamicList(); Symtab.scanVersionScript(); - Symtab.scanSymbolVersions(); - Symtab.addCombinedLtoObject(); - if (HasError) + Symtab.addCombinedLTOObject(); + if (ErrorCount) return; for (auto *Arg : Args.filtered(OPT_wrap)) Symtab.wrap(Arg->getValue()); - // Write the result to the file. + // Now that we have a complete list of input files. + // Beyond this point, no new files are added. + // Aggregate all input sections into one place. 
+ for (elf::ObjectFile<ELFT> *F : Symtab.getObjectFiles()) + for (InputSectionBase<ELFT> *S : F->getSections()) + if (S && S != &InputSection<ELFT>::Discarded) + Symtab.Sections.push_back(S); + for (BinaryFile *F : Symtab.getBinaryFiles()) + for (InputSectionData *S : F->getSections()) + Symtab.Sections.push_back(cast<InputSection<ELFT>>(S)); + + // Do size optimizations: garbage collection and identical code folding. if (Config->GcSections) markLive<ELFT>(); if (Config->ICF) @@ -573,16 +824,16 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &Args) { // MergeInputSection::splitIntoPieces needs to be called before // any call of MergeInputSection::getOffset. Do that. - for (const std::unique_ptr<elf::ObjectFile<ELFT>> &F : - Symtab.getObjectFiles()) - for (InputSectionBase<ELFT> *S : F->getSections()) { - if (!S || S == &InputSection<ELFT>::Discarded || !S->Live) - continue; - if (S->Compressed) - S->uncompress(); - if (auto *MS = dyn_cast<MergeInputSection<ELFT>>(S)) - MS->splitIntoPieces(); - } + forEach(Symtab.Sections.begin(), Symtab.Sections.end(), + [](InputSectionBase<ELFT> *S) { + if (!S->Live) + return; + if (S->isCompressed()) + S->uncompress(); + if (auto *MS = dyn_cast<MergeInputSection<ELFT>>(S)) + MS->splitIntoPieces(); + }); - writeResult<ELFT>(&Symtab); + // Write the result to the file. 
+ writeResult<ELFT>(); } diff --git a/ELF/Driver.h b/ELF/Driver.h index 6b9b9bb208e5..cba1eb164fdd 100644 --- a/ELF/Driver.h +++ b/ELF/Driver.h @@ -12,6 +12,7 @@ #include "SymbolTable.h" #include "lld/Core/LLVM.h" +#include "lld/Core/Reproduce.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSet.h" @@ -23,14 +24,11 @@ namespace elf { extern class LinkerDriver *Driver; -class CpioFile; - class LinkerDriver { public: - void main(ArrayRef<const char *> Args); + void main(ArrayRef<const char *> Args, bool CanExitEarly); void addFile(StringRef Path); void addLibrary(StringRef Name); - llvm::LLVMContext Context; // to parse bitcode ifles std::unique_ptr<CpioFile> Cpio; // for reproduce private: @@ -38,17 +36,19 @@ private: llvm::Optional<MemoryBufferRef> readFile(StringRef Path); void readConfigs(llvm::opt::InputArgList &Args); void createFiles(llvm::opt::InputArgList &Args); + void inferMachineType(); template <class ELFT> void link(llvm::opt::InputArgList &Args); // True if we are in --whole-archive and --no-whole-archive. - bool WholeArchive = false; + bool InWholeArchive = false; // True if we are in --start-lib and --end-lib. bool InLib = false; - llvm::BumpPtrAllocator Alloc; - std::vector<std::unique_ptr<InputFile>> Files; - std::vector<std::unique_ptr<MemoryBuffer>> OwningMBs; + // True if we are in -format=binary and -format=elf. + bool InBinary = false; + + std::vector<InputFile *> Files; }; // Parses command line options. @@ -56,9 +56,6 @@ class ELFOptTable : public llvm::opt::OptTable { public: ELFOptTable(); llvm::opt::InputArgList parse(ArrayRef<const char *> Argv); - -private: - llvm::BumpPtrAllocator Alloc; }; // Create enum with OPT_xxx values for each option in Options.td @@ -69,41 +66,13 @@ enum { #undef OPTION }; -// This is the class to create a .cpio file for --reproduce. 
-// -// If "--reproduce foo" is given, we create a file "foo.cpio" and -// copy all input files to the archive, along with a response file -// to re-run the same command with the same inputs. -// It is useful for reporting issues to LLD developers. -// -// Cpio as a file format is a deliberate choice. It's standardized in -// POSIX and very easy to create. cpio command is available virtually -// on all Unix systems. See -// http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_07 -// for the format details. -class CpioFile { -public: - static CpioFile *create(StringRef OutputPath); - void append(StringRef Path, StringRef Data); - -private: - CpioFile(std::unique_ptr<llvm::raw_fd_ostream> OS, StringRef Basename); - - std::unique_ptr<llvm::raw_fd_ostream> OS; - llvm::StringSet<> Seen; - std::string Basename; -}; - void printHelp(const char *Argv0); -std::string getVersionString(); std::vector<uint8_t> parseHexstring(StringRef S); std::string createResponseFile(const llvm::opt::InputArgList &Args); -std::string relativeToRoot(StringRef Path); -std::string findFromSearchPaths(StringRef Path); -std::string searchLibrary(StringRef Path); -std::string buildSysrootedPath(llvm::StringRef Dir, llvm::StringRef File); +llvm::Optional<std::string> findFromSearchPaths(StringRef Path); +llvm::Optional<std::string> searchLibrary(StringRef Path); } // namespace elf } // namespace lld diff --git a/ELF/DriverUtils.cpp b/ELF/DriverUtils.cpp index 3f18259b4ae7..a81b133f674b 100644 --- a/ELF/DriverUtils.cpp +++ b/ELF/DriverUtils.cpp @@ -15,14 +15,18 @@ #include "Driver.h" #include "Error.h" +#include "Memory.h" +#include "ScriptParser.h" #include "lld/Config/Version.h" +#include "lld/Core/Reproduce.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Triple.h" #include "llvm/Option/Option.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" -#include 
"llvm/Support/StringSaver.h" +#include "llvm/Support/Process.h" using namespace llvm; using namespace llvm::sys; @@ -40,16 +44,37 @@ using namespace lld::elf; // Create table mapping all options defined in Options.td static const opt::OptTable::Info OptInfo[] = { #define OPTION(X1, X2, ID, KIND, GROUP, ALIAS, X6, X7, X8, X9, X10) \ - { \ - X1, X2, X9, X10, OPT_##ID, opt::Option::KIND##Class, X8, X7, OPT_##GROUP, \ - OPT_##ALIAS, X6 \ - }, + {X1, X2, X9, X10, OPT_##ID, opt::Option::KIND##Class, \ + X8, X7, OPT_##GROUP, OPT_##ALIAS, X6}, #include "Options.inc" #undef OPTION }; ELFOptTable::ELFOptTable() : OptTable(OptInfo) {} +// Parse -color-diagnostics={auto,always,never} or -no-color-diagnostics. +static bool getColorDiagnostics(opt::InputArgList &Args) { + bool Default = (ErrorOS == &errs() && Process::StandardErrHasColors()); + + auto *Arg = Args.getLastArg(OPT_color_diagnostics, OPT_color_diagnostics_eq, + OPT_no_color_diagnostics); + if (!Arg) + return Default; + if (Arg->getOption().getID() == OPT_color_diagnostics) + return true; + if (Arg->getOption().getID() == OPT_no_color_diagnostics) + return false; + + StringRef S = Arg->getValue(); + if (S == "auto") + return Default; + if (S == "always") + return true; + if (S != "never") + error("unknown option: -color-diagnostics=" + S); + return false; +} + static cl::TokenizerCallback getQuotingStyle(opt::InputArgList &Args) { if (auto *Arg = Args.getLastArg(OPT_rsp_quoting)) { StringRef S = Arg->getValue(); @@ -76,16 +101,16 @@ opt::InputArgList ELFOptTable::parse(ArrayRef<const char *> Argv) { // --rsp-quoting. opt::InputArgList Args = this->ParseArgs(Vec, MissingIndex, MissingCount); - // Expand response files. '@<filename>' is replaced by the file's contents. - StringSaver Saver(Alloc); + // Expand response files (arguments in the form of @<filename>) + // and then parse the argument again. cl::ExpandResponseFiles(Saver, getQuotingStyle(Args), Vec); - - // Parse options and then do error checking. 
Args = this->ParseArgs(Vec, MissingIndex, MissingCount); + + // Interpret -color-diagnostics early so that error messages + // for unknown flags are colored. + Config->ColorDiagnostics = getColorDiagnostics(Args); if (MissingCount) - error(Twine("missing arg value for \"") + Args.getArgString(MissingIndex) + - "\", expected " + Twine(MissingCount) + - (MissingCount == 1 ? " argument.\n" : " arguments")); + error(Twine(Args.getArgString(MissingIndex)) + ": missing argument"); for (auto *Arg : Args.filtered(OPT_UNKNOWN)) error("unknown argument: " + Arg->getSpelling()); @@ -97,115 +122,6 @@ void elf::printHelp(const char *Argv0) { Table.PrintHelp(outs(), Argv0, "lld", false); } -std::string elf::getVersionString() { - std::string Version = getLLDVersion(); - std::string Repo = getLLDRepositoryVersion(); - if (Repo.empty()) - return "LLD " + Version + "\n"; - return "LLD " + Version + " " + Repo + "\n"; -} - -// Makes a given pathname an absolute path first, and then remove -// beginning /. For example, "../foo.o" is converted to "home/john/foo.o", -// assuming that the current directory is "/home/john/bar". -std::string elf::relativeToRoot(StringRef Path) { - SmallString<128> Abs = Path; - if (std::error_code EC = fs::make_absolute(Abs)) - fatal("make_absolute failed: " + EC.message()); - path::remove_dots(Abs, /*remove_dot_dot=*/true); - - // This is Windows specific. root_name() returns a drive letter - // (e.g. "c:") or a UNC name (//net). We want to keep it as part - // of the result. 
- SmallString<128> Res; - StringRef Root = path::root_name(Abs); - if (Root.endswith(":")) - Res = Root.drop_back(); - else if (Root.startswith("//")) - Res = Root.substr(2); - - path::append(Res, path::relative_path(Abs)); - return Res.str(); -} - -CpioFile::CpioFile(std::unique_ptr<raw_fd_ostream> OS, StringRef S) - : OS(std::move(OS)), Basename(S) {} - -CpioFile *CpioFile::create(StringRef OutputPath) { - std::string Path = (OutputPath + ".cpio").str(); - std::error_code EC; - auto OS = llvm::make_unique<raw_fd_ostream>(Path, EC, fs::F_None); - if (EC) { - error(EC, "--reproduce: failed to open " + Path); - return nullptr; - } - return new CpioFile(std::move(OS), path::filename(OutputPath)); -} - -static void writeMember(raw_fd_ostream &OS, StringRef Path, StringRef Data) { - // The c_dev/c_ino pair should be unique according to the spec, - // but no one seems to care. - OS << "070707"; // c_magic - OS << "000000"; // c_dev - OS << "000000"; // c_ino - OS << "100664"; // c_mode: C_ISREG | rw-rw-r-- - OS << "000000"; // c_uid - OS << "000000"; // c_gid - OS << "000001"; // c_nlink - OS << "000000"; // c_rdev - OS << "00000000000"; // c_mtime - OS << format("%06o", Path.size() + 1); // c_namesize - OS << format("%011o", Data.size()); // c_filesize - OS << Path << '\0'; // c_name - OS << Data; // c_filedata -} - -void CpioFile::append(StringRef Path, StringRef Data) { - if (!Seen.insert(Path).second) - return; - - // Construct an in-archive filename so that /home/foo/bar is stored - // as baz/home/foo/bar where baz is the basename of the output file. - // (i.e. in that case we are creating baz.cpio.) - SmallString<128> Fullpath; - path::append(Fullpath, Basename, Path); - - // Use unix path separators so the cpio can be extracted on both unix and - // windows. - std::replace(Fullpath.begin(), Fullpath.end(), '\\', '/'); - - writeMember(*OS, Fullpath, Data); - - // Print the trailer and seek back. - // This way we have a valid archive if we crash. 
- uint64_t Pos = OS->tell(); - writeMember(*OS, "TRAILER!!!", ""); - OS->seek(Pos); -} - -// Quote a given string if it contains a space character. -static std::string quote(StringRef S) { - if (S.find(' ') == StringRef::npos) - return S; - return ("\"" + S + "\"").str(); -} - -static std::string rewritePath(StringRef S) { - if (fs::exists(S)) - return relativeToRoot(S); - return S; -} - -static std::string stringize(opt::Arg *Arg) { - std::string K = Arg->getSpelling(); - if (Arg->getNumValues() == 0) - return K; - std::string V = quote(Arg->getValue()); - if (Arg->getOption().getRenderStyle() == opt::Option::RenderJoinedStyle) - return K + V; - return K + " " + V; -} - // Reconstructs command line arguments so that so that you can re-run // the same command with the same inputs. This is for --reproduce. std::string elf::createResponseFile(const opt::InputArgList &Args) { @@ -226,8 +142,8 @@ std::string elf::createResponseFile(const opt::InputArgList &Args) { case OPT_alias_script_T: case OPT_script: case OPT_version_script: - OS << Arg->getSpelling() << " " - << quote(rewritePath(Arg->getValue())) << "\n"; + OS << Arg->getSpelling() << " " << quote(rewritePath(Arg->getValue())) + << "\n"; break; default: OS << stringize(Arg) << "\n"; @@ -236,41 +152,39 @@ std::string elf::createResponseFile(const opt::InputArgList &Args) { return Data.str(); } -std::string elf::findFromSearchPaths(StringRef Path) { - for (StringRef Dir : Config->SearchPaths) { - std::string FullPath = buildSysrootedPath(Dir, Path); - if (fs::exists(FullPath)) - return FullPath; - } - return ""; +// Find a file by concatenating given paths. If a resulting path +// starts with "=", the character is replaced with a --sysroot value. 
+static Optional<std::string> findFile(StringRef Path1, const Twine &Path2) { + SmallString<128> S; + if (Path1.startswith("=")) + path::append(S, Config->Sysroot, Path1.substr(1), Path2); + else + path::append(S, Path1, Path2); + + if (fs::exists(S)) + return S.str().str(); + return None; +} + +Optional<std::string> elf::findFromSearchPaths(StringRef Path) { + for (StringRef Dir : Config->SearchPaths) + if (Optional<std::string> S = findFile(Dir, Path)) + return S; + return None; } -// Searches a given library from input search paths, which are filled -// from -L command line switches. Returns a path to an existent library file. -std::string elf::searchLibrary(StringRef Path) { - if (Path.startswith(":")) - return findFromSearchPaths(Path.substr(1)); +// This is for -lfoo. We'll look for libfoo.so or libfoo.a from +// search paths. +Optional<std::string> elf::searchLibrary(StringRef Name) { + if (Name.startswith(":")) + return findFromSearchPaths(Name.substr(1)); + for (StringRef Dir : Config->SearchPaths) { - if (!Config->Static) { - std::string S = buildSysrootedPath(Dir, ("lib" + Path + ".so").str()); - if (fs::exists(S)) + if (!Config->Static) + if (Optional<std::string> S = findFile(Dir, "lib" + Name + ".so")) return S; - } - std::string S = buildSysrootedPath(Dir, ("lib" + Path + ".a").str()); - if (fs::exists(S)) + if (Optional<std::string> S = findFile(Dir, "lib" + Name + ".a")) return S; } - return ""; -} - -// Makes a path by concatenating Dir and File. -// If Dir starts with '=' the result will be preceded by Sysroot, -// which can be set with --sysroot command line switch. 
-std::string elf::buildSysrootedPath(StringRef Dir, StringRef File) { - SmallString<128> Path; - if (Dir.startswith("=")) - path::append(Path, Config->Sysroot, Dir.substr(1), File); - else - path::append(Path, Dir, File); - return Path.str(); + return None; } diff --git a/ELF/EhFrame.cpp b/ELF/EhFrame.cpp index b130ac1ca22d..2428473d9012 100644 --- a/ELF/EhFrame.cpp +++ b/ELF/EhFrame.cpp @@ -18,6 +18,9 @@ #include "EhFrame.h" #include "Error.h" +#include "InputSection.h" +#include "Relocations.h" +#include "Strings.h" #include "llvm/Object/ELF.h" #include "llvm/Support/Dwarf.h" @@ -29,49 +32,93 @@ using namespace llvm::dwarf; using namespace llvm::object; using namespace llvm::support::endian; -namespace lld { -namespace elf { +using namespace lld; +using namespace lld::elf; +namespace { +template <class ELFT> class EhReader { +public: + EhReader(InputSectionBase<ELFT> *S, ArrayRef<uint8_t> D) : IS(S), D(D) {} + size_t readEhRecordSize(); + uint8_t getFdeEncoding(); + +private: + template <class P> void failOn(const P *Loc, const Twine &Msg) { + fatal(IS->getLocation((const uint8_t *)Loc - IS->Data.data()) + ": " + Msg); + } + + uint8_t readByte(); + void skipBytes(size_t Count); + StringRef readString(); + void skipLeb128(); + void skipAugP(); + + InputSectionBase<ELFT> *IS; + ArrayRef<uint8_t> D; +}; +} + +template <class ELFT> +size_t elf::readEhRecordSize(InputSectionBase<ELFT> *S, size_t Off) { + return EhReader<ELFT>(S, S->Data.slice(Off)).readEhRecordSize(); +} // .eh_frame section is a sequence of records. Each record starts with // a 4 byte length field. This function reads the length. -template <class ELFT> size_t readEhRecordSize(ArrayRef<uint8_t> D) { +template <class ELFT> size_t EhReader<ELFT>::readEhRecordSize() { const endianness E = ELFT::TargetEndianness; if (D.size() < 4) - fatal("CIE/FDE too small"); + failOn(D.data(), "CIE/FDE too small"); // First 4 bytes of CIE/FDE is the size of the record. 
// If it is 0xFFFFFFFF, the next 8 bytes contain the size instead, // but we do not support that format yet. uint64_t V = read32<E>(D.data()); if (V == UINT32_MAX) - fatal("CIE/FDE too large"); + failOn(D.data(), "CIE/FDE too large"); uint64_t Size = V + 4; if (Size > D.size()) - fatal("CIE/FIE ends past the end of the section"); + failOn(D.data(), "CIE/FDE ends past the end of the section"); return Size; } // Read a byte and advance D by one byte. -static uint8_t readByte(ArrayRef<uint8_t> &D) { +template <class ELFT> uint8_t EhReader<ELFT>::readByte() { if (D.empty()) - fatal("corrupted or unsupported CIE information"); + failOn(D.data(), "unexpected end of CIE"); uint8_t B = D.front(); D = D.slice(1); return B; } +template <class ELFT> void EhReader<ELFT>::skipBytes(size_t Count) { + if (D.size() < Count) + failOn(D.data(), "CIE is too small"); + D = D.slice(Count); +} + +// Read a null-terminated string. +template <class ELFT> StringRef EhReader<ELFT>::readString() { + const uint8_t *End = std::find(D.begin(), D.end(), '\0'); + if (End == D.end()) + failOn(D.data(), "corrupted CIE (failed to read string)"); + StringRef S = toStringRef(D.slice(0, End - D.begin())); + D = D.slice(S.size() + 1); + return S; +} + // Skip an integer encoded in the LEB128 format. // Actual number is not of interest because only the runtime needs it. // But we need to be at least able to skip it so that we can read // the field that follows a LEB128 number. 
-static void skipLeb128(ArrayRef<uint8_t> &D) { +template <class ELFT> void EhReader<ELFT>::skipLeb128() { + const uint8_t *ErrPos = D.data(); while (!D.empty()) { uint8_t Val = D.front(); D = D.slice(1); if ((Val & 0x80) == 0) return; } - fatal("corrupted or unsupported CIE information"); + failOn(ErrPos, "corrupted CIE (failed to read LEB128)"); } template <class ELFT> static size_t getAugPSize(unsigned Enc) { @@ -89,79 +136,79 @@ template <class ELFT> static size_t getAugPSize(unsigned Enc) { case DW_EH_PE_sdata8: return 8; } - fatal("unknown FDE encoding"); + return 0; } -template <class ELFT> static void skipAugP(ArrayRef<uint8_t> &D) { - uint8_t Enc = readByte(D); +template <class ELFT> void EhReader<ELFT>::skipAugP() { + uint8_t Enc = readByte(); if ((Enc & 0xf0) == DW_EH_PE_aligned) - fatal("DW_EH_PE_aligned encoding is not supported"); + failOn(D.data() - 1, "DW_EH_PE_aligned encoding is not supported"); size_t Size = getAugPSize<ELFT>(Enc); + if (Size == 0) + failOn(D.data() - 1, "unknown FDE encoding"); if (Size >= D.size()) - fatal("corrupted CIE"); + failOn(D.data() - 1, "corrupted CIE"); D = D.slice(Size); } -template <class ELFT> uint8_t getFdeEncoding(ArrayRef<uint8_t> D) { - if (D.size() < 8) - fatal("CIE too small"); - D = D.slice(8); +template <class ELFT> uint8_t elf::getFdeEncoding(EhSectionPiece *P) { + auto *IS = static_cast<InputSectionBase<ELFT> *>(P->ID); + return EhReader<ELFT>(IS, P->data()).getFdeEncoding(); +} - uint8_t Version = readByte(D); +template <class ELFT> uint8_t EhReader<ELFT>::getFdeEncoding() { + skipBytes(8); + int Version = readByte(); if (Version != 1 && Version != 3) - fatal("FDE version 1 or 3 expected, but got " + Twine((unsigned)Version)); + failOn(D.data() - 1, + "FDE version 1 or 3 expected, but got " + Twine(Version)); - const unsigned char *AugEnd = std::find(D.begin(), D.end(), '\0'); - if (AugEnd == D.end()) - fatal("corrupted CIE"); - StringRef Aug(reinterpret_cast<const char *>(D.begin()), AugEnd - 
D.begin()); - D = D.slice(Aug.size() + 1); + StringRef Aug = readString(); - // Code alignment factor should always be 1 for .eh_frame. - if (readByte(D) != 1) - fatal("CIE code alignment must be 1"); - - // Skip data alignment factor. - skipLeb128(D); + // Skip code and data alignment factors. + skipLeb128(); + skipLeb128(); // Skip the return address register. In CIE version 1 this is a single // byte. In CIE version 3 this is an unsigned LEB128. if (Version == 1) - readByte(D); + readByte(); else - skipLeb128(D); + skipLeb128(); // We only care about an 'R' value, but other records may precede an 'R' // record. Unfortunately records are not in TLV (type-length-value) format, // so we need to teach the linker how to skip records for each type. for (char C : Aug) { if (C == 'R') - return readByte(D); + return readByte(); if (C == 'z') { - skipLeb128(D); + skipLeb128(); continue; } if (C == 'P') { - skipAugP<ELFT>(D); + skipAugP(); continue; } if (C == 'L') { - readByte(D); + readByte(); continue; } - fatal("unknown .eh_frame augmentation string: " + Aug); + failOn(Aug.data(), "unknown .eh_frame augmentation string: " + Aug); } return DW_EH_PE_absptr; } -template size_t readEhRecordSize<ELF32LE>(ArrayRef<uint8_t>); -template size_t readEhRecordSize<ELF32BE>(ArrayRef<uint8_t>); -template size_t readEhRecordSize<ELF64LE>(ArrayRef<uint8_t>); -template size_t readEhRecordSize<ELF64BE>(ArrayRef<uint8_t>); - -template uint8_t getFdeEncoding<ELF32LE>(ArrayRef<uint8_t>); -template uint8_t getFdeEncoding<ELF32BE>(ArrayRef<uint8_t>); -template uint8_t getFdeEncoding<ELF64LE>(ArrayRef<uint8_t>); -template uint8_t getFdeEncoding<ELF64BE>(ArrayRef<uint8_t>); -} -} +template size_t elf::readEhRecordSize<ELF32LE>(InputSectionBase<ELF32LE> *S, + size_t Off); +template size_t elf::readEhRecordSize<ELF32BE>(InputSectionBase<ELF32BE> *S, + size_t Off); +template size_t elf::readEhRecordSize<ELF64LE>(InputSectionBase<ELF64LE> *S, + size_t Off); +template size_t 
elf::readEhRecordSize<ELF64BE>(InputSectionBase<ELF64BE> *S, + size_t Off); + +template uint8_t elf::getFdeEncoding<ELF32LE>(EhSectionPiece *P); +template uint8_t elf::getFdeEncoding<ELF32BE>(EhSectionPiece *P); +template uint8_t elf::getFdeEncoding<ELF64LE>(EhSectionPiece *P); +template uint8_t elf::getFdeEncoding<ELF64BE>(EhSectionPiece *P); diff --git a/ELF/EhFrame.h b/ELF/EhFrame.h index 0d5a2ff2f417..cadc93d3a2e4 100644 --- a/ELF/EhFrame.h +++ b/ELF/EhFrame.h @@ -14,8 +14,12 @@ namespace lld { namespace elf { -template <class ELFT> size_t readEhRecordSize(ArrayRef<uint8_t> Data); -template <class ELFT> uint8_t getFdeEncoding(ArrayRef<uint8_t> Data); +template <class ELFT> class InputSectionBase; +struct EhSectionPiece; + +template <class ELFT> +size_t readEhRecordSize(InputSectionBase<ELFT> *S, size_t Off); +template <class ELFT> uint8_t getFdeEncoding(EhSectionPiece *P); } } diff --git a/ELF/Error.cpp b/ELF/Error.cpp index 59a49c17b97c..6e30f08143ed 100644 --- a/ELF/Error.cpp +++ b/ELF/Error.cpp @@ -12,54 +12,95 @@ #include "llvm/ADT/Twine.h" #include "llvm/Support/Error.h" +#include "llvm/Support/ManagedStatic.h" #include "llvm/Support/raw_ostream.h" +#include <mutex> +#if !defined(_MSC_VER) && !defined(__MINGW32__) +#include <unistd.h> +#endif + +using namespace lld::elf; using namespace llvm; namespace lld { -namespace elf { -bool HasError; -raw_ostream *ErrorOS; +uint64_t elf::ErrorCount; +raw_ostream *elf::ErrorOS; +StringRef elf::Argv0; + +// The functions defined in this file can be called from multiple threads, +// but outs() or errs() are not thread-safe. We protect them using a mutex. 
+static std::mutex Mu; + +static void print(StringRef S, raw_ostream::Colors C) { + *ErrorOS << Argv0 + ": "; + if (Config->ColorDiagnostics) { + ErrorOS->changeColor(C, true); + *ErrorOS << S; + ErrorOS->resetColor(); + } else { + *ErrorOS << S; + } +} -void log(const Twine &Msg) { +void elf::log(const Twine &Msg) { + std::lock_guard<std::mutex> Lock(Mu); if (Config->Verbose) - outs() << Msg << "\n"; + outs() << Argv0 << ": " << Msg << "\n"; } -void warning(const Twine &Msg) { - if (Config->FatalWarnings) +void elf::warn(const Twine &Msg) { + if (Config->FatalWarnings) { error(Msg); - else - *ErrorOS << Msg << "\n"; + return; + } + std::lock_guard<std::mutex> Lock(Mu); + print("warning: ", raw_ostream::MAGENTA); + *ErrorOS << Msg << "\n"; } -void error(const Twine &Msg) { - *ErrorOS << Msg << "\n"; - HasError = true; +void elf::error(const Twine &Msg) { + std::lock_guard<std::mutex> Lock(Mu); + + if (Config->ErrorLimit == 0 || ErrorCount < Config->ErrorLimit) { + print("error: ", raw_ostream::RED); + *ErrorOS << Msg << "\n"; + } else if (ErrorCount == Config->ErrorLimit) { + print("error: ", raw_ostream::RED); + *ErrorOS << "too many errors emitted, stopping now" + << " (use -error-limit=0 to see all errors)\n"; + if (Config->ExitEarly) + exitLld(1); + } + + ++ErrorCount; } -void error(std::error_code EC, const Twine &Prefix) { +void elf::error(std::error_code EC, const Twine &Prefix) { error(Prefix + ": " + EC.message()); } -void fatal(const Twine &Msg) { - *ErrorOS << Msg << "\n"; - exit(1); -} +void elf::exitLld(int Val) { + // Dealloc/destroy ManagedStatic variables before calling + // _exit(). In a non-LTO build, this is a nop. In an LTO + // build allows us to get the output of -time-passes. 
+ llvm_shutdown(); -void fatal(const Twine &Msg, const Twine &Prefix) { - fatal(Prefix + ": " + Msg); + outs().flush(); + errs().flush(); + _exit(Val); } -void check(std::error_code EC) { - if (EC) - fatal(EC.message()); +void elf::fatal(const Twine &Msg) { + std::lock_guard<std::mutex> Lock(Mu); + print("error: ", raw_ostream::RED); + *ErrorOS << Msg << "\n"; + exitLld(1); } -void check(Error Err) { - check(errorToErrorCode(std::move(Err))); +void elf::fatal(std::error_code EC, const Twine &Prefix) { + fatal(Prefix + ": " + EC.message()); } -} // namespace elf } // namespace lld diff --git a/ELF/Error.h b/ELF/Error.h index 552f50498464..1ec683595cf4 100644 --- a/ELF/Error.h +++ b/ELF/Error.h @@ -6,31 +6,47 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +// +// In LLD, we have three levels of errors: fatal, error or warn. +// +// Fatal makes the program exit immediately with an error message. +// You shouldn't use it except for reporting a corrupted input file. +// +// Error prints out an error message and increment a global variable +// ErrorCount to record the fact that we met an error condition. It does +// not exit, so it is safe for a lld-as-a-library use case. It is generally +// useful because it can report more than one errors in a single run. +// +// Warn doesn't do anything but printing out a given message. 
+// +//===----------------------------------------------------------------------===// -#ifndef LLD_COFF_ERROR_H -#define LLD_COFF_ERROR_H +#ifndef LLD_ELF_ERROR_H +#define LLD_ELF_ERROR_H #include "lld/Core/LLVM.h" +#include "llvm/Support/Error.h" + namespace lld { namespace elf { -extern bool HasError; +extern uint64_t ErrorCount; extern llvm::raw_ostream *ErrorOS; +extern llvm::StringRef Argv0; void log(const Twine &Msg); -void warning(const Twine &Msg); +void warn(const Twine &Msg); void error(const Twine &Msg); void error(std::error_code EC, const Twine &Prefix); -template <typename T> void error(const ErrorOr<T> &V, const Twine &Prefix) { - error(V.getError(), Prefix); -} - +LLVM_ATTRIBUTE_NORETURN void exitLld(int Val); LLVM_ATTRIBUTE_NORETURN void fatal(const Twine &Msg); -LLVM_ATTRIBUTE_NORETURN void fatal(const Twine &Msg, const Twine &Prefix); +LLVM_ATTRIBUTE_NORETURN void fatal(std::error_code EC, const Twine &Prefix); +// check() functions are convenient functions to strip errors +// from error-or-value objects. 
template <class T> T check(ErrorOr<T> E) { if (auto EC = E.getError()) fatal(EC.message()); @@ -39,19 +55,23 @@ template <class T> T check(ErrorOr<T> E) { template <class T> T check(Expected<T> E) { if (!E) - fatal(errorToErrorCode(E.takeError()).message()); + handleAllErrors(std::move(E.takeError()), + [](llvm::ErrorInfoBase &EIB) -> Error { + fatal(EIB.message()); + return Error::success(); + }); return std::move(*E); } template <class T> T check(ErrorOr<T> E, const Twine &Prefix) { if (auto EC = E.getError()) - fatal(EC.message(), Prefix); + fatal(Prefix + ": " + EC.message()); return std::move(*E); } template <class T> T check(Expected<T> E, const Twine &Prefix) { if (!E) - fatal(errorToErrorCode(E.takeError()).message(), Prefix); + fatal(Prefix + ": " + errorToErrorCode(E.takeError()).message()); return std::move(*E); } diff --git a/ELF/GdbIndex.cpp b/ELF/GdbIndex.cpp new file mode 100644 index 000000000000..762144dd0a96 --- /dev/null +++ b/ELF/GdbIndex.cpp @@ -0,0 +1,205 @@ +//===- GdbIndex.cpp -------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// File contains classes for implementation of --gdb-index command line option. +// +// If that option is used, linker should emit a .gdb_index section that allows +// debugger to locate and read .dwo files, containing necessary debug +// information. +// More information about implementation can be found in DWARF specification, +// latest version is available at http://dwarfstd.org. +// +// .gdb_index section format: +// (Information is based on/taken from +// https://sourceware.org/gdb/onlinedocs/gdb/Index-Section-Format.html (*)) +// +// A mapped index consists of several areas, laid out in order: +// 1) The file header. +// 2) "The CU (compilation unit) list.
This is a sequence of pairs of 64-bit +// little-endian values, sorted by the CU offset. The first element in each +// pair is the offset of a CU in the .debug_info section. The second element +// in each pair is the length of that CU. References to a CU elsewhere in the +// map are done using a CU index, which is just the 0-based index into this +// table. Note that if there are type CUs, then conceptually CUs and type CUs +// form a single list for the purposes of CU indices."(*) +// 3) The types CU list. Deprecated as .debug_types does not appear in the DWARF +// v5 specification. +// 4) The address area. The address area is a sequence of address +// entries, where each entry contains low address, high address and CU +// index. +// 5) "The symbol table. This is an open-addressed hash table. The size of the +// hash table is always a power of 2. Each slot in the hash table consists of +// a pair of offset_type values. The first value is the offset of the +// symbol's name in the constant pool. The second value is the offset of the +// CU vector in the constant pool."(*) +// 6) "The constant pool. This is simply a bunch of bytes. It is organized so +// that alignment is correct: CU vectors are stored first, followed by +// strings." (*) +// +// For constructing the .gdb_index section following steps should be performed: +// 1) For file header nothing special should be done. It contains the offsets to +// the areas below. +// 2) Scan the compilation unit headers of the .debug_info sections to build a +// list of compilation units. +// 3) CU Types are no longer needed as DWARF skeleton type units never made it +// into the standard. lld does nothing to support parsing of .debug_types +// and generates empty types CU area in .gdb_index section. +// 4) Address area entries are extracted from DW_TAG_compile_unit DIEs of +// .debug_info sections.
+// 5) For building the symbol table linker extracts the public names from the +// .debug_gnu_pubnames and .debug_gnu_pubtypes sections. Then it builds the +// hashtable in according to .gdb_index format specification. +// 6) Constant pool is populated at the same time as symbol table. +//===----------------------------------------------------------------------===// + +#include "GdbIndex.h" +#include "llvm/DebugInfo/DWARF/DWARFDebugPubTable.h" +#include "llvm/Object/ELFObjectFile.h" + +using namespace llvm; +using namespace llvm::object; +using namespace lld::elf; + +template <class ELFT> +GdbIndexBuilder<ELFT>::GdbIndexBuilder(InputSection<ELFT> *DebugInfoSec) + : DebugInfoSec(DebugInfoSec) { + if (Expected<std::unique_ptr<object::ObjectFile>> Obj = + object::ObjectFile::createObjectFile(DebugInfoSec->getFile()->MB)) + Dwarf.reset(new DWARFContextInMemory(*Obj.get(), this)); + else + error(toString(DebugInfoSec->getFile()) + ": error creating DWARF context"); +} + +template <class ELFT> +std::vector<std::pair<typename ELFT::uint, typename ELFT::uint>> +GdbIndexBuilder<ELFT>::readCUList() { + std::vector<std::pair<uintX_t, uintX_t>> Ret; + for (std::unique_ptr<DWARFCompileUnit> &CU : Dwarf->compile_units()) + Ret.push_back( + {DebugInfoSec->OutSecOff + CU->getOffset(), CU->getLength() + 4}); + return Ret; +} + +template <class ELFT> +std::vector<std::pair<StringRef, uint8_t>> +GdbIndexBuilder<ELFT>::readPubNamesAndTypes() { + const bool IsLE = ELFT::TargetEndianness == llvm::support::little; + StringRef Data[] = {Dwarf->getGnuPubNamesSection(), + Dwarf->getGnuPubTypesSection()}; + + std::vector<std::pair<StringRef, uint8_t>> Ret; + for (StringRef D : Data) { + DWARFDebugPubTable PubTable(D, IsLE, true); + for (const DWARFDebugPubTable::Set &S : PubTable.getData()) + for (const DWARFDebugPubTable::Entry &E : S.Entries) + Ret.push_back({E.Name, E.Descriptor.toBits()}); + } + return Ret; +} + +std::pair<bool, GdbSymbol *> GdbHashTab::add(uint32_t Hash, size_t Offset) 
{ + if (Size * 4 / 3 >= Table.size()) + expand(); + + GdbSymbol **Slot = findSlot(Hash, Offset); + bool New = false; + if (*Slot == nullptr) { + ++Size; + *Slot = new (Alloc) GdbSymbol(Hash, Offset); + New = true; + } + return {New, *Slot}; +} + +void GdbHashTab::expand() { + if (Table.empty()) { + Table.resize(InitialSize); + return; + } + std::vector<GdbSymbol *> NewTable(Table.size() * 2); + NewTable.swap(Table); + + for (GdbSymbol *Sym : NewTable) { + if (!Sym) + continue; + GdbSymbol **Slot = findSlot(Sym->NameHash, Sym->NameOffset); + *Slot = Sym; + } +} + +// Methods finds a slot for symbol with given hash. The step size used to find +// the next candidate slot when handling a hash collision is specified in +// .gdb_index section format. The hash value for a table entry is computed by +// applying an iterative hash function to the symbol's name. +GdbSymbol **GdbHashTab::findSlot(uint32_t Hash, size_t Offset) { + uint32_t Index = Hash & (Table.size() - 1); + uint32_t Step = ((Hash * 17) & (Table.size() - 1)) | 1; + + for (;;) { + GdbSymbol *S = Table[Index]; + if (!S || ((S->NameOffset == Offset) && (S->NameHash == Hash))) + return &Table[Index]; + Index = (Index + Step) & (Table.size() - 1); + } +} + +template <class ELFT> +static InputSectionBase<ELFT> * +findSection(ArrayRef<InputSectionBase<ELFT> *> Arr, uint64_t Offset) { + for (InputSectionBase<ELFT> *S : Arr) + if (S && S != &InputSection<ELFT>::Discarded) + if (Offset >= S->Offset && Offset < S->Offset + S->getSize()) + return S; + return nullptr; +} + +template <class ELFT> +std::vector<AddressEntry<ELFT>> +GdbIndexBuilder<ELFT>::readAddressArea(size_t CurrentCU) { + std::vector<AddressEntry<ELFT>> Ret; + for (const auto &CU : Dwarf->compile_units()) { + DWARFAddressRangesVector Ranges; + CU->collectAddressRanges(Ranges); + + ArrayRef<InputSectionBase<ELFT> *> Sections = + DebugInfoSec->getFile()->getSections(); + + for (std::pair<uint64_t, uint64_t> &R : Ranges) + if (InputSectionBase<ELFT> *S = 
findSection(Sections, R.first)) + Ret.push_back( + {S, R.first - S->Offset, R.second - S->Offset, CurrentCU}); + ++CurrentCU; + } + return Ret; +} + +// We return file offset as load address for allocatable sections. That is +// currently used for collecting address ranges in readAddressArea(). We are +// able then to find section index that range belongs to. +template <class ELFT> +uint64_t GdbIndexBuilder<ELFT>::getSectionLoadAddress( + const object::SectionRef &Sec) const { + if (static_cast<const ELFSectionRef &>(Sec).getFlags() & ELF::SHF_ALLOC) + return static_cast<const ELFSectionRef &>(Sec).getOffset(); + return 0; +} + +template <class ELFT> +std::unique_ptr<LoadedObjectInfo> GdbIndexBuilder<ELFT>::clone() const { + return {}; +} + +namespace lld { +namespace elf { +template class GdbIndexBuilder<ELF32LE>; +template class GdbIndexBuilder<ELF32BE>; +template class GdbIndexBuilder<ELF64LE>; +template class GdbIndexBuilder<ELF64BE>; +} +} diff --git a/ELF/GdbIndex.h b/ELF/GdbIndex.h new file mode 100644 index 000000000000..c761ea173a8d --- /dev/null +++ b/ELF/GdbIndex.h @@ -0,0 +1,99 @@ +//===- GdbIndex.h --------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===-------------------------------------------------------------------===// + +#ifndef LLD_ELF_GDB_INDEX_H +#define LLD_ELF_GDB_INDEX_H + +#include "InputFiles.h" +#include "llvm/Object/ELF.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" + +namespace lld { +namespace elf { + +template <class ELFT> class InputSection; + +// Struct represents single entry of address area of gdb index. 
+template <class ELFT> struct AddressEntry { + InputSectionBase<ELFT> *Section; + uint64_t LowAddress; + uint64_t HighAddress; + size_t CuIndex; +}; + +// GdbIndexBuilder is a helper class used for extracting data required +// for building .gdb_index section from objects. +template <class ELFT> class GdbIndexBuilder : public llvm::LoadedObjectInfo { + typedef typename ELFT::uint uintX_t; + + InputSection<ELFT> *DebugInfoSec; + + std::unique_ptr<llvm::DWARFContext> Dwarf; + +public: + GdbIndexBuilder(InputSection<ELFT> *DebugInfoSec); + + // Extracts the compilation units. Each first element of pair is an offset of a + // CU in the .debug_info section and second is the length of that CU. + std::vector<std::pair<uintX_t, uintX_t>> readCUList(); + + // Extracts the vector of address area entries. Accepts global index of last + // parsed CU. + std::vector<AddressEntry<ELFT>> readAddressArea(size_t CurrentCU); + + // Method extracts public names and types. It returns list of name and + // gnu_pub* kind pairs. + std::vector<std::pair<StringRef, uint8_t>> readPubNamesAndTypes(); + +private: + // Method returns section file offset as a load address for DWARF parser. That + // allows to find the target section index for address ranges. + uint64_t + getSectionLoadAddress(const llvm::object::SectionRef &Sec) const override; + std::unique_ptr<llvm::LoadedObjectInfo> clone() const override; +}; + +// Element of GdbHashTab hash table. +struct GdbSymbol { + GdbSymbol(uint32_t Hash, size_t Offset) + : NameHash(Hash), NameOffset(Offset) {} + uint32_t NameHash; + size_t NameOffset; + size_t CuVectorIndex; +}; + +// This class manages the hashed symbol table for the .gdb_index section. +// The hash value for a table entry is computed by applying an iterative hash +// function to the symbol's name.
+class GdbHashTab final { +public: + std::pair<bool, GdbSymbol *> add(uint32_t Hash, size_t Offset); + + size_t getCapacity() { return Table.size(); } + GdbSymbol *getSymbol(size_t I) { return Table[I]; } + +private: + void expand(); + + GdbSymbol **findSlot(uint32_t Hash, size_t Offset); + + llvm::BumpPtrAllocator Alloc; + std::vector<GdbSymbol *> Table; + + // Size keeps the amount of filled entries in Table. + size_t Size = 0; + + // Initial size must be a power of 2. + static const int32_t InitialSize = 1024; +}; + +} // namespace elf +} // namespace lld + +#endif diff --git a/ELF/ICF.cpp b/ELF/ICF.cpp index 10a2603b3b3e..32cd0f8a185c 100644 --- a/ELF/ICF.cpp +++ b/ELF/ICF.cpp @@ -7,63 +7,82 @@ // //===----------------------------------------------------------------------===// // -// Identical Code Folding is a feature to merge sections not by name (which -// is regular comdat handling) but by contents. If two non-writable sections -// have the same data, relocations, attributes, etc., then the two -// are considered identical and merged by the linker. This optimization -// makes outputs smaller. +// ICF is short for Identical Code Folding. This is a size optimization to +// identify and merge two or more read-only sections (typically functions) +// that happened to have the same contents. It usually reduces output size +// by a few percent. // -// ICF is theoretically a problem of reducing graphs by merging as many -// identical subgraphs as possible if we consider sections as vertices and -// relocations as edges. It may sound simple, but it is a bit more -// complicated than you might think. The order of processing sections -// matters because merging two sections can make other sections, whose -// relocations now point to the same section, mergeable. Graphs may contain -// cycles. We need a sophisticated algorithm to do this properly and -// efficiently. 
+// In ICF, two sections are considered identical if they have the same +// section flags, section data, and relocations. Relocations are tricky, +// because two relocations are considered the same if they have the same +// relocation types, values, and if they point to the same sections *in +// terms of ICF*. // -// What we do in this file is this. We split sections into groups. Sections -// in the same group are considered identical. +// Here is an example. If foo and bar defined below are compiled to the +// same machine instructions, ICF can and should merge the two, although +// their relocations point to each other. // -// We begin by optimistically putting all sections into a single equivalence -// class. Then we apply a series of checks that split this initial -// equivalence class into more and more refined equivalence classes based on -// the properties by which a section can be distinguished. +// void foo() { bar(); } +// void bar() { foo(); } // -// We begin by checking that the section contents and flags are the -// same. This only needs to be done once since these properties don't depend -// on the current equivalence class assignment. +// If you merge the two, their relocations point to the same section and +// thus you know they are mergeable, but how do you know they are +// mergeable in the first place? This is not an easy problem to solve. // -// Then we split the equivalence classes based on checking that their -// relocations are the same, where relocation targets are compared by their -// equivalence class, not the concrete section. This may need to be done -// multiple times because as the equivalence classes are refined, two -// sections that had a relocation target in the same equivalence class may -// now target different equivalence classes, and hence these two sections -// must be put in different equivalence classes (whereas in the previous -// iteration they were not since the relocation target was the same.) 
+// What we are doing in LLD is to partition sections into equivalence +// classes. Sections in the same equivalence class when the algorithm +// terminates are considered identical. Here are the details: // -// Our algorithm is smart enough to merge the following mutually-recursive -// functions. +// 1. First, we partition sections using their hash values as keys. Hash +// values are computed from section flags, section sizes and numbers of +// relocations. During this step, relocation targets are not taken into +// account. We just put sections that apparently differ into different +// equivalence classes. // -// void foo() { bar(); } -// void bar() { foo(); } +// 2. Next, for each equivalence class, we visit sections to compare +// relocation targets. Relocation targets are considered equivalent if +// their targets are in the same equivalence class. Sections with +// different relocation targets are put into different equivalence +// classes. +// +// 3. If we split an equivalence class in step 2, two relocations +// that previously targeted the same equivalence class may now target +// different equivalence classes. Therefore, we repeat step 2 until +// convergence is obtained. +// +// 4. For each equivalence class C, pick an arbitrary section in C, and +// merge all the other sections in C with it. +// +// For small programs, this algorithm needs 3-5 iterations. For large +// programs such as Chromium, it takes more than 20 iterations. +// +// This algorithm was mentioned as an "optimistic algorithm" in [1], +// though gold implements a different algorithm than this. +// +// We parallelize each step so that multiple threads can work on different +// equivalence classes concurrently. That gave us a large performance +// boost when applying ICF on large programs. For example, MSVC link.exe +// or GNU gold takes 10-20 seconds to apply ICF on Chromium, whose output +// size is about 1.5 GB, but LLD can finish it in less than 2 seconds on a +// 2.8 GHz 40 core machine. 
Even without threading, LLD's ICF is still +// faster than MSVC or gold though. // -// This algorithm is so-called "optimistic" algorithm described in -// http://research.google.com/pubs/pub36912.html. (Note that what GNU -// gold implemented is different from the optimistic algorithm.) +// [1] Safe ICF: Pointer Safe and Unwinding aware Identical Code Folding +// in the Gold Linker +// http://static.googleusercontent.com/media/research.google.com/en//pubs/archive/36912.pdf // //===----------------------------------------------------------------------===// #include "ICF.h" #include "Config.h" -#include "OutputSections.h" #include "SymbolTable.h" +#include "Threads.h" #include "llvm/ADT/Hashing.h" #include "llvm/Object/ELF.h" #include "llvm/Support/ELF.h" -#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <atomic> using namespace lld; using namespace lld::elf; @@ -71,143 +90,132 @@ using namespace llvm; using namespace llvm::ELF; using namespace llvm::object; -namespace lld { -namespace elf { +namespace { template <class ELFT> class ICF { - typedef typename ELFT::Shdr Elf_Shdr; - typedef typename ELFT::Sym Elf_Sym; - typedef typename ELFT::uint uintX_t; - typedef Elf_Rel_Impl<ELFT, false> Elf_Rel; - - using Comparator = std::function<bool(const InputSection<ELFT> *, - const InputSection<ELFT> *)>; - public: void run(); private: - uint64_t NextId = 1; - - static void setLive(SymbolTable<ELFT> *S); - static uint64_t relSize(InputSection<ELFT> *S); - static uint64_t getHash(InputSection<ELFT> *S); - static bool isEligible(InputSectionBase<ELFT> *Sec); - static std::vector<InputSection<ELFT> *> getSections(); - - void segregate(InputSection<ELFT> **Begin, InputSection<ELFT> **End, - Comparator Eq); - - void forEachGroup(std::vector<InputSection<ELFT> *> &V, Comparator Eq); + void segregate(size_t Begin, size_t End, bool Constant); template <class RelTy> - static bool relocationEq(ArrayRef<RelTy> RA, ArrayRef<RelTy> RB); + bool 
constantEq(ArrayRef<RelTy> RelsA, ArrayRef<RelTy> RelsB); template <class RelTy> - static bool variableEq(const InputSection<ELFT> *A, - const InputSection<ELFT> *B, ArrayRef<RelTy> RA, - ArrayRef<RelTy> RB); - - static bool equalsConstant(const InputSection<ELFT> *A, - const InputSection<ELFT> *B); - - static bool equalsVariable(const InputSection<ELFT> *A, - const InputSection<ELFT> *B); + bool variableEq(const InputSection<ELFT> *A, ArrayRef<RelTy> RelsA, + const InputSection<ELFT> *B, ArrayRef<RelTy> RelsB); + + bool equalsConstant(const InputSection<ELFT> *A, const InputSection<ELFT> *B); + bool equalsVariable(const InputSection<ELFT> *A, const InputSection<ELFT> *B); + + size_t findBoundary(size_t Begin, size_t End); + + void forEachClassRange(size_t Begin, size_t End, + std::function<void(size_t, size_t)> Fn); + + void forEachClass(std::function<void(size_t, size_t)> Fn); + + std::vector<InputSection<ELFT> *> Sections; + + // We repeat the main loop while `Repeat` is true. + std::atomic<bool> Repeat; + + // The main loop counter. + int Cnt = 0; + + // We have two locations for equivalence classes. On the first iteration + // of the main loop, Class[0] has a valid value, and Class[1] contains + // garbage. We read equivalence classes from slot 0 and write to slot 1. + // So, Class[0] represents the current class, and Class[1] represents + // the next class. On each iteration, we switch their roles and use them + // alternately. + // + // Why are we doing this? Recall that other threads may be working on + // other equivalence classes in parallel. They may read sections that we + // are updating. We cannot update equivalence classes in place because + // it breaks the invariance that all possibly-identical sections must be + // in the same equivalence class at any moment. In other words, the for + // loop to update equivalence classes is not atomic, and that is + // observable from other threads. 
By writing new classes to other + // places, we can keep the invariance. + // + // Below, `Current` has the index of the current class, and `Next` has + // the index of the next class. If threading is enabled, they are either + // (0, 1) or (1, 0). + // + // Note on single-thread: if that's the case, they are always (0, 0) + // because we can safely read the next class without worrying about race + // conditions. Using the same location makes this algorithm converge + // faster because it uses results of the same iteration earlier. + int Current = 0; + int Next = 0; }; } -} // Returns a hash value for S. Note that the information about // relocation targets is not included in the hash value. -template <class ELFT> uint64_t ICF<ELFT>::getHash(InputSection<ELFT> *S) { - uint64_t Flags = S->getSectionHdr()->sh_flags; - uint64_t H = hash_combine(Flags, S->getSize()); - for (const Elf_Shdr *Rel : S->RelocSections) - H = hash_combine(H, (uint64_t)Rel->sh_size); - return H; +template <class ELFT> static uint32_t getHash(InputSection<ELFT> *S) { + return hash_combine(S->Flags, S->getSize(), S->NumRelocations); } -// Returns true if Sec is subject of ICF. -template <class ELFT> bool ICF<ELFT>::isEligible(InputSectionBase<ELFT> *Sec) { - if (!Sec || Sec == &InputSection<ELFT>::Discarded || !Sec->Live) - return false; - auto *S = dyn_cast<InputSection<ELFT>>(Sec); - if (!S) - return false; - +// Returns true if section S is subject of ICF. +template <class ELFT> static bool isEligible(InputSection<ELFT> *S) { // .init and .fini contains instructions that must be executed to // initialize and finalize the process. They cannot and should not // be merged. 
- StringRef Name = S->getSectionName(); - if (Name == ".init" || Name == ".fini") - return false; - - const Elf_Shdr &H = *S->getSectionHdr(); - return (H.sh_flags & SHF_ALLOC) && (~H.sh_flags & SHF_WRITE); -} - -template <class ELFT> -std::vector<InputSection<ELFT> *> ICF<ELFT>::getSections() { - std::vector<InputSection<ELFT> *> V; - for (const std::unique_ptr<ObjectFile<ELFT>> &F : - Symtab<ELFT>::X->getObjectFiles()) - for (InputSectionBase<ELFT> *S : F->getSections()) - if (isEligible(S)) - V.push_back(cast<InputSection<ELFT>>(S)); - return V; + return S->Live && (S->Flags & SHF_ALLOC) && !(S->Flags & SHF_WRITE) && + S->Name != ".init" && S->Name != ".fini"; } -// All sections between Begin and End must have the same group ID before -// you call this function. This function compare sections between Begin -// and End using Eq and assign new group IDs for new groups. +// Split an equivalence class into smaller classes. template <class ELFT> -void ICF<ELFT>::segregate(InputSection<ELFT> **Begin, InputSection<ELFT> **End, - Comparator Eq) { - // This loop rearranges [Begin, End) so that all sections that are - // equal in terms of Eq are contiguous. The algorithm is quadratic in - // the worst case, but that is not an issue in practice because the - // number of distinct sections in [Begin, End) is usually very small. - InputSection<ELFT> **I = Begin; - for (;;) { - InputSection<ELFT> *Head = *I; +void ICF<ELFT>::segregate(size_t Begin, size_t End, bool Constant) { + // This loop rearranges sections in [Begin, End) so that all sections + // that are equal in terms of equals{Constant,Variable} are contiguous + // in [Begin, End). + // + // The algorithm is quadratic in the worst case, but that is not an + // issue in practice because the number of the distinct sections in + // each range is usually very small. + + while (Begin < End) { + // Divide [Begin, End) into two. Let Mid be the start index of the + // second group. 
auto Bound = std::stable_partition( - I + 1, End, [&](InputSection<ELFT> *S) { return Eq(Head, S); }); - if (Bound == End) - return; - uint64_t Id = NextId++; - for (; I != Bound; ++I) - (*I)->GroupId = Id; - } -} - -template <class ELFT> -void ICF<ELFT>::forEachGroup(std::vector<InputSection<ELFT> *> &V, - Comparator Eq) { - for (InputSection<ELFT> **I = V.data(), **E = I + V.size(); I != E;) { - InputSection<ELFT> *Head = *I; - auto Bound = std::find_if(I + 1, E, [&](InputSection<ELFT> *S) { - return S->GroupId != Head->GroupId; - }); - segregate(I, Bound, Eq); - I = Bound; + Sections.begin() + Begin + 1, Sections.begin() + End, + [&](InputSection<ELFT> *S) { + if (Constant) + return equalsConstant(Sections[Begin], S); + return equalsVariable(Sections[Begin], S); + }); + size_t Mid = Bound - Sections.begin(); + + // Now we split [Begin, End) into [Begin, Mid) and [Mid, End) by + // updating the sections in [Begin, End). We use Mid as an equivalence + // class ID because every group ends with a unique index. + for (size_t I = Begin; I < Mid; ++I) + Sections[I]->Class[Next] = Mid; + + // If we created a group, we need to iterate the main loop again. + if (Mid != End) + Repeat = true; + + Begin = Mid; } } // Compare two lists of relocations. 
template <class ELFT> template <class RelTy> -bool ICF<ELFT>::relocationEq(ArrayRef<RelTy> RelsA, ArrayRef<RelTy> RelsB) { - const RelTy *IA = RelsA.begin(); - const RelTy *EA = RelsA.end(); - const RelTy *IB = RelsB.begin(); - const RelTy *EB = RelsB.end(); - if (EA - IA != EB - IB) - return false; - for (; IA != EA; ++IA, ++IB) - if (IA->r_offset != IB->r_offset || - IA->getType(Config->Mips64EL) != IB->getType(Config->Mips64EL) || - getAddend<ELFT>(*IA) != getAddend<ELFT>(*IB)) - return false; - return true; +bool ICF<ELFT>::constantEq(ArrayRef<RelTy> RelsA, ArrayRef<RelTy> RelsB) { + auto Eq = [](const RelTy &A, const RelTy &B) { + return A.r_offset == B.r_offset && + A.getType(Config->Mips64EL) == B.getType(Config->Mips64EL) && + getAddend<ELFT>(A) == getAddend<ELFT>(B); + }; + + return RelsA.size() == RelsB.size() && + std::equal(RelsA.begin(), RelsA.end(), RelsB.begin(), Eq); } // Compare "non-moving" part of two InputSections, namely everything @@ -215,125 +223,155 @@ bool ICF<ELFT>::relocationEq(ArrayRef<RelTy> RelsA, ArrayRef<RelTy> RelsB) { template <class ELFT> bool ICF<ELFT>::equalsConstant(const InputSection<ELFT> *A, const InputSection<ELFT> *B) { - if (A->RelocSections.size() != B->RelocSections.size()) + if (A->NumRelocations != B->NumRelocations || A->Flags != B->Flags || + A->getSize() != B->getSize() || A->Data != B->Data) return false; - for (size_t I = 0, E = A->RelocSections.size(); I != E; ++I) { - const Elf_Shdr *RA = A->RelocSections[I]; - const Elf_Shdr *RB = B->RelocSections[I]; - ELFFile<ELFT> &FileA = A->File->getObj(); - ELFFile<ELFT> &FileB = B->File->getObj(); - if (RA->sh_type == SHT_RELA) { - if (!relocationEq(FileA.relas(RA), FileB.relas(RB))) - return false; - } else { - if (!relocationEq(FileA.rels(RA), FileB.rels(RB))) - return false; - } - } - - return A->getSectionHdr()->sh_flags == B->getSectionHdr()->sh_flags && - A->getSize() == B->getSize() && - A->getSectionData() == B->getSectionData(); + if (A->AreRelocsRela) + return 
constantEq(A->relas(), B->relas()); + return constantEq(A->rels(), B->rels()); } +// Compare two lists of relocations. Returns true if all pairs of +// relocations point to the same section in terms of ICF. template <class ELFT> template <class RelTy> -bool ICF<ELFT>::variableEq(const InputSection<ELFT> *A, - const InputSection<ELFT> *B, ArrayRef<RelTy> RelsA, - ArrayRef<RelTy> RelsB) { - const RelTy *IA = RelsA.begin(); - const RelTy *EA = RelsA.end(); - const RelTy *IB = RelsB.begin(); - for (; IA != EA; ++IA, ++IB) { - SymbolBody &SA = A->File->getRelocTargetSym(*IA); - SymbolBody &SB = B->File->getRelocTargetSym(*IB); +bool ICF<ELFT>::variableEq(const InputSection<ELFT> *A, ArrayRef<RelTy> RelsA, + const InputSection<ELFT> *B, ArrayRef<RelTy> RelsB) { + auto Eq = [&](const RelTy &RA, const RelTy &RB) { + // The two sections must be identical. + SymbolBody &SA = A->getFile()->getRelocTargetSym(RA); + SymbolBody &SB = B->getFile()->getRelocTargetSym(RB); if (&SA == &SB) - continue; + return true; - // Or, the symbols should be pointing to the same section - // in terms of the group ID. + // Or, the two sections must be in the same equivalence class. auto *DA = dyn_cast<DefinedRegular<ELFT>>(&SA); auto *DB = dyn_cast<DefinedRegular<ELFT>>(&SB); if (!DA || !DB) return false; if (DA->Value != DB->Value) return false; - InputSection<ELFT> *X = dyn_cast<InputSection<ELFT>>(DA->Section); - InputSection<ELFT> *Y = dyn_cast<InputSection<ELFT>>(DB->Section); - if (X && Y && X->GroupId && X->GroupId == Y->GroupId) - continue; - return false; - } - return true; + + auto *X = dyn_cast<InputSection<ELFT>>(DA->Section); + auto *Y = dyn_cast<InputSection<ELFT>>(DB->Section); + if (!X || !Y) + return false; + + // Ineligible sections are in the special equivalence class 0. + // They can never be the same in terms of the equivalence class. 
+ if (X->Class[Current] == 0) + return false; + + return X->Class[Current] == Y->Class[Current]; + }; + + return std::equal(RelsA.begin(), RelsA.end(), RelsB.begin(), Eq); } // Compare "moving" part of two InputSections, namely relocation targets. template <class ELFT> bool ICF<ELFT>::equalsVariable(const InputSection<ELFT> *A, const InputSection<ELFT> *B) { - for (size_t I = 0, E = A->RelocSections.size(); I != E; ++I) { - const Elf_Shdr *RA = A->RelocSections[I]; - const Elf_Shdr *RB = B->RelocSections[I]; - ELFFile<ELFT> &FileA = A->File->getObj(); - ELFFile<ELFT> &FileB = B->File->getObj(); - if (RA->sh_type == SHT_RELA) { - if (!variableEq(A, B, FileA.relas(RA), FileB.relas(RB))) - return false; - } else { - if (!variableEq(A, B, FileA.rels(RA), FileB.rels(RB))) - return false; - } + if (A->AreRelocsRela) + return variableEq(A, A->relas(), B, B->relas()); + return variableEq(A, A->rels(), B, B->rels()); +} + +template <class ELFT> size_t ICF<ELFT>::findBoundary(size_t Begin, size_t End) { + uint32_t Class = Sections[Begin]->Class[Current]; + for (size_t I = Begin + 1; I < End; ++I) + if (Class != Sections[I]->Class[Current]) + return I; + return End; +} + +// Sections in the same equivalence class are contiguous in Sections +// vector. Therefore, Sections vector can be considered as contiguous +// groups of sections, grouped by the class. +// +// This function calls Fn on every group that starts within [Begin, End). +// Note that a group must start in that range but doesn't necessarily +// have to end before End. +template <class ELFT> +void ICF<ELFT>::forEachClassRange(size_t Begin, size_t End, + std::function<void(size_t, size_t)> Fn) { + if (Begin > 0) + Begin = findBoundary(Begin - 1, End); + + while (Begin < End) { + size_t Mid = findBoundary(Begin, Sections.size()); + Fn(Begin, Mid); + Begin = Mid; } - return true; +} + +// Call Fn on each equivalence class. 
+template <class ELFT> +void ICF<ELFT>::forEachClass(std::function<void(size_t, size_t)> Fn) { + // If threading is disabled or the number of sections is + // too small to use threading, call Fn sequentially. + if (!Config->Threads || Sections.size() < 1024) { + forEachClassRange(0, Sections.size(), Fn); + ++Cnt; + return; + } + + Current = Cnt % 2; + Next = (Cnt + 1) % 2; + + // Split sections into 256 shards and call Fn in parallel. + size_t NumShards = 256; + size_t Step = Sections.size() / NumShards; + forLoop(0, NumShards, + [&](size_t I) { forEachClassRange(I * Step, (I + 1) * Step, Fn); }); + forEachClassRange(Step * NumShards, Sections.size(), Fn); + ++Cnt; } // The main function of ICF. template <class ELFT> void ICF<ELFT>::run() { - // Initially, we use hash values as section group IDs. Therefore, - // if two sections have the same ID, they are likely (but not - // guaranteed) to have the same static contents in terms of ICF. - std::vector<InputSection<ELFT> *> V = getSections(); - for (InputSection<ELFT> *S : V) - // Set MSB on to avoid collisions with serial group IDs - S->GroupId = getHash(S) | (uint64_t(1) << 63); - - // From now on, sections in V are ordered so that sections in - // the same group are consecutive in the vector. - std::stable_sort(V.begin(), V.end(), + // Collect sections to merge. + for (InputSectionBase<ELFT> *Sec : Symtab<ELFT>::X->Sections) + if (auto *S = dyn_cast<InputSection<ELFT>>(Sec)) + if (isEligible(S)) + Sections.push_back(S); + + // Initially, we use hash values to partition sections. + for (InputSection<ELFT> *S : Sections) + // Set MSB to 1 to avoid collisions with non-hash IDs. + S->Class[0] = getHash(S) | (1 << 31); + + // From now on, sections in Sections vector are ordered so that sections + // in the same equivalence class are consecutive in the vector. 
+ std::stable_sort(Sections.begin(), Sections.end(), [](InputSection<ELFT> *A, InputSection<ELFT> *B) { - return A->GroupId < B->GroupId; + return A->Class[0] < B->Class[0]; }); // Compare static contents and assign unique IDs for each static content. - forEachGroup(V, equalsConstant); + forEachClass([&](size_t Begin, size_t End) { segregate(Begin, End, true); }); - // Split groups by comparing relocations until we get a convergence. - int Cnt = 1; - for (;;) { - ++Cnt; - uint64_t Id = NextId; - forEachGroup(V, equalsVariable); - if (Id == NextId) - break; - } - log("ICF needed " + Twine(Cnt) + " iterations."); - - // Merge sections in the same group. - for (auto I = V.begin(), E = V.end(); I != E;) { - InputSection<ELFT> *Head = *I++; - auto Bound = std::find_if(I, E, [&](InputSection<ELFT> *S) { - return Head->GroupId != S->GroupId; - }); - if (I == Bound) - continue; - log("selected " + Head->getSectionName()); - while (I != Bound) { - InputSection<ELFT> *S = *I++; - log(" removed " + S->getSectionName()); - Head->replace(S); + // Split groups by comparing relocations until convergence is obtained. + do { + Repeat = false; + forEachClass( + [&](size_t Begin, size_t End) { segregate(Begin, End, false); }); + } while (Repeat); + + log("ICF needed " + Twine(Cnt) + " iterations"); + + // Merge sections by the equivalence class. + forEachClass([&](size_t Begin, size_t End) { + if (End - Begin == 1) + return; + + log("selected " + Sections[Begin]->Name); + for (size_t I = Begin + 1; I < End; ++I) { + log(" removed " + Sections[I]->Name); + Sections[Begin]->replace(Sections[I]); } - } + }); } // ICF entry point function. 
diff --git a/ELF/InputFiles.cpp b/ELF/InputFiles.cpp index 426d9c39715d..2a8659921463 100644 --- a/ELF/InputFiles.cpp +++ b/ELF/InputFiles.cpp @@ -11,13 +11,20 @@ #include "Driver.h" #include "Error.h" #include "InputSection.h" +#include "LinkerScript.h" +#include "Memory.h" #include "SymbolTable.h" #include "Symbols.h" +#include "SyntheticSections.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/Bitcode/BitcodeReader.h" #include "llvm/CodeGen/Analysis.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" +#include "llvm/LTO/LTO.h" +#include "llvm/MC/StringTableBuilder.h" +#include "llvm/Object/ELFObjectFile.h" #include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" @@ -29,8 +36,68 @@ using namespace llvm::sys::fs; using namespace lld; using namespace lld::elf; +namespace { +// In ELF object file all section addresses are zero. If we have multiple +// .text sections (when using -ffunction-section or comdat group) then +// LLVM DWARF parser will not be able to parse .debug_line correctly, unless +// we assign each section some unique address. This callback method assigns +// each section an address equal to its offset in ELF object file. 
+class ObjectInfo : public LoadedObjectInfo { +public: + uint64_t getSectionLoadAddress(const object::SectionRef &Sec) const override { + return static_cast<const ELFSectionRef &>(Sec).getOffset(); + } + std::unique_ptr<LoadedObjectInfo> clone() const override { + return std::unique_ptr<LoadedObjectInfo>(); + } +}; +} + +template <class ELFT> void elf::ObjectFile<ELFT>::initializeDwarfLine() { + std::unique_ptr<object::ObjectFile> Obj = + check(object::ObjectFile::createObjectFile(this->MB), + "createObjectFile failed"); + + ObjectInfo ObjInfo; + DWARFContextInMemory Dwarf(*Obj, &ObjInfo); + DwarfLine.reset(new DWARFDebugLine(&Dwarf.getLineSection().Relocs)); + DataExtractor LineData(Dwarf.getLineSection().Data, + ELFT::TargetEndianness == support::little, + ELFT::Is64Bits ? 8 : 4); + + // The second parameter is offset in .debug_line section + // for compilation unit (CU) of interest. We have only one + // CU (object file), so offset is always 0. + DwarfLine->getOrParseLineTable(LineData, 0); +} + +// Returns source line information for a given offset +// using DWARF debug info. +template <class ELFT> +std::string elf::ObjectFile<ELFT>::getLineInfo(InputSectionBase<ELFT> *S, + uintX_t Offset) { + if (!DwarfLine) + initializeDwarfLine(); + + // The offset to CU is 0. + const DWARFDebugLine::LineTable *Tbl = DwarfLine->getLineTable(0); + if (!Tbl) + return ""; + + // Use fake address calculated by adding section file offset and offset in + // section. See comments for ObjectInfo class. + DILineInfo Info; + Tbl->getFileLineInfoForAddress( + S->Offset + Offset, nullptr, + DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, Info); + if (Info.Line == 0) + return ""; + return convertToUnixPathSeparator(Info.FileName) + ":" + + std::to_string(Info.Line); +} + // Returns "(internal)", "foo.a(bar.o)" or "baz.o". 
-std::string elf::getFilename(const InputFile *F) { +std::string elf::toString(const InputFile *F) { if (!F) return "(internal)"; if (!F->ArchiveName.empty()) @@ -38,15 +105,6 @@ std::string elf::getFilename(const InputFile *F) { return F->getName(); } -template <class ELFT> -static ELFFile<ELFT> createELFObj(MemoryBufferRef MB) { - std::error_code EC; - ELFFile<ELFT> F(MB.getBuffer(), EC); - if (EC) - error(EC, "failed to read " + MB.getBufferIdentifier()); - return F; -} - template <class ELFT> static ELFKind getELFKind() { if (ELFT::TargetEndianness == support::little) return ELFT::Is64Bits ? ELF64LEKind : ELF32LEKind; @@ -54,41 +112,31 @@ template <class ELFT> static ELFKind getELFKind() { } template <class ELFT> -ELFFileBase<ELFT>::ELFFileBase(Kind K, MemoryBufferRef MB) - : InputFile(K, MB), ELFObj(createELFObj<ELFT>(MB)) { +ELFFileBase<ELFT>::ELFFileBase(Kind K, MemoryBufferRef MB) : InputFile(K, MB) { EKind = getELFKind<ELFT>(); - EMachine = ELFObj.getHeader()->e_machine; + EMachine = getObj().getHeader()->e_machine; + OSABI = getObj().getHeader()->e_ident[llvm::ELF::EI_OSABI]; } template <class ELFT> -typename ELFT::SymRange ELFFileBase<ELFT>::getElfSymbols(bool OnlyGlobals) { - if (!Symtab) - return Elf_Sym_Range(nullptr, nullptr); - Elf_Sym_Range Syms = ELFObj.symbols(Symtab); - uint32_t NumSymbols = std::distance(Syms.begin(), Syms.end()); - uint32_t FirstNonLocal = Symtab->sh_info; - if (FirstNonLocal > NumSymbols) - fatal(getFilename(this) + ": invalid sh_info in symbol table"); - - if (OnlyGlobals) - return makeArrayRef(Syms.begin() + FirstNonLocal, Syms.end()); - return makeArrayRef(Syms.begin(), Syms.end()); +typename ELFT::SymRange ELFFileBase<ELFT>::getGlobalSymbols() { + return makeArrayRef(Symbols.begin() + FirstNonLocal, Symbols.end()); } template <class ELFT> uint32_t ELFFileBase<ELFT>::getSectionIndex(const Elf_Sym &Sym) const { - uint32_t I = Sym.st_shndx; - if (I == ELF::SHN_XINDEX) - return ELFObj.getExtendedSymbolTableIndex(&Sym, Symtab, 
SymtabSHNDX); - if (I >= ELF::SHN_LORESERVE) - return 0; - return I; + return check(getObj().getSectionIndex(&Sym, Symbols, SymtabSHNDX)); } -template <class ELFT> void ELFFileBase<ELFT>::initStringTable() { - if (!Symtab) - return; - StringTable = check(ELFObj.getStringTableForSymtab(*Symtab)); +template <class ELFT> +void ELFFileBase<ELFT>::initSymtab(ArrayRef<Elf_Shdr> Sections, + const Elf_Shdr *Symtab) { + FirstNonLocal = Symtab->sh_info; + Symbols = check(getObj().symbols(Symtab)); + if (FirstNonLocal == 0 || FirstNonLocal > Symbols.size()) + fatal(toString(this) + ": invalid sh_info in symbol table"); + + StringTable = check(getObj().getStringTableForSymtab(*Symtab, Sections)); } template <class ELFT> @@ -97,37 +145,25 @@ elf::ObjectFile<ELFT>::ObjectFile(MemoryBufferRef M) template <class ELFT> ArrayRef<SymbolBody *> elf::ObjectFile<ELFT>::getNonLocalSymbols() { - if (!this->Symtab) - return this->SymbolBodies; - uint32_t FirstNonLocal = this->Symtab->sh_info; - return makeArrayRef(this->SymbolBodies).slice(FirstNonLocal); + return makeArrayRef(this->SymbolBodies).slice(this->FirstNonLocal); } template <class ELFT> ArrayRef<SymbolBody *> elf::ObjectFile<ELFT>::getLocalSymbols() { - if (!this->Symtab) + if (this->SymbolBodies.empty()) return this->SymbolBodies; - uint32_t FirstNonLocal = this->Symtab->sh_info; - return makeArrayRef(this->SymbolBodies).slice(1, FirstNonLocal - 1); + return makeArrayRef(this->SymbolBodies).slice(1, this->FirstNonLocal - 1); } template <class ELFT> ArrayRef<SymbolBody *> elf::ObjectFile<ELFT>::getSymbols() { - if (!this->Symtab) + if (this->SymbolBodies.empty()) return this->SymbolBodies; return makeArrayRef(this->SymbolBodies).slice(1); } -template <class ELFT> uint32_t elf::ObjectFile<ELFT>::getMipsGp0() const { - if (ELFT::Is64Bits && MipsOptions && MipsOptions->Reginfo) - return MipsOptions->Reginfo->ri_gp_value; - if (!ELFT::Is64Bits && MipsReginfo && MipsReginfo->Reginfo) - return MipsReginfo->Reginfo->ri_gp_value; - 
return 0; -} - template <class ELFT> -void elf::ObjectFile<ELFT>::parse(DenseSet<StringRef> &ComdatGroups) { +void elf::ObjectFile<ELFT>::parse(DenseSet<CachedHashStringRef> &ComdatGroups) { // Read section and symbol tables. initializeSections(ComdatGroups); initializeSymbols(); @@ -137,22 +173,25 @@ void elf::ObjectFile<ELFT>::parse(DenseSet<StringRef> &ComdatGroups) { // They are identified and deduplicated by group name. This function // returns a group name. template <class ELFT> -StringRef elf::ObjectFile<ELFT>::getShtGroupSignature(const Elf_Shdr &Sec) { - const ELFFile<ELFT> &Obj = this->ELFObj; - const Elf_Shdr *Symtab = check(Obj.getSection(Sec.sh_link)); - const Elf_Sym *Sym = Obj.getSymbol(Symtab, Sec.sh_info); - StringRef Strtab = check(Obj.getStringTableForSymtab(*Symtab)); - return check(Sym->getName(Strtab)); +StringRef +elf::ObjectFile<ELFT>::getShtGroupSignature(ArrayRef<Elf_Shdr> Sections, + const Elf_Shdr &Sec) { + if (this->Symbols.empty()) + this->initSymtab(Sections, + check(object::getSection<ELFT>(Sections, Sec.sh_link))); + const Elf_Sym *Sym = + check(object::getSymbol<ELFT>(this->Symbols, Sec.sh_info)); + return check(Sym->getName(this->StringTable)); } template <class ELFT> ArrayRef<typename elf::ObjectFile<ELFT>::Elf_Word> elf::ObjectFile<ELFT>::getShtGroupEntries(const Elf_Shdr &Sec) { - const ELFFile<ELFT> &Obj = this->ELFObj; + const ELFFile<ELFT> &Obj = this->getObj(); ArrayRef<Elf_Word> Entries = check(Obj.template getSectionContentsAsArray<Elf_Word>(&Sec)); if (Entries.empty() || Entries[0] != GRP_COMDAT) - fatal(getFilename(this) + ": unsupported SHT_GROUP format"); + fatal(toString(this) + ": unsupported SHT_GROUP format"); return Entries.slice(1); } @@ -163,15 +202,39 @@ bool elf::ObjectFile<ELFT>::shouldMerge(const Elf_Shdr &Sec) { if (Config->Optimize == 0) return false; + // Do not merge sections if generating a relocatable object. 
It makes + // the code simpler because we do not need to update relocation addends + // to reflect changes introduced by merging. Instead of that we write + // such "merge" sections into separate OutputSections and keep SHF_MERGE + // / SHF_STRINGS flags and sh_entsize value to be able to perform merging + // later during a final linking. + if (Config->Relocatable) + return false; + + // A mergeable section with size 0 is useless because they don't have + // any data to merge. A mergeable string section with size 0 can be + // argued as invalid because it doesn't end with a null character. + // We'll avoid a mess by handling them as if they were non-mergeable. + if (Sec.sh_size == 0) + return false; + + // Check for sh_entsize. The ELF spec is not clear about the zero + // sh_entsize. It says that "the member [sh_entsize] contains 0 if + // the section does not hold a table of fixed-size entries". We know + // that Rust 1.13 produces a string mergeable section with a zero + // sh_entsize. Here we just accept it rather than being picky about it. + uintX_t EntSize = Sec.sh_entsize; + if (EntSize == 0) + return false; + if (Sec.sh_size % EntSize) + fatal(toString(this) + + ": SHF_MERGE section size must be a multiple of sh_entsize"); + uintX_t Flags = Sec.sh_flags; if (!(Flags & SHF_MERGE)) return false; if (Flags & SHF_WRITE) - fatal(getFilename(this) + ": writable SHF_MERGE section is not supported"); - uintX_t EntSize = Sec.sh_entsize; - if (!EntSize || Sec.sh_size % EntSize) - fatal(getFilename(this) + - ": SHF_MERGE section size must be a multiple of sh_entsize"); + fatal(toString(this) + ": writable SHF_MERGE section is not supported"); // Don't try to merge if the alignment is larger than the sh_entsize and this // is not SHF_STRINGS. 
@@ -187,74 +250,61 @@ bool elf::ObjectFile<ELFT>::shouldMerge(const Elf_Shdr &Sec) { template <class ELFT> void elf::ObjectFile<ELFT>::initializeSections( - DenseSet<StringRef> &ComdatGroups) { - uint64_t Size = this->ELFObj.getNumSections(); + DenseSet<CachedHashStringRef> &ComdatGroups) { + ArrayRef<Elf_Shdr> ObjSections = check(this->getObj().sections()); + const ELFFile<ELFT> &Obj = this->getObj(); + uint64_t Size = ObjSections.size(); Sections.resize(Size); unsigned I = -1; - const ELFFile<ELFT> &Obj = this->ELFObj; - for (const Elf_Shdr &Sec : Obj.sections()) { + StringRef SectionStringTable = check(Obj.getSectionStringTable(ObjSections)); + for (const Elf_Shdr &Sec : ObjSections) { ++I; if (Sections[I] == &InputSection<ELFT>::Discarded) continue; + // SHF_EXCLUDE'ed sections are discarded by the linker. However, + // if -r is given, we'll let the final link discard such sections. + // This is compatible with GNU. + if ((Sec.sh_flags & SHF_EXCLUDE) && !Config->Relocatable) { + Sections[I] = &InputSection<ELFT>::Discarded; + continue; + } + switch (Sec.sh_type) { case SHT_GROUP: Sections[I] = &InputSection<ELFT>::Discarded; - if (ComdatGroups.insert(getShtGroupSignature(Sec)).second) + if (ComdatGroups.insert(CachedHashStringRef( + getShtGroupSignature(ObjSections, Sec))) + .second) continue; for (uint32_t SecIndex : getShtGroupEntries(Sec)) { if (SecIndex >= Size) - fatal(getFilename(this) + ": invalid section index in group: " + + fatal(toString(this) + ": invalid section index in group: " + Twine(SecIndex)); Sections[SecIndex] = &InputSection<ELFT>::Discarded; } break; case SHT_SYMTAB: - this->Symtab = &Sec; + this->initSymtab(ObjSections, &Sec); break; case SHT_SYMTAB_SHNDX: - this->SymtabSHNDX = check(Obj.getSHNDXTable(Sec)); + this->SymtabSHNDX = check(Obj.getSHNDXTable(Sec, ObjSections)); break; case SHT_STRTAB: case SHT_NULL: break; - case SHT_RELA: - case SHT_REL: { - // This section contains relocation information. 
- // If -r is given, we do not interpret or apply relocation - // but just copy relocation sections to output. - if (Config->Relocatable) { - Sections[I] = new (IAlloc.Allocate()) InputSection<ELFT>(this, &Sec); - break; - } - - // Find the relocation target section and associate this - // section with it. - InputSectionBase<ELFT> *Target = getRelocTarget(Sec); - if (!Target) - break; - if (auto *S = dyn_cast<InputSection<ELFT>>(Target)) { - S->RelocSections.push_back(&Sec); - break; - } - if (auto *S = dyn_cast<EhInputSection<ELFT>>(Target)) { - if (S->RelocSection) - fatal( - getFilename(this) + - ": multiple relocation sections to .eh_frame are not supported"); - S->RelocSection = &Sec; - break; - } - fatal(getFilename(this) + - ": relocations pointing to SHF_MERGE are not supported"); - } - case SHT_ARM_ATTRIBUTES: - // FIXME: ARM meta-data section. At present attributes are ignored, - // they can be used to reason about object compatibility. - Sections[I] = &InputSection<ELFT>::Discarded; - break; default: - Sections[I] = createInputSection(Sec); + Sections[I] = createInputSection(Sec, SectionStringTable); + } + + // .ARM.exidx sections have a reverse dependency on the InputSection they + // have a SHF_LINK_ORDER dependency, this is identified by the sh_link. 
+ if (Sec.sh_flags & SHF_LINK_ORDER) { + if (Sec.sh_link >= Sections.size()) + fatal(toString(this) + ": invalid sh_link index: " + + Twine(Sec.sh_link)); + auto *IS = cast<InputSection<ELFT>>(Sections[Sec.sh_link]); + IS->DependentSection = Sections[I]; } } } @@ -264,8 +314,7 @@ InputSectionBase<ELFT> * elf::ObjectFile<ELFT>::getRelocTarget(const Elf_Shdr &Sec) { uint32_t Idx = Sec.sh_info; if (Idx >= Sections.size()) - fatal(getFilename(this) + ": invalid relocated section index: " + - Twine(Idx)); + fatal(toString(this) + ": invalid relocated section index: " + Twine(Idx)); InputSectionBase<ELFT> *Target = Sections[Idx]; // Strictly speaking, a relocation section must be included in the @@ -275,14 +324,65 @@ elf::ObjectFile<ELFT>::getRelocTarget(const Elf_Shdr &Sec) { return nullptr; if (!Target) - fatal(getFilename(this) + ": unsupported relocation reference"); + fatal(toString(this) + ": unsupported relocation reference"); return Target; } template <class ELFT> InputSectionBase<ELFT> * -elf::ObjectFile<ELFT>::createInputSection(const Elf_Shdr &Sec) { - StringRef Name = check(this->ELFObj.getSectionName(&Sec)); +elf::ObjectFile<ELFT>::createInputSection(const Elf_Shdr &Sec, + StringRef SectionStringTable) { + StringRef Name = + check(this->getObj().getSectionName(&Sec, SectionStringTable)); + + switch (Sec.sh_type) { + case SHT_ARM_ATTRIBUTES: + // FIXME: ARM meta-data section. Retain the first attribute section + // we see. The eglibc ARM dynamic loaders require the presence of an + // attribute section for dlopen to work. + // In a full implementation we would merge all attribute sections. + if (In<ELFT>::ARMAttributes == nullptr) { + In<ELFT>::ARMAttributes = make<InputSection<ELFT>>(this, &Sec, Name); + return In<ELFT>::ARMAttributes; + } + return &InputSection<ELFT>::Discarded; + case SHT_RELA: + case SHT_REL: { + // This section contains relocation information. 
+ // If -r is given, we do not interpret or apply relocation + // but just copy relocation sections to output. + if (Config->Relocatable) + return make<InputSection<ELFT>>(this, &Sec, Name); + + // Find the relocation target section and associate this + // section with it. + InputSectionBase<ELFT> *Target = getRelocTarget(Sec); + if (!Target) + return nullptr; + if (Target->FirstRelocation) + fatal(toString(this) + + ": multiple relocation sections to one section are not supported"); + if (!isa<InputSection<ELFT>>(Target) && !isa<EhInputSection<ELFT>>(Target)) + fatal(toString(this) + + ": relocations pointing to SHF_MERGE are not supported"); + + size_t NumRelocations; + if (Sec.sh_type == SHT_RELA) { + ArrayRef<Elf_Rela> Rels = check(this->getObj().relas(&Sec)); + Target->FirstRelocation = Rels.begin(); + NumRelocations = Rels.size(); + Target->AreRelocsRela = true; + } else { + ArrayRef<Elf_Rel> Rels = check(this->getObj().rels(&Sec)); + Target->FirstRelocation = Rels.begin(); + NumRelocations = Rels.size(); + Target->AreRelocsRela = false; + } + assert(isUInt<31>(NumRelocations)); + Target->NumRelocations = NumRelocations; + return nullptr; + } + } // .note.GNU-stack is a marker section to control the presence of // PT_GNU_STACK segment in outputs. Since the presence of the segment @@ -296,39 +396,23 @@ elf::ObjectFile<ELFT>::createInputSection(const Elf_Shdr &Sec) { return &InputSection<ELFT>::Discarded; } - if (Config->StripDebug && Name.startswith(".debug")) + if (Config->Strip != StripPolicy::None && Name.startswith(".debug")) return &InputSection<ELFT>::Discarded; - // A MIPS object file has a special sections that contain register - // usage info, which need to be handled by the linker specially. 
- if (Config->EMachine == EM_MIPS) { - if (Name == ".reginfo") { - MipsReginfo.reset(new MipsReginfoInputSection<ELFT>(this, &Sec)); - return MipsReginfo.get(); - } - if (Name == ".MIPS.options") { - MipsOptions.reset(new MipsOptionsInputSection<ELFT>(this, &Sec)); - return MipsOptions.get(); - } - } - // The linker merges EH (exception handling) frames and creates a // .eh_frame_hdr section for runtime. So we handle them with a special // class. For relocatable outputs, they are just passed through. if (Name == ".eh_frame" && !Config->Relocatable) - return new (EHAlloc.Allocate()) EhInputSection<ELFT>(this, &Sec); + return make<EhInputSection<ELFT>>(this, &Sec, Name); if (shouldMerge(Sec)) - return new (MAlloc.Allocate()) MergeInputSection<ELFT>(this, &Sec); - return new (IAlloc.Allocate()) InputSection<ELFT>(this, &Sec); + return make<MergeInputSection<ELFT>>(this, &Sec, Name); + return make<InputSection<ELFT>>(this, &Sec, Name); } template <class ELFT> void elf::ObjectFile<ELFT>::initializeSymbols() { - this->initStringTable(); - Elf_Sym_Range Syms = this->getElfSymbols(false); - uint32_t NumSymbols = std::distance(Syms.begin(), Syms.end()); - SymbolBodies.reserve(NumSymbols); - for (const Elf_Sym &Sym : Syms) + SymbolBodies.reserve(this->Symbols.size()); + for (const Elf_Sym &Sym : this->Symbols) SymbolBodies.push_back(createSymbolBody(&Sym)); } @@ -336,12 +420,23 @@ template <class ELFT> InputSectionBase<ELFT> * elf::ObjectFile<ELFT>::getSection(const Elf_Sym &Sym) const { uint32_t Index = this->getSectionIndex(Sym); - if (Index == 0) - return nullptr; - if (Index >= Sections.size() || !Sections[Index]) - fatal(getFilename(this) + ": invalid section index: " + Twine(Index)); + if (Index >= Sections.size()) + fatal(toString(this) + ": invalid section index: " + Twine(Index)); InputSectionBase<ELFT> *S = Sections[Index]; - if (S == &InputSectionBase<ELFT>::Discarded) + + // We found that GNU assembler 2.17.50 [FreeBSD] 2007-07-03 could + // generate broken 
objects. STT_SECTION/STT_NOTYPE symbols can be + // associated with SHT_REL[A]/SHT_SYMTAB/SHT_STRTAB sections. + // In this case it is fine for section to be null here as we do not + // allocate sections of these types. + if (!S) { + if (Index == 0 || Sym.getType() == STT_SECTION || + Sym.getType() == STT_NOTYPE) + return nullptr; + fatal(toString(this) + ": invalid section index: " + Twine(Index)); + } + + if (S == &InputSection<ELFT>::Discarded) return S; return S->Repl; } @@ -350,11 +445,26 @@ template <class ELFT> SymbolBody *elf::ObjectFile<ELFT>::createSymbolBody(const Elf_Sym *Sym) { int Binding = Sym->getBinding(); InputSectionBase<ELFT> *Sec = getSection(*Sym); + + uint8_t StOther = Sym->st_other; + uint8_t Type = Sym->getType(); + uintX_t Value = Sym->st_value; + uintX_t Size = Sym->st_size; + if (Binding == STB_LOCAL) { + if (Sym->getType() == STT_FILE) + SourceFile = check(Sym->getName(this->StringTable)); + + if (this->StringTable.size() <= Sym->st_name) + fatal(toString(this) + ": invalid symbol name offset"); + + StringRefZ Name = this->StringTable.data() + Sym->st_name; if (Sym->st_shndx == SHN_UNDEF) - return new (this->Alloc) - Undefined(Sym->st_name, Sym->st_other, Sym->getType(), this); - return new (this->Alloc) DefinedRegular<ELFT>(*Sym, Sec); + return new (BAlloc) + Undefined(Name, /*IsLocal=*/true, StOther, Type, this); + + return new (BAlloc) DefinedRegular<ELFT>(Name, /*IsLocal=*/true, StOther, + Type, Value, Size, Sec, this); } StringRef Name = check(Sym->getName(this->StringTable)); @@ -362,33 +472,38 @@ SymbolBody *elf::ObjectFile<ELFT>::createSymbolBody(const Elf_Sym *Sym) { switch (Sym->st_shndx) { case SHN_UNDEF: return elf::Symtab<ELFT>::X - ->addUndefined(Name, Binding, Sym->st_other, Sym->getType(), - /*CanOmitFromDynSym*/ false, this) + ->addUndefined(Name, /*IsLocal=*/false, Binding, StOther, Type, + /*CanOmitFromDynSym=*/false, this) ->body(); case SHN_COMMON: + if (Value == 0 || Value >= UINT32_MAX) + fatal(toString(this) + ": 
common symbol '" + Name + + "' has invalid alignment: " + Twine(Value)); return elf::Symtab<ELFT>::X - ->addCommon(Name, Sym->st_size, Sym->st_value, Binding, Sym->st_other, - Sym->getType(), this) + ->addCommon(Name, Size, Value, Binding, StOther, Type, this) ->body(); } switch (Binding) { default: - fatal(getFilename(this) + ": unexpected binding: " + Twine(Binding)); + fatal(toString(this) + ": unexpected binding: " + Twine(Binding)); case STB_GLOBAL: case STB_WEAK: case STB_GNU_UNIQUE: if (Sec == &InputSection<ELFT>::Discarded) return elf::Symtab<ELFT>::X - ->addUndefined(Name, Binding, Sym->st_other, Sym->getType(), - /*CanOmitFromDynSym*/ false, this) + ->addUndefined(Name, /*IsLocal=*/false, Binding, StOther, Type, + /*CanOmitFromDynSym=*/false, this) ->body(); - return elf::Symtab<ELFT>::X->addRegular(Name, *Sym, Sec)->body(); + return elf::Symtab<ELFT>::X + ->addRegular(Name, StOther, Type, Value, Size, Binding, Sec, this) + ->body(); } } template <class ELFT> void ArchiveFile::parse() { - File = check(Archive::create(MB), "failed to parse archive"); + File = check(Archive::create(MB), + MB.getBufferIdentifier() + ": failed to parse archive"); // Read the symbol table to construct Lazy objects. for (const Archive::Symbol &Sym : File->symbols()) @@ -396,13 +511,14 @@ template <class ELFT> void ArchiveFile::parse() { } // Returns a buffer pointing to a member file containing a given symbol. 
-MemoryBufferRef ArchiveFile::getMember(const Archive::Symbol *Sym) { +std::pair<MemoryBufferRef, uint64_t> +ArchiveFile::getMember(const Archive::Symbol *Sym) { Archive::Child C = check(Sym->getMember(), "could not get the member for symbol " + Sym->getName()); if (!Seen.insert(C.getChildOffset()).second) - return MemoryBufferRef(); + return {MemoryBufferRef(), 0}; MemoryBufferRef Ret = check(C.getMemoryBufferRef(), @@ -412,8 +528,9 @@ MemoryBufferRef ArchiveFile::getMember(const Archive::Symbol *Sym) { if (C.getParent()->isThin() && Driver->Cpio) Driver->Cpio->append(relativeToRoot(check(C.getFullName())), Ret.getBuffer()); - - return Ret; + if (C.getParent()->isThin()) + return {Ret, 0}; + return {Ret, C.getChildOffset()}; } template <class ELFT> @@ -423,32 +540,29 @@ SharedFile<ELFT>::SharedFile(MemoryBufferRef M) template <class ELFT> const typename ELFT::Shdr * SharedFile<ELFT>::getSection(const Elf_Sym &Sym) const { - uint32_t Index = this->getSectionIndex(Sym); - if (Index == 0) - return nullptr; - return check(this->ELFObj.getSection(Index)); + return check( + this->getObj().getSection(&Sym, this->Symbols, this->SymtabSHNDX)); } // Partially parse the shared object file so that we can call // getSoName on this object. 
template <class ELFT> void SharedFile<ELFT>::parseSoName() { - typedef typename ELFT::Dyn Elf_Dyn; - typedef typename ELFT::uint uintX_t; const Elf_Shdr *DynamicSec = nullptr; - const ELFFile<ELFT> Obj = this->ELFObj; - for (const Elf_Shdr &Sec : Obj.sections()) { + const ELFFile<ELFT> Obj = this->getObj(); + ArrayRef<Elf_Shdr> Sections = check(Obj.sections()); + for (const Elf_Shdr &Sec : Sections) { switch (Sec.sh_type) { default: continue; case SHT_DYNSYM: - this->Symtab = &Sec; + this->initSymtab(Sections, &Sec); break; case SHT_DYNAMIC: DynamicSec = &Sec; break; case SHT_SYMTAB_SHNDX: - this->SymtabSHNDX = check(Obj.getSHNDXTable(Sec)); + this->SymtabSHNDX = check(Obj.getSHNDXTable(Sec, Sections)); break; case SHT_GNU_versym: this->VersymSec = &Sec; @@ -459,20 +573,25 @@ template <class ELFT> void SharedFile<ELFT>::parseSoName() { } } - this->initStringTable(); + if (this->VersymSec && this->Symbols.empty()) + error("SHT_GNU_versym should be associated with symbol table"); + + // DSOs are identified by soname, and they usually contain + // DT_SONAME tag in their header. But if they are missing, + // filenames are used as default sonames. 
SoName = sys::path::filename(this->getName()); if (!DynamicSec) return; - auto *Begin = - reinterpret_cast<const Elf_Dyn *>(Obj.base() + DynamicSec->sh_offset); - const Elf_Dyn *End = Begin + DynamicSec->sh_size / sizeof(Elf_Dyn); - for (const Elf_Dyn &Dyn : make_range(Begin, End)) { + ArrayRef<Elf_Dyn> Arr = + check(Obj.template getSectionContentsAsArray<Elf_Dyn>(DynamicSec), + toString(this) + ": getSectionContentsAsArray failed"); + for (const Elf_Dyn &Dyn : Arr) { if (Dyn.d_tag == DT_SONAME) { uintX_t Val = Dyn.getVal(); if (Val >= this->StringTable.size()) - fatal(getFilename(this) + ": invalid DT_SONAME entry"); + fatal(toString(this) + ": invalid DT_SONAME entry"); SoName = StringRef(this->StringTable.data() + Val); return; } @@ -494,9 +613,9 @@ SharedFile<ELFT>::parseVerdefs(const Elf_Versym *&Versym) { return Verdefs; // The location of the first global versym entry. - Versym = reinterpret_cast<const Elf_Versym *>(this->ELFObj.base() + - VersymSec->sh_offset) + - this->Symtab->sh_info; + const char *Base = this->MB.getBuffer().data(); + Versym = reinterpret_cast<const Elf_Versym *>(Base + VersymSec->sh_offset) + + this->FirstNonLocal; // We cannot determine the largest verdef identifier without inspecting // every Elf_Verdef, but both bfd and gold assign verdef identifiers @@ -507,7 +626,7 @@ SharedFile<ELFT>::parseVerdefs(const Elf_Versym *&Versym) { // Build the Verdefs array by following the chain of Elf_Verdef objects // from the start of the .gnu.version_d section. 
- const uint8_t *Verdef = this->ELFObj.base() + VerdefSec->sh_offset; + const char *Verdef = Base + VerdefSec->sh_offset; for (unsigned I = 0; I != VerdefCount; ++I) { auto *CurVerdef = reinterpret_cast<const Elf_Verdef *>(Verdef); Verdef += CurVerdef->vd_next; @@ -526,7 +645,7 @@ template <class ELFT> void SharedFile<ELFT>::parseRest() { const Elf_Versym *Versym = nullptr; std::vector<const Elf_Verdef *> Verdefs = parseVerdefs(Versym); - Elf_Sym_Range Syms = this->getElfSymbols(true); + Elf_Sym_Range Syms = this->getGlobalSymbols(); for (const Elf_Sym &Sym : Syms) { unsigned VersymIndex = 0; if (Versym) { @@ -552,18 +671,16 @@ template <class ELFT> void SharedFile<ELFT>::parseRest() { } } -static ELFKind getELFKind(MemoryBufferRef MB) { - std::string TripleStr = getBitcodeTargetTriple(MB, Driver->Context); - Triple TheTriple(TripleStr); - bool Is64Bits = TheTriple.isArch64Bit(); - if (TheTriple.isLittleEndian()) - return Is64Bits ? ELF64LEKind : ELF32LEKind; - return Is64Bits ? ELF64BEKind : ELF32BEKind; +static ELFKind getBitcodeELFKind(MemoryBufferRef MB) { + Triple T(check(getBitcodeTargetTriple(MB))); + if (T.isLittleEndian()) + return T.isArch64Bit() ? ELF64LEKind : ELF32LEKind; + return T.isArch64Bit() ? ELF64BEKind : ELF32BEKind; } -static uint8_t getMachineKind(MemoryBufferRef MB) { - std::string TripleStr = getBitcodeTargetTriple(MB, Driver->Context); - switch (Triple(TripleStr).getArch()) { +static uint8_t getBitcodeMachineKind(MemoryBufferRef MB) { + Triple T(check(getBitcodeTargetTriple(MB))); + switch (T.getArch()) { case Triple::aarch64: return EM_AARCH64; case Triple::arm: @@ -578,23 +695,22 @@ static uint8_t getMachineKind(MemoryBufferRef MB) { case Triple::ppc64: return EM_PPC64; case Triple::x86: - return EM_386; + return T.isOSIAMCU() ? 
EM_IAMCU : EM_386; case Triple::x86_64: return EM_X86_64; default: fatal(MB.getBufferIdentifier() + - ": could not infer e_machine from bitcode target triple " + - TripleStr); + ": could not infer e_machine from bitcode target triple " + T.str()); } } BitcodeFile::BitcodeFile(MemoryBufferRef MB) : InputFile(BitcodeKind, MB) { - EKind = getELFKind(MB); - EMachine = getMachineKind(MB); + EKind = getBitcodeELFKind(MB); + EMachine = getBitcodeMachineKind(MB); } -static uint8_t getGvVisibility(const GlobalValue *GV) { - switch (GV->getVisibility()) { +static uint8_t mapVisibility(GlobalValue::VisibilityTypes GvVisibility) { + switch (GvVisibility) { case GlobalValue::DefaultVisibility: return STV_DEFAULT; case GlobalValue::HiddenVisibility: @@ -606,124 +722,134 @@ static uint8_t getGvVisibility(const GlobalValue *GV) { } template <class ELFT> -Symbol *BitcodeFile::createSymbol(const DenseSet<const Comdat *> &KeptComdats, - const IRObjectFile &Obj, - const BasicSymbolRef &Sym) { - const GlobalValue *GV = Obj.getSymbolGV(Sym.getRawDataRefImpl()); - - SmallString<64> Name; - raw_svector_ostream OS(Name); - Sym.printName(OS); - StringRef NameRef = Saver.save(StringRef(Name)); - - uint32_t Flags = Sym.getFlags(); - bool IsWeak = Flags & BasicSymbolRef::SF_Weak; - uint32_t Binding = IsWeak ? STB_WEAK : STB_GLOBAL; - - uint8_t Type = STT_NOTYPE; - bool CanOmitFromDynSym = false; - // FIXME: Expose a thread-local flag for module asm symbols. - if (GV) { - if (GV->isThreadLocal()) - Type = STT_TLS; - CanOmitFromDynSym = canBeOmittedFromSymbolTable(GV); - } - - uint8_t Visibility; - if (GV) - Visibility = getGvVisibility(GV); - else - // FIXME: Set SF_Hidden flag correctly for module asm symbols, and expose - // protected visibility. 
- Visibility = STV_DEFAULT; - - if (GV) - if (const Comdat *C = GV->getComdat()) - if (!KeptComdats.count(C)) - return Symtab<ELFT>::X->addUndefined(NameRef, Binding, Visibility, Type, - CanOmitFromDynSym, this); +static Symbol *createBitcodeSymbol(const std::vector<bool> &KeptComdats, + const lto::InputFile::Symbol &ObjSym, + BitcodeFile *F) { + StringRef NameRef = Saver.save(ObjSym.getName()); + uint32_t Flags = ObjSym.getFlags(); + uint32_t Binding = (Flags & BasicSymbolRef::SF_Weak) ? STB_WEAK : STB_GLOBAL; + + uint8_t Type = ObjSym.isTLS() ? STT_TLS : STT_NOTYPE; + uint8_t Visibility = mapVisibility(ObjSym.getVisibility()); + bool CanOmitFromDynSym = ObjSym.canBeOmittedFromSymbolTable(); + + int C = check(ObjSym.getComdatIndex()); + if (C != -1 && !KeptComdats[C]) + return Symtab<ELFT>::X->addUndefined(NameRef, /*IsLocal=*/false, Binding, + Visibility, Type, CanOmitFromDynSym, + F); - const Module &M = Obj.getModule(); if (Flags & BasicSymbolRef::SF_Undefined) - return Symtab<ELFT>::X->addUndefined(NameRef, Binding, Visibility, Type, - CanOmitFromDynSym, this); - if (Flags & BasicSymbolRef::SF_Common) { - // FIXME: Set SF_Common flag correctly for module asm symbols, and expose - // size and alignment. 
- assert(GV); - const DataLayout &DL = M.getDataLayout(); - uint64_t Size = DL.getTypeAllocSize(GV->getValueType()); - return Symtab<ELFT>::X->addCommon(NameRef, Size, GV->getAlignment(), - Binding, Visibility, STT_OBJECT, this); - } - return Symtab<ELFT>::X->addBitcode(NameRef, IsWeak, Visibility, Type, - CanOmitFromDynSym, this); -} + return Symtab<ELFT>::X->addUndefined(NameRef, /*IsLocal=*/false, Binding, + Visibility, Type, CanOmitFromDynSym, + F); + + if (Flags & BasicSymbolRef::SF_Common) + return Symtab<ELFT>::X->addCommon(NameRef, ObjSym.getCommonSize(), + ObjSym.getCommonAlignment(), Binding, + Visibility, STT_OBJECT, F); -bool BitcodeFile::shouldSkip(uint32_t Flags) { - return !(Flags & BasicSymbolRef::SF_Global) || - (Flags & BasicSymbolRef::SF_FormatSpecific); + return Symtab<ELFT>::X->addBitcode(NameRef, Binding, Visibility, Type, + CanOmitFromDynSym, F); } template <class ELFT> -void BitcodeFile::parse(DenseSet<StringRef> &ComdatGroups) { - Obj = check(IRObjectFile::create(MB, Driver->Context)); - const Module &M = Obj->getModule(); - - DenseSet<const Comdat *> KeptComdats; - for (const auto &P : M.getComdatSymbolTable()) { - StringRef N = Saver.save(P.first()); - if (ComdatGroups.insert(N).second) - KeptComdats.insert(&P.second); +void BitcodeFile::parse(DenseSet<CachedHashStringRef> &ComdatGroups) { + + // Here we pass a new MemoryBufferRef which is identified by ArchiveName + // (the fully resolved path of the archive) + member name + offset of the + // member in the archive. + // ThinLTO uses the MemoryBufferRef identifier to access its internal + // data structures and if two archives define two members with the same name, + // this causes a collision which result in only one of the objects being + // taken into consideration at LTO time (which very likely causes undefined + // symbols later in the link stage). 
+ Obj = check(lto::InputFile::create(MemoryBufferRef( + MB.getBuffer(), Saver.save(ArchiveName + MB.getBufferIdentifier() + + utostr(OffsetInArchive))))); + + std::vector<bool> KeptComdats; + for (StringRef S : Obj->getComdatTable()) { + StringRef N = Saver.save(S); + KeptComdats.push_back(ComdatGroups.insert(CachedHashStringRef(N)).second); } - for (const BasicSymbolRef &Sym : Obj->symbols()) - if (!shouldSkip(Sym.getFlags())) - Symbols.push_back(createSymbol<ELFT>(KeptComdats, *Obj, Sym)); + for (const lto::InputFile::Symbol &ObjSym : Obj->symbols()) + Symbols.push_back(createBitcodeSymbol<ELFT>(KeptComdats, ObjSym, this)); } template <template <class> class T> -static std::unique_ptr<InputFile> createELFFile(MemoryBufferRef MB) { +static InputFile *createELFFile(MemoryBufferRef MB) { unsigned char Size; unsigned char Endian; std::tie(Size, Endian) = getElfArchType(MB.getBuffer()); if (Endian != ELFDATA2LSB && Endian != ELFDATA2MSB) - fatal("invalid data encoding: " + MB.getBufferIdentifier()); + fatal(MB.getBufferIdentifier() + ": invalid data encoding"); + + size_t BufSize = MB.getBuffer().size(); + if ((Size == ELFCLASS32 && BufSize < sizeof(Elf32_Ehdr)) || + (Size == ELFCLASS64 && BufSize < sizeof(Elf64_Ehdr))) + fatal(MB.getBufferIdentifier() + ": file is too short"); - std::unique_ptr<InputFile> Obj; + InputFile *Obj; if (Size == ELFCLASS32 && Endian == ELFDATA2LSB) - Obj.reset(new T<ELF32LE>(MB)); + Obj = make<T<ELF32LE>>(MB); else if (Size == ELFCLASS32 && Endian == ELFDATA2MSB) - Obj.reset(new T<ELF32BE>(MB)); + Obj = make<T<ELF32BE>>(MB); else if (Size == ELFCLASS64 && Endian == ELFDATA2LSB) - Obj.reset(new T<ELF64LE>(MB)); + Obj = make<T<ELF64LE>>(MB); else if (Size == ELFCLASS64 && Endian == ELFDATA2MSB) - Obj.reset(new T<ELF64BE>(MB)); + Obj = make<T<ELF64BE>>(MB); else - fatal("invalid file class: " + MB.getBufferIdentifier()); + fatal(MB.getBufferIdentifier() + ": invalid file class"); if (!Config->FirstElf) - Config->FirstElf = Obj.get(); + 
Config->FirstElf = Obj; return Obj; } +template <class ELFT> void BinaryFile::parse() { + StringRef Buf = MB.getBuffer(); + ArrayRef<uint8_t> Data = + makeArrayRef<uint8_t>((const uint8_t *)Buf.data(), Buf.size()); + + std::string Filename = MB.getBufferIdentifier(); + std::transform(Filename.begin(), Filename.end(), Filename.begin(), + [](char C) { return isalnum(C) ? C : '_'; }); + Filename = "_binary_" + Filename; + StringRef StartName = Saver.save(Twine(Filename) + "_start"); + StringRef EndName = Saver.save(Twine(Filename) + "_end"); + StringRef SizeName = Saver.save(Twine(Filename) + "_size"); + + auto *Section = + make<InputSection<ELFT>>(SHF_ALLOC, SHT_PROGBITS, 8, Data, ".data"); + Sections.push_back(Section); + + elf::Symtab<ELFT>::X->addRegular(StartName, STV_DEFAULT, STT_OBJECT, 0, 0, + STB_GLOBAL, Section, nullptr); + elf::Symtab<ELFT>::X->addRegular(EndName, STV_DEFAULT, STT_OBJECT, + Data.size(), 0, STB_GLOBAL, Section, + nullptr); + elf::Symtab<ELFT>::X->addRegular(SizeName, STV_DEFAULT, STT_OBJECT, + Data.size(), 0, STB_GLOBAL, nullptr, + nullptr); +} + static bool isBitcode(MemoryBufferRef MB) { using namespace sys::fs; return identify_magic(MB.getBuffer()) == file_magic::bitcode; } -std::unique_ptr<InputFile> elf::createObjectFile(MemoryBufferRef MB, - StringRef ArchiveName) { - std::unique_ptr<InputFile> F; - if (isBitcode(MB)) - F.reset(new BitcodeFile(MB)); - else - F = createELFFile<ObjectFile>(MB); +InputFile *elf::createObjectFile(MemoryBufferRef MB, StringRef ArchiveName, + uint64_t OffsetInArchive) { + InputFile *F = + isBitcode(MB) ? 
make<BitcodeFile>(MB) : createELFFile<ObjectFile>(MB); F->ArchiveName = ArchiveName; + F->OffsetInArchive = OffsetInArchive; return F; } -std::unique_ptr<InputFile> elf::createSharedFile(MemoryBufferRef MB) { +InputFile *elf::createSharedFile(MemoryBufferRef MB) { return createELFFile<SharedFile>(MB); } @@ -734,8 +860,7 @@ MemoryBufferRef LazyObjectFile::getBuffer() { return MB; } -template <class ELFT> -void LazyObjectFile::parse() { +template <class ELFT> void LazyObjectFile::parse() { for (StringRef Sym : getSymbols()) Symtab<ELFT>::X->addLazyObject(Sym, *this); } @@ -745,13 +870,14 @@ template <class ELFT> std::vector<StringRef> LazyObjectFile::getElfSymbols() { typedef typename ELFT::Sym Elf_Sym; typedef typename ELFT::SymRange Elf_Sym_Range; - const ELFFile<ELFT> Obj = createELFObj<ELFT>(this->MB); - for (const Elf_Shdr &Sec : Obj.sections()) { + const ELFFile<ELFT> Obj(this->MB.getBuffer()); + ArrayRef<Elf_Shdr> Sections = check(Obj.sections()); + for (const Elf_Shdr &Sec : Sections) { if (Sec.sh_type != SHT_SYMTAB) continue; - Elf_Sym_Range Syms = Obj.symbols(&Sec); + Elf_Sym_Range Syms = check(Obj.symbols(&Sec)); uint32_t FirstNonLocal = Sec.sh_info; - StringRef StringTable = check(Obj.getStringTableForSymtab(Sec)); + StringRef StringTable = check(Obj.getStringTableForSymtab(Sec, Sections)); std::vector<StringRef> V; for (const Elf_Sym &Sym : Syms.slice(FirstNonLocal)) if (Sym.st_shndx != SHN_UNDEF) @@ -762,21 +888,11 @@ template <class ELFT> std::vector<StringRef> LazyObjectFile::getElfSymbols() { } std::vector<StringRef> LazyObjectFile::getBitcodeSymbols() { - LLVMContext Context; - std::unique_ptr<IRObjectFile> Obj = - check(IRObjectFile::create(this->MB, Context)); + std::unique_ptr<lto::InputFile> Obj = check(lto::InputFile::create(this->MB)); std::vector<StringRef> V; - for (const BasicSymbolRef &Sym : Obj->symbols()) { - uint32_t Flags = Sym.getFlags(); - if (BitcodeFile::shouldSkip(Flags)) - continue; - if (Flags & BasicSymbolRef::SF_Undefined) - 
continue; - SmallString<64> Name; - raw_svector_ostream OS(Name); - Sym.printName(OS); - V.push_back(Saver.save(StringRef(Name))); - } + for (const lto::InputFile::Symbol &Sym : Obj->symbols()) + if (!(Sym.getFlags() & BasicSymbolRef::SF_Undefined)) + V.push_back(Saver.save(Sym.getName())); return V; } @@ -803,10 +919,10 @@ template void ArchiveFile::parse<ELF32BE>(); template void ArchiveFile::parse<ELF64LE>(); template void ArchiveFile::parse<ELF64BE>(); -template void BitcodeFile::parse<ELF32LE>(DenseSet<StringRef> &); -template void BitcodeFile::parse<ELF32BE>(DenseSet<StringRef> &); -template void BitcodeFile::parse<ELF64LE>(DenseSet<StringRef> &); -template void BitcodeFile::parse<ELF64BE>(DenseSet<StringRef> &); +template void BitcodeFile::parse<ELF32LE>(DenseSet<CachedHashStringRef> &); +template void BitcodeFile::parse<ELF32BE>(DenseSet<CachedHashStringRef> &); +template void BitcodeFile::parse<ELF64LE>(DenseSet<CachedHashStringRef> &); +template void BitcodeFile::parse<ELF64BE>(DenseSet<CachedHashStringRef> &); template void LazyObjectFile::parse<ELF32LE>(); template void LazyObjectFile::parse<ELF32BE>(); @@ -827,3 +943,8 @@ template class elf::SharedFile<ELF32LE>; template class elf::SharedFile<ELF32BE>; template class elf::SharedFile<ELF64LE>; template class elf::SharedFile<ELF64BE>; + +template void BinaryFile::parse<ELF32LE>(); +template void BinaryFile::parse<ELF32BE>(); +template void BinaryFile::parse<ELF64LE>(); +template void BinaryFile::parse<ELF64BE>(); diff --git a/ELF/InputFiles.h b/ELF/InputFiles.h index 79cb751494b3..aba1d71379b0 100644 --- a/ELF/InputFiles.h +++ b/ELF/InputFiles.h @@ -16,16 +16,24 @@ #include "Symbols.h" #include "lld/Core/LLVM.h" +#include "lld/Core/Reproduce.h" +#include "llvm/ADT/CachedHashString.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" #include "llvm/IR/Comdat.h" #include "llvm/Object/Archive.h" #include "llvm/Object/ELF.h" #include "llvm/Object/IRObjectFile.h" -#include 
"llvm/Support/StringSaver.h" #include <map> +namespace llvm { +class DWARFDebugLine; +namespace lto { +class InputFile; +} +} + namespace lld { namespace elf { @@ -44,6 +52,7 @@ public: LazyObjectKind, ArchiveKind, BitcodeKind, + BinaryKind, }; Kind kind() const { return FileKind; } @@ -56,10 +65,17 @@ public: // string for creating error messages. StringRef ArchiveName; + // If this file is in an archive, the member contains the offset of + // the file in the archive. Otherwise, it's just zero. We store this + // field so that we can pass it to lib/LTO in order to disambiguate + // between objects. + uint64_t OffsetInArchive; + // If this is an architecture-specific file, the following members // have ELF type (i.e. ELF{32,64}{LE,BE}) and target machine type. ELFKind EKind = ELFNoneKind; uint16_t EMachine = llvm::ELF::EM_NONE; + uint8_t OSABI = 0; protected: InputFile(Kind K, MemoryBufferRef M) : MB(M), FileKind(K) {} @@ -69,7 +85,7 @@ private: }; // Returns "(internal)", "foo.a(bar.o)" or "baz.o". 
-std::string getFilename(const InputFile *F); +std::string toString(const InputFile *F); template <typename ELFT> class ELFFileBase : public InputFile { public: @@ -84,37 +100,37 @@ public: return K == ObjectKind || K == SharedKind; } - const llvm::object::ELFFile<ELFT> &getObj() const { return ELFObj; } - llvm::object::ELFFile<ELFT> &getObj() { return ELFObj; } - - uint8_t getOSABI() const { - return getObj().getHeader()->e_ident[llvm::ELF::EI_OSABI]; + llvm::object::ELFFile<ELFT> getObj() const { + return llvm::object::ELFFile<ELFT>(MB.getBuffer()); } StringRef getStringTable() const { return StringTable; } uint32_t getSectionIndex(const Elf_Sym &Sym) const; - Elf_Sym_Range getElfSymbols(bool OnlyGlobals); + Elf_Sym_Range getGlobalSymbols(); protected: - llvm::object::ELFFile<ELFT> ELFObj; - const Elf_Shdr *Symtab = nullptr; + ArrayRef<Elf_Sym> Symbols; + uint32_t FirstNonLocal = 0; ArrayRef<Elf_Word> SymtabSHNDX; StringRef StringTable; - void initStringTable(); + void initSymtab(ArrayRef<Elf_Shdr> Sections, const Elf_Shdr *Symtab); }; // .o file. 
template <class ELFT> class ObjectFile : public ELFFileBase<ELFT> { typedef ELFFileBase<ELFT> Base; + typedef typename ELFT::Rel Elf_Rel; + typedef typename ELFT::Rela Elf_Rela; typedef typename ELFT::Sym Elf_Sym; typedef typename ELFT::Shdr Elf_Shdr; typedef typename ELFT::SymRange Elf_Sym_Range; typedef typename ELFT::Word Elf_Word; typedef typename ELFT::uint uintX_t; - StringRef getShtGroupSignature(const Elf_Shdr &Sec); + StringRef getShtGroupSignature(ArrayRef<Elf_Shdr> Sections, + const Elf_Shdr &Sec); ArrayRef<Elf_Word> getShtGroupEntries(const Elf_Shdr &Sec); public: @@ -127,40 +143,49 @@ public: ArrayRef<SymbolBody *> getNonLocalSymbols(); explicit ObjectFile(MemoryBufferRef M); - void parse(llvm::DenseSet<StringRef> &ComdatGroups); + void parse(llvm::DenseSet<llvm::CachedHashStringRef> &ComdatGroups); ArrayRef<InputSectionBase<ELFT> *> getSections() const { return Sections; } InputSectionBase<ELFT> *getSection(const Elf_Sym &Sym) const; SymbolBody &getSymbolBody(uint32_t SymbolIndex) const { + if (SymbolIndex >= SymbolBodies.size()) + fatal(toString(this) + ": invalid symbol index"); return *SymbolBodies[SymbolIndex]; } - template <typename RelT> SymbolBody &getRelocTargetSym(const RelT &Rel) const { + template <typename RelT> + SymbolBody &getRelocTargetSym(const RelT &Rel) const { uint32_t SymIndex = Rel.getSymbol(Config->Mips64EL); return getSymbolBody(SymIndex); } - const Elf_Shdr *getSymbolTable() const { return this->Symtab; }; + // Returns source line information for a given offset. + // If no information is available, returns "". + std::string getLineInfo(InputSectionBase<ELFT> *S, uintX_t Offset); - // Get MIPS GP0 value defined by this file. This value represents the gp value + // MIPS GP0 value defined by this file. This value represents the gp value // used to create the relocatable object and required to support // R_MIPS_GPREL16 / R_MIPS_GPREL32 relocations. 
- uint32_t getMipsGp0() const; + uint32_t MipsGp0 = 0; // The number is the offset in the string table. It will be used as the // st_name of the symbol. std::vector<std::pair<const DefinedRegular<ELFT> *, unsigned>> KeptLocalSyms; - // SymbolBodies and Thunks for sections in this file are allocated - // using this buffer. - llvm::BumpPtrAllocator Alloc; + // Name of source file obtained from STT_FILE symbol value, + // or empty string if there is no such symbol in object file + // symbol table. + StringRef SourceFile; private: - void initializeSections(llvm::DenseSet<StringRef> &ComdatGroups); + void + initializeSections(llvm::DenseSet<llvm::CachedHashStringRef> &ComdatGroups); void initializeSymbols(); + void initializeDwarfLine(); InputSectionBase<ELFT> *getRelocTarget(const Elf_Shdr &Sec); - InputSectionBase<ELFT> *createInputSection(const Elf_Shdr &Sec); + InputSectionBase<ELFT> *createInputSection(const Elf_Shdr &Sec, + StringRef SectionStringTable); bool shouldMerge(const Elf_Shdr &Sec); SymbolBody *createSymbolBody(const Elf_Sym *Sym); @@ -171,14 +196,11 @@ private: // List of all symbols referenced or defined by this file. std::vector<SymbolBody *> SymbolBodies; - // MIPS .reginfo section defined by this file. - std::unique_ptr<MipsReginfoInputSection<ELFT>> MipsReginfo; - // MIPS .MIPS.options section defined by this file. - std::unique_ptr<MipsOptionsInputSection<ELFT>> MipsOptions; - - llvm::SpecificBumpPtrAllocator<InputSection<ELFT>> IAlloc; - llvm::SpecificBumpPtrAllocator<MergeInputSection<ELFT>> MAlloc; - llvm::SpecificBumpPtrAllocator<EhInputSection<ELFT>> EHAlloc; + // Debugging information to retrieve source file and line for error + // reporting. Linker may find reasonable number of errors in a + // single object file, so we cache debugging information in order to + // parse it only once for each object file we link. 
+ std::unique_ptr<llvm::DWARFDebugLine> DwarfLine; }; // LazyObjectFile is analogous to ArchiveFile in the sense that @@ -204,8 +226,6 @@ private: template <class ELFT> std::vector<StringRef> getElfSymbols(); std::vector<StringRef> getBitcodeSymbols(); - llvm::BumpPtrAllocator Alloc; - llvm::StringSaver Saver{Alloc}; bool Seen = false; }; @@ -216,10 +236,11 @@ public: static bool classof(const InputFile *F) { return F->kind() == ArchiveKind; } template <class ELFT> void parse(); - // Returns a memory buffer for a given symbol. An empty memory buffer + // Returns a memory buffer for a given symbol and the offset in the archive + // for the member. An empty memory buffer and an offset of zero // is returned if we have already returned the same memory buffer. // (So that we don't instantiate same members more than once.) - MemoryBufferRef getMember(const Archive::Symbol *Sym); + std::pair<MemoryBufferRef, uint64_t> getMember(const Archive::Symbol *Sym); private: std::unique_ptr<Archive> File; @@ -231,30 +252,25 @@ public: explicit BitcodeFile(MemoryBufferRef M); static bool classof(const InputFile *F) { return F->kind() == BitcodeKind; } template <class ELFT> - void parse(llvm::DenseSet<StringRef> &ComdatGroups); + void parse(llvm::DenseSet<llvm::CachedHashStringRef> &ComdatGroups); ArrayRef<Symbol *> getSymbols() { return Symbols; } - static bool shouldSkip(uint32_t Flags); - std::unique_ptr<llvm::object::IRObjectFile> Obj; + std::unique_ptr<llvm::lto::InputFile> Obj; private: std::vector<Symbol *> Symbols; - llvm::BumpPtrAllocator Alloc; - llvm::StringSaver Saver{Alloc}; - template <class ELFT> - Symbol *createSymbol(const llvm::DenseSet<const llvm::Comdat *> &KeptComdats, - const llvm::object::IRObjectFile &Obj, - const llvm::object::BasicSymbolRef &Sym); }; // .so file. 
template <class ELFT> class SharedFile : public ELFFileBase<ELFT> { typedef ELFFileBase<ELFT> Base; + typedef typename ELFT::Dyn Elf_Dyn; typedef typename ELFT::Shdr Elf_Shdr; typedef typename ELFT::Sym Elf_Sym; - typedef typename ELFT::Word Elf_Word; typedef typename ELFT::SymRange Elf_Sym_Range; - typedef typename ELFT::Versym Elf_Versym; typedef typename ELFT::Verdef Elf_Verdef; + typedef typename ELFT::Versym Elf_Versym; + typedef typename ELFT::Word Elf_Word; + typedef typename ELFT::uint uintX_t; std::vector<StringRef> Undefs; StringRef SoName; @@ -294,9 +310,20 @@ public: bool isNeeded() const { return !AsNeeded || IsUsed; } }; -std::unique_ptr<InputFile> createObjectFile(MemoryBufferRef MB, - StringRef ArchiveName = ""); -std::unique_ptr<InputFile> createSharedFile(MemoryBufferRef MB); +class BinaryFile : public InputFile { +public: + explicit BinaryFile(MemoryBufferRef M) : InputFile(BinaryKind, M) {} + static bool classof(const InputFile *F) { return F->kind() == BinaryKind; } + template <class ELFT> void parse(); + ArrayRef<InputSectionData *> getSections() const { return Sections; } + +private: + std::vector<InputSectionData *> Sections; +}; + +InputFile *createObjectFile(MemoryBufferRef MB, StringRef ArchiveName = "", + uint64_t OffsetInArchive = 0); +InputFile *createSharedFile(MemoryBufferRef MB); } // namespace elf } // namespace lld diff --git a/ELF/InputSection.cpp b/ELF/InputSection.cpp index 6564e7995a89..805e51dab507 100644 --- a/ELF/InputSection.cpp +++ b/ELF/InputSection.cpp @@ -13,103 +13,187 @@ #include "Error.h" #include "InputFiles.h" #include "LinkerScript.h" +#include "Memory.h" #include "OutputSections.h" +#include "Relocations.h" +#include "SyntheticSections.h" #include "Target.h" #include "Thunks.h" - #include "llvm/Support/Compression.h" #include "llvm/Support/Endian.h" +#include <mutex> using namespace llvm; using namespace llvm::ELF; using namespace llvm::object; +using namespace llvm::support; using namespace 
llvm::support::endian; using namespace lld; using namespace lld::elf; -template <class ELFT> bool elf::isDiscarded(InputSectionBase<ELFT> *S) { - return !S || S == &InputSection<ELFT>::Discarded || !S->Live || - Script<ELFT>::X->isDiscarded(S); +// Returns a string to construct an error message. +template <class ELFT> +std::string elf::toString(const InputSectionBase<ELFT> *Sec) { + return (Sec->getFile()->getName() + ":(" + Sec->Name + ")").str(); +} + +template <class ELFT> +static ArrayRef<uint8_t> getSectionContents(elf::ObjectFile<ELFT> *File, + const typename ELFT::Shdr *Hdr) { + if (!File || Hdr->sh_type == SHT_NOBITS) + return makeArrayRef<uint8_t>(nullptr, Hdr->sh_size); + return check(File->getObj().getSectionContents(Hdr)); } template <class ELFT> InputSectionBase<ELFT>::InputSectionBase(elf::ObjectFile<ELFT> *File, - const Elf_Shdr *Header, + uintX_t Flags, uint32_t Type, + uintX_t Entsize, uint32_t Link, + uint32_t Info, uintX_t Addralign, + ArrayRef<uint8_t> Data, StringRef Name, Kind SectionKind) - : Header(Header), File(File), SectionKind(SectionKind), Repl(this), - Compressed(Header->sh_flags & SHF_COMPRESSED) { - // The garbage collector sets sections' Live bits. - // If GC is disabled, all sections are considered live by default. - Live = !Config->GcSections; + : InputSectionData(SectionKind, Name, Data, + !Config->GcSections || !(Flags & SHF_ALLOC)), + File(File), Flags(Flags), Entsize(Entsize), Type(Type), Link(Link), + Info(Info), Repl(this) { + NumRelocations = 0; + AreRelocsRela = false; // The ELF spec states that a value of 0 means the section has // no alignment constraits. - Alignment = std::max<uintX_t>(Header->sh_addralign, 1); + uint64_t V = std::max<uint64_t>(Addralign, 1); + if (!isPowerOf2_64(V)) + fatal(toString(File) + ": section sh_addralign is not a power of 2"); + + // We reject object files having insanely large alignments even though + // they are allowed by the spec. I think 4GB is a reasonable limitation. 
+ // We might want to relax this in the future. + if (V > UINT32_MAX) + fatal(toString(File) + ": section sh_addralign is too large"); + Alignment = V; + + // If it is not a mergeable section, overwrite the flag so that the flag + // is consistent with the class. This inconsistency could occur when + // string merging is disabled using -O0 flag. + if (!Config->Relocatable && !isa<MergeInputSection<ELFT>>(this)) + this->Flags &= ~(SHF_MERGE | SHF_STRINGS); +} + +template <class ELFT> +InputSectionBase<ELFT>::InputSectionBase(elf::ObjectFile<ELFT> *File, + const Elf_Shdr *Hdr, StringRef Name, + Kind SectionKind) + : InputSectionBase(File, Hdr->sh_flags & ~SHF_INFO_LINK, Hdr->sh_type, + Hdr->sh_entsize, Hdr->sh_link, Hdr->sh_info, + Hdr->sh_addralign, getSectionContents(File, Hdr), Name, + SectionKind) { + this->Offset = Hdr->sh_offset; } template <class ELFT> size_t InputSectionBase<ELFT>::getSize() const { + if (auto *S = dyn_cast<SyntheticSection<ELFT>>(this)) + return S->getSize(); + if (auto *D = dyn_cast<InputSection<ELFT>>(this)) if (D->getThunksSize() > 0) return D->getThunkOff() + D->getThunksSize(); - return Header->sh_size; -} -template <class ELFT> StringRef InputSectionBase<ELFT>::getSectionName() const { - return check(File->getObj().getSectionName(this->Header)); + return Data.size(); } -template <class ELFT> -ArrayRef<uint8_t> InputSectionBase<ELFT>::getSectionData() const { - if (Compressed) - return ArrayRef<uint8_t>((const uint8_t *)Uncompressed.data(), - Uncompressed.size()); - return check(this->File->getObj().getSectionContents(this->Header)); +// Returns a string for an error message. 
+template <class SectionT> static std::string getName(SectionT *Sec) { + return (Sec->getFile()->getName() + ":(" + Sec->Name + ")").str(); } template <class ELFT> typename ELFT::uint InputSectionBase<ELFT>::getOffset(uintX_t Offset) const { - switch (SectionKind) { + switch (kind()) { case Regular: return cast<InputSection<ELFT>>(this)->OutSecOff + Offset; + case Synthetic: + // For synthetic sections we treat offset -1 as the end of the section. + // The same approach is used for synthetic symbols (DefinedSynthetic). + return cast<InputSection<ELFT>>(this)->OutSecOff + + (Offset == uintX_t(-1) ? getSize() : Offset); case EHFrame: - return cast<EhInputSection<ELFT>>(this)->getOffset(Offset); + // The file crtbeginT.o has relocations pointing to the start of an empty + // .eh_frame that is known to be the first in the link. It does that to + // identify the start of the output .eh_frame. + return Offset; case Merge: return cast<MergeInputSection<ELFT>>(this)->getOffset(Offset); - case MipsReginfo: - case MipsOptions: - // MIPS .reginfo and .MIPS.options sections are consumed by the linker, - // and the linker produces a single output section. It is possible that - // input files contain section symbol points to the corresponding input - // section. Redirect it to the produced output section. - if (Offset != 0) - fatal("Unsupported reference to the middle of '" + getSectionName() + - "' section"); - return this->OutSec->getVA(); } llvm_unreachable("invalid section kind"); } -template <class ELFT> void InputSectionBase<ELFT>::uncompress() { - if (!zlib::isAvailable()) - fatal("build lld with zlib to enable compressed sections support"); +template <class ELFT> bool InputSectionBase<ELFT>::isCompressed() const { + return (Flags & SHF_COMPRESSED) || Name.startswith(".zdebug"); +} - // A compressed section consists of a header of Elf_Chdr type - // followed by compressed data. 
- ArrayRef<uint8_t> Data = - check(this->File->getObj().getSectionContents(this->Header)); +// Returns compressed data and its size when uncompressed. +template <class ELFT> +std::pair<ArrayRef<uint8_t>, uint64_t> +InputSectionBase<ELFT>::getElfCompressedData(ArrayRef<uint8_t> Data) { + // Compressed section with Elf_Chdr is the ELF standard. if (Data.size() < sizeof(Elf_Chdr)) - fatal("corrupt compressed section"); - + fatal(toString(this) + ": corrupted compressed section"); auto *Hdr = reinterpret_cast<const Elf_Chdr *>(Data.data()); - Data = Data.slice(sizeof(Elf_Chdr)); - if (Hdr->ch_type != ELFCOMPRESS_ZLIB) - fatal("unsupported compression type"); + fatal(toString(this) + ": unsupported compression type"); + return {Data.slice(sizeof(*Hdr)), Hdr->ch_size}; +} - StringRef Buf((const char *)Data.data(), Data.size()); - if (zlib::uncompress(Buf, Uncompressed, Hdr->ch_size) != zlib::StatusOK) - fatal("error uncompressing section"); +// Returns compressed data and its size when uncompressed. +template <class ELFT> +std::pair<ArrayRef<uint8_t>, uint64_t> +InputSectionBase<ELFT>::getRawCompressedData(ArrayRef<uint8_t> Data) { + // Compressed sections without Elf_Chdr header contain this header + // instead. This is a GNU extension. + struct ZlibHeader { + char Magic[4]; // Should be "ZLIB" + char Size[8]; // Uncompressed size in big-endian + }; + + if (Data.size() < sizeof(ZlibHeader)) + fatal(toString(this) + ": corrupted compressed section"); + auto *Hdr = reinterpret_cast<const ZlibHeader *>(Data.data()); + if (memcmp(Hdr->Magic, "ZLIB", 4)) + fatal(toString(this) + ": broken ZLIB-compressed section"); + return {Data.slice(sizeof(*Hdr)), read64be(Hdr->Size)}; +} + +// Uncompress section contents. Note that this function is called +// from parallel_for_each, so it must be thread-safe. 
+template <class ELFT> void InputSectionBase<ELFT>::uncompress() { + if (!zlib::isAvailable()) + fatal(toString(this) + + ": build lld with zlib to enable compressed sections support"); + + // This section is compressed. Here we decompress it. Ideally, all + // compressed sections have SHF_COMPRESSED bit and their contents + // start with headers of Elf_Chdr type. However, sections whose + // names start with ".zdebug_" don't have the bit and contains a raw + // ZLIB-compressed data (which is a bad thing because section names + // shouldn't be significant in ELF.) We need to be able to read both. + ArrayRef<uint8_t> Buf; // Compressed data + size_t Size; // Uncompressed size + if (Flags & SHF_COMPRESSED) + std::tie(Buf, Size) = getElfCompressedData(Data); + else + std::tie(Buf, Size) = getRawCompressedData(Data); + + // Uncompress Buf. + char *OutputBuf; + { + static std::mutex Mu; + std::lock_guard<std::mutex> Lock(Mu); + OutputBuf = BAlloc.Allocate<char>(Size); + } + if (zlib::uncompress(toStringRef(Buf), OutputBuf, Size) != zlib::StatusOK) + fatal(toString(this) + ": error while uncompressing section"); + Data = ArrayRef<uint8_t>((uint8_t *)OutputBuf, Size); } template <class ELFT> @@ -119,29 +203,71 @@ InputSectionBase<ELFT>::getOffset(const DefinedRegular<ELFT> &Sym) const { } template <class ELFT> +InputSectionBase<ELFT> *InputSectionBase<ELFT>::getLinkOrderDep() const { + if ((Flags & SHF_LINK_ORDER) && Link != 0) + return getFile()->getSections()[Link]; + return nullptr; +} + +// Returns a source location string. Used to construct an error message. +template <class ELFT> +std::string InputSectionBase<ELFT>::getLocation(typename ELFT::uint Offset) { + // First check if we can get desired values from debugging information. + std::string LineInfo = File->getLineInfo(this, Offset); + if (!LineInfo.empty()) + return LineInfo; + + // File->SourceFile contains STT_FILE symbol that contains a + // source file name. If it's missing, we use an object file name. 
+ std::string SrcFile = File->SourceFile; + if (SrcFile.empty()) + SrcFile = toString(File); + + // Find a function symbol that encloses a given location. + for (SymbolBody *B : File->getSymbols()) + if (auto *D = dyn_cast<DefinedRegular<ELFT>>(B)) + if (D->Section == this && D->Type == STT_FUNC) + if (D->Value <= Offset && Offset < D->Value + D->Size) + return SrcFile + ":(function " + toString(*D) + ")"; + + // If there's no symbol, print out the offset in the section. + return (SrcFile + ":(" + Name + "+0x" + utohexstr(Offset) + ")").str(); +} + +template <class ELFT> +InputSection<ELFT>::InputSection() : InputSectionBase<ELFT>() {} + +template <class ELFT> +InputSection<ELFT>::InputSection(uintX_t Flags, uint32_t Type, + uintX_t Addralign, ArrayRef<uint8_t> Data, + StringRef Name, Kind K) + : InputSectionBase<ELFT>(nullptr, Flags, Type, + /*Entsize*/ 0, /*Link*/ 0, /*Info*/ 0, Addralign, + Data, Name, K) {} + +template <class ELFT> InputSection<ELFT>::InputSection(elf::ObjectFile<ELFT> *F, - const Elf_Shdr *Header) - : InputSectionBase<ELFT>(F, Header, Base::Regular) {} + const Elf_Shdr *Header, StringRef Name) + : InputSectionBase<ELFT>(F, Header, Name, Base::Regular) {} template <class ELFT> -bool InputSection<ELFT>::classof(const InputSectionBase<ELFT> *S) { - return S->SectionKind == Base::Regular; +bool InputSection<ELFT>::classof(const InputSectionData *S) { + return S->kind() == Base::Regular || S->kind() == Base::Synthetic; } template <class ELFT> InputSectionBase<ELFT> *InputSection<ELFT>::getRelocatedSection() { - assert(this->Header->sh_type == SHT_RELA || this->Header->sh_type == SHT_REL); + assert(this->Type == SHT_RELA || this->Type == SHT_REL); ArrayRef<InputSectionBase<ELFT> *> Sections = this->File->getSections(); - return Sections[this->Header->sh_info]; + return Sections[this->Info]; } -template <class ELFT> -void InputSection<ELFT>::addThunk(const Thunk<ELFT> *T) { +template <class ELFT> void InputSection<ELFT>::addThunk(const Thunk<ELFT> 
*T) { Thunks.push_back(T); } template <class ELFT> uint64_t InputSection<ELFT>::getThunkOff() const { - return this->Header->sh_size; + return this->Data.size(); } template <class ELFT> uint64_t InputSection<ELFT>::getThunksSize() const { @@ -163,35 +289,62 @@ void InputSection<ELFT>::copyRelocations(uint8_t *Buf, ArrayRef<RelTy> Rels) { uint32_t Type = Rel.getType(Config->Mips64EL); SymbolBody &Body = this->File->getRelocTargetSym(Rel); - RelTy *P = reinterpret_cast<RelTy *>(Buf); + Elf_Rela *P = reinterpret_cast<Elf_Rela *>(Buf); Buf += sizeof(RelTy); + if (Config->Rela) + P->r_addend = getAddend<ELFT>(Rel); P->r_offset = RelocatedSection->getOffset(Rel.r_offset); P->setSymbolAndType(Body.DynsymIndex, Type, Config->Mips64EL); } } -// Page(Expr) is the page address of the expression Expr, defined -// as (Expr & ~0xFFF). (This applies even if the machine page size -// supported by the platform has a different value.) -static uint64_t getAArch64Page(uint64_t Expr) { - return Expr & (~static_cast<uint64_t>(0xFFF)); +static uint32_t getARMUndefinedRelativeWeakVA(uint32_t Type, uint32_t A, + uint32_t P) { + switch (Type) { + case R_ARM_THM_JUMP11: + return P + 2; + case R_ARM_CALL: + case R_ARM_JUMP24: + case R_ARM_PC24: + case R_ARM_PLT32: + case R_ARM_PREL31: + case R_ARM_THM_JUMP19: + case R_ARM_THM_JUMP24: + return P + 4; + case R_ARM_THM_CALL: + // We don't want an interworking BLX to ARM + return P + 5; + default: + return A; + } } -template <class ELFT> -static typename ELFT::uint getSymVA(uint32_t Type, typename ELFT::uint A, - typename ELFT::uint P, - const SymbolBody &Body, RelExpr Expr) { - typedef typename ELFT::uint uintX_t; +static uint64_t getAArch64UndefinedRelativeWeakVA(uint64_t Type, uint64_t A, + uint64_t P) { + switch (Type) { + case R_AARCH64_CALL26: + case R_AARCH64_CONDBR19: + case R_AARCH64_JUMP26: + case R_AARCH64_TSTBR14: + return P + 4; + default: + return A; + } +} +template <class ELFT> +static typename ELFT::uint 
+getRelocTargetVA(uint32_t Type, typename ELFT::uint A, typename ELFT::uint P, + const SymbolBody &Body, RelExpr Expr) { switch (Expr) { case R_HINT: + case R_TLSDESC_CALL: llvm_unreachable("cannot relocate hint relocs"); case R_TLSLD: - return Out<ELFT>::Got->getTlsIndexOff() + A - - Out<ELFT>::Got->getNumEntries() * sizeof(uintX_t); + return In<ELFT>::Got->getTlsIndexOff() + A - In<ELFT>::Got->getSize(); case R_TLSLD_PC: - return Out<ELFT>::Got->getTlsIndexVA() + A - P; + return In<ELFT>::Got->getTlsIndexVA() + A - P; case R_THUNK_ABS: return Body.getThunkVA<ELFT>() + A; case R_THUNK_PC: @@ -200,14 +353,14 @@ static typename ELFT::uint getSymVA(uint32_t Type, typename ELFT::uint A, case R_PPC_TOC: return getPPC64TocBase() + A; case R_TLSGD: - return Out<ELFT>::Got->getGlobalDynOffset(Body) + A - - Out<ELFT>::Got->getNumEntries() * sizeof(uintX_t); + return In<ELFT>::Got->getGlobalDynOffset(Body) + A - + In<ELFT>::Got->getSize(); case R_TLSGD_PC: - return Out<ELFT>::Got->getGlobalDynAddr(Body) + A - P; + return In<ELFT>::Got->getGlobalDynAddr(Body) + A - P; case R_TLSDESC: - return Out<ELFT>::Got->getGlobalDynAddr(Body) + A; + return In<ELFT>::Got->getGlobalDynAddr(Body) + A; case R_TLSDESC_PAGE: - return getAArch64Page(Out<ELFT>::Got->getGlobalDynAddr(Body) + A) - + return getAArch64Page(In<ELFT>::Got->getGlobalDynAddr(Body) + A) - getAArch64Page(P); case R_PLT: return Body.getPltVA<ELFT>() + A; @@ -217,11 +370,13 @@ static typename ELFT::uint getSymVA(uint32_t Type, typename ELFT::uint A, case R_SIZE: return Body.getSize<ELFT>() + A; case R_GOTREL: - return Body.getVA<ELFT>(A) - Out<ELFT>::Got->getVA(); + return Body.getVA<ELFT>(A) - In<ELFT>::Got->getVA(); + case R_GOTREL_FROM_END: + return Body.getVA<ELFT>(A) - In<ELFT>::Got->getVA() - + In<ELFT>::Got->getSize(); case R_RELAX_TLS_GD_TO_IE_END: case R_GOT_FROM_END: - return Body.getGotOffset<ELFT>() + A - - Out<ELFT>::Got->getNumEntries() * sizeof(uintX_t); + return Body.getGotOffset<ELFT>() + A - 
In<ELFT>::Got->getSize(); case R_RELAX_TLS_GD_TO_IE_ABS: case R_GOT: return Body.getGotVA<ELFT>() + A; @@ -232,11 +387,21 @@ static typename ELFT::uint getSymVA(uint32_t Type, typename ELFT::uint A, case R_GOT_PC: return Body.getGotVA<ELFT>() + A - P; case R_GOTONLY_PC: - return Out<ELFT>::Got->getVA() + A - P; + return In<ELFT>::Got->getVA() + A - P; + case R_GOTONLY_PC_FROM_END: + return In<ELFT>::Got->getVA() + A - P + In<ELFT>::Got->getSize(); case R_RELAX_TLS_LD_TO_LE: case R_RELAX_TLS_IE_TO_LE: case R_RELAX_TLS_GD_TO_LE: case R_TLS: + // A weak undefined TLS symbol resolves to the base of the TLS + // block, i.e. gets a value of zero. If we pass --gc-sections to + // lld and .tbss is not referenced, it gets reclaimed and we don't + // create a TLS program header. Therefore, we resolve this + // statically to zero. + if (Body.isTls() && (Body.isLazy() || Body.isUndefined()) && + Body.symbol()->isWeak()) + return 0; if (Target->TcbSize) return Body.getVA<ELFT>(A) + alignTo(Target->TcbSize, Out<ELFT>::TlsPhdr->p_align); @@ -253,18 +418,26 @@ static typename ELFT::uint getSymVA(uint32_t Type, typename ELFT::uint A, // If relocation against MIPS local symbol requires GOT entry, this entry // should be initialized by 'page address'. This address is high 16-bits // of sum the symbol's value and the addend. - return Out<ELFT>::Got->getMipsLocalPageOffset(Body.getVA<ELFT>(A)); + return In<ELFT>::MipsGot->getVA() + + In<ELFT>::MipsGot->getPageEntryOffset(Body, A) - + In<ELFT>::MipsGot->getGp(); case R_MIPS_GOT_OFF: + case R_MIPS_GOT_OFF32: // In case of MIPS if a GOT relocation has non-zero addend this addend // should be applied to the GOT entry content not to the GOT entry offset. // That is why we use separate expression type. 
- return Out<ELFT>::Got->getMipsGotOffset(Body, A); + return In<ELFT>::MipsGot->getVA() + + In<ELFT>::MipsGot->getBodyEntryOffset(Body, A) - + In<ELFT>::MipsGot->getGp(); + case R_MIPS_GOTREL: + return Body.getVA<ELFT>(A) - In<ELFT>::MipsGot->getGp(); case R_MIPS_TLSGD: - return Out<ELFT>::Got->getGlobalDynOffset(Body) + - Out<ELFT>::Got->getMipsTlsOffset() - MipsGPOffset; + return In<ELFT>::MipsGot->getVA() + In<ELFT>::MipsGot->getTlsOffset() + + In<ELFT>::MipsGot->getGlobalDynOffset(Body) - + In<ELFT>::MipsGot->getGp(); case R_MIPS_TLSLD: - return Out<ELFT>::Got->getTlsIndexOff() + - Out<ELFT>::Got->getMipsTlsOffset() - MipsGPOffset; + return In<ELFT>::MipsGot->getVA() + In<ELFT>::MipsGot->getTlsOffset() + + In<ELFT>::MipsGot->getTlsIndexOff() - In<ELFT>::MipsGot->getGp(); case R_PPC_OPD: { uint64_t SymVA = Body.getVA<ELFT>(A); // If we have an undefined weak symbol, we might get here with a symbol @@ -275,8 +448,8 @@ static typename ELFT::uint getSymVA(uint32_t Type, typename ELFT::uint A, if (Out<ELF64BE>::Opd) { // If this is a local call, and we currently have the address of a // function-descriptor, get the underlying code address instead. - uint64_t OpdStart = Out<ELF64BE>::Opd->getVA(); - uint64_t OpdEnd = OpdStart + Out<ELF64BE>::Opd->getSize(); + uint64_t OpdStart = Out<ELF64BE>::Opd->Addr; + uint64_t OpdEnd = OpdStart + Out<ELF64BE>::Opd->Size; bool InOpd = OpdStart <= SymVA && SymVA < OpdEnd; if (InOpd) SymVA = read64be(&Out<ELF64BE>::OpdBuf[SymVA - OpdStart]); @@ -284,10 +457,20 @@ static typename ELFT::uint getSymVA(uint32_t Type, typename ELFT::uint A, return SymVA - P; } case R_PC: + if (Body.isUndefined() && !Body.isLocal() && Body.symbol()->isWeak()) { + // On ARM and AArch64 a branch to an undefined weak resolves to the + // next instruction, otherwise the place. 
+ if (Config->EMachine == EM_ARM) + return getARMUndefinedRelativeWeakVA(Type, A, P); + if (Config->EMachine == EM_AARCH64) + return getAArch64UndefinedRelativeWeakVA(Type, A, P); + } case R_RELAX_GOT_PC: return Body.getVA<ELFT>(A) - P; case R_PLT_PAGE_PC: case R_PAGE_PC: + if (Body.isUndefined() && !Body.isLocal() && Body.symbol()->isWeak()) + return getAArch64Page(A); return getAArch64Page(Body.getVA<ELFT>(A)) - getAArch64Page(P); } llvm_unreachable("Invalid expression"); @@ -303,7 +486,6 @@ static typename ELFT::uint getSymVA(uint32_t Type, typename ELFT::uint A, template <class ELFT> template <class RelTy> void InputSection<ELFT>::relocateNonAlloc(uint8_t *Buf, ArrayRef<RelTy> Rels) { - const unsigned Bits = sizeof(uintX_t) * 8; for (const RelTy &Rel : Rels) { uint32_t Type = Rel.getType(Config->Mips64EL); uintX_t Offset = this->getOffset(Rel.r_offset); @@ -314,13 +496,15 @@ void InputSection<ELFT>::relocateNonAlloc(uint8_t *Buf, ArrayRef<RelTy> Rels) { SymbolBody &Sym = this->File->getRelocTargetSym(Rel); if (Target->getRelExpr(Type, Sym) != R_ABS) { - error(this->getSectionName() + " has non-ABS reloc"); + error(this->getLocation(Offset) + ": has non-ABS reloc"); return; } - uintX_t AddrLoc = this->OutSec->getVA() + Offset; - uint64_t SymVA = - SignExtend64<Bits>(getSymVA<ELFT>(Type, Addend, AddrLoc, Sym, R_ABS)); + uintX_t AddrLoc = this->OutSec->Addr + Offset; + uint64_t SymVA = 0; + if (!Sym.isTls() || Out<ELFT>::TlsPhdr) + SymVA = SignExtend64<sizeof(uintX_t) * 8>( + getRelocTargetVA<ELFT>(Type, Addend, AddrLoc, Sym, R_ABS)); Target->relocateOne(BufLoc, Type, SymVA); } } @@ -331,78 +515,80 @@ void InputSectionBase<ELFT>::relocate(uint8_t *Buf, uint8_t *BufEnd) { // vector only for SHF_ALLOC'ed sections. For other sections, // we handle relocations directly here. 
auto *IS = dyn_cast<InputSection<ELFT>>(this); - if (IS && !(IS->Header->sh_flags & SHF_ALLOC)) { - for (const Elf_Shdr *RelSec : IS->RelocSections) { - if (RelSec->sh_type == SHT_RELA) - IS->relocateNonAlloc(Buf, IS->File->getObj().relas(RelSec)); - else - IS->relocateNonAlloc(Buf, IS->File->getObj().rels(RelSec)); - } + if (IS && !(IS->Flags & SHF_ALLOC)) { + if (IS->AreRelocsRela) + IS->relocateNonAlloc(Buf, IS->relas()); + else + IS->relocateNonAlloc(Buf, IS->rels()); return; } const unsigned Bits = sizeof(uintX_t) * 8; - for (const Relocation<ELFT> &Rel : Relocations) { - uintX_t Offset = Rel.InputSec->getOffset(Rel.Offset); + for (const Relocation &Rel : Relocations) { + uintX_t Offset = getOffset(Rel.Offset); uint8_t *BufLoc = Buf + Offset; uint32_t Type = Rel.Type; uintX_t A = Rel.Addend; - uintX_t AddrLoc = OutSec->getVA() + Offset; + uintX_t AddrLoc = OutSec->Addr + Offset; RelExpr Expr = Rel.Expr; - uint64_t SymVA = - SignExtend64<Bits>(getSymVA<ELFT>(Type, A, AddrLoc, *Rel.Sym, Expr)); + uint64_t TargetVA = SignExtend64<Bits>( + getRelocTargetVA<ELFT>(Type, A, AddrLoc, *Rel.Sym, Expr)); switch (Expr) { case R_RELAX_GOT_PC: case R_RELAX_GOT_PC_NOPIC: - Target->relaxGot(BufLoc, SymVA); + Target->relaxGot(BufLoc, TargetVA); break; case R_RELAX_TLS_IE_TO_LE: - Target->relaxTlsIeToLe(BufLoc, Type, SymVA); + Target->relaxTlsIeToLe(BufLoc, Type, TargetVA); break; case R_RELAX_TLS_LD_TO_LE: - Target->relaxTlsLdToLe(BufLoc, Type, SymVA); + Target->relaxTlsLdToLe(BufLoc, Type, TargetVA); break; case R_RELAX_TLS_GD_TO_LE: case R_RELAX_TLS_GD_TO_LE_NEG: - Target->relaxTlsGdToLe(BufLoc, Type, SymVA); + Target->relaxTlsGdToLe(BufLoc, Type, TargetVA); break; case R_RELAX_TLS_GD_TO_IE: case R_RELAX_TLS_GD_TO_IE_ABS: case R_RELAX_TLS_GD_TO_IE_PAGE_PC: case R_RELAX_TLS_GD_TO_IE_END: - Target->relaxTlsGdToIe(BufLoc, Type, SymVA); + Target->relaxTlsGdToIe(BufLoc, Type, TargetVA); break; case R_PPC_PLT_OPD: // Patch a nop (0x60000000) to a ld. 
if (BufLoc + 8 <= BufEnd && read32be(BufLoc + 4) == 0x60000000) write32be(BufLoc + 4, 0xe8410028); // ld %r2, 40(%r1) - // fallthrough + // fallthrough default: - Target->relocateOne(BufLoc, Type, SymVA); + Target->relocateOne(BufLoc, Type, TargetVA); break; } } } template <class ELFT> void InputSection<ELFT>::writeTo(uint8_t *Buf) { - if (this->Header->sh_type == SHT_NOBITS) + if (this->Type == SHT_NOBITS) + return; + + if (auto *S = dyn_cast<SyntheticSection<ELFT>>(this)) { + S->writeTo(Buf + OutSecOff); return; - ELFFile<ELFT> &EObj = this->File->getObj(); + } // If -r is given, then an InputSection may be a relocation section. - if (this->Header->sh_type == SHT_RELA) { - copyRelocations(Buf + OutSecOff, EObj.relas(this->Header)); + if (this->Type == SHT_RELA) { + copyRelocations(Buf + OutSecOff, this->template getDataAs<Elf_Rela>()); return; } - if (this->Header->sh_type == SHT_REL) { - copyRelocations(Buf + OutSecOff, EObj.rels(this->Header)); + if (this->Type == SHT_REL) { + copyRelocations(Buf + OutSecOff, this->template getDataAs<Elf_Rel>()); return; } // Copy section contents from source object file to output file. - ArrayRef<uint8_t> Data = this->getSectionData(); + ArrayRef<uint8_t> Data = this->Data; memcpy(Buf + OutSecOff, Data.data(), Data.size()); // Iterate over all relocation sections that apply to this section. 
@@ -431,15 +617,9 @@ void InputSection<ELFT>::replace(InputSection<ELFT> *Other) { } template <class ELFT> -SplitInputSection<ELFT>::SplitInputSection( - elf::ObjectFile<ELFT> *File, const Elf_Shdr *Header, - typename InputSectionBase<ELFT>::Kind SectionKind) - : InputSectionBase<ELFT>(File, Header, SectionKind) {} - -template <class ELFT> EhInputSection<ELFT>::EhInputSection(elf::ObjectFile<ELFT> *F, - const Elf_Shdr *Header) - : SplitInputSection<ELFT>(F, Header, InputSectionBase<ELFT>::EHFrame) { + const Elf_Shdr *Header, StringRef Name) + : InputSectionBase<ELFT>(F, Header, Name, InputSectionBase<ELFT>::EHFrame) { // Mark .eh_frame sections as live by default because there are // usually no relocations that point to .eh_frames. Otherwise, // the garbage collector would drop all .eh_frame sections. @@ -447,18 +627,54 @@ EhInputSection<ELFT>::EhInputSection(elf::ObjectFile<ELFT> *F, } template <class ELFT> -bool EhInputSection<ELFT>::classof(const InputSectionBase<ELFT> *S) { - return S->SectionKind == InputSectionBase<ELFT>::EHFrame; +bool EhInputSection<ELFT>::classof(const InputSectionData *S) { + return S->kind() == InputSectionBase<ELFT>::EHFrame; +} + +// Returns the index of the first relocation that points to a region between +// Begin and Begin+Size. +template <class IntTy, class RelTy> +static unsigned getReloc(IntTy Begin, IntTy Size, const ArrayRef<RelTy> &Rels, + unsigned &RelocI) { + // Start search from RelocI for fast access. That works because the + // relocations are sorted in .eh_frame. + for (unsigned N = Rels.size(); RelocI < N; ++RelocI) { + const RelTy &Rel = Rels[RelocI]; + if (Rel.r_offset < Begin) + continue; + + if (Rel.r_offset < Begin + Size) + return RelocI; + return -1; + } + return -1; } // .eh_frame is a sequence of CIE or FDE records. // This function splits an input section into records and returns them. +template <class ELFT> void EhInputSection<ELFT>::split() { + // Early exit if already split. 
+ if (!this->Pieces.empty()) + return; + + if (this->NumRelocations) { + if (this->AreRelocsRela) + split(this->relas()); + else + split(this->rels()); + return; + } + split(makeArrayRef<typename ELFT::Rela>(nullptr, nullptr)); +} + template <class ELFT> -void EhInputSection<ELFT>::split() { - ArrayRef<uint8_t> Data = this->getSectionData(); +template <class RelTy> +void EhInputSection<ELFT>::split(ArrayRef<RelTy> Rels) { + ArrayRef<uint8_t> Data = this->Data; + unsigned RelI = 0; for (size_t Off = 0, End = Data.size(); Off != End;) { - size_t Size = readEhRecordSize<ELFT>(Data.slice(Off)); - this->Pieces.emplace_back(Off, Data.slice(Off, Size)); + size_t Size = readEhRecordSize<ELFT>(this, Off); + this->Pieces.emplace_back(Off, this, Size, getReloc(Off, Size, Rels, RelI)); // The empty record is the end marker. if (Size == 4) break; @@ -466,21 +682,6 @@ void EhInputSection<ELFT>::split() { } } -template <class ELFT> -typename ELFT::uint EhInputSection<ELFT>::getOffset(uintX_t Offset) const { - // The file crtbeginT.o has relocations pointing to the start of an empty - // .eh_frame that is known to be the first in the link. It does that to - // identify the start of the output .eh_frame. Handle this special case. - if (this->getSectionHdr()->sh_size == 0) - return Offset; - const SectionPiece *Piece = this->getSectionPiece(Offset); - if (Piece->OutputOff == size_t(-1)) - return -1; // Not in the output - - uintX_t Addend = Offset - Piece->InputOff; - return Piece->OutputOff + Addend; -} - static size_t findNull(ArrayRef<uint8_t> A, size_t EntSize) { // Optimize the common case. StringRef S((const char *)A.data(), A.size()); @@ -497,75 +698,96 @@ static size_t findNull(ArrayRef<uint8_t> A, size_t EntSize) { // Split SHF_STRINGS section. Such section is a sequence of // null-terminated strings. 
-static std::vector<SectionPiece> splitStrings(ArrayRef<uint8_t> Data, - size_t EntSize) { - std::vector<SectionPiece> V; +template <class ELFT> +void MergeInputSection<ELFT>::splitStrings(ArrayRef<uint8_t> Data, + size_t EntSize) { size_t Off = 0; + bool IsAlloc = this->Flags & SHF_ALLOC; while (!Data.empty()) { size_t End = findNull(Data, EntSize); if (End == StringRef::npos) - fatal("string is not null terminated"); + fatal(toString(this) + ": string is not null terminated"); size_t Size = End + EntSize; - V.emplace_back(Off, Data.slice(0, Size)); + Pieces.emplace_back(Off, !IsAlloc); + Hashes.push_back(hash_value(toStringRef(Data.slice(0, Size)))); Data = Data.slice(Size); Off += Size; } - return V; } // Split non-SHF_STRINGS section. Such section is a sequence of // fixed size records. -static std::vector<SectionPiece> splitNonStrings(ArrayRef<uint8_t> Data, - size_t EntSize) { - std::vector<SectionPiece> V; +template <class ELFT> +void MergeInputSection<ELFT>::splitNonStrings(ArrayRef<uint8_t> Data, + size_t EntSize) { size_t Size = Data.size(); assert((Size % EntSize) == 0); - for (unsigned I = 0, N = Size; I != N; I += EntSize) - V.emplace_back(I, Data.slice(I, EntSize)); - return V; + bool IsAlloc = this->Flags & SHF_ALLOC; + for (unsigned I = 0, N = Size; I != N; I += EntSize) { + Hashes.push_back(hash_value(toStringRef(Data.slice(I, EntSize)))); + Pieces.emplace_back(I, !IsAlloc); + } } template <class ELFT> MergeInputSection<ELFT>::MergeInputSection(elf::ObjectFile<ELFT> *F, - const Elf_Shdr *Header) - : SplitInputSection<ELFT>(F, Header, InputSectionBase<ELFT>::Merge) {} + const Elf_Shdr *Header, + StringRef Name) + : InputSectionBase<ELFT>(F, Header, Name, InputSectionBase<ELFT>::Merge) {} +// This function is called after we obtain a complete list of input sections +// that need to be linked. This is responsible to split section contents +// into small chunks for further processing. +// +// Note that this function is called from parallel_for_each. 
This must be +// thread-safe (i.e. no memory allocation from the pools). template <class ELFT> void MergeInputSection<ELFT>::splitIntoPieces() { - ArrayRef<uint8_t> Data = this->getSectionData(); - uintX_t EntSize = this->Header->sh_entsize; - if (this->Header->sh_flags & SHF_STRINGS) - this->Pieces = splitStrings(Data, EntSize); + ArrayRef<uint8_t> Data = this->Data; + uintX_t EntSize = this->Entsize; + if (this->Flags & SHF_STRINGS) + splitStrings(Data, EntSize); else - this->Pieces = splitNonStrings(Data, EntSize); + splitNonStrings(Data, EntSize); - if (Config->GcSections) + if (Config->GcSections && (this->Flags & SHF_ALLOC)) for (uintX_t Off : LiveOffsets) this->getSectionPiece(Off)->Live = true; } template <class ELFT> -bool MergeInputSection<ELFT>::classof(const InputSectionBase<ELFT> *S) { - return S->SectionKind == InputSectionBase<ELFT>::Merge; +bool MergeInputSection<ELFT>::classof(const InputSectionData *S) { + return S->kind() == InputSectionBase<ELFT>::Merge; } // Do binary search to get a section piece at a given input offset. template <class ELFT> -SectionPiece *SplitInputSection<ELFT>::getSectionPiece(uintX_t Offset) { - auto *This = static_cast<const SplitInputSection<ELFT> *>(this); +SectionPiece *MergeInputSection<ELFT>::getSectionPiece(uintX_t Offset) { + auto *This = static_cast<const MergeInputSection<ELFT> *>(this); return const_cast<SectionPiece *>(This->getSectionPiece(Offset)); } +template <class It, class T, class Compare> +static It fastUpperBound(It First, It Last, const T &Value, Compare Comp) { + size_t Size = std::distance(First, Last); + assert(Size != 0); + while (Size != 1) { + size_t H = Size / 2; + const It MI = First + H; + Size -= H; + First = Comp(Value, *MI) ? First : First + H; + } + return Comp(Value, *First) ? 
First : First + 1; +} + template <class ELFT> const SectionPiece * -SplitInputSection<ELFT>::getSectionPiece(uintX_t Offset) const { - ArrayRef<uint8_t> D = this->getSectionData(); - StringRef Data((const char *)D.data(), D.size()); - uintX_t Size = Data.size(); +MergeInputSection<ELFT>::getSectionPiece(uintX_t Offset) const { + uintX_t Size = this->Data.size(); if (Offset >= Size) - fatal("entry is past the end of the section"); + fatal(toString(this) + ": entry is past the end of the section"); // Find the element this offset points to. - auto I = std::upper_bound( + auto I = fastUpperBound( Pieces.begin(), Pieces.end(), Offset, [](const uintX_t &A, const SectionPiece &B) { return A < B.InputOff; }); --I; @@ -577,84 +799,31 @@ SplitInputSection<ELFT>::getSectionPiece(uintX_t Offset) const { // it is not just an addition to a base output offset. template <class ELFT> typename ELFT::uint MergeInputSection<ELFT>::getOffset(uintX_t Offset) const { + // Initialize OffsetMap lazily. + std::call_once(InitOffsetMap, [&] { + OffsetMap.reserve(Pieces.size()); + for (const SectionPiece &Piece : Pieces) + OffsetMap[Piece.InputOff] = Piece.OutputOff; + }); + + // Find a string starting at a given offset. auto It = OffsetMap.find(Offset); if (It != OffsetMap.end()) return It->second; + if (!this->Live) + return 0; + // If Offset is not at beginning of a section piece, it is not in the map. // In that case we need to search from the original section piece vector. const SectionPiece &Piece = *this->getSectionPiece(Offset); - assert(Piece.Live); + if (!Piece.Live) + return 0; + uintX_t Addend = Offset - Piece.InputOff; return Piece.OutputOff + Addend; } -// Create a map from input offsets to output offsets for all section pieces. -// It is called after finalize(). 
-template <class ELFT> void MergeInputSection<ELFT>::finalizePieces() { - OffsetMap.grow(this->Pieces.size()); - for (SectionPiece &Piece : this->Pieces) { - if (!Piece.Live) - continue; - if (Piece.OutputOff == size_t(-1)) { - // Offsets of tail-merged strings are computed lazily. - auto *OutSec = static_cast<MergeOutputSection<ELFT> *>(this->OutSec); - ArrayRef<uint8_t> D = Piece.data(); - StringRef S((const char *)D.data(), D.size()); - Piece.OutputOff = OutSec->getOffset(S); - } - OffsetMap[Piece.InputOff] = Piece.OutputOff; - } -} - -template <class ELFT> -MipsReginfoInputSection<ELFT>::MipsReginfoInputSection(elf::ObjectFile<ELFT> *F, - const Elf_Shdr *Hdr) - : InputSectionBase<ELFT>(F, Hdr, InputSectionBase<ELFT>::MipsReginfo) { - // Initialize this->Reginfo. - ArrayRef<uint8_t> D = this->getSectionData(); - if (D.size() != sizeof(Elf_Mips_RegInfo<ELFT>)) { - error("invalid size of .reginfo section"); - return; - } - Reginfo = reinterpret_cast<const Elf_Mips_RegInfo<ELFT> *>(D.data()); -} - -template <class ELFT> -bool MipsReginfoInputSection<ELFT>::classof(const InputSectionBase<ELFT> *S) { - return S->SectionKind == InputSectionBase<ELFT>::MipsReginfo; -} - -template <class ELFT> -MipsOptionsInputSection<ELFT>::MipsOptionsInputSection(elf::ObjectFile<ELFT> *F, - const Elf_Shdr *Hdr) - : InputSectionBase<ELFT>(F, Hdr, InputSectionBase<ELFT>::MipsOptions) { - // Find ODK_REGINFO option in the section's content. 
- ArrayRef<uint8_t> D = this->getSectionData(); - while (!D.empty()) { - if (D.size() < sizeof(Elf_Mips_Options<ELFT>)) { - error("invalid size of .MIPS.options section"); - break; - } - auto *O = reinterpret_cast<const Elf_Mips_Options<ELFT> *>(D.data()); - if (O->kind == ODK_REGINFO) { - Reginfo = &O->getRegInfo(); - break; - } - D = D.slice(O->size); - } -} - -template <class ELFT> -bool MipsOptionsInputSection<ELFT>::classof(const InputSectionBase<ELFT> *S) { - return S->SectionKind == InputSectionBase<ELFT>::MipsOptions; -} - -template bool elf::isDiscarded<ELF32LE>(InputSectionBase<ELF32LE> *); -template bool elf::isDiscarded<ELF32BE>(InputSectionBase<ELF32BE> *); -template bool elf::isDiscarded<ELF64LE>(InputSectionBase<ELF64LE> *); -template bool elf::isDiscarded<ELF64BE>(InputSectionBase<ELF64BE> *); - template class elf::InputSectionBase<ELF32LE>; template class elf::InputSectionBase<ELF32BE>; template class elf::InputSectionBase<ELF64LE>; @@ -665,11 +834,6 @@ template class elf::InputSection<ELF32BE>; template class elf::InputSection<ELF64LE>; template class elf::InputSection<ELF64BE>; -template class elf::SplitInputSection<ELF32LE>; -template class elf::SplitInputSection<ELF32BE>; -template class elf::SplitInputSection<ELF64LE>; -template class elf::SplitInputSection<ELF64BE>; - template class elf::EhInputSection<ELF32LE>; template class elf::EhInputSection<ELF32BE>; template class elf::EhInputSection<ELF64LE>; @@ -680,12 +844,7 @@ template class elf::MergeInputSection<ELF32BE>; template class elf::MergeInputSection<ELF64LE>; template class elf::MergeInputSection<ELF64BE>; -template class elf::MipsReginfoInputSection<ELF32LE>; -template class elf::MipsReginfoInputSection<ELF32BE>; -template class elf::MipsReginfoInputSection<ELF64LE>; -template class elf::MipsReginfoInputSection<ELF64BE>; - -template class elf::MipsOptionsInputSection<ELF32LE>; -template class elf::MipsOptionsInputSection<ELF32BE>; -template class elf::MipsOptionsInputSection<ELF64LE>; 
-template class elf::MipsOptionsInputSection<ELF64BE>; +template std::string elf::toString(const InputSectionBase<ELF32LE> *); +template std::string elf::toString(const InputSectionBase<ELF32BE> *); +template std::string elf::toString(const InputSectionBase<ELF64LE> *); +template std::string elf::toString(const InputSectionBase<ELF64BE> *); diff --git a/ELF/InputSection.h b/ELF/InputSection.h index 61a89c540c5d..adbc1e1e3829 100644 --- a/ELF/InputSection.h +++ b/ELF/InputSection.h @@ -14,25 +14,64 @@ #include "Relocations.h" #include "Thunks.h" #include "lld/Core/LLVM.h" +#include "llvm/ADT/CachedHashString.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/TinyPtrVector.h" #include "llvm/Object/ELF.h" +#include <mutex> namespace lld { namespace elf { -template <class ELFT> bool isDiscarded(InputSectionBase<ELFT> *S); - +class DefinedCommon; class SymbolBody; +struct SectionPiece; -template <class ELFT> class ICF; template <class ELFT> class DefinedRegular; template <class ELFT> class ObjectFile; template <class ELFT> class OutputSection; -template <class ELFT> class OutputSectionBase; +class OutputSectionBase; + +// We need non-template input section class to store symbol layout +// in linker script parser structures, where we do not have ELFT +// template parameter. For each scripted output section symbol we +// store pointer to preceding InputSectionData object or nullptr, +// if symbol should be placed at the very beginning of the output +// section +class InputSectionData { +public: + enum Kind { Regular, EHFrame, Merge, Synthetic, }; + + // The garbage collector sets sections' Live bits. + // If GC is disabled, all sections are considered live by default. 
+ InputSectionData(Kind SectionKind, StringRef Name, ArrayRef<uint8_t> Data, + bool Live) + : SectionKind(SectionKind), Live(Live), Assigned(false), Name(Name), + Data(Data) {} + +private: + unsigned SectionKind : 3; + +public: + Kind kind() const { return (Kind)SectionKind; } + + unsigned Live : 1; // for garbage collection + unsigned Assigned : 1; // for linker script + uint32_t Alignment; + StringRef Name; + ArrayRef<uint8_t> Data; + + template <typename T> llvm::ArrayRef<T> getDataAs() const { + size_t S = Data.size(); + assert(S % sizeof(T) == 0); + return llvm::makeArrayRef<T>((const T *)Data.data(), S / sizeof(T)); + } + + std::vector<Relocation> Relocations; +}; // This corresponds to a section of an input file. -template <class ELFT> class InputSectionBase { +template <class ELFT> class InputSectionBase : public InputSectionData { protected: typedef typename ELFT::Chdr Elf_Chdr; typedef typename ELFT::Rel Elf_Rel; @@ -40,27 +79,46 @@ protected: typedef typename ELFT::Shdr Elf_Shdr; typedef typename ELFT::Sym Elf_Sym; typedef typename ELFT::uint uintX_t; - const Elf_Shdr *Header; // The file this section is from. ObjectFile<ELFT> *File; - // If a section is compressed, this vector has uncompressed section data. - SmallVector<char, 0> Uncompressed; - public: - enum Kind { Regular, EHFrame, Merge, MipsReginfo, MipsOptions }; - Kind SectionKind; - - InputSectionBase() : Repl(this) {} + // These corresponds to the fields in Elf_Shdr. 
+ uintX_t Flags; + uintX_t Offset = 0; + uintX_t Entsize; + uint32_t Type; + uint32_t Link; + uint32_t Info; + + InputSectionBase() + : InputSectionData(Regular, "", ArrayRef<uint8_t>(), false), Repl(this) { + NumRelocations = 0; + AreRelocsRela = false; + } InputSectionBase(ObjectFile<ELFT> *File, const Elf_Shdr *Header, + StringRef Name, Kind SectionKind); + InputSectionBase(ObjectFile<ELFT> *File, uintX_t Flags, uint32_t Type, + uintX_t Entsize, uint32_t Link, uint32_t Info, + uintX_t Addralign, ArrayRef<uint8_t> Data, StringRef Name, Kind SectionKind); - OutputSectionBase<ELFT> *OutSec = nullptr; - uint32_t Alignment; - - // Used for garbage collection. - bool Live; + OutputSectionBase *OutSec = nullptr; + + // Relocations that refer to this section. + const Elf_Rel *FirstRelocation = nullptr; + unsigned NumRelocations : 31; + unsigned AreRelocsRela : 1; + ArrayRef<Elf_Rel> rels() const { + assert(!AreRelocsRela); + return llvm::makeArrayRef(FirstRelocation, NumRelocations); + } + ArrayRef<Elf_Rela> relas() const { + assert(AreRelocsRela); + return llvm::makeArrayRef(static_cast<const Elf_Rela *>(FirstRelocation), + NumRelocations); + } // This pointer points to the "real" instance of this instance. // Usually Repl == this. However, if ICF merges two sections, @@ -72,140 +130,163 @@ public: // Returns the size of this section (even if this is a common or BSS.) size_t getSize() const; - static InputSectionBase<ELFT> Discarded; - - StringRef getSectionName() const; - const Elf_Shdr *getSectionHdr() const { return Header; } ObjectFile<ELFT> *getFile() const { return File; } + llvm::object::ELFFile<ELFT> getObj() const { return File->getObj(); } uintX_t getOffset(const DefinedRegular<ELFT> &Sym) const; - + InputSectionBase *getLinkOrderDep() const; // Translate an offset in the input section to an offset in the output // section. uintX_t getOffset(uintX_t Offset) const; - ArrayRef<uint8_t> getSectionData() const; - + // ELF supports ZLIB-compressed section. 
+ // Returns true if the section is compressed. + bool isCompressed() const; void uncompress(); + // Returns a source location string. Used to construct an error message. + std::string getLocation(uintX_t Offset); + void relocate(uint8_t *Buf, uint8_t *BufEnd); - std::vector<Relocation<ELFT>> Relocations; - bool Compressed; -}; +private: + std::pair<ArrayRef<uint8_t>, uint64_t> + getElfCompressedData(ArrayRef<uint8_t> Data); -template <class ELFT> InputSectionBase<ELFT> InputSectionBase<ELFT>::Discarded; + std::pair<ArrayRef<uint8_t>, uint64_t> + getRawCompressedData(ArrayRef<uint8_t> Data); +}; // SectionPiece represents a piece of splittable section contents. +// We allocate a lot of these and binary search on them. This means that they +// have to be as compact as possible, which is why we don't store the size (can +// be found by looking at the next one) and put the hash in a side table. struct SectionPiece { - SectionPiece(size_t Off, ArrayRef<uint8_t> Data) - : InputOff(Off), Data((const uint8_t *)Data.data()), Size(Data.size()), - Live(!Config->GcSections) {} - - ArrayRef<uint8_t> data() { return {Data, Size}; } - size_t size() const { return Size; } + SectionPiece(size_t Off, bool Live = false) + : InputOff(Off), OutputOff(-1), Live(Live || !Config->GcSections) {} size_t InputOff; - size_t OutputOff = -1; - -private: - // We use bitfields because SplitInputSection is accessed by - // std::upper_bound very often. - // We want to save bits to make it cache friendly. - const uint8_t *Data; - uint32_t Size : 31; - -public: - uint32_t Live : 1; -}; - -// Usually sections are copied to the output as atomic chunks of data, -// but some special types of sections are split into small pieces of data -// and each piece is copied to a different place in the output. -// This class represents such special sections. 
-template <class ELFT> class SplitInputSection : public InputSectionBase<ELFT> { - typedef typename ELFT::Shdr Elf_Shdr; - typedef typename ELFT::uint uintX_t; - -public: - SplitInputSection(ObjectFile<ELFT> *File, const Elf_Shdr *Header, - typename InputSectionBase<ELFT>::Kind SectionKind); - - // Splittable sections are handled as a sequence of data - // rather than a single large blob of data. - std::vector<SectionPiece> Pieces; - - // Returns the SectionPiece at a given input section offset. - SectionPiece *getSectionPiece(uintX_t Offset); - const SectionPiece *getSectionPiece(uintX_t Offset) const; + ssize_t OutputOff : 8 * sizeof(ssize_t) - 1; + size_t Live : 1; }; +static_assert(sizeof(SectionPiece) == 2 * sizeof(size_t), + "SectionPiece is too big"); // This corresponds to a SHF_MERGE section of an input file. -template <class ELFT> class MergeInputSection : public SplitInputSection<ELFT> { +template <class ELFT> class MergeInputSection : public InputSectionBase<ELFT> { typedef typename ELFT::uint uintX_t; typedef typename ELFT::Sym Elf_Sym; typedef typename ELFT::Shdr Elf_Shdr; public: - MergeInputSection(ObjectFile<ELFT> *F, const Elf_Shdr *Header); - static bool classof(const InputSectionBase<ELFT> *S); + MergeInputSection(ObjectFile<ELFT> *F, const Elf_Shdr *Header, + StringRef Name); + static bool classof(const InputSectionData *S); void splitIntoPieces(); // Mark the piece at a given offset live. Used by GC. - void markLiveAt(uintX_t Offset) { LiveOffsets.insert(Offset); } + void markLiveAt(uintX_t Offset) { + assert(this->Flags & llvm::ELF::SHF_ALLOC); + LiveOffsets.insert(Offset); + } // Translate an offset in the input section to an offset // in the output section. uintX_t getOffset(uintX_t Offset) const; - void finalizePieces(); + // Splittable sections are handled as a sequence of data + // rather than a single large blob of data. + std::vector<SectionPiece> Pieces; + + // Returns I'th piece's data. 
This function is very hot when + // string merging is enabled, so we want to inline. + LLVM_ATTRIBUTE_ALWAYS_INLINE + llvm::CachedHashStringRef getData(size_t I) const { + size_t Begin = Pieces[I].InputOff; + size_t End; + if (Pieces.size() - 1 == I) + End = this->Data.size(); + else + End = Pieces[I + 1].InputOff; + + StringRef S = {(const char *)(this->Data.data() + Begin), End - Begin}; + return {S, Hashes[I]}; + } + + // Returns the SectionPiece at a given input section offset. + SectionPiece *getSectionPiece(uintX_t Offset); + const SectionPiece *getSectionPiece(uintX_t Offset) const; private: - llvm::DenseMap<uintX_t, uintX_t> OffsetMap; + void splitStrings(ArrayRef<uint8_t> A, size_t Size); + void splitNonStrings(ArrayRef<uint8_t> A, size_t Size); + + std::vector<uint32_t> Hashes; + + mutable llvm::DenseMap<uintX_t, uintX_t> OffsetMap; + mutable std::once_flag InitOffsetMap; + llvm::DenseSet<uintX_t> LiveOffsets; }; +struct EhSectionPiece : public SectionPiece { + EhSectionPiece(size_t Off, InputSectionData *ID, uint32_t Size, + unsigned FirstRelocation) + : SectionPiece(Off, false), ID(ID), Size(Size), + FirstRelocation(FirstRelocation) {} + InputSectionData *ID; + uint32_t Size; + uint32_t size() const { return Size; } + + ArrayRef<uint8_t> data() { return {ID->Data.data() + this->InputOff, Size}; } + unsigned FirstRelocation; +}; + // This corresponds to a .eh_frame section of an input file. 
-template <class ELFT> class EhInputSection : public SplitInputSection<ELFT> { +template <class ELFT> class EhInputSection : public InputSectionBase<ELFT> { public: typedef typename ELFT::Shdr Elf_Shdr; typedef typename ELFT::uint uintX_t; - EhInputSection(ObjectFile<ELFT> *F, const Elf_Shdr *Header); - static bool classof(const InputSectionBase<ELFT> *S); + EhInputSection(ObjectFile<ELFT> *F, const Elf_Shdr *Header, StringRef Name); + static bool classof(const InputSectionData *S); void split(); + template <class RelTy> void split(ArrayRef<RelTy> Rels); - // Translate an offset in the input section to an offset in the output - // section. - uintX_t getOffset(uintX_t Offset) const; - - // Relocation section that refer to this one. - const Elf_Shdr *RelocSection = nullptr; + // Splittable sections are handled as a sequence of data + // rather than a single large blob of data. + std::vector<EhSectionPiece> Pieces; }; // This corresponds to a non SHF_MERGE section of an input file. template <class ELFT> class InputSection : public InputSectionBase<ELFT> { - friend ICF<ELFT>; typedef InputSectionBase<ELFT> Base; typedef typename ELFT::Shdr Elf_Shdr; typedef typename ELFT::Rela Elf_Rela; typedef typename ELFT::Rel Elf_Rel; typedef typename ELFT::Sym Elf_Sym; typedef typename ELFT::uint uintX_t; + typedef InputSectionData::Kind Kind; public: - InputSection(ObjectFile<ELFT> *F, const Elf_Shdr *Header); + InputSection(); + InputSection(uintX_t Flags, uint32_t Type, uintX_t Addralign, + ArrayRef<uint8_t> Data, StringRef Name, + Kind K = InputSectionData::Regular); + InputSection(ObjectFile<ELFT> *F, const Elf_Shdr *Header, StringRef Name); + + static InputSection<ELFT> Discarded; // Write this section to a mmap'ed file, assuming Buf is pointing to // beginning of the output section. void writeTo(uint8_t *Buf); - // Relocation sections that refer to this one. 
- llvm::TinyPtrVector<const Elf_Shdr *> RelocSections; - // The offset from beginning of the output sections this section was assigned // to. The writer sets a value. uint64_t OutSecOff = 0; - static bool classof(const InputSectionBase<ELFT> *S); + // InputSection that is dependent on us (reverse dependency for GC) + InputSectionBase<ELFT> *DependentSection = nullptr; + + static bool classof(const InputSectionData *S); InputSectionBase<ELFT> *getRelocatedSection(); @@ -223,46 +304,22 @@ public: template <class RelTy> void relocateNonAlloc(uint8_t *Buf, llvm::ArrayRef<RelTy> Rels); -private: - template <class RelTy> - void copyRelocations(uint8_t *Buf, llvm::ArrayRef<RelTy> Rels); + // Used by ICF. + uint32_t Class[2] = {0, 0}; // Called by ICF to merge two input sections. void replace(InputSection<ELFT> *Other); - // Used by ICF. - uint64_t GroupId = 0; +private: + template <class RelTy> + void copyRelocations(uint8_t *Buf, llvm::ArrayRef<RelTy> Rels); llvm::TinyPtrVector<const Thunk<ELFT> *> Thunks; }; -// MIPS .reginfo section provides information on the registers used by the code -// in the object file. Linker should collect this information and write a single -// .reginfo section in the output file. The output section contains a union of -// used registers masks taken from input .reginfo sections and final value -// of the `_gp` symbol. 
For details: Chapter 4 / "Register Information" at -// ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf -template <class ELFT> -class MipsReginfoInputSection : public InputSectionBase<ELFT> { - typedef typename ELFT::Shdr Elf_Shdr; - -public: - MipsReginfoInputSection(ObjectFile<ELFT> *F, const Elf_Shdr *Hdr); - static bool classof(const InputSectionBase<ELFT> *S); - - const llvm::object::Elf_Mips_RegInfo<ELFT> *Reginfo = nullptr; -}; +template <class ELFT> InputSection<ELFT> InputSection<ELFT>::Discarded; -template <class ELFT> -class MipsOptionsInputSection : public InputSectionBase<ELFT> { - typedef typename ELFT::Shdr Elf_Shdr; - -public: - MipsOptionsInputSection(ObjectFile<ELFT> *F, const Elf_Shdr *Hdr); - static bool classof(const InputSectionBase<ELFT> *S); - - const llvm::object::Elf_Mips_RegInfo<ELFT> *Reginfo = nullptr; -}; +template <class ELFT> std::string toString(const InputSectionBase<ELFT> *); } // namespace elf } // namespace lld diff --git a/ELF/LTO.cpp b/ELF/LTO.cpp index 0e8006a3b32a..a3d6a141a202 100644 --- a/ELF/LTO.cpp +++ b/ELF/LTO.cpp @@ -9,31 +9,30 @@ #include "LTO.h" #include "Config.h" -#include "Driver.h" #include "Error.h" #include "InputFiles.h" #include "Symbols.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/CGSCCPassManager.h" -#include "llvm/Analysis/LoopPassManager.h" -#include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" #include "llvm/CodeGen/CommandFlags.h" -#include "llvm/CodeGen/ParallelCG.h" -#include "llvm/IR/AutoUpgrade.h" -#include "llvm/IR/LegacyPassManager.h" -#include "llvm/IR/PassManager.h" -#include "llvm/IR/Verifier.h" -#include "llvm/LTO/legacy/UpdateCompilerUsed.h" -#include "llvm/Linker/IRMover.h" -#include "llvm/Passes/PassBuilder.h" -#include "llvm/Support/StringSaver.h" 
-#include "llvm/Support/TargetRegistry.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Transforms/IPO.h" -#include "llvm/Transforms/IPO/PassManagerBuilder.h" -#include "llvm/Transforms/Utils/ModuleUtils.h" +#include "llvm/IR/DiagnosticPrinter.h" +#include "llvm/LTO/Config.h" +#include "llvm/LTO/LTO.h" +#include "llvm/Object/SymbolicFile.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cstddef> +#include <memory> +#include <string> +#include <system_error> +#include <vector> using namespace llvm; using namespace llvm::object; @@ -51,275 +50,110 @@ static void saveBuffer(StringRef Buffer, const Twine &Path) { OS << Buffer; } -// This is for use when debugging LTO. -static void saveBCFile(Module &M, const Twine &Path) { - std::error_code EC; - raw_fd_ostream OS(Path.str(), EC, sys::fs::OpenFlags::F_None); - if (EC) - error(EC, "cannot create " + Path); - WriteBitcodeToFile(&M, OS, /* ShouldPreserveUseListOrder */ true); +static void diagnosticHandler(const DiagnosticInfo &DI) { + SmallString<128> ErrStorage; + raw_svector_ostream OS(ErrStorage); + DiagnosticPrinterRawOStream DP(OS); + DI.print(DP); + warn(ErrStorage); } -static void runNewCustomLtoPasses(Module &M, TargetMachine &TM) { - PassBuilder PB(&TM); - - AAManager AA; - - // Parse a custom AA pipeline if asked to. - if (!PB.parseAAPipeline(AA, Config->LtoAAPipeline)) { - error("Unable to parse AA pipeline description: " + Config->LtoAAPipeline); - return; - } - - LoopAnalysisManager LAM; - FunctionAnalysisManager FAM; - CGSCCAnalysisManager CGAM; - ModuleAnalysisManager MAM; - - // Register the AA manager first so that our version is the one used. - FAM.registerPass([&] { return std::move(AA); }); - - // Register all the basic analyses with the managers. 
- PB.registerModuleAnalyses(MAM); - PB.registerCGSCCAnalyses(CGAM); - PB.registerFunctionAnalyses(FAM); - PB.registerLoopAnalyses(LAM); - PB.crossRegisterProxies(LAM, FAM, CGAM, MAM); - - ModulePassManager MPM; - if (!Config->DisableVerify) - MPM.addPass(VerifierPass()); +static void checkError(Error E) { + handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) -> Error { + error(EIB.message()); + return Error::success(); + }); +} - // Now, add all the passes we've been requested to. - if (!PB.parsePassPipeline(MPM, Config->LtoNewPmPasses)) { - error("unable to parse pass pipeline description: " + - Config->LtoNewPmPasses); - return; - } +static std::unique_ptr<lto::LTO> createLTO() { + lto::Config Conf; - if (!Config->DisableVerify) - MPM.addPass(VerifierPass()); - MPM.run(M, MAM); -} + // LLD supports the new relocations. + Conf.Options = InitTargetOptionsFromCodeGenFlags(); + Conf.Options.RelaxELFRelocations = true; -static void runOldLtoPasses(Module &M, TargetMachine &TM) { - // Note that the gold plugin has a similar piece of code, so - // it is probably better to move this code to a common place. - legacy::PassManager LtoPasses; - LtoPasses.add(createTargetTransformInfoWrapperPass(TM.getTargetIRAnalysis())); - PassManagerBuilder PMB; - PMB.LibraryInfo = new TargetLibraryInfoImpl(Triple(TM.getTargetTriple())); - PMB.Inliner = createFunctionInliningPass(); - PMB.VerifyInput = PMB.VerifyOutput = !Config->DisableVerify; - PMB.LoopVectorize = true; - PMB.SLPVectorize = true; - PMB.OptLevel = Config->LtoO; - PMB.populateLTOPassManager(LtoPasses); - LtoPasses.run(M); -} + Conf.RelocModel = Config->Pic ? Reloc::PIC_ : Reloc::Static; + Conf.DisableVerify = Config->DisableVerify; + Conf.DiagHandler = diagnosticHandler; + Conf.OptLevel = Config->LTOO; -static void runLTOPasses(Module &M, TargetMachine &TM) { - if (!Config->LtoNewPmPasses.empty()) { - // The user explicitly asked for a set of passes to be run. 
- // This needs the new PM to work as there's no clean way to - // pass a set of passes to run in the legacy PM. - runNewCustomLtoPasses(M, TM); - if (HasError) - return; - } else { - // Run the 'default' set of LTO passes. This code still uses - // the legacy PM as the new one is not the default. - runOldLtoPasses(M, TM); - } + // Set up a custom pipeline if we've been asked to. + Conf.OptPipeline = Config->LTONewPmPasses; + Conf.AAPipeline = Config->LTOAAPipeline; if (Config->SaveTemps) - saveBCFile(M, Config->OutputFile + ".lto.opt.bc"); + checkError(Conf.addSaveTemps(std::string(Config->OutputFile) + ".", + /*UseInputModulePath*/ true)); + + lto::ThinBackend Backend; + if (Config->ThinLTOJobs != -1u) + Backend = lto::createInProcessThinBackend(Config->ThinLTOJobs); + return llvm::make_unique<lto::LTO>(std::move(Conf), Backend, + Config->LTOPartitions); } -static bool shouldInternalize(const SmallPtrSet<GlobalValue *, 8> &Used, - Symbol *S, GlobalValue *GV) { - if (S->IsUsedInRegularObj || Used.count(GV)) - return false; - return !S->includeInDynsym(); -} +BitcodeCompiler::BitcodeCompiler() : LTOObj(createLTO()) {} -BitcodeCompiler::BitcodeCompiler() - : Combined(new Module("ld-temp.o", Driver->Context)) {} +BitcodeCompiler::~BitcodeCompiler() = default; static void undefine(Symbol *S) { - replaceBody<Undefined>(S, S->body()->getName(), STV_DEFAULT, S->body()->Type, - nullptr); -} - -static void handleUndefinedAsmRefs(const BasicSymbolRef &Sym, GlobalValue *GV, - StringSet<> &AsmUndefinedRefs) { - // GV associated => not an assembly symbol, bail out. - if (GV) - return; - - // This is an undefined reference to a symbol in asm. We put that in - // compiler.used, so that we can preserve it from being dropped from - // the output, without necessarily preventing its internalization. 
- SmallString<64> Name; - raw_svector_ostream OS(Name); - Sym.printName(OS); - AsmUndefinedRefs.insert(Name.str()); + replaceBody<Undefined>(S, S->body()->getName(), /*IsLocal=*/false, + STV_DEFAULT, S->body()->Type, nullptr); } void BitcodeCompiler::add(BitcodeFile &F) { - std::unique_ptr<IRObjectFile> Obj = std::move(F.Obj); - std::vector<GlobalValue *> Keep; - unsigned BodyIndex = 0; - ArrayRef<Symbol *> Syms = F.getSymbols(); - - Module &M = Obj->getModule(); - if (M.getDataLayoutStr().empty()) - fatal("invalid bitcode file: " + F.getName() + " has no datalayout"); - - // Discard non-compatible debug infos if necessary. - M.materializeMetadata(); - UpgradeDebugInfo(M); - - // If a symbol appears in @llvm.used, the linker is required - // to treat the symbol as there is a reference to the symbol - // that it cannot see. Therefore, we can't internalize. - SmallPtrSet<GlobalValue *, 8> Used; - collectUsedGlobalVariables(M, Used, /* CompilerUsed */ false); - - for (const BasicSymbolRef &Sym : Obj->symbols()) { - uint32_t Flags = Sym.getFlags(); - GlobalValue *GV = Obj->getSymbolGV(Sym.getRawDataRefImpl()); - if (GV && GV->hasAppendingLinkage()) - Keep.push_back(GV); - if (BitcodeFile::shouldSkip(Flags)) - continue; - Symbol *S = Syms[BodyIndex++]; - if (Flags & BasicSymbolRef::SF_Undefined) { - handleUndefinedAsmRefs(Sym, GV, AsmUndefinedRefs); - continue; - } - auto *B = dyn_cast<DefinedBitcode>(S->body()); - if (!B || B->file() != &F) - continue; - - // We collect the set of symbols we want to internalize here - // and change the linkage after the IRMover executed, i.e. after - // we imported the symbols and satisfied undefined references - // to it. We can't just change linkage here because otherwise - // the IRMover will just rename the symbol. 
- if (GV && shouldInternalize(Used, S, GV)) - InternalizedSyms.insert(GV->getName()); - - // At this point we know that either the combined LTO object will provide a - // definition of a symbol, or we will internalize it. In either case, we - // need to undefine the symbol. In the former case, the real definition - // needs to be able to replace the original definition without conflicting. - // In the latter case, we need to allow the combined LTO object to provide a - // definition with the same name, for example when doing parallel codegen. - undefine(S); - - if (!GV) - // Module asm symbol. - continue; - - switch (GV->getLinkage()) { - default: - break; - case GlobalValue::LinkOnceAnyLinkage: - GV->setLinkage(GlobalValue::WeakAnyLinkage); - break; - case GlobalValue::LinkOnceODRLinkage: - GV->setLinkage(GlobalValue::WeakODRLinkage); - break; - } - - Keep.push_back(GV); - } - - IRMover Mover(*Combined); - if (Error E = Mover.move(Obj->takeModule(), Keep, - [](GlobalValue &, IRMover::ValueAdder) {})) { - handleAllErrors(std::move(E), [&](const ErrorInfoBase &EIB) { - fatal("failed to link module " + F.getName() + ": " + EIB.message()); - }); + lto::InputFile &Obj = *F.Obj; + unsigned SymNum = 0; + std::vector<Symbol *> Syms = F.getSymbols(); + std::vector<lto::SymbolResolution> Resols(Syms.size()); + + // Provide a resolution to the LTO API for each symbol. + for (const lto::InputFile::Symbol &ObjSym : Obj.symbols()) { + Symbol *Sym = Syms[SymNum]; + lto::SymbolResolution &R = Resols[SymNum]; + ++SymNum; + SymbolBody *B = Sym->body(); + + // Ideally we shouldn't check for SF_Undefined but currently IRObjectFile + // reports two symbols for module ASM defined. Without this check, lld + // flags an undefined in IR with a definition in ASM as prevailing. + // Once IRObjectFile is fixed to report only one symbol this hack can + // be removed. 
+ R.Prevailing = + !(ObjSym.getFlags() & object::BasicSymbolRef::SF_Undefined) && + B->File == &F; + + R.VisibleToRegularObj = + Sym->IsUsedInRegularObj || (R.Prevailing && Sym->includeInDynsym()); + if (R.Prevailing) + undefine(Sym); } -} - -static void internalize(GlobalValue &GV) { - assert(!GV.hasLocalLinkage() && - "Trying to internalize a symbol with local linkage!"); - GV.setLinkage(GlobalValue::InternalLinkage); -} - -std::vector<std::unique_ptr<InputFile>> BitcodeCompiler::runSplitCodegen( - const std::function<std::unique_ptr<TargetMachine>()> &TMFactory) { - unsigned NumThreads = Config->LtoJobs; - OwningData.resize(NumThreads); - - std::list<raw_svector_ostream> OSs; - std::vector<raw_pwrite_stream *> OSPtrs; - for (SmallString<0> &Obj : OwningData) { - OSs.emplace_back(Obj); - OSPtrs.push_back(&OSs.back()); - } - - splitCodeGen(std::move(Combined), OSPtrs, {}, TMFactory); - - std::vector<std::unique_ptr<InputFile>> ObjFiles; - for (SmallString<0> &Obj : OwningData) - ObjFiles.push_back(createObjectFile( - MemoryBufferRef(Obj, "LLD-INTERNAL-combined-lto-object"))); - - // If -save-temps is given, we need to save temporary objects to files. - // This is for debugging. - if (Config->SaveTemps) { - if (NumThreads == 1) { - saveBuffer(OwningData[0], Config->OutputFile + ".lto.o"); - } else { - for (unsigned I = 0; I < NumThreads; ++I) - saveBuffer(OwningData[I], Config->OutputFile + Twine(I) + ".lto.o"); - } - } - - return ObjFiles; + checkError(LTOObj->add(std::move(F.Obj), Resols)); } // Merge all the bitcode files we have seen, codegen the result -// and return the resulting ObjectFile. -std::vector<std::unique_ptr<InputFile>> BitcodeCompiler::compile() { - for (const auto &Name : InternalizedSyms) { - GlobalValue *GV = Combined->getNamedValue(Name.first()); - assert(GV); - internalize(*GV); +// and return the resulting ObjectFile(s). 
+std::vector<InputFile *> BitcodeCompiler::compile() { + std::vector<InputFile *> Ret; + unsigned MaxTasks = LTOObj->getMaxTasks(); + Buff.resize(MaxTasks); + + checkError(LTOObj->run([&](size_t Task) { + return llvm::make_unique<lto::NativeObjectStream>( + llvm::make_unique<raw_svector_ostream>(Buff[Task])); + })); + + for (unsigned I = 0; I != MaxTasks; ++I) { + if (Buff[I].empty()) + continue; + if (Config->SaveTemps) { + if (MaxTasks == 1) + saveBuffer(Buff[I], Config->OutputFile + ".lto.o"); + else + saveBuffer(Buff[I], Config->OutputFile + Twine(I) + ".lto.o"); + } + InputFile *Obj = createObjectFile(MemoryBufferRef(Buff[I], "lto.tmp")); + Ret.push_back(Obj); } - - std::string TheTriple = Combined->getTargetTriple(); - std::string Msg; - const Target *T = TargetRegistry::lookupTarget(TheTriple, Msg); - if (!T) - fatal("target not found: " + Msg); - - // LLD supports the new relocations. - TargetOptions Options = InitTargetOptionsFromCodeGenFlags(); - Options.RelaxELFRelocations = true; - - auto CreateTargetMachine = [&]() { - return std::unique_ptr<TargetMachine>(T->createTargetMachine( - TheTriple, "", "", Options, Config->Pic ? Reloc::PIC_ : Reloc::Static)); - }; - - std::unique_ptr<TargetMachine> TM = CreateTargetMachine(); - - // Update llvm.compiler.used so that optimizations won't strip - // off AsmUndefinedReferences. 
- updateCompilerUsed(*Combined, *TM, AsmUndefinedRefs); - - if (Config->SaveTemps) - saveBCFile(*Combined, Config->OutputFile + ".lto.bc"); - - runLTOPasses(*Combined, *TM); - if (HasError) - return {}; - - return runSplitCodegen(CreateTargetMachine); + return Ret; } diff --git a/ELF/LTO.h b/ELF/LTO.h index 81dffb6004b2..b3d734f2d381 100644 --- a/ELF/LTO.h +++ b/ELF/LTO.h @@ -23,9 +23,14 @@ #include "lld/Core/LLVM.h" #include "llvm/ADT/SmallString.h" -#include "llvm/ADT/StringSet.h" -#include "llvm/IR/Module.h" -#include "llvm/Linker/IRMover.h" +#include <memory> +#include <vector> + +namespace llvm { +namespace lto { +class LTO; +} +} namespace lld { namespace elf { @@ -36,17 +41,14 @@ class InputFile; class BitcodeCompiler { public: BitcodeCompiler(); + ~BitcodeCompiler(); + void add(BitcodeFile &F); - std::vector<std::unique_ptr<InputFile>> compile(); + std::vector<InputFile *> compile(); private: - std::vector<std::unique_ptr<InputFile>> runSplitCodegen( - const std::function<std::unique_ptr<llvm::TargetMachine>()> &TMFactory); - - std::unique_ptr<llvm::Module> Combined; - std::vector<SmallString<0>> OwningData; - llvm::StringSet<> InternalizedSyms; - llvm::StringSet<> AsmUndefinedRefs; + std::unique_ptr<llvm::lto::LTO> LTOObj; + std::vector<SmallString<0>> Buff; }; } } diff --git a/ELF/LinkerScript.cpp b/ELF/LinkerScript.cpp index 61abdc185e11..5057b57a4a54 100644 --- a/ELF/LinkerScript.cpp +++ b/ELF/LinkerScript.cpp @@ -8,8 +8,6 @@ //===----------------------------------------------------------------------===// // // This file contains the parser/evaluator of the linker script. -// It does not construct an AST but consume linker script directives directly. -// Results are written to Driver or Config object. 
// //===----------------------------------------------------------------------===// @@ -17,311 +15,971 @@ #include "Config.h" #include "Driver.h" #include "InputSection.h" +#include "Memory.h" #include "OutputSections.h" #include "ScriptParser.h" #include "Strings.h" -#include "Symbols.h" #include "SymbolTable.h" +#include "Symbols.h" +#include "SyntheticSections.h" #include "Target.h" +#include "Writer.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/ELF.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FileSystem.h" -#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/Path.h" -#include "llvm/Support/StringSaver.h" +#include <algorithm> +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <iterator> +#include <limits> +#include <memory> +#include <string> +#include <tuple> +#include <vector> using namespace llvm; using namespace llvm::ELF; using namespace llvm::object; +using namespace llvm::support::endian; using namespace lld; using namespace lld::elf; +LinkerScriptBase *elf::ScriptBase; ScriptConfiguration *elf::ScriptConfig; -// This is an operator-precedence parser to parse and evaluate -// a linker script expression. For each linker script arithmetic -// expression (e.g. ". = . + 0x1000"), a new instance of ExprParser -// is created and ran. -namespace { -class ExprParser : public ScriptParserBase { -public: - ExprParser(std::vector<StringRef> &Tokens, uint64_t Dot) - : ScriptParserBase(Tokens), Dot(Dot) {} +template <class ELFT> static void addRegular(SymbolAssignment *Cmd) { + uint8_t Visibility = Cmd->Hidden ? 
STV_HIDDEN : STV_DEFAULT; + Symbol *Sym = Symtab<ELFT>::X->addRegular(Cmd->Name, Visibility, STT_NOTYPE, + 0, 0, STB_GLOBAL, nullptr, nullptr); + Cmd->Sym = Sym->body(); - uint64_t run(); + // If we have no SECTIONS then we don't have '.' and don't call + // assignAddresses(). We calculate symbol value immediately in this case. + if (!ScriptConfig->HasSections) + cast< |