diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2016-07-23 20:48:50 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2016-07-23 20:48:50 +0000 |
commit | 1c98619801a5705c688e683be3ef9d70169a0686 (patch) | |
tree | 8422105cd1a94c368315f2db16b9ac746cf7c000 | |
parent | f4f3ce4613680903220815690ad79fc7ba0a2e26 (diff) | |
download | src-vendor/lld/lld-release_39-r276489.tar.gz src-vendor/lld/lld-release_39-r276489.zip |
Vendor import of lld release_39 branch r276489:vendor/lld/lld-release_39-r276489
1685 files changed, 30937 insertions, 78902 deletions
diff --git a/.arcconfig b/.arcconfig index 787b339a9f20..ebf4a4a6f8b7 100644 --- a/.arcconfig +++ b/.arcconfig @@ -1,4 +1,4 @@ { "project_id" : "lld", - "conduit_uri" : "http://reviews.llvm.org/" + "conduit_uri" : "https://reviews.llvm.org/" } diff --git a/CMakeLists.txt b/CMakeLists.txt index 7458de08fc16..46ca748f8fac 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -94,7 +94,7 @@ endmacro(add_lld_library) add_subdirectory(lib) -add_subdirectory(tools) +add_subdirectory(tools/lld) if (LLVM_INCLUDE_TESTS) add_subdirectory(test) diff --git a/COFF/CMakeLists.txt b/COFF/CMakeLists.txt index 3f31ba9ba1fb..3319f392efe1 100644 --- a/COFF/CMakeLists.txt +++ b/COFF/CMakeLists.txt @@ -10,6 +10,7 @@ add_lld_library(lldCOFF Error.cpp ICF.cpp InputFiles.cpp + Librarian.cpp MarkLive.cpp ModuleDef.cpp PDB.cpp @@ -28,6 +29,8 @@ add_lld_library(lldCOFF Target Option Support + + LINK_LIBS ${PTHREAD_LIB} ) add_dependencies(lldCOFF COFFOptionsTableGen) diff --git a/COFF/Chunks.cpp b/COFF/Chunks.cpp index f9f768d69866..1c1b18176aa2 100644 --- a/COFF/Chunks.cpp +++ b/COFF/Chunks.cpp @@ -34,10 +34,7 @@ SectionChunk::SectionChunk(ObjectFile *F, const coff_section *H) // Initialize SectionName. File->getCOFFObj()->getSectionName(Header, SectionName); - // Bit [20:24] contains section alignment. Both 0 and 1 mean alignment 1. - unsigned Shift = (Header->Characteristics >> 20) & 0xF; - if (Shift > 0) - Align = uint32_t(1) << (Shift - 1); + Align = Header->getAlignment(); // Only COMDAT sections are subject of dead-stripping. 
Live = !isCOMDAT(); @@ -64,7 +61,7 @@ void SectionChunk::applyRelX64(uint8_t *Off, uint16_t Type, Defined *Sym, case IMAGE_REL_AMD64_SECTION: add16(Off, Sym->getSectionIndex()); break; case IMAGE_REL_AMD64_SECREL: add32(Off, Sym->getSecrel()); break; default: - error("Unsupported relocation type"); + fatal("unsupported relocation type"); } } @@ -79,7 +76,7 @@ void SectionChunk::applyRelX86(uint8_t *Off, uint16_t Type, Defined *Sym, case IMAGE_REL_I386_SECTION: add16(Off, Sym->getSectionIndex()); break; case IMAGE_REL_I386_SECREL: add32(Off, Sym->getSecrel()); break; default: - error("Unsupported relocation type"); + fatal("unsupported relocation type"); } } @@ -123,7 +120,7 @@ void SectionChunk::applyRelARM(uint8_t *Off, uint16_t Type, Defined *Sym, case IMAGE_REL_ARM_BRANCH24T: applyBranch24T(Off, S - P - 4); break; case IMAGE_REL_ARM_BLX23T: applyBranch24T(Off, S - P - 4); break; default: - error("Unsupported relocation type"); + fatal("unsupported relocation type"); } } @@ -310,7 +307,7 @@ void SEHTableChunk::writeTo(uint8_t *Buf) const { BaserelChunk::BaserelChunk(uint32_t Page, Baserel *Begin, Baserel *End) { // Block header consists of 4 byte page RVA and 4 byte block size. // Each entry is 2 byte. Last entry may be padding. 
- Data.resize(align((End - Begin) * 2 + 8, 4)); + Data.resize(alignTo((End - Begin) * 2 + 8, 4)); uint8_t *P = Data.data(); write32le(P, Page); write32le(P + 4, Data.size()); diff --git a/COFF/Chunks.h b/COFF/Chunks.h index 274135516eb9..cd0e2e69ef5d 100644 --- a/COFF/Chunks.h +++ b/COFF/Chunks.h @@ -18,6 +18,7 @@ #include "llvm/ADT/iterator_range.h" #include "llvm/Object/COFF.h" #include <atomic> +#include <utility> #include <vector> namespace lld { @@ -138,6 +139,7 @@ public: SectionChunk(ObjectFile *File, const coff_section *Header); static bool classof(const Chunk *C) { return C->kind() == SectionKind; } size_t getSize() const override { return Header->SizeOfRawData; } + ArrayRef<uint8_t> getContents() const; void writeTo(uint8_t *Buf) const override; bool hasData() const override; uint32_t getPermissions() const override; @@ -186,8 +188,6 @@ public: uint32_t Checksum = 0; private: - ArrayRef<uint8_t> getContents() const; - // A file this chunk was created from. ObjectFile *File; @@ -295,7 +295,7 @@ private: // functions. x86-only. class SEHTableChunk : public Chunk { public: - explicit SEHTableChunk(std::set<Defined *> S) : Syms(S) {} + explicit SEHTableChunk(std::set<Defined *> S) : Syms(std::move(S)) {} size_t getSize() const override { return Syms.size() * 4; } void writeTo(uint8_t *Buf) const override; @@ -326,10 +326,6 @@ public: uint8_t Type; }; -inline uint64_t align(uint64_t Value, uint64_t Align) { - return llvm::RoundUpToAlignment(Value, Align); -} - } // namespace coff } // namespace lld diff --git a/COFF/Config.h b/COFF/Config.h index 9cfccadba5fa..a5472e937fa1 100644 --- a/COFF/Config.h +++ b/COFF/Config.h @@ -106,11 +106,15 @@ struct Configuration { // Used for /merge:from=to (e.g. /merge:.rdata=.text) std::map<StringRef, StringRef> Merge; + // Used for /section=.name,{DEKPRSW} to set section attributes. + std::map<StringRef, uint32_t> Section; + // Options for manifest files. 
ManifestKind Manifest = SideBySide; int ManifestID = 1; StringRef ManifestDependency; bool ManifestUAC = true; + std::vector<std::string> ManifestInput; StringRef ManifestLevel = "'asInvoker'"; StringRef ManifestUIAccess = "'false'"; StringRef ManifestFile; diff --git a/COFF/DLL.cpp b/COFF/DLL.cpp index 8f3383d75c7b..9ac370c11d59 100644 --- a/COFF/DLL.cpp +++ b/COFF/DLL.cpp @@ -45,7 +45,7 @@ public: size_t getSize() const override { // Starts with 2 byte Hint field, followed by a null-terminated string, // ends with 0 or 1 byte padding. - return align(Name.size() + 3, 2); + return alignTo(Name.size() + 3, 2); } void writeTo(uint8_t *Buf) const override { diff --git a/COFF/Driver.cpp b/COFF/Driver.cpp index 4cacf0ff552a..bb6a60e4fc4c 100644 --- a/COFF/Driver.cpp +++ b/COFF/Driver.cpp @@ -14,6 +14,7 @@ #include "SymbolTable.h" #include "Symbols.h" #include "Writer.h" +#include "lld/Driver/Driver.h" #include "llvm/ADT/Optional.h" #include "llvm/LibDriver/LibDriver.h" #include "llvm/Option/Arg.h" @@ -40,27 +41,28 @@ namespace coff { Configuration *Config; LinkerDriver *Driver; -void link(llvm::ArrayRef<const char *> Args) { +bool link(llvm::ArrayRef<const char *> Args) { Configuration C; LinkerDriver D; Config = &C; Driver = &D; - return Driver->link(Args); + Driver->link(Args); + return true; } -// Drop directory components and replace extension with ".exe". +// Drop directory components and replace extension with ".exe" or ".dll". static std::string getOutputPath(StringRef Path) { auto P = Path.find_last_of("\\/"); StringRef S = (P == StringRef::npos) ? Path : Path.substr(P + 1); - return (S.substr(0, S.rfind('.')) + ".exe").str(); + const char* E = Config->DLL ? ".dll" : ".exe"; + return (S.substr(0, S.rfind('.')) + E).str(); } // Opens a file. Path has to be resolved already. // Newly created memory buffers are owned by this driver. 
MemoryBufferRef LinkerDriver::openFile(StringRef Path) { - auto MBOrErr = MemoryBuffer::getFile(Path); - error(MBOrErr, Twine("Could not open ") + Path); - std::unique_ptr<MemoryBuffer> &MB = *MBOrErr; + std::unique_ptr<MemoryBuffer> MB = + check(MemoryBuffer::getFile(Path), "could not open " + Path); MemoryBufferRef MBRef = MB->getMemBufferRef(); OwningMBs.push_back(std::move(MB)); // take ownership return MBRef; @@ -116,12 +118,16 @@ void LinkerDriver::parseDirectives(StringRef S) { case OPT_nodefaultlib: Config->NoDefaultLibs.insert(doFindLib(Arg->getValue())); break; + case OPT_section: + parseSection(Arg->getValue()); + break; case OPT_editandcontinue: + case OPT_fastfail: case OPT_guardsym: case OPT_throwingnew: break; default: - error(Twine(Arg->getSpelling()) + " is not allowed in .drectve"); + fatal(Arg->getSpelling() + " is not allowed in .drectve"); } } } @@ -246,7 +252,7 @@ void LinkerDriver::link(llvm::ArrayRef<const char *> ArgsArr) { // We call our own implementation of lib.exe that understands bitcode files. if (ArgsArr.size() > 1 && StringRef(ArgsArr[1]).equals_lower("/lib")) { if (llvm::libDriverMain(ArgsArr.slice(1)) != 0) - error("lib failed"); + fatal("lib failed"); return; } @@ -268,7 +274,7 @@ void LinkerDriver::link(llvm::ArrayRef<const char *> ArgsArr) { } if (Args.filtered_begin(OPT_INPUT) == Args.filtered_end()) - error("no input files."); + fatal("no input files"); // Construct search path list. 
SearchPaths.push_back(""); @@ -295,7 +301,7 @@ void LinkerDriver::link(llvm::ArrayRef<const char *> ArgsArr) { // Handle /noentry if (Args.hasArg(OPT_noentry)) { if (!Args.hasArg(OPT_dll)) - error("/noentry must be specified with /dll"); + fatal("/noentry must be specified with /dll"); Config->NoEntry = true; } @@ -308,7 +314,7 @@ void LinkerDriver::link(llvm::ArrayRef<const char *> ArgsArr) { // Handle /fixed if (Args.hasArg(OPT_fixed)) { if (Args.hasArg(OPT_dynamicbase)) - error("/fixed must not be specified with /dynamicbase"); + fatal("/fixed must not be specified with /dynamicbase"); Config->Relocatable = false; Config->DynamicBase = false; } @@ -382,17 +388,17 @@ void LinkerDriver::link(llvm::ArrayRef<const char *> ArgsArr) { StringRef OptLevel = StringRef(S).substr(7); if (OptLevel.getAsInteger(10, Config->LTOOptLevel) || Config->LTOOptLevel > 3) - error("/opt:lldlto: invalid optimization level: " + OptLevel); + fatal("/opt:lldlto: invalid optimization level: " + OptLevel); continue; } if (StringRef(S).startswith("lldltojobs=")) { StringRef Jobs = StringRef(S).substr(11); if (Jobs.getAsInteger(10, Config->LTOJobs) || Config->LTOJobs == 0) - error("/opt:lldltojobs: invalid job count: " + Jobs); + fatal("/opt:lldltojobs: invalid job count: " + Jobs); continue; } if (S != "ref" && S != "lbr" && S != "nolbr") - error(Twine("/opt: unknown option: ") + S); + fatal("/opt: unknown option: " + S); } } @@ -404,6 +410,10 @@ void LinkerDriver::link(llvm::ArrayRef<const char *> ArgsArr) { for (auto *Arg : Args.filtered(OPT_merge)) parseMerge(Arg->getValue()); + // Handle /section + for (auto *Arg : Args.filtered(OPT_section)) + parseSection(Arg->getValue()); + // Handle /manifest if (auto *Arg = Args.getLastArg(OPT_manifest_colon)) parseManifest(Arg->getValue()); @@ -420,6 +430,10 @@ void LinkerDriver::link(llvm::ArrayRef<const char *> ArgsArr) { if (auto *Arg = Args.getLastArg(OPT_manifestfile)) Config->ManifestFile = Arg->getValue(); + // Handle /manifestinput + for 
(auto *Arg : Args.filtered(OPT_manifestinput)) + Config->ManifestInput.push_back(Arg->getValue()); + // Handle miscellaneous boolean flags. if (Args.hasArg(OPT_allowbind_no)) Config->AllowBind = false; @@ -485,7 +499,7 @@ void LinkerDriver::link(llvm::ArrayRef<const char *> ArgsArr) { continue; } if (Config->Machine != MT) - error(Twine(File->getShortName()) + ": machine type " + machineToStr(MT) + + fatal(File->getShortName() + ": machine type " + machineToStr(MT) + " conflicts with " + machineToStr(Config->Machine)); } if (Config->Machine == IMAGE_FILE_MACHINE_UNKNOWN) { @@ -520,7 +534,7 @@ void LinkerDriver::link(llvm::ArrayRef<const char *> ArgsArr) { // infer that from user-defined entry name. StringRef S = findDefaultEntry(); if (S.empty()) - error("entry point must be defined"); + fatal("entry point must be defined"); Config->Entry = addUndefined(S); if (Config->Verbose) llvm::outs() << "Entry name inferred: " << S << "\n"; @@ -627,14 +641,14 @@ void LinkerDriver::link(llvm::ArrayRef<const char *> ArgsArr) { if (Config->Subsystem == IMAGE_SUBSYSTEM_UNKNOWN) { Config->Subsystem = inferSubsystem(); if (Config->Subsystem == IMAGE_SUBSYSTEM_UNKNOWN) - error("subsystem must be defined"); + fatal("subsystem must be defined"); } // Handle /safeseh. if (Args.hasArg(OPT_safeseh)) for (ObjectFile *File : Symtab.ObjectFiles) if (!File->SEHCompat) - error("/safeseh: " + File->getName() + " is not compatible with SEH"); + fatal("/safeseh: " + File->getName() + " is not compatible with SEH"); // Windows specific -- when we are creating a .dll file, we also // need to create a .lib file. @@ -668,7 +682,8 @@ void LinkerDriver::link(llvm::ArrayRef<const char *> ArgsArr) { if (auto *Arg = Args.getLastArg(OPT_lldmap)) { std::error_code EC; llvm::raw_fd_ostream Out(Arg->getValue(), EC, OpenFlags::F_Text); - error(EC, "Could not create the symbol map"); + if (EC) + fatal(EC, "could not create the symbol map"); Symtab.printMap(Out); } // Call exit to avoid calling destructors. 
diff --git a/COFF/Driver.h b/COFF/Driver.h index e50da20cbb04..23969ee802fb 100644 --- a/COFF/Driver.h +++ b/COFF/Driver.h @@ -34,9 +34,6 @@ using llvm::COFF::WindowsSubsystem; using llvm::Optional; class InputFile; -// Entry point of the COFF linker. -void link(llvm::ArrayRef<const char *> Args); - // Implemented in MarkLive.cpp. void markLive(const std::vector<Chunk *> &Chunks); @@ -136,6 +133,7 @@ void parseSubsystem(StringRef Arg, WindowsSubsystem *Sys, uint32_t *Major, void parseAlternateName(StringRef); void parseMerge(StringRef); +void parseSection(StringRef); // Parses a string in the form of "EMBED[,=<integer>]|NO". void parseManifest(StringRef Arg); @@ -163,7 +161,6 @@ void checkFailIfMismatch(StringRef Arg); std::unique_ptr<MemoryBuffer> convertResToCOFF(const std::vector<MemoryBufferRef> &MBs); -void touchFile(StringRef Path); void createPDB(StringRef Path); // Create enum with OPT_xxx values for each option in Options.td diff --git a/COFF/DriverUtils.cpp b/COFF/DriverUtils.cpp index 014fee7fefd7..5d7dc2bc65af 100644 --- a/COFF/DriverUtils.cpp +++ b/COFF/DriverUtils.cpp @@ -19,15 +19,12 @@ #include "Symbols.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringSwitch.h" -#include "llvm/Object/Archive.h" -#include "llvm/Object/ArchiveWriter.h" #include "llvm/Object/COFF.h" #include "llvm/Option/Arg.h" #include "llvm/Option/ArgList.h" #include "llvm/Option/Option.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FileUtilities.h" -#include "llvm/Support/Path.h" #include "llvm/Support/Process.h" #include "llvm/Support/Program.h" #include "llvm/Support/raw_ostream.h" @@ -53,7 +50,8 @@ public: void run() { ErrorOr<std::string> ExeOrErr = llvm::sys::findProgramByName(Prog); - error(ExeOrErr, Twine("unable to find ") + Prog + " in PATH: "); + if (auto EC = ExeOrErr.getError()) + fatal(EC, "unable to find " + Prog + " in PATH: "); const char *Exe = Saver.save(*ExeOrErr); Args.insert(Args.begin(), Exe); Args.push_back(nullptr); @@ -61,7 +59,7 
@@ public: for (const char *S : Args) if (S) llvm::errs() << S << " "; - error("failed"); + fatal("ExecuteAndWait failed"); } } @@ -85,7 +83,7 @@ MachineTypes getMachineType(StringRef S) { .Default(IMAGE_FILE_MACHINE_UNKNOWN); if (MT != IMAGE_FILE_MACHINE_UNKNOWN) return MT; - error(Twine("unknown /machine argument: ") + S); + fatal("unknown /machine argument: " + S); } StringRef machineToStr(MachineTypes MT) { @@ -106,9 +104,9 @@ void parseNumbers(StringRef Arg, uint64_t *Addr, uint64_t *Size) { StringRef S1, S2; std::tie(S1, S2) = Arg.split(','); if (S1.getAsInteger(0, *Addr)) - error(Twine("invalid number: ") + S1); + fatal("invalid number: " + S1); if (Size && !S2.empty() && S2.getAsInteger(0, *Size)) - error(Twine("invalid number: ") + S2); + fatal("invalid number: " + S2); } // Parses a string in the form of "<integer>[.<integer>]". @@ -117,10 +115,10 @@ void parseVersion(StringRef Arg, uint32_t *Major, uint32_t *Minor) { StringRef S1, S2; std::tie(S1, S2) = Arg.split('.'); if (S1.getAsInteger(0, *Major)) - error(Twine("invalid number: ") + S1); + fatal("invalid number: " + S1); *Minor = 0; if (!S2.empty() && S2.getAsInteger(0, *Minor)) - error(Twine("invalid number: ") + S2); + fatal("invalid number: " + S2); } // Parses a string in the form of "<subsystem>[,<integer>[.<integer>]]". 
@@ -140,7 +138,7 @@ void parseSubsystem(StringRef Arg, WindowsSubsystem *Sys, uint32_t *Major, .Case("windows", IMAGE_SUBSYSTEM_WINDOWS_GUI) .Default(IMAGE_SUBSYSTEM_UNKNOWN); if (*Sys == IMAGE_SUBSYSTEM_UNKNOWN) - error(Twine("unknown subsystem: ") + SysStr); + fatal("unknown subsystem: " + SysStr); if (!Ver.empty()) parseVersion(Ver, Major, Minor); } @@ -151,10 +149,10 @@ void parseAlternateName(StringRef S) { StringRef From, To; std::tie(From, To) = S.split('='); if (From.empty() || To.empty()) - error(Twine("/alternatename: invalid argument: ") + S); + fatal("/alternatename: invalid argument: " + S); auto It = Config->AlternateNames.find(From); if (It != Config->AlternateNames.end() && It->second != To) - error(Twine("/alternatename: conflicts: ") + S); + fatal("/alternatename: conflicts: " + S); Config->AlternateNames.insert(It, std::make_pair(From, To)); } @@ -164,7 +162,7 @@ void parseMerge(StringRef S) { StringRef From, To; std::tie(From, To) = S.split('='); if (From.empty() || To.empty()) - error(Twine("/merge: invalid argument: ") + S); + fatal("/merge: invalid argument: " + S); auto Pair = Config->Merge.insert(std::make_pair(From, To)); bool Inserted = Pair.second; if (!Inserted) { @@ -175,6 +173,47 @@ void parseMerge(StringRef S) { } } +static uint32_t parseSectionAttributes(StringRef S) { + uint32_t Ret = 0; + for (char C : S.lower()) { + switch (C) { + case 'd': + Ret |= IMAGE_SCN_MEM_DISCARDABLE; + break; + case 'e': + Ret |= IMAGE_SCN_MEM_EXECUTE; + break; + case 'k': + Ret |= IMAGE_SCN_MEM_NOT_CACHED; + break; + case 'p': + Ret |= IMAGE_SCN_MEM_NOT_PAGED; + break; + case 'r': + Ret |= IMAGE_SCN_MEM_READ; + break; + case 's': + Ret |= IMAGE_SCN_MEM_SHARED; + break; + case 'w': + Ret |= IMAGE_SCN_MEM_WRITE; + break; + default: + fatal("/section: invalid argument: " + S); + } + } + return Ret; +} + +// Parses /section option argument. 
+void parseSection(StringRef S) { + StringRef Name, Attrs; + std::tie(Name, Attrs) = S.split(','); + if (Name.empty() || Attrs.empty()) + fatal("/section: invalid argument: " + S); + Config->Section[Name] = parseSectionAttributes(Attrs); +} + // Parses a string in the form of "EMBED[,=<integer>]|NO". // Results are directly written to Config. void parseManifest(StringRef Arg) { @@ -183,16 +222,16 @@ void parseManifest(StringRef Arg) { return; } if (!Arg.startswith_lower("embed")) - error(Twine("Invalid option ") + Arg); + fatal("invalid option " + Arg); Config->Manifest = Configuration::Embed; Arg = Arg.substr(strlen("embed")); if (Arg.empty()) return; if (!Arg.startswith_lower(",id=")) - error(Twine("Invalid option ") + Arg); + fatal("invalid option " + Arg); Arg = Arg.substr(strlen(",id=")); if (Arg.getAsInteger(0, Config->ManifestID)) - error(Twine("Invalid option ") + Arg); + fatal("invalid option " + Arg); } // Parses a string in the form of "level=<string>|uiAccess=<string>|NO". @@ -216,7 +255,7 @@ void parseManifestUAC(StringRef Arg) { std::tie(Config->ManifestUIAccess, Arg) = Arg.split(" "); continue; } - error(Twine("Invalid option ") + Arg); + fatal("invalid option " + Arg); } } @@ -240,10 +279,19 @@ static void quoteAndPrint(raw_ostream &Out, StringRef S) { } } -// Create a manifest file contents. -static std::string createManifestXml() { - std::string S; - llvm::raw_string_ostream OS(S); +// Create the default manifest file as a temporary file. +static std::string createDefaultXml() { + // Create a temporary file. + SmallString<128> Path; + if (auto EC = sys::fs::createTemporaryFile("tmp", "manifest", Path)) + fatal(EC, "cannot create a temporary file"); + + // Open the temporary file for writing. + std::error_code EC; + llvm::raw_fd_ostream OS(Path, EC, sys::fs::F_Text); + if (EC) + fatal(EC, "failed to open " + Path); + // Emit the XML. Note that we do *not* verify that the XML attributes are // syntactically correct. 
This is intentional for link.exe compatibility. OS << "<?xml version=\"1.0\" standalone=\"yes\"?>\n" @@ -267,21 +315,57 @@ static std::string createManifestXml() { } } OS << "</assembly>\n"; - OS.flush(); - return S; + OS.close(); + return StringRef(Path); +} + +static std::string readFile(StringRef Path) { + std::unique_ptr<MemoryBuffer> MB = + check(MemoryBuffer::getFile(Path), "could not open " + Path); + std::unique_ptr<MemoryBuffer> Buf(std::move(MB)); + return Buf->getBuffer(); +} + +static std::string createManifestXml() { + // Create the default manifest file. + std::string Path1 = createDefaultXml(); + if (Config->ManifestInput.empty()) + return readFile(Path1); + + // If manifest files are supplied by the user using /MANIFESTINPUT + // option, we need to merge them with the default manifest. + SmallString<128> Path2; + if (auto EC = sys::fs::createTemporaryFile("tmp", "manifest", Path2)) + fatal(EC, "cannot create a temporary file"); + FileRemover Remover1(Path1); + FileRemover Remover2(Path2); + + Executor E("mt.exe"); + E.add("/manifest"); + E.add(Path1); + for (StringRef Filename : Config->ManifestInput) { + E.add("/manifest"); + E.add(Filename); + } + E.add("/nologo"); + E.add("/out:" + StringRef(Path2)); + E.run(); + return readFile(Path2); } // Create a resource file containing a manifest XML. std::unique_ptr<MemoryBuffer> createManifestRes() { // Create a temporary file for the resource script file. SmallString<128> RCPath; - std::error_code EC = sys::fs::createTemporaryFile("tmp", "rc", RCPath); - error(EC, "cannot create a temporary file"); + if (auto EC = sys::fs::createTemporaryFile("tmp", "rc", RCPath)) + fatal(EC, "cannot create a temporary file"); FileRemover RCRemover(RCPath); // Open the temporary file for writing. + std::error_code EC; llvm::raw_fd_ostream Out(RCPath, EC, sys::fs::F_Text); - error(EC, Twine("failed to open ") + RCPath); + if (EC) + fatal(EC, "failed to open " + RCPath); // Write resource script to the RC file. 
Out << "#define LANG_ENGLISH 9\n" @@ -296,8 +380,8 @@ std::unique_ptr<MemoryBuffer> createManifestRes() { // Create output resource file. SmallString<128> ResPath; - EC = sys::fs::createTemporaryFile("tmp", "res", ResPath); - error(EC, "cannot create a temporary file"); + if (auto EC = sys::fs::createTemporaryFile("tmp", "res", ResPath)) + fatal(EC, "cannot create a temporary file"); Executor E("rc.exe"); E.add("/fo"); @@ -305,18 +389,17 @@ std::unique_ptr<MemoryBuffer> createManifestRes() { E.add("/nologo"); E.add(RCPath.str()); E.run(); - ErrorOr<std::unique_ptr<MemoryBuffer>> Ret = MemoryBuffer::getFile(ResPath); - error(Ret, Twine("Could not open ") + ResPath); - return std::move(*Ret); + return check(MemoryBuffer::getFile(ResPath), "could not open " + ResPath); } void createSideBySideManifest() { std::string Path = Config->ManifestFile; if (Path == "") - Path = (Twine(Config->OutputFile) + ".manifest").str(); + Path = Config->OutputFile + ".manifest"; std::error_code EC; llvm::raw_fd_ostream Out(Path, EC, llvm::sys::fs::F_Text); - error(EC, "failed to create manifest"); + if (EC) + fatal(EC, "failed to create manifest"); Out << createManifestXml(); } @@ -380,7 +463,7 @@ Export parseExport(StringRef Arg) { return E; err: - error(Twine("invalid /export: ") + Arg); + fatal("invalid /export: " + Arg); } static StringRef undecorate(StringRef Sym) { @@ -398,7 +481,7 @@ void fixupExports() { if (E.Ordinal == 0) continue; if (!Ords.insert(E.Ordinal).second) - error("duplicate export ordinal: " + E.Name); + fatal("duplicate export ordinal: " + E.Name); } for (Export &E : Config->Exports) { @@ -459,11 +542,11 @@ void checkFailIfMismatch(StringRef Arg) { StringRef K, V; std::tie(K, V) = Arg.split('='); if (K.empty() || V.empty()) - error(Twine("/failifmismatch: invalid argument: ") + Arg); + fatal("/failifmismatch: invalid argument: " + Arg); StringRef Existing = Config->MustMatch[K]; if (!Existing.empty() && V != Existing) - error(Twine("/failifmismatch: mismatch 
detected: ") + Existing + " and " + - V + " for key " + K); + fatal("/failifmismatch: mismatch detected: " + Existing + " and " + V + + " for key " + K); Config->MustMatch[K] = V; } @@ -473,8 +556,8 @@ std::unique_ptr<MemoryBuffer> convertResToCOFF(const std::vector<MemoryBufferRef> &MBs) { // Create an output file path. SmallString<128> Path; - if (llvm::sys::fs::createTemporaryFile("resource", "obj", Path)) - error("Could not create temporary file"); + if (auto EC = llvm::sys::fs::createTemporaryFile("resource", "obj", Path)) + fatal(EC, "could not create temporary file"); // Execute cvtres.exe. Executor E("cvtres.exe"); @@ -485,170 +568,7 @@ convertResToCOFF(const std::vector<MemoryBufferRef> &MBs) { for (MemoryBufferRef MB : MBs) E.add(MB.getBufferIdentifier()); E.run(); - ErrorOr<std::unique_ptr<MemoryBuffer>> Ret = MemoryBuffer::getFile(Path); - error(Ret, Twine("Could not open ") + Path); - return std::move(*Ret); -} - -static std::string writeToTempFile(StringRef Contents) { - SmallString<128> Path; - int FD; - if (llvm::sys::fs::createTemporaryFile("tmp", "def", FD, Path)) { - llvm::errs() << "failed to create a temporary file\n"; - return ""; - } - llvm::raw_fd_ostream OS(FD, /*shouldClose*/ true); - OS << Contents; - return Path.str(); -} - -void touchFile(StringRef Path) { - int FD; - std::error_code EC = sys::fs::openFileForWrite(Path, FD, sys::fs::F_Append); - error(EC, "failed to create a file"); - sys::Process::SafelyCloseFileDescriptor(FD); -} - -static std::string getImplibPath() { - if (!Config->Implib.empty()) - return Config->Implib; - SmallString<128> Out = StringRef(Config->OutputFile); - sys::path::replace_extension(Out, ".lib"); - return Out.str(); -} - -static std::unique_ptr<MemoryBuffer> createEmptyImportLibrary() { - std::string S = (Twine("LIBRARY \"") + - llvm::sys::path::filename(Config->OutputFile) + "\"\n") - .str(); - std::string Path1 = writeToTempFile(S); - std::string Path2 = getImplibPath(); - llvm::FileRemover 
Remover1(Path1); - llvm::FileRemover Remover2(Path2); - - Executor E("lib.exe"); - E.add("/nologo"); - E.add("/machine:" + machineToStr(Config->Machine)); - E.add(Twine("/def:") + Path1); - E.add(Twine("/out:") + Path2); - E.run(); - - ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr = - MemoryBuffer::getFile(Path2, -1, false); - error(BufOrErr, Twine("Failed to open ") + Path2); - return MemoryBuffer::getMemBufferCopy((*BufOrErr)->getBuffer()); -} - -static std::vector<NewArchiveIterator> -readMembers(const object::Archive &Archive) { - std::vector<NewArchiveIterator> V; - for (const auto &ChildOrErr : Archive.children()) { - error(ChildOrErr, "Archive::Child::getName failed"); - const object::Archive::Child C(*ChildOrErr); - ErrorOr<StringRef> NameOrErr = C.getName(); - error(NameOrErr, "Archive::Child::getName failed"); - V.emplace_back(C, *NameOrErr); - } - return V; -} - -// This class creates short import files which is described in -// PE/COFF spec 7. Import Library Format. -class ShortImportCreator { -public: - ShortImportCreator(object::Archive *A, StringRef S) : Parent(A), DLLName(S) {} - - NewArchiveIterator create(StringRef Sym, uint16_t Ordinal, - ImportNameType NameType, bool isData) { - size_t ImpSize = DLLName.size() + Sym.size() + 2; // +2 for NULs - size_t Size = sizeof(object::ArchiveMemberHeader) + - sizeof(coff_import_header) + ImpSize; - char *Buf = Alloc.Allocate<char>(Size); - memset(Buf, 0, Size); - char *P = Buf; - - // Write archive member header - auto *Hdr = reinterpret_cast<object::ArchiveMemberHeader *>(P); - P += sizeof(*Hdr); - sprintf(Hdr->Name, "%-12s", "dummy"); - sprintf(Hdr->LastModified, "%-12d", 0); - sprintf(Hdr->UID, "%-6d", 0); - sprintf(Hdr->GID, "%-6d", 0); - sprintf(Hdr->AccessMode, "%-8d", 0644); - sprintf(Hdr->Size, "%-10d", int(sizeof(coff_import_header) + ImpSize)); - - // Write short import library. 
- auto *Imp = reinterpret_cast<coff_import_header *>(P); - P += sizeof(*Imp); - Imp->Sig2 = 0xFFFF; - Imp->Machine = Config->Machine; - Imp->SizeOfData = ImpSize; - if (Ordinal > 0) - Imp->OrdinalHint = Ordinal; - Imp->TypeInfo = (isData ? IMPORT_DATA : IMPORT_CODE); - Imp->TypeInfo |= NameType << 2; - - // Write symbol name and DLL name. - memcpy(P, Sym.data(), Sym.size()); - P += Sym.size() + 1; - memcpy(P, DLLName.data(), DLLName.size()); - - std::error_code EC; - object::Archive::Child C(Parent, Buf, &EC); - assert(!EC && "We created an invalid buffer"); - return NewArchiveIterator(C, DLLName); - } - -private: - BumpPtrAllocator Alloc; - object::Archive *Parent; - StringRef DLLName; -}; - -static ImportNameType getNameType(StringRef Sym, StringRef ExtName) { - if (Sym != ExtName) - return IMPORT_NAME_UNDECORATE; - if (Config->Machine == I386 && Sym.startswith("_")) - return IMPORT_NAME_NOPREFIX; - return IMPORT_NAME; -} - -static std::string replace(StringRef S, StringRef From, StringRef To) { - size_t Pos = S.find(From); - assert(Pos != StringRef::npos); - return (Twine(S.substr(0, Pos)) + To + S.substr(Pos + From.size())).str(); -} - -// Creates an import library for a DLL. In this function, we first -// create an empty import library using lib.exe and then adds short -// import files to that file. 
-void writeImportLibrary() { - std::unique_ptr<MemoryBuffer> Buf = createEmptyImportLibrary(); - std::error_code EC; - object::Archive Archive(Buf->getMemBufferRef(), EC); - error(EC, "Error reading an empty import file"); - std::vector<NewArchiveIterator> Members = readMembers(Archive); - - std::string DLLName = llvm::sys::path::filename(Config->OutputFile); - ShortImportCreator ShortImport(&Archive, DLLName); - for (Export &E : Config->Exports) { - if (E.Private) - continue; - if (E.ExtName.empty()) { - Members.push_back(ShortImport.create( - E.SymbolName, E.Ordinal, getNameType(E.SymbolName, E.Name), E.Data)); - } else { - Members.push_back(ShortImport.create( - replace(E.SymbolName, E.Name, E.ExtName), E.Ordinal, - getNameType(E.SymbolName, E.Name), E.Data)); - } - } - - std::string Path = getImplibPath(); - std::pair<StringRef, std::error_code> Result = - writeArchive(Path, Members, /*WriteSymtab*/ true, object::Archive::K_GNU, - /*Deterministic*/ true, /*Thin*/ false); - error(Result.second, Twine("Failed to write ") + Path); + return check(MemoryBuffer::getFile(Path), "could not open " + Path); } // Create OptTable @@ -695,7 +615,7 @@ llvm::opt::InputArgList ArgParser::parse(ArrayRef<const char *> ArgsArr) { } if (MissingCount) - error(Twine("missing arg value for \"") + Args.getArgString(MissingIndex) + + fatal("missing arg value for \"" + Twine(Args.getArgString(MissingIndex)) + "\", expected " + Twine(MissingCount) + (MissingCount == 1 ? " argument." 
: " arguments.")); for (auto *Arg : Args.filtered(OPT_UNKNOWN)) diff --git a/COFF/Error.cpp b/COFF/Error.cpp index 255d9bbad9d8..602a8544ce2b 100644 --- a/COFF/Error.cpp +++ b/COFF/Error.cpp @@ -10,20 +10,23 @@ #include "Error.h" #include "llvm/ADT/Twine.h" +#include "llvm/Support/Error.h" #include "llvm/Support/raw_ostream.h" namespace lld { namespace coff { -void error(const Twine &Msg) { +void fatal(const Twine &Msg) { llvm::errs() << Msg << "\n"; exit(1); } -void error(std::error_code EC, const Twine &Prefix) { - if (!EC) - return; - error(Prefix + ": " + EC.message()); +void fatal(std::error_code EC, const Twine &Msg) { + fatal(Msg + ": " + EC.message()); +} + +void fatal(llvm::Error &Err, const Twine &Msg) { + fatal(errorToErrorCode(std::move(Err)), Msg); } } // namespace coff diff --git a/COFF/Error.h b/COFF/Error.h index cb0a185f0917..c9f64c662580 100644 --- a/COFF/Error.h +++ b/COFF/Error.h @@ -11,15 +11,25 @@ #define LLD_COFF_ERROR_H #include "lld/Core/LLVM.h" +#include "llvm/Support/Error.h" namespace lld { namespace coff { -LLVM_ATTRIBUTE_NORETURN void error(const Twine &Msg); -void error(std::error_code EC, const Twine &Prefix); +LLVM_ATTRIBUTE_NORETURN void fatal(const Twine &Msg); +LLVM_ATTRIBUTE_NORETURN void fatal(std::error_code EC, const Twine &Prefix); +LLVM_ATTRIBUTE_NORETURN void fatal(llvm::Error &Err, const Twine &Prefix); -template <typename T> void error(const ErrorOr<T> &V, const Twine &Prefix) { - error(V.getError(), Prefix); +template <class T> T check(ErrorOr<T> &&V, const Twine &Prefix) { + if (auto EC = V.getError()) + fatal(EC, Prefix); + return std::move(*V); +} + +template <class T> T check(Expected<T> E, const Twine &Prefix) { + if (llvm::Error Err = E.takeError()) + fatal(Err, Prefix); + return std::move(*E); } } // namespace coff diff --git a/COFF/ICF.cpp b/COFF/ICF.cpp index f99b41624a84..a2c5a90334d0 100644 --- a/COFF/ICF.cpp +++ b/COFF/ICF.cpp @@ -70,7 +70,7 @@ private: static bool equalsConstant(const SectionChunk *A, const 
SectionChunk *B); static bool equalsVariable(const SectionChunk *A, const SectionChunk *B); bool forEachGroup(std::vector<SectionChunk *> &Chunks, Comparator Eq); - bool partition(ChunkIterator Begin, ChunkIterator End, Comparator Eq); + bool segregate(ChunkIterator Begin, ChunkIterator End, Comparator Eq); std::atomic<uint64_t> NextID = { 1 }; }; @@ -148,7 +148,7 @@ bool ICF::equalsVariable(const SectionChunk *A, const SectionChunk *B) { return std::equal(A->Relocs.begin(), A->Relocs.end(), B->Relocs.begin(), Eq); } -bool ICF::partition(ChunkIterator Begin, ChunkIterator End, Comparator Eq) { +bool ICF::segregate(ChunkIterator Begin, ChunkIterator End, Comparator Eq) { bool R = false; for (auto It = Begin;;) { SectionChunk *Head = *It; @@ -171,7 +171,7 @@ bool ICF::forEachGroup(std::vector<SectionChunk *> &Chunks, Comparator Eq) { auto Bound = std::find_if(It + 1, End, [&](SectionChunk *SC) { return SC->GroupID != Head->GroupID; }); - if (partition(It, Bound, Eq)) + if (segregate(It, Bound, Eq)) R = true; It = Bound; } diff --git a/COFF/InputFiles.cpp b/COFF/InputFiles.cpp index 23af1e89c34d..ff26826371fa 100644 --- a/COFF/InputFiles.cpp +++ b/COFF/InputFiles.cpp @@ -8,30 +8,41 @@ //===----------------------------------------------------------------------===// #include "Chunks.h" +#include "Config.h" #include "Error.h" #include "InputFiles.h" #include "Symbols.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Triple.h" +#include "llvm/ADT/Twine.h" #include "llvm/IR/LLVMContext.h" -#include "llvm/LTO/LTOModule.h" +#include "llvm/LTO/legacy/LTOModule.h" +#include "llvm/Object/Binary.h" #include "llvm/Object/COFF.h" #include "llvm/Support/COFF.h" -#include "llvm/Support/Debug.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/Endian.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Target/TargetOptions.h" 
+#include "llvm-c/lto.h" +#include <cstring> +#include <system_error> +#include <utility> using namespace llvm::COFF; using namespace llvm::object; using namespace llvm::support::endian; + using llvm::Triple; using llvm::support::ulittle32_t; -using llvm::sys::fs::file_magic; -using llvm::sys::fs::identify_magic; namespace lld { namespace coff { int InputFile::NextIndex = 0; +llvm::LLVMContext BitcodeFile::Context; // Returns the last element of a path, which is supposed to be a filename. static StringRef getBasename(StringRef Path) { @@ -52,9 +63,7 @@ std::string InputFile::getShortName() { void ArchiveFile::parse() { // Parse a MemoryBufferRef as an archive file. - auto ArchiveOrErr = Archive::create(MB); - error(ArchiveOrErr, "Failed to parse static library"); - File = std::move(*ArchiveOrErr); + File = check(Archive::create(MB), "failed to parse static library"); // Allocate a buffer for Lazy objects. size_t NumSyms = File->getNumberOfSymbols(); @@ -67,40 +76,38 @@ void ArchiveFile::parse() { // Seen is a map from member files to boolean values. Initially // all members are mapped to false, which indicates all these files // are not read yet. - for (auto &ChildOrErr : File->children()) { - error(ChildOrErr, "Failed to parse static library"); - const Archive::Child &Child = *ChildOrErr; + Error Err; + for (auto &Child : File->children(Err)) Seen[Child.getChildOffset()].clear(); - } + if (Err) + fatal(Err, "failed to parse static library"); } // Returns a buffer pointing to a member file containing a given symbol. // This function is thread-safe. MemoryBufferRef ArchiveFile::getMember(const Archive::Symbol *Sym) { - auto COrErr = Sym->getMember(); - error(COrErr, Twine("Could not get the member for symbol ") + Sym->getName()); - const Archive::Child &C = *COrErr; + const Archive::Child &C = + check(Sym->getMember(), + "could not get the member for symbol " + Sym->getName()); // Return an empty buffer if we have already returned the same buffer. 
if (Seen[C.getChildOffset()].test_and_set()) return MemoryBufferRef(); - ErrorOr<MemoryBufferRef> Ret = C.getMemoryBufferRef(); - error(Ret, Twine("Could not get the buffer for the member defining symbol ") + - Sym->getName()); - return *Ret; + return check(C.getMemoryBufferRef(), + "could not get the buffer for the member defining symbol " + + Sym->getName()); } void ObjectFile::parse() { // Parse a memory buffer as a COFF file. - auto BinOrErr = createBinary(MB); - error(BinOrErr, "Failed to parse object file"); - std::unique_ptr<Binary> Bin = std::move(*BinOrErr); + std::unique_ptr<Binary> Bin = + check(createBinary(MB), "failed to parse object file"); if (auto *Obj = dyn_cast<COFFObjectFile>(Bin.get())) { Bin.release(); COFFObj.reset(Obj); } else { - error(Twine(getName()) + " is not a COFF file."); + fatal(getName() + " is not a COFF file"); } // Read section and symbol tables. @@ -116,10 +123,10 @@ void ObjectFile::initializeChunks() { for (uint32_t I = 1; I < NumSections + 1; ++I) { const coff_section *Sec; StringRef Name; - std::error_code EC = COFFObj->getSection(I, Sec); - error(EC, Twine("getSection failed: #") + Twine(I)); - EC = COFFObj->getSectionName(Sec, Name); - error(EC, Twine("getSectionName failed: #") + Twine(I)); + if (auto EC = COFFObj->getSection(I, Sec)) + fatal(EC, "getSection failed: #" + Twine(I)); + if (auto EC = COFFObj->getSectionName(Sec, Name)) + fatal(EC, "getSectionName failed: #" + Twine(I)); if (Name == ".sxdata") { SXData = Sec; continue; @@ -149,14 +156,12 @@ void ObjectFile::initializeSymbols() { uint32_t NumSymbols = COFFObj->getNumberOfSymbols(); SymbolBodies.reserve(NumSymbols); SparseSymbolBodies.resize(NumSymbols); - llvm::SmallVector<Undefined *, 8> WeakAliases; + llvm::SmallVector<std::pair<Undefined *, uint32_t>, 8> WeakAliases; int32_t LastSectionNumber = 0; for (uint32_t I = 0; I < NumSymbols; ++I) { // Get a COFFSymbolRef object. 
- auto SymOrErr = COFFObj->getSymbol(I); - error(SymOrErr, Twine("broken object file: ") + getName()); - - COFFSymbolRef Sym = *SymOrErr; + COFFSymbolRef Sym = + check(COFFObj->getSymbol(I), "broken object file: " + getName()); const void *AuxP = nullptr; if (Sym.getNumberOfAuxSymbols()) @@ -167,8 +172,10 @@ void ObjectFile::initializeSymbols() { if (Sym.isUndefined()) { Body = createUndefined(Sym); } else if (Sym.isWeakExternal()) { - Body = createWeakExternal(Sym, AuxP); - WeakAliases.push_back((Undefined *)Body); + Body = createUndefined(Sym); + uint32_t TagIndex = + static_cast<const coff_aux_weak_external *>(AuxP)->TagIndex; + WeakAliases.emplace_back((Undefined *)Body, TagIndex); } else { Body = createDefined(Sym, AuxP, IsFirst); } @@ -179,8 +186,8 @@ void ObjectFile::initializeSymbols() { I += Sym.getNumberOfAuxSymbols(); LastSectionNumber = Sym.getSectionNumber(); } - for (Undefined *U : WeakAliases) - U->WeakAlias = SparseSymbolBodies[(uintptr_t)U->WeakAlias]; + for (auto WeakAlias : WeakAliases) + WeakAlias.first->WeakAlias = SparseSymbolBodies[WeakAlias.second]; } Undefined *ObjectFile::createUndefined(COFFSymbolRef Sym) { @@ -189,15 +196,6 @@ Undefined *ObjectFile::createUndefined(COFFSymbolRef Sym) { return new (Alloc) Undefined(Name); } -Undefined *ObjectFile::createWeakExternal(COFFSymbolRef Sym, const void *AuxP) { - StringRef Name; - COFFObj->getSymbolName(Sym, Name); - auto *U = new (Alloc) Undefined(Name); - auto *Aux = (const coff_aux_weak_external *)AuxP; - U->WeakAlias = (Undefined *)(uintptr_t)Aux->TagIndex; - return U; -} - Defined *ObjectFile::createDefined(COFFSymbolRef Sym, const void *AuxP, bool IsFirst) { StringRef Name; @@ -219,11 +217,21 @@ Defined *ObjectFile::createDefined(COFFSymbolRef Sym, const void *AuxP, } return new (Alloc) DefinedAbsolute(Name, Sym); } - if (Sym.getSectionNumber() == llvm::COFF::IMAGE_SYM_DEBUG) + int32_t SectionNumber = Sym.getSectionNumber(); + if (SectionNumber == llvm::COFF::IMAGE_SYM_DEBUG) return 
nullptr; + // Reserved sections numbers don't have contents. + if (llvm::COFF::isReservedSectionNumber(SectionNumber)) + fatal("broken object file: " + getName()); + + // This symbol references a section which is not present in the section + // header. + if ((uint32_t)SectionNumber >= SparseChunks.size()) + fatal("broken object file: " + getName()); + // Nothing else to do without a section chunk. - auto *SC = cast_or_null<SectionChunk>(SparseChunks[Sym.getSectionNumber()]); + auto *SC = cast_or_null<SectionChunk>(SparseChunks[SectionNumber]); if (!SC) return nullptr; @@ -250,7 +258,7 @@ void ObjectFile::initializeSEH() { ArrayRef<uint8_t> A; COFFObj->getSectionContents(SXData, A); if (A.size() % 4 != 0) - error(".sxdata must be an array of symbol table indices"); + fatal(".sxdata must be an array of symbol table indices"); auto *I = reinterpret_cast<const ulittle32_t *>(A.data()); auto *E = reinterpret_cast<const ulittle32_t *>(A.data() + A.size()); for (; I != E; ++I) @@ -276,11 +284,11 @@ void ImportFile::parse() { // Check if the total size is valid. if ((size_t)(End - Buf) != (sizeof(*Hdr) + Hdr->SizeOfData)) - error("broken import library"); + fatal("broken import library"); // Read names and create an __imp_ symbol. StringRef Name = StringAlloc.save(StringRef(Buf + sizeof(*Hdr))); - StringRef ImpName = StringAlloc.save(Twine("__imp_") + Name); + StringRef ImpName = StringAlloc.save("__imp_" + Name); const char *NameStart = Buf + sizeof(coff_import_header) + Name.size() + 1; DLLName = StringRef(NameStart); StringRef ExtName; @@ -315,11 +323,10 @@ void BitcodeFile::parse() { // Usually parse() is thread-safe, but bitcode file is an exception. 
std::lock_guard<std::mutex> Lock(Mu); - ErrorOr<std::unique_ptr<LTOModule>> ModOrErr = - LTOModule::createFromBuffer(llvm::getGlobalContext(), MB.getBufferStart(), - MB.getBufferSize(), llvm::TargetOptions()); - error(ModOrErr, "Could not create lto module"); - M = std::move(*ModOrErr); + Context.enableDebugTypeODRUniquing(); + ErrorOr<std::unique_ptr<LTOModule>> ModOrErr = LTOModule::createFromBuffer( + Context, MB.getBufferStart(), MB.getBufferSize(), llvm::TargetOptions()); + M = check(std::move(ModOrErr), "could not create LTO module"); llvm::StringSaver Saver(Alloc); for (unsigned I = 0, E = M->getSymbolCount(); I != E; ++I) { diff --git a/COFF/InputFiles.h b/COFF/InputFiles.h index 6a263fbaddf6..0ec01b5075f9 100644 --- a/COFF/InputFiles.h +++ b/COFF/InputFiles.h @@ -12,7 +12,8 @@ #include "lld/Core/LLVM.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/LTO/LTOModule.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/LTO/legacy/LTOModule.h" #include "llvm/Object/Archive.h" #include "llvm/Object/COFF.h" #include "llvm/Support/StringSaver.h" @@ -103,7 +104,7 @@ public: // All symbols returned by ArchiveFiles are of Lazy type. 
std::vector<SymbolBody *> &getSymbols() override { - llvm_unreachable("internal error"); + llvm_unreachable("internal fatal"); } private: @@ -147,7 +148,6 @@ private: Defined *createDefined(COFFSymbolRef Sym, const void *Aux, bool IsFirst); Undefined *createUndefined(COFFSymbolRef Sym); - Undefined *createWeakExternal(COFFSymbolRef Sym, const void *Aux); std::unique_ptr<COFFObjectFile> COFFObj; llvm::BumpPtrAllocator Alloc; @@ -204,9 +204,10 @@ public: static bool classof(const InputFile *F) { return F->kind() == BitcodeKind; } std::vector<SymbolBody *> &getSymbols() override { return SymbolBodies; } MachineTypes getMachineType() override; - std::unique_ptr<LTOModule> takeModule() { return std::move(M); } + static llvm::LLVMContext Context; + private: void parse() override; diff --git a/COFF/Librarian.cpp b/COFF/Librarian.cpp new file mode 100644 index 000000000000..25fb4a87b3eb --- /dev/null +++ b/COFF/Librarian.cpp @@ -0,0 +1,489 @@ +//===- Librarian.cpp ------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains functions for the Librarian. The librarian creates and +// manages libraries of the Common Object File Format (COFF) object files. It +// primarily is used for creating static libraries and import libraries. 
+// +//===----------------------------------------------------------------------===// + +#include "Config.h" +#include "Driver.h" +#include "Error.h" +#include "Symbols.h" +#include "llvm/Object/Archive.h" +#include "llvm/Object/ArchiveWriter.h" +#include "llvm/Object/COFF.h" +#include "llvm/Support/Path.h" + +#include <vector> + +using namespace lld::coff; +using namespace llvm::COFF; +using namespace llvm::object; +using namespace llvm; + +static bool is32bit() { + switch (Config->Machine) { + default: + llvm_unreachable("unsupported machine"); + case IMAGE_FILE_MACHINE_AMD64: + return false; + case IMAGE_FILE_MACHINE_ARMNT: + case IMAGE_FILE_MACHINE_I386: + return true; + } +} + +static uint16_t getImgRelRelocation() { + switch (Config->Machine) { + default: + llvm_unreachable("unsupported machine"); + case IMAGE_FILE_MACHINE_AMD64: + return IMAGE_REL_AMD64_ADDR32NB; + case IMAGE_FILE_MACHINE_ARMNT: + return IMAGE_REL_ARM_ADDR32NB; + case IMAGE_FILE_MACHINE_I386: + return IMAGE_REL_I386_DIR32NB; + } +} + +template <class T> void append(std::vector<uint8_t> &B, const T &Data) { + size_t S = B.size(); + B.resize(S + sizeof(T)); + memcpy(&B[S], &Data, sizeof(T)); +} + +static void writeStringTable(std::vector<uint8_t> &B, + ArrayRef<const std::string> Strings) { + // The COFF string table consists of a 4-byte value which is the size of the + // table, including the length field itself. This value is followed by the + // string content itself, which is an array of null-terminated C-style + // strings. The termination is important as they are referenced to by offset + // by the symbol entity in the file format. + + std::vector<uint8_t>::size_type Pos = B.size(); + std::vector<uint8_t>::size_type Offset = B.size(); + + // Skip over the length field, we will fill it in later as we will have + // computed the length while emitting the string content itself. 
+ Pos += sizeof(uint32_t); + + for (const auto &S : Strings) { + B.resize(Pos + S.length() + 1); + strcpy(reinterpret_cast<char *>(&B[Pos]), S.c_str()); + Pos += S.length() + 1; + } + + // Backfill the length of the table now that it has been computed. + support::ulittle32_t Length(B.size() - Offset); + memcpy(&B[Offset], &Length, sizeof(Length)); +} + +static std::string getImplibPath() { + if (!Config->Implib.empty()) + return Config->Implib; + SmallString<128> Out = StringRef(Config->OutputFile); + sys::path::replace_extension(Out, ".lib"); + return Out.str(); +} + +static ImportNameType getNameType(StringRef Sym, StringRef ExtName) { + if (Sym != ExtName) + return IMPORT_NAME_UNDECORATE; + if (Config->Machine == I386 && Sym.startswith("_")) + return IMPORT_NAME_NOPREFIX; + return IMPORT_NAME; +} + +static std::string replace(StringRef S, StringRef From, StringRef To) { + size_t Pos = S.find(From); + assert(Pos != StringRef::npos); + return (Twine(S.substr(0, Pos)) + To + S.substr(Pos + From.size())).str(); +} + +static const std::string NullImportDescriptorSymbolName = + "__NULL_IMPORT_DESCRIPTOR"; + +namespace { +// This class constructs various small object files necessary to support linking +// symbols imported from a DLL. The contents are pretty strictly defined and +// nearly entirely static. The details of the structures files are defined in +// WINNT.h and the PE/COFF specification. +class ObjectFactory { + using u16 = support::ulittle16_t; + using u32 = support::ulittle32_t; + + BumpPtrAllocator Alloc; + StringRef DLLName; + StringRef Library; + std::string ImportDescriptorSymbolName; + std::string NullThunkSymbolName; + +public: + ObjectFactory(StringRef S) + : DLLName(S), Library(S.drop_back(4)), + ImportDescriptorSymbolName(("__IMPORT_DESCRIPTOR_" + Library).str()), + NullThunkSymbolName(("\x7f" + Library + "_NULL_THUNK_DATA").str()) {} + + // Creates an Import Descriptor. 
This is a small object file which contains a + // reference to the terminators and contains the library name (entry) for the + // import name table. It will force the linker to construct the necessary + // structure to import symbols from the DLL. + NewArchiveMember createImportDescriptor(std::vector<uint8_t> &Buffer); + + // Creates a NULL import descriptor. This is a small object file which + // contains a NULL import descriptor. It is used to terminate the imports + // from a specific DLL. + NewArchiveMember createNullImportDescriptor(std::vector<uint8_t> &Buffer); + + // Create a NULL Thunk Entry. This is a small object file which contains a + // NULL Import Address Table entry and a NULL Import Lookup Table Entry. It + // is used to terminate the IAT and ILT. + NewArchiveMember createNullThunk(std::vector<uint8_t> &Buffer); + + // Create a short import file which is described in PE/COFF spec 7. Import + // Library Format. + NewArchiveMember createShortImport(StringRef Sym, uint16_t Ordinal, + ImportNameType NameType, bool isData); +}; +} + +NewArchiveMember +ObjectFactory::createImportDescriptor(std::vector<uint8_t> &Buffer) { + static const uint32_t NumberOfSections = 2; + static const uint32_t NumberOfSymbols = 7; + static const uint32_t NumberOfRelocations = 3; + + // COFF Header + coff_file_header Header{ + u16(Config->Machine), u16(NumberOfSections), u32(0), + u32(sizeof(Header) + (NumberOfSections * sizeof(coff_section)) + + // .idata$2 + sizeof(coff_import_directory_table_entry) + + NumberOfRelocations * sizeof(coff_relocation) + + // .idata$6 + (DLLName.size() + 1)), + u32(NumberOfSymbols), u16(0), + u16(is32bit() ? 
IMAGE_FILE_32BIT_MACHINE : 0), + }; + append(Buffer, Header); + + // Section Header Table + static const coff_section SectionTable[NumberOfSections] = { + {{'.', 'i', 'd', 'a', 't', 'a', '$', '2'}, + u32(0), + u32(0), + u32(sizeof(coff_import_directory_table_entry)), + u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section)), + u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section) + + sizeof(coff_import_directory_table_entry)), + u32(0), + u16(NumberOfRelocations), + u16(0), + u32(IMAGE_SCN_ALIGN_4BYTES | IMAGE_SCN_CNT_INITIALIZED_DATA | + IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_WRITE)}, + {{'.', 'i', 'd', 'a', 't', 'a', '$', '6'}, + u32(0), + u32(0), + u32(DLLName.size() + 1), + u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section) + + sizeof(coff_import_directory_table_entry) + + NumberOfRelocations * sizeof(coff_relocation)), + u32(0), + u32(0), + u16(0), + u16(0), + u32(IMAGE_SCN_ALIGN_2BYTES | IMAGE_SCN_CNT_INITIALIZED_DATA | + IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_WRITE)}, + }; + append(Buffer, SectionTable); + + // .idata$2 + static const coff_import_directory_table_entry ImportDescriptor{ + u32(0), u32(0), u32(0), u32(0), u32(0), + }; + append(Buffer, ImportDescriptor); + + static const coff_relocation RelocationTable[NumberOfRelocations] = { + {u32(offsetof(coff_import_directory_table_entry, NameRVA)), u32(2), + u16(getImgRelRelocation())}, + {u32(offsetof(coff_import_directory_table_entry, ImportLookupTableRVA)), + u32(3), u16(getImgRelRelocation())}, + {u32(offsetof(coff_import_directory_table_entry, ImportAddressTableRVA)), + u32(4), u16(getImgRelRelocation())}, + }; + append(Buffer, RelocationTable); + + // .idata$6 + auto S = Buffer.size(); + Buffer.resize(S + DLLName.size() + 1); + memcpy(&Buffer[S], DLLName.data(), DLLName.size()); + Buffer[S + DLLName.size()] = '\0'; + + // Symbol Table + coff_symbol16 SymbolTable[NumberOfSymbols] = { + {{{0, 0, 0, 0, 0, 0, 0, 0}}, + u32(0), + u16(1), + u16(0), + 
IMAGE_SYM_CLASS_EXTERNAL, + 0}, + {{{'.', 'i', 'd', 'a', 't', 'a', '$', '2'}}, + u32(0), + u16(1), + u16(0), + IMAGE_SYM_CLASS_SECTION, + 0}, + {{{'.', 'i', 'd', 'a', 't', 'a', '$', '6'}}, + u32(0), + u16(2), + u16(0), + IMAGE_SYM_CLASS_STATIC, + 0}, + {{{'.', 'i', 'd', 'a', 't', 'a', '$', '4'}}, + u32(0), + u16(0), + u16(0), + IMAGE_SYM_CLASS_SECTION, + 0}, + {{{'.', 'i', 'd', 'a', 't', 'a', '$', '5'}}, + u32(0), + u16(0), + u16(0), + IMAGE_SYM_CLASS_SECTION, + 0}, + {{{0, 0, 0, 0, 0, 0, 0, 0}}, + u32(0), + u16(0), + u16(0), + IMAGE_SYM_CLASS_EXTERNAL, + 0}, + {{{0, 0, 0, 0, 0, 0, 0, 0}}, + u32(0), + u16(0), + u16(0), + IMAGE_SYM_CLASS_EXTERNAL, + 0}, + }; + reinterpret_cast<StringTableOffset &>(SymbolTable[0].Name).Offset = + sizeof(uint32_t); + reinterpret_cast<StringTableOffset &>(SymbolTable[5].Name).Offset = + sizeof(uint32_t) + ImportDescriptorSymbolName.length() + 1; + reinterpret_cast<StringTableOffset &>(SymbolTable[6].Name).Offset = + sizeof(uint32_t) + ImportDescriptorSymbolName.length() + 1 + + NullImportDescriptorSymbolName.length() + 1; + append(Buffer, SymbolTable); + + // String Table + writeStringTable(Buffer, + {ImportDescriptorSymbolName, NullImportDescriptorSymbolName, + NullThunkSymbolName}); + + StringRef F{reinterpret_cast<const char *>(Buffer.data()), Buffer.size()}; + return {MemoryBufferRef(F, DLLName)}; +} + +NewArchiveMember +ObjectFactory::createNullImportDescriptor(std::vector<uint8_t> &Buffer) { + static const uint32_t NumberOfSections = 1; + static const uint32_t NumberOfSymbols = 1; + + // COFF Header + coff_file_header Header{ + u16(Config->Machine), u16(NumberOfSections), u32(0), + u32(sizeof(Header) + (NumberOfSections * sizeof(coff_section)) + + // .idata$3 + sizeof(coff_import_directory_table_entry)), + u32(NumberOfSymbols), u16(0), + u16(is32bit() ? 
IMAGE_FILE_32BIT_MACHINE : 0), + }; + append(Buffer, Header); + + // Section Header Table + static const coff_section SectionTable[NumberOfSections] = { + {{'.', 'i', 'd', 'a', 't', 'a', '$', '3'}, + u32(0), + u32(0), + u32(sizeof(coff_import_directory_table_entry)), + u32(sizeof(coff_file_header) + + (NumberOfSections * sizeof(coff_section))), + u32(0), + u32(0), + u16(0), + u16(0), + u32(IMAGE_SCN_ALIGN_4BYTES | IMAGE_SCN_CNT_INITIALIZED_DATA | + IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_WRITE)}, + }; + append(Buffer, SectionTable); + + // .idata$3 + static const coff_import_directory_table_entry ImportDescriptor{ + u32(0), u32(0), u32(0), u32(0), u32(0), + }; + append(Buffer, ImportDescriptor); + + // Symbol Table + coff_symbol16 SymbolTable[NumberOfSymbols] = { + {{{0, 0, 0, 0, 0, 0, 0, 0}}, + u32(0), + u16(1), + u16(0), + IMAGE_SYM_CLASS_EXTERNAL, + 0}, + }; + reinterpret_cast<StringTableOffset &>(SymbolTable[0].Name).Offset = + sizeof(uint32_t); + append(Buffer, SymbolTable); + + // String Table + writeStringTable(Buffer, {NullImportDescriptorSymbolName}); + + StringRef F{reinterpret_cast<const char *>(Buffer.data()), Buffer.size()}; + return {MemoryBufferRef(F, DLLName)}; +} + +NewArchiveMember ObjectFactory::createNullThunk(std::vector<uint8_t> &Buffer) { + static const uint32_t NumberOfSections = 2; + static const uint32_t NumberOfSymbols = 1; + + // COFF Header + coff_file_header Header{ + u16(Config->Machine), u16(NumberOfSections), u32(0), + u32(sizeof(Header) + (NumberOfSections * sizeof(coff_section)) + + // .idata$5 + sizeof(export_address_table_entry) + + // .idata$4 + sizeof(export_address_table_entry)), + u32(NumberOfSymbols), u16(0), + u16(is32bit() ? 
IMAGE_FILE_32BIT_MACHINE : 0), + }; + append(Buffer, Header); + + // Section Header Table + static const coff_section SectionTable[NumberOfSections] = { + {{'.', 'i', 'd', 'a', 't', 'a', '$', '5'}, + u32(0), + u32(0), + u32(sizeof(export_address_table_entry)), + u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section)), + u32(0), + u32(0), + u16(0), + u16(0), + u32(IMAGE_SCN_ALIGN_4BYTES | IMAGE_SCN_CNT_INITIALIZED_DATA | + IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_WRITE)}, + {{'.', 'i', 'd', 'a', 't', 'a', '$', '4'}, + u32(0), + u32(0), + u32(sizeof(export_address_table_entry)), + u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section) + + sizeof(export_address_table_entry)), + u32(0), + u32(0), + u16(0), + u16(0), + u32(IMAGE_SCN_ALIGN_4BYTES | IMAGE_SCN_CNT_INITIALIZED_DATA | + IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_WRITE)}, + }; + append(Buffer, SectionTable); + + // .idata$5 + static const export_address_table_entry ILT{u32(0)}; + append(Buffer, ILT); + + // .idata$4 + static const export_address_table_entry IAT{u32(0)}; + append(Buffer, IAT); + + // Symbol Table + coff_symbol16 SymbolTable[NumberOfSymbols] = { + {{{0, 0, 0, 0, 0, 0, 0, 0}}, + u32(0), + u16(1), + u16(0), + IMAGE_SYM_CLASS_EXTERNAL, + 0}, + }; + reinterpret_cast<StringTableOffset &>(SymbolTable[0].Name).Offset = + sizeof(uint32_t); + append(Buffer, SymbolTable); + + // String Table + writeStringTable(Buffer, {NullThunkSymbolName}); + + StringRef F{reinterpret_cast<const char *>(Buffer.data()), Buffer.size()}; + return {MemoryBufferRef{F, DLLName}}; +} + +NewArchiveMember ObjectFactory::createShortImport(StringRef Sym, + uint16_t Ordinal, + ImportNameType NameType, + bool isData) { + size_t ImpSize = DLLName.size() + Sym.size() + 2; // +2 for NULs + size_t Size = sizeof(coff_import_header) + ImpSize; + char *Buf = Alloc.Allocate<char>(Size); + memset(Buf, 0, Size); + char *P = Buf; + + // Write short import library. 
+ auto *Imp = reinterpret_cast<coff_import_header *>(P); + P += sizeof(*Imp); + Imp->Sig2 = 0xFFFF; + Imp->Machine = Config->Machine; + Imp->SizeOfData = ImpSize; + if (Ordinal > 0) + Imp->OrdinalHint = Ordinal; + Imp->TypeInfo = (isData ? IMPORT_DATA : IMPORT_CODE); + Imp->TypeInfo |= NameType << 2; + + // Write symbol name and DLL name. + memcpy(P, Sym.data(), Sym.size()); + P += Sym.size() + 1; + memcpy(P, DLLName.data(), DLLName.size()); + + return {MemoryBufferRef(StringRef(Buf, Size), DLLName)}; +} + +// Creates an import library for a DLL. In this function, we first +// create the import descriptor, NULL import descriptor, and NULL thunk +// members and then add one short import file per non-private export. +void lld::coff::writeImportLibrary() { + std::vector<NewArchiveMember> Members; + + std::string Path = getImplibPath(); + std::string DLLName = llvm::sys::path::filename(Config->OutputFile); + ObjectFactory OF(DLLName); + + std::vector<uint8_t> ImportDescriptor; + Members.push_back(OF.createImportDescriptor(ImportDescriptor)); + + std::vector<uint8_t> NullImportDescriptor; + Members.push_back(OF.createNullImportDescriptor(NullImportDescriptor)); + + std::vector<uint8_t> NullThunk; + Members.push_back(OF.createNullThunk(NullThunk)); + + for (Export &E : Config->Exports) { + if (E.Private) + continue; + + ImportNameType Type = getNameType(E.SymbolName, E.Name); + std::string Name = E.ExtName.empty() + ? 
std::string(E.SymbolName) + : replace(E.SymbolName, E.Name, E.ExtName); + Members.push_back(OF.createShortImport(Name, E.Ordinal, Type, E.Data)); + } + + std::pair<StringRef, std::error_code> Result = + writeArchive(Path, Members, /*WriteSymtab*/ true, object::Archive::K_GNU, + /*Deterministic*/ true, /*Thin*/ false); + if (auto EC = Result.second) + fatal(EC, "failed to write " + Path); +} diff --git a/COFF/ModuleDef.cpp b/COFF/ModuleDef.cpp index d117e961f89a..5e393f45d184 100644 --- a/COFF/ModuleDef.cpp +++ b/COFF/ModuleDef.cpp @@ -134,13 +134,13 @@ private: void readAsInt(uint64_t *I) { read(); if (Tok.K != Identifier || Tok.Value.getAsInteger(10, *I)) - error("integer expected"); + fatal("integer expected"); } void expect(Kind Expected, StringRef Msg) { read(); if (Tok.K != Expected) - error(Msg); + fatal(Msg); } void unget() { Stack.push_back(Tok); } @@ -177,7 +177,7 @@ private: parseVersion(&Config->MajorImageVersion, &Config->MinorImageVersion); return; default: - error(Twine("unknown directive: ") + Tok.Value); + fatal("unknown directive: " + Tok.Value); } } @@ -188,7 +188,7 @@ private: if (Tok.K == Equal) { read(); if (Tok.K != Identifier) - error(Twine("identifier expected, but got ") + Tok.Value); + fatal("identifier expected, but got " + Tok.Value); E.ExtName = E.Name; E.Name = Tok.Value; } else { @@ -264,15 +264,15 @@ private: void parseVersion(uint32_t *Major, uint32_t *Minor) { read(); if (Tok.K != Identifier) - error(Twine("identifier expected, but got ") + Tok.Value); + fatal("identifier expected, but got " + Tok.Value); StringRef V1, V2; std::tie(V1, V2) = Tok.Value.split('.'); if (V1.getAsInteger(10, *Major)) - error(Twine("integer expected, but got ") + Tok.Value); + fatal("integer expected, but got " + Tok.Value); if (V2.empty()) *Minor = 0; else if (V2.getAsInteger(10, *Minor)) - error(Twine("integer expected, but got ") + Tok.Value); + fatal("integer expected, but got " + Tok.Value); } Lexer Lex; diff --git a/COFF/Options.td 
b/COFF/Options.td index a21b8de76afb..e5c9c5b4635b 100644 --- a/COFF/Options.td +++ b/COFF/Options.td @@ -48,6 +48,7 @@ def manifestuac : P<"manifestuac", "User access control">; def manifestfile : P<"manifestfile", "Manifest file path">; def manifestdependency : P<"manifestdependency", "Attributes for <dependency> in manifest file">; +def manifestinput : P<"manifestinput", "Specify manifest file">; // We cannot use multiclass P because class name "incl" is different // from its command line option name. We do this because "include" is @@ -110,6 +111,7 @@ def no_incremental : F<"incremental:no">; def nologo : F<"nologo">; def throwingnew : F<"throwingnew">; def editandcontinue : F<"editandcontinue">; +def fastfail : F<"fastfail">; def delay : QF<"delay">; def errorreport : QF<"errorreport">; diff --git a/COFF/PDB.cpp b/COFF/PDB.cpp index 786d28798bab..7606ccc680d3 100644 --- a/COFF/PDB.cpp +++ b/COFF/PDB.cpp @@ -38,7 +38,8 @@ void lld::coff::createPDB(StringRef Path) { size_t FileSize = PageSize * 3; ErrorOr<std::unique_ptr<FileOutputBuffer>> BufferOrErr = FileOutputBuffer::create(Path, FileSize); - error(BufferOrErr, Twine("failed to open ") + Path); + if (auto EC = BufferOrErr.getError()) + fatal(EC, "failed to open " + Path); std::unique_ptr<FileOutputBuffer> Buffer = std::move(*BufferOrErr); // Write the file header. diff --git a/COFF/README.md b/COFF/README.md index c1be560f4405..f1bfc9c15263 100644 --- a/COFF/README.md +++ b/COFF/README.md @@ -1,265 +1 @@ -The PE/COFF Linker -================== - -This directory contains a linker for Windows operating system. -Because the fundamental design of this port is different from -the other ports of LLD, this port is separated to this directory. - -The linker is command-line compatible with MSVC linker and is -generally 2x faster than that. It can be used to link real-world -programs such as LLD itself or Clang, or even web browsers which -are probably the largest open-source programs for Windows. 
- -This document is also applicable to ELF linker because the linker -shares the same design as this COFF linker. - -Overall Design --------------- - -This is a list of important data types in this linker. - -* SymbolBody - - SymbolBody is a class for symbols. They may be created for symbols - in object files or in archive file headers. The linker may create - them out of nothing. - - There are mainly three types of SymbolBodies: Defined, Undefined, or - Lazy. Defined symbols are for all symbols that are considered as - "resolved", including real defined symbols, COMDAT symbols, common - symbols, absolute symbols, linker-created symbols, etc. Undefined - symbols are for undefined symbols, which need to be replaced by - Defined symbols by the resolver. Lazy symbols represent symbols we - found in archive file headers -- which can turn into Defined symbols - if we read archieve members, but we haven't done that yet. - -* Symbol - - Symbol is a pointer to a SymbolBody. There's only one Symbol for - each unique symbol name (this uniqueness is guaranteed by the symbol - table). Because SymbolBodies are created for each file - independently, there can be many SymbolBodies for the same - name. Thus, the relationship between Symbols and SymbolBodies is 1:N. - - The resolver keeps the Symbol's pointer to always point to the "best" - SymbolBody. Pointer mutation is the resolve operation in this - linker. - - SymbolBodies have pointers to their Symbols. That means you can - always find the best SymbolBody from any SymbolBody by following - pointers twice. This structure makes it very easy to find - replacements for symbols. For example, if you have an Undefined - SymbolBody, you can find a Defined SymbolBody for that symbol just - by going to its Symbol and then to SymbolBody, assuming the resolver - have successfully resolved all undefined symbols. - -* Chunk - - Chunk represents a chunk of data that will occupy space in an - output. Each regular section becomes a chunk. 
- Chunks created for common or BSS symbols are not backed by sections. - The linker may create chunks out of nothing to append additional - data to an output. - - Chunks know about their size, how to copy their data to mmap'ed - outputs, and how to apply relocations to them. Specifically, - section-based chunks know how to read relocation tables and how to - apply them. - -* SymbolTable - - SymbolTable is basically a hash table from strings to Symbols, with - a logic to resolve symbol conflicts. It resolves conflicts by symbol - type. For example, if we add Undefined and Defined symbols, the - symbol table will keep the latter. If we add Defined and Lazy - symbols, it will keep the former. If we add Lazy and Undefined, it - will keep the former, but it will also trigger the Lazy symbol to - load the archive member to actually resolve the symbol. - -* OutputSection - - OutputSection is a container of Chunks. A Chunk belongs to at most - one OutputSection. - -There are mainly three actors in this linker. - -* InputFile - - InputFile is a superclass of file readers. We have a different - subclass for each input file type, such as regular object file, - archive file, etc. They are responsible for creating and owning - SymbolBodies and Chunks. - -* Writer - - The writer is responsible for writing file headers and Chunks to a - file. It creates OutputSections, put all Chunks into them, assign - unique, non-overlapping addresses and file offsets to them, and then - write them down to a file. - -* Driver - - The linking process is drived by the driver. The driver - - - processes command line options, - - creates a symbol table, - - creates an InputFile for each input file and put all symbols in it - into the symbol table, - - checks if there's no remaining undefined symbols, - - creates a writer, - - and passes the symbol table to the writer to write the result to a - file. - -Performance ------------ - -It's generally 2x faster than MSVC link.exe. 
It takes 3.5 seconds to -self-host on my Xeon 2580 machine. MSVC linker takes 7.0 seconds to -link the same executable. The resulting output is 65MB. -The old LLD is buggy that it produces 120MB executable for some reason, -and it takes 30 seconds to do that. - -We believe the performance difference comes from simplification and -optimizations we made to the new port. Notable differences are listed -below. - -* Reduced number of relocation table reads - - In the old design, relocation tables are read from beginning to - construct graphs because they consist of graph edges. In the new - design, they are not read until we actually apply relocations. - - This simplification has two benefits. One is that we don't create - additional objects for relocations but instead consume relocation - tables directly. The other is that it reduces number of relocation - entries we have to read, because we won't read relocations for - dead-stripped COMDAT sections. Large C++ programs tend to consist of - lots of COMDAT sections. In the old design, the time to process - relocation table is linear to size of input. In this new model, it's - linear to size of output. - -* Reduced number of symbol table lookup - - Symbol table lookup can be a heavy operation because number of - symbols can be very large and each symbol name can be very long - (think of C++ mangled symbols -- time to compute a hash value for a - string is linear to the length.) - - We look up the symbol table exactly only once for each symbol in the - new design. This is I believe the minimum possible number. This is - achieved by the separation of Symbol and SymbolBody. Once you get a - pointer to a Symbol by looking up the symbol table, you can always - get the latest symbol resolution result by just dereferencing a - pointer. (I'm not sure if the idea is new to the linker. 
At least, - all other linkers I've investigated so far seem to look up hash - tables or sets more than once for each new symbol, but I may be - wrong.) - -* Reduced number of file visits - - The symbol table implements the Windows linker semantics. We treat - the symbol table as a bucket of all known symbols, including symbols - in archive file headers. We put all symbols into one bucket as we - visit new files. That means we visit each file only once. - - This is different from the Unix linker semantics, in which we only - keep undefined symbols and visit each file one by one until we - resolve all undefined symbols. In the Unix model, we have to visit - archive files many times if there are circular dependencies between - archives. - -* Avoiding creating additional objects or copying data - - The data structures described in the previous section are all thin - wrappers for classes that LLVM libObject provides. We avoid copying - data from libObject's objects to our objects. We read much less data - than before. For example, we don't read symbol values until we apply - relocations because these values are not relevant to symbol - resolution. Again, COMDAT symbols may be discarded during symbol - resolution, so reading their attributes too early could result in a - waste. We use underlying objects directly where doing so makes - sense. - -Parallelism ------------ - -The abovementioned data structures are also chosen with -multi-threading in mind. It should relatively be easy to make the -symbol table a concurrent hash map, so that we let multiple workers -work on symbol table concurrently. Symbol resolution in this design is -a single pointer mutation, which allows the resolver work concurrently -in a lock-free manner using atomic pointer compare-and-swap. - -It should also be easy to apply relocations and write chunks concurrently. 
- -We created an experimental multi-threaded linker using the Microsoft -ConcRT concurrency library, and it was able to link itself in 0.5 -seconds, so we think the design is promising. - -Link-Time Optimization ----------------------- - -LTO is implemented by handling LLVM bitcode files as object files. -The linker resolves symbols in bitcode files normally. If all symbols -are successfully resolved, it then calls an LLVM libLTO function -with all bitcode files to convert them to one big regular COFF file. -Finally, the linker replaces bitcode symbols with COFF symbols, -so that we can link the input files as if they were in the native -format from the beginning. - -The details are described in this document. -http://llvm.org/docs/LinkTimeOptimization.html - -Glossary --------- - -* RVA - - Short for Relative Virtual Address. - - Windows executables or DLLs are not position-independent; they are - linked against a fixed address called an image base. RVAs are - offsets from an image base. - - Default image bases are 0x140000000 for executables and 0x18000000 - for DLLs. For example, when we are creating an executable, we assume - that the executable will be loaded at address 0x140000000 by the - loader, so we apply relocations accordingly. Result texts and data - will contain raw absolute addresses. - -* VA - - Short for Virtual Address. Equivalent to RVA + image base. It is - rarely used. We almost always use RVAs instead. - -* Base relocations - - Relocation information for the loader. If the loader decides to map - an executable or a DLL to a different address than their image - bases, it fixes up binaries using information contained in the base - relocation table. A base relocation table consists of a list of - locations containing addresses. The loader adds a difference between - RVA and actual load address to all locations listed there. - - Note that this run-time relocation mechanism is much simpler than ELF. - There's no PLT or GOT. 
Images are relocated as a whole just - by shifting entire images in memory by some offsets. Although doing - this breaks text sharing, I think this mechanism is not actually bad - on today's computers. - -* ICF - - Short for Identical COMDAT Folding. - - ICF is an optimization to reduce output size by merging COMDAT sections - by not only their names but by their contents. If two COMDAT sections - happen to have the same metadata, actual contents and relocations, - they are merged by ICF. It is known as an effective technique, - and it usually reduces C++ program's size by a few percent or more. - - Note that this is not entirely sound optimization. C/C++ require - different functions have different addresses. If a program depends on - that property, it would fail at runtime. However, that's not really an - issue on Windows because MSVC link.exe enabled the optimization by - default. As long as your program works with the linker's default - settings, your program should be safe with ICF. +See docs/NewLLD.rst diff --git a/COFF/SymbolTable.cpp b/COFF/SymbolTable.cpp index 5b7b89cd360a..df9da4c36650 100644 --- a/COFF/SymbolTable.cpp +++ b/COFF/SymbolTable.cpp @@ -14,7 +14,7 @@ #include "Symbols.h" #include "lld/Core/Parallel.h" #include "llvm/IR/LLVMContext.h" -#include "llvm/LTO/LTOCodeGenerator.h" +#include "llvm/LTO/legacy/LTOCodeGenerator.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include <utility> @@ -164,7 +164,7 @@ void SymbolTable::reportRemainingUndefines(bool Resolve) { llvm::errs() << File->getShortName() << ": undefined symbol: " << Sym->getName() << "\n"; if (!Config->Force) - error("Link failed"); + fatal("link failed"); } void SymbolTable::addLazy(Lazy *New, std::vector<Symbol *> *Accum) { @@ -211,7 +211,7 @@ void SymbolTable::addSymbol(SymbolBody *New) { // equivalent (conflicting), or more preferable, respectively. 
int Comp = Existing->compare(New); if (Comp == 0) - error(Twine("duplicate symbol: ") + Existing->getDebugName() + " and " + + fatal("duplicate symbol: " + Existing->getDebugName() + " and " + New->getDebugName()); if (Comp < 0) Sym->Body = New; @@ -338,21 +338,25 @@ void SymbolTable::addCombinedLTOObject(ObjectFile *Obj) { // diagnose them later in reportRemainingUndefines(). StringRef Name = Body->getName(); Symbol *Sym = insert(Body); + SymbolBody *Existing = Sym->Body; + + if (Existing == Body) + continue; - if (isa<DefinedBitcode>(Sym->Body)) { + if (isa<DefinedBitcode>(Existing)) { Sym->Body = Body; continue; } - if (auto *L = dyn_cast<Lazy>(Sym->Body)) { + if (auto *L = dyn_cast<Lazy>(Existing)) { // We may see new references to runtime library symbols such as __chkstk // here. These symbols must be wholly defined in non-bitcode files. addMemberFile(L); continue; } - SymbolBody *Existing = Sym->Body; + int Comp = Existing->compare(Body); if (Comp == 0) - error(Twine("LTO: unexpected duplicate symbol: ") + Name); + fatal("LTO: unexpected duplicate symbol: " + Name); if (Comp < 0) Sym->Body = Body; } @@ -369,7 +373,7 @@ void SymbolTable::addCombinedLTOObjects() { // Create an object file and add it to the symbol table by replacing any // DefinedBitcode symbols with the definitions in the object file. 
- LTOCodeGenerator CG(getGlobalContext()); + LTOCodeGenerator CG(BitcodeFile::Context); CG.setOptLevel(Config->LTOOptLevel); std::vector<ObjectFile *> Objs = createLTOObjects(&CG); @@ -379,7 +383,7 @@ void SymbolTable::addCombinedLTOObjects() { size_t NumBitcodeFiles = BitcodeFiles.size(); run(); if (BitcodeFiles.size() != NumBitcodeFiles) - error("LTO: late loaded symbol created new bitcode reference"); + fatal("LTO: late loaded symbol created new bitcode reference"); } // Combine and compile bitcode files and then return the result @@ -414,24 +418,23 @@ std::vector<ObjectFile *> SymbolTable::createLTOObjects(LTOCodeGenerator *CG) { DisableVerify = false; #endif if (!CG->optimize(DisableVerify, false, false, false)) - error(""); // optimize() should have emitted any error message. + fatal(""); // optimize() should have emitted any error message. Objs.resize(Config->LTOJobs); // Use std::list to avoid invalidation of pointers in OSPtrs. std::list<raw_svector_ostream> OSs; std::vector<raw_pwrite_stream *> OSPtrs; - for (SmallVector<char, 0> &Obj : Objs) { + for (SmallString<0> &Obj : Objs) { OSs.emplace_back(Obj); OSPtrs.push_back(&OSs.back()); } if (!CG->compileOptimized(OSPtrs)) - error(""); // compileOptimized() should have emitted any error message. + fatal(""); // compileOptimized() should have emitted any error message. 
std::vector<ObjectFile *> ObjFiles; - for (SmallVector<char, 0> &Obj : Objs) { - auto *ObjFile = new ObjectFile( - MemoryBufferRef(StringRef(Obj.data(), Obj.size()), "<LTO object>")); + for (SmallString<0> &Obj : Objs) { + auto *ObjFile = new ObjectFile(MemoryBufferRef(Obj, "<LTO object>")); Files.emplace_back(ObjFile); ObjectFiles.push_back(ObjFile); ObjFile->parse(); diff --git a/COFF/SymbolTable.h b/COFF/SymbolTable.h index ce305bfa8743..8bf4387cdfff 100644 --- a/COFF/SymbolTable.h +++ b/COFF/SymbolTable.h @@ -115,7 +115,7 @@ private: std::vector<std::future<InputFile *>> ObjectQueue; std::vector<BitcodeFile *> BitcodeFiles; - std::vector<SmallVector<char, 0>> Objs; + std::vector<SmallString<0>> Objs; llvm::BumpPtrAllocator Alloc; }; diff --git a/COFF/Symbols.cpp b/COFF/Symbols.cpp index d732d76cfb06..6e2db6631ce7 100644 --- a/COFF/Symbols.cpp +++ b/COFF/Symbols.cpp @@ -162,32 +162,6 @@ std::string SymbolBody::getDebugName() { return N; } -uint64_t Defined::getFileOff() { - switch (kind()) { - case DefinedImportDataKind: - return cast<DefinedImportData>(this)->getFileOff(); - case DefinedImportThunkKind: - return cast<DefinedImportThunk>(this)->getFileOff(); - case DefinedLocalImportKind: - return cast<DefinedLocalImport>(this)->getFileOff(); - case DefinedCommonKind: - return cast<DefinedCommon>(this)->getFileOff(); - case DefinedRegularKind: - return cast<DefinedRegular>(this)->getFileOff(); - - case DefinedBitcodeKind: - llvm_unreachable("There is no file offset for a bitcode symbol."); - case DefinedAbsoluteKind: - llvm_unreachable("Cannot get a file offset for an absolute symbol."); - case DefinedRelativeKind: - llvm_unreachable("Cannot get a file offset for a relative symbol."); - case LazyKind: - case UndefinedKind: - llvm_unreachable("Cannot get a file offset for an undefined symbol."); - } - llvm_unreachable("unknown symbol kind"); -} - COFFSymbolRef DefinedCOFF::getCOFFSymbol() { size_t SymSize = File->getCOFFObj()->getSymbolTableEntrySize(); if 
(SymSize == sizeof(coff_symbol16)) @@ -225,7 +199,7 @@ std::unique_ptr<InputFile> Lazy::getMember() { else if (Magic == file_magic::bitcode) Obj.reset(new BitcodeFile(MBRef)); else - error(Twine(File->getName()) + ": unknown file type"); + fatal("unknown file type: " + File->getName()); Obj->setParentName(File->getName()); return Obj; diff --git a/COFF/Symbols.h b/COFF/Symbols.h index 7059fbc8bb15..f96c1fb3cc1d 100644 --- a/COFF/Symbols.h +++ b/COFF/Symbols.h @@ -125,10 +125,6 @@ public: // writer sets and uses RVAs. uint64_t getRVA(); - // Returns the file offset of this symbol in the final executable. - // The writer uses this information to apply relocations. - uint64_t getFileOff(); - // Returns the RVA relative to the beginning of the output section. // Used to implement SECREL relocation type. uint64_t getSecrel(); diff --git a/COFF/Writer.cpp b/COFF/Writer.cpp index 5575c8d6b320..d8077df95701 100644 --- a/COFF/Writer.cpp +++ b/COFF/Writer.cpp @@ -59,6 +59,7 @@ private: void openFile(StringRef OutputPath); template <typename PEHeaderTy> void writeHeader(); void fixSafeSEHSymbols(); + void setSectionPermissions(); void writeSections(); void sortExceptionTable(); void applyRelocations(); @@ -114,6 +115,7 @@ public: StringRef getName() { return Name; } std::vector<Chunk *> &getChunks() { return Chunks; } void addPermissions(uint32_t C); + void setPermissions(uint32_t C); uint32_t getPermissions() { return Header.Characteristics & PermMask; } uint32_t getCharacteristics() { return Header.Characteristics; } uint64_t getRVA() { return Header.VirtualAddress; } @@ -163,19 +165,23 @@ void OutputSection::addChunk(Chunk *C) { Chunks.push_back(C); C->setOutputSection(this); uint64_t Off = Header.VirtualSize; - Off = align(Off, C->getAlign()); + Off = alignTo(Off, C->getAlign()); C->setRVA(Off); C->setOutputSectionOff(Off); Off += C->getSize(); Header.VirtualSize = Off; if (C->hasData()) - Header.SizeOfRawData = align(Off, SectorSize); + Header.SizeOfRawData = 
alignTo(Off, SectorSize); } void OutputSection::addPermissions(uint32_t C) { Header.Characteristics |= C & PermMask; } +void OutputSection::setPermissions(uint32_t C) { + Header.Characteristics = C & PermMask; +} + // Write the section header to a given buffer. void OutputSection::writeHeaderTo(uint8_t *Buf) { auto *Hdr = reinterpret_cast<coff_section *>(Buf); @@ -193,13 +199,13 @@ void OutputSection::writeHeaderTo(uint8_t *Buf) { uint64_t Defined::getSecrel() { if (auto *D = dyn_cast<DefinedRegular>(this)) return getRVA() - D->getChunk()->getOutputSection()->getRVA(); - error("SECREL relocation points to a non-regular symbol"); + fatal("SECREL relocation points to a non-regular symbol"); } uint64_t Defined::getSectionIndex() { if (auto *D = dyn_cast<DefinedRegular>(this)) return D->getChunk()->getOutputSection()->SectionIndex; - error("SECTION relocation points to a non-regular symbol"); + fatal("SECTION relocation points to a non-regular symbol"); } bool Defined::isExecutable() { @@ -222,6 +228,7 @@ void Writer::run() { createSection(".reloc"); assignAddresses(); removeEmptySections(); + setSectionPermissions(); createSymbolAndStringTable(); openFile(Config->OutputFile); if (Config->is64()) { @@ -232,7 +239,8 @@ void Writer::run() { fixSafeSEHSymbols(); writeSections(); sortExceptionTable(); - error(Buffer->commit(), "Failed to write the output file"); + if (auto EC = Buffer->commit()) + fatal(EC, "failed to write the output file"); } static StringRef getOutputSection(StringRef Name) { @@ -447,15 +455,15 @@ void Writer::createSymbolAndStringTable() { OutputSection *LastSection = OutputSections.back(); // We position the symbol table to be adjacent to the end of the last section. 
- uint64_t FileOff = - LastSection->getFileOff() + align(LastSection->getRawSize(), SectorSize); + uint64_t FileOff = LastSection->getFileOff() + + alignTo(LastSection->getRawSize(), SectorSize); if (!OutputSymtab.empty()) { PointerToSymbolTable = FileOff; FileOff += OutputSymtab.size() * sizeof(coff_symbol16); } if (!Strtab.empty()) FileOff += Strtab.size() + 4; - FileSize = align(FileOff, SectorSize); + FileSize = alignTo(FileOff, SectorSize); } // Visits all sections to assign incremental, non-overlapping RVAs and @@ -466,7 +474,7 @@ void Writer::assignAddresses() { sizeof(coff_section) * OutputSections.size(); SizeOfHeaders += Config->is64() ? sizeof(pe32plus_header) : sizeof(pe32_header); - SizeOfHeaders = align(SizeOfHeaders, SectorSize); + SizeOfHeaders = alignTo(SizeOfHeaders, SectorSize); uint64_t RVA = 0x1000; // The first page is kept unmapped. FileSize = SizeOfHeaders; // Move DISCARDABLE (or non-memory-mapped) sections to the end of file because @@ -480,10 +488,10 @@ void Writer::assignAddresses() { addBaserels(Sec); Sec->setRVA(RVA); Sec->setFileOffset(FileSize); - RVA += align(Sec->getVirtualSize(), PageSize); - FileSize += align(Sec->getRawSize(), SectorSize); + RVA += alignTo(Sec->getVirtualSize(), PageSize); + FileSize += alignTo(Sec->getRawSize(), SectorSize); } - SizeOfImage = SizeOfHeaders + align(RVA - 0x1000, PageSize); + SizeOfImage = SizeOfHeaders + alignTo(RVA - 0x1000, PageSize); } template <typename PEHeaderTy> void Writer::writeHeader() { @@ -596,13 +604,26 @@ template <typename PEHeaderTy> void Writer::writeHeader() { if (Symbol *Sym = Symtab->findUnderscore("_tls_used")) { if (Defined *B = dyn_cast<Defined>(Sym->Body)) { Dir[TLS_TABLE].RelativeVirtualAddress = B->getRVA(); - Dir[TLS_TABLE].Size = 40; + Dir[TLS_TABLE].Size = Config->is64() + ? 
sizeof(object::coff_tls_directory64) + : sizeof(object::coff_tls_directory32); } } if (Symbol *Sym = Symtab->findUnderscore("_load_config_used")) { - if (Defined *B = dyn_cast<Defined>(Sym->Body)) { + if (auto *B = dyn_cast<DefinedRegular>(Sym->Body)) { + SectionChunk *SC = B->getChunk(); + assert(B->getRVA() >= SC->getRVA()); + uint64_t OffsetInChunk = B->getRVA() - SC->getRVA(); + if (!SC->hasData() || OffsetInChunk + 4 > SC->getSize()) + fatal("_load_config_used is malformed"); + + ArrayRef<uint8_t> SecContents = SC->getContents(); + uint32_t LoadConfigSize = + *reinterpret_cast<const ulittle32_t *>(&SecContents[OffsetInChunk]); + if (OffsetInChunk + LoadConfigSize > SC->getSize()) + fatal("_load_config_used is too large"); Dir[LOAD_CONFIG_TABLE].RelativeVirtualAddress = B->getRVA(); - Dir[LOAD_CONFIG_TABLE].Size = Config->is64() ? 112 : 64; + Dir[LOAD_CONFIG_TABLE].Size = LoadConfigSize; } } @@ -626,14 +647,14 @@ template <typename PEHeaderTy> void Writer::writeHeader() { // The first 4 bytes is length including itself. Buf = reinterpret_cast<uint8_t *>(&SymbolTable[NumberOfSymbols]); write32le(Buf, Strtab.size() + 4); - memcpy(Buf + 4, Strtab.data(), Strtab.size()); + if (!Strtab.empty()) + memcpy(Buf + 4, Strtab.data(), Strtab.size()); } void Writer::openFile(StringRef Path) { - ErrorOr<std::unique_ptr<FileOutputBuffer>> BufferOrErr = - FileOutputBuffer::create(Path, FileSize, FileOutputBuffer::F_executable); - error(BufferOrErr, Twine("failed to open ") + Path); - Buffer = std::move(*BufferOrErr); + Buffer = check( + FileOutputBuffer::create(Path, FileSize, FileOutputBuffer::F_executable), + "failed to open " + Path); } void Writer::fixSafeSEHSymbols() { @@ -643,6 +664,17 @@ void Writer::fixSafeSEHSymbols() { Config->SEHCount->setVA(SEHTable->getSize() / 4); } +// Handles /section options to allow users to overwrite +// section attributes. 
+void Writer::setSectionPermissions() { + for (auto &P : Config->Section) { + StringRef Name = P.first; + uint32_t Perm = P.second; + if (auto *Sec = findSection(Name)) + Sec->setPermissions(Perm); + } +} + // Write section contents to a mmap'ed file. void Writer::writeSections() { uint8_t *Buf = Buffer->getBufferStart(); diff --git a/ELF/CMakeLists.txt b/ELF/CMakeLists.txt index 3dcb65ff8957..a1b65adc7400 100644 --- a/ELF/CMakeLists.txt +++ b/ELF/CMakeLists.txt @@ -2,25 +2,49 @@ set(LLVM_TARGET_DEFINITIONS Options.td) tablegen(LLVM Options.inc -gen-opt-parser-defs) add_public_tablegen_target(ELFOptionsTableGen) -add_lld_library(lldELF2 +add_lld_library(lldELF Driver.cpp DriverUtils.cpp + EhFrame.cpp Error.cpp + ICF.cpp InputFiles.cpp InputSection.cpp + LTO.cpp LinkerScript.cpp MarkLive.cpp OutputSections.cpp + Relocations.cpp + ScriptParser.cpp + Strings.cpp + SymbolListFile.cpp SymbolTable.cpp Symbols.cpp Target.cpp + Thunks.cpp Writer.cpp LINK_COMPONENTS + ${LLVM_TARGETS_TO_BUILD} + Analysis + BitReader + BitWriter + Codegen + Core + IPO + Linker + LTO Object Option + Passes MC Support + Target + TransformUtils + + LINK_LIBS + lldConfig + ${PTHREAD_LIB} ) -add_dependencies(lldELF2 ELFOptionsTableGen) +add_dependencies(lldELF intrinsics_gen ELFOptionsTableGen) diff --git a/ELF/Config.h b/ELF/Config.h index c279b99b43c1..2ccd95e88775 100644 --- a/ELF/Config.h +++ b/ELF/Config.h @@ -17,10 +17,10 @@ #include <vector> namespace lld { -namespace elf2 { +namespace elf { class InputFile; -class SymbolBody; +struct Symbol; enum ELFKind { ELFNoneKind, @@ -30,60 +30,105 @@ enum ELFKind { ELF64BEKind }; +enum class BuildIdKind { None, Fnv1, Md5, Sha1, Hexstring }; + +enum class UnresolvedPolicy { NoUndef, Error, Warn, Ignore }; + +struct SymbolVersion { + llvm::StringRef Name; + bool IsExternCpp; +}; + +// This struct contains symbols version definition that +// can be found in version script if it is used for link. 
+struct VersionDefinition { + VersionDefinition(llvm::StringRef Name, size_t Id) : Name(Name), Id(Id) {} + llvm::StringRef Name; + size_t Id; + std::vector<SymbolVersion> Globals; + size_t NameOff; // Offset in string table. +}; + // This struct contains the global configuration for the linker. // Most fields are direct mapping from the command line options // and such fields have the same name as the corresponding options. // Most fields are initialized by the driver. struct Configuration { - SymbolBody *EntrySym = nullptr; - SymbolBody *MipsGpDisp = nullptr; + Symbol *EntrySym = nullptr; InputFile *FirstElf = nullptr; llvm::StringRef DynamicLinker; llvm::StringRef Entry; llvm::StringRef Emulation; llvm::StringRef Fini; llvm::StringRef Init; + llvm::StringRef LtoAAPipeline; + llvm::StringRef LtoNewPmPasses; llvm::StringRef OutputFile; llvm::StringRef SoName; llvm::StringRef Sysroot; std::string RPath; - llvm::MapVector<llvm::StringRef, std::vector<llvm::StringRef>> OutputSections; + std::vector<VersionDefinition> VersionDefinitions; + std::vector<llvm::StringRef> DynamicList; std::vector<llvm::StringRef> SearchPaths; std::vector<llvm::StringRef> Undefined; + std::vector<SymbolVersion> VersionScriptGlobals; + std::vector<uint8_t> BuildIdVector; bool AllowMultipleDefinition; bool AsNeeded = false; bool Bsymbolic; + bool BsymbolicFunctions; + bool Demangle = true; + bool DisableVerify; bool DiscardAll; bool DiscardLocals; bool DiscardNone; + bool EhFrameHdr; bool EnableNewDtags; bool ExportDynamic; + bool FatalWarnings; bool GcSections; bool GnuHash = false; + bool ICF; bool Mips64EL = false; - bool NoInhibitExec; - bool NoUndefined; + bool NoGnuUnique; + bool NoUndefinedVersion; + bool Pic; + bool Pie; bool PrintGcSections; + bool Rela; + bool Relocatable; + bool SaveTemps; bool Shared; bool Static = false; bool StripAll; + bool StripDebug; bool SysvHash = true; + bool Threads; + bool Trace; bool Verbose; + bool WarnCommon; + bool ZCombreloc; bool ZExecStack; bool 
ZNodelete; bool ZNow; bool ZOrigin; bool ZRelro; + UnresolvedPolicy UnresolvedSymbols; + BuildIdKind BuildId = BuildIdKind::None; ELFKind EKind = ELFNoneKind; + uint16_t DefaultSymbolVersion = llvm::ELF::VER_NDX_GLOBAL; uint16_t EMachine = llvm::ELF::EM_NONE; uint64_t EntryAddr = -1; - unsigned Optimize = 0; + uint64_t ImageBase; + unsigned LtoJobs; + unsigned LtoO; + unsigned Optimize; }; // The only instance of Configuration struct. extern Configuration *Config; -} // namespace elf2 +} // namespace elf } // namespace lld #endif diff --git a/ELF/Driver.cpp b/ELF/Driver.cpp index f00d97851e4a..c6ca2639236f 100644 --- a/ELF/Driver.cpp +++ b/ELF/Driver.cpp @@ -10,119 +10,218 @@ #include "Driver.h" #include "Config.h" #include "Error.h" +#include "ICF.h" #include "InputFiles.h" +#include "InputSection.h" +#include "LinkerScript.h" +#include "Strings.h" +#include "SymbolListFile.h" #include "SymbolTable.h" #include "Target.h" #include "Writer.h" -#include "llvm/ADT/STLExtras.h" +#include "lld/Driver/Driver.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Support/TargetSelect.h" #include "llvm/Support/raw_ostream.h" +#include <cstdlib> #include <utility> using namespace llvm; using namespace llvm::ELF; using namespace llvm::object; +using namespace llvm::sys; using namespace lld; -using namespace lld::elf2; +using namespace lld::elf; -Configuration *elf2::Config; -LinkerDriver *elf2::Driver; +Configuration *elf::Config; +LinkerDriver *elf::Driver; + +bool elf::link(ArrayRef<const char *> Args, raw_ostream &Error) { + HasError = false; + ErrorOS = &Error; -void elf2::link(ArrayRef<const char *> Args) { Configuration C; LinkerDriver D; + ScriptConfiguration SC; Config = &C; Driver = &D; - Driver->main(Args.slice(1)); + ScriptConfig = &SC; + + Driver->main(Args); + return !HasError; } +// Parses a linker -m option. 
static std::pair<ELFKind, uint16_t> parseEmulation(StringRef S) { - if (S == "elf32btsmip") - return {ELF32BEKind, EM_MIPS}; - if (S == "elf32ltsmip") - return {ELF32LEKind, EM_MIPS}; - if (S == "elf32ppc" || S == "elf32ppc_fbsd") - return {ELF32BEKind, EM_PPC}; - if (S == "elf64ppc" || S == "elf64ppc_fbsd") - return {ELF64BEKind, EM_PPC64}; - if (S == "elf_i386") - return {ELF32LEKind, EM_386}; - if (S == "elf_x86_64") - return {ELF64LEKind, EM_X86_64}; - if (S == "aarch64linux") - return {ELF64LEKind, EM_AARCH64}; - if (S == "i386pe" || S == "i386pep" || S == "thumb2pe") - error("Windows targets are not supported on the ELF frontend: " + S); - error("Unknown emulation: " + S); + if (S.endswith("_fbsd")) + S = S.drop_back(5); + + std::pair<ELFKind, uint16_t> Ret = + StringSwitch<std::pair<ELFKind, uint16_t>>(S) + .Case("aarch64linux", {ELF64LEKind, EM_AARCH64}) + .Case("armelf_linux_eabi", {ELF32LEKind, EM_ARM}) + .Case("elf32_x86_64", {ELF32LEKind, EM_X86_64}) + .Case("elf32btsmip", {ELF32BEKind, EM_MIPS}) + .Case("elf32ltsmip", {ELF32LEKind, EM_MIPS}) + .Case("elf32ppc", {ELF32BEKind, EM_PPC}) + .Case("elf64btsmip", {ELF64BEKind, EM_MIPS}) + .Case("elf64ltsmip", {ELF64LEKind, EM_MIPS}) + .Case("elf64ppc", {ELF64BEKind, EM_PPC64}) + .Case("elf_i386", {ELF32LEKind, EM_386}) + .Case("elf_x86_64", {ELF64LEKind, EM_X86_64}) + .Default({ELFNoneKind, EM_NONE}); + + if (Ret.first == ELFNoneKind) { + if (S == "i386pe" || S == "i386pep" || S == "thumb2pe") + error("Windows targets are not supported on the ELF frontend: " + S); + else + error("unknown emulation: " + S); + } + return Ret; } // Returns slices of MB by parsing MB as an archive file. // Each slice consists of a member file in the archive. 
-static std::vector<MemoryBufferRef> getArchiveMembers(MemoryBufferRef MB) { - ErrorOr<std::unique_ptr<Archive>> FileOrErr = Archive::create(MB); - error(FileOrErr, "Failed to parse archive"); - std::unique_ptr<Archive> File = std::move(*FileOrErr); +std::vector<MemoryBufferRef> +LinkerDriver::getArchiveMembers(MemoryBufferRef MB) { + std::unique_ptr<Archive> File = + check(Archive::create(MB), "failed to parse archive"); std::vector<MemoryBufferRef> V; - for (const ErrorOr<Archive::Child> &C : File->children()) { - error(C, "Could not get the child of the archive " + File->getFileName()); - ErrorOr<MemoryBufferRef> MbOrErr = C->getMemoryBufferRef(); - error(MbOrErr, "Could not get the buffer for a child of the archive " + - File->getFileName()); - V.push_back(*MbOrErr); + Error Err; + for (const ErrorOr<Archive::Child> &COrErr : File->children(Err)) { + Archive::Child C = check(COrErr, "could not get the child of the archive " + + File->getFileName()); + MemoryBufferRef MBRef = + check(C.getMemoryBufferRef(), + "could not get the buffer for a child of the archive " + + File->getFileName()); + V.push_back(MBRef); } + if (Err) + Error(Err); + + // Take ownership of memory buffers created for members of thin archives. + for (std::unique_ptr<MemoryBuffer> &MB : File->takeThinBuffers()) + OwningMBs.push_back(std::move(MB)); + return V; } // Opens and parses a file. Path has to be resolved already. // Newly created memory buffers are owned by this driver. 
void LinkerDriver::addFile(StringRef Path) { - using namespace llvm::sys::fs; + using namespace sys::fs; if (Config->Verbose) - llvm::outs() << Path << "\n"; - auto MBOrErr = MemoryBuffer::getFile(Path); - error(MBOrErr, "cannot open " + Path); - std::unique_ptr<MemoryBuffer> &MB = *MBOrErr; - MemoryBufferRef MBRef = MB->getMemBufferRef(); - OwningMBs.push_back(std::move(MB)); // take MB ownership + outs() << Path << "\n"; + + Optional<MemoryBufferRef> Buffer = readFile(Path); + if (!Buffer.hasValue()) + return; + MemoryBufferRef MBRef = *Buffer; switch (identify_magic(MBRef.getBuffer())) { case file_magic::unknown: - readLinkerScript(&Alloc, MBRef); + readLinkerScript(MBRef); return; case file_magic::archive: if (WholeArchive) { for (MemoryBufferRef MB : getArchiveMembers(MBRef)) - Files.push_back(createObjectFile(MB)); + Files.push_back(createObjectFile(MB, Path)); return; } Files.push_back(make_unique<ArchiveFile>(MBRef)); return; case file_magic::elf_shared_object: + if (Config->Relocatable) { + error("attempted static link of dynamic object " + Path); + return; + } Files.push_back(createSharedFile(MBRef)); return; default: - Files.push_back(createObjectFile(MBRef)); + if (InLib) + Files.push_back(make_unique<LazyObjectFile>(MBRef)); + else + Files.push_back(createObjectFile(MBRef)); + } +} + +Optional<MemoryBufferRef> LinkerDriver::readFile(StringRef Path) { + auto MBOrErr = MemoryBuffer::getFile(Path); + if (auto EC = MBOrErr.getError()) { + error(EC, "cannot open " + Path); + return None; } + std::unique_ptr<MemoryBuffer> &MB = *MBOrErr; + MemoryBufferRef MBRef = MB->getMemBufferRef(); + OwningMBs.push_back(std::move(MB)); // take MB ownership + + if (Cpio) + Cpio->append(relativeToRoot(Path), MBRef.getBuffer()); + + return MBRef; +} + +// Add a given library by searching it from input search paths. 
+void LinkerDriver::addLibrary(StringRef Name) { + std::string Path = searchLibrary(Name); + if (Path.empty()) + error("unable to find library -l" + Name); + else + addFile(Path); +} + +// This function is called on startup. We need this for LTO since +// LTO calls LLVM functions to compile bitcode files to native code. +// Technically this can be delayed until we read bitcode files, but +// we don't bother to do lazily because the initialization is fast. +static void initLLVM(opt::InputArgList &Args) { + InitializeAllTargets(); + InitializeAllTargetMCs(); + InitializeAllAsmPrinters(); + InitializeAllAsmParsers(); + + // This is a flag to discard all but GlobalValue names. + // We want to enable it by default because it saves memory. + // Disable it only when a developer option (-save-temps) is given. + Driver->Context.setDiscardValueNames(!Config->SaveTemps); + Driver->Context.enableDebugTypeODRUniquing(); + + // Parse and evaluate -mllvm options. + std::vector<const char *> V; + V.push_back("lld (LLVM option parsing)"); + for (auto *Arg : Args.filtered(OPT_mllvm)) + V.push_back(Arg->getValue()); + cl::ParseCommandLineOptions(V.size(), V.data()); } // Some command line options or some combinations of them are not allowed. // This function checks for such errors. static void checkOptions(opt::InputArgList &Args) { - // Traditional linkers can generate re-linkable object files instead - // of executables or DSOs. We don't support that since the feature - // does not seem to provide more value than the static archiver. - if (Args.hasArg(OPT_relocatable)) - error("-r option is not supported. Use 'ar' command instead."); - // The MIPS ABI as of 2016 does not support the GNU-style symbol lookup // table which is a relatively new feature. 
if (Config->EMachine == EM_MIPS && Config->GnuHash) - error("The .gnu.hash section is not compatible with the MIPS target."); + error("the .gnu.hash section is not compatible with the MIPS target."); if (Config->EMachine == EM_AMDGPU && !Config->Entry.empty()) error("-e option is not valid for AMDGPU."); + + if (Config->Pie && Config->Shared) + error("-shared and -pie may not be used together"); + + if (Config->Relocatable) { + if (Config->Shared) + error("-r and -shared may not be used together"); + if (Config->GcSections) + error("-r and --gc-sections may not be used together"); + if (Config->ICF) + error("-r and --icf may not be used together"); + if (Config->Pie) + error("-r and -pie may not be used together"); + } } static StringRef @@ -132,6 +231,22 @@ getString(opt::InputArgList &Args, unsigned Key, StringRef Default = "") { return Default; } +static int getInteger(opt::InputArgList &Args, unsigned Key, int Default) { + int V = Default; + if (auto *Arg = Args.getLastArg(Key)) { + StringRef S = Arg->getValue(); + if (S.getAsInteger(10, V)) + error(Arg->getSpelling() + ": number expected, but got " + S); + } + return V; +} + +static const char *getReproduceOption(opt::InputArgList &Args) { + if (auto *Arg = Args.getLastArg(OPT_reproduce)) + return Arg->getValue(); + return getenv("LLD_REPRODUCE"); +} + static bool hasZOption(opt::InputArgList &Args, StringRef Key) { for (auto *Arg : Args.filtered(OPT_z)) if (Key == Arg->getValue()) @@ -140,12 +255,33 @@ static bool hasZOption(opt::InputArgList &Args, StringRef Key) { } void LinkerDriver::main(ArrayRef<const char *> ArgsArr) { - initSymbols(); + ELFOptTable Parser; + opt::InputArgList Args = Parser.parse(ArgsArr.slice(1)); + if (Args.hasArg(OPT_help)) { + printHelp(ArgsArr[0]); + return; + } + if (Args.hasArg(OPT_version)) { + outs() << getVersionString(); + return; + } + + if (const char *Path = getReproduceOption(Args)) { + // Note that --reproduce is a debug option so you can ignore it + // if you are trying 
to understand the whole picture of the code. + Cpio.reset(CpioFile::create(Path)); + if (Cpio) { + Cpio->append("response.txt", createResponseFile(Args)); + Cpio->append("version.txt", getVersionString()); + } + } - opt::InputArgList Args = parseArgs(&Alloc, ArgsArr); readConfigs(Args); + initLLVM(Args); createFiles(Args); checkOptions(Args); + if (HasError) + return; switch (Config->EKind) { case ELF32LEKind: @@ -165,6 +301,25 @@ void LinkerDriver::main(ArrayRef<const char *> ArgsArr) { } } +static UnresolvedPolicy getUnresolvedSymbolOption(opt::InputArgList &Args) { + if (Args.hasArg(OPT_noinhibit_exec)) + return UnresolvedPolicy::Warn; + if (Args.hasArg(OPT_no_undefined) || hasZOption(Args, "defs")) + return UnresolvedPolicy::NoUndef; + if (Config->Relocatable) + return UnresolvedPolicy::Ignore; + + if (auto *Arg = Args.getLastArg(OPT_unresolved_symbols)) { + StringRef S = Arg->getValue(); + if (S == "ignore-all" || S == "ignore-in-object-files") + return UnresolvedPolicy::Ignore; + if (S == "ignore-in-shared-libs" || S == "report-all") + return UnresolvedPolicy::Error; + error("unknown --unresolved-symbols value: " + S); + } + return UnresolvedPolicy::Error; +} + // Initializes Config members by the command line options. 
void LinkerDriver::readConfigs(opt::InputArgList &Args) { for (auto *Arg : Args.filtered(OPT_L)) @@ -185,38 +340,66 @@ void LinkerDriver::readConfigs(opt::InputArgList &Args) { Config->AllowMultipleDefinition = Args.hasArg(OPT_allow_multiple_definition); Config->Bsymbolic = Args.hasArg(OPT_Bsymbolic); + Config->BsymbolicFunctions = Args.hasArg(OPT_Bsymbolic_functions); + Config->Demangle = !Args.hasArg(OPT_no_demangle); + Config->DisableVerify = Args.hasArg(OPT_disable_verify); Config->DiscardAll = Args.hasArg(OPT_discard_all); Config->DiscardLocals = Args.hasArg(OPT_discard_locals); Config->DiscardNone = Args.hasArg(OPT_discard_none); + Config->EhFrameHdr = Args.hasArg(OPT_eh_frame_hdr); Config->EnableNewDtags = !Args.hasArg(OPT_disable_new_dtags); Config->ExportDynamic = Args.hasArg(OPT_export_dynamic); + Config->FatalWarnings = Args.hasArg(OPT_fatal_warnings); Config->GcSections = Args.hasArg(OPT_gc_sections); - Config->NoInhibitExec = Args.hasArg(OPT_noinhibit_exec); - Config->NoUndefined = Args.hasArg(OPT_no_undefined); + Config->ICF = Args.hasArg(OPT_icf); + Config->NoGnuUnique = Args.hasArg(OPT_no_gnu_unique); + Config->NoUndefinedVersion = Args.hasArg(OPT_no_undefined_version); + Config->Pie = Args.hasArg(OPT_pie); Config->PrintGcSections = Args.hasArg(OPT_print_gc_sections); + Config->Relocatable = Args.hasArg(OPT_relocatable); + Config->SaveTemps = Args.hasArg(OPT_save_temps); Config->Shared = Args.hasArg(OPT_shared); Config->StripAll = Args.hasArg(OPT_strip_all); + Config->StripDebug = Args.hasArg(OPT_strip_debug); + Config->Threads = Args.hasArg(OPT_threads); + Config->Trace = Args.hasArg(OPT_trace); Config->Verbose = Args.hasArg(OPT_verbose); + Config->WarnCommon = Args.hasArg(OPT_warn_common); Config->DynamicLinker = getString(Args, OPT_dynamic_linker); Config->Entry = getString(Args, OPT_entry); Config->Fini = getString(Args, OPT_fini, "_fini"); Config->Init = getString(Args, OPT_init, "_init"); + Config->LtoAAPipeline = getString(Args, 
OPT_lto_aa_pipeline); + Config->LtoNewPmPasses = getString(Args, OPT_lto_newpm_passes); Config->OutputFile = getString(Args, OPT_o); Config->SoName = getString(Args, OPT_soname); Config->Sysroot = getString(Args, OPT_sysroot); + Config->Optimize = getInteger(Args, OPT_O, 1); + Config->LtoO = getInteger(Args, OPT_lto_O, 2); + if (Config->LtoO > 3) + error("invalid optimization level for LTO: " + getString(Args, OPT_lto_O)); + Config->LtoJobs = getInteger(Args, OPT_lto_jobs, 1); + if (Config->LtoJobs == 0) + error("number of threads must be > 0"); + + Config->ZCombreloc = !hasZOption(Args, "nocombreloc"); Config->ZExecStack = hasZOption(Args, "execstack"); Config->ZNodelete = hasZOption(Args, "nodelete"); Config->ZNow = hasZOption(Args, "now"); Config->ZOrigin = hasZOption(Args, "origin"); Config->ZRelro = !hasZOption(Args, "norelro"); - if (auto *Arg = Args.getLastArg(OPT_O)) { - StringRef Val = Arg->getValue(); - if (Val.getAsInteger(10, Config->Optimize)) - error("Invalid optimization level"); - } + if (Config->Relocatable) + Config->StripAll = false; + + // --strip-all implies --strip-debug. + if (Config->StripAll) + Config->StripDebug = true; + + // Config->Pic is true if we are generating position-independent code. + Config->Pic = Config->Pie || Config->Shared; if (auto *Arg = Args.getLastArg(OPT_hash_style)) { StringRef S = Arg->getValue(); @@ -226,19 +409,52 @@ void LinkerDriver::readConfigs(opt::InputArgList &Args) { } else if (S == "both") { Config->GnuHash = true; } else if (S != "sysv") - error("Unknown hash style: " + S); + error("unknown hash style: " + S); + } + + // Parse --build-id or --build-id=<style>. 
+ if (Args.hasArg(OPT_build_id)) + Config->BuildId = BuildIdKind::Fnv1; + if (auto *Arg = Args.getLastArg(OPT_build_id_eq)) { + StringRef S = Arg->getValue(); + if (S == "md5") { + Config->BuildId = BuildIdKind::Md5; + } else if (S == "sha1") { + Config->BuildId = BuildIdKind::Sha1; + } else if (S == "none") { + Config->BuildId = BuildIdKind::None; + } else if (S.startswith("0x")) { + Config->BuildId = BuildIdKind::Hexstring; + Config->BuildIdVector = parseHex(S.substr(2)); + } else { + error("unknown --build-id style: " + S); + } } for (auto *Arg : Args.filtered(OPT_undefined)) Config->Undefined.push_back(Arg->getValue()); + + Config->UnresolvedSymbols = getUnresolvedSymbolOption(Args); + + if (auto *Arg = Args.getLastArg(OPT_dynamic_list)) + if (Optional<MemoryBufferRef> Buffer = readFile(Arg->getValue())) + parseDynamicList(*Buffer); + + for (auto *Arg : Args.filtered(OPT_export_dynamic_symbol)) + Config->DynamicList.push_back(Arg->getValue()); + + if (auto *Arg = Args.getLastArg(OPT_version_script)) + if (Optional<MemoryBufferRef> Buffer = readFile(Arg->getValue())) + parseVersionScript(*Buffer); } void LinkerDriver::createFiles(opt::InputArgList &Args) { for (auto *Arg : Args) { switch (Arg->getOption().getID()) { case OPT_l: - addFile(searchLibrary(Arg->getValue())); + addLibrary(Arg->getValue()); break; + case OPT_alias_script_T: case OPT_INPUT: case OPT_script: addFile(Arg->getValue()); @@ -261,75 +477,112 @@ void LinkerDriver::createFiles(opt::InputArgList &Args) { case OPT_no_whole_archive: WholeArchive = false; break; + case OPT_start_lib: + InLib = true; + break; + case OPT_end_lib: + InLib = false; + break; } } - if (Files.empty()) + if (Files.empty() && !HasError) error("no input files."); + + // If -m <machine_type> was not given, infer it from object files. 
+ if (Config->EKind == ELFNoneKind) { + for (std::unique_ptr<InputFile> &F : Files) { + if (F->EKind == ELFNoneKind) + continue; + Config->EKind = F->EKind; + Config->EMachine = F->EMachine; + break; + } + } } +// Do actual linking. Note that when this function is called, +// all linker scripts have already been parsed. template <class ELFT> void LinkerDriver::link(opt::InputArgList &Args) { SymbolTable<ELFT> Symtab; - Target.reset(createTarget()); - - if (!Config->Shared) { - // Add entry symbol. - // - // There is no entry symbol for AMDGPU binaries, so skip adding one to avoid - // having and undefined symbol. - if (Config->Entry.empty() && Config->EMachine != EM_AMDGPU) - Config->Entry = (Config->EMachine == EM_MIPS) ? "__start" : "_start"; - - // In the assembly for 32 bit x86 the _GLOBAL_OFFSET_TABLE_ symbol - // is magical and is used to produce a R_386_GOTPC relocation. - // The R_386_GOTPC relocation value doesn't actually depend on the - // symbol value, so it could use an index of STN_UNDEF which, according - // to the spec, means the symbol value is 0. - // Unfortunately both gas and MC keep the _GLOBAL_OFFSET_TABLE_ symbol in - // the object file. - // The situation is even stranger on x86_64 where the assembly doesn't - // need the magical symbol, but gas still puts _GLOBAL_OFFSET_TABLE_ as - // an undefined symbol in the .o files. - // Given that the symbol is effectively unused, we just create a dummy - // hidden one to avoid the undefined symbol error. - Symtab.addIgnored("_GLOBAL_OFFSET_TABLE_"); - } + elf::Symtab<ELFT>::X = &Symtab; + + std::unique_ptr<TargetInfo> TI(createTarget()); + Target = TI.get(); + LinkerScript<ELFT> LS; + Script<ELFT>::X = &LS; + + Config->Rela = ELFT::Is64Bits || Config->EMachine == EM_X86_64; + Config->Mips64EL = + (Config->EMachine == EM_MIPS && Config->EKind == ELF64LEKind); + + // Add entry symbol. Note that AMDGPU binaries have no entry points. 
+ if (Config->Entry.empty() && !Config->Shared && !Config->Relocatable && + Config->EMachine != EM_AMDGPU) + Config->Entry = (Config->EMachine == EM_MIPS) ? "__start" : "_start"; + + // Default output filename is "a.out" by the Unix tradition. + if (Config->OutputFile.empty()) + Config->OutputFile = "a.out"; + + // Handle --trace-symbol. + for (auto *Arg : Args.filtered(OPT_trace_symbol)) + Symtab.trace(Arg->getValue()); + // Set either EntryAddr (if S is a number) or EntrySym (otherwise). if (!Config->Entry.empty()) { - // Set either EntryAddr (if S is a number) or EntrySym (otherwise). StringRef S = Config->Entry; if (S.getAsInteger(0, Config->EntryAddr)) Config->EntrySym = Symtab.addUndefined(S); } - if (Config->EMachine == EM_MIPS) { - // On MIPS O32 ABI, _gp_disp is a magic symbol designates offset between - // start of function and gp pointer into GOT. Use 'strong' variant of - // the addIgnored to prevent '_gp_disp' substitution. - Config->MipsGpDisp = Symtab.addIgnoredStrong("_gp_disp"); - - // Define _gp for MIPS. st_value of _gp symbol will be updated by Writer - // so that it points to an absolute address which is relative to GOT. - // See "Global Data Symbols" in Chapter 6 in the following document: - // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf - Symtab.addAbsolute("_gp", ElfSym<ELFT>::MipsGp); + // Initialize Config->ImageBase. + if (auto *Arg = Args.getLastArg(OPT_image_base)) { + StringRef S = Arg->getValue(); + if (S.getAsInteger(0, Config->ImageBase)) + error(Arg->getSpelling() + ": number expected, but got " + S); + else if ((Config->ImageBase % Target->PageSize) != 0) + warning(Arg->getSpelling() + ": address isn't multiple of page size"); + } else { + Config->ImageBase = Config->Pic ? 
0 : Target->DefaultImageBase; } for (std::unique_ptr<InputFile> &F : Files) Symtab.addFile(std::move(F)); + if (HasError) + return; // There were duplicate symbols or incompatible files - for (StringRef S : Config->Undefined) - Symtab.addUndefinedOpt(S); + Symtab.scanUndefinedFlags(); + Symtab.scanShlibUndefined(); + Symtab.scanDynamicList(); + Symtab.scanVersionScript(); + Symtab.scanSymbolVersions(); + + Symtab.addCombinedLtoObject(); + if (HasError) + return; for (auto *Arg : Args.filtered(OPT_wrap)) Symtab.wrap(Arg->getValue()); - if (Config->OutputFile.empty()) - Config->OutputFile = "a.out"; - // Write the result to the file. - Symtab.scanShlibUndefined(); if (Config->GcSections) - markLive<ELFT>(&Symtab); + markLive<ELFT>(); + if (Config->ICF) + doIcf<ELFT>(); + + // MergeInputSection::splitIntoPieces needs to be called before + // any call of MergeInputSection::getOffset. Do that. + for (const std::unique_ptr<elf::ObjectFile<ELFT>> &F : + Symtab.getObjectFiles()) + for (InputSectionBase<ELFT> *S : F->getSections()) { + if (!S || S == &InputSection<ELFT>::Discarded || !S->Live) + continue; + if (S->Compressed) + S->uncompress(); + if (auto *MS = dyn_cast<MergeInputSection<ELFT>>(S)) + MS->splitIntoPieces(); + } + writeResult<ELFT>(&Symtab); } diff --git a/ELF/Driver.h b/ELF/Driver.h index 720ef46dc710..6b9b9bb208e5 100644 --- a/ELF/Driver.h +++ b/ELF/Driver.h @@ -12,36 +12,54 @@ #include "SymbolTable.h" #include "lld/Core/LLVM.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" #include "llvm/Option/ArgList.h" +#include "llvm/Support/raw_ostream.h" namespace lld { -namespace elf2 { +namespace elf { extern class LinkerDriver *Driver; -// Entry point of the ELF linker. 
-void link(ArrayRef<const char *> Args); +class CpioFile; class LinkerDriver { public: void main(ArrayRef<const char *> Args); void addFile(StringRef Path); + void addLibrary(StringRef Name); + llvm::LLVMContext Context; // to parse bitcode ifles + std::unique_ptr<CpioFile> Cpio; // for reproduce private: + std::vector<MemoryBufferRef> getArchiveMembers(MemoryBufferRef MB); + llvm::Optional<MemoryBufferRef> readFile(StringRef Path); void readConfigs(llvm::opt::InputArgList &Args); void createFiles(llvm::opt::InputArgList &Args); template <class ELFT> void link(llvm::opt::InputArgList &Args); - llvm::BumpPtrAllocator Alloc; + // True if we are in --whole-archive and --no-whole-archive. bool WholeArchive = false; + + // True if we are in --start-lib and --end-lib. + bool InLib = false; + + llvm::BumpPtrAllocator Alloc; std::vector<std::unique_ptr<InputFile>> Files; std::vector<std::unique_ptr<MemoryBuffer>> OwningMBs; }; // Parses command line options. -llvm::opt::InputArgList parseArgs(llvm::BumpPtrAllocator *A, - ArrayRef<const char *> Args); +class ELFOptTable : public llvm::opt::OptTable { +public: + ELFOptTable(); + llvm::opt::InputArgList parse(ArrayRef<const char *> Argv); + +private: + llvm::BumpPtrAllocator Alloc; +}; // Create enum with OPT_xxx values for each option in Options.td enum { @@ -51,14 +69,43 @@ enum { #undef OPTION }; -// Parses a linker script. Calling this function updates the Symtab and Config. -void readLinkerScript(llvm::BumpPtrAllocator *A, MemoryBufferRef MB); +// This is the class to create a .cpio file for --reproduce. +// +// If "--reproduce foo" is given, we create a file "foo.cpio" and +// copy all input files to the archive, along with a response file +// to re-run the same command with the same inputs. +// It is useful for reporting issues to LLD developers. +// +// Cpio as a file format is a deliberate choice. It's standardized in +// POSIX and very easy to create. cpio command is available virtually +// on all Unix systems. 
See +// http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_07 +// for the format details. +class CpioFile { +public: + static CpioFile *create(StringRef OutputPath); + void append(StringRef Path, StringRef Data); + +private: + CpioFile(std::unique_ptr<llvm::raw_fd_ostream> OS, StringRef Basename); + + std::unique_ptr<llvm::raw_fd_ostream> OS; + llvm::StringSet<> Seen; + std::string Basename; +}; + +void printHelp(const char *Argv0); +std::string getVersionString(); +std::vector<uint8_t> parseHexstring(StringRef S); + +std::string createResponseFile(const llvm::opt::InputArgList &Args); +std::string relativeToRoot(StringRef Path); std::string findFromSearchPaths(StringRef Path); std::string searchLibrary(StringRef Path); std::string buildSysrootedPath(llvm::StringRef Dir, llvm::StringRef File); -} // namespace elf2 +} // namespace elf } // namespace lld #endif diff --git a/ELF/DriverUtils.cpp b/ELF/DriverUtils.cpp index 965ed4f00a61..3f18259b4ae7 100644 --- a/ELF/DriverUtils.cpp +++ b/ELF/DriverUtils.cpp @@ -15,16 +15,20 @@ #include "Driver.h" #include "Error.h" +#include "lld/Config/Version.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Option/Option.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" #include "llvm/Support/StringSaver.h" using namespace llvm; +using namespace llvm::sys; using namespace lld; -using namespace lld::elf2; +using namespace lld::elf; // Create OptTable @@ -34,55 +38,208 @@ using namespace lld::elf2; #undef PREFIX // Create table mapping all options defined in Options.td -static const opt::OptTable::Info infoTable[] = { +static const opt::OptTable::Info OptInfo[] = { #define OPTION(X1, X2, ID, KIND, GROUP, ALIAS, X6, X7, X8, X9, X10) \ { \ X1, X2, X9, X10, OPT_##ID, opt::Option::KIND##Class, X8, X7, OPT_##GROUP, \ OPT_##ALIAS, X6 \ - } \ - , + }, #include "Options.inc" #undef OPTION }; -class ELFOptTable : public 
opt::OptTable { -public: - ELFOptTable() : OptTable(infoTable) {} -}; +ELFOptTable::ELFOptTable() : OptTable(OptInfo) {} + +static cl::TokenizerCallback getQuotingStyle(opt::InputArgList &Args) { + if (auto *Arg = Args.getLastArg(OPT_rsp_quoting)) { + StringRef S = Arg->getValue(); + if (S != "windows" && S != "posix") + error("invalid response file quoting: " + S); + if (S == "windows") + return cl::TokenizeWindowsCommandLine; + return cl::TokenizeGNUCommandLine; + } + if (Triple(sys::getProcessTriple()).getOS() == Triple::Win32) + return cl::TokenizeWindowsCommandLine; + return cl::TokenizeGNUCommandLine; +} // Parses a given list of options. -opt::InputArgList elf2::parseArgs(llvm::BumpPtrAllocator *A, - ArrayRef<const char *> Argv) { +opt::InputArgList ELFOptTable::parse(ArrayRef<const char *> Argv) { // Make InputArgList from string vectors. - ELFOptTable Table; unsigned MissingIndex; unsigned MissingCount; + SmallVector<const char *, 256> Vec(Argv.data(), Argv.data() + Argv.size()); + + // We need to get the quoting style for response files before parsing all + // options so we parse here before and ignore all the options but + // --rsp-quoting. + opt::InputArgList Args = this->ParseArgs(Vec, MissingIndex, MissingCount); // Expand response files. '@<filename>' is replaced by the file's contents. - SmallVector<const char *, 256> Vec(Argv.data(), Argv.data() + Argv.size()); - StringSaver Saver(*A); - llvm::cl::ExpandResponseFiles(Saver, llvm::cl::TokenizeGNUCommandLine, Vec); + StringSaver Saver(Alloc); + cl::ExpandResponseFiles(Saver, getQuotingStyle(Args), Vec); // Parse options and then do error checking. - opt::InputArgList Args = Table.ParseArgs(Vec, MissingIndex, MissingCount); + Args = this->ParseArgs(Vec, MissingIndex, MissingCount); if (MissingCount) error(Twine("missing arg value for \"") + Args.getArgString(MissingIndex) + "\", expected " + Twine(MissingCount) + (MissingCount == 1 ? 
" argument.\n" : " arguments")); - iterator_range<opt::arg_iterator> Unknowns = Args.filtered(OPT_UNKNOWN); - for (auto *Arg : Unknowns) - warning("warning: unknown argument: " + Arg->getSpelling()); - if (Unknowns.begin() != Unknowns.end()) - error("unknown argument(s) found"); - + for (auto *Arg : Args.filtered(OPT_UNKNOWN)) + error("unknown argument: " + Arg->getSpelling()); return Args; } -std::string elf2::findFromSearchPaths(StringRef Path) { +void elf::printHelp(const char *Argv0) { + ELFOptTable Table; + Table.PrintHelp(outs(), Argv0, "lld", false); +} + +std::string elf::getVersionString() { + std::string Version = getLLDVersion(); + std::string Repo = getLLDRepositoryVersion(); + if (Repo.empty()) + return "LLD " + Version + "\n"; + return "LLD " + Version + " " + Repo + "\n"; +} + +// Makes a given pathname an absolute path first, and then remove +// beginning /. For example, "../foo.o" is converted to "home/john/foo.o", +// assuming that the current directory is "/home/john/bar". +std::string elf::relativeToRoot(StringRef Path) { + SmallString<128> Abs = Path; + if (std::error_code EC = fs::make_absolute(Abs)) + fatal("make_absolute failed: " + EC.message()); + path::remove_dots(Abs, /*remove_dot_dot=*/true); + + // This is Windows specific. root_name() returns a drive letter + // (e.g. "c:") or a UNC name (//net). We want to keep it as part + // of the result. 
+ SmallString<128> Res; + StringRef Root = path::root_name(Abs); + if (Root.endswith(":")) + Res = Root.drop_back(); + else if (Root.startswith("//")) + Res = Root.substr(2); + + path::append(Res, path::relative_path(Abs)); + return Res.str(); +} + +CpioFile::CpioFile(std::unique_ptr<raw_fd_ostream> OS, StringRef S) + : OS(std::move(OS)), Basename(S) {} + +CpioFile *CpioFile::create(StringRef OutputPath) { + std::string Path = (OutputPath + ".cpio").str(); + std::error_code EC; + auto OS = llvm::make_unique<raw_fd_ostream>(Path, EC, fs::F_None); + if (EC) { + error(EC, "--reproduce: failed to open " + Path); + return nullptr; + } + return new CpioFile(std::move(OS), path::filename(OutputPath)); +} + +static void writeMember(raw_fd_ostream &OS, StringRef Path, StringRef Data) { + // The c_dev/c_ino pair should be unique according to the spec, + // but no one seems to care. + OS << "070707"; // c_magic + OS << "000000"; // c_dev + OS << "000000"; // c_ino + OS << "100664"; // c_mode: C_ISREG | rw-rw-r-- + OS << "000000"; // c_uid + OS << "000000"; // c_gid + OS << "000001"; // c_nlink + OS << "000000"; // c_rdev + OS << "00000000000"; // c_mtime + OS << format("%06o", Path.size() + 1); // c_namesize + OS << format("%011o", Data.size()); // c_filesize + OS << Path << '\0'; // c_name + OS << Data; // c_filedata +} + +void CpioFile::append(StringRef Path, StringRef Data) { + if (!Seen.insert(Path).second) + return; + + // Construct an in-archive filename so that /home/foo/bar is stored + // as baz/home/foo/bar where baz is the basename of the output file. + // (i.e. in that case we are creating baz.cpio.) + SmallString<128> Fullpath; + path::append(Fullpath, Basename, Path); + + // Use unix path separators so the cpio can be extracted on both unix and + // windows. + std::replace(Fullpath.begin(), Fullpath.end(), '\\', '/'); + + writeMember(*OS, Fullpath, Data); + + // Print the trailer and seek back. + // This way we have a valid archive if we crash. 
+ uint64_t Pos = OS->tell(); + writeMember(*OS, "TRAILER!!!", ""); + OS->seek(Pos); +} + +// Quote a given string if it contains a space character. +static std::string quote(StringRef S) { + if (S.find(' ') == StringRef::npos) + return S; + return ("\"" + S + "\"").str(); +} + +static std::string rewritePath(StringRef S) { + if (fs::exists(S)) + return relativeToRoot(S); + return S; +} + +static std::string stringize(opt::Arg *Arg) { + std::string K = Arg->getSpelling(); + if (Arg->getNumValues() == 0) + return K; + std::string V = quote(Arg->getValue()); + if (Arg->getOption().getRenderStyle() == opt::Option::RenderJoinedStyle) + return K + V; + return K + " " + V; +} + +// Reconstructs command line arguments so that so that you can re-run +// the same command with the same inputs. This is for --reproduce. +std::string elf::createResponseFile(const opt::InputArgList &Args) { + SmallString<0> Data; + raw_svector_ostream OS(Data); + + // Copy the command line to the output while rewriting paths. + for (auto *Arg : Args) { + switch (Arg->getOption().getID()) { + case OPT_reproduce: + break; + case OPT_INPUT: + OS << quote(rewritePath(Arg->getValue())) << "\n"; + break; + case OPT_L: + case OPT_dynamic_list: + case OPT_rpath: + case OPT_alias_script_T: + case OPT_script: + case OPT_version_script: + OS << Arg->getSpelling() << " " + << quote(rewritePath(Arg->getValue())) << "\n"; + break; + default: + OS << stringize(Arg) << "\n"; + } + } + return Data.str(); +} + +std::string elf::findFromSearchPaths(StringRef Path) { for (StringRef Dir : Config->SearchPaths) { std::string FullPath = buildSysrootedPath(Dir, Path); - if (sys::fs::exists(FullPath)) + if (fs::exists(FullPath)) return FullPath; } return ""; @@ -90,31 +247,30 @@ std::string elf2::findFromSearchPaths(StringRef Path) { // Searches a given library from input search paths, which are filled // from -L command line switches. Returns a path to an existent library file. 
-std::string elf2::searchLibrary(StringRef Path) { - std::vector<std::string> Names; - if (Path[0] == ':') { - Names.push_back(Path.drop_front()); - } else { - if (!Config->Static) - Names.push_back(("lib" + Path + ".so").str()); - Names.push_back(("lib" + Path + ".a").str()); - } - for (const std::string &Name : Names) { - std::string S = findFromSearchPaths(Name); - if (!S.empty()) +std::string elf::searchLibrary(StringRef Path) { + if (Path.startswith(":")) + return findFromSearchPaths(Path.substr(1)); + for (StringRef Dir : Config->SearchPaths) { + if (!Config->Static) { + std::string S = buildSysrootedPath(Dir, ("lib" + Path + ".so").str()); + if (fs::exists(S)) + return S; + } + std::string S = buildSysrootedPath(Dir, ("lib" + Path + ".a").str()); + if (fs::exists(S)) return S; } - error("Unable to find library -l" + Path); + return ""; } // Makes a path by concatenating Dir and File. // If Dir starts with '=' the result will be preceded by Sysroot, // which can be set with --sysroot command line switch. -std::string elf2::buildSysrootedPath(StringRef Dir, StringRef File) { +std::string elf::buildSysrootedPath(StringRef Dir, StringRef File) { SmallString<128> Path; if (Dir.startswith("=")) - sys::path::append(Path, Config->Sysroot, Dir.substr(1), File); + path::append(Path, Config->Sysroot, Dir.substr(1), File); else - sys::path::append(Path, Dir, File); + path::append(Path, Dir, File); return Path.str(); } diff --git a/ELF/EhFrame.cpp b/ELF/EhFrame.cpp new file mode 100644 index 000000000000..b130ac1ca22d --- /dev/null +++ b/ELF/EhFrame.cpp @@ -0,0 +1,167 @@ +//===- EhFrame.cpp -------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// .eh_frame section contains information on how to unwind the stack when +// an exception is thrown. The section consists of sequence of CIE and FDE +// records. The linker needs to merge CIEs and associate FDEs to CIEs. +// That means the linker has to understand the format of the section. +// +// This file contains a few utility functions to read .eh_frame contents. +// +//===----------------------------------------------------------------------===// + +#include "EhFrame.h" +#include "Error.h" + +#include "llvm/Object/ELF.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/Endian.h" + +using namespace llvm; +using namespace llvm::ELF; +using namespace llvm::dwarf; +using namespace llvm::object; +using namespace llvm::support::endian; + +namespace lld { +namespace elf { + +// .eh_frame section is a sequence of records. Each record starts with +// a 4 byte length field. This function reads the length. +template <class ELFT> size_t readEhRecordSize(ArrayRef<uint8_t> D) { + const endianness E = ELFT::TargetEndianness; + if (D.size() < 4) + fatal("CIE/FDE too small"); + + // First 4 bytes of CIE/FDE is the size of the record. + // If it is 0xFFFFFFFF, the next 8 bytes contain the size instead, + // but we do not support that format yet. + uint64_t V = read32<E>(D.data()); + if (V == UINT32_MAX) + fatal("CIE/FDE too large"); + uint64_t Size = V + 4; + if (Size > D.size()) + fatal("CIE/FIE ends past the end of the section"); + return Size; +} + +// Read a byte and advance D by one byte. +static uint8_t readByte(ArrayRef<uint8_t> &D) { + if (D.empty()) + fatal("corrupted or unsupported CIE information"); + uint8_t B = D.front(); + D = D.slice(1); + return B; +} + +// Skip an integer encoded in the LEB128 format. +// Actual number is not of interest because only the runtime needs it. 
+// But we need to be at least able to skip it so that we can read +// the field that follows a LEB128 number. +static void skipLeb128(ArrayRef<uint8_t> &D) { + while (!D.empty()) { + uint8_t Val = D.front(); + D = D.slice(1); + if ((Val & 0x80) == 0) + return; + } + fatal("corrupted or unsupported CIE information"); +} + +template <class ELFT> static size_t getAugPSize(unsigned Enc) { + switch (Enc & 0x0f) { + case DW_EH_PE_absptr: + case DW_EH_PE_signed: + return ELFT::Is64Bits ? 8 : 4; + case DW_EH_PE_udata2: + case DW_EH_PE_sdata2: + return 2; + case DW_EH_PE_udata4: + case DW_EH_PE_sdata4: + return 4; + case DW_EH_PE_udata8: + case DW_EH_PE_sdata8: + return 8; + } + fatal("unknown FDE encoding"); +} + +template <class ELFT> static void skipAugP(ArrayRef<uint8_t> &D) { + uint8_t Enc = readByte(D); + if ((Enc & 0xf0) == DW_EH_PE_aligned) + fatal("DW_EH_PE_aligned encoding is not supported"); + size_t Size = getAugPSize<ELFT>(Enc); + if (Size >= D.size()) + fatal("corrupted CIE"); + D = D.slice(Size); +} + +template <class ELFT> uint8_t getFdeEncoding(ArrayRef<uint8_t> D) { + if (D.size() < 8) + fatal("CIE too small"); + D = D.slice(8); + + uint8_t Version = readByte(D); + if (Version != 1 && Version != 3) + fatal("FDE version 1 or 3 expected, but got " + Twine((unsigned)Version)); + + const unsigned char *AugEnd = std::find(D.begin(), D.end(), '\0'); + if (AugEnd == D.end()) + fatal("corrupted CIE"); + StringRef Aug(reinterpret_cast<const char *>(D.begin()), AugEnd - D.begin()); + D = D.slice(Aug.size() + 1); + + // Code alignment factor should always be 1 for .eh_frame. + if (readByte(D) != 1) + fatal("CIE code alignment must be 1"); + + // Skip data alignment factor. + skipLeb128(D); + + // Skip the return address register. In CIE version 1 this is a single + // byte. In CIE version 3 this is an unsigned LEB128. + if (Version == 1) + readByte(D); + else + skipLeb128(D); + + // We only care about an 'R' value, but other records may precede an 'R' + // record. 
Unfortunately records are not in TLV (type-length-value) format, + // so we need to teach the linker how to skip records for each type. + for (char C : Aug) { + if (C == 'R') + return readByte(D); + if (C == 'z') { + skipLeb128(D); + continue; + } + if (C == 'P') { + skipAugP<ELFT>(D); + continue; + } + if (C == 'L') { + readByte(D); + continue; + } + fatal("unknown .eh_frame augmentation string: " + Aug); + } + return DW_EH_PE_absptr; +} + +template size_t readEhRecordSize<ELF32LE>(ArrayRef<uint8_t>); +template size_t readEhRecordSize<ELF32BE>(ArrayRef<uint8_t>); +template size_t readEhRecordSize<ELF64LE>(ArrayRef<uint8_t>); +template size_t readEhRecordSize<ELF64BE>(ArrayRef<uint8_t>); + +template uint8_t getFdeEncoding<ELF32LE>(ArrayRef<uint8_t>); +template uint8_t getFdeEncoding<ELF32BE>(ArrayRef<uint8_t>); +template uint8_t getFdeEncoding<ELF64LE>(ArrayRef<uint8_t>); +template uint8_t getFdeEncoding<ELF64BE>(ArrayRef<uint8_t>); +} +} diff --git a/ELF/EhFrame.h b/ELF/EhFrame.h new file mode 100644 index 000000000000..0d5a2ff2f417 --- /dev/null +++ b/ELF/EhFrame.h @@ -0,0 +1,22 @@ +//===- EhFrame.h ------------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_EHFRAME_H +#define LLD_ELF_EHFRAME_H + +#include "lld/Core/LLVM.h" + +namespace lld { +namespace elf { +template <class ELFT> size_t readEhRecordSize(ArrayRef<uint8_t> Data); +template <class ELFT> uint8_t getFdeEncoding(ArrayRef<uint8_t> Data); +} +} + +#endif diff --git a/ELF/Error.cpp b/ELF/Error.cpp index e0701f7f4cc6..59a49c17b97c 100644 --- a/ELF/Error.cpp +++ b/ELF/Error.cpp @@ -8,31 +8,58 @@ //===----------------------------------------------------------------------===// #include "Error.h" +#include "Config.h" #include "llvm/ADT/Twine.h" +#include "llvm/Support/Error.h" #include "llvm/Support/raw_ostream.h" +using namespace llvm; + namespace lld { -namespace elf2 { +namespace elf { + +bool HasError; +raw_ostream *ErrorOS; -void warning(const Twine &Msg) { llvm::errs() << Msg << "\n"; } +void log(const Twine &Msg) { + if (Config->Verbose) + outs() << Msg << "\n"; +} + +void warning(const Twine &Msg) { + if (Config->FatalWarnings) + error(Msg); + else + *ErrorOS << Msg << "\n"; +} void error(const Twine &Msg) { - llvm::errs() << Msg << "\n"; - exit(1); + *ErrorOS << Msg << "\n"; + HasError = true; } void error(std::error_code EC, const Twine &Prefix) { - if (!EC) - return; error(Prefix + ": " + EC.message()); } -void error(std::error_code EC) { - if (!EC) - return; - error(EC.message()); +void fatal(const Twine &Msg) { + *ErrorOS << Msg << "\n"; + exit(1); +} + +void fatal(const Twine &Msg, const Twine &Prefix) { + fatal(Prefix + ": " + Msg); +} + +void check(std::error_code EC) { + if (EC) + fatal(EC.message()); +} + +void check(Error Err) { + check(errorToErrorCode(std::move(Err))); } -} // namespace elf2 +} // namespace elf } // namespace lld diff --git a/ELF/Error.h b/ELF/Error.h index b1d2e7a8fc5b..552f50498464 100644 --- a/ELF/Error.h +++ b/ELF/Error.h @@ -13,20 +13,49 @@ #include "lld/Core/LLVM.h" namespace lld { -namespace elf2 { +namespace elf { 
+extern bool HasError; +extern llvm::raw_ostream *ErrorOS; + +void log(const Twine &Msg); void warning(const Twine &Msg); -LLVM_ATTRIBUTE_NORETURN void error(const Twine &Msg); +void error(const Twine &Msg); void error(std::error_code EC, const Twine &Prefix); -void error(std::error_code EC); template <typename T> void error(const ErrorOr<T> &V, const Twine &Prefix) { error(V.getError(), Prefix); } -template <typename T> void error(const ErrorOr<T> &V) { error(V.getError()); } -} // namespace elf2 +LLVM_ATTRIBUTE_NORETURN void fatal(const Twine &Msg); +LLVM_ATTRIBUTE_NORETURN void fatal(const Twine &Msg, const Twine &Prefix); + +template <class T> T check(ErrorOr<T> E) { + if (auto EC = E.getError()) + fatal(EC.message()); + return std::move(*E); +} + +template <class T> T check(Expected<T> E) { + if (!E) + fatal(errorToErrorCode(E.takeError()).message()); + return std::move(*E); +} + +template <class T> T check(ErrorOr<T> E, const Twine &Prefix) { + if (auto EC = E.getError()) + fatal(EC.message(), Prefix); + return std::move(*E); +} + +template <class T> T check(Expected<T> E, const Twine &Prefix) { + if (!E) + fatal(errorToErrorCode(E.takeError()).message(), Prefix); + return std::move(*E); +} + +} // namespace elf } // namespace lld #endif diff --git a/ELF/ICF.cpp b/ELF/ICF.cpp new file mode 100644 index 000000000000..10a2603b3b3e --- /dev/null +++ b/ELF/ICF.cpp @@ -0,0 +1,345 @@ +//===- ICF.cpp ------------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Identical Code Folding is a feature to merge sections not by name (which +// is regular comdat handling) but by contents. 
If two non-writable sections +// have the same data, relocations, attributes, etc., then the two +// are considered identical and merged by the linker. This optimization +// makes outputs smaller. +// +// ICF is theoretically a problem of reducing graphs by merging as many +// identical subgraphs as possible if we consider sections as vertices and +// relocations as edges. It may sound simple, but it is a bit more +// complicated than you might think. The order of processing sections +// matters because merging two sections can make other sections, whose +// relocations now point to the same section, mergeable. Graphs may contain +// cycles. We need a sophisticated algorithm to do this properly and +// efficiently. +// +// What we do in this file is this. We split sections into groups. Sections +// in the same group are considered identical. +// +// We begin by optimistically putting all sections into a single equivalence +// class. Then we apply a series of checks that split this initial +// equivalence class into more and more refined equivalence classes based on +// the properties by which a section can be distinguished. +// +// We begin by checking that the section contents and flags are the +// same. This only needs to be done once since these properties don't depend +// on the current equivalence class assignment. +// +// Then we split the equivalence classes based on checking that their +// relocations are the same, where relocation targets are compared by their +// equivalence class, not the concrete section. This may need to be done +// multiple times because as the equivalence classes are refined, two +// sections that had a relocation target in the same equivalence class may +// now target different equivalence classes, and hence these two sections +// must be put in different equivalence classes (whereas in the previous +// iteration they were not since the relocation target was the same.) 
+// +// Our algorithm is smart enough to merge the following mutually-recursive +// functions. +// +// void foo() { bar(); } +// void bar() { foo(); } +// +// This algorithm is so-called "optimistic" algorithm described in +// http://research.google.com/pubs/pub36912.html. (Note that what GNU +// gold implemented is different from the optimistic algorithm.) +// +//===----------------------------------------------------------------------===// + +#include "ICF.h" +#include "Config.h" +#include "OutputSections.h" +#include "SymbolTable.h" + +#include "llvm/ADT/Hashing.h" +#include "llvm/Object/ELF.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/raw_ostream.h" + +using namespace lld; +using namespace lld::elf; +using namespace llvm; +using namespace llvm::ELF; +using namespace llvm::object; + +namespace lld { +namespace elf { +template <class ELFT> class ICF { + typedef typename ELFT::Shdr Elf_Shdr; + typedef typename ELFT::Sym Elf_Sym; + typedef typename ELFT::uint uintX_t; + typedef Elf_Rel_Impl<ELFT, false> Elf_Rel; + + using Comparator = std::function<bool(const InputSection<ELFT> *, + const InputSection<ELFT> *)>; + +public: + void run(); + +private: + uint64_t NextId = 1; + + static void setLive(SymbolTable<ELFT> *S); + static uint64_t relSize(InputSection<ELFT> *S); + static uint64_t getHash(InputSection<ELFT> *S); + static bool isEligible(InputSectionBase<ELFT> *Sec); + static std::vector<InputSection<ELFT> *> getSections(); + + void segregate(InputSection<ELFT> **Begin, InputSection<ELFT> **End, + Comparator Eq); + + void forEachGroup(std::vector<InputSection<ELFT> *> &V, Comparator Eq); + + template <class RelTy> + static bool relocationEq(ArrayRef<RelTy> RA, ArrayRef<RelTy> RB); + + template <class RelTy> + static bool variableEq(const InputSection<ELFT> *A, + const InputSection<ELFT> *B, ArrayRef<RelTy> RA, + ArrayRef<RelTy> RB); + + static bool equalsConstant(const InputSection<ELFT> *A, + const InputSection<ELFT> *B); + + static bool 
equalsVariable(const InputSection<ELFT> *A, + const InputSection<ELFT> *B); +}; +} +} + +// Returns a hash value for S. Note that the information about +// relocation targets is not included in the hash value. +template <class ELFT> uint64_t ICF<ELFT>::getHash(InputSection<ELFT> *S) { + uint64_t Flags = S->getSectionHdr()->sh_flags; + uint64_t H = hash_combine(Flags, S->getSize()); + for (const Elf_Shdr *Rel : S->RelocSections) + H = hash_combine(H, (uint64_t)Rel->sh_size); + return H; +} + +// Returns true if Sec is subject of ICF. +template <class ELFT> bool ICF<ELFT>::isEligible(InputSectionBase<ELFT> *Sec) { + if (!Sec || Sec == &InputSection<ELFT>::Discarded || !Sec->Live) + return false; + auto *S = dyn_cast<InputSection<ELFT>>(Sec); + if (!S) + return false; + + // .init and .fini contains instructions that must be executed to + // initialize and finalize the process. They cannot and should not + // be merged. + StringRef Name = S->getSectionName(); + if (Name == ".init" || Name == ".fini") + return false; + + const Elf_Shdr &H = *S->getSectionHdr(); + return (H.sh_flags & SHF_ALLOC) && (~H.sh_flags & SHF_WRITE); +} + +template <class ELFT> +std::vector<InputSection<ELFT> *> ICF<ELFT>::getSections() { + std::vector<InputSection<ELFT> *> V; + for (const std::unique_ptr<ObjectFile<ELFT>> &F : + Symtab<ELFT>::X->getObjectFiles()) + for (InputSectionBase<ELFT> *S : F->getSections()) + if (isEligible(S)) + V.push_back(cast<InputSection<ELFT>>(S)); + return V; +} + +// All sections between Begin and End must have the same group ID before +// you call this function. This function compare sections between Begin +// and End using Eq and assign new group IDs for new groups. +template <class ELFT> +void ICF<ELFT>::segregate(InputSection<ELFT> **Begin, InputSection<ELFT> **End, + Comparator Eq) { + // This loop rearranges [Begin, End) so that all sections that are + // equal in terms of Eq are contiguous. 
The algorithm is quadratic in + // the worst case, but that is not an issue in practice because the + // number of distinct sections in [Begin, End) is usually very small. + InputSection<ELFT> **I = Begin; + for (;;) { + InputSection<ELFT> *Head = *I; + auto Bound = std::stable_partition( + I + 1, End, [&](InputSection<ELFT> *S) { return Eq(Head, S); }); + if (Bound == End) + return; + uint64_t Id = NextId++; + for (; I != Bound; ++I) + (*I)->GroupId = Id; + } +} + +template <class ELFT> +void ICF<ELFT>::forEachGroup(std::vector<InputSection<ELFT> *> &V, + Comparator Eq) { + for (InputSection<ELFT> **I = V.data(), **E = I + V.size(); I != E;) { + InputSection<ELFT> *Head = *I; + auto Bound = std::find_if(I + 1, E, [&](InputSection<ELFT> *S) { + return S->GroupId != Head->GroupId; + }); + segregate(I, Bound, Eq); + I = Bound; + } +} + +// Compare two lists of relocations. +template <class ELFT> +template <class RelTy> +bool ICF<ELFT>::relocationEq(ArrayRef<RelTy> RelsA, ArrayRef<RelTy> RelsB) { + const RelTy *IA = RelsA.begin(); + const RelTy *EA = RelsA.end(); + const RelTy *IB = RelsB.begin(); + const RelTy *EB = RelsB.end(); + if (EA - IA != EB - IB) + return false; + for (; IA != EA; ++IA, ++IB) + if (IA->r_offset != IB->r_offset || + IA->getType(Config->Mips64EL) != IB->getType(Config->Mips64EL) || + getAddend<ELFT>(*IA) != getAddend<ELFT>(*IB)) + return false; + return true; +} + +// Compare "non-moving" part of two InputSections, namely everything +// except relocation targets. 
+template <class ELFT> +bool ICF<ELFT>::equalsConstant(const InputSection<ELFT> *A, + const InputSection<ELFT> *B) { + if (A->RelocSections.size() != B->RelocSections.size()) + return false; + + for (size_t I = 0, E = A->RelocSections.size(); I != E; ++I) { + const Elf_Shdr *RA = A->RelocSections[I]; + const Elf_Shdr *RB = B->RelocSections[I]; + ELFFile<ELFT> &FileA = A->File->getObj(); + ELFFile<ELFT> &FileB = B->File->getObj(); + if (RA->sh_type == SHT_RELA) { + if (!relocationEq(FileA.relas(RA), FileB.relas(RB))) + return false; + } else { + if (!relocationEq(FileA.rels(RA), FileB.rels(RB))) + return false; + } + } + + return A->getSectionHdr()->sh_flags == B->getSectionHdr()->sh_flags && + A->getSize() == B->getSize() && + A->getSectionData() == B->getSectionData(); +} + +template <class ELFT> +template <class RelTy> +bool ICF<ELFT>::variableEq(const InputSection<ELFT> *A, + const InputSection<ELFT> *B, ArrayRef<RelTy> RelsA, + ArrayRef<RelTy> RelsB) { + const RelTy *IA = RelsA.begin(); + const RelTy *EA = RelsA.end(); + const RelTy *IB = RelsB.begin(); + for (; IA != EA; ++IA, ++IB) { + SymbolBody &SA = A->File->getRelocTargetSym(*IA); + SymbolBody &SB = B->File->getRelocTargetSym(*IB); + if (&SA == &SB) + continue; + + // Or, the symbols should be pointing to the same section + // in terms of the group ID. + auto *DA = dyn_cast<DefinedRegular<ELFT>>(&SA); + auto *DB = dyn_cast<DefinedRegular<ELFT>>(&SB); + if (!DA || !DB) + return false; + if (DA->Value != DB->Value) + return false; + InputSection<ELFT> *X = dyn_cast<InputSection<ELFT>>(DA->Section); + InputSection<ELFT> *Y = dyn_cast<InputSection<ELFT>>(DB->Section); + if (X && Y && X->GroupId && X->GroupId == Y->GroupId) + continue; + return false; + } + return true; +} + +// Compare "moving" part of two InputSections, namely relocation targets. 
+template <class ELFT> +bool ICF<ELFT>::equalsVariable(const InputSection<ELFT> *A, + const InputSection<ELFT> *B) { + for (size_t I = 0, E = A->RelocSections.size(); I != E; ++I) { + const Elf_Shdr *RA = A->RelocSections[I]; + const Elf_Shdr *RB = B->RelocSections[I]; + ELFFile<ELFT> &FileA = A->File->getObj(); + ELFFile<ELFT> &FileB = B->File->getObj(); + if (RA->sh_type == SHT_RELA) { + if (!variableEq(A, B, FileA.relas(RA), FileB.relas(RB))) + return false; + } else { + if (!variableEq(A, B, FileA.rels(RA), FileB.rels(RB))) + return false; + } + } + return true; +} + +// The main function of ICF. +template <class ELFT> void ICF<ELFT>::run() { + // Initially, we use hash values as section group IDs. Therefore, + // if two sections have the same ID, they are likely (but not + // guaranteed) to have the same static contents in terms of ICF. + std::vector<InputSection<ELFT> *> V = getSections(); + for (InputSection<ELFT> *S : V) + // Set MSB on to avoid collisions with serial group IDs + S->GroupId = getHash(S) | (uint64_t(1) << 63); + + // From now on, sections in V are ordered so that sections in + // the same group are consecutive in the vector. + std::stable_sort(V.begin(), V.end(), + [](InputSection<ELFT> *A, InputSection<ELFT> *B) { + return A->GroupId < B->GroupId; + }); + + // Compare static contents and assign unique IDs for each static content. + forEachGroup(V, equalsConstant); + + // Split groups by comparing relocations until we get a convergence. + int Cnt = 1; + for (;;) { + ++Cnt; + uint64_t Id = NextId; + forEachGroup(V, equalsVariable); + if (Id == NextId) + break; + } + log("ICF needed " + Twine(Cnt) + " iterations."); + + // Merge sections in the same group. 
+ for (auto I = V.begin(), E = V.end(); I != E;) { + InputSection<ELFT> *Head = *I++; + auto Bound = std::find_if(I, E, [&](InputSection<ELFT> *S) { + return Head->GroupId != S->GroupId; + }); + if (I == Bound) + continue; + log("selected " + Head->getSectionName()); + while (I != Bound) { + InputSection<ELFT> *S = *I++; + log(" removed " + S->getSectionName()); + Head->replace(S); + } + } +} + +// ICF entry point function. +template <class ELFT> void elf::doIcf() { ICF<ELFT>().run(); } + +template void elf::doIcf<ELF32LE>(); +template void elf::doIcf<ELF32BE>(); +template void elf::doIcf<ELF64LE>(); +template void elf::doIcf<ELF64BE>(); diff --git a/ELF/ICF.h b/ELF/ICF.h new file mode 100644 index 000000000000..502e128c8109 --- /dev/null +++ b/ELF/ICF.h @@ -0,0 +1,19 @@ +//===- ICF.h --------------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_ICF_H +#define LLD_ELF_ICF_H + +namespace lld { +namespace elf { +template <class ELFT> void doIcf(); +} +} + +#endif diff --git a/ELF/InputFiles.cpp b/ELF/InputFiles.cpp index 6a908d450f60..57e556395937 100644 --- a/ELF/InputFiles.cpp +++ b/ELF/InputFiles.cpp @@ -8,10 +8,17 @@ //===----------------------------------------------------------------------===// #include "InputFiles.h" -#include "InputSection.h" +#include "Driver.h" #include "Error.h" +#include "InputSection.h" +#include "SymbolTable.h" #include "Symbols.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/CodeGen/Analysis.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; using namespace llvm::ELF; @@ -19,43 +26,52 @@ using namespace llvm::object; using namespace llvm::sys::fs; using namespace lld; -using namespace lld::elf2; - -namespace { -class ECRAII { - std::error_code EC; - -public: - std::error_code &getEC() { return EC; } - ~ECRAII() { error(EC); } -}; +using namespace lld::elf; + +// Returns "(internal)", "foo.a(bar.o)" or "baz.o". +std::string elf::getFilename(const InputFile *F) { + if (!F) + return "(internal)"; + if (!F->ArchiveName.empty()) + return (F->ArchiveName + "(" + F->getName() + ")").str(); + return F->getName(); } template <class ELFT> -ELFFileBase<ELFT>::ELFFileBase(Kind K, MemoryBufferRef M) - : InputFile(K, M), ELFObj(MB.getBuffer(), ECRAII().getEC()) {} +static ELFFile<ELFT> createELFObj(MemoryBufferRef MB) { + std::error_code EC; + ELFFile<ELFT> F(MB.getBuffer(), EC); + if (EC) + error(EC, "failed to read " + MB.getBufferIdentifier()); + return F; +} -template <class ELFT> -ELFKind ELFFileBase<ELFT>::getELFKind() { +template <class ELFT> static ELFKind getELFKind() { if (ELFT::TargetEndianness == support::little) return ELFT::Is64Bits ? 
ELF64LEKind : ELF32LEKind; return ELFT::Is64Bits ? ELF64BEKind : ELF32BEKind; } template <class ELFT> -typename ELFFileBase<ELFT>::Elf_Sym_Range -ELFFileBase<ELFT>::getSymbolsHelper(bool Local) { +ELFFileBase<ELFT>::ELFFileBase(Kind K, MemoryBufferRef MB) + : InputFile(K, MB), ELFObj(createELFObj<ELFT>(MB)) { + EKind = getELFKind<ELFT>(); + EMachine = ELFObj.getHeader()->e_machine; +} + +template <class ELFT> +typename ELFT::SymRange ELFFileBase<ELFT>::getElfSymbols(bool OnlyGlobals) { if (!Symtab) return Elf_Sym_Range(nullptr, nullptr); Elf_Sym_Range Syms = ELFObj.symbols(Symtab); uint32_t NumSymbols = std::distance(Syms.begin(), Syms.end()); uint32_t FirstNonLocal = Symtab->sh_info; if (FirstNonLocal > NumSymbols) - error("Invalid sh_info in symbol table"); - if (!Local) - return make_range(Syms.begin() + FirstNonLocal, Syms.end()); - // +1 to skip over dummy symbol. - return make_range(Syms.begin() + 1, Syms.begin() + FirstNonLocal); + fatal(getFilename(this) + ": invalid sh_info in symbol table"); + + if (OnlyGlobals) + return makeArrayRef(Syms.begin() + FirstNonLocal, Syms.end()); + return makeArrayRef(Syms.begin(), Syms.end()); } template <class ELFT> @@ -63,7 +79,7 @@ uint32_t ELFFileBase<ELFT>::getSectionIndex(const Elf_Sym &Sym) const { uint32_t I = Sym.st_shndx; if (I == ELF::SHN_XINDEX) return ELFObj.getExtendedSymbolTableIndex(&Sym, Symtab, SymtabSHNDX); - if (I >= ELF::SHN_LORESERVE || I == ELF::SHN_ABS) + if (I >= ELF::SHN_LORESERVE) return 0; return I; } @@ -71,44 +87,46 @@ uint32_t ELFFileBase<ELFT>::getSectionIndex(const Elf_Sym &Sym) const { template <class ELFT> void ELFFileBase<ELFT>::initStringTable() { if (!Symtab) return; - ErrorOr<StringRef> StringTableOrErr = ELFObj.getStringTableForSymtab(*Symtab); - error(StringTableOrErr); - StringTable = *StringTableOrErr; + StringTable = check(ELFObj.getStringTableForSymtab(*Symtab)); } template <class ELFT> -typename ELFFileBase<ELFT>::Elf_Sym_Range -ELFFileBase<ELFT>::getNonLocalSymbols() { - return 
getSymbolsHelper(false); +elf::ObjectFile<ELFT>::ObjectFile(MemoryBufferRef M) + : ELFFileBase<ELFT>(Base::ObjectKind, M) {} + +template <class ELFT> +ArrayRef<SymbolBody *> elf::ObjectFile<ELFT>::getNonLocalSymbols() { + if (!this->Symtab) + return this->SymbolBodies; + uint32_t FirstNonLocal = this->Symtab->sh_info; + return makeArrayRef(this->SymbolBodies).slice(FirstNonLocal); } template <class ELFT> -ObjectFile<ELFT>::ObjectFile(MemoryBufferRef M) - : ELFFileBase<ELFT>(Base::ObjectKind, M) {} +ArrayRef<SymbolBody *> elf::ObjectFile<ELFT>::getLocalSymbols() { + if (!this->Symtab) + return this->SymbolBodies; + uint32_t FirstNonLocal = this->Symtab->sh_info; + return makeArrayRef(this->SymbolBodies).slice(1, FirstNonLocal - 1); +} template <class ELFT> -typename ObjectFile<ELFT>::Elf_Sym_Range ObjectFile<ELFT>::getLocalSymbols() { - return this->getSymbolsHelper(true); +ArrayRef<SymbolBody *> elf::ObjectFile<ELFT>::getSymbols() { + if (!this->Symtab) + return this->SymbolBodies; + return makeArrayRef(this->SymbolBodies).slice(1); } -template <class ELFT> uint32_t ObjectFile<ELFT>::getMipsGp0() const { - if (MipsReginfo) +template <class ELFT> uint32_t elf::ObjectFile<ELFT>::getMipsGp0() const { + if (ELFT::Is64Bits && MipsOptions && MipsOptions->Reginfo) + return MipsOptions->Reginfo->ri_gp_value; + if (!ELFT::Is64Bits && MipsReginfo && MipsReginfo->Reginfo) return MipsReginfo->Reginfo->ri_gp_value; return 0; } template <class ELFT> -const typename ObjectFile<ELFT>::Elf_Sym * -ObjectFile<ELFT>::getLocalSymbol(uintX_t SymIndex) { - uint32_t FirstNonLocal = this->Symtab->sh_info; - if (SymIndex >= FirstNonLocal) - return nullptr; - Elf_Sym_Range Syms = this->ELFObj.symbols(this->Symtab); - return Syms.begin() + SymIndex; -} - -template <class ELFT> -void ObjectFile<ELFT>::parse(DenseSet<StringRef> &ComdatGroups) { +void elf::ObjectFile<ELFT>::parse(DenseSet<StringRef> &ComdatGroups) { // Read section and symbol tables. 
initializeSections(ComdatGroups); initializeSymbols(); @@ -118,63 +136,57 @@ void ObjectFile<ELFT>::parse(DenseSet<StringRef> &ComdatGroups) { // They are identified and deduplicated by group name. This function // returns a group name. template <class ELFT> -StringRef ObjectFile<ELFT>::getShtGroupSignature(const Elf_Shdr &Sec) { +StringRef elf::ObjectFile<ELFT>::getShtGroupSignature(const Elf_Shdr &Sec) { const ELFFile<ELFT> &Obj = this->ELFObj; - uint32_t SymtabdSectionIndex = Sec.sh_link; - ErrorOr<const Elf_Shdr *> SecOrErr = Obj.getSection(SymtabdSectionIndex); - error(SecOrErr); - const Elf_Shdr *SymtabSec = *SecOrErr; - uint32_t SymIndex = Sec.sh_info; - const Elf_Sym *Sym = Obj.getSymbol(SymtabSec, SymIndex); - ErrorOr<StringRef> StringTableOrErr = Obj.getStringTableForSymtab(*SymtabSec); - error(StringTableOrErr); - ErrorOr<StringRef> SignatureOrErr = Sym->getName(*StringTableOrErr); - error(SignatureOrErr); - return *SignatureOrErr; + const Elf_Shdr *Symtab = check(Obj.getSection(Sec.sh_link)); + const Elf_Sym *Sym = Obj.getSymbol(Symtab, Sec.sh_info); + StringRef Strtab = check(Obj.getStringTableForSymtab(*Symtab)); + return check(Sym->getName(Strtab)); } template <class ELFT> -ArrayRef<typename ObjectFile<ELFT>::uint32_X> -ObjectFile<ELFT>::getShtGroupEntries(const Elf_Shdr &Sec) { +ArrayRef<typename elf::ObjectFile<ELFT>::Elf_Word> +elf::ObjectFile<ELFT>::getShtGroupEntries(const Elf_Shdr &Sec) { const ELFFile<ELFT> &Obj = this->ELFObj; - ErrorOr<ArrayRef<uint32_X>> EntriesOrErr = - Obj.template getSectionContentsAsArray<uint32_X>(&Sec); - error(EntriesOrErr); - ArrayRef<uint32_X> Entries = *EntriesOrErr; + ArrayRef<Elf_Word> Entries = + check(Obj.template getSectionContentsAsArray<Elf_Word>(&Sec)); if (Entries.empty() || Entries[0] != GRP_COMDAT) - error("Unsupported SHT_GROUP format"); + fatal(getFilename(this) + ": unsupported SHT_GROUP format"); return Entries.slice(1); } template <class ELFT> -static bool shouldMerge(const typename 
ELFFile<ELFT>::Elf_Shdr &Sec) { - typedef typename ELFFile<ELFT>::uintX_t uintX_t; +bool elf::ObjectFile<ELFT>::shouldMerge(const Elf_Shdr &Sec) { + // We don't merge sections if -O0 (default is -O1). This makes sometimes + // the linker significantly faster, although the output will be bigger. + if (Config->Optimize == 0) + return false; + uintX_t Flags = Sec.sh_flags; if (!(Flags & SHF_MERGE)) return false; if (Flags & SHF_WRITE) - error("Writable SHF_MERGE sections are not supported"); + fatal(getFilename(this) + ": writable SHF_MERGE section is not supported"); uintX_t EntSize = Sec.sh_entsize; if (!EntSize || Sec.sh_size % EntSize) - error("SHF_MERGE section size must be a multiple of sh_entsize"); + fatal(getFilename(this) + + ": SHF_MERGE section size must be a multiple of sh_entsize"); - // Don't try to merge if the aligment is larger than the sh_entsize. + // Don't try to merge if the alignment is larger than the sh_entsize and this + // is not SHF_STRINGS. // - // If this is not a SHF_STRINGS, we would need to pad after every entity. It - // would be equivalent for the producer of the .o to just set a larger + // Since this is not a SHF_STRINGS, we would need to pad after every entity. + // It would be equivalent for the producer of the .o to just set a larger // sh_entsize. - // - // If this is a SHF_STRINGS, the larger alignment makes sense. Unfortunately - // it would complicate tail merging. This doesn't seem that common to - // justify the effort. 
- if (Sec.sh_addralign > EntSize) - return false; + if (Flags & SHF_STRINGS) + return true; - return true; + return Sec.sh_addralign <= EntSize; } template <class ELFT> -void ObjectFile<ELFT>::initializeSections(DenseSet<StringRef> &ComdatGroups) { +void elf::ObjectFile<ELFT>::initializeSections( + DenseSet<StringRef> &ComdatGroups) { uint64_t Size = this->ELFObj.getNumSections(); Sections.resize(Size); unsigned I = -1; @@ -191,53 +203,85 @@ void ObjectFile<ELFT>::initializeSections(DenseSet<StringRef> &ComdatGroups) { continue; for (uint32_t SecIndex : getShtGroupEntries(Sec)) { if (SecIndex >= Size) - error("Invalid section index in group"); + fatal(getFilename(this) + ": invalid section index in group: " + + Twine(SecIndex)); Sections[SecIndex] = &InputSection<ELFT>::Discarded; } break; case SHT_SYMTAB: this->Symtab = &Sec; break; - case SHT_SYMTAB_SHNDX: { - ErrorOr<ArrayRef<Elf_Word>> ErrorOrTable = Obj.getSHNDXTable(Sec); - error(ErrorOrTable); - this->SymtabSHNDX = *ErrorOrTable; + case SHT_SYMTAB_SHNDX: + this->SymtabSHNDX = check(Obj.getSHNDXTable(Sec)); break; - } case SHT_STRTAB: case SHT_NULL: break; case SHT_RELA: case SHT_REL: { - uint32_t RelocatedSectionIndex = Sec.sh_info; - if (RelocatedSectionIndex >= Size) - error("Invalid relocated section index"); - InputSectionBase<ELFT> *RelocatedSection = - Sections[RelocatedSectionIndex]; - if (!RelocatedSection) - error("Unsupported relocation reference"); - if (auto *S = dyn_cast<InputSection<ELFT>>(RelocatedSection)) { + // This section contains relocation information. + // If -r is given, we do not interpret or apply relocation + // but just copy relocation sections to output. + if (Config->Relocatable) { + Sections[I] = new (IAlloc.Allocate()) InputSection<ELFT>(this, &Sec); + break; + } + + // Find the relocation target section and associate this + // section with it. 
+ InputSectionBase<ELFT> *Target = getRelocTarget(Sec); + if (!Target) + break; + if (auto *S = dyn_cast<InputSection<ELFT>>(Target)) { S->RelocSections.push_back(&Sec); - } else if (auto *S = dyn_cast<EHInputSection<ELFT>>(RelocatedSection)) { + break; + } + if (auto *S = dyn_cast<EhInputSection<ELFT>>(Target)) { if (S->RelocSection) - error("Multiple relocation sections to .eh_frame are not supported"); + fatal( + getFilename(this) + + ": multiple relocation sections to .eh_frame are not supported"); S->RelocSection = &Sec; - } else { - error("Relocations pointing to SHF_MERGE are not supported"); + break; } - break; + fatal(getFilename(this) + + ": relocations pointing to SHF_MERGE are not supported"); } + case SHT_ARM_ATTRIBUTES: + // FIXME: ARM meta-data section. At present attributes are ignored, + // they can be used to reason about object compatibility. + Sections[I] = &InputSection<ELFT>::Discarded; + break; default: Sections[I] = createInputSection(Sec); } } } -template <class ELFT> InputSectionBase<ELFT> * -ObjectFile<ELFT>::createInputSection(const Elf_Shdr &Sec) { - ErrorOr<StringRef> NameOrErr = this->ELFObj.getSectionName(&Sec); - error(NameOrErr); - StringRef Name = *NameOrErr; +template <class ELFT> +InputSectionBase<ELFT> * +elf::ObjectFile<ELFT>::getRelocTarget(const Elf_Shdr &Sec) { + uint32_t Idx = Sec.sh_info; + if (Idx >= Sections.size()) + fatal(getFilename(this) + ": invalid relocated section index: " + + Twine(Idx)); + InputSectionBase<ELFT> *Target = Sections[Idx]; + + // Strictly speaking, a relocation section must be included in the + // group of the section it relocates. However, LLVM 3.3 and earlier + // would fail to do so, so we gracefully handle that case. 
+ if (Target == &InputSection<ELFT>::Discarded) + return nullptr; + + if (!Target) + fatal(getFilename(this) + ": unsupported relocation reference"); + return Target; +} + +template <class ELFT> +InputSectionBase<ELFT> * +elf::ObjectFile<ELFT>::createInputSection(const Elf_Shdr &Sec) { + StringRef Name = check(this->ELFObj.getSectionName(&Sec)); // .note.GNU-stack is a marker section to control the presence of // PT_GNU_STACK segment in outputs. Since the presence of the segment @@ -246,98 +290,129 @@ ObjectFile<ELFT>::createInputSection(const Elf_Shdr &Sec) { if (Name == ".note.GNU-stack") return &InputSection<ELFT>::Discarded; - // A MIPS object file has a special section that contains register - // usage info, which needs to be handled by the linker specially. - if (Config->EMachine == EM_MIPS && Name == ".reginfo") { - MipsReginfo = new (Alloc) MipsReginfoInputSection<ELFT>(this, &Sec); - return MipsReginfo; + if (Name == ".note.GNU-split-stack") { + error("objects using splitstacks are not supported"); + return &InputSection<ELFT>::Discarded; } - if (Name == ".eh_frame") - return new (EHAlloc.Allocate()) EHInputSection<ELFT>(this, &Sec); - if (shouldMerge<ELFT>(Sec)) + if (Config->StripDebug && Name.startswith(".debug")) + return &InputSection<ELFT>::Discarded; + + // A MIPS object file has a special sections that contain register + // usage info, which need to be handled by the linker specially. + if (Config->EMachine == EM_MIPS) { + if (Name == ".reginfo") { + MipsReginfo.reset(new MipsReginfoInputSection<ELFT>(this, &Sec)); + return MipsReginfo.get(); + } + if (Name == ".MIPS.options") { + MipsOptions.reset(new MipsOptionsInputSection<ELFT>(this, &Sec)); + return MipsOptions.get(); + } + } + + // The linker merges EH (exception handling) frames and creates a + // .eh_frame_hdr section for runtime. So we handle them with a special + // class. For relocatable outputs, they are just passed through. 
+ if (Name == ".eh_frame" && !Config->Relocatable) + return new (EHAlloc.Allocate()) EhInputSection<ELFT>(this, &Sec); + + if (shouldMerge(Sec)) return new (MAlloc.Allocate()) MergeInputSection<ELFT>(this, &Sec); - return new (Alloc) InputSection<ELFT>(this, &Sec); + return new (IAlloc.Allocate()) InputSection<ELFT>(this, &Sec); } -template <class ELFT> void ObjectFile<ELFT>::initializeSymbols() { +template <class ELFT> void elf::ObjectFile<ELFT>::initializeSymbols() { this->initStringTable(); - Elf_Sym_Range Syms = this->getNonLocalSymbols(); + Elf_Sym_Range Syms = this->getElfSymbols(false); uint32_t NumSymbols = std::distance(Syms.begin(), Syms.end()); SymbolBodies.reserve(NumSymbols); for (const Elf_Sym &Sym : Syms) - SymbolBodies.push_back(createSymbolBody(this->StringTable, &Sym)); + SymbolBodies.push_back(createSymbolBody(&Sym)); } template <class ELFT> InputSectionBase<ELFT> * -ObjectFile<ELFT>::getSection(const Elf_Sym &Sym) const { +elf::ObjectFile<ELFT>::getSection(const Elf_Sym &Sym) const { uint32_t Index = this->getSectionIndex(Sym); if (Index == 0) return nullptr; if (Index >= Sections.size() || !Sections[Index]) - error("Invalid section index"); - return Sections[Index]; + fatal(getFilename(this) + ": invalid section index: " + Twine(Index)); + InputSectionBase<ELFT> *S = Sections[Index]; + if (S == &InputSectionBase<ELFT>::Discarded) + return S; + return S->Repl; } template <class ELFT> -SymbolBody *ObjectFile<ELFT>::createSymbolBody(StringRef StringTable, - const Elf_Sym *Sym) { - ErrorOr<StringRef> NameOrErr = Sym->getName(StringTable); - error(NameOrErr); - StringRef Name = *NameOrErr; +SymbolBody *elf::ObjectFile<ELFT>::createSymbolBody(const Elf_Sym *Sym) { + int Binding = Sym->getBinding(); + InputSectionBase<ELFT> *Sec = getSection(*Sym); + if (Binding == STB_LOCAL) { + if (Sym->st_shndx == SHN_UNDEF) + return new (this->Alloc) + Undefined(Sym->st_name, Sym->st_other, Sym->getType(), this); + return new (this->Alloc) 
DefinedRegular<ELFT>(*Sym, Sec); + } + + StringRef Name = check(Sym->getName(this->StringTable)); switch (Sym->st_shndx) { case SHN_UNDEF: - return new (Alloc) UndefinedElf<ELFT>(Name, *Sym); + return elf::Symtab<ELFT>::X + ->addUndefined(Name, Binding, Sym->st_other, Sym->getType(), + /*CanOmitFromDynSym*/ false, this) + ->body(); case SHN_COMMON: - return new (Alloc) DefinedCommon(Name, Sym->st_size, Sym->st_value, - Sym->getBinding() == llvm::ELF::STB_WEAK, - Sym->getVisibility()); + return elf::Symtab<ELFT>::X + ->addCommon(Name, Sym->st_size, Sym->st_value, Binding, Sym->st_other, + Sym->getType(), this) + ->body(); } - switch (Sym->getBinding()) { + switch (Binding) { default: - error("unexpected binding"); + fatal(getFilename(this) + ": unexpected binding: " + Twine(Binding)); case STB_GLOBAL: case STB_WEAK: - case STB_GNU_UNIQUE: { - InputSectionBase<ELFT> *Sec = getSection(*Sym); + case STB_GNU_UNIQUE: if (Sec == &InputSection<ELFT>::Discarded) - return new (Alloc) UndefinedElf<ELFT>(Name, *Sym); - return new (Alloc) DefinedRegular<ELFT>(Name, *Sym, Sec); - } + return elf::Symtab<ELFT>::X + ->addUndefined(Name, Binding, Sym->st_other, Sym->getType(), + /*CanOmitFromDynSym*/ false, this) + ->body(); + return elf::Symtab<ELFT>::X->addRegular(Name, *Sym, Sec)->body(); } } -void ArchiveFile::parse() { - ErrorOr<std::unique_ptr<Archive>> FileOrErr = Archive::create(MB); - error(FileOrErr, "Failed to parse archive"); - File = std::move(*FileOrErr); - - // Allocate a buffer for Lazy objects. - size_t NumSyms = File->getNumberOfSymbols(); - LazySymbols.reserve(NumSyms); +template <class ELFT> void ArchiveFile::parse() { + File = check(Archive::create(MB), "failed to parse archive"); // Read the symbol table to construct Lazy objects. for (const Archive::Symbol &Sym : File->symbols()) - LazySymbols.emplace_back(this, Sym); + Symtab<ELFT>::X->addLazyArchive(this, Sym); } // Returns a buffer pointing to a member file containing a given symbol. 
MemoryBufferRef ArchiveFile::getMember(const Archive::Symbol *Sym) { - ErrorOr<Archive::Child> COrErr = Sym->getMember(); - error(COrErr, "Could not get the member for symbol " + Sym->getName()); - const Archive::Child &C = *COrErr; + Archive::Child C = + check(Sym->getMember(), + "could not get the member for symbol " + Sym->getName()); if (!Seen.insert(C.getChildOffset()).second) return MemoryBufferRef(); - ErrorOr<MemoryBufferRef> RefOrErr = C.getMemoryBufferRef(); - if (!RefOrErr) - error(RefOrErr, "Could not get the buffer for the member defining symbol " + - Sym->getName()); - return *RefOrErr; + MemoryBufferRef Ret = + check(C.getMemoryBufferRef(), + "could not get the buffer for the member defining symbol " + + Sym->getName()); + + if (C.getParent()->isThin() && Driver->Cpio) + Driver->Cpio->append(relativeToRoot(check(C.getFullName())), + Ret.getBuffer()); + + return Ret; } template <class ELFT> @@ -345,21 +420,19 @@ SharedFile<ELFT>::SharedFile(MemoryBufferRef M) : ELFFileBase<ELFT>(Base::SharedKind, M), AsNeeded(Config->AsNeeded) {} template <class ELFT> -const typename ELFFile<ELFT>::Elf_Shdr * +const typename ELFT::Shdr * SharedFile<ELFT>::getSection(const Elf_Sym &Sym) const { uint32_t Index = this->getSectionIndex(Sym); if (Index == 0) return nullptr; - ErrorOr<const Elf_Shdr *> Ret = this->ELFObj.getSection(Index); - error(Ret); - return *Ret; + return check(this->ELFObj.getSection(Index)); } // Partially parse the shared object file so that we can call // getSoName on this object. 
template <class ELFT> void SharedFile<ELFT>::parseSoName() { - typedef typename ELFFile<ELFT>::Elf_Dyn Elf_Dyn; - typedef typename ELFFile<ELFT>::uintX_t uintX_t; + typedef typename ELFT::Dyn Elf_Dyn; + typedef typename ELFT::uint uintX_t; const Elf_Shdr *DynamicSec = nullptr; const ELFFile<ELFT> Obj = this->ELFObj; @@ -373,12 +446,15 @@ template <class ELFT> void SharedFile<ELFT>::parseSoName() { case SHT_DYNAMIC: DynamicSec = &Sec; break; - case SHT_SYMTAB_SHNDX: { - ErrorOr<ArrayRef<Elf_Word>> ErrorOrTable = Obj.getSHNDXTable(Sec); - error(ErrorOrTable); - this->SymtabSHNDX = *ErrorOrTable; + case SHT_SYMTAB_SHNDX: + this->SymtabSHNDX = check(Obj.getSHNDXTable(Sec)); + break; + case SHT_GNU_versym: + this->VersymSec = &Sec; + break; + case SHT_GNU_verdef: + this->VerdefSec = &Sec; break; - } } } @@ -395,83 +471,358 @@ template <class ELFT> void SharedFile<ELFT>::parseSoName() { if (Dyn.d_tag == DT_SONAME) { uintX_t Val = Dyn.getVal(); if (Val >= this->StringTable.size()) - error("Invalid DT_SONAME entry"); + fatal(getFilename(this) + ": invalid DT_SONAME entry"); SoName = StringRef(this->StringTable.data() + Val); return; } } } +// Parse the version definitions in the object file if present. Returns a vector +// whose nth element contains a pointer to the Elf_Verdef for version identifier +// n. Version identifiers that are not definitions map to nullptr. The array +// always has at least length 1. +template <class ELFT> +std::vector<const typename ELFT::Verdef *> +SharedFile<ELFT>::parseVerdefs(const Elf_Versym *&Versym) { + std::vector<const Elf_Verdef *> Verdefs(1); + // We only need to process symbol versions for this DSO if it has both a + // versym and a verdef section, which indicates that the DSO contains symbol + // version definitions. + if (!VersymSec || !VerdefSec) + return Verdefs; + + // The location of the first global versym entry. 
+ Versym = reinterpret_cast<const Elf_Versym *>(this->ELFObj.base() + + VersymSec->sh_offset) + + this->Symtab->sh_info; + + // We cannot determine the largest verdef identifier without inspecting + // every Elf_Verdef, but both bfd and gold assign verdef identifiers + // sequentially starting from 1, so we predict that the largest identifier + // will be VerdefCount. + unsigned VerdefCount = VerdefSec->sh_info; + Verdefs.resize(VerdefCount + 1); + + // Build the Verdefs array by following the chain of Elf_Verdef objects + // from the start of the .gnu.version_d section. + const uint8_t *Verdef = this->ELFObj.base() + VerdefSec->sh_offset; + for (unsigned I = 0; I != VerdefCount; ++I) { + auto *CurVerdef = reinterpret_cast<const Elf_Verdef *>(Verdef); + Verdef += CurVerdef->vd_next; + unsigned VerdefIndex = CurVerdef->vd_ndx; + if (Verdefs.size() <= VerdefIndex) + Verdefs.resize(VerdefIndex + 1); + Verdefs[VerdefIndex] = CurVerdef; + } + + return Verdefs; +} + // Fully parse the shared object file. This must be called after parseSoName(). template <class ELFT> void SharedFile<ELFT>::parseRest() { - Elf_Sym_Range Syms = this->getNonLocalSymbols(); - uint32_t NumSymbols = std::distance(Syms.begin(), Syms.end()); - SymbolBodies.reserve(NumSymbols); + // Create mapping from version identifiers to Elf_Verdef entries. 
+ const Elf_Versym *Versym = nullptr; + std::vector<const Elf_Verdef *> Verdefs = parseVerdefs(Versym); + + Elf_Sym_Range Syms = this->getElfSymbols(true); for (const Elf_Sym &Sym : Syms) { - ErrorOr<StringRef> NameOrErr = Sym.getName(this->StringTable); - error(NameOrErr.getError()); - StringRef Name = *NameOrErr; + unsigned VersymIndex = 0; + if (Versym) { + VersymIndex = Versym->vs_index; + ++Versym; + } - if (Sym.isUndefined()) + StringRef Name = check(Sym.getName(this->StringTable)); + if (Sym.isUndefined()) { Undefs.push_back(Name); - else - SymbolBodies.emplace_back(this, Name, Sym); + continue; + } + + if (Versym) { + // Ignore local symbols and non-default versions. + if (VersymIndex == VER_NDX_LOCAL || (VersymIndex & VERSYM_HIDDEN)) + continue; + } + + const Elf_Verdef *V = + VersymIndex == VER_NDX_GLOBAL ? nullptr : Verdefs[VersymIndex]; + elf::Symtab<ELFT>::X->addShared(this, Name, Sym, V); } } -template <typename T> -static std::unique_ptr<InputFile> createELFFileAux(MemoryBufferRef MB) { - std::unique_ptr<T> Ret = llvm::make_unique<T>(MB); +static ELFKind getELFKind(MemoryBufferRef MB) { + std::string TripleStr = getBitcodeTargetTriple(MB, Driver->Context); + Triple TheTriple(TripleStr); + bool Is64Bits = TheTriple.isArch64Bit(); + if (TheTriple.isLittleEndian()) + return Is64Bits ? ELF64LEKind : ELF32LEKind; + return Is64Bits ? 
ELF64BEKind : ELF32BEKind; +} - if (!Config->FirstElf) - Config->FirstElf = Ret.get(); +static uint8_t getMachineKind(MemoryBufferRef MB) { + std::string TripleStr = getBitcodeTargetTriple(MB, Driver->Context); + switch (Triple(TripleStr).getArch()) { + case Triple::aarch64: + return EM_AARCH64; + case Triple::arm: + return EM_ARM; + case Triple::mips: + case Triple::mipsel: + case Triple::mips64: + case Triple::mips64el: + return EM_MIPS; + case Triple::ppc: + return EM_PPC; + case Triple::ppc64: + return EM_PPC64; + case Triple::x86: + return EM_386; + case Triple::x86_64: + return EM_X86_64; + default: + fatal(MB.getBufferIdentifier() + + ": could not infer e_machine from bitcode target triple " + + TripleStr); + } +} + +BitcodeFile::BitcodeFile(MemoryBufferRef MB) : InputFile(BitcodeKind, MB) { + EKind = getELFKind(MB); + EMachine = getMachineKind(MB); +} - if (Config->EKind == ELFNoneKind) { - Config->EKind = Ret->getELFKind(); - Config->EMachine = Ret->getEMachine(); +static uint8_t getGvVisibility(const GlobalValue *GV) { + switch (GV->getVisibility()) { + case GlobalValue::DefaultVisibility: + return STV_DEFAULT; + case GlobalValue::HiddenVisibility: + return STV_HIDDEN; + case GlobalValue::ProtectedVisibility: + return STV_PROTECTED; } + llvm_unreachable("unknown visibility"); +} + +template <class ELFT> +Symbol *BitcodeFile::createSymbol(const DenseSet<const Comdat *> &KeptComdats, + const IRObjectFile &Obj, + const BasicSymbolRef &Sym) { + const GlobalValue *GV = Obj.getSymbolGV(Sym.getRawDataRefImpl()); + + SmallString<64> Name; + raw_svector_ostream OS(Name); + Sym.printName(OS); + StringRef NameRef = Saver.save(StringRef(Name)); + + uint32_t Flags = Sym.getFlags(); + bool IsWeak = Flags & BasicSymbolRef::SF_Weak; + uint32_t Binding = IsWeak ? STB_WEAK : STB_GLOBAL; + + uint8_t Type = STT_NOTYPE; + bool CanOmitFromDynSym = false; + // FIXME: Expose a thread-local flag for module asm symbols. 
+ if (GV) { + if (GV->isThreadLocal()) + Type = STT_TLS; + CanOmitFromDynSym = canBeOmittedFromSymbolTable(GV); + } + + uint8_t Visibility; + if (GV) + Visibility = getGvVisibility(GV); + else + // FIXME: Set SF_Hidden flag correctly for module asm symbols, and expose + // protected visibility. + Visibility = STV_DEFAULT; + + if (GV) + if (const Comdat *C = GV->getComdat()) + if (!KeptComdats.count(C)) + return Symtab<ELFT>::X->addUndefined(NameRef, Binding, Visibility, Type, + CanOmitFromDynSym, this); + + const Module &M = Obj.getModule(); + if (Flags & BasicSymbolRef::SF_Undefined) + return Symtab<ELFT>::X->addUndefined(NameRef, Binding, Visibility, Type, + CanOmitFromDynSym, this); + if (Flags & BasicSymbolRef::SF_Common) { + // FIXME: Set SF_Common flag correctly for module asm symbols, and expose + // size and alignment. + assert(GV); + const DataLayout &DL = M.getDataLayout(); + uint64_t Size = DL.getTypeAllocSize(GV->getValueType()); + return Symtab<ELFT>::X->addCommon(NameRef, Size, GV->getAlignment(), + Binding, Visibility, STT_OBJECT, this); + } + return Symtab<ELFT>::X->addBitcode(NameRef, IsWeak, Visibility, Type, + CanOmitFromDynSym, this); +} - return std::move(Ret); +bool BitcodeFile::shouldSkip(uint32_t Flags) { + return !(Flags & BasicSymbolRef::SF_Global) || + (Flags & BasicSymbolRef::SF_FormatSpecific); +} + +template <class ELFT> +void BitcodeFile::parse(DenseSet<StringRef> &ComdatGroups) { + Obj = check(IRObjectFile::create(MB, Driver->Context)); + const Module &M = Obj->getModule(); + + DenseSet<const Comdat *> KeptComdats; + for (const auto &P : M.getComdatSymbolTable()) { + StringRef N = Saver.save(P.first()); + if (ComdatGroups.insert(N).second) + KeptComdats.insert(&P.second); + } + + for (const BasicSymbolRef &Sym : Obj->symbols()) + if (!shouldSkip(Sym.getFlags())) + Symbols.push_back(createSymbol<ELFT>(KeptComdats, *Obj, Sym)); } template <template <class> class T> static std::unique_ptr<InputFile> createELFFile(MemoryBufferRef MB) { - 
std::pair<unsigned char, unsigned char> Type = getElfArchType(MB.getBuffer()); - if (Type.second != ELF::ELFDATA2LSB && Type.second != ELF::ELFDATA2MSB) - error("Invalid data encoding: " + MB.getBufferIdentifier()); - - if (Type.first == ELF::ELFCLASS32) { - if (Type.second == ELF::ELFDATA2LSB) - return createELFFileAux<T<ELF32LE>>(MB); - return createELFFileAux<T<ELF32BE>>(MB); - } - if (Type.first == ELF::ELFCLASS64) { - if (Type.second == ELF::ELFDATA2LSB) - return createELFFileAux<T<ELF64LE>>(MB); - return createELFFileAux<T<ELF64BE>>(MB); - } - error("Invalid file class: " + MB.getBufferIdentifier()); + unsigned char Size; + unsigned char Endian; + std::tie(Size, Endian) = getElfArchType(MB.getBuffer()); + if (Endian != ELFDATA2LSB && Endian != ELFDATA2MSB) + fatal("invalid data encoding: " + MB.getBufferIdentifier()); + + std::unique_ptr<InputFile> Obj; + if (Size == ELFCLASS32 && Endian == ELFDATA2LSB) + Obj.reset(new T<ELF32LE>(MB)); + else if (Size == ELFCLASS32 && Endian == ELFDATA2MSB) + Obj.reset(new T<ELF32BE>(MB)); + else if (Size == ELFCLASS64 && Endian == ELFDATA2LSB) + Obj.reset(new T<ELF64LE>(MB)); + else if (Size == ELFCLASS64 && Endian == ELFDATA2MSB) + Obj.reset(new T<ELF64BE>(MB)); + else + fatal("invalid file class: " + MB.getBufferIdentifier()); + + if (!Config->FirstElf) + Config->FirstElf = Obj.get(); + return Obj; +} + +static bool isBitcode(MemoryBufferRef MB) { + using namespace sys::fs; + return identify_magic(MB.getBuffer()) == file_magic::bitcode; } -std::unique_ptr<InputFile> elf2::createObjectFile(MemoryBufferRef MB) { - return createELFFile<ObjectFile>(MB); +std::unique_ptr<InputFile> elf::createObjectFile(MemoryBufferRef MB, + StringRef ArchiveName) { + std::unique_ptr<InputFile> F; + if (isBitcode(MB)) + F.reset(new BitcodeFile(MB)); + else + F = createELFFile<ObjectFile>(MB); + F->ArchiveName = ArchiveName; + return F; } -std::unique_ptr<InputFile> elf2::createSharedFile(MemoryBufferRef MB) { +std::unique_ptr<InputFile> 
elf::createSharedFile(MemoryBufferRef MB) { return createELFFile<SharedFile>(MB); } -template class elf2::ELFFileBase<ELF32LE>; -template class elf2::ELFFileBase<ELF32BE>; -template class elf2::ELFFileBase<ELF64LE>; -template class elf2::ELFFileBase<ELF64BE>; +MemoryBufferRef LazyObjectFile::getBuffer() { + if (Seen) + return MemoryBufferRef(); + Seen = true; + return MB; +} -template class elf2::ObjectFile<ELF32LE>; -template class elf2::ObjectFile<ELF32BE>; -template class elf2::ObjectFile<ELF64LE>; -template class elf2::ObjectFile<ELF64BE>; +template <class ELFT> +void LazyObjectFile::parse() { + for (StringRef Sym : getSymbols()) + Symtab<ELFT>::X->addLazyObject(Sym, *this); +} + +template <class ELFT> std::vector<StringRef> LazyObjectFile::getElfSymbols() { + typedef typename ELFT::Shdr Elf_Shdr; + typedef typename ELFT::Sym Elf_Sym; + typedef typename ELFT::SymRange Elf_Sym_Range; + + const ELFFile<ELFT> Obj = createELFObj<ELFT>(this->MB); + for (const Elf_Shdr &Sec : Obj.sections()) { + if (Sec.sh_type != SHT_SYMTAB) + continue; + Elf_Sym_Range Syms = Obj.symbols(&Sec); + uint32_t FirstNonLocal = Sec.sh_info; + StringRef StringTable = check(Obj.getStringTableForSymtab(Sec)); + std::vector<StringRef> V; + for (const Elf_Sym &Sym : Syms.slice(FirstNonLocal)) + if (Sym.st_shndx != SHN_UNDEF) + V.push_back(check(Sym.getName(StringTable))); + return V; + } + return {}; +} + +std::vector<StringRef> LazyObjectFile::getBitcodeSymbols() { + LLVMContext Context; + std::unique_ptr<IRObjectFile> Obj = + check(IRObjectFile::create(this->MB, Context)); + std::vector<StringRef> V; + for (const BasicSymbolRef &Sym : Obj->symbols()) { + uint32_t Flags = Sym.getFlags(); + if (BitcodeFile::shouldSkip(Flags)) + continue; + if (Flags & BasicSymbolRef::SF_Undefined) + continue; + SmallString<64> Name; + raw_svector_ostream OS(Name); + Sym.printName(OS); + V.push_back(Saver.save(StringRef(Name))); + } + return V; +} + +// Returns a vector of globally-visible defined symbol names. 
+std::vector<StringRef> LazyObjectFile::getSymbols() { + if (isBitcode(this->MB)) + return getBitcodeSymbols(); + + unsigned char Size; + unsigned char Endian; + std::tie(Size, Endian) = getElfArchType(this->MB.getBuffer()); + if (Size == ELFCLASS32) { + if (Endian == ELFDATA2LSB) + return getElfSymbols<ELF32LE>(); + return getElfSymbols<ELF32BE>(); + } + if (Endian == ELFDATA2LSB) + return getElfSymbols<ELF64LE>(); + return getElfSymbols<ELF64BE>(); +} -template class elf2::SharedFile<ELF32LE>; -template class elf2::SharedFile<ELF32BE>; -template class elf2::SharedFile<ELF64LE>; -template class elf2::SharedFile<ELF64BE>; +template void ArchiveFile::parse<ELF32LE>(); +template void ArchiveFile::parse<ELF32BE>(); +template void ArchiveFile::parse<ELF64LE>(); +template void ArchiveFile::parse<ELF64BE>(); + +template void BitcodeFile::parse<ELF32LE>(DenseSet<StringRef> &); +template void BitcodeFile::parse<ELF32BE>(DenseSet<StringRef> &); +template void BitcodeFile::parse<ELF64LE>(DenseSet<StringRef> &); +template void BitcodeFile::parse<ELF64BE>(DenseSet<StringRef> &); + +template void LazyObjectFile::parse<ELF32LE>(); +template void LazyObjectFile::parse<ELF32BE>(); +template void LazyObjectFile::parse<ELF64LE>(); +template void LazyObjectFile::parse<ELF64BE>(); + +template class elf::ELFFileBase<ELF32LE>; +template class elf::ELFFileBase<ELF32BE>; +template class elf::ELFFileBase<ELF64LE>; +template class elf::ELFFileBase<ELF64BE>; + +template class elf::ObjectFile<ELF32LE>; +template class elf::ObjectFile<ELF32BE>; +template class elf::ObjectFile<ELF64LE>; +template class elf::ObjectFile<ELF64BE>; + +template class elf::SharedFile<ELF32LE>; +template class elf::SharedFile<ELF32BE>; +template class elf::SharedFile<ELF64LE>; +template class elf::SharedFile<ELF64BE>; diff --git a/ELF/InputFiles.h b/ELF/InputFiles.h index 45d403c0125c..79cb751494b3 100644 --- a/ELF/InputFiles.h +++ b/ELF/InputFiles.h @@ -18,11 +18,16 @@ #include "lld/Core/LLVM.h" #include 
"llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/IR/Comdat.h" #include "llvm/Object/Archive.h" #include "llvm/Object/ELF.h" +#include "llvm/Object/IRObjectFile.h" +#include "llvm/Support/StringSaver.h" + +#include <map> namespace lld { -namespace elf2 { +namespace elf { using llvm::object::Archive; @@ -33,25 +38,45 @@ class SymbolBody; // The root class of input files. class InputFile { public: - enum Kind { ObjectKind, SharedKind, ArchiveKind }; + enum Kind { + ObjectKind, + SharedKind, + LazyObjectKind, + ArchiveKind, + BitcodeKind, + }; + Kind kind() const { return FileKind; } StringRef getName() const { return MB.getBufferIdentifier(); } + MemoryBufferRef MB; + + // Filename of .a which contained this file. If this file was + // not in an archive file, it is the empty string. We use this + // string for creating error messages. + StringRef ArchiveName; + + // If this is an architecture-specific file, the following members + // have ELF type (i.e. ELF{32,64}{LE,BE}) and target machine type. + ELFKind EKind = ELFNoneKind; + uint16_t EMachine = llvm::ELF::EM_NONE; protected: InputFile(Kind K, MemoryBufferRef M) : MB(M), FileKind(K) {} - MemoryBufferRef MB; private: const Kind FileKind; }; +// Returns "(internal)", "foo.a(bar.o)" or "baz.o". 
+std::string getFilename(const InputFile *F); + template <typename ELFT> class ELFFileBase : public InputFile { public: - typedef typename llvm::object::ELFFile<ELFT>::Elf_Shdr Elf_Shdr; - typedef typename llvm::object::ELFFile<ELFT>::Elf_Sym Elf_Sym; - typedef typename llvm::object::ELFFile<ELFT>::Elf_Word Elf_Word; - typedef typename llvm::object::ELFFile<ELFT>::Elf_Sym_Range Elf_Sym_Range; + typedef typename ELFT::Shdr Elf_Shdr; + typedef typename ELFT::Sym Elf_Sym; + typedef typename ELFT::Word Elf_Word; + typedef typename ELFT::SymRange Elf_Sym_Range; ELFFileBase(Kind K, MemoryBufferRef M); static bool classof(const InputFile *F) { @@ -59,11 +84,9 @@ public: return K == ObjectKind || K == SharedKind; } - static ELFKind getELFKind(); const llvm::object::ELFFile<ELFT> &getObj() const { return ELFObj; } llvm::object::ELFFile<ELFT> &getObj() { return ELFObj; } - uint16_t getEMachine() const { return getObj().getHeader()->e_machine; } uint8_t getOSABI() const { return getObj().getHeader()->e_ident[llvm::ELF::EI_OSABI]; } @@ -72,39 +95,36 @@ public: uint32_t getSectionIndex(const Elf_Sym &Sym) const; + Elf_Sym_Range getElfSymbols(bool OnlyGlobals); + protected: llvm::object::ELFFile<ELFT> ELFObj; const Elf_Shdr *Symtab = nullptr; ArrayRef<Elf_Word> SymtabSHNDX; StringRef StringTable; void initStringTable(); - Elf_Sym_Range getNonLocalSymbols(); - Elf_Sym_Range getSymbolsHelper(bool); }; // .o file. 
template <class ELFT> class ObjectFile : public ELFFileBase<ELFT> { typedef ELFFileBase<ELFT> Base; - typedef typename llvm::object::ELFFile<ELFT>::Elf_Sym Elf_Sym; - typedef typename llvm::object::ELFFile<ELFT>::Elf_Shdr Elf_Shdr; - typedef typename llvm::object::ELFFile<ELFT>::Elf_Sym_Range Elf_Sym_Range; - typedef typename llvm::object::ELFFile<ELFT>::Elf_Word Elf_Word; - typedef typename llvm::object::ELFFile<ELFT>::uintX_t uintX_t; - - // uint32 in ELFT's byte order - typedef llvm::support::detail::packed_endian_specific_integral< - uint32_t, ELFT::TargetEndianness, 2> - uint32_X; + typedef typename ELFT::Sym Elf_Sym; + typedef typename ELFT::Shdr Elf_Shdr; + typedef typename ELFT::SymRange Elf_Sym_Range; + typedef typename ELFT::Word Elf_Word; + typedef typename ELFT::uint uintX_t; StringRef getShtGroupSignature(const Elf_Shdr &Sec); - ArrayRef<uint32_X> getShtGroupEntries(const Elf_Shdr &Sec); + ArrayRef<Elf_Word> getShtGroupEntries(const Elf_Shdr &Sec); public: static bool classof(const InputFile *F) { return F->kind() == Base::ObjectKind; } - ArrayRef<SymbolBody *> getSymbols() { return SymbolBodies; } + ArrayRef<SymbolBody *> getSymbols(); + ArrayRef<SymbolBody *> getLocalSymbols(); + ArrayRef<SymbolBody *> getNonLocalSymbols(); explicit ObjectFile(MemoryBufferRef M); void parse(llvm::DenseSet<StringRef> &ComdatGroups); @@ -112,15 +132,14 @@ public: ArrayRef<InputSectionBase<ELFT> *> getSections() const { return Sections; } InputSectionBase<ELFT> *getSection(const Elf_Sym &Sym) const; - SymbolBody *getSymbolBody(uint32_t SymbolIndex) const { - uint32_t FirstNonLocal = this->Symtab->sh_info; - if (SymbolIndex < FirstNonLocal) - return nullptr; - return SymbolBodies[SymbolIndex - FirstNonLocal]; + SymbolBody &getSymbolBody(uint32_t SymbolIndex) const { + return *SymbolBodies[SymbolIndex]; } - Elf_Sym_Range getLocalSymbols(); - const Elf_Sym *getLocalSymbol(uintX_t SymIndex); + template <typename RelT> SymbolBody &getRelocTargetSym(const RelT &Rel) const { + 
uint32_t SymIndex = Rel.getSymbol(Config->Mips64EL); + return getSymbolBody(SymIndex); + } const Elf_Shdr *getSymbolTable() const { return this->Symtab; }; @@ -129,12 +148,22 @@ public: // R_MIPS_GPREL16 / R_MIPS_GPREL32 relocations. uint32_t getMipsGp0() const; + // The number is the offset in the string table. It will be used as the + // st_name of the symbol. + std::vector<std::pair<const DefinedRegular<ELFT> *, unsigned>> KeptLocalSyms; + + // SymbolBodies and Thunks for sections in this file are allocated + // using this buffer. + llvm::BumpPtrAllocator Alloc; + private: void initializeSections(llvm::DenseSet<StringRef> &ComdatGroups); void initializeSymbols(); + InputSectionBase<ELFT> *getRelocTarget(const Elf_Shdr &Sec); InputSectionBase<ELFT> *createInputSection(const Elf_Shdr &Sec); - SymbolBody *createSymbolBody(StringRef StringTable, const Elf_Sym *Sym); + bool shouldMerge(const Elf_Shdr &Sec); + SymbolBody *createSymbolBody(const Elf_Sym *Sym); // List of all sections defined by this file. std::vector<InputSectionBase<ELFT> *> Sections; @@ -143,49 +172,97 @@ private: std::vector<SymbolBody *> SymbolBodies; // MIPS .reginfo section defined by this file. - MipsReginfoInputSection<ELFT> *MipsReginfo = nullptr; + std::unique_ptr<MipsReginfoInputSection<ELFT>> MipsReginfo; + // MIPS .MIPS.options section defined by this file. + std::unique_ptr<MipsOptionsInputSection<ELFT>> MipsOptions; - llvm::BumpPtrAllocator Alloc; + llvm::SpecificBumpPtrAllocator<InputSection<ELFT>> IAlloc; llvm::SpecificBumpPtrAllocator<MergeInputSection<ELFT>> MAlloc; - llvm::SpecificBumpPtrAllocator<EHInputSection<ELFT>> EHAlloc; + llvm::SpecificBumpPtrAllocator<EhInputSection<ELFT>> EHAlloc; +}; + +// LazyObjectFile is analogous to ArchiveFile in the sense that +// the file contains lazy symbols. The difference is that +// LazyObjectFile wraps a single file instead of multiple files. 
+// +// This class is used for --start-lib and --end-lib options which +// instruct the linker to link object files between them with the +// archive file semantics. +class LazyObjectFile : public InputFile { +public: + explicit LazyObjectFile(MemoryBufferRef M) : InputFile(LazyObjectKind, M) {} + + static bool classof(const InputFile *F) { + return F->kind() == LazyObjectKind; + } + + template <class ELFT> void parse(); + MemoryBufferRef getBuffer(); + +private: + std::vector<StringRef> getSymbols(); |