diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2015-12-30 11:57:38 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2015-12-30 11:57:38 +0000 |
commit | 5a5c549fe9a3fef595297bd21d36bed8409dc37d (patch) | |
tree | a964c8f5ac85b7b641cac022c5f9bf4eed3d2b9b /ELF | |
parent | fb911942f1434f3d1750f83f25f5e42c80e60638 (diff) | |
download | src-5a5c549fe9a3fef595297bd21d36bed8409dc37d.tar.gz src-5a5c549fe9a3fef595297bd21d36bed8409dc37d.zip |
Vendor import of lld trunk r256633:vendor/lld/lld-trunk-r256633
Notes
Notes:
svn path=/vendor/lld/dist/; revision=292934
svn path=/vendor/lld/lld-trunk-r256633/; revision=292935; tag=vendor/lld/lld-trunk-r256633
Diffstat (limited to 'ELF')
-rw-r--r-- | ELF/CMakeLists.txt | 26 | ||||
-rw-r--r-- | ELF/Config.h | 84 | ||||
-rw-r--r-- | ELF/Driver.cpp | 299 | ||||
-rw-r--r-- | ELF/Driver.h | 67 | ||||
-rw-r--r-- | ELF/DriverUtils.cpp | 120 | ||||
-rw-r--r-- | ELF/Error.cpp | 38 | ||||
-rw-r--r-- | ELF/Error.h | 32 | ||||
-rw-r--r-- | ELF/InputFiles.cpp | 496 | ||||
-rw-r--r-- | ELF/InputFiles.h | 211 | ||||
-rw-r--r-- | ELF/InputSection.cpp | 399 | ||||
-rw-r--r-- | ELF/InputSection.h | 187 | ||||
-rw-r--r-- | ELF/LinkerScript.cpp | 318 | ||||
-rw-r--r-- | ELF/MarkLive.cpp | 131 | ||||
-rw-r--r-- | ELF/Options.td | 161 | ||||
-rw-r--r-- | ELF/OutputSections.cpp | 1534 | ||||
-rw-r--r-- | ELF/OutputSections.h | 485 | ||||
-rw-r--r-- | ELF/README.md | 21 | ||||
-rw-r--r-- | ELF/SymbolTable.cpp | 267 | ||||
-rw-r--r-- | ELF/SymbolTable.h | 98 | ||||
-rw-r--r-- | ELF/Symbols.cpp | 148 | ||||
-rw-r--r-- | ELF/Symbols.h | 327 | ||||
-rw-r--r-- | ELF/Target.cpp | 1481 | ||||
-rw-r--r-- | ELF/Target.h | 117 | ||||
-rw-r--r-- | ELF/Writer.cpp | 1282 | ||||
-rw-r--r-- | ELF/Writer.h | 24 |
25 files changed, 8353 insertions, 0 deletions
diff --git a/ELF/CMakeLists.txt b/ELF/CMakeLists.txt new file mode 100644 index 000000000000..763275e30caa --- /dev/null +++ b/ELF/CMakeLists.txt @@ -0,0 +1,26 @@ +set(LLVM_TARGET_DEFINITIONS Options.td) +tablegen(LLVM Options.inc -gen-opt-parser-defs) +add_public_tablegen_target(ELFOptionsTableGen) + +add_llvm_library(lldELF2 + Driver.cpp + DriverUtils.cpp + Error.cpp + InputFiles.cpp + InputSection.cpp + LinkerScript.cpp + MarkLive.cpp + OutputSections.cpp + SymbolTable.cpp + Symbols.cpp + Target.cpp + Writer.cpp + + LINK_COMPONENTS + Object + Option + MC + Support + ) + +add_dependencies(lldELF2 ELFOptionsTableGen) diff --git a/ELF/Config.h b/ELF/Config.h new file mode 100644 index 000000000000..7b820f18b8c7 --- /dev/null +++ b/ELF/Config.h @@ -0,0 +1,84 @@ +//===- Config.h -------------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_CONFIG_H +#define LLD_ELF_CONFIG_H + +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/ELF.h" + +#include <vector> + +namespace lld { +namespace elf2 { + +class InputFile; +class SymbolBody; + +enum ELFKind { + ELFNoneKind, + ELF32LEKind, + ELF32BEKind, + ELF64LEKind, + ELF64BEKind +}; + +struct Configuration { + SymbolBody *EntrySym = nullptr; + SymbolBody *MipsGpDisp = nullptr; + InputFile *FirstElf = nullptr; + llvm::StringRef DynamicLinker; + llvm::StringRef Entry; + llvm::StringRef Emulation; + llvm::StringRef Fini; + llvm::StringRef Init; + llvm::StringRef OutputFile; + llvm::StringRef SoName; + llvm::StringRef Sysroot; + std::string RPath; + llvm::MapVector<llvm::StringRef, std::vector<llvm::StringRef>> OutputSections; + std::vector<llvm::StringRef> SearchPaths; + std::vector<llvm::StringRef> Undefined; + bool AllowMultipleDefinition; + 
bool AsNeeded = false; + bool Bsymbolic; + bool DiscardAll; + bool DiscardLocals; + bool DiscardNone; + bool EnableNewDtags; + bool ExportDynamic; + bool GcSections; + bool GnuHash = false; + bool Mips64EL = false; + bool NoInhibitExec; + bool NoUndefined; + bool PrintGcSections; + bool Shared; + bool Static = false; + bool StripAll; + bool SysvHash = true; + bool Verbose; + bool ZExecStack; + bool ZNodelete; + bool ZNow; + bool ZOrigin; + bool ZRelro; + ELFKind EKind = ELFNoneKind; + uint16_t EMachine = llvm::ELF::EM_NONE; + uint64_t EntryAddr = -1; + unsigned Optimize = 0; +}; + +extern Configuration *Config; + +} // namespace elf2 +} // namespace lld + +#endif diff --git a/ELF/Driver.cpp b/ELF/Driver.cpp new file mode 100644 index 000000000000..6d881373b303 --- /dev/null +++ b/ELF/Driver.cpp @@ -0,0 +1,299 @@ +//===- Driver.cpp ---------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "Driver.h" +#include "Config.h" +#include "Error.h" +#include "InputFiles.h" +#include "SymbolTable.h" +#include "Target.h" +#include "Writer.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/raw_ostream.h" +#include <utility> + +using namespace llvm; +using namespace llvm::ELF; +using namespace llvm::object; + +using namespace lld; +using namespace lld::elf2; + +Configuration *lld::elf2::Config; +LinkerDriver *lld::elf2::Driver; + +void lld::elf2::link(ArrayRef<const char *> Args) { + Configuration C; + LinkerDriver D; + Config = &C; + Driver = &D; + Driver->main(Args.slice(1)); +} + +static std::pair<ELFKind, uint16_t> parseEmulation(StringRef S) { + if (S == "elf32btsmip") + return {ELF32BEKind, EM_MIPS}; + if (S == "elf32ltsmip") + return {ELF32LEKind, EM_MIPS}; + if (S == "elf32ppc") + return {ELF32BEKind, EM_PPC}; + if (S == "elf64ppc") + return {ELF64BEKind, EM_PPC64}; + if (S == "elf_i386") + return {ELF32LEKind, EM_386}; + if (S == "elf_x86_64") + return {ELF64LEKind, EM_X86_64}; + if (S == "aarch64linux") + return {ELF64LEKind, EM_AARCH64}; + if (S == "i386pe" || S == "i386pep" || S == "thumb2pe") + error("Windows targets are not supported on the ELF frontend: " + S); + error("Unknown emulation: " + S); +} + +// Opens and parses a file. Path has to be resolved already. +// Newly created memory buffers are owned by this driver. 
+void LinkerDriver::addFile(StringRef Path) { + using namespace llvm::sys::fs; + if (Config->Verbose) + llvm::outs() << Path << "\n"; + auto MBOrErr = MemoryBuffer::getFile(Path); + error(MBOrErr, "cannot open " + Path); + std::unique_ptr<MemoryBuffer> &MB = *MBOrErr; + MemoryBufferRef MBRef = MB->getMemBufferRef(); + OwningMBs.push_back(std::move(MB)); // take MB ownership + + switch (identify_magic(MBRef.getBuffer())) { + case file_magic::unknown: + readLinkerScript(&Alloc, MBRef); + return; + case file_magic::archive: + if (WholeArchive) { + auto File = make_unique<ArchiveFile>(MBRef); + for (MemoryBufferRef &MB : File->getMembers()) + Files.push_back(createELFFile<ObjectFile>(MB)); + OwningArchives.emplace_back(std::move(File)); + return; + } + Files.push_back(make_unique<ArchiveFile>(MBRef)); + return; + case file_magic::elf_shared_object: + Files.push_back(createELFFile<SharedFile>(MBRef)); + return; + default: + Files.push_back(createELFFile<ObjectFile>(MBRef)); + } +} + +static StringRef +getString(opt::InputArgList &Args, unsigned Key, StringRef Default = "") { + if (auto *Arg = Args.getLastArg(Key)) + return Arg->getValue(); + return Default; +} + +static bool hasZOption(opt::InputArgList &Args, StringRef Key) { + for (auto *Arg : Args.filtered(OPT_z)) + if (Key == Arg->getValue()) + return true; + return false; +} + +void LinkerDriver::main(ArrayRef<const char *> ArgsArr) { + initSymbols(); + + opt::InputArgList Args = parseArgs(&Alloc, ArgsArr); + createFiles(Args); + + // Traditional linkers can generate re-linkable object files instead + // of executables or DSOs. We don't support that since the feature + // does not seem to provide more value than the static archiver. + if (Args.hasArg(OPT_relocatable)) + error("-r option is not supported. 
Use 'ar' command instead."); + + switch (Config->EKind) { + case ELF32LEKind: + link<ELF32LE>(Args); + return; + case ELF32BEKind: + link<ELF32BE>(Args); + return; + case ELF64LEKind: + link<ELF64LE>(Args); + return; + case ELF64BEKind: + link<ELF64BE>(Args); + return; + default: + error("-m or at least a .o file required"); + } +} + +void LinkerDriver::createFiles(opt::InputArgList &Args) { + for (auto *Arg : Args.filtered(OPT_L)) + Config->SearchPaths.push_back(Arg->getValue()); + + std::vector<StringRef> RPaths; + for (auto *Arg : Args.filtered(OPT_rpath)) + RPaths.push_back(Arg->getValue()); + if (!RPaths.empty()) + Config->RPath = llvm::join(RPaths.begin(), RPaths.end(), ":"); + + if (auto *Arg = Args.getLastArg(OPT_m)) { + StringRef S = Arg->getValue(); + std::pair<ELFKind, uint16_t> P = parseEmulation(S); + Config->EKind = P.first; + Config->EMachine = P.second; + Config->Emulation = S; + } + + Config->AllowMultipleDefinition = Args.hasArg(OPT_allow_multiple_definition); + Config->Bsymbolic = Args.hasArg(OPT_Bsymbolic); + Config->DiscardAll = Args.hasArg(OPT_discard_all); + Config->DiscardLocals = Args.hasArg(OPT_discard_locals); + Config->DiscardNone = Args.hasArg(OPT_discard_none); + Config->EnableNewDtags = !Args.hasArg(OPT_disable_new_dtags); + Config->ExportDynamic = Args.hasArg(OPT_export_dynamic); + Config->GcSections = Args.hasArg(OPT_gc_sections); + Config->NoInhibitExec = Args.hasArg(OPT_noinhibit_exec); + Config->NoUndefined = Args.hasArg(OPT_no_undefined); + Config->PrintGcSections = Args.hasArg(OPT_print_gc_sections); + Config->Shared = Args.hasArg(OPT_shared); + Config->StripAll = Args.hasArg(OPT_strip_all); + Config->Verbose = Args.hasArg(OPT_verbose); + + Config->DynamicLinker = getString(Args, OPT_dynamic_linker); + Config->Entry = getString(Args, OPT_entry); + Config->Fini = getString(Args, OPT_fini, "_fini"); + Config->Init = getString(Args, OPT_init, "_init"); + Config->OutputFile = getString(Args, OPT_o); + Config->SoName = 
getString(Args, OPT_soname); + Config->Sysroot = getString(Args, OPT_sysroot); + + Config->ZExecStack = hasZOption(Args, "execstack"); + Config->ZNodelete = hasZOption(Args, "nodelete"); + Config->ZNow = hasZOption(Args, "now"); + Config->ZOrigin = hasZOption(Args, "origin"); + Config->ZRelro = !hasZOption(Args, "norelro"); + + if (auto *Arg = Args.getLastArg(OPT_O)) { + StringRef Val = Arg->getValue(); + if (Val.getAsInteger(10, Config->Optimize)) + error("Invalid optimization level"); + } + + if (auto *Arg = Args.getLastArg(OPT_hash_style)) { + StringRef S = Arg->getValue(); + if (S == "gnu") { + Config->GnuHash = true; + Config->SysvHash = false; + } else if (S == "both") { + Config->GnuHash = true; + } else if (S != "sysv") + error("Unknown hash style: " + S); + } + + for (auto *Arg : Args.filtered(OPT_undefined)) + Config->Undefined.push_back(Arg->getValue()); + + for (auto *Arg : Args) { + switch (Arg->getOption().getID()) { + case OPT_l: + addFile(searchLibrary(Arg->getValue())); + break; + case OPT_INPUT: + case OPT_script: + addFile(Arg->getValue()); + break; + case OPT_as_needed: + Config->AsNeeded = true; + break; + case OPT_no_as_needed: + Config->AsNeeded = false; + break; + case OPT_Bstatic: + Config->Static = true; + break; + case OPT_Bdynamic: + Config->Static = false; + break; + case OPT_whole_archive: + WholeArchive = true; + break; + case OPT_no_whole_archive: + WholeArchive = false; + break; + } + } + + if (Files.empty()) + error("no input files."); + + if (Config->GnuHash && Config->EMachine == EM_MIPS) + error("The .gnu.hash section is not compatible with the MIPS target."); +} + +template <class ELFT> void LinkerDriver::link(opt::InputArgList &Args) { + SymbolTable<ELFT> Symtab; + Target.reset(createTarget()); + + if (!Config->Shared) { + // Add entry symbol. + if (Config->Entry.empty()) + Config->Entry = (Config->EMachine == EM_MIPS) ? 
"__start" : "_start"; + + // In the assembly for 32 bit x86 the _GLOBAL_OFFSET_TABLE_ symbol + // is magical and is used to produce a R_386_GOTPC relocation. + // The R_386_GOTPC relocation value doesn't actually depend on the + // symbol value, so it could use an index of STN_UNDEF which, according + // to the spec, means the symbol value is 0. + // Unfortunately both gas and MC keep the _GLOBAL_OFFSET_TABLE_ symbol in + // the object file. + // The situation is even stranger on x86_64 where the assembly doesn't + // need the magical symbol, but gas still puts _GLOBAL_OFFSET_TABLE_ as + // an undefined symbol in the .o files. + // Given that the symbol is effectively unused, we just create a dummy + // hidden one to avoid the undefined symbol error. + Symtab.addIgnored("_GLOBAL_OFFSET_TABLE_"); + } + + if (!Config->Entry.empty()) { + // Set either EntryAddr (if S is a number) or EntrySym (otherwise). + StringRef S = Config->Entry; + if (S.getAsInteger(0, Config->EntryAddr)) + Config->EntrySym = Symtab.addUndefined(S); + } + + if (Config->EMachine == EM_MIPS) { + // On MIPS O32 ABI, _gp_disp is a magic symbol designates offset between + // start of function and gp pointer into GOT. + Config->MipsGpDisp = Symtab.addIgnored("_gp_disp"); + + // Define _gp for MIPS. st_value of _gp symbol will be updated by Writer + // so that it points to an absolute address which is relative to GOT. + // See "Global Data Symbols" in Chapter 6 in the following document: + // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf + Symtab.addAbsolute("_gp", ElfSym<ELFT>::MipsGp); + } + + for (std::unique_ptr<InputFile> &F : Files) + Symtab.addFile(std::move(F)); + + for (StringRef S : Config->Undefined) + Symtab.addUndefinedOpt(S); + + if (Config->OutputFile.empty()) + Config->OutputFile = "a.out"; + + // Write the result to the file. 
+ Symtab.scanShlibUndefined(); + if (Config->GcSections) + markLive<ELFT>(&Symtab); + writeResult<ELFT>(&Symtab); +} diff --git a/ELF/Driver.h b/ELF/Driver.h new file mode 100644 index 000000000000..2641155104dc --- /dev/null +++ b/ELF/Driver.h @@ -0,0 +1,67 @@ +//===- Driver.h -------------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_DRIVER_H +#define LLD_ELF_DRIVER_H + +#include "SymbolTable.h" +#include "lld/Core/LLVM.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Option/ArgList.h" + +namespace lld { +namespace elf2 { + +extern class LinkerDriver *Driver; + +// Entry point of the ELF linker. +void link(ArrayRef<const char *> Args); + +class LinkerDriver { +public: + void main(ArrayRef<const char *> Args); + void createFiles(llvm::opt::InputArgList &Args); + template <class ELFT> void link(llvm::opt::InputArgList &Args); + + void addFile(StringRef Path); + +private: + template <template <class> class T> + std::unique_ptr<InputFile> createELFInputFile(MemoryBufferRef MB); + + llvm::BumpPtrAllocator Alloc; + bool WholeArchive = false; + std::vector<std::unique_ptr<InputFile>> Files; + std::vector<std::unique_ptr<ArchiveFile>> OwningArchives; + std::vector<std::unique_ptr<MemoryBuffer>> OwningMBs; +}; + +// Parses command line options. +llvm::opt::InputArgList parseArgs(llvm::BumpPtrAllocator *A, + ArrayRef<const char *> Args); + +// Create enum with OPT_xxx values for each option in Options.td +enum { + OPT_INVALID = 0, +#define OPTION(_1, _2, ID, _4, _5, _6, _7, _8, _9, _10, _11) OPT_##ID, +#include "Options.inc" +#undef OPTION +}; + +// Parses a linker script. Calling this function updates the Symtab and Config. 
+void readLinkerScript(llvm::BumpPtrAllocator *A, MemoryBufferRef MB); + +std::string findFromSearchPaths(StringRef Path); +std::string searchLibrary(StringRef Path); +std::string buildSysrootedPath(llvm::StringRef Dir, llvm::StringRef File); + +} // namespace elf2 +} // namespace lld + +#endif diff --git a/ELF/DriverUtils.cpp b/ELF/DriverUtils.cpp new file mode 100644 index 000000000000..51b500bebf49 --- /dev/null +++ b/ELF/DriverUtils.cpp @@ -0,0 +1,120 @@ +//===- DriverUtils.cpp ----------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains utility functions for the driver. Because there +// are so many small functions, we created this separate file to make +// Driver.cpp less cluttered. +// +//===----------------------------------------------------------------------===// + +#include "Driver.h" +#include "Error.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/StringSaver.h" + +using namespace llvm; + +using namespace lld; +using namespace lld::elf2; + +// Create OptTable + +// Create prefix string literals used in Options.td +#define PREFIX(NAME, VALUE) const char *const NAME[] = VALUE; +#include "Options.inc" +#undef PREFIX + +// Create table mapping all options defined in Options.td +static const opt::OptTable::Info infoTable[] = { +#define OPTION(X1, X2, ID, KIND, GROUP, ALIAS, X6, X7, X8, X9, X10) \ + { \ + X1, X2, X9, X10, OPT_##ID, opt::Option::KIND##Class, X8, X7, OPT_##GROUP, \ + OPT_##ALIAS, X6 \ + } \ + , +#include "Options.inc" +#undef OPTION +}; + +class ELFOptTable : public opt::OptTable { +public: + ELFOptTable() : OptTable(infoTable) {} +}; + +// Parses a given list of options. 
+opt::InputArgList lld::elf2::parseArgs(llvm::BumpPtrAllocator *A, + ArrayRef<const char *> Argv) { + // Make InputArgList from string vectors. + ELFOptTable Table; + unsigned MissingIndex; + unsigned MissingCount; + + // Expand response files. '@<filename>' is replaced by the file's contents. + SmallVector<const char *, 256> Vec(Argv.data(), Argv.data() + Argv.size()); + StringSaver Saver(*A); + llvm::cl::ExpandResponseFiles(Saver, llvm::cl::TokenizeGNUCommandLine, Vec); + + // Parse options and then do error checking. + opt::InputArgList Args = Table.ParseArgs(Vec, MissingIndex, MissingCount); + if (MissingCount) + error(Twine("missing arg value for \"") + Args.getArgString(MissingIndex) + + "\", expected " + Twine(MissingCount) + + (MissingCount == 1 ? " argument.\n" : " arguments")); + + iterator_range<opt::arg_iterator> Unknowns = Args.filtered(OPT_UNKNOWN); + for (auto *Arg : Unknowns) + warning("warning: unknown argument: " + Arg->getSpelling()); + if (Unknowns.begin() != Unknowns.end()) + error("unknown argument(s) found"); + + return Args; +} + +std::string lld::elf2::findFromSearchPaths(StringRef Path) { + for (StringRef Dir : Config->SearchPaths) { + std::string FullPath = buildSysrootedPath(Dir, Path); + if (sys::fs::exists(FullPath)) + return FullPath; + } + return ""; +} + +// Searches a given library from input search paths, which are filled +// from -L command line switches. Returns a path to an existent library file. +std::string lld::elf2::searchLibrary(StringRef Path) { + std::vector<std::string> Names; + if (Path[0] == ':') { + Names.push_back(Path.drop_front()); + } else { + if (!Config->Static) + Names.push_back(("lib" + Path + ".so").str()); + Names.push_back(("lib" + Path + ".a").str()); + } + for (const std::string &Name : Names) { + std::string S = findFromSearchPaths(Name); + if (!S.empty()) + return S; + } + error("Unable to find library -l" + Path); +} + +// Makes a path by concatenating Dir and File. 
+// If Dir starts with '=' the result will be preceded by Sysroot, +// which can be set with --sysroot command line switch. +std::string lld::elf2::buildSysrootedPath(StringRef Dir, StringRef File) { + SmallString<128> Path; + if (Dir.startswith("=")) + sys::path::append(Path, Config->Sysroot, Dir.substr(1), File); + else + sys::path::append(Path, Dir, File); + return Path.str(); +} diff --git a/ELF/Error.cpp b/ELF/Error.cpp new file mode 100644 index 000000000000..e0701f7f4cc6 --- /dev/null +++ b/ELF/Error.cpp @@ -0,0 +1,38 @@ +//===- Error.cpp ----------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Error.h" + +#include "llvm/ADT/Twine.h" +#include "llvm/Support/raw_ostream.h" + +namespace lld { +namespace elf2 { + +void warning(const Twine &Msg) { llvm::errs() << Msg << "\n"; } + +void error(const Twine &Msg) { + llvm::errs() << Msg << "\n"; + exit(1); +} + +void error(std::error_code EC, const Twine &Prefix) { + if (!EC) + return; + error(Prefix + ": " + EC.message()); +} + +void error(std::error_code EC) { + if (!EC) + return; + error(EC.message()); +} + +} // namespace elf2 +} // namespace lld diff --git a/ELF/Error.h b/ELF/Error.h new file mode 100644 index 000000000000..b1d2e7a8fc5b --- /dev/null +++ b/ELF/Error.h @@ -0,0 +1,32 @@ +//===- Error.h --------------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_COFF_ERROR_H +#define LLD_COFF_ERROR_H + +#include "lld/Core/LLVM.h" + +namespace lld { +namespace elf2 { + +void warning(const Twine &Msg); + +LLVM_ATTRIBUTE_NORETURN void error(const Twine &Msg); +void error(std::error_code EC, const Twine &Prefix); +void error(std::error_code EC); + +template <typename T> void error(const ErrorOr<T> &V, const Twine &Prefix) { + error(V.getError(), Prefix); +} +template <typename T> void error(const ErrorOr<T> &V) { error(V.getError()); } + +} // namespace elf2 +} // namespace lld + +#endif diff --git a/ELF/InputFiles.cpp b/ELF/InputFiles.cpp new file mode 100644 index 000000000000..e0827a3ee43d --- /dev/null +++ b/ELF/InputFiles.cpp @@ -0,0 +1,496 @@ +//===- InputFiles.cpp -----------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "InputFiles.h" +#include "InputSection.h" +#include "Error.h" +#include "Symbols.h" +#include "llvm/ADT/STLExtras.h" + +using namespace llvm; +using namespace llvm::ELF; +using namespace llvm::object; +using namespace llvm::sys::fs; + +using namespace lld; +using namespace lld::elf2; + +namespace { +class ECRAII { + std::error_code EC; + +public: + std::error_code &getEC() { return EC; } + ~ECRAII() { error(EC); } +}; +} + +template <class ELFT> +ELFFileBase<ELFT>::ELFFileBase(Kind K, MemoryBufferRef M) + : InputFile(K, M), ELFObj(MB.getBuffer(), ECRAII().getEC()) {} + +template <class ELFT> +ELFKind ELFFileBase<ELFT>::getELFKind() { + using llvm::support::little; + if (ELFT::Is64Bits) + return ELFT::TargetEndianness == little ? ELF64LEKind : ELF64BEKind; + return ELFT::TargetEndianness == little ? 
ELF32LEKind : ELF32BEKind; +} + +template <class ELFT> +typename ELFFileBase<ELFT>::Elf_Sym_Range +ELFFileBase<ELFT>::getSymbolsHelper(bool Local) { + if (!Symtab) + return Elf_Sym_Range(nullptr, nullptr); + Elf_Sym_Range Syms = ELFObj.symbols(Symtab); + uint32_t NumSymbols = std::distance(Syms.begin(), Syms.end()); + uint32_t FirstNonLocal = Symtab->sh_info; + if (FirstNonLocal > NumSymbols) + error("Invalid sh_info in symbol table"); + if (!Local) + return make_range(Syms.begin() + FirstNonLocal, Syms.end()); + // +1 to skip over dummy symbol. + return make_range(Syms.begin() + 1, Syms.begin() + FirstNonLocal); +} + +template <class ELFT> +uint32_t ELFFileBase<ELFT>::getSectionIndex(const Elf_Sym &Sym) const { + uint32_t I = Sym.st_shndx; + if (I == ELF::SHN_XINDEX) + return this->ELFObj.getExtendedSymbolTableIndex(&Sym, this->Symtab, + SymtabSHNDX); + if (I >= ELF::SHN_LORESERVE || I == ELF::SHN_ABS) + return 0; + return I; +} + +template <class ELFT> void ELFFileBase<ELFT>::initStringTable() { + if (!Symtab) + return; + ErrorOr<StringRef> StringTableOrErr = ELFObj.getStringTableForSymtab(*Symtab); + error(StringTableOrErr.getError()); + StringTable = *StringTableOrErr; +} + +template <class ELFT> +typename ELFFileBase<ELFT>::Elf_Sym_Range +ELFFileBase<ELFT>::getNonLocalSymbols() { + return getSymbolsHelper(false); +} + +template <class ELFT> +ObjectFile<ELFT>::ObjectFile(MemoryBufferRef M) + : ELFFileBase<ELFT>(Base::ObjectKind, M) {} + +template <class ELFT> +typename ObjectFile<ELFT>::Elf_Sym_Range ObjectFile<ELFT>::getLocalSymbols() { + return this->getSymbolsHelper(true); +} + +template <class ELFT> uint32_t ObjectFile<ELFT>::getMipsGp0() const { + return MipsReginfo ? 
MipsReginfo->getGp0() : 0; +} + +template <class ELFT> +const typename ObjectFile<ELFT>::Elf_Sym * +ObjectFile<ELFT>::getLocalSymbol(uintX_t SymIndex) { + uint32_t FirstNonLocal = this->Symtab->sh_info; + if (SymIndex >= FirstNonLocal) + return nullptr; + Elf_Sym_Range Syms = this->ELFObj.symbols(this->Symtab); + return Syms.begin() + SymIndex; +} + +template <class ELFT> +void elf2::ObjectFile<ELFT>::parse(DenseSet<StringRef> &Comdats) { + // Read section and symbol tables. + initializeSections(Comdats); + initializeSymbols(); +} + +// Sections with SHT_GROUP and comdat bits define comdat section groups. +// They are identified and deduplicated by group name. This function +// returns a group name. +template <class ELFT> +StringRef ObjectFile<ELFT>::getShtGroupSignature(const Elf_Shdr &Sec) { + const ELFFile<ELFT> &Obj = this->ELFObj; + uint32_t SymtabdSectionIndex = Sec.sh_link; + ErrorOr<const Elf_Shdr *> SecOrErr = Obj.getSection(SymtabdSectionIndex); + error(SecOrErr); + const Elf_Shdr *SymtabSec = *SecOrErr; + uint32_t SymIndex = Sec.sh_info; + const Elf_Sym *Sym = Obj.getSymbol(SymtabSec, SymIndex); + ErrorOr<StringRef> StringTableOrErr = Obj.getStringTableForSymtab(*SymtabSec); + error(StringTableOrErr); + ErrorOr<StringRef> SignatureOrErr = Sym->getName(*StringTableOrErr); + error(SignatureOrErr); + return *SignatureOrErr; +} + +template <class ELFT> +ArrayRef<typename ObjectFile<ELFT>::GroupEntryType> +ObjectFile<ELFT>::getShtGroupEntries(const Elf_Shdr &Sec) { + const ELFFile<ELFT> &Obj = this->ELFObj; + ErrorOr<ArrayRef<GroupEntryType>> EntriesOrErr = + Obj.template getSectionContentsAsArray<GroupEntryType>(&Sec); + error(EntriesOrErr.getError()); + ArrayRef<GroupEntryType> Entries = *EntriesOrErr; + if (Entries.empty() || Entries[0] != GRP_COMDAT) + error("Unsupported SHT_GROUP format"); + return Entries.slice(1); +} + +template <class ELFT> +static bool shouldMerge(const typename ELFFile<ELFT>::Elf_Shdr &Sec) { + typedef typename 
ELFFile<ELFT>::uintX_t uintX_t; + uintX_t Flags = Sec.sh_flags; + if (!(Flags & SHF_MERGE)) + return false; + if (Flags & SHF_WRITE) + error("Writable SHF_MERGE sections are not supported"); + uintX_t EntSize = Sec.sh_entsize; + if (!EntSize || Sec.sh_size % EntSize) + error("SHF_MERGE section size must be a multiple of sh_entsize"); + + // Don't try to merge if the aligment is larger than the sh_entsize. + // + // If this is not a SHF_STRINGS, we would need to pad after every entity. It + // would be equivalent for the producer of the .o to just set a larger + // sh_entsize. + // + // If this is a SHF_STRINGS, the larger alignment makes sense. Unfortunately + // it would complicate tail merging. This doesn't seem that common to + // justify the effort. + if (Sec.sh_addralign > EntSize) + return false; + + return true; +} + +template <class ELFT> +void elf2::ObjectFile<ELFT>::initializeSections(DenseSet<StringRef> &Comdats) { + uint64_t Size = this->ELFObj.getNumSections(); + Sections.resize(Size); + unsigned I = -1; + const ELFFile<ELFT> &Obj = this->ELFObj; + for (const Elf_Shdr &Sec : Obj.sections()) { + ++I; + if (Sections[I] == &InputSection<ELFT>::Discarded) + continue; + + switch (Sec.sh_type) { + case SHT_GROUP: + Sections[I] = &InputSection<ELFT>::Discarded; + if (Comdats.insert(getShtGroupSignature(Sec)).second) + continue; + for (GroupEntryType E : getShtGroupEntries(Sec)) { + uint32_t SecIndex = E; + if (SecIndex >= Size) + error("Invalid section index in group"); + Sections[SecIndex] = &InputSection<ELFT>::Discarded; + } + break; + case SHT_SYMTAB: + this->Symtab = &Sec; + break; + case SHT_SYMTAB_SHNDX: { + ErrorOr<ArrayRef<Elf_Word>> ErrorOrTable = Obj.getSHNDXTable(Sec); + error(ErrorOrTable); + this->SymtabSHNDX = *ErrorOrTable; + break; + } + case SHT_STRTAB: + case SHT_NULL: + break; + case SHT_RELA: + case SHT_REL: { + uint32_t RelocatedSectionIndex = Sec.sh_info; + if (RelocatedSectionIndex >= Size) + error("Invalid relocated section index"); + 
InputSectionBase<ELFT> *RelocatedSection = + Sections[RelocatedSectionIndex]; + if (!RelocatedSection) + error("Unsupported relocation reference"); + if (auto *S = dyn_cast<InputSection<ELFT>>(RelocatedSection)) { + S->RelocSections.push_back(&Sec); + } else if (auto *S = dyn_cast<EHInputSection<ELFT>>(RelocatedSection)) { + if (S->RelocSection) + error("Multiple relocation sections to .eh_frame are not supported"); + S->RelocSection = &Sec; + } else { + error("Relocations pointing to SHF_MERGE are not supported"); + } + break; + } + default: + Sections[I] = createInputSection(Sec); + } + } +} + +template <class ELFT> InputSectionBase<ELFT> * +elf2::ObjectFile<ELFT>::createInputSection(const Elf_Shdr &Sec) { + ErrorOr<StringRef> NameOrErr = this->ELFObj.getSectionName(&Sec); + error(NameOrErr); + StringRef Name = *NameOrErr; + + // .note.GNU-stack is a marker section to control the presence of + // PT_GNU_STACK segment in outputs. Since the presence of the segment + // is controlled only by the command line option (-z execstack) in LLD, + // .note.GNU-stack is ignored. + if (Name == ".note.GNU-stack") + return &InputSection<ELFT>::Discarded; + + // A MIPS object file has a special section that contains register + // usage info, which needs to be handled by the linker specially. 
+ if (Config->EMachine == EM_MIPS && Name == ".reginfo") { + MipsReginfo = new (this->Alloc) MipsReginfoInputSection<ELFT>(this, &Sec); + return MipsReginfo; + } + + if (Name == ".eh_frame") + return new (this->EHAlloc.Allocate()) EHInputSection<ELFT>(this, &Sec); + if (shouldMerge<ELFT>(Sec)) + return new (this->MAlloc.Allocate()) MergeInputSection<ELFT>(this, &Sec); + return new (this->Alloc) InputSection<ELFT>(this, &Sec); +} + +template <class ELFT> void elf2::ObjectFile<ELFT>::initializeSymbols() { + this->initStringTable(); + Elf_Sym_Range Syms = this->getNonLocalSymbols(); + uint32_t NumSymbols = std::distance(Syms.begin(), Syms.end()); + this->SymbolBodies.reserve(NumSymbols); + for (const Elf_Sym &Sym : Syms) + this->SymbolBodies.push_back(createSymbolBody(this->StringTable, &Sym)); +} + +template <class ELFT> +InputSectionBase<ELFT> * +elf2::ObjectFile<ELFT>::getSection(const Elf_Sym &Sym) const { + uint32_t Index = this->getSectionIndex(Sym); + if (Index == 0) + return nullptr; + if (Index >= Sections.size() || !Sections[Index]) + error("Invalid section index"); + return Sections[Index]; +} + +template <class ELFT> +SymbolBody *elf2::ObjectFile<ELFT>::createSymbolBody(StringRef StringTable, + const Elf_Sym *Sym) { + ErrorOr<StringRef> NameOrErr = Sym->getName(StringTable); + error(NameOrErr.getError()); + StringRef Name = *NameOrErr; + + switch (Sym->st_shndx) { + case SHN_UNDEF: + return new (this->Alloc) UndefinedElf<ELFT>(Name, *Sym); + case SHN_COMMON: + return new (this->Alloc) DefinedCommon( + Name, Sym->st_size, Sym->st_value, + Sym->getBinding() == llvm::ELF::STB_WEAK, Sym->getVisibility()); + } + + switch (Sym->getBinding()) { + default: + error("unexpected binding"); + case STB_GLOBAL: + case STB_WEAK: + case STB_GNU_UNIQUE: { + InputSectionBase<ELFT> *Sec = getSection(*Sym); + if (Sec == &InputSection<ELFT>::Discarded) + return new (this->Alloc) UndefinedElf<ELFT>(Name, *Sym); + return new (this->Alloc) DefinedRegular<ELFT>(Name, *Sym, Sec); + 
} + } +} + +static std::unique_ptr<Archive> openArchive(MemoryBufferRef MB) { + ErrorOr<std::unique_ptr<Archive>> ArchiveOrErr = Archive::create(MB); + error(ArchiveOrErr, "Failed to parse archive"); + return std::move(*ArchiveOrErr); +} + +void ArchiveFile::parse() { + File = openArchive(MB); + + // Allocate a buffer for Lazy objects. + size_t NumSyms = File->getNumberOfSymbols(); + LazySymbols.reserve(NumSyms); + + // Read the symbol table to construct Lazy objects. + for (const Archive::Symbol &Sym : File->symbols()) + LazySymbols.emplace_back(this, Sym); +} + +// Returns a buffer pointing to a member file containing a given symbol. +MemoryBufferRef ArchiveFile::getMember(const Archive::Symbol *Sym) { + ErrorOr<Archive::Child> COrErr = Sym->getMember(); + error(COrErr, "Could not get the member for symbol " + Sym->getName()); + const Archive::Child &C = *COrErr; + + if (!Seen.insert(C.getChildOffset()).second) + return MemoryBufferRef(); + + ErrorOr<MemoryBufferRef> RefOrErr = C.getMemoryBufferRef(); + if (!RefOrErr) + error(RefOrErr, "Could not get the buffer for the member defining symbol " + + Sym->getName()); + return *RefOrErr; +} + +std::vector<MemoryBufferRef> ArchiveFile::getMembers() { + File = openArchive(MB); + + std::vector<MemoryBufferRef> Result; + for (auto &ChildOrErr : File->children()) { + error(ChildOrErr, + "Could not get the child of the archive " + File->getFileName()); + const Archive::Child Child(*ChildOrErr); + ErrorOr<MemoryBufferRef> MbOrErr = Child.getMemoryBufferRef(); + if (!MbOrErr) + error(MbOrErr, "Could not get the buffer for a child of the archive " + + File->getFileName()); + Result.push_back(MbOrErr.get()); + } + return Result; +} + +template <class ELFT> +SharedFile<ELFT>::SharedFile(MemoryBufferRef M) + : ELFFileBase<ELFT>(Base::SharedKind, M) { + AsNeeded = Config->AsNeeded; +} + +template <class ELFT> +const typename ELFFile<ELFT>::Elf_Shdr * +SharedFile<ELFT>::getSection(const Elf_Sym &Sym) const { + uint32_t Index = 
this->getSectionIndex(Sym); + if (Index == 0) + return nullptr; + ErrorOr<const Elf_Shdr *> Ret = this->ELFObj.getSection(Index); + error(Ret); + return *Ret; +} + +template <class ELFT> void SharedFile<ELFT>::parseSoName() { + typedef typename ELFFile<ELFT>::Elf_Dyn Elf_Dyn; + typedef typename ELFFile<ELFT>::uintX_t uintX_t; + const Elf_Shdr *DynamicSec = nullptr; + + const ELFFile<ELFT> Obj = this->ELFObj; + for (const Elf_Shdr &Sec : Obj.sections()) { + switch (Sec.sh_type) { + default: + continue; + case SHT_DYNSYM: + this->Symtab = &Sec; + break; + case SHT_DYNAMIC: + DynamicSec = &Sec; + break; + case SHT_SYMTAB_SHNDX: { + ErrorOr<ArrayRef<Elf_Word>> ErrorOrTable = Obj.getSHNDXTable(Sec); + error(ErrorOrTable); + this->SymtabSHNDX = *ErrorOrTable; + break; + } + } + } + + this->initStringTable(); + this->SoName = this->getName(); + + if (!DynamicSec) + return; + auto *Begin = + reinterpret_cast<const Elf_Dyn *>(Obj.base() + DynamicSec->sh_offset); + const Elf_Dyn *End = Begin + DynamicSec->sh_size / sizeof(Elf_Dyn); + + for (const Elf_Dyn &Dyn : make_range(Begin, End)) { + if (Dyn.d_tag == DT_SONAME) { + uintX_t Val = Dyn.getVal(); + if (Val >= this->StringTable.size()) + error("Invalid DT_SONAME entry"); + this->SoName = StringRef(this->StringTable.data() + Val); + return; + } + } +} + +template <class ELFT> void SharedFile<ELFT>::parse() { + Elf_Sym_Range Syms = this->getNonLocalSymbols(); + uint32_t NumSymbols = std::distance(Syms.begin(), Syms.end()); + SymbolBodies.reserve(NumSymbols); + for (const Elf_Sym &Sym : Syms) { + ErrorOr<StringRef> NameOrErr = Sym.getName(this->StringTable); + error(NameOrErr.getError()); + StringRef Name = *NameOrErr; + + if (Sym.isUndefined()) + Undefs.push_back(Name); + else + SymbolBodies.emplace_back(this, Name, Sym); + } +} + +template <typename T> +static std::unique_ptr<InputFile> createELFFileAux(MemoryBufferRef MB) { + std::unique_ptr<T> Ret = llvm::make_unique<T>(MB); + + if (!Config->FirstElf) + Config->FirstElf = 
Ret.get(); + + if (Config->EKind == ELFNoneKind) { + Config->EKind = Ret->getELFKind(); + Config->EMachine = Ret->getEMachine(); + } + + return std::move(Ret); +} + +template <template <class> class T> +std::unique_ptr<InputFile> lld::elf2::createELFFile(MemoryBufferRef MB) { + std::pair<unsigned char, unsigned char> Type = getElfArchType(MB.getBuffer()); + if (Type.second != ELF::ELFDATA2LSB && Type.second != ELF::ELFDATA2MSB) + error("Invalid data encoding: " + MB.getBufferIdentifier()); + + if (Type.first == ELF::ELFCLASS32) { + if (Type.second == ELF::ELFDATA2LSB) + return createELFFileAux<T<ELF32LE>>(MB); + return createELFFileAux<T<ELF32BE>>(MB); + } + if (Type.first == ELF::ELFCLASS64) { + if (Type.second == ELF::ELFDATA2LSB) + return createELFFileAux<T<ELF64LE>>(MB); + return createELFFileAux<T<ELF64BE>>(MB); + } + error("Invalid file class: " + MB.getBufferIdentifier()); +} + +template class elf2::ELFFileBase<ELF32LE>; +template class elf2::ELFFileBase<ELF32BE>; +template class elf2::ELFFileBase<ELF64LE>; +template class elf2::ELFFileBase<ELF64BE>; + +template class elf2::ObjectFile<ELF32LE>; +template class elf2::ObjectFile<ELF32BE>; +template class elf2::ObjectFile<ELF64LE>; +template class elf2::ObjectFile<ELF64BE>; + +template class elf2::SharedFile<ELF32LE>; +template class elf2::SharedFile<ELF32BE>; +template class elf2::SharedFile<ELF64LE>; +template class elf2::SharedFile<ELF64BE>; + +template std::unique_ptr<InputFile> +elf2::createELFFile<ObjectFile>(MemoryBufferRef); + +template std::unique_ptr<InputFile> +elf2::createELFFile<SharedFile>(MemoryBufferRef); diff --git a/ELF/InputFiles.h b/ELF/InputFiles.h new file mode 100644 index 000000000000..4e529c558cf5 --- /dev/null +++ b/ELF/InputFiles.h @@ -0,0 +1,211 @@ +//===- InputFiles.h ---------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_INPUT_FILES_H +#define LLD_ELF_INPUT_FILES_H + +#include "Config.h" +#include "InputSection.h" +#include "Error.h" +#include "Symbols.h" + +#include "lld/Core/LLVM.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Object/Archive.h" +#include "llvm/Object/ELF.h" + +namespace lld { +namespace elf2 { + +using llvm::object::Archive; + +class InputFile; +class Lazy; +class SymbolBody; + +// The root class of input files. +class InputFile { +public: + enum Kind { ObjectKind, SharedKind, ArchiveKind }; + Kind kind() const { return FileKind; } + + StringRef getName() const { return MB.getBufferIdentifier(); } + +protected: + InputFile(Kind K, MemoryBufferRef M) : MB(M), FileKind(K) {} + MemoryBufferRef MB; + +private: + const Kind FileKind; +}; + +template <typename ELFT> class ELFFileBase : public InputFile { +public: + typedef typename llvm::object::ELFFile<ELFT>::Elf_Shdr Elf_Shdr; + typedef typename llvm::object::ELFFile<ELFT>::Elf_Sym Elf_Sym; + typedef typename llvm::object::ELFFile<ELFT>::Elf_Word Elf_Word; + typedef typename llvm::object::ELFFile<ELFT>::Elf_Sym_Range Elf_Sym_Range; + + ELFFileBase(Kind K, MemoryBufferRef M); + static bool classof(const InputFile *F) { + Kind K = F->kind(); + return K == ObjectKind || K == SharedKind; + } + + static ELFKind getELFKind(); + const llvm::object::ELFFile<ELFT> &getObj() const { return ELFObj; } + llvm::object::ELFFile<ELFT> &getObj() { return ELFObj; } + + uint16_t getEMachine() const { return getObj().getHeader()->e_machine; } + uint8_t getOSABI() const { + return getObj().getHeader()->e_ident[llvm::ELF::EI_OSABI]; + } + + StringRef getStringTable() const { return StringTable; } + + uint32_t getSectionIndex(const Elf_Sym &Sym) const; + +protected: + llvm::object::ELFFile<ELFT> ELFObj; + const Elf_Shdr *Symtab = nullptr; + ArrayRef<Elf_Word> SymtabSHNDX; + StringRef StringTable; + 
void initStringTable(); + Elf_Sym_Range getNonLocalSymbols(); + Elf_Sym_Range getSymbolsHelper(bool); +}; + +// .o file. +template <class ELFT> class ObjectFile : public ELFFileBase<ELFT> { + typedef ELFFileBase<ELFT> Base; + typedef typename llvm::object::ELFFile<ELFT>::Elf_Sym Elf_Sym; + typedef typename llvm::object::ELFFile<ELFT>::Elf_Shdr Elf_Shdr; + typedef typename llvm::object::ELFFile<ELFT>::Elf_Sym_Range Elf_Sym_Range; + typedef typename llvm::object::ELFFile<ELFT>::Elf_Word Elf_Word; + typedef typename llvm::object::ELFFile<ELFT>::uintX_t uintX_t; + + typedef llvm::support::detail::packed_endian_specific_integral< + uint32_t, ELFT::TargetEndianness, 2> GroupEntryType; + StringRef getShtGroupSignature(const Elf_Shdr &Sec); + ArrayRef<GroupEntryType> getShtGroupEntries(const Elf_Shdr &Sec); + +public: + static bool classof(const InputFile *F) { + return F->kind() == Base::ObjectKind; + } + + ArrayRef<SymbolBody *> getSymbols() { return this->SymbolBodies; } + + explicit ObjectFile(MemoryBufferRef M); + void parse(llvm::DenseSet<StringRef> &Comdats); + + ArrayRef<InputSectionBase<ELFT> *> getSections() const { return Sections; } + InputSectionBase<ELFT> *getSection(const Elf_Sym &Sym) const; + + SymbolBody *getSymbolBody(uint32_t SymbolIndex) const { + uint32_t FirstNonLocal = this->Symtab->sh_info; + if (SymbolIndex < FirstNonLocal) + return nullptr; + return this->SymbolBodies[SymbolIndex - FirstNonLocal]; + } + + Elf_Sym_Range getLocalSymbols(); + const Elf_Sym *getLocalSymbol(uintX_t SymIndex); + + const Elf_Shdr *getSymbolTable() const { return this->Symtab; }; + + // Get MIPS GP0 value defined by this file. This value represents the gp value + // used to create the relocatable object and required to support + // R_MIPS_GPREL16 / R_MIPS_GPREL32 relocations. 
+ uint32_t getMipsGp0() const; + +private: + void initializeSections(llvm::DenseSet<StringRef> &Comdats); + void initializeSymbols(); + InputSectionBase<ELFT> *createInputSection(const Elf_Shdr &Sec); + + SymbolBody *createSymbolBody(StringRef StringTable, const Elf_Sym *Sym); + + // List of all sections defined by this file. + std::vector<InputSectionBase<ELFT> *> Sections; + + // List of all symbols referenced or defined by this file. + std::vector<SymbolBody *> SymbolBodies; + + // MIPS .reginfo section defined by this file. + MipsReginfoInputSection<ELFT> *MipsReginfo = nullptr; + + llvm::BumpPtrAllocator Alloc; + llvm::SpecificBumpPtrAllocator<MergeInputSection<ELFT>> MAlloc; + llvm::SpecificBumpPtrAllocator<EHInputSection<ELFT>> EHAlloc; +}; + +class ArchiveFile : public InputFile { +public: + explicit ArchiveFile(MemoryBufferRef M) : InputFile(ArchiveKind, M) {} + static bool classof(const InputFile *F) { return F->kind() == ArchiveKind; } + void parse(); + + // Returns a memory buffer for a given symbol. An empty memory buffer + // is returned if we have already returned the same memory buffer. + // (So that we don't instantiate same members more than once.) + MemoryBufferRef getMember(const Archive::Symbol *Sym); + + llvm::MutableArrayRef<Lazy> getLazySymbols() { return LazySymbols; } + std::vector<MemoryBufferRef> getMembers(); + +private: + std::unique_ptr<Archive> File; + std::vector<Lazy> LazySymbols; + llvm::DenseSet<uint64_t> Seen; +}; + +// .so file. 
+template <class ELFT> class SharedFile : public ELFFileBase<ELFT> { + typedef ELFFileBase<ELFT> Base; + typedef typename llvm::object::ELFFile<ELFT>::Elf_Shdr Elf_Shdr; + typedef typename llvm::object::ELFFile<ELFT>::Elf_Sym Elf_Sym; + typedef typename llvm::object::ELFFile<ELFT>::Elf_Word Elf_Word; + typedef typename llvm::object::ELFFile<ELFT>::Elf_Sym_Range Elf_Sym_Range; + + std::vector<SharedSymbol<ELFT>> SymbolBodies; + std::vector<StringRef> Undefs; + StringRef SoName; + +public: + StringRef getSoName() const { return SoName; } + llvm::MutableArrayRef<SharedSymbol<ELFT>> getSharedSymbols() { + return SymbolBodies; + } + const Elf_Shdr *getSection(const Elf_Sym &Sym) const; + llvm::ArrayRef<StringRef> getUndefinedSymbols() { return Undefs; } + + static bool classof(const InputFile *F) { + return F->kind() == Base::SharedKind; + } + + explicit SharedFile(MemoryBufferRef M); + + void parseSoName(); + void parse(); + + // Used for --as-needed + bool AsNeeded = false; + bool IsUsed = false; + bool isNeeded() const { return !AsNeeded || IsUsed; } +}; + +template <template <class> class T> +std::unique_ptr<InputFile> createELFFile(MemoryBufferRef MB); + +} // namespace elf2 +} // namespace lld + +#endif diff --git a/ELF/InputSection.cpp b/ELF/InputSection.cpp new file mode 100644 index 000000000000..2548200feb65 --- /dev/null +++ b/ELF/InputSection.cpp @@ -0,0 +1,399 @@ +//===- InputSection.cpp ---------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "InputSection.h" +#include "Config.h" +#include "Error.h" +#include "InputFiles.h" +#include "OutputSections.h" +#include "Target.h" + +using namespace llvm; +using namespace llvm::ELF; +using namespace llvm::object; + +using namespace lld; +using namespace lld::elf2; + +template <class ELFT> +InputSectionBase<ELFT>::InputSectionBase(ObjectFile<ELFT> *File, + const Elf_Shdr *Header, + Kind SectionKind) + : Header(Header), File(File), SectionKind(SectionKind) {} + +template <class ELFT> StringRef InputSectionBase<ELFT>::getSectionName() const { + ErrorOr<StringRef> Name = File->getObj().getSectionName(this->Header); + error(Name); + return *Name; +} + +template <class ELFT> +ArrayRef<uint8_t> InputSectionBase<ELFT>::getSectionData() const { + ErrorOr<ArrayRef<uint8_t>> Ret = + this->File->getObj().getSectionContents(this->Header); + error(Ret); + return *Ret; +} + +template <class ELFT> +typename ELFFile<ELFT>::uintX_t +InputSectionBase<ELFT>::getOffset(uintX_t Offset) { + switch (SectionKind) { + case Regular: + return cast<InputSection<ELFT>>(this)->OutSecOff + Offset; + case EHFrame: + return cast<EHInputSection<ELFT>>(this)->getOffset(Offset); + case Merge: + return cast<MergeInputSection<ELFT>>(this)->getOffset(Offset); + case MipsReginfo: + return cast<MipsReginfoInputSection<ELFT>>(this)->getOffset(Offset); + } + llvm_unreachable("Invalid section kind"); +} + +template <class ELFT> +typename ELFFile<ELFT>::uintX_t +InputSectionBase<ELFT>::getOffset(const Elf_Sym &Sym) { + return getOffset(Sym.st_value); +} + +// Returns a section that Rel relocation is pointing to. 
+template <class ELFT> +InputSectionBase<ELFT> * +InputSectionBase<ELFT>::getRelocTarget(const Elf_Rel &Rel) { + // Global symbol + uint32_t SymIndex = Rel.getSymbol(Config->Mips64EL); + if (SymbolBody *B = File->getSymbolBody(SymIndex)) + if (auto *D = dyn_cast<DefinedRegular<ELFT>>(B->repl())) + return D->Section; + // Local symbol + if (const Elf_Sym *Sym = File->getLocalSymbol(SymIndex)) + if (InputSectionBase<ELFT> *Sec = File->getSection(*Sym)) + return Sec; + return nullptr; +} + +template <class ELFT> +InputSectionBase<ELFT> * +InputSectionBase<ELFT>::getRelocTarget(const Elf_Rela &Rel) { + return getRelocTarget(reinterpret_cast<const Elf_Rel &>(Rel)); +} + +template <class ELFT> +InputSection<ELFT>::InputSection(ObjectFile<ELFT> *F, const Elf_Shdr *Header) + : InputSectionBase<ELFT>(F, Header, Base::Regular) {} + +template <class ELFT> +bool InputSection<ELFT>::classof(const InputSectionBase<ELFT> *S) { + return S->SectionKind == Base::Regular; +} + +template <class ELFT> +template <bool isRela> +uint8_t * +InputSectionBase<ELFT>::findMipsPairedReloc(uint8_t *Buf, uint32_t SymIndex, + uint32_t Type, + RelIteratorRange<isRela> Rels) { + // Some MIPS relocations use addend calculated from addend of the relocation + // itself and addend of paired relocation. ABI requires to compute such + // combined addend in case of REL relocation record format only. + // See p. 
4-17 at ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf + if (isRela || Config->EMachine != EM_MIPS) + return nullptr; + if (Type == R_MIPS_HI16) + Type = R_MIPS_LO16; + else if (Type == R_MIPS_PCHI16) + Type = R_MIPS_PCLO16; + else if (Type == R_MICROMIPS_HI16) + Type = R_MICROMIPS_LO16; + else + return nullptr; + for (const auto &RI : Rels) { + if (RI.getType(Config->Mips64EL) != Type) + continue; + if (RI.getSymbol(Config->Mips64EL) != SymIndex) + continue; + uintX_t Offset = getOffset(RI.r_offset); + if (Offset == (uintX_t)-1) + return nullptr; + return Buf + Offset; + } + return nullptr; +} + +template <class ELFT> +static typename llvm::object::ELFFile<ELFT>::uintX_t +getSymSize(SymbolBody &Body) { + if (auto *SS = dyn_cast<DefinedElf<ELFT>>(&Body)) + return SS->Sym.st_size; + return 0; +} + +template <class ELFT> +template <bool isRela> +void InputSectionBase<ELFT>::relocate(uint8_t *Buf, uint8_t *BufEnd, + RelIteratorRange<isRela> Rels) { + typedef Elf_Rel_Impl<ELFT, isRela> RelType; + size_t Num = Rels.end() - Rels.begin(); + for (size_t I = 0; I < Num; ++I) { + const RelType &RI = *(Rels.begin() + I); + uint32_t SymIndex = RI.getSymbol(Config->Mips64EL); + uint32_t Type = RI.getType(Config->Mips64EL); + uintX_t Offset = getOffset(RI.r_offset); + if (Offset == (uintX_t)-1) + continue; + + uint8_t *BufLoc = Buf + Offset; + uintX_t AddrLoc = OutSec->getVA() + Offset; + auto NextRelocs = llvm::make_range(&RI, Rels.end()); + + if (Target->isTlsLocalDynamicReloc(Type) && + !Target->isTlsOptimized(Type, nullptr)) { + Target->relocateOne(BufLoc, BufEnd, Type, AddrLoc, + Out<ELFT>::Got->getLocalTlsIndexVA() + + getAddend<ELFT>(RI)); + continue; + } + + const Elf_Shdr *SymTab = File->getSymbolTable(); + SymbolBody *Body = nullptr; + if (SymIndex >= SymTab->sh_info) + Body = File->getSymbolBody(SymIndex)->repl(); + + if (Target->isTlsOptimized(Type, Body)) { + uintX_t SymVA; + if (!Body) + SymVA = getLocalRelTarget(*File, RI, 0); + else if 
(Target->relocNeedsGot(Type, *Body)) + SymVA = Out<ELFT>::Got->getEntryAddr(*Body); + else + SymVA = getSymVA<ELFT>(*Body); + // By optimizing TLS relocations, it is sometimes needed to skip + // relocations that immediately follow TLS relocations. This function + // knows how many slots we need to skip. + I += Target->relocateTlsOptimize(BufLoc, BufEnd, Type, AddrLoc, SymVA, + *Body); + continue; + } + + // Handle relocations for local symbols -- they never get + // resolved so we don't allocate a SymbolBody. + uintX_t A = getAddend<ELFT>(RI); + if (!Body) { + uintX_t SymVA = getLocalRelTarget(*File, RI, A); + // We need to adjust SymVA value in case of R_MIPS_GPREL16/32 relocations + // because they use the following expression to calculate the relocation's + // result for local symbol: S + A + GP0 - G. + if (Config->EMachine == EM_MIPS && + (Type == R_MIPS_GPREL16 || Type == R_MIPS_GPREL32)) + SymVA += File->getMipsGp0(); + Target->relocateOne(BufLoc, BufEnd, Type, AddrLoc, SymVA, 0, + findMipsPairedReloc(Buf, SymIndex, Type, NextRelocs)); + continue; + } + + if (Target->isTlsGlobalDynamicReloc(Type) && + !Target->isTlsOptimized(Type, Body)) { + Target->relocateOne(BufLoc, BufEnd, Type, AddrLoc, + Out<ELFT>::Got->getGlobalDynAddr(*Body) + + getAddend<ELFT>(RI)); + continue; + } + + uintX_t SymVA = getSymVA<ELFT>(*Body); + if (Target->relocNeedsPlt(Type, *Body)) { + SymVA = Out<ELFT>::Plt->getEntryAddr(*Body); + Type = Target->getPltRefReloc(Type); + } else if (Target->relocNeedsGot(Type, *Body)) { + SymVA = Out<ELFT>::Got->getEntryAddr(*Body); + if (Body->isTls()) + Type = Target->getTlsGotReloc(Type); + } else if (!Target->needsCopyRel(Type, *Body) && + isa<SharedSymbol<ELFT>>(*Body)) { + continue; + } else if (Target->isTlsDynReloc(Type, *Body) || + Target->isSizeDynReloc(Type, *Body)) { + continue; + } else if (Config->EMachine == EM_MIPS) { + if (Type == R_MIPS_HI16 && Body == Config->MipsGpDisp) + SymVA = getMipsGpAddr<ELFT>() - AddrLoc; + else if (Type == 
R_MIPS_LO16 && Body == Config->MipsGpDisp) + SymVA = getMipsGpAddr<ELFT>() - AddrLoc + 4; + } + uintX_t Size = getSymSize<ELFT>(*Body); + Target->relocateOne(BufLoc, BufEnd, Type, AddrLoc, SymVA + A, Size + A, + findMipsPairedReloc(Buf, SymIndex, Type, NextRelocs)); + } +} + +template <class ELFT> void InputSection<ELFT>::writeTo(uint8_t *Buf) { + if (this->Header->sh_type == SHT_NOBITS) + return; + // Copy section contents from source object file to output file. + ArrayRef<uint8_t> Data = this->getSectionData(); + memcpy(Buf + OutSecOff, Data.data(), Data.size()); + + ELFFile<ELFT> &EObj = this->File->getObj(); + uint8_t *BufEnd = Buf + OutSecOff + Data.size(); + // Iterate over all relocation sections that apply to this section. + for (const Elf_Shdr *RelSec : this->RelocSections) { + if (RelSec->sh_type == SHT_RELA) + this->relocate(Buf, BufEnd, EObj.relas(RelSec)); + else + this->relocate(Buf, BufEnd, EObj.rels(RelSec)); + } +} + +template <class ELFT> +SplitInputSection<ELFT>::SplitInputSection( + ObjectFile<ELFT> *File, const Elf_Shdr *Header, + typename InputSectionBase<ELFT>::Kind SectionKind) + : InputSectionBase<ELFT>(File, Header, SectionKind) {} + +template <class ELFT> +EHInputSection<ELFT>::EHInputSection(ObjectFile<ELFT> *F, + const Elf_Shdr *Header) + : SplitInputSection<ELFT>(F, Header, InputSectionBase<ELFT>::EHFrame) { + // Mark .eh_frame sections as live by default because there are + // usually no relocations that point to .eh_frames. Otherwise, + // the garbage collector would drop all .eh_frame sections. + this->Live = true; +} + +template <class ELFT> +bool EHInputSection<ELFT>::classof(const InputSectionBase<ELFT> *S) { + return S->SectionKind == InputSectionBase<ELFT>::EHFrame; +} + +template <class ELFT> +typename EHInputSection<ELFT>::uintX_t +EHInputSection<ELFT>::getOffset(uintX_t Offset) { + // The file crtbeginT.o has relocations pointing to the start of an empty + // .eh_frame that is known to be the first in the link. 
It does that to + // identify the start of the output .eh_frame. Handle this special case. + if (this->getSectionHdr()->sh_size == 0) + return Offset; + std::pair<uintX_t, uintX_t> *I = this->getRangeAndSize(Offset).first; + uintX_t Base = I->second; + if (Base == uintX_t(-1)) + return -1; // Not in the output + + uintX_t Addend = Offset - I->first; + return Base + Addend; +} + +template <class ELFT> +MergeInputSection<ELFT>::MergeInputSection(ObjectFile<ELFT> *F, + const Elf_Shdr *Header) + : SplitInputSection<ELFT>(F, Header, InputSectionBase<ELFT>::Merge) {} + +template <class ELFT> +bool MergeInputSection<ELFT>::classof(const InputSectionBase<ELFT> *S) { + return S->SectionKind == InputSectionBase<ELFT>::Merge; +} + +template <class ELFT> +std::pair<std::pair<typename ELFFile<ELFT>::uintX_t, + typename ELFFile<ELFT>::uintX_t> *, + typename ELFFile<ELFT>::uintX_t> +SplitInputSection<ELFT>::getRangeAndSize(uintX_t Offset) { + ArrayRef<uint8_t> D = this->getSectionData(); + StringRef Data((const char *)D.data(), D.size()); + uintX_t Size = Data.size(); + if (Offset >= Size) + error("Entry is past the end of the section"); + + // Find the element this offset points to. + auto I = std::upper_bound( + Offsets.begin(), Offsets.end(), Offset, + [](const uintX_t &A, const std::pair<uintX_t, uintX_t> &B) { + return A < B.first; + }); + uintX_t End = I == Offsets.end() ? Data.size() : I->first; + --I; + return std::make_pair(&*I, End); +} + +template <class ELFT> +typename MergeInputSection<ELFT>::uintX_t +MergeInputSection<ELFT>::getOffset(uintX_t Offset) { + std::pair<std::pair<uintX_t, uintX_t> *, uintX_t> T = + this->getRangeAndSize(Offset); + std::pair<uintX_t, uintX_t> *I = T.first; + uintX_t End = T.second; + uintX_t Start = I->first; + + // Compute the Addend and if the Base is cached, return. 
+ uintX_t Addend = Offset - Start; + uintX_t &Base = I->second; + if (Base != uintX_t(-1)) + return Base + Addend; + + // Map the base to the offset in the output section and cache it. + ArrayRef<uint8_t> D = this->getSectionData(); + StringRef Data((const char *)D.data(), D.size()); + StringRef Entry = Data.substr(Start, End - Start); + Base = + static_cast<MergeOutputSection<ELFT> *>(this->OutSec)->getOffset(Entry); + return Base + Addend; +} + +template <class ELFT> +MipsReginfoInputSection<ELFT>::MipsReginfoInputSection(ObjectFile<ELFT> *F, + const Elf_Shdr *Header) + : InputSectionBase<ELFT>(F, Header, InputSectionBase<ELFT>::MipsReginfo) {} + +template <class ELFT> +uint32_t MipsReginfoInputSection<ELFT>::getGeneralMask() const { + ArrayRef<uint8_t> D = this->getSectionData(); + if (D.size() != sizeof(Elf_Mips_RegInfo)) + error("Invalid size of .reginfo section"); + return reinterpret_cast<const Elf_Mips_RegInfo *>(D.data())->ri_gprmask; +} + +template <class ELFT> uint32_t MipsReginfoInputSection<ELFT>::getGp0() const { + ArrayRef<uint8_t> D = this->getSectionData(); + if (D.size() != sizeof(Elf_Mips_RegInfo)) + error("Invalid size of .reginfo section"); + return reinterpret_cast<const Elf_Mips_RegInfo *>(D.data())->ri_gp_value; +} + +template <class ELFT> +bool MipsReginfoInputSection<ELFT>::classof(const InputSectionBase<ELFT> *S) { + return S->SectionKind == InputSectionBase<ELFT>::MipsReginfo; +} + +namespace lld { +namespace elf2 { +template class InputSectionBase<object::ELF32LE>; +template class InputSectionBase<object::ELF32BE>; +template class InputSectionBase<object::ELF64LE>; +template class InputSectionBase<object::ELF64BE>; + +template class InputSection<object::ELF32LE>; +template class InputSection<object::ELF32BE>; +template class InputSection<object::ELF64LE>; +template class InputSection<object::ELF64BE>; + +template class EHInputSection<object::ELF32LE>; +template class EHInputSection<object::ELF32BE>; +template class 
EHInputSection<object::ELF64LE>; +template class EHInputSection<object::ELF64BE>; + +template class MergeInputSection<object::ELF32LE>; +template class MergeInputSection<object::ELF32BE>; +template class MergeInputSection<object::ELF64LE>; +template class MergeInputSection<object::ELF64BE>; + +template class MipsReginfoInputSection<object::ELF32LE>; +template class MipsReginfoInputSection<object::ELF32BE>; +template class MipsReginfoInputSection<object::ELF64LE>; +template class MipsReginfoInputSection<object::ELF64BE>; +} +} diff --git a/ELF/InputSection.h b/ELF/InputSection.h new file mode 100644 index 000000000000..d4dc9864ae91 --- /dev/null +++ b/ELF/InputSection.h @@ -0,0 +1,187 @@ +//===- InputSection.h -------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_INPUT_SECTION_H +#define LLD_ELF_INPUT_SECTION_H + +#include "Config.h" +#include "lld/Core/LLVM.h" +#include "llvm/Object/ELF.h" + +namespace lld { +namespace elf2 { + +template <class ELFT> class ObjectFile; +template <class ELFT> class OutputSection; +template <class ELFT> class OutputSectionBase; + +// This corresponds to a section of an input file. +template <class ELFT> class InputSectionBase { +protected: + typedef typename llvm::object::ELFFile<ELFT>::Elf_Rel Elf_Rel; + typedef typename llvm::object::ELFFile<ELFT>::Elf_Rela Elf_Rela; + typedef typename llvm::object::ELFFile<ELFT>::Elf_Shdr Elf_Shdr; + typedef typename llvm::object::ELFFile<ELFT>::Elf_Sym Elf_Sym; + typedef typename llvm::object::ELFFile<ELFT>::uintX_t uintX_t; + const Elf_Shdr *Header; + + // The file this section is from. 
+ ObjectFile<ELFT> *File; + +public: + enum Kind { Regular, EHFrame, Merge, MipsReginfo }; + Kind SectionKind; + + InputSectionBase(ObjectFile<ELFT> *File, const Elf_Shdr *Header, + Kind SectionKind); + OutputSectionBase<ELFT> *OutSec = nullptr; + + // Used for garbage collection. + // Live bit makes sense only when Config->GcSections is true. + bool isLive() const { return !Config->GcSections || Live; } + bool Live = false; + + // Returns the size of this section (even if this is a common or BSS.) + size_t getSize() const { return Header->sh_size; } + + static InputSectionBase<ELFT> Discarded; + + StringRef getSectionName() const; + const Elf_Shdr *getSectionHdr() const { return Header; } + ObjectFile<ELFT> *getFile() const { return File; } + + // The writer sets and uses the addresses. + uintX_t getAlign() { + // The ELF spec states that a value of 0 means the section has no alignment + // constraits. + return std::max<uintX_t>(Header->sh_addralign, 1); + } + + uintX_t getOffset(const Elf_Sym &Sym); + + // Translate an offset in the input section to an offset in the output + // section. + uintX_t getOffset(uintX_t Offset); + + ArrayRef<uint8_t> getSectionData() const; + + // Returns a section that Rel is pointing to. Used by the garbage collector. 
+ InputSectionBase<ELFT> *getRelocTarget(const Elf_Rel &Rel); + InputSectionBase<ELFT> *getRelocTarget(const Elf_Rela &Rel); + + template <bool isRela> + using RelIteratorRange = + llvm::iterator_range<const llvm::object::Elf_Rel_Impl<ELFT, isRela> *>; + + template <bool isRela> + void relocate(uint8_t *Buf, uint8_t *BufEnd, RelIteratorRange<isRela> Rels); + +private: + template <bool isRela> + uint8_t *findMipsPairedReloc(uint8_t *Buf, uint32_t SymIndex, uint32_t Type, + RelIteratorRange<isRela> Rels); +}; + +template <class ELFT> +InputSectionBase<ELFT> + InputSectionBase<ELFT>::Discarded(nullptr, nullptr, + InputSectionBase<ELFT>::Regular); + +template <class ELFT> class SplitInputSection : public InputSectionBase<ELFT> { + typedef typename llvm::object::ELFFile<ELFT>::Elf_Shdr Elf_Shdr; + typedef typename llvm::object::ELFFile<ELFT>::uintX_t uintX_t; + +public: + SplitInputSection(ObjectFile<ELFT> *File, const Elf_Shdr *Header, + typename InputSectionBase<ELFT>::Kind SectionKind); + std::vector<std::pair<uintX_t, uintX_t>> Offsets; + std::pair<std::pair<uintX_t, uintX_t> *, uintX_t> + getRangeAndSize(uintX_t Offset); +}; + +// This corresponds to a SHF_MERGE section of an input file. +template <class ELFT> class MergeInputSection : public SplitInputSection<ELFT> { + typedef typename llvm::object::ELFFile<ELFT>::uintX_t uintX_t; + typedef typename llvm::object::ELFFile<ELFT>::Elf_Sym Elf_Sym; + typedef typename llvm::object::ELFFile<ELFT>::Elf_Shdr Elf_Shdr; + +public: + MergeInputSection(ObjectFile<ELFT> *F, const Elf_Shdr *Header); + static bool classof(const InputSectionBase<ELFT> *S); + // Translate an offset in the input section to an offset in the output + // section. + uintX_t getOffset(uintX_t Offset); +}; + +// This corresponds to a .eh_frame section of an input file. 
+template <class ELFT> class EHInputSection : public SplitInputSection<ELFT> {
+public:
+  using Elf_Shdr = typename llvm::object::ELFFile<ELFT>::Elf_Shdr;
+  using uintX_t = typename llvm::object::ELFFile<ELFT>::uintX_t;
+  EHInputSection(ObjectFile<ELFT> *F, const Elf_Shdr *Header);
+  static bool classof(const InputSectionBase<ELFT> *S);
+
+  // Maps an offset within this input section to the matching offset within
+  // the output section.
+  uintX_t getOffset(uintX_t Offset);
+
+  // The relocation section that refers to this one (initially null).
+  const Elf_Shdr *RelocSection = nullptr;
+};
+
+// This corresponds to a non SHF_MERGE section of an input file.
+template <class ELFT> class InputSection : public InputSectionBase<ELFT> {
+  using Base = InputSectionBase<ELFT>;
+  using Elf_Shdr = typename llvm::object::ELFFile<ELFT>::Elf_Shdr;
+  using Elf_Rela = typename llvm::object::ELFFile<ELFT>::Elf_Rela;
+  using Elf_Rel = typename llvm::object::ELFFile<ELFT>::Elf_Rel;
+  using Elf_Sym = typename llvm::object::ELFFile<ELFT>::Elf_Sym;
+  using uintX_t = typename llvm::object::ELFFile<ELFT>::uintX_t;
+
+public:
+  InputSection(ObjectFile<ELFT> *F, const Elf_Shdr *Header);
+
+  // Writes this section into a mmap'ed file. Buf is expected to point at the
+  // beginning of the output section.
+  void writeTo(uint8_t *Buf);
+
+  // The relocation sections that refer to this one.
+  SmallVector<const Elf_Shdr *, 1> RelocSections;
+
+  // Offset of this section from the beginning of the output section it was
+  // assigned to. The writer sets the value.
+  uint64_t OutSecOff = 0;
+
+  static bool classof(const InputSectionBase<ELFT> *S);
+};
+
+// The MIPS .reginfo section describes the registers used by the code in the
+// object file. The linker should collect this information and write a single
+// .reginfo section to the output file. The output section contains the union
+// of the used-register masks taken from the input .reginfo sections and the
+// final value of the `_gp` symbol. For details: Chapter 4 / "Register
+// Information" at
+// ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf
+template <class ELFT>
+class MipsReginfoInputSection : public InputSectionBase<ELFT> {
+  using Elf_Mips_RegInfo = llvm::object::Elf_Mips_RegInfo<ELFT>;
+  using Elf_Shdr = typename llvm::object::ELFFile<ELFT>::Elf_Shdr;
+
+public:
+  MipsReginfoInputSection(ObjectFile<ELFT> *F, const Elf_Shdr *Header);
+
+  uint32_t getGeneralMask() const;
+  uint32_t getGp0() const;
+
+  static bool classof(const InputSectionBase<ELFT> *S);
+};
+
+} // namespace elf2
+} // namespace lld
+
+#endif
diff --git a/ELF/LinkerScript.cpp b/ELF/LinkerScript.cpp
new file mode 100644
index 000000000000..883b623f9e2c
--- /dev/null
+++ b/ELF/LinkerScript.cpp
@@ -0,0 +1,318 @@
+//===- LinkerScript.cpp ---------------------------------------------------===//
+//
+// The LLVM Linker
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the parser/evaluator of the linker script.
+// It does not construct an AST but consumes linker script directives directly.
+// Results are written to Driver or Config object.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Config.h"
+#include "Driver.h"
+#include "SymbolTable.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/StringSaver.h"
+
+using namespace llvm;
+using namespace lld;
+using namespace lld::elf2;
+
+namespace {
+// Tokenizes a linker script up front and then interprets its directives one
+// at a time. Tokens holds the token stream and Pos is the index of the next
+// token to be consumed.
+class LinkerScript {
+public:
+  LinkerScript(BumpPtrAllocator *Alloc, StringRef Script, bool UnderSysroot)
+      : Saver(*Alloc), Tokens(tokenize(Script)), IsUnderSysroot(UnderSysroot) {}
+  void run();
+
+private:
+  // Lexing helpers.
+  static std::vector<StringRef> tokenize(StringRef S);
+  static StringRef skipSpace(StringRef S);
+
+  // Token stream helpers.
+  StringRef next();
+  bool skip(StringRef Tok);
+  bool atEOF() { return Pos == Tokens.size(); }
+  void expect(StringRef Expect);
+
+  void addFile(StringRef Path);
+
+  // One reader per directive handled by run().
+  void readAsNeeded();
+  void readEntry();
+  void readExtern();
+  void readGroup();
+  void readInclude();
+  void readOutput();
+  void readOutputArch();
+  void readOutputFormat();
+  void readSearchDir();
+  void readSections();
+
+  void readOutputSectionDescription();
+
+  StringSaver Saver;
+  std::vector<StringRef> Tokens;
+  size_t Pos = 0;
+  bool IsUnderSysroot;
+};
+}
+
+// Consumes tokens until the stream is exhausted, dispatching every top-level
+// directive to its reader. A lone ";" is ignored; any other unknown token is
+// reported through error().
+void LinkerScript::run() {
+  while (!atEOF()) {
+    StringRef Directive = next();
+    if (Directive == ";")
+      continue;
+
+    if (Directive == "ENTRY")
+      readEntry();
+    else if (Directive == "EXTERN")
+      readExtern();
+    else if (Directive == "GROUP" || Directive == "INPUT")
+      readGroup();
+    else if (Directive == "INCLUDE")
+      readInclude();
+    else if (Directive == "OUTPUT")
+      readOutput();
+    else if (Directive == "OUTPUT_ARCH")
+      readOutputArch();
+    else if (Directive == "OUTPUT_FORMAT")
+      readOutputFormat();
+    else if (Directive == "SEARCH_DIR")
+      readSearchDir();
+    else if (Directive == "SECTIONS")
+      readSections();
+    else
+      error("unknown directive: " + Directive);
+  }
+}
+
+// Split S into linker script tokens.
+std::vector<StringRef> LinkerScript::tokenize(StringRef S) {
+  std::vector<StringRef> Ret;
+  for (;;) {
+    S = skipSpace(S);
+    if (S.empty())
+      return Ret;
+
+    // Quoted token. The token is the text between the two quotes. E is the
+    // index of the closing quote and substr() takes a length, so the token
+    // is E - 1 characters long (the closing quote itself is excluded).
+    if (S.startswith("\"")) {
+      size_t E = S.find("\"", 1);
+      if (E == StringRef::npos)
+        error("unclosed quote");
+      Ret.push_back(S.substr(1, E - 1));
+      S = S.substr(E + 1);
+      continue;
+    }
+
+    // Unquoted token
+    size_t Pos = S.find_first_not_of(
+        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
+        "0123456789_.$/\\~=+[]*?-:");
+    // A character that cannot start a word (which is usually a
+    // punctuation) forms a single character token.
+    if (Pos == 0)
+      Pos = 1;
+    Ret.push_back(S.substr(0, Pos));
+    S = S.substr(Pos);
+  }
+}
+
+// Skip leading whitespace characters or /**/-style comments.
+StringRef LinkerScript::skipSpace(StringRef S) {
+  for (;;) {
+    if (S.startswith("/*")) {
+      size_t E = S.find("*/", 2);
+      if (E == StringRef::npos)
+        error("unclosed comment in a linker script");
+      S = S.substr(E + 2);
+      continue;
+    }
+    // Stop once trimming no longer removes anything.
+    size_t Size = S.size();
+    S = S.ltrim();
+    if (S.size() == Size)
+      return S;
+  }
+}
+
+// Returns the current token and advances past it.
+StringRef LinkerScript::next() {
+  if (atEOF())
+    error("unexpected EOF");
+  return Tokens[Pos++];
+}
+
+// Consumes the current token and returns true if it equals Tok; otherwise
+// leaves the stream untouched and returns false.
+bool LinkerScript::skip(StringRef Tok) {
+  if (atEOF())
+    error("unexpected EOF");
+  if (Tok != Tokens[Pos])
+    return false;
+  ++Pos;
+  return true;
+}
+
+// Consumes the next token and reports an error unless it equals Expect.
+void LinkerScript::expect(StringRef Expect) {
+  StringRef Tok = next();
+  if (Tok != Expect)
+    error(Expect + " expected, but got " + Tok);
+}
+
+// Resolves the file name S found in the script and hands the result to the
+// driver.
+void LinkerScript::addFile(StringRef S) {
+  // For a script located under the sysroot, a path starting with "/" is first
+  // tried relative to the sysroot.
+  if (IsUnderSysroot && S.startswith("/")) {
+    SmallString<128> Path;
+    (Config->Sysroot + S).toStringRef(Path);
+    if (sys::fs::exists(Path)) {
+      Driver->addFile(Saver.save(Path.str()));
+      return;
+    }
+  }
+
+  if (sys::path::is_absolute(S)) {
+    Driver->addFile(S);
+  } else if (S.startswith("=")) {
+    // A leading "=" is replaced by the sysroot, if one is configured.
+    if (Config->Sysroot.empty())
+      Driver->addFile(S.substr(1));
+    else
+      Driver->addFile(Saver.save(Config->Sysroot + "/" + S.substr(1)));
+  } else if (S.startswith("-l")) {
+    Driver->addFile(searchLibrary(S.substr(2)));
+  } else if (sys::fs::exists(S)) {
+    Driver->addFile(S);
+  } else {
+    std::string Path = findFromSearchPaths(S);
+    if (Path.empty())
+      error("Unable to find " + S);
+    Driver->addFile(Saver.save(Path));
+  }
+}
+
+// Reads "( file... )" with Config->AsNeeded temporarily set to true; the
+// previous value is restored afterwards.
+void LinkerScript::readAsNeeded() {
+  expect("(");
+  bool Orig = Config->AsNeeded;
+  Config->AsNeeded = true;
+  for (;;) {
+    StringRef Tok = next();
+    if (Tok == ")")
+      break;
+    addFile(Tok);
+  }
+  Config->AsNeeded = Orig;
+}
+
+void LinkerScript::readEntry() {
+  // -e <symbol> takes precedence over ENTRY(<symbol>).
+  expect("(");
+  StringRef Tok = next();
+  if (Config->Entry.empty())
+    Config->Entry = Tok;
+  expect(")");
+}
+
+// Reads "( symbol... )" and appends every symbol to Config->Undefined.
+void LinkerScript::readExtern() {
+  expect("(");
+  for (;;) {
+    StringRef Tok = next();
+    if (Tok == ")")
+      return;
+    Config->Undefined.push_back(Tok);
+  }
+}
+
+// Reads "( file... )", where an entry may also be a nested AS_NEEDED list.
+// run() uses this reader for both GROUP and INPUT.
+void LinkerScript::readGroup() {
+  expect("(");
+  for (;;) {
+    StringRef Tok = next();
+    if (Tok == ")")
+      return;
+    if (Tok == "AS_NEEDED") {
+      readAsNeeded();
+      continue;
+    }
+    addFile(Tok);
+  }
+}
+
+// Tokenizes the named file and splices its tokens into the stream at the
+// current position. The file contents are copied with Saver first because
+// the tokens are views into that text.
+void LinkerScript::readInclude() {
+  StringRef Tok = next();
+  auto MBOrErr = MemoryBuffer::getFile(Tok);
+  error(MBOrErr, "cannot open " + Tok);
+  std::unique_ptr<MemoryBuffer> &MB = *MBOrErr;
+  StringRef S = Saver.save(MB->getMemBufferRef().getBuffer());
+  std::vector<StringRef> V = tokenize(S);
+  Tokens.insert(Tokens.begin() + Pos, V.begin(), V.end());
+}
+
+void LinkerScript::readOutput() {
+  // -o <file> takes precedence over OUTPUT(<file>).
+  expect("(");
+  StringRef Tok = next();
+  if (Config->OutputFile.empty())
+    Config->OutputFile = Tok;
+  expect(")");
+}
+
+void LinkerScript::readOutputArch() {
+  // Error checking only for now.
+  expect("(");
+  next();
+  expect(")");
+}
+
+// Accepts either "( name )" or "( name , name , name )".
+void LinkerScript::readOutputFormat() {
+  // Error checking only for now.
+  expect("(");
+  next();
+  StringRef Tok = next();
+  if (Tok == ")")
+    return;
+  if (Tok != ",")
+    error("unexpected token: " + Tok);
+  next();
+  expect(",");
+  next();
+  expect(")");
+}
+
+// Reads "( dir )" and appends dir to Config->SearchPaths.
+void LinkerScript::readSearchDir() {
+  expect("(");
+  Config->SearchPaths.push_back(next());
+  expect(")");
+}
+
+// Reads "{ output-section-description... }".
+void LinkerScript::readSections() {
+  expect("{");
+  while (!skip("}"))
+    readOutputSectionDescription();
+}
+
+// Reads "name : { file ( section... ) ... }" and records the listed input
+// section names under Config->OutputSections[name].
+void LinkerScript::readOutputSectionDescription() {
+  StringRef Name = next();
+  std::vector<StringRef> &InputSections = Config->OutputSections[Name];
+
+  expect(":");
+  expect("{");
+  while (!skip("}")) {
+    next(); // Skip input file name.
+    expect("(");
+    while (!skip(")"))
+      InputSections.push_back(next());
+  }
+}
+
+// Returns true if Path or one of its parent paths is equivalent to the
+// configured sysroot. Always false when no sysroot is configured.
+static bool isUnderSysroot(StringRef Path) {
+  if (Config->Sysroot == "")
+    return false;
+  for (; !Path.empty(); Path = sys::path::parent_path(Path))
+    if (sys::fs::equivalent(Config->Sysroot, Path))
+      return true;
+  return false;
+}
+
+// Entry point. The other functions or classes are private to this file.
+void lld::elf2::readLinkerScript(BumpPtrAllocator *A, MemoryBufferRef MB) {
+  StringRef Path = MB.getBufferIdentifier();
+  LinkerScript(A, MB.getBuffer(), isUnderSysroot(Path)).run();
+}
diff --git a/ELF/MarkLive.cpp b/ELF/MarkLive.cpp
new file mode 100644
index 000000000000..1ad9b01af4e8
--- /dev/null
+++ b/ELF/MarkLive.cpp
@@ -0,0 +1,131 @@
+//===- MarkLive.cpp -------------------------------------------------------===//
+//
+// The LLVM Linker
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements --gc-sections, which is a feature to remove unused
+// sections from output. Unused sections are sections that are not reachable
+// from known GC-root symbols or sections. Naturally the feature is
+// implemented as a mark-sweep garbage collector.
+//
+// Here's how it works. Each InputSectionBase has a "Live" bit. The bit is off
+// by default. Starting with GC-root symbols or sections, markLive function
+// defined in this file visits all reachable sections to set their Live
+// bits. Writer will then ignore sections whose Live bits are off, so that
+// such sections are removed from output.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InputSection.h"
+#include "OutputSections.h"
+#include "SymbolTable.h"
+#include "Symbols.h"
+#include "Writer.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Object/ELF.h"
+#include <functional>
+#include <vector>
+
+using namespace llvm;
+using namespace llvm::ELF;
+using namespace llvm::object;
+
+using namespace lld;
+using namespace lld::elf2;
+
+// Invokes Fn once for every non-null relocation target of Sec, walking all of
+// the relocation sections (SHT_RELA or SHT_REL) attached to Sec.
+template <class ELFT>
+static void forEachSuccessor(InputSection<ELFT> *Sec,
+                             std::function<void(InputSectionBase<ELFT> *)> Fn) {
+  using Elf_Rel = typename ELFFile<ELFT>::Elf_Rel;
+  using Elf_Rela = typename ELFFile<ELFT>::Elf_Rela;
+  using Elf_Shdr = typename ELFFile<ELFT>::Elf_Shdr;
+
+  ELFFile<ELFT> &File = Sec->getFile()->getObj();
+  for (const Elf_Shdr *RelocSec : Sec->RelocSections) {
+    if (RelocSec->sh_type == SHT_RELA) {
+      for (const Elf_Rela &Rela : File.relas(RelocSec))
+        if (InputSectionBase<ELFT> *Dest = Sec->getRelocTarget(Rela))
+          Fn(Dest);
+      continue;
+    }
+
+    // Every other relocation section is read as SHT_REL.
+    for (const Elf_Rel &Rel : File.rels(RelocSec))
+      if (InputSectionBase<ELFT> *Dest = Sec->getRelocTarget(Rel))
+        Fn(Dest);
+  }
+}
+
+// Sections listed below are special because they are used by the loader
+// just by being in an ELF file. They should not be garbage-collected.
+template <class ELFT> static bool isReserved(InputSectionBase<ELFT> *Sec) {
+  // First decide by section type ...
+  switch (Sec->getSectionHdr()->sh_type) {
+  case SHT_FINI_ARRAY:
+  case SHT_INIT_ARRAY:
+  case SHT_NOTE:
+  case SHT_PREINIT_ARRAY:
+    return true;
+  default:
+    break;
+  }
+
+  // ... then by section name prefix.
+  StringRef Name = Sec->getSectionName();
+  return Name.startswith(".ctors") || Name.startswith(".dtors") ||
+         Name.startswith(".init") || Name.startswith(".fini") ||
+         Name.startswith(".jcr");
+}
+
+// This is the main function of the garbage collector.
+// Starting from GC-root sections, this function visits all reachable
+// sections to set their "Live" bits.
+template <class ELFT> void lld::elf2::markLive(SymbolTable<ELFT> *Symtab) {
+  // Sections that were marked live but whose successors are still unvisited.
+  SmallVector<InputSection<ELFT> *, 256> Worklist;
+
+  // Marks Sec live. Only InputSection instances are queued, because those
+  // are the only sections forEachSuccessor can walk.
+  auto Enqueue = [&](InputSectionBase<ELFT> *Sec) {
+    if (!Sec || Sec->Live)
+      return;
+    Sec->Live = true;
+    if (auto *Regular = dyn_cast<InputSection<ELFT>>(Sec))
+      Worklist.push_back(Regular);
+  };
+
+  // Marks the section that defines Sym, if Sym resolves to a DefinedRegular.
+  auto MarkSymbol = [&](SymbolBody *Sym) {
+    if (!Sym)
+      return;
+    if (auto *Defined = dyn_cast<DefinedRegular<ELFT>>(Sym->repl()))
+      Enqueue(Defined->Section);
+  };
+
+  // Add GC root symbols.
+  MarkSymbol(Config->EntrySym);
+  MarkSymbol(Symtab->find(Config->Init));
+  MarkSymbol(Symtab->find(Config->Fini));
+  for (StringRef Name : Config->Undefined)
+    MarkSymbol(Symtab->find(Name));
+
+  // Preserve externally-visible symbols if the symbols defined by this
+  // file could override other ELF file's symbols at runtime.
+  if (Config->Shared || Config->ExportDynamic) {
+    for (const std::pair<StringRef, Symbol *> &Entry : Symtab->getSymbols()) {
+      SymbolBody *Body = Entry.second->Body;
+      if (Body->getVisibility() == STV_DEFAULT)
+        MarkSymbol(Body);
+    }
+  }
+
+  // Preserve special sections.
+  for (const std::unique_ptr<ObjectFile<ELFT>> &File : Symtab->getObjectFiles())
+    for (InputSectionBase<ELFT> *Sec : File->getSections())
+      if (Sec && Sec != &InputSection<ELFT>::Discarded && isReserved(Sec))
+        Enqueue(Sec);
+
+  // Mark all reachable sections.
+  while (!Worklist.empty())
+    forEachSuccessor<ELFT>(Worklist.pop_back_val(), Enqueue);
+}
+
+template void lld::elf2::markLive<ELF32LE>(SymbolTable<ELF32LE> *);
+template void lld::elf2::markLive<ELF32BE>(SymbolTable<ELF32BE> *);
+template void lld::elf2::markLive<ELF64LE>(SymbolTable<ELF64LE> *);
+template void lld::elf2::markLive<ELF64BE>(SymbolTable<ELF64BE> *);
diff --git a/ELF/Options.td b/ELF/Options.td
new file mode 100644
index 000000000000..622cbb93bf11
--- /dev/null
+++ b/ELF/Options.td
@@ -0,0 +1,161 @@
+include "llvm/Option/OptParser.td"
+
+def Bsymbolic: Flag<["-"], "Bsymbolic">,
+  HelpText<"Bind defined symbols locally">;
+
+def Bdynamic: Flag<["-"], "Bdynamic">,
+  HelpText<"Link against shared libraries">;
+
+def Bstatic: Flag<["-"], "Bstatic">,
+  HelpText<"Do not link against shared libraries">;
+
+def L : JoinedOrSeparate<["-"], "L">, MetaVarName<"<dir>">,
+  HelpText<"Directory to search for libraries">;
+
+def O : Joined<["-"], "O">, HelpText<"Optimize">;
+
+def allow_multiple_definition: Flag<["--"], "allow-multiple-definition">,
+  HelpText<"Allow multiple definitions">;
+
+def allow_shlib_undefined : Flag<["--", "-"], "allow-shlib-undefined">;
+
+def as_needed : Flag<["--"], "as-needed">;
+
+def disable_new_dtags : Flag<["--"], "disable-new-dtags">,
+  HelpText<"Disable new dynamic tags">;
+
+def discard_all : Flag<["-"], "discard-all">,
+  HelpText<"Delete all local symbols">;
+
+def discard_locals : Flag<["-"], "discard-locals">,
+  HelpText<"Delete temporary local symbols">;
+
+def discard_none : Flag<["-"], "discard-none">,
+  HelpText<"Keep all symbols in the symbol table">;
+
+def dynamic_linker : Separate<["--", "-"], "dynamic-linker">,
+  HelpText<"Which dynamic linker to use">;
+
+def enable_new_dtags : Flag<["--"], "enable-new-dtags">,
+  HelpText<"Enable new dynamic tags">;
+
+def entry : Separate<["--", "-"], "entry">, MetaVarName<"<entry>">,
+  HelpText<"Name of entry point symbol">;
+
+def export_dynamic : Flag<["--", "-"], "export-dynamic">,
+  HelpText<"Put symbols in the dynamic symbol table">;
+
+def fini : Separate<["-"], "fini">, MetaVarName<"<symbol>">,
+  HelpText<"Specify a finalizer function">;
+
+def hash_style : Separate<["--", "-"], "hash-style">,
+  HelpText<"Specify hash style (sysv, gnu or both)">;
+
+def gc_sections : Flag<["--"], "gc-sections">,
+  HelpText<"Enable garbage collection of unused sections">;
+
+def init : Separate<["-"], "init">, MetaVarName<"<symbol>">,
+  HelpText<"Specify an initializer function">;
+
+def l : JoinedOrSeparate<["-"], "l">, MetaVarName<"<libName>">,
+  HelpText<"Root name of library to use">;
+
+def m : JoinedOrSeparate<["-"], "m">,
+  HelpText<"Set target emulation">;
+
+def no_allow_shlib_undefined : Flag<["--"], "no-allow-shlib-undefined">;
+
+def no_as_needed : Flag<["--"], "no-as-needed">;
+
+def no_whole_archive : Flag<["--", "-"], "no-whole-archive">,
+  HelpText<"Restores the default behavior of loading archive members">;
+
+def noinhibit_exec : Flag<["--"], "noinhibit-exec">,
+  HelpText<"Retain the executable output file whenever it is still usable">;
+
+def no_undefined : Flag<["--"], "no-undefined">,
+  HelpText<"Report unresolved symbols even if the linker is creating a shared library">;
+
+def o : Separate<["-"], "o">, MetaVarName<"<path>">,
+  HelpText<"Path to file to write output">;
+
+def print_gc_sections: Flag<["--"], "print-gc-sections">,
+  HelpText<"List removed unused sections">;
+
+def rpath : Separate<["-"], "rpath">,
+  HelpText<"Add a DT_RUNPATH to the output">;
+
+def relocatable : Flag<["--"], "relocatable">;
+
+def script : Separate<["--"], "script">, HelpText<"Read linker script">;
+
+def shared : Flag<["-"], "shared">,
+  HelpText<"Build a shared object">;
+
+def soname : Joined<["-"], "soname=">,
+  HelpText<"Set DT_SONAME">;
+
+def strip_all : Flag<["--"], "strip-all">,
+  HelpText<"Strip all symbols">;
+
+def sysroot : Joined<["--"], "sysroot=">,
+  HelpText<"Set the system root">;
+
+def undefined : Joined<["--"], "undefined=">,
+  HelpText<"Force undefined symbol during linking">;
+
+def verbose : Flag<["--"], "verbose">;
+
+def whole_archive : Flag<["--", "-"], "whole-archive">,
+  HelpText<"Force load of all members in a static library">;
+
+def z : JoinedOrSeparate<["-"], "z">, MetaVarName<"<option>">,
+  HelpText<"Linker option extensions">;
+
+// Aliases
+def alias_Bdynamic_call_shared: Flag<["-"], "call_shared">, Alias<Bdynamic>;
+def alias_Bdynamic_dy: Flag<["-"], "dy">, Alias<Bdynamic>;
+def alias_Bstatic_dn: Flag<["-"], "dn">, Alias<Bstatic>;
+def alias_Bstatic_non_shared: Flag<["-"], "non_shared">, Alias<Bstatic>;
+def alias_Bstatic_static: Flag<["-"], "static">, Alias<Bstatic>;
+def alias_L__library_path : Joined<["--"], "library-path=">, Alias<L>;
+def alias_discard_all_x: Flag<["-"], "x">, Alias<discard_all>;
+def alias_discard_locals_X: Flag<["-"], "X">, Alias<discard_locals>;
+def alias_entry_e : Separate<["-"], "e">, Alias<entry>;
+def alias_export_dynamic_E: Flag<["-"], "E">, Alias<export_dynamic>;
+def alias_fini_fini : Joined<["-"], "fini=">, Alias<fini>;
+def alias_hash_style_hash_style : Joined<["--", "-"], "hash-style=">, Alias<hash_style>;
+def alias_init_init : Joined<["-"], "init=">, Alias<init>;
+def alias_l__library : Joined<["--"], "library=">, Alias<l>;
+def alias_o_output : Joined<["--"], "output=">, Alias<o>;
+def alias_rpath_rpath : Joined<["-"], "rpath=">, Alias<rpath>;
+def alias_relocatable_r : Flag<["-"], "r">, Alias<relocatable>;
+def alias_shared_Bshareable : Flag<["-"], "Bshareable">, Alias<shared>;
+def alias_soname_h : Separate<["-"], "h">, Alias<soname>;
+def alias_soname_soname : Separate<["-"], "soname">, Alias<soname>;
+def alias_script_T : Separate<["-"], "T">, Alias<script>;
+def alias_strip_all: Flag<["-"], "s">, Alias<strip_all>;
+def alias_undefined_u : Separate<["-"], "u">, Alias<undefined>;
+
+// Our symbol resolution algorithm handles symbols in archive files differently
+// than traditional linkers, so we don't need --start-group and --end-group.
+// These options are recognized for compatibility but ignored.
+def end_group : Flag<["--"], "end-group">;
+def end_group_paren: Flag<["-"], ")">;
+def start_group : Flag<["--"], "start-group">;
+def start_group_paren: Flag<["-"], "(">;
+
+// Options listed below are silently ignored for now for compatibility.
+def build_id : Flag<["--"], "build-id">;
+def eh_frame_hdr : Flag<["--"], "eh-frame-hdr">;
+def fatal_warnings : Flag<["--"], "fatal-warnings">;
+def no_add_needed : Flag<["--"], "no-add-needed">;
+def no_fatal_warnings : Flag<["--"], "no-fatal-warnings">;
+def no_warn_mismatch : Flag<["--"], "no-warn-mismatch">;
+def version_script : Separate<["--"], "version-script">;
+def warn_common : Flag<["--"], "warn-common">;
+def warn_shared_textrel : Flag<["--"], "warn-shared-textrel">;
+def G : Separate<["-"], "G">;
+
+// Aliases for ignored options
+def alias_version_script_version_script : Joined<["--"], "version-script=">, Alias<version_script>;
diff --git a/ELF/OutputSections.cpp b/ELF/OutputSections.cpp
new file mode 100644
index 000000000000..8bdc2b0e5c4f
--- /dev/null
+++ b/ELF/OutputSections.cpp
@@ -0,0 +1,1534 @@
+//===- OutputSections.cpp -------------------------------------------------===//
+//
+// The LLVM Linker
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "OutputSections.h"
+#include "Config.h"
+#include "SymbolTable.h"
+#include "Target.h"
+#include "llvm/Support/MathExtras.h"
+
+using namespace llvm;
+using namespace llvm::object;
+using namespace llvm::support::endian;
+using namespace llvm::ELF;
+
+using namespace lld;
+using namespace lld::elf2;
+
+bool lld::elf2::HasGotOffRel = false;
+
+// Zero-fills the section header and then records the section type and flags.
+template <class ELFT>
+OutputSectionBase<ELFT>::OutputSectionBase(StringRef Name, uint32_t sh_type,
+                                           uintX_t sh_flags)
+    : Name(Name) {
+  memset(&Header, 0, sizeof(Elf_Shdr));
+  Header.sh_type = sh_type;
+  Header.sh_flags = sh_flags;
+}
+
+template <class ELFT>
+GotPltSection<ELFT>::GotPltSection()
+    : OutputSectionBase<ELFT>(".got.plt", llvm::ELF::SHT_PROGBITS,
+                              llvm::ELF::SHF_ALLOC | llvm::ELF::SHF_WRITE) {
+  this->Header.sh_addralign = sizeof(uintX_t);
+}
+
+// Appends Sym and stores its slot index, which counts the target's header
+// entries as well.
+template <class ELFT> void GotPltSection<ELFT>::addEntry(SymbolBody *Sym) {
+  const auto HeaderEntries = Target->getGotPltHeaderEntriesNum();
+  Sym->GotPltIndex = HeaderEntries + Entries.size();
+  Entries.push_back(Sym);
+}
+
+template <class ELFT> bool GotPltSection<ELFT>::empty() const {
+  return Entries.empty();
+}
+
+template <class ELFT>
+typename GotPltSection<ELFT>::uintX_t
+GotPltSection<ELFT>::getEntryAddr(const SymbolBody &B) const {
+  return this->getVA() + B.GotPltIndex * sizeof(uintX_t);
+}
+
+// The section holds the header entries followed by one word per symbol.
+template <class ELFT> void GotPltSection<ELFT>::finalize() {
+  const auto NumSlots = Target->getGotPltHeaderEntriesNum() + Entries.size();
+  this->Header.sh_size = NumSlots * sizeof(uintX_t);
+}
+
+// Writes the header entries, then one entry per symbol computed from the
+// address of the symbol's PLT entry.
+template <class ELFT> void GotPltSection<ELFT>::writeTo(uint8_t *Buf) {
+  Target->writeGotPltHeaderEntries(Buf);
+  uint8_t *Slot = Buf + Target->getGotPltHeaderEntriesNum() * sizeof(uintX_t);
+  for (const SymbolBody *Body : Entries) {
+    Target->writeGotPltEntry(Slot, Out<ELFT>::Plt->getEntryAddr(*Body));
+    Slot += sizeof(uintX_t);
+  }
+}
+
+template <class ELFT>
+GotSection<ELFT>::GotSection()
+    : OutputSectionBase<ELFT>(".got", llvm::ELF::SHT_PROGBITS,
+                              llvm::ELF::SHF_ALLOC | llvm::ELF::SHF_WRITE) {
+  if (Config->EMachine == EM_MIPS)
+    this->Header.sh_flags |= llvm::ELF::SHF_MIPS_GPREL;
+  this->Header.sh_addralign = sizeof(uintX_t);
+}
+
+// Appends Sym and stores its slot index, which counts the target's header
+// entries as well.
+template <class ELFT> void GotSection<ELFT>::addEntry(SymbolBody *Sym) {
+  const auto HeaderEntries = Target->getGotHeaderEntriesNum();
+  Sym->GotIndex = HeaderEntries + Entries.size();
+  Entries.push_back(Sym);
+}
+
+// Reserves the GOT slots for a global dynamic TLS symbol. Returns false if
+// Sym already has such an index.
+template <class ELFT> bool GotSection<ELFT>::addDynTlsEntry(SymbolBody *Sym) {
+  if (Sym->hasGlobalDynIndex())
+    return false;
+  const auto HeaderEntries = Target->getGotHeaderEntriesNum();
+  Sym->GlobalDynIndex = HeaderEntries + Entries.size();
+  // Global Dynamic TLS entries take two GOT slots.
+  Entries.push_back(Sym);
+  Entries.push_back(nullptr);
+  return true;
+}
+
+// Reserves two null slots and remembers the byte offset of the first one in
+// LocalTlsIndexOff. Returns false if that offset was already set.
+template <class ELFT> bool GotSection<ELFT>::addCurrentModuleTlsIndex() {
+  if (LocalTlsIndexOff != uint32_t(-1))
+    return false;
+  LocalTlsIndexOff = Entries.size() * sizeof(uintX_t);
+  Entries.push_back(nullptr);
+  Entries.push_back(nullptr);
+  return true;
+}
+
+template <class ELFT>
+typename GotSection<ELFT>::uintX_t
+GotSection<ELFT>::getEntryAddr(const SymbolBody &B) const {
+  return this->getVA() + B.GotIndex * sizeof(uintX_t);
+}
+
+template <class ELFT>
+typename GotSection<ELFT>::uintX_t
+GotSection<ELFT>::getGlobalDynAddr(const SymbolBody &B) const {
+  return this->getVA() + B.GlobalDynIndex * sizeof(uintX_t);
+}
+
+// Returns the first recorded entry, or null when there are no entries.
+template <class ELFT>
+const SymbolBody *GotSection<ELFT>::getMipsFirstGlobalEntry() const {
+  if (Entries.empty())
+    return nullptr;
+  return Entries.front();
+}
+
+template <class ELFT>
+unsigned GotSection<ELFT>::getMipsLocalEntriesNum() const {
+  // TODO: Update when the support of GOT entries for local symbols is added.
+  return Target->getGotHeaderEntriesNum();
+}
+
+// The section holds the header entries followed by one word per entry.
+template <class ELFT> void GotSection<ELFT>::finalize() {
+  const auto NumSlots = Target->getGotHeaderEntriesNum() + Entries.size();
+  this->Header.sh_size = NumSlots * sizeof(uintX_t);
+}
+
+// Writes the header entries, then fills the slot of every symbol whose
+// address is written by the linker. Null entries and symbols left to the
+// dynamic linker only advance the cursor.
+template <class ELFT> void GotSection<ELFT>::writeTo(uint8_t *Buf) {
+  Target->writeGotHeaderEntries(Buf);
+  uint8_t *Slot = Buf + Target->getGotHeaderEntriesNum() * sizeof(uintX_t);
+  for (const SymbolBody *Body : Entries) {
+    uint8_t *Entry = Slot;
+    Slot += sizeof(uintX_t);
+    if (!Body)
+      continue;
+    // MIPS has special rules to fill up GOT entries.
+    // See "Global Offset Table" in Chapter 5 in the following document
+    // for detailed description:
+    // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf
+    // As the first approach, we can just store addresses for all symbols.
+    if (Config->EMachine != EM_MIPS && canBePreempted(Body, false))
+      continue; // The dynamic linker will take care of it.
+    uintX_t VA = getSymVA<ELFT>(*Body);
+    write<uintX_t, ELFT::TargetEndianness, sizeof(uintX_t)>(Entry, VA);
+  }
+}
+
+template <class ELFT>
+PltSection<ELFT>::PltSection()
+    : OutputSectionBase<ELFT>(".plt", llvm::ELF::SHT_PROGBITS,
+                              llvm::ELF::SHF_ALLOC | llvm::ELF::SHF_EXECINSTR) {
+  this->Header.sh_addralign = 16;
+}
+
+// Writes PLT[0] when the target supports lazy relocations, then one PLT entry
+// per recorded symbol. With lazy relocations the entries refer to .got.plt,
+// otherwise to .got.
+template <class ELFT> void PltSection<ELFT>::writeTo(uint8_t *Buf) {
+  size_t Off = 0;
+  const bool LazyReloc = Target->supportsLazyRelocations();
+  if (LazyReloc) {
+    // First write PLT[0] entry which is special.
+    Target->writePltZeroEntry(Buf, Out<ELFT>::GotPlt->getVA(), this->getVA());
+    Off += Target->getPltZeroEntrySize();
+  }
+  for (auto &Item : Entries) {
+    const SymbolBody *Body = Item.first;
+    unsigned RelOff = Item.second;
+    uint64_t GotVA;
+    uint64_t GotEntryVA;
+    if (LazyReloc) {
+      GotVA = Out<ELFT>::GotPlt->getVA();
+      GotEntryVA = Out<ELFT>::GotPlt->getEntryAddr(*Body);
+    } else {
+      GotVA = Out<ELFT>::Got->getVA();
+      GotEntryVA = Out<ELFT>::Got->getEntryAddr(*Body);
+    }
+    uint64_t PltVA = this->getVA() + Off;
+    Target->writePltEntry(Buf + Off, GotVA, GotEntryVA, PltVA, Body->PltIndex,
+                          RelOff);
+    Off += Target->getPltEntrySize();
+  }
+}
+
+// Records Sym together with the current relocation offset of .rela.plt (lazy
+// relocations) or .rela.dyn (otherwise).
+template <class ELFT> void PltSection<ELFT>::addEntry(SymbolBody *Sym) {
+  Sym->PltIndex = Entries.size();
+  unsigned RelOff;
+  if (Target->supportsLazyRelocations())
+    RelOff = Out<ELFT>::RelaPlt->getRelocOffset();
+  else
+    RelOff = Out<ELFT>::RelaDyn->getRelocOffset();
+  Entries.push_back(std::make_pair(Sym, RelOff));
+}
+
+template <class ELFT>
+typename PltSection<ELFT>::uintX_t
+PltSection<ELFT>::getEntryAddr(const SymbolBody &B) const {
+  return this->getVA() + Target->getPltZeroEntrySize() +
+         B.PltIndex * Target->getPltEntrySize();
+}
+
+template <class ELFT> void PltSection<ELFT>::finalize() {
+  this->Header.sh_size = Target->getPltZeroEntrySize() +
+                         Entries.size() * Target->getPltEntrySize();
+}
+
+template <class ELFT>
+RelocationSection<ELFT>::RelocationSection(StringRef Name, bool IsRela)
+    : OutputSectionBase<ELFT>(Name,
+                              IsRela ? llvm::ELF::SHT_RELA : llvm::ELF::SHT_REL,
+                              llvm::ELF::SHF_ALLOC),
+      IsRela(IsRela) {
+  this->Header.sh_entsize = IsRela ? sizeof(Elf_Rela) : sizeof(Elf_Rel);
+  this->Header.sh_addralign = ELFT::Is64Bits ? 8 : 4;
+}
+
+// Applies corresponding symbol and type for dynamic tls relocation.
+// Returns true if relocation was handled.
+template <class ELFT>
+bool RelocationSection<ELFT>::applyTlsDynamicReloc(SymbolBody *Body,
+                                                   uint32_t Type, Elf_Rel *P,
+                                                   Elf_Rel *N) {
+  // Local dynamic: a single module-index relocation without a symbol.
+  if (Target->isTlsLocalDynamicReloc(Type)) {
+    P->setSymbolAndType(0, Target->getTlsModuleIndexReloc(), Config->Mips64EL);
+    P->r_offset = Out<ELFT>::Got->getLocalTlsIndexVA();
+    return true;
+  }
+
+  if (!Body || !Target->isTlsGlobalDynamicReloc(Type))
+    return false;
+
+  if (Target->isTlsOptimized(Type, Body)) {
+    P->setSymbolAndType(Body->DynamicSymbolTableIndex,
+                        Target->getTlsGotReloc(), Config->Mips64EL);
+    P->r_offset = Out<ELFT>::Got->getEntryAddr(*Body);
+    return true;
+  }
+
+  // Global dynamic: a pair of relocations (module index, then offset) that
+  // cover the two consecutive GOT slots of the symbol. N is the entry that
+  // receives the second relocation.
+  P->setSymbolAndType(Body->DynamicSymbolTableIndex,
+                      Target->getTlsModuleIndexReloc(), Config->Mips64EL);
+  P->r_offset = Out<ELFT>::Got->getGlobalDynAddr(*Body);
+  N->setSymbolAndType(Body->DynamicSymbolTableIndex,
+                      Target->getTlsOffsetReloc(), Config->Mips64EL);
+  N->r_offset = Out<ELFT>::Got->getGlobalDynAddr(*Body) + sizeof(uintX_t);
+  return true;
+}
+
+// Writes one relocation entry per element of Relocs into Buf. Entries are
+// Elf_Rela or Elf_Rel records depending on IsRela.
+template <class ELFT> void RelocationSection<ELFT>::writeTo(uint8_t *Buf) {
+  const unsigned EntrySize = IsRela ? sizeof(Elf_Rela) : sizeof(Elf_Rel);
+  for (const DynamicReloc<ELFT> &Rel : Relocs) {
+    auto *P = reinterpret_cast<Elf_Rel *>(Buf);
+    Buf += EntrySize;
+
+    // Skip placeholder for global dynamic TLS relocation pair. It was already
+    // handled by the previous relocation.
+    if (!Rel.C || !Rel.RI)
+      continue;
+
+    InputSectionBase<ELFT> &C = *Rel.C;
+    const Elf_Rel &RI = *Rel.RI;
+    uint32_t SymIndex = RI.getSymbol(Config->Mips64EL);
+    const ObjectFile<ELFT> &File = *C.getFile();
+    SymbolBody *Body = File.getSymbolBody(SymIndex);
+    if (Body)
+      Body = Body->repl();
+
+    uint32_t Type = RI.getType(Config->Mips64EL);
+    // Buf already points at the next entry, which receives the second half of
+    // a global dynamic TLS pair.
+    if (applyTlsDynamicReloc(Body, Type, P, reinterpret_cast<Elf_Rel *>(Buf)))
+      continue;
+    bool NeedsCopy = Body && Target->needsCopyRel(Type, *Body);
+    bool NeedsGot = Body && Target->relocNeedsGot(Type, *Body);
+    bool CanBePreempted = canBePreempted(Body, NeedsGot);
+    bool LazyReloc = Body && Target->supportsLazyRelocations() &&
+                     Target->relocNeedsPlt(Type, *Body);
+    bool IsDynRelative = Type == Target->getRelativeReloc();
+
+    // Pick the symbol index and the dynamic relocation type.
+    unsigned Sym = CanBePreempted ? Body->DynamicSymbolTableIndex : 0;
+    unsigned Reloc;
+    if (!CanBePreempted && Body && isGnuIFunc<ELFT>(*Body))
+      Reloc = Target->getIRelativeReloc();
+    else if (!CanBePreempted || IsDynRelative)
+      Reloc = Target->getRelativeReloc();
+    else if (LazyReloc)
+      Reloc = Target->getPltReloc();
+    else if (NeedsGot)
+      Reloc = Body->isTls() ? Target->getTlsGotReloc() : Target->getGotReloc();
+    else if (NeedsCopy)
+      Reloc = Target->getCopyReloc();
+    else
+      Reloc = Target->getDynReloc(Type);
+    P->setSymbolAndType(Sym, Reloc, Config->Mips64EL);
+
+    // Pick the location the relocation applies to.
+    if (LazyReloc)
+      P->r_offset = Out<ELFT>::GotPlt->getEntryAddr(*Body);
+    else if (NeedsGot)
+      P->r_offset = Out<ELFT>::Got->getEntryAddr(*Body);
+    else if (NeedsCopy)
+      // The result is dereferenced unconditionally, so use cast<> (which
+      // asserts on a type mismatch) rather than dyn_cast<> (which would
+      // return null and crash here).
+      // NOTE(review): assumes needsCopyRel() is only true for shared
+      // symbols -- confirm in Target.cpp.
+      P->r_offset = Out<ELFT>::Bss->getVA() +
+                    cast<SharedSymbol<ELFT>>(Body)->OffsetInBSS;
+    else
+      P->r_offset = C.getOffset(RI.r_offset) + C.OutSec->getVA();
+
+    uintX_t OrigAddend = 0;
+    if (IsRela && !NeedsGot)
+      OrigAddend = static_cast<const Elf_Rela &>(RI).r_addend;
+
+    uintX_t Addend;
+    if (NeedsCopy)
+      Addend = 0;
+    else if (CanBePreempted || IsDynRelative)
+      Addend = OrigAddend;
+    else if (Body)
+      Addend = getSymVA<ELFT>(*Body) + OrigAddend;
+    else if (IsRela)
+      Addend =
+          getLocalRelTarget(File, static_cast<const Elf_Rela &>(RI),
+                            getAddend<ELFT>(static_cast<const Elf_Rela &>(RI)));
+    else
+      Addend = getLocalRelTarget(File, RI, 0);
+
+    if (IsRela)
+      static_cast<Elf_Rela *>(P)->r_addend = Addend;
+  }
+}
+
+// Returns the byte offset at which the next relocation would be placed.
+template <class ELFT> unsigned RelocationSection<ELFT>::getRelocOffset() {
+  const unsigned EntrySize = IsRela ? sizeof(Elf_Rela) : sizeof(Elf_Rel);
+  return EntrySize * Relocs.size();
+}
+
+template <class ELFT> void RelocationSection<ELFT>::finalize() {
+  this->Header.sh_link = Static ? Out<ELFT>::SymTab->SectionIndex
+                                : Out<ELFT>::DynSymTab->SectionIndex;
+  this->Header.sh_size = Relocs.size() * this->Header.sh_entsize;
+}
+
+// The section size is the length of the dynamic linker path plus one byte.
+template <class ELFT>
+InterpSection<ELFT>::InterpSection()
+    : OutputSectionBase<ELFT>(".interp", llvm::ELF::SHT_PROGBITS,
+                              llvm::ELF::SHF_ALLOC) {
+  this->Header.sh_size = Config->DynamicLinker.size() + 1;
+  this->Header.sh_addralign = 1;
+}
+
+// Looks up the name offset in .shstrtab and copies the header to SHdr.
+template <class ELFT>
+void OutputSectionBase<ELFT>::writeHeaderTo(Elf_Shdr *SHdr) {
+  Header.sh_name = Out<ELFT>::ShStrTab->getOffset(Name);
+  *SHdr = Header;
+}
+
+// Copies the path bytes only.
+// NOTE(review): the terminating NUL counted in sh_size is not written here;
+// presumably the output buffer is zero-initialized -- confirm.
+template <class ELFT> void InterpSection<ELFT>::writeTo(uint8_t *Buf) {
+  memcpy(Buf, Config->DynamicLinker.data(), Config->DynamicLinker.size());
+}
+
+template <class ELFT>
+HashTableSection<ELFT>::HashTableSection()
+    : OutputSectionBase<ELFT>(".hash", llvm::ELF::SHT_HASH,
+                              llvm::ELF::SHF_ALLOC) {
+  this->Header.sh_entsize = sizeof(Elf_Word);
+  this->Header.sh_addralign = sizeof(Elf_Word);
+}
+
+// Hash function of the System V style hash table. Bytes are iterated as
+// unsigned values so that names containing bytes >= 0x80 hash the same way
+// whether or not plain `char` is signed on the host.
+static uint32_t hashSysv(StringRef Name) {
+  uint32_t H = 0;
+  for (uint8_t C : Name) {
+    H = (H << 4) + C;
+    uint32_t G = H & 0xf0000000;
+    if (G)
+      H ^= G >> 24;
+    H &= ~G;
+  }
+  return H;
+}
+
+template <class ELFT> void HashTableSection<ELFT>::finalize() {
+  this->Header.sh_link = Out<ELFT>::DynSymTab->SectionIndex;
+
+  unsigned NumEntries = 2;                             // nbucket and nchain.
+  NumEntries += Out<ELFT>::DynSymTab->getNumSymbols(); // The chain entries.
+
+  // Create as many buckets as there are symbols.
+  // FIXME: This is simplistic. We can try to optimize it, but implementing
+  // support for SHT_GNU_HASH is probably even more profitable.
+  NumEntries += Out<ELFT>::DynSymTab->getNumSymbols();
+  this->Header.sh_size = NumEntries * sizeof(Elf_Word);
+}
+
+// Layout: nbucket, nchain, the buckets, then the chains.
+template <class ELFT> void HashTableSection<ELFT>::writeTo(uint8_t *Buf) {
+  unsigned NumSymbols = Out<ELFT>::DynSymTab->getNumSymbols();
+  auto *P = reinterpret_cast<Elf_Word *>(Buf);
+  *P++ = NumSymbols; // nbucket
+  *P++ = NumSymbols; // nchain
+
+  Elf_Word *Buckets = P;
+  Elf_Word *Chains = P + NumSymbols;
+
+  for (SymbolBody *Body : Out<ELFT>::DynSymTab->getSymbols()) {
+    StringRef Name = Body->getName();
+    unsigned I = Body->DynamicSymbolTableIndex;
+    uint32_t Hash = hashSysv(Name) % NumSymbols;
+    // Push the symbol onto the front of its bucket's chain.
+    Chains[I] = Buckets[Hash];
+    Buckets[Hash] = I;
+  }
+}
+
+// Hash function of the GNU style hash table: H = H * 33 + C, seeded with
+// 5381.
+static uint32_t hashGnu(StringRef Name) {
+  uint32_t H = 5381;
+  for (uint8_t C : Name)
+    H = (H << 5) + H + C;
+  return H;
+}
+
+template <class ELFT>
+GnuHashTableSection<ELFT>::GnuHashTableSection()
+    : OutputSectionBase<ELFT>(".gnu.hash", llvm::ELF::SHT_GNU_HASH,
+                              llvm::ELF::SHF_ALLOC) {
+  this->Header.sh_entsize = ELFT::Is64Bits ? 0 : 4;
+  this->Header.sh_addralign = ELFT::Is64Bits ? 8 : 4;
+}
+
+template <class ELFT>
+unsigned GnuHashTableSection<ELFT>::calcNBuckets(unsigned NumHashed) {
+  if (!NumHashed)
+    return 0;
+
+  // These values are prime numbers which are not greater than 2^(N-1) + 1.
+  // In result, for any particular NumHashed we return a prime number
+  // which is not greater than NumHashed.
+  static const unsigned Primes[] = {
+      1,   1,    3,    3,    7,    13,    31,    61,    127,   251,
+      509, 1021, 2039, 4093, 8191, 16381, 32749, 65521, 131071};
+
+  return Primes[std::min<unsigned>(Log2_32_Ceil(NumHashed),
+                                   array_lengthof(Primes) - 1)];
+}
+
+// Bloom filter estimation: at least 8 bits for each hashed symbol.
+// GNU Hash table requirement: it should be a power of 2,
+//   the minimum value is 1, even for an empty table.
+// Expected results for a 32-bit target:
+//   calcMaskWords(0..4)   = 1
+//   calcMaskWords(5..8)   = 2
+//   calcMaskWords(9..16)  = 4
+// For a 64-bit target:
+//   calcMaskWords(0..8)   = 1
+//   calcMaskWords(9..16)  = 2
+//   calcMaskWords(17..32) = 4
+template <class ELFT>
+unsigned GnuHashTableSection<ELFT>::calcMaskWords(unsigned NumHashed) {
+  if (!NumHashed)
+    return 1;
+  return NextPowerOf2((NumHashed - 1) / sizeof(Elf_Off));
+}
+
+template <class ELFT> void GnuHashTableSection<ELFT>::finalize() {
+  unsigned NumHashed = HashedSymbols.size();
+  NBuckets = calcNBuckets(NumHashed);
+  MaskWords = calcMaskWords(NumHashed);
+  // Second hash shift estimation: just predefined values.
+  Shift2 = ELFT::Is64Bits ? 6 : 5;
+
+  this->Header.sh_link = Out<ELFT>::DynSymTab->SectionIndex;
+  this->Header.sh_size = sizeof(Elf_Word) * 4            // Header
+                         + sizeof(Elf_Off) * MaskWords   // Bloom Filter
+                         + sizeof(Elf_Word) * NBuckets   // Hash Buckets
+                         + sizeof(Elf_Word) * NumHashed; // Hash Values
+}
+
+// Writes the header, and the bloom filter and hash table as well unless
+// there are no hashed symbols. The helpers take Buf by reference; writeHeader
+// and writeBloomFilter advance it past what they wrote.
+template <class ELFT> void GnuHashTableSection<ELFT>::writeTo(uint8_t *Buf) {
+  writeHeader(Buf);
+  if (HashedSymbols.empty())
+    return;
+  writeBloomFilter(Buf);
+  writeHashTable(Buf);
+}
+
+// Writes the four header words: the bucket count, the number of symbols that
+// are not hashed, the bloom filter size in words and the second hash shift.
+template <class ELFT>
+void GnuHashTableSection<ELFT>::writeHeader(uint8_t *&Buf) {
+  auto *P = reinterpret_cast<Elf_Word *>(Buf);
+  *P++ = NBuckets;
+  *P++ = Out<ELFT>::DynSymTab->getNumSymbols() - HashedSymbols.size();
+  *P++ = MaskWords;
+  *P++ = Shift2;
+  Buf = reinterpret_cast<uint8_t *>(P);
+}
+
+// Sets two bits per hashed symbol in the bloom filter word selected by the
+// symbol's hash: one from the hash itself and one from the hash shifted right
+// by Shift2.
+// NOTE(review): the words are only OR-ed into; presumably the output buffer
+// is zero-initialized -- confirm.
+template <class ELFT>
+void GnuHashTableSection<ELFT>::writeBloomFilter(uint8_t *&Buf) {
+  unsigned C = sizeof(Elf_Off) * 8;
+
+  auto *Masks = reinterpret_cast<Elf_Off *>(Buf);
+  for (const HashedSymbolData &Item : HashedSymbols) {
+    size_t Pos = (Item.Hash / C) & (MaskWords - 1);
+    uintX_t V = (uintX_t(1) << (Item.Hash % C)) |
+                (uintX_t(1) << ((Item.Hash >> Shift2) % C));
+    Masks[Pos] |= V;
+  }
+  Buf += sizeof(Elf_Off) * MaskWords;
+}
+
+template <class ELFT>
+void GnuHashTableSection<ELFT>::writeHashTable(uint8_t
*Buf) { + Elf_Word *Buckets = reinterpret_cast<Elf_Word *>(Buf); + Elf_Word *Values = Buckets + NBuckets; + + int PrevBucket = -1; + int I = 0; + for (const HashedSymbolData &Item : HashedSymbols) { + int Bucket = Item.Hash % NBuckets; + assert(PrevBucket <= Bucket); + if (Bucket != PrevBucket) { + Buckets[Bucket] = Item.Body->DynamicSymbolTableIndex; + PrevBucket = Bucket; + if (I > 0) + Values[I - 1] |= 1; + } + Values[I] = Item.Hash & ~1; + ++I; + } + if (I > 0) + Values[I - 1] |= 1; +} + +static bool includeInGnuHashTable(SymbolBody *B) { + // Assume that includeInDynamicSymtab() is already checked. + return !B->isUndefined(); +} + +template <class ELFT> +void GnuHashTableSection<ELFT>::addSymbols(std::vector<SymbolBody *> &Symbols) { + std::vector<SymbolBody *> NotHashed; + NotHashed.reserve(Symbols.size()); + HashedSymbols.reserve(Symbols.size()); + for (SymbolBody *B : Symbols) { + if (includeInGnuHashTable(B)) + HashedSymbols.push_back(HashedSymbolData{B, hashGnu(B->getName())}); + else + NotHashed.push_back(B); + } + if (HashedSymbols.empty()) + return; + + unsigned NBuckets = calcNBuckets(HashedSymbols.size()); + std::stable_sort(HashedSymbols.begin(), HashedSymbols.end(), + [&](const HashedSymbolData &L, const HashedSymbolData &R) { + return L.Hash % NBuckets < R.Hash % NBuckets; + }); + + Symbols = std::move(NotHashed); + for (const HashedSymbolData &Item : HashedSymbols) + Symbols.push_back(Item.Body); +} + +template <class ELFT> +DynamicSection<ELFT>::DynamicSection(SymbolTable<ELFT> &SymTab) + : OutputSectionBase<ELFT>(".dynamic", llvm::ELF::SHT_DYNAMIC, + llvm::ELF::SHF_ALLOC | llvm::ELF::SHF_WRITE), + SymTab(SymTab) { + Elf_Shdr &Header = this->Header; + Header.sh_addralign = ELFT::Is64Bits ? 8 : 4; + Header.sh_entsize = ELFT::Is64Bits ? 16 : 8; + + // .dynamic section is not writable on MIPS. 
+ // See "Special Section" in Chapter 4 in the following document: + // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf + if (Config->EMachine == EM_MIPS) + Header.sh_flags = llvm::ELF::SHF_ALLOC; +} + +template <class ELFT> void DynamicSection<ELFT>::finalize() { + if (this->Header.sh_size) + return; // Already finalized. + + Elf_Shdr &Header = this->Header; + Header.sh_link = Out<ELFT>::DynStrTab->SectionIndex; + + unsigned NumEntries = 0; + if (Out<ELFT>::RelaDyn->hasRelocs()) { + ++NumEntries; // DT_RELA / DT_REL + ++NumEntries; // DT_RELASZ / DT_RELSZ + ++NumEntries; // DT_RELAENT / DT_RELENT + } + if (Out<ELFT>::RelaPlt && Out<ELFT>::RelaPlt->hasRelocs()) { + ++NumEntries; // DT_JMPREL + ++NumEntries; // DT_PLTRELSZ + ++NumEntries; // DT_PLTGOT / DT_MIPS_PLTGOT + ++NumEntries; // DT_PLTREL + } + + ++NumEntries; // DT_SYMTAB + ++NumEntries; // DT_SYMENT + ++NumEntries; // DT_STRTAB + ++NumEntries; // DT_STRSZ + if (Out<ELFT>::GnuHashTab) + ++NumEntries; // DT_GNU_HASH + if (Out<ELFT>::HashTab) + ++NumEntries; // DT_HASH + + if (!Config->RPath.empty()) { + ++NumEntries; // DT_RUNPATH / DT_RPATH + Out<ELFT>::DynStrTab->add(Config->RPath); + } + + if (!Config->SoName.empty()) { + ++NumEntries; // DT_SONAME + Out<ELFT>::DynStrTab->add(Config->SoName); + } + + if (PreInitArraySec) + NumEntries += 2; + if (InitArraySec) + NumEntries += 2; + if (FiniArraySec) + NumEntries += 2; + + for (const std::unique_ptr<SharedFile<ELFT>> &F : SymTab.getSharedFiles()) { + if (!F->isNeeded()) + continue; + Out<ELFT>::DynStrTab->add(F->getSoName()); + ++NumEntries; + } + + if (Symbol *S = SymTab.getSymbols().lookup(Config->Init)) + InitSym = S->Body; + if (Symbol *S = SymTab.getSymbols().lookup(Config->Fini)) + FiniSym = S->Body; + if (InitSym) + ++NumEntries; // DT_INIT + if (FiniSym) + ++NumEntries; // DT_FINI + + if (Config->Bsymbolic) + DtFlags |= DF_SYMBOLIC; + if (Config->ZNodelete) + DtFlags1 |= DF_1_NODELETE; + if (Config->ZNow) { + DtFlags |= DF_BIND_NOW; + 
DtFlags1 |= DF_1_NOW; + } + if (Config->ZOrigin) { + DtFlags |= DF_ORIGIN; + DtFlags1 |= DF_1_ORIGIN; + } + + if (DtFlags) + ++NumEntries; // DT_FLAGS + if (DtFlags1) + ++NumEntries; // DT_FLAGS_1 + + if (Config->EMachine == EM_MIPS) { + ++NumEntries; // DT_MIPS_RLD_VERSION + ++NumEntries; // DT_MIPS_FLAGS + ++NumEntries; // DT_MIPS_BASE_ADDRESS + ++NumEntries; // DT_MIPS_SYMTABNO + ++NumEntries; // DT_MIPS_LOCAL_GOTNO + ++NumEntries; // DT_MIPS_GOTSYM; + ++NumEntries; // DT_PLTGOT + if (Out<ELFT>::MipsRldMap) + ++NumEntries; // DT_MIPS_RLD_MAP + } + + ++NumEntries; // DT_NULL + + Header.sh_size = NumEntries * Header.sh_entsize; +} + +template <class ELFT> void DynamicSection<ELFT>::writeTo(uint8_t *Buf) { + auto *P = reinterpret_cast<Elf_Dyn *>(Buf); + + auto WritePtr = [&](int32_t Tag, uint64_t Val) { + P->d_tag = Tag; + P->d_un.d_ptr = Val; + ++P; + }; + + auto WriteVal = [&](int32_t Tag, uint32_t Val) { + P->d_tag = Tag; + P->d_un.d_val = Val; + ++P; + }; + + if (Out<ELFT>::RelaDyn->hasRelocs()) { + bool IsRela = Out<ELFT>::RelaDyn->isRela(); + WritePtr(IsRela ? DT_RELA : DT_REL, Out<ELFT>::RelaDyn->getVA()); + WriteVal(IsRela ? DT_RELASZ : DT_RELSZ, Out<ELFT>::RelaDyn->getSize()); + WriteVal(IsRela ? DT_RELAENT : DT_RELENT, + IsRela ? sizeof(Elf_Rela) : sizeof(Elf_Rel)); + } + if (Out<ELFT>::RelaPlt && Out<ELFT>::RelaPlt->hasRelocs()) { + WritePtr(DT_JMPREL, Out<ELFT>::RelaPlt->getVA()); + WriteVal(DT_PLTRELSZ, Out<ELFT>::RelaPlt->getSize()); + // On MIPS, the address of the .got.plt section is stored in + // the DT_MIPS_PLTGOT entry because the DT_PLTGOT entry points to + // the .got section. See "Dynamic Section" in the following document: + // https://sourceware.org/ml/binutils/2008-07/txt00000.txt + WritePtr((Config->EMachine == EM_MIPS) ? DT_MIPS_PLTGOT : DT_PLTGOT, + Out<ELFT>::GotPlt->getVA()); + WriteVal(DT_PLTREL, Out<ELFT>::RelaPlt->isRela() ? 
DT_RELA : DT_REL); + } + + WritePtr(DT_SYMTAB, Out<ELFT>::DynSymTab->getVA()); + WritePtr(DT_SYMENT, sizeof(Elf_Sym)); + WritePtr(DT_STRTAB, Out<ELFT>::DynStrTab->getVA()); + WriteVal(DT_STRSZ, Out<ELFT>::DynStrTab->data().size()); + if (Out<ELFT>::GnuHashTab) + WritePtr(DT_GNU_HASH, Out<ELFT>::GnuHashTab->getVA()); + if (Out<ELFT>::HashTab) + WritePtr(DT_HASH, Out<ELFT>::HashTab->getVA()); + + // If --enable-new-dtags is set, lld emits DT_RUNPATH + // instead of DT_RPATH. The two tags are functionally + // equivalent except for the following: + // - DT_RUNPATH is searched after LD_LIBRARY_PATH, while + // DT_RPATH is searched before. + // - DT_RUNPATH is used only to search for direct + // dependencies of the object it's contained in, while + // DT_RPATH is used for indirect dependencies as well. + if (!Config->RPath.empty()) + WriteVal(Config->EnableNewDtags ? DT_RUNPATH : DT_RPATH, + Out<ELFT>::DynStrTab->getOffset(Config->RPath)); + + if (!Config->SoName.empty()) + WriteVal(DT_SONAME, Out<ELFT>::DynStrTab->getOffset(Config->SoName)); + + auto WriteArray = [&](int32_t T1, int32_t T2, + const OutputSectionBase<ELFT> *Sec) { + if (!Sec) + return; + WritePtr(T1, Sec->getVA()); + WriteVal(T2, Sec->getSize()); + }; + WriteArray(DT_PREINIT_ARRAY, DT_PREINIT_ARRAYSZ, PreInitArraySec); + WriteArray(DT_INIT_ARRAY, DT_INIT_ARRAYSZ, InitArraySec); + WriteArray(DT_FINI_ARRAY, DT_FINI_ARRAYSZ, FiniArraySec); + + for (const std::unique_ptr<SharedFile<ELFT>> &F : SymTab.getSharedFiles()) + if (F->isNeeded()) + WriteVal(DT_NEEDED, Out<ELFT>::DynStrTab->getOffset(F->getSoName())); + + if (InitSym) + WritePtr(DT_INIT, getSymVA<ELFT>(*InitSym)); + if (FiniSym) + WritePtr(DT_FINI, getSymVA<ELFT>(*FiniSym)); + if (DtFlags) + WriteVal(DT_FLAGS, DtFlags); + if (DtFlags1) + WriteVal(DT_FLAGS_1, DtFlags1); + + // See "Dynamic Section" in Chapter 5 in the following document + // for detailed description: + // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf + if 
(Config->EMachine == EM_MIPS) { + WriteVal(DT_MIPS_RLD_VERSION, 1); + WriteVal(DT_MIPS_FLAGS, RHF_NOTPOT); + WritePtr(DT_MIPS_BASE_ADDRESS, Target->getVAStart()); + WriteVal(DT_MIPS_SYMTABNO, Out<ELFT>::DynSymTab->getNumSymbols()); + WriteVal(DT_MIPS_LOCAL_GOTNO, Out<ELFT>::Got->getMipsLocalEntriesNum()); + if (const SymbolBody *B = Out<ELFT>::Got->getMipsFirstGlobalEntry()) + WriteVal(DT_MIPS_GOTSYM, B->DynamicSymbolTableIndex); + else + WriteVal(DT_MIPS_GOTSYM, Out<ELFT>::DynSymTab->getNumSymbols()); + WritePtr(DT_PLTGOT, Out<ELFT>::Got->getVA()); + if (Out<ELFT>::MipsRldMap) + WritePtr(DT_MIPS_RLD_MAP, Out<ELFT>::MipsRldMap->getVA()); + } + + WriteVal(DT_NULL, 0); +} + +template <class ELFT> +OutputSection<ELFT>::OutputSection(StringRef Name, uint32_t sh_type, + uintX_t sh_flags) + : OutputSectionBase<ELFT>(Name, sh_type, sh_flags) {} + +template <class ELFT> +void OutputSection<ELFT>::addSection(InputSectionBase<ELFT> *C) { + auto *S = cast<InputSection<ELFT>>(C); + Sections.push_back(S); + S->OutSec = this; + uint32_t Align = S->getAlign(); + if (Align > this->Header.sh_addralign) + this->Header.sh_addralign = Align; + + uintX_t Off = this->Header.sh_size; + Off = RoundUpToAlignment(Off, Align); + S->OutSecOff = Off; + Off += S->getSize(); + this->Header.sh_size = Off; +} + +template <class ELFT> +typename ELFFile<ELFT>::uintX_t lld::elf2::getSymVA(const SymbolBody &S) { + switch (S.kind()) { + case SymbolBody::DefinedSyntheticKind: { + auto &D = cast<DefinedSynthetic<ELFT>>(S); + return D.Section.getVA() + D.Value; + } + case SymbolBody::DefinedRegularKind: { + const auto &DR = cast<DefinedRegular<ELFT>>(S); + InputSectionBase<ELFT> *SC = DR.Section; + if (!SC) + return DR.Sym.st_value; + if (DR.Sym.getType() == STT_TLS) + return SC->OutSec->getVA() + SC->getOffset(DR.Sym) - + Out<ELFT>::TlsPhdr->p_vaddr; + return SC->OutSec->getVA() + SC->getOffset(DR.Sym); + } + case SymbolBody::DefinedCommonKind: + return Out<ELFT>::Bss->getVA() + 
cast<DefinedCommon>(S).OffsetInBSS; + case SymbolBody::SharedKind: { + auto &SS = cast<SharedSymbol<ELFT>>(S); + if (SS.NeedsCopy) + return Out<ELFT>::Bss->getVA() + SS.OffsetInBSS; + return 0; + } + case SymbolBody::UndefinedElfKind: + case SymbolBody::UndefinedKind: + return 0; + case SymbolBody::LazyKind: + assert(S.isUsedInRegularObj() && "Lazy symbol reached writer"); + return 0; + } + llvm_unreachable("Invalid symbol kind"); +} + +// Returns a VA which a relocation RI refers to. Used only for local symbols. +// For non-local symbols, use getSymVA instead. +template <class ELFT, bool IsRela> +typename ELFFile<ELFT>::uintX_t +lld::elf2::getLocalRelTarget(const ObjectFile<ELFT> &File, + const Elf_Rel_Impl<ELFT, IsRela> &RI, + typename ELFFile<ELFT>::uintX_t Addend) { + typedef typename ELFFile<ELFT>::Elf_Sym Elf_Sym; + typedef typename ELFFile<ELFT>::uintX_t uintX_t; + + // PPC64 has a special relocation representing the TOC base pointer + // that does not have a corresponding symbol. + if (Config->EMachine == EM_PPC64 && RI.getType(false) == R_PPC64_TOC) + return getPPC64TocBase() + Addend; + + const Elf_Sym *Sym = + File.getObj().getRelocationSymbol(&RI, File.getSymbolTable()); + + if (!Sym) + error("Unsupported relocation without symbol"); + + InputSectionBase<ELFT> *Section = File.getSection(*Sym); + + if (Sym->getType() == STT_TLS) + return (Section->OutSec->getVA() + Section->getOffset(*Sym) + Addend) - + Out<ELFT>::TlsPhdr->p_vaddr; + + // According to the ELF spec reference to a local symbol from outside + // the group are not allowed. Unfortunately .eh_frame breaks that rule + // and must be treated specially. For now we just replace the symbol with + // 0.
+ if (Section == &InputSection<ELFT>::Discarded || !Section->isLive()) + return Addend; + + uintX_t VA = Section->OutSec->getVA(); + if (isa<InputSection<ELFT>>(Section)) + return VA + Section->getOffset(*Sym) + Addend; + + uintX_t Offset = Sym->st_value; + if (Sym->getType() == STT_SECTION) { + Offset += Addend; + Addend = 0; + } + return VA + Section->getOffset(Offset) + Addend; +} + +// Returns true if a symbol can be replaced at load-time by a symbol +// with the same name defined in other ELF executable or DSO. +bool lld::elf2::canBePreempted(const SymbolBody *Body, bool NeedsGot) { + if (!Body) + return false; // Body is a local symbol. + if (Body->isShared()) + return true; + + if (Body->isUndefined()) { + if (!Body->isWeak()) + return true; + + // This is an horrible corner case. Ideally we would like to say that any + // undefined symbol can be preempted so that the dynamic linker has a + // chance of finding it at runtime. + // + // The problem is that the code sequence used to test for weak undef + // functions looks like + // if (func) func() + // If the code is -fPIC the first reference is a load from the got and + // everything works. + // If the code is not -fPIC there is no reasonable way to solve it: + // * A relocation writing to the text segment will fail (it is ro). + // * A copy relocation doesn't work for functions. + // * The trick of using a plt entry as the address would fail here since + // the plt entry would have a non zero address. + // Since we cannot do anything better, we just resolve the symbol to 0 and + // don't produce a dynamic relocation. + // + // As an extra hack, assume that if we are producing a shared library the + // user knows what he or she is doing and can handle a dynamic relocation. 
+ return Config->Shared || NeedsGot; + } + if (!Config->Shared) + return false; + return Body->getVisibility() == STV_DEFAULT; +} + +template <class ELFT> void OutputSection<ELFT>::writeTo(uint8_t *Buf) { + for (InputSection<ELFT> *C : Sections) + C->writeTo(Buf); +} + +template <class ELFT> +EHOutputSection<ELFT>::EHOutputSection(StringRef Name, uint32_t sh_type, + uintX_t sh_flags) + : OutputSectionBase<ELFT>(Name, sh_type, sh_flags) {} + +template <class ELFT> +EHRegion<ELFT>::EHRegion(EHInputSection<ELFT> *S, unsigned Index) + : S(S), Index(Index) {} + +template <class ELFT> StringRef EHRegion<ELFT>::data() const { + ArrayRef<uint8_t> SecData = S->getSectionData(); + ArrayRef<std::pair<uintX_t, uintX_t>> Offsets = S->Offsets; + size_t Start = Offsets[Index].first; + size_t End = + Index == Offsets.size() - 1 ? SecData.size() : Offsets[Index + 1].first; + return StringRef((const char *)SecData.data() + Start, End - Start); +} + +template <class ELFT> +Cie<ELFT>::Cie(EHInputSection<ELFT> *S, unsigned Index) + : EHRegion<ELFT>(S, Index) {} + +template <class ELFT> +template <bool IsRela> +void EHOutputSection<ELFT>::addSectionAux( + EHInputSection<ELFT> *S, + iterator_range<const Elf_Rel_Impl<ELFT, IsRela> *> Rels) { + const endianness E = ELFT::TargetEndianness; + + S->OutSec = this; + uint32_t Align = S->getAlign(); + if (Align > this->Header.sh_addralign) + this->Header.sh_addralign = Align; + + Sections.push_back(S); + + ArrayRef<uint8_t> SecData = S->getSectionData(); + ArrayRef<uint8_t> D = SecData; + uintX_t Offset = 0; + auto RelI = Rels.begin(); + auto RelE = Rels.end(); + + DenseMap<unsigned, unsigned> OffsetToIndex; + while (!D.empty()) { + unsigned Index = S->Offsets.size(); + S->Offsets.push_back(std::make_pair(Offset, -1)); + + uintX_t Length = readEntryLength(D); + StringRef Entry((const char *)D.data(), Length); + + while (RelI != RelE && RelI->r_offset < Offset) + ++RelI; + uintX_t NextOffset = Offset + Length; + bool HasReloc = RelI != RelE && 
RelI->r_offset < NextOffset; + + uint32_t ID = read32<E>(D.data() + 4); + if (ID == 0) { + // CIE + Cie<ELFT> C(S, Index); + + StringRef Personality; + if (HasReloc) { + uint32_t SymIndex = RelI->getSymbol(Config->Mips64EL); + SymbolBody &Body = *S->getFile()->getSymbolBody(SymIndex)->repl(); + Personality = Body.getName(); + } + + std::pair<StringRef, StringRef> CieInfo(Entry, Personality); + auto P = CieMap.insert(std::make_pair(CieInfo, Cies.size())); + if (P.second) { + Cies.push_back(C); + this->Header.sh_size += RoundUpToAlignment(Length, sizeof(uintX_t)); + } + OffsetToIndex[Offset] = P.first->second; + } else { + if (!HasReloc) + error("FDE doesn't reference another section"); + InputSectionBase<ELFT> *Target = S->getRelocTarget(*RelI); + if (Target != &InputSection<ELFT>::Discarded && Target->isLive()) { + uint32_t CieOffset = Offset + 4 - ID; + auto I = OffsetToIndex.find(CieOffset); + if (I == OffsetToIndex.end()) + error("Invalid CIE reference"); + Cies[I->second].Fdes.push_back(EHRegion<ELFT>(S, Index)); + this->Header.sh_size += RoundUpToAlignment(Length, sizeof(uintX_t)); + } + } + + Offset = NextOffset; + D = D.slice(Length); + } +} + +template <class ELFT> +typename EHOutputSection<ELFT>::uintX_t +EHOutputSection<ELFT>::readEntryLength(ArrayRef<uint8_t> D) { + const endianness E = ELFT::TargetEndianness; + + if (D.size() < 4) + error("Truncated CIE/FDE length"); + uint64_t Len = read32<E>(D.data()); + if (Len < UINT32_MAX) { + if (Len > (UINT32_MAX - 4)) + error("CIE/FIE size is too large"); + if (Len + 4 > D.size()) + error("CIE/FIE ends past the end of the section"); + return Len + 4; + } + + if (D.size() < 12) + error("Truncated CIE/FDE length"); + Len = read64<E>(D.data() + 4); + if (Len > (UINT64_MAX - 12)) + error("CIE/FIE size is too large"); + if (Len + 12 > D.size()) + error("CIE/FIE ends past the end of the section"); + return Len + 12; +} + +template <class ELFT> +void EHOutputSection<ELFT>::addSection(InputSectionBase<ELFT> *C) { + auto 
*S = cast<EHInputSection<ELFT>>(C); + const Elf_Shdr *RelSec = S->RelocSection; + if (!RelSec) + return addSectionAux( + S, make_range((const Elf_Rela *)nullptr, (const Elf_Rela *)nullptr)); + ELFFile<ELFT> &Obj = S->getFile()->getObj(); + if (RelSec->sh_type == SHT_RELA) + return addSectionAux(S, Obj.relas(RelSec)); + return addSectionAux(S, Obj.rels(RelSec)); +} + +template <class ELFT> +static typename ELFFile<ELFT>::uintX_t writeAlignedCieOrFde(StringRef Data, + uint8_t *Buf) { + typedef typename ELFFile<ELFT>::uintX_t uintX_t; + const endianness E = ELFT::TargetEndianness; + uint64_t Len = RoundUpToAlignment(Data.size(), sizeof(uintX_t)); + write32<E>(Buf, Len - 4); + memcpy(Buf + 4, Data.data() + 4, Data.size() - 4); + return Len; +} + +template <class ELFT> void EHOutputSection<ELFT>::writeTo(uint8_t *Buf) { + const endianness E = ELFT::TargetEndianness; + size_t Offset = 0; + for (const Cie<ELFT> &C : Cies) { + size_t CieOffset = Offset; + + uintX_t CIELen = writeAlignedCieOrFde<ELFT>(C.data(), Buf + Offset); + C.S->Offsets[C.Index].second = Offset; + Offset += CIELen; + + for (const EHRegion<ELFT> &F : C.Fdes) { + uintX_t Len = writeAlignedCieOrFde<ELFT>(F.data(), Buf + Offset); + write32<E>(Buf + Offset + 4, Offset + 4 - CieOffset); // Pointer + F.S->Offsets[F.Index].second = Offset; + Offset += Len; + } + } + + for (EHInputSection<ELFT> *S : Sections) { + const Elf_Shdr *RelSec = S->RelocSection; + if (!RelSec) + continue; + ELFFile<ELFT> &EObj = S->getFile()->getObj(); + if (RelSec->sh_type == SHT_RELA) + S->relocate(Buf, nullptr, EObj.relas(RelSec)); + else + S->relocate(Buf, nullptr, EObj.rels(RelSec)); + } +} + +template <class ELFT> +MergeOutputSection<ELFT>::MergeOutputSection(StringRef Name, uint32_t sh_type, + uintX_t sh_flags) + : OutputSectionBase<ELFT>(Name, sh_type, sh_flags) {} + +template <class ELFT> void MergeOutputSection<ELFT>::writeTo(uint8_t *Buf) { + if (shouldTailMerge()) { + StringRef Data = Builder.data(); + memcpy(Buf, 
Data.data(), Data.size()); + return; + } + for (const std::pair<StringRef, size_t> &P : Builder.getMap()) { + StringRef Data = P.first; + memcpy(Buf + P.second, Data.data(), Data.size()); + } +} + +static size_t findNull(StringRef S, size_t EntSize) { + // Optimize the common case. + if (EntSize == 1) + return S.find(0); + + for (unsigned I = 0, N = S.size(); I != N; I += EntSize) { + const char *B = S.begin() + I; + if (std::all_of(B, B + EntSize, [](char C) { return C == 0; })) + return I; + } + return StringRef::npos; +} + +template <class ELFT> +void MergeOutputSection<ELFT>::addSection(InputSectionBase<ELFT> *C) { + auto *S = cast<MergeInputSection<ELFT>>(C); + S->OutSec = this; + uint32_t Align = S->getAlign(); + if (Align > this->Header.sh_addralign) + this->Header.sh_addralign = Align; + + ArrayRef<uint8_t> D = S->getSectionData(); + StringRef Data((const char *)D.data(), D.size()); + uintX_t EntSize = S->getSectionHdr()->sh_entsize; + uintX_t Offset = 0; + + if (this->Header.sh_flags & SHF_STRINGS) { + while (!Data.empty()) { + size_t End = findNull(Data, EntSize); + if (End == StringRef::npos) + error("String is not null terminated"); + StringRef Entry = Data.substr(0, End + EntSize); + uintX_t OutputOffset = Builder.add(Entry); + if (shouldTailMerge()) + OutputOffset = -1; + S->Offsets.push_back(std::make_pair(Offset, OutputOffset)); + uintX_t Size = End + EntSize; + Data = Data.substr(Size); + Offset += Size; + } + } else { + for (unsigned I = 0, N = Data.size(); I != N; I += EntSize) { + StringRef Entry = Data.substr(I, EntSize); + size_t OutputOffset = Builder.add(Entry); + S->Offsets.push_back(std::make_pair(Offset, OutputOffset)); + Offset += EntSize; + } + } +} + +template <class ELFT> +unsigned MergeOutputSection<ELFT>::getOffset(StringRef Val) { + return Builder.getOffset(Val); +} + +template <class ELFT> bool MergeOutputSection<ELFT>::shouldTailMerge() const { + return Config->Optimize >= 2 && this->Header.sh_flags & SHF_STRINGS; +} + +template 
<class ELFT> void MergeOutputSection<ELFT>::finalize() { + if (shouldTailMerge()) + Builder.finalize(); + this->Header.sh_size = Builder.getSize(); +} + +template <class ELFT> +StringTableSection<ELFT>::StringTableSection(StringRef Name, bool Dynamic) + : OutputSectionBase<ELFT>(Name, llvm::ELF::SHT_STRTAB, + Dynamic ? (uintX_t)llvm::ELF::SHF_ALLOC : 0), + Dynamic(Dynamic) { + this->Header.sh_addralign = 1; +} + +template <class ELFT> void StringTableSection<ELFT>::writeTo(uint8_t *Buf) { + StringRef Data = StrTabBuilder.data(); + memcpy(Buf, Data.data(), Data.size()); +} + +template <class ELFT> +bool lld::elf2::shouldKeepInSymtab(const ObjectFile<ELFT> &File, + StringRef SymName, + const typename ELFFile<ELFT>::Elf_Sym &Sym) { + if (Sym.getType() == STT_SECTION) + return false; + + InputSectionBase<ELFT> *Sec = File.getSection(Sym); + // If sym references a section in a discarded group, don't keep it. + if (Sec == &InputSection<ELFT>::Discarded) + return false; + + if (Config->DiscardNone) + return true; + + // In ELF assembly .L symbols are normally discarded by the assembler. + // If the assembler fails to do so, the linker discards them if + // * --discard-locals is used. + // * The symbol is in a SHF_MERGE section, which is normally the reason for + // the assembler keeping the .L symbol. + if (!SymName.startswith(".L") && !SymName.empty()) + return true; + + if (Config->DiscardLocals) + return false; + + return !(Sec->getSectionHdr()->sh_flags & SHF_MERGE); +} + +template <class ELFT> +SymbolTableSection<ELFT>::SymbolTableSection( + SymbolTable<ELFT> &Table, StringTableSection<ELFT> &StrTabSec) + : OutputSectionBase<ELFT>( + StrTabSec.isDynamic() ? ".dynsym" : ".symtab", + StrTabSec.isDynamic() ? llvm::ELF::SHT_DYNSYM : llvm::ELF::SHT_SYMTAB, + StrTabSec.isDynamic() ? 
(uintX_t)llvm::ELF::SHF_ALLOC : 0), + Table(Table), StrTabSec(StrTabSec) { + typedef OutputSectionBase<ELFT> Base; + typename Base::Elf_Shdr &Header = this->Header; + + Header.sh_entsize = sizeof(Elf_Sym); + Header.sh_addralign = ELFT::Is64Bits ? 8 : 4; +} + +// Orders symbols according to their positions in the GOT, +// in compliance with MIPS ABI rules. +// See "Global Offset Table" in Chapter 5 in the following document +// for detailed description: +// ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf +static bool sortMipsSymbols(SymbolBody *L, SymbolBody *R) { + if (!L->isInGot() || !R->isInGot()) + return R->isInGot(); + return L->GotIndex < R->GotIndex; +} + +template <class ELFT> void SymbolTableSection<ELFT>::finalize() { + if (this->Header.sh_size) + return; // Already finalized. + + this->Header.sh_size = getNumSymbols() * sizeof(Elf_Sym); + this->Header.sh_link = StrTabSec.SectionIndex; + this->Header.sh_info = NumLocals + 1; + + if (!StrTabSec.isDynamic()) { + std::stable_sort(Symbols.begin(), Symbols.end(), + [](SymbolBody *L, SymbolBody *R) { + return getSymbolBinding(L) == STB_LOCAL && + getSymbolBinding(R) != STB_LOCAL; + }); + return; + } + if (Out<ELFT>::GnuHashTab) + // NB: It also sorts Symbols to meet the GNU hash table requirements. 
+ Out<ELFT>::GnuHashTab->addSymbols(Symbols); + else if (Config->EMachine == EM_MIPS) + std::stable_sort(Symbols.begin(), Symbols.end(), sortMipsSymbols); + size_t I = 0; + for (SymbolBody *B : Symbols) + B->DynamicSymbolTableIndex = ++I; +} + +template <class ELFT> +void SymbolTableSection<ELFT>::addLocalSymbol(StringRef Name) { + StrTabSec.add(Name); + ++NumVisible; + ++NumLocals; +} + +template <class ELFT> +void SymbolTableSection<ELFT>::addSymbol(SymbolBody *Body) { + StrTabSec.add(Body->getName()); + Symbols.push_back(Body); + ++NumVisible; +} + +template <class ELFT> void SymbolTableSection<ELFT>::writeTo(uint8_t *Buf) { + Buf += sizeof(Elf_Sym); + + // All symbols with STB_LOCAL binding precede the weak and global symbols. + // .dynsym only contains global symbols. + if (!Config->DiscardAll && !StrTabSec.isDynamic()) + writeLocalSymbols(Buf); + + writeGlobalSymbols(Buf); +} + +template <class ELFT> +void SymbolTableSection<ELFT>::writeLocalSymbols(uint8_t *&Buf) { + // Iterate over all input object files to copy their local symbols + // to the output symbol table pointed by Buf. 
+ for (const std::unique_ptr<ObjectFile<ELFT>> &File : Table.getObjectFiles()) { + Elf_Sym_Range Syms = File->getLocalSymbols(); + for (const Elf_Sym &Sym : Syms) { + ErrorOr<StringRef> SymNameOrErr = Sym.getName(File->getStringTable()); + error(SymNameOrErr); + StringRef SymName = *SymNameOrErr; + if (!shouldKeepInSymtab<ELFT>(*File, SymName, Sym)) + continue; + + auto *ESym = reinterpret_cast<Elf_Sym *>(Buf); + uintX_t VA = 0; + if (Sym.st_shndx == SHN_ABS) { + ESym->st_shndx = SHN_ABS; + VA = Sym.st_value; + } else { + InputSectionBase<ELFT> *Section = File->getSection(Sym); + if (!Section->isLive()) + continue; + const OutputSectionBase<ELFT> *OutSec = Section->OutSec; + ESym->st_shndx = OutSec->SectionIndex; + VA += OutSec->getVA() + Section->getOffset(Sym); + } + ESym->st_name = StrTabSec.getOffset(SymName); + ESym->st_size = Sym.st_size; + ESym->setBindingAndType(Sym.getBinding(), Sym.getType()); + ESym->st_value = VA; + Buf += sizeof(*ESym); + } + } +} + +template <class ELFT> +static const typename llvm::object::ELFFile<ELFT>::Elf_Sym * +getElfSym(SymbolBody &Body) { + if (auto *EBody = dyn_cast<DefinedElf<ELFT>>(&Body)) + return &EBody->Sym; + if (auto *EBody = dyn_cast<UndefinedElf<ELFT>>(&Body)) + return &EBody->Sym; + return nullptr; +} + +template <class ELFT> +void SymbolTableSection<ELFT>::writeGlobalSymbols(uint8_t *Buf) { + // Write the internal symbol table contents to the output symbol table + // pointed by Buf. 
+ auto *ESym = reinterpret_cast<Elf_Sym *>(Buf); + for (SymbolBody *Body : Symbols) { + const OutputSectionBase<ELFT> *OutSec = nullptr; + + switch (Body->kind()) { + case SymbolBody::DefinedSyntheticKind: + OutSec = &cast<DefinedSynthetic<ELFT>>(Body)->Section; + break; + case SymbolBody::DefinedRegularKind: { + auto *Sym = cast<DefinedRegular<ELFT>>(Body->repl()); + if (InputSectionBase<ELFT> *Sec = Sym->Section) { + if (!Sec->isLive()) + continue; + OutSec = Sec->OutSec; + } + break; + } + case SymbolBody::DefinedCommonKind: + OutSec = Out<ELFT>::Bss; + break; + case SymbolBody::SharedKind: { + if (cast<SharedSymbol<ELFT>>(Body)->NeedsCopy) + OutSec = Out<ELFT>::Bss; + break; + } + case SymbolBody::UndefinedElfKind: + case SymbolBody::UndefinedKind: + case SymbolBody::LazyKind: + break; + } + + StringRef Name = Body->getName(); + ESym->st_name = StrTabSec.getOffset(Name); + + unsigned char Type = STT_NOTYPE; + uintX_t Size = 0; + if (const Elf_Sym *InputSym = getElfSym<ELFT>(*Body)) { + Type = InputSym->getType(); + Size = InputSym->st_size; + } else if (auto *C = dyn_cast<DefinedCommon>(Body)) { + Type = STT_OBJECT; + Size = C->Size; + } + + ESym->setBindingAndType(getSymbolBinding(Body), Type); + ESym->st_size = Size; + ESym->setVisibility(Body->getVisibility()); + ESym->st_value = getSymVA<ELFT>(*Body); + + if (OutSec) + ESym->st_shndx = OutSec->SectionIndex; + else if (isa<DefinedRegular<ELFT>>(Body)) + ESym->st_shndx = SHN_ABS; + + ++ESym; + } +} + +template <class ELFT> +uint8_t SymbolTableSection<ELFT>::getSymbolBinding(SymbolBody *Body) { + uint8_t Visibility = Body->getVisibility(); + if (Visibility != STV_DEFAULT && Visibility != STV_PROTECTED) + return STB_LOCAL; + if (const Elf_Sym *ESym = getElfSym<ELFT>(*Body)) + return ESym->getBinding(); + if (isa<DefinedSynthetic<ELFT>>(Body)) + return STB_LOCAL; + return Body->isWeak() ? 
STB_WEAK : STB_GLOBAL; +} + +template <class ELFT> +MipsReginfoOutputSection<ELFT>::MipsReginfoOutputSection() + : OutputSectionBase<ELFT>(".reginfo", SHT_MIPS_REGINFO, SHF_ALLOC) { + this->Header.sh_addralign = 4; + this->Header.sh_entsize = sizeof(Elf_Mips_RegInfo); + this->Header.sh_size = sizeof(Elf_Mips_RegInfo); +} + +template <class ELFT> +void MipsReginfoOutputSection<ELFT>::writeTo(uint8_t *Buf) { + auto *R = reinterpret_cast<Elf_Mips_RegInfo *>(Buf); + R->ri_gp_value = getMipsGpAddr<ELFT>(); + R->ri_gprmask = GeneralMask; +} + +template <class ELFT> +void MipsReginfoOutputSection<ELFT>::addSection(InputSectionBase<ELFT> *C) { + auto *S = cast<MipsReginfoInputSection<ELFT>>(C); + GeneralMask |= S->getGeneralMask(); +} + +namespace lld { +namespace elf2 { +template class OutputSectionBase<ELF32LE>; +template class OutputSectionBase<ELF32BE>; +template class OutputSectionBase<ELF64LE>; +template class OutputSectionBase<ELF64BE>; + +template class GotPltSection<ELF32LE>; +template class GotPltSection<ELF32BE>; +template class GotPltSection<ELF64LE>; +template class GotPltSection<ELF64BE>; + +template class GotSection<ELF32LE>; +template class GotSection<ELF32BE>; +template class GotSection<ELF64LE>; +template class GotSection<ELF64BE>; + +template class PltSection<ELF32LE>; +template class PltSection<ELF32BE>; +template class PltSection<ELF64LE>; +template class PltSection<ELF64BE>; + +template class RelocationSection<ELF32LE>; +template class RelocationSection<ELF32BE>; +template class RelocationSection<ELF64LE>; +template class RelocationSection<ELF64BE>; + +template class InterpSection<ELF32LE>; +template class InterpSection<ELF32BE>; +template class InterpSection<ELF64LE>; +template class InterpSection<ELF64BE>; + +template class GnuHashTableSection<ELF32LE>; +template class GnuHashTableSection<ELF32BE>; +template class GnuHashTableSection<ELF64LE>; +template class GnuHashTableSection<ELF64BE>; + +template class HashTableSection<ELF32LE>; +template class 
HashTableSection<ELF32BE>; +template class HashTableSection<ELF64LE>; +template class HashTableSection<ELF64BE>; + +template class DynamicSection<ELF32LE>; +template class DynamicSection<ELF32BE>; +template class DynamicSection<ELF64LE>; +template class DynamicSection<ELF64BE>; + +template class OutputSection<ELF32LE>; +template class OutputSection<ELF32BE>; +template class OutputSection<ELF64LE>; +template class OutputSection<ELF64BE>; + +template class EHOutputSection<ELF32LE>; +template class EHOutputSection<ELF32BE>; +template class EHOutputSection<ELF64LE>; +template class EHOutputSection<ELF64BE>; + +template class MipsReginfoOutputSection<ELF32LE>; +template class MipsReginfoOutputSection<ELF32BE>; +template class MipsReginfoOutputSection<ELF64LE>; +template class MipsReginfoOutputSection<ELF64BE>; + +template class MergeOutputSection<ELF32LE>; +template class MergeOutputSection<ELF32BE>; +template class MergeOutputSection<ELF64LE>; +template class MergeOutputSection<ELF64BE>; + +template class StringTableSection<ELF32LE>; +template class StringTableSection<ELF32BE>; +template class StringTableSection<ELF64LE>; +template class StringTableSection<ELF64BE>; + +template class SymbolTableSection<ELF32LE>; +template class SymbolTableSection<ELF32BE>; +template class SymbolTableSection<ELF64LE>; +template class SymbolTableSection<ELF64BE>; + +template ELFFile<ELF32LE>::uintX_t getSymVA<ELF32LE>(const SymbolBody &); +template ELFFile<ELF32BE>::uintX_t getSymVA<ELF32BE>(const SymbolBody &); +template ELFFile<ELF64LE>::uintX_t getSymVA<ELF64LE>(const SymbolBody &); +template ELFFile<ELF64BE>::uintX_t getSymVA<ELF64BE>(const SymbolBody &); + +template ELFFile<ELF32LE>::uintX_t +getLocalRelTarget(const ObjectFile<ELF32LE> &, + const ELFFile<ELF32LE>::Elf_Rel &, + ELFFile<ELF32LE>::uintX_t Addend); +template ELFFile<ELF32BE>::uintX_t +getLocalRelTarget(const ObjectFile<ELF32BE> &, + const ELFFile<ELF32BE>::Elf_Rel &, + ELFFile<ELF32BE>::uintX_t Addend); +template 
ELFFile<ELF64LE>::uintX_t +getLocalRelTarget(const ObjectFile<ELF64LE> &, + const ELFFile<ELF64LE>::Elf_Rel &, + ELFFile<ELF64LE>::uintX_t Addend); +template ELFFile<ELF64BE>::uintX_t +getLocalRelTarget(const ObjectFile<ELF64BE> &, + const ELFFile<ELF64BE>::Elf_Rel &, + ELFFile<ELF64BE>::uintX_t Addend); + +template bool shouldKeepInSymtab<ELF32LE>(const ObjectFile<ELF32LE> &, + StringRef, + const ELFFile<ELF32LE>::Elf_Sym &); +template bool shouldKeepInSymtab<ELF32BE>(const ObjectFile<ELF32BE> &, + StringRef, + const ELFFile<ELF32BE>::Elf_Sym &); +template bool shouldKeepInSymtab<ELF64LE>(const ObjectFile<ELF64LE> &, + StringRef, + const ELFFile<ELF64LE>::Elf_Sym &); +template bool shouldKeepInSymtab<ELF64BE>(const ObjectFile<ELF64BE> &, + StringRef, + const ELFFile<ELF64BE>::Elf_Sym &); +} +} diff --git a/ELF/OutputSections.h b/ELF/OutputSections.h new file mode 100644 index 000000000000..6dca2b570308 --- /dev/null +++ b/ELF/OutputSections.h @@ -0,0 +1,485 @@ +//===- OutputSections.h -----------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_OUTPUT_SECTIONS_H +#define LLD_ELF_OUTPUT_SECTIONS_H + +#include "lld/Core/LLVM.h" + +#include "llvm/ADT/MapVector.h" +#include "llvm/MC/StringTableBuilder.h" +#include "llvm/Object/ELF.h" + +#include "Config.h" + +#include <type_traits> + +namespace lld { +namespace elf2 { + +class SymbolBody; +template <class ELFT> class SymbolTable; +template <class ELFT> class SymbolTableSection; +template <class ELFT> class StringTableSection; +template <class ELFT> class EHInputSection; +template <class ELFT> class InputSection; +template <class ELFT> class InputSectionBase; +template <class ELFT> class MergeInputSection; +template <class ELFT> class MipsReginfoInputSection; +template <class ELFT> class OutputSection; +template <class ELFT> class ObjectFile; +template <class ELFT> class DefinedRegular; + +// Flag to force GOT to be in output if we have relocations +// that relies on its address. 
+extern bool HasGotOffRel; + +template <class ELFT> +static inline typename llvm::object::ELFFile<ELFT>::uintX_t +getAddend(const typename llvm::object::ELFFile<ELFT>::Elf_Rel &Rel) { + return 0; +} + +template <class ELFT> +static inline typename llvm::object::ELFFile<ELFT>::uintX_t +getAddend(const typename llvm::object::ELFFile<ELFT>::Elf_Rela &Rel) { + return Rel.r_addend; +} + +template <class ELFT> +typename llvm::object::ELFFile<ELFT>::uintX_t getSymVA(const SymbolBody &S); + +template <class ELFT, bool IsRela> +typename llvm::object::ELFFile<ELFT>::uintX_t +getLocalRelTarget(const ObjectFile<ELFT> &File, + const llvm::object::Elf_Rel_Impl<ELFT, IsRela> &Rel, + typename llvm::object::ELFFile<ELFT>::uintX_t Addend); + +bool canBePreempted(const SymbolBody *Body, bool NeedsGot); + +template <class ELFT> +bool shouldKeepInSymtab( + const ObjectFile<ELFT> &File, StringRef Name, + const typename llvm::object::ELFFile<ELFT>::Elf_Sym &Sym); + +// This represents a section in an output file. +// Different sub classes represent different types of sections. Some contain +// input sections, others are created by the linker. +// The writer creates multiple OutputSections and assign them unique, +// non-overlapping file offsets and VAs. +template <class ELFT> class OutputSectionBase { +public: + typedef typename llvm::object::ELFFile<ELFT>::uintX_t uintX_t; + typedef typename llvm::object::ELFFile<ELFT>::Elf_Shdr Elf_Shdr; + + OutputSectionBase(StringRef Name, uint32_t sh_type, uintX_t sh_flags); + void setVA(uintX_t VA) { Header.sh_addr = VA; } + uintX_t getVA() const { return Header.sh_addr; } + void setFileOffset(uintX_t Off) { Header.sh_offset = Off; } + void writeHeaderTo(Elf_Shdr *SHdr); + StringRef getName() { return Name; } + + virtual void addSection(InputSectionBase<ELFT> *C) {} + + unsigned SectionIndex; + + // Returns the size of the section in the output file. 
+ uintX_t getSize() const { return Header.sh_size; } + void setSize(uintX_t Val) { Header.sh_size = Val; } + uintX_t getFlags() { return Header.sh_flags; } + uintX_t getFileOff() { return Header.sh_offset; } + uintX_t getAlign() { + // The ELF spec states that a value of 0 means the section has no alignment + // constraits. + return std::max<uintX_t>(Header.sh_addralign, 1); + } + uint32_t getType() { return Header.sh_type; } + void updateAlign(uintX_t Align) { + if (Align > Header.sh_addralign) + Header.sh_addralign = Align; + } + + virtual void finalize() {} + virtual void writeTo(uint8_t *Buf) = 0; + virtual ~OutputSectionBase() = default; + +protected: + StringRef Name; + Elf_Shdr Header; +}; + +template <class ELFT> class GotSection final : public OutputSectionBase<ELFT> { + typedef OutputSectionBase<ELFT> Base; + typedef typename Base::uintX_t uintX_t; + +public: + GotSection(); + void finalize() override; + void writeTo(uint8_t *Buf) override; + void addEntry(SymbolBody *Sym); + bool addDynTlsEntry(SymbolBody *Sym); + bool addCurrentModuleTlsIndex(); + bool empty() const { return Entries.empty(); } + uintX_t getEntryAddr(const SymbolBody &B) const; + uintX_t getGlobalDynAddr(const SymbolBody &B) const; + uintX_t getNumEntries() const { return Entries.size(); } + + // Returns the symbol which corresponds to the first entry of the global part + // of GOT on MIPS platform. It is required to fill up MIPS-specific dynamic + // table properties. + // Returns nullptr if the global part is empty. + const SymbolBody *getMipsFirstGlobalEntry() const; + + // Returns the number of entries in the local part of GOT including + // the number of reserved entries. This method is MIPS-specific. 
+ unsigned getMipsLocalEntriesNum() const; + + uint32_t getLocalTlsIndexVA() { return Base::getVA() + LocalTlsIndexOff; } + +private: + std::vector<const SymbolBody *> Entries; + uint32_t LocalTlsIndexOff = -1; +}; + +template <class ELFT> +class GotPltSection final : public OutputSectionBase<ELFT> { + typedef typename llvm::object::ELFFile<ELFT>::uintX_t uintX_t; + +public: + GotPltSection(); + void finalize() override; + void writeTo(uint8_t *Buf) override; + void addEntry(SymbolBody *Sym); + bool empty() const; + uintX_t getEntryAddr(const SymbolBody &B) const; + +private: + std::vector<const SymbolBody *> Entries; +}; + +template <class ELFT> class PltSection final : public OutputSectionBase<ELFT> { + typedef OutputSectionBase<ELFT> Base; + typedef typename Base::uintX_t uintX_t; + +public: + PltSection(); + void finalize() override; + void writeTo(uint8_t *Buf) override; + void addEntry(SymbolBody *Sym); + bool empty() const { return Entries.empty(); } + uintX_t getEntryAddr(const SymbolBody &B) const; + +private: + std::vector<std::pair<const SymbolBody *, unsigned>> Entries; +}; + +template <class ELFT> struct DynamicReloc { + typedef typename llvm::object::ELFFile<ELFT>::Elf_Rel Elf_Rel; + InputSectionBase<ELFT> *C; + const Elf_Rel *RI; +}; + +template <class ELFT> +class SymbolTableSection final : public OutputSectionBase<ELFT> { +public: + typedef typename llvm::object::ELFFile<ELFT>::Elf_Shdr Elf_Shdr; + typedef typename llvm::object::ELFFile<ELFT>::Elf_Sym Elf_Sym; + typedef typename llvm::object::ELFFile<ELFT>::Elf_Sym_Range Elf_Sym_Range; + typedef typename llvm::object::ELFFile<ELFT>::uintX_t uintX_t; + SymbolTableSection(SymbolTable<ELFT> &Table, + StringTableSection<ELFT> &StrTabSec); + + void finalize() override; + void writeTo(uint8_t *Buf) override; + void addLocalSymbol(StringRef Name); + void addSymbol(SymbolBody *Body); + StringTableSection<ELFT> &getStrTabSec() const { return StrTabSec; } + unsigned getNumSymbols() const { return NumVisible 
+ 1; } + + ArrayRef<SymbolBody *> getSymbols() const { return Symbols; } + +private: + void writeLocalSymbols(uint8_t *&Buf); + void writeGlobalSymbols(uint8_t *Buf); + + static uint8_t getSymbolBinding(SymbolBody *Body); + + SymbolTable<ELFT> &Table; + StringTableSection<ELFT> &StrTabSec; + std::vector<SymbolBody *> Symbols; + unsigned NumVisible = 0; + unsigned NumLocals = 0; +}; + +template <class ELFT> +class RelocationSection final : public OutputSectionBase<ELFT> { + typedef typename llvm::object::ELFFile<ELFT>::Elf_Rel Elf_Rel; + typedef typename llvm::object::ELFFile<ELFT>::Elf_Rela Elf_Rela; + typedef typename llvm::object::ELFFile<ELFT>::uintX_t uintX_t; + +public: + RelocationSection(StringRef Name, bool IsRela); + void addReloc(const DynamicReloc<ELFT> &Reloc) { Relocs.push_back(Reloc); } + unsigned getRelocOffset(); + void finalize() override; + void writeTo(uint8_t *Buf) override; + bool hasRelocs() const { return !Relocs.empty(); } + bool isRela() const { return IsRela; } + + bool Static = false; + +private: + bool applyTlsDynamicReloc(SymbolBody *Body, uint32_t Type, Elf_Rel *P, + Elf_Rel *N); + + std::vector<DynamicReloc<ELFT>> Relocs; + const bool IsRela; +}; + +template <class ELFT> +class OutputSection final : public OutputSectionBase<ELFT> { +public: + typedef typename llvm::object::ELFFile<ELFT>::Elf_Shdr Elf_Shdr; + typedef typename llvm::object::ELFFile<ELFT>::Elf_Sym Elf_Sym; + typedef typename llvm::object::ELFFile<ELFT>::Elf_Rel Elf_Rel; + typedef typename llvm::object::ELFFile<ELFT>::Elf_Rela Elf_Rela; + typedef typename llvm::object::ELFFile<ELFT>::uintX_t uintX_t; + OutputSection(StringRef Name, uint32_t sh_type, uintX_t sh_flags); + void addSection(InputSectionBase<ELFT> *C) override; + void writeTo(uint8_t *Buf) override; + +private: + std::vector<InputSection<ELFT> *> Sections; +}; + +template <class ELFT> +class MergeOutputSection final : public OutputSectionBase<ELFT> { + typedef typename OutputSectionBase<ELFT>::uintX_t uintX_t; 
+ + bool shouldTailMerge() const; + +public: + MergeOutputSection(StringRef Name, uint32_t sh_type, uintX_t sh_flags); + void addSection(InputSectionBase<ELFT> *S) override; + void writeTo(uint8_t *Buf) override; + unsigned getOffset(StringRef Val); + void finalize() override; + +private: + llvm::StringTableBuilder Builder{llvm::StringTableBuilder::RAW}; +}; + +// FDE or CIE +template <class ELFT> struct EHRegion { + typedef typename llvm::object::ELFFile<ELFT>::uintX_t uintX_t; + EHRegion(EHInputSection<ELFT> *S, unsigned Index); + StringRef data() const; + EHInputSection<ELFT> *S; + unsigned Index; +}; + +template <class ELFT> struct Cie : public EHRegion<ELFT> { + Cie(EHInputSection<ELFT> *S, unsigned Index); + std::vector<EHRegion<ELFT>> Fdes; +}; + +template <class ELFT> +class EHOutputSection final : public OutputSectionBase<ELFT> { +public: + typedef typename llvm::object::ELFFile<ELFT>::uintX_t uintX_t; + typedef typename llvm::object::ELFFile<ELFT>::Elf_Shdr Elf_Shdr; + typedef typename llvm::object::ELFFile<ELFT>::Elf_Rel Elf_Rel; + typedef typename llvm::object::ELFFile<ELFT>::Elf_Rela Elf_Rela; + EHOutputSection(StringRef Name, uint32_t sh_type, uintX_t sh_flags); + void writeTo(uint8_t *Buf) override; + + template <bool IsRela> + void addSectionAux( + EHInputSection<ELFT> *S, + llvm::iterator_range<const llvm::object::Elf_Rel_Impl<ELFT, IsRela> *> + Rels); + + void addSection(InputSectionBase<ELFT> *S) override; + +private: + uintX_t readEntryLength(ArrayRef<uint8_t> D); + + std::vector<EHInputSection<ELFT> *> Sections; + std::vector<Cie<ELFT>> Cies; + + // Maps CIE content + personality to a index in Cies. 
+ llvm::DenseMap<std::pair<StringRef, StringRef>, unsigned> CieMap; +}; + +template <class ELFT> +class InterpSection final : public OutputSectionBase<ELFT> { +public: + InterpSection(); + void writeTo(uint8_t *Buf) override; +}; + +template <class ELFT> +class StringTableSection final : public OutputSectionBase<ELFT> { +public: + typedef typename llvm::object::ELFFile<ELFT>::uintX_t uintX_t; + StringTableSection(StringRef Name, bool Dynamic); + void add(StringRef S) { StrTabBuilder.add(S); } + size_t getOffset(StringRef S) const { return StrTabBuilder.getOffset(S); } + StringRef data() const { return StrTabBuilder.data(); } + void writeTo(uint8_t *Buf) override; + + void finalize() override { + StrTabBuilder.finalize(); + this->Header.sh_size = StrTabBuilder.data().size(); + } + + bool isDynamic() const { return Dynamic; } + +private: + const bool Dynamic; + llvm::StringTableBuilder StrTabBuilder{llvm::StringTableBuilder::ELF}; +}; + +template <class ELFT> +class HashTableSection final : public OutputSectionBase<ELFT> { + typedef typename llvm::object::ELFFile<ELFT>::Elf_Word Elf_Word; + +public: + HashTableSection(); + void finalize() override; + void writeTo(uint8_t *Buf) override; +}; + +// Outputs GNU Hash section. For detailed explanation see: +// https://blogs.oracle.com/ali/entry/gnu_hash_elf_sections +template <class ELFT> +class GnuHashTableSection final : public OutputSectionBase<ELFT> { + typedef typename llvm::object::ELFFile<ELFT>::Elf_Off Elf_Off; + typedef typename llvm::object::ELFFile<ELFT>::Elf_Word Elf_Word; + typedef typename llvm::object::ELFFile<ELFT>::uintX_t uintX_t; + +public: + GnuHashTableSection(); + void finalize() override; + void writeTo(uint8_t *Buf) override; + + // Adds symbols to the hash table. + // Sorts the input to satisfy GNU hash section requirements. 
+ void addSymbols(std::vector<SymbolBody *> &Symbols); + +private: + static unsigned calcNBuckets(unsigned NumHashed); + static unsigned calcMaskWords(unsigned NumHashed); + + void writeHeader(uint8_t *&Buf); + void writeBloomFilter(uint8_t *&Buf); + void writeHashTable(uint8_t *Buf); + + struct HashedSymbolData { + SymbolBody *Body; + uint32_t Hash; + }; + + std::vector<HashedSymbolData> HashedSymbols; + + unsigned MaskWords; + unsigned NBuckets; + unsigned Shift2; +}; + +template <class ELFT> +class DynamicSection final : public OutputSectionBase<ELFT> { + typedef OutputSectionBase<ELFT> Base; + typedef typename llvm::object::ELFFile<ELFT>::Elf_Dyn Elf_Dyn; + typedef typename llvm::object::ELFFile<ELFT>::Elf_Rel Elf_Rel; + typedef typename llvm::object::ELFFile<ELFT>::Elf_Rela Elf_Rela; + typedef typename llvm::object::ELFFile<ELFT>::Elf_Shdr Elf_Shdr; + typedef typename llvm::object::ELFFile<ELFT>::Elf_Sym Elf_Sym; + +public: + DynamicSection(SymbolTable<ELFT> &SymTab); + void finalize() override; + void writeTo(uint8_t *Buf) override; + + OutputSectionBase<ELFT> *PreInitArraySec = nullptr; + OutputSectionBase<ELFT> *InitArraySec = nullptr; + OutputSectionBase<ELFT> *FiniArraySec = nullptr; + +private: + SymbolTable<ELFT> &SymTab; + const SymbolBody *InitSym = nullptr; + const SymbolBody *FiniSym = nullptr; + uint32_t DtFlags = 0; + uint32_t DtFlags1 = 0; +}; + +template <class ELFT> +class MipsReginfoOutputSection final : public OutputSectionBase<ELFT> { + typedef llvm::object::Elf_Mips_RegInfo<ELFT> Elf_Mips_RegInfo; + +public: + MipsReginfoOutputSection(); + void writeTo(uint8_t *Buf) override; + void addSection(InputSectionBase<ELFT> *S) override; + +private: + uint32_t GeneralMask = 0; +}; + +// All output sections that are hadnled by the linker specially are +// globally accessible. Writer initializes them, so don't use them +// until Writer is initialized. 
+template <class ELFT> struct Out { + typedef typename llvm::object::ELFFile<ELFT>::uintX_t uintX_t; + typedef typename llvm::object::ELFFile<ELFT>::Elf_Phdr Elf_Phdr; + static DynamicSection<ELFT> *Dynamic; + static GnuHashTableSection<ELFT> *GnuHashTab; + static GotPltSection<ELFT> *GotPlt; + static GotSection<ELFT> *Got; + static HashTableSection<ELFT> *HashTab; + static InterpSection<ELFT> *Interp; + static OutputSection<ELFT> *Bss; + static OutputSection<ELFT> *MipsRldMap; + static OutputSectionBase<ELFT> *Opd; + static uint8_t *OpdBuf; + static PltSection<ELFT> *Plt; + static RelocationSection<ELFT> *RelaDyn; + static RelocationSection<ELFT> *RelaPlt; + static StringTableSection<ELFT> *DynStrTab; + static StringTableSection<ELFT> *ShStrTab; + static StringTableSection<ELFT> *StrTab; + static SymbolTableSection<ELFT> *DynSymTab; + static SymbolTableSection<ELFT> *SymTab; + static Elf_Phdr *TlsPhdr; +}; + +template <class ELFT> DynamicSection<ELFT> *Out<ELFT>::Dynamic; +template <class ELFT> GnuHashTableSection<ELFT> *Out<ELFT>::GnuHashTab; +template <class ELFT> GotPltSection<ELFT> *Out<ELFT>::GotPlt; +template <class ELFT> GotSection<ELFT> *Out<ELFT>::Got; +template <class ELFT> HashTableSection<ELFT> *Out<ELFT>::HashTab; +template <class ELFT> InterpSection<ELFT> *Out<ELFT>::Interp; +template <class ELFT> OutputSection<ELFT> *Out<ELFT>::Bss; +template <class ELFT> OutputSection<ELFT> *Out<ELFT>::MipsRldMap; +template <class ELFT> OutputSectionBase<ELFT> *Out<ELFT>::Opd; +template <class ELFT> uint8_t *Out<ELFT>::OpdBuf; +template <class ELFT> PltSection<ELFT> *Out<ELFT>::Plt; +template <class ELFT> RelocationSection<ELFT> *Out<ELFT>::RelaDyn; +template <class ELFT> RelocationSection<ELFT> *Out<ELFT>::RelaPlt; +template <class ELFT> StringTableSection<ELFT> *Out<ELFT>::DynStrTab; +template <class ELFT> StringTableSection<ELFT> *Out<ELFT>::ShStrTab; +template <class ELFT> StringTableSection<ELFT> *Out<ELFT>::StrTab; +template <class ELFT> 
SymbolTableSection<ELFT> *Out<ELFT>::DynSymTab; +template <class ELFT> SymbolTableSection<ELFT> *Out<ELFT>::SymTab; +template <class ELFT> typename Out<ELFT>::Elf_Phdr *Out<ELFT>::TlsPhdr; + +} // namespace elf2 +} // namespace lld + +#endif // LLD_ELF_OUTPUT_SECTIONS_H diff --git a/ELF/README.md b/ELF/README.md new file mode 100644 index 000000000000..49b8167bbfe0 --- /dev/null +++ b/ELF/README.md @@ -0,0 +1,21 @@ +The New ELF Linker +================== +This directory contains a port of the new PE/COFF linker for ELF. + +Overall Design +-------------- +See COFF/README.md for details on the design. Note that unlike COFF, we do not +distinguish chunks from input sections; they are merged together. + +Capabilities +------------ +This linker can link LLVM and Clang on Linux/x86-64 or FreeBSD/x86-64 +"Hello world" can be linked on Linux/PPC64 and on Linux/AArch64 or +FreeBSD/AArch64. + +Performance +----------- +Achieving good performance is one of our goals. It's too early to reach a +conclusion, but we are optimistic about that as it currently seems to be faster +than GNU gold. It will be interesting to compare when we are close to feature +parity. diff --git a/ELF/SymbolTable.cpp b/ELF/SymbolTable.cpp new file mode 100644 index 000000000000..e022ecd2e90d --- /dev/null +++ b/ELF/SymbolTable.cpp @@ -0,0 +1,267 @@ +//===- SymbolTable.cpp ----------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Symbol table is a bag of all known symbols. We put all symbols of +// all input files to the symbol table. The symbol Table is basically +// a hash table with the logic to resolve symbol name conflicts using +// the symbol types. 
+// +//===----------------------------------------------------------------------===// + +#include "SymbolTable.h" +#include "Config.h" +#include "Error.h" +#include "Symbols.h" + +using namespace llvm; +using namespace llvm::object; +using namespace llvm::ELF; + +using namespace lld; +using namespace lld::elf2; + +template <class ELFT> SymbolTable<ELFT>::SymbolTable() {} + +template <class ELFT> +static void checkCompatibility(InputFile *FileP) { + auto *F = dyn_cast<ELFFileBase<ELFT>>(FileP); + if (!F) + return; + if (F->getELFKind() == Config->EKind && F->getEMachine() == Config->EMachine) + return; + StringRef A = F->getName(); + StringRef B = Config->Emulation; + if (B.empty()) + B = Config->FirstElf->getName(); + error(A + " is incompatible with " + B); +} + +template <class ELFT> +void SymbolTable<ELFT>::addFile(std::unique_ptr<InputFile> File) { + InputFile *FileP = File.get(); + checkCompatibility<ELFT>(FileP); + + // .a file + if (auto *F = dyn_cast<ArchiveFile>(FileP)) { + ArchiveFiles.emplace_back(cast<ArchiveFile>(File.release())); + F->parse(); + for (Lazy &Sym : F->getLazySymbols()) + addLazy(&Sym); + return; + } + + // .so file + if (auto *F = dyn_cast<SharedFile<ELFT>>(FileP)) { + // DSOs are uniquified not by filename but by soname. + F->parseSoName(); + if (!IncludedSoNames.insert(F->getSoName()).second) + return; + + SharedFiles.emplace_back(cast<SharedFile<ELFT>>(File.release())); + F->parse(); + for (SharedSymbol<ELFT> &B : F->getSharedSymbols()) + resolve(&B); + return; + } + + // .o file + auto *F = cast<ObjectFile<ELFT>>(FileP); + ObjectFiles.emplace_back(cast<ObjectFile<ELFT>>(File.release())); + F->parse(Comdats); + for (SymbolBody *B : F->getSymbols()) + resolve(B); +} + +// Add an undefined symbol. +template <class ELFT> +SymbolBody *SymbolTable<ELFT>::addUndefined(StringRef Name) { + auto *Sym = new (Alloc) Undefined(Name, false, STV_DEFAULT, false); + resolve(Sym); + return Sym; +} + +// Add an undefined symbol. 
Unlike addUndefined, that symbol +// doesn't have to be resolved, thus "opt" (optional). +template <class ELFT> +SymbolBody *SymbolTable<ELFT>::addUndefinedOpt(StringRef Name) { + auto *Sym = new (Alloc) Undefined(Name, false, STV_HIDDEN, true); + resolve(Sym); + return Sym; +} + +template <class ELFT> +void SymbolTable<ELFT>::addAbsolute(StringRef Name, + typename ELFFile<ELFT>::Elf_Sym &ESym) { + resolve(new (Alloc) DefinedRegular<ELFT>(Name, ESym, nullptr)); +} + +template <class ELFT> +void SymbolTable<ELFT>::addSynthetic(StringRef Name, + OutputSectionBase<ELFT> &Section, + typename ELFFile<ELFT>::uintX_t Value) { + auto *Sym = new (Alloc) DefinedSynthetic<ELFT>(Name, Value, Section); + resolve(Sym); +} + +template <class ELFT> +SymbolBody *SymbolTable<ELFT>::addIgnored(StringRef Name) { + auto *Sym = new (Alloc) + DefinedRegular<ELFT>(Name, ElfSym<ELFT>::IgnoreUndef, nullptr); + resolve(Sym); + return Sym; +} + +template <class ELFT> bool SymbolTable<ELFT>::isUndefined(StringRef Name) { + if (SymbolBody *Sym = find(Name)) + return Sym->isUndefined(); + return false; +} + +// Returns a file from which symbol B was created. +// If B does not belong to any file in ObjectFiles, returns a nullptr. +template <class ELFT> +ELFFileBase<ELFT> * +elf2::findFile(ArrayRef<std::unique_ptr<ObjectFile<ELFT>>> ObjectFiles, + const SymbolBody *B) { + for (const std::unique_ptr<ObjectFile<ELFT>> &F : ObjectFiles) { + ArrayRef<SymbolBody *> Syms = F->getSymbols(); + if (std::find(Syms.begin(), Syms.end(), B) != Syms.end()) + return F.get(); + } + return nullptr; +} + +template <class ELFT> +std::string SymbolTable<ELFT>::conflictMsg(SymbolBody *Old, SymbolBody *New) { + ELFFileBase<ELFT> *OldFile = findFile<ELFT>(ObjectFiles, Old); + ELFFileBase<ELFT> *NewFile = findFile<ELFT>(ObjectFiles, New); + + StringRef Sym = Old->getName(); + StringRef F1 = OldFile ? OldFile->getName() : "(internal)"; + StringRef F2 = NewFile ? 
NewFile->getName() : "(internal)"; + return (Sym + " in " + F1 + " and " + F2).str(); +} + +// This function resolves conflicts if there's an existing symbol with +// the same name. Decisions are made based on symbol type. +template <class ELFT> void SymbolTable<ELFT>::resolve(SymbolBody *New) { + Symbol *Sym = insert(New); + if (Sym->Body == New) + return; + + SymbolBody *Existing = Sym->Body; + + if (Lazy *L = dyn_cast<Lazy>(Existing)) { + if (auto *Undef = dyn_cast<Undefined>(New)) { + addMemberFile(Undef, L); + return; + } + // Found a definition for something also in an archive. + // Ignore the archive definition. + Sym->Body = New; + return; + } + + if (New->isTls() != Existing->isTls()) + error("TLS attribute mismatch for symbol: " + conflictMsg(Existing, New)); + + // compare() returns -1, 0, or 1 if the lhs symbol is less preferable, + // equivalent (conflicting), or more preferable, respectively. + int comp = Existing->compare<ELFT>(New); + if (comp == 0) { + std::string S = "duplicate symbol: " + conflictMsg(Existing, New); + if (!Config->AllowMultipleDefinition) + error(S); + warning(S); + return; + } + if (comp < 0) + Sym->Body = New; +} + +template <class ELFT> Symbol *SymbolTable<ELFT>::insert(SymbolBody *New) { + // Find an existing Symbol or create and insert a new one. 
+ StringRef Name = New->getName(); + Symbol *&Sym = Symtab[Name]; + if (!Sym) + Sym = new (Alloc) Symbol{New}; + New->setBackref(Sym); + return Sym; +} + +template <class ELFT> SymbolBody *SymbolTable<ELFT>::find(StringRef Name) { + auto It = Symtab.find(Name); + if (It == Symtab.end()) + return nullptr; + return It->second->Body; +} + +template <class ELFT> void SymbolTable<ELFT>::addLazy(Lazy *L) { + Symbol *Sym = insert(L); + if (Sym->Body == L) + return; + if (auto *Undef = dyn_cast<Undefined>(Sym->Body)) { + Sym->Body = L; + addMemberFile(Undef, L); + } +} + +template <class ELFT> +void SymbolTable<ELFT>::addMemberFile(Undefined *Undef, Lazy *L) { + // Weak undefined symbols should not fetch members from archives. + // If we were to keep old symbol we would not know that an archive member was + // available if a strong undefined symbol shows up afterwards in the link. + // If a strong undefined symbol never shows up, this lazy symbol will + // get to the end of the link and must be treated as the weak undefined one. + // We set UsedInRegularObj in a similar way to what is done with shared + // symbols and mark it as weak to reduce how many special cases are needed. + if (Undef->isWeak()) { + L->setUsedInRegularObj(); + L->setWeak(); + return; + } + + // Fetch a member file that has the definition for L. + // getMember returns nullptr if the member was already read from the library. + if (std::unique_ptr<InputFile> File = L->getMember()) + addFile(std::move(File)); +} + +// This function takes care of the case in which shared libraries depend on +// the user program (not the other way, which is usual). Shared libraries +// may have undefined symbols, expecting that the user program provides +// the definitions for them. An example is BSD's __progname symbol. +// We need to put such symbols to the main program's .dynsym so that +// shared libraries can find them. +// Except this, we ignore undefined symbols in DSOs. 
+template <class ELFT> void SymbolTable<ELFT>::scanShlibUndefined() { + for (std::unique_ptr<SharedFile<ELFT>> &File : SharedFiles) + for (StringRef U : File->getUndefinedSymbols()) + if (SymbolBody *Sym = find(U)) + if (Sym->isDefined()) + Sym->setUsedInDynamicReloc(); +} + +template class lld::elf2::SymbolTable<ELF32LE>; +template class lld::elf2::SymbolTable<ELF32BE>; +template class lld::elf2::SymbolTable<ELF64LE>; +template class lld::elf2::SymbolTable<ELF64BE>; + +template ELFFileBase<ELF32LE> * +lld::elf2::findFile(ArrayRef<std::unique_ptr<ObjectFile<ELF32LE>>>, + const SymbolBody *); +template ELFFileBase<ELF32BE> * +lld::elf2::findFile(ArrayRef<std::unique_ptr<ObjectFile<ELF32BE>>>, + const SymbolBody *); +template ELFFileBase<ELF64LE> * +lld::elf2::findFile(ArrayRef<std::unique_ptr<ObjectFile<ELF64LE>>>, + const SymbolBody *); +template ELFFileBase<ELF64BE> * +lld::elf2::findFile(ArrayRef<std::unique_ptr<ObjectFile<ELF64BE>>>, + const SymbolBody *); diff --git a/ELF/SymbolTable.h b/ELF/SymbolTable.h new file mode 100644 index 000000000000..aa905e027d48 --- /dev/null +++ b/ELF/SymbolTable.h @@ -0,0 +1,98 @@ +//===- SymbolTable.h --------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_SYMBOL_TABLE_H +#define LLD_ELF_SYMBOL_TABLE_H + +#include "InputFiles.h" +#include "llvm/ADT/MapVector.h" + +namespace lld { +namespace elf2 { +class Lazy; +template <class ELFT> class OutputSectionBase; +struct Symbol; +class Undefined; + +// SymbolTable is a bucket of all known symbols, including defined, +// undefined, or lazy symbols (the last one is symbols in archive +// files whose archive members are not yet loaded). 
+// +// We put all symbols of all files to a SymbolTable, and the +// SymbolTable selects the "best" symbols if there are name +// conflicts. For example, obviously, a defined symbol is better than +// an undefined symbol. Or, if there's a conflict between a lazy and a +// undefined, it'll read an archive member to read a real definition +// to replace the lazy symbol. The logic is implemented in resolve(). +template <class ELFT> class SymbolTable { +public: + SymbolTable(); + + void addFile(std::unique_ptr<InputFile> File); + + const llvm::MapVector<StringRef, Symbol *> &getSymbols() const { + return Symtab; + } + + const std::vector<std::unique_ptr<ObjectFile<ELFT>>> &getObjectFiles() const { + return ObjectFiles; + } + + const std::vector<std::unique_ptr<SharedFile<ELFT>>> &getSharedFiles() const { + return SharedFiles; + } + + SymbolBody *addUndefined(StringRef Name); + SymbolBody *addUndefinedOpt(StringRef Name); + void addAbsolute(StringRef Name, + typename llvm::object::ELFFile<ELFT>::Elf_Sym &ESym); + void addSynthetic(StringRef Name, OutputSectionBase<ELFT> &Section, + typename llvm::object::ELFFile<ELFT>::uintX_t Value); + SymbolBody *addIgnored(StringRef Name); + bool isUndefined(StringRef Name); + void scanShlibUndefined(); + SymbolBody *find(StringRef Name); + +private: + Symbol *insert(SymbolBody *New); + void addLazy(Lazy *New); + void addMemberFile(Undefined *Undef, Lazy *L); + void resolve(SymbolBody *Body); + std::string conflictMsg(SymbolBody *Old, SymbolBody *New); + + std::vector<std::unique_ptr<ArchiveFile>> ArchiveFiles; + + // The order the global symbols are in is not defined. We can use an arbitrary + // order, but it has to be reproducible. That is true even when cross linking. + // The default hashing of StringRef produces different results on 32 and 64 + // bit systems so we use a MapVector. That is arbitrary, deterministic but + // a bit inefficient. 
+ // FIXME: Experiment with passing in a custom hashing or sorting the symbols + // once symbol resolution is finished. + llvm::MapVector<StringRef, Symbol *> Symtab; + llvm::BumpPtrAllocator Alloc; + + llvm::DenseSet<StringRef> Comdats; + + // The writer needs to infer the machine type from the object files. + std::vector<std::unique_ptr<ObjectFile<ELFT>>> ObjectFiles; + + std::vector<std::unique_ptr<SharedFile<ELFT>>> SharedFiles; + llvm::DenseSet<StringRef> IncludedSoNames; +}; + +template <class ELFT> +ELFFileBase<ELFT> * +findFile(ArrayRef<std::unique_ptr<ObjectFile<ELFT>>> ObjectFiles, + const SymbolBody *B); + +} // namespace elf2 +} // namespace lld + +#endif diff --git a/ELF/Symbols.cpp b/ELF/Symbols.cpp new file mode 100644 index 000000000000..f8d585242a86 --- /dev/null +++ b/ELF/Symbols.cpp @@ -0,0 +1,148 @@ +//===- Symbols.cpp --------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Symbols.h" +#include "InputSection.h" +#include "Error.h" +#include "InputFiles.h" + +#include "llvm/ADT/STLExtras.h" + +using namespace llvm; +using namespace llvm::object; +using namespace llvm::ELF; + +using namespace lld; +using namespace lld::elf2; + +static uint8_t getMinVisibility(uint8_t VA, uint8_t VB) { + if (VA == STV_DEFAULT) + return VB; + if (VB == STV_DEFAULT) + return VA; + return std::min(VA, VB); +} + +// Returns 1, 0 or -1 if this symbol should take precedence +// over the Other, tie or lose, respectively. 
+template <class ELFT> int SymbolBody::compare(SymbolBody *Other) { + typedef typename ELFFile<ELFT>::uintX_t uintX_t; + assert(!isLazy() && !Other->isLazy()); + std::pair<bool, bool> L(isDefined(), !isWeak()); + std::pair<bool, bool> R(Other->isDefined(), !Other->isWeak()); + + // Normalize + if (L > R) + return -Other->compare<ELFT>(this); + + Visibility = Other->Visibility = + getMinVisibility(Visibility, Other->Visibility); + + if (IsUsedInRegularObj || Other->IsUsedInRegularObj) + IsUsedInRegularObj = Other->IsUsedInRegularObj = true; + + if (L != R) + return -1; + if (!L.first || !L.second) + return 1; + if (isShared()) + return -1; + if (Other->isShared()) + return 1; + if (isCommon()) { + if (!Other->isCommon()) + return -1; + auto *ThisC = cast<DefinedCommon>(this); + auto *OtherC = cast<DefinedCommon>(Other); + uintX_t Align = std::max(ThisC->MaxAlignment, OtherC->MaxAlignment); + if (ThisC->Size >= OtherC->Size) { + ThisC->MaxAlignment = Align; + return 1; + } + OtherC->MaxAlignment = Align; + return -1; + } + if (Other->isCommon()) + return 1; + return 0; +} + +Defined::Defined(Kind K, StringRef Name, bool IsWeak, uint8_t Visibility, + bool IsTls) + : SymbolBody(K, Name, IsWeak, Visibility, IsTls) {} + +Undefined::Undefined(SymbolBody::Kind K, StringRef N, bool IsWeak, + uint8_t Visibility, bool IsTls) + : SymbolBody(K, N, IsWeak, Visibility, IsTls), CanKeepUndefined(false) {} + +Undefined::Undefined(StringRef N, bool IsWeak, uint8_t Visibility, + bool CanKeepUndefined) + : Undefined(SymbolBody::UndefinedKind, N, IsWeak, Visibility, + /*IsTls*/ false) { + this->CanKeepUndefined = CanKeepUndefined; +} + +template <typename ELFT> +UndefinedElf<ELFT>::UndefinedElf(StringRef N, const Elf_Sym &Sym) + : Undefined(SymbolBody::UndefinedElfKind, N, + Sym.getBinding() == llvm::ELF::STB_WEAK, Sym.getVisibility(), + Sym.getType() == llvm::ELF::STT_TLS), + Sym(Sym) {} + +template <typename ELFT> +DefinedSynthetic<ELFT>::DefinedSynthetic(StringRef N, uintX_t Value, + 
OutputSectionBase<ELFT> &Section) + : Defined(SymbolBody::DefinedSyntheticKind, N, false, STV_DEFAULT, false), + Value(Value), Section(Section) {} + +DefinedCommon::DefinedCommon(StringRef N, uint64_t Size, uint64_t Alignment, + bool IsWeak, uint8_t Visibility) + : Defined(SymbolBody::DefinedCommonKind, N, IsWeak, Visibility, false) { + MaxAlignment = Alignment; + this->Size = Size; +} + +std::unique_ptr<InputFile> Lazy::getMember() { + MemoryBufferRef MBRef = File->getMember(&Sym); + + // getMember returns an empty buffer if the member was already + // read from the library. + if (MBRef.getBuffer().empty()) + return std::unique_ptr<InputFile>(nullptr); + + return createELFFile<ObjectFile>(MBRef); +} + +template <class ELFT> static void doInitSymbols() { + ElfSym<ELFT>::End.setBinding(STB_GLOBAL); + ElfSym<ELFT>::IgnoreUndef.setBinding(STB_WEAK); + ElfSym<ELFT>::IgnoreUndef.setVisibility(STV_HIDDEN); +} + +void lld::elf2::initSymbols() { + doInitSymbols<ELF32LE>(); + doInitSymbols<ELF32BE>(); + doInitSymbols<ELF64LE>(); + doInitSymbols<ELF64BE>(); +} + +template int SymbolBody::compare<ELF32LE>(SymbolBody *Other); +template int SymbolBody::compare<ELF32BE>(SymbolBody *Other); +template int SymbolBody::compare<ELF64LE>(SymbolBody *Other); +template int SymbolBody::compare<ELF64BE>(SymbolBody *Other); + +template class lld::elf2::UndefinedElf<ELF32LE>; +template class lld::elf2::UndefinedElf<ELF32BE>; +template class lld::elf2::UndefinedElf<ELF64LE>; +template class lld::elf2::UndefinedElf<ELF64BE>; + +template class lld::elf2::DefinedSynthetic<ELF32LE>; +template class lld::elf2::DefinedSynthetic<ELF32BE>; +template class lld::elf2::DefinedSynthetic<ELF64LE>; +template class lld::elf2::DefinedSynthetic<ELF64BE>; diff --git a/ELF/Symbols.h b/ELF/Symbols.h new file mode 100644 index 000000000000..8556452dbb13 --- /dev/null +++ b/ELF/Symbols.h @@ -0,0 +1,327 @@ +//===- Symbols.h ------------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// 
+// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// All symbols are handled as SymbolBodies regardless of their types. +// This file defines various types of SymbolBodies. +// +// File-scope symbols in ELF objects are the only exception of SymbolBody +// instantiation. We will never create SymbolBodies for them for performance +// reason. They are often represented as nullptrs. This is fine for symbol +// resolution because the symbol table naturally cares only about +// externally-visible symbols. For relocations, you have to deal with both +// local and non-local functions, and we have two different functions +// where we need them. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_SYMBOLS_H +#define LLD_ELF_SYMBOLS_H + +#include "InputSection.h" + +#include "lld/Core/LLVM.h" +#include "llvm/Object/Archive.h" +#include "llvm/Object/ELF.h" + +namespace lld { +namespace elf2 { + +class ArchiveFile; +class InputFile; +class SymbolBody; +template <class ELFT> class ObjectFile; +template <class ELFT> class OutputSection; +template <class ELFT> class OutputSectionBase; +template <class ELFT> class SharedFile; + +// Initializes global objects defined in this file. +// Called at the beginning of main(). +void initSymbols(); + +// A real symbol object, SymbolBody, is usually accessed indirectly +// through a Symbol. There's always one Symbol for each symbol name. +// The resolver updates SymbolBody pointers as it resolves symbols. +struct Symbol { + SymbolBody *Body; +}; + +// The base class for real symbol classes. 
+class SymbolBody { +public: + enum Kind { + DefinedFirst, + DefinedRegularKind = DefinedFirst, + SharedKind, + DefinedElfLast = SharedKind, + DefinedCommonKind, + DefinedSyntheticKind, + DefinedLast = DefinedSyntheticKind, + UndefinedElfKind, + UndefinedKind, + LazyKind + }; + + Kind kind() const { return static_cast<Kind>(SymbolKind); } + + bool isWeak() const { return IsWeak; } + bool isUndefined() const { + return SymbolKind == UndefinedKind || SymbolKind == UndefinedElfKind; + } + bool isDefined() const { return SymbolKind <= DefinedLast; } + bool isCommon() const { return SymbolKind == DefinedCommonKind; } + bool isLazy() const { return SymbolKind == LazyKind; } + bool isShared() const { return SymbolKind == SharedKind; } + bool isUsedInRegularObj() const { return IsUsedInRegularObj; } + bool isUsedInDynamicReloc() const { return IsUsedInDynamicReloc; } + void setUsedInDynamicReloc() { IsUsedInDynamicReloc = true; } + bool isTls() const { return IsTls; } + + // Returns the symbol name. + StringRef getName() const { return Name; } + + uint8_t getVisibility() const { return Visibility; } + + unsigned DynamicSymbolTableIndex = 0; + uint32_t GlobalDynIndex = -1; + uint32_t GotIndex = -1; + uint32_t GotPltIndex = -1; + uint32_t PltIndex = -1; + bool hasGlobalDynIndex() { return GlobalDynIndex != uint32_t(-1); } + bool isInGot() const { return GotIndex != -1U; } + bool isInGotPlt() const { return GotPltIndex != -1U; } + bool isInPlt() const { return PltIndex != -1U; } + + // A SymbolBody has a backreference to a Symbol. Originally they are + // doubly-linked. A backreference will never change. But the pointer + // in the Symbol may be mutated by the resolver. If you have a + // pointer P to a SymbolBody and are not sure whether the resolver + // has chosen the object among other objects having the same name, + // you can access P->Backref->Body to get the resolver's result. + void setBackref(Symbol *P) { Backref = P; } + SymbolBody *repl() { return Backref ? 
Backref->Body : this; } + + // Decides which symbol should "win" in the symbol table, this or + // the Other. Returns 1 if this wins, -1 if the Other wins, or 0 if + // they are duplicate (conflicting) symbols. + template <class ELFT> int compare(SymbolBody *Other); + +protected: + SymbolBody(Kind K, StringRef Name, bool IsWeak, uint8_t Visibility, + bool IsTls) + : SymbolKind(K), IsWeak(IsWeak), Visibility(Visibility), IsTls(IsTls), + Name(Name) { + IsUsedInRegularObj = K != SharedKind && K != LazyKind; + IsUsedInDynamicReloc = 0; + } + + const unsigned SymbolKind : 8; + unsigned IsWeak : 1; + unsigned Visibility : 2; + + // True if the symbol was used for linking and thus need to be + // added to the output file's symbol table. It is usually true, + // but if it is a shared symbol that were not referenced by anyone, + // it can be false. + unsigned IsUsedInRegularObj : 1; + + // If true, the symbol is added to .dynsym symbol table. + unsigned IsUsedInDynamicReloc : 1; + + unsigned IsTls : 1; + StringRef Name; + Symbol *Backref = nullptr; +}; + +// The base class for any defined symbols. +class Defined : public SymbolBody { +public: + Defined(Kind K, StringRef Name, bool IsWeak, uint8_t Visibility, bool IsTls); + static bool classof(const SymbolBody *S) { return S->isDefined(); } +}; + +// Any defined symbol from an ELF file. 
+template <class ELFT> class DefinedElf : public Defined { +protected: + typedef typename llvm::object::ELFFile<ELFT>::Elf_Sym Elf_Sym; + +public: + DefinedElf(Kind K, StringRef N, const Elf_Sym &Sym) + : Defined(K, N, Sym.getBinding() == llvm::ELF::STB_WEAK, + Sym.getVisibility(), Sym.getType() == llvm::ELF::STT_TLS), + Sym(Sym) {} + + const Elf_Sym &Sym; + static bool classof(const SymbolBody *S) { + return S->kind() <= DefinedElfLast; + } +}; + +class DefinedCommon : public Defined { +public: + DefinedCommon(StringRef N, uint64_t Size, uint64_t Alignment, bool IsWeak, + uint8_t Visibility); + + static bool classof(const SymbolBody *S) { + return S->kind() == SymbolBody::DefinedCommonKind; + } + + // The output offset of this common symbol in the output bss. Computed by the + // writer. + uint64_t OffsetInBSS; + + // The maximum alignment we have seen for this symbol. + uint64_t MaxAlignment; + + uint64_t Size; +}; + +// Regular defined symbols read from object file symbol tables. +template <class ELFT> class DefinedRegular : public DefinedElf<ELFT> { + typedef typename llvm::object::ELFFile<ELFT>::Elf_Sym Elf_Sym; + +public: + DefinedRegular(StringRef N, const Elf_Sym &Sym, + InputSectionBase<ELFT> *Section) + : DefinedElf<ELFT>(SymbolBody::DefinedRegularKind, N, Sym), + Section(Section) {} + + static bool classof(const SymbolBody *S) { + return S->kind() == SymbolBody::DefinedRegularKind; + } + + // If this is null, the symbol is absolute. + InputSectionBase<ELFT> *Section; +}; + +// DefinedSynthetic is a class to represent linker-generated ELF symbols. +// The difference from the regular symbol is that DefinedSynthetic symbols +// don't belong to any input files or sections. Thus, its constructor +// takes an output section to calculate output VA, etc. 
+template <class ELFT> class DefinedSynthetic : public Defined { +public: + typedef typename llvm::object::ELFFile<ELFT>::Elf_Sym Elf_Sym; + typedef typename llvm::object::ELFFile<ELFT>::uintX_t uintX_t; + DefinedSynthetic(StringRef N, uintX_t Value, + OutputSectionBase<ELFT> &Section); + + static bool classof(const SymbolBody *S) { + return S->kind() == SymbolBody::DefinedSyntheticKind; + } + + uintX_t Value; + const OutputSectionBase<ELFT> &Section; +}; + +// Undefined symbol. +class Undefined : public SymbolBody { + typedef SymbolBody::Kind Kind; + bool CanKeepUndefined; + +protected: + Undefined(Kind K, StringRef N, bool IsWeak, uint8_t Visibility, bool IsTls); + +public: + Undefined(StringRef N, bool IsWeak, uint8_t Visibility, + bool CanKeepUndefined); + + static bool classof(const SymbolBody *S) { return S->isUndefined(); } + + bool canKeepUndefined() const { return CanKeepUndefined; } +}; + +template <class ELFT> class UndefinedElf : public Undefined { + typedef typename llvm::object::ELFFile<ELFT>::Elf_Sym Elf_Sym; + +public: + UndefinedElf(StringRef N, const Elf_Sym &Sym); + const Elf_Sym &Sym; + + static bool classof(const SymbolBody *S) { + return S->kind() == SymbolBody::UndefinedElfKind; + } +}; + +template <class ELFT> class SharedSymbol : public DefinedElf<ELFT> { + typedef typename llvm::object::ELFFile<ELFT>::Elf_Sym Elf_Sym; + typedef typename llvm::object::ELFFile<ELFT>::uintX_t uintX_t; + +public: + static bool classof(const SymbolBody *S) { + return S->kind() == SymbolBody::SharedKind; + } + + SharedSymbol(SharedFile<ELFT> *F, StringRef Name, const Elf_Sym &Sym) + : DefinedElf<ELFT>(SymbolBody::SharedKind, Name, Sym), File(F) {} + + SharedFile<ELFT> *File; + + // True if the linker has to generate a copy relocation for this shared + // symbol. OffsetInBSS is significant only when NeedsCopy is true. + bool NeedsCopy = false; + uintX_t OffsetInBSS = 0; +}; + +// This class represents a symbol defined in an archive file. 
It is +// created from an archive file header, and it knows how to load an +// object file from an archive to replace itself with a defined +// symbol. If the resolver finds both Undefined and Lazy for +// the same name, it will ask the Lazy to load a file. +class Lazy : public SymbolBody { +public: + Lazy(ArchiveFile *F, const llvm::object::Archive::Symbol S) + : SymbolBody(LazyKind, S.getName(), false, llvm::ELF::STV_DEFAULT, false), + File(F), Sym(S) {} + + static bool classof(const SymbolBody *S) { return S->kind() == LazyKind; } + + // Returns an object file for this symbol, or a nullptr if the file + // was already returned. + std::unique_ptr<InputFile> getMember(); + + void setWeak() { IsWeak = true; } + void setUsedInRegularObj() { IsUsedInRegularObj = true; } + +private: + ArchiveFile *File; + const llvm::object::Archive::Symbol Sym; +}; + +// Some linker-generated symbols need to be created as +// DefinedRegular symbols, so they need Elf_Sym symbols. +// Here we allocate such Elf_Sym symbols statically. +template <class ELFT> struct ElfSym { + typedef typename llvm::object::ELFFile<ELFT>::Elf_Sym Elf_Sym; + + // Used to represent an undefined symbol which we don't want + // to add to the output file's symbol table. + static Elf_Sym IgnoreUndef; + + // The content for _end and end symbols. + static Elf_Sym End; + + // The content for _gp symbol for MIPS target. + static Elf_Sym MipsGp; + + // __rel_iplt_start/__rel_iplt_end for signaling + // where R_[*]_IRELATIVE relocations do live. 
+ static Elf_Sym RelaIpltStart; + static Elf_Sym RelaIpltEnd; +}; + +template <class ELFT> typename ElfSym<ELFT>::Elf_Sym ElfSym<ELFT>::IgnoreUndef; +template <class ELFT> typename ElfSym<ELFT>::Elf_Sym ElfSym<ELFT>::End; +template <class ELFT> typename ElfSym<ELFT>::Elf_Sym ElfSym<ELFT>::MipsGp; +template <class ELFT> +typename ElfSym<ELFT>::Elf_Sym ElfSym<ELFT>::RelaIpltStart; +template <class ELFT> typename ElfSym<ELFT>::Elf_Sym ElfSym<ELFT>::RelaIpltEnd; + +} // namespace elf2 +} // namespace lld + +#endif diff --git a/ELF/Target.cpp b/ELF/Target.cpp new file mode 100644 index 000000000000..8d848d040c61 --- /dev/null +++ b/ELF/Target.cpp @@ -0,0 +1,1481 @@ +//===- Target.cpp ---------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Machine-specific things, such as applying relocations, creation of +// GOT or PLT entries, etc., are handled in this file. +// +// Refer the ELF spec for the single letter varaibles, S, A or P, used +// in this file. SA is S+A. 
+// +//===----------------------------------------------------------------------===// + +#include "Target.h" +#include "Error.h" +#include "OutputSections.h" +#include "Symbols.h" + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/Object/ELF.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/ELF.h" + +using namespace llvm; +using namespace llvm::object; +using namespace llvm::support::endian; +using namespace llvm::ELF; + +namespace lld { +namespace elf2 { + +std::unique_ptr<TargetInfo> Target; + +template <endianness E> static void add32(void *P, int32_t V) { + write32<E>(P, read32<E>(P) + V); +} + +static void add32le(uint8_t *P, int32_t V) { add32<support::little>(P, V); } +static void or32le(uint8_t *P, int32_t V) { write32le(P, read32le(P) | V); } + +template <unsigned N> static void checkInt(int64_t V, uint32_t Type) { + if (isInt<N>(V)) + return; + StringRef S = getELFRelocationTypeName(Config->EMachine, Type); + error("Relocation " + S + " out of range"); +} + +template <unsigned N> static void checkUInt(uint64_t V, uint32_t Type) { + if (isUInt<N>(V)) + return; + StringRef S = getELFRelocationTypeName(Config->EMachine, Type); + error("Relocation " + S + " out of range"); +} + +template <unsigned N> static void checkIntUInt(uint64_t V, uint32_t Type) { + if (isInt<N>(V) || isUInt<N>(V)) + return; + StringRef S = getELFRelocationTypeName(Config->EMachine, Type); + error("Relocation " + S + " out of range"); +} + +template <unsigned N> static void checkAlignment(uint64_t V, uint32_t Type) { + if ((V & (N - 1)) == 0) + return; + StringRef S = getELFRelocationTypeName(Config->EMachine, Type); + error("Improper alignment for relocation " + S); +} + +template <class ELFT> bool isGnuIFunc(const SymbolBody &S) { + if (auto *SS = dyn_cast<DefinedElf<ELFT>>(&S)) + return SS->Sym.getType() == STT_GNU_IFUNC; + return false; +} + +template bool isGnuIFunc<ELF32LE>(const SymbolBody &S); +template bool isGnuIFunc<ELF32BE>(const SymbolBody &S); +template bool 
isGnuIFunc<ELF64LE>(const SymbolBody &S); +template bool isGnuIFunc<ELF64BE>(const SymbolBody &S); + +namespace { +class X86TargetInfo final : public TargetInfo { +public: + X86TargetInfo(); + void writeGotPltHeaderEntries(uint8_t *Buf) const override; + unsigned getDynReloc(unsigned Type) const override; + unsigned getTlsGotReloc(unsigned Type) const override; + bool isTlsDynReloc(unsigned Type, const SymbolBody &S) const override; + void writeGotPltEntry(uint8_t *Buf, uint64_t Plt) const override; + void writePltZeroEntry(uint8_t *Buf, uint64_t GotEntryAddr, + uint64_t PltEntryAddr) const override; + void writePltEntry(uint8_t *Buf, uint64_t GotAddr, uint64_t GotEntryAddr, + uint64_t PltEntryAddr, int32_t Index, + unsigned RelOff) const override; + bool needsCopyRel(uint32_t Type, const SymbolBody &S) const override; + bool relocNeedsDynRelative(unsigned Type) const override; + bool relocNeedsGot(uint32_t Type, const SymbolBody &S) const override; + bool relocNeedsPlt(uint32_t Type, const SymbolBody &S) const override; + void relocateOne(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type, uint64_t P, + uint64_t SA, uint64_t ZA = 0, + uint8_t *PairedLoc = nullptr) const override; + bool isTlsOptimized(unsigned Type, const SymbolBody *S) const override; + unsigned relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type, + uint64_t P, uint64_t SA, + const SymbolBody &S) const override; + bool isGotRelative(uint32_t Type) const override; + +private: + void relocateTlsLdToLe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P, + uint64_t SA) const; + void relocateTlsGdToIe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P, + uint64_t SA) const; + void relocateTlsGdToLe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P, + uint64_t SA) const; + void relocateTlsIeToLe(unsigned Type, uint8_t *Loc, uint8_t *BufEnd, + uint64_t P, uint64_t SA) const; +}; + +class X86_64TargetInfo final : public TargetInfo { +public: + X86_64TargetInfo(); + unsigned getPltRefReloc(unsigned Type) const override; + 
bool isTlsDynReloc(unsigned Type, const SymbolBody &S) const override; + void writeGotPltHeaderEntries(uint8_t *Buf) const override; + void writeGotPltEntry(uint8_t *Buf, uint64_t Plt) const override; + void writePltZeroEntry(uint8_t *Buf, uint64_t GotEntryAddr, + uint64_t PltEntryAddr) const override; + void writePltEntry(uint8_t *Buf, uint64_t GotAddr, uint64_t GotEntryAddr, + uint64_t PltEntryAddr, int32_t Index, + unsigned RelOff) const override; + bool needsCopyRel(uint32_t Type, const SymbolBody &S) const override; + bool relocNeedsGot(uint32_t Type, const SymbolBody &S) const override; + bool relocNeedsPlt(uint32_t Type, const SymbolBody &S) const override; + void relocateOne(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type, uint64_t P, + uint64_t SA, uint64_t ZA = 0, + uint8_t *PairedLoc = nullptr) const override; + bool isRelRelative(uint32_t Type) const override; + bool isTlsOptimized(unsigned Type, const SymbolBody *S) const override; + bool isSizeDynReloc(uint32_t Type, const SymbolBody &S) const override; + unsigned relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type, + uint64_t P, uint64_t SA, + const SymbolBody &S) const override; + +private: + void relocateTlsLdToLe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P, + uint64_t SA) const; + void relocateTlsGdToLe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P, + uint64_t SA) const; + void relocateTlsGdToIe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P, + uint64_t SA) const; + void relocateTlsIeToLe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P, + uint64_t SA) const; +}; + +class PPC64TargetInfo final : public TargetInfo { +public: + PPC64TargetInfo(); + void writeGotPltEntry(uint8_t *Buf, uint64_t Plt) const override; + void writePltZeroEntry(uint8_t *Buf, uint64_t GotEntryAddr, + uint64_t PltEntryAddr) const override; + void writePltEntry(uint8_t *Buf, uint64_t GotAddr, uint64_t GotEntryAddr, + uint64_t PltEntryAddr, int32_t Index, + unsigned RelOff) const override; + bool relocNeedsGot(uint32_t Type, const 
SymbolBody &S) const override; + bool relocNeedsPlt(uint32_t Type, const SymbolBody &S) const override; + void relocateOne(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type, uint64_t P, + uint64_t SA, uint64_t ZA = 0, + uint8_t *PairedLoc = nullptr) const override; + bool isRelRelative(uint32_t Type) const override; +}; + +class AArch64TargetInfo final : public TargetInfo { +public: + AArch64TargetInfo(); + unsigned getDynReloc(unsigned Type) const override; + unsigned getPltRefReloc(unsigned Type) const override; + void writeGotPltEntry(uint8_t *Buf, uint64_t Plt) const override; + void writePltZeroEntry(uint8_t *Buf, uint64_t GotEntryAddr, + uint64_t PltEntryAddr) const override; + void writePltEntry(uint8_t *Buf, uint64_t GotAddr, uint64_t GotEntryAddr, + uint64_t PltEntryAddr, int32_t Index, + unsigned RelOff) const override; + bool needsCopyRel(uint32_t Type, const SymbolBody &S) const override; + bool relocNeedsGot(uint32_t Type, const SymbolBody &S) const override; + bool relocNeedsPlt(uint32_t Type, const SymbolBody &S) const override; + void relocateOne(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type, uint64_t P, + uint64_t SA, uint64_t ZA = 0, + uint8_t *PairedLoc = nullptr) const override; +}; + +template <class ELFT> class MipsTargetInfo final : public TargetInfo { +public: + MipsTargetInfo(); + void writeGotHeaderEntries(uint8_t *Buf) const override; + void writeGotPltEntry(uint8_t *Buf, uint64_t Plt) const override; + void writePltZeroEntry(uint8_t *Buf, uint64_t GotEntryAddr, + uint64_t PltEntryAddr) const override; + void writePltEntry(uint8_t *Buf, uint64_t GotAddr, uint64_t GotEntryAddr, + uint64_t PltEntryAddr, int32_t Index, + unsigned RelOff) const override; + bool relocNeedsGot(uint32_t Type, const SymbolBody &S) const override; + bool relocNeedsPlt(uint32_t Type, const SymbolBody &S) const override; + void relocateOne(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type, uint64_t P, + uint64_t SA, uint64_t ZA = 0, + uint8_t *PairedLoc = nullptr) const 
override; + bool isRelRelative(uint32_t Type) const override; +}; +} // anonymous namespace + +TargetInfo *createTarget() { + switch (Config->EMachine) { + case EM_386: + return new X86TargetInfo(); + case EM_AARCH64: + return new AArch64TargetInfo(); + case EM_MIPS: + switch (Config->EKind) { + case ELF32LEKind: + return new MipsTargetInfo<ELF32LE>(); + case ELF32BEKind: + return new MipsTargetInfo<ELF32BE>(); + default: + error("Unsupported MIPS target"); + } + case EM_PPC64: + return new PPC64TargetInfo(); + case EM_X86_64: + return new X86_64TargetInfo(); + } + error("Unknown target machine"); +} + +TargetInfo::~TargetInfo() {} + +bool TargetInfo::isTlsOptimized(unsigned Type, const SymbolBody *S) const { + return false; +} + +uint64_t TargetInfo::getVAStart() const { return Config->Shared ? 0 : VAStart; } + +bool TargetInfo::needsCopyRel(uint32_t Type, const SymbolBody &S) const { + return false; +} + +bool TargetInfo::isGotRelative(uint32_t Type) const { return false; } + +unsigned TargetInfo::getPltRefReloc(unsigned Type) const { return PCRelReloc; } + +bool TargetInfo::isRelRelative(uint32_t Type) const { return true; } + +bool TargetInfo::isSizeDynReloc(uint32_t Type, const SymbolBody &S) const { + return false; +} + +unsigned TargetInfo::relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd, + uint32_t Type, uint64_t P, uint64_t SA, + const SymbolBody &S) const { + return 0; +} + +void TargetInfo::writeGotHeaderEntries(uint8_t *Buf) const {} + +void TargetInfo::writeGotPltHeaderEntries(uint8_t *Buf) const {} + +X86TargetInfo::X86TargetInfo() { + CopyReloc = R_386_COPY; + PCRelReloc = R_386_PC32; + GotReloc = R_386_GLOB_DAT; + PltReloc = R_386_JUMP_SLOT; + IRelativeReloc = R_386_IRELATIVE; + RelativeReloc = R_386_RELATIVE; + TlsGotReloc = R_386_TLS_TPOFF; + TlsGlobalDynamicReloc = R_386_TLS_GD; + TlsLocalDynamicReloc = R_386_TLS_LDM; + TlsModuleIndexReloc = R_386_TLS_DTPMOD32; + TlsOffsetReloc = R_386_TLS_DTPOFF32; + LazyRelocations = true; + PltEntrySize = 
16; + PltZeroEntrySize = 16; +} + +void X86TargetInfo::writeGotPltHeaderEntries(uint8_t *Buf) const { + write32le(Buf, Out<ELF32LE>::Dynamic->getVA()); +} + +void X86TargetInfo::writeGotPltEntry(uint8_t *Buf, uint64_t Plt) const { + // Skip 6 bytes of "pushl (GOT+4)" + write32le(Buf, Plt + 6); +} + +unsigned X86TargetInfo::getDynReloc(unsigned Type) const { + if (Type == R_386_TLS_LE) + return R_386_TLS_TPOFF; + if (Type == R_386_TLS_LE_32) + return R_386_TLS_TPOFF32; + return Type; +} + +unsigned X86TargetInfo::getTlsGotReloc(unsigned Type) const { + if (Type == R_386_TLS_IE) + return Type; + return TlsGotReloc; +} + +bool X86TargetInfo::isTlsDynReloc(unsigned Type, const SymbolBody &S) const { + if (Type == R_386_TLS_LE || Type == R_386_TLS_LE_32 || + Type == R_386_TLS_GOTIE) + return Config->Shared; + if (Type == R_386_TLS_IE) + return canBePreempted(&S, true); + return Type == R_386_TLS_GD; +} + +void X86TargetInfo::writePltZeroEntry(uint8_t *Buf, uint64_t GotEntryAddr, + uint64_t PltEntryAddr) const { + // Executable files and shared object files have + // separate procedure linkage tables. 
+ if (Config->Shared) { + const uint8_t V[] = { + 0xff, 0xb3, 0x04, 0x00, 0x00, 0x00, // pushl 4(%ebx + 0xff, 0xa3, 0x08, 0x00, 0x00, 0x00, // jmp *8(%ebx) + 0x90, 0x90, 0x90, 0x90 // nop;nop;nop;nop + }; + memcpy(Buf, V, sizeof(V)); + return; + } + + const uint8_t PltData[] = { + 0xff, 0x35, 0x00, 0x00, 0x00, 0x00, // pushl (GOT+4) + 0xff, 0x25, 0x00, 0x00, 0x00, 0x00, // jmp *(GOT+8) + 0x90, 0x90, 0x90, 0x90 // nop;nop;nop;nop + }; + memcpy(Buf, PltData, sizeof(PltData)); + write32le(Buf + 2, GotEntryAddr + 4); // GOT+4 + write32le(Buf + 8, GotEntryAddr + 8); // GOT+8 +} + +void X86TargetInfo::writePltEntry(uint8_t *Buf, uint64_t GotAddr, + uint64_t GotEntryAddr, uint64_t PltEntryAddr, + int32_t Index, unsigned RelOff) const { + const uint8_t Inst[] = { + 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, // jmp *foo_in_GOT|*foo@GOT(%ebx) + 0x68, 0x00, 0x00, 0x00, 0x00, // pushl $reloc_offset + 0xe9, 0x00, 0x00, 0x00, 0x00 // jmp .PLT0@PC + }; + memcpy(Buf, Inst, sizeof(Inst)); + // jmp *foo@GOT(%ebx) or jmp *foo_in_GOT + Buf[1] = Config->Shared ? 0xa3 : 0x25; + write32le(Buf + 2, Config->Shared ? 
(GotEntryAddr - GotAddr) : GotEntryAddr); + write32le(Buf + 7, RelOff); + write32le(Buf + 12, -Index * PltEntrySize - PltZeroEntrySize - 16); +} + +bool X86TargetInfo::needsCopyRel(uint32_t Type, const SymbolBody &S) const { + if (Type == R_386_32 || Type == R_386_16 || Type == R_386_8) + if (auto *SS = dyn_cast<SharedSymbol<ELF32LE>>(&S)) + return SS->Sym.getType() == STT_OBJECT; + return false; +} + +bool X86TargetInfo::relocNeedsGot(uint32_t Type, const SymbolBody &S) const { + if (S.isTls() && Type == R_386_TLS_GD) + return Target->isTlsOptimized(Type, &S) && canBePreempted(&S, true); + if (Type == R_386_TLS_GOTIE || Type == R_386_TLS_IE) + return !isTlsOptimized(Type, &S); + return Type == R_386_GOT32 || relocNeedsPlt(Type, S); +} + +bool X86TargetInfo::relocNeedsPlt(uint32_t Type, const SymbolBody &S) const { + return isGnuIFunc<ELF32LE>(S) || + (Type == R_386_PLT32 && canBePreempted(&S, true)) || + (Type == R_386_PC32 && S.isShared()); +} + +bool X86TargetInfo::isGotRelative(uint32_t Type) const { + // This relocation does not require got entry, + // but it is relative to got and needs it to be created. + // Here we request for that. 
+ return Type == R_386_GOTOFF; +} + +void X86TargetInfo::relocateOne(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type, + uint64_t P, uint64_t SA, uint64_t ZA, + uint8_t *PairedLoc) const { + switch (Type) { + case R_386_32: + add32le(Loc, SA); + break; + case R_386_GOT32: + case R_386_GOTOFF: + add32le(Loc, SA - Out<ELF32LE>::Got->getVA()); + break; + case R_386_GOTPC: + add32le(Loc, SA + Out<ELF32LE>::Got->getVA() - P); + break; + case R_386_PC32: + case R_386_PLT32: + add32le(Loc, SA - P); + break; + case R_386_TLS_GD: + case R_386_TLS_LDM: + case R_386_TLS_TPOFF: { + uint64_t V = SA - Out<ELF32LE>::Got->getVA() - + Out<ELF32LE>::Got->getNumEntries() * 4; + checkInt<32>(V, Type); + write32le(Loc, V); + break; + } + case R_386_TLS_IE: + case R_386_TLS_LDO_32: + write32le(Loc, SA); + break; + case R_386_TLS_LE: + write32le(Loc, SA - Out<ELF32LE>::TlsPhdr->p_memsz); + break; + case R_386_TLS_LE_32: + write32le(Loc, Out<ELF32LE>::TlsPhdr->p_memsz - SA); + break; + default: + error("unrecognized reloc " + Twine(Type)); + } +} + +bool X86TargetInfo::isTlsOptimized(unsigned Type, const SymbolBody *S) const { + if (Config->Shared || (S && !S->isTls())) + return false; + return Type == R_386_TLS_LDO_32 || Type == R_386_TLS_LDM || + Type == R_386_TLS_GD || + (Type == R_386_TLS_IE && !canBePreempted(S, true)) || + (Type == R_386_TLS_GOTIE && !canBePreempted(S, true)); +} + +bool X86TargetInfo::relocNeedsDynRelative(unsigned Type) const { + return Config->Shared && Type == R_386_TLS_IE; +} + +unsigned X86TargetInfo::relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd, + uint32_t Type, uint64_t P, + uint64_t SA, + const SymbolBody &S) const { + switch (Type) { + case R_386_TLS_GD: + if (canBePreempted(&S, true)) + relocateTlsGdToIe(Loc, BufEnd, P, SA); + else + relocateTlsGdToLe(Loc, BufEnd, P, SA); + // The next relocation should be against __tls_get_addr, so skip it + return 1; + case R_386_TLS_GOTIE: + case R_386_TLS_IE: + relocateTlsIeToLe(Type, Loc, BufEnd, P, SA); + return 0; 
+ case R_386_TLS_LDM: + relocateTlsLdToLe(Loc, BufEnd, P, SA); + // The next relocation should be against __tls_get_addr, so skip it + return 1; + case R_386_TLS_LDO_32: + relocateOne(Loc, BufEnd, R_386_TLS_LE, P, SA); + return 0; + } + llvm_unreachable("Unknown TLS optimization"); +} + +// "Ulrich Drepper, ELF Handling For Thread-Local Storage" (5.1 +// IA-32 Linker Optimizations, http://www.akkadia.org/drepper/tls.pdf) shows +// how GD can be optimized to IE: +// leal x@tlsgd(, %ebx, 1), +// call __tls_get_addr@plt +// Is converted to: +// movl %gs:0, %eax +// addl x@gotntpoff(%ebx), %eax +void X86TargetInfo::relocateTlsGdToIe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P, + uint64_t SA) const { + const uint8_t Inst[] = { + 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax + 0x03, 0x83, 0x00, 0x00, 0x00, 0x00 // addl 0(%ebx), %eax + }; + memcpy(Loc - 3, Inst, sizeof(Inst)); + relocateOne(Loc + 5, BufEnd, R_386_32, P, + SA - Out<ELF32LE>::Got->getVA() - + Out<ELF32LE>::Got->getNumEntries() * 4); +} + +// GD can be optimized to LE: +// leal x@tlsgd(, %ebx, 1), +// call __tls_get_addr@plt +// Can be converted to: +// movl %gs:0,%eax +// addl $x@ntpoff,%eax +// But gold emits subl $foo@tpoff,%eax instead of addl. +// These instructions are completely equal in behavior. +// This method generates subl to be consistent with gold. 
+void X86TargetInfo::relocateTlsGdToLe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P, + uint64_t SA) const { + const uint8_t Inst[] = { + 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax + 0x81, 0xe8, 0x00, 0x00, 0x00, 0x00 // subl 0(%ebx), %eax + }; + memcpy(Loc - 3, Inst, sizeof(Inst)); + relocateOne(Loc + 5, BufEnd, R_386_32, P, + Out<ELF32LE>::TlsPhdr->p_memsz - SA); +} + +// LD can be optimized to LE: +// leal foo(%reg),%eax +// call ___tls_get_addr +// Is converted to: +// movl %gs:0,%eax +// nop +// leal 0(%esi,1),%esi +void X86TargetInfo::relocateTlsLdToLe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P, + uint64_t SA) const { + const uint8_t Inst[] = { + 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0,%eax + 0x90, // nop + 0x8d, 0x74, 0x26, 0x00 // leal 0(%esi,1),%esi + }; + memcpy(Loc - 2, Inst, sizeof(Inst)); +} + +// In some conditions, relocations can be optimized to avoid using GOT. +// This function does that for Initial Exec to Local Exec case. +// Read "ELF Handling For Thread-Local Storage, 5.1 +// IA-32 Linker Optimizations" (http://www.akkadia.org/drepper/tls.pdf) +// by Ulrich Drepper for details. +void X86TargetInfo::relocateTlsIeToLe(unsigned Type, uint8_t *Loc, + uint8_t *BufEnd, uint64_t P, + uint64_t SA) const { + // Ulrich's document section 6.2 says that @gotntpoff can + // be used with MOVL or ADDL instructions. + // @indntpoff is similar to @gotntpoff, but for use in + // position dependent code. + uint8_t *Inst = Loc - 2; + uint8_t *Op = Loc - 1; + uint8_t Reg = (Loc[-1] >> 3) & 7; + bool IsMov = *Inst == 0x8b; + if (Type == R_386_TLS_IE) { + // For R_386_TLS_IE relocation we perform the next transformations: + // MOVL foo@INDNTPOFF,%EAX is transformed to MOVL $foo,%EAX + // MOVL foo@INDNTPOFF,%REG is transformed to MOVL $foo,%REG + // ADDL foo@INDNTPOFF,%REG is transformed to ADDL $foo,%REG + // First one is special because when EAX is used the sequence is 5 bytes + // long, otherwise it is 6 bytes. 
+ if (*Op == 0xa1) { + *Op = 0xb8; + } else { + *Inst = IsMov ? 0xc7 : 0x81; + *Op = 0xc0 | ((*Op >> 3) & 7); + } + } else { + // R_386_TLS_GOTIE relocation can be optimized to + // R_386_TLS_LE so that it does not use GOT. + // "MOVL foo@GOTTPOFF(%RIP), %REG" is transformed to "MOVL $foo, %REG". + // "ADDL foo@GOTNTPOFF(%RIP), %REG" is transformed to "LEAL foo(%REG), %REG" + // Note: gold converts to ADDL instead of LEAL. + *Inst = IsMov ? 0xc7 : 0x8d; + if (IsMov) + *Op = 0xc0 | ((*Op >> 3) & 7); + else + *Op = 0x80 | Reg | (Reg << 3); + } + relocateOne(Loc, BufEnd, R_386_TLS_LE, P, SA); +} + +X86_64TargetInfo::X86_64TargetInfo() { + CopyReloc = R_X86_64_COPY; + PCRelReloc = R_X86_64_PC32; + GotReloc = R_X86_64_GLOB_DAT; + PltReloc = R_X86_64_JUMP_SLOT; + RelativeReloc = R_X86_64_RELATIVE; + IRelativeReloc = R_X86_64_IRELATIVE; + TlsGotReloc = R_X86_64_TPOFF64; + TlsLocalDynamicReloc = R_X86_64_TLSLD; + TlsGlobalDynamicReloc = R_X86_64_TLSGD; + TlsModuleIndexReloc = R_X86_64_DTPMOD64; + TlsOffsetReloc = R_X86_64_DTPOFF64; + LazyRelocations = true; + PltEntrySize = 16; + PltZeroEntrySize = 16; +} + +void X86_64TargetInfo::writeGotPltHeaderEntries(uint8_t *Buf) const { + write64le(Buf, Out<ELF64LE>::Dynamic->getVA()); +} + +void X86_64TargetInfo::writeGotPltEntry(uint8_t *Buf, uint64_t Plt) const { + // Skip 6 bytes of "jmpq *got(%rip)" + write32le(Buf, Plt + 6); +} + +void X86_64TargetInfo::writePltZeroEntry(uint8_t *Buf, uint64_t GotEntryAddr, + uint64_t PltEntryAddr) const { + const uint8_t PltData[] = { + 0xff, 0x35, 0x00, 0x00, 0x00, 0x00, // pushq GOT+8(%rip) + 0xff, 0x25, 0x00, 0x00, 0x00, 0x00, // jmp *GOT+16(%rip) + 0x0f, 0x1f, 0x40, 0x00 // nopl 0x0(rax) + }; + memcpy(Buf, PltData, sizeof(PltData)); + write32le(Buf + 2, GotEntryAddr - PltEntryAddr + 2); // GOT+8 + write32le(Buf + 8, GotEntryAddr - PltEntryAddr + 4); // GOT+16 +} + +void X86_64TargetInfo::writePltEntry(uint8_t *Buf, uint64_t GotAddr, + uint64_t GotEntryAddr, + uint64_t PltEntryAddr, 
int32_t Index, + unsigned RelOff) const { + const uint8_t Inst[] = { + 0xff, 0x25, 0x00, 0x00, 0x00, 0x00, // jmpq *got(%rip) + 0x68, 0x00, 0x00, 0x00, 0x00, // pushq <relocation index> + 0xe9, 0x00, 0x00, 0x00, 0x00 // jmpq plt[0] + }; + memcpy(Buf, Inst, sizeof(Inst)); + + write32le(Buf + 2, GotEntryAddr - PltEntryAddr - 6); + write32le(Buf + 7, Index); + write32le(Buf + 12, -Index * PltEntrySize - PltZeroEntrySize - 16); +} + +bool X86_64TargetInfo::needsCopyRel(uint32_t Type, const SymbolBody &S) const { + if (Type == R_X86_64_32S || Type == R_X86_64_32 || Type == R_X86_64_PC32 || + Type == R_X86_64_64) + if (auto *SS = dyn_cast<SharedSymbol<ELF64LE>>(&S)) + return SS->Sym.getType() == STT_OBJECT; + return false; +} + +bool X86_64TargetInfo::relocNeedsGot(uint32_t Type, const SymbolBody &S) const { + if (Type == R_X86_64_TLSGD) + return Target->isTlsOptimized(Type, &S) && canBePreempted(&S, true); + if (Type == R_X86_64_GOTTPOFF) + return !isTlsOptimized(Type, &S); + return Type == R_X86_64_GOTPCREL || relocNeedsPlt(Type, S); +} + +bool X86_64TargetInfo::isTlsDynReloc(unsigned Type, const SymbolBody &S) const { + return Type == R_X86_64_GOTTPOFF || Type == R_X86_64_TLSGD; +} + +unsigned X86_64TargetInfo::getPltRefReloc(unsigned Type) const { + if (Type == R_X86_64_PLT32) + return R_X86_64_PC32; + return Type; +} + +bool X86_64TargetInfo::relocNeedsPlt(uint32_t Type, const SymbolBody &S) const { + if (needsCopyRel(Type, S)) + return false; + if (isGnuIFunc<ELF64LE>(S)) + return true; + + switch (Type) { + default: + return false; + case R_X86_64_32: + case R_X86_64_64: + case R_X86_64_PC32: + // This relocation is defined to have a value of (S + A - P). + // The problems start when a non PIC program calls a function in a shared + // library. + // In an ideal world, we could just report an error saying the relocation + // can overflow at runtime. + // In the real world with glibc, crt1.o has a R_X86_64_PC32 pointing to + // libc.so. 
+ // + // The general idea on how to handle such cases is to create a PLT entry + // and use that as the function value. + // + // For the static linking part, we just return true and everything else + // will use the the PLT entry as the address. + // + // The remaining (unimplemented) problem is making sure pointer equality + // still works. We need the help of the dynamic linker for that. We + // let it know that we have a direct reference to a so symbol by creating + // an undefined symbol with a non zero st_value. Seeing that, the + // dynamic linker resolves the symbol to the value of the symbol we created. + // This is true even for got entries, so pointer equality is maintained. + // To avoid an infinite loop, the only entry that points to the + // real function is a dedicated got entry used by the plt. That is + // identified by special relocation types (R_X86_64_JUMP_SLOT, + // R_386_JMP_SLOT, etc). + return S.isShared(); + case R_X86_64_PLT32: + return canBePreempted(&S, true); + } +} + +bool X86_64TargetInfo::isRelRelative(uint32_t Type) const { + switch (Type) { + default: + return false; + case R_X86_64_DTPOFF32: + case R_X86_64_DTPOFF64: + case R_X86_64_PC8: + case R_X86_64_PC16: + case R_X86_64_PC32: + case R_X86_64_PC64: + case R_X86_64_PLT32: + case R_X86_64_SIZE32: + case R_X86_64_SIZE64: + return true; + } +} + +bool X86_64TargetInfo::isSizeDynReloc(uint32_t Type, + const SymbolBody &S) const { + return (Type == R_X86_64_SIZE32 || Type == R_X86_64_SIZE64) && + canBePreempted(&S, false); +} + +bool X86_64TargetInfo::isTlsOptimized(unsigned Type, + const SymbolBody *S) const { + if (Config->Shared || (S && !S->isTls())) + return false; + return Type == R_X86_64_TLSGD || Type == R_X86_64_TLSLD || + Type == R_X86_64_DTPOFF32 || + (Type == R_X86_64_GOTTPOFF && !canBePreempted(S, true)); +} + +// "Ulrich Drepper, ELF Handling For Thread-Local Storage" (5.5 +// x86-x64 linker optimizations, http://www.akkadia.org/drepper/tls.pdf) shows +// how LD can 
be optimized to LE: +// leaq bar@tlsld(%rip), %rdi +// callq __tls_get_addr@PLT +// leaq bar@dtpoff(%rax), %rcx +// Is converted to: +// .word 0x6666 +// .byte 0x66 +// mov %fs:0,%rax +// leaq bar@tpoff(%rax), %rcx +void X86_64TargetInfo::relocateTlsLdToLe(uint8_t *Loc, uint8_t *BufEnd, + uint64_t P, uint64_t SA) const { + const uint8_t Inst[] = { + 0x66, 0x66, //.word 0x6666 + 0x66, //.byte 0x66 + 0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00 // mov %fs:0,%rax + }; + memcpy(Loc - 3, Inst, sizeof(Inst)); +} + +// "Ulrich Drepper, ELF Handling For Thread-Local Storage" (5.5 +// x86-x64 linker optimizations, http://www.akkadia.org/drepper/tls.pdf) shows +// how GD can be optimized to LE: +// .byte 0x66 +// leaq x@tlsgd(%rip), %rdi +// .word 0x6666 +// rex64 +// call __tls_get_addr@plt +// Is converted to: +// mov %fs:0x0,%rax +// lea x@tpoff,%rax +void X86_64TargetInfo::relocateTlsGdToLe(uint8_t *Loc, uint8_t *BufEnd, + uint64_t P, uint64_t SA) const { + const uint8_t Inst[] = { + 0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00, // mov %fs:0x0,%rax + 0x48, 0x8d, 0x80, 0x00, 0x00, 0x00, 0x00 // lea x@tpoff,%rax + }; + memcpy(Loc - 4, Inst, sizeof(Inst)); + relocateOne(Loc + 8, BufEnd, R_X86_64_TPOFF32, P, SA); +} + +// "Ulrich Drepper, ELF Handling For Thread-Local Storage" (5.5 +// x86-x64 linker optimizations, http://www.akkadia.org/drepper/tls.pdf) shows +// how GD can be optimized to IE: +// .byte 0x66 +// leaq x@tlsgd(%rip), %rdi +// .word 0x6666 +// rex64 +// call __tls_get_addr@plt +// Is converted to: +// mov %fs:0x0,%rax +// addq x@tpoff,%rax +void X86_64TargetInfo::relocateTlsGdToIe(uint8_t *Loc, uint8_t *BufEnd, + uint64_t P, uint64_t SA) const { + const uint8_t Inst[] = { + 0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00, // mov %fs:0x0,%rax + 0x48, 0x03, 0x05, 0x00, 0x00, 0x00, 0x00 // addq x@tpoff,%rax + }; + memcpy(Loc - 4, Inst, sizeof(Inst)); + relocateOne(Loc + 8, BufEnd, R_X86_64_TPOFF64, P + 12, SA); +} + +// In some conditions, 
R_X86_64_GOTTPOFF relocation can be optimized to +// R_X86_64_TPOFF32 so that it does not use GOT. +// This function does that. Read "ELF Handling For Thread-Local Storage, +// 5.5 x86-x64 linker optimizations" (http://www.akkadia.org/drepper/tls.pdf) +// by Ulrich Drepper for details. +void X86_64TargetInfo::relocateTlsIeToLe(uint8_t *Loc, uint8_t *BufEnd, + uint64_t P, uint64_t SA) const { + // Ulrich's document section 6.5 says that @gottpoff(%rip) must be + // used in MOVQ or ADDQ instructions only. + // "MOVQ foo@GOTTPOFF(%RIP), %REG" is transformed to "MOVQ $foo, %REG". + // "ADDQ foo@GOTTPOFF(%RIP), %REG" is transformed to "LEAQ foo(%REG), %REG" + // (if the register is not RSP/R12) or "ADDQ $foo, %RSP". + // Opcodes info can be found at http://ref.x86asm.net/coder64.html#x48. + uint8_t *Prefix = Loc - 3; + uint8_t *Inst = Loc - 2; + uint8_t *RegSlot = Loc - 1; + uint8_t Reg = Loc[-1] >> 3; + bool IsMov = *Inst == 0x8b; + bool RspAdd = !IsMov && Reg == 4; + // r12 and rsp registers requires special handling. + // Problem is that for other registers, for example leaq 0xXXXXXXXX(%r11),%r11 + // result out is 7 bytes: 4d 8d 9b XX XX XX XX, + // but leaq 0xXXXXXXXX(%r12),%r12 is 8 bytes: 4d 8d a4 24 XX XX XX XX. + // The same true for rsp. So we convert to addq for them, saving 1 byte that + // we dont have. + if (RspAdd) + *Inst = 0x81; + else + *Inst = IsMov ? 0xc7 : 0x8d; + if (*Prefix == 0x4c) + *Prefix = (IsMov || RspAdd) ? 0x49 : 0x4d; + *RegSlot = (IsMov || RspAdd) ? (0xc0 | Reg) : (0x80 | Reg | (Reg << 3)); + relocateOne(Loc, BufEnd, R_X86_64_TPOFF32, P, SA); +} + +// This function applies a TLS relocation with an optimization as described +// in the Ulrich's document. As a result of rewriting instructions at the +// relocation target, relocations immediately follow the TLS relocation (which +// would be applied to rewritten instructions) may have to be skipped. +// This function returns a number of relocations that need to be skipped. 
+unsigned X86_64TargetInfo::relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd, + uint32_t Type, uint64_t P, + uint64_t SA, + const SymbolBody &S) const { + switch (Type) { + case R_X86_64_DTPOFF32: + relocateOne(Loc, BufEnd, R_X86_64_TPOFF32, P, SA); + return 0; + case R_X86_64_GOTTPOFF: + relocateTlsIeToLe(Loc, BufEnd, P, SA); + return 0; + case R_X86_64_TLSGD: { + if (canBePreempted(&S, true)) + relocateTlsGdToIe(Loc, BufEnd, P, SA); + else + relocateTlsGdToLe(Loc, BufEnd, P, SA); + // The next relocation should be against __tls_get_addr, so skip it + return 1; + } + case R_X86_64_TLSLD: + relocateTlsLdToLe(Loc, BufEnd, P, SA); + // The next relocation should be against __tls_get_addr, so skip it + return 1; + } + llvm_unreachable("Unknown TLS optimization"); +} + +void X86_64TargetInfo::relocateOne(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type, + uint64_t P, uint64_t SA, uint64_t ZA, + uint8_t *PairedLoc) const { + switch (Type) { + case R_X86_64_32: + checkUInt<32>(SA, Type); + write32le(Loc, SA); + break; + case R_X86_64_32S: + checkInt<32>(SA, Type); + write32le(Loc, SA); + break; + case R_X86_64_64: + write64le(Loc, SA); + break; + case R_X86_64_DTPOFF32: + write32le(Loc, SA); + break; + case R_X86_64_DTPOFF64: + write64le(Loc, SA); + break; + case R_X86_64_GOTPCREL: + case R_X86_64_PC32: + case R_X86_64_PLT32: + case R_X86_64_TLSGD: + case R_X86_64_TLSLD: + write32le(Loc, SA - P); + break; + case R_X86_64_SIZE32: + write32le(Loc, ZA); + break; + case R_X86_64_SIZE64: + write64le(Loc, ZA); + break; + case R_X86_64_TPOFF32: { + uint64_t Val = SA - Out<ELF64LE>::TlsPhdr->p_memsz; + checkInt<32>(Val, Type); + write32le(Loc, Val); + break; + } + case R_X86_64_TPOFF64: + write32le(Loc, SA - P); + break; + default: + error("unrecognized reloc " + Twine(Type)); + } +} + +// Relocation masks following the #lo(value), #hi(value), #ha(value), +// #higher(value), #highera(value), #highest(value), and #highesta(value) +// macros defined in section 4.5.1. 
Relocation Types of the PPC-elf64abi +// document. +static uint16_t applyPPCLo(uint64_t V) { return V; } +static uint16_t applyPPCHi(uint64_t V) { return V >> 16; } +static uint16_t applyPPCHa(uint64_t V) { return (V + 0x8000) >> 16; } +static uint16_t applyPPCHigher(uint64_t V) { return V >> 32; } +static uint16_t applyPPCHighera(uint64_t V) { return (V + 0x8000) >> 32; } +static uint16_t applyPPCHighest(uint64_t V) { return V >> 48; } +static uint16_t applyPPCHighesta(uint64_t V) { return (V + 0x8000) >> 48; } + +PPC64TargetInfo::PPC64TargetInfo() { + PCRelReloc = R_PPC64_REL24; + GotReloc = R_PPC64_GLOB_DAT; + RelativeReloc = R_PPC64_RELATIVE; + PltEntrySize = 32; + + // We need 64K pages (at least under glibc/Linux, the loader won't + // set different permissions on a finer granularity than that). + PageSize = 65536; + + // The PPC64 ELF ABI v1 spec, says: + // + // It is normally desirable to put segments with different characteristics + // in separate 256 Mbyte portions of the address space, to give the + // operating system full paging flexibility in the 64-bit address space. + // + // And because the lowest non-zero 256M boundary is 0x10000000, PPC64 linkers + // use 0x10000000 as the starting address. + VAStart = 0x10000000; +} + +uint64_t getPPC64TocBase() { + // The TOC consists of sections .got, .toc, .tocbss, .plt in that + // order. The TOC starts where the first of these sections starts. + + // FIXME: This obviously does not do the right thing when there is no .got + // section, but there is a .toc or .tocbss section. + uint64_t TocVA = Out<ELF64BE>::Got->getVA(); + if (!TocVA) + TocVA = Out<ELF64BE>::Plt->getVA(); + + // Per the ppc64-elf-linux ABI, The TOC base is TOC value plus 0x8000 + // thus permitting a full 64 Kbytes segment. Note that the glibc startup + // code (crt1.o) assumes that you can get from the TOC base to the + // start of the .toc section with only a single (signed) 16-bit relocation. 
+ return TocVA + 0x8000; +} + +void PPC64TargetInfo::writeGotPltEntry(uint8_t *Buf, uint64_t Plt) const {} +void PPC64TargetInfo::writePltZeroEntry(uint8_t *Buf, uint64_t GotEntryAddr, + uint64_t PltEntryAddr) const {} +void PPC64TargetInfo::writePltEntry(uint8_t *Buf, uint64_t GotAddr, + uint64_t GotEntryAddr, + uint64_t PltEntryAddr, int32_t Index, + unsigned RelOff) const { + uint64_t Off = GotEntryAddr - getPPC64TocBase(); + + // FIXME: What we should do, in theory, is get the offset of the function + // descriptor in the .opd section, and use that as the offset from %r2 (the + // TOC-base pointer). Instead, we have the GOT-entry offset, and that will + // be a pointer to the function descriptor in the .opd section. Using + // this scheme is simpler, but requires an extra indirection per PLT dispatch. + + write32be(Buf, 0xf8410028); // std %r2, 40(%r1) + write32be(Buf + 4, 0x3d620000 | applyPPCHa(Off)); // addis %r11, %r2, X@ha + write32be(Buf + 8, 0xe98b0000 | applyPPCLo(Off)); // ld %r12, X@l(%r11) + write32be(Buf + 12, 0xe96c0000); // ld %r11,0(%r12) + write32be(Buf + 16, 0x7d6903a6); // mtctr %r11 + write32be(Buf + 20, 0xe84c0008); // ld %r2,8(%r12) + write32be(Buf + 24, 0xe96c0010); // ld %r11,16(%r12) + write32be(Buf + 28, 0x4e800420); // bctr +} + +bool PPC64TargetInfo::relocNeedsGot(uint32_t Type, const SymbolBody &S) const { + if (relocNeedsPlt(Type, S)) + return true; + + switch (Type) { + default: return false; + case R_PPC64_GOT16: + case R_PPC64_GOT16_DS: + case R_PPC64_GOT16_HA: + case R_PPC64_GOT16_HI: + case R_PPC64_GOT16_LO: + case R_PPC64_GOT16_LO_DS: + return true; + } +} + +bool PPC64TargetInfo::relocNeedsPlt(uint32_t Type, const SymbolBody &S) const { + // These are function calls that need to be redirected through a PLT stub. 
+ return Type == R_PPC64_REL24 && canBePreempted(&S, false); +} + +bool PPC64TargetInfo::isRelRelative(uint32_t Type) const { + switch (Type) { + default: + return true; + case R_PPC64_ADDR64: + case R_PPC64_TOC: + return false; + } +} + +void PPC64TargetInfo::relocateOne(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type, + uint64_t P, uint64_t SA, uint64_t ZA, + uint8_t *PairedLoc) const { + uint64_t TB = getPPC64TocBase(); + + // For a TOC-relative relocation, adjust the addend and proceed in terms of + // the corresponding ADDR16 relocation type. + switch (Type) { + case R_PPC64_TOC16: Type = R_PPC64_ADDR16; SA -= TB; break; + case R_PPC64_TOC16_DS: Type = R_PPC64_ADDR16_DS; SA -= TB; break; + case R_PPC64_TOC16_HA: Type = R_PPC64_ADDR16_HA; SA -= TB; break; + case R_PPC64_TOC16_HI: Type = R_PPC64_ADDR16_HI; SA -= TB; break; + case R_PPC64_TOC16_LO: Type = R_PPC64_ADDR16_LO; SA -= TB; break; + case R_PPC64_TOC16_LO_DS: Type = R_PPC64_ADDR16_LO_DS; SA -= TB; break; + default: break; + } + + switch (Type) { + case R_PPC64_ADDR14: { + checkAlignment<4>(SA, Type); + // Preserve the AA/LK bits in the branch instruction + uint8_t AALK = Loc[3]; + write16be(Loc + 2, (AALK & 3) | (SA & 0xfffc)); + break; + } + case R_PPC64_ADDR16: + checkInt<16>(SA, Type); + write16be(Loc, SA); + break; + case R_PPC64_ADDR16_DS: + checkInt<16>(SA, Type); + write16be(Loc, (read16be(Loc) & 3) | (SA & ~3)); + break; + case R_PPC64_ADDR16_HA: + write16be(Loc, applyPPCHa(SA)); + break; + case R_PPC64_ADDR16_HI: + write16be(Loc, applyPPCHi(SA)); + break; + case R_PPC64_ADDR16_HIGHER: + write16be(Loc, applyPPCHigher(SA)); + break; + case R_PPC64_ADDR16_HIGHERA: + write16be(Loc, applyPPCHighera(SA)); + break; + case R_PPC64_ADDR16_HIGHEST: + write16be(Loc, applyPPCHighest(SA)); + break; + case R_PPC64_ADDR16_HIGHESTA: + write16be(Loc, applyPPCHighesta(SA)); + break; + case R_PPC64_ADDR16_LO: + write16be(Loc, applyPPCLo(SA)); + break; + case R_PPC64_ADDR16_LO_DS: + write16be(Loc, (read16be(Loc) & 3) 
| (applyPPCLo(SA) & ~3)); + break; + case R_PPC64_ADDR32: + checkInt<32>(SA, Type); + write32be(Loc, SA); + break; + case R_PPC64_ADDR64: + write64be(Loc, SA); + break; + case R_PPC64_REL16_HA: + write16be(Loc, applyPPCHa(SA - P)); + break; + case R_PPC64_REL16_HI: + write16be(Loc, applyPPCHi(SA - P)); + break; + case R_PPC64_REL16_LO: + write16be(Loc, applyPPCLo(SA - P)); + break; + case R_PPC64_REL24: { + // If we have an undefined weak symbol, we might get here with a symbol + // address of zero. That could overflow, but the code must be unreachable, + // so don't bother doing anything at all. + if (!SA) + break; + + uint64_t PltStart = Out<ELF64BE>::Plt->getVA(); + uint64_t PltEnd = PltStart + Out<ELF64BE>::Plt->getSize(); + bool InPlt = PltStart <= SA && SA < PltEnd; + + if (!InPlt && Out<ELF64BE>::Opd) { + // If this is a local call, and we currently have the address of a + // function-descriptor, get the underlying code address instead. + uint64_t OpdStart = Out<ELF64BE>::Opd->getVA(); + uint64_t OpdEnd = OpdStart + Out<ELF64BE>::Opd->getSize(); + bool InOpd = OpdStart <= SA && SA < OpdEnd; + + if (InOpd) + SA = read64be(&Out<ELF64BE>::OpdBuf[SA - OpdStart]); + } + + uint32_t Mask = 0x03FFFFFC; + checkInt<24>(SA - P, Type); + write32be(Loc, (read32be(Loc) & ~Mask) | ((SA - P) & Mask)); + + uint32_t Nop = 0x60000000; + if (InPlt && Loc + 8 <= BufEnd && read32be(Loc + 4) == Nop) + write32be(Loc + 4, 0xe8410028); // ld %r2, 40(%r1) + break; + } + case R_PPC64_REL32: + checkInt<32>(SA - P, Type); + write32be(Loc, SA - P); + break; + case R_PPC64_REL64: + write64be(Loc, SA - P); + break; + case R_PPC64_TOC: + write64be(Loc, SA); + break; + default: + error("unrecognized reloc " + Twine(Type)); + } +} + +AArch64TargetInfo::AArch64TargetInfo() { + CopyReloc = R_AARCH64_COPY; + GotReloc = R_AARCH64_GLOB_DAT; + PltReloc = R_AARCH64_JUMP_SLOT; + LazyRelocations = true; + PltEntrySize = 16; + PltZeroEntrySize = 32; +} + +unsigned AArch64TargetInfo::getDynReloc(unsigned 
Type) const { + if (Type == R_AARCH64_ABS32 || Type == R_AARCH64_ABS64) + return Type; + StringRef S = getELFRelocationTypeName(EM_AARCH64, Type); + error("Relocation " + S + " cannot be used when making a shared object; " + "recompile with -fPIC."); +} + +unsigned AArch64TargetInfo::getPltRefReloc(unsigned Type) const { return Type; } + +void AArch64TargetInfo::writeGotPltEntry(uint8_t *Buf, uint64_t Plt) const { + write64le(Buf, Out<ELF64LE>::Plt->getVA()); +} + +void AArch64TargetInfo::writePltZeroEntry(uint8_t *Buf, uint64_t GotEntryAddr, + uint64_t PltEntryAddr) const { + const uint8_t PltData[] = { + 0xf0, 0x7b, 0xbf, 0xa9, // stp x16, x30, [sp,#-16]! + 0x10, 0x00, 0x00, 0x90, // adrp x16, Page(&(.plt.got[2])) + 0x11, 0x02, 0x40, 0xf9, // ldr x17, [x16, Offset(&(.plt.got[2]))] + 0x10, 0x02, 0x00, 0x91, // add x16, x16, Offset(&(.plt.got[2])) + 0x20, 0x02, 0x1f, 0xd6, // br x17 + 0x1f, 0x20, 0x03, 0xd5, // nop + 0x1f, 0x20, 0x03, 0xd5, // nop + 0x1f, 0x20, 0x03, 0xd5 // nop + }; + memcpy(Buf, PltData, sizeof(PltData)); + + relocateOne(Buf + 4, Buf + 8, R_AARCH64_ADR_PREL_PG_HI21, PltEntryAddr + 4, + GotEntryAddr + 16); + relocateOne(Buf + 8, Buf + 12, R_AARCH64_LDST64_ABS_LO12_NC, PltEntryAddr + 8, + GotEntryAddr + 16); + relocateOne(Buf + 12, Buf + 16, R_AARCH64_ADD_ABS_LO12_NC, PltEntryAddr + 12, + GotEntryAddr + 16); +} + +void AArch64TargetInfo::writePltEntry(uint8_t *Buf, uint64_t GotAddr, + uint64_t GotEntryAddr, + uint64_t PltEntryAddr, int32_t Index, + unsigned RelOff) const { + const uint8_t Inst[] = { + 0x10, 0x00, 0x00, 0x90, // adrp x16, Page(&(.plt.got[n])) + 0x11, 0x02, 0x40, 0xf9, // ldr x17, [x16, Offset(&(.plt.got[n]))] + 0x10, 0x02, 0x00, 0x91, // add x16, x16, Offset(&(.plt.got[n])) + 0x20, 0x02, 0x1f, 0xd6 // br x17 + }; + memcpy(Buf, Inst, sizeof(Inst)); + + relocateOne(Buf, Buf + 4, R_AARCH64_ADR_PREL_PG_HI21, PltEntryAddr, + GotEntryAddr); + relocateOne(Buf + 4, Buf + 8, R_AARCH64_LDST64_ABS_LO12_NC, PltEntryAddr + 4, + GotEntryAddr); + 
relocateOne(Buf + 8, Buf + 12, R_AARCH64_ADD_ABS_LO12_NC, PltEntryAddr + 8, + GotEntryAddr); +} + +bool AArch64TargetInfo::needsCopyRel(uint32_t Type, const SymbolBody &S) const { + if (Config->Shared) + return false; + switch (Type) { + default: + return false; + case R_AARCH64_ABS16: + case R_AARCH64_ABS32: + case R_AARCH64_ABS64: + case R_AARCH64_ADD_ABS_LO12_NC: + case R_AARCH64_ADR_PREL_LO21: + case R_AARCH64_ADR_PREL_PG_HI21: + case R_AARCH64_LDST8_ABS_LO12_NC: + case R_AARCH64_LDST32_ABS_LO12_NC: + case R_AARCH64_LDST64_ABS_LO12_NC: + if (auto *SS = dyn_cast<SharedSymbol<ELF64LE>>(&S)) + return SS->Sym.getType() == STT_OBJECT; + return false; + } +} + +bool AArch64TargetInfo::relocNeedsGot(uint32_t Type, + const SymbolBody &S) const { + return Type == R_AARCH64_ADR_GOT_PAGE || Type == R_AARCH64_LD64_GOT_LO12_NC || + relocNeedsPlt(Type, S); +} + +bool AArch64TargetInfo::relocNeedsPlt(uint32_t Type, + const SymbolBody &S) const { + switch (Type) { + default: + return false; + case R_AARCH64_CALL26: + case R_AARCH64_JUMP26: + return canBePreempted(&S, true); + } +} + +static void updateAArch64Adr(uint8_t *L, uint64_t Imm) { + uint32_t ImmLo = (Imm & 0x3) << 29; + uint32_t ImmHi = ((Imm & 0x1FFFFC) >> 2) << 5; + uint64_t Mask = (0x3 << 29) | (0x7FFFF << 5); + write32le(L, (read32le(L) & ~Mask) | ImmLo | ImmHi); +} + +// Page(Expr) is the page address of the expression Expr, defined +// as (Expr & ~0xFFF). (This applies even if the machine page size +// supported by the platform has a different value.) 
+static uint64_t getAArch64Page(uint64_t Expr) { + return Expr & (~static_cast<uint64_t>(0xFFF)); +} + +void AArch64TargetInfo::relocateOne(uint8_t *Loc, uint8_t *BufEnd, + uint32_t Type, uint64_t P, uint64_t SA, + uint64_t ZA, uint8_t *PairedLoc) const { + switch (Type) { + case R_AARCH64_ABS16: + checkIntUInt<16>(SA, Type); + write16le(Loc, SA); + break; + case R_AARCH64_ABS32: + checkIntUInt<32>(SA, Type); + write32le(Loc, SA); + break; + case R_AARCH64_ABS64: + write64le(Loc, SA); + break; + case R_AARCH64_ADD_ABS_LO12_NC: + // This relocation stores 12 bits and there's no instruction + // to do it. Instead, we do a 32 bits store of the value + // of r_addend bitwise-or'ed Loc. This assumes that the addend + // bits in Loc are zero. + or32le(Loc, (SA & 0xFFF) << 10); + break; + case R_AARCH64_ADR_GOT_PAGE: { + uint64_t X = getAArch64Page(SA) - getAArch64Page(P); + checkInt<33>(X, Type); + updateAArch64Adr(Loc, (X >> 12) & 0x1FFFFF); // X[32:12] + break; + } + case R_AARCH64_ADR_PREL_LO21: { + uint64_t X = SA - P; + checkInt<21>(X, Type); + updateAArch64Adr(Loc, X & 0x1FFFFF); + break; + } + case R_AARCH64_ADR_PREL_PG_HI21: { + uint64_t X = getAArch64Page(SA) - getAArch64Page(P); + checkInt<33>(X, Type); + updateAArch64Adr(Loc, (X >> 12) & 0x1FFFFF); // X[32:12] + break; + } + case R_AARCH64_CALL26: + case R_AARCH64_JUMP26: { + uint64_t X = SA - P; + checkInt<28>(X, Type); + or32le(Loc, (X & 0x0FFFFFFC) >> 2); + break; + } + case R_AARCH64_LD64_GOT_LO12_NC: + checkAlignment<8>(SA, Type); + or32le(Loc, (SA & 0xFF8) << 7); + break; + case R_AARCH64_LDST8_ABS_LO12_NC: + or32le(Loc, (SA & 0xFFF) << 10); + break; + case R_AARCH64_LDST32_ABS_LO12_NC: + or32le(Loc, (SA & 0xFFC) << 8); + break; + case R_AARCH64_LDST64_ABS_LO12_NC: + or32le(Loc, (SA & 0xFF8) << 7); + break; + case R_AARCH64_PREL16: + checkIntUInt<16>(SA - P, Type); + write16le(Loc, SA - P); + break; + case R_AARCH64_PREL32: + checkIntUInt<32>(SA - P, Type); + write32le(Loc, SA - P); + break; + case 
R_AARCH64_PREL64: + write64le(Loc, SA - P); + break; + default: + error("unrecognized reloc " + Twine(Type)); + } +} + +template <class ELFT> MipsTargetInfo<ELFT>::MipsTargetInfo() { + PageSize = 65536; + GotHeaderEntriesNum = 2; +} + +template <class ELFT> +void MipsTargetInfo<ELFT>::writeGotHeaderEntries(uint8_t *Buf) const { + typedef typename ELFFile<ELFT>::Elf_Off Elf_Off; + auto *P = reinterpret_cast<Elf_Off *>(Buf); + // Module pointer + P[1] = ELFT::Is64Bits ? 0x8000000000000000 : 0x80000000; +} + +template <class ELFT> +void MipsTargetInfo<ELFT>::writeGotPltEntry(uint8_t *Buf, uint64_t Plt) const {} +template <class ELFT> +void MipsTargetInfo<ELFT>::writePltZeroEntry(uint8_t *Buf, uint64_t GotEntryAddr, + uint64_t PltEntryAddr) const {} +template <class ELFT> +void MipsTargetInfo<ELFT>::writePltEntry(uint8_t *Buf, uint64_t GotAddr, + uint64_t GotEntryAddr, + uint64_t PltEntryAddr, int32_t Index, + unsigned RelOff) const {} + +template <class ELFT> +bool MipsTargetInfo<ELFT>::relocNeedsGot(uint32_t Type, + const SymbolBody &S) const { + return Type == R_MIPS_GOT16 || Type == R_MIPS_CALL16; +} + +template <class ELFT> +bool MipsTargetInfo<ELFT>::relocNeedsPlt(uint32_t Type, + const SymbolBody &S) const { + return false; +} + +static uint16_t mipsHigh(uint64_t V) { return (V + 0x8000) >> 16; } + +template <endianness E, uint8_t BSIZE> +static void applyMipsPcReloc(uint8_t *Loc, uint32_t Type, uint64_t P, + uint64_t SA) { + uint32_t Mask = ~(0xffffffff << BSIZE); + uint32_t Instr = read32<E>(Loc); + int64_t A = SignExtend64<BSIZE + 2>((Instr & Mask) << 2); + checkAlignment<4>(SA + A, Type); + int64_t V = SA + A - P; + checkInt<BSIZE + 2>(V, Type); + write32<E>(Loc, (Instr & ~Mask) | ((V >> 2) & Mask)); +} + +template <class ELFT> +void MipsTargetInfo<ELFT>::relocateOne(uint8_t *Loc, uint8_t *BufEnd, + uint32_t Type, uint64_t P, uint64_t SA, + uint64_t ZA, uint8_t *PairedLoc) const { + const endianness E = ELFT::TargetEndianness; + switch (Type) { + case 
R_MIPS_32: + add32<E>(Loc, SA); + break; + case R_MIPS_CALL16: + case R_MIPS_GOT16: { + int64_t V = SA - getMipsGpAddr<ELFT>(); + if (Type == R_MIPS_GOT16) + checkInt<16>(V, Type); + write32<E>(Loc, (read32<E>(Loc) & 0xffff0000) | (V & 0xffff)); + break; + } + case R_MIPS_GPREL16: { + uint32_t Instr = read32<E>(Loc); + int64_t V = SA + SignExtend64<16>(Instr & 0xffff) - getMipsGpAddr<ELFT>(); + checkInt<16>(V, Type); + write32<E>(Loc, (Instr & 0xffff0000) | (V & 0xffff)); + break; + } + case R_MIPS_GPREL32: + write32<E>(Loc, SA + int32_t(read32<E>(Loc)) - getMipsGpAddr<ELFT>()); + break; + case R_MIPS_HI16: { + uint32_t Instr = read32<E>(Loc); + if (PairedLoc) { + uint64_t AHL = ((Instr & 0xffff) << 16) + + SignExtend64<16>(read32<E>(PairedLoc) & 0xffff); + write32<E>(Loc, (Instr & 0xffff0000) | mipsHigh(SA + AHL)); + } else { + warning("Can't find matching R_MIPS_LO16 relocation for R_MIPS_HI16"); + write32<E>(Loc, (Instr & 0xffff0000) | mipsHigh(SA)); + } + break; + } + case R_MIPS_JALR: + // Ignore this optimization relocation for now + break; + case R_MIPS_LO16: { + uint32_t Instr = read32<E>(Loc); + int64_t AHL = SignExtend64<16>(Instr & 0xffff); + write32<E>(Loc, (Instr & 0xffff0000) | ((SA + AHL) & 0xffff)); + break; + } + case R_MIPS_PC16: + applyMipsPcReloc<E, 16>(Loc, Type, P, SA); + break; + case R_MIPS_PC19_S2: + applyMipsPcReloc<E, 19>(Loc, Type, P, SA); + break; + case R_MIPS_PC21_S2: + applyMipsPcReloc<E, 21>(Loc, Type, P, SA); + break; + case R_MIPS_PC26_S2: + applyMipsPcReloc<E, 26>(Loc, Type, P, SA); + break; + case R_MIPS_PCHI16: { + uint32_t Instr = read32<E>(Loc); + if (PairedLoc) { + uint64_t AHL = ((Instr & 0xffff) << 16) + + SignExtend64<16>(read32<E>(PairedLoc) & 0xffff); + write32<E>(Loc, (Instr & 0xffff0000) | mipsHigh(SA + AHL - P)); + } else { + warning("Can't find matching R_MIPS_PCLO16 relocation for R_MIPS_PCHI16"); + write32<E>(Loc, (Instr & 0xffff0000) | mipsHigh(SA - P)); + } + break; + } + case R_MIPS_PCLO16: { + uint32_t Instr = 
read32<E>(Loc); + int64_t AHL = SignExtend64<16>(Instr & 0xffff); + write32<E>(Loc, (Instr & 0xffff0000) | ((SA + AHL - P) & 0xffff)); + break; + } + default: + error("unrecognized reloc " + Twine(Type)); + } +} + +template <class ELFT> +bool MipsTargetInfo<ELFT>::isRelRelative(uint32_t Type) const { + switch (Type) { + default: + return false; + case R_MIPS_PC16: + case R_MIPS_PC19_S2: + case R_MIPS_PC21_S2: + case R_MIPS_PC26_S2: + case R_MIPS_PCHI16: + case R_MIPS_PCLO16: + return true; + } +} + +// _gp is a MIPS-specific ABI-defined symbol which points to +// a location that is relative to GOT. This function returns +// the value for the symbol. +template <class ELFT> typename ELFFile<ELFT>::uintX_t getMipsGpAddr() { + unsigned GPOffset = 0x7ff0; + if (uint64_t V = Out<ELFT>::Got->getVA()) + return V + GPOffset; + return 0; +} + +template uint32_t getMipsGpAddr<ELF32LE>(); +template uint32_t getMipsGpAddr<ELF32BE>(); +template uint64_t getMipsGpAddr<ELF64LE>(); +template uint64_t getMipsGpAddr<ELF64BE>(); +} +} diff --git a/ELF/Target.h b/ELF/Target.h new file mode 100644 index 000000000000..52c2697dc60d --- /dev/null +++ b/ELF/Target.h @@ -0,0 +1,117 @@ +//===- Target.h -------------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_TARGET_H +#define LLD_ELF_TARGET_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/Object/ELF.h" + +#include <memory> + +namespace lld { +namespace elf2 { +class SymbolBody; + +class TargetInfo { +public: + unsigned getPageSize() const { return PageSize; } + uint64_t getVAStart() const; + unsigned getCopyReloc() const { return CopyReloc; } + unsigned getGotReloc() const { return GotReloc; } + unsigned getPltReloc() const { return PltReloc; } + unsigned getRelativeReloc() const { return RelativeReloc; } + unsigned getIRelativeReloc() const { return IRelativeReloc; } + bool isTlsLocalDynamicReloc(unsigned Type) const { + return Type == TlsLocalDynamicReloc; + } + bool isTlsGlobalDynamicReloc(unsigned Type) const { + return Type == TlsGlobalDynamicReloc; + } + unsigned getTlsModuleIndexReloc() const { return TlsModuleIndexReloc; } + unsigned getTlsOffsetReloc() const { return TlsOffsetReloc; } + unsigned getPltZeroEntrySize() const { return PltZeroEntrySize; } + unsigned getPltEntrySize() const { return PltEntrySize; } + bool supportsLazyRelocations() const { return LazyRelocations; } + unsigned getGotHeaderEntriesNum() const { return GotHeaderEntriesNum; } + unsigned getGotPltHeaderEntriesNum() const { return GotPltHeaderEntriesNum; } + virtual unsigned getDynReloc(unsigned Type) const { return Type; } + virtual bool isTlsDynReloc(unsigned Type, const SymbolBody &S) const { + return false; + } + virtual unsigned getPltRefReloc(unsigned Type) const; + virtual unsigned getTlsGotReloc(unsigned Type = -1) const { + return TlsGotReloc; + } + virtual void writeGotHeaderEntries(uint8_t *Buf) const; + virtual void writeGotPltHeaderEntries(uint8_t *Buf) const; + virtual void writeGotPltEntry(uint8_t *Buf, uint64_t Plt) const = 0; + virtual void writePltZeroEntry(uint8_t *Buf, uint64_t GotEntryAddr, + uint64_t PltEntryAddr) const = 0; + virtual void writePltEntry(uint8_t 
*Buf, uint64_t GotAddr, + uint64_t GotEntryAddr, uint64_t PltEntryAddr, + int32_t Index, unsigned RelOff) const = 0; + virtual bool isRelRelative(uint32_t Type) const; + virtual bool isSizeDynReloc(uint32_t Type, const SymbolBody &S) const; + virtual bool relocNeedsDynRelative(unsigned Type) const { return false; } + virtual bool relocNeedsGot(uint32_t Type, const SymbolBody &S) const = 0; + virtual bool relocNeedsPlt(uint32_t Type, const SymbolBody &S) const = 0; + virtual void relocateOne(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type, + uint64_t P, uint64_t SA, uint64_t ZA = 0, + uint8_t *PairedLoc = nullptr) const = 0; + virtual bool isGotRelative(uint32_t Type) const; + virtual bool isTlsOptimized(unsigned Type, const SymbolBody *S) const; + virtual bool needsCopyRel(uint32_t Type, const SymbolBody &S) const; + virtual unsigned relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd, + uint32_t Type, uint64_t P, uint64_t SA, + const SymbolBody &S) const; + virtual ~TargetInfo(); + +protected: + unsigned PageSize = 4096; + + // On freebsd x86_64 the first page cannot be mmaped. + // On linux that is controled by vm.mmap_min_addr. At least on some x86_64 + // installs that is 65536, so the first 15 pages cannot be used. + // Given that, the smallest value that can be used in here is 0x10000. + // If using 2MB pages, the smallest page aligned address that works is + // 0x200000, but it looks like every OS uses 4k pages for executables. 
+ uint64_t VAStart = 0x10000; + + unsigned CopyReloc; + unsigned PCRelReloc; + unsigned GotReloc; + unsigned PltReloc; + unsigned RelativeReloc; + unsigned IRelativeReloc; + unsigned TlsGotReloc = 0; + unsigned TlsLocalDynamicReloc = 0; + unsigned TlsGlobalDynamicReloc = 0; + unsigned TlsModuleIndexReloc; + unsigned TlsOffsetReloc; + unsigned PltEntrySize = 8; + unsigned PltZeroEntrySize = 0; + unsigned GotHeaderEntriesNum = 0; + unsigned GotPltHeaderEntriesNum = 3; + bool LazyRelocations = false; +}; + +uint64_t getPPC64TocBase(); + +template <class ELFT> +typename llvm::object::ELFFile<ELFT>::uintX_t getMipsGpAddr(); + +template <class ELFT> bool isGnuIFunc(const SymbolBody &S); + +extern std::unique_ptr<TargetInfo> Target; +TargetInfo *createTarget(); +} +} + +#endif diff --git a/ELF/Writer.cpp b/ELF/Writer.cpp new file mode 100644 index 000000000000..bdcb720a8a48 --- /dev/null +++ b/ELF/Writer.cpp @@ -0,0 +1,1282 @@ +//===- Writer.cpp ---------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Writer.h" +#include "Config.h" +#include "OutputSections.h" +#include "SymbolTable.h" +#include "Target.h" + +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Support/FileOutputBuffer.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/StringSaver.h" + +using namespace llvm; +using namespace llvm::ELF; +using namespace llvm::object; + +using namespace lld; +using namespace lld::elf2; + +namespace { +// The writer writes a SymbolTable result to a file. 
+template <class ELFT> class Writer { +public: + typedef typename ELFFile<ELFT>::uintX_t uintX_t; + typedef typename ELFFile<ELFT>::Elf_Shdr Elf_Shdr; + typedef typename ELFFile<ELFT>::Elf_Ehdr Elf_Ehdr; + typedef typename ELFFile<ELFT>::Elf_Phdr Elf_Phdr; + typedef typename ELFFile<ELFT>::Elf_Sym Elf_Sym; + typedef typename ELFFile<ELFT>::Elf_Sym_Range Elf_Sym_Range; + typedef typename ELFFile<ELFT>::Elf_Rela Elf_Rela; + Writer(SymbolTable<ELFT> &S) : Symtab(S) {} + void run(); + +private: + void copyLocalSymbols(); + void addReservedSymbols(); + void createSections(); + void addPredefinedSections(); + + template <bool isRela> + void scanRelocs(InputSectionBase<ELFT> &C, + iterator_range<const Elf_Rel_Impl<ELFT, isRela> *> Rels); + + void scanRelocs(InputSection<ELFT> &C); + void scanRelocs(InputSectionBase<ELFT> &S, const Elf_Shdr &RelSec); + void updateRelro(Elf_Phdr *Cur, Elf_Phdr *GnuRelroPhdr, uintX_t VA); + void assignAddresses(); + void buildSectionMap(); + void fixAbsoluteSymbols(); + void openFile(StringRef OutputPath); + void writeHeader(); + void writeSections(); + bool isDiscarded(InputSectionBase<ELFT> *IS) const; + StringRef getOutputSectionName(StringRef S) const; + bool needsInterpSection() const { + return !Symtab.getSharedFiles().empty() && !Config->DynamicLinker.empty(); + } + bool isOutputDynamic() const { + return !Symtab.getSharedFiles().empty() || Config->Shared; + } + int getPhdrsNum() const; + + OutputSection<ELFT> *getBSS(); + void addCommonSymbols(std::vector<DefinedCommon *> &Syms); + void addCopyRelSymbols(std::vector<SharedSymbol<ELFT> *> &Syms); + + std::unique_ptr<llvm::FileOutputBuffer> Buffer; + + BumpPtrAllocator Alloc; + std::vector<OutputSectionBase<ELFT> *> OutputSections; + std::vector<std::unique_ptr<OutputSectionBase<ELFT>>> OwningSections; + unsigned getNumSections() const { return OutputSections.size() + 1; } + + void addRelIpltSymbols(); + void addStartEndSymbols(); + void addStartStopSymbols(OutputSectionBase<ELFT> 
*Sec); + void setPhdr(Elf_Phdr *PH, uint32_t Type, uint32_t Flags, uintX_t FileOff, + uintX_t VA, uintX_t Size, uintX_t Align); + void copyPhdr(Elf_Phdr *PH, OutputSectionBase<ELFT> *From); + + bool HasRelro = false; + SymbolTable<ELFT> &Symtab; + std::vector<Elf_Phdr> Phdrs; + + uintX_t FileSize; + uintX_t SectionHeaderOff; + + llvm::StringMap<llvm::StringRef> InputToOutputSection; +}; +} // anonymous namespace + +template <class ELFT> static bool shouldUseRela() { + ELFKind K = cast<ELFFileBase<ELFT>>(Config->FirstElf)->getELFKind(); + return K == ELF64LEKind || K == ELF64BEKind; +} + +template <class ELFT> void lld::elf2::writeResult(SymbolTable<ELFT> *Symtab) { + // Initialize output sections that are handled by Writer specially. + // Don't reorder because the order of initialization matters. + InterpSection<ELFT> Interp; + Out<ELFT>::Interp = &Interp; + StringTableSection<ELFT> ShStrTab(".shstrtab", false); + Out<ELFT>::ShStrTab = &ShStrTab; + StringTableSection<ELFT> StrTab(".strtab", false); + if (!Config->StripAll) + Out<ELFT>::StrTab = &StrTab; + StringTableSection<ELFT> DynStrTab(".dynstr", true); + Out<ELFT>::DynStrTab = &DynStrTab; + GotSection<ELFT> Got; + Out<ELFT>::Got = &Got; + GotPltSection<ELFT> GotPlt; + if (Target->supportsLazyRelocations()) + Out<ELFT>::GotPlt = &GotPlt; + PltSection<ELFT> Plt; + Out<ELFT>::Plt = &Plt; + std::unique_ptr<SymbolTableSection<ELFT>> SymTab; + if (!Config->StripAll) { + SymTab.reset(new SymbolTableSection<ELFT>(*Symtab, *Out<ELFT>::StrTab)); + Out<ELFT>::SymTab = SymTab.get(); + } + SymbolTableSection<ELFT> DynSymTab(*Symtab, *Out<ELFT>::DynStrTab); + Out<ELFT>::DynSymTab = &DynSymTab; + HashTableSection<ELFT> HashTab; + if (Config->SysvHash) + Out<ELFT>::HashTab = &HashTab; + GnuHashTableSection<ELFT> GnuHashTab; + if (Config->GnuHash) + Out<ELFT>::GnuHashTab = &GnuHashTab; + bool IsRela = shouldUseRela<ELFT>(); + RelocationSection<ELFT> RelaDyn(IsRela ? 
".rela.dyn" : ".rel.dyn", IsRela); + Out<ELFT>::RelaDyn = &RelaDyn; + RelocationSection<ELFT> RelaPlt(IsRela ? ".rela.plt" : ".rel.plt", IsRela); + if (Target->supportsLazyRelocations()) + Out<ELFT>::RelaPlt = &RelaPlt; + DynamicSection<ELFT> Dynamic(*Symtab); + Out<ELFT>::Dynamic = &Dynamic; + + Writer<ELFT>(*Symtab).run(); +} + +// The main function of the writer. +template <class ELFT> void Writer<ELFT>::run() { + buildSectionMap(); + if (!Config->DiscardAll) + copyLocalSymbols(); + addReservedSymbols(); + createSections(); + assignAddresses(); + fixAbsoluteSymbols(); + openFile(Config->OutputFile); + writeHeader(); + writeSections(); + error(Buffer->commit()); +} + +namespace { +template <bool Is64Bits> struct SectionKey { + typedef typename std::conditional<Is64Bits, uint64_t, uint32_t>::type uintX_t; + StringRef Name; + uint32_t Type; + uintX_t Flags; + uintX_t EntSize; +}; +} +namespace llvm { +template <bool Is64Bits> struct DenseMapInfo<SectionKey<Is64Bits>> { + static SectionKey<Is64Bits> getEmptyKey() { + return SectionKey<Is64Bits>{DenseMapInfo<StringRef>::getEmptyKey(), 0, 0, + 0}; + } + static SectionKey<Is64Bits> getTombstoneKey() { + return SectionKey<Is64Bits>{DenseMapInfo<StringRef>::getTombstoneKey(), 0, + 0, 0}; + } + static unsigned getHashValue(const SectionKey<Is64Bits> &Val) { + return hash_combine(Val.Name, Val.Type, Val.Flags, Val.EntSize); + } + static bool isEqual(const SectionKey<Is64Bits> &LHS, + const SectionKey<Is64Bits> &RHS) { + return DenseMapInfo<StringRef>::isEqual(LHS.Name, RHS.Name) && + LHS.Type == RHS.Type && LHS.Flags == RHS.Flags && + LHS.EntSize == RHS.EntSize; + } +}; +} + +// The reason we have to do this early scan is as follows +// * To mmap the output file, we need to know the size +// * For that, we need to know how many dynamic relocs we will have. +// It might be possible to avoid this by outputting the file with write: +// * Write the allocated output sections, computing addresses. 
+// * Apply relocations, recording which ones require a dynamic reloc. +// * Write the dynamic relocations. +// * Write the rest of the file. +template <class ELFT> +template <bool isRela> +void Writer<ELFT>::scanRelocs( + InputSectionBase<ELFT> &C, + iterator_range<const Elf_Rel_Impl<ELFT, isRela> *> Rels) { + typedef Elf_Rel_Impl<ELFT, isRela> RelType; + const ObjectFile<ELFT> &File = *C.getFile(); + for (const RelType &RI : Rels) { + uint32_t SymIndex = RI.getSymbol(Config->Mips64EL); + SymbolBody *Body = File.getSymbolBody(SymIndex); + uint32_t Type = RI.getType(Config->Mips64EL); + + if (Target->isGotRelative(Type)) + HasGotOffRel = true; + + if (Target->isTlsLocalDynamicReloc(Type)) { + if (Target->isTlsOptimized(Type, nullptr)) + continue; + if (Out<ELFT>::Got->addCurrentModuleTlsIndex()) + Out<ELFT>::RelaDyn->addReloc({&C, &RI}); + continue; + } + + // Set "used" bit for --as-needed. + if (Body && Body->isUndefined() && !Body->isWeak()) + if (auto *S = dyn_cast<SharedSymbol<ELFT>>(Body->repl())) + S->File->IsUsed = true; + + if (Body) + Body = Body->repl(); + + if (Body && Body->isTls() && Target->isTlsGlobalDynamicReloc(Type)) { + bool Opt = Target->isTlsOptimized(Type, Body); + if (!Opt && Out<ELFT>::Got->addDynTlsEntry(Body)) { + Out<ELFT>::RelaDyn->addReloc({&C, &RI}); + Out<ELFT>::RelaDyn->addReloc({nullptr, nullptr}); + Body->setUsedInDynamicReloc(); + continue; + } + if (!canBePreempted(Body, true)) + continue; + } + + if (Body && Body->isTls() && !Target->isTlsDynReloc(Type, *Body)) + continue; + + if (Target->relocNeedsDynRelative(Type)) { + RelType *Rel = new (Alloc) RelType; + Rel->setSymbolAndType(0, Target->getRelativeReloc(), Config->Mips64EL); + Rel->r_offset = RI.r_offset; + Out<ELFT>::RelaDyn->addReloc({&C, Rel}); + } + + bool NeedsGot = false; + bool NeedsPlt = false; + if (Body) { + if (auto *E = dyn_cast<SharedSymbol<ELFT>>(Body)) { + if (E->NeedsCopy) + continue; + if (Target->needsCopyRel(Type, *Body)) + E->NeedsCopy = true; + } + 
NeedsPlt = Target->relocNeedsPlt(Type, *Body); + if (NeedsPlt) { + if (Body->isInPlt()) + continue; + Out<ELFT>::Plt->addEntry(Body); + } + NeedsGot = Target->relocNeedsGot(Type, *Body); + if (NeedsGot) { + if (NeedsPlt && Target->supportsLazyRelocations()) { + Out<ELFT>::GotPlt->addEntry(Body); + } else { + if (Body->isInGot()) + continue; + Out<ELFT>::Got->addEntry(Body); + } + } + } + + // An STT_GNU_IFUNC symbol always uses a PLT entry, and all references + // to the symbol go through the PLT. This is true even for a local + // symbol, although local symbols normally do not require PLT entries. + if (Body && isGnuIFunc<ELFT>(*Body)) { + Body->setUsedInDynamicReloc(); + Out<ELFT>::RelaPlt->addReloc({&C, &RI}); + continue; + } + + if (Config->EMachine == EM_MIPS && NeedsGot) { + // MIPS ABI has special rules to process GOT entries + // and doesn't require relocation entries for them. + // See "Global Offset Table" in Chapter 5 in the following document + // for detailed description: + // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf + Body->setUsedInDynamicReloc(); + continue; + } + bool CBP = canBePreempted(Body, NeedsGot); + if (!CBP && (!Config->Shared || Target->isRelRelative(Type))) + continue; + if (CBP) + Body->setUsedInDynamicReloc(); + if (NeedsPlt && Target->supportsLazyRelocations()) + Out<ELFT>::RelaPlt->addReloc({&C, &RI}); + else + Out<ELFT>::RelaDyn->addReloc({&C, &RI}); + } +} + +template <class ELFT> void Writer<ELFT>::scanRelocs(InputSection<ELFT> &C) { + if (!(C.getSectionHdr()->sh_flags & SHF_ALLOC)) + return; + + for (const Elf_Shdr *RelSec : C.RelocSections) + scanRelocs(C, *RelSec); +} + +template <class ELFT> +void Writer<ELFT>::scanRelocs(InputSectionBase<ELFT> &S, + const Elf_Shdr &RelSec) { + ELFFile<ELFT> &EObj = S.getFile()->getObj(); + if (RelSec.sh_type == SHT_RELA) + scanRelocs(S, EObj.relas(&RelSec)); + else + scanRelocs(S, EObj.rels(&RelSec)); +} + +template <class ELFT> +static void reportUndefined(const 
SymbolTable<ELFT> &S, const SymbolBody &Sym) { + if (Config->Shared && !Config->NoUndefined) + return; + + ELFFileBase<ELFT> *SymFile = findFile<ELFT>(S.getObjectFiles(), &Sym); + std::string Message = "undefined symbol: " + Sym.getName().str(); + if (SymFile) + Message += " in " + SymFile->getName().str(); + if (Config->NoInhibitExec) + warning(Message); + else + error(Message); +} + +// Local symbols are not in the linker's symbol table. This function scans +// each object file's symbol table to copy local symbols to the output. +template <class ELFT> void Writer<ELFT>::copyLocalSymbols() { + for (const std::unique_ptr<ObjectFile<ELFT>> &F : Symtab.getObjectFiles()) { + for (const Elf_Sym &Sym : F->getLocalSymbols()) { + ErrorOr<StringRef> SymNameOrErr = Sym.getName(F->getStringTable()); + error(SymNameOrErr); + StringRef SymName = *SymNameOrErr; + if (!shouldKeepInSymtab<ELFT>(*F, SymName, Sym)) + continue; + if (Out<ELFT>::SymTab) + Out<ELFT>::SymTab->addLocalSymbol(SymName); + } + } +} + +// PPC64 has a number of special SHT_PROGBITS+SHF_ALLOC+SHF_WRITE sections that +// we would like to make sure appear is a specific order to maximize their +// coverage by a single signed 16-bit offset from the TOC base pointer. +// Conversely, the special .tocbss section should be first among all SHT_NOBITS +// sections. This will put it next to the loaded special PPC64 sections (and, +// thus, within reach of the TOC base pointer). 
+static int getPPC64SectionRank(StringRef SectionName) { + return StringSwitch<int>(SectionName) + .Case(".tocbss", 0) + .Case(".branch_lt", 2) + .Case(".toc", 3) + .Case(".toc1", 4) + .Case(".opd", 5) + .Default(1); +} + +template <class ELFT> static bool isRelroSection(OutputSectionBase<ELFT> *Sec) { + typename OutputSectionBase<ELFT>::uintX_t Flags = Sec->getFlags(); + if (!(Flags & SHF_ALLOC) || !(Flags & SHF_WRITE)) + return false; + if (Flags & SHF_TLS) + return true; + uint32_t Type = Sec->getType(); + if (Type == SHT_INIT_ARRAY || Type == SHT_FINI_ARRAY || + Type == SHT_PREINIT_ARRAY) + return true; + if (Sec == Out<ELFT>::GotPlt) + return Config->ZNow; + if (Sec == Out<ELFT>::Dynamic || Sec == Out<ELFT>::Got) + return true; + StringRef S = Sec->getName(); + return S == ".data.rel.ro" || S == ".ctors" || S == ".dtors" || S == ".jcr" || + S == ".eh_frame"; +} + +// Output section ordering is determined by this function. +template <class ELFT> +static bool compareOutputSections(OutputSectionBase<ELFT> *A, + OutputSectionBase<ELFT> *B) { + typedef typename ELFFile<ELFT>::uintX_t uintX_t; + + uintX_t AFlags = A->getFlags(); + uintX_t BFlags = B->getFlags(); + + // Allocatable sections go first to reduce the total PT_LOAD size and + // so debug info doesn't change addresses in actual code. + bool AIsAlloc = AFlags & SHF_ALLOC; + bool BIsAlloc = BFlags & SHF_ALLOC; + if (AIsAlloc != BIsAlloc) + return AIsAlloc; + + // We don't have any special requirements for the relative order of + // two non allocatable sections. + if (!AIsAlloc) + return false; + + // We want the read only sections first so that they go in the PT_LOAD + // covering the program headers at the start of the file. + bool AIsWritable = AFlags & SHF_WRITE; + bool BIsWritable = BFlags & SHF_WRITE; + if (AIsWritable != BIsWritable) + return BIsWritable; + + // For a corresponding reason, put non exec sections first (the program + // header PT_LOAD is not executable). 
+ bool AIsExec = AFlags & SHF_EXECINSTR; + bool BIsExec = BFlags & SHF_EXECINSTR; + if (AIsExec != BIsExec) + return BIsExec; + + // If we got here we know that both A and B are in the same PT_LOAD. + + // The TLS initialization block needs to be a single contiguous block in a R/W + // PT_LOAD, so stick TLS sections directly before R/W sections. The TLS NOBITS + // sections are placed here as they don't take up virtual address space in the + // PT_LOAD. + bool AIsTls = AFlags & SHF_TLS; + bool BIsTls = BFlags & SHF_TLS; + if (AIsTls != BIsTls) + return AIsTls; + + // The next requirement we have is to put nobits sections last. The + // reason is that the only thing the dynamic linker will see about + // them is a p_memsz that is larger than p_filesz. Seeing that it + // zeros the end of the PT_LOAD, so that has to correspond to the + // nobits sections. + bool AIsNoBits = A->getType() == SHT_NOBITS; + bool BIsNoBits = B->getType() == SHT_NOBITS; + if (AIsNoBits != BIsNoBits) + return BIsNoBits; + + // We place RelRo section before plain r/w ones. + bool AIsRelRo = isRelroSection(A); + bool BIsRelRo = isRelroSection(B); + if (AIsRelRo != BIsRelRo) + return AIsRelRo; + + // Some architectures have additional ordering restrictions for sections + // within the same PT_LOAD. + if (Config->EMachine == EM_PPC64) + return getPPC64SectionRank(A->getName()) < + getPPC64SectionRank(B->getName()); + + return false; +} + +template <class ELFT> OutputSection<ELFT> *Writer<ELFT>::getBSS() { + if (!Out<ELFT>::Bss) { + Out<ELFT>::Bss = + new OutputSection<ELFT>(".bss", SHT_NOBITS, SHF_ALLOC | SHF_WRITE); + OwningSections.emplace_back(Out<ELFT>::Bss); + OutputSections.push_back(Out<ELFT>::Bss); + } + return Out<ELFT>::Bss; +} + +// Until this function is called, common symbols do not belong to any section. +// This function adds them to end of BSS section. 
+template <class ELFT> +void Writer<ELFT>::addCommonSymbols(std::vector<DefinedCommon *> &Syms) { + typedef typename ELFFile<ELFT>::uintX_t uintX_t; + + if (Syms.empty()) + return; + + // Sort the common symbols by alignment as an heuristic to pack them better. + std::stable_sort(Syms.begin(), Syms.end(), + [](const DefinedCommon *A, const DefinedCommon *B) { + return A->MaxAlignment > B->MaxAlignment; + }); + + uintX_t Off = getBSS()->getSize(); + for (DefinedCommon *C : Syms) { + uintX_t Align = C->MaxAlignment; + Off = RoundUpToAlignment(Off, Align); + C->OffsetInBSS = Off; + Off += C->Size; + } + + Out<ELFT>::Bss->setSize(Off); +} + +// Reserve space in .bss for copy relocations. +template <class ELFT> +void Writer<ELFT>::addCopyRelSymbols(std::vector<SharedSymbol<ELFT> *> &Syms) { + if (Syms.empty()) + return; + uintX_t Off = getBSS()->getSize(); + for (SharedSymbol<ELFT> *C : Syms) { + const Elf_Sym &Sym = C->Sym; + const Elf_Shdr *Sec = C->File->getSection(Sym); + uintX_t SecAlign = Sec->sh_addralign; + unsigned TrailingZeros = + std::min(countTrailingZeros(SecAlign), + countTrailingZeros((uintX_t)Sym.st_value)); + uintX_t Align = 1 << TrailingZeros; + Out<ELFT>::Bss->updateAlign(Align); + Off = RoundUpToAlignment(Off, Align); + C->OffsetInBSS = Off; + Off += Sym.st_size; + } + Out<ELFT>::Bss->setSize(Off); +} + +template <class ELFT> +StringRef Writer<ELFT>::getOutputSectionName(StringRef S) const { + auto It = InputToOutputSection.find(S); + if (It != std::end(InputToOutputSection)) + return It->second; + + if (S.startswith(".text.")) + return ".text"; + if (S.startswith(".rodata.")) + return ".rodata"; + if (S.startswith(".data.rel.ro")) + return ".data.rel.ro"; + if (S.startswith(".data.")) + return ".data"; + if (S.startswith(".bss.")) + return ".bss"; + return S; +} + +template <class ELFT> +void reportDiscarded(InputSectionBase<ELFT> *IS, + const std::unique_ptr<ObjectFile<ELFT>> &File) { + if (!Config->PrintGcSections || !IS || IS->isLive()) + 
return; + llvm::errs() << "removing unused section from '" << IS->getSectionName() + << "' in file '" << File->getName() << "'\n"; +} + +template <class ELFT> +bool Writer<ELFT>::isDiscarded(InputSectionBase<ELFT> *IS) const { + if (!IS || !IS->isLive() || IS == &InputSection<ELFT>::Discarded) + return true; + return InputToOutputSection.lookup(IS->getSectionName()) == "/DISCARD/"; +} + +template <class ELFT> +static bool compareSections(OutputSectionBase<ELFT> *A, + OutputSectionBase<ELFT> *B) { + auto ItA = Config->OutputSections.find(A->getName()); + auto ItEnd = std::end(Config->OutputSections); + if (ItA == ItEnd) + return compareOutputSections(A, B); + auto ItB = Config->OutputSections.find(B->getName()); + if (ItB == ItEnd) + return compareOutputSections(A, B); + + return std::distance(ItA, ItB) > 0; +} + +// The beginning and the ending of .rel[a].plt section are marked +// with __rel[a]_iplt_{start,end} symbols if it is a statically linked +// executable. The runtime needs these symbols in order to resolve +// all IRELATIVE relocs on startup. For dynamic executables, we don't +// need these symbols, since IRELATIVE relocs are resolved through GOT +// and PLT. For details, see http://www.airs.com/blog/archives/403. +template <class ELFT> +void Writer<ELFT>::addRelIpltSymbols() { + if (isOutputDynamic() || !Out<ELFT>::RelaPlt) + return; + bool IsRela = shouldUseRela<ELFT>(); + + StringRef S = IsRela ? "__rela_iplt_start" : "__rel_iplt_start"; + if (Symtab.find(S)) + Symtab.addAbsolute(S, ElfSym<ELFT>::RelaIpltStart); + + S = IsRela ? "__rela_iplt_end" : "__rel_iplt_end"; + if (Symtab.find(S)) + Symtab.addAbsolute(S, ElfSym<ELFT>::RelaIpltEnd); +} + +template <class ELFT> static bool includeInSymtab(const SymbolBody &B) { + if (!B.isUsedInRegularObj()) + return false; + + // Don't include synthetic symbols like __init_array_start in every output. 
+ if (auto *U = dyn_cast<DefinedRegular<ELFT>>(&B)) + if (&U->Sym == &ElfSym<ELFT>::IgnoreUndef) + return false; + + return true; +} + +static bool includeInDynamicSymtab(const SymbolBody &B) { + uint8_t V = B.getVisibility(); + if (V != STV_DEFAULT && V != STV_PROTECTED) + return false; + if (Config->ExportDynamic || Config->Shared) + return true; + return B.isUsedInDynamicReloc(); +} + +// This class knows how to create an output section for a given +// input section. Output section type is determined by various +// factors, including input section's sh_flags, sh_type and +// linker scripts. +namespace { +template <class ELFT> class OutputSectionFactory { + typedef typename ELFFile<ELFT>::Elf_Shdr Elf_Shdr; + typedef typename ELFFile<ELFT>::uintX_t uintX_t; + +public: + std::pair<OutputSectionBase<ELFT> *, bool> create(InputSectionBase<ELFT> *C, + StringRef OutsecName); + + OutputSectionBase<ELFT> *lookup(StringRef Name, uint32_t Type, uintX_t Flags); + +private: + SectionKey<ELFT::Is64Bits> createKey(InputSectionBase<ELFT> *C, + StringRef OutsecName); + OutputSectionBase<ELFT> *createAux(InputSectionBase<ELFT> *C, + const SectionKey<ELFT::Is64Bits> &Key); + + SmallDenseMap<SectionKey<ELFT::Is64Bits>, OutputSectionBase<ELFT> *> Map; +}; +} + +template <class ELFT> +std::pair<OutputSectionBase<ELFT> *, bool> +OutputSectionFactory<ELFT>::create(InputSectionBase<ELFT> *C, + StringRef OutsecName) { + SectionKey<ELFT::Is64Bits> Key = createKey(C, OutsecName); + OutputSectionBase<ELFT> *&Sec = Map[Key]; + if (Sec) + return {Sec, false}; + Sec = createAux(C, Key); + return {Sec, true}; +} + +template <class ELFT> +OutputSectionBase<ELFT> * +OutputSectionFactory<ELFT>::createAux(InputSectionBase<ELFT> *C, + const SectionKey<ELFT::Is64Bits> &Key) { + switch (C->SectionKind) { + case InputSectionBase<ELFT>::Regular: + return new OutputSection<ELFT>(Key.Name, Key.Type, Key.Flags); + case InputSectionBase<ELFT>::EHFrame: + return new EHOutputSection<ELFT>(Key.Name, Key.Type, 
Key.Flags); + case InputSectionBase<ELFT>::Merge: + return new MergeOutputSection<ELFT>(Key.Name, Key.Type, Key.Flags); + case InputSectionBase<ELFT>::MipsReginfo: + return new MipsReginfoOutputSection<ELFT>(); + } + llvm_unreachable("Unknown output section type"); +} + +template <class ELFT> +OutputSectionBase<ELFT> *OutputSectionFactory<ELFT>::lookup(StringRef Name, + uint32_t Type, + uintX_t Flags) { + return Map.lookup({Name, Type, Flags, 0}); +} + +template <class ELFT> +SectionKey<ELFT::Is64Bits> +OutputSectionFactory<ELFT>::createKey(InputSectionBase<ELFT> *C, + StringRef OutsecName) { + const Elf_Shdr *H = C->getSectionHdr(); + uintX_t Flags = H->sh_flags & ~SHF_GROUP; + + // For SHF_MERGE we create different output sections for each sh_entsize. + // This makes each output section simple and keeps a single level + // mapping from input to output. + uintX_t EntSize = isa<MergeInputSection<ELFT>>(C) ? H->sh_entsize : 0; + + // GNU as can give .eh_frame section type SHT_PROGBITS or SHT_X86_64_UNWIND + // depending on the construct. We want to canonicalize it so that + // there is only one .eh_frame in the end. + uint32_t Type = H->sh_type; + if (Type == SHT_PROGBITS && Config->EMachine == EM_X86_64 && + isa<EHInputSection<ELFT>>(C)) + Type = SHT_X86_64_UNWIND; + + return SectionKey<ELFT::Is64Bits>{OutsecName, Type, Flags, EntSize}; +} + +// The linker is expected to define some symbols depending on +// the linking result. This function defines such symbols. +template <class ELFT> void Writer<ELFT>::addReservedSymbols() { + // __tls_get_addr is defined by the dynamic linker for dynamic ELFs. For + // static linking the linker is required to optimize away any references to + // __tls_get_addr, so it's not defined anywhere. Create a hidden definition + // to avoid the undefined symbol error. 
+ if (!isOutputDynamic()) + Symtab.addIgnored("__tls_get_addr"); + + // If the "_end" symbol is referenced, it is expected to point to the address + // right after the data segment. Usually, this symbol points to the end + // of .bss section or to the end of .data section if .bss section is absent. + // The order of the sections can be affected by linker script, + // so it is hard to predict which section will be the last one. + // So, if this symbol is referenced, we just add the placeholder here + // and update its value later. + if (Symtab.find("_end")) + Symtab.addAbsolute("_end", ElfSym<ELFT>::End); + + // If there is an undefined symbol "end", we should initialize it + // with the same value as "_end". In any other case it should stay intact, + // because it is an allowable name for a user symbol. + if (SymbolBody *B = Symtab.find("end")) + if (B->isUndefined()) + Symtab.addAbsolute("end", ElfSym<ELFT>::End); +} + +// Create output section objects and add them to OutputSections. +template <class ELFT> void Writer<ELFT>::createSections() { + // Add .interp first because some loaders want to see that section + // on the first page of the executable file when loaded into memory. + if (needsInterpSection()) + OutputSections.push_back(Out<ELFT>::Interp); + + // Create output sections for input object file sections. 
+ std::vector<OutputSectionBase<ELFT> *> RegularSections; + OutputSectionFactory<ELFT> Factory; + for (const std::unique_ptr<ObjectFile<ELFT>> &F : Symtab.getObjectFiles()) { + for (InputSectionBase<ELFT> *C : F->getSections()) { + if (isDiscarded(C)) { + reportDiscarded(C, F); + continue; + } + OutputSectionBase<ELFT> *Sec; + bool IsNew; + std::tie(Sec, IsNew) = + Factory.create(C, getOutputSectionName(C->getSectionName())); + if (IsNew) { + OwningSections.emplace_back(Sec); + OutputSections.push_back(Sec); + RegularSections.push_back(Sec); + } + Sec->addSection(C); + } + } + + Out<ELFT>::Bss = static_cast<OutputSection<ELFT> *>( + Factory.lookup(".bss", SHT_NOBITS, SHF_ALLOC | SHF_WRITE)); + + // If we have a .opd section (used under PPC64 for function descriptors), + // store a pointer to it here so that we can use it later when processing + // relocations. + Out<ELFT>::Opd = Factory.lookup(".opd", SHT_PROGBITS, SHF_WRITE | SHF_ALLOC); + + Out<ELFT>::Dynamic->PreInitArraySec = Factory.lookup( + ".preinit_array", SHT_PREINIT_ARRAY, SHF_WRITE | SHF_ALLOC); + Out<ELFT>::Dynamic->InitArraySec = + Factory.lookup(".init_array", SHT_INIT_ARRAY, SHF_WRITE | SHF_ALLOC); + Out<ELFT>::Dynamic->FiniArraySec = + Factory.lookup(".fini_array", SHT_FINI_ARRAY, SHF_WRITE | SHF_ALLOC); + + // The linker needs to define SECNAME_start, SECNAME_end and SECNAME_stop + // symbols for sections, so that the runtime can get the start and end + // addresses of each section by section name. Add such symbols. + addStartEndSymbols(); + for (OutputSectionBase<ELFT> *Sec : RegularSections) + addStartStopSymbols(Sec); + + // Scan relocations. This must be done after every symbol is declared so that + // we can correctly decide if a dynamic relocation is needed. 
+ for (const std::unique_ptr<ObjectFile<ELFT>> &F : Symtab.getObjectFiles()) { + for (InputSectionBase<ELFT> *C : F->getSections()) { + if (isDiscarded(C)) + continue; + if (auto *S = dyn_cast<InputSection<ELFT>>(C)) + scanRelocs(*S); + else if (auto *S = dyn_cast<EHInputSection<ELFT>>(C)) + if (S->RelocSection) + scanRelocs(*S, *S->RelocSection); + } + } + + // Define __rel[a]_iplt_{start,end} symbols if needed. + addRelIpltSymbols(); + + // Now that we have defined all possible symbols, including linker- + // synthesized ones, visit all symbols to give the finishing touches. + std::vector<DefinedCommon *> CommonSymbols; + std::vector<SharedSymbol<ELFT> *> CopyRelSymbols; + for (auto &P : Symtab.getSymbols()) { + SymbolBody *Body = P.second->Body; + if (auto *U = dyn_cast<Undefined>(Body)) + if (!U->isWeak() && !U->canKeepUndefined()) + reportUndefined<ELFT>(Symtab, *Body); + + if (auto *C = dyn_cast<DefinedCommon>(Body)) + CommonSymbols.push_back(C); + if (auto *SC = dyn_cast<SharedSymbol<ELFT>>(Body)) + if (SC->NeedsCopy) + CopyRelSymbols.push_back(SC); + + if (!includeInSymtab<ELFT>(*Body)) + continue; + if (Out<ELFT>::SymTab) + Out<ELFT>::SymTab->addSymbol(Body); + + if (isOutputDynamic() && includeInDynamicSymtab(*Body)) + Out<ELFT>::DynSymTab->addSymbol(Body); + } + addCommonSymbols(CommonSymbols); + addCopyRelSymbols(CopyRelSymbols); + + // So far we have added sections from input object files. + // This function adds linker-created Out<ELFT>::* sections. + addPredefinedSections(); + + std::stable_sort(OutputSections.begin(), OutputSections.end(), + compareSections<ELFT>); + + for (unsigned I = 0, N = OutputSections.size(); I < N; ++I) { + OutputSections[I]->SectionIndex = I + 1; + HasRelro |= (Config->ZRelro && isRelroSection(OutputSections[I])); + } + + for (OutputSectionBase<ELFT> *Sec : OutputSections) + Out<ELFT>::ShStrTab->add(Sec->getName()); + + // Finalizers fix each section's size. 
+ // .dynamic section's finalizer may add strings to .dynstr, + // so finalize that early. + // Likewise, .dynsym is finalized early since that may fill up .gnu.hash. + Out<ELFT>::Dynamic->finalize(); + if (isOutputDynamic()) + Out<ELFT>::DynSymTab->finalize(); + + // Fill other section headers. + for (OutputSectionBase<ELFT> *Sec : OutputSections) + Sec->finalize(); +} + +// This function adds Out<ELFT>::* sections to OutputSections. +template <class ELFT> void Writer<ELFT>::addPredefinedSections() { + auto Add = [&](OutputSectionBase<ELFT> *C) { + if (C) + OutputSections.push_back(C); + }; + + // This order is not the same as the final output order + // because we sort the sections using their attributes below. + Add(Out<ELFT>::SymTab); + Add(Out<ELFT>::ShStrTab); + Add(Out<ELFT>::StrTab); + if (isOutputDynamic()) { + Add(Out<ELFT>::DynSymTab); + Add(Out<ELFT>::GnuHashTab); + Add(Out<ELFT>::HashTab); + Add(Out<ELFT>::Dynamic); + Add(Out<ELFT>::DynStrTab); + if (Out<ELFT>::RelaDyn->hasRelocs()) + Add(Out<ELFT>::RelaDyn); + + // This is a MIPS specific section to hold a space within the data segment + // of executable file which is pointed to by the DT_MIPS_RLD_MAP entry. + // See "Dynamic section" in Chapter 5 in the following document: + // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf + if (Config->EMachine == EM_MIPS && !Config->Shared) { + Out<ELFT>::MipsRldMap = new OutputSection<ELFT>(".rld_map", SHT_PROGBITS, + SHF_ALLOC | SHF_WRITE); + Out<ELFT>::MipsRldMap->setSize(ELFT::Is64Bits ? 8 : 4); + Out<ELFT>::MipsRldMap->updateAlign(ELFT::Is64Bits ? 8 : 4); + OwningSections.emplace_back(Out<ELFT>::MipsRldMap); + Add(Out<ELFT>::MipsRldMap); + } + } + + // We always need to add rel[a].plt to output if it has entries. + // Even during static linking it can contain R_[*]_IRELATIVE relocations. 
+ if (Out<ELFT>::RelaPlt && Out<ELFT>::RelaPlt->hasRelocs()) { + Add(Out<ELFT>::RelaPlt); + Out<ELFT>::RelaPlt->Static = !isOutputDynamic(); + } + + bool needsGot = !Out<ELFT>::Got->empty(); + // We add the .got section to the result for dynamic MIPS target because + // its address and properties are mentioned in the .dynamic section. + if (Config->EMachine == EM_MIPS) + needsGot |= isOutputDynamic(); + // If we have a relocation that is relative to GOT (such as GOTOFFREL), + // we need to emit a GOT even if it's empty. + if (HasGotOffRel) + needsGot = true; + + if (needsGot) + Add(Out<ELFT>::Got); + if (Out<ELFT>::GotPlt && !Out<ELFT>::GotPlt->empty()) + Add(Out<ELFT>::GotPlt); + if (!Out<ELFT>::Plt->empty()) + Add(Out<ELFT>::Plt); +} + +// The linker is expected to define SECNAME_start and SECNAME_end +// symbols for a few sections. This function defines them. +template <class ELFT> void Writer<ELFT>::addStartEndSymbols() { + auto Define = [&](StringRef Start, StringRef End, + OutputSectionBase<ELFT> *OS) { + if (OS) { + Symtab.addSynthetic(Start, *OS, 0); + Symtab.addSynthetic(End, *OS, OS->getSize()); + } else { + Symtab.addIgnored(Start); + Symtab.addIgnored(End); + } + }; + + Define("__preinit_array_start", "__preinit_array_end", + Out<ELFT>::Dynamic->PreInitArraySec); + Define("__init_array_start", "__init_array_end", + Out<ELFT>::Dynamic->InitArraySec); + Define("__fini_array_start", "__fini_array_end", + Out<ELFT>::Dynamic->FiniArraySec); +} + +// Returns true if C is in 'a'-'z' or 'A'-'Z', or is an underscore. +static bool isAlpha(char C) { + return ('a' <= C && C <= 'z') || ('A' <= C && C <= 'Z') || C == '_'; +} + +// Returns true if C satisfies isAlpha or is a digit '0'-'9'. +static bool isAlnum(char C) { return isAlpha(C) || ('0' <= C && C <= '9'); } + +// Returns true if S is valid as a C language identifier. 
+static bool isValidCIdentifier(StringRef S) { + if (S.empty() || !isAlpha(S[0])) + return false; + return std::all_of(S.begin() + 1, S.end(), isAlnum); +} + +// If a section name is valid as a C identifier (which is rare because of +// the leading '.'), linkers are expected to define __start_<secname> and +// __stop_<secname> symbols. They are at beginning and end of the section, +// respectively. This is not requested by the ELF standard, but GNU ld and +// gold provide the feature, and used by many programs. +template <class ELFT> +void Writer<ELFT>::addStartStopSymbols(OutputSectionBase<ELFT> *Sec) { + StringRef S = Sec->getName(); + if (!isValidCIdentifier(S)) + return; + StringSaver Saver(Alloc); + StringRef Start = Saver.save("__start_" + S); + StringRef Stop = Saver.save("__stop_" + S); + if (Symtab.isUndefined(Start)) + Symtab.addSynthetic(Start, *Sec, 0); + if (Symtab.isUndefined(Stop)) + Symtab.addSynthetic(Stop, *Sec, Sec->getSize()); +} + +template <class ELFT> static bool needsPhdr(OutputSectionBase<ELFT> *Sec) { + return Sec->getFlags() & SHF_ALLOC; +} + +static uint32_t toPhdrFlags(uint64_t Flags) { + uint32_t Ret = PF_R; + if (Flags & SHF_WRITE) + Ret |= PF_W; + if (Flags & SHF_EXECINSTR) + Ret |= PF_X; + return Ret; +} + +template <class ELFT> +void Writer<ELFT>::updateRelro(Elf_Phdr *Cur, Elf_Phdr *GnuRelroPhdr, + uintX_t VA) { + if (!GnuRelroPhdr->p_type) + setPhdr(GnuRelroPhdr, PT_GNU_RELRO, PF_R, Cur->p_offset, Cur->p_vaddr, + VA - Cur->p_vaddr, 1 /*p_align*/); + GnuRelroPhdr->p_filesz = VA - Cur->p_vaddr; + GnuRelroPhdr->p_memsz = VA - Cur->p_vaddr; +} + +// Visits all sections to create PHDRs and to assign incremental, +// non-overlapping addresses to output sections. 
+template <class ELFT> void Writer<ELFT>::assignAddresses() { + uintX_t VA = Target->getVAStart() + sizeof(Elf_Ehdr); + uintX_t FileOff = sizeof(Elf_Ehdr); + + // Calculate and reserve the space for the program header first so that + // the first section can start right after the program header. + Phdrs.resize(getPhdrsNum()); + size_t PhdrSize = sizeof(Elf_Phdr) * Phdrs.size(); + + // The first phdr entry is PT_PHDR which describes the program header itself. + setPhdr(&Phdrs[0], PT_PHDR, PF_R, FileOff, VA, PhdrSize, /*Align=*/8); + FileOff += PhdrSize; + VA += PhdrSize; + + // PT_INTERP must be the second entry if it exists. + int PhdrIdx = 0; + Elf_Phdr *Interp = nullptr; + if (needsInterpSection()) + Interp = &Phdrs[++PhdrIdx]; + + // Add the first PT_LOAD segment for regular output sections. + setPhdr(&Phdrs[++PhdrIdx], PT_LOAD, PF_R, 0, Target->getVAStart(), FileOff, + Target->getPageSize()); + + Elf_Phdr GnuRelroPhdr = {}; + Elf_Phdr TlsPhdr{}; + bool RelroAligned = false; + uintX_t ThreadBSSOffset = 0; + // Create phdrs as we assign VAs and file offsets to all output sections. + for (OutputSectionBase<ELFT> *Sec : OutputSections) { + Elf_Phdr *PH = &Phdrs[PhdrIdx]; + if (needsPhdr<ELFT>(Sec)) { + uintX_t Flags = toPhdrFlags(Sec->getFlags()); + bool InRelRo = Config->ZRelro && (Flags & PF_W) && isRelroSection(Sec); + bool FirstNonRelRo = GnuRelroPhdr.p_type && !InRelRo && !RelroAligned; + if (FirstNonRelRo || PH->p_flags != Flags) { + VA = RoundUpToAlignment(VA, Target->getPageSize()); + FileOff = RoundUpToAlignment(FileOff, Target->getPageSize()); + if (FirstNonRelRo) + RelroAligned = true; + } + + if (PH->p_flags != Flags) { + // Flags changed. Create a new PT_LOAD. 
+ PH = &Phdrs[++PhdrIdx]; + setPhdr(PH, PT_LOAD, Flags, FileOff, VA, 0, Target->getPageSize()); + } + + if (Sec->getFlags() & SHF_TLS) { + if (!TlsPhdr.p_vaddr) + setPhdr(&TlsPhdr, PT_TLS, PF_R, FileOff, VA, 0, Sec->getAlign()); + if (Sec->getType() != SHT_NOBITS) + VA = RoundUpToAlignment(VA, Sec->getAlign()); + uintX_t TVA = RoundUpToAlignment(VA + ThreadBSSOffset, Sec->getAlign()); + Sec->setVA(TVA); + TlsPhdr.p_memsz += Sec->getSize(); + if (Sec->getType() == SHT_NOBITS) { + ThreadBSSOffset = TVA - VA + Sec->getSize(); + } else { + TlsPhdr.p_filesz += Sec->getSize(); + VA += Sec->getSize(); + } + TlsPhdr.p_align = std::max<uintX_t>(TlsPhdr.p_align, Sec->getAlign()); + } else { + VA = RoundUpToAlignment(VA, Sec->getAlign()); + Sec->setVA(VA); + VA += Sec->getSize(); + if (InRelRo) + updateRelro(PH, &GnuRelroPhdr, VA); + } + } + + FileOff = RoundUpToAlignment(FileOff, Sec->getAlign()); + Sec->setFileOffset(FileOff); + if (Sec->getType() != SHT_NOBITS) + FileOff += Sec->getSize(); + if (needsPhdr<ELFT>(Sec)) { + PH->p_filesz = FileOff - PH->p_offset; + PH->p_memsz = VA - PH->p_vaddr; + } + } + + if (TlsPhdr.p_vaddr) { + // The TLS pointer goes after PT_TLS. At least glibc will align it, + // so round up the size to make sure the offsets are correct. + TlsPhdr.p_memsz = RoundUpToAlignment(TlsPhdr.p_memsz, TlsPhdr.p_align); + Phdrs[++PhdrIdx] = TlsPhdr; + Out<ELFT>::TlsPhdr = &Phdrs[PhdrIdx]; + } + + // Add an entry for .dynamic. + if (isOutputDynamic()) { + Elf_Phdr *PH = &Phdrs[++PhdrIdx]; + PH->p_type = PT_DYNAMIC; + copyPhdr(PH, Out<ELFT>::Dynamic); + } + + if (HasRelro) { + Elf_Phdr *PH = &Phdrs[++PhdrIdx]; + *PH = GnuRelroPhdr; + } + + // PT_GNU_STACK is a special program header to tell the loader to make the + // pages for the stack non-executable. + if (!Config->ZExecStack) { + Elf_Phdr *PH = &Phdrs[++PhdrIdx]; + PH->p_type = PT_GNU_STACK; + PH->p_flags = PF_R | PF_W; + } + + // Fix up PT_INTERP as we now know the address of .interp section. 
+ if (Interp) { + Interp->p_type = PT_INTERP; + copyPhdr(Interp, Out<ELFT>::Interp); + } + + // Add space for section headers. + SectionHeaderOff = RoundUpToAlignment(FileOff, ELFT::Is64Bits ? 8 : 4); + FileSize = SectionHeaderOff + getNumSections() * sizeof(Elf_Shdr); + + // Update "_end" and "end" symbols so that they + // point to the end of the data segment. + ElfSym<ELFT>::End.st_value = VA; +} + +// Returns the number of PHDR entries. +template <class ELFT> int Writer<ELFT>::getPhdrsNum() const { + bool Tls = false; + int I = 2; // 2 for PT_PHDR and first PT_LOAD + if (needsInterpSection()) + ++I; + if (isOutputDynamic()) + ++I; + if (!Config->ZExecStack) + ++I; + uintX_t Last = PF_R; + for (OutputSectionBase<ELFT> *Sec : OutputSections) { + if (!needsPhdr<ELFT>(Sec)) + continue; + if (Sec->getFlags() & SHF_TLS) + Tls = true; + uintX_t Flags = toPhdrFlags(Sec->getFlags()); + if (Last != Flags) { + Last = Flags; + ++I; + } + } + if (Tls) + ++I; + if (HasRelro) + ++I; + return I; +} + +// Returns the ELF header flags value: 0 unless the target machine is MIPS. +static uint32_t getELFFlags() { + if (Config->EMachine != EM_MIPS) + return 0; + // FIXME: In fact ELF flags depend on ELF flags of input object files + // and selected emulation. For now just use hard coded values. + uint32_t V = EF_MIPS_ABI_O32 | EF_MIPS_CPIC | EF_MIPS_ARCH_32R2; + if (Config->Shared) + V |= EF_MIPS_PIC; + return V; +} + +// Returns the entry point address: the address of the entry symbol if +// Config->EntrySym is set (0 if its repl() is null), otherwise +// Config->EntryAddr unless that is uint64_t(-1), otherwise 0. +template <class ELFT> +static typename ELFFile<ELFT>::uintX_t getEntryAddr() { + if (Config->EntrySym) { + if (SymbolBody *E = Config->EntrySym->repl()) + return getSymVA<ELFT>(*E); + return 0; + } + if (Config->EntryAddr != uint64_t(-1)) + return Config->EntryAddr; + return 0; +} + +// This function is called after we have assigned address and size +// to each section. This function fixes some predefined absolute +// symbol values that depend on section address and size. 
+template <class ELFT> void Writer<ELFT>::fixAbsoluteSymbols() { + // Update __rel[a]_iplt_{start,end} symbols so that they point + // to the beginning or the end of the .rela.plt section, respectively. + if (Out<ELFT>::RelaPlt) { + uintX_t Start = Out<ELFT>::RelaPlt->getVA(); + ElfSym<ELFT>::RelaIpltStart.st_value = Start; + ElfSym<ELFT>::RelaIpltEnd.st_value = Start + Out<ELFT>::RelaPlt->getSize(); + } + + // Update MIPS _gp absolute symbol so that it points to the static data. + if (Config->EMachine == EM_MIPS) + ElfSym<ELFT>::MipsGp.st_value = getMipsGpAddr<ELFT>(); +} + +// Writes the ELF header, the program header table and the section header +// table into the output buffer. +template <class ELFT> void Writer<ELFT>::writeHeader() { + uint8_t *Buf = Buffer->getBufferStart(); + memcpy(Buf, "\177ELF", 4); + + // Write the ELF header. + auto *EHdr = reinterpret_cast<Elf_Ehdr *>(Buf); + EHdr->e_ident[EI_CLASS] = ELFT::Is64Bits ? ELFCLASS64 : ELFCLASS32; + EHdr->e_ident[EI_DATA] = ELFT::TargetEndianness == llvm::support::little + ? ELFDATA2LSB + : ELFDATA2MSB; + EHdr->e_ident[EI_VERSION] = EV_CURRENT; + + auto &FirstObj = cast<ELFFileBase<ELFT>>(*Config->FirstElf); + EHdr->e_ident[EI_OSABI] = FirstObj.getOSABI(); + + EHdr->e_type = Config->Shared ? ET_DYN : ET_EXEC; + EHdr->e_machine = FirstObj.getEMachine(); + EHdr->e_version = EV_CURRENT; + EHdr->e_entry = getEntryAddr<ELFT>(); + EHdr->e_phoff = sizeof(Elf_Ehdr); + EHdr->e_shoff = SectionHeaderOff; + EHdr->e_flags = getELFFlags(); + EHdr->e_ehsize = sizeof(Elf_Ehdr); + EHdr->e_phentsize = sizeof(Elf_Phdr); + EHdr->e_phnum = Phdrs.size(); + EHdr->e_shentsize = sizeof(Elf_Shdr); + EHdr->e_shnum = getNumSections(); + EHdr->e_shstrndx = Out<ELFT>::ShStrTab->SectionIndex; + + // Write the program header table. + memcpy(Buf + EHdr->e_phoff, &Phdrs[0], Phdrs.size() * sizeof(Phdrs[0])); + + // Write the section header table. Note that the first table entry is null. 
+ auto SHdrs = reinterpret_cast<Elf_Shdr *>(Buf + EHdr->e_shoff); + for (OutputSectionBase<ELFT> *Sec : OutputSections) + Sec->writeHeaderTo(++SHdrs); +} + +template <class ELFT> void Writer<ELFT>::openFile(StringRef Path) { + ErrorOr<std::unique_ptr<FileOutputBuffer>> BufferOrErr = + FileOutputBuffer::create(Path, FileSize, FileOutputBuffer::F_executable); + error(BufferOrErr, "failed to open " + Path); + Buffer = std::move(*BufferOrErr); +} + +// Write section contents to a mmap'ed file. +template <class ELFT> void Writer<ELFT>::writeSections() { + uint8_t *Buf = Buffer->getBufferStart(); + + // PPC64 needs to process relocations in the .opd section before processing + // relocations in code-containing sections. + if (OutputSectionBase<ELFT> *Sec = Out<ELFT>::Opd) { + Out<ELFT>::OpdBuf = Buf + Sec->getFileOff(); + Sec->writeTo(Buf + Sec->getFileOff()); + } + + for (OutputSectionBase<ELFT> *Sec : OutputSections) + if (Sec != Out<ELFT>::Opd) + Sec->writeTo(Buf + Sec->getFileOff()); +} + +// Sets every field of *PH. p_paddr is set to the same value as p_vaddr, +// and both p_filesz and p_memsz are set to Size. +template <class ELFT> +void Writer<ELFT>::setPhdr(Elf_Phdr *PH, uint32_t Type, uint32_t Flags, + uintX_t FileOff, uintX_t VA, uintX_t Size, + uintX_t Align) { + PH->p_type = Type; + PH->p_flags = Flags; + PH->p_offset = FileOff; + PH->p_vaddr = VA; + PH->p_paddr = VA; + PH->p_filesz = Size; + PH->p_memsz = Size; + PH->p_align = Align; +} + +// Copies flags, file offset, address, size and alignment of an output +// section into *PH. p_type is not touched by this function. +template <class ELFT> +void Writer<ELFT>::copyPhdr(Elf_Phdr *PH, OutputSectionBase<ELFT> *From) { + PH->p_flags = toPhdrFlags(From->getFlags()); + PH->p_offset = From->getFileOff(); + PH->p_vaddr = From->getVA(); + PH->p_paddr = From->getVA(); + PH->p_filesz = From->getSize(); + PH->p_memsz = From->getSize(); + PH->p_align = From->getAlign(); +} + +// Fills InputToOutputSection: every name listed in an entry of +// Config->OutputSections is mapped to that entry's key. +template <class ELFT> void Writer<ELFT>::buildSectionMap() { + for (const std::pair<StringRef, std::vector<StringRef>> &OutSec : + Config->OutputSections) + for (StringRef Name : OutSec.second) + InputToOutputSection[Name] = OutSec.first; +} + +template void 
lld::elf2::writeResult<ELF32LE>(SymbolTable<ELF32LE> *Symtab); +template void lld::elf2::writeResult<ELF32BE>(SymbolTable<ELF32BE> *Symtab); +template void lld::elf2::writeResult<ELF64LE>(SymbolTable<ELF64LE> *Symtab); +template void lld::elf2::writeResult<ELF64BE>(SymbolTable<ELF64BE> *Symtab); diff --git a/ELF/Writer.h b/ELF/Writer.h new file mode 100644 index 000000000000..40a1711e2bd2 --- /dev/null +++ b/ELF/Writer.h @@ -0,0 +1,24 @@ +//===- Writer.h -------------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_WRITER_H +#define LLD_ELF_WRITER_H + +namespace lld { +namespace elf2 { + +template <class ELFT> class SymbolTable; + +template <class ELFT> void writeResult(SymbolTable<ELFT> *Symtab); + +template <class ELFT> void markLive(SymbolTable<ELFT> *Symtab); +} +} + +#endif |