aboutsummaryrefslogtreecommitdiffstats
path: root/ELF/Symbols.cpp
blob: c0cba21cfe8d7cefe0df4f0baafe64f813dda67d (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
//===- Symbols.cpp --------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "Symbols.h"
#include "InputFiles.h"
#include "InputSection.h"
#include "OutputSections.h"
#include "SyntheticSections.h"
#include "Target.h"
#include "Writer.h"
#include "lld/Common/ErrorHandler.h"
#include "lld/Common/Strings.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Path.h"
#include <cstring>

using namespace llvm;
using namespace llvm::object;
using namespace llvm::ELF;

namespace lld {
// Produces the symbol name to show in diagnostics. The name is passed
// through the Itanium demangler only when demangling is enabled in the
// configuration; otherwise it is returned verbatim.
static std::string demangle(StringRef symName) {
  if (!elf::config->demangle)
    return symName.str();
  return demangleItanium(symName);
}

// Renders an ELF symbol's name for messages (demangled when enabled).
std::string toString(const elf::Symbol &b) {
  StringRef name = b.getName();
  return demangle(name);
}
// Renders an archive symbol-table entry's name for messages (demangled when
// enabled).
std::string toELFString(const Archive::Symbol &b) {
  StringRef name = b.getName();
  return demangle(name);
}

namespace elf {
// Out-of-line definitions of ElfSym's static data members. They have static
// storage duration and no initializer, so each pointer starts out null.
Defined *ElfSym::bss;
Defined *ElfSym::etext1;
Defined *ElfSym::etext2;
Defined *ElfSym::edata1;
Defined *ElfSym::edata2;
Defined *ElfSym::end1;
Defined *ElfSym::end2;
Defined *ElfSym::globalOffsetTable;
Defined *ElfSym::mipsGp;
Defined *ElfSym::mipsGpDisp;
Defined *ElfSym::mipsLocalGp;
Defined *ElfSym::relaIpltStart;
Defined *ElfSym::relaIpltEnd;
Defined *ElfSym::riscvGlobalPointer;
Defined *ElfSym::tlsModuleBase;

// Computes the value used as the address of `sym`.
//
// - Defined symbol without a section: its `value` is returned unchanged.
// - Defined symbol with a section: the address comes from the (replacement)
//   section's getVA(); for TLS symbols in a non-relocatable link the result
//   is relative to the address of the TLS segment's first section.
// - Shared, undefined and lazy symbols: 0.
// - Common and placeholder symbols must never get here.
//
// `addend` is an in/out parameter: for section symbols it is folded into the
// section offset and reset to 0, so the caller must add whatever is left in
// `addend` after this function returns.
static uint64_t getSymVA(const Symbol &sym, int64_t &addend) {
  switch (sym.kind()) {
  case Symbol::DefinedKind: {
    auto &d = cast<Defined>(sym);
    SectionBase *isec = d.section;

    // This is an absolute symbol.
    if (!isec)
      return d.value;

    assert(isec != &InputSection::discarded);
    // Continue with the section that `repl` designates for this one.
    isec = isec->repl;

    uint64_t offset = d.value;

    // An object in an SHF_MERGE section might be referenced via a
    // section symbol (as a hack for reducing the number of local
    // symbols).
    // Depending on the addend, the reference via a section symbol
    // refers to a different object in the merge section.
    // Since the objects in the merge section are not necessarily
    // contiguous in the output, the addend can thus affect the final
    // VA in a non-linear way.
    // To make this work, we incorporate the addend into the section
    // offset (and zero out the addend for later processing) so that
    // we find the right object in the section.
    if (d.isSection()) {
      offset += addend;
      addend = 0;
    }

    // In the typical case, this is actually very simple and boils
    // down to adding together 3 numbers:
    // 1. The address of the output section.
    // 2. The offset of the input section within the output section.
    // 3. The offset within the input section (this addition happens
    //    inside InputSection::getOffset).
    //
    // If you understand the data structures involved with this next
    // line (and how they get built), then you have a pretty good
    // understanding of the linker.
    uint64_t va = isec->getVA(offset);

    // MIPS relocatable files can mix regular and microMIPS code.
    // The linker needs to distinguish such code. To do so, microMIPS
    // symbols have the `STO_MIPS_MICROMIPS` flag in the `st_other`
    // field. Unfortunately, the `MIPS::relocateOne()` method has
    // a symbol value only. To pass the type of the symbol (regular/microMIPS)
    // to that routine, as well as to other places where we write
    // a symbol value as-is (.dynamic section, `Elf_Ehdr::e_entry`
    // field etc), do the same trick the compiler uses to mark microMIPS
    // for the CPU - set the least-significant bit.
    if (config->emachine == EM_MIPS && isMicroMips() &&
        ((sym.stOther & STO_MIPS_MICROMIPS) || sym.needsPltAddr))
      va |= 1;

    if (d.isTls() && !config->relocatable) {
      // Use the address of the TLS segment's first section rather than the
      // segment's address, because segment addresses aren't initialized until
      // after sections are finalized. (e.g. Measuring the size of .rela.dyn
      // for Android relocation packing requires knowing TLS symbol addresses
      // during section finalization.)
      if (!Out::tlsPhdr || !Out::tlsPhdr->firstSec)
        fatal(toString(d.file) +
              " has an STT_TLS symbol but doesn't have an SHF_TLS section");
      return va - Out::tlsPhdr->firstSec->addr;
    }
    return va;
  }
  case Symbol::SharedKind:
  case Symbol::UndefinedKind:
    return 0;
  case Symbol::LazyArchiveKind:
  case Symbol::LazyObjectKind:
    // A lazy symbol is only expected here when it is flagged as used in a
    // regular object; it then evaluates to 0.
    assert(sym.isUsedInRegularObj && "lazy symbol reached writer");
    return 0;
  case Symbol::CommonKind:
    llvm_unreachable("common symbol reached writer");
  case Symbol::PlaceholderKind:
    llvm_unreachable("placeholder symbol reached writer");
  }
  llvm_unreachable("invalid symbol kind");
}

// Returns this symbol's address plus `addend`. getSymVA() takes the addend by
// reference and may consume it (resetting it to 0), so the sum is formed only
// after that call has returned.
uint64_t Symbol::getVA(int64_t addend) const {
  int64_t remaining = addend;
  const uint64_t base = getSymVA(*this, remaining);
  return base + remaining;
}

// Returns the address of this symbol's GOT slot: inside in.igotPlt when
// `gotInIgot` is set, otherwise inside in.got.
uint64_t Symbol::getGotVA() const {
  return gotInIgot ? in.igotPlt->getVA() + getGotPltOffset()
                   : in.got->getVA() + getGotOffset();
}

// Returns the byte offset of this symbol's GOT slot: GOT index times the
// configured word size.
uint64_t Symbol::getGotOffset() const {
  const auto byteOffset = gotIndex * config->wordsize;
  return byteOffset;
}

// Returns the address of this symbol's .got.plt-style slot. Symbols flagged
// `isInIplt` live in in.igotPlt; all others live in in.gotPlt.
uint64_t Symbol::getGotPltVA() const {
  const uint64_t sectionVA =
      isInIplt ? in.igotPlt->getVA() : in.gotPlt->getVA();
  return sectionVA + getGotPltOffset();
}

// Returns the byte offset of this symbol's slot within its GOT-PLT section.
// For symbols not flagged `isInIplt`, the target's GOT-PLT header entries are
// skipped first; for `isInIplt` symbols the PLT index is used directly.
uint64_t Symbol::getGotPltOffset() const {
  if (!isInIplt)
    return (pltIndex + target->gotPltHeaderEntriesNum) * config->wordsize;
  return pltIndex * config->wordsize;
}

// Returns the byte offset of this symbol's PPC64 long-branch entry
// (index times word size). The index 0xffff is asserted never to occur here.
uint64_t Symbol::getPPC64LongBranchOffset() const {
  assert(ppc64BranchltIndex != 0xffff);
  const auto byteOffset = ppc64BranchltIndex * config->wordsize;
  return byteOffset;
}

uint64_t Symbol::getPltVA() const {
  PltSection *plt = isInIplt ? in.iplt : in.plt;
  uint64_t outVA =
      plt->getVA() + plt->headerSize + pltIndex * target->pltEntrySize;
  // While linking microMIPS code PLT code are always microMIPS
  // code. Set the less-significant bit to track that fact.
  // See detailed comment in the `getSymVA` function.
  if (config->emachine == EM_MIPS && isMicroMips())
    outVA |= 1;
  return outVA;
}

// Returns the address of this symbol's entry in in.ppc64LongBranchTarget:
// the section address plus index times word size. The index 0xffff is
// asserted never to occur here.
uint64_t Symbol::getPPC64LongBranchTableVA() const {
  assert(ppc64BranchltIndex != 0xffff);
  const auto tableVA = in.ppc64LongBranchTarget->getVA();
  const auto entryOffset = ppc64BranchltIndex * config->wordsize;
  return tableVA + entryOffset;
}

uint64_t Symbol::getSize() const {
  if (const auto *dr = dyn_cast<Defined>(this))
    return dr->size;
  return cast<SharedSymbol>(this)->size;
}

// Returns the output section of the (replacement) section this symbol is
// defined in, or nullptr when the symbol is not a Defined symbol or has no
// section.
OutputSection *Symbol::getOutputSection() const {
  const auto *def = dyn_cast<Defined>(this);
  if (!def || !def->section)
    return nullptr;
  return def->section->repl->getOutputSection();
}

// If a symbol name contains '@', the characters after that is
// a symbol version name. This function parses that.
void Symbol::parseSymbolVersion() {
  StringRef s = getName();
  size_t pos = s.find('@');
  if (pos == 0 || pos == StringRef::npos)
    return;
  StringRef verstr = s.substr(pos + 1);
  if (verstr.empty())
    return;

  // Truncate the symbol name so that it doesn't include the version string.
  nameSize = pos;

  // If this is not in this DSO, it is not a definition.
  if (!isDefined())
    return;

  // '@@' in a symbol name means the default version.
  // It is usually the most recent one.
  bool isDefault = (verstr[0] == '@');
  if (isDefault)
    verstr = verstr.substr(1);

  for (const VersionDefinition &ver : namedVersionDefs()) {
    if (ver.name != verstr)
      continue;

    if (isDefault)
      versionId = ver.id;
    else
      versionId = ver.id | VERSYM_HIDDEN;
    return;
  }

  // It is an error if the specified version is not defined.
  // Usually version script is not provided when linking executable,
  // but we may still want to override a versioned symbol from DSO,
  // so we do not report error in this case. We also do not error
  // if the symbol has a local version as it won't be in the dynamic
  // symbol table.
  if (config->shared && versionId != VER_NDX_LOCAL)
    error(toString(file) + ": symbol " + s + " has undefined version " +
          verstr);
}

// Triggers loading of the file that lazily defines this symbol.
//
// A LazyArchive symbol asks its archive file to fetch the member recorded in
// `sym`; a LazyObject symbol asks its lazy object file to fetch itself.
// Calling this on any other symbol kind is a programming error.
void Symbol::fetch() const {
  if (auto *sym = dyn_cast<LazyArchive>(this)) {
    cast<ArchiveFile>(sym->file)->fetch(sym->sym);
    return;
  }

  if (auto *sym = dyn_cast<LazyObject>(this)) {
    // Use cast<> rather than dyn_cast<>: the result is dereferenced
    // unconditionally, so a mismatched file type must trip cast<>'s
    // assertion instead of silently producing a null pointer. This also
    // matches the ArchiveFile branch above.
    cast<LazyObjFile>(sym->file)->fetch();
    return;
  }

  llvm_unreachable("Symbol::fetch() is called on a non-lazy symbol");
}

// Returns the memory buffer of the archive member that `sym` refers to.
// Both lookups go through CHECK with a message naming the symbol.
MemoryBufferRef LazyArchive::getMemberBuffer() {
  Archive::Child member =
      CHECK(sym.getMember(),
            "could not get the member for symbol " + toELFString(sym));

  MemoryBufferRef buffer =
      CHECK(member.getMemoryBufferRef(),
            "could not get the buffer for the member defining symbol " +
                toELFString(sym));
  return buffer;
}

uint8_t Symbol::computeBinding() const {
  if (config->relocatable)
    return binding;
  if ((visibility != STV_DEFAULT && visibility != STV_PROTECTED) ||
      versionId == VER_NDX_LOCAL)
    return STB_LOCAL;
  if (!config->gnuUnique && binding == STB_GNU_UNIQUE)
    return STB_GLOBAL;
  return binding;
}

// Decides whether this symbol is included in the dynamic symbol table.
bool Symbol::includeInDynsym() const {
  // Nothing is included without a dynamic symbol table, and symbols whose
  // computed binding is local are never included.
  if (!config->hasDynSymTab || computeBinding() == STB_LOCAL)
    return false;

  // If a PIE binary was not linked against any shared libraries, then we can
  // safely drop weak undef symbols from .dynsym.
  if (isUndefWeak() && config->pie && sharedFiles.empty())
    return false;

  if (isUndefined() || isShared())
    return true;
  return exportDynamic || inDynamicList;
}

// Print out a log message for --trace-symbol.
void printTraceSymbol(const Symbol *sym) {
  std::string s;
  if (sym->isUndefined())
    s = ": reference to ";
  else if (sym->isLazy())
    s = ": lazy definition of ";
  else if (sym->isShared())
    s = ": shared definition of ";
  else if (sym->isCommon())
    s = ": common definition of ";
  else
    s = ": definition of ";

  message(toString(sym->file) + s + sym->getName());
}

void maybeWarnUnorderableSymbol(const Symbol *sym) {
  if (!config->warnSymbolOrdering)
    return;

  // If UnresolvedPolicy::Ignore is used, no "undefined symbol" error/warning
  // is emitted. It makes sense to not warn on undefined symbols.
  //
  // Note, ld.bfd --symbol-ordering-file= does not warn on undefined symbols,
  // but we don't have to be compatible here.
  if (sym->isUndefined() &&
      config->unresolvedSymbols == UnresolvedPolicy::Ignore)
    return;

  const InputFile *file = sym->file;
  auto *d = dyn_cast<Defined>(sym);

  auto report = [&](StringRef s) { warn(toString(file) + s + sym->getName()); };

  if (sym->isUndefined())
    report(": unable to order undefined symbol: ");
  else if (sym->isShared())
    report(": unable to order shared symbol: ");
  else if (d && !d->section)
    report(": unable to order absolute symbol: ");
  else if (d && isa<OutputSection>(d->section))
    report(": unable to order synthetic symbol: ");
  else if (d && !d->section->repl->isLive())
    report(": unable to order discarded symbol: ");
}

// Combines two visibility values. STV_DEFAULT yields to the other operand;
// when neither is STV_DEFAULT the numerically smaller value is returned.
static uint8_t getMinVisibility(uint8_t va, uint8_t vb) {
  if (va != STV_DEFAULT && vb != STV_DEFAULT)
    return std::min(va, vb);
  return va == STV_DEFAULT ? vb : va;
}

// Merges properties of `other` into this symbol.
//
// When there are many symbols of the same name, one of them is chosen as the
// result of symbol resolution. The symbols that were not chosen still
// contribute some properties: the exportDynamic and isUsedInRegularObj flags
// are sticky, and visibility is combined via getMinVisibility().
void Symbol::mergeProperties(const Symbol &other) {
  exportDynamic |= other.exportDynamic;
  isUsedInRegularObj |= other.isUsedInRegularObj;

  // DSO symbols do not affect visibility in the output.
  if (other.isShared())
    return;
  visibility = getMinVisibility(visibility, other.visibility);
}

// Resolves this symbol against `other`, a symbol of the same name. Properties
// are merged first; a placeholder is simply replaced, and every other case is
// dispatched on the kind of `other`.
void Symbol::resolve(const Symbol &other) {
  mergeProperties(other);

  if (isPlaceholder()) {
    replace(other);
    return;
  }

  switch (other.kind()) {
  case Symbol::DefinedKind:
    resolveDefined(cast<Defined>(other));
    return;
  case Symbol::UndefinedKind:
    resolveUndefined(cast<Undefined>(other));
    return;
  case Symbol::CommonKind:
    resolveCommon(cast<CommonSymbol>(other));
    return;
  case Symbol::SharedKind:
    resolveShared(cast<SharedSymbol>(other));
    return;
  case Symbol::LazyArchiveKind:
    resolveLazy(cast<LazyArchive>(other));
    return;
  case Symbol::LazyObjectKind:
    resolveLazy(cast<LazyObject>(other));
    return;
  case Symbol::PlaceholderKind:
    llvm_unreachable("bad symbol kind");
  }
}

// Resolves this symbol against `other`, an undefined reference of the same
// name. Depending on what this symbol currently is, `other` may replace it,
// trigger fetching of a lazy definition, or only update binding/`referenced`.
void Symbol::resolveUndefined(const Undefined &other) {
  // An undefined symbol with non default visibility must be satisfied
  // in the same DSO.
  //
  // If `other` is non-weak and carries a discarded-section index
  // (discardedSecIdx), override the existing undefined symbol for a better
  // error message later.
  if ((isShared() && other.visibility != STV_DEFAULT) ||
      (isUndefined() && other.binding != STB_WEAK && other.discardedSecIdx)) {
    replace(other);
    return;
  }

  if (traced)
    printTraceSymbol(&other);

  if (isLazy()) {
    // An undefined weak will not fetch archive members. See comment on Lazy in
    // Symbols.h for the details.
    if (other.binding == STB_WEAK) {
      binding = STB_WEAK;
      type = other.type;
      return;
    }

    // Do extra check for --warn-backrefs.
    //
    // --warn-backrefs is an option to prevent an undefined reference from
    // fetching an archive member written earlier in the command line. It can be
    // used to keep compatibility with GNU linkers to some degree.
    // I'll explain the feature and why you may find it useful in this comment.
    //
    // lld's symbol resolution semantics is more relaxed than traditional Unix
    // linkers. For example,
    //
    //   ld.lld foo.a bar.o
    //
    // succeeds even if bar.o contains an undefined symbol that has to be
    // resolved by some object file in foo.a. Traditional Unix linkers don't
    // allow this kind of backward reference, as they visit each file only once
    // from left to right in the command line while resolving all undefined
    // symbols at the moment of visiting.
    //
    // In the above case, since there's no undefined symbol when a linker visits
    // foo.a, no files are pulled out from foo.a, and because the linker forgets
    // about foo.a after visiting, it can't resolve undefined symbols in bar.o
    // that could have been resolved otherwise.
    //
    // That lld accepts more relaxed form means that (besides it'd make more
    // sense) you can accidentally write a command line or a build file that
    // works only with lld, even if you have a plan to distribute it to wider
    // users who may be using GNU linkers. With --warn-backrefs, you can detect
    // a library order that doesn't work with other Unix linkers.
    //
    // The option is also useful to detect cyclic dependencies between static
    // archives. Again, lld accepts
    //
    //   ld.lld foo.a bar.a
    //
    // even if foo.a and bar.a depend on each other. With --warn-backrefs, it is
    // handled as an error.
    //
    // Here is how the option works. We assign a group ID to each file. A file
    // with a smaller group ID can pull out object files from an archive file
    // with an equal or greater group ID. Otherwise, it is a reverse dependency
    // and an error.
    //
    // A file outside --{start,end}-group gets a fresh ID when instantiated. All
    // files within the same --{start,end}-group get the same group ID. E.g.
    //
    //   ld.lld A B --start-group C D --end-group E
    //
    // A forms group 0. B form group 1. C and D (including their member object
    // files) form group 2. E forms group 3. I think that you can see how this
    // group assignment rule simulates the traditional linker's semantics.
    //
    // NOTE(review): the group-ID comparison is evaluated before fetch();
    // presumably fetch() can change this symbol (and its `file`) — confirm
    // before reordering these statements.
    bool backref = config->warnBackrefs && other.file &&
                   file->groupId < other.file->groupId;
    fetch();

    // We don't report backward references to weak symbols as they can be
    // overridden later.
    if (backref && !isWeak())
      warn("backward reference detected: " + other.getName() + " in " +
           toString(other.file) + " refers to " + toString(file));
    return;
  }

  // Undefined symbols in a SharedFile do not change the binding.
  if (dyn_cast_or_null<SharedFile>(other.file))
    return;

  if (isUndefined() || isShared()) {
    // The binding will be weak if there is at least one reference and all are
    // weak. The binding has one opportunity to change to weak: if the first
    // reference is weak.
    if (other.binding != STB_WEAK || !referenced)
      binding = other.binding;
    referenced = true;
  }
}

// Using `.symver foo,foo@@VER` unfortunately creates two symbols: foo and
// foo@@VER. We want to effectively ignore foo, so the name containing "@@"
// gets precedence.
//
// Returns 1 when only `b` contains "@@", -1 when only `a` does, and 0 when
// both or neither do.
//
// FIXME: If users can transition to using
// .symver foo,foo@@@VER
// we can delete this hack.
static int compareVersion(StringRef a, StringRef b) {
  const bool aHasDefault = a.contains("@@");
  const bool bHasDefault = b.contains("@@");
  if (aHasDefault == bHasDefault)
    return 0;
  return aHasDefault ? -1 : 1;
}

// Compare two symbols: `this` is the existing symbol and `other` is the new
// one, which must be defined or common. Return 1 if the new symbol should
// win, -1 if the new symbol should lose, or 0 if there is a conflict.
// The checks below are ordered; the first one that applies decides.
int Symbol::compare(const Symbol *other) const {
  assert(other->isDefined() || other->isCommon());

  // An existing symbol that is neither defined nor common always loses.
  if (!isDefined() && !isCommon())
    return 1;

  // A name containing "@@" takes precedence (see compareVersion).
  if (int cmp = compareVersion(getName(), other->getName()))
    return cmp;

  // A weak new symbol never overrides the existing one.
  if (other->isWeak())
    return -1;

  // A weak existing symbol is overridden by a non-weak new one.
  if (isWeak())
    return 1;

  // Two common symbols: reported as a conflict (0) so the caller handles it.
  if (isCommon() && other->isCommon()) {
    if (config->warnCommon)
      warn("multiple common of " + getName());
    return 0;
  }

  // Existing common vs. new non-common: the new symbol wins.
  if (isCommon()) {
    if (config->warnCommon)
      warn("common " + getName() + " is overridden");
    return 1;
  }

  // Existing non-common vs. new common: the existing symbol wins.
  if (other->isCommon()) {
    if (config->warnCommon)
      warn("common " + getName() + " is overridden");
    return -1;
  }

  // From here on both symbols are Defined.
  auto *oldSym = cast<Defined>(this);
  auto *newSym = cast<Defined>(other);

  // A new definition coming from a bitcode file is always a conflict.
  if (dyn_cast_or_null<BitcodeFile>(other->file))
    return 0;

  // Both symbols have no section and the same value, and the new one is
  // STB_GLOBAL: keep the existing symbol instead of reporting a conflict.
  if (!oldSym->section && !newSym->section && oldSym->value == newSym->value &&
      newSym->binding == STB_GLOBAL)
    return -1;

  return 0;
}

// Reports a "duplicate symbol" error for `sym`, whose second definition comes
// from `newFile` at `errOffset` within `errSec`. Does nothing when
// config->allowMultipleDefinition is set.
static void reportDuplicate(Symbol *sym, InputFile *newFile,
                            InputSectionBase *errSec, uint64_t errOffset) {
  if (config->allowMultipleDefinition)
    return;

  Defined *existing = cast<Defined>(sym);

  // Without a section on both sides only the defining files can be named.
  const bool haveLocations = existing->section && errSec;
  if (!haveLocations) {
    error("duplicate symbol: " + toString(*sym) + "\n>>> defined in " +
          toString(sym->file) + "\n>>> defined in " + toString(newFile));
    return;
  }

  // Construct and print an error message in the form of:
  //
  //   ld.lld: error: duplicate symbol: foo
  //   >>> defined at bar.c:30
  //   >>>            bar.o (/home/alice/src/bar.o)
  //   >>> defined at baz.c:563
  //   >>>            baz.o in archive libbaz.a
  auto *oldSec = cast<InputSectionBase>(existing->section);
  std::string oldSrc = oldSec->getSrcMsg(*sym, existing->value);
  std::string oldObj = oldSec->getObjMsg(existing->value);
  std::string newSrc = errSec->getSrcMsg(*sym, errOffset);
  std::string newObj = errSec->getObjMsg(errOffset);

  std::string msg = "duplicate symbol: " + toString(*sym);

  // Appends one ">>> defined at" entry; the source line is optional.
  auto appendLocation = [&msg](const std::string &src,
                               const std::string &obj) {
    msg += "\n>>> defined at ";
    if (!src.empty())
      msg += src + "\n>>>            ";
    msg += obj;
  };
  appendLocation(oldSrc, oldObj);
  appendLocation(newSrc, newObj);
  error(msg);
}

// Resolves this symbol against a common symbol `other`. If compare() picks a
// winner, that decision is applied. On a tie this symbol is itself a common
// symbol: it keeps the larger alignment and, when `other` is bigger, takes
// over its size and file.
void Symbol::resolveCommon(const CommonSymbol &other) {
  const int verdict = compare(&other);
  if (verdict < 0)
    return;
  if (verdict > 0) {
    replace(other);
    return;
  }

  auto *self = cast<CommonSymbol>(this);
  if (self->alignment < other.alignment)
    self->alignment = other.alignment;

  if (self->size < other.size) {
    self->file = other.file;
    self->size = other.size;
  }
}

// Resolves this symbol against a defined symbol `other`: `other` replaces
// this symbol when it wins, is ignored when it loses, and a duplicate is
// reported when compare() signals a conflict.
void Symbol::resolveDefined(const Defined &other) {
  const int verdict = compare(&other);
  if (verdict > 0) {
    replace(other);
    return;
  }
  if (verdict < 0)
    return;

  reportDuplicate(this, other.file,
                  dyn_cast_or_null<InputSectionBase>(other.section),
                  other.value);
}

template <class LazyT> void Symbol::resolveLazy(const LazyT &other) {
  if (!isUndefined())
    return;

  // An undefined weak will not fetch archive members. See comment on Lazy in
  // Symbols.h for the details.
  if (isWeak()) {
    uint8_t ty = type;
    replace(other);
    type = ty;
    binding = STB_WEAK;
    return;
  }

  other.fetch();
}

void Symbol::resolveShared(const SharedSymbol &other) {
  if (visibility == STV_DEFAULT && (isUndefined() || isLazy())) {
    // An undefined symbol with non default visibility must be satisfied
    // in the same DSO.
    uint8_t bind = binding;
    replace(other);
    binding = bind;
    referenced = true;
  }
}

} // namespace elf
} // namespace lld