| File: | src/gnu/usr.bin/clang/liblldELF/../../../llvm/llvm/include/llvm/Object/IRSymtab.h |
| Warning: | line 328, column 20 Called C++ object pointer is null |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
| 1 | //===- InputFiles.cpp -----------------------------------------------------===// | |||
| 2 | // | |||
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
| 4 | // See https://llvm.org/LICENSE.txt for license information. | |||
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
| 6 | // | |||
| 7 | //===----------------------------------------------------------------------===// | |||
| 8 | ||||
| 9 | #include "InputFiles.h" | |||
| 10 | #include "Driver.h" | |||
| 11 | #include "InputSection.h" | |||
| 12 | #include "LinkerScript.h" | |||
| 13 | #include "SymbolTable.h" | |||
| 14 | #include "Symbols.h" | |||
| 15 | #include "SyntheticSections.h" | |||
| 16 | #include "lld/Common/DWARF.h" | |||
| 17 | #include "lld/Common/ErrorHandler.h" | |||
| 18 | #include "lld/Common/Memory.h" | |||
| 19 | #include "llvm/ADT/STLExtras.h" | |||
| 20 | #include "llvm/CodeGen/Analysis.h" | |||
| 21 | #include "llvm/IR/LLVMContext.h" | |||
| 22 | #include "llvm/IR/Module.h" | |||
| 23 | #include "llvm/LTO/LTO.h" | |||
| 24 | #include "llvm/MC/StringTableBuilder.h" | |||
| 25 | #include "llvm/Object/ELFObjectFile.h" | |||
| 26 | #include "llvm/Support/ARMAttributeParser.h" | |||
| 27 | #include "llvm/Support/ARMBuildAttributes.h" | |||
| 28 | #include "llvm/Support/Endian.h" | |||
| 29 | #include "llvm/Support/Path.h" | |||
| 30 | #include "llvm/Support/RISCVAttributeParser.h" | |||
| 31 | #include "llvm/Support/TarWriter.h" | |||
| 32 | #include "llvm/Support/raw_ostream.h" | |||
| 33 | ||||
| 34 | using namespace llvm; | |||
| 35 | using namespace llvm::ELF; | |||
| 36 | using namespace llvm::object; | |||
| 37 | using namespace llvm::sys; | |||
| 38 | using namespace llvm::sys::fs; | |||
| 39 | using namespace llvm::support::endian; | |||
| 40 | using namespace lld; | |||
| 41 | using namespace lld::elf; | |||
| 42 | ||||
| 43 | bool InputFile::isInGroup; | |||
| 44 | uint32_t InputFile::nextGroupId; | |||
| 45 | ||||
| 46 | std::vector<ArchiveFile *> elf::archiveFiles; | |||
| 47 | std::vector<BinaryFile *> elf::binaryFiles; | |||
| 48 | std::vector<BitcodeFile *> elf::bitcodeFiles; | |||
| 49 | std::vector<LazyObjFile *> elf::lazyObjFiles; | |||
| 50 | std::vector<InputFile *> elf::objectFiles; | |||
| 51 | std::vector<SharedFile *> elf::sharedFiles; | |||
| 52 | ||||
| 53 | std::unique_ptr<TarWriter> elf::tar; | |||
| 54 | ||||
| 55 | // Returns "<internal>", "foo.a(bar.o)" or "baz.o". | |||
| 56 | std::string lld::toString(const InputFile *f) { | |||
| 57 | if (!f) | |||
| 58 | return "<internal>"; | |||
| 59 | ||||
| 60 | if (f->toStringCache.empty()) { | |||
| 61 | if (f->archiveName.empty()) | |||
| 62 | f->toStringCache = std::string(f->getName()); | |||
| 63 | else | |||
| 64 | f->toStringCache = (f->archiveName + "(" + f->getName() + ")").str(); | |||
| 65 | } | |||
| 66 | return f->toStringCache; | |||
| 67 | } | |||
| 68 | ||||
| 69 | static ELFKind getELFKind(MemoryBufferRef mb, StringRef archiveName) { | |||
| 70 | unsigned char size; | |||
| 71 | unsigned char endian; | |||
| 72 | std::tie(size, endian) = getElfArchType(mb.getBuffer()); | |||
| 73 | ||||
| 74 | auto report = [&](StringRef msg) { | |||
| 75 | StringRef filename = mb.getBufferIdentifier(); | |||
| 76 | if (archiveName.empty()) | |||
| 77 | fatal(filename + ": " + msg); | |||
| 78 | else | |||
| 79 | fatal(archiveName + "(" + filename + "): " + msg); | |||
| 80 | }; | |||
| 81 | ||||
| 82 | if (!mb.getBuffer().startswith(ElfMagic)) | |||
| 83 | report("not an ELF file"); | |||
| 84 | if (endian != ELFDATA2LSB && endian != ELFDATA2MSB) | |||
| 85 | report("corrupted ELF file: invalid data encoding"); | |||
| 86 | if (size != ELFCLASS32 && size != ELFCLASS64) | |||
| 87 | report("corrupted ELF file: invalid file class"); | |||
| 88 | ||||
| 89 | size_t bufSize = mb.getBuffer().size(); | |||
| 90 | if ((size == ELFCLASS32 && bufSize < sizeof(Elf32_Ehdr)) || | |||
| 91 | (size == ELFCLASS64 && bufSize < sizeof(Elf64_Ehdr))) | |||
| 92 | report("corrupted ELF file: file is too short"); | |||
| 93 | ||||
| 94 | if (size == ELFCLASS32) | |||
| 95 | return (endian == ELFDATA2LSB) ? ELF32LEKind : ELF32BEKind; | |||
| 96 | return (endian == ELFDATA2LSB) ? ELF64LEKind : ELF64BEKind; | |||
| 97 | } | |||
| 98 | ||||
| 99 | InputFile::InputFile(Kind k, MemoryBufferRef m) | |||
| 100 | : mb(m), groupId(nextGroupId), fileKind(k) { | |||
| 101 | // All files within the same --{start,end}-group get the same group ID. | |||
| 102 | // Otherwise, a new file will get a new group ID. | |||
| 103 | if (!isInGroup) | |||
| 104 | ++nextGroupId; | |||
| 105 | } | |||
| 106 | ||||
| 107 | Optional<MemoryBufferRef> elf::readFile(StringRef path) { | |||
| 108 | llvm::TimeTraceScope timeScope("Load input files", path); | |||
| 109 | ||||
| 110 | // The --chroot option changes our virtual root directory. | |||
| 111 | // This is useful when you are dealing with files created by --reproduce. | |||
| 112 | if (!config->chroot.empty() && path.startswith("/")) | |||
| 113 | path = saver.save(config->chroot + path); | |||
| 114 | ||||
| 115 | log(path); | |||
| 116 | config->dependencyFiles.insert(llvm::CachedHashString(path)); | |||
| 117 | ||||
| 118 | auto mbOrErr = MemoryBuffer::getFile(path, /*IsText=*/false, | |||
| 119 | /*RequiresNullTerminator=*/false); | |||
| 120 | if (auto ec = mbOrErr.getError()) { | |||
| 121 | error("cannot open " + path + ": " + ec.message()); | |||
| 122 | return None; | |||
| 123 | } | |||
| 124 | ||||
| 125 | std::unique_ptr<MemoryBuffer> &mb = *mbOrErr; | |||
| 126 | MemoryBufferRef mbref = mb->getMemBufferRef(); | |||
| 127 | make<std::unique_ptr<MemoryBuffer>>(std::move(mb)); // take MB ownership | |||
| 128 | ||||
| 129 | if (tar) | |||
| 130 | tar->append(relativeToRoot(path), mbref.getBuffer()); | |||
| 131 | return mbref; | |||
| 132 | } | |||
| 133 | ||||
| 134 | // All input object files must be for the same architecture | |||
| 135 | // (e.g. it does not make sense to link x86 object files with | |||
| 136 | // MIPS object files.) This function checks for that error. | |||
| 137 | static bool isCompatible(InputFile *file) { | |||
| 138 | if (!file->isElf() && !isa<BitcodeFile>(file)) | |||
| 139 | return true; | |||
| 140 | ||||
| 141 | if (file->ekind == config->ekind && file->emachine == config->emachine) { | |||
| 142 | if (config->emachine != EM_MIPS) | |||
| 143 | return true; | |||
| 144 | if (isMipsN32Abi(file) == config->mipsN32Abi) | |||
| 145 | return true; | |||
| 146 | } | |||
| 147 | ||||
| 148 | StringRef target = | |||
| 149 | !config->bfdname.empty() ? config->bfdname : config->emulation; | |||
| 150 | if (!target.empty()) { | |||
| 151 | error(toString(file) + " is incompatible with " + target); | |||
| 152 | return false; | |||
| 153 | } | |||
| 154 | ||||
| 155 | InputFile *existing; | |||
| 156 | if (!objectFiles.empty()) | |||
| 157 | existing = objectFiles[0]; | |||
| 158 | else if (!sharedFiles.empty()) | |||
| 159 | existing = sharedFiles[0]; | |||
| 160 | else if (!bitcodeFiles.empty()) | |||
| 161 | existing = bitcodeFiles[0]; | |||
| 162 | else | |||
| 163 | llvm_unreachable("Must have -m, OUTPUT_FORMAT or existing input file to "__builtin_unreachable() | |||
| 164 | "determine target emulation")__builtin_unreachable(); | |||
| 165 | ||||
| 166 | error(toString(file) + " is incompatible with " + toString(existing)); | |||
| 167 | return false; | |||
| 168 | } | |||
| 169 | ||||
| 170 | template <class ELFT> static void doParseFile(InputFile *file) { | |||
| 171 | if (!isCompatible(file)) | |||
| 172 | return; | |||
| 173 | ||||
| 174 | // Binary file | |||
| 175 | if (auto *f = dyn_cast<BinaryFile>(file)) { | |||
| 176 | binaryFiles.push_back(f); | |||
| 177 | f->parse(); | |||
| 178 | return; | |||
| 179 | } | |||
| 180 | ||||
| 181 | // .a file | |||
| 182 | if (auto *f = dyn_cast<ArchiveFile>(file)) { | |||
| 183 | archiveFiles.push_back(f); | |||
| 184 | f->parse(); | |||
| 185 | return; | |||
| 186 | } | |||
| 187 | ||||
| 188 | // Lazy object file | |||
| 189 | if (auto *f = dyn_cast<LazyObjFile>(file)) { | |||
| 190 | lazyObjFiles.push_back(f); | |||
| 191 | f->parse<ELFT>(); | |||
| 192 | return; | |||
| 193 | } | |||
| 194 | ||||
| 195 | if (config->trace) | |||
| 196 | message(toString(file)); | |||
| 197 | ||||
| 198 | // .so file | |||
| 199 | if (auto *f = dyn_cast<SharedFile>(file)) { | |||
| 200 | f->parse<ELFT>(); | |||
| 201 | return; | |||
| 202 | } | |||
| 203 | ||||
| 204 | // LLVM bitcode file | |||
| 205 | if (auto *f = dyn_cast<BitcodeFile>(file)) { | |||
| 206 | bitcodeFiles.push_back(f); | |||
| 207 | f->parse<ELFT>(); | |||
| 208 | return; | |||
| 209 | } | |||
| 210 | ||||
| 211 | // Regular object file | |||
| 212 | objectFiles.push_back(file); | |||
| 213 | cast<ObjFile<ELFT>>(file)->parse(); | |||
| 214 | } | |||
| 215 | ||||
| 216 | // Add symbols in File to the symbol table. | |||
| 217 | void elf::parseFile(InputFile *file) { | |||
| 218 | switch (config->ekind) { | |||
| 219 | case ELF32LEKind: | |||
| 220 | doParseFile<ELF32LE>(file); | |||
| 221 | return; | |||
| 222 | case ELF32BEKind: | |||
| 223 | doParseFile<ELF32BE>(file); | |||
| 224 | return; | |||
| 225 | case ELF64LEKind: | |||
| 226 | doParseFile<ELF64LE>(file); | |||
| 227 | return; | |||
| 228 | case ELF64BEKind: | |||
| 229 | doParseFile<ELF64BE>(file); | |||
| 230 | return; | |||
| 231 | default: | |||
| 232 | llvm_unreachable("unknown ELFT")__builtin_unreachable(); | |||
| 233 | } | |||
| 234 | } | |||
| 235 | ||||
| 236 | // Concatenates arguments to construct a string representing an error location. | |||
| 237 | static std::string createFileLineMsg(StringRef path, unsigned line) { | |||
| 238 | std::string filename = std::string(path::filename(path)); | |||
| 239 | std::string lineno = ":" + std::to_string(line); | |||
| 240 | if (filename == path) | |||
| 241 | return filename + lineno; | |||
| 242 | return filename + lineno + " (" + path.str() + lineno + ")"; | |||
| 243 | } | |||
| 244 | ||||
| 245 | template <class ELFT> | |||
| 246 | static std::string getSrcMsgAux(ObjFile<ELFT> &file, const Symbol &sym, | |||
| 247 | InputSectionBase &sec, uint64_t offset) { | |||
| 248 | // In DWARF, functions and variables are stored to different places. | |||
| 249 | // First, lookup a function for a given offset. | |||
| 250 | if (Optional<DILineInfo> info = file.getDILineInfo(&sec, offset)) | |||
| 251 | return createFileLineMsg(info->FileName, info->Line); | |||
| 252 | ||||
| 253 | // If it failed, lookup again as a variable. | |||
| 254 | if (Optional<std::pair<std::string, unsigned>> fileLine = | |||
| 255 | file.getVariableLoc(sym.getName())) | |||
| 256 | return createFileLineMsg(fileLine->first, fileLine->second); | |||
| 257 | ||||
| 258 | // File.sourceFile contains STT_FILE symbol, and that is a last resort. | |||
| 259 | return std::string(file.sourceFile); | |||
| 260 | } | |||
| 261 | ||||
| 262 | std::string InputFile::getSrcMsg(const Symbol &sym, InputSectionBase &sec, | |||
| 263 | uint64_t offset) { | |||
| 264 | if (kind() != ObjKind) | |||
| 265 | return ""; | |||
| 266 | switch (config->ekind) { | |||
| 267 | default: | |||
| 268 | llvm_unreachable("Invalid kind")__builtin_unreachable(); | |||
| 269 | case ELF32LEKind: | |||
| 270 | return getSrcMsgAux(cast<ObjFile<ELF32LE>>(*this), sym, sec, offset); | |||
| 271 | case ELF32BEKind: | |||
| 272 | return getSrcMsgAux(cast<ObjFile<ELF32BE>>(*this), sym, sec, offset); | |||
| 273 | case ELF64LEKind: | |||
| 274 | return getSrcMsgAux(cast<ObjFile<ELF64LE>>(*this), sym, sec, offset); | |||
| 275 | case ELF64BEKind: | |||
| 276 | return getSrcMsgAux(cast<ObjFile<ELF64BE>>(*this), sym, sec, offset); | |||
| 277 | } | |||
| 278 | } | |||
| 279 | ||||
| 280 | StringRef InputFile::getNameForScript() const { | |||
| 281 | if (archiveName.empty()) | |||
| 282 | return getName(); | |||
| 283 | ||||
| 284 | if (nameForScriptCache.empty()) | |||
| 285 | nameForScriptCache = (archiveName + Twine(':') + getName()).str(); | |||
| 286 | ||||
| 287 | return nameForScriptCache; | |||
| 288 | } | |||
| 289 | ||||
| 290 | template <class ELFT> DWARFCache *ObjFile<ELFT>::getDwarf() { | |||
| 291 | llvm::call_once(initDwarf, [this]() { | |||
| 292 | dwarf = std::make_unique<DWARFCache>(std::make_unique<DWARFContext>( | |||
| 293 | std::make_unique<LLDDwarfObj<ELFT>>(this), "", | |||
| 294 | [&](Error err) { warn(getName() + ": " + toString(std::move(err))); }, | |||
| 295 | [&](Error warning) { | |||
| 296 | warn(getName() + ": " + toString(std::move(warning))); | |||
| 297 | })); | |||
| 298 | }); | |||
| 299 | ||||
| 300 | return dwarf.get(); | |||
| 301 | } | |||
| 302 | ||||
| 303 | // Returns the pair of file name and line number describing location of data | |||
| 304 | // object (variable, array, etc) definition. | |||
| 305 | template <class ELFT> | |||
| 306 | Optional<std::pair<std::string, unsigned>> | |||
| 307 | ObjFile<ELFT>::getVariableLoc(StringRef name) { | |||
| 308 | return getDwarf()->getVariableLoc(name); | |||
| 309 | } | |||
| 310 | ||||
| 311 | // Returns source line information for a given offset | |||
| 312 | // using DWARF debug info. | |||
| 313 | template <class ELFT> | |||
| 314 | Optional<DILineInfo> ObjFile<ELFT>::getDILineInfo(InputSectionBase *s, | |||
| 315 | uint64_t offset) { | |||
| 316 | // Detect SectionIndex for specified section. | |||
| 317 | uint64_t sectionIndex = object::SectionedAddress::UndefSection; | |||
| 318 | ArrayRef<InputSectionBase *> sections = s->file->getSections(); | |||
| 319 | for (uint64_t curIndex = 0; curIndex < sections.size(); ++curIndex) { | |||
| 320 | if (s == sections[curIndex]) { | |||
| 321 | sectionIndex = curIndex; | |||
| 322 | break; | |||
| 323 | } | |||
| 324 | } | |||
| 325 | ||||
| 326 | return getDwarf()->getDILineInfo(offset, sectionIndex); | |||
| 327 | } | |||
| 328 | ||||
| 329 | ELFFileBase::ELFFileBase(Kind k, MemoryBufferRef mb) : InputFile(k, mb) { | |||
| 330 | ekind = getELFKind(mb, ""); | |||
| 331 | ||||
| 332 | switch (ekind) { | |||
| 333 | case ELF32LEKind: | |||
| 334 | init<ELF32LE>(); | |||
| 335 | break; | |||
| 336 | case ELF32BEKind: | |||
| 337 | init<ELF32BE>(); | |||
| 338 | break; | |||
| 339 | case ELF64LEKind: | |||
| 340 | init<ELF64LE>(); | |||
| 341 | break; | |||
| 342 | case ELF64BEKind: | |||
| 343 | init<ELF64BE>(); | |||
| 344 | break; | |||
| 345 | default: | |||
| 346 | llvm_unreachable("getELFKind")__builtin_unreachable(); | |||
| 347 | } | |||
| 348 | } | |||
| 349 | ||||
| 350 | template <typename Elf_Shdr> | |||
| 351 | static const Elf_Shdr *findSection(ArrayRef<Elf_Shdr> sections, uint32_t type) { | |||
| 352 | for (const Elf_Shdr &sec : sections) | |||
| 353 | if (sec.sh_type == type) | |||
| 354 | return &sec; | |||
| 355 | return nullptr; | |||
| 356 | } | |||
| 357 | ||||
| 358 | template <class ELFT> void ELFFileBase::init() { | |||
| 359 | using Elf_Shdr = typename ELFT::Shdr; | |||
| 360 | using Elf_Sym = typename ELFT::Sym; | |||
| 361 | ||||
| 362 | // Initialize trivial attributes. | |||
| 363 | const ELFFile<ELFT> &obj = getObj<ELFT>(); | |||
| 364 | emachine = obj.getHeader().e_machine; | |||
| 365 | osabi = obj.getHeader().e_ident[llvm::ELF::EI_OSABI]; | |||
| 366 | abiVersion = obj.getHeader().e_ident[llvm::ELF::EI_ABIVERSION]; | |||
| 367 | ||||
| 368 | ArrayRef<Elf_Shdr> sections = CHECK(obj.sections(), this)check2((obj.sections()), [&] { return toString(this); }); | |||
| 369 | ||||
| 370 | // Find a symbol table. | |||
| 371 | bool isDSO = | |||
| 372 | (identify_magic(mb.getBuffer()) == file_magic::elf_shared_object); | |||
| 373 | const Elf_Shdr *symtabSec = | |||
| 374 | findSection(sections, isDSO ? SHT_DYNSYM : SHT_SYMTAB); | |||
| 375 | ||||
| 376 | if (!symtabSec) | |||
| 377 | return; | |||
| 378 | ||||
| 379 | // Initialize members corresponding to a symbol table. | |||
| 380 | firstGlobal = symtabSec->sh_info; | |||
| 381 | ||||
| 382 | ArrayRef<Elf_Sym> eSyms = CHECK(obj.symbols(symtabSec), this)check2((obj.symbols(symtabSec)), [&] { return toString(this ); }); | |||
| 383 | if (firstGlobal == 0 || firstGlobal > eSyms.size()) | |||
| 384 | fatal(toString(this) + ": invalid sh_info in symbol table"); | |||
| 385 | ||||
| 386 | elfSyms = reinterpret_cast<const void *>(eSyms.data()); | |||
| 387 | numELFSyms = eSyms.size(); | |||
| 388 | stringTable = CHECK(obj.getStringTableForSymtab(*symtabSec, sections), this)check2((obj.getStringTableForSymtab(*symtabSec, sections)), [ &] { return toString(this); }); | |||
| 389 | } | |||
| 390 | ||||
| 391 | template <class ELFT> | |||
| 392 | uint32_t ObjFile<ELFT>::getSectionIndex(const Elf_Sym &sym) const { | |||
| 393 | return CHECK(check2((this->getObj().getSectionIndex(sym, getELFSyms< ELFT>(), shndxTable)), [&] { return toString(this); }) | |||
| 394 | this->getObj().getSectionIndex(sym, getELFSyms<ELFT>(), shndxTable),check2((this->getObj().getSectionIndex(sym, getELFSyms< ELFT>(), shndxTable)), [&] { return toString(this); }) | |||
| 395 | this)check2((this->getObj().getSectionIndex(sym, getELFSyms< ELFT>(), shndxTable)), [&] { return toString(this); }); | |||
| 396 | } | |||
| 397 | ||||
| 398 | template <class ELFT> ArrayRef<Symbol *> ObjFile<ELFT>::getLocalSymbols() { | |||
| 399 | if (this->symbols.empty()) | |||
| 400 | return {}; | |||
| 401 | return makeArrayRef(this->symbols).slice(1, this->firstGlobal - 1); | |||
| 402 | } | |||
| 403 | ||||
| 404 | template <class ELFT> ArrayRef<Symbol *> ObjFile<ELFT>::getGlobalSymbols() { | |||
| 405 | return makeArrayRef(this->symbols).slice(this->firstGlobal); | |||
| 406 | } | |||
| 407 | ||||
| 408 | template <class ELFT> void ObjFile<ELFT>::parse(bool ignoreComdats) { | |||
| 409 | // Read a section table. justSymbols is usually false. | |||
| 410 | if (this->justSymbols) | |||
| 411 | initializeJustSymbols(); | |||
| 412 | else | |||
| 413 | initializeSections(ignoreComdats); | |||
| 414 | ||||
| 415 | // Read a symbol table. | |||
| 416 | initializeSymbols(); | |||
| 417 | } | |||
| 418 | ||||
| 419 | // Sections with SHT_GROUP and comdat bits define comdat section groups. | |||
| 420 | // They are identified and deduplicated by group name. This function | |||
| 421 | // returns a group name. | |||
| 422 | template <class ELFT> | |||
| 423 | StringRef ObjFile<ELFT>::getShtGroupSignature(ArrayRef<Elf_Shdr> sections, | |||
| 424 | const Elf_Shdr &sec) { | |||
| 425 | typename ELFT::SymRange symbols = this->getELFSyms<ELFT>(); | |||
| 426 | if (sec.sh_info >= symbols.size()) | |||
| 427 | fatal(toString(this) + ": invalid symbol index"); | |||
| 428 | const typename ELFT::Sym &sym = symbols[sec.sh_info]; | |||
| 429 | StringRef signature = CHECK(sym.getName(this->stringTable), this)check2((sym.getName(this->stringTable)), [&] { return toString (this); }); | |||
| 430 | ||||
| 431 | // As a special case, if a symbol is a section symbol and has no name, | |||
| 432 | // we use a section name as a signature. | |||
| 433 | // | |||
| 434 | // Such SHT_GROUP sections are invalid from the perspective of the ELF | |||
| 435 | // standard, but GNU gold 1.14 (the newest version as of July 2017) or | |||
| 436 | // older produce such sections as outputs for the -r option, so we need | |||
| 437 | // a bug-compatibility. | |||
| 438 | if (signature.empty() && sym.getType() == STT_SECTION) | |||
| 439 | return getSectionName(sec); | |||
| 440 | return signature; | |||
| 441 | } | |||
| 442 | ||||
| 443 | template <class ELFT> | |||
| 444 | bool ObjFile<ELFT>::shouldMerge(const Elf_Shdr &sec, StringRef name) { | |||
| 445 | if (!(sec.sh_flags & SHF_MERGE)) | |||
| 446 | return false; | |||
| 447 | ||||
| 448 | // On a regular link we don't merge sections if -O0 (default is -O1). This | |||
| 449 | // sometimes makes the linker significantly faster, although the output will | |||
| 450 | // be bigger. | |||
| 451 | // | |||
| 452 | // Doing the same for -r would create a problem as it would combine sections | |||
| 453 | // with different sh_entsize. One option would be to just copy every SHF_MERGE | |||
| 454 | // section as is to the output. While this would produce a valid ELF file with | |||
| 455 | // usable SHF_MERGE sections, tools like (llvm-)?dwarfdump get confused when | |||
| 456 | // they see two .debug_str. We could have separate logic for combining | |||
| 457 | // SHF_MERGE sections based both on their name and sh_entsize, but that seems | |||
| 458 | // to be more trouble than it is worth. Instead, we just use the regular (-O1) | |||
| 459 | // logic for -r. | |||
| 460 | if (config->optimize == 0 && !config->relocatable) | |||
| 461 | return false; | |||
| 462 | ||||
| 463 | // A mergeable section with size 0 is useless because they don't have | |||
| 464 | // any data to merge. A mergeable string section with size 0 can be | |||
| 465 | // argued as invalid because it doesn't end with a null character. | |||
| 466 | // We'll avoid a mess by handling them as if they were non-mergeable. | |||
| 467 | if (sec.sh_size == 0) | |||
| 468 | return false; | |||
| 469 | ||||
| 470 | // Check for sh_entsize. The ELF spec is not clear about the zero | |||
| 471 | // sh_entsize. It says that "the member [sh_entsize] contains 0 if | |||
| 472 | // the section does not hold a table of fixed-size entries". We know | |||
| 473 | // that Rust 1.13 produces a string mergeable section with a zero | |||
| 474 | // sh_entsize. Here we just accept it rather than being picky about it. | |||
| 475 | uint64_t entSize = sec.sh_entsize; | |||
| 476 | if (entSize == 0) | |||
| 477 | return false; | |||
| 478 | if (sec.sh_size % entSize) | |||
| 479 | fatal(toString(this) + ":(" + name + "): SHF_MERGE section size (" + | |||
| 480 | Twine(sec.sh_size) + ") must be a multiple of sh_entsize (" + | |||
| 481 | Twine(entSize) + ")"); | |||
| 482 | ||||
| 483 | if (sec.sh_flags & SHF_WRITE) | |||
| 484 | fatal(toString(this) + ":(" + name + | |||
| 485 | "): writable SHF_MERGE section is not supported"); | |||
| 486 | ||||
| 487 | return true; | |||
| 488 | } | |||
| 489 | ||||
| 490 | // This is for --just-symbols. | |||
| 491 | // | |||
| 492 | // --just-symbols is a very minor feature that allows you to link your | |||
| 493 | // output against other existing program, so that if you load both your | |||
| 494 | // program and the other program into memory, your output can refer the | |||
| 495 | // other program's symbols. | |||
| 496 | // | |||
| 497 | // When the option is given, we link "just symbols". The section table is | |||
| 498 | // initialized with null pointers. | |||
| 499 | template <class ELFT> void ObjFile<ELFT>::initializeJustSymbols() { | |||
| 500 | ArrayRef<Elf_Shdr> sections = CHECK(this->getObj().sections(), this)check2((this->getObj().sections()), [&] { return toString (this); }); | |||
| 501 | this->sections.resize(sections.size()); | |||
| 502 | } | |||
| 503 | ||||
| 504 | // An ELF object file may contain a `.deplibs` section. If it exists, the | |||
| 505 | // section contains a list of library specifiers such as `m` for libm. This | |||
| 506 | // function resolves a given name by finding the first matching library checking | |||
| 507 | // the various ways that a library can be specified to LLD. This ELF extension | |||
| 508 | // is a form of autolinking and is called `dependent libraries`. It is currently | |||
| 509 | // unique to LLVM and lld. | |||
| 510 | static void addDependentLibrary(StringRef specifier, const InputFile *f) { | |||
| 511 | if (!config->dependentLibraries) | |||
| 512 | return; | |||
| 513 | if (fs::exists(specifier)) | |||
| 514 | driver->addFile(specifier, /*withLOption=*/false); | |||
| 515 | else if (Optional<std::string> s = findFromSearchPaths(specifier)) | |||
| 516 | driver->addFile(*s, /*withLOption=*/true); | |||
| 517 | else if (Optional<std::string> s = searchLibraryBaseName(specifier)) | |||
| 518 | driver->addFile(*s, /*withLOption=*/true); | |||
| 519 | else | |||
| 520 | error(toString(f) + | |||
| 521 | ": unable to find library from dependent library specifier: " + | |||
| 522 | specifier); | |||
| 523 | } | |||
| 524 | ||||
| 525 | // Record the membership of a section group so that in the garbage collection | |||
| 526 | // pass, section group members are kept or discarded as a unit. | |||
| 527 | template <class ELFT> | |||
| 528 | static void handleSectionGroup(ArrayRef<InputSectionBase *> sections, | |||
| 529 | ArrayRef<typename ELFT::Word> entries) { | |||
| 530 | bool hasAlloc = false; | |||
| 531 | for (uint32_t index : entries.slice(1)) { | |||
| 532 | if (index >= sections.size()) | |||
| 533 | return; | |||
| 534 | if (InputSectionBase *s = sections[index]) | |||
| 535 | if (s != &InputSection::discarded && s->flags & SHF_ALLOC) | |||
| 536 | hasAlloc = true; | |||
| 537 | } | |||
| 538 | ||||
| 539 | // If any member has the SHF_ALLOC flag, the whole group is subject to garbage | |||
| 540 | // collection. See the comment in markLive(). This rule retains .debug_types | |||
| 541 | // and .rela.debug_types. | |||
| 542 | if (!hasAlloc) | |||
| 543 | return; | |||
| 544 | ||||
| 545 | // Connect the members in a circular doubly-linked list via | |||
| 546 | // nextInSectionGroup. | |||
| 547 | InputSectionBase *head; | |||
| 548 | InputSectionBase *prev = nullptr; | |||
| 549 | for (uint32_t index : entries.slice(1)) { | |||
| 550 | InputSectionBase *s = sections[index]; | |||
| 551 | if (!s || s == &InputSection::discarded) | |||
| 552 | continue; | |||
| 553 | if (prev) | |||
| 554 | prev->nextInSectionGroup = s; | |||
| 555 | else | |||
| 556 | head = s; | |||
| 557 | prev = s; | |||
| 558 | } | |||
| 559 | if (prev) | |||
| 560 | prev->nextInSectionGroup = head; | |||
| 561 | } | |||
| 562 | ||||
| 563 | template <class ELFT> | |||
| 564 | void ObjFile<ELFT>::initializeSections(bool ignoreComdats) { | |||
| 565 | const ELFFile<ELFT> &obj = this->getObj(); | |||
| 566 | ||||
| 567 | ArrayRef<Elf_Shdr> objSections = CHECK(obj.sections(), this)check2((obj.sections()), [&] { return toString(this); }); | |||
| 568 | uint64_t size = objSections.size(); | |||
| 569 | this->sections.resize(size); | |||
| 570 | this->sectionStringTable = | |||
| 571 | CHECK(obj.getSectionStringTable(objSections), this)check2((obj.getSectionStringTable(objSections)), [&] { return toString(this); }); | |||
| 572 | ||||
| 573 | std::vector<ArrayRef<Elf_Word>> selectedGroups; | |||
| 574 | ||||
| 575 | for (size_t i = 0, e = objSections.size(); i < e; ++i) { | |||
| 576 | if (this->sections[i] == &InputSection::discarded) | |||
| 577 | continue; | |||
| 578 | const Elf_Shdr &sec = objSections[i]; | |||
| 579 | ||||
| 580 | if (sec.sh_type == ELF::SHT_LLVM_CALL_GRAPH_PROFILE) | |||
| 581 | cgProfileSectionIndex = i; | |||
| 582 | ||||
| 583 | // SHF_EXCLUDE'ed sections are discarded by the linker. However, | |||
| 584 | // if -r is given, we'll let the final link discard such sections. | |||
| 585 | // This is compatible with GNU. | |||
| 586 | if ((sec.sh_flags & SHF_EXCLUDE) && !config->relocatable) { | |||
| 587 | if (sec.sh_type == SHT_LLVM_ADDRSIG) { | |||
| 588 | // We ignore the address-significance table if we know that the object | |||
| 589 | // file was created by objcopy or ld -r. This is because these tools | |||
| 590 | // will reorder the symbols in the symbol table, invalidating the data | |||
| 591 | // in the address-significance table, which refers to symbols by index. | |||
| 592 | if (sec.sh_link != 0) | |||
| 593 | this->addrsigSec = &sec; | |||
| 594 | else if (config->icf == ICFLevel::Safe) | |||
| 595 | warn(toString(this) + | |||
| 596 | ": --icf=safe conservatively ignores " | |||
| 597 | "SHT_LLVM_ADDRSIG [index " + | |||
| 598 | Twine(i) + | |||
| 599 | "] with sh_link=0 " | |||
| 600 | "(likely created using objcopy or ld -r)"); | |||
| 601 | } | |||
| 602 | this->sections[i] = &InputSection::discarded; | |||
| 603 | continue; | |||
| 604 | } | |||
| 605 | ||||
| 606 | switch (sec.sh_type) { | |||
| 607 | case SHT_GROUP: { | |||
| 608 | // De-duplicate section groups by their signatures. | |||
| 609 | StringRef signature = getShtGroupSignature(objSections, sec); | |||
| 610 | this->sections[i] = &InputSection::discarded; | |||
| 611 | ||||
| 612 | ArrayRef<Elf_Word> entries = | |||
| 613 | CHECK(obj.template getSectionContentsAsArray<Elf_Word>(sec), this)check2((obj.template getSectionContentsAsArray<Elf_Word> (sec)), [&] { return toString(this); }); | |||
| 614 | if (entries.empty()) | |||
| 615 | fatal(toString(this) + ": empty SHT_GROUP"); | |||
| 616 | ||||
| 617 | Elf_Word flag = entries[0]; | |||
| 618 | if (flag && flag != GRP_COMDAT) | |||
| 619 | fatal(toString(this) + ": unsupported SHT_GROUP format"); | |||
| 620 | ||||
| 621 | bool keepGroup = | |||
| 622 | (flag & GRP_COMDAT) == 0 || ignoreComdats || | |||
| 623 | symtab->comdatGroups.try_emplace(CachedHashStringRef(signature), this) | |||
| 624 | .second; | |||
| 625 | if (keepGroup) { | |||
| 626 | if (config->relocatable) | |||
| 627 | this->sections[i] = createInputSection(sec); | |||
| 628 | selectedGroups.push_back(entries); | |||
| 629 | continue; | |||
| 630 | } | |||
| 631 | ||||
| 632 | // Otherwise, discard group members. | |||
| 633 | for (uint32_t secIndex : entries.slice(1)) { | |||
| 634 | if (secIndex >= size) | |||
| 635 | fatal(toString(this) + | |||
| 636 | ": invalid section index in group: " + Twine(secIndex)); | |||
| 637 | this->sections[secIndex] = &InputSection::discarded; | |||
| 638 | } | |||
| 639 | break; | |||
| 640 | } | |||
| 641 | case SHT_SYMTAB_SHNDX: | |||
| 642 | shndxTable = CHECK(obj.getSHNDXTable(sec, objSections), this)check2((obj.getSHNDXTable(sec, objSections)), [&] { return toString(this); }); | |||
| 643 | break; | |||
| 644 | case SHT_SYMTAB: | |||
| 645 | case SHT_STRTAB: | |||
| 646 | case SHT_REL: | |||
| 647 | case SHT_RELA: | |||
| 648 | case SHT_NULL: | |||
| 649 | break; | |||
| 650 | default: | |||
| 651 | this->sections[i] = createInputSection(sec); | |||
| 652 | } | |||
| 653 | } | |||
| 654 | ||||
| 655 | // We have a second loop. It is used to: | |||
| 656 | // 1) handle SHF_LINK_ORDER sections. | |||
| 657 | // 2) create SHT_REL[A] sections. In some cases the section header index of a | |||
| 658 | // relocation section may be smaller than that of the relocated section. In | |||
| 659 | // such cases, the relocation section would attempt to reference a target | |||
| 660 | // section that has not yet been created. For simplicity, delay creation of | |||
| 661 | // relocation sections until now. | |||
| 662 | for (size_t i = 0, e = objSections.size(); i < e; ++i) { | |||
| 663 | if (this->sections[i] == &InputSection::discarded) | |||
| 664 | continue; | |||
| 665 | const Elf_Shdr &sec = objSections[i]; | |||
| 666 | ||||
| 667 | if (sec.sh_type == SHT_REL || sec.sh_type == SHT_RELA) | |||
| 668 | this->sections[i] = createInputSection(sec); | |||
| 669 | ||||
| 670 | // A SHF_LINK_ORDER section with sh_link=0 is handled as if it did not have | |||
| 671 | // the flag. | |||
| 672 | if (!(sec.sh_flags & SHF_LINK_ORDER) || !sec.sh_link) | |||
| 673 | continue; | |||
| 674 | ||||
| 675 | InputSectionBase *linkSec = nullptr; | |||
| 676 | if (sec.sh_link < this->sections.size()) | |||
| 677 | linkSec = this->sections[sec.sh_link]; | |||
| 678 | if (!linkSec) | |||
| 679 | fatal(toString(this) + ": invalid sh_link index: " + Twine(sec.sh_link)); | |||
| 680 | ||||
| 681 | // A SHF_LINK_ORDER section is discarded if its linked-to section is | |||
| 682 | // discarded. | |||
| 683 | InputSection *isec = cast<InputSection>(this->sections[i]); | |||
| 684 | linkSec->dependentSections.push_back(isec); | |||
| 685 | if (!isa<InputSection>(linkSec)) | |||
| 686 | error("a section " + isec->name + | |||
| 687 | " with SHF_LINK_ORDER should not refer a non-regular section: " + | |||
| 688 | toString(linkSec)); | |||
| 689 | } | |||
| 690 | ||||
| 691 | for (ArrayRef<Elf_Word> entries : selectedGroups) | |||
| 692 | handleSectionGroup<ELFT>(this->sections, entries); | |||
| 693 | } | |||
| 694 | ||||
| 695 | // For ARM only, to set the EF_ARM_ABI_FLOAT_SOFT or EF_ARM_ABI_FLOAT_HARD | |||
| 696 | // flag in the ELF Header we need to look at Tag_ABI_VFP_args to find out how | |||
| 697 | // the input objects have been compiled. | |||
| 698 | static void updateARMVFPArgs(const ARMAttributeParser &attributes, | |||
| 699 | const InputFile *f) { | |||
| 700 | Optional<unsigned> attr = | |||
| 701 | attributes.getAttributeValue(ARMBuildAttrs::ABI_VFP_args); | |||
| 702 | if (!attr.hasValue()) | |||
| 703 | // If an ABI tag isn't present then it is implicitly given the value of 0 | |||
| 704 | // which maps to ARMBuildAttrs::BaseAAPCS. However many assembler files, | |||
| 705 | // including some in glibc that don't use FP args (and should have value 3) | |||
| 706 | // don't have the attribute so we do not consider an implicit value of 0 | |||
| 707 | // as a clash. | |||
| 708 | return; | |||
| 709 | ||||
| 710 | unsigned vfpArgs = attr.getValue(); | |||
| 711 | ARMVFPArgKind arg; | |||
| 712 | switch (vfpArgs) { | |||
| 713 | case ARMBuildAttrs::BaseAAPCS: | |||
| 714 | arg = ARMVFPArgKind::Base; | |||
| 715 | break; | |||
| 716 | case ARMBuildAttrs::HardFPAAPCS: | |||
| 717 | arg = ARMVFPArgKind::VFP; | |||
| 718 | break; | |||
| 719 | case ARMBuildAttrs::ToolChainFPPCS: | |||
| 720 | // Tool chain specific convention that conforms to neither AAPCS variant. | |||
| 721 | arg = ARMVFPArgKind::ToolChain; | |||
| 722 | break; | |||
| 723 | case ARMBuildAttrs::CompatibleFPAAPCS: | |||
| 724 | // Object compatible with all conventions. | |||
| 725 | return; | |||
| 726 | default: | |||
| 727 | error(toString(f) + ": unknown Tag_ABI_VFP_args value: " + Twine(vfpArgs)); | |||
| 728 | return; | |||
| 729 | } | |||
| 730 | // Follow ld.bfd and error if there is a mix of calling conventions. | |||
| 731 | if (config->armVFPArgs != arg && config->armVFPArgs != ARMVFPArgKind::Default) | |||
| 732 | error(toString(f) + ": incompatible Tag_ABI_VFP_args"); | |||
| 733 | else | |||
| 734 | config->armVFPArgs = arg; | |||
| 735 | } | |||
| 736 | ||||
| 737 | // The ARM support in lld makes some use of instructions that are not available | |||
| 738 | // on all ARM architectures. Namely: | |||
| 739 | // - Use of BLX instruction for interworking between ARM and Thumb state. | |||
| 740 | // - Use of the extended Thumb branch encoding in relocation. | |||
| 741 | // - Use of the MOVT/MOVW instructions in Thumb Thunks. | |||
| 742 | // The ARM Attributes section contains information about the architecture chosen | |||
| 743 | // at compile time. We follow the convention that if at least one input object | |||
| 744 | // is compiled with an architecture that supports these features then lld is | |||
| 745 | // permitted to use them. | |||
| 746 | static void updateSupportedARMFeatures(const ARMAttributeParser &attributes) { | |||
| 747 | Optional<unsigned> attr = | |||
| 748 | attributes.getAttributeValue(ARMBuildAttrs::CPU_arch); | |||
| 749 | if (!attr.hasValue()) | |||
| 750 | return; | |||
| 751 | auto arch = attr.getValue(); | |||
| 752 | switch (arch) { | |||
| 753 | case ARMBuildAttrs::Pre_v4: | |||
| 754 | case ARMBuildAttrs::v4: | |||
| 755 | case ARMBuildAttrs::v4T: | |||
| 756 | // Architectures prior to v5 do not support BLX instruction | |||
| 757 | break; | |||
| 758 | case ARMBuildAttrs::v5T: | |||
| 759 | case ARMBuildAttrs::v5TE: | |||
| 760 | case ARMBuildAttrs::v5TEJ: | |||
| 761 | case ARMBuildAttrs::v6: | |||
| 762 | case ARMBuildAttrs::v6KZ: | |||
| 763 | case ARMBuildAttrs::v6K: | |||
| 764 | config->armHasBlx = true; | |||
| 765 | // Architectures used in pre-Cortex processors do not support | |||
| 766 | // The J1 = 1 J2 = 1 Thumb branch range extension, with the exception | |||
| 767 | // of Architecture v6T2 (arm1156t2-s and arm1156t2f-s) that do. | |||
| 768 | break; | |||
| 769 | default: | |||
| 770 | // All other Architectures have BLX and extended branch encoding | |||
| 771 | config->armHasBlx = true; | |||
| 772 | config->armJ1J2BranchEncoding = true; | |||
| 773 | if (arch != ARMBuildAttrs::v6_M && arch != ARMBuildAttrs::v6S_M) | |||
| 774 | // All Architectures used in Cortex processors with the exception | |||
| 775 | // of v6-M and v6S-M have the MOVT and MOVW instructions. | |||
| 776 | config->armHasMovtMovw = true; | |||
| 777 | break; | |||
| 778 | } | |||
| 779 | } | |||
| 780 | ||||
| 781 | // If a source file is compiled with x86 hardware-assisted call flow control | |||
| 782 | // enabled, the generated object file contains feature flags indicating that | |||
| 783 | // fact. This function reads the feature flags and returns it. | |||
| 784 | // | |||
| 785 | // Essentially we want to read a single 32-bit value in this function, but this | |||
| 786 | // function is rather complicated because the value is buried deep inside a | |||
| 787 | // .note.gnu.property section. | |||
| 788 | // | |||
| 789 | // The section consists of one or more NOTE records. Each NOTE record consists | |||
| 790 | // of zero or more type-length-value fields. We want to find a field of a | |||
| 791 | // certain type. It seems a bit too much to just store a 32-bit value, perhaps | |||
| 792 | // the ABI is unnecessarily complicated. | |||
| 793 | template <class ELFT> static uint32_t readAndFeatures(const InputSection &sec) { | |||
| 794 | using Elf_Nhdr = typename ELFT::Nhdr; | |||
| 795 | using Elf_Note = typename ELFT::Note; | |||
| 796 | ||||
| 797 | uint32_t featuresSet = 0; | |||
| 798 | ArrayRef<uint8_t> data = sec.data(); | |||
| 799 | auto reportFatal = [&](const uint8_t *place, const char *msg) { | |||
| 800 | fatal(toString(sec.file) + ":(" + sec.name + "+0x" + | |||
| 801 | Twine::utohexstr(place - sec.data().data()) + "): " + msg); | |||
| 802 | }; | |||
| 803 | while (!data.empty()) { | |||
| 804 | // Read one NOTE record. | |||
| 805 | auto *nhdr = reinterpret_cast<const Elf_Nhdr *>(data.data()); | |||
| 806 | if (data.size() < sizeof(Elf_Nhdr) || data.size() < nhdr->getSize()) | |||
| 807 | reportFatal(data.data(), "data is too short"); | |||
| 808 | ||||
| 809 | Elf_Note note(*nhdr); | |||
| 810 | if (nhdr->n_type != NT_GNU_PROPERTY_TYPE_0 || note.getName() != "GNU") { | |||
| 811 | data = data.slice(nhdr->getSize()); | |||
| 812 | continue; | |||
| 813 | } | |||
| 814 | ||||
| 815 | uint32_t featureAndType = config->emachine == EM_AARCH64 | |||
| 816 | ? GNU_PROPERTY_AARCH64_FEATURE_1_AND | |||
| 817 | : GNU_PROPERTY_X86_FEATURE_1_AND; | |||
| 818 | ||||
| 819 | // Read a body of a NOTE record, which consists of type-length-value fields. | |||
| 820 | ArrayRef<uint8_t> desc = note.getDesc(); | |||
| 821 | while (!desc.empty()) { | |||
| 822 | const uint8_t *place = desc.data(); | |||
| 823 | if (desc.size() < 8) | |||
| 824 | reportFatal(place, "program property is too short"); | |||
| 825 | uint32_t type = read32<ELFT::TargetEndianness>(desc.data()); | |||
| 826 | uint32_t size = read32<ELFT::TargetEndianness>(desc.data() + 4); | |||
| 827 | desc = desc.slice(8); | |||
| 828 | if (desc.size() < size) | |||
| 829 | reportFatal(place, "program property is too short"); | |||
| 830 | ||||
| 831 | if (type == featureAndType) { | |||
| 832 | // We found a FEATURE_1_AND field. There may be more than one of these | |||
| 833 | // in a .note.gnu.property section, for a relocatable object we | |||
| 834 | // accumulate the bits set. | |||
| 835 | if (size < 4) | |||
| 836 | reportFatal(place, "FEATURE_1_AND entry is too short"); | |||
| 837 | featuresSet |= read32<ELFT::TargetEndianness>(desc.data()); | |||
| 838 | } | |||
| 839 | ||||
| 840 | // Padding is present in the note descriptor, if necessary. | |||
| 841 | desc = desc.slice(alignTo<(ELFT::Is64Bits ? 8 : 4)>(size)); | |||
| 842 | } | |||
| 843 | ||||
| 844 | // Go to next NOTE record to look for more FEATURE_1_AND descriptions. | |||
| 845 | data = data.slice(nhdr->getSize()); | |||
| 846 | } | |||
| 847 | ||||
| 848 | return featuresSet; | |||
| 849 | } | |||
| 850 | ||||
| 851 | template <class ELFT> | |||
| 852 | InputSectionBase *ObjFile<ELFT>::getRelocTarget(const Elf_Shdr &sec) { | |||
| 853 | uint32_t idx = sec.sh_info; | |||
| 854 | if (idx >= this->sections.size()) | |||
| 855 | fatal(toString(this) + ": invalid relocated section index: " + Twine(idx)); | |||
| 856 | InputSectionBase *target = this->sections[idx]; | |||
| 857 | ||||
| 858 | // Strictly speaking, a relocation section must be included in the | |||
| 859 | // group of the section it relocates. However, LLVM 3.3 and earlier | |||
| 860 | // would fail to do so, so we gracefully handle that case. | |||
| 861 | if (target == &InputSection::discarded) | |||
| 862 | return nullptr; | |||
| 863 | ||||
| 864 | if (!target) | |||
| 865 | fatal(toString(this) + ": unsupported relocation reference"); | |||
| 866 | return target; | |||
| 867 | } | |||
| 868 | ||||
| 869 | // Create a regular InputSection class that has the same contents | |||
| 870 | // as a given section. | |||
| 871 | static InputSection *toRegularSection(MergeInputSection *sec) { | |||
| 872 | return make<InputSection>(sec->file, sec->flags, sec->type, sec->alignment, | |||
| 873 | sec->data(), sec->name); | |||
| 874 | } | |||
| 875 | ||||
// Creates the input section object for section header `sec`.
//
// Returns one of:
// - a newly made InputSection, MergeInputSection or EhInputSection;
// - &InputSection::discarded for sections that are consumed here or must not
//   reach the output (attribute sections after the first one, dependent
//   libraries, various .note.* markers, linkonce thunks, old build IDs);
// - nullptr for SHT_REL/SHT_RELA sections whose target was discarded, or that
//   are not copied to the output because neither -r nor --emit-relocs is given.
//
// Side effects include updating config/in.attributes from attribute sections,
// attaching relocations to the target section, and setting andFeatures,
// splitStack and someNoSplitStack on this file.
template <class ELFT>
InputSectionBase *ObjFile<ELFT>::createInputSection(const Elf_Shdr &sec) {
  StringRef name = getSectionName(sec);

  if (config->emachine == EM_ARM && sec.sh_type == SHT_ARM_ATTRIBUTES) {
    ARMAttributeParser attributes;
    ArrayRef<uint8_t> contents = check(this->getObj().getSectionContents(sec));
    if (Error e = attributes.parse(contents, config->ekind == ELF32LEKind
                                                 ? support::little
                                                 : support::big)) {
      // Parsing failed: warn, then fall through so the section is handled by
      // the generic code below. The InputSection made here is only used to
      // format the warning.
      auto *isec = make<InputSection>(*this, sec, name);
      warn(toString(isec) + ": " + llvm::toString(std::move(e)));
    } else {
      updateSupportedARMFeatures(attributes);
      updateARMVFPArgs(attributes, this);

      // FIXME: Retain the first attribute section we see. The eglibc ARM
      // dynamic loaders require the presence of an attribute section for dlopen
      // to work. In a full implementation we would merge all attribute
      // sections.
      if (in.attributes == nullptr) {
        in.attributes = make<InputSection>(*this, sec, name);
        return in.attributes;
      }
      return &InputSection::discarded;
    }
  }

  if (config->emachine == EM_RISCV && sec.sh_type == SHT_RISCV_ATTRIBUTES) {
    RISCVAttributeParser attributes;
    ArrayRef<uint8_t> contents = check(this->getObj().getSectionContents(sec));
    if (Error e = attributes.parse(contents, support::little)) {
      // Same as for ARM: warn and fall through to the generic handling.
      auto *isec = make<InputSection>(*this, sec, name);
      warn(toString(isec) + ": " + llvm::toString(std::move(e)));
    } else {
      // FIXME: Validate arch tag contains C if and only if EF_RISCV_RVC is
      // present.

      // FIXME: Retain the first attribute section we see. Tools such as
      // llvm-objdump make use of the attribute section to determine which
      // standard extensions to enable. In a full implementation we would merge
      // all attribute sections.
      if (in.attributes == nullptr) {
        in.attributes = make<InputSection>(*this, sec, name);
        return in.attributes;
      }
      return &InputSection::discarded;
    }
  }

  switch (sec.sh_type) {
  case SHT_LLVM_DEPENDENT_LIBRARIES: {
    // For relocatable output the section is not interpreted here; it falls
    // through to the name-based handling below.
    if (config->relocatable)
      break;
    ArrayRef<char> data =
        CHECK(this->getObj().template getSectionContentsAsArray<char>(sec), this);
    if (!data.empty() && data.back() != '\0') {
      error(toString(this) +
            ": corrupted dependent libraries section (unterminated string): " +
            name);
      return &InputSection::discarded;
    }
    // The contents are a sequence of NUL-terminated strings; the check above
    // guarantees the last one is terminated.
    for (const char *d = data.begin(), *e = data.end(); d < e;) {
      StringRef s(d);
      addDependentLibrary(s, this);
      d += s.size() + 1;
    }
    return &InputSection::discarded;
  }
  case SHT_RELA:
  case SHT_REL: {
    // Find a relocation target section and associate this section with that.
    // Target may have been discarded if it is in a different section group
    // and the group is discarded, even though it's a violation of the
    // spec. We handle that situation gracefully by discarding dangling
    // relocation sections.
    InputSectionBase *target = getRelocTarget(sec);
    if (!target)
      return nullptr;

    // ELF spec allows mergeable sections with relocations, but they are
    // rare, and it is in practice hard to merge such sections by contents,
    // because applying relocations at end of linking changes section
    // contents. So, we simply handle such sections as non-mergeable ones.
    // Degrading like this is acceptable because section merging is optional.
    if (auto *ms = dyn_cast<MergeInputSection>(target)) {
      target = toRegularSection(ms);
      this->sections[sec.sh_info] = target;
    }

    if (target->firstRelocation)
      fatal(toString(this) +
            ": multiple relocation sections to one section are not supported");

    if (sec.sh_type == SHT_RELA) {
      ArrayRef<Elf_Rela> rels = CHECK(getObj().relas(sec), this);
      target->firstRelocation = rels.begin();
      target->numRelocations = rels.size();
      target->areRelocsRela = true;
    } else {
      ArrayRef<Elf_Rel> rels = CHECK(getObj().rels(sec), this);
      target->firstRelocation = rels.begin();
      target->numRelocations = rels.size();
      target->areRelocsRela = false;
    }
    assert(isUInt<31>(target->numRelocations));

    // Relocation sections are usually removed from the output, so return
    // `nullptr` for the normal case. However, if -r or --emit-relocs is
    // specified, we need to copy them to the output. (Some post link analysis
    // tools specify --emit-relocs to obtain the information.)
    if (!config->relocatable && !config->emitRelocs)
      return nullptr;
    InputSection *relocSec = make<InputSection>(*this, sec, name);
    // If the relocated section is discarded (due to /DISCARD/ or
    // --gc-sections), the relocation section should be discarded as well.
    target->dependentSections.push_back(relocSec);
    return relocSec;
  }
  }

  // The GNU linker uses .note.GNU-stack section as a marker indicating
  // that the code in the object file does not expect that the stack is
  // executable (in terms of NX bit). If all input files have the marker,
  // the GNU linker adds a PT_GNU_STACK segment to tell the loader to
  // make the stack non-executable. Most object files have this section as
  // of 2017.
  //
  // But making the stack non-executable is a norm today for security
  // reasons. Failure to do so may result in a serious security issue.
  // Therefore, we make LLD always add PT_GNU_STACK unless it is
  // explicitly told to do otherwise (by -z execstack). Because the stack
  // executable-ness is controlled solely by command line options,
  // .note.GNU-stack sections are simply ignored.
  if (name == ".note.GNU-stack")
    return &InputSection::discarded;

  // Object files that use processor features such as Intel Control-Flow
  // Enforcement (CET) or AArch64 Branch Target Identification BTI, use a
  // .note.gnu.property section containing a bitfield of feature bits like the
  // GNU_PROPERTY_X86_FEATURE_1_IBT flag. Read a bitmap containing the flag.
  //
  // Since we merge bitmaps from multiple object files to create a new
  // .note.gnu.property containing a single AND'ed bitmap, we discard an input
  // file's .note.gnu.property section.
  if (name == ".note.gnu.property") {
    this->andFeatures = readAndFeatures<ELFT>(InputSection(*this, sec, name));
    return &InputSection::discarded;
  }

  // Split stacks is a feature to support a discontiguous stack,
  // commonly used in the programming language Go. For the details,
  // see https://gcc.gnu.org/wiki/SplitStacks. An object file compiled
  // for split stack will include a .note.GNU-split-stack section.
  if (name == ".note.GNU-split-stack") {
    if (config->relocatable) {
      error("cannot mix split-stack and non-split-stack in a relocatable link");
      return &InputSection::discarded;
    }
    this->splitStack = true;
    return &InputSection::discarded;
  }

  // An object file compiled for split stack, but where some of the
  // functions were compiled with the no_split_stack_attribute will
  // include a .note.GNU-no-split-stack section.
  if (name == ".note.GNU-no-split-stack") {
    this->someNoSplitStack = true;
    return &InputSection::discarded;
  }

  // The linkonce feature is a sort of proto-comdat. Some glibc i386 object
  // files contain definitions of symbol "__x86.get_pc_thunk.bx" in linkonce
  // sections. Drop those sections to avoid duplicate symbol errors.
  // FIXME: This is glibc PR20543, we should remove this hack once that has been
  // fixed for a while.
  if (name == ".gnu.linkonce.t.__x86.get_pc_thunk.bx" ||
      name == ".gnu.linkonce.t.__i686.get_pc_thunk.bx")
    return &InputSection::discarded;

  // If we are creating a new .build-id section, strip existing .build-id
  // sections so that the output won't have more than one .build-id.
  // This is not usually a problem because input object files normally don't
  // have .build-id sections, but you can create such files by
  // "ld.{bfd,gold,lld} -r --build-id", and we want to guard against it.
  if (name == ".note.gnu.build-id" && config->buildId != BuildIdKind::None)
    return &InputSection::discarded;

  // The linker merges EH (exception handling) frames and creates a
  // .eh_frame_hdr section for runtime. So we handle them with a special
  // class. For relocatable outputs, they are just passed through.
  if (name == ".eh_frame" && !config->relocatable)
    return make<EhInputSection>(*this, sec, name);

  if (shouldMerge(sec, name))
    return make<MergeInputSection>(*this, sec, name);
  return make<InputSection>(*this, sec, name);
}
| 1074 | ||||
| 1075 | template <class ELFT> | |||
| 1076 | StringRef ObjFile<ELFT>::getSectionName(const Elf_Shdr &sec) { | |||
| 1077 | return CHECK(getObj().getSectionName(sec, sectionStringTable), this)check2((getObj().getSectionName(sec, sectionStringTable)), [& ] { return toString(this); }); | |||
| 1078 | } | |||
| 1079 | ||||
| 1080 | // Initialize this->Symbols. this->Symbols is a parallel array as | |||
| 1081 | // its corresponding ELF symbol table. | |||
| 1082 | template <class ELFT> void ObjFile<ELFT>::initializeSymbols() { | |||
| 1083 | ArrayRef<Elf_Sym> eSyms = this->getELFSyms<ELFT>(); | |||
| 1084 | this->symbols.resize(eSyms.size()); | |||
| 1085 | ||||
| 1086 | // Fill in InputFile::symbols. Some entries have been initialized | |||
| 1087 | // because of LazyObjFile. | |||
| 1088 | for (size_t i = 0, end = eSyms.size(); i != end; ++i) { | |||
| 1089 | if (this->symbols[i]) | |||
| 1090 | continue; | |||
| 1091 | const Elf_Sym &eSym = eSyms[i]; | |||
| 1092 | uint32_t secIdx = getSectionIndex(eSym); | |||
| 1093 | if (secIdx >= this->sections.size()) | |||
| 1094 | fatal(toString(this) + ": invalid section index: " + Twine(secIdx)); | |||
| 1095 | if (eSym.getBinding() != STB_LOCAL) { | |||
| 1096 | if (i < firstGlobal) | |||
| 1097 | error(toString(this) + ": non-local symbol (" + Twine(i) + | |||
| 1098 | ") found at index < .symtab's sh_info (" + Twine(firstGlobal) + | |||
| 1099 | ")"); | |||
| 1100 | this->symbols[i] = | |||
| 1101 | symtab->insert(CHECK(eSyms[i].getName(this->stringTable), this)check2((eSyms[i].getName(this->stringTable)), [&] { return toString(this); })); | |||
| 1102 | continue; | |||
| 1103 | } | |||
| 1104 | ||||
| 1105 | // Handle local symbols. Local symbols are not added to the symbol | |||
| 1106 | // table because they are not visible from other object files. We | |||
| 1107 | // allocate symbol instances and add their pointers to symbols. | |||
| 1108 | if (i >= firstGlobal) | |||
| 1109 | errorOrWarn(toString(this) + ": STB_LOCAL symbol (" + Twine(i) + | |||
| 1110 | ") found at index >= .symtab's sh_info (" + | |||
| 1111 | Twine(firstGlobal) + ")"); | |||
| 1112 | ||||
| 1113 | InputSectionBase *sec = this->sections[secIdx]; | |||
| 1114 | uint8_t type = eSym.getType(); | |||
| 1115 | if (type == STT_FILE) | |||
| 1116 | sourceFile = CHECK(eSym.getName(this->stringTable), this)check2((eSym.getName(this->stringTable)), [&] { return toString(this); }); | |||
| 1117 | if (this->stringTable.size() <= eSym.st_name) | |||
| 1118 | fatal(toString(this) + ": invalid symbol name offset"); | |||
| 1119 | StringRefZ name = this->stringTable.data() + eSym.st_name; | |||
| 1120 | ||||
| 1121 | if (eSym.st_shndx == SHN_UNDEF) | |||
| 1122 | this->symbols[i] = | |||
| 1123 | make<Undefined>(this, name, STB_LOCAL, eSym.st_other, type); | |||
| 1124 | else if (sec == &InputSection::discarded) | |||
| 1125 | this->symbols[i] = | |||
| 1126 | make<Undefined>(this, name, STB_LOCAL, eSym.st_other, type, | |||
| 1127 | /*discardedSecIdx=*/secIdx); | |||
| 1128 | else | |||
| 1129 | this->symbols[i] = make<Defined>(this, name, STB_LOCAL, eSym.st_other, | |||
| 1130 | type, eSym.st_value, eSym.st_size, sec); | |||
| 1131 | } | |||
| 1132 | ||||
| 1133 | // Symbol resolution of non-local symbols. | |||
| 1134 | SmallVector<unsigned, 32> undefineds; | |||
| 1135 | for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) { | |||
| 1136 | const Elf_Sym &eSym = eSyms[i]; | |||
| 1137 | uint8_t binding = eSym.getBinding(); | |||
| 1138 | if (binding == STB_LOCAL) | |||
| 1139 | continue; // Errored above. | |||
| 1140 | ||||
| 1141 | uint32_t secIdx = getSectionIndex(eSym); | |||
| 1142 | InputSectionBase *sec = this->sections[secIdx]; | |||
| 1143 | uint8_t stOther = eSym.st_other; | |||
| 1144 | uint8_t type = eSym.getType(); | |||
| 1145 | uint64_t value = eSym.st_value; | |||
| 1146 | uint64_t size = eSym.st_size; | |||
| 1147 | StringRefZ name = this->stringTable.data() + eSym.st_name; | |||
| 1148 | ||||
| 1149 | // Handle global undefined symbols. | |||
| 1150 | if (eSym.st_shndx == SHN_UNDEF) { | |||
| 1151 | undefineds.push_back(i); | |||
| 1152 | continue; | |||
| 1153 | } | |||
| 1154 | ||||
| 1155 | // Handle global common symbols. | |||
| 1156 | if (eSym.st_shndx == SHN_COMMON) { | |||
| 1157 | if (value == 0 || value >= UINT32_MAX0xffffffffU) | |||
| 1158 | fatal(toString(this) + ": common symbol '" + StringRef(name.data) + | |||
| 1159 | "' has invalid alignment: " + Twine(value)); | |||
| 1160 | this->symbols[i]->resolve( | |||
| 1161 | CommonSymbol{this, name, binding, stOther, type, value, size}); | |||
| 1162 | continue; | |||
| 1163 | } | |||
| 1164 | ||||
| 1165 | // If a defined symbol is in a discarded section, handle it as if it | |||
| 1166 | // were an undefined symbol. Such symbol doesn't comply with the | |||
| 1167 | // standard, but in practice, a .eh_frame often directly refer | |||
| 1168 | // COMDAT member sections, and if a comdat group is discarded, some | |||
| 1169 | // defined symbol in a .eh_frame becomes dangling symbols. | |||
| 1170 | if (sec == &InputSection::discarded) { | |||
| 1171 | Undefined und{this, name, binding, stOther, type, secIdx}; | |||
| 1172 | Symbol *sym = this->symbols[i]; | |||
| 1173 | // !ArchiveFile::parsed or LazyObjFile::fetched means that the file | |||
| 1174 | // containing this object has not finished processing, i.e. this symbol is | |||
| 1175 | // a result of a lazy symbol fetch. We should demote the lazy symbol to an | |||
| 1176 | // Undefined so that any relocations outside of the group to it will | |||
| 1177 | // trigger a discarded section error. | |||
| 1178 | if ((sym->symbolKind == Symbol::LazyArchiveKind && | |||
| 1179 | !cast<ArchiveFile>(sym->file)->parsed) || | |||
| 1180 | (sym->symbolKind == Symbol::LazyObjectKind && | |||
| 1181 | cast<LazyObjFile>(sym->file)->fetched)) | |||
| 1182 | sym->replace(und); | |||
| 1183 | else | |||
| 1184 | sym->resolve(und); | |||
| 1185 | continue; | |||
| 1186 | } | |||
| 1187 | ||||
| 1188 | // Handle global defined symbols. | |||
| 1189 | if (binding == STB_GLOBAL || binding == STB_WEAK || | |||
| 1190 | binding == STB_GNU_UNIQUE) { | |||
| 1191 | this->symbols[i]->resolve( | |||
| 1192 | Defined{this, name, binding, stOther, type, value, size, sec}); | |||
| 1193 | continue; | |||
| 1194 | } | |||
| 1195 | ||||
| 1196 | fatal(toString(this) + ": unexpected binding: " + Twine((int)binding)); | |||
| 1197 | } | |||
| 1198 | ||||
| 1199 | // Undefined symbols (excluding those defined relative to non-prevailing | |||
| 1200 | // sections) can trigger recursive fetch. Process defined symbols first so | |||
| 1201 | // that the relative order between a defined symbol and an undefined symbol | |||
| 1202 | // does not change the symbol resolution behavior. In addition, a set of | |||
| 1203 | // interconnected symbols will all be resolved to the same file, instead of | |||
| 1204 | // being resolved to different files. | |||
| 1205 | for (unsigned i : undefineds) { | |||
| 1206 | const Elf_Sym &eSym = eSyms[i]; | |||
| 1207 | StringRefZ name = this->stringTable.data() + eSym.st_name; | |||
| 1208 | this->symbols[i]->resolve(Undefined{this, name, eSym.getBinding(), | |||
| 1209 | eSym.st_other, eSym.getType()}); | |||
| 1210 | this->symbols[i]->referenced = true; | |||
| 1211 | } | |||
| 1212 | } | |||
| 1213 | ||||
// Takes ownership of the given Archive. Inside the initializer list the name
// `file` refers to the constructor parameter, which shadows the member of the
// same name: InputFile is given the archive's memory buffer reference read
// through the parameter, and the parameter is then moved into the member.
ArchiveFile::ArchiveFile(std::unique_ptr<Archive> &&file)
    : InputFile(ArchiveKind, file->getMemoryBufferRef()),
      file(std::move(file)) {}
| 1217 | ||||
| 1218 | void ArchiveFile::parse() { | |||
| 1219 | for (const Archive::Symbol &sym : file->symbols()) | |||
| 1220 | symtab->addSymbol(LazyArchive{*this, sym}); | |||
| 1221 | ||||
| 1222 | // Inform a future invocation of ObjFile<ELFT>::initializeSymbols() that this | |||
| 1223 | // archive has been processed. | |||
| 1224 | parsed = true; | |||
| 1225 | } | |||
| 1226 | ||||
| 1227 | // Returns a buffer pointing to a member file containing a given symbol. | |||
| 1228 | void ArchiveFile::fetch(const Archive::Symbol &sym) { | |||
| 1229 | Archive::Child c = | |||
| 1230 | CHECK(sym.getMember(), toString(this) +check2((sym.getMember()), [&] { return toString(toString( this) + ": could not get the member for symbol " + toELFString (sym)); }) | |||
| 1231 | ": could not get the member for symbol " +check2((sym.getMember()), [&] { return toString(toString( this) + ": could not get the member for symbol " + toELFString (sym)); }) | |||
| 1232 | toELFString(sym))check2((sym.getMember()), [&] { return toString(toString( this) + ": could not get the member for symbol " + toELFString (sym)); }); | |||
| 1233 | ||||
| 1234 | if (!seen.insert(c.getChildOffset()).second) | |||
| 1235 | return; | |||
| 1236 | ||||
| 1237 | MemoryBufferRef mb = | |||
| 1238 | CHECK(c.getMemoryBufferRef(),check2((c.getMemoryBufferRef()), [&] { return toString(toString (this) + ": could not get the buffer for the member defining symbol " + toELFString(sym)); }) | |||
| 1239 | toString(this) +check2((c.getMemoryBufferRef()), [&] { return toString(toString (this) + ": could not get the buffer for the member defining symbol " + toELFString(sym)); }) | |||
| 1240 | ": could not get the buffer for the member defining symbol " +check2((c.getMemoryBufferRef()), [&] { return toString(toString (this) + ": could not get the buffer for the member defining symbol " + toELFString(sym)); }) | |||
| 1241 | toELFString(sym))check2((c.getMemoryBufferRef()), [&] { return toString(toString (this) + ": could not get the buffer for the member defining symbol " + toELFString(sym)); }); | |||
| 1242 | ||||
| 1243 | if (tar && c.getParent()->isThin()) | |||
| 1244 | tar->append(relativeToRoot(CHECK(c.getFullName(), this)check2((c.getFullName()), [&] { return toString(this); })), mb.getBuffer()); | |||
| 1245 | ||||
| 1246 | InputFile *file = createObjectFile(mb, getName(), c.getChildOffset()); | |||
| 1247 | file->groupId = groupId; | |||
| 1248 | parseFile(file); | |||
| 1249 | } | |||
| 1250 | ||||
| 1251 | // The handling of tentative definitions (COMMON symbols) in archives is murky. | |||
| 1252 | // A tentative definition will be promoted to a global definition if there are | |||
| 1253 | // no non-tentative definitions to dominate it. When we hold a tentative | |||
| 1254 | // definition to a symbol and are inspecting archive members for inclusion | |||
| 1255 | // there are 2 ways we can proceed: | |||
| 1256 | // | |||
| 1257 | // 1) Consider the tentative definition a 'real' definition (ie promotion from | |||
| 1258 | // tentative to real definition has already happened) and not inspect | |||
| 1259 | // archive members for Global/Weak definitions to replace the tentative | |||
| 1260 | // definition. An archive member would only be included if it satisfies some | |||
| 1261 | // other undefined symbol. This is the behavior Gold uses. | |||
| 1262 | // | |||
| 1263 | // 2) Consider the tentative definition as still undefined (ie the promotion to | |||
| 1264 | // a real definition happens only after all symbol resolution is done). | |||
| 1265 | // The linker searches archive members for STB_GLOBAL definitions to | |||
| 1266 | // replace the tentative definition with. This is the behavior used by | |||
| 1267 | // GNU ld. | |||
| 1268 | // | |||
| 1269 | // The second behavior is inherited from SysVR4, which based it on the FORTRAN | |||
| 1270 | // COMMON BLOCK model. This behavior is needed for proper initialization in old | |||
| 1271 | // (pre F90) FORTRAN code that is packaged into an archive. | |||
| 1272 | // | |||
| 1273 | // The following functions search archive members for definitions to replace | |||
| 1274 | // tentative definitions (implementing behavior 2). | |||
| 1275 | static bool isBitcodeNonCommonDef(MemoryBufferRef mb, StringRef symName, | |||
| 1276 | StringRef archiveName) { | |||
| 1277 | IRSymtabFile symtabFile = check(readIRSymtab(mb)); | |||
| 1278 | for (const irsymtab::Reader::SymbolRef &sym : | |||
| 1279 | symtabFile.TheReader.symbols()) { | |||
| 1280 | if (sym.isGlobal() && sym.getName() == symName) | |||
| 1281 | return !sym.isUndefined() && !sym.isWeak() && !sym.isCommon(); | |||
| 1282 | } | |||
| 1283 | return false; | |||
| 1284 | } | |||
| 1285 | ||||
| 1286 | template <class ELFT> | |||
| 1287 | static bool isNonCommonDef(MemoryBufferRef mb, StringRef symName, | |||
| 1288 | StringRef archiveName) { | |||
| 1289 | ObjFile<ELFT> *obj = make<ObjFile<ELFT>>(mb, archiveName); | |||
| 1290 | StringRef stringtable = obj->getStringTable(); | |||
| 1291 | ||||
| 1292 | for (auto sym : obj->template getGlobalELFSyms<ELFT>()) { | |||
| 1293 | Expected<StringRef> name = sym.getName(stringtable); | |||
| 1294 | if (name && name.get() == symName) | |||
| 1295 | return sym.isDefined() && sym.getBinding() == STB_GLOBAL && | |||
| 1296 | !sym.isCommon(); | |||
| 1297 | } | |||
| 1298 | return false; | |||
| 1299 | } | |||
| 1300 | ||||
| 1301 | static bool isNonCommonDef(MemoryBufferRef mb, StringRef symName, | |||
| 1302 | StringRef archiveName) { | |||
| 1303 | switch (getELFKind(mb, archiveName)) { | |||
| 1304 | case ELF32LEKind: | |||
| 1305 | return isNonCommonDef<ELF32LE>(mb, symName, archiveName); | |||
| 1306 | case ELF32BEKind: | |||
| 1307 | return isNonCommonDef<ELF32BE>(mb, symName, archiveName); | |||
| 1308 | case ELF64LEKind: | |||
| 1309 | return isNonCommonDef<ELF64LE>(mb, symName, archiveName); | |||
| 1310 | case ELF64BEKind: | |||
| 1311 | return isNonCommonDef<ELF64BE>(mb, symName, archiveName); | |||
| 1312 | default: | |||
| 1313 | llvm_unreachable("getELFKind")__builtin_unreachable(); | |||
| 1314 | } | |||
| 1315 | } | |||
| 1316 | ||||
| 1317 | bool ArchiveFile::shouldFetchForCommon(const Archive::Symbol &sym) { | |||
| 1318 | Archive::Child c = | |||
| 1319 | CHECK(sym.getMember(), toString(this) +check2((sym.getMember()), [&] { return toString(toString( this) + ": could not get the member for symbol " + toELFString (sym)); }) | |||
| 1320 | ": could not get the member for symbol " +check2((sym.getMember()), [&] { return toString(toString( this) + ": could not get the member for symbol " + toELFString (sym)); }) | |||
| 1321 | toELFString(sym))check2((sym.getMember()), [&] { return toString(toString( this) + ": could not get the member for symbol " + toELFString (sym)); }); | |||
| 1322 | MemoryBufferRef mb = | |||
| 1323 | CHECK(c.getMemoryBufferRef(),check2((c.getMemoryBufferRef()), [&] { return toString(toString (this) + ": could not get the buffer for the member defining symbol " + toELFString(sym)); }) | |||
| 1324 | toString(this) +check2((c.getMemoryBufferRef()), [&] { return toString(toString (this) + ": could not get the buffer for the member defining symbol " + toELFString(sym)); }) | |||
| 1325 | ": could not get the buffer for the member defining symbol " +check2((c.getMemoryBufferRef()), [&] { return toString(toString (this) + ": could not get the buffer for the member defining symbol " + toELFString(sym)); }) | |||
| 1326 | toELFString(sym))check2((c.getMemoryBufferRef()), [&] { return toString(toString (this) + ": could not get the buffer for the member defining symbol " + toELFString(sym)); }); | |||
| 1327 | ||||
| 1328 | if (isBitcode(mb)) | |||
| 1329 | return isBitcodeNonCommonDef(mb, sym.getName(), getName()); | |||
| 1330 | ||||
| 1331 | return isNonCommonDef(mb, sym.getName(), getName()); | |||
| 1332 | } | |||
| 1333 | ||||
| 1334 | size_t ArchiveFile::getMemberCount() const { | |||
| 1335 | size_t count = 0; | |||
| 1336 | Error err = Error::success(); | |||
| 1337 | for (const Archive::Child &c : file->children(err)) { | |||
| 1338 | (void)c; | |||
| 1339 | ++count; | |||
| 1340 | } | |||
| 1341 | // This function is used by --print-archive-stats=, where an error does not | |||
| 1342 | // really matter. | |||
| 1343 | consumeError(std::move(err)); | |||
| 1344 | return count; | |||
| 1345 | } | |||
| 1346 | ||||
| 1347 | unsigned SharedFile::vernauxNum; | |||
| 1348 | ||||
// Parse the version definitions in the object file if present, and return a
// vector whose nth element contains a pointer to the Elf_Verdef for version
// identifier n. Version identifiers that are not definitions map to nullptr.
//
// `base` is the start of the file image and `sec` the .gnu.version_d section
// header (may be null, in which case an empty vector is returned).
//
// NOTE: the Verdef chain is followed via vd_next without bounds checking; the
// signature carries no buffer size, so the caller is trusted to pass a sane
// section.
template <typename ELFT>
static std::vector<const void *> parseVerdefs(const uint8_t *base,
                                              const typename ELFT::Shdr *sec) {
  if (!sec)
    return {};

  // We cannot determine the largest verdef identifier without inspecting
  // every Elf_Verdef, but both bfd and gold assign verdef identifiers
  // sequentially starting from 1, so we predict that the largest identifier
  // will be verdefCount.
  unsigned verdefCount = sec->sh_info;
  std::vector<const void *> verdefs(verdefCount + 1);

  // Build the Verdefs array by following the chain of Elf_Verdef objects
  // from the start of the .gnu.version_d section.
  const uint8_t *verdef = base + sec->sh_offset;
  for (unsigned i = 0; i != verdefCount; ++i) {
    auto *curVerdef = reinterpret_cast<const typename ELFT::Verdef *>(verdef);
    verdef += curVerdef->vd_next;
    unsigned verdefIndex = curVerdef->vd_ndx;
    // Only ever grow the table. Resizing unconditionally would shrink it when
    // a later Verdef carries a smaller index than an earlier one, silently
    // discarding entries that were already recorded.
    if (verdefIndex >= verdefs.size())
      verdefs.resize(verdefIndex + 1);
    verdefs[verdefIndex] = curVerdef;
  }
  return verdefs;
}
| 1377 | ||||
| 1378 | // Parse SHT_GNU_verneed to properly set the name of a versioned undefined | |||
| 1379 | // symbol. We detect fatal issues which would cause vulnerabilities, but do not | |||
| 1380 | // implement sophisticated error checking like in llvm-readobj because the value | |||
| 1381 | // of such diagnostics is low. | |||
| 1382 | template <typename ELFT> | |||
| 1383 | std::vector<uint32_t> SharedFile::parseVerneed(const ELFFile<ELFT> &obj, | |||
| 1384 | const typename ELFT::Shdr *sec) { | |||
| 1385 | if (!sec) | |||
| 1386 | return {}; | |||
| 1387 | std::vector<uint32_t> verneeds; | |||
| 1388 | ArrayRef<uint8_t> data = CHECK(obj.getSectionContents(*sec), this)check2((obj.getSectionContents(*sec)), [&] { return toString (this); }); | |||
| 1389 | const uint8_t *verneedBuf = data.begin(); | |||
| 1390 | for (unsigned i = 0; i != sec->sh_info; ++i) { | |||
| 1391 | if (verneedBuf + sizeof(typename ELFT::Verneed) > data.end()) | |||
| 1392 | fatal(toString(this) + " has an invalid Verneed"); | |||
| 1393 | auto *vn = reinterpret_cast<const typename ELFT::Verneed *>(verneedBuf); | |||
| 1394 | const uint8_t *vernauxBuf = verneedBuf + vn->vn_aux; | |||
| 1395 | for (unsigned j = 0; j != vn->vn_cnt; ++j) { | |||
| 1396 | if (vernauxBuf + sizeof(typename ELFT::Vernaux) > data.end()) | |||
| 1397 | fatal(toString(this) + " has an invalid Vernaux"); | |||
| 1398 | auto *aux = reinterpret_cast<const typename ELFT::Vernaux *>(vernauxBuf); | |||
| 1399 | if (aux->vna_name >= this->stringTable.size()) | |||
| 1400 | fatal(toString(this) + " has a Vernaux with an invalid vna_name"); | |||
| 1401 | uint16_t version = aux->vna_other & VERSYM_VERSION; | |||
| 1402 | if (version >= verneeds.size()) | |||
| 1403 | verneeds.resize(version + 1); | |||
| 1404 | verneeds[version] = aux->vna_name; | |||
| 1405 | vernauxBuf += aux->vna_next; | |||
| 1406 | } | |||
| 1407 | verneedBuf += vn->vn_next; | |||
| 1408 | } | |||
| 1409 | return verneeds; | |||
| 1410 | } | |||
| 1411 | ||||
| 1412 | // We do not usually care about alignments of data in shared object | |||
| 1413 | // files because the loader takes care of it. However, if we promote a | |||
| 1414 | // DSO symbol to point to .bss due to copy relocation, we need to keep | |||
| 1415 | // the original alignment requirements. We infer it in this function. | |||
| 1416 | template <typename ELFT> | |||
| 1417 | static uint64_t getAlignment(ArrayRef<typename ELFT::Shdr> sections, | |||
| 1418 | const typename ELFT::Sym &sym) { | |||
| 1419 | uint64_t ret = UINT64_MAX0xffffffffffffffffULL; | |||
| 1420 | if (sym.st_value) | |||
| 1421 | ret = 1ULL << countTrailingZeros((uint64_t)sym.st_value); | |||
| 1422 | if (0 < sym.st_shndx && sym.st_shndx < sections.size()) | |||
| 1423 | ret = std::min<uint64_t>(ret, sections[sym.st_shndx].sh_addralign); | |||
| 1424 | return (ret > UINT32_MAX0xffffffffU) ? 0 : ret; | |||
| 1425 | } | |||
| 1426 | ||||
| 1427 | // Fully parse the shared object file. | |||
| 1428 | // | |||
| 1429 | // This function parses symbol versions. If a DSO has version information, | |||
| 1430 | // the file has a ".gnu.version_d" section which contains symbol version | |||
| 1431 | // definitions. Each symbol is associated to one version through a table in | |||
| 1432 | // ".gnu.version" section. That table is a parallel array for the symbol | |||
| 1433 | // table, and each table entry contains an index in ".gnu.version_d". | |||
| 1434 | // | |||
| 1435 | // The special index 0 is reserved for VERF_NDX_LOCAL and 1 is for | |||
| 1436 | // VER_NDX_GLOBAL. There's no table entry for these special versions in | |||
| 1437 | // ".gnu.version_d". | |||
| 1438 | // | |||
| 1439 | // The file format for symbol versioning is perhaps a bit more complicated | |||
| 1440 | // than necessary, but you can easily understand the code if you wrap your | |||
| 1441 | // head around the data structure described above. | |||
| 1442 | template <class ELFT> void SharedFile::parse() { | |||
| 1443 | using Elf_Dyn = typename ELFT::Dyn; | |||
| 1444 | using Elf_Shdr = typename ELFT::Shdr; | |||
| 1445 | using Elf_Sym = typename ELFT::Sym; | |||
| 1446 | using Elf_Verdef = typename ELFT::Verdef; | |||
| 1447 | using Elf_Versym = typename ELFT::Versym; | |||
| 1448 | ||||
| 1449 | ArrayRef<Elf_Dyn> dynamicTags; | |||
| 1450 | const ELFFile<ELFT> obj = this->getObj<ELFT>(); | |||
| 1451 | ArrayRef<Elf_Shdr> sections = CHECK(obj.sections(), this)check2((obj.sections()), [&] { return toString(this); }); | |||
| 1452 | ||||
| 1453 | const Elf_Shdr *versymSec = nullptr; | |||
| 1454 | const Elf_Shdr *verdefSec = nullptr; | |||
| 1455 | const Elf_Shdr *verneedSec = nullptr; | |||
| 1456 | ||||
| 1457 | // Search for .dynsym, .dynamic, .symtab, .gnu.version and .gnu.version_d. | |||
| 1458 | for (const Elf_Shdr &sec : sections) { | |||
| 1459 | switch (sec.sh_type) { | |||
| 1460 | default: | |||
| 1461 | continue; | |||
| 1462 | case SHT_DYNAMIC: | |||
| 1463 | dynamicTags = | |||
| 1464 | CHECK(obj.template getSectionContentsAsArray<Elf_Dyn>(sec), this)check2((obj.template getSectionContentsAsArray<Elf_Dyn> (sec)), [&] { return toString(this); }); | |||
| 1465 | break; | |||
| 1466 | case SHT_GNU_versym: | |||
| 1467 | versymSec = &sec; | |||
| 1468 | break; | |||
| 1469 | case SHT_GNU_verdef: | |||
| 1470 | verdefSec = &sec; | |||
| 1471 | break; | |||
| 1472 | case SHT_GNU_verneed: | |||
| 1473 | verneedSec = &sec; | |||
| 1474 | break; | |||
| 1475 | } | |||
| 1476 | } | |||
| 1477 | ||||
| 1478 | if (versymSec && numELFSyms == 0) { | |||
| 1479 | error("SHT_GNU_versym should be associated with symbol table"); | |||
| 1480 | return; | |||
| 1481 | } | |||
| 1482 | ||||
| 1483 | // Search for a DT_SONAME tag to initialize this->soName. | |||
| 1484 | for (const Elf_Dyn &dyn : dynamicTags) { | |||
| 1485 | if (dyn.d_tag == DT_NEEDED) { | |||
| 1486 | uint64_t val = dyn.getVal(); | |||
| 1487 | if (val >= this->stringTable.size()) | |||
| 1488 | fatal(toString(this) + ": invalid DT_NEEDED entry"); | |||
| 1489 | dtNeeded.push_back(this->stringTable.data() + val); | |||
| 1490 | } else if (dyn.d_tag == DT_SONAME) { | |||
| 1491 | uint64_t val = dyn.getVal(); | |||
| 1492 | if (val >= this->stringTable.size()) | |||
| 1493 | fatal(toString(this) + ": invalid DT_SONAME entry"); | |||
| 1494 | soName = this->stringTable.data() + val; | |||
| 1495 | } | |||
| 1496 | } | |||
| 1497 | ||||
| 1498 | // DSOs are uniquified not by filename but by soname. | |||
| 1499 | DenseMap<StringRef, SharedFile *>::iterator it; | |||
| 1500 | bool wasInserted; | |||
| 1501 | std::tie(it, wasInserted) = symtab->soNames.try_emplace(soName, this); | |||
| 1502 | ||||
| 1503 | // If a DSO appears more than once on the command line with and without | |||
| 1504 | // --as-needed, --no-as-needed takes precedence over --as-needed because a | |||
| 1505 | // user can add an extra DSO with --no-as-needed to force it to be added to | |||
| 1506 | // the dependency list. | |||
| 1507 | it->second->isNeeded |= isNeeded; | |||
| 1508 | if (!wasInserted) | |||
| 1509 | return; | |||
| 1510 | ||||
| 1511 | sharedFiles.push_back(this); | |||
| 1512 | ||||
| 1513 | verdefs = parseVerdefs<ELFT>(obj.base(), verdefSec); | |||
| 1514 | std::vector<uint32_t> verneeds = parseVerneed<ELFT>(obj, verneedSec); | |||
| 1515 | ||||
| 1516 | // Parse ".gnu.version" section which is a parallel array for the symbol | |||
| 1517 | // table. If a given file doesn't have a ".gnu.version" section, we use | |||
| 1518 | // VER_NDX_GLOBAL. | |||
| 1519 | size_t size = numELFSyms - firstGlobal; | |||
| 1520 | std::vector<uint16_t> versyms(size, VER_NDX_GLOBAL); | |||
| 1521 | if (versymSec) { | |||
| 1522 | ArrayRef<Elf_Versym> versym = | |||
| 1523 | CHECK(obj.template getSectionContentsAsArray<Elf_Versym>(*versymSec),check2((obj.template getSectionContentsAsArray<Elf_Versym> (*versymSec)), [&] { return toString(this); }) | |||
| 1524 | this)check2((obj.template getSectionContentsAsArray<Elf_Versym> (*versymSec)), [&] { return toString(this); }) | |||
| 1525 | .slice(firstGlobal); | |||
| 1526 | for (size_t i = 0; i < size; ++i) | |||
| 1527 | versyms[i] = versym[i].vs_index; | |||
| 1528 | } | |||
| 1529 | ||||
| 1530 | // System libraries can have a lot of symbols with versions. Using a | |||
| 1531 | // fixed buffer for computing the versions name (foo@ver) can save a | |||
| 1532 | // lot of allocations. | |||
| 1533 | SmallString<0> versionedNameBuffer; | |||
| 1534 | ||||
| 1535 | // Add symbols to the symbol table. | |||
| 1536 | ArrayRef<Elf_Sym> syms = this->getGlobalELFSyms<ELFT>(); | |||
| 1537 | for (size_t i = 0; i < syms.size(); ++i) { | |||
| 1538 | const Elf_Sym &sym = syms[i]; | |||
| 1539 | ||||
| 1540 | // ELF spec requires that all local symbols precede weak or global | |||
| 1541 | // symbols in each symbol table, and the index of first non-local symbol | |||
| 1542 | // is stored to sh_info. If a local symbol appears after some non-local | |||
| 1543 | // symbol, that's a violation of the spec. | |||
| 1544 | StringRef name = CHECK(sym.getName(this->stringTable), this)check2((sym.getName(this->stringTable)), [&] { return toString (this); }); | |||
| 1545 | if (sym.getBinding() == STB_LOCAL) { | |||
| 1546 | warn("found local symbol '" + name + | |||
| 1547 | "' in global part of symbol table in file " + toString(this)); | |||
| 1548 | continue; | |||
| 1549 | } | |||
| 1550 | ||||
| 1551 | uint16_t idx = versyms[i] & ~VERSYM_HIDDEN; | |||
| 1552 | if (sym.isUndefined()) { | |||
| 1553 | // For unversioned undefined symbols, VER_NDX_GLOBAL makes more sense but | |||
| 1554 | // as of binutils 2.34, GNU ld produces VER_NDX_LOCAL. | |||
| 1555 | if (idx != VER_NDX_LOCAL && idx != VER_NDX_GLOBAL) { | |||
| 1556 | if (idx >= verneeds.size()) { | |||
| 1557 | error("corrupt input file: version need index " + Twine(idx) + | |||
| 1558 | " for symbol " + name + " is out of bounds\n>>> defined in " + | |||
| 1559 | toString(this)); | |||
| 1560 | continue; | |||
| 1561 | } | |||
| 1562 | StringRef verName = this->stringTable.data() + verneeds[idx]; | |||
| 1563 | versionedNameBuffer.clear(); | |||
| 1564 | name = | |||
| 1565 | saver.save((name + "@" + verName).toStringRef(versionedNameBuffer)); | |||
| 1566 | } | |||
| 1567 | Symbol *s = symtab->addSymbol( | |||
| 1568 | Undefined{this, name, sym.getBinding(), sym.st_other, sym.getType()}); | |||
| 1569 | s->exportDynamic = true; | |||
| 1570 | if (s->isUndefined() && sym.getBinding() != STB_WEAK && | |||
| 1571 | config->unresolvedSymbolsInShlib != UnresolvedPolicy::Ignore) | |||
| 1572 | requiredSymbols.push_back(s); | |||
| 1573 | continue; | |||
| 1574 | } | |||
| 1575 | ||||
| 1576 | // MIPS BFD linker puts _gp_disp symbol into DSO files and incorrectly | |||
| 1577 | // assigns VER_NDX_LOCAL to this section global symbol. Here is a | |||
| 1578 | // workaround for this bug. | |||
| 1579 | if (config->emachine == EM_MIPS && idx == VER_NDX_LOCAL && | |||
| 1580 | name == "_gp_disp") | |||
| 1581 | continue; | |||
| 1582 | ||||
| 1583 | uint32_t alignment = getAlignment<ELFT>(sections, sym); | |||
| 1584 | if (!(versyms[i] & VERSYM_HIDDEN)) { | |||
| 1585 | symtab->addSymbol(SharedSymbol{*this, name, sym.getBinding(), | |||
| 1586 | sym.st_other, sym.getType(), sym.st_value, | |||
| 1587 | sym.st_size, alignment, idx}); | |||
| 1588 | } | |||
| 1589 | ||||
| 1590 | // Also add the symbol with the versioned name to handle undefined symbols | |||
| 1591 | // with explicit versions. | |||
| 1592 | if (idx == VER_NDX_GLOBAL) | |||
| 1593 | continue; | |||
| 1594 | ||||
| 1595 | if (idx >= verdefs.size() || idx == VER_NDX_LOCAL) { | |||
| 1596 | error("corrupt input file: version definition index " + Twine(idx) + | |||
| 1597 | " for symbol " + name + " is out of bounds\n>>> defined in " + | |||
| 1598 | toString(this)); | |||
| 1599 | continue; | |||
| 1600 | } | |||
| 1601 | ||||
| 1602 | StringRef verName = | |||
| 1603 | this->stringTable.data() + | |||
| 1604 | reinterpret_cast<const Elf_Verdef *>(verdefs[idx])->getAux()->vda_name; | |||
| 1605 | versionedNameBuffer.clear(); | |||
| 1606 | name = (name + "@" + verName).toStringRef(versionedNameBuffer); | |||
| 1607 | symtab->addSymbol(SharedSymbol{*this, saver.save(name), sym.getBinding(), | |||
| 1608 | sym.st_other, sym.getType(), sym.st_value, | |||
| 1609 | sym.st_size, alignment, idx}); | |||
| 1610 | } | |||
| 1611 | } | |||
| 1612 | ||||
| 1613 | static ELFKind getBitcodeELFKind(const Triple &t) { | |||
| 1614 | if (t.isLittleEndian()) | |||
| 1615 | return t.isArch64Bit() ? ELF64LEKind : ELF32LEKind; | |||
| 1616 | return t.isArch64Bit() ? ELF64BEKind : ELF32BEKind; | |||
| 1617 | } | |||
| 1618 | ||||
| 1619 | static uint16_t getBitcodeMachineKind(StringRef path, const Triple &t) { | |||
| 1620 | switch (t.getArch()) { | |||
| 1621 | case Triple::aarch64: | |||
| 1622 | case Triple::aarch64_be: | |||
| 1623 | return EM_AARCH64; | |||
| 1624 | case Triple::amdgcn: | |||
| 1625 | case Triple::r600: | |||
| 1626 | return EM_AMDGPU; | |||
| 1627 | case Triple::arm: | |||
| 1628 | case Triple::thumb: | |||
| 1629 | return EM_ARM; | |||
| 1630 | case Triple::avr: | |||
| 1631 | return EM_AVR; | |||
| 1632 | case Triple::mips: | |||
| 1633 | case Triple::mipsel: | |||
| 1634 | case Triple::mips64: | |||
| 1635 | case Triple::mips64el: | |||
| 1636 | return EM_MIPS; | |||
| 1637 | case Triple::msp430: | |||
| 1638 | return EM_MSP430; | |||
| 1639 | case Triple::ppc: | |||
| 1640 | case Triple::ppcle: | |||
| 1641 | return EM_PPC; | |||
| 1642 | case Triple::ppc64: | |||
| 1643 | case Triple::ppc64le: | |||
| 1644 | return EM_PPC64; | |||
| 1645 | case Triple::riscv32: | |||
| 1646 | case Triple::riscv64: | |||
| 1647 | return EM_RISCV; | |||
| 1648 | case Triple::x86: | |||
| 1649 | return t.isOSIAMCU() ? EM_IAMCU : EM_386; | |||
| 1650 | case Triple::x86_64: | |||
| 1651 | return EM_X86_64; | |||
| 1652 | default: | |||
| 1653 | error(path + ": could not infer e_machine from bitcode target triple " + | |||
| 1654 | t.str()); | |||
| 1655 | return EM_NONE; | |||
| 1656 | } | |||
| 1657 | } | |||
| 1658 | ||||
| 1659 | static uint8_t getOsAbi(const Triple &t) { | |||
| 1660 | switch (t.getOS()) { | |||
| 1661 | case Triple::AMDHSA: | |||
| 1662 | return ELF::ELFOSABI_AMDGPU_HSA; | |||
| 1663 | case Triple::AMDPAL: | |||
| 1664 | return ELF::ELFOSABI_AMDGPU_PAL; | |||
| 1665 | case Triple::Mesa3D: | |||
| 1666 | return ELF::ELFOSABI_AMDGPU_MESA3D; | |||
| 1667 | default: | |||
| 1668 | return ELF::ELFOSABI_NONE; | |||
| 1669 | } | |||
| 1670 | } | |||
| 1671 | ||||
| 1672 | BitcodeFile::BitcodeFile(MemoryBufferRef mb, StringRef archiveName, | |||
| 1673 | uint64_t offsetInArchive) | |||
| 1674 | : InputFile(BitcodeKind, mb) { | |||
| 1675 | this->archiveName = std::string(archiveName); | |||
| 1676 | ||||
| 1677 | std::string path = mb.getBufferIdentifier().str(); | |||
| 1678 | if (config->thinLTOIndexOnly) | |||
| 1679 | path = replaceThinLTOSuffix(mb.getBufferIdentifier()); | |||
| 1680 | ||||
| 1681 | // ThinLTO assumes that all MemoryBufferRefs given to it have a unique | |||
| 1682 | // name. If two archives define two members with the same name, this | |||
| 1683 | // causes a collision which result in only one of the objects being taken | |||
| 1684 | // into consideration at LTO time (which very likely causes undefined | |||
| 1685 | // symbols later in the link stage). So we append file offset to make | |||
| 1686 | // filename unique. | |||
| 1687 | StringRef name = | |||
| 1688 | archiveName.empty() | |||
| 1689 | ? saver.save(path) | |||
| 1690 | : saver.save(archiveName + "(" + path::filename(path) + " at " + | |||
| 1691 | utostr(offsetInArchive) + ")"); | |||
| 1692 | MemoryBufferRef mbref(mb.getBuffer(), name); | |||
| 1693 | ||||
| 1694 | obj = CHECK(lto::InputFile::create(mbref), this)check2((lto::InputFile::create(mbref)), [&] { return toString (this); }); | |||
| 1695 | ||||
| 1696 | Triple t(obj->getTargetTriple()); | |||
| 1697 | ekind = getBitcodeELFKind(t); | |||
| 1698 | emachine = getBitcodeMachineKind(mb.getBufferIdentifier(), t); | |||
| 1699 | osabi = getOsAbi(t); | |||
| 1700 | } | |||
| 1701 | ||||
| 1702 | static uint8_t mapVisibility(GlobalValue::VisibilityTypes gvVisibility) { | |||
| 1703 | switch (gvVisibility) { | |||
| 1704 | case GlobalValue::DefaultVisibility: | |||
| 1705 | return STV_DEFAULT; | |||
| 1706 | case GlobalValue::HiddenVisibility: | |||
| 1707 | return STV_HIDDEN; | |||
| 1708 | case GlobalValue::ProtectedVisibility: | |||
| 1709 | return STV_PROTECTED; | |||
| 1710 | } | |||
| 1711 | llvm_unreachable("unknown visibility")__builtin_unreachable(); | |||
| 1712 | } | |||
| 1713 | ||||
| 1714 | template <class ELFT> | |||
| 1715 | static Symbol *createBitcodeSymbol(const std::vector<bool> &keptComdats, | |||
| 1716 | const lto::InputFile::Symbol &objSym, | |||
| 1717 | BitcodeFile &f) { | |||
| 1718 | StringRef name = saver.save(objSym.getName()); | |||
| 1719 | uint8_t binding = objSym.isWeak() ? STB_WEAK : STB_GLOBAL; | |||
| 1720 | uint8_t type = objSym.isTLS() ? STT_TLS : STT_NOTYPE; | |||
| 1721 | uint8_t visibility = mapVisibility(objSym.getVisibility()); | |||
| 1722 | bool canOmitFromDynSym = objSym.canBeOmittedFromSymbolTable(); | |||
| 1723 | ||||
| 1724 | int c = objSym.getComdatIndex(); | |||
| 1725 | if (objSym.isUndefined() || (c != -1 && !keptComdats[c])) { | |||
| 1726 | Undefined newSym(&f, name, binding, visibility, type); | |||
| 1727 | if (canOmitFromDynSym) | |||
| 1728 | newSym.exportDynamic = false; | |||
| 1729 | Symbol *ret = symtab->addSymbol(newSym); | |||
| 1730 | ret->referenced = true; | |||
| 1731 | return ret; | |||
| 1732 | } | |||
| 1733 | ||||
| 1734 | if (objSym.isCommon()) | |||
| 1735 | return symtab->addSymbol( | |||
| 1736 | CommonSymbol{&f, name, binding, visibility, STT_OBJECT, | |||
| 1737 | objSym.getCommonAlignment(), objSym.getCommonSize()}); | |||
| 1738 | ||||
| 1739 | Defined newSym(&f, name, binding, visibility, type, 0, 0, nullptr); | |||
| 1740 | if (canOmitFromDynSym) | |||
| 1741 | newSym.exportDynamic = false; | |||
| 1742 | return symtab->addSymbol(newSym); | |||
| 1743 | } | |||
| 1744 | ||||
| 1745 | template <class ELFT> void BitcodeFile::parse() { | |||
| 1746 | std::vector<bool> keptComdats; | |||
| 1747 | for (std::pair<StringRef, Comdat::SelectionKind> s : obj->getComdatTable()) { | |||
| 1748 | keptComdats.push_back( | |||
| 1749 | s.second == Comdat::NoDeduplicate || | |||
| 1750 | symtab->comdatGroups.try_emplace(CachedHashStringRef(s.first), this) | |||
| 1751 | .second); | |||
| 1752 | } | |||
| 1753 | ||||
| 1754 | for (const lto::InputFile::Symbol &objSym : obj->symbols()) | |||
| 1755 | symbols.push_back(createBitcodeSymbol<ELFT>(keptComdats, objSym, *this)); | |||
| 1756 | ||||
| 1757 | for (auto l : obj->getDependentLibraries()) | |||
| 1758 | addDependentLibrary(l, this); | |||
| 1759 | } | |||
| 1760 | ||||
| 1761 | void BinaryFile::parse() { | |||
| 1762 | ArrayRef<uint8_t> data = arrayRefFromStringRef(mb.getBuffer()); | |||
| 1763 | auto *section = make<InputSection>(this, SHF_ALLOC | SHF_WRITE, SHT_PROGBITS, | |||
| 1764 | 8, data, ".data"); | |||
| 1765 | sections.push_back(section); | |||
| 1766 | ||||
| 1767 | // For each input file foo that is embedded to a result as a binary | |||
| 1768 | // blob, we define _binary_foo_{start,end,size} symbols, so that | |||
| 1769 | // user programs can access blobs by name. Non-alphanumeric | |||
| 1770 | // characters in a filename are replaced with underscore. | |||
| 1771 | std::string s = "_binary_" + mb.getBufferIdentifier().str(); | |||
| 1772 | for (size_t i = 0; i < s.size(); ++i) | |||
| 1773 | if (!isAlnum(s[i])) | |||
| 1774 | s[i] = '_'; | |||
| 1775 | ||||
| 1776 | symtab->addSymbol(Defined{nullptr, saver.save(s + "_start"), STB_GLOBAL, | |||
| 1777 | STV_DEFAULT, STT_OBJECT, 0, 0, section}); | |||
| 1778 | symtab->addSymbol(Defined{nullptr, saver.save(s + "_end"), STB_GLOBAL, | |||
| 1779 | STV_DEFAULT, STT_OBJECT, data.size(), 0, section}); | |||
| 1780 | symtab->addSymbol(Defined{nullptr, saver.save(s + "_size"), STB_GLOBAL, | |||
| 1781 | STV_DEFAULT, STT_OBJECT, data.size(), 0, nullptr}); | |||
| 1782 | } | |||
| 1783 | ||||
| 1784 | InputFile *elf::createObjectFile(MemoryBufferRef mb, StringRef archiveName, | |||
| 1785 | uint64_t offsetInArchive) { | |||
| 1786 | if (isBitcode(mb)) | |||
| 1787 | return make<BitcodeFile>(mb, archiveName, offsetInArchive); | |||
| 1788 | ||||
| 1789 | switch (getELFKind(mb, archiveName)) { | |||
| 1790 | case ELF32LEKind: | |||
| 1791 | return make<ObjFile<ELF32LE>>(mb, archiveName); | |||
| 1792 | case ELF32BEKind: | |||
| 1793 | return make<ObjFile<ELF32BE>>(mb, archiveName); | |||
| 1794 | case ELF64LEKind: | |||
| 1795 | return make<ObjFile<ELF64LE>>(mb, archiveName); | |||
| 1796 | case ELF64BEKind: | |||
| 1797 | return make<ObjFile<ELF64BE>>(mb, archiveName); | |||
| 1798 | default: | |||
| 1799 | llvm_unreachable("getELFKind")__builtin_unreachable(); | |||
| 1800 | } | |||
| 1801 | } | |||
| 1802 | ||||
| 1803 | void LazyObjFile::fetch() { | |||
| 1804 | if (fetched) | |||
| 1805 | return; | |||
| 1806 | fetched = true; | |||
| 1807 | ||||
| 1808 | InputFile *file = createObjectFile(mb, archiveName, offsetInArchive); | |||
| 1809 | file->groupId = groupId; | |||
| 1810 | ||||
| 1811 | // Copy symbol vector so that the new InputFile doesn't have to | |||
| 1812 | // insert the same defined symbols to the symbol table again. | |||
| 1813 | file->symbols = std::move(symbols); | |||
| 1814 | ||||
| 1815 | parseFile(file); | |||
| 1816 | } | |||
| 1817 | ||||
| 1818 | template <class ELFT> void LazyObjFile::parse() { | |||
| 1819 | using Elf_Sym = typename ELFT::Sym; | |||
| 1820 | ||||
| 1821 | // A lazy object file wraps either a bitcode file or an ELF file. | |||
| 1822 | if (isBitcode(this->mb)) { | |||
| 1823 | std::unique_ptr<lto::InputFile> obj = | |||
| 1824 | CHECK(lto::InputFile::create(this->mb), this)check2((lto::InputFile::create(this->mb)), [&] { return toString(this); }); | |||
| 1825 | for (const lto::InputFile::Symbol &sym : obj->symbols()) { | |||
| 1826 | if (sym.isUndefined()) | |||
| 1827 | continue; | |||
| 1828 | symtab->addSymbol(LazyObject{*this, saver.save(sym.getName())}); | |||
| 1829 | } | |||
| 1830 | return; | |||
| 1831 | } | |||
| 1832 | ||||
| 1833 | if (getELFKind(this->mb, archiveName) != config->ekind) { | |||
| 1834 | error("incompatible file: " + this->mb.getBufferIdentifier()); | |||
| 1835 | return; | |||
| 1836 | } | |||
| 1837 | ||||
| 1838 | // Find a symbol table. | |||
| 1839 | ELFFile<ELFT> obj = check(ELFFile<ELFT>::create(mb.getBuffer())); | |||
| 1840 | ArrayRef<typename ELFT::Shdr> sections = CHECK(obj.sections(), this)check2((obj.sections()), [&] { return toString(this); }); | |||
| 1841 | ||||
| 1842 | for (const typename ELFT::Shdr &sec : sections) { | |||
| 1843 | if (sec.sh_type != SHT_SYMTAB) | |||
| 1844 | continue; | |||
| 1845 | ||||
| 1846 | // A symbol table is found. | |||
| 1847 | ArrayRef<Elf_Sym> eSyms = CHECK(obj.symbols(&sec), this)check2((obj.symbols(&sec)), [&] { return toString(this ); }); | |||
| 1848 | uint32_t firstGlobal = sec.sh_info; | |||
| 1849 | StringRef strtab = CHECK(obj.getStringTableForSymtab(sec, sections), this)check2((obj.getStringTableForSymtab(sec, sections)), [&] { return toString(this); }); | |||
| 1850 | this->symbols.resize(eSyms.size()); | |||
| 1851 | ||||
| 1852 | // Get existing symbols or insert placeholder symbols. | |||
| 1853 | for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) | |||
| 1854 | if (eSyms[i].st_shndx != SHN_UNDEF) | |||
| 1855 | this->symbols[i] = symtab->insert(CHECK(eSyms[i].getName(strtab), this)check2((eSyms[i].getName(strtab)), [&] { return toString( this); })); | |||
| 1856 | ||||
| 1857 | // Replace existing symbols with LazyObject symbols. | |||
| 1858 | // | |||
| 1859 | // resolve() may trigger this->fetch() if an existing symbol is an | |||
| 1860 | // undefined symbol. If that happens, this LazyObjFile has served | |||
| 1861 | // its purpose, and we can exit from the loop early. | |||
| 1862 | for (Symbol *sym : this->symbols) { | |||
| 1863 | if (!sym) | |||
| 1864 | continue; | |||
| 1865 | sym->resolve(LazyObject{*this, sym->getName()}); | |||
| 1866 | ||||
| 1867 | // If fetched, stop iterating because this->symbols has been transferred | |||
| 1868 | // to the instantiated ObjFile. | |||
| 1869 | if (fetched) | |||
| 1870 | return; | |||
| 1871 | } | |||
| 1872 | return; | |||
| 1873 | } | |||
| 1874 | } | |||
| 1875 | ||||
| 1876 | bool LazyObjFile::shouldFetchForCommon(const StringRef &name) { | |||
| 1877 | if (isBitcode(mb)) | |||
| ||||
| 1878 | return isBitcodeNonCommonDef(mb, name, archiveName); | |||
| 1879 | ||||
| 1880 | return isNonCommonDef(mb, name, archiveName); | |||
| 1881 | } | |||
| 1882 | ||||
| 1883 | std::string elf::replaceThinLTOSuffix(StringRef path) { | |||
| 1884 | StringRef suffix = config->thinLTOObjectSuffixReplace.first; | |||
| 1885 | StringRef repl = config->thinLTOObjectSuffixReplace.second; | |||
| 1886 | ||||
| 1887 | if (path.consume_back(suffix)) | |||
| 1888 | return (path + repl).str(); | |||
| 1889 | return std::string(path); | |||
| 1890 | } | |||
| 1891 | ||||
| 1892 | template void BitcodeFile::parse<ELF32LE>(); | |||
| 1893 | template void BitcodeFile::parse<ELF32BE>(); | |||
| 1894 | template void BitcodeFile::parse<ELF64LE>(); | |||
| 1895 | template void BitcodeFile::parse<ELF64BE>(); | |||
| 1896 | ||||
| 1897 | template void LazyObjFile::parse<ELF32LE>(); | |||
| 1898 | template void LazyObjFile::parse<ELF32BE>(); | |||
| 1899 | template void LazyObjFile::parse<ELF64LE>(); | |||
| 1900 | template void LazyObjFile::parse<ELF64BE>(); | |||
| 1901 | ||||
| 1902 | template class elf::ObjFile<ELF32LE>; | |||
| 1903 | template class elf::ObjFile<ELF32BE>; | |||
| 1904 | template class elf::ObjFile<ELF64LE>; | |||
| 1905 | template class elf::ObjFile<ELF64BE>; | |||
| 1906 | ||||
| 1907 | template void SharedFile::parse<ELF32LE>(); | |||
| 1908 | template void SharedFile::parse<ELF32BE>(); | |||
| 1909 | template void SharedFile::parse<ELF64LE>(); | |||
| 1910 | template void SharedFile::parse<ELF64BE>(); |
| 1 | //===- IRSymtab.h - data definitions for IR symbol tables -------*- C++ -*-===// | |||
| 2 | // | |||
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
| 4 | // See https://llvm.org/LICENSE.txt for license information. | |||
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
| 6 | // | |||
| 7 | //===----------------------------------------------------------------------===// | |||
| 8 | // | |||
| 9 | // This file contains data definitions and a reader and builder for a symbol | |||
| 10 | // table for LLVM IR. Its purpose is to allow linkers and other consumers of | |||
| 11 | // bitcode files to efficiently read the symbol table for symbol resolution | |||
| 12 | // purposes without needing to construct a module in memory. | |||
| 13 | // | |||
| 14 | // As with most object files the symbol table has two parts: the symbol table | |||
| 15 | // itself and a string table which is referenced by the symbol table. | |||
| 16 | // | |||
| 17 | // A symbol table corresponds to a single bitcode file, which may consist of | |||
| 18 | // multiple modules, so symbol tables may likewise contain symbols for multiple | |||
| 19 | // modules. | |||
| 20 | // | |||
| 21 | //===----------------------------------------------------------------------===// | |||
| 22 | ||||
| 23 | #ifndef LLVM_OBJECT_IRSYMTAB_H | |||
| 24 | #define LLVM_OBJECT_IRSYMTAB_H | |||
| 25 | ||||
| 26 | #include "llvm/ADT/ArrayRef.h" | |||
| 27 | #include "llvm/ADT/StringRef.h" | |||
| 28 | #include "llvm/ADT/iterator_range.h" | |||
| 29 | #include "llvm/IR/Comdat.h" | |||
| 30 | #include "llvm/IR/GlobalValue.h" | |||
| 31 | #include "llvm/Object/SymbolicFile.h" | |||
| 32 | #include "llvm/Support/Allocator.h" | |||
| 33 | #include "llvm/Support/Endian.h" | |||
| 34 | #include "llvm/Support/Error.h" | |||
| 35 | #include <cassert> | |||
| 36 | #include <cstdint> | |||
| 37 | #include <vector> | |||
| 38 | ||||
| 39 | namespace llvm { | |||
| 40 | ||||
| 41 | struct BitcodeFileContents; | |||
| 42 | class StringTableBuilder; | |||
| 43 | ||||
| 44 | namespace irsymtab { | |||
| 45 | ||||
| 46 | namespace storage { | |||
| 47 | ||||
| 48 | // The data structures in this namespace define the low-level serialization | |||
| 49 | // format. Clients that just want to read a symbol table should use the | |||
| 50 | // irsymtab::Reader class. | |||
| 51 | ||||
| 52 | using Word = support::ulittle32_t; | |||
| 53 | ||||
| 54 | /// A reference to a string in the string table. | |||
| 55 | struct Str { | |||
| 56 | Word Offset, Size; | |||
| 57 | ||||
| 58 | StringRef get(StringRef Strtab) const { | |||
| 59 | return {Strtab.data() + Offset, Size}; | |||
| 60 | } | |||
| 61 | }; | |||
| 62 | ||||
| 63 | /// A reference to a range of objects in the symbol table. | |||
| 64 | template <typename T> struct Range { | |||
| 65 | Word Offset, Size; | |||
| 66 | ||||
| 67 | ArrayRef<T> get(StringRef Symtab) const { | |||
| 68 | return {reinterpret_cast<const T *>(Symtab.data() + Offset), Size}; | |||
| 69 | } | |||
| 70 | }; | |||
| 71 | ||||
| 72 | /// Describes the range of a particular module's symbols within the symbol | |||
| 73 | /// table. | |||
| 74 | struct Module { | |||
| 75 | Word Begin, End; | |||
| 76 | ||||
| 77 | /// The index of the first Uncommon for this Module. | |||
| 78 | Word UncBegin; | |||
| 79 | }; | |||
| 80 | ||||
| 81 | /// This is equivalent to an IR comdat. | |||
| 82 | struct Comdat { | |||
| 83 | Str Name; | |||
| 84 | ||||
| 85 | // llvm::Comdat::SelectionKind | |||
| 86 | Word SelectionKind; | |||
| 87 | }; | |||
| 88 | ||||
| 89 | /// Contains the information needed by linkers for symbol resolution, as well as | |||
| 90 | /// by the LTO implementation itself. | |||
| 91 | struct Symbol { | |||
| 92 | /// The mangled symbol name. | |||
| 93 | Str Name; | |||
| 94 | ||||
| 95 | /// The unmangled symbol name, or the empty string if this is not an IR | |||
| 96 | /// symbol. | |||
| 97 | Str IRName; | |||
| 98 | ||||
| 99 | /// The index into Header::Comdats, or -1 if not a comdat member. | |||
| 100 | Word ComdatIndex; | |||
| 101 | ||||
| 102 | Word Flags; | |||
| 103 | enum FlagBits { | |||
| 104 | FB_visibility, // 2 bits | |||
| 105 | FB_has_uncommon = FB_visibility + 2, | |||
| 106 | FB_undefined, | |||
| 107 | FB_weak, | |||
| 108 | FB_common, | |||
| 109 | FB_indirect, | |||
| 110 | FB_used, | |||
| 111 | FB_tls, | |||
| 112 | FB_may_omit, | |||
| 113 | FB_global, | |||
| 114 | FB_format_specific, | |||
| 115 | FB_unnamed_addr, | |||
| 116 | FB_executable, | |||
| 117 | }; | |||
| 118 | }; | |||
| 119 | ||||
| 120 | /// This data structure contains rarely used symbol fields and is optionally | |||
| 121 | /// referenced by a Symbol. | |||
| 122 | struct Uncommon { | |||
| 123 | Word CommonSize, CommonAlign; | |||
| 124 | ||||
| 125 | /// COFF-specific: the name of the symbol that a weak external resolves to | |||
| 126 | /// if not defined. | |||
| 127 | Str COFFWeakExternFallbackName; | |||
| 128 | ||||
| 129 | /// Specified section name, if any. | |||
| 130 | Str SectionName; | |||
| 131 | }; | |||
| 132 | ||||
| 133 | ||||
| 134 | struct Header { | |||
| 135 | /// Version number of the symtab format. This number should be incremented | |||
| 136 | /// when the format changes, but it does not need to be incremented if a | |||
| 137 | /// change to LLVM would cause it to create a different symbol table. | |||
| 138 | Word Version; | |||
| 139 | enum { kCurrentVersion = 3 }; | |||
| 140 | ||||
| 141 | /// The producer's version string (LLVM_VERSION_STRING " " LLVM_REVISION). | |||
| 142 | /// Consumers should rebuild the symbol table from IR if the producer's | |||
| 143 | /// version does not match the consumer's version due to potential differences | |||
| 144 | /// in symbol table format, symbol enumeration order and so on. | |||
| 145 | Str Producer; | |||
| 146 | ||||
| 147 | Range<Module> Modules; | |||
| 148 | Range<Comdat> Comdats; | |||
| 149 | Range<Symbol> Symbols; | |||
| 150 | Range<Uncommon> Uncommons; | |||
| 151 | ||||
| 152 | Str TargetTriple, SourceFileName; | |||
| 153 | ||||
| 154 | /// COFF-specific: linker directives. | |||
| 155 | Str COFFLinkerOpts; | |||
| 156 | ||||
| 157 | /// Dependent Library Specifiers | |||
| 158 | Range<Str> DependentLibraries; | |||
| 159 | }; | |||
| 160 | ||||
| 161 | } // end namespace storage | |||
| 162 | ||||
| 163 | /// Fills in Symtab and StrtabBuilder with a valid symbol and string table for | |||
| 164 | /// Mods. | |||
| 165 | Error build(ArrayRef<Module *> Mods, SmallVector<char, 0> &Symtab, | |||
| 166 | StringTableBuilder &StrtabBuilder, BumpPtrAllocator &Alloc); | |||
| 167 | ||||
| 168 | /// This represents a symbol that has been read from a storage::Symbol and | |||
| 169 | /// possibly a storage::Uncommon. | |||
| 170 | struct Symbol { | |||
| 171 | // Copied from storage::Symbol. | |||
| 172 | StringRef Name, IRName; | |||
| 173 | int ComdatIndex; | |||
| 174 | uint32_t Flags; | |||
| 175 | ||||
| 176 | // Copied from storage::Uncommon. | |||
| 177 | uint32_t CommonSize, CommonAlign; | |||
| 178 | StringRef COFFWeakExternFallbackName; | |||
| 179 | StringRef SectionName; | |||
| 180 | ||||
| 181 | /// Returns the mangled symbol name. | |||
| 182 | StringRef getName() const { return Name; } | |||
| 183 | ||||
| 184 | /// Returns the unmangled symbol name, or the empty string if this is not an | |||
| 185 | /// IR symbol. | |||
| 186 | StringRef getIRName() const { return IRName; } | |||
| 187 | ||||
| 188 | /// Returns the index into the comdat table (see Reader::getComdatTable()), or | |||
| 189 | /// -1 if not a comdat member. | |||
| 190 | int getComdatIndex() const { return ComdatIndex; } | |||
| 191 | ||||
| 192 | using S = storage::Symbol; | |||
| 193 | ||||
| 194 | GlobalValue::VisibilityTypes getVisibility() const { | |||
| 195 | return GlobalValue::VisibilityTypes((Flags >> S::FB_visibility) & 3); | |||
| 196 | } | |||
| 197 | ||||
| 198 | bool isUndefined() const { return (Flags >> S::FB_undefined) & 1; } | |||
| 199 | bool isWeak() const { return (Flags >> S::FB_weak) & 1; } | |||
| 200 | bool isCommon() const { return (Flags >> S::FB_common) & 1; } | |||
| 201 | bool isIndirect() const { return (Flags >> S::FB_indirect) & 1; } | |||
| 202 | bool isUsed() const { return (Flags >> S::FB_used) & 1; } | |||
| 203 | bool isTLS() const { return (Flags >> S::FB_tls) & 1; } | |||
| 204 | ||||
| 205 | bool canBeOmittedFromSymbolTable() const { | |||
| 206 | return (Flags >> S::FB_may_omit) & 1; | |||
| 207 | } | |||
| 208 | ||||
| 209 | bool isGlobal() const { return (Flags >> S::FB_global) & 1; } | |||
| 210 | bool isFormatSpecific() const { return (Flags >> S::FB_format_specific) & 1; } | |||
| 211 | bool isUnnamedAddr() const { return (Flags >> S::FB_unnamed_addr) & 1; } | |||
| 212 | bool isExecutable() const { return (Flags >> S::FB_executable) & 1; } | |||
| 213 | ||||
| 214 | uint64_t getCommonSize() const { | |||
| 215 | assert(isCommon())((void)0); | |||
| 216 | return CommonSize; | |||
| 217 | } | |||
| 218 | ||||
| 219 | uint32_t getCommonAlignment() const { | |||
| 220 | assert(isCommon())((void)0); | |||
| 221 | return CommonAlign; | |||
| 222 | } | |||
| 223 | ||||
| 224 | /// COFF-specific: for weak externals, returns the name of the symbol that is | |||
| 225 | /// used as a fallback if the weak external remains undefined. | |||
| 226 | StringRef getCOFFWeakExternalFallback() const { | |||
| 227 | assert(isWeak() && isIndirect())((void)0); | |||
| 228 | return COFFWeakExternFallbackName; | |||
| 229 | } | |||
| 230 | ||||
| 231 | StringRef getSectionName() const { return SectionName; } | |||
| 232 | }; | |||
| 233 | ||||
| 234 | /// This class can be used to read a Symtab and Strtab produced by | |||
| 235 | /// irsymtab::build. | |||
| 236 | class Reader { | |||
| 237 | StringRef Symtab, Strtab; | |||
| 238 | ||||
| 239 | ArrayRef<storage::Module> Modules; | |||
| 240 | ArrayRef<storage::Comdat> Comdats; | |||
| 241 | ArrayRef<storage::Symbol> Symbols; | |||
| 242 | ArrayRef<storage::Uncommon> Uncommons; | |||
| 243 | ArrayRef<storage::Str> DependentLibraries; | |||
| 244 | ||||
| 245 | StringRef str(storage::Str S) const { return S.get(Strtab); } | |||
| 246 | ||||
| 247 | template <typename T> ArrayRef<T> range(storage::Range<T> R) const { | |||
| 248 | return R.get(Symtab); | |||
| 249 | } | |||
| 250 | ||||
| 251 | const storage::Header &header() const { | |||
| 252 | return *reinterpret_cast<const storage::Header *>(Symtab.data()); | |||
| 253 | } | |||
| 254 | ||||
| 255 | public: | |||
| 256 | class SymbolRef; | |||
| 257 | ||||
| 258 | Reader() = default; | |||
| 259 | Reader(StringRef Symtab, StringRef Strtab) : Symtab(Symtab), Strtab(Strtab) { | |||
| 260 | Modules = range(header().Modules); | |||
| 261 | Comdats = range(header().Comdats); | |||
| 262 | Symbols = range(header().Symbols); | |||
| 263 | Uncommons = range(header().Uncommons); | |||
| 264 | DependentLibraries = range(header().DependentLibraries); | |||
| 265 | } | |||
| 266 | ||||
| 267 | using symbol_range = iterator_range<object::content_iterator<SymbolRef>>; | |||
| 268 | ||||
| 269 | /// Returns the symbol table for the entire bitcode file. | |||
| 270 | /// The symbols enumerated by this method are ephemeral, but they can be | |||
| 271 | /// copied into an irsymtab::Symbol object. | |||
| 272 | symbol_range symbols() const; | |||
| 273 | ||||
| 274 | size_t getNumModules() const { return Modules.size(); } | |||
| 275 | ||||
| 276 | /// Returns a slice of the symbol table for the I'th module in the file. | |||
| 277 | /// The symbols enumerated by this method are ephemeral, but they can be | |||
| 278 | /// copied into an irsymtab::Symbol object. | |||
| 279 | symbol_range module_symbols(unsigned I) const; | |||
| 280 | ||||
| 281 | StringRef getTargetTriple() const { return str(header().TargetTriple); } | |||
| 282 | ||||
| 283 | /// Returns the source file path specified at compile time. | |||
| 284 | StringRef getSourceFileName() const { return str(header().SourceFileName); } | |||
| 285 | ||||
| 286 | /// Returns a table with all the comdats used by this file. | |||
| 287 | std::vector<std::pair<StringRef, llvm::Comdat::SelectionKind>> | |||
| 288 | getComdatTable() const { | |||
| 289 | std::vector<std::pair<StringRef, llvm::Comdat::SelectionKind>> ComdatTable; | |||
| 290 | ComdatTable.reserve(Comdats.size()); | |||
| 291 | for (auto C : Comdats) | |||
| 292 | ComdatTable.push_back({str(C.Name), llvm::Comdat::SelectionKind( | |||
| 293 | uint32_t(C.SelectionKind))}); | |||
| 294 | return ComdatTable; | |||
| 295 | } | |||
| 296 | ||||
| 297 | /// COFF-specific: returns linker options specified in the input file. | |||
| 298 | StringRef getCOFFLinkerOpts() const { return str(header().COFFLinkerOpts); } | |||
| 299 | ||||
| 300 | /// Returns dependent library specifiers | |||
| 301 | std::vector<StringRef> getDependentLibraries() const { | |||
| 302 | std::vector<StringRef> Specifiers; | |||
| 303 | Specifiers.reserve(DependentLibraries.size()); | |||
| 304 | for (auto S : DependentLibraries) { | |||
| 305 | Specifiers.push_back(str(S)); | |||
| 306 | } | |||
| 307 | return Specifiers; | |||
| 308 | } | |||
| 309 | }; | |||
| 310 | ||||
| 311 | /// Ephemeral symbols produced by Reader::symbols() and | |||
| 312 | /// Reader::module_symbols(). | |||
| 313 | class Reader::SymbolRef : public Symbol { | |||
| 314 | const storage::Symbol *SymI, *SymE; | |||
| 315 | const storage::Uncommon *UncI; | |||
| 316 | const Reader *R; | |||
| 317 | ||||
| 318 | void read() { | |||
| 319 | if (SymI == SymE) | |||
| 320 | return; | |||
| 321 | ||||
| 322 | Name = R->str(SymI->Name); | |||
| 323 | IRName = R->str(SymI->IRName); | |||
| 324 | ComdatIndex = SymI->ComdatIndex; | |||
| 325 | Flags = SymI->Flags; | |||
| 326 | ||||
| 327 | if (Flags & (1 << storage::Symbol::FB_has_uncommon)) { | |||
| 328 | CommonSize = UncI->CommonSize; | |||
| ||||
| 329 | CommonAlign = UncI->CommonAlign; | |||
| 330 | COFFWeakExternFallbackName = R->str(UncI->COFFWeakExternFallbackName); | |||
| 331 | SectionName = R->str(UncI->SectionName); | |||
| 332 | } else | |||
| 333 | // Reset this field so it can be queried unconditionally for all symbols. | |||
| 334 | SectionName = ""; | |||
| 335 | } | |||
| 336 | ||||
| 337 | public: | |||
| 338 | SymbolRef(const storage::Symbol *SymI, const storage::Symbol *SymE, | |||
| 339 | const storage::Uncommon *UncI, const Reader *R) | |||
| 340 | : SymI(SymI), SymE(SymE), UncI(UncI), R(R) { | |||
| 341 | read(); | |||
| 342 | } | |||
| 343 | ||||
| 344 | void moveNext() { | |||
| 345 | ++SymI; | |||
| 346 | if (Flags & (1 << storage::Symbol::FB_has_uncommon)) | |||
| 347 | ++UncI; | |||
| 348 | read(); | |||
| 349 | } | |||
| 350 | ||||
| 351 | bool operator==(const SymbolRef &Other) const { return SymI == Other.SymI; } | |||
| 352 | }; | |||
| 353 | ||||
| 354 | inline Reader::symbol_range Reader::symbols() const { | |||
| 355 | return {SymbolRef(Symbols.begin(), Symbols.end(), Uncommons.begin(), this), | |||
| 356 | SymbolRef(Symbols.end(), Symbols.end(), nullptr, this)}; | |||
| 357 | } | |||
| 358 | ||||
| 359 | inline Reader::symbol_range Reader::module_symbols(unsigned I) const { | |||
| 360 | const storage::Module &M = Modules[I]; | |||
| 361 | const storage::Symbol *MBegin = Symbols.begin() + M.Begin, | |||
| 362 | *MEnd = Symbols.begin() + M.End; | |||
| 363 | return {SymbolRef(MBegin, MEnd, Uncommons.begin() + M.UncBegin, this), | |||
| 364 | SymbolRef(MEnd, MEnd, nullptr, this)}; | |||
| 365 | } | |||
| 366 | ||||
| 367 | /// The contents of the irsymtab in a bitcode file. Any underlying data for the | |||
| 368 | /// irsymtab are owned by Symtab and Strtab. | |||
| 369 | struct FileContents { | |||
| 370 | SmallVector<char, 0> Symtab, Strtab; | |||
| 371 | Reader TheReader; | |||
| 372 | }; | |||
| 373 | ||||
| 374 | /// Reads the contents of a bitcode file, creating its irsymtab if necessary. | |||
| 375 | Expected<FileContents> readBitcode(const BitcodeFileContents &BFC); | |||
| 376 | ||||
| 377 | } // end namespace irsymtab | |||
| 378 | } // end namespace llvm | |||
| 379 | ||||
| 380 | #endif // LLVM_OBJECT_IRSYMTAB_H |