Skip to content

Commit ee5b9cd

Browse files
midhuncodes7 and Midhunesh authored
[llvm-symbolizer] Recognize and symbolize archive members (#150401)
This PR adds support for selecting specific archive members in llvm-symbolizer using the `archive.a(member.o)` syntax, with architecture-aware member selection.

**Key features:**

1. **Archive member selection syntax**: Specify archive members using the `archive.a(member.o)` format.
2. **Architecture selection via the `--default-arch` flag**: Select the appropriate member when multiple members have the same name but different architectures.
3. **Architecture selection via the `:arch` suffix**: Alternative syntax `archive.a(member.o):arch` for specifying the architecture.

This functionality is primarily designed for AIX big archives, which can contain multiple members with the same name but different architectures (32-bit and 64-bit). However, the implementation works with all archive formats (GNU, BSD, Darwin, big archive) and handles same-named members created with `llvm-ar q`.

---------

Co-authored-by: Midhunesh <midhuensh.p@ibm.com>
1 parent 6e01ea4 commit ee5b9cd

File tree

6 files changed

+460
-55
lines changed

6 files changed

+460
-55
lines changed

llvm/docs/CommandGuide/llvm-symbolizer.rst

Lines changed: 14 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -535,16 +535,20 @@ MACH-O SPECIFIC OPTIONS
535535
.. option:: --default-arch <arch>
536536

537537
If a binary contains object files for multiple architectures (e.g. it is a
538-
Mach-O universal binary), symbolize the object file for a given architecture.
539-
You can also specify the architecture by writing ``binary_name:arch_name`` in
540-
the input (see example below). If the architecture is not specified in either
541-
way, the address will not be symbolized. Defaults to empty string.
538+
Mach-O universal binary or an archive with architecture variants),
539+
symbolize the object file for a given architecture. You can also specify
540+
the architecture by writing ``binary_name:arch_name`` in the input (see
541+
example below). For archives, the format ``archive.a(member.o):arch``
542+
is also supported. If the architecture is not specified,
543+
the address will not be symbolized. Defaults to empty string.
542544

543545
.. code-block:: console
544546
545547
$ cat addr.txt
546548
/tmp/mach_universal_binary:i386 0x1f84
547549
/tmp/mach_universal_binary:x86_64 0x100000f24
550+
/tmp/archive.a(member.o):ppc 0x1000
551+
/tmp/archive.a(member.o):ppc64 0x2000
548552
549553
$ llvm-symbolizer < addr.txt
550554
_main
@@ -553,6 +557,12 @@ MACH-O SPECIFIC OPTIONS
553557
_main
554558
/tmp/source_x86_64.cc:8
555559
560+
_foo
561+
/tmp/source_ppc.cc:12
562+
563+
_foo
564+
/tmp/source_ppc64.cc:12
565+
556566
.. option:: --dsym-hint <path/to/file.dSYM>
557567

558568
If the debug info for a binary isn't present in the default location, look for

llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h

Lines changed: 42 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@
1313
#ifndef LLVM_DEBUGINFO_SYMBOLIZE_SYMBOLIZE_H
1414
#define LLVM_DEBUGINFO_SYMBOLIZE_SYMBOLIZE_H
1515

16+
#include "llvm/ADT/DenseMap.h"
1617
#include "llvm/ADT/StringMap.h"
1718
#include "llvm/ADT/ilist_node.h"
1819
#include "llvm/ADT/simple_ilist.h"
@@ -25,6 +26,7 @@
2526
#include <cstdint>
2627
#include <map>
2728
#include <memory>
29+
#include <optional>
2830
#include <string>
2931
#include <utility>
3032
#include <vector>
@@ -196,11 +198,18 @@ class LLVMSymbolizer {
196198
Expected<ObjectPair> getOrCreateObjectPair(const std::string &Path,
197199
const std::string &ArchName);
198200

199-
/// Return a pointer to object file at specified path, for a specified
200-
/// architecture (e.g. if path refers to a Mach-O universal binary, only one
201-
/// object file from it will be returned).
202-
Expected<ObjectFile *> getOrCreateObject(const std::string &Path,
203-
const std::string &ArchName);
201+
/// Return a pointer to the object file with the specified name, for a
202+
/// specified architecture (e.g. if path refers to a Mach-O universal
203+
/// binary, only one object file from it will be returned).
204+
Expected<ObjectFile *> getOrCreateObject(const std::string &InputPath,
205+
const std::string &DefaultArchName);
206+
207+
/// Return a pointer to the object file with the specified name, for a
208+
/// specified architecture that is present inside an archive file.
209+
Expected<ObjectFile *> getOrCreateObjectFromArchive(StringRef ArchivePath,
210+
StringRef MemberName,
211+
StringRef ArchName,
212+
StringRef FullPath);
204213

205214
/// Update the LRU cache order when a binary is accessed.
206215
void recordAccess(CachedBinary &Bin);
@@ -216,15 +225,39 @@ class LLVMSymbolizer {
216225
/// Contains parsed binary for each path, or parsing error.
217226
std::map<std::string, CachedBinary, std::less<>> BinaryForPath;
218227

228+
/// Store the archive path for the object file.
229+
DenseMap<const object::ObjectFile *, std::string> ObjectToArchivePath;
230+
219231
/// A list of cached binaries in LRU order.
220232
simple_ilist<CachedBinary> LRUBinaries;
221233
/// Sum of the sizes of the cached binaries.
222234
size_t CacheSize = 0;
223235

224-
/// Parsed object file for path/architecture pair, where "path" refers
225-
/// to Mach-O universal binary.
226-
std::map<std::pair<std::string, std::string>, std::unique_ptr<ObjectFile>>
227-
ObjectForUBPathAndArch;
236+
struct ContainerCacheKey {
237+
std::string Path;
238+
std::string MemberName;
239+
std::string ArchName;
240+
241+
// Required for map comparison.
242+
bool operator<(const ContainerCacheKey &Other) const {
243+
return std::tie(Path, MemberName, ArchName) <
244+
std::tie(Other.Path, Other.MemberName, Other.ArchName);
245+
}
246+
};
247+
248+
/// Parsed object file for each path/member/architecture triple.
249+
/// Used to cache objects extracted from containers (e.g., Mach-O
250+
/// universal binaries, archives).
251+
std::map<ContainerCacheKey, std::unique_ptr<ObjectFile>> ObjectFileCache;
252+
253+
Expected<object::Binary *>
254+
loadOrGetBinary(const std::string &ArchivePathKey,
255+
std::optional<StringRef> FullPathKey = std::nullopt);
256+
257+
Expected<ObjectFile *> findOrCacheObject(
258+
const ContainerCacheKey &Key,
259+
llvm::function_ref<Expected<std::unique_ptr<ObjectFile>>()> Loader,
260+
const std::string &PathForBinaryCache);
228261

229262
Options Opts;
230263

llvm/lib/DebugInfo/Symbolize/Symbolize.cpp

Lines changed: 148 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@
2121
#include "llvm/DebugInfo/PDB/PDBContext.h"
2222
#include "llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h"
2323
#include "llvm/Demangle/Demangle.h"
24+
#include "llvm/Object/Archive.h"
2425
#include "llvm/Object/BuildID.h"
2526
#include "llvm/Object/COFF.h"
2627
#include "llvm/Object/ELFObjectFile.h"
@@ -285,7 +286,7 @@ LLVMSymbolizer::findSymbol(ArrayRef<uint8_t> BuildID, StringRef Symbol,
285286
}
286287

287288
void LLVMSymbolizer::flush() {
288-
ObjectForUBPathAndArch.clear();
289+
ObjectFileCache.clear();
289290
LRUBinaries.clear();
290291
CacheSize = 0;
291292
BinaryForPath.clear();
@@ -557,57 +558,164 @@ LLVMSymbolizer::getOrCreateObjectPair(const std::string &Path,
557558
if (!DbgObj)
558559
DbgObj = Obj;
559560
ObjectPair Res = std::make_pair(Obj, DbgObj);
560-
std::string DbgObjPath = DbgObj->getFileName().str();
561561
auto Pair =
562562
ObjectPairForPathArch.emplace(std::make_pair(Path, ArchName), Res);
563-
BinaryForPath.find(DbgObjPath)->second.pushEvictor([this, I = Pair.first]() {
564-
ObjectPairForPathArch.erase(I);
565-
});
563+
std::string FullDbgObjKey;
564+
auto It = ObjectToArchivePath.find(DbgObj);
565+
if (It != ObjectToArchivePath.end()) {
566+
StringRef ArchivePath = It->second;
567+
StringRef MemberName = sys::path::filename(DbgObj->getFileName());
568+
FullDbgObjKey = (ArchivePath + "(" + MemberName + ")").str();
569+
} else {
570+
FullDbgObjKey = DbgObj->getFileName().str();
571+
}
572+
BinaryForPath.find(FullDbgObjKey)
573+
->second.pushEvictor(
574+
[this, I = Pair.first]() { ObjectPairForPathArch.erase(I); });
566575
return Res;
567576
}
568577

569-
Expected<ObjectFile *>
570-
LLVMSymbolizer::getOrCreateObject(const std::string &Path,
571-
const std::string &ArchName) {
572-
Binary *Bin;
573-
auto Pair = BinaryForPath.emplace(Path, OwningBinary<Binary>());
578+
Expected<object::Binary *>
579+
LLVMSymbolizer::loadOrGetBinary(const std::string &ArchivePathKey,
580+
std::optional<StringRef> FullPathKey) {
581+
// If no separate cache key is provided, use the archive path itself.
582+
std::string FullPathKeyStr =
583+
FullPathKey ? FullPathKey->str() : ArchivePathKey;
584+
auto Pair = BinaryForPath.emplace(FullPathKeyStr, OwningBinary<Binary>());
574585
if (!Pair.second) {
575-
Bin = Pair.first->second->getBinary();
576586
recordAccess(Pair.first->second);
577-
} else {
578-
Expected<OwningBinary<Binary>> BinOrErr = createBinary(Path);
579-
if (!BinOrErr)
580-
return BinOrErr.takeError();
587+
return Pair.first->second->getBinary();
588+
}
589+
590+
Expected<OwningBinary<Binary>> BinOrErr = createBinary(ArchivePathKey);
591+
if (!BinOrErr)
592+
return BinOrErr.takeError();
581593

582-
CachedBinary &CachedBin = Pair.first->second;
583-
CachedBin = std::move(BinOrErr.get());
584-
CachedBin.pushEvictor([this, I = Pair.first]() { BinaryForPath.erase(I); });
585-
LRUBinaries.push_back(CachedBin);
586-
CacheSize += CachedBin.size();
587-
Bin = CachedBin->getBinary();
594+
CachedBinary &CachedBin = Pair.first->second;
595+
CachedBin = std::move(*BinOrErr);
596+
CachedBin.pushEvictor([this, I = Pair.first]() { BinaryForPath.erase(I); });
597+
LRUBinaries.push_back(CachedBin);
598+
CacheSize += CachedBin.size();
599+
return CachedBin->getBinary();
600+
}
601+
602+
Expected<ObjectFile *> LLVMSymbolizer::findOrCacheObject(
603+
const ContainerCacheKey &Key,
604+
llvm::function_ref<Expected<std::unique_ptr<ObjectFile>>()> Loader,
605+
const std::string &PathForBinaryCache) {
606+
auto It = ObjectFileCache.find(Key);
607+
if (It != ObjectFileCache.end())
608+
return It->second.get();
609+
610+
Expected<std::unique_ptr<ObjectFile>> ObjOrErr = Loader();
611+
if (!ObjOrErr) {
612+
ObjectFileCache.emplace(Key, std::unique_ptr<ObjectFile>());
613+
return ObjOrErr.takeError();
588614
}
589615

590-
if (!Bin)
591-
return static_cast<ObjectFile *>(nullptr);
616+
ObjectFile *Res = ObjOrErr->get();
617+
auto NewEntry = ObjectFileCache.emplace(Key, std::move(*ObjOrErr));
618+
auto CacheIter = BinaryForPath.find(PathForBinaryCache);
619+
if (CacheIter != BinaryForPath.end())
620+
CacheIter->second.pushEvictor(
621+
[this, Iter = NewEntry.first]() { ObjectFileCache.erase(Iter); });
622+
return Res;
623+
}
592624

593-
if (MachOUniversalBinary *UB = dyn_cast_or_null<MachOUniversalBinary>(Bin)) {
594-
auto I = ObjectForUBPathAndArch.find(std::make_pair(Path, ArchName));
595-
if (I != ObjectForUBPathAndArch.end())
596-
return I->second.get();
597-
598-
Expected<std::unique_ptr<ObjectFile>> ObjOrErr =
599-
UB->getMachOObjectForArch(ArchName);
600-
if (!ObjOrErr) {
601-
ObjectForUBPathAndArch.emplace(std::make_pair(Path, ArchName),
602-
std::unique_ptr<ObjectFile>());
603-
return ObjOrErr.takeError();
625+
Expected<ObjectFile *> LLVMSymbolizer::getOrCreateObjectFromArchive(
626+
StringRef ArchivePath, StringRef MemberName, StringRef ArchName,
627+
StringRef FullPath) {
628+
Expected<object::Binary *> BinOrErr =
629+
loadOrGetBinary(ArchivePath.str(), FullPath);
630+
if (!BinOrErr)
631+
return BinOrErr.takeError();
632+
object::Binary *Bin = *BinOrErr;
633+
634+
object::Archive *Archive = dyn_cast_if_present<object::Archive>(Bin);
635+
if (!Archive)
636+
return createStringError(std::errc::invalid_argument,
637+
"'%s' is not a valid archive",
638+
ArchivePath.str().c_str());
639+
640+
Error Err = Error::success();
641+
for (auto &Child : Archive->children(Err, /*SkipInternal=*/true)) {
642+
Expected<StringRef> NameOrErr = Child.getName();
643+
if (!NameOrErr) {
644+
// TODO: Report this as a warning to the client. Consider adding a
645+
// callback mechanism to report warning-level issues.
646+
consumeError(NameOrErr.takeError());
647+
continue;
648+
}
649+
if (*NameOrErr == MemberName) {
650+
Expected<std::unique_ptr<object::Binary>> MemberOrErr =
651+
Child.getAsBinary();
652+
if (!MemberOrErr) {
653+
// TODO: Report this as a warning to the client. Consider adding a
654+
// callback mechanism to report warning-level issues.
655+
consumeError(MemberOrErr.takeError());
656+
continue;
657+
}
658+
659+
std::unique_ptr<object::Binary> Binary = std::move(*MemberOrErr);
660+
if (auto *Obj = dyn_cast<object::ObjectFile>(Binary.get())) {
661+
ObjectToArchivePath[Obj] = ArchivePath.str();
662+
Triple::ArchType ObjArch = Obj->makeTriple().getArch();
663+
Triple RequestedTriple;
664+
RequestedTriple.setArch(Triple::getArchTypeForLLVMName(ArchName));
665+
if (ObjArch != RequestedTriple.getArch())
666+
continue;
667+
668+
ContainerCacheKey CacheKey{ArchivePath.str(), MemberName.str(),
669+
ArchName.str()};
670+
Expected<ObjectFile *> Res = findOrCacheObject(
671+
CacheKey,
672+
[O = std::unique_ptr<ObjectFile>(
673+
Obj)]() mutable -> Expected<std::unique_ptr<ObjectFile>> {
674+
return std::move(O);
675+
},
676+
ArchivePath.str());
677+
Binary.release();
678+
return Res;
679+
}
680+
}
681+
}
682+
if (Err)
683+
return std::move(Err);
684+
return createStringError(std::errc::invalid_argument,
685+
"no matching member '%s' with arch '%s' in '%s'",
686+
MemberName.str().c_str(), ArchName.str().c_str(),
687+
ArchivePath.str().c_str());
688+
}
689+
690+
Expected<ObjectFile *>
691+
LLVMSymbolizer::getOrCreateObject(const std::string &Path,
692+
const std::string &ArchName) {
693+
// First check for archive(member) format - more efficient to check closing
694+
// paren first.
695+
if (!Path.empty() && Path.back() == ')') {
696+
size_t OpenParen = Path.rfind('(', Path.size() - 1);
697+
if (OpenParen != std::string::npos) {
698+
StringRef ArchivePath = StringRef(Path).substr(0, OpenParen);
699+
StringRef MemberName =
700+
StringRef(Path).substr(OpenParen + 1, Path.size() - OpenParen - 2);
701+
return getOrCreateObjectFromArchive(ArchivePath, MemberName, ArchName,
702+
Path);
604703
}
605-
ObjectFile *Res = ObjOrErr->get();
606-
auto Pair = ObjectForUBPathAndArch.emplace(std::make_pair(Path, ArchName),
607-
std::move(ObjOrErr.get()));
608-
BinaryForPath.find(Path)->second.pushEvictor(
609-
[this, Iter = Pair.first]() { ObjectForUBPathAndArch.erase(Iter); });
610-
return Res;
704+
}
705+
706+
Expected<object::Binary *> BinOrErr = loadOrGetBinary(Path);
707+
if (!BinOrErr)
708+
return BinOrErr.takeError();
709+
object::Binary *Bin = *BinOrErr;
710+
711+
if (MachOUniversalBinary *UB = dyn_cast_or_null<MachOUniversalBinary>(Bin)) {
712+
ContainerCacheKey CacheKey{Path, "", ArchName};
713+
return findOrCacheObject(
714+
CacheKey,
715+
[UB, ArchName]() -> Expected<std::unique_ptr<ObjectFile>> {
716+
return UB->getMachOObjectForArch(ArchName);
717+
},
718+
Path);
611719
}
612720
if (Bin->isObject()) {
613721
return cast<ObjectFile>(Bin);

0 commit comments

Comments
 (0)