Skip to content

Commit 8f91f38

Browse files
committed
[LLD] Search archives for symbol defs to override COMMON symbols.
This patch changes the archive handling to enable the semantics needed for legacy FORTRAN common blocks and block data. When we have a COMMON definition of a symbol and are including an archive, LLD will now search the members for global/weak definitions to override the COMMON symbol. The previous LLD behavior (where a member would only be included if it satisfied some other needed symbol definition) can be re-enabled with the option '-no-fortran-common'. Differential Revision: https://reviews.llvm.org/D86142
1 parent 733e2ae commit 8f91f38

File tree

9 files changed

+326
-0
lines changed

9 files changed

+326
-0
lines changed

lld/ELF/Config.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,7 @@ struct Configuration {
163163
bool fixCortexA53Errata843419;
164164
bool fixCortexA8;
165165
bool formatBinary = false;
166+
bool fortranCommon;
166167
bool gcSections;
167168
bool gdbIndex;
168169
bool gnuHash = false;

lld/ELF/Driver.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -974,6 +974,8 @@ static void readConfigs(opt::InputArgList &args) {
974974
!args.hasArg(OPT_relocatable);
975975
config->fixCortexA8 =
976976
args.hasArg(OPT_fix_cortex_a8) && !args.hasArg(OPT_relocatable);
977+
config->fortranCommon =
978+
args.hasFlag(OPT_fortran_common, OPT_no_fortran_common, true);
977979
config->gcSections = args.hasFlag(OPT_gc_sections, OPT_no_gc_sections, false);
978980
config->gnuUnique = args.hasFlag(OPT_gnu_unique, OPT_no_gnu_unique, true);
979981
config->gdbIndex = args.hasFlag(OPT_gdb_index, OPT_no_gdb_index, false);

lld/ELF/InputFiles.cpp

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1237,6 +1237,88 @@ void ArchiveFile::fetch(const Archive::Symbol &sym) {
12371237
parseFile(file);
12381238
}
12391239

1240+
// The handling of tentative definitions (COMMON symbols) in archives is murky.
1241+
// A tentative definition will be promoted to a global definition if there are no
1242+
// non-tentative definitions to dominate it. When we hold a tentative definition
1243+
// to a symbol and are inspecting archive members for inclusion there are 2
1244+
// ways we can proceed:
1245+
//
1246+
// 1) Consider the tentative definition a 'real' definition (ie promotion from
1247+
// tentative to real definition has already happened) and not inspect
1248+
// archive members for Global/Weak definitions to replace the tentative
1249+
// definition. An archive member would only be included if it satisfies some
1250+
// other undefined symbol. This is the behavior Gold uses.
1251+
//
1252+
// 2) Consider the tentative definition as still undefined (ie the promotion to
1253+
// a real definition happens only after all symbol resolution is done).
1254+
// The linker searches archive members for global or weak definitions to
1255+
// replace the tentative definition with. This is the behavior used by
1256+
// GNU ld.
1257+
//
1258+
// The second behavior is inherited from SysVR4, which based it on the FORTRAN
1259+
// COMMON BLOCK model. This behavior is needed for proper initializations in old
1260+
// (pre F90) FORTRAN code that is packaged into an archive.
1261+
//
1262+
// The following functions search archive members for definitions to replace
1263+
// tentative definitions (implementing behavior 2).
1264+
static bool isBitcodeNonCommonDef(MemoryBufferRef mb, StringRef symName,
1265+
StringRef archiveName) {
1266+
IRSymtabFile symtabFile = check(readIRSymtab(mb));
1267+
for (const irsymtab::Reader::SymbolRef &sym :
1268+
symtabFile.TheReader.symbols()) {
1269+
if (sym.isGlobal() && sym.getName() == symName)
1270+
return !sym.isUndefined() && !sym.isCommon();
1271+
}
1272+
return false;
1273+
}
1274+
1275+
template <class ELFT>
1276+
static bool isNonCommonDef(MemoryBufferRef mb, StringRef symName,
1277+
StringRef archiveName) {
1278+
ObjFile<ELFT> *obj = make<ObjFile<ELFT>>(mb, archiveName);
1279+
StringRef stringtable = obj->getStringTable();
1280+
1281+
for (auto sym : obj->template getGlobalELFSyms<ELFT>()) {
1282+
Expected<StringRef> name = sym.getName(stringtable);
1283+
if (name && name.get() == symName)
1284+
return sym.isDefined() && !sym.isCommon();
1285+
}
1286+
return false;
1287+
}
1288+
1289+
static bool isNonCommonDef(MemoryBufferRef mb, StringRef symName,
1290+
StringRef archiveName) {
1291+
switch (getELFKind(mb, archiveName)) {
1292+
case ELF32LEKind:
1293+
return isNonCommonDef<ELF32LE>(mb, symName, archiveName);
1294+
case ELF32BEKind:
1295+
return isNonCommonDef<ELF32BE>(mb, symName, archiveName);
1296+
case ELF64LEKind:
1297+
return isNonCommonDef<ELF64LE>(mb, symName, archiveName);
1298+
case ELF64BEKind:
1299+
return isNonCommonDef<ELF64BE>(mb, symName, archiveName);
1300+
default:
1301+
llvm_unreachable("getELFKind");
1302+
}
1303+
}
1304+
1305+
bool ArchiveFile::shouldFetchForCommon(const Archive::Symbol &sym) {
1306+
Archive::Child c =
1307+
CHECK(sym.getMember(), toString(this) +
1308+
": could not get the member for symbol " +
1309+
toELFString(sym));
1310+
MemoryBufferRef mb =
1311+
CHECK(c.getMemoryBufferRef(),
1312+
toString(this) +
1313+
": could not get the buffer for the member defining symbol " +
1314+
toELFString(sym));
1315+
1316+
if (isBitcode(mb))
1317+
return isBitcodeNonCommonDef(mb, sym.getName(), getName());
1318+
1319+
return isNonCommonDef(mb, sym.getName(), getName());
1320+
}
1321+
12401322
size_t ArchiveFile::getMemberCount() const {
12411323
size_t count = 0;
12421324
Error err = Error::success();
@@ -1771,6 +1853,13 @@ template <class ELFT> void LazyObjFile::parse() {
17711853
}
17721854
}
17731855

1856+
bool LazyObjFile::shouldFetchForCommon(const StringRef &name) {
1857+
if (isBitcode(mb))
1858+
return isBitcodeNonCommonDef(mb, name, archiveName);
1859+
1860+
return isNonCommonDef(mb, name, archiveName);
1861+
}
1862+
17741863
std::string elf::replaceThinLTOSuffix(StringRef path) {
17751864
StringRef suffix = config->thinLTOObjectSuffixReplace.first;
17761865
StringRef repl = config->thinLTOObjectSuffixReplace.second;

lld/ELF/InputFiles.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -312,6 +312,10 @@ class LazyObjFile : public InputFile {
312312
template <class ELFT> void parse();
313313
void fetch();
314314

315+
// Check if a non-common symbol should be fetched to override a common
316+
// definition.
317+
bool shouldFetchForCommon(const StringRef &name);
318+
315319
bool fetched = false;
316320

317321
private:
@@ -331,6 +335,10 @@ class ArchiveFile : public InputFile {
331335
// more than once.)
332336
void fetch(const Archive::Symbol &sym);
333337

338+
// Check if a non-common symbol should be fetched to override a common
339+
// definition.
340+
bool shouldFetchForCommon(const Archive::Symbol &sym);
341+
334342
size_t getMemberCount() const;
335343
size_t getFetchedMemberCount() const { return seen.size(); }
336344

lld/ELF/Options.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,10 @@ defm optimize_bb_jumps: BB<"optimize-bb-jumps",
6464
"Remove direct jumps at the end to the next basic block",
6565
"Do not remove any direct jumps at the end to the next basic block (default)">;
6666

67+
defm fortran_common : BB<"fortran-common",
68+
"Search archive members for definitions to override COMMON symbols (default)",
69+
"Do not search archive members for definitions to override COMMON symbols">;
70+
6771
defm split_stack_adjust_size
6872
: Eq<"split-stack-adjust-size",
6973
"Specify adjustment to stack size when a split-stack function calls a "

lld/ELF/Symbols.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -689,7 +689,33 @@ void Symbol::resolveDefined(const Defined &other) {
689689
other.value);
690690
}
691691

692+
template <class LazyT>
693+
static void replaceCommon(Symbol &oldSym, const LazyT &newSym) {
694+
backwardReferences.erase(&oldSym);
695+
oldSym.replace(newSym);
696+
newSym.fetch();
697+
}
698+
692699
template <class LazyT> void Symbol::resolveLazy(const LazyT &other) {
700+
// For common objects, we want to look for global or weak definitions that
701+
// should be fetched as the canonical definition instead.
702+
if (isCommon() && elf::config->fortranCommon) {
703+
if (auto *laSym = dyn_cast<LazyArchive>(&other)) {
704+
ArchiveFile *archive = cast<ArchiveFile>(laSym->file);
705+
const Archive::Symbol &archiveSym = laSym->sym;
706+
if (archive->shouldFetchForCommon(archiveSym)) {
707+
replaceCommon(*this, other);
708+
return;
709+
}
710+
} else if (auto *loSym = dyn_cast<LazyObject>(&other)) {
711+
LazyObjFile *obj = cast<LazyObjFile>(loSym->file);
712+
if (obj->shouldFetchForCommon(loSym->getName())) {
713+
replaceCommon(*this, other);
714+
return;
715+
}
716+
}
717+
}
718+
693719
if (!isUndefined()) {
694720
// See the comment in resolveUndefined().
695721
if (isDefined())

lld/docs/ld.lld.1

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,8 @@ Do not demangle symbol names.
311311
Inhibit output of an
312312
.Li .interp
313313
section.
314+
.It Fl -no-fortran-common
315+
Do not search archive members for definitions to override COMMON symbols.
314316
.It Fl -no-gc-sections
315317
Disable garbage collection of unused sections.
316318
.It Fl -no-gnu-unique
Lines changed: 182 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
1+
# REQUIRES: ppc
2+
3+
# RUN: rm -rf %t.dir
4+
# RUN: split-file %s %t.dir
5+
# RUN: cd %t.dir
6+
7+
## Object files.
8+
# RUN: llvm-mc -triple=powerpc64le -filetype=obj ref.s -o %t1.o
9+
# RUN: llvm-mc -triple=powerpc64le -filetype=obj refanddef.s -o %t2.o
10+
# RUN: llvm-mc -triple=powerpc64le -filetype=obj def.s -o %tstrong_data_only.o
11+
# RUN: llvm-mc -triple=powerpc64le -filetype=obj weak.s -o %tweak_data_only.o
12+
13+
# RUN: llvm-mc -triple=powerpc64le -filetype=obj main.s -o %tmain.o
14+
15+
## Object file archives.
16+
# RUN: llvm-ar crs %t1.a %t1.o %tstrong_data_only.o
17+
# RUN: llvm-ar crs %t2.a %t1.o %tweak_data_only.o
18+
# RUN: llvm-ar crs %t3.a %t2.o %tstrong_data_only.o
19+
20+
## Bitcode files.
21+
# RUN: llvm-as -o %t1.bc commonblock.ll
22+
# RUN: llvm-as -o %t2.bc blockdata.ll
23+
24+
## Bitcode archive.
25+
# RUN: llvm-ar crs %t4.a %t1.bc %t2.bc
26+
27+
# RUN: ld.lld -o %t1 %tmain.o %t1.a
28+
# RUN: llvm-objdump -D -j .data %t1 | FileCheck --check-prefix=TEST1 %s
29+
30+
# RUN: ld.lld -o %t2 %tmain.o --start-lib %t1.o %tstrong_data_only.o --end-lib
31+
# RUN: llvm-objdump -D -j .data %t2 | FileCheck --check-prefix=TEST1 %s
32+
33+
# RUN: ld.lld -o %t3 %tmain.o %t2.a
34+
# RUN: llvm-objdump -D -j .data %t3 | FileCheck --check-prefix=TEST1 %s
35+
36+
# RUN: ld.lld -o %t4 %tmain.o --start-lib %t1.o %tweak_data_only.o --end-lib
37+
# RUN: llvm-objdump -D -j .data %t4 | FileCheck --check-prefix=TEST1 %s
38+
39+
# RUN: ld.lld -o %t5 %tmain.o %t3.a --print-map | FileCheck --check-prefix=MAP %s
40+
41+
# RUN: ld.lld -o %t6 %tmain.o %t2.o %t1.a
42+
# RUN: llvm-objdump -D -j .data %t6 | FileCheck --check-prefix=TEST2 %s
43+
44+
# RUN: ld.lld -o %t7 %tmain.o %t2.o --start-lib %t1.o %tstrong_data_only.o --end-lib
45+
# RUN: llvm-objdump -D -j .data %t7 | FileCheck --check-prefix=TEST2 %s
46+
47+
# RUN: not ld.lld -o %t8 %tmain.o %t1.a %tstrong_data_only.o 2>&1 | \
48+
# RUN: FileCheck --check-prefix=ERR %s
49+
50+
# RUN: not ld.lld -o %t9 %tmain.o --start-lib %t1.o %t2.o --end-lib %tstrong_data_only.o 2>&1 | \
51+
# RUN: FileCheck --check-prefix=ERR %s
52+
53+
# ERR: ld.lld: error: duplicate symbol: block
54+
55+
# RUN: ld.lld --no-fortran-common -o %t10 %tmain.o %t1.a
56+
# RUN: llvm-readobj --syms %t10 | FileCheck --check-prefix=NFC %s
57+
58+
# RUN: ld.lld --no-fortran-common -o %t11 %tmain.o --start-lib %t1.o %tstrong_data_only.o --end-lib
59+
# RUN: llvm-readobj --syms %t11 | FileCheck --check-prefix=NFC %s
60+
61+
# RUN: ld.lld -o - %tmain.o %t4.a --lto-emit-asm | FileCheck --check-prefix=ASM %s
62+
63+
# RUN: ld.lld -o - %tmain.o --start-lib %t1.bc %t2.bc --end-lib --lto-emit-asm | \
64+
# RUN: FileCheck --check-prefix=ASM %s
65+
66+
## Old FORTRAN that mixes use of COMMON blocks and BLOCK DATA requires that we
67+
## search through archives for non-tentative definitions (from the BLOCK DATA)
68+
## to replace the tentative definitions (from the COMMON block(s)).
69+
70+
## Ensure we have used the initialized definition of 'block' instead of a
71+
## common definition.
72+
# TEST1-LABEL: Disassembly of section .data:
73+
# TEST1: <block>:
74+
# TEST1-NEXT: ea 2e 44 54
75+
# TEST1-NEXT: fb 21 09 40
76+
# TEST1-NEXT: ...
77+
78+
79+
# NFC: Name: block
80+
# NFC-NEXT: Value:
81+
# NFC-NEXT: Size: 40
82+
# NFC-NEXT: Binding: Global (0x1)
83+
# NFC-NEXT: Type: Object (0x1)
84+
# NFC-NEXT: Other: 0
85+
# NFC-NEXT: Section: .bss
86+
87+
## Expecting the strong definition from the object file, and the definitions from
88+
## the archive do not interfere.
89+
# TEST2-LABEL: Disassembly of section .data:
90+
# TEST2: <block>:
91+
# TEST2-NEXT: 03 57 14 8b
92+
# TEST2-NEXT: 0a bf 05 40
93+
# TEST2-NEXT: ...
94+
95+
# MAP: 28 8 {{.*}}tmp3.a(common-archive-lookup.s.tmp2.o):(.data)
96+
# MAP-NEXT: 28 1 block
97+
98+
# ASM: .type block,@object
99+
# ASM: block:
100+
# ASM-NEXT: .long 5
101+
# ASM: .size block, 20
102+
103+
#--- ref.s
104+
.text
105+
.abiversion 2
106+
.global bar
107+
.type bar,@function
108+
bar:
109+
addis 4, 2, block@toc@ha
110+
addi 4, 4, block@toc@l
111+
112+
## Tentative definition of 'block'.
113+
.comm block,40,8
114+
115+
#--- refanddef.s
116+
## An alternate strong definition of block, in the same file as
117+
## a different referenced symbol.
118+
.text
119+
.abiversion 2
120+
.global bar
121+
.type bar,@function
122+
bar:
123+
addis 4, 2, block@toc@ha
124+
addi 4, 4, block@toc@l
125+
126+
.data
127+
.type block,@object
128+
.global block
129+
.p2align 3
130+
block:
131+
.quad 0x4005bf0a8b145703 # double 2.7182818284589998
132+
.space 32
133+
.size block, 40
134+
135+
#--- def.s
136+
## Strong definition of 'block'.
137+
.data
138+
.type block,@object
139+
.global block
140+
.p2align 3
141+
block:
142+
.quad 0x400921fb54442eea # double 3.1415926535900001
143+
.space 32
144+
.size block, 40
145+
146+
#--- weak.s
147+
## Weak definition of `block`.
148+
.data
149+
.type block,@object
150+
.weak block
151+
.p2align 3
152+
block:
153+
.quad 0x400921fb54442eea # double 3.1415926535900001
154+
.space 32
155+
.size block, 40
156+
157+
#--- main.s
158+
.global _start
159+
_start:
160+
bl bar
161+
blr
162+
163+
164+
#--- blockdata.ll
165+
target datalayout = "e-m:e-i64:64-n32:64-v256:256:256-v512:512:512"
166+
target triple = "powerpc64le-unknown-linux-gnu"
167+
168+
@block = dso_local local_unnamed_addr global [5 x i32] [i32 5, i32 0, i32 0, i32 0, i32 0], align 4
169+
170+
#--- commonblock.ll
171+
target datalayout = "e-m:e-i64:64-n32:64-v256:256:256-v512:512:512"
172+
target triple = "powerpc64le-unknown-linux-gnu"
173+
174+
@block = common dso_local local_unnamed_addr global [5 x i32] zeroinitializer, align 4
175+
176+
define dso_local i32 @bar(i32 signext %i) local_unnamed_addr {
177+
entry:
178+
%idxprom = sext i32 %i to i64
179+
%arrayidx = getelementptr inbounds [5 x i32], [5 x i32]* @block, i64 0, i64 %idxprom
180+
%0 = load i32, i32* %arrayidx, align 8
181+
ret i32 %0
182+
}

lld/test/ELF/warn-backrefs.s

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,18 @@
6666
# RUN: echo '.weak foo; foo:' | llvm-mc -filetype=obj -triple=x86_64 - -o %tweak.o
6767
# RUN: ld.lld --fatal-warnings --warn-backrefs --start-lib %tweak.o --end-lib %t1.o %t2.o -o /dev/null
6868

69+
## Check common symbols. A common sym might later be replaced by a non-common definition.
70+
# RUN: echo '.comm obj, 4' | llvm-mc -filetype=obj -triple=x86_64 -o %tcomm.o
71+
# RUN: echo '.type obj,@object; .data; .globl obj; .p2align 2; obj: .long 55; .size obj, 4' | llvm-mc -filetype=obj -triple=x86_64 -o %tstrong.o
72+
# RUN: echo '.globl foo; foo: movl obj(%rip), %eax' | llvm-mc -triple=x86_64 -filetype=obj -o %t5.o
73+
# RUN: llvm-ar rcs %tcomm.a %tcomm.o
74+
# RUN: llvm-ar rcs %tstrong.a %tstrong.o
75+
# RUN: ld.lld --warn-backrefs %tcomm.a %t1.o %t5.o 2>&1 -o /dev/null | FileCheck --check-prefix=COMM %s
76+
# RUN: ld.lld --fatal-warnings --warn-backrefs %tcomm.a %t1.o %t5.o %tstrong.a 2>&1 -o /dev/null
77+
# RUN: ld.lld --warn-backrefs --no-fortran-common %tcomm.a %t1.o %t5.o %tstrong.a 2>&1 -o /dev/null | FileCheck --check-prefix=COMM %s
78+
79+
# COMM: ld.lld: warning: backward reference detected: obj in {{.*}}5.o refers to {{.*}}comm.a
80+
6981
## If a lazy definition appears after the backward reference, don't warn.
7082
## A traditional Unix linker will resolve the reference to the later definition.
7183
# RUN: ld.lld --fatal-warnings --warn-backrefs %t2.a %t1.o %t2.a -o /dev/null

0 commit comments

Comments
 (0)