Skip to content

Commit fa16efb

Browse files
pmur, laboger
authored and committed
cmd/link: enable internal linking of PIE binaries on ppc64le
The amd64/arm64 relocation processing is used as a template and updated for ppc64le. This requires updating the TOC relocation handling code to support Linux-style TOC relocations too (note, AIX uses TOC-indirect accesses). Notably, the shared flag of Go functions is used as a proxy for the local entry point offset encoded in ELF objects. Functions in Go ppc64le shared objects always[1] insert 2 instructions to regenerate the TOC pointer. [1] excepting a couple of special runtime functions, see preprocess in obj9.go for specific details of this behavior. Change-Id: I3646e6dc8a0a0ffe712771a976983315eae5c418 Reviewed-on: https://go-review.googlesource.com/c/go/+/352829 Run-TryBot: Paul Murphy <murp@ibm.com> Reviewed-by: Cherry Mui <cherryyz@google.com> TryBot-Result: Go Bot <gobot@golang.org> Trust: Lynn Boger <laboger@linux.vnet.ibm.com>
1 parent 93bab8a commit fa16efb

File tree

6 files changed

+201
-31
lines changed

6 files changed

+201
-31
lines changed

src/cmd/dist/test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1013,7 +1013,7 @@ func (t *tester) internalLink() bool {
10131013
func (t *tester) internalLinkPIE() bool {
10141014
switch goos + "-" + goarch {
10151015
case "darwin-amd64", "darwin-arm64",
1016-
"linux-amd64", "linux-arm64",
1016+
"linux-amd64", "linux-arm64", "linux-ppc64le",
10171017
"android-arm64",
10181018
"windows-amd64", "windows-386", "windows-arm":
10191019
return true

src/cmd/internal/sys/supported.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ func BuildModeSupported(compiler, buildmode, goos, goarch string) bool {
158158
func InternalLinkPIESupported(goos, goarch string) bool {
159159
switch goos + "/" + goarch {
160160
case "darwin/amd64", "darwin/arm64",
161-
"linux/amd64", "linux/arm64",
161+
"linux/amd64", "linux/arm64", "linux/ppc64le",
162162
"android/arm64",
163163
"windows-amd64", "windows-386", "windows-arm":
164164
return true

src/cmd/link/internal/ld/config.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -225,7 +225,8 @@ func mustLinkExternal(ctxt *Link) (res bool, reason string) {
225225
return true, "buildmode=c-shared"
226226
case BuildModePIE:
227227
switch buildcfg.GOOS + "/" + buildcfg.GOARCH {
228-
case "linux/amd64", "linux/arm64", "android/arm64":
228+
case "android/arm64":
229+
case "linux/amd64", "linux/arm64", "linux/ppc64le":
229230
case "windows/386", "windows/amd64", "windows/arm", "windows/arm64":
230231
case "darwin/amd64", "darwin/arm64":
231232
default:

src/cmd/link/internal/ld/data.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,8 @@ func (st *relocSymState) relocsym(s loader.Sym, P []byte) {
227227
// DWARF info between the compiler and linker.
228228
continue
229229
}
230+
} else if target.IsPPC64() && target.IsPIE() && ldr.SymName(rs) == ".TOC." {
231+
// This is a TOC relative relocation generated from a go object. It is safe to resolve.
230232
} else {
231233
st.err.errorUnresolved(ldr, s, rs)
232234
continue

src/cmd/link/internal/ppc64/asm.go

Lines changed: 193 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -321,6 +321,11 @@ func addelfdynrel(target *ld.Target, ldr *loader.Loader, syms *ld.ArchSyms, s lo
321321
rela.AddUint64(target.Arch, elf.R_INFO(uint32(ldr.SymDynid(targ)), uint32(elf.R_PPC64_ADDR64)))
322322
rela.AddUint64(target.Arch, uint64(r.Add()))
323323
su.SetRelocType(rIdx, objabi.ElfRelocOffset) // ignore during relocsym
324+
} else if target.IsPIE() && target.IsInternal() {
325+
// For internal linking PIE, this R_ADDR relocation cannot
326+
// be resolved statically. We need to generate a dynamic
327+
// relocation. Let the code below handle it.
328+
break
324329
}
325330
return true
326331

@@ -383,12 +388,94 @@ func addelfdynrel(target *ld.Target, ldr *loader.Loader, syms *ld.ArchSyms, s lo
383388
}
384389

385390
// Handle references to ELF symbols from our own object files.
386-
if targType != sym.SDYNIMPORT {
391+
relocs := ldr.Relocs(s)
392+
r = relocs.At(rIdx)
393+
394+
switch r.Type() {
395+
case objabi.R_ADDR:
396+
if ldr.SymType(s) == sym.STEXT {
397+
log.Fatalf("R_ADDR relocation in text symbol %s is unsupported\n", ldr.SymName(s))
398+
}
399+
if target.IsPIE() && target.IsInternal() {
400+
// When internally linking, generate dynamic relocations
401+
// for all typical R_ADDR relocations. The exception
402+
// are those R_ADDR that are created as part of generating
403+
// the dynamic relocations and must be resolved statically.
404+
//
405+
// There are three phases relevant to understanding this:
406+
//
407+
// dodata() // we are here
408+
// address() // symbol address assignment
409+
// reloc() // resolution of static R_ADDR relocs
410+
//
411+
// At this point symbol addresses have not been
412+
// assigned yet (as the final size of the .rela section
413+
// will affect the addresses), and so we cannot write
414+
// the Elf64_Rela.r_offset now. Instead we delay it
415+
// until after the 'address' phase of the linker is
416+
// complete. We do this via Addaddrplus, which creates
417+
// a new R_ADDR relocation which will be resolved in
418+
// the 'reloc' phase.
419+
//
420+
// These synthetic static R_ADDR relocs must be skipped
421+
// now, or else we will be caught in an infinite loop
422+
// of generating synthetic relocs for our synthetic
423+
// relocs.
424+
//
425+
// Furthermore, the rela sections contain dynamic
426+
// relocations with R_ADDR relocations on
427+
// Elf64_Rela.r_offset. This field should contain the
428+
// symbol offset as determined by reloc(), not the
429+
// final dynamically linked address as a dynamic
430+
// relocation would provide.
431+
switch ldr.SymName(s) {
432+
case ".dynsym", ".rela", ".rela.plt", ".got.plt", ".dynamic":
433+
return false
434+
}
435+
} else {
436+
// Either internally linking a static executable,
437+
// in which case we can resolve these relocations
438+
// statically in the 'reloc' phase, or externally
439+
// linking, in which case the relocation will be
440+
// prepared in the 'reloc' phase and passed to the
441+
// external linker in the 'asmb' phase.
442+
if ldr.SymType(s) != sym.SDATA && ldr.SymType(s) != sym.SRODATA {
443+
break
444+
}
445+
}
446+
// Generate R_PPC64_RELATIVE relocations for best
447+
// efficiency in the dynamic linker.
448+
//
449+
// As noted above, symbol addresses have not been
450+
// assigned yet, so we can't generate the final reloc
451+
// entry yet. We ultimately want:
452+
//
453+
// r_offset = s + r.Off
454+
// r_info = R_PPC64_RELATIVE
455+
// r_addend = targ + r.Add
456+
//
457+
// The dynamic linker will set *offset = base address +
458+
// addend.
459+
//
460+
// AddAddrPlus is used for r_offset and r_addend to
461+
// generate new R_ADDR relocations that will update
462+
// these fields in the 'reloc' phase.
463+
rela := ldr.MakeSymbolUpdater(syms.Rela)
464+
rela.AddAddrPlus(target.Arch, s, int64(r.Off()))
465+
if r.Siz() == 8 {
466+
rela.AddUint64(target.Arch, elf.R_INFO(0, uint32(elf.R_PPC64_RELATIVE)))
467+
} else {
468+
ldr.Errorf(s, "unexpected relocation for dynamic symbol %s", ldr.SymName(targ))
469+
}
470+
rela.AddAddrPlus(target.Arch, targ, int64(r.Add()))
471+
472+
// Not mark r done here. So we still apply it statically,
473+
// so in the file content we'll also have the right offset
474+
// to the relocation target. So it can be examined statically
475+
// (e.g. go version).
387476
return true
388477
}
389478

390-
// TODO(austin): Translate our relocations to ELF
391-
392479
return false
393480
}
394481

@@ -542,35 +629,40 @@ func symtoc(ldr *loader.Loader, syms *ld.ArchSyms, s loader.Sym) int64 {
542629
}
543630

544631
// archreloctoc relocates a TOC relative symbol.
545-
// If the symbol pointed by this TOC relative symbol is in .data or .bss, the
546-
// default load instruction can be changed to an addi instruction and the
547-
// symbol address can be used directly.
548-
// This code is for AIX only.
549632
func archreloctoc(ldr *loader.Loader, target *ld.Target, syms *ld.ArchSyms, r loader.Reloc, s loader.Sym, val int64) int64 {
550633
rs := r.Sym()
551-
if target.IsLinux() {
552-
ldr.Errorf(s, "archrelocaddr called for %s relocation\n", ldr.SymName(rs))
553-
}
554634
var o1, o2 uint32
555-
556-
o1 = uint32(val >> 32)
557-
o2 = uint32(val)
558-
559-
if !strings.HasPrefix(ldr.SymName(rs), "TOC.") {
560-
ldr.Errorf(s, "archreloctoc called for a symbol without TOC anchor")
561-
}
562635
var t int64
563636
useAddi := false
564-
relocs := ldr.Relocs(rs)
565-
tarSym := relocs.At(0).Sym()
566-
567-
if target.IsInternal() && tarSym != 0 && ldr.AttrReachable(tarSym) && ldr.SymSect(tarSym).Seg == &ld.Segdata {
568-
t = ldr.SymValue(tarSym) + r.Add() - ldr.SymValue(syms.TOC)
569-
// change ld to addi in the second instruction
570-
o2 = (o2 & 0x03FF0000) | 0xE<<26
571-
useAddi = true
637+
638+
if target.IsBigEndian() {
639+
o1 = uint32(val >> 32)
640+
o2 = uint32(val)
641+
} else {
642+
o1 = uint32(val)
643+
o2 = uint32(val >> 32)
644+
}
645+
646+
// On AIX, TOC data accesses are always made indirectly against R2 (a sequence of addis+ld+load/store). If the
647+
// target of the load is known, the sequence can be rewritten as addis+addi+load/store. On Linux,
648+
// TOC data accesses are always made directly against R2 (e.g addis+load/store).
649+
if target.IsAIX() {
650+
if !strings.HasPrefix(ldr.SymName(rs), "TOC.") {
651+
ldr.Errorf(s, "archreloctoc called for a symbol without TOC anchor")
652+
}
653+
relocs := ldr.Relocs(rs)
654+
tarSym := relocs.At(0).Sym()
655+
656+
if target.IsInternal() && tarSym != 0 && ldr.AttrReachable(tarSym) && ldr.SymSect(tarSym).Seg == &ld.Segdata {
657+
t = ldr.SymValue(tarSym) + r.Add() - ldr.SymValue(syms.TOC)
658+
// change ld to addi in the second instruction
659+
o2 = (o2 & 0x03FF0000) | 0xE<<26
660+
useAddi = true
661+
} else {
662+
t = ldr.SymValue(rs) + r.Add() - ldr.SymValue(syms.TOC)
663+
}
572664
} else {
573-
t = ldr.SymValue(rs) + r.Add() - ldr.SymValue(syms.TOC)
665+
t = ldr.SymValue(rs) + r.Add() - symtoc(ldr, syms, s)
574666
}
575667

576668
if t != int64(int32(t)) {
@@ -593,15 +685,20 @@ func archreloctoc(ldr *loader.Loader, target *ld.Target, syms *ld.ArchSyms, r lo
593685
}
594686
o2 |= uint32(t) & 0xFFFC
595687
}
688+
case objabi.R_ADDRPOWER_TOCREL:
689+
o2 |= uint32(t) & 0xffff
596690
default:
597691
return -1
598692
}
599693

600-
return int64(o1)<<32 | int64(o2)
694+
if target.IsBigEndian() {
695+
return int64(o1)<<32 | int64(o2)
696+
}
697+
return int64(o2)<<32 | int64(o1)
601698
}
602699

603700
// archrelocaddr relocates a symbol address.
604-
// This code is for AIX only.
701+
// This code is for linux only.
605702
func archrelocaddr(ldr *loader.Loader, target *ld.Target, syms *ld.ArchSyms, r loader.Reloc, s loader.Sym, val int64) int64 {
606703
rs := r.Sym()
607704
if target.IsAIX() {
@@ -860,6 +957,18 @@ func archreloc(target *ld.Target, ldr *loader.Loader, syms *ld.ArchSyms, r loade
860957

861958
t := ldr.SymValue(rs) + r.Add() - (ldr.SymValue(s) + int64(r.Off()))
862959

960+
tgtName := ldr.SymName(rs)
961+
962+
// If we are linking PIE or shared code, all golang generated object files have an extra 2 instruction prologue
963+
// to regenerate the TOC pointer from R12. The exceptions are the two special-case functions tested below. Note,
964+
// local call offsets for externally generated objects are accounted for when converting into golang relocs.
965+
if !ldr.IsExternal(rs) && ldr.AttrShared(rs) && tgtName != "runtime.duffzero" && tgtName != "runtime.duffcopy" {
966+
// Furthermore, only apply the offset if the target looks like the start of a function call.
967+
if r.Add() == 0 && ldr.SymType(rs) == sym.STEXT {
968+
t += 8
969+
}
970+
}
971+
863972
if t&3 != 0 {
864973
ldr.Errorf(s, "relocation for %s+%d is not aligned: %d", ldr.SymName(rs), r.Off(), t)
865974
}
@@ -872,6 +981,62 @@ func archreloc(target *ld.Target, ldr *loader.Loader, syms *ld.ArchSyms, r loade
872981
case objabi.R_POWER_TOC: // S + A - .TOC.
873982
return ldr.SymValue(rs) + r.Add() - symtoc(ldr, syms, s), nExtReloc, true
874983

984+
case objabi.R_ADDRPOWER_PCREL: // S + A - P
985+
t := ldr.SymValue(rs) + r.Add() - (ldr.SymValue(s) + int64(r.Off()))
986+
ha := uint16(((t + 0x8000) >> 16) & 0xFFFF)
987+
l := uint16(t)
988+
if target.IsBigEndian() {
989+
val |= int64(l)
990+
val |= int64(ha) << 32
991+
} else {
992+
val |= int64(ha)
993+
val |= int64(l) << 32
994+
}
995+
return val, nExtReloc, true
996+
997+
case objabi.R_POWER_TLS:
998+
const OP_ADD = 31<<26 | 266<<1
999+
const MASK_OP_ADD = 0x3F<<26 | 0x1FF<<1
1000+
if val&MASK_OP_ADD != OP_ADD {
1001+
ldr.Errorf(s, "R_POWER_TLS reloc only supports XO form ADD, not %08X", val)
1002+
}
1003+
// Verify RB is R13 in ADD RA,RB,RT.
1004+
if (val>>11)&0x1F != 13 {
1005+
// If external linking is made to support this, it may expect the linker to rewrite RB.
1006+
ldr.Errorf(s, "R_POWER_TLS reloc requires R13 in RB (%08X).", uint32(val))
1007+
}
1008+
return val, nExtReloc, true
1009+
1010+
case objabi.R_POWER_TLS_IE:
1011+
// Convert TLS_IE relocation to TLS_LE if supported.
1012+
if !(target.IsPIE() && target.IsElf()) {
1013+
log.Fatalf("cannot handle R_POWER_TLS_IE (sym %s) when linking non-PIE, non-ELF binaries internally", ldr.SymName(s))
1014+
}
1015+
1016+
// We are an ELF binary, we can safely convert to TLS_LE from:
1017+
// addis to, r2, x@got@tprel@ha
1018+
// ld to, to, x@got@tprel@l(to)
1019+
//
1020+
// to TLS_LE by converting to:
1021+
// addis to, r0, x@tprel@ha
1022+
// addi to, to, x@tprel@l(to)
1023+
1024+
const OP_ADDI = 14 << 26
1025+
const OP_MASK = 0x3F << 26
1026+
const OP_RA_MASK = 0x1F << 16
1027+
uval := uint64(val)
1028+
// convert r2 to r0, and ld to addi
1029+
if target.IsBigEndian() {
1030+
uval = uval &^ (OP_RA_MASK << 32)
1031+
uval = (uval &^ OP_MASK) | OP_ADDI
1032+
} else {
1033+
uval = uval &^ (OP_RA_MASK)
1034+
uval = (uval &^ (OP_MASK << 32)) | (OP_ADDI << 32)
1035+
}
1036+
val = int64(uval)
1037+
// Treat this like an R_POWER_TLS_LE relocation now.
1038+
fallthrough
1039+
8751040
case objabi.R_POWER_TLS_LE:
8761041
// The thread pointer points 0x7000 bytes after the start of the
8771042
// thread local storage area as documented in section "3.7.2 TLS

src/debug/elf/elf.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2349,6 +2349,7 @@ const (
23492349
R_PPC64_GOT16_HI R_PPC64 = 16 // R_POWERPC_GOT16_HI
23502350
R_PPC64_GOT16_HA R_PPC64 = 17 // R_POWERPC_GOT16_HA
23512351
R_PPC64_JMP_SLOT R_PPC64 = 21 // R_POWERPC_JMP_SLOT
2352+
R_PPC64_RELATIVE R_PPC64 = 22 // R_POWERPC_RELATIVE
23522353
R_PPC64_REL32 R_PPC64 = 26 // R_POWERPC_REL32
23532354
R_PPC64_ADDR64 R_PPC64 = 38
23542355
R_PPC64_ADDR16_HIGHER R_PPC64 = 39
@@ -2457,6 +2458,7 @@ var rppc64Strings = []intName{
24572458
{16, "R_PPC64_GOT16_HI"},
24582459
{17, "R_PPC64_GOT16_HA"},
24592460
{21, "R_PPC64_JMP_SLOT"},
2461+
{22, "R_PPC64_RELATIVE"},
24602462
{26, "R_PPC64_REL32"},
24612463
{38, "R_PPC64_ADDR64"},
24622464
{39, "R_PPC64_ADDR16_HIGHER"},

0 commit comments

Comments
 (0)