Skip to content

Commit 07ff596

Browse files
AWSjswinney and cherrymui
authored and committed
runtime/internal/atomic: add LSE atomics instructions to arm64
As a follow-up to an earlier change [1] that added ARMv8+LSE instructions to the compiler-generated atomic intrinsics, make the same change in the runtime library. Since not all ARMv8 systems support LSE instructions, they are protected by a feature-flag branch.

[1]: golang.org/cl/234217 commit: ecc3f51

Change-Id: I0e2fb22e78d5eddb6547863667a8865946679a00
Reviewed-on: https://go-review.googlesource.com/c/go/+/310591
Reviewed-by: Cherry Mui <cherryyz@google.com>
Run-TryBot: Cherry Mui <cherryyz@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
Trust: Heschi Kreinick <heschi@google.com>
1 parent 0388670 commit 07ff596

File tree

2 files changed

+85
-17
lines changed

2 files changed

+85
-17
lines changed

src/runtime/internal/atomic/atomic_arm64.go

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,14 @@
77

88
package atomic
99

10-
import "unsafe"
10+
import (
11+
"unsafe"
12+
"internal/cpu"
13+
)
14+
15+
const (
16+
offsetARM64HasATOMICS = unsafe.Offsetof(cpu.ARM64.HasATOMICS)
17+
)
1118

1219
//go:noescape
1320
func Xadd(ptr *uint32, delta int32) uint32

src/runtime/internal/atomic/atomic_arm64.s

Lines changed: 77 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// Use of this source code is governed by a BSD-style
33
// license that can be found in the LICENSE file.
44

5+
#include "go_asm.h"
56
#include "textflag.h"
67

78
TEXT ·Casint32(SB), NOSPLIT, $0-17
@@ -127,10 +128,15 @@ TEXT ·Store64(SB), NOSPLIT, $0-16
127128
TEXT ·Xchg(SB), NOSPLIT, $0-20
128129
MOVD ptr+0(FP), R0
129130
MOVW new+8(FP), R1
130-
again:
131+
MOVBU internal∕cpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
132+
CBZ R4, load_store_loop
133+
SWPALW R1, (R0), R2
134+
MOVW R2, ret+16(FP)
135+
RET
136+
load_store_loop:
131137
LDAXRW (R0), R2
132138
STLXRW R1, (R0), R3
133-
CBNZ R3, again
139+
CBNZ R3, load_store_loop
134140
MOVW R2, ret+16(FP)
135141
RET
136142

@@ -142,10 +148,15 @@ again:
142148
TEXT ·Xchg64(SB), NOSPLIT, $0-24
143149
MOVD ptr+0(FP), R0
144150
MOVD new+8(FP), R1
145-
again:
151+
MOVBU internal∕cpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
152+
CBZ R4, load_store_loop
153+
SWPALD R1, (R0), R2
154+
MOVD R2, ret+16(FP)
155+
RET
156+
load_store_loop:
146157
LDAXR (R0), R2
147158
STLXR R1, (R0), R3
148-
CBNZ R3, again
159+
CBNZ R3, load_store_loop
149160
MOVD R2, ret+16(FP)
150161
RET
151162

@@ -160,12 +171,20 @@ TEXT ·Cas(SB), NOSPLIT, $0-17
160171
MOVD ptr+0(FP), R0
161172
MOVW old+8(FP), R1
162173
MOVW new+12(FP), R2
163-
again:
174+
MOVBU internal∕cpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
175+
CBZ R4, load_store_loop
176+
MOVD R1, R3
177+
CASALW R3, (R0), R2
178+
CMP R1, R3
179+
CSET EQ, R0
180+
MOVB R0, ret+16(FP)
181+
RET
182+
load_store_loop:
164183
LDAXRW (R0), R3
165184
CMPW R1, R3
166185
BNE ok
167186
STLXRW R2, (R0), R3
168-
CBNZ R3, again
187+
CBNZ R3, load_store_loop
169188
ok:
170189
CSET EQ, R0
171190
MOVB R0, ret+16(FP)
@@ -183,12 +202,20 @@ TEXT ·Cas64(SB), NOSPLIT, $0-25
183202
MOVD ptr+0(FP), R0
184203
MOVD old+8(FP), R1
185204
MOVD new+16(FP), R2
186-
again:
205+
MOVBU internal∕cpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
206+
CBZ R4, load_store_loop
207+
MOVD R1, R3
208+
CASALD R3, (R0), R2
209+
CMP R1, R3
210+
CSET EQ, R0
211+
MOVB R0, ret+24(FP)
212+
RET
213+
load_store_loop:
187214
LDAXR (R0), R3
188215
CMP R1, R3
189216
BNE ok
190217
STLXR R2, (R0), R3
191-
CBNZ R3, again
218+
CBNZ R3, load_store_loop
192219
ok:
193220
CSET EQ, R0
194221
MOVB R0, ret+24(FP)
@@ -201,11 +228,17 @@ ok:
201228
TEXT ·Xadd(SB), NOSPLIT, $0-20
202229
MOVD ptr+0(FP), R0
203230
MOVW delta+8(FP), R1
204-
again:
231+
MOVBU internal∕cpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
232+
CBZ R4, load_store_loop
233+
LDADDALW R1, (R0), R2
234+
ADD R1, R2
235+
MOVW R2, ret+16(FP)
236+
RET
237+
load_store_loop:
205238
LDAXRW (R0), R2
206239
ADDW R2, R1, R2
207240
STLXRW R2, (R0), R3
208-
CBNZ R3, again
241+
CBNZ R3, load_store_loop
209242
MOVW R2, ret+16(FP)
210243
RET
211244

@@ -216,11 +249,17 @@ again:
216249
TEXT ·Xadd64(SB), NOSPLIT, $0-24
217250
MOVD ptr+0(FP), R0
218251
MOVD delta+8(FP), R1
219-
again:
252+
MOVBU internal∕cpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
253+
CBZ R4, load_store_loop
254+
LDADDALD R1, (R0), R2
255+
ADD R1, R2
256+
MOVD R2, ret+16(FP)
257+
RET
258+
load_store_loop:
220259
LDAXR (R0), R2
221260
ADD R2, R1, R2
222261
STLXR R2, (R0), R3
223-
CBNZ R3, again
262+
CBNZ R3, load_store_loop
224263
MOVD R2, ret+16(FP)
225264
RET
226265

@@ -236,37 +275,59 @@ TEXT ·Xchguintptr(SB), NOSPLIT, $0-24
236275
TEXT ·And8(SB), NOSPLIT, $0-9
237276
MOVD ptr+0(FP), R0
238277
MOVB val+8(FP), R1
278+
MOVBU internal∕cpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
279+
CBZ R4, load_store_loop
280+
MVN R1, R2
281+
LDCLRALB R2, (R0), R3
282+
RET
283+
load_store_loop:
239284
LDAXRB (R0), R2
240285
AND R1, R2
241286
STLXRB R2, (R0), R3
242-
CBNZ R3, -3(PC)
287+
CBNZ R3, load_store_loop
243288
RET
244289

245290
TEXT ·Or8(SB), NOSPLIT, $0-9
246291
MOVD ptr+0(FP), R0
247292
MOVB val+8(FP), R1
293+
MOVBU internal∕cpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
294+
CBZ R4, load_store_loop
295+
LDORALB R1, (R0), R2
296+
RET
297+
load_store_loop:
248298
LDAXRB (R0), R2
249299
ORR R1, R2
250300
STLXRB R2, (R0), R3
251-
CBNZ R3, -3(PC)
301+
CBNZ R3, load_store_loop
252302
RET
253303

254304
// func And(addr *uint32, v uint32)
255305
TEXT ·And(SB), NOSPLIT, $0-12
256306
MOVD ptr+0(FP), R0
257307
MOVW val+8(FP), R1
308+
MOVBU internal∕cpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
309+
CBZ R4, load_store_loop
310+
MVN R1, R2
311+
LDCLRALW R2, (R0), R3
312+
RET
313+
load_store_loop:
258314
LDAXRW (R0), R2
259315
AND R1, R2
260316
STLXRW R2, (R0), R3
261-
CBNZ R3, -3(PC)
317+
CBNZ R3, load_store_loop
262318
RET
263319

264320
// func Or(addr *uint32, v uint32)
265321
TEXT ·Or(SB), NOSPLIT, $0-12
266322
MOVD ptr+0(FP), R0
267323
MOVW val+8(FP), R1
324+
MOVBU internal∕cpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
325+
CBZ R4, load_store_loop
326+
LDORALW R1, (R0), R2
327+
RET
328+
load_store_loop:
268329
LDAXRW (R0), R2
269330
ORR R1, R2
270331
STLXRW R2, (R0), R3
271-
CBNZ R3, -3(PC)
332+
CBNZ R3, load_store_loop
272333
RET

0 commit comments

Comments
 (0)