Skip to content

Commit 98ea60d

Browse files
committed
Add metal ir analyze
1 parent 8e8b15b commit 98ea60d

File tree

1 file changed

+183
-0
lines changed

1 file changed

+183
-0
lines changed

8.misc/ngfx_metallib2spirv.md

Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,183 @@
1+
# MetalLib翻译到SPIRV
2+
3+
Apple Metal使用的LLVM IR是表达能力非常**完备**的中间表示语言。
4+
5+
```cpp
6+
#import <metal_stdlib>
7+
8+
using namespace metal;
9+
10+
// Computes one texel of a depth-pyramid slice from a higher-resolution slice.
// Each thread reduces a 2x2 footprint of inDepth with max() and writes the result to outDepth.
11+
// Handles downsampling from odd-sized depth textures: threads that pass the edge tests below
// also fold in a third column and/or row of samples.
//
// inDepth   - source depth texture, read through a nearest-filter sampler in pixel coordinates.
// outDepth  - destination texture; exactly one texel is written, at tid.
// inputRect - .xy are read as the source width/height, .zw are added to tid * 2 as an offset.
// tid       - thread position in the grid; also the destination texel coordinate.
12+
kernel void depthPyramid(depth2d<float, access::sample> inDepth [[texture(0)]],
13+
texture2d<float, access::write> outDepth [[texture(1)]],
14+
constant uint4& inputRect [[buffer(2)]],
15+
uint2 tid [[thread_position_in_grid]])
16+
{
17+
// Nearest min/mag filtering with coord::pixel: sample positions are given in texels, not [0,1].
constexpr sampler sam (min_filter::nearest, mag_filter::nearest, coord::pixel);
18+
uint source_width = inputRect.x;
19+
uint source_height = inputRect.y;
20+
// First source texel of this thread's footprint: two source texels per destination texel,
// shifted by inputRect.zw. The sum is computed in uint and then converted to float2.
float2 src = float2(tid * 2 + inputRect.zw);
21+
22+
// Despite its name, minval holds the running MAXIMUM of the samples (every fold uses max()).
float minval = inDepth.sample(sam, src);
23+
minval = max(minval, inDepth.sample(sam, src + float2(0, 1)));
24+
minval = max(minval, inDepth.sample(sam, src + float2(1, 0)));
25+
minval = max(minval, inDepth.sample(sam, src + float2(1, 1)));
26+
// True when tid * 2 + 2 == size - 1, so the +2 offset sampled below is one past the 2x2 footprint.
// The arithmetic is uint, so source_width/source_height below 3 wrap around instead of going negative.
// NOTE(review): these tests use tid * 2 without the inputRect.zw offset that src carries - confirm intended.
bool edge_x = (tid.x * 2 == source_width - 3);
27+
bool edge_y = (tid.y * 2 == source_height - 3);
28+
29+
// Extra column at x + 2 for the two rows of the footprint.
if (edge_x)
30+
{
31+
minval = max(minval, inDepth.sample(sam, src + float2(2, 0)));
32+
minval = max(minval, inDepth.sample(sam, src + float2(2, 1)));
33+
}
34+
// Extra row at y + 2 for the two columns of the footprint.
if (edge_y)
35+
{
36+
minval = max(minval, inDepth.sample(sam, src + float2(0, 2)));
37+
minval = max(minval, inDepth.sample(sam, src + float2(1, 2)));
38+
}
39+
// Both edges: also take the corner sample at (+2, +2).
if (edge_x && edge_y) minval = max(minval, inDepth.sample(sam, src + float2(2, 2)));
40+
41+
// The scalar result is replicated into all four components of the written texel.
outDepth.write(float4(minval), tid);
42+
}
43+
```
44+
45+
```llvm
46+
source_filename = "depthPyramid"
47+
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-n8:16:32"
48+
target triple = "air64-apple-ios13.0.0"
49+
50+
%struct._depth_2d_t.191 = type opaque
51+
%struct._texture_2d_t.192 = type opaque
52+
%struct._sampler_t.193 = type opaque
53+
54+
@__air_sampler_state = internal addrspace(2) constant i64 -9188470239253725111, align 8
55+
@llvm.global_ctors = appending global [0 x { i32, void ()*, i8* }] zeroinitializer
56+
57+
; Function Attrs: convergent nounwind
58+
define void @depthPyramid(%struct._depth_2d_t.191 addrspace(1)* %0, %struct._texture_2d_t.192 addrspace(1)* %1, <4 x i32> addrspace(2)* noalias nocapture readonly dereferenceable(16) %2, <2 x i32> %3) local_unnamed_addr #0 {
59+
%5 = load <4 x i32>, <4 x i32> addrspace(2)* %2, align 16
60+
%6 = extractelement <4 x i32> %5, i64 0 ; source_width
61+
%7 = extractelement <4 x i32> %5, i64 1 ; source_height
62+
%8 = shl <2 x i32> %3, <i32 1, i32 1> ; tid * 2
63+
%9 = shufflevector <4 x i32> %5, <4 x i32> undef, <2 x i32> <i32 2, i32 3> ; inputRect.zw
64+
%10 = add <2 x i32> %9, %8 ; tid * 2 + inputRect.zw
65+
%11 = tail call fast <2 x float> @air.convert.f.v2f32.u.v2i32(<2 x i32> %10) #2 ; float2 src = float2(tid * 2 + inputRect.zw);
66+
; inDepth.sample(sam, src);
67+
%12 = tail call { float, i8 } @air.sample_depth_2d.f32(%struct._depth_2d_t.191 addrspace(1)* nocapture readonly %0, %struct._sampler_t.193 addrspace(2)* nocapture readonly bitcast (i64 addrspace(2)* @__air_sampler_state to %struct._sampler_t.193 addrspace(2)*), i32 1, <2 x float> %11, i1 true, <2 x i32> zeroinitializer, i1 false, float 0.000000e+00, float 0.000000e+00, i32 0) #3
68+
; float minval = inDepth.sample(sam, src);
69+
%13 = extractvalue { float, i8 } %12, 0
70+
; src + float2(0, 1)
71+
%14 = fadd fast <2 x float> %11, <float 0.000000e+00, float 1.000000e+00>
72+
; inDepth.sample(sam, src + float2(0, 1))
73+
%15 = tail call { float, i8 } @air.sample_depth_2d.f32(%struct._depth_2d_t.191 addrspace(1)* nocapture readonly %0, %struct._sampler_t.193 addrspace(2)* nocapture readonly bitcast (i64 addrspace(2)* @__air_sampler_state to %struct._sampler_t.193 addrspace(2)*), i32 1, <2 x float> %14, i1 true, <2 x i32> zeroinitializer, i1 false, float 0.000000e+00, float 0.000000e+00, i32 0) #3
74+
%16 = extractvalue { float, i8 } %15, 0
75+
%17 = tail call fast float @air.fast_fmax.f32(float %13, float %16) #2
76+
%18 = fadd fast <2 x float> %11, <float 1.000000e+00, float 0.000000e+00>
77+
%19 = tail call { float, i8 } @air.sample_depth_2d.f32(%struct._depth_2d_t.191 addrspace(1)* nocapture readonly %0, %struct._sampler_t.193 addrspace(2)* nocapture readonly bitcast (i64 addrspace(2)* @__air_sampler_state to %struct._sampler_t.193 addrspace(2)*), i32 1, <2 x float> %18, i1 true, <2 x i32> zeroinitializer, i1 false, float 0.000000e+00, float 0.000000e+00, i32 0) #3
78+
%20 = extractvalue { float, i8 } %19, 0
79+
%21 = tail call fast float @air.fast_fmax.f32(float %17, float %20) #2
80+
%22 = fadd fast <2 x float> %11, <float 1.000000e+00, float 1.000000e+00>
81+
%23 = tail call { float, i8 } @air.sample_depth_2d.f32(%struct._depth_2d_t.191 addrspace(1)* nocapture readonly %0, %struct._sampler_t.193 addrspace(2)* nocapture readonly bitcast (i64 addrspace(2)* @__air_sampler_state to %struct._sampler_t.193 addrspace(2)*), i32 1, <2 x float> %22, i1 true, <2 x i32> zeroinitializer, i1 false, float 0.000000e+00, float 0.000000e+00, i32 0) #3
82+
%24 = extractvalue { float, i8 } %23, 0
83+
%25 = tail call fast float @air.fast_fmax.f32(float %21, float %24) #2
84+
%26 = extractelement <2 x i32> %3, i64 0
85+
%27 = shl i32 %26, 1
86+
%28 = add i32 %6, -3
87+
%29 = icmp eq i32 %27, %28
88+
%30 = extractelement <2 x i32> %3, i64 1
89+
%31 = shl i32 %30, 1
90+
%32 = add i32 %7, -3
91+
%33 = icmp eq i32 %31, %32
92+
br i1 %29, label %34, label %43
93+
94+
34: ; preds = %4
95+
%35 = fadd fast <2 x float> %11, <float 2.000000e+00, float 0.000000e+00>
96+
%36 = tail call { float, i8 } @air.sample_depth_2d.f32(%struct._depth_2d_t.191 addrspace(1)* nocapture readonly %0, %struct._sampler_t.193 addrspace(2)* nocapture readonly bitcast (i64 addrspace(2)* @__air_sampler_state to %struct._sampler_t.193 addrspace(2)*), i32 1, <2 x float> %35, i1 true, <2 x i32> zeroinitializer, i1 false, float 0.000000e+00, float 0.000000e+00, i32 0) #3
97+
%37 = extractvalue { float, i8 } %36, 0
98+
%38 = tail call fast float @air.fast_fmax.f32(float %25, float %37) #2
99+
%39 = fadd fast <2 x float> %11, <float 2.000000e+00, float 1.000000e+00>
100+
%40 = tail call { float, i8 } @air.sample_depth_2d.f32(%struct._depth_2d_t.191 addrspace(1)* nocapture readonly %0, %struct._sampler_t.193 addrspace(2)* nocapture readonly bitcast (i64 addrspace(2)* @__air_sampler_state to %struct._sampler_t.193 addrspace(2)*), i32 1, <2 x float> %39, i1 true, <2 x i32> zeroinitializer, i1 false, float 0.000000e+00, float 0.000000e+00, i32 0) #3
101+
%41 = extractvalue { float, i8 } %40, 0
102+
%42 = tail call fast float @air.fast_fmax.f32(float %38, float %41) #2
103+
br label %43
104+
105+
43: ; preds = %34, %4
106+
%44 = phi float [ %42, %34 ], [ %25, %4 ]
107+
br i1 %33, label %45, label %54
108+
109+
45: ; preds = %43
110+
%46 = fadd fast <2 x float> %11, <float 0.000000e+00, float 2.000000e+00>
111+
%47 = tail call { float, i8 } @air.sample_depth_2d.f32(%struct._depth_2d_t.191 addrspace(1)* nocapture readonly %0, %struct._sampler_t.193 addrspace(2)* nocapture readonly bitcast (i64 addrspace(2)* @__air_sampler_state to %struct._sampler_t.193 addrspace(2)*), i32 1, <2 x float> %46, i1 true, <2 x i32> zeroinitializer, i1 false, float 0.000000e+00, float 0.000000e+00, i32 0) #3
112+
%48 = extractvalue { float, i8 } %47, 0
113+
%49 = tail call fast float @air.fast_fmax.f32(float %44, float %48) #2
114+
%50 = fadd fast <2 x float> %11, <float 1.000000e+00, float 2.000000e+00>
115+
%51 = tail call { float, i8 } @air.sample_depth_2d.f32(%struct._depth_2d_t.191 addrspace(1)* nocapture readonly %0, %struct._sampler_t.193 addrspace(2)* nocapture readonly bitcast (i64 addrspace(2)* @__air_sampler_state to %struct._sampler_t.193 addrspace(2)*), i32 1, <2 x float> %50, i1 true, <2 x i32> zeroinitializer, i1 false, float 0.000000e+00, float 0.000000e+00, i32 0) #3
116+
%52 = extractvalue { float, i8 } %51, 0
117+
%53 = tail call fast float @air.fast_fmax.f32(float %49, float %52) #2
118+
br label %54
119+
120+
54: ; preds = %45, %43
121+
%55 = phi float [ %53, %45 ], [ %44, %43 ]
122+
%56 = and i1 %29, %33
123+
br i1 %56, label %57, label %62
124+
125+
57: ; preds = %54
126+
%58 = fadd fast <2 x float> %11, <float 2.000000e+00, float 2.000000e+00>
127+
%59 = tail call { float, i8 } @air.sample_depth_2d.f32(%struct._depth_2d_t.191 addrspace(1)* nocapture readonly %0, %struct._sampler_t.193 addrspace(2)* nocapture readonly bitcast (i64 addrspace(2)* @__air_sampler_state to %struct._sampler_t.193 addrspace(2)*), i32 1, <2 x float> %58, i1 true, <2 x i32> zeroinitializer, i1 false, float 0.000000e+00, float 0.000000e+00, i32 0) #3
128+
%60 = extractvalue { float, i8 } %59, 0
129+
%61 = tail call fast float @air.fast_fmax.f32(float %55, float %60) #2
130+
br label %62
131+
132+
62: ; preds = %57, %54
133+
%63 = phi float [ %61, %57 ], [ %55, %54 ]
134+
%64 = insertelement <4 x float> undef, float %63, i32 0
135+
%65 = shufflevector <4 x float> %64, <4 x float> undef, <4 x i32> zeroinitializer
136+
tail call void @air.write_texture_2d.v4f32(%struct._texture_2d_t.192 addrspace(1)* nocapture %1, <2 x i32> %3, <4 x float> %65, i32 0, i32 2) #1
137+
ret void
138+
}
139+
140+
; Function Attrs: argmemonly nounwind
141+
declare void @air.write_texture_2d.v4f32(%struct._texture_2d_t.192 addrspace(1)* nocapture, <2 x i32>, <4 x float>, i32, i32) local_unnamed_addr #1
142+
143+
; Function Attrs: nounwind readnone
144+
declare float @air.fast_fmax.f32(float, float) local_unnamed_addr #2
145+
146+
; Function Attrs: argmemonly convergent nounwind readonly
147+
declare { float, i8 } @air.sample_depth_2d.f32(%struct._depth_2d_t.191 addrspace(1)* nocapture readonly, %struct._sampler_t.193 addrspace(2)* nocapture readonly, i32, <2 x float>, i1, <2 x i32>, i1, float, float, i32) local_unnamed_addr #3
148+
149+
; Function Attrs: nounwind readnone
150+
declare <2 x float> @air.convert.f.v2f32.u.v2i32(<2 x i32>) local_unnamed_addr #2
151+
152+
attributes #0 = { convergent nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
153+
attributes #1 = { argmemonly nounwind }
154+
attributes #2 = { nounwind readnone }
155+
attributes #3 = { argmemonly convergent nounwind readonly }
156+
157+
!llvm.module.flags = !{!0, !1, !2, !3}
158+
!llvm.ident = !{!4}
159+
!air.version = !{!5}
160+
!air.language_version = !{!6}
161+
!air.compile_options = !{!7, !8, !9}
162+
!air.kernel = !{!10}
163+
!air.sampler_states = !{!17}
164+
165+
!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 15, i32 0]}
166+
!1 = !{i32 2, !"Dwarf Version", i32 4}
167+
!2 = !{i32 2, !"Debug Info Version", i32 3}
168+
!3 = !{i32 1, !"wchar_size", i32 4}
169+
!4 = !{!"Apple metal version 31001.325 (metalfe-31001.325)"}
170+
!5 = !{i32 2, i32 2, i32 0}
171+
!6 = !{!"Metal", i32 2, i32 2, i32 0}
172+
!7 = !{!"air.compile.denorms_disable"}
173+
!8 = !{!"air.compile.fast_math_enable"}
174+
!9 = !{!"air.compile.framebuffer_fetch_enable"}
175+
!10 = !{void (%struct._depth_2d_t.191 addrspace(1)*, %struct._texture_2d_t.192 addrspace(1)*, <4 x i32> addrspace(2)*, <2 x i32>)* @depthPyramid, !11, !12}
176+
!11 = !{}
177+
!12 = !{!13, !14, !15, !16}
178+
!13 = !{i32 0, !"air.texture", !"air.location_index", i32 0, i32 1, !"air.sample", !"air.arg_type_name", !"depth2d<float, sample>", !"air.arg_name", !"inDepth"}
179+
!14 = !{i32 1, !"air.texture", !"air.location_index", i32 1, i32 1, !"air.write", !"air.arg_type_name", !"texture2d<float, write>", !"air.arg_name", !"outDepth"}
180+
!15 = !{i32 2, !"air.buffer", !"air.buffer_size", i32 16, !"air.location_index", i32 2, i32 1, !"air.read", !"air.arg_type_size", i32 16, !"air.arg_type_align_size", i32 16, !"air.arg_type_name", !"uint4", !"air.arg_name", !"inputRect"}
181+
!16 = !{i32 3, !"air.thread_position_in_grid", !"air.arg_type_name", !"uint2", !"air.arg_name", !"tid"}
182+
!17 = !{!"air.sampler_state", i64 addrspace(2)* @__air_sampler_state}
183+
```

0 commit comments

Comments
 (0)