|
| 1 | +# MetalLib 翻译到 SPIR-V |
| 2 | + |
| 3 | +Apple Metal 使用的 LLVM IR(即 AIR)是一种表达能力非常**完备**的中间表示语言。下面以一个 Metal 计算着色器(kernel)及其编译得到的 LLVM IR 为例进行说明。 |
| 4 | + |
| 5 | +```cpp |
| 6 | +#import <metal_stdlib> |
| 7 | + |
| 8 | +using namespace metal; |
| 9 | + |
| 10 | +// Calculates a slice of a depth pyramid from a higher resolution slice: each thread writes one outDepth texel holding the max of the inDepth samples at src + (0..1, 0..1), where src = tid * 2 + inputRect.zw. |
| 11 | +// Handles downsampling from odd sized depth textures: when tid * 2 equals the source size minus 3 on an axis, the sampled footprint grows to three texels along that axis. inputRect.xy supplies the source width/height. |
| 12 | +kernel void depthPyramid(depth2d<float, access::sample> inDepth [[texture(0)]], |
| 13 | + texture2d<float, access::write> outDepth [[texture(1)]], |
| 14 | + constant uint4& inputRect [[buffer(2)]], |
| 15 | + uint2 tid [[thread_position_in_grid]]) |
| 16 | +{ |
| 17 | + constexpr sampler sam (min_filter::nearest, mag_filter::nearest, coord::pixel); /* nearest min/mag filtering; coordinates are given in pixels */ |
| 18 | + uint source_width = inputRect.x; |
| 19 | + uint source_height = inputRect.y; |
| 20 | + float2 src = float2(tid * 2 + inputRect.zw); /* base sample coordinate for this thread */ |
| 21 | + |
| 22 | + float minval = inDepth.sample(sam, src); /* NOTE(review): named minval, yet every update below uses max() -- presumably a reversed-depth convention; confirm */ |
| 23 | + minval = max(minval, inDepth.sample(sam, src + float2(0, 1))); |
| 24 | + minval = max(minval, inDepth.sample(sam, src + float2(1, 0))); |
| 25 | + minval = max(minval, inDepth.sample(sam, src + float2(1, 1))); |
| 26 | + bool edge_x = (tid.x * 2 == source_width - 3); /* NOTE(review): unsigned math, so source_width - 3 wraps around when source_width < 3 */ |
| 27 | + bool edge_y = (tid.y * 2 == source_height - 3); /* same test on the y axis */ |
| 28 | + |
| 29 | + if (edge_x) /* fold in the extra column at x offset 2 */ |
| 30 | + { |
| 31 | + minval = max(minval, inDepth.sample(sam, src + float2(2, 0))); |
| 32 | + minval = max(minval, inDepth.sample(sam, src + float2(2, 1))); |
| 33 | + } |
| 34 | + if (edge_y) /* fold in the extra row at y offset 2 */ |
| 35 | + { |
| 36 | + minval = max(minval, inDepth.sample(sam, src + float2(0, 2))); |
| 37 | + minval = max(minval, inDepth.sample(sam, src + float2(1, 2))); |
| 38 | + } |
| 39 | + if (edge_x && edge_y) minval = max(minval, inDepth.sample(sam, src + float2(2, 2))); /* corner texel, needed only when both edges apply */ |
| 40 | + |
| 41 | + outDepth.write(float4(minval), tid); /* the scalar result is broadcast to all four channels */ |
| 42 | +} |
| 43 | +``` |
| 44 | +
|
| 45 | +```llvm |
| 46 | +source_filename = "depthPyramid" |
| 47 | +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-n8:16:32" |
| 48 | +target triple = "air64-apple-ios13.0.0" |
| 49 | +
|
| 50 | +%struct._depth_2d_t.191 = type opaque |
| 51 | +%struct._texture_2d_t.192 = type opaque |
| 52 | +%struct._sampler_t.193 = type opaque |
| 53 | +
|
| 54 | +@__air_sampler_state = internal addrspace(2) constant i64 -9188470239253725111, align 8 |
| 55 | +@llvm.global_ctors = appending global [0 x { i32, void ()*, i8* }] zeroinitializer |
| 56 | +
|
| 57 | +; Function Attrs: convergent nounwind -- IR for the depthPyramid kernel; per metadata !13-!16: %0 = inDepth, %1 = outDepth, %2 = inputRect, %3 = tid |
| 58 | +define void @depthPyramid(%struct._depth_2d_t.191 addrspace(1)* %0, %struct._texture_2d_t.192 addrspace(1)* %1, <4 x i32> addrspace(2)* noalias nocapture readonly dereferenceable(16) %2, <2 x i32> %3) local_unnamed_addr #0 { |
| 59 | + %5 = load <4 x i32>, <4 x i32> addrspace(2)* %2, align 16 |
| 60 | + %6 = extractelement <4 x i32> %5, i64 0 ; source_width |
| 61 | + %7 = extractelement <4 x i32> %5, i64 1 ; source_height |
| 62 | + %8 = shl <2 x i32> %3, <i32 1, i32 1> ; tid * 2 |
| 63 | + %9 = shufflevector <4 x i32> %5, <4 x i32> undef, <2 x i32> <i32 2, i32 3> ; inputRect.zw |
| 64 | + %10 = add <2 x i32> %9, %8 ; tid * 2 + inputRect.zw |
| 65 | + %11 = tail call fast <2 x float> @air.convert.f.v2f32.u.v2i32(<2 x i32> %10) #2 ; float2 src = float2(tid * 2 + inputRect.zw); |
| 66 | + ; inDepth.sample(sam, src); the call returns { float, i8 } and only field 0 is extracted and used below |
| 67 | + %12 = tail call { float, i8 } @air.sample_depth_2d.f32(%struct._depth_2d_t.191 addrspace(1)* nocapture readonly %0, %struct._sampler_t.193 addrspace(2)* nocapture readonly bitcast (i64 addrspace(2)* @__air_sampler_state to %struct._sampler_t.193 addrspace(2)*), i32 1, <2 x float> %11, i1 true, <2 x i32> zeroinitializer, i1 false, float 0.000000e+00, float 0.000000e+00, i32 0) #3 |
| 68 | + ; float minval = inDepth.sample(sam, src); |
| 69 | + %13 = extractvalue { float, i8 } %12, 0 |
| 70 | + ; src + float2(0, 1) |
| 71 | + %14 = fadd fast <2 x float> %11, <float 0.000000e+00, float 1.000000e+00> |
| 72 | + ; inDepth.sample(sam, src + float2(0, 1)); the same add / sample / extractvalue / fast_fmax pattern repeats for offsets (1, 0) and (1, 1) |
| 73 | + %15 = tail call { float, i8 } @air.sample_depth_2d.f32(%struct._depth_2d_t.191 addrspace(1)* nocapture readonly %0, %struct._sampler_t.193 addrspace(2)* nocapture readonly bitcast (i64 addrspace(2)* @__air_sampler_state to %struct._sampler_t.193 addrspace(2)*), i32 1, <2 x float> %14, i1 true, <2 x i32> zeroinitializer, i1 false, float 0.000000e+00, float 0.000000e+00, i32 0) #3 |
| 74 | + %16 = extractvalue { float, i8 } %15, 0 |
| 75 | + %17 = tail call fast float @air.fast_fmax.f32(float %13, float %16) #2 |
| 76 | + %18 = fadd fast <2 x float> %11, <float 1.000000e+00, float 0.000000e+00> |
| 77 | + %19 = tail call { float, i8 } @air.sample_depth_2d.f32(%struct._depth_2d_t.191 addrspace(1)* nocapture readonly %0, %struct._sampler_t.193 addrspace(2)* nocapture readonly bitcast (i64 addrspace(2)* @__air_sampler_state to %struct._sampler_t.193 addrspace(2)*), i32 1, <2 x float> %18, i1 true, <2 x i32> zeroinitializer, i1 false, float 0.000000e+00, float 0.000000e+00, i32 0) #3 |
| 78 | + %20 = extractvalue { float, i8 } %19, 0 |
| 79 | + %21 = tail call fast float @air.fast_fmax.f32(float %17, float %20) #2 |
| 80 | + %22 = fadd fast <2 x float> %11, <float 1.000000e+00, float 1.000000e+00> |
| 81 | + %23 = tail call { float, i8 } @air.sample_depth_2d.f32(%struct._depth_2d_t.191 addrspace(1)* nocapture readonly %0, %struct._sampler_t.193 addrspace(2)* nocapture readonly bitcast (i64 addrspace(2)* @__air_sampler_state to %struct._sampler_t.193 addrspace(2)*), i32 1, <2 x float> %22, i1 true, <2 x i32> zeroinitializer, i1 false, float 0.000000e+00, float 0.000000e+00, i32 0) #3 |
| 82 | + %24 = extractvalue { float, i8 } %23, 0 |
| 83 | + %25 = tail call fast float @air.fast_fmax.f32(float %21, float %24) #2 |
| 84 | + %26 = extractelement <2 x i32> %3, i64 0 |
| 85 | + %27 = shl i32 %26, 1 |
| 86 | + %28 = add i32 %6, -3 |
| 87 | + %29 = icmp eq i32 %27, %28 |
| 88 | + %30 = extractelement <2 x i32> %3, i64 1 |
| 89 | + %31 = shl i32 %30, 1 |
| 90 | + %32 = add i32 %7, -3 |
| 91 | + %33 = icmp eq i32 %31, %32 |
| 92 | + br i1 %29, label %34, label %43 |
| 93 | +
|
| 94 | +34: ; preds = %4 -- body of if (edge_x): max in the samples at src + (2, 0) and src + (2, 1) |
| 95 | + %35 = fadd fast <2 x float> %11, <float 2.000000e+00, float 0.000000e+00> |
| 96 | + %36 = tail call { float, i8 } @air.sample_depth_2d.f32(%struct._depth_2d_t.191 addrspace(1)* nocapture readonly %0, %struct._sampler_t.193 addrspace(2)* nocapture readonly bitcast (i64 addrspace(2)* @__air_sampler_state to %struct._sampler_t.193 addrspace(2)*), i32 1, <2 x float> %35, i1 true, <2 x i32> zeroinitializer, i1 false, float 0.000000e+00, float 0.000000e+00, i32 0) #3 |
| 97 | + %37 = extractvalue { float, i8 } %36, 0 |
| 98 | + %38 = tail call fast float @air.fast_fmax.f32(float %25, float %37) #2 |
| 99 | + %39 = fadd fast <2 x float> %11, <float 2.000000e+00, float 1.000000e+00> |
| 100 | + %40 = tail call { float, i8 } @air.sample_depth_2d.f32(%struct._depth_2d_t.191 addrspace(1)* nocapture readonly %0, %struct._sampler_t.193 addrspace(2)* nocapture readonly bitcast (i64 addrspace(2)* @__air_sampler_state to %struct._sampler_t.193 addrspace(2)*), i32 1, <2 x float> %39, i1 true, <2 x i32> zeroinitializer, i1 false, float 0.000000e+00, float 0.000000e+00, i32 0) #3 |
| 101 | + %41 = extractvalue { float, i8 } %40, 0 |
| 102 | + %42 = tail call fast float @air.fast_fmax.f32(float %38, float %41) #2 |
| 103 | + br label %43 |
| 104 | +
|
| 105 | +43: ; preds = %34, %4 -- join after if (edge_x): %44 is minval so far; next branch tests edge_y (%33) |
| 106 | + %44 = phi float [ %42, %34 ], [ %25, %4 ] |
| 107 | + br i1 %33, label %45, label %54 |
| 108 | +
|
| 109 | +45: ; preds = %43 -- body of if (edge_y): max in the samples at src + (0, 2) and src + (1, 2) |
| 110 | + %46 = fadd fast <2 x float> %11, <float 0.000000e+00, float 2.000000e+00> |
| 111 | + %47 = tail call { float, i8 } @air.sample_depth_2d.f32(%struct._depth_2d_t.191 addrspace(1)* nocapture readonly %0, %struct._sampler_t.193 addrspace(2)* nocapture readonly bitcast (i64 addrspace(2)* @__air_sampler_state to %struct._sampler_t.193 addrspace(2)*), i32 1, <2 x float> %46, i1 true, <2 x i32> zeroinitializer, i1 false, float 0.000000e+00, float 0.000000e+00, i32 0) #3 |
| 112 | + %48 = extractvalue { float, i8 } %47, 0 |
| 113 | + %49 = tail call fast float @air.fast_fmax.f32(float %44, float %48) #2 |
| 114 | + %50 = fadd fast <2 x float> %11, <float 1.000000e+00, float 2.000000e+00> |
| 115 | + %51 = tail call { float, i8 } @air.sample_depth_2d.f32(%struct._depth_2d_t.191 addrspace(1)* nocapture readonly %0, %struct._sampler_t.193 addrspace(2)* nocapture readonly bitcast (i64 addrspace(2)* @__air_sampler_state to %struct._sampler_t.193 addrspace(2)*), i32 1, <2 x float> %50, i1 true, <2 x i32> zeroinitializer, i1 false, float 0.000000e+00, float 0.000000e+00, i32 0) #3 |
| 116 | + %52 = extractvalue { float, i8 } %51, 0 |
| 117 | + %53 = tail call fast float @air.fast_fmax.f32(float %49, float %52) #2 |
| 118 | + br label %54 |
| 119 | +
|
| 120 | +54: ; preds = %45, %43 -- join after if (edge_y): %55 is minval so far; %56 = edge_x && edge_y |
| 121 | + %55 = phi float [ %53, %45 ], [ %44, %43 ] |
| 122 | + %56 = and i1 %29, %33 |
| 123 | + br i1 %56, label %57, label %62 |
| 124 | +
|
| 125 | +57: ; preds = %54 -- body of if (edge_x && edge_y): max in the sample at src + (2, 2) |
| 126 | + %58 = fadd fast <2 x float> %11, <float 2.000000e+00, float 2.000000e+00> |
| 127 | + %59 = tail call { float, i8 } @air.sample_depth_2d.f32(%struct._depth_2d_t.191 addrspace(1)* nocapture readonly %0, %struct._sampler_t.193 addrspace(2)* nocapture readonly bitcast (i64 addrspace(2)* @__air_sampler_state to %struct._sampler_t.193 addrspace(2)*), i32 1, <2 x float> %58, i1 true, <2 x i32> zeroinitializer, i1 false, float 0.000000e+00, float 0.000000e+00, i32 0) #3 |
| 128 | + %60 = extractvalue { float, i8 } %59, 0 |
| 129 | + %61 = tail call fast float @air.fast_fmax.f32(float %55, float %60) #2 |
| 130 | + br label %62 |
| 131 | +
|
| 132 | +62: ; preds = %57, %54 -- %63 is the final minval; it is splatted to <4 x float> (float4(minval)) and written to %1 at coordinate %3 |
| 133 | + %63 = phi float [ %61, %57 ], [ %55, %54 ] |
| 134 | + %64 = insertelement <4 x float> undef, float %63, i32 0 |
| 135 | + %65 = shufflevector <4 x float> %64, <4 x float> undef, <4 x i32> zeroinitializer |
| 136 | + tail call void @air.write_texture_2d.v4f32(%struct._texture_2d_t.192 addrspace(1)* nocapture %1, <2 x i32> %3, <4 x float> %65, i32 0, i32 2) #1 |
| 137 | + ret void |
| 138 | +} |
| 139 | +
|
| 140 | +; Function Attrs: argmemonly nounwind |
| 141 | +declare void @air.write_texture_2d.v4f32(%struct._texture_2d_t.192 addrspace(1)* nocapture, <2 x i32>, <4 x float>, i32, i32) local_unnamed_addr #1 |
| 142 | +
|
| 143 | +; Function Attrs: nounwind readnone |
| 144 | +declare float @air.fast_fmax.f32(float, float) local_unnamed_addr #2 |
| 145 | +
|
| 146 | +; Function Attrs: argmemonly convergent nounwind readonly |
| 147 | +declare { float, i8 } @air.sample_depth_2d.f32(%struct._depth_2d_t.191 addrspace(1)* nocapture readonly, %struct._sampler_t.193 addrspace(2)* nocapture readonly, i32, <2 x float>, i1, <2 x i32>, i1, float, float, i32) local_unnamed_addr #3 |
| 148 | +
|
| 149 | +; Function Attrs: nounwind readnone |
| 150 | +declare <2 x float> @air.convert.f.v2f32.u.v2i32(<2 x i32>) local_unnamed_addr #2 |
| 151 | +
|
| 152 | +attributes #0 = { convergent nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" } |
| 153 | +attributes #1 = { argmemonly nounwind } |
| 154 | +attributes #2 = { nounwind readnone } |
| 155 | +attributes #3 = { argmemonly convergent nounwind readonly } |
| 156 | +
|
| 157 | +!llvm.module.flags = !{!0, !1, !2, !3} |
| 158 | +!llvm.ident = !{!4} |
| 159 | +!air.version = !{!5} |
| 160 | +!air.language_version = !{!6} |
| 161 | +!air.compile_options = !{!7, !8, !9} |
| 162 | +!air.kernel = !{!10} |
| 163 | +!air.sampler_states = !{!17} |
| 164 | +
|
| 165 | +!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 15, i32 0]} |
| 166 | +!1 = !{i32 2, !"Dwarf Version", i32 4} |
| 167 | +!2 = !{i32 2, !"Debug Info Version", i32 3} |
| 168 | +!3 = !{i32 1, !"wchar_size", i32 4} |
| 169 | +!4 = !{!"Apple metal version 31001.325 (metalfe-31001.325)"} |
| 170 | +!5 = !{i32 2, i32 2, i32 0} |
| 171 | +!6 = !{!"Metal", i32 2, i32 2, i32 0} |
| 172 | +!7 = !{!"air.compile.denorms_disable"} |
| 173 | +!8 = !{!"air.compile.fast_math_enable"} |
| 174 | +!9 = !{!"air.compile.framebuffer_fetch_enable"} |
| 175 | +!10 = !{void (%struct._depth_2d_t.191 addrspace(1)*, %struct._texture_2d_t.192 addrspace(1)*, <4 x i32> addrspace(2)*, <2 x i32>)* @depthPyramid, !11, !12} |
| 176 | +!11 = !{} |
| 177 | +!12 = !{!13, !14, !15, !16} |
| 178 | +!13 = !{i32 0, !"air.texture", !"air.location_index", i32 0, i32 1, !"air.sample", !"air.arg_type_name", !"depth2d<float, sample>", !"air.arg_name", !"inDepth"} |
| 179 | +!14 = !{i32 1, !"air.texture", !"air.location_index", i32 1, i32 1, !"air.write", !"air.arg_type_name", !"texture2d<float, write>", !"air.arg_name", !"outDepth"} |
| 180 | +!15 = !{i32 2, !"air.buffer", !"air.buffer_size", i32 16, !"air.location_index", i32 2, i32 1, !"air.read", !"air.arg_type_size", i32 16, !"air.arg_type_align_size", i32 16, !"air.arg_type_name", !"uint4", !"air.arg_name", !"inputRect"} |
| 181 | +!16 = !{i32 3, !"air.thread_position_in_grid", !"air.arg_type_name", !"uint2", !"air.arg_name", !"tid"} |
| 182 | +!17 = !{!"air.sampler_state", i64 addrspace(2)* @__air_sampler_state} |
| 183 | +``` |
0 commit comments