Skip to content

[SPIR-V] Compiling with doubles may also implicitly enable Int64 capability (which may or may not be supported) #7038

@Nielsbishere

Description

@Nielsbishere

Description
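When a 64-bit float is loaded from a ByteAddressBuffer (BAB) and used in arithmetic, the generated SPIR-V also contains 64-bit integer instructions (and declares the Int64 capability), even though the source does not use any 64-bit integers.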

Steps to Reproduce

typedef uint U32; typedef float F32; typedef double3 F64x3; typedef float3 F32x3; typedef uint4 U32x4; static const U32 ResourceId_mask = (1 << 17) - 1; struct TransformPreciseDouble {//Stride 8, Length 24	F64x3 pos; }; struct TransformImprecise {//Stride 4, Length 12	F32x3 pos;//Relative to the camera origin }; enum EResourceBinding {	EResourceBinding_ObjectTransformsLocal,	EResourceBinding_ObjectTransformsGlobal, EResourceBinding_ObjectTransformsLocalRW }; #ifdef __spirv__  #define _binding(a, b, ...) [[vk::binding(a, b)]] __VA_ARGS__ #define _vkBinding(a, b) [[vk::binding(a, b)]] #else  #define _binding(a, b, ...) __VA_ARGS__ : register(space##a) #define _vkBinding(a, b) #endif _vkBinding( 0, 2) cbuffer globals {//Globals used during the entire frame for useful information such as frame id.	U32 _frameId;//Can loop back to 0 after U32_MAX!	F32 _time;//Time since launch of app	F32 _deltaTime;//deltaTime since last frame.	U32 _swapchainCount;//How many swapchains are present (will insert ids into appData)	U32x4 _swapchains[8];//Descriptors of swapchains: (Read, write)[2][8] //Up to 368 bytes of user data, useful for supplying constant per frame data. //Make sure to offset to make sure.	U32x4 _appData[23]; }; U32 getAppData1u(U32 offset) { return offset >= 92 ? 
0 : _appData[offset >> 2][offset & 3]; } #define rwBufferUniform(i) _rwBuffer[i & ResourceId_mask] #define bufferUniform(i) _buffer[i & ResourceId_mask] _binding( 3, 1, ByteAddressBuffer _buffer[131072]); _binding( 4, 1, RWByteAddressBuffer _rwBuffer[131072]); template<typename T> void setAtUniform(U32 resourceId, U32 id, T t) { rwBufferUniform(resourceId).Store<T>(id, t); } template<typename T> T getAtUniform(U32 resourceId, U32 id) { return bufferUniform(resourceId).Load<T>(id); } U32 bufferBytesUniform(U32 resourceId) {	U32 bytes; bufferUniform(resourceId).GetDimensions(bytes); return bytes; } [[oxc::stage("compute")]] [[oxc::extension("I64", "F64")]]//Loading F64 might incurr I64 instructions in spirv [numthreads(256, 1, 1)] void main(U32 i : SV_DispatchThreadID) {	U32 objectTransformGlobal = getAppData1u(EResourceBinding_ObjectTransformsGlobal);	U32 bytes = bufferBytesUniform(objectTransformGlobal);	TransformImprecise result;	{	U32 elems = bytes / sizeof(TransformPreciseDouble); if(i + 1 >= elems) return;	TransformPreciseDouble cam = getAtUniform<TransformPreciseDouble>(objectTransformGlobal, 0);	TransformPreciseDouble obj = getAtUniform<TransformPreciseDouble>(objectTransformGlobal, i + 1);	result.pos = (float3)(obj.pos - cam.pos);	}	U32 objectTransformLocal = getAppData1u(EResourceBinding_ObjectTransformsLocalRW); setAtUniform(objectTransformLocal, i + 1, result); }

Turns into roughly:

Section has 20486 bytes. Showing offset #0 with size 20486 File contents: (ascii) ; SPIR-V ; Version: 1.5 ; Generator: Google spiregg; 0 ; Bound: 138 ; Schema: 0 OpCapability Shader ; 0x00000014 OpCapability Float64 ; 0x0000001c OpCapability Int64 ; 0x00000024 OpMemoryModel Logical GLSL450 ; 0x0000002c OpEntryPoint GLCompute %1 "main" %gl_GlobalInvocationID %3 %4 %5 ; 0x00000038 OpExecutionMode %1 LocalSize 256 1 1 ; 0x0000005c ; Annotations OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId ; 0x00000074 OpDecorate %3 DescriptorSet 1 ; 0x00000084 OpDecorate %3 Binding 3 ; 0x00000094 OpDecorate %4 DescriptorSet 1 ; 0x000000a4 OpDecorate %4 Binding 4 ; 0x000000b4 OpDecorate %5 DescriptorSet 2 ; 0x000000c4 OpDecorate %5 Binding 0 ; 0x000000d4 OpDecorate %_runtimearr_uint ArrayStride 4 ; 0x000000e4 OpMemberDecorate %_struct_6 0 Offset 0 ; 0x000000f4 OpMemberDecorate %_struct_6 0 NonWritable ; 0x00000108 OpDecorate %_struct_6 Block ; 0x00000118 OpMemberDecorate %_struct_7 0 Offset 0 ; 0x00000124 OpDecorate %_struct_7 Block ; 0x00000138 OpDecorate %_arr_v4uint_uint_8 ArrayStride 16 ; 0x00000144 OpDecorate %_arr_v4uint_uint_23 ArrayStride 16 ; 0x00000154 OpMemberDecorate %_struct_8 0 Offset 0 ; 0x00000164 OpMemberDecorate %_struct_8 1 Offset 4 ; 0x00000178 OpMemberDecorate %_struct_8 2 Offset 8 ; 0x0000018c OpMemberDecorate %_struct_8 3 Offset 12 ; 0x000001a0 OpMemberDecorate %_struct_8 4 Offset 16 ; 0x000001b4 OpMemberDecorate %_struct_8 5 Offset 144 ; 0x000001c8 OpDecorate %_struct_8 Block ; 0x000001dc ; Types, variables and constants %int = OpTypeInt 32 1 ; 0x000001e8 %int_5 = OpConstant %int 5 ; 0x000001f8 %float = OpTypeFloat 32 ; 0x00000208 %uint = OpTypeInt 32 0 ; 0x00000214 %uint_131071 = OpConstant %uint 131071 ; 0x00000224 %uint_24 = OpConstant %uint 24 ; 0x00000234 %uint_1 = OpConstant %uint 1 ; 0x00000244 %uint_0 = OpConstant %uint 0 ; 0x00000254 %uint_2 = OpConstant %uint 2 ; 0x00000264 %uint_3 = OpConstant %uint 3 ; 0x00000274 %uint_4 = OpConstant 
%uint 4 ; 0x00000284 %uint_32 = OpConstant %uint 32 ; 0x00000294 %uint_131072 = OpConstant %uint 131072 ; 0x000002a4 %_runtimearr_uint = OpTypeRuntimeArray %uint ; 0x000002b4, ArrayStride 4 %_struct_6 = OpTypeStruct %_runtimearr_uint ; 0x000002c0, Block %_arr__struct_6_uint_131072 = OpTypeArray %_struct_6 %uint_131072 ; 0x000002cc %_ptr_StorageBuffer__arr__struct_6_uint_131072 = OpTypePointer StorageBuffer %_arr__struct_6_uint_131072 ; 0x000002dc %_struct_7 = OpTypeStruct %_runtimearr_uint ; 0x000002ec, Block %_arr__struct_7_uint_131072 = OpTypeArray %_struct_7 %uint_131072 ; 0x000002f8 %_ptr_StorageBuffer__arr__struct_7_uint_131072 = OpTypePointer StorageBuffer %_arr__struct_7_uint_131072 ; 0x00000308 %uint_8 = OpConstant %uint 8 ; 0x00000318 %v4uint = OpTypeVector %uint 4 ; 0x00000328 %_arr_v4uint_uint_8 = OpTypeArray %v4uint %uint_8 ; 0x00000338, ArrayStride 16 %uint_23 = OpConstant %uint 23 ; 0x00000348 %_arr_v4uint_uint_23 = OpTypeArray %v4uint %uint_23 ; 0x00000358, ArrayStride 16 %_struct_8 = OpTypeStruct %uint %float %float %uint %_arr_v4uint_uint_8 %_arr_v4uint_uint_23 ; 0x00000368, Block %_ptr_Uniform__struct_8 = OpTypePointer Uniform %_struct_8 ; 0x00000388 %v3uint = OpTypeVector %uint 3 ; 0x00000398 %_ptr_Input_v3uint = OpTypePointer Input %v3uint ; 0x000003a8 %void = OpTypeVoid ; 0x000003b8 %36 = OpTypeFunction %void ; 0x000003c0 %v3float = OpTypeVector %float 3 ; 0x000003cc %double = OpTypeFloat 64 ; 0x000003dc %v3double = OpTypeVector %double 3 ; 0x000003e8 %bool = OpTypeBool ; 0x000003f8 %_ptr_Uniform_uint = OpTypePointer Uniform %uint ; 0x00000400 %_ptr_StorageBuffer__struct_6 = OpTypePointer StorageBuffer %_struct_6 ; 0x00000410 %_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint ; 0x00000420 %ulong = OpTypeInt 64 0 ; 0x00000430 %3 = OpVariable %_ptr_StorageBuffer__arr__struct_6_uint_131072 StorageBuffer ; 0x00000440, DescriptorSet 1, Binding 3 %4 = OpVariable %_ptr_StorageBuffer__arr__struct_7_uint_131072 StorageBuffer ; 0x00000450, 
DescriptorSet 1, Binding 4 %5 = OpVariable %_ptr_Uniform__struct_8 Uniform ; 0x00000460, DescriptorSet 2, Binding 0 %gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input ; 0x00000470, BuiltIn GlobalInvocationId %uint_5 = OpConstant %uint 5 ; 0x00000480 ; Function 1 %1 = OpFunction %void None %36 ; 0x00000490 %46 = OpLabel ; 0x000004a4 %47 = OpLoad %v3uint %gl_GlobalInvocationID ; 0x000004ac %48 = OpCompositeExtract %uint %47 0 ; 0x000004bc OpSelectionMerge %49 None ; 0x000004d0 OpSwitch %uint_0 %50 ; 0x000004dc %50 = OpLabel ; 0x000004e8 %51 = OpAccessChain %_ptr_Uniform_uint %5 %int_5 %uint_4 %uint_3 ; 0x000004f0 %52 = OpLoad %uint %51 ; 0x0000050c %53 = OpBitwiseAnd %uint %52 %uint_131071 ; 0x0000051c %54 = OpAccessChain %_ptr_StorageBuffer__struct_6 %3 %53 ; 0x00000530 %55 = OpArrayLength %uint %54 0 ; 0x00000544 %56 = OpIMul %uint %55 %uint_4 ; 0x00000558 %57 = OpUDiv %uint %56 %uint_24 ; 0x0000056c %58 = OpIAdd %uint %48 %uint_1 ; 0x00000580 %59 = OpUGreaterThanEqual %bool %58 %57 ; 0x00000594 OpSelectionMerge %60 None ; 0x000005a8 OpBranchConditional %59 %61 %60 ; 0x000005b4 %61 = OpLabel ; 0x000005c4 OpBranch %49 ; 0x000005cc %60 = OpLabel ; 0x000005d4 %62 = OpAccessChain %_ptr_StorageBuffer_uint %3 %53 %uint_0 %uint_0 ; 0x000005dc %63 = OpLoad %uint %62 ; 0x000005f8 %64 = OpAccessChain %_ptr_StorageBuffer_uint %3 %53 %uint_0 %uint_1 ; 0x00000608 %65 = OpLoad %uint %64 ; 0x00000624 %66 = OpUConvert %ulong %63 ; 0x00000634 %67 = OpUConvert %ulong %65 ; 0x00000644 %68 = OpShiftLeftLogical %ulong %67 %uint_32 ; 0x00000654 %69 = OpBitwiseOr %ulong %66 %68 ; 0x00000668 %70 = OpBitcast %double %69 ; 0x0000067c %71 = OpAccessChain %_ptr_StorageBuffer_uint %3 %53 %uint_0 %uint_2 ; 0x0000068c %72 = OpLoad %uint %71 ; 0x000006a8 %73 = OpAccessChain %_ptr_StorageBuffer_uint %3 %53 %uint_0 %uint_3 ; 0x000006b8 %74 = OpLoad %uint %73 ; 0x000006d4 %75 = OpUConvert %ulong %72 ; 0x000006e4 %76 = OpUConvert %ulong %74 ; 0x000006f4 %77 = OpShiftLeftLogical %ulong %76 
%uint_32 ; 0x00000704 %78 = OpBitwiseOr %ulong %75 %77 ; 0x00000718 %79 = OpBitcast %double %78 ; 0x0000072c %80 = OpAccessChain %_ptr_StorageBuffer_uint %3 %53 %uint_0 %uint_4 ; 0x0000073c %81 = OpLoad %uint %80 ; 0x00000758 %82 = OpAccessChain %_ptr_StorageBuffer_uint %3 %53 %uint_0 %uint_5 ; 0x00000768 %83 = OpLoad %uint %82 ; 0x00000784 %84 = OpUConvert %ulong %81 ; 0x00000794 %85 = OpUConvert %ulong %83 ; 0x000007a4 %86 = OpShiftLeftLogical %ulong %85 %uint_32 ; 0x000007b4 %87 = OpBitwiseOr %ulong %84 %86 ; 0x000007c8 %88 = OpBitcast %double %87 ; 0x000007dc %89 = OpCompositeConstruct %v3double %70 %79 %88 ; 0x000007ec %90 = OpShiftRightLogical %uint %58 %uint_2 ; 0x00000804 %91 = OpAccessChain %_ptr_StorageBuffer_uint %3 %53 %uint_0 %90 ; 0x00000818 %92 = OpLoad %uint %91 ; 0x00000834 %93 = OpIAdd %uint %90 %uint_1 ; 0x00000844 %94 = OpAccessChain %_ptr_StorageBuffer_uint %3 %53 %uint_0 %93 ; 0x00000858 %95 = OpLoad %uint %94 ; 0x00000874 %96 = OpUConvert %ulong %92 ; 0x00000884 %97 = OpUConvert %ulong %95 ; 0x00000894 %98 = OpShiftLeftLogical %ulong %97 %uint_32 ; 0x000008a4 %99 = OpBitwiseOr %ulong %96 %98 ; 0x000008b8 %100 = OpBitcast %double %99 ; 0x000008cc %101 = OpIAdd %uint %90 %uint_2 ; 0x000008dc %102 = OpAccessChain %_ptr_StorageBuffer_uint %3 %53 %uint_0 %101 ; 0x000008f0 %103 = OpLoad %uint %102 ; 0x0000090c %104 = OpIAdd %uint %90 %uint_3 ; 0x0000091c %105 = OpAccessChain %_ptr_StorageBuffer_uint %3 %53 %uint_0 %104 ; 0x00000930 %106 = OpLoad %uint %105 ; 0x0000094c %107 = OpUConvert %ulong %103 ; 0x0000095c %108 = OpUConvert %ulong %106 ; 0x0000096c %109 = OpShiftLeftLogical %ulong %108 %uint_32 ; 0x0000097c %110 = OpBitwiseOr %ulong %107 %109 ; 0x00000990 %111 = OpBitcast %double %110 ; 0x000009a4 %112 = OpIAdd %uint %90 %uint_4 ; 0x000009b4 %113 = OpAccessChain %_ptr_StorageBuffer_uint %3 %53 %uint_0 %112 ; 0x000009c8 %114 = OpLoad %uint %113 ; 0x000009e4 %115 = OpIAdd %uint %90 %uint_5 ; 0x000009f4 %116 = OpAccessChain 
%_ptr_StorageBuffer_uint %3 %53 %uint_0 %115 ; 0x00000a08 %117 = OpLoad %uint %116 ; 0x00000a24 %118 = OpUConvert %ulong %114 ; 0x00000a34 %119 = OpUConvert %ulong %117 ; 0x00000a44 %120 = OpShiftLeftLogical %ulong %119 %uint_32 ; 0x00000a54 %121 = OpBitwiseOr %ulong %118 %120 ; 0x00000a68 %122 = OpBitcast %double %121 ; 0x00000a7c %123 = OpCompositeConstruct %v3double %100 %111 %122 ; 0x00000a8c %124 = OpFSub %v3double %123 %89 ; 0x00000aa4 %125 = OpFConvert %v3float %124 ; 0x00000ab8 %126 = OpAccessChain %_ptr_Uniform_uint %5 %int_5 %uint_5 %uint_0 ; 0x00000ac8 %127 = OpLoad %uint %126 ; 0x00000ae4 %128 = OpBitwiseAnd %uint %127 %uint_131071 ; 0x00000af4 %129 = OpCompositeExtract %float %125 0 ; 0x00000b08 %130 = OpCompositeExtract %float %125 1 ; 0x00000b1c %131 = OpCompositeExtract %float %125 2 ; 0x00000b30 %132 = OpAccessChain %_ptr_StorageBuffer_uint %4 %128 %uint_0 %90 ; 0x00000b44 %133 = OpBitcast %uint %129 ; 0x00000b60 OpStore %132 %133 ; 0x00000b70 %134 = OpAccessChain %_ptr_StorageBuffer_uint %4 %128 %uint_0 %93 ; 0x00000b7c %135 = OpBitcast %uint %130 ; 0x00000b98 OpStore %134 %135 ; 0x00000ba8 %136 = OpAccessChain %_ptr_StorageBuffer_uint %4 %128 %uint_0 %101 ; 0x00000bb4 %137 = OpBitcast %uint %131 ; 0x00000bd0 OpStore %136 %137 ; 0x00000be0 OpBranch %49 ; 0x00000bec %49 = OpLabel ; 0x00000bf4 OpReturn ; 0x00000bfc OpFunctionEnd ; 0x00000c00 

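For example, `%66 = OpUConvert %ulong %63` (offset 0x00000634) is emitted to assemble each double from two 32-bit loads before any work is done on the double itself, and this is what requires the Int64 capability.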

Actual Behavior
Expected: just like with DXIL, the compiler should not enable capabilities that the shader does not use (DXIL only enables the 64-bit float extension rather than both).
From the D3D12 database, I can't find a device that doesn't have both I64 and F64, but the Vulkan database is offline, so I'm not sure whether any such device exists for Vulkan (for example, some mobile device).

Environment

  • DXC version: Ahead of latest release or the version in shader playground (2024-04-29)
  • Host Operating System: N/A

Metadata

Metadata

Assignees

Labels

bug (Bug, regression, crash) · spirv (Work related to SPIR-V)

Type

No type

Projects

Status

Triaged

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions