From 4c34d2736429bbc658c4d5ce511ce4e84a9b2743 Mon Sep 17 00:00:00 2001 From: Lucie Choi Date: Fri, 13 Feb 2026 20:28:27 -0800 Subject: [PATCH 01/13] Gather, CalculateLevelOfDetailUnclamped --- .../clang/include/clang/SPIRV/SpirvBuilder.h | 4 ++ tools/clang/lib/SPIRV/SpirvBuilder.cpp | 11 ++++- ...ampledtexture.calculate-lod-unclamped.hlsl | 21 ++++++++++ .../vk.sampledtexture.gather.hlsl | 42 +++++++++++++++++++ utils/hct/gen_intrin_main.txt | 4 ++ 5 files changed, 80 insertions(+), 2 deletions(-) create mode 100644 tools/clang/test/CodeGenSPIRV/vk.sampledtexture.calculate-lod-unclamped.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/vk.sampledtexture.gather.hlsl diff --git a/tools/clang/include/clang/SPIRV/SpirvBuilder.h b/tools/clang/include/clang/SPIRV/SpirvBuilder.h index 868c978c35..6d02c07866 100644 --- a/tools/clang/include/clang/SPIRV/SpirvBuilder.h +++ b/tools/clang/include/clang/SPIRV/SpirvBuilder.h @@ -339,6 +339,10 @@ class SpirvBuilder { /// \brief Creates SPIR-V instructions for gathering the given image. /// + /// If `image` is a sampled image, then that image will be gathered. + /// In this case, `sampler` must be `nullptr`. If `image` is not a sampled + /// image, a sampled image will be created by combining `image` and `sampler`. + /// /// If compareVal is given a non-null value, OpImageDrefGather or /// OpImageSparseDrefGather will be generated; otherwise, OpImageGather or /// OpImageSparseGather will be generated. diff --git a/tools/clang/lib/SPIRV/SpirvBuilder.cpp b/tools/clang/lib/SPIRV/SpirvBuilder.cpp index 7ce0508585..a85916d106 100644 --- a/tools/clang/lib/SPIRV/SpirvBuilder.cpp +++ b/tools/clang/lib/SPIRV/SpirvBuilder.cpp @@ -714,8 +714,15 @@ SpirvInstruction *SpirvBuilder::createImageGather( assert(insertPoint && "null insert point"); // An OpSampledImage is required to do the image sampling. 
- auto *sampledImage = - createSampledImage(imageType, image, sampler, loc, range); + // Skip creating OpSampledImage if the imageType is a sampled texture. + SpirvInstruction *sampledImage = nullptr; + if (isSampledTexture(imageType)) { + assert(!sampler && + "sampler must be null when sampling from a sampled texture"); + sampledImage = image; + } else { + sampledImage = createSampledImage(imageType, image, sampler, loc, range); + } // TODO: Update ImageGather to accept minLod if necessary. const auto mask = composeImageOperandsMask( diff --git a/tools/clang/test/CodeGenSPIRV/vk.sampledtexture.calculate-lod-unclamped.hlsl b/tools/clang/test/CodeGenSPIRV/vk.sampledtexture.calculate-lod-unclamped.hlsl new file mode 100644 index 0000000000..aebc57738b --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.sampledtexture.calculate-lod-unclamped.hlsl @@ -0,0 +1,21 @@ +// RUN: %dxc -T ps_6_8 -E main -fcgl %s -spirv | FileCheck %s + +// CHECK: OpCapability ImageQuery + +vk::SampledTexture2D t1 : register(t0); + +// CHECK: %type_2d_image = OpTypeImage %float 2D 0 0 0 1 Unknown +// CHECK: %type_sampled_image = OpTypeSampledImage %type_2d_image +// CHECK: [[ptr:%[a-zA-Z0-9_]+]] = OpTypePointer UniformConstant %type_sampled_image + +// CHECK: %t1 = OpVariable [[ptr]] UniformConstant + +void main() { + float2 xy = float2(0.5, 0.5); + +//CHECK: [[tex1:%[a-zA-Z0-9_]+]] = OpLoad %type_sampled_image %t1 +//CHECK-NEXT: [[xy_load:%[a-zA-Z0-9_]+]] = OpLoad %v2float %xy +//CHECK-NEXT: [[query:%[a-zA-Z0-9_]+]] = OpImageQueryLod %v2float [[tex1]] [[xy_load]] +//CHECK-NEXT: {{%[0-9]+}} = OpCompositeExtract %float [[query]] 1 + float lod1 = t1.CalculateLevelOfDetailUnclamped(xy); +} \ No newline at end of file diff --git a/tools/clang/test/CodeGenSPIRV/vk.sampledtexture.gather.hlsl b/tools/clang/test/CodeGenSPIRV/vk.sampledtexture.gather.hlsl new file mode 100644 index 0000000000..f7a26c153a --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.sampledtexture.gather.hlsl @@ -0,0 +1,42 @@ 
+// RUN: %dxc -T ps_6_7 -E main -fcgl %s -spirv | FileCheck %s + +// CHECK: OpCapability SparseResidency + +// CHECK: [[v2fc:%[0-9]+]] = OpConstantComposite %v2float %float_0_5 %float_0_25 +// CHECK: [[v2ic:%[0-9]+]] = OpConstantComposite %v2int %int_2 %int_3 + +// CHECK: [[type_2d_image_1:%[a-zA-Z0-9_]+]] = OpTypeImage %float 2D 0 0 0 1 Unknown +// CHECK: [[type_sampled_image_1:%[a-zA-Z0-9_]+]] = OpTypeSampledImage [[type_2d_image_1]] +// CHECK: [[ptr_type_1:%[a-zA-Z0-9_]+]] = OpTypePointer UniformConstant [[type_sampled_image_1]] + +// CHECK: [[type_2d_image_2:%[a-zA-Z0-9_]+]] = OpTypeImage %uint 2D 0 0 0 1 Unknown +// CHECK: [[type_sampled_image_2:%[a-zA-Z0-9_]+]] = OpTypeSampledImage [[type_2d_image_2]] +// CHECK: [[ptr_type_2:%[a-zA-Z0-9_]+]] = OpTypePointer UniformConstant [[type_sampled_image_2]] + +// CHECK: %SparseResidencyStruct = OpTypeStruct %uint %v4float + +// CHECK: [[tex1:%[a-zA-Z0-9_]+]] = OpVariable [[ptr_type_1]] UniformConstant +// CHECK: [[tex2:%[a-zA-Z0-9_]+]] = OpVariable [[ptr_type_2]] UniformConstant + +vk::SampledTexture2D tex1 : register(t1); +vk::SampledTexture2D tex2 : register(t2); + +float4 main() : SV_Target { + +// CHECK: [[tex1_load:%[a-zA-Z0-9_]+]] = OpLoad [[type_sampled_image_1]] [[tex1]] +// CHECK: [[val1:%[a-zA-Z0-9_]+]] = OpImageGather %v4float [[tex1_load]] [[v2fc]] %int_0 None + float4 val1 = tex1.Gather(float2(0.5, 0.25)); + +// CHECK: [[tex2_load:%[a-zA-Z0-9_]+]] = OpLoad [[type_sampled_image_2]] [[tex2]] +// CHECK: [[val2:%[a-zA-Z0-9_]+]] = OpImageGather %v4uint [[tex2_load]] [[v2fc]] %int_0 ConstOffset [[v2ic]] + uint4 val2 = tex2.Gather(float2(0.5, 0.25), int2(2, 3)); + +// CHECK: [[tex3_load:%[a-zA-Z0-9_]+]] = OpLoad [[type_sampled_image_1]] [[tex1]] +// CHECK: [[val3:%[a-zA-Z0-9_]+]] = OpImageSparseGather %SparseResidencyStruct [[tex3_load]] [[v2fc]] %int_0 ConstOffset [[v2ic]] +// CHECK: [[status_0:%[a-zA-Z0-9_]+]] = OpCompositeExtract %uint [[val3]] 0 +// CHECK: OpStore %status [[status_0]] + uint status; + float4 
val3 = tex1.Gather(float2(0.5, 0.25), int2(2, 3), status); + + return 1.0; +} diff --git a/utils/hct/gen_intrin_main.txt b/utils/hct/gen_intrin_main.txt index 1a0d3b7a3b..4d5d5126c8 100644 --- a/utils/hct/gen_intrin_main.txt +++ b/utils/hct/gen_intrin_main.txt @@ -1239,4 +1239,8 @@ namespace VkSampledTexture2DMethods { $classT [[ro]] Sample(in float<2> x, in int<2> o, in float clamp) : tex2d_t_o_cl; $classT [[]] Sample(in float<2> x, in int<2> o, in float clamp, out uint_only status) : tex2d_t_o_cl_s; float [[ro]] CalculateLevelOfDetail(in float<2> x) : tex2d_t_calc_lod; + float [[ro]] CalculateLevelOfDetailUnclamped(in float<2> x) : tex2d_t_calc_lod_unclamped; + $match<0, -1> void<4> [[ro]] Gather(in float<2> x) : tex2d_t_gather; + $match<0, -1> void<4> [[ro]] Gather(in float<2> x, in int<2> o) : tex2d_t_gather_o; + $match<0, -1> void<4> [[]] Gather(in float<2> x, in int<2> o, out uint_only status) : tex2d_t_gather_o_s; } namespace From a6d6c73c500c9fa8c5bc44f96a6f6452422e7267 Mon Sep 17 00:00:00 2001 From: luciechoi Date: Sat, 17 Jan 2026 13:08:41 +0000 Subject: [PATCH 02/13] GetDimensions Implement `GetDimensions` based on new method. 
--- tools/clang/lib/SPIRV/SpirvEmitter.cpp | 22 ++++- .../vk.sampledtexture.get-dimensions.hlsl | 94 +++++++++++++++++++ utils/hct/gen_intrin_main.txt | 4 + 3 files changed, 117 insertions(+), 3 deletions(-) create mode 100644 tools/clang/test/CodeGenSPIRV/vk.sampledtexture.get-dimensions.hlsl diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.cpp b/tools/clang/lib/SPIRV/SpirvEmitter.cpp index f77e9ac82b..bfeab5f376 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.cpp +++ b/tools/clang/lib/SPIRV/SpirvEmitter.cpp @@ -4328,7 +4328,18 @@ SpirvEmitter::processBufferTextureGetDimensions(const CXXMemberCallExpr *expr) { const Expr *mipLevel = nullptr, *numLevels = nullptr, *numSamples = nullptr; assert(isTexture(type) || isRWTexture(type) || isBuffer(type) || - isRWBuffer(type)); + isRWBuffer(type) || isSampledTexture(type)); + if (isSampledTexture(type)) { + LowerTypeVisitor lowerTypeVisitor(astContext, spvContext, spirvOptions, + spvBuilder); + const SpirvType *spvType = lowerTypeVisitor.lowerType( + type, SpirvLayoutRule::Void, llvm::None, expr->getExprLoc()); + // Get image type based on type, assuming type is a sampledimage type + const auto *sampledType = cast(spvType); + const SpirvType *imgType = sampledType->getImageType(); + objectInstr = spvBuilder.createUnaryOp(spv::Op::OpImage, imgType, + objectInstr, expr->getExprLoc()); + } // For Texture1D, arguments are either: // a) width @@ -4362,6 +4373,9 @@ SpirvEmitter::processBufferTextureGetDimensions(const CXXMemberCallExpr *expr) { // a) width, height, elements // b) MipLevel, width, height, elements, NumLevels + // SampledTexture types follow the same rules above, as + // this method doesn't require a Sampler argument. + // Note: SPIR-V Spec requires return type of OpImageQuerySize(Lod) to be a // scalar/vector of integers. SPIR-V Spec also requires return type of // OpImageQueryLevels and OpImageQuerySamples to be scalar integers. 
@@ -4379,6 +4393,7 @@ SpirvEmitter::processBufferTextureGetDimensions(const CXXMemberCallExpr *expr) { if ((typeName == "Texture1D" && numArgs > 1) || (typeName == "Texture2D" && numArgs > 2) || + (typeName == "SampledTexture2D" && numArgs > 2) || (typeName == "TextureCube" && numArgs > 2) || (typeName == "Texture3D" && numArgs > 3) || (typeName == "Texture1DArray" && numArgs > 2) || @@ -4417,7 +4432,7 @@ SpirvEmitter::processBufferTextureGetDimensions(const CXXMemberCallExpr *expr) { // Only Texture types use ImageQuerySizeLod. // TextureMS, RWTexture, Buffers, RWBuffers use ImageQuerySize. SpirvInstruction *lod = nullptr; - if (isTexture(type) && !numSamples) { + if ((isTexture(type) || isSampledTexture(type)) && !numSamples) { if (mipLevel) { // For Texture types when mipLevel argument is present. lod = doExpr(mipLevel, range); @@ -6433,7 +6448,8 @@ SpirvInstruction * SpirvEmitter::processGetDimensions(const CXXMemberCallExpr *expr) { const auto objectType = expr->getImplicitObjectArgument()->getType(); if (isTexture(objectType) || isRWTexture(objectType) || - isBuffer(objectType) || isRWBuffer(objectType)) { + isBuffer(objectType) || isRWBuffer(objectType) || + isSampledTexture(objectType)) { return processBufferTextureGetDimensions(expr); } else if (isByteAddressBuffer(objectType) || isRWByteAddressBuffer(objectType) || diff --git a/tools/clang/test/CodeGenSPIRV/vk.sampledtexture.get-dimensions.hlsl b/tools/clang/test/CodeGenSPIRV/vk.sampledtexture.get-dimensions.hlsl new file mode 100644 index 0000000000..cf98bd8649 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.sampledtexture.get-dimensions.hlsl @@ -0,0 +1,94 @@ +// RUN: %dxc -T ps_6_0 -E main -fcgl %s -spirv | FileCheck %s +// RUN: not %dxc -T ps_6_0 -E main -fcgl %s -spirv -DERROR 2>&1 | FileCheck %s --check-prefix=ERROR + +// CHECK: OpCapability ImageQuery + +vk::SampledTexture2D t1; + +void main() { + uint mipLevel = 1; + uint width, height, numLevels; + +// CHECK: [[t1_load:%[0-9]+]] = OpLoad 
%type_sampled_image %t1 +// CHECK-NEXT: [[image1:%[0-9]+]] = OpImage %type_2d_image [[t1_load]] +// CHECK-NEXT: [[query1:%[0-9]+]] = OpImageQuerySizeLod %v2uint [[image1]] %int_0 +// CHECK-NEXT: [[query1_0:%[0-9]+]] = OpCompositeExtract %uint [[query1]] 0 +// CHECK-NEXT: OpStore %width [[query1_0]] +// CHECK-NEXT: [[query1_1:%[0-9]+]] = OpCompositeExtract %uint [[query1]] 1 +// CHECK-NEXT: OpStore %height [[query1_1]] + t1.GetDimensions(width, height); + +// CHECK: [[t1_load:%[0-9]+]] = OpLoad %type_sampled_image %t1 +// CHECK-NEXT: [[image2:%[0-9]+]] = OpImage %type_2d_image [[t1_load]] +// CHECK-NEXT: [[mip:%[0-9]+]] = OpLoad %uint %mipLevel +// CHECK-NEXT: [[query2:%[0-9]+]] = OpImageQuerySizeLod %v2uint [[image2]] [[mip]] +// CHECK-NEXT: [[query2_0:%[0-9]+]] = OpCompositeExtract %uint [[query2]] 0 +// CHECK-NEXT: OpStore %width [[query2_0]] +// CHECK-NEXT: [[query2_1:%[0-9]+]] = OpCompositeExtract %uint [[query2]] 1 +// CHECK-NEXT: OpStore %height [[query2_1]] +// CHECK-NEXT: [[query_level_2:%[0-9]+]] = OpImageQueryLevels %uint [[image2]] +// CHECK-NEXT: OpStore %numLevels [[query_level_2]] + t1.GetDimensions(mipLevel, width, height, numLevels); + + float f_width, f_height, f_numLevels; +// CHECK: [[t1_load:%[0-9]+]] = OpLoad %type_sampled_image %t1 +// CHECK-NEXT: [[image1:%[0-9]+]] = OpImage %type_2d_image [[t1_load]] +// CHECK-NEXT: [[query1:%[0-9]+]] = OpImageQuerySizeLod %v2uint [[image1]] %int_0 +// CHECK-NEXT: [[query1_0:%[0-9]+]] = OpCompositeExtract %uint [[query1]] 0 +// CHECK-NEXT: [[f_query1_0:%[0-9]+]] = OpConvertUToF %float [[query1_0]] +// CHECK-NEXT: OpStore %f_width [[f_query1_0]] +// CHECK-NEXT: [[query1_1:%[0-9]+]] = OpCompositeExtract %uint [[query1]] 1 +// CHECK-NEXT: [[f_query1_1:%[0-9]+]] = OpConvertUToF %float [[query1_1]] +// CHECK-NEXT: OpStore %f_height [[f_query1_1]] + t1.GetDimensions(f_width, f_height); + +// CHECK: [[t1_load:%[0-9]+]] = OpLoad %type_sampled_image %t1 +// CHECK-NEXT: [[image2:%[0-9]+]] = OpImage %type_2d_image 
[[t1_load]] +// CHECK-NEXT: [[mip:%[0-9]+]] = OpLoad %uint %mipLevel +// CHECK-NEXT: [[query2:%[0-9]+]] = OpImageQuerySizeLod %v2uint [[image2]] [[mip]] +// CHECK-NEXT: [[query2_0:%[0-9]+]] = OpCompositeExtract %uint [[query2]] 0 +// CHECK-NEXT: [[f_query2_0:%[0-9]+]] = OpConvertUToF %float [[query2_0]] +// CHECK-NEXT: OpStore %f_width [[f_query2_0]] +// CHECK-NEXT: [[query2_1:%[0-9]+]] = OpCompositeExtract %uint [[query2]] 1 +// CHECK-NEXT: [[f_query2_1:%[0-9]+]] = OpConvertUToF %float [[query2_1]] +// CHECK-NEXT: OpStore %f_height [[f_query2_1]] +// CHECK-NEXT: [[query_level_2:%[0-9]+]] = OpImageQueryLevels %uint [[image2]] +// CHECK-NEXT: [[f_query_level_2:%[0-9]+]] = OpConvertUToF %float [[query_level_2]] +// CHECK-NEXT: OpStore %f_numLevels [[f_query_level_2]] + t1.GetDimensions(mipLevel, f_width, f_height, f_numLevels); + + int i_width, i_height, i_numLevels; +// CHECK: [[t1_load:%[0-9]+]] = OpLoad %type_sampled_image %t1 +// CHECK-NEXT: [[image1:%[0-9]+]] = OpImage %type_2d_image [[t1_load]] +// CHECK-NEXT: [[query1:%[0-9]+]] = OpImageQuerySizeLod %v2uint [[image1]] %int_0 +// CHECK-NEXT: [[query1_0:%[0-9]+]] = OpCompositeExtract %uint [[query1]] 0 +// CHECK-NEXT: [[query_0_int:%[0-9]+]] = OpBitcast %int [[query1_0]] +// CHECK-NEXT: OpStore %i_width [[query_0_int]] +// CHECK-NEXT: [[query1_1:%[0-9]+]] = OpCompositeExtract %uint [[query1]] 1 +// CHECK-NEXT: [[query_1_int:%[0-9]+]] = OpBitcast %int [[query1_1]] +// CHECK-NEXT: OpStore %i_height [[query_1_int]] + t1.GetDimensions(i_width, i_height); + +// CHECK: [[t1_load:%[0-9]+]] = OpLoad %type_sampled_image %t1 +// CHECK-NEXT: [[image2:%[0-9]+]] = OpImage %type_2d_image [[t1_load]] +// CHECK-NEXT: [[mip:%[0-9]+]] = OpLoad %uint %mipLevel +// CHECK-NEXT: [[query2:%[0-9]+]] = OpImageQuerySizeLod %v2uint [[image2]] [[mip]] +// CHECK-NEXT: [[query2_0:%[0-9]+]] = OpCompositeExtract %uint [[query2]] 0 +// CHECK-NEXT: [[query_0_int:%[0-9]+]] = OpBitcast %int [[query2_0]] +// CHECK-NEXT: OpStore %i_width 
[[query_0_int]] +// CHECK-NEXT: [[query2_1:%[0-9]+]] = OpCompositeExtract %uint [[query2]] 1 +// CHECK-NEXT: [[query_1_int:%[0-9]+]] = OpBitcast %int [[query2_1]] +// CHECK-NEXT: OpStore %i_height [[query_1_int]] +// CHECK-NEXT: [[query_level_2:%[0-9]+]] = OpImageQueryLevels %uint [[image2]] +// CHECK-NEXT: [[query_level_2_int:%[0-9]+]] = OpBitcast %int [[query_level_2]] +// CHECK-NEXT: OpStore %i_numLevels [[query_level_2_int]] + t1.GetDimensions(mipLevel, i_width, i_height, i_numLevels); + +#ifdef ERROR +// ERROR: error: Output argument must be an l-value + t1.GetDimensions(mipLevel, 0, height, numLevels); + +// ERROR: error: Output argument must be an l-value + t1.GetDimensions(width, 20); +#endif +} \ No newline at end of file diff --git a/utils/hct/gen_intrin_main.txt b/utils/hct/gen_intrin_main.txt index 4d5d5126c8..899124e896 100644 --- a/utils/hct/gen_intrin_main.txt +++ b/utils/hct/gen_intrin_main.txt @@ -1243,4 +1243,8 @@ namespace VkSampledTexture2DMethods { $match<0, -1> void<4> [[ro]] Gather(in float<2> x) : tex2d_t_gather; $match<0, -1> void<4> [[ro]] Gather(in float<2> x, in int<2> o) : tex2d_t_gather_o; $match<0, -1> void<4> [[]] Gather(in float<2> x, in int<2> o, out uint_only status) : tex2d_t_gather_o_s; + void [[]] GetDimensions(in uint x, out uint_only width, out $type2 height, out $type2 levels) : resinfo_uint; + void [[]] GetDimensions(in uint x, out float_like width, out $type2 height, out $type2 levels) : resinfo; + void [[]] GetDimensions(out uint_only width, out $type1 height) : resinfo_uint_o; + void [[]] GetDimensions(out float_like width, out $type1 height) : resinfo_o; } namespace From f0f989cc36c87d07b9174a18249eddcb3d2dabb9 Mon Sep 17 00:00:00 2001 From: luciechoi Date: Sat, 24 Jan 2026 00:14:38 +0000 Subject: [PATCH 03/13] Load Implement using new method --- tools/clang/lib/SPIRV/SpirvEmitter.cpp | 23 +++++++-- .../CodeGenSPIRV/vk.sampledtexture.load.hlsl | 48 +++++++++++++++++++ utils/hct/gen_intrin_main.txt | 3 ++ 3 files 
changed, 70 insertions(+), 4 deletions(-) create mode 100644 tools/clang/test/CodeGenSPIRV/vk.sampledtexture.load.hlsl diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.cpp b/tools/clang/lib/SPIRV/SpirvEmitter.cpp index bfeab5f376..4d33778295 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.cpp +++ b/tools/clang/lib/SPIRV/SpirvEmitter.cpp @@ -4693,9 +4693,11 @@ SpirvInstruction *SpirvEmitter::processBufferTextureLoad( // The result type of an OpImageFetch must be a vec4 of float or int. const auto type = object->getType(); assert(isBuffer(type) || isRWBuffer(type) || isTexture(type) || - isRWTexture(type) || isSubpassInput(type) || isSubpassInputMS(type)); + isRWTexture(type) || isSubpassInput(type) || isSubpassInputMS(type) || + isSampledTexture(type)); - const bool doFetch = isBuffer(type) || isTexture(type); + const bool doFetch = + isBuffer(type) || isTexture(type) || isSampledTexture(type); const bool rasterizerOrdered = isRasterizerOrderedView(type); if (rasterizerOrdered) { @@ -4759,6 +4761,18 @@ SpirvInstruction *SpirvEmitter::processBufferTextureLoad( // OpImageFetch and OpImageRead can only fetch a vector of 4 elements. 
const QualType texelType = astContext.getExtVectorType(elemType, 4u); + + if (isSampledTexture(type)) { + LowerTypeVisitor lowerTypeVisitor(astContext, spvContext, spirvOptions, + spvBuilder); + const SpirvType *spvType = lowerTypeVisitor.lowerType( + type, SpirvLayoutRule::Void, llvm::None, loc); + // Get image type based on type, assuming type is a sampledimage type + const auto *sampledImageType = cast(spvType); + const SpirvType *imgType = sampledImageType->getImageType(); + objectInfo = + spvBuilder.createUnaryOp(spv::Op::OpImage, imgType, objectInfo, loc); + } auto *texel = spvBuilder.createImageFetchOrRead( doFetch, texelType, type, objectInfo, location, lod, constOffset, /*constOffsets*/ nullptr, sampleNumber, residencyCode, loc, range); @@ -6345,7 +6359,8 @@ SpirvEmitter::processTextureSampleCmpLevel(const CXXMemberCallExpr *expr) { SpirvInstruction * SpirvEmitter::processBufferTextureLoad(const CXXMemberCallExpr *expr) { // Signature: - // For Texture1D, Texture1DArray, Texture2D, Texture2DArray, Texture3D: + // For Texture1D, Texture1DArray, Texture2D, Texture2DArray, Texture3D + // and their SampledTexture variants: // ret Object.Load(int Location // [, int Offset] // [, uint status]); @@ -6403,7 +6418,7 @@ SpirvEmitter::processBufferTextureLoad(const CXXMemberCallExpr *expr) { // and 1 for location. const bool hasOffsetArg = numArgs - hasStatusArg - textureMS - 1 > 0; - if (isTexture(objectType)) { + if (isTexture(objectType) || isSampledTexture(objectType)) { // .Load() has a second optional paramter for offset. 
SpirvInstruction *location = doExpr(locationArg); SpirvInstruction *constOffset = nullptr, *varOffset = nullptr; diff --git a/tools/clang/test/CodeGenSPIRV/vk.sampledtexture.load.hlsl b/tools/clang/test/CodeGenSPIRV/vk.sampledtexture.load.hlsl new file mode 100644 index 0000000000..8ecd9e70a3 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.sampledtexture.load.hlsl @@ -0,0 +1,48 @@ +// RUN: %dxc -T ps_6_0 -E main -fcgl %s -spirv | FileCheck %s + + +vk::SampledTexture2D tex2D_F4 : register(t1); + +// CHECK: OpCapability SparseResidency + +// CHECK: [[v2ic:%[0-9]+]] = OpConstantComposite %v2int %int_1 %int_2 + +// CHECK: %SparseResidencyStruct = OpTypeStruct %uint %v4float + +float4 main(int3 location: A) : SV_Target { + uint status; + +// CHECK: [[loc:%[0-9]+]] = OpLoad %v3int %location +// CHECK-NEXT: [[coord_0:%[0-9]+]] = OpVectorShuffle %v2int [[loc]] [[loc]] 0 1 +// CHECK-NEXT: [[lod_0:%[0-9]+]] = OpCompositeExtract %int [[loc]] 2 +// CHECK-NEXT: [[tex:%[0-9]+]] = OpLoad %type_sampled_image %tex2D_F4 +// CHECK-NEXT: [[tex_img:%[0-9]+]] = OpImage %type_2d_image [[tex]] +// CHECK-NEXT: {{%[0-9]+}} = OpImageFetch %v4float [[tex_img]] [[coord_0]] Lod [[lod_0]] + float4 val1 = tex2D_F4.Load(location); + +// CHECK: [[loc:%[0-9]+]] = OpLoad %v3int %location +// CHECK-NEXT: [[coord_0:%[0-9]+]] = OpVectorShuffle %v2int [[loc]] [[loc]] 0 1 +// CHECK-NEXT: [[lod_0:%[0-9]+]] = OpCompositeExtract %int [[loc]] 2 +// CHECK-NEXT: [[tex:%[0-9]+]] = OpLoad %type_sampled_image %tex2D_F4 +// CHECK-NEXT: [[tex_img:%[0-9]+]] = OpImage %type_2d_image [[tex]] +// CHECK-NEXT: {{%[0-9]+}} = OpImageFetch %v4float [[tex_img]] [[coord_0]] Lod|ConstOffset [[lod_0]] [[v2ic]] + float4 val2 = tex2D_F4.Load(location, int2(1, 2)); + +///////////////////////////////// +/// Using the Status argument /// +///////////////////////////////// + +// CHECK: [[loc:%[0-9]+]] = OpLoad %v3int %location +// CHECK-NEXT: [[coord_0:%[0-9]+]] = OpVectorShuffle %v2int [[loc]] [[loc]] 0 1 +// CHECK-NEXT: 
[[lod_0:%[0-9]+]] = OpCompositeExtract %int [[loc]] 2 +// CHECK-NEXT: [[tex:%[0-9]+]] = OpLoad %type_sampled_image %tex2D_F4 +// CHECK-NEXT: [[tex_img:%[0-9]+]] = OpImage %type_2d_image [[tex]] +// CHECK-NEXT:[[structResult:%[0-9]+]] = OpImageSparseFetch %SparseResidencyStruct [[tex_img]] [[coord_0]] Lod|ConstOffset [[lod_0]] [[v2ic]] +// CHECK-NEXT: [[status:%[0-9]+]] = OpCompositeExtract %uint [[structResult]] 0 +// CHECK-NEXT: OpStore %status [[status]] +// CHECK-NEXT: [[v4result:%[0-9]+]] = OpCompositeExtract %v4float [[structResult]] 1 +// CHECK-NEXT: OpStore %val3 [[v4result]] + float4 val3 = tex2D_F4.Load(location, int2(1, 2), status); + + return 1.0; +} diff --git a/utils/hct/gen_intrin_main.txt b/utils/hct/gen_intrin_main.txt index 899124e896..dc0be344aa 100644 --- a/utils/hct/gen_intrin_main.txt +++ b/utils/hct/gen_intrin_main.txt @@ -1247,4 +1247,7 @@ namespace VkSampledTexture2DMethods { void [[]] GetDimensions(in uint x, out float_like width, out $type2 height, out $type2 levels) : resinfo; void [[]] GetDimensions(out uint_only width, out $type1 height) : resinfo_uint_o; void [[]] GetDimensions(out float_like width, out $type1 height) : resinfo_o; + $classT [[ro]] Load(in int<3> x) : tex2d_t_load; + $classT [[ro]] Load(in int<3> x, in int<2> o) : tex2d_t_load_o; + $classT [[]] Load(in int<3> x, in int<2> o, out uint_only status) : tex2d_t_load_o_s; } namespace From 44909b7c6f95d9be5f94f6869e33164980937156 Mon Sep 17 00:00:00 2001 From: luciechoi Date: Mon, 26 Jan 2026 09:50:48 +0000 Subject: [PATCH 04/13] SampleBias, SampleLevel, SampleGrad, SampleCmp, SampleCmpLevelZero, SampleCmpLevel, SampleCmpGrad, SampleCmpBias --- tools/clang/lib/SPIRV/SpirvEmitter.cpp | 278 ++++++++++++++---- .../vk.sampledtexture.cmp-level.hlsl | 32 ++ .../vk.sampledtexture.sample-bias.hlsl | 33 +++ .../vk.sampledtexture.sample-cmp-bias.hlsl | 33 +++ .../vk.sampledtexture.sample-cmp-grad.hlsl | 39 +++ ....sampledtexture.sample-cmp-level-zero.hlsl | 32 ++ 
.../vk.sampledtexture.sample-cmp.hlsl | 37 +++ .../vk.sampledtexture.sample-grad.hlsl | 38 +++ .../vk.sampledtexture.sample-level.hlsl | 32 ++ utils/hct/gen_intrin_main.txt | 29 ++ 10 files changed, 521 insertions(+), 62 deletions(-) create mode 100644 tools/clang/test/CodeGenSPIRV/vk.sampledtexture.cmp-level.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/vk.sampledtexture.sample-bias.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/vk.sampledtexture.sample-cmp-bias.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/vk.sampledtexture.sample-cmp-grad.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/vk.sampledtexture.sample-cmp-level-zero.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/vk.sampledtexture.sample-cmp.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/vk.sampledtexture.sample-grad.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/vk.sampledtexture.sample-level.hlsl diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.cpp b/tools/clang/lib/SPIRV/SpirvEmitter.cpp index 4d33778295..f6a0083a1a 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.cpp +++ b/tools/clang/lib/SPIRV/SpirvEmitter.cpp @@ -5955,6 +5955,8 @@ SpirvEmitter::processTextureSampleBiasLevel(const CXXMemberCallExpr *expr, // [, int Offset] // [, float clamp] // [, out uint Status]); + // Their SampledTexture variants have the same signature without the + // sampler_state parameter. // // For TextureCube and TextureCubeArray: // DXGI_FORMAT Object.SampleBias(sampler_state S, @@ -5969,6 +5971,8 @@ SpirvEmitter::processTextureSampleBiasLevel(const CXXMemberCallExpr *expr, // float LOD // [, int Offset] // [, out uint Status]); + // Their SampledTexture variants have the same signature without the + // sampler_state parameter. 
// // For TextureCube and TextureCubeArray: // DXGI_FORMAT Object.SampleLevel(sampler_state S, @@ -5980,37 +5984,57 @@ SpirvEmitter::processTextureSampleBiasLevel(const CXXMemberCallExpr *expr, const bool hasStatusArg = expr->getArg(numArgs - 1)->getType()->isUnsignedIntegerType(); auto *status = hasStatusArg ? doExpr(expr->getArg(numArgs - 1)) : nullptr; + const auto *imageExpr = expr->getImplicitObjectArgument(); + const QualType imageType = imageExpr->getType(); + const bool isImageSampledTexture = isSampledTexture(imageType); + + int samplerIndex, coordinateIndex, biasIndex, offsetIndex, clampIndex; + if (isImageSampledTexture) { + samplerIndex = -1; // non-existent + coordinateIndex = 0; + biasIndex = 1; + offsetIndex = 2; + clampIndex = 3; + } else { + samplerIndex = 0; + coordinateIndex = 1; + biasIndex = 2; + offsetIndex = 3; + clampIndex = 4; + } SpirvInstruction *clamp = nullptr; // The .SampleLevel() methods do not take the clamp argument. if (isBias) { - if (numArgs > 3 && expr->getArg(3)->getType()->isFloatingType()) - clamp = doExpr(expr->getArg(3)); - else if (numArgs > 4 && expr->getArg(4)->getType()->isFloatingType()) - clamp = doExpr(expr->getArg(4)); + if (numArgs > offsetIndex && + expr->getArg(offsetIndex)->getType()->isFloatingType()) + clamp = doExpr(expr->getArg(offsetIndex)); + else if (numArgs > offsetIndex + 1 && + expr->getArg(offsetIndex + 1)->getType()->isFloatingType()) + clamp = doExpr(expr->getArg(offsetIndex + 1)); } const bool hasClampArg = clamp != nullptr; // Subtract 1 for clamp (if it exists), 1 for status (if it exists), // and 3 for sampler_state, location, and Bias/LOD. 
- const bool hasOffsetArg = numArgs - hasClampArg - hasStatusArg - 3 > 0; + const bool hasOffsetArg = + numArgs - hasClampArg - hasStatusArg - offsetIndex > 0; - const auto *imageExpr = expr->getImplicitObjectArgument(); - const QualType imageType = imageExpr->getType(); auto *image = loadIfGLValue(imageExpr); - auto *sampler = doExpr(expr->getArg(0)); - auto *coordinate = doExpr(expr->getArg(1)); + auto *sampler = + samplerIndex < 0 ? nullptr : doExpr(expr->getArg(samplerIndex)); + auto *coordinate = doExpr(expr->getArg(coordinateIndex)); SpirvInstruction *lod = nullptr; SpirvInstruction *bias = nullptr; if (isBias) { - bias = doExpr(expr->getArg(2)); + bias = doExpr(expr->getArg(biasIndex)); } else { - lod = doExpr(expr->getArg(2)); + lod = doExpr(expr->getArg(biasIndex)); } // If offset is present in .Bias()/.SampleLevel(), it is the fourth argument. SpirvInstruction *constOffset = nullptr, *varOffset = nullptr; if (hasOffsetArg) - handleOffsetInMethodCall(expr, 3, &constOffset, &varOffset); + handleOffsetInMethodCall(expr, offsetIndex, &constOffset, &varOffset); const auto retType = expr->getDirectCallee()->getReturnType(); @@ -6037,6 +6061,8 @@ SpirvEmitter::processTextureSampleGrad(const CXXMemberCallExpr *expr) { // [, int Offset] // [, float Clamp] // [, out uint Status]); + // Their SampledTexture variants have the same signature without the + // sampler_state parameter. // // For TextureCube and TextureCubeArray: // DXGI_FORMAT Object.SampleGrad(sampler_state S, @@ -6050,29 +6076,51 @@ SpirvEmitter::processTextureSampleGrad(const CXXMemberCallExpr *expr) { const bool hasStatusArg = expr->getArg(numArgs - 1)->getType()->isUnsignedIntegerType(); auto *status = hasStatusArg ? 
doExpr(expr->getArg(numArgs - 1)) : nullptr; + const auto *imageExpr = expr->getImplicitObjectArgument(); + const QualType imageType = imageExpr->getType(); + const bool isImageSampledTexture = isSampledTexture(imageType); + + int samplerIndex, coordinateIndex, ddxIndex, ddyIndex, offsetIndex, + clampIndex; + if (isImageSampledTexture) { + samplerIndex = -1; // non-existent + coordinateIndex = 0; + ddxIndex = 1; + ddyIndex = 2; + offsetIndex = 3; + clampIndex = 4; + } else { + samplerIndex = 0; + coordinateIndex = 1; + ddxIndex = 2; + ddyIndex = 3; + offsetIndex = 4; + clampIndex = 5; + } SpirvInstruction *clamp = nullptr; - if (numArgs > 4 && expr->getArg(4)->getType()->isFloatingType()) - clamp = doExpr(expr->getArg(4)); - else if (numArgs > 5 && expr->getArg(5)->getType()->isFloatingType()) - clamp = doExpr(expr->getArg(5)); + if (numArgs > offsetIndex && + expr->getArg(offsetIndex)->getType()->isFloatingType()) + clamp = doExpr(expr->getArg(offsetIndex)); + else if (numArgs > offsetIndex + 1 && + expr->getArg(offsetIndex + 1)->getType()->isFloatingType()) + clamp = doExpr(expr->getArg(offsetIndex + 1)); const bool hasClampArg = clamp != nullptr; // Subtract 1 for clamp (if it exists), 1 for status (if it exists), // and 4 for sampler_state, location, DDX, and DDY; - const bool hasOffsetArg = numArgs - hasClampArg - hasStatusArg - 4 > 0; + const bool hasOffsetArg = + numArgs - hasClampArg - hasStatusArg - offsetIndex > 0; - const auto *imageExpr = expr->getImplicitObjectArgument(); - const QualType imageType = imageExpr->getType(); auto *image = loadIfGLValue(imageExpr); - auto *sampler = doExpr(expr->getArg(0)); - auto *coordinate = doExpr(expr->getArg(1)); - auto *ddx = doExpr(expr->getArg(2)); - auto *ddy = doExpr(expr->getArg(3)); + auto *sampler = samplerIndex < 0 ? 
nullptr : doExpr(expr->getArg(0)); + auto *coordinate = doExpr(expr->getArg(coordinateIndex)); + auto *ddx = doExpr(expr->getArg(ddxIndex)); + auto *ddy = doExpr(expr->getArg(ddyIndex)); // If offset is present in .SampleGrad(), it is the fifth argument. SpirvInstruction *constOffset = nullptr, *varOffset = nullptr; if (hasOffsetArg) - handleOffsetInMethodCall(expr, 4, &constOffset, &varOffset); + handleOffsetInMethodCall(expr, offsetIndex, &constOffset, &varOffset); const auto retType = expr->getDirectCallee()->getReturnType(); return createImageSample( @@ -6097,6 +6145,8 @@ SpirvEmitter::processTextureSampleCmp(const CXXMemberCallExpr *expr) { // [, float Clamp] // [, out uint Status] // ); + // SampledTexture variants have the same signature without the + // sampler_state parameter. // // For TextureCube and TextureCubeArray: // float Object.SampleCmp( @@ -6111,30 +6161,49 @@ SpirvEmitter::processTextureSampleCmp(const CXXMemberCallExpr *expr) { const bool hasStatusArg = expr->getArg(numArgs - 1)->getType()->isUnsignedIntegerType(); auto *status = hasStatusArg ? 
doExpr(expr->getArg(numArgs - 1)) : nullptr; + const auto *imageExpr = expr->getImplicitObjectArgument(); + const QualType imageType = imageExpr->getType(); + const bool isImageSampledTexture = isSampledTexture(imageType); + + int samplerIndex, coordinateIndex, compareValIndex, offsetIndex, clampIndex; + if (isImageSampledTexture) { + samplerIndex = -1; // non-existent + coordinateIndex = 0; + compareValIndex = 1; + offsetIndex = 2; + clampIndex = 3; + } else { + samplerIndex = 0; + coordinateIndex = 1; + compareValIndex = 2; + offsetIndex = 3; + clampIndex = 4; + } SpirvInstruction *clamp = nullptr; - if (numArgs > 3 && expr->getArg(3)->getType()->isFloatingType()) - clamp = doExpr(expr->getArg(3)); - else if (numArgs > 4 && expr->getArg(4)->getType()->isFloatingType()) - clamp = doExpr(expr->getArg(4)); + if (numArgs > offsetIndex && + expr->getArg(offsetIndex)->getType()->isFloatingType()) + clamp = doExpr(expr->getArg(offsetIndex)); + else if (numArgs > offsetIndex + 1 && + expr->getArg(offsetIndex + 1)->getType()->isFloatingType()) + clamp = doExpr(expr->getArg(offsetIndex + 1)); const bool hasClampArg = clamp != nullptr; - const auto *imageExpr = expr->getImplicitObjectArgument(); auto *image = loadIfGLValue(imageExpr); - auto *sampler = doExpr(expr->getArg(0)); - auto *coordinate = doExpr(expr->getArg(1)); - auto *compareVal = doExpr(expr->getArg(2)); + auto *sampler = samplerIndex < 0 ? nullptr : doExpr(expr->getArg(0)); + auto *coordinate = doExpr(expr->getArg(coordinateIndex)); + auto *compareVal = doExpr(expr->getArg(compareValIndex)); // If offset is present in .SampleCmp(), it will be the fourth argument. SpirvInstruction *constOffset = nullptr, *varOffset = nullptr; // Subtract 1 for clamp (if it exists), 1 for status (if it exists), // and 3 for sampler_state, location, and compare_value. 
- const bool hasOffsetArg = numArgs - hasStatusArg - hasClampArg - 3 > 0; + const bool hasOffsetArg = + numArgs - hasStatusArg - hasClampArg - offsetIndex > 0; if (hasOffsetArg) - handleOffsetInMethodCall(expr, 3, &constOffset, &varOffset); + handleOffsetInMethodCall(expr, offsetIndex, &constOffset, &varOffset); const auto retType = expr->getDirectCallee()->getReturnType(); - const auto imageType = imageExpr->getType(); addDerivativeGroupExecutionMode(); @@ -6160,6 +6229,8 @@ SpirvEmitter::processTextureSampleCmpBias(const CXXMemberCallExpr *expr) { // [, float Clamp] // [, out uint Status] // ); + // SampledTexture variants have the same signature without the + // sampler_state parameter. // // For TextureCube and TextureCubeArray: // float Object.SampleCmpBias( @@ -6173,21 +6244,41 @@ SpirvEmitter::processTextureSampleCmpBias(const CXXMemberCallExpr *expr) { const auto *imageExpr = expr->getImplicitObjectArgument(); auto *image = loadIfGLValue(imageExpr); + const auto imageType = imageExpr->getType(); + const bool isImageSampledTexture = isSampledTexture(imageType); - auto *sampler = doExpr(expr->getArg(0)); - auto *coordinate = doExpr(expr->getArg(1)); - auto *compareVal = doExpr(expr->getArg(2)); - auto *bias = doExpr(expr->getArg(3)); + int samplerIndex, coordinateIndex, compareValIndex, biasIndex, offsetIndex, + clampIndex; + if (isImageSampledTexture) { + samplerIndex = -1; // non-existant + coordinateIndex = 0; + compareValIndex = 1; + biasIndex = 2; + offsetIndex = 3; + clampIndex = 4; + } else { + samplerIndex = 0; + coordinateIndex = 1; + compareValIndex = 2; + biasIndex = 3; + offsetIndex = 4; + clampIndex = 5; + } + + auto *sampler = + samplerIndex < 0 ? 
nullptr : doExpr(expr->getArg(samplerIndex)); + auto *coordinate = doExpr(expr->getArg(coordinateIndex)); + auto *compareVal = doExpr(expr->getArg(compareValIndex)); + auto *bias = doExpr(expr->getArg(biasIndex)); SpirvInstruction *constOffset = nullptr, *varOffset = nullptr; SpirvInstruction *clamp = nullptr; SpirvInstruction *status = nullptr; - handleOptionalTextureSampleArgs(expr, 4, &constOffset, &varOffset, &clamp, - &status); + handleOptionalTextureSampleArgs(expr, offsetIndex, &constOffset, &varOffset, + &clamp, &status); const auto retType = expr->getDirectCallee()->getReturnType(); - const auto imageType = imageExpr->getType(); addDerivativeGroupExecutionMode(); @@ -6210,6 +6301,8 @@ SpirvEmitter::processTextureSampleCmpGrad(const CXXMemberCallExpr *expr) { // [, int Offset] // [, float Clamp] // [, out uint Status]); + // Their SampledTexture variants have the same signature without the + // sampler_state parameter. // // For TextureCube and TextureCubeArray: // DXGI_FORMAT Object.SampleGrad(sampler_state S, @@ -6223,19 +6316,41 @@ SpirvEmitter::processTextureSampleCmpGrad(const CXXMemberCallExpr *expr) { const auto *imageExpr = expr->getImplicitObjectArgument(); const QualType imageType = imageExpr->getType(); auto *image = loadIfGLValue(imageExpr); + const bool isImageSampledTexture = isSampledTexture(imageType); - auto *sampler = doExpr(expr->getArg(0)); - auto *coordinate = doExpr(expr->getArg(1)); - auto *compareVal = doExpr(expr->getArg(2)); - auto *ddx = doExpr(expr->getArg(3)); - auto *ddy = doExpr(expr->getArg(4)); + int samplerIndex, coordinateIndex, compareValIndex, ddxIndex, ddyIndex, + offsetIndex, clampIndex; + if (isImageSampledTexture) { + samplerIndex = -1; // non-existant + coordinateIndex = 0; + compareValIndex = 1; + ddxIndex = 2; + ddyIndex = 3; + offsetIndex = 4; + clampIndex = 5; + } else { + samplerIndex = 0; + coordinateIndex = 1; + compareValIndex = 2; + ddxIndex = 3; + ddyIndex = 4; + offsetIndex = 5; + clampIndex = 6; + } + + 
auto *sampler = + samplerIndex < 0 ? nullptr : doExpr(expr->getArg(samplerIndex)); + auto *coordinate = doExpr(expr->getArg(coordinateIndex)); + auto *compareVal = doExpr(expr->getArg(compareValIndex)); + auto *ddx = doExpr(expr->getArg(ddxIndex)); + auto *ddy = doExpr(expr->getArg(ddyIndex)); SpirvInstruction *constOffset = nullptr, *varOffset = nullptr; SpirvInstruction *clamp = nullptr; SpirvInstruction *status = nullptr; - handleOptionalTextureSampleArgs(expr, 5, &constOffset, &varOffset, &clamp, - &status); + handleOptionalTextureSampleArgs(expr, offsetIndex, &constOffset, &varOffset, + &clamp, &status); const auto retType = expr->getDirectCallee()->getReturnType(); return createImageSample( @@ -6265,6 +6380,9 @@ SpirvEmitter::processTextureSampleCmpLevelZero(const CXXMemberCallExpr *expr) { // [, out uint Status] // ); // + // SampledTexture variants have the same signature without the + // sampler_state parameter. + // // For TextureCube and TextureCubeArray: // float Object.SampleCmpLevelZero( // SamplerComparisonState S, @@ -6279,21 +6397,37 @@ SpirvEmitter::processTextureSampleCmpLevelZero(const CXXMemberCallExpr *expr) { auto *status = hasStatusArg ? doExpr(expr->getArg(numArgs - 1)) : nullptr; const auto *imageExpr = expr->getImplicitObjectArgument(); + const auto imageType = imageExpr->getType(); + const bool isImageSampledTexture = isSampledTexture(imageType); + + int samplerIndex, coordIndex, compareValIndex, offsetIndex; + if (isImageSampledTexture) { + samplerIndex = -1; // non-existant + coordIndex = 0; + compareValIndex = 1; + offsetIndex = 2; + } else { + samplerIndex = 0; + coordIndex = 1; + compareValIndex = 2; + offsetIndex = 3; + } + auto *image = loadIfGLValue(imageExpr); - auto *sampler = doExpr(expr->getArg(0)); - auto *coordinate = doExpr(expr->getArg(1)); - auto *compareVal = doExpr(expr->getArg(2)); + auto *sampler = + samplerIndex < 0 ? 
nullptr : doExpr(expr->getArg(samplerIndex)); + auto *coordinate = doExpr(expr->getArg(coordIndex)); + auto *compareVal = doExpr(expr->getArg(compareValIndex)); auto *lod = spvBuilder.getConstantFloat(astContext.FloatTy, llvm::APFloat(0.0f)); // If offset is present in .SampleCmp(), it will be the fourth argument. SpirvInstruction *constOffset = nullptr, *varOffset = nullptr; - const bool hasOffsetArg = numArgs - hasStatusArg - 3 > 0; + const bool hasOffsetArg = numArgs - hasStatusArg - offsetIndex > 0; if (hasOffsetArg) - handleOffsetInMethodCall(expr, 3, &constOffset, &varOffset); + handleOffsetInMethodCall(expr, offsetIndex, &constOffset, &varOffset); const auto retType = expr->getDirectCallee()->getReturnType(); - const auto imageType = imageExpr->getType(); return createImageSample( retType, imageType, image, sampler, coordinate, compareVal, @@ -6317,6 +6451,8 @@ SpirvEmitter::processTextureSampleCmpLevel(const CXXMemberCallExpr *expr) { // [, int Offset] // [, out uint Status] // ); + // SampledTexture variants have the same signature without the + // sampler_state parameter. // // For TextureCube and TextureCubeArray: // float Object.SampleCmpLevel( @@ -6333,20 +6469,38 @@ SpirvEmitter::processTextureSampleCmpLevel(const CXXMemberCallExpr *expr) { auto *status = hasStatusArg ? 
doExpr(expr->getArg(numArgs - 1)) : nullptr; const auto *imageExpr = expr->getImplicitObjectArgument(); + const auto imageType = imageExpr->getType(); + const bool isImageSampledTexture = isSampledTexture(imageType); + + int samplerIndex, coordIndex, compareValIndex, lodIndex, offsetIndex; + if (isImageSampledTexture) { + samplerIndex = -1; // non-existant + coordIndex = 0; + compareValIndex = 1; + lodIndex = 2; + offsetIndex = 3; + } else { + samplerIndex = 0; + coordIndex = 1; + compareValIndex = 2; + lodIndex = 3; + offsetIndex = 4; + } + auto *image = loadIfGLValue(imageExpr); - auto *sampler = doExpr(expr->getArg(0)); - auto *coordinate = doExpr(expr->getArg(1)); - auto *compareVal = doExpr(expr->getArg(2)); - auto *lod = doExpr(expr->getArg(3)); + auto *sampler = + samplerIndex < 0 ? nullptr : doExpr(expr->getArg(samplerIndex)); + auto *coordinate = doExpr(expr->getArg(coordIndex)); + auto *compareVal = doExpr(expr->getArg(compareValIndex)); + auto *lod = doExpr(expr->getArg(lodIndex)); // If offset is present in .SampleCmp(), it will be the fourth argument. 
SpirvInstruction *constOffset = nullptr, *varOffset = nullptr; - const bool hasOffsetArg = numArgs - hasStatusArg - 4 > 0; + const bool hasOffsetArg = numArgs - hasStatusArg - offsetIndex > 0; if (hasOffsetArg) - handleOffsetInMethodCall(expr, 4, &constOffset, &varOffset); + handleOffsetInMethodCall(expr, offsetIndex, &constOffset, &varOffset); const auto retType = expr->getDirectCallee()->getReturnType(); - const auto imageType = imageExpr->getType(); return createImageSample( retType, imageType, image, sampler, coordinate, compareVal, diff --git a/tools/clang/test/CodeGenSPIRV/vk.sampledtexture.cmp-level.hlsl b/tools/clang/test/CodeGenSPIRV/vk.sampledtexture.cmp-level.hlsl new file mode 100644 index 0000000000..4dd8dec8af --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.sampledtexture.cmp-level.hlsl @@ -0,0 +1,32 @@ +// RUN: %dxc -T ps_6_0 -E main -fcgl %s -spirv | FileCheck %s + +// CHECK: OpCapability SparseResidency + +// CHECK: [[v2fc:%[0-9]+]] = OpConstantComposite %v2float %float_0_5 %float_0_25 +// CHECK: [[v2ic:%[0-9]+]] = OpConstantComposite %v2int %int_2 %int_3 + +// CHECK: [[type_2d_image_1:%[a-zA-Z0-9_]+]] = OpTypeImage %float 2D 0 0 0 1 Unknown +// CHECK: [[type_sampled_image_1:%[a-zA-Z0-9_]+]] = OpTypeSampledImage [[type_2d_image_1]] +// CHECK: [[ptr_type_1:%[a-zA-Z0-9_]+]] = OpTypePointer UniformConstant [[type_sampled_image_1]] + +// CHECK: [[tex1:%[a-zA-Z0-9_]+]] = OpVariable [[ptr_type_1]] UniformConstant + +vk::SampledTexture2D tex1 : register(t0); + +float4 main() : SV_Target { +// CHECK: [[tex1_load:%[a-zA-Z0-9_]+]] = OpLoad [[type_sampled_image_1]] [[tex1]] +// CHECK: [[sampled_result1:%[a-zA-Z0-9_]+]] = OpImageSampleDrefExplicitLod %float [[tex1_load]] [[v2fc]] %float_2 Lod %float_1 + float val1 = tex1.SampleCmpLevel(float2(0.5, 0.25), 2.0f, 1.0f); + +// CHECK: [[tex2_load:%[a-zA-Z0-9_]+]] = OpLoad [[type_sampled_image_1]] [[tex1]] +// CHECK: [[sampled_result2:%[a-zA-Z0-9_]+]] = OpImageSampleDrefExplicitLod %float [[tex2_load]] [[v2fc]] 
%float_2 Lod|ConstOffset %float_1 [[v2ic]] + float val2 = tex1.SampleCmpLevel(float2(0.5, 0.25), 2.0f, 1.0f, int2(2,3)); + +// CHECK: [[tex3_load:%[a-zA-Z0-9_]+]] = OpLoad [[type_sampled_image_1]] [[tex1]] +// CHECK: [[sampled_result3:%[a-zA-Z0-9_]+]] = OpImageSparseSampleDrefExplicitLod %SparseResidencyStruct [[tex3_load]] [[v2fc]] %float_2 Lod|ConstOffset %float_1 [[v2ic]] +// CHECK: [[status_0:%[a-zA-Z0-9_]+]] = OpCompositeExtract %uint [[sampled_result3]] 0 +// CHECK: OpStore %status [[status_0]] + uint status; + float val3 = tex1.SampleCmpLevel(float2(0.5, 0.25), 2.0f, 1.0f, int2(2,3), status); + return 1.0; +} diff --git a/tools/clang/test/CodeGenSPIRV/vk.sampledtexture.sample-bias.hlsl b/tools/clang/test/CodeGenSPIRV/vk.sampledtexture.sample-bias.hlsl new file mode 100644 index 0000000000..a233394f6e --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.sampledtexture.sample-bias.hlsl @@ -0,0 +1,33 @@ +// RUN: %dxc -T ps_6_0 -E main -fcgl %s -spirv | FileCheck %s + +// CHECK: OpCapability MinLod +// CHECK: OpCapability SparseResidency + +// CHECK: [[v2fc:%[0-9]+]] = OpConstantComposite %v2float %float_0_5 %float_0_25 +// CHECK: [[v2ic:%[0-9]+]] = OpConstantComposite %v2int %int_2 %int_3 + +// CHECK: [[type_2d_image_1:%[a-zA-Z0-9_]+]] = OpTypeImage %float 2D 0 0 0 1 Unknown +// CHECK: [[type_sampled_image_1:%[a-zA-Z0-9_]+]] = OpTypeSampledImage [[type_2d_image_1]] +// CHECK: [[ptr_type_1:%[a-zA-Z0-9_]+]] = OpTypePointer UniformConstant [[type_sampled_image_1]] + +// CHECK: [[tex1:%[a-zA-Z0-9_]+]] = OpVariable [[ptr_type_1]] UniformConstant + +vk::SampledTexture2D tex1 : register(t0); + +float4 main() : SV_Target { +// CHECK: [[tex1_load:%[a-zA-Z0-9_]+]] = OpLoad [[type_sampled_image_1]] [[tex1]] +// CHECK: [[sampled_result1:%[a-zA-Z0-9_]+]] = OpImageSampleImplicitLod %v4float [[tex1_load]] [[v2fc]] Bias|ConstOffset %float_0_5 [[v2ic]] + float4 val1 = tex1.SampleBias(float2(0.5, 0.25), 0.5f, int2(2, 3)); + +// CHECK: [[tex2_load:%[a-zA-Z0-9_]+]] = OpLoad 
[[type_sampled_image_1]] [[tex1]] +// CHECK: [[sampled_result2:%[a-zA-Z0-9_]+]] = OpImageSampleImplicitLod %v4float [[tex2_load]] [[v2fc]] Bias|ConstOffset|MinLod %float_0_5 [[v2ic]] %float_2_5 + float4 val2 = tex1.SampleBias(float2(0.5, 0.25), 0.5f, int2(2, 3), 2.5f); + +// CHECK: [[tex3_load:%[a-zA-Z0-9_]+]] = OpLoad [[type_sampled_image_1]] [[tex1]] +// CHECK: [[sampled_result3:%[a-zA-Z0-9_]+]] = OpImageSparseSampleImplicitLod %SparseResidencyStruct [[tex3_load]] [[v2fc]] Bias|ConstOffset|MinLod %float_0_5 [[v2ic]] %float_2_5 +// CHECK: [[status_0:%[a-zA-Z0-9_]+]] = OpCompositeExtract %uint [[sampled_result3]] 0 +// CHECK: OpStore %status [[status_0]] + uint status; + float4 val3 = tex1.SampleBias(float2(0.5, 0.25), 0.5f, int2(2, 3), 2.5f, status); + return 1.0; +} diff --git a/tools/clang/test/CodeGenSPIRV/vk.sampledtexture.sample-cmp-bias.hlsl b/tools/clang/test/CodeGenSPIRV/vk.sampledtexture.sample-cmp-bias.hlsl new file mode 100644 index 0000000000..e302d88a50 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.sampledtexture.sample-cmp-bias.hlsl @@ -0,0 +1,33 @@ +// RUN: %dxc -T ps_6_0 -E main -fcgl %s -spirv | FileCheck %s + +// CHECK: OpCapability MinLod +// CHECK: OpCapability SparseResidency + +// CHECK: [[v2fc:%[0-9]+]] = OpConstantComposite %v2float %float_0_5 %float_0_25 +// CHECK: [[v2ic:%[0-9]+]] = OpConstantComposite %v2int %int_2 %int_3 + +// CHECK: [[type_2d_image_1:%[a-zA-Z0-9_]+]] = OpTypeImage %float 2D 0 0 0 1 Unknown +// CHECK: [[type_sampled_image_1:%[a-zA-Z0-9_]+]] = OpTypeSampledImage [[type_2d_image_1]] +// CHECK: [[ptr_type_1:%[a-zA-Z0-9_]+]] = OpTypePointer UniformConstant [[type_sampled_image_1]] + +// CHECK: [[tex1:%[a-zA-Z0-9_]+]] = OpVariable [[ptr_type_1]] UniformConstant + +vk::SampledTexture2D tex1 : register(t0); + +float4 main() : SV_Target { +// CHECK: [[tex1_load:%[a-zA-Z0-9_]+]] = OpLoad [[type_sampled_image_1]] [[tex1]] +// CHECK: [[sampled_result1:%[a-zA-Z0-9_]+]] = OpImageSampleDrefImplicitLod %float [[tex1_load]] 
[[v2fc]] %float_1 Bias|ConstOffset %float_0_5 [[v2ic]] + float val1 = tex1.SampleCmpBias(float2(0.5, 0.25), 1.0f, 0.5f, int2(2, 3)); + +// CHECK: [[tex2_load:%[a-zA-Z0-9_]+]] = OpLoad [[type_sampled_image_1]] [[tex1]] +// CHECK: [[sampled_result2:%[a-zA-Z0-9_]+]] = OpImageSampleDrefImplicitLod %float [[tex2_load]] [[v2fc]] %float_1 Bias|ConstOffset|MinLod %float_0_5 [[v2ic]] %float_2_5 + float val2 = tex1.SampleCmpBias(float2(0.5, 0.25), 1.0f, 0.5f, int2(2, 3), 2.5f); + +// CHECK: [[tex3_load:%[a-zA-Z0-9_]+]] = OpLoad [[type_sampled_image_1]] [[tex1]] +// CHECK: [[sampled_result3:%[a-zA-Z0-9_]+]] = OpImageSparseSampleDrefImplicitLod %SparseResidencyStruct [[tex3_load]] [[v2fc]] %float_1 Bias|ConstOffset|MinLod %float_0_5 [[v2ic]] %float_2_5 +// CHECK: [[status_0:%[a-zA-Z0-9_]+]] = OpCompositeExtract %uint [[sampled_result3]] 0 +// CHECK: OpStore %status [[status_0]] + uint status; + float val3 = tex1.SampleCmpBias(float2(0.5, 0.25), 1.0f, 0.5f, int2(2, 3), 2.5f, status); + return 1.0; +} diff --git a/tools/clang/test/CodeGenSPIRV/vk.sampledtexture.sample-cmp-grad.hlsl b/tools/clang/test/CodeGenSPIRV/vk.sampledtexture.sample-cmp-grad.hlsl new file mode 100644 index 0000000000..4acca8a42e --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.sampledtexture.sample-cmp-grad.hlsl @@ -0,0 +1,39 @@ +// RUN: %dxc -T ps_6_0 -E main -fcgl %s -spirv | FileCheck %s + +// CHECK: OpCapability MinLod +// CHECK: OpCapability SparseResidency + +// CHECK: [[v2fc:%[0-9]+]] = OpConstantComposite %v2float %float_0_5 %float_0_25 +// CHECK: [[v2f_1:%[0-9]+]] = OpConstantComposite %v2float %float_1 %float_1 +// CHECK: [[v2f_2:%[0-9]+]] = OpConstantComposite %v2float %float_2 %float_2 +// CHECK: [[v2ic:%[0-9]+]] = OpConstantComposite %v2int %int_2 %int_3 + +// CHECK: [[type_2d_image_1:%[a-zA-Z0-9_]+]] = OpTypeImage %float 2D 0 0 0 1 Unknown +// CHECK: [[type_sampled_image_1:%[a-zA-Z0-9_]+]] = OpTypeSampledImage [[type_2d_image_1]] +// CHECK: [[ptr_type_1:%[a-zA-Z0-9_]+]] = OpTypePointer 
UniformConstant [[type_sampled_image_1]] + +// CHECK: [[tex1:%[a-zA-Z0-9_]+]] = OpVariable [[ptr_type_1]] UniformConstant + +vk::SampledTexture2D tex1 : register(t0); + +float4 main() : SV_Target { +// CHECK: [[tex1_load:%[a-zA-Z0-9_]+]] = OpLoad [[type_sampled_image_1]] [[tex1]] +// CHECK: [[sampled_result1:%[a-zA-Z0-9_]+]] = OpImageSampleDrefExplicitLod %float [[tex1_load]] [[v2fc]] %float_1 Grad [[v2f_1]] [[v2f_2]] + float val1 = tex1.SampleCmpGrad(float2(0.5, 0.25), 1.0f, float2(1, 1), float2(2, 2)); + +// CHECK: [[tex2_load:%[a-zA-Z0-9_]+]] = OpLoad [[type_sampled_image_1]] [[tex1]] +// CHECK: [[sampled_result2:%[a-zA-Z0-9_]+]] = OpImageSampleDrefExplicitLod %float [[tex2_load]] [[v2fc]] %float_1 Grad|ConstOffset [[v2f_1]] [[v2f_2]] [[v2ic]] + float val2 = tex1.SampleCmpGrad(float2(0.5, 0.25), 1.0f, float2(1, 1), float2(2, 2), int2(2,3)); + +// CHECK: [[tex3_load:%[a-zA-Z0-9_]+]] = OpLoad [[type_sampled_image_1]] [[tex1]] +// CHECK: [[sampled_result3:%[a-zA-Z0-9_]+]] = OpImageSampleDrefExplicitLod %float [[tex3_load]] [[v2fc]] %float_1 Grad|ConstOffset|MinLod [[v2f_1]] [[v2f_2]] [[v2ic]] %float_0_5 + float val3 = tex1.SampleCmpGrad(float2(0.5, 0.25), 1.0f, float2(1, 1), float2(2, 2), int2(2,3), 0.5); + +// CHECK: [[tex4_load:%[a-zA-Z0-9_]+]] = OpLoad [[type_sampled_image_1]] [[tex1]] +// CHECK: [[sampled_result4:%[a-zA-Z0-9_]+]] = OpImageSparseSampleDrefExplicitLod %SparseResidencyStruct [[tex4_load]] [[v2fc]] %float_1 Grad|ConstOffset|MinLod [[v2f_1]] [[v2f_2]] [[v2ic]] %float_0_5 +// CHECK: [[status_0:%[a-zA-Z0-9_]+]] = OpCompositeExtract %uint [[sampled_result4]] 0 +// CHECK: OpStore %status [[status_0]] + uint status; + float val4 = tex1.SampleCmpGrad(float2(0.5, 0.25), 1.0f, float2(1, 1), float2(2, 2), int2(2,3), 0.5, status); + return 1.0; +} diff --git a/tools/clang/test/CodeGenSPIRV/vk.sampledtexture.sample-cmp-level-zero.hlsl b/tools/clang/test/CodeGenSPIRV/vk.sampledtexture.sample-cmp-level-zero.hlsl new file mode 100644 index 0000000000..b7475a7fca 
--- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.sampledtexture.sample-cmp-level-zero.hlsl @@ -0,0 +1,32 @@ +// RUN: %dxc -T ps_6_0 -E main -fcgl %s -spirv | FileCheck %s + +// CHECK: OpCapability SparseResidency + +// CHECK: [[v2fc:%[0-9]+]] = OpConstantComposite %v2float %float_0_5 %float_0_25 +// CHECK: [[v2ic:%[0-9]+]] = OpConstantComposite %v2int %int_2 %int_3 + +// CHECK: [[type_2d_image_1:%[a-zA-Z0-9_]+]] = OpTypeImage %float 2D 0 0 0 1 Unknown +// CHECK: [[type_sampled_image_1:%[a-zA-Z0-9_]+]] = OpTypeSampledImage [[type_2d_image_1]] +// CHECK: [[ptr_type_1:%[a-zA-Z0-9_]+]] = OpTypePointer UniformConstant [[type_sampled_image_1]] + +// CHECK: [[tex1:%[a-zA-Z0-9_]+]] = OpVariable [[ptr_type_1]] UniformConstant + +vk::SampledTexture2D tex1 : register(t0); + +float4 main() : SV_Target { +// CHECK: [[tex1_load:%[a-zA-Z0-9_]+]] = OpLoad [[type_sampled_image_1]] [[tex1]] +// CHECK: [[sampled_result1:%[a-zA-Z0-9_]+]] = OpImageSampleDrefExplicitLod %float [[tex1_load]] [[v2fc]] %float_2 Lod %float_0 + float val1 = tex1.SampleCmpLevelZero(float2(0.5, 0.25), 2.0f); + +// CHECK: [[tex2_load:%[a-zA-Z0-9_]+]] = OpLoad [[type_sampled_image_1]] [[tex1]] +// CHECK: [[sampled_result2:%[a-zA-Z0-9_]+]] = OpImageSampleDrefExplicitLod %float [[tex2_load]] [[v2fc]] %float_2 Lod|ConstOffset %float_0 [[v2ic]] + float val2 = tex1.SampleCmpLevelZero(float2(0.5, 0.25), 2.0f, int2(2,3)); + +// CHECK: [[tex3_load:%[a-zA-Z0-9_]+]] = OpLoad [[type_sampled_image_1]] [[tex1]] +// CHECK: [[sampled_result3:%[a-zA-Z0-9_]+]] = OpImageSparseSampleDrefExplicitLod %SparseResidencyStruct [[tex3_load]] [[v2fc]] %float_2 Lod|ConstOffset %float_0 [[v2ic]] +// CHECK: [[status_0:%[a-zA-Z0-9_]+]] = OpCompositeExtract %uint [[sampled_result3]] 0 +// CHECK: OpStore %status [[status_0]] + uint status; + float val3 = tex1.SampleCmpLevelZero(float2(0.5, 0.25), 2.0f, int2(2,3), status); + return 1.0; +} diff --git a/tools/clang/test/CodeGenSPIRV/vk.sampledtexture.sample-cmp.hlsl 
b/tools/clang/test/CodeGenSPIRV/vk.sampledtexture.sample-cmp.hlsl new file mode 100644 index 0000000000..3fc5798b09 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.sampledtexture.sample-cmp.hlsl @@ -0,0 +1,37 @@ +// RUN: %dxc -T ps_6_0 -E main -fcgl %s -spirv | FileCheck %s + +// CHECK: OpCapability MinLod +// CHECK: OpCapability SparseResidency + +// CHECK: [[v2fc:%[0-9]+]] = OpConstantComposite %v2float %float_0_5 %float_0_25 +// CHECK: [[v2ic:%[0-9]+]] = OpConstantComposite %v2int %int_2 %int_3 + +// CHECK: [[type_2d_image_1:%[a-zA-Z0-9_]+]] = OpTypeImage %float 2D 0 0 0 1 Unknown +// CHECK: [[type_sampled_image_1:%[a-zA-Z0-9_]+]] = OpTypeSampledImage [[type_2d_image_1]] +// CHECK: [[ptr_type_1:%[a-zA-Z0-9_]+]] = OpTypePointer UniformConstant [[type_sampled_image_1]] + +// CHECK: [[tex1:%[a-zA-Z0-9_]+]] = OpVariable [[ptr_type_1]] UniformConstant + +vk::SampledTexture2D tex1 : register(t0); + +float4 main() : SV_Target { +// CHECK: [[tex1_load:%[a-zA-Z0-9_]+]] = OpLoad [[type_sampled_image_1]] [[tex1]] +// CHECK: [[sampled_result1:%[a-zA-Z0-9_]+]] = OpImageSampleDrefImplicitLod %float [[tex1_load]] [[v2fc]] %float_2 + float val1 = tex1.SampleCmp(float2(0.5, 0.25), 2.0f); + +// CHECK: [[tex2_load:%[a-zA-Z0-9_]+]] = OpLoad [[type_sampled_image_1]] [[tex1]] +// CHECK: [[sampled_result2:%[a-zA-Z0-9_]+]] = OpImageSampleDrefImplicitLod %float [[tex2_load]] [[v2fc]] %float_2 ConstOffset [[v2ic]] + float val2 = tex1.SampleCmp(float2(0.5, 0.25), 2.0f, int2(2,3)); + +// CHECK: [[tex3_load:%[a-zA-Z0-9_]+]] = OpLoad [[type_sampled_image_1]] [[tex1]] +// CHECK: [[sampled_result3:%[a-zA-Z0-9_]+]] = OpImageSampleDrefImplicitLod %float [[tex3_load]] [[v2fc]] %float_2 ConstOffset|MinLod [[v2ic]] %float_0_5 + float val3 = tex1.SampleCmp(float2(0.5, 0.25), 2.0f, int2(2,3), 0.5); + +// CHECK: [[tex4_load:%[a-zA-Z0-9_]+]] = OpLoad [[type_sampled_image_1]] [[tex1]] +// CHECK: [[sampled_result4:%[a-zA-Z0-9_]+]] = OpImageSparseSampleDrefImplicitLod %SparseResidencyStruct 
[[tex4_load]] [[v2fc]] %float_2 ConstOffset|MinLod [[v2ic]] %float_0_5 +// CHECK: [[status_0:%[a-zA-Z0-9_]+]] = OpCompositeExtract %uint [[sampled_result4]] 0 +// CHECK: OpStore %status [[status_0]] + uint status; + float val4 = tex1.SampleCmp(float2(0.5, 0.25), 2.0f, int2(2,3), 0.5, status); + return 1.0; +} diff --git a/tools/clang/test/CodeGenSPIRV/vk.sampledtexture.sample-grad.hlsl b/tools/clang/test/CodeGenSPIRV/vk.sampledtexture.sample-grad.hlsl new file mode 100644 index 0000000000..829e148c02 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.sampledtexture.sample-grad.hlsl @@ -0,0 +1,38 @@ +// RUN: %dxc -T ps_6_0 -E main -fcgl %s -spirv | FileCheck %s + +// CHECK: OpCapability MinLod +// CHECK: OpCapability SparseResidency + +// CHECK: [[v2fc:%[0-9]+]] = OpConstantComposite %v2float %float_0_5 %float_0_25 +// CHECK: [[v2f_1:%[0-9]+]] = OpConstantComposite %v2float %float_1 %float_1 +// CHECK: [[v2f_2:%[0-9]+]] = OpConstantComposite %v2float %float_2 %float_2 +// CHECK: [[v2ic:%[0-9]+]] = OpConstantComposite %v2int %int_2 %int_3 + +// CHECK: [[type_2d_image_1:%[a-zA-Z0-9_]+]] = OpTypeImage %float 2D 0 0 0 1 Unknown +// CHECK: [[type_sampled_image_1:%[a-zA-Z0-9_]+]] = OpTypeSampledImage [[type_2d_image_1]] +// CHECK: [[ptr_type_1:%[a-zA-Z0-9_]+]] = OpTypePointer UniformConstant [[type_sampled_image_1]] + +// CHECK: [[tex1:%[a-zA-Z0-9_]+]] = OpVariable [[ptr_type_1]] UniformConstant + +vk::SampledTexture2D tex1 : register(t0); + +float4 main() : SV_Target { +// CHECK: [[tex1_load:%[a-zA-Z0-9_]+]] = OpLoad [[type_sampled_image_1]] [[tex1]] +// CHECK: [[sampled_result1:%[a-zA-Z0-9_]+]] = OpImageSampleExplicitLod %v4float [[tex1_load]] [[v2fc]] Grad [[v2f_1]] [[v2f_2]] + float4 val1 = tex1.SampleGrad(float2(0.5, 0.25), float2(1, 1), float2(2, 2)); + +// CHECK: [[tex2_load:%[a-zA-Z0-9_]+]] = OpLoad [[type_sampled_image_1]] [[tex1]] +// CHECK: [[sampled_result2:%[a-zA-Z0-9_]+]] = OpImageSampleExplicitLod %v4float [[tex2_load]] [[v2fc]] Grad|ConstOffset [[v2f_1]] 
[[v2f_2]] [[v2ic]] + float4 val2 = tex1.SampleGrad(float2(0.5, 0.25), float2(1, 1), float2(2, 2), int2(2,3)); + +// CHECK: [[tex3_load:%[a-zA-Z0-9_]+]] = OpLoad [[type_sampled_image_1]] [[tex1]] +// CHECK: [[sampled_result3:%[a-zA-Z0-9_]+]] = OpImageSampleExplicitLod %v4float [[tex3_load]] [[v2fc]] Grad|ConstOffset|MinLod [[v2f_1]] [[v2f_2]] [[v2ic]] %float_0_5 + float4 val3 = tex1.SampleGrad(float2(0.5, 0.25), float2(1, 1), float2(2, 2), int2(2,3), 0.5); +// CHECK: [[tex4_load:%[a-zA-Z0-9_]+]] = OpLoad [[type_sampled_image_1]] [[tex1]] +// CHECK: [[sampled_result4:%[a-zA-Z0-9_]+]] = OpImageSparseSampleExplicitLod %SparseResidencyStruct [[tex4_load]] [[v2fc]] Grad|ConstOffset|MinLod [[v2f_1]] [[v2f_2]] [[v2ic]] %float_0_5 +// CHECK: [[status_0:%[a-zA-Z0-9_]+]] = OpCompositeExtract %uint [[sampled_result4]] 0 +// CHECK: OpStore %status [[status_0]] + uint status; + float4 val4 = tex1.SampleGrad(float2(0.5, 0.25), float2(1, 1), float2(2, 2), int2(2,3), 0.5, status); + return 1.0; +} diff --git a/tools/clang/test/CodeGenSPIRV/vk.sampledtexture.sample-level.hlsl b/tools/clang/test/CodeGenSPIRV/vk.sampledtexture.sample-level.hlsl new file mode 100644 index 0000000000..a0c885b9ed --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.sampledtexture.sample-level.hlsl @@ -0,0 +1,32 @@ +// RUN: %dxc -T ps_6_0 -E main -fcgl %s -spirv | FileCheck %s + +// CHECK: OpCapability SparseResidency + +// CHECK: [[v2fc:%[0-9]+]] = OpConstantComposite %v2float %float_0_5 %float_0_25 +// CHECK: [[v2ic:%[0-9]+]] = OpConstantComposite %v2int %int_2 %int_3 + +// CHECK: [[type_2d_image_1:%[a-zA-Z0-9_]+]] = OpTypeImage %float 2D 0 0 0 1 Unknown +// CHECK: [[type_sampled_image_1:%[a-zA-Z0-9_]+]] = OpTypeSampledImage [[type_2d_image_1]] +// CHECK: [[ptr_type_1:%[a-zA-Z0-9_]+]] = OpTypePointer UniformConstant [[type_sampled_image_1]] + +// CHECK: [[tex1:%[a-zA-Z0-9_]+]] = OpVariable [[ptr_type_1]] UniformConstant + +vk::SampledTexture2D tex1 : register(t0); + +float4 main() : SV_Target { +// 
CHECK: [[tex1_load:%[a-zA-Z0-9_]+]] = OpLoad [[type_sampled_image_1]] [[tex1]] +// CHECK: [[sampled_result1:%[a-zA-Z0-9_]+]] = OpImageSampleExplicitLod %v4float [[tex1_load]] [[v2fc]] Lod|ConstOffset %float_0_5 [[v2ic]] + float4 val1 = tex1.SampleLevel(float2(0.5, 0.25), 0.5f, int2(2, 3)); + +// CHECK: [[tex2_load:%[a-zA-Z0-9_]+]] = OpLoad [[type_sampled_image_1]] [[tex1]] +// CHECK: [[sampled_result2:%[a-zA-Z0-9_]+]] = OpImageSampleExplicitLod %v4float [[tex2_load]] [[v2fc]] Lod|ConstOffset %float_0_5 [[v2ic]] + float4 val2 = tex1.SampleLevel(float2(0.5, 0.25), 0.5f, int2(2, 3)); + +// CHECK: [[tex3_load:%[a-zA-Z0-9_]+]] = OpLoad [[type_sampled_image_1]] [[tex1]] +// CHECK: [[sampled_result3:%[a-zA-Z0-9_]+]] = OpImageSparseSampleExplicitLod %SparseResidencyStruct [[tex3_load]] [[v2fc]] Lod|ConstOffset %float_0_5 [[v2ic]] +// CHECK: [[status_0:%[a-zA-Z0-9_]+]] = OpCompositeExtract %uint [[sampled_result3]] 0 +// CHECK: OpStore %status [[status_0]] + uint status; + float4 val3 = tex1.SampleLevel(float2(0.5, 0.25), 0.5f, int2(2, 3), status); + return 1.0; +} diff --git a/utils/hct/gen_intrin_main.txt b/utils/hct/gen_intrin_main.txt index dc0be344aa..42c2d3e322 100644 --- a/utils/hct/gen_intrin_main.txt +++ b/utils/hct/gen_intrin_main.txt @@ -1250,4 +1250,33 @@ namespace VkSampledTexture2DMethods { $classT [[ro]] Load(in int<3> x) : tex2d_t_load; $classT [[ro]] Load(in int<3> x, in int<2> o) : tex2d_t_load_o; $classT [[]] Load(in int<3> x, in int<2> o, out uint_only status) : tex2d_t_load_o_s; + $classT [[ro]] SampleBias(in float<2> x, in float bias) : tex2d_t_bias; + $classT [[ro]] SampleBias(in float<2> x, in float bias, in int<2> o) : tex2d_t_bias_o; + float_like [[ro]] SampleCmp(in float<2> x, in float compareValue) : tex2d_t_comp; + float_like [[ro]] SampleCmp(in float<2> x, in float compareValue, in int<2> o) : tex2d_t_comp_o; + float_like [[ro]] SampleCmpBias(in float<2> x, in float compareValue, in float bias) : tex2d_t_comp_bias; + float_like [[ro]] 
SampleCmpBias(in float<2> x, in float compareValue, in float bias, in int<2> o) : tex2d_t_comp_bias_o; + float_like [[ro]] SampleCmpGrad(in float<2> x, in float compareValue, in $type1 ddx, in $type1 ddy) : tex2d_t_comp_dd; + float_like [[ro]] SampleCmpGrad(in float<2> x, in float compareValue, in $type1 ddx, in $type1 ddy, in int<2> o) : tex2d_t_comp_dd_o; + float_like [[ro]] SampleCmpLevel(in float<2> x, in float compareValue, in float lod); + float_like [[ro]] SampleCmpLevel(in float<2> x, in float compareValue, in float lod, in int<2> o); + float_like [[ro]] SampleCmpLevelZero(in float<2> x, in float compareValue) : tex2d_t_comp_lz; + float_like [[ro]] SampleCmpLevelZero(in float<2> x, in float compareValue, in int<2> o) : tex2d_t_comp_lz_o; + $classT [[ro]] SampleGrad(in float<2> x, in $type1 ddx, in $type1 ddy) : tex2d_t_dd; + $classT [[ro]] SampleGrad(in float<2> x, in $type1 ddx, in $type1 ddy, in int<2> o) : tex2d_t_dd_o; + $classT [[ro]] SampleLevel(in float<2> x, in float lod) : tex2d_t_lod; + $classT [[ro]] SampleLevel(in float<2> x, in float lod, in int<2> o) : tex2d_t_lod_o; + float_like [[ro]] SampleCmp(in float<2> x, in float compareValue, in int<2> o, in float clamp) : tex2d_t_comp_o_cl; + float_like [[]] SampleCmp(in float<2> x, in float compareValue, in int<2> o, in float clamp, out uint_only status) : tex2d_t_comp_o_cl_s; + float_like [[ro]] SampleCmpBias(in float<2> x, in float compareValue, in float bias, in int<2> o, in float clamp) : tex2d_t_comp_bias_o_cl; + float_like [[]] SampleCmpBias(in float<2> x, in float compareValue, in float bias, in int<2> o, in float clamp, out uint_only status) : tex2d_t_comp_bias_o_cl_s; + float_like [[ro]] SampleCmpGrad(in float<2> x, in float compareValue, in $type1 ddx, in $type1 ddy, in int<2> o, in float clamp) : tex2d_t_comp_dd_o_cl; + float_like [[]] SampleCmpGrad(in float<2> x, in float compareValue, in $type1 ddx, in $type1 ddy, in int<2> o, in float clamp, out uint_only status) : 
tex2d_t_comp_dd_o_cl_s; + float_like [[]] SampleCmpLevel(in float<2> x, in float compareValue, in float lod, in int<2> o, out uint_only status); + float_like [[]] SampleCmpLevelZero(in float<2> x, in float compareValue, in int<2> o, out uint_only status) : tex2d_t_comp_o_s; + $classT [[]] SampleLevel(in float<2> x, in float lod, in int<2> o, out uint_only status) : tex2d_t_lod_o_s; + $classT [[ro]] SampleBias(in float<2> x, in float bias, in int<2> o, in float clamp) : tex2d_t_bias_o_cl; + $classT [[]] SampleBias(in float<2> x, in float bias, in int<2> o, in float clamp, out uint_only status) : tex2d_t_bias_o_cl_s; + $classT [[ro]] SampleGrad(in float<2> x, in $type1 ddx, in $type1 ddy, in int<2> o, in float clamp) : tex2d_t_dd_o_cl; + $classT [[]] SampleGrad(in float<2> x, in $type1 ddx, in $type1 ddy, in int<2> o, in float clamp, out uint_only status) : tex2d_t_dd_o_cl_s; } namespace From 3a47aa75957e0bc98182103dbf961bbc1a657f51 Mon Sep 17 00:00:00 2001 From: luciechoi Date: Thu, 19 Feb 2026 21:24:56 +0000 Subject: [PATCH 05/13] refactor using the helper for calculating the optional argument index --- tools/clang/lib/SPIRV/SpirvEmitter.cpp | 130 +++++------------- .../texture.array.sample-bias.hlsl | 8 +- .../texture.array.sample-cmp.hlsl | 8 +- .../texture.array.sample-grad.hlsl | 8 +- .../CodeGenSPIRV/texture.array.sample.hlsl | 8 +- .../CodeGenSPIRV/texture.sample-bias.hlsl | 8 +- .../test/CodeGenSPIRV/texture.sample-cmp.hlsl | 8 +- .../CodeGenSPIRV/texture.sample-grad.hlsl | 8 +- .../test/CodeGenSPIRV/texture.sample.hlsl | 8 +- 9 files changed, 64 insertions(+), 130 deletions(-) diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.cpp b/tools/clang/lib/SPIRV/SpirvEmitter.cpp index 8e5fa589a1..b67370337e 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.cpp +++ b/tools/clang/lib/SPIRV/SpirvEmitter.cpp @@ -5838,9 +5838,11 @@ void SpirvEmitter::handleOptionalTextureSampleArgs( if (index >= numArgs) return; - - *clamp = doExpr(expr->getArg(index)); - index++; + bool 
hasClampArg = expr->getArg(index)->getType()->isFloatingType(); + if (hasClampArg) { + *clamp = doExpr(expr->getArg(index)); + index++; + } if (index >= numArgs) return; @@ -5883,46 +5885,32 @@ SpirvEmitter::processTextureSampleGather(const CXXMemberCallExpr *expr, const auto numArgs = expr->getNumArgs(); const auto loc = expr->getExprLoc(); const auto range = expr->getSourceRange(); - const bool hasStatusArg = - expr->getArg(numArgs - 1)->getType()->isUnsignedIntegerType(); const auto *imageExpr = expr->getImplicitObjectArgument(); const QualType imageType = imageExpr->getType(); const bool isImageSampledTexture = isSampledTexture(imageType); - int samplerIndex; - uint32_t coordIndex; + int samplerIndex, coordIndex, offsetIndex; if (isImageSampledTexture) { samplerIndex = -1; // non-existant coordIndex = 0; + offsetIndex = 1; } else { samplerIndex = 0; coordIndex = 1; + offsetIndex = 2; } - SpirvInstruction *clamp = nullptr; - if (numArgs > coordIndex + 1 && - expr->getArg(coordIndex + 1)->getType()->isFloatingType()) - clamp = doExpr(expr->getArg(coordIndex + 1)); - else if (numArgs > coordIndex + 2 && - expr->getArg(coordIndex + 2)->getType()->isFloatingType()) - clamp = doExpr(expr->getArg(coordIndex + 2)); - const bool hasClampArg = (clamp != 0); - const auto status = - hasStatusArg ? doExpr(expr->getArg(numArgs - 1)) : nullptr; - auto *image = loadIfGLValue(imageExpr); SpirvInstruction *sampler = samplerIndex >= 0 ? doExpr(expr->getArg(samplerIndex)) : nullptr; auto *coordinate = doExpr(expr->getArg(coordIndex)); - // .Sample()/.Gather() may have a third optional paramter for offset. + SpirvInstruction *constOffset = nullptr, *varOffset = nullptr; - // Subtract 1 for status (if it exists), subtract 1 for clamp (if it exists), - // and subtract offsetIndex for sampler_state (if exists) location. 
- const bool hasOffsetArg = - numArgs - hasStatusArg - hasClampArg - coordIndex > 1; - if (hasOffsetArg) - handleOffsetInMethodCall(expr, coordIndex + 1, &constOffset, &varOffset); + SpirvInstruction *clamp = nullptr; + SpirvInstruction *status = nullptr; + handleOptionalTextureSampleArgs(expr, offsetIndex, &constOffset, &varOffset, + &clamp, &status); const auto retType = expr->getDirectCallee()->getReturnType(); if (isSample) { @@ -5981,9 +5969,6 @@ SpirvEmitter::processTextureSampleBiasLevel(const CXXMemberCallExpr *expr, // [, out uint Status]); const auto numArgs = expr->getNumArgs(); - const bool hasStatusArg = - expr->getArg(numArgs - 1)->getType()->isUnsignedIntegerType(); - auto *status = hasStatusArg ? doExpr(expr->getArg(numArgs - 1)) : nullptr; const auto *imageExpr = expr->getImplicitObjectArgument(); const QualType imageType = imageExpr->getType(); const bool isImageSampledTexture = isSampledTexture(imageType); @@ -6001,23 +5986,6 @@ SpirvEmitter::processTextureSampleBiasLevel(const CXXMemberCallExpr *expr, offsetIndex = 3; } - SpirvInstruction *clamp = nullptr; - // The .SampleLevel() methods do not take the clamp argument. - if (isBias) { - if (numArgs > offsetIndex && - expr->getArg(offsetIndex)->getType()->isFloatingType()) - clamp = doExpr(expr->getArg(offsetIndex)); - else if (numArgs > offsetIndex + 1 && - expr->getArg(offsetIndex + 1)->getType()->isFloatingType()) - clamp = doExpr(expr->getArg(offsetIndex + 1)); - } - const bool hasClampArg = clamp != nullptr; - - // Subtract 1 for clamp (if it exists), 1 for status (if it exists), - // and 3 for sampler_state, location, and Bias/LOD. - const bool hasOffsetArg = - numArgs - hasClampArg - hasStatusArg - offsetIndex > 0; - auto *image = loadIfGLValue(imageExpr); auto *sampler = samplerIndex < 0 ? 
nullptr : doExpr(expr->getArg(samplerIndex)); @@ -6029,10 +5997,12 @@ SpirvEmitter::processTextureSampleBiasLevel(const CXXMemberCallExpr *expr, } else { lod = doExpr(expr->getArg(biasIndex)); } - // If offset is present in .Bias()/.SampleLevel(), it is the fourth argument. + SpirvInstruction *constOffset = nullptr, *varOffset = nullptr; - if (hasOffsetArg) - handleOffsetInMethodCall(expr, offsetIndex, &constOffset, &varOffset); + SpirvInstruction *clamp = nullptr; + SpirvInstruction *status = nullptr; + handleOptionalTextureSampleArgs(expr, offsetIndex, &constOffset, &varOffset, + &clamp, &status); const auto retType = expr->getDirectCallee()->getReturnType(); @@ -6071,9 +6041,6 @@ SpirvEmitter::processTextureSampleGrad(const CXXMemberCallExpr *expr) { // [, out uint Status]); const auto numArgs = expr->getNumArgs(); - const bool hasStatusArg = - expr->getArg(numArgs - 1)->getType()->isUnsignedIntegerType(); - auto *status = hasStatusArg ? doExpr(expr->getArg(numArgs - 1)) : nullptr; const auto *imageExpr = expr->getImplicitObjectArgument(); const QualType imageType = imageExpr->getType(); const bool isImageSampledTexture = isSampledTexture(imageType); @@ -6093,29 +6060,17 @@ SpirvEmitter::processTextureSampleGrad(const CXXMemberCallExpr *expr) { offsetIndex = 4; } - SpirvInstruction *clamp = nullptr; - if (numArgs > offsetIndex && - expr->getArg(offsetIndex)->getType()->isFloatingType()) - clamp = doExpr(expr->getArg(offsetIndex)); - else if (numArgs > offsetIndex + 1 && - expr->getArg(offsetIndex + 1)->getType()->isFloatingType()) - clamp = doExpr(expr->getArg(offsetIndex + 1)); - const bool hasClampArg = clamp != nullptr; - - // Subtract 1 for clamp (if it exists), 1 for status (if it exists), - // and 4 for sampler_state, location, DDX, and DDY; - const bool hasOffsetArg = - numArgs - hasClampArg - hasStatusArg - offsetIndex > 0; - auto *image = loadIfGLValue(imageExpr); auto *sampler = samplerIndex < 0 ? 
nullptr : doExpr(expr->getArg(0)); auto *coordinate = doExpr(expr->getArg(coordinateIndex)); auto *ddx = doExpr(expr->getArg(ddxIndex)); auto *ddy = doExpr(expr->getArg(ddyIndex)); - // If offset is present in .SampleGrad(), it is the fifth argument. + SpirvInstruction *constOffset = nullptr, *varOffset = nullptr; - if (hasOffsetArg) - handleOffsetInMethodCall(expr, offsetIndex, &constOffset, &varOffset); + SpirvInstruction *clamp = nullptr; + SpirvInstruction *status = nullptr; + handleOptionalTextureSampleArgs(expr, offsetIndex, &constOffset, &varOffset, + &clamp, &status); const auto retType = expr->getDirectCallee()->getReturnType(); return createImageSample( @@ -6153,9 +6108,6 @@ SpirvEmitter::processTextureSampleCmp(const CXXMemberCallExpr *expr) { // ); const auto numArgs = expr->getNumArgs(); - const bool hasStatusArg = - expr->getArg(numArgs - 1)->getType()->isUnsignedIntegerType(); - auto *status = hasStatusArg ? doExpr(expr->getArg(numArgs - 1)) : nullptr; const auto *imageExpr = expr->getImplicitObjectArgument(); const QualType imageType = imageExpr->getType(); const bool isImageSampledTexture = isSampledTexture(imageType); @@ -6173,28 +6125,16 @@ SpirvEmitter::processTextureSampleCmp(const CXXMemberCallExpr *expr) { offsetIndex = 3; } - SpirvInstruction *clamp = nullptr; - if (numArgs > offsetIndex && - expr->getArg(offsetIndex)->getType()->isFloatingType()) - clamp = doExpr(expr->getArg(offsetIndex)); - else if (numArgs > offsetIndex + 1 && - expr->getArg(offsetIndex + 1)->getType()->isFloatingType()) - clamp = doExpr(expr->getArg(offsetIndex + 1)); - const bool hasClampArg = clamp != nullptr; - auto *image = loadIfGLValue(imageExpr); auto *sampler = samplerIndex < 0 ? nullptr : doExpr(expr->getArg(0)); auto *coordinate = doExpr(expr->getArg(coordinateIndex)); auto *compareVal = doExpr(expr->getArg(compareValIndex)); - // If offset is present in .SampleCmp(), it will be the fourth argument. 
- SpirvInstruction *constOffset = nullptr, *varOffset = nullptr; - // Subtract 1 for clamp (if it exists), 1 for status (if it exists), - // and 3 for sampler_state, location, and compare_value. - const bool hasOffsetArg = - numArgs - hasStatusArg - hasClampArg - offsetIndex > 0; - if (hasOffsetArg) - handleOffsetInMethodCall(expr, offsetIndex, &constOffset, &varOffset); + SpirvInstruction *constOffset = nullptr, *varOffset = nullptr; + SpirvInstruction *clamp = nullptr; + SpirvInstruction *status = nullptr; + handleOptionalTextureSampleArgs(expr, offsetIndex, &constOffset, &varOffset, + &clamp, &status); const auto retType = expr->getDirectCallee()->getReturnType(); @@ -6264,7 +6204,6 @@ SpirvEmitter::processTextureSampleCmpBias(const CXXMemberCallExpr *expr) { SpirvInstruction *constOffset = nullptr, *varOffset = nullptr; SpirvInstruction *clamp = nullptr; SpirvInstruction *status = nullptr; - handleOptionalTextureSampleArgs(expr, offsetIndex, &constOffset, &varOffset, &clamp, &status); @@ -6336,7 +6275,6 @@ SpirvEmitter::processTextureSampleCmpGrad(const CXXMemberCallExpr *expr) { SpirvInstruction *constOffset = nullptr, *varOffset = nullptr; SpirvInstruction *clamp = nullptr; SpirvInstruction *status = nullptr; - handleOptionalTextureSampleArgs(expr, offsetIndex, &constOffset, &varOffset, &clamp, &status); @@ -6380,10 +6318,6 @@ SpirvEmitter::processTextureSampleCmpLevelZero(const CXXMemberCallExpr *expr) { // ); const auto numArgs = expr->getNumArgs(); - const bool hasStatusArg = - expr->getArg(numArgs - 1)->getType()->isUnsignedIntegerType(); - auto *status = hasStatusArg ? 
doExpr(expr->getArg(numArgs - 1)) : nullptr; - const auto *imageExpr = expr->getImplicitObjectArgument(); const auto imageType = imageExpr->getType(); const bool isImageSampledTexture = isSampledTexture(imageType); @@ -6409,11 +6343,11 @@ SpirvEmitter::processTextureSampleCmpLevelZero(const CXXMemberCallExpr *expr) { auto *lod = spvBuilder.getConstantFloat(astContext.FloatTy, llvm::APFloat(0.0f)); - // If offset is present in .SampleCmp(), it will be the fourth argument. SpirvInstruction *constOffset = nullptr, *varOffset = nullptr; - const bool hasOffsetArg = numArgs - hasStatusArg - offsetIndex > 0; - if (hasOffsetArg) - handleOffsetInMethodCall(expr, offsetIndex, &constOffset, &varOffset); + SpirvInstruction *clamp = nullptr; + SpirvInstruction *status = nullptr; + handleOptionalTextureSampleArgs(expr, offsetIndex, &constOffset, &varOffset, + &clamp, &status); const auto retType = expr->getDirectCallee()->getReturnType(); diff --git a/tools/clang/test/CodeGenSPIRV/texture.array.sample-bias.hlsl b/tools/clang/test/CodeGenSPIRV/texture.array.sample-bias.hlsl index a809b74a7a..207a97cbb0 100644 --- a/tools/clang/test/CodeGenSPIRV/texture.array.sample-bias.hlsl +++ b/tools/clang/test/CodeGenSPIRV/texture.array.sample-bias.hlsl @@ -43,9 +43,9 @@ float4 main(int2 offset : A) : SV_Target { float4 val3 = t3.SampleBias(gSampler, float4(1, 2, 3, 1), 0.5); float clamp; -// CHECK: [[clamp:%[0-9]+]] = OpLoad %float %clamp -// CHECK-NEXT: [[t1_0:%[0-9]+]] = OpLoad %type_1d_image_array %t1 +// CHECK: [[t1_0:%[0-9]+]] = OpLoad %type_1d_image_array %t1 // CHECK-NEXT: [[gSampler_2:%[0-9]+]] = OpLoad %type_sampler %gSampler +// CHECK-NEXT: [[clamp:%[0-9]+]] = OpLoad %float %clamp // CHECK-NEXT: [[sampledImg_2:%[0-9]+]] = OpSampledImage %type_sampled_image [[t1_0]] [[gSampler_2]] // CHECK-NEXT: {{%[0-9]+}} = OpImageSampleImplicitLod %v4float [[sampledImg_2]] [[v2fc]] Bias|ConstOffset|MinLod %float_0_5 %int_1 [[clamp]] float4 val4 = t1.SampleBias(gSampler, float2(1, 1), 0.5, 1, 
clamp); @@ -57,9 +57,9 @@ float4 main(int2 offset : A) : SV_Target { float4 val5 = t3.SampleBias(gSampler, float4(1, 2, 3, 1), 0.5, /*clamp*/ 2.5f); uint status; -// CHECK: [[clamp_0:%[0-9]+]] = OpLoad %float %clamp -// CHECK-NEXT: [[t1_1:%[0-9]+]] = OpLoad %type_1d_image_array %t1 +// CHECK: [[t1_1:%[0-9]+]] = OpLoad %type_1d_image_array %t1 // CHECK-NEXT: [[gSampler_4:%[0-9]+]] = OpLoad %type_sampler %gSampler +// CHECK-NEXT: [[clamp_0:%[0-9]+]] = OpLoad %float %clamp // CHECK-NEXT: [[sampledImg_4:%[0-9]+]] = OpSampledImage %type_sampled_image [[t1_1]] [[gSampler_4]] // CHECK-NEXT: [[structResult:%[0-9]+]] = OpImageSparseSampleImplicitLod %SparseResidencyStruct [[sampledImg_4]] [[v2fc]] Bias|ConstOffset|MinLod %float_0_5 %int_1 [[clamp_0]] // CHECK-NEXT: [[status:%[0-9]+]] = OpCompositeExtract %uint [[structResult]] 0 diff --git a/tools/clang/test/CodeGenSPIRV/texture.array.sample-cmp.hlsl b/tools/clang/test/CodeGenSPIRV/texture.array.sample-cmp.hlsl index 44eae8a96d..cee63df78f 100644 --- a/tools/clang/test/CodeGenSPIRV/texture.array.sample-cmp.hlsl +++ b/tools/clang/test/CodeGenSPIRV/texture.array.sample-cmp.hlsl @@ -39,10 +39,10 @@ float4 main(int2 offset: A, float comparator: B) : SV_Target { float val3 = t3.SampleCmp(gSampler, float4(1, 2, 3, 1), comparator); float clamp; -// CHECK: [[clamp:%[0-9]+]] = OpLoad %float %clamp -// CHECK-NEXT: [[t2_0:%[0-9]+]] = OpLoad %type_2d_image_array %t2 +// CHECK: [[t2_0:%[0-9]+]] = OpLoad %type_2d_image_array %t2 // CHECK-NEXT: [[gSampler_2:%[0-9]+]] = OpLoad %type_sampler %gSampler // CHECK-NEXT: [[comparator_2:%[0-9]+]] = OpLoad %float %comparator +// CHECK-NEXT: [[clamp:%[0-9]+]] = OpLoad %float %clamp // CHECK-NEXT: [[sampledImg_2:%[0-9]+]] = OpSampledImage %type_sampled_image_0 [[t2_0]] [[gSampler_2]] // CHECK-NEXT: {{%[0-9]+}} = OpImageSampleDrefImplicitLod %float [[sampledImg_2]] [[v3fc]] [[comparator_2]] ConstOffset|MinLod [[v2ic]] [[clamp]] float val4 = t2.SampleCmp(gSampler, float3(1, 2, 1), comparator, 1, 
clamp); @@ -55,10 +55,10 @@ float4 main(int2 offset: A, float comparator: B) : SV_Target { float val5 = t3.SampleCmp(gSampler, float4(1, 2, 3, 1), comparator, /*clamp*/ 1.5); uint status; -// CHECK: [[clamp_0:%[0-9]+]] = OpLoad %float %clamp -// CHECK-NEXT: [[t2_1:%[0-9]+]] = OpLoad %type_2d_image_array %t2 +// CHECK: [[t2_1:%[0-9]+]] = OpLoad %type_2d_image_array %t2 // CHECK-NEXT: [[gSampler_4:%[0-9]+]] = OpLoad %type_sampler %gSampler // CHECK-NEXT: [[comparator_4:%[0-9]+]] = OpLoad %float %comparator +// CHECK-NEXT: [[clamp_0:%[0-9]+]] = OpLoad %float %clamp // CHECK-NEXT: [[sampledImg_4:%[0-9]+]] = OpSampledImage %type_sampled_image_0 [[t2_1]] [[gSampler_4]] // CHECK-NEXT: [[structResult:%[0-9]+]] = OpImageSparseSampleDrefImplicitLod %SparseResidencyStruct [[sampledImg_4]] [[v3fc]] [[comparator_4]] ConstOffset|MinLod [[v2ic]] [[clamp_0]] // CHECK-NEXT: [[status:%[0-9]+]] = OpCompositeExtract %uint [[structResult]] 0 diff --git a/tools/clang/test/CodeGenSPIRV/texture.array.sample-grad.hlsl b/tools/clang/test/CodeGenSPIRV/texture.array.sample-grad.hlsl index 194da86976..b9335aed17 100644 --- a/tools/clang/test/CodeGenSPIRV/texture.array.sample-grad.hlsl +++ b/tools/clang/test/CodeGenSPIRV/texture.array.sample-grad.hlsl @@ -54,9 +54,9 @@ float4 main(int2 offset : A) : SV_Target { float4 val4 = t2.SampleGrad(gSampler, float3(1, 1, 1), float2(2, 2), float2(3, 3), 1, /*clamp*/2.5); float clamp; -// CHECK: [[clamp:%[0-9]+]] = OpLoad %float %clamp -// CHECK-NEXT: [[t3_0:%[0-9]+]] = OpLoad %type_cube_image_array %t3 +// CHECK: [[t3_0:%[0-9]+]] = OpLoad %type_cube_image_array %t3 // CHECK-NEXT: [[gSampler_3:%[0-9]+]] = OpLoad %type_sampler %gSampler +// CHECK-NEXT: [[clamp:%[0-9]+]] = OpLoad %float %clamp // CHECK-NEXT: [[sampledImg_3:%[0-9]+]] = OpSampledImage %type_sampled_image_1 [[t3_0]] [[gSampler_3]] // CHECK-NEXT: {{%[0-9]+}} = OpImageSampleExplicitLod %v4float [[sampledImg_3]] [[v4f_1]] Grad|MinLod [[v3f_2]] [[v3f_3]] [[clamp]] float4 val5 = 
t3.SampleGrad(gSampler, float4(1, 1, 1, 1), float3(2, 2, 2), float3(3, 3, 3), clamp); @@ -72,9 +72,9 @@ float4 main(int2 offset : A) : SV_Target { // CHECK-NEXT: OpStore %val6 [[result]] float4 val6 = t2.SampleGrad(gSampler, float3(1, 1, 1), float2(2, 2), float2(3, 3), 1, /*clamp*/2.5, status); -// CHECK: [[clamp_0:%[0-9]+]] = OpLoad %float %clamp -// CHECK-NEXT: [[t3_1:%[0-9]+]] = OpLoad %type_cube_image_array %t3 +// CHECK: [[t3_1:%[0-9]+]] = OpLoad %type_cube_image_array %t3 // CHECK-NEXT: [[gSampler_5:%[0-9]+]] = OpLoad %type_sampler %gSampler +// CHECK-NEXT: [[clamp_0:%[0-9]+]] = OpLoad %float %clamp // CHECK-NEXT: [[sampledImg_5:%[0-9]+]] = OpSampledImage %type_sampled_image_1 [[t3_1]] [[gSampler_5]] // CHECK-NEXT: [[structResult_0:%[0-9]+]] = OpImageSparseSampleExplicitLod %SparseResidencyStruct [[sampledImg_5]] [[v4f_1]] Grad|MinLod [[v3f_2]] [[v3f_3]] [[clamp_0]] // CHECK-NEXT: [[status_0:%[0-9]+]] = OpCompositeExtract %uint [[structResult_0]] 0 diff --git a/tools/clang/test/CodeGenSPIRV/texture.array.sample.hlsl b/tools/clang/test/CodeGenSPIRV/texture.array.sample.hlsl index 0420b2af96..4d06d24e22 100644 --- a/tools/clang/test/CodeGenSPIRV/texture.array.sample.hlsl +++ b/tools/clang/test/CodeGenSPIRV/texture.array.sample.hlsl @@ -42,9 +42,9 @@ float4 main() : SV_Target { float4 val3 = t3.Sample(gSampler, float4(0.5, 0.25, 0.125, 1)); float clamp; -// CHECK: [[clamp:%[0-9]+]] = OpLoad %float %clamp -// CHECK-NEXT: [[t1_0:%[0-9]+]] = OpLoad %type_1d_image_array %t1 +// CHECK: [[t1_0:%[0-9]+]] = OpLoad %type_1d_image_array %t1 // CHECK-NEXT: [[gSampler_2:%[0-9]+]] = OpLoad %type_sampler %gSampler +// CHECK-NEXT: [[clamp:%[0-9]+]] = OpLoad %float %clamp // CHECK-NEXT: [[sampledImg_2:%[0-9]+]] = OpSampledImage %type_sampled_image [[t1_0]] [[gSampler_2]] // CHECK-NEXT: {{%[0-9]+}} = OpImageSampleImplicitLod %v4float [[sampledImg_2]] [[v2fc]] ConstOffset|MinLod %int_1 [[clamp]] float4 val4 = t1.Sample(gSampler, float2(0.5, 1), 1, clamp); @@ -56,9 +56,9 @@ float4 
main() : SV_Target { float4 val5 = t3.Sample(gSampler, float4(0.5, 0.25, 0.125, 1), /*clamp*/ 1.5); uint status; -// CHECK: [[clamp_0:%[0-9]+]] = OpLoad %float %clamp -// CHECK-NEXT: [[t1_1:%[0-9]+]] = OpLoad %type_1d_image_array %t1 +// CHECK: [[t1_1:%[0-9]+]] = OpLoad %type_1d_image_array %t1 // CHECK-NEXT: [[gSampler_4:%[0-9]+]] = OpLoad %type_sampler %gSampler +// CHECK-NEXT: [[clamp_0:%[0-9]+]] = OpLoad %float %clamp // CHECK-NEXT: [[sampledImg_4:%[0-9]+]] = OpSampledImage %type_sampled_image [[t1_1]] [[gSampler_4]] // CHECK-NEXT: [[structResult:%[0-9]+]] = OpImageSparseSampleImplicitLod %SparseResidencyStruct [[sampledImg_4]] [[v2fc]] ConstOffset|MinLod %int_1 [[clamp_0]] // CHECK-NEXT: [[status:%[0-9]+]] = OpCompositeExtract %uint [[structResult]] 0 diff --git a/tools/clang/test/CodeGenSPIRV/texture.sample-bias.hlsl b/tools/clang/test/CodeGenSPIRV/texture.sample-bias.hlsl index 6b3470edf6..e92a9a1449 100644 --- a/tools/clang/test/CodeGenSPIRV/texture.sample-bias.hlsl +++ b/tools/clang/test/CodeGenSPIRV/texture.sample-bias.hlsl @@ -51,9 +51,9 @@ float4 main(int3 offset: A) : SV_Target { float4 val4 = t4.SampleBias(gSampler, float3(1, 2, 3), 0.5); float clamp; -// CHECK: [[clamp:%[0-9]+]] = OpLoad %float %clamp -// CHECK-NEXT: [[t3_0:%[0-9]+]] = OpLoad %type_3d_image %t3 +// CHECK: [[t3_0:%[0-9]+]] = OpLoad %type_3d_image %t3 // CHECK-NEXT: [[gSampler_3:%[0-9]+]] = OpLoad %type_sampler %gSampler +// CHECK-NEXT: [[clamp:%[0-9]+]] = OpLoad %float %clamp // CHECK-NEXT: [[sampledImg_3:%[0-9]+]] = OpSampledImage %type_sampled_image_1 [[t3_0]] [[gSampler_3]] // CHECK-NEXT: {{%[0-9]+}} = OpImageSampleImplicitLod %v4float [[sampledImg_3]] [[v3fc]] Bias|ConstOffset|MinLod %float_0_5 [[v3ic]] [[clamp]] float4 val5 = t3.SampleBias(gSampler, float3(1, 2, 3), 0.5, 1, clamp); @@ -65,9 +65,9 @@ float4 main(int3 offset: A) : SV_Target { float4 val6 = t4.SampleBias(gSampler, float3(1, 2, 3), 0.5, /*clamp*/ 2.5); uint status; -// CHECK: [[clamp_0:%[0-9]+]] = OpLoad %float 
%clamp -// CHECK-NEXT: [[t3_1:%[0-9]+]] = OpLoad %type_3d_image %t3 +// CHECK: [[t3_1:%[0-9]+]] = OpLoad %type_3d_image %t3 // CHECK-NEXT: [[gSampler_5:%[0-9]+]] = OpLoad %type_sampler %gSampler +// CHECK-NEXT: [[clamp_0:%[0-9]+]] = OpLoad %float %clamp // CHECK-NEXT: [[sampledImg_5:%[0-9]+]] = OpSampledImage %type_sampled_image_1 [[t3_1]] [[gSampler_5]] // CHECK-NEXT: [[structResult:%[0-9]+]] = OpImageSparseSampleImplicitLod %SparseResidencyStruct [[sampledImg_5]] [[v3fc]] Bias|ConstOffset|MinLod %float_0_5 [[v3ic]] [[clamp_0]] // CHECK-NEXT: [[status:%[0-9]+]] = OpCompositeExtract %uint [[structResult]] 0 diff --git a/tools/clang/test/CodeGenSPIRV/texture.sample-cmp.hlsl b/tools/clang/test/CodeGenSPIRV/texture.sample-cmp.hlsl index 53fb6cdd8f..31222a6a17 100644 --- a/tools/clang/test/CodeGenSPIRV/texture.sample-cmp.hlsl +++ b/tools/clang/test/CodeGenSPIRV/texture.sample-cmp.hlsl @@ -39,10 +39,10 @@ float4 main(int2 offset: A, float comparator: B) : SV_Target { float val4 = t4.SampleCmp(gSampler, float3(1, 2, 3), comparator); float clamp; -// CHECK: [[clamp:%[0-9]+]] = OpLoad %float %clamp -// CHECK-NEXT: [[t2_0:%[0-9]+]] = OpLoad %type_2d_image %t2 +// CHECK: [[t2_0:%[0-9]+]] = OpLoad %type_2d_image %t2 // CHECK-NEXT: [[gSampler_2:%[0-9]+]] = OpLoad %type_sampler %gSampler // CHECK-NEXT: [[comparator_2:%[0-9]+]] = OpLoad %float %comparator +// CHECK-NEXT: [[clamp:%[0-9]+]] = OpLoad %float %clamp // CHECK-NEXT: [[sampledImg_2:%[0-9]+]] = OpSampledImage %type_sampled_image_0 [[t2_0]] [[gSampler_2]] // CHECK-NEXT: {{%[0-9]+}} = OpImageSampleDrefImplicitLod %float [[sampledImg_2]] [[v2fc]] [[comparator_2]] ConstOffset|MinLod [[v2ic]] [[clamp]] float val5 = t2.SampleCmp(gSampler, float2(1, 2), comparator, 1, clamp); @@ -55,10 +55,10 @@ float4 main(int2 offset: A, float comparator: B) : SV_Target { float val6 = t4.SampleCmp(gSampler, float3(1, 2, 3), comparator, /*clamp*/2.5); uint status; -// CHECK: [[clamp_0:%[0-9]+]] = OpLoad %float %clamp -// CHECK-NEXT: 
[[t2_1:%[0-9]+]] = OpLoad %type_2d_image %t2 +// CHECK: [[t2_1:%[0-9]+]] = OpLoad %type_2d_image %t2 // CHECK-NEXT: [[gSampler_4:%[0-9]+]] = OpLoad %type_sampler %gSampler // CHECK-NEXT: [[comparator_4:%[0-9]+]] = OpLoad %float %comparator +// CHECK-NEXT: [[clamp_0:%[0-9]+]] = OpLoad %float %clamp // CHECK-NEXT: [[sampledImg_4:%[0-9]+]] = OpSampledImage %type_sampled_image_0 [[t2_1]] [[gSampler_4]] // CHECK-NEXT: [[structResult:%[0-9]+]] = OpImageSparseSampleDrefImplicitLod %SparseResidencyStruct [[sampledImg_4]] [[v2fc]] [[comparator_4]] ConstOffset|MinLod [[v2ic]] [[clamp_0]] // CHECK-NEXT: [[status:%[0-9]+]] = OpCompositeExtract %uint [[structResult]] 0 diff --git a/tools/clang/test/CodeGenSPIRV/texture.sample-grad.hlsl b/tools/clang/test/CodeGenSPIRV/texture.sample-grad.hlsl index 86a234355e..66235b0b90 100644 --- a/tools/clang/test/CodeGenSPIRV/texture.sample-grad.hlsl +++ b/tools/clang/test/CodeGenSPIRV/texture.sample-grad.hlsl @@ -55,9 +55,9 @@ float4 main(int2 offset : A) : SV_Target { float4 val4 = t4.SampleGrad(gSampler, float3(1, 1, 1), float3(2, 2, 2), float3(3, 3, 3)); float clamp; -// CHECK: [[clamp:%[0-9]+]] = OpLoad %float %clamp -// CHECK-NEXT: [[t2_0:%[0-9]+]] = OpLoad %type_2d_image %t2 +// CHECK: [[t2_0:%[0-9]+]] = OpLoad %type_2d_image %t2 // CHECK-NEXT: [[gSampler_3:%[0-9]+]] = OpLoad %type_sampler %gSampler +// CHECK-NEXT: [[clamp:%[0-9]+]] = OpLoad %float %clamp // CHECK-NEXT: [[sampledImg_3:%[0-9]+]] = OpSampledImage %type_sampled_image_0 [[t2_0]] [[gSampler_3]] // CHECK-NEXT: {{%[0-9]+}} = OpImageSampleExplicitLod %v4float [[sampledImg_3]] [[v2f_1]] Grad|ConstOffset|MinLod [[v2f_2]] [[v2f_3]] [[v2i_3]] [[clamp]] float4 val5 = t2.SampleGrad(gSampler, float2(1, 1), float2(2, 2), float2(3, 3), 3, clamp); @@ -69,9 +69,9 @@ float4 main(int2 offset : A) : SV_Target { float4 val6 = t4.SampleGrad(gSampler, float3(1, 1, 1), float3(2, 2, 2), float3(3, 3, 3), /*clamp*/3.5); uint status; -// CHECK: [[clamp_0:%[0-9]+]] = OpLoad %float %clamp -// 
CHECK-NEXT: [[t2_1:%[0-9]+]] = OpLoad %type_2d_image %t2 +// CHECK: [[t2_1:%[0-9]+]] = OpLoad %type_2d_image %t2 // CHECK-NEXT: [[gSampler_5:%[0-9]+]] = OpLoad %type_sampler %gSampler +// CHECK-NEXT: [[clamp_0:%[0-9]+]] = OpLoad %float %clamp // CHECK-NEXT: [[sampledImg_5:%[0-9]+]] = OpSampledImage %type_sampled_image_0 [[t2_1]] [[gSampler_5]] // CHECK-NEXT: [[structResult:%[0-9]+]] = OpImageSparseSampleExplicitLod %SparseResidencyStruct [[sampledImg_5]] [[v2f_1]] Grad|ConstOffset|MinLod [[v2f_2]] [[v2f_3]] [[v2i_3]] [[clamp_0]] // CHECK-NEXT: [[status:%[0-9]+]] = OpCompositeExtract %uint [[structResult]] 0 diff --git a/tools/clang/test/CodeGenSPIRV/texture.sample.hlsl b/tools/clang/test/CodeGenSPIRV/texture.sample.hlsl index 6a847c3442..440dfb8fbb 100644 --- a/tools/clang/test/CodeGenSPIRV/texture.sample.hlsl +++ b/tools/clang/test/CodeGenSPIRV/texture.sample.hlsl @@ -51,9 +51,9 @@ float4 main(int2 offset: A) : SV_Target { float4 val4 = t4.Sample(gSampler, float3(0.5, 0.25, 0.3)); float clamp; -// CHECK: [[clamp:%[0-9]+]] = OpLoad %float %clamp -// CHECK-NEXT: [[t2_0:%[0-9]+]] = OpLoad %type_2d_image %t2 +// CHECK: [[t2_0:%[0-9]+]] = OpLoad %type_2d_image %t2 // CHECK-NEXT: [[gSampler_3:%[0-9]+]] = OpLoad %type_sampler %gSampler +// CHECK-NEXT: [[clamp:%[0-9]+]] = OpLoad %float %clamp // CHECK-NEXT: [[sampledImg_3:%[0-9]+]] = OpSampledImage %type_sampled_image_0 [[t2_0]] [[gSampler_3]] // CHECK-NEXT: {{%[0-9]+}} = OpImageSampleImplicitLod %v4float [[sampledImg_3]] [[v2fc]] ConstOffset|MinLod [[v2ic]] [[clamp]] float4 val5 = t2.Sample(gSampler, float2(0.5, 0.25), int2(2, 3), clamp); @@ -65,9 +65,9 @@ float4 main(int2 offset: A) : SV_Target { float4 val6 = t4.Sample(gSampler, float3(0.5, 0.25, 0.3), /*clamp*/ 2.0f); uint status; -// CHECK: [[clamp_0:%[0-9]+]] = OpLoad %float %clamp -// CHECK-NEXT: [[t2_1:%[0-9]+]] = OpLoad %type_2d_image %t2 +// CHECK: [[t2_1:%[0-9]+]] = OpLoad %type_2d_image %t2 // CHECK-NEXT: [[gSampler_5:%[0-9]+]] = OpLoad %type_sampler %gSampler 
+// CHECK-NEXT: [[clamp_0:%[0-9]+]] = OpLoad %float %clamp // CHECK-NEXT: [[sampledImg_5:%[0-9]+]] = OpSampledImage %type_sampled_image_0 [[t2_1]] [[gSampler_5]] // CHECK-NEXT: [[structResult:%[0-9]+]] = OpImageSparseSampleImplicitLod %SparseResidencyStruct [[sampledImg_5]] [[v2fc]] ConstOffset|MinLod [[v2ic]] [[clamp_0]] // CHECK-NEXT: [[status:%[0-9]+]] = OpCompositeExtract %uint [[structResult]] 0 From c565009e08ef88d1a4dda7722e1dab30a585b6ad Mon Sep 17 00:00:00 2001 From: Damyan Pepper Date: Thu, 19 Feb 2026 09:25:50 -0800 Subject: [PATCH 06/13] Add missing REQUIRES: spirv to vk.sampledtexture.struct.error.hlsl (#8170) Test uses `-spirv` but lacks the `REQUIRES: spirv` annotation, so it runs and fails under `-DENABLE_SPIRV_CODEGEN=OFF`. --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> --- tools/clang/test/SemaHLSL/vk.sampledtexture.struct.error.hlsl | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/clang/test/SemaHLSL/vk.sampledtexture.struct.error.hlsl b/tools/clang/test/SemaHLSL/vk.sampledtexture.struct.error.hlsl index 6f1db1f871..cdc2c94548 100644 --- a/tools/clang/test/SemaHLSL/vk.sampledtexture.struct.error.hlsl +++ b/tools/clang/test/SemaHLSL/vk.sampledtexture.struct.error.hlsl @@ -1,3 +1,4 @@ +// REQUIRES: spirv // RUN: %dxc -T ps_6_0 -E main %s -spirv -fcgl -verify struct Struct { float f; }; From 065d00d79df045205106d7a1aedf320766a4b7b1 Mon Sep 17 00:00:00 2001 From: Ashley Coleman Date: Thu, 19 Feb 2026 11:02:24 -0700 Subject: [PATCH 07/13] [SM6.10][specs/783] Update LinAlg DXIL Op Class Names (#8169) https://github.com/microsoft/hlsl-specs/pull/783 changed the names of the linalg DXIL ops, update the implementation to reflect that. 
The only interesting changes are in `utils/hct/hctdb.py` and `lib/HLSL/HLOperationLower.cpp`; everything else is generated code. --- docs/DXIL.rst | 36 ++--- include/dxc/DXIL/DxilConstants.h | 208 ++++++++++++------------ include/dxc/DXIL/DxilInstructions.h | 125 ++++++++------- lib/DXIL/DxilOperations.cpp | 237 ++++++++++++++-------------- lib/HLSL/HLOperationLower.cpp | 38 ++--- utils/hct/hctdb.py | 89 +++++------ 6 files changed, 378 insertions(+), 355 deletions(-) diff --git a/docs/DXIL.rst b/docs/DXIL.rst index 40dc2ba01a..2afd65f55e 100644 --- a/docs/DXIL.rst +++ b/docs/DXIL.rst @@ -3077,24 +3077,24 @@ ID Name Description 2147483657 RayQuery_CommittedTriangleObjectPosition returns committed triangle vertices in object space as <9 x float> 2147483658 HitObject_TriangleObjectPosition returns triangle vertices in object space as <9 x float> 2147483659 ReservedD0 reserved -2147483660 FillMatrix fills a matrix with a scalar value -2147483661 CopyConvertMatrix Converts and copies the element and use type of the source matrix to the destination matrix with optional transpose -2147483662 MatrixLoadFromDescriptor fills a matrix with data from a [RW]ByteAddressBuffer -2147483663 MatrixLoadFromMemory fills a matrix with data from a groupshared array -2147483664 MatrixLength returns the number of elements stored in thread-local storage on the active thread for the provided matrix -2147483665 MatrixGetCoordinate returns a two element vector containing the column and row of the matrix that the thread-local index corresponds to -2147483666 MatrixGetElement returns the element of the matrix corresponding to the provided thread-local index -2147483667 MatrixSetElement sets the element of the matrix corresponding to the provided thread-local index -2147483668 MatrixStoreToDescriptor stores a matrix to a RWByteAddressBuffer -2147483669 MatrixStoreToMemory stores a matrix to groupshared memory -2147483670 MatrixQueryAccumulatorLayout returns comptime 0 when accumulator matrix are
A layout, 1 when B layout -2147483671 MatrixMulOp applies a multiplication op to matrix C using A and B as parameters -2147483672 MatrixAccumulate accumulate A or B matrix into Accumulator matrix following LHS += RHS -2147483673 MatrixVecMul Multiplies a MxK dimension matrix and a K sized input vector -2147483674 MatrixVecMulAdd Multiplies a MxK dimension matrix and a K sized input vector then adds a M sized bias vector -2147483675 MatrixAccumulateToDescriptor accumulates a matrix to a RWByteAddressBuffer -2147483676 MatrixAccumulateToMemory accumulates a matrix to groupshared memory -2147483677 MatrixOuterProduct Outer products an M sized vector and a N sized vector producing an MxN matrix +2147483660 LinAlgFillMatrix fills a matrix with a scalar value +2147483661 LinAlgCopyConvertMatrix Converts and copies the element and use type of the source matrix to the destination matrix with optional transpose +2147483662 LinAlgMatrixLoadFromDescriptor fills a matrix with data from a [RW]ByteAddressBuffer +2147483663 LinAlgMatrixLoadFromMemory fills a matrix with data from a groupshared array +2147483664 LinAlgMatrixLength returns the number of elements stored in thread-local storage on the active thread for the provided matrix +2147483665 LinAlgMatrixGetCoordinate returns a two element vector containing the column and row of the matrix that the thread-local index corresponds to +2147483666 LinAlgMatrixGetElement returns the element of the matrix corresponding to the provided thread-local index +2147483667 LinAlgMatrixSetElement sets the element of the matrix corresponding to the provided thread-local index +2147483668 LinAlgMatrixStoreToDescriptor stores a matrix to a RWByteAddressBuffer +2147483669 LinAlgMatrixStoreToMemory stores a matrix to groupshared memory +2147483670 LinAlgMatrixQueryAccumulatorLayout returns comptime 0 when accumulator matrix are A layout, 1 when B layout +2147483671 LinAlgMatrixMulOp applies a multiplication op to matrix C using A and B as 
parameters +2147483672 LinAlgMatrixAccumulate accumulate A or B matrix into Accumulator matrix following LHS += RHS +2147483673 LinAlgMatVecMul Multiplies a MxK dimension matrix and a K sized input vector +2147483674 LinAlgMatVecMulAdd Multiplies a MxK dimension matrix and a K sized input vector then adds a M sized bias vector +2147483675 LinAlgMatrixAccumulateToDescriptor accumulates a matrix to a RWByteAddressBuffer +2147483676 LinAlgMatrixAccumulateToMemory accumulates a matrix to groupshared memory +2147483677 LinAlgMatrixOuterProduct Outer products an M sized vector and a N sized vector producing an MxN matrix 2147483678 ReservedD1 reserved 2147483679 ReservedD2 reserved 2147483680 ReservedD3 reserved diff --git a/include/dxc/DXIL/DxilConstants.h b/include/dxc/DXIL/DxilConstants.h index b822e3b5b0..dfb835aa00 100644 --- a/include/dxc/DXIL/DxilConstants.h +++ b/include/dxc/DXIL/DxilConstants.h @@ -546,40 +546,45 @@ enum class OpCode : unsigned { 9, // returns committed triangle vertices in object space as <9 x float> // Linear Algebra Operations - CopyConvertMatrix = + LinAlgCopyConvertMatrix = 13, // Converts and copies the element and use type of the source matrix // to the destination matrix with optional transpose - FillMatrix = 12, // fills a matrix with a scalar value - MatrixAccumulate = 24, // accumulate A or B matrix into Accumulator matrix - // following LHS += RHS - MatrixAccumulateToDescriptor = + LinAlgFillMatrix = 12, // fills a matrix with a scalar value + LinAlgMatVecMul = + 25, // Multiplies a MxK dimension matrix and a K sized input vector + LinAlgMatVecMulAdd = 26, // Multiplies a MxK dimension matrix and a K sized + // input vector then adds a M sized bias vector + LinAlgMatrixAccumulate = 24, // accumulate A or B matrix into Accumulator + // matrix following LHS += RHS + LinAlgMatrixAccumulateToDescriptor = 27, // accumulates a matrix to a RWByteAddressBuffer - MatrixAccumulateToMemory = 28, // accumulates a matrix to groupshared memory - 
MatrixGetCoordinate = + LinAlgMatrixAccumulateToMemory = + 28, // accumulates a matrix to groupshared memory + LinAlgMatrixGetCoordinate = 17, // returns a two element vector containing the column and row of the // matrix that the thread-local index corresponds to - MatrixGetElement = 18, // returns the element of the matrix corresponding to - // the provided thread-local index - MatrixLength = 16, // returns the number of elements stored in thread-local - // storage on the active thread for the provided matrix - MatrixLoadFromDescriptor = + LinAlgMatrixGetElement = + 18, // returns the element of the matrix corresponding to the provided + // thread-local index + LinAlgMatrixLength = + 16, // returns the number of elements stored in thread-local storage on + // the active thread for the provided matrix + LinAlgMatrixLoadFromDescriptor = 14, // fills a matrix with data from a [RW]ByteAddressBuffer - MatrixLoadFromMemory = + LinAlgMatrixLoadFromMemory = 15, // fills a matrix with data from a groupshared array - MatrixMulOp = + LinAlgMatrixMulOp = 23, // applies a multiplication op to matrix C using A and B as parameters - MatrixOuterProduct = 29, // Outer products an M sized vector and a N sized - // vector producing an MxN matrix - MatrixQueryAccumulatorLayout = 22, // returns comptime 0 when accumulator - // matrix are A layout, 1 when B layout - MatrixSetElement = 19, // sets the element of the matrix corresponding to the - // provided thread-local index - MatrixStoreToDescriptor = 20, // stores a matrix to a RWByteAddressBuffer - MatrixStoreToMemory = 21, // stores a matrix to groupshared memory - MatrixVecMul = - 25, // Multiplies a MxK dimension matrix and a K sized input vector - MatrixVecMulAdd = 26, // Multiplies a MxK dimension matrix and a K sized input - // vector then adds a M sized bias vector + LinAlgMatrixOuterProduct = 29, // Outer products an M sized vector and a N + // sized vector producing an MxN matrix + LinAlgMatrixQueryAccumulatorLayout = + 
22, // returns comptime 0 when accumulator matrix are A layout, 1 when B + // layout + LinAlgMatrixSetElement = 19, // sets the element of the matrix corresponding + // to the provided thread-local index + LinAlgMatrixStoreToDescriptor = + 20, // stores a matrix to a RWByteAddressBuffer + LinAlgMatrixStoreToMemory = 21, // stores a matrix to groupshared memory // No-op ExperimentalNop = 0, // nop does nothing @@ -1260,80 +1265,87 @@ enum class OpCode : unsigned { // object space as <9 x float> // ReservedD0 = 0x8000000B, 2147483659U, -2147483637 EXP_OPCODE(ExperimentalOps, ReservedD0), // reserved - // FillMatrix = 0x8000000C, 2147483660U, -2147483636 - EXP_OPCODE(ExperimentalOps, FillMatrix), // fills a matrix with a scalar value - // CopyConvertMatrix = 0x8000000D, 2147483661U, -2147483635 + // LinAlgFillMatrix = 0x8000000C, 2147483660U, -2147483636 EXP_OPCODE(ExperimentalOps, - CopyConvertMatrix), // Converts and copies the element and use type - // of the source matrix to the destination - // matrix with optional transpose - // MatrixLoadFromDescriptor = 0x8000000E, 2147483662U, -2147483634 - EXP_OPCODE(ExperimentalOps, - MatrixLoadFromDescriptor), // fills a matrix with data from a - // [RW]ByteAddressBuffer - // MatrixLoadFromMemory = 0x8000000F, 2147483663U, -2147483633 - EXP_OPCODE(ExperimentalOps, MatrixLoadFromMemory), // fills a matrix with data - // from a groupshared array - // MatrixLength = 0x80000010, 2147483664U, -2147483632 + LinAlgFillMatrix), // fills a matrix with a scalar value + // LinAlgCopyConvertMatrix = 0x8000000D, 2147483661U, -2147483635 EXP_OPCODE( ExperimentalOps, - MatrixLength), // returns the number of elements stored in thread-local - // storage on the active thread for the provided matrix - // MatrixGetCoordinate = 0x80000011, 2147483665U, -2147483631 + LinAlgCopyConvertMatrix), // Converts and copies the element and use type + // of the source matrix to the destination + // matrix with optional transpose + // 
LinAlgMatrixLoadFromDescriptor = 0x8000000E, 2147483662U, -2147483634 + EXP_OPCODE(ExperimentalOps, + LinAlgMatrixLoadFromDescriptor), // fills a matrix with data from a + // [RW]ByteAddressBuffer + // LinAlgMatrixLoadFromMemory = 0x8000000F, 2147483663U, -2147483633 EXP_OPCODE(ExperimentalOps, - MatrixGetCoordinate), // returns a two element vector containing - // the column and row of the matrix that the - // thread-local index corresponds to - // MatrixGetElement = 0x80000012, 2147483666U, -2147483630 + LinAlgMatrixLoadFromMemory), // fills a matrix with data from a + // groupshared array + // LinAlgMatrixLength = 0x80000010, 2147483664U, -2147483632 + EXP_OPCODE(ExperimentalOps, + LinAlgMatrixLength), // returns the number of elements stored in + // thread-local storage on the active thread + // for the provided matrix + // LinAlgMatrixGetCoordinate = 0x80000011, 2147483665U, -2147483631 EXP_OPCODE( ExperimentalOps, - MatrixGetElement), // returns the element of the matrix corresponding to - // the provided thread-local index - // MatrixSetElement = 0x80000013, 2147483667U, -2147483629 + LinAlgMatrixGetCoordinate), // returns a two element vector containing the + // column and row of the matrix that the + // thread-local index corresponds to + // LinAlgMatrixGetElement = 0x80000012, 2147483666U, -2147483630 EXP_OPCODE(ExperimentalOps, - MatrixSetElement), // sets the element of the matrix corresponding - // to the provided thread-local index - // MatrixStoreToDescriptor = 0x80000014, 2147483668U, -2147483628 + LinAlgMatrixGetElement), // returns the element of the matrix + // corresponding to the provided + // thread-local index + // LinAlgMatrixSetElement = 0x80000013, 2147483667U, -2147483629 EXP_OPCODE( ExperimentalOps, - MatrixStoreToDescriptor), // stores a matrix to a RWByteAddressBuffer - // MatrixStoreToMemory = 0x80000015, 2147483669U, -2147483627 + LinAlgMatrixSetElement), // sets the element of the matrix corresponding + // to the provided 
thread-local index + // LinAlgMatrixStoreToDescriptor = 0x80000014, 2147483668U, -2147483628 EXP_OPCODE(ExperimentalOps, - MatrixStoreToMemory), // stores a matrix to groupshared memory - // MatrixQueryAccumulatorLayout = 0x80000016, 2147483670U, -2147483626 + LinAlgMatrixStoreToDescriptor), // stores a matrix to a + // RWByteAddressBuffer + // LinAlgMatrixStoreToMemory = 0x80000015, 2147483669U, -2147483627 EXP_OPCODE( ExperimentalOps, - MatrixQueryAccumulatorLayout), // returns comptime 0 when accumulator - // matrix are A layout, 1 when B layout - // MatrixMulOp = 0x80000017, 2147483671U, -2147483625 + LinAlgMatrixStoreToMemory), // stores a matrix to groupshared memory + // LinAlgMatrixQueryAccumulatorLayout = 0x80000016, 2147483670U, -2147483626 + EXP_OPCODE(ExperimentalOps, + LinAlgMatrixQueryAccumulatorLayout), // returns comptime 0 when + // accumulator matrix are A + // layout, 1 when B layout + // LinAlgMatrixMulOp = 0x80000017, 2147483671U, -2147483625 EXP_OPCODE(ExperimentalOps, - MatrixMulOp), // applies a multiplication op to matrix C using A - // and B as parameters - // MatrixAccumulate = 0x80000018, 2147483672U, -2147483624 + LinAlgMatrixMulOp), // applies a multiplication op to matrix C + // using A and B as parameters + // LinAlgMatrixAccumulate = 0x80000018, 2147483672U, -2147483624 EXP_OPCODE(ExperimentalOps, - MatrixAccumulate), // accumulate A or B matrix into Accumulator - // matrix following LHS += RHS - // MatrixVecMul = 0x80000019, 2147483673U, -2147483623 + LinAlgMatrixAccumulate), // accumulate A or B matrix into + // Accumulator matrix following LHS += RHS + // LinAlgMatVecMul = 0x80000019, 2147483673U, -2147483623 EXP_OPCODE(ExperimentalOps, - MatrixVecMul), // Multiplies a MxK dimension matrix and a K sized - // input vector - // MatrixVecMulAdd = 0x8000001A, 2147483674U, -2147483622 + LinAlgMatVecMul), // Multiplies a MxK dimension matrix and a K + // sized input vector + // LinAlgMatVecMulAdd = 0x8000001A, 2147483674U, -2147483622 
EXP_OPCODE( ExperimentalOps, - MatrixVecMulAdd), // Multiplies a MxK dimension matrix and a K sized input - // vector then adds a M sized bias vector - // MatrixAccumulateToDescriptor = 0x8000001B, 2147483675U, -2147483621 + LinAlgMatVecMulAdd), // Multiplies a MxK dimension matrix and a K sized + // input vector then adds a M sized bias vector + // LinAlgMatrixAccumulateToDescriptor = 0x8000001B, 2147483675U, -2147483621 EXP_OPCODE(ExperimentalOps, - MatrixAccumulateToDescriptor), // accumulates a matrix to a - // RWByteAddressBuffer - // MatrixAccumulateToMemory = 0x8000001C, 2147483676U, -2147483620 + LinAlgMatrixAccumulateToDescriptor), // accumulates a matrix to a + // RWByteAddressBuffer + // LinAlgMatrixAccumulateToMemory = 0x8000001C, 2147483676U, -2147483620 + EXP_OPCODE(ExperimentalOps, + LinAlgMatrixAccumulateToMemory), // accumulates a matrix to + // groupshared memory + // LinAlgMatrixOuterProduct = 0x8000001D, 2147483677U, -2147483619 EXP_OPCODE( ExperimentalOps, - MatrixAccumulateToMemory), // accumulates a matrix to groupshared memory - // MatrixOuterProduct = 0x8000001D, 2147483677U, -2147483619 - EXP_OPCODE(ExperimentalOps, - MatrixOuterProduct), // Outer products an M sized vector and a N - // sized vector producing an MxN matrix + LinAlgMatrixOuterProduct), // Outer products an M sized vector and a N + // sized vector producing an MxN matrix // ReservedD1 = 0x8000001E, 2147483678U, -2147483618 EXP_OPCODE(ExperimentalOps, ReservedD1), // reserved // ReservedD2 = 0x8000001F, 2147483679U, -2147483617 @@ -1505,26 +1517,26 @@ enum class OpCodeClass : unsigned { CreateHandleForLib, // Linear Algebra Operations - CopyConvertMatrix, - FillMatrix, + LinAlgCopyConvertMatrix, + LinAlgFillMatrix, + LinAlgMatVecMul, + LinAlgMatVecMulAdd, + LinAlgMatrixAccumulate, + LinAlgMatrixAccumulateToDescriptor, + LinAlgMatrixAccumulateToMemory, + LinAlgMatrixGetCoordinate, + LinAlgMatrixGetElement, + LinAlgMatrixLength, + LinAlgMatrixLoadFromDescriptor, + 
LinAlgMatrixLoadFromMemory, + LinAlgMatrixMulOp, + LinAlgMatrixOuterProduct, + LinAlgMatrixQueryAccumulatorLayout, + LinAlgMatrixSetElement, + LinAlgMatrixStoreToDescriptor, + LinAlgMatrixStoreToMemory, MatVecMul, MatVecMulAdd, - MatrixAccumulate, - MatrixAccumulateToDescriptor, - MatrixAccumulateToMemory, - MatrixGetCoordinate, - MatrixGetElement, - MatrixLength, - MatrixLoadFromDescriptor, - MatrixLoadFromMemory, - MatrixMulOp, - MatrixOuterProduct, - MatrixQueryAccumulatorLayout, - MatrixSetElement, - MatrixStoreToDescriptor, - MatrixStoreToMemory, - MatrixVecMul, - MatrixVecMulAdd, OuterProductAccumulate, VectorAccumulate, diff --git a/include/dxc/DXIL/DxilInstructions.h b/include/dxc/DXIL/DxilInstructions.h index 30a7f2853d..2f388fdcd3 100644 --- a/include/dxc/DXIL/DxilInstructions.h +++ b/include/dxc/DXIL/DxilInstructions.h @@ -10501,12 +10501,13 @@ struct DxilInst_HitObject_TriangleObjectPosition { }; /// This instruction fills a matrix with a scalar value -struct DxilInst_FillMatrix { +struct DxilInst_LinAlgFillMatrix { llvm::Instruction *Instr; // Construction and identification - DxilInst_FillMatrix(llvm::Instruction *pInstr) : Instr(pInstr) {} + DxilInst_LinAlgFillMatrix(llvm::Instruction *pInstr) : Instr(pInstr) {} operator bool() const { - return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::FillMatrix); + return hlsl::OP::IsDxilOpFuncCallInst(Instr, + hlsl::OP::OpCode::LinAlgFillMatrix); } // Validation support bool isAllowed() const { return true; } @@ -10528,13 +10529,13 @@ struct DxilInst_FillMatrix { /// This instruction Converts and copies the element and use type of the source /// matrix to the destination matrix with optional transpose -struct DxilInst_CopyConvertMatrix { +struct DxilInst_LinAlgCopyConvertMatrix { llvm::Instruction *Instr; // Construction and identification - DxilInst_CopyConvertMatrix(llvm::Instruction *pInstr) : Instr(pInstr) {} + DxilInst_LinAlgCopyConvertMatrix(llvm::Instruction *pInstr) : Instr(pInstr) {} 
operator bool() const { - return hlsl::OP::IsDxilOpFuncCallInst(Instr, - hlsl::OP::OpCode::CopyConvertMatrix); + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::LinAlgCopyConvertMatrix); } // Validation support bool isAllowed() const { return true; } @@ -10558,14 +10559,14 @@ struct DxilInst_CopyConvertMatrix { }; /// This instruction fills a matrix with data from a [RW]ByteAddressBuffer -struct DxilInst_MatrixLoadFromDescriptor { +struct DxilInst_LinAlgMatrixLoadFromDescriptor { llvm::Instruction *Instr; // Construction and identification - DxilInst_MatrixLoadFromDescriptor(llvm::Instruction *pInstr) + DxilInst_LinAlgMatrixLoadFromDescriptor(llvm::Instruction *pInstr) : Instr(pInstr) {} operator bool() const { return hlsl::OP::IsDxilOpFuncCallInst( - Instr, hlsl::OP::OpCode::MatrixLoadFromDescriptor); + Instr, hlsl::OP::OpCode::LinAlgMatrixLoadFromDescriptor); } // Validation support bool isAllowed() const { return true; } @@ -10595,13 +10596,14 @@ struct DxilInst_MatrixLoadFromDescriptor { }; /// This instruction fills a matrix with data from a groupshared array -struct DxilInst_MatrixLoadFromMemory { +struct DxilInst_LinAlgMatrixLoadFromMemory { llvm::Instruction *Instr; // Construction and identification - DxilInst_MatrixLoadFromMemory(llvm::Instruction *pInstr) : Instr(pInstr) {} + DxilInst_LinAlgMatrixLoadFromMemory(llvm::Instruction *pInstr) + : Instr(pInstr) {} operator bool() const { return hlsl::OP::IsDxilOpFuncCallInst( - Instr, hlsl::OP::OpCode::MatrixLoadFromMemory); + Instr, hlsl::OP::OpCode::LinAlgMatrixLoadFromMemory); } // Validation support bool isAllowed() const { return true; } @@ -10632,13 +10634,13 @@ struct DxilInst_MatrixLoadFromMemory { /// This instruction returns the number of elements stored in thread-local /// storage on the active thread for the provided matrix -struct DxilInst_MatrixLength { +struct DxilInst_LinAlgMatrixLength { llvm::Instruction *Instr; // Construction and identification - 
DxilInst_MatrixLength(llvm::Instruction *pInstr) : Instr(pInstr) {} + DxilInst_LinAlgMatrixLength(llvm::Instruction *pInstr) : Instr(pInstr) {} operator bool() const { return hlsl::OP::IsDxilOpFuncCallInst(Instr, - hlsl::OP::OpCode::MatrixLength); + hlsl::OP::OpCode::LinAlgMatrixLength); } // Validation support bool isAllowed() const { return true; } @@ -10660,13 +10662,14 @@ struct DxilInst_MatrixLength { /// This instruction returns a two element vector containing the column and row /// of the matrix that the thread-local index corresponds to -struct DxilInst_MatrixGetCoordinate { +struct DxilInst_LinAlgMatrixGetCoordinate { llvm::Instruction *Instr; // Construction and identification - DxilInst_MatrixGetCoordinate(llvm::Instruction *pInstr) : Instr(pInstr) {} + DxilInst_LinAlgMatrixGetCoordinate(llvm::Instruction *pInstr) + : Instr(pInstr) {} operator bool() const { return hlsl::OP::IsDxilOpFuncCallInst( - Instr, hlsl::OP::OpCode::MatrixGetCoordinate); + Instr, hlsl::OP::OpCode::LinAlgMatrixGetCoordinate); } // Validation support bool isAllowed() const { return true; } @@ -10691,13 +10694,13 @@ struct DxilInst_MatrixGetCoordinate { /// This instruction returns the element of the matrix corresponding to the /// provided thread-local index -struct DxilInst_MatrixGetElement { +struct DxilInst_LinAlgMatrixGetElement { llvm::Instruction *Instr; // Construction and identification - DxilInst_MatrixGetElement(llvm::Instruction *pInstr) : Instr(pInstr) {} + DxilInst_LinAlgMatrixGetElement(llvm::Instruction *pInstr) : Instr(pInstr) {} operator bool() const { - return hlsl::OP::IsDxilOpFuncCallInst(Instr, - hlsl::OP::OpCode::MatrixGetElement); + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::LinAlgMatrixGetElement); } // Validation support bool isAllowed() const { return true; } @@ -10722,13 +10725,13 @@ struct DxilInst_MatrixGetElement { /// This instruction sets the element of the matrix corresponding to the /// provided thread-local index -struct 
DxilInst_MatrixSetElement { +struct DxilInst_LinAlgMatrixSetElement { llvm::Instruction *Instr; // Construction and identification - DxilInst_MatrixSetElement(llvm::Instruction *pInstr) : Instr(pInstr) {} + DxilInst_LinAlgMatrixSetElement(llvm::Instruction *pInstr) : Instr(pInstr) {} operator bool() const { - return hlsl::OP::IsDxilOpFuncCallInst(Instr, - hlsl::OP::OpCode::MatrixSetElement); + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::LinAlgMatrixSetElement); } // Validation support bool isAllowed() const { return true; } @@ -10755,13 +10758,14 @@ struct DxilInst_MatrixSetElement { }; /// This instruction stores a matrix to a RWByteAddressBuffer -struct DxilInst_MatrixStoreToDescriptor { +struct DxilInst_LinAlgMatrixStoreToDescriptor { llvm::Instruction *Instr; // Construction and identification - DxilInst_MatrixStoreToDescriptor(llvm::Instruction *pInstr) : Instr(pInstr) {} + DxilInst_LinAlgMatrixStoreToDescriptor(llvm::Instruction *pInstr) + : Instr(pInstr) {} operator bool() const { return hlsl::OP::IsDxilOpFuncCallInst( - Instr, hlsl::OP::OpCode::MatrixStoreToDescriptor); + Instr, hlsl::OP::OpCode::LinAlgMatrixStoreToDescriptor); } // Validation support bool isAllowed() const { return true; } @@ -10794,13 +10798,14 @@ struct DxilInst_MatrixStoreToDescriptor { }; /// This instruction stores a matrix to groupshared memory -struct DxilInst_MatrixStoreToMemory { +struct DxilInst_LinAlgMatrixStoreToMemory { llvm::Instruction *Instr; // Construction and identification - DxilInst_MatrixStoreToMemory(llvm::Instruction *pInstr) : Instr(pInstr) {} + DxilInst_LinAlgMatrixStoreToMemory(llvm::Instruction *pInstr) + : Instr(pInstr) {} operator bool() const { return hlsl::OP::IsDxilOpFuncCallInst( - Instr, hlsl::OP::OpCode::MatrixStoreToMemory); + Instr, hlsl::OP::OpCode::LinAlgMatrixStoreToMemory); } // Validation support bool isAllowed() const { return true; } @@ -10834,14 +10839,14 @@ struct DxilInst_MatrixStoreToMemory { /// This instruction 
returns comptime 0 when accumulator matrix are A layout, 1 /// when B layout -struct DxilInst_MatrixQueryAccumulatorLayout { +struct DxilInst_LinAlgMatrixQueryAccumulatorLayout { llvm::Instruction *Instr; // Construction and identification - DxilInst_MatrixQueryAccumulatorLayout(llvm::Instruction *pInstr) + DxilInst_LinAlgMatrixQueryAccumulatorLayout(llvm::Instruction *pInstr) : Instr(pInstr) {} operator bool() const { return hlsl::OP::IsDxilOpFuncCallInst( - Instr, hlsl::OP::OpCode::MatrixQueryAccumulatorLayout); + Instr, hlsl::OP::OpCode::LinAlgMatrixQueryAccumulatorLayout); } // Validation support bool isAllowed() const { return true; } @@ -10856,12 +10861,13 @@ struct DxilInst_MatrixQueryAccumulatorLayout { /// This instruction applies a multiplication op to matrix C using A and B as /// parameters -struct DxilInst_MatrixMulOp { +struct DxilInst_LinAlgMatrixMulOp { llvm::Instruction *Instr; // Construction and identification - DxilInst_MatrixMulOp(llvm::Instruction *pInstr) : Instr(pInstr) {} + DxilInst_LinAlgMatrixMulOp(llvm::Instruction *pInstr) : Instr(pInstr) {} operator bool() const { - return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::MatrixMulOp); + return hlsl::OP::IsDxilOpFuncCallInst(Instr, + hlsl::OP::OpCode::LinAlgMatrixMulOp); } // Validation support bool isAllowed() const { return true; } @@ -10886,13 +10892,13 @@ struct DxilInst_MatrixMulOp { /// This instruction accumulate A or B matrix into Accumulator matrix following /// LHS += RHS -struct DxilInst_MatrixAccumulate { +struct DxilInst_LinAlgMatrixAccumulate { llvm::Instruction *Instr; // Construction and identification - DxilInst_MatrixAccumulate(llvm::Instruction *pInstr) : Instr(pInstr) {} + DxilInst_LinAlgMatrixAccumulate(llvm::Instruction *pInstr) : Instr(pInstr) {} operator bool() const { - return hlsl::OP::IsDxilOpFuncCallInst(Instr, - hlsl::OP::OpCode::MatrixAccumulate); + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::LinAlgMatrixAccumulate); } // 
Validation support bool isAllowed() const { return true; } @@ -10917,13 +10923,13 @@ struct DxilInst_MatrixAccumulate { /// This instruction Multiplies a MxK dimension matrix and a K sized input /// vector -struct DxilInst_MatrixVecMul { +struct DxilInst_LinAlgMatVecMul { llvm::Instruction *Instr; // Construction and identification - DxilInst_MatrixVecMul(llvm::Instruction *pInstr) : Instr(pInstr) {} + DxilInst_LinAlgMatVecMul(llvm::Instruction *pInstr) : Instr(pInstr) {} operator bool() const { return hlsl::OP::IsDxilOpFuncCallInst(Instr, - hlsl::OP::OpCode::MatrixVecMul); + hlsl::OP::OpCode::LinAlgMatVecMul); } // Validation support bool isAllowed() const { return true; } @@ -10951,13 +10957,13 @@ struct DxilInst_MatrixVecMul { /// This instruction Multiplies a MxK dimension matrix and a K sized input /// vector then adds a M sized bias vector -struct DxilInst_MatrixVecMulAdd { +struct DxilInst_LinAlgMatVecMulAdd { llvm::Instruction *Instr; // Construction and identification - DxilInst_MatrixVecMulAdd(llvm::Instruction *pInstr) : Instr(pInstr) {} + DxilInst_LinAlgMatVecMulAdd(llvm::Instruction *pInstr) : Instr(pInstr) {} operator bool() const { return hlsl::OP::IsDxilOpFuncCallInst(Instr, - hlsl::OP::OpCode::MatrixVecMulAdd); + hlsl::OP::OpCode::LinAlgMatVecMulAdd); } // Validation support bool isAllowed() const { return true; } @@ -10990,14 +10996,14 @@ struct DxilInst_MatrixVecMulAdd { }; /// This instruction accumulates a matrix to a RWByteAddressBuffer -struct DxilInst_MatrixAccumulateToDescriptor { +struct DxilInst_LinAlgMatrixAccumulateToDescriptor { llvm::Instruction *Instr; // Construction and identification - DxilInst_MatrixAccumulateToDescriptor(llvm::Instruction *pInstr) + DxilInst_LinAlgMatrixAccumulateToDescriptor(llvm::Instruction *pInstr) : Instr(pInstr) {} operator bool() const { return hlsl::OP::IsDxilOpFuncCallInst( - Instr, hlsl::OP::OpCode::MatrixAccumulateToDescriptor); + Instr, hlsl::OP::OpCode::LinAlgMatrixAccumulateToDescriptor); } // 
Validation support bool isAllowed() const { return true; } @@ -11030,14 +11036,14 @@ struct DxilInst_MatrixAccumulateToDescriptor { }; /// This instruction accumulates a matrix to groupshared memory -struct DxilInst_MatrixAccumulateToMemory { +struct DxilInst_LinAlgMatrixAccumulateToMemory { llvm::Instruction *Instr; // Construction and identification - DxilInst_MatrixAccumulateToMemory(llvm::Instruction *pInstr) + DxilInst_LinAlgMatrixAccumulateToMemory(llvm::Instruction *pInstr) : Instr(pInstr) {} operator bool() const { return hlsl::OP::IsDxilOpFuncCallInst( - Instr, hlsl::OP::OpCode::MatrixAccumulateToMemory); + Instr, hlsl::OP::OpCode::LinAlgMatrixAccumulateToMemory); } // Validation support bool isAllowed() const { return true; } @@ -11071,13 +11077,14 @@ struct DxilInst_MatrixAccumulateToMemory { /// This instruction Outer products an M sized vector and a N sized vector /// producing an MxN matrix -struct DxilInst_MatrixOuterProduct { +struct DxilInst_LinAlgMatrixOuterProduct { llvm::Instruction *Instr; // Construction and identification - DxilInst_MatrixOuterProduct(llvm::Instruction *pInstr) : Instr(pInstr) {} + DxilInst_LinAlgMatrixOuterProduct(llvm::Instruction *pInstr) + : Instr(pInstr) {} operator bool() const { - return hlsl::OP::IsDxilOpFuncCallInst(Instr, - hlsl::OP::OpCode::MatrixOuterProduct); + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::LinAlgMatrixOuterProduct); } // Validation support bool isAllowed() const { return true; } diff --git a/lib/DXIL/DxilOperations.cpp b/lib/DXIL/DxilOperations.cpp index a1716a8b5a..1393474b48 100644 --- a/lib/DXIL/DxilOperations.cpp +++ b/lib/DXIL/DxilOperations.cpp @@ -2833,146 +2833,146 @@ static const OP::OpCodeProperty ExperimentalOps_OpCodeProps[] = { {}}, // Overloads: v // Linear Algebra Operations - {OC::FillMatrix, - "FillMatrix", - OCC::FillMatrix, - "fillMatrix", + {OC::LinAlgFillMatrix, + "LinAlgFillMatrix", + OCC::LinAlgFillMatrix, + "linAlgFillMatrix", Attribute::None, 2, 
{{0x200}, {0x63}}, {{0x0}, {0x0}}}, // Overloads: o,hfwi - {OC::CopyConvertMatrix, - "CopyConvertMatrix", - OCC::CopyConvertMatrix, - "copyConvertMatrix", + {OC::LinAlgCopyConvertMatrix, + "LinAlgCopyConvertMatrix", + OCC::LinAlgCopyConvertMatrix, + "linAlgCopyConvertMatrix", Attribute::None, 2, {{0x200}, {0x200}}, {{0x0}, {0x0}}}, // Overloads: o,o - {OC::MatrixLoadFromDescriptor, - "MatrixLoadFromDescriptor", - OCC::MatrixLoadFromDescriptor, - "matrixLoadFromDescriptor", + {OC::LinAlgMatrixLoadFromDescriptor, + "LinAlgMatrixLoadFromDescriptor", + OCC::LinAlgMatrixLoadFromDescriptor, + "linAlgMatrixLoadFromDescriptor", Attribute::None, 1, {{0x200}}, {{0x0}}}, // Overloads: o - {OC::MatrixLoadFromMemory, - "MatrixLoadFromMemory", - OCC::MatrixLoadFromMemory, - "matrixLoadFromMemory", + {OC::LinAlgMatrixLoadFromMemory, + "LinAlgMatrixLoadFromMemory", + OCC::LinAlgMatrixLoadFromMemory, + "linAlgMatrixLoadFromMemory", Attribute::None, 2, {{0x200}, {0x63}}, {{0x0}, {0x0}}}, // Overloads: o,hfwi - {OC::MatrixLength, - "MatrixLength", - OCC::MatrixLength, - "matrixLength", + {OC::LinAlgMatrixLength, + "LinAlgMatrixLength", + OCC::LinAlgMatrixLength, + "linAlgMatrixLength", Attribute::None, 1, {{0x200}}, {{0x0}}}, // Overloads: o - {OC::MatrixGetCoordinate, - "MatrixGetCoordinate", - OCC::MatrixGetCoordinate, - "matrixGetCoordinate", + {OC::LinAlgMatrixGetCoordinate, + "LinAlgMatrixGetCoordinate", + OCC::LinAlgMatrixGetCoordinate, + "linAlgMatrixGetCoordinate", Attribute::None, 1, {{0x200}}, {{0x0}}}, // Overloads: o - {OC::MatrixGetElement, - "MatrixGetElement", - OCC::MatrixGetElement, - "matrixGetElement", + {OC::LinAlgMatrixGetElement, + "LinAlgMatrixGetElement", + OCC::LinAlgMatrixGetElement, + "linAlgMatrixGetElement", Attribute::None, 2, {{0x63}, {0x200}}, {{0x0}, {0x0}}}, // Overloads: hfwi,o - {OC::MatrixSetElement, - "MatrixSetElement", - OCC::MatrixSetElement, - "matrixSetElement", + {OC::LinAlgMatrixSetElement, + "LinAlgMatrixSetElement", + 
OCC::LinAlgMatrixSetElement, + "linAlgMatrixSetElement", Attribute::None, 3, {{0x200}, {0x200}, {0x63}}, {{0x0}, {0x0}, {0x0}}}, // Overloads: o,o,hfwi - {OC::MatrixStoreToDescriptor, - "MatrixStoreToDescriptor", - OCC::MatrixStoreToDescriptor, - "matrixStoreToDescriptor", + {OC::LinAlgMatrixStoreToDescriptor, + "LinAlgMatrixStoreToDescriptor", + OCC::LinAlgMatrixStoreToDescriptor, + "linAlgMatrixStoreToDescriptor", Attribute::None, 1, {{0x200}}, {{0x0}}}, // Overloads: o - {OC::MatrixStoreToMemory, - "MatrixStoreToMemory", - OCC::MatrixStoreToMemory, - "matrixStoreToMemory", + {OC::LinAlgMatrixStoreToMemory, + "LinAlgMatrixStoreToMemory", + OCC::LinAlgMatrixStoreToMemory, + "linAlgMatrixStoreToMemory", Attribute::None, 2, {{0x200}, {0x63}}, {{0x0}, {0x0}}}, // Overloads: o,hfwi - {OC::MatrixQueryAccumulatorLayout, - "MatrixQueryAccumulatorLayout", - OCC::MatrixQueryAccumulatorLayout, - "matrixQueryAccumulatorLayout", + {OC::LinAlgMatrixQueryAccumulatorLayout, + "LinAlgMatrixQueryAccumulatorLayout", + OCC::LinAlgMatrixQueryAccumulatorLayout, + "linAlgMatrixQueryAccumulatorLayout", Attribute::None, 0, {}, {}}, // Overloads: v - {OC::MatrixMulOp, - "MatrixMulOp", - OCC::MatrixMulOp, - "matrixMulOp", + {OC::LinAlgMatrixMulOp, + "LinAlgMatrixMulOp", + OCC::LinAlgMatrixMulOp, + "linAlgMatrixMulOp", Attribute::None, 3, {{0x200}, {0x200}, {0x200}}, {{0x0}, {0x0}, {0x0}}}, // Overloads: o,o,o - {OC::MatrixAccumulate, - "MatrixAccumulate", - OCC::MatrixAccumulate, - "matrixAccumulate", + {OC::LinAlgMatrixAccumulate, + "LinAlgMatrixAccumulate", + OCC::LinAlgMatrixAccumulate, + "linAlgMatrixAccumulate", Attribute::None, 3, {{0x200}, {0x200}, {0x200}}, {{0x0}, {0x0}, {0x0}}}, // Overloads: o,o,o - {OC::MatrixVecMul, - "MatrixVecMul", - OCC::MatrixVecMul, - "matrixVecMul", + {OC::LinAlgMatVecMul, + "LinAlgMatVecMul", + OCC::LinAlgMatVecMul, + "linAlgMatVecMul", Attribute::None, 3, {{0x400}, {0x200}, {0x400}}, {{0x63}, {0x0}, {0x63}}}, // Overloads: getNumParams() <= 1) return 
nullptr; return FT->getParamType(1); @@ -7011,7 +7014,7 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { case OpCode::GetGroupWaveCount: case OpCode::ClusterID: case OpCode::ReservedD0: - case OpCode::MatrixQueryAccumulatorLayout: + case OpCode::LinAlgMatrixQueryAccumulatorLayout: case OpCode::ReservedD1: case OpCode::ReservedD2: case OpCode::ReservedD3: @@ -7038,7 +7041,7 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { case OpCode::SampleCmpGrad: case OpCode::SampleCmpBias: case OpCode::RawBufferVectorLoad: - case OpCode::MatrixGetCoordinate: { + case OpCode::LinAlgMatrixGetCoordinate: { StructType *ST = cast(Ty); return ST->getElementType(0); } @@ -7050,39 +7053,39 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { return cast(Ty)->getElementType(); case OpCode::MatVecMul: case OpCode::MatVecMulAdd: - case OpCode::FillMatrix: - case OpCode::CopyConvertMatrix: - case OpCode::MatrixLoadFromMemory: - case OpCode::MatrixGetElement: + case OpCode::LinAlgFillMatrix: + case OpCode::LinAlgCopyConvertMatrix: + case OpCode::LinAlgMatrixLoadFromMemory: + case OpCode::LinAlgMatrixGetElement: if (FT->getNumParams() < 2) return nullptr; return llvm::StructType::get(Ctx, {FT->getReturnType(), FT->getParamType(1)}); case OpCode::OuterProductAccumulate: - case OpCode::MatrixStoreToMemory: - case OpCode::MatrixAccumulateToMemory: + case OpCode::LinAlgMatrixStoreToMemory: + case OpCode::LinAlgMatrixAccumulateToMemory: if (FT->getNumParams() < 3) return nullptr; return llvm::StructType::get(Ctx, {FT->getParamType(1), FT->getParamType(2)}); - case OpCode::MatrixSetElement: + case OpCode::LinAlgMatrixSetElement: if (FT->getNumParams() < 4) return nullptr; return llvm::StructType::get( Ctx, {FT->getReturnType(), FT->getParamType(1), FT->getParamType(3)}); - case OpCode::MatrixMulOp: - case OpCode::MatrixAccumulate: - case OpCode::MatrixVecMul: - case OpCode::MatrixOuterProduct: + case OpCode::LinAlgMatrixMulOp: + case 
OpCode::LinAlgMatrixAccumulate: + case OpCode::LinAlgMatVecMul: + case OpCode::LinAlgMatrixOuterProduct: if (FT->getNumParams() < 3) return nullptr; return llvm::StructType::get( Ctx, {FT->getReturnType(), FT->getParamType(1), FT->getParamType(2)}); - case OpCode::MatrixVecMulAdd: + case OpCode::LinAlgMatVecMulAdd: if (FT->getNumParams() < 5) return nullptr; return llvm::StructType::get(Ctx, diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index b05ca17f76..22ea3c77d0 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -7658,43 +7658,43 @@ constexpr IntrinsicLower gLowerTable[] = { DXIL::OpCode::HitObject_TriangleObjectPosition}, {IntrinsicOp::IOP___builtin_LinAlg_CopyConvertMatrix, EmptyLower, - DXIL::OpCode::CopyConvertMatrix}, + DXIL::OpCode::LinAlgCopyConvertMatrix}, {IntrinsicOp::IOP___builtin_LinAlg_FillMatrix, EmptyLower, - DXIL::OpCode::FillMatrix}, + DXIL::OpCode::LinAlgFillMatrix}, {IntrinsicOp::IOP___builtin_LinAlg_MatrixGetCoordinate, EmptyLower, - DXIL::OpCode::MatrixGetCoordinate}, + DXIL::OpCode::LinAlgMatrixGetCoordinate}, {IntrinsicOp::IOP___builtin_LinAlg_MatrixGetElement, EmptyLower, - DXIL::OpCode::MatrixGetElement}, + DXIL::OpCode::LinAlgMatrixGetElement}, {IntrinsicOp::IOP___builtin_LinAlg_MatrixLength, EmptyLower, - DXIL::OpCode::MatrixLength}, + DXIL::OpCode::LinAlgMatrixLength}, {IntrinsicOp::IOP___builtin_LinAlg_MatrixLoadFromDescriptor, EmptyLower, - DXIL::OpCode::MatrixLoadFromDescriptor}, + DXIL::OpCode::LinAlgMatrixLoadFromDescriptor}, {IntrinsicOp::IOP___builtin_LinAlg_MatrixLoadFromMemory, EmptyLower, - DXIL::OpCode::MatrixLoadFromMemory}, + DXIL::OpCode::LinAlgMatrixLoadFromMemory}, {IntrinsicOp::IOP___builtin_LinAlg_MatrixSetElement, EmptyLower, - DXIL::OpCode::MatrixSetElement}, + DXIL::OpCode::LinAlgMatrixSetElement}, {IntrinsicOp::IOP___builtin_LinAlg_MatrixStoreToDescriptor, EmptyLower, - DXIL::OpCode::MatrixStoreToDescriptor}, + 
DXIL::OpCode::LinAlgMatrixStoreToDescriptor}, {IntrinsicOp::IOP___builtin_LinAlg_MatrixStoreToMemory, EmptyLower, - DXIL::OpCode::MatrixStoreToMemory}, + DXIL::OpCode::LinAlgMatrixStoreToMemory}, {IntrinsicOp::IOP___builtin_LinAlg_MatrixAccumulate, EmptyLower, - DXIL::OpCode::MatrixAccumulate}, + DXIL::OpCode::LinAlgMatrixAccumulate}, {IntrinsicOp::IOP___builtin_LinAlg_MatrixMatrixMultiply, EmptyLower, - DXIL::OpCode::MatrixMulOp}, + DXIL::OpCode::LinAlgMatrixMulOp}, {IntrinsicOp::IOP___builtin_LinAlg_MatrixMatrixMultiplyAccumulate, - EmptyLower, DXIL::OpCode::MatrixMulOp}, + EmptyLower, DXIL::OpCode::LinAlgMatrixMulOp}, {IntrinsicOp::IOP___builtin_LinAlg_MatrixQueryAccumulatorLayout, EmptyLower, - DXIL::OpCode::MatrixQueryAccumulatorLayout}, + DXIL::OpCode::LinAlgMatrixQueryAccumulatorLayout}, {IntrinsicOp::IOP___builtin_LinAlg_MatrixAccumulateToDescriptor, EmptyLower, - DXIL::OpCode::MatrixAccumulateToDescriptor}, + DXIL::OpCode::LinAlgMatrixAccumulateToDescriptor}, {IntrinsicOp::IOP___builtin_LinAlg_MatrixAccumulateToMemory, EmptyLower, - DXIL::OpCode::MatrixAccumulateToMemory}, + DXIL::OpCode::LinAlgMatrixAccumulateToMemory}, {IntrinsicOp::IOP___builtin_LinAlg_MatrixOuterProduct, EmptyLower, - DXIL::OpCode::MatrixOuterProduct}, + DXIL::OpCode::LinAlgMatrixOuterProduct}, {IntrinsicOp::IOP___builtin_LinAlg_MatrixVectorMultiply, EmptyLower, - DXIL::OpCode::MatrixVecMul}, + DXIL::OpCode::LinAlgMatVecMul}, {IntrinsicOp::IOP___builtin_LinAlg_MatrixVectorMultiplyAdd, EmptyLower, - DXIL::OpCode::MatrixVecMulAdd}, + DXIL::OpCode::LinAlgMatVecMulAdd}, {IntrinsicOp::IOP_DebugBreak, TrivialNoArgOperation, DXIL::OpCode::DebugBreak}, {IntrinsicOp::IOP_DxIsDebuggerPresent, TranslateWaveToVal, diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index 692f305fde..0b98fed0a0 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -1161,20 +1161,21 @@ def populate_categories_and_models_ExperimentalOps(self): # Thread/Wave/ThreadGroup scope operations for i in insts( - 
"MatrixQueryAccumulatorLayout," - + "MatrixLoadFromDescriptor,MatrixAccumulateToDescriptor," - + "MatrixVecMul,MatrixVecMulAdd,MatrixOuterProduct" + "LinAlgMatrixQueryAccumulatorLayout,LinAlgMatrixLoadFromDescriptor," + + "LinAlgMatrixAccumulateToDescriptor,LinAlgMatVecMul," + + "LinAlgMatVecMulAdd,LinAlgMatrixOuterProduct" ): i.category = "Linear Algebra Operations" i.shader_model = experimental_sm # Wave/ThreadGroup scope operations for i in insts( - "FillMatrix,CopyConvertMatrix," - + "MatrixLength,MatrixGetCoordinate,MatrixGetElement,MatrixSetElement," - + "MatrixStoreToDescriptor," - + "MatrixLoadFromMemory,MatrixStoreToMemory,MatrixAccumulateToMemory," - + "MatrixMulOp,MatrixAccumulate" + "LinAlgFillMatrix,LinAlgCopyConvertMatrix,LinAlgMatrixLength," + + "LinAlgMatrixGetCoordinate,LinAlgMatrixGetElement," + + "LinAlgMatrixSetElement,LinAlgMatrixStoreToDescriptor," + + "LinAlgMatrixLoadFromMemory,LinAlgMatrixStoreToMemory," + + "LinAlgMatrixAccumulateToMemory,LinAlgMatrixMulOp," + + "LinAlgMatrixAccumulate" ): i.category = "Linear Algebra Operations" i.shader_model = experimental_sm @@ -6343,8 +6344,8 @@ def populate_ExperimentalOps(self): op_table.reserve_dxil_op_range("ReservedD", 1) add_dxil_op( - "FillMatrix", - "FillMatrix", + "LinAlgFillMatrix", + "LinAlgFillMatrix", "fills a matrix with a scalar value", "o,hfwi", "", @@ -6355,8 +6356,8 @@ def populate_ExperimentalOps(self): ) add_dxil_op( - "CopyConvertMatrix", - "CopyConvertMatrix", + "LinAlgCopyConvertMatrix", + "LinAlgCopyConvertMatrix", "Converts and copies the element and use type of the source matrix to the destination matrix with optional transpose", "o,o", "", @@ -6368,8 +6369,8 @@ def populate_ExperimentalOps(self): ) add_dxil_op( - "MatrixLoadFromDescriptor", - "MatrixLoadFromDescriptor", + "LinAlgMatrixLoadFromDescriptor", + "LinAlgMatrixLoadFromDescriptor", "fills a matrix with data from a [RW]ByteAddressBuffer", "o", "", @@ -6390,8 +6391,8 @@ def populate_ExperimentalOps(self): ) 
add_dxil_op( - "MatrixLoadFromMemory", - "MatrixLoadFromMemory", + "LinAlgMatrixLoadFromMemory", + "LinAlgMatrixLoadFromMemory", "fills a matrix with data from a groupshared array", "o,hfwi", # TODO: needs to be updated for groupshared "", @@ -6413,8 +6414,8 @@ def populate_ExperimentalOps(self): ) add_dxil_op( - "MatrixLength", - "MatrixLength", + "LinAlgMatrixLength", + "LinAlgMatrixLength", "returns the number of elements stored in thread-local storage on the active thread for the provided matrix", "o", "", @@ -6425,8 +6426,8 @@ def populate_ExperimentalOps(self): ) add_dxil_op( - "MatrixGetCoordinate", - "MatrixGetCoordinate", + "LinAlgMatrixGetCoordinate", + "LinAlgMatrixGetCoordinate", "returns a two element vector containing the column and row of the matrix that the thread-local index corresponds to", "o", "", @@ -6442,8 +6443,8 @@ def populate_ExperimentalOps(self): ) add_dxil_op( - "MatrixGetElement", - "MatrixGetElement", + "LinAlgMatrixGetElement", + "LinAlgMatrixGetElement", "returns the element of the matrix corresponding to the provided thread-local index", "hfwi,o", "", @@ -6457,8 +6458,8 @@ def populate_ExperimentalOps(self): ) add_dxil_op( - "MatrixSetElement", - "MatrixSetElement", + "LinAlgMatrixSetElement", + "LinAlgMatrixSetElement", "sets the element of the matrix corresponding to the provided thread-local index", "o,o,hfwi", "", @@ -6473,8 +6474,8 @@ def populate_ExperimentalOps(self): ) add_dxil_op( - "MatrixStoreToDescriptor", - "MatrixStoreToDescriptor", + "LinAlgMatrixStoreToDescriptor", + "LinAlgMatrixStoreToDescriptor", "stores a matrix to a RWByteAddressBuffer", "o", "", @@ -6494,8 +6495,8 @@ def populate_ExperimentalOps(self): ) add_dxil_op( - "MatrixStoreToMemory", - "MatrixStoreToMemory", + "LinAlgMatrixStoreToMemory", + "LinAlgMatrixStoreToMemory", "stores a matrix to groupshared memory", "o,hfwi", # TODO: needs to be updated for groupshared "", @@ -6518,8 +6519,8 @@ def populate_ExperimentalOps(self): ) add_dxil_op( - 
"MatrixQueryAccumulatorLayout", - "MatrixQueryAccumulatorLayout", + "LinAlgMatrixQueryAccumulatorLayout", + "LinAlgMatrixQueryAccumulatorLayout", "returns comptime 0 when accumulator matrix are A layout, 1 when B layout", "v", "", @@ -6529,8 +6530,8 @@ def populate_ExperimentalOps(self): ) add_dxil_op( - "MatrixMulOp", - "MatrixMulOp", + "LinAlgMatrixMulOp", + "LinAlgMatrixMulOp", "applies a multiplication op to matrix C using A and B as parameters", "o,o,o", "", @@ -6542,8 +6543,8 @@ def populate_ExperimentalOps(self): ) add_dxil_op( - "MatrixAccumulate", - "MatrixAccumulate", + "LinAlgMatrixAccumulate", + "LinAlgMatrixAccumulate", "accumulate A or B matrix into Accumulator matrix following LHS += RHS", "o,o,o", "", @@ -6555,8 +6556,8 @@ def populate_ExperimentalOps(self): ) add_dxil_op( - "MatrixVecMul", - "MatrixVecMul", + "LinAlgMatVecMul", + "LinAlgMatVecMul", "Multiplies a MxK dimension matrix and a K sized input vector", " Date: Thu, 19 Feb 2026 20:51:52 -0500 Subject: [PATCH 08/13] Corrected setting IRBuilder debug location in HLSignatureLower::GenerateDxilComputeAndNodeCommonInputs() (#8022) As a result of https://github.com/microsoft/DirectXShaderCompiler/pull/7799, the HLSL entry function can now contain `dbg.value` calls with !dbg location metadata with scopes corresponding to functions which have been inlined. The `HLSignatureLower::GenerateDxilComputeAndNodeCommonInputs()` creates `@dx.op.threadId` intrinsic calls, using the default dbg loc when creating an `IRBuilder`. When the first instruction in the entry block is one of the `dbg.value` calls, its dbg location is copied to the `@dx.op.threadId` calls. That makes the DXIL unreadable by the modern LLVM IR reader since it fails the IR module verification because of !dbg pointing to a different subprogram scope. This change sets the dbg location for IRBuilder to be the !dbg node of the first non-PHI and non-debug instruction in the block.
--------- Co-authored-by: Konstantin --- lib/HLSL/HLSignatureLower.cpp | 2 ++ .../dxil/debug/dxop_threadid_dbgloc.hlsl | 24 +++++++++++++++++++ 2 files changed, 26 insertions(+) create mode 100644 tools/clang/test/HLSLFileCheck/dxil/debug/dxop_threadid_dbgloc.hlsl diff --git a/lib/HLSL/HLSignatureLower.cpp b/lib/HLSL/HLSignatureLower.cpp index bb5c25f6ee..3fdb228594 100644 --- a/lib/HLSL/HLSignatureLower.cpp +++ b/lib/HLSL/HLSignatureLower.cpp @@ -1253,6 +1253,8 @@ void HLSignatureLower::GenerateDxilComputeAndNodeCommonInputs() { DXASSERT(funcAnnotation, "must find annotation for entry function"); auto &funcProps = HLM.GetDxilFunctionProps(Entry); IRBuilder<> Builder(Entry->getEntryBlock().getFirstInsertionPt()); + Builder.SetCurrentDebugLocation( + Entry->getEntryBlock().getFirstNonPHIOrDbg()->getDebugLoc()); for (Argument &arg : Entry->args()) { DxilParameterAnnotation ¶mAnnotation = diff --git a/tools/clang/test/HLSLFileCheck/dxil/debug/dxop_threadid_dbgloc.hlsl b/tools/clang/test/HLSLFileCheck/dxil/debug/dxop_threadid_dbgloc.hlsl new file mode 100644 index 0000000000..a17cd5572a --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/dxil/debug/dxop_threadid_dbgloc.hlsl @@ -0,0 +1,24 @@ +// RUN: %dxc -T cs_6_5 -E CSMain -O0 -Zi -enable-16bit-types %s | FileCheck %s + +// CHECK-LABEL: @CSMain() +// CHECK: %{{[0-9]+}} = call i32 @dx.op.threadId.i32(i32 {{[0-9]+}}, i32 {{[0-9]+}}), !dbg [[DBG_MD:![0-9]+]] +// CHECK-DAG: [[SUBPGM:![0-9]+]] = !DISubprogram(name: "CSMain" +// CHECK-DAG: [[DBG_MD]] = !DILocation(line: 21, column: 15, scope: [[SUBPGM]]) + +RWBuffer u0 : register(u0); +RWBuffer u1 : register(u1); + +static float my_var; +static float my_var2; + +void foo() { + my_var2 = my_var * 2; +} + +[RootSignature("DescriptorTable(UAV(u0,numDescriptors=2))")] +[numthreads(64,1,1)] +void CSMain(uint3 dtid : SV_DispatchThreadID) { + my_var = u0[dtid.x]; + foo(); + u1[dtid.x] = my_var2; +} From 214e18263c632954521edb43b09b5e1f8620bd22 Mon Sep 17 00:00:00 2001 From: Steven 
Perron Date: Fri, 20 Feb 2026 09:41:03 -0500 Subject: [PATCH 09/13] [SPIRV] Update spir-v submodules and handle multi-dim array (#8167) We are updating the spir-v submodules. They contain a new pass that will linearize multi-dimensional resource arrays, and call the combine access chain pass. This means we need to fix up some tests, and we want to make sure that we can properly handle multi-dimensional resource array in DXC. Fixes #7922 --- external/SPIRV-Headers | 2 +- external/SPIRV-Tools | 2 +- tools/clang/lib/SPIRV/DeclResultIdMapper.cpp | 15 +++ ...uctured-buffer.array.counter.indirect.hlsl | 6 +- ...ctured-buffer.array.counter.indirect2.hlsl | 6 +- ....global-struct-of-resources.optimized.hlsl | 121 +++++++++--------- 6 files changed, 81 insertions(+), 71 deletions(-) diff --git a/external/SPIRV-Headers b/external/SPIRV-Headers index 04f10f650d..f31ca173ef 160000 --- a/external/SPIRV-Headers +++ b/external/SPIRV-Headers @@ -1 +1 @@ -Subproject commit 04f10f650d514df88b76d25e83db360142c7b174 +Subproject commit f31ca173eff866369e54d35e53375fadbabd58f4 diff --git a/external/SPIRV-Tools b/external/SPIRV-Tools index fbe4f3ad91..64f5770f59 160000 --- a/external/SPIRV-Tools +++ b/external/SPIRV-Tools @@ -1 +1 @@ -Subproject commit fbe4f3ad913c44fe8700545f8ffe35d1382b7093 +Subproject commit 64f5770f59db933d46b9cad6edc42b4186409ef4 diff --git a/tools/clang/lib/SPIRV/DeclResultIdMapper.cpp b/tools/clang/lib/SPIRV/DeclResultIdMapper.cpp index a9d10f9296..9fed57b41b 100644 --- a/tools/clang/lib/SPIRV/DeclResultIdMapper.cpp +++ b/tools/clang/lib/SPIRV/DeclResultIdMapper.cpp @@ -1263,6 +1263,21 @@ SpirvVariable *DeclResultIdMapper::createExternVar(const VarDecl *var, // another variable or function parameter needsLegalization = true; } + + // If we have a multi-dimensional array of resources, we need to run + // legalization to flatten the array. 
+ if (const auto *arrayType = astContext.getAsConstantArrayType(type)) { + if (astContext.getAsConstantArrayType(arrayType->getElementType())) { + QualType elemType = arrayType->getElementType(); + while (const auto *innerArrayType = + astContext.getAsConstantArrayType(elemType)) { + elemType = innerArrayType->getElementType(); + } + if (hlsl::IsHLSLResourceType(elemType)) + needsLegalization = true; + } + } + if (vkImgFeatures.isCombinedImageSampler || vkImgFeatures.format) { spvContext.registerVkImageFeaturesForSpvVariable(varInstr, vkImgFeatures); } diff --git a/tools/clang/test/CodeGenSPIRV/type.rwstructured-buffer.array.counter.indirect.hlsl b/tools/clang/test/CodeGenSPIRV/type.rwstructured-buffer.array.counter.indirect.hlsl index aa65570f71..76d8165107 100644 --- a/tools/clang/test/CodeGenSPIRV/type.rwstructured-buffer.array.counter.indirect.hlsl +++ b/tools/clang/test/CodeGenSPIRV/type.rwstructured-buffer.array.counter.indirect.hlsl @@ -20,13 +20,11 @@ void func(RWStructuredBuffer local) { float4 main(PSInput input) : SV_TARGET { -// CHECK: [[ac1:%[0-9]+]] = OpAccessChain %_ptr_Uniform_type_ACSBuffer_counter %counter_var_g_rwbuffer {{%[0-9]+}} -// CHECK: [[ac2:%[0-9]+]] = OpAccessChain %_ptr_Uniform_int [[ac1]] %uint_0 +// CHECK: [[ac2:%[0-9]+]] = OpAccessChain %_ptr_Uniform_int %counter_var_g_rwbuffer {{%[0-9]+}} %uint_0 // CHECK: OpAtomicIAdd %int [[ac2]] %uint_1 %uint_0 %int_1 func(g_rwbuffer[input.idx]); -// CHECK: [[ac1_0:%[0-9]+]] = OpAccessChain %_ptr_Uniform_type_RWStructuredBuffer_uint %g_rwbuffer {{%[0-9]+}} -// CHECK: [[ac2_0:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint [[ac1_0]] %int_0 %uint_0 +// CHECK: [[ac2_0:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %g_rwbuffer {{%[0-9]+}} %int_0 %uint_0 // CHECK: OpLoad %uint [[ac2_0]] return g_rwbuffer[input.idx][0]; } diff --git a/tools/clang/test/CodeGenSPIRV/type.rwstructured-buffer.array.counter.indirect2.hlsl b/tools/clang/test/CodeGenSPIRV/type.rwstructured-buffer.array.counter.indirect2.hlsl index 
68cfb9f6e1..0ea52864a2 100644 --- a/tools/clang/test/CodeGenSPIRV/type.rwstructured-buffer.array.counter.indirect2.hlsl +++ b/tools/clang/test/CodeGenSPIRV/type.rwstructured-buffer.array.counter.indirect2.hlsl @@ -18,13 +18,11 @@ float4 main(PSInput input) : SV_TARGET { RWStructuredBuffer l_rwbuffer[5] = g_rwbuffer; -// CHECK: [[ac1:%[0-9]+]] = OpAccessChain %_ptr_Uniform_type_ACSBuffer_counter %counter_var_g_rwbuffer %int_0 -// CHECK: [[ac2:%[0-9]+]] = OpAccessChain %_ptr_Uniform_int [[ac1]] %uint_0 +// CHECK: [[ac2:%[0-9]+]] = OpAccessChain %_ptr_Uniform_int %counter_var_g_rwbuffer %int_0 %uint_0 // CHECK: OpAtomicIAdd %int [[ac2]] %uint_1 %uint_0 %int_1 l_rwbuffer[0].IncrementCounter(); -// CHECK: [[ac1_0:%[0-9]+]] = OpAccessChain %_ptr_Uniform_type_RWStructuredBuffer_uint %g_rwbuffer {{%[0-9]+}} -// CHECK: [[ac2_0:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint [[ac1_0]] %int_0 %uint_0 +// CHECK: [[ac2_0:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %g_rwbuffer {{%[0-9]+}} %int_0 %uint_0 // CHECK: OpLoad %uint [[ac2_0]] return l_rwbuffer[input.idx][0]; } diff --git a/tools/clang/test/CodeGenSPIRV/vk.binding.global-struct-of-resources.optimized.hlsl b/tools/clang/test/CodeGenSPIRV/vk.binding.global-struct-of-resources.optimized.hlsl index ddf85b3cc1..1656e7f50f 100644 --- a/tools/clang/test/CodeGenSPIRV/vk.binding.global-struct-of-resources.optimized.hlsl +++ b/tools/clang/test/CodeGenSPIRV/vk.binding.global-struct-of-resources.optimized.hlsl @@ -3,20 +3,20 @@ // Check the names // // CHECK: OpName %secondGlobal_t "secondGlobal.t" -// CHECK: OpName %firstGlobal_0__0__t "firstGlobal[0][0].t" -// CHECK: OpName %firstGlobal_0__1__t "firstGlobal[0][1].t" -// CHECK: OpName %firstGlobal_1__0__t "firstGlobal[1][0].t" -// CHECK: OpName %firstGlobal_1__1__t "firstGlobal[1][1].t" +// CHECK: OpName %[[fg0:firstGlobal_[0-9]+__t]] "firstGlobal{{.*}}.t" +// CHECK: OpName %[[fg1:firstGlobal_[0-9]+__t]] "firstGlobal{{.*}}.t" +// CHECK: OpName %[[fg2:firstGlobal_[0-9]+__t]] 
"firstGlobal{{.*}}.t" +// CHECK: OpName %[[fg3:firstGlobal_[0-9]+__t]] "firstGlobal{{.*}}.t" // CHECK: OpName %secondGlobal_tt_0__s "secondGlobal.tt[0].s" // CHECK: OpName %secondGlobal_tt_1__s "secondGlobal.tt[1].s" -// CHECK: OpName %firstGlobal_0__0__tt_0__s "firstGlobal[0][0].tt[0].s" -// CHECK: OpName %firstGlobal_0__0__tt_1__s "firstGlobal[0][0].tt[1].s" -// CHECK: OpName %firstGlobal_0__1__tt_0__s "firstGlobal[0][1].tt[0].s" -// CHECK: OpName %firstGlobal_0__1__tt_1__s "firstGlobal[0][1].tt[1].s" -// CHECK: OpName %firstGlobal_1__0__tt_0__s "firstGlobal[1][0].tt[0].s" -// CHECK: OpName %firstGlobal_1__0__tt_1__s "firstGlobal[1][0].tt[1].s" -// CHECK: OpName %firstGlobal_1__1__tt_0__s "firstGlobal[1][1].tt[0].s" -// CHECK: OpName %firstGlobal_1__1__tt_1__s "firstGlobal[1][1].tt[1].s" +// CHECK: OpName %[[fgtt0_0:firstGlobal_[0-9]+__tt_0__s]] "firstGlobal{{.*}}.tt[0].s" +// CHECK: OpName %[[fgtt0_1:firstGlobal_[0-9]+__tt_1__s]] "firstGlobal{{.*}}.tt[1].s" +// CHECK: OpName %[[fgtt1_0:firstGlobal_[0-9]+__tt_0__s]] "firstGlobal{{.*}}.tt[0].s" +// CHECK: OpName %[[fgtt1_1:firstGlobal_[0-9]+__tt_1__s]] "firstGlobal{{.*}}.tt[1].s" +// CHECK: OpName %[[fgtt2_0:firstGlobal_[0-9]+__tt_0__s]] "firstGlobal{{.*}}.tt[0].s" +// CHECK: OpName %[[fgtt2_1:firstGlobal_[0-9]+__tt_1__s]] "firstGlobal{{.*}}.tt[1].s" +// CHECK: OpName %[[fgtt3_0:firstGlobal_[0-9]+__tt_0__s]] "firstGlobal{{.*}}.tt[0].s" +// CHECK: OpName %[[fgtt3_1:firstGlobal_[0-9]+__tt_1__s]] "firstGlobal{{.*}}.tt[1].s" // Check flattening of bindings // Explanation: Only the resources that are used will have a binding assignment @@ -70,52 +70,52 @@ // // CHECK: OpDecorate %secondGlobal_t DescriptorSet 0 // CHECK: OpDecorate %secondGlobal_t Binding 32 -// CHECK: OpDecorate %firstGlobal_0__0__t DescriptorSet 0 -// CHECK: OpDecorate %firstGlobal_0__0__t Binding 0 -// CHECK: OpDecorate %firstGlobal_0__1__t DescriptorSet 0 -// CHECK: OpDecorate %firstGlobal_0__1__t Binding 8 -// CHECK: OpDecorate %firstGlobal_1__0__t 
DescriptorSet 0 -// CHECK: OpDecorate %firstGlobal_1__0__t Binding 16 -// CHECK: OpDecorate %firstGlobal_1__1__t DescriptorSet 0 -// CHECK: OpDecorate %firstGlobal_1__1__t Binding 24 +// CHECK: OpDecorate %[[fg0]] DescriptorSet 0 +// CHECK: OpDecorate %[[fg0]] Binding 0 +// CHECK: OpDecorate %[[fg1]] DescriptorSet 0 +// CHECK: OpDecorate %[[fg1]] Binding 8 +// CHECK: OpDecorate %[[fg2]] DescriptorSet 0 +// CHECK: OpDecorate %[[fg2]] Binding 16 +// CHECK: OpDecorate %[[fg3]] DescriptorSet 0 +// CHECK: OpDecorate %[[fg3]] Binding 24 // CHECK: OpDecorate %secondGlobal_tt_0__s DescriptorSet 0 // CHECK: OpDecorate %secondGlobal_tt_0__s Binding 34 // CHECK: OpDecorate %secondGlobal_tt_1__s DescriptorSet 0 // CHECK: OpDecorate %secondGlobal_tt_1__s Binding 37 -// CHECK: OpDecorate %firstGlobal_0__0__tt_0__s DescriptorSet 0 -// CHECK: OpDecorate %firstGlobal_0__0__tt_0__s Binding 2 -// CHECK: OpDecorate %firstGlobal_0__0__tt_1__s DescriptorSet 0 -// CHECK: OpDecorate %firstGlobal_0__0__tt_1__s Binding 5 -// CHECK: OpDecorate %firstGlobal_0__1__tt_0__s DescriptorSet 0 -// CHECK: OpDecorate %firstGlobal_0__1__tt_0__s Binding 10 -// CHECK: OpDecorate %firstGlobal_0__1__tt_1__s DescriptorSet 0 -// CHECK: OpDecorate %firstGlobal_0__1__tt_1__s Binding 13 -// CHECK: OpDecorate %firstGlobal_1__0__tt_0__s DescriptorSet 0 -// CHECK: OpDecorate %firstGlobal_1__0__tt_0__s Binding 18 -// CHECK: OpDecorate %firstGlobal_1__0__tt_1__s DescriptorSet 0 -// CHECK: OpDecorate %firstGlobal_1__0__tt_1__s Binding 21 -// CHECK: OpDecorate %firstGlobal_1__1__tt_0__s DescriptorSet 0 -// CHECK: OpDecorate %firstGlobal_1__1__tt_0__s Binding 26 -// CHECK: OpDecorate %firstGlobal_1__1__tt_1__s DescriptorSet 0 -// CHECK: OpDecorate %firstGlobal_1__1__tt_1__s Binding 29 +// CHECK: OpDecorate %[[fgtt0_0]] DescriptorSet 0 +// CHECK: OpDecorate %[[fgtt0_0]] Binding 2 +// CHECK: OpDecorate %[[fgtt0_1]] DescriptorSet 0 +// CHECK: OpDecorate %[[fgtt0_1]] Binding 5 +// CHECK: OpDecorate %[[fgtt1_0]] 
DescriptorSet 0 +// CHECK: OpDecorate %[[fgtt1_0]] Binding 10 +// CHECK: OpDecorate %[[fgtt1_1]] DescriptorSet 0 +// CHECK: OpDecorate %[[fgtt1_1]] Binding 13 +// CHECK: OpDecorate %[[fgtt2_0]] DescriptorSet 0 +// CHECK: OpDecorate %[[fgtt2_0]] Binding 18 +// CHECK: OpDecorate %[[fgtt2_1]] DescriptorSet 0 +// CHECK: OpDecorate %[[fgtt2_1]] Binding 21 +// CHECK: OpDecorate %[[fgtt3_0]] DescriptorSet 0 +// CHECK: OpDecorate %[[fgtt3_0]] Binding 26 +// CHECK: OpDecorate %[[fgtt3_1]] DescriptorSet 0 +// CHECK: OpDecorate %[[fgtt3_1]] Binding 29 // Check existence of replacement variables // // CHECK: %secondGlobal_t = OpVariable %_ptr_UniformConstant__arr_type_2d_image_uint_2 UniformConstant -// CHECK: %firstGlobal_0__0__t = OpVariable %_ptr_UniformConstant__arr_type_2d_image_uint_2 UniformConstant -// CHECK: %firstGlobal_0__1__t = OpVariable %_ptr_UniformConstant__arr_type_2d_image_uint_2 UniformConstant -// CHECK: %firstGlobal_1__0__t = OpVariable %_ptr_UniformConstant__arr_type_2d_image_uint_2 UniformConstant -// CHECK: %firstGlobal_1__1__t = OpVariable %_ptr_UniformConstant__arr_type_2d_image_uint_2 UniformConstant +// CHECK: %[[fg0]] = OpVariable %_ptr_UniformConstant__arr_type_2d_image_uint_2 UniformConstant +// CHECK: %[[fg1]] = OpVariable %_ptr_UniformConstant__arr_type_2d_image_uint_2 UniformConstant +// CHECK: %[[fg2]] = OpVariable %_ptr_UniformConstant__arr_type_2d_image_uint_2 UniformConstant +// CHECK: %[[fg3]] = OpVariable %_ptr_UniformConstant__arr_type_2d_image_uint_2 UniformConstant // CHECK: %secondGlobal_tt_0__s = OpVariable %_ptr_UniformConstant__arr_type_sampler_uint_3 UniformConstant // CHECK: %secondGlobal_tt_1__s = OpVariable %_ptr_UniformConstant__arr_type_sampler_uint_3 UniformConstant -// CHECK: %firstGlobal_0__0__tt_0__s = OpVariable %_ptr_UniformConstant__arr_type_sampler_uint_3 UniformConstant -// CHECK: %firstGlobal_0__0__tt_1__s = OpVariable %_ptr_UniformConstant__arr_type_sampler_uint_3 UniformConstant -// CHECK: 
%firstGlobal_0__1__tt_0__s = OpVariable %_ptr_UniformConstant__arr_type_sampler_uint_3 UniformConstant -// CHECK: %firstGlobal_0__1__tt_1__s = OpVariable %_ptr_UniformConstant__arr_type_sampler_uint_3 UniformConstant -// CHECK: %firstGlobal_1__0__tt_0__s = OpVariable %_ptr_UniformConstant__arr_type_sampler_uint_3 UniformConstant -// CHECK: %firstGlobal_1__0__tt_1__s = OpVariable %_ptr_UniformConstant__arr_type_sampler_uint_3 UniformConstant -// CHECK: %firstGlobal_1__1__tt_0__s = OpVariable %_ptr_UniformConstant__arr_type_sampler_uint_3 UniformConstant -// CHECK: %firstGlobal_1__1__tt_1__s = OpVariable %_ptr_UniformConstant__arr_type_sampler_uint_3 UniformConstant +// CHECK: %[[fgtt0_0]] = OpVariable %_ptr_UniformConstant__arr_type_sampler_uint_3 UniformConstant +// CHECK: %[[fgtt0_1]] = OpVariable %_ptr_UniformConstant__arr_type_sampler_uint_3 UniformConstant +// CHECK: %[[fgtt1_0]] = OpVariable %_ptr_UniformConstant__arr_type_sampler_uint_3 UniformConstant +// CHECK: %[[fgtt1_1]] = OpVariable %_ptr_UniformConstant__arr_type_sampler_uint_3 UniformConstant +// CHECK: %[[fgtt2_0]] = OpVariable %_ptr_UniformConstant__arr_type_sampler_uint_3 UniformConstant +// CHECK: %[[fgtt2_1]] = OpVariable %_ptr_UniformConstant__arr_type_sampler_uint_3 UniformConstant +// CHECK: %[[fgtt3_0]] = OpVariable %_ptr_UniformConstant__arr_type_sampler_uint_3 UniformConstant +// CHECK: %[[fgtt3_1]] = OpVariable %_ptr_UniformConstant__arr_type_sampler_uint_3 UniformConstant struct T { SamplerState s[3]; @@ -135,12 +135,12 @@ S secondGlobal; float4 main() : SV_Target { return -// CHECK: [[fg_0_0_t:%[0-9]+]] = OpLoad %_arr_type_2d_image_uint_2 %firstGlobal_0__0__t +// CHECK: [[fg_0_0_t:%[0-9]+]] = OpLoad %_arr_type_2d_image_uint_2 %[[fg0]] // CHECK: [[fg_1_t_0:%[0-9]+]] = OpCompositeExtract %type_2d_image [[fg_0_0_t]] 0 // CHECK: [[fg_1_t_1:%[0-9]+]] = OpCompositeExtract %type_2d_image [[fg_0_0_t]] 1 -// CHECK: [[tmp:%[0-9]+]] = OpAccessChain %_ptr_UniformConstant_type_sampler 
%firstGlobal_0__0__tt_0__s %uint_1 +// CHECK: [[tmp:%[0-9]+]] = OpAccessChain %_ptr_UniformConstant_type_sampler %[[fgtt0_0]] %uint_1 // CHECK: [[fg_1_tt_0_s_1:%[0-9]+]] = OpLoad %type_sampler [[tmp]] -// CHECK: [[tmp:%[0-9]+]] = OpAccessChain %_ptr_UniformConstant_type_sampler %firstGlobal_0__0__tt_1__s %uint_2 +// CHECK: [[tmp:%[0-9]+]] = OpAccessChain %_ptr_UniformConstant_type_sampler %[[fgtt0_1]] %uint_2 // CHECK: [[fg_1_tt_1_s_2:%[0-9]+]] = OpLoad %type_sampler [[tmp]] // CHECK: [[sampled_img_1:%[0-9]+]] = OpSampledImage %type_sampled_image [[fg_1_t_0]] [[fg_1_tt_0_s_1]] // CHECK: {{%[0-9]+}} = OpImageSampleImplicitLod %v4float [[sampled_img_1]] @@ -149,12 +149,12 @@ float4 main() : SV_Target { // CHECK: OpFAdd tex2D(firstGlobal[0][0], float2(0,0)) + -// CHECK: [[fg_0_1_t:%[0-9]+]] = OpLoad %_arr_type_2d_image_uint_2 %firstGlobal_0__1__t +// CHECK: [[fg_0_1_t:%[0-9]+]] = OpLoad %_arr_type_2d_image_uint_2 %[[fg1]] // CHECK: [[fg_0_1_t_0:%[0-9]+]] = OpCompositeExtract %type_2d_image [[fg_0_1_t]] 0 // CHECK: [[fg_0_1_t_1:%[0-9]+]] = OpCompositeExtract %type_2d_image [[fg_0_1_t]] 1 -// CHECK: [[tmp:%[0-9]+]] = OpAccessChain %_ptr_UniformConstant_type_sampler %firstGlobal_0__1__tt_0__s %uint_1 +// CHECK: [[tmp:%[0-9]+]] = OpAccessChain %_ptr_UniformConstant_type_sampler %[[fgtt1_0]] %uint_1 // CHECK: [[fg_0_1_tt_0_s_1:%[0-9]+]] = OpLoad %type_sampler [[tmp]] -// CHECK: [[tmp:%[0-9]+]] = OpAccessChain %_ptr_UniformConstant_type_sampler %firstGlobal_0__1__tt_1__s %uint_2 +// CHECK: [[tmp:%[0-9]+]] = OpAccessChain %_ptr_UniformConstant_type_sampler %[[fgtt1_1]] %uint_2 // CHECK: [[fg_0_1_tt_1_s_2:%[0-9]+]] = OpLoad %type_sampler [[tmp]] // CHECK: [[sampled_img_3:%[0-9]+]] = OpSampledImage %type_sampled_image [[fg_0_1_t_0]] [[fg_0_1_tt_0_s_1]] // CHECK: {{%[0-9]+}} = OpImageSampleImplicitLod %v4float [[sampled_img_3]] @@ -162,12 +162,12 @@ float4 main() : SV_Target { // CHECK: {{%[0-9]+}} = OpImageSampleImplicitLod %v4float [[sampled_img_4]] // CHECK: OpFAdd 
tex2D(firstGlobal[0][1], float2(0,0)) + -// CHECK: [[fg_1_0_t:%[0-9]+]] = OpLoad %_arr_type_2d_image_uint_2 %firstGlobal_1__0__t +// CHECK: [[fg_1_0_t:%[0-9]+]] = OpLoad %_arr_type_2d_image_uint_2 %[[fg2]] // CHECK: [[fg_1_0_t_0:%[0-9]+]] = OpCompositeExtract %type_2d_image [[fg_1_0_t]] 0 // CHECK: [[fg_1_0_t_1:%[0-9]+]] = OpCompositeExtract %type_2d_image [[fg_1_0_t]] 1 -// CHECK: [[tmp:%[0-9]+]] = OpAccessChain %_ptr_UniformConstant_type_sampler %firstGlobal_1__0__tt_0__s %uint_1 +// CHECK: [[tmp:%[0-9]+]] = OpAccessChain %_ptr_UniformConstant_type_sampler %[[fgtt2_0]] %uint_1 // CHECK: [[fg_1_0_tt_0_s_1:%[0-9]+]] = OpLoad %type_sampler [[tmp]] -// CHECK: [[tmp:%[0-9]+]] = OpAccessChain %_ptr_UniformConstant_type_sampler %firstGlobal_1__0__tt_1__s %uint_2 +// CHECK: [[tmp:%[0-9]+]] = OpAccessChain %_ptr_UniformConstant_type_sampler %[[fgtt2_1]] %uint_2 // CHECK: [[fg_1_0_tt_1_s_2:%[0-9]+]] = OpLoad %type_sampler [[tmp]] // CHECK: [[sampled_img_5:%[0-9]+]] = OpSampledImage %type_sampled_image [[fg_1_0_t_0]] [[fg_1_0_tt_0_s_1]] // CHECK: {{%[0-9]+}} = OpImageSampleImplicitLod %v4float [[sampled_img_5]] @@ -175,12 +175,12 @@ float4 main() : SV_Target { // CHECK: {{%[0-9]+}} = OpImageSampleImplicitLod %v4float [[sampled_img_6]] // CHECK: OpFAdd tex2D(firstGlobal[1][0], float2(0,0)) + -// CHECK: [[fg_1_1_t:%[0-9]+]] = OpLoad %_arr_type_2d_image_uint_2 %firstGlobal_1__1__t +// CHECK: [[fg_1_1_t:%[0-9]+]] = OpLoad %_arr_type_2d_image_uint_2 %[[fg3]] // CHECK: [[fg_1_1_t_0:%[0-9]+]] = OpCompositeExtract %type_2d_image [[fg_1_1_t]] 0 // CHECK: [[fg_1_1_t_1:%[0-9]+]] = OpCompositeExtract %type_2d_image [[fg_1_1_t]] 1 -// CHECK: [[tmp:%[0-9]+]] = OpAccessChain %_ptr_UniformConstant_type_sampler %firstGlobal_1__1__tt_0__s %uint_1 +// CHECK: [[tmp:%[0-9]+]] = OpAccessChain %_ptr_UniformConstant_type_sampler %[[fgtt3_0]] %uint_1 // CHECK: [[fg_1_1_tt_0_s_1:%[0-9]+]] = OpLoad %type_sampler [[tmp]] -// CHECK: [[tmp:%[0-9]+]] = OpAccessChain %_ptr_UniformConstant_type_sampler 
%firstGlobal_1__1__tt_1__s %uint_2 +// CHECK: [[tmp:%[0-9]+]] = OpAccessChain %_ptr_UniformConstant_type_sampler %[[fgtt3_1]] %uint_2 // CHECK: [[fg_1_1_tt_1_s_2:%[0-9]+]] = OpLoad %type_sampler [[tmp]] // CHECK: [[sampled_img_7:%[0-9]+]] = OpSampledImage %type_sampled_image [[fg_1_1_t_0]] [[fg_1_1_tt_0_s_1]] // CHECK: {{%[0-9]+}} = OpImageSampleImplicitLod %v4float [[sampled_img_7]] @@ -204,4 +204,3 @@ float4 main() : SV_Target { // CHECK: {{%[0-9]+}} = OpImageSampleImplicitLod %v4float [[sampled_img_10]] secondGlobal.t[1].Sample(secondGlobal.tt[1].s[2], float2(0,0)); } - From 7667f11308595f0d61721a4484a227bafde9d547 Mon Sep 17 00:00:00 2001 From: Ashley Coleman Date: Fri, 20 Feb 2026 09:54:31 -0700 Subject: [PATCH 10/13] [SM6.10] Update HL LinAlg builtin signatures (#8173) Resolves the `int MatrixRef` TODO left from previous changes, as well as updates most builtins to match their final shape. This should be considered an NFC change since these builtins aren't implemented yet and instead we are just updating the shape of the reserved op slots --- utils/hct/gen_intrin_main.txt | 45 ++++++++++++++++------------------- 1 file changed, 20 insertions(+), 25 deletions(-) diff --git a/utils/hct/gen_intrin_main.txt b/utils/hct/gen_intrin_main.txt index 42c2d3e322..e41bc85d08 100644 --- a/utils/hct/gen_intrin_main.txt +++ b/utils/hct/gen_intrin_main.txt @@ -397,32 +397,27 @@ void [[min_sm=6.10]] __builtin_VectorAccumulate(in LinAlg InputVector, in RWB // LinAlg intrinsics -// TODO: Replace all int MatrixRef with MatrixRef type // TODO: Replace all int GroupSharedMem with groupshared memory -void [[min_sm=6.10]] __builtin_LinAlg_FillMatrix(int MatrixRef, numeric value); -void [[min_sm=6.10]] __builtin_LinAlg_CopyConvertMatrix(int MatrixRefDest, int MatrixRefSrc, bool transpose); -void [[min_sm=6.10]] __builtin_LinAlg_MatrixLoadFromDescriptor(int MatrixRef, resource buf, int32_only offset, int32_only stride, int32_only layout); -void [[min_sm=6.10]]
__builtin_LinAlg_MatrixLoadFromMemory(int MatrixRef, int GroupSharedMem, int32_only offset, int32_only stride, int32_only layout); -void [[min_sm=6.10]] __builtin_LinAlg_MatrixLength(int MatrixRef); -void [[min_sm=6.10]] __builtin_LinAlg_MatrixGetCoordinate(int MatrixRef, int32_only threadLocalIndex); -numeric [[min_sm=6.10]] __builtin_LinAlg_MatrixGetElement(int MatrixRef, int32_only threadLocalIndex); -void [[min_sm=6.10]] __builtin_LinAlg_MatrixSetElement(int MatrixRef, int32_only threadLocalIndex, numeric value); -void [[min_sm=6.10]] __builtin_LinAlg_MatrixStoreToDescriptor(int MatrixRef, resource buf, int32_only offset, int32_only stride, int32_only layout); -void [[min_sm=6.10]] __builtin_LinAlg_MatrixStoreToMemory(int MatrixRef, int GroupSharedMem, int32_only offset, int32_only stride, int32_only layout); -int32_only [[min_sm=6.10]] __builtin_LinAlg_MatrixQueryAccumulatorLayout(); -void [[min_sm=6.10]] __builtin_LinAlg_MatrixMatrixMultiply(int MatrixRefA, int MatrixRefB, int MatrixRefC); -void [[min_sm=6.10]] __builtin_LinAlg_MatrixMatrixMultiplyAccumulate(int MatrixRefA, int MatrixRefB, int MatrixRefC); -void [[min_sm=6.10]] __builtin_LinAlg_MatrixAccumulate(int MatrixRefRHS, int MatrixRefLHS); - -// TODO: Fix vector types -void [[min_sm=6.10]] __builtin_LinAlg_MatrixVectorMultiply(int MatrixRef); -void [[min_sm=6.10]] __builtin_LinAlg_MatrixVectorMultiplyAdd(int MatrixRef); - -void [[min_sm=6.10]] __builtin_LinAlg_MatrixAccumulateToDescriptor(int MatrixRef, resource buf, int32_only offset, int32_only stride, int32_only layout); -void [[min_sm=6.10]] __builtin_LinAlg_MatrixAccumulateToMemory(int MatrixRef, int GroupSharedMem, int32_only offset, int32_only stride, int32_only layout); - -// TODO: Fix vector types -void [[min_sm=6.10]] __builtin_LinAlg_MatrixOuterProduct(int MatrixRef); +void [[min_sm=6.10]] __builtin_LinAlg_FillMatrix(out LinAlgMatrix ret, in numeric value); +void [[min_sm=6.10]] __builtin_LinAlg_CopyConvertMatrix(out LinAlgMatrix ret, in 
LinAlgMatrix source, in bool transpose); +void [[min_sm=6.10]] __builtin_LinAlg_MatrixLoadFromDescriptor(out LinAlgMatrix ret, in ByteAddressBuffer buf, in uint offset, in uint stride, in uint layout); +void [[min_sm=6.10]] __builtin_LinAlg_MatrixLoadFromDescriptor(out LinAlgMatrix ret, in RWByteAddressBuffer buf, in uint offset, in uint stride, in uint layout); +void [[min_sm=6.10]] __builtin_LinAlg_MatrixLoadFromMemory(out LinAlgMatrix ret, in int GroupSharedMem, in uint offset, in uint stride, in uint layout); +uint [[min_sm=6.10]] __builtin_LinAlg_MatrixLength(in LinAlgMatrix matrix); +uint<2> [[min_sm=6.10]] __builtin_LinAlg_MatrixGetCoordinate(in LinAlgMatrix matrix, in uint threadLocalIndex); +numeric [[min_sm=6.10]] __builtin_LinAlg_MatrixGetElement(in LinAlgMatrix matrix, in uint threadLocalIndex); +void [[min_sm=6.10]] __builtin_LinAlg_MatrixSetElement(out LinAlgMatrix ret, in LinAlgMatrix matrix, in uint threadLocalIndex, in numeric value); +void [[min_sm=6.10]] __builtin_LinAlg_MatrixStoreToDescriptor(in LinAlgMatrix matrix, in RWByteAddressBuffer buf, in uint offset, in uint stride, in uint layout); +void [[min_sm=6.10]] __builtin_LinAlg_MatrixStoreToMemory(in LinAlgMatrix matrix, in int GroupSharedMem, in uint offset, in uint stride, in uint layout); +uint [[min_sm=6.10]] __builtin_LinAlg_MatrixQueryAccumulatorLayout(); +void [[min_sm=6.10]] __builtin_LinAlg_MatrixMatrixMultiply(out LinAlgMatrix matrixC, in LinAlgMatrix matrixA, in LinAlgMatrix matrixB); +void [[min_sm=6.10]] __builtin_LinAlg_MatrixMatrixMultiplyAccumulate(out LinAlgMatrix matrixC, in LinAlgMatrix matrixA, in LinAlgMatrix matrixB); +void [[min_sm=6.10]] __builtin_LinAlg_MatrixAccumulate(out LinAlgMatrix matrixC, in LinAlgMatrix matrixLHS, in LinAlgMatrix matrixRHS); +void [[min_sm=6.10]] __builtin_LinAlg_MatrixVectorMultiply(out numeric<> ret, in LinAlgMatrix mat, in numeric<> input, in uint input_interp); +void [[min_sm=6.10]] __builtin_LinAlg_MatrixVectorMultiplyAdd(out numeric<> 
ret, in LinAlgMatrix mat, in numeric<> input, in uint input_interp, in numeric<> bias, in uint bias_interp); +void [[min_sm=6.10]] __builtin_LinAlg_MatrixAccumulateToDescriptor(in LinAlgMatrix matrix, in RWByteAddressBuffer buf, in uint offset, in uint stride, in uint layout); +void [[min_sm=6.10]] __builtin_LinAlg_MatrixAccumulateToMemory(in LinAlgMatrix matrix, in int GroupSharedMem, in uint offset, in uint stride, in uint layout); +void [[min_sm=6.10]] __builtin_LinAlg_MatrixOuterProduct(out LinAlgMatrix ret, in numeric<> vecA, in numeric<> vecB); } namespace From 9a0e597039a8ba62cac03bab0cb61f7061be7a42 Mon Sep 17 00:00:00 2001 From: Alex Sepkowski Date: Fri, 20 Feb 2026 11:43:38 -0800 Subject: [PATCH 11/13] Execution Tests: Update datasets for rhs values for shift operations (#8179) Assisted by gh copilot. Fix BitShiftRhs test input sets to avoid shift amounts >= bit width, which is undefined behavior in C++ and can cause test failures as expected values are computed via C++. --- tools/clang/unittests/HLSLExec/LongVectors.cpp | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp index f4e4723017..dbb8a8d672 100644 --- a/tools/clang/unittests/HLSLExec/LongVectors.cpp +++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp @@ -755,8 +755,17 @@ template uint32_t FirstBitLow(T A) { DEFAULT_OP_2(OpType::And, (A & B)); DEFAULT_OP_2(OpType::Or, (A | B)); DEFAULT_OP_2(OpType::Xor, (A ^ B)); -DEFAULT_OP_2(OpType::LeftShift, (A << B)); -DEFAULT_OP_2(OpType::RightShift, (A >> B)); + +// HLSL/DXIL masks shift amounts to the low bits (4 bits for 16-bit, 5 bits for +// 32-bit, 6 bits for 64-bit). We must do the same in C++ to avoid undefined +// behavior when shift amount >= bit width, and to match GPU results. 
+template T MaskShiftAmount(T ShiftAmount) { + constexpr T ShiftMask = static_cast(sizeof(T) * 8 - 1); + return ShiftAmount & ShiftMask; +} + +DEFAULT_OP_2(OpType::LeftShift, (A << MaskShiftAmount(B))); +DEFAULT_OP_2(OpType::RightShift, (A >> MaskShiftAmount(B))); DEFAULT_OP_1(OpType::Saturate, (Saturate(A))); DEFAULT_OP_1(OpType::ReverseBits, (ReverseBits(A))); From 4c67f083f808a46cebf55b8042013339df63d8ea Mon Sep 17 00:00:00 2001 From: Ashley Coleman Date: Fri, 20 Feb 2026 13:11:19 -0700 Subject: [PATCH 12/13] [SM6.10][specs/791] Align LinAlg Matrix Mul with spec (#8183) https://github.com/microsoft/hlsl-specs/pull/791 splits the MulOp operation into two distinct ops. Update the placeholder code to reflect that. Interesting changes are in `gen_intrin_main`, `hctdb.py`, and `HLOperationLower.cpp`. The rest of the code is generated code --- docs/DXIL.rst | 4 +- include/dxc/DXIL/DxilConstants.h | 26 +++++++----- include/dxc/DXIL/DxilInstructions.h | 46 +++++++++++++++++++--- lib/DXIL/DxilOperations.cpp | 61 ++++++++++++++++------------- lib/HLSL/HLOperationLower.cpp | 4 +- utils/hct/gen_intrin_main.txt | 2 +- utils/hct/hctdb.py | 24 +++++++++--- 7 files changed, 113 insertions(+), 54 deletions(-) diff --git a/docs/DXIL.rst b/docs/DXIL.rst index 2afd65f55e..8007a1ef48 100644 --- a/docs/DXIL.rst +++ b/docs/DXIL.rst @@ -3076,7 +3076,7 @@ ID Name Description 2147483656 RayQuery_CandidateTriangleObjectPosition returns candidate triangle vertices in object space as <9 x float> 2147483657 RayQuery_CommittedTriangleObjectPosition returns committed triangle vertices in object space as <9 x float> 2147483658 HitObject_TriangleObjectPosition returns triangle vertices in object space as <9 x float> -2147483659 ReservedD0 reserved +2147483659 LinAlgMatrixMultiplyAccumulate Returns the resulting matrix from multiplying A and B and accumulating into C 2147483660 LinAlgFillMatrix fills a matrix with a scalar value 2147483661 LinAlgCopyConvertMatrix Converts and copies the element 
and use type of the source matrix to the destination matrix with optional transpose 2147483662 LinAlgMatrixLoadFromDescriptor fills a matrix with data from a [RW]ByteAddressBuffer @@ -3088,7 +3088,7 @@ ID Name Description 2147483668 LinAlgMatrixStoreToDescriptor stores a matrix to a RWByteAddressBuffer 2147483669 LinAlgMatrixStoreToMemory stores a matrix to groupshared memory 2147483670 LinAlgMatrixQueryAccumulatorLayout returns comptime 0 when accumulator matrix are A layout, 1 when B layout -2147483671 LinAlgMatrixMulOp applies a multiplication op to matrix C using A and B as parameters +2147483671 LinAlgMatrixMultiply Returns the resulting matrix from multiplying A and B 2147483672 LinAlgMatrixAccumulate accumulate A or B matrix into Accumulator matrix following LHS += RHS 2147483673 LinAlgMatVecMul Multiplies a MxK dimension matrix and a K sized input vector 2147483674 LinAlgMatVecMulAdd Multiplies a MxK dimension matrix and a K sized input vector then adds a M sized bias vector diff --git a/include/dxc/DXIL/DxilConstants.h b/include/dxc/DXIL/DxilConstants.h index dfb835aa00..eb38ec6e70 100644 --- a/include/dxc/DXIL/DxilConstants.h +++ b/include/dxc/DXIL/DxilConstants.h @@ -524,7 +524,6 @@ static const OpCodeTableID TableID = OpCodeTableID::ExperimentalOps; // Enumeration for ExperimentalOps DXIL operations enum class OpCode : unsigned { // - ReservedD0 = 11, // reserved ReservedD1 = 30, // reserved ReservedD2 = 31, // reserved ReservedD3 = 32, // reserved @@ -573,8 +572,11 @@ enum class OpCode : unsigned { 14, // fills a matrix with data from a [RW]ByteAddressBuffer LinAlgMatrixLoadFromMemory = 15, // fills a matrix with data from a groupshared array - LinAlgMatrixMulOp = - 23, // applies a multiplication op to matrix C using A and B as parameters + LinAlgMatrixMultiply = + 23, // Returns the resulting matrix from multiplying A and B + LinAlgMatrixMultiplyAccumulate = + 11, // Returns the resulting matrix from multiplying A and B and + // accumulating into C 
LinAlgMatrixOuterProduct = 29, // Outer products an M sized vector and a N // sized vector producing an MxN matrix LinAlgMatrixQueryAccumulatorLayout = @@ -1263,8 +1265,11 @@ enum class OpCode : unsigned { EXP_OPCODE(ExperimentalOps, HitObject_TriangleObjectPosition), // returns triangle vertices in // object space as <9 x float> - // ReservedD0 = 0x8000000B, 2147483659U, -2147483637 - EXP_OPCODE(ExperimentalOps, ReservedD0), // reserved + // LinAlgMatrixMultiplyAccumulate = 0x8000000B, 2147483659U, -2147483637 + EXP_OPCODE(ExperimentalOps, + LinAlgMatrixMultiplyAccumulate), // Returns the resulting matrix + // from multiplying A and B and + // accumulating into C // LinAlgFillMatrix = 0x8000000C, 2147483660U, -2147483636 EXP_OPCODE(ExperimentalOps, LinAlgFillMatrix), // fills a matrix with a scalar value @@ -1316,10 +1321,10 @@ enum class OpCode : unsigned { LinAlgMatrixQueryAccumulatorLayout), // returns comptime 0 when // accumulator matrix are A // layout, 1 when B layout - // LinAlgMatrixMulOp = 0x80000017, 2147483671U, -2147483625 + // LinAlgMatrixMultiply = 0x80000017, 2147483671U, -2147483625 EXP_OPCODE(ExperimentalOps, - LinAlgMatrixMulOp), // applies a multiplication op to matrix C - // using A and B as parameters + LinAlgMatrixMultiply), // Returns the resulting matrix from + // multiplying A and B // LinAlgMatrixAccumulate = 0x80000018, 2147483672U, -2147483624 EXP_OPCODE(ExperimentalOps, LinAlgMatrixAccumulate), // accumulate A or B matrix into @@ -1529,7 +1534,8 @@ enum class OpCodeClass : unsigned { LinAlgMatrixLength, LinAlgMatrixLoadFromDescriptor, LinAlgMatrixLoadFromMemory, - LinAlgMatrixMulOp, + LinAlgMatrixMultiply, + LinAlgMatrixMultiplyAccumulate, LinAlgMatrixOuterProduct, LinAlgMatrixQueryAccumulatorLayout, LinAlgMatrixSetElement, @@ -1725,7 +1731,7 @@ enum class OpCodeClass : unsigned { NodeOutputIsValid, OutputComplete, - NumOpClasses = 224, // exclusive last value of enumeration + NumOpClasses = 225, // exclusive last value of enumeration 
}; // OPCODECLASS-ENUM:END diff --git a/include/dxc/DXIL/DxilInstructions.h b/include/dxc/DXIL/DxilInstructions.h index 2f388fdcd3..8c48202ce0 100644 --- a/include/dxc/DXIL/DxilInstructions.h +++ b/include/dxc/DXIL/DxilInstructions.h @@ -10500,6 +10500,41 @@ struct DxilInst_HitObject_TriangleObjectPosition { void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } }; +/// This instruction Returns the resulting matrix from multiplying A and B and +/// accumulating into C +struct DxilInst_LinAlgMatrixMultiplyAccumulate { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_LinAlgMatrixMultiplyAccumulate(llvm::Instruction *pInstr) + : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::LinAlgMatrixMultiplyAccumulate); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (4 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_matrixA = 1, + arg_matrixB = 2, + arg_matrixC = 3, + }; + // Accessors + llvm::Value *get_matrixA() const { return Instr->getOperand(1); } + void set_matrixA(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_matrixB() const { return Instr->getOperand(2); } + void set_matrixB(llvm::Value *val) { Instr->setOperand(2, val); } + llvm::Value *get_matrixC() const { return Instr->getOperand(3); } + void set_matrixC(llvm::Value *val) { Instr->setOperand(3, val); } +}; + /// This instruction fills a matrix with a scalar value struct DxilInst_LinAlgFillMatrix { llvm::Instruction *Instr; @@ -10859,15 +10894,14 @@ struct DxilInst_LinAlgMatrixQueryAccumulatorLayout { bool requiresUniformInputs() const { return false; } }; -/// This instruction applies a multiplication op to matrix C using A and B as -/// parameters -struct DxilInst_LinAlgMatrixMulOp {
+/// This instruction Returns the resulting matrix from multiplying A and B +struct DxilInst_LinAlgMatrixMultiply { llvm::Instruction *Instr; // Construction and identification - DxilInst_LinAlgMatrixMulOp(llvm::Instruction *pInstr) : Instr(pInstr) {} + DxilInst_LinAlgMatrixMultiply(llvm::Instruction *pInstr) : Instr(pInstr) {} operator bool() const { - return hlsl::OP::IsDxilOpFuncCallInst(Instr, - hlsl::OP::OpCode::LinAlgMatrixMulOp); + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::LinAlgMatrixMultiply); } // Validation support bool isAllowed() const { return true; } diff --git a/lib/DXIL/DxilOperations.cpp b/lib/DXIL/DxilOperations.cpp index 1393474b48..fa9e0fde4c 100644 --- a/lib/DXIL/DxilOperations.cpp +++ b/lib/DXIL/DxilOperations.cpp @@ -2823,16 +2823,15 @@ static const OP::OpCodeProperty ExperimentalOps_OpCodeProps[] = { {{0x2}}, {{0x0}}}, // Overloads: f - {OC::ReservedD0, - "ReservedD0", - OCC::Reserved, - "reserved", - Attribute::None, - 0, - {}, - {}}, // Overloads: v - // Linear Algebra Operations + {OC::LinAlgMatrixMultiplyAccumulate, + "LinAlgMatrixMultiplyAccumulate", + OCC::LinAlgMatrixMultiplyAccumulate, + "linAlgMatrixMultiplyAccumulate", + Attribute::None, + 4, + {{0x200}, {0x200}, {0x200}, {0x200}}, + {{0x0}, {0x0}, {0x0}, {0x0}}}, // Overloads: o,o,o,o {OC::LinAlgFillMatrix, "LinAlgFillMatrix", OCC::LinAlgFillMatrix, @@ -2921,10 +2920,10 @@ static const OP::OpCodeProperty ExperimentalOps_OpCodeProps[] = { 0, {}, {}}, // Overloads: v - {OC::LinAlgMatrixMulOp, - "LinAlgMatrixMulOp", - OCC::LinAlgMatrixMulOp, - "linAlgMatrixMulOp", + {OC::LinAlgMatrixMultiply, + "LinAlgMatrixMultiply", + OCC::LinAlgMatrixMultiply, + "linAlgMatrixMultiply", Attribute::None, 3, {{0x200}, {0x200}, {0x200}}, @@ -3950,15 +3949,16 @@ void OP::GetMinShaderModelAndMask(OpCode C, bool bWithTranslation, minor = 10; return; } - // Instructions: LinAlgFillMatrix=2147483660, - // LinAlgCopyConvertMatrix=2147483661, LinAlgMatrixLoadFromMemory=2147483663, - 
// LinAlgMatrixLength=2147483664, LinAlgMatrixGetCoordinate=2147483665, - // LinAlgMatrixGetElement=2147483666, LinAlgMatrixSetElement=2147483667, + // Instructions: LinAlgMatrixMultiplyAccumulate=2147483659, + // LinAlgFillMatrix=2147483660, LinAlgCopyConvertMatrix=2147483661, + // LinAlgMatrixLoadFromMemory=2147483663, LinAlgMatrixLength=2147483664, + // LinAlgMatrixGetCoordinate=2147483665, LinAlgMatrixGetElement=2147483666, + // LinAlgMatrixSetElement=2147483667, // LinAlgMatrixStoreToDescriptor=2147483668, - // LinAlgMatrixStoreToMemory=2147483669, LinAlgMatrixMulOp=2147483671, + // LinAlgMatrixStoreToMemory=2147483669, LinAlgMatrixMultiply=2147483671, // LinAlgMatrixAccumulate=2147483672, // LinAlgMatrixAccumulateToMemory=2147483676 - if ((2147483660 <= op && op <= 2147483661) || + if ((2147483659 <= op && op <= 2147483661) || (2147483663 <= op && op <= 2147483669) || (2147483671 <= op && op <= 2147483672) || op == 2147483676) { major = 6; @@ -6557,13 +6557,14 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { A(pHit); break; - // - case OpCode::ReservedD0: - A(pV); + // Linear Algebra Operations + case OpCode::LinAlgMatrixMultiplyAccumulate: + EXT(0); A(pI32); + EXT(1); + EXT(2); + EXT(3); break; - - // Linear Algebra Operations case OpCode::LinAlgFillMatrix: EXT(0); A(pI32); @@ -6637,7 +6638,7 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { A(pI32); A(pI32); break; - case OpCode::LinAlgMatrixMulOp: + case OpCode::LinAlgMatrixMultiply: EXT(0); A(pI32); EXT(1); @@ -7013,7 +7014,6 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { case OpCode::GetGroupWaveIndex: case OpCode::GetGroupWaveCount: case OpCode::ClusterID: - case OpCode::ReservedD0: case OpCode::LinAlgMatrixQueryAccumulatorLayout: case OpCode::ReservedD1: case OpCode::ReservedD2: @@ -7070,13 +7070,20 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { return llvm::StructType::get(Ctx, {FT->getParamType(1), FT->getParamType(2)}); + 
case OpCode::LinAlgMatrixMultiplyAccumulate: + if (FT->getNumParams() < 4) + return nullptr; + return llvm::StructType::get(Ctx, + {FT->getReturnType(), FT->getParamType(1), + FT->getParamType(2), FT->getParamType(3)}); + case OpCode::LinAlgMatrixSetElement: if (FT->getNumParams() < 4) return nullptr; return llvm::StructType::get( Ctx, {FT->getReturnType(), FT->getParamType(1), FT->getParamType(3)}); - case OpCode::LinAlgMatrixMulOp: + case OpCode::LinAlgMatrixMultiply: case OpCode::LinAlgMatrixAccumulate: case OpCode::LinAlgMatVecMul: case OpCode::LinAlgMatrixOuterProduct: diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index 22ea3c77d0..2665c441a6 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -7680,9 +7680,9 @@ constexpr IntrinsicLower gLowerTable[] = { {IntrinsicOp::IOP___builtin_LinAlg_MatrixAccumulate, EmptyLower, DXIL::OpCode::LinAlgMatrixAccumulate}, {IntrinsicOp::IOP___builtin_LinAlg_MatrixMatrixMultiply, EmptyLower, - DXIL::OpCode::LinAlgMatrixMulOp}, + DXIL::OpCode::LinAlgMatrixMultiply}, {IntrinsicOp::IOP___builtin_LinAlg_MatrixMatrixMultiplyAccumulate, - EmptyLower, DXIL::OpCode::LinAlgMatrixMulOp}, + EmptyLower, DXIL::OpCode::LinAlgMatrixMultiplyAccumulate}, {IntrinsicOp::IOP___builtin_LinAlg_MatrixQueryAccumulatorLayout, EmptyLower, DXIL::OpCode::LinAlgMatrixQueryAccumulatorLayout}, {IntrinsicOp::IOP___builtin_LinAlg_MatrixAccumulateToDescriptor, EmptyLower, diff --git a/utils/hct/gen_intrin_main.txt b/utils/hct/gen_intrin_main.txt index e41bc85d08..3f20c42ead 100644 --- a/utils/hct/gen_intrin_main.txt +++ b/utils/hct/gen_intrin_main.txt @@ -411,7 +411,7 @@ void [[min_sm=6.10]] __builtin_LinAlg_MatrixStoreToDescriptor(in LinAlgMatrix ma void [[min_sm=6.10]] __builtin_LinAlg_MatrixStoreToMemory(in LinAlgMatrix matrix, in int GroupSharedMem, in uint offset, in uint stride, in uint layout); uint [[min_sm=6.10]] __builtin_LinAlg_MatrixQueryAccumulatorLayout(); void [[min_sm=6.10]] 
__builtin_LinAlg_MatrixMatrixMultiply(out LinAlgMatrix matrixC, in LinAlgMatrix matrixA, in LinAlgMatrix matrixB); -void [[min_sm=6.10]] __builtin_LinAlg_MatrixMatrixMultiplyAccumulate(out LinAlgMatrix matrixC, in LinAlgMatrix matrixA, in LinAlgMatrix matrixB); +void [[min_sm=6.10]] __builtin_LinAlg_MatrixMatrixMultiplyAccumulate(out LinAlgMatrix matrixR, in LinAlgMatrix matrixA, in LinAlgMatrix matrixB, in LinAlgMatrix matrixC); void [[min_sm=6.10]] __builtin_LinAlg_MatrixAccumulate(out LinAlgMatrix matrixC, in LinAlgMatrix matrixLHS, in LinAlgMatrix matrixRHS); void [[min_sm=6.10]] __builtin_LinAlg_MatrixVectorMultiply(out numeric<> ret, in LinAlgMatrix mat, in numeric<> input, in uint input_interp); void [[min_sm=6.10]] __builtin_LinAlg_MatrixVectorMultiplyAdd(out numeric<> ret, in LinAlgMatrix mat, in numeric<> input, in uint input_interp, in numeric<> bias, in uint bias_interp); diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index 0b98fed0a0..36884121f3 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -1174,8 +1174,8 @@ def populate_categories_and_models_ExperimentalOps(self): + "LinAlgMatrixGetCoordinate,LinAlgMatrixGetElement," + "LinAlgMatrixSetElement,LinAlgMatrixStoreToDescriptor," + "LinAlgMatrixLoadFromMemory,LinAlgMatrixStoreToMemory," - + "LinAlgMatrixAccumulateToMemory,LinAlgMatrixMulOp," - + "LinAlgMatrixAccumulate" + + "LinAlgMatrixAccumulateToMemory,LinAlgMatrixMultiply," + + "LinAlgMatrixMultiplyAccumulate,LinAlgMatrixAccumulate" ): i.category = "Linear Algebra Operations" i.shader_model = experimental_sm @@ -6341,7 +6341,19 @@ def populate_ExperimentalOps(self): ) # Linear Algebra Ops - op_table.reserve_dxil_op_range("ReservedD", 1) + add_dxil_op( + "LinAlgMatrixMultiplyAccumulate", + "LinAlgMatrixMultiplyAccumulate", + "Returns the resulting matrix from multiplying A and B and accumulating into C", + "o,o,o,o", + "", + [ + db_dxil_param(0, "$x0", "", "resulting matrix"), + db_dxil_param(2, "$x1", "matrixA", "A matrix"), + 
db_dxil_param(3, "$x2", "matrixB", "B matrix"), + db_dxil_param(4, "$x3", "matrixC", "C matrix"), + ], + ) add_dxil_op( "LinAlgFillMatrix", @@ -6530,9 +6542,9 @@ def populate_ExperimentalOps(self): ) add_dxil_op( - "LinAlgMatrixMulOp", - "LinAlgMatrixMulOp", - "applies a multiplication op to matrix C using A and B as parameters", + "LinAlgMatrixMultiply", + "LinAlgMatrixMultiply", + "Returns the resulting matrix from multiplying A and B", "o,o,o", "", [ From 848c036b83b848b19d26404698e7b6e00efc6a7a Mon Sep 17 00:00:00 2001 From: luciechoi Date: Mon, 23 Feb 2026 20:24:37 +0000 Subject: [PATCH 13/13] Remove unused variables --- tools/clang/lib/SPIRV/SpirvEmitter.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.cpp b/tools/clang/lib/SPIRV/SpirvEmitter.cpp index b67370337e..c10b4af350 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.cpp +++ b/tools/clang/lib/SPIRV/SpirvEmitter.cpp @@ -5882,7 +5882,6 @@ SpirvEmitter::processTextureSampleGather(const CXXMemberCallExpr *expr, // [, uint Status]); // // Other Texture types do not have a Gather method. 
- const auto numArgs = expr->getNumArgs(); const auto loc = expr->getExprLoc(); const auto range = expr->getSourceRange(); @@ -5968,7 +5967,6 @@ SpirvEmitter::processTextureSampleBiasLevel(const CXXMemberCallExpr *expr, // float LOD // [, out uint Status]); - const auto numArgs = expr->getNumArgs(); const auto *imageExpr = expr->getImplicitObjectArgument(); const QualType imageType = imageExpr->getType(); const bool isImageSampledTexture = isSampledTexture(imageType); @@ -6040,7 +6038,6 @@ SpirvEmitter::processTextureSampleGrad(const CXXMemberCallExpr *expr) { // [, float Clamp] // [, out uint Status]); - const auto numArgs = expr->getNumArgs(); const auto *imageExpr = expr->getImplicitObjectArgument(); const QualType imageType = imageExpr->getType(); const bool isImageSampledTexture = isSampledTexture(imageType); @@ -6107,7 +6104,6 @@ SpirvEmitter::processTextureSampleCmp(const CXXMemberCallExpr *expr) { // [, out uint Status] // ); - const auto numArgs = expr->getNumArgs(); const auto *imageExpr = expr->getImplicitObjectArgument(); const QualType imageType = imageExpr->getType(); const bool isImageSampledTexture = isSampledTexture(imageType); @@ -6317,7 +6313,6 @@ SpirvEmitter::processTextureSampleCmpLevelZero(const CXXMemberCallExpr *expr) { // [, out uint Status] // ); - const auto numArgs = expr->getNumArgs(); const auto *imageExpr = expr->getImplicitObjectArgument(); const auto imageType = imageExpr->getType(); const bool isImageSampledTexture = isSampledTexture(imageType);