diff --git a/antora/modules/ROOT/nav.adoc b/antora/modules/ROOT/nav.adoc index 74164bdcdb..883fd086b8 100644 --- a/antora/modules/ROOT/nav.adoc +++ b/antora/modules/ROOT/nav.adoc @@ -1,7 +1,7 @@ //// - Copyright (c) 2023-2026, Holochip Inc - Copyright (c) 2023-2026, Sascha Willems -- Copyright (c) 2025, Arm Limited and Contributors +- Copyright (c) 2026, Arm Limited and Contributors - - SPDX-License-Identifier: Apache-2.0 - @@ -54,6 +54,7 @@ ** xref:samples/extensions/buffer_device_address/README.adoc[Buffer device address] ** xref:samples/extensions/calibrated_timestamps/README.adoc[Calibrated timestamps] ** xref:samples/extensions/conditional_rendering/README.adoc[Conditional rendering] +** xref:samples/extensions/compute_shader_derivatives/README.adoc[Compute shader derivatives] ** xref:samples/extensions/conservative_rasterization/README.adoc[Conservative rasterization] ** xref:samples/extensions/debug_utils/README.adoc[Debug utils] ** xref:samples/extensions/descriptor_buffer_basic/README.adoc[Descriptor buffer basic] diff --git a/framework/vulkan_type_mapping.h b/framework/vulkan_type_mapping.h index b86b37a834..cca1658bd8 100644 --- a/framework/vulkan_type_mapping.h +++ b/framework/vulkan_type_mapping.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2025, Arm Limited and Contributors +/* Copyright (c) 2026, Arm Limited and Contributors * Copyright (c) 2024-2026, NVIDIA CORPORATION. All rights reserved. 
 * * SPDX-License-Identifier: Apache-2.0 @@ -299,6 +299,12 @@ struct HPPType using Type = vk::PhysicalDeviceTimelineSemaphoreFeaturesKHR; }; +template <> +struct HPPType +{ + using Type = vk::PhysicalDeviceComputeShaderDerivativesFeaturesKHR; +}; + template <> struct HPPType { diff --git a/samples/extensions/README.adoc b/samples/extensions/README.adoc index a81c60ef33..e52a54c127 100644 --- a/samples/extensions/README.adoc +++ b/samples/extensions/README.adoc @@ -1,5 +1,5 @@ //// -- Copyright (c) 2025, Arm Limited and Contributors +- Copyright (c) 2026, Arm Limited and Contributors - Copyright (c) 2021-2026, The Khronos Group - - SPDX-License-Identifier: Apache-2.0 @@ -313,8 +313,14 @@ Demonstrate how to build data graph pipelines and execute neural networks: * xref:./{extension_samplespath}tensor_and_data_graph/simple_tensor_and_data_graph/README.adoc[simple_tensor_and_data_graph] - Explains how to set up and execute a simple neural network using a data graph pipeline. +=== xref:./{extension_samplespath}compute_shader_derivatives/README.adoc[Compute shader derivatives] + +*Extension*: https://docs.vulkan.org/features/latest/features/proposals/VK_KHR_compute_shader_derivatives.html[`VK_KHR_compute_shader_derivatives`] + +Demonstrate how to use derivatives (dFdx/dFdy) in compute shaders via derivative groups and how to request/enable the corresponding device feature. + === xref:./{extension_samplespath}ray_tracing_invocation_reorder/README.adoc[Ray Tracing Invocation Reorder] *Extensions:* https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_EXT_ray_tracing_invocation_reorder.html[`VK_EXT_ray_tracing_invocation_reorder`] -Demonstrate how to optimize ray tracing pipelines by reordering the invocation order. \ No newline at end of file +Demonstrate how to optimize ray tracing pipelines by reordering the invocation order. 
diff --git a/samples/extensions/compute_shader_derivatives/CMakeLists.txt b/samples/extensions/compute_shader_derivatives/CMakeLists.txt new file mode 100644 index 0000000000..ba96f3ca20 --- /dev/null +++ b/samples/extensions/compute_shader_derivatives/CMakeLists.txt @@ -0,0 +1,33 @@ +# Copyright (c) 2026, Holochip Inc. + +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 the "License"; +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +get_filename_component(FOLDER_NAME ${CMAKE_CURRENT_LIST_DIR} NAME) +get_filename_component(PARENT_DIR ${CMAKE_CURRENT_LIST_DIR} PATH) +get_filename_component(CATEGORY_NAME ${PARENT_DIR} NAME) + +add_sample( + ID ${FOLDER_NAME} + CATEGORY ${CATEGORY_NAME} + AUTHOR "Holochip" + NAME "Compute shader derivatives" + DESCRIPTION "Demonstrates VK_KHR_compute_shader_derivatives with a minimal compute dispatch using dFdx/dFdy in compute" + SHADER_FILES_SLANG + "compute_shader_derivatives/slang/derivatives_quad.comp.slang" + "compute_shader_derivatives/slang/derivatives_linear.comp.slang" + "compute_shader_derivatives/slang/fullscreen.vert.slang" + "compute_shader_derivatives/slang/fullscreen.frag.slang" + ) \ No newline at end of file diff --git a/samples/extensions/compute_shader_derivatives/README.adoc b/samples/extensions/compute_shader_derivatives/README.adoc new file mode 100644 index 0000000000..88d5c2ad4a --- /dev/null +++ b/samples/extensions/compute_shader_derivatives/README.adoc @@ -0,0 +1,96 @@ +//// +- Copyright (c) 2025-2026, Holochip Inc. 
+- +- SPDX-License-Identifier: Apache-2.0 +- +- Licensed under the Apache License, Version 2.0 the "License"; +- you may not use this file except in compliance with the License. +- You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +- +//// +ifdef::site-gen-antora[] +TIP: The source for this sample can be found in the https://github.com/KhronosGroup/Vulkan-Samples/tree/main/samples/extensions/compute_shader_derivatives[Khronos Vulkan samples github repository]. +endif::[] + += VK_KHR_compute_shader_derivatives — Derivatives in compute shaders + +This sample demonstrates VK_KHR_compute_shader_derivatives, which enables the use of derivative instructions (like dFdx/dFdy) inside compute shaders. Traditionally, derivatives are only available in fragment shaders, but this extension defines derivative groups in compute and how invocations are paired for derivative computations. + +// Screenshot of the sample output +.Compute shader derivatives output +image::shader_derivatives.png[align=center,alt="Compute shader derivatives output"] + +== What is it? +- SPIR-V: The companion SPIR-V extension allows derivative instructions in the Compute execution model. +- Vulkan: The device feature is exposed via `VkPhysicalDeviceComputeShaderDerivativesFeaturesKHR` with two booleans: + * `computeDerivativeGroupQuads` — enables quad-based derivative groups. + * `computeDerivativeGroupLinear` — enables linearly mapped derivative groups. 
+- GLSL: Use `#extension GL_KHR_compute_shader_derivatives : enable` and a layout qualifier to choose the grouping: + * `layout(derivative_group_quadsKHR) in;` + * `layout(derivative_group_linearKHR) in;` + (The older `GL_NV_compute_shader_derivatives` extension uses the same qualifiers with an `NV` suffix.) + +== Why/when to use it +- Port algorithms that rely on derivatives (e.g., LOD selection, filtering, gradients) to compute for flexibility or performance. +- Keep consistent behavior with fragment-stage derivatives by choosing an appropriate grouping mode (quads vs. linear). + +== What this sample does +- Requests `computeDerivativeGroupQuads` and `computeDerivativeGroupLinear` as optional features; quads are preferred, linear is the fallback, and the sample refuses to run if neither is supported. +- Builds a compute pipeline with a shader that calls `ddx`/`ddy` (derivative instructions) in compute. +- Computes a procedural 2D radial function and uses derivatives to calculate gradient magnitude, demonstrating a practical use case for spatial analysis and edge detection. +- Renders a fullscreen visualization showing: + * Blue: The base procedural radial pattern + * Red/Yellow: Edges detected via high gradient magnitude + * The compute shader writes the visualization to a storage image, which is then displayed via a graphics pipeline +- Displays a GUI overlay explaining the visualization and the practical applications of compute shader derivatives. +- The sample demonstrates how compute shader derivatives enable algorithms that traditionally required fragment shaders (like gradient-based filtering or LOD selection) to run in compute shaders for greater flexibility. + +== Rendering architecture + +This sample uses a two-stage rendering pipeline to demonstrate compute shader derivatives and display the results: + +=== Stage 1: Compute shader (derivative calculation) +The compute shader executes with an 8×8 local workgroup size. The preferred variant (`derivatives_quad.comp.slang`) uses the `[DerivativeGroupQuad]` attribute, which enables quad-based derivative computation; `derivatives_linear.comp.slang` is loaded instead when quad derivative groups are not supported. For each pixel in a 512×512 output image: + +1. 
Computes a procedural radial function based on distance from center +2. Calls `ddx()` and `ddy()` to calculate spatial derivatives of the function +3. Computes gradient magnitude: `sqrt(dx² + dy²)` to detect edges +4. Writes a color visualization to a storage image (VK_FORMAT_R8G8B8A8_UNORM) + +The storage image serves as the output buffer for the compute shader and the input texture for the graphics pipeline. + +=== Stage 2: Graphics pipeline (fullscreen display) +After a pipeline barrier synchronizes the compute write with the fragment shader read, the graphics pipeline displays the computed image: + +1. **Vertex shader** (`fullscreen.vert.slang`): Generates a fullscreen triangle using only vertex IDs (no vertex buffer required) + * Vertex 0: `(-1, -1)` with UV `(0, 0)` — top-left corner (in Vulkan clip space, y = -1 is the top of the viewport) + * Vertex 1: `(3, -1)` with UV `(2, 0)` — extends far right (off-screen) + * Vertex 2: `(-1, 3)` with UV `(0, 2)` — extends far below the bottom edge (off-screen) + * The oversized triangle covers the entire viewport; hardware automatically clips the parts outside the screen +2. **Fragment shader** (`fullscreen.frag.slang`): Samples the storage image using interpolated UV coordinates and outputs the color +3. **GUI overlay**: Drawn on top using ImGui to explain the visualization + +=== Why use a fullscreen triangle instead of a quad? +The fullscreen triangle is a common optimization technique for post-processing and fullscreen effects: + +- **Fewer vertices**: Only 3 vertices instead of 4 (quad) or 6 (two triangles) +- **No vertex buffer**: Positions and UVs are generated procedurally from `SV_VertexID` +- **Simpler setup**: Single draw call with `vkCmdDraw(cmd, 3, 1, 0, 0)` +- **Automatic clipping**: The GPU clips the oversized triangle to the viewport bounds +- **Better cache behavior**: Single triangle primitive instead of two + +This technique is widely used in modern rendering engines for fullscreen passes like tone mapping, bloom, and other post-processing effects. 
+ +== Required Vulkan extensions and features +- Instance extension: `VK_KHR_get_physical_device_properties2` (for feature chaining). +- Device extension: `VK_KHR_compute_shader_derivatives` (required). +- Device feature: at least one of `VkPhysicalDeviceComputeShaderDerivativesFeaturesKHR::computeDerivativeGroupQuads` or `::computeDerivativeGroupLinear` must be `VK_TRUE` (quads are preferred when both are available); the core features `shaderStorageImageReadWithoutFormat` and `shaderStorageImageWriteWithoutFormat` are also required. + diff --git a/samples/extensions/compute_shader_derivatives/compute_shader_derivatives.cpp b/samples/extensions/compute_shader_derivatives/compute_shader_derivatives.cpp new file mode 100644 index 0000000000..e10992d37f --- /dev/null +++ b/samples/extensions/compute_shader_derivatives/compute_shader_derivatives.cpp @@ -0,0 +1,548 @@ +/* Copyright (c) 2026, Holochip Inc. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "compute_shader_derivatives.h" + +#include + +#include "common/vk_common.h" +#include "common/vk_initializers.h" +#include "core/util/logging.hpp" + +ComputeShaderDerivatives::ComputeShaderDerivatives() +{ + title = "Compute shader derivatives (VK_KHR_compute_shader_derivatives)"; + + // Use Vulkan 1.2 instance so SPIR-V 1.4 modules produced by Slang are valid under validation + set_api_version(VK_API_VERSION_1_2); + + // Needed for feature chaining + add_instance_extension(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME); + // Device extension providing the feature (KHR is required) + add_device_extension(VK_KHR_COMPUTE_SHADER_DERIVATIVES_EXTENSION_NAME); + // Note for developers/tooling: + // If your shader compiler/toolchain only emits SPV_NV_compute_shader_derivatives instead of SPV_KHR, + // please update to a newer Vulkan SDK, glslang, and SPIR-V Tools that support the KHR variant. + // This sample intentionally does not enable the NV extension and only targets VK_KHR_compute_shader_derivatives. 
+ // Shader draw parameters (required for SV_VertexID in Slang-generated vertex shader SPIR-V) + add_device_extension(VK_KHR_SHADER_DRAW_PARAMETERS_EXTENSION_NAME, /*optional*/ true); +} + +ComputeShaderDerivatives::~ComputeShaderDerivatives() +{ + if (has_device()) + { + VkDevice device = get_device().get_handle(); + + // Compute pipeline resources + if (compute_pipeline) + { + vkDestroyPipeline(device, compute_pipeline, nullptr); + } + if (compute_pipeline_layout) + { + vkDestroyPipelineLayout(device, compute_pipeline_layout, nullptr); + } + if (compute_descriptor_pool) + { + vkDestroyDescriptorPool(device, compute_descriptor_pool, nullptr); + } + if (compute_descriptor_set_layout) + { + vkDestroyDescriptorSetLayout(device, compute_descriptor_set_layout, nullptr); + } + + // Graphics pipeline resources + if (graphics_pipeline) + { + vkDestroyPipeline(device, graphics_pipeline, nullptr); + } + if (graphics_pipeline_layout) + { + vkDestroyPipelineLayout(device, graphics_pipeline_layout, nullptr); + } + if (graphics_descriptor_pool) + { + vkDestroyDescriptorPool(device, graphics_descriptor_pool, nullptr); + } + if (graphics_descriptor_set_layout) + { + vkDestroyDescriptorSetLayout(device, graphics_descriptor_set_layout, nullptr); + } + + // Storage image resources + if (storage_image_sampler) + { + vkDestroySampler(device, storage_image_sampler, nullptr); + } + if (storage_image_view) + { + vkDestroyImageView(device, storage_image_view, nullptr); + } + if (storage_image) + { + vkDestroyImage(device, storage_image, nullptr); + } + if (storage_image_memory) + { + vkFreeMemory(device, storage_image_memory, nullptr); + } + } +} + +void ComputeShaderDerivatives::create_storage_image() +{ + auto device = get_device().get_handle(); + + // Create storage image for compute shader output + VkImageCreateInfo image_ci{VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO}; + image_ci.imageType = VK_IMAGE_TYPE_2D; + image_ci.format = VK_FORMAT_R8G8B8A8_UNORM; + image_ci.extent.width = 
image_width; + image_ci.extent.height = image_height; + image_ci.extent.depth = 1; + image_ci.mipLevels = 1; + image_ci.arrayLayers = 1; + image_ci.samples = VK_SAMPLE_COUNT_1_BIT; + image_ci.tiling = VK_IMAGE_TILING_OPTIMAL; + image_ci.usage = VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT; + image_ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + image_ci.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; + VK_CHECK(vkCreateImage(device, &image_ci, nullptr, &storage_image)); + + VkMemoryRequirements mem_req{}; + vkGetImageMemoryRequirements(device, storage_image, &mem_req); + + VkMemoryAllocateInfo alloc_info{VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO}; + alloc_info.allocationSize = mem_req.size; + alloc_info.memoryTypeIndex = get_device().get_gpu().get_memory_type(mem_req.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + VK_CHECK(vkAllocateMemory(device, &alloc_info, nullptr, &storage_image_memory)); + VK_CHECK(vkBindImageMemory(device, storage_image, storage_image_memory, 0)); + + // Create image view + VkImageViewCreateInfo view_ci{VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO}; + view_ci.image = storage_image; + view_ci.viewType = VK_IMAGE_VIEW_TYPE_2D; + view_ci.format = VK_FORMAT_R8G8B8A8_UNORM; + view_ci.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + view_ci.subresourceRange.baseMipLevel = 0; + view_ci.subresourceRange.levelCount = 1; + view_ci.subresourceRange.baseArrayLayer = 0; + view_ci.subresourceRange.layerCount = 1; + VK_CHECK(vkCreateImageView(device, &view_ci, nullptr, &storage_image_view)); + + // Create sampler for graphics pipeline + VkSamplerCreateInfo sampler_ci{VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO}; + sampler_ci.magFilter = VK_FILTER_LINEAR; + sampler_ci.minFilter = VK_FILTER_LINEAR; + sampler_ci.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR; + sampler_ci.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + sampler_ci.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + sampler_ci.addressModeW = 
VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + VK_CHECK(vkCreateSampler(device, &sampler_ci, nullptr, &storage_image_sampler)); +} + +void ComputeShaderDerivatives::create_output_buffer_and_descriptors() +{ + auto device = get_device().get_handle(); + + // Create descriptor pool for compute: 1 storage image only + VkDescriptorPoolSize pool_size{}; + pool_size.type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + pool_size.descriptorCount = 1; + + VkDescriptorPoolCreateInfo pool_ci{VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO}; + pool_ci.maxSets = 1; + pool_ci.poolSizeCount = 1; + pool_ci.pPoolSizes = &pool_size; + VK_CHECK(vkCreateDescriptorPool(device, &pool_ci, nullptr, &compute_descriptor_pool)); +} + +void ComputeShaderDerivatives::request_gpu_features(vkb::core::PhysicalDeviceC &gpu) +{ + // Request both derivative group modes as OPTIONAL, prefer Quads if available at runtime. + // Some implementations only support computeDerivativeGroupLinear. + REQUEST_OPTIONAL_FEATURE(gpu, VkPhysicalDeviceComputeShaderDerivativesFeaturesKHR, computeDerivativeGroupQuads); + REQUEST_OPTIONAL_FEATURE(gpu, VkPhysicalDeviceComputeShaderDerivativesFeaturesKHR, computeDerivativeGroupLinear); + + // Storage image read/write without format (required for storage images without explicit format qualifiers) + if (gpu.get_features().shaderStorageImageReadWithoutFormat) + { + gpu.get_mutable_requested_features().shaderStorageImageReadWithoutFormat = VK_TRUE; + } + else + { + throw std::runtime_error("GPU does not support shaderStorageImageReadWithoutFormat feature, which is required for this sample."); + } + if (gpu.get_features().shaderStorageImageWriteWithoutFormat) + { + gpu.get_mutable_requested_features().shaderStorageImageWriteWithoutFormat = VK_TRUE; + } + else + { + throw std::runtime_error("GPU does not support shaderStorageImageWriteWithoutFormat feature, which is required for this sample."); + } +} + +void ComputeShaderDerivatives::create_compute_pipeline() +{ + auto device = 
get_device().get_handle(); + + // Descriptor set layout: binding 0 = storage image + VkDescriptorSetLayoutBinding binding{}; + binding.binding = 0; + binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + binding.descriptorCount = 1; + binding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + binding.pImmutableSamplers = nullptr; + + VkDescriptorSetLayoutCreateInfo set_layout_ci{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO}; + set_layout_ci.bindingCount = 1; + set_layout_ci.pBindings = &binding; + VK_CHECK(vkCreateDescriptorSetLayout(device, &set_layout_ci, nullptr, &compute_descriptor_set_layout)); + + // Pipeline layout uses the descriptor set layout at set 0 + VkPipelineLayoutCreateInfo layout_ci{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO}; + layout_ci.setLayoutCount = 1; + layout_ci.pSetLayouts = &compute_descriptor_set_layout; + VK_CHECK(vkCreatePipelineLayout(device, &layout_ci, nullptr, &compute_pipeline_layout)); + + // Allocate descriptor set + VkDescriptorSetAllocateInfo alloc_info{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO}; + alloc_info.descriptorPool = compute_descriptor_pool; + alloc_info.descriptorSetCount = 1; + alloc_info.pSetLayouts = &compute_descriptor_set_layout; + VK_CHECK(vkAllocateDescriptorSets(device, &alloc_info, &compute_descriptor_set)); + + // Update descriptor: storage image only + VkDescriptorImageInfo image_info{}; + image_info.imageView = storage_image_view; + image_info.imageLayout = VK_IMAGE_LAYOUT_GENERAL; + + VkWriteDescriptorSet write{}; + write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + write.pNext = nullptr; + write.dstSet = compute_descriptor_set; + write.dstBinding = 0; + write.dstArrayElement = 0; + write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + write.descriptorCount = 1; + write.pImageInfo = &image_info; + write.pBufferInfo = nullptr; + write.pTexelBufferView = nullptr; + + vkUpdateDescriptorSets(device, 1, &write, 0, nullptr); + + // Load compute shader based on selected derivative group 
mode + const char *comp_path = use_quads_ ? + "compute_shader_derivatives/slang/derivatives_quad.comp.spv" : + "compute_shader_derivatives/slang/derivatives_linear.comp.spv"; + VkPipelineShaderStageCreateInfo stage = load_shader(comp_path, VK_SHADER_STAGE_COMPUTE_BIT); + + VkComputePipelineCreateInfo compute_ci{VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO}; + compute_ci.stage = stage; + compute_ci.layout = compute_pipeline_layout; + VK_CHECK(vkCreateComputePipelines(device, pipeline_cache, 1, &compute_ci, nullptr, &compute_pipeline)); +} + +void ComputeShaderDerivatives::create_graphics_pipeline() +{ + auto device = get_device().get_handle(); + + // Create descriptor pool for graphics: 1 combined image sampler + VkDescriptorPoolSize pool_size{}; + pool_size.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + pool_size.descriptorCount = 1; + + VkDescriptorPoolCreateInfo pool_ci{VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO}; + pool_ci.maxSets = 1; + pool_ci.poolSizeCount = 1; + pool_ci.pPoolSizes = &pool_size; + VK_CHECK(vkCreateDescriptorPool(device, &pool_ci, nullptr, &graphics_descriptor_pool)); + + // Descriptor set layout: binding 0 = combined image sampler + VkDescriptorSetLayoutBinding binding{}; + binding.binding = 0; + binding.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + binding.descriptorCount = 1; + binding.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + binding.pImmutableSamplers = nullptr; + + VkDescriptorSetLayoutCreateInfo set_layout_ci{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO}; + set_layout_ci.bindingCount = 1; + set_layout_ci.pBindings = &binding; + VK_CHECK(vkCreateDescriptorSetLayout(device, &set_layout_ci, nullptr, &graphics_descriptor_set_layout)); + + // Pipeline layout + VkPipelineLayoutCreateInfo layout_ci{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO}; + layout_ci.setLayoutCount = 1; + layout_ci.pSetLayouts = &graphics_descriptor_set_layout; + VK_CHECK(vkCreatePipelineLayout(device, &layout_ci, nullptr, 
&graphics_pipeline_layout)); + + // Allocate descriptor set + VkDescriptorSetAllocateInfo alloc_info{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO}; + alloc_info.descriptorPool = graphics_descriptor_pool; + alloc_info.descriptorSetCount = 1; + alloc_info.pSetLayouts = &graphics_descriptor_set_layout; + VK_CHECK(vkAllocateDescriptorSets(device, &alloc_info, &graphics_descriptor_set)); + + // Update descriptor set with storage image sampler + VkDescriptorImageInfo image_info{}; + image_info.sampler = storage_image_sampler; + image_info.imageView = storage_image_view; + image_info.imageLayout = VK_IMAGE_LAYOUT_GENERAL; + + VkWriteDescriptorSet write{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET}; + write.dstSet = graphics_descriptor_set; + write.dstBinding = 0; + write.dstArrayElement = 0; + write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + write.descriptorCount = 1; + write.pImageInfo = &image_info; + vkUpdateDescriptorSets(device, 1, &write, 0, nullptr); + + // Load shaders for fullscreen quad + VkPipelineShaderStageCreateInfo shader_stages[2]; + shader_stages[0] = load_shader("compute_shader_derivatives/slang/fullscreen.vert.spv", VK_SHADER_STAGE_VERTEX_BIT); + shader_stages[1] = load_shader("compute_shader_derivatives/slang/fullscreen.frag.spv", VK_SHADER_STAGE_FRAGMENT_BIT); + + // Vertex input state: no vertex buffers (fullscreen triangle generated in vertex shader) + VkPipelineVertexInputStateCreateInfo vertex_input_ci{VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO}; + + // Input assembly + VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO}; + input_assembly_ci.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; + + // Viewport and scissor (dynamic) + VkPipelineViewportStateCreateInfo viewport_ci{VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO}; + viewport_ci.viewportCount = 1; + viewport_ci.scissorCount = 1; + + // Rasterization + VkPipelineRasterizationStateCreateInfo 
rasterization_ci{VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO}; + rasterization_ci.polygonMode = VK_POLYGON_MODE_FILL; + rasterization_ci.cullMode = VK_CULL_MODE_NONE; + rasterization_ci.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE; + rasterization_ci.lineWidth = 1.0f; + + // Multisample + VkPipelineMultisampleStateCreateInfo multisample_ci{VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO}; + multisample_ci.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; + + // Depth stencil + VkPipelineDepthStencilStateCreateInfo depth_stencil_ci{VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO}; + + // Color blend + VkPipelineColorBlendAttachmentState blend_attachment{}; + blend_attachment.colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT; + + VkPipelineColorBlendStateCreateInfo color_blend_ci{VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO}; + color_blend_ci.attachmentCount = 1; + color_blend_ci.pAttachments = &blend_attachment; + + // Dynamic state + VkDynamicState dynamic_states[] = {VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR}; + VkPipelineDynamicStateCreateInfo dynamic_ci{VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO}; + dynamic_ci.dynamicStateCount = 2; + dynamic_ci.pDynamicStates = dynamic_states; + + // Create graphics pipeline + VkGraphicsPipelineCreateInfo pipeline_ci{VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO}; + pipeline_ci.stageCount = 2; + pipeline_ci.pStages = shader_stages; + pipeline_ci.pVertexInputState = &vertex_input_ci; + pipeline_ci.pInputAssemblyState = &input_assembly_ci; + pipeline_ci.pViewportState = &viewport_ci; + pipeline_ci.pRasterizationState = &rasterization_ci; + pipeline_ci.pMultisampleState = &multisample_ci; + pipeline_ci.pDepthStencilState = &depth_stencil_ci; + pipeline_ci.pColorBlendState = &color_blend_ci; + pipeline_ci.pDynamicState = &dynamic_ci; + pipeline_ci.layout = graphics_pipeline_layout; + 
pipeline_ci.renderPass = render_pass; + pipeline_ci.subpass = 0; + + VK_CHECK(vkCreateGraphicsPipelines(device, pipeline_cache, 1, &pipeline_ci, nullptr, &graphics_pipeline)); +} + +bool ComputeShaderDerivatives::prepare(const vkb::ApplicationOptions &options) +{ + if (!ApiVulkanSample::prepare(options)) + { + return false; + } + + // Decide which derivative group to use at runtime based on enabled device features. + // Prefer Quads when available; otherwise, fall back to Linear. + VkPhysicalDeviceComputeShaderDerivativesFeaturesKHR csd_features{VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COMPUTE_SHADER_DERIVATIVES_FEATURES_KHR}; + VkPhysicalDeviceFeatures2 features2{VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2}; + features2.pNext = &csd_features; + vkGetPhysicalDeviceFeatures2(get_device().get_gpu().get_handle(), &features2); + use_quads_ = (csd_features.computeDerivativeGroupQuads == VK_TRUE); + if (!use_quads_ && csd_features.computeDerivativeGroupLinear != VK_TRUE) + { + // Neither mode is available: cannot run this sample. 
+        LOGE("VK_KHR_compute_shader_derivatives present but neither quads nor linear derivative groups are reported as supported.");
+        return false;
+    }
+
+    // Create resources in order: image, buffer, then pipelines
+    create_storage_image();
+    create_output_buffer_and_descriptors();
+    create_compute_pipeline();
+    create_graphics_pipeline();
+
+    prepared = true;
+    return true;
+}
+
+void ComputeShaderDerivatives::build_command_buffers()
+{
+    // Not used; this sample records per-frame in render()
+}
+
+/**
+ * @brief Records and submits one frame: a compute dispatch fills the storage image, then a render
+ *        pass draws that image as a fullscreen triangle with the GUI on top, and the frame is presented.
+ * @param delta_time Not read by this function.
+ */
+void ComputeShaderDerivatives::render(float delta_time)
+{
+    if (!prepared)
+    {
+        return;
+    }
+
+    // Acquire swapchain image and signal acquired_image_ready
+    prepare_frame();
+
+    // Recreate and record the current frame's command buffer
+    recreate_current_command_buffer();
+    VkCommandBuffer cmd = draw_cmd_buffers[current_buffer];
+
+    VkCommandBufferBeginInfo begin_info = vkb::initializers::command_buffer_begin_info();
+    VK_CHECK(vkBeginCommandBuffer(cmd, &begin_info));
+
+    // Transition storage image to GENERAL layout for compute shader write.
+    // oldLayout is UNDEFINED because the previous contents are not needed: the dispatch below overwrites them.
+    // NOTE(review): TOP_OF_PIPE as source stage adds no wait on the previous frame's fragment-shader
+    // reads of this image; this presumably relies on the framework not overlapping frames - confirm.
+    VkImageMemoryBarrier image_barrier{VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER};
+    image_barrier.srcAccessMask               = 0;
+    image_barrier.dstAccessMask               = VK_ACCESS_SHADER_WRITE_BIT;
+    image_barrier.oldLayout                   = VK_IMAGE_LAYOUT_UNDEFINED;
+    image_barrier.newLayout                   = VK_IMAGE_LAYOUT_GENERAL;
+    image_barrier.srcQueueFamilyIndex         = VK_QUEUE_FAMILY_IGNORED;
+    image_barrier.dstQueueFamilyIndex         = VK_QUEUE_FAMILY_IGNORED;
+    image_barrier.image                       = storage_image;
+    image_barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
+    image_barrier.subresourceRange.levelCount = 1;
+    image_barrier.subresourceRange.layerCount = 1;
+    vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
+                         0, 0, nullptr, 0, nullptr, 1, &image_barrier);
+
+    // Dispatch compute shader: one 8x8 workgroup per image tile (512x512 image = 64x64 workgroups).
+    // The group count is rounded up so an image size that is not a multiple of the local size is
+    // still fully covered; the compute shaders bounds-check against the image dimensions.
+    constexpr uint32_t local_size = 8;
+    vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, compute_pipeline);
+    vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, compute_pipeline_layout, 0, 1, &compute_descriptor_set, 0, nullptr);
+    vkCmdDispatch(cmd, (image_width + local_size - 1) / local_size, (image_height + local_size - 1) / local_size, 1);
+
+    // Barrier: compute write → fragment shader read (layout stays GENERAL, only visibility changes)
+    VkImageMemoryBarrier compute_to_frag_barrier{VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER};
+    compute_to_frag_barrier.srcAccessMask               = VK_ACCESS_SHADER_WRITE_BIT;
+    compute_to_frag_barrier.dstAccessMask               = VK_ACCESS_SHADER_READ_BIT;
+    compute_to_frag_barrier.oldLayout                   = VK_IMAGE_LAYOUT_GENERAL;
+    compute_to_frag_barrier.newLayout                   = VK_IMAGE_LAYOUT_GENERAL;
+    compute_to_frag_barrier.srcQueueFamilyIndex         = VK_QUEUE_FAMILY_IGNORED;
+    compute_to_frag_barrier.dstQueueFamilyIndex         = VK_QUEUE_FAMILY_IGNORED;
+    compute_to_frag_barrier.image                       = storage_image;
+    compute_to_frag_barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
+    compute_to_frag_barrier.subresourceRange.levelCount = 1;
+    compute_to_frag_barrier.subresourceRange.layerCount = 1;
+    vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
+                         0, 0, nullptr, 0, nullptr, 1, &compute_to_frag_barrier);
+
+    // Begin render pass to display the computed image and GUI
+    VkClearValue clear_values[2];
+    clear_values[0].color        = {{0.0f, 0.0f, 0.0f, 1.0f}};        // Clear to black (will be covered by image)
+    clear_values[1].depthStencil = {1.0f, 0};
+
+    VkRenderPassBeginInfo render_pass_begin_info    = vkb::initializers::render_pass_begin_info();
+    render_pass_begin_info.renderPass               = render_pass;
+    render_pass_begin_info.framebuffer              = framebuffers[current_buffer];
+    render_pass_begin_info.renderArea.extent.width  = width;
+    render_pass_begin_info.renderArea.extent.height = height;
+    render_pass_begin_info.clearValueCount          = 2;
+    render_pass_begin_info.pClearValues             = clear_values;
+
+    vkCmdBeginRenderPass(cmd, &render_pass_begin_info, VK_SUBPASS_CONTENTS_INLINE);
+
+    // Set viewport and scissor for graphics rendering.
+    // VkViewport stores floats, so the integer framebuffer size is converted explicitly.
+    VkViewport viewport{};
+    viewport.width    = static_cast<float>(width);
+    viewport.height   = static_cast<float>(height);
+    viewport.minDepth = 0.0f;
+    viewport.maxDepth = 1.0f;
+    vkCmdSetViewport(cmd, 0, 1, &viewport);
+
+    VkRect2D scissor{};
+    scissor.extent.width  = width;
+    scissor.extent.height = height;
+    vkCmdSetScissor(cmd, 0, 1, &scissor);
+
+    // Render the computed image as a fullscreen quad
+    vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, graphics_pipeline);
+    vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, graphics_pipeline_layout, 0, 1, &graphics_descriptor_set, 0, nullptr);
+    vkCmdDraw(cmd, 3, 1, 0, 0);        // Draw fullscreen triangle (3 vertices)
+
+    // Draw the GUI overlay on top
+    draw_ui(cmd);
+
+    vkCmdEndRenderPass(cmd);
+
+    VK_CHECK(vkEndCommandBuffer(cmd));
+
+    // Submit: wait on acquire semaphore, signal render_complete for present
+    VkPipelineStageFlags wait_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
+    VkSubmitInfo         submit_info{};
+    submit_info.sType                = VK_STRUCTURE_TYPE_SUBMIT_INFO;
+    submit_info.waitSemaphoreCount   = 1;
+    submit_info.pWaitSemaphores      = &semaphores.acquired_image_ready;
+    submit_info.pWaitDstStageMask    = &wait_stage;
+    submit_info.commandBufferCount   = 1;
+    submit_info.pCommandBuffers      = &cmd;
+    submit_info.signalSemaphoreCount = 1;
+    submit_info.pSignalSemaphores    = &semaphores.render_complete;
+
+    VkQueue queue = get_device().get_queue_by_present(0).get_handle();
+    VK_CHECK(vkQueueSubmit(queue, 1, &submit_info, VK_NULL_HANDLE));
+
+    // Present (waits on render_complete)
+    submit_frame();
+}
+
+void ComputeShaderDerivatives::on_update_ui_overlay(vkb::Drawer &drawer)
+{
+    if (drawer.header("Compute Shader Derivatives"))
+    {
+        drawer.text("Visualization:");
+        drawer.text("- Blue: Base procedural radial pattern");
+        drawer.text("- Red/Yellow: Edges (high gradient magnitude)");
+        drawer.text("- Gradient magnitude = sqrt(dx^2 + dy^2)");
+        drawer.text("");
+
+        drawer.text("This demonstrates edge detection using compute shader");
+
drawer.text("derivatives, useful for LOD selection, filtering, and"); + drawer.text("spatial analysis in compute pipelines."); + } +} + +std::unique_ptr create_compute_shader_derivatives() +{ + return std::make_unique(); +} diff --git a/samples/extensions/compute_shader_derivatives/compute_shader_derivatives.h b/samples/extensions/compute_shader_derivatives/compute_shader_derivatives.h new file mode 100644 index 0000000000..8c3c697012 --- /dev/null +++ b/samples/extensions/compute_shader_derivatives/compute_shader_derivatives.h @@ -0,0 +1,75 @@ +/* Copyright (c) 2026, Holochip Inc. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#include "api_vulkan_sample.h" + +class ComputeShaderDerivatives : public ApiVulkanSample +{ + public: + ComputeShaderDerivatives(); + ~ComputeShaderDerivatives() override; + + void build_command_buffers() override; // unused, per-frame recording + bool prepare(const vkb::ApplicationOptions &options) override; + void render(float delta_time) override; + void request_gpu_features(vkb::core::PhysicalDeviceC &gpu) override; + void on_update_ui_overlay(vkb::Drawer &drawer) override; + + private: + void create_storage_image(); + void create_output_buffer_and_descriptors(); + void create_compute_pipeline(); + void create_graphics_pipeline(); + + // Which derivative group mode the sample will use at runtime. 
+ // Prefer quads when available; otherwise fall back to linear. + bool use_quads_{false}; + + // Image dimensions for visualization + static constexpr uint32_t image_width{512}; + static constexpr uint32_t image_height{512}; + + // Compute pipeline objects + VkPipelineLayout compute_pipeline_layout{VK_NULL_HANDLE}; + VkPipeline compute_pipeline{VK_NULL_HANDLE}; + + // Graphics pipeline objects (for displaying the image) + VkPipelineLayout graphics_pipeline_layout{VK_NULL_HANDLE}; + VkPipeline graphics_pipeline{VK_NULL_HANDLE}; + + // Compute descriptor objects + VkDescriptorSetLayout compute_descriptor_set_layout{VK_NULL_HANDLE}; + VkDescriptorPool compute_descriptor_pool{VK_NULL_HANDLE}; + VkDescriptorSet compute_descriptor_set{VK_NULL_HANDLE}; + + // Graphics descriptor objects (for sampling the image) + VkDescriptorSetLayout graphics_descriptor_set_layout{VK_NULL_HANDLE}; + VkDescriptorPool graphics_descriptor_pool{VK_NULL_HANDLE}; + VkDescriptorSet graphics_descriptor_set{VK_NULL_HANDLE}; + + // Storage image for compute shader output + VkImage storage_image{VK_NULL_HANDLE}; + VkDeviceMemory storage_image_memory{VK_NULL_HANDLE}; + VkImageView storage_image_view{VK_NULL_HANDLE}; + VkSampler storage_image_sampler{VK_NULL_HANDLE}; +}; + +std::unique_ptr create_compute_shader_derivatives(); diff --git a/samples/extensions/compute_shader_derivatives/shader_derivatives.png b/samples/extensions/compute_shader_derivatives/shader_derivatives.png new file mode 100644 index 0000000000..2f0f8de9c1 Binary files /dev/null and b/samples/extensions/compute_shader_derivatives/shader_derivatives.png differ diff --git a/shaders/compute_shader_derivatives/slang/derivatives_linear.comp.slang b/shaders/compute_shader_derivatives/slang/derivatives_linear.comp.slang new file mode 100644 index 0000000000..19980c5d78 --- /dev/null +++ b/shaders/compute_shader_derivatives/slang/derivatives_linear.comp.slang @@ -0,0 +1,68 @@ +/* Copyright (c) 2026, Holochip Inc + * + * 
SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Compute shader demonstrating derivatives in compute (Linear mode). +// Many implementations only support computeDerivativeGroupLinear. +// Mapping 1D lanes to 2D coordinates for linear groups follows the DX SM 6.6 spec: +// https://microsoft.github.io/DirectX-Specs/d3d/HLSL_SM_6_6_Derivatives.html +// Intuition: lanes are ordered linearly across the group; hardware derives a +// consistent local 2D mapping to evaluate ddx/ddy even when quads aren't supported. +// Note: ddx/ddy in compute require VK_KHR_compute_shader_derivatives at runtime. +// Tooling note: If your toolchain emits SPV_NV_compute_shader_derivatives instead of SPV_KHR, +// please update to a newer Vulkan SDK, glslang, and SPIR-V Tools that support the KHR variant. 
+
+// Output image for gradient magnitude visualization
+[[vk::binding(0, 0)]] RWTexture2D<float4> gOutputImage : register(u0, space0);
+
+// 8x8 local size; derivative groups are defined by implementation in Linear mode
+[shader("compute")]
+[numthreads(8, 8, 1)]
+[DerivativeGroupLinear]        // Enable linear-based derivative computation
+void main(uint3 tid : SV_DispatchThreadID)
+{
+    // Get image dimensions
+    uint2 dims;
+    gOutputImage.GetDimensions(dims.x, dims.y);
+
+    // Normalized coordinates [0, 1]; the divisor is clamped to at least 1 so that a
+    // one-texel-wide image does not divide by zero
+    float2 uv = float2(tid.xy) / float2(max(dims, uint2(2, 2)) - 1);
+
+    // Procedural function (same as Quad variant) to produce identical output
+    float2 center = float2(0.5, 0.5);
+    float2 delta = uv - center;
+    float dist = length(delta);
+
+    float value = sin(dist * 10.0) * 0.5 + 0.5;
+    value *= (1.0 - smoothstep(0.0, 0.7, dist));
+
+    // Derivatives of the function.
+    // No lane may exit before this point: ddx/ddy are undefined unless every lane of the
+    // derivative group executes them, so out-of-range lanes are only masked at the store.
+    float dx = ddx(value);
+    float dy = ddy(value);
+
+    float gradientMag = sqrt(dx * dx + dy * dy);
+    float edgeIntensity = saturate(gradientMag * 10.0);
+
+    float3 color;
+    color.r = edgeIntensity;
+    color.g = edgeIntensity * 0.5;
+    color.b = value * (1.0 - edgeIntensity);
+
+    // Only lanes that map to a texel inside the image write a result
+    if (tid.x < dims.x && tid.y < dims.y)
+    {
+        gOutputImage[tid.xy] = float4(color, 1.0);
+    }
+}
diff --git a/shaders/compute_shader_derivatives/slang/derivatives_linear.comp.spv b/shaders/compute_shader_derivatives/slang/derivatives_linear.comp.spv
new file mode 100644
index 0000000000..c133d4fa0e
Binary files /dev/null and b/shaders/compute_shader_derivatives/slang/derivatives_linear.comp.spv differ
diff --git a/shaders/compute_shader_derivatives/slang/derivatives_quad.comp.slang b/shaders/compute_shader_derivatives/slang/derivatives_quad.comp.slang
new file mode 100644
index 0000000000..95718b2e4e
--- /dev/null
+++ b/shaders/compute_shader_derivatives/slang/derivatives_quad.comp.slang
@@ -0,0 +1,71 @@
+/* Copyright (c) 2026, Holochip Inc
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 the "License";
+
* you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Compute shader demonstrating derivatives in compute (Quad mode).
+// This shader computes a procedural 2D function and uses derivatives to calculate
+// gradient magnitude - demonstrating edge detection and spatial analysis capabilities.
+// Note: ddx/ddy in compute require VK_KHR_compute_shader_derivatives at runtime.
+// Tooling note: If your toolchain emits SPV_NV_compute_shader_derivatives instead of SPV_KHR,
+// please update to a newer Vulkan SDK, glslang, and SPIR-V Tools that support the KHR variant.
+
+// Output image for gradient magnitude visualization
+[[vk::binding(0, 0)]] RWTexture2D<float4> gOutputImage : register(u0, space0);
+
+// 8x8 local size for efficient compute with proper derivative quad coverage
+[shader("compute")]
+[numthreads(8, 8, 1)]
+[DerivativeGroupQuad]        // Enable quad-based derivative computation
+void main(uint3 tid : SV_DispatchThreadID)
+{
+    // Get image dimensions
+    uint2 dims;
+    gOutputImage.GetDimensions(dims.x, dims.y);
+
+    // Normalized coordinates [0, 1]; the divisor is clamped to at least 1 so that a
+    // one-texel-wide image does not divide by zero
+    float2 uv = float2(tid.xy) / float2(max(dims, uint2(2, 2)) - 1);
+
+    // Create an interesting procedural function: radial gradient with modulation
+    float2 center = float2(0.5, 0.5);
+    float2 delta = uv - center;
+    float dist = length(delta);
+
+    // Procedural function with spatial variation
+    float value = sin(dist * 10.0) * 0.5 + 0.5;
+    value *= (1.0 - smoothstep(0.0, 0.7, dist));
+
+    // Compute derivatives of the function - this is the key feature!
+    // No lane may exit before this point: ddx/ddy are undefined unless every lane of the
+    // quad executes them, so out-of-range lanes are only masked at the store.
+    float dx = ddx(value);
+    float dy = ddy(value);
+
+    // Gradient magnitude - useful for edge detection, LOD selection, and filtering
+    float gradientMag = sqrt(dx * dx + dy * dy);
+
+    // Visualize gradient magnitude as edge detection
+    float edgeIntensity = saturate(gradientMag * 10.0);
+
+    // Create a color visualization
+    float3 color;
+    color.r = edgeIntensity;                        // Red for edges
+    color.g = edgeIntensity * 0.5;                  // Some yellow for strong edges
+    color.b = value * (1.0 - edgeIntensity);        // Blue for the base pattern
+
+    // Write visualization to output image; only lanes that map to a texel inside the image store
+    if (tid.x < dims.x && tid.y < dims.y)
+    {
+        gOutputImage[tid.xy] = float4(color, 1.0);
+    }
+}
diff --git a/shaders/compute_shader_derivatives/slang/derivatives_quad.comp.spv b/shaders/compute_shader_derivatives/slang/derivatives_quad.comp.spv
new file mode 100644
index 0000000000..cdddf7b9bc
Binary files /dev/null and b/shaders/compute_shader_derivatives/slang/derivatives_quad.comp.spv differ
diff --git a/shaders/compute_shader_derivatives/slang/fullscreen.frag.slang b/shaders/compute_shader_derivatives/slang/fullscreen.frag.slang
new file mode 100644
index 0000000000..2b4a0dffa9
--- /dev/null
+++ b/shaders/compute_shader_derivatives/slang/fullscreen.frag.slang
@@ -0,0 +1,35 @@
+/* Copyright (c) 2026, Holochip Inc.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 the "License";
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Fragment stage of the fullscreen pass: shows the computed gradient visualization
+
+// Binding 0 is a combined image sampler, so the texture and the sampler state
+// are both declared on that same binding
+[[vk::binding(0, 0)]] Texture2D computedImage;
+[[vk::binding(0, 0)]] SamplerState computedImageSampler;
+
+struct PSInput
+{
+    float4 position : SV_Position;
+    float2 uv : TEXCOORD0;
+};
+
+[shader("fragment")]
+float4 main(PSInput input) : SV_Target
+{
+    // Look up the visualization image at this fragment's interpolated UV
+    const float4 visualization = computedImage.Sample(computedImageSampler, input.uv);
+    return visualization;
+}
diff --git a/shaders/compute_shader_derivatives/slang/fullscreen.frag.spv b/shaders/compute_shader_derivatives/slang/fullscreen.frag.spv
new file mode 100644
index 0000000000..935c73f121
Binary files /dev/null and b/shaders/compute_shader_derivatives/slang/fullscreen.frag.spv differ
diff --git a/shaders/compute_shader_derivatives/slang/fullscreen.vert.slang b/shaders/compute_shader_derivatives/slang/fullscreen.vert.slang
new file mode 100644
index 0000000000..c5168093b7
--- /dev/null
+++ b/shaders/compute_shader_derivatives/slang/fullscreen.vert.slang
@@ -0,0 +1,40 @@
+/* Copyright (c) 2026, Holochip Inc.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 the "License";
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Vertex stage of the fullscreen pass; it reads no vertex buffers.
+// The three vertices form one oversized triangle that covers the whole screen.
+
+struct VSOutput
+{
+    float4 position : SV_Position;
+    float2 uv : TEXCOORD0;
+};
+
+[shader("vertex")]
+VSOutput main(uint vertexID : SV_VertexID)
+{
+    // Bit operations on the vertex ID select the triangle corner:
+    //   ID 0: position (-1, -1), UV (0, 0)
+    //   ID 1: position ( 3, -1), UV (2, 0)
+    //   ID 2: position (-1,  3), UV (0, 2)
+    const float2 corner = float2((vertexID << 1) & 2, vertexID & 2);
+
+    VSOutput result;
+    result.uv = corner;
+    result.position = float4(corner * 2.0 - 1.0, 0.0, 1.0);
+    return result;
+}
diff --git a/shaders/compute_shader_derivatives/slang/fullscreen.vert.spv b/shaders/compute_shader_derivatives/slang/fullscreen.vert.spv
new file mode 100644
index 0000000000..7fc4259c34
Binary files /dev/null and b/shaders/compute_shader_derivatives/slang/fullscreen.vert.spv differ
diff --git a/third_party/spirv-cross b/third_party/spirv-cross
index d8e3e2b141..7affe74d77 160000
--- a/third_party/spirv-cross
+++ b/third_party/spirv-cross
@@ -1 +1 @@
-Subproject commit d8e3e2b141b8c8a167b2e3984736a6baacff316c
+Subproject commit 7affe74d77f93a622bb5002789d5332d32e512ee
diff --git a/third_party/volk b/third_party/volk
index a8da8ef336..f30088b3f4 160000
--- a/third_party/volk
+++ b/third_party/volk
@@ -1 +1 @@
-Subproject commit a8da8ef3368482b0ee9b0ec0c6079a16a89c6924
+Subproject commit f30088b3f4160810b53e19258dd2f7395e5f0ba3
diff --git a/third_party/vulkan b/third_party/vulkan
index 2cd90f9d20..19725e4d48 160000
--- a/third_party/vulkan
+++ b/third_party/vulkan
@@ -1 +1 @@
-Subproject commit 2cd90f9d20df57eac214c148f3aed885372ddcfe
+Subproject commit 19725e4d48082fe78e26622b15d3080ccd54112b