diff --git a/examples_tests b/examples_tests index dd7de7a89c..af29d6546f 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit dd7de7a89cfa5a59970dde4d4744ecf746d77a4a +Subproject commit af29d6546ff9f9e7beeea633f7e8de27fb879ba7 diff --git a/include/nbl/builtin/hlsl/concepts/accessors/anisotropically_sampled.hlsl b/include/nbl/builtin/hlsl/concepts/accessors/anisotropically_sampled.hlsl index e6019d056c..76f2c2219a 100644 --- a/include/nbl/builtin/hlsl/concepts/accessors/anisotropically_sampled.hlsl +++ b/include/nbl/builtin/hlsl/concepts/accessors/anisotropically_sampled.hlsl @@ -18,25 +18,28 @@ namespace accessors { // declare concept #define NBL_CONCEPT_NAME AnisotropicallySampled -#define NBL_CONCEPT_TPLT_PRM_KINDS (typename)(int32_t) -#define NBL_CONCEPT_TPLT_PRM_NAMES (U)(Dims) +#define NBL_CONCEPT_TPLT_PRM_KINDS (typename)(int32_t)(int32_t) +#define NBL_CONCEPT_TPLT_PRM_NAMES (U)(Dims)(Components) // not the greatest syntax but works #define NBL_CONCEPT_PARAM_0 (a,U) #define NBL_CONCEPT_PARAM_1 (uv,vector) #define NBL_CONCEPT_PARAM_2 (layer,uint16_t) #define NBL_CONCEPT_PARAM_3 (dU,vector) #define NBL_CONCEPT_PARAM_4 (dV,vector) +#define NBL_CONCEPT_PARAM_5 (outVal,vector) // start concept -NBL_CONCEPT_BEGIN(5) +NBL_CONCEPT_BEGIN(6) // need to be defined AFTER the cocnept begins #define a NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 #define uv NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 #define layer NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2 #define dU NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_3 #define dV NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_4 +#define outVal NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_5 NBL_CONCEPT_END( - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template get(uv,layer,dU,dV)) , ::nbl::hlsl::is_same_v, float32_t4>)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template get(outVal,uv,layer,dU,dV)) , ::nbl::hlsl::is_same_v, void)) ); +#undef outVal #undef dV #undef dU #undef layer @@ -47,4 +50,4 @@ NBL_CONCEPT_END( } } } -#endif \ No newline at end of file +#endif diff --git a/include/nbl/builtin/hlsl/concepts/accessors/loadable_image.hlsl b/include/nbl/builtin/hlsl/concepts/accessors/loadable_image.hlsl index 8c7251214d..fcc200ad95 100644 --- a/include/nbl/builtin/hlsl/concepts/accessors/loadable_image.hlsl +++ b/include/nbl/builtin/hlsl/concepts/accessors/loadable_image.hlsl @@ -18,28 +18,31 @@ namespace accessors { // concept `LoadableImage` translates to smth like this: -//template -//concept LoadableImage = requires(U a, vector uv, uint16_t layer) { -// ::nbl::hlsl::is_same_v().template get(uv,layer)), vector>; +//template +//concept LoadableImage = requires(U a, vector uv, uint16_t layer, vector outVal) { +// ::nbl::hlsl::is_same_v().template get(outVal,uv,layer)), void>; //}; // declare concept #define NBL_CONCEPT_NAME LoadableImage -#define NBL_CONCEPT_TPLT_PRM_KINDS (typename)(typename)(int32_t) -#define NBL_CONCEPT_TPLT_PRM_NAMES (U)(T)(Dims) +#define NBL_CONCEPT_TPLT_PRM_KINDS (typename)(typename)(int32_t)(int32_t) +#define NBL_CONCEPT_TPLT_PRM_NAMES (U)(T)(Dims)(Components) // not the greatest syntax but works #define NBL_CONCEPT_PARAM_0 (a,U) #define NBL_CONCEPT_PARAM_1 (uv,vector) #define NBL_CONCEPT_PARAM_2 (layer,uint16_t) +#define NBL_CONCEPT_PARAM_3 (outVal,vector) // start concept -NBL_CONCEPT_BEGIN(3) +NBL_CONCEPT_BEGIN(4) // need to be defined AFTER the concept begins #define a NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 #define uv NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 #define layer NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2 +#define outVal NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_3 NBL_CONCEPT_END( - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template get(uv,layer)), ::nbl::hlsl::is_same_v, vector)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template get(outVal,uv,layer)), ::nbl::hlsl::is_same_v, void)) ); +#undef outVal #undef layer #undef uv #undef a @@ -47,23 +50,26 @@ NBL_CONCEPT_END( // declare concept #define NBL_CONCEPT_NAME MipmappedLoadableImage -#define NBL_CONCEPT_TPLT_PRM_KINDS (typename)(typename)(int32_t) -#define NBL_CONCEPT_TPLT_PRM_NAMES (U)(T)(Dims) +#define NBL_CONCEPT_TPLT_PRM_KINDS (typename)(typename)(int32_t)(int32_t) +#define NBL_CONCEPT_TPLT_PRM_NAMES (U)(T)(Dims)(Components) // not the greatest syntax but works #define NBL_CONCEPT_PARAM_0 (a,U) #define NBL_CONCEPT_PARAM_1 (uv,vector) #define NBL_CONCEPT_PARAM_2 (layer,uint16_t) #define NBL_CONCEPT_PARAM_3 (level,uint16_t) +#define NBL_CONCEPT_PARAM_4 (outVal,vector) // start concept -NBL_CONCEPT_BEGIN(4) +NBL_CONCEPT_BEGIN(5) // need to be defined AFTER the cocnept begins #define a NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 #define uv NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 #define layer NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2 #define level NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_3 +#define outVal NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_4 NBL_CONCEPT_END( - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template get(uv,layer,level)) , ::nbl::hlsl::is_same_v, vector)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template get(outVal,uv,layer,level)) , ::nbl::hlsl::is_same_v, void)) ); +#undef outVal #undef level #undef layer #undef uv @@ -73,4 +79,4 @@ NBL_CONCEPT_END( } } } -#endif \ No newline at end of file +#endif diff --git a/include/nbl/builtin/hlsl/concepts/accessors/mip_mapped.hlsl b/include/nbl/builtin/hlsl/concepts/accessors/mip_mapped.hlsl index c49e66617b..e8b61d4029 100644 --- a/include/nbl/builtin/hlsl/concepts/accessors/mip_mapped.hlsl +++ b/include/nbl/builtin/hlsl/concepts/accessors/mip_mapped.hlsl @@ -25,16 +25,19 @@ namespace accessors #define NBL_CONCEPT_PARAM_1 (uv,vector) #define NBL_CONCEPT_PARAM_2 (layer,uint16_t) #define NBL_CONCEPT_PARAM_3 (level,float) +#define NBL_CONCEPT_PARAM_4 (outVal,float32_t4) // start concept -NBL_CONCEPT_BEGIN(4) +NBL_CONCEPT_BEGIN(5) // need to be defined AFTER the cocnept begins #define a NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 #define uv NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 #define layer NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2 #define level NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_3 +#define outVal NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_4 NBL_CONCEPT_END( - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template get(uv,layer,level)) , ::nbl::hlsl::is_same_v, float32_t4>)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template get(outVal,uv,layer,level)) , ::nbl::hlsl::is_same_v, void)) ); +#undef outVal #undef level #undef layer #undef uv @@ -44,4 +47,4 @@ NBL_CONCEPT_END( } } } -#endif \ No newline at end of file +#endif diff --git a/include/nbl/builtin/hlsl/concepts/accessors/storable_image.hlsl b/include/nbl/builtin/hlsl/concepts/accessors/storable_image.hlsl index 7eda9b9303..900352d993 100644 --- a/include/nbl/builtin/hlsl/concepts/accessors/storable_image.hlsl +++ b/include/nbl/builtin/hlsl/concepts/accessors/storable_image.hlsl @@ -18,13 +18,13 @@ namespace accessors { // declare concept #define NBL_CONCEPT_NAME StorableImage -#define NBL_CONCEPT_TPLT_PRM_KINDS (typename)(typename)(int32_t) -#define NBL_CONCEPT_TPLT_PRM_NAMES (U)(T)(Dims) +#define NBL_CONCEPT_TPLT_PRM_KINDS (typename)(typename)(int32_t)(int32_t) +#define NBL_CONCEPT_TPLT_PRM_NAMES (U)(T)(Dims)(Components) // not the greatest syntax but works #define NBL_CONCEPT_PARAM_0 (a,U) #define NBL_CONCEPT_PARAM_1 (uv,vector) #define NBL_CONCEPT_PARAM_2 (layer,uint16_t) -#define NBL_CONCEPT_PARAM_3 (data,vector) +#define NBL_CONCEPT_PARAM_3 (data,vector) // start concept NBL_CONCEPT_BEGIN(4) // need to be defined AFTER the cocnept begins diff --git a/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl b/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl index 593e267a26..a52c9302d3 100644 --- a/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl +++ b/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl @@ -3,7 +3,6 @@ #include #include #include -#include #include namespace nbl @@ -13,79 +12,81 @@ namespace hlsl namespace rwmc { -template) -struct CascadeAccumulator +template && concepts::UnsignedIntegralScalar) +struct DefaultCascades { - struct CascadeEntry + using layer_type = CascadeLayerType; + using sample_count_type = SampleCountType; + NBL_CONSTEXPR_STATIC_INLINE uint32_t CascadeCount = CascadeCountValue; + + sample_count_type cascadeSampleCounter[CascadeCount]; + CascadeLayerType data[CascadeCount]; + + void clear(uint32_t cascadeIx) { - uint32_t cascadeSampleCounter[CascadeCount]; - CascadeLayerType data[CascadeCount]; + cascadeSampleCounter[cascadeIx] = sample_count_type(0u); + data[cascadeIx] = promote(0.0f); + } - void addSampleIntoCascadeEntry(CascadeLayerType _sample, uint32_t lowerCascadeIndex, float lowerCascadeLevelWeight, float higherCascadeLevelWeight, uint32_t sampleCount) + void addSampleIntoCascadeEntry(CascadeLayerType _sample, uint16_t lowerCascadeIndex, SSplattingParameters::scalar_t lowerCascadeLevelWeight, SSplattingParameters::scalar_t higherCascadeLevelWeight, uint32_t sampleCount) + { + const SSplattingParameters::scalar_t reciprocalSampleCount = SSplattingParameters::scalar_t(1.0f) / SSplattingParameters::scalar_t(sampleCount); + + sample_count_type lowerCascadeSampleCount = cascadeSampleCounter[lowerCascadeIndex]; + data[lowerCascadeIndex] += (_sample * lowerCascadeLevelWeight - (sampleCount - lowerCascadeSampleCount) * data[lowerCascadeIndex]) * reciprocalSampleCount; + cascadeSampleCounter[lowerCascadeIndex] = sample_count_type(sampleCount); + + uint16_t higherCascadeIndex = lowerCascadeIndex + uint16_t(1u); + if (higherCascadeIndex < CascadeCount) { - const float reciprocalSampleCount = 1.0f / float(sampleCount); - - uint32_t lowerCascadeSampleCount = cascadeSampleCounter[lowerCascadeIndex]; - data[lowerCascadeIndex] += (_sample * lowerCascadeLevelWeight - (sampleCount - lowerCascadeSampleCount) * data[lowerCascadeIndex]) * reciprocalSampleCount; - cascadeSampleCounter[lowerCascadeIndex] = sampleCount; - - uint32_t higherCascadeIndex = lowerCascadeIndex + 1u; - if (higherCascadeIndex < CascadeCount) - { - uint32_t higherCascadeSampleCount = cascadeSampleCounter[higherCascadeIndex]; - data[higherCascadeIndex] += (_sample * higherCascadeLevelWeight - (sampleCount - higherCascadeSampleCount) * data[higherCascadeIndex]) * reciprocalSampleCount; - cascadeSampleCounter[higherCascadeIndex] = sampleCount; - } + sample_count_type higherCascadeSampleCount = cascadeSampleCounter[higherCascadeIndex]; + data[higherCascadeIndex] += (_sample * higherCascadeLevelWeight - (sampleCount - higherCascadeSampleCount) * data[higherCascadeIndex]) * reciprocalSampleCount; + cascadeSampleCounter[higherCascadeIndex] = sample_count_type(sampleCount); } - }; - - using cascade_layer_scalar_type = typename vector_traits::scalar_type; - using this_t = CascadeAccumulator; - using input_sample_type = CascadeLayerType; - using output_storage_type = CascadeEntry; - using initialization_data = SplattingParameters; - output_storage_type accumulation; + } +}; + +template +struct CascadeAccumulator +{ + using scalar_t = typename SSplattingParameters::scalar_t; + using input_sample_type = typename CascadesType::layer_type; + using this_t = CascadeAccumulator; + using cascades_type = CascadesType; + NBL_CONSTEXPR_STATIC_INLINE uint32_t CascadeCount = cascades_type::CascadeCount; + NBL_CONSTEXPR_STATIC_INLINE scalar_t LastCascade = scalar_t(CascadeCount - 1u); + cascades_type accumulation; - SplattingParameters splattingParameters; + SSplattingParameters splattingParameters; - static this_t create(NBL_CONST_REF_ARG(SplattingParameters) settings) + static this_t create(NBL_CONST_REF_ARG(SPackedSplattingParameters) settings) { this_t retval; - for (int i = 0; i < CascadeCount; ++i) - { - retval.accumulation.data[i] = promote(0.0f); - retval.accumulation.cascadeSampleCounter[i] = 0u; - } - retval.splattingParameters = settings; + for (uint32_t i = 0u; i < CascadeCount; ++i) + retval.accumulation.clear(i); + retval.splattingParameters = settings.unpack(); return retval; } - - cascade_layer_scalar_type getLuma(NBL_CONST_REF_ARG(CascadeLayerType) col) - { - return hlsl::dot(hlsl::transpose(colorspace::scRGBtoXYZ)[1], col); - } // most of this code is stolen from https://cg.ivd.kit.edu/publications/2018/rwmc/tool/split.cpp void addSample(uint32_t sampleCount, input_sample_type _sample) { - const float32_t2 unpackedParams = hlsl::unpackHalf2x16(splattingParameters.packedLog2); - const cascade_layer_scalar_type log2Start = unpackedParams[0]; - const cascade_layer_scalar_type log2Base = unpackedParams[1]; - const cascade_layer_scalar_type luma = getLuma(_sample); - const cascade_layer_scalar_type log2Luma = log2(luma); - const cascade_layer_scalar_type cascade = log2Luma * 1.f / log2Base - log2Start / log2Base; - const cascade_layer_scalar_type clampedCascade = clamp(cascade, 0, CascadeCount - 1); + const scalar_t luma = splattingParameters.calcLuma(_sample); + const scalar_t log2Luma = log2(luma); + const scalar_t cascade = log2Luma * splattingParameters.RcpLog2Base - splattingParameters.Log2BaseRootOfStart; + const scalar_t clampedCascade = clamp(cascade, scalar_t(0), LastCascade); + const scalar_t clampedCascadeFloor = floor(clampedCascade); // c<=0 -> 0, c>=Count-1 -> Count-1 - uint32_t lowerCascadeIndex = floor(cascade); + uint16_t lowerCascadeIndex = uint16_t(clampedCascadeFloor); // 0 whenever clamped or `cascade` is integer (when `clampedCascade` is integer) - cascade_layer_scalar_type higherCascadeWeight = clampedCascade - floor(clampedCascade); + scalar_t higherCascadeWeight = clampedCascade - clampedCascadeFloor; // never 0 thanks to magic of `1-fract(x)` - cascade_layer_scalar_type lowerCascadeWeight = cascade_layer_scalar_type(1) - higherCascadeWeight; + scalar_t lowerCascadeWeight = scalar_t(1) - higherCascadeWeight; // handle super bright sample case - if (cascade > CascadeCount - 1) - lowerCascadeWeight = exp2(log2Start + log2Base * (CascadeCount - 1) - log2Luma); + if (cascade > LastCascade) + lowerCascadeWeight = exp2(splattingParameters.BrightSampleLumaBias - log2Luma); accumulation.addSampleIntoCascadeEntry(_sample, lowerCascadeIndex, lowerCascadeWeight, higherCascadeWeight, sampleCount); } @@ -97,4 +98,4 @@ struct CascadeAccumulator } } -#endif \ No newline at end of file +#endif diff --git a/include/nbl/builtin/hlsl/rwmc/ResolveParameters.hlsl b/include/nbl/builtin/hlsl/rwmc/ResolveParameters.hlsl index 7509eac493..8a16cdc2f5 100644 --- a/include/nbl/builtin/hlsl/rwmc/ResolveParameters.hlsl +++ b/include/nbl/builtin/hlsl/rwmc/ResolveParameters.hlsl @@ -1,7 +1,8 @@ #ifndef _NBL_BUILTIN_HLSL_RWMC_RESOLVE_PARAMETERS_HLSL_INCLUDED_ -#define _NBL_BUILTIN_HLSL_RWMC_RESOLVE_PARAMETERS_HLSL_INCLUDED_ - -#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#define _NBL_BUILTIN_HLSL_RWMC_RESOLVE_PARAMETERS_HLSL_INCLUDED_ + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include namespace nbl { @@ -10,36 +11,42 @@ namespace hlsl namespace rwmc { -struct ResolveParameters -{ - uint32_t lastCascadeIndex; - float initialEmin; // a minimum image brightness that we always consider reliable - float reciprocalBase; - float reciprocalN; - float reciprocalKappa; - float colorReliabilityFactor; - float NOverKappa; -}; - -ResolveParameters computeResolveParameters(float base, uint32_t sampleCount, float minReliableLuma, float kappa, uint32_t cascadeSize) -{ - ResolveParameters retval; - retval.lastCascadeIndex = cascadeSize - 1u; - retval.initialEmin = minReliableLuma; - retval.reciprocalBase = 1.f / base; - const float N = float(sampleCount); - retval.reciprocalN = 1.f / N; - retval.reciprocalKappa = 1.f / kappa; - // if not interested in exact expected value estimation (kappa!=1.f), can usually accept a bit more variance relative to the image brightness we already have - // allow up to ~ more energy in one sample to lessen bias in some cases - retval.colorReliabilityFactor = base + (1.f - base) * retval.reciprocalKappa; - retval.NOverKappa = N * retval.reciprocalKappa; - - return retval; -} - -} -} -} +struct SResolveParameters +{ + using scalar_t = float32_t; + + static SResolveParameters create(scalar_t base, uint32_t sampleCount, scalar_t minReliableLuma, scalar_t kappa) + { + SResolveParameters retval; + retval.initialEmin = minReliableLuma; + retval.reciprocalBase = 1.f / base; + const scalar_t N = scalar_t(sampleCount); + retval.reciprocalN = 1.f / N; + retval.reciprocalKappa = 1.f / kappa; + // if not interested in exact expected value estimation (kappa!=1.f), can usually accept a bit more variance relative to the image brightness we already have + // allow up to ~ more energy in one sample to lessen bias in some cases + retval.colorReliabilityFactor = base + (1.f - base) * retval.reciprocalKappa; + retval.NOverKappa = N * retval.reciprocalKappa; + + return retval; + } + + template + scalar_t calcLuma(NBL_CONST_REF_ARG(SampleType) col) + { + return hlsl::dot(hlsl::transpose(Colorspace::ToXYZ())[1], col); + } + + scalar_t initialEmin; // a minimum image brightness that we always consider reliable + scalar_t reciprocalBase; + scalar_t reciprocalN; + scalar_t reciprocalKappa; + scalar_t colorReliabilityFactor; + scalar_t NOverKappa; +}; + +} +} +} -#endif \ No newline at end of file +#endif diff --git a/include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl b/include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl index c549d83be6..0b804a1517 100644 --- a/include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl +++ b/include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl @@ -1,7 +1,8 @@ #ifndef _NBL_BUILTIN_HLSL_RWMC_SPLATTING_PARAMETERS_HLSL_INCLUDED_ -#define _NBL_BUILTIN_HLSL_RWMC_SPLATTING_PARAMETERS_HLSL_INCLUDED_ - -#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#define _NBL_BUILTIN_HLSL_RWMC_SPLATTING_PARAMETERS_HLSL_INCLUDED_ + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include namespace nbl { @@ -10,16 +11,46 @@ namespace hlsl namespace rwmc { -struct SplattingParameters -{ - // float16_t log2Start; 0 - // float16_t log2Base; 1 - // pack as Half2x16 - int32_t packedLog2; -}; +struct SSplattingParameters +{ + using scalar_t = float32_t; + scalar_t RcpLog2Base; + scalar_t Log2BaseRootOfStart; + scalar_t BrightSampleLumaBias; + + template + scalar_t calcLuma(NBL_CONST_REF_ARG(CascadeLayerType) col) + { + return hlsl::dot(hlsl::transpose(Colorspace::ToXYZ())[1], col); + } +}; + +struct SPackedSplattingParameters +{ + // float16_t baseRootOfStart; 0 + // float16_t rcpLog2Base; 1 + // pack as Half2x16 + int32_t PackedBaseRootAndRcpLog2Base; + + // float16_t log2BaseRootOfStart; 2 + // float16_t brightSampleLumaBias; 3 + // pack as Half2x16 + int32_t PackedLog2BaseRootAndBrightSampleLumaBias; + + SSplattingParameters unpack() + { + SSplattingParameters retval; + const float32_t2 unpackedBaseRootAndRcpLog2Base = hlsl::unpackHalf2x16(PackedBaseRootAndRcpLog2Base); + const float32_t2 unpackedLog2BaseRootAndBrightSampleLumaBias = hlsl::unpackHalf2x16(PackedLog2BaseRootAndBrightSampleLumaBias); + retval.RcpLog2Base = unpackedBaseRootAndRcpLog2Base[1]; + retval.Log2BaseRootOfStart = unpackedLog2BaseRootAndBrightSampleLumaBias[0]; + retval.BrightSampleLumaBias = unpackedLog2BaseRootAndBrightSampleLumaBias[1]; + return retval; + } +}; } } } -#endif \ No newline at end of file +#endif diff --git a/include/nbl/builtin/hlsl/rwmc/resolve.hlsl b/include/nbl/builtin/hlsl/rwmc/resolve.hlsl index d8f777d277..a30bd49e74 100644 --- a/include/nbl/builtin/hlsl/rwmc/resolve.hlsl +++ b/include/nbl/builtin/hlsl/rwmc/resolve.hlsl @@ -1,115 +1,106 @@ -#ifndef _NBL_BUILTIN_HLSL_RWMC_RESOLVE_HLSL_INCLUDED_ -#define _NBL_BUILTIN_HLSL_RWMC_RESOLVE_HLSL_INCLUDED_ - -#include "nbl/builtin/hlsl/cpp_compat.hlsl" -#include -#include -#include -#include -#include +#ifndef _NBL_BUILTIN_HLSL_RWMC_RESOLVE_HLSL_INCLUDED_ +#define _NBL_BUILTIN_HLSL_RWMC_RESOLVE_HLSL_INCLUDED_ + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include +#include +#include namespace nbl { namespace hlsl { -namespace rwmc -{ - // declare concept -#define NBL_CONCEPT_NAME ResolveAccessorBase -#define NBL_CONCEPT_TPLT_PRM_KINDS (typename)(typename)(int32_t) -#define NBL_CONCEPT_TPLT_PRM_NAMES (T)(VectorScalarType)(Dims) -// not the greatest syntax but works -#define NBL_CONCEPT_PARAM_0 (a,T) -#define NBL_CONCEPT_PARAM_1 (scalar,VectorScalarType) -// start concept - NBL_CONCEPT_BEGIN(2) -// need to be defined AFTER the concept begins -#define a NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 -#define scalar NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 -NBL_CONCEPT_END( - ((NBL_CONCEPT_REQ_EXPR)((a.calcLuma(vector(scalar, scalar, scalar))))) -); -#undef a -#undef scalar -#include - -/* ResolveAccessor is required to: -* - satisfy `LoadableImage` concept requirements -* - implement function called `calcLuma` which calculates luma from a 3 component pixel value -*/ - -template -NBL_BOOL_CONCEPT ResolveAccessor = ResolveAccessorBase && concepts::accessors::LoadableImage; - -template -struct ResolveAccessorAdaptor -{ - using output_scalar_type = OutputScalar; - using output_type = vector; - NBL_CONSTEXPR int32_t image_dimension = 2; - - RWTexture2DArray cascade; - - float32_t calcLuma(NBL_REF_ARG(float32_t3) col) - { - return hlsl::dot(colorspace::scRGB::ToXYZ()[1], col); +namespace rwmc +{ +// declare concept +#define NBL_CONCEPT_NAME ResolveLumaParamsBase +#define NBL_CONCEPT_TPLT_PRM_KINDS (typename)(typename) +#define NBL_CONCEPT_TPLT_PRM_NAMES (T)(SampleType) +#define NBL_CONCEPT_PARAM_0 (a,T) +NBL_CONCEPT_BEGIN(1) +#define a NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 +NBL_CONCEPT_END( + ((NBL_CONCEPT_REQ_TYPE)(T::scalar_t)) + ((NBL_CONCEPT_REQ_TYPE_ALIAS_CONCEPT)(concepts::FloatingPointScalar, typename T::scalar_t)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)( + (a.template calcLuma(::nbl::hlsl::experimental::declval())), + ::nbl::hlsl::is_same_v, + typename T::scalar_t + )) +); +#undef a +#include + +template +NBL_BOOL_CONCEPT ResolveLumaParams = ResolveLumaParamsBase; + +template +NBL_BOOL_CONCEPT ResolveAccessor = concepts::accessors::MipmappedLoadableImage; + +template) +struct SResolveAccessorAdaptor +{ + using output_scalar_t = OutputScalar; + NBL_CONSTEXPR_STATIC_INLINE int32_t Components = 3; + using output_t = vector; + NBL_CONSTEXPR_STATIC_INLINE int32_t image_dimension = 2; + + template + void get(NBL_REF_ARG(output_t) value, vector uv, uint16_t layer, uint16_t level) + { + typename AccessorType::output_t sampled; + accessor.template get(sampled, uv, layer, level); + value = sampled.xyz; + } + + AccessorType accessor; +}; + +template && + ResolveLumaParams +) +struct SResolver +{ + using output_t = typename CascadeAccessor::output_t; + using output_scalar_t = typename vector_traits::scalar_type; + using scalar_t = typename SResolveParameters::scalar_t; + NBL_CONSTEXPR_STATIC_INLINE uint16_t last_cascade = uint16_t(CascadeCount - 1u); + + struct SCascadeSample + { + output_t centerValue; + scalar_t normalizedCenterLuma; + scalar_t normalizedNeighbourhoodAverageLuma; + }; + + static SResolver create(NBL_REF_ARG(SResolveParameters) resolveParameters) + { + SResolver retval; + retval.params = resolveParameters; + + return retval; } - template - output_type get(vector uv, uint16_t layer) - { - uint32_t imgWidth, imgHeight, layers; - cascade.GetDimensions(imgWidth, imgHeight, layers); - int16_t2 cascadeImageDimension = int16_t2(imgWidth, imgHeight); - - if (any(uv < int16_t2(0, 0)) || any(uv > cascadeImageDimension)) - return vector(0, 0, 0, 0); - - return cascade.Load(int32_t3(uv, int32_t(layer))); - } -}; - -template && ResolveAccessor) -struct Resolver -{ - using output_type = OutputColorTypeVec; - using scalar_t = typename vector_traits::scalar_type; - - struct CascadeSample - { - float32_t3 centerValue; - float normalizedCenterLuma; - float normalizedNeighbourhoodAverageLuma; - }; - - static Resolver create(NBL_REF_ARG(ResolveParameters) resolveParameters) - { - Resolver retval; - retval.params = resolveParameters; - - return retval; - } - - output_type operator()(NBL_REF_ARG(CascadeAccessor) acc, const int16_t2 coord) - { - using scalar_t = typename vector_traits::scalar_type; - - scalar_t reciprocalBaseI = 1.f; - CascadeSample curr = __sampleCascade(acc, coord, 0u, reciprocalBaseI); - - output_type accumulation = output_type(0.0f, 0.0f, 0.0f); - scalar_t Emin = params.initialEmin; - - scalar_t prevNormalizedCenterLuma, prevNormalizedNeighbourhoodAverageLuma; - for (int16_t i = 0u; i <= params.lastCascadeIndex; i++) - { - const bool notFirstCascade = i != 0; - const bool notLastCascade = i != params.lastCascadeIndex; - - CascadeSample next; - if (notLastCascade) - { - reciprocalBaseI *= params.reciprocalBase; + output_t operator()(NBL_REF_ARG(CascadeAccessor) acc, const int16_t2 coord) + { + scalar_t reciprocalBaseI = 1.f; + SCascadeSample curr = __sampleCascade(acc, coord, 0u, reciprocalBaseI); + + output_t accumulation = promote(0.0f); + scalar_t Emin = params.initialEmin; + + scalar_t prevNormalizedCenterLuma, prevNormalizedNeighbourhoodAverageLuma; + NBL_UNROLL + for (uint16_t i = 0u; i <= last_cascade; i++) + { + const bool notFirstCascade = i != 0; + const bool notLastCascade = i != last_cascade; + + SCascadeSample next; + if (notLastCascade) + { + reciprocalBaseI *= params.reciprocalBase; next = __sampleCascade(acc, coord, int16_t(i + 1), reciprocalBaseI); } @@ -131,11 +122,11 @@ struct Resolver globalReliability += next.normalizedNeighbourhoodAverageLuma; } // check if above minimum sampling threshold (avg 9 sample occurences in 3x3 neighbourhood), then use per-pixel reliability (NOTE: tertiary op is in reverse) - reliability = globalReliability < params.reciprocalN ? globalReliability : localReliability; - { - const scalar_t accumLuma = acc.calcLuma(accumulation); - if (accumLuma > Emin) - Emin = accumLuma; + reliability = globalReliability < params.reciprocalN ? globalReliability : localReliability; + { + const scalar_t accumLuma = params.template calcLuma(accumulation); + if (accumLuma > Emin) + Emin = accumLuma; const scalar_t colorReliability = Emin * reciprocalBaseI * params.colorReliabilityFactor; @@ -152,40 +143,47 @@ struct Resolver curr = next; } - return accumulation; - } - - ResolveParameters params; + return accumulation; + } + + SResolveParameters params; // pseudo private stuff: - CascadeSample __sampleCascade(NBL_REF_ARG(CascadeAccessor) acc, int16_t2 coord, uint16_t cascadeIndex, scalar_t reciprocalBaseI) - { - output_type neighbourhood[9]; - neighbourhood[0] = acc.template get(coord + int16_t2(-1, -1), cascadeIndex).xyz; - neighbourhood[1] = acc.template get(coord + int16_t2(0, -1), cascadeIndex).xyz; - neighbourhood[2] = acc.template get(coord + int16_t2(1, -1), cascadeIndex).xyz; - neighbourhood[3] = acc.template get(coord + int16_t2(-1, 0), cascadeIndex).xyz; - neighbourhood[4] = acc.template get(coord + int16_t2(0, 0), cascadeIndex).xyz; - neighbourhood[5] = acc.template get(coord + int16_t2(1, 0), cascadeIndex).xyz; - neighbourhood[6] = acc.template get(coord + int16_t2(-1, 1), cascadeIndex).xyz; - neighbourhood[7] = acc.template get(coord + int16_t2(0, 1), cascadeIndex).xyz; - neighbourhood[8] = acc.template get(coord + int16_t2(1, 1), cascadeIndex).xyz; - - // numerical robustness - float32_t3 excl_hood_sum = ((neighbourhood[0] + neighbourhood[1]) + (neighbourhood[2] + neighbourhood[3])) + - ((neighbourhood[5] + neighbourhood[6]) + (neighbourhood[7] + neighbourhood[8])); - - CascadeSample retval; - retval.centerValue = neighbourhood[4]; - retval.normalizedNeighbourhoodAverageLuma = retval.normalizedCenterLuma = acc.calcLuma(neighbourhood[4]) * reciprocalBaseI; - retval.normalizedNeighbourhoodAverageLuma = (acc.calcLuma(excl_hood_sum) * reciprocalBaseI + retval.normalizedNeighbourhoodAverageLuma) / 9.f; - return retval; - } -}; + SCascadeSample __sampleCascade(NBL_REF_ARG(CascadeAccessor) acc, int16_t2 coord, uint16_t cascadeIndex, scalar_t reciprocalBaseI) + { + output_t sampleValue; + scalar_t excl_hood_luma_sum = 0.f; + + acc.template get(sampleValue, coord + int16_t2(-1, -1), cascadeIndex, 0u); + excl_hood_luma_sum += params.template calcLuma(sampleValue); + acc.template get(sampleValue, coord + int16_t2(0, -1), cascadeIndex, 0u); + excl_hood_luma_sum += params.template calcLuma(sampleValue); + acc.template get(sampleValue, coord + int16_t2(1, -1), cascadeIndex, 0u); + excl_hood_luma_sum += params.template calcLuma(sampleValue); + acc.template get(sampleValue, coord + int16_t2(-1, 0), cascadeIndex, 0u); + excl_hood_luma_sum += params.template calcLuma(sampleValue); + + SCascadeSample retval; + acc.template get(retval.centerValue, coord + int16_t2(0, 0), cascadeIndex, 0u); + const scalar_t centerLuma = params.template calcLuma(retval.centerValue); + acc.template get(sampleValue, coord + int16_t2(1, 0), cascadeIndex, 0u); + excl_hood_luma_sum += params.template calcLuma(sampleValue); + acc.template get(sampleValue, coord + int16_t2(-1, 1), cascadeIndex, 0u); + excl_hood_luma_sum += params.template calcLuma(sampleValue); + acc.template get(sampleValue, coord + int16_t2(0, 1), cascadeIndex, 0u); + excl_hood_luma_sum += params.template calcLuma(sampleValue); + acc.template get(sampleValue, coord + int16_t2(1, 1), cascadeIndex, 0u); + excl_hood_luma_sum += params.template calcLuma(sampleValue); + + retval.normalizedCenterLuma = centerLuma * reciprocalBaseI; + retval.normalizedNeighbourhoodAverageLuma = (excl_hood_luma_sum + centerLuma) * reciprocalBaseI / 9.f; + return retval; + } +}; } } } -#endif \ No newline at end of file +#endif