@@ -5,6 +5,79 @@ using namespace nbl::system;
55using namespace nbl ::asset;
66using namespace nbl ::video;
77
8+
9+ CComputeBlit::CComputeBlit (smart_refctd_ptr<ILogicalDevice>&& logicalDevice, smart_refctd_ptr<IShaderCompiler::CCache>&& cache, smart_refctd_ptr<ILogger>&& logger) : m_device(std::move(logicalDevice)), m_logger(nullptr )
10+ {
11+ if (logger)
12+ m_logger = std::move (logger);
13+ else if (auto debugCb=m_device->getPhysicalDevice ()->getDebugCallback (); debugCb->getLogger ())
14+ m_logger = smart_refctd_ptr<system::ILogger>(debugCb->getLogger ());
15+
16+ if (cache)
17+ m_shaderCache = std::move (cache);
18+ else
19+ m_shaderCache = make_smart_refctd_ptr<IShaderCompiler::CCache>();
20+ }
21+
22+ void CComputeBlit::createAndCachePipelines (CAssetConverter* converter, smart_refctd_ptr<IGPUComputePipeline>* pipelines, const std::span<const STask> tasks)
23+ {
24+ core::vector<smart_refctd_ptr<ICPUComputePipeline>> cpuPplns;
25+ cpuPplns.reserve (tasks.size ());
26+
27+ const auto & limits = m_device->getPhysicalDevice ()->getLimits ();
28+ for (auto task : tasks)
29+ {
30+ // adjust task default values
31+ {
32+ if (task.workgroupSizeLog2 <limits.maxSubgroupSize )
33+ task.workgroupSizeLog2 = core::roundDownToPoT (limits.maxComputeWorkGroupInvocations );
34+ bool useFloat16 = false ;
35+ uint16_t channels = 4 ;
36+ using namespace hlsl ::format;
37+ if (task.outputFormat !=TexelBlockFormat::TBF_UNKNOWN)
38+ {
39+ channels = getTraits (task.outputFormat ).Channels ;
40+ const auto precisionAt1 = getFormatPrecision (static_cast <E_FORMAT>(task.outputFormat ),3 ,1 .f );
41+ const auto precisionAt0 = getFormatPrecision (static_cast <E_FORMAT>(task.outputFormat ),3 ,0 .f );
42+ if (limits.workgroupMemoryExplicitLayout16BitAccess && limits.shaderFloat16 && precisionAt1>=std::exp2f (-11 .f ) && precisionAt0>=std::numeric_limits<hlsl::float16_t >::min ())
43+ useFloat16 = true ;
44+ }
45+ // the absolute minimum needed to store a single pixel
46+ const auto singlePixelStorage = channels*(useFloat16 ? sizeof (hlsl::float16_t ):sizeof (hlsl::float32_t ));
47+ // also slightly more memory is needed
48+ task.sharedMemoryPerInvocation = core::max (singlePixelStorage*2 ,task.sharedMemoryPerInvocation );
49+ }
50+ // create blit pipeline
51+ cpuPplns.emplace_back (nullptr );
52+ // create optional coverage normalization pipeline
53+ cpuPplns.emplace_back (nullptr );
54+ }
55+
56+ CAssetConverter::SInputs inputs = {};
57+ inputs.readCache = converter;
58+ inputs.logger = m_logger.getRaw ();
59+ std::get<CAssetConverter::SInputs::asset_span_t <ICPUComputePipeline>>(inputs.assets ) = {&cpuPplns.data ()->get (),cpuPplns.size ()};
60+ inputs.readShaderCache = m_shaderCache.get ();
61+ inputs.writeShaderCache = m_shaderCache.get ();
62+ // no pipeline cache, because we only make the same pipeline once, ever
63+ auto reserveResults = converter->reserve (inputs);
64+ assert (reserveResults.getRequiredQueueFlags ().value ==IQueue::FAMILY_FLAGS::NONE);
65+ // copy over the results
66+ {
67+ auto rIt = reserveResults.getGPUObjects <ICPUComputePipeline>().data ();
68+ // TODO: redo
69+ for (size_t i=0 ; i<tasks.size (); i++)
70+ *(pipelines++) = (rIt++)->value ;
71+ }
72+
73+ // this just inserts the pipelines into the cache
74+ {
75+ CAssetConverter::SConvertParams params = {};
76+ auto convertResults = reserveResults.convert (params);
77+ assert (!convertResults.blocking ());
78+ }
79+ }
80+
881#if 0
982core::smart_refctd_ptr<video::IGPUShader> CComputeBlit::createAlphaTestSpecializedShader(const asset::IImage::E_TYPE imageType, const uint32_t alphaBinCount)
1083{
@@ -39,21 +112,14 @@ core::smart_refctd_ptr<video::IGPUShader> CComputeBlit::createAlphaTestSpecializ
39112 "}\n";
40113
41114auto cpuShader = core::make_smart_refctd_ptr<asset::ICPUShader>(shaderSourceStream.str().c_str(), IGPUShader::E_SHADER_STAGE::ESS_COMPUTE, IGPUShader::E_CONTENT_TYPE::ECT_HLSL, "CComputeBlitGLSLGLSL::createAlphaTestSpecializedShader");
42-
43- return m_device->createShader(std::move(cpuShader.get()));
44115}
45116
46- core::smart_refctd_ptr<video::IGPUShader> CComputeBlit::createNormalizationSpecializedShader(const asset::IImage::E_TYPE imageType, const asset::E_FORMAT outFormat,
47- const uint32_t alphaBinCount)
117+ core::smart_refctd_ptr<video::IGPUShader> CComputeBlit::createNormalizationSpecializedShader(const asset::IImage::E_TYPE imageType, const uint32_t alphaBinCount)
48118{
49119const auto workgroupDims = getDefaultWorkgroupDims(imageType);
50120const auto paddedAlphaBinCount = getPaddedAlphaBinCount(workgroupDims, alphaBinCount);
51121const uint32_t blitDimCount = static_cast<uint32_t>(imageType) + 1;
52122
53- const auto castedFormat = getOutImageViewFormat(outFormat);
54- assert(outFormat == castedFormat);
55- const char* formatQualifier = asset::CHLSLCompiler::getStorageImageFormatQualifier(castedFormat);
56-
57123std::ostringstream shaderSourceStream;
58124
59125shaderSourceStream
@@ -67,7 +133,7 @@ core::smart_refctd_ptr<video::IGPUShader> CComputeBlit::createNormalizationSpeci
67133 "[[vk::binding(0, 0)]]\n"
68134 "nbl::hlsl::blit::impl::dim_to_image_properties<ceval_params_t::BlitDimCount>::combined_sampler_t inCS;\n"
69135
70- "[[vk::image_format(\"" << formatQualifier << " \")]]\n"
136+ "[[vk::image_format(\"unknown \")]]\n"
71137 "[[vk::binding(1, 0)]]\n"
72138 "nbl::hlsl::blit::impl::dim_to_image_properties<ceval_params_t::BlitDimCount>::image_t outImg;\n"
73139
@@ -90,7 +156,5 @@ core::smart_refctd_ptr<video::IGPUShader> CComputeBlit::createNormalizationSpeci
90156 "}\n";
91157
92158auto cpuShader = core::make_smart_refctd_ptr<asset::ICPUShader>(shaderSourceStream.str().c_str(), IGPUShader::E_SHADER_STAGE::ESS_COMPUTE, IGPUShader::E_CONTENT_TYPE::ECT_HLSL, "CComputeBlitGLSL::createNormalizationSpecializedShader");
93-
94- return m_device->createShader(std::move(cpuShader.get()));
95159}
96160#endif
0 commit comments