Devsh-Graphics-Programming
diff --git a/‎include/nbl/video/utilities/CComputeBlit.h‎
Lines changed: 20 additions & 17 deletions b/‎include/nbl/video/utilities/CComputeBlit.h‎
Lines changed: 20 additions & 17 deletions
diff --git a/‎src/nbl/video/utilities/CComputeBlit.cpp‎
Lines changed: 75 additions & 11 deletions b/‎src/nbl/video/utilities/CComputeBlit.cpp‎
Lines changed: 75 additions & 11 deletions
@@ -41,7 +41,21 @@ class NBL_API2 CComputeBlit : public core::IReferenceCounted
 }
 
 // ctor
-inline CComputeBlit(core::smart_refctd_ptr<ILogicalDevice>&& logicalDevice) : m_device(std::move(logicalDevice)) {}
+// Takes ownership of `logicalDevice` plus two optional collaborators:
+// - `cache`: shader compiler cache handed to the asset converter when pipelines are created; when null a fresh, empty cache is created
+// - `logger`: when null, the logger of the physical device's debug callback is used instead (if there is one)
+CComputeBlit(
+core::smart_refctd_ptr<ILogicalDevice>&& logicalDevice,
+core::smart_refctd_ptr<asset::IShaderCompiler::CCache>&& cache=nullptr,
+core::smart_refctd_ptr<system::ILogger>&& logger=nullptr
+);
+
+// if you set the values too small, we'll correct them ourselves anyway
+// One pipeline request for `createAndCachePipelines`, packed into bitfields whose widths add up to 32 bits.
+struct STask
+{
+// log2 of the workgroup size; 4 bits wide, so only exponents 0..15 are representable
+uint32_t workgroupSizeLog2 : 4 = 0;
+// the TRUE output format, not the storage view format you might manually encode into
+// leaving it as TBF_UNKNOWN makes `createAndCachePipelines` budget for 4 channels of 32-bit float
+hlsl::format::TexelBlockFormat outputFormat : 8 = hlsl::format::TexelBlockFormat::TBF_UNKNOWN;
+// NOTE(review): `createAndCachePipelines` compares this against `sizeof`-based byte counts, so presumably bytes per invocation (6 bits, max 63) - confirm
+uint32_t sharedMemoryPerInvocation : 6 = 0;
+// spare bits, brings the declared widths to 32 in total
+uint32_t unused : 14 = 0;
+};
 
 //! Returns the original format if supports STORAGE_IMAGE otherwise returns a format in its compat class which supports STORAGE_IMAGE.
 inline asset::E_FORMAT getOutputViewFormat(const asset::E_FORMAT format)
@@ -66,22 +80,7 @@ class NBL_API2 CComputeBlit : public core::IReferenceCounted
 return compatFormat;
 }
 }
-/*
-struct STask
-{
-hlsl::vector<uint8_t,3> preloadWindow; 
-asset::E_FORMAT inFormat;
-asset::E_FORMAT outFormat;
-// default no coverage adjustment
-uint8_t alphaBinCountLog2 : 4 = 0;
-};
-inline void initializeTaskDefault(STask& task) const
-{
-auto physDev = m_device->getPhysicalDevice();
-const auto formatTrait = hlsl::format::getTraits(static_cast<hlsl::format::TexelBlockFormat>(task.outFormat));
-task.alphaBinCountLog2 = hlsl::max(,task.alphaBinCountLog2);
-}
-*/
+
 #if 0
 // @param `alphaBinCount` is only required to size the histogram present in the default nbl_glsl_blit_AlphaStatistics_t in default_compute_common.comp
 core::smart_refctd_ptr<video::IGPUShader> createAlphaTestSpecializedShader(const asset::IImage::E_TYPE inImageType, const uint32_t alphaBinCount = asset::IBlitUtilities::DefaultAlphaBinCount);
@@ -666,7 +665,11 @@ class NBL_API2 CComputeBlit : public core::IReferenceCounted
 EBT_COUNT
 };
 
+// Builds the pipelines requested by `tasks` through `converter` (which is also used as the read cache) and writes the resulting GPU pipelines out through `pipelines`.
+// NOTE(review): the number of elements `pipelines` must be able to hold is not stated by this declaration - confirm against the definition before calling
+void createAndCachePipelines(CAssetConverter* converter, core::smart_refctd_ptr<IGPUComputePipeline>* pipelines, const std::span<const STask> tasks);
+
 core::smart_refctd_ptr<ILogicalDevice> m_device;
+system::logger_opt_smart_ptr m_logger;
+core::smart_refctd_ptr<asset::IShaderCompiler::CCache> m_shaderCache;
 
 //! This calculates the inclusive upper bound on the preload region i.e. it will be reachable for some cases. For the rest it will be bigger
 //! by a pixel in each dimension.
 
@@ -5,6 +5,79 @@ using namespace nbl::system;
 using namespace nbl::asset;
 using namespace nbl::video;
 
+
+// Stores the device and resolves the two optional collaborators:
+// - logger: the one passed in, else the logger of the physical device's debug callback (if any), else none
+// - shader cache: the one passed in, else a newly created empty cache
+CComputeBlit::CComputeBlit(smart_refctd_ptr<ILogicalDevice>&& logicalDevice, smart_refctd_ptr<IShaderCompiler::CCache>&& cache, smart_refctd_ptr<ILogger>&& logger) : m_device(std::move(logicalDevice)), m_logger(nullptr)
+{
+	if (logger)
+		m_logger = std::move(logger);
+	// guard against a device without a debug callback before asking the callback for its logger
+	else if (auto debugCb=m_device->getPhysicalDevice()->getDebugCallback(); debugCb && debugCb->getLogger())
+		m_logger = smart_refctd_ptr<system::ILogger>(debugCb->getLogger());
+
+	if (cache)
+		m_shaderCache = std::move(cache);
+	else
+		m_shaderCache = make_smart_refctd_ptr<IShaderCompiler::CCache>();
+}
+
+// Creates (and inserts into `converter`'s cache) the pipelines requested by `tasks`, writing GPU pipelines out through `pipelines`.
+// Task values that are too small are corrected on a local copy; the caller's `tasks` are never modified.
+void CComputeBlit::createAndCachePipelines(CAssetConverter* converter, smart_refctd_ptr<IGPUComputePipeline>* pipelines, const std::span<const STask> tasks)
+{
+	// nothing to build, and `cpuPplns.data()` must not be dereferenced for an empty vector further down
+	if (tasks.empty())
+		return;
+
+	core::vector<smart_refctd_ptr<ICPUComputePipeline>> cpuPplns;
+	cpuPplns.reserve(tasks.size());
+
+	const auto& limits = m_device->getPhysicalDevice()->getLimits();
+	// deliberately iterate by value, the adjustments below are applied to a private copy
+	for (auto task : tasks)
+	{
+		// adjust task default values
+		{
+			// `workgroupSizeLog2` is an exponent, so compare the size it encodes against the subgroup size
+			if ((0x1u<<task.workgroupSizeLog2)<limits.maxSubgroupSize)
+			{
+				// floor(log2(maxComputeWorkGroupInvocations)), capped at the largest exponent the 4-bit field can hold
+				uint32_t maxWorkgroupSizeLog2 = 0;
+				for (uint32_t invocations=limits.maxComputeWorkGroupInvocations; invocations>1 && maxWorkgroupSizeLog2<15; invocations>>=1)
+					maxWorkgroupSizeLog2++;
+				task.workgroupSizeLog2 = maxWorkgroupSizeLog2;
+			}
+			bool useFloat16 = false;
+			uint16_t channels = 4;
+			using namespace hlsl::format;
+			if (task.outputFormat!=TexelBlockFormat::TBF_UNKNOWN)
+			{
+				channels = getTraits(task.outputFormat).Channels;
+				const auto precisionAt1 = getFormatPrecision(static_cast<E_FORMAT>(task.outputFormat),3,1.f);
+				const auto precisionAt0 = getFormatPrecision(static_cast<E_FORMAT>(task.outputFormat),3,0.f);
+				// only drop to half floats when the device can do it and the output format is no more precise than float16
+				if (limits.workgroupMemoryExplicitLayout16BitAccess && limits.shaderFloat16 && precisionAt1>=std::exp2f(-11.f) && precisionAt0>=std::numeric_limits<hlsl::float16_t>::min())
+					useFloat16 = true;
+			}
+			// the absolute minimum needed to store a single pixel
+			const auto singlePixelStorage = channels*(useFloat16 ? sizeof(hlsl::float16_t):sizeof(hlsl::float32_t));
+			// also slightly more memory is needed
+			task.sharedMemoryPerInvocation = core::max(singlePixelStorage*2,task.sharedMemoryPerInvocation);
+		}
+		// create blit pipeline
+		cpuPplns.emplace_back(nullptr);
+		// create optional coverage normalization pipeline
+		cpuPplns.emplace_back(nullptr);
+	}
+
+	CAssetConverter::SInputs inputs = {};
+	inputs.readCache = converter;
+	inputs.logger = m_logger.getRaw();
+	std::get<CAssetConverter::SInputs::asset_span_t<ICPUComputePipeline>>(inputs.assets) = {&cpuPplns.data()->get(),cpuPplns.size()};
+	inputs.readShaderCache = m_shaderCache.get();
+	inputs.writeShaderCache = m_shaderCache.get();
+	// no pipeline cache, because we only make the same pipeline once, ever
+	auto reserveResults = converter->reserve(inputs);
+	assert(reserveResults.getRequiredQueueFlags().value==IQueue::FAMILY_FLAGS::NONE);
+	// copy over the results
+	{
+		auto rIt = reserveResults.getGPUObjects<ICPUComputePipeline>().data();
+		// TODO: redo
+		// NOTE(review): two CPU pipelines are queued per task above, yet only the first `tasks.size()` results are copied out - confirm the intended layout of `pipelines`
+		for (size_t i=0; i<tasks.size(); i++)
+			*(pipelines++) = (rIt++)->value;
+	}
+
+	// this just inserts the pipelines into the cache
+	{
+		CAssetConverter::SConvertParams params = {};
+		auto convertResults = reserveResults.convert(params);
+		assert(!convertResults.blocking());
+	}
+}
+
 #if 0
 core::smart_refctd_ptr<video::IGPUShader> CComputeBlit::createAlphaTestSpecializedShader(const asset::IImage::E_TYPE imageType, const uint32_t alphaBinCount)
 {
@@ -39,21 +112,14 @@ core::smart_refctd_ptr<video::IGPUShader> CComputeBlit::createAlphaTestSpecializ
  "}\n";
 
 auto cpuShader = core::make_smart_refctd_ptr<asset::ICPUShader>(shaderSourceStream.str().c_str(), IGPUShader::E_SHADER_STAGE::ESS_COMPUTE, IGPUShader::E_CONTENT_TYPE::ECT_HLSL, "CComputeBlitGLSLGLSL::createAlphaTestSpecializedShader");
-
-return m_device->createShader(std::move(cpuShader.get()));
 }
 
-core::smart_refctd_ptr<video::IGPUShader> CComputeBlit::createNormalizationSpecializedShader(const asset::IImage::E_TYPE imageType, const asset::E_FORMAT outFormat,
-const uint32_t alphaBinCount)
+core::smart_refctd_ptr<video::IGPUShader> CComputeBlit::createNormalizationSpecializedShader(const asset::IImage::E_TYPE imageType, const uint32_t alphaBinCount)
 {
 const auto workgroupDims = getDefaultWorkgroupDims(imageType);
 const auto paddedAlphaBinCount = getPaddedAlphaBinCount(workgroupDims, alphaBinCount);
 const uint32_t blitDimCount = static_cast<uint32_t>(imageType) + 1;
 
-const auto castedFormat = getOutImageViewFormat(outFormat);
-assert(outFormat == castedFormat);
-const char* formatQualifier = asset::CHLSLCompiler::getStorageImageFormatQualifier(castedFormat);
-
 std::ostringstream shaderSourceStream;
 
 shaderSourceStream
@@ -67,7 +133,7 @@ core::smart_refctd_ptr<video::IGPUShader> CComputeBlit::createNormalizationSpeci
  "[[vk::binding(0, 0)]]\n"
  "nbl::hlsl::blit::impl::dim_to_image_properties<ceval_params_t::BlitDimCount>::combined_sampler_t inCS;\n"
 
- "[[vk::image_format(\"" << formatQualifier << "\")]]\n"
+ "[[vk::image_format(\"unknown\")]]\n"
  "[[vk::binding(1, 0)]]\n"
  "nbl::hlsl::blit::impl::dim_to_image_properties<ceval_params_t::BlitDimCount>::image_t outImg;\n"
 
@@ -90,7 +156,5 @@ core::smart_refctd_ptr<video::IGPUShader> CComputeBlit::createNormalizationSpeci
  "}\n";
 
 auto cpuShader = core::make_smart_refctd_ptr<asset::ICPUShader>(shaderSourceStream.str().c_str(), IGPUShader::E_SHADER_STAGE::ESS_COMPUTE, IGPUShader::E_CONTENT_TYPE::ECT_HLSL, "CComputeBlitGLSL::createNormalizationSpecializedShader");
-
-return m_device->createShader(std::move(cpuShader.get()));
 }
 #endif