@@ -38,7 +38,8 @@ class CAssetConverter : public core::IReferenceCounted
3838asset::ICPUSampler,
3939asset::ICPUShader,
4040asset::ICPUBuffer,
41- // acceleration structures,
41+ asset::ICPUBottomLevelAccelerationStructure,
42+ asset::ICPUTopLevelAccelerationStructure,
4243asset::ICPUImage,
4344asset::ICPUBufferView,
4445asset::ICPUImageView,
@@ -71,6 +72,14 @@ class CAssetConverter : public core::IReferenceCounted
7172{
7273if (!params.valid ())
7374return nullptr ;
75+ #ifndef _NBL_DEBUG
76+ if (!params.optimizer )
77+ {
78+ using pass_e = asset::ISPIRVOptimizer::E_OPTIMIZER_PASS;
79+ // shall we do others?
80+ params.optimizer = core::make_smart_rectd_ptr<asset::ISPIRVOptimizer>({EOP_STRIP_DEBUG_INFO});
81+ }
82+ #endif
7483return core::smart_refctd_ptr<CAssetConverter>(new CAssetConverter (std::move (params)),core::dont_grab);
7584}
7685// When getting dependents, the creation parameters of GPU objects will be produced and patched appropriately.
@@ -149,6 +158,75 @@ class CAssetConverter : public core::IReferenceCounted
149158return {true ,retval};
150159}
151160};
161+ struct NBL_API2 acceleration_structure_patch_base
162+ {
163+ public:
164+ enum class BuildPreference : uint8_t
165+ {
166+ None = 0 ,
167+ FastTrace = 1 ,
168+ FastBuild = 2 ,
169+ Invalid = 3
170+ };
171+
172+ // ! select build flags
173+ uint8_t allowUpdate : 1 = false ;
174+ uint8_t allowCompaction : 1 = false ;
175+ uint8_t allowDataAccess : 1 = false ;
176+ BuildPreference preference : 2 = BuildPreference::Invalid;
177+ uint8_t lowMemory : 1 = false ;
178+ // ! things that control the build
179+ uint8_t hostBuild : 1 = false ;
180+ uint8_t compactAfterBuild : 1 = false ;
181+
182+ protected:
183+ bool valid (const ILogicalDevice* device);
184+
185+ template <typename CRTP>
186+ std::pair<bool ,CRTP> combine_impl (const CRTP& _this, const CRTP& other) const
187+ {
188+ if (_this.preference !=other.preference || _this.preference ==BuildPreference::Invalid)
189+ return {false ,_this};
190+ CRTP retval = _this;
191+ retval.allowUpdate |= other.allowUpdate ;
192+ retval.allowCompaction |= other.allowCompaction ;
193+ retval.allowDataAccess |= other.allowDataAccess ;
194+ retval.lowMemory |= other.lowMemory ;
195+ retval.hostBuild |= other.hostBuild ;
196+ retval.compactAfterBuild |= other.compactAfterBuild ;
197+ return {true ,retval};
198+ }
199+ };
200+ template <>
201+ struct NBL_API2 patch_impl_t <asset::ICPUBottomLevelAccelerationStructure> : acceleration_structure_patch_base
202+ {
203+ public:
204+ PATCH_IMPL_BOILERPLATE (asset::ICPUBottomLevelAccelerationStructure);
205+
206+ using build_flags_t = asset::ICPUBottomLevelAccelerationStructure::BUILD_FLAGS;
207+ core::bitflag<build_flags_t > getBuildFlags (const asset::ICPUBottomLevelAccelerationStructure* blas) const ;
208+
209+ protected:
210+ inline std::pair<bool ,this_t > combine (const this_t & other) const
211+ {
212+ return combine_impl<this_t >(*this ,other);
213+ }
214+ };
215+ template <>
216+ struct NBL_API2 patch_impl_t <asset::ICPUTopLevelAccelerationStructure> : acceleration_structure_patch_base
217+ {
218+ public:
219+ PATCH_IMPL_BOILERPLATE (asset::ICPUTopLevelAccelerationStructure);
220+
221+ using build_flags_t = asset::ICPUTopLevelAccelerationStructure::BUILD_FLAGS;
222+ core::bitflag<build_flags_t > getBuildFlags (const asset::ICPUTopLevelAccelerationStructure* tlas) const ;
223+
224+ protected:
225+ inline std::pair<bool ,this_t > combine (const this_t & other) const
226+ {
227+ return combine_impl<this_t >(*this ,other);
228+ }
229+ };
152230template <>
153231struct NBL_API2 patch_impl_t <asset::ICPUImage>
154232{
@@ -458,6 +536,8 @@ class CAssetConverter : public core::IReferenceCounted
458536virtual const patch_t <asset::ICPUSampler>* operator ()(const lookup_t <asset::ICPUSampler>&) const = 0;
459537virtual const patch_t <asset::ICPUShader>* operator ()(const lookup_t <asset::ICPUShader>&) const = 0;
460538virtual const patch_t <asset::ICPUBuffer>* operator ()(const lookup_t <asset::ICPUBuffer>&) const = 0;
539+ virtual const patch_t <asset::ICPUBottomLevelAccelerationStructure>* operator ()(const lookup_t <asset::ICPUBottomLevelAccelerationStructure>&) const = 0;
540+ virtual const patch_t <asset::ICPUTopLevelAccelerationStructure>* operator ()(const lookup_t <asset::ICPUTopLevelAccelerationStructure>&) const = 0;
461541virtual const patch_t <asset::ICPUImage>* operator ()(const lookup_t <asset::ICPUImage>&) const = 0;
462542virtual const patch_t <asset::ICPUBufferView>* operator ()(const lookup_t <asset::ICPUBufferView>&) const = 0;
463543virtual const patch_t <asset::ICPUImageView>* operator ()(const lookup_t <asset::ICPUImageView>&) const = 0;
@@ -577,6 +657,8 @@ class CAssetConverter : public core::IReferenceCounted
577657bool operator ()(lookup_t <asset::ICPUSampler>);
578658bool operator ()(lookup_t <asset::ICPUShader>);
579659bool operator ()(lookup_t <asset::ICPUBuffer>);
660+ bool operator ()(lookup_t <asset::ICPUBottomLevelAccelerationStructure>);
661+ bool operator ()(lookup_t <asset::ICPUTopLevelAccelerationStructure>);
580662bool operator ()(lookup_t <asset::ICPUImage>);
581663bool operator ()(lookup_t <asset::ICPUBufferView>);
582664bool operator ()(lookup_t <asset::ICPUImageView>);
@@ -717,6 +799,16 @@ class CAssetConverter : public core::IReferenceCounted
717799return {};
718800}
719801
802+ // this a weird signature, but its for an acceleration structure backing IGPUBuffer
803+ virtual inline std::span<const uint32_t > getSharedOwnershipQueueFamilies (const size_t groupCopyID, const asset::ICPUBottomLevelAccelerationStructure* blas, const patch_t <asset::ICPUBottomLevelAccelerationStructure>& patch) const
804+ {
805+ return {};
806+ }
807+ virtual inline std::span<const uint32_t > getSharedOwnershipQueueFamilies (const size_t groupCopyID, const asset::ICPUTopLevelAccelerationStructure* tlas, const patch_t <asset::ICPUTopLevelAccelerationStructure>& patch) const
808+ {
809+ return {};
810+ }
811+
720812virtual inline std::span<const uint32_t > getSharedOwnershipQueueFamilies (const size_t groupCopyID, const asset::ICPUImage* buffer, const patch_t <asset::ICPUImage>& patch) const
721813{
722814return {};
@@ -793,6 +885,7 @@ class CAssetConverter : public core::IReferenceCounted
793885{
// By default the last queue to touch a GPU object will own it after any transfer or compute operations are complete.
795887// If you want to record a pipeline barrier that will release ownership to another family, override this.
// The IGPUBuffer overload may be called with a hash belonging to an Acceleration Structure; this means that the buffer is the storage buffer backing that AS.
796889virtual inline uint32_t getFinalOwnerQueueFamily (const IGPUBuffer* buffer, const core::blake3_hash_t & createdFrom)
797890{
798891return IQueue::FamilyIgnored;
@@ -829,6 +922,11 @@ class CAssetConverter : public core::IReferenceCounted
829922IUtilities* utilities = nullptr ;
830923// optional, last submit (compute, transfer if no compute needed) signals these in addition to the scratch semaphore
831924std::span<const IQueue::SSubmitInfo::SSemaphoreInfo> extraSignalSemaphores = {};
925+ // specific to Acceleration Structure Build, they need to be at least as large as the largest amount of scratch required for an AS build
926+ CAsyncSingleBufferSubAllocatorST</* TODO: try uint64_t GP Address Allocator*/ >* scratchForDeviceASBuild = nullptr ;
927+ std::pmr::memory_resource* scratchForHostASBuild = nullptr ;
928+ // needs to service allocations without limit, unlike the above where failure will just force a flush and performance of already queued up builds
929+ IDeviceMemoryAllocator* compactedASAllocator = nullptr ;
832930// specific to mip-map recomputation, these are okay defaults for the size of our Descriptor Indexed temporary descriptor set
833931uint32_t sampledImageBindingCount = 1 <<10 ;
834932uint32_t storageImageBindingCount = 11 <<10 ;
@@ -853,10 +951,22 @@ class CAssetConverter : public core::IReferenceCounted
853951// https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/vkCmdCopyBufferToImage.html#VUID-vkCmdCopyBufferToImage-commandBuffer-07739
854952inline core::bitflag<IQueue::FAMILY_FLAGS> getRequiredQueueFlags () const {return m_queueFlags;}
855953
954+ // This is just enough memory to build the Acceleration Structures one by one waiting for each Device Build to complete inbetween. If 0 there are no Device AS Builds or Compactions to perform.
955+ inline uint64_t getMinASBuildScratchSize (const bool forHostOps) const {return m_minASBuildScratchSize[forHostOps];}
956+ // Enough memory to build and compact all the Acceleration Structures at once, obviously respecting order of BLAS (build->compact) -> TLAS (build->compact)
957+ inline uint64_t getMaxASBuildScratchSize (const bool forHostOps) const {return m_maxASBuildScratchSize[forHostOps];}
958+ // What usage flags your scratch buffer must have, if returns NONE means are no Device AS Builds to perform.
959+ inline auto getASBuildScratchUsages () const {return m_ASBuildScratchUsages;}
960+ // tells you if you need to provide a valid `SConvertParams::scratchForHostASBuild`
961+ inline bool willHostASBuild () const {return m_willHostBuildSomeAS;}
962+ // tells you if you need to provide a valid `SConvertParams::compactedASAllocator`
963+ inline bool willCompactAS () const {return m_willHostBuildSomeAS;}
964+
856965//
857966inline operator bool () const {return bool (m_converter);}
858967
859- // until `convert` is called, this will only contain valid entries for items already found in `SInput::readCache`
968+ // Until `convert` is called, the Buffers and Images are not filled with content and Acceleration Structures are not built, unless found in the `SInput::readCache`
969+ // WARNING: The Acceleration Structure Pointer WILL CHANGE after calling `convert` if its patch dictates that it will be compacted! (since AS can't resize)
860970// TODO: we could also return per-object semaphore values when object is ready for use (would have to propagate two semaphores up through dependants)
861971template <asset::Asset AssetType>
862972std::span<const asset_cached_t <AssetType>> getGPUObjects () const {return std::get<vector_t <AssetType>>(m_gpuObjects);}
@@ -911,24 +1021,43 @@ class CAssetConverter : public core::IReferenceCounted
9111021core::tuple_transform_t <staging_cache_t ,supported_asset_types> m_stagingCaches;
9121022// need a more explicit list of GPU objects that need device-assisted conversion
9131023template <asset::Asset AssetType>
914- struct ConversionRequest
1024+ struct SConversionRequestBase
9151025{
9161026// canonical asset (the one that provides content)
9171027core::smart_refctd_ptr<const AssetType> canonical;
9181028// gpu object to transfer canonical's data to or build it from
9191029asset_traits<AssetType>::video_t * gpuObj;
920- // only relevant for images
921- uint16_t recomputeMips = 0 ;
9221030};
923- template <asset::Asset AssetType>
924- using conversion_requests_t = core::vector<ConversionRequest<AssetType>>;
925- using convertible_asset_types = core::type_list<
926- asset::ICPUBuffer,
927- asset::ICPUImage/* ,
928- asset::ICPUBottomLevelAccelerationStructure,
929- asset::ICPUTopLevelAccelerationStructure*/
930- >;
931- core::tuple_transform_t <conversion_requests_t ,convertible_asset_types> m_conversionRequests;
1031+ using SConvReqBuffer = SConversionRequestBase<asset::ICPUBuffer>;
1032+ core::vector<SConvReqBuffer> m_bufferConversions;
1033+ struct SConvReqImage : SConversionRequestBase<asset::ICPUImage>
1034+ {
1035+ bool recomputeMips = 0 ;
1036+ };
1037+ core::vector<SConvReqImage> m_imageConversions;
1038+ template <typename CPUAccelerationStructure>// requires std::is_base_of_v<asset::ICPUAccelerationStructure,CPUAccelerationStructure>
1039+ struct SConvReqAccelerationStructure : SConversionRequestBase<CPUAccelerationStructure>
1040+ {
1041+ constexpr static inline uint64_t WontCompact = (0x1ull <<48 )-1 ;
1042+ inline bool compact () const {return compactedASWriteOffset!=WontCompact;}
1043+
1044+ using build_f = typename CPUAccelerationStructure::BUILD_FLAGS;
1045+ inline void setBuildFlags (const build_f _flags) {buildFlags = static_cast <uint16_t >(_flags);}
1046+ inline build_f getBuildFlags () const {return static_cast <build_f>(buildFlags);}
1047+
1048+
1049+ uint64_t compactedASWriteOffset : 48 = WontCompact;
1050+ uint64_t buildFlags : 16 = static_cast <uint16_t >(build_f::NONE);
1051+ };
1052+ core::vector<SConvReqAccelerationStructure<asset::ICPUBottomLevelAccelerationStructure>> m_blasConversions[2 ];
1053+ core::vector<SConvReqAccelerationStructure<asset::ICPUTopLevelAccelerationStructure>> m_tlasConversions[2 ];
1054+
1055+ //
1056+ uint64_t m_minASBuildScratchSize[2 ] = {0 ,0 };
1057+ uint64_t m_maxASBuildScratchSize[2 ] = {0 ,0 };
1058+ core::bitflag<IGPUBuffer::E_USAGE_FLAGS> m_ASBuildScratchUsages = IGPUBuffer::E_USAGE_FLAGS::EUF_NONE;
1059+ uint8_t m_willHostBuildSomeAS : 1 = false ;
1060+ uint8_t m_willCompactSomeAS : 1 = false ;
9321061
9331062//
9341063core::bitflag<IQueue::FAMILY_FLAGS> m_queueFlags = IQueue::FAMILY_FLAGS::NONE;
0 commit comments