
Commit 3c7931c

Merge pull request #505 from NVlabs/jit
feat: JIT mode for 2-5x inference and 1.5x training speed up
2 parents: d9e5274 + 1317f3b
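
Before the file-by-file diff, here is how the new mode is enabled, quoted from the README changes included in this commit (the surrounding `config` and dimension variables are the README's own placeholders):

```cpp
// C++ API (from the updated README): create a model, then opt into JIT fusion where supported.
auto model = create_from_config(n_input_dims, n_output_dims, config);
model->set_jit_fusion(supports_jit_fusion()); // Optional: accelerate with JIT fusion
```

The PyTorch bindings gain an equivalent switch, `model.jit_fusion = tcnn.supports_jit_fusion()`, visible in the README diff further down.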

103 files changed: +23,472 / -467 lines (large commit; only part of the diff is shown below)

.editorconfig

Lines changed: 2 additions & 1 deletion
@@ -6,10 +6,11 @@ insert_final_newline = true
 indent_style = tab
 indent_size = 4
 trim_trailing_whitespace = true
+max_line_length = 140

 [*.md]
 trim_trailing_whitespace = false

-[*.{clangd,yml}]
+[*.{clangd,nix,yml}]
 indent_style = space
 indent_size = 2

.github/workflows/main.yml

Lines changed: 2 additions & 5 deletions
@@ -30,9 +30,6 @@ jobs:
       - os: ubuntu-22.04
         cuda: "11.8"
         arch: 89
-      - os: ubuntu-22.04
-        cuda: "11.7"
-        arch: 89
       - os: ubuntu-22.04
         cuda: "11.7"
         arch: 86
@@ -67,7 +64,7 @@ jobs:
         run: ./dependencies/cuda-cmake-github-actions/scripts/actions/install_cuda_ubuntu.sh
         shell: bash
       - name: CMake
-        run: cmake . -B ${{ env.build_dir }} -DCMAKE_BUILD_TYPE=${{ env.config }}
+        run: cmake . -B ${{ env.build_dir }} -DCMAKE_BUILD_TYPE=${{ env.config }} -DTCNN_BUILD_TESTS=1
       - name: Build
         working-directory: ${{ env.build_dir }}
         run: cmake --build . --target all --verbose -j `nproc`
@@ -121,7 +118,7 @@ jobs:
         shell: powershell
         run: .\dependencies\cuda-cmake-github-actions\scripts\actions\install_cuda_windows.ps1
       - name: CMake
-        run: cmake . -B ${{ env.build_dir }} -G "${{ matrix.visual_studio }}" -A x64
+        run: cmake . -B ${{ env.build_dir }} -G "${{ matrix.visual_studio }}" -A x64 -DTCNN_BUILD_TESTS=1
       - name: Build
         working-directory: ${{ env.build_dir }}
         run: cmake --build . --config ${{ env.config }} --target ALL_BUILD --verbose

.gitignore

Lines changed: 1 addition & 0 deletions
@@ -4,6 +4,7 @@
 __pycache__
 build*
 dist
+rtc
 /.cache
 /.vscode
 /.direnv

.gitmodules

Lines changed: 3 additions & 0 deletions
@@ -4,3 +4,6 @@
 [submodule "dependencies/fmt"]
     path = dependencies/fmt
     url = https://github.com/fmtlib/fmt
+[submodule "dependencies/cmrc"]
+    path = dependencies/cmrc
+    url = https://github.com/vector-of-bool/cmrc

CITATION.cff

Lines changed: 1 addition & 1 deletion
@@ -20,5 +20,5 @@ keywords:
 - 'neural network, tiny, tensor cores, cuda'
 license: BSD-3-Clause
 license-url: https://github.com/NVlabs/tiny-cuda-nn/blob/master/LICENSE.txt
-version: 1.7
+version: 2.0
 date-released: '2021-04-21'

CMakeLists.txt

Lines changed: 117 additions & 19 deletions
@@ -1,5 +1,5 @@
-# Copyright (c) 2020-2023, NVIDIA CORPORATION. All rights reserved.
-#
+# Copyright (c) 2020-2025, NVIDIA CORPORATION. All rights reserved.
+#
 # Redistribution and use in source and binary forms, with or without modification, are permitted
 # provided that the following conditions are met:
 #     * Redistributions of source code must retain the above copyright notice, this list of
@@ -10,7 +10,7 @@
 #     * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
 #       to endorse or promote products derived from this software without specific prior written
 #       permission.
-#
+#
 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
 # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 # FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
@@ -24,15 +24,23 @@ cmake_minimum_required(VERSION 3.18)

 project(
     tiny-cuda-nn
-    VERSION 1.7
+    VERSION 2.0
     DESCRIPTION "Lightning fast & tiny C++/CUDA neural network framework"
     LANGUAGES CXX CUDA
 )

+option(TCNN_ALLOW_CUBLAS_CUSOLVER "Allows tiny-cuda-nn to use cuBLAS and cuSolver. Only required for the Shampoo optimizer." OFF)
 option(TCNN_BUILD_BENCHMARK "Build tiny-cuda-nn example benchmark?" ON)
 option(TCNN_BUILD_EXAMPLES "Build tiny-cuda-nn example applications?" ON)
-option(TCNN_ALLOW_CUBLAS_CUSOLVER "Allows tiny-cuda-nn to use cuBLAS and cuSolver. Only required for the Shampoo optimizer." OFF)
+option(TCNN_BUILD_NO_FWD_BWD "Build without offline compiled forward and backward kernels?" OFF)
+option(TCNN_BUILD_TESTS "Build tiny-cuda-nn's tests?" OFF)
+option(TCNN_BUILD_WITH_RTC "Build support for runtime compilation of fully fused kernels?" ON)
+option(TCNN_BUILD_USE_FAST_MATH "Build tiny-cuda-nn with '--use_fast_math' option?" ON)
+
+set(TCNN_EXTERNAL_FMT "" CACHE STRING "If non-empty, the `fmt` target is supplied externally with the given name.")
+
 set(TCNN_CUDA_ARCHITECTURES "" CACHE STRING "Build tiny-cuda-nn for a specific GPU architecture.")
+option(TCNN_LINK_CUDA "Link tiny-cuda-nn to CUDA libraries?" ON)

 ###############################################################################
 # Build type and C++ compiler setup
@@ -57,6 +65,11 @@ if (APPLE)
     set(CMAKE_MACOSX_RPATH ON)
 endif()

+if (CMAKE_EXPORT_COMPILE_COMMANDS)
+    set(CMAKE_CXX_STANDARD_INCLUDE_DIRECTORIES ${CMAKE_CXX_IMPLICIT_INCLUDE_DIRECTORIES})
+    set(CMAKE_CUDA_STANDARD_INCLUDE_DIRECTORIES ${CMAKE_CUDA_IMPLICIT_INCLUDE_DIRECTORIES})
+endif()
+
 if (MSVC)
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /D_CRT_SECURE_NO_WARNINGS")
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /MP")
@@ -204,17 +217,44 @@ if (CUDA_VERSION VERSION_LESS 10.2)
     message(FATAL_ERROR "CUDA version too low. tiny-cuda-nn require CUDA 10.2 or higher.")
 endif()

-list(APPEND TCNN_LIBRARIES cuda)
-list(APPEND TCNN_DEFINITIONS -DTCNN_MIN_GPU_ARCH=${MIN_GPU_ARCH})
+list(APPEND TCNN_INCLUDES "include")
+
+if (TCNN_HAS_PARENT)
+    set(TCNN_DEFINITIONS ${TCNN_DEFINITIONS} PARENT_SCOPE)
+endif()
+
+# Only compile the shampoo optimizer if
+# a recent enough cuBLAS version is available.
 if (TCNN_ALLOW_CUBLAS_CUSOLVER AND CUDA_VERSION VERSION_GREATER_EQUAL 11.0)
-    # Only compile the shampoo optimizer if
-    # a new enough cuBLAS version is available.
-    list(APPEND TCNN_LIBRARIES cublas)
+    set(TCNN_BUILD_WITH_SHAMPOO ON)
+else()
+    set(TCNN_BUILD_WITH_SHAMPOO OFF)
+endif()
+
+if (TCNN_BUILD_WITH_SHAMPOO)
     list(APPEND TCNN_DEFINITIONS -DTCNN_SHAMPOO)
 endif()

-if (TCNN_HAS_PARENT)
-    set(TCNN_DEFINITIONS ${TCNN_DEFINITIONS} PARENT_SCOPE)
+if (TCNN_BUILD_WITH_RTC)
+    list(APPEND TCNN_DEFINITIONS -DTCNN_RTC)
+endif()
+
+if (TCNN_BUILD_USE_FAST_MATH)
+    list(APPEND TCNN_DEFINITIONS -DTCNN_RTC_USE_FAST_MATH)
+endif()
+
+if (TCNN_BUILD_NO_FWD_BWD)
+    list(APPEND TCNN_DEFINITIONS -DTCNN_NO_FWD_BWD)
+endif()
+
+if (TCNN_LINK_CUDA)
+    list(APPEND TCNN_LIBRARIES cuda)
+    if (TCNN_BUILD_WITH_SHAMPOO)
+        list(APPEND TCNN_LIBRARIES cublas)
+    endif()
+    if (TCNN_BUILD_WITH_RTC)
+        list(APPEND TCNN_LIBRARIES nvrtc)
+    endif()
 endif()

 if (MSVC)
@@ -224,6 +264,9 @@ else()
     list(APPEND CUDA_NVCC_FLAGS "-Xcompiler=-fno-strict-aliasing")
     list(APPEND CUDA_NVCC_FLAGS "-Xcudafe=--diag_suppress=unrecognized_gcc_pragma")
 endif()
+if (TCNN_BUILD_USE_FAST_MATH)
+    list(APPEND CUDA_NVCC_FLAGS "--use_fast_math")
+endif()
 list(APPEND CUDA_NVCC_FLAGS "--extended-lambda")
 list(APPEND CUDA_NVCC_FLAGS "--expt-relaxed-constexpr")

@@ -237,7 +280,14 @@ if (NOT MSVC)
 endif()

 set(BUILD_SHARED_LIBS OFF)
-add_subdirectory("dependencies/fmt")
+
+if (TCNN_EXTERNAL_FMT)
+    list(APPEND TCNN_LIBRARIES "${TCNN_EXTERNAL_FMT}")
+else()
+    add_subdirectory("dependencies/fmt")
+    list(APPEND TCNN_LIBRARIES fmt)
+    list(APPEND TCNN_INCLUDES "dependencies/fmt/include")
+endif()

 ###############################################################################
 # tiny-cuda-nn library, samples, and benchmarks
@@ -258,31 +308,79 @@ set(TCNN_SOURCES
     src/object.cu
     src/optimizer.cu
     src/reduce_sum.cu
+    src/rtc_kernel.cu
 )

 if (MIN_GPU_ARCH GREATER 70)
     list(APPEND TCNN_SOURCES src/fully_fused_mlp.cu)
 endif()

+list(APPEND TCNN_DEFINITIONS -DTCNN_MIN_GPU_ARCH=${MIN_GPU_ARCH})
+
 ###############################################################################
 # Linker / library
 ###############################################################################

-add_library(tiny-cuda-nn STATIC ${TCNN_SOURCES})
-target_compile_definitions(tiny-cuda-nn PUBLIC ${TCNN_DEFINITIONS})
-target_compile_options(tiny-cuda-nn PUBLIC $<$<COMPILE_LANGUAGE:CUDA>:${CUDA_NVCC_FLAGS}>)
-target_include_directories(tiny-cuda-nn PUBLIC
+include("${CMAKE_CURRENT_SOURCE_DIR}/dependencies/cmrc/CMakeRC.cmake")
+cmrc_add_resource_library(tiny-cuda-nn-resources NAMESPACE tcnn)
+list(APPEND TCNN_DEFINITIONS -DTCNN_CMRC)
+list(APPEND TCNN_LIBRARIES tiny-cuda-nn-resources)
+
+if (TCNN_BUILD_WITH_RTC)
+    # Fetch CUDA headers and folders that will be required by the runtime compiler
+    # and include those headers with the compiled binary of tcnn.
+    foreach (CUDA_INCLUDE_CANDIDATE IN LISTS CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES)
+        if (EXISTS "${CUDA_INCLUDE_CANDIDATE}/cuda_fp16.h")
+            set(CUDA_INCLUDE "${CUDA_INCLUDE_CANDIDATE}")
+            break()
+        endif()
+    endforeach(CUDA_INCLUDE_CANDIDATE)
+
+    if (NOT CUDA_INCLUDE)
+        # If the CUDA include dir couldn't be found via CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES,
+        # try a relative path w.r.t. the CUDA compiler binary as a last-ditch effort.
+        get_filename_component(CUDA_COMPILER_BIN "${CMAKE_CUDA_COMPILER}" DIRECTORY)
+        get_filename_component(CUDA_DIR "${CUDA_COMPILER_BIN}" DIRECTORY)
+        set(CUDA_INCLUDE "${CUDA_DIR}/include")
+    endif()
+
+    file(GLOB CUDA_HEADERS "${CUDA_INCLUDE}/cuda_fp16*" "${CUDA_INCLUDE}/vector*")
+    if (NOT CUDA_HEADERS)
+        message(WARNING "FP16 headers could not be found. JIT compilation will likely fail.")
+    endif()
+
+    file(GLOB_RECURSE TCNN_HEADERS "${CMAKE_CURRENT_SOURCE_DIR}/include/tiny-cuda-nn/*")
+    file(GLOB PCG32_HEADERS "${CMAKE_CURRENT_SOURCE_DIR}/dependencies/pcg32/*")
+
+    cmrc_add_resources(tiny-cuda-nn-resources WHENCE "${CUDA_INCLUDE}" ${CUDA_HEADERS})
+    cmrc_add_resources(tiny-cuda-nn-resources WHENCE "${CMAKE_CURRENT_SOURCE_DIR}/include" ${TCNN_HEADERS})
+    cmrc_add_resources(tiny-cuda-nn-resources WHENCE "${CMAKE_CURRENT_SOURCE_DIR}/dependencies" ${PCG32_HEADERS})
+endif()
+
+list(APPEND TCNN_INCLUDES
     "include"
     "dependencies"
     "dependencies/cutlass/include"
     "dependencies/cutlass/tools/util/include"
-    "dependencies/fmt/include"
 )
-target_link_libraries(tiny-cuda-nn PUBLIC ${CUDA_LIBRARIES} ${TCNN_LIBRARIES} fmt)
+
+add_library(tiny-cuda-nn STATIC ${TCNN_SOURCES})
+target_compile_definitions(tiny-cuda-nn PUBLIC ${TCNN_DEFINITIONS})
+target_compile_options(tiny-cuda-nn PUBLIC $<$<COMPILE_LANGUAGE:CUDA>:${CUDA_NVCC_FLAGS}>)
+target_include_directories(tiny-cuda-nn PUBLIC ${TCNN_INCLUDES})
+target_link_libraries(tiny-cuda-nn PUBLIC ${TCNN_LIBRARIES})

 if (TCNN_BUILD_EXAMPLES)
     add_subdirectory("samples")
 endif()
+
 if (TCNN_BUILD_BENCHMARK)
     add_subdirectory("benchmarks/image")
+    add_subdirectory("benchmarks/mlp")
+endif()
+
+if (TCNN_BUILD_TESTS)
+    enable_testing()
+    add_subdirectory(tests)
+    list(APPEND CMAKE_CTEST_ARGUMENTS "--output-on-failure")
 endif()
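
The CMRC resource library configured above embeds the CUDA, tiny-cuda-nn, and pcg32 headers into the compiled binary so the NVRTC-based runtime compiler can find them without relying on a CUDA toolkit install at runtime. As a rough illustration of the embedding mechanism only (a sketch of the vector-of-bool/cmrc API under the `NAMESPACE tcnn` chosen above, not code from this commit), an embedded header can be read back like so:

```cpp
#include <cmrc/cmrc.hpp>

#include <cstdio>
#include <string>

CMRC_DECLARE(tcnn); // matches `cmrc_add_resource_library(... NAMESPACE tcnn)` above

int main() {
    // Resources are addressed relative to the WHENCE directories passed to
    // `cmrc_add_resources`, e.g. "cuda_fp16.h" from the CUDA include dir.
    auto fs = cmrc::tcnn::get_filesystem();
    auto file = fs.open("cuda_fp16.h");
    std::string contents{file.begin(), file.end()};
    std::printf("Embedded cuda_fp16.h: %zu bytes\n", contents.size());
    return 0;
}
```

Whether `src/rtc_kernel.cu` reads the embedded headers exactly this way is not shown in this excerpt; the snippet only demonstrates the mechanism that the CMakeLists change sets up.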

LICENSE.txt

Lines changed: 21 additions & 21 deletions
@@ -1,21 +1,21 @@
(Whitespace-only change: all 21 lines of the BSD 3-Clause license text, beginning "Copyright (c) 2020-2025, NVIDIA CORPORATION. All rights reserved.", are removed and re-added with identical visible content.)

README.md

Lines changed: 9 additions & 1 deletion
@@ -44,6 +44,7 @@ nlohmann::json config = {
 using namespace tcnn;

 auto model = create_from_config(n_input_dims, n_output_dims, config);
+model->set_jit_fusion(supports_jit_fusion()); // Optional: accelerate with JIT fusion

 // Train the model (batch_size must be a multiple of tcnn::BATCH_SIZE_GRANULARITY)
 GPUMatrix<float> training_batch_inputs(n_input_dims, batch_size);
@@ -65,6 +66,11 @@ GPUMatrix<float> inference_outputs(n_output_dims, batch_size);
 model.network->inference(inference_inputs, inference_outputs);
 ```

+**Important**: enabling JIT fusion is a new, optional feature with tiny-cuda-nn v2.0 and later.
+It is recommended to *always* enable it for a performance boost of 1.5x to 2.5x, depending on the model and GPU.
+Newer GPUs exhibit larger speedups.
+Please [open an issue](https://github.com/NVlabs/tiny-cuda-nn/issues) if you encounter a slowdown or other problems with JIT fusion enabled.
+

 ## Example: learning a 2D image

@@ -161,6 +167,8 @@ model = tcnn.NetworkWithInputEncoding(
 encoding = tcnn.Encoding(n_input_dims, config["encoding"])
 network = tcnn.Network(encoding.n_output_dims, n_output_dims, config["network"])
 model = torch.nn.Sequential(encoding, network)
+
+model.jit_fusion = tcnn.supports_jit_fusion() # Optional: accelerate with JIT fusion
 ```

 See `samples/mlp_learning_an_image_pytorch.py` for an example.
@@ -225,7 +233,7 @@ If you use it in your research, we would appreciate a citation via
     month = {4},
     title = {{tiny-cuda-nn}},
     url = {https://github.com/NVlabs/tiny-cuda-nn},
-    version = {1.7},
+    version = {2.0},
     year = {2021}
 }
 ```

benchmarks/image/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2023, NVIDIA CORPORATION. All rights reserved.
+# Copyright (c) 2020-2025, NVIDIA CORPORATION. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without modification, are permitted
 # provided that the following conditions are met:

benchmarks/image/bench_ours.cu

Lines changed: 1 addition & 1 deletion
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2023, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2020-2025, NVIDIA CORPORATION. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without modification, are permitted
  * provided that the following conditions are met:
