
Commit f58a7e3

Merge branch 'PaddlePaddle:develop' into threshold
2 parents: c936f55 + ccac768

5,725 files changed: +228,832 additions, -80,609 deletions


.clang-tidy

Lines changed: 5 additions & 5 deletions
@@ -11,9 +11,9 @@ bugprone-copy-constructor-init,
 bugprone-exception-escape,
 -bugprone-fold-init-type,
 -bugprone-forwarding-reference-overload,
--bugprone-inaccurate-erase,
+bugprone-inaccurate-erase,
 bugprone-incorrect-roundings,
--bugprone-infinite-loop,
+bugprone-infinite-loop,
 bugprone-integer-division,
 -bugprone-macro-repeated-side-effects,
 -bugprone-misplaced-operator-in-strlen-in-alloc,
@@ -28,7 +28,7 @@ bugprone-signed-char-misuse,
 -bugprone-sizeof-container,
 -bugprone-sizeof-expression,
 -bugprone-string-constructor,
--bugprone-string-integer-assignment,
+bugprone-string-integer-assignment,
 -bugprone-string-literal-with-embedded-nul,
 -bugprone-suspicious-enum-usage,
 -bugprone-suspicious-memset-usage,
@@ -123,7 +123,7 @@ clang-analyzer-optin.portability.UnixAPI,
 -clang-analyzer-osx.coreFoundation.CFRetainRelease,
 -clang-analyzer-osx.coreFoundation.containers.OutOfBounds,
 -clang-analyzer-osx.coreFoundation.containers.PointerSizedValues,
--clang-analyzer-security.FloatLoopCounter,
+clang-analyzer-security.FloatLoopCounter,
 -clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling,
 -clang-analyzer-security.insecureAPI.SecuritySyntaxChecker,
 -clang-analyzer-security.insecureAPI.UncheckedReturn,
@@ -198,7 +198,7 @@ performance-move-const-arg,
 -performance-move-constructor-init,
 -performance-no-automatic-move,
 performance-noexcept-move-constructor,
--performance-trivially-destructible,
+performance-trivially-destructible,
 -performance-type-promotion-in-math-fn,
 -performance-unnecessary-copy-initialization,
 readability-container-size-empty,
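
These hunks enable five previously disabled checks (bugprone-inaccurate-erase, bugprone-infinite-loop, bugprone-string-integer-assignment, clang-analyzer-security.FloatLoopCounter, performance-trivially-destructible) by dropping their leading "-" in the .clang-tidy check list. As a rough illustration of what one of them catches, here is a minimal standalone C++ sketch (not taken from the Paddle codebase) of the erase-remove mistake that bugprone-inaccurate-erase flags:

```cpp
#include <algorithm>
#include <vector>

int main() {
  std::vector<int> values = {1, 2, 3, 2, 4};

  // Would be flagged by bugprone-inaccurate-erase: std::remove only shifts the
  // surviving elements forward, and erase() with a single iterator removes
  // just one element, leaving stale values at the tail of the vector.
  //   values.erase(std::remove(values.begin(), values.end(), 2));

  // Correct erase-remove idiom: erase the entire [new_end, end()) range.
  values.erase(std::remove(values.begin(), values.end(), 2), values.end());
  return 0;
}
```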

.flake8

Lines changed: 0 additions & 28 deletions
This file was deleted.

.github/PULL_REQUEST_TEMPLATE.md

Lines changed: 8 additions & 4 deletions
@@ -1,9 +1,13 @@
+<!-- TemplateReference: https://github.com/PaddlePaddle/Paddle/wiki/PULL-REQUEST-TEMPLATE--REFERENCE -->
 <!-- Demo: https://github.com/PaddlePaddle/Paddle/pull/24810 -->
-### PR types
-<!-- One of [ New features | Bug fixes | Function optimization | Performance optimization | Breaking changes | Others ] -->
 
-### PR changes
-<!-- One of [ OPs | APIs | Docs | Others ] -->
+### PR Category
+<!-- One of [ User Experience | Execute Infrastructure | Operator Mechanism | CINN | Custom Device | Performance Optimization | Distributed Strategy | Parameter Server | Communication Library | Auto Parallel | Inference | Environment Adaptation | Others ] -->
+
+
+### PR Types
+<!-- One of [ New features | Bug fixes | Improvements | Performance | BC Breaking | Deprecations | Docs | Devs | Not User Facing | Security | Deprecations | Others ] -->
+
 
 ### Description
 <!-- Describe what you’ve done -->

.gitignore

Lines changed: 4 additions & 15 deletions
@@ -33,6 +33,9 @@ paddle/phi/include/*
 paddle/phi/infermeta/generated.*
 paddle/fluid/prim/api/generated_prim/*.cc
 paddle/fluid/prim/api/generated_prim/*.h
+python/paddle/libs/bfloat16.h
+python/paddle/libs/cinn_cuda_runtime_source.cuh
+python/paddle/libs/float16.h
 *.DS_Store
 *.vs
 build/
@@ -78,7 +81,7 @@ tools/nvcc_lazy
 paddle/phi/kernels/sparse/gpu/cutlass_generator/all_gemm_operations.h
 paddle/phi/kernels/sparse/gpu/cutlass_generator/configurations.h
 
-# these files (directories) are generated before build system generation
+#these files (directories) are generated before build system generation
 paddle/fluid/operators/generated_op*.cc
 paddle/fluid/operators/generated_sparse_op.cc
 paddle/fluid/operators/generated_static_op.cc
@@ -104,17 +107,3 @@ paddle/phi/kernels/fusion/cutlass/cutlass_kernels/fpA_intB_gemm/autogen/*
 paddle/phi/kernels/fusion/cutlass/cutlass_kernels/fpA_intB_gemm/autogen_tmp/*
 paddle/fluid/pybind/static_op_function.*
 paddle/fluid/pybind/ops_api.cc
-paddle/fluid/pir/dialect/operator/ir/pd_api.*
-paddle/fluid/pir/dialect/operator/ir/op_decomp.cc
-paddle/fluid/pir/dialect/operator/ir/pd_op_vjp.cc
-paddle/fluid/pir/dialect/operator/ir/pd_op.*
-paddle/fluid/pir/dialect/operator/ir/pd_op_bwd.*
-paddle/fluid/pir/dialect/operator/ir/pd_op_fused.*
-paddle/fluid/pir/dialect/operator/ir/pd_op_fused_bwd.*
-paddle/fluid/pir/dialect/operator/ir/pd_pir_op.*
-paddle/fluid/pir/dialect/operator/ir/pd_pir_op_bwd.*
-paddle/fluid/pir/dialect/operator/ir/pd_pir_op_update.*
-paddle/fluid/pir/dialect/operator/ir/pd_op_info.*
-paddle/cinn/hlir/dialect/generated/ops.parsed.yaml
-paddle/cinn/hlir/dialect/operator/ir/cinn_op.*
-paddle/cinn/hlir/dialect/operator/ir/cinn_op_info.*

.gitmodules

Lines changed: 8 additions & 0 deletions
@@ -110,3 +110,11 @@
 path = third_party/cccl
 url = https://github.com/NVIDIA/cccl.git
 ignore = dirty
+[submodule "third_party/cryptopp"]
+path = third_party/cryptopp
+url = https://github.com/weidai11/cryptopp.git
+ignore = dirty
+[submodule "third_party/cryptopp-cmake"]
+path = third_party/cryptopp-cmake
+url = https://github.com/noloader/cryptopp-cmake.git
+ignore = dirty
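
The two new submodules vendor the Crypto++ library and its CMake build wrapper. As a rough, hypothetical sketch of the kind of primitive Crypto++ provides (a standalone example, not Paddle code; it assumes the headers are installed under cryptopp/), computing a SHA-256 digest looks like this:

```cpp
#include <iostream>
#include <string>

#include "cryptopp/filters.h"
#include "cryptopp/hex.h"
#include "cryptopp/sha.h"

int main() {
  const std::string message = "PaddlePaddle";
  std::string digest;
  CryptoPP::SHA256 sha256;
  // Pump the message through the hash, hex-encode the result, and collect it
  // into the digest string.
  CryptoPP::StringSource pipeline(
      message, /*pumpAll=*/true,
      new CryptoPP::HashFilter(
          sha256, new CryptoPP::HexEncoder(new CryptoPP::StringSink(digest))));
  std::cout << "SHA-256(" << message << ") = " << digest << "\n";
  return 0;
}
```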

.pre-commit-config.yaml

Lines changed: 2 additions & 7 deletions
@@ -36,7 +36,7 @@ repos:
 # Exclude some unit test files that require tabs.
 exclude: |
 (?x)^(
-test/dygraph_to_static/test_legacy_error.py
+test/dygraph_to_static/test_error.py
 )$
 - repo: local
 hooks:
@@ -56,13 +56,8 @@ repos:
 hooks:
 - id: black
 files: (.*\.(py|pyi|bzl)|BUILD|.*\.BUILD|WORKSPACE)$
-- repo: https://github.com/PyCQA/flake8
-rev: 5.0.4
-hooks:
-- id: flake8
-args: ["--config=.flake8"]
 - repo: https://github.com/astral-sh/ruff-pre-commit
-rev: v0.1.5
+rev: v0.3.0
 hooks:
 - id: ruff
 args: [--fix, --exit-non-zero-on-fix, --no-cache]

CMakeLists.txt

Lines changed: 31 additions & 34 deletions
@@ -54,17 +54,20 @@ option(WITH_XPU "Compile PaddlePaddle with BAIDU KUNLUN XPU" OFF)
 option(WITH_XPU_KP "Compile PaddlePaddle with BAIDU XPU compiler " OFF)
 option(WITH_XPU_XFT "Compile PaddlePaddle with BAIDU XPU-XFT" OFF)
 option(WITH_XPU_PLUGIN "Compile PaddlePaddle with BAIDU XPU plugin" OFF)
-option(WITH_XPU_XHPC "Compile PaddlePaddle with BAIDU XPU-HPC library" OFF)
+option(WITH_XPU_XHPC "Compile PaddlePaddle with BAIDU XPU-HPC library"
+${WITH_XPU})
 option(WITH_WIN_DUMP_DBG "Compile with windows core dump debug mode" OFF)
 option(WITH_ROCM "Compile PaddlePaddle with ROCM platform" OFF)
 option(WITH_IPU "Compile PaddlePaddle with Graphcore IPU" OFF)
 option(WITH_ONNXRUNTIME "Compile PaddlePaddle with ONNXRUNTIME" OFF)
 option(WITH_CUSPARSELT "Compile PaddlePaddle with CUSPARSELT" OFF)
 option(WITH_SETUP_INSTALL "Compile PaddlePaddle with setup.py" OFF)
 option(WITH_SHARED_PHI "Compile PaddlePaddle with SHARED LIB of PHI" ON)
-option(CINN_ONLY "Compile CINN only in Paddle" OFF)
 option(CINN_WITH_CUDNN "Compile CINN with CUDNN support" ON)
-
+option(WITH_PIP_CUDA_LIBRARIES
+"Paddle uses the CUDA library provided by NVIDIA" OFF)
+option(WITH_NIGHTLY_BUILD
+"Compile nightly paddle whl package of the develop branch" OFF)
 find_package(Git REQUIRED)
 
 # config GIT_URL with github mirrors to speed up dependent repos clone
@@ -95,6 +98,18 @@ if(WITH_GPU AND WITH_ROCM)
 endif()
 
 if(WITH_GPU AND NOT APPLE)
+#(Note risemeup1): The cudart dynamic library libcudart.so is used by set CUDA_USE_STATIC_CUDA_RUNTIME and CMAKE_CUDA_FLAGS
+if(CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL
+"x86_64")
+set(CUDA_USE_STATIC_CUDA_RUNTIME
+OFF
+CACHE BOOL "" FORCE)
+set(CMAKE_CUDA_FLAGS "--cudart shared")
+if(WITH_PIP_CUDA_LIBRARIES)
+#(Note risemeup1): Flag 'WITH_PIP_CUDA_LIBRARIES' will be used in dynamic_loader.cc to search for CUDA-related .so files through the Python libraries provided by NVIDIA.
+add_definitions(-DWITH_PIP_CUDA_LIBRARIES)
+endif()
+endif()
 enable_language(CUDA)
 message(STATUS "CUDA compiler: ${CMAKE_CUDA_COMPILER}, version: "
 "${CMAKE_CUDA_COMPILER_ID} ${CMAKE_CUDA_COMPILER_VERSION}")
@@ -112,6 +127,7 @@ if((CMAKE_CXX_COMPILER_ID STREQUAL "GNU") AND CMAKE_CXX_COMPILER_VERSION
 endif()
 
 # MUSL build turn off warnings
+
 if(WITH_MUSL)
 set(CMAKE_CXX_FLAGS
 "${CMAKE_CXX_FLAGS} -Wno-error=deprecated-declarations -Wno-deprecated-declarations -Wno-error=pessimizing-move -Wno-error=deprecated-copy"
@@ -126,7 +142,10 @@ endif()
 if(WIN32)
 option(MSVC_STATIC_CRT "use static C Runtime library by default" ON)
 message("Build static library of PHI")
-set(CMAKE_SUPPRESS_REGENERATION ON)
+# (Note xuxinyi04): If CMAKE_SUPPRESS_REGENERATION is OFF, which is default, then CMake adds a
+# special target on which all other targets depend that checks the build system and optionally
+# re-runs CMake to regenerate the build system when the target specification source changes.
+set(CMAKE_SUPPRESS_REGENERATION OFF)
 set(CMAKE_STATIC_LIBRARY_PREFIX lib)
 set(WITH_SHARED_PHI
 OFF
@@ -224,6 +243,8 @@ if(WIN32)
 "${${flag_var}} /ignore:4049 /ignore:4217 /ignore:4006 /ignore:4221")
 if(MSVC_STATIC_CRT)
 set(${flag_var} "${${flag_var}} /NODEFAULTLIB:MSVCRT.LIB")
+else()
+set(${flag_var} "${${flag_var}} /NODEFAULTLIB:LIBCMT.LIB")
 endif()
 endforeach()
 
@@ -314,12 +335,11 @@ option(WITH_FLPS "FL PS mode" OFF)
 option(WITH_RPC "Compile with rpc support" ${WITH_DISTRIBUTE})
 option(WITH_CUDNN_FRONTEND
 "Compile with CUDNN Frontend API support (experimental)" OFF)
-option(WITH_CUDNN_DSO "Compile PaddlePaddle with cuDNN dynamic-link libraries"
-OFF)
 option(WITH_SHARED_IR "Compile PaddlePaddle with SHARED LIB of IR" ON)
 option(WITH_NVCC_LAZY
 "Compile PaddlePaddle with nvcc lazy mode, used for CI-Inference only."
 ON)
+option(BUILD_WHL_PACKAGE "Build paddle whl package after compilation" ON)
 
 if(WITH_RECORD_BUILDTIME)
 set_property(
@@ -410,16 +430,6 @@ if(NOT WITH_GPU AND WITH_NCCL)
 CACHE STRING "Disable NCCL when compiling without GPU" FORCE)
 endif()
 
-if(NOT WITH_GPU AND WITH_CUDNN_DSO)
-message(
-WARNING
-"Can't compile with cuDNN libraries when compiling without GPU. Force WITH_CUDNN_DSO=OFF."
-)
-set(WITH_CUDNN_DSO
-OFF
-CACHE STRING "Disable cuDNN libraries when compiling without GPU" FORCE)
-endif()
-
 # force WITH_XPU on when WITH_XPU_KP
 if(WITH_XPU_KP AND NOT WITH_XPU)
 message(
@@ -598,7 +608,10 @@ include(third_party
 )# download, build, install third_party, Contains about 20+ dependencies
 
 include(flags) # set paddle compile flags
-
+include(util) # set unittest and link libs
+include(version) # set PADDLE_VERSION
+include(coveralls) # set code coverage
+include(configure) # add paddle env configuration
 #------------- cinn cmake config start --------------
 
 if(WITH_CINN)
@@ -614,20 +627,9 @@ if(WITH_CINN)
 add_definitions(-DCINN_WITH_CUDNN)
 endif()
 endif()
+
 include(cmake/cinn.cmake)
 add_definitions(-DPADDLE_WITH_CINN)
-
-if(CINN_ONLY)
-add_definitions(-DCINN_WITH_ONLY)
-if(WITH_PYTHON)
-add_subdirectory(python)
-endif()
-add_subdirectory(test)
-if(NOT WITH_GFLAGS)
-add_subdirectory(paddle/utils)
-endif()
-return()
-endif()
 endif()
 
 #------------- cinn cmake config end --------------
@@ -638,11 +640,6 @@ if(WITH_PROFILER)
 add_definitions(-DWITH_GPERFTOOLS)
 endif()
 
-include(util) # set unittest and link libs
-include(version) # set PADDLE_VERSION
-include(coveralls) # set code coverage
-include(configure) # add paddle env configuration
-
 include_directories("${PADDLE_SOURCE_DIR}")
 
 if(WITH_NV_JETSON)
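
The note attached to WITH_PIP_CUDA_LIBRARIES says the compile definition is consumed in dynamic_loader.cc so that CUDA shared objects can be found inside the pip wheels that NVIDIA publishes. A minimal sketch of that idea follows; it is not Paddle's actual dynamic_loader.cc, and the wheel layout and library name below are assumptions:

```cpp
#include <dlfcn.h>

#include <string>

// Hypothetical helper: when WITH_PIP_CUDA_LIBRARIES is defined, prefer the
// libcudart shipped inside the nvidia-* pip wheels; otherwise fall back to the
// regular dynamic-loader search path (LD_LIBRARY_PATH, ldconfig cache).
void* LoadCudart(const std::string& site_packages) {
#if defined(WITH_PIP_CUDA_LIBRARIES)
  // Assumed wheel layout: site-packages/nvidia/cuda_runtime/lib/libcudart.so.12
  const std::string pip_path =
      site_packages + "/nvidia/cuda_runtime/lib/libcudart.so.12";
  if (void* handle = dlopen(pip_path.c_str(), RTLD_LAZY | RTLD_GLOBAL)) {
    return handle;
  }
#endif
  return dlopen("libcudart.so", RTLD_LAZY | RTLD_GLOBAL);
}
```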

README.md

Lines changed: 3 additions & 3 deletions
@@ -15,11 +15,11 @@ English | [简体中文](./README_cn.md) | [日本語](./README_ja.md)
 Welcome to the PaddlePaddle GitHub.
 
 PaddlePaddle, as the first independent R&D deep learning platform in China, has been officially open-sourced to professional communities since 2016. It is an industrial platform with advanced technologies and rich features that cover core deep learning frameworks, basic model libraries, end-to-end development kits, tools & components as well as service platforms.
-PaddlePaddle is originated from industrial practices with dedication and commitments to industrialization. It has been widely adopted by a wide range of sectors including manufacturing, agriculture, enterprise service, and so on while serving more than 8 million developers, 220,000 companies and generating 800,000 models. With such advantages, PaddlePaddle has helped an increasing number of partners commercialize AI.
+PaddlePaddle is originated from industrial practices with dedication and commitments to industrialization. It has been widely adopted by a wide range of sectors including manufacturing, agriculture, enterprise service, and so on while serving more than 10.7 million developers, 235,000 companies and generating 860,000 models. With such advantages, PaddlePaddle has helped an increasing number of partners commercialize AI.
 
 ## Installation
 
-### Latest PaddlePaddle Release: [v2.5](https://github.com/PaddlePaddle/Paddle/tree/release/2.5)
+### Latest PaddlePaddle Release: [v2.6](https://github.com/PaddlePaddle/Paddle/tree/release/2.6)
 
 Our vision is to enable deep learning for everyone via PaddlePaddle.
 Please refer to our [release announcement](https://github.com/PaddlePaddle/Paddle/releases) to track the latest features of PaddlePaddle.
@@ -50,7 +50,7 @@ Now our developers can acquire Tesla V100 online computing resources for free. I
 
 - **High-Performance Inference Engines for Comprehensive Deployment Environments**
 
-PaddlePaddle is not only compatible with models trained in 3rd party open-source frameworks , but also offers complete inference products for various production scenarios. Our inference product line includes [Paddle Inference](https://paddle-inference.readthedocs.io/en/master/guides/introduction/index_intro.html): Native inference library for high-performance server and cloud inference; [Paddle Serving](https://github.com/PaddlePaddle/Serving): A service-oriented framework suitable for distributed and pipeline productions; [Paddle Lite](https://github.com/PaddlePaddle/Paddle-Lite): Ultra-Lightweight inference engine for mobile and IoT environments; [Paddle.js](https://www.paddlepaddle.org.cn/paddle/paddlejs): A frontend inference engine for browser and mini-apps. Furthermore, by great amounts of optimization with leading hardware in each scenario, Paddle inference engines outperform most of the other mainstream frameworks.
+PaddlePaddle is not only compatible with models trained in 3rd party open-source frameworks , but also offers complete inference products for various production scenarios. Our inference product line includes [Paddle Inference](https://www.paddlepaddle.org.cn/inference/master/guides/introduction/index_intro.html): Native inference library for high-performance server and cloud inference; [FastDeploy](https://github.com/PaddlePaddle/FastDeploy): Easy-to-use and High Performance AI model deployment toolkit for Cloud, Mobile and Edge without-of-the-box and unified experience; [Paddle Lite](https://github.com/PaddlePaddle/Paddle-Lite): Ultra-Lightweight inference engine for mobile and IoT environments; [Paddle.js](https://www.paddlepaddle.org.cn/paddle/paddlejs): A frontend inference engine for browser and mini-apps. Furthermore, by great amounts of optimization with leading hardware in each scenario, Paddle inference engines outperform most of the other mainstream frameworks.
 
 - **Industry-Oriented Models and Libraries with Open Source Repositories**
 
README_cn.md

Lines changed: 4 additions & 4 deletions
@@ -14,13 +14,13 @@
 
 欢迎来到 PaddlePaddle GitHub
 
-飞桨(PaddlePaddle)以百度多年的深度学习技术研究和业务应用为基础,是中国首个自主研发、功能完备、 开源开放的产业级深度学习平台,集深度学习核心训练和推理框架、基础模型库、端到端开发套件和丰富的工具组件于一体。目前,飞桨累计开发者800万,服务企业22万家,基于飞桨开源深度学习平台产生了80万个模型。飞桨助力开发者快速实现AI想法,快速上线AI业务。帮助越来越多的行业完成AI赋能,实现产业智能化升级。
+飞桨(PaddlePaddle)以百度多年的深度学习技术研究和业务应用为基础,是中国首个自主研发、功能完备、 开源开放的产业级深度学习平台,集深度学习核心训练和推理框架、基础模型库、端到端开发套件和丰富的工具组件于一体。目前,飞桨累计开发者1070万,服务企业23.5万家,基于飞桨开源深度学习平台产生了86万个模型。飞桨助力开发者快速实现AI想法,快速上线AI业务。帮助越来越多的行业完成AI赋能,实现产业智能化升级。
 
 ## 安装
 
-### PaddlePaddle最新版本: [v2.5](https://github.com/PaddlePaddle/Paddle/tree/release/2.5)
+### PaddlePaddle 最新版本: [v2.6](https://github.com/PaddlePaddle/Paddle/tree/release/2.6)
 
-跟进PaddlePaddle最新特性请参考我们的[版本说明](https://github.com/PaddlePaddle/Paddle/releases)
+跟进 PaddlePaddle 最新特性请参考我们的[版本说明](https://github.com/PaddlePaddle/Paddle/releases)
 
 ### 安装最新稳定版本
 
@@ -48,7 +48,7 @@ PaddlePaddle用户可领取**免费Tesla V100在线算力资源**,训练模型
 
 - **支持多端多平台的高性能推理部署工具**
 
-飞桨不仅广泛兼容第三方开源框架训练的模型部署,并且为不同的场景的生产环境提供了完备的推理引擎,包括适用于高性能服务器及云端推理的原生推理库 [Paddle Inference](https://www.paddlepaddle.org.cn/inference/product_introduction/inference_intro.html)面向分布式、流水线生产环境下自动上云、A/B测试等高阶功能的服务化推理框架 [Paddle Serving](https://github.com/PaddlePaddle/Serving),针对于移动端、物联网场景的轻量化推理引擎 [Paddle Lite](https://github.com/PaddlePaddle/Paddle-Lite),以及在浏览器、小程序等环境下使用的前端推理引擎 [Paddle.js](https://www.paddlepaddle.org.cn/paddle/paddlejs)。同时,透过与不同场景下的主流硬件高度适配优化及异构计算的支持, 飞桨的推理性能也领先绝大部分的主流实现。
+飞桨不仅广泛兼容第三方开源框架训练的模型部署,并且为不同的场景的生产环境提供了完备的推理引擎,包括适用于高性能服务器及云端推理的原生推理库 [Paddle Inference](https://www.paddlepaddle.org.cn/inference/master/guides/introduction/index_intro.html)全场景、易用灵活、极致高效的AI推理部署工具,支持云边端部署工具 [FastDeploy](https://github.com/PaddlePaddle/FastDeploy),针对于移动端、物联网场景的轻量化推理引擎 [Paddle Lite](https://github.com/PaddlePaddle/Paddle-Lite),以及在浏览器、小程序等环境下使用的前端推理引擎 [Paddle.js](https://www.paddlepaddle.org.cn/paddle/paddlejs)。同时,透过与不同场景下的主流硬件高度适配优化及异构计算的支持, 飞桨的推理性能也领先绝大部分的主流实现。
 
 - **面向产业应用,开源开放覆盖多领域的工业级模型库。**
 