2 changes: 1 addition & 1 deletion CMakeLists.txt
@@ -322,7 +322,7 @@ option(WITH_ARM "Compile PaddlePaddle with arm support" OFF)
option(WITH_SW "Compile PaddlePaddle with sw support" OFF)
option(WITH_MIPS "Compile PaddlePaddle with mips support" OFF)
option(WITH_LOONGARCH "Compile PaddlePaddle with loongarch support" OFF)
-option(WITH_MUSL "Compile with musl libc instead of gblic" OFF)
+option(WITH_MUSL "Compile with musl libc instead of glibc" OFF)
option(WITH_UNITY_BUILD "Compile with UnityBuild mode" OFF)
option(WITH_STRIP "Strip so files of Whl packages" OFF)
option(NEW_RELEASE_PYPI
@@ -282,7 +282,7 @@ def _create_grad_send_info(
def map_recv_to_send(a):
# Note: we send gradients back to previous stage as long as in
# forward it is a received input, regardless of whether it requires
-# grad. It is up to the previous stage to disgard this gradient.
+# grad. It is up to the previous stage to discard this gradient.
if isinstance(a, _RecvInfo):
grad_send_info.append(a.source)
return a.source
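The comment above documents a convention used by the pipeline schedules: every input that was received from the previous stage during forward gets a gradient sent back, and filtering by requires_grad is left to the receiver. A hypothetical, simplified sketch of that mapping (only _RecvInfo and its source field come from this hunk; the branch for locally produced inputs is assumed for illustration):

def build_grad_send_info(forward_recv_infos):
    # The gradient "send plan" mirrors the forward "receive plan".
    grad_send_info = []
    for info in forward_recv_infos:
        if isinstance(info, _RecvInfo):
            # Received from another stage: send its gradient back to the sender,
            # whether or not that stage ends up using it.
            grad_send_info.append(info.source)
        else:
            # Produced locally: nothing to send back.
            grad_send_info.append(None)
    return grad_send_info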
2 changes: 1 addition & 1 deletion python/paddle/distributed/auto_parallel/static/utils.py
@@ -373,7 +373,7 @@ def _coordinate2linear_idx(mesh_shape, coordinate):
# that the processes in mesh are
# 1. starts from 0
# 2. continuous
-# it will be wrong if ths above condition does not meet,
+# it will be wrong if the above condition does not meet,
# e.g. process_mesh = { process_groups = [7, 8, 9,10, 12, 13, 14, 15], mesh = [2, 4]}
# if you want a more general mapping, you should use cartesian product

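The two conditions above matter because the mapping is plain row-major flattening of the mesh coordinate; for a non-contiguous process list such as [7, 8, 9, 10, 12, 13, 14, 15] the flattened index would have to be looked up in that list instead (the cartesian-product approach the comment mentions). A minimal sketch of the contiguous case, written independently of the Paddle implementation:

def coordinate_to_linear_idx(mesh_shape, coordinate):
    # Row-major flattening; only valid when ranks start at 0 and are contiguous.
    linear = 0
    for dim_size, coord in zip(mesh_shape, coordinate):
        linear = linear * dim_size + coord
    return linear

# e.g. mesh_shape = [2, 4], coordinate = [1, 2]  ->  1 * 4 + 2 = 6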
2 changes: 1 addition & 1 deletion python/paddle/incubate/optimizer/functional/bfgs.py
@@ -81,7 +81,7 @@ def minimize_bfgs(

- is_converge (bool): Indicates whether found the minimum within tolerance.
- num_func_calls (int): number of objective function called.
-- position (Tensor): the position of the last iteration. If the search converged, this value is the argmin of the objective function regrading to the initial position.
+- position (Tensor): the position of the last iteration. If the search converged, this value is the argmin of the objective function regarding to the initial position.
- objective_value (Tensor): objective function value at the `position`.
- objective_gradient (Tensor): objective function gradient at the `position`.
- inverse_hessian_estimate (Tensor): the estimate of inverse hessian at the `position`.
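The values listed above are returned positionally, so call sites usually index into the result. A short usage sketch, assuming the documented paddle.incubate.optimizer.functional.minimize_bfgs signature (defaults and exact return packing may differ between Paddle versions):

import paddle
from paddle.incubate.optimizer.functional import minimize_bfgs

def objective(x):
    # Convex quadratic with its minimum at the origin.
    return paddle.dot(x, x)

x0 = paddle.to_tensor([1.3, 2.7], dtype='float32')
results = minimize_bfgs(objective, x0)
# Order follows the docstring above: is_converge, num_func_calls, position,
# objective_value, objective_gradient, inverse_hessian_estimate.
print(results[0])  # is_converge
print(results[2])  # position, the argmin found starting from x0

minimize_lbfgs in the next hunk follows the same calling pattern but keeps a limited-memory history instead of a full inverse Hessian estimate.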
2 changes: 1 addition & 1 deletion python/paddle/incubate/optimizer/functional/lbfgs.py
@@ -83,7 +83,7 @@ def minimize_lbfgs(

- is_converge (bool): Indicates whether found the minimum within tolerance.
- num_func_calls (int): number of objective function called.
-- position (Tensor): the position of the last iteration. If the search converged, this value is the argmin of the objective function regrading to the initial position.
+- position (Tensor): the position of the last iteration. If the search converged, this value is the argmin of the objective function regarding to the initial position.
- objective_value (Tensor): objective function value at the `position`.
- objective_gradient (Tensor): objective function gradient at the `position`.

2 changes: 1 addition & 1 deletion python/paddle/incubate/optimizer/recompute.py
@@ -390,7 +390,7 @@ def _parse_backward(self):
self._record_fetch_op(idx)
)

-# should check the current used checkpoint is ths last fetch one
+# should check the current used checkpoint is the last fetch one
assert (
second_to_last_fetch_checkpoint == input_var
), f"Current recompute segment should use [{second_to_last_fetch_checkpoint}] BUT got [{input_var}]"
4 changes: 2 additions & 2 deletions python/paddle/nn/functional/flash_attention.py
@@ -1156,7 +1156,7 @@ def flash_attn_varlen_func(
"FLAGS_flash_attn_version"
]
== 3
-), "FLAGS_flash_attn_version is 2, conflits with flash_attn_varlen_func"
+), "FLAGS_flash_attn_version is 2, conflicts with flash_attn_varlen_func"

assert (
in_dynamic_or_pir_mode()
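The assertion above means flash_attn_varlen_func is only usable when the FlashAttention-3 backend is selected. A hedged sketch of switching the flag, assuming FLAGS_flash_attn_version is exposed through the generic paddle.set_flags / paddle.get_flags mechanism like other FLAGS_* globals (not verified for every build):

import paddle

# Hypothetical: select the FlashAttention-3 backend before calling
# paddle.nn.functional.flash_attn_varlen_func; the flag name is taken
# from the assertion in this hunk.
paddle.set_flags({"FLAGS_flash_attn_version": 3})
print(paddle.get_flags(["FLAGS_flash_attn_version"]))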
@@ -1650,7 +1650,7 @@ def flashmask_attention(
If causal mode is disabled, Query at position i will only attend to keys between [i - window_size, i + window_size] or [i - window_size[0], i + window_size[1]].
return_softmax_lse (bool): Whether to return the log-sum-exp of the softmax. Default is False.
return_seed_offset (bool): Whether to return the random seed offset. Default is False.
-fixed_seed_of fset(Tensor, optional): With fixed seed, offset for dropout mask.
+fixed_seed_offset(Tensor, optional): With fixed seed, offset for dropout mask.
rng_name (str): The name to select Generator.
training (bool): Whether the module is in training mode. Default is True.
name (str, optional): Name of the operation. Default is None. Normally, users do not need to set this property.
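Taken together, the parameters documented in this hunk describe the usual call shape for paddle.nn.functional.flashmask_attention. A hedged usage sketch; the [batch, seq_len, num_heads, head_dim] layout and keyword names are inferred from this docstring fragment, may differ across Paddle versions, and the kernel needs a GPU build with FlashAttention support:

import paddle
import paddle.nn.functional as F

# Flash attention kernels expect half precision.
q = paddle.randn([2, 128, 8, 64]).astype('bfloat16')
k = paddle.randn([2, 128, 8, 64]).astype('bfloat16')
v = paddle.randn([2, 128, 8, 64]).astype('bfloat16')

# Causal attention restricted to a sliding window over the 32 previous keys,
# matching the window_size behaviour described above.
out = F.flashmask_attention(
    q, k, v,
    causal=True,
    window_size=32,
    return_softmax_lse=False,
)
print(out.shape)  # expected to keep q's [batch, seq_len, num_heads, head_dim] layout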
2 changes: 1 addition & 1 deletion setup.py
@@ -273,7 +273,7 @@ def run(self):
egg_info.run(self)


-# class Installlib is rewritten to add header files to .egg/paddle
+# class InstallLib is rewritten to add header files to .egg/paddle
class InstallLib(install_lib):
def run(self):
self.build()
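The comment fixed above refers to a customized install_lib step. As a general illustration of that pattern (not the actual Paddle implementation, whose paths and build hooks live elsewhere in setup.py), a subclass can run the normal install and then copy header files into the installed tree:

import os
import shutil
from setuptools.command.install_lib import install_lib

# Hypothetical sketch: install the compiled modules as usual, then ship headers too.
class InstallLibWithHeaders(install_lib):
    def run(self):
        super().run()
        headers_src = 'paddle/include'  # assumed header location, not Paddle's real path
        headers_dst = os.path.join(self.install_dir, 'paddle', 'include')
        if os.path.isdir(headers_src):
            shutil.copytree(headers_src, headers_dst, dirs_exist_ok=True)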
@@ -293,7 +293,7 @@ def teller1(program_config, predictor_config):
self.add_skip_case(
teller1,
SkipReasons.TRT_NOT_IMPLEMENTED,
-"TThe cross attention trt oss plugin do not support static shape yet",
+"The cross attention trt oss plugin do not support static shape yet",
)

def teller2(program_config, predictor_config):
4 changes: 2 additions & 2 deletions test/ir/inference/test_trt_convert_flash_multihead_matmul.py
@@ -294,7 +294,7 @@ def teller1(program_config, predictor_config):
self.add_skip_case(
teller1,
SkipReasons.TRT_NOT_IMPLEMENTED,
-"TThe flash attention trt oss plugin do not support static shape yet",
+"The flash attention trt oss plugin do not support static shape yet",
)

def teller2(program_config, predictor_config):
@@ -603,7 +603,7 @@ def teller1(program_config, predictor_config):
self.add_skip_case(
teller1,
SkipReasons.TRT_NOT_IMPLEMENTED,
-"TThe flash attention trt oss plugin do not support static shape yet",
+"The flash attention trt oss plugin do not support static shape yet",
)

def teller2(program_config, predictor_config):
10 changes: 5 additions & 5 deletions test/legacy_test/test_fused_dconv_drelu_dbn_op.py
@@ -423,7 +423,7 @@ def init_attr(self):
self.exhaustive_search = False


-@skip_check_grad_ci(reason="no grap op")
+@skip_check_grad_ci(reason="no grad op")
@unittest.skipIf(skip_unit_test(), skip_msg)
class TestFusedDconvDreluDbnOpShortcut(TestFusedDconvDreluDbnOp):
def init_attr(self):
@@ -433,7 +433,7 @@ def init_attr(self):
self.exhaustive_search = False


-@skip_check_grad_ci(reason="no grap op")
+@skip_check_grad_ci(reason="no grad op")
@unittest.skipIf(skip_unit_test(), skip_msg)
class TestFusedDconvDreluDbnOpDual(TestFusedDconvDreluDbnOp):
def init_attr(self):
@@ -443,7 +443,7 @@ def init_attr(self):
self.exhaustive_search = False


-@skip_check_grad_ci(reason="no grap op")
+@skip_check_grad_ci(reason="no grad op")
@unittest.skipIf(skip_unit_test(), skip_msg)
class TestFusedDconvDreluDbnOpShortcutAdd(TestFusedDconvDreluDbnOp):
def init_attr(self):
@@ -453,7 +453,7 @@ def init_attr(self):
self.exhaustive_search = False


-@skip_check_grad_ci(reason="no grap op")
+@skip_check_grad_ci(reason="no grad op")
@unittest.skipIf(skip_unit_test(), skip_msg)
class TestFusedDconvDreluDbnOpDualAdd(TestFusedDconvDreluDbnOp):
def init_attr(self):
@@ -463,7 +463,7 @@ def init_attr(self):
self.exhaustive_search = False


-@skip_check_grad_ci(reason="no grap op")
+@skip_check_grad_ci(reason="no grad op")
@unittest.skipIf(skip_unit_test(), skip_msg)
class TestFusedDconvDreluDbnOpExhaustive(TestFusedDconvDreluDbnOp):
def init_attr(self):
24 changes: 12 additions & 12 deletions test/legacy_test/test_fused_gemm_epilogue_grad_op.py
@@ -41,7 +41,7 @@ def get_outputs(DOut, X, Y):
return DX, DY, DBias


-@skip_check_grad_ci(reason="no grap op")
+@skip_check_grad_ci(reason="no grad op")
@unittest.skipIf(
not core.is_compiled_with_cuda() or not is_rocm_gfx928(),
"core is not compiled with CUDA",
@@ -79,7 +79,7 @@ def test_check_output(self):
)


-@skip_check_grad_ci(reason="no grap op")
+@skip_check_grad_ci(reason="no grad op")
@unittest.skipIf(
not core.is_compiled_with_cuda() or not is_rocm_gfx928(),
"core is not compiled with CUDA",
@@ -92,7 +92,7 @@ def init_dtype_type(self):
self.atol = 1e-6


-@skip_check_grad_ci(reason="no grap op")
+@skip_check_grad_ci(reason="no grad op")
@unittest.skipIf(
not core.is_compiled_with_cuda() or core.is_compiled_with_rocm(),
"core is not compiled with CUDA or is compiled with ROCm",
@@ -105,7 +105,7 @@ def init_dtype_type(self):
self.atol = 1e-6


-@skip_check_grad_ci(reason="no grap op")
+@skip_check_grad_ci(reason="no grad op")
@unittest.skipIf(
not core.is_compiled_with_cuda() or not is_rocm_gfx928(),
"core is not compiled with CUDA",
@@ -143,7 +143,7 @@ def test_check_output(self):
)


-@skip_check_grad_ci(reason="no grap op")
+@skip_check_grad_ci(reason="no grad op")
@unittest.skipIf(
not core.is_compiled_with_cuda() or not is_rocm_gfx928(),
"core is not compiled with CUDA",
@@ -156,7 +156,7 @@ def init_dtype_type(self):
self.atol = 1e-6


-@skip_check_grad_ci(reason="no grap op")
+@skip_check_grad_ci(reason="no grad op")
@unittest.skipIf(
not core.is_compiled_with_cuda() or core.is_compiled_with_rocm(),
"core is not compiled with CUDA or is compiled with ROCm",
@@ -169,7 +169,7 @@ def init_dtype_type(self):
self.atol = 1e-6


-@skip_check_grad_ci(reason="no grap op")
+@skip_check_grad_ci(reason="no grad op")
@unittest.skipIf(
not core.is_compiled_with_cuda() or not is_rocm_gfx928(),
"core is not compiled with CUDA",
@@ -207,7 +207,7 @@ def test_check_output(self):
)


-@skip_check_grad_ci(reason="no grap op")
+@skip_check_grad_ci(reason="no grad op")
@unittest.skipIf(
not core.is_compiled_with_cuda() or not is_rocm_gfx928(),
"core is not compiled with CUDA",
@@ -218,7 +218,7 @@ def init_dtype_type(self):
self.atol = 1e-6


-@skip_check_grad_ci(reason="no grap op")
+@skip_check_grad_ci(reason="no grad op")
@unittest.skipIf(
not core.is_compiled_with_cuda() or core.is_compiled_with_rocm(),
"core is not compiled with CUDA or is compiled with ROCm",
@@ -229,7 +229,7 @@ def init_dtype_type(self):
self.atol = 1e-6


-@skip_check_grad_ci(reason="no grap op")
+@skip_check_grad_ci(reason="no grad op")
@unittest.skipIf(
not core.is_compiled_with_cuda() or not is_rocm_gfx928(),
"core is not compiled with CUDA",
@@ -267,7 +267,7 @@ def test_check_output(self):
)


-@skip_check_grad_ci(reason="no grap op")
+@skip_check_grad_ci(reason="no grad op")
@unittest.skipIf(
not core.is_compiled_with_cuda() or not is_rocm_gfx928(),
"core is not compiled with CUDA",
@@ -278,7 +278,7 @@ def init_dtype_type(self):
self.atol = 1e-6


-@skip_check_grad_ci(reason="no grap op")
+@skip_check_grad_ci(reason="no grad op")
@unittest.skipIf(
not core.is_compiled_with_cuda() or core.is_compiled_with_rocm(),
"core is not compiled with CUDA or is compiled with ROCm",