There was an error while loading. Please reload this page.
1 parent e6194b1, commit 207e34a (Copy full SHA for 207e34a)
paddle/cinn/backends/codegen_gpu_dev.cc
@@ -244,6 +244,15 @@ void CodeGenGpuDev::PrintFunctionDeclaration(const ir::_LoweredFunc_ *op) {
244
if (!has_symbol_in_thread_num) {
245
str_ += "__launch_bounds__(";
246
str_ += std::to_string(thread_num);
247
+ // Explicitly set min_blocks_per_sm for grid reduce to prevent launch
248
+ // failure.
249
+ if (!op->temp_spaces.empty()) {
250
+ int min_blocks_per_sm = 1024 / thread_num;
251
+ if (min_blocks_per_sm > 1) {
252
+ str_ += ", ";
253
+ str_ += std::to_string(min_blocks_per_sm);
254
+ }
255
+ }
256
str_ += ") ";
257
}
258
0 commit comments