Commit 0e2771c

Merge branch 'main' into fdinatale/trtllm-bench/request_timeline

2 parents: 7cc8094 + 1191555

261 files changed: +9087 additions, -4328 deletions


.devcontainer/make_env.py

Lines changed: 3 additions & 1 deletion

@@ -180,9 +180,11 @@ def main():
     env_files = [
         JENKINS_PROPS_PATH,
         DEV_CONTAINER_ENV_PATH,
-        DEV_CONTAINER_USER_ENV_PATH,
     ]
 
+    if DEV_CONTAINER_USER_ENV_PATH.exists():
+        env_files.append(DEV_CONTAINER_USER_ENV_PATH)
+
     env = _load_env(env_files)
     _handle_rootless(env_inout=env)
 
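The change above stops assuming the per-user env file exists and appends it only when it is actually present. A minimal standalone sketch of the same pattern, with hypothetical file names standing in for the real path constants:

from pathlib import Path

# Required env files are always loaded; the user-specific one is optional.
required_env_files = [Path("jenkins.properties"), Path("devcontainer.env")]
user_env_file = Path("devcontainer.user.env")  # hypothetical optional file

env_files = list(required_env_files)
if user_env_file.exists():
    env_files.append(user_env_file)  # only include it when it exists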

.gitattributes

Lines changed: 2 additions & 1 deletion

@@ -1,7 +1,8 @@
 *.a filter=lfs diff=lfs merge=lfs -text
+*.dll filter=lfs diff=lfs merge=lfs -text
 *.lib filter=lfs diff=lfs merge=lfs -text
 *.so filter=lfs diff=lfs merge=lfs -text
-*.dll filter=lfs diff=lfs merge=lfs -text
+*.txz filter=lfs diff=lfs merge=lfs -text
 *.xz filter=lfs diff=lfs merge=lfs -text
 triton_backend/tools/gpt/input_data.json filter=lfs diff=lfs merge=lfs -text
 *cubin.cpp filter=lfs diff=lfs merge=lfs -text

.github/scripts/label_community_user.py

Lines changed: 116 additions & 24 deletions

@@ -1,5 +1,6 @@
 import os
 import sys
+from datetime import datetime, timedelta, timezone
 
 import requests
 
@@ -97,6 +98,73 @@ def add_label_to_pr(repo_owner: str, repo_name: str, pr_number: str,
         raise e
 
 
+def get_recent_open_prs(repo_owner: str,
+                        repo_name: str,
+                        minutes_back: int = 65):
+    """Get open PRs created or updated in the last N minutes."""
+    cutoff_time = datetime.now(timezone.utc) - timedelta(minutes=minutes_back)
+
+    url = f"{GITHUB_API_URL}/repos/{repo_owner}/{repo_name}/pulls"
+    params = {
+        "state": "open",
+        "sort": "updated",
+        "direction": "desc",
+        "per_page": 100
+    }
+
+    recent_prs = []
+    page = 1
+
+    try:
+        while True:
+            params["page"] = page
+            response = requests.get(url,
+                                    headers=HEADERS,
+                                    params=params,
+                                    timeout=30)
+            response.raise_for_status()
+            page_prs = response.json()
+
+            if not page_prs:  # no more PRs
+                break
+
+            found_old_pr = False
+            for pr in page_prs:
+                created_at = datetime.strptime(
+                    pr["created_at"],
+                    "%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=timezone.utc)
+                updated_at = datetime.strptime(
+                    pr["updated_at"],
+                    "%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=timezone.utc)
+
+                if created_at >= cutoff_time or updated_at >= cutoff_time:
+                    recent_prs.append(pr)
+                else:
+                    # since sorted by updated desc, once we hit an old PR we can stop
+                    found_old_pr = True
+                    break
+
+            if found_old_pr:
+                break
+
+            page += 1
+            # safety limit to avoid infinite loops
+            if page > 10:  # max 1000 PRs (100 * 10)
+                print(
+                    f"Warning: Hit pagination limit at page {page}, may have missed some PRs"
+                )
+                break
+
+        print(
+            f"Found {len(recent_prs)} PRs created/updated in the last {minutes_back} minutes (checked {page} pages)"
+        )
+        return recent_prs
+
+    except requests.exceptions.RequestException as e:
+        print(f"Error fetching PRs: {e}")
+        raise
+
+
 def main():
     """
     Main function to check user membership and apply community labels.
@@ -106,45 +174,69 @@ def main():
     1 - Failed to determine user membership (API permission issues)
     2 - Failed to add community label (labeling API issues)
     """
-    pr_author = os.environ.get("PR_AUTHOR")
-    assert pr_author, "PR_AUTHOR environment variable not set"
-    pr_number = os.environ.get("PR_NUMBER")
-    assert pr_number, "PR_NUMBER environment variable not set"
     repo_owner = os.environ.get("REPO_OWNER")
     assert repo_owner, "REPO_OWNER environment variable not set"
     repo_name = os.environ.get("REPO_NAME")
     assert repo_name, "REPO_NAME environment variable not set"
     community_label = os.environ.get("COMMUNITY_LABEL")
     assert community_label, "COMMUNITY_LABEL environment variable not set"
+    time_window_minutes = int(os.environ.get("TIME_WINDOW_MINUTES"))
 
     print(
-        f"Starting NVIDIA membership check for PR author '{pr_author}' on PR #{pr_number}."
+        f"Starting community PR labeling sweep for {repo_owner}/{repo_name}. Time window: {time_window_minutes} minutes."
     )
 
     try:
-        is_member = check_user_membership("NVIDIA", pr_author)
-    except RuntimeError as e:
-        print(
-            f"Critical error during NVIDIA membership check for '{pr_author}': {e}"
-        )
-        print("Halting script due to inability to determine membership status.")
+        recent_prs = get_recent_open_prs(repo_owner, repo_name,
+                                         time_window_minutes)
+    except requests.exceptions.RequestException:
+        print("Failed to fetch recent PRs")
         sys.exit(1)
 
-    print(
-        f"User '{pr_author}' is determined to be an NVIDIA member: {is_member}")
+    processed_count = 0
+    labeled_count = 0
+
+    for pr in recent_prs:
+        pr_number = pr["number"]
+        pr_author = pr["user"]["login"]
+        existing_labels = {label["name"] for label in pr["labels"]}
+
+        if community_label in existing_labels:
+            print(
+                f"PR #{pr_number} by {pr_author} already has community label, skipping"
+            )
+            continue
+
+        print(f"Processing PR #{pr_number} by {pr_author}")
+        processed_count += 1
 
-    if not is_member:
-        print(
-            f"User '{pr_author}' is a community user. Adding label '{community_label}'."
-        )
         try:
-            add_label_to_pr(repo_owner, repo_name, pr_number, community_label)
-        except requests.exceptions.RequestException as e:
-            print(f"Failed to add community label: {e}")
-            sys.exit(2)
-    else:
-        print(
-            f"User '{pr_author}' is an NVIDIA member. No label will be added.")
+            is_member = check_user_membership("NVIDIA", pr_author)
+        except RuntimeError as e:
+            print(
+                f"Critical error during NVIDIA membership check for '{pr_author}': {e}"
+            )
+            print("Continuing with next PR...")
+            continue
+
+        if not is_member:
+            print(
+                f"User '{pr_author}' is a community user. Adding label '{community_label}'."
+            )
+            try:
+                add_label_to_pr(repo_owner, repo_name, str(pr_number),
+                                community_label)
+                labeled_count += 1
+            except requests.exceptions.RequestException as e:
+                print(f"Failed to add community label to PR #{pr_number}: {e}")
+                # continue with other PRs instead of exiting
+                continue
+        else:
+            print(f"User '{pr_author}' is an NVIDIA member. No label needed.")
+
+    print(
+        f"Sweep complete: processed {processed_count} PRs, labeled {labeled_count} as community"
+    )
 
 
 if __name__ == "__main__":
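The reworked script no longer reads a single PR from the event payload; it sweeps every open PR created or updated inside the time window and labels the community ones. A hedged sketch of a local invocation, assuming the same environment variables the workflow in the next file exports (owner/repo values here are only illustrative, and the token is whatever secret the workflow passes as AUTO_LABEL_COMMUNITY_TOKEN):

import os
import subprocess

env = dict(os.environ)
env.update({
    "AUTO_LABEL_COMMUNITY_TOKEN": "<token able to read org membership and add labels>",
    "REPO_OWNER": "NVIDIA",                # illustrative
    "REPO_NAME": "TensorRT-LLM",           # illustrative
    "COMMUNITY_LABEL": "Community want to contribute",
    "TIME_WINDOW_MINUTES": "65",
})

# Run the sweep once, exactly as the scheduled workflow would.
subprocess.run(["python", ".github/scripts/label_community_user.py"],
               env=env, check=True)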

.github/workflows/label_community_pr.yml

Lines changed: 11 additions & 5 deletions

@@ -1,8 +1,15 @@
 name: Label Community PR
 
 on:
-  pull_request:
-    types: [opened]
+  schedule:
+    - cron: '0 * * * *' # every hour at minute 0
+  workflow_dispatch: # manual trigger option
+    inputs:
+      time_window_minutes:
+        description: 'Time window in minutes to look back for PRs'
+        required: false
+        default: 65
+        type: number
 
 jobs:
   label_pr:
@@ -22,9 +29,8 @@ jobs:
       - name: Run labeling script
         env:
           AUTO_LABEL_COMMUNITY_TOKEN: ${{ secrets.AUTO_LABEL_COMMUNITY_TOKEN }}
-          PR_AUTHOR: ${{ github.event.pull_request.user.login }}
-          PR_NUMBER: ${{ github.event.pull_request.number }}
-          REPO_OWNER: ${{ github.event.repository.owner.login }}
+          REPO_OWNER: ${{ github.repository_owner }}
           REPO_NAME: ${{ github.event.repository.name }}
           COMMUNITY_LABEL: "Community want to contribute"
+          TIME_WINDOW_MINUTES: ${{ inputs.time_window_minutes || 65 }}
         run: python .github/scripts/label_community_user.py
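The workflow now runs on an hourly cron while the default look-back window is 65 minutes, presumably so consecutive sweeps overlap by a few minutes and a PR updated right at a trigger boundary is not missed. A small sketch of the cutoff the script derives from that window (mirroring get_recent_open_prs above):

from datetime import datetime, timedelta, timezone

time_window_minutes = 65  # default TIME_WINDOW_MINUTES from the workflow
cutoff_time = datetime.now(timezone.utc) - timedelta(minutes=time_window_minutes)
print(f"Sweeping open PRs created or updated after {cutoff_time.isoformat()}")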

.gitignore

Lines changed: 4 additions & 0 deletions

@@ -40,6 +40,9 @@ tensorrt_llm/libs
 tensorrt_llm/bindings.*.so
 tensorrt_llm/bindings.pyi
 tensorrt_llm/bindings/**/*.pyi
+tensorrt_llm/deep_ep/
+tensorrt_llm/deep_ep_cpp_tllm.*.so
+tensorrt_llm/deep_ep_cpp_tllm.pyi
 *docs/cpp_docs*
 *docs/source/_cpp_gen*
 docs/source/**/*.rst
@@ -55,6 +58,7 @@ llm-test-workspace/
 *.safetensors
 */tllm_debug/**
 *.patch
+!cpp/tensorrt_llm/deep_ep/*.patch
 
 # Generated files
 cpp/include/tensorrt_llm/executor/version.h

.pre-commit-config.yaml

Lines changed: 1 addition & 0 deletions

@@ -27,6 +27,7 @@ repos:
         args: [--allow-multiple-documents]
         exclude: ".*/gitlab/.*.yml"
       - id: trailing-whitespace
+        exclude: '\.patch$'
       - id: check-toml
       - id: mixed-line-ending
         args: [--fix=lf]

README.md

Lines changed: 1 addition & 1 deletion

@@ -9,7 +9,7 @@ TensorRT-LLM
 [![python](https://img.shields.io/badge/python-3.10-green)](https://www.python.org/downloads/release/python-31012/)
 [![cuda](https://img.shields.io/badge/cuda-12.9.0-green)](https://developer.nvidia.com/cuda-downloads)
 [![trt](https://img.shields.io/badge/TRT-10.11.0-green)](https://developer.nvidia.com/tensorrt)
-[![version](https://img.shields.io/badge/release-1.0.0rc1-green)](./tensorrt_llm/version.py)
+[![version](https://img.shields.io/badge/release-1.0.0rc2-green)](./tensorrt_llm/version.py)
 [![license](https://img.shields.io/badge/license-Apache%202-blue)](./LICENSE)
 
 [Architecture](./docs/source/torch/arch_overview.md)   |   [Performance](./docs/source/performance/perf-overview.md)   |   [Examples](https://nvidia.github.io/TensorRT-LLM/quick-start-guide.html)   |   [Documentation](./docs/source/)   |   [Roadmap](https://github.com/NVIDIA/TensorRT-LLM/issues?q=is%3Aissue%20state%3Aopen%20label%3Aroadmap)

benchmarks/cpp/README.md

Lines changed: 1 addition & 1 deletion

@@ -41,7 +41,7 @@ python3 prepare_dataset.py \
 ```
 
 For datasets that don't have prompt key, set --dataset-prompt instead.
-Take [cnn_dailymail dataset](https://huggingface.co/datasets/cnn_dailymail) for example:
+Take [cnn_dailymail dataset](https://huggingface.co/datasets/abisee/cnn_dailymail) for example:
 ```
 python3 prepare_dataset.py \
     --tokenizer <path/to/tokenizer> \

cpp/CMakeLists.txt

Lines changed: 40 additions & 10 deletions

@@ -39,6 +39,7 @@ option(FAST_BUILD "Skip compiling some kernels to accelerate compiling" OFF)
 option(FAST_MATH "Compiling in fast math mode" OFF)
 option(INDEX_RANGE_CHECK "Compiling with index range checks" OFF)
 option(COMPRESS_FATBIN "Compress everything in fatbin" ON)
+option(TIMING_NVCC "Enable nvcc build timing report" OFF)
 option(ENABLE_MULTI_DEVICE
        "Enable building with multi device support (requires NCCL, MPI,...)" ON)
 option(ENABLE_UCX "Enable building with UCX (Uniform Communication X) support"
@@ -135,10 +136,15 @@ configure_file(
   ${CMAKE_CURRENT_SOURCE_DIR}/include/tensorrt_llm/executor/version.h)
 
 setup_cuda_compiler()
-setup_cuda_architectures()
 
 enable_language(C CXX CUDA)
 
+# Configure CUDA Architectures after enabling CUDA.
+
+# Old CMake rejects family conditional architectures during enable_language, But
+# after that CMake handles it just fine.
+setup_cuda_architectures()
+
 find_package(CUDAToolkit 11.2 REQUIRED COMPONENTS cudart_static cuda_driver
                                                   cublas cublasLt curand nvml)
 
@@ -323,6 +329,10 @@ endif()
 if(COMPRESS_FATBIN)
   set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --fatbin-options -compress-all")
 endif()
+if(NVCC_TIMING)
+  set(CMAKE_CUDA_FLAGS
+      "${CMAKE_CUDA_FLAGS} --time ${CMAKE_CURRENT_BINARY_DIR}/nvcc-timing.csv")
+endif()
 message("CMAKE_CUDA_FLAGS: ${CMAKE_CUDA_FLAGS}")
 
 set(COMMON_HEADER_DIRS ${PROJECT_SOURCE_DIR} ${CUDAToolkit_INCLUDE_DIR})
@@ -345,15 +355,6 @@ if(NOT WIN32 AND NOT DEFINED USE_CXX11_ABI)
 endif()
 
 if(BUILD_PYT)
-  # Build TORCH_CUDA_ARCH_LIST
-  set(TORCH_CUDA_ARCH_LIST "")
-  foreach(CUDA_ARCH IN LISTS CMAKE_CUDA_ARCHITECTURES)
-    string(REGEX REPLACE "^([1-9][0-9]*)([0-9]a?)-real$" "\\1.\\2" TORCH_ARCH
-                         ${CUDA_ARCH})
-    list(APPEND TORCH_CUDA_ARCH_LIST ${TORCH_ARCH})
-  endforeach()
-
-  message(STATUS "TORCH_CUDA_ARCH_LIST: ${TORCH_CUDA_ARCH_LIST}")
   # ignore values passed from the environment
   if(DEFINED ENV{TORCH_CUDA_ARCH_LIST})
     message(
@@ -362,6 +363,20 @@ if(BUILD_PYT)
     )
   endif()
   unset(ENV{TORCH_CUDA_ARCH_LIST})
+  # Torch maintains custom logic to add CUDA architecture flags into
+  # CMAKE_CUDA_FLAGS based on TORCH_CUDA_ARCH_LIST variable, instead of using
+  # the native support introduced in newer CMake versions. And it always tries
+  # to add some flags, even given empty TORCH_CUDA_ARCH_LIST.
+
+  # We prefer CMake's native support to be able to easily customize the CUDA
+  # architectures to be compiled for, for each kernel individually. So we set
+  # TORCH_CUDA_ARCH_LIST to a placeholder value and remove the generated flags
+  # then to effectively prevent Torch from adding CUDA architecture flags.
+  message(
+    STATUS
+      "Set TORCH_CUDA_ARCH_LIST to placeholder value \"8.0\" to make Torch happy. "
+      "This is NOT the list of architectures that will be compiled for.")
+  set(TORCH_CUDA_ARCH_LIST "8.0")
 
   find_package(Python3 COMPONENTS Interpreter Development REQUIRED)
   message(STATUS "Found Python executable at ${Python3_EXECUTABLE}")
@@ -391,7 +406,22 @@ print(os.path.dirname(torch.__file__),end='');"
   list(APPEND CMAKE_PREFIX_PATH ${TORCH_DIR})
   set(USE_SYSTEM_NVTX ON)
   set(nvtx3_dir ${3RDPARTY_DIR}/NVTX/include)
+  set(CMAKE_CUDA_ARCHITECTURES_BACKUP ${CMAKE_CUDA_ARCHITECTURES})
   find_package(Torch REQUIRED)
+  set(CMAKE_CUDA_ARCHITECTURES ${CMAKE_CUDA_ARCHITECTURES_BACKUP})
+  message(
+    STATUS
+      "Removing Torch generated placeholder CUDA architecture flags: -gencode arch=compute_80,code=sm_80."
+  )
+  string(REPLACE "-gencode arch=compute_80,code=sm_80 " "" CMAKE_CUDA_FLAGS_NEW
+                 "${CMAKE_CUDA_FLAGS}")
+  if("${CMAKE_CUDA_FLAGS_NEW}" STREQUAL "${CMAKE_CUDA_FLAGS}")
+    message(
+      FATAL_ERROR
+        "Torch didn't generate expected placeholder CUDA architecture flags.")
+  endif()
+  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS_NEW}")
+
   add_compile_definitions(TORCH_CUDA=1)
 
   if(DEFINED TORCH_CXX_FLAGS)
