Skip to content

Commit a071ccb

Browse files
authored
fix NCCL makefile for CUDA 7.5 (pytorch#1401)
1 parent db1eb66 commit a071ccb

File tree

1 file changed

+12
-3
lines changed

1 file changed

+12
-3
lines changed

torch/lib/nccl/Makefile

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,25 @@ CUDA_LIB ?= $(CUDA_HOME)/lib64
1717
# Directory holding the CUDA headers; `?=` keeps any value already supplied by
# the environment or the command line.
CUDA_INC ?= $(CUDA_HOME)/include
1818
# Path to the nvcc binary; defaults to the one under CUDA_HOME unless already set.
NVCC ?= $(CUDA_HOME)/bin/nvcc
1919

20+
# Detect the CUDA version ("MAJOR.MINOR") from the file name of the first
# libcudart.so.* listed in CUDA_LIB: the name is reversed so the last two
# dot-separated fields can be cut off, then reversed back.
# The probe runs only when CUDA_VERSION has not been defined elsewhere
# (environment, command line), and it runs exactly once at parse time rather
# than re-forking the pipeline on every expansion of the variable.
ifeq ($(origin CUDA_VERSION),undefined)
CUDA_VERSION := $(shell ls $(CUDA_LIB)/libcudart.so.* | head -1 | rev | cut -d "." -f -2 | rev)
endif
# Split "MAJOR.MINOR" with make built-ins (no extra shell processes).
# A CUDA_VERSION that contains no '.' leaves CUDA_MINOR empty.
CUDA_MAJOR := $(word 1,$(subst ., ,$(CUDA_VERSION)))
CUDA_MINOR := $(word 2,$(subst ., ,$(CUDA_VERSION)))
23+
24+
# Select the -gencode targets handed to nvcc. Every assignment to NVCC_GENCODE
# uses `?=`, so a value supplied by the caller is left untouched.

# Targets emitted regardless of the CUDA major version.
NVCC_GENCODE_COMMON := -gencode=arch=compute_30,code=sm_30 \
                       -gencode=arch=compute_35,code=sm_35 \
                       -gencode=arch=compute_50,code=sm_50 \
                       -gencode=arch=compute_52,code=sm_52

ifeq "$(CUDA_MAJOR)" "7"
# CUDA 7.x: the list stops at sm_52 and ends with a code=compute_52 entry;
# no compute_60/compute_61 targets are requested.
NVCC_GENCODE ?= $(NVCC_GENCODE_COMMON) \
                -gencode=arch=compute_52,code=compute_52
else
# Any other CUDA major version: add sm_60 and sm_61 and end with a
# code=compute_60 entry.
NVCC_GENCODE ?= $(NVCC_GENCODE_COMMON) \
                -gencode=arch=compute_60,code=sm_60 \
                -gencode=arch=compute_61,code=sm_61 \
                -gencode=arch=compute_60,code=compute_60
endif
2739

2840
# Base C++ flags: put the CUDA headers on the include path, build
# position-independent code, and hide symbols by default.
CXXFLAGS := -I$(CUDA_INC) -fPIC -fvisibility=hidden
2941
# nvcc flags: use $(CXX) as the host compiler, apply the selected -gencode
# targets, emit line info, compile as C++11, and limit register usage to 96.
NVCUFLAGS := -ccbin $(CXX) $(NVCC_GENCODE) -lineinfo -std=c++11 -maxrregcount 96
@@ -58,9 +70,6 @@ NCCL_MINOR := 3
5870
# Patch component of the NCCL version number.
NCCL_PATCH := 4
5971
# Pass the three NCCL version components to the C++ sources as preprocessor macros.
CXXFLAGS += -DNCCL_MAJOR=$(NCCL_MAJOR) -DNCCL_MINOR=$(NCCL_MINOR) -DNCCL_PATCH=$(NCCL_PATCH)
6072

61-
CUDA_VERSION ?= $(shell ls $(CUDA_LIB)/libcudart.so.* | head -1 | rev | cut -d "." -f -2 | rev)
62-
CUDA_MAJOR = $(shell echo $(CUDA_VERSION) | cut -d "." -f 1)
63-
CUDA_MINOR = $(shell echo $(CUDA_VERSION) | cut -d "." -f 2)
6473
# Pass the CUDA major and minor version numbers to the C++ sources as preprocessor macros.
CXXFLAGS += -DCUDA_MAJOR=$(CUDA_MAJOR) -DCUDA_MINOR=$(CUDA_MINOR)
6574

6675
# These targets are declared phony: make runs their recipes whenever they are
# requested, even if a file with the same name exists.
.PHONY : all lib staticlib clean test mpitest install deb debian debclean forlib fortest forclean

0 commit comments

Comments
 (0)