1
0
Fork 0
TheChymera-overlay/sci-libs/pytorch/files/pytorch-1.6.0-nccl-nvccflag...

28 lines
1.3 KiB
Diff

diff -uprN nccl-5949d96f36d050e59d05872f8bbffd2549318e95/makefiles/common.mk nccl-patched/makefiles/common.mk
--- nccl-5949d96f36d050e59d05872f8bbffd2549318e95/makefiles/common.mk 2020-06-09 00:31:44.000000000 +0800
+++ nccl-patched/makefiles/common.mk 2020-08-06 21:25:57.784279738 +0800
@@ -54,7 +54,7 @@ CXXFLAGS := -DCUDA_MAJOR=$(CUDA_MAJOR)
# Maxrregcount needs to be set accordingly to NCCL_MAX_NTHREADS (otherwise it will cause kernel launch errors)
# 512 : 120, 640 : 96, 768 : 80, 1024 : 60
# We would not have to set this if we used __launch_bounds__, but this only works on kernels, not on functions.
-NVCUFLAGS := -ccbin $(CXX) $(NVCC_GENCODE) -std=c++11 -Xptxas -maxrregcount=96 -Xfatbin -compress-all
+NVCUFLAGS := $(NVCCFLAGS) $(NVCC_GENCODE) -std=c++11 -Xptxas -maxrregcount=96 -Xfatbin -compress-all
# Use addprefix so that we can specify more than one path
NVLDFLAGS := -L${CUDA_LIB} -lcudart -lrt
@@ -68,14 +68,6 @@ NVLDFLAGS += ${GCOV_FLAGS:%=-Xcompiler
# $(warning GCOV_FLAGS=${GCOV_FLAGS})
########## GCOV ##########
-ifeq ($(DEBUG), 0)
-NVCUFLAGS += -O3
-CXXFLAGS += -O3 -g
-else
-NVCUFLAGS += -O0 -G -g
-CXXFLAGS += -O0 -g -ggdb3
-endif
-
ifneq ($(VERBOSE), 0)
NVCUFLAGS += -Xptxas -v -Xcompiler -Wall,-Wextra,-Wno-unused-parameter
CXXFLAGS += -Wall -Wextra