Patch for makefiles/common.mk (unified diff, two hunks).

Hunk 1: in the NVCUFLAGS assignment, "-ccbin $(CXX)" is replaced by
        "$(NVCCFLAGS)". NVCCFLAGS is not defined in the lines shown here, so
        it has to come from the environment or the make command line; if it
        is unset, it expands to nothing.
Hunk 2: the "ifeq ($(DEBUG), 0) ... else ... endif" block, which appended
        -O3 / -O0 -G -g to NVCUFLAGS and -O3 -g / -O0 -g -ggdb3 to CXXFLAGS,
        is removed together with the blank line that followed it.
        NOTE(review): presumably the caller is expected to pass its own host
        compiler and optimization/debug flags instead; confirm with the
        build recipe that applies this patch.

The file names below carry one leading directory component, so apply from the
root of the source tree with "patch -p1".

NOTE(review): runs of spaces inside the context lines may have been
normalized when this patch was stored. If a hunk fails to match, apply with
"patch -l" (ignore whitespace) or regenerate the patch against the pristine
file; confirm against the upstream makefiles/common.mk.

diff -uprN nccl-5949d96f36d050e59d05872f8bbffd2549318e95/makefiles/common.mk nccl-patched/makefiles/common.mk
--- nccl-5949d96f36d050e59d05872f8bbffd2549318e95/makefiles/common.mk 2020-06-09 00:31:44.000000000 +0800
+++ nccl-patched/makefiles/common.mk 2020-08-06 21:25:57.784279738 +0800
@@ -54,7 +54,7 @@ CXXFLAGS := -DCUDA_MAJOR=$(CUDA_MAJOR)
 # Maxrregcount needs to be set accordingly to NCCL_MAX_NTHREADS (otherwise it will cause kernel launch errors)
 # 512 : 120, 640 : 96, 768 : 80, 1024 : 60
 # We would not have to set this if we used __launch_bounds__, but this only works on kernels, not on functions.
-NVCUFLAGS := -ccbin $(CXX) $(NVCC_GENCODE) -std=c++11 -Xptxas -maxrregcount=96 -Xfatbin -compress-all
+NVCUFLAGS := $(NVCCFLAGS) $(NVCC_GENCODE) -std=c++11 -Xptxas -maxrregcount=96 -Xfatbin -compress-all
 # Use addprefix so that we can specify more than one path
 NVLDFLAGS := -L${CUDA_LIB} -lcudart -lrt
 
@@ -68,14 +68,6 @@ NVLDFLAGS += ${GCOV_FLAGS:%=-Xcompiler
 # $(warning GCOV_FLAGS=${GCOV_FLAGS})
 ########## GCOV ##########
 
-ifeq ($(DEBUG), 0)
-NVCUFLAGS += -O3
-CXXFLAGS += -O3 -g
-else
-NVCUFLAGS += -O0 -G -g
-CXXFLAGS += -O0 -g -ggdb3
-endif
-
 ifneq ($(VERBOSE), 0)
 NVCUFLAGS += -Xptxas -v -Xcompiler -Wall,-Wextra,-Wno-unused-parameter
 CXXFLAGS += -Wall -Wextra