TheChymera-overlay/sci-libs/pytorch/files/pytorch-1.6.0-nccl-nvccflags.patch

Patch for the bundled NCCL snapshot 5949d96f36d050e59d05872f8bbffd2549318e95,
file makefiles/common.mk. It makes two changes:

  1. NVCUFLAGS no longer starts with the hard-coded "-ccbin $(CXX)"; it starts
     with the contents of $(NVCCFLAGS) instead. NVCUFLAGS is assigned with
     ":=", so NVCCFLAGS is read once, when common.mk is parsed, and therefore
     has to be set (environment or make command line) before that point.
     NOTE(review): presumably the ebuild passes the host-compiler selection
     and other nvcc options through NVCCFLAGS; confirm against the ebuild.

  2. The ifeq ($(DEBUG), 0) ... else ... endif block is removed, so neither
     "-O3" / "-O0 -G -g" is appended to NVCUFLAGS nor "-O3 -g" /
     "-O0 -g -ggdb3" to CXXFLAGS, whatever the value of DEBUG.

Everything from the "diff" line onwards is consumed by patch(1) and must not
be re-indented or reworded: context lines have to match upstream exactly.

diff -uprN nccl-5949d96f36d050e59d05872f8bbffd2549318e95/makefiles/common.mk nccl-patched/makefiles/common.mk
--- nccl-5949d96f36d050e59d05872f8bbffd2549318e95/makefiles/common.mk	2020-06-09 00:31:44.000000000 +0800
+++ nccl-patched/makefiles/common.mk	2020-08-06 21:25:57.784279738 +0800
@@ -54,7 +54,7 @@ CXXFLAGS   := -DCUDA_MAJOR=$(CUDA_MAJOR)
 # Maxrregcount needs to be set accordingly to NCCL_MAX_NTHREADS (otherwise it will cause kernel launch errors)
 # 512 : 120, 640 : 96, 768 : 80, 1024 : 60
 # We would not have to set this if we used __launch_bounds__, but this only works on kernels, not on functions.
-NVCUFLAGS  := -ccbin $(CXX) $(NVCC_GENCODE) -std=c++11 -Xptxas -maxrregcount=96 -Xfatbin -compress-all
+NVCUFLAGS  := $(NVCCFLAGS) $(NVCC_GENCODE) -std=c++11 -Xptxas -maxrregcount=96 -Xfatbin -compress-all
 # Use addprefix so that we can specify more than one path
 NVLDFLAGS  := -L${CUDA_LIB} -lcudart -lrt

@@ -68,14 +68,6 @@ NVLDFLAGS   += ${GCOV_FLAGS:%=-Xcompiler
 # $(warning GCOV_FLAGS=${GCOV_FLAGS})
 ########## GCOV ##########

-ifeq ($(DEBUG), 0)
-NVCUFLAGS += -O3
-CXXFLAGS  += -O3 -g
-else
-NVCUFLAGS += -O0 -G -g
-CXXFLAGS  += -O0 -g -ggdb3
-endif
-
 ifneq ($(VERBOSE), 0)
 NVCUFLAGS += -Xptxas -v -Xcompiler -Wall,-Wextra,-Wno-unused-parameter
 CXXFLAGS  += -Wall -Wextra