1
0
mirror of synced 2024-09-25 10:01:48 +02:00
TheChymera-overlay/sci-libs/pytorch/files/pytorch-1.6.0-nccl-nvccflags.patch
Horea Christian 61db1849ca
sci-libs/pytorch: fix ahead of Gentoo Science
Bug: https://bugs.gentoo.org/738592
Package-Manager: Portage-3.0.2, Repoman-2.3.23
Signed-off-by: Horea Christian <chr@chymera.eu>
2020-08-23 04:12:34 -04:00

28 lines
1.3 KiB
Diff

# Patch against NCCL's makefiles/common.mk.
# The commit that added this patch references https://bugs.gentoo.org/738592.
#
# Hunk 1 (NVCUFLAGS definition):
#   Drops the hard-coded "-ccbin $(CXX)" from NVCUFLAGS and starts the flag
#   list with $(NVCCFLAGS) instead. NVCUFLAGS is assigned with ":=", so
#   NVCCFLAGS is expanded immediately at that line: it has to be set
#   (environment or make command line) before common.mk is parsed, otherwise
#   it expands to nothing and no -ccbin option is passed at all.
#   NOTE(review): NVCCFLAGS is not defined anywhere in the lines shown here;
#   presumably the ebuild exports it -- confirm.
#
# Hunk 2 (DEBUG conditional):
#   Removes the "ifeq ($(DEBUG), 0) ... else ... endif" block that appended
#   -O3 (release) or -O0 -G -g (debug) to NVCUFLAGS and -O3 -g or
#   -O0 -g -ggdb3 to CXXFLAGS, so this makefile section no longer adds
#   optimisation/debug flags of its own.
#
# Maintenance note: the unprefixed context lines and the "-" lines below must
# stay byte-identical to the upstream file or the hunks will not apply; do not
# reword or reformat them here.
diff -uprN nccl-5949d96f36d050e59d05872f8bbffd2549318e95/makefiles/common.mk nccl-patched/makefiles/common.mk
--- nccl-5949d96f36d050e59d05872f8bbffd2549318e95/makefiles/common.mk 2020-06-09 00:31:44.000000000 +0800
+++ nccl-patched/makefiles/common.mk 2020-08-06 21:25:57.784279738 +0800
@@ -54,7 +54,7 @@ CXXFLAGS := -DCUDA_MAJOR=$(CUDA_MAJOR)
# Maxrregcount needs to be set accordingly to NCCL_MAX_NTHREADS (otherwise it will cause kernel launch errors)
# 512 : 120, 640 : 96, 768 : 80, 1024 : 60
# We would not have to set this if we used __launch_bounds__, but this only works on kernels, not on functions.
-NVCUFLAGS := -ccbin $(CXX) $(NVCC_GENCODE) -std=c++11 -Xptxas -maxrregcount=96 -Xfatbin -compress-all
+NVCUFLAGS := $(NVCCFLAGS) $(NVCC_GENCODE) -std=c++11 -Xptxas -maxrregcount=96 -Xfatbin -compress-all
# Use addprefix so that we can specify more than one path
NVLDFLAGS := -L${CUDA_LIB} -lcudart -lrt
@@ -68,14 +68,6 @@ NVLDFLAGS += ${GCOV_FLAGS:%=-Xcompiler
# $(warning GCOV_FLAGS=${GCOV_FLAGS})
########## GCOV ##########
-ifeq ($(DEBUG), 0)
-NVCUFLAGS += -O3
-CXXFLAGS += -O3 -g
-else
-NVCUFLAGS += -O0 -G -g
-CXXFLAGS += -O0 -g -ggdb3
-endif
-
ifneq ($(VERBOSE), 0)
NVCUFLAGS += -Xptxas -v -Xcompiler -Wall,-Wextra,-Wno-unused-parameter
CXXFLAGS += -Wall -Wextra