#!/usr/bin/make -f

# Build with the clang toolchain.
export CXX=clang++
export CC=clang
# dpkg-buildflags tuning: enable every hardening feature except "branch",
# and switch off link-time optimization.
export DEB_BUILD_MAINT_OPTIONS = hardening=+all,-branch optimize=-lto
# Prepend -gz to the maintainer C++ flags (requests compressed debug sections).
export DEB_CXXFLAGS_MAINT_PREPEND = -gz
# Exported to every child process; presumably makes the generated build
# system echo full command lines -- TODO confirm which tool consumes it.
export VERBOSE=1
# Symbol-file check level handed to dpkg-gensymbols (4 is its strictest level).
export DPKG_GENSYMBOLS_CHECK_LEVEL = 4

# Load the DEB_HOST_* / DEB_BUILD_* architecture variables from dpkg.
# This file relies on DEB_HOST_ARCH, DEB_HOST_MULTIARCH and DEB_HOST_GNU_TYPE;
# they are only guaranteed to be in the environment when the build is driven
# by dpkg-buildpackage, so define them here for direct debian/rules calls too.
include /usr/share/dpkg/architecture.mk

# Only enable clients (tests/benchmarks) on x86_64 where x86 intrinsics exist.
ifeq ($(DEB_HOST_ARCH),amd64)
    BUILD_CLIENTS = ON
else
    BUILD_CLIENTS = OFF
endif

# Absolute paths of the assembler and offload bundler, exported under the
# TENSILE_ROCM_* names (presumably read by Tensile -- verify against Tensile).
export TENSILE_ROCM_ASSEMBLER_PATH=/usr/bin/clang++
export TENSILE_ROCM_OFFLOAD_BUNDLER_PATH=/usr/bin/clang-offload-bundler
# Append the versioned LLVM tool directory, so it is searched after the
# existing PATH entries.
export PATH:=$(PATH):/usr/lib/llvm-22/bin

# Semicolon-separated list of GPU architectures. It is passed to CMake as
# GPU_TARGETS and Tensile_ARCHITECTURE, and converted to a space-separated
# substvar in override_dh_gencontrol.
GPU_TARGETS := gfx942;gfx950

# Due to the nested component structure, define base variables for tensilelite.
TENSILELITE = $(CURDIR)/tensilelite/tensilelite
TENSILE_ROOT = $(TENSILELITE)/Tensile
# major.minor version of the python3 interpreter that runs setup.py. Deriving
# it here keeps the install path below in step with the default Python
# instead of pinning one release (the copy step already globs python3.*).
PYTHON3_VERSION := $(shell python3 -c 'import sysconfig; print(sysconfig.get_python_version())')
# Custom Python path: where "setup.py install --root $(TENSILELITE)" puts the
# Tensile modules.
LOCAL_PYTHON_PATH := $(TENSILELITE)/usr/local/lib/python$(PYTHON3_VERSION)/dist-packages
# The 'fake' HIPBLASLT_PATH, handed to CMake as HIPSPARSELT_HIPBLASLT_PATH.
FAKE_HIPBLASLT_PATH := $(CURDIR)/tensilelite

# Put the locally installed Tensile scripts and modules ahead of any others.
export PATH := $(TENSILELITE)/usr/local/bin:$(PATH)
export PYTHONPATH := $(LOCAL_PYTHON_PATH):$(PYTHONPATH)

# CMake options, grouped by purpose and joined into CMAKE_FLAGS at the end.
# Every group is recursively expanded (=) on purpose: the nanobind lookup
# below is a $(shell ...) call and must only run when the flags are actually
# used by the configure step, not each time debian/rules is parsed.

# Generic switches: release build with verbose build output.
cmake_common_flags = \
	-DCMAKE_BUILD_TYPE=Release \
	-DBUILD_VERBOSE=ON \
	-DCMAKE_VERBOSE_MAKEFILE=ON

# ROCm / HIP options: target GPUs, client (test/benchmark) builds, the static
# BLAS library and the C++ compiler.
cmake_rocm_flags = \
	-DROCM_SYMLINK_LIBS=OFF \
	-DGPU_TARGETS="$(GPU_TARGETS)" \
	-DBUILD_CLIENTS_BENCHMARKS=$(BUILD_CLIENTS) \
	-DBUILD_CLIENTS_TESTS=$(BUILD_CLIENTS) \
	-DBLAS_LIBRARIES=/usr/lib/$(DEB_HOST_MULTIARCH)/blas/libblas.a \
	-DHIP_PLATFORM=amd \
	-DCMAKE_CXX_COMPILER=$(CXX)

# Tensile / hipBLASLt options, including the locations of the locally
# installed tensilelite tree and its Python modules.
cmake_tensile_flags = \
	-DBUILD_WITH_TENSILE=ON \
	-DTENSILELITE_ENABLE_HOST=ON \
	-DTensile_ARCHITECTURE="$(GPU_TARGETS)" \
	-DTensile_LIBRARY_FORMAT=msgpack \
	-DTensile_COMPILER=hipcc \
	-DTensile_LOGIC=asm_full \
	-DTensile_LAZY_LIBRARY_LOADING=ON \
	-DTensile_SEPARATE_ARCHITECTURES=ON \
	-DTensile_TEST_LOCAL_PATH=$(TENSILELITE) \
	-DTensile_ROOT=$(TENSILE_ROOT) \
	-DTensile_CODE_OBJECT_VERSION=default \
	-Dnanobind_DIR=$(shell python3 -m nanobind --cmake_dir) \
	-DBUILD_USE_LOCAL_TENSILE_HIPBLASLT_NEXT_CMAKE=OFF \
	-DHIPSPARSELT_HIPBLASLT_PATH=$(FAKE_HIPBLASLT_PATH) \
	-DHIPBLASLT_ENABLE_OPENMP=OFF \
	-DHIPBLASLT_BUNDLE_PYTHON_DEPS=ON \
	-DHIPBLASLT_ENABLE_MSGPACK=ON \
	-DVIRTUALENV_BIN_DIR=/usr/bin \
	-DVIRTUALENV_PYTHON_EXENAME=python3 \
	-DVIRTUALENV_SITE_PATH=$(LOCAL_PYTHON_PATH)

# Full option list passed to dh_auto_configure, in the order above.
CMAKE_FLAGS = \
	$(cmake_common_flags) \
	$(cmake_rocm_flags) \
	$(cmake_tensile_flags)

# Catch-all rule: hand every target not overridden below to the debhelper
# sequencer.
%:
	dh $@

# Hook run before dh_auto_configure for arch-dependent builds. In order:
#   1. run the tensilelite setup, installing under $(TENSILELITE) via --root;
#   2. create the obj-<gnu-type>/lib directory where the Python libraries are
#      expected to be;
#   3. copy the generated Tensile Python package into that directory.
execute_before_dh_auto_configure-arch:
	cd $(TENSILELITE) \
		&& python3 setup.py install --root $(TENSILELITE) \
		&& mkdir -p $(CURDIR)/obj-$(DEB_HOST_GNU_TYPE)/lib \
		&& cp -r $(TENSILELITE)/usr/local/lib/python3.*/dist-packages/Tensile $(CURDIR)/obj-$(DEB_HOST_GNU_TYPE)/lib/

# Configure the arch-dependent build, forwarding the CMake options collected
# in CMAKE_FLAGS (everything after "--" goes to the build system).
override_dh_auto_configure-arch:
	dh_auto_configure -- $(CMAKE_FLAGS)

# Run the "quick" subset of the hipsparselt-test suite at build time.
#
# - The test client is only built when BUILD_CLIENTS is ON, so on every other
#   architecture the tests are skipped instead of calling a missing binary.
# - Tests are skipped when DEB_BUILD_OPTIONS contains nocheck or noinsttest.
# - The binary is looked up in obj-$(DEB_HOST_GNU_TYPE), the same build
#   directory the configure hook populates.
# - Without a readable /dev/kfd there is no usable AMD GPU: warn and skip.
# - If rocminfo reports none of the architectures in GPU_TARGETS, the
#   compress and spmm tests are excluded from the run. In a gtest filter,
#   every pattern after the "-" is an exclusion.
override_dh_auto_test-arch:
ifeq ($(BUILD_CLIENTS),ON)
ifeq (,$(filter nocheck noinsttest,$(DEB_BUILD_OPTIONS)))
	set -e \
	; TEST_BIN=obj-$(DEB_HOST_GNU_TYPE)/clients/staging/hipsparselt-test \
	; if [ -r /dev/kfd ] \
	; then \
		GPU_ARCH=$$(rocminfo | grep "Name:" | grep -E "$(subst ;,|,$(GPU_TARGETS))" || true) \
		; if [ -n "$$GPU_ARCH" ]; then \
			"$$TEST_BIN" --gtest_filter="*quick*" \
		; else \
			echo "W: Supported CDNA GPU not found. Running fallback tests only." \
			; "$$TEST_BIN" --gtest_filter="*quick*:-*compress_test*:*spmm_test*" \
		; fi \
	; else echo "W: /dev/kfd unreadable: no available AMD GPU." \
	;      echo "W: tests skipped." \
	; fi
endif
else
	@echo "W: test clients are not built on $(DEB_HOST_ARCH): tests skipped."
endif

# Arch-independent builds have nothing to configure: ":" is the shell no-op.
override_dh_auto_configure-indep:
	:

# Build the documentation for arch-independent builds, unless "nodoc" is set
# in DEB_BUILD_OPTIONS.
#   1. Rewrite WARN_AS_ERROR to NO in the Doxyfile, in place.
#   2. Run rocm-docs-build with both proxy variables pointing at 127.0.0.1:9,
#      presumably so that any attempt to reach the network fails quickly.
override_dh_auto_build-indep:
ifeq ($(filter nodoc,$(DEB_BUILD_OPTIONS)),)
	perl -i -pe 's/WARN_AS_ERROR.*=.*YES/WARN_AS_ERROR = NO/' docs/doxygen/Doxyfile
	env http_proxy='127.0.0.1:9' https_proxy='127.0.0.1:9' rocm-docs-build
endif

# No tests to run for the arch-independent build.
override_dh_auto_test-indep:
	:

# Skip the build-system install step for the arch-independent build.
override_dh_auto_install-indep:
	:

# Strip binaries, but exclude files whose names contain ".hsaco" or ".co"
# (presumably GPU code objects -- TODO confirm), and do not generate automatic
# -dbgsym packages.
override_dh_strip:
	dh_strip -X.hsaco -X.co --no-automatic-dbgsym

# dwz doesn't fully support DWARF-5 yet, see #1016936
# Until then dh_dwz is replaced by the shell no-op ":".
override_dh_dwz:
	:

# Define the rocm:GPU-Architecture substitution variable as the
# space-separated form of GPU_TARGETS (its ";" separators become spaces).
override_dh_gencontrol:
	dh_gencontrol -- -Vrocm:GPU-Architecture="$(subst ;, ,$(GPU_TARGETS))"

# Run the stock dh_fixperms, then clear the executable bit on the *.yaml and
# *.data files in the tests package's /usr/bin.
# The extra step is limited to amd64: tests are skipped on ARM, so the files
# may not exist elsewhere. "|| true" keeps a failing chmod (e.g. an unmatched
# glob) from failing the build.
override_dh_fixperms:
	dh_fixperms
ifneq (,$(filter amd64,$(DEB_HOST_ARCH)))
	chmod a-x \
		debian/hipsparselt-tests/usr/bin/*.yaml \
		debian/hipsparselt-tests/usr/bin/*.data \
		|| true
endif

# Post-process the HTML documentation after dh_sphinxdoc:
#   - delete the duplicate license.md from the _sources directory;
#   - in every *.html file under the doc package, replace the jsdelivr and
#     mathjax.org CDN URLs for MathJax with paths under /usr/share, so the
#     pages reference local files instead of remote scripts.
# find's "-exec ... {} +" passes many files to each sed invocation.
override_dh_sphinxdoc:
	dh_sphinxdoc
	# Remove redundant license.
	rm -f debian/hipsparselt-doc/usr/share/doc/hipsparselt-doc/html/_sources/license.md
	# Fix Privacy Breach (MathJax) by using local references instead of embedded js scripts.
	find debian/hipsparselt-doc/usr/share/doc -name "*.html" -exec \
		sed -i -E \
		    -e 's|https?://cdn\.jsdelivr\.net/npm/mathjax@[0-9]+(/es5)?/tex-mml-chtml\.js|/usr/share/nodejs/mathjax-full/es5/tex-mml-chtml.js|g' \
		    -e 's|https?://cdn\.mathjax\.org/mathjax/latest/MathJax\.js|/usr/share/javascript/mathjax/MathJax.js|g' \
		    {} +

# Run the stock dh_install, then (amd64 only) rename the gentest helper:
# scripts in /usr/bin must not have language extensions, so the ".py" suffix
# is dropped. Nothing happens when the script is not present.
override_dh_install:
	dh_install
ifneq (,$(filter amd64,$(DEB_HOST_ARCH)))
	gentest=debian/hipsparselt-tests/usr/bin/hipsparselt_gentest; \
	[ ! -f "$$gentest.py" ] || mv "$$gentest.py" "$$gentest"
endif
