From d638d4164bf70905dba056fca74c55b628a9c9de Mon Sep 17 00:00:00 2001 From: Kirthi Shankar Sivamani Date: Mon, 29 Jun 2026 04:24:24 +0530 Subject: [PATCH] Revert "Add wheel support for Newton-Schulz method via cuSolverMp (#3004)" This reverts commit 20e185caac7ded8073f3c235c4e828b440c26e00. Signed-off-by: ksivamani --- build_tools/utils.py | 11 ++-------- build_tools/wheel_utils/Dockerfile.aarch | 13 +---------- build_tools/wheel_utils/Dockerfile.x86 | 13 +---------- build_tools/wheel_utils/build_wheels.sh | 4 ---- setup.py | 2 -- transformer_engine/common/__init__.py | 28 ------------------------ 6 files changed, 4 insertions(+), 67 deletions(-) diff --git a/build_tools/utils.py b/build_tools/utils.py index 82a1dd968c..a3bb8c23f2 100644 --- a/build_tools/utils.py +++ b/build_tools/utils.py @@ -14,7 +14,7 @@ import sys import platform from pathlib import Path -from importlib.metadata import PackageNotFoundError, distribution, version as get_version +from importlib.metadata import version as get_version from subprocess import CalledProcessError from typing import List, Optional, Tuple, Union @@ -323,17 +323,10 @@ def cuda_version() -> Tuple[int, ...]: version_str = get_version("nvidia-cuda-runtime-cu12") version_tuple = tuple(int(part) for part in version_str.split(".") if part.isdigit()) return version_tuple - except PackageNotFoundError: + except importlib.metadata.PackageNotFoundError: raise RuntimeError("Could neither find NVCC executable nor CUDA runtime Python package.") -def cusolvermp_pypi_package_name(cuda_major: Optional[int] = None) -> str: - """PyPI package providing cuSolverMp runtime libraries for a CUDA major version.""" - if cuda_major is None: - cuda_major = cuda_version()[0] - return f"nvidia-cusolvermp-cu{cuda_major}" - - def get_frameworks() -> List[str]: """DL frameworks to build support for""" _frameworks: List[str] = [] diff --git a/build_tools/wheel_utils/Dockerfile.aarch b/build_tools/wheel_utils/Dockerfile.aarch index 4f6635f507..c040dadcdb 100644 --- a/build_tools/wheel_utils/Dockerfile.aarch +++ b/build_tools/wheel_utils/Dockerfile.aarch @@ -35,23 +35,12 @@ RUN dnf clean all RUN dnf -y install glog.aarch64 glog-devel.aarch64 RUN dnf -y install libnccl libnccl-devel libnccl-static -# expose system libs for TE CMake build. -RUN dnf -y install \ - libcusolvermp0-cuda-${CUDA_MAJOR} libcusolvermp0-devel-cuda-${CUDA_MAJOR} && \ - dnf clean all -RUN mkdir -p /opt/nvidia/cusolvermp && \ - ln -s /usr/include/libcusolvermp/${CUDA_MAJOR} /opt/nvidia/cusolvermp/include && \ - ln -s /usr/lib64/libcusolvermp/${CUDA_MAJOR} /opt/nvidia/cusolvermp/lib && \ - echo "/usr/lib64/libcusolvermp/${CUDA_MAJOR}" > /etc/ld.so.conf.d/999_nvidia_cusolvermp.conf && \ - ldconfig - ENV PATH="/usr/local/cuda/bin:${PATH}" -ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:/opt/nvidia/cusolvermp/lib:${LD_LIBRARY_PATH}" +ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:${LD_LIBRARY_PATH}" ENV CUDA_HOME=/usr/local/cuda ENV CUDA_ROOT=/usr/local/cuda ENV CUDA_PATH=/usr/local/cuda ENV CUDADIR=/usr/local/cuda -ENV CUSOLVERMP_HOME=/opt/nvidia/cusolvermp ENV NVTE_RELEASE_BUILD=1 CMD ["/bin/bash", "-c", "bash /TransformerEngine/build_tools/wheel_utils/build_wheels.sh manylinux_2_28_aarch64 $BUILD_METAPACKAGE $BUILD_COMMON $BUILD_PYTORCH $BUILD_JAX $CUDA_MAJOR"] diff --git a/build_tools/wheel_utils/Dockerfile.x86 b/build_tools/wheel_utils/Dockerfile.x86 index b01e443910..2728b6b7c1 100644 --- a/build_tools/wheel_utils/Dockerfile.x86 +++ b/build_tools/wheel_utils/Dockerfile.x86 @@ -35,23 +35,12 @@ RUN dnf clean all RUN dnf -y install glog.x86_64 glog-devel.x86_64 RUN dnf -y install libnccl libnccl-devel libnccl-static -# expose system libs for TE CMake build. -RUN dnf -y install \ - libcusolvermp0-cuda-${CUDA_MAJOR} libcusolvermp0-devel-cuda-${CUDA_MAJOR} && \ - dnf clean all -RUN mkdir -p /opt/nvidia/cusolvermp && \ - ln -s /usr/include/libcusolvermp/${CUDA_MAJOR} /opt/nvidia/cusolvermp/include && \ - ln -s /usr/lib64/libcusolvermp/${CUDA_MAJOR} /opt/nvidia/cusolvermp/lib && \ - echo "/usr/lib64/libcusolvermp/${CUDA_MAJOR}" > /etc/ld.so.conf.d/999_nvidia_cusolvermp.conf && \ - ldconfig - ENV PATH="/usr/local/cuda/bin:${PATH}" -ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:/opt/nvidia/cusolvermp/lib:${LD_LIBRARY_PATH}" +ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:${LD_LIBRARY_PATH}" ENV CUDA_HOME=/usr/local/cuda ENV CUDA_ROOT=/usr/local/cuda ENV CUDA_PATH=/usr/local/cuda ENV CUDADIR=/usr/local/cuda -ENV CUSOLVERMP_HOME=/opt/nvidia/cusolvermp ENV NVTE_RELEASE_BUILD=1 CMD ["/bin/bash", "-c", "bash /TransformerEngine/build_tools/wheel_utils/build_wheels.sh manylinux_2_28_x86_64 $BUILD_METAPACKAGE $BUILD_COMMON $BUILD_PYTORCH $BUILD_JAX $CUDA_MAJOR"] diff --git a/build_tools/wheel_utils/build_wheels.sh b/build_tools/wheel_utils/build_wheels.sh index 17bde4ef17..ff422e9d2c 100644 --- a/build_tools/wheel_utils/build_wheels.sh +++ b/build_tools/wheel_utils/build_wheels.sh @@ -25,10 +25,6 @@ git submodule update --init --recursive # Install deps /opt/python/cp310-cp310/bin/pip install cmake pybind11[global] ninja setuptools wheel -# Enable optional build features. cuSolverMp is provided by the build image -# (see Dockerfile.x86 / Dockerfile.aarch), which also sets CUSOLVERMP_HOME. -export NVTE_WITH_CUSOLVERMP=1 - if $BUILD_METAPACKAGE ; then cd /TransformerEngine NVTE_BUILD_METAPACKAGE=1 /opt/python/cp310-cp310/bin/python setup.py bdist_wheel 2>&1 | tee /wheelhouse/logs/metapackage.txt diff --git a/setup.py b/setup.py index 2ff305867a..b231a5c55d 100644 --- a/setup.py +++ b/setup.py @@ -20,7 +20,6 @@ from build_tools.utils import ( cuda_archs, cuda_version, - cusolvermp_pypi_package_name, get_frameworks, remove_dups, min_python_version_str, @@ -119,7 +118,6 @@ def setup_requirements() -> Tuple[List[str], List[str]]: "pydantic", "importlib-metadata>=1.0", "packaging", - cusolvermp_pypi_package_name(), ] test_reqs: List[str] = ["pytest>=8.2.1"] diff --git a/transformer_engine/common/__init__.py b/transformer_engine/common/__init__.py index 42b458bfc5..231680321e 100644 --- a/transformer_engine/common/__init__.py +++ b/transformer_engine/common/__init__.py @@ -255,29 +255,6 @@ def _nvidia_cudart_include_dir() -> str: return str(include_dir) if include_dir.exists() else "" -@functools.lru_cache(maxsize=None) -def _is_cusolvermp_installed_in_system() -> bool: - """Check if cuSolverMp is registered in the system library cache.""" - - if platform.system() != "Linux": - return False - - try: - result = subprocess.run( - ["ldconfig", "-p"], - capture_output=True, - text=True, - check=False, - ) - except (OSError, subprocess.SubprocessError): - return False - - if result.returncode != 0: - return False - - return any("cusolvermp" in line.lower() for line in result.stdout.splitlines()) - - @functools.lru_cache(maxsize=None) def _load_cuda_library_from_python(lib_name: str, strict: bool = False): """ @@ -392,11 +369,6 @@ def _load_core_library(): _, _CUDNN_LIB_CTYPES = _load_cuda_library("cudnn") system_nvrtc, _NVRTC_LIB_CTYPES = _load_cuda_library("nvrtc") system_curand, _CURAND_LIB_CTYPES = _load_cuda_library("curand") - _CUSOLVERMP_LIB_CTYPES = None - if not _is_cusolvermp_installed_in_system() and any( - _is_package_installed(p) for p in ("nvidia-cusolvermp-cu12", "nvidia-cusolvermp-cu13") - ): - _, _CUSOLVERMP_LIB_CTYPES = _load_cuda_library_from_python("cusolverMp", strict=False) # This additional step is necessary to be able to install TE wheels # and import TE (without any guards) in an environment where the cuda