Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 2 additions & 9 deletions build_tools/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import sys
import platform
from pathlib import Path
from importlib.metadata import PackageNotFoundError, distribution, version as get_version
from importlib.metadata import version as get_version

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 The revert dropped `PackageNotFoundError` from the named import but kept the reference in the fully qualified form `importlib.metadata.PackageNotFoundError`. This works because `import importlib` is present and the `from importlib.metadata import ...` statement loads the submodule, but the style is inconsistent. Re-adding the name to the existing `from` import keeps the catch site readable and consistent with how the rest of the file uses `importlib.metadata` symbols.

Suggested change
from importlib.metadata import version as get_version
from importlib.metadata import PackageNotFoundError, version as get_version

Note: If this suggestion doesn't match your team's coding style, reply to this and let me know. I'll remember it for next time!

from subprocess import CalledProcessError
from typing import List, Optional, Tuple, Union

Expand Down Expand Up @@ -323,17 +323,10 @@ def cuda_version() -> Tuple[int, ...]:
version_str = get_version("nvidia-cuda-runtime-cu12")
version_tuple = tuple(int(part) for part in version_str.split(".") if part.isdigit())
return version_tuple
except PackageNotFoundError:
except importlib.metadata.PackageNotFoundError:

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Paired change: once `PackageNotFoundError` is imported again on the `from importlib.metadata import ...` line above, the `except` clause can revert to the shorter, original form.

Suggested change
except importlib.metadata.PackageNotFoundError:
except PackageNotFoundError:

Note: If this suggestion doesn't match your team's coding style, reply to this and let me know. I'll remember it for next time!

raise RuntimeError("Could neither find NVCC executable nor CUDA runtime Python package.")


def cusolvermp_pypi_package_name(cuda_major: Optional[int] = None) -> str:
    """PyPI package providing cuSolverMp runtime libraries for a CUDA major version.

    Args:
        cuda_major: CUDA major version to build the package name for. When
            ``None``, the first element of ``cuda_version()`` is used.

    Returns:
        The package name in the form ``nvidia-cusolvermp-cu<cuda_major>``.
    """
    if cuda_major is None:
        # No explicit version requested: use the detected CUDA version.
        # Any error raised by cuda_version() propagates to the caller.
        cuda_major = cuda_version()[0]
    return f"nvidia-cusolvermp-cu{cuda_major}"


def get_frameworks() -> List[str]:
"""DL frameworks to build support for"""
_frameworks: List[str] = []
Expand Down
13 changes: 1 addition & 12 deletions build_tools/wheel_utils/Dockerfile.aarch
Original file line number Diff line number Diff line change
Expand Up @@ -35,23 +35,12 @@ RUN dnf clean all
RUN dnf -y install glog.aarch64 glog-devel.aarch64
RUN dnf -y install libnccl libnccl-devel libnccl-static

# expose system libs for TE CMake build.
RUN dnf -y install \
libcusolvermp0-cuda-${CUDA_MAJOR} libcusolvermp0-devel-cuda-${CUDA_MAJOR} && \
dnf clean all
RUN mkdir -p /opt/nvidia/cusolvermp && \
ln -s /usr/include/libcusolvermp/${CUDA_MAJOR} /opt/nvidia/cusolvermp/include && \
ln -s /usr/lib64/libcusolvermp/${CUDA_MAJOR} /opt/nvidia/cusolvermp/lib && \
echo "/usr/lib64/libcusolvermp/${CUDA_MAJOR}" > /etc/ld.so.conf.d/999_nvidia_cusolvermp.conf && \
ldconfig

ENV PATH="/usr/local/cuda/bin:${PATH}"
ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:/opt/nvidia/cusolvermp/lib:${LD_LIBRARY_PATH}"
ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:${LD_LIBRARY_PATH}"
ENV CUDA_HOME=/usr/local/cuda
ENV CUDA_ROOT=/usr/local/cuda
ENV CUDA_PATH=/usr/local/cuda
ENV CUDADIR=/usr/local/cuda
ENV CUSOLVERMP_HOME=/opt/nvidia/cusolvermp
ENV NVTE_RELEASE_BUILD=1

CMD ["/bin/bash", "-c", "bash /TransformerEngine/build_tools/wheel_utils/build_wheels.sh manylinux_2_28_aarch64 $BUILD_METAPACKAGE $BUILD_COMMON $BUILD_PYTORCH $BUILD_JAX $CUDA_MAJOR"]
13 changes: 1 addition & 12 deletions build_tools/wheel_utils/Dockerfile.x86
Original file line number Diff line number Diff line change
Expand Up @@ -35,23 +35,12 @@ RUN dnf clean all
RUN dnf -y install glog.x86_64 glog-devel.x86_64
RUN dnf -y install libnccl libnccl-devel libnccl-static

# expose system libs for TE CMake build.
RUN dnf -y install \
libcusolvermp0-cuda-${CUDA_MAJOR} libcusolvermp0-devel-cuda-${CUDA_MAJOR} && \
dnf clean all
RUN mkdir -p /opt/nvidia/cusolvermp && \
ln -s /usr/include/libcusolvermp/${CUDA_MAJOR} /opt/nvidia/cusolvermp/include && \
ln -s /usr/lib64/libcusolvermp/${CUDA_MAJOR} /opt/nvidia/cusolvermp/lib && \
echo "/usr/lib64/libcusolvermp/${CUDA_MAJOR}" > /etc/ld.so.conf.d/999_nvidia_cusolvermp.conf && \
ldconfig

ENV PATH="/usr/local/cuda/bin:${PATH}"
ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:/opt/nvidia/cusolvermp/lib:${LD_LIBRARY_PATH}"
ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:${LD_LIBRARY_PATH}"
ENV CUDA_HOME=/usr/local/cuda
ENV CUDA_ROOT=/usr/local/cuda
ENV CUDA_PATH=/usr/local/cuda
ENV CUDADIR=/usr/local/cuda
ENV CUSOLVERMP_HOME=/opt/nvidia/cusolvermp
ENV NVTE_RELEASE_BUILD=1

CMD ["/bin/bash", "-c", "bash /TransformerEngine/build_tools/wheel_utils/build_wheels.sh manylinux_2_28_x86_64 $BUILD_METAPACKAGE $BUILD_COMMON $BUILD_PYTORCH $BUILD_JAX $CUDA_MAJOR"]
4 changes: 0 additions & 4 deletions build_tools/wheel_utils/build_wheels.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,6 @@ git submodule update --init --recursive
# Install deps
/opt/python/cp310-cp310/bin/pip install cmake pybind11[global] ninja setuptools wheel

# Enable optional build features. cuSolverMp is provided by the build image
# (see Dockerfile.x86 / Dockerfile.aarch), which also sets CUSOLVERMP_HOME.
export NVTE_WITH_CUSOLVERMP=1

if $BUILD_METAPACKAGE ; then
cd /TransformerEngine
NVTE_BUILD_METAPACKAGE=1 /opt/python/cp310-cp310/bin/python setup.py bdist_wheel 2>&1 | tee /wheelhouse/logs/metapackage.txt
Expand Down
2 changes: 0 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
from build_tools.utils import (
cuda_archs,
cuda_version,
cusolvermp_pypi_package_name,
get_frameworks,
remove_dups,
min_python_version_str,
Expand Down Expand Up @@ -119,7 +118,6 @@ def setup_requirements() -> Tuple[List[str], List[str]]:
"pydantic",
"importlib-metadata>=1.0",
"packaging",
cusolvermp_pypi_package_name(),
]
test_reqs: List[str] = ["pytest>=8.2.1"]

Expand Down
28 changes: 0 additions & 28 deletions transformer_engine/common/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,29 +255,6 @@ def _nvidia_cudart_include_dir() -> str:
return str(include_dir) if include_dir.exists() else ""


@functools.lru_cache(maxsize=None)
def _is_cusolvermp_installed_in_system() -> bool:
    """Check if cuSolverMp is registered in the system library cache.

    Runs ``ldconfig -p`` and looks for a line containing ``cusolvermp``
    (case-insensitive). The function takes no arguments, so the
    ``lru_cache`` decorator makes the probe run at most once per process.

    Returns:
        ``True`` if any line of the ``ldconfig -p`` output mentions
        cuSolverMp. ``False`` on non-Linux platforms, when ``ldconfig``
        cannot be launched, or when it exits with a non-zero status.
    """

    if platform.system() != "Linux":
        return False

    try:
        result = subprocess.run(
            ["ldconfig", "-p"],
            capture_output=True,
            text=True,
            check=False,  # exit status is inspected below instead of raising
        )
    except (OSError, subprocess.SubprocessError):
        # Best-effort probe: failing to launch ldconfig is treated as "not installed".
        return False

    if result.returncode != 0:
        return False

    return any("cusolvermp" in line.lower() for line in result.stdout.splitlines())


@functools.lru_cache(maxsize=None)
def _load_cuda_library_from_python(lib_name: str, strict: bool = False):
"""
Expand Down Expand Up @@ -392,11 +369,6 @@ def _load_core_library():
_, _CUDNN_LIB_CTYPES = _load_cuda_library("cudnn")
system_nvrtc, _NVRTC_LIB_CTYPES = _load_cuda_library("nvrtc")
system_curand, _CURAND_LIB_CTYPES = _load_cuda_library("curand")
_CUSOLVERMP_LIB_CTYPES = None
if not _is_cusolvermp_installed_in_system() and any(
_is_package_installed(p) for p in ("nvidia-cusolvermp-cu12", "nvidia-cusolvermp-cu13")
):
_, _CUSOLVERMP_LIB_CTYPES = _load_cuda_library_from_python("cusolverMp", strict=False)

# This additional step is necessary to be able to install TE wheels
# and import TE (without any guards) in an environment where the cuda
Expand Down
Loading