Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/rocm-jax-stable-2024_05_27'
Browse files Browse the repository at this point in the history
  • Loading branch information
Ruturaj4 committed Jun 3, 2024
2 parents 7488ace + ea1b8a6 commit bb233bf
Show file tree
Hide file tree
Showing 1,355 changed files with 52,634 additions and 19,381 deletions.
2 changes: 1 addition & 1 deletion .kokoro/jax/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ prelude() {

if is_linux_gpu_job ; then
export JAX_CUDA_VERSION=12
export JAX_CUDNN_VERSION=8.9
export JAX_CUDNN_VERSION=9.1
nvidia-smi
setup_env_vars_py39
else
Expand Down
37 changes: 24 additions & 13 deletions .kokoro/linux/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -40,21 +40,27 @@ function pull_docker_image_with_retries() {
echo "TF_INFO_DOCKER_SHA,$(docker pull "$DOCKER_IMAGE" | sed -n '/Digest:/s/Digest: //g p')" >> "$KOKORO_ARTIFACTS_DIR/custom_sponge_config.csv"
}

# TODO(b/338885148): Remove this once the TF containers have cuDNN 9
if is_linux_gpu_job ; then
DOCKER_IMAGE="gcr.io/tensorflow-sigs/build@sha256:dddcaf30321e9007103dce75c51b83fea3c06de462fcf41e7c6ae93f37fc3545"
fi

pull_docker_image_with_retries


# Start a container in the background
docker run --name xla -w /tf/xla -itd --rm \
-v "$KOKORO_ARTIFACTS_DIR/github/xla:/tf/xla" \
-v "$KOKORO_ARTIFACTS_DIR/pkg:/tf/pkg" \
docker run --name xla -w /github/xla -itd --rm \
-v "./github:/github" \
"$DOCKER_IMAGE" \
bash

TAGS_FILTER="-no_oss"
ADDITIONAL_FLAGS=""
RBE_FLAGS=""
TARGET_FILTERS="-@tsl//tsl/platform:subprocess_test -@tsl//tsl/platform/cloud:google_auth_provider_test -@tsl//tsl/platform/cloud:oauth_client_test"
TARGET_FILTERS=""

if is_linux_gpu_job ; then
TAGS_FILTER="$TAGS_FILTER,requires-gpu-nvidia"
TAGS_FILTER="$TAGS_FILTER,requires-gpu-nvidia,-requires-gpu-amd"

# We are currently running XLA presubmits on machines with NVIDIA T4 GPUs,
which have a compute capability of 7.5. So we filter out all the tests
Expand All @@ -64,18 +70,23 @@ if is_linux_gpu_job ; then

ADDITIONAL_FLAGS="$ADDITIONAL_FLAGS --run_under=//tools/ci_build/gpu_build:parallel_gpu_execute"
RBE_FLAGS="--config=rbe_linux_cuda_nvcc --jobs=150"
(
#TODO(b/338885148): Remove this block after TF was updated to cuDNN 9
pushd github/xla
sed -i 's/@sigbuild-r2\.17-clang_/@sigbuild-r2.17-clang-cudnn9_/g' .bazelrc
echo "The following changes were made:"
git diff -- .bazelrc || true
popd
)
echo "***NOTE: nvidia-smi lists the highest CUDA version the driver supports, which may be different than the version of CUDA actually used!!***"
nvidia-smi
else
TAGS_FILTER="$TAGS_FILTER,-gpu,-requires-gpu-nvidia"
TAGS_FILTER="$TAGS_FILTER,-gpu,-requires-gpu-nvidia,-requires-gpu-amd"
ADDITIONAL_FLAGS="$ADDITIONAL_FLAGS --config=nonccl"
TARGET_FILTERS="$TARGET_FILTERS -//xla/service/gpu/..."

if is_linux_cpu_arm64_job ; then
TAGS_FILTER="$TAGS_FILTER,-no_aarch64"
ADDITIONAL_FLAGS="$ADDITIONAL_FLAGS --action_env PYTHON_BIN_PATH=/usr/bin/python3.11 --python_path=/usr/bin/python3.11"
# Some cross-compile tests are not working for XLA Linux Aarch64.
# TODO(ddunleavy): Revisit these when hermetic python is available.
TARGET_FILTERS="$TARGET_FILTERS -//xla/python_api:xla_shape_test -//xla/python_api:xla_literal_test -//xla/service:xla_aot_compile_stablehlo_cpu_test -//xla/tests:local_client_aot_test"
TAGS_FILTER="$TAGS_FILTER,-not_run:arm"
RBE_FLAGS="--config=rbe_cross_compile_linux_arm64_xla --jobs=150"
else
RBE_FLAGS="--config=rbe_linux_cpu --jobs=150"
Expand All @@ -91,7 +102,7 @@ docker exec xla bazel \
--keep_going \
--nobuild_tests_only \
--features=layering_check \
--profile=/tf/pkg/profile.json.gz \
--profile=profile.json.gz \
--flaky_test_attempts=3 \
--config=warnings \
$RBE_FLAGS \
Expand All @@ -100,7 +111,7 @@ docker exec xla bazel \


# Print build time statistics, including critical path.
docker exec xla bazel analyze-profile "/tf/pkg/profile.json.gz"
docker exec xla bazel analyze-profile profile.json.gz

# Stop container
docker stop xla
Expand Down
26 changes: 26 additions & 0 deletions WORKSPACE
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,32 @@ workspace(name = "xla")
# restriction that load() statements need to be at the top of .bzl files.
# E.g. we can not retrieve a new repository with http_archive and then load()
# a macro from that repository in the same file.

# Initialize hermetic Python
load("//third_party/py:python_init_rules.bzl", "python_init_rules")

python_init_rules()

load("//third_party/py:python_init_repositories.bzl", "python_init_repositories")

# Maps a Python version to its pip requirements lock file — presumably
# consumed by the hermetic toolchain/pip setup below; confirm against
# python_init_repositories' definition.
python_init_repositories(
    requirements = {
        "3.11": "//:requirements_lock_3_11.txt",
    },
)

load("//third_party/py:python_init_toolchains.bzl", "python_init_toolchains")

python_init_toolchains()

load("//third_party/py:python_init_pip.bzl", "python_init_pip")

python_init_pip()

load("@pypi//:requirements.bzl", "install_deps")

# NOTE(review): by name, this materializes the pip packages from the
# requirements lock registered above — verify in @pypi//:requirements.bzl.
install_deps()

load(":workspace4.bzl", "xla_workspace4")

xla_workspace4()
Expand Down
28 changes: 18 additions & 10 deletions build_tools/configure/configure.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,27 +69,32 @@ def _find_executable(executable: str) -> Optional[str]:
return None


def _find_executable_or_die(executable: str) -> str:
def _find_executable_or_die(
executable_name: str, executable_path: Optional[str] = None
) -> str:
"""Finds executable and resolves symlinks or raises RuntimeError.
Resolving symlinks is sometimes necessary for finding system headers.
Args:
executable: The name of the executable that we want to find.
executable_name: The name of the executable that we want to find.
executable_path: If not None, the path to the executable.
Returns:
The path to the executable we are looking for.
The path to the executable we are looking for, after symlinks are resolved.
Raises:
RuntimeError: if path to the executable cannot be found.
"""
resolved_path_to_exe = _find_executable(executable)
if executable_path:
return str(pathlib.Path(executable_path).resolve(strict=True))
resolved_path_to_exe = _find_executable(executable_name)
if resolved_path_to_exe is None:
raise RuntimeError(
f"Could not find executable `{executable}`! "
f"Could not find executable `{executable_name}`! "
"Please change your $PATH or pass the path directly like"
f"`--{executable}_path=path/to/executable."
f"`--{executable_name}_path=path/to/executable."
)
logging.info("Found path to %s at %s", executable, resolved_path_to_exe)
logging.info("Found path to %s at %s", executable_name, resolved_path_to_exe)

return resolved_path_to_exe

Expand Down Expand Up @@ -183,6 +188,7 @@ class Backend(ArgparseableEnum):
CPU = enum.auto()
CUDA = enum.auto()
ROCM = enum.auto()
SYCL = enum.auto()


class HostCompiler(ArgparseableEnum):
Expand Down Expand Up @@ -235,7 +241,7 @@ def get_relevant_paths_and_versions(self, config: "XLAConfigOptions"):
self.ld_library_path = os.environ.get("LD_LIBRARY_PATH", None)

if config.host_compiler == HostCompiler.CLANG:
self.clang_path = self.clang_path or _find_executable_or_die("clang")
self.clang_path = _find_executable_or_die("clang", self.clang_path)
self.clang_major_version = (
self.clang_major_version or _get_clang_major_version(self.clang_path)
)
Expand All @@ -246,11 +252,11 @@ def get_relevant_paths_and_versions(self, config: "XLAConfigOptions"):
# directly.
self.lld_path = self.lld_path or shutil.which("ld.lld")
elif config.host_compiler == HostCompiler.GCC:
self.gcc_path = self.gcc_path or _find_executable_or_die("gcc")
self.gcc_path = _find_executable_or_die("gcc", self.gcc_path)

if config.backend == Backend.CUDA:
if config.cuda_compiler == CudaCompiler.CLANG:
self.clang_path = self.clang_path or _find_executable_or_die("clang")
self.clang_path = _find_executable_or_die("clang", self.clang_path)

if not self.cuda_compute_capabilities:
self.cuda_compute_capabilities = _get_cuda_compute_capabilities_or_die()
Expand Down Expand Up @@ -402,6 +408,8 @@ def to_bazelrc_lines(
rc.append("build --config nonccl")
elif self.backend == Backend.ROCM:
pass
elif self.backend == Backend.SYCL:
rc.append("build --config sycl")

# Lines that are added for every backend
if dpav.ld_library_path:
Expand Down
14 changes: 12 additions & 2 deletions build_tools/configure/configure_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import os

from absl.testing import absltest

from xla.build_tools import test_utils
Expand Down Expand Up @@ -55,7 +57,11 @@ def setUpClass(cls):
cls.clang_bazelrc_lines = [line.strip() for line in f.readlines()]

with (testdata / "gcc.bazelrc").open() as f:
cls.gcc_bazelrc_lines = [line.strip() for line in f.readlines()]
resolved_gcc_path = os.path.realpath(_GCC_PATH)
cls.gcc_bazelrc_lines = [
line.strip().replace(_GCC_PATH, resolved_gcc_path)
for line in f.readlines()
]

with (testdata / "cuda_clang.bazelrc").open() as f:
cls.cuda_clang_bazelrc_lines = [line.strip() for line in f.readlines()]
Expand All @@ -64,7 +70,11 @@ def setUpClass(cls):
cls.nvcc_clang_bazelrc_lines = [line.strip() for line in f.readlines()]

with (testdata / "nvcc_gcc.bazelrc").open() as f:
cls.nvcc_gcc_bazelrc_lines = [line.strip() for line in f.readlines()]
resolved_gcc_path = os.path.realpath(_GCC_PATH)
cls.nvcc_gcc_bazelrc_lines = [
line.strip().replace(_GCC_PATH, resolved_gcc_path)
for line in f.readlines()
]

def test_clang_bazelrc(self):
config = XLAConfigOptions(
Expand Down
98 changes: 98 additions & 0 deletions build_tools/rocm/run_xla.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
#!/usr/bin/env bash
# Copyright 2024 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# ==============================================================================
#
# Runs the XLA test suite on AMD (ROCm) GPUs with bazel.
# Optional first positional argument: the ROCm install directory.

set -e
set -x

N_BUILD_JOBS=$(grep -c ^processor /proc/cpuinfo)

# If rocm-smi exists locally (it should) use it to find out how many GPUs we
# have to test with.
# BUGFIX: under `set -e`, the original `rocm-smi -i; STATUS=$?` aborted the
# whole script when rocm-smi failed, so the TF_GPU_COUNT=1 fallback was dead
# code. Probing the command inside the `if` condition is exempt from `set -e`.
if rocm-smi -i; then
  TF_GPU_COUNT=$(rocm-smi -i | grep 'Device ID' | grep -c 'GPU')
else
  TF_GPU_COUNT=1
fi
TF_TESTS_PER_GPU=1
# One concurrent test slot per GPU (times TF_TESTS_PER_GPU).
N_TEST_JOBS=$((TF_GPU_COUNT * TF_TESTS_PER_GPU))

echo ""
echo "Bazel will use ${N_BUILD_JOBS} concurrent build job(s) and ${N_TEST_JOBS} concurrent test job(s)."
echo ""

# First positional argument (if any) specifies the ROCM_INSTALL_DIR;
# otherwise fall back to $ROCM_PATH, then to a pinned default.
if [[ -n $1 ]]; then
  ROCM_INSTALL_DIR=$1
elif [[ -z "${ROCM_PATH}" ]]; then
  ROCM_INSTALL_DIR=/opt/rocm-6.0.2
else
  ROCM_INSTALL_DIR=$ROCM_PATH
fi

export PYTHON_BIN_PATH=$(which python3)
PYTHON_VERSION=$(python3 -c "import sys;print(f'{sys.version_info.major}.{sys.version_info.minor}')")
export TF_PYTHON_VERSION=$PYTHON_VERSION
export TF_NEED_ROCM=1
export ROCM_PATH=$ROCM_INSTALL_DIR

# Run only AMD-GPU-tagged OSS tests; exclude NVIDIA-only tests and tests
# requiring NVIDIA compute capabilities (sm_60..sm_90).
TAGS_FILTER="gpu,requires-gpu-amd,-requires-gpu-nvidia,-no_oss,-oss_excluded,-oss_serial,-no_gpu,-no_rocm"
UNSUPPORTED_GPU_TAGS="$(echo -requires-gpu-sm{60,70,80,86,89,90}{,-only})"
TAGS_FILTER="${TAGS_FILTER},${UNSUPPORTED_GPU_TAGS// /,}"

if [ -f /usertools/rocm.bazelrc ]; then
  # Use the bazelrc files in /usertools if available
  if [ ! -d /tf ]; then
    # The bazelrc files in /usertools expect /tf to exist
    mkdir /tf
  fi

  bazel \
    --bazelrc=/usertools/rocm.bazelrc \
    test \
    --config=sigbuild_local_cache \
    --config=rocm \
    --config=xla_cpp \
    --build_tag_filters=${TAGS_FILTER} \
    --test_tag_filters=${TAGS_FILTER} \
    --keep_going \
    --test_output=errors \
    --local_test_jobs=${N_TEST_JOBS} \
    --test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \
    --test_env=TF_GPU_COUNT=$TF_GPU_COUNT \
    --repo_env=HERMETIC_PYTHON_VERSION=3.11 \
    --action_env=XLA_FLAGS=--xla_gpu_force_compilation_parallelism=16 \
    --action_env=XLA_FLAGS=--xla_gpu_enable_llvm_module_compilation_parallelism=true \
    --run_under=//tools/ci_build/gpu_build:parallel_gpu_execute \
    -- //xla/...
else
  # No prebuilt bazelrc available: run configure.py with defaults, then run
  # the same suite with inline tag filters.
  yes "" | $PYTHON_BIN_PATH configure.py
  bazel \
    test \
    -k \
    --test_tag_filters=-no_oss,-oss_excluded,-oss_serial,gpu,requires-gpu,-no_gpu,-no_rocm --keep_going \
    --build_tag_filters=-no_oss,-oss_excluded,-oss_serial,gpu,requires-gpu,-no_gpu,-no_rocm \
    --config=rocm \
    --test_output=errors \
    --local_test_jobs=${N_TEST_JOBS} \
    --test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \
    --test_env=TF_GPU_COUNT=$TF_GPU_COUNT \
    --repo_env=HERMETIC_PYTHON_VERSION=3.11 \
    --action_env=XLA_FLAGS=--xla_gpu_force_compilation_parallelism=16 \
    --action_env=XLA_FLAGS=--xla_gpu_enable_llvm_module_compilation_parallelism=true \
    --run_under=//tools/ci_build/gpu_build:parallel_gpu_execute \
    -- //xla/...
fi
46 changes: 46 additions & 0 deletions build_tools/sycl/build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#!/usr/bin/env bash
# Copyright 2024 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
#
#
# A script to build XLA sycl target.
#
# Required input:
#   workspace: the local directory in which to do this build

# BUGFIX: fail fast — without `set -e`, a failed `git clone` (or any install
# step) still ran every subsequent build script against a broken workspace.
set -e

if [ $# -lt 1 ]; then
  echo "Error: workspace not set."
  exit 1
fi

workspace=$1

# If the requested workspace already exists, nest a fresh timestamped
# directory under it instead of reusing (and possibly clobbering) it.
if [ -e "${workspace}" ]; then
  time_stamp=$(date +%s%N)
  echo "Warning: ${workspace} exists."
  workspace=$workspace/$time_stamp
  echo "Will use $workspace as new workspace"
fi

# Quote all path expansions so workspaces containing spaces or glob
# characters do not break word splitting.
mkdir -p "$workspace"

xla_path=$workspace/xla
cd "$workspace"
git clone -b yang/ci https://github.com/Intel-tensorflow/xla xla
bash "$xla_path/build_tools/sycl/install_bazel.sh" "$workspace"
bash "$xla_path/build_tools/sycl/install_oneapi.sh" "$workspace" install
bash "$xla_path/build_tools/sycl/build_xla.sh" "$workspace"
bash "$xla_path/build_tools/sycl/clean.sh" "$workspace"
Loading

0 comments on commit bb233bf

Please sign in to comment.