Skip to content

Commit 3a941d1

Browse files
committed
Setup NVIDIA driver
1 parent c6ebd4e commit 3a941d1

File tree

1 file changed

+16 -12 lines changed

1 file changed

+16 -12 lines changed

.github/workflows/test.yml

Lines changed: 16 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@ concurrency:
1515
jobs:
1616
build:
1717
name: "Build XLA"
18-
runs-on: linux.4xlarge
18+
runs-on: linux.12xlarge
1919
env:
2020
ECR_DOCKER_IMAGE_BASE: 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/xla_base
2121
GCR_DOCKER_IMAGE: gcr.io/tpu-pytorch/xla_base:latest
@@ -58,22 +58,22 @@ jobs:
5858
- name: Prepare build env
5959
shell: bash
6060
run: |
61-
echo "declare -x SCCACHE_BUCKET=${SCCACHE_BUCKET}" | docker exec -i ${pid} sh -c "cat >> env"
62-
echo "declare -x CC=clang-8 CXX=clang++-8" | docker exec -i ${pid} sh -c "cat >> xla_env"
63-
echo "declare -x XLA_USE_XRT=1" | docker exec -i ${pid} sh -c "cat >> xla_env"
64-
echo "declare -x XLA_CUDA=1" | docker exec -i ${pid} sh -c "cat >> xla_env"
65-
echo "declare -x BAZEL_REMOTE_CACHE=1" | docker exec -i ${pid} sh -c "cat >> xla_env"
66-
echo "${GCLOUD_SERVICE_KEY}" | docker exec -i ${pid} sh -c "cat >> default_credentials.json"
61+
echo "declare -x SCCACHE_BUCKET=${SCCACHE_BUCKET}" | docker exec -i "${pid}" sh -c "cat >> env"
62+
echo "declare -x CC=clang-8 CXX=clang++-8" | docker exec -i "${pid}" sh -c "cat >> xla_env"
63+
echo "declare -x XLA_USE_XRT=1" | docker exec -i "${pid}" sh -c "cat >> xla_env"
64+
echo "declare -x XLA_CUDA=1" | docker exec -i "${pid}" sh -c "cat >> xla_env"
65+
echo "declare -x BAZEL_REMOTE_CACHE=1" | docker exec -i "${pid}" sh -c "cat >> xla_env"
66+
echo "${GCLOUD_SERVICE_KEY}" | docker exec -i "${pid}" sh -c "cat >> default_credentials.json"
6767
6868
- name: Build
6969
shell: bash
7070
run: |
71-
docker exec -u jenkins ${pid} bash -c ". ~/.bashrc && .circleci/build.sh"
71+
docker exec -u jenkins "${pid}" bash -c ". ~/.bashrc && .circleci/build.sh"
7272
7373
- name: Cleanup build env
7474
shell: bash
7575
run: |
76-
docker exec ${pid} rm default_credentials.json /tmp/pytorch/xla/default_credentials.json
76+
docker exec "${pid}" rm default_credentials.json /tmp/pytorch/xla/default_credentials.json
7777
- name: Push built docker image to ECR
7878
shell: bash
7979
run: |
@@ -89,7 +89,7 @@ jobs:
8989
needs: build
9090
strategy:
9191
matrix:
92-
runner: [linux.4x.large, linux.8xlarge.nvidia.gpu]
92+
runner: [linux.4xlarge, linux.8xlarge.nvidia.gpu]
9393
runs-on: ${{ matrix.runner }}
9494
env:
9595
ECR_DOCKER_IMAGE: 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/xla_base:latest
@@ -103,13 +103,17 @@ jobs:
103103
instructions: |
104104
Build is done inside the container, to start an interactive session run:
105105
docker exec -it $(docker container ps --format '{{.ID}}') bash
106+
- name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
107+
id: install-nvidia-driver
108+
uses: pytorch/test-infra/.github/actions/setup-nvidia@main
109+
if: contains(matrix.runner, 'nvidia')
106110
- name: Download and run docker image from GCR
107111
shell: bash
108112
run: |
109113
export COMMIT_DOCKER_IMAGE="${ECR_DOCKER_IMAGE}-${GITHUB_SHA}"
110-
echo "DOCKER_IMAGE: "${COMMIT_DOCKER_IMAGE}
114+
echo "DOCKER_IMAGE: ${COMMIT_DOCKER_IMAGE}"
111115
docker pull "${COMMIT_DOCKER_IMAGE}"
112-
pid=$(docker run -t -d -w $WORKDIR ${COMMIT_DOCKER_IMAGE})
116+
pid=$(docker run ${GPU_FLAG:-} -t -d -w "$WORKDIR" "${COMMIT_DOCKER_IMAGE}")
113117
echo "${GCLOUD_SERVICE_KEY}" | docker exec -i "${pid}" sh -c "cat >> /tmp/pytorch/xla/default_credentials.json"
114118
echo "pid=${pid}" >> "${GITHUB_ENV}"
115119
- name: Test

0 commit comments

Comments
 (0)