@@ -15,7 +15,7 @@ concurrency:
1515jobs :
1616 build :
1717 name : " Build XLA" 
18-  runs-on : linux.4xlarge  
18+  runs-on : linux.12xlarge  
1919 env :
2020 ECR_DOCKER_IMAGE_BASE : 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/xla_base 
2121 GCR_DOCKER_IMAGE : gcr.io/tpu-pytorch/xla_base:latest 
@@ -58,22 +58,22 @@ jobs:
5858name : Prepare build env 
5959 shell : bash 
6060 run : | 
61-  echo "declare -x SCCACHE_BUCKET=${SCCACHE_BUCKET}" | docker exec -i ${pid} sh -c "cat >> env" 
62-  echo "declare -x CC=clang-8 CXX=clang++-8" | docker exec -i ${pid} sh -c "cat >> xla_env" 
63-  echo "declare -x XLA_USE_XRT=1" | docker exec -i ${pid} sh -c "cat >> xla_env" 
64-  echo "declare -x XLA_CUDA=1" | docker exec -i ${pid} sh -c "cat >> xla_env" 
65-  echo "declare -x BAZEL_REMOTE_CACHE=1" | docker exec -i ${pid} sh -c "cat >> xla_env" 
66-  echo "${GCLOUD_SERVICE_KEY}" | docker exec -i ${pid} sh -c "cat >> default_credentials.json" 
61+  echo "declare -x SCCACHE_BUCKET=${SCCACHE_BUCKET}" | docker exec -i "${pid}" sh -c "cat >> env"
62+  echo "declare -x CC=clang-8 CXX=clang++-8" | docker exec -i "${pid}" sh -c "cat >> xla_env"
63+  echo "declare -x XLA_USE_XRT=1" | docker exec -i "${pid}" sh -c "cat >> xla_env"
64+  echo "declare -x XLA_CUDA=1" | docker exec -i "${pid}" sh -c "cat >> xla_env"
65+  echo "declare -x BAZEL_REMOTE_CACHE=1" | docker exec -i "${pid}" sh -c "cat >> xla_env"
66+  echo "${GCLOUD_SERVICE_KEY}" | docker exec -i "${pid}" sh -c "cat >> default_credentials.json"
6767
6868name : Build 
6969 shell : bash 
7070 run : | 
71-  docker exec -u jenkins ${pid} bash -c ". ~/.bashrc && .circleci/build.sh" 
71+  docker exec -u jenkins "${pid}" bash -c ". ~/.bashrc && .circleci/build.sh"
7272
7373name : Cleanup build env 
7474 shell : bash 
7575 run : | 
76-  docker exec ${pid} rm default_credentials.json /tmp/pytorch/xla/default_credentials.json 
76+  docker exec "${pid}" rm default_credentials.json /tmp/pytorch/xla/default_credentials.json
7777name : Push built docker image to ECR 
7878 shell : bash 
7979 run : | 
8989 needs : build 
9090 strategy :
9191 matrix :
92-  runner : [linux.4x.large , linux.8xlarge.nvidia.gpu] 
92+  runner : [linux.4xlarge , linux.8xlarge.nvidia.gpu] 
9393 runs-on : ${{ matrix.runner }} 
9494 env :
9595 ECR_DOCKER_IMAGE : 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/xla_base:latest 
@@ -103,13 +103,17 @@ jobs:
103103 instructions : | 
104104 Build is done inside the container, to start an interactive session run: 
105105 docker exec -it $(docker container ps --format '{{.ID}}') bash 
106+ name : Install nvidia driver, nvidia-docker runtime, set GPU_FLAG 
107+  id : install-nvidia-driver 
108+  uses : pytorch/test-infra/.github/actions/setup-nvidia@main 
109+  if : contains(matrix.runner, 'nvidia') 
106110 - name : Download and run docker image from GCR 
107111 shell : bash 
108112 run : | 
109113 export COMMIT_DOCKER_IMAGE="${ECR_DOCKER_IMAGE}-${GITHUB_SHA}" 
110-  echo "DOCKER_IMAGE: " ${COMMIT_DOCKER_IMAGE} 
114+  echo "DOCKER_IMAGE: ${COMMIT_DOCKER_IMAGE}"  
111115 docker pull "${COMMIT_DOCKER_IMAGE}" 
112-  pid=$(docker run -t -d -w $WORKDIR ${COMMIT_DOCKER_IMAGE})
116+  pid=$(docker run ${GPU_FLAG:-} -t -d -w "$WORKDIR" "${COMMIT_DOCKER_IMAGE}")
113117 echo "${GCLOUD_SERVICE_KEY}" | docker exec -i "${pid}" sh -c "cat >> /tmp/pytorch/xla/default_credentials.json" 
114118 echo "pid=${pid}" >> "${GITHUB_ENV}" 
115119name : Test 
0 commit comments