# Build & Publish API snapshot to data.covidactnow.org #4803
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
name: 'Build & Publish API snapshot to data.covidactnow.org'

# Use a concurrency group to make sure we don't try to have multiple workflows
# run with the hosted runner at the same time.
concurrency: gce-runner
on:
  # The only trigger declared here is a manual dispatch with two inputs.
  workflow_dispatch:
    inputs:
      # Required: copied into SENTRY_ENVIRONMENT by the workflow-level env.
      sentry_environment:
        description: 'Sentry environment of build. Should be "production" on main build and "staging" for other branches.'
        required: true
      # Optional: copied into PYSEIR_ARTIFACT_SNAPSHOT by the workflow-level
      # env; the empty default means "no snapshot number supplied".
      pyseir_snapshot:
        description: 'Optionally download an existing pyseir model artifact from a previous snapshot number instead of generating a new one.'
        required: false
        default: ""
# Workflow-level environment, visible to every step of every job below.
env:
  # S3 Bucket (used by s3-sync-action tasks) to store final API snapshot.
  AWS_S3_BUCKET: 'data.covidactnow.org'
  # Use plotting by default on CI
  PYSEIR_PLOT_RESULTS: 'True'
  # The snapshot ID that identifies all of the API artifacts we're generating and ends
  # up in the final /snapshot/{id}/ URL.
  SNAPSHOT_ID: ${{ github.run_number }}
  # Used by execute-model (for now) to optimize parallelization on self-hosted
  # runner.
  COVID_MODEL_CORES: '96'
  # Used by python code that reports errors to sentry.
  SENTRY_DSN: ${{ secrets.SENTRY_DSN }}
  # Sets the sentry environment, controlling how alerts are reported.
  SENTRY_ENVIRONMENT: ${{ github.event.inputs.sentry_environment }}
  # use a webhook to write to slack channel dev-alerts for QA
  SLACK_DEV_ALERTS_WEBHOOK: ${{ secrets.SLACK_DEV_ALERTS_WEBHOOK }}
  # Setting openblas threading to one to speed up numpy in multiprocessing.
  OPENBLAS_NUM_THREADS: '1'
  # Optional Snapshot number to use pyseir model output from. An empty string by default
  PYSEIR_ARTIFACT_SNAPSHOT: ${{ github.event.inputs.pyseir_snapshot }}
  # The GCE instance to start / stop before / after running the job.
  GCE_ZONE: "us-west1-b"
  GCE_INSTANCE: "can-actions-runner"
jobs:
  # Starts the GCE VM named by GCE_INSTANCE. The build job `needs` this job,
  # so it only begins once the start command has been issued successfully.
  start-runner:
    runs-on: ubuntu-latest
    steps:
      - id: "auth"
        uses: "google-github-actions/auth@v1"
        with:
          credentials_json: "${{ secrets.GCE_ADMIN_SERVICE_ACCOUNT }}"
      - name: "Set up Cloud SDK"
        uses: "google-github-actions/setup-gcloud@v1"
      - name: "Start ${{ env.GCE_INSTANCE }} VM."
        run: "gcloud compute instances start --zone ${{ env.GCE_ZONE }} ${{ env.GCE_INSTANCE }}"

  # Builds the model results and API output under /data/api-results-<id>,
  # uploads the zipped results as a workflow artifact, syncs the output to S3
  # and finally invokes the two "maybe-trigger" helper scripts.
  #
  # Shell steps read SNAPSHOT_ID, PYSEIR_ARTIFACT_SNAPSHOT and
  # COVID_DATA_MODEL_REF from the process environment instead of splicing
  # `${{ ... }}` expressions into the script text, so that a dispatch input or
  # branch name can never be interpreted as shell syntax.
  build-and-publish-snapshot:
    needs: "start-runner"
    runs-on: gce-runner
    steps:
      - name: Parse covid data model branch name and set env variable
        run: |
          echo "COVID_DATA_MODEL_REF=${GITHUB_REF_NAME}" >> "$GITHUB_ENV"
      - name: Checkout covid-data-model (${{ env.COVID_DATA_MODEL_REF }})
        uses: actions/checkout@v2
        with:
          repository: act-now-coalition/covid-data-model
          path: covid-data-model
          lfs: true
          ref: '${{ env.COVID_DATA_MODEL_REF }}'
      - name: Setup Python
        uses: actions/setup-python@v2
        with:
          python-version: '3.7.6'
          architecture: 'x64'
      # actions/cache v1 and v2 have been retired by GitHub, so use v4.
      - name: Cache Pip
        uses: actions/cache@v4
        with:
          path: ~/.cache/pip
          # The repository is checked out into ./covid-data-model, so the
          # requirements file must be addressed relative to the workspace
          # root; a bare 'requirements.txt' matches nothing and would make
          # the key constant.
          key: ${{ runner.os }}-pip-${{ hashFiles('covid-data-model/requirements.txt') }}
          restore-keys: |
            ${{ runner.os }}-pip-
            ${{ runner.os }}-
      # If you don't manually install the correct version of numpy before
      # installing modules from requirements.txt, fastparquet may try to install a
      # newer version of numpy that requires python 3.8 (but we use 3.7).
      # See https://github.com/dask/fastparquet/issues/632#issuecomment-877871388
      - name: Pre-install pinned numpy
        run: pip install numpy==1.21.4
        working-directory: ./covid-data-model
      - name: Install Dependencies
        working-directory: ./covid-data-model
        run: pip install -r requirements.txt
      - name: Pull git lfs
        working-directory: ./covid-data-model
        run: git lfs pull
      - name: Build Model Results (run.sh .. .. execute_model)
        env:
          GITHUB_TOKEN: ${{ secrets.CAN_ROBOT_PERSONAL_ACCESS_TOKEN }}
        # `${VAR:+"..."}` expands to nothing when the optional snapshot number
        # is empty, so run.sh receives no extra argument in that case.
        run: |
          ./covid-data-model/run.sh "/data/api-results-${SNAPSHOT_ID}" execute_model ${PYSEIR_ARTIFACT_SNAPSHOT:+"${PYSEIR_ARTIFACT_SNAPSHOT}"}
      - name: Zip Model Results (run.sh .. .. execute_zip_folder)
        run: ./covid-data-model/run.sh "/data/api-results-${SNAPSHOT_ID}" execute_zip_folder
      # actions/upload-artifact v1/v2 (including v2-preview) have been retired
      # by GitHub, so use v4. The `name` and `path` inputs are unchanged.
      - name: Upload Raw Data QA and Model Results
        uses: actions/upload-artifact@v4
        with:
          name: model-results-${{ env.SNAPSHOT_ID }}
          path: /data/api-results-${{ env.SNAPSHOT_ID }}/api-results.zip
      - name: Build API (run.sh .. .. execute_api_v2)
        run: |
          ./covid-data-model/run.sh "/data/api-results-${SNAPSHOT_ID}" execute_api_v2
      - name: make and copy to local tmp directory
        run: |
          mkdir -p ./tmp/data/
          cp -r "/data/api-results-${SNAPSHOT_ID}/" ./tmp/data/
      # NOTE(review): this third-party action is referenced by the mutable
      # `master` ref while receiving AWS credentials; consider pinning it to a
      # release tag or commit SHA.
      - name: 'Deploy Artifacts to S3 (https://data.covidactnow.org/snapshot/${{ env.SNAPSHOT_ID }}/).'
        uses: jakejarvis/s3-sync-action@master
        with:
          args: --acl public-read --follow-symlinks
        env:
          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          SOURCE_DIR: './tmp/data/api-results-${{ env.SNAPSHOT_ID }}/'
          DEST_DIR: 'snapshot/${{ env.SNAPSHOT_ID }}/'
      # Runs even when an earlier step failed so that a broken build does not
      # leave its output behind; `rm -rf` tolerates directories that were
      # never created.
      - name: remove local tmp directory and local data build
        if: ${{ always() }}
        run: |
          rm -rf ./tmp/data/
          # /data is a persistent volume on our build machines that does not get
          # automatically cleaned up.
          rm -rf "/data/api-results-${SNAPSHOT_ID}"
      - name: Trigger website PR generation if main branch build
        env:
          GITHUB_TOKEN: ${{ secrets.CAN_ROBOT_PERSONAL_ACCESS_TOKEN }}
        run: ./covid-data-model/tools/maybe-trigger-web-snapshot-update.sh "${SNAPSHOT_ID}" "${COVID_DATA_MODEL_REF}"
      - name: Trigger Label API if main branch build
        env:
          GITHUB_TOKEN: ${{ secrets.CAN_ROBOT_PERSONAL_ACCESS_TOKEN }}
        run: ./covid-data-model/tools/maybe-trigger-label-api.sh "${SNAPSHOT_ID}" "${COVID_DATA_MODEL_REF}"

  # Stops the GCE VM again. `always()` makes this run whether the preceding
  # jobs succeeded, failed or were cancelled.
  stop-runner:
    if: ${{ always() }}
    needs: ["start-runner", "build-and-publish-snapshot"]
    runs-on: ubuntu-latest
    steps:
      - id: "auth"
        uses: "google-github-actions/auth@v1"
        with:
          credentials_json: "${{ secrets.GCE_ADMIN_SERVICE_ACCOUNT }}"
      - name: "Set up Cloud SDK"
        uses: "google-github-actions/setup-gcloud@v1"
      - name: "Stop ${{ env.GCE_INSTANCE }} VM."
        run: "gcloud compute instances stop --zone ${{ env.GCE_ZONE }} ${{ env.GCE_INSTANCE }}"