Build & Publish API snapshot to data.covidactnow.org #4803

Workflow file for this run

name: Build & Publish API snapshot to data.covidactnow.org

# Use a concurrency group to make sure we don't try to have multiple workflows
# run with the hosted runner at the same time.
concurrency: gce-runner

on:
  workflow_dispatch:
    inputs:
      sentry_environment:
        description: 'Sentry environment of the build. Should be "production" for the main branch build and "staging" for other branches.'
        required: true
      pyseir_snapshot:
        description: 'Optionally download an existing pyseir model artifact from a previous snapshot number instead of generating a new one.'
        required: false
        default: ""

env:
  # S3 bucket (used by the s3-sync-action step) to store the final API snapshot.
  AWS_S3_BUCKET: 'data.covidactnow.org'
  # Use plotting by default on CI.
  PYSEIR_PLOT_RESULTS: 'True'
  # The snapshot ID that identifies all of the API artifacts we're generating and ends
  # up in the final /snapshot/{id}/ URL.
  SNAPSHOT_ID: ${{github.run_number}}
  # Used by execute-model (for now) to optimize parallelization on the self-hosted
  # runner.
  COVID_MODEL_CORES: 96
  # Used by Python code that reports errors to Sentry.
  SENTRY_DSN: ${{ secrets.SENTRY_DSN }}
  # Sets the Sentry environment, controlling how alerts are reported.
  SENTRY_ENVIRONMENT: ${{ github.event.inputs.sentry_environment }}
  # Use a webhook to write to the dev-alerts Slack channel for QA.
  SLACK_DEV_ALERTS_WEBHOOK: ${{ secrets.SLACK_DEV_ALERTS_WEBHOOK }}
  # Set OpenBLAS threading to a single thread to speed up numpy under multiprocessing.
  OPENBLAS_NUM_THREADS: 1
  # Optional snapshot number to reuse pyseir model output from. Empty string by default.
  PYSEIR_ARTIFACT_SNAPSHOT: ${{ github.event.inputs.pyseir_snapshot }}
  # The GCE instance to start / stop before / after running the job.
  GCE_ZONE: "us-west1-b"
  GCE_INSTANCE: "can-actions-runner"
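
# All of the env values above are defined at the workflow level, so they are
# inherited by every job and step below.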
jobs:
  start-runner:
    runs-on: ubuntu-latest
    steps:
      - id: "auth"
        uses: "google-github-actions/auth@v1"
        with:
          credentials_json: "${{ secrets.GCE_ADMIN_SERVICE_ACCOUNT }}"
      - name: "Set up Cloud SDK"
        uses: "google-github-actions/setup-gcloud@v1"
      - name: "Start ${{env.GCE_INSTANCE}} VM."
        run: "gcloud compute instances start --zone ${{env.GCE_ZONE}} ${{env.GCE_INSTANCE}}"
  build-and-publish-snapshot:
    needs: "start-runner"
    runs-on: gce-runner
    steps:
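      # GITHUB_REF_NAME is the branch (or tag) the workflow was dispatched from;
      # it is reused below to check out the matching covid-data-model ref.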
      - name: Parse covid data model branch name and set env variable
        run: |
          echo "COVID_DATA_MODEL_REF=${GITHUB_REF_NAME}" >> $GITHUB_ENV
      - name: Checkout covid-data-model (${{ env.COVID_DATA_MODEL_REF }})
        uses: actions/checkout@v2
        with:
          repository: act-now-coalition/covid-data-model
          path: covid-data-model
          lfs: true
          ref: '${{env.COVID_DATA_MODEL_REF}}'
      - name: Setup Python
        uses: actions/setup-python@v2
        with:
          python-version: '3.7.6'
          architecture: 'x64'
      - name: Cache Pip
        uses: actions/cache@v1
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-pip-${{ hashFiles('requirements.txt') }}
          restore-keys: |
            ${{ runner.os }}-pip-
            ${{ runner.os }}-
      # If you don't manually install the correct version of numpy before
      # installing modules from requirements.txt, fastparquet may try to install a
      # newer version of numpy that requires python 3.8 (but we use 3.7).
      # See https://github.com/dask/fastparquet/issues/632#issuecomment-877871388
      - run: pip install numpy==1.21.4
        working-directory: ./covid-data-model
      - name: Install Dependencies
        working-directory: ./covid-data-model
        run: pip install -r requirements.txt
      - name: Pull git lfs
        working-directory: ./covid-data-model
        run: git lfs pull
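      # The checkout step above already enables LFS; this explicit pull appears to
      # be a safeguard to make sure all LFS-tracked data files are materialized
      # before the model runs.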
      - name: Build Model Results (run.sh .. .. execute_model)
        env:
          GITHUB_TOKEN: ${{ secrets.CAN_ROBOT_PERSONAL_ACCESS_TOKEN }}
        run: |
          ./covid-data-model/run.sh /data/api-results-${{env.SNAPSHOT_ID}} execute_model ${{env.PYSEIR_ARTIFACT_SNAPSHOT}}
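      # If PYSEIR_ARTIFACT_SNAPSHOT is set (via the pyseir_snapshot workflow input),
      # the step above reuses the pyseir model output from that earlier snapshot
      # instead of regenerating it.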
      - name: Zip Model Results (run.sh .. .. execute_zip_folder)
        run: ./covid-data-model/run.sh /data/api-results-${{env.SNAPSHOT_ID}} execute_zip_folder
      - name: Upload Raw Data QA and Model Results
        uses: actions/upload-artifact@v2-preview
        with:
          name: model-results-${{env.SNAPSHOT_ID}}
          path: /data/api-results-${{env.SNAPSHOT_ID}}/api-results.zip
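      # The zipped results are attached to this workflow run as a downloadable
      # artifact for raw data QA.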
      - name: Build API (run.sh .. .. execute_api_v2)
        run: |
          ./covid-data-model/run.sh /data/api-results-${{env.SNAPSHOT_ID}} execute_api_v2
      - name: Make and copy to local tmp directory
        run: |
          mkdir -p ./tmp/data/
          cp -r /data/api-results-${{env.SNAPSHOT_ID}}/ ./tmp/data/
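      # Copying into ./tmp keeps the results inside the GitHub workspace, which is
      # what the Docker-based sync action below has access to (it does not see the
      # runner's /data volume).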
      - name: Deploy Artifacts to S3 (https://data.covidactnow.org/snapshot/${{env.SNAPSHOT_ID}}/)
        uses: jakejarvis/s3-sync-action@master
        with:
          args: --acl public-read --follow-symlinks
        env:
          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          SOURCE_DIR: './tmp/data/api-results-${{env.SNAPSHOT_ID}}/'
          DEST_DIR: 'snapshot/${{env.SNAPSHOT_ID}}/'
      - name: Remove local tmp directory and local data build
        run: |
          rm -rf ./tmp/data/
          # /data is a persistent volume on our build machines that does not get
          # automatically cleaned up.
          rm -r /data/api-results-${{env.SNAPSHOT_ID}}
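      # The two "maybe-trigger" steps below are expected to be no-ops unless this
      # is a main branch build.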
      - name: Trigger website PR generation if main branch build
        env:
          GITHUB_TOKEN: ${{ secrets.CAN_ROBOT_PERSONAL_ACCESS_TOKEN }}
        run: ./covid-data-model/tools/maybe-trigger-web-snapshot-update.sh ${{env.SNAPSHOT_ID}} ${{env.COVID_DATA_MODEL_REF}}
      - name: Trigger Label API if main branch build
        env:
          GITHUB_TOKEN: ${{ secrets.CAN_ROBOT_PERSONAL_ACCESS_TOKEN }}
        run: ./covid-data-model/tools/maybe-trigger-label-api.sh ${{env.SNAPSHOT_ID}} ${{env.COVID_DATA_MODEL_REF}}
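
  # Always stop the GCE runner VM, even when the snapshot build fails or is
  # cancelled, so it is not left running.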
  stop-runner:
    if: ${{ always() }}
    needs: ["start-runner", "build-and-publish-snapshot"]
    runs-on: ubuntu-latest
    steps:
      - id: "auth"
        uses: "google-github-actions/auth@v1"
        with:
          credentials_json: "${{ secrets.GCE_ADMIN_SERVICE_ACCOUNT }}"
      - name: "Set up Cloud SDK"
        uses: "google-github-actions/setup-gcloud@v1"
      - name: "Stop ${{env.GCE_INSTANCE}} VM."
        run: "gcloud compute instances stop --zone ${{env.GCE_ZONE}} ${{env.GCE_INSTANCE}}"