Skip to content

Commit

Permalink
wip: add data details
Browse files Browse the repository at this point in the history
  • Loading branch information
jaanphare committed Apr 7, 2024
1 parent ce4bfd4 commit 03fd70f
Show file tree
Hide file tree
Showing 33 changed files with 103 additions and 12 deletions.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
6 changes: 6 additions & 0 deletions data_processing/models/bls.gov/consumer_price_index.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{{ config(
    materialized='external',
    location=var('output_path') + '/' + this.name + '.parquet'
) }}

-- Expose the CPI sheet loaded by the upstream Python model under
-- snake_case column names: YEAR -> year, AVG -> consumer_price_index.
SELECT
    cpi_sheet."YEAR" AS year,
    cpi_sheet."AVG" AS consumer_price_index
FROM {{ ref('download_consumer_price_index') }} AS cpi_sheet
13 changes: 13 additions & 0 deletions data_processing/models/bls.gov/download_consumer_price_index.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import pandas as pd
import os
import requests

def model(dbt, session):
    """Load the BLS R-CPI-U-RS (all items) yearly figures into a DataFrame.

    The workbook ``r-cpi-u-rs-allitems.xlsx`` is read from the directory
    named by this model's ``output_path`` config value. Nothing is
    downloaded here: the file has to be placed in that directory beforehand.

    Args:
        dbt: dbt Python-model context; only ``dbt.config.get`` is used.
        session: Session object passed in by dbt (unused).

    Returns:
        A ``pandas.DataFrame`` holding the ``YEAR`` and ``AVG`` columns of
        the sheet.

    Raises:
        ValueError: If ``output_path`` is not configured for this model.
        FileNotFoundError: If the workbook is not present in ``output_path``.
    """
    # Source of the workbook, kept for reference and for the error message.
    # TODO: download it automatically and save it to output_path.
    cpi_url = "https://www.bls.gov/cpi/research-series/r-cpi-u-rs-allitems.xlsx"

    output_path = dbt.config.get('output_path')
    if not output_path:
        # Without this guard, os.path.expanduser(None) fails with an opaque
        # TypeError that does not mention the missing config key.
        raise ValueError(
            "model config 'output_path' is not set; "
            "it must point at the directory containing r-cpi-u-rs-allitems.xlsx"
        )

    base_path = os.path.expanduser(output_path)
    excel_path = os.path.join(base_path, "r-cpi-u-rs-allitems.xlsx")
    if not os.path.isfile(excel_path):
        raise FileNotFoundError(
            f"{excel_path} not found; download it from {cpi_url} "
            "and save it there before running this model"
        )

    # NOTE(review): skiprows=5 presumably skips the sheet's title rows so
    # that the YEAR/AVG header row is read first -- confirm against the file.
    return pd.read_excel(excel_path, skiprows=5, usecols=['YEAR', 'AVG'])
11 changes: 11 additions & 0 deletions data_processing/models/config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# dbt properties file: forwards the project variables data_path and
# output_path into the config of the entries listed below.
version: 2

models:
  # Receives data_path / output_path from the project variables.
  - name: download_consumer_price_index
    config:
      data_path: "{{ var('data_path') }}"
      output_path: "{{ var('output_path') }}"
  # NOTE(review): "ahrq.gov" looks like a directory name rather than a model
  # name -- confirm that dbt actually applies this config to those models.
  - name: ahrq.gov
    config:
      data_path: "{{ var('data_path') }}"
      output_path: "{{ var('output_path') }}"
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
{{ config(
    materialized='external',
    location=var('output_path') + '/' + this.name + '.parquet'
) }}

-- Counts 2016 inpatient plan payments per distinct amount and insurance
-- type, with each amount scaled by the ratio CPI(2022) / CPI(2016).
--
-- NOTE(review): the parquet locations below are absolute paths on one
-- machine; consider deriving them from the data_path variable once its
-- directory layout is confirmed.

WITH cpi AS (
    SELECT
        year,
        consumer_price_index
    FROM {{ ref('consumer_price_index') }}
),

-- Single row holding the CPI value of the target year.
cpi_target AS (
    SELECT MAX(consumer_price_index) AS cpi_2022
    FROM cpi
    WHERE year = 2022
),

-- Multiplier that converts 2016 amounts into 2022 amounts.
factor AS (
    SELECT
        2016 AS year,
        (target.cpi_2022 / base.consumer_price_index) AS adjustment_factor_to_2022
    FROM cpi AS base
    CROSS JOIN cpi_target AS target
    WHERE base.year = 2016
),

commercial AS (
    SELECT
        src.PLAN_PMT_AMT * f.adjustment_factor_to_2022 AS Payment,
        COUNT(*) AS count,
        'Commercial' AS Insurance
    FROM read_parquet('/Users/me/data/syh_dr/syhdr_commercial_inpatient_2016.parquet') AS src
    CROSS JOIN factor AS f
    GROUP BY src.PLAN_PMT_AMT, f.adjustment_factor_to_2022
),

medicaid AS (
    SELECT
        src.PLAN_PMT_AMT * f.adjustment_factor_to_2022 AS Payment,
        COUNT(*) AS count,
        'Medicaid' AS Insurance
    FROM read_parquet('/Users/me/data/syh_dr/syhdr_medicaid_inpatient_2016.parquet') AS src
    CROSS JOIN factor AS f
    GROUP BY src.PLAN_PMT_AMT, f.adjustment_factor_to_2022
),

medicare AS (
    SELECT
        src.PLAN_PMT_AMT * f.adjustment_factor_to_2022 AS Payment,
        COUNT(*) AS count,
        'Medicare' AS Insurance
    FROM read_parquet('/Users/me/data/syh_dr/syhdr_medicare_inpatient_2016.parquet') AS src
    CROSS JOIN factor AS f
    GROUP BY src.PLAN_PMT_AMT, f.adjustment_factor_to_2022
),

all_payers AS (
    SELECT * FROM commercial
    UNION ALL
    SELECT * FROM medicaid
    UNION ALL
    SELECT * FROM medicare
)

SELECT
    Payment,
    count,
    Insurance
FROM all_payers
ORDER BY Insurance, Payment
File renamed without changes.
File renamed without changes.
File renamed without changes.
Binary file modified docs/data/insurance_plan_payment_histogram.parquet
Binary file not shown.
8 changes: 4 additions & 4 deletions docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ function paymentChart(paymentData, width) {
return Plot.plot({
width,
marginLeft: 60,
x: { type: "log", domain: [1, 1000000] }, // Set the domain of the x-axis to be fixed between 1 and 1,000,000
x: { type: "log", domain: [100, 300_000] }, // Set the domain of the x-axis to be fixed between 100 and 300,000
y: { axis: null }, // Hide the y-axis
color: { legend: "swatches", columns: 1, domain: orderInsurance },
marks: [
Expand All @@ -93,9 +93,9 @@ function paymentChart(paymentData, width) {
fill: "Insurance",
order: orderInsurance,
thresholds: d3
.ticks(Math.log10(1), Math.log10(1000000), 40)
.map((d) => +(10 ** d).toPrecision(3)),
tip: true,
.ticks(Math.log10(1), Math.log10(1000000), 90)
.map((d) => 10 ** d),
tip: { format: { x: ",.3r" } }
}
)
),
Expand Down
7 changes: 0 additions & 7 deletions healthcare_data/models/config.yml

This file was deleted.

4 changes: 3 additions & 1 deletion requirements.in
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,6 @@ ipykernel
jupysql
pip-tools
duckdb-engine
pdfplumber
pdfplumber
openpyxl
pip-tools
4 changes: 4 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,8 @@ duckdb==0.10.1
# duckdb-engine
duckdb-engine==0.11.2
# via -r requirements.in
et-xmlfile==1.1.0
# via openpyxl
executing==2.0.1
# via stack-data
fonttools==4.51.0
Expand Down Expand Up @@ -165,6 +167,8 @@ numpy==1.26.4
# pandas
# pyarrow
# seaborn
openpyxl==3.1.2
# via -r requirements.in
packaging==24.0
# via
# build
Expand Down

0 comments on commit 03fd70f

Please sign in to comment.