-
Notifications
You must be signed in to change notification settings - Fork 0
/
populations.py
73 lines (60 loc) · 2.3 KB
/
populations.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
from typing import Optional
from pathogen_properties import Population, prevalence_data_filename
# Module-level cache of the Census table, filled on first use by
# load_location_populations().  Each entry is
# (location name, {year: population estimate for that year}).
location_populations: list[tuple[str, dict[int, int]]] = []
def load_location_populations() -> None:
    """Fill the module-level ``location_populations`` cache from the Census TSV.

    Does nothing if the cache already holds rows.  Lines that do not split
    into exactly five tab-separated fields are skipped.  For each remaining
    line, field 0 is the location name and fields 2-4 are the 2020, 2021 and
    2022 population counts (thousands separators are removed before parsing).

    Raises:
        OSError: if the data file cannot be opened.
        ValueError: if a five-field line has a non-numeric count.  In that
            case the cache is left untouched, so a later call retries the
            load instead of silently serving a partial table.
    """
    if location_populations:
        return

    parsed: list[tuple[str, dict[int, int]]] = []

    # Downloaded 2023-05-11 from
    # https://www2.census.gov/programs-surveys/popest/tables/2020-2022/counties/totals/co-est2022-pop.xlsx
    with open(prevalence_data_filename("Census-co-est2022-pop.tsv")) as inf:
        for line in inf:
            bits = line.strip().split("\t")
            if len(bits) != 5:
                continue
            location = bits[0]
            # bits[1] is intentionally unused -- presumably the April 2020
            # estimates base column; TODO confirm against the spreadsheet.
            counts = {
                year: int(cell.replace(",", ""))
                for year, cell in zip((2020, 2021, 2022), bits[2:])
            }
            parsed.append((location, counts))

    # Publish only after the whole file parsed cleanly.  Appending row by row
    # would leave a partial cache behind on error, and the early-return guard
    # above would then treat that partial data as complete.  extend() keeps
    # the same list object, so existing references to it stay valid.
    location_populations.extend(parsed)
def us_population(
    year: int, county: Optional[str] = None, state: Optional[str] = None
) -> Population:
    """Return the US Census population estimate for a nation, state or county.

    Args:
        year: Estimate year; must be 2020, 2021 or 2022.
        county: County name, matched together with ``state`` against table
            rows of the form ``".<county>, <state>"``.
        state: State name.  With no ``county``, the result is the sum of all
            rows whose name ends in ``", <state>"``.

    With neither ``county`` nor ``state`` the "United States" row is returned.

    Returns:
        A ``Population`` dated July 1st of ``year``.

    Raises:
        Exception: if ``year`` is unsupported, or if no matching row is found.
    """
    if year not in (2020, 2021, 2022):
        raise Exception("Unsupported year: %s" % year)

    # Fields shared by every Population this function can return.
    # All estimates are July 1st, specifically.
    shared = dict(
        source="https://www.census.gov/data/tables/time-series/demo/popest/2020s-counties-total.html",
        date=f"{year}-07-01",
        country="United States",
    )

    # Loop-invariant match targets.
    want_nation = not county and not state
    county_row = f".{county}, {state}"
    state_suffix = f", {state}"

    load_location_populations()

    state_total = 0
    for name, by_year in location_populations:
        count = by_year[year]

        if want_nation and name == "United States":
            return Population(people=count, **shared)

        if name == county_row:
            return Population(
                people=count, state=state, county=county, **shared
            )

        if not county and name.endswith(state_suffix):
            state_total += count

    # The only case where we total up is to get state populations.
    if state_total != 0:
        return Population(people=state_total, state=state, **shared)

    raise Exception("county=%r, state=%r not found" % (county, state))