-
Notifications
You must be signed in to change notification settings - Fork 1
/
00-get-data-raw.R
101 lines (75 loc) · 2.33 KB
/
00-get-data-raw.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# Title: Get raw data used in project and add to "data-raw" folder
# Description:
# - download raw data if accessible (use option to run)
# - keep copy of data sources in "data-raw" folder (not in git repository)
# - provide information about manual download source for ESS data
# - provide information about used data files (file name and hash)
library(conflicted)
library(tidyverse)
conflicts_prefer(dplyr::filter, .quiet = TRUE)
library(glue)
# Switch for the download sections below: only when TRUE are the accessible
# data files fetched again (kept FALSE by default so a "run all" of the
# project scripts does not re-download them).
DOWNLOAD_AGAIN <- FALSE

# make sure the raw data folder is available
fs::dir_create(path = "data-raw/")
## Party Facts ----

# Fetch the Party Facts files into "data-raw/" (only when DOWNLOAD_AGAIN is
# TRUE): the two download endpoints plus the ESS harmonization table from
# the partyfactsdata GitHub repository.
if (DOWNLOAD_AGAIN) {
  url <- "https://partyfacts.herokuapp.com/download/"

  # one download per endpoint; walk() because only the side effect matters
  walk(
    c("core-parties-csv", "external-parties-csv"),
    \(endpoint) {
      download.file(
        url = glue("{url}{endpoint}/"),
        destfile = glue("data-raw/pf-{endpoint}.csv")
      )
    }
  )

  download.file(
    url = "https://raw.githubusercontent.com/hdigital/partyfactsdata/main/import/essprtv/02-ess-harmonize.csv",
    destfile = "data-raw/pf-essprtv.csv"
  )
}
## ESS ----

# ESS Rounds 1–10 are not downloaded by this script; they have to be
# fetched manually from the ESS Data Portal
# https://ess-search.nsd.no/

doi_url <- "https://doi.org"
doi_prefix <- "10.21338"
doi_suffix <- c(
  "ess1e06_6", "ess2e03_6", "ess3e03_7", "ess4e04_5", "ess5e03_4",
  "ess7e02_2", "ess8e02_2", "ess9e03_1", "ess10e03_1", "ess10sce03_0"
)

print("Download ESS data files manually with DOI url")

# one DOI link per listed file, all sharing the same DOI prefix
paste(doi_url, doi_prefix, doi_suffix, sep = "/")

# the ess6e02_5 link uses a different DOI prefix and is built separately
glue("{doi_url}/10.18712/ess6e02_5")
## CHES ----

# Download the CHES dataset and its codebook into "data-raw/" (only when
# DOWNLOAD_AGAIN is TRUE). Each file is stored as "ches-<original name>".
if (DOWNLOAD_AGAIN) {
  url <- "https://www.chesdata.eu/s/"
  ches_dataset <- "1999-2019_CHES_dataset_meansv3-td4m.dta"
  ches_codebook <- "1999-2019_CHES_codebook-yj99.pdf"

  # Both file names are combined into one vector: purrr iterates over its
  # first argument only and expects the function as the second argument.
  # walk() is used because download.file() is called for its side effect.
  walk(
    c(ches_dataset, ches_codebook),
    \(.fi) {
      download.file(
        # NOTE(review): trailing "/" kept from the original URL pattern;
        # confirm the server accepts it for these file links
        url = glue("{url}{.fi}/"),
        destfile = glue("data-raw/ches-{.fi}"),
        # binary transfer so the .dta/.pdf files are not altered on Windows
        mode = "wb"
      )
    }
  )
}
## ParlGov ----

# Fetch the ParlGov cabinet view as CSV into "data-raw/" (only when
# DOWNLOAD_AGAIN is TRUE).
if (DOWNLOAD_AGAIN) {
  download.file(
    url = "https://parlgov.org/data/parlgov-development_csv-utf-8/view_cabinet.csv",
    destfile = "data-raw/parlgov_view_cabinet.csv"
  )
}
## Data files index ----

# List the csv/dta files found in "data-raw/" (sorted by path) together with
# their size and file hash, and store that index in "data/".
data_files <-
  tibble(data_file = fs::dir_ls("data-raw/", regexp = "(.+)(\\.)(csv|dta)")) |>
  arrange(data_file) |>
  mutate(size = fs::file_size(data_file)) |>
  mutate(hash = rlang::hash_file(data_file))

# output folder may not exist yet
fs::dir_create(path = "data/")
write_csv(data_files, file = "data/00-data-raw_files.csv")