#!/usr/bin/env bash
# build.sh
#
# Installs the system and PostgreSQL/PostGIS dependencies, creates the
# "unum" database, downloads the gazetteers, runs the SQL scripts under
# sql_scripts/, then samples, tests and zips the exported /tmp/unum.tsv.
#
# Run it from the root of the source tree: the current working directory
# is recorded as the source directory.
# Abort the build as soon as any command fails.
set -e

# The directory the script is launched from is recorded as the source tree.
printf '%s\n' "Set Source Code Directory"
UNUM_SRC_DIR="$(pwd)"
printf '%s\n' "Set Source Code Directory to ${UNUM_SRC_DIR}"
# --- System packages ---------------------------------------------------
# Package lists are refreshed BEFORE anything is installed: on a fresh
# machine or container the lists may be empty or stale, in which case an
# install issued first cannot find its packages.
echo "Installing sudo"
if ! command -v sudo > /dev/null 2>&1; then
  # sudo is not available yet, so these two calls run directly and need
  # root. -y keeps the install from stopping at a confirmation prompt.
  apt-get update -qq
  apt-get install -y sudo
fi

echo "Update Package Lists"
sudo apt-get update -qq

echo "Install Other System Dependencies"
sudo apt-get install -y build-essential git sudo unzip vim zip

echo "search for PostGIS packages"
# Purely informational: lists the PostGIS packages known to apt.
apt-cache search postgis

echo "Install Database Dependencies"
# The last argument is a regular expression matched against package names;
# it selects the versioned postgresql-<N>-postgis-<M> packages.
sudo apt-get install -y postgresql postgresql-contrib postgresql-server-dev-all '^postgresql-[1-9][0-9](.[0-9])*-postgis-[0-9](.[0-9])*$'
# --- PostgreSQL role and database ---------------------------------------
echo "Restart PostgreSQL"
sudo service postgresql restart

echo "Create Postgresql USER"
# The role is named after the OS user running this script, so the later
# plain "psql ... unum" calls can connect as that user.
db_user="$(whoami)"

# Create the role only when it is missing; an unconditional CREATE ROLE
# fails on a second run and errexit would abort the build. The identifier
# is double-quoted so the role name matches the OS user name exactly
# (unquoted identifiers are case-folded and cannot contain e.g. "-").
if ! sudo -u postgres psql -tAc "SELECT 1 FROM pg_roles WHERE rolname = '${db_user}'" | grep -q 1; then
  sudo -u postgres psql -c "CREATE ROLE \"${db_user}\" CREATEDB LOGIN SUPERUSER"
fi

# Same guard for the database.
if ! sudo -u postgres psql -tAc "SELECT 1 FROM pg_database WHERE datname = 'unum'" | grep -q 1; then
  sudo -u postgres psql -c "CREATE DATABASE unum;"
fi
# --- Safecast -------------------------------------------------------------
echo "Download Safecast"
# Skip the clone when an earlier run already fetched the repository:
# git clone refuses to write into an existing non-empty directory, and
# that failure would abort the build under errexit.
if [ ! -d /tmp/safecast/.git ]; then
  git clone https://github.com/DanielJDufour/safecast /tmp/safecast
fi

echo "Install Safecast"
# Two separate commands on purpose: in "cd DIR && make install" a failing
# cd is exempt from errexit, so the script would silently carry on without
# having installed anything.
cd /tmp/safecast
make install
# --- Database extensions and helper functions -----------------------------
# Every psql call sets ON_ERROR_STOP so that an SQL error inside a script
# yields a non-zero exit status; without it psql -f reports success and
# errexit never sees the failure. Paths are quoted so a source directory
# containing spaces is not split into several arguments.
echo "Initialize DB Extensions"
psql -v ON_ERROR_STOP=1 -f "${UNUM_SRC_DIR}/sql_scripts/create_extensions.sql" unum

echo "Create PSQL Utils"
for file in "${UNUM_SRC_DIR}"/sql_scripts/1-utils/*; do
  echo "RUNNING $file"
  psql -v ON_ERROR_STOP=1 -f "$file" unum
done

echo "Create Custom Number Conversion Functions"
for file in "${UNUM_SRC_DIR}"/sql_scripts/2-convert/*; do
  echo "RUNNING $file"
  psql -v ON_ERROR_STOP=1 -f "$file" unum
done
# --- Gazetteer downloads --------------------------------------------------
# Every download is skipped when the file named in its guard already
# exists under the gazetteers directory.
gazetteer_dir="/tmp/gazetteers"

printf '%s\n' "Create gazetteers Directory"
test -d "${gazetteer_dir}" || mkdir "${gazetteer_dir}"

printf '%s\n' "Download Wikidata Gazetteer"
if test ! -f "${gazetteer_dir}/wikidata-gazetteer.tsv"; then
  cd "${gazetteer_dir}"
  wget --quiet https://s3.amazonaws.com/firstdraftgis/wikidata-gazetteer.tsv.zip
  unzip wikidata-gazetteer.tsv.zip
  rm wikidata-gazetteer.tsv.zip
fi

printf '%s\n' "Download OSMNames"
if test ! -f "${gazetteer_dir}/planet-latest_geonames.tsv"; then
  cd "${gazetteer_dir}"
  wget --quiet https://github.com/OSMNames/OSMNames/releases/download/v2.0.3/planet-latest_geonames.tsv.gz
  # gunzip replaces the .gz archive with the decompressed file.
  gunzip planet-latest_geonames.tsv.gz
fi

printf '%s\n' "Download GeoNames"
if test ! -f "${gazetteer_dir}/allCountries.txt"; then
  cd "${gazetteer_dir}"
  wget --quiet http://download.geonames.org/export/dump/allCountries.zip
  unzip allCountries.zip
  rm allCountries.zip
fi
# --- Load, conform, conflate, export --------------------------------------
echo "Copy Over Temp Files"
# Quoted so a source directory containing spaces is not word-split; "--"
# stops option parsing in case a data file name starts with "-".
cp -- "${UNUM_SRC_DIR}"/data/* /tmp/.

# Run every SQL file of one pipeline stage against the "unum" database.
# Arguments:
#   $1 - message printed before the stage starts
#   $2 - name of the stage directory under ${UNUM_SRC_DIR}/sql_scripts
# ON_ERROR_STOP makes psql exit non-zero when a statement in the file
# fails, so errexit stops the build instead of continuing on bad data.
run_sql_stage() {
  local file
  echo "$1"
  for file in "${UNUM_SRC_DIR}/sql_scripts/$2"/*; do
    echo "RUNNING $file"
    psql -v ON_ERROR_STOP=1 -f "$file" unum
  done
}

run_sql_stage "Load Gazetteers" 5-load
run_sql_stage "Re-format Data from Gazetteers into Standard Format" 10-conform
run_sql_stage "Conflate" 20-conflate
run_sql_stage "Export" 30-export
# --- Sample, test, package ------------------------------------------------
# Each step is its own command rather than an "a && b" list: a failure of
# the left-hand side of && is exempt from errexit, so the script would
# continue past a failed cd or head.
echo "Sample"
# First line (presumably the header row) goes in unchanged, followed by up
# to 10000 randomly chosen lines. tail -n +2 removes that first line from
# shuf's input so it cannot be drawn a second time as a data row.
head -n 1 /tmp/unum.tsv > /tmp/unum_sample.tsv
time tail -n +2 /tmp/unum.tsv | shuf -n 10000 >> /tmp/unum_sample.tsv

echo "Install Pandas"
# Install through the same interpreter that runs the test below; a bare
# "pip" may belong to a different Python installation.
python3 -m pip install --upgrade pandas

echo "Test"
cd "$UNUM_SRC_DIR"
python3 test.py

echo "Zip"
cd /tmp
zip -r unum.tsv.zip unum.tsv