-
Notifications
You must be signed in to change notification settings - Fork 0
/
get_common_names.py
121 lines (80 loc) · 3.64 KB
/
get_common_names.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
'''Scrape species pages'''
import argparse, csv, json, os, re, requests, time
from datetime import date
import wikipediaapi as wiki
import utils.setup as setup
import utils.csv as utils_csv
def get_iucn_common_name(taxon, iucn_url) -> dict:
    '''
    Query the IUCN species API for the common names of one taxon.

    :param taxon: scientific name of the species being looked up
    :param iucn_url: fully-formed IUCN API URL (including the auth token)
    :return: dict {'taxon': <scientific name>,
                   'common_names': <list of IUCN name dicts, or None when
                                    the request failed or returned no result>}
    '''
    common_names_result = None
    # Timeout so a stalled API call cannot hang the whole batch run.
    iucn_response = requests.get(iucn_url, timeout=30)
    if iucn_response.status_code < 300 and iucn_response.text:
        try:
            common_names = json.loads(iucn_response.text)
        except json.JSONDecodeError:
            # Non-JSON body (e.g. an HTML error page): treat as "no result".
            common_names = {}
        if 'result' in common_names:
            common_names_result = common_names['result']
    taxon_common_names = {'taxon': taxon, 'common_names': common_names_result}
    # Rate-limit courtesy pause between IUCN API calls.
    time.sleep(3)
    return taxon_common_names
# def get_wikipedia_common_names(taxon, wiki_user_agent) -> list:
# '''
# Get a list of dicts {'common':<retrieved_name>} from the Wikipedia API
# For different languages, may be able to point to en/es/fr/etc urls
# '''
# if taxon is None:
# taxon = 'Myotis morrisi'
# wiki_api_url_en = 'http://en.wikipedia.org/w/api.php?action=query&prop=extracts&titles='
# wiki_api_suffix = '&explaintext=1&exsectionformat=wiki'
# wiki_api_url = wiki_api_url_en + taxon + wiki_api_suffix
# # wiki_html = wiki.Wikipedia(wiki_user_agent,
# # 'en',
# # extract_format=wiki.ExtractFormat.HTML)
# r = requests.get(url=wiki_api_url, headers={'User-Agent':wiki_user_agent})
# # css_select = '.infobox > tbody:nth-child(1) > tr:nth-child(1) > th:nth-child(1)'
def main():
    '''
    Read a CSV of taxa, fetch IUCN common names for each, and write the
    results to both a dated CSV and a dated JSON file in the output path.
    '''
    config = setup.get_config()
    # Set up command line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("input_csv", help="input CSV that includes a 'taxon' column that contains scientific names")
    parser.add_argument("output_path", help="Output path (with trailing '/')")
    args = parser.parse_args()
    # setup IUCN API call for common names
    iucn_base_url = config['IUCN_BASE_URL']
    iucn_token = config['IUCN_TOKEN']
    taxon_list = utils_csv.rows(args.input_csv)
    common_name_list = []
    # Get Common Names: only rows with a non-empty taxon are queried;
    # the append lives inside the guard so blank rows neither raise
    # NameError (first row) nor duplicate the previous result.
    for row in taxon_list:
        taxon = row['taxon']
        if taxon:
            species_url = f"{iucn_base_url}/api/v3/species/common_names/{taxon}?token={iucn_token}"
            taxon_common_name = get_iucn_common_name(taxon, species_url)
            time.sleep(0.5)
            common_name_list.append(taxon_common_name)
    # Ensure the output directory exists (no-op when it already does).
    output_path = args.output_path
    os.makedirs(output_path, exist_ok=True)
    if not common_name_list:
        # Nothing retrieved: avoid IndexError on common_name_list[0] below.
        print("No taxa found in input CSV; no output files written.")
        return
    # e.g. 2024-05-01 -> '20240501' (equivalent to the old regex strip).
    date_suffix = date.today().strftime('%Y%m%d')
    csv_path = os.path.join(output_path, f"taxon_common_names_{date_suffix}.csv")
    # newline='' is required by the csv module to avoid blank rows on Windows.
    with open(csv_path, encoding='utf-8', mode='w', newline='') as all_names:
        col_names = list(common_name_list[0].keys())
        print(col_names)
        write = csv.DictWriter(f=all_names, fieldnames=col_names)
        write.writeheader()
        write.writerows(common_name_list)
    json_path = os.path.join(output_path, f"taxon_common_names_{date_suffix}.json")
    # Context manager guarantees the file is closed even on write errors.
    with open(json_path, 'w', encoding='utf-8') as json_file:
        json_file.write(json.dumps(common_name_list, indent=True, ensure_ascii=False))
# Script entry point: run only when executed directly, not when imported.
if __name__ == '__main__':
    main()