Skip to content

Commit

Permalink
add feature to import from Google Spreadsheet and download entries (c…
Browse files Browse the repository at this point in the history
…loses #152)
  • Loading branch information
slaporte committed Sep 24, 2019
1 parent 68f64ca commit 924c6a0
Show file tree
Hide file tree
Showing 9 changed files with 123 additions and 34 deletions.
2 changes: 1 addition & 1 deletion client/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "montage",
"version": "19.09.22",
"version": "19.09.23",
"description": "Photo evaluation tool for and by Wiki Loves competitions",
"main": "src/app.module.js",
"repository": "https://github.com/hatnote/montage",
Expand Down
8 changes: 4 additions & 4 deletions client/src/components/campaign/round-new/round-new.html
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ <h4 ng-if="$ctrl.index === 1">Source</h4>
ng-model="$ctrl.import.import_method"
ng-if="$ctrl.index === 1">
<md-radio-button value="category">Category on Wikimedia Commons</md-radio-button>
<md-radio-button value="gistcsv">File List URL</md-radio-button>
<md-radio-button value="csv">File List URL</md-radio-button>
<md-radio-button value="selected">File List</md-radio-button>

<div ng-if="$ctrl.import.import_method === 'category'">
Expand All @@ -97,12 +97,12 @@ <h4 ng-if="$ctrl.index === 1">Source</h4>

<div
layout="column" layout-align="start stretch"
ng-if="$ctrl.import.import_method === 'gistcsv'">
<p class="muted">List of files saved as CSV and uploaded somewhere on the internet.</p>
ng-if="$ctrl.import.import_method === 'csv'">
<p class="muted">List of files saved as CSV and uploaded as a Google Sheet or Gist.</p>
<md-input-container class="no-margin-bottom">
<label>Enter URL</label>
<input type="text" autofocus
ng-model="$ctrl.import.gist_url"
ng-model="$ctrl.import.csv_url"
ng-required="true">
</md-input-container>
</div>
Expand Down
13 changes: 11 additions & 2 deletions client/src/components/campaign/round/round.component.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ function controller($window, adminService, alertService) {

vm.activateRound = activateRound;
vm.downloadResults = downloadResults;
vm.downloadEntries = downloadEntries;
vm.finalizeRound = finalizeRound;
vm.pauseRound = pauseRound;
vm.populateRound = populateRound;
Expand Down Expand Up @@ -53,6 +54,14 @@ function controller($window, adminService, alertService) {
$window.open(url);
}

/**
 * Ask the admin service for this round's entries download URL
 * and hand it to $window.open.
 */
function downloadEntries() {
  $window.open(adminService.downloadEntries(vm.round.id));
}

/**
*
*/
Expand Down Expand Up @@ -141,8 +150,8 @@ function controller($window, adminService, alertService) {
function populateRound() {
adminService
.populateRound(vm.round.id, {
import_method: 'gistcsv',
gist_url:
import_method: 'csv',
csv_url:
'https://gist.githubusercontent.com/yarl/bc4b89847f9ced089f7169bbfec79841/raw/c8bd23d3b354ce9d20de578245e4dc7c9f095fb0/wlm2015_fr_5.csv',
})
.catch(alertService.error);
Expand Down
5 changes: 5 additions & 0 deletions client/src/components/campaign/round/round.html
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,11 @@ <h4 layout="row" layout-align="space-between center">
<md-icon>file_download</md-icon>
Download Results
</md-button>
<md-button
ng-click="$ctrl.downloadEntries()">
<md-icon>collections</md-icon>
Download Entries
</md-button>
<md-button ng-if="false"
ng-disabled="($ctrl.round.status !== 'paused' && $ctrl.round.status !== 'finalized') || $ctrl.loading"
ng-click="$ctrl.populateRound()">
Expand Down
1 change: 1 addition & 0 deletions client/src/services/admin.service.js
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ const Service = ($http, $q, $window) => {
advanceRound: (id, data) => $http.post(`${base}/round/${id}/advance`, data),

downloadRound: id => `${base}/round/${id}/results/download`,
downloadEntries: id => `${base}/round/${id}/entries/download`
};

return admin;
Expand Down
13 changes: 8 additions & 5 deletions montage/admin_endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
MaintainerDAO,
OrganizerDAO)

GISTCSV_METHOD = 'gistcsv'
CATEGORY_METHOD = 'category'
ROUND_METHOD = 'round'
SELECTED_METHOD = 'selected'
Expand Down Expand Up @@ -333,11 +332,15 @@ def import_entries(user_dao, round_id, request_dict):
# loader warnings
import_warnings = list()

if import_method == GISTCSV_METHOD:
gist_url = request_dict['gist_url']
if import_method == 'csv' or import_method == 'gistcsv':
if import_method == 'gistcsv':
csv_url = request_dict['gist_url']
else:
csv_url = request_dict['csv_url']

entries, warnings = coord_dao.add_entries_from_csv(round_id,
gist_url)
params = {'gist_url': gist_url}
csv_url)
params = {'csv_url': csv_url}
if warnings:
msg = 'unable to load {} files ({!r})'.format(len(warnings), warnings)
import_warnings.append(msg)
Expand Down
85 changes: 78 additions & 7 deletions montage/loaders.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@

import datetime
import StringIO
import urllib2
import json
import re

from boltons.iterutils import chunked_iter
from unicodecsv import DictReader
Expand All @@ -11,6 +13,8 @@

REMOTE_UTILS_URL = 'https://tools.wmflabs.org/montage-dev/v1/utils/'

GSHEET_URL = 'https://docs.google.com/spreadsheets/d/%s/gviz/tq?tqx=out:csv'

CSV_FULL_COLS = ['img_name',
'img_major_mime',
'img_minor_mime',
Expand All @@ -23,7 +27,23 @@

def wpts2dt(timestamp):
    """Parse a timestamp string into a naive ``datetime``.

    Two layouts are accepted: the compact 14-digit form
    (``%Y%m%d%H%M%S``) and, as a fallback, the ISO-like form
    ``%Y-%m-%dT%H:%M:%S``.

    Raises ValueError if ``timestamp`` matches neither layout.
    """
    try:
        return datetime.datetime.strptime(timestamp, '%Y%m%d%H%M%S')
    except ValueError:
        # based on output format
        return datetime.datetime.strptime(timestamp, '%Y-%m-%dT%H:%M:%S')


def parse_doc_id(raw_url):
    """Extract the document id from a Google Sheets URL.

    The id is the run of letters, digits, ``-`` and ``_`` that follows
    the first ``/spreadsheets/d/`` in ``raw_url``. The sheet tab
    (``gid=...``) is not parsed.

    Raises ValueError if ``raw_url`` has no ``/spreadsheets/d/<id>`` part.
    """
    # The hyphen is escaped so it can never be read as a character range.
    match = re.search(r'/spreadsheets/d/([a-zA-Z0-9\-_]+)', raw_url)
    if match is None:
        raise ValueError('invalid spreadsheet url "%s"' % raw_url)
    return match.group(1)


def make_entry(edict):
Expand All @@ -43,18 +63,21 @@ def make_entry(edict):
return rdb.Entry(**raw_entry)


def load_full_csv(csv_file_obj):
def load_full_csv(csv_file_obj, source='remote'):
# TODO: streaming this for big CSVs is an unnecessary headache

ret = []
warnings = []

dr = DictReader(csv_file_obj)

if 'filename' in dr.fieldnames:
return load_partial_csv(dr, source=source)

for key in CSV_FULL_COLS:
if key not in dr.fieldnames:
raise ValueError('missing required column "%s" in csv file' % key)

for edict in dr:
try:
entry = make_entry(edict)
Expand All @@ -66,6 +89,14 @@ def load_full_csv(csv_file_obj):
return ret, warnings


def load_partial_csv(dr, source='remote'):
    """Load entries from CSV rows that only need a ``filename`` column.

    ``dr`` is iterated once and each row is indexed with ``'filename'``.
    The collected names are joined one per line and passed to
    ``load_name_list`` together with ``source``.

    Returns whatever ``load_name_list`` returns.
    """
    file_names = [row['filename'] for row in dr]
    file_names_obj = StringIO.StringIO('\n'.join(file_names))
    return load_name_list(file_names_obj, source=source)


def load_name_list(file_obj, source='local'):
""" Just the file names, and we'll look up the rest"""

Expand All @@ -87,7 +118,7 @@ def load_name_list(file_obj, source='local'):
edicts, warnings = get_by_filename_remote(rl)
else:
for filename in rl:
edict = get_file_info(filename)
edict, warnings = get_file_info(filename)
edicts.append(edict)

for edict in edicts:
Expand All @@ -100,13 +131,17 @@ def load_name_list(file_obj, source='local'):

return ret, warnings


def get_entries_from_csv(raw_url, source='local'):
    """Route a CSV import to the loader that matches the URL.

    URLs containing ``google.com`` are handled by
    ``get_entries_from_gsheet``; all others go to
    ``get_entries_from_gist``.
    """
    is_gsheet = 'google.com' in raw_url
    loader = get_entries_from_gsheet if is_gsheet else get_entries_from_gist
    return loader(raw_url, source)

def get_entries_from_gist(raw_url, source='local'):
if 'githubusercontent' not in raw_url:
raw_url = raw_url.replace('gist.github.com',
'gist.githubusercontent.com') + '/raw'
resp = urllib2.urlopen(raw_url)

try:
ret, warnings = load_full_csv(resp)
except ValueError as e:
Expand All @@ -116,6 +151,28 @@ def get_entries_from_gist(raw_url, source='local'):
return ret, warnings


def get_entries_from_gsheet(raw_url, source='local'):
    """Load entries from a link-shared Google Sheet exported as CSV.

    The document id is parsed out of ``raw_url`` and the sheet is
    fetched through ``GSHEET_URL``. The body is read into memory once so
    that each parsing attempt starts from the first line:

    1. ``load_full_csv`` (which itself handles a ``filename`` column);
    2. on ValueError, every line is treated as a bare, possibly quoted,
       file name and passed to ``load_name_list``.

    Returns the ``(entries, warnings)`` pair produced by the loader used.

    Raises ValueError if the URL has no document id or if the response
    is not served as ``text/csv``.
    """
    # TODO: add support for sheet tabs
    doc_id = parse_doc_id(raw_url)
    url = GSHEET_URL % doc_id
    resp = urllib2.urlopen(url)
    try:
        # getheader returns None when the header is absent
        content_type = resp.headers.getheader('content-type') or ''
        if 'text/csv' not in content_type:
            raise ValueError('cannot load Google Sheet "%s" (is link sharing on?)' % raw_url)
        body = resp.read()
    finally:
        resp.close()

    try:
        return load_full_csv(StringIO.StringIO(body), source=source)
    except ValueError:
        # not a recognised CSV layout; fall through to the plain name list
        pass

    file_names = [fn.strip('\"') for fn in body.split('\n')]
    file_names_obj = StringIO.StringIO('\n'.join(file_names))
    return load_name_list(file_names_obj, source=source)


def load_by_filename(filenames, source='local'):
ret = []
if source == 'remote':
Expand Down Expand Up @@ -162,7 +219,7 @@ def get_from_remote(url, params):
return file_infos, no_infos


def get_by_filename_remote(filenames, chunk_size=250):
def get_by_filename_remote(filenames, chunk_size=200):
file_infos = []
warnings = []
for filenames_chunk in chunked_iter(filenames, chunk_size):
Expand Down Expand Up @@ -190,5 +247,19 @@ def get_by_filename_remote(filenames, chunk_size=250):

if __name__ == '__main__':
    # Manual smoke run: fetches live Google Sheets, so it needs network access.
    #imgs = load_category('Images_from_Wiki_Loves_Monuments_2015_in_France')
    #imgs = get_entries_from_gist_csv('https://gist.githubusercontent.com/slaporte/7433943491098d770a8e9c41252e5424/raw/9181d59224cd3335a8f434ff4683c83023f7a3f9/wlm2015_fr_12k.csv')
    #imgs, warnings = get_entries_from_gist('https://gist.github.com/slaporte/a773b4f9a7d1b7fbda62f12507eb40be', source='remote')
    shared_sheets = [
        ('!! results csv',
         'https://docs.google.com/spreadsheets/d/1RDlpT23SV_JB1mIz0OA-iuc3MNdNVLbaK_LtWAC7vzg/edit?usp=sharing'),
        ('!! filename list',
         'https://docs.google.com/spreadsheets/d/1Nqj-JsX3L5qLp5ITTAcAFYouglbs5OpnFwP6zSFpa0M/edit?usp=sharing'),
        ('!! full CSV',
         'https://docs.google.com/spreadsheets/d/1WzHFg_bhvNthRMwNmxnk010KJ8fwuyCrby29MvHUzH8/edit#gid=550467819'),
    ]
    for banner, sheet_url in shared_sheets:
        print(banner)
        imgs, warnings = get_entries_from_gsheet(sheet_url, source='remote')
        print('-- loaded %s files' % len(imgs))

    # This sheet is expected to be rejected with ValueError.
    print('!! unshared doc')
    try:
        imgs, warnings = get_entries_from_gsheet('https://docs.google.com/spreadsheets/d/1tza92brMKkZBTykw3iS6X9ij1D4_kvIYAiUlq1Yi7Fs/edit', source='remote')
    except ValueError as e:
        print('-- %s ' % e)
    import pdb; pdb.set_trace()
14 changes: 9 additions & 5 deletions montage/rdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -1562,20 +1562,24 @@ def add_entries_by_name(self, round_id, file_names):

return entries

def add_entries_from_csv(self, round_id, csv_url):
    """Load entries from a CSV URL and add them to a round.

    The round is looked up through ``self.user_dao``. Entries are loaded
    with ``loaders.get_entries_from_csv`` using the ``'remote'`` source
    when ``ENV_NAME`` is ``'dev'`` and ``'local'`` otherwise, then passed
    to ``self.add_entries``. The action is logged.

    Returns the ``(entries, warnings)`` pair.

    Raises InvalidAction if the loader rejects the CSV with ValueError.
    """
    # NOTE: this no longer creates RoundEntries, use
    # add_round_entries to do this.
    rnd = self.user_dao.get_round(round_id)
    source = 'remote' if ENV_NAME == 'dev' else 'local'
    try:
        entries, warnings = loaders.get_entries_from_csv(csv_url,
                                                         source=source)
    except ValueError as e:
        # keep the loader's explanation so the caller can see why it failed
        raise InvalidAction('unable to load csv "%s": %s' % (csv_url, e))

    entries, new_entry_count = self.add_entries(rnd, entries)

    msg = ('%s loaded %s entries from csv (%r), %s new entries added'
           % (self.user.username, len(entries), csv_url, new_entry_count))
    self.log_action('add_entries', message=msg, round=rnd)

    return entries, warnings
Expand Down
16 changes: 6 additions & 10 deletions run_server_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,13 +274,6 @@ def full_run(base_url, remote):
'/admin/round/%s/import' % round_id,
data, as_user='LilyOfTheWest')

data = {'import_method': 'category',
'category': 'Images_from_Wiki_Loves_Monuments_2015_in_Albania'}
resp = fetch('coordinator: import entries from a category',
'/admin/round/%s/import' % round_id,
data, as_user='LilyOfTheWest')


resp = fetch('coordinator: activate a round',
'/admin/round/%s/activate' % round_id,
{'post': True},
Expand All @@ -299,10 +292,13 @@ def full_run(base_url, remote):
{'post': True},
as_user='LilyOfTheWest')

gist_url = 'https://gist.githubusercontent.com/slaporte/7433943491098d770a8e9c41252e5424/raw/ca394147a841ea5f238502ffd07cbba54b9b1a6a/wlm2015_fr_500.csv'
resp = fetch('coordinator: import more entries from different gist csv into an existing round',



gsheet_url = 'https://docs.google.com/spreadsheets/d/1WzHFg_bhvNthRMwNmxnk010KJ8fwuyCrby29MvHUzH8/edit#gid=550467819'
resp = fetch('coordinator: import more entries from different gsheet csv into an existing round',
'/admin/round/%s/import' % round_id,
{'import_method': 'gistcsv', 'gist_url': gist_url},
{'import_method': 'csv', 'csv_url': gsheet_url},
as_user='LilyOfTheWest')

resp = fetch('coordinator: import files selected by name',
Expand Down

0 comments on commit 924c6a0

Please sign in to comment.