-
Notifications
You must be signed in to change notification settings - Fork 0
/
get_item_regex.py
127 lines (89 loc) · 3.24 KB
/
get_item_regex.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#!/usr/bin/python3
# Help text describing the program, its input (stdin) and its exit statuses.
# NOTE(review): meant to be displayed with -h, but no option handling that
# prints it is visible in this file -- confirm where it is consumed.
codedoc = """
Get a list of items.
Prerequisites:
Parameters:
stdin:
Item list
Return status:
The following status is returned to the shell:
0 Normal termination
1 Help requested (-h)
2 Ctrl-c pressed, program interrupted
3 Invalid or missing parameter
20 General error
Author:
Geert Van Pamel, 2021-11-30, GNU General Public License v3.0, User:Geertivp
Documentation:
"""
# List the required modules
import logging # Error logging
import os # Operating system: getenv
import re # Regular expressions (very handy!)
import sys # System: argv, exit (get the parameters, terminate the program)
import time # sleep
import urllib.parse # URL encoding/decoding (e.g. Wikidata Query URL)
import pywikibot # API interface to Wikidata
from datetime import datetime # now, strftime, delta time, total_seconds
# --- Program identification -------------------------------------------------
# Module name (using the Pywikibot package)
modnm = "Pywikibot get_item_regex"
# Program ID and version
pgmid = "2021-11-30 (gvp)"

# --- Runtime flags: transparent and safe defaults ---------------------------
# NOTE(review): the option letters mentioned below (-d, -p, -q, -v) come from
# the original notes; no command line parsing is visible in this file.
# Debug output; can be activated with -d (errors and configuration changes
# are always shown)
debug = False
# Exit on fatal error (can be disabled with -p; please take care)
exitfatal = True
# Shell available (command line parameters are available; automatically
# overruled by PAWS)
shell = True
# Can be set with -q or -v (better keep verbose to monitor the bot progress)
verbose = True

# --- Technical parameters ---------------------------------------------------
# NOTE(review): the string below documents an error penalty wait factor (-f),
# but no such variable is defined in the visible code; it is a no-op statement.
"""
Default error penalty wait factor (can be overruled with -f).
Larger values ensure that maxlag errors are avoided, but temporarily delay processing.
It is advised not to overrule this value.
"""
# Exit status passed to sys.exit() at the end of the script (default 0)
exitstat = 0
def wd_proc_all_items(items=None):
    """Print every item identifier, one per line.

    Parameters:
        items: iterable of item identifiers (e.g. Q-numbers) to print.
            When omitted (None), the module-level ``itemlist`` built by
            the main program is used, so existing zero-argument calls
            keep working unchanged.

    Returns:
        None. The identifiers are written to standard output.
    """
    # The former "global exitstat" declaration was dropped: the function
    # never assigns exitstat, so the declaration had no effect.
    if items is None:
        items = itemlist        # Fall back to the list built by the main program

    # Process all items in the list
    for qnumber in items:       # Main loop over the items
        print(qnumber)
def show_prog_version():
    """Print the module name and the program version on a single line.

    Reads the module-level ``modnm`` and ``pgmid`` values.
    """
    banner = '{} version {}'.format(modnm, pgmid)
    print(banner)
# Main program entry
# First identify the program
logger = logging.getLogger('get_item_regex')

if verbose:
    show_prog_version()         # Print the module name

try:
    pgmnm = sys.argv.pop(0)     # Get the name of the executable
except (AttributeError, IndexError):
    # Only a missing or empty argument vector means "no shell". Anything
    # else (e.g. KeyboardInterrupt) must not be silently swallowed here.
    shell = False
    logger.error('No shell available')  # Most probably running on PAWS Jupyter
else:
    if debug:
        print('%s version %s' % (pgmnm, pgmid))     # Physical program

# Start main program logic
# Precompile the regular expression once
qsuffre = re.compile(r'Q[0-9]+')        # Q-number

# Get list of item numbers: every Q-number found anywhere in stdin,
# de-duplicated and sorted as plain strings (so 'Q10' sorts before 'Q2').
inputfile = sys.stdin.read()
itemlist = sorted(set(qsuffre.findall(inputfile)))
if debug:
    print(itemlist)

wd_proc_all_items()     # Print all distinct items, one per line

# Print all sitelinks (base addresses)
# PAWS is using tokens (passwords can't be used because Python scripts are public)
# Shell is using passwords (from user-password.py file)
# NOTE(review): pywikibot._sites is a private attribute of the package and
# may change between Pywikibot releases -- confirm against the installed version.
if debug:
    for site in sorted(pywikibot._sites.values()):
        if site.username():
            print(site, site.username(), site.is_oauth_token_available(), site.logged_in())

sys.exit(exitstat)
# End of the misery