-
Notifications
You must be signed in to change notification settings - Fork 0
/
apispec.py
477 lines (424 loc) · 20.1 KB
/
apispec.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
# -*- coding: utf-8 -*-
"""
This is a library for querying special pages through API.
A supported feature does not mean it is complete.
Feel free to continue. :-) It shall be large and comprehensive someday.
No version checks or API existence checks implemented.
Different features require different MW versions as shown in local docstrings.
Tested on Wikipedia. Other MediaWiki installations may provide different APIs,
please check documentation at /w/api.php if something does not work.
You may find detailed help for each feature in the code.
Supported features:
Blocks Querying valid local blocks, based on Special:BlockList.
"""
"""
General help to continue: http://www.mediawiki.org/wiki/API:Lists
http://en.wikipedia.org/wiki/Special:ApiSandbox is good for experiments.
TODO:
* Handling errors and warnings where necessary. For example: disabled modules
(may be useful for non-WM wikis)
http://www.mediawiki.org/wiki/API:Errors_and_warnings#Disabled_modules
"""
#
# (C) Bináris, 2012
# (C) Pywikipedia bot team, 2012
#
# Distributed under the terms of the MIT license.
#
# Version string; '$Id$' looks like a version-control keyword placeholder
# (presumably expanded on checkout -- TODO confirm).
__version__ = '$Id$'
import datetime, re
import wikipedia as pywikibot
import query
# Site object fetched once at import time. It is used as the default value
# of the ``site`` parameter of Blocks.__init__ below.
site = pywikibot.getSite()
#Some functions for datetime conversion
def iso(t):
    """
    Format a Python datetime as an ISO 8601 timestamp with a trailing Z.

    The fractional seconds (and any UTC offset) are dropped and 'Z' is
    appended, e.g. datetime(2012, 1, 30, 16, 47, 57, 123456) becomes
    '2012-01-30T16:47:57Z'. The value is assumed to be in UTC already;
    no conversion is made.
    """
    # isoformat() omits the fraction entirely when microsecond == 0, so
    # cutting at the first '.' would chop the last digit of the seconds in
    # that case. Zeroing the microseconds explicitly avoids the search.
    return t.replace(microsecond=0, tzinfo=None).isoformat() + 'Z'
def uniso(timestamp):
    """
    Make an ISO 8601 timestamp easier to read.

    Every 'T' is replaced by a space and every 'Z' is removed; text that
    contains neither letter is returned unchanged.
    """
    spaced = timestamp.replace('T', ' ')
    return spaced.replace('Z', '')
def dt(timestamp):
    """
    Convert a MediaWiki timestamp to a Python datetime object.

    Timestamps are expected in the form "2012-01-30T16:47:57Z".
    Be sure to exclude "infinity" before calling this!

    If the text cannot be parsed (missing parts, non-numeric fields or
    values that do not form a valid date), a message is printed and
    datetime.datetime(1, 1, 1, 1, 1, 1) is returned; callers may treat that
    value as an error marker.
    """
    pieces = timestamp.split('T')
    try:
        date_part = [int(x) for x in pieces[0].split('-')]
        # [:-1] drops the last character of the time part (the 'Z').
        time_part = [int(x) for x in pieces[1][:-1].split(':')]
        return datetime.datetime(date_part[0], date_part[1], date_part[2],
                                 time_part[0], time_part[1], time_part[2])
    except (IndexError, ValueError):
        # IndexError: a date or time component is missing.
        # ValueError: a component is not a number or is out of range
        # (e.g. the all-zero placeholder timestamps of errordic).
        print('Erroneous timestamp: %s' % (timestamp,))
        return datetime.datetime(1, 1, 1, 1, 1, 1)
def duration(x):
    """
    Length of a finite block or protection in whole days.

    x is a dictionary with 'expiry' and 'timestamp' keys holding MediaWiki
    timestamps. Don't call this for infinite ones. Or at least don't blame
    me.
    """
    end = dt(x['expiry'])
    start = dt(x['timestamp'])
    elapsed = end - start
    return elapsed.days
# The API may change. errordic is a placeholder block holding every key the
# methods below may look up; it is returned (wrapped in a list) when a
# KeyError occurs. If you get this as a result, please report a bug with
# all possible circumstances.
errordic = dict.fromkeys(
    ('user', 'by', 'reason', 'rangestart', 'rangeend'),
    'This is an error message, has API format changed?')
errordic.update(dict.fromkeys(('id', 'userid', 'byid'), -1))
errordic.update(
    dict.fromkeys(('timestamp', 'expiry'), '0000-00-00T00:00:00Z'))
class Blocks(object):
    """
    A class for querying valid local blocks. Based on Special:BlockList.

    That means it won't handle expired or solved blocks, nor globals.
    (Although newly expired blocks may appear in the list for a time, and
    autoblocks may appear on the day of expiry after expiration.)
    Requires MW 1.12.

    All the methods return a list of dictionaries, unless otherwise stated.
    Each dictionary represents a block. Keys are shown at
    http://www.mediawiki.org/wiki/API:Blocks
    under 'bkprop'. Note that 'user' key is not present if it is an autoblock,
    and flags are not present if they are not valid for that block. It is your
    task to handle this. Even numeric values such as id's come as strings!
    The list is ordered by timestamp of applying the block unless noted.

    Timestamps of beginning and expiry are in UTC and ISO 8601 format.
    http://www.mediawiki.org/wiki/API:Data_formats#Timestamps
    Convert them to local time if necessary.
    See http://docs.python.org/library/datetime.html for help.

    Methods:

    1. General lists:
        allblocks       List of all effective blocks
        autoblocks      List of effective autoblocks
        notautoblocks   Direct (not automatic) blocks. Required by other
                        methods.
        anonblocks      List of IP blocks, including range blocks
        anonblocks_norange
                        List of IP blocks, excluding range blocks
        rangeblocks     List of range blocks
        reguserblocks   List of blocks concerning registered users, not anons
        byadmin         Blocks raised by a given admin
        user            Blocks of the given user or single IP (exact match)
                        (Only direct blocks for the given IP, not range
                        blocks)
        userfragment    Blocks of the given user or single IP
                        You may give any part of the username.
        userregex       Blocks of the given user or single IP
                        Give a regex with r'...' or ur'...'
        IP              Blocks of the given single IP or range (max. /16)
                        If you give an IP, this will return also range blocks
                        concerning this IP (not like user()).
        reason          Blocks raised with the given reason (exact match)
        reasonfragment  Blocks raised with the given reason
                        You may give any part of the reason, e.g. 'andalism'
        reasonregex     Blocks raised with the given reason (regex)

        Remarks: user-like and reason-like methods are case sensitive.
                 Use user() and IP() whenever appropriate. They are much
                 faster and cause less server load!
                 "Fragment" and "regex" methods are for hunting vandals with
                 some peculiar pattern or examining blocking habits of
                 admins.
                 Regexes will be searched for rather than matched. This means
                 you have to begin them with ^ if you want to find them at
                 the beginning of the string.

    2. Simplified lists of blocked users/IPs:
        These lists, unlike others, contain simple Unicode strings rather
        than block dictionaries. The first one is ordered chronologically by
        date of blocking, which is useful for hunting the reincarnations of a
        vandal, while the others in alphanumerical order.

        blockedusernames_chrono
                        List of blocked users in chronological order
        blockedusernames
                        List of blocked users in alphabetical order
        blockedanons    All blocked IPs (including range blocks)
        blockedanons_norange
                        Standalone blocked IPs (excluding range blocks)
        blockedranges   List of blocked ranges

    3. By expiry:
        finiteblocks    List of all finite blocks
        infiniteblocks  List of all infinite and indefinite blocks
        expindays       Blocks expiring within n days (from the second of
                        calling)
        expnotindays    Finite blocks not expiring within n days
                        (use with infiniteblocks to get all the remaining
                        blocks)
        expuntil        Blocks expiring by the given timestamp
        expafter        Finite blocks expiring after the given timestamp
                        (use with infiniteblocks to get all the remaining
                        blocks)
                        For valid timestamps see
                        http://www.mediawiki.org/wiki/API:Data_formats#Timestamps

        The next three are ordered by ascending duration:
        shorterthan     List of blocks shorter than given number of days
        longerthan      List of blocks at least as long as given number of
                        days (use with infiniteblocks to get all the
                        remaining blocks)
        between         List of blocks whose duration is in the given
                        interval

    4. Auxiliary:
        display(block)  A simple function for lazy programmers that will
                        display human-readable details of the given
                        dictionary that represents a block (with English
                        keywords at this time).
                        You may use it with pywikibot.output or insert into
                        a wikipage between <pre> tags. This may be iterated
                        or joined on a list of blocks, but don't blame me if
                        your monitor is not tall enough.
                        Should 'bot' be an instance of this class and b a
                        block, bot.display(b) returns the text in Unicode.
                        It displays existing flags of the block. Missing
                        flags are not set for this block. For meaning of
                        flags see http://www.mediawiki.org/wiki/API:Blocks
                        Possible flags: 'automatic', 'anononly', 'nocreate',
                        'autoblock', 'noemail', 'allowusertalk', 'hidden'.
        displaylist     Takes a list of blocks as argument and creates a long
                        human readable text of it. Use as above.
                        E.g. pywikibot.output(bot.displaylist(bot.expindays(1)))
                        displays blocks expiring within 24 hours.

    Optional parameters:
        site    site as usual, autodetected if missing
                This has some bug at the moment, but works well in home wiki.
                (Follow-up requests of a continued query now use this site
                as well; other problems may remain.)
        top     'new'/'old' (newest or oldest block on top; default='new')
        limit   Maximum number of blocks to get in one query as integer or
                string. Defaults to 5000. That is the allowed maximum for
                bots in Wikimedia wikis. You MUST set it to no more than 500
                if your bot does not have a flag.
                See http://www.mediawiki.org/wiki/API:Query_-_Lists#Limits
                Bot will repeat queries as long as necessary, so you get the
                whole list anyway.
                !! Setting this value too low may result in an infinite loop
                or duplicated results. Use as great limit as possible.
                See https://bugzilla.wikimedia.org/show_bug.cgi?id=34029
                Additionally, decreasing this limit will cause a
                quasi-exponential increase of running time!

    help: http://www.mediawiki.org/wiki/API:Blocks

    TODO:
    * Explore the bug of site parameter
    * A function listing all blocked IPs, expanding ranges
    * Some statistics from blocks
    """

    #################################################
    #           Methods for internal use            #
    #################################################

    def __init__(self, site=site, top='new', limit=5000):
        """
        Store the query settings and build the basic request parameters.

        site  -- wiki to query (defaults to the module-level site)
        top   -- 'new' or 'old': which end of the list comes first
        limit -- maximum number of blocks per request (bklimit)
        """
        self.site = site
        # bkdir: direction to list in.
        #   'older': list newest blocks first (default).
        #            Note: bkstart has to be later than bkend.
        #   'newer': list oldest blocks first.
        #            Note: bkstart has to be before bkend.
        if top == 'old':
            self.bkdir = 'newer'
        else:
            self.bkdir = 'older'
        self.bklimit = limit  # Allowed maximum for bots=5000
        self.empty()

    def empty(self):
        """
        Sets the parameters needed by all methods, and clears the others.
        """
        self.params = {
            'action': 'query',
            'list': 'blocks',
            'bklimit': self.bklimit,
            'bkdir': self.bkdir,
            'bkprop':
                'id|user|userid|by|byid|timestamp|expiry|reason|range|flags',
        }

    def query(self):
        """
        Run the request described by self.params and return all blocks.

        Follows 'query-continue' until the list is complete and returns the
        blocks in the order received, with repeated entries dropped (the
        first copy of each is kept). self.params is updated with the
        continuation values along the way.
        """
        result = query.GetData(self.params, site=self.site)
        blocklist = result['query']['blocks']
        # Todo: handle possible errors (they will cause KeyError at this
        # time)
        while 'query-continue' in result:
            self.params.update(result['query-continue']['blocks'])
            # The follow-up request must go to the same site as the first.
            result = query.GetData(self.params, site=self.site)
            blocklist += result['query']['blocks']
        # Finally we remove possible duplicates. This piece of code may be
        # removed after successful closing of
        # https://bugzilla.wikimedia.org/show_bug.cgi?id=34029
        # A new list is built instead of popping from the list being
        # iterated, which would skip elements. Dictionaries are not
        # hashable, so the repr of their sorted items is used as the key.
        seen = set()
        unique = []
        for block in blocklist:
            fingerprint = repr(sorted(block.items()))
            if fingerprint not in seen:
                seen.add(fingerprint)
                unique.append(block)
        return unique

    def IPsortkey(self, IP):
        """
        Sortkey for IPs given as strings. Assumes a properly formatted
        IP string, either standalone or range.

        Every dot-separated part is left-padded with zeros to three digits
        (so that 5 sorts before 10); the prefix length of a range, if any,
        is padded the same way and kept after the slash.
        """
        parts = IP.split('/')
        # parts[0] is the address without the range suffix
        key = '.'.join([octet.zfill(3) for octet in parts[0].split('.')])
        if len(parts) > 1:
            key += '/' + parts[1].zfill(3)
        return key

    def _userid(self, block):
        """
        Return the 'userid' value of a block as a string.

        The comparisons below are written for string ids ('0' for IPs);
        converting here keeps them working if a number is received instead.
        """
        return str(block['userid'])

    #################################################
    #                General lists                  #
    #################################################

    def allblocks(self):
        """Returns complete list"""
        self.empty()
        return self.query()

    def autoblocks(self):
        """Returns autoblocks"""
        self.empty()
        # Autoblocks back to previous day 00:00:00 UTC appear in the list
        # even if they are no more in effect, but we don't query them.
        self.params['bkend'] = \
            iso(datetime.datetime.utcnow() - datetime.timedelta(1))
        return [b for b in self.query() if 'user' not in b]

    def notautoblocks(self):
        """Returns direct (not automatic) blocks. Required by other methods."""
        self.empty()
        return [b for b in self.query() if 'user' in b]

    def anonblocks(self):
        """Returns anonblocks, including range blocks"""
        return [b for b in self.notautoblocks() if self._userid(b) == '0']

    def anonblocks_norange(self):
        """
        Returns anonblocks, excluding range blocks.

        Returns [errordic] if the range keys are missing from a block.
        """
        try:
            return [b for b in self.anonblocks()
                    if b['rangestart'] == b['rangeend']]
        except KeyError:
            return [errordic]

    def rangeblocks(self):
        """
        Returns range blocks.

        Returns [errordic] if the range keys are missing from a block.
        """
        try:
            return [b for b in self.anonblocks()
                    if b['rangestart'] != b['rangeend']]
        except KeyError:
            return [errordic]

    def reguserblocks(self):
        """Returns block concerning registered users, not anons"""
        return [b for b in self.notautoblocks() if self._userid(b) > '0']

    def byadmin(self, admin):
        """Returns blocks raised by given admin"""
        self.empty()
        return [b for b in self.query() if b['by'] == admin]

    def user(self, user):
        """Returns blocks of the given user or single IP"""
        self.empty()
        self.params['bkusers'] = user
        return self.query()

    def userfragment(self, user):
        """Returns blocks of the given user or single IP (part of name)"""
        return [b for b in self.notautoblocks() if user in b['user']]

    def userregex(self, regex):
        """Returns blocks of the given user or single IP (regex)"""
        return [b for b in self.notautoblocks()
                if re.search(regex, b['user'])]

    def IP(self, IP):
        """Returns blocks of the given single IP or range (max. /16)"""
        self.empty()
        self.params['bkip'] = IP
        return self.query()

    def reason(self, reason):
        """Returns blocks raised with the given reason (exact text)"""
        self.empty()
        return [b for b in self.query() if b['reason'] == reason]

    def reasonfragment(self, reason):
        """Returns blocks raised with the given reason (part of it)"""
        self.empty()
        return [b for b in self.query() if reason in b['reason']]

    def reasonregex(self, regex):
        """Returns blocks raised with the given reason (regex)"""
        return [b for b in self.allblocks()
                if re.search(regex, b['reason'])]

    #################################################
    #          Lists of blocked users/IPs           #
    #################################################
    # These methods return ordered list of Unicode strings

    def blockedusernames_chrono(self):
        """Returns names of blocked registered users in list order"""
        return [b['user'] for b in self.reguserblocks()]

    def blockedusernames(self):
        """Returns names of blocked registered users, sorted"""
        return sorted(self.blockedusernames_chrono())

    def blockedanons(self):
        """Returns blocked IPs and ranges, sorted by IPsortkey"""
        return sorted(
            [b['user'] for b in self.anonblocks()], key=self.IPsortkey)

    def blockedanons_norange(self):
        """Returns blocked standalone IPs, sorted by IPsortkey"""
        return sorted(
            [b['user'] for b in self.anonblocks_norange()],
            key=self.IPsortkey)

    def blockedranges(self):
        """Returns blocked ranges, sorted by IPsortkey"""
        return sorted(
            [b['user'] for b in self.rangeblocks()], key=self.IPsortkey)

    #################################################
    #               Lists by expiry                 #
    #################################################

    def finiteblocks(self):
        """Returns finite blocks"""
        self.empty()
        # A finite expiry is a timestamp, so it begins with a digit.
        return [b for b in self.query() if b['expiry'][0].isdigit()]

    def infiniteblocks(self):
        """Returns infinite and indefinite blocks"""
        self.empty()
        return [b for b in self.query() if b['expiry'].isalpha()]

    def expindays(self, days):
        """Returns blocks expiring within days days"""
        limit = iso(datetime.datetime.utcnow() + datetime.timedelta(days))
        # Timestamps are compared as text.
        return [b for b in self.finiteblocks() if b['expiry'] <= limit]

    def expnotindays(self, days):
        """Returns finite blocks NOT expiring within days days"""
        limit = iso(datetime.datetime.utcnow() + datetime.timedelta(days))
        return [b for b in self.finiteblocks() if b['expiry'] > limit]

    def expuntil(self, timestamp):
        """Returns blocks expiring by timestamp"""
        return [b for b in self.finiteblocks() if b['expiry'] <= timestamp]

    def expafter(self, timestamp):
        """Returns finite blocks NOT expiring by timestamp"""
        return [b for b in self.finiteblocks() if b['expiry'] > timestamp]

    def shorterthan(self, days):
        """Returns finite blocks shorter than day"""
        return sorted(
            [b for b in self.finiteblocks() if duration(b) < days],
            key=duration)

    def longerthan(self, days):
        """Returns finite blocks longer than or equal to day"""
        return sorted(
            [b for b in self.finiteblocks() if duration(b) >= days],
            key=duration)

    def between(self, days1, days2):
        """
        Returns finite blocks whose duration is in closed interval
        [day1,day2]
        """
        return sorted(
            [b for b in self.finiteblocks()
             if days1 <= duration(b) <= days2],
            key=duration)

    #################################################
    #              Auxiliary methods                #
    #################################################

    def display(self, block):
        """
        Simple displayer for a block dictionary. Use with pywikibot.output
        or e-mail it or insert into a wikipage with <pre>.

        Returns the text; nothing is printed here.
        """
        w = 21  # width for justification
        flags = ['automatic', 'anononly', 'nocreate', 'autoblock', 'noemail',
                 'allowusertalk', 'hidden']
        parts = ['Data for block #%s' % block['id'],
                 '\nBlocked user:'.ljust(w)]
        if 'user' in block:
            parts.append(block['user'])
            if 'userid' in block and self._userid(block) > '0':
                parts.append(' (#%s)' % block['userid'])
        else:
            # Autoblocks come without a 'user' key.
            parts.append('n/a (autoblock)')
        if 'rangestart' in block and 'rangeend' in block and \
                block['rangestart'] != block['rangeend']:
            parts.append('\nRange block:'.ljust(w))
            parts.append(
                u'%s–%s' % (block['rangestart'], block['rangeend']))
        parts.append('\nAdmin:'.ljust(w))
        parts.append('%s (#%s)' % (block['by'], block['byid']))
        parts.append('\nBeginning in UTC:'.ljust(w))
        parts.append(uniso(block['timestamp']))
        # Only a real timestamp gets the "in UTC" remark.
        if block['expiry'][0].isdigit():
            expirylabel = '\nExpiry in UTC:'
        else:
            expirylabel = '\nExpiry:'
        parts.append(expirylabel.ljust(w))
        parts.append(uniso(block['expiry']))
        parts.append('\nFlags:'.ljust(w))
        parts.append(', '.join([f for f in flags if f in block]))
        parts.append('\nReason:'.ljust(w) + block['reason'] + '\n')
        return ''.join(parts)

    def displaylist(self, blocklist):
        """
        Returns a long human readable version of a blocklist, ready for
        pywikibot.output. Usually too long for direct display.
        """
        return '\n'.join([self.display(b) for b in blocklist])
if __name__ == '__main__':
    # Running the module directly only prints two hints; handleArgs() is
    # called first, for help.
    pywikibot.handleArgs()
    for hint in ('This is a library for querying special pages through API.',
                 'Use this module through import or with -help.'):
        pywikibot.output(hint)