55from displayTable import *
66import re
77
8+ def decode_tuple (tuple ):
9+ """
10+ Decodes binary strings in a result row
11+ @param tuple: Database row
12+ @type tuple: tuple
13+ @rtype: tuple
14+ """
15+ result = ()
16+ for var in tuple :
17+ if isinstance (var , bytes ):
18+ var = var .decode ('utf8' )
19+ result += (var ,)
20+ return result
21+
822class Reports :
923 def __init__ ( self , site , db , wiki , dry_run = False ):
1024 self .db = db
@@ -14,11 +28,19 @@ def __init__( self, site, db, wiki, dry_run=False ):
1428 self .site .requests ['timeout' ] = 120
1529 self .dry_run = dry_run
1630
31+ def query ( self , sql ):
32+ """
33+ Executes an SQL query and returns a generator producing rows
34+ """
35+ cur = self .db .cursor ()
36+ cur .execute ( sql )
37+ for row in cur .fetchall ():
38+ yield decode_tuple (row )
39+
1740 # Oldest edited articles
1841 # Run time on enwiki 5 hours 23 minutes as of 8 Sept 2015
1942 def forgotten_articles ( self ):
2043 # Make the query
21- cur = self .db .cursor ()
2244 query = """SELECT SQL_SMALL_RESULT
2345 MAX(rev_timestamp) AS lastedit, COUNT(rev_id) AS editcount, page_title
2446 FROM revision,
@@ -44,12 +66,11 @@ def forgotten_articles( self ):
4466 WHERE rev_page=page_id
4567 GROUP BY page_id
4668 ORDER BY lastedit ASC"""
47- cur .execute ( query )
4869
4970 # Extract the data into a Python nested list
5071 content = []
5172 content .append ( ['forgotten-articles-title' , 'forgotten-articles-last-edited' , 'forgotten-articles-editcount' ] )
52- for row in cur . fetchall ( ) :
73+ for row in self . query ( query ) :
5374 # A page name is being caught by the testwiki abuse filter - the following lets this run:
5475 if re .search ('abuse_filter' ,row [2 ],re .IGNORECASE ):
5576 continue
@@ -63,14 +84,12 @@ def forgotten_articles( self ):
6384 # Page count by namespace
6485 # Run time on enwiki 4 hours 8 minutes as of 8 Sept 2015
6586 def page_count_by_namespace ( self ):
66- cur = self .db .cursor ()
6787 query = """SELECT page_namespace, COUNT(*) AS total, SUM(page_is_redirect) AS redirect FROM page
6888 GROUP BY page_namespace"""
69- cur .execute ( query )
7089
7190 content = []
7291 content .append ( ['pagecount-namespace' , 'pagecount-namespace-name' , 'pagecount-total' , 'pagecount-redirect' , 'pagecount-non-redirect' ] )
73- for row in cur . fetchall ( ):
92+ for row in self . query ( query ):
7493 content .append ( [ row [0 ], '{{subst:ns:' + str ( row [0 ] ) + '}}' , row [1 ], row [2 ], row [1 ]- row [2 ] ])
7594
7695 # Format the data as wikitext
@@ -80,17 +99,15 @@ def page_count_by_namespace( self ):
8099
81100 # Pages with most revisions
82101 def pages_with_most_revisions ( self ):
83- cur = self .db .cursor ()
84102 query = """SELECT COUNT(*) AS revisions, rev_page, p.page_namespace, p.page_title FROM revision r
85103 LEFT JOIN ( SELECT page_id, page_title, page_namespace FROM page ) p ON r.rev_page = p.page_id
86104 GROUP BY rev_page
87105 ORDER BY revisions DESC
88106 LIMIT 1000"""
89- cur .execute ( query )
90107
91108 content = []
92109 content .append ( ['pagerevisions-namespace' , 'pagerevisions-title' , 'pagerevisions-revisions' ] )
93- for row in cur . fetchall ( ):
110+ for row in self . query ( query ):
94111 content .append ( [ row [2 ], self .linkify ( row [3 ], row [2 ] ), row [0 ] ])
95112 # Format the data as wikitext
96113 text = display_report ( self .wiki , content , 'pagerevisions-desc' )
@@ -101,7 +118,6 @@ def pages_with_most_revisions( self ):
101118 # Identify users who meet the criteria for being granted "autopatrolled" on the English Wikipedia but who don't already have it.
102119 # Author: Andrew Crawford (thparkth) <acrawford@laetabilis.com>
103120 def autopatrol_eligibles ( self ):
104- cur = self .db .cursor ()
105121 query = """ SELECT
106122 # "editor" consisting of username, wrapped in HTML tags linking to the sigma "created" tool
107123 CONCAT (
@@ -150,11 +166,10 @@ def autopatrol_eligibles( self ):
150166 HAVING created_count > 24
151167 ORDER BY created_count DESC
152168 LIMIT 500"""
153- cur .execute ( query )
154169
155170 content = []
156171 content .append ( ['autopatrol-username' , 'autopatrol-listlink' , 'autopatrol-articles' ] )
157- for row in cur . fetchall ( ):
172+ for row in self . query ( query ):
158173 if row [1 ] is None :
159174 continue
160175 content .append ( [ row [0 ], row [1 ], row [2 ] ] )
@@ -164,20 +179,18 @@ def autopatrol_eligibles( self ):
164179
165180
166181 def new_wikiprojects ( self ):
167- cur = self .db .cursor ()
168182 query = """SELECT rc_timestamp, rc_title
169183 FROM recentchanges
170184 WHERE rc_new = 1
171185 AND rc_namespace = 4
172186 AND rc_title LIKE 'WikiProject%'
173187 AND rc_title NOT LIKE '%/%'
174188 ORDER BY rc_timestamp DESC
175- """ ;
176- cur .execute ( query )
189+ """
177190
178191 content = []
179192 content .append ( [ 'newwp-date' , 'newwp-title' ] )
180- for row in cur . fetchall ( ):
193+ for row in self . query ( query ):
181194 if row [1 ] is None :
182195 continue
183196 content .append ( [ row [0 ], self .linkify ( row [1 ], 4 ) ] )
@@ -187,7 +200,6 @@ def new_wikiprojects( self ):
187200
188201
189202 def talk_pages_by_size ( self ):
190- cur = self .db .cursor ()
191203 query = """SELECT page_namespace,
192204 REPLACE( SUBSTRING_INDEX(page_title, '/', 1 ), '_', ' ' ) AS parent,
193205 SUM( page_len ) / 1024 / 1024 AS total_size
@@ -196,11 +208,10 @@ def talk_pages_by_size( self ):
196208 GROUP BY page_namespace, parent
197209 ORDER BY total_size DESC
198210 LIMIT 300"""
199- cur .execute ( query )
200211
201212 content = []
202213 content .append ( ['tpbs-namespace' , 'tpbs-page' , 'tpbs-size' ] )
203- for row in cur . fetchall ( ):
214+ for row in self . query ( query ):
204215 content .append ( [ row [0 ], self .linkify ( row [1 ], row [0 ] ), row [2 ] ] )
205216
206217 # Format the data as wikitext
@@ -209,7 +220,6 @@ def talk_pages_by_size( self ):
209220
210221
211222 def unused_file_redirects ( self ):
212- cur = self .db .cursor ()
213223 query = """SELECT page_title,
214224 ( SELECT COUNT(*)
215225 FROM imagelinks
@@ -225,11 +235,10 @@ def unused_file_redirects( self ):
225235 AND page_is_redirect = 1
226236 HAVING imagelinks + links <= 1
227237 """
228- cur .execute ( query )
229238
230239 content = []
231240 content .append ( ['ufr-page' , 'ufr-imagelinks' , 'ufr-links' ] )
232- for row in cur . fetchall ( ):
241+ for row in self . query ( query ):
233242 content .append ( [ self .linkify ( row [0 ], 6 ), row [1 ], row [2 ] ] )
234243
235244 # Format the data as wikitext
@@ -238,7 +247,6 @@ def unused_file_redirects( self ):
238247
239248
240249 def oldest_active ( self ):
241- cur = self .db .cursor ()
242250 query = """SELECT SQL_SMALL_RESULT
243251 CONCAT( '[[User:',user_name,'|',user_name,']]' ) AS user_name,
244252 user_registration,
@@ -258,12 +266,11 @@ def oldest_active( self ):
258266 ) AS InnerQuery
259267 ORDER BY user_registration
260268 LIMIT 200"""
261- cur .execute ( query )
262269
263270 content = []
264271 content .append ( ['oldestactive-username' , 'oldestactive-creationdate' , 'oldestactive-editcount' ] )
265- for row in cur . fetchall ( ):
266- content .append ( [ row [0 ], row [1 ] , row [2 ] ] );
272+ for row in self . query ( query ):
273+ content .append ( [ row [0 ], row [1 ] , row [2 ] ] )
267274
268275 # Format the data as wikitext
269276 text = display_report ( self .wiki , content , 'oldestactive-desc' )
@@ -291,7 +298,6 @@ def deleted_prods( self ):
291298 AND log_namespace=0
292299 GROUP BY page_id
293300 LIMIT 500"""
294- cur .execute ( query )
295301 content = []
296302 content .append (
297303 ['deletedprods-title' ,
@@ -301,8 +307,8 @@ def deleted_prods( self ):
301307 'deletedprods-delcomments'
302308 ]
303309 )
304- for row in cur . fetchall ( ):
305- logtext = row [4 ];
310+ for row in self . query ( query ):
311+ logtext = row [4 ]
306312 logtext = re .sub ("{" ,"<nowiki>{</nowiki>" ,logtext )
307313 logtext = re .sub ("}" ,"<nowiki>}</nowiki>" ,logtext )
308314 content .append ( [ self .linkify ( row [0 ] ), row [1 ], datetime .datetime .strptime ( row [2 ],'%Y%m%d%H%M%S' ), datetime .datetime .strptime ( row [3 ],'%Y%m%d%H%M%S' ), logtext ] )
@@ -312,25 +318,22 @@ def deleted_prods( self ):
312318 self .publish_report ( 'deletedprods-page-title' , text )
313319
314320 def most_used_templates ( self ):
315- cur = self .db .cursor ()
316321 query = """SELECT tl_title, COUNT(*)
317322 FROM templatelinks
318323 WHERE tl_namespace = 10
319324 GROUP BY tl_title
320325 ORDER BY COUNT(*) DESC
321326 LIMIT 3000"""
322- cur .execute ( query )
323327 content = []
324328 content .append ( ['mostusedtemplate-title' , 'mostusedtemplate-count' ] )
325- for row in cur . fetchall ( ):
329+ for row in self . query ( query ):
326330 content .append ( [ self .linkify ( row [0 ], 10 ), row [1 ] ] )
327331
328332 # Format the data as wikitext
329333 text = display_report ( self .wiki , content , 'mostusedtemplate-desc' )
330334 self .publish_report ( 'mostusedtemplate-page-title' , text )
331335
332336 def unused_templates ( self ):
333- cur = self .db .cursor ()
334337 query = """SELECT page_title
335338 FROM page
336339 LEFT JOIN categorylinks
@@ -346,18 +349,16 @@ def unused_templates( self ):
346349 AND tl_from IS NULL
347350 AND cl_from IS NULL
348351 LIMIT 2000"""
349- cur .execute ( query )
350352 content = []
351353 content .append ( ['unusedtemplate-title' ] )
352- for row in cur . fetchall ( ):
354+ for row in self . query ( query ):
353355 content .append ( [ self .linkify ( row [0 ], 10 ) ] )
354356
355357 # Format the data as wikitext
356358 text = display_report ( self .wiki , content , 'unusedtemplate-desc' )
357359 self .publish_report ( 'unusedtemplate-page-title' , text )
358360
359361 def orphaned_talk ( self ):
360- cur = self .db .cursor ()
361362 query = """SELECT page_namespace, page_id, page_title, page_is_redirect, page_len
362363 FROM page talkpage
363364 WHERE talkpage.page_title NOT LIKE '%/%'
@@ -375,16 +376,15 @@ def orphaned_talk( self ):
375376 AND tl_title='G8-exempt'
376377 )
377378 LIMIT 1000"""
378- cur .execute ( query )
379379 content = []
380380 content .append ( ["orphantalk-count" , "orphantalk-isredirect" , "orphantalk-namespace" , "orphantalk-itemtitle" , "orphantalk-pagesize" ] )
381381 namespace_translate = { 1 :"Talk" , 5 :"Wikipedia_Talk" , 11 :"Template_Talk" , 15 :"Category_Talk" , 101 :"Portal_Talk" , 119 :"Draft_Talk" }
382382 count = 1
383383
384- for row in cur . fetchall ( ):
384+ for row in self . query ( query ):
385385 namespace = namespace_translate [row [0 ]]
386386 fulllink = namespace + ':' + row [2 ]
387- pagesize = round ( row [4 ]/ 1024 , 1 );
387+ pagesize = round ( row [4 ]/ 1024 , 1 )
388388 if row [3 ] == 0 :
389389 redirect_label = " "
390390 else :
@@ -398,18 +398,16 @@ def orphaned_talk( self ):
398398
399399 def most_edited_page_last_month ( self ):
400400 # Make the query
401- cur = self .db .cursor ()
402401 query = """SELECT rc_title, count(*) as num_edits
403402 FROM recentchanges
404403 WHERE rc_namespace = 0 AND rc_type < 2
405404 GROUP BY 1 ORDER BY 2 DESC
406405 LIMIT 25;"""
407- cur .execute ( query )
408406
409407 # Extract the data into a Python nested list
410408 content = []
411409 content .append ( ['most_edited_page_last_month-title' , 'most_edited_page_last_month-editcount' ] )
412- for row in cur . fetchall () :
410+ for row in self . query ( query ) :
413411 content .append ( [ self .linkify ( row [0 ] ), row [1 ] ] )
414412
415413 # Format the data as wikitext
@@ -418,7 +416,6 @@ def most_edited_page_last_month( self ):
418416
419417 # Longest articles (NS=0)
420418 def article_by_size ( self ):
421- cur = self .db .cursor ()
422419 query = """SELECT
423420 page_namespace,
424421 page_title,
@@ -429,10 +426,9 @@ def article_by_size( self ):
429426 AND page_title NOT LIKE "%/%"
430427 ORDER BY page_len DESC
431428 LIMIT 1000;"""
432- cur .execute ( query )
433429 content = []
434430 content .append ( ['article_by_size-namespace' , 'article_by_size-title' , 'article_by_size-size' ] )
435- for row in cur . fetchall ( ):
431+ for row in self . query ( query ):
436432 content .append ( [ row [0 ], self .linkify ( row [1 ], row [0 ] ), row [2 ] ])
437433
438434 # Format the data as wikitext
0 commit comments