This repository was archived by the owner on Feb 7, 2024. It is now read-only.

Commit ac2dc2b

Updated scripts
1 parent 076593c

6 files changed (+182, -41 lines)

scripts/copy_extension_json.py

Lines changed: 20 additions & 0 deletions
@@ -0,0 +1,20 @@
import requests
import pywikibot

response = requests.get('https://www.mediawiki.org/wiki/Module:ExtensionJson?action=raw')
if response.status_code == 200:
    site = pywikibot.Site()
    if not site.user():
        site.login()
    if site.user():
        page = pywikibot.Page(site, 'Module:ExtensionJson')
        if page.exists():
            summary = 'Updated page'
        else:
            summary = 'Created page'
        page.text = response.text
        page.save(summary=summary, quiet=True)
    else:
        print("Failed to log in")
else:
    print(f"Failed to fetch data. Status code: {response.status_code}")

scripts/create_pages.py

Lines changed: 118 additions & 0 deletions
@@ -0,0 +1,118 @@
from argparse import ArgumentParser
import requests
import pywikibot
from pywikibot.exceptions import InvalidTitleError
from sqlalchemy.orm import Session
from sqlalchemy import select
import time
import sys
sys.path.append('./lib')
from models import engine, SkinData, ExtensionData
from utils import log_message


def get_args():
    parser = ArgumentParser(prog='Create', description='creates pages in wiki corresponding to skins and extensions in database')
    parser.add_argument("-v", "--verbose", action="count", default=0, help="increase output verbosity")
    return parser.parse_args()


def get_skins(session):
    stmt = select(SkinData.w8y_sd_name).group_by(
        SkinData.w8y_sd_name
    )
    return session.scalars(stmt)


def get_extensions(session):
    stmt = select(ExtensionData.w8y_ed_name).group_by(
        ExtensionData.w8y_ed_name
    )
    return session.scalars(stmt)


def create_pages(components, args, site, session, componentType, namespace, template, errors):
    count = 0
    start_time = time.time()
    for component in components:
        count = count + 1
        if count % 100 == 0:
            now = time.time()
            duration = now - start_time
            message = 'Processed %d %s in %d seconds' % (count, componentType, duration)
            log_message(session, message)
            if args.verbose:
                print(message)
        try:
            database_name = component.decode('utf-8')
            badchars = ['[', ']', '>', '<']
            sanitized_name = database_name
            for char in badchars:
                sanitized_name = sanitized_name.replace(char, '')
            pagename = namespace + ':' + sanitized_name
            page = pywikibot.Page(site, pagename)
            if page.exists():
                message = f'{pagename} already exists'
                log_message(session, message)
                if args.verbose > 1:
                    print(message)
            else:
                message = f'Creating page {pagename}'
                log_message(session, message)
                if args.verbose:
                    print(message)
                summary = 'Created page'
                page.text = f'{{{{{template}|name={database_name}}}}}'
                page.save(summary=summary, quiet=True)
        except InvalidTitleError:
            errors.append(pagename)
            message = f'Invalid title: {pagename}'
            log_message(session, message)
            if args.verbose:
                print(message)

def run():
    args = get_args()
    start_time = time.time()
    site = pywikibot.Site()
    if not site.user():
        site.login()
    if site.user():
        errors = []
        with Session(engine) as session:
            try:
                message = 'Starting creating skin pages.'
                log_message(session, message)
                if args.verbose:
                    print(message)
                skins = get_skins(session)
                create_pages(skins, args, site, session, 'skins', 'Skin', 'Skin', errors)

                message = 'Starting creating extension pages.'
                log_message(session, message)
                if args.verbose:
                    print(message)
                extensions = get_extensions(session)
                create_pages(extensions, args, site, session, 'extensions', 'Extension', 'Extension', errors)
            except KeyboardInterrupt:
                pass
            finally:
                duration = time.time() - start_time
                message = 'Completed creating skin and extension pages in %d seconds' % (duration)
                log_message(session, message)
                if args.verbose:
                    print(message)
                for error in errors:
                    message = f'Bad page title: {error}'
                    log_message(session, message)
                    if args.verbose:
                        print(message)
    else:
        message = 'User login failure.'
        with Session(engine) as session:
            log_message(session, message)
        if args.verbose:
            print(message)


if __name__ == '__main__':
    run()
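The page.text assignment above is easy to misread: in an f-string, {{ and }} are escapes for literal braces, so the expression renders a MediaWiki template transclusion. A quick sketch (ParserFunctions is a hypothetical example name, not data from the database):

# f-string brace escaping: '{{' yields '{' and '}}' yields '}'
template = 'Extension'
database_name = 'ParserFunctions'   # hypothetical example name
print(f'{{{{{template}|name={database_name}}}}}')
# prints: {{Extension|name=ParserFunctions}}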

scripts/requirements.txt

Lines changed: 0 additions & 5 deletions
This file was deleted.

scripts/scrape.py

Lines changed: 44 additions & 36 deletions
@@ -56,44 +56,53 @@ def run():
     good_count = 0
     error_count = 0
     site = pywikibot.Site()
-    with Session(engine) as session:
-        try:
-            wikis = get_wikis(session, args.new)
-            message = 'Starting scraping wikis.'
-            log_message(session, message)
-            if args.verbose:
-                print(message)
-            count = 0
-            for wiki in wikis:
-                count += 1
-                if count % 100 == 0:
-                    duration = time.time() - start_time
-                    message = 'Processed %d wikis in %d seconds' % (count, duration)
-                    log_message(session, message)
-                    if args.verbose:
+    if not site.user():
+        site.login()
+    if site.user():
+        with Session(engine) as session:
+            try:
+                wikis = get_wikis(session, args.new)
+                message = 'Starting scraping wikis.'
+                log_message(session, message)
+                if args.verbose:
+                    print(message)
+                count = 0
+                for wiki in wikis:
+                    count += 1
+                    if count % 100 == 0:
+                        duration = time.time() - start_time
+                        message = 'Processed %d wikis in %d seconds' % (count, duration)
+                        log_message(session, message)
+                        if args.verbose:
+                            print(message)
+                    url = wiki.w8y_wi_api_url.decode('utf8')
+                    page_id = wiki.w8y_wi_page_id
+                    if args.verbose > 1:
+                        message = f'Scraping {url}'
                         print(message)
-                url = wiki.w8y_wi_api_url.decode('utf8')
-                page_id = wiki.w8y_wi_page_id
-                if args.verbose > 1:
-                    message = f'Scraping {url}'
+                    (sr_id, error) = scrape_site(url, page_id, wiki.w8y_wi_last_sr_id, args, session)
+                    wiki.w8y_wi_last_sr_id = sr_id
+                    session.add(wiki)
+                    session.commit()
+                    if error:
+                        error_count += 1
+                    else:
+                        good_count += 1
+                    purge(site, session, args, page_id)
+            except KeyboardInterrupt:
+                session.rollback()
+            finally:
+                duration = time.time() - start_time
+                message = 'Completed scraping, %d complete, %d errors, %d seconds' % (good_count, error_count, duration)
+                log_message(session, message)
+                if args.verbose:
                     print(message)
-                (sr_id, error) = scrape_site(url, page_id, wiki.w8y_wi_last_sr_id, args, session)
-                wiki.w8y_wi_last_sr_id = sr_id
-                session.add(wiki)
-                session.commit()
-                if error:
-                    error_count += 1
-                else:
-                    good_count += 1
-                purge(site, session, args, page_id)
-            except KeyboardInterrupt:
-                session.rollback()
-            finally:
-                duration = time.time() - start_time
-                message = 'Completed scraping, %d complete, %d errors, %d seconds' % (good_count, error_count, duration)
-                log_message(session, message)
-                if args.verbose:
-                    print(message)
+    else:
+        message = 'User login failure.'
+        with Session(engine) as session:
+            log_message(session, message)
+        if args.verbose:
+            print(message)


 if __name__ == '__main__':
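The change above puts the existing scrape loop behind the same login guard used by the new scripts. Condensed to a sketch (do_work is a hypothetical stand-in for the loop body, not a function in this repository):

import pywikibot


def do_work(site):
    # Hypothetical stand-in for the script's real work.
    print(f'Logged in as {site.user()}')


site = pywikibot.Site()
if not site.user():   # no active session yet
    site.login()
if site.user():       # logged in, or a session already existed
    do_work(site)
else:
    print('Failed to log in')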
