Skip to content
This repository was archived by the owner on Feb 7, 2024. It is now read-only.

Commit 28d078f

Browse files
authored
Merge pull request #2 from cicalese/linktables
Add skin and extension link tables
2 parents 3e1fb04 + 73c0182 commit 28d078f

File tree

6 files changed

+313
-91
lines changed

6 files changed

+313
-91
lines changed

scripts/lib/models.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,12 +29,20 @@ class VersionRecord(Base):
2929
__table__ = Base.metadata.tables['w8y_version_records']
3030

3131

32-
class Skin(Base):
33-
__table__ = Base.metadata.tables['w8y_skins']
32+
class SkinLink(Base):
33+
__table__ = Base.metadata.tables['w8y_skin_links']
3434

3535

36-
class Extension(Base):
37-
__table__ = Base.metadata.tables['w8y_extensions']
36+
class SkinData(Base):
37+
__table__ = Base.metadata.tables['w8y_skin_data']
38+
39+
40+
class ExtensionLink(Base):
41+
__table__ = Base.metadata.tables['w8y_extension_links']
42+
43+
44+
class ExtensionData(Base):
45+
__table__ = Base.metadata.tables['w8y_extension_data']
3846

3947

4048
class Log(Base):

scripts/lib/scraper.py

Lines changed: 53 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from models import ScrapeRecord, VersionRecord, Skin, Extension
1+
from models import ScrapeRecord, VersionRecord, SkinLink, SkinData, ExtensionLink, ExtensionData
22
from sqlalchemy import select
33
from utils import log_message
44
import json
@@ -42,23 +42,31 @@ def last_versions_match(session, last_sr_id, skin_versions, extension_versions):
4242
if vr_id is None:
4343
return False, None
4444

45-
stmt = select(Skin).where(Skin.w8y_sk_vr_id == vr_id)
45+
stmt = select(SkinData).join_from(
46+
SkinLink,
47+
SkinData,
48+
SkinLink.w8y_sl_sd_id == SkinData.w8y_sd_sd_id
49+
).where(SkinLink.w8y_sl_vr_id == vr_id)
4650
last_skins = session.scalars(stmt)
4751
count = 0
4852
for skin in last_skins:
49-
name = skin.w8y_sk_name
50-
if name not in skin_versions or skin.w8y_sk_version != skin_versions[name]:
53+
name = skin.w8y_sd_name
54+
if name not in skin_versions or skin.w8y_sd_version != skin_versions[name]:
5155
return False, None
5256
count += 1
5357
if len(skin_versions) != count:
5458
return False, None
5559

56-
stmt = select(Extension).where(Extension.w8y_ex_vr_id == vr_id)
60+
stmt = select(ExtensionData).join_from(
61+
ExtensionLink,
62+
ExtensionData,
63+
ExtensionLink.w8y_el_ed_id == ExtensionData.w8y_ed_ed_id
64+
).where(ExtensionLink.w8y_el_vr_id == vr_id)
5765
last_extensions = session.scalars(stmt)
5866
count = 0
5967
for extension in last_extensions:
60-
name = extension.w8y_ex_name
61-
if name not in extension_versions or extension.w8y_ex_version != extension_versions[name]:
68+
name = extension.w8y_ed_name
69+
if name not in extension_versions or extension.w8y_ed_version != extension_versions[name]:
6270
return False, None
6371
count += 1
6472
if len(extension_versions) != count:
@@ -114,24 +122,46 @@ def create_version_records(session, last_sr_id, components):
114122
vr_id = version_record.w8y_vr_vr_id
115123

116124
for skin in skins:
117-
session.add(
118-
Skin(
119-
w8y_sk_vr_id=vr_id,
120-
w8y_sk_name=skin['name'],
121-
w8y_sk_version=skin['version'],
122-
w8y_sk_doc_url=skin['url']
125+
stmt = select(SkinData).where(
126+
(SkinData.w8y_sd_name == skin['name']) &
127+
(SkinData.w8y_sd_version == skin['version']) &
128+
(SkinData.w8y_sd_doc_url == skin['url'])
129+
)
130+
skin_data = session.scalars(stmt).one_or_none()
131+
if skin_data is None:
132+
skin_data = SkinData(
133+
w8y_sd_name=skin['name'],
134+
w8y_sd_version=skin['version'],
135+
w8y_sd_doc_url=skin['url']
123136
)
137+
session.add(skin_data)
138+
session.commit()
139+
skin_link = SkinLink(
140+
w8y_sl_vr_id = vr_id,
141+
w8y_sl_sd_id = skin_data.w8y_sd_sd_id
124142
)
143+
session.add(skin_link)
125144

126145
for extension in extensions:
127-
session.add(
128-
Extension(
129-
w8y_ex_vr_id=vr_id,
130-
w8y_ex_name=extension['name'],
131-
w8y_ex_version=extension['version'],
132-
w8y_ex_doc_url=extension['url']
146+
stmt = select(ExtensionData).where(
147+
(ExtensionData.w8y_ed_name == extension['name']) &
148+
(ExtensionData.w8y_ed_version == extension['version']) &
149+
(ExtensionData.w8y_ed_doc_url == extension['url'])
150+
)
151+
extension_data = session.scalars(stmt).one_or_none()
152+
if extension_data is None:
153+
extension_data = ExtensionData(
154+
w8y_ed_name=extension['name'],
155+
w8y_ed_version=extension['version'],
156+
w8y_ed_doc_url=extension['url']
133157
)
158+
session.add(extension_data)
159+
session.commit()
160+
extension_link = ExtensionLink(
161+
w8y_el_vr_id = vr_id,
162+
w8y_el_ed_id = extension_data.w8y_ed_ed_id
134163
)
164+
session.add(extension_link)
135165

136166
return vr_id
137167

@@ -141,16 +171,11 @@ def scrape_site(url, page_id, last_sr_id, args, session):
141171
timestamp = time.time()
142172

143173
if not data or 'query' not in data or 'general' not in data['query'] or 'statistics' not in data['query']:
144-
version_record = VersionRecord()
145-
session.add(version_record)
146-
session.commit()
147-
vr_id = version_record.w8y_vr_vr_id
148174
scrape = ScrapeRecord(
149175
w8y_sr_page_id=page_id,
150176
w8y_sr_api_url=bytes(url, 'utf-8'),
151177
w8y_sr_timestamp=timestamp,
152-
w8y_sr_is_alive=False,
153-
w8y_sr_vr_id=vr_id
178+
w8y_sr_is_alive=False
154179
)
155180
session.add(scrape)
156181
session.commit()
@@ -163,11 +188,11 @@ def scrape_site(url, page_id, last_sr_id, args, session):
163188
if 'dbtype' in general and 'dbversion' in general:
164189
db_version = general['dbtype'] + ': ' + general['dbversion']
165190
else:
166-
db_version = ""
191+
db_version = ''
167192
if 'phpversion' in general and 'phpsapi' in general:
168193
php_version = general['phpversion'] + '(' + general['phpsapi'] + ')'
169194
else:
170-
php_version = ""
195+
php_version = ''
171196
language = general['lang']
172197
if 'logo' in general and len(general['logo']) < 256:
173198
logo = general['logo']
@@ -183,10 +208,7 @@ def scrape_site(url, page_id, last_sr_id, args, session):
183208
if 'extensions' in query:
184209
vr_id = create_version_records(session, last_sr_id, query['extensions'])
185210
else:
186-
version_record = VersionRecord()
187-
session.add(version_record)
188-
session.commit()
189-
vr_id = version_record.w8y_vr_vr_id
211+
vr_id = None
190212

191213
scrape = ScrapeRecord(
192214
w8y_sr_page_id=page_id,

sql/mysql/tables.sql

Lines changed: 44 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ CREATE TABLE /*_*/w8y_wikis (
77
w8y_wi_api_url VARCHAR(255) NOT NULL,
88
w8y_wi_last_sr_id INT UNSIGNED DEFAULT NULL,
99
w8y_wi_is_defunct TINYINT(1) NOT NULL,
10+
INDEX w8y_wi_last_sr_id (w8y_wi_last_sr_id),
1011
PRIMARY KEY(w8y_wi_page_id)
1112
) /*$wgDBTableOptions*/;
1213

@@ -17,7 +18,7 @@ CREATE TABLE /*_*/w8y_scrape_records (
1718
w8y_sr_api_url VARCHAR(255) NOT NULL,
1819
w8y_sr_timestamp INT UNSIGNED NOT NULL,
1920
w8y_sr_is_alive TINYINT(1) NOT NULL,
20-
w8y_sr_vr_id INT UNSIGNED NOT NULL,
21+
w8y_sr_vr_id INT UNSIGNED DEFAULT NULL,
2122
w8y_sr_mw_version VARCHAR(255) DEFAULT NULL,
2223
w8y_sr_db_version VARCHAR(255) DEFAULT NULL,
2324
w8y_sr_php_version VARCHAR(255) DEFAULT NULL,
@@ -26,6 +27,14 @@ CREATE TABLE /*_*/w8y_scrape_records (
2627
w8y_sr_language VARCHAR(255) DEFAULT NULL,
2728
w8y_sr_general BLOB DEFAULT NULL,
2829
w8y_sr_statistics BLOB DEFAULT NULL,
30+
INDEX w8y_sr_page_id_by_timestamp (
31+
w8y_sr_page_id, w8y_sr_timestamp
32+
),
33+
INDEX w8y_sr_mw_version (w8y_sr_sr_id, w8y_sr_mw_version),
34+
INDEX w8y_sr_db_version (w8y_sr_sr_id, w8y_sr_db_version),
35+
INDEX w8y_sr_php_version (
36+
w8y_sr_sr_id, w8y_sr_php_version
37+
),
2938
PRIMARY KEY(w8y_sr_sr_id)
3039
) /*$wgDBTableOptions*/;
3140

@@ -36,21 +45,43 @@ CREATE TABLE /*_*/w8y_version_records (
3645
) /*$wgDBTableOptions*/;
3746

3847

39-
CREATE TABLE /*_*/w8y_skins (
40-
w8y_sk_vr_id INT UNSIGNED NOT NULL,
41-
w8y_sk_name VARCHAR(255) NOT NULL,
42-
w8y_sk_version VARCHAR(255) DEFAULT NULL,
43-
w8y_sk_doc_url VARCHAR(255) DEFAULT NULL,
44-
PRIMARY KEY(w8y_sk_vr_id, w8y_sk_name)
48+
CREATE TABLE /*_*/w8y_skin_links (
49+
w8y_sl_vr_id INT UNSIGNED NOT NULL,
50+
w8y_sl_sd_id INT UNSIGNED NOT NULL,
51+
PRIMARY KEY(w8y_sl_vr_id, w8y_sl_sd_id)
4552
) /*$wgDBTableOptions*/;
4653

4754

48-
CREATE TABLE /*_*/w8y_extensions (
49-
w8y_ex_vr_id INT UNSIGNED NOT NULL,
50-
w8y_ex_name VARCHAR(255) NOT NULL,
51-
w8y_ex_version VARCHAR(255) DEFAULT NULL,
52-
w8y_ex_doc_url VARCHAR(255) DEFAULT NULL,
53-
PRIMARY KEY(w8y_ex_vr_id, w8y_ex_name)
55+
CREATE TABLE /*_*/w8y_skin_data (
56+
w8y_sd_sd_id INT UNSIGNED AUTO_INCREMENT NOT NULL,
57+
w8y_sd_name VARCHAR(255) NOT NULL,
58+
w8y_sd_version VARCHAR(255) DEFAULT NULL,
59+
w8y_sd_doc_url VARCHAR(255) DEFAULT NULL,
60+
UNIQUE INDEX w8y_sd (
61+
w8y_sd_name, w8y_sd_version, w8y_sd_doc_url
62+
),
63+
INDEX w8y_sd_name (w8y_sd_name),
64+
PRIMARY KEY(w8y_sd_sd_id)
65+
) /*$wgDBTableOptions*/;
66+
67+
68+
CREATE TABLE /*_*/w8y_extension_links (
69+
w8y_el_vr_id INT UNSIGNED NOT NULL,
70+
w8y_el_ed_id INT UNSIGNED NOT NULL,
71+
PRIMARY KEY(w8y_el_vr_id, w8y_el_ed_id)
72+
) /*$wgDBTableOptions*/;
73+
74+
75+
CREATE TABLE /*_*/w8y_extension_data (
76+
w8y_ed_ed_id INT UNSIGNED AUTO_INCREMENT NOT NULL,
77+
w8y_ed_name VARCHAR(255) NOT NULL,
78+
w8y_ed_version VARCHAR(255) DEFAULT NULL,
79+
w8y_ed_doc_url VARCHAR(255) DEFAULT NULL,
80+
UNIQUE INDEX w8y_ed (
81+
w8y_ed_name, w8y_ed_version, w8y_ed_doc_url
82+
),
83+
INDEX w8y_ed_name (w8y_ed_name),
84+
PRIMARY KEY(w8y_ed_ed_id)
5485
) /*$wgDBTableOptions*/;
5586

5687

sql/postgres/tables.sql

Lines changed: 53 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,16 @@ CREATE TABLE w8y_wikis (
1010
PRIMARY KEY(w8y_wi_page_id)
1111
);
1212

13+
CREATE INDEX w8y_wi_last_sr_id ON w8y_wikis (w8y_wi_last_sr_id);
14+
1315

1416
CREATE TABLE w8y_scrape_records (
1517
w8y_sr_sr_id SERIAL NOT NULL,
1618
w8y_sr_page_id INT NOT NULL,
1719
w8y_sr_api_url VARCHAR(255) NOT NULL,
1820
w8y_sr_timestamp INT NOT NULL,
1921
w8y_sr_is_alive BOOLEAN NOT NULL,
20-
w8y_sr_vr_id INT NOT NULL,
22+
w8y_sr_vr_id INT DEFAULT NULL,
2123
w8y_sr_mw_version VARCHAR(255) DEFAULT NULL,
2224
w8y_sr_db_version VARCHAR(255) DEFAULT NULL,
2325
w8y_sr_php_version VARCHAR(255) DEFAULT NULL,
@@ -29,30 +31,68 @@ CREATE TABLE w8y_scrape_records (
2931
PRIMARY KEY(w8y_sr_sr_id)
3032
);
3133

34+
CREATE INDEX w8y_sr_page_id_by_timestamp ON w8y_scrape_records (
35+
w8y_sr_page_id, w8y_sr_timestamp
36+
);
37+
38+
CREATE INDEX w8y_sr_mw_version ON w8y_scrape_records (w8y_sr_sr_id, w8y_sr_mw_version);
39+
40+
CREATE INDEX w8y_sr_db_version ON w8y_scrape_records (w8y_sr_sr_id, w8y_sr_db_version);
41+
42+
CREATE INDEX w8y_sr_php_version ON w8y_scrape_records (
43+
w8y_sr_sr_id, w8y_sr_php_version
44+
);
45+
3246

3347
CREATE TABLE w8y_version_records (
3448
w8y_vr_vr_id SERIAL NOT NULL,
3549
PRIMARY KEY(w8y_vr_vr_id)
3650
);
3751

3852

39-
CREATE TABLE w8y_skins (
40-
w8y_sk_vr_id INT NOT NULL,
41-
w8y_sk_name VARCHAR(255) NOT NULL,
42-
w8y_sk_version VARCHAR(255) DEFAULT NULL,
43-
w8y_sk_doc_url VARCHAR(255) DEFAULT NULL,
44-
PRIMARY KEY(w8y_sk_vr_id, w8y_sk_name)
53+
CREATE TABLE w8y_skin_links (
54+
w8y_sl_vr_id INT NOT NULL,
55+
w8y_sl_sd_id INT NOT NULL,
56+
PRIMARY KEY(w8y_sl_vr_id, w8y_sl_sd_id)
4557
);
4658

4759

48-
CREATE TABLE w8y_extensions (
49-
w8y_ex_vr_id INT NOT NULL,
50-
w8y_ex_name VARCHAR(255) NOT NULL,
51-
w8y_ex_version VARCHAR(255) DEFAULT NULL,
52-
w8y_ex_doc_url VARCHAR(255) DEFAULT NULL,
53-
PRIMARY KEY(w8y_ex_vr_id, w8y_ex_name)
60+
CREATE TABLE w8y_skin_data (
61+
w8y_sd_sd_id SERIAL NOT NULL,
62+
w8y_sd_name VARCHAR(255) NOT NULL,
63+
w8y_sd_version VARCHAR(255) DEFAULT NULL,
64+
w8y_sd_doc_url VARCHAR(255) DEFAULT NULL,
65+
PRIMARY KEY(w8y_sd_sd_id)
5466
);
5567

68+
CREATE UNIQUE INDEX w8y_sd ON w8y_skin_data (
69+
w8y_sd_name, w8y_sd_version, w8y_sd_doc_url
70+
);
71+
72+
CREATE INDEX w8y_sd_name ON w8y_skin_data (w8y_sd_name);
73+
74+
75+
CREATE TABLE w8y_extension_links (
76+
w8y_el_vr_id INT NOT NULL,
77+
w8y_el_ed_id INT NOT NULL,
78+
PRIMARY KEY(w8y_el_vr_id, w8y_el_ed_id)
79+
);
80+
81+
82+
CREATE TABLE w8y_extension_data (
83+
w8y_ed_ed_id SERIAL NOT NULL,
84+
w8y_ed_name VARCHAR(255) NOT NULL,
85+
w8y_ed_version VARCHAR(255) DEFAULT NULL,
86+
w8y_ed_doc_url VARCHAR(255) DEFAULT NULL,
87+
PRIMARY KEY(w8y_ed_ed_id)
88+
);
89+
90+
CREATE UNIQUE INDEX w8y_ed ON w8y_extension_data (
91+
w8y_ed_name, w8y_ed_version, w8y_ed_doc_url
92+
);
93+
94+
CREATE INDEX w8y_ed_name ON w8y_extension_data (w8y_ed_name);
95+
5696

5797
CREATE TABLE w8y_log (
5898
w8y_lo_id SERIAL NOT NULL,

0 commit comments

Comments
 (0)