Rename
RickiJay-WMDE committed Aug 29, 2025
commit 1c8da635a69f3f4f46cfe93fa3358168d66bd7de
8 changes: 4 additions & 4 deletions app.py
@@ -7,7 +7,7 @@
 from starlette.requests import Request
 from strawberry.fastapi import GraphQLRouter
 
-from export_csv import export_quantity_csv
+from export_csv import export_metric_csv
 from model.strawberry import schema
 from resolvers.authentication import authenticate_request
 from schedule import scheduler
@@ -49,10 +49,10 @@ def read_root():
 CHUNK_SIZE = 1024 * 1024
 
 
-@app.get("/csv/quantity", response_class=StreamingResponse)
-async def quantity_csv(request: Request, background_tasks: BackgroundTasks):
+@app.get("/csv/metrics", response_class=StreamingResponse)
+async def metric_csv(request: Request, background_tasks: BackgroundTasks):
     """Quantity CSV"""
 
     # authenticate_request(request)
 
-    return await export_quantity_csv(background_tasks)
+    return await export_metric_csv(background_tasks)
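Note: a quick way to exercise the renamed endpoint locally (a sketch only; it assumes the API is served at http://localhost:8000 and that authenticate_request stays commented out, as in this diff):

import requests  # any HTTP client works; requests is just an example

# Stream the CSV to disk without holding the whole body in memory
with requests.get("http://localhost:8000/csv/metrics", stream=True, timeout=60) as resp:
    resp.raise_for_status()
    assert resp.headers["Content-Type"].startswith("text/csv")
    with open("metrics.csv", "wb") as out:
        for chunk in resp.iter_content(chunk_size=1024 * 1024):
            out.write(chunk)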
2 changes: 1 addition & 1 deletion export_csv/__init__.py
@@ -1,3 +1,3 @@
 """Export CSVs"""
 
-from export_csv.quantity import export_quantity_csv
+from export_csv.metric import export_metric_csv
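With the package re-export updated, callers keep importing from the package root rather than the module path; app.py above already does exactly this:

from export_csv import export_metric_csv  # resolved via export_csv/__init__.py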
65 changes: 38 additions & 27 deletions export_csv/quantity.py → export_csv/metric.py
@@ -21,10 +21,36 @@
 CHUNK_SIZE = 1024 * 1024
 
 
-async def export_quantity_csv(background_tasks: BackgroundTasks):
-    """Quantity CSV"""
+async def export_metric_csv(background_tasks: BackgroundTasks):
+    """CSV with Requested Metrics"""
 
-    filtered_subquery = (
+    query = get_metrics_query()
+
+    df = await read_sql_query(query, index_col="wikibase_id")
+
+    filename = f"{uuid.uuid4()}.csv"
+    df.to_csv(filename)
+    del df
+
+    def iterfile():
+        with open(filename, "rb") as f:
+            while chunk := f.read(CHUNK_SIZE):
+                yield chunk
+
+    background_tasks.add_task(os.remove, filename)
+
+    headers = {"Content-Disposition": 'attachment; filename="metrics.csv"'}
+    return StreamingResponse(iterfile(), headers=headers, media_type="text/csv")
+
+
+def get_metrics_query():
+    """
+    Filter Out Offline and Test Wikis
+
+    Pull Quantity, Recent Changes, and Software Version Metrics
+    """
+
+    filtered_wikibase_subquery = (
         select(WikibaseModel)
         .where(
             and_(
@@ -132,9 +158,8 @@ async def export_quantity_csv(background_tasks: BackgroundTasks):
 
     query = (
         select(
-            filtered_subquery.c.id.label("wikibase_id"),
-            filtered_subquery.c.wb_type.label("wikibase_type"),
-
+            filtered_wikibase_subquery.c.id.label("wikibase_id"),
+            filtered_wikibase_subquery.c.wb_type.label("wikibase_type"),
             most_recent_successful_quantity_obs.c.date.label(
                 "quantity_observation_date"
             ),
@@ -150,15 +175,15 @@ async def export_quantity_csv(background_tasks: BackgroundTasks):
             ),
             most_recent_successful_quantity_obs.c.total_url_properties,
             most_recent_successful_quantity_obs.c.total_url_statements,
-
-            most_recent_successful_rc_obs.c.date.label('recent_changes_observation_date'),
+            most_recent_successful_rc_obs.c.date.label(
+                "recent_changes_observation_date"
+            ),
             most_recent_successful_rc_obs.c.first_change_date,
             most_recent_successful_rc_obs.c.last_change_date,
             most_recent_successful_rc_obs.c.human_change_count,
             most_recent_successful_rc_obs.c.human_change_user_count,
             most_recent_successful_rc_obs.c.bot_change_count,
             most_recent_successful_rc_obs.c.bot_change_user_count,
-
             most_recent_successful_sv_obs.c.observation_date.label(
                 "software_version_observation_date"
             ),
@@ -167,36 +192,22 @@ async def export_quantity_csv(background_tasks: BackgroundTasks):
         )
         .join(
             most_recent_successful_quantity_obs,
-            onclause=filtered_subquery.c.id
+            onclause=filtered_wikibase_subquery.c.id
             == most_recent_successful_quantity_obs.c.wikibase_id,
             isouter=True,
         )
         .join(
             most_recent_successful_rc_obs,
-            onclause=filtered_subquery.c.id
+            onclause=filtered_wikibase_subquery.c.id
             == most_recent_successful_rc_obs.c.wikibase_id,
             isouter=True,
         )
         .join(
             most_recent_successful_sv_obs,
-            onclause=filtered_subquery.c.id
+            onclause=filtered_wikibase_subquery.c.id
             == most_recent_successful_sv_obs.c.wikibase_id,
             isouter=True,
         )
     )
 
-    df = await read_sql_query(query, index_col="wikibase_id")
-
-    filename = f"{uuid.uuid4()}.csv"
-    df.to_csv(filename)
-    del df
-
-    def iterfile():
-        with open(filename, "rb") as f:
-            while chunk := f.read(CHUNK_SIZE):
-                yield chunk
-
-    background_tasks.add_task(os.remove, filename)
-
-    headers = {"Content-Disposition": 'attachment; filename="quantity_data.csv"'}
-    return StreamingResponse(iterfile(), headers=headers, media_type="text/csv")
+    return query
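For readers skimming the diff, here is a minimal, self-contained sketch of the join shape get_metrics_query builds. The toy Table definitions below are stand-ins for the real models and observation subqueries (which are defined outside this diff); the point is that each observation subquery is LEFT OUTER JOINed onto the filtered wikibase list, so a wiki still appears in the export even when one of its observations is missing:

from sqlalchemy import Column, Integer, MetaData, String, Table, select

metadata = MetaData()

# Toy stand-ins for the real models/subqueries used in metric.py
wikibase = Table(
    "wikibase",
    metadata,
    Column("id", Integer, primary_key=True),
    Column("wb_type", String),
)
quantity_obs = Table(
    "quantity_obs",
    metadata,
    Column("wikibase_id", Integer),
    Column("total_items", Integer),
)

filtered_wikibase_subquery = select(wikibase).subquery()
most_recent_successful_quantity_obs = select(quantity_obs).subquery()

query = (
    select(
        filtered_wikibase_subquery.c.id.label("wikibase_id"),
        filtered_wikibase_subquery.c.wb_type.label("wikibase_type"),
        most_recent_successful_quantity_obs.c.total_items,
    )
    .join(
        most_recent_successful_quantity_obs,
        onclause=filtered_wikibase_subquery.c.id
        == most_recent_successful_quantity_obs.c.wikibase_id,
        isouter=True,  # keep wikis that have no quantity observation yet
    )
)

print(query)  # inspect the generated SQL without touching a database

Without isouter=True these would be inner joins, and any wikibase missing one of the three observation types would drop out of the CSV entirely.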