Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
fix: csv export without tmp file (#115)
* fix: csv export without tmp file

* fix: do not use StreamingResponse
  • Loading branch information
rti authored Sep 6, 2025
commit 4fd944e9847fd3360d62485f1a6cf6f3f080bf22
8 changes: 3 additions & 5 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from contextlib import asynccontextmanager
from typing import Optional
- from fastapi import BackgroundTasks, FastAPI
+ from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import PlainTextResponse
from strawberry.fastapi import GraphQLRouter
Expand Down Expand Up @@ -48,9 +48,7 @@ def read_root():


@app.get("/csv/metrics")
- async def metric_csv(
- background_tasks: BackgroundTasks, authorization: Optional[str] = None
- ):
+ async def metric_csv(authorization: Optional[str] = None):
"""Quantity CSV"""

try:
Expand All @@ -63,4 +61,4 @@ async def metric_csv(
except AssertionError:
return PlainTextResponse("Authorization Failed", 403)

- return await export_metric_csv(background_tasks)
+ return await export_metric_csv()
4 changes: 1 addition & 3 deletions export_csv/metric.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""Quantity CSV"""

- from fastapi import BackgroundTasks
from fastapi.responses import StreamingResponse
from sqlalchemy import Select, and_, func, or_, select

Expand All @@ -17,7 +16,7 @@
from model.enum import WikibaseType, WikibaseURLType


- async def export_metric_csv(background_tasks: BackgroundTasks) -> StreamingResponse:
+ async def export_metric_csv() -> StreamingResponse:
"""CSV with Requested Metrics"""

query = get_metrics_query()
Expand All @@ -26,7 +25,6 @@ async def export_metric_csv(background_tasks: BackgroundTasks) -> StreamingRespo
query=query,
export_filename="metrics",
index_col="wikibase_id",
- background_tasks=background_tasks,
)


Expand Down
24 changes: 3 additions & 21 deletions export_csv/util.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
"""Utilities"""

- import os
from typing import Optional
- import uuid
- from fastapi import BackgroundTasks
- from fastapi.responses import StreamingResponse
+ from fastapi.responses import Response
import pandas
from sqlalchemy import Connection, Select

Expand All @@ -25,14 +22,9 @@ async def read_sql_query(
return df


- CHUNK_SIZE = 1024 * 1024
- EXPORT_DIRECTORY = "export/data"
-
-
async def export_csv(
query: Select,
export_filename: str,
- background_tasks: BackgroundTasks,
index_col: Optional[str] = None,
):
"""Export CSV"""
Expand All @@ -41,18 +33,8 @@ async def export_csv(
if index_col == "wikibase_id":
assert len(set(df.index)) == len(df), "Returned Multiple Rows per Wikibase"

- os.makedirs(EXPORT_DIRECTORY, exist_ok=True)
-
- filename = f"{EXPORT_DIRECTORY}/{uuid.uuid4()}.csv"
- df.to_csv(filename)
+ csv = df.to_csv()
del df

- def iterfile():
- with open(filename, "rb") as f:
- while chunk := f.read(CHUNK_SIZE):
- yield chunk
-
- background_tasks.add_task(os.remove, filename)
-
headers = {"Content-Disposition": f'attachment; filename="{export_filename}.csv"'}
- return StreamingResponse(iterfile(), headers=headers, media_type="text/csv")
+ return Response(csv, headers=headers, media_type="text/csv")
46 changes: 12 additions & 34 deletions tests/test_export_csv/test_export_metrics_csv.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,9 @@
"""Test Export Metrics CSV"""

- import os
import re
- from typing import Callable
import pytest
- from export_csv.metric import export_metric_csv
-
-
- class MockBackgroundTasks:
- """Mock BackgroundTasks"""
-
- task_list = []
-
- # pylint: disable-next=unused-argument
- def add_task(self, func: Callable, *args):
- """
- Add Task
-
- really add filename to list
- """
-
- self.task_list.append(args[0])
+ from fastapi.testclient import TestClient
+ from app import app


EXPECTED_HEADER_ROW = ",".join(
Expand All @@ -46,7 +29,7 @@ def add_task(self, func: Callable, *args):
"bot_change_user_count",
"software_version_observation_date",
"software_name",
- "version\n",
+ "version",
]
)
EXPECTED_PATTERN = re.compile(
Expand Down Expand Up @@ -76,7 +59,7 @@ def add_task(self, func: Callable, *args):
# # Software
r"(\d{4}-\d\d-\d\d \d\d:\d\d:\d\d|)",
r"(MediaWiki|)",
- r"(\d+\.\d+\.\d+|)\n",
+ r"(\d+\.\d+\.\d+|)",
]
)
)
Expand All @@ -94,19 +77,14 @@ def add_task(self, func: Callable, *args):
async def test_export_metric_csv():
"""Test Export Metric CSV"""

- mock_background_tasks = MockBackgroundTasks()
-
- result = await export_metric_csv(mock_background_tasks)
- assert len(mock_background_tasks.task_list) == 1
+ client = TestClient(app)
+ result = client.get("/csv/metrics?authorization=test-auth-token")
assert result.status_code == 200
- assert result.media_type == "text/csv"
- # CANNOT FIGURE OUT HOW TO CHECK CONTENT OF RESPONSE
+ content = result.content.decode("utf-8")

- with open(mock_background_tasks.task_list[0], mode="r", encoding="utf-8") as file:
- returned_lines = file.readlines()
- assert returned_lines[0] == EXPECTED_HEADER_ROW
- for returned_line in returned_lines[1:]:
- assert EXPECTED_PATTERN.match(returned_line)
+ lines = content.splitlines()
+ assert len(lines) >= 2
+ assert lines[0] == EXPECTED_HEADER_ROW

- for file in mock_background_tasks.task_list:
- os.remove(file)
+ for line in lines[1:]:
+ assert EXPECTED_PATTERN.match(line)