Skip to content

Commit cb98549

Browse files
guerganahasanakg
andauthored
Ignore already reviewed mismatches on reupload (#880)
Bug: T329631 Co-authored-by: Hasan Akgün <hasanakg@outlook.com>
1 parent 2c4e64b commit cb98549

File tree

2 files changed

+334
-10
lines changed

2 files changed

+334
-10
lines changed

app/Jobs/ImportCSV.php

Lines changed: 75 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -46,16 +46,58 @@ public function handle(CSVImportReader $reader)
4646
$filepath = Storage::disk('local')
4747
->path('mismatch-files/' . $this->meta->filename);
4848

49-
DB::transaction(function () use ($reader, $filepath) {
50-
$reader->lines($filepath)->each(function ($mismatchLine) {
51-
$mismatch = Mismatch::make($mismatchLine);
52-
if ($mismatch->type == null) {
53-
$mismatch->type = 'statement';
49+
$mismatch_attrs = (new Mismatch())->getFillable();
50+
51+
DB::transaction(function () use ($reader, $filepath, $mismatch_attrs) {
52+
$new_mismatches = [];
53+
$where_clauses = [];
54+
55+
$mismatches_per_upload_user = DB::table('mismatches')
56+
->select($mismatch_attrs)
57+
->join('import_meta', 'mismatches.import_id', '=', 'import_meta.id')
58+
->where('import_meta.user_id', '=', $this->meta->user->id);
59+
60+
$reader->lines($filepath)->each(function ($mismatchLine) use (
61+
&$new_mismatches,
62+
&$where_clauses
63+
) {
64+
65+
$new_mismatch = $this->createMismatch($mismatchLine);
66+
$new_mismatches[] = $new_mismatch;
67+
$where_clause = [['review_status', '!=', 'pending']];
68+
foreach ($new_mismatch->getAttributes() as $key => $attribute) {
69+
if ($key == 'review_status') {
70+
continue;
71+
}
72+
$where_clause[] = [$key, $attribute];
73+
}
74+
$where_clauses[] = $where_clause;
75+
});
76+
77+
$mismatches_per_upload_user->where(function ($query) use ($where_clauses) {
78+
foreach ($where_clauses as $where_clause) {
79+
$query->orWhere(function ($query) use ($where_clause) {
80+
$query->where($where_clause);
81+
});
5482
}
55-
$mismatch->importMeta()->associate($this->meta);
56-
$mismatch->save();
5783
});
5884

85+
$existing_mismatches = $mismatches_per_upload_user->get();
86+
87+
foreach ($new_mismatches as $new_mismatch) {
88+
if ($existing_mismatches->doesntContain(function ($value) use ($new_mismatch) {
89+
$metaAttrs = $new_mismatch->getAttributes();
90+
foreach ($metaAttrs as $attrKey => $attr) {
91+
if ($attrKey != 'review_status' && $value->{$attrKey} != $attr) {
92+
return false;
93+
}
94+
}
95+
return true;
96+
})) {
97+
$this->saveMismatch($new_mismatch);
98+
}
99+
}
100+
59101
$this->meta->status = 'completed';
60102
$this->meta->save();
61103
});
@@ -64,7 +106,7 @@ public function handle(CSVImportReader $reader)
64106
/**
65107
* Handle a job failure.
66108
*
67-
* @param \Throwable $exception
109+
* @param \Throwable $exception
68110
* @return void
69111
*/
70112
public function failed(Throwable $exception)
@@ -78,4 +120,29 @@ public function failed(Throwable $exception)
78120
$this->meta->status = 'failed';
79121
$this->meta->save();
80122
}
123+
124+
125+
private function createMismatch($mismatch_data)
126+
{
127+
$new_mismatch = Mismatch::make($mismatch_data);
128+
129+
if ($new_mismatch->type == null) {
130+
$new_mismatch->type = 'statement';
131+
}
132+
133+
return $new_mismatch;
134+
}
135+
136+
/**
137+
* Save mismatch to database
138+
*
139+
* @param \Mismatch $new_mismatch
140+
* @return void
141+
*/
142+
private function saveMismatch($new_mismatch)
143+
{
144+
// if review_status == pending -> save
145+
$new_mismatch->importMeta()->associate($this->meta);
146+
$new_mismatch->save();
147+
}
81148
}

tests/Feature/ImportCSVTest.php

Lines changed: 259 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,16 @@
88
use App\Models\ImportMeta;
99
use App\Jobs\ImportCSV;
1010
use App\Models\User;
11+
use App\Models\Mismatch;
1112
use Throwable;
1213

1314
class ImportCSVTest extends TestCase
1415
{
1516
use RefreshDatabase;
1617
/**
17-
* Ensure import persists mismatches to database
18+
* Ensure import persists mismatches to empty database
1819
*/
19-
public function test_creates_mismatches(): void
20+
public function test_creates_mismatches_in_empty_db(): void
2021
{
2122
$filename = 'creates-mismatches.csv';
2223
$user = User::factory()->uploader()->create();
@@ -58,6 +59,193 @@ public function test_creates_mismatches(): void
5859
}
5960
}
6061

62+
/**
63+
* Ensure reupload doesn't import duplicated row with reviewed state
64+
*/
65+
public function test_doesnt_import_mismatches_when_row_exists_and_is_reviewed(): void
66+
{
67+
$filename = 'creates-mismatches.csv';
68+
$user = User::factory()->uploader()->create();
69+
$reupload_import = ImportMeta::factory()->for($user)->create([
70+
'filename' => $filename
71+
]);
72+
73+
$already_in_db_import = ImportMeta::factory()->for($user);
74+
75+
Mismatch::factory()
76+
->for($already_in_db_import)->create([
77+
'statement_guid' => 'Q184746$7814880A-A6EF-40EC-885E-F46DD58C8DC5',
78+
'item_id' => 'Q184746',
79+
'property_id' => 'P569',
80+
'meta_wikidata_value' => 'Q12138',
81+
'wikidata_value' => '3 April 1934',
82+
'external_value' => '1934-04-03',
83+
'external_url' => 'https://d-nb.info/gnd/119004453',
84+
'review_status' => 'both',
85+
'type' => 'statement'
86+
]);
87+
88+
$header = config('imports.upload.column_keys');
89+
$lines = [
90+
["Q184746","Q184746$7814880A-A6EF-40EC-885E-F46DD58C8DC5","P569","3 April 1934"
91+
,"Q12138","1934-04-03","https://d-nb.info/gnd/119004453","statement"],
92+
["Q184746","Q184746$7200D1AD-E4E8-401B-8D57-8C823810F11F","P21","Q6581072"
93+
,"","nonbinary","https://www.imdb.com/name/nm0328762/","statement"]
94+
];
95+
96+
$content = join("\n", array_map(function (array $line) {
97+
return join(',', $line);
98+
}, array_merge([$header], $lines)));
99+
100+
Storage::fake('local');
101+
Storage::put(
102+
'mismatch-files/' . $filename,
103+
$content
104+
);
105+
106+
$expected = array_map(function ($row) use ($header) {
107+
return array_combine($header, $row);
108+
}, $lines);
109+
110+
ImportCSV::dispatch($reupload_import);
111+
112+
$this->assertDatabaseCount('mismatches', count($lines));
113+
$this->assertDatabaseHas('mismatches', [
114+
'import_id' => $reupload_import->id
115+
]);
116+
117+
foreach ($expected as $mismatch) {
118+
$this->assertDatabaseHas('mismatches', $mismatch);
119+
}
120+
}
121+
122+
/**
123+
* Ensure reupload imports duplicated row if db row has review status = 'pending'
124+
*/
125+
public function test_imports_mismatches_when_row_exists_and_review_status_is_pending(): void
126+
{
127+
$filename = 'creates-mismatches.csv';
128+
$user = User::factory()->uploader()->create();
129+
$reupload_import = ImportMeta::factory()->for($user)->create([
130+
'filename' => $filename
131+
]);
132+
133+
$already_in_db_import = ImportMeta::factory()->for($user)->create();
134+
135+
Mismatch::factory()
136+
->for($already_in_db_import)->create([
137+
'statement_guid' => 'Q184746$7814880A-A6EF-40EC-885E-F46DD58C8DC5',
138+
'item_id' => 'Q184746',
139+
'property_id' => 'P569',
140+
'meta_wikidata_value' => 'Q12138',
141+
'wikidata_value' => '3 April 1934',
142+
'external_value' => '1934-04-03',
143+
'external_url' => 'https://d-nb.info/gnd/119004453',
144+
'review_status' => 'pending',
145+
'type' => 'statement'
146+
]);
147+
148+
$header = config('imports.upload.column_keys');
149+
$lines = [
150+
["Q184746","Q184746$7814880A-A6EF-40EC-885E-F46DD58C8DC5","P569","3 April 1934"
151+
,"Q12138","1934-04-03","https://d-nb.info/gnd/119004453","statement"],
152+
["Q184746","Q184746$7200D1AD-E4E8-401B-8D57-8C823810F11F","P21","Q6581072"
153+
,"","nonbinary","https://www.imdb.com/name/nm0328762/","statement"]
154+
];
155+
156+
$content = join("\n", array_map(function (array $line) {
157+
return join(',', $line);
158+
}, array_merge([$header], $lines)));
159+
160+
Storage::fake('local');
161+
Storage::put(
162+
'mismatch-files/' . $filename,
163+
$content
164+
);
165+
166+
$expected = array_map(function ($row) use ($header) {
167+
return array_combine($header, $row);
168+
}, $lines);
169+
170+
ImportCSV::dispatch($reupload_import);
171+
172+
$this->assertDatabaseCount('mismatches', count($lines) + 1);
173+
$this->assertDatabaseHas('mismatches', [
174+
'import_id' => $already_in_db_import->id
175+
]);
176+
177+
foreach ($expected as $mismatch) {
178+
$this->assertDatabaseHas('mismatches', $mismatch);
179+
}
180+
}
181+
182+
/**
183+
* Ensure reupload imports duplicated rows if uploaded by different users
184+
*/
185+
public function test_imports_mismatch_row_from_different_users_only_when_not_pending_in_db(): void
186+
{
187+
$filename = 'creates-mismatches.csv';
188+
$user1 = User::factory()->uploader()->create();
189+
$user2 = User::factory()->uploader()->create();
190+
$reupload_import1 = ImportMeta::factory()->for($user1)->create([
191+
'filename' => $filename
192+
]);
193+
$reupload_import2 = ImportMeta::factory()->for($user2)->create([
194+
'filename' => $filename
195+
]);
196+
197+
$already_in_db_import = ImportMeta::factory()->for($user1)->create();
198+
199+
Mismatch::factory()
200+
->for($already_in_db_import)->create([
201+
'statement_guid' => 'Q184746$7814880A-A6EF-40EC-885E-F46DD58C8DC5',
202+
'item_id' => 'Q184746',
203+
'property_id' => 'P569',
204+
'meta_wikidata_value' => 'Q12138',
205+
'wikidata_value' => '3 April 1934',
206+
'external_value' => '1934-04-03',
207+
'external_url' => 'https://d-nb.info/gnd/119004453',
208+
'review_status' => 'both',
209+
'type' => 'statement'
210+
]);
211+
212+
$header = config('imports.upload.column_keys');
213+
$lines = [
214+
["Q184746","Q184746$7814880A-A6EF-40EC-885E-F46DD58C8DC5","P569","3 April 1934"
215+
,"Q12138","1934-04-03","https://d-nb.info/gnd/119004453","statement"],
216+
["Q184746","Q184746$7200D1AD-E4E8-401B-8D57-8C823810F11F","P21","Q6581072"
217+
,"","nonbinary","https://www.imdb.com/name/nm0328762/","statement"]
218+
];
219+
220+
$content = join("\n", array_map(function (array $line) {
221+
return join(',', $line);
222+
}, array_merge([$header], $lines)));
223+
224+
Storage::fake('local');
225+
Storage::put(
226+
'mismatch-files/' . $filename,
227+
$content
228+
);
229+
230+
$expected = array_map(function ($row) use ($header) {
231+
return array_combine($header, $row);
232+
}, $lines);
233+
234+
ImportCSV::dispatch($reupload_import1);
235+
ImportCSV::dispatch($reupload_import2);
236+
237+
// total would be 5 but one is not imported because it's already reviewed in db
238+
$this->assertDatabaseCount('mismatches', count($lines) * 2);
239+
$this->assertDatabaseHas('mismatches', [
240+
'import_id' => $reupload_import1->id,
241+
'import_id' => $reupload_import2->id,
242+
]);
243+
244+
foreach ($expected as $mismatch) {
245+
$this->assertDatabaseHas('mismatches', $mismatch);
246+
}
247+
}
248+
61249
/**
62250
* Ensure no change is commited to Database in case of error
63251
*/
@@ -103,4 +291,73 @@ public function test_rolls_back_on_failure(): void
103291
]);
104292
}
105293
}
294+
295+
/**
296+
* Ensure reupload doesn't import duplicated row with reviewed state and ignoring type column if it's empty
297+
*/
298+
public function test_doesnt_import_mismatches_when_row_exists_and_is_reviewed_and_type_is_empty_in_csv(): void
299+
{
300+
$filename = 'creates-mismatches.csv';
301+
$user1 = User::factory()->uploader()->create();
302+
$user2 = User::factory()->uploader()->create();
303+
$reupload_import1 = ImportMeta::factory()->for($user1)->create([
304+
'filename' => $filename
305+
]);
306+
$reupload_import2 = ImportMeta::factory()->for($user2)->create([
307+
'filename' => $filename
308+
]);
309+
310+
$already_in_db_import = ImportMeta::factory()->for($user1)->create();
311+
312+
Mismatch::factory()
313+
->for($already_in_db_import)->create([
314+
'statement_guid' => 'Q184746$7814880A-A6EF-40EC-885E-F46DD58C8DC5',
315+
'item_id' => 'Q184746',
316+
'property_id' => 'P569',
317+
'meta_wikidata_value' => 'Q12138',
318+
'wikidata_value' => '3 April 1934',
319+
'external_value' => '1934-04-03',
320+
'external_url' => 'https://d-nb.info/gnd/119004453',
321+
'review_status' => 'both',
322+
'type' => 'statement'
323+
]);
324+
325+
$header = config('imports.upload.column_keys');
326+
$lines = [
327+
["Q184746","Q184746$7814880A-A6EF-40EC-885E-F46DD58C8DC5","P569","3 April 1934"
328+
,"Q12138","1934-04-03","https://d-nb.info/gnd/119004453",""],
329+
["Q184746","Q184746$7200D1AD-E4E8-401B-8D57-8C823810F11F","P21","Q6581072"
330+
,"","nonbinary","https://www.imdb.com/name/nm0328762/",""]
331+
];
332+
333+
$content = join("\n", array_map(function (array $line) {
334+
return join(',', $line);
335+
}, array_merge([$header], $lines)));
336+
337+
Storage::fake('local');
338+
Storage::put(
339+
'mismatch-files/' . $filename,
340+
$content
341+
);
342+
343+
$expected = array_map(function ($row) use ($header) {
344+
$expected_line = array_combine($header, $row);
345+
$expected_line['type'] = 'statement';
346+
return $expected_line;
347+
}, $lines);
348+
349+
ImportCSV::dispatch($reupload_import1);
350+
ImportCSV::dispatch($reupload_import2);
351+
352+
// total would be 5 but one is not imported because it's already reviewed in db
353+
$this->assertDatabaseCount('mismatches', count($lines) * 2);
354+
$this->assertDatabaseHas('mismatches', [
355+
'import_id' => $reupload_import1->id,
356+
'import_id' => $reupload_import2->id,
357+
]);
358+
359+
foreach ($expected as $mismatch) {
360+
$this->assertDatabaseHas('mismatches', $mismatch);
361+
}
362+
}
106363
}

0 commit comments

Comments
 (0)