Skip to content

Commit 4abcc13

Browse files
authored
Update GenbankReaderTest.java
Added a test for the updates to the locus pattern in GenBankSequenceParser that allow uppercase units, strand and topology.
1 parent 6acd1e9 commit 4abcc13

1 file changed

Lines changed: 64 additions & 0 deletions

File tree

biojava-core/src/test/java/org/biojava/nbio/core/sequence/io/GenbankReaderTest.java

Lines changed: 64 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -255,6 +255,33 @@ private DNASequence readGenbankResource(final String resource) throws Exception
255255
}
256256
return sequence;
257257
}
258+
259+
private ProteinSequence readGenbankProteinResource(final String resource) throws Exception {
260+
ProteinSequence sequence = null;
261+
InputStream inputStream = null;
262+
try {
263+
inputStream = getClass().getResourceAsStream(resource);
264+
265+
GenbankReader<ProteinSequence, AminoAcidCompound> genbankProtein
266+
= new GenbankReader<>(
267+
inputStream,
268+
new GenericGenbankHeaderParser<>(),
269+
new ProteinSequenceCreator(AminoAcidCompoundSet.getAminoAcidCompoundSet())
270+
);
271+
272+
LinkedHashMap<String, ProteinSequence> proteinSequences = genbankProtein.process();
273+
sequence = proteinSequences.values().iterator().next();
274+
}
275+
finally {
276+
try {
277+
inputStream.close();
278+
}
279+
catch (Exception e) {
280+
// ignore
281+
}
282+
}
283+
return sequence;
284+
}
258285

259286
@Test
260287
public void testNcbiExpandedAccessionFormats() throws Exception {
@@ -267,6 +294,43 @@ public void testNcbiExpandedAccessionFormats() throws Exception {
267294
DNASequence header2 = readGenbankResource("/empty_header2.gb");
268295
assertEquals("AZZZAA02123456789 10000000000 bp DNA linear PRI 15-OCT-2018", header2.getOriginalHeader());
269296
}
297+
298+
@Test
299+
public void testLegacyLocusCompatable() throws Exception {
300+
301+
// Testing opening a genbank file with uppercase units, strand and topology
302+
DNASequence header0 = readGenbankResource("/uppercase_locus0.gb");
303+
assertEquals("ABC12.3_DE 7071 BP DS-DNA CIRCULAR SYN 22-JUL-1994", header0.getOriginalHeader());
304+
305+
// Testing uppercase SS strand
306+
DNASequence header1 = readGenbankResource("/uppercase_locus1.gb");
307+
assertEquals("ABC12.3_DE 7071 BP SS-DNA CIRCULAR SYN 13-JUL-1994", header1.getOriginalHeader());
308+
309+
// Testing uppercase MS strand
310+
DNASequence header2 = readGenbankResource("/uppercase_locus2.gb");
311+
assertEquals("ABC12.3_DE 7071 BP MS-DNA CIRCULAR SYN 13-JUL-1994", header2.getOriginalHeader());
312+
313+
// Testing uppercase LINEAR topology
314+
DNASequence header3 = readGenbankResource("/uppercase_locus3.gb");
315+
assertEquals("ABC12.3_DE 7071 BP DNA LINEAR SYN 22-JUL-1994", header3.getOriginalHeader());
316+
317+
// Testing uppercase units with no strand or topology
318+
DNASequence header4 = readGenbankResource("/uppercase_locus4.gb");
319+
assertEquals("ABC12.3_DE 7071 BP DNA SYN 13-JUL-1994", header4.getOriginalHeader());
320+
321+
// Testing uppercase units with no strand, topology, division or date
322+
DNASequence header5 = readGenbankResource("/uppercase_locus5.gb");
323+
assertEquals("ABC12.3_DE 7071 BP DNA", header5.getOriginalHeader());
324+
325+
// Testing uppercase units with no strand, molecule type, topology, division or date
326+
DNASequence header6 = readGenbankResource("/uppercase_locus6.gb");
327+
assertEquals("ABC12.3_DE 7071 BP", header6.getOriginalHeader());
328+
329+
// Testing uppercase protein units
330+
ProteinSequence header7 = readGenbankProteinResource("/uppercase_locus7.gb");
331+
assertEquals("ABC12.3_DE 7071 AA Protein", header7.getOriginalHeader());
332+
333+
}
270334

271335
/**
272336
* Helper class to be able to verify the closed state of the input stream.

0 commit comments

Comments
 (0)