@@ -255,6 +255,33 @@ private DNASequence readGenbankResource(final String resource) throws Exception
255255 }
256256 return sequence ;
257257 }
258+
259+ private ProteinSequence readGenbankProteinResource (final String resource ) throws Exception {
260+ ProteinSequence sequence = null ;
261+ InputStream inputStream = null ;
262+ try {
263+ inputStream = getClass ().getResourceAsStream (resource );
264+
265+ GenbankReader <ProteinSequence , AminoAcidCompound > genbankProtein
266+ = new GenbankReader <>(
267+ inputStream ,
268+ new GenericGenbankHeaderParser <>(),
269+ new ProteinSequenceCreator (AminoAcidCompoundSet .getAminoAcidCompoundSet ())
270+ );
271+
272+ LinkedHashMap <String , ProteinSequence > proteinSequences = genbankProtein .process ();
273+ sequence = proteinSequences .values ().iterator ().next ();
274+ }
275+ finally {
276+ try {
277+ inputStream .close ();
278+ }
279+ catch (Exception e ) {
280+ // ignore
281+ }
282+ }
283+ return sequence ;
284+ }
258285
259286 @ Test
260287 public void testNcbiExpandedAccessionFormats () throws Exception {
@@ -267,6 +294,43 @@ public void testNcbiExpandedAccessionFormats() throws Exception {
267294 DNASequence header2 = readGenbankResource ("/empty_header2.gb" );
268295 assertEquals ("AZZZAA02123456789 10000000000 bp DNA linear PRI 15-OCT-2018" , header2 .getOriginalHeader ());
269296 }
297+
298+ @ Test
299+ public void testLegacyLocusCompatable () throws Exception {
300+
301+ // Testing opening a genbank file with uppercase units, strand and topology
302+ DNASequence header0 = readGenbankResource ("/uppercase_locus0.gb" );
303+ assertEquals ("ABC12.3_DE 7071 BP DS-DNA CIRCULAR SYN 22-JUL-1994" , header0 .getOriginalHeader ());
304+
305+ // Testing uppercase SS strand
306+ DNASequence header1 = readGenbankResource ("/uppercase_locus1.gb" );
307+ assertEquals ("ABC12.3_DE 7071 BP SS-DNA CIRCULAR SYN 13-JUL-1994" , header1 .getOriginalHeader ());
308+
309+ // Testing uppercase MS strand
310+ DNASequence header2 = readGenbankResource ("/uppercase_locus2.gb" );
311+ assertEquals ("ABC12.3_DE 7071 BP MS-DNA CIRCULAR SYN 13-JUL-1994" , header2 .getOriginalHeader ());
312+
313+ // Testing uppercase LINEAR topology
314+ DNASequence header3 = readGenbankResource ("/uppercase_locus3.gb" );
315+ assertEquals ("ABC12.3_DE 7071 BP DNA LINEAR SYN 22-JUL-1994" , header3 .getOriginalHeader ());
316+
317+ // Testing uppercase units with no strand or topology
318+ DNASequence header4 = readGenbankResource ("/uppercase_locus4.gb" );
319+ assertEquals ("ABC12.3_DE 7071 BP DNA SYN 13-JUL-1994" , header4 .getOriginalHeader ());
320+
321+ // Testing uppercase units with no strand, topology, division or date
322+ DNASequence header5 = readGenbankResource ("/uppercase_locus5.gb" );
323+ assertEquals ("ABC12.3_DE 7071 BP DNA" , header5 .getOriginalHeader ());
324+
325+ // Testing uppercase units with no strand, molecule type, topology, division or date
326+ DNASequence header6 = readGenbankResource ("/uppercase_locus6.gb" );
327+ assertEquals ("ABC12.3_DE 7071 BP" , header6 .getOriginalHeader ());
328+
329+ // Testing uppercase protein units
330+ ProteinSequence header7 = readGenbankProteinResource ("/uppercase_locus7.gb" );
331+ assertEquals ("ABC12.3_DE 7071 AA Protein" , header7 .getOriginalHeader ());
332+
333+ }
270334
271335 /**
272336 * Helper class to be able to verify the closed state of the input stream.
0 commit comments