2626package org .biojava .nbio .core .sequence .io ;
2727
2828import org .biojava .nbio .core .exceptions .CompoundNotFoundException ;
29- import org .biojava .nbio .core .sequence .DNASequence ;
3029import org .biojava .nbio .core .sequence .DataSource ;
31- import org .biojava .nbio .core .sequence .ProteinSequence ;
3230import org .biojava .nbio .core .sequence .TaxonomyID ;
33- import org .biojava .nbio .core .sequence .compound .AminoAcidCompound ;
34- import org .biojava .nbio .core .sequence .compound .AminoAcidCompoundSet ;
35- import org .biojava .nbio .core .sequence .compound .DNACompoundSet ;
36- import org .biojava .nbio .core .sequence .compound .NucleotideCompound ;
37- import org .biojava .nbio .core .sequence .features .AbstractFeature ;
3831import org .biojava .nbio .core .sequence .features .DBReferenceInfo ;
3932import org .biojava .nbio .core .sequence .io .template .SequenceCreatorInterface ;
4033import org .biojava .nbio .core .sequence .io .template .SequenceHeaderParserInterface ;
4336import org .slf4j .Logger ;
4437import org .slf4j .LoggerFactory ;
4538
46- import java .io .*;
39+ import java .io .BufferedReader ;
40+ import java .io .File ;
41+ import java .io .FileNotFoundException ;
42+ import java .io .FileReader ;
43+ import java .io .IOException ;
44+ import java .io .InputStream ;
45+ import java .io .InputStreamReader ;
4746import java .util .ArrayList ;
4847import java .util .HashMap ;
4948import java .util .LinkedHashMap ;
49+ import java .util .List ;
5050
5151/**
52- * Use GenbankReaderHelper as an example of how to use this class where GenbankReaderHelper should be the
52+ * See {@link GenbankReaderHelper} for an example of how to use this class; {@link GenbankReaderHelper} should be the
5353 * primary class used to read Genbank files
5454 *
5555 */
@@ -66,9 +66,9 @@ public boolean isClosed() {
6666 }
6767
6868 /**
69- * If you are going to use FileProxyProteinSequenceCreator then do not use this constructor because we need details about
70- * local file offsets for quick reads. InputStreams does not give you the name of the stream to access quickly via file seek. A seek in
71- * an inputstream is forced to read all the data so you don't gain anything.
69+ * If you are going to use {@link FileProxyProteinSequenceCreator} then do not use this constructor because we need details about
70+ * local file offsets for quick reads. {@link InputStream} does not give you the name of the stream to access quickly via file seek. A seek in
71+ * an {@link InputStream} is forced to read all the data so you don't gain anything.
7272 * @param is
7373 * @param headerParser
7474 * @param sequenceCreator
@@ -107,18 +107,21 @@ public GenbankReader(
107107
108108 /**
109109 * The parsing is done in this method.<br>
110- * This method tries to process all the available Genbank records
110+ * This method parses all the available Genbank records
111111 * in the File or InputStream, closes the underlying resource,
112112 * and returns the results in a {@link LinkedHashMap}.<br>
113- * You don't need to call {@link #close()} after calling this method.
113+ * You don't need to call {@link GenbankReader #close()} after calling this method.
114114 * @see #process(int)
115115 * @return {@link LinkedHashMap} containing all the parsed Genbank records
116116 * present, starting from the current fileIndex onwards.
117117 * @throws IOException
118118 * @throws CompoundNotFoundException
119+ * @throws OutOfMemoryError if the input resource is larger than the allocated heap.
119120 */
120121 public LinkedHashMap <String ,S > process () throws IOException , CompoundNotFoundException {
121- return process (-1 );
122+ LinkedHashMap <String ,S > result = process (-1 );
123+ close ();
124+ return result ;
122125 }
123126
124127 /**
@@ -137,13 +140,18 @@ public LinkedHashMap<String,S> process() throws IOException, CompoundNotFoundExc
137140 * @see #process()
138141 * @author Amr AL-Hossary
139142 * @since 3.0.6
140- * @param max maximum number of records to return, <code>-1</code> for infinity .
143+ * @param max maximum number of records to return.
141144 * @return {@link LinkedHashMap} containing at most <code>max</code> parsed Genbank records
142145 * present, starting from the current fileIndex onwards.
143146 * @throws IOException
144147 * @throws CompoundNotFoundException
145148 */
146149 public LinkedHashMap <String ,S > process (final int max ) throws IOException , CompoundNotFoundException {
150+
151+ if (closed ){
152+ throw new IOException ("Cannot perform action: resource has been closed." );
153+ }
154+
147155 LinkedHashMap <String ,S > sequences = new LinkedHashMap <>();
148156 @ SuppressWarnings ("unchecked" )
149157 int i =0 ;
@@ -158,12 +166,9 @@ public LinkedHashMap<String,S> process(final int max) throws IOException, Compou
158166 genbankParser .getSequenceHeaderParser ().parseHeader (genbankParser .getHeader (), sequence );
159167
160168 // add features to new sequence
161- for (String k : genbankParser .getFeatures ().keySet ()){
162- for (AbstractFeature f : genbankParser .getFeatures (k )){
163- //f.getLocations().setSequence(sequence); // can't set proper sequence source to features. It is actually needed? Don't think so...
164- sequence .addFeature (f );
165- }
166- }
169+ genbankParser .getFeatures ().values ().stream ()
170+ .flatMap (List ::stream )
171+ .forEach (sequence ::addFeature );
167172
168173 // add taxonomy ID to new sequence
169174 ArrayList <DBReferenceInfo > dbQualifier = genbankParser .getDatabaseReferences ().get ("db_xref" );
@@ -175,10 +180,6 @@ public LinkedHashMap<String,S> process(final int max) throws IOException, Compou
175180 sequences .put (sequence .getAccession ().getID (), sequence );
176181 }
177182
178- if (max < 0 ) {
179- close ();
180- }
181-
182183 return sequences ;
183184 }
184185
@@ -187,33 +188,9 @@ public void close() {
187188 bufferedReader .close ();
188189 this .closed = true ;
189190 } catch (IOException e ) {
190- logger .error ("Couldn't close the reader. {} " , e . getMessage () );
191+ logger .error ("Couldn't close the reader." , e );
191192 this .closed = false ;
192193 }
193194 }
194-
195- public static void main (String [] args ) throws Exception {
196- String proteinFile = "src/test/resources/BondFeature.gb" ;
197- FileInputStream is = new FileInputStream (proteinFile );
198-
199- GenbankReader <ProteinSequence , AminoAcidCompound > proteinReader = new GenbankReader <>(is , new GenericGenbankHeaderParser <>(), new ProteinSequenceCreator (AminoAcidCompoundSet .getAminoAcidCompoundSet ()));
200- LinkedHashMap <String ,ProteinSequence > proteinSequences = proteinReader .process ();
201- System .out .println (proteinSequences );
202-
203- String inputFile = "src/test/resources/NM_000266.gb" ;
204- is = new FileInputStream (inputFile );
205- GenbankReader <DNASequence , NucleotideCompound > dnaReader = new GenbankReader <>(is , new GenericGenbankHeaderParser <>(), new DNASequenceCreator (DNACompoundSet .getDNACompoundSet ()));
206- LinkedHashMap <String ,DNASequence > dnaSequences = dnaReader .process ();
207- System .out .println (dnaSequences );
208-
209- String crazyFile = "src/test/resources/CraftedFeature.gb" ;
210- is = new FileInputStream (crazyFile );
211- GenbankReader <DNASequence , NucleotideCompound > crazyReader = new GenbankReader <>(is , new GenericGenbankHeaderParser <>(), new DNASequenceCreator (DNACompoundSet .getDNACompoundSet ()));
212- LinkedHashMap <String ,DNASequence > crazyAnnotatedSequences = crazyReader .process ();
213-
214- is .close ();
215- System .out .println (crazyAnnotatedSequences );
216- }
217-
218195}
219196
0 commit comments