Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitattributes
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
*.sto text
*.tsv text
*.txt text
*.xml text
# Force LF for XML: the decompression test fails when line endings in org/biojava/nbio/core/util/build.xml are CRLF
*.xml text eol=lf
*.xsd text
*.yml text

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,8 @@
package org.biojava.nbio.core.sequence.io;

import org.biojava.nbio.core.exceptions.CompoundNotFoundException;
import org.biojava.nbio.core.sequence.DNASequence;
import org.biojava.nbio.core.sequence.DataSource;
import org.biojava.nbio.core.sequence.ProteinSequence;
import org.biojava.nbio.core.sequence.TaxonomyID;
import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet;
import org.biojava.nbio.core.sequence.compound.DNACompoundSet;
import org.biojava.nbio.core.sequence.compound.NucleotideCompound;
import org.biojava.nbio.core.sequence.features.AbstractFeature;
import org.biojava.nbio.core.sequence.features.DBReferenceInfo;
import org.biojava.nbio.core.sequence.io.template.SequenceCreatorInterface;
import org.biojava.nbio.core.sequence.io.template.SequenceHeaderParserInterface;
Expand All @@ -43,13 +36,20 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.*;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;

/**
* Use GenbankReaderHelper as an example of how to use this class where GenbankReaderHelper should be the
* Use {@link GenbankReaderHelper} as an example of how to use this class where {@link GenbankReaderHelper} should be the
* primary class used to read Genbank files
*
*/
Expand All @@ -66,9 +66,9 @@ public boolean isClosed() {
}

/**
* If you are going to use FileProxyProteinSequenceCreator then do not use this constructor because we need details about
* local file offsets for quick reads. InputStreams does not give you the name of the stream to access quickly via file seek. A seek in
* an inputstream is forced to read all the data so you don't gain anything.
* If you are going to use {@link FileProxyProteinSequenceCreator} then do not use this constructor because we need details about
* local file offsets for quick reads. {@link InputStream} does not give you the name of the stream to access quickly via file seek. A seek in
* an {@link InputStream} is forced to read all the data so you don't gain anything.
* @param is
* @param headerParser
* @param sequenceCreator
Expand Down Expand Up @@ -107,18 +107,21 @@ public GenbankReader(

/**
 * The parsing is done in this method.<br>
 * This method reads all the available Genbank records in the File or InputStream,
 * closes the underlying resource, and returns the results in a {@link LinkedHashMap}.<br>
 * The resource is closed whether this method returns normally or throws, so you
 * don't need to call {@link GenbankReader#close()} after calling this method.
 * @see #process(int)
 * @return {@link LinkedHashMap} containing all the parsed Genbank records
 * present, starting from the current fileIndex onwards.
 * @throws IOException
 * @throws CompoundNotFoundException
 * @throws OutOfMemoryError if the input resource is larger than the allocated heap.
 */
public LinkedHashMap<String,S> process() throws IOException, CompoundNotFoundException {
    try {
        // -1 asks process(int) for every remaining record.
        return process(-1);
    } finally {
        // Release the reader even when parsing fails, so an exception does not leak the resource.
        close();
    }
}

/**
Expand All @@ -137,13 +140,18 @@ public LinkedHashMap<String,S> process() throws IOException, CompoundNotFoundExc
* @see #process()
* @author Amr AL-Hossary
* @since 3.0.6
* @param max maximum number of records to return, <code>-1</code> for infinity.
* @param max maximum number of records to return.
* @return {@link LinkedHashMap} containing at most <code>max</code> parsed Genbank records
* present, starting from the current fileIndex onwards.
* @throws IOException
* @throws CompoundNotFoundException
*/
public LinkedHashMap<String,S> process(final int max) throws IOException, CompoundNotFoundException {

if(closed){
throw new IOException("Cannot perform action: resource has been closed.");
}

LinkedHashMap<String,S> sequences = new LinkedHashMap<>();
@SuppressWarnings("unchecked")
int i=0;
Expand All @@ -158,12 +166,9 @@ public LinkedHashMap<String,S> process(final int max) throws IOException, Compou
genbankParser.getSequenceHeaderParser().parseHeader(genbankParser.getHeader(), sequence);

// add features to new sequence
for (String k: genbankParser.getFeatures().keySet()){
for (AbstractFeature f: genbankParser.getFeatures(k)){
//f.getLocations().setSequence(sequence); // can't set proper sequence source to features. It is actually needed? Don't think so...
sequence.addFeature(f);
}
}
genbankParser.getFeatures().values().stream()
.flatMap(List::stream)
.forEach(sequence::addFeature);

// add taxonomy ID to new sequence
ArrayList<DBReferenceInfo> dbQualifier = genbankParser.getDatabaseReferences().get("db_xref");
Expand All @@ -175,10 +180,6 @@ public LinkedHashMap<String,S> process(final int max) throws IOException, Compou
sequences.put(sequence.getAccession().getID(), sequence);
}

if (max < 0) {
close();
}

return sequences;
}

Expand All @@ -187,33 +188,9 @@ public void close() {
bufferedReader.close();
this.closed = true;
} catch (IOException e) {
logger.error("Couldn't close the reader. {}", e.getMessage());
logger.error("Couldn't close the reader.", e);
this.closed = false;
}
}

/**
 * Ad-hoc demo: parses three Genbank files from <code>src/test/resources</code>
 * and prints the resulting sequence maps to standard output.
 * Each input stream is opened in a try-with-resources block so that it is
 * closed even if parsing throws.
 *
 * @param args ignored
 * @throws Exception if a file cannot be opened or parsed
 */
public static void main(String[] args) throws Exception {
    String proteinFile = "src/test/resources/BondFeature.gb";
    try (FileInputStream is = new FileInputStream(proteinFile)) {
        GenbankReader<ProteinSequence, AminoAcidCompound> proteinReader = new GenbankReader<>(is, new GenericGenbankHeaderParser<>(), new ProteinSequenceCreator(AminoAcidCompoundSet.getAminoAcidCompoundSet()));
        LinkedHashMap<String,ProteinSequence> proteinSequences = proteinReader.process();
        System.out.println(proteinSequences);
    }

    String inputFile = "src/test/resources/NM_000266.gb";
    try (FileInputStream is = new FileInputStream(inputFile)) {
        GenbankReader<DNASequence, NucleotideCompound> dnaReader = new GenbankReader<>(is, new GenericGenbankHeaderParser<>(), new DNASequenceCreator(DNACompoundSet.getDNACompoundSet()));
        LinkedHashMap<String,DNASequence> dnaSequences = dnaReader.process();
        System.out.println(dnaSequences);
    }

    String crazyFile = "src/test/resources/CraftedFeature.gb";
    try (FileInputStream is = new FileInputStream(crazyFile)) {
        GenbankReader<DNASequence, NucleotideCompound> crazyReader = new GenbankReader<>(is, new GenericGenbankHeaderParser<>(), new DNASequenceCreator(DNACompoundSet.getDNACompoundSet()));
        LinkedHashMap<String,DNASequence> crazyAnnotatedSequences = crazyReader.process();
        System.out.println(crazyAnnotatedSequences);
    }
}

}

Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,11 @@
import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet;
import org.biojava.nbio.core.sequence.compound.DNACompoundSet;
import org.biojava.nbio.core.sequence.compound.NucleotideCompound;
import org.biojava.nbio.core.sequence.features.*;
import org.biojava.nbio.core.sequence.features.AbstractFeature;
import org.biojava.nbio.core.sequence.features.DBReferenceInfo;
import org.biojava.nbio.core.sequence.features.DatabaseReferenceInterface;
import org.biojava.nbio.core.sequence.features.FeatureRetriever;
import org.biojava.nbio.core.sequence.features.FeaturesKeyWordInterface;
import org.biojava.nbio.core.sequence.io.GenbankSequenceParser;
import org.biojava.nbio.core.sequence.io.GenericGenbankHeaderParser;
import org.biojava.nbio.core.sequence.template.AbstractSequence;
Expand All @@ -41,7 +45,14 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.*;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
Expand All @@ -54,7 +65,7 @@
*/
public class GenbankProxySequenceReader<C extends Compound> extends StringProxySequenceReader<C> implements FeaturesKeyWordInterface, DatabaseReferenceInterface, FeatureRetriever {

private final static Logger logger = LoggerFactory.getLogger(GenbankProxySequenceReader.class);
private static final Logger logger = LoggerFactory.getLogger(GenbankProxySequenceReader.class);

private static final String eutilBaseURL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"; //
private String genbankDirectoryCache = null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,6 @@
*/
package org.biojava.nbio.core.sequence.io;

import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import org.biojava.nbio.core.exceptions.CompoundNotFoundException;
import org.biojava.nbio.core.sequence.DNASequence;
import org.biojava.nbio.core.sequence.ProteinSequence;
Expand All @@ -46,8 +39,20 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import static org.hamcrest.CoreMatchers.is;
import static org.junit.Assert.*;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertThat;
import static org.junit.Assert.assertTrue;

/**
*
Expand Down Expand Up @@ -161,7 +166,7 @@ public void testProcess() throws Exception {
*/
@Test
public void testPartialProcess() throws IOException, CompoundNotFoundException, NoSuchFieldException {
InputStream inStream = this.getClass().getResourceAsStream("/two-dnaseqs.gb");
CheckableInputStream inStream = new CheckableInputStream(this.getClass().getResourceAsStream("/two-dnaseqs.gb"));

GenbankReader<DNASequence, NucleotideCompound> genbankDNA
= new GenbankReader<>(
Expand All @@ -173,27 +178,29 @@ public void testPartialProcess() throws IOException, CompoundNotFoundException,
// First call to process(1) returns the first sequence
LinkedHashMap<String, DNASequence> dnaSequences = genbankDNA.process(1);

assertFalse(inStream.isclosed());
assertNotNull(dnaSequences);
assertEquals(1, dnaSequences.size());
assertNotNull(dnaSequences.get("vPetite"));

// Second call to process(1) returns the second sequence
dnaSequences = genbankDNA.process(1);
assertFalse(inStream.isclosed());
assertNotNull(dnaSequences);
assertEquals(1, dnaSequences.size());
assertNotNull(dnaSequences.get("sbFDR"));

assertFalse(genbankDNA.isClosed());
genbankDNA.close();
assertTrue(genbankDNA.isClosed());

assertTrue(inStream.isclosed());
}

@Test
public void CDStest() throws Exception {
logger.info("CDS Test");

InputStream inStream = this.getClass().getResourceAsStream("/BondFeature.gb");
CheckableInputStream inStream = new CheckableInputStream(this.getClass().getResourceAsStream("/BondFeature.gb"));
assertNotNull(inStream);

GenbankReader<ProteinSequence, AminoAcidCompound> GenbankProtein
Expand All @@ -203,7 +210,7 @@ public void CDStest() throws Exception {
new ProteinSequenceCreator(AminoAcidCompoundSet.getAminoAcidCompoundSet())
);
LinkedHashMap<String, ProteinSequence> proteinSequences = GenbankProtein.process();
inStream.close();
assertTrue(inStream.isclosed());


Assert.assertTrue(proteinSequences.size() == 1);
Expand Down Expand Up @@ -260,4 +267,27 @@ public void testNcbiExpandedAccessionFormats() throws Exception {
DNASequence header2 = readGenbankResource("/empty_header2.gb");
assertEquals("AZZZAA02123456789 10000000000 bp DNA linear PRI 15-OCT-2018", header2.getOriginalHeader());
}

/**
* Helper class to be able to verify the closed state of the input stream.
*/
private class CheckableInputStream extends BufferedInputStream {

private boolean closed;

CheckableInputStream(InputStream in) {
super(in);
closed = false;
}

@Override
public void close() throws IOException {
super.close();
closed = true;
}

boolean isclosed() {
return closed;
}
}
}
Loading