Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.stream.Stream;

/**
* Use {@link GenbankReaderHelper} as an example of how to use this class where {@link GenbankReaderHelper} should be the
Expand Down Expand Up @@ -153,35 +154,56 @@ public LinkedHashMap<String,S> process(final int max) throws IOException, Compou
}

LinkedHashMap<String,S> sequences = new LinkedHashMap<>();
@SuppressWarnings("unchecked")
int i=0;
while(true) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is what is hanging the build. Because the stream returned is null (the file does not exist, see comment below), sequence is always null and this while loop never finishes.

The while loop is no longer needed; simply take an action if the sequence is null.

if(max>0 && i>=max) break;
i++;
String seqString = genbankParser.getSequence(bufferedReader, 0);
//reached end of file?
if(seqString==null) break;
@SuppressWarnings("unchecked")
S sequence = (S) sequenceCreator.getSequence(seqString, 0);
genbankParser.getSequenceHeaderParser().parseHeader(genbankParser.getHeader(), sequence);

// add features to new sequence
genbankParser.getFeatures().values().stream()
.flatMap(List::stream)
.forEach(sequence::addFeature);

// add taxonomy ID to new sequence
ArrayList<DBReferenceInfo> dbQualifier = genbankParser.getDatabaseReferences().get("db_xref");
if (dbQualifier != null){
DBReferenceInfo q = dbQualifier.get(0);
sequence.setTaxonomy(new TaxonomyID(q.getDatabase()+":"+q.getId(), DataSource.GENBANK));
S sequence = getSequence();
if(null == sequence) {
break;
}

sequences.put(sequence.getAccession().getID(), sequence);
}

return sequences;
}

/**
 * Exposes the sequences of this reader as a lazily evaluated {@link Stream}; each element is
 * obtained by one call to {@code getSequence()}.
 *
 * NOTE(review): {@link Stream#generate} produces an unbounded stream and nothing in this method
 * stops it once {@code getSequence()} starts returning {@code null}, so an unbounded terminal
 * operation such as {@code count()} is not expected to finish — confirm and bound the stream.
 *
 * @return a stream whose elements are the values returned by {@code getSequence()}; an element is
 *         {@code null} when {@code getSequence()} returns {@code null} or when it throws
 */
public Stream<S> getSequencesAsStream() {
return Stream.generate(() -> {
try {
return getSequence();
} catch (IOException | CompoundNotFoundException e) {
// TODO Auto-generated catch block
// The failure is only printed here; the caller receives a null element instead of the exception.
e.printStackTrace();
return null;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd rather throw the exception forward and let the caller handle it. Returning null means all callers have to handle the null and that they lose the ability to provide a good error message.

}
});
}

/**
 * Reads one sequence from {@code bufferedReader} through {@code genbankParser} and turns it into
 * a fully populated sequence object (header, features and, when available, taxonomy).
 *
 * @return the newly built sequence, or {@code null} when the parser returns no sequence string
 *         (presumably because the end of the input was reached)
 * @throws IOException propagated from the parser or the sequence creator
 * @throws CompoundNotFoundException propagated from the parser or the sequence creator
 */
private S getSequence() throws IOException, CompoundNotFoundException {
	final String rawSequence = genbankParser.getSequence(bufferedReader, 0);
	if (rawSequence == null) {
		// nothing left to read
		return null;
	}

	@SuppressWarnings("unchecked")
	final S parsed = (S) sequenceCreator.getSequence(rawSequence, 0);
	genbankParser.getSequenceHeaderParser().parseHeader(genbankParser.getHeader(), parsed);

	// attach every parsed feature, group by group, to the new sequence
	genbankParser.getFeatures().values()
			.forEach(group -> group.forEach(parsed::addFeature));

	// the first "db_xref" entry, if any, supplies the taxonomy ID
	final ArrayList<DBReferenceInfo> crossRefs = genbankParser.getDatabaseReferences().get("db_xref");
	if (crossRefs != null) {
		final DBReferenceInfo first = crossRefs.get(0);
		final String taxonomyId = first.getDatabase() + ":" + first.getId();
		parsed.setTaxonomy(new TaxonomyID(taxonomyId, DataSource.GENBANK));
	}

	return parsed;
}

public void close() {
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Stream;

import static org.hamcrest.CoreMatchers.is;
import static org.junit.Assert.assertEquals;
Expand Down Expand Up @@ -163,6 +164,27 @@ public void testProcess() throws Exception {
assertEquals(3, dnaSequence.getAccession().getVersion().intValue());
assertTrue(genbankDNA.isClosed());
}

/**
 * Reads the {@code /two-dnaseqs.gb} test resource through {@code getSequencesAsStream()},
 * expects two sequences, and then checks that closing the reader also closes the wrapped
 * input stream.
 */
@Test
public void testSequenceStream() {
// Wrap the resource stream so its closed state can be asserted at the end of the test.
CheckableInputStream inStream = new CheckableInputStream(this.getClass().getResourceAsStream("/two-dnaseqs.gb"));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did you also include this file in the test resources directory? It seems not to be in this PR


GenbankReader<DNASequence, NucleotideCompound> genbankDNA
= new GenbankReader<>(
inStream,
new GenericGenbankHeaderParser<>(),
new DNASequenceCreator(DNACompoundSet.getDNACompoundSet())
);

Stream<DNASequence> seqStream = genbankDNA.getSequencesAsStream();
// NOTE(review): JUnit's assertEquals takes (expected, actual); the arguments look swapped — confirm.
// NOTE(review): count() consumes the whole stream, so it only returns if the stream is finite — confirm.
assertEquals(seqStream.count(),2);

// Consuming the stream must not close the reader; only the explicit close() below should.
assertFalse(genbankDNA.isClosed());
genbankDNA.close();
assertTrue(genbankDNA.isClosed());
assertTrue(inStream.isclosed());

}

/**
* Test the process method with a number of sequences to be read at each call.
Expand Down