Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ public class GenbankSequenceParser<S extends AbstractSequence<C>, C extends Comp
private GenericGenbankHeaderParser<S, C> headerParser;
private String header;
private String accession;
private boolean isCircularSequence;
private long sequenceLength;
public LinkedHashMap<String, ArrayList<DBReferenceInfo>> mapDB;
/**
* this data structure collects list of features extracted from the
Expand Down Expand Up @@ -109,7 +111,7 @@ public class GenbankSequenceParser<S extends AbstractSequence<C>, C extends Comp
protected static final String START_SEQUENCE_TAG = "ORIGIN";
protected static final String END_SEQUENCE_TAG = "//";
// locus line
protected static final Pattern lp = Pattern.compile("^(\\S+)\\s+\\d+\\s+(bp|BP|aa|AA)\\s{0,4}(([dmsDMS][sS]-)?(\\S+))?\\s*(circular|CIRCULAR|linear|LINEAR)?\\s*(\\S+)?\\s*(\\S+)?$");
protected static final Pattern lp = Pattern.compile("^(\\S+)\\s+(\\d+)\\s+(bp|BP|aa|AA)\\s{0,4}(([dmsDMS][sS]-)?(\\S+))?\\s*(circular|CIRCULAR|linear|LINEAR)?\\s*(\\S+)?\\s*(\\S+)?$");
// version line
protected static final Pattern vp = Pattern.compile("^(\\S*?)(\\.(\\d+))?(\\s+GI:(\\S+))?$");
// reference line
Expand Down Expand Up @@ -160,9 +162,9 @@ private String parse(BufferedReader bufferedReader) {
if (m.matches()) {
headerParser.setName(m.group(1));
headerParser.setAccession(m.group(1)); // default if no accession found

String lengthUnits = m.group(2);
String type = m.group(5);
sequenceLength = Long.valueOf(m.group(2));
String lengthUnits = m.group(3);
String type = m.group(6);

if (lengthUnits.equalsIgnoreCase("aa")) {
compoundType = AminoAcidCompoundSet.getAminoAcidCompoundSet();
Expand All @@ -178,6 +180,12 @@ private String parse(BufferedReader bufferedReader) {
}
}

if (m.group(7) != null) isCircularSequence = m.group(7).equalsIgnoreCase("circular");

// configure location parser with needed information
locationParser.setSequenceLength(sequenceLength);
locationParser.setSequenceCircular(isCircularSequence);

log.debug("compound type: {}", compoundType.getClass().getSimpleName());

} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,11 @@ public GroupLocation(Location... subLocations) {
this(Arrays.asList(subLocations));
}

/**
 * Builds a group location from the given sub-locations and then sets its
 * circular flag.
 *
 * @param isCircular value handed to {@code setCircular} once the
 *        sub-locations have been registered
 * @param subLocations locations wrapped into a list and passed to the
 *        list-based constructor of this class
 */
public GroupLocation(boolean isCircular, Location... subLocations) {
this(Arrays.asList(subLocations));
// the circular flag is applied after delegation; the delegated constructor does not receive it
setCircular(isCircular);
}

public GroupLocation(Point start, Point end, Strand strand,
boolean circular, Location... subLocations) {
super(start, end, strand, circular, subLocations);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@
*/
public class InsdcParser <S extends AbstractSequence<C>, C extends Compound>{

private boolean isSequenceCircular;
private long sequenceLength;

private final DataSource dataSource;

/**
Expand Down Expand Up @@ -80,7 +83,6 @@ public class InsdcParser <S extends AbstractSequence<C>, C extends Compound>{
* Not really sure that they are not declared obsolete but they are still in
* several files.
*/
//protected static final Pattern genbankSplitPattern = Pattern.compile("^\\s?(join|order|bond|complement|)\\(?([^\\)]+)\\)?");
protected static final Pattern genbankSplitPattern = Pattern.compile("^\\s?(join|order|bond|complement|)\\(?(.+)\\)?");
/**
* designed to recursively split a location string in tokens. Valid tokens
Expand Down Expand Up @@ -126,7 +128,13 @@ public DataSource getDataSource() {
return dataSource;
}

/**
 * Records whether the sequence whose locations are being parsed is
 * circular. The stored flag is forwarded to the locations this parser
 * constructs.
 *
 * @param sequenceCircular {@code true} if the sequence is circular
 */
public void setSequenceCircular(boolean sequenceCircular) {
    this.isSequenceCircular = sequenceCircular;
}

/**
 * Stores the total length of the sequence whose locations are being
 * parsed. The parser uses this value as the end coordinate of the
 * trailing sub-location when a parsed range has a start greater than
 * its end.
 *
 * @param sequenceLength length of the sequence; NOTE(review): the value is
 *        narrowed to {@code int} where it is used, so lengths above
 *        {@code Integer.MAX_VALUE} are presumably unsupported - confirm
 */
public void setSequenceLength(long sequenceLength) {
this.sequenceLength = sequenceLength;
}

/**
* Main method for parsing a location from a String instance
Expand All @@ -146,23 +154,24 @@ public Location parse(String locationString) throws ParserException {
l = ll.get(0);
} else {
l = new SimpleLocation(
featureGlobalStart,
featureGlobalEnd,
new SimplePoint(featureGlobalStart),
new SimplePoint(featureGlobalEnd),
Strand.UNDEFINED,
isSequenceCircular,
ll);
}
return l;
}

/**
* Reader based version of the parse methods.
*
* @param reader The source of the data; assumes that end of the reader
* stream is the end of the location string to parse
* @return The parsed location
* @throws IOException Thrown with any reader error
* @throws ParserException Thrown with any error with parsing locations
*/
/**
* Reader based version of the parse methods.
*
* @param reader The source of the data; assumes that end of the reader
* stream is the end of the location string to parse
* @return The parsed location
* @throws IOException Thrown with any reader error
* @throws ParserException Thrown with any error with parsing locations
*/
public List<AbstractLocation> parse(Reader reader) throws IOException, ParserException {
// use parse(String s) instead!
return null;
Expand All @@ -186,7 +195,8 @@ private List<Location> parseLocationString(String string, int versus) throws Par
if (!splitQualifier.isEmpty()) {
//recursive case
int localVersus = splitQualifier.equalsIgnoreCase("complement") ? -1 : 1;
List<Location> subLocations = parseLocationString(splitString, versus * localVersus);
List<Location> subLocations = parseLocationString(
splitString, versus * localVersus);

switch (complexFeaturesAppendMode) {
case FLATTEN:
Expand Down Expand Up @@ -228,8 +238,8 @@ private List<Location> parseLocationString(String string, int versus) throws Par

String accession = m.group(1);
Strand s = versus == 1 ? Strand.POSITIVE : Strand.NEGATIVE;
int start = Integer.parseInt(m.group(3));
int end = m.group(6) == null ? start : new Integer(m.group(6));
int start = Integer.valueOf(m.group(3));
int end = m.group(6) == null ? start : Integer.valueOf(m.group(6));

if (featureGlobalStart > start) {
featureGlobalStart = start;
Expand All @@ -238,11 +248,35 @@ private List<Location> parseLocationString(String string, int versus) throws Par
featureGlobalEnd = end;
}

AbstractLocation l = new SimpleLocation(
start,
end,
s
);
AbstractLocation l;
if (start <= end) {
l = new SimpleLocation(
start,
end,
s
);
} else {
// start > end: the location wraps past the end of the sequence, and the Location contract requires it to be expressed as sub-locations
AbstractLocation l5prime = new SimpleLocation(
1,
end,
Strand.UNDEFINED
);
AbstractLocation l3prime = new SimpleLocation(
start,
(int) sequenceLength,
Strand.UNDEFINED
);

l = new InsdcLocations.GroupLocation(
new SimplePoint(start),
new SimplePoint(end),
s,
isSequenceCircular,
l5prime, l3prime
);

}

if(m.group(4) != null && m.group(4).equals("^")) l.setBetweenCompounds(true);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -128,9 +128,9 @@ protected void assertLocation() {
int st = getStart().getPosition();
int e = getEnd().getPosition();

if (st > e) {
if (st > e && ! isCircular()) {
throw new IllegalStateException(
String.format("Start (%d) is greater than end (%d); "
String.format("Start (%d) is greater than end (%d) in non circular sequence; "
+ "this is an incorrect format",
st, e));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,41 +22,26 @@

import org.biojava.nbio.core.exceptions.CompoundNotFoundException;
import org.biojava.nbio.core.sequence.DNASequence;
import org.biojava.nbio.core.sequence.RNASequence;
import org.biojava.nbio.core.sequence.ProteinSequence;
import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet;
import org.biojava.nbio.core.sequence.compound.DNACompoundSet;
import org.biojava.nbio.core.sequence.compound.RNACompoundSet;
import org.biojava.nbio.core.sequence.compound.NucleotideCompound;
import org.biojava.nbio.core.sequence.RNASequence;
import org.biojava.nbio.core.sequence.Strand;
import org.biojava.nbio.core.sequence.compound.*;
import org.biojava.nbio.core.sequence.features.FeatureInterface;
import org.biojava.nbio.core.sequence.features.Qualifier;
import org.biojava.nbio.core.sequence.location.template.AbstractLocation;
import org.biojava.nbio.core.sequence.template.AbstractSequence;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.*;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import static org.hamcrest.CoreMatchers.is;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertThat;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.*;

/**
*
Expand Down Expand Up @@ -229,7 +214,7 @@ public void CDStest() throws Exception {

Assert.assertNotNull(codedBy);
Assert.assertTrue(!codedBy.isEmpty());
assertEquals(codedBy, "NM_000266.2:503..904");
assertEquals("NM_000266.2:503..904", codedBy);
assertEquals(5, dbrefs.size());

}
Expand Down Expand Up @@ -350,6 +335,22 @@ public void testLegacyLocusCompatable() throws IOException, CompoundNotFoundExce

}

/**
 * Regression test for issue #855: a GenBank feature whose location has a
 * start greater than its end must be read as a circular location that
 * keeps the original start, end and strand.
 *
 * @throws IOException if the GenBank test resource cannot be read
 * @throws CompoundNotFoundException if the sequence contains an unknown compound
 */
@Test
public void readSequenceWithZeroSpanFeature() throws IOException, CompoundNotFoundException {
    logger.info("make or read genbank file error when feature spans zero point of circular sequence (issue #855)");
    final DNASequence seq = readGenbankResource("/feature-spans-zero-point-circular-sequence.gb");

    assertNotNull(seq);

    // index of the feature in the test resource whose location has start > end
    final int wrappingFeatureIndex = 33;
    final FeatureInterface<AbstractSequence<NucleotideCompound>, NucleotideCompound> f =
            seq.getFeatures().get(wrappingFeatureIndex);
    final AbstractLocation fLocation = f.getLocations();

    assertTrue(fLocation.isCircular());
    // the (int) casts unbox the positions so the primitive assertEquals overload is selected
    assertEquals(7028, (int) fLocation.getStart().getPosition());
    assertEquals(286, (int) fLocation.getEnd().getPosition());
    assertEquals(Strand.NEGATIVE, fLocation.getStrand());
}

/**
* Helper class to be able to verify the closed state of the input stream.
*/
Expand Down
Loading