Skip to content

Commit 4fd3fdb

Browse files
committed
The sequence length must also be extracted in order to correctly create two locations that span the sequence end.
1 parent a74c015 commit 4fd3fdb

1 file changed

Lines changed: 6 additions & 5 deletions

File tree

biojava-core/src/main/java/org/biojava/nbio/core/sequence/io/GenbankSequenceParser.java

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ public class GenbankSequenceParser<S extends AbstractSequence<C>, C extends Comp
6868
private String header;
6969
private String accession;
7070
private boolean isCircularSequence;
71+
private long sequenceLength;
7172
public LinkedHashMap<String, ArrayList<DBReferenceInfo>> mapDB;
7273
/**
7374
* this data structure collects list of features extracted from the
@@ -110,7 +111,7 @@ public class GenbankSequenceParser<S extends AbstractSequence<C>, C extends Comp
110111
protected static final String START_SEQUENCE_TAG = "ORIGIN";
111112
protected static final String END_SEQUENCE_TAG = "//";
112113
// locus line
113-
protected static final Pattern lp = Pattern.compile("^(\\S+)\\s+\\d+\\s+(bp|BP|aa|AA)\\s{0,4}(([dmsDMS][sS]-)?(\\S+))?\\s*(circular|CIRCULAR|linear|LINEAR)?\\s*(\\S+)?\\s*(\\S+)?$");
114+
protected static final Pattern lp = Pattern.compile("^(\\S+)\\s+(\\d+)\\s+(bp|BP|aa|AA)\\s{0,4}(([dmsDMS][sS]-)?(\\S+))?\\s*(circular|CIRCULAR|linear|LINEAR)?\\s*(\\S+)?\\s*(\\S+)?$");
114115
// version line
115116
protected static final Pattern vp = Pattern.compile("^(\\S*?)(\\.(\\d+))?(\\s+GI:(\\S+))?$");
116117
// reference line
@@ -161,9 +162,9 @@ private String parse(BufferedReader bufferedReader) {
161162
if (m.matches()) {
162163
headerParser.setName(m.group(1));
163164
headerParser.setAccession(m.group(1)); // default if no accession found
164-
165-
String lengthUnits = m.group(2);
166-
String type = m.group(5);
165+
sequenceLength = Long.valueOf(m.group(2));
166+
String lengthUnits = m.group(3);
167+
String type = m.group(6);
167168

168169
if (lengthUnits.equalsIgnoreCase("aa")) {
169170
compoundType = AminoAcidCompoundSet.getAminoAcidCompoundSet();
@@ -179,7 +180,7 @@ private String parse(BufferedReader bufferedReader) {
179180
}
180181
}
181182

182-
isCircularSequence = m.group(6).equalsIgnoreCase("circular");
183+
isCircularSequence = m.group(7).equalsIgnoreCase("circular");
183184

184185
log.debug("compound type: {}", compoundType.getClass().getSimpleName());
185186

0 commit comments

Comments
 (0)