@@ -68,6 +68,7 @@ public class GenbankSequenceParser<S extends AbstractSequence<C>, C extends Comp
6868 private String header ;
6969 private String accession ;
7070 private boolean isCircularSequence ;
71+ private long sequenceLength ;
7172 public LinkedHashMap <String , ArrayList <DBReferenceInfo >> mapDB ;
7273 /**
7374 * this data structure collects list of features extracted from the
@@ -110,7 +111,7 @@ public class GenbankSequenceParser<S extends AbstractSequence<C>, C extends Comp
110111 protected static final String START_SEQUENCE_TAG = "ORIGIN" ;
111112 protected static final String END_SEQUENCE_TAG = "//" ;
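112113 // locus line: name, sequence length, units (bp|aa), optional type (with optional [dms]s- prefix), optional topology (circular|linear), then up to two further optional tokens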
113- protected static final Pattern lp = Pattern .compile ("^(\\ S+)\\ s+\\ d+\\ s+(bp|BP|aa|AA)\\ s{0,4}(([dmsDMS][sS]-)?(\\ S+))?\\ s*(circular|CIRCULAR|linear|LINEAR)?\\ s*(\\ S+)?\\ s*(\\ S+)?$" );
114+ protected static final Pattern lp = Pattern .compile ("^(\\ S+)\\ s+( \\ d+) \\ s+(bp|BP|aa|AA)\\ s{0,4}(([dmsDMS][sS]-)?(\\ S+))?\\ s*(circular|CIRCULAR|linear|LINEAR)?\\ s*(\\ S+)?\\ s*(\\ S+)?$" );
114115 // version line
115116 protected static final Pattern vp = Pattern .compile ("^(\\ S*?)(\\ .(\\ d+))?(\\ s+GI:(\\ S+))?$" );
116117 // reference line
@@ -161,9 +162,9 @@ private String parse(BufferedReader bufferedReader) {
161162 if (m .matches ()) {
162163 headerParser .setName (m .group (1 ));
163164 headerParser .setAccession (m .group (1 )); // default if no accession found
164-
165- String lengthUnits = m .group (2 );
166- String type = m .group (5 );
165+ sequenceLength = Long . valueOf ( m . group ( 2 ));
166+ String lengthUnits = m .group (3 );
167+ String type = m .group (6 );
167168
168169 if (lengthUnits .equalsIgnoreCase ("aa" )) {
169170 compoundType = AminoAcidCompoundSet .getAminoAcidCompoundSet ();
@@ -179,7 +180,7 @@ private String parse(BufferedReader bufferedReader) {
179180 }
180181 }
181182
182- isCircularSequence = m .group (6 ).equalsIgnoreCase ("circular" );
183+ isCircularSequence = m .group (7 ).equalsIgnoreCase ("circular" );
183184
184185 log .debug ("compound type: {}" , compoundType .getClass ().getSimpleName ());
185186
0 commit comments