22
33
44import java .io .*;
5+ import java .util .LinkedList ;
6+ import java .util .List ;
57
68public class EmblParser {
79
810 private File file ;
9- private EmblId id ;
10- private EmblReference emblReference ;
11- private String accessionNumber ;
11+ private EmblId emblId = new EmblId () ;
12+ private EmblReference emblReference = new EmblReference () ;
13+ private List < String > accessionNumber = new LinkedList <>() ;
1214 private String ProjectIdentifier ;
15+ private String OrGanelle ;
1316 private String createdDate ;
1417 private String lastUpdatedDate ;
1518 private String sequenceDescription ;
16- private String keyword ;
19+ private List < String > keyword = new LinkedList <>() ;
1720 private String organismSpecies ;
1821 private String organismClassification ;
1922 private String databaseCrossReference ;
@@ -24,125 +27,74 @@ public class EmblParser {
2427 private StringBuilder sequence ;
2528
2629 public EmblId getEmblId () {
27- return id ;
30+ return emblId ;
2831 }
2932
3033 public EmblReference getEmblReference () {
3134 return emblReference ;
3235 }
3336
34-
35- public String getAccessionNumber () {
37+ public List <String > getAccessionNumber () {
3638 return accessionNumber ;
3739 }
3840
39- public void setAccessionNumber (String accessionNumber ) {
40- this .accessionNumber = accessionNumber ;
41- }
42-
4341 public String getProjectIdentifier () {
4442 return ProjectIdentifier ;
4543 }
4644
47- public void setProjectIdentifier (String projectIdentifier ) {
48- ProjectIdentifier = projectIdentifier ;
49- }
50-
5145 public String getCreatedDate () {
5246 return createdDate ;
5347 }
5448
55- public void setCreatedDate (String createdDate ) {
56- this .createdDate = createdDate ;
57- }
58-
5949 public String getLastUpdatedDate () {
6050 return lastUpdatedDate ;
6151 }
6252
63- public void setLastUpdatedDate (String lastUpdatedDate ) {
64- this .lastUpdatedDate = lastUpdatedDate ;
65- }
66-
6753 public String getSequenceDescription () {
6854 return sequenceDescription ;
6955 }
7056
71- public void setSequenceDescription (String sequenceDescription ) {
72- this .sequenceDescription = sequenceDescription ;
73- }
74-
75- public String getKeyword () {
57+ public List <String > getKeyword () {
7658 return keyword ;
7759 }
7860
79- public void setKeyword (String keyword ) {
80- this .keyword = keyword ;
81- }
82-
8361 public String getOrganismSpecies () {
8462 return organismSpecies ;
8563 }
8664
87- public void setOrganismSpecies (String organismSpecies ) {
88- this .organismSpecies = organismSpecies ;
89- }
90-
9165 public String getOrganismClassification () {
9266 return organismClassification ;
9367 }
9468
95- public void setOrganismClassification (String organismClassification ) {
96- this .organismClassification = organismClassification ;
97- }
9869
9970 public String getDatabaseCrossReference () {
10071 return databaseCrossReference ;
10172 }
10273
103- public void setDatabaseCrossReference (String databaseCrossReference ) {
104- this .databaseCrossReference = databaseCrossReference ;
105- }
106-
10774 public String getAssemblyHeader () {
10875 return assemblyHeader ;
10976 }
11077
111- public void setAssemblyHeader (String assemblyHeader ) {
112- this .assemblyHeader = assemblyHeader ;
113- }
11478
11579 public String getAssemblyInformation () {
11680 return assemblyInformation ;
11781 }
11882
119- public void setAssemblyInformation (String assemblyInformation ) {
120- this .assemblyInformation = assemblyInformation ;
121- }
12283
12384 public String getCON () {
12485 return CON ;
12586 }
12687
127- public void setCON (String CON ) {
128- this .CON = CON ;
129- }
13088
13189 public String getSequenceHeader () {
13290 return sequenceHeader ;
13391 }
13492
135- public void setSequenceHeader (String sequenceHeader ) {
136- this .sequenceHeader = sequenceHeader ;
137- }
13893
13994 public StringBuilder getSequence () {
14095 return sequence ;
14196 }
14297
143- public void setSequence (StringBuilder sequence ) {
144- this .sequence = sequence ;
145- }
14698
14799 public EmblParser (File file ) {
148100 setFile (file );
@@ -157,11 +109,37 @@ public void setFile(File file) {
157109 public void parse () {
158110 try (FileReader fileReader = new FileReader (file )) {
159111 String line = "" ;
112+ String lineIdentifier ;
113+ String lineInfo ;
160114 try (BufferedReader bufferedReader = new BufferedReader (fileReader )) {
161115 while (bufferedReader .readLine () != null )
162116 line = bufferedReader .readLine ();
163- if (line .substring (0 , 2 ).equals ("ID" ))
117+ lineInfo = line .substring (2 );
118+ lineIdentifier = line .substring (0 , 2 );
119+ if (lineIdentifier .equals ("ID" ))
164120 populateID (line );
121+ else if (lineIdentifier .equals ("AC" ))
122+ populateAccessionNumber (line );
123+ else if (lineIdentifier .equals ("DT" ) && line .contains ("Created" ))
124+ createdDate = lineInfo ;
125+ else if (lineIdentifier .equals ("DT" ) && line .contains ("updated" ))
126+ lastUpdatedDate = lineInfo ;
127+ else if (lineIdentifier .equals ("DE" ))
128+ sequenceDescription = lineInfo ;
129+ else if (lineIdentifier .equals ("KW" ))
130+ keyword .add (lineInfo );
131+ else if (lineIdentifier .equals ("OS" ))
132+ organismSpecies = lineInfo ;
133+ else if (lineIdentifier .equals ("OC" ))
134+ organismClassification = lineInfo ;
135+ else if (lineIdentifier .equals ("OG" ))
136+ OrGanelle = lineInfo ;
137+ else if (lineIdentifier .equals ("RN" ) || lineIdentifier .equals ("RP" ) || lineIdentifier .equals ("RX" )
138+ || lineIdentifier .equals ("RG" ) || lineIdentifier .equals ("RA" )
139+ || lineIdentifier .equals ("RT" ) || lineIdentifier .equals ("RL" ))
140+ emblReferencePopulating (lineIdentifier , lineInfo );
141+
142+
165143 }
166144 } catch (FileNotFoundException e ) {
167145 e .printStackTrace ();
@@ -170,8 +148,37 @@ public void parse() {
170148 }
171149 }
172150
151+ private void emblReferencePopulating (String lineIdentifier , String lineInfo ) {
152+ if (lineIdentifier .equals ("RN" ))
153+ emblReference .setReferenceNumber (lineInfo );
154+ else if (lineIdentifier .equals ("RP" ))
155+ emblReference .setReferencePosition (lineInfo );
156+ else if (lineIdentifier .equals ("RX" ))
157+ emblReference .setReferenceCrossReference (lineInfo );
158+ else if (lineIdentifier .equals ("RG" ))
159+ emblReference .setReferenceGroup (lineInfo );
160+ else if (lineIdentifier .equals ("RA" ))
161+ emblReference .setReferenceAuthor (lineInfo );
162+ else if (lineIdentifier .equals ("RT" ))
163+ emblReference .setReferenceTitle (lineInfo );
164+ else if (lineIdentifier .equals ("RL" ))
165+ emblReference .setReferenceLocation (lineInfo );
166+ }
167+
168+ private void populateAccessionNumber (String line ) {
169+ accessionNumber .add (line );
170+ }
171+
173172 private void populateID (String line ) {
174-
173+ line .replace ("," , "" );
174+ String [] strings = line .split (" " );
175+ emblId .setPrimaryAccession (strings [1 ]);
176+ emblId .setSequenceVersion (strings [2 ]);
177+ emblId .setTopology (strings [3 ]);
178+ emblId .setMoleculeType (strings [4 ]);
179+ emblId .setDataClass (strings [5 ]);
180+ emblId .setTaxonomicDivision (strings [6 ]);
181+ emblId .setSequenceLength (strings [7 ]);
175182 }
176183
177184
0 commit comments