Skip to content

Commit 3dc43b4

Browse files
author
Noor Almbaidin
committed
t
1 parent 68dffbb commit 3dc43b4

1 file changed

Lines changed: 67 additions & 60 deletions

File tree

  • biojava-core/src/main/java/org/biojava/nbio/core/sequence/io/embl

biojava-core/src/main/java/org/biojava/nbio/core/sequence/io/embl/EmblParser.java

Lines changed: 67 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -2,18 +2,21 @@
22

33

44
import java.io.*;
5+
import java.util.LinkedList;
6+
import java.util.List;
57

68
public class EmblParser {
79

810
private File file;
9-
private EmblId id;
10-
private EmblReference emblReference;
11-
private String accessionNumber;
11+
private EmblId emblId = new EmblId();
12+
private EmblReference emblReference = new EmblReference();
13+
private List<String> accessionNumber = new LinkedList<>();
1214
private String ProjectIdentifier;
15+
private String OrGanelle;
1316
private String createdDate;
1417
private String lastUpdatedDate;
1518
private String sequenceDescription;
16-
private String keyword;
19+
private List<String> keyword = new LinkedList<>();
1720
private String organismSpecies;
1821
private String organismClassification;
1922
private String databaseCrossReference;
@@ -24,125 +27,74 @@ public class EmblParser {
2427
private StringBuilder sequence;
2528

2629
public EmblId getEmblId() {
27-
return id;
30+
return emblId;
2831
}
2932

3033
public EmblReference getEmblReference() {
3134
return emblReference;
3235
}
3336

34-
35-
public String getAccessionNumber() {
37+
public List<String> getAccessionNumber() {
3638
return accessionNumber;
3739
}
3840

39-
public void setAccessionNumber(String accessionNumber) {
40-
this.accessionNumber = accessionNumber;
41-
}
42-
4341
public String getProjectIdentifier() {
4442
return ProjectIdentifier;
4543
}
4644

47-
public void setProjectIdentifier(String projectIdentifier) {
48-
ProjectIdentifier = projectIdentifier;
49-
}
50-
5145
public String getCreatedDate() {
5246
return createdDate;
5347
}
5448

55-
public void setCreatedDate(String createdDate) {
56-
this.createdDate = createdDate;
57-
}
58-
5949
public String getLastUpdatedDate() {
6050
return lastUpdatedDate;
6151
}
6252

63-
public void setLastUpdatedDate(String lastUpdatedDate) {
64-
this.lastUpdatedDate = lastUpdatedDate;
65-
}
66-
6753
public String getSequenceDescription() {
6854
return sequenceDescription;
6955
}
7056

71-
public void setSequenceDescription(String sequenceDescription) {
72-
this.sequenceDescription = sequenceDescription;
73-
}
74-
75-
public String getKeyword() {
57+
public List<String> getKeyword() {
7658
return keyword;
7759
}
7860

79-
public void setKeyword(String keyword) {
80-
this.keyword = keyword;
81-
}
82-
8361
public String getOrganismSpecies() {
8462
return organismSpecies;
8563
}
8664

87-
public void setOrganismSpecies(String organismSpecies) {
88-
this.organismSpecies = organismSpecies;
89-
}
90-
9165
public String getOrganismClassification() {
9266
return organismClassification;
9367
}
9468

95-
public void setOrganismClassification(String organismClassification) {
96-
this.organismClassification = organismClassification;
97-
}
9869

9970
public String getDatabaseCrossReference() {
10071
return databaseCrossReference;
10172
}
10273

103-
public void setDatabaseCrossReference(String databaseCrossReference) {
104-
this.databaseCrossReference = databaseCrossReference;
105-
}
106-
10774
public String getAssemblyHeader() {
10875
return assemblyHeader;
10976
}
11077

111-
public void setAssemblyHeader(String assemblyHeader) {
112-
this.assemblyHeader = assemblyHeader;
113-
}
11478

11579
public String getAssemblyInformation() {
11680
return assemblyInformation;
11781
}
11882

119-
public void setAssemblyInformation(String assemblyInformation) {
120-
this.assemblyInformation = assemblyInformation;
121-
}
12283

12384
public String getCON() {
12485
return CON;
12586
}
12687

127-
public void setCON(String CON) {
128-
this.CON = CON;
129-
}
13088

13189
public String getSequenceHeader() {
13290
return sequenceHeader;
13391
}
13492

135-
public void setSequenceHeader(String sequenceHeader) {
136-
this.sequenceHeader = sequenceHeader;
137-
}
13893

13994
public StringBuilder getSequence() {
14095
return sequence;
14196
}
14297

143-
public void setSequence(StringBuilder sequence) {
144-
this.sequence = sequence;
145-
}
14698

14799
public EmblParser(File file) {
148100
setFile(file);
@@ -157,11 +109,37 @@ public void setFile(File file) {
157109
public void parse() {
158110
try (FileReader fileReader = new FileReader(file)) {
159111
String line = "";
112+
String lineIdentifier;
113+
String lineInfo;
160114
try (BufferedReader bufferedReader = new BufferedReader(fileReader)) {
161115
while (bufferedReader.readLine() != null)
162116
line = bufferedReader.readLine();
163-
if (line.substring(0, 2).equals("ID"))
117+
lineInfo = line.substring(2);
118+
lineIdentifier = line.substring(0, 2);
119+
if (lineIdentifier.equals("ID"))
164120
populateID(line);
121+
else if (lineIdentifier.equals("AC"))
122+
populateAccessionNumber(line);
123+
else if (lineIdentifier.equals("DT") && line.contains("Created"))
124+
createdDate = lineInfo;
125+
else if (lineIdentifier.equals("DT") && line.contains("updated"))
126+
lastUpdatedDate = lineInfo;
127+
else if (lineIdentifier.equals("DE"))
128+
sequenceDescription = lineInfo;
129+
else if (lineIdentifier.equals("KW"))
130+
keyword.add(lineInfo);
131+
else if (lineIdentifier.equals("OS"))
132+
organismSpecies = lineInfo;
133+
else if (lineIdentifier.equals("OC"))
134+
organismClassification = lineInfo;
135+
else if (lineIdentifier.equals("OG"))
136+
OrGanelle = lineInfo;
137+
else if (lineIdentifier.equals("RN") || lineIdentifier.equals("RP") || lineIdentifier.equals("RX")
138+
|| lineIdentifier.equals("RG") || lineIdentifier.equals("RA")
139+
|| lineIdentifier.equals("RT") || lineIdentifier.equals("RL"))
140+
emblReferencePopulating(lineIdentifier, lineInfo);
141+
142+
165143
}
166144
} catch (FileNotFoundException e) {
167145
e.printStackTrace();
@@ -170,8 +148,37 @@ public void parse() {
170148
}
171149
}
172150

151+
private void emblReferencePopulating(String lineIdentifier, String lineInfo) {
152+
if (lineIdentifier.equals("RN"))
153+
emblReference.setReferenceNumber(lineInfo);
154+
else if (lineIdentifier.equals("RP"))
155+
emblReference.setReferencePosition(lineInfo);
156+
else if (lineIdentifier.equals("RX"))
157+
emblReference.setReferenceCrossReference(lineInfo);
158+
else if (lineIdentifier.equals("RG"))
159+
emblReference.setReferenceGroup(lineInfo);
160+
else if (lineIdentifier.equals("RA"))
161+
emblReference.setReferenceAuthor(lineInfo);
162+
else if (lineIdentifier.equals("RT"))
163+
emblReference.setReferenceTitle(lineInfo);
164+
else if (lineIdentifier.equals("RL"))
165+
emblReference.setReferenceLocation(lineInfo);
166+
}
167+
168+
private void populateAccessionNumber(String line) {
169+
accessionNumber.add(line);
170+
}
171+
173172
private void populateID(String line) {
174-
173+
line.replace(",", "");
174+
String[] strings = line.split(" ");
175+
emblId.setPrimaryAccession(strings[1]);
176+
emblId.setSequenceVersion(strings[2]);
177+
emblId.setTopology(strings[3]);
178+
emblId.setMoleculeType(strings[4]);
179+
emblId.setDataClass(strings[5]);
180+
emblId.setTaxonomicDivision(strings[6]);
181+
emblId.setSequenceLength(strings[7]);
175182
}
176183

177184

0 commit comments

Comments
 (0)