Skip to content

Commit cc80fe9

Browse files
committed
Parsed PDB KEYWDS record
They are parsed from PDB and added to Structure. Not parsed from mmCIF yet.
1 parent 37991a7 commit cc80fe9

File tree

6 files changed

+123
-0
lines changed

6 files changed

+123
-0
lines changed

biojava-structure/src/main/java/org/biojava/nbio/structure/Structure.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -788,4 +788,18 @@ public interface Structure extends Cloneable, Serializable {
788788
* returned the same value as {@link #getPDBCode()}
789789
*/
790790
String getIdentifier();
791+
792+
/**
793+
* Gets the keywords (KEYWDS) record of the structure
794+
* @return The keywords in a <code>List&lt;String&gt;</code>
795+
* @since 6.0.0
796+
*/
797+
List<String> getKeywords();
798+
799+
/**
800+
* Sets the KEYWDS record of the structure.
801+
* @param keywords The keywords in a <code>List&lt;String&gt;</code> to set.
802+
* @since 6.0.0
803+
*/
804+
void setKeywords(List<String> keywords);
791805
}

biojava-structure/src/main/java/org/biojava/nbio/structure/StructureImpl.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ public class StructureImpl implements Structure {
5757
private List<Bond> ssbonds;
5858
private List<Site> sites;
5959
private String name ;
60+
private List<String> keywords;
6061
private StructureIdentifier structureIdentifier;
6162

6263
private PDBHeader pdbHeader;
@@ -76,6 +77,7 @@ public StructureImpl() {
7677
pdbHeader = new PDBHeader();
7778
ssbonds = new ArrayList<>();
7879
sites = new ArrayList<>();
80+
keywords = new ArrayList<>();
7981
}
8082

8183
/**
@@ -122,6 +124,7 @@ public Structure clone() {
122124
n.setPDBHeader(pdbHeader);
123125
n.setDBRefs(this.getDBRefs());
124126
n.setSites(getSites());
127+
n.setKeywords(this.getKeywords());
125128

126129

127130
// go through each chain and clone chain
@@ -1020,6 +1023,16 @@ public String getPdbId() {
10201023
return pdb_id;
10211024
}
10221025

1026+
/** {@inheritDoc} */
1027+
public List<String> getKeywords() {
1028+
return keywords;
1029+
}
1030+
1031+
/** {@inheritDoc} */
1032+
public void setKeywords(List<String> keywords) {
1033+
this.keywords = keywords;
1034+
}
1035+
10231036
@Override
10241037
public void resetModels() {
10251038
models = new ArrayList<>();

biojava-structure/src/main/java/org/biojava/nbio/structure/SubstructureIdentifier.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,7 @@ public Structure reduce(Structure s) throws StructureException {
186186
newS.setName(this.toString());
187187
newS.setDBRefs(s.getDBRefs());
188188
newS.setBiologicalAssembly(s.isBiologicalAssembly());
189+
newS.setKeywords(s.getKeywords());
189190
newS.getPDBHeader().setDescription(
190191
"sub-range " + ranges + " of " + newS.getPDBCode() + " "
191192
+ s.getPDBHeader().getDescription());

biojava-structure/src/main/java/org/biojava/nbio/structure/io/PDBFileParser.java

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,7 @@ public class PDBFileParser {
181181
private List<String> compndLines = new ArrayList<String>();
182182
private List<String> sourceLines = new ArrayList<String>();
183183
private List<String> journalLines = new ArrayList<String>();
184+
private List<String> keywordsLines = new ArrayList<String>();
184185
private List<DBRef> dbrefs;
185186
private Map<String, Site> siteMap = new LinkedHashMap<String, Site>();
186187
private Map<String, List<ResidueNumber>> siteToResidueMap = new LinkedHashMap<String, List<ResidueNumber>>();
@@ -2442,6 +2443,8 @@ private void pdb_SITE_Handler(String line){
24422443

24432444
//Site variable related to parsing the REMARK 800 records.
24442445
Site site;
2446+
2447+
private String[] keywords;
24452448
private void pdb_REMARK_800_Handler(String line){
24462449

24472450
if (params.isHeaderOnly()) return;
@@ -2595,6 +2598,7 @@ public Structure parsePDBFile(BufferedReader buf)
25952598
current_compound = null;
25962599
sourceLines.clear();
25972600
compndLines.clear();
2601+
keywordsLines.clear();
25982602
isLastCompndLine = false;
25992603
isLastSourceLine = false;
26002604
prevMolId = -1;
@@ -2660,6 +2664,8 @@ else if (recordName.equals("SOURCE"))
26602664
sourceLines.add(line); //pdb_SOURCE_Handler
26612665
else if (recordName.equals("COMPND"))
26622666
compndLines.add(line); //pdb_COMPND_Handler
2667+
else if (recordName.equals("KEYWDS"))
2668+
keywordsLines.add(line);
26632669
else if (recordName.equals("JRNL"))
26642670
pdb_JRNL_Handler(line);
26652671
else if (recordName.equals("EXPDTA"))
@@ -2693,6 +2699,8 @@ else if ( params.isParseSecStruc()) {
26932699
}
26942700

26952701
makeCompounds(compndLines, sourceLines);
2702+
2703+
handlePDBKeywords(keywordsLines);
26962704

26972705
triggerEndFileChecks();
26982706

@@ -2757,6 +2765,52 @@ private void makeCompounds(List<String> compoundList,
27572765

27582766
}
27592767

2768+
/**Parse KEYWODS record of the PDF file.<br>
2769+
* A keyword may be split over two lines. whether a keyword ends by the end
2770+
* of a line or it is aplit over two lines, a <code>space</code> is added
2771+
* between the 2 lines's contents, unless the first line ends in
2772+
* a '-' character.
2773+
* <pre>
2774+
* Record Format
2775+
* COLUMNS DATA TYPE FIELD DEFINITION
2776+
* ---------------------------------------------------------------------------------
2777+
* 1 - 6 Record name "KEYWDS"
2778+
* 9 - 10 Continuation continuation Allows concatenation of records if necessary.
2779+
* 11 - 79 List keywds Comma-separated list of keywords relevant
2780+
* to the entry.
2781+
* Example
2782+
* 1 2 3 4 5 6 7 8
2783+
* 12345678901234567890123456789012345678901234567890123456789012345678901234567890
2784+
* KEYWDS LYASE, TRICARBOXYLIC ACID CYCLE, MITOCHONDRION, OXIDATIVE
2785+
* KEYWDS 2 METABOLISM
2786+
* </pre>
2787+
* @param lines The KEWODS record lines.
2788+
* @author Amr ALHOSSARY
2789+
*/
2790+
private void handlePDBKeywords(List<String> lines) {
2791+
StringBuilder fullList = new StringBuilder();
2792+
for (String line : lines) {
2793+
String kwList = line.substring(10).trim();
2794+
if(kwList.length() > 0) {
2795+
if(fullList.length() > 0 && fullList.indexOf("-", fullList.length()-1) < 0) {
2796+
fullList.append(' ');
2797+
}
2798+
fullList.append(kwList);
2799+
}
2800+
}
2801+
String fulllengthList = fullList.toString();
2802+
keywords = fulllengthList.split("( )*,( )*");
2803+
ArrayList<String> lst = new ArrayList<String>(keywords.length);
2804+
for (String keyword : keywords) {
2805+
if(keyword.length() == 0) {
2806+
logger.warn("Keyword empty in structure "+ structure.getIdentifier().toString());
2807+
continue;
2808+
}
2809+
lst.add(keyword);
2810+
}
2811+
structure.setKeywords(lst);
2812+
}
2813+
27602814
/**
27612815
* Handles creation of all bonds. Looks at LINK records, SSBOND (Disulfide
27622816
* bonds), peptide bonds, and intra-residue bonds.
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
package org.biojava.nbio.structure;
2+
3+
import static org.junit.Assert.assertEquals;
4+
5+
import java.io.IOException;
6+
import java.io.InputStream;
7+
import java.util.List;
8+
import java.util.zip.GZIPInputStream;
9+
10+
import org.biojava.nbio.structure.io.PDBFileParser;
11+
import org.junit.Test;
12+
13+
public class TestKeywords {
14+
15+
@Test
16+
public void testKeywordsOnFiveLines () throws IOException {
17+
String fileName = "/3cdl.pdb";
18+
InputStream inStream = this.getClass().getResourceAsStream(fileName);
19+
20+
PDBFileParser pdbpars = new PDBFileParser();
21+
Structure structure = pdbpars.parsePDBFile(inStream);
22+
List<String> keywords = structure.getKeywords();
23+
assertEquals(keywords.size(), 12);
24+
assertEquals(keywords.get(11), "TRANSCRIPTION REGULATOR");
25+
}
26+
27+
@Test
28+
public void testDash() throws IOException {
29+
String fileName;
30+
fileName = "/pdb6elw.ent.gz";
31+
InputStream resourceAsStream = getClass().getResourceAsStream(fileName);
32+
GZIPInputStream inStream = new GZIPInputStream(resourceAsStream);
33+
34+
Structure structure = new PDBFileParser().parsePDBFile(inStream);
35+
36+
List<String> keywords = structure.getKeywords();
37+
assertEquals(keywords.size(), 6);
38+
assertEquals(keywords.get(3), "THIOREDOXIN-FOLD");
39+
assertEquals(keywords.get(4), "ANTI-OXIDATVE DEFENSE SYSTEM");
40+
}
41+
}
71 KB
Binary file not shown.

0 commit comments

Comments
 (0)