Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,7 @@ private void testHeader(Structure sPdb, Structure sCif) {
// description is set in CIF parser to same as classification (_struct_keywords.pdbx_keywords field)
// while in PDB parser it is simply not set
//assertNotNull("pdb description null",hPdb.getDescription());
assertNotNull("cif description null",hCif.getDescription());
//assertNotNull("cif description null",hCif.getDescription()); //If we will enable this test again, we may use assertNull instead
//assertEquals("failed getDescription:",hPdb.getDescription().toLowerCase(), hCif.getDescription().toLowerCase());

assertEquals("failed getDepDate:",hPdb.getDepDate(), hCif.getDepDate());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import java.net.URL;
import java.util.Arrays;

import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;

Expand All @@ -47,6 +48,18 @@ public void test1A2C() throws IOException {
testRoundTrip("1A2C");
}

/**
 * Runs the round-trip check (see {@code testRoundTrip}) for entry 3CDL.
 *
 * @throws IOException propagated from {@code testRoundTrip}
 */
@Test
public void test3CDL() throws IOException {
// a structure with insertion codes
testRoundTrip("3CDL");
}

/**
 * Runs the round-trip check (see {@code testRoundTrip}) for entry 6ELW.
 *
 * @throws IOException propagated from {@code testRoundTrip}
 */
@Test
public void test6ELW() throws IOException {
// NOTE(review): the comment below is identical to the one in test3CDL —
// confirm it really applies to 6ELW as well.
// a structure with insertion codes
testRoundTrip("6ELW");
}

private static void testRoundTrip(String pdbId) throws IOException {
URL url = new URL("https://files.rcsb.org/download/" + pdbId + ".cif");
Structure originalStruct = CifStructureConverter.fromURL(url);
Expand All @@ -57,6 +70,10 @@ private static void testRoundTrip(String pdbId) throws IOException {
assertNotNull(readStruct);
assertEquals(originalStruct.getChains().size(), readStruct.getChains().size());
assertEquals(originalStruct.nrModels(), readStruct.nrModels());

assertArrayEquals("Keywords Are not preserved",
originalStruct.getPDBHeader().getKeywords().toArray(),
readStruct.getPDBHeader().getKeywords().toArray());

for (int i = 0; i < originalStruct.nrModels(); i++) {
assertEquals(originalStruct.getModel(i).size(), readStruct.getModel(i).size());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,12 @@

/**
* A class that contains PDB Header information.
*
* In contrast to what the name suggests, this class does not represent a
* direct mapping of the Header section of the PDB legacy file format.
* Instead, it holds the information that is not directly related to the
* structure data. Such information may exist in some cases and may not exist in
* other cases.
*
* @author Andreas Prlic
* @since 1.6
*
Expand All @@ -47,7 +52,10 @@ public class PDBHeader implements PDBRecord {
private static final Logger logger = LoggerFactory.getLogger(PDBHeader.class);

private String title;
/**@deprecated This field should not be used. It will be removed later.
* Use {@link #getKeywords()} instead. */
private String description;
private List<String> keywords;
private String idCode;
private String classification;

Expand Down Expand Up @@ -92,6 +100,8 @@ public PDBHeader(){
bioAssemblies = new LinkedHashMap<Integer, BioAssemblyInfo>();
crystallographicInfo = new PDBCrystallographicInfo();

keywords = new ArrayList<>();

}

/** String representation
Expand Down Expand Up @@ -589,9 +599,20 @@ public String getTitle() {
/**
 * Sets the title stored in this header.
 *
 * @param title the title to store
 */
public void setTitle(String title) {
this.title = title;
}

/**
 * Returns the description stored in this header.
 *
 * @deprecated will be removed later. Use {@link #getKeywords()} if you use
 * <code>description</code> to keep the keywords.
 * @return the description currently stored in this header
 */
@Deprecated
public String getDescription() {
return description;
}
/**
 * Stores the given description in this header.
 *
 * @deprecated will be removed later. Use {@link #getKeywords()} if you use
 * <code>description</code> to keep the keywords.
 * @param description the description to store
 */
@Deprecated
public void setDescription(String description) {
this.description = description;
}
Expand Down Expand Up @@ -683,4 +704,22 @@ public float getRwork() {
/**
 * Sets the R-work value stored in this header.
 *
 * @param rWork the value to store
 */
public void setRwork(float rWork) {
this.rWork = rWork;
}

/**
 * Gets the keywords (KEYWDS record) of the structure.
 * @return The keywords in a <code>List&lt;String&gt;</code>
 * @since 6.0.0
 */
public List<String> getKeywords() {
return keywords;
}

/**
 * Sets the keywords (KEYWDS record) of the structure.
 * <p>
 * A <code>null</code> argument is stored as an empty list, so that
 * {@link #getKeywords()} keeps returning a non-null list, as it does for a
 * freshly constructed header.
 *
 * @param keywords The keywords to set, as a <code>List&lt;String&gt;</code>.
 * @since 6.0.0
 */
public void setKeywords(List<String> keywords) {
    this.keywords = (keywords == null) ? new ArrayList<String>() : keywords;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -665,7 +665,9 @@ public interface Structure extends Cloneable, Serializable {
EntityInfo getEntityById(int entityId);

/**
* Return the header information for this PDB file
* Return the header information for this PDB file.
* <b>N.B.</b> The returned object may be null in some cases, so check it
* for null before using it.
*
* @return the PDBHeader object
*/
Expand Down Expand Up @@ -788,4 +790,5 @@ public interface Structure extends Cloneable, Serializable {
* returned the same value as {@link #getPDBCode()}
*/
String getIdentifier();

}
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,7 @@ public class PDBFileParser {
private List<String> compndLines = new ArrayList<String>();
private List<String> sourceLines = new ArrayList<String>();
private List<String> journalLines = new ArrayList<String>();
private List<String> keywordsLines = new ArrayList<String>();
private List<DBRef> dbrefs;
private Map<String, Site> siteMap = new LinkedHashMap<String, Site>();
private Map<String, List<ResidueNumber>> siteToResidueMap = new LinkedHashMap<String, List<ResidueNumber>>();
Expand Down Expand Up @@ -2442,6 +2443,8 @@ private void pdb_SITE_Handler(String line){

//Site variable related to parsing the REMARK 800 records.
Site site;

private String[] keywords;
private void pdb_REMARK_800_Handler(String line){

if (params.isHeaderOnly()) return;
Expand Down Expand Up @@ -2595,6 +2598,7 @@ public Structure parsePDBFile(BufferedReader buf)
current_compound = null;
sourceLines.clear();
compndLines.clear();
keywordsLines.clear();
isLastCompndLine = false;
isLastSourceLine = false;
prevMolId = -1;
Expand Down Expand Up @@ -2660,6 +2664,8 @@ else if (recordName.equals("SOURCE"))
sourceLines.add(line); //pdb_SOURCE_Handler
else if (recordName.equals("COMPND"))
compndLines.add(line); //pdb_COMPND_Handler
else if (recordName.equals("KEYWDS"))
keywordsLines.add(line);
else if (recordName.equals("JRNL"))
pdb_JRNL_Handler(line);
else if (recordName.equals("EXPDTA"))
Expand Down Expand Up @@ -2693,6 +2699,8 @@ else if ( params.isParseSecStruc()) {
}

makeCompounds(compndLines, sourceLines);

handlePDBKeywords(keywordsLines);

triggerEndFileChecks();

Expand Down Expand Up @@ -2757,6 +2765,52 @@ private void makeCompounds(List<String> compoundList,

}

/**
 * Parses the KEYWDS records of the PDB file and stores the resulting
 * keywords in the PDB header.<br>
 * A keyword may be split over two lines. Whether a keyword ends at the end
 * of a line or is split over two lines, a <code>space</code> is added
 * between the contents of the two lines, unless the first line ends in
 * a '-' character.
 * <p>
 * Lines too short to contain any keyword text are ignored. If no keyword
 * text is found at all, an empty list is stored and no warning is logged.
 * <pre>
 * Record Format
 * COLUMNS       DATA TYPE     FIELD         DEFINITION
 * ---------------------------------------------------------------------------------
 *  1 -  6       Record name   "KEYWDS"
 *  9 - 10       Continuation  continuation  Allows concatenation of records if necessary.
 * 11 - 79       List          keywds        Comma-separated list of keywords relevant
 *                                           to the entry.
 * Example
 *          1         2         3         4         5         6         7         8
 * 12345678901234567890123456789012345678901234567890123456789012345678901234567890
 * KEYWDS    LYASE, TRICARBOXYLIC ACID CYCLE, MITOCHONDRION, OXIDATIVE
 * KEYWDS   2 METABOLISM
 * </pre>
 * @param lines The KEYWDS record lines.
 * @author Amr ALHOSSARY
 */
private void handlePDBKeywords(List<String> lines) {
    StringBuilder fullList = new StringBuilder();
    for (String line : lines) {
        // Keyword text starts at column 11. A shorter line carries no keyword
        // text, and substring(10) would throw on it.
        if (line.length() <= 10) {
            continue;
        }
        String kwList = line.substring(10).trim();
        if (kwList.isEmpty()) {
            continue;
        }
        // Join with a space unless the text collected so far ends in '-'
        // (a hyphenated keyword broken across two lines).
        if (fullList.length() > 0 && fullList.charAt(fullList.length() - 1) != '-') {
            fullList.append(' ');
        }
        fullList.append(kwList);
    }

    List<String> lst = new ArrayList<String>();
    if (fullList.length() == 0) {
        // No keyword text at all: splitting "" would yield one empty token and
        // trigger a misleading warning, so store an empty list directly.
        keywords = new String[0];
        pdbHeader.setKeywords(lst);
        return;
    }

    // Split on commas, swallowing any spaces around them.
    keywords = fullList.toString().split("( )*,( )*");
    for (String keyword : keywords) {
        if (keyword.isEmpty()) {
            // Parameterized logging also copes with a null identifier.
            logger.warn("Keyword empty in structure {}", structure.getIdentifier());
            continue;
        }
        lst.add(keyword);
    }
    pdbHeader.setKeywords(lst);
}

/**
* Handles creation of all bonds. Looks at LINK records, SSBOND (Disulfide
* bonds), peptide bonds, and intra-residue bonds.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@ protected CifFile getInternal(Structure structure, List<WrappedAtom> wrappedAtom
MmCifBlockBuilder blockBuilder = CifBuilder.enterFile(StandardSchemata.MMCIF)
.enterBlock(structure.getPDBCode());

blockBuilder.enterStructKeywords().enterText()
.add(String.join(", ", structure.getPDBHeader().getKeywords()))
.leaveColumn().leaveCategory();

if (atomSite.isDefined() && atomSite.getRowCount() > 0) {
// set atom site
blockBuilder.addCategory(atomSite);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,24 @@
package org.biojava.nbio.structure.io.cif;

import java.time.LocalDate;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeFormatterBuilder;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Optional;
import java.util.OptionalInt;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

import javax.vecmath.Matrix4d;

import org.biojava.nbio.structure.AminoAcid;
import org.biojava.nbio.structure.AminoAcidImpl;
import org.biojava.nbio.structure.Atom;
Expand Down Expand Up @@ -88,15 +107,6 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.vecmath.Matrix4d;
import java.time.LocalDate;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeFormatterBuilder;
import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

/**
* An implementation of a CifFileConsumer for BioJava. Will process the information provided by a CifFile instance and
* use it to build up a {@link Structure} object.
Expand Down Expand Up @@ -885,12 +895,24 @@ public void consumeStructConnType(StructConnType structConnType) {

/**
 * Consumes the struct keywords category: copies the keyword list from its
 * text column into the PDB header, and copies the pdbx keywords column into
 * the header's classification/description.
 *
 * @param structKeywords the category to read the keywords from
 */
@Override
public void consumeStructKeywords(StructKeywords structKeywords) {
ArrayList<String> keywordsList = new ArrayList<String>();

StrColumn text = structKeywords.getText();
if (text.isDefined()) {
// Only the first value of the text column is read. It is split on commas
// (with optional spaces around them) and every piece is trimmed.
String keywords = text.get(0);
String[] strings = keywords.split(" *, *");
for (String string : strings) {
keywordsList.add(string.trim());
}
}
// The list is stored even when the text column is undefined (it is then empty).
structure.getPDBHeader().setKeywords(keywordsList);

StrColumn pdbxKeywords = structKeywords.getPdbxKeywords();
// TODO what is the correct format for these?
if (pdbxKeywords.isDefined()) {
// NOTE(review): `keywords` is declared twice inside this block (joined values
// here, first value just below) — this looks like pre- and post-change lines
// shown together; confirm which pair is current.
String keywords = pdbxKeywords.values().collect(Collectors.joining(", "));
pdbHeader.setDescription(keywords);
String keywords = pdbxKeywords.get(0);
pdbHeader.setClassification(keywords);
//This field should be left empty. TODO The next line should be removed later
pdbHeader.setDescription(keywords);
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package org.biojava.nbio.structure;

import static org.junit.Assert.assertEquals;

import java.io.IOException;
import java.io.InputStream;
import java.util.List;
import java.util.zip.GZIPInputStream;

import org.biojava.nbio.structure.io.PDBFileParser;
import org.junit.Test;

/**
 * Tests that the KEYWDS records of a PDB file are parsed into the keyword
 * list of the PDB header.
 */
public class TestKeywords {

    /**
     * Parses the bundled <code>/3cdl.pdb</code> resource and checks the number
     * of keywords and the last keyword.
     *
     * @throws IOException if reading or closing the resource fails
     */
    @Test
    public void testKeywordsOnFiveLines() throws IOException {
        String fileName = "/3cdl.pdb";
        // try-with-resources so the resource stream is always closed
        try (InputStream inStream = this.getClass().getResourceAsStream(fileName)) {
            PDBFileParser pdbpars = new PDBFileParser();
            Structure structure = pdbpars.parsePDBFile(inStream);
            List<String> keywords = structure.getPDBHeader().getKeywords();
            assertEquals(12, keywords.size());
            assertEquals("TRANSCRIPTION REGULATOR", keywords.get(11));
        }
    }

    /**
     * Parses the bundled gzipped <code>/pdb6elw-26lines.ent.gz</code> resource
     * and checks keywords that contain a '-' character.
     *
     * @throws IOException if reading or closing the resource fails
     */
    @Test
    public void testDash() throws IOException {
        String fileName = "/pdb6elw-26lines.ent.gz";
        // Both the raw resource stream and the GZIP wrapper are closed on exit.
        try (InputStream resourceAsStream = getClass().getResourceAsStream(fileName);
                GZIPInputStream inStream = new GZIPInputStream(resourceAsStream)) {
            Structure structure = new PDBFileParser().parsePDBFile(inStream);

            List<String> keywords = structure.getPDBHeader().getKeywords();
            assertEquals(6, keywords.size());
            assertEquals("THIOREDOXIN-FOLD", keywords.get(3));
            assertEquals("ANTI-OXIDATVE DEFENSE SYSTEM", keywords.get(4));
        }
    }
}
Binary file not shown.