Skip to content

Commit dcdd364

Browse files
authored
Merge pull request #1001 from aalhossary/fix_1000_Resolution_lost_in_cryo-EM
Parsing resolution values in PDB (REMARK 2 and REMARK 3 [new format]) and mmCif
2 parents c8a3ca3 + f04916a commit dcdd364

File tree

5 files changed

+64
-14
lines changed

5 files changed

+64
-14
lines changed

biojava-structure/src/main/java/org/biojava/nbio/structure/PDBHeader.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -580,6 +580,18 @@ public void setCrystallographicInfo(PDBCrystallographicInfo crystallographicInfo
580580
this.crystallographicInfo = crystallographicInfo;
581581
}
582582

583+
/**
584+
* Returns the resolution (or effective resolution) of the experiment. This is
585+
* related to <code>_refine.ls_d_res_high</code> (DIFFRACTION) or
586+
* <code>_em_3d_reconstruction.resolution</code> (ELECTRON MICROSCOPY) for mmCif
587+
* format, or to <code>REMARK 2</code> or <code>REMARK 3</code> for PDB legacy
588+
* format. If more than one value is available (in rare cases), the last one is
589+
* reported. If no value is available, it defaults to
590+
* {@link #DEFAULT_RESOLUTION} ({@value #DEFAULT_RESOLUTION}).
591+
*
592+
* @return The reported experiment resolution, {@link #DEFAULT_RESOLUTION}
593+
* ({@value #DEFAULT_RESOLUTION}) if no value is available.
594+
*/
583595
public float getResolution() {
584596
return resolution;
585597
}

biojava-structure/src/main/java/org/biojava/nbio/structure/io/PDBFileParser.java

Lines changed: 25 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1351,7 +1351,7 @@ private void pdb_REMARK_Handler(String line) {
13511351
if (line.startsWith("REMARK 800")) {
13521352
pdb_REMARK_800_Handler(line);
13531353

1354-
} else if ( line.startsWith("REMARK 350")){
1354+
} else if ( line.startsWith("REMARK 350")){
13551355

13561356
if ( params.isParseBioAssembly()) {
13571357

@@ -1361,6 +1361,10 @@ private void pdb_REMARK_Handler(String line) {
13611361

13621362
bioAssemblyParser.pdb_REMARK_350_Handler(line);
13631363
}
1364+
} else if (line.startsWith("REMARK 2")) {
1365+
//REMARK 2 RESOLUTION.
1366+
Pattern pR = Pattern.compile("^REMARK 2 RESOLUTION.\\s+(\\d+\\.\\d+)\\s+ANGSTROMS\\..*");
1367+
handleResolutionLine(line, pR);
13641368

13651369
// REMARK 3 (for R free)
13661370
// note: if more than 1 value present (occurring in hybrid experimental technique entries, e.g. 3ins, 4n9m)
@@ -1396,21 +1400,29 @@ private void pdb_REMARK_Handler(String line) {
13961400
// then last one encountered will be taken
13971401
} else if (line.startsWith("REMARK 3 RESOLUTION RANGE HIGH")){
13981402
Pattern pR = Pattern.compile("^REMARK 3 RESOLUTION RANGE HIGH \\(ANGSTROMS\\) :\\s+(\\d+\\.\\d+).*");
1399-
Matcher mR = pR.matcher(line);
1400-
if (mR.matches()) {
1401-
try {
1402-
float res = Float.parseFloat(mR.group(1));
1403-
if (pdbHeader.getResolution()!=PDBHeader.DEFAULT_RESOLUTION) {
1404-
logger.warn("More than 1 resolution value present, will use last one {} and discard previous {} "
1405-
,mR.group(1), String.format("%4.2f",pdbHeader.getResolution()));
1406-
}
1407-
pdbHeader.setResolution(res);
1408-
} catch (NumberFormatException e) {
1409-
logger.info("Could not parse resolution '{}', ignoring it",mR.group(1));
1403+
handleResolutionLine(line, pR);
1404+
} else if (line.startsWith("REMARK 3 EFFECTIVE RESOLUTION")){
1405+
Pattern pR = Pattern.compile("^REMARK 3 EFFECTIVE RESOLUTION \\(ANGSTROMS\\)\\s+:\\s+(\\d+\\.\\d+).*");
1406+
handleResolutionLine(line, pR);
1407+
}
1408+
}
1409+
1410+
public void handleResolutionLine(String line, Pattern pR) {
1411+
Matcher mR = pR.matcher(line);
1412+
if (mR.matches()) {
1413+
final String resString = mR.group(1);
1414+
try {
1415+
float res = Float.parseFloat(resString);
1416+
final float resInHeader = pdbHeader.getResolution();
1417+
if (resInHeader!=PDBHeader.DEFAULT_RESOLUTION && resInHeader != res) {
1418+
logger.warn("More than 1 resolution value present, will use last one {} and discard previous {} "
1419+
,resString, String.format("%4.2f",resInHeader));
14101420
}
1421+
pdbHeader.setResolution(res);
1422+
} catch (NumberFormatException e) {
1423+
logger.info("Could not parse resolution '{}', ignoring it",resString);
14111424
}
14121425
}
1413-
14141426
}
14151427

14161428

biojava-structure/src/main/java/org/biojava/nbio/structure/io/cif/CifStructureConsumer.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import org.rcsb.cif.schema.mm.DatabasePDBRemark;
1111
import org.rcsb.cif.schema.mm.DatabasePDBRev;
1212
import org.rcsb.cif.schema.mm.DatabasePDBRevRecord;
13+
import org.rcsb.cif.schema.mm.Em3dReconstruction;
1314
import org.rcsb.cif.schema.mm.Entity;
1415
import org.rcsb.cif.schema.mm.EntityPoly;
1516
import org.rcsb.cif.schema.mm.EntityPolySeq;
@@ -107,7 +108,13 @@ public interface CifStructureConsumer extends CifFileConsumer<Structure> {
107108
*/
108109
void consumeDatabasePDBRevRecord(DatabasePDBRevRecord databasePDBrevRecord);
109110

110-
/**
111+
/**
112+
* Consume Electron Microscopy 3D reconstruction data
113+
* @param em3dReconstruction data
114+
*/
115+
void consumeEm3dReconstruction(Em3dReconstruction em3dReconstruction);
116+
117+
/**
111118
* Consume a particular Cif category.
112119
* @param entity data
113120
*/

biojava-structure/src/main/java/org/biojava/nbio/structure/io/cif/CifStructureConsumerImpl.java

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@
6969
import org.rcsb.cif.schema.mm.DatabasePDBRemark;
7070
import org.rcsb.cif.schema.mm.DatabasePDBRev;
7171
import org.rcsb.cif.schema.mm.DatabasePDBRevRecord;
72+
import org.rcsb.cif.schema.mm.Em3dReconstruction;
7273
import org.rcsb.cif.schema.mm.Entity;
7374
import org.rcsb.cif.schema.mm.EntityPoly;
7475
import org.rcsb.cif.schema.mm.EntityPolySeq;
@@ -128,6 +129,7 @@ public class CifStructureConsumerImpl implements CifStructureConsumer {
128129
private List<Chain> currentModel;
129130
private PDBHeader pdbHeader;
130131
private String currentNmrModelNumber;
132+
private Em3dReconstruction em3dReconstruction;
131133
private List<Chain> entityChains;
132134

133135
private Entity entity;
@@ -644,6 +646,18 @@ public void consumeDatabasePDBRevRecord(DatabasePDBRevRecord databasePDBrevRecor
644646
revRecords.add(new org.biojava.nbio.structure.DatabasePDBRevRecord(databasePDBrevRecord, i));
645647
}
646648
}
649+
650+
@Override
651+
public void consumeEm3dReconstruction(Em3dReconstruction em3dReconstruction) {
652+
this.em3dReconstruction = em3dReconstruction;
653+
654+
for (int rowIndex = 0; rowIndex < em3dReconstruction.getRowCount(); rowIndex++) { //can it have more than 1 value?
655+
final FloatColumn resolution = em3dReconstruction.getResolution();
656+
if (ValueKind.PRESENT.equals(resolution.getValueKind(rowIndex)))
657+
pdbHeader.setResolution((float) resolution.get(rowIndex));
658+
}
659+
//TODO other fields (maybe RFree)?
660+
}
647661

648662
@Override
649663
public void consumeEntity(Entity entity) {
@@ -831,6 +845,10 @@ public void consumePdbxStructOperList(PdbxStructOperList pdbxStructOperList) {
831845
public void consumeRefine(Refine refine) {
832846
for (int rowIndex = 0; rowIndex < refine.getRowCount(); rowIndex++) {
833847
// RESOLUTION
848+
ValueKind valueKind = refine.getLsDResHigh().getValueKind(rowIndex);
849+
if (! ValueKind.PRESENT.equals(valueKind)) {
850+
continue;
851+
}
834852
// in very rare cases (for instance hybrid methods x-ray + neutron diffraction, e.g. 3ins, 4n9m)
835853
// there are 2 resolution values, one for each method
836854
// we take the last one found so that behaviour is like in PDB file parsing

biojava-structure/src/main/java/org/biojava/nbio/structure/io/cif/CifStructureConverter.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,7 @@ public static Structure fromCifFile(CifFile cifFile, FileParsingParameters param
118118
consumer.consumeDatabasePDBRemark(cifBlock.getDatabasePDBRemark());
119119
consumer.consumeDatabasePDBRev(cifBlock.getDatabasePDBRev());
120120
consumer.consumeDatabasePDBRevRecord(cifBlock.getDatabasePDBRevRecord());
121+
consumer.consumeEm3dReconstruction(cifBlock.getEm3dReconstruction());
121122
consumer.consumeEntity(cifBlock.getEntity());
122123
consumer.consumeEntityPoly(cifBlock.getEntityPoly());
123124
consumer.consumeEntitySrcGen(cifBlock.getEntitySrcGen());

0 commit comments

Comments
 (0)