Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
package org.biojava.nbio.structure.test.io;

import static org.junit.Assume.assumeNotNull;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNull;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;

import org.biojava.nbio.structure.Structure;
import org.biojava.nbio.structure.StructureException;
import org.biojava.nbio.structure.StructureIO;
import org.biojava.nbio.structure.align.client.StructureName;
import org.biojava.nbio.structure.io.PDBFileParser;
import org.biojava.nbio.structure.io.cif.CifStructureConverter;
import org.junit.jupiter.api.Test;

/**
* Test that various identifiers in Structure are parsed consistently across
* file types.
*
* These tests are not intended to be prescriptive, but merely to document the
* current state and force changes to be committed to git.
*/
public class TestIdentifiersInStructure {
@Test
public void testCifIdentifiers() throws IOException, StructureException {
String noid = "/5pti_noid.cif";
String yesid = "/5pti.cif";

// No ID, from file
URL url = this.getClass().getResource(noid);
String file = url.getPath();
Structure s = CifStructureConverter.fromURL(url);
assertNull(s.getPdbId());
assertEquals("", s.getIdentifier());
assertEquals("", s.getName());
assertNull(s.getStructureIdentifier());

// No ID, from StructureIO
s = StructureIO.getStructure(file);
assertNull(s.getPdbId());
assertEquals(file, s.getIdentifier());
assertEquals("5PTI", s.getName());
StructureName sid = (StructureName) s.getStructureIdentifier();
assertEquals(file, sid.getIdentifier());

// No id, from stream
InputStream stream = this.getClass().getResourceAsStream(noid);
s = CifStructureConverter.fromInputStream(stream);
assertNull(s.getPdbId());
assertEquals("", s.getIdentifier());
assertEquals("", s.getName());
assertNull(s.getStructureIdentifier());

// With ID, from file
url = this.getClass().getResource(yesid);
file = url.getPath();
s = CifStructureConverter.fromURL(url);
assertEquals("5PTI", s.getPdbId().getId());
assertEquals("", s.getIdentifier());
assertEquals("", s.getName());
assertNull(s.getStructureIdentifier());

// With ID, from StructureIO
s = StructureIO.getStructure(file);
assertEquals("5PTI", s.getPdbId().getId());
assertEquals(file, s.getIdentifier());
assertEquals("5PTI", s.getName());
sid = (StructureName) s.getStructureIdentifier();
assertEquals(file, sid.getIdentifier());

// With id, from stream
stream = this.getClass().getResourceAsStream(yesid);
s = CifStructureConverter.fromInputStream(stream);
assertEquals("5PTI", s.getPdbId().getId());
assertEquals("", s.getIdentifier());
assertEquals("", s.getName());
assertNull(s.getStructureIdentifier());
}

@Test
public void testPDBIdentifiers() throws IOException, StructureException {
String yesid = "/5pti.pdb";
String noid = "/noid.pdb";

assumeNotNull(this.getClass().getResource(yesid));
assumeNotNull(this.getClass().getResource(noid));

PDBFileParser parser = new PDBFileParser();

// No id, from StructureIO
String file = this.getClass().getResource(noid).getPath();
// Structure s = parser.parsePDBFile(file);
Structure s = StructureIO.getStructure(file);
assertNull(s.getPdbId());
assertEquals(file, s.getIdentifier());
assertEquals("", s.getName()); // differs from CIF behavior
StructureName sid = (StructureName) s.getStructureIdentifier();
assertEquals(file, sid.getIdentifier());

// No id, from stream
InputStream stream = this.getClass().getResourceAsStream(noid);
s = parser.parsePDBFile(new BufferedReader(new InputStreamReader(stream)));
assertNull(s.getPdbId());
assertEquals("", s.getIdentifier());
assertEquals("", s.getName());
assertNull(s.getStructureIdentifier());

// With id, from StructureIO
file = this.getClass().getResource(yesid).getPath();
s = StructureIO.getStructure(file);
assertEquals("5PTI", s.getPdbId().toString());
assertEquals(file, s.getIdentifier());
assertEquals("5PTI", s.getName());
sid = (StructureName) s.getStructureIdentifier();
assertEquals(file, sid.getIdentifier());


// With id, from stream
stream = this.getClass().getResourceAsStream(yesid);
s = parser.parsePDBFile(new BufferedReader(new InputStreamReader(stream)));
assertEquals("5PTI", s.getPdbId().toString());
assertEquals("", s.getIdentifier());
assertEquals("", s.getName());
assertNull(s.getStructureIdentifier());
}
}
73 changes: 73 additions & 0 deletions biojava-integrationtest/src/test/resources/5pti_noid.cif
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
data_
#
loop_
_entity.id
_entity.type
_entity.src_method
_entity.pdbx_description
_entity.formula_weight
_entity.pdbx_number_of_molecules
_entity.details
1 polymer man 'TRYPSIN INHIBITOR' 6527.598 1 ?
#
loop_
_struct_asym.id
_struct_asym.pdbx_blank_PDB_chainid_flag
_struct_asym.pdbx_modified
_struct_asym.entity_id
_struct_asym.details
A N N 1 ?
#
loop_
_atom_site.group_PDB
_atom_site.id
_atom_site.type_symbol
_atom_site.label_atom_id
_atom_site.label_alt_id
_atom_site.label_comp_id
_atom_site.label_asym_id
_atom_site.label_entity_id
_atom_site.label_seq_id
_atom_site.pdbx_PDB_ins_code
_atom_site.Cartn_x
_atom_site.Cartn_y
_atom_site.Cartn_z
_atom_site.occupancy
_atom_site.B_iso_or_equiv
_atom_site.Cartn_x_esd
_atom_site.Cartn_y_esd
_atom_site.Cartn_z_esd
_atom_site.occupancy_esd
_atom_site.B_iso_or_equiv_esd
_atom_site.pdbx_formal_charge
_atom_site.auth_seq_id
_atom_site.auth_comp_id
_atom_site.auth_asym_id
_atom_site.auth_atom_id
_atom_site.pdbx_PDB_model_num
ATOM 1 N N . ARG A 1 1 ? 32.231 15.281 -13.143 1.00 28.28 ? ? ? ? ? ? 1 ARG A N 1
ATOM 2 C CA . ARG A 1 1 ? 32.184 14.697 -11.772 1.00 27.90 ? ? ? ? ? ? 1 ARG A CA 1
ATOM 3 C C . ARG A 1 1 ? 33.438 13.890 -11.387 1.00 24.90 ? ? ? ? ? ? 1 ARG A C 1
ATOM 4 O O . ARG A 1 1 ? 34.102 13.070 -12.066 1.00 24.44 ? ? ? ? ? ? 1 ARG A O 1
ATOM 5 C CB . ARG A 1 1 ? 30.797 14.065 -11.625 1.00 27.88 ? ? ? ? ? ? 1 ARG A CB 1
ATOM 6 C CG . ARG A 1 1 ? 30.976 12.589 -11.819 1.00 29.61 ? ? ? ? ? ? 1 ARG A CG 1
ATOM 7 C CD . ARG A 1 1 ? 29.608 12.016 -11.694 1.00 31.91 ? ? ? ? ? ? 1 ARG A CD 1
ATOM 8 N NE . ARG A 1 1 ? 28.942 12.335 -12.945 1.00 33.51 ? ? ? ? ? ? 1 ARG A NE 1
ATOM 9 C CZ . ARG A 1 1 ? 27.670 12.696 -13.050 1.00 34.29 ? ? ? ? ? ? 1 ARG A CZ 1
ATOM 10 N NH1 . ARG A 1 1 ? 26.901 12.777 -11.999 1.00 34.48 ? ? ? ? ? ? 1 ARG A NH1 1
ATOM 11 N NH2 . ARG A 1 1 ? 27.161 12.963 -14.255 1.00 35.44 ? ? ? ? ? ? 1 ARG A NH2 1
ATOM 12 D D1 . ARG A 1 1 ? 32.983 14.824 -13.703 1.00 27.71 ? ? ? ? ? ? 1 ARG A D1 1
ATOM 13 D D2 . ARG A 1 1 ? 31.275 15.112 -13.535 1.00 28.50 ? ? ? ? ? ? 1 ARG A D2 1
ATOM 14 D D3 . ARG A 1 1 ? 32.174 16.346 -13.050 1.00 28.23 ? ? ? ? ? ? 1 ARG A D3 1
ATOM 15 H HA . ARG A 1 1 ? 32.192 15.563 -11.115 1.00 26.97 ? ? ? ? ? ? 1 ARG A HA 1
ATOM 16 H HB2 . ARG A 1 1 ? 30.392 14.428 -10.697 1.00 28.71 ? ? ? ? ? ? 1 ARG A HB2 1
ATOM 17 H HB3 . ARG A 1 1 ? 30.182 14.438 -12.437 1.00 28.97 ? ? ? ? ? ? 1 ARG A HB3 1
ATOM 18 H HG2 . ARG A 1 1 ? 31.369 12.359 -12.800 1.00 29.44 ? ? ? ? ? ? 1 ARG A HG2 1
ATOM 19 H HG3 . ARG A 1 1 ? 31.591 12.143 -11.105 1.00 29.52 ? ? ? ? ? ? 1 ARG A HG3 1
ATOM 20 H HD2 . ARG A 1 1 ? 29.656 10.965 -11.482 1.00 31.41 ? ? ? ? ? ? 1 ARG A HD2 1
ATOM 21 H HD3 . ARG A 1 1 ? 29.218 12.381 -10.794 1.00 31.29 ? ? ? ? ? ? 1 ARG A HD3 1
ATOM 22 D DE . ARG A 1 1 ? 29.457 12.292 -13.843 1.00 34.62 ? ? ? ? ? ? 1 ARG A DE 1
ATOM 23 D DH11 . ARG A 1 1 ? 25.930 13.038 -12.118 1.00 34.80 ? ? ? ? ? ? 1 ARG A DH11 1
ATOM 24 D DH12 . ARG A 1 1 ? 27.324 12.549 -11.100 1.00 34.71 ? ? ? ? ? ? 1 ARG A DH12 1
ATOM 25 D DH21 . ARG A 1 1 ? 27.786 12.886 -15.076 1.00 33.93 ? ? ? ? ? ? 1 ARG A DH21 1
ATOM 26 D DH22 . ARG A 1 1 ? 26.154 13.214 -14.269 1.00 34.53 ? ? ? ? ? ? 1 ARG A DH22 1
9 changes: 9 additions & 0 deletions biojava-integrationtest/src/test/resources/5pti_noid.pdb
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
HEADER 01-JUL-21
ATOM 1 N MET A 1 19.682 12.062 34.184 1.00 39.14 N
ATOM 2 CA MET A 1 20.443 10.838 34.522 1.00 39.14 C
ATOM 3 C MET A 1 20.073 9.731 33.538 1.00 39.14 C
ATOM 4 CB MET A 1 20.138 10.405 35.966 1.00 39.14 C
ATOM 5 O MET A 1 19.030 9.110 33.696 1.00 39.14 O
ATOM 6 CG MET A 1 20.829 11.294 37.004 1.00 39.14 C
ATOM 7 SD MET A 1 20.292 10.920 38.687 1.00 39.14 S
ATOM 8 CE MET A 1 21.522 11.848 39.645 1.00 39.14 C
Original file line number Diff line number Diff line change
Expand Up @@ -371,13 +371,16 @@ private void pdb_HEADER_Handler(String line) {

logger.debug("Parsing entry " + pdbId);


PdbId pdbIdToSet;
try {
pdbIdToSet = new PdbId(pdbCode);
} catch (IllegalArgumentException e) {
logger.info("Malformed (or null) PDB ID {}. setting PdbId to null", pdbCode);
if(pdbCode.isBlank()) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems that String.isBlank() is a java 11 feature that we can't use just yet. At least that's what IntelliJ tells me. Interestingly the tests passed in CI, not sure why.

In any case, I'd like to move to java 11 soon. I think it is long overdue. We'll try to address that separately.

pdbIdToSet = null;
} else {
try {
pdbIdToSet = new PdbId(pdbCode);
} catch (IllegalArgumentException e) {
logger.warn("Malformed PDB ID {}. setting PdbId to null", pdbCode);
pdbIdToSet = null;
}
}
structure.setPdbId(pdbIdToSet);
pdbHeader.setPdbId(pdbIdToSet);
Expand Down Expand Up @@ -2719,7 +2722,7 @@ else if ( params.isParseSecStruc()) {
}

makeCompounds(compndLines, sourceLines);

handlePDBKeywords(keywordsLines);

triggerEndFileChecks();
Expand Down Expand Up @@ -2786,18 +2789,18 @@ private void makeCompounds(List<String> compoundList,
}

/**Parse KEYWODS record of the PDB file.<br>
* A keyword may be split over two lines. whether a keyword ends by the end
* of a line or it is aplit over two lines, a <code>space</code> is added
* between the 2 lines's contents, unless the first line ends in
* A keyword may be split over two lines. whether a keyword ends by the end
* of a line or it is aplit over two lines, a <code>space</code> is added
* between the 2 lines's contents, unless the first line ends in
* a '-' character.
* <pre>
* Record Format
* COLUMNS DATA TYPE FIELD DEFINITION
* COLUMNS DATA TYPE FIELD DEFINITION
* ---------------------------------------------------------------------------------
* 1 - 6 Record name "KEYWDS"
* 1 - 6 Record name "KEYWDS"
* 9 - 10 Continuation continuation Allows concatenation of records if necessary.
* 11 - 79 List keywds Comma-separated list of keywords relevant
* to the entry.
* to the entry.
* Example
* 1 2 3 4 5 6 7 8
* 12345678901234567890123456789012345678901234567890123456789012345678901234567890
Expand Down Expand Up @@ -2830,7 +2833,7 @@ private void handlePDBKeywords(List<String> lines) {
}
pdbHeader.setKeywords(lst);
}

/**
* Handles creation of all bonds. Looks at LINK records, SSBOND (Disulfide
* bonds), peptide bonds, and intra-residue bonds.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -646,11 +646,11 @@ public void consumeDatabasePDBRevRecord(DatabasePDBRevRecord databasePDBrevRecor
revRecords.add(new org.biojava.nbio.structure.DatabasePDBRevRecord(databasePDBrevRecord, i));
}
}

@Override
public void consumeEm3dReconstruction(Em3dReconstruction em3dReconstruction) {
this.em3dReconstruction = em3dReconstruction;

for (int rowIndex = 0; rowIndex < em3dReconstruction.getRowCount(); rowIndex++) { //can it have more than 1 value?
final FloatColumn resolution = em3dReconstruction.getResolution();
if (ValueKind.PRESENT.equals(resolution.getValueKind(rowIndex)))
Expand Down Expand Up @@ -896,12 +896,16 @@ public void consumeStruct(Struct struct) {
if (struct.isDefined() && struct.getEntryId().isDefined()) {
PdbId pdbId;
String pdbCode = struct.getEntryId().get(0);
try {
pdbId = new PdbId(pdbCode);
} catch (IllegalArgumentException e) {
logger.info("Malformed (or null) PDB ID {}. setting PdbId to null", pdbCode);
pdbId = null;
}
if(pdbCode.isBlank()){
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same comment

pdbId = null;
} else {
try {
pdbId = new PdbId(pdbCode);
} catch (IllegalArgumentException e) {
logger.warn("Malformed PDB ID {}. setting PdbId to null", pdbCode);
pdbId = null;
}
}
pdbHeader.setPdbId(pdbId);
structure.setPdbId(pdbId);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -242,8 +242,10 @@ protected static CeSymmResult align(Atom[] atoms, CESymmParameters params)
optimalAFP = selfAlignments.get(0);
StructureIdentifier id = atoms[0].getGroup().getChain().getStructure()
.getStructureIdentifier();
optimalAFP.setName1(id.getIdentifier());
optimalAFP.setName2(id.getIdentifier());
if(id != null) {
optimalAFP.setName1(id.getIdentifier());
optimalAFP.setName2(id.getIdentifier());
}

// Store the optimal self-alignment
result.setSelfAlignment(optimalAFP);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,18 @@
package org.biojava.nbio.structure.symmetry.internal;

import static org.junit.Assert.*;
import static org.junit.Assume.assumeNotNull;

import java.io.IOException;
import java.io.InputStream;
import java.net.URL;

import org.biojava.nbio.structure.Atom;
import org.biojava.nbio.structure.Structure;
import org.biojava.nbio.structure.StructureException;
import org.biojava.nbio.structure.StructureIO;
import org.biojava.nbio.structure.StructureTools;
import org.biojava.nbio.structure.io.PDBFileParser;
import org.biojava.nbio.structure.symmetry.internal.CeSymm;
import org.junit.Test;

Expand Down Expand Up @@ -58,4 +63,16 @@ public void testEasyCases() throws IOException, StructureException {
assertEquals(result.getNumRepeats(), orders[i]);
}
}

@Test
public void testAlphafold() throws IOException, StructureException {
URL url = this.getClass().getResource("/AF-A0A0R4IYF1-F1-model_v2.pdb");
assumeNotNull(url);
String file = url.getPath();
Structure s = StructureIO.getStructure(file);
assertNull(s.getPdbId());
Atom[] atoms = StructureTools.getRepresentativeAtomArray(s);
CeSymmResult result = CeSymm.analyze(atoms);
assertNotNull(result);
}
}
Loading