Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,19 @@
import org.biojava.nbio.structure.Structure;
import org.biojava.nbio.structure.StructureException;
import org.biojava.nbio.structure.StructureIO;
import org.biojava.nbio.structure.StructureTools;
import org.biojava.nbio.structure.align.util.AtomCache;
import org.biojava.nbio.structure.cluster.SubunitClusterer;
import org.biojava.nbio.structure.cluster.SubunitClustererMethod;
import org.biojava.nbio.structure.cluster.SubunitClustererParameters;
import org.biojava.nbio.structure.io.FileParsingParameters;
import org.biojava.nbio.structure.quaternary.BiologicalAssemblyBuilder;
import org.biojava.nbio.structure.quaternary.BiologicalAssemblyTransformation;
import org.biojava.nbio.structure.symmetry.core.QuatSymmetryDetector;
import org.biojava.nbio.structure.symmetry.core.QuatSymmetryParameters;
import org.biojava.nbio.structure.symmetry.core.QuatSymmetryResults;
import org.biojava.nbio.structure.symmetry.core.Stoichiometry;
import org.junit.Ignore;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand Down Expand Up @@ -366,4 +371,67 @@ public void testPseudoIdentity95() throws IOException, StructureException {
assertEquals(SubunitClustererMethod.SEQUENCE, symmetry.getSubunitClusters().get(0).getClustererMethod());

}

@Test
public void testSymDetectionWithClusteringByEntityId() throws IOException, StructureException {
AtomCache cache = new AtomCache();
cache.setUseMmtf(false);
cache.setUseMmCif(true);
FileParsingParameters params = new FileParsingParameters();
params.setAlignSeqRes(true);
cache.setFileParsingParams(params);
StructureIO.setAtomCache(cache);
Structure pdb = StructureIO.getStructure("BIO:1SMT:1");

SubunitClustererParameters cp = new SubunitClustererParameters();
cp.setUseEntityIdForSeqIdentityDetermination(true);
cp.setClustererMethod(SubunitClustererMethod.SEQUENCE);
QuatSymmetryParameters symmParams = new QuatSymmetryParameters();
QuatSymmetryResults symmetry = QuatSymmetryDetector.calcGlobalSymmetry(
pdb, symmParams, cp);

// C2 symmetry, A2 stoichiometry
assertEquals("C2", symmetry.getSymmetry());
assertEquals("A2", symmetry.getStoichiometry().toString());
}

/**
* A performance test that demonstrates how the SubunitClustererParameters.setUseEntityIdForSeqIdentityDetermination()
* has a dramatic effect in runtime versus doing alignments.
*/
@Ignore("This is a performance test to be run manually")
@Test
public void testSymDetectionPerformanceLargeCapsid() throws IOException, StructureException {
AtomCache cache = new AtomCache();
cache.setUseMmtf(false);
cache.setUseMmCif(true);
FileParsingParameters params = new FileParsingParameters();
params.setAlignSeqRes(true);
params.setParseBioAssembly(true);
cache.setFileParsingParams(params);
StructureIO.setAtomCache(cache);

// making sure we remove all atoms but representative before we expand, otherwise memory requirements are huge
// 6Q1F is another good example
Structure au = StructureIO.getStructure("6NHJ");
StructureTools.reduceToRepresentativeAtoms(au);
BiologicalAssemblyBuilder builder = new BiologicalAssemblyBuilder();
List<BiologicalAssemblyTransformation> transforms = au.getPDBHeader().getBioAssemblies().get(1).getTransforms();
Structure pdb = builder.rebuildQuaternaryStructure(au, transforms, true, false);

SubunitClustererParameters cp = new SubunitClustererParameters();

// This is the parameter that makes this fast, set it to false to see the difference.
// As of git commit ed322e387cd46344a7864a, the difference in runtime is not that huge:
// 2 minutes with true, 10 minutes with false. I observed a much larger difference before, but can't reproduce anymore - JD 2020-01-23
cp.setUseEntityIdForSeqIdentityDetermination(true);

cp.setClustererMethod(SubunitClustererMethod.SEQUENCE);
QuatSymmetryParameters symmParams = new QuatSymmetryParameters();
QuatSymmetryResults symmetry = QuatSymmetryDetector.calcGlobalSymmetry(
pdb, symmParams, cp);

assertEquals("I", symmetry.getSymmetry());
assertEquals("A960B960C600D480E300", symmetry.getStoichiometry().toString());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -306,12 +306,12 @@ public List<String> getChainIds() {
* used and when all chains within the entity are numbered in the same way), but
* in general they will be neither unique (because of insertion codes) nor aligned.
* </p>
* @param g
* @param c
* @param g the group
* @param c the chain
* @return the aligned residue index (1 to n), if no SEQRES groups are available at all then {@link ResidueNumber#getSeqNum()}
* is returned as a fall-back, if the group is not found in the SEQRES groups then -1 is returned
* for the given group and chain
* @throws IllegalArgumentException if the given Chain is not a member of this EnityInfo
* @throws IllegalArgumentException if the given Chain is not a member of this EntityInfo
* @see Chain#getSeqResGroup(int)
*/
public int getAlignedResIndex(Group g, Chain c) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1264,7 +1264,7 @@ public static final Character get1LetterCode(String groupCode3) {
* 3-character code for a group.
*
*/
public static final boolean isNucleotide(String groupCode3) {
public static boolean isNucleotide(String groupCode3) {
String code = groupCode3.trim();
return nucleotides30.containsKey(code)
|| nucleotides23.containsKey(code);
Expand All @@ -1283,7 +1283,7 @@ public static final boolean isNucleotide(String groupCode3) {
* @deprecated Use {@link StructureIdentifier#reduce(Structure)} instead (v. 4.2.0)
*/
@Deprecated
public static final Structure getReducedStructure(Structure s,
public static Structure getReducedStructure(Structure s,
String chainId) throws StructureException {
// since we deal here with structure alignments,
// only use Model 1...
Expand Down Expand Up @@ -1338,7 +1338,7 @@ public static final Structure getReducedStructure(Structure s,
return newS;
}

public static final String convertAtomsToSeq(Atom[] atoms) {
public static String convertAtomsToSeq(Atom[] atoms) {

StringBuilder buf = new StringBuilder();
Group prevGroup = null;
Expand Down Expand Up @@ -1374,7 +1374,7 @@ public static final String convertAtomsToSeq(Atom[] atoms) {
* @throws StructureException
* if the group cannot be found.
*/
public static final Group getGroupByPDBResidueNumber(Structure struc,
public static Group getGroupByPDBResidueNumber(Structure struc,
ResidueNumber pdbResNum) throws StructureException {
if (struc == null || pdbResNum == null) {
throw new IllegalArgumentException("Null argument(s).");
Expand Down Expand Up @@ -1447,7 +1447,7 @@ public static AtomContactSet getAtomsInContact(Chain chain, double cutoff) {
* @param chain
* @param cutoff
* @return
* @see {@link #getRepresentativeAtomsInContact(Chain, double)}
* @see #getRepresentativeAtomsInContact(Chain, double)
*/
public static AtomContactSet getAtomsCAInContact(Chain chain, double cutoff) {
Grid grid = new Grid(cutoff);
Expand Down Expand Up @@ -1921,4 +1921,24 @@ private static String replaceFirstChar(String name, char c, char d) {
return name;
}

/**
* Remove all atoms but the representative atoms (C alphas or phosphates) from the given structure.
* @param structure the structure
* @since 5.4.0
*/
public static void reduceToRepresentativeAtoms(Structure structure) {
for (int modelIdx = 0; modelIdx<structure.nrModels(); modelIdx++) {
for (Chain c : structure.getPolyChains(modelIdx)) {
for (Group g : c.getAtomGroups()) {
List<Atom> atoms = g.getAtoms();
if (g.isAminoAcid()) {
atoms.removeIf(a->!a.getName().equals(CA_ATOM_NAME));
} else if (g.isNucleotide()) {
atoms.removeIf(a->!a.getName().equals(NUCLEOTIDE_REPRESENTATIVE));
}
// else we keep all other atoms. We are concerned only about aminoacids and nucleotides that make up the bulk of the structures
}
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@
import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
import org.biojava.nbio.structure.Atom;
import org.biojava.nbio.structure.Chain;
import org.biojava.nbio.structure.EntityInfo;
import org.biojava.nbio.structure.Group;
import org.biojava.nbio.structure.Structure;
import org.biojava.nbio.structure.StructureException;
import org.biojava.nbio.structure.align.StructureAlignment;
Expand Down Expand Up @@ -190,22 +192,35 @@ public boolean isIdenticalTo(SubunitCluster other) {
* @return true if the SubunitClusters are identical, false otherwise
*/
public boolean isIdenticalByEntityIdTo(SubunitCluster other) {
Structure thisStruct = this.subunits.get(this.representative).getStructure();
Structure otherStruct = other.subunits.get(other.representative).getStructure();
String thisName = this.subunits.get(this.representative).getName();
String otherName = other.subunits.get(this.representative).getName();
Subunit thisSub = this.subunits.get(this.representative);
Subunit otherSub = other.subunits.get(other.representative);
String thisName = thisSub.getName();
String otherName = otherSub.getName();

Structure thisStruct = thisSub.getStructure();
Structure otherStruct = otherSub.getStructure();
if (thisStruct == null || otherStruct == null) {
logger.info("SubunitClusters {}-{} have no referenced structures. Ignoring identity check by entity id",
thisName,
otherName);
return false;
}
if (thisStruct != otherStruct) {
// different object references: will not cluster even if entity id is same
return false;
}
Chain thisChain = thisStruct.getChain(thisName);
Chain otherChain = otherStruct.getChain(otherName);
if (thisChain == null || otherChain == null) {
logger.info("Can't determine entity ids of SubunitClusters {}-{}. Ignoring identity check by entity id",
this.subunits.get(this.representative).getName(),
other.subunits.get(other.representative).getName());
thisName,
otherName);
return false;
}
if (thisChain.getEntityInfo() == null || otherChain.getEntityInfo() == null) {
logger.info("Can't determine entity ids of SubunitClusters {}-{}. Ignoring identity check by entity id",
this.subunits.get(this.representative).getName(),
other.subunits.get(other.representative).getName());
thisName,
otherName);
return false;
}
int thisEntityId = thisChain.getEntityInfo().getMolId();
Expand Down Expand Up @@ -241,7 +256,7 @@ public boolean mergeIdentical(SubunitCluster other) {
* same Subunit. This is checked by comparing the entity identifiers of the subunits
* if one can be found.
* Thus this only makes sense when the subunits are complete chains of a
* deposited PDB entry. I
* deposited PDB entry.
*
* @param other
* SubunitCluster
Expand All @@ -252,12 +267,59 @@ public boolean mergeIdenticalByEntityId(SubunitCluster other) {
if (!isIdenticalByEntityIdTo(other))
return false;

Subunit thisSub = this.subunits.get(this.representative);
Subunit otherSub = other.subunits.get(other.representative);
String thisName = thisSub.getName();
String otherName = otherSub.getName();

logger.info("SubunitClusters {}-{} belong to same entity. Assuming they are identical",
this.subunits.get(this.representative).getName(),
other.subunits.get(other.representative).getName());
thisName,
otherName);

this.subunits.addAll(other.subunits);
this.subunitEQR.addAll(other.subunitEQR);
List<Integer> thisAligned = new ArrayList<>();
List<Integer> otherAligned = new ArrayList<>();

// we've merged by entity id, we can assume structure, chain and entity are available (checked in isIdenticalByEntityIdTo())
Structure thisStruct = thisSub.getStructure();
Structure otherStruct = otherSub.getStructure();
Chain thisChain = thisStruct.getChain(thisName);
Chain otherChain = otherStruct.getChain(otherName);
EntityInfo entityInfo = thisChain.getEntityInfo();

// Extract the aligned residues of both Subunits
for (int thisIndex=0; thisIndex < thisSub.size(); thisIndex++) {

Group g = thisSub.getRepresentativeAtoms()[thisIndex].getGroup();

int seqresIndex = entityInfo.getAlignedResIndex(g, thisChain);

if (seqresIndex == -1) {
// this might mean that FileParsingParameters.setAlignSeqRes() wasn't set to true during parsing
continue;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok I think that answers my question before, makes sense! Thanks Jose

}

// note the seqresindex is 1-based
Group otherG = otherChain.getSeqResGroups().get(seqresIndex - 1);

int otherIndex = otherChain.getAtomGroups().indexOf(otherG);
if (otherIndex == -1) {
// skip residues that are unobserved in other sequence ("gaps" in the entity SEQRES alignment)
continue;
}

// Only consider residues that are part of the SubunitCluster
if (this.subunitEQR.get(this.representative).contains(thisIndex)
&& other.subunitEQR.get(other.representative).contains(otherIndex)) {
thisAligned.add(thisIndex);
otherAligned.add(otherIndex);
}
}

if (thisAligned.size() == 0 && otherAligned.size() == 0) {
logger.warn("No equivalent aligned atoms found between SubunitClusters {}-{} via entity SEQRES alignment. Is FileParsingParameters.setAlignSeqRes() set?", thisName, otherName);
}

updateEquivResidues(other, thisAligned, otherAligned);

return true;
}
Expand Down Expand Up @@ -690,7 +752,7 @@ public SubunitClustererMethod getClustererMethod() {
*/
public List<Atom[]> getAlignedAtomsSubunits() {

List<Atom[]> alignedAtoms = Collections.emptyList();
List<Atom[]> alignedAtoms = new ArrayList<>();

// Loop through all subunits and add the aligned positions
for (int s = 0; s < subunits.size(); s++)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -511,7 +511,8 @@ public boolean isHighConfidenceScores(double sequenceIdentity, double sequenceCo
/**
* Whether to use the entity id of subunits to infer that sequences are identical.
* Only applies if the {@link SubunitClustererMethod} is a sequence based one.
* @return
* @return the flag
* @since 5.4.0
*/
public boolean isUseEntityIdForSeqIdentityDetermination() {
return useEntityIdForSeqIdentityDetermination;
Expand All @@ -520,7 +521,10 @@ public boolean isUseEntityIdForSeqIdentityDetermination() {
/**
* Whether to use the entity id of subunits to infer that sequences are identical.
* Only applies if the {@link SubunitClustererMethod} is a sequence based one.
* Note this requires {@link org.biojava.nbio.structure.io.FileParsingParameters#setAlignSeqRes(boolean)} to be
* set to true.
* @param useEntityIdForSeqIdentityDetermination the flag to be set
* @since 5.4.0
*/
public void setUseEntityIdForSeqIdentityDetermination(boolean useEntityIdForSeqIdentityDetermination) {
this.useEntityIdForSeqIdentityDetermination = useEntityIdForSeqIdentityDetermination;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@
* @author Peter
*/
public class C2RotationSolver implements QuatSymmetrySolver {
private QuatSymmetrySubunits subunits = null;
private QuatSymmetryParameters parameters = null;
private QuatSymmetrySubunits subunits;
private QuatSymmetryParameters parameters;
private Vector3d centroid = new Vector3d();
private Matrix4d centroidInverse = new Matrix4d();

Expand Down Expand Up @@ -132,7 +132,7 @@ private void solve() {
}

private void addEOperation() {
List<Integer> permutation = Arrays.asList(new Integer[]{0,1});
List<Integer> permutation = Arrays.asList(0,1);
Matrix4d transformation = new Matrix4d();
transformation.setIdentity();
combineWithTranslation(transformation);
Expand All @@ -145,7 +145,6 @@ private void addEOperation() {

/**
* Adds translational component to rotation matrix
* @param rotTrans
* @param rotation
* @return
*/
Expand Down
Loading