biojava · josemduarte · Jan 25, 2020 · Jan 20, 2020 · Jan 21, 2020 · Jan 21, 2020
diff --git a/.../test/java/org/biojava/nbio/structure/test/symmetry/TestQuatSymmetryDetectorExamples.java b/.../test/java/org/biojava/nbio/structure/test/symmetry/TestQuatSymmetryDetectorExamples.java
@@ -28,14 +28,19 @@
 import org.biojava.nbio.structure.Structure;
 import org.biojava.nbio.structure.StructureException;
 import org.biojava.nbio.structure.StructureIO;
+import org.biojava.nbio.structure.StructureTools;
 import org.biojava.nbio.structure.align.util.AtomCache;
 import org.biojava.nbio.structure.cluster.SubunitClusterer;
 import org.biojava.nbio.structure.cluster.SubunitClustererMethod;
 import org.biojava.nbio.structure.cluster.SubunitClustererParameters;
+import org.biojava.nbio.structure.io.FileParsingParameters;
+import org.biojava.nbio.structure.quaternary.BiologicalAssemblyBuilder;
+import org.biojava.nbio.structure.quaternary.BiologicalAssemblyTransformation;
 import org.biojava.nbio.structure.symmetry.core.QuatSymmetryDetector;
 import org.biojava.nbio.structure.symmetry.core.QuatSymmetryParameters;
 import org.biojava.nbio.structure.symmetry.core.QuatSymmetryResults;
 import org.biojava.nbio.structure.symmetry.core.Stoichiometry;
+import org.junit.Ignore;
 import org.junit.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -366,4 +371,67 @@ public void testPseudoIdentity95() throws IOException, StructureException {
 		assertEquals(SubunitClustererMethod.SEQUENCE, symmetry.getSubunitClusters().get(0).getClustererMethod());
 
 	}
+
+	@Test
+	public void testSymDetectionWithClusteringByEntityId() throws IOException, StructureException {
+		AtomCache cache = new AtomCache();
+		cache.setUseMmtf(false);
+		cache.setUseMmCif(true);
+		FileParsingParameters params = new FileParsingParameters();
+		params.setAlignSeqRes(true);
+		cache.setFileParsingParams(params);
+		StructureIO.setAtomCache(cache);
+		Structure pdb = StructureIO.getStructure("BIO:1SMT:1");
+
+		SubunitClustererParameters cp = new SubunitClustererParameters();
+		cp.setUseEntityIdForSeqIdentityDetermination(true);
+		cp.setClustererMethod(SubunitClustererMethod.SEQUENCE);
+		QuatSymmetryParameters symmParams = new QuatSymmetryParameters();
+		QuatSymmetryResults symmetry = QuatSymmetryDetector.calcGlobalSymmetry(
+				pdb, symmParams, cp);
+
+		// C2 symmetry, A2 stoichiometry
+		assertEquals("C2", symmetry.getSymmetry());
+		assertEquals("A2", symmetry.getStoichiometry().toString());
+	}
+
+	/**
+	 * A performance test that demonstrates how the SubunitClustererParameters.setUseEntityIdForSeqIdentityDetermination()
+	 * has a dramatic effect in runtime versus doing alignments.
+	 */
+	@Ignore("This is a performance test to be run manually")
+	@Test
+	public void testSymDetectionPerformanceLargeCapsid() throws IOException, StructureException {
+		AtomCache cache = new AtomCache();
+		cache.setUseMmtf(false);
+		cache.setUseMmCif(true);
+		FileParsingParameters params = new FileParsingParameters();
+		params.setAlignSeqRes(true);
+		params.setParseBioAssembly(true);
+		cache.setFileParsingParams(params);
+		StructureIO.setAtomCache(cache);
+
+		// making sure we remove all atoms but representative before we expand, otherwise memory requirements are huge
+		// 6Q1F is another good example
+		Structure au = StructureIO.getStructure("6NHJ");
+		StructureTools.reduceToRepresentativeAtoms(au);
+		BiologicalAssemblyBuilder builder = new BiologicalAssemblyBuilder();
+		List<BiologicalAssemblyTransformation> transforms = au.getPDBHeader().getBioAssemblies().get(1).getTransforms();
+		Structure pdb = builder.rebuildQuaternaryStructure(au, transforms, true, false);
+
+		SubunitClustererParameters cp = new SubunitClustererParameters();
+
+		// This is the parameter that makes this fast, set it to false to see the difference.
+		// As of git commit ed322e387cd46344a7864a, the difference in runtime is not that huge:
+		// 2 minutes with true, 10 minutes with false. I observed a much larger difference before, but can't reproduce anymore - JD 2020-01-23
+		cp.setUseEntityIdForSeqIdentityDetermination(true);
+
+		cp.setClustererMethod(SubunitClustererMethod.SEQUENCE);
+		QuatSymmetryParameters symmParams = new QuatSymmetryParameters();
+		QuatSymmetryResults symmetry = QuatSymmetryDetector.calcGlobalSymmetry(
+				pdb, symmParams, cp);
+
+		assertEquals("I", symmetry.getSymmetry());
+		assertEquals("A960B960C600D480E300", symmetry.getStoichiometry().toString());
+	}
 }
diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/EntityInfo.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/EntityInfo.java
@@ -306,12 +306,12 @@ public List<String> getChainIds() {
 	 * used and when all chains within the entity are numbered in the same way), but
 	 * in general they will be neither unique (because of insertion codes) nor aligned.
 	 * </p>
-	 * @param g
-	 * @param c
+	 * @param g the group
+	 * @param c the chain
 	 * @return the aligned residue index (1 to n), if no SEQRES groups are available at all then {@link ResidueNumber#getSeqNum()}
 	 * is returned as a fall-back, if the group is not found in the SEQRES groups then -1 is returned
 	 * for the given group and chain
-	 * @throws IllegalArgumentException if the given Chain is not a member of this EnityInfo
+	 * @throws IllegalArgumentException if the given Chain is not a member of this EntityInfo
 	 * @see Chain#getSeqResGroup(int)
 	 */
 	public int getAlignedResIndex(Group g, Chain c) {

diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/StructureTools.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/StructureTools.java
@@ -1264,7 +1264,7 @@ public static final Character get1LetterCode(String groupCode3) {
 	 *            3-character code for a group.
 	 *
 	 */
-	public static final boolean isNucleotide(String groupCode3) {
+	public static boolean isNucleotide(String groupCode3) {
 		String code = groupCode3.trim();
 		return nucleotides30.containsKey(code)
 				|| nucleotides23.containsKey(code);
@@ -1283,7 +1283,7 @@ public static final boolean isNucleotide(String groupCode3) {
 	 * @deprecated Use {@link StructureIdentifier#reduce(Structure)} instead (v. 4.2.0)
 	 */
 	@Deprecated
-	public static final Structure getReducedStructure(Structure s,
+	public static Structure getReducedStructure(Structure s,
 			String chainId) throws StructureException {
 		// since we deal here with structure alignments,
 		// only use Model 1...
@@ -1338,7 +1338,7 @@ public static final Structure getReducedStructure(Structure s,
 		return newS;
 	}
 
-	public static final String convertAtomsToSeq(Atom[] atoms) {
+	public static String convertAtomsToSeq(Atom[] atoms) {
 
 		StringBuilder buf = new StringBuilder();
 		Group prevGroup = null;
@@ -1374,7 +1374,7 @@ public static final String convertAtomsToSeq(Atom[] atoms) {
 	 * @throws StructureException
 	 *             if the group cannot be found.
 	 */
-	public static final Group getGroupByPDBResidueNumber(Structure struc,
+	public static Group getGroupByPDBResidueNumber(Structure struc,
 			ResidueNumber pdbResNum) throws StructureException {
 		if (struc == null || pdbResNum == null) {
 			throw new IllegalArgumentException("Null argument(s).");
@@ -1447,7 +1447,7 @@ public static AtomContactSet getAtomsInContact(Chain chain, double cutoff) {
 	 * @param chain
 	 * @param cutoff
 	 * @return
-	 * @see {@link #getRepresentativeAtomsInContact(Chain, double)}
+	 * @see #getRepresentativeAtomsInContact(Chain, double)
 	 */
 	public static AtomContactSet getAtomsCAInContact(Chain chain, double cutoff) {
 		Grid grid = new Grid(cutoff);
@@ -1921,4 +1921,24 @@ private static String replaceFirstChar(String name, char c, char d) {
 		return name;
 	}
 
+	/**
+	 * Remove all atoms but the representative atoms (C alphas or phosphates) from the given structure.
+	 * @param structure the structure
+	 * @since 5.4.0
+	 */
+	public static void reduceToRepresentativeAtoms(Structure structure) {
+		for (int modelIdx = 0; modelIdx<structure.nrModels(); modelIdx++) {
+			for (Chain c : structure.getPolyChains(modelIdx)) {
+				for (Group g : c.getAtomGroups()) {
+					List<Atom> atoms = g.getAtoms();
+					if (g.isAminoAcid()) {
+						atoms.removeIf(a->!a.getName().equals(CA_ATOM_NAME));
+					} else if (g.isNucleotide()) {
+						atoms.removeIf(a->!a.getName().equals(NUCLEOTIDE_REPRESENTATIVE));
+					}
+					// else we keep all other atoms. We are concerned only about aminoacids and nucleotides that make up the bulk of the structures
+				}
+			}
+		}
+	}
 }
diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitCluster.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitCluster.java
@@ -32,6 +32,8 @@
 import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
 import org.biojava.nbio.structure.Atom;
 import org.biojava.nbio.structure.Chain;
+import org.biojava.nbio.structure.EntityInfo;
+import org.biojava.nbio.structure.Group;
 import org.biojava.nbio.structure.Structure;
 import org.biojava.nbio.structure.StructureException;
 import org.biojava.nbio.structure.align.StructureAlignment;
@@ -190,22 +192,35 @@ public boolean isIdenticalTo(SubunitCluster other) {
 	 * @return true if the SubunitClusters are identical, false otherwise
 	 */
 	public boolean isIdenticalByEntityIdTo(SubunitCluster other) {
-		Structure thisStruct = this.subunits.get(this.representative).getStructure();
-		Structure otherStruct = other.subunits.get(other.representative).getStructure();
-		String thisName = this.subunits.get(this.representative).getName();
-		String otherName = other.subunits.get(this.representative).getName();
+		Subunit thisSub = this.subunits.get(this.representative);
+		Subunit otherSub = other.subunits.get(other.representative);
+		String thisName = thisSub.getName();
+		String otherName = otherSub.getName();
+
+		Structure thisStruct = thisSub.getStructure();
+		Structure otherStruct = otherSub.getStructure();
+		if (thisStruct == null || otherStruct == null) {
+			logger.info("SubunitClusters {}-{} have no referenced structures. Ignoring identity check by entity id",
+					thisName,
+					otherName);
+			return false;
+		}
+		if (thisStruct != otherStruct) {
+			// different object references: will not cluster even if entity id is same
+			return false;
+		}
 		Chain thisChain = thisStruct.getChain(thisName);
 		Chain otherChain = otherStruct.getChain(otherName);
 		if (thisChain == null || otherChain == null) {
 			logger.info("Can't determine entity ids of SubunitClusters {}-{}. Ignoring identity check by entity id",
-					this.subunits.get(this.representative).getName(),
-					other.subunits.get(other.representative).getName());
+					thisName,
+					otherName);
 			return false;
 		}
 		if (thisChain.getEntityInfo() == null || otherChain.getEntityInfo() == null) {
 			logger.info("Can't determine entity ids of SubunitClusters {}-{}. Ignoring identity check by entity id",
-					this.subunits.get(this.representative).getName(),
-					other.subunits.get(other.representative).getName());
+					thisName,
+					otherName);
 			return false;
 		}
 		int thisEntityId = thisChain.getEntityInfo().getMolId();
@@ -241,7 +256,7 @@ public boolean mergeIdentical(SubunitCluster other) {
 	 * same Subunit. This is checked by comparing the entity identifiers of the subunits
 	 * if one can be found.
 	 * Thus this only makes sense when the subunits are complete chains of a
-	 * deposited PDB entry. I
+	 * deposited PDB entry.
 	 *
 	 * @param other
 	 *            SubunitCluster
@@ -252,12 +267,59 @@ public boolean mergeIdenticalByEntityId(SubunitCluster other) {
 		if (!isIdenticalByEntityIdTo(other))
 			return false;
 
+		Subunit thisSub = this.subunits.get(this.representative);
+		Subunit otherSub = other.subunits.get(other.representative);
+		String thisName = thisSub.getName();
+		String otherName = otherSub.getName();
+
 		logger.info("SubunitClusters {}-{} belong to same entity. Assuming they are identical",
-				this.subunits.get(this.representative).getName(),
-				other.subunits.get(other.representative).getName());
+				thisName,
+				otherName);
 
-		this.subunits.addAll(other.subunits);
-		this.subunitEQR.addAll(other.subunitEQR);
+		List<Integer> thisAligned = new ArrayList<>();
+		List<Integer> otherAligned = new ArrayList<>();
+
+		// we've merged by entity id, we can assume structure, chain and entity are available (checked in isIdenticalByEntityIdTo())
+		Structure thisStruct = thisSub.getStructure();
+		Structure otherStruct = otherSub.getStructure();
+		Chain thisChain = thisStruct.getChain(thisName);
+		Chain otherChain = otherStruct.getChain(otherName);
+		EntityInfo entityInfo = thisChain.getEntityInfo();
+
+		// Extract the aligned residues of both Subunits
+		for (int thisIndex=0; thisIndex < thisSub.size(); thisIndex++) {
+
+			Group g = thisSub.getRepresentativeAtoms()[thisIndex].getGroup();
+
+			int seqresIndex = entityInfo.getAlignedResIndex(g, thisChain);
+
+			if (seqresIndex == -1) {
+				// this might mean that FileParsingParameters.setAlignSeqRes() wasn't set to true during parsing
+				continue;
+			}
+
+			// note the seqresindex is 1-based
+			Group otherG = otherChain.getSeqResGroups().get(seqresIndex - 1);
+
+			int otherIndex = otherChain.getAtomGroups().indexOf(otherG);
+			if (otherIndex == -1) {
+				// skip residues that are unobserved in other sequence ("gaps" in the entity SEQRES alignment)
+				continue;
+			}
+
+			// Only consider residues that are part of the SubunitCluster
+			if (this.subunitEQR.get(this.representative).contains(thisIndex)
+					&& other.subunitEQR.get(other.representative).contains(otherIndex)) {
+				thisAligned.add(thisIndex);
+				otherAligned.add(otherIndex);
+			}
+		}
+
+		if (thisAligned.size() == 0 && otherAligned.size() == 0) {
+			logger.warn("No equivalent aligned atoms found between SubunitClusters {}-{} via entity SEQRES alignment. Is FileParsingParameters.setAlignSeqRes() set?", thisName, otherName);
+		}
+
+		updateEquivResidues(other, thisAligned, otherAligned);
 
 		return true;
 	}
@@ -690,7 +752,7 @@ public SubunitClustererMethod getClustererMethod() {
 	 */
 	public List<Atom[]> getAlignedAtomsSubunits() {
 
-		List<Atom[]> alignedAtoms = Collections.emptyList();
+		List<Atom[]> alignedAtoms = new ArrayList<>();
 
 		// Loop through all subunits and add the aligned positions
 		for (int s = 0; s < subunits.size(); s++)

diff --git a/...tructure/src/main/java/org/biojava/nbio/structure/cluster/SubunitClustererParameters.java b/...tructure/src/main/java/org/biojava/nbio/structure/cluster/SubunitClustererParameters.java
@@ -511,7 +511,8 @@ public boolean isHighConfidenceScores(double sequenceIdentity, double sequenceCo
 	/**
 	 * Whether to use the entity id of subunits to infer that sequences are identical.
 	 * Only applies if the {@link SubunitClustererMethod} is a sequence based one.
-	 * @return
+	 * @return the flag
+	 * @since 5.4.0
 	 */
 	public boolean isUseEntityIdForSeqIdentityDetermination() {
 		return useEntityIdForSeqIdentityDetermination;
@@ -520,7 +521,10 @@ public boolean isUseEntityIdForSeqIdentityDetermination() {
 	/**
 	 * Whether to use the entity id of subunits to infer that sequences are identical.
 	 * Only applies if the {@link SubunitClustererMethod} is a sequence based one.
+	 * Note this requires {@link org.biojava.nbio.structure.io.FileParsingParameters#setAlignSeqRes(boolean)} to be
+	 * set to true.
 	 * @param useEntityIdForSeqIdentityDetermination the flag to be set
+	 * @since 5.4.0
 	 */
 	public void setUseEntityIdForSeqIdentityDetermination(boolean useEntityIdForSeqIdentityDetermination) {
 		this.useEntityIdForSeqIdentityDetermination = useEntityIdForSeqIdentityDetermination;

diff --git a/...va-structure/src/main/java/org/biojava/nbio/structure/symmetry/core/C2RotationSolver.java b/...va-structure/src/main/java/org/biojava/nbio/structure/symmetry/core/C2RotationSolver.java
@@ -39,8 +39,8 @@
  * @author Peter
  */
 public class C2RotationSolver implements QuatSymmetrySolver {
-	private QuatSymmetrySubunits subunits = null;
-	private QuatSymmetryParameters parameters = null;
+	private QuatSymmetrySubunits subunits;
+	private QuatSymmetryParameters parameters;
 	private Vector3d centroid = new Vector3d();
 	private Matrix4d centroidInverse = new Matrix4d();
 
@@ -132,7 +132,7 @@ private void solve() {
 	}
 
 	private void addEOperation() {
-		List<Integer> permutation = Arrays.asList(new Integer[]{0,1});
+		List<Integer> permutation = Arrays.asList(0,1);
 		Matrix4d transformation = new Matrix4d();
 		transformation.setIdentity();
 		combineWithTranslation(transformation);
@@ -145,7 +145,6 @@ private void addEOperation() {
 
 	/**
 	 * Adds translational component to rotation matrix
-	 * @param rotTrans
 	 * @param rotation
 	 * @return
 	 */