biojava · josemduarte · Apr 18, 2020 · Jan 25, 2020 · Jan 25, 2020 · Jan 25, 2020
diff --git a/...rationtest/src/test/java/org/biojava/nbio/structure/test/contact/TestInterfaceFinder.java b/...rationtest/src/test/java/org/biojava/nbio/structure/test/contact/TestInterfaceFinder.java
@@ -0,0 +1,49 @@
+package org.biojava.nbio.structure.test.contact;
+
+import org.biojava.nbio.structure.Structure;
+import org.biojava.nbio.structure.StructureException;
+import org.biojava.nbio.structure.StructureIO;
+import org.biojava.nbio.structure.contact.AtomContactSet;
+import org.biojava.nbio.structure.contact.InterfaceFinder;
+import org.biojava.nbio.structure.contact.Pair;
+import org.biojava.nbio.structure.contact.StructureInterface;
+import org.biojava.nbio.structure.contact.StructureInterfaceList;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Tests for {@link InterfaceFinder}.
+ */
+public class TestInterfaceFinder {
+
+    /**
+     * Finds the interfaces of 3hbx and checks that 12 of them are returned and
+     * that their molecule id pairs make up a set of 12 elements.
+     */
+    @Test
+    public void testGetAllInterfaces() throws StructureException, IOException {
+        Structure structure = StructureIO.getStructure("3hbx");
+
+        // only the interface calculation is timed, not the loading of the structure
+        long t0 = System.currentTimeMillis();
+        StructureInterfaceList interfaces = new InterfaceFinder(structure).getAllInterfaces();
+        long elapsed = System.currentTimeMillis() - t0;
+        System.out.println("Took " + elapsed + " ms to calculate interfaces");
+
+        assertEquals(12, interfaces.size());
+
+        Set<Pair<String>> moleculeIdPairs = new HashSet<>();
+        for (StructureInterface interf : interfaces) {
+            System.out.println("Interface " + interf.getMoleculeIds());
+            AtomContactSet contacts = interf.getContacts();
+            System.out.println("Number of contacts: " + contacts.size());
+            moleculeIdPairs.add(interf.getMoleculeIds());
+        }
+        assertEquals(12, moleculeIdPairs.size());
+    }
+}
diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/StructureTools.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/StructureTools.java
@@ -474,7 +474,7 @@ public static List<Group> getUnalignedGroups(Atom[] ca) {
 	 * @see StructureTools#DEFAULT_LIGAND_PROXIMITY_CUTOFF
 	 */
 	public static List<Group> getLigandsByProximity(Collection<Group> target, Atom[] query, double cutoff) {
-		// Geometric hashing of the reduced structure
+		// Spatial hashing of the reduced structure
 		Grid grid = new Grid(cutoff);
 		grid.addAtoms(query);
 
@@ -1387,7 +1387,7 @@ public static Group getGroupByPDBResidueNumber(Structure struc,
 
 	/**
 	 * Returns the set of intra-chain contacts for the given chain for given
-	 * atom names, i.e. the contact map. Uses a geometric hashing algorithm that
+	 * atom names, i.e. the contact map. Uses a spatial hashing algorithm that
 	 * speeds up the calculation without need of full distance matrix. The
 	 * parsing mode {@link FileParsingParameters#setAlignSeqRes(boolean)} needs
 	 * to be set to true for this to work.
@@ -1422,7 +1422,7 @@ public static AtomContactSet getAtomsInContact(Chain chain,
 
 	/**
 	 * Returns the set of intra-chain contacts for the given chain for all non-H
-	 * atoms of non-hetatoms, i.e. the contact map. Uses a geometric hashing
+	 * atoms of non-hetatoms, i.e. the contact map. Uses a spatial hashing
 	 * algorithm that speeds up the calculation without need of full distance
 	 * matrix. The parsing mode
 	 * {@link FileParsingParameters#setAlignSeqRes(boolean)} needs to be set to
@@ -1439,7 +1439,7 @@ public static AtomContactSet getAtomsInContact(Chain chain, double cutoff) {
 	/**
 	 * Returns the set of intra-chain contacts for the given chain for C-alpha
 	 * atoms (including non-standard aminoacids appearing as HETATM groups),
-	 * i.e. the contact map. Uses a geometric hashing algorithm that speeds up
+	 * i.e. the contact map. Uses a spatial hashing algorithm that speeds up
 	 * the calculation without need of full distance matrix. The parsing mode
 	 * {@link FileParsingParameters#setAlignSeqRes(boolean)} needs to be set to
 	 * true for this to work.
@@ -1462,7 +1462,7 @@ public static AtomContactSet getAtomsCAInContact(Chain chain, double cutoff) {
 	/**
 	 * Returns the set of intra-chain contacts for the given chain for C-alpha
 	 * or C3' atoms (including non-standard aminoacids appearing as HETATM
-	 * groups), i.e. the contact map. Uses a geometric hashing algorithm that
+	 * groups), i.e. the contact map. Uses a spatial hashing algorithm that
 	 * speeds up the calculation without need of full distance matrix.
 	 *
 	 * @param chain
@@ -1483,7 +1483,7 @@ public static AtomContactSet getRepresentativeAtomsInContact(Chain chain,
 
 	/**
 	 * Returns the set of inter-chain contacts between the two given chains for
-	 * the given atom names. Uses a geometric hashing algorithm that speeds up
+	 * the given atom names. Uses a spatial hashing algorithm that speeds up
 	 * the calculation without need of full distance matrix. The parsing mode
 	 * {@link FileParsingParameters#setAlignSeqRes(boolean)} needs to be set to
 	 * true for this to work.
@@ -1518,7 +1518,7 @@ public static AtomContactSet getAtomsInContact(Chain chain1, Chain chain2,
 
 	/**
 	 * Returns the set of inter-chain contacts between the two given chains for
-	 * all non-H atoms. Uses a geometric hashing algorithm that speeds up the
+	 * all non-H atoms. Uses a spatial hashing algorithm that speeds up the
 	 * calculation without need of full distance matrix. The parsing mode
 	 * {@link FileParsingParameters#setAlignSeqRes(boolean)} needs to be set to
 	 * true for this to work.

diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/contact/InterfaceFinder.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/contact/InterfaceFinder.java
@@ -0,0 +1,122 @@
+package org.biojava.nbio.structure.contact;
+
+import org.biojava.nbio.structure.Atom;
+import org.biojava.nbio.structure.Calc;
+import org.biojava.nbio.structure.Chain;
+import org.biojava.nbio.structure.Structure;
+import org.biojava.nbio.structure.StructureTools;
+import org.biojava.nbio.structure.xtal.CrystalTransform;
+import org.biojava.nbio.structure.xtal.SpaceGroup;
+
+import javax.vecmath.Point3d;
+import java.util.List;
+
+/**
+ * Finds the interfaces between the polymer chains of a given structure.
+ * <p>
+ * Pairs of chains are first screened through their bounding boxes, so that atom
+ * contacts are only calculated for pairs whose boxes overlap within the cutoff.
+ * @author Jose Duarte
+ * @since 5.4.0
+ */
+public class InterfaceFinder {
+
+    /** Contact distance cutoff, in Angstroms, used when none is set through {@link #setCutoff(double)}. */
+    public static final double DEFAULT_CONTACT_CUTOFF = 6;
+
+    // transform passed for both sides of every interface created by this class
+    private static final CrystalTransform IDENTITY_TRANSFORM = new CrystalTransform((SpaceGroup) null);
+    private static final boolean INCLUDE_HETATOMS = true;
+
+    private final Structure structure;
+    private double cutoff;
+
+    // one box per polymer chain (same order), null for chains without non-H atoms
+    private BoundingBox[] boundingBoxes;
+
+    /**
+     * Create a finder for the given structure, using {@link #DEFAULT_CONTACT_CUTOFF} as contact cutoff.
+     * @param structure the structure to find interfaces in
+     */
+    public InterfaceFinder(Structure structure) {
+        this.structure = structure;
+        this.cutoff = DEFAULT_CONTACT_CUTOFF;
+    }
+
+    /**
+     * Set the contact distance cutoff.
+     * @param cutoff the distance value in Angstroms
+     */
+    public void setCutoff(double cutoff) {
+        this.cutoff = cutoff;
+    }
+
+    /**
+     * Find all inter polymer-chain interfaces in the structure.
+     * Two chains will be considered in contact if at least a pair of atoms (one from each chain) is within the
+     * contact cutoff. Polymer chains without any non-Hydrogen atom are skipped: they can't be in contact with
+     * any other chain.
+     * @return the list of all interfaces
+     */
+    public StructureInterfaceList getAllInterfaces() {
+        // fetched once so that bounding box indices are guaranteed to match chain indices
+        List<Chain> polyChains = structure.getPolyChains();
+        initBoundingBoxes(polyChains);
+
+        StructureInterfaceList list = new StructureInterfaceList();
+
+        for (int i = 0; i<polyChains.size(); i++) {
+            if (boundingBoxes[i] == null) {
+                continue;
+            }
+            for (int j = i + 1; j<polyChains.size(); j++) {
+                // cheap pre-filter: chains with distant bounding boxes are not checked atom by atom
+                if (boundingBoxes[j] == null || !boundingBoxes[i].overlaps(boundingBoxes[j], cutoff)) {
+                    continue;
+                }
+                StructureInterface interf = calcInterface(polyChains.get(i), polyChains.get(j));
+                if (interf!=null) {
+                    list.add(interf);
+                }
+            }
+        }
+        return list;
+    }
+
+    /**
+     * Calculate the bounding box of the non-Hydrogen atoms of every given chain.
+     * @param polyChains the polymer chains, in the order used to index the boxes
+     */
+    private void initBoundingBoxes(List<Chain> polyChains) {
+        boundingBoxes = new BoundingBox[polyChains.size()];
+        for (int i = 0; i<polyChains.size(); i++) {
+            Atom[] atoms = StructureTools.getAllNonHAtomArray(polyChains.get(i), INCLUDE_HETATOMS);
+            if (atoms.length == 0) {
+                // no point to enclose in a box: the entry stays null and the chain is skipped
+                continue;
+            }
+            Point3d[] points = Calc.atomsToPoints(atoms);
+            boundingBoxes[i] = new BoundingBox(points);
+        }
+    }
+
+    /**
+     * Calculate the interface between the two given chains.
+     * @param chain1 the first chain
+     * @param chain2 the second chain
+     * @return the interface, or null if no pair of atoms is within the contact cutoff
+     */
+    private StructureInterface calcInterface(Chain chain1, Chain chain2) {
+        AtomContactSet graph = StructureTools.getAtomsInContact(chain1, chain2, cutoff, INCLUDE_HETATOMS);
+
+        if (graph.size()>0) {
+            return new StructureInterface(
+                    StructureTools.getAllNonHAtomArray(chain1, INCLUDE_HETATOMS),
+                    StructureTools.getAllNonHAtomArray(chain2, INCLUDE_HETATOMS),
+                    chain1.getName(), chain2.getName(),
+                    graph,
+                    IDENTITY_TRANSFORM, IDENTITY_TRANSFORM);
+        }
+        return null;
+    }
+}
diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/contact/StructureInterface.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/contact/StructureInterface.java
@@ -304,7 +304,7 @@ protected Atom[] getAtomsForAsa(int cofactorSizeToUse) {
 	 * non-Hydrogen atoms are not included
 	 * @return
 	 */
-	private static final Atom[] getAllNonHAtomArray(Atom[] m, int minSizeHetAtomToInclude) {
+	private static Atom[] getAllNonHAtomArray(Atom[] m, int minSizeHetAtomToInclude) {
 		List<Atom> atoms = new ArrayList<>();
 
 		for (Atom a:m){
@@ -348,7 +348,7 @@ private static boolean isInChain(Group g) {
 		ChemComp chemComp = g.getChemComp();
 
 		if (chemComp==null) {
-			logger.warn("Warning: can't determine PolymerType for group "+g.getResidueNumber()+" ("+g.getPDBName()+"). Will consider it as non-nucleotide/non-protein type.");
+			logger.warn("Can't determine PolymerType for group "+g.getResidueNumber()+" ("+g.getPDBName()+"). Will consider it as non-nucleotide/non-protein type.");
 			return false;
 		}
 
@@ -458,8 +458,8 @@ public GroupAsa getSecondGroupAsa(ResidueNumber resNum) {
 	 */
 	public Pair<List<Group>> getCoreResidues(double bsaToAsaCutoff, double minAsaForSurface) {
 
-		List<Group> core1 = new ArrayList<Group>();
-		List<Group> core2 = new ArrayList<Group>();
+		List<Group> core1 = new ArrayList<>();
+		List<Group> core2 = new ArrayList<>();
 
 		for (GroupAsa groupAsa:groupAsas1.values()) {
 
@@ -482,7 +482,7 @@ public Pair<List<Group>> getCoreResidues(double bsaToAsaCutoff, double minAsaFor
 			}
 		}
 
-		return new Pair<List<Group>>(core1, core2);
+		return new Pair<>(core1, core2);
 	}
 
 	/**
@@ -494,8 +494,8 @@ public Pair<List<Group>> getCoreResidues(double bsaToAsaCutoff, double minAsaFor
 	 */
 	public Pair<List<Group>> getRimResidues(double bsaToAsaCutoff, double minAsaForSurface) {
 
-		List<Group> rim1 = new ArrayList<Group>();
-		List<Group> rim2 = new ArrayList<Group>();
+		List<Group> rim1 = new ArrayList<>();
+		List<Group> rim2 = new ArrayList<>();
 
 		for (GroupAsa groupAsa:groupAsas1.values()) {
 
@@ -529,8 +529,8 @@ public Pair<List<Group>> getRimResidues(double bsaToAsaCutoff, double minAsaForS
 	 */
 	public Pair<List<Group>> getInterfacingResidues(double minAsaForSurface) {
 
-		List<Group> interf1 = new ArrayList<Group>();
-		List<Group> interf2 = new ArrayList<Group>();
+		List<Group> interf1 = new ArrayList<>();
+		List<Group> interf2 = new ArrayList<>();
 
 		for (GroupAsa groupAsa:groupAsas1.values()) {
 
@@ -545,7 +545,7 @@ public Pair<List<Group>> getInterfacingResidues(double minAsaForSurface) {
 			}
 		}
 
-		return new Pair<List<Group>>(interf1, interf2);
+		return new Pair<>(interf1, interf2);
 	}
 
 	/**
@@ -554,8 +554,8 @@ public Pair<List<Group>> getInterfacingResidues(double minAsaForSurface) {
 	 * @return
 	 */
 	public Pair<List<Group>> getSurfaceResidues(double minAsaForSurface) {
-		List<Group> surf1 = new ArrayList<Group>();
-		List<Group> surf2 = new ArrayList<Group>();
+		List<Group> surf1 = new ArrayList<>();
+		List<Group> surf2 = new ArrayList<>();
 
 		for (GroupAsa groupAsa:groupAsas1.values()) {
 
@@ -570,7 +570,7 @@ public Pair<List<Group>> getSurfaceResidues(double minAsaForSurface) {
 			}
 		}
 
-		return new Pair<List<Group>>(surf1, surf2);
+		return new Pair<>(surf1, surf2);
 	}
 
 	public StructureInterfaceCluster getCluster() {
@@ -585,12 +585,12 @@ public void setCluster(StructureInterfaceCluster cluster) {
 	 * Calculates the contact overlap score between this StructureInterface and
 	 * the given one.
 	 * The two sides of the given StructureInterface need to match this StructureInterface
-	 * in the sense that they must come from the same Compound (Entity), i.e.
+	 * in the sense that they must come from the same Entity, i.e.
 	 * their residue numbers need to align with 100% identity, except for unobserved
 	 * density residues. The SEQRES indices obtained through {@link EntityInfo#getAlignedResIndex(Group, Chain)} are
 	 * used to match residues, thus if no SEQRES is present or if {@link FileParsingParameters#setAlignSeqRes(boolean)}
 	 * is not used, this calculation is not guaranteed to work properly.
-	 * @param other
+	 * @param other the interface to be compared to this one
 	 * @param invert if false the comparison will be done first-to-first and second-to-second,
 	 * if true the match will be first-to-second and second-to-first
 	 * @return the contact overlap score, range [0.0,1.0]
@@ -668,7 +668,7 @@ public GroupContactSet getGroupContacts() {
 
 	/**
 	 * Tell whether the interface is isologous, i.e. it is formed
-	 * by the same patches of same Compound on both sides.
+	 * by the same patches of same entity on both sides.
 	 *
 	 * @return true if isologous, false if heterologous
 	 */
@@ -691,11 +691,11 @@ public Pair<Chain> getParentChains() {
 			return null;
 		}
 
-		return new Pair<Chain>(firstMol[0].getGroup().getChain(), secondMol[0].getGroup().getChain());
+		return new Pair<>(firstMol[0].getGroup().getChain(), secondMol[0].getGroup().getChain());
 	}
 
 	/**
-	 * Finds the parent compounds by looking up the references of first atom of each side of this interface
+	 * Finds the parent entities by looking up the references of first atom of each side of this interface
 	 * @return
 	 */
 	public Pair<EntityInfo> getParentCompounds() {
@@ -720,7 +720,7 @@ private Structure getParentStructure() {
 	 * Return a String representing the 2 molecules of this interface in PDB format.
 	 * If the molecule ids (i.e. chain ids) are the same for both molecules, then the second
 	 * one will be replaced by the next letter in alphabet (or A for Z)
-	 * @return
+	 * @return the PDB-formatted string
 	 */
 	public String toPDB() {
 
@@ -758,7 +758,7 @@ public String toPDB() {
 	 * Return a String representing the 2 molecules of this interface in mmCIF format.
 	 * If the molecule ids (i.e. chain ids) are the same for both molecules, then the second
 	 * one will be written as chainId_operatorId (with operatorId taken from {@link #getTransforms()}
-	 * @return
+	 * @return the mmCIF-formatted string
 	 */
 	public String toMMCIF() {
 		StringBuilder sb = new StringBuilder();