Skip to content

Commit 996c149

Browse files
author
Dmytro Guzenko
committed
Small refactoring for more flexible symmetry detection.
1 parent 4c5f7f9 commit 996c149

File tree

26 files changed

+526
-321
lines changed

26 files changed

+526
-321
lines changed

biojava-core/src/main/java/org/biojava/nbio/core/alignment/SimpleSequencePair.java

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -208,11 +208,13 @@ public AlignedSequence<S, C> getTarget() {
208208
}
209209

210210
@Override
211-
public double getPercentageOfIdentity() {
211+
public double getPercentageOfIdentity(boolean countGaps) {
212212
double seqid = getNumIdenticals();
213-
double length = getLength()
214-
- getAlignedSequence(1).getNumGapPositions()
215-
- getAlignedSequence(2).getNumGapPositions();
213+
double length = getLength();
214+
if (!countGaps) {
215+
length = length - getAlignedSequence(1).getNumGapPositions()
216+
- getAlignedSequence(2).getNumGapPositions();
217+
}
216218
return seqid / length;
217219
}
218220

biojava-core/src/main/java/org/biojava/nbio/core/alignment/template/SequencePair.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -99,12 +99,13 @@ public interface SequencePair<S extends Sequence<C>, C extends Compound> extends
9999

100100
/**
101101
* Returns the percentage of identity between the two sequences in the alignment as a fraction between 0 and 1.
102-
* This is equivalent to {@link #getNumIdenticals()} / {@link #getLength()}. Gap positions are exluded
103-
* from the calculation.
102+
* This is equivalent to {@link #getNumIdenticals()} / {@link #getLength()}.
103+
* @param countGaps
104+
* If true, gap positions are included into the calculation.
104105
*
105106
* @return the percentage of sequence identity as a fraction in [0,1]
106107
*/
107-
double getPercentageOfIdentity();
108+
double getPercentageOfIdentity(boolean countGaps);
108109

109110
/**
110111
* Returns the number of indices for which both the query and target sequences have a similar {@link Compound}.

biojava-core/src/test/java/org/biojava/nbio/core/alignment/SimpleSequencePairTest.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -250,8 +250,8 @@ public void testGetNumIdenticals() {
250250

251251
@Test
252252
public void testGetPercentageOfIdentity() {
253-
assertEquals(global.getPercentageOfIdentity(), 1.0, 0.01);
254-
assertEquals(local.getPercentageOfIdentity(), 1.0, 0.01);
253+
assertEquals(global.getPercentageOfIdentity(false), 1.0, 0.01);
254+
assertEquals(local.getPercentageOfIdentity(false), 1.0, 0.01);
255255
}
256256

257257
@Test

biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/cluster/TestSubunitClustererExamples.java

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -79,12 +79,12 @@ public void testPseudostoichiometry() throws StructureException,
7979
assertEquals(clusters.get(0).length(), 141);
8080
assertEquals(clusters.get(1).length(), 146);
8181
assertEquals(clusters.get(0).getClustererMethod(),
82-
SubunitClustererMethod.IDENTITY);
82+
SubunitClustererMethod.SEQUENCE);
8383
assertEquals(clusters.get(1).getClustererMethod(),
84-
SubunitClustererMethod.IDENTITY);
84+
SubunitClustererMethod.SEQUENCE);
8585

8686
params.setClustererMethod(SubunitClustererMethod.STRUCTURE);
87-
params.setRmsdThreshold(3.0);
87+
params.setRMSDThreshold(3.0);
8888

8989
clusters = SubunitClusterer.cluster(s, params);
9090

@@ -105,7 +105,7 @@ public void testInternalSymmetry() throws StructureException, IOException {
105105

106106
SubunitClustererParameters params = new SubunitClustererParameters();
107107
params.setClustererMethod(SubunitClustererMethod.SEQUENCE);
108-
params.setCoverageThreshold(0.8);
108+
params.setSequenceCoverageThreshold(0.8);
109109

110110
List<SubunitCluster> clusters = SubunitClusterer.cluster(s, params);
111111

@@ -114,11 +114,12 @@ public void testInternalSymmetry() throws StructureException, IOException {
114114
assertEquals(clusters.get(0).size(), 3);
115115
assertEquals(clusters.get(0).length(), 351);
116116
assertEquals(clusters.get(0).getClustererMethod(),
117-
SubunitClustererMethod.IDENTITY);
117+
SubunitClustererMethod.SEQUENCE);
118118

119119
params.setClustererMethod(SubunitClustererMethod.STRUCTURE);
120+
params.setStructureCoverageThreshold(0.8);
120121
params.setInternalSymmetry(true);
121-
params.setRmsdThreshold(3.0);
122+
params.setRMSDThreshold(3.0);
122123

123124
clusters = SubunitClusterer.cluster(s, params);
124125

biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/symmetry/TestQuatSymmetryDetectorExamples.java

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,7 @@ public void testInternalSymmetry() throws IOException, StructureException {
179179
SubunitClustererParameters cp = new SubunitClustererParameters();
180180
cp.setClustererMethod(SubunitClustererMethod.STRUCTURE);
181181
cp.setInternalSymmetry(true);
182-
cp.setCoverageThreshold(0.75); // Lower coverage for internal symm
182+
cp.setStructureCoverageThreshold(0.75); // Lower coverage for internal symm
183183

184184
QuatSymmetryParameters symmParams = new QuatSymmetryParameters();
185185
QuatSymmetryResults symmetry = QuatSymmetryDetector.calcGlobalSymmetry(
@@ -245,7 +245,9 @@ public void testPseudoIdentity95() throws IOException, StructureException {
245245
Structure pdb = StructureIO.getStructure("BIO:4DZ8:1");
246246

247247
SubunitClustererParameters cp = new SubunitClustererParameters();
248-
cp.setClustererMethod(SubunitClustererMethod.IDENTITY);
248+
cp.setSequenceIdentityThreshold(0.95);
249+
cp.setSequenceCoverageThreshold(0.95);
250+
cp.setClustererMethod(SubunitClustererMethod.SEQUENCE);
249251
QuatSymmetryParameters symmParams = new QuatSymmetryParameters();
250252

251253
QuatSymmetryResults symmetry = QuatSymmetryDetector.calcGlobalSymmetry(
@@ -254,7 +256,7 @@ public void testPseudoIdentity95() throws IOException, StructureException {
254256
assertEquals("C2", symmetry.getSymmetry());
255257
assertEquals("A2", symmetry.getStoichiometry());
256258
assertFalse(symmetry.isPseudoStoichiometric());
257-
assertEquals(SubunitClustererMethod.IDENTITY, symmetry.getSubunitClusters().get(0).getClustererMethod());
259+
assertEquals(SubunitClustererMethod.SEQUENCE, symmetry.getSubunitClusters().get(0).getClustererMethod());
258260

259261
}
260262
}

biojava-integrationtest/src/test/resources/ce_1fdo.A_2iv2.X.out

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
<AFPChain name1="1FDO.A" name2="2IV2.X" method="jCE" version="1.2" alnLength="720" blockNum="1" gapLen="28" optLength="692" totalLenIni="697" alignScore="1551.34" chainRmsd="0.91" identity="0.9957" normAlignScore="0.00" probability="8.40e+00" similarity="0.9957" similarity1="97" similarity2="99" totalRmsdIni="1.36" totalRmsdOpt="0.91" ca1Length="715" ca2Length="697" afpNum="4" alignScoreUpdate="0.00" time="-1">
1+
<AFPChain name1="1FDO.A" name2="2IV2.X" method="jCE" version="1.2" alnLength="720" blockNum="1" gapLen="28" optLength="692" totalLenIni="697" alignScore="1551.34" chainRmsd="0.91" identity="0.9957" normAlignScore="0.00" probability="8.40e+00" similarity="0.9957" similarity1="97" similarity2="99" totalRmsdIni="1.36" totalRmsdOpt="0.91" ca1Length="715" ca2Length="697" afpNum="4" alignScoreUpdate="0.00" time="-1" tmScore="0.96">
22
<block blockNr="0" blockSize="0" blockScore="0.00" blockRmsd="0.91" blockGap="0">
33
<eqr eqrNr="0" pdbres1="1" chain1="A" pdbres2="1" chain2="X" />
44
<eqr eqrNr="1" pdbres1="2" chain1="A" pdbres2="2" chain2="X" />

biojava-structure-gui/src/main/java/demo/DemoQuatSymmetryJmol.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ public static void main(String[] args) throws IOException,
7272
SubunitClustererParameters cp = new SubunitClustererParameters();
7373
cp.setClustererMethod(SubunitClustererMethod.SEQUENCE); // normal
7474
// cp.setClustererMethod(SubunitClustererMethod.STRUCTURE); // pseudo
75-
cp.setCoverageThreshold(0.9);
75+
cp.setSequenceCoverageThreshold(0.9);
7676

7777
// Calculate and display the global symmetry
7878
QuatSymmetryResults globalSymmetry = QuatSymmetryDetector

biojava-structure/src/main/java/demo/DemoSymmetry.java

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import org.biojava.nbio.structure.StructureIO;
88
import org.biojava.nbio.structure.align.multiple.MultipleAlignment;
99
import org.biojava.nbio.structure.cluster.SubunitCluster;
10+
import org.biojava.nbio.structure.cluster.SubunitClustererMethod;
1011
import org.biojava.nbio.structure.cluster.SubunitClustererParameters;
1112
import org.biojava.nbio.structure.symmetry.core.QuatSymmetryDetector;
1213
import org.biojava.nbio.structure.symmetry.core.QuatSymmetryParameters;
@@ -39,7 +40,8 @@ private static void findQuatSym(Structure bioAssembly) throws StructureException
3940
System.out.println("GLOBAL SYMMETRY, NO CLUSTERING");
4041
SubunitClustererParameters clusterParams = new SubunitClustererParameters();
4142
clusterParams.setSequenceIdentityThreshold(0.95);
42-
clusterParams.setRmsdThreshold(0.0);
43+
clusterParams.setRMSDThreshold(0.0);
44+
clusterParams.setClustererMethod(SubunitClustererMethod.SEQUENCE);
4345

4446
QuatSymmetryResults globalResults = QuatSymmetryDetector.calcGlobalSymmetry(bioAssembly, symmParams, clusterParams);
4547

@@ -61,32 +63,33 @@ private static void findQuatSym(Structure bioAssembly) throws StructureException
6163
System.out.println("\nGLOBAL SYMMETRY, WITH CLUSTERING (PSEUDO-SYMMETRY)");
6264
clusterParams = new SubunitClustererParameters();
6365
// we can either set sequence identity to 40% or rmsd to 2, both would have the same effect of clustering the alpha/beta subunits together
64-
clusterParams.setSequenceIdentityThreshold(0.40);
65-
clusterParams.setRmsdThreshold(0.0);
66+
clusterParams.setSequenceIdentityThreshold(0.4);
67+
clusterParams.setClustererMethod(SubunitClustererMethod.STRUCTURE);
68+
clusterParams.setRMSDThreshold(3.0);
69+
70+
globalResults = QuatSymmetryDetector.calcGlobalSymmetry(bioAssembly, symmParams, clusterParams);
71+
72+
6673

67-
globalResults = QuatSymmetryDetector.calcGlobalSymmetry(bioAssembly, symmParams, clusterParams);
68-
69-
70-
7174
System.out.println(globalResults.getSymmetry() + (globalResults.isPseudoStoichiometric()?"(pseudo)":""));
72-
75+
7376
System.out.println("There are "+globalResults.getSubunitClusters().size()+" subunit clusters");
7477
i = 1;
7578
for (SubunitCluster suc : globalResults.getSubunitClusters()) {
7679
//System.out.println(suc.getClustererMethod());
7780
MultipleAlignment ma = suc.getMultipleAlignment();
78-
81+
7982
System.out.printf("Cluster %d (clustered by %s), RMSD = %4.2f\n", i, suc.getClustererMethod(), ma.getScore("RMSD"));
80-
83+
8184
i++;
8285
}
8386

84-
87+
8588
System.out.println("\n\nLOCAL SYMMETRIES");
8689
List<QuatSymmetryResults> localResults = QuatSymmetryDetector.calcLocalSymmetries(bioAssembly, symmParams, clusterParams);
8790

8891
System.out.println("Number of local symmetries: "+localResults.size());
89-
92+
9093
for (QuatSymmetryResults results : localResults) {
9194
System.out.println(results.getSymmetry()+" "+results.getStoichiometry());
9295
}

biojava-structure/src/main/java/org/biojava/nbio/structure/Calc.java

Lines changed: 21 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -774,10 +774,7 @@ public static final Atom getCentroid(Atom[] atomSet) {
774774
if (atomSet.length==0)
775775
throw new IllegalArgumentException("Atom array has length 0, can't calculate centroid!");
776776

777-
// if we don't catch this case, the centroid returned is (NaN,NaN,NaN), which can cause lots of problems down the line
778-
if (atomSet.length==0)
779-
throw new IllegalArgumentException("Atom array has length 0, can't calculate centroid!");
780-
777+
781778
double[] coords = new double[3];
782779

783780
coords[0] = 0;
@@ -805,7 +802,7 @@ public static final Atom getCentroid(Atom[] atomSet) {
805802
* Returns the center of mass of the set of atoms. Atomic masses of the
806803
* Atoms are used.
807804
*
808-
* @param atomSet
805+
* @param points
809806
* a set of Atoms
810807
* @return an Atom representing the center of mass
811808
*/
@@ -1264,9 +1261,9 @@ public static double rmsd(Atom[] x, Atom[] y) {
12641261
/**
12651262
* Calculate the TM-Score for the superposition.
12661263
*
1267-
* <em>Normalizes by the <strong>maximum</strong>-length structure (that is, {@code max\{len1,len2\}}) rather than the minimum.</em>
1264+
* <em>Normalizes by the <strong>minimum</strong>-length structure (that is, {@code min\{len1,len2\}}).</em>
12681265
*
1269-
* Atom sets must be superposed.
1266+
* Atom sets must be pre-rotated.
12701267
*
12711268
* <p>
12721269
* Citation:<br/>
@@ -1284,44 +1281,15 @@ public static double rmsd(Atom[] x, Atom[] y) {
12841281
* The full length of the protein supplying atomSet2
12851282
* @return The TM-Score
12861283
* @throws StructureException
1287-
* @see {@link #getTMScore(Atom[], Atom[], int, int)}, which normalizes by
1288-
* the minimum length
12891284
*/
1290-
public static double getTMScoreAlternate(Atom[] atomSet1, Atom[] atomSet2,
1291-
int len1, int len2) throws StructureException {
1292-
if (atomSet1.length != atomSet2.length) {
1293-
throw new StructureException(
1294-
"The two atom sets are not of same length!");
1295-
}
1296-
if (atomSet1.length > len1) {
1297-
throw new StructureException(
1298-
"len1 must be greater or equal to the alignment length!");
1299-
}
1300-
if (atomSet2.length > len2) {
1301-
throw new StructureException(
1302-
"len2 must be greater or equal to the alignment length!");
1303-
}
1304-
1305-
int Lmax = Math.max(len1, len2);
1306-
int Laln = atomSet1.length;
1307-
1308-
double d0 = 1.24 * Math.cbrt(Lmax - 15.) - 1.8;
1309-
double d0sq = d0 * d0;
1310-
1311-
double sum = 0;
1312-
for (int i = 0; i < Laln; i++) {
1313-
double d = Calc.getDistance(atomSet1[i], atomSet2[i]);
1314-
sum += 1. / (1 + d * d / d0sq);
1315-
}
1316-
1317-
return sum / Lmax;
1285+
public static double getTMScore(Atom[] atomSet1, Atom[] atomSet2,
1286+
int len1, int len2) throws StructureException {
1287+
return getTMScore(atomSet1, atomSet2, len1, len2,true);
13181288
}
13191289

13201290
/**
13211291
* Calculate the TM-Score for the superposition.
13221292
*
1323-
* <em>Normalizes by the <strong>minimum</strong>-length structure (that is, {@code min\{len1,len2\}}).</em>
1324-
*
13251293
* Atom sets must be pre-rotated.
13261294
*
13271295
* <p>
@@ -1338,11 +1306,15 @@ public static double getTMScoreAlternate(Atom[] atomSet1, Atom[] atomSet2,
13381306
* The full length of the protein supplying atomSet1
13391307
* @param len2
13401308
* The full length of the protein supplying atomSet2
1309+
* @param normalizeMin
1310+
* Whether to normalize by the <strong>minimum</strong>-length structure,
1311+
* that is, {@code min\{len1,len2\}}. If false, normalized by the {@code max\{len1,len2\}}).
1312+
*
13411313
* @return The TM-Score
13421314
* @throws StructureException
13431315
*/
13441316
public static double getTMScore(Atom[] atomSet1, Atom[] atomSet2, int len1,
1345-
int len2) throws StructureException {
1317+
int len2, boolean normalizeMin) throws StructureException {
13461318
if (atomSet1.length != atomSet2.length) {
13471319
throw new StructureException(
13481320
"The two atom sets are not of same length!");
@@ -1356,10 +1328,16 @@ public static double getTMScore(Atom[] atomSet1, Atom[] atomSet2, int len1,
13561328
"len2 must be greater or equal to the alignment length!");
13571329
}
13581330

1359-
int Lmin = Math.min(len1, len2);
1331+
int Lnorm;
1332+
if (normalizeMin) {
1333+
Lnorm = Math.min(len1, len2);
1334+
} else {
1335+
Lnorm = Math.max(len1, len2);
1336+
}
1337+
13601338
int Laln = atomSet1.length;
13611339

1362-
double d0 = 1.24 * Math.cbrt(Lmin - 15.) - 1.8;
1340+
double d0 = 1.24 * Math.cbrt(Lnorm - 15.) - 1.8;
13631341
double d0sq = d0 * d0;
13641342

13651343
double sum = 0;
@@ -1368,6 +1346,6 @@ public static double getTMScore(Atom[] atomSet1, Atom[] atomSet2, int len1,
13681346
sum += 1. / (1 + d * d / d0sq);
13691347
}
13701348

1371-
return sum / Lmin;
1349+
return sum / Lnorm;
13721350
}
13731351
}

biojava-structure/src/main/java/org/biojava/nbio/structure/align/ce/CECalculator.java

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
import org.biojava.nbio.structure.align.model.AFP;
3535
import org.biojava.nbio.structure.align.model.AFPChain;
3636
import org.biojava.nbio.structure.align.util.AFPAlignmentDisplay;
37+
import org.biojava.nbio.structure.align.util.AFPChainScorer;
3738
import org.biojava.nbio.structure.geometry.Matrices;
3839
import org.biojava.nbio.structure.geometry.SuperPositions;
3940
import org.biojava.nbio.structure.jama.Matrix;
@@ -921,6 +922,8 @@ public void nextStep( AFPChain afpChain,
921922

922923
convertAfpChain(afpChain, ca1, ca2);
923924
AFPAlignmentDisplay.getAlign(afpChain, ca1, ca2);
925+
double tmScore = AFPChainScorer.getTMScore(afpChain, ca1, ca2,false);
926+
afpChain.setTMScore(tmScore);
924927
}
925928

926929

@@ -1155,11 +1158,8 @@ private void checkBestTraces( AFPChain afpChain,
11551158

11561159
afpChain.setAfpSet(afpSet);
11571160

1158-
1159-
1160-
11611161
//System.out.println("z:"+z + " zThr" + zThr+ " bestTraceScore " + bestTraceScore + " " + nGaps );
1162-
if(z>=zThr) {
1162+
if(params.isOptimizeAlignment() && z>=zThr) {
11631163
nGaps = optimizeSuperposition(afpChain,nse1, nse2, strLen, rmsd, ca1, ca2,nGaps,strBuf1,strBuf2);
11641164
// if(isPrint) {
11651165
// /*
@@ -1183,6 +1183,7 @@ private void checkBestTraces( AFPChain afpChain,
11831183
align_se2[lcmp+l]=bestTrace2[k]+l;
11841184
}
11851185
lali_x_+=bestTraceLen[k];
1186+
lcmp+=bestTraceLen[k];
11861187
if(k<nBestTrace-1) {
11871188
if(bestTrace1[k]+bestTraceLen[k]!=bestTrace1[k+1])
11881189
for(int l=bestTrace1[k]+bestTraceLen[k]; l<bestTrace1[k+1]; l++) {
@@ -1199,6 +1200,7 @@ private void checkBestTraces( AFPChain afpChain,
11991200
}
12001201
}
12011202
nAtom=lali_x_;
1203+
afpChain.setTotalRmsdOpt(afpChain.getTotalRmsdIni());
12021204
}
12031205

12041206
timeEnd = System.currentTimeMillis();

0 commit comments

Comments
 (0)