Skip to content

Commit ebbc288

Browse files
committed
Added another couple of examples and cleaned up the existing ones
1 parent ae48d15 commit ebbc288

File tree

5 files changed

+86
-38
lines changed

5 files changed

+86
-38
lines changed
Lines changed: 18 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1,30 +1,38 @@
11
package org.biojava.examples;
22

3-
import org.apache.spark.api.java.JavaDoubleRDD;
4-
import org.biojava.spark.data.AtomContactRDD;
3+
54
import org.biojava.spark.data.AtomSelectObject;
65
import org.biojava.spark.data.StructureDataRDD;
76

87
/**
9-
* Test class to consider counting atom-atom contacts.
8+
* An example reading the PDB and finding the mean C-alpha
9+
* to C-alpha distance between Proline and Lysine.
1010
* @author Anthony Bradley
1111
*
1212
*/
1313
public class CountContacts {
1414

15+
16+
/**
17+
* An example reading the PDB and finding the mean C-alpha
18+
* to C-alpha distance between Proline and Lysine.
19+
* @param args the input list of arguments (not used)
20+
*/
1521
public static void main(String[] args) {
1622
double cutoff = 5.0;
23+
// Start the timer
24+
Long startTime = System.currentTimeMillis();
1725
// Get the atom contacts
18-
AtomContactRDD contacts = new StructureDataRDD("/Users/anthony/full")
19-
.filterResolution(3.0)
20-
.filterRfree(0.3)
26+
Double mean = new StructureDataRDD("/Users/anthony/full")
2127
.findContacts(new AtomSelectObject()
2228
.groupNameList(new String[] {"PRO","LYS"})
2329
.elementNameList(new String[] {"C"})
2430
.atomNameList(new String[] {"CA"}),
25-
cutoff);
26-
// Get the CA-CA contact distances
27-
JavaDoubleRDD contactDist = contacts.getDistanceDistOfAtomInts("CA", "CA");
28-
System.out.println(contactDist.mean());
31+
cutoff)
32+
.getDistanceDistOfAtomInts("CA", "CA")
33+
.mean();
34+
System.out.println("Mean PRO-LYS CA-CA distance: "+mean);
35+
System.out.println("Found in "+(System.currentTimeMillis()-startTime)+" ms");
36+
2937
}
3038
}
Lines changed: 31 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,31 @@
1+
package org.biojava.examples;
2+
3+
import java.util.Map;
4+
5+
import org.biojava.spark.data.StructureDataRDD;
6+
7+
/**
8+
* An example reading the PDB and finding the mean frequency of
9+
* every element in the PDB.
10+
* @author Anthony Bradley
11+
*/
12+
public class CountElements {
13+
14+
/**
15+
* An example reading the PDB and finding the mean frequency of
16+
* every element in the PDB.
17+
* @param args
18+
*/
19+
public static void main(String[] args) {
20+
21+
// Starter counter
22+
Long startTime = System.currentTimeMillis();
23+
24+
Map<String, Long> elementCountMap = new StructureDataRDD("/Users/anthony/fulk")
25+
.findAtoms()
26+
.countByElement();
27+
System.out.println("Element map"+elementCountMap);
28+
System.out.println("Found in "+(System.currentTimeMillis()-startTime)+" ms");
29+
}
30+
31+
}

src/main/java/org/biojava/examples/MapChains.java

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -6,14 +6,14 @@
66
import org.biojava.spark.data.StructureDataRDD;
77

88
/**
9-
* Class to map the PDB to chains an operate on it.
9+
* Example of mapping the PDB to chains of just C-alpha coords.
1010
* @author Anthony Bradley
1111
*
1212
*/
1313
public class MapChains {
1414

1515
/**
16-
* The main function
16+
* Example of mapping the PDB to chains of just C-alpha coords.
1717
* @param args
1818
*/
1919
public static void main(String[] args) {
@@ -23,7 +23,8 @@ public static void main(String[] args) {
2323
.filterResolution(3.0)
2424
.filterRfree(0.3)
2525
.getCalphaPair();
26-
27-
26+
27+
28+
2829
}
2930
}
Lines changed: 23 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -1,40 +1,39 @@
11
package org.biojava.examples;
22

3-
import org.biojava.spark.data.AtomDataRDD;
4-
import org.biojava.spark.data.AtomSelectObject;
5-
import org.biojava.spark.data.SparkUtils;
63
import org.biojava.spark.data.StructureDataRDD;
74

85
/**
9-
* A very simple example reading the PDB and printing the
10-
* composition of elements.
6+
* A very simple example reading the PDB and finding the number
7+
* of entries in the PDB with resolution better than 3.0 Angstrom
8+
* and R-free better than 0.3.
119
* @author Anthony Bradley
1210
*
1311
*/
1412
public class SimpleExample {
1513

14+
/**
15+
* A very simple example reading the PDB and finding the number
16+
* of entries in the PDB with resolution better than 3.0 Angstrom
17+
* and R-free better than 0.3.
18+
* @param args the input list of arguments.
19+
*/
1620
public static void main(String[] args) {
1721

22+
// Specify your limits for R-free and resolution
23+
double maxResolution = 3.0;
24+
double maxRfree = 0.3;
25+
26+
// Start the timer
1827
Long startTime = System.currentTimeMillis();
19-
// Get the RDD of the atom contacts
20-
AtomDataRDD atomDataRDD = new StructureDataRDD("/Users/anthony/full")
21-
.filterResolution(3.0)
22-
.filterRfree(0.3)
23-
.findAtoms(new AtomSelectObject().charged(false)
24-
.atomNameList(new String[] {"CA","N"})
25-
.groupNameList(new String[] {"LYS","PRO"}));
26-
// // Now cache this
27-
atomDataRDD.cacheData();
28-
System.out.println(atomDataRDD.countByElement());
29-
// // Now get the group contacts
30-
// System.out.println(atomContactRDD.countInterGroupContacts("HIS", "LYS"));
31-
// System.out.println(atomContactRDD.countInterGroupContacts("LYS", "PRO"));
32-
// System.out.println(atomContactRDD.countInterGroupContacts("LYS", "HIS"));
33-
// System.out.println(atomContactRDD.getDistanceDistOfAtomInts("CA", "CA")
34-
// .mean());
3528

36-
SparkUtils.shutdown();
37-
Long endTime = System.currentTimeMillis();
38-
System.out.println("Total time: "+((endTime-startTime)/1000));
29+
// Count the entries that pass both the resolution and R-free filters
30+
Long numEntries = new StructureDataRDD("/Users/anthony/full")
31+
.filterResolution(maxResolution)
32+
.filterRfree(maxRfree)
33+
.size();
34+
35+
System.out.println(numEntries+" found with resolution better than"+maxResolution+
36+
" and R-free less than "+maxRfree);
37+
System.out.println("Found in "+(System.currentTimeMillis()-startTime)+" ms");
3938
}
4039
}

src/main/java/org/biojava/spark/data/StructureDataRDD.java

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -139,5 +139,14 @@ public JavaPairRDD<String, Point3d[]> getCalphaPair() {
139139
.flatMapToPair(new Point3dCalpha());
140140
}
141141

142+
/**
143+
* Get the number of entries in the RDD.
144+
* @return the {@link Long} number of entries
145+
*/
146+
public Long size() {
147+
return javaPairRdd
148+
.count();
149+
}
150+
142151

143152
}

0 commit comments

Comments
 (0)