title
BioJava:CookBook3:MSAProfiler
permalink
wiki/BioJava%3ACookBook3%3AMSAProfiler
How to profile the time and memory used for Multiple Sequence Alignment in BioJava
package org .biojava3 .alignment ;
import java .io .File ; import java .io .PrintStream ; import
java .util .ArrayList ; import java .util .List ;
import org .biojava .nbio .alignment .Alignments .PairwiseSequenceScorerType ;
import org .biojava .nbio .alignment .Alignments .ProfileProfileAlignerType ;
import org .biojava .nbio .alignment .template .GapPenalty ; import
org .biojava .nbio .alignment .template .PairwiseSequenceScorer ; import
org .biojava .nbio .alignment .template .Profile ; import
org .biojava .nbio .alignment .template .SubstitutionMatrix ; import
org .biojava .nbio .core .sequence .ProteinSequence ; import
org .biojava .nbio .core .sequence .compound .AminoAcidCompound ; import
org .biojava .nbio .core .sequence .io .FastaReaderHelper ; import
org .biojava .nbio .core .util .ConcurrencyTools ;
public class CookbookMSAProfiler {
private static class Profiler {
private long maxMemoryUsed , timeCheckpoint ;
private final long timeStart ;
private Profiler () {
maxMemoryUsed = Runtime .getRuntime ().totalMemory ();
timeStart = timeCheckpoint = System .nanoTime ();
}
private long getMaxMemoryUsed () {
return maxMemoryUsed = Math .max (maxMemoryUsed , Runtime .getRuntime ().totalMemory ());
}
private long getTimeSinceCheckpoint () {
return System .nanoTime () - timeCheckpoint ;
}
private long getTimeSinceStart () {
return System .nanoTime () - timeStart ;
}
private void setCheckpoint () {
maxMemoryUsed = Math .max (maxMemoryUsed , Runtime .getRuntime ().totalMemory ());
timeCheckpoint = System .nanoTime ();
}
}
public static void main (String [] args ) throws Exception {
if (args .length < 1 ) {
System .err .println ("The first argument must be a fasta file of protein sequences." );
return ;
}
// ConcurrencyTools.setThreadPoolSingle();
PrintStream fout = new PrintStream ("msa.txt" );
Profiler profiler = new Profiler ();
System .out .printf ("Loading sequences from %s... " , args [0 ]);
List <ProteinSequence > list = new ArrayList <ProteinSequence >();
list .addAll (FastaReaderHelper .readFastaProteinSequence (new File (args [0 ])).values ());
if (args .length > 1 && Integer .parseInt (args [1 ]) < list .size ()) {
System .out .printf ("%s/%d" , args [1 ], list .size ());
list = list .subList (0 , Integer .parseInt (args [1 ]));
} else {
System .out .printf ("%d" , list .size ());
}
System .out .printf (" sequences in %d ms using %d kB%n%n" , profiler .getTimeSinceCheckpoint ()/1000000 ,
profiler .getMaxMemoryUsed ()/1024 );
profiler .setCheckpoint ();
System .out .print ("Stage 1: pairwise similarity calculation... " );
GapPenalty gaps = new SimpleGapPenalty ();
SubstitutionMatrix <AminoAcidCompound > blosum62 = new SimpleSubstitutionMatrix <AminoAcidCompound >();
List <PairwiseSequenceScorer <ProteinSequence , AminoAcidCompound >> scorers = Alignments .getAllPairsScorers (list ,
PairwiseSequenceScorerType .GLOBAL_IDENTITIES , gaps , blosum62 );
Alignments .runPairwiseScorers (scorers );
System .out .printf ("%d scores in %d ms using %d kB%n%n" , scorers .size (),
profiler .getTimeSinceCheckpoint ()/1000000 , profiler .getMaxMemoryUsed ()/1024 );
profiler .setCheckpoint ();
System .out .print ("Stage 2: hierarchical clustering into a guide tree... " );
GuideTree <ProteinSequence , AminoAcidCompound > tree = new GuideTree <ProteinSequence , AminoAcidCompound >(list ,
scorers );
scorers = null ;
System .out .printf ("%d ms using %d kB%n%n%s%n%n" , profiler .getTimeSinceCheckpoint ()/1000000 ,
profiler .getMaxMemoryUsed ()/1024 , tree );
profiler .setCheckpoint ();
System .out .print ("Stage 3: progressive alignment... " );
Profile <ProteinSequence , AminoAcidCompound > msa = Alignments .getProgressiveAlignment (tree ,
ProfileProfileAlignerType .GLOBAL , gaps , blosum62 );
System .out .printf ("%d profile-profile alignments in %d ms using %d kB%n%n" , list .size () - 1 ,
profiler .getTimeSinceCheckpoint ()/1000000 , profiler .getMaxMemoryUsed ()/1024 );
fout .print (msa );
fout .close ();
ConcurrencyTools .shutdown ();
System .out .printf ("Total time: %d ms%nMemory use: %d kB%n" , profiler .getTimeSinceStart ()/1000000 ,
profiler .getMaxMemoryUsed ()/1024 );
}
}