Skip to content

Commit 19dedce

Browse files
committed
replace java BiojavaAdamContext with scala BiojavaAdamContext
1 parent c2a1f9d commit 19dedce

31 files changed

+1049
-635
lines changed

README.md

Lines changed: 47 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# biojava-adam
22

3-
BioJava and ADAM integration.
3+
[Biojava](http://biojava.org) and ADAM integration.
44

55
### Hacking biojava-adam
66

@@ -38,37 +38,58 @@ Type :help for more information.
3838
scala> import org.biojava.nbio.adam.BiojavaAdamContext
3939
import org.biojava.nbio.adam.BiojavaAdamContext
4040
41-
scala> val biojavaContext = new BiojavaAdamContext(sc)
42-
biojavaContext: org.biojava.nbio.adam.BiojavaAdamContext = org.biojava.nbio.adam.BiojavaAdamContext@1e041848
41+
scala> val bc = BiojavaAdamContext(sc)
42+
bc: org.biojava.nbio.adam.BiojavaAdamContext = org.biojava.nbio.adam.BiojavaAdamContext@4f8900b0
4343
44-
scala> val reads = biojavaContext.loadFastqReads("fastq_sample1.fq")
45-
reads: org.bdgenomics.adam.rdd.sequence.ReadRDD = ReadRDD(MapPartitionsRDD[1] at map at BiojavaAdamContext.java:180,SequenceDictionary{
46-
H06HDADXX130110:1:2103:11970:57672/2->250
47-
H06HDADXX130110:2:2116:3345:91806/2->250
48-
H06HDADXX130110:1:2103:11970:57672/1->250
49-
H06HDADXX130110:2:2116:3345:91806/1->250
50-
H06JUADXX130110:1:1108:6424:55322/1->250
51-
H06JUADXX130110:1:1108:6424:55322/2->250})
44+
scala> val reads = bc.loadFastqReads("src/test/resources/org/biojava/nbio/adam/bqsr.0.fq")
45+
reads: org.bdgenomics.adam.rdd.read.ReadRDD = RDDBoundReadRDD with 0 reference sequences
5246
5347
scala> reads.rdd.first
54-
res0: org.bdgenomics.formats.avro.Read = {"name": "H06HDADXX130110:2:2116:3345:91806/1", "description":
55-
"H06HDADXX130110:2:2116:3345:91806/1", "alphabet": "DNA", "sequence": "GTTAGGGTTAGGGTTGGGTTAGGGTTAGGGTT
56-
AGGGTTAGGGGTAGGGTTAGGGTTAGGGGTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGGTAGGGCTAGGGTTAAGGGTAGGGTTAGCGAAAGGGCTG
57-
GGGTTAGGGGTGCGGGTACGCGTAGCATTAGGGCTAGAAGTAGGATCTGCAGTGCCTGACCGCGTCTGCGCGGCGACTGCCCAAAGCCTGGGGCCGACTCCAG
58-
GCTGAAGCTCAT", "length": 250, "qualityScores": ">=<=???>?>???=??>>8<?><=2=<===1194<?;:?>>?#3==>########
59-
#######################################################################################################
60-
############################################################################################",
61-
"qualityScoreVariant": "FASTQ_SANGER"}
62-
63-
scala> val sequences = biojavaContext.loadGenbankDna("SCU49845.gb")
64-
sequences: org.bdgenomics.adam.rdd.sequence.SequenceRDD = SequenceRDD(MapPartitionsRDD[7] at map at BiojavaAdamContext.java:244,SequenceDictionary{
65-
U49845->5028})
66-
67-
scala> sequences.rdd.first
68-
res1: org.bdgenomics.formats.avro.Sequence = {"name": "U49845", "description": "Saccharomyces cerevisiae
48+
res0: org.bdgenomics.formats.avro.Read = {"name": "SRR062634.10022079/1", "description":
49+
"SRR062634.10022079/1", "alphabet": "DNA", "sequence": "AATTCAAAACCAGCCTGGCCAATATGGTGAAACCTCATCTCTACTAAAAA
50+
TACAAAAATTAGCCAGGCATGGTGGTGCGTGCGTGTAGTCCCAGCTACTT", "length": 100, "qualityScores": "?-DDBEEB=EEEDDEDEEEA
51+
:D?5?E?CEBE5ED?D:AEDEDEDED-B,BC0AC,BB6@CDBDEC?BCBAA@5,=8CA-?A>?2:&048<BB5BE#####", "qualityScoreVariant":
52+
"FASTQ_SANGER", "attributes": {}}
53+
54+
scala> val dna = bc.loadBiojavaFastaDna("src/test/resources/org/biojava/nbio/adam/hla_gen.0.fa")
55+
dna: org.bdgenomics.adam.rdd.sequence.SequenceRDD = RDDBoundSequenceRDD with 0 reference sequences
56+
57+
scala> dna.rdd.first
58+
res1: org.bdgenomics.formats.avro.Sequence = {"name": "HLA:HLA00001 A*01:01:01:01 3503 bp",
59+
"description": null, "alphabet": "DNA", "sequence": "CAGGAGCAGAGGGGTCAGGGCGAAGTCCCAGGGCCCCAGGCGTGGCTCTCAG
60+
GGTCTCAGGCCCCGAAGGCGGTGTATGGATTGGGGAGTCCCAGCCTTGGGGATTCCCCAACTCCGCAGTTTCTTTTCTCCCTCTCCCAACCTACGTAGGGTCCTT
61+
CATCCTGGATACTCACGACGCGGACCCAGTTCTCACTCCCATTGGGTGTCGGGTTTCCAGAGAAGCCAATCAGTGTCGTCGCGGTCGCTGTTCTAAAGTCCGCAC
62+
...
63+
64+
scala> val prot = bc.loadBiojavaFastaProtein("src/test/resources/org/biojava/nbio/adam/hla_prot.0.fa")
65+
prot: org.bdgenomics.adam.rdd.sequence.SequenceRDD = RDDBoundSequenceRDD with 0 reference sequences
66+
67+
scala> prot.rdd.first
68+
res2: org.bdgenomics.formats.avro.Sequence = {"name": "HLA:HLA00001 A*01:01:01:01 365 bp", "description":
69+
null, "alphabet": "PROTEIN", "sequence": "MAVMAPRTLLLLLSGALALTQTWAGSHSMRYFFTSVSRPGRGEPRFIAVGYVDDTQFVRFDSD
70+
AASQKMEPRAPWIEQEGPEYWDQETRNMKAHSQTDRANLGTLRGYYNQSEDGSHTIQIMYGCDVGPDGRFLRGYRQDAYDGKDYIALNEDLRSWTAADMAAQITK
71+
RKWEAVHAAEQRRVYLEGRCVDGLRRYLENGKETLQRTDPPKTHMTHHPISDHEATLRCWALGFYPAEITLTWQRDGEDQTQDTELVETRPAGDGTFQKWAAVVV
72+
PSGEEQRYTCHVQHEGLPKPLTLRWELSSQPTIPIVGIIAGLVLLGAVITGAVVAAVMWRRKSSDRKGGSYTQAASSDSAQGSDVSLTACKV", "length":
73+
365, "attributes": {}}
74+
75+
scala> val genbankDna = bc.loadGenbankDna("src/test/resources/org/biojava/nbio/adam/SCU49845.gb")
76+
genbankDna: org.bdgenomics.adam.rdd.sequence.SequenceRDD = RDDBoundSequenceRDD with 0 reference sequences
77+
78+
scala> genbankDna.rdd.first
79+
res4: org.bdgenomics.formats.avro.Sequence = {"name": "U49845", "description": "Saccharomyces cerevisiae
6980
TCP1-beta gene, partial cds; and Axl2p\n(AXL2) and Rev7p (REV7) genes, complete cds.", "alphabet": "DNA",
7081
"sequence": "GATCCTCCATATACAACGGTATCTCCACCTCAGGTTTAGATCTCAACAACGGAACCATTGCCGACATGAGACAGTTAGGTATCGTCGAGAGT
7182
TACAAGCTAAAACGAGCAGTAGTCAGCTCTGCATCTGAAGCCGCTGAAGTTCTACTAAGGGTGGATAACATCATCCGTGCAAGACCAAGAACCGCCAATAGACAA
7283
CATATGTAACATATTTAGGATATACCTCGAAAATAATAAACCGCCACACTGTCATTATTATAATTAGAAACAGAACGCAAAAATTATCCACTATATAATTCAAAG
7384
...
85+
86+
scala> val features = bc.loadGenbankDnaFeatures("src/test/resources/org/biojava/nbio/adam/SCU49845.gb")
87+
features: org.bdgenomics.adam.rdd.feature.FeatureRDD = RDDBoundFeatureRDD with 0 reference sequences
88+
89+
scala> features.rdd.first
90+
res5: org.bdgenomics.formats.avro.Feature = {"featureId": null, "name": "source", "source": null,
91+
"featureType": null, "contigName": "U49845", "start": 0, "end": 5028, "strand": "FORWARD", "phase":
92+
null, "frame": null, "score": null, "geneId": null, "transcriptId": null, "exonId": null, "aliases":
93+
[], "parentIds": [], "target": null, "gap": null, "derivesFrom": null, "notes": [], "dbxrefs": [],
94+
"ontologyTerms": [], "circular": null, "attributes": {}}
7495
```

pom.xml

Lines changed: 21 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
<?xml version="1.0" encoding="ISO-8859-1"?>
22
<!--
33
4-
biojava-adam BioJava and ADAM integration.
5-
Copyright (c) 2017 held jointly by the individual authors.
4+
biojava-adam Biojava and ADAM integration.
5+
Copyright (c) 2017-2018 held jointly by the individual authors.
66
77
This library is free software; you can redistribute it and/or modify it
88
under the terms of the GNU Lesser General Public License as published
@@ -34,7 +34,7 @@
3434
<packaging>jar</packaging>
3535
<name>biojava-adam</name>
3636
<version>5.0.0-SNAPSHOT</version>
37-
<description>BioJava and ADAM integration.</description>
37+
<description>Biojava and ADAM integration.</description>
3838
<url>http://www.biojava.org</url>
3939
<inceptionYear>2017</inceptionYear>
4040
<scm>
@@ -67,7 +67,8 @@
6767
<maven.enforcer.jdk-version>[1.8,)</maven.enforcer.jdk-version>
6868
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
6969
<project.build.targetEncoding>UTF-8</project.build.targetEncoding>
70-
<scala.version>2.11.8</scala.version>
70+
<scala-guice.version>4.1.0</scala-guice.version>
71+
<scala.version>2.11.12</scala.version>
7172
<scala.version.prefix>2.11</scala.version.prefix>
7273
<slf4j.version>1.7.25</slf4j.version>
7374
<spark.version>2.2.1</spark.version>
@@ -293,19 +294,24 @@
293294
<artifactId>junit</artifactId>
294295
<version>${junit.version}</version>
295296
</dependency>
297+
<dependency>
298+
<groupId>net.codingwell</groupId>
299+
<artifactId>scala-guice_${scala.version.prefix}</artifactId>
300+
<version>${scala-guice.version}</version>
301+
</dependency>
296302
<dependency>
297303
<groupId>org.apache.spark</groupId>
298-
<artifactId>spark-core_2.11</artifactId>
304+
<artifactId>spark-core_${scala.version.prefix}</artifactId>
299305
<version>${spark.version}</version>
300306
</dependency>
301307
<dependency>
302308
<groupId>org.apache.spark</groupId>
303-
<artifactId>spark-sql_2.11</artifactId>
309+
<artifactId>spark-sql_${scala.version.prefix}</artifactId>
304310
<version>${spark.version}</version>
305311
</dependency>
306312
<dependency>
307313
<groupId>org.bdgenomics.adam</groupId>
308-
<artifactId>adam-core-spark2_2.11</artifactId>
314+
<artifactId>adam-core-spark2_${scala.version.prefix}</artifactId>
309315
<version>${adam.version}</version>
310316
<exclusions>
311317
<exclusion>
@@ -363,11 +369,6 @@
363369
<artifactId>slf4j-log4j12</artifactId>
364370
<version>${slf4j.version}</version>
365371
</dependency>
366-
<dependency>
367-
<groupId>net.codingwell</groupId>
368-
<artifactId>scala-guice_${scala.version.prefix}</artifactId>
369-
<version>4.1.0</version>
370-
</dependency>
371372
</dependencies>
372373
</dependencyManagement>
373374
<dependencies>
@@ -376,19 +377,24 @@
376377
<artifactId>junit</artifactId>
377378
<scope>test</scope>
378379
</dependency>
380+
<dependency>
381+
<groupId>net.codingwell</groupId>
382+
<artifactId>scala-guice_${scala.version.prefix}</artifactId>
383+
<scope>compile</scope>
384+
</dependency>
379385
<dependency>
380386
<groupId>org.apache.spark</groupId>
381-
<artifactId>spark-core_2.11</artifactId>
387+
<artifactId>spark-core_${scala.version.prefix}</artifactId>
382388
<scope>provided</scope>
383389
</dependency>
384390
<dependency>
385391
<groupId>org.apache.spark</groupId>
386-
<artifactId>spark-sql_2.11</artifactId>
392+
<artifactId>spark-sql_${scala.version.prefix}</artifactId>
387393
<scope>provided</scope>
388394
</dependency>
389395
<dependency>
390396
<groupId>org.bdgenomics.adam</groupId>
391-
<artifactId>adam-core-spark2_2.11</artifactId>
397+
<artifactId>adam-core-spark2_${scala.version.prefix}</artifactId>
392398
<scope>compile</scope>
393399
</dependency>
394400
<dependency>
@@ -416,11 +422,6 @@
416422
<artifactId>slf4j-log4j12</artifactId>
417423
<scope>test</scope>
418424
</dependency>
419-
<dependency>
420-
<groupId>net.codingwell</groupId>
421-
<artifactId>scala-guice_${scala.version.prefix}</artifactId>
422-
<scope>compile</scope>
423-
</dependency>
424425
</dependencies>
425426
<profiles>
426427
<profile>

0 commit comments

Comments
 (0)