|
27 | 27 | import org.biojava.nbio.core.sequence.AccessionID; |
28 | 28 | import org.biojava.nbio.core.sequence.DNASequence; |
29 | 29 | import org.biojava.nbio.core.sequence.features.AbstractFeature; |
| 30 | +import org.biojava.nbio.core.sequence.features.FeatureInterface; |
30 | 31 | import org.biojava.nbio.core.sequence.features.Qualifier; |
31 | 32 | import org.biojava.nbio.core.sequence.features.TextFeature; |
32 | 33 | import org.biojava.nbio.core.sequence.location.SimpleLocation; |
| 34 | +import org.biojava.nbio.core.sequence.location.template.Location; |
| 35 | +import org.biojava.nbio.core.sequence.template.AbstractSequence; |
33 | 36 | import org.biojava.nbio.core.sequence.Strand; |
| 37 | +import org.biojava.nbio.core.sequence.compound.NucleotideCompound; |
34 | 38 | import org.junit.Assert; |
35 | 39 | import org.junit.Test; |
36 | 40 |
|
@@ -110,7 +114,7 @@ public void testGithub886() throws Exception { |
110 | 114 | Arrays.asList(seq), |
111 | 115 | GenbankWriterHelper.LINEAR_DNA); |
112 | 116 | fragwriter.close(); |
113 | | - System.out.println(fragwriter.toString().replaceAll("\r\n", "\n")); |
| 117 | + //System.out.println(fragwriter.toString().replaceAll("\r\n", "\n")); |
114 | 118 |
|
115 | 119 | // now read in the file that was created and check that the qualifiers were created correctly |
116 | 120 | InputStream readerInputStream = new ByteArrayInputStream(fragwriter.toByteArray()); |
@@ -140,4 +144,198 @@ public void testGithub886() throws Exception { |
140 | 144 | assertEquals("50%", newQualifiers.get("note7").get(0).getValue()); |
141 | 145 |
|
142 | 146 | } |
| 147 | + |
| 148 | + @Test |
| 149 | + public void testLocationJoins() throws Exception { |
| 150 | + |
| 151 | + // First read a GenBank file containing location joins |
| 152 | + InputStream inStream = GenbankWriterTest.class.getResourceAsStream("/with_joins.gb"); |
| 153 | + DNASequence sequence = GenbankReaderHelper.readGenbankDNASequence(inStream).values().iterator().next(); |
| 154 | + |
| 155 | + // Check the joins are read correctly |
| 156 | + List<FeatureInterface<AbstractSequence<NucleotideCompound>, NucleotideCompound>> features = sequence.getFeatures(); |
| 157 | + |
| 158 | + FeatureInterface<AbstractSequence<NucleotideCompound>, NucleotideCompound> join1 = features.get(0); |
| 159 | + List<Location> join1SubLocs = join1.getLocations().getSubLocations(); |
| 160 | + |
| 161 | + assertEquals("join1, getType()", "CDS", join1.getType()); |
| 162 | + assertEquals("join1, getLocations().getStrand()", "POSITIVE", join1.getLocations().getStrand().toString()); |
| 163 | + assertEquals("join1, getLocations().getSubLocations().size()", 8, join1SubLocs.size()); |
| 164 | + |
| 165 | + assertEquals("join1, SubLocation 1)", 3356, join1SubLocs.get(0).getStart().getPosition().intValue()); |
| 166 | + assertEquals("join1, SubLocation 1)", 3356, join1SubLocs.get(0).getEnd().getPosition().intValue()); |
| 167 | + |
| 168 | + assertEquals("join1, SubLocation 2)", 3500, join1SubLocs.get(1).getStart().getPosition().intValue()); |
| 169 | + assertEquals("join1, SubLocation 2)", 3792, join1SubLocs.get(1).getEnd().getPosition().intValue()); |
| 170 | + |
| 171 | + assertEquals("join1, SubLocation 3)", 3793, join1SubLocs.get(2).getStart().getPosition().intValue()); |
| 172 | + assertEquals("join1, SubLocation 3)", 3793, join1SubLocs.get(2).getEnd().getPosition().intValue()); |
| 173 | + |
| 174 | + assertEquals("join1, SubLocation 4)", 4185, join1SubLocs.get(3).getStart().getPosition().intValue()); |
| 175 | + assertEquals("join1, SubLocation 4)", 4228, join1SubLocs.get(3).getEnd().getPosition().intValue()); |
| 176 | + |
| 177 | + assertEquals("join1, SubLocation 5)", 4229, join1SubLocs.get(4).getStart().getPosition().intValue()); |
| 178 | + assertEquals("join1, SubLocation 5)", 4229, join1SubLocs.get(4).getEnd().getPosition().intValue()); |
| 179 | + |
| 180 | + assertEquals("join1, SubLocation 6)", 4348, join1SubLocs.get(5).getStart().getPosition().intValue()); |
| 181 | + assertEquals("join1, SubLocation 6)", 4676, join1SubLocs.get(5).getEnd().getPosition().intValue()); |
| 182 | + |
| 183 | + assertEquals("join1, SubLocation 7)", 4677, join1SubLocs.get(6).getStart().getPosition().intValue()); |
| 184 | + assertEquals("join1, SubLocation 7)", 4677, join1SubLocs.get(6).getEnd().getPosition().intValue()); |
| 185 | + |
| 186 | + assertEquals("join1, SubLocation 8)", 4775, join1SubLocs.get(7).getStart().getPosition().intValue()); |
| 187 | + assertEquals("join1, SubLocation 8)", 5094, join1SubLocs.get(7).getEnd().getPosition().intValue()); |
| 188 | + |
| 189 | + //qualifiers |
| 190 | + assertEquals("join1, getType()", "Joined feature", join1.getQualifiers().get("standard_name").get(0).getValue()); |
| 191 | + |
| 192 | + //Join 2 |
| 193 | + FeatureInterface<AbstractSequence<NucleotideCompound>, NucleotideCompound> join2 = features.get(1); |
| 194 | + List<Location> join2SubLocs = join1.getLocations().getSubLocations(); |
| 195 | + |
| 196 | + assertEquals("join1, getType()", "CDS", join2.getType()); |
| 197 | + assertEquals("join1, getLocations().getStrand()", "NEGATIVE", join2.getLocations().getStrand().toString()); |
| 198 | + assertEquals("join1, getLocations().getSubLocations().size()", 8, join2SubLocs.size()); |
| 199 | + |
| 200 | + assertEquals("join2, SubLocation 1)", 3356, join2SubLocs.get(0).getStart().getPosition().intValue()); |
| 201 | + assertEquals("join2, SubLocation 1)", 3356, join2SubLocs.get(0).getEnd().getPosition().intValue()); |
| 202 | + |
| 203 | + assertEquals("join2, SubLocation 2)", 3500, join2SubLocs.get(1).getStart().getPosition().intValue()); |
| 204 | + assertEquals("join2, SubLocation 2)", 3792, join2SubLocs.get(1).getEnd().getPosition().intValue()); |
| 205 | + |
| 206 | + assertEquals("join2, SubLocation 3)", 3793, join2SubLocs.get(2).getStart().getPosition().intValue()); |
| 207 | + assertEquals("join2, SubLocation 3)", 3793, join2SubLocs.get(2).getEnd().getPosition().intValue()); |
| 208 | + |
| 209 | + assertEquals("join2, SubLocation 4)", 4185, join2SubLocs.get(3).getStart().getPosition().intValue()); |
| 210 | + assertEquals("join2, SubLocation 4)", 4228, join2SubLocs.get(3).getEnd().getPosition().intValue()); |
| 211 | + |
| 212 | + assertEquals("join2, SubLocation 5)", 4229, join2SubLocs.get(4).getStart().getPosition().intValue()); |
| 213 | + assertEquals("join2, SubLocation 5)", 4229, join2SubLocs.get(4).getEnd().getPosition().intValue()); |
| 214 | + |
| 215 | + assertEquals("join2, SubLocation 6)", 4348, join2SubLocs.get(5).getStart().getPosition().intValue()); |
| 216 | + assertEquals("join2, SubLocation 6)", 4676, join2SubLocs.get(5).getEnd().getPosition().intValue()); |
| 217 | + |
| 218 | + assertEquals("join2, SubLocation 7)", 4677, join2SubLocs.get(6).getStart().getPosition().intValue()); |
| 219 | + assertEquals("join2, SubLocation 7)", 4677, join2SubLocs.get(6).getEnd().getPosition().intValue()); |
| 220 | + |
| 221 | + assertEquals("join2, SubLocation 8)", 4775, join2SubLocs.get(7).getStart().getPosition().intValue()); |
| 222 | + assertEquals("join2, SubLocation 8)", 5094, join2SubLocs.get(7).getEnd().getPosition().intValue()); |
| 223 | + |
| 224 | + //qualifiers |
| 225 | + assertEquals("join1, getType()", "Joined feature on complement", join2.getQualifiers().get("standard_name").get(0).getValue()); |
| 226 | + |
| 227 | + // Now write the joins back to a file using the GenbankWriterHelper |
| 228 | + ByteArrayOutputStream fragwriter = new ByteArrayOutputStream(); |
| 229 | + GenbankWriterHelper.writeNucleotideSequenceOriginal( |
| 230 | + fragwriter, |
| 231 | + Arrays.asList(sequence)); |
| 232 | + fragwriter.close(); |
| 233 | + |
| 234 | + System.out.println(fragwriter.toString().replaceAll("\r\n", "\n")); |
| 235 | + |
| 236 | + // Read the output file and test that no information is lost |
| 237 | + InputStream readerInputStream = new ByteArrayInputStream(fragwriter.toByteArray()); |
| 238 | + DNASequence newSequence = GenbankReaderHelper.readGenbankDNASequence(readerInputStream).values().iterator().next(); |
| 239 | + |
| 240 | + List<FeatureInterface<AbstractSequence<NucleotideCompound>, NucleotideCompound>> newFeatures = newSequence.getFeatures(); |
| 241 | + |
| 242 | + // Check the output matches the original sequence feature |
| 243 | + for (int i=0; i < features.size(); i++ ) { |
| 244 | + assertEquals("getFeatures(), getType()", features.get(i).getType(), newFeatures.get(i).getType()); |
| 245 | + assertEquals("getFeatures(), getLocations()", features.get(i).getLocations(), newFeatures.get(i).getLocations()); |
| 246 | + assertEquals("getFeatures(), getStrand()", features.get(i).getLocations().getStrand(), newFeatures.get(i).getLocations().getStrand()); |
| 247 | + |
| 248 | + List<Location> subLocations = features.get(i).getLocations().getSubLocations(); |
| 249 | + List<Location> newSubLocations = newFeatures.get(i).getLocations().getSubLocations(); |
| 250 | + assertEquals("getSubLocations()", subLocations.size(), newSubLocations.size()); |
| 251 | + |
| 252 | + assertEquals("getSubLocations()", subLocations, newSubLocations); |
| 253 | + |
| 254 | + for (int j=0; j < subLocations.size(); j++ ) { |
| 255 | + assertEquals("getSubLocations()", subLocations.get(j).toString(), newSubLocations.get(j).toString()); |
| 256 | + } |
| 257 | + |
| 258 | + Map<String, List<Qualifier>> qualifiers = features.get(i).getQualifiers(); |
| 259 | + Map<String, List<Qualifier>> newQualifiers = newFeatures.get(i).getQualifiers(); |
| 260 | + |
| 261 | + for (String qualifierType: qualifiers.keySet()) { |
| 262 | + assertEquals("getSubLocations()", qualifiers.get(qualifierType).get(0).getValue(), newQualifiers.get(qualifierType).get(0).getValue()); |
| 263 | + } |
| 264 | + |
| 265 | + } |
| 266 | + |
| 267 | + } |
| 268 | + |
| 269 | + /** |
| 270 | + * Going from GenBank file -> DNASequence object -> GenBank file looses information |
| 271 | + * https://github.com/biojava/biojava/issues/942 |
| 272 | + */ |
| 273 | + @Test |
| 274 | + public void testGithub942() throws Exception { |
| 275 | + |
| 276 | + // Important information is lost when reading and writing a |
| 277 | + // GenBank file through GenbankReaderHelper & GenbankWriterHelper |
| 278 | + |
| 279 | + // First read the sample GenBank file from |
| 280 | + // https://www.ncbi.nlm.nih.gov/Sitemap/samplerecord.html using the |
| 281 | + // GenbankReaderHelper |
| 282 | + InputStream inStream = GenbankWriterTest.class.getResourceAsStream("/NC_000913.gb"); |
| 283 | + DNASequence sequence = GenbankReaderHelper.readGenbankDNASequence(inStream).values().iterator().next(); |
| 284 | + |
| 285 | + // Then write sequence back to a file using the GenbankWriterHelper |
| 286 | + ByteArrayOutputStream fragwriter = new ByteArrayOutputStream(); |
| 287 | + GenbankWriterHelper.writeNucleotideSequenceOriginal( |
| 288 | + fragwriter, |
| 289 | + Arrays.asList(sequence)); |
| 290 | + fragwriter.close(); |
| 291 | + |
| 292 | + // Test no important information is lost |
| 293 | + InputStream readerInputStream = new ByteArrayInputStream(fragwriter.toByteArray()); |
| 294 | + DNASequence newSequence = GenbankReaderHelper.readGenbankDNASequence(readerInputStream).values().iterator().next(); |
| 295 | + |
| 296 | + //System.out.println(fragwriter.toString().replaceAll("\r\n", "\n")); |
| 297 | + |
| 298 | + assertEquals("getOriginalHeader()", sequence.getOriginalHeader(), newSequence.getOriginalHeader()); |
| 299 | + assertEquals("getLength()", sequence.getLength(), newSequence.getLength()); |
| 300 | + assertEquals("getAccession().getID()", sequence.getAccession().getID(), newSequence.getAccession().getID()); |
| 301 | + assertEquals("getAccession().getVersion()", sequence.getAccession().getVersion(), newSequence.getAccession().getVersion()); |
| 302 | + assertEquals("getDescription()", sequence.getDescription(), newSequence.getDescription()); |
| 303 | + assertEquals("getSource()", sequence.getSource(), newSequence.getSource()); |
| 304 | + assertEquals("getDNAType()", sequence.getDNAType(), newSequence.getDNAType()); |
| 305 | + assertEquals("getTaxonomy()", sequence.getTaxonomy(), newSequence.getTaxonomy()); |
| 306 | + assertEquals("getReferences()", sequence.getReferences(), newSequence.getReferences()); |
| 307 | + assertEquals("getComments()", sequence.getComments(), newSequence.getComments()); |
| 308 | + assertEquals("getNotesList()", sequence.getNotesList(), newSequence.getNotesList()); |
| 309 | + |
| 310 | + List<FeatureInterface<AbstractSequence<NucleotideCompound>, NucleotideCompound>> features = sequence.getFeatures(); |
| 311 | + List<FeatureInterface<AbstractSequence<NucleotideCompound>, NucleotideCompound>> newFeatures = newSequence.getFeatures(); |
| 312 | + |
| 313 | + //feature locations and qualifiers |
| 314 | + for (int i=0; i < features.size(); i++ ) { |
| 315 | + assertEquals("getFeatures(), getType()", features.get(i).getType(), newFeatures.get(i).getType()); |
| 316 | + assertEquals("getFeatures(), getLocations()", features.get(i).getLocations(), newFeatures.get(i).getLocations()); |
| 317 | + assertEquals("getFeatures(), getStrand()", features.get(i).getLocations().getStrand(), newFeatures.get(i).getLocations().getStrand()); |
| 318 | + |
| 319 | + List<Location> subLocations = features.get(i).getLocations().getSubLocations(); |
| 320 | + List<Location> newSubLocations = newFeatures.get(i).getLocations().getSubLocations(); |
| 321 | + assertEquals("getSubLocations()", subLocations.size(), newSubLocations.size()); |
| 322 | + |
| 323 | + assertEquals("getSubLocations()", subLocations, newSubLocations); |
| 324 | + |
| 325 | + for (int j=0; j < subLocations.size(); j++ ) { |
| 326 | + assertEquals("getSubLocations()", subLocations.get(j).toString(), newSubLocations.get(j).toString()); |
| 327 | + } |
| 328 | + |
| 329 | + Map<String, List<Qualifier>> qualifiers = features.get(i).getQualifiers(); |
| 330 | + Map<String, List<Qualifier>> newQualifiers = newFeatures.get(i).getQualifiers(); |
| 331 | + |
| 332 | + for (String qualifierType: qualifiers.keySet()) { |
| 333 | + assertEquals("getSubLocations()", qualifiers.get(qualifierType).get(0).getValue(), newQualifiers.get(qualifierType).get(0).getValue()); |
| 334 | + } |
| 335 | + |
| 336 | + } |
| 337 | + |
| 338 | + assertEquals("getSequenceAsString()", sequence.getSequenceAsString(), newSequence.getSequenceAsString()); |
| 339 | + |
| 340 | + } |
143 | 341 | } |
0 commit comments