Skip to content

Commit 1bedd9a

Browse files
authored
Merge pull request #701 from valasatava/bug-fix-yana
Fixing bug in Chromosome Mapping Tools
2 parents 663c62f + 3e2be54 commit 1bedd9a

File tree

2 files changed

+89
-93
lines changed

2 files changed

+89
-93
lines changed

biojava-genome/src/main/java/org/biojava/nbio/genome/util/ChromosomeMappingTools.java

Lines changed: 28 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,11 @@ public class ChromosomeMappingTools {
2828

2929
public static final String CHROMOSOME = "CHROMOSOME";
3030
public static final String CDS = "CDS";
31-
31+
32+
private static int base = 1;
33+
public static void setCoordinateSystem(int baseInt) {
34+
base = baseInt;
35+
}
3236

3337
/** Pretty print the details of a GeneChromosomePosition to a String
3438
*
@@ -202,25 +206,12 @@ private static String formatExonStructureReverse(GeneChromosomePosition chromPos
202206
*/
203207
public static int getCDSLength(GeneChromosomePosition chromPos) {
204208

205-
logger.debug(chromPos.toString());
206-
207-
logger.debug("chromosomal information: ");
208-
209-
logger.debug("Gene:" + chromPos.getGeneName());
210-
logger.debug(" Transcription (including UTRs): " + chromPos.getTranscriptionStart() + " - " + chromPos.getTranscriptionEnd() + " (length:" + (chromPos.getTranscriptionEnd() - chromPos.getTranscriptionStart()) + ")");
211-
logger.debug(" Orientation: " + chromPos.getOrientation());
212-
logger.debug(" CDS: " + (chromPos.getCdsStart()) + " - " + chromPos.getCdsEnd() + " (length: " + (chromPos.getCdsEnd() - chromPos.getCdsStart()) + ")");
213-
214-
215209
List<Integer> exonStarts = chromPos.getExonStarts();
216210
List<Integer> exonEnds = chromPos.getExonEnds();
217211

218-
logger.debug("Exons:" + exonStarts.size());
219-
220212
int cdsStart = chromPos.getCdsStart();
221213
int cdsEnd = chromPos.getCdsEnd();
222214

223-
224215
int codingLength;
225216
if (chromPos.getOrientation().equals('+'))
226217
codingLength = ChromosomeMappingTools.getCDSLengthForward(exonStarts, exonEnds, cdsStart, cdsEnd);
@@ -504,16 +495,14 @@ public static ChromPos getChromPosForward(int cdsPos, List<Integer> exonStarts,
504495
*/
505496
public static int getCDSLengthReverse(List<Integer> exonStarts, List<Integer> exonEnds, int cdsStart, int cdsEnd) {
506497

507-
boolean inCoding = false;
508498
int codingLength = 0;
509499

510500
if (cdsEnd < cdsStart) {
511501
int tmp = cdsEnd;
512502
cdsEnd = cdsStart;
513503
cdsStart = tmp;
514504
}
515-
516-
int lengthExons = 0;
505+
cdsStart = cdsStart + base;
517506

518507
// map reverse
519508
for (int i = exonStarts.size() - 1; i >= 0; i--) {
@@ -526,52 +515,19 @@ public static int getCDSLengthReverse(List<Integer> exonStarts, List<Integer> ex
526515
end = start;
527516
start = tmp;
528517
}
529-
lengthExons += end - start;
530-
531-
if (start <= cdsEnd && end >= cdsEnd) {
532-
inCoding = true;
533-
534-
int tmpstart = start;
535-
if (start < cdsStart) {
536-
tmpstart = cdsStart;
537-
}
538-
codingLength += (cdsEnd - tmpstart);
518+
start = start + base;
539519

540-
boolean debug = logger.isDebugEnabled();
541-
542-
if ( debug) {
543-
544-
StringBuffer b = new StringBuffer();
545-
546-
b.append(" UTR :" + (cdsEnd + 1) + " - " + (end) + newline);
547-
if (tmpstart == start)
548-
b.append(" -> ");
549-
else
550-
b.append(" <-> ");
551-
b.append("Exon :" + tmpstart + " - " + cdsEnd + " | " + (cdsEnd - tmpstart) + " | " + codingLength + " | " + (codingLength % 3) + newline);
552-
// single exon with UTR on both ends
553-
if (tmpstart != start)
554-
b.append(" UTR :" + (cdsStart - 1) + " - " + start + newline);
555-
logger.debug(b.toString());
520+
if ((start < cdsStart && end < cdsStart) || (start > cdsEnd && end > cdsEnd))
521+
continue;
556522

557-
}
558-
} else if (start <= cdsStart && end >= cdsStart) {
559-
inCoding = false;
560-
codingLength += (end - cdsStart);
523+
if (start < cdsStart)
524+
start = cdsStart;
561525

562-
logger.debug(" <- Exon : " + (cdsStart+1) + " - " + end + " | " + (end - cdsStart) + " | " + (codingLength-3) + " | " + (codingLength % 3));
563-
logger.debug(" UTR : " + start + " - " + (cdsStart ));
526+
if (end > cdsEnd)
527+
end = cdsEnd;
564528

565-
} else if (inCoding) {
566-
// full exon is coding
567-
codingLength += (end - start);
568-
logger.debug(" Exon : " + start + " - " + end + " | " + (end - start) + " | " + codingLength + " | " + (codingLength % 3));
569-
} else {
570-
// e.g. see UBQLN3
571-
logger.debug(" no translation!");
572-
}
529+
codingLength += (end - start + 1);
573530
}
574-
logger.debug("length exons: " + lengthExons + " codin length: " + (codingLength - 3));
575531
return codingLength - 3;
576532
}
577533

@@ -585,47 +541,26 @@ public static int getCDSLengthReverse(List<Integer> exonStarts, List<Integer> ex
585541
* @return
586542
*/
587543
public static int getCDSLengthForward(List<Integer> exonStarts, List<Integer> exonEnds, int cdsStart, int cdsEnd) {
588-
boolean inCoding = false;
544+
589545
int codingLength = 0;
590546

591-
int lengthExons = 0;
592-
// map forward
593547
for (int i = 0; i < exonStarts.size(); i++) {
594548

595-
int start = exonStarts.get(i);
549+
int start = exonStarts.get(i)+base;
596550
int end = exonEnds.get(i);
597-
lengthExons += end - start;
598551

599-
logger.debug("forward exon: " + (start+1) + " - " + end + " | " + (end - start));
552+
if ( (start < cdsStart+base && end < cdsStart) || (start > cdsEnd && end > cdsEnd) )
553+
continue;
600554

601-
if (start+1 <= cdsStart +1 && end >= cdsStart+1) {
555+
if (start < cdsStart+base)
556+
start = cdsStart+base;
602557

603-
inCoding = true;
604-
codingLength += (end - cdsStart);
605-
606-
logger.debug(" UTR : " + start + " - " + (cdsStart ));
607-
logger.debug(" -> Exon : " + (cdsStart+1) + " - " + end + " | " + (end - cdsStart+1) + " | " + codingLength + " | " + (codingLength % 3));
608-
609-
} else if (start+1 <= cdsEnd && end >= cdsEnd) {
610-
611-
inCoding = false;
612-
codingLength += (cdsEnd - start);
613-
614-
logger.debug(" <- Exon : " + (start +1)+ " - " + cdsEnd + " | " + (cdsEnd - start+1) + " | " + codingLength + " | " + (codingLength % 3));
615-
logger.debug(" UTR : " + cdsEnd + 1 + " - " + end);
558+
if (end > cdsEnd)
559+
end = cdsEnd;
616560

617-
} else if (inCoding) {
618-
// full exon is coding
619-
codingLength += (end - start);
620-
621-
logger.debug(" Exon :" + (start+1) + " - " + end + " | " + (end - start+1) + " | " + codingLength + " | " + (codingLength % 3));
622-
}
561+
codingLength += (end - start + 1);
623562
}
624-
625-
logger.debug("length exons: " + Integer.toString(lengthExons));
626-
logger.debug("CDS length:" + Integer.toString((codingLength-3)));
627-
628-
return codingLength-3 ;
563+
return codingLength - 3;
629564
}
630565

631566
/** Extracts the exon boundaries in CDS coordinates. (needs to be divided by 3 to get AA positions)
@@ -868,7 +803,7 @@ public static int getCDSPosForward(int chromPos, List<Integer> exonStarts, List<
868803
int cdsStart, int cdsEnd) {
869804

870805
// the genetic coordinate is not in a coding region
871-
if ( (chromPos < (cdsStart+1) ) || ( chromPos > (cdsEnd+1) ) ) {
806+
if ( (chromPos < (cdsStart+base) ) || ( chromPos > (cdsEnd+base) ) ) {
872807
logger.debug("The "+format(chromPos)+" position is not in a coding region");
873808
return -1;
874809
}
@@ -887,7 +822,7 @@ public static int getCDSPosForward(int chromPos, List<Integer> exonStarts, List<
887822

888823
lengthExon = end - start;
889824

890-
if (start+1 <= chromPos && end >= chromPos ) {
825+
if (start+base <= chromPos && end >= chromPos ) {
891826
return codingLength + (chromPos-start);
892827
}
893828
else {
@@ -914,7 +849,7 @@ public static int getCDSPosReverse(int chromPos, List<Integer> exonStarts, List<
914849
int cdsStart, int cdsEnd) {
915850

916851
// the genetic coordinate is not in a coding region
917-
if ( (chromPos < (cdsStart+1)) || ( chromPos > (cdsEnd+1) ) ) {
852+
if ( (chromPos < (cdsStart+base)) || ( chromPos > (cdsEnd+base) ) ) {
918853
logger.debug("The "+format(chromPos)+" position is not in a coding region");
919854
return -1;
920855
}
@@ -933,7 +868,7 @@ public static int getCDSPosReverse(int chromPos, List<Integer> exonStarts, List<
933868

934869
lengthExon = end - start;
935870
// +1 offset to be a base 1
936-
if (start+1 <= chromPos && end >= chromPos ) {
871+
if (start+base <= chromPos && end >= chromPos ) {
937872
return codingLength + (end-chromPos+1);
938873
}
939874
else {
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
package org.biojava.nbio.genome;
2+
3+
import org.biojava.nbio.genome.util.ChromosomeMappingTools;
4+
import org.junit.Test;
5+
6+
import java.util.ArrayList;
7+
import java.util.Arrays;
8+
import java.util.List;
9+
10+
import static org.junit.Assert.assertEquals;
11+
12+
/**
13+
* Created by Yana Valasatava on 8/14/17.
14+
*/
15+
public class TestChromosomeMappingTools {
16+
17+
@Test
18+
public void testGetCDSLengthForward() {
19+
20+
List<Integer> exonStarts = new ArrayList<>(Arrays.asList(10, 30, 50, 70));
21+
List<Integer> exonEnds = new ArrayList<>(Arrays.asList(20, 40, 60, 80));
22+
int cdsStart = 35;
23+
int cdsEnd = 75;
24+
25+
int cdsDesired = 23 - 3;
26+
ChromosomeMappingTools.setCoordinateSystem(0);
27+
int cdsTest = ChromosomeMappingTools.getCDSLengthForward(exonStarts, exonEnds, cdsStart, cdsEnd);
28+
29+
assertEquals(cdsDesired, cdsTest);
30+
}
31+
32+
@Test
33+
public void testGetCDSLengthReverseAsc() {
34+
35+
List<Integer> exonStarts = new ArrayList<>(Arrays.asList(10, 50, 70));
36+
List<Integer> exonEnds = new ArrayList<>(Arrays.asList(20, 60, 80));
37+
int cdsStart = 55;
38+
int cdsEnd = 75;
39+
40+
int cdsDesired = 12 - 3;
41+
ChromosomeMappingTools.setCoordinateSystem(0);
42+
int cdsTest = ChromosomeMappingTools.getCDSLengthReverse(exonStarts, exonEnds, cdsStart, cdsEnd);
43+
44+
assertEquals(cdsDesired, cdsTest);
45+
}
46+
47+
@Test
48+
public void testGetCDSLengthReverseDesc() {
49+
50+
List<Integer> exonStarts = new ArrayList<>(Arrays.asList(70, 50, 10));
51+
List<Integer> exonEnds = new ArrayList<>(Arrays.asList(80, 60, 20));
52+
int cdsStart = 75;
53+
int cdsEnd = 50;
54+
55+
int cdsDesired = 17 - 3;
56+
ChromosomeMappingTools.setCoordinateSystem(0);
57+
int cdsTest = ChromosomeMappingTools.getCDSLengthReverse(exonStarts, exonEnds, cdsStart, cdsEnd);
58+
59+
assertEquals(cdsDesired, cdsTest);
60+
}
61+
}

0 commit comments

Comments
 (0)