|
22 | 22 | * To change this template, choose Tools | Templates |
23 | 23 | * and open the template in the editor. |
24 | 24 | */ |
25 | | - |
26 | 25 | package org.biojava.nbio.phylo; |
27 | 26 |
|
/**
 * This class provides static methods for the calculation of the percentage of
 * identity between two aligned sequences.
 * <p>
 * Since 4.1.1 the methods for distance inference in forester are also used in
 * BioJava, so this implementation of percentage of identity is not needed
 * anymore. However, the code is kept as BioJava's own implementation.
 *
 * @author Scooter Willis
 *
 */
public class Comparison {

    /** Distance between an ASCII lower-case letter and its upper-case form. */
    private static final int CASE_SHIFT = 'a' - 'A';

    /**
     * Calculates the gapped percentage of identity (PID) of two aligned
     * sequences, starting at the first column and ending at the length of
     * {@code seq1} (or of {@code seq2}, if that one is shorter).
     *
     * @param seq1
     *            first aligned sequence
     * @param seq2
     *            second aligned sequence
     * @return percentage of identity in the range [0, 100], or
     *         {@link Float#NaN} if there is no column to compare
     * @see #PID(String, String, int, int)
     */
    public static final float PID(String seq1, String seq2) {
        return PID(seq1, seq2, 0, seq1.length());
    }

    /**
     * Calculates the gapped percentage of identity (PID) of two aligned
     * sequences over the columns {@code [start, end)}.
     * <p>
     * The comparison is case-insensitive for the ASCII letters a-z. A column
     * counts as a mismatch only if the two characters differ and neither of
     * them is a gap (see {@link #isGap(char)}); every other column counts as
     * identical. The result is the share of identical columns among the
     * columns that were actually compared.
     * <p>
     * {@code end} is limited to the length of the shorter sequence. If
     * {@code start} lies at or beyond that limit, only the last available
     * column is compared.
     *
     * @param seq1
     *            first aligned sequence
     * @param seq2
     *            second aligned sequence
     * @param start
     *            index of the first column to compare (inclusive)
     * @param end
     *            index of the last column to compare (exclusive)
     * @return percentage of identity in the range [0, 100], or
     *         {@link Float#NaN} if there is no column to compare
     * @throws StringIndexOutOfBoundsException
     *             if {@code start} is negative and there is at least one
     *             column to compare
     */
    public static final float PID(String seq1, String seq2, int start, int end) {

        // The region can never extend past the shorter sequence.
        int len = Math.min(Math.min(seq1.length(), seq2.length()), end);

        if (len <= 0) {
            // Nothing to compare: the identity is undefined.
            return Float.NaN;
        }

        if (start >= len) {
            start = len - 1; // we just use a single residue for the difference
        }

        // Number of columns actually inspected; this (and not the end index)
        // is the denominator of the percentage.
        int compared = len - start;
        int bad = 0;

        for (int i = start; i < len; i++) {
            char chr1 = toUpperAscii(seq1.charAt(i));
            char chr2 = toUpperAscii(seq2.charAt(i));

            if (chr1 != chr2 && !isGap(chr1) && !isGap(chr2)) {
                bad++;
            }
        }

        return ((float) 100 * (compared - bad)) / compared;
    }

    /**
     * Method that determines if a character means a gap in the alignment.
     *
     * @param c
     *            gap character is one of the symbols in {' ','-','.'}
     *
     * @return true if it is a gap, false otherwise
     */
    public static final boolean isGap(char c) {
        return c == '-' || c == '.' || c == ' ';
    }

    /**
     * Converts an ASCII lower-case letter to upper case and returns any other
     * character unchanged.
     */
    private static char toUpperAscii(char c) {
        if ('a' <= c && c <= 'z') {
            return (char) (c - CASE_SHIFT);
        }
        return c;
    }

}
0 commit comments