Skip to content

Commit 2cc65e1

Browse files
committed
Update UniprotProxySequenceReader.java
Added some comments to document the changes
1 parent 1163074 commit 2cc65e1

1 file changed

Lines changed: 5 additions & 0 deletions

File tree

biojava-core/src/main/java/org/biojava/nbio/core/sequence/loader/UniprotProxySequenceReader.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,8 @@ public UniprotProxySequenceReader(String accession, CompoundSet<C> compoundSet)
106106

107107
/**
108108
* The xml is passed in as a DOM object so we know everything about the protein.
109+
* Some uniprot records contain white space in the sequence. We must strip it out so setContents doesn't fail.
110+
* TODO add similar logic to the other constructors
109111
* If an error occurs throw an exception. We could have a bad uniprot id
110112
* @param document
111113
* @param compoundSet
@@ -149,6 +151,7 @@ public void setCompoundSet(CompoundSet<C> compoundSet) {
149151
public void setContents(String sequence) throws CompoundNotFoundException {
150152
// Horrendously inefficient - pretty much the way the old BJ did things.
151153
// TODO Should be optimised.
154+
// NOTE This chokes on whitespace in the sequence, so whitespace is stripped by the caller
152155
this.sequence = sequence;
153156
this.parsedCompounds.clear();
154157
for (int i = 0; i < sequence.length();) {
@@ -381,6 +384,7 @@ public ArrayList<String> getProteinAliases() throws XPathExpressionException {
381384
Element uniprotElement = uniprotDoc.getDocumentElement();
382385
Element entryElement = XMLHelper.selectSingleElement(uniprotElement, "entry");
383386
Element proteinElement = XMLHelper.selectSingleElement(entryElement, "protein");
387+
// An alternativeName can contain multiple fullNames and multiple shortNames, so we are careful to catch them all
384388
ArrayList<Element> keyWordElementList = XMLHelper.selectElements(proteinElement, "alternativeName");
385389
for (Element element : keyWordElementList) {
386390
Element fullNameElement = XMLHelper.selectSingleElement(element, "fullName");
@@ -395,6 +399,7 @@ public ArrayList<String> getProteinAliases() throws XPathExpressionException {
395399
}
396400
}
397401
}
402+
// recommendedName seems to allow only one fullName; to be on the safe side, we still check it for multiple shortNames
398403
keyWordElementList = XMLHelper.selectElements(proteinElement, "recommendedName");
399404
for (Element element : keyWordElementList) {
400405
Element fullNameElement = XMLHelper.selectSingleElement(element, "fullName");

0 commit comments

Comments
 (0)