@@ -106,6 +106,8 @@ public UniprotProxySequenceReader(String accession, CompoundSet<C> compoundSet)
106106
107107 /**
108108 * The xml is passed in as a DOM object so we know everything about the protein.
109+ * Some uniprot records contain white space in the sequence. We must strip it out so setContents doesn't fail.
110+ * TODO add similar logic to the other constructors
109111 * If an error occurs throw an exception. We could have a bad uniprot id
110112 * @param document
111113 * @param compoundSet
@@ -149,6 +151,7 @@ public void setCompoundSet(CompoundSet<C> compoundSet) {
149151 public void setContents (String sequence ) throws CompoundNotFoundException {
150152 // Horrendously inefficient - pretty much the way the old BJ did things.
151153 // TODO Should be optimised.
154+ // NOTE This chokes on whitespace in the sequence, so whitespace is stripped by the caller
152155 this .sequence = sequence ;
153156 this .parsedCompounds .clear ();
154157 for (int i = 0 ; i < sequence .length ();) {
@@ -381,6 +384,7 @@ public ArrayList<String> getProteinAliases() throws XPathExpressionException {
381384 Element uniprotElement = uniprotDoc .getDocumentElement ();
382385 Element entryElement = XMLHelper .selectSingleElement (uniprotElement , "entry" );
383386 Element proteinElement = XMLHelper .selectSingleElement (entryElement , "protein" );
387+ // An alternativeName can contain multiple fullNames and multiple shortNames, so we are careful to catch them all
384388 ArrayList <Element > keyWordElementList = XMLHelper .selectElements (proteinElement , "alternativeName" );
385389 for (Element element : keyWordElementList ) {
386390 Element fullNameElement = XMLHelper .selectSingleElement (element , "fullName" );
@@ -395,6 +399,7 @@ public ArrayList<String> getProteinAliases() throws XPathExpressionException {
395399 }
396400 }
397401 }
402+ // recommendedName seems to allow only one fullName; to be on the safe side, we still check it for multiple shortNames
398403 keyWordElementList = XMLHelper .selectElements (proteinElement , "recommendedName" );
399404 for (Element element : keyWordElementList ) {
400405 Element fullNameElement = XMLHelper .selectSingleElement (element , "fullName" );
0 commit comments