Skip to content

Commit 856bebd

Browse files
author
schallee@darkmist.net
committed
Big nasty fix for issue 46.
This introduces a HashMap-based Trie, with associated test cases, to allow longest-match lookup of entities. The Trie may be useful in other places as well. It also introduces some simple utilities for null-safe equals, hashCode, etc.
1 parent beaf870 commit 856bebd

5 files changed

Lines changed: 998 additions & 17 deletions

File tree

src/main/java/org/owasp/esapi/codecs/HTMLEntityCodec.java

Lines changed: 29 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
package org.owasp.esapi.codecs;
1717

1818
import java.util.HashMap;
19+
import java.util.Map;
1920

2021
/**
2122
* Implementation of the Codec interface for HTML entity encoding.
@@ -29,7 +30,7 @@ public class HTMLEntityCodec extends Codec {
2930

3031
private static HashMap<Character,String> characterToEntityMap;
3132

32-
private static HashMap<String,Character> entityToCharacterMap;
33+
private static HashTrie<Character> entityToCharacterMap;
3334

3435
static {
3536
initializeMaps();
@@ -240,21 +241,32 @@ private Character parseHex( PushbackString input ) {
240241
* Returns the decoded version of the character starting at index, or null if no decoding is possible.
241242
*/
242243
private Character getNamedEntity( PushbackString input ) {
243-
// search through the rest of the string up to 6 characters
244244
StringBuilder possible = new StringBuilder();
245-
int len = Math.min( input.remainder().length(), 7 );
246-
for ( int i=0; i<len; i++ ) {
247-
possible.append( Character.toLowerCase(input.next().charValue()) );
248-
Character entity = (Character) entityToCharacterMap.get(possible.toString());
249-
if ( entity != null ) {
250-
// eat any trailing semicolons
251-
if ( input.peek( ';') ) {
252-
input.next();
253-
}
254-
return entity;
255-
}
256-
}
257-
return null;
245+
Map.Entry<CharSequence,Character> entry;
246+
int len;
247+
248+
// kludge around PushbackString....
249+
len = Math.min(input.remainder().length(), entityToCharacterMap.getMaxKeyLength());
250+
for(int i=0;i<len;i++)
251+
possible.append(Character.toLowerCase(input.next()));
252+
253+
// look up the longest match
254+
entry = entityToCharacterMap.getLongestMatch(possible);
255+
if(entry == null)
256+
return null; // no match, caller will reset input
257+
258+
// fixup input
259+
input.reset();
260+
input.next(); // read &
261+
len = entry.getKey().length(); // what matched's length
262+
for(int i=0;i<len;i++)
263+
input.next();
264+
265+
// check for a trailing semicolon
266+
if(input.peek(';'))
267+
input.next();
268+
269+
return entry.getValue();
258270
}
259271

260272
/**
@@ -769,7 +781,7 @@ private static void initializeMaps() {
769781
/* &hearts; : black heart suit */, 9830
770782
/* &diams; : black diamond suit */, };
771783
characterToEntityMap = new HashMap<Character,String>(entityNames.length);
772-
entityToCharacterMap = new HashMap<String,Character>(entityValues.length);
784+
entityToCharacterMap = new HashTrie<Character>();
773785
for (int i = 0; i < entityNames.length; i++) {
774786
String e = entityNames[i];
775787
Character c = entityValues[i];
@@ -778,4 +790,4 @@ private static void initializeMaps() {
778790
}
779791
}
780792

781-
}
793+
}

0 commit comments

Comments
 (0)