Skip to content

Commit 08e1b3e

Browse files
committed
Move ServletUtils.sanitizeForHTML to a new generic HTMLUtils class.
1 parent 2c25e9a commit 08e1b3e

File tree

11 files changed

+97
-32
lines changed

11 files changed

+97
-32
lines changed

src/org/wikipedia/HTMLUtils.java

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
/**
2+
* @(#)HTMLUtils.java 0.01 26/10/2025
3+
* Copyright (C) 2025 - 20xx MER-C
4+
*
5+
* This program is free software; you can redistribute it and/or
6+
* modify it under the terms of the GNU General Public License
7+
* as published by the Free Software Foundation; either version 3
8+
* of the License, or (at your option) any later version. Additionally
9+
* this file is subject to the "Classpath" exception.
10+
*
11+
* This program is distributed in the hope that it will be useful,
12+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14+
* GNU General Public License for more details.
15+
*
16+
* You should have received a copy of the GNU General Public License
17+
* along with this program; if not, write to the Free Software Foundation,
18+
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19+
*/
20+
21+
package org.wikipedia;
22+
23+
/**
 *  Utility methods for generating and parsing HTML that don't belong in any of
 *  the specialist utility classes.
 *  @author MER-C
 *  @version 0.01
 */
public class HTMLUtils
{
    /**
     *  Sanitizes untrusted input for XSS destined for inclusion in the HTML
     *  body. The six characters {@code &}, {@code <}, {@code >}, {@code '},
     *  {@code "} and {@code /} are replaced with the entities
     *  {@code &amp;}, {@code &lt;}, {@code &gt;}, {@code &#x27;},
     *  {@code &quot;} and {@code &#x2F;} respectively; every other character
     *  is copied through unchanged. Input that already contains entities is
     *  escaped again (the ampersand becomes {@code &amp;}).
     *
     *  @param input an input string
     *  @return the sanitized input or the empty string if input is null
     *  @see <a href="https://www.owasp.org/index.php/XSS_Prevention">OWASP XSS
     *  Prevention Cheat Sheet Rule 1</a>
     */
    public static String sanitizeForHTML(String input)
    {
        if (input == null)
            return "";

        // A single pass over the characters: none of the targets needs regex
        // matching, and emitting each entity directly means an ampersand
        // produced by one substitution can never be escaped a second time.
        int length = input.length();
        StringBuilder escaped = new StringBuilder(length + 16);
        for (int i = 0; i < length; i++)
        {
            char c = input.charAt(i);
            switch (c)
            {
                case '&':
                    escaped.append("&amp;");
                    break;
                case '<':
                    escaped.append("&lt;");
                    break;
                case '>':
                    escaped.append("&gt;");
                    break;
                case '\'':
                    escaped.append("&#x27;");
                    break;
                case '"':
                    escaped.append("&quot;");
                    break;
                case '/':
                    escaped.append("&#x2F;");
                    break;
                default:
                    escaped.append(c);
                    break;
            }
        }
        return escaped.toString();
    }
}

src/org/wikipedia/WikitextUtils.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/**
2-
* @(#)ParserUtils.java 0.02 23/12/2016
2+
* @(#)WikitextUtils.java 0.02 23/12/2016
33
* Copyright (C) 2012-2018 MER-C
44
*
55
* This program is free software; you can redistribute it and/or

src/org/wikipedia/servlets/ServletUtils.java

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -35,24 +35,6 @@
3535
*/
3636
public class ServletUtils
3737
{
38-
/**
39-
* Sanitizes untrusted input for XSS destined for inclusion in the HTML
40-
* body.
41-
* @param input an input string
42-
* @see <a href="https://www.owasp.org/index.php/XSS_Prevention">OWASP XSS
43-
* Prevention Cheat Sheet Rule 1</a>
44-
* @return the sanitized input or the empty string if input is null
45-
*/
46-
public static String sanitizeForHTML(String input)
47-
{
48-
if (input == null)
49-
return "";
50-
return input.replaceAll("&", "&amp;")
51-
.replaceAll("<", "&lt;").replaceAll(">", "&gt;")
52-
.replaceAll("'", "&#x27;").replaceAll("\"", "&quot;")
53-
.replaceAll("/", "&#x2F;");
54-
}
55-
5638
/**
5739
* Sanitizes untrusted input for XSS destined for inclusion in boring
5840
* HTML attributes.

src/org/wikipedia/servlets/contributionsurveyor.jsp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@
4545
{
4646
List<String> catmembers = wiki.getCategoryMembers(category, Wiki.USER_NAMESPACE);
4747
if (catmembers.isEmpty())
48-
request.setAttribute("error", "Category \"" + ServletUtils.sanitizeForHTML(category) + "\" contains no users!");
48+
request.setAttribute("error", "Category \"" + HTMLUtils.sanitizeForHTML(category) + "\" contains no users!");
4949
else
5050
for (String tempstring : catmembers)
5151
users.add(wiki.removeNamespace(tempstring));

src/org/wikipedia/servlets/editorintersection.jsp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
1515
String wikiparam = ServletUtils.sanitizeForAttributeOrDefault(request.getParameter("wiki"), "en.wikipedia.org");
1616
String mode = Objects.requireNonNullElse(request.getParameter("mode"), "none");
17-
String pages = ServletUtils.sanitizeForHTML(request.getParameter("pages"));
17+
String pages = HTMLUtils.sanitizeForHTML(request.getParameter("pages"));
1818
String category = ServletUtils.sanitizeForAttribute(request.getParameter("category"));
1919
String user = ServletUtils.sanitizeForAttribute(request.getParameter("user"));
2020

src/org/wikipedia/servlets/imagecci.jsp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,6 @@ href="//en.wikipedia.org/wiki/WP:CCI">Contributor copyright investigations.</a>
7474

7575
<%
7676
if (user != null && survey.isEmpty())
77-
request.setAttribute("error", "ERROR: User " + ServletUtils.sanitizeForHTML(user) + " does not exist!");
77+
request.setAttribute("error", "ERROR: User " + HTMLUtils.sanitizeForHTML(user) + " does not exist!");
7878
%>
7979
<%@ include file="footer.jspf" %>

src/org/wikipedia/servlets/masslinksearch.jsp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
String inputdomains = request.getParameter("domains");
1717
if (inputdomains != null)
1818
{
19-
inputdomains = ServletUtils.sanitizeForHTML(inputdomains).trim().toLowerCase()
19+
inputdomains = HTMLUtils.sanitizeForHTML(inputdomains).trim().toLowerCase()
2020
// \\bexample\\.com\\b to example.com
2121
.replace("\\b", "").replace("\\.", ".")
2222
// *{{LinkSummary|example.com}} to example.com

src/org/wikipedia/servlets/userwatchlist.jsp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ Someone # Spam
107107
String text = enWiki.getPageText(List.of(inputpage)).get(0);
108108
if (text == null)
109109
{
110-
request.setAttribute("error", "ERROR: page &quot;" + ServletUtils.sanitizeForHTML(inputpage) + "&quot; does not exist!");
110+
request.setAttribute("error", "ERROR: page &quot;" + HTMLUtils.sanitizeForHTML(inputpage) + "&quot; does not exist!");
111111
%>
112112
<%@ include file="footer.jspf" %>
113113
<%
@@ -166,7 +166,7 @@ Someone # Spam
166166
for (int i = 0; i < userstofetch.size(); i++)
167167
{
168168
String user = userstofetch.get(i);
169-
String reason = ServletUtils.sanitizeForHTML(input.get(user));
169+
String reason = HTMLUtils.sanitizeForHTML(input.get(user));
170170
// user links
171171
%>
172172
<h3><%= user %></h3>
test/org/wikipedia/HTMLUtilsTest.java

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
/**
2+
* @(#)HTMLUtilsTest.java 0.01 26/10/2025
3+
* Copyright (C) 2025 - 20xx MER-C
4+
*
5+
* This program is free software; you can redistribute it and/or
6+
* modify it under the terms of the GNU General Public License
7+
* as published by the Free Software Foundation; either version 3
8+
* of the License, or (at your option) any later version. Additionally
9+
* this file is subject to the "Classpath" exception.
10+
*
11+
* This program is distributed in the hope that it will be useful,
12+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14+
* GNU General Public License for more details.
15+
*
16+
* You should have received a copy of the GNU General Public License
17+
* along with this program; if not, write to the Free Software Foundation,
18+
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19+
*/
20+
21+
package org.wikipedia;
22+
23+
import org.junit.jupiter.api.Test;
24+
import static org.junit.jupiter.api.Assertions.*;
25+
26+
/**
27+
* Unit tests for org.wikipedia.HTMLUtils
28+
* @author MER-C
29+
*/
30+
public class HTMLUtilsTest
31+
{
32+
@Test
33+
public void sanitizeForHTML()
34+
{
35+
assertEquals("", HTMLUtils.sanitizeForHTML(null));
36+
assertEquals("&lt;p&gt;&quot;&#x27;Test&amp;123&#x27;&quot;&lt;&#x2F;p&gt;",
37+
HTMLUtils.sanitizeForHTML("<p>\"'Test&123'\"</p>"));
38+
}
39+
}

test/org/wikipedia/WikitextUtilsTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
import static org.junit.jupiter.api.Assertions.*;
2727

2828
/**
29-
* Unit tests for org.wikipedia.ParserUtils
29+
* Unit tests for org.wikipedia.WikitextUtils
3030
* @author MER-C
3131
*/
3232
public class WikitextUtilsTest

0 commit comments

Comments
 (0)