tests/test_nlp.py (21 changes: 20 additions & 1 deletion)
@@ -1,12 +1,16 @@
 import pytest
 import nlp

 from nlp import loadPageHTML, stripRawHTML, findOutlinks, onlyWikipediaURLS
 from nlp import expand_pages, relevant_pages, normalize, ConvergenceDetector, getInlinks
 from nlp import getOutlinks, Page
 from nlp import Rules, Lexicon
 # Clumsy imports because we want to access certain nlp.py globals explicitly,
 # since they are accessed by functions within nlp.py

+from unittest.mock import patch
+from io import BytesIO


 def test_rules():
     assert Rules(A="B C | D E") == {'A': [['B', 'C'], ['D', 'E']]}
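The two added imports carry the whole offline-testing setup: unittest.mock.patch temporarily swaps a real function for a stand-in, and io.BytesIO imitates the binary file-like object that urllib.request.urlopen returns, including read() and the context-manager protocol. A minimal sketch of that equivalence, independent of nlp.py:

from io import BytesIO

# A canned "response body" as bytes, the way a real HTTP payload arrives.
fake_response = BytesIO(b"<html><body>AIMA book</body></html>")

# Like the object urlopen returns, it can be read and used as a context manager.
with fake_response as resp:
    assert resp.read().decode('utf-8') == "<html><body>AIMA book</body></html>"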
@@ -27,6 +31,19 @@ def test_lexicon()
< href="/wiki/TestThing" > href="/wiki/TestBoy"
href="/wiki/TestLiving" href="/wiki/TestMan" >"""
testHTML2 = "Nothing"
testHTML3 = """
<!DOCTYPE html>
<html>
<head>
<title>Page Title</title>
</head>
<body>

<p>AIMA book</p>

</body>
</html>
"""

pA = Page("A", 1, 6, ["B", "C", "E"], ["D"])
pB = Page("B", 2, 5, ["E"], ["A", "C", "D"])
@@ -52,12 +69,14 @@ def test_lexicon()
     # assert all(loadedPages.get(key,"") != "" for key in addresses)


-def test_stripRawHTML():
+@patch('urllib.request.urlopen', return_value=BytesIO(testHTML3.encode()))
+def test_stripRawHTML(html_mock):
     addr = "https://en.wikipedia.org/wiki/Ethics"
     aPage = loadPageHTML([addr])
     someHTML = aPage[addr]
     strippedHTML = stripRawHTML(someHTML)
     assert "<head>" not in strippedHTML and "</head>" not in strippedHTML
+    assert "AIMA book" in someHTML and "AIMA book" in strippedHTML


 def test_determineInlinks():
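The reworked test now runs without a network connection: patching 'urllib.request.urlopen' means loadPageHTML receives testHTML3 exactly as if it had been downloaded from the Wikipedia address. A standalone sketch of the same technique, where fetch() is a hypothetical stand-in for nlp.loadPageHTML (that name, and the assumption that the code under test resolves urlopen through the urllib.request module, are illustrative, not taken from the PR):

import urllib.request
from io import BytesIO
from unittest.mock import patch

def fetch(url):
    # Hypothetical downloader: it looks urlopen up on the module at call
    # time, which is what lets the patch below intercept the call.
    return urllib.request.urlopen(url).read().decode('utf-8')

@patch('urllib.request.urlopen', return_value=BytesIO(b"<p>AIMA book</p>"))
def test_fetch_offline(mock_urlopen):
    html = fetch("https://en.wikipedia.org/wiki/Ethics")  # no network I/O
    assert "AIMA book" in html
    mock_urlopen.assert_called_once()  # the canned response was used

test_fetch_offline()

Two caveats worth knowing. patch replaces a name where it is looked up, so if nlp.py imported the function with "from urllib.request import urlopen", the target would have to be 'nlp.urlopen' instead. And return_value hands every call the same BytesIO, whose stream is exhausted after the first read; a test that fetches several URLs would instead use side_effect with a fresh BytesIO per call.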