view test/test_token.py @ 5108:67fad01d2009

issue2550653: xapian search, stemming is not working This is a partial fix for the issue. It does make stemming work (so searching for silent will also return docs with silently in them). However to do this we need to lowercase the text so the porter stemmer will work. This means capitalization is not preserved. Tests in test/test_indexer for xapian backend all pass. David Wolever (wolever) did the work.
author John Rouillard <rouilj@ieee.org>
date Mon, 27 Jun 2016 22:10:45 -0400
parents 364c54991861
children 6971c9249c6d
line wrap: on
line source

#
# Copyright (c) 2001 Richard Jones
# This module is free software, and you may redistribute it and/or modify
# under the same terms as Python, so long as this copyright message and
# disclaimer are retained in their original form.
#
# This module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

import unittest, time

from roundup.token import token_split

class TokenTestCase(unittest.TestCase):
    def testValid(self):
        l = token_split('hello world')
        self.assertEqual(l, ['hello', 'world'])

    def testIgnoreExtraSpace(self):
        l = token_split('hello  world ')
        self.assertEqual(l, ['hello', 'world'])

    def testQuoting(self):
        l = token_split('"hello world"')
        self.assertEqual(l, ['hello world'])
        l = token_split("'hello world'")
        self.assertEqual(l, ['hello world'])

    def testEmbedQuote(self):
        l = token_split(r'Roch\'e Compaan')
        self.assertEqual(l, ["Roch'e", "Compaan"])
        l = token_split('address="1 2 3"')
        self.assertEqual(l, ['address=1 2 3'])

    def testEscaping(self):
        l = token_split('"Roch\'e" Compaan')
        self.assertEqual(l, ["Roch'e", "Compaan"])
        l = token_split(r'hello\ world')
        self.assertEqual(l, ['hello world'])
        l = token_split(r'\\')
        self.assertEqual(l, ['\\'])
        l = token_split(r'\n')
        self.assertEqual(l, ['\n'])

    def testBadQuote(self):
        self.assertRaises(ValueError, token_split, '"hello world')
        self.assertRaises(ValueError, token_split, "Roch'e Compaan")

# vim: set filetype=python ts=4 sw=4 et si

Roundup Issue Tracker: http://roundup-tracker.org/