view roundup/cgi/accept_language.py @ 6593:e70e2789bc2c

issue2551189 - increase text search maxlength This removes I think all the magic references to 25 and 30 (varchar size) and replaces them with references to maxlength or maxlength+5. I am not sure why the db column is 5 characters larger than the size of what should be the max size of a word, but I'll keep the buffer of 5 as making it 1/5 the size of maxlength makes less sense. Also added tests for fts search in templating which were missing. Added postgres, mysql and sqlite native indexing backends in which to test fts. Added fts test to native-fts as well to make sure it's working. I want to commit this now for CI. Todo: add test cases for the use of FTS in the csv output in actions.py. There is no test coverage of the match case there. change maxlength to a higher value (50) as requested in the ticket. Modify existing extremewords test cases to allow words > 25 and < 51 write code to migrate column sizes for mysql and postgresql to match maxlength I will roll this into the version 7 schema update that supports use of database fts support.
author John Rouillard <rouilj@ieee.org>
date Tue, 25 Jan 2022 13:22:00 -0500
parents 3b945aee0919
children 63c9680eed20
line wrap: on
line source

"""Parse the Accept-Language header as defined in RFC2616.

See http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.4
for details.  This module should follow the spec.
Author: Hernan M. Foffani (hfoffani@gmail.com)
Some use samples:

>>> parse("da, en-gb;q=0.8, en;q=0.7")
['da', 'en_gb', 'en']
>>> parse("en;q=0.2, fr;q=1")
['fr', 'en']
>>> parse("zn; q = 0.2 ,pt-br;q =1")
['pt_br', 'zn']
>>> parse("es-AR")
['es_AR']
>>> parse("es-es-cat")
['es_es_cat']
>>> parse("")
[]
>>> parse(None)
[]
>>> parse("   ")
[]
>>> parse("en,")
['en']
"""

import re
import heapq

# regexp for languange-range search
nqlre = "([A-Za-z]+[-[A-Za-z]+]*)$"
# regexp for languange-range search with quality value
qlre = r"([A-Za-z]+[-[A-Za-z]+]*);q=([\d\.]+)"
# both
lre = re.compile(nqlre + "|" + qlre)

whitespace = ' \t\n\r\v\f'
try:
    # Python 3.
    remove_ws = (str.maketrans('', '', whitespace),)
except AttributeError:
    # Python 2.
    remove_ws = (None, whitespace)


def parse(language_header):
    """parse(string_with_accept_header_content) -> languages list"""

    if language_header is None: return []

    # strip whitespaces.
    lh = language_header.translate(*remove_ws)

    # if nothing, return
    if lh == "": return []

    # split by commas and parse the quality values.
    pls = [lre.findall(x) for x in lh.split(',')]

    # drop uncomformant
    qls = [x[0] for x in pls if len(x) > 0]

    # use a heap queue to sort by quality values.
    # the value of each item is 1.0 complement.
    pq = []
    order=0
    for l in qls:
        order +=1
        if l[0] != '':
            heapq.heappush(pq, (0.0, order, l[0]))
        else:
            heapq.heappush(pq, (1.0-float(l[2]), order, l[1]))

    # get the languages ordered by quality
    # and replace - by _
    return [ heapq.heappop(pq)[2].replace('-','_') 
             for x in range(len(pq)) ]

if __name__ == "__main__":
    import doctest
    doctest.testmod()

# vim: set et sts=4 sw=4 :

Roundup Issue Tracker: http://roundup-tracker.org/