1- # Copyright The IETF Trust 2010-2019 , All Rights Reserved
1+ # Copyright The IETF Trust 2010-2020 , All Rights Reserved
22# -*- coding: utf-8 -*-
33# Taken from http://code.google.com/p/soclone/source/browse/trunk/soclone/utils/html.py
44"""Utilities for working with HTML."""
88
99import bleach
1010import copy
11+ import html2text
1112import lxml .etree
1213import lxml .html
1314import lxml .html .clean
1415import six
1516
1617import debug # pyflakes:ignore
1718
19+ from django import forms
1820from django .utils .functional import keep_lazy
1921
22+ from ietf .utils .mime import get_mime_type
23+
2024acceptable_tags = ('a' , 'abbr' , 'acronym' , 'address' , 'b' , 'big' ,
2125 'blockquote' , 'body' , 'br' , 'caption' , 'center' , 'cite' , 'code' , 'col' ,
2226 'colgroup' , 'dd' , 'del' , 'dfn' , 'dir' , 'div' , 'dl' , 'dt' , 'em' , 'font' ,
@@ -76,3 +80,18 @@ def clean_html(self, html):
7680
def sanitize_document(html):
    """Return the result of passing *html* through ``lxml_cleaner.clean_html``."""
    cleaned = lxml_cleaner.clean_html(html)
    return cleaned
83+
84+
85+ # ----------------------------------------------------------------------
86+ # Text field cleaning
87+
def clean_text_field(text):
    """Clean the content of a text field according to its detected mime type.

    The text is UTF-8 encoded and handed to ``get_mime_type``.  Input
    detected as ``text/html`` is run through ``html2text.html2text`` and the
    converted text is returned; input detected as ``text/plain`` or
    ``application/x-empty`` is returned unchanged.

    Raises:
        forms.ValidationError: if the detected mime type is none of the
            three types listed above.
    """
    detected_type, _encoding = get_mime_type(text.encode('utf8'))

    # Only the detected mime type decides whether the text is treated as
    # HTML; no additional tag matching is applied to the text itself.
    if detected_type == 'text/html':
        return html2text.html2text(text)

    if detected_type not in ['text/plain', 'application/x-empty']:
        raise forms.ValidationError("Unexpected text field mime type: %s" % detected_type)

    return text
97+
0 commit comments