-
Notifications
You must be signed in to change notification settings - Fork 311
Expand file tree
/
Copy pathmagicencode.py
More file actions
296 lines (241 loc) · 10.7 KB
/
magicencode.py
File metadata and controls
296 lines (241 loc) · 10.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
#!/usr/bin/python
# -*- coding: utf-8 -*-
""" Magic Encode
This module tries to convert an UTF-8 string to an encoded string for the printer.
It uses trial and error in order to guess the right codepage.
The code is based on the encoding-code in py-xml-escpos by @fvdsn.
:author: `Patrick Kanzler <dev@pkanzler.de>`_
:organization: `python-escpos <https://github.com/python-escpos>`_
:copyright: Copyright (c) 2016 Patrick Kanzler and Frédéric van der Essen
:license: MIT
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from builtins import bytes
from .constants import CODEPAGE_CHANGE
from .exceptions import Error
from .codepages import CodePages
import six
class Encoder(object):
"""Takes a list of available code spaces. Picks the right one for a
given character.
Note: To determine the code page, it needs to do the conversion, and
thus already knows what the final byte in the target encoding would
be. Nevertheless, the API of this class doesn't return the byte.
The caller use to do the character conversion itself.
$ python -m timeit -s "{u'ö':'a'}.get(u'ö')"
100000000 loops, best of 3: 0.0133 usec per loop
$ python -m timeit -s "u'ö'.encode('latin1')"
100000000 loops, best of 3: 0.0141 usec per loop
"""
def __init__(self, codepage_map):
self.codepages = codepage_map
self.available_encodings = set(codepage_map.keys())
self.available_characters = {}
self.used_encodings = set()
def get_sequence(self, encoding):
return int(self.codepages[encoding])
def get_encoding_name(self, encoding):
"""Given an encoding provided by the user, will return a
canonical encoding name; and also validate that the encoding
is supported.
TODO: Support encoding aliases: pc437 instead of cp437.
"""
encoding = CodePages.get_encoding_name(encoding)
if encoding not in self.codepages:
raise ValueError((
'Encoding "{}" cannot be used for the current profile. '
'Valid encodings are: {}'
).format(encoding, ','.join(self.codepages.keys())))
return encoding
@staticmethod
def _get_codepage_char_list(encoding):
"""Get codepage character list
Gets characters 128-255 for a given code page, as an array.
:param encoding: The name of the encoding. This must appear in the CodePage list
"""
codepage = CodePages.get_encoding(encoding)
if 'data' in codepage:
encodable_chars = list("".join(codepage['data']))
assert(len(encodable_chars) == 128)
return encodable_chars
elif 'python_encode' in codepage:
encodable_chars = [u" "] * 128
for i in range(0, 128):
codepoint = i + 128
try:
encodable_chars[i] = bytes([codepoint]).decode(codepage['python_encode'])
except UnicodeDecodeError:
# Non-encodable character, just skip it
pass
return encodable_chars
raise LookupError("Can't find a known encoding for {}".format(encoding))
def _get_codepage_char_map(self, encoding):
""" Get codepage character map
Process an encoding and return a map of UTF-characters to code points
in this encoding.
This is generated once only, and returned from a cache.
:param encoding: The name of the encoding.
"""
# Skip things that were loaded previously
if encoding in self.available_characters:
return self.available_characters[encoding]
codepage_char_list = self._get_codepage_char_list(encoding)
codepage_char_map = dict((utf8, i + 128) for (i, utf8) in enumerate(codepage_char_list))
self.available_characters[encoding] = codepage_char_map
return codepage_char_map
def can_encode(self, encoding, char):
"""Determine if a character is encodeable in the given code page.
:param encoding: The name of the encoding.
:param char: The character to attempt to encode.
"""
available_map = {}
try:
available_map = self._get_codepage_char_map(encoding)
except LookupError:
return False
# Decide whether this character is encodeable in this code page
is_ascii = ord(char) < 128
is_encodable = char in available_map
return is_ascii or is_encodable
@staticmethod
def _encode_char(char, charmap, defaultchar):
""" Encode a single character with the given encoding map
:param char: char to encode
:param charmap: dictionary for mapping characters in this code page
"""
if ord(char) < 128:
return ord(char)
if char in charmap:
return charmap[char]
return ord(defaultchar)
def encode(self, text, encoding, defaultchar='?'):
""" Encode text under the given encoding
:param text: Text to encode
:param encoding: Encoding name to use (must be defined in capabilities)
:param defaultchar: Fallback for non-encodable characters
"""
codepage_char_map = self._get_codepage_char_map(encoding)
output_bytes = bytes([self._encode_char(char, codepage_char_map, defaultchar) for char in text])
return output_bytes
def __encoding_sort_func(self, item):
key, index = item
return (
key in self.used_encodings,
index
)
def find_suitable_encoding(self, char):
"""The order of our search is a specific one:
1. code pages that we already tried before; there is a good
chance they might work again, reducing the search space,
and by re-using already used encodings we might also
reduce the number of codepage change instructiosn we have
to send. Still, any performance gains will presumably be
fairly minor.
2. code pages in lower ESCPOS slots first. Presumably, they
are more likely to be supported, so if a printer profile
is missing or incomplete, we might increase our change
that the code page we pick for this character is actually
supported.
"""
sorted_encodings = sorted(
self.codepages.items(),
key=self.__encoding_sort_func)
for encoding, _ in sorted_encodings:
if self.can_encode(encoding, char):
# This encoding worked; at it to the set of used ones.
self.used_encodings.add(encoding)
return encoding
def split_writable_text(encoder, text, encoding):
"""Splits off as many characters from the begnning of text as
are writable with "encoding". Returns a 2-tuple (writable, rest).
"""
if not encoding:
return None, text
for idx, char in enumerate(text):
if encoder.can_encode(encoding, char):
continue
return text[:idx], text[idx:]
return text, None
class MagicEncode(object):
"""A helper that helps us to automatically switch to the right
code page to encode any given Unicode character.
This will consider the printers supported codepages, according
to the printer profile, and if a character cannot be encoded
with the current profile, it will attempt to find a suitable one.
If the printer does not support a suitable code page, it can
insert an error character.
"""
def __init__(self, driver, encoding=None, disabled=False,
defaultsymbol='?', encoder=None):
"""
:param driver:
:param encoding: If you know the current encoding of the printer
when initializing this class, set it here. If the current
encoding is unknown, the first character emitted will be a
codepage switch.
:param disabled:
:param defaultsymbol:
:param encoder:
"""
if disabled and not encoding:
raise Error('If you disable magic encode, you need to define an encoding!')
self.driver = driver
self.encoder = encoder or Encoder(driver.profile.get_code_pages())
self.encoding = self.encoder.get_encoding_name(encoding) if encoding else None
self.defaultsymbol = defaultsymbol
self.disabled = disabled
def force_encoding(self, encoding):
"""Sets a fixed encoding. The change is emitted right away.
From now one, this buffer will switch the code page anymore.
However, it will still keep track of the current code page.
"""
if not encoding:
self.disabled = False
else:
self.write_with_encoding(encoding, None)
self.disabled = True
def write(self, text):
"""Write the text, automatically switching encodings.
"""
if self.disabled:
self.write_with_encoding(self.encoding, text)
return
# See how far we can go into the text with the current encoding
to_write, text = split_writable_text(self.encoder, text, self.encoding)
if to_write:
self.write_with_encoding(self.encoding, to_write)
while text:
# See if any of the code pages that the printer profile
# supports can encode this character.
encoding = self.encoder.find_suitable_encoding(text[0])
if not encoding:
self._handle_character_failed(text[0])
text = text[1:]
continue
# Write as much text as possible with the encoding found.
to_write, text = split_writable_text(self.encoder, text, encoding)
if to_write:
self.write_with_encoding(encoding, to_write)
def _handle_character_failed(self, char):
"""Called when no codepage was found to render a character.
"""
# Writing the default symbol via write() allows us to avoid
# unnecesary codepage switches.
self.write(self.defaultsymbol)
def write_with_encoding(self, encoding, text):
if text is not None and type(text) is not six.text_type:
raise Error("The supplied text has to be unicode, but is of type {type}.".format(
type=type(text)
))
# We always know the current code page; if the new codepage
# is different, emit a change command.
if encoding != self.encoding:
self.encoding = encoding
self.driver._raw(
CODEPAGE_CHANGE +
six.int2byte(self.encoder.get_sequence(encoding)))
if text:
self.driver._raw(self.encoder.encode(text, encoding))