Skip to content

Commit b5da364

Browse files
author
Steve Canny
committed
img: add StreamReader.read_str()
1 parent 51e4880 commit b5da364

File tree

3 files changed

+67
-4
lines changed

3 files changed

+67
-4
lines changed

docx/image/exceptions.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,12 @@ class InvalidImageStreamError(Exception):
77
"""
88

99

10+
class UnexpectedEndOfFileError(Exception):
    """
    EOF was unexpectedly encountered while reading an image stream.
    """
14+
15+
1016
class UnrecognizedImageError(Exception):
1117
"""
1218
The provided image stream could not be recognized.

docx/image/helpers.py

Lines changed: 35 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,13 @@
22

33
from __future__ import absolute_import, division, print_function
44

5+
from struct import Struct
56

6-
_BIG_ENDIAN = '>'
7-
_LITTLE_ENDIAN = '<'
7+
from .exceptions import UnexpectedEndOfFileError
8+
9+
10+
BIG_ENDIAN = '>'
11+
LITTLE_ENDIAN = '<'
812

913

1014
class StreamReader(object):
@@ -17,6 +21,34 @@ def __init__(self, stream, byte_order, base_offset=0):
1721
super(StreamReader, self).__init__()
1822
self._stream = stream
1923
self._byte_order = (
20-
_LITTLE_ENDIAN if byte_order == _LITTLE_ENDIAN else _BIG_ENDIAN
24+
LITTLE_ENDIAN if byte_order == LITTLE_ENDIAN else BIG_ENDIAN
2125
)
2226
self._base_offset = base_offset
27+
28+
def read_str(self, char_count, base, offset=0):
    """
    Return a string containing the *char_count* bytes at the file
    position determined by self._base_offset + *base* + *offset*.

    The bytes are fetched with `self._unpack_item()` and decoded as
    UTF-8. Any exception raised while reading (for example when the
    stream ends before *char_count* bytes are available) propagates to
    the caller unchanged.
    """
    # A '<n>s' struct format yields the n raw bytes as a single item.
    str_struct = Struct('%ds' % char_count)
    chars = self._unpack_item(str_struct, base, offset)
    return chars.decode('UTF-8')
40+
41+
def seek(self, base, offset=0):
    """
    Position the underlying stream at self._base_offset + *base* +
    *offset*.
    """
    location = self._base_offset + base + offset
    self._stream.seek(location)
44+
45+
def _read_bytes(self, byte_count, base, offset):
46+
self.seek(base, offset)
47+
bytes_ = self._stream.read(byte_count)
48+
if len(bytes_) < byte_count:
49+
raise UnexpectedEndOfFileError
50+
return bytes_
51+
52+
def _unpack_item(self, struct, base, offset):
53+
bytes_ = self._read_bytes(struct.size, base, offset)
54+
return struct.unpack(bytes_)[0]

tests/image/test_helpers.py

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,31 @@
66

77
from __future__ import absolute_import, print_function
88

9+
import pytest
10+
11+
from docx.compat import BytesIO
12+
from docx.image.exceptions import UnexpectedEndOfFileError
13+
from docx.image.helpers import BIG_ENDIAN, StreamReader
14+
915

1016
class DescribeStreamReader(object):
    """Behavioral tests for |StreamReader|."""

    def it_can_read_a_string_of_specified_len_at_offset(
            self, read_str_fixture):
        stream_rdr, expected_string = read_str_fixture
        s = stream_rdr.read_str(6, 2)
        # Compare against the fixture's value rather than a duplicated
        # literal so the fixture stays the single source of truth.
        assert s == expected_string

    def it_raises_on_unexpected_EOF(self, read_str_fixture):
        stream_rdr = read_str_fixture[0]
        # The fixture stream is 10 bytes long, so asking for 9 bytes
        # starting at offset 2 runs past its end.
        with pytest.raises(UnexpectedEndOfFileError):
            stream_rdr.read_str(9, 2)

    # fixtures -------------------------------------------------------

    @pytest.fixture
    def read_str_fixture(self):
        """
        Return a (stream_rdr, expected_string) pair in which 'foobar'
        occupies bytes 2 through 7 of the reader's stream.
        """
        stream = BytesIO(b'\x01\x02foobar\x03\x04')
        stream_rdr = StreamReader(stream, BIG_ENDIAN)
        expected_string = 'foobar'
        return stream_rdr, expected_string

0 commit comments

Comments
 (0)