Skip to content

Commit 17610b0

Browse files
author
Steve Canny
committed
img: add _MarkerFinder.next()
1 parent 2b14ad4 commit 17610b0

File tree

3 files changed

+84
-1
lines changed

3 files changed

+84
-1
lines changed

docx/image/helpers.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,12 @@ def __init__(self, stream, byte_order, base_offset=0):
2525
)
2626
self._base_offset = base_offset
2727

28+
def read(self, count):
    """
    Pass a ``read()`` call straight through to the wrapped stream.

    *count* is forwarded unchanged and whatever the underlying stream's
    ``read(count)`` returns is handed back to the caller. No base-offset
    adjustment or seeking is performed here.
    """
    stream = self._stream
    return stream.read(count)
33+
2834
def read_byte(self, base=None, offset=0):
2935
"""
3036
Return the int value of the byte at the file position defined by
@@ -61,6 +67,12 @@ def seek(self, base, offset=0):
6167
location = self._base_offset + base + offset
6268
self._stream.seek(location)
6369

70+
def tell(self):
    """
    Pass a ``tell()`` call straight through to the wrapped stream.

    The value returned is exactly what the underlying stream reports; it
    is not adjusted by this reader's base offset.
    """
    stream = self._stream
    return stream.tell()
75+
6476
def _read_bytes(self, byte_count, base, offset):
6577
self.seek(base, offset)
6678
bytes_ = self._stream.read(byte_count)

docx/image/jpeg.py

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,56 @@ def next(self, start):
158158
following the 2-byte marker code, the start of the marker segment,
159159
for those markers that have a segment.
160160
"""
161-
raise NotImplementedError
161+
position = start
162+
while True:
163+
# skip over any non-\xFF bytes
164+
position = self._offset_of_next_ff_byte(start=position)
165+
# skip over any \xFF padding bytes
166+
position, byte_ = self._next_non_ff_byte(start=position+1)
167+
# 'FF 00' sequence is not a marker, start over if found
168+
if byte_ == b'\x00':
169+
continue
170+
# this is a marker, gather return values and break out of scan
171+
marker_code, segment_offset = byte_, position+1
172+
break
173+
return marker_code, segment_offset
174+
175+
def _next_non_ff_byte(self, start):
176+
"""
177+
Return an offset, byte 2-tuple for the next byte in *stream* that is
178+
not '\xFF', starting with the byte at offset *start*. If the byte at
179+
offset *start* is not '\xFF', *start* and the returned *offset* will
180+
be the same.
181+
"""
182+
self._stream.seek(start)
183+
byte_ = self._read_byte()
184+
while byte_ == b'\xFF':
185+
byte_ = self._read_byte()
186+
offset_of_non_ff_byte = self._stream.tell() - 1
187+
return offset_of_non_ff_byte, byte_
188+
189+
def _offset_of_next_ff_byte(self, start):
190+
"""
191+
Return the offset of the next '\xFF' byte in *stream* starting with
192+
the byte at offset *start*. Returns *start* if the byte at that
193+
offset is a hex 255; it does not necessarily advance in the stream.
194+
"""
195+
self._stream.seek(start)
196+
byte_ = self._read_byte()
197+
while byte_ != b'\xFF':
198+
byte_ = self._read_byte()
199+
offset_of_ff_byte = self._stream.tell() - 1
200+
return offset_of_ff_byte
201+
202+
def _read_byte(self):
203+
"""
204+
Return the next byte read from stream. Raise Exception if stream is
205+
at end of file.
206+
"""
207+
byte_ = self._stream.read(1)
208+
if not byte_:
209+
raise Exception('unexpected end of file')
210+
return byte_
162211

163212

164213
def _MarkerFactory(marker_code, stream, offset):

tests/image/test_jpeg.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,11 @@ def it_can_construct_from_a_stream(self, from_stream_fixture):
159159
_MarkerFinder__init_.assert_called_once_with(stream_)
160160
assert isinstance(marker_finder, _MarkerFinder)
161161

162+
def it_can_find_the_next_marker_after_a_given_offset(self, next_fixture):
    """
    Calling `next()` on the fixture's marker finder with the fixture's
    start offset yields the expected (marker_code, segment_offset) pair.
    """
    finder, offset, expected = next_fixture
    code, seg_offset = finder.next(offset)
    found = (code, seg_offset)
    assert found == expected
166+
162167
# fixtures -------------------------------------------------------
163168

164169
@pytest.fixture
@@ -169,6 +174,23 @@ def from_stream_fixture(self, stream_, _MarkerFinder__init_):
169174
def _MarkerFinder__init_(self, request):
170175
return initializer_mock(request, _MarkerFinder)
171176

177+
@pytest.fixture(params=[
    (0, JPEG_MARKER_CODE.SOI,  2),
    (1, JPEG_MARKER_CODE.APP0, 4),
    (2, JPEG_MARKER_CODE.APP0, 4),
    (3, JPEG_MARKER_CODE.EOI,  12),
    (4, JPEG_MARKER_CODE.EOI,  12),
    (6, JPEG_MARKER_CODE.EOI,  12),
    (8, JPEG_MARKER_CODE.EOI,  12),
])
def next_fixture(self, request):
    """
    Provide a (marker_finder, start, expected_code_and_offset) triple.

    Each param is (start offset, expected marker code, expected segment
    offset). The marker finder wraps a big-endian |StreamReader| over a
    fixed 12-byte sequence shared by every param.
    """
    start, marker_code, segment_offset = request.param
    # offsets 6-7 hold an 'FF 00' pair and offsets 8-10 are a run of FF
    # bytes ahead of the final byte at offset 11
    jpeg_bytes = b'\xFF\xD8\xFF\xE0\x00\x01\xFF\x00\xFF\xFF\xFF\xD9'
    reader = StreamReader(BytesIO(jpeg_bytes), BIG_ENDIAN)
    finder = _MarkerFinder(reader)
    return finder, start, (marker_code, segment_offset)
193+
172194
@pytest.fixture
173195
def stream_(self, request):
174196
return instance_mock(request, BytesIO)

0 commit comments

Comments
 (0)