Skip to content

Commit 5a8e12f

Browse files
jerjou authored and Jon Wayne Parrott committed
Use pyaudio's built-in async api. (GoogleCloudPlatform#645)
1 parent d7c851e commit 5a8e12f

File tree

2 files changed

+51
-57
lines changed

2 files changed

+51
-57
lines changed

speech/grpc/transcribe_streaming.py

Lines changed: 32 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -17,10 +17,10 @@
1717
from __future__ import division
1818

1919
import contextlib
20+
import functools
2021
import re
2122
import signal
2223
import sys
23-
import threading
2424

2525
from google.cloud import credentials
2626
from google.cloud.speech.v1beta1 import cloud_speech_pb2 as cloud_speech
@@ -76,8 +76,7 @@ def _audio_data_generator(buff):
7676
stop = False
7777
while not stop:
7878
# Use a blocking get() to ensure there's at least one chunk of data.
79-
chunk = buff.get()
80-
data = [chunk]
79+
data = [buff.get()]
8180

8281
# Now consume whatever other data's still buffered.
8382
while True:
@@ -86,54 +85,47 @@ def _audio_data_generator(buff):
8685
except queue.Empty:
8786
break
8887

89-
# If `_fill_buffer` adds `None` to the buffer, the audio stream is
90-
# closed. Yield the final bit of the buffer and exit the loop.
88+
# `None` in the buffer signals that the audio stream is closed. Yield
89+
# the final bit of the buffer and exit the loop.
9190
if None in data:
9291
stop = True
9392
data.remove(None)
93+
9494
yield b''.join(data)
9595

9696

97-
def _fill_buffer(audio_stream, buff, chunk, stoprequest):
97+
def _fill_buffer(buff, in_data, frame_count, time_info, status_flags):
9898
"""Continuously collect data from the audio stream, into the buffer."""
99-
try:
100-
while not stoprequest.is_set():
101-
buff.put(audio_stream.read(chunk))
102-
except IOError:
103-
pass
104-
finally:
105-
# Add `None` to the buff, indicating that a stop request is made.
106-
# This will signal `_audio_data_generator` to exit.
107-
buff.put(None)
99+
buff.put(in_data)
100+
return None, pyaudio.paContinue
108101

109102

110103
# [START audio_stream]
111104
@contextlib.contextmanager
112-
def record_audio(rate, chunk, stoprequest):
105+
def record_audio(rate, chunk):
113106
"""Opens a recording stream in a context manager."""
107+
# Create a thread-safe buffer of audio data
108+
buff = queue.Queue()
109+
114110
audio_interface = pyaudio.PyAudio()
115111
audio_stream = audio_interface.open(
116112
format=pyaudio.paInt16,
117113
# The API currently only supports 1-channel (mono) audio
118114
# https://goo.gl/z757pE
119115
channels=1, rate=rate,
120116
input=True, frames_per_buffer=chunk,
117+
# Run the audio stream asynchronously to fill the buffer object.
118+
# This is necessary so that the input device's buffer doesn't overflow
119+
# while the calling thread makes network requests, etc.
120+
stream_callback=functools.partial(_fill_buffer, buff),
121121
)
122122

123-
# Create a thread-safe buffer of audio data
124-
buff = queue.Queue()
125-
126-
# Spin up a separate thread to buffer audio data from the microphone
127-
# This is necessary so that the input device's buffer doesn't overflow
128-
# while the calling thread makes network requests, etc.
129-
fill_buffer_thread = threading.Thread(
130-
target=_fill_buffer, args=(audio_stream, buff, chunk, stoprequest))
131-
fill_buffer_thread.start()
132-
133123
yield _audio_data_generator(buff)
134124

135-
fill_buffer_thread.join()
125+
audio_stream.stop_stream()
136126
audio_stream.close()
127+
# Signal the _audio_data_generator to finish
128+
buff.put(None)
137129
audio_interface.terminate()
138130
# [END audio_stream]
139131

@@ -172,7 +164,17 @@ def request_stream(data_stream, rate, interim_results=True):
172164
yield cloud_speech.StreamingRecognizeRequest(audio_content=data)
173165

174166

175-
def listen_print_loop(recognize_stream, stoprequest):
167+
def listen_print_loop(recognize_stream):
168+
"""Iterates through server responses and prints them.
169+
170+
The recognize_stream passed is a generator that will block until a response
171+
is provided by the server. When the transcription response comes, print it.
172+
173+
In this case, responses are provided for interim results as well. If the
174+
response is an interim one, print a carriage return at the end of it, to allow
175+
the next result to overwrite it, until the response is a final one. For the
176+
final one, print a newline to preserve the finalized transcription.
177+
"""
176178
num_chars_printed = 0
177179
for resp in recognize_stream:
178180
if resp.error.code != code_pb2.OK:
@@ -204,7 +206,6 @@ def listen_print_loop(recognize_stream, stoprequest):
204206
# one of our keywords.
205207
if re.search(r'\b(exit|quit)\b', transcript, re.I):
206208
print('Exiting..')
207-
stoprequest.set()
208209
break
209210

210211
num_chars_printed = 0
@@ -213,17 +214,9 @@ def listen_print_loop(recognize_stream, stoprequest):
213214
def main():
214215
with cloud_speech.beta_create_Speech_stub(
215216
make_channel('speech.googleapis.com', 443)) as service:
216-
217-
# stoprequest is event object which is set in `listen_print_loop`
218-
# to indicate that the transcription should be stopped.
219-
#
220-
# The `_fill_buffer` thread checks this object, and closes
221-
# the `audio_stream` once it's set.
222-
stoprequest = threading.Event()
223-
224217
# For streaming audio from the microphone, there are three threads.
225218
# First, a thread that collects audio data as it comes in
226-
with record_audio(RATE, CHUNK, stoprequest) as buffered_audio_data:
219+
with record_audio(RATE, CHUNK) as buffered_audio_data:
227220
# Second, a thread that sends requests with that data
228221
requests = request_stream(buffered_audio_data, RATE)
229222
# Third, a thread that listens for transcription responses
@@ -235,7 +228,7 @@ def main():
235228

236229
# Now, put the transcription responses to use.
237230
try:
238-
listen_print_loop(recognize_stream, stoprequest)
231+
listen_print_loop(recognize_stream)
239232

240233
recognize_stream.cancel()
241234
except face.CancellationError:

speech/grpc/transcribe_streaming_test.py

Lines changed: 19 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@
1212
# limitations under the License.
1313

1414
import re
15+
import threading
1516
import time
1617

1718
import transcribe_streaming
@@ -24,34 +25,34 @@ def __init__(self, audio_filename):
2425
def __call__(self, *args):
2526
return self
2627

27-
def open(self, *args, **kwargs):
28-
self.audio_file = open(self.audio_filename, 'rb')
28+
def open(self, stream_callback, *args, **kwargs):
29+
self.closed = threading.Event()
30+
self.stream_thread = threading.Thread(
31+
target=self.stream_audio, args=(
32+
self.audio_filename, stream_callback, self.closed))
33+
self.stream_thread.start()
2934
return self
3035

3136
def close(self):
32-
self.audio_file.close()
37+
self.closed.set()
3338

3439
def stop_stream(self):
3540
pass
3641

3742
def terminate(self):
3843
pass
3944

40-
def read(self, num_frames):
41-
if self.audio_file.closed:
42-
raise IOError()
43-
# Approximate realtime by sleeping for the appropriate time for the
44-
# requested number of frames
45-
time.sleep(num_frames / float(transcribe_streaming.RATE))
46-
# audio is 16-bit samples, whereas python byte is 8-bit
47-
num_bytes = 2 * num_frames
48-
try:
49-
chunk = self.audio_file.read(num_bytes)
50-
except ValueError:
51-
raise IOError()
52-
if not chunk:
53-
raise IOError()
54-
return chunk
45+
@staticmethod
46+
def stream_audio(audio_filename, callback, closed, num_frames=512):
47+
with open(audio_filename, 'rb') as audio_file:
48+
while not closed.is_set():
49+
# Approximate realtime by sleeping for the appropriate time for
50+
# the requested number of frames
51+
time.sleep(num_frames / float(transcribe_streaming.RATE))
52+
# audio is 16-bit samples, whereas python byte is 8-bit
53+
num_bytes = 2 * num_frames
54+
chunk = audio_file.read(num_bytes) or b'\0' * num_bytes
55+
callback(chunk, None, None, None)
5556

5657

5758
def test_main(resource, monkeypatch, capsys):

0 commit comments

Comments
 (0)