Reduce amount of audio lost between utterances

Jerjou Cheng · Jerjou Cheng · commit d35fa561547b · 2016-12-22T09:22:01.000-08:00
diff --git a/speech/grpc/transcribe_streaming_minute.py b/speech/grpc/transcribe_streaming_minute.py
@@ -72,8 +72,7 @@ def _audio_data_generator(buff):
         A chunk of data that is the aggregate of all chunks of data in `buff`.
         The function will block until at least one data chunk is available.
     """
-    stop = False
-    while not stop:
+    while True:
         # Use a blocking get() to ensure there's at least one chunk of data.
         data = [buff.get()]
 
@@ -84,11 +83,13 @@ def _audio_data_generator(buff):
             except queue.Empty:
                 break
 
-        # `None` in the buffer signals that the audio stream is closed. Yield
-        # the final bit of the buffer and exit the loop.
+        # `None` in the buffer signals that we should stop generating. Put the
+        # data back into the buffer for the next generator.
         if None in data:
-            stop = True
             data.remove(None)
+            if data:
+                buff.put(b''.join(data))
+            break
 
         yield b''.join(data)
 
@@ -163,7 +164,7 @@ def request_stream(data_stream, rate, interim_results=True):
         yield cloud_speech_pb2.StreamingRecognizeRequest(audio_content=data)
 
 
-def listen_print_loop(recognize_stream, wrap_it_up_secs, max_recog_secs=60):
+def listen_print_loop(recognize_stream, wrap_it_up_secs, buff, max_recog_secs=60):
     """Iterates through server responses and prints them.
 
     The recognize_stream passed is a generator that will block until a response
@@ -186,6 +187,7 @@ def listen_print_loop(recognize_stream, wrap_it_up_secs, max_recog_secs=60):
             if resp.endpointer_type is resp.END_OF_SPEECH and (
                     time.time() > time_to_switch):
                 graceful_exit = True
+                buff.put(None)
             continue
 
         # Display the top transcription
@@ -238,7 +240,7 @@ def main():
         # Now, put the transcription responses to use.
         try:
             while True:
-                listen_print_loop(recognize_stream, WRAP_IT_UP_SECS)
+                listen_print_loop(recognize_stream, WRAP_IT_UP_SECS, buff)
 
                 # Discard this stream and create a new one.
                 # Note: calling .cancel() doesn't immediately raise an RpcError
diff --git a/speech/grpc/transcribe_streaming_utterances.py b/speech/grpc/transcribe_streaming_utterances.py
@@ -66,8 +66,7 @@ def _audio_data_generator(buff):
         A chunk of data that is the aggregate of all chunks of data in `buff`.
         The function will block until at least one data chunk is available.
     """
-    stop = False
-    while not stop:
+    while True:
         # Use a blocking get() to ensure there's at least one chunk of data.
         data = [buff.get()]
 
@@ -78,11 +77,13 @@ def _audio_data_generator(buff):
             except queue.Empty:
                 break
 
-        # `None` in the buffer signals that the audio stream is closed. Yield
-        # the final bit of the buffer and exit the loop.
+        # `None` in the buffer signals that we should stop generating. Put the
+        # data back into the buffer for the next generator.
         if None in data:
-            stop = True
             data.remove(None)
+            if data:
+                buff.put(b''.join(data))
+            break
 
         yield b''.join(data)
 
@@ -158,7 +159,7 @@ def request_stream(data_stream, rate, interim_results=True):
         yield cloud_speech_pb2.StreamingRecognizeRequest(audio_content=data)
 
 
-def listen_print_loop(recognize_stream):
+def listen_print_loop(recognize_stream, buff):
     """Iterates through server responses and prints them.
 
     The recognize_stream passed is a generator that will block until a response
@@ -175,6 +176,10 @@ def listen_print_loop(recognize_stream):
             raise RuntimeError('Server error: ' + resp.error.message)
 
         if not resp.results:
+            if resp.endpointer_type is resp.END_OF_UTTERANCE:
+                # Signal the audio generator to stop generating, and leave the
+                # buffer to fill.
+                buff.put(None)
             continue
 
         # Display the top transcription
@@ -226,7 +231,7 @@ def main():
         # Now, put the transcription responses to use.
         try:
             while True:
-                listen_print_loop(recognize_stream)
+                listen_print_loop(recognize_stream, buff)
 
                 # Discard this stream and create a new one.
                 # Note: calling .cancel() doesn't immediately raise an RpcError