| # Copyright 2019 Google LLC |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # https://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| |
| """Interface to asynchronously capture continuous audio from PyAudio. |
| |
| |
| This module requires pyaudio. See here for installation instructions: |
| http://people.csail.mit.edu/hubert/pyaudio/ |
| |
| This module provides one class, AudioRecorder, which buffers chunks of audio |
| from PyAudio. |
| """ |
| |
| from __future__ import absolute_import |
| from __future__ import division |
| from __future__ import print_function |
| |
| import logging |
| |
| import math |
| import time |
| |
| import numpy as np |
| import pyaudio |
| import queue |
| |
# Module-level logger, named after this module so that applications can
# configure or filter this module's log output independently.
| logger = logging.getLogger(__name__) |
| |
| |
class TimeoutError(Exception):
    """Signals that pyaudio did not buffer audio samples in time.

    Note: inside this module the name shadows Python 3's builtin
    ``TimeoutError``; this class derives directly from ``Exception``.
    """
| |
| |
class AudioRecorder(object):
    """Asynchronously record and buffer audio using pyaudio.

    This class wraps the pyaudio interface. It contains a queue.Queue object
    to hold chunks of raw audio, and a callback function _enqueue_raw_audio()
    which places raw audio into this queue. This allows the pyaudio.Stream
    object to record asynchronously at low latency.

    The class acts as a context manager. When entering the context it creates
    a pyaudio.Stream object and starts recording; it stops recording on exit.
    The Stream saves all of its audio to the Queue as two-tuples of
    (raw_audio, timestamp). The raw_audio is available from the queue as a
    numpy array using the get_audio() function.

    This class uses the term "frame" in the same sense that PortAudio does,
    so "frame" means something different here than elsewhere in the daredevil
    stack. A frame in PortAudio is one audio sample across all channels, so
    one frame of 16-bit stereo audio is four bytes of data as two 16-bit
    integers.
    """
    pyaudio_format = pyaudio.paInt16
    numpy_format = np.int16
    num_channels = 1

    # How many frames of audio PyAudio will fetch at once.
    # Higher numbers will increase the latency.
    frames_per_chunk = 2**9

    # Limit queue to this number of audio chunks.
    max_queue_chunks = 1200

    # Timeout if we can't get a chunk from the queue for timeout_factor times
    # the chunk duration.
    timeout_factor = 4

    def __init__(self, raw_audio_sample_rate_hz=48000,
                 downsample_factor=3,
                 device_index=None):
        """Set up PyAudio and the chunk queue; recording starts in __enter__.

        Args:
          raw_audio_sample_rate_hz: rate at which the device is sampled.
          downsample_factor: get_audio() keeps every downsample_factor-th
            frame, so the output rate is the raw rate integer-divided by it.
          device_index: PyAudio input device index, or None to use the
            default input device.
        """
        self._downsample_factor = downsample_factor
        self._raw_audio_sample_rate_hz = raw_audio_sample_rate_hz
        self.audio_sample_rate_hz = (
            self._raw_audio_sample_rate_hz // self._downsample_factor)
        self._raw_audio_queue = queue.Queue(self.max_queue_chunks)
        self._device_index = device_index
        # Defined up front so is_active/__exit__ are safe before __enter__.
        self._stream = None
        self._audio = pyaudio.PyAudio()
        self._print_input_devices()

    def __enter__(self):
        """Open the input stream and start recording.

        Returns:
          This AudioRecorder.

        Raises:
          ValueError: if the selected device has no input channels.
        """
        if self._device_index is None:
            self._device_index = (
                self._audio.get_default_input_device_info()["index"])
        # The index stored here is the one passed to open() as
        # input_device_index, so look the device up by that same index
        # (consistent with get_audio_device_info()).
        device_info = self._audio.get_device_info_by_index(self._device_index)
        if device_info.get("maxInputChannels", 0) <= 0:
            raise ValueError("Audio device has insufficient input channels.")
        print("Using audio device '%s' for index %d" % (
            device_info["name"], device_info["index"]))
        self._stream = self._audio.open(
            format=self.pyaudio_format,
            channels=self.num_channels,
            rate=self._raw_audio_sample_rate_hz,
            input=True,
            output=False,
            frames_per_buffer=self.frames_per_chunk,
            start=True,
            stream_callback=self._enqueue_raw_audio,
            input_device_index=self._device_index)
        logger.info("Started audio stream.")
        return self

    def __exit__(self, exception_type, exception_value, traceback):
        """Stop and close the stream; exceptions from the body propagate."""
        stream, self._stream = self._stream, None
        if stream is None:
            return
        try:
            stream.stop_stream()
        finally:
            # Always release the stream, even if stopping it failed.
            stream.close()
        logger.info("Stopped and closed audio stream.")

    def __del__(self):
        """Terminate PyAudio when the recorder is garbage collected."""
        # _audio is missing if __init__ failed before creating it.
        audio = getattr(self, "_audio", None)
        if audio is not None:
            audio.terminate()
            logger.info("Terminated PyAudio/PortAudio.")

    @property
    def is_active(self):
        """Whether the stream is active; False when no stream is open."""
        if self._stream is None:
            return False
        return self._stream.is_active()

    @property
    def bytes_per_sample(self):
        """Size in bytes of one sample in pyaudio_format."""
        return pyaudio.get_sample_size(self.pyaudio_format)

    @property
    def _chunk_duration_seconds(self):
        """Duration of one chunk at the raw (pre-downsampling) rate."""
        return self.frames_per_chunk / self._raw_audio_sample_rate_hz

    def _print_input_devices(self):
        """Print the devices of host API 0 that have input channels."""
        info = self._audio.get_host_api_info_by_index(0)
        print("\nInput microphone devices:")
        for i in range(0, info.get("deviceCount")):
            device_info = (
                self._audio.get_device_info_by_host_api_device_index(0, i))
            if device_info.get("maxInputChannels") <= 0:
                continue
            print(" ID: ", i, " - ", device_info.get("name"))

    def _enqueue_raw_audio(self, in_data, *_):  # unused args to match expected
        """Stream callback: queue (in_data, time.time()) without blocking.

        Returns:
          (None, pyaudio.paContinue), telling PyAudio to keep recording.

        Raises:
          TimeoutError: if the queue already holds max_queue_chunks chunks.
        """
        try:
            self._raw_audio_queue.put((in_data, time.time()), block=False)
        except queue.Full:
            error_message = "Raw audio buffer full."
            logger.critical(error_message)
            raise TimeoutError(error_message)
        return None, pyaudio.paContinue

    def _get_chunk(self, timeout=None):
        """Pop one chunk and return it as (array, timestamp).

        Args:
          timeout: seconds to wait for a chunk; None waits indefinitely.

        Returns:
          A tuple of the chunk as a numpy array of shape (-1, num_channels)
          and dtype numpy_format, and the timestamp stored with it.

        Raises:
          queue.Empty: if no chunk arrives within timeout.
        """
        raw_data, timestamp = self._raw_audio_queue.get(timeout=timeout)
        # frombuffer replaces the deprecated binary mode of np.fromstring.
        # It returns a read-only view of the bytes; get_audio() concatenates
        # the chunks into a fresh array, so nothing writes into the view.
        array_data = np.frombuffer(raw_data, dtype=self.numpy_format).reshape(
            -1, self.num_channels)
        return array_data, timestamp

    def get_audio_device_info(self):
        """Return PyAudio's info dict for the selected (or default) device."""
        if self._device_index is None:
            return self._audio.get_default_input_device_info()
        return self._audio.get_device_info_by_index(self._device_index)

    def sample_duration_seconds(self, num_samples):
        """Return num_samples / audio_sample_rate_hz / num_channels."""
        return num_samples / self.audio_sample_rate_hz / self.num_channels

    def clear_queue(self):
        """Discard every chunk currently waiting in the queue."""
        logger.debug("Purging %d chunks from queue.",
                     self._raw_audio_queue.qsize())
        # get_nowait() never blocks, unlike an empty() check followed by a
        # blocking get(), which can hang if the queue drains in between.
        while True:
            try:
                self._raw_audio_queue.get_nowait()
            except queue.Empty:
                break

    def get_audio(self, num_audio_frames):
        """Grab at least num_audio_frames frames of audio.

        Record at least num_audio_frames of audio and transform it into a
        numpy array. The term "frame" is in the sense used by PortAudio; see
        the note in the class docstring for details.

        Audio returned will be the earliest audio in the queue; it could be
        from before this function was called. At least one chunk is always
        fetched.

        Args:
          num_audio_frames: minimum number of frames to return, counted after
            downsampling.

        Returns:
          A tuple of (audio, first_timestamp, last_timestamp). audio is the
          concatenated chunks with every downsample_factor-th frame kept and
          values multiplied by 0.5, which also promotes them to floating
          point. The timestamps are those of the first and last chunk used.

        Raises:
          TimeoutError: if a chunk does not arrive within timeout_factor
            times the chunk duration.
        """
        num_audio_chunks = int(math.ceil(
            num_audio_frames * self._downsample_factor /
            self.frames_per_chunk))
        if num_audio_chunks < 1:
            num_audio_chunks = 1
        logger.debug("Capturing %d chunks to get at least %d frames.",
                     num_audio_chunks, num_audio_frames)

        timeout = self.timeout_factor * self._chunk_duration_seconds
        try:
            chunks, timestamps = zip(
                *[self._get_chunk(timeout=timeout)
                  for _ in range(num_audio_chunks)])
        except queue.Empty:
            error_message = (
                "Audio capture timed out after %.1f seconds." % timeout)
            logger.critical(error_message)
            raise TimeoutError(error_message)

        logger.debug("Got %d chunks. Chunk 0 has shape %s and dtype %s.",
                     len(chunks), chunks[0].shape, chunks[0].dtype)
        remaining = self._raw_audio_queue.qsize()
        # Warn once the backlog exceeds 80% of capacity: the callback raises
        # when the queue is full.
        if remaining > (0.8 * self.max_queue_chunks):
            logger.warning("%d chunks remain in the queue.", remaining)
        else:
            logger.debug("%d chunks remain in the queue.", remaining)

        audio = np.concatenate(chunks)
        if self._downsample_factor != 1:
            # Plain decimation: keep every downsample_factor-th frame.
            audio = audio[::self._downsample_factor]
        logger.debug("Audio array has shape %s and dtype %s.", audio.shape,
                     audio.dtype)
        return audio * 0.5, timestamps[0], timestamps[-1]