ESPHome  2025.2.0
audio.h
Go to the documentation of this file.
1 #pragma once
2 
3 #include "esphome/core/defines.h"
4 
5 #include <cstddef>
6 #include <cstdint>
7 
8 namespace esphome {
9 namespace audio {
10 
12  /* Class to represent important parameters of the audio stream that also provides helper functions to convert between
13  * various audio-related units.
14  *
15  * - An audio sample represents a unit of audio for one channel.
16  * - A frame represents a unit of audio with a sample for every channel.
17  *
18  * In general, converting between bytes, samples, and frames shouldn't result in rounding errors so long as frames
19  * are used as the main unit when transferring audio data. Durations may result in rounding for certain sample rates;
20  * e.g., 44.1 kHz. The ``frames_to_milliseconds_with_remainder`` function should be used for accuracy, as it takes
21  * into account the remainder rather than just ignoring any rounding.
22  */
23  public:
25  : AudioStreamInfo(16, 1, 16000){}; // Default values represent ESPHome's audio components historical values
26  AudioStreamInfo(uint8_t bits_per_sample, uint8_t channels, uint32_t sample_rate);  // Declaration only; the definition is not part of this listing
27 
28  uint8_t get_bits_per_sample() const { return this->bits_per_sample_; }  // Returns the stored bits_per_sample_ member
29  uint8_t get_channels() const { return this->channels_; }  // Returns the stored channels_ member
30  uint32_t get_sample_rate() const { return this->sample_rate_; }  // Returns the stored sample_rate_ member
31 
36  uint32_t bytes_to_ms(size_t bytes) const {  // Convert bytes to duration in milliseconds (integer division)
37  return static_cast<uint32_t>(static_cast<uint64_t>(bytes) * 1000 / (this->sample_rate_ * this->bytes_per_sample_ * this->channels_));  // 64-bit numerator: bytes * 1000 wraps when size_t is 32 bits wide
38  }
39 
43  uint32_t bytes_to_frames(size_t bytes) const { return (bytes / (this->bytes_per_sample_ * this->channels_)); }  // Convert bytes to frames: bytes divided by (bytes_per_sample_ * channels_)
44 
48  uint32_t bytes_to_samples(size_t bytes) const { return (bytes / this->bytes_per_sample_); }  // Convert bytes to samples: bytes divided by bytes_per_sample_
49 
53  size_t frames_to_bytes(uint32_t frames) const { return frames * this->bytes_per_sample_ * this->channels_; }  // Converts frames to bytes: frames * bytes_per_sample_ * channels_
54 
58  size_t samples_to_bytes(uint32_t samples) const { return samples * this->bytes_per_sample_; }  // Converts samples to bytes: samples * bytes_per_sample_
59 
63  uint32_t ms_to_frames(uint32_t ms) const { return static_cast<uint32_t>((static_cast<uint64_t>(ms) * this->sample_rate_) / 1000); }  // Converts duration to frames; 64-bit product so ms * sample_rate_ cannot wrap at 32 bits
64 
68  uint32_t ms_to_samples(uint32_t ms) const { return static_cast<uint32_t>((static_cast<uint64_t>(ms) * this->channels_ * this->sample_rate_) / 1000); }  // Converts duration to samples; 64-bit product so ms * channels_ * sample_rate_ cannot wrap at 32 bits
69 
73  size_t ms_to_bytes(uint32_t ms) const {  // Converts duration to bytes (integer division by 1000)
74  return static_cast<size_t>((static_cast<uint64_t>(ms) * this->bytes_per_sample_ * this->channels_ * this->sample_rate_) / 1000);  // 64-bit product: the multiplication wraps before the division when done in 32 bits
75  }
76 
81  uint32_t frames_to_microseconds(uint32_t frames) const;  // Computes the duration, in microseconds, the given amount of frames represents; declaration only
82 
88  uint32_t frames_to_milliseconds_with_remainder(uint32_t *frames) const;  // Computes the duration, in milliseconds, the given amount of frames represents; NOTE(review): presumably writes leftover frames back through `frames` - confirm against the definition
89 
90  // Class comparison operators
91  bool operator==(const AudioStreamInfo &rhs) const;  // Equality comparison; declaration only, compared fields are not visible here
92  bool operator!=(const AudioStreamInfo &rhs) const { return !operator==(rhs); }  // Logical negation of operator==
93 
94  protected:
96  uint8_t channels_;
97  uint32_t sample_rate_;
98 
99  // The greatest common divisor between 1000 ms = 1 second and the sample rate. Used to avoid accumulating error when
100  // converting from frames to duration. Computed at construction.
102 
103  // Conversion factor derived from the number of bits per sample. Assumes audio data is aligned to the byte. Computed
104  // at construction.
106 };
107 
108 enum class AudioFileType : uint8_t {  // Audio file formats; underlying type is uint8_t
109  NONE = 0,  // Explicitly zero
110 #ifdef USE_AUDIO_FLAC_SUPPORT
111  FLAC,  // Only present when USE_AUDIO_FLAC_SUPPORT is defined
112 #endif
113 #ifdef USE_AUDIO_MP3_SUPPORT
114  MP3,  // Only present when USE_AUDIO_MP3_SUPPORT is defined
115 #endif
116  WAV,  // Numeric value depends on which optional enumerators above are compiled in
117 };
118 
119 struct AudioFile {  // Plain aggregate describing one audio file
120  const uint8_t *data;  // Read-only pointer to the file's bytes
121  size_t length;  // NOTE(review): presumably the number of bytes at `data` - confirm with callers
122  AudioFileType file_type;  // Format of the file, as an AudioFileType value
123 };
124 
128 const char *audio_file_type_to_string(AudioFileType file_type);  // Helper function to convert file type to a const char string; declaration only
129 
135 void scale_audio_samples(const int16_t *audio_samples, int16_t *output_buffer, int16_t scale_factor,
136  size_t samples_to_scale);  // Scales Q15 fixed point audio samples; declaration only
137 
138 } // namespace audio
139 } // namespace esphome
bool operator==(const AudioStreamInfo &rhs) const
Definition: audio.cpp:35
uint8_t get_channels() const
Definition: audio.h:29
uint32_t ms_to_frames(uint32_t ms) const
Converts duration to frames.
Definition: audio.h:63
const uint8_t * data
Definition: audio.h:120
uint8_t get_bits_per_sample() const
Definition: audio.h:28
uint32_t bytes_to_ms(size_t bytes) const
Convert bytes to duration in milliseconds.
Definition: audio.h:36
size_t samples_to_bytes(uint32_t samples) const
Converts samples to bytes.
Definition: audio.h:58
uint32_t frames_to_milliseconds_with_remainder(uint32_t *frames) const
Computes the duration, in milliseconds, the given amount of frames represents.
Definition: audio.cpp:26
uint32_t bytes_to_samples(size_t bytes) const
Convert bytes to samples.
Definition: audio.h:48
bool operator!=(const AudioStreamInfo &rhs) const
Definition: audio.h:92
size_t frames_to_bytes(uint32_t frames) const
Converts frames to bytes.
Definition: audio.h:53
uint32_t ms_to_samples(uint32_t ms) const
Converts duration to samples.
Definition: audio.h:68
AudioFileType file_type
Definition: audio.h:122
uint32_t get_sample_rate() const
Definition: audio.h:30
size_t ms_to_bytes(uint32_t ms) const
Converts duration to bytes.
Definition: audio.h:73
uint32_t frames_to_microseconds(uint32_t frames) const
Computes the duration, in microseconds, the given amount of frames represents.
Definition: audio.cpp:22
Implementation of SPI Controller mode.
Definition: a01nyub.cpp:7
std::vector< uint8_t > bytes
Definition: sml_parser.h:12
uint32_t bytes_to_frames(size_t bytes) const
Convert bytes to frames.
Definition: audio.h:43
void scale_audio_samples(const int16_t *audio_samples, int16_t *output_buffer, int16_t scale_factor, size_t samples_to_scale)
Scales Q15 fixed point audio samples.
Definition: audio.cpp:57
const char * audio_file_type_to_string(AudioFileType file_type)
Helper function to convert file type to a const char string.
Definition: audio.cpp:40