ESPHome  2025.3.3
audio_decoder.cpp
Go to the documentation of this file.
1 #include "audio_decoder.h"
2 
3 #ifdef USE_ESP32
4 
5 #include "esphome/core/hal.h"
6 
7 namespace esphome {
8 namespace audio {
9 
// Duration, in milliseconds, after which the decode function yields
static constexpr uint32_t DECODING_TIMEOUT_MS = 50;
// Timeout, in milliseconds, for transferring audio data
static constexpr uint32_t READ_WRITE_TIMEOUT_MS = 20;

// Threshold that the decoder's potentially-failed counter is compared against
static constexpr uint32_t MAX_POTENTIALLY_FAILED_COUNT = 10;
14 
15 AudioDecoder::AudioDecoder(size_t input_buffer_size, size_t output_buffer_size) {
17  this->output_transfer_buffer_ = AudioSinkTransferBuffer::create(output_buffer_size);
18 }
19 
21 #ifdef USE_AUDIO_MP3_SUPPORT
22  if (this->audio_file_type_ == AudioFileType::MP3) {
23  esp_audio_libs::helix_decoder::MP3FreeDecoder(this->mp3_decoder_);
24  }
25 #endif
26 }
27 
28 esp_err_t AudioDecoder::add_source(std::weak_ptr<RingBuffer> &input_ring_buffer) {
29  if (this->input_transfer_buffer_ != nullptr) {
30  this->input_transfer_buffer_->set_source(input_ring_buffer);
31  return ESP_OK;
32  }
33  return ESP_ERR_NO_MEM;
34 }
35 
36 esp_err_t AudioDecoder::add_sink(std::weak_ptr<RingBuffer> &output_ring_buffer) {
37  if (this->output_transfer_buffer_ != nullptr) {
38  this->output_transfer_buffer_->set_sink(output_ring_buffer);
39  return ESP_OK;
40  }
41  return ESP_ERR_NO_MEM;
42 }
43 
44 #ifdef USE_SPEAKER
// NOTE(review): the signature of this function (listing line 45) is missing from this extract. The body reads
// like an add_sink() overload taking a `speaker` argument -- confirm against the header.
// Sets `speaker` as the sink of the output transfer buffer. Returns ESP_OK when the sink is set and
// ESP_ERR_NO_MEM when the output transfer buffer does not exist.
46  if (this->output_transfer_buffer_ != nullptr) {
47  this->output_transfer_buffer_->set_sink(speaker);
48  return ESP_OK;
49  }
50  return ESP_ERR_NO_MEM;
51 }
52 #endif
53 
54 esp_err_t AudioDecoder::start(AudioFileType audio_file_type) {
55  if ((this->input_transfer_buffer_ == nullptr) || (this->output_transfer_buffer_ == nullptr)) {
56  return ESP_ERR_NO_MEM;
57  }
58 
59  this->audio_file_type_ = audio_file_type;
60 
61  this->potentially_failed_count_ = 0;
62  this->end_of_file_ = false;
63 
64  switch (this->audio_file_type_) {
65 #ifdef USE_AUDIO_FLAC_SUPPORT
67  this->flac_decoder_ = make_unique<esp_audio_libs::flac::FLACDecoder>();
68  this->free_buffer_required_ =
69  this->output_transfer_buffer_->capacity(); // Adjusted and reallocated after reading the header
70  break;
71 #endif
72 #ifdef USE_AUDIO_MP3_SUPPORT
73  case AudioFileType::MP3:
74  this->mp3_decoder_ = esp_audio_libs::helix_decoder::MP3InitDecoder();
75 
76  // MP3 always has 1152 samples per chunk
77  this->free_buffer_required_ = 1152 * sizeof(int16_t) * 2; // samples * size per sample * channels
78 
79  // Always reallocate the output transfer buffer to the smallest necessary size
80  this->output_transfer_buffer_->reallocate(this->free_buffer_required_);
81  break;
82 #endif
83  case AudioFileType::WAV:
84  this->wav_decoder_ = make_unique<esp_audio_libs::wav_decoder::WAVDecoder>();
85  this->wav_decoder_->reset();
86 
87  // Processing WAVs doesn't actually require a specific amount of buffer size, as it is already in PCM format.
88  // Thus, we don't reallocate to a minimum size.
89  this->free_buffer_required_ = 1024;
90  if (this->output_transfer_buffer_->capacity() < this->free_buffer_required_) {
91  this->output_transfer_buffer_->reallocate(this->free_buffer_required_);
92  }
93  break;
95  default:
96  return ESP_ERR_NOT_SUPPORTED;
97  break;
98  }
99 
100  return ESP_OK;
101 }
102 
// AudioDecoder::decode(bool stop_gracefully): decodes audio from the ring buffer source and writes to the sink.
// NOTE(review): several lines of this function are absent from this extract (the embedded listing numbers skip
// 103, 108, 113, 121, 123, 126, 155, 212-213, 217 and 222). The signature, the declaration of `state` and,
// presumably, the return statements are among them. The added comments describe only the statements still visible.
104  if (stop_gracefully) {
105  if (this->output_transfer_buffer_->available() == 0) {
106  if (this->end_of_file_) {
107  // The file decoder indicates it reached the end of file
109  }
110 
111  if (!this->input_transfer_buffer_->has_buffered_data()) {
112  // If all the internal buffers are empty, the decoding is done
114  }
115  }
116  }
117 
118  if (this->potentially_failed_count_ > MAX_POTENTIALLY_FAILED_COUNT) {
119  if (stop_gracefully) {
120  // No more new data is going to come in, so decoding is done
122  }
124  }
125 
127 
// Start time used for the DECODING_TIMEOUT_MS check inside the loop
128  uint32_t decoding_start = millis();
129 
130  bool first_loop_iteration = true;
131 
132  size_t bytes_processed = 0;
133  size_t bytes_available_before_processing = 0;
134 
135  while (state == FileDecoderState::MORE_TO_PROCESS) {
136  // Transfer decoded out
137  if (!this->pause_output_) {
138  // Never shift the data in the output transfer buffer to avoid unnecessary, slow data moves
139  size_t bytes_written =
140  this->output_transfer_buffer_->transfer_data_to_sink(pdMS_TO_TICKS(READ_WRITE_TIMEOUT_MS), false);
141 
// Playback position is only tracked once the stream info is known
142  if (this->audio_stream_info_.has_value()) {
143  this->accumulated_frames_written_ += this->audio_stream_info_.value().bytes_to_frames(bytes_written);
144  this->playback_ms_ +=
145  this->audio_stream_info_.value().frames_to_milliseconds_with_remainder(&this->accumulated_frames_written_);
146  }
147  } else {
148  // If paused, block to avoid wasting CPU resources
149  delay(READ_WRITE_TIMEOUT_MS);
150  }
151 
152  // Verify there is enough space to store more decoded audio and that the function hasn't been running too long
153  if ((this->output_transfer_buffer_->free() < this->free_buffer_required_) ||
154  (millis() - decoding_start > DECODING_TIMEOUT_MS)) {
156  }
157 
158  // Decode more audio
159 
160  // Only shift data on the first loop iteration to avoid unnecessary, slow moves
161  size_t bytes_read = this->input_transfer_buffer_->transfer_data_from_source(pdMS_TO_TICKS(READ_WRITE_TIMEOUT_MS),
162  first_loop_iteration);
163 
164  if (!first_loop_iteration && (this->input_transfer_buffer_->available() < bytes_processed)) {
165  // Less data is available than what was processed in last iteration, so don't attempt to decode.
166  // This attempts to avoid the decoder from consistently trying to decode an incomplete frame. The transfer buffer
167  // will shift the remaining data to the start and copy more from the source the next time the decode function is
168  // called
169  break;
170  }
171 
172  bytes_available_before_processing = this->input_transfer_buffer_->available();
173 
// NOTE(review): the literal 10 equals MAX_POTENTIALLY_FAILED_COUNT, which the check near the top of this function
// uses; referencing the constant here would keep the two thresholds in sync.
174  if ((this->potentially_failed_count_ > 10) && (bytes_read == 0)) {
175  // Failed to decode in last attempt and there is no new data
176 
177  if ((this->input_transfer_buffer_->free() == 0) && first_loop_iteration) {
178  // The input buffer is full. Since it previously failed on the exact same data, we can never recover
179  state = FileDecoderState::FAILED;
180  } else {
181  // Attempt to get more data next time
182  state = FileDecoderState::IDLE;
183  }
184  } else if (this->input_transfer_buffer_->available() == 0) {
185  // No data to decode, attempt to get more data next time
186  state = FileDecoderState::IDLE;
187  } else {
// Dispatch to the decoder for the file type selected in start()
188  switch (this->audio_file_type_) {
189 #ifdef USE_AUDIO_FLAC_SUPPORT
190  case AudioFileType::FLAC:
191  state = this->decode_flac_();
192  break;
193 #endif
194 #ifdef USE_AUDIO_MP3_SUPPORT
195  case AudioFileType::MP3:
196  state = this->decode_mp3_();
197  break;
198 #endif
199  case AudioFileType::WAV:
200  state = this->decode_wav_();
201  break;
202  case AudioFileType::NONE:
203  default:
204  state = FileDecoderState::IDLE;
205  break;
206  }
207  }
208 
209  first_loop_iteration = false;
// Input consumed in this iteration; compared against the available data at the top of the next iteration
210  bytes_processed = bytes_available_before_processing - this->input_transfer_buffer_->available();
211 
// NOTE(review): the opening branch of this if/else chain (listing lines 212-213) is missing from the extract.
214  } else if (state == FileDecoderState::END_OF_FILE) {
215  this->end_of_file_ = true;
216  } else if (state == FileDecoderState::FAILED) {
218  } else if (state == FileDecoderState::MORE_TO_PROCESS) {
// A successful decode clears the failure counter
219  this->potentially_failed_count_ = 0;
220  }
221  }
223 }
224 
225 #ifdef USE_AUDIO_FLAC_SUPPORT
// AudioDecoder::decode_flac_(): reads the FLAC header on first use, then decodes one frame per call from the input
// transfer buffer into the output transfer buffer.
// NOTE(review): the signature (listing line 226) and lines 233, 238, 248, 255, 265, 273, 281 and 284 are missing
// from this extract; presumably each is a `return` of a FileDecoderState value -- confirm against the original file.
227  if (!this->audio_stream_info_.has_value()) {
228  // Header hasn't been read
229  auto result = this->flac_decoder_->read_header(this->input_transfer_buffer_->get_buffer_start(),
230  this->input_transfer_buffer_->available());
231 
232  if (result == esp_audio_libs::flac::FLAC_DECODER_HEADER_OUT_OF_DATA) {
234  }
235 
236  if (result != esp_audio_libs::flac::FLAC_DECODER_SUCCESS) {
237  // Couldn't read FLAC header
239  }
240 
// Drop the header bytes the decoder reports as consumed
241  size_t bytes_consumed = this->flac_decoder_->get_bytes_index();
242  this->input_transfer_buffer_->decrease_buffer_length(bytes_consumed);
243 
244  // Reallocate the output transfer buffer to the smallest necessary size
245  this->free_buffer_required_ = flac_decoder_->get_output_buffer_size_bytes();
246  if (!this->output_transfer_buffer_->reallocate(this->free_buffer_required_)) {
247  // Couldn't reallocate output buffer
249  }
250 
// Setting the stream info marks the header as read for subsequent calls
251  this->audio_stream_info_ =
252  audio::AudioStreamInfo(this->flac_decoder_->get_sample_depth(), this->flac_decoder_->get_num_channels(),
253  this->flac_decoder_->get_sample_rate());
254 
256  }
257 
// Decode one frame; the output pointer handed to the decoder is the output transfer buffer's get_buffer_end()
258  uint32_t output_samples = 0;
259  auto result = this->flac_decoder_->decode_frame(
260  this->input_transfer_buffer_->get_buffer_start(), this->input_transfer_buffer_->available(),
261  reinterpret_cast<int16_t *>(this->output_transfer_buffer_->get_buffer_end()), &output_samples);
262 
263  if (result == esp_audio_libs::flac::FLAC_DECODER_ERROR_OUT_OF_DATA) {
264  // Not an issue, just needs more data that we'll get next time.
266  }
267 
268  size_t bytes_consumed = this->flac_decoder_->get_bytes_index();
269  this->input_transfer_buffer_->decrease_buffer_length(bytes_consumed);
270 
271  if (result > esp_audio_libs::flac::FLAC_DECODER_ERROR_OUT_OF_DATA) {
272  // Corrupted frame, don't retry with current buffer content, wait for new sync
274  }
275 
276  // We have successfully decoded some input data and have new output data
277  this->output_transfer_buffer_->increase_buffer_length(
278  this->audio_stream_info_.value().samples_to_bytes(output_samples));
279 
280  if (result == esp_audio_libs::flac::FLAC_DECODER_NO_MORE_FRAMES) {
282  }
283 
285 }
286 #endif
287 
288 #ifdef USE_AUDIO_MP3_SUPPORT
// AudioDecoder::decode_mp3_(): locates the next MP3 sync word in the input transfer buffer and decodes from there
// into the output transfer buffer.
// NOTE(review): the signature (listing line 289) and lines 298, 317, 321 and 338 are missing from this extract;
// presumably each is a `return` of a FileDecoderState value -- confirm against the original file.
290  // Look for the next sync word
291  int buffer_length = (int) this->input_transfer_buffer_->available();
292  int32_t offset =
293  esp_audio_libs::helix_decoder::MP3FindSyncWord(this->input_transfer_buffer_->get_buffer_start(), buffer_length);
294 
295  if (offset < 0) {
296  // New data may have the sync word
297  this->input_transfer_buffer_->decrease_buffer_length(buffer_length);
299  }
300 
301  // Advance read pointer to match the offset for the syncword
302  this->input_transfer_buffer_->decrease_buffer_length(offset);
303  uint8_t *buffer_start = this->input_transfer_buffer_->get_buffer_start();
304 
305  buffer_length = (int) this->input_transfer_buffer_->available();
306  int err = esp_audio_libs::helix_decoder::MP3Decode(this->mp3_decoder_, &buffer_start, &buffer_length,
307  (int16_t *) this->output_transfer_buffer_->get_buffer_end(), 0);
308 
// buffer_length was passed to MP3Decode by pointer; its difference from available() is treated as the bytes consumed
309  size_t consumed = this->input_transfer_buffer_->available() - buffer_length;
310  this->input_transfer_buffer_->decrease_buffer_length(consumed);
311 
312  if (err) {
313  switch (err) {
314  case esp_audio_libs::helix_decoder::ERR_MP3_OUT_OF_MEMORY:
315  // Intentional fallthrough
316  case esp_audio_libs::helix_decoder::ERR_MP3_NULL_POINTER:
318  break;
319  default:
320  // Most errors are recoverable by moving on to the next frame, so mark as potentially failed
322  break;
323  }
324  } else {
325  esp_audio_libs::helix_decoder::MP3FrameInfo mp3_frame_info;
326  esp_audio_libs::helix_decoder::MP3GetLastFrameInfo(this->mp3_decoder_, &mp3_frame_info);
327  if (mp3_frame_info.outputSamps > 0) {
328  int bytes_per_sample = (mp3_frame_info.bitsPerSample / 8);
329  this->output_transfer_buffer_->increase_buffer_length(mp3_frame_info.outputSamps * bytes_per_sample);
330 
// The stream info is filled in from the first frame that produced output
331  if (!this->audio_stream_info_.has_value()) {
332  this->audio_stream_info_ =
333  audio::AudioStreamInfo(mp3_frame_info.bitsPerSample, mp3_frame_info.nChans, mp3_frame_info.samprate);
334  }
335  }
336  }
337 
339 }
340 #endif
341 
// AudioDecoder::decode_wav_(): parses the WAV header on first use, then copies the data bytes from the input
// transfer buffer to the output transfer buffer.
// NOTE(review): the signature (listing line 342) and lines 352, 357, 360, 362 and 387 are missing from this
// extract; apart from 352 they are presumably `return` statements -- confirm against the original file.
343  if (!this->audio_stream_info_.has_value()) {
344  // Header hasn't been processed
345 
346  esp_audio_libs::wav_decoder::WAVDecoderResult result = this->wav_decoder_->decode_header(
347  this->input_transfer_buffer_->get_buffer_start(), this->input_transfer_buffer_->available());
348 
349  if (result == esp_audio_libs::wav_decoder::WAV_DECODER_SUCCESS_IN_DATA) {
350  this->input_transfer_buffer_->decrease_buffer_length(this->wav_decoder_->bytes_processed());
351 
// NOTE(review): the start of this statement (listing line 352) is missing; the line below holds the arguments of a
// call that receives the header's bits per sample, channel count and sample rate -- presumably the assignment of
// audio_stream_info_.
353  this->wav_decoder_->bits_per_sample(), this->wav_decoder_->num_channels(), this->wav_decoder_->sample_rate());
354 
// A remaining chunk size of zero is treated as "end unknown": the copy branch below then never stops on its own
355  this->wav_bytes_left_ = this->wav_decoder_->chunk_bytes_left();
356  this->wav_has_known_end_ = (this->wav_bytes_left_ > 0);
358  } else if (result == esp_audio_libs::wav_decoder::WAV_DECODER_WARNING_INCOMPLETE_DATA) {
359  // Available data didn't have the full header
361  } else {
363  }
364  } else {
365  if (!this->wav_has_known_end_ || (this->wav_bytes_left_ > 0)) {
366  size_t bytes_to_copy = this->input_transfer_buffer_->available();
367 
368  if (this->wav_has_known_end_) {
369  bytes_to_copy = std::min(bytes_to_copy, this->wav_bytes_left_);
370  }
371 
// Never copy more than the output transfer buffer has free
372  bytes_to_copy = std::min(bytes_to_copy, this->output_transfer_buffer_->free());
373 
374  if (bytes_to_copy > 0) {
375  std::memcpy(this->output_transfer_buffer_->get_buffer_end(), this->input_transfer_buffer_->get_buffer_start(),
376  bytes_to_copy);
377  this->input_transfer_buffer_->decrease_buffer_length(bytes_to_copy);
378  this->output_transfer_buffer_->increase_buffer_length(bytes_to_copy);
379  if (this->wav_has_known_end_) {
380  this->wav_bytes_left_ -= bytes_to_copy;
381  }
382  }
383  return FileDecoderState::IDLE;
384  }
385  }
386 
388 }
389 
390 } // namespace audio
391 } // namespace esphome
392 
393 #endif
bool state
Definition: fan.h:34
esp_err_t add_source(std::weak_ptr< RingBuffer > &input_ring_buffer)
Adds a source ring buffer for raw file data.
static std::unique_ptr< AudioSinkTransferBuffer > create(size_t buffer_size)
Creates a new sink transfer buffer.
std::unique_ptr< esp_audio_libs::flac::FLACDecoder > flac_decoder_
esp_err_t start(AudioFileType audio_file_type)
Sets up decoding the file.
uint32_t IRAM_ATTR HOT millis()
Definition: core.cpp:25
esp_audio_libs::helix_decoder::HMP3Decoder mp3_decoder_
FileDecoderState decode_wav_()
esp_err_t add_sink(std::weak_ptr< RingBuffer > &output_ring_buffer)
Adds a sink ring buffer for decoded audio.
~AudioDecoder()
Deallocates the MP3 decoder (the flac and wav decoders are deallocated automatically) ...
std::unique_ptr< AudioSinkTransferBuffer > output_transfer_buffer_
AudioDecoder(size_t input_buffer_size, size_t output_buffer_size)
Allocates the input and output transfer buffers.
static std::unique_ptr< AudioSourceTransferBuffer > create(size_t buffer_size)
Creates a new source transfer buffer.
std::unique_ptr< AudioSourceTransferBuffer > input_transfer_buffer_
std::unique_ptr< esp_audio_libs::wav_decoder::WAVDecoder > wav_decoder_
FileDecoderState decode_flac_()
Implementation of SPI Controller mode.
Definition: a01nyub.cpp:7
FileDecoderState decode_mp3_()
void IRAM_ATTR HOT delay(uint32_t ms)
Definition: core.cpp:26
AudioDecoderState decode(bool stop_gracefully)
Decodes audio from the ring buffer source and writes to the sink.
optional< AudioStreamInfo > audio_stream_info_