ESPHome  2025.3.3
mixer_speaker.cpp
Go to the documentation of this file.
1 #include "mixer_speaker.h"
2 
3 #ifdef USE_ESP32
4 
5 #include "esphome/core/hal.h"
6 #include "esphome/core/helpers.h"
7 #include "esphome/core/log.h"
8 
9 #include <algorithm>
10 #include <cstring>
11 
12 namespace esphome {
13 namespace mixer_speaker {
14 
15 static const UBaseType_t MIXER_TASK_PRIORITY = 10;
16 
17 static const uint32_t TRANSFER_BUFFER_DURATION_MS = 50;
18 static const uint32_t TASK_DELAY_MS = 25;
19 
20 static const size_t TASK_STACK_SIZE = 4096;
21 
22 static const int16_t MAX_AUDIO_SAMPLE_VALUE = INT16_MAX;
23 static const int16_t MIN_AUDIO_SAMPLE_VALUE = INT16_MIN;
24 
25 static const char *const TAG = "speaker_mixer";
26 
// Q15 fixed point scale factors indexed by the dB reduction to apply: entry 0 is 0 dB, entry 50 is 50 dB.
//   floating_point_scale_factor = 2^(-db / 6.014)
//   q15_scale_factor            = floating_point_scale_factor * 2^15
static const std::vector<int16_t> DECIBEL_REDUCTION_TABLE = {
    // 0 dB - 9 dB
    32767, 29201, 26022, 23189, 20665, 18415, 16410, 14624, 13032, 11613,
    // 10 dB - 19 dB
    10349, 9222, 8218, 7324, 6527, 5816, 5183, 4619, 4116, 3668,
    // 20 dB - 29 dB
    3269, 2913, 2596, 2313, 2061, 1837, 1637, 1459, 1300, 1158,
    // 30 dB - 39 dB
    1032, 920, 820, 731, 651, 580, 517, 461, 411, 366,
    // 40 dB - 49 dB
    326, 291, 259, 231, 206, 183, 163, 146, 130, 116,
    // 50 dB
    103};
34 
// Bits exchanged through the mixer's FreeRTOS event group.
enum MixerEventGroupBits : uint32_t {
  // Request: the mixer task leaves its processing loop once it observes this bit
  COMMAND_STOP = 1U << 0,

  // Task lifecycle reports, set by the mixer task
  STATE_STARTING = 1U << 10,
  STATE_RUNNING = 1U << 11,
  STATE_STOPPING = 1U << 12,
  STATE_STOPPED = 1U << 13,

  // Error report: an allocation needed by the mixer failed
  ERR_ESP_NO_MEM = 1U << 19,

  // Mask covering all valid FreeRTOS event group bits
  ALL_BITS = 0x00FFFFFF,
};
44 
46  ESP_LOGCONFIG(TAG, "Mixer Source Speaker");
47  ESP_LOGCONFIG(TAG, " Buffer Duration: %" PRIu32 " ms", this->buffer_duration_ms_);
48  if (this->timeout_ms_.has_value()) {
49  ESP_LOGCONFIG(TAG, " Timeout: %" PRIu32 " ms", this->timeout_ms_.value());
50  } else {
51  ESP_LOGCONFIG(TAG, " Timeout: never");
52  }
53 }
54 
57  [this](uint32_t new_playback_ms, uint32_t remainder_us, uint32_t pending_ms, uint32_t write_timestamp) {
58  uint32_t personal_playback_ms = std::min(new_playback_ms, this->pending_playback_ms_);
59  if (personal_playback_ms > 0) {
60  this->pending_playback_ms_ -= personal_playback_ms;
61  this->audio_output_callback_(personal_playback_ms, remainder_us, this->pending_playback_ms_, write_timestamp);
62  }
63  });
64 }
65 
67  switch (this->state_) {
69  esp_err_t err = this->start_();
70  if (err == ESP_OK) {
72  this->stop_gracefully_ = false;
73  this->last_seen_data_ms_ = millis();
74  this->status_clear_error();
75  } else {
76  switch (err) {
77  case ESP_ERR_NO_MEM:
78  this->status_set_error("Failed to start mixer: not enough memory");
79  break;
80  case ESP_ERR_NOT_SUPPORTED:
81  this->status_set_error("Failed to start mixer: unsupported bits per sample");
82  break;
83  case ESP_ERR_INVALID_ARG:
84  this->status_set_error("Failed to start mixer: audio stream isn't compatible with the other audio stream.");
85  break;
86  case ESP_ERR_INVALID_STATE:
87  this->status_set_error("Failed to start mixer: mixer task failed to start");
88  break;
89  default:
90  this->status_set_error("Failed to start mixer");
91  break;
92  }
93 
95  }
96  break;
97  }
99  if (!this->transfer_buffer_->has_buffered_data()) {
100  if ((this->timeout_ms_.has_value() && ((millis() - this->last_seen_data_ms_) > this->timeout_ms_.value())) ||
101  this->stop_gracefully_) {
103  }
104  }
105  break;
107  this->stop_();
108  this->stop_gracefully_ = false;
110  break;
112  break;
113  }
114 }
115 
116 size_t SourceSpeaker::play(const uint8_t *data, size_t length, TickType_t ticks_to_wait) {
117  if (this->is_stopped()) {
118  this->start();
119  }
120  size_t bytes_written = 0;
121  if (this->ring_buffer_.use_count() == 1) {
122  std::shared_ptr<RingBuffer> temp_ring_buffer = this->ring_buffer_.lock();
123  bytes_written = temp_ring_buffer->write_without_replacement(data, length, ticks_to_wait);
124  if (bytes_written > 0) {
125  this->last_seen_data_ms_ = millis();
126  }
127  }
128  return bytes_written;
129 }
130 
132 
134  const size_t ring_buffer_size = this->audio_stream_info_.ms_to_bytes(this->buffer_duration_ms_);
135  if (this->transfer_buffer_.use_count() == 0) {
136  this->transfer_buffer_ =
137  audio::AudioSourceTransferBuffer::create(this->audio_stream_info_.ms_to_bytes(TRANSFER_BUFFER_DURATION_MS));
138 
139  if (this->transfer_buffer_ == nullptr) {
140  return ESP_ERR_NO_MEM;
141  }
142  std::shared_ptr<RingBuffer> temp_ring_buffer;
143 
144  if (!this->ring_buffer_.use_count()) {
145  temp_ring_buffer = RingBuffer::create(ring_buffer_size);
146  this->ring_buffer_ = temp_ring_buffer;
147  }
148 
149  if (!this->ring_buffer_.use_count()) {
150  return ESP_ERR_NO_MEM;
151  } else {
152  this->transfer_buffer_->set_source(temp_ring_buffer);
153  }
154  }
155 
156  return this->parent_->start(this->audio_stream_info_);
157 }
158 
160  if (this->state_ != speaker::STATE_STOPPED) {
162  }
163 }
164 
166  this->transfer_buffer_.reset(); // deallocates the transfer buffer
167 }
168 
169 void SourceSpeaker::finish() { this->stop_gracefully_ = true; }
170 
172  return ((this->transfer_buffer_.use_count() > 0) && this->transfer_buffer_->has_buffered_data());
173 }
174 
175 void SourceSpeaker::set_mute_state(bool mute_state) {
176  this->mute_state_ = mute_state;
177  this->parent_->get_output_speaker()->set_mute_state(mute_state);
178 }
179 
181 
182 void SourceSpeaker::set_volume(float volume) {
183  this->volume_ = volume;
184  this->parent_->get_output_speaker()->set_volume(volume);
185 }
186 
188 
189 size_t SourceSpeaker::process_data_from_source(TickType_t ticks_to_wait) {
190  if (!this->transfer_buffer_.use_count()) {
191  return 0;
192  }
193 
194  // Store current offset, as these samples are already ducked
195  const size_t current_length = this->transfer_buffer_->available();
196 
197  size_t bytes_read = this->transfer_buffer_->transfer_data_from_source(ticks_to_wait);
198 
199  uint32_t samples_to_duck = this->audio_stream_info_.bytes_to_samples(bytes_read);
200  if (samples_to_duck > 0) {
201  int16_t *current_buffer = reinterpret_cast<int16_t *>(this->transfer_buffer_->get_buffer_start() + current_length);
202 
203  duck_samples(current_buffer, samples_to_duck, &this->current_ducking_db_reduction_,
206  }
207 
208  return bytes_read;
209 }
210 
211 void SourceSpeaker::apply_ducking(uint8_t decibel_reduction, uint32_t duration) {
212  if (this->target_ducking_db_reduction_ != decibel_reduction) {
214 
215  this->target_ducking_db_reduction_ = decibel_reduction;
216 
217  uint8_t total_ducking_steps = 0;
219  // The dB reduction level is increasing (which results in quieter audio)
220  total_ducking_steps = this->target_ducking_db_reduction_ - this->current_ducking_db_reduction_ - 1;
221  this->db_change_per_ducking_step_ = 1;
222  } else {
223  // The dB reduction level is decreasing (which results in louder audio)
224  total_ducking_steps = this->current_ducking_db_reduction_ - this->target_ducking_db_reduction_ - 1;
225  this->db_change_per_ducking_step_ = -1;
226  }
227  if ((duration > 0) && (total_ducking_steps > 0)) {
229 
230  this->samples_per_ducking_step_ = this->ducking_transition_samples_remaining_ / total_ducking_steps;
232  this->samples_per_ducking_step_ * total_ducking_steps; // Adjust for integer division rounding
233 
235  } else {
238  }
239  }
240 }
241 
242 void SourceSpeaker::duck_samples(int16_t *input_buffer, uint32_t input_samples_to_duck,
243  int8_t *current_ducking_db_reduction, uint32_t *ducking_transition_samples_remaining,
244  uint32_t samples_per_ducking_step, int8_t db_change_per_ducking_step) {
245  if (*ducking_transition_samples_remaining > 0) {
246  // Ducking level is still transitioning
247 
248  // Takes the ceiling of input_samples_to_duck/samples_per_ducking_step
249  uint32_t ducking_steps_in_batch =
250  input_samples_to_duck / samples_per_ducking_step + (input_samples_to_duck % samples_per_ducking_step != 0);
251 
252  for (uint32_t i = 0; i < ducking_steps_in_batch; ++i) {
253  uint32_t samples_left_in_step = *ducking_transition_samples_remaining % samples_per_ducking_step;
254 
255  if (samples_left_in_step == 0) {
256  samples_left_in_step = samples_per_ducking_step;
257  }
258 
259  uint32_t samples_to_duck = std::min(input_samples_to_duck, samples_left_in_step);
260  samples_to_duck = std::min(samples_to_duck, *ducking_transition_samples_remaining);
261 
262  // Ensure we only point to valid index in the Q15 scaling factor table
263  uint8_t safe_db_reduction_index =
264  clamp<uint8_t>(*current_ducking_db_reduction, 0, DECIBEL_REDUCTION_TABLE.size() - 1);
265  int16_t q15_scale_factor = DECIBEL_REDUCTION_TABLE[safe_db_reduction_index];
266 
267  audio::scale_audio_samples(input_buffer, input_buffer, q15_scale_factor, samples_to_duck);
268 
269  if (samples_left_in_step - samples_to_duck == 0) {
270  // After scaling the current samples, we are ready to transition to the next step
271  *current_ducking_db_reduction += db_change_per_ducking_step;
272  }
273 
274  input_buffer += samples_to_duck;
275  *ducking_transition_samples_remaining -= samples_to_duck;
276  input_samples_to_duck -= samples_to_duck;
277  }
278  }
279 
280  if ((*current_ducking_db_reduction > 0) && (input_samples_to_duck > 0)) {
281  // Audio is ducked, but its not in the middle of a transition step
282 
283  uint8_t safe_db_reduction_index =
284  clamp<uint8_t>(*current_ducking_db_reduction, 0, DECIBEL_REDUCTION_TABLE.size() - 1);
285  int16_t q15_scale_factor = DECIBEL_REDUCTION_TABLE[safe_db_reduction_index];
286 
287  audio::scale_audio_samples(input_buffer, input_buffer, q15_scale_factor, input_samples_to_duck);
288  }
289 }
290 
292  ESP_LOGCONFIG(TAG, "Speaker Mixer:");
293  ESP_LOGCONFIG(TAG, " Number of output channels: %u", this->output_channels_);
294 }
295 
297  this->event_group_ = xEventGroupCreate();
298 
299  if (this->event_group_ == nullptr) {
300  ESP_LOGE(TAG, "Failed to create event group");
301  this->mark_failed();
302  return;
303  }
304 }
305 
307  uint32_t event_group_bits = xEventGroupGetBits(this->event_group_);
308 
309  if (event_group_bits & MixerEventGroupBits::STATE_STARTING) {
310  ESP_LOGD(TAG, "Starting speaker mixer");
311  xEventGroupClearBits(this->event_group_, MixerEventGroupBits::STATE_STARTING);
312  }
313  if (event_group_bits & MixerEventGroupBits::ERR_ESP_NO_MEM) {
314  this->status_set_error("Failed to allocate the mixer's internal buffer");
315  xEventGroupClearBits(this->event_group_, MixerEventGroupBits::ERR_ESP_NO_MEM);
316  }
317  if (event_group_bits & MixerEventGroupBits::STATE_RUNNING) {
318  ESP_LOGD(TAG, "Started speaker mixer");
319  this->status_clear_error();
320  xEventGroupClearBits(this->event_group_, MixerEventGroupBits::STATE_RUNNING);
321  }
322  if (event_group_bits & MixerEventGroupBits::STATE_STOPPING) {
323  ESP_LOGD(TAG, "Stopping speaker mixer");
324  xEventGroupClearBits(this->event_group_, MixerEventGroupBits::STATE_STOPPING);
325  }
326  if (event_group_bits & MixerEventGroupBits::STATE_STOPPED) {
327  if (this->delete_task_() == ESP_OK) {
328  xEventGroupClearBits(this->event_group_, MixerEventGroupBits::ALL_BITS);
329  }
330  }
331 
332  if (this->task_handle_ != nullptr) {
333  bool all_stopped = true;
334 
335  for (auto &speaker : this->source_speakers_) {
336  all_stopped &= speaker->is_stopped();
337  }
338 
339  if (all_stopped) {
340  this->stop();
341  }
342  }
343 }
344 
346  if (!this->audio_stream_info_.has_value()) {
347  if (stream_info.get_bits_per_sample() != 16) {
348  // Audio streams that don't have 16 bits per sample are not supported
349  return ESP_ERR_NOT_SUPPORTED;
350  }
351 
352  this->audio_stream_info_ = audio::AudioStreamInfo(stream_info.get_bits_per_sample(), this->output_channels_,
353  stream_info.get_sample_rate());
354  this->output_speaker_->set_audio_stream_info(this->audio_stream_info_.value());
355  } else {
356  if (!this->queue_mode_ && (stream_info.get_sample_rate() != this->audio_stream_info_.value().get_sample_rate())) {
357  // The two audio streams must have the same sample rate to mix properly if not in queue mode
358  return ESP_ERR_INVALID_ARG;
359  }
360  }
361 
362  return this->start_task_();
363 }
364 
366  if (this->task_stack_buffer_ == nullptr) {
367  if (this->task_stack_in_psram_) {
369  this->task_stack_buffer_ = stack_allocator.allocate(TASK_STACK_SIZE);
370  } else {
372  this->task_stack_buffer_ = stack_allocator.allocate(TASK_STACK_SIZE);
373  }
374  }
375 
376  if (this->task_stack_buffer_ == nullptr) {
377  return ESP_ERR_NO_MEM;
378  }
379 
380  if (this->task_handle_ == nullptr) {
381  this->task_handle_ = xTaskCreateStatic(audio_mixer_task, "mixer", TASK_STACK_SIZE, (void *) this,
382  MIXER_TASK_PRIORITY, this->task_stack_buffer_, &this->task_stack_);
383  }
384 
385  if (this->task_handle_ == nullptr) {
386  return ESP_ERR_INVALID_STATE;
387  }
388 
389  return ESP_OK;
390 }
391 
393  if (!this->task_created_) {
394  this->task_handle_ = nullptr;
395 
396  if (this->task_stack_buffer_ != nullptr) {
397  if (this->task_stack_in_psram_) {
399  stack_allocator.deallocate(this->task_stack_buffer_, TASK_STACK_SIZE);
400  } else {
402  stack_allocator.deallocate(this->task_stack_buffer_, TASK_STACK_SIZE);
403  }
404 
405  this->task_stack_buffer_ = nullptr;
406  }
407 
408  return ESP_OK;
409  }
410 
411  return ESP_ERR_INVALID_STATE;
412 }
413 
414 void MixerSpeaker::stop() { xEventGroupSetBits(this->event_group_, MixerEventGroupBits::COMMAND_STOP); }
415 
416 void MixerSpeaker::copy_frames(const int16_t *input_buffer, audio::AudioStreamInfo input_stream_info,
417  int16_t *output_buffer, audio::AudioStreamInfo output_stream_info,
418  uint32_t frames_to_transfer) {
419  uint8_t input_channels = input_stream_info.get_channels();
420  uint8_t output_channels = output_stream_info.get_channels();
421  const uint8_t max_input_channel_index = input_channels - 1;
422 
423  if (input_channels == output_channels) {
424  size_t bytes_to_copy = input_stream_info.frames_to_bytes(frames_to_transfer);
425  memcpy(output_buffer, input_buffer, bytes_to_copy);
426 
427  return;
428  }
429 
430  for (uint32_t frame_index = 0; frame_index < frames_to_transfer; ++frame_index) {
431  for (uint8_t output_channel_index = 0; output_channel_index < output_channels; ++output_channel_index) {
432  uint8_t input_channel_index = std::min(output_channel_index, max_input_channel_index);
433  output_buffer[output_channels * frame_index + output_channel_index] =
434  input_buffer[input_channels * frame_index + input_channel_index];
435  }
436  }
437 }
438 
439 void MixerSpeaker::mix_audio_samples(const int16_t *primary_buffer, audio::AudioStreamInfo primary_stream_info,
440  const int16_t *secondary_buffer, audio::AudioStreamInfo secondary_stream_info,
441  int16_t *output_buffer, audio::AudioStreamInfo output_stream_info,
442  uint32_t frames_to_mix) {
443  const uint8_t primary_channels = primary_stream_info.get_channels();
444  const uint8_t secondary_channels = secondary_stream_info.get_channels();
445  const uint8_t output_channels = output_stream_info.get_channels();
446 
447  const uint8_t max_primary_channel_index = primary_channels - 1;
448  const uint8_t max_secondary_channel_index = secondary_channels - 1;
449 
450  for (uint32_t frames_index = 0; frames_index < frames_to_mix; ++frames_index) {
451  for (uint8_t output_channel_index = 0; output_channel_index < output_channels; ++output_channel_index) {
452  const uint32_t secondary_channel_index = std::min(output_channel_index, max_secondary_channel_index);
453  const int32_t secondary_sample = secondary_buffer[frames_index * secondary_channels + secondary_channel_index];
454 
455  const uint32_t primary_channel_index = std::min(output_channel_index, max_primary_channel_index);
456  const int32_t primary_sample =
457  static_cast<int32_t>(primary_buffer[frames_index * primary_channels + primary_channel_index]);
458 
459  const int32_t added_sample = secondary_sample + primary_sample;
460 
461  output_buffer[frames_index * output_channels + output_channel_index] =
462  static_cast<int16_t>(clamp<int32_t>(added_sample, MIN_AUDIO_SAMPLE_VALUE, MAX_AUDIO_SAMPLE_VALUE));
463  }
464  }
465 }
466 
467 void MixerSpeaker::audio_mixer_task(void *params) {
468  MixerSpeaker *this_mixer = (MixerSpeaker *) params;
469 
470  xEventGroupSetBits(this_mixer->event_group_, MixerEventGroupBits::STATE_STARTING);
471 
472  this_mixer->task_created_ = true;
473 
474  std::unique_ptr<audio::AudioSinkTransferBuffer> output_transfer_buffer = audio::AudioSinkTransferBuffer::create(
475  this_mixer->audio_stream_info_.value().ms_to_bytes(TRANSFER_BUFFER_DURATION_MS));
476 
477  if (output_transfer_buffer == nullptr) {
478  xEventGroupSetBits(this_mixer->event_group_,
480 
481  this_mixer->task_created_ = false;
482  vTaskDelete(nullptr);
483  }
484 
485  output_transfer_buffer->set_sink(this_mixer->output_speaker_);
486 
487  xEventGroupSetBits(this_mixer->event_group_, MixerEventGroupBits::STATE_RUNNING);
488 
489  bool sent_finished = false;
490 
491  while (true) {
492  uint32_t event_group_bits = xEventGroupGetBits(this_mixer->event_group_);
493  if (event_group_bits & MixerEventGroupBits::COMMAND_STOP) {
494  break;
495  }
496 
497  // Never shift the data in the output transfer buffer to avoid unnecessary, slow data moves
498  output_transfer_buffer->transfer_data_to_sink(pdMS_TO_TICKS(TASK_DELAY_MS), false);
499 
500  const uint32_t output_frames_free =
501  this_mixer->audio_stream_info_.value().bytes_to_frames(output_transfer_buffer->free());
502 
503  std::vector<SourceSpeaker *> speakers_with_data;
504  std::vector<std::shared_ptr<audio::AudioSourceTransferBuffer>> transfer_buffers_with_data;
505 
506  for (auto &speaker : this_mixer->source_speakers_) {
507  if (speaker->get_transfer_buffer().use_count() > 0) {
508  std::shared_ptr<audio::AudioSourceTransferBuffer> transfer_buffer = speaker->get_transfer_buffer().lock();
509  speaker->process_data_from_source(0); // Transfers and ducks audio from source ring buffers
510 
511  if ((transfer_buffer->available() > 0) && !speaker->get_pause_state()) {
512  // Store the locked transfer buffers in their own vector to avoid releasing ownership until after the loop
513  transfer_buffers_with_data.push_back(transfer_buffer);
514  speakers_with_data.push_back(speaker);
515  }
516  }
517  }
518 
519  if (transfer_buffers_with_data.empty()) {
520  // No audio available for transferring, block task temporarily
521  delay(TASK_DELAY_MS);
522  continue;
523  }
524 
525  uint32_t frames_to_mix = output_frames_free;
526 
527  if ((transfer_buffers_with_data.size() == 1) || this_mixer->queue_mode_) {
528  // Only one speaker has audio data, just copy samples over
529 
530  audio::AudioStreamInfo active_stream_info = speakers_with_data[0]->get_audio_stream_info();
531 
532  if (active_stream_info.get_sample_rate() ==
534  // Speaker's sample rate matches the output speaker's, copy directly
535 
536  const uint32_t frames_available_in_buffer =
537  active_stream_info.bytes_to_frames(transfer_buffers_with_data[0]->available());
538  frames_to_mix = std::min(frames_to_mix, frames_available_in_buffer);
539  copy_frames(reinterpret_cast<int16_t *>(transfer_buffers_with_data[0]->get_buffer_start()), active_stream_info,
540  reinterpret_cast<int16_t *>(output_transfer_buffer->get_buffer_end()),
541  this_mixer->audio_stream_info_.value(), frames_to_mix);
542 
543  // Update source speaker buffer length
544  transfer_buffers_with_data[0]->decrease_buffer_length(active_stream_info.frames_to_bytes(frames_to_mix));
545  speakers_with_data[0]->accumulated_frames_read_ += frames_to_mix;
546 
547  // Add new audio duration to the source speaker pending playback
548  speakers_with_data[0]->pending_playback_ms_ +=
549  active_stream_info.frames_to_milliseconds_with_remainder(&speakers_with_data[0]->accumulated_frames_read_);
550 
551  // Update output transfer buffer length
552  output_transfer_buffer->increase_buffer_length(
553  this_mixer->audio_stream_info_.value().frames_to_bytes(frames_to_mix));
554  } else {
555  // Speaker's stream info doesn't match the output speaker's, so it's a new source speaker
556  if (!this_mixer->output_speaker_->is_stopped()) {
557  if (!sent_finished) {
558  this_mixer->output_speaker_->finish();
559  sent_finished = true; // Avoid repeatedly sending the finish command
560  }
561  } else {
562  // Speaker has finished writing the current audio, update the stream information and restart the speaker
563  this_mixer->audio_stream_info_ =
564  audio::AudioStreamInfo(active_stream_info.get_bits_per_sample(), this_mixer->output_channels_,
565  active_stream_info.get_sample_rate());
566  this_mixer->output_speaker_->set_audio_stream_info(this_mixer->audio_stream_info_.value());
567  this_mixer->output_speaker_->start();
568  sent_finished = false;
569  }
570  }
571  } else {
572  // Determine how many frames to mix
573  for (int i = 0; i < transfer_buffers_with_data.size(); ++i) {
574  const uint32_t frames_available_in_buffer =
575  speakers_with_data[i]->get_audio_stream_info().bytes_to_frames(transfer_buffers_with_data[i]->available());
576  frames_to_mix = std::min(frames_to_mix, frames_available_in_buffer);
577  }
578  int16_t *primary_buffer = reinterpret_cast<int16_t *>(transfer_buffers_with_data[0]->get_buffer_start());
579  audio::AudioStreamInfo primary_stream_info = speakers_with_data[0]->get_audio_stream_info();
580 
581  // Mix two streams together
582  for (int i = 1; i < transfer_buffers_with_data.size(); ++i) {
583  mix_audio_samples(primary_buffer, primary_stream_info,
584  reinterpret_cast<int16_t *>(transfer_buffers_with_data[i]->get_buffer_start()),
585  speakers_with_data[i]->get_audio_stream_info(),
586  reinterpret_cast<int16_t *>(output_transfer_buffer->get_buffer_end()),
587  this_mixer->audio_stream_info_.value(), frames_to_mix);
588 
589  speakers_with_data[i]->pending_playback_ms_ +=
590  speakers_with_data[i]->get_audio_stream_info().frames_to_milliseconds_with_remainder(
591  &speakers_with_data[i]->accumulated_frames_read_);
592 
593  if (i != transfer_buffers_with_data.size() - 1) {
594  // Need to mix more streams together, point primary buffer and stream info to the already mixed output
595  primary_buffer = reinterpret_cast<int16_t *>(output_transfer_buffer->get_buffer_end());
596  primary_stream_info = this_mixer->audio_stream_info_.value();
597  }
598  }
599 
600  // Update source transfer buffer lengths and add new audio durations to the source speaker pending playbacks
601  for (int i = 0; i < transfer_buffers_with_data.size(); ++i) {
602  transfer_buffers_with_data[i]->decrease_buffer_length(
603  speakers_with_data[i]->get_audio_stream_info().frames_to_bytes(frames_to_mix));
604  speakers_with_data[i]->accumulated_frames_read_ += frames_to_mix;
605 
606  speakers_with_data[i]->pending_playback_ms_ +=
607  speakers_with_data[i]->get_audio_stream_info().frames_to_milliseconds_with_remainder(
608  &speakers_with_data[i]->accumulated_frames_read_);
609  }
610 
611  // Update output transfer buffer length
612  output_transfer_buffer->increase_buffer_length(
613  this_mixer->audio_stream_info_.value().frames_to_bytes(frames_to_mix));
614  }
615  }
616 
617  xEventGroupSetBits(this_mixer->event_group_, MixerEventGroupBits::STATE_STOPPING);
618 
619  output_transfer_buffer.reset();
620 
621  xEventGroupSetBits(this_mixer->event_group_, MixerEventGroupBits::STATE_STOPPED);
622  this_mixer->task_created_ = false;
623  vTaskDelete(nullptr);
624 }
625 
626 } // namespace mixer_speaker
627 } // namespace esphome
628 
629 #endif
value_type const & value() const
Definition: optional.h:89
esp_err_t start_task_()
Starts the mixer task after allocating memory for the task stack.
void set_volume(float volume) override
Volume state changes are passed to the parent's output speaker.
speaker::Speaker * get_output_speaker() const
std::vector< SourceSpeaker * > source_speakers_
virtual void set_volume(float volume)
Definition: speaker.h:71
uint8_t get_channels() const
Definition: audio.h:29
void add_audio_output_callback(std::function< void(uint32_t, uint32_t, uint32_t, uint32_t)> &&callback)
Callback function for sending the duration of the audio written to the speaker since the last callbac...
Definition: speaker.h:112
static void duck_samples(int16_t *input_buffer, uint32_t input_samples_to_duck, int8_t *current_ducking_db_reduction, uint32_t *ducking_transition_samples_remaining, uint32_t samples_per_ducking_step, int8_t db_change_per_ducking_step)
Ducks audio samples by a specified amount.
static std::unique_ptr< AudioSinkTransferBuffer > create(size_t buffer_size)
Creates a new sink transfer buffer.
std::shared_ptr< audio::AudioSourceTransferBuffer > transfer_buffer_
size_t process_data_from_source(TickType_t ticks_to_wait)
Transfers audio from the ring buffer into the transfer buffer.
uint8_t get_bits_per_sample() const
Definition: audio.h:28
T * allocate(size_t n)
Definition: helpers.h:703
virtual void finish()
Definition: speaker.h:58
virtual bool get_mute_state()
Definition: speaker.h:93
size_t play(const uint8_t *data, size_t length, TickType_t ticks_to_wait) override
bool has_value() const
Definition: optional.h:87
uint32_t frames_to_milliseconds_with_remainder(uint32_t *frames) const
Computes the duration, in milliseconds, the given amount of frames represents.
Definition: audio.cpp:26
CallbackManager< void(uint32_t, uint32_t, uint32_t, uint32_t)> audio_output_callback_
Definition: speaker.h:126
bool is_stopped() const
Definition: speaker.h:67
uint32_t IRAM_ATTR HOT millis()
Definition: core.cpp:25
uint32_t bytes_to_samples(size_t bytes) const
Convert bytes to samples.
Definition: audio.h:48
static void audio_mixer_task(void *params)
void set_audio_stream_info(const audio::AudioStreamInfo &audio_stream_info)
Definition: speaker.h:99
void status_set_error(const char *message="unspecified")
Definition: component.cpp:159
void set_mute_state(bool mute_state) override
Mute state changes are passed to the parent's output speaker.
static void copy_frames(const int16_t *input_buffer, audio::AudioStreamInfo input_stream_info, int16_t *output_buffer, audio::AudioStreamInfo output_stream_info, uint32_t frames_to_transfer)
Copies audio frames from the input buffer to the output buffer taking into account the number of chan...
size_t frames_to_bytes(uint32_t frames) const
Converts frames to bytes.
Definition: audio.h:53
uint32_t ms_to_samples(uint32_t ms) const
Converts duration to samples.
Definition: audio.h:68
static std::unique_ptr< AudioSourceTransferBuffer > create(size_t buffer_size)
Creates a new source transfer buffer.
uint32_t get_sample_rate() const
Definition: audio.h:30
size_t ms_to_bytes(uint32_t ms) const
Converts duration to bytes.
Definition: audio.h:73
void deallocate(T *p, size_t n)
Definition: helpers.h:741
void status_clear_error()
Definition: component.cpp:172
virtual float get_volume()
Definition: speaker.h:79
virtual void start()=0
virtual void mark_failed()
Mark this component as failed.
Definition: component.cpp:118
esp_err_t start(audio::AudioStreamInfo &stream_info)
Starts the mixer task.
uint16_t length
Definition: tt21100.cpp:12
Implementation of SPI Controller mode.
Definition: a01nyub.cpp:7
static void mix_audio_samples(const int16_t *primary_buffer, audio::AudioStreamInfo primary_stream_info, const int16_t *secondary_buffer, audio::AudioStreamInfo secondary_stream_info, int16_t *output_buffer, audio::AudioStreamInfo output_stream_info, uint32_t frames_to_mix)
Mixes the primary and secondary streams taking into account the number of channels in each stream...
esp_err_t delete_task_()
If the task is stopped, it sets the task handle to the nullptr and deallocates its stack...
uint32_t bytes_to_frames(size_t bytes) const
Convert bytes to frames.
Definition: audio.h:43
std::weak_ptr< RingBuffer > ring_buffer_
audio::AudioStreamInfo & get_audio_stream_info()
Definition: speaker.h:103
An STL allocator that uses SPI or internal RAM.
Definition: helpers.h:683
static std::unique_ptr< RingBuffer > create(size_t len)
Definition: ring_buffer.cpp:22
void scale_audio_samples(const int16_t *audio_samples, int16_t *output_buffer, int16_t scale_factor, size_t samples_to_scale)
Scales Q15 fixed point audio samples.
Definition: audio.cpp:57
void apply_ducking(uint8_t decibel_reduction, uint32_t duration)
Sets the ducking level for the source speaker.
audio::AudioStreamInfo audio_stream_info_
Definition: speaker.h:118
virtual void set_mute_state(bool mute_state)
Definition: speaker.h:81
void IRAM_ATTR HOT delay(uint32_t ms)
Definition: core.cpp:26
optional< audio::AudioStreamInfo > audio_stream_info_
uint8_t duration
Definition: msa3xx.h:430