13 namespace mixer_speaker {
// Priority handed to xTaskCreateStatic when the mixer task is created.
15 static const UBaseType_t MIXER_TASK_PRIORITY = 10;
// Transfer buffer duration in milliseconds.
// NOTE(review): the code that consumes this value is not visible in this excerpt - confirm usage.
17 static const uint32_t TRANSFER_BUFFER_DURATION_MS = 50;
// Milliseconds used both as the tick argument (via pdMS_TO_TICKS) when pushing the output buffer to the sink
// and as the delay() applied when no source transfer buffer has data to mix.
18 static const uint32_t TASK_DELAY_MS = 25;
// Size used to allocate/deallocate the mixer task's stack buffer and passed to xTaskCreateStatic.
20 static const size_t TASK_STACK_SIZE = 4096;
// Upper and lower bounds that a summed (mixed) sample is clamped to before it is cast back to int16_t.
22 static const int16_t MAX_AUDIO_SAMPLE_VALUE = INT16_MAX;
23 static const int16_t MIN_AUDIO_SAMPLE_VALUE = INT16_MIN;
// Tag passed as the first argument to the ESP_LOGCONFIG / ESP_LOGD / ESP_LOGE calls in this file.
25 static const char *
const TAG =
"speaker_mixer";
// Lookup table of Q15 fixed-point scale factors used when ducking audio.
// The ducking code clamps the current dB reduction to [0, size() - 1] and uses it as the index, so
// index 0 (32767) is effectively unity gain and the last index (50, value 103) is the strongest attenuation.
// The 51 entries decrease monotonically; each is roughly 1 dB quieter than the previous one.
30 static const std::vector<int16_t> DECIBEL_REDUCTION_TABLE = {
31 32767, 29201, 26022, 23189, 20665, 18415, 16410, 14624, 13032, 11613, 10349, 9222, 8218, 7324, 6527, 5816, 5183,
32 4619, 4116, 3668, 3269, 2913, 2596, 2313, 2061, 1837, 1637, 1459, 1300, 1158, 1032, 920, 820, 731,
33 651, 580, 517, 461, 411, 366, 326, 291, 259, 231, 206, 183, 163, 146, 130, 116, 103};
46 ESP_LOGCONFIG(TAG,
"Mixer Source Speaker");
51 ESP_LOGCONFIG(TAG,
" Timeout: never");
57 [
this](uint32_t new_playback_ms, uint32_t remainder_us, uint32_t pending_ms, uint32_t write_timestamp) {
59 if (personal_playback_ms > 0) {
69 esp_err_t err = this->
start_();
80 case ESP_ERR_NOT_SUPPORTED:
81 this->
status_set_error(
"Failed to start mixer: unsupported bits per sample");
83 case ESP_ERR_INVALID_ARG:
84 this->
status_set_error(
"Failed to start mixer: audio stream isn't compatible with the other audio stream.");
86 case ESP_ERR_INVALID_STATE:
120 size_t bytes_written = 0;
122 std::shared_ptr<RingBuffer> temp_ring_buffer = this->
ring_buffer_.lock();
123 bytes_written = temp_ring_buffer->write_without_replacement(data, length, ticks_to_wait);
124 if (bytes_written > 0) {
128 return bytes_written;
140 return ESP_ERR_NO_MEM;
142 std::shared_ptr<RingBuffer> temp_ring_buffer;
150 return ESP_ERR_NO_MEM;
197 size_t bytes_read = this->
transfer_buffer_->transfer_data_from_source(ticks_to_wait);
200 if (samples_to_duck > 0) {
201 int16_t *current_buffer =
reinterpret_cast<int16_t *
>(this->
transfer_buffer_->get_buffer_start() + current_length);
217 uint8_t total_ducking_steps = 0;
227 if ((duration > 0) && (total_ducking_steps > 0)) {
243 int8_t *current_ducking_db_reduction, uint32_t *ducking_transition_samples_remaining,
244 uint32_t samples_per_ducking_step, int8_t db_change_per_ducking_step) {
245 if (*ducking_transition_samples_remaining > 0) {
249 uint32_t ducking_steps_in_batch =
250 input_samples_to_duck / samples_per_ducking_step + (input_samples_to_duck % samples_per_ducking_step != 0);
252 for (uint32_t i = 0; i < ducking_steps_in_batch; ++i) {
253 uint32_t samples_left_in_step = *ducking_transition_samples_remaining % samples_per_ducking_step;
255 if (samples_left_in_step == 0) {
256 samples_left_in_step = samples_per_ducking_step;
259 uint32_t samples_to_duck = std::min(input_samples_to_duck, samples_left_in_step);
260 samples_to_duck = std::min(samples_to_duck, *ducking_transition_samples_remaining);
263 uint8_t safe_db_reduction_index =
264 clamp<uint8_t>(*current_ducking_db_reduction, 0, DECIBEL_REDUCTION_TABLE.size() - 1);
265 int16_t q15_scale_factor = DECIBEL_REDUCTION_TABLE[safe_db_reduction_index];
269 if (samples_left_in_step - samples_to_duck == 0) {
271 *current_ducking_db_reduction += db_change_per_ducking_step;
274 input_buffer += samples_to_duck;
275 *ducking_transition_samples_remaining -= samples_to_duck;
276 input_samples_to_duck -= samples_to_duck;
280 if ((*current_ducking_db_reduction > 0) && (input_samples_to_duck > 0)) {
283 uint8_t safe_db_reduction_index =
284 clamp<uint8_t>(*current_ducking_db_reduction, 0, DECIBEL_REDUCTION_TABLE.size() - 1);
285 int16_t q15_scale_factor = DECIBEL_REDUCTION_TABLE[safe_db_reduction_index];
292 ESP_LOGCONFIG(TAG,
"Speaker Mixer:");
293 ESP_LOGCONFIG(TAG,
" Number of output channels: %u", this->output_channels_);
297 this->event_group_ = xEventGroupCreate();
299 if (this->event_group_ ==
nullptr) {
300 ESP_LOGE(TAG,
"Failed to create event group");
307 uint32_t event_group_bits = xEventGroupGetBits(this->event_group_);
310 ESP_LOGD(TAG,
"Starting speaker mixer");
311 xEventGroupClearBits(this->event_group_, MixerEventGroupBits::STATE_STARTING);
315 xEventGroupClearBits(this->event_group_, MixerEventGroupBits::ERR_ESP_NO_MEM);
318 ESP_LOGD(TAG,
"Started speaker mixer");
320 xEventGroupClearBits(this->event_group_, MixerEventGroupBits::STATE_RUNNING);
323 ESP_LOGD(TAG,
"Stopping speaker mixer");
324 xEventGroupClearBits(this->event_group_, MixerEventGroupBits::STATE_STOPPING);
327 if (this->delete_task_() == ESP_OK) {
332 if (this->task_handle_ !=
nullptr) {
333 bool all_stopped =
true;
335 for (
auto &speaker : this->source_speakers_) {
336 all_stopped &= speaker->is_stopped();
349 return ESP_ERR_NOT_SUPPORTED;
358 return ESP_ERR_INVALID_ARG;
362 return this->start_task_();
366 if (this->task_stack_buffer_ ==
nullptr) {
367 if (this->task_stack_in_psram_) {
369 this->task_stack_buffer_ = stack_allocator.
allocate(TASK_STACK_SIZE);
372 this->task_stack_buffer_ = stack_allocator.
allocate(TASK_STACK_SIZE);
376 if (this->task_stack_buffer_ ==
nullptr) {
377 return ESP_ERR_NO_MEM;
380 if (this->task_handle_ ==
nullptr) {
381 this->task_handle_ = xTaskCreateStatic(audio_mixer_task,
"mixer", TASK_STACK_SIZE, (
void *)
this,
382 MIXER_TASK_PRIORITY, this->task_stack_buffer_, &this->task_stack_);
385 if (this->task_handle_ ==
nullptr) {
386 return ESP_ERR_INVALID_STATE;
393 if (!this->task_created_) {
394 this->task_handle_ =
nullptr;
396 if (this->task_stack_buffer_ !=
nullptr) {
397 if (this->task_stack_in_psram_) {
399 stack_allocator.
deallocate(this->task_stack_buffer_, TASK_STACK_SIZE);
402 stack_allocator.
deallocate(this->task_stack_buffer_, TASK_STACK_SIZE);
405 this->task_stack_buffer_ =
nullptr;
411 return ESP_ERR_INVALID_STATE;
418 uint32_t frames_to_transfer) {
419 uint8_t input_channels = input_stream_info.
get_channels();
420 uint8_t output_channels = output_stream_info.
get_channels();
421 const uint8_t max_input_channel_index = input_channels - 1;
423 if (input_channels == output_channels) {
424 size_t bytes_to_copy = input_stream_info.
frames_to_bytes(frames_to_transfer);
425 memcpy(output_buffer, input_buffer, bytes_to_copy);
430 for (uint32_t frame_index = 0; frame_index < frames_to_transfer; ++frame_index) {
431 for (uint8_t output_channel_index = 0; output_channel_index < output_channels; ++output_channel_index) {
432 uint8_t input_channel_index = std::min(output_channel_index, max_input_channel_index);
433 output_buffer[output_channels * frame_index + output_channel_index] =
434 input_buffer[input_channels * frame_index + input_channel_index];
442 uint32_t frames_to_mix) {
443 const uint8_t primary_channels = primary_stream_info.
get_channels();
444 const uint8_t secondary_channels = secondary_stream_info.
get_channels();
445 const uint8_t output_channels = output_stream_info.
get_channels();
447 const uint8_t max_primary_channel_index = primary_channels - 1;
448 const uint8_t max_secondary_channel_index = secondary_channels - 1;
450 for (uint32_t frames_index = 0; frames_index < frames_to_mix; ++frames_index) {
451 for (uint8_t output_channel_index = 0; output_channel_index < output_channels; ++output_channel_index) {
452 const uint32_t secondary_channel_index = std::min(output_channel_index, max_secondary_channel_index);
453 const int32_t secondary_sample = secondary_buffer[frames_index * secondary_channels + secondary_channel_index];
455 const uint32_t primary_channel_index = std::min(output_channel_index, max_primary_channel_index);
456 const int32_t primary_sample =
457 static_cast<int32_t
>(primary_buffer[frames_index * primary_channels + primary_channel_index]);
459 const int32_t added_sample = secondary_sample + primary_sample;
461 output_buffer[frames_index * output_channels + output_channel_index] =
462 static_cast<int16_t
>(clamp<int32_t>(added_sample, MIN_AUDIO_SAMPLE_VALUE, MAX_AUDIO_SAMPLE_VALUE));
477 if (output_transfer_buffer ==
nullptr) {
482 vTaskDelete(
nullptr);
489 bool sent_finished =
false;
492 uint32_t event_group_bits = xEventGroupGetBits(this_mixer->
event_group_);
498 output_transfer_buffer->transfer_data_to_sink(pdMS_TO_TICKS(TASK_DELAY_MS),
false);
500 const uint32_t output_frames_free =
501 this_mixer->
audio_stream_info_.value().bytes_to_frames(output_transfer_buffer->free());
503 std::vector<SourceSpeaker *> speakers_with_data;
504 std::vector<std::shared_ptr<audio::AudioSourceTransferBuffer>> transfer_buffers_with_data;
507 if (speaker->get_transfer_buffer().use_count() > 0) {
508 std::shared_ptr<audio::AudioSourceTransferBuffer> transfer_buffer = speaker->get_transfer_buffer().lock();
509 speaker->process_data_from_source(0);
511 if ((transfer_buffer->available() > 0) && !speaker->get_pause_state()) {
513 transfer_buffers_with_data.push_back(transfer_buffer);
514 speakers_with_data.push_back(speaker);
519 if (transfer_buffers_with_data.empty()) {
521 delay(TASK_DELAY_MS);
525 uint32_t frames_to_mix = output_frames_free;
527 if ((transfer_buffers_with_data.size() == 1) || this_mixer->
queue_mode_) {
536 const uint32_t frames_available_in_buffer =
537 active_stream_info.
bytes_to_frames(transfer_buffers_with_data[0]->available());
538 frames_to_mix = std::min(frames_to_mix, frames_available_in_buffer);
539 copy_frames(reinterpret_cast<int16_t *>(transfer_buffers_with_data[0]->get_buffer_start()), active_stream_info,
540 reinterpret_cast<int16_t *>(output_transfer_buffer->get_buffer_end()),
544 transfer_buffers_with_data[0]->decrease_buffer_length(active_stream_info.
frames_to_bytes(frames_to_mix));
545 speakers_with_data[0]->accumulated_frames_read_ += frames_to_mix;
548 speakers_with_data[0]->pending_playback_ms_ +=
552 output_transfer_buffer->increase_buffer_length(
557 if (!sent_finished) {
559 sent_finished =
true;
568 sent_finished =
false;
573 for (
int i = 0; i < transfer_buffers_with_data.size(); ++i) {
574 const uint32_t frames_available_in_buffer =
575 speakers_with_data[i]->get_audio_stream_info().bytes_to_frames(transfer_buffers_with_data[i]->available());
576 frames_to_mix = std::min(frames_to_mix, frames_available_in_buffer);
578 int16_t *primary_buffer =
reinterpret_cast<int16_t *
>(transfer_buffers_with_data[0]->get_buffer_start());
582 for (
int i = 1; i < transfer_buffers_with_data.size(); ++i) {
583 mix_audio_samples(primary_buffer, primary_stream_info,
584 reinterpret_cast<int16_t *>(transfer_buffers_with_data[i]->get_buffer_start()),
586 reinterpret_cast<int16_t *>(output_transfer_buffer->get_buffer_end()),
589 speakers_with_data[i]->pending_playback_ms_ +=
590 speakers_with_data[i]->get_audio_stream_info().frames_to_milliseconds_with_remainder(
593 if (i != transfer_buffers_with_data.size() - 1) {
595 primary_buffer =
reinterpret_cast<int16_t *
>(output_transfer_buffer->get_buffer_end());
601 for (
int i = 0; i < transfer_buffers_with_data.size(); ++i) {
602 transfer_buffers_with_data[i]->decrease_buffer_length(
604 speakers_with_data[i]->accumulated_frames_read_ += frames_to_mix;
606 speakers_with_data[i]->pending_playback_ms_ +=
607 speakers_with_data[i]->get_audio_stream_info().frames_to_milliseconds_with_remainder(
612 output_transfer_buffer->increase_buffer_length(
619 output_transfer_buffer.reset();
623 vTaskDelete(
nullptr);
value_type const & value() const
esp_err_t start_task_()
Starts the mixer task after allocating memory for the task stack.
void set_volume(float volume) override
Volume state changes are passed to the parent's output speaker.
speaker::Speaker * get_output_speaker() const
void dump_config() override
std::vector< SourceSpeaker * > source_speakers_
virtual void set_volume(float volume)
uint8_t get_channels() const
void add_audio_output_callback(std::function< void(uint32_t, uint32_t, uint32_t, uint32_t)> &&callback)
Callback function for sending the duration of the audio written to the speaker since the last callback.
speaker::Speaker * output_speaker_
static void duck_samples(int16_t *input_buffer, uint32_t input_samples_to_duck, int8_t *current_ducking_db_reduction, uint32_t *ducking_transition_samples_remaining, uint32_t samples_per_ducking_step, int8_t db_change_per_ducking_step)
Ducks audio samples by a specified amount.
static std::unique_ptr< AudioSinkTransferBuffer > create(size_t buffer_size)
Creates a new sink transfer buffer.
EventGroupHandle_t event_group_
std::shared_ptr< audio::AudioSourceTransferBuffer > transfer_buffer_
size_t process_data_from_source(TickType_t ticks_to_wait)
Transfers audio from the ring buffer into the transfer buffer.
uint8_t get_bits_per_sample() const
virtual bool get_mute_state()
size_t play(const uint8_t *data, size_t length, TickType_t ticks_to_wait) override
uint32_t frames_to_milliseconds_with_remainder(uint32_t *frames) const
Computes the duration, in milliseconds, the given amount of frames represents.
uint32_t accumulated_frames_read_
CallbackManager< void(uint32_t, uint32_t, uint32_t, uint32_t)> audio_output_callback_
uint32_t IRAM_ATTR HOT millis()
uint32_t bytes_to_samples(size_t bytes) const
Convert bytes to samples.
static void audio_mixer_task(void *params)
void set_audio_stream_info(const audio::AudioStreamInfo &audio_stream_info)
void status_set_error(const char *message="unspecified")
void set_mute_state(bool mute_state) override
Mute state changes are passed to the parent's output speaker.
static void copy_frames(const int16_t *input_buffer, audio::AudioStreamInfo input_stream_info, int16_t *output_buffer, audio::AudioStreamInfo output_stream_info, uint32_t frames_to_transfer)
Copies audio frames from the input buffer to the output buffer taking into account the number of channels...
bool get_mute_state() override
float get_volume() override
size_t frames_to_bytes(uint32_t frames) const
Converts frames to bytes.
uint32_t ms_to_samples(uint32_t ms) const
Converts duration to samples.
static std::unique_ptr< AudioSourceTransferBuffer > create(size_t buffer_size)
Creates a new source transfer buffer.
uint32_t get_sample_rate() const
optional< uint32_t > timeout_ms_
size_t ms_to_bytes(uint32_t ms) const
Converts duration to bytes.
uint32_t buffer_duration_ms_
uint32_t pending_playback_ms_
void deallocate(T *p, size_t n)
void dump_config() override
int8_t target_ducking_db_reduction_
uint32_t samples_per_ducking_step_
void status_clear_error()
virtual float get_volume()
virtual void mark_failed()
Mark this component as failed.
int8_t current_ducking_db_reduction_
esp_err_t start(audio::AudioStreamInfo &stream_info)
Starts the mixer task.
uint32_t ducking_transition_samples_remaining_
int8_t db_change_per_ducking_step_
Implementation of SPI Controller mode.
static void mix_audio_samples(const int16_t *primary_buffer, audio::AudioStreamInfo primary_stream_info, const int16_t *secondary_buffer, audio::AudioStreamInfo secondary_stream_info, int16_t *output_buffer, audio::AudioStreamInfo output_stream_info, uint32_t frames_to_mix)
Mixes the primary and secondary streams taking into account the number of channels in each stream...
esp_err_t delete_task_()
If the task is stopped, it sets the task handle to the nullptr and deallocates its stack...
uint32_t last_seen_data_ms_
uint32_t bytes_to_frames(size_t bytes) const
Convert bytes to frames.
std::weak_ptr< RingBuffer > ring_buffer_
audio::AudioStreamInfo & get_audio_stream_info()
An STL allocator that uses SPI or internal RAM.
static std::unique_ptr< RingBuffer > create(size_t len)
void scale_audio_samples(const int16_t *audio_samples, int16_t *output_buffer, int16_t scale_factor, size_t samples_to_scale)
Scales Q15 fixed point audio samples.
void apply_ducking(uint8_t decibel_reduction, uint32_t duration)
Sets the ducking level for the source speaker.
audio::AudioStreamInfo audio_stream_info_
bool has_buffered_data() const override
virtual void set_mute_state(bool mute_state)
void IRAM_ATTR HOT delay(uint32_t ms)
optional< audio::AudioStreamInfo > audio_stream_info_