ESPHome  2024.4.1
voice_assistant.cpp
Go to the documentation of this file.
1 #include "voice_assistant.h"
2 
3 #ifdef USE_VOICE_ASSISTANT
4 
5 #include "esphome/core/log.h"
6 
7 #include <cstdio>
8 
9 namespace esphome {
10 namespace voice_assistant {
11 
12 static const char *const TAG = "voice_assistant";
13 
14 #ifdef SAMPLE_RATE_HZ
15 #undef SAMPLE_RATE_HZ
16 #endif
17 
// Audio sample rate in Hz; also the rate argument handed to vad_process() in this file.
18 static const size_t SAMPLE_RATE_HZ = 16000;
// Microphone read chunk, counted in int16_t samples (512): every use multiplies it by sizeof(int16_t)
// to get a byte length, except the allocator call for the input buffer which takes it as-is.
19 static const size_t INPUT_BUFFER_SIZE = 32 * SAMPLE_RATE_HZ / 1000; // 32ms * 16kHz / 1000ms
// Ring buffer capacity in int16_t samples (16384); the ring buffer is created with
// BUFFER_SIZE * sizeof(int16_t) bytes.
20 static const size_t BUFFER_SIZE = 1024 * SAMPLE_RATE_HZ / 1000;
// Size in bytes (1024 with a 2-byte int16_t) of one chunk pulled from the ring buffer and sent out.
21 static const size_t SEND_BUFFER_SIZE = INPUT_BUFFER_SIZE * sizeof(int16_t);
// Maximum number of bytes requested from the socket per read when receiving speaker audio.
22 static const size_t RECEIVE_SIZE = 1024;
// Size in bytes (16384) of the speaker buffer that received audio is accumulated in.
23 static const size_t SPEAKER_BUFFER_SIZE = 16 * RECEIVE_SIZE;
24 
26 
28  this->socket_ = socket::socket(AF_INET, SOCK_DGRAM, IPPROTO_IP);
29  if (this->socket_ == nullptr) {
30  ESP_LOGE(TAG, "Could not create socket");
31  this->mark_failed();
32  return false;
33  }
34  int enable = 1;
35  int err = this->socket_->setsockopt(SOL_SOCKET, SO_REUSEADDR, &enable, sizeof(int));
36  if (err != 0) {
37  ESP_LOGW(TAG, "Socket unable to set reuseaddr: errno %d", err);
38  // we can still continue
39  }
40  err = this->socket_->setblocking(false);
41  if (err != 0) {
42  ESP_LOGE(TAG, "Socket unable to set nonblocking mode: errno %d", err);
43  this->mark_failed();
44  return false;
45  }
46 
47 #ifdef USE_SPEAKER
48  if (this->speaker_ != nullptr) {
49  struct sockaddr_storage server;
50 
51  socklen_t sl = socket::set_sockaddr_any((struct sockaddr *) &server, sizeof(server), 6055);
52  if (sl == 0) {
53  ESP_LOGE(TAG, "Socket unable to set sockaddr: errno %d", errno);
54  this->mark_failed();
55  return false;
56  }
57 
58  err = this->socket_->bind((struct sockaddr *) &server, sizeof(server));
59  if (err != 0) {
60  ESP_LOGE(TAG, "Socket unable to bind: errno %d", errno);
61  this->mark_failed();
62  return false;
63  }
64  }
65 #endif
66  this->udp_socket_running_ = true;
67  return true;
68 }
69 
71  ESP_LOGCONFIG(TAG, "Setting up Voice Assistant...");
72 
74 
75 #ifdef USE_SPEAKER
76  if (this->speaker_ != nullptr) {
78  this->speaker_buffer_ = speaker_allocator.allocate(SPEAKER_BUFFER_SIZE);
79  if (this->speaker_buffer_ == nullptr) {
80  ESP_LOGW(TAG, "Could not allocate speaker buffer");
81  this->mark_failed();
82  return;
83  }
84  }
85 #endif
86 
88  this->input_buffer_ = allocator.allocate(INPUT_BUFFER_SIZE);
89  if (this->input_buffer_ == nullptr) {
90  ESP_LOGW(TAG, "Could not allocate input buffer");
91  this->mark_failed();
92  return;
93  }
94 
95 #ifdef USE_ESP_ADF
96  this->vad_instance_ = vad_create(VAD_MODE_4);
97 #endif
98 
99  this->ring_buffer_ = RingBuffer::create(BUFFER_SIZE * sizeof(int16_t));
100  if (this->ring_buffer_ == nullptr) {
101  ESP_LOGW(TAG, "Could not allocate ring buffer");
102  this->mark_failed();
103  return;
104  }
105 
107  this->send_buffer_ = send_allocator.allocate(SEND_BUFFER_SIZE);
108  if (send_buffer_ == nullptr) {
109  ESP_LOGW(TAG, "Could not allocate send buffer");
110  this->mark_failed();
111  return;
112  }
113 }
114 
116  size_t bytes_read = 0;
117  if (this->mic_->is_running()) { // Read audio into input buffer
118  bytes_read = this->mic_->read(this->input_buffer_, INPUT_BUFFER_SIZE * sizeof(int16_t));
119  if (bytes_read == 0) {
120  memset(this->input_buffer_, 0, INPUT_BUFFER_SIZE * sizeof(int16_t));
121  return 0;
122  }
123  // Write audio into ring buffer
124  this->ring_buffer_->write((void *) this->input_buffer_, bytes_read);
125  } else {
126  ESP_LOGD(TAG, "microphone not running");
127  }
128  return bytes_read;
129 }
130 
132  if (this->api_client_ == nullptr && this->state_ != State::IDLE && this->state_ != State::STOP_MICROPHONE &&
134  if (this->mic_->is_running() || this->state_ == State::STARTING_MICROPHONE) {
136  } else {
138  }
139  this->continuous_ = false;
140  this->signal_stop_();
141  return;
142  }
143  switch (this->state_) {
144  case State::IDLE: {
145  if (this->continuous_ && this->desired_state_ == State::IDLE) {
146  this->idle_trigger_->trigger();
147 
148  this->ring_buffer_->reset();
149 #ifdef USE_ESP_ADF
150  if (this->use_wake_word_) {
152  } else
153 #endif
154  {
156  }
157  } else {
158  this->high_freq_.stop();
159  }
160  break;
161  }
163  ESP_LOGD(TAG, "Starting Microphone");
164  memset(this->send_buffer_, 0, SEND_BUFFER_SIZE);
165  memset(this->input_buffer_, 0, INPUT_BUFFER_SIZE * sizeof(int16_t));
166  this->mic_->start();
167  this->high_freq_.start();
169  break;
170  }
172  if (this->mic_->is_running()) {
173  this->set_state_(this->desired_state_);
174  }
175  break;
176  }
177 #ifdef USE_ESP_ADF
178  case State::WAIT_FOR_VAD: {
179  this->read_microphone_();
180  ESP_LOGD(TAG, "Waiting for speech...");
182  break;
183  }
184  case State::WAITING_FOR_VAD: {
185  size_t bytes_read = this->read_microphone_();
186  if (bytes_read > 0) {
187  vad_state_t vad_state =
188  vad_process(this->vad_instance_, this->input_buffer_, SAMPLE_RATE_HZ, VAD_FRAME_LENGTH_MS);
189  if (vad_state == VAD_SPEECH) {
190  if (this->vad_counter_ < this->vad_threshold_) {
191  this->vad_counter_++;
192  } else {
193  ESP_LOGD(TAG, "VAD detected speech");
195 
196  // Reset for next time
197  this->vad_counter_ = 0;
198  }
199  } else {
200  if (this->vad_counter_ > 0) {
201  this->vad_counter_--;
202  }
203  }
204  }
205  break;
206  }
207 #endif
208  case State::START_PIPELINE: {
209  this->read_microphone_();
210  ESP_LOGD(TAG, "Requesting start...");
211  uint32_t flags = 0;
212  if (this->use_wake_word_)
214  if (this->silence_detection_)
216  api::VoiceAssistantAudioSettings audio_settings;
217  audio_settings.noise_suppression_level = this->noise_suppression_level_;
218  audio_settings.auto_gain = this->auto_gain_;
219  audio_settings.volume_multiplier = this->volume_multiplier_;
220 
222  msg.start = true;
223  msg.conversation_id = this->conversation_id_;
224  msg.flags = flags;
225  msg.audio_settings = audio_settings;
226  msg.wake_word_phrase = this->wake_word_;
227  this->wake_word_ = "";
228 
229  if (this->api_client_ == nullptr || !this->api_client_->send_voice_assistant_request(msg)) {
230  ESP_LOGW(TAG, "Could not request start");
231  this->error_trigger_->trigger("not-connected", "Could not request start");
232  this->continuous_ = false;
234  break;
235  }
237  this->set_timeout("reset-conversation_id", 5 * 60 * 1000, [this]() { this->conversation_id_ = ""; });
238  break;
239  }
241  this->read_microphone_();
242  break; // State changed when udp server port received
243  }
245  this->read_microphone_();
246  size_t available = this->ring_buffer_->available();
247  while (available >= SEND_BUFFER_SIZE) {
248  size_t read_bytes = this->ring_buffer_->read((void *) this->send_buffer_, SEND_BUFFER_SIZE, 0);
249  if (this->audio_mode_ == AUDIO_MODE_API) {
251  msg.data.assign((char *) this->send_buffer_, read_bytes);
253  } else {
254  if (!this->udp_socket_running_) {
255  if (!this->start_udp_socket_()) {
257  break;
258  }
259  }
260  this->socket_->sendto(this->send_buffer_, read_bytes, 0, (struct sockaddr *) &this->dest_addr_,
261  sizeof(this->dest_addr_));
262  }
263  available = this->ring_buffer_->available();
264  }
265 
266  break;
267  }
268  case State::STOP_MICROPHONE: {
269  if (this->mic_->is_running()) {
270  this->mic_->stop();
272  } else {
273  this->set_state_(this->desired_state_);
274  }
275  break;
276  }
278  if (this->mic_->is_stopped()) {
279  this->set_state_(this->desired_state_);
280  }
281  break;
282  }
284  break; // State changed by events
285  }
287  bool playing = false;
288 #ifdef USE_SPEAKER
289  if (this->speaker_ != nullptr) {
290  ssize_t received_len = 0;
291  if (this->audio_mode_ == AUDIO_MODE_UDP) {
292  if (this->speaker_buffer_index_ + RECEIVE_SIZE < SPEAKER_BUFFER_SIZE) {
293  received_len = this->socket_->read(this->speaker_buffer_ + this->speaker_buffer_index_, RECEIVE_SIZE);
294  if (received_len > 0) {
295  this->speaker_buffer_index_ += received_len;
296  this->speaker_buffer_size_ += received_len;
297  this->speaker_bytes_received_ += received_len;
298  }
299  } else {
300  ESP_LOGD(TAG, "Receive buffer full");
301  }
302  }
303  // Build a small buffer of audio before sending to the speaker
304  bool end_of_stream = this->stream_ended_ && (this->audio_mode_ == AUDIO_MODE_API || received_len < 0);
305  if (this->speaker_bytes_received_ > RECEIVE_SIZE * 4 || end_of_stream)
306  this->write_speaker_();
307  if (this->wait_for_stream_end_) {
308  this->cancel_timeout("playing");
309  if (end_of_stream) {
310  ESP_LOGD(TAG, "End of audio stream received");
311  this->cancel_timeout("speaker-timeout");
313  }
314  break; // We dont want to timeout here as the STREAM_END event will take care of that.
315  }
316  playing = this->speaker_->is_running();
317  }
318 #endif
319 #ifdef USE_MEDIA_PLAYER
320  if (this->media_player_ != nullptr) {
322  }
323 #endif
324  if (playing) {
325  this->set_timeout("playing", 2000, [this]() {
326  this->cancel_timeout("speaker-timeout");
328  });
329  }
330  break;
331  }
333 #ifdef USE_SPEAKER
334  if (this->speaker_ != nullptr) {
335  if (this->speaker_buffer_size_ > 0) {
336  this->write_speaker_();
337  break;
338  }
339  if (this->speaker_->has_buffered_data() || this->speaker_->is_running()) {
340  break;
341  }
342  ESP_LOGD(TAG, "Speaker has finished outputting all audio");
343  this->speaker_->stop();
344  this->cancel_timeout("speaker-timeout");
345  this->cancel_timeout("playing");
346  this->speaker_buffer_size_ = 0;
347  this->speaker_buffer_index_ = 0;
348  this->speaker_bytes_received_ = 0;
349  memset(this->speaker_buffer_, 0, SPEAKER_BUFFER_SIZE);
350  this->wait_for_stream_end_ = false;
351  this->stream_ended_ = false;
352 
354  }
355 #endif
357  break;
358  }
359  default:
360  break;
361  }
362 }
363 
364 #ifdef USE_SPEAKER
366  if (this->speaker_buffer_size_ > 0) {
367  size_t written = this->speaker_->play(this->speaker_buffer_, this->speaker_buffer_size_);
368  if (written > 0) {
369  memmove(this->speaker_buffer_, this->speaker_buffer_ + written, this->speaker_buffer_size_ - written);
370  this->speaker_buffer_size_ -= written;
371  this->speaker_buffer_index_ -= written;
372  this->set_timeout("speaker-timeout", 5000, [this]() { this->speaker_->stop(); });
373  } else {
374  ESP_LOGD(TAG, "Speaker buffer full, trying again next loop");
375  }
376  }
377 }
378 #endif
379 
381  if (!subscribe) {
382  if (this->api_client_ == nullptr || client != this->api_client_) {
383  ESP_LOGE(TAG, "Client attempting to unsubscribe that is not the current API Client");
384  return;
385  }
386  this->api_client_ = nullptr;
388  return;
389  }
390 
391  if (this->api_client_ != nullptr) {
392  ESP_LOGE(TAG, "Multiple API Clients attempting to connect to Voice Assistant");
393  ESP_LOGE(TAG, "Current client: %s", this->api_client_->get_client_combined_info().c_str());
394  ESP_LOGE(TAG, "New client: %s", client->get_client_combined_info().c_str());
395  return;
396  }
397 
398  this->api_client_ = client;
400 }
401 
402 static const LogString *voice_assistant_state_to_string(State state) {
403  switch (state) {
404  case State::IDLE:
405  return LOG_STR("IDLE");
407  return LOG_STR("START_MICROPHONE");
409  return LOG_STR("STARTING_MICROPHONE");
410  case State::WAIT_FOR_VAD:
411  return LOG_STR("WAIT_FOR_VAD");
413  return LOG_STR("WAITING_FOR_VAD");
415  return LOG_STR("START_PIPELINE");
417  return LOG_STR("STARTING_PIPELINE");
419  return LOG_STR("STREAMING_MICROPHONE");
421  return LOG_STR("STOP_MICROPHONE");
423  return LOG_STR("STOPPING_MICROPHONE");
425  return LOG_STR("AWAITING_RESPONSE");
427  return LOG_STR("STREAMING_RESPONSE");
429  return LOG_STR("RESPONSE_FINISHED");
430  default:
431  return LOG_STR("UNKNOWN");
432  }
433 };
434 
436  State old_state = this->state_;
437  this->state_ = state;
438  ESP_LOGD(TAG, "State changed from %s to %s", LOG_STR_ARG(voice_assistant_state_to_string(old_state)),
439  LOG_STR_ARG(voice_assistant_state_to_string(state)));
440 }
441 
// Enter `state` right away and store `desired_state` in desired_state_.
// Other code in this file later calls set_state_(this->desired_state_) (for example once the
// microphone reports running or stopped), so desired_state is the state to move on to afterwards.
//
// state:         state to switch to immediately.
// desired_state: value recorded in desired_state_ and written to the debug log.
442 void VoiceAssistant::set_state_(State state, State desired_state) {
// The immediate transition goes through the single-argument overload before desired_state_ is
// stored, so anything that overload logs appears ahead of the "Desired state" line.
443  this->set_state_(state);
444  this->desired_state_ = desired_state;
445  ESP_LOGD(TAG, "Desired state set to %s", LOG_STR_ARG(voice_assistant_state_to_string(desired_state)));
446 }
447 
449  ESP_LOGE(TAG, "Failed to start server. See Home Assistant logs for more details.");
450  this->error_trigger_->trigger("failed-to-start", "Failed to start server. See Home Assistant logs for more details.");
452 }
453 
455  if (this->state_ != State::STARTING_PIPELINE) {
456  this->signal_stop_();
457  return;
458  }
459 
460  ESP_LOGD(TAG, "Client started, streaming microphone");
461  this->audio_mode_ = AUDIO_MODE_API;
462 
463  if (this->mic_->is_running()) {
465  } else {
467  }
468 }
469 
// Select UDP audio mode and record the destination address that microphone audio is sent to
// (dest_addr_ is the target of the sendto() call in the streaming path of this file).
//
// addr: socket address copied into dest_addr_; sizeof(dest_addr_) bytes are read from it, so it is
//       assumed to point at a full sockaddr_storage -- TODO confirm against callers.
// port: destination port, written into the copied address in network byte order.
470 void VoiceAssistant::start_streaming(struct sockaddr_storage *addr, uint16_t port) {
// Only meaningful while in STARTING_PIPELINE; in any other state signal_stop_() is called instead.
471  if (this->state_ != State::STARTING_PIPELINE) {
472  this->signal_stop_();
473  return;
474  }
475 
476  ESP_LOGD(TAG, "Client started, streaming microphone");
477  this->audio_mode_ = AUDIO_MODE_UDP;
478 
// Copy the caller's address, then overwrite only the port field that matches its address family.
479  memcpy(&this->dest_addr_, addr, sizeof(this->dest_addr_));
480  if (this->dest_addr_.ss_family == AF_INET) {
481  ((struct sockaddr_in *) &this->dest_addr_)->sin_port = htons(port);
482  }
// The IPv6 branch is compiled in only when LWIP_IPV6 evaluates to non-zero.
483 #if LWIP_IPV6
484  else if (this->dest_addr_.ss_family == AF_INET6) {
485  ((struct sockaddr_in6 *) &this->dest_addr_)->sin6_port = htons(port);
486  }
487 #endif
488  else {
// Unsupported family: warn and return. audio_mode_ has already been set to AUDIO_MODE_UDP and
// state_ is left untouched on this path.
489  ESP_LOGW(TAG, "Unknown address family: %d", this->dest_addr_.ss_family);
490  return;
491  }
492 
// NOTE(review): both branches below are empty in this listing, and the listing's own line numbers
// jump 493 -> 495 -> 497, so one statement per branch appears to have been lost. As shown, nothing
// in this function changes state_ after the address is stored -- confirm against the real file.
493  if (this->mic_->is_running()) {
495  } else {
497  }
498 }
499 
// Request the start of a voice assistant run. Has an effect only when an API client is connected
// and the component is currently in State::IDLE; in any other state the call does nothing.
//
// continuous:        stored in continuous_ when the request is accepted.
// silence_detection: stored in silence_detection_ when the request is accepted.
500 void VoiceAssistant::request_start(bool continuous, bool silence_detection) {
// Without an API client the request is rejected: log an error and clear continuous_.
// NOTE(review): listing line 503 is missing between the log call and the assignment below, so one
// statement appears to have been dropped here -- confirm against the real file.
501  if (this->api_client_ == nullptr) {
502  ESP_LOGE(TAG, "No API client connected");
504  this->continuous_ = false;
505  return;
506  }
507  if (this->state_ == State::IDLE) {
508  this->continuous_ = continuous;
509  this->silence_detection_ = silence_detection;
// Reset the ring buffer before a new run begins.
510  this->ring_buffer_->reset();
// With USE_ESP_ADF defined the branch is chosen by use_wake_word_; without it only the bare block
// after #endif remains.
// NOTE(review): both branch bodies are empty in this listing (listing lines 513 and 517 are
// missing), so as shown no state change is made here -- confirm against the real file.
511 #ifdef USE_ESP_ADF
512  if (this->use_wake_word_) {
514  } else
515 #endif
516  {
518  }
519  }
520 }
521 
523  this->continuous_ = false;
524 
525  switch (this->state_) {
526  case State::IDLE:
527  break;
530  case State::WAIT_FOR_VAD:
534  break;
537  this->signal_stop_();
539  break;
542  this->desired_state_ = State::IDLE;
543  break;
547  break; // Let the incoming audio stream finish then it will go to idle.
548  }
549 }
550 
552  memset(&this->dest_addr_, 0, sizeof(this->dest_addr_));
553  if (this->api_client_ == nullptr) {
554  return;
555  }
556  ESP_LOGD(TAG, "Signaling stop...");
558  msg.start = false;
560 }
561 
563  ESP_LOGD(TAG, "Event Type: %d", msg.event_type);
564  switch (msg.event_type) {
566  ESP_LOGD(TAG, "Assist Pipeline running");
567  this->defer([this]() { this->start_trigger_->trigger(); });
568  break;
570  break;
572  ESP_LOGD(TAG, "Wake word detected");
573  this->defer([this]() { this->wake_word_detected_trigger_->trigger(); });
574  break;
575  }
577  ESP_LOGD(TAG, "STT started");
578  this->defer([this]() { this->listening_trigger_->trigger(); });
579  break;
581  std::string text;
582  for (auto arg : msg.data) {
583  if (arg.name == "text") {
584  text = std::move(arg.value);
585  }
586  }
587  if (text.empty()) {
588  ESP_LOGW(TAG, "No text in STT_END event");
589  return;
590  }
591  ESP_LOGD(TAG, "Speech recognised as: \"%s\"", text.c_str());
592  this->defer([this, text]() { this->stt_end_trigger_->trigger(text); });
593  break;
594  }
596  ESP_LOGD(TAG, "Intent started");
597  this->defer([this]() { this->intent_start_trigger_->trigger(); });
598  break;
600  for (auto arg : msg.data) {
601  if (arg.name == "conversation_id") {
602  this->conversation_id_ = std::move(arg.value);
603  }
604  }
605  this->defer([this]() { this->intent_end_trigger_->trigger(); });
606  break;
607  }
609  std::string text;
610  for (auto arg : msg.data) {
611  if (arg.name == "text") {
612  text = std::move(arg.value);
613  }
614  }
615  if (text.empty()) {
616  ESP_LOGW(TAG, "No text in TTS_START event");
617  return;
618  }
619  ESP_LOGD(TAG, "Response: \"%s\"", text.c_str());
620  this->defer([this, text]() {
621  this->tts_start_trigger_->trigger(text);
622 #ifdef USE_SPEAKER
623  this->speaker_->start();
624 #endif
625  });
626  break;
627  }
629  std::string url;
630  for (auto arg : msg.data) {
631  if (arg.name == "url") {
632  url = std::move(arg.value);
633  }
634  }
635  if (url.empty()) {
636  ESP_LOGW(TAG, "No url in TTS_END event");
637  return;
638  }
639  ESP_LOGD(TAG, "Response URL: \"%s\"", url.c_str());
640  this->defer([this, url]() {
641 #ifdef USE_MEDIA_PLAYER
642  if (this->media_player_ != nullptr) {
644  }
645 #endif
646  this->tts_end_trigger_->trigger(url);
647  });
649  this->set_state_(new_state, new_state);
650  break;
651  }
653  ESP_LOGD(TAG, "Assist Pipeline ended");
654  if (this->state_ == State::STREAMING_MICROPHONE) {
655  this->ring_buffer_->reset();
656 #ifdef USE_ESP_ADF
657  if (this->use_wake_word_) {
658  // No need to stop the microphone since we didn't use the speaker
660  } else
661 #endif
662  {
664  }
665  } else if (this->state_ == State::AWAITING_RESPONSE) {
666  // No TTS start event ("nevermind")
668  }
669  this->defer([this]() { this->end_trigger_->trigger(); });
670  break;
671  }
673  std::string code = "";
674  std::string message = "";
675  for (auto arg : msg.data) {
676  if (arg.name == "code") {
677  code = std::move(arg.value);
678  } else if (arg.name == "message") {
679  message = std::move(arg.value);
680  }
681  }
682  if (code == "wake-word-timeout" || code == "wake_word_detection_aborted") {
683  // Don't change state here since either the "tts-end" or "run-end" events will do it.
684  return;
685  } else if (code == "wake-provider-missing" || code == "wake-engine-missing") {
686  // Wake word is not set up or not ready on Home Assistant so stop and do not retry until user starts again.
687  this->defer([this, code, message]() {
688  this->request_stop();
689  this->error_trigger_->trigger(code, message);
690  });
691  return;
692  }
693  ESP_LOGE(TAG, "Error: %s - %s", code.c_str(), message.c_str());
694  if (this->state_ != State::IDLE) {
695  this->signal_stop_();
697  }
698  this->defer([this, code, message]() { this->error_trigger_->trigger(code, message); });
699  break;
700  }
702 #ifdef USE_SPEAKER
703  this->wait_for_stream_end_ = true;
704  ESP_LOGD(TAG, "TTS stream start");
705  this->defer([this] { this->tts_stream_start_trigger_->trigger(); });
706 #endif
707  break;
708  }
710 #ifdef USE_SPEAKER
711  this->stream_ended_ = true;
712  ESP_LOGD(TAG, "TTS stream end");
713 #endif
714  break;
715  }
717  ESP_LOGD(TAG, "Starting STT by VAD");
718  this->defer([this]() { this->stt_vad_start_trigger_->trigger(); });
719  break;
721  ESP_LOGD(TAG, "STT by VAD end");
723  this->defer([this]() { this->stt_vad_end_trigger_->trigger(); });
724  break;
725  default:
726  ESP_LOGD(TAG, "Unhandled event type: %d", msg.event_type);
727  break;
728  }
729 }
730 
732 #ifdef USE_SPEAKER // We should never get to this function if there is no speaker anyway
733  if (this->speaker_buffer_index_ + msg.data.length() < SPEAKER_BUFFER_SIZE) {
734  memcpy(this->speaker_buffer_ + this->speaker_buffer_index_, msg.data.data(), msg.data.length());
735  this->speaker_buffer_index_ += msg.data.length();
736  this->speaker_buffer_size_ += msg.data.length();
737  this->speaker_bytes_received_ += msg.data.length();
738  } else {
739  ESP_LOGE(TAG, "Cannot receive audio, buffer is full");
740  }
741 #endif
742 }
743 
// File-scope pointer to a VoiceAssistant, defined here as null.
// NOTE(review): presumably assigned elsewhere once the component instance exists -- the assignment
// is not visible in this file, so check for null before dereferencing.
744 VoiceAssistant *global_voice_assistant = nullptr; // NOLINT(cppcoreguidelines-avoid-non-const-global-variables)
745 
746 } // namespace voice_assistant
747 } // namespace esphome
748 
749 #endif // USE_VOICE_ASSISTANT
virtual size_t play(const uint8_t *data, size_t length)=0
bool is_running() const
Definition: speaker.h:23
const float AFTER_CONNECTION
For components that should be initialized after a data connection (API/MQTT) is connected.
Definition: component.cpp:27
bool cancel_timeout(const std::string &name)
Cancel a timeout function.
Definition: component.cpp:73
HighFrequencyLoopRequester high_freq_
VoiceAssistant * global_voice_assistant
socklen_t set_sockaddr_any(struct sockaddr *addr, socklen_t addrlen, uint16_t port)
Set a sockaddr to the any address and specified port for the IP version used by socket_ip().
Definition: socket.cpp:53
std::unique_ptr< socket::Socket > socket_
sa_family_t ss_family
Definition: headers.h:92
Trigger< std::string > * tts_start_trigger_
void set_timeout(const std::string &name, uint32_t timeout, std::function< void()> &&f)
Set a timeout function with a unique name.
Definition: component.cpp:69
An STL allocator that uses SPI RAM.
Definition: helpers.h:645
void defer(const std::string &name, std::function< void()> &&f)
Defer a callback to the next loop() call.
Definition: component.cpp:130
Trigger< std::string > * tts_end_trigger_
uint32_t socklen_t
Definition: headers.h:97
VoiceAssistantAudioSettings audio_settings
Definition: api_pb2.h:1710
enums::VoiceAssistantEvent event_type
Definition: api_pb2.h:1747
void client_subscription(api::APIConnection *client, bool subscribe)
virtual bool has_buffered_data() const =0
std::vector< VoiceAssistantEventData > data
Definition: api_pb2.h:1748
std::string get_client_combined_info() const
void trigger(Ts... x)
Inform the parent automation that the event has triggered.
Definition: automation.h:95
media_player::MediaPlayer * media_player_
void start()
Start running the loop continuously.
Definition: helpers.cpp:547
bool send_voice_assistant_request(const VoiceAssistantRequest &msg)
void stop()
Stop running the loop continuously.
Definition: helpers.cpp:553
const uint32_t flags
Definition: stm32flash.h:85
std::unique_ptr< RingBuffer > ring_buffer_
bool send_voice_assistant_audio(const VoiceAssistantAudio &msg)
void on_audio(const api::VoiceAssistantAudio &msg)
virtual size_t read(int16_t *buf, size_t len)=0
virtual void start()=0
virtual void mark_failed()
Mark this component as failed.
Definition: component.cpp:118
This is a workaround until we can figure out a way to get the tflite-micro idf component code availab...
Definition: a01nyub.cpp:7
static std::unique_ptr< RingBuffer > create(size_t len)
Definition: ring_buffer.cpp:14
MediaPlayerCall & set_media_url(const std::string &url)
virtual void stop()=0
void on_event(const api::VoiceAssistantEventResponse &msg)
bool state
Definition: fan.h:34
Trigger< std::string > * stt_end_trigger_
void request_start(bool continuous, bool silence_detection)
Trigger< std::string, std::string > * error_trigger_
std::unique_ptr< Socket > socket(int domain, int type, int protocol)
Create a socket of the given domain, type and protocol.