moly_kit/widgets/
realtime.rs

1use crate::aitk::{
2    utils::asynchronous::spawn,
3    utils::tool::{display_name_from_namespaced, parse_tool_arguments},
4};
5use crate::prelude::*;
6use crate::{
7    utils::makepad::events::EventExt,
8    widgets::{avatar::*, chat_line::*, slot::*, standard_message_content::*},
9};
10use makepad_widgets::permission::Permission;
11use makepad_widgets::permission::PermissionStatus;
12use makepad_widgets::{makepad_platform::AudioDeviceType, *};
13use std::sync::{Arc, Mutex};
14
script_mod! {
    use mod.prelude.widgets.*
    use mod.widgets.*

    // Animated "AI orb" shown while a realtime conversation is active.
    // The pixel shader draws a pulsing circle deformed by angular waves,
    // colored with fbm-noise-mixed gradients, plus three orbiting particles,
    // all clipped to a circular SDF.
    let AIAnimation = RoundedView {
        width: 200, height: 200
        show_bg: true
        draw_bg +: {
            // Cheap 2D hash: maps a lattice point to a pseudo-random [0, 1) value.
            hash21: fn(p: vec2) -> float {
                let px = fract(p.x * 234.34)
                let py = fract(p.y * 435.345)
                let d = dot(vec2(px, py), vec2(py + 34.23, px + 34.23))
                return fract((px + d) * (py + d))
            }

            // Value noise: smoothstep-weighted bilinear blend of the four
            // hashed lattice corners around p.
            noise: fn(p: vec2) -> float {
                let i = floor(p)
                let f = fract(p)
                let fs = f * f * (3.0 - 2.0 * f)
                let a = self.hash21(i)
                let b = self.hash21(i + vec2(1.0, 0.0))
                let c = self.hash21(i + vec2(0.0, 1.0))
                let d = self.hash21(i + vec2(1.0, 1.0))
                return mix(
                    mix(a, b, fs.x),
                    mix(c, d, fs.x),
                    fs.y
                )
            }

            // Fractal brownian motion: four noise octaves, manually unrolled
            // (halving amplitude and doubling frequency each octave).
            fbm: fn(p: vec2) -> float {
                let mut sum = 0.0
                let mut amp = 0.5
                let mut freq = 1.0

                sum += self.noise(p * freq) * amp
                amp *= 0.5
                freq *= 2.0

                sum += self.noise(p * freq) * amp
                amp *= 0.5
                freq *= 2.0

                sum += self.noise(p * freq) * amp
                amp *= 0.5
                freq *= 2.0

                sum += self.noise(p * freq) * amp
                amp *= 0.5
                freq *= 2.0

                return sum
            }

            pixel: fn() -> vec4 {
                // Remap quad position to [-1, 1] coordinates centered on the orb.
                let uv = (self.pos - 0.5) * 2.0

                let mut col = vec3(0.1, 0.1, 0.1)

                // Orb radius slowly pulses over time.
                let radius = 0.3 + sin(self.draw_pass.time * 0.5) * 0.02
                let d = length(uv)

                // Two counter-rotating angular waves deform the orb outline.
                let angle = atan2(uv.y, uv.x)
                let wave = sin(angle * 3.0 + self.draw_pass.time) * 0.1
                let wave2 = cos(angle * 5.0 - self.draw_pass.time * 1.3) * 0.08

                let noise1 = self.fbm(uv * 3.0 + self.draw_pass.time * 0.1)
                let noise2 = self.fbm(uv * 5.0 - self.draw_pass.time * 0.2)

                let orb_color = vec3(0.2, 0.6, 1.0)
                let orb = smoothstep(
                    radius + wave + wave2,
                    radius - 0.1 + wave + wave2,
                    d
                )

                let gradient1 = vec3(0.8, 0.2, 0.5) * sin(angle + self.draw_pass.time)
                let gradient2 = vec3(0.2, 0.5, 1.0) * cos(
                    angle - self.draw_pass.time * 0.7
                )

                // Three small bright particles orbiting at different speeds.
                let mut particles = 0.0

                let particle_pos1 = vec2(
                    sin(self.draw_pass.time * 0.5) * 0.5,
                    cos(self.draw_pass.time * 0.3) * 0.5
                )
                particles += smoothstep(
                    0.05, 0.0, length(uv - particle_pos1)
                )

                let particle_pos2 = vec2(
                    sin(self.draw_pass.time * 0.7) * 0.5,
                    cos(self.draw_pass.time * 0.5) * 0.5
                )
                particles += smoothstep(
                    0.05, 0.0, length(uv - particle_pos2)
                )

                let particle_pos3 = vec2(
                    sin(self.draw_pass.time * 0.9) * 0.5,
                    cos(self.draw_pass.time * 0.7) * 0.5
                )
                particles += smoothstep(
                    0.05, 0.0, length(uv - particle_pos3)
                )

                // Composite orb body, particles and a soft center glow.
                col += orb * mix(orb_color, gradient1, noise1)
                col += orb * mix(gradient2, orb_color, noise2) * 0.5
                col += particles * vec3(0.5, 0.8, 1.0)
                col += exp(-d * 4.0) * vec3(0.2, 0.4, 0.8) * 0.5

                // Clip the whole composition to a circle filling the rect.
                let sdf = Sdf2d.viewport(self.pos * self.rect_size)
                let r = min(self.rect_size.x, self.rect_size.y) * 0.5
                sdf.circle(
                    self.rect_size.x * 0.5,
                    self.rect_size.y * 0.5,
                    r
                )

                sdf.fill_keep(vec4(col, 1.0))

                return sdf.result
            }
        }
    }

    // Dropdown with the shared look used by all selectors in this modal.
    let SimpleDropDown = DropDown {
        draw_text +: {
            text_style +: { font_size: 12 }
            get_color: fn() -> vec4 {
                return mix(
                    #2
                    #x0
                    self.down
                )
            }
        }

        popup_menu: PopupMenu {
            width: 300, height: Fit,
            flow: Down,
            padding: theme.mspace_1 {}

            menu_item: PopupMenuItem {
                width: Fill, height: Fit,
                align: Align { y: 0.5 }
                padding: Inset {
                    left: 15, right: 15, top: 10, bottom: 10
                }

                draw_text +: {
                    // Darken on active, darken further on hover.
                    get_color: fn() -> vec4 {
                        return mix(
                            mix(
                                #3
                                #x0
                                self.active
                            )
                            #x0
                            self.hover
                        )
                    }
                }

                draw_bg +: {
                    color: uniform(#xf)
                    color_active: uniform(#xe9)
                }
            }

            draw_bg +: {
                color: uniform(#xf9)
                border_size: 1.0
            }
        }
    }

    // Label + dropdown for picking the speech-to-text model.
    let TranscriptionModelSelector = View {
        height: Fit
        align: Align { x: 0.0, y: 0.5 }
        spacing: 10

        Label {
            text: "Transcription model:"
            draw_text +: {
                color: #222
                text_style +: { font_size: 11 }
            }
        }

        transcription_model_selector := SimpleDropDown {
            margin: 5
            labels: ["whisper-1" "whisper"
                "gpt-4o-transcribe" "gpt-4o-mini-transcribe"]

            draw_text +: {
                color: #222
                text_style +: { font_size: 11 }
            }

            popup_menu +: {
                menu_item +: {
                    draw_text +: {
                        color: #222
                        text_style +: { font_size: 11 }
                    }
                }
            }
        }
    }

    // Label + dropdown for picking the assistant's voice.
    let VoiceSelector = View {
        height: Fit
        align: Align { x: 0.0, y: 0.5 }
        spacing: 10

        Label {
            text: "Voice:"
            draw_text +: {
                color: #222
                text_style +: { font_size: 11 }
            }
        }

        voice_selector := SimpleDropDown {
            margin: 5
            labels: ["marin" "cedar" "alloy" "shimmer"
                "ash" "ballad" "coral" "echo" "sage" "verse"]

            draw_text +: {
                color: #222
                text_style +: { font_size: 11 }
            }

            popup_menu +: {
                menu_item +: {
                    draw_text +: {
                        color: #222
                        text_style +: { font_size: 11 }
                    }
                }
            }
        }
    }

    // Borderless button rendering a single icon-font glyph.
    let IconButton = Button {
        width: Fit, height: Fit
        draw_text +: {
            text_style: theme.font_icons {
                font_size: 14.
            }
            color: #5,
            color_hover: #2,
            color_focus: #2
            color_down: #5
        }
        draw_bg +: {
            color_down: #0000
            border_radius: 7.
            border_size: 0.
        }
    }

    // Generic "label: dropdown" row for an audio device list.
    let DeviceSelector = View {
        height: Fit
        align: Align { x: 0.0, y: 0.5 }
        spacing: 5

        label := Label {
            draw_text +: {
                color: #222
                text_style +: { font_size: 11 }
            }
        }

        device_selector := SimpleDropDown {
            margin: 5
            labels: ["default"]

            draw_text +: {
                color: #222
                text_style +: { font_size: 11 }
            }

            popup_menu +: {
                menu_item +: {
                    draw_text +: {
                        color: #222
                        text_style +: { font_size: 11 }
                    }
                }
            }
        }
    }

    // Mic icon + "Mute"/"Unmute" label; both toggle the mic (see handle_actions).
    let MuteControl = View {
        width: Fit, height: Fit
        align: Align { x: 0.5, y: 0.5 }
        cursor: MouseCursor.Hand
        mute_button := IconButton {
            text: "\u{f130}"
        }
        mute_status := Label {
            padding: 0
            text: "Mute"
            draw_text +: {
                color: #222
                text_style +: { font_size: 11 }
            }
        }
    }

    // Mic row (with mute control) stacked above the speaker row.
    let DevicesSelector = View {
        height: Fit, width: Fill
        flow: Down, spacing: 5
        View {
            height: Fit
            mic_selector := DeviceSelector {
                width: Fit
                label +: { text: "Mic:" }
            }
            mute_control := MuteControl {}
        }
        speaker_selector := DeviceSelector {
            label +: { text: "Speaker:" }
        }
    }

    // Main control panel: device/voice/model selectors, interruption toggle,
    // status line, permission request button, tool approval line and the
    // start/stop button.
    let Controls = View {
        width: Fill, height: Fit
        flow: Down
        spacing: 10
        align: Align { x: 0.0, y: 0.5 }
        padding: 20

        devices_selector := DevicesSelector {}
        selected_devices_view := View {
            visible: false
            height: Fit
            align: Align { x: 0.0, y: 0.5 }
            selected_devices := Label {
                draw_text +: {
                    text_style +: { font_size: 11 }
                    color: #222
                }
            }
        }

        voice_selector_wrapper := VoiceSelector {}
        selected_voice_view := View {
            visible: false
            height: Fit
            align: Align { x: 0.0, y: 0.5 }
            selected_voice := Label {
                draw_text +: {
                    text_style +: { font_size: 11 }
                    color: #222
                }
            }
        }

        TranscriptionModelSelector {}

        View {
            width: Fit, height: Fit
            align: Align { x: 0.0, y: 0.5 }
            spacing: 10

            toggle_interruptions := Toggle {
                width: Fit
                height: Fit
                draw_bg +: {
                    size: 25.
                }
                padding: Inset {
                    left: 5, right: 5, top: 10, bottom: 10
                }
            }

            Label {
                text: "Allow interruptions\n(requires headphones, no AEC yet)"
                draw_text +: {
                    color: #222
                    text_style +: { font_size: 10 }
                }
            }
        }

        status_label := Label {
            text: "Ready to start"
            width: Fill
            draw_text +: {
                color: #222
                text_style +: { font_size: 11 }
            }
        }

        // Hidden unless mic permission was denied but can be retried.
        request_permission_button := RoundedShadowView {
            visible: false
            cursor: MouseCursor.Hand
            margin: Inset {
                left: 10, right: 10, bottom: 0, top: 10
            }
            width: Fill, height: Fit
            align: Align { x: 0.5, y: 0.5 }
            padding: Inset {
                left: 20, right: 20, bottom: 10, top: 10
            }
            draw_bg +: {
                color: #xf9f9f9
                border_radius: 4.5
                shadow_color: instance(#0002)
                shadow_radius: 8.0
                shadow_offset: vec2(0.0, -1.5)
            }
            Label {
                text: "Request microphone permission"
                draw_text +: {
                    text_style +: { font_size: 11 }
                    color: #000
                }
            }
        }

        // Shown while a tool call awaits user approval/denial.
        tool_permission_line := ToolRequestLine {
            visible: false
            margin: Inset { left: 10, right: 10, top: 10 }
        }

        start_stop_button := RoundedShadowView {
            cursor: MouseCursor.Hand
            margin: Inset {
                left: 10, right: 10, bottom: 0, top: 10
            }
            width: Fill, height: Fit
            align: Align { x: 0.5, y: 0.5 }
            padding: Inset {
                left: 20, right: 20, bottom: 10, top: 10
            }
            draw_bg +: {
                color: #xf9f9f9
                border_radius: 4.5
                shadow_color: instance(#0002)
                shadow_radius: 8.0
                shadow_offset: vec2(0.0, -1.5)
            }
            stop_start_label := Label {
                text: "Start"
                draw_text +: {
                    text_style +: { font_size: 11 }
                    color: #000
                }
            }
        }
    }

    // The Realtime widget itself: header with close button, animated orb,
    // and the full control panel.
    mod.widgets.Realtime = #(Realtime::register_widget(vm)) {
        ..mod.widgets.RoundedView
        show_bg: true
        draw_bg +: {
            color: #xf9f9f9
            border_radius: 10.0
        }
        flow: Down
        spacing: 20
        width: Fill, height: Fit
        align: Align { x: 0.5, y: 0.0 }
        padding: 10

        header := View {
            height: Fit
            flow: Overlay

            align: Align { x: 1.0, y: 0.5 }
            close_button := IconButton {
                text: "\u{f00d}"
            }
        }

        AIAnimation {}
        Controls {}
    }

    // Adaptive wrapper: fixed-width card on desktop, full-screen on mobile.
    // CachedWidget keeps a single Realtime instance across both layouts.
    mod.widgets.RealtimeContent = RoundedView {
        align: Align { x: 0.5, y: 0.5 }

        AdaptiveView {
            Desktop +: {
                width: 450, height: Fit
                align: Align { x: 0.5, y: 0.5 }

                CachedWidget {
                    realtime := mod.widgets.Realtime {}
                }
            }

            Mobile +: {
                width: Fill, height: Fill
                align: Align { x: 0.5, y: 0.5 }

                CachedWidget {
                    realtime := mod.widgets.Realtime {}
                }
            }
        }
    }
}
523
/// Actions the realtime widget sends to whatever modal hosts it.
#[derive(Clone, Debug, Default)]
pub enum RealtimeModalAction {
    /// No-op variant so the action type can be `Default`-constructed.
    #[default]
    None,
    /// Ask the hosting modal to close (emitted when the close button is clicked).
    DismissModal,
}
530
/// Where we are in the OS microphone-permission flow.
#[derive(Clone, Debug, Default, PartialEq)]
enum MicPermissionStatus {
    /// Initial state: we have not asked the OS yet.
    #[default]
    NotDetermined,
    /// A permission request was issued; waiting for `Event::PermissionResult`.
    Requesting,
    /// Permission granted; audio setup may proceed.
    Granted,
    /// Permission denied (possibly retryable via the request button).
    Denied,
}
539
/// Widget driving a realtime (voice) conversation with a bot: manages the
/// connection channel, microphone capture, audio playback and the related UI.
#[derive(Script, ScriptHook, Widget)]
pub struct Realtime {
    #[deref]
    view: View,

    /// Command/event channel to the realtime client; `Some` once connected.
    #[rust]
    realtime_channel: Option<RealtimeChannel>,

    /// True while `realtime_channel` is usable.
    #[rust]
    is_connected: bool,

    /// True while a conversation session is running.
    #[rust]
    conversation_active: bool,

    /// Accumulated transcript text; cleared when a conversation starts.
    #[rust]
    transcript: String,

    #[rust]
    conversation_messages: Vec<(String, Message)>, // (item_id, message) for ordering

    /// Captured mic samples (f32); drained and sent on each streaming tick.
    /// Shared with the audio input callback — presumably filled there (set up
    /// in `setup_audio`, not visible here).
    #[rust]
    recorded_audio: Arc<Mutex<Vec<f32>>>,

    /// Samples queued for playback; emptiness is used to detect playback end.
    #[rust]
    playback_audio: Arc<Mutex<Vec<f32>>>,

    /// Whether we should record and send audio
    #[rust]
    should_record: Arc<Mutex<bool>>,

    /// Whether the user has muted the microphone
    #[rust]
    is_muted: Arc<Mutex<bool>>,

    /// Whether audio playback is currently in progress.
    #[rust]
    is_playing: Arc<Mutex<bool>>,

    /// Current read offset into `playback_audio`.
    #[rust]
    playback_position: Arc<Mutex<usize>>,

    /// Set once mic permission is granted and `setup_audio` has run.
    #[rust]
    audio_setup_done: bool,

    /// 20ms interval timer driving `send_audio_chunk_to_realtime`.
    #[rust]
    audio_streaming_timer: Option<Timer>,

    /// True while the AI is producing a response (blocks auto-resume of recording).
    #[rust]
    ai_is_responding: bool,

    /// True while the user is interrupting an AI response.
    #[rust]
    user_is_interrupting: bool,

    /// Item id of the assistant message currently being generated, if any.
    #[rust]
    current_assistant_item_id: Option<String>,

    /// Voice name chosen in the voice selector.
    #[rust]
    selected_voice: String,

    /// Whether any audio has been sent in the current session.
    #[rust]
    has_sent_audio: bool,

    /// Set when a conversation was requested while disconnected; the Chat
    /// widget reconnects and `try_start_pending_conversation` then starts it.
    #[rust]
    should_request_connection: bool,

    /// Tracks whether the reconnection request has already been dispatched.
    #[rust]
    connection_request_sent: bool,

    /// Bot this realtime session speaks for.
    #[rust]
    bot_entity_id: Option<EntityId>,

    /// Shared controller used to persist the conversation into chat history.
    #[rust]
    chat_controller: Option<Arc<Mutex<ChatController>>>,

    #[rust]
    pending_tool_call: Option<(String, String, String)>, // (name, call_id, arguments)

    /// Device list cached from the last `AudioDevicesEvent`, used to detect
    /// newly-appearing default devices and resolve dropdown selections.
    #[rust]
    audio_devices: Vec<AudioDeviceDesc>,

    /// Current state of the microphone permission flow.
    #[rust]
    mic_permission_status: MicPermissionStatus,
}
622
623impl Widget for Realtime {
624    fn handle_event(&mut self, cx: &mut Cx, event: &Event, scope: &mut Scope) {
625        self.view.handle_event(cx, event, scope);
626        self.widget_match_event(cx, event, scope);
627
628        if let Some(_value) = self
629            .drop_down(cx, ids!(transcription_model_selector))
630            .changed(event.actions())
631        {
632            if self.is_connected {
633                self.update_session_config(cx);
634            }
635        }
636
637        if let Some(enabled) = self
638            .check_box(cx, ids!(toggle_interruptions))
639            .changed(event.actions())
640        {
641            // // Send interruption configuration to the realtime client
642            // if let Some(channel) = &self.realtime_channel {
643            //     let _ = channel.command_sender.send(RealtimeCommand::SetInterruptionEnabled(enabled));
644            // }
645
646            if enabled && self.conversation_active {
647                *self.should_record.lock().unwrap() = true;
648            }
649        }
650
651        // Handle realtime events
652        self.handle_realtime_events(cx);
653
654        if !self.audio_setup_done
655            && self.mic_permission_status == MicPermissionStatus::NotDetermined
656        {
657            cx.request_permission(Permission::AudioInput);
658            self.mic_permission_status = MicPermissionStatus::Requesting;
659        }
660
661        if !self.audio_setup_done
662            && let Event::PermissionResult(pr) = event
663        {
664            if pr.permission == Permission::AudioInput {
665                match pr.status {
666                    PermissionStatus::Granted => {
667                        self.mic_permission_status = MicPermissionStatus::Granted;
668                        self.setup_audio(cx);
669                        self.audio_setup_done = true;
670                        self.view(cx, ids!(start_stop_button)).set_visible(cx, true);
671                    }
672                    PermissionStatus::DeniedCanRetry => {
673                        self.label(cx, ids!(status_label)).set_text(cx, "⚠️ Moly needs microphone access to have realtime conversations.\nClick on the button below to trigger another request");
674                        self.view(cx, ids!(request_permission_button))
675                            .set_visible(cx, true);
676                        self.view(cx, ids!(start_stop_button))
677                            .set_visible(cx, false);
678                        self.mic_permission_status = MicPermissionStatus::Denied;
679                    }
680                    _ => {
681                        self.label(cx, ids!(status_label)).set_text(cx, "⚠️ Moly does not have access to your microphone.\nTo continue, allow Moly to access your microphone\nin your system settings\nand then restart the app.");
682                        self.view(cx, ids!(request_permission_button))
683                            .set_visible(cx, false);
684                        self.view(cx, ids!(start_stop_button))
685                            .set_visible(cx, false);
686                        self.mic_permission_status = MicPermissionStatus::Denied;
687                    }
688                }
689            }
690        }
691
692        if self.audio_setup_done {
693            // Try to start pending conversation if we got connected
694            self.try_start_pending_conversation(cx);
695        }
696
697        // Handle audio streaming timer
698        if let Some(timer) = &self.audio_streaming_timer {
699            if timer.is_event(event).is_some() && self.conversation_active {
700                self.send_audio_chunk_to_realtime(cx);
701
702                // Check if we should resume recording when playback buffer is empty
703                // This is the backup mechanism for when toggle is OFF (no interruptions)
704                if self.playback_audio.lock().unwrap().is_empty() {
705                    let interruptions_enabled =
706                        self.check_box(cx, ids!(toggle_interruptions)).active(cx);
707
708                    if !interruptions_enabled {
709                        // Only auto-resume recording if interruptions are disabled
710                        // (when interruptions are enabled, recording control is handled elsewhere)
711                        if let Ok(mut should_record) = self.should_record.try_lock() {
712                            if !*should_record && self.conversation_active && !self.ai_is_responding
713                            {
714                                ::log::debug!(
715                                    "Auto-resuming recording - playback empty and interruptions disabled"
716                                );
717                                *should_record = true;
718                                self.label(cx, ids!(status_label))
719                                    .set_text(cx, "🎤 Listening...");
720                            }
721                        }
722                    }
723                }
724            }
725        }
726    }
727
728    fn draw_walk(&mut self, cx: &mut Cx2d, scope: &mut Scope, walk: Walk) -> DrawStep {
729        self.view.draw_walk(cx, scope, walk)
730    }
731}
732
733impl WidgetMatchEvent for Realtime {
734    /// Triggered at startup and whenever system audio devices change.
735    ///
736    /// We use it to update the list of available audio devices and select the default ones.
737    fn handle_audio_devices(
738        &mut self,
739        cx: &mut Cx,
740        devices: &AudioDevicesEvent,
741        _scope: &mut Scope,
742    ) {
743        let mut input_names = Vec::new();
744        let mut output_names = Vec::new();
745        let mut default_input_name = String::new();
746        let mut default_output_name = String::new();
747
748        devices
749            .descs
750            .iter()
751            .for_each(|desc| match desc.device_type {
752                AudioDeviceType::Input => {
753                    input_names.push(desc.name.clone());
754                    if desc.is_default {
755                        default_input_name = desc.name.clone();
756                    }
757                }
758                AudioDeviceType::Output => {
759                    output_names.push(desc.name.clone());
760                    if desc.is_default {
761                        default_output_name = desc.name.clone();
762                    }
763                }
764                AudioDeviceType::Loopback => {}
765            });
766
767        let mic_dropdown = self.drop_down(cx, ids!(mic_selector.device_selector));
768        mic_dropdown.set_labels(cx, input_names.clone());
769        mic_dropdown.set_selected_by_label(&default_input_name, cx);
770
771        let speaker_dropdown = self.drop_down(cx, ids!(speaker_selector.device_selector));
772        speaker_dropdown.set_labels(cx, output_names.clone());
773        speaker_dropdown.set_selected_by_label(&default_output_name, cx);
774
775        // Automatically switch to default devices
776        // e.g. when a user connects headphones we assume they want to use them right away.
777        // Note: we do not want to automatically switch to default devices if the user has already selected a non-default device, unless
778        // the default device is new (wasn't present in the previous list)
779        let default_input = devices.default_input();
780        let default_output = devices.default_output();
781
782        // The default device is new, assume we want to use it
783        if !self
784            .audio_devices
785            .iter()
786            .any(|d| d.device_type == AudioDeviceType::Input && d.device_id == default_input[0])
787        {
788            cx.use_audio_inputs(&default_input);
789        }
790
791        // The default device is new, assume we want to use it
792        if !self
793            .audio_devices
794            .iter()
795            .any(|d| d.device_type == AudioDeviceType::Output && d.device_id == default_output[0])
796        {
797            cx.use_audio_outputs(&default_output);
798        }
799
800        self.audio_devices = devices.descs.clone();
801    }
802
803    fn handle_actions(&mut self, cx: &mut Cx, actions: &Actions, _scope: &mut Scope) {
804        if self
805            .view(cx, ids!(start_stop_button))
806            .finger_down(actions)
807            .is_some()
808        {
809            if self.conversation_active {
810                self.reset_all(cx);
811            } else {
812                self.start_conversation(cx);
813            }
814            self.update_ui(cx);
815        }
816
817        // Handle tool permission buttons from ToolRequestLine
818        for chat_line_action in self
819            .chat_line(cx, ids!(tool_permission_line))
820            .filter_actions(actions)
821            .map(|wa| wa.cast::<ChatLineAction>())
822        {
823            match chat_line_action {
824                ChatLineAction::ToolApprove => {
825                    self.approve_tool_call(cx);
826                }
827                ChatLineAction::ToolDeny => {
828                    self.deny_tool_call(cx);
829                }
830                _ => {}
831            }
832        }
833
834        let speaker_dropdown = self.drop_down(cx, ids!(speaker_selector.device_selector));
835        if let Some(_id) = speaker_dropdown.changed(actions) {
836            let selected_device = self
837                .audio_devices
838                .iter()
839                .find(|device| device.name == speaker_dropdown.selected_label());
840            if let Some(device) = selected_device {
841                cx.use_audio_outputs(&[device.device_id]);
842            }
843        }
844
845        let microphone_dropdown = self.drop_down(cx, ids!(mic_selector.device_selector));
846        if let Some(_id) = microphone_dropdown.changed(actions) {
847            let selected_device = self
848                .audio_devices
849                .iter()
850                .find(|device| device.name == microphone_dropdown.selected_label());
851            if let Some(device) = selected_device {
852                cx.use_audio_inputs(&[device.device_id]);
853            }
854        }
855
856        // Mute
857        let mute_button = self.button(cx, ids!(mute_button));
858        let mute_label = self.label(cx, ids!(mute_status));
859        if self
860            .view(cx, ids!(mute_control))
861            .finger_down(actions)
862            .is_some()
863            || mute_button.clicked(actions)
864        {
865            let mut is_muted = self.is_muted.lock().unwrap();
866            if *is_muted {
867                // Mic was muted, unmute and update button to "Mute"
868                *is_muted = false;
869                mute_button.set_text(cx, ""); // fa-microphone
870                mute_label.set_text(cx, "Mute");
871            } else {
872                *is_muted = true;
873                mute_button.set_text(cx, ""); // fa-microphone-slash
874                mute_label.set_text(cx, "Unmute");
875            }
876        }
877
878        // Mic permissions
879        if self
880            .view(cx, ids!(request_permission_button))
881            .finger_up(actions)
882            .is_some()
883        {
884            cx.request_permission(Permission::AudioInput);
885        }
886
887        // Modal close
888        if self.button(cx, ids!(close_button)).clicked(actions) {
889            self.reset_state(cx);
890            cx.action(RealtimeModalAction::DismissModal);
891        }
892    }
893}
894
895impl Realtime {
896    pub fn set_realtime_channel(&mut self, channel: RealtimeChannel) {
897        self.realtime_channel = Some(channel);
898        self.is_connected = true;
899    }
900
901    pub fn set_bot_entity_id(&mut self, _cx: &mut Cx, bot_entity_id: EntityId) {
902        self.bot_entity_id = Some(bot_entity_id);
903    }
904
    /// Set (or clear, with `None`) the shared chat controller used elsewhere
    /// to persist the realtime conversation into the chat history.
    pub fn set_chat_controller(&mut self, chat_controller: Option<Arc<Mutex<ChatController>>>) {
        self.chat_controller = chat_controller;
    }
908
909    fn try_start_pending_conversation(&mut self, cx: &mut Cx) {
910        if self.is_connected && !self.conversation_active && self.should_request_connection {
911            // We can now start the conversation that was requested
912            self.should_request_connection = false;
913            self.connection_request_sent = false;
914            self.conversation_active = true;
915            self.ai_is_responding = true;
916            self.user_is_interrupting = false;
917            self.current_assistant_item_id = None;
918            *self.should_record.lock().unwrap() = false;
919            self.has_sent_audio = false;
920
921            // Clear previous audio
922            self.recorded_audio.lock().unwrap().clear();
923            self.playback_audio.lock().unwrap().clear();
924            *self.is_playing.lock().unwrap() = false;
925            *self.playback_position.lock().unwrap() = 0;
926            self.transcript.clear();
927
928            self.update_ui(cx);
929            self.start_audio_streaming(cx);
930            self.create_greeting_response(cx);
931        }
932    }
933
934    fn start_conversation(&mut self, cx: &mut Cx) {
935        if !self.is_connected {
936            // Set flag to request reconnection, Chat widget will handle this
937            self.should_request_connection = true;
938            self.connection_request_sent = false;
939            self.label(cx, ids!(status_label))
940                .set_text(cx, "Reconnecting...");
941            return;
942        }
943
944        self.conversation_active = true;
945        self.ai_is_responding = true;
946        self.user_is_interrupting = false;
947        self.current_assistant_item_id = None;
948        *self.should_record.lock().unwrap() = false;
949        self.has_sent_audio = false;
950
951        // Clear previous audio
952        self.recorded_audio.lock().unwrap().clear();
953        self.playback_audio.lock().unwrap().clear();
954        *self.is_playing.lock().unwrap() = false;
955        *self.playback_position.lock().unwrap() = 0;
956        self.transcript.clear();
957
958        self.update_ui(cx);
959        self.label(cx, ids!(status_label))
960            .set_text(cx, "Loading..."); // This will be removed by the greeting message
961        self.start_audio_streaming(cx);
962        self.create_greeting_response(cx);
963    }
964
965    fn start_audio_streaming(&mut self, cx: &mut Cx) {
966        // Start a timer to send audio chunks periodically
967        if self.audio_streaming_timer.is_none() {
968            let timer = cx.start_interval(0.020); // 20ms intervals
969            self.audio_streaming_timer = Some(timer);
970        }
971    }
972
973    fn send_audio_chunk_to_realtime(&mut self, _cx: &mut Cx) {
974        // Collect audio data and send to realtime client
975        if let Ok(mut recorded) = self.recorded_audio.try_lock() {
976            if !recorded.is_empty() {
977                let audio_data = recorded.clone();
978                recorded.clear();
979
980                // Convert to PCM16 and send
981                let pcm16_data = Self::convert_f32_to_pcm16(&audio_data);
982                if let Some(channel) = &self.realtime_channel {
983                    let _ = channel
984                        .command_sender
985                        .unbounded_send(RealtimeCommand::SendAudio(pcm16_data));
986                }
987            }
988        }
989    }
990
991    /// Common reset logic for both user-initiated reset and connection loss
992    fn reset_conversation_state(
993        &mut self,
994        cx: &mut Cx,
995        status_message: &str,
996        allow_reconnect: bool,
997    ) {
998        self.stop_conversation(cx);
999
1000        self.is_connected = false;
1001        self.has_sent_audio = false;
1002
1003        if !allow_reconnect {
1004            // Full reset - user clicked stop
1005            self.should_request_connection = false;
1006            self.connection_request_sent = false;
1007        }
1008        self.transcript.clear();
1009        self.label(cx, ids!(status_label))
1010            .set_text(cx, status_message);
1011
1012        // Hide tool permission UI and clear pending tool call
1013        self.chat_line(cx, ids!(tool_permission_line))
1014            .set_visible(cx, false);
1015        self.pending_tool_call = None;
1016
1017        // Show voice selector again
1018        self.view(cx, ids!(voice_selector_wrapper))
1019            .set_visible(cx, true);
1020        self.view(cx, ids!(selected_voice_view))
1021            .set_visible(cx, false);
1022
1023        self.update_ui(cx);
1024    }
1025
1026    fn reset_all(&mut self, cx: &mut Cx) {
1027        self.reset_conversation_state(cx, "Ready to start", false);
1028
1029        // Stop the session
1030        if let Some(channel) = &self.realtime_channel {
1031            let _ = channel
1032                .command_sender
1033                .unbounded_send(RealtimeCommand::StopSession);
1034        }
1035    }
1036
1037    fn stop_conversation(&mut self, cx: &mut Cx) {
1038        self.conversation_active = false;
1039        self.ai_is_responding = false;
1040        self.user_is_interrupting = false;
1041        self.current_assistant_item_id = None;
1042        *self.should_record.lock().unwrap() = false;
1043        *self.is_playing.lock().unwrap() = false;
1044
1045        // Stop audio streaming timer
1046        if let Some(timer) = &self.audio_streaming_timer {
1047            cx.stop_timer(*timer);
1048            self.audio_streaming_timer = None;
1049        }
1050
1051        // Clear audio buffers
1052        if let Ok(mut playback) = self.playback_audio.try_lock() {
1053            playback.clear();
1054        }
1055        if let Ok(mut recorded) = self.recorded_audio.try_lock() {
1056            recorded.clear();
1057        }
1058    }
1059
1060    fn handle_realtime_events(&mut self, cx: &mut Cx) {
1061        let events = if let Some(channel) = &self.realtime_channel {
1062            if let Ok(mut receiver_opt) = channel.event_receiver.lock() {
1063                if let Some(receiver) = receiver_opt.as_mut() {
1064                    let mut events = Vec::new();
1065                    while let Ok(Some(event)) = receiver.try_next() {
1066                        events.push(event);
1067                    }
1068                    events
1069                } else {
1070                    Vec::new()
1071                }
1072            } else {
1073                Vec::new()
1074            }
1075        } else {
1076            Vec::new()
1077        };
1078
1079        // Now process events without holding the lock
1080        for event in events {
1081            match event {
1082                RealtimeEvent::SessionReady => {
1083                    self.label(cx, ids!(connection_status))
1084                        .set_text(cx, "✅ Connected to OpenAI");
1085                    // self.update_session_config(cx);
1086                }
1087                RealtimeEvent::AudioData(audio_data) => {
1088                    // When we start receiving AI audio, the user is no longer interrupting
1089                    if self.user_is_interrupting {
1090                        self.user_is_interrupting = false;
1091                    }
1092
1093                    self.ai_is_responding = true;
1094
1095                    // Process audio immediately to start playback
1096                    self.add_audio_to_playback(audio_data);
1097
1098                    // Update recording state based on interruption settings
1099                    if self.conversation_active {
1100                        let interruptions_enabled =
1101                            self.check_box(cx, ids!(toggle_interruptions)).active(cx);
1102
1103                        if !interruptions_enabled {
1104                            // Interruptions disabled - mute microphone during AI speech
1105                            *self.should_record.lock().unwrap() = false;
1106                        } else {
1107                            // Interruptions enabled - ensure recording is active for real-time interruption
1108                            *self.should_record.lock().unwrap() = true;
1109                        }
1110                    }
1111
1112                    self.label(cx, ids!(status_label))
1113                        .set_text(cx, "🔊 Playing audio...");
1114                }
1115                RealtimeEvent::AudioTranscript(text) => {
1116                    self.transcript.push_str(&text);
1117                }
1118                RealtimeEvent::AudioTranscriptCompleted(transcript, item_id) => {
1119                    // Store completed AI transcript as a bot message
1120                    if !transcript.trim().is_empty() {
1121                        let message = Message {
1122                            from: self.bot_entity_id.clone().unwrap_or_default(),
1123                            content: MessageContent {
1124                                text: transcript,
1125                                ..Default::default()
1126                            },
1127                            ..Default::default()
1128                        };
1129                        self.conversation_messages.push((item_id, message));
1130                    }
1131                }
1132                RealtimeEvent::UserTranscriptCompleted(transcript, item_id) => {
1133                    // Store completed user transcript as a user message
1134                    if !transcript.trim().is_empty() {
1135                        let message = Message {
1136                            from: EntityId::User,
1137                            content: MessageContent {
1138                                text: transcript,
1139                                ..Default::default()
1140                            },
1141                            ..Default::default()
1142                        };
1143                        self.conversation_messages.push((item_id, message));
1144                    }
1145                }
1146                RealtimeEvent::SpeechStarted => {
1147                    self.label(cx, ids!(status_label))
1148                        .set_text(cx, "🎤 User speech detected");
1149
1150                    self.user_is_interrupting = true;
1151
1152                    // CRITICAL: Clear the playback audio buffer to stop ongoing AI audio
1153                    // This prevents audio accumulation and feedback loops
1154                    if let Ok(mut playbook) = self.playback_audio.try_lock() {
1155                        let cleared_samples = playbook.len();
1156                        playbook.clear();
1157                        ::log::debug!(
1158                            "Cleared {} audio samples from playback buffer to prevent feedback",
1159                            cleared_samples
1160                        );
1161                    }
1162
1163                    // Stop current playback and reset position
1164                    if let Ok(mut is_playing) = self.is_playing.try_lock() {
1165                        *is_playing = false;
1166                    }
1167                    if let Ok(mut position) = self.playback_position.try_lock() {
1168                        *position = 0;
1169                    }
1170
1171                    // Resume recording immediately when user starts speaking
1172                    if self.conversation_active {
1173                        *self.should_record.lock().unwrap() = true;
1174                    }
1175                }
1176                RealtimeEvent::SpeechStopped => {
1177                    self.label(cx, ids!(status_label))
1178                        .set_text(cx, "Processing...");
1179
1180                    // Temporarily stop recording while waiting for response
1181                    if self.conversation_active {
1182                        *self.should_record.lock().unwrap() = false;
1183                    }
1184                }
1185                RealtimeEvent::ResponseCompleted => {
1186                    let status_label = self.label(cx, ids!(status_label));
1187                    self.user_is_interrupting = false;
1188                    self.ai_is_responding = false;
1189                    self.current_assistant_item_id = None;
1190
1191                    // Resume recording after AI response is complete
1192                    if self.conversation_active {
1193                        // Check if interruptions are enabled via the toggle
1194                        let interruptions_enabled =
1195                            self.check_box(cx, ids!(toggle_interruptions)).active(cx);
1196
1197                        if interruptions_enabled {
1198                            // Allow immediate interruption
1199                            *self.should_record.lock().unwrap() = true;
1200                            status_label.set_text(cx, "✅ Response generated - 🎤 listening again");
1201                        } else {
1202                            // Without interruptions, only resume when playback buffer is truly empty
1203                            if self.playback_audio.lock().unwrap().is_empty() {
1204                                ::log::debug!(
1205                                    "Setting should_record to true - response completed and playback empty"
1206                                );
1207                                *self.should_record.lock().unwrap() = true;
1208                                status_label
1209                                    .set_text(cx, "✅ Response generated - 🎤 listening again");
1210                            } else {
1211                                status_label
1212                                    .set_text(cx, "✅ Response generated - 🔊 playing audio");
1213                                ::log::debug!("Playback still active, keeping recording disabled");
1214                            }
1215                        }
1216                    }
1217                }
1218                RealtimeEvent::FunctionCallRequest {
1219                    name,
1220                    call_id,
1221                    arguments,
1222                } => {
1223                    // Check if dangerous mode is enabled to auto-approve function calls
1224                    let dangerous_mode_enabled = self
1225                        .chat_controller
1226                        .as_ref()
1227                        .map(|ctx| {
1228                            ctx.lock()
1229                                .unwrap()
1230                                .tool_manager()
1231                                .map(|tm| tm.get_dangerous_mode_enabled())
1232                                .unwrap_or(false)
1233                        })
1234                        .unwrap_or(false);
1235
1236                    if dangerous_mode_enabled {
1237                        // Auto-approve function calls in dangerous mode
1238                        let display_name = display_name_from_namespaced(&name);
1239                        self.label(cx, ids!(status_label))
1240                            .set_text(cx, &format!("🔧 Auto-executing tool: {}", display_name));
1241
1242                        // Execute the function call directly
1243                        self.handle_function_call(cx, name, call_id, arguments);
1244                    } else {
1245                        // Show permission request as usual
1246                        self.label(cx, ids!(status_label))
1247                            .set_text(cx, &format!("🔧 Tool permission requested: {}", name));
1248
1249                        self.show_tool_permission_request(cx, name, call_id, arguments);
1250                    }
1251                }
1252                RealtimeEvent::Error(error) => {
1253                    ::log::error!("Realtime API error: {}", error);
1254
1255                    if !self.is_connected || !self.conversation_active {
1256                        ::log::debug!(
1257                            "Ignoring error - already disconnected or conversation not active"
1258                        );
1259                        return;
1260                    }
1261
1262                    // Check if this is a connection error
1263                    if error.contains("Connection lost")
1264                        || error.contains("Connection closed")
1265                        || error.contains("Failed to send")
1266                    {
1267                        // Connection was dropped - use common reset but allow reconnection
1268                        self.reset_conversation_state(
1269                            cx,
1270                            "❌ Connection lost. Please restart the conversation.",
1271                            true, // allow_reconnect
1272                        );
1273                    } else {
1274                        // Other types of errors - just display them
1275                        self.label(cx, ids!(status_label))
1276                            .set_text(cx, &format!("❌ Error: {}", error));
1277
1278                        // Resume recording on non-connection errors
1279                        if self.conversation_active {
1280                            *self.should_record.lock().unwrap() = true;
1281                        }
1282                    }
1283                }
1284            }
1285        }
1286    }
1287
1288    fn show_tool_permission_request(
1289        &mut self,
1290        cx: &mut Cx,
1291        name: String,
1292        call_id: String,
1293        arguments: String,
1294    ) {
1295        self.pending_tool_call = Some((name.clone(), call_id, arguments));
1296
1297        let tool_line = self.chat_line(cx, ids!(tool_permission_line));
1298        tool_line.set_visible(cx, true);
1299
1300        // Configure the tool line
1301        let display_name = display_name_from_namespaced(&name);
1302
1303        tool_line
1304            .avatar(cx, ids!(message_section.sender.avatar))
1305            .borrow_mut()
1306            .unwrap()
1307            .avatar = Some(EntityAvatar::Text("T".into()));
1308        tool_line
1309            .label(cx, ids!(message_section.sender.name))
1310            .set_text(cx, "Permission Request");
1311
1312        let content = MessageContent {
1313            text: format!("Tool '{}' is requesting permission to run", display_name),
1314            ..Default::default()
1315        };
1316        tool_line
1317            .slot(cx, ids!(message_section.content_section.content))
1318            .current()
1319            .as_standard_message_content()
1320            .set_content(cx, &content);
1321
1322        tool_line
1323            .view(cx, ids!(message_section.content_section.tool_actions))
1324            .set_visible(cx, true);
1325
1326        // Pause recording while waiting for permission
1327        *self.should_record.lock().unwrap() = false;
1328
1329        self.view.redraw(cx);
1330    }
1331
1332    fn handle_function_call(
1333        &mut self,
1334        _cx: &mut Cx,
1335        name: String,
1336        call_id: String,
1337        arguments: String,
1338    ) {
1339        let Some(chat_controller) = self.chat_controller.as_ref().cloned() else {
1340            ::log::error!("No chat controller available for function call");
1341            if let Some(channel) = &self.realtime_channel {
1342                let error_result = serde_json::json!({
1343                    "error": "Tool manager not available"
1344                })
1345                .to_string();
1346                let _ = channel.command_sender.unbounded_send(
1347                    RealtimeCommand::SendFunctionCallResult {
1348                        call_id,
1349                        output: error_result,
1350                    },
1351                );
1352            }
1353            return;
1354        };
1355
1356        let Some(tool_manager) = chat_controller.lock().unwrap().tool_manager().cloned() else {
1357            ::log::error!("No tool manager available for function call");
1358            if let Some(channel) = &self.realtime_channel {
1359                let error_result = serde_json::json!({
1360                    "error": "Tool manager not available"
1361                })
1362                .to_string();
1363                let _ = channel.command_sender.unbounded_send(
1364                    RealtimeCommand::SendFunctionCallResult {
1365                        call_id,
1366                        output: error_result,
1367                    },
1368                );
1369            }
1370            return;
1371        };
1372
1373        let channel = self.realtime_channel.clone();
1374
1375        let future = async move {
1376            // Parse the arguments JSON
1377            let arguments_map = match parse_tool_arguments(&arguments) {
1378                Ok(args) => args,
1379                Err(e) => {
1380                    ::log::error!("Failed to parse function call arguments: {}", e);
1381                    if let Some(channel) = &channel {
1382                        let error_result = serde_json::json!({
1383                            "error": e
1384                        })
1385                        .to_string();
1386                        let _ = channel.command_sender.unbounded_send(
1387                            RealtimeCommand::SendFunctionCallResult {
1388                                call_id,
1389                                output: error_result,
1390                            },
1391                        );
1392                    }
1393                    return;
1394                }
1395            };
1396
1397            let result = tool_manager
1398                .execute_tool_call(&name, &call_id, arguments_map)
1399                .await;
1400
1401            if let Some(channel) = &channel {
1402                let output = if result.is_error {
1403                    serde_json::json!({
1404                        "error": result.content
1405                    })
1406                    .to_string()
1407                } else {
1408                    result.content
1409                };
1410
1411                let _ = channel
1412                    .command_sender
1413                    .unbounded_send(RealtimeCommand::SendFunctionCallResult { call_id, output });
1414            }
1415        };
1416
1417        spawn(future);
1418    }
1419
1420    fn approve_tool_call(&mut self, cx: &mut Cx) {
1421        if let Some((name, call_id, arguments)) = self.pending_tool_call.take() {
1422            // Hide permission UI
1423            self.chat_line(cx, ids!(tool_permission_line))
1424                .set_visible(cx, false);
1425
1426            // Update status
1427            let display_name = display_name_from_namespaced(&name);
1428            self.label(cx, ids!(status_label))
1429                .set_text(cx, &format!("🔧 Executing tool: {}", display_name));
1430
1431            // Execute the tool
1432            self.handle_function_call(cx, name, call_id, arguments);
1433
1434            // Resume recording if conversation is active
1435            if self.conversation_active {
1436                *self.should_record.lock().unwrap() = true;
1437            }
1438
1439            self.view.redraw(cx);
1440        }
1441    }
1442
1443    fn deny_tool_call(&mut self, cx: &mut Cx) {
1444        if let Some((name, call_id, _arguments)) = self.pending_tool_call.take() {
1445            // Hide permission UI
1446            self.chat_line(cx, ids!(tool_permission_line))
1447                .set_visible(cx, false);
1448
1449            // Send denial response
1450            if let Some(channel) = &self.realtime_channel {
1451                let denial_result = serde_json::json!({
1452                    "error": "Tool execution denied by user"
1453                })
1454                .to_string();
1455                let _ = channel.command_sender.unbounded_send(
1456                    RealtimeCommand::SendFunctionCallResult {
1457                        call_id,
1458                        output: denial_result,
1459                    },
1460                );
1461            }
1462
1463            // Update status
1464            let display_name = display_name_from_namespaced(&name);
1465            self.label(cx, ids!(status_label))
1466                .set_text(cx, &format!("🚫 Tool '{}' denied", display_name));
1467
1468            // Resume recording if conversation is active
1469            if self.conversation_active {
1470                *self.should_record.lock().unwrap() = true;
1471            }
1472
1473            self.view.redraw(cx);
1474        }
1475    }
1476
1477    fn setup_audio(&mut self, cx: &mut Cx) {
1478        let recorded_audio = self.recorded_audio.clone();
1479        let should_record = self.should_record.clone();
1480        let is_muted = self.is_muted.clone();
1481
1482        // Audio input callback - capture for realtime streaming
1483        cx.audio_input(0, move |info, input_buffer| {
1484            if let Ok(should_record_guard) = should_record.try_lock() {
1485                if let Ok(is_muted_guard) = is_muted.try_lock() {
1486                    if *should_record_guard && !*is_muted_guard {
1487                        if let Ok(mut recorded) = recorded_audio.try_lock() {
1488                            let channel = input_buffer.channel(0);
1489
1490                            // Calculate downsampling ratio from input sample rate to 24kHz
1491                            let input_sample_rate = info.sample_rate;
1492                            let target_sample_rate = 24000.0;
1493                            let downsample_ratio =
1494                                (input_sample_rate / target_sample_rate) as usize;
1495
1496                            // Downsample by taking every nth sample based on the ratio
1497                            // TODO: this is a simple decimation - for better quality, we should use proper filtering
1498                            for i in (0..channel.len()).step_by(downsample_ratio) {
1499                                recorded.push(channel[i]);
1500                            }
1501                        }
1502                    }
1503                }
1504            }
1505        });
1506
1507        let playback_audio = self.playback_audio.clone();
1508        let playback_position = self.playback_position.clone();
1509        let is_playing = self.is_playing.clone();
1510
1511        // Audio output callback - plays AI response audio
1512        cx.audio_output(0, move |info, output_buffer| {
1513            // Always start with silence
1514            output_buffer.zero();
1515
1516            if let Ok(mut playback) = playback_audio.try_lock() {
1517                if let Ok(mut pos) = playback_position.try_lock() {
1518                    if let Ok(mut playing) = is_playing.try_lock() {
1519                        // Check if we should continue playing
1520                        let input_sample_rate = 24000.0; // Input audio sample rate
1521                        let output_sample_rate = info.sample_rate;
1522                        let upsample_ratio = (output_sample_rate / input_sample_rate) as usize;
1523
1524                        if *playing
1525                            && !playback.is_empty()
1526                            && *pos < playback.len() * upsample_ratio
1527                        {
1528                            // Write to all output channels (mono -> stereo if needed)
1529                            let frame_count = output_buffer.frame_count();
1530                            let channel_count = output_buffer.channel_count();
1531
1532                            let mut samples_to_drain = 0;
1533
1534                            for frame_idx in 0..frame_count {
1535                                // Calculate upsampling ratio based on actual sample rates
1536                                // Assuming input audio is 24kHz, calculate the ratio dynamically
1537                                let input_sample_rate = 24000.0; // Input audio sample rate
1538                                let output_sample_rate = info.sample_rate;
1539                                let upsample_ratio =
1540                                    (output_sample_rate / input_sample_rate) as usize;
1541
1542                                let sample_idx = *pos / upsample_ratio; // Map output position to input sample
1543
1544                                if sample_idx < playback.len() {
1545                                    let audio_sample = playback[sample_idx];
1546
1547                                    // Write the same sample to all output channels
1548                                    for channel_idx in 0..channel_count {
1549                                        let channel = output_buffer.channel_mut(channel_idx);
1550                                        channel[frame_idx] = audio_sample;
1551                                    }
1552
1553                                    *pos += 1;
1554
1555                                    // Track how many samples we can safely remove (every upsample_ratio pos increments = 1 sample)
1556                                    if *pos % upsample_ratio == 0 {
1557                                        samples_to_drain += 1;
1558                                    }
1559                                } else {
1560                                    // Reached end of audio data
1561                                    *playing = false;
1562                                    *pos = 0;
1563                                    // Drain remaining samples since we're done
1564                                    samples_to_drain = playback.len();
1565                                    break;
1566                                }
1567                            }
1568
1569                            // Remove consumed samples from the front of the buffer
1570                            if samples_to_drain > 0 && samples_to_drain <= playback.len() {
1571                                playback.drain(..samples_to_drain);
1572                                // Adjust position since we removed samples from the front
1573                                *pos = (*pos).saturating_sub(samples_to_drain * upsample_ratio);
1574                                // ::log::debug!("Drained {} samples, buffer size now: {}, pos: {}",
1575                                //         samples_to_drain, playback.len(), *pos);
1576                            }
1577                        } else {
1578                            // Not playing or no data - ensure we output silence
1579                            if *playing && playback.is_empty() {
1580                                *playing = false;
1581                                *pos = 0;
1582                            }
1583                        }
1584                    }
1585                }
1586            }
1587        });
1588
1589        self.audio_setup_done = true;
1590    }
1591
1592    fn add_audio_to_playback(&mut self, audio_bytes: Vec<u8>) {
1593        // Convert PCM16 bytes back to f32 samples
1594        let samples = Self::convert_pcm16_to_f32(&audio_bytes);
1595
1596        if let Ok(mut playback) = self.playback_audio.try_lock() {
1597            // If we're not currently playing, start fresh playback immediately
1598            if let Ok(mut is_playing) = self.is_playing.try_lock() {
1599                if !*is_playing {
1600                    // Clear old audio data and start fresh playback
1601                    playback.clear();
1602                    *self.playback_position.lock().unwrap() = 0;
1603                    *is_playing = true;
1604                    ::log::debug!(
1605                        "Started fresh playback of AI response audio ({} samples)",
1606                        samples.len()
1607                    );
1608                }
1609            }
1610
1611            playback.extend_from_slice(&samples);
1612        }
1613    }
1614
1615    fn convert_f32_to_pcm16(samples: &[f32]) -> Vec<u8> {
1616        let mut pcm16_bytes = Vec::with_capacity(samples.len() * 2);
1617
1618        for &sample in samples {
1619            let clamped = sample.max(-1.0).min(1.0);
1620            let pcm16_sample = (clamped * 32767.0) as i16;
1621            pcm16_bytes.extend_from_slice(&pcm16_sample.to_le_bytes());
1622        }
1623
1624        pcm16_bytes
1625    }
1626
1627    fn convert_pcm16_to_f32(bytes: &[u8]) -> Vec<f32> {
1628        let mut samples = Vec::with_capacity(bytes.len() / 2);
1629
1630        for chunk in bytes.chunks_exact(2) {
1631            let pcm16_sample = i16::from_le_bytes([chunk[0], chunk[1]]);
1632            let f32_sample = pcm16_sample as f32 / 32767.0;
1633            samples.push(f32_sample);
1634        }
1635
1636        samples
1637    }
1638
1639    fn update_session_config(&mut self, cx: &mut Cx) {
1640        self.selected_voice = self.drop_down(cx, ids!(voice_selector)).selected_label();
1641        self.view(cx, ids!(voice_selector_wrapper))
1642            .set_visible(cx, false);
1643        self.view(cx, ids!(selected_voice_view))
1644            .set_visible(cx, true);
1645        self.label(cx, ids!(selected_voice)).set_text(
1646            cx,
1647            format!("Selected voice: {}", self.selected_voice).as_str(),
1648        );
1649
1650        // Send updated session config
1651        if let Some(channel) = &self.realtime_channel {
1652            let _ = channel
1653                .command_sender
1654                .unbounded_send(RealtimeCommand::UpdateSessionConfig {
1655                    voice: self.selected_voice.clone(),
1656                    transcription_model: self
1657                        .drop_down(cx, ids!(transcription_model_selector))
1658                        .selected_label(),
1659                });
1660        }
1661    }
1662
1663    fn create_greeting_response(&mut self, cx: &mut Cx) {
1664        self.update_session_config(cx);
1665        if let Some(channel) = &self.realtime_channel {
1666            let _ = channel
1667                .command_sender
1668                .unbounded_send(RealtimeCommand::CreateGreetingResponse);
1669        }
1670    }
1671
1672    fn update_ui(&self, cx: &mut Cx) {
1673        if !self.conversation_active {
1674            self.label(cx, ids!(stop_start_label))
1675                .set_text(cx, "Start conversation");
1676        } else {
1677            self.label(cx, ids!(stop_start_label))
1678                .set_text(cx, "Stop conversation");
1679        }
1680    }
1681
1682    /// Check if the realtime widget is requesting a new connection
1683    pub fn connection_requested(&mut self) -> bool {
1684        if self.should_request_connection && !self.is_connected && !self.connection_request_sent {
1685            self.connection_request_sent = true;
1686            true
1687        } else {
1688            false
1689        }
1690    }
1691
1692    /// Get conversation messages and clear the collection
1693    pub fn take_conversation_messages(&mut self) -> Vec<Message> {
1694        let mut messages_with_ids = std::mem::take(&mut self.conversation_messages);
1695
1696        // Sort by item_id to ensure chronological order
1697        messages_with_ids.sort_by(|a, b| a.0.cmp(&b.0));
1698
1699        // Extract just the messages, maintaining the sorted order
1700        messages_with_ids
1701            .into_iter()
1702            .map(|(_, message)| message)
1703            .collect()
1704    }
1705
    /// Reset the widget's state; intended for cleanup when the enclosing
    /// modal closes. Thin wrapper so callers have a stable public entry point.
    pub fn reset_state(&mut self, cx: &mut Cx) {
        // Delegates to reset_all, which performs the actual reset work.
        self.reset_all(cx);
    }
1710}
1711
1712impl RealtimeRef {
1713    pub fn set_realtime_channel(&mut self, channel: RealtimeChannel) {
1714        if let Some(mut inner) = self.borrow_mut() {
1715            inner.set_realtime_channel(channel);
1716        }
1717    }
1718
1719    pub fn set_bot_entity_id(&mut self, cx: &mut Cx, bot_entity_id: EntityId) {
1720        if let Some(mut inner) = self.borrow_mut() {
1721            inner.set_bot_entity_id(cx, bot_entity_id);
1722        }
1723    }
1724
1725    pub fn connection_requested(&mut self) -> bool {
1726        if let Some(mut inner) = self.borrow_mut() {
1727            inner.connection_requested()
1728        } else {
1729            false
1730        }
1731    }
1732
1733    pub fn take_conversation_messages(&mut self) -> Vec<Message> {
1734        if let Some(mut inner) = self.borrow_mut() {
1735            inner.take_conversation_messages()
1736        } else {
1737            Vec::new()
1738        }
1739    }
1740
1741    pub fn reset_state(&mut self, cx: &mut Cx) {
1742        if let Some(mut inner) = self.borrow_mut() {
1743            inner.reset_state(cx);
1744        }
1745    }
1746
1747    pub fn set_chat_controller(&mut self, chat_controller: Option<Arc<Mutex<ChatController>>>) {
1748        if let Some(mut inner) = self.borrow_mut() {
1749            inner.set_chat_controller(chat_controller);
1750        }
1751    }
1752}