car-voice 0.15.1

Voice I/O capability for CAR — mic capture, VAD, listener/speaker traits
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
//! Voice turn orchestrator — drives `dispatch_voice_turn` into TTS.
//!
//! Sits between the listener (which produces finalized utterances) and
//! the speaker (which plays audio). Implements the two-track pattern
//! from `docs/proposals/voice-sidecar-orchestration.md`:
//!
//! 1. Classify the utterance — tool-likely (email/calendar/search) or
//!    conversational.
//! 2. Tool-likely → play a hardcoded bridge phrase first; only the
//!    sidecar runs an LLM. Prevents the fast model from hallucinating
//!    tool data it doesn't have ("STRUCTURAL HALLUCINATION FIX").
//! 3. Conversational → fast track streams a quick reply through TTS
//!    while the sidecar runs in parallel for the substantive answer.
//! 4. The pipeline lock releases between fast turn and sidecar so the
//!    user can speak again as soon as the fast TTS ends.
//! 5. A new utterance arriving cancels the in-flight turn (turn_id
//!    bump + cancellation token); a stale sidecar result that arrives
//!    after a barge-in is dropped at the gate.
//!
//! See `docs/proposals/voice-sidecar-orchestration-plan.md` §6 for
//! the design behind the cancellation, mixer integration, and prewarm
//! decisions.

use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::Arc;
use std::time::Duration;

use car_engine::{
    dispatch_voice_turn_sidecar_only_with_classifier,
    dispatch_voice_turn_sidecar_only_with_telemetry, dispatch_voice_turn_with_telemetry,
    DirectDataFetcher, SidecarResult, VoiceTelemetry, VoiceTurnControl, VoiceTurnError,
    VoiceTurnHandle,
};
use car_inference::{
    intent::IntentHint, GenerateParams, GenerateRequest, InferenceEngine, StreamEvent,
};
use tokio::sync::{Mutex, Semaphore};

use crate::{
    compose_voice_context,
    utterance::{ToolKind, UtteranceClass},
    voice_audio_mixer::VoiceMixerHandle,
    Result, Speaker, VoiceConfig, VoiceError,
};
// Re-exports so the `crate::utterance::*` types and helpers are
// reachable through `car_voice::orchestrator::*` for callers that
// followed the old layout. The canonical home is `crate::utterance`.
pub use crate::utterance::{bridge_phrase, classify_utterance, format_for_voice};

/// Stable string tag for a `ToolKind`, used in telemetry payloads
/// (e.g. the `kind` field of `VoiceBridgePlayed`).
fn tool_kind_str(kind: ToolKind) -> &'static str {
    match kind {
        ToolKind::Unknown => "unknown",
        ToolKind::Search => "search",
        ToolKind::Calendar => "calendar",
        ToolKind::Email => "email",
    }
}

/// Byte index (inclusive) of the last sentence-ending punctuation
/// (`.`, `!`, `?`) in `s`, or `None` when no terminator is present.
/// Callers drain `..=index` to flush complete sentences into TTS as
/// text streams in; the terminators are all single-byte ASCII, so
/// the inclusive drain always lands on a char boundary.
fn find_sentence_end(s: &str) -> Option<usize> {
    s.char_indices()
        .rev()
        .find(|&(_, c)| c == '.' || c == '!' || c == '?')
        .map(|(idx, _)| idx)
}

/// Default sidecar wait timeout. After this, drop the continuation
/// even if the sidecar is still running — the user hears nothing
/// more from this turn. Overridable via `with_sidecar_timeout`.
const DEFAULT_SIDECAR_TIMEOUT: Duration = Duration::from_secs(30);

/// Default interval between "still working on that" progress phrases.
/// Used when `VoiceConfig::progress_interval_secs` is `None`.
const DEFAULT_PROGRESS_INTERVAL: Duration = Duration::from_secs(8);

/// Default cap on progress-phrase attempts (8s × 4 = 32s).
/// Used when `VoiceConfig::max_progress_attempts` is `None`.
const DEFAULT_MAX_PROGRESS_ATTEMPTS: u32 = 4;

/// Hard ceiling on tokens per voice reply. 200 tokens ≈ ~30 seconds
/// of TTS at typical speaking rates — already long for a voice turn.
/// The voice prompt overlay asks the model to be much shorter; this
/// cap prevents a chatty model from rambling past the user's
/// patience even when the prompt is ignored.
const DEFAULT_VOICE_MAX_TOKENS: usize = 200;

/// Generation parameters shared by every voice dispatch: all
/// defaults except the token ceiling (`DEFAULT_VOICE_MAX_TOKENS`).
fn voice_params() -> GenerateParams {
    let mut params = GenerateParams::default();
    params.max_tokens = DEFAULT_VOICE_MAX_TOKENS;
    params
}

/// Progress phrase played periodically while the sidecar is still
/// running. Hardcoded for now; pool variety is a follow-up like
/// bridge phrases.
const PROGRESS_PHRASE: &str = "Still working on that.";

/// Orchestrates one voice session — listener output → engine dispatch
/// → TTS playback. Hold one per concurrent voice session (typically
/// one per app instance for a single-user mic).
///
/// Construct with [`VoiceOrchestrator::new`] or `with_mixer`, then
/// chain the `with_*` builders for telemetry, pinned models, etc.
pub struct VoiceOrchestrator {
    engine: Arc<InferenceEngine>,
    speaker: Arc<dyn Speaker>,
    /// Optional mixer handle. When `Some`, TTS routes through
    /// `queue_tts`/`stop_tts` for cancellable playback. When `None`,
    /// falls back to the speaker's non-cancellable `speak` (barge-in
    /// still bumps `current_turn_id` to drop stale results, but the
    /// active TTS clip plays to its end).
    mixer: Option<VoiceMixerHandle>,
    /// Optional telemetry sink. When `Some`, the orchestrator emits
    /// voice eventlog kinds (fast/sidecar lifecycle, bridge played,
    /// turn cancelled) for observability. Independent of the engine's
    /// own telemetry: the orchestrator passes the same sink through
    /// to dispatch so engine-side and caller-side events land in the
    /// same log.
    telemetry: Option<VoiceTelemetry>,
    /// Optional direct data fetcher (Phase C). When attached and the
    /// utterance classifies as tool-likely, the sidecar consults the
    /// fetcher first; on hit the LLM is skipped entirely.
    direct_fetcher: Option<Arc<dyn DirectDataFetcher>>,
    /// Optional explicit model id for the fast track (overrides the
    /// adaptive router). Use when you know which model you want
    /// (e.g. `apple/foundation:default` on macOS 26+ for sub-500ms
    /// TTFT) and the router's scoring isn't cooperating, or when
    /// you've registered a delegated model whose hint you want to
    /// pin. `None` keeps adaptive routing.
    fast_model: Option<String>,
    /// Optional explicit model id for the sidecar track. Same
    /// semantics as `fast_model` — `None` keeps adaptive routing.
    sidecar_model: Option<String>,
    /// When true, conversational utterances skip the fast track and
    /// go straight to a single sidecar dispatch — one TTS clip per
    /// turn instead of two. Useful when fast and sidecar would
    /// resolve to the same model (so the second clip just paraphrases
    /// the first), or when you want a simpler single-reply UX. Tool-
    /// likely utterances still play their bridge phrase. Default
    /// `false` preserves the two-track behavior described in the
    /// proposal.
    skip_fast_track: bool,
    config: VoiceConfig,
    /// Serializes fast + sidecar TTS playback. Permit count = 1, so
    /// `acquire_owned` acts as an async mutex over the speaker path.
    /// Released between turns so the user can barge in cleanly.
    pipeline_lock: Arc<Semaphore>,
    /// Most recent turn id minted. Sidecar results with a stale id
    /// are dropped at the gate (race-recovery).
    current_turn_id: Arc<AtomicU64>,
    /// Control surface for the in-flight turn, if any. Stored
    /// separately from the receivers (which stay with the driving
    /// task) so `cancel_current_turn` can reach the cancellation
    /// token from a different task. See plan §6.8.
    current_handle: Arc<Mutex<Option<VoiceTurnControl>>>,
    sidecar_timeout: Duration,
}

impl VoiceOrchestrator {
    /// Construct an orchestrator that uses the speaker's
    /// non-cancellable `speak` path — suitable where no mixer is
    /// available or barge-in mid-TTS isn't required. Equivalent to
    /// `with_mixer(engine, speaker, config, None)`.
    pub fn new(
        engine: Arc<InferenceEngine>,
        speaker: Arc<dyn Speaker>,
        config: VoiceConfig,
    ) -> Self {
        Self::with_mixer(engine, speaker, config, None)
    }

    /// Construct an orchestrator, optionally routing TTS through a
    /// mixer. Pass `Some(mixer)` for cancellable playback (barge-in
    /// invokes the mixer's `stop_tts`); `None` falls back to the
    /// speaker's blocking `speak`.
    pub fn with_mixer(
        engine: Arc<InferenceEngine>,
        speaker: Arc<dyn Speaker>,
        config: VoiceConfig,
        mixer: Option<VoiceMixerHandle>,
    ) -> Self {
        Self {
            engine,
            speaker,
            mixer,
            config,
            telemetry: None,
            direct_fetcher: None,
            fast_model: None,
            sidecar_model: None,
            skip_fast_track: false,
            sidecar_timeout: DEFAULT_SIDECAR_TIMEOUT,
            pipeline_lock: Arc::new(Semaphore::new(1)),
            current_turn_id: Arc::new(AtomicU64::new(0)),
            current_handle: Arc::new(Mutex::new(None)),
        }
    }

    /// Builder-style override for how long `handle_utterance` waits
    /// on the sidecar before dropping the continuation.
    pub fn with_sidecar_timeout(mut self, timeout: Duration) -> Self {
        self.sidecar_timeout = timeout;
        self
    }

    /// Attach a telemetry sink. Builder-style. Voice eventlog kinds
    /// (`VoiceFastTurnStarted`/`Ended`, `VoiceSidecarResolved`/
    /// `Failed`/`TimedOut`, `VoiceBridgePlayed`, `VoiceTurnCancelled`)
    /// are emitted here as turns progress, and the same sink is
    /// handed through to `car-engine`'s dispatch so engine-side and
    /// caller-side events land in one shared log.
    pub fn with_telemetry(mut self, telemetry: VoiceTelemetry) -> Self {
        self.telemetry = Some(telemetry);
        self
    }

    /// Pin explicit model ids for the fast and/or sidecar tracks.
    /// Each argument is a full model id (e.g.
    /// `apple/foundation:default`); `None` leaves that track on the
    /// adaptive router. Useful when the router's scoring picks an
    /// unavailable model (e.g. an MLX model whose weight shards
    /// aren't downloaded) or a specific delegated runner must be
    /// forced. Builder-style.
    pub fn with_models(
        mut self,
        fast_model: Option<String>,
        sidecar_model: Option<String>,
    ) -> Self {
        self.fast_model = fast_model;
        self.sidecar_model = sidecar_model;
        self
    }

    /// Builder-style toggle. When `skip` is true, conversational
    /// utterances bypass the fast track and get a single sidecar
    /// reply — one TTS clip per turn instead of two. Tool-likely
    /// utterances still play their bridge phrase either way.
    pub fn with_skip_fast_track(mut self, skip: bool) -> Self {
        self.skip_fast_track = skip;
        self
    }

    /// Attach a direct data fetcher (Phase C). Builder-style. On a
    /// tool-likely utterance the sidecar consults the fetcher first;
    /// a hit skips the LLM entirely, saving roughly one LLM
    /// round-trip per applicable turn (1-3 seconds on observed quip
    /// workloads). A miss falls through to the LLM path
    /// transparently.
    pub fn with_direct_fetcher(mut self, fetcher: Arc<dyn DirectDataFetcher>) -> Self {
        self.direct_fetcher = Some(fetcher);
        self
    }

    /// Fire a 1-token probe so the fast model is resident before the
    /// first real turn. Apple's FoundationModels has a non-trivial
    /// cold start; prewarming at construction keeps first-turn TTFT
    /// under the <500ms target. Best-effort and idempotent — errors
    /// are logged, never propagated.
    pub async fn prewarm(&self) {
        let params = GenerateParams {
            max_tokens: 1,
            ..Default::default()
        };
        let intent = IntentHint {
            prefer_fast: true,
            ..IntentHint::default()
        };
        let req = GenerateRequest {
            prompt: ".".to_string(),
            // Honor a pinned fast model when set — otherwise prewarm
            // routes adaptively and may resolve to a different model
            // than the dispatch path actually uses.
            model: self.fast_model.clone(),
            params,
            intent: Some(intent),
            ..Default::default()
        };
        if let Err(e) = self.engine.generate(req).await {
            tracing::warn!(
                error = %e,
                "fast model prewarm failed; first turn will be slower"
            );
        } else {
            tracing::debug!("voice fast model prewarmed");
        }
    }

    /// Barge-in entry point — the listener calls this on
    /// `VoiceEvent::BargeIn` (or when a new utterance arrives during
    /// an in-flight turn). Halts active mixer TTS, bumps the current
    /// turn id so any in-flight sidecar result is rejected at its
    /// arrival gate, and cancels the in-flight turn's tasks via the
    /// stored control.
    pub async fn cancel_current_turn(&self) {
        if let Some(mixer) = &self.mixer {
            mixer.stop_tts();
        }
        self.current_turn_id.fetch_add(1, Ordering::AcqRel);
        let mut slot = self.current_handle.lock().await;
        // `take` both cancels and clears the slot in one step.
        if let Some(control) = slot.take() {
            control.cancel();
        }
    }

    /// Drive one finalized utterance through the two-track pipeline.
    ///
    /// Flow: cancel any in-flight turn → classify → dispatch
    /// (bridge / sidecar-only / two-track) → drain the fast stream
    /// through TTS under the pipeline lock → release the lock
    /// (barge-in window) → wait on the sidecar with progress phrases
    /// → play the sidecar answer if this turn is still current.
    pub async fn handle_utterance(&self, utterance: String) -> Result<()> {
        // Cancel any in-flight turn before minting a new one. New
        // utterance arriving means the previous one is implicitly
        // superseded — drop its sidecar result if it arrives later.
        self.cancel_current_turn().await;

        let class = classify_utterance(&utterance);
        let handle = match class {
            UtteranceClass::ToolLikely(kind) => self.dispatch_with_bridge(utterance, kind).await?,
            UtteranceClass::Conversational if self.skip_fast_track => {
                self.dispatch_sidecar_only(utterance)
            }
            UtteranceClass::Conversational => self.dispatch_two_track(utterance),
        };

        // Publish this turn as current: the id gates stale sidecar
        // results; the stored control lets cancel_current_turn reach
        // this turn's cancellation token from another task.
        let our_turn = handle.turn_id();
        self.current_turn_id.store(our_turn, Ordering::Release);
        {
            let mut g = self.current_handle.lock().await;
            *g = Some(handle.control.clone());
        }

        // Fast track: bridge-only paths have a pre-closed channel and
        // exit the drain immediately. Conversational paths drain the
        // streamed deltas through TTS as sentences land.
        let _permit = self
            .pipeline_lock
            .clone()
            .acquire_owned()
            .await
            .map_err(|_| VoiceError::Playback("pipeline closed".into()))?;
        self.drain_fast_to_tts(handle.fast).await?;
        drop(_permit); // release between turns; user can barge in here.

        // Sidecar wait — capped by sidecar_timeout. Plays
        // periodic progress phrases while the user is waiting.
        // Race-recovery via turn_id gate: if the result's turn_id
        // doesn't match the *current* turn id, drop it.
        match self.wait_with_progress(handle.sidecar, our_turn).await {
            Some(Ok(result)) => self.maybe_play_sidecar(our_turn, result).await?,
            Some(Err(VoiceTurnError::Cancelled)) => {
                tracing::info!(turn_id = our_turn, "sidecar cancelled");
            }
            Some(Err(VoiceTurnError::Inference(e))) => {
                tracing::warn!(turn_id = our_turn, error = %e, "sidecar inference failed");
            }
            // `None` covers both the wall-clock timeout and the
            // progress-attempt cap inside wait_with_progress.
            None => {
                if let Some(t) = &self.telemetry {
                    t.emit(
                        car_eventlog::EventKind::VoiceSidecarTimedOut,
                        our_turn,
                        vec![(
                            "timeout_ms",
                            (self.sidecar_timeout.as_millis() as u64).into(),
                        )],
                    );
                }
            }
        }

        // Clear our slot so cancel_current_turn doesn't fire on a
        // stale handle.
        let mut g = self.current_handle.lock().await;
        if g.as_ref().map(|c| c.turn_id) == Some(our_turn) {
            *g = None;
        }
        Ok(())
    }

    /// Handle a tool-likely utterance: speak the hardcoded bridge
    /// phrase up front — under the pipeline lock, so it can't
    /// interleave with whatever the previous turn was finishing —
    /// then return a sidecar-only handle for the substantive answer.
    async fn dispatch_with_bridge(
        &self,
        utterance: String,
        kind: ToolKind,
    ) -> Result<VoiceTurnHandle> {
        let phrase = bridge_phrase(kind);
        let permit = self
            .pipeline_lock
            .clone()
            .acquire_owned()
            .await
            .map_err(|_| VoiceError::Playback("pipeline closed".into()))?;
        self.speak(phrase).await?;
        drop(permit);
        // The sidecar still runs to produce the substantive answer.
        let handle = self.dispatch_sidecar_only(utterance);
        if let Some(telemetry) = &self.telemetry {
            telemetry.emit(
                car_eventlog::EventKind::VoiceBridgePlayed,
                handle.turn_id(),
                vec![
                    ("kind", serde_json::Value::from(tool_kind_str(kind))),
                    ("phrase", serde_json::Value::from(phrase)),
                ],
            );
        }
        Ok(handle)
    }

    /// Dispatch a conversational utterance on both tracks: a
    /// prefer-fast request for the quick streamed reply and a
    /// sidecar request for the substantive answer, sharing one voice
    /// context.
    fn dispatch_two_track(&self, utterance: String) -> VoiceTurnHandle {
        let context = compose_voice_context(&self.config, None);
        let sidecar_req = GenerateRequest {
            prompt: utterance.clone(),
            model: self.sidecar_model.clone(),
            params: voice_params(),
            context: context.clone(),
            ..Default::default()
        };
        let fast_req = GenerateRequest {
            prompt: utterance.clone(),
            model: self.fast_model.clone(),
            params: voice_params(),
            context,
            intent: Some(IntentHint {
                prefer_fast: true,
                ..IntentHint::default()
            }),
            ..Default::default()
        };
        dispatch_voice_turn_with_telemetry(
            self.engine.clone(),
            utterance,
            fast_req,
            sidecar_req,
            self.telemetry.clone(),
        )
    }

    /// Dispatch a sidecar-only turn. When a direct fetcher is
    /// attached, the classifier dispatch consults it before the LLM;
    /// otherwise the plain telemetry dispatch runs.
    fn dispatch_sidecar_only(&self, utterance: String) -> VoiceTurnHandle {
        let sidecar_req = GenerateRequest {
            prompt: utterance.clone(),
            model: self.sidecar_model.clone(),
            params: voice_params(),
            context: compose_voice_context(&self.config, None),
            ..Default::default()
        };
        if let Some(fetcher) = &self.direct_fetcher {
            dispatch_voice_turn_sidecar_only_with_classifier(
                self.engine.clone(),
                utterance,
                sidecar_req,
                Some(fetcher.clone()),
                self.telemetry.clone(),
            )
        } else {
            dispatch_voice_turn_sidecar_only_with_telemetry(
                self.engine.clone(),
                utterance,
                sidecar_req,
                self.telemetry.clone(),
            )
        }
    }

    /// Pull streamed events off the fast track and speak complete
    /// sentences as they land; whatever text remains when the stream
    /// ends is flushed as a final clip.
    async fn drain_fast_to_tts(
        &self,
        mut rx: tokio::sync::mpsc::Receiver<StreamEvent>,
    ) -> Result<()> {
        let mut pending = String::new();
        while let Some(event) = rx.recv().await {
            match event {
                StreamEvent::Done { .. } => break,
                StreamEvent::TextDelta(delta) => {
                    pending.push_str(&delta);
                    let Some(end) = find_sentence_end(&pending) else {
                        continue;
                    };
                    // Flush everything through the last terminator;
                    // the tail stays buffered for the next delta.
                    let complete: String = pending.drain(..=end).collect();
                    let spoken = complete.trim();
                    if !spoken.is_empty() {
                        self.speak(spoken).await?;
                    }
                }
                _ => {}
            }
        }
        let leftover = pending.trim();
        if !leftover.is_empty() {
            self.speak(leftover).await?;
        }
        Ok(())
    }

    /// Wait for the sidecar's oneshot to resolve, playing a short
    /// progress phrase every `progress_interval_secs` while the user
    /// would otherwise be staring at silence. Returns:
    /// - `Some(Ok(result))` on a sidecar hit within the cap
    /// - `Some(Err(error))` when the sidecar resolved with an error
    ///   (cancellation, inference failure)
    /// - `None` when the cap (the lesser of `sidecar_timeout` and
    ///   `max_progress_attempts × progress_interval_secs`) elapsed
    ///   without resolution
    async fn wait_with_progress(
        &self,
        sidecar: tokio::sync::oneshot::Receiver<std::result::Result<SidecarResult, VoiceTurnError>>,
        our_turn: u64,
    ) -> Option<std::result::Result<SidecarResult, VoiceTurnError>> {
        let interval = Duration::from_secs(
            self.config
                .progress_interval_secs
                .unwrap_or(DEFAULT_PROGRESS_INTERVAL.as_secs()),
        );
        let max_attempts = self
            .config
            .max_progress_attempts
            .unwrap_or(DEFAULT_MAX_PROGRESS_ATTEMPTS);
        let progress_cap = interval.saturating_mul(max_attempts);
        let total_cap = std::cmp::min(progress_cap, self.sidecar_timeout);
        let deadline = tokio::time::Instant::now() + total_cap;

        tokio::pin!(sidecar);
        let mut attempts: u32 = 0;
        loop {
            // Sleep up to `interval`, but never past the overall deadline.
            let now = tokio::time::Instant::now();
            if now >= deadline {
                return None;
            }
            let remaining = deadline.saturating_duration_since(now);
            let sleep_for = std::cmp::min(interval, remaining);

            tokio::select! {
                // `biased` so a resolved sidecar always wins over a
                // simultaneously-expired sleep — never play a progress
                // phrase when the real answer is already available.
                biased;
                outcome = &mut sidecar => {
                    // A recv error means the sender was dropped (the
                    // turn's tasks went away); treat as cancellation.
                    return Some(outcome.unwrap_or(Err(VoiceTurnError::Cancelled)));
                }
                _ = tokio::time::sleep(sleep_for) => {
                    if tokio::time::Instant::now() >= deadline {
                        return None;
                    }
                    attempts += 1;
                    if attempts > max_attempts {
                        return None;
                    }
                    // Bail if we've been superseded — no point playing a
                    // progress phrase for a turn that's no longer current.
                    if self.current_turn_id.load(Ordering::Acquire) != our_turn {
                        // Wait briefly to see if the sidecar resolves anyway.
                        return match tokio::time::timeout(
                            Duration::from_millis(50),
                            &mut sidecar,
                        )
                        .await
                        {
                            Ok(Ok(r)) => Some(r),
                            _ => Some(Err(VoiceTurnError::Cancelled)),
                        };
                    }
                    // The progress phrase plays under the pipeline lock
                    // so it can't talk over other TTS playback.
                    let _permit = match self
                        .pipeline_lock
                        .clone()
                        .acquire_owned()
                        .await
                    {
                        Ok(p) => p,
                        Err(_) => return None,
                    };
                    if let Err(e) = self.speak(PROGRESS_PHRASE).await {
                        tracing::warn!(turn_id = our_turn, error=%e, "progress phrase playback failed");
                    }
                    drop(_permit);
                }
            }
        }
    }

    /// Play the sidecar's answer unless this turn has been
    /// superseded. The staleness gate runs twice: once on entry, and
    /// again AFTER the pipeline permit is acquired — waiting for the
    /// permit can take arbitrarily long, and a barge-in during that
    /// wait bumps `current_turn_id`; without the re-check the stale
    /// answer would play over the new turn.
    async fn maybe_play_sidecar(&self, our_turn: u64, result: SidecarResult) -> Result<()> {
        let now = self.current_turn_id.load(Ordering::Acquire);
        if result.turn_id != now {
            tracing::info!(
                stale = result.turn_id,
                current = now,
                "dropped stale sidecar result"
            );
            return Ok(());
        }
        let _permit = self
            .pipeline_lock
            .clone()
            .acquire_owned()
            .await
            .map_err(|_| VoiceError::Playback("pipeline closed".into()))?;
        // Re-check after the permit wait: a barge-in while we were
        // queued means this result is no longer wanted.
        let now = self.current_turn_id.load(Ordering::Acquire);
        if result.turn_id != now {
            tracing::info!(
                stale = result.turn_id,
                current = now,
                "dropped stale sidecar result after permit wait"
            );
            return Ok(());
        }
        self.speak(&result.text).await?;
        let _ = our_turn; // logged via tracing field elsewhere; keep param so
                          // future telemetry can correlate without churn.
        Ok(())
    }

    /// Speak `text`, routing through the cancellable mixer path when
    /// one is attached, or the speaker's `speak` fallback otherwise.
    /// Empty/whitespace input is a no-op.
    async fn speak(&self, text: &str) -> Result<()> {
        if text.trim().is_empty() {
            return Ok(());
        }
        let Some(mixer) = &self.mixer else {
            return self.speaker.speak(text).await;
        };
        let audio = self.speaker.synth(text).await?;
        mixer.queue_tts(audio.bytes);
        // Poll the mixer's speaking flag until playback drains.
        // NOTE(review): this assumes `queue_tts` raises the flag
        // before the first load below — if the mixer sets it
        // asynchronously, the loop could exit before playback starts.
        // Confirm against the mixer implementation.
        let speaking = mixer.speaking_flag();
        while speaking.load(Ordering::Relaxed) {
            tokio::time::sleep(Duration::from_millis(20)).await;
        }
        Ok(())
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    // Pure-function tests for classify_utterance / bridge_phrase /
    // format_for_voice live alongside their definitions in
    // `crate::utterance`. The orchestrator's unit tests cover the
    // pieces that depend on the macOS-only mixer/dispatch machinery.

    #[test]
    fn find_sentence_end_basic() {
        // No terminator anywhere → None.
        assert_eq!(find_sentence_end(""), None);
        assert_eq!(find_sentence_end("hello world"), None);
        // One or more sentences → byte index of the LAST terminator.
        assert_eq!(find_sentence_end("Hello."), Some(5));
        assert_eq!(find_sentence_end("Hi! Bye."), Some(7));
        assert_eq!(find_sentence_end("Hi! And then..."), Some(14));
    }

    #[test]
    fn voice_config_progress_defaults_round_trip() {
        // The progress-tuning fields default to None (use built-ins)
        // and must survive a JSON round-trip for hot-reload paths.
        let cfg = VoiceConfig::default();
        assert!(cfg.progress_interval_secs.is_none());
        assert!(cfg.max_progress_attempts.is_none());
        let round_tripped: VoiceConfig =
            serde_json::from_str(&serde_json::to_string(&cfg).unwrap()).unwrap();
        assert_eq!(round_tripped.progress_interval_secs, None);
        assert_eq!(round_tripped.max_progress_attempts, None);
    }
}
}