Skip to main content

speech_prep/chunker/
config.rs

1use std::time::Duration;
2
3use crate::error::{Error, Result};
4
/// Configuration for the audio chunker.
///
/// Controls how audio is segmented into processing chunks.
///
/// Every field is public, so a value built with a struct literal (or by
/// mutating a field) skips the checks performed by [`ChunkerConfig::new`].
///
/// The type is `Copy` and comparable with `==`, which makes it cheap to pass
/// around and easy to assert on in tests.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ChunkerConfig {
    /// Target duration for each chunk (default: 500ms).
    ///
    /// Chunks will be approximately this duration, but may vary by up to
    /// `duration_tolerance` to align with speech boundaries.
    pub target_duration: Duration,

    /// Maximum allowed chunk duration before forced split (default: 600ms).
    ///
    /// Long speech segments exceeding this duration will be split into
    /// multiple chunks to maintain streaming latency guarantees.
    pub max_duration: Duration,

    /// Tolerance for chunk duration variance (default: 100ms).
    ///
    /// Chunks may be `target_duration ± duration_tolerance` to better
    /// align with natural speech boundaries from VAD.
    pub duration_tolerance: Duration,

    /// Minimum chunk duration to emit (default: 100ms).
    ///
    /// Segments shorter than this are buffered or merged with adjacent chunks
    /// to avoid inefficient processing of tiny fragments.
    pub min_duration: Duration,

    /// Duration of overlap between adjacent chunks (default: 50ms).
    ///
    /// Preserves acoustic context across chunk boundaries. Must be in range
    /// 20-80ms; that range is enforced by [`ChunkerConfig::new`] only.
    /// Overlaps are stored in `ProcessedChunk::overlap_prev` and
    /// `ProcessedChunk::overlap_next`.
    pub overlap_duration: Duration,
}
41
42impl Default for ChunkerConfig {
43    fn default() -> Self {
44        Self {
45            target_duration: Duration::from_millis(500),
46            max_duration: Duration::from_millis(600),
47            duration_tolerance: Duration::from_millis(100),
48            min_duration: Duration::from_millis(100),
49            overlap_duration: Duration::from_millis(50),
50        }
51    }
52}
53
54impl ChunkerConfig {
55    /// Create a new chunker configuration with validation.
56    ///
57    /// # Errors
58    ///
59    /// Returns `Error::InvalidInput` if:
60    /// - `target_duration` is zero or exceeds 5 seconds
61    /// - `max_duration` is less than `target_duration`
62    /// - `min_duration` exceeds `target_duration`
63    /// - `overlap_duration` is outside range 20-80ms
64    pub fn new(
65        target_duration: Duration,
66        max_duration: Duration,
67        duration_tolerance: Duration,
68        min_duration: Duration,
69        overlap_duration: Duration,
70    ) -> Result<Self> {
71        if target_duration.as_millis() == 0 {
72            return Err(Error::InvalidInput(
73                "target_duration must be greater than zero".into(),
74            ));
75        }
76        if target_duration > Duration::from_secs(5) {
77            return Err(Error::InvalidInput(
78                "target_duration must not exceed 5 seconds".into(),
79            ));
80        }
81
82        if max_duration < target_duration {
83            return Err(Error::InvalidInput(
84                "max_duration must be >= target_duration".into(),
85            ));
86        }
87
88        if min_duration > target_duration {
89            return Err(Error::InvalidInput(
90                "min_duration must be <= target_duration".into(),
91            ));
92        }
93
94        let overlap_ms = overlap_duration.as_millis();
95        if !(20..=80).contains(&overlap_ms) {
96            return Err(Error::InvalidInput(format!(
97                "overlap_duration must be 20-80ms, got {overlap_ms}ms"
98            )));
99        }
100
101        Ok(Self {
102            target_duration,
103            max_duration,
104            duration_tolerance,
105            min_duration,
106            overlap_duration,
107        })
108    }
109
110    /// Create a configuration optimized for real-time streaming (smaller
111    /// chunks).
112    #[must_use]
113    pub fn streaming() -> Self {
114        Self {
115            target_duration: Duration::from_millis(250),
116            max_duration: Duration::from_millis(300),
117            duration_tolerance: Duration::from_millis(50),
118            min_duration: Duration::from_millis(100),
119            overlap_duration: Duration::from_millis(50),
120        }
121    }
122
123    /// Create a configuration optimized for batch processing (larger chunks).
124    #[must_use]
125    pub fn batch() -> Self {
126        Self {
127            target_duration: Duration::from_secs(1),
128            max_duration: Duration::from_millis(1200),
129            duration_tolerance: Duration::from_millis(200),
130            min_duration: Duration::from_millis(200),
131            overlap_duration: Duration::from_millis(50),
132        }
133    }
134}