speech_prep/chunker/config.rs
1use std::time::Duration;
2
3use crate::error::{Error, Result};
4
/// Configuration for the audio chunker.
///
/// Controls how audio is segmented into processing chunks. The defaults
/// quoted on each field are the values produced by `ChunkerConfig::default()`.
///
/// All fields are public, so a value assembled with a struct literal skips
/// the checks performed by `ChunkerConfig::new`.
///
/// Implements `PartialEq`/`Eq` so configurations can be compared directly
/// (for example with `assert_eq!`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ChunkerConfig {
    /// Target duration for each chunk (default: 500ms).
    ///
    /// Chunks will be approximately this duration, but may vary by up to
    /// `duration_tolerance` to align with speech boundaries.
    ///
    /// `ChunkerConfig::new` requires at least 1ms and at most 5 seconds.
    pub target_duration: Duration,

    /// Maximum allowed chunk duration before forced split (default: 600ms).
    ///
    /// Long speech segments exceeding this duration will be split into
    /// multiple chunks to maintain streaming latency guarantees.
    ///
    /// `ChunkerConfig::new` requires this to be at least `target_duration`.
    pub max_duration: Duration,

    /// Tolerance for chunk duration variance (default: 100ms).
    ///
    /// Chunks may be `target_duration ± duration_tolerance` to better
    /// align with natural speech boundaries from VAD.
    ///
    /// `ChunkerConfig::new` stores this value without checking it.
    pub duration_tolerance: Duration,

    /// Minimum chunk duration to emit (default: 100ms).
    ///
    /// Segments shorter than this are buffered or merged with adjacent chunks
    /// to avoid inefficient processing of tiny fragments.
    ///
    /// `ChunkerConfig::new` requires this to be at most `target_duration`.
    pub min_duration: Duration,

    /// Duration of overlap between adjacent chunks (default: 50ms).
    ///
    /// Preserves acoustic context across chunk boundaries. Must be in range
    /// 20-80ms (enforced by `ChunkerConfig::new`). Overlaps are stored in
    /// `ProcessedChunk::overlap_prev` and `ProcessedChunk::overlap_next`.
    pub overlap_duration: Duration,
}
41
42impl Default for ChunkerConfig {
43 fn default() -> Self {
44 Self {
45 target_duration: Duration::from_millis(500),
46 max_duration: Duration::from_millis(600),
47 duration_tolerance: Duration::from_millis(100),
48 min_duration: Duration::from_millis(100),
49 overlap_duration: Duration::from_millis(50),
50 }
51 }
52}
53
54impl ChunkerConfig {
55 /// Create a new chunker configuration with validation.
56 ///
57 /// # Errors
58 ///
59 /// Returns `Error::InvalidInput` if:
60 /// - `target_duration` is zero or exceeds 5 seconds
61 /// - `max_duration` is less than `target_duration`
62 /// - `min_duration` exceeds `target_duration`
63 /// - `overlap_duration` is outside range 20-80ms
64 pub fn new(
65 target_duration: Duration,
66 max_duration: Duration,
67 duration_tolerance: Duration,
68 min_duration: Duration,
69 overlap_duration: Duration,
70 ) -> Result<Self> {
71 if target_duration.as_millis() == 0 {
72 return Err(Error::InvalidInput(
73 "target_duration must be greater than zero".into(),
74 ));
75 }
76 if target_duration > Duration::from_secs(5) {
77 return Err(Error::InvalidInput(
78 "target_duration must not exceed 5 seconds".into(),
79 ));
80 }
81
82 if max_duration < target_duration {
83 return Err(Error::InvalidInput(
84 "max_duration must be >= target_duration".into(),
85 ));
86 }
87
88 if min_duration > target_duration {
89 return Err(Error::InvalidInput(
90 "min_duration must be <= target_duration".into(),
91 ));
92 }
93
94 let overlap_ms = overlap_duration.as_millis();
95 if !(20..=80).contains(&overlap_ms) {
96 return Err(Error::InvalidInput(format!(
97 "overlap_duration must be 20-80ms, got {overlap_ms}ms"
98 )));
99 }
100
101 Ok(Self {
102 target_duration,
103 max_duration,
104 duration_tolerance,
105 min_duration,
106 overlap_duration,
107 })
108 }
109
110 /// Create a configuration optimized for real-time streaming (smaller
111 /// chunks).
112 #[must_use]
113 pub fn streaming() -> Self {
114 Self {
115 target_duration: Duration::from_millis(250),
116 max_duration: Duration::from_millis(300),
117 duration_tolerance: Duration::from_millis(50),
118 min_duration: Duration::from_millis(100),
119 overlap_duration: Duration::from_millis(50),
120 }
121 }
122
123 /// Create a configuration optimized for batch processing (larger chunks).
124 #[must_use]
125 pub fn batch() -> Self {
126 Self {
127 target_duration: Duration::from_secs(1),
128 max_duration: Duration::from_millis(1200),
129 duration_tolerance: Duration::from_millis(200),
130 min_duration: Duration::from_millis(200),
131 overlap_duration: Duration::from_millis(50),
132 }
133 }
134}