1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
use std::time::Duration;
use crate::error::{Error, Result};
/// Configuration for the audio chunker.
///
/// Controls how audio is segmented into processing chunks.
///
/// All fields are public, so a value built with struct-literal syntax is not
/// validated; `ChunkerConfig::new` is the constructor that checks the
/// constraints described on the individual fields.
///
/// Two configurations compare equal when all five durations are equal.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ChunkerConfig {
    /// Target duration for each chunk (default: 500ms).
    ///
    /// Chunks will be approximately this duration, but may vary by up to
    /// `duration_tolerance` to align with speech boundaries.
    pub target_duration: Duration,

    /// Maximum allowed chunk duration before forced split (default: 600ms).
    ///
    /// Long speech segments exceeding this duration will be split into
    /// multiple chunks to maintain streaming latency guarantees.
    pub max_duration: Duration,

    /// Tolerance for chunk duration variance (default: 100ms).
    ///
    /// Chunks may be `target_duration ± duration_tolerance` to better
    /// align with natural speech boundaries from VAD.
    pub duration_tolerance: Duration,

    /// Minimum chunk duration to emit (default: 100ms).
    ///
    /// Segments shorter than this are buffered or merged with adjacent chunks
    /// to avoid inefficient processing of tiny fragments.
    pub min_duration: Duration,

    /// Duration of overlap between adjacent chunks (default: 50ms).
    ///
    /// Preserves acoustic context across chunk boundaries. Must be in range
    /// 20-80ms; `ChunkerConfig::new` rejects values outside it. Overlaps are
    /// stored in `ProcessedChunk::overlap_prev` and
    /// `ProcessedChunk::overlap_next`.
    pub overlap_duration: Duration,
}
impl Default for ChunkerConfig {
    /// Build the default configuration: 500ms target, 600ms maximum,
    /// 100ms tolerance, 100ms minimum and 50ms overlap.
    fn default() -> Self {
        // Every default is a whole number of milliseconds, so route them all
        // through one short alias instead of repeating the full path.
        let ms = Duration::from_millis;

        Self {
            target_duration: ms(500),
            max_duration: ms(600),
            duration_tolerance: ms(100),
            min_duration: ms(100),
            overlap_duration: ms(50),
        }
    }
}
impl ChunkerConfig {
    /// Create a new chunker configuration with validation.
    ///
    /// Checks run in the order listed under `# Errors`; the first failing
    /// check determines the returned error. `duration_tolerance` is stored as
    /// given and is not validated.
    ///
    /// # Errors
    ///
    /// Returns `Error::InvalidInput` if:
    /// - `target_duration` is shorter than one millisecond (this includes zero)
    /// - `target_duration` exceeds 5 seconds
    /// - `max_duration` is less than `target_duration`
    /// - `min_duration` exceeds `target_duration`
    /// - `overlap_duration` is outside the inclusive range 20-80ms
    pub fn new(
        target_duration: Duration,
        max_duration: Duration,
        duration_tolerance: Duration,
        min_duration: Duration,
        overlap_duration: Duration,
    ) -> Result<Self> {
        const MIN_TARGET: Duration = Duration::from_millis(1);
        const MAX_TARGET: Duration = Duration::from_secs(5);
        const MIN_OVERLAP: Duration = Duration::from_millis(20);
        const MAX_OVERLAP: Duration = Duration::from_millis(80);

        // Targets below one millisecond are rejected together with zero; this
        // is the same set of values for which `as_millis()` returns 0.
        if target_duration < MIN_TARGET {
            return Err(Error::InvalidInput(
                "target_duration must be greater than zero".into(),
            ));
        }
        if target_duration > MAX_TARGET {
            return Err(Error::InvalidInput(
                "target_duration must not exceed 5 seconds".into(),
            ));
        }
        if max_duration < target_duration {
            return Err(Error::InvalidInput(
                "max_duration must be >= target_duration".into(),
            ));
        }
        if min_duration > target_duration {
            return Err(Error::InvalidInput(
                "min_duration must be <= target_duration".into(),
            ));
        }

        // Compare full-precision durations. Converting to whole milliseconds
        // first would truncate, letting e.g. 80.9ms slip through as "80".
        if !(MIN_OVERLAP..=MAX_OVERLAP).contains(&overlap_duration) {
            return Err(Error::InvalidInput(format!(
                "overlap_duration must be 20-80ms, got {overlap_duration:?}"
            )));
        }

        Ok(Self {
            target_duration,
            max_duration,
            duration_tolerance,
            min_duration,
            overlap_duration,
        })
    }

    /// Create a configuration optimized for real-time streaming (smaller
    /// chunks).
    ///
    /// Uses a 250ms target, 300ms maximum, 50ms tolerance, 100ms minimum and
    /// 50ms overlap.
    #[must_use]
    pub fn streaming() -> Self {
        Self {
            target_duration: Duration::from_millis(250),
            max_duration: Duration::from_millis(300),
            duration_tolerance: Duration::from_millis(50),
            min_duration: Duration::from_millis(100),
            overlap_duration: Duration::from_millis(50),
        }
    }

    /// Create a configuration optimized for batch processing (larger chunks).
    ///
    /// Uses a 1s target, 1200ms maximum, 200ms tolerance, 200ms minimum and
    /// 50ms overlap.
    #[must_use]
    pub fn batch() -> Self {
        Self {
            target_duration: Duration::from_secs(1),
            max_duration: Duration::from_millis(1200),
            duration_tolerance: Duration::from_millis(200),
            min_duration: Duration::from_millis(200),
            overlap_duration: Duration::from_millis(50),
        }
    }
}