// Transcript output types: per-token timing, chunked text regions, and the final result.
/// One decoded token together with its timing metadata.
#[derive(Clone, Debug, PartialEq)]
pub struct TimedToken {
    /// Identifier of this token in the model vocabulary.
    pub token_id: u32,
    /// Human-readable text of the token, after SentencePiece cleanup.
    pub text: String,
    /// Time at which the token begins, in seconds.
    pub start: f64,
    /// Time at which the token finishes, in seconds.
    pub end: f64,
    /// Confidence score attached to this token.
    pub confidence: f32,
}
/// A timed span of text within the final transcript.
#[derive(Clone, Debug, PartialEq)]
pub struct TranscriptChunk {
    /// Time at which the chunk begins, in seconds.
    pub start: f64,
    /// Time at which the chunk finishes, in seconds.
    pub end: f64,
    /// Text decoded for this chunk.
    pub text: String,
}
/// The complete output of a transcription run.
#[derive(Clone, Debug, PartialEq)]
pub struct TranscriptionResult {
    /// The full transcript as a single string.
    pub text: String,
    /// The transcript split into timed chunks.
    pub chunks: Vec<TranscriptChunk>,
    /// Timestamps at token granularity.
    pub tokens: Vec<TimedToken>,
    /// Length of the input audio, in seconds.
    pub duration_seconds: f64,
}
impl TranscriptionResult {
    /// Builds a result with empty text and no chunks or tokens.
    ///
    /// `duration_seconds` is stored unchanged as the result's audio duration.
    pub(crate) fn empty(duration_seconds: f64) -> Self {
        let text = String::new();
        let chunks = Vec::new();
        let tokens = Vec::new();

        Self {
            text,
            chunks,
            tokens,
            duration_seconds,
        }
    }
}