Skip to main content

atomr_agents_stt_core/
transcript.rs

1//! Transcript output shape.
2
3use serde::{Deserialize, Serialize};
4
5use crate::kinds::BackendKind;
6
/// Transcript produced for one input, together with metadata about how it
/// was produced.
///
/// Serializable and deserializable via serde.
7#[derive(Debug, Clone, Serialize, Deserialize)]
8pub struct Transcript {
    /// Transcribed text. `from_text` stores the same string here and in its
    /// single segment.
9    pub text: String,
10    /// BCP-47 language tag, if known. `None` if the backend didn't return one and
11    /// language detection was off.
12    pub language: Option<String>,
    /// Timed segments of the transcript; `from_text` produces exactly one.
13    pub segments: Vec<Segment>,
    /// Duration in seconds (`from_text` multiplies it by 1000 to obtain the
    /// segment's `end_ms`). Presumably the length of the transcribed audio —
    /// TODO confirm.
14    pub duration_secs: f32,
    /// Backend kind associated with this transcript — presumably the backend
    /// that produced it.
15    pub backend: BackendKind,
    /// Optional model identifier; `from_text` leaves it `None`.
16    pub model_id: Option<String>,
    /// Optional cost, presumably in US dollars per the field name;
    /// `from_text` leaves it `None`.
17    pub cost_usd: Option<f32>,
18}
19
/// One timed span of a transcript, optionally broken down into words.
20#[derive(Debug, Clone, Serialize, Deserialize)]
21pub struct Segment {
    /// Text of this segment.
22    pub text: String,
    /// Start offset in milliseconds; `Transcript::from_text` uses 0.
    /// NOTE(review): presumably measured from the start of the audio — confirm.
23    pub start_ms: u32,
    /// End offset in milliseconds, on the same timeline as `start_ms`.
24    pub end_ms: u32,
    /// Word-level entries; empty when built via `Transcript::from_text`.
25    pub words: Vec<Word>,
    /// Speaker attribution, if any.
26    pub speaker: Option<SpeakerTag>,
    /// Optional confidence value. NOTE(review): scale/range is not
    /// established in this file — confirm against the backends.
27    pub confidence: Option<f32>,
28}
29
/// A single timed word inside a [`Segment`].
30#[derive(Debug, Clone, Serialize, Deserialize)]
31pub struct Word {
    /// Text of the word.
32    pub text: String,
    /// Start offset in milliseconds.
    /// NOTE(review): presumably on the same timeline as the segment offsets — confirm.
33    pub start_ms: u32,
    /// End offset in milliseconds.
34    pub end_ms: u32,
    /// Optional confidence value. NOTE(review): scale/range is not
    /// established in this file — confirm against the backends.
35    pub confidence: Option<f32>,
36}
37
/// Speaker attribution attached to a [`Segment`].
38#[derive(Debug, Clone, Serialize, Deserialize)]
39pub struct SpeakerTag {
    /// Numeric speaker identifier (fits in a `u8`, so at most 256 distinct values).
40    pub id: u8,
    /// Optional human-readable label for the speaker.
41    pub label: Option<String>,
42}
43
44impl Transcript {
45    /// Construct a single-segment transcript from text + timing —
46    /// useful for backends that only return aggregate text and
47    /// duration.
48    pub fn from_text(text: impl Into<String>, backend: BackendKind, duration_secs: f32) -> Self {
49        let text = text.into();
50        let end_ms = (duration_secs * 1000.0) as u32;
51        let segment = Segment {
52            text: text.clone(),
53            start_ms: 0,
54            end_ms,
55            words: Vec::new(),
56            speaker: None,
57            confidence: None,
58        };
59        Self {
60            text,
61            language: None,
62            segments: vec![segment],
63            duration_secs,
64            backend,
65            model_id: None,
66            cost_usd: None,
67        }
68    }
69}