Skip to main content

text_core/
contracts.rs

1use std::collections::BTreeMap;
2
3use runtime_core::{MobileCapability, OperationId, OperationMetadata, RuntimeCapabilities};
4use serde::{Deserialize, Serialize};
5use video_analysis_core::{OwnedTextSegment, TextSegment, Timebase, Timestamp};
6
7use crate::{segment_document_id, OwnedTextDocument, TextDocument, TextSpan};
8
9#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
10#[serde(rename_all = "camelCase")]
11pub struct TimebaseContract {
12    pub num: i32,
13    pub den: i32,
14}
15
16impl From<Timebase> for TimebaseContract {
17    fn from(value: Timebase) -> Self {
18        Self {
19            num: value.num,
20            den: value.den,
21        }
22    }
23}
24
25impl From<TimebaseContract> for Timebase {
26    fn from(value: TimebaseContract) -> Self {
27        Self::new(value.num, value.den)
28    }
29}
30
31#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
32#[serde(rename_all = "camelCase")]
33pub struct TimestampContract {
34    pub pts: i64,
35    pub timebase: TimebaseContract,
36}
37
38impl TimestampContract {
39    pub fn seconds(self) -> f64 {
40        Timestamp::from(self).seconds()
41    }
42}
43
44impl From<Timestamp> for TimestampContract {
45    fn from(value: Timestamp) -> Self {
46        Self {
47            pts: value.pts,
48            timebase: value.timebase.into(),
49        }
50    }
51}
52
53impl From<TimestampContract> for Timestamp {
54    fn from(value: TimestampContract) -> Self {
55        Self::new(value.pts, value.timebase.into())
56    }
57}
58
59#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
60#[serde(rename_all = "camelCase")]
61pub struct TextSourceRef {
62    #[serde(default)]
63    pub source_id: Option<String>,
64    #[serde(default)]
65    pub source_kind: Option<String>,
66    #[serde(default)]
67    pub uri: Option<String>,
68    #[serde(default)]
69    pub media_timestamp: Option<TimestampContract>,
70    #[serde(default)]
71    pub duration_seconds: Option<f64>,
72}
73
74#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
75#[serde(rename_all = "camelCase")]
76pub struct TextProvenance {
77    #[serde(default)]
78    pub crate_name: Option<String>,
79    #[serde(default)]
80    pub operation: Option<String>,
81    #[serde(default)]
82    pub model_id: Option<String>,
83    #[serde(default)]
84    pub runtime: Option<String>,
85    #[serde(default)]
86    pub confidence: Option<f32>,
87}
88
89#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
90#[serde(rename_all = "camelCase")]
91pub struct TextAnnotationSpan {
92    pub span: TextSpan,
93    #[serde(default)]
94    pub token_start: Option<usize>,
95    #[serde(default)]
96    pub token_end: Option<usize>,
97    #[serde(default)]
98    pub source_segment_id: Option<String>,
99}
100
101#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
102#[serde(rename_all = "camelCase")]
103pub struct TextDocumentContract {
104    pub id: String,
105    pub text: String,
106    #[serde(default)]
107    pub language: Option<String>,
108    #[serde(default)]
109    pub timestamp: Option<TimestampContract>,
110    #[serde(default)]
111    pub attributes: BTreeMap<String, String>,
112    #[serde(default)]
113    pub source: Option<TextSourceRef>,
114    #[serde(default)]
115    pub provenance: Vec<TextProvenance>,
116    #[serde(default)]
117    pub annotations: Vec<TextAnnotationSpan>,
118}
119
120impl TextDocumentContract {
121    pub fn new(id: impl Into<String>, text: impl Into<String>) -> Self {
122        Self {
123            id: id.into(),
124            text: text.into(),
125            language: None,
126            timestamp: None,
127            attributes: BTreeMap::new(),
128            source: None,
129            provenance: Vec::new(),
130            annotations: Vec::new(),
131        }
132    }
133
134    pub fn from_segment_contract(segment: &TextSegmentContract) -> Self {
135        segment.to_text_document_contract()
136    }
137
138    pub fn to_text_segment_contract(&self, segment_index: u64) -> TextSegmentContract {
139        TextSegmentContract::from_document_contract(self, segment_index)
140    }
141}
142
143pub trait IntoTextDocumentContract {
144    fn into_text_document_contract(self) -> TextDocumentContract;
145}
146
147impl IntoTextDocumentContract for TextDocument<'_> {
148    fn into_text_document_contract(self) -> TextDocumentContract {
149        TextDocumentContract {
150            id: self.id.to_string(),
151            text: self.text.to_string(),
152            language: self.language.map(ToString::to_string),
153            timestamp: self.timestamp.map(Into::into),
154            attributes: BTreeMap::new(),
155            source: None,
156            provenance: Vec::new(),
157            annotations: Vec::new(),
158        }
159    }
160}
161
162impl IntoTextDocumentContract for OwnedTextDocument {
163    fn into_text_document_contract(self) -> TextDocumentContract {
164        TextDocumentContract {
165            id: self.id,
166            text: self.text,
167            language: self.language,
168            timestamp: self.timestamp.map(Into::into),
169            attributes: BTreeMap::new(),
170            source: None,
171            provenance: Vec::new(),
172            annotations: Vec::new(),
173        }
174    }
175}
176
177impl IntoTextDocumentContract for &OwnedTextDocument {
178    fn into_text_document_contract(self) -> TextDocumentContract {
179        self.as_document().into_text_document_contract()
180    }
181}
182
183impl From<TextDocument<'_>> for TextDocumentContract {
184    fn from(value: TextDocument<'_>) -> Self {
185        value.into_text_document_contract()
186    }
187}
188
189impl From<OwnedTextDocument> for TextDocumentContract {
190    fn from(value: OwnedTextDocument) -> Self {
191        value.into_text_document_contract()
192    }
193}
194
195#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
196#[serde(rename_all = "camelCase")]
197pub struct TextSegmentContract {
198    #[serde(default)]
199    pub stream_id: Option<String>,
200    pub segment_index: u64,
201    pub text: String,
202    #[serde(default)]
203    pub language: Option<String>,
204    #[serde(default)]
205    pub timestamp: Option<TimestampContract>,
206    #[serde(default)]
207    pub duration_seconds: Option<f64>,
208    pub is_final: bool,
209    #[serde(default)]
210    pub attributes: BTreeMap<String, String>,
211    #[serde(default)]
212    pub source: Option<TextSourceRef>,
213    #[serde(default)]
214    pub provenance: Vec<TextProvenance>,
215    #[serde(default)]
216    pub annotations: Vec<TextAnnotationSpan>,
217}
218
219impl TextSegmentContract {
220    pub fn new(segment_index: u64, text: impl Into<String>) -> Self {
221        Self {
222            stream_id: None,
223            segment_index,
224            text: text.into(),
225            language: None,
226            timestamp: None,
227            duration_seconds: None,
228            is_final: true,
229            attributes: BTreeMap::new(),
230            source: None,
231            provenance: Vec::new(),
232            annotations: Vec::new(),
233        }
234    }
235
236    pub fn document_id(&self) -> Option<String> {
237        self.stream_id
238            .as_deref()
239            .map(|stream_id| segment_document_id(stream_id, self.segment_index))
240    }
241
242    pub fn to_owned_text_segment(&self) -> OwnedTextSegment {
243        let mut segment =
244            OwnedTextSegment::new(self.segment_index, self.text.clone()).finality(self.is_final);
245        if let Some(language) = &self.language {
246            segment = segment.language(language.clone());
247        }
248        if let Some(timestamp) = self.timestamp {
249            segment = segment.timestamp(timestamp.into());
250        }
251        segment
252    }
253
254    pub fn to_text_document_contract(&self) -> TextDocumentContract {
255        TextDocumentContract {
256            id: self
257                .document_id()
258                .unwrap_or_else(|| self.segment_index.to_string()),
259            text: self.text.clone(),
260            language: self.language.clone(),
261            timestamp: self.timestamp,
262            attributes: self.attributes.clone(),
263            source: self.source.clone().or_else(|| {
264                (self.timestamp.is_some() || self.duration_seconds.is_some()).then(|| {
265                    TextSourceRef {
266                        source_id: self.stream_id.clone(),
267                        source_kind: Some("text_segment".to_string()),
268                        uri: None,
269                        media_timestamp: self.timestamp,
270                        duration_seconds: self.duration_seconds,
271                    }
272                })
273            }),
274            provenance: self.provenance.clone(),
275            annotations: self.annotations.clone(),
276        }
277    }
278
279    pub fn from_document_contract(document: &TextDocumentContract, segment_index: u64) -> Self {
280        Self {
281            stream_id: None,
282            segment_index,
283            text: document.text.clone(),
284            language: document.language.clone(),
285            timestamp: document.timestamp.or_else(|| {
286                document
287                    .source
288                    .as_ref()
289                    .and_then(|source| source.media_timestamp)
290            }),
291            duration_seconds: document
292                .source
293                .as_ref()
294                .and_then(|source| source.duration_seconds),
295            is_final: true,
296            attributes: document.attributes.clone(),
297            source: document.source.clone(),
298            provenance: document.provenance.clone(),
299            annotations: document.annotations.clone(),
300        }
301    }
302}
303
304pub trait AsTextSegmentContract {
305    fn as_text_segment_contract(&self) -> TextSegmentContract;
306}
307
308impl AsTextSegmentContract for TextSegment<'_> {
309    fn as_text_segment_contract(&self) -> TextSegmentContract {
310        TextSegmentContract {
311            stream_id: None,
312            segment_index: self.segment_index,
313            text: self.text.to_string(),
314            language: self.language.map(ToString::to_string),
315            timestamp: self.timestamp.map(Into::into),
316            duration_seconds: None,
317            is_final: self.is_final,
318            attributes: BTreeMap::new(),
319            source: None,
320            provenance: Vec::new(),
321            annotations: Vec::new(),
322        }
323    }
324}
325
326impl AsTextSegmentContract for OwnedTextSegment {
327    fn as_text_segment_contract(&self) -> TextSegmentContract {
328        self.as_segment().as_text_segment_contract()
329    }
330}
331
332impl From<TextSegment<'_>> for TextSegmentContract {
333    fn from(value: TextSegment<'_>) -> Self {
334        value.as_text_segment_contract()
335    }
336}
337
338impl From<OwnedTextSegment> for TextSegmentContract {
339    fn from(value: OwnedTextSegment) -> Self {
340        value.as_text_segment_contract()
341    }
342}
343
344#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
345#[serde(rename_all = "camelCase")]
346pub struct TextStatisticsRequest {
347    pub text: String,
348}
349
350#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
351#[serde(rename_all = "camelCase")]
352pub struct TextStatisticsResult {
353    pub byte_count: usize,
354    pub character_count: usize,
355    pub word_count: usize,
356    pub line_count: usize,
357    pub sentence_count: usize,
358}
359
360pub fn text_statistics_metadata() -> OperationMetadata {
361    OperationMetadata {
362        id: OperationId::new("text.statistics"),
363        name: "Text statistics".to_string(),
364        description: Some("Counts bytes, characters, words, lines, and sentences.".to_string()),
365        version: env!("CARGO_PKG_VERSION").to_string(),
366        capabilities: RuntimeCapabilities {
367            native: true,
368            server: true,
369            wasm: true,
370            mobile: MobileCapability::Wasm,
371            requirements: Vec::new(),
372            max_recommended_input_bytes: Some(1_000_000),
373        },
374    }
375}