1use std::collections::BTreeMap;
2
3use runtime_core::{MobileCapability, OperationId, OperationMetadata, RuntimeCapabilities};
4use serde::{Deserialize, Serialize};
5use video_analysis_core::{OwnedTextSegment, TextSegment, Timebase, Timestamp};
6
7use crate::{segment_document_id, OwnedTextDocument, TextDocument, TextSpan};
8
9#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
10#[serde(rename_all = "camelCase")]
11pub struct TimebaseContract {
12 pub num: i32,
13 pub den: i32,
14}
15
16impl From<Timebase> for TimebaseContract {
17 fn from(value: Timebase) -> Self {
18 Self {
19 num: value.num,
20 den: value.den,
21 }
22 }
23}
24
25impl From<TimebaseContract> for Timebase {
26 fn from(value: TimebaseContract) -> Self {
27 Self::new(value.num, value.den)
28 }
29}
30
31#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
32#[serde(rename_all = "camelCase")]
33pub struct TimestampContract {
34 pub pts: i64,
35 pub timebase: TimebaseContract,
36}
37
38impl TimestampContract {
39 pub fn seconds(self) -> f64 {
40 Timestamp::from(self).seconds()
41 }
42}
43
44impl From<Timestamp> for TimestampContract {
45 fn from(value: Timestamp) -> Self {
46 Self {
47 pts: value.pts,
48 timebase: value.timebase.into(),
49 }
50 }
51}
52
53impl From<TimestampContract> for Timestamp {
54 fn from(value: TimestampContract) -> Self {
55 Self::new(value.pts, value.timebase.into())
56 }
57}
58
59#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
60#[serde(rename_all = "camelCase")]
61pub struct TextSourceRef {
62 #[serde(default)]
63 pub source_id: Option<String>,
64 #[serde(default)]
65 pub source_kind: Option<String>,
66 #[serde(default)]
67 pub uri: Option<String>,
68 #[serde(default)]
69 pub media_timestamp: Option<TimestampContract>,
70 #[serde(default)]
71 pub duration_seconds: Option<f64>,
72}
73
74#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
75#[serde(rename_all = "camelCase")]
76pub struct TextProvenance {
77 #[serde(default)]
78 pub crate_name: Option<String>,
79 #[serde(default)]
80 pub operation: Option<String>,
81 #[serde(default)]
82 pub model_id: Option<String>,
83 #[serde(default)]
84 pub runtime: Option<String>,
85 #[serde(default)]
86 pub confidence: Option<f32>,
87}
88
89#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
90#[serde(rename_all = "camelCase")]
91pub struct TextAnnotationSpan {
92 pub span: TextSpan,
93 #[serde(default)]
94 pub token_start: Option<usize>,
95 #[serde(default)]
96 pub token_end: Option<usize>,
97 #[serde(default)]
98 pub source_segment_id: Option<String>,
99}
100
101#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
102#[serde(rename_all = "camelCase")]
103pub struct TextDocumentContract {
104 pub id: String,
105 pub text: String,
106 #[serde(default)]
107 pub language: Option<String>,
108 #[serde(default)]
109 pub timestamp: Option<TimestampContract>,
110 #[serde(default)]
111 pub attributes: BTreeMap<String, String>,
112 #[serde(default)]
113 pub source: Option<TextSourceRef>,
114 #[serde(default)]
115 pub provenance: Vec<TextProvenance>,
116 #[serde(default)]
117 pub annotations: Vec<TextAnnotationSpan>,
118}
119
120impl TextDocumentContract {
121 pub fn new(id: impl Into<String>, text: impl Into<String>) -> Self {
122 Self {
123 id: id.into(),
124 text: text.into(),
125 language: None,
126 timestamp: None,
127 attributes: BTreeMap::new(),
128 source: None,
129 provenance: Vec::new(),
130 annotations: Vec::new(),
131 }
132 }
133
134 pub fn from_segment_contract(segment: &TextSegmentContract) -> Self {
135 segment.to_text_document_contract()
136 }
137
138 pub fn to_text_segment_contract(&self, segment_index: u64) -> TextSegmentContract {
139 TextSegmentContract::from_document_contract(self, segment_index)
140 }
141}
142
143pub trait IntoTextDocumentContract {
144 fn into_text_document_contract(self) -> TextDocumentContract;
145}
146
147impl IntoTextDocumentContract for TextDocument<'_> {
148 fn into_text_document_contract(self) -> TextDocumentContract {
149 TextDocumentContract {
150 id: self.id.to_string(),
151 text: self.text.to_string(),
152 language: self.language.map(ToString::to_string),
153 timestamp: self.timestamp.map(Into::into),
154 attributes: BTreeMap::new(),
155 source: None,
156 provenance: Vec::new(),
157 annotations: Vec::new(),
158 }
159 }
160}
161
162impl IntoTextDocumentContract for OwnedTextDocument {
163 fn into_text_document_contract(self) -> TextDocumentContract {
164 TextDocumentContract {
165 id: self.id,
166 text: self.text,
167 language: self.language,
168 timestamp: self.timestamp.map(Into::into),
169 attributes: BTreeMap::new(),
170 source: None,
171 provenance: Vec::new(),
172 annotations: Vec::new(),
173 }
174 }
175}
176
177impl IntoTextDocumentContract for &OwnedTextDocument {
178 fn into_text_document_contract(self) -> TextDocumentContract {
179 self.as_document().into_text_document_contract()
180 }
181}
182
183impl From<TextDocument<'_>> for TextDocumentContract {
184 fn from(value: TextDocument<'_>) -> Self {
185 value.into_text_document_contract()
186 }
187}
188
189impl From<OwnedTextDocument> for TextDocumentContract {
190 fn from(value: OwnedTextDocument) -> Self {
191 value.into_text_document_contract()
192 }
193}
194
195#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
196#[serde(rename_all = "camelCase")]
197pub struct TextSegmentContract {
198 #[serde(default)]
199 pub stream_id: Option<String>,
200 pub segment_index: u64,
201 pub text: String,
202 #[serde(default)]
203 pub language: Option<String>,
204 #[serde(default)]
205 pub timestamp: Option<TimestampContract>,
206 #[serde(default)]
207 pub duration_seconds: Option<f64>,
208 pub is_final: bool,
209 #[serde(default)]
210 pub attributes: BTreeMap<String, String>,
211 #[serde(default)]
212 pub source: Option<TextSourceRef>,
213 #[serde(default)]
214 pub provenance: Vec<TextProvenance>,
215 #[serde(default)]
216 pub annotations: Vec<TextAnnotationSpan>,
217}
218
219impl TextSegmentContract {
220 pub fn new(segment_index: u64, text: impl Into<String>) -> Self {
221 Self {
222 stream_id: None,
223 segment_index,
224 text: text.into(),
225 language: None,
226 timestamp: None,
227 duration_seconds: None,
228 is_final: true,
229 attributes: BTreeMap::new(),
230 source: None,
231 provenance: Vec::new(),
232 annotations: Vec::new(),
233 }
234 }
235
236 pub fn document_id(&self) -> Option<String> {
237 self.stream_id
238 .as_deref()
239 .map(|stream_id| segment_document_id(stream_id, self.segment_index))
240 }
241
242 pub fn to_owned_text_segment(&self) -> OwnedTextSegment {
243 let mut segment =
244 OwnedTextSegment::new(self.segment_index, self.text.clone()).finality(self.is_final);
245 if let Some(language) = &self.language {
246 segment = segment.language(language.clone());
247 }
248 if let Some(timestamp) = self.timestamp {
249 segment = segment.timestamp(timestamp.into());
250 }
251 segment
252 }
253
254 pub fn to_text_document_contract(&self) -> TextDocumentContract {
255 TextDocumentContract {
256 id: self
257 .document_id()
258 .unwrap_or_else(|| self.segment_index.to_string()),
259 text: self.text.clone(),
260 language: self.language.clone(),
261 timestamp: self.timestamp,
262 attributes: self.attributes.clone(),
263 source: self.source.clone().or_else(|| {
264 (self.timestamp.is_some() || self.duration_seconds.is_some()).then(|| {
265 TextSourceRef {
266 source_id: self.stream_id.clone(),
267 source_kind: Some("text_segment".to_string()),
268 uri: None,
269 media_timestamp: self.timestamp,
270 duration_seconds: self.duration_seconds,
271 }
272 })
273 }),
274 provenance: self.provenance.clone(),
275 annotations: self.annotations.clone(),
276 }
277 }
278
279 pub fn from_document_contract(document: &TextDocumentContract, segment_index: u64) -> Self {
280 Self {
281 stream_id: None,
282 segment_index,
283 text: document.text.clone(),
284 language: document.language.clone(),
285 timestamp: document.timestamp.or_else(|| {
286 document
287 .source
288 .as_ref()
289 .and_then(|source| source.media_timestamp)
290 }),
291 duration_seconds: document
292 .source
293 .as_ref()
294 .and_then(|source| source.duration_seconds),
295 is_final: true,
296 attributes: document.attributes.clone(),
297 source: document.source.clone(),
298 provenance: document.provenance.clone(),
299 annotations: document.annotations.clone(),
300 }
301 }
302}
303
304pub trait AsTextSegmentContract {
305 fn as_text_segment_contract(&self) -> TextSegmentContract;
306}
307
308impl AsTextSegmentContract for TextSegment<'_> {
309 fn as_text_segment_contract(&self) -> TextSegmentContract {
310 TextSegmentContract {
311 stream_id: None,
312 segment_index: self.segment_index,
313 text: self.text.to_string(),
314 language: self.language.map(ToString::to_string),
315 timestamp: self.timestamp.map(Into::into),
316 duration_seconds: None,
317 is_final: self.is_final,
318 attributes: BTreeMap::new(),
319 source: None,
320 provenance: Vec::new(),
321 annotations: Vec::new(),
322 }
323 }
324}
325
326impl AsTextSegmentContract for OwnedTextSegment {
327 fn as_text_segment_contract(&self) -> TextSegmentContract {
328 self.as_segment().as_text_segment_contract()
329 }
330}
331
332impl From<TextSegment<'_>> for TextSegmentContract {
333 fn from(value: TextSegment<'_>) -> Self {
334 value.as_text_segment_contract()
335 }
336}
337
338impl From<OwnedTextSegment> for TextSegmentContract {
339 fn from(value: OwnedTextSegment) -> Self {
340 value.as_text_segment_contract()
341 }
342}
343
344#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
345#[serde(rename_all = "camelCase")]
346pub struct TextStatisticsRequest {
347 pub text: String,
348}
349
350#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
351#[serde(rename_all = "camelCase")]
352pub struct TextStatisticsResult {
353 pub byte_count: usize,
354 pub character_count: usize,
355 pub word_count: usize,
356 pub line_count: usize,
357 pub sentence_count: usize,
358}
359
360pub fn text_statistics_metadata() -> OperationMetadata {
361 OperationMetadata {
362 id: OperationId::new("text.statistics"),
363 name: "Text statistics".to_string(),
364 description: Some("Counts bytes, characters, words, lines, and sentences.".to_string()),
365 version: env!("CARGO_PKG_VERSION").to_string(),
366 capabilities: RuntimeCapabilities {
367 native: true,
368 server: true,
369 wasm: true,
370 mobile: MobileCapability::Wasm,
371 requirements: Vec::new(),
372 max_recommended_input_bytes: Some(1_000_000),
373 },
374 }
375}