// text_transcripts/surface.rs

//! Library-owned runtime surface for `text-transcripts`.
2
3use runtime_core::{
4    structured_surface_value, OperationId, PackageSurface, RuntimeCapabilities, SurfaceOperation,
5    SurfaceRequest, SurfaceResponse,
6};
7use serde::Deserialize;
8use text_core::TextSegmentContract;
9
10use crate::{
11    format_srt, format_webvtt, normalize_transcription_contract, parse_plain_lines, parse_srt,
12    parse_webvtt, parse_whisper_json, parse_whisperx_json, TranscriptionContract,
13    TranscriptionResult,
14};
15
16/// Returns the package surface exposed by every transport wrapper.
17pub fn package_surface() -> PackageSurface {
18    PackageSurface {
19        library: env!("CARGO_PKG_NAME").to_string(),
20        version: env!("CARGO_PKG_VERSION").to_string(),
21        capabilities: RuntimeCapabilities::pure_rust(),
22        operations: vec![
23            operation(
24                "describe",
25                "Inspect package metadata",
26                "Transcript parsing and ASR command adapters for video-analysis.",
27                serde_json::json!({"includeOperations": true}),
28            ),
29            operation(
30                "transcripts.parse",
31                "Parse transcript",
32                "Parses plain text, Whisper JSON, SRT, or WebVTT into the transcript contract.",
33                serde_json::json!({"format": "srt", "content": "1\n00:00:01,000 --> 00:00:02,000\nHello.\n"}),
34            ),
35            operation(
36                "transcripts.normalize",
37                "Normalize transcript",
38                "Normalizes transcript contract text, segments, words, and confidence.",
39                serde_json::json!({"segments": [{"index": 0, "text": " hello ", "isFinal": true}]}),
40            ),
41            operation(
42                "transcripts.importWhisperX",
43                "Import WhisperX JSON",
44                "Parses existing WhisperX JSON output into the normalized transcript contract without running external tools.",
45                serde_json::json!({"content": "{\"segments\":[{\"start\":0.0,\"end\":1.0,\"text\":\"Hello.\",\"words\":[{\"word\":\"Hello\",\"start\":0.0,\"end\":0.8,\"score\":0.9}]}]}"}),
46            ),
47            operation(
48                "transcripts.formatSrt",
49                "Format SRT",
50                "Formats a transcript contract as SRT text.",
51                serde_json::json!({"segments": [{"index": 0, "startSeconds": 1.0, "endSeconds": 2.0, "text": "Hello.", "isFinal": true}]}),
52            ),
53            operation(
54                "transcripts.formatWebVtt",
55                "Format WebVTT",
56                "Formats a transcript contract as WebVTT text.",
57                serde_json::json!({"segments": [{"index": 0, "startSeconds": 1.0, "endSeconds": 2.0, "text": "Hello.", "isFinal": true}]}),
58            ),
59            operation(
60                "transcripts.toTextSegments",
61                "Convert to text segments",
62                "Converts transcript segments into shared text segment contracts and document records.",
63                serde_json::json!({"streamId": "transcript-1", "segments": [{"index": 0, "startSeconds": 1.0, "endSeconds": 2.0, "text": "Hello.", "language": "en", "speaker": "A", "confidence": 0.9, "isFinal": true}]}),
64            ),
65        ],
66    }
67}
68
69fn operation(
70    id: &str,
71    name: &str,
72    description: &str,
73    example_request: serde_json::Value,
74) -> SurfaceOperation {
75    let mut operation = runtime_core::surface_operation(id, name, description, example_request);
76    if id == "transcripts.toTextSegments" {
77        runtime_core::attach_landscape_contract(
78            &mut operation,
79            runtime_core::landscape::LandscapeOperationContract::new(
80                runtime_core::landscape::LandscapeFunction::new(
81                    "text.transcripts.toTextSegments",
82                    env!("CARGO_PKG_NAME"),
83                )
84                .input(
85                    runtime_core::landscape::LandscapePort::new(
86                        "segments",
87                        runtime_core::landscape::well_known::text_transcript_segment(),
88                    )
89                    .many(),
90                )
91                .output(
92                    runtime_core::landscape::LandscapePort::new(
93                        "segments",
94                        runtime_core::landscape::well_known::text_segment(),
95                    )
96                    .many(),
97                )
98                .output(
99                    runtime_core::landscape::LandscapePort::new(
100                        "documents",
101                        runtime_core::landscape::well_known::text_document(),
102                    )
103                    .many(),
104                ),
105            ),
106        );
107    }
108    operation
109}
110
111/// Runs one library-owned operation.
112pub fn run_surface_operation(request: SurfaceRequest) -> Result<SurfaceResponse, String> {
113    let operation = request.operation.clone();
114    let value = match request.operation.as_str() {
115        "describe" => describe_value(request.input),
116        "transcripts.parse" => parse_value(parse_input(request.input)?)?,
117        "transcripts.normalize" => normalize_value(parse_input(request.input)?)?,
118        "transcripts.importWhisperX" => import_whisperx_value(parse_input(request.input)?)?,
119        "transcripts.formatSrt" => format_srt_value(parse_input(request.input)?)?,
120        "transcripts.formatWebVtt" => format_webvtt_value(parse_input(request.input)?)?,
121        "transcripts.toTextSegments" => to_text_segments_value(parse_input(request.input)?)?,
122        operation => {
123            return Err(runtime_core::SurfaceError::unsupported_operation(
124                operation,
125                env!("CARGO_PKG_NAME"),
126            )
127            .to_error_string())
128        }
129    };
130    let value = annotated_value(&operation, value);
131    Ok(SurfaceResponse {
132        operation,
133        value,
134        diagnostics: Vec::new(),
135        artifacts: Vec::new(),
136    })
137}
138
139fn describe_value(input: serde_json::Value) -> serde_json::Value {
140    let surface = package_surface();
141    serde_json::json!({
142        "library": surface.library,
143        "version": surface.version,
144        "operationCount": surface.operations.len(),
145        "operations": surface.operations.iter().map(|operation| operation.id.as_str()).collect::<Vec<_>>(),
146        "input": input
147    })
148}
149
150fn annotated_value(operation: &OperationId, value: serde_json::Value) -> serde_json::Value {
151    let (title, message, summary) = match operation.as_str() {
152        "describe" => (
153            "Package surface metadata",
154            "Inspected the text-transcripts package operations and runtime support.",
155            serde_json::json!({
156                "status": "ok",
157                "operationCount": value["operationCount"]
158            }),
159        ),
160        "transcripts.parse" => (
161            "Transcript parse result",
162            "Parsed transcript content into the normalized transcript contract.",
163            serde_json::json!({
164                "status": "ok",
165                "segmentCount": value["segments"].as_array().map(Vec::len).unwrap_or(0),
166                "hasText": value["text"].as_str().map(|text| !text.is_empty()).unwrap_or(false)
167            }),
168        ),
169        "transcripts.normalize" => (
170            "Transcript normalization result",
171            "Normalized transcript contract text, segments, words, and confidence values.",
172            serde_json::json!({
173                "status": "ok",
174                "segmentCount": value["segments"].as_array().map(Vec::len).unwrap_or(0),
175                "hasText": value["text"].as_str().map(|text| !text.is_empty()).unwrap_or(false)
176            }),
177        ),
178        "transcripts.importWhisperX" => (
179            "WhisperX import result",
180            "Imported WhisperX JSON into the normalized transcript contract.",
181            serde_json::json!({
182                "status": "ok",
183                "segmentCount": value["segments"].as_array().map(Vec::len).unwrap_or(0),
184                "hasText": value["text"].as_str().map(|text| !text.is_empty()).unwrap_or(false)
185            }),
186        ),
187        "transcripts.formatSrt" => (
188            "SRT formatting result",
189            "Formatted a normalized transcript contract as SRT text.",
190            serde_json::json!({
191                "status": "ok",
192                "bytes": value["srt"].as_str().map(str::len).unwrap_or(0)
193            }),
194        ),
195        "transcripts.formatWebVtt" => (
196            "WebVTT formatting result",
197            "Formatted a normalized transcript contract as WebVTT text.",
198            serde_json::json!({
199                "status": "ok",
200                "bytes": value["webVtt"].as_str().map(str::len).unwrap_or(0)
201            }),
202        ),
203        "transcripts.toTextSegments" => (
204            "Text segment conversion result",
205            "Converted normalized transcript segments into shared text segment and document contracts.",
206            serde_json::json!({
207                "status": "ok",
208                "segmentCount": value["segments"].as_array().map(Vec::len).unwrap_or(0),
209                "documentCount": value["documents"].as_array().map(Vec::len).unwrap_or(0),
210                "streamId": value["streamId"]
211            }),
212        ),
213        _ => (
214            "Transcript result",
215            "Ran a text-transcripts package operation.",
216            serde_json::json!({"status": "ok"}),
217        ),
218    };
219    structured_surface_value(operation, title, message, summary, value)
220}
221
222#[derive(Debug, Deserialize)]
223#[serde(rename_all = "camelCase")]
224struct ParseRequest {
225    format: String,
226    content: String,
227}
228
229#[derive(Debug, Deserialize)]
230#[serde(rename_all = "camelCase")]
231struct ImportContentRequest {
232    content: String,
233}
234
235#[derive(Debug, Deserialize)]
236#[serde(rename_all = "camelCase")]
237struct ToTextSegmentsRequest {
238    stream_id: Option<String>,
239    #[serde(flatten)]
240    contract: TranscriptionContract,
241}
242
243fn parse_value(request: ParseRequest) -> Result<serde_json::Value, String> {
244    let result = match request.format.as_str() {
245        "plain" | "lines" => parse_plain_lines(&request.content),
246        "whisperJson" | "whisper_json" | "whisper-json" => {
247            parse_whisper_json(request.content.as_bytes()).map_err(|error| error.to_string())?
248        }
249        "srt" => parse_srt(&request.content).map_err(|error| error.to_string())?,
250        "webVtt" | "webvtt" | "web-vtt" => {
251            parse_webvtt(&request.content).map_err(|error| error.to_string())?
252        }
253        other => return Err(format!("unsupported transcript format `{other}`")),
254    };
255    let mut contract = TranscriptionContract::from(result)
256        .normalized()
257        .map_err(|error| error.to_string())?;
258    let joined = contract.joined_text();
259    if !joined.is_empty() {
260        contract.text = Some(joined);
261    }
262    Ok(serde_json::json!(contract))
263}
264
265fn normalize_value(contract: TranscriptionContract) -> Result<serde_json::Value, String> {
266    Ok(serde_json::json!(normalize_transcription_contract(
267        contract
268    )
269    .map_err(|error| error.to_string())?))
270}
271
272fn import_whisperx_value(request: ImportContentRequest) -> Result<serde_json::Value, String> {
273    Ok(serde_json::json!(parse_whisperx_json(
274        request.content.as_bytes()
275    )
276    .map_err(|error| error.to_string())?))
277}
278
279fn format_srt_value(contract: TranscriptionContract) -> Result<serde_json::Value, String> {
280    let normalized = contract.normalized().map_err(|error| error.to_string())?;
281    let result = TranscriptionResult::from(normalized);
282    Ok(serde_json::json!({ "srt": format_srt(&result.segments) }))
283}
284
285fn format_webvtt_value(contract: TranscriptionContract) -> Result<serde_json::Value, String> {
286    let normalized = contract.normalized().map_err(|error| error.to_string())?;
287    let result = TranscriptionResult::from(normalized);
288    Ok(serde_json::json!({ "webVtt": format_webvtt(&result.segments) }))
289}
290
291fn to_text_segments_value(request: ToTextSegmentsRequest) -> Result<serde_json::Value, String> {
292    let normalized = request
293        .contract
294        .normalized()
295        .map_err(|error| error.to_string())?;
296    let segments = normalized
297        .segments
298        .iter()
299        .map(|segment| {
300            let mut text_segment = TextSegmentContract::from(segment);
301            if let Some(stream_id) = &request.stream_id {
302                text_segment.stream_id = Some(stream_id.clone());
303            }
304            text_segment
305        })
306        .collect::<Vec<_>>();
307    let documents = segments
308        .iter()
309        .filter_map(|segment| {
310            segment.document_id().map(|id| {
311                serde_json::json!({
312                    "id": id,
313                    "text": segment.text
314                })
315            })
316        })
317        .collect::<Vec<_>>();
318    Ok(serde_json::json!({
319        "streamId": request.stream_id,
320        "segments": segments,
321        "documents": documents
322    }))
323}
324
325fn parse_input<T: for<'de> Deserialize<'de>>(input: serde_json::Value) -> Result<T, String> {
326    runtime_core::parse_surface_input(None, input)
327}
328
#[cfg(test)]
mod tests {
    use super::*;

    /// Sends `input` to the named operation through `run_surface_operation`.
    fn run(operation: &str, input: serde_json::Value) -> Result<SurfaceResponse, String> {
        run_surface_operation(SurfaceRequest {
            operation: OperationId::new(operation),
            input,
        })
    }

    #[test]
    fn package_surface_lists_transcript_operations() {
        let ids = package_surface()
            .operations
            .into_iter()
            .map(|entry| entry.id.0)
            .collect::<Vec<_>>();

        for expected in [
            "transcripts.parse",
            "transcripts.importWhisperX",
            "transcripts.formatSrt",
            "transcripts.formatWebVtt",
            "transcripts.toTextSegments",
        ] {
            assert!(ids.contains(&expected.to_string()));
        }
    }

    // NOTE(review): this test reads `text` and `segments` at the top level of
    // the response value, while the tests below read under `result`. Confirm
    // against the shape produced by `structured_surface_value`.
    #[test]
    fn parse_operation_normalizes_plain_lines() {
        let input = serde_json::json!({"format": "lines", "content": "hello\n\nworld\n"});
        let response = run("transcripts.parse", input).expect("parse");

        assert_eq!(response.value["text"], "hello world");
        assert_eq!(response.value["segments"].as_array().unwrap().len(), 2);
    }

    #[test]
    fn malformed_input_returns_typed_error_string() {
        // `content` is missing, so input deserialization must fail.
        let input = serde_json::json!({"format": "srt"});
        let error = run("transcripts.parse", input).expect_err("invalid request");

        assert!(error.contains("invalid request"));
    }

    #[test]
    fn format_webvtt_operation_returns_cue_text() {
        let input = serde_json::json!({"segments": [{"index": 0, "startSeconds": 1.0, "endSeconds": 2.0, "text": "Hello.", "isFinal": true}]});
        let response = run("transcripts.formatWebVtt", input).expect("format webvtt");

        let webvtt = response.value["result"]["webVtt"].as_str().unwrap();
        assert!(webvtt.starts_with("WEBVTT"));
        assert!(webvtt.contains("Hello."));
    }

    #[test]
    fn to_text_segments_sets_stream_and_documents() {
        let input = serde_json::json!({
            "streamId": "transcript-1",
            "segments": [{
                "index": 0,
                "startSeconds": 1.0,
                "endSeconds": 2.0,
                "text": "Hello.",
                "language": "en",
                "speaker": "A",
                "confidence": 0.9,
                "isFinal": true
            }]
        });
        let response = run("transcripts.toTextSegments", input).expect("to text segments");
        let result = &response.value["result"];

        assert_eq!(result["segments"][0]["streamId"], "transcript-1");
        assert_eq!(result["documents"][0]["id"], "transcript-1:0");
    }
}