1use runtime_core::{
4 structured_surface_value, OperationId, PackageSurface, RuntimeCapabilities, SurfaceOperation,
5 SurfaceRequest, SurfaceResponse,
6};
7use serde::Deserialize;
8use text_core::TextSegmentContract;
9
10use crate::{
11 format_srt, format_webvtt, normalize_transcription_contract, parse_plain_lines, parse_srt,
12 parse_webvtt, parse_whisper_json, parse_whisperx_json, TranscriptionContract,
13 TranscriptionResult,
14};
15
16pub fn package_surface() -> PackageSurface {
18 PackageSurface {
19 library: env!("CARGO_PKG_NAME").to_string(),
20 version: env!("CARGO_PKG_VERSION").to_string(),
21 capabilities: RuntimeCapabilities::pure_rust(),
22 operations: vec![
23 operation(
24 "describe",
25 "Inspect package metadata",
26 "Transcript parsing and ASR command adapters for video-analysis.",
27 serde_json::json!({"includeOperations": true}),
28 ),
29 operation(
30 "transcripts.parse",
31 "Parse transcript",
32 "Parses plain text, Whisper JSON, SRT, or WebVTT into the transcript contract.",
33 serde_json::json!({"format": "srt", "content": "1\n00:00:01,000 --> 00:00:02,000\nHello.\n"}),
34 ),
35 operation(
36 "transcripts.normalize",
37 "Normalize transcript",
38 "Normalizes transcript contract text, segments, words, and confidence.",
39 serde_json::json!({"segments": [{"index": 0, "text": " hello ", "isFinal": true}]}),
40 ),
41 operation(
42 "transcripts.importWhisperX",
43 "Import WhisperX JSON",
44 "Parses existing WhisperX JSON output into the normalized transcript contract without running external tools.",
45 serde_json::json!({"content": "{\"segments\":[{\"start\":0.0,\"end\":1.0,\"text\":\"Hello.\",\"words\":[{\"word\":\"Hello\",\"start\":0.0,\"end\":0.8,\"score\":0.9}]}]}"}),
46 ),
47 operation(
48 "transcripts.formatSrt",
49 "Format SRT",
50 "Formats a transcript contract as SRT text.",
51 serde_json::json!({"segments": [{"index": 0, "startSeconds": 1.0, "endSeconds": 2.0, "text": "Hello.", "isFinal": true}]}),
52 ),
53 operation(
54 "transcripts.formatWebVtt",
55 "Format WebVTT",
56 "Formats a transcript contract as WebVTT text.",
57 serde_json::json!({"segments": [{"index": 0, "startSeconds": 1.0, "endSeconds": 2.0, "text": "Hello.", "isFinal": true}]}),
58 ),
59 operation(
60 "transcripts.toTextSegments",
61 "Convert to text segments",
62 "Converts transcript segments into shared text segment contracts and document records.",
63 serde_json::json!({"streamId": "transcript-1", "segments": [{"index": 0, "startSeconds": 1.0, "endSeconds": 2.0, "text": "Hello.", "language": "en", "speaker": "A", "confidence": 0.9, "isFinal": true}]}),
64 ),
65 ],
66 }
67}
68
69fn operation(
70 id: &str,
71 name: &str,
72 description: &str,
73 example_request: serde_json::Value,
74) -> SurfaceOperation {
75 let mut operation = runtime_core::surface_operation(id, name, description, example_request);
76 if id == "transcripts.toTextSegments" {
77 runtime_core::attach_landscape_contract(
78 &mut operation,
79 runtime_core::landscape::LandscapeOperationContract::new(
80 runtime_core::landscape::LandscapeFunction::new(
81 "text.transcripts.toTextSegments",
82 env!("CARGO_PKG_NAME"),
83 )
84 .input(
85 runtime_core::landscape::LandscapePort::new(
86 "segments",
87 runtime_core::landscape::well_known::text_transcript_segment(),
88 )
89 .many(),
90 )
91 .output(
92 runtime_core::landscape::LandscapePort::new(
93 "segments",
94 runtime_core::landscape::well_known::text_segment(),
95 )
96 .many(),
97 )
98 .output(
99 runtime_core::landscape::LandscapePort::new(
100 "documents",
101 runtime_core::landscape::well_known::text_document(),
102 )
103 .many(),
104 ),
105 ),
106 );
107 }
108 operation
109}
110
111pub fn run_surface_operation(request: SurfaceRequest) -> Result<SurfaceResponse, String> {
113 let operation = request.operation.clone();
114 let value = match request.operation.as_str() {
115 "describe" => describe_value(request.input),
116 "transcripts.parse" => parse_value(parse_input(request.input)?)?,
117 "transcripts.normalize" => normalize_value(parse_input(request.input)?)?,
118 "transcripts.importWhisperX" => import_whisperx_value(parse_input(request.input)?)?,
119 "transcripts.formatSrt" => format_srt_value(parse_input(request.input)?)?,
120 "transcripts.formatWebVtt" => format_webvtt_value(parse_input(request.input)?)?,
121 "transcripts.toTextSegments" => to_text_segments_value(parse_input(request.input)?)?,
122 operation => {
123 return Err(runtime_core::SurfaceError::unsupported_operation(
124 operation,
125 env!("CARGO_PKG_NAME"),
126 )
127 .to_error_string())
128 }
129 };
130 let value = annotated_value(&operation, value);
131 Ok(SurfaceResponse {
132 operation,
133 value,
134 diagnostics: Vec::new(),
135 artifacts: Vec::new(),
136 })
137}
138
139fn describe_value(input: serde_json::Value) -> serde_json::Value {
140 let surface = package_surface();
141 serde_json::json!({
142 "library": surface.library,
143 "version": surface.version,
144 "operationCount": surface.operations.len(),
145 "operations": surface.operations.iter().map(|operation| operation.id.as_str()).collect::<Vec<_>>(),
146 "input": input
147 })
148}
149
150fn annotated_value(operation: &OperationId, value: serde_json::Value) -> serde_json::Value {
151 let (title, message, summary) = match operation.as_str() {
152 "describe" => (
153 "Package surface metadata",
154 "Inspected the text-transcripts package operations and runtime support.",
155 serde_json::json!({
156 "status": "ok",
157 "operationCount": value["operationCount"]
158 }),
159 ),
160 "transcripts.parse" => (
161 "Transcript parse result",
162 "Parsed transcript content into the normalized transcript contract.",
163 serde_json::json!({
164 "status": "ok",
165 "segmentCount": value["segments"].as_array().map(Vec::len).unwrap_or(0),
166 "hasText": value["text"].as_str().map(|text| !text.is_empty()).unwrap_or(false)
167 }),
168 ),
169 "transcripts.normalize" => (
170 "Transcript normalization result",
171 "Normalized transcript contract text, segments, words, and confidence values.",
172 serde_json::json!({
173 "status": "ok",
174 "segmentCount": value["segments"].as_array().map(Vec::len).unwrap_or(0),
175 "hasText": value["text"].as_str().map(|text| !text.is_empty()).unwrap_or(false)
176 }),
177 ),
178 "transcripts.importWhisperX" => (
179 "WhisperX import result",
180 "Imported WhisperX JSON into the normalized transcript contract.",
181 serde_json::json!({
182 "status": "ok",
183 "segmentCount": value["segments"].as_array().map(Vec::len).unwrap_or(0),
184 "hasText": value["text"].as_str().map(|text| !text.is_empty()).unwrap_or(false)
185 }),
186 ),
187 "transcripts.formatSrt" => (
188 "SRT formatting result",
189 "Formatted a normalized transcript contract as SRT text.",
190 serde_json::json!({
191 "status": "ok",
192 "bytes": value["srt"].as_str().map(str::len).unwrap_or(0)
193 }),
194 ),
195 "transcripts.formatWebVtt" => (
196 "WebVTT formatting result",
197 "Formatted a normalized transcript contract as WebVTT text.",
198 serde_json::json!({
199 "status": "ok",
200 "bytes": value["webVtt"].as_str().map(str::len).unwrap_or(0)
201 }),
202 ),
203 "transcripts.toTextSegments" => (
204 "Text segment conversion result",
205 "Converted normalized transcript segments into shared text segment and document contracts.",
206 serde_json::json!({
207 "status": "ok",
208 "segmentCount": value["segments"].as_array().map(Vec::len).unwrap_or(0),
209 "documentCount": value["documents"].as_array().map(Vec::len).unwrap_or(0),
210 "streamId": value["streamId"]
211 }),
212 ),
213 _ => (
214 "Transcript result",
215 "Ran a text-transcripts package operation.",
216 serde_json::json!({"status": "ok"}),
217 ),
218 };
219 structured_surface_value(operation, title, message, summary, value)
220}
221
222#[derive(Debug, Deserialize)]
223#[serde(rename_all = "camelCase")]
224struct ParseRequest {
225 format: String,
226 content: String,
227}
228
229#[derive(Debug, Deserialize)]
230#[serde(rename_all = "camelCase")]
231struct ImportContentRequest {
232 content: String,
233}
234
235#[derive(Debug, Deserialize)]
236#[serde(rename_all = "camelCase")]
237struct ToTextSegmentsRequest {
238 stream_id: Option<String>,
239 #[serde(flatten)]
240 contract: TranscriptionContract,
241}
242
243fn parse_value(request: ParseRequest) -> Result<serde_json::Value, String> {
244 let result = match request.format.as_str() {
245 "plain" | "lines" => parse_plain_lines(&request.content),
246 "whisperJson" | "whisper_json" | "whisper-json" => {
247 parse_whisper_json(request.content.as_bytes()).map_err(|error| error.to_string())?
248 }
249 "srt" => parse_srt(&request.content).map_err(|error| error.to_string())?,
250 "webVtt" | "webvtt" | "web-vtt" => {
251 parse_webvtt(&request.content).map_err(|error| error.to_string())?
252 }
253 other => return Err(format!("unsupported transcript format `{other}`")),
254 };
255 let mut contract = TranscriptionContract::from(result)
256 .normalized()
257 .map_err(|error| error.to_string())?;
258 let joined = contract.joined_text();
259 if !joined.is_empty() {
260 contract.text = Some(joined);
261 }
262 Ok(serde_json::json!(contract))
263}
264
265fn normalize_value(contract: TranscriptionContract) -> Result<serde_json::Value, String> {
266 Ok(serde_json::json!(normalize_transcription_contract(
267 contract
268 )
269 .map_err(|error| error.to_string())?))
270}
271
272fn import_whisperx_value(request: ImportContentRequest) -> Result<serde_json::Value, String> {
273 Ok(serde_json::json!(parse_whisperx_json(
274 request.content.as_bytes()
275 )
276 .map_err(|error| error.to_string())?))
277}
278
279fn format_srt_value(contract: TranscriptionContract) -> Result<serde_json::Value, String> {
280 let normalized = contract.normalized().map_err(|error| error.to_string())?;
281 let result = TranscriptionResult::from(normalized);
282 Ok(serde_json::json!({ "srt": format_srt(&result.segments) }))
283}
284
285fn format_webvtt_value(contract: TranscriptionContract) -> Result<serde_json::Value, String> {
286 let normalized = contract.normalized().map_err(|error| error.to_string())?;
287 let result = TranscriptionResult::from(normalized);
288 Ok(serde_json::json!({ "webVtt": format_webvtt(&result.segments) }))
289}
290
291fn to_text_segments_value(request: ToTextSegmentsRequest) -> Result<serde_json::Value, String> {
292 let normalized = request
293 .contract
294 .normalized()
295 .map_err(|error| error.to_string())?;
296 let segments = normalized
297 .segments
298 .iter()
299 .map(|segment| {
300 let mut text_segment = TextSegmentContract::from(segment);
301 if let Some(stream_id) = &request.stream_id {
302 text_segment.stream_id = Some(stream_id.clone());
303 }
304 text_segment
305 })
306 .collect::<Vec<_>>();
307 let documents = segments
308 .iter()
309 .filter_map(|segment| {
310 segment.document_id().map(|id| {
311 serde_json::json!({
312 "id": id,
313 "text": segment.text
314 })
315 })
316 })
317 .collect::<Vec<_>>();
318 Ok(serde_json::json!({
319 "streamId": request.stream_id,
320 "segments": segments,
321 "documents": documents
322 }))
323}
324
325fn parse_input<T: for<'de> Deserialize<'de>>(input: serde_json::Value) -> Result<T, String> {
326 runtime_core::parse_surface_input(None, input)
327}
328
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs one surface operation with the given id and JSON input.
    fn run(operation: OperationId, input: serde_json::Value) -> Result<SurfaceResponse, String> {
        run_surface_operation(SurfaceRequest { operation, input })
    }

    /// Request body with a single timed "Hello." segment.
    fn hello_segment_input() -> serde_json::Value {
        serde_json::json!({"segments": [{"index": 0, "startSeconds": 1.0, "endSeconds": 2.0, "text": "Hello.", "isFinal": true}]})
    }

    #[test]
    fn package_surface_lists_transcript_operations() {
        let ids: Vec<String> = package_surface()
            .operations
            .into_iter()
            .map(|entry| entry.id.0)
            .collect();

        let expected = [
            "transcripts.parse",
            "transcripts.importWhisperX",
            "transcripts.formatSrt",
            "transcripts.formatWebVtt",
            "transcripts.toTextSegments",
        ];
        for wanted in expected.iter() {
            assert!(ids.iter().any(|id| id.as_str() == *wanted));
        }
    }

    #[test]
    fn parse_operation_normalizes_plain_lines() {
        let response = run(
            OperationId::new("transcripts.parse"),
            serde_json::json!({"format": "lines", "content": "hello\n\nworld\n"}),
        )
        .expect("parse");

        // NOTE(review): these fields are read at the top level of
        // `response.value`, while the other tests read under `result`.
        // Confirm against the envelope built by `structured_surface_value`.
        assert_eq!(response.value["text"], "hello world");
        assert_eq!(response.value["segments"].as_array().unwrap().len(), 2);
    }

    #[test]
    fn malformed_input_returns_typed_error_string() {
        // `content` is missing, so deserializing the request must fail.
        let error = run(
            OperationId::new("transcripts.parse"),
            serde_json::json!({"format": "srt"}),
        )
        .expect_err("invalid request");

        assert!(error.contains("invalid request"));
    }

    #[test]
    fn format_webvtt_operation_returns_cue_text() {
        let response = run(
            OperationId::new("transcripts.formatWebVtt"),
            hello_segment_input(),
        )
        .expect("format webvtt");

        let webvtt = response.value["result"]["webVtt"].as_str().unwrap();
        assert!(webvtt.starts_with("WEBVTT"));
        assert!(webvtt.contains("Hello."));
    }

    #[test]
    fn to_text_segments_sets_stream_and_documents() {
        let input = serde_json::json!({
            "streamId": "transcript-1",
            "segments": [{
                "index": 0,
                "startSeconds": 1.0,
                "endSeconds": 2.0,
                "text": "Hello.",
                "language": "en",
                "speaker": "A",
                "confidence": 0.9,
                "isFinal": true
            }]
        });
        let response =
            run(OperationId::new("transcripts.toTextSegments"), input).expect("to text segments");

        let result = &response.value["result"];
        assert_eq!(result["segments"][0]["streamId"], "transcript-1");
        assert_eq!(result["documents"][0]["id"], "transcript-1:0");
    }
}