oximedia_caption_gen/lib.rs
1//! # oximedia-caption-gen
2//!
3//! Advanced caption and subtitle generation for the OxiMedia Sovereign Media
4//! Framework.
5//!
6//! This crate provides speech-to-caption alignment with frame-accurate timing,
7//! greedy and optimal (Knuth-Plass DP) line-breaking algorithms, WCAG 2.1
8//! accessibility compliance checking, and speaker diarization metadata with
9//! crosstalk detection — all in pure Rust.
10//!
11//! ## Modules
12//!
13//! - [`alignment`] — Word timestamps, transcript segments, segment
14//! merging/splitting, frame alignment, and caption block construction.
15//! - [`autopunct`] — Deterministic auto-punctuation and sentence capitalisation.
16//! - [`burn_in`] — Burned-in subtitle rendering onto raw RGBA video frames
17//! using a built-in 8×12 bitmap font.
18//! - [`caption_diff`] — Compare two caption tracks and report differences.
19//! - [`caption_format_adapter`] — Serialize caption tracks to SRT/VTT/TTML.
20//! - [`caption_style_guide`] — Style guide rule enforcement over caption tracks.
21//! - [`caption_timing_adjuster`] — Shift, stretch, snap, and EDL-remap
22//! caption timestamps.
23//! - [`diarization`] — Speaker metadata, turn merging, per-speaker statistics,
24//! crosstalk detection, voice activity ratio, and speaker-to-caption
25//! assignment.
26//! - [`forced_narrative`] — Forced narrative (FN) and SDH subtitle detection
27//! and classification.
28//! - [`language_detect`] — Byte-trigram language detection for locale-aware
29//! line-breaking.
30//! - [`line_breaking`] — Greedy and optimal line-breaking, reading-speed
31//! helpers (CPS), and line-balance optimisation.
32//! - [`multi_language`] — Bilingual caption layout (primary + secondary
33//! language).
34//! - [`multi_language_sync`] — Anchor-point synchronisation of multi-language
35//! caption tracks.
36//! - [`multilang`] — Multi-language subtitle support with ISO 639-1 validated
37//! language codes, SRT export, and cross-language timing merge.
38//! - [`phoneme_timing`] — Phoneme-level timing estimation from word timestamps.
39//! - [`profanity`] — Configurable profanity filter for caption text.
40//! - [`punctuation_restoration`] — Rule-based punctuation restoration for raw
41//! ASR output.
42//! - [`reading_speed`] — Caption reading-speed validation (WPS-based).
43//! - [`style_generator`] — Font size, position, and colour suggestions based on
44//! video frame analysis.
45//! - [`style_presets`] — Ready-made caption style configs (Netflix, BBC, WCAG).
46//! - [`translate`] — Stub subtitle translation pipeline.
47//! - [`wcag`] — WCAG 2.1 compliance checks (1.2.2, 1.2.4, 1.2.6), reading
48//! speed validation, minimum display duration, gap detection, and compliance
49//! scoring.
50
51pub mod alignment;
52pub mod autopunct;
53pub mod burn_in;
54pub mod caption_diff;
55pub mod caption_format_adapter;
56pub mod caption_style_guide;
57pub mod caption_timing_adjuster;
58pub mod diarization;
59pub mod forced_narrative;
60pub mod language_detect;
61pub mod line_breaking;
62#[cfg(feature = "onnx")]
63pub mod ml;
64pub mod multi_language;
65pub mod multi_language_sync;
66pub mod multilang;
67pub mod phoneme_timing;
68pub mod profanity;
69pub mod punctuation_restoration;
70pub mod reading_speed;
71pub mod style_generator;
72pub mod style_presets;
73pub mod translate;
74pub mod wcag;
75
76// ── Re-exports of key public types ──────────────────────────────────────────
77
78pub use alignment::{
79 align_to_frames, build_caption_blocks, merge_short_segments, split_long_segments,
80 AlignmentError, CaptionBlock, CaptionPosition, TranscriptSegment, WordTimestamp,
81};
82pub use diarization::{
83 assign_speakers_to_blocks, dominant_speaker, format_speaker_label, merge_consecutive_turns,
84 speaker_stats, voice_activity_ratio, CrosstalkDetector, DiarizationResult, Speaker,
85 SpeakerGender, SpeakerStats, SpeakerTurn,
86};
87pub use line_breaking::{
88 compute_cps, greedy_break, optimal_break, reading_speed_ok, rebalance_lines, LineBalance,
89 LineBreakAlgorithm, LineBreakConfig,
90};
91pub use wcag::{
92 check_caption_coverage, check_cps, check_live_latency, check_min_duration, check_sign_language,
93 compliance_score, run_all_checks, WcagChecker, WcagLevel, WcagViolation,
94};
95
96// ─── Error type ─────────────────────────────────────────────────────────────
97
98/// Errors produced by caption generation operations.
99///
100/// Note: `Clone` and `PartialEq` are intentionally *not* derived because the
101/// feature-gated [`CaptionGenError::Ml`] variant wraps [`oximedia_ml::MlError`]
102/// which implements neither trait. Equality and cloning of error values are
103/// uncommon for caption-generation callers (errors typically flow through `?`
104/// once and are rendered via `Display`), so dropping those derives is the
105/// minimum-impact way of extending the enum for ML.
106#[derive(Debug, thiserror::Error)]
107pub enum CaptionGenError {
108 /// A speech-to-caption alignment operation failed.
109 #[error("alignment error: {0}")]
110 Alignment(#[from] AlignmentError),
111
112 /// A parameter value is invalid.
113 #[error("invalid parameter: {0}")]
114 InvalidParameter(String),
115
116 /// A timestamp is invalid (e.g. start >= end).
117 #[error("invalid timestamp")]
118 InvalidTimestamp,
119
120 /// The transcript is empty and cannot be processed.
121 #[error("empty transcript")]
122 EmptyTranscript,
123
124 /// Parsing of caption data or configuration failed.
125 #[error("parse error: {0}")]
126 ParseError(String),
127
128 /// An ML pipeline error surfaced from [`oximedia_ml`] (only available when
129 /// the `onnx` feature is enabled).
130 #[cfg(feature = "onnx")]
131 #[error("ml error: {0}")]
132 Ml(#[from] oximedia_ml::MlError),
133}
134
135/// Result alias used by caption-generation APIs.
136pub type CaptionGenResult<T> = std::result::Result<T, CaptionGenError>;
137
138pub use burn_in::{BurnInConfig, SubtitleBurnIn, SubtitlePosition};
139#[cfg(feature = "onnx")]
140pub use ml::{greedy_decode, top_k_sample, CaptionEncoder, EncoderOutput};
141pub use multilang::{CaptionEntry, LanguageCode, MultiLangCaption, MultiLangCaptionBuilder};