// oximedia_caption_gen/lib.rs

//! # oximedia-caption-gen
//!
//! Advanced caption and subtitle generation for the OxiMedia Sovereign Media
//! Framework.
//!
//! This crate provides speech-to-caption alignment with frame-accurate timing,
//! greedy and optimal (Knuth-Plass DP) line-breaking algorithms, WCAG 2.1
//! accessibility compliance checking, and speaker diarization metadata with
//! crosstalk detection — all in pure Rust.
//!
//! ## Modules
//!
//! - [`alignment`] — Word timestamps, transcript segments, segment
//!   merging/splitting, frame alignment, and caption block construction.
//! - [`autopunct`] — Deterministic auto-punctuation and sentence capitalisation.
//! - [`burn_in`] — Burned-in subtitle rendering onto raw RGBA video frames
//!   using a built-in 8×12 bitmap font.
//! - [`caption_diff`] — Compare two caption tracks and report differences.
//! - [`caption_format_adapter`] — Serialize caption tracks to SRT/VTT/TTML.
//! - [`caption_style_guide`] — Style guide rule enforcement over caption tracks.
//! - [`caption_timing_adjuster`] — Shift, stretch, snap, and EDL-remap
//!   caption timestamps.
//! - [`diarization`] — Speaker metadata, turn merging, per-speaker statistics,
//!   crosstalk detection, voice activity ratio, and speaker-to-caption
//!   assignment.
//! - [`forced_narrative`] — Forced narrative (FN) and SDH subtitle detection
//!   and classification.
//! - [`language_detect`] — Byte-trigram language detection for locale-aware
//!   line-breaking.
//! - [`line_breaking`] — Greedy and optimal line-breaking, reading-speed
//!   helpers (CPS), and line-balance optimisation.
//! - `ml` — Feature-gated (`onnx`) module exposing `CaptionEncoder`,
//!   `EncoderOutput`, `greedy_decode`, and `top_k_sample`.
//! - [`multi_language`] — Bilingual caption layout (primary + secondary
//!   language).
//! - [`multi_language_sync`] — Anchor-point synchronisation of multi-language
//!   caption tracks.
//! - [`multilang`] — Multi-language subtitle support with ISO 639-1 validated
//!   language codes, SRT export, and cross-language timing merge.
//! - [`phoneme_timing`] — Phoneme-level timing estimation from word timestamps.
//! - [`profanity`] — Configurable profanity filter for caption text.
//! - [`punctuation_restoration`] — Rule-based punctuation restoration for raw
//!   ASR output.
//! - [`reading_speed`] — Caption reading-speed validation (WPS-based).
//! - [`style_generator`] — Font size, position, and colour suggestions based on
//!   video frame analysis.
//! - [`style_presets`] — Ready-made caption style configs (Netflix, BBC, WCAG).
//! - [`translate`] — Stub subtitle translation pipeline.
//! - [`wcag`] — WCAG 2.1 compliance checks (1.2.2, 1.2.4, 1.2.6), reading
//!   speed validation, minimum display duration, gap detection, and compliance
//!   scoring.

51pub mod alignment;
52pub mod autopunct;
53pub mod burn_in;
54pub mod caption_diff;
55pub mod caption_format_adapter;
56pub mod caption_style_guide;
57pub mod caption_timing_adjuster;
58pub mod diarization;
59pub mod forced_narrative;
60pub mod language_detect;
61pub mod line_breaking;
62#[cfg(feature = "onnx")]
63pub mod ml;
64pub mod multi_language;
65pub mod multi_language_sync;
66pub mod multilang;
67pub mod phoneme_timing;
68pub mod profanity;
69pub mod punctuation_restoration;
70pub mod reading_speed;
71pub mod style_generator;
72pub mod style_presets;
73pub mod translate;
74pub mod wcag;
75
76// ── Re-exports of key public types ──────────────────────────────────────────
77
78pub use alignment::{
79    align_to_frames, build_caption_blocks, merge_short_segments, split_long_segments,
80    AlignmentError, CaptionBlock, CaptionPosition, TranscriptSegment, WordTimestamp,
81};
82pub use diarization::{
83    assign_speakers_to_blocks, dominant_speaker, format_speaker_label, merge_consecutive_turns,
84    speaker_stats, voice_activity_ratio, CrosstalkDetector, DiarizationResult, Speaker,
85    SpeakerGender, SpeakerStats, SpeakerTurn,
86};
87pub use line_breaking::{
88    compute_cps, greedy_break, optimal_break, reading_speed_ok, rebalance_lines, LineBalance,
89    LineBreakAlgorithm, LineBreakConfig,
90};
91pub use wcag::{
92    check_caption_coverage, check_cps, check_live_latency, check_min_duration, check_sign_language,
93    compliance_score, run_all_checks, WcagChecker, WcagLevel, WcagViolation,
94};
95
96// ─── Error type ─────────────────────────────────────────────────────────────
97
98/// Errors produced by caption generation operations.
99///
100/// Note: `Clone` and `PartialEq` are intentionally *not* derived because the
101/// feature-gated [`CaptionGenError::Ml`] variant wraps [`oximedia_ml::MlError`]
102/// which implements neither trait. Equality and cloning of error values are
103/// uncommon for caption-generation callers (errors typically flow through `?`
104/// once and are rendered via `Display`), so dropping those derives is the
105/// minimum-impact way of extending the enum for ML.
106#[derive(Debug, thiserror::Error)]
107pub enum CaptionGenError {
108    /// A speech-to-caption alignment operation failed.
109    #[error("alignment error: {0}")]
110    Alignment(#[from] AlignmentError),
111
112    /// A parameter value is invalid.
113    #[error("invalid parameter: {0}")]
114    InvalidParameter(String),
115
116    /// A timestamp is invalid (e.g. start >= end).
117    #[error("invalid timestamp")]
118    InvalidTimestamp,
119
120    /// The transcript is empty and cannot be processed.
121    #[error("empty transcript")]
122    EmptyTranscript,
123
124    /// Parsing of caption data or configuration failed.
125    #[error("parse error: {0}")]
126    ParseError(String),
127
128    /// An ML pipeline error surfaced from [`oximedia_ml`] (only available when
129    /// the `onnx` feature is enabled).
130    #[cfg(feature = "onnx")]
131    #[error("ml error: {0}")]
132    Ml(#[from] oximedia_ml::MlError),
133}
134
135/// Result alias used by caption-generation APIs.
136pub type CaptionGenResult<T> = std::result::Result<T, CaptionGenError>;
137
138pub use burn_in::{BurnInConfig, SubtitleBurnIn, SubtitlePosition};
139#[cfg(feature = "onnx")]
140pub use ml::{greedy_decode, top_k_sample, CaptionEncoder, EncoderOutput};
141pub use multilang::{CaptionEntry, LanguageCode, MultiLangCaption, MultiLangCaptionBuilder};