/// Writing direction of a language's script.
///
/// The enum is a field-less tag, so besides the comparison traits it is also
/// `Copy` (no need to clone or move it around) and `Hash` (usable as a map or
/// set key).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[allow(dead_code)]
pub enum TextDirection {
    /// Text flows from the left edge to the right edge.
    LeftToRight,
    /// Text flows from the right edge to the left edge.
    RightToLeft,
}
/// A language described by its code, display name and writing direction.
///
/// `is_rtl` is stored next to `direction`; [`Language::new`] derives it from
/// the direction, so both agree on freshly constructed values. All fields are
/// public, so later mutation can make them disagree.
#[derive(Debug, Clone, PartialEq, Eq)]
#[allow(dead_code)]
pub struct Language {
    /// Short language code, e.g. `"en"`.
    pub code: String,
    /// Human-readable name, e.g. `"English"`.
    pub name: String,
    /// Writing direction of the language.
    pub direction: TextDirection,
    /// `true` when `direction` was `RightToLeft` at construction time.
    pub is_rtl: bool,
}

impl Language {
    /// Builds a language, setting `is_rtl` from `direction`.
    #[allow(dead_code)]
    pub fn new(code: impl Into<String>, name: impl Into<String>, direction: TextDirection) -> Self {
        let is_rtl = matches!(direction, TextDirection::RightToLeft);
        let code = code.into();
        let name = name.into();
        Self {
            code,
            name,
            direction,
            is_rtl,
        }
    }

    /// Looks up one of the built-in languages by its two-letter code.
    ///
    /// The match is exact (case-sensitive). Returns `None` for any code that is
    /// not in the built-in table.
    #[allow(dead_code)]
    pub fn from_code(code: &str) -> Option<Self> {
        const LTR: TextDirection = TextDirection::LeftToRight;
        const RTL: TextDirection = TextDirection::RightToLeft;

        // Resolve the table entry first, then construct in a single place.
        let (name, direction) = match code {
            "en" => ("English", LTR),
            "fr" => ("French", LTR),
            "de" => ("German", LTR),
            "es" => ("Spanish", LTR),
            "ar" => ("Arabic", RTL),
            "he" => ("Hebrew", RTL),
            "zh" => ("Chinese", LTR),
            "ja" => ("Japanese", LTR),
            "ko" => ("Korean", LTR),
            "ru" => ("Russian", LTR),
            _ => return None,
        };
        // `code` equals the matched literal here, so it can be stored directly.
        Some(Self::new(code, name, direction))
    }
}
/// One stored translation: a source sentence, its translation, and how much
/// the translation is trusted.
#[derive(Debug, Clone)]
#[allow(dead_code)]
pub struct TranslationUnit {
    /// Code of the language `source_text` is written in.
    pub source_lang: String,
    /// Code of the language `translated_text` is written in.
    pub target_lang: String,
    /// Original text.
    pub source_text: String,
    /// Translation of `source_text`.
    pub translated_text: String,
    /// Confidence score; [`TranslationUnit::new`] keeps it within `0.0..=1.0`.
    pub confidence: f32,
}

impl TranslationUnit {
    /// Creates a unit, normalising `confidence` into `0.0..=1.0`.
    ///
    /// Values below `0.0` become `0.0`, values above `1.0` (including
    /// `+inf`) become `1.0`, and `NaN` is treated as "no confidence" (`0.0`).
    #[allow(dead_code)]
    pub fn new(
        source_lang: impl Into<String>,
        target_lang: impl Into<String>,
        source_text: impl Into<String>,
        translated_text: impl Into<String>,
        confidence: f32,
    ) -> Self {
        // `f32::clamp` returns NaN when the input is NaN, which would leak a
        // value outside the documented range; map it to the lower bound.
        let confidence = if confidence.is_nan() {
            0.0
        } else {
            confidence.clamp(0.0, 1.0)
        };
        Self {
            source_lang: source_lang.into(),
            target_lang: target_lang.into(),
            source_text: source_text.into(),
            translated_text: translated_text.into(),
            confidence,
        }
    }
}
/// Computes the Levenshtein edit distance between `a` and `b`.
///
/// The distance is the minimum number of single-character insertions,
/// deletions and substitutions needed to turn `a` into `b`. Characters are
/// Unicode scalar values (`char`), not bytes.
///
/// Only two rows of the dynamic-programming table are kept alive, so memory
/// use is proportional to the length of `b` rather than to `len(a) * len(b)`.
#[allow(dead_code)]
pub fn levenshtein(a: &str, b: &str) -> usize {
    // `b` is indexed repeatedly, so it is materialised once; `a` is streamed.
    let b: Vec<char> = b.chars().collect();
    let n = b.len();

    // `prev[j]` is the distance between the processed prefix of `a` and the
    // first `j` characters of `b`. For the empty prefix that is simply `j`.
    let mut prev: Vec<usize> = (0..=n).collect();
    let mut curr = vec![0usize; n + 1];

    for (i, ca) in a.chars().enumerate() {
        // Turning `i + 1` characters into the empty string takes `i + 1` deletions.
        curr[0] = i + 1;
        for (j, &cb) in b.iter().enumerate() {
            curr[j + 1] = if ca == cb {
                prev[j]
            } else {
                // deletion, insertion, substitution
                1 + prev[j + 1].min(curr[j]).min(prev[j])
            };
        }
        std::mem::swap(&mut prev, &mut curr);
    }

    // After the final swap `prev` holds the row for all of `a`. When `a` is
    // empty the loop never runs and this is the initial row, i.e. `n`.
    prev[n]
}
/// An in-memory collection of translation units that supports fuzzy lookup.
#[derive(Debug, Clone, Default)]
#[allow(dead_code)]
pub struct TranslationMemory {
    /// Stored units, in insertion order.
    pub units: Vec<TranslationUnit>,
}

impl TranslationMemory {
    /// Creates an empty memory.
    #[allow(dead_code)]
    pub fn new() -> Self {
        Self { units: Vec::new() }
    }

    /// Appends `unit` to the memory.
    #[allow(dead_code)]
    pub fn add(&mut self, unit: TranslationUnit) {
        self.units.push(unit);
    }

    /// Finds the stored unit whose source text is most similar to `text`.
    ///
    /// Only units whose `source_lang` equals `src` and whose `target_lang`
    /// equals `tgt` are considered. Similarity is
    /// `1 - levenshtein(text, source_text) / max(chars(text), chars(source_text))`,
    /// so it lies in `0.0..=1.0`, with `1.0` meaning identical text. Two empty
    /// strings count as identical.
    ///
    /// Returns the best unit together with its similarity, or `None` when no
    /// unit matches the language pair. On a tie, the unit added first wins.
    #[allow(dead_code)]
    pub fn find_match<'a>(
        &'a self,
        text: &str,
        src: &str,
        tgt: &str,
    ) -> Option<(&'a TranslationUnit, f32)> {
        // The edit distance is measured in `char`s, so the normaliser must be
        // too. Byte lengths (`str::len`) would overstate the length of
        // non-ASCII text and inflate the similarity.
        let text_chars = text.chars().count();

        let mut best: Option<(&'a TranslationUnit, f32)> = None;
        let candidates = self
            .units
            .iter()
            .filter(|u| u.source_lang == src && u.target_lang == tgt);

        for unit in candidates {
            let dist = levenshtein(text, &unit.source_text);
            let max_len = text_chars.max(unit.source_text.chars().count());
            let similarity = if max_len == 0 {
                1.0
            } else {
                1.0 - (dist as f32 / max_len as f32)
            };

            // Strict comparison keeps the earliest unit when scores are equal.
            let improves = best.map_or(true, |(_, top)| similarity > top);
            if improves {
                best = Some((unit, similarity));
            }
        }
        best
    }
}
/// A subtitle cue that carries both the original line and its translation.
///
/// The cue covers the half-open interval `[start_ms, end_ms)`.
#[derive(Debug, Clone)]
#[allow(dead_code)]
pub struct BilingualSubtitle {
    /// Line in the source language.
    pub source: String,
    /// Line in the target language.
    pub translation: String,
    /// Time at which the cue starts, in milliseconds.
    pub start_ms: u64,
    /// Time at which the cue ends (exclusive), in milliseconds.
    pub end_ms: u64,
}

impl BilingualSubtitle {
    /// Creates a cue from its two text lines and its time span.
    #[allow(dead_code)]
    pub fn new(
        source: impl Into<String>,
        translation: impl Into<String>,
        start_ms: u64,
        end_ms: u64,
    ) -> Self {
        let source = source.into();
        let translation = translation.into();
        BilingualSubtitle {
            source,
            translation,
            start_ms,
            end_ms,
        }
    }

    /// Length of the cue in milliseconds; `0` if `end_ms` precedes `start_ms`.
    #[allow(dead_code)]
    pub fn duration_ms(&self) -> u64 {
        if self.end_ms > self.start_ms {
            self.end_ms - self.start_ms
        } else {
            0
        }
    }

    /// Whether the cue is on screen at `timestamp_ms`.
    ///
    /// The start is inclusive and the end is exclusive.
    #[allow(dead_code)]
    pub fn is_active(&self, timestamp_ms: u64) -> bool {
        (self.start_ms..self.end_ms).contains(&timestamp_ms)
    }

    /// Renders the cue as two lines: source first, translation second.
    #[allow(dead_code)]
    pub fn formatted(&self) -> String {
        let mut out = String::with_capacity(self.source.len() + 1 + self.translation.len());
        out.push_str(&self.source);
        out.push('\n');
        out.push_str(&self.translation);
        out
    }
}
/// Unit tests for the language table, edit distance, translation memory and
/// bilingual subtitle helpers defined in this file.
#[cfg(test)]
mod tests {
    use super::*;

    // ---- Language -------------------------------------------------------

    #[test]
    fn test_language_from_code_known() {
        let lang = Language::from_code("en").expect("\"en\" is a built-in code");
        assert_eq!(lang.code, "en");
        assert_eq!(lang.name, "English");
        assert!(!lang.is_rtl);
    }

    #[test]
    fn test_language_from_code_rtl() {
        let lang = Language::from_code("ar").expect("\"ar\" is a built-in code");
        assert!(lang.is_rtl);
        assert_eq!(lang.direction, TextDirection::RightToLeft);
    }

    #[test]
    fn test_language_from_code_hebrew() {
        let lang = Language::from_code("he").expect("\"he\" is a built-in code");
        assert!(lang.is_rtl);
    }

    #[test]
    fn test_language_from_code_unknown() {
        assert!(Language::from_code("xx").is_none());
    }

    #[test]
    fn test_language_all_known_codes() {
        for code in &["en", "fr", "de", "es", "ar", "he", "zh", "ja", "ko", "ru"] {
            assert!(Language::from_code(code).is_some(), "Missing: {code}");
        }
    }

    #[test]
    fn test_language_new_derives_is_rtl() {
        let rtl = Language::new("fa", "Persian", TextDirection::RightToLeft);
        assert!(rtl.is_rtl);
        let ltr = Language::new("it", "Italian", TextDirection::LeftToRight);
        assert!(!ltr.is_rtl);
    }

    // ---- TranslationUnit ------------------------------------------------

    #[test]
    fn test_translation_unit_confidence_clamped() {
        let high = TranslationUnit::new("en", "fr", "a", "b", 1.5);
        assert!((high.confidence - 1.0).abs() < f32::EPSILON);
        let low = TranslationUnit::new("en", "fr", "a", "b", -0.25);
        assert!(low.confidence.abs() < f32::EPSILON);
        let mid = TranslationUnit::new("en", "fr", "a", "b", 0.5);
        assert!((mid.confidence - 0.5).abs() < f32::EPSILON);
    }

    // ---- levenshtein ----------------------------------------------------

    #[test]
    fn test_levenshtein_equal() {
        assert_eq!(levenshtein("hello", "hello"), 0);
    }

    #[test]
    fn test_levenshtein_empty() {
        assert_eq!(levenshtein("", "abc"), 3);
        assert_eq!(levenshtein("abc", ""), 3);
        assert_eq!(levenshtein("", ""), 0);
    }

    #[test]
    fn test_levenshtein_simple_substitution() {
        assert_eq!(levenshtein("kitten", "sitting"), 3);
    }

    #[test]
    fn test_levenshtein_single_insertion() {
        assert_eq!(levenshtein("cat", "cats"), 1);
    }

    #[test]
    fn test_levenshtein_symmetric() {
        assert_eq!(levenshtein("flaw", "lawn"), 2);
        assert_eq!(levenshtein("lawn", "flaw"), 2);
    }

    #[test]
    fn test_levenshtein_counts_chars_not_bytes() {
        // Each of these differs by exactly one character even though the
        // characters involved occupy several bytes in UTF-8.
        assert_eq!(levenshtein("caf\u{e9}", "cafe"), 1);
        assert_eq!(levenshtein("\u{65e5}\u{672c}\u{8a9e}", "\u{65e5}\u{672c}"), 1);
    }

    // ---- TranslationMemory ----------------------------------------------

    #[test]
    fn test_translation_memory_exact_match() {
        let mut mem = TranslationMemory::new();
        mem.add(TranslationUnit::new(
            "en",
            "fr",
            "Hello world",
            "Bonjour le monde",
            1.0,
        ));
        let (unit, score) = mem
            .find_match("Hello world", "en", "fr")
            .expect("an en->fr unit was added");
        assert!(
            (score - 1.0).abs() < 1e-5,
            "Expected exact match score 1.0, got {score}"
        );
        assert_eq!(unit.translated_text, "Bonjour le monde");
    }

    #[test]
    fn test_translation_memory_fuzzy_match() {
        let mut mem = TranslationMemory::new();
        mem.add(TranslationUnit::new(
            "en",
            "de",
            "Hello world",
            "Hallo Welt",
            1.0,
        ));
        let (_, score) = mem
            .find_match("Hello World!", "en", "de")
            .expect("an en->de unit was added");
        assert!(
            score > 0.5,
            "Fuzzy match should have reasonable similarity, got {score}"
        );
    }

    #[test]
    fn test_translation_memory_picks_closest() {
        let mut mem = TranslationMemory::new();
        mem.add(TranslationUnit::new(
            "en",
            "fr",
            "Goodbye world",
            "Au revoir le monde",
            1.0,
        ));
        mem.add(TranslationUnit::new(
            "en",
            "fr",
            "Hello world",
            "Bonjour le monde",
            1.0,
        ));
        let (unit, _) = mem
            .find_match("Hello world!", "en", "fr")
            .expect("en->fr units were added");
        assert_eq!(unit.translated_text, "Bonjour le monde");
    }

    #[test]
    fn test_translation_memory_tie_keeps_first() {
        let mut mem = TranslationMemory::new();
        mem.add(TranslationUnit::new("en", "fr", "Hello", "Bonjour", 1.0));
        mem.add(TranslationUnit::new("en", "fr", "Hello", "Salut", 0.5));
        let (unit, _) = mem
            .find_match("Hello", "en", "fr")
            .expect("en->fr units were added");
        assert_eq!(unit.translated_text, "Bonjour");
    }

    #[test]
    fn test_translation_memory_empty_texts_are_identical() {
        let mut mem = TranslationMemory::new();
        mem.add(TranslationUnit::new("en", "fr", "", "", 1.0));
        let (_, score) = mem
            .find_match("", "en", "fr")
            .expect("an en->fr unit was added");
        assert!((score - 1.0).abs() < 1e-5, "got {score}");
    }

    #[test]
    fn test_translation_memory_no_match_wrong_lang() {
        let mut mem = TranslationMemory::new();
        mem.add(TranslationUnit::new("en", "fr", "Hello", "Bonjour", 1.0));
        assert!(mem.find_match("Hello", "en", "de").is_none());
    }

    #[test]
    fn test_translation_memory_empty() {
        let mem = TranslationMemory::new();
        assert!(mem.find_match("test", "en", "fr").is_none());
    }

    // ---- BilingualSubtitle ----------------------------------------------

    #[test]
    fn test_bilingual_subtitle_is_active() {
        let sub = BilingualSubtitle::new("Hello", "Hola", 1000, 4000);
        assert!(sub.is_active(2000));
        assert!(!sub.is_active(500));
        // The end of the interval is exclusive, the start inclusive.
        assert!(!sub.is_active(4000));
        assert!(sub.is_active(1000));
    }

    #[test]
    fn test_bilingual_subtitle_duration() {
        let sub = BilingualSubtitle::new("Hello", "Hola", 1000, 4000);
        assert_eq!(sub.duration_ms(), 3000);
    }

    #[test]
    fn test_bilingual_subtitle_inverted_interval() {
        let sub = BilingualSubtitle::new("Hello", "Hola", 4000, 1000);
        assert_eq!(sub.duration_ms(), 0);
        assert!(!sub.is_active(2000));
    }

    #[test]
    fn test_bilingual_subtitle_formatted() {
        let sub = BilingualSubtitle::new("Hello", "Hola", 0, 1000);
        let fmt = sub.formatted();
        assert!(fmt.contains("Hello"));
        assert!(fmt.contains("Hola"));
        assert!(fmt.contains('\n'));
    }
}