use std::path::Path;
use std::sync::LazyLock;
use regex::Regex;
use tokio::fs;
use crate::error::Result;
use crate::model::caption::Extension;
/// Lazily compiled pattern for a WebVTT cue timing of the form
/// `HH:MM:SS.mmm --> HH:MM:SS.mmm`.
///
/// Capture groups 1-4 hold the hours, minutes, seconds and milliseconds of the
/// start time; groups 5-8 hold the same components of the end time. The
/// pattern is not anchored, so it matches anywhere inside the text it is
/// applied to.
// NOTE(review): every component has a fixed width and the hours field is
// mandatory, so a timing written as `MM:SS.mmm` does not match — confirm
// whether such files need to be accepted.
static VTT_TIMESTAMP_RE: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r"(\d{2}):(\d{2}):(\d{2})\.(\d{3})\s+-->\s+(\d{2}):(\d{2}):(\d{2})\.(\d{3})")
.expect("valid VTT timestamp regex")
});
/// Lazily compiled pattern for an SRT timing of the form
/// `HH:MM:SS,mmm --> HH:MM:SS,mmm` (comma before the milliseconds).
///
/// Capture groups 1-4 hold the hours, minutes, seconds and milliseconds of the
/// start time; groups 5-8 hold the same components of the end time. The
/// pattern is not anchored, so it matches anywhere inside the text it is
/// applied to.
static SRT_TIMESTAMP_RE: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r"(\d{2}):(\d{2}):(\d{2}),(\d{3})\s+-->\s+(\d{2}):(\d{2}):(\d{2}),(\d{3})")
.expect("valid SRT timestamp regex")
});
/// Lazily compiled pattern that matches text consisting solely of one or more
/// digits; used to recognise SRT cue index lines.
static SRT_INDEX_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\d+$").expect("valid SRT index regex"));
/// Outcome of validating a subtitle document.
#[derive(Debug, Clone, PartialEq)]
pub struct ValidationResult {
/// Whether the document passed validation.
pub is_valid: bool,
/// Format the document was validated as; `None` when built by
/// [`ValidationResult::invalid`].
pub format: Option<Extension>,
/// Number of subtitle entries counted by the validator;
/// [`ValidationResult::invalid`] sets this to `0`.
pub entry_count: usize,
/// Messages describing problems that make the document invalid.
pub errors: Vec<String>,
/// Messages describing non-fatal findings.
pub warnings: Vec<String>,
}
impl ValidationResult {
    /// Creates a passing result for `format` with `entry_count` entries and
    /// empty error and warning lists.
    pub fn valid(format: Extension, entry_count: usize) -> Self {
        let format = Some(format);
        Self {
            is_valid: true,
            format,
            entry_count,
            errors: vec![],
            warnings: vec![],
        }
    }

    /// Creates a failing result that carries `errors`; the format is unset,
    /// the entry count is zero and the warning list is empty.
    pub fn invalid(errors: Vec<String>) -> Self {
        Self {
            errors,
            is_valid: false,
            format: None,
            entry_count: 0,
            warnings: vec![],
        }
    }

    /// Returns the result with `warning` appended to its warnings.
    pub fn with_warning(self, warning: String) -> Self {
        self.with_warnings(vec![warning])
    }

    /// Returns the result with every item of `warnings` appended, in order,
    /// after the warnings it already holds.
    pub fn with_warnings(self, warnings: Vec<String>) -> Self {
        let mut updated = self;
        updated.warnings.extend(warnings);
        updated
    }
}
/// Reads the subtitle file at `subtitle_path` and validates its contents.
///
/// The file is checked for existence, read as UTF-8 text, rejected if it
/// contains only whitespace, and then handed to the validator that matches
/// the format reported by `super::detect_subtitle_format`.
///
/// # Returns
///
/// Problems with the file itself (missing, unreadable, empty, undetectable or
/// unsupported format) are reported as an invalid [`ValidationResult`] inside
/// `Ok`, not as an `Err`.
///
/// # Errors
///
/// Propagates any error returned by the format-specific validator.
pub async fn validate_subtitle(subtitle_path: impl AsRef<Path>) -> Result<ValidationResult> {
    let subtitle_path = subtitle_path.as_ref();
    tracing::debug!(path = ?subtitle_path, "💬 Validating subtitle file");

    // `Path::exists` is a blocking metadata lookup and would stall the async
    // executor. It is defined as "metadata can be read", so the asynchronous
    // metadata call yields the same answer without blocking.
    if fs::metadata(subtitle_path).await.is_err() {
        return Ok(ValidationResult::invalid(vec![
            "Subtitle file does not exist".to_string(),
        ]));
    }

    let content = match fs::read_to_string(subtitle_path).await {
        Ok(content) => content,
        Err(e) => {
            return Ok(ValidationResult::invalid(vec![format!(
                "Failed to read subtitle file: {}",
                e
            )]));
        }
    };

    if content.trim().is_empty() {
        return Ok(ValidationResult::invalid(vec!["Subtitle file is empty".to_string()]));
    }

    let Ok(format) = super::detect_subtitle_format(&content) else {
        return Ok(ValidationResult::invalid(vec![
            "Could not detect subtitle format".to_string(),
        ]));
    };
    tracing::debug!(format = ?format, "💬 Detected subtitle format for validation");

    match format {
        Extension::Vtt => validate_vtt(&content),
        Extension::Srt => validate_srt(&content),
        Extension::Ass | Extension::Ssa => validate_ass(&content),
        // Formats without a dedicated validator are reported as invalid.
        _ => Ok(ValidationResult::invalid(vec![format!(
            "Unsupported subtitle format for validation: {:?}",
            format
        )])),
    }
}
/// Reports whether a subtitle of `subtitle_format` can be used with the
/// container named by `container_format`.
///
/// The container name is compared case-insensitively. `mp4`/`m4v` accept VTT
/// and SRT; `mkv`/`webm` accept VTT, SRT, ASS and SSA; `avi` accepts SRT only.
/// Any other container name is treated as compatible with every format.
pub fn is_format_compatible(subtitle_format: &Extension, container_format: &str) -> bool {
    let container = container_format.to_lowercase();
    match (container.as_str(), subtitle_format) {
        ("mp4" | "m4v", Extension::Vtt | Extension::Srt) => true,
        ("mkv" | "webm", Extension::Vtt | Extension::Srt | Extension::Ass | Extension::Ssa) => true,
        ("avi", Extension::Srt) => true,
        // A known container paired with a format not listed above.
        ("mp4" | "m4v" | "mkv" | "webm" | "avi", _) => false,
        // Unknown containers are not restricted.
        _ => true,
    }
}
fn parse_timestamp(caps: ®ex::Captures, offset: usize) -> f64 {
let h: f64 = caps[offset + 1].parse().unwrap_or(0.0);
let m: f64 = caps[offset + 2].parse().unwrap_or(0.0);
let s: f64 = caps[offset + 3].parse().unwrap_or(0.0);
let ms: f64 = caps[offset + 4].parse().unwrap_or(0.0);
h * 3600.0 + m * 60.0 + s + ms / 1000.0
}
/// Checks the timing of one subtitle entry and records any findings.
///
/// * `entry_count` - number of the entry, used only in the messages.
/// * `start_time` / `end_time` - the entry's timing.
/// * `last_end_time` - end time of the preceding entry, if there was one.
/// * `errors` - receives a message when the start is not before the end.
/// * `warnings` - receives a message when the entry starts before the
///   preceding entry ended.
fn validate_entry_timestamps(
    entry_count: usize,
    start_time: f64,
    end_time: f64,
    last_end_time: Option<f64>,
    errors: &mut Vec<String>,
    warnings: &mut Vec<String>,
) {
    if end_time <= start_time {
        let message = format!(
            "Invalid time range in entry {}: start time must be before end time",
            entry_count
        );
        errors.push(message);
    }

    match last_end_time {
        Some(previous_end) if start_time < previous_end => {
            let message = format!(
                "Entry {} has overlapping or out-of-order timestamps",
                entry_count
            );
            warnings.push(message);
        }
        _ => {}
    }
}
/// Validates the timing lines of a subtitle document using `timestamp_re`.
///
/// Every line (trimmed) that matches `timestamp_re` counts as one entry. Its
/// start and end times are read from the match and checked against each other
/// and against the end time of the previous entry. A document with no matching
/// line is reported as an error.
///
/// Returns a valid result tagged with `format` when no error was recorded, and
/// an invalid result otherwise; collected warnings are attached in both cases.
fn validate_generic_subtitle(content: &str, timestamp_re: &Regex, format: Extension) -> Result<ValidationResult> {
    let mut errors: Vec<String> = Vec::new();
    let mut warnings: Vec<String> = Vec::new();
    let mut entry_count: usize = 0;
    let mut previous_end: Option<f64> = None;

    let timing_matches = content
        .lines()
        .filter_map(|line| timestamp_re.captures(line.trim()));

    for caps in timing_matches {
        entry_count += 1;
        let (start_time, end_time) = (parse_timestamp(&caps, 0), parse_timestamp(&caps, 4));
        validate_entry_timestamps(
            entry_count,
            start_time,
            end_time,
            previous_end,
            &mut errors,
            &mut warnings,
        );
        previous_end = Some(end_time);
    }

    if entry_count == 0 {
        errors.push("No valid subtitle entries found".to_string());
    }

    let outcome = if !errors.is_empty() {
        ValidationResult::invalid(errors)
    } else {
        ValidationResult::valid(format, entry_count)
    };
    Ok(outcome.with_warnings(warnings))
}
/// Validates WebVTT content.
///
/// Runs the shared timing validation with the VTT timestamp pattern and then
/// requires the content to begin with the `WEBVTT` signature. Leading
/// whitespace and an optional byte order mark before the signature are
/// tolerated. When the signature is missing the result is marked invalid, its
/// format is cleared, and the header error is placed first in the error list.
fn validate_vtt(content: &str) -> Result<ValidationResult> {
    let mut result = validate_generic_subtitle(content, &VTT_TIMESTAMP_RE, Extension::Vtt)?;

    // A WebVTT file may start with U+FEFF. `str::trim` does not strip it, so
    // remove it explicitly before looking for the signature.
    let body = content.strip_prefix('\u{feff}').unwrap_or(content);
    if !body.trim().starts_with("WEBVTT") {
        result.is_valid = false;
        result.format = None;
        result
            .errors
            .insert(0, "VTT file must start with 'WEBVTT' header".to_string());
    }
    Ok(result)
}
/// Validates SRT content.
///
/// Runs the shared timing validation with the SRT timestamp pattern, then
/// scans the lines for cue indices. An all-digit line counts as an index only
/// at the start of the content or after a blank line. Non-consecutive indices
/// and a complete absence of indices produce warnings, never errors.
fn validate_srt(content: &str) -> Result<ValidationResult> {
    let mut result = validate_generic_subtitle(content, &SRT_TIMESTAMP_RE, Extension::Srt)?;

    // A leading byte order mark would otherwise stick to the first index line
    // and stop it from being recognised as a number.
    let body = content.strip_prefix('\u{feff}').unwrap_or(content);

    // `0` means "no previous index to compare against": nothing has been seen
    // yet, or the previous index was zero or could not be parsed.
    let mut last_index: usize = 0;
    let mut expect_index = true;
    let mut has_index = false;

    for line in body.lines() {
        let trimmed = line.trim();
        if trimmed.is_empty() {
            // A blank line separates cues, so the next line may be an index.
            expect_index = true;
            continue;
        }
        if expect_index && SRT_INDEX_RE.is_match(trimmed) {
            has_index = true;
            // Digits that do not fit in `usize` fall back to 0.
            let index: usize = trimmed.parse().unwrap_or(0);
            // Saturate so that an index of `usize::MAX` cannot overflow.
            let expected = last_index.saturating_add(1);
            if last_index != 0 && index != expected {
                result.warnings.push(format!(
                    "Subtitle index {} is not sequential (expected {})",
                    index, expected
                ));
            }
            last_index = index;
            expect_index = false;
            continue;
        }
        if SRT_TIMESTAMP_RE.is_match(trimmed) {
            // Past the timing line, so numeric lines that follow are cue text.
            expect_index = false;
        }
    }

    if !has_index && result.entry_count > 0 {
        result.warnings.push("SRT file is missing subtitle indices".to_string());
    }
    Ok(result)
}
/// Validates ASS/SSA content.
///
/// The content must contain both a `[Script Info]` and an `[Events]` section;
/// each missing section is an error. The format is reported as ASS when a
/// `[V4+ Styles]` section is present and as SSA otherwise. Every line that
/// (trimmed) starts with `Dialogue:` counts as one entry; having none is only
/// a warning.
fn validate_ass(content: &str) -> Result<ValidationResult> {
    let required_sections = [
        ("[Script Info]", "ASS/SSA file must contain [Script Info] section"),
        ("[Events]", "ASS/SSA file must contain [Events] section"),
    ];

    let mut errors: Vec<String> = Vec::new();
    for (marker, message) in required_sections {
        if !content.contains(marker) {
            errors.push(message.to_string());
        }
    }

    let format = if content.contains("[V4+ Styles]") {
        Extension::Ass
    } else {
        Extension::Ssa
    };

    let entry_count = content
        .lines()
        .filter(|line| line.trim().starts_with("Dialogue:"))
        .count();

    let mut warnings: Vec<String> = Vec::new();
    if entry_count == 0 {
        warnings.push("No dialogue lines found in ASS/SSA file".to_string());
    }

    let outcome = if errors.is_empty() {
        ValidationResult::valid(format, entry_count)
    } else {
        ValidationResult::invalid(errors)
    };
    Ok(outcome.with_warnings(warnings))
}