use crate::{
analysis::{
linting::{IssueCategory, IssueSeverity, LintIssue, LintRule},
ScriptAnalysis,
},
parser::Section,
};
use alloc::{format, string::ToString, vec::Vec};
/// Lint rule that reports likely encoding problems in a script.
///
/// It flags stray control characters in event text and script info fields,
/// Unicode replacement characters (U+FFFD) in event text, and event text
/// dominated by wide multi-byte characters.
///
/// The rule is stateless, so it is a unit struct; `EncodingRule` and
/// `EncodingRule::default()` are equivalent.
#[derive(Debug, Clone, Default)]
pub struct EncodingRule;
/// Wires [`EncodingRule`] into the linter through the [`LintRule`] trait.
impl LintRule for EncodingRule {
    /// Identifier attached to every issue this rule emits.
    fn id(&self) -> &'static str {
        "encoding"
    }

    /// Short display name of the rule.
    fn name(&self) -> &'static str {
        "Encoding"
    }

    /// One-line summary of what the rule looks for.
    fn description(&self) -> &'static str {
        "Detects potential encoding or character issues"
    }

    /// Severity this rule reports by default: a warning.
    fn default_severity(&self) -> IssueSeverity {
        IssueSeverity::Warning
    }

    /// Category under which this rule's issues are grouped.
    fn category(&self) -> IssueCategory {
        IssueCategory::Encoding
    }

    /// Collects every encoding issue found in the analyzed script.
    ///
    /// Events of the first `Events` section (if there is one) are checked in
    /// order, followed by the script info fields. Issues are returned in that
    /// same order; the vector is empty when nothing is flagged.
    fn check_script(&self, analysis: &ScriptAnalysis) -> Vec<LintIssue> {
        let script = analysis.script();

        // Only the first `Events` section is inspected; later ones are ignored.
        let first_events = script.sections().iter().find_map(|section| {
            if let Section::Events(events) = section {
                Some(events)
            } else {
                None
            }
        });

        let mut found = Vec::new();
        for event in first_events.into_iter().flatten() {
            self.check_event_encoding(&mut found, event);
        }
        self.check_script_info_encoding(&mut found, script);
        found
    }
}
/// Private checks backing the encoding rule.
impl EncodingRule {
    /// Returns `true` for control characters that are not ordinary whitespace.
    ///
    /// `'\n'`, `'\r'` and `'\t'` are exempt; any other character for which
    /// [`char::is_control`] holds is treated as suspicious. Event text and
    /// script info values share this predicate so they agree on what counts
    /// as a control character (a tab is never reported).
    fn is_disallowed_control(c: char) -> bool {
        c.is_control() && !matches!(c, '\n' | '\r' | '\t')
    }

    /// Appends to `issues` every encoding problem found in one event's text.
    ///
    /// Up to three independent issues may be pushed, in this order:
    /// 1. a default-severity issue when the text contains a disallowed
    ///    control character,
    /// 2. a default-severity issue when the text contains U+FFFD,
    /// 3. a hint when the text averages more than three bytes per character.
    fn check_event_encoding(&self, issues: &mut Vec<LintIssue>, event: &crate::parser::Event) {
        if event.text.chars().any(Self::is_disallowed_control) {
            issues.push(
                LintIssue::new(
                    self.default_severity(),
                    IssueCategory::Encoding,
                    self.id(),
                    "Event contains non-printable control characters".to_string(),
                )
                .with_description(
                    "Control characters may cause display issues in subtitle renderers".to_string(),
                )
                .with_suggested_fix(
                    "Remove or replace control characters with appropriate text".to_string(),
                ),
            );
        }

        // U+FFFD is what lossy decoders substitute for undecodable bytes.
        if event.text.contains('\u{FFFD}') {
            issues.push(
                LintIssue::new(
                    self.default_severity(),
                    IssueCategory::Encoding,
                    self.id(),
                    "Event contains Unicode replacement character (�)".to_string(),
                )
                .with_description(
                    "Replacement characters indicate corrupted or invalid encoding".to_string(),
                )
                .with_suggested_fix("Check source file encoding and re-import".to_string()),
            );
        }

        // `len()` is the UTF-8 byte length. The comparison is strict, so text
        // made only of three-byte characters does not trigger the hint; the
        // text must be dominated by four-byte code points.
        let char_count = event.text.chars().count();
        let byte_count = event.text.len();
        if char_count > 0 && byte_count > char_count * 3 {
            issues.push(
                LintIssue::new(
                    IssueSeverity::Hint,
                    IssueCategory::Encoding,
                    self.id(),
                    "Event contains many multi-byte characters".to_string(),
                )
                .with_description(
                    "Heavy use of multi-byte characters may impact performance".to_string(),
                ),
            );
        }
    }

    /// Appends to `issues` one default-severity issue per script info field
    /// whose value contains a disallowed control character.
    ///
    /// Only the first `ScriptInfo` section of `script` is inspected; nothing
    /// is pushed when the script has no such section.
    fn check_script_info_encoding(
        &self,
        issues: &mut Vec<LintIssue>,
        script: &crate::parser::Script,
    ) {
        let first_info = script.sections().iter().find_map(|section| {
            if let Section::ScriptInfo(info) = section {
                Some(info)
            } else {
                None
            }
        });

        if let Some(info) = first_info {
            for (key, value) in &info.fields {
                if value.chars().any(Self::is_disallowed_control) {
                    issues.push(LintIssue::new(
                        self.default_severity(),
                        IssueCategory::Encoding,
                        self.id(),
                        format!("Script info field '{key}' contains control characters"),
                    ));
                }
            }
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns `true` when at least one issue's message contains `needle`.
    fn has_message(issues: &[LintIssue], needle: &str) -> bool {
        issues.iter().any(|issue| issue.message().contains(needle))
    }

    /// The rule's static metadata matches the documented values.
    #[test]
    fn rule_metadata_correct() {
        let rule = EncodingRule;
        assert_eq!(rule.id(), "encoding");
        assert_eq!(rule.name(), "Encoding");
        assert_eq!(
            rule.description(),
            "Detects potential encoding or character issues"
        );
        assert_eq!(rule.default_severity(), IssueSeverity::Warning);
        assert_eq!(rule.category(), IssueCategory::Encoding);
    }

    /// A script holding only a clean `[Script Info]` section yields no issues.
    #[test]
    fn empty_script_no_issues() {
        let source = "[Script Info]\nTitle: Test";
        let parsed = crate::parser::Script::parse(source).unwrap();
        let analysis = ScriptAnalysis::analyze(&parsed).unwrap();
        let rule = EncodingRule;
        let found = rule.check_script(&analysis);
        assert!(found.is_empty());
    }

    /// Accented Latin text is valid and must not be reported.
    #[test]
    fn valid_text_no_issues() {
        let source = r"[Events]
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
Dialogue: 0,0:00:00.00,0:00:05.00,Default,,0,0,0,,Valid text with unicode: ñáéíóú";
        let parsed = crate::parser::Script::parse(source).unwrap();
        let analysis = ScriptAnalysis::analyze(&parsed).unwrap();
        let rule = EncodingRule;
        let found = rule.check_script(&analysis);
        assert!(found.is_empty());
    }

    /// The `\N` line-break sequence in event text is not reported.
    #[test]
    fn newlines_allowed() {
        let source = r"[Events]
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
Dialogue: 0,0:00:00.00,0:00:05.00,Default,,0,0,0,,Text with\Nline break";
        let parsed = crate::parser::Script::parse(source).unwrap();
        let analysis = ScriptAnalysis::analyze(&parsed).unwrap();
        let rule = EncodingRule;
        let found = rule.check_script(&analysis);
        assert!(found.is_empty());
    }

    /// A literal tab inside event text is not reported.
    #[test]
    fn tabs_allowed() {
        let source = "[Events]\nFormat: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\nDialogue: 0,0:00:00.00,0:00:05.00,Default,,0,0,0,,Text with\ttab";
        let parsed = crate::parser::Script::parse(source).unwrap();
        let analysis = ScriptAnalysis::analyze(&parsed).unwrap();
        let rule = EncodingRule;
        let found = rule.check_script(&analysis);
        assert!(found.is_empty());
    }

    /// U+FFFD in event text produces a "replacement character" issue.
    #[test]
    fn replacement_character_detected() {
        let source = r"[Events]
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
Dialogue: 0,0:00:00.00,0:00:05.00,Default,,0,0,0,,Text with � replacement";
        let parsed = crate::parser::Script::parse(source).unwrap();
        let analysis = ScriptAnalysis::analyze(&parsed).unwrap();
        let rule = EncodingRule;
        let found = rule.check_script(&analysis);
        assert!(!found.is_empty());
        assert!(has_message(&found, "replacement character"));
    }

    /// A NUL byte in a script info value produces a "control characters" issue.
    #[test]
    fn control_character_in_script_info() {
        let source = "[Script Info]\nTitle: Test\x00\n\n[Events]\nFormat: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text";
        let parsed = crate::parser::Script::parse(source).unwrap();
        let analysis = ScriptAnalysis::analyze(&parsed).unwrap();
        let rule = EncodingRule;
        let found = rule.check_script(&analysis);
        assert!(!found.is_empty());
        assert!(has_message(&found, "control characters"));
    }

    /// A script without an `[Events]` section is handled and yields no issues.
    #[test]
    fn no_events_section_no_issues() {
        let source = r"[Script Info]
Title: Test
[V4+ Styles]
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
Style: Default,Arial,20,&H00FFFFFF&,&H000000FF&,&H00000000&,&H00000000&,0,0,0,0,100,100,0,0,1,2,0,2,10,10,10,1";
        let parsed = crate::parser::Script::parse(source).unwrap();
        let analysis = ScriptAnalysis::analyze(&parsed).unwrap();
        let rule = EncodingRule;
        let found = rule.check_script(&analysis);
        assert!(found.is_empty());
    }

    /// Event text made of emoji triggers the multi-byte hint.
    #[test]
    fn multibyte_characters_hint() {
        let heavy_unicode = "🎵🎶🎵🎶".repeat(20);
        let source = format!(
            r"[Events]
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
Dialogue: 0,0:00:00.00,0:00:05.00,Default,,0,0,0,,{heavy_unicode}"
        );
        let parsed = crate::parser::Script::parse(&source).unwrap();
        let analysis = ScriptAnalysis::analyze(&parsed).unwrap();
        let rule = EncodingRule;
        let found = rule.check_script(&analysis);
        assert!(has_message(&found, "multi-byte characters"));
    }

    /// A NUL byte in event text is reported with a description and a fix.
    #[test]
    fn control_character_in_event_detected() {
        let source = "[Events]\nFormat: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\nDialogue: 0,0:00:00.00,0:00:05.00,Default,,0,0,0,,Text with\x00control char";
        let parsed = crate::parser::Script::parse(source).unwrap();
        let analysis = ScriptAnalysis::analyze(&parsed).unwrap();
        let rule = EncodingRule;
        let found = rule.check_script(&analysis);
        assert!(!found.is_empty());
        assert!(has_message(&found, "non-printable control characters"));

        let control_issue = found
            .iter()
            .find(|issue| issue.message().contains("non-printable control characters"))
            .unwrap();
        assert!(control_issue.description().is_some());
        assert!(control_issue.suggested_fix().is_some());
    }
}