Skip to main content

ass_core/analysis/linting/rules/encoding/
rule.rs

1//! [`EncodingRule`] implementation for detecting encoding and character issues.
2//!
3//! Provides the [`EncodingRule`] lint rule along with its helper checks for
4//! individual events and the script-info section.
5
6use crate::{
7    analysis::{
8        linting::{IssueCategory, IssueSeverity, LintIssue, LintRule},
9        ScriptAnalysis,
10    },
11    parser::Section,
12};
13use alloc::{format, string::ToString, vec::Vec};
14
/// Lint rule that flags encoding and character problems in subtitle scripts
///
/// Inspects event text and script-info values for characters that may cause
/// encoding issues, display problems, or compatibility issues across
/// different subtitle renderers and media players.
///
/// # Encoding Checks
///
/// - Control characters: non-printable characters in event text (newline,
///   carriage return and tab are tolerated) and in script-info values
///   (newline and carriage return are tolerated)
/// - Replacement characters: U+FFFD in event text, which indicates corrupted
///   or wrongly decoded input
/// - Multi-byte density: a hint when event text averages more than three
///   bytes per character
///
/// # Performance
///
/// - Time complexity: O(n * m) for n events and m characters per event
/// - Memory: nothing is allocated beyond the reported issues themselves
/// - Target: <2ms for typical scripts with 1000 events
///
/// # Example
///
/// ```rust
/// use ass_core::analysis::linting::rules::encoding::EncodingRule;
/// use ass_core::analysis::linting::LintRule;
/// use ass_core::{Script, ScriptAnalysis};
///
/// let script_text = format!("[Events]\nFormat: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\nDialogue: 0,0:00:00.00,0:00:05.00,Default,,0,0,0,,Text with{}invalid character", '\x00');
/// let script = Script::parse(&script_text)?;
///
/// let rule = EncodingRule;
/// let analysis = ScriptAnalysis::analyze(&script).unwrap();
/// let issues = rule.check_script(&analysis);
/// assert!(!issues.is_empty()); // Should detect the control character
/// # Ok::<(), Box<dyn std::error::Error>>(())
/// ```
#[derive(Debug, Clone, Copy, Default)]
pub struct EncodingRule;
50
51impl LintRule for EncodingRule {
52    fn id(&self) -> &'static str {
53        "encoding"
54    }
55
56    fn name(&self) -> &'static str {
57        "Encoding"
58    }
59
60    fn description(&self) -> &'static str {
61        "Detects potential encoding or character issues"
62    }
63
64    fn default_severity(&self) -> IssueSeverity {
65        IssueSeverity::Warning
66    }
67
68    fn category(&self) -> IssueCategory {
69        IssueCategory::Encoding
70    }
71
72    fn check_script(&self, analysis: &ScriptAnalysis) -> Vec<LintIssue> {
73        let mut issues = Vec::new();
74
75        if let Some(Section::Events(events)) = analysis
76            .script()
77            .sections()
78            .iter()
79            .find(|s| matches!(s, Section::Events(_)))
80        {
81            for event in events {
82                self.check_event_encoding(&mut issues, event);
83            }
84        }
85
86        self.check_script_info_encoding(&mut issues, analysis.script());
87
88        issues
89    }
90}
91
92impl EncodingRule {
93    /// Check encoding issues in a single event
94    fn check_event_encoding(&self, issues: &mut Vec<LintIssue>, event: &crate::parser::Event) {
95        if event
96            .text
97            .chars()
98            .any(|c| c.is_control() && c != '\n' && c != '\r' && c != '\t')
99        {
100            let issue = LintIssue::new(
101                self.default_severity(),
102                IssueCategory::Encoding,
103                self.id(),
104                "Event contains non-printable control characters".to_string(),
105            )
106            .with_description(
107                "Control characters may cause display issues in subtitle renderers".to_string(),
108            )
109            .with_suggested_fix(
110                "Remove or replace control characters with appropriate text".to_string(),
111            );
112            issues.push(issue);
113        }
114
115        if event.text.contains('\u{FFFD}') {
116            let issue = LintIssue::new(
117                self.default_severity(),
118                IssueCategory::Encoding,
119                self.id(),
120                "Event contains Unicode replacement character (�)".to_string(),
121            )
122            .with_description(
123                "Replacement characters indicate corrupted or invalid encoding".to_string(),
124            )
125            .with_suggested_fix("Check source file encoding and re-import".to_string());
126            issues.push(issue);
127        }
128
129        let char_count = event.text.chars().count();
130        let byte_count = event.text.len();
131
132        // Check for heavy multi-byte character usage
133        if char_count > 0 && byte_count > char_count * 3 {
134            let issue = LintIssue::new(
135                IssueSeverity::Hint,
136                IssueCategory::Encoding,
137                self.id(),
138                "Event contains many multi-byte characters".to_string(),
139            )
140            .with_description(
141                "Heavy use of multi-byte characters may impact performance".to_string(),
142            );
143            issues.push(issue);
144        }
145    }
146
147    /// Check encoding issues in script info section
148    fn check_script_info_encoding(
149        &self,
150        issues: &mut Vec<LintIssue>,
151        script: &crate::parser::Script,
152    ) {
153        if let Some(Section::ScriptInfo(info)) = script
154            .sections()
155            .iter()
156            .find(|s| matches!(s, Section::ScriptInfo(_)))
157        {
158            for (key, value) in &info.fields {
159                if value
160                    .chars()
161                    .any(|c| c.is_control() && c != '\n' && c != '\r')
162                {
163                    let issue = LintIssue::new(
164                        self.default_severity(),
165                        IssueCategory::Encoding,
166                        self.id(),
167                        format!("Script info field '{key}' contains control characters"),
168                    );
169                    issues.push(issue);
170                }
171            }
172        }
173    }
174}