1use crate::{
7 analysis::{
8 linting::{IssueCategory, IssueSeverity, LintIssue, LintRule},
9 ScriptAnalysis,
10 },
11 parser::Section,
12};
13use alloc::{format, string::ToString, vec::Vec};
14
/// Lint rule that reports potential encoding or character problems.
///
/// The rule is stateless. Its [`LintRule`] implementation inspects event text
/// for control characters, the Unicode replacement character (U+FFFD) and a
/// high ratio of UTF-8 bytes to characters, and inspects script info field
/// values for control characters.
pub struct EncodingRule;
50
51impl LintRule for EncodingRule {
52 fn id(&self) -> &'static str {
53 "encoding"
54 }
55
56 fn name(&self) -> &'static str {
57 "Encoding"
58 }
59
60 fn description(&self) -> &'static str {
61 "Detects potential encoding or character issues"
62 }
63
64 fn default_severity(&self) -> IssueSeverity {
65 IssueSeverity::Warning
66 }
67
68 fn category(&self) -> IssueCategory {
69 IssueCategory::Encoding
70 }
71
72 fn check_script(&self, analysis: &ScriptAnalysis) -> Vec<LintIssue> {
73 let mut issues = Vec::new();
74
75 if let Some(Section::Events(events)) = analysis
76 .script()
77 .sections()
78 .iter()
79 .find(|s| matches!(s, Section::Events(_)))
80 {
81 for event in events {
82 self.check_event_encoding(&mut issues, event);
83 }
84 }
85
86 self.check_script_info_encoding(&mut issues, analysis.script());
87
88 issues
89 }
90}
91
92impl EncodingRule {
93 fn check_event_encoding(&self, issues: &mut Vec<LintIssue>, event: &crate::parser::Event) {
95 if event
96 .text
97 .chars()
98 .any(|c| c.is_control() && c != '\n' && c != '\r' && c != '\t')
99 {
100 let issue = LintIssue::new(
101 self.default_severity(),
102 IssueCategory::Encoding,
103 self.id(),
104 "Event contains non-printable control characters".to_string(),
105 )
106 .with_description(
107 "Control characters may cause display issues in subtitle renderers".to_string(),
108 )
109 .with_suggested_fix(
110 "Remove or replace control characters with appropriate text".to_string(),
111 );
112 issues.push(issue);
113 }
114
115 if event.text.contains('\u{FFFD}') {
116 let issue = LintIssue::new(
117 self.default_severity(),
118 IssueCategory::Encoding,
119 self.id(),
120 "Event contains Unicode replacement character (�)".to_string(),
121 )
122 .with_description(
123 "Replacement characters indicate corrupted or invalid encoding".to_string(),
124 )
125 .with_suggested_fix("Check source file encoding and re-import".to_string());
126 issues.push(issue);
127 }
128
129 let char_count = event.text.chars().count();
130 let byte_count = event.text.len();
131
132 if char_count > 0 && byte_count > char_count * 3 {
134 let issue = LintIssue::new(
135 IssueSeverity::Hint,
136 IssueCategory::Encoding,
137 self.id(),
138 "Event contains many multi-byte characters".to_string(),
139 )
140 .with_description(
141 "Heavy use of multi-byte characters may impact performance".to_string(),
142 );
143 issues.push(issue);
144 }
145 }
146
147 fn check_script_info_encoding(
149 &self,
150 issues: &mut Vec<LintIssue>,
151 script: &crate::parser::Script,
152 ) {
153 if let Some(Section::ScriptInfo(info)) = script
154 .sections()
155 .iter()
156 .find(|s| matches!(s, Section::ScriptInfo(_)))
157 {
158 for (key, value) in &info.fields {
159 if value
160 .chars()
161 .any(|c| c.is_control() && c != '\n' && c != '\r')
162 {
163 let issue = LintIssue::new(
164 self.default_severity(),
165 IssueCategory::Encoding,
166 self.id(),
167 format!("Script info field '{key}' contains control characters"),
168 );
169 issues.push(issue);
170 }
171 }
172 }
173 }
174}
175
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses and analyzes `source`, runs [`EncodingRule`] over the result and
    /// hands the reported issues to `inspect`.
    ///
    /// Panics if parsing or analysis fails, which fails the calling test.
    fn with_encoding_issues(source: &str, inspect: impl FnOnce(&[LintIssue])) {
        let script = crate::parser::Script::parse(source).unwrap();
        let analysis = ScriptAnalysis::analyze(&script).unwrap();
        let issues = EncodingRule.check_script(&analysis);
        inspect(&issues);
    }

    /// Returns `true` when at least one issue message contains `needle`.
    fn any_message_contains(issues: &[LintIssue], needle: &str) -> bool {
        issues.iter().any(|issue| issue.message().contains(needle))
    }

    #[test]
    fn rule_metadata_correct() {
        let rule = EncodingRule;

        assert_eq!(rule.id(), "encoding");
        assert_eq!(rule.name(), "Encoding");
        assert_eq!(
            rule.description(),
            "Detects potential encoding or character issues"
        );
        assert_eq!(rule.default_severity(), IssueSeverity::Warning);
        assert_eq!(rule.category(), IssueCategory::Encoding);
    }

    #[test]
    fn empty_script_no_issues() {
        let script_text = "[Script Info]\nTitle: Test";

        with_encoding_issues(script_text, |issues| {
            assert!(issues.is_empty());
        });
    }

    #[test]
    fn valid_text_no_issues() {
        let script_text = r"[Events]
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
Dialogue: 0,0:00:00.00,0:00:05.00,Default,,0,0,0,,Valid text with unicode: ñáéíóú";

        with_encoding_issues(script_text, |issues| {
            assert!(issues.is_empty());
        });
    }

    #[test]
    fn newlines_allowed() {
        let script_text = r"[Events]
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
Dialogue: 0,0:00:00.00,0:00:05.00,Default,,0,0,0,,Text with\Nline break";

        with_encoding_issues(script_text, |issues| {
            assert!(issues.is_empty());
        });
    }

    #[test]
    fn tabs_allowed() {
        let script_text = "[Events]\nFormat: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\nDialogue: 0,0:00:00.00,0:00:05.00,Default,,0,0,0,,Text with\ttab";

        with_encoding_issues(script_text, |issues| {
            assert!(issues.is_empty());
        });
    }

    #[test]
    fn replacement_character_detected() {
        let script_text = r"[Events]
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
Dialogue: 0,0:00:00.00,0:00:05.00,Default,,0,0,0,,Text with � replacement";

        with_encoding_issues(script_text, |issues| {
            assert!(!issues.is_empty());
            assert!(any_message_contains(issues, "replacement character"));
        });
    }

    #[test]
    fn control_character_in_script_info() {
        let script_text = "[Script Info]\nTitle: Test\x00\n\n[Events]\nFormat: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text";

        with_encoding_issues(script_text, |issues| {
            assert!(!issues.is_empty());
            assert!(any_message_contains(issues, "control characters"));
        });
    }

    #[test]
    fn no_events_section_no_issues() {
        let script_text = r"[Script Info]
Title: Test

[V4+ Styles]
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
Style: Default,Arial,20,&H00FFFFFF&,&H000000FF&,&H00000000&,&H00000000&,0,0,0,0,100,100,0,0,1,2,0,2,10,10,10,1";

        with_encoding_issues(script_text, |issues| {
            assert!(issues.is_empty());
        });
    }

    #[test]
    fn multibyte_characters_hint() {
        // Every character here is four bytes in UTF-8, which exceeds the
        // rule's three-bytes-per-character threshold.
        let heavy_unicode = "🎵🎶🎵🎶".repeat(20);
        let script_text = format!(
            r"[Events]
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
Dialogue: 0,0:00:00.00,0:00:05.00,Default,,0,0,0,,{heavy_unicode}"
        );

        with_encoding_issues(&script_text, |issues| {
            assert!(any_message_contains(issues, "multi-byte characters"));
        });
    }

    #[test]
    fn control_character_in_event_detected() {
        let script_text = "[Events]\nFormat: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\nDialogue: 0,0:00:00.00,0:00:05.00,Default,,0,0,0,,Text with\x00control char";

        with_encoding_issues(script_text, |issues| {
            assert!(!issues.is_empty());
            assert!(any_message_contains(
                issues,
                "non-printable control characters"
            ));

            // The control-character issue must carry guidance for the user.
            let control_issue = issues
                .iter()
                .find(|issue| issue.message().contains("non-printable control characters"))
                .expect("control character issue should be reported");
            assert!(control_issue.description().is_some());
            assert!(control_issue.suggested_fix().is_some());
        });
    }
}