ass_core/analysis/linting/rules/encoding/
rule.rs1use crate::{
7 analysis::{
8 linting::{IssueCategory, IssueSeverity, LintIssue, LintRule},
9 ScriptAnalysis,
10 },
11 parser::Section,
12};
13use alloc::{format, string::ToString, vec::Vec};
14
/// Lint rule that looks for signs of encoding trouble in a script.
///
/// Through its [`LintRule`] implementation it reports:
/// - control characters in event text (newline, carriage return and tab are tolerated),
/// - the Unicode replacement character `U+FFFD` in event text,
/// - event text whose UTF-8 encoding averages more than three bytes per character,
/// - control characters in script-info field values (newline and carriage return are
///   tolerated).
///
/// The type carries no state, so it is cheap to copy and can be built with
/// `EncodingRule` or `EncodingRule::default()`.
#[derive(Debug, Clone, Copy, Default)]
pub struct EncodingRule;
50
51impl LintRule for EncodingRule {
52 fn id(&self) -> &'static str {
53 "encoding"
54 }
55
56 fn name(&self) -> &'static str {
57 "Encoding"
58 }
59
60 fn description(&self) -> &'static str {
61 "Detects potential encoding or character issues"
62 }
63
64 fn default_severity(&self) -> IssueSeverity {
65 IssueSeverity::Warning
66 }
67
68 fn category(&self) -> IssueCategory {
69 IssueCategory::Encoding
70 }
71
72 fn check_script(&self, analysis: &ScriptAnalysis) -> Vec<LintIssue> {
73 let mut issues = Vec::new();
74
75 if let Some(Section::Events(events)) = analysis
76 .script()
77 .sections()
78 .iter()
79 .find(|s| matches!(s, Section::Events(_)))
80 {
81 for event in events {
82 self.check_event_encoding(&mut issues, event);
83 }
84 }
85
86 self.check_script_info_encoding(&mut issues, analysis.script());
87
88 issues
89 }
90}
91
92impl EncodingRule {
93 fn check_event_encoding(&self, issues: &mut Vec<LintIssue>, event: &crate::parser::Event) {
95 if event
96 .text
97 .chars()
98 .any(|c| c.is_control() && c != '\n' && c != '\r' && c != '\t')
99 {
100 let issue = LintIssue::new(
101 self.default_severity(),
102 IssueCategory::Encoding,
103 self.id(),
104 "Event contains non-printable control characters".to_string(),
105 )
106 .with_description(
107 "Control characters may cause display issues in subtitle renderers".to_string(),
108 )
109 .with_suggested_fix(
110 "Remove or replace control characters with appropriate text".to_string(),
111 );
112 issues.push(issue);
113 }
114
115 if event.text.contains('\u{FFFD}') {
116 let issue = LintIssue::new(
117 self.default_severity(),
118 IssueCategory::Encoding,
119 self.id(),
120 "Event contains Unicode replacement character (�)".to_string(),
121 )
122 .with_description(
123 "Replacement characters indicate corrupted or invalid encoding".to_string(),
124 )
125 .with_suggested_fix("Check source file encoding and re-import".to_string());
126 issues.push(issue);
127 }
128
129 let char_count = event.text.chars().count();
130 let byte_count = event.text.len();
131
132 if char_count > 0 && byte_count > char_count * 3 {
134 let issue = LintIssue::new(
135 IssueSeverity::Hint,
136 IssueCategory::Encoding,
137 self.id(),
138 "Event contains many multi-byte characters".to_string(),
139 )
140 .with_description(
141 "Heavy use of multi-byte characters may impact performance".to_string(),
142 );
143 issues.push(issue);
144 }
145 }
146
147 fn check_script_info_encoding(
149 &self,
150 issues: &mut Vec<LintIssue>,
151 script: &crate::parser::Script,
152 ) {
153 if let Some(Section::ScriptInfo(info)) = script
154 .sections()
155 .iter()
156 .find(|s| matches!(s, Section::ScriptInfo(_)))
157 {
158 for (key, value) in &info.fields {
159 if value
160 .chars()
161 .any(|c| c.is_control() && c != '\n' && c != '\r')
162 {
163 let issue = LintIssue::new(
164 self.default_severity(),
165 IssueCategory::Encoding,
166 self.id(),
167 format!("Script info field '{key}' contains control characters"),
168 );
169 issues.push(issue);
170 }
171 }
172 }
173 }
174}