1use html5ever::driver::ParseOpts;
2use html5ever::parse_document;
3use html5ever::tendril::TendrilSink;
4use markup5ever_rcdom::RcDom;
5use regex::Regex;
6use serde::{Deserialize, Serialize};
7use std::collections::HashMap;
8use thiserror::Error;
9
10mod checks;
11mod dom;
12
13use dom::{DOMIndex, IndexedNode};
14
15#[derive(Error, Debug)]
16pub enum LinterError {
17 #[error("Parse error: {0}")]
18 ParseError(String),
19 #[error("Rule error: {0}")]
20 RuleError(String),
21 #[error("Invalid selector: {0}")]
22 SelectorError(String),
23 #[error("IO error: {0}")]
24 IoError(#[from] std::io::Error),
25}
26
27#[derive(Debug, Serialize, Deserialize, Clone)]
28pub enum RuleType {
29 ElementPresence,
30 AttributePresence,
31 AttributeValue,
32 ElementOrder,
33 TextContent,
34 ElementContent,
35 WhiteSpace,
36 Nesting,
37 Semantics,
38 Compound,
39 Custom(String),
40 DocumentStructure,
41 ElementCount,
42 ElementCase,
43 AttributeQuotes,
44}
45
46#[derive(Debug, Serialize, Deserialize, Clone)]
47pub struct Rule {
48 pub name: String,
49 pub rule_type: RuleType,
50 pub severity: Severity,
51 pub selector: String, pub condition: String, pub message: String, #[serde(default)]
55 pub options: HashMap<String, String>, }
57
58#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
59pub enum Severity {
60 Error,
61 Warning,
62 Info,
63}
64
65#[derive(Debug, Clone)]
66pub struct LintResult {
67 pub rule: String,
68 pub severity: Severity,
69 pub message: String,
70 pub location: Location,
71 pub source: String,
72}
73
/// Position of a finding within the linted document.
#[derive(Clone, Debug)]
pub struct Location {
    /// Line recorded for the node by the DOM index.
    // NOTE(review): whether this is 0- or 1-based is decided by the DOM
    // index — confirm.
    pub line: usize,

    /// Column recorded for the node by the DOM index.
    pub column: usize,

    /// Tag name of the node the finding refers to.
    pub element: String,
}
80
81#[derive(Debug, Serialize, Deserialize, Clone, Default)]
82pub struct LinterOptions {
83 pub ignore_files: Vec<String>,
84 pub custom_selectors: HashMap<String, String>,
85 pub max_line_length: Option<usize>,
86 pub allow_inline_styles: bool,
87}
88
89#[derive(Debug, Serialize, Deserialize)]
90struct MetaTagRule {
91 name: Option<String>, property: Option<String>, pattern: MetaTagPattern, required: bool, }
96
97#[derive(Debug, Serialize, Deserialize)]
98#[serde(tag = "type", content = "value")]
99enum MetaTagPattern {
100 Regex(String), MinLength(usize), MaxLength(usize), NonEmpty, Exact(String), OneOf(Vec<String>), Contains(String), StartsWith(String), EndsWith(String), }
110
111#[derive(Debug, Serialize, Deserialize)]
112#[serde(tag = "type")]
113pub enum CompoundCondition {
114 TextContent {
115 pattern: String,
116 },
117 AttributeValue {
118 attribute: String,
119 pattern: String,
120 },
121 AttributeReference {
122 attribute: String,
123 reference_must_exist: bool,
124 },
125 ElementPresence {
126 selector: String,
127 },
128}
129
130pub struct HtmlLinter {
131 pub(crate) rules: Vec<Rule>,
132 options: LinterOptions,
133}
134
135impl HtmlLinter {
136 pub fn new(rules: Vec<Rule>, options: Option<LinterOptions>) -> Self {
137 Self {
138 rules,
139 options: options.unwrap_or_default(),
140 }
141 }
142
143 pub fn lint(&self, html: &str) -> Result<Vec<LintResult>, LinterError> {
144 let dom = parse_document(RcDom::default(), ParseOpts::default())
145 .from_utf8()
146 .read_from(&mut html.as_bytes())
147 .map_err(|e| LinterError::ParseError(e.to_string()))?;
148
149 let index = DOMIndex::new(&dom, html);
150 let mut results = Vec::new();
151
152 for rule in &self.rules {
154 if !self.should_ignore_rule(&rule.name) {
155 results.extend(self.process_rule(rule, &index)?);
156 }
157 }
158
159 Ok(results)
160 }
161
162 pub fn from_json(json: &str, options: Option<LinterOptions>) -> Result<Self, LinterError> {
163 let rules: Vec<Rule> = serde_json::from_str(json)
164 .map_err(|e| LinterError::ParseError(format!("Failed to parse rules JSON: {}", e)))?;
165 Ok(Self::new(rules, options))
166 }
167
168 pub fn from_json_file(path: &str, options: Option<LinterOptions>) -> Result<Self, LinterError> {
169 let content = std::fs::read_to_string(path)?;
170 Self::from_json(&content, options)
171 }
172
173 fn should_ignore_rule(&self, rule_name: &str) -> bool {
174 self.options.ignore_files.iter().any(|pattern| {
175 if let Ok(regex) = Regex::new(pattern) {
176 regex.is_match(rule_name)
177 } else {
178 pattern == rule_name
179 }
180 })
181 }
182
183 fn process_rule(&self, rule: &Rule, index: &DOMIndex) -> Result<Vec<LintResult>, LinterError> {
184 match rule.rule_type {
185 RuleType::ElementPresence => self.check_element_presence(rule, index),
186 RuleType::AttributePresence => self.check_attribute_presence(rule, index),
187 RuleType::AttributeValue => self.check_attribute_value(rule, index),
188 RuleType::ElementOrder => self.check_element_order(rule, index),
189 RuleType::TextContent => self.check_text_content(rule, index),
190 RuleType::ElementContent => self.check_element_content(rule, index),
191 RuleType::WhiteSpace => self.check_whitespace(rule, index),
192 RuleType::Nesting => self.check_nesting(rule, index),
193 RuleType::Semantics => self.check_semantics(rule, index),
194 RuleType::Compound => self.check_compound(rule, index),
195 RuleType::Custom(ref validator) => self.check_custom(rule, validator, index),
196 RuleType::DocumentStructure => self.check_document_structure(rule, index),
197 RuleType::ElementCount => self.check_element_count(rule, index),
198 RuleType::ElementCase => self.check_element_case(rule, index),
199 RuleType::AttributeQuotes => self.check_attribute_quotes(rule, index),
200 }
201 }
202
203 fn create_lint_result(&self, rule: &Rule, node: &IndexedNode, index: &DOMIndex) -> LintResult {
204 LintResult {
205 rule: rule.name.clone(),
206 severity: rule.severity.clone(),
207 message: rule.message.clone(),
208 location: Location {
209 line: node.source_info.line,
210 column: node.source_info.column,
211 element: index
212 .resolve_symbol(node.tag_name)
213 .unwrap_or_default()
214 .to_string(),
215 },
216 source: node.source_info.source.clone(),
217 }
218 }
219
220 pub fn get_rules(&self) -> Vec<Rule> {
221 self.rules.clone()
222 }
223}
224
#[cfg(test)]
mod tests {
    use super::*;

    /// An `<img>` without an `alt` attribute must produce exactly one
    /// error-severity finding from the `img-alt` rule.
    #[test]
    fn test_basic_linting() {
        let img_alt_rule = Rule {
            name: "img-alt".to_string(),
            rule_type: RuleType::AttributePresence,
            severity: Severity::Error,
            selector: "img".to_string(),
            condition: "alt-missing".to_string(),
            message: "Image must have alt attribute".to_string(),
            options: HashMap::new(),
        };
        let linter = HtmlLinter::new(vec![img_alt_rule], None);

        let findings = linter.lint(r#"<img src="test.jpg">"#).unwrap();

        assert_eq!(findings.len(), 1);
        assert_eq!(findings[0].severity, Severity::Error);
    }

    #[test]
    fn test_compound_rule() {
        // TODO: placeholder — this test asserts nothing yet, so it always
        // passes. Add a `RuleType::Compound` rule and check its findings.
    }
}