html-linter 0.1.1

An HTML linting library for checking HTML structure and semantics
Documentation
use crate::*;
use markup5ever_rcdom::NodeData;
use regex::Regex;
use serde::Deserialize;

/// One entry of the `required_meta_tags` rule option, deserialized from JSON.
///
/// A rule identifies `<meta>` tags by their `name` and/or `property`
/// attribute and constrains the tag's `content` attribute with `pattern`.
#[derive(Debug, Clone, Deserialize)]
struct MetaTagRule {
    /// Expected value of the tag's `name` attribute, if the rule targets one.
    #[serde(default)]
    name: Option<String>,
    /// Expected value of the tag's `property` attribute, if the rule targets one.
    #[serde(default)]
    property: Option<String>,
    /// Constraint the tag's `content` attribute has to satisfy.
    pattern: PatternRule,
    /// Whether the absence of a valid matching tag fails validation.
    /// Falls back to `default_required()` when omitted from the JSON.
    #[serde(default = "default_required")]
    required: bool,
}

/// Constraint applied to the `content` of a meta tag.
///
/// Deserialized as an internally tagged enum: the JSON object's `type` field
/// selects the variant and the remaining fields carry its parameters.
#[derive(Debug, Clone, Deserialize)]
#[serde(tag = "type")]
enum PatternRule {
    /// Content length must be at least `value`.
    #[serde(rename = "MinLength")]
    MinLength { value: usize },
    /// Content length must lie between `min` and `max`, both inclusive.
    #[serde(rename = "LengthRange")]
    LengthRange { min: usize, max: usize },
    /// Content must be equal to one of the listed strings.
    #[serde(rename = "OneOf")]
    OneOf { value: Vec<String> },
    /// Content must not be empty.
    #[serde(rename = "NonEmpty")]
    NonEmpty,
    /// Content must be equal to `value`.
    #[serde(rename = "Exact")]
    Exact { value: String },
    /// Content must match the regular expression in `value`.
    #[serde(rename = "Regex")]
    Regex { value: String },
}

/// Serde fallback for the `required` field of `MetaTagRule`.
///
/// Returns `false`, so a meta tag rule is optional unless its JSON
/// explicitly sets `required`.
fn default_required() -> bool {
    const REQUIRED_WHEN_OMITTED: bool = false;
    REQUIRED_WHEN_OMITTED
}

impl MetaTagRule {
    /// Returns `true` when `element` is an element node carrying the `name`
    /// or `property` attribute value this rule is configured with.
    ///
    /// Either identifier is sufficient when both are configured. Non-element
    /// nodes, and rules with neither identifier set, never match.
    fn _matches_element(&self, element: &NodeData) -> bool {
        let attrs = match element {
            NodeData::Element { attrs, .. } => attrs.borrow(),
            _ => return false,
        };

        let identifiers = [
            ("name", self.name.as_ref()),
            ("property", self.property.as_ref()),
        ];

        identifiers.iter().any(|&(key, expected)| {
            expected.map_or(false, |expected| {
                attrs
                    .iter()
                    .any(|attr| attr.name.local.as_ref() == key && attr.value.as_ref() == expected)
            })
        })
    }

    /// Returns `true` when `content` satisfies this rule's `pattern`.
    ///
    /// * Lengths are counted in Unicode scalar values (`char`s), not UTF-8
    ///   bytes, so non-ASCII content is measured the way a reader sees it.
    /// * `NonEmpty` ignores surrounding whitespace, matching the check done
    ///   in `HtmlLinter::validate_meta_tags`.
    /// * A `Regex` pattern that fails to compile can never match, so it
    ///   yields `false` instead of panicking on a configuration mistake.
    fn _validate_content(&self, content: &str) -> bool {
        match &self.pattern {
            PatternRule::MinLength { value } => content.chars().count() >= *value,
            PatternRule::LengthRange { min, max } => {
                let length = content.chars().count();
                length >= *min && length <= *max
            }
            // Compare in place; no need to allocate a `String` per lookup.
            PatternRule::OneOf { value } => value.iter().any(|allowed| allowed == content),
            PatternRule::NonEmpty => !content.trim().is_empty(),
            PatternRule::Exact { value } => content == value,
            PatternRule::Regex { value } => {
                Regex::new(value).map_or(false, |regex| regex.is_match(content))
            }
        }
    }
}

impl HtmlLinter {
    /// Checks the text content of every node matched by `rule.selector`.
    ///
    /// The check performed depends on `rule.condition`:
    ///
    /// * `"max-length"` – reports nodes whose text is longer than the
    ///   `max_length` option (80 when missing or unparsable).
    /// * `"content-length"` – reports nodes whose text is shorter than the
    ///   `min_length` option (default 0) or longer than the `max_length`
    ///   option (default `usize::MAX`).
    /// * anything else – when a `pattern` option is present, the node text is
    ///   tested against that regular expression. With
    ///   `check_mode = "ensure_existence"` a node is reported when the
    ///   pattern is *absent*; with any other mode it is reported when the
    ///   pattern is *present*. Without a `pattern` option nothing is reported.
    ///
    /// Lengths are counted in Unicode scalar values (`char`s) rather than
    /// UTF-8 bytes, so non-ASCII text is not over-counted.
    ///
    /// # Errors
    ///
    /// Returns `LinterError::RuleError` when the `pattern` option is not a
    /// valid regular expression.
    pub(crate) fn check_text_content(
        &self,
        rule: &Rule,
        index: &DOMIndex,
    ) -> Result<Vec<LintResult>, LinterError> {
        let mut results = Vec::new();
        let matches = index.query(&rule.selector);

        // Reads a numeric option, falling back to `default` when the option is
        // missing or does not parse as `usize`.
        let limit = |key: &str, default: usize| -> usize {
            rule.options
                .get(key)
                .and_then(|raw| raw.parse().ok())
                .unwrap_or(default)
        };

        // Both length conditions reduce to an inclusive (min, max) window.
        let length_bounds = match rule.condition.as_str() {
            "max-length" => Some((0, limit("max_length", 80))),
            "content-length" => Some((limit("min_length", 0), limit("max_length", usize::MAX))),
            _ => None,
        };

        match length_bounds {
            Some((min_length, max_length)) => {
                for node_idx in matches {
                    if let Some(node) = index.get_node(node_idx) {
                        let text = dom::utils::get_node_text_content(node_idx, index);
                        let length = text.chars().count();
                        if length < min_length || length > max_length {
                            results.push(self.create_lint_result(rule, node, index));
                        }
                    }
                }
            }
            None => {
                if let Some(pattern) = rule.options.get("pattern") {
                    let regex =
                        Regex::new(pattern).map_err(|e| LinterError::RuleError(e.to_string()))?;

                    // The mode is the same for every node, so resolve it once.
                    let report_when_absent = rule.options.get("check_mode").map(String::as_str)
                        == Some("ensure_existence");

                    for node_idx in matches {
                        if let Some(node) = index.get_node(node_idx) {
                            let mut text = String::new();
                            // A node without a DOM handle has no text to extract;
                            // treat it as empty rather than panicking.
                            if let Some(handle) = node.handle.as_ref() {
                                dom::utils::extract_text(handle, &mut text);
                            }

                            let found = regex.is_match(&text);
                            let should_report = if report_when_absent { !found } else { found };

                            if should_report {
                                results.push(self.create_lint_result(rule, node, index));
                            }
                        }
                    }
                }
            }
        }

        Ok(results)
    }

    /// Checks element-level content rules for every node matched by
    /// `rule.selector`.
    ///
    /// Supported values of `rule.condition`:
    ///
    /// * `"meta-tags"` – when the `required_meta_tags` option is present it is
    ///   parsed as a JSON array of meta tag rules, and the node is reported
    ///   if `validate_meta_tags` rejects it. Without the option nothing is
    ///   reported.
    /// * `"empty-or-default"` – reports nodes whose text, ignoring surrounding
    ///   whitespace, is empty, `Untitled` or `Default`.
    ///
    /// Any other condition reports nothing.
    ///
    /// # Errors
    ///
    /// Returns `LinterError::RuleError` when `required_meta_tags` is not valid
    /// JSON for a list of meta tag rules, and forwards any error from
    /// `validate_meta_tags`.
    pub(crate) fn check_element_content(
        &self,
        rule: &Rule,
        index: &DOMIndex,
    ) -> Result<Vec<LintResult>, LinterError> {
        let mut results = Vec::new();
        let matches = index.query(&rule.selector);

        // The meta tag rules are identical for every node. They are parsed on
        // first use, so a rule that matches no node still never touches the
        // JSON, and then reused for the remaining nodes.
        let mut parsed_meta_rules: Option<Vec<MetaTagRule>> = None;

        for node_idx in matches {
            if let Some(node) = index.get_node(node_idx) {
                let should_report = match rule.condition.as_str() {
                    "meta-tags" => match rule.options.get("required_meta_tags") {
                        Some(required_tags) => {
                            if parsed_meta_rules.is_none() {
                                parsed_meta_rules = Some(
                                    serde_json::from_str::<Vec<MetaTagRule>>(required_tags)
                                        .map_err(|e| LinterError::RuleError(e.to_string()))?,
                                );
                            }
                            let meta_rules = parsed_meta_rules.as_deref().unwrap_or(&[]);
                            !self.validate_meta_tags(node_idx, meta_rules, index)?
                        }
                        None => false,
                    },
                    "empty-or-default" => {
                        let content = dom::utils::get_node_text_content(node_idx, index);
                        // Trim once so whitespace-only text counts as empty,
                        // consistent with the placeholder comparisons.
                        let content = content.trim();
                        content.is_empty() || matches!(content, "Untitled" | "Default")
                    }
                    _ => false,
                };

                if should_report {
                    results.push(self.create_lint_result(rule, node, index));
                }
            }
        }

        Ok(results)
    }

    /// Checks whitespace rules for the nodes matched by `rule.selector`.
    ///
    /// Only the `"trailing-whitespace"` condition is handled; any other
    /// condition yields an empty result list. For that condition every line of
    /// a matched node's source that ends in whitespace produces one
    /// `LintResult`, located at the node's starting line plus the line's
    /// offset within the node, with the column pointing just past the last
    /// non-whitespace byte of the line.
    pub(crate) fn check_whitespace(
        &self,
        rule: &Rule,
        index: &DOMIndex,
    ) -> Result<Vec<LintResult>, LinterError> {
        let mut results = Vec::new();

        if rule.condition.as_str() != "trailing-whitespace" {
            return Ok(results);
        }

        for node_idx in index.query(&rule.selector) {
            let node = match index.get_node(node_idx) {
                Some(node) => node,
                None => continue,
            };

            for (offset, line) in node.source_info.source.lines().enumerate() {
                let trimmed_len = line.trim_end().len();
                if trimmed_len == line.len() {
                    // Nothing was trimmed, so the line is clean.
                    continue;
                }

                results.push(LintResult {
                    rule: rule.name.clone(),
                    severity: rule.severity.clone(),
                    message: "Line contains trailing whitespace".to_string(),
                    location: Location {
                        line: node.source_info.line + offset,
                        column: trimmed_len + 1,
                        element: index
                            .resolve_symbol(node.tag_name)
                            .unwrap_or_default()
                            .to_string(),
                    },
                    source: line.to_string(),
                });
            }
        }

        Ok(results)
    }

    fn validate_meta_tags(
        &self,
        node_idx: usize,
        meta_rules: &[MetaTagRule],
        index: &DOMIndex,
    ) -> Result<bool, LinterError> {
        if let Some(_node) = index.get_node(node_idx) {
            for rule in meta_rules {
                let meta_nodes = index.query("meta");
                let mut found_valid_tag = false;

                for meta_node_idx in meta_nodes {
                    if let Some(meta_node) = index.get_node(meta_node_idx) {
                        let matches_identifier = meta_node.attributes.iter().any(|attr| {
                            let attr_name = index.resolve_symbol(attr.name).unwrap_or_default();
                            let attr_value = index.resolve_symbol(attr.value).unwrap_or_default();

                            match (&rule.name, &rule.property) {
                                (Some(name), _) => {
                                    attr_name == "name" && attr_value == name.to_string()
                                }
                                (_, Some(property)) => {
                                    attr_name == "property" && attr_value == property.to_string()
                                }
                                (None, None) => false,
                            }
                        });

                        if matches_identifier {
                            let content_valid = meta_node.attributes.iter().any(|attr| {
                                let name = index.resolve_symbol(attr.name).unwrap_or_default();
                                let value = index.resolve_symbol(attr.value).unwrap_or_default();
                                if name == "content" {
                                    let content = value.as_str();
                                    match &rule.pattern {
                                        PatternRule::MinLength { value: min_len } => {
                                            content.len() >= *min_len
                                        }
                                        PatternRule::LengthRange { min, max } => {
                                            content.len() >= *min && content.len() <= *max
                                        }
                                        PatternRule::OneOf { value } => {
                                            value.contains(&content.to_string())
                                        }
                                        PatternRule::NonEmpty => !content.trim().is_empty(),
                                        PatternRule::Exact { value: exact } => content == *exact,
                                        PatternRule::Regex { value: regex } => {
                                            let regex = Regex::new(regex).unwrap();
                                            regex.is_match(content)
                                        }
                                    }
                                } else {
                                    false
                                }
                            });

                            if content_valid {
                                found_valid_tag = true;
                                break;
                            }
                        }
                    }
                }

                if !found_valid_tag && rule.required {
                    return Ok(false);
                }
            }
            Ok(true)
        } else {
            Ok(false)
        }
    }
}