normalize-syntax-rules 0.3.1

Syntax-based linting rules with tree-sitter queries
Documentation
//! Rule loading from multiple sources.
//!
//! Rules are loaded in this order (later overrides earlier by `id`):
//! 1. Embedded builtins (compiled into normalize)
//! 2. User global rules (`~/.config/normalize/rules/*.scm`)
//! 3. Project rules (`.normalize/rules/*.scm`)

use crate::builtin::BUILTIN_RULES;
use crate::{Rule, Severity};
use glob::Pattern;
use std::collections::HashMap;
use std::path::{Path, PathBuf};

pub use normalize_rules_config::{RuleOverride, RulesConfig};

/// Load all rules from all sources, merged by ID.
/// Order: builtins → ~/.config/normalize/rules/ → .normalize/rules/
/// Then applies config overrides (severity, disable).
pub fn load_all_rules(project_root: &Path, config: &RulesConfig) -> Vec<Rule> {
    let mut rules_by_id: HashMap<String, Rule> = HashMap::new();

    // 1. Load embedded builtins
    for builtin in BUILTIN_RULES {
        if let Some(rule) = parse_rule_content(builtin.content, builtin.id, true) {
            rules_by_id.insert(rule.id.clone(), rule);
        }
    }

    // 2. Load user global rules (~/.config/normalize/rules/)
    if let Some(config_dir) = dirs::config_dir() {
        let user_rules_dir = config_dir.join("normalize").join("rules");
        for rule in load_rules_from_dir(&user_rules_dir) {
            rules_by_id.insert(rule.id.clone(), rule);
        }
    }

    // 3. Load project rules (.normalize/rules/)
    let project_rules_dir = project_root.join(".normalize").join("rules");
    for rule in load_rules_from_dir(&project_rules_dir) {
        rules_by_id.insert(rule.id.clone(), rule);
    }

    // 4. Apply config overrides
    for (rule_id, override_cfg) in &config.rules {
        if let Some(rule) = rules_by_id.get_mut(rule_id) {
            if let Some(ref severity_str) = override_cfg.severity
                && let Ok(severity) = severity_str.parse()
            {
                rule.severity = severity;
            }
            if let Some(enabled) = override_cfg.enabled {
                rule.enabled = enabled;
            }
            // Merge additional allow patterns from config
            for pattern_str in &override_cfg.allow {
                if let Ok(pattern) = Pattern::new(pattern_str) {
                    rule.allow.push(pattern);
                }
            }
            // Append additional tags from config (additive, does not replace)
            for tag in &override_cfg.tags {
                if !rule.tags.contains(tag) {
                    rule.tags.push(tag.clone());
                }
            }
        }
    }

    // 5. Apply global allow patterns to every rule
    let global_patterns: Vec<Pattern> = config
        .global_allow
        .iter()
        .filter_map(|s| Pattern::new(s).ok())
        .collect();
    if !global_patterns.is_empty() {
        for rule in rules_by_id.values_mut() {
            rule.allow.extend_from_slice(&global_patterns);
        }
    }

    rules_by_id.into_values().collect()
}

/// Load rules from a directory.
fn load_rules_from_dir(rules_dir: &Path) -> Vec<Rule> {
    let mut rules = Vec::new();

    if !rules_dir.exists() {
        return rules;
    }

    let entries = match std::fs::read_dir(rules_dir) {
        Ok(e) => e,
        Err(_) => return rules,
    };

    for entry in entries.flatten() {
        let path = entry.path();
        if path.extension().is_some_and(|e| e == "scm")
            && let Some(rule) = parse_rule_file(&path)
        {
            rules.push(rule);
        }
    }

    rules
}

/// Read the rule file at `path` and parse it (TOML frontmatter plus query).
///
/// The file stem serves as the rule ID when the frontmatter does not provide
/// one (`"unknown"` if the stem is missing or not valid UTF-8). On success the
/// rule's `source_path` is set to `path`. Returns `None` when the file cannot
/// be read or its content cannot be parsed.
fn parse_rule_file(path: &Path) -> Option<Rule> {
    let text = std::fs::read_to_string(path).ok()?;

    let fallback_id = match path.file_stem().and_then(|stem| stem.to_str()) {
        Some(stem) => stem,
        None => "unknown",
    };

    parse_rule_content(&text, fallback_id, false).map(|mut parsed| {
        parsed.source_path = path.to_path_buf();
        parsed
    })
}

/// Parse rule content: a query preceded by optional TOML frontmatter and docs.
///
/// Expected layout (every part except the query is optional):
///
/// ```text
/// # ---
/// # id = "my-rule"
/// # severity = "error"
/// # ---
/// # Doc block: comment lines after the frontmatter, before the query.
/// (query) @match
/// ```
///
/// * Lines between the two `# ---` markers are the frontmatter; the leading
///   `#` of each line is removed and the remainder is parsed as TOML.
/// * `#` lines that follow the frontmatter and precede the first non-blank
///   query line form the doc block. Blank lines in that region do not end it.
/// * Everything else after the frontmatter is the query. When there is no
///   frontmatter at all, the whole `content` is the query.
///
/// Frontmatter keys and their defaults: `id` (`default_id`), `severity`
/// (`Severity::Warning`, also used when the value does not parse), `message`
/// (`"Rule violation"`), `allow` / `files` (glob arrays, invalid globs are
/// skipped), `languages` / `tags` (string arrays), `enabled` (`true`),
/// `requires` (table of string values), `fix` (optional string),
/// `recommended` and `applies_in_tests` (`false`).
///
/// The returned rule has an empty `source_path` and `builtin == is_builtin`.
/// Returns `None` only when the frontmatter is not valid TOML, in which case a
/// warning naming `default_id` is written to stderr.
pub fn parse_rule_content(content: &str, default_id: &str, is_builtin: bool) -> Option<Rule> {
    let mut in_frontmatter = false;
    let mut frontmatter_done = false;
    // Becomes true once a non-blank line has been assigned to the query.
    let mut query_started = false;
    let mut frontmatter_lines: Vec<&str> = Vec::new();
    let mut doc_lines: Vec<&str> = Vec::new();
    let mut query_lines: Vec<&str> = Vec::new();

    for line in content.lines() {
        let trimmed = line.trim();
        if trimmed == "# ---" {
            if in_frontmatter {
                frontmatter_done = true;
            }
            in_frontmatter = !in_frontmatter;
            continue;
        }

        // Strip the comment marker from the de-indented line so that an
        // indented `  # key = "value"` is handled like an unindented one.
        // Trailing whitespace is kept, as it may matter inside TOML strings.
        let unindented = line.trim_start();

        if in_frontmatter {
            let fm_line = unindented
                .strip_prefix('#')
                .unwrap_or(unindented)
                .trim_start();
            frontmatter_lines.push(fm_line);
        } else if frontmatter_done && !query_started && trimmed.starts_with('#') {
            // Doc block: comment lines after frontmatter, before the query.
            // Only blank lines may have been collected for the query so far,
            // so a blank separator after the frontmatter does not push the
            // documentation into the query text.
            let doc_line = unindented
                .strip_prefix('#')
                .unwrap_or("")
                .trim_start_matches(' ');
            doc_lines.push(doc_line);
        } else if !frontmatter_lines.is_empty()
            || !(trimmed.is_empty() || trimmed.starts_with('#'))
        {
            query_started |= !trimmed.is_empty();
            query_lines.push(line);
        }
    }

    // Without frontmatter the entire content is treated as the query.
    let (frontmatter_str, query_str) = if frontmatter_lines.is_empty() {
        (String::new(), content.to_string())
    } else {
        (frontmatter_lines.join("\n"), query_lines.join("\n"))
    };

    let doc = Some(doc_lines.join("\n").trim().to_string()).filter(|text| !text.is_empty());

    let frontmatter: toml::Value = if frontmatter_str.is_empty() {
        toml::Value::Table(toml::map::Map::new())
    } else {
        match toml::from_str(&frontmatter_str) {
            Ok(value) => value,
            Err(e) => {
                // Name the rule so the user can tell which file is broken.
                eprintln!(
                    "Warning: invalid frontmatter in rule '{}': {}",
                    default_id, e
                );
                return None;
            }
        }
    };

    let id = frontmatter
        .get("id")
        .and_then(|v| v.as_str())
        .unwrap_or(default_id)
        .to_string();

    let severity = frontmatter
        .get("severity")
        .and_then(|v| v.as_str())
        .and_then(|s| s.parse().ok())
        .unwrap_or(Severity::Warning);

    let message = frontmatter
        .get("message")
        .and_then(|v| v.as_str())
        .unwrap_or("Rule violation")
        .to_string();

    let requires: HashMap<String, String> = frontmatter
        .get("requires")
        .and_then(|v| v.as_table())
        .map(|tbl| {
            tbl.iter()
                .filter_map(|(k, v)| v.as_str().map(|s| (k.clone(), s.to_string())))
                .collect()
        })
        .unwrap_or_default();

    let fix = frontmatter
        .get("fix")
        .and_then(|v| v.as_str())
        .map(|s| s.to_string());

    Some(Rule {
        id,
        query_str: query_str.trim().to_string(),
        severity,
        message,
        allow: frontmatter_patterns(&frontmatter, "allow"),
        files: frontmatter_patterns(&frontmatter, "files"),
        source_path: PathBuf::new(),
        languages: frontmatter_strings(&frontmatter, "languages"),
        enabled: frontmatter_bool(&frontmatter, "enabled", true),
        builtin: is_builtin,
        requires,
        fix,
        tags: frontmatter_strings(&frontmatter, "tags"),
        doc,
        recommended: frontmatter_bool(&frontmatter, "recommended", false),
        applies_in_tests: frontmatter_bool(&frontmatter, "applies_in_tests", false),
    })
}

/// Collect the string elements of the array stored under `key`.
///
/// Non-string elements are skipped; a missing or non-array value yields an
/// empty list.
fn frontmatter_strings(frontmatter: &toml::Value, key: &str) -> Vec<String> {
    frontmatter
        .get(key)
        .and_then(|v| v.as_array())
        .map(|arr| {
            arr.iter()
                .filter_map(|v| v.as_str())
                .map(str::to_string)
                .collect()
        })
        .unwrap_or_default()
}

/// Compile the string elements of the array stored under `key` as glob patterns.
///
/// Non-string elements and invalid patterns are skipped; a missing or
/// non-array value yields an empty list.
fn frontmatter_patterns(frontmatter: &toml::Value, key: &str) -> Vec<Pattern> {
    frontmatter
        .get(key)
        .and_then(|v| v.as_array())
        .map(|arr| {
            arr.iter()
                .filter_map(|v| v.as_str())
                .filter_map(|s| Pattern::new(s).ok())
                .collect()
        })
        .unwrap_or_default()
}

/// Read the boolean stored under `key`, or `default` when missing or not a boolean.
fn frontmatter_bool(frontmatter: &toml::Value, key: &str, default: bool) -> bool {
    frontmatter
        .get(key)
        .and_then(|v| v.as_bool())
        .unwrap_or(default)
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Deserialize a `RulesConfig` from TOML text, panicking on malformed input.
    fn parse_config(source: &str) -> RulesConfig {
        toml::from_str(source).expect("failed to parse RulesConfig")
    }

    #[test]
    fn test_rules_config_toml_deserialization() {
        // Rule IDs contain '/', so they have to be quoted inside TOML table
        // headers. In normalize.toml the same table is written as
        // [rules.rule."rust/foo"].
        let parsed = parse_config(
            r#"
global-allow = ["**/tests/fixtures/**", "**/test/**"]

[rule."rust/foo"]
severity = "error"
enabled = true
allow = ["some/path/**"]

[rule."rust/bar"]
severity = "warning"
"#,
        );

        assert_eq!(
            parsed.global_allow,
            vec!["**/tests/fixtures/**", "**/test/**"]
        );

        // Every quoted rule ID must be present with the severity it declared.
        for (rule_id, expected_severity) in [("rust/foo", "error"), ("rust/bar", "warning")] {
            assert!(parsed.rules.contains_key(rule_id));
            assert_eq!(
                parsed.rules[rule_id].severity.as_deref(),
                Some(expected_severity)
            );
        }
    }

    #[test]
    fn test_rules_config_empty_global_allow() {
        // `global-allow` is omitted entirely and must default to an empty list.
        let parsed = parse_config(
            r#"
[rule."rust/baz"]
enabled = false
"#,
        );

        assert!(parsed.global_allow.is_empty());
        assert!(parsed.rules.contains_key("rust/baz"));
    }
}