Skip to main content

rumdl_lib/config/
registry.rs

1use std::sync::LazyLock;
2
3use crate::rule::Rule;
4
5use super::flavor::normalize_key;
6
7/// Lazily-initialized default `RuleRegistry` built from rules with default config.
8///
9/// Rule config schemas (valid keys, types, aliases) are intrinsic to each rule type
10/// and do not change based on runtime configuration. This static registry avoids
11/// repeatedly constructing 67+ rule instances just to extract their schemas.
12static DEFAULT_REGISTRY: LazyLock<RuleRegistry> = LazyLock::new(|| {
13    let default_config = super::types::Config::default();
14    let rules = crate::rules::all_rules(&default_config);
15    RuleRegistry::from_rules(&rules)
16});
17
18/// Returns a reference to the lazily-initialized default `RuleRegistry`.
19///
20/// Use this instead of `all_rules(&Config::default())` + `RuleRegistry::from_rules()`
21/// when you only need rule metadata (names, config schemas, aliases) rather than
22/// configured rule instances for linting.
23pub fn default_registry() -> &'static RuleRegistry {
24    &DEFAULT_REGISTRY
25}
26
27/// Registry of all known rules and their config schemas
28pub struct RuleRegistry {
29    /// Map of rule name (e.g. "MD013") to set of valid config keys and their TOML value types
30    pub rule_schemas: std::collections::BTreeMap<String, toml::map::Map<String, toml::Value>>,
31    /// Map of rule name to config key aliases
32    pub rule_aliases: std::collections::BTreeMap<String, std::collections::HashMap<String, String>>,
33}
34
35impl RuleRegistry {
36    /// Build a registry from a list of rules
37    pub fn from_rules(rules: &[Box<dyn Rule>]) -> Self {
38        let mut rule_schemas = std::collections::BTreeMap::new();
39        let mut rule_aliases = std::collections::BTreeMap::new();
40
41        for rule in rules {
42            let norm_name = if let Some((name, toml::Value::Table(mut table))) = rule.default_config_section() {
43                let norm_name = normalize_key(&name); // Normalize the name from default_config_section
44                // Overwrite polymorphic keys with the sentinel so the validator skips
45                // type checking for fields whose deserializer accepts multiple TOML
46                // types. The clean default is preserved for `rumdl config --defaults`
47                // because that path calls `default_config_section()` directly.
48                for key in rule.polymorphic_config_keys() {
49                    table.insert(
50                        (*key).to_string(),
51                        crate::rule_config_serde::polymorphic_sentinel_value(),
52                    );
53                }
54                rule_schemas.insert(norm_name.clone(), table);
55                norm_name
56            } else {
57                let norm_name = normalize_key(rule.name()); // Normalize the name from rule.name()
58                rule_schemas.insert(norm_name.clone(), toml::map::Map::new());
59                norm_name
60            };
61
62            // Store aliases if the rule provides them
63            if let Some(aliases) = rule.config_aliases() {
64                rule_aliases.insert(norm_name, aliases);
65            }
66        }
67
68        RuleRegistry {
69            rule_schemas,
70            rule_aliases,
71        }
72    }
73
74    /// Get all known rule names
75    pub fn rule_names(&self) -> std::collections::BTreeSet<String> {
76        self.rule_schemas.keys().cloned().collect()
77    }
78
79    /// Get the valid configuration keys for a rule, including both original and normalized variants
80    pub fn config_keys_for(&self, rule: &str) -> Option<std::collections::BTreeSet<String>> {
81        self.rule_schemas.get(rule).map(|schema| {
82            let mut all_keys = std::collections::BTreeSet::new();
83
84            // Always allow 'severity' and 'enabled' for any rule
85            all_keys.insert("severity".to_string());
86            all_keys.insert("enabled".to_string());
87
88            // Add original keys from schema
89            for key in schema.keys() {
90                all_keys.insert(key.clone());
91            }
92
93            // Add normalized variants for markdownlint compatibility
94            for key in schema.keys() {
95                // Add kebab-case variant
96                all_keys.insert(key.replace('_', "-"));
97                // Add snake_case variant
98                all_keys.insert(key.replace('-', "_"));
99                // Add normalized variant
100                all_keys.insert(normalize_key(key));
101            }
102
103            // Add any aliases defined by the rule
104            if let Some(aliases) = self.rule_aliases.get(rule) {
105                for alias_key in aliases.keys() {
106                    all_keys.insert(alias_key.clone());
107                    // Also add normalized variants of the alias
108                    all_keys.insert(alias_key.replace('_', "-"));
109                    all_keys.insert(alias_key.replace('-', "_"));
110                    all_keys.insert(normalize_key(alias_key));
111                }
112            }
113
114            all_keys
115        })
116    }
117
118    /// Get the expected value type for a rule's configuration key, trying variants.
119    /// Returns `None` for sentinel values (nullable Option fields, polymorphic fields
120    /// that accept multiple TOML types), which signals the caller to skip type checking
121    /// for that key while still recognizing the key as valid.
122    pub fn expected_value_for(&self, rule: &str, key: &str) -> Option<&toml::Value> {
123        let schema = self.rule_schemas.get(rule)?;
124
125        // Check if this key is an alias
126        if let Some(aliases) = self.rule_aliases.get(rule)
127            && let Some(canonical_key) = aliases.get(key)
128            && let Some(value) = schema.get(canonical_key)
129        {
130            return filter_type_check_sentinels(value);
131        }
132
133        // Try the original key
134        if let Some(value) = schema.get(key) {
135            return filter_type_check_sentinels(value);
136        }
137
138        // Try key variants
139        let key_variants = [
140            key.replace('-', "_"), // Convert kebab-case to snake_case
141            key.replace('_', "-"), // Convert snake_case to kebab-case
142            normalize_key(key),    // Normalized key (lowercase, kebab-case)
143        ];
144
145        for variant in &key_variants {
146            if let Some(value) = schema.get(variant) {
147                return filter_type_check_sentinels(value);
148            }
149        }
150
151        None
152    }
153
154    /// Resolve any rule name (canonical or alias) to its canonical form
155    /// Returns None if the rule name is not recognized
156    ///
157    /// Resolution order:
158    /// 1. Direct canonical name match
159    /// 2. Static aliases (built-in markdownlint aliases)
160    pub fn resolve_rule_name(&self, name: &str) -> Option<String> {
161        // Try normalized canonical name first
162        let normalized = normalize_key(name);
163        if self.rule_schemas.contains_key(&normalized) {
164            return Some(normalized);
165        }
166
167        // Try static alias resolution (O(1) perfect hash lookup)
168        resolve_rule_name_alias(name).map(std::string::ToString::to_string)
169    }
170}
171
172/// Returns `None` if the value is a sentinel that signals "skip type check"
173/// (nullable Option fields, polymorphic fields that accept multiple types).
174/// Otherwise returns `Some(value)` so the validator can compare types.
175fn filter_type_check_sentinels(value: &toml::Value) -> Option<&toml::Value> {
176    if crate::rule_config_serde::is_nullable_sentinel(value) || crate::rule_config_serde::is_polymorphic_sentinel(value)
177    {
178        None
179    } else {
180        Some(value)
181    }
182}
183
/// Static lookup table from rule identifiers and aliases to canonical rule IDs.
///
/// Built by `phf::phf_map!` as a perfect hash map, so lookups need no runtime
/// initialization. All keys are upper-case and use hyphens as separators;
/// `resolve_rule_name_alias` converts its input to that form before querying.
/// Some rules have more than one alias (e.g. "SINGLE-TITLE" and "SINGLE-H1"
/// both map to "MD025").
pub static RULE_ALIAS_MAP: phf::Map<&'static str, &'static str> = phf::phf_map! {
    // Canonical names (identity mapping, so canonical IDs resolve through the same lookup)
    "MD001" => "MD001",
    "MD003" => "MD003",
    "MD004" => "MD004",
    "MD005" => "MD005",
    "MD007" => "MD007",
    "MD009" => "MD009",
    "MD010" => "MD010",
    "MD011" => "MD011",
    "MD012" => "MD012",
    "MD013" => "MD013",
    "MD014" => "MD014",
    "MD018" => "MD018",
    "MD019" => "MD019",
    "MD020" => "MD020",
    "MD021" => "MD021",
    "MD022" => "MD022",
    "MD023" => "MD023",
    "MD024" => "MD024",
    "MD025" => "MD025",
    "MD026" => "MD026",
    "MD027" => "MD027",
    "MD028" => "MD028",
    "MD029" => "MD029",
    "MD030" => "MD030",
    "MD031" => "MD031",
    "MD032" => "MD032",
    "MD033" => "MD033",
    "MD034" => "MD034",
    "MD035" => "MD035",
    "MD036" => "MD036",
    "MD037" => "MD037",
    "MD038" => "MD038",
    "MD039" => "MD039",
    "MD040" => "MD040",
    "MD041" => "MD041",
    "MD042" => "MD042",
    "MD043" => "MD043",
    "MD044" => "MD044",
    "MD045" => "MD045",
    "MD046" => "MD046",
    "MD047" => "MD047",
    "MD048" => "MD048",
    "MD049" => "MD049",
    "MD050" => "MD050",
    "MD051" => "MD051",
    "MD052" => "MD052",
    "MD053" => "MD053",
    "MD054" => "MD054",
    "MD055" => "MD055",
    "MD056" => "MD056",
    "MD057" => "MD057",
    "MD058" => "MD058",
    "MD059" => "MD059",
    "MD060" => "MD060",
    "MD061" => "MD061",
    "MD062" => "MD062",
    "MD063" => "MD063",
    "MD064" => "MD064",
    "MD065" => "MD065",
    "MD066" => "MD066",
    "MD067" => "MD067",
    "MD068" => "MD068",
    "MD069" => "MD069",
    "MD070" => "MD070",
    "MD071" => "MD071",
    "MD072" => "MD072",
    "MD073" => "MD073",
    "MD074" => "MD074",
    "MD075" => "MD075",
    "MD076" => "MD076",
    "MD077" => "MD077",
    "MD078" => "MD078",
    "MD079" => "MD079",
    "MD080" => "MD080",
    "MD081" => "MD081",

    // Descriptive aliases (upper-case, hyphen-separated)
    "HEADING-INCREMENT" => "MD001",
    "HEADING-STYLE" => "MD003",
    "UL-STYLE" => "MD004",
    "LIST-INDENT" => "MD005",
    "UL-INDENT" => "MD007",
    "NO-TRAILING-SPACES" => "MD009",
    "NO-HARD-TABS" => "MD010",
    "NO-REVERSED-LINKS" => "MD011",
    "NO-MULTIPLE-BLANKS" => "MD012",
    "LINE-LENGTH" => "MD013",
    "COMMANDS-SHOW-OUTPUT" => "MD014",
    "NO-MISSING-SPACE-ATX" => "MD018",
    "NO-MULTIPLE-SPACE-ATX" => "MD019",
    "NO-MISSING-SPACE-CLOSED-ATX" => "MD020",
    "NO-MULTIPLE-SPACE-CLOSED-ATX" => "MD021",
    "BLANKS-AROUND-HEADINGS" => "MD022",
    "HEADING-START-LEFT" => "MD023",
    "NO-DUPLICATE-HEADING" => "MD024",
    "SINGLE-TITLE" => "MD025",
    "SINGLE-H1" => "MD025",
    "NO-TRAILING-PUNCTUATION" => "MD026",
    "NO-MULTIPLE-SPACE-BLOCKQUOTE" => "MD027",
    "NO-BLANKS-BLOCKQUOTE" => "MD028",
    "OL-PREFIX" => "MD029",
    "LIST-MARKER-SPACE" => "MD030",
    "BLANKS-AROUND-FENCES" => "MD031",
    "BLANKS-AROUND-LISTS" => "MD032",
    "NO-INLINE-HTML" => "MD033",
    "NO-BARE-URLS" => "MD034",
    "HR-STYLE" => "MD035",
    "NO-EMPHASIS-AS-HEADING" => "MD036",
    "NO-SPACE-IN-EMPHASIS" => "MD037",
    "NO-SPACE-IN-CODE" => "MD038",
    "NO-SPACE-IN-LINKS" => "MD039",
    "FENCED-CODE-LANGUAGE" => "MD040",
    "FIRST-LINE-HEADING" => "MD041",
    "FIRST-LINE-H1" => "MD041",
    "NO-EMPTY-LINKS" => "MD042",
    "REQUIRED-HEADINGS" => "MD043",
    "PROPER-NAMES" => "MD044",
    "NO-ALT-TEXT" => "MD045",
    "CODE-BLOCK-STYLE" => "MD046",
    "SINGLE-TRAILING-NEWLINE" => "MD047",
    "CODE-FENCE-STYLE" => "MD048",
    "EMPHASIS-STYLE" => "MD049",
    "STRONG-STYLE" => "MD050",
    "LINK-FRAGMENTS" => "MD051",
    "REFERENCE-LINKS-IMAGES" => "MD052",
    "LINK-IMAGE-REFERENCE-DEFINITIONS" => "MD053",
    "LINK-IMAGE-STYLE" => "MD054",
    "TABLE-PIPE-STYLE" => "MD055",
    "TABLE-COLUMN-COUNT" => "MD056",
    "EXISTING-RELATIVE-LINKS" => "MD057",
    "BLANKS-AROUND-TABLES" => "MD058",
    "DESCRIPTIVE-LINK-TEXT" => "MD059",
    "TABLE-CELL-ALIGNMENT" => "MD060",
    "TABLE-FORMAT" => "MD060",
    "FORBIDDEN-TERMS" => "MD061",
    "LINK-DESTINATION-WHITESPACE" => "MD062",
    "HEADING-CAPITALIZATION" => "MD063",
    "NO-MULTIPLE-CONSECUTIVE-SPACES" => "MD064",
    "BLANKS-AROUND-HORIZONTAL-RULES" => "MD065",
    "FOOTNOTE-VALIDATION" => "MD066",
    "FOOTNOTE-DEFINITION-ORDER" => "MD067",
    "EMPTY-FOOTNOTE-DEFINITION" => "MD068",
    "NO-DUPLICATE-LIST-MARKERS" => "MD069",
    "NESTED-CODE-FENCE" => "MD070",
    "BLANK-LINE-AFTER-FRONTMATTER" => "MD071",
    "FRONTMATTER-KEY-SORT" => "MD072",
    "TOC-VALIDATION" => "MD073",
    "MKDOCS-NAV" => "MD074",
    "ORPHANED-TABLE-ROWS" => "MD075",
    "LIST-ITEM-SPACING" => "MD076",
    "LIST-CONTINUATION-INDENT" => "MD077",
    "MISSING-CHUNK-LABELS" => "MD078",
    "CHUNK-LABEL-SPACES" => "MD079",
    "HEADING-ANCHOR-COLLISION" => "MD080",
    "NO-EXCESSIVE-EMPHASIS" => "MD081",
};
344
345/// Resolve a rule name alias to its canonical form with O(1) perfect hash lookup
346/// Converts rule aliases (like "ul-style", "line-length") to canonical IDs (like "MD004", "MD013")
347/// Returns None if the rule name is not recognized
348pub fn resolve_rule_name_alias(key: &str) -> Option<&'static str> {
349    // Normalize: uppercase and replace underscores with hyphens
350    let normalized_key = key.to_ascii_uppercase().replace('_', "-");
351
352    // O(1) perfect hash lookup
353    RULE_ALIAS_MAP.get(normalized_key.as_str()).copied()
354}
355
356/// Resolves a rule name to its canonical ID, supporting both rule IDs and aliases.
357/// Returns the canonical ID (e.g., "MD001") for any valid input:
358/// - "MD001" → "MD001" (canonical)
359/// - "heading-increment" → "MD001" (alias)
360/// - "HEADING_INCREMENT" → "MD001" (case-insensitive, underscore variant)
361///
362/// For unknown names, falls back to normalization (uppercase for MDxxx pattern, otherwise kebab-case).
363pub fn resolve_rule_name(name: &str) -> String {
364    resolve_rule_name_alias(name).map_or_else(|| normalize_key(name), std::string::ToString::to_string)
365}
366
367/// Resolves a comma-separated list of rule names to canonical IDs.
368/// Handles CLI input like "MD001,line-length,heading-increment".
369/// Empty entries and whitespace are filtered out.
370pub fn resolve_rule_names(input: &str) -> std::collections::HashSet<String> {
371    input
372        .split(',')
373        .map(str::trim)
374        .filter(|s| !s.is_empty())
375        .map(resolve_rule_name)
376        .collect()
377}
378
379/// Checks if a rule name (or alias) is valid.
380/// Returns true if the name resolves to a known rule.
381/// Handles the special "all" value and all aliases.
382pub fn is_valid_rule_name(name: &str) -> bool {
383    // Check for special "all" value (case-insensitive)
384    if name.eq_ignore_ascii_case("all") {
385        return true;
386    }
387    resolve_rule_name_alias(name).is_some()
388}
389
390/// Canonicalizes a rule-name list in place: every entry is rewritten to its canonical
391/// rule ID via [`resolve_rule_name`], duplicates are removed (keeping first occurrence),
392/// and the special `"all"` keyword is preserved as-is (case-insensitive).
393///
394/// This enforces the runtime invariant that rule lists in `Config` (`enable`, `disable`,
395/// `extend_enable`, `extend_disable`, `fixable`, `unfixable`, and per-file ignore values)
396/// always contain canonical rule IDs. Consumers can therefore compare against
397/// `rule.name()` with simple string equality without needing alias resolution at every
398/// call site.
399///
400/// The operation is idempotent: running it twice produces the same result as once.
401pub fn canonicalize_rule_list_in_place(list: &mut Vec<String>) {
402    if list.is_empty() {
403        return;
404    }
405    let mut seen: std::collections::HashSet<String> = std::collections::HashSet::with_capacity(list.len());
406    let mut out: Vec<String> = Vec::with_capacity(list.len());
407    for entry in list.drain(..) {
408        let canonical = if entry.eq_ignore_ascii_case("all") {
409            "all".to_string()
410        } else {
411            resolve_rule_name(&entry)
412        };
413        if seen.insert(canonical.clone()) {
414            out.push(canonical);
415        }
416    }
417    *list = out;
418}
419
#[cfg(test)]
mod canonicalize_tests {
    use super::canonicalize_rule_list_in_place;

    /// Turns string literals into an owned rule list.
    fn owned(entries: &[&str]) -> Vec<String> {
        entries.iter().map(|entry| (*entry).to_string()).collect()
    }

    /// Canonicalizes a list built from `entries` and returns the result.
    fn canonicalized(entries: &[&str]) -> Vec<String> {
        let mut list = owned(entries);
        canonicalize_rule_list_in_place(&mut list);
        list
    }

    #[test]
    fn rewrites_aliases_to_canonical_ids() {
        assert_eq!(
            canonicalized(&["no-inline-html", "line-length"]),
            owned(&["MD033", "MD013"])
        );
    }

    #[test]
    fn dedupes_alias_and_canonical_preserving_order() {
        assert_eq!(
            canonicalized(&["MD033", "no-inline-html", "MD013"]),
            owned(&["MD033", "MD013"])
        );
    }

    #[test]
    fn preserves_all_keyword_normalized() {
        assert_eq!(canonicalized(&["ALL", "MD013"]), owned(&["all", "MD013"]));
    }

    #[test]
    fn is_idempotent() {
        let once = canonicalized(&["no-inline-html", "MD013"]);
        let mut twice = once.clone();
        canonicalize_rule_list_in_place(&mut twice);
        assert_eq!(twice, once);
    }

    #[test]
    fn handles_empty_and_unknown_inputs() {
        assert!(canonicalized(&[]).is_empty());

        // Both spellings normalize to the same kebab-case form, so they dedupe.
        assert_eq!(
            canonicalized(&["custom-rule", "Custom-Rule"]),
            owned(&["custom-rule"])
        );
    }
}
465}