// Source file: rumdl_lib/config/registry.rs

1use std::sync::LazyLock;
2
3use crate::rule::Rule;
4
5use super::flavor::normalize_key;
6
7/// Lazily-initialized default `RuleRegistry` built from rules with default config.
8///
9/// Rule config schemas (valid keys, types, aliases) are intrinsic to each rule type
10/// and do not change based on runtime configuration. This static registry avoids
11/// repeatedly constructing 67+ rule instances just to extract their schemas.
12static DEFAULT_REGISTRY: LazyLock<RuleRegistry> = LazyLock::new(|| {
13    let default_config = super::types::Config::default();
14    let rules = crate::rules::all_rules(&default_config);
15    RuleRegistry::from_rules(&rules)
16});
17
18/// Returns a reference to the lazily-initialized default `RuleRegistry`.
19///
20/// Use this instead of `all_rules(&Config::default())` + `RuleRegistry::from_rules()`
21/// when you only need rule metadata (names, config schemas, aliases) rather than
22/// configured rule instances for linting.
23pub fn default_registry() -> &'static RuleRegistry {
24    &DEFAULT_REGISTRY
25}
26
/// Registry of all known rules and their config schemas.
///
/// Populated by [`RuleRegistry::from_rules`], which stores every entry under the
/// rule name after it has been passed through `normalize_key`.
pub struct RuleRegistry {
    /// Map of rule name (e.g. "MD013") to set of valid config keys and their TOML value types.
    /// The stored value for each key is the rule's default, or a sentinel for
    /// polymorphic keys (see `from_rules`); a rule without a default config
    /// table has an empty map.
    pub rule_schemas: std::collections::BTreeMap<String, toml::map::Map<String, toml::Value>>,
    /// Map of rule name to config key aliases (alias key -> canonical schema key).
    /// Only rules whose `config_aliases()` returns `Some` have an entry here.
    pub rule_aliases: std::collections::BTreeMap<String, std::collections::HashMap<String, String>>,
}
34
35impl RuleRegistry {
36    /// Build a registry from a list of rules
37    pub fn from_rules(rules: &[Box<dyn Rule>]) -> Self {
38        let mut rule_schemas = std::collections::BTreeMap::new();
39        let mut rule_aliases = std::collections::BTreeMap::new();
40
41        for rule in rules {
42            let norm_name = if let Some((name, toml::Value::Table(mut table))) = rule.default_config_section() {
43                let norm_name = normalize_key(&name); // Normalize the name from default_config_section
44                // Overwrite polymorphic keys with the sentinel so the validator skips
45                // type checking for fields whose deserializer accepts multiple TOML
46                // types. The clean default is preserved for `rumdl config --defaults`
47                // because that path calls `default_config_section()` directly.
48                for key in rule.polymorphic_config_keys() {
49                    table.insert(
50                        (*key).to_string(),
51                        crate::rule_config_serde::polymorphic_sentinel_value(),
52                    );
53                }
54                rule_schemas.insert(norm_name.clone(), table);
55                norm_name
56            } else {
57                let norm_name = normalize_key(rule.name()); // Normalize the name from rule.name()
58                rule_schemas.insert(norm_name.clone(), toml::map::Map::new());
59                norm_name
60            };
61
62            // Store aliases if the rule provides them
63            if let Some(aliases) = rule.config_aliases() {
64                rule_aliases.insert(norm_name, aliases);
65            }
66        }
67
68        RuleRegistry {
69            rule_schemas,
70            rule_aliases,
71        }
72    }
73
74    /// Get all known rule names
75    pub fn rule_names(&self) -> std::collections::BTreeSet<String> {
76        self.rule_schemas.keys().cloned().collect()
77    }
78
79    /// Get the valid configuration keys for a rule, including both original and normalized variants
80    pub fn config_keys_for(&self, rule: &str) -> Option<std::collections::BTreeSet<String>> {
81        self.rule_schemas.get(rule).map(|schema| {
82            let mut all_keys = std::collections::BTreeSet::new();
83
84            // Always allow 'severity' and 'enabled' for any rule
85            all_keys.insert("severity".to_string());
86            all_keys.insert("enabled".to_string());
87
88            // Add original keys from schema
89            for key in schema.keys() {
90                all_keys.insert(key.clone());
91            }
92
93            // Add normalized variants for markdownlint compatibility
94            for key in schema.keys() {
95                // Add kebab-case variant
96                all_keys.insert(key.replace('_', "-"));
97                // Add snake_case variant
98                all_keys.insert(key.replace('-', "_"));
99                // Add normalized variant
100                all_keys.insert(normalize_key(key));
101            }
102
103            // Add any aliases defined by the rule
104            if let Some(aliases) = self.rule_aliases.get(rule) {
105                for alias_key in aliases.keys() {
106                    all_keys.insert(alias_key.clone());
107                    // Also add normalized variants of the alias
108                    all_keys.insert(alias_key.replace('_', "-"));
109                    all_keys.insert(alias_key.replace('-', "_"));
110                    all_keys.insert(normalize_key(alias_key));
111                }
112            }
113
114            all_keys
115        })
116    }
117
118    /// Get the expected value type for a rule's configuration key, trying variants.
119    /// Returns `None` for sentinel values (nullable Option fields, polymorphic fields
120    /// that accept multiple TOML types), which signals the caller to skip type checking
121    /// for that key while still recognizing the key as valid.
122    pub fn expected_value_for(&self, rule: &str, key: &str) -> Option<&toml::Value> {
123        let schema = self.rule_schemas.get(rule)?;
124
125        // Check if this key is an alias
126        if let Some(aliases) = self.rule_aliases.get(rule)
127            && let Some(canonical_key) = aliases.get(key)
128            && let Some(value) = schema.get(canonical_key)
129        {
130            return filter_type_check_sentinels(value);
131        }
132
133        // Try the original key
134        if let Some(value) = schema.get(key) {
135            return filter_type_check_sentinels(value);
136        }
137
138        // Try key variants
139        let key_variants = [
140            key.replace('-', "_"), // Convert kebab-case to snake_case
141            key.replace('_', "-"), // Convert snake_case to kebab-case
142            normalize_key(key),    // Normalized key (lowercase, kebab-case)
143        ];
144
145        for variant in &key_variants {
146            if let Some(value) = schema.get(variant) {
147                return filter_type_check_sentinels(value);
148            }
149        }
150
151        None
152    }
153
154    /// Resolve any rule name (canonical or alias) to its canonical form
155    /// Returns None if the rule name is not recognized
156    ///
157    /// Resolution order:
158    /// 1. Direct canonical name match
159    /// 2. Static aliases (built-in markdownlint aliases)
160    pub fn resolve_rule_name(&self, name: &str) -> Option<String> {
161        // Try normalized canonical name first
162        let normalized = normalize_key(name);
163        if self.rule_schemas.contains_key(&normalized) {
164            return Some(normalized);
165        }
166
167        // Try static alias resolution (O(1) perfect hash lookup)
168        resolve_rule_name_alias(name).map(std::string::ToString::to_string)
169    }
170}
171
172/// Returns `None` if the value is a sentinel that signals "skip type check"
173/// (nullable Option fields, polymorphic fields that accept multiple types).
174/// Otherwise returns `Some(value)` so the validator can compare types.
175fn filter_type_check_sentinels(value: &toml::Value) -> Option<&toml::Value> {
176    if crate::rule_config_serde::is_nullable_sentinel(value) || crate::rule_config_serde::is_polymorphic_sentinel(value)
177    {
178        None
179    } else {
180        Some(value)
181    }
182}
183
/// Compile-time perfect hash map (built with `phf::phf_map!`) from rule names to
/// canonical rule IDs.
///
/// Keys are stored upper-case with hyphen separators, so lookups must normalize
/// their input the same way; `resolve_rule_name_alias` does this before querying.
/// Every canonical ID also maps to itself, which lets one lookup handle both IDs
/// and aliases. Some rules have more than one alias (e.g. `SINGLE-TITLE` and
/// `SINGLE-H1` both map to `MD025`).
pub static RULE_ALIAS_MAP: phf::Map<&'static str, &'static str> = phf::phf_map! {
    // Canonical names (identity mapping, so IDs and aliases share one lookup).
    // The numbering has gaps: only the IDs listed here are known.
    "MD001" => "MD001",
    "MD003" => "MD003",
    "MD004" => "MD004",
    "MD005" => "MD005",
    "MD007" => "MD007",
    "MD009" => "MD009",
    "MD010" => "MD010",
    "MD011" => "MD011",
    "MD012" => "MD012",
    "MD013" => "MD013",
    "MD014" => "MD014",
    "MD018" => "MD018",
    "MD019" => "MD019",
    "MD020" => "MD020",
    "MD021" => "MD021",
    "MD022" => "MD022",
    "MD023" => "MD023",
    "MD024" => "MD024",
    "MD025" => "MD025",
    "MD026" => "MD026",
    "MD027" => "MD027",
    "MD028" => "MD028",
    "MD029" => "MD029",
    "MD030" => "MD030",
    "MD031" => "MD031",
    "MD032" => "MD032",
    "MD033" => "MD033",
    "MD034" => "MD034",
    "MD035" => "MD035",
    "MD036" => "MD036",
    "MD037" => "MD037",
    "MD038" => "MD038",
    "MD039" => "MD039",
    "MD040" => "MD040",
    "MD041" => "MD041",
    "MD042" => "MD042",
    "MD043" => "MD043",
    "MD044" => "MD044",
    "MD045" => "MD045",
    "MD046" => "MD046",
    "MD047" => "MD047",
    "MD048" => "MD048",
    "MD049" => "MD049",
    "MD050" => "MD050",
    "MD051" => "MD051",
    "MD052" => "MD052",
    "MD053" => "MD053",
    "MD054" => "MD054",
    "MD055" => "MD055",
    "MD056" => "MD056",
    "MD057" => "MD057",
    "MD058" => "MD058",
    "MD059" => "MD059",
    "MD060" => "MD060",
    "MD061" => "MD061",
    "MD062" => "MD062",
    "MD063" => "MD063",
    "MD064" => "MD064",
    "MD065" => "MD065",
    "MD066" => "MD066",
    "MD067" => "MD067",
    "MD068" => "MD068",
    "MD069" => "MD069",
    "MD070" => "MD070",
    "MD071" => "MD071",
    "MD072" => "MD072",
    "MD073" => "MD073",
    "MD074" => "MD074",
    "MD075" => "MD075",
    "MD076" => "MD076",
    "MD077" => "MD077",
    "MD078" => "MD078",
    "MD079" => "MD079",
    "MD080" => "MD080",

    // Aliases (upper-case, hyphen-separated), listed in rule-ID order.
    "HEADING-INCREMENT" => "MD001",
    "HEADING-STYLE" => "MD003",
    "UL-STYLE" => "MD004",
    "LIST-INDENT" => "MD005",
    "UL-INDENT" => "MD007",
    "NO-TRAILING-SPACES" => "MD009",
    "NO-HARD-TABS" => "MD010",
    "NO-REVERSED-LINKS" => "MD011",
    "NO-MULTIPLE-BLANKS" => "MD012",
    "LINE-LENGTH" => "MD013",
    "COMMANDS-SHOW-OUTPUT" => "MD014",
    "NO-MISSING-SPACE-ATX" => "MD018",
    "NO-MULTIPLE-SPACE-ATX" => "MD019",
    "NO-MISSING-SPACE-CLOSED-ATX" => "MD020",
    "NO-MULTIPLE-SPACE-CLOSED-ATX" => "MD021",
    "BLANKS-AROUND-HEADINGS" => "MD022",
    "HEADING-START-LEFT" => "MD023",
    "NO-DUPLICATE-HEADING" => "MD024",
    "SINGLE-TITLE" => "MD025",
    "SINGLE-H1" => "MD025",
    "NO-TRAILING-PUNCTUATION" => "MD026",
    "NO-MULTIPLE-SPACE-BLOCKQUOTE" => "MD027",
    "NO-BLANKS-BLOCKQUOTE" => "MD028",
    "OL-PREFIX" => "MD029",
    "LIST-MARKER-SPACE" => "MD030",
    "BLANKS-AROUND-FENCES" => "MD031",
    "BLANKS-AROUND-LISTS" => "MD032",
    "NO-INLINE-HTML" => "MD033",
    "NO-BARE-URLS" => "MD034",
    "HR-STYLE" => "MD035",
    "NO-EMPHASIS-AS-HEADING" => "MD036",
    "NO-SPACE-IN-EMPHASIS" => "MD037",
    "NO-SPACE-IN-CODE" => "MD038",
    "NO-SPACE-IN-LINKS" => "MD039",
    "FENCED-CODE-LANGUAGE" => "MD040",
    "FIRST-LINE-HEADING" => "MD041",
    "FIRST-LINE-H1" => "MD041",
    "NO-EMPTY-LINKS" => "MD042",
    "REQUIRED-HEADINGS" => "MD043",
    "PROPER-NAMES" => "MD044",
    "NO-ALT-TEXT" => "MD045",
    "CODE-BLOCK-STYLE" => "MD046",
    "SINGLE-TRAILING-NEWLINE" => "MD047",
    "CODE-FENCE-STYLE" => "MD048",
    "EMPHASIS-STYLE" => "MD049",
    "STRONG-STYLE" => "MD050",
    "LINK-FRAGMENTS" => "MD051",
    "REFERENCE-LINKS-IMAGES" => "MD052",
    "LINK-IMAGE-REFERENCE-DEFINITIONS" => "MD053",
    "LINK-IMAGE-STYLE" => "MD054",
    "TABLE-PIPE-STYLE" => "MD055",
    "TABLE-COLUMN-COUNT" => "MD056",
    "EXISTING-RELATIVE-LINKS" => "MD057",
    "BLANKS-AROUND-TABLES" => "MD058",
    "DESCRIPTIVE-LINK-TEXT" => "MD059",
    "TABLE-CELL-ALIGNMENT" => "MD060",
    "TABLE-FORMAT" => "MD060",
    "FORBIDDEN-TERMS" => "MD061",
    "LINK-DESTINATION-WHITESPACE" => "MD062",
    "HEADING-CAPITALIZATION" => "MD063",
    "NO-MULTIPLE-CONSECUTIVE-SPACES" => "MD064",
    "BLANKS-AROUND-HORIZONTAL-RULES" => "MD065",
    "FOOTNOTE-VALIDATION" => "MD066",
    "FOOTNOTE-DEFINITION-ORDER" => "MD067",
    "EMPTY-FOOTNOTE-DEFINITION" => "MD068",
    "NO-DUPLICATE-LIST-MARKERS" => "MD069",
    "NESTED-CODE-FENCE" => "MD070",
    "BLANK-LINE-AFTER-FRONTMATTER" => "MD071",
    "FRONTMATTER-KEY-SORT" => "MD072",
    "TOC-VALIDATION" => "MD073",
    "MKDOCS-NAV" => "MD074",
    "ORPHANED-TABLE-ROWS" => "MD075",
    "LIST-ITEM-SPACING" => "MD076",
    "LIST-CONTINUATION-INDENT" => "MD077",
    "MISSING-CHUNK-LABELS" => "MD078",
    "CHUNK-LABEL-SPACES" => "MD079",
    "HEADING-ANCHOR-COLLISION" => "MD080",
};
342
343/// Resolve a rule name alias to its canonical form with O(1) perfect hash lookup
344/// Converts rule aliases (like "ul-style", "line-length") to canonical IDs (like "MD004", "MD013")
345/// Returns None if the rule name is not recognized
346pub fn resolve_rule_name_alias(key: &str) -> Option<&'static str> {
347    // Normalize: uppercase and replace underscores with hyphens
348    let normalized_key = key.to_ascii_uppercase().replace('_', "-");
349
350    // O(1) perfect hash lookup
351    RULE_ALIAS_MAP.get(normalized_key.as_str()).copied()
352}
353
354/// Resolves a rule name to its canonical ID, supporting both rule IDs and aliases.
355/// Returns the canonical ID (e.g., "MD001") for any valid input:
356/// - "MD001" → "MD001" (canonical)
357/// - "heading-increment" → "MD001" (alias)
358/// - "HEADING_INCREMENT" → "MD001" (case-insensitive, underscore variant)
359///
360/// For unknown names, falls back to normalization (uppercase for MDxxx pattern, otherwise kebab-case).
361pub fn resolve_rule_name(name: &str) -> String {
362    resolve_rule_name_alias(name).map_or_else(|| normalize_key(name), std::string::ToString::to_string)
363}
364
365/// Resolves a comma-separated list of rule names to canonical IDs.
366/// Handles CLI input like "MD001,line-length,heading-increment".
367/// Empty entries and whitespace are filtered out.
368pub fn resolve_rule_names(input: &str) -> std::collections::HashSet<String> {
369    input
370        .split(',')
371        .map(str::trim)
372        .filter(|s| !s.is_empty())
373        .map(resolve_rule_name)
374        .collect()
375}
376
377/// Checks if a rule name (or alias) is valid.
378/// Returns true if the name resolves to a known rule.
379/// Handles the special "all" value and all aliases.
380pub fn is_valid_rule_name(name: &str) -> bool {
381    // Check for special "all" value (case-insensitive)
382    if name.eq_ignore_ascii_case("all") {
383        return true;
384    }
385    resolve_rule_name_alias(name).is_some()
386}
387
388/// Canonicalizes a rule-name list in place: every entry is rewritten to its canonical
389/// rule ID via [`resolve_rule_name`], duplicates are removed (keeping first occurrence),
390/// and the special `"all"` keyword is preserved as-is (case-insensitive).
391///
392/// This enforces the runtime invariant that rule lists in `Config` (`enable`, `disable`,
393/// `extend_enable`, `extend_disable`, `fixable`, `unfixable`, and per-file ignore values)
394/// always contain canonical rule IDs. Consumers can therefore compare against
395/// `rule.name()` with simple string equality without needing alias resolution at every
396/// call site.
397///
398/// The operation is idempotent: running it twice produces the same result as once.
399pub fn canonicalize_rule_list_in_place(list: &mut Vec<String>) {
400    if list.is_empty() {
401        return;
402    }
403    let mut seen: std::collections::HashSet<String> = std::collections::HashSet::with_capacity(list.len());
404    let mut out: Vec<String> = Vec::with_capacity(list.len());
405    for entry in list.drain(..) {
406        let canonical = if entry.eq_ignore_ascii_case("all") {
407            "all".to_string()
408        } else {
409            resolve_rule_name(&entry)
410        };
411        if seen.insert(canonical.clone()) {
412            out.push(canonical);
413        }
414    }
415    *list = out;
416}
417
#[cfg(test)]
mod canonicalize_tests {
    use super::canonicalize_rule_list_in_place;

    /// Builds an owned `Vec<String>` from string literals.
    fn owned(items: &[&str]) -> Vec<String> {
        items.iter().map(|item| (*item).to_string()).collect()
    }

    /// Canonicalizes a fresh list built from `items` and returns the result.
    fn canonicalized(items: &[&str]) -> Vec<String> {
        let mut list = owned(items);
        canonicalize_rule_list_in_place(&mut list);
        list
    }

    #[test]
    fn rewrites_aliases_to_canonical_ids() {
        assert_eq!(
            canonicalized(&["no-inline-html", "line-length"]),
            owned(&["MD033", "MD013"])
        );
    }

    #[test]
    fn dedupes_alias_and_canonical_preserving_order() {
        assert_eq!(
            canonicalized(&["MD033", "no-inline-html", "MD013"]),
            owned(&["MD033", "MD013"])
        );
    }

    #[test]
    fn preserves_all_keyword_normalized() {
        assert_eq!(canonicalized(&["ALL", "MD013"]), owned(&["all", "MD013"]));
    }

    #[test]
    fn is_idempotent() {
        let mut list = canonicalized(&["no-inline-html", "MD013"]);
        let after_first_pass = list.clone();
        canonicalize_rule_list_in_place(&mut list);
        assert_eq!(list, after_first_pass);
    }

    #[test]
    fn handles_empty_and_unknown_inputs() {
        assert!(canonicalized(&[]).is_empty());

        // Both spellings normalize to the same kebab-case form, so they dedupe.
        assert_eq!(
            canonicalized(&["custom-rule", "Custom-Rule"]),
            owned(&["custom-rule"])
        );
    }
}