Skip to main content

marco_core/intelligence/
catalog.rs

//! Embedded diagnostics catalog loaded from RON at compile time.
//!
//! Catalog sources live next to this module and are embedded via `include_str!`:
//! - extension catalog file
//! - markdownlint baseline catalog file
6
7use serde::Deserialize;
8use std::sync::LazyLock;
9
10#[derive(Debug, Clone, Deserialize, Default)]
11/// Root embedded diagnostics catalog model.
12pub struct DiagnosticsCatalog {
13    /// Catalog schema/content version.
14    pub version: u32,
15    #[serde(default)]
16    /// Runtime policy defaults.
17    pub settings: DiagnosticsCatalogSettings,
18    #[serde(default)]
19    /// Group metadata for diagnostics code ranges.
20    pub groups: Vec<DiagnosticsCatalogGroup>,
21    #[serde(default)]
22    /// Feature coverage metadata records.
23    pub features: Vec<MarkdownFeatureCoverage>,
24    /// Diagnostic catalog entries.
25    pub entries: Vec<DiagnosticsCatalogEntry>,
26}
27
28#[derive(Debug, Clone, Deserialize)]
29/// Coverage metadata for a Markdown feature.
30pub struct MarkdownFeatureCoverage {
31    /// Stable feature key.
32    pub key: String,
33    /// Human-readable feature title.
34    pub title: String,
35    /// Feature category label.
36    pub category: String,
37    /// Coverage status label.
38    pub status: String,
39    #[serde(default)]
40    /// Related AST node kind names.
41    pub node_kinds: Vec<String>,
42    /// Optional showcase document path/id.
43    pub showcase_doc: Option<String>,
44    #[serde(default)]
45    /// Related diagnostic code ids.
46    pub related_diagnostics: Vec<String>,
47    #[serde(default)]
48    /// Free-form notes.
49    pub notes: String,
50    #[serde(default)]
51    /// Example snippets for this feature.
52    pub examples: Vec<String>,
53}
54
55#[derive(Debug, Clone, Deserialize)]
56/// Metadata group for a family of diagnostics.
57pub struct DiagnosticsCatalogGroup {
58    /// Stable group id.
59    pub id: String,
60    /// Human-readable title.
61    pub title: String,
62    /// Group description.
63    pub description: String,
64    /// Code prefix matched by this group.
65    pub code_prefix: String,
66    #[serde(default)]
67    /// Free-form group tags.
68    pub tags: Vec<String>,
69}
70
71#[derive(Debug, Clone, Deserialize)]
72/// Shared diagnostics runtime settings loaded from catalog.
73pub struct DiagnosticsCatalogSettings {
74    /// Maximum heading length before warning.
75    pub heading_too_long_threshold: usize,
76    /// URL schemes considered unsafe.
77    pub unsafe_protocols: Vec<String>,
78    /// URL prefixes treated as insecure.
79    pub insecure_link_prefixes: Vec<String>,
80    /// Marker substrings used to detect script tags.
81    pub script_tag_markers: Vec<String>,
82    /// Fallback code id when unknown.
83    pub unknown_code_fallback: String,
84    /// Fallback message when unknown.
85    pub unknown_message_fallback: String,
86    /// Fallback fix suggestion when unknown.
87    pub unknown_fix_suggestion_fallback: String,
88    /// Label used for unknown protocol values.
89    pub unknown_protocol_label: String,
90}
91
92impl Default for DiagnosticsCatalogSettings {
93    fn default() -> Self {
94        Self {
95            heading_too_long_threshold: 120,
96            unsafe_protocols: vec!["javascript".to_string(), "data".to_string()],
97            insecure_link_prefixes: vec!["http://".to_string()],
98            script_tag_markers: vec!["<script".to_string()],
99            unknown_code_fallback: "UNKNOWN".to_string(),
100            unknown_message_fallback: "Unknown diagnostic".to_string(),
101            unknown_fix_suggestion_fallback: "No fix suggestion available.".to_string(),
102            unknown_protocol_label: "unknown".to_string(),
103        }
104    }
105}
106
107#[derive(Debug, Clone, Deserialize)]
108/// A single diagnostics catalog entry.
109pub struct DiagnosticsCatalogEntry {
110    /// Stable enum-like key (for example `EmptyImageUrl`).
111    pub key: String,
112    /// Stable external code id (for example `MD401`).
113    pub code: String,
114    /// Human-readable short title.
115    pub title: String,
116    #[serde(default)]
117    /// Optional parameterized message template.
118    pub message_template: Option<String>,
119    /// Default severity string (`Error`, `Warning`, `Info`, `Hint`).
120    pub default_severity: String,
121    /// Suggested remediation text.
122    pub fix_suggestion: String,
123    /// Rich explanation/description.
124    pub description: String,
125    #[serde(default)]
126    /// Free-form entry tags.
127    pub tags: Vec<String>,
128    #[serde(default)]
129    /// Example snippets associated with this diagnostic.
130    pub examples: Vec<String>,
131}
132
133const DIAGNOSTICS_CATALOG_MARCO_RON: &str = include_str!("diagnostics_catalog_marco.ron");
134const DIAGNOSTICS_CATALOG_MARKDOWNLINT_RON: &str =
135    include_str!("diagnostics_catalog_markdownlint.ron");
136
137fn parse_catalog(source_name: &str, ron_src: &str) -> Option<DiagnosticsCatalog> {
138    match ron::de::from_str::<DiagnosticsCatalog>(ron_src) {
139        Ok(catalog) => Some(catalog),
140        Err(err) => {
141            log::error!(
142                "Failed to parse embedded diagnostics catalog ({}): {}",
143                source_name,
144                err
145            );
146            None
147        }
148    }
149}
150
151fn merge_catalogs(
152    mut marco: DiagnosticsCatalog,
153    markdownlint: DiagnosticsCatalog,
154) -> DiagnosticsCatalog {
155    // Keep extension-catalog settings as authoritative for runtime policy.
156    marco.version = marco.version.max(markdownlint.version);
157
158    for group in markdownlint.groups {
159        if marco.groups.iter().all(|g| g.id != group.id) {
160            marco.groups.push(group);
161        }
162    }
163
164    for feature in markdownlint.features {
165        if marco.features.iter().all(|f| f.key != feature.key) {
166            marco.features.push(feature);
167        }
168    }
169
170    for entry in markdownlint.entries {
171        let duplicate_key = marco.entries.iter().any(|e| e.key == entry.key);
172        let duplicate_code = marco.entries.iter().any(|e| e.code == entry.code);
173        if !(duplicate_key || duplicate_code) {
174            marco.entries.push(entry);
175        }
176    }
177
178    marco
179}
180
181static DIAGNOSTICS_CATALOG: LazyLock<DiagnosticsCatalog> = LazyLock::new(|| {
182    let marco = parse_catalog("marco", DIAGNOSTICS_CATALOG_MARCO_RON);
183    let markdownlint = parse_catalog("markdownlint", DIAGNOSTICS_CATALOG_MARKDOWNLINT_RON);
184
185    match (marco, markdownlint) {
186        (Some(marco), Some(markdownlint)) => merge_catalogs(marco, markdownlint),
187        (Some(marco), None) => marco,
188        (None, Some(markdownlint)) => markdownlint,
189        (None, None) => DiagnosticsCatalog::default(),
190    }
191});
192
193/// Returns the embedded diagnostics catalog parsed from RON.
194pub fn diagnostics_catalog() -> &'static DiagnosticsCatalog {
195    &DIAGNOSTICS_CATALOG
196}
197
198/// Returns shared diagnostics analysis policy settings.
199pub fn diagnostics_catalog_settings() -> &'static DiagnosticsCatalogSettings {
200    &diagnostics_catalog().settings
201}
202
203/// Returns diagnostics groups metadata from the embedded catalog.
204pub fn diagnostics_catalog_groups() -> &'static [DiagnosticsCatalogGroup] {
205    &diagnostics_catalog().groups
206}
207
208/// Lookup a diagnostics group by id (e.g. `links`, `html`).
209pub fn find_catalog_group(id: &str) -> Option<&'static DiagnosticsCatalogGroup> {
210    diagnostics_catalog_groups()
211        .iter()
212        .find(|group| group.id == id)
213}
214
215/// Lookup a diagnostics group by code id prefix (e.g. `MD2` for links).
216pub fn find_catalog_group_by_code(code: &str) -> Option<&'static DiagnosticsCatalogGroup> {
217    diagnostics_catalog_groups()
218        .iter()
219        .filter(|group| code.starts_with(group.code_prefix.as_str()))
220        .max_by_key(|group| group.code_prefix.len())
221}
222
223/// Returns markdown feature coverage metadata from the embedded catalog.
224pub fn diagnostics_markdown_features() -> &'static [MarkdownFeatureCoverage] {
225    &diagnostics_catalog().features
226}
227
228/// Lookup a markdown feature coverage record by key.
229pub fn find_markdown_feature(key: &str) -> Option<&'static MarkdownFeatureCoverage> {
230    diagnostics_markdown_features()
231        .iter()
232        .find(|feature| feature.key == key)
233}
234
235/// Fast lookup by diagnostic code id (e.g. `MD101`).
236pub fn find_catalog_entry(code: &str) -> Option<&'static DiagnosticsCatalogEntry> {
237    diagnostics_catalog()
238        .entries
239        .iter()
240        .find(|entry| entry.code == code)
241}
242
243/// Fast lookup by diagnostic enum key (e.g. `EmptyImageUrl`).
244pub fn find_catalog_entry_by_key(key: &str) -> Option<&'static DiagnosticsCatalogEntry> {
245    diagnostics_catalog()
246        .entries
247        .iter()
248        .find(|entry| entry.key == key)
249}
250
251#[cfg(test)]
252mod tests {
253    use super::*;
254    use std::collections::HashSet;
255
256    fn is_valid_severity(value: &str) -> bool {
257        matches!(value, "Error" | "Warning" | "Info" | "Hint")
258    }
259
260    fn is_md_three_digit_code(code: &str) -> bool {
261        let mut chars = code.chars();
262        matches!(
263            (
264                chars.next(),
265                chars.next(),
266                chars.next(),
267                chars.next(),
268                chars.next(),
269                chars.next(),
270            ),
271            (Some('M'), Some('D'), Some(a), Some(b), Some(c), None)
272                if a.is_ascii_digit() && b.is_ascii_digit() && c.is_ascii_digit()
273        )
274    }
275
276    #[test]
277    fn smoke_test_embedded_catalog_parses() {
278        let catalog = diagnostics_catalog();
279        assert!(catalog.version >= 1);
280        assert!(!catalog.entries.is_empty());
281    }
282
283    #[test]
284    fn smoke_test_catalog_has_known_code() {
285        let md060 = find_catalog_entry("MD060");
286        assert!(md060.is_some());
287    }
288
289    #[test]
290    fn smoke_test_markdownlint_code_present() {
291        let md060 = find_catalog_entry("MD060");
292        assert!(md060.is_some());
293    }
294
295    #[test]
296    fn smoke_test_catalog_has_known_key() {
297        let entry = find_catalog_entry_by_key("EmptyImageUrl");
298        assert!(entry.is_some());
299    }
300
301    #[test]
302    fn smoke_test_catalog_settings_have_defaults() {
303        let settings = diagnostics_catalog_settings();
304        assert!(settings.heading_too_long_threshold > 0);
305        assert!(!settings.unsafe_protocols.is_empty());
306        assert!(!settings.insecure_link_prefixes.is_empty());
307        assert!(!settings.script_tag_markers.is_empty());
308        assert!(!settings.unknown_code_fallback.is_empty());
309        assert!(!settings.unknown_message_fallback.is_empty());
310        assert!(!settings.unknown_fix_suggestion_fallback.is_empty());
311        assert!(!settings.unknown_protocol_label.is_empty());
312    }
313
314    #[test]
315    fn smoke_test_catalog_has_groups() {
316        assert!(!diagnostics_catalog_groups().is_empty());
317        assert!(find_catalog_group("links").is_some());
318        assert!(find_catalog_group_by_code(&["MD", "203"].concat()).is_some());
319    }
320
321    #[test]
322    fn smoke_test_group_lookup_prefers_longest_prefix_match() {
323        // MD101 should resolve to the parse group (prefix MD1)
324        // instead of the broad markdownlint baseline group (prefix MD).
325        let group = find_catalog_group_by_code("MD101").expect("expected group for MD101");
326        assert_eq!(group.id, "parse");
327    }
328
329    #[test]
330    fn smoke_test_catalog_has_markdown_feature_coverage() {
331        let features = diagnostics_markdown_features();
332        assert!(!features.is_empty());
333        assert!(find_markdown_feature("math").is_some());
334        assert!(find_markdown_feature("task-lists").is_some());
335        assert!(
336            features.iter().all(|feature| !feature.examples.is_empty()),
337            "all markdown feature records should include at least one example"
338        );
339    }
340
341    #[test]
342    fn smoke_test_feature_node_kinds_match_known_ast_variants() {
343        let known_node_kinds: HashSet<&'static str> = [
344            "Heading",
345            "Paragraph",
346            "CodeBlock",
347            "ThematicBreak",
348            "List",
349            "ListItem",
350            "DefinitionList",
351            "DefinitionTerm",
352            "DefinitionDescription",
353            "TaskCheckbox",
354            "Blockquote",
355            "Admonition",
356            "TabGroup",
357            "TabItem",
358            "SliderDeck",
359            "Slide",
360            "Table",
361            "TableRow",
362            "TableCell",
363            "HtmlBlock",
364            "FootnoteDefinition",
365            "Text",
366            "TaskCheckboxInline",
367            "Emphasis",
368            "Strong",
369            "StrongEmphasis",
370            "Strikethrough",
371            "Mark",
372            "Superscript",
373            "Subscript",
374            "Link",
375            "LinkReference",
376            "FootnoteReference",
377            "Image",
378            "CodeSpan",
379            "InlineHtml",
380            "HardBreak",
381            "SoftBreak",
382            "PlatformMention",
383            "InlineMath",
384            "DisplayMath",
385            "MermaidDiagram",
386        ]
387        .into_iter()
388        .collect();
389
390        for feature in diagnostics_markdown_features() {
391            for kind in &feature.node_kinds {
392                assert!(
393                    known_node_kinds.contains(kind.as_str()),
394                    "unknown node kind '{}' in feature '{}'",
395                    kind,
396                    feature.key
397                );
398            }
399        }
400    }
401
402    #[test]
403    fn smoke_test_marco_catalog_entries_use_supported_prefixes() {
404        let marco = parse_catalog("marco", DIAGNOSTICS_CATALOG_MARCO_RON)
405            .expect("marco catalog should parse in tests");
406
407        for entry in &marco.entries {
408            assert!(
409                entry.code.starts_with("MD")
410                    || entry.code.starts_with("MO")
411                    || entry.code.starts_with("MG"),
412                "unsupported diagnostics prefix for {} ({})",
413                entry.key,
414                entry.code
415            );
416        }
417    }
418
419    #[test]
420    fn smoke_test_marco_catalog_has_no_code_overlap_with_markdownlint() {
421        let marco = parse_catalog("marco", DIAGNOSTICS_CATALOG_MARCO_RON)
422            .expect("marco catalog should parse in tests");
423        let markdownlint = parse_catalog("markdownlint", DIAGNOSTICS_CATALOG_MARKDOWNLINT_RON)
424            .expect("markdownlint catalog should parse in tests");
425
426        let marco_codes: HashSet<&str> = marco
427            .entries
428            .iter()
429            .map(|entry| entry.code.as_str())
430            .collect();
431        let markdownlint_codes: HashSet<&str> = markdownlint
432            .entries
433            .iter()
434            .map(|entry| entry.code.as_str())
435            .collect();
436
437        let overlaps: Vec<&str> = marco_codes
438            .intersection(&markdownlint_codes)
439            .copied()
440            .collect();
441
442        assert!(
443            overlaps.is_empty(),
444            "marco/markdownlint code overlap detected: {:?}",
445            overlaps
446        );
447    }
448
449    #[test]
450    fn smoke_test_all_catalog_entries_have_editor_required_fields() {
451        let marco = parse_catalog("marco", DIAGNOSTICS_CATALOG_MARCO_RON)
452            .expect("marco catalog should parse in tests");
453        let markdownlint = parse_catalog("markdownlint", DIAGNOSTICS_CATALOG_MARKDOWNLINT_RON)
454            .expect("markdownlint catalog should parse in tests");
455
456        for (source, catalog) in [("marco", marco), ("markdownlint", markdownlint)] {
457            for entry in &catalog.entries {
458                assert!(
459                    !entry.key.trim().is_empty(),
460                    "{} entry has empty key (code={})",
461                    source,
462                    entry.code
463                );
464                assert!(
465                    !entry.code.trim().is_empty(),
466                    "{} entry has empty code (key={})",
467                    source,
468                    entry.key
469                );
470                assert!(
471                    !entry.title.trim().is_empty(),
472                    "{} entry {} has empty title",
473                    source,
474                    entry.code
475                );
476                assert!(
477                    !entry.description.trim().is_empty(),
478                    "{} entry {} has empty description",
479                    source,
480                    entry.code
481                );
482                assert!(
483                    !entry.fix_suggestion.trim().is_empty(),
484                    "{} entry {} has empty fix_suggestion",
485                    source,
486                    entry.code
487                );
488                assert!(
489                    is_valid_severity(entry.default_severity.as_str()),
490                    "{} entry {} has unsupported severity {}",
491                    source,
492                    entry.code,
493                    entry.default_severity
494                );
495                if let Some(template) = &entry.message_template {
496                    assert!(
497                        !template.trim().is_empty(),
498                        "{} entry {} has empty message_template",
499                        source,
500                        entry.code
501                    );
502                }
503                assert!(
504                    !entry.examples.is_empty(),
505                    "{} entry {} must include at least one example",
506                    source,
507                    entry.code
508                );
509                assert!(
510                    entry.examples.iter().all(|e| !e.trim().is_empty()),
511                    "{} entry {} has blank example text",
512                    source,
513                    entry.code
514                );
515            }
516        }
517    }
518
519    #[test]
520    fn smoke_test_markdownlint_entries_have_editor_friendly_content() {
521        let markdownlint = parse_catalog("markdownlint", DIAGNOSTICS_CATALOG_MARKDOWNLINT_RON)
522            .expect("markdownlint catalog should parse in tests");
523
524        for entry in &markdownlint.entries {
525            assert!(
526                is_md_three_digit_code(&entry.code),
527                "markdownlint entry has invalid code format: {}",
528                entry.code
529            );
530            assert!(
531                entry.key.starts_with("MarkdownlintMD"),
532                "markdownlint entry key must start with MarkdownlintMD: {}",
533                entry.key
534            );
535            assert!(
536                !entry
537                    .fix_suggestion
538                    .contains("See markdownlint docs for MD"),
539                "markdownlint entry {} contains placeholder fix text",
540                entry.code
541            );
542
543            for example in &entry.examples {
544                let text = example.trim();
545                let is_url_only = (text.starts_with("http://") || text.starts_with("https://"))
546                    && !text.contains(char::is_whitespace);
547                assert!(
548                    !is_url_only,
549                    "markdownlint entry {} has URL-only example: {}",
550                    entry.code, text
551                );
552            }
553        }
554    }
555
556    #[test]
557    fn smoke_test_merged_catalog_has_unique_keys_and_codes() {
558        let catalog = diagnostics_catalog();
559
560        let mut keys = HashSet::new();
561        let mut codes = HashSet::new();
562
563        for entry in &catalog.entries {
564            assert!(
565                keys.insert(entry.key.as_str()),
566                "duplicate catalog key in merged catalog: {}",
567                entry.key
568            );
569            assert!(
570                codes.insert(entry.code.as_str()),
571                "duplicate catalog code in merged catalog: {}",
572                entry.code
573            );
574        }
575    }
576}