Skip to main content

marco_core/intelligence/
catalog.rs

1//! Embedded diagnostics catalog loaded from RON at compile time.
2//!
3//! Catalog sources live next to this module and are embedded via `include_str!`:
4//! - Marco-native catalog: `core/src/intelligence/diagnostics_catalog_marco.ron`
5//! - markdownlint baseline catalog: `core/src/intelligence/diagnostics_catalog_markdownlint.ron`
6
7use serde::Deserialize;
8use std::sync::LazyLock;
9
10#[derive(Debug, Clone, Deserialize, Default)]
11pub struct DiagnosticsCatalog {
12    pub version: u32,
13    #[serde(default)]
14    pub settings: DiagnosticsCatalogSettings,
15    #[serde(default)]
16    pub groups: Vec<DiagnosticsCatalogGroup>,
17    #[serde(default)]
18    pub features: Vec<MarkdownFeatureCoverage>,
19    pub entries: Vec<DiagnosticsCatalogEntry>,
20}
21
22#[derive(Debug, Clone, Deserialize)]
23pub struct MarkdownFeatureCoverage {
24    pub key: String,
25    pub title: String,
26    pub category: String,
27    pub status: String,
28    #[serde(default)]
29    pub node_kinds: Vec<String>,
30    pub showcase_doc: Option<String>,
31    #[serde(default)]
32    pub related_diagnostics: Vec<String>,
33    #[serde(default)]
34    pub notes: String,
35    #[serde(default)]
36    pub examples: Vec<String>,
37}
38
39#[derive(Debug, Clone, Deserialize)]
40pub struct DiagnosticsCatalogGroup {
41    pub id: String,
42    pub title: String,
43    pub description: String,
44    pub code_prefix: String,
45    #[serde(default)]
46    pub tags: Vec<String>,
47}
48
49#[derive(Debug, Clone, Deserialize)]
50pub struct DiagnosticsCatalogSettings {
51    pub heading_too_long_threshold: usize,
52    pub unsafe_protocols: Vec<String>,
53    pub insecure_link_prefixes: Vec<String>,
54    pub script_tag_markers: Vec<String>,
55    pub unknown_code_fallback: String,
56    pub unknown_message_fallback: String,
57    pub unknown_fix_suggestion_fallback: String,
58    pub unknown_protocol_label: String,
59}
60
61impl Default for DiagnosticsCatalogSettings {
62    fn default() -> Self {
63        Self {
64            heading_too_long_threshold: 120,
65            unsafe_protocols: vec!["javascript".to_string(), "data".to_string()],
66            insecure_link_prefixes: vec!["http://".to_string()],
67            script_tag_markers: vec!["<script".to_string()],
68            unknown_code_fallback: "UNKNOWN".to_string(),
69            unknown_message_fallback: "Unknown diagnostic".to_string(),
70            unknown_fix_suggestion_fallback: "No fix suggestion available.".to_string(),
71            unknown_protocol_label: "unknown".to_string(),
72        }
73    }
74}
75
76#[derive(Debug, Clone, Deserialize)]
77pub struct DiagnosticsCatalogEntry {
78    pub key: String,
79    pub code: String,
80    pub title: String,
81    #[serde(default)]
82    pub message_template: Option<String>,
83    pub default_severity: String,
84    pub fix_suggestion: String,
85    pub description: String,
86    #[serde(default)]
87    pub tags: Vec<String>,
88    #[serde(default)]
89    pub examples: Vec<String>,
90}
91
92const DIAGNOSTICS_CATALOG_MARCO_RON: &str = include_str!("diagnostics_catalog_marco.ron");
93const DIAGNOSTICS_CATALOG_MARKDOWNLINT_RON: &str =
94    include_str!("diagnostics_catalog_markdownlint.ron");
95
96fn parse_catalog(source_name: &str, ron_src: &str) -> Option<DiagnosticsCatalog> {
97    match ron::de::from_str::<DiagnosticsCatalog>(ron_src) {
98        Ok(catalog) => Some(catalog),
99        Err(err) => {
100            log::error!(
101                "Failed to parse embedded diagnostics catalog ({}): {}",
102                source_name,
103                err
104            );
105            None
106        }
107    }
108}
109
110fn merge_catalogs(
111    mut marco: DiagnosticsCatalog,
112    markdownlint: DiagnosticsCatalog,
113) -> DiagnosticsCatalog {
114    // Keep Marco settings as authoritative for runtime policy.
115    marco.version = marco.version.max(markdownlint.version);
116
117    for group in markdownlint.groups {
118        if marco.groups.iter().all(|g| g.id != group.id) {
119            marco.groups.push(group);
120        }
121    }
122
123    for feature in markdownlint.features {
124        if marco.features.iter().all(|f| f.key != feature.key) {
125            marco.features.push(feature);
126        }
127    }
128
129    for entry in markdownlint.entries {
130        let duplicate_key = marco.entries.iter().any(|e| e.key == entry.key);
131        let duplicate_code = marco.entries.iter().any(|e| e.code == entry.code);
132        if !(duplicate_key || duplicate_code) {
133            marco.entries.push(entry);
134        }
135    }
136
137    marco
138}
139
140static DIAGNOSTICS_CATALOG: LazyLock<DiagnosticsCatalog> = LazyLock::new(|| {
141    let marco = parse_catalog("marco", DIAGNOSTICS_CATALOG_MARCO_RON);
142    let markdownlint = parse_catalog("markdownlint", DIAGNOSTICS_CATALOG_MARKDOWNLINT_RON);
143
144    match (marco, markdownlint) {
145        (Some(marco), Some(markdownlint)) => merge_catalogs(marco, markdownlint),
146        (Some(marco), None) => marco,
147        (None, Some(markdownlint)) => markdownlint,
148        (None, None) => DiagnosticsCatalog::default(),
149    }
150});
151
152/// Returns the embedded diagnostics catalog parsed from RON.
153pub fn diagnostics_catalog() -> &'static DiagnosticsCatalog {
154    &DIAGNOSTICS_CATALOG
155}
156
157/// Returns shared diagnostics analysis policy settings.
158pub fn diagnostics_catalog_settings() -> &'static DiagnosticsCatalogSettings {
159    &diagnostics_catalog().settings
160}
161
162/// Returns diagnostics groups metadata from the embedded catalog.
163pub fn diagnostics_catalog_groups() -> &'static [DiagnosticsCatalogGroup] {
164    &diagnostics_catalog().groups
165}
166
167/// Lookup a diagnostics group by id (e.g. `links`, `html`).
168pub fn find_catalog_group(id: &str) -> Option<&'static DiagnosticsCatalogGroup> {
169    diagnostics_catalog_groups()
170        .iter()
171        .find(|group| group.id == id)
172}
173
174/// Lookup a diagnostics group by code id prefix (e.g. `MD2` for links).
175pub fn find_catalog_group_by_code(code: &str) -> Option<&'static DiagnosticsCatalogGroup> {
176    diagnostics_catalog_groups()
177        .iter()
178        .filter(|group| code.starts_with(group.code_prefix.as_str()))
179        .max_by_key(|group| group.code_prefix.len())
180}
181
182/// Returns markdown feature coverage metadata from the embedded catalog.
183pub fn diagnostics_markdown_features() -> &'static [MarkdownFeatureCoverage] {
184    &diagnostics_catalog().features
185}
186
187/// Lookup a markdown feature coverage record by key.
188pub fn find_markdown_feature(key: &str) -> Option<&'static MarkdownFeatureCoverage> {
189    diagnostics_markdown_features()
190        .iter()
191        .find(|feature| feature.key == key)
192}
193
194/// Fast lookup by diagnostic code id (e.g. `MD101`).
195pub fn find_catalog_entry(code: &str) -> Option<&'static DiagnosticsCatalogEntry> {
196    diagnostics_catalog()
197        .entries
198        .iter()
199        .find(|entry| entry.code == code)
200}
201
202/// Fast lookup by diagnostic enum key (e.g. `EmptyImageUrl`).
203pub fn find_catalog_entry_by_key(key: &str) -> Option<&'static DiagnosticsCatalogEntry> {
204    diagnostics_catalog()
205        .entries
206        .iter()
207        .find(|entry| entry.key == key)
208}
209
210#[cfg(test)]
211mod tests {
212    use super::*;
213    use std::collections::HashSet;
214
215    fn is_valid_severity(value: &str) -> bool {
216        matches!(value, "Error" | "Warning" | "Info" | "Hint")
217    }
218
219    fn is_md_three_digit_code(code: &str) -> bool {
220        let mut chars = code.chars();
221        matches!(
222            (
223                chars.next(),
224                chars.next(),
225                chars.next(),
226                chars.next(),
227                chars.next(),
228                chars.next(),
229            ),
230            (Some('M'), Some('D'), Some(a), Some(b), Some(c), None)
231                if a.is_ascii_digit() && b.is_ascii_digit() && c.is_ascii_digit()
232        )
233    }
234
235    #[test]
236    fn smoke_test_embedded_catalog_parses() {
237        let catalog = diagnostics_catalog();
238        assert!(catalog.version >= 1);
239        assert!(!catalog.entries.is_empty());
240    }
241
242    #[test]
243    fn smoke_test_catalog_has_known_code() {
244        let md060 = find_catalog_entry("MD060");
245        assert!(md060.is_some());
246    }
247
248    #[test]
249    fn smoke_test_markdownlint_code_present() {
250        let md060 = find_catalog_entry("MD060");
251        assert!(md060.is_some());
252    }
253
254    #[test]
255    fn smoke_test_catalog_has_known_key() {
256        let entry = find_catalog_entry_by_key("EmptyImageUrl");
257        assert!(entry.is_some());
258    }
259
260    #[test]
261    fn smoke_test_catalog_settings_have_defaults() {
262        let settings = diagnostics_catalog_settings();
263        assert!(settings.heading_too_long_threshold > 0);
264        assert!(!settings.unsafe_protocols.is_empty());
265        assert!(!settings.insecure_link_prefixes.is_empty());
266        assert!(!settings.script_tag_markers.is_empty());
267        assert!(!settings.unknown_code_fallback.is_empty());
268        assert!(!settings.unknown_message_fallback.is_empty());
269        assert!(!settings.unknown_fix_suggestion_fallback.is_empty());
270        assert!(!settings.unknown_protocol_label.is_empty());
271    }
272
273    #[test]
274    fn smoke_test_catalog_has_groups() {
275        assert!(!diagnostics_catalog_groups().is_empty());
276        assert!(find_catalog_group("links").is_some());
277        assert!(find_catalog_group_by_code(&["MD", "203"].concat()).is_some());
278    }
279
280    #[test]
281    fn smoke_test_group_lookup_prefers_longest_prefix_match() {
282        // MD101 should resolve to the Marco parse group (prefix MD1)
283        // instead of the broad markdownlint baseline group (prefix MD).
284        let group = find_catalog_group_by_code("MD101").expect("expected group for MD101");
285        assert_eq!(group.id, "parse");
286    }
287
288    #[test]
289    fn smoke_test_catalog_has_markdown_feature_coverage() {
290        let features = diagnostics_markdown_features();
291        assert!(!features.is_empty());
292        assert!(find_markdown_feature("math").is_some());
293        assert!(find_markdown_feature("task-lists").is_some());
294        assert!(
295            features.iter().all(|feature| !feature.examples.is_empty()),
296            "all markdown feature records should include at least one example"
297        );
298    }
299
300    #[test]
301    fn smoke_test_feature_node_kinds_match_known_ast_variants() {
302        let known_node_kinds: HashSet<&'static str> = [
303            "Heading",
304            "Paragraph",
305            "CodeBlock",
306            "ThematicBreak",
307            "List",
308            "ListItem",
309            "DefinitionList",
310            "DefinitionTerm",
311            "DefinitionDescription",
312            "TaskCheckbox",
313            "Blockquote",
314            "Admonition",
315            "TabGroup",
316            "TabItem",
317            "SliderDeck",
318            "Slide",
319            "Table",
320            "TableRow",
321            "TableCell",
322            "HtmlBlock",
323            "FootnoteDefinition",
324            "Text",
325            "TaskCheckboxInline",
326            "Emphasis",
327            "Strong",
328            "StrongEmphasis",
329            "Strikethrough",
330            "Mark",
331            "Superscript",
332            "Subscript",
333            "Link",
334            "LinkReference",
335            "FootnoteReference",
336            "Image",
337            "CodeSpan",
338            "InlineHtml",
339            "HardBreak",
340            "SoftBreak",
341            "PlatformMention",
342            "InlineMath",
343            "DisplayMath",
344            "MermaidDiagram",
345        ]
346        .into_iter()
347        .collect();
348
349        for feature in diagnostics_markdown_features() {
350            for kind in &feature.node_kinds {
351                assert!(
352                    known_node_kinds.contains(kind.as_str()),
353                    "unknown node kind '{}' in feature '{}'",
354                    kind,
355                    feature.key
356                );
357            }
358        }
359    }
360
361    #[test]
362    fn smoke_test_marco_catalog_entries_use_supported_prefixes() {
363        let marco = parse_catalog("marco", DIAGNOSTICS_CATALOG_MARCO_RON)
364            .expect("marco catalog should parse in tests");
365
366        for entry in &marco.entries {
367            assert!(
368                entry.code.starts_with("MD")
369                    || entry.code.starts_with("MO")
370                    || entry.code.starts_with("MG"),
371                "unsupported diagnostics prefix for {} ({})",
372                entry.key,
373                entry.code
374            );
375        }
376    }
377
378    #[test]
379    fn smoke_test_marco_catalog_has_no_code_overlap_with_markdownlint() {
380        let marco = parse_catalog("marco", DIAGNOSTICS_CATALOG_MARCO_RON)
381            .expect("marco catalog should parse in tests");
382        let markdownlint = parse_catalog("markdownlint", DIAGNOSTICS_CATALOG_MARKDOWNLINT_RON)
383            .expect("markdownlint catalog should parse in tests");
384
385        let marco_codes: HashSet<&str> = marco
386            .entries
387            .iter()
388            .map(|entry| entry.code.as_str())
389            .collect();
390        let markdownlint_codes: HashSet<&str> = markdownlint
391            .entries
392            .iter()
393            .map(|entry| entry.code.as_str())
394            .collect();
395
396        let overlaps: Vec<&str> = marco_codes
397            .intersection(&markdownlint_codes)
398            .copied()
399            .collect();
400
401        assert!(
402            overlaps.is_empty(),
403            "marco/markdownlint code overlap detected: {:?}",
404            overlaps
405        );
406    }
407
408    #[test]
409    fn smoke_test_all_catalog_entries_have_editor_required_fields() {
410        let marco = parse_catalog("marco", DIAGNOSTICS_CATALOG_MARCO_RON)
411            .expect("marco catalog should parse in tests");
412        let markdownlint = parse_catalog("markdownlint", DIAGNOSTICS_CATALOG_MARKDOWNLINT_RON)
413            .expect("markdownlint catalog should parse in tests");
414
415        for (source, catalog) in [("marco", marco), ("markdownlint", markdownlint)] {
416            for entry in &catalog.entries {
417                assert!(
418                    !entry.key.trim().is_empty(),
419                    "{} entry has empty key (code={})",
420                    source,
421                    entry.code
422                );
423                assert!(
424                    !entry.code.trim().is_empty(),
425                    "{} entry has empty code (key={})",
426                    source,
427                    entry.key
428                );
429                assert!(
430                    !entry.title.trim().is_empty(),
431                    "{} entry {} has empty title",
432                    source,
433                    entry.code
434                );
435                assert!(
436                    !entry.description.trim().is_empty(),
437                    "{} entry {} has empty description",
438                    source,
439                    entry.code
440                );
441                assert!(
442                    !entry.fix_suggestion.trim().is_empty(),
443                    "{} entry {} has empty fix_suggestion",
444                    source,
445                    entry.code
446                );
447                assert!(
448                    is_valid_severity(entry.default_severity.as_str()),
449                    "{} entry {} has unsupported severity {}",
450                    source,
451                    entry.code,
452                    entry.default_severity
453                );
454                if let Some(template) = &entry.message_template {
455                    assert!(
456                        !template.trim().is_empty(),
457                        "{} entry {} has empty message_template",
458                        source,
459                        entry.code
460                    );
461                }
462                assert!(
463                    !entry.examples.is_empty(),
464                    "{} entry {} must include at least one example",
465                    source,
466                    entry.code
467                );
468                assert!(
469                    entry.examples.iter().all(|e| !e.trim().is_empty()),
470                    "{} entry {} has blank example text",
471                    source,
472                    entry.code
473                );
474            }
475        }
476    }
477
478    #[test]
479    fn smoke_test_markdownlint_entries_have_editor_friendly_content() {
480        let markdownlint = parse_catalog("markdownlint", DIAGNOSTICS_CATALOG_MARKDOWNLINT_RON)
481            .expect("markdownlint catalog should parse in tests");
482
483        for entry in &markdownlint.entries {
484            assert!(
485                is_md_three_digit_code(&entry.code),
486                "markdownlint entry has invalid code format: {}",
487                entry.code
488            );
489            assert!(
490                entry.key.starts_with("MarkdownlintMD"),
491                "markdownlint entry key must start with MarkdownlintMD: {}",
492                entry.key
493            );
494            assert!(
495                !entry
496                    .fix_suggestion
497                    .contains("See markdownlint docs for MD"),
498                "markdownlint entry {} contains placeholder fix text",
499                entry.code
500            );
501
502            for example in &entry.examples {
503                let text = example.trim();
504                let is_url_only = (text.starts_with("http://") || text.starts_with("https://"))
505                    && !text.contains(char::is_whitespace);
506                assert!(
507                    !is_url_only,
508                    "markdownlint entry {} has URL-only example: {}",
509                    entry.code, text
510                );
511            }
512        }
513    }
514
515    #[test]
516    fn smoke_test_merged_catalog_has_unique_keys_and_codes() {
517        let catalog = diagnostics_catalog();
518
519        let mut keys = HashSet::new();
520        let mut codes = HashSet::new();
521
522        for entry in &catalog.entries {
523            assert!(
524                keys.insert(entry.key.as_str()),
525                "duplicate catalog key in merged catalog: {}",
526                entry.key
527            );
528            assert!(
529                codes.insert(entry.code.as_str()),
530                "duplicate catalog code in merged catalog: {}",
531                entry.code
532            );
533        }
534    }
535}