Skip to main content

marco_core/intelligence/analysis/
diagnostics.rs

1// Diagnostics: parse errors, broken links, etc.
2
3use crate::parser::{Document, Node, NodeKind, Position, Span};
4use std::borrow::Cow;
5use std::collections::{HashMap, HashSet};
6use std::sync::OnceLock;
7
8#[derive(Debug, Clone, PartialEq)]
9pub struct Diagnostic {
10    pub code: DiagnosticCode,
11    pub span: Span,
12    pub severity: DiagnosticSeverity,
13    pub message: String,
14}
15
/// Selects which diagnostics are kept in the final result.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DiagnosticsProfile {
    /// Emit all diagnostics (full analysis mode).
    All,
    /// Emit only critical diagnostics (currently severity=Error).
    CriticalOnly,
}

/// Policy controls applied when computing diagnostics.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct DiagnosticsOptions {
    /// Which diagnostics to keep.
    pub profile: DiagnosticsProfile,
    /// Optional cap to avoid flooding downstream consumers.
    pub max_diagnostics: Option<usize>,
}

impl DiagnosticsOptions {
    /// Options for the given profile with no cap on the number of diagnostics.
    const fn uncapped(profile: DiagnosticsProfile) -> Self {
        Self {
            profile,
            max_diagnostics: None,
        }
    }

    /// Options that keep every diagnostic, without a cap.
    pub const fn all() -> Self {
        Self::uncapped(DiagnosticsProfile::All)
    }

    /// Options that keep only critical diagnostics, without a cap.
    pub const fn critical_only() -> Self {
        Self::uncapped(DiagnosticsProfile::CriticalOnly)
    }
}

impl Default for DiagnosticsOptions {
    /// Same as [`DiagnosticsOptions::critical_only`].
    fn default() -> Self {
        // Product direction: prefer minimal critical diagnostics by default.
        Self::critical_only()
    }
}
53
/// Stable diagnostic codes for markdown intelligence.
///
/// These identifiers are intended to be stable across releases so frontend
/// components (status panes, filters, telemetry) can rely on them.
///
/// The variant names double as catalog lookup keys (see `catalog_key`, which
/// formats the variant with `Debug`), so renaming a variant changes the key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum DiagnosticCode {
    // ---- Parse / ingestion (MD0xx) ----
    ParseFailure,

    // ---- Headings (MD1xx) ----
    InvalidHeadingLevel,
    EmptyHeadingText,
    DuplicateHeadingId,
    HeadingTooLong,

    // ---- Links (MD2xx) ----
    EmptyLinkUrl,
    UnsafeLinkProtocol,
    InsecureLinkProtocol,
    UnresolvedLinkReference,
    EmptyLinkReferenceLabel,

    // ---- Code blocks (MD3xx) ----
    EmptyCodeBlock,
    MissingCodeBlockLanguage,

    // ---- Images (MD4xx) ----
    EmptyImageUrl,
    ImageMissingAltText,
    UnsafeImageProtocol,

    // ---- Inline HTML (MD5xx) ----
    InlineHtmlContainsScript,
    InlineHtmlJavascriptUrl,
    InlineHtmlUnsafeEventHandler,

    // ---- Block HTML (MD6xx) ----
    HtmlBlockContainsScript,
    HtmlBlockJavascriptUrl,
    EmptyHtmlBlock,
    HtmlBlockMismatchedAngles,
    HtmlBlockUnsafeEventHandler,

    // ---- Structural blocks (MD7xx) ----
    EmptyList,
    EmptyListItem,
    MalformedTaskCheckbox,
    EmptyTaskListItem,
    EmptyBlockquote,
    EmptyDefinitionList,
    EmptyDefinitionTerm,
    EmptyDefinitionDescription,
    EmptyTableCell,

    // ---- Footnotes (MD8xx) ----
    MissingFootnoteDefinition,
    DuplicateFootnoteDefinition,
    UnusedFootnoteDefinition,

    // ---- Extended blocks & rich content (MD9xx) ----
    EmptyTabGroup,
    EmptyTabTitle,
    DuplicateTabTitle,
    EmptyTabPanel,
    EmptySliderDeck,
    EmptySlide,
    EmptyAdmonitionBody,
    EmptyMathExpression,
    EmptyMermaidDiagram,
    EmptyAdmonitionTitle,
    UnknownAdmonitionKind,
    InvalidSliderTimer,
    EmptyPlatformMentionUsername,
    UnknownPlatformMentionPlatform,
    UnknownEmojiShortcode,
    EmptyPlatformMentionDisplayName,
}
131
132impl DiagnosticCode {
133    pub fn catalog_key(self) -> String {
134        format!("{self:?}")
135    }
136
137    pub fn as_str(self) -> &'static str {
138        self.catalog_entry()
139            .map(|entry| entry.code.as_str())
140            .unwrap_or_else(|| {
141                crate::intelligence::catalog::diagnostics_catalog_settings()
142                    .unknown_code_fallback
143                    .as_str()
144            })
145    }
146
147    /// Default user-facing diagnostic message sourced from embedded catalog metadata.
148    pub fn default_message(self) -> &'static str {
149        self.message_template()
150    }
151
152    /// Catalog-provided message template (or title when template is absent).
153    pub fn message_template(self) -> &'static str {
154        self.catalog_entry()
155            .map(|entry| {
156                entry
157                    .message_template
158                    .as_deref()
159                    .unwrap_or(entry.title.as_str())
160            })
161            .unwrap_or_else(|| {
162                crate::intelligence::catalog::diagnostics_catalog_settings()
163                    .unknown_message_fallback
164                    .as_str()
165            })
166    }
167
168    /// Resolve the default diagnostic severity from catalog metadata.
169    pub fn default_severity(self) -> DiagnosticSeverity {
170        self.catalog_entry()
171            .and_then(|entry| DiagnosticSeverity::from_catalog_str(&entry.default_severity))
172            .unwrap_or(DiagnosticSeverity::Warning)
173    }
174
175    /// Format message template placeholders like `{protocol}` with values.
176    pub fn format_message(self, pairs: &[(&str, String)]) -> String {
177        let mut message = self.message_template().to_string();
178        for (key, value) in pairs {
179            let placeholder = format!("{{{}}}", key);
180            message = message.replace(&placeholder, value);
181        }
182        message
183    }
184
185    /// Optional embedded catalog entry for this diagnostic code.
186    pub fn catalog_entry(
187        self,
188    ) -> Option<&'static crate::intelligence::catalog::DiagnosticsCatalogEntry> {
189        let key = self.catalog_key();
190        crate::intelligence::catalog::find_catalog_entry_by_key(&key)
191    }
192
193    /// Fix suggestion sourced from the embedded diagnostics catalog.
194    pub fn fix_suggestion(self) -> &'static str {
195        self.catalog_entry()
196            .map(|entry| entry.fix_suggestion.as_str())
197            .unwrap_or_else(|| {
198                crate::intelligence::catalog::diagnostics_catalog_settings()
199                    .unknown_fix_suggestion_fallback
200                    .as_str()
201            })
202    }
203
204    /// Resolve fix suggestion as a `Cow` for UI integration.
205    pub fn fix_suggestion_resolved(self) -> Cow<'static, str> {
206        Cow::Borrowed(self.fix_suggestion())
207    }
208}
209
210impl Diagnostic {
211    /// Stable external code id from catalog (e.g. `MD103`) for this diagnostic.
212    pub fn code_id(&self) -> &'static str {
213        self.code.as_str()
214    }
215
216    /// Stable quick fix suggestion associated with this diagnostic code.
217    pub fn fix_suggestion(&self) -> &'static str {
218        self.code.fix_suggestion()
219    }
220
221    /// Optional embedded catalog entry for this diagnostic.
222    pub fn catalog_entry(
223        &self,
224    ) -> Option<&'static crate::intelligence::catalog::DiagnosticsCatalogEntry> {
225        self.code.catalog_entry()
226    }
227
228    /// Human title from embedded catalog if present.
229    pub fn title_resolved(&self) -> Option<&'static str> {
230        self.catalog_entry().map(|entry| entry.title.as_str())
231    }
232
233    /// Rich description from embedded catalog if present.
234    pub fn description_resolved(&self) -> Option<&'static str> {
235        self.catalog_entry().map(|entry| entry.description.as_str())
236    }
237
238    /// Resolve fix suggestion from embedded catalog when available,
239    /// with a stable in-code fallback.
240    pub fn fix_suggestion_resolved(&self) -> Cow<'static, str> {
241        self.code.fix_suggestion_resolved()
242    }
243
244    /// Build a parse-failure diagnostic anchored at a specific position.
245    pub fn parse_error_at(position: Position, message: impl Into<String>) -> Self {
246        let span = Span {
247            start: position,
248            end: Position {
249                line: position.line,
250                column: position.column.saturating_add(1),
251                offset: position.offset.saturating_add(1),
252            },
253        };
254
255        Self {
256            code: DiagnosticCode::ParseFailure,
257            span,
258            severity: DiagnosticCode::ParseFailure.default_severity(),
259            message: message.into(),
260        }
261    }
262
263    /// Build a parse-failure diagnostic at a safe default location (1:1).
264    pub fn parse_error(message: impl Into<String>) -> Self {
265        Self::parse_error_at(
266            Position {
267                line: 1,
268                column: 1,
269                offset: 0,
270            },
271            message,
272        )
273    }
274}
275
/// Severity level attached to a diagnostic.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DiagnosticSeverity {
    Error,
    Warning,
    Info,
    Hint,
}

impl DiagnosticSeverity {
    /// Parse a severity name as stored in the catalog.
    ///
    /// Matching is exact and case-sensitive (`"Error"`, `"Warning"`, `"Info"`,
    /// `"Hint"`); any other input yields `None`.
    pub fn from_catalog_str(value: &str) -> Option<Self> {
        let names = [
            ("Error", Self::Error),
            ("Warning", Self::Warning),
            ("Info", Self::Info),
            ("Hint", Self::Hint),
        ];

        names
            .iter()
            .find(|(name, _)| *name == value)
            .map(|&(_, severity)| severity)
    }

    /// Ordering key used when sorting diagnostics: lower ranks sort first, so
    /// higher severities come first when several diagnostics share a span.
    fn sort_rank(self) -> u8 {
        match self {
            Self::Error => 0,
            Self::Warning => 1,
            Self::Info => 2,
            Self::Hint => 3,
        }
    }
}
305
306fn sort_and_dedup_diagnostics(diagnostics: &mut Vec<Diagnostic>) {
307    diagnostics.sort_by(|a, b| {
308        (
309            a.span.start.offset,
310            a.span.end.offset,
311            a.severity.sort_rank(),
312            a.code.as_str(),
313            a.message.as_str(),
314        )
315            .cmp(&(
316                b.span.start.offset,
317                b.span.end.offset,
318                b.severity.sort_rank(),
319                b.code.as_str(),
320                b.message.as_str(),
321            ))
322    });
323
324    diagnostics.dedup_by(|a, b| {
325        a.span == b.span && a.severity == b.severity && a.code == b.code && a.message == b.message
326    });
327}
328
329fn diag(
330    diagnostics: &mut Vec<Diagnostic>,
331    code: DiagnosticCode,
332    span: Span,
333    severity: DiagnosticSeverity,
334    message: impl Into<String>,
335) {
336    diagnostics.push(Diagnostic {
337        code,
338        span,
339        severity,
340        message: message.into(),
341    });
342}
343
344fn diag_catalog(diagnostics: &mut Vec<Diagnostic>, code: DiagnosticCode, span: Span) {
345    diag(
346        diagnostics,
347        code,
348        span,
349        code.default_severity(),
350        code.default_message(),
351    );
352}
353
354fn diag_catalog_message(
355    diagnostics: &mut Vec<Diagnostic>,
356    code: DiagnosticCode,
357    span: Span,
358    message: impl Into<String>,
359) {
360    diag(
361        diagnostics,
362        code,
363        span,
364        code.default_severity(),
365        message.into(),
366    );
367}
368
/// Report whether the scheme of `url_lower` (the text before the first `:`)
/// is one of `disallowed_schemes`.
///
/// `url_lower` is expected to be lowercased already; the comparison against
/// `disallowed_schemes` is exact. A URL without `:` or with an empty scheme is
/// never reported.
///
/// Before comparing, the scheme is normalised the way URL parsers in browsers
/// do: leading spaces and C0 control characters are dropped, and tab / line
/// break characters are removed wherever they occur. Otherwise inputs such as
/// `" javascript:..."` or `"java\tscript:..."` would slip past the check while
/// still being treated as `javascript:` URLs when rendered.
fn has_disallowed_scheme(url_lower: &str, disallowed_schemes: &[String]) -> bool {
    let Some((raw_scheme, _)) = url_lower.split_once(':') else {
        return false;
    };

    let scheme: String = raw_scheme
        .trim_start_matches(|ch: char| ch <= ' ')
        .chars()
        .filter(|ch| !matches!(ch, '\t' | '\n' | '\r'))
        .collect();

    !scheme.is_empty() && disallowed_schemes.iter().any(|item| *item == scheme)
}
377
/// Report whether `url_lower` begins with at least one of `prefixes`.
fn starts_with_any_prefix(url_lower: &str, prefixes: &[String]) -> bool {
    for prefix in prefixes {
        if url_lower.starts_with(prefix.as_str()) {
            return true;
        }
    }
    false
}
381
/// Report whether `text_lower` contains `"<scheme>:"` for any scheme in
/// `protocols`, at any position in the text.
fn contains_unsafe_protocol_marker(text_lower: &str, protocols: &[String]) -> bool {
    for scheme in protocols {
        let mut needle = String::with_capacity(scheme.len() + 1);
        needle.push_str(scheme);
        needle.push(':');

        if text_lower.contains(needle.as_str()) {
            return true;
        }
    }
    false
}
388
/// Report whether `text_lower` contains at least one of `markers` as a
/// substring.
fn contains_any_marker(text_lower: &str, markers: &[String]) -> bool {
    for marker in markers {
        if text_lower.contains(marker.as_str()) {
            return true;
        }
    }
    false
}
392
/// Report whether `text_lower` (already lowercased HTML) assigns one of the
/// known inline event-handler attributes, e.g. `<a onclick="...">`.
///
/// An occurrence counts when the attribute name
/// * is preceded by a character that can separate it from what comes before
///   in a tag: ASCII whitespace, `<`, `/` (as in `<svg/onload=...>`), or a
///   quote closing the previous attribute value; and
/// * is followed, after optional ASCII whitespace, by `=`.
///
/// A name at the very start of the text, or one that is only part of a longer
/// word (`data-onclick=`, `nonclick=`), is not reported.
fn contains_unsafe_event_handler_attr(text_lower: &str) -> bool {
    const EVENT_ATTRS: &[&str] = &[
        "onabort",
        "onanimationend",
        "onanimationiteration",
        "onanimationstart",
        "onauxclick",
        "onbeforeinput",
        "onbeforeunload",
        "onblur",
        "oncancel",
        "oncanplay",
        "oncanplaythrough",
        "onchange",
        "onclick",
        "onclose",
        "oncontextmenu",
        "oncopy",
        "oncuechange",
        "oncut",
        "ondblclick",
        "ondrag",
        "ondragend",
        "ondragenter",
        "ondragleave",
        "ondragover",
        "ondragstart",
        "ondrop",
        "ondurationchange",
        "onended",
        "onerror",
        "onfocus",
        "onfocusin",
        "onfocusout",
        "onformdata",
        "oninput",
        "oninvalid",
        "onkeydown",
        "onkeypress",
        "onkeyup",
        "onload",
        "onloadeddata",
        "onloadedmetadata",
        "onloadstart",
        "onmousedown",
        "onmouseenter",
        "onmouseleave",
        "onmousemove",
        "onmouseout",
        "onmouseover",
        "onmouseup",
        "onpaste",
        "onpause",
        "onplay",
        "onplaying",
        "onprogress",
        "onratechange",
        "onreset",
        "onresize",
        "onscroll",
        "onsecuritypolicyviolation",
        "onseeked",
        "onseeking",
        "onselect",
        "onslotchange",
        "onstalled",
        "onsubmit",
        "onsuspend",
        "ontimeupdate",
        "ontoggle",
        "ontransitionend",
        "onunload",
        "onvolumechange",
        "onwaiting",
        "onwheel",
    ];

    /// Bytes that may directly precede an attribute name inside a tag.
    fn is_attr_boundary(byte: u8) -> bool {
        byte.is_ascii_whitespace() || matches!(byte, b'<' | b'/' | b'"' | b'\'')
    }

    /// True when `attr` occurs in `text` as an assigned attribute name.
    fn has_attr_assignment(text: &str, attr: &str) -> bool {
        let bytes = text.as_bytes();
        text.match_indices(attr).any(|(start, matched)| {
            let preceded_by_boundary = start
                .checked_sub(1)
                .and_then(|idx| bytes.get(idx))
                .is_some_and(|&byte| is_attr_boundary(byte));

            // `start + matched.len()` is the end of a matched substring, so it
            // is always a valid char boundary for slicing.
            preceded_by_boundary
                && text[start + matched.len()..]
                    .trim_start_matches(|ch: char| ch.is_ascii_whitespace())
                    .starts_with('=')
        })
    }

    EVENT_ATTRS
        .iter()
        .any(|attr| has_attr_assignment(text_lower, attr))
}
477
/// Report whether `platform_lower` is one of the recognised platform names.
///
/// The comparison is exact, so the caller is expected to pass a lowercased
/// name.
fn is_known_platform(platform_lower: &str) -> bool {
    const KNOWN_PLATFORMS: &[&str] = &[
        "github", "gitlab", "codeberg", "twitter", "x", "mastodon", "bluesky", "linkedin",
        "xing", "medium", "dribbble", "behance", "reddit", "discord", "telegram", "youtube",
        "twitch",
    ];

    KNOWN_PLATFORMS
        .iter()
        .any(|known| *known == platform_lower)
}
500
501fn list_item_has_malformed_task_marker(node: &Node) -> bool {
502    if node
503        .children
504        .iter()
505        .any(|child| matches!(child.kind, NodeKind::TaskCheckbox { .. }))
506    {
507        return false;
508    }
509
510    let Some(first_child) = node.children.first() else {
511        return false;
512    };
513
514    let candidate_text = match &first_child.kind {
515        NodeKind::Text(text) => Some(text.as_str()),
516        NodeKind::Paragraph => first_child
517            .children
518            .iter()
519            .find_map(|inline| match &inline.kind {
520                NodeKind::Text(text) => Some(text.as_str()),
521                _ => None,
522            }),
523        _ => None,
524    };
525
526    let Some(text) = candidate_text else {
527        return false;
528    };
529
530    let trimmed = text.trim_start();
531    if !trimmed.starts_with('[') {
532        return false;
533    }
534
535    let Some(close_idx) = trimmed.find(']') else {
536        return false;
537    };
538
539    let marker_body = trimmed[1..close_idx].trim();
540    if marker_body.is_empty() {
541        // "[ ]" is valid; empty marker body after trimming means this is a valid checkbox marker.
542        return false;
543    }
544
545    !matches!(marker_body, "x" | "X")
546}
547
/// Report whether `marker_kind_upper` is one of the recognised admonition
/// kinds. The comparison is exact, so the caller must pass an uppercased name.
fn known_admonition_kind(marker_kind_upper: &str) -> bool {
    const KINDS: &[&str] = &["NOTE", "TIP", "IMPORTANT", "WARNING", "CAUTION"];

    KINDS.iter().any(|kind| *kind == marker_kind_upper)
}
554
555fn blockquote_has_unknown_admonition_marker(node: &Node) -> bool {
556    let Some(first_block) = node.children.first() else {
557        return false;
558    };
559
560    if !matches!(first_block.kind, NodeKind::Paragraph) {
561        return false;
562    }
563
564    let mut raw = String::new();
565    for inline in &first_block.children {
566        match &inline.kind {
567            NodeKind::Text(text) => raw.push_str(text),
568            NodeKind::SoftBreak | NodeKind::HardBreak => break,
569            _ => return false,
570        }
571    }
572
573    let trimmed = raw.trim();
574    if !trimmed.starts_with("[!") {
575        return false;
576    }
577
578    let Some(close_idx) = trimmed.find(']') else {
579        return false;
580    };
581
582    let marker = &trimmed[2..close_idx].trim();
583    if marker.is_empty() {
584        return false;
585    }
586
587    let marker_upper = marker.to_ascii_uppercase();
588    !known_admonition_kind(&marker_upper)
589}
590
591fn known_emoji_shortcodes() -> &'static HashSet<String> {
592    static SHORTCODES: OnceLock<HashSet<String>> = OnceLock::new();
593
594    SHORTCODES.get_or_init(|| {
595        crate::logic::text_completion::emoji_shortcodes_for_completion()
596            .iter()
597            .map(|shortcode| shortcode.to_ascii_lowercase())
598            .collect::<HashSet<_>>()
599    })
600}
601
/// Characters allowed between the colons of an emoji shortcode: ASCII letters
/// and digits plus `_`, `+` and `-`.
fn is_shortcode_body_char(ch: char) -> bool {
    matches!(ch, 'a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '+' | '-')
}
605
/// Trim punctuation, brackets and quote characters from both ends of `token`
/// so a shortcode wrapped like `(:smile:).` can still be recognised.
fn strip_surrounding_shortcode_wrappers(token: &str) -> &str {
    const WRAPPERS: &[char] = &[
        ',', '.', ';', '!', '?', '(', ')', '[', ']', '{', '}', '<', '>', '"', '\'', '`',
    ];

    token.trim_matches(WRAPPERS)
}
628
629fn shortcode_candidate_from_token(token: &str) -> Option<&str> {
630    let trimmed = strip_surrounding_shortcode_wrappers(token);
631
632    if trimmed.len() < 3 || !trimmed.starts_with(':') || !trimmed.ends_with(':') {
633        return None;
634    }
635
636    let body = &trimmed[1..trimmed.len() - 1];
637    if body.is_empty() || !body.chars().all(is_shortcode_body_char) {
638        return None;
639    }
640
641    if !body.chars().any(|ch| ch.is_ascii_alphabetic()) {
642        return None;
643    }
644
645    // Enforce token-like boundaries inside the candidate itself.
646    // This avoids odd cases like ":-name:" / ":name-:" / ":name--x:".
647    if body.starts_with(['-', '_', '+']) || body.ends_with(['-', '_', '+']) {
648        return None;
649    }
650
651    if body.contains("--") || body.contains("__") || body.contains("++") {
652        return None;
653    }
654
655    Some(trimmed)
656}
657
658fn text_has_unknown_emoji_shortcode(text: &str) -> bool {
659    let known = known_emoji_shortcodes();
660
661    text.split_whitespace().any(|token| {
662        shortcode_candidate_from_token(token)
663            .map(|candidate| !known.contains(&candidate.to_ascii_lowercase()))
664            .unwrap_or(false)
665    })
666}
667
668// Compute diagnostics for document
669pub fn compute_diagnostics(document: &Document) -> Vec<Diagnostic> {
670    compute_diagnostics_with_options(document, DiagnosticsOptions::all())
671}
672
673/// Compute diagnostics using configurable policy controls.
674pub fn compute_diagnostics_with_options(
675    document: &Document,
676    options: DiagnosticsOptions,
677) -> Vec<Diagnostic> {
678    log::debug!(
679        "Computing diagnostics for {} nodes",
680        document.children.len()
681    );
682
683    let mut diagnostics = Vec::new();
684
685    for node in &document.children {
686        collect_diagnostics(node, &mut diagnostics);
687    }
688
689    collect_document_level_diagnostics(document, &mut diagnostics);
690    sort_and_dedup_diagnostics(&mut diagnostics);
691
692    match options.profile {
693        DiagnosticsProfile::All => {}
694        DiagnosticsProfile::CriticalOnly => {
695            diagnostics.retain(|d| matches!(d.severity, DiagnosticSeverity::Error));
696        }
697    }
698
699    if let Some(max) = options.max_diagnostics {
700        diagnostics.truncate(max);
701    }
702
703    log::info!("Found {} diagnostics", diagnostics.len());
704    diagnostics
705}
706
707/// Compute only critical diagnostics (errors) using the default policy profile.
708pub fn compute_diagnostics_critical(document: &Document) -> Vec<Diagnostic> {
709    compute_diagnostics_with_options(document, DiagnosticsOptions::critical_only())
710}
711
712fn collect_document_level_diagnostics(document: &Document, diagnostics: &mut Vec<Diagnostic>) {
713    // Detect duplicate explicit heading IDs (e.g. "{#id}").
714    // We intentionally diagnose the second and subsequent occurrences.
715    let mut seen: HashMap<String, Span> = HashMap::new();
716    collect_duplicate_heading_ids(&document.children, &mut seen, diagnostics);
717
718    collect_footnote_consistency_diagnostics(&document.children, diagnostics);
719    collect_link_reference_consistency_diagnostics(
720        &document.children,
721        &document.references,
722        diagnostics,
723    );
724}
725
/// Normalise a label for comparison: trim it, collapse every run of
/// whitespace to a single space, and lowercase the result.
fn normalize_label_for_diagnostics(label: &str) -> String {
    let mut collapsed = String::with_capacity(label.len());
    for word in label.split_whitespace() {
        if !collapsed.is_empty() {
            collapsed.push(' ');
        }
        collapsed.push_str(word);
    }
    collapsed.to_lowercase()
}
733
734fn collect_footnote_consistency_diagnostics(nodes: &[Node], diagnostics: &mut Vec<Diagnostic>) {
735    let mut definitions: HashMap<String, Span> = HashMap::new();
736    let mut references: Vec<(String, Span)> = Vec::new();
737
738    collect_footnote_nodes(nodes, &mut definitions, &mut references, diagnostics);
739
740    let mut reference_counts: HashMap<String, usize> = HashMap::new();
741    for (normalized_label, span) in references {
742        *reference_counts
743            .entry(normalized_label.clone())
744            .or_insert(0) += 1;
745        if !definitions.contains_key(&normalized_label) {
746            diag_catalog(diagnostics, DiagnosticCode::MissingFootnoteDefinition, span);
747        }
748    }
749
750    for (label, span) in definitions {
751        if !reference_counts.contains_key(&label) {
752            diag_catalog(diagnostics, DiagnosticCode::UnusedFootnoteDefinition, span);
753        }
754    }
755}
756
757fn collect_footnote_nodes(
758    nodes: &[Node],
759    definitions: &mut HashMap<String, Span>,
760    references: &mut Vec<(String, Span)>,
761    diagnostics: &mut Vec<Diagnostic>,
762) {
763    for node in nodes {
764        if let Some(span) = node.span {
765            match &node.kind {
766                NodeKind::FootnoteDefinition { label } => {
767                    let normalized = normalize_label_for_diagnostics(label);
768                    if let std::collections::hash_map::Entry::Vacant(entry) =
769                        definitions.entry(normalized)
770                    {
771                        entry.insert(span);
772                    } else {
773                        diag_catalog(
774                            diagnostics,
775                            DiagnosticCode::DuplicateFootnoteDefinition,
776                            span,
777                        );
778                    }
779                }
780                NodeKind::FootnoteReference { label } => {
781                    references.push((normalize_label_for_diagnostics(label), span));
782                }
783                _ => {}
784            }
785        }
786
787        if !node.children.is_empty() {
788            collect_footnote_nodes(&node.children, definitions, references, diagnostics);
789        }
790    }
791}
792
793fn node_has_meaningful_content(node: &Node) -> bool {
794    match &node.kind {
795        NodeKind::Text(text) => !text.trim().is_empty(),
796        NodeKind::CodeSpan(code) => !code.trim().is_empty(),
797        NodeKind::InlineHtml(html) => !html.trim().is_empty(),
798        _ => node.children.iter().any(node_has_meaningful_content),
799    }
800}
801
802fn collect_duplicate_heading_ids(
803    nodes: &[Node],
804    seen: &mut HashMap<String, Span>,
805    diagnostics: &mut Vec<Diagnostic>,
806) {
807    for node in nodes {
808        if let (NodeKind::Heading { id: Some(id), .. }, Some(span)) = (&node.kind, node.span) {
809            let key = id.trim().to_lowercase();
810            if !key.is_empty() {
811                if let Some(first_span) = seen.get(&key) {
812                    diag(
813                        diagnostics,
814                        DiagnosticCode::DuplicateHeadingId,
815                        span,
816                        DiagnosticCode::DuplicateHeadingId.default_severity(),
817                        DiagnosticCode::DuplicateHeadingId.format_message(&[
818                            ("id", id.clone()),
819                            ("line", first_span.start.line.to_string()),
820                        ]),
821                    );
822                } else {
823                    seen.insert(key, span);
824                }
825            }
826        }
827
828        if !node.children.is_empty() {
829            collect_duplicate_heading_ids(&node.children, seen, diagnostics);
830        }
831    }
832}
833
834// Recursively collect diagnostics from a node and its children
835fn collect_diagnostics(node: &Node, diagnostics: &mut Vec<Diagnostic>) {
836    if let Some(span) = &node.span {
837        match &node.kind {
838            NodeKind::Heading { level, text, .. } => {
839                if *level > 6 {
840                    diag(
841                        diagnostics,
842                        DiagnosticCode::InvalidHeadingLevel,
843                        *span,
844                        DiagnosticCode::InvalidHeadingLevel.default_severity(),
845                        DiagnosticCode::InvalidHeadingLevel
846                            .format_message(&[("level", level.to_string())]),
847                    );
848                }
849
850                if text.trim().is_empty() {
851                    diag_catalog(diagnostics, DiagnosticCode::EmptyHeadingText, *span);
852                }
853
854                // Friendly style guardrail for very long headings.
855                if text.chars().count()
856                    > crate::intelligence::catalog::diagnostics_catalog_settings()
857                        .heading_too_long_threshold
858                {
859                    diag_catalog(diagnostics, DiagnosticCode::HeadingTooLong, *span);
860                }
861            }
862            NodeKind::Link { url, .. } => {
863                if url.trim().is_empty() {
864                    diag_catalog(diagnostics, DiagnosticCode::EmptyLinkUrl, *span);
865                }
866
867                let lower_url = url.to_lowercase();
868                let settings = crate::intelligence::catalog::diagnostics_catalog_settings();
869                if has_disallowed_scheme(&lower_url, &settings.unsafe_protocols) {
870                    let protocol = url
871                        .split_once(':')
872                        .map(|(prefix, _)| prefix)
873                        .unwrap_or(settings.unknown_protocol_label.as_str())
874                        .to_string();
875                    diag_catalog_message(
876                        diagnostics,
877                        DiagnosticCode::UnsafeLinkProtocol,
878                        *span,
879                        DiagnosticCode::UnsafeLinkProtocol
880                            .format_message(&[("protocol", protocol)]),
881                    );
882                }
883
884                if starts_with_any_prefix(&lower_url, &settings.insecure_link_prefixes) {
885                    diag_catalog(diagnostics, DiagnosticCode::InsecureLinkProtocol, *span);
886                }
887            }
888            NodeKind::LinkReference { .. } => {}
889            NodeKind::CodeBlock { language, code } => {
890                if code.trim().is_empty() {
891                    diag_catalog(diagnostics, DiagnosticCode::EmptyCodeBlock, *span);
892                }
893
894                if !code.trim().is_empty() && language.is_none() {
895                    diag_catalog(diagnostics, DiagnosticCode::MissingCodeBlockLanguage, *span);
896                }
897            }
898            NodeKind::Image { url, alt } => {
899                if url.trim().is_empty() {
900                    diag_catalog(diagnostics, DiagnosticCode::EmptyImageUrl, *span);
901                }
902
903                if alt.trim().is_empty() {
904                    diag_catalog(diagnostics, DiagnosticCode::ImageMissingAltText, *span);
905                }
906
907                let lower_url = url.to_lowercase();
908                let settings = crate::intelligence::catalog::diagnostics_catalog_settings();
909                if has_disallowed_scheme(&lower_url, &settings.unsafe_protocols) {
910                    let protocol = url
911                        .split_once(':')
912                        .map(|(prefix, _)| prefix)
913                        .unwrap_or(settings.unknown_protocol_label.as_str())
914                        .to_string();
915                    diag_catalog_message(
916                        diagnostics,
917                        DiagnosticCode::UnsafeImageProtocol,
918                        *span,
919                        DiagnosticCode::UnsafeImageProtocol
920                            .format_message(&[("protocol", protocol)]),
921                    );
922                }
923            }
924            NodeKind::InlineHtml(html) => {
925                let lower_html = html.to_lowercase();
926                let settings = crate::intelligence::catalog::diagnostics_catalog_settings();
927                if contains_any_marker(&lower_html, &settings.script_tag_markers) {
928                    diag_catalog(diagnostics, DiagnosticCode::InlineHtmlContainsScript, *span);
929                }
930
931                if contains_unsafe_protocol_marker(&lower_html, &settings.unsafe_protocols) {
932                    diag_catalog(diagnostics, DiagnosticCode::InlineHtmlJavascriptUrl, *span);
933                }
934
935                if contains_unsafe_event_handler_attr(&lower_html) {
936                    diag_catalog(
937                        diagnostics,
938                        DiagnosticCode::InlineHtmlUnsafeEventHandler,
939                        *span,
940                    );
941                }
942            }
943            NodeKind::List { .. } => {
944                if node.children.is_empty() {
945                    diag_catalog(diagnostics, DiagnosticCode::EmptyList, *span);
946                }
947            }
948            NodeKind::ListItem => {
949                if node.children.is_empty() {
950                    diag_catalog(diagnostics, DiagnosticCode::EmptyListItem, *span);
951                }
952
953                let has_task_checkbox = node
954                    .children
955                    .iter()
956                    .any(|child| matches!(child.kind, NodeKind::TaskCheckbox { .. }));
957
958                if has_task_checkbox {
959                    let has_task_content = node.children.iter().any(|child| {
960                        !matches!(child.kind, NodeKind::TaskCheckbox { .. })
961                            && node_has_meaningful_content(child)
962                    });
963
964                    if !has_task_content {
965                        diag_catalog(diagnostics, DiagnosticCode::EmptyTaskListItem, *span);
966                    }
967                } else if list_item_has_malformed_task_marker(node) {
968                    diag_catalog(diagnostics, DiagnosticCode::MalformedTaskCheckbox, *span);
969                }
970            }
971            NodeKind::HtmlBlock { html } => {
972                let lower_html = html.to_lowercase();
973                let settings = crate::intelligence::catalog::diagnostics_catalog_settings();
974
975                if contains_any_marker(&lower_html, &settings.script_tag_markers) {
976                    diag_catalog(diagnostics, DiagnosticCode::HtmlBlockContainsScript, *span);
977                }
978
979                if contains_unsafe_protocol_marker(&lower_html, &settings.unsafe_protocols) {
980                    diag_catalog(diagnostics, DiagnosticCode::HtmlBlockJavascriptUrl, *span);
981                }
982
983                if html.trim().is_empty() {
984                    diag_catalog(diagnostics, DiagnosticCode::EmptyHtmlBlock, *span);
985                }
986
987                let open_angles = html.matches('<').count();
988                let close_angles = html.matches('>').count();
989                if open_angles != close_angles {
990                    diag_catalog(
991                        diagnostics,
992                        DiagnosticCode::HtmlBlockMismatchedAngles,
993                        *span,
994                    );
995                }
996
997                if contains_unsafe_event_handler_attr(&lower_html) {
998                    diag_catalog(
999                        diagnostics,
1000                        DiagnosticCode::HtmlBlockUnsafeEventHandler,
1001                        *span,
1002                    );
1003                }
1004            }
1005            NodeKind::Blockquote => {
1006                if node.children.is_empty() {
1007                    diag_catalog(diagnostics, DiagnosticCode::EmptyBlockquote, *span);
1008                }
1009
1010                if blockquote_has_unknown_admonition_marker(node) {
1011                    diag_catalog(diagnostics, DiagnosticCode::UnknownAdmonitionKind, *span);
1012                }
1013            }
1014            NodeKind::DefinitionList => {
1015                if node.children.is_empty() {
1016                    diag_catalog(diagnostics, DiagnosticCode::EmptyDefinitionList, *span);
1017                }
1018            }
1019            NodeKind::DefinitionTerm => {
1020                if !node_has_meaningful_content(node) {
1021                    diag_catalog(diagnostics, DiagnosticCode::EmptyDefinitionTerm, *span);
1022                }
1023            }
1024            NodeKind::DefinitionDescription => {
1025                if !node_has_meaningful_content(node) {
1026                    diag_catalog(
1027                        diagnostics,
1028                        DiagnosticCode::EmptyDefinitionDescription,
1029                        *span,
1030                    );
1031                }
1032            }
1033            NodeKind::TableCell { .. } => {
1034                if !node_has_meaningful_content(node) {
1035                    diag_catalog(diagnostics, DiagnosticCode::EmptyTableCell, *span);
1036                }
1037            }
1038            NodeKind::TabGroup => {
1039                if node.children.is_empty() {
1040                    diag_catalog(diagnostics, DiagnosticCode::EmptyTabGroup, *span);
1041                }
1042
1043                let mut seen_titles: HashMap<String, Span> = HashMap::new();
1044                for child in &node.children {
1045                    if let (NodeKind::TabItem { title }, Some(tab_span)) = (&child.kind, child.span)
1046                    {
1047                        let normalized = title.trim().to_lowercase();
1048
1049                        if normalized.is_empty() {
1050                            diag_catalog(diagnostics, DiagnosticCode::EmptyTabTitle, tab_span);
1051                        }
1052
1053                        if !normalized.is_empty() {
1054                            if let std::collections::hash_map::Entry::Vacant(entry) =
1055                                seen_titles.entry(normalized)
1056                            {
1057                                entry.insert(tab_span);
1058                            } else {
1059                                diag_catalog(
1060                                    diagnostics,
1061                                    DiagnosticCode::DuplicateTabTitle,
1062                                    tab_span,
1063                                );
1064                            }
1065                        }
1066
1067                        if !node_has_meaningful_content(child) {
1068                            diag_catalog(diagnostics, DiagnosticCode::EmptyTabPanel, tab_span);
1069                        }
1070                    }
1071                }
1072            }
1073            NodeKind::SliderDeck { timer_seconds } => {
1074                if node.children.is_empty() {
1075                    diag_catalog(diagnostics, DiagnosticCode::EmptySliderDeck, *span);
1076                }
1077
1078                if timer_seconds.is_some_and(|value| value == 0) {
1079                    diag_catalog(diagnostics, DiagnosticCode::InvalidSliderTimer, *span);
1080                }
1081            }
1082            NodeKind::Slide { .. } => {
1083                if !node_has_meaningful_content(node) {
1084                    diag_catalog(diagnostics, DiagnosticCode::EmptySlide, *span);
1085                }
1086            }
1087            NodeKind::Admonition { title, .. } => {
1088                if node.children.is_empty() {
1089                    diag_catalog(diagnostics, DiagnosticCode::EmptyAdmonitionBody, *span);
1090                }
1091
1092                if let Some(custom_title) = title {
1093                    if custom_title.trim().is_empty() {
1094                        diag_catalog(diagnostics, DiagnosticCode::EmptyAdmonitionTitle, *span);
1095                    }
1096                }
1097            }
1098            NodeKind::InlineMath { content } | NodeKind::DisplayMath { content } => {
1099                if content.trim().is_empty() {
1100                    diag_catalog(diagnostics, DiagnosticCode::EmptyMathExpression, *span);
1101                }
1102            }
1103            NodeKind::MermaidDiagram { content } => {
1104                if content.trim().is_empty() {
1105                    diag_catalog(diagnostics, DiagnosticCode::EmptyMermaidDiagram, *span);
1106                }
1107            }
1108            NodeKind::PlatformMention {
1109                username,
1110                platform,
1111                display,
1112            } => {
1113                if username.trim().is_empty() {
1114                    diag_catalog(
1115                        diagnostics,
1116                        DiagnosticCode::EmptyPlatformMentionUsername,
1117                        *span,
1118                    );
1119                }
1120
1121                if !is_known_platform(&platform.trim().to_lowercase()) {
1122                    diag_catalog(
1123                        diagnostics,
1124                        DiagnosticCode::UnknownPlatformMentionPlatform,
1125                        *span,
1126                    );
1127                }
1128
1129                if display.as_ref().is_some_and(|d| d.trim().is_empty()) {
1130                    diag_catalog(
1131                        diagnostics,
1132                        DiagnosticCode::EmptyPlatformMentionDisplayName,
1133                        *span,
1134                    );
1135                }
1136            }
1137            NodeKind::Text(text) => {
1138                if text_has_unknown_emoji_shortcode(text) {
1139                    diag_catalog(diagnostics, DiagnosticCode::UnknownEmojiShortcode, *span);
1140                }
1141            }
1142            _ => {}
1143        }
1144    }
1145
1146    for child in &node.children {
1147        collect_diagnostics(child, diagnostics);
1148    }
1149}
1150
1151fn collect_link_reference_consistency_diagnostics(
1152    nodes: &[Node],
1153    references: &crate::parser::ReferenceMap,
1154    diagnostics: &mut Vec<Diagnostic>,
1155) {
1156    for node in nodes {
1157        if let (NodeKind::LinkReference { label, .. }, Some(span)) = (&node.kind, node.span) {
1158            let normalized = normalize_label_for_diagnostics(label);
1159
1160            if normalized.is_empty() {
1161                diag_catalog(diagnostics, DiagnosticCode::EmptyLinkReferenceLabel, span);
1162            } else if !references.contains(label) {
1163                diag_catalog(diagnostics, DiagnosticCode::UnresolvedLinkReference, span);
1164            }
1165        }
1166
1167        if !node.children.is_empty() {
1168            collect_link_reference_consistency_diagnostics(&node.children, references, diagnostics);
1169        }
1170    }
1171}
1172
1173#[cfg(test)]
1174mod tests {
1175    use super::*;
1176    use crate::parser::Position;
1177
1178    fn span(line: usize, start_col: usize, end_col: usize, start_offset: usize) -> Span {
1179        Span {
1180            start: Position {
1181                line,
1182                column: start_col,
1183                offset: start_offset,
1184            },
1185            end: Position {
1186                line,
1187                column: end_col,
1188                offset: start_offset + (end_col.saturating_sub(start_col)),
1189            },
1190        }
1191    }
1192
1193    #[test]
1194    fn smoke_test_codes_are_stable_strings() {
1195        for code in [
1196            DiagnosticCode::ParseFailure,
1197            DiagnosticCode::InvalidHeadingLevel,
1198            DiagnosticCode::DuplicateHeadingId,
1199            DiagnosticCode::UnresolvedLinkReference,
1200            DiagnosticCode::EmptyLinkReferenceLabel,
1201            DiagnosticCode::MissingCodeBlockLanguage,
1202            DiagnosticCode::ImageMissingAltText,
1203            DiagnosticCode::InlineHtmlUnsafeEventHandler,
1204            DiagnosticCode::HtmlBlockUnsafeEventHandler,
1205            DiagnosticCode::EmptyDefinitionList,
1206            DiagnosticCode::MissingFootnoteDefinition,
1207            DiagnosticCode::EmptyTaskListItem,
1208            DiagnosticCode::InvalidSliderTimer,
1209            DiagnosticCode::EmptyPlatformMentionUsername,
1210            DiagnosticCode::UnknownPlatformMentionPlatform,
1211            DiagnosticCode::UnknownEmojiShortcode,
1212            DiagnosticCode::EmptyPlatformMentionDisplayName,
1213            DiagnosticCode::DuplicateTabTitle,
1214            DiagnosticCode::EmptyMathExpression,
1215            DiagnosticCode::EmptyAdmonitionTitle,
1216            DiagnosticCode::UnknownAdmonitionKind,
1217        ] {
1218            let id = code.as_str();
1219            assert!(
1220                id.starts_with("MD") || id.starts_with("MO") || id.starts_with("MG"),
1221                "unexpected diagnostic namespace for code id: {}",
1222                id
1223            );
1224            assert_eq!(id.len(), 5);
1225        }
1226    }
1227
1228    #[test]
1229    fn smoke_test_all_diagnostic_codes_are_in_catalog() {
1230        let all_codes = [
1231            DiagnosticCode::ParseFailure,
1232            DiagnosticCode::InvalidHeadingLevel,
1233            DiagnosticCode::EmptyHeadingText,
1234            DiagnosticCode::DuplicateHeadingId,
1235            DiagnosticCode::HeadingTooLong,
1236            DiagnosticCode::EmptyLinkUrl,
1237            DiagnosticCode::UnsafeLinkProtocol,
1238            DiagnosticCode::InsecureLinkProtocol,
1239            DiagnosticCode::UnresolvedLinkReference,
1240            DiagnosticCode::EmptyLinkReferenceLabel,
1241            DiagnosticCode::EmptyCodeBlock,
1242            DiagnosticCode::MissingCodeBlockLanguage,
1243            DiagnosticCode::EmptyImageUrl,
1244            DiagnosticCode::ImageMissingAltText,
1245            DiagnosticCode::UnsafeImageProtocol,
1246            DiagnosticCode::InlineHtmlContainsScript,
1247            DiagnosticCode::InlineHtmlJavascriptUrl,
1248            DiagnosticCode::InlineHtmlUnsafeEventHandler,
1249            DiagnosticCode::HtmlBlockContainsScript,
1250            DiagnosticCode::HtmlBlockJavascriptUrl,
1251            DiagnosticCode::EmptyHtmlBlock,
1252            DiagnosticCode::HtmlBlockMismatchedAngles,
1253            DiagnosticCode::HtmlBlockUnsafeEventHandler,
1254            DiagnosticCode::EmptyList,
1255            DiagnosticCode::EmptyListItem,
1256            DiagnosticCode::MalformedTaskCheckbox,
1257            DiagnosticCode::EmptyTaskListItem,
1258            DiagnosticCode::EmptyBlockquote,
1259            DiagnosticCode::EmptyDefinitionList,
1260            DiagnosticCode::EmptyDefinitionTerm,
1261            DiagnosticCode::EmptyDefinitionDescription,
1262            DiagnosticCode::EmptyTableCell,
1263            DiagnosticCode::MissingFootnoteDefinition,
1264            DiagnosticCode::DuplicateFootnoteDefinition,
1265            DiagnosticCode::UnusedFootnoteDefinition,
1266            DiagnosticCode::EmptyTabGroup,
1267            DiagnosticCode::EmptyTabTitle,
1268            DiagnosticCode::DuplicateTabTitle,
1269            DiagnosticCode::EmptyTabPanel,
1270            DiagnosticCode::EmptySliderDeck,
1271            DiagnosticCode::EmptySlide,
1272            DiagnosticCode::EmptyAdmonitionBody,
1273            DiagnosticCode::EmptyMathExpression,
1274            DiagnosticCode::EmptyMermaidDiagram,
1275            DiagnosticCode::EmptyAdmonitionTitle,
1276            DiagnosticCode::UnknownAdmonitionKind,
1277            DiagnosticCode::InvalidSliderTimer,
1278            DiagnosticCode::EmptyPlatformMentionUsername,
1279            DiagnosticCode::UnknownPlatformMentionPlatform,
1280            DiagnosticCode::UnknownEmojiShortcode,
1281            DiagnosticCode::EmptyPlatformMentionDisplayName,
1282        ];
1283
1284        for code in all_codes {
1285            assert!(
1286                code.catalog_entry().is_some(),
1287                "missing catalog entry for {:?}",
1288                code
1289            );
1290        }
1291    }
1292
1293    #[test]
1294    fn smoke_test_fix_suggestions_are_available() {
1295        assert!(DiagnosticCode::DuplicateHeadingId
1296            .fix_suggestion()
1297            .contains("unique"));
1298        assert!(DiagnosticCode::MissingCodeBlockLanguage
1299            .fix_suggestion()
1300            .contains("```"));
1301        assert!(DiagnosticCode::MissingFootnoteDefinition
1302            .fix_suggestion()
1303            .contains("[^label]:"));
1304        assert!(DiagnosticCode::EmptySliderDeck
1305            .fix_suggestion()
1306            .contains("@slidestart"));
1307    }
1308
1309    #[test]
1310    fn smoke_test_diagnostic_methods_expose_fixit_metadata() {
1311        let d = Diagnostic {
1312            code: DiagnosticCode::ImageMissingAltText,
1313            span: span(1, 1, 10, 0),
1314            severity: DiagnosticSeverity::Warning,
1315            message: "Image missing alt text".to_string(),
1316        };
1317
1318        assert!(
1319            d.code_id().starts_with("MD")
1320                || d.code_id().starts_with("MO")
1321                || d.code_id().starts_with("MG")
1322        );
1323        assert!(d.fix_suggestion().contains("alt text"));
1324    }
1325
1326    #[test]
1327    fn smoke_test_resolved_catalog_metadata_available_for_seed_code() {
1328        let d = Diagnostic {
1329            code: DiagnosticCode::EmptyImageUrl,
1330            span: span(1, 1, 5, 0),
1331            severity: DiagnosticSeverity::Error,
1332            message: "Empty image URL".to_string(),
1333        };
1334
1335        assert_eq!(d.title_resolved(), Some("Empty image URL"));
1336        assert!(d
1337            .description_resolved()
1338            .expect("expected embedded catalog description")
1339            .contains("cannot render an image"));
1340    }
1341
1342    #[test]
1343    fn smoke_test_resolved_fix_suggestion_uses_catalog_override_when_present() {
1344        let d = Diagnostic {
1345            code: DiagnosticCode::ImageMissingAltText,
1346            span: span(1, 1, 10, 0),
1347            severity: DiagnosticSeverity::Warning,
1348            message: "Image missing alt text".to_string(),
1349        };
1350
1351        assert_eq!(
1352            d.fix_suggestion_resolved(),
1353            "Add descriptive alt text between '[' and ']' for accessibility and better screen-reader output."
1354        );
1355    }
1356
1357    #[test]
1358    fn smoke_test_parse_error_diagnostic_builder() {
1359        let d = Diagnostic::parse_error("Parse failed");
1360        assert_eq!(d.code, DiagnosticCode::ParseFailure);
1361        assert_eq!(d.severity, DiagnosticSeverity::Error);
1362        assert!(d.code_id().starts_with("MD"));
1363        assert_eq!(d.span.start.line, 1);
1364        assert_eq!(d.span.start.column, 1);
1365    }
1366
1367    #[test]
1368    fn smoke_test_diagnostics_options_critical_only_filters_non_errors() {
1369        let doc = Document {
1370            children: vec![Node {
1371                kind: NodeKind::Heading {
1372                    level: 1,
1373                    text: "This heading is intentionally very long to trigger an informational diagnostic while remaining syntactically valid and useful for filtering checks".to_string(),
1374                    id: None,
1375                },
1376                span: Some(span(1, 1, 20, 0)),
1377                children: vec![],
1378            }],
1379            ..Default::default()
1380        };
1381
1382        let all = compute_diagnostics_with_options(&doc, DiagnosticsOptions::all());
1383        let critical = compute_diagnostics_with_options(&doc, DiagnosticsOptions::critical_only());
1384
1385        assert!(all
1386            .iter()
1387            .any(|d| matches!(d.severity, DiagnosticSeverity::Info)));
1388        assert!(critical.is_empty());
1389    }
1390
1391    #[test]
1392    fn smoke_test_diagnostics_options_max_limit_is_applied() {
1393        let doc = Document {
1394            children: vec![
1395                Node {
1396                    kind: NodeKind::Heading {
1397                        level: 10,
1398                        text: "".to_string(),
1399                        id: None,
1400                    },
1401                    span: Some(span(1, 1, 2, 0)),
1402                    children: vec![],
1403                },
1404                Node {
1405                    kind: NodeKind::Image {
1406                        url: "".to_string(),
1407                        alt: "".to_string(),
1408                    },
1409                    span: Some(span(2, 1, 3, 10)),
1410                    children: vec![],
1411                },
1412            ],
1413            ..Default::default()
1414        };
1415
1416        let diagnostics = compute_diagnostics_with_options(
1417            &doc,
1418            DiagnosticsOptions {
1419                profile: DiagnosticsProfile::All,
1420                max_diagnostics: Some(2),
1421            },
1422        );
1423
1424        assert_eq!(diagnostics.len(), 2);
1425    }
1426
1427    #[test]
1428    fn smoke_test_duplicate_heading_ids_diagnosed() {
1429        let doc = Document {
1430            children: vec![
1431                Node {
1432                    kind: NodeKind::Heading {
1433                        level: 2,
1434                        text: "A".to_string(),
1435                        id: Some("dup-id".to_string()),
1436                    },
1437                    span: Some(span(1, 1, 5, 0)),
1438                    children: vec![],
1439                },
1440                Node {
1441                    kind: NodeKind::Heading {
1442                        level: 2,
1443                        text: "B".to_string(),
1444                        id: Some("dup-id".to_string()),
1445                    },
1446                    span: Some(span(3, 1, 5, 20)),
1447                    children: vec![],
1448                },
1449            ],
1450            ..Default::default()
1451        };
1452
1453        let diagnostics = compute_diagnostics(&doc);
1454        assert!(diagnostics.iter().any(|d| {
1455            d.code == DiagnosticCode::DuplicateHeadingId
1456                && d.severity == DiagnosticSeverity::Warning
1457        }));
1458    }
1459
1460    #[test]
1461    fn smoke_test_missing_language_and_http_link_rules() {
1462        let doc = Document {
1463            children: vec![
1464                Node {
1465                    kind: NodeKind::CodeBlock {
1466                        language: None,
1467                        code: "let x = 1;".to_string(),
1468                    },
1469                    span: Some(span(1, 1, 4, 0)),
1470                    children: vec![],
1471                },
1472                Node {
1473                    kind: NodeKind::Paragraph,
1474                    span: Some(span(3, 1, 30, 30)),
1475                    children: vec![Node {
1476                        kind: NodeKind::Link {
1477                            url: ["http", "://example.com"].concat(),
1478                            title: None,
1479                        },
1480                        span: Some(span(3, 5, 20, 34)),
1481                        children: vec![],
1482                    }],
1483                },
1484            ],
1485            ..Default::default()
1486        };
1487
1488        let diagnostics = compute_diagnostics(&doc);
1489        assert!(diagnostics
1490            .iter()
1491            .any(|d| d.code == DiagnosticCode::MissingCodeBlockLanguage));
1492        assert!(diagnostics
1493            .iter()
1494            .any(|d| d.code == DiagnosticCode::InsecureLinkProtocol));
1495    }
1496
1497    #[test]
1498    fn smoke_test_footnote_consistency_rules() {
1499        let doc = Document {
1500            children: vec![
1501                Node {
1502                    kind: NodeKind::Paragraph,
1503                    span: Some(span(1, 1, 20, 0)),
1504                    children: vec![Node {
1505                        kind: NodeKind::FootnoteReference {
1506                            label: "missing".to_string(),
1507                        },
1508                        span: Some(span(1, 10, 19, 9)),
1509                        children: vec![],
1510                    }],
1511                },
1512                Node {
1513                    kind: NodeKind::FootnoteDefinition {
1514                        label: "dup".to_string(),
1515                    },
1516                    span: Some(span(3, 1, 10, 30)),
1517                    children: vec![Node {
1518                        kind: NodeKind::Paragraph,
1519                        span: Some(span(3, 5, 14, 34)),
1520                        children: vec![Node {
1521                            kind: NodeKind::Text("def one".to_string()),
1522                            span: Some(span(3, 5, 11, 34)),
1523                            children: vec![],
1524                        }],
1525                    }],
1526                },
1527                Node {
1528                    kind: NodeKind::FootnoteDefinition {
1529                        label: "DUP".to_string(),
1530                    },
1531                    span: Some(span(5, 1, 10, 60)),
1532                    children: vec![],
1533                },
1534                Node {
1535                    kind: NodeKind::FootnoteDefinition {
1536                        label: "unused".to_string(),
1537                    },
1538                    span: Some(span(7, 1, 12, 90)),
1539                    children: vec![],
1540                },
1541            ],
1542            ..Default::default()
1543        };
1544
1545        let diagnostics = compute_diagnostics(&doc);
1546
1547        assert!(diagnostics
1548            .iter()
1549            .any(|d| d.code == DiagnosticCode::MissingFootnoteDefinition));
1550        assert!(diagnostics
1551            .iter()
1552            .any(|d| d.code == DiagnosticCode::DuplicateFootnoteDefinition));
1553        assert!(diagnostics
1554            .iter()
1555            .any(|d| d.code == DiagnosticCode::UnusedFootnoteDefinition));
1556    }
1557
1558    #[test]
1559    fn smoke_test_empty_table_cell_and_definition_entries() {
1560        let doc = Document {
1561            children: vec![
1562                Node {
1563                    kind: NodeKind::DefinitionList,
1564                    span: Some(span(1, 1, 4, 0)),
1565                    children: vec![],
1566                },
1567                Node {
1568                    kind: NodeKind::Table {
1569                        alignments: vec![crate::parser::TableAlignment::None],
1570                    },
1571                    span: Some(span(3, 1, 4, 20)),
1572                    children: vec![Node {
1573                        kind: NodeKind::TableRow { header: false },
1574                        span: Some(span(3, 1, 4, 20)),
1575                        children: vec![Node {
1576                            kind: NodeKind::TableCell {
1577                                header: false,
1578                                alignment: crate::parser::TableAlignment::None,
1579                            },
1580                            span: Some(span(3, 2, 3, 21)),
1581                            children: vec![Node {
1582                                kind: NodeKind::Text("   ".to_string()),
1583                                span: Some(span(3, 2, 3, 21)),
1584                                children: vec![],
1585                            }],
1586                        }],
1587                    }],
1588                },
1589            ],
1590            ..Default::default()
1591        };
1592
1593        let diagnostics = compute_diagnostics(&doc);
1594        assert!(diagnostics
1595            .iter()
1596            .any(|d| d.code == DiagnosticCode::EmptyDefinitionList));
1597        assert!(diagnostics
1598            .iter()
1599            .any(|d| d.code == DiagnosticCode::EmptyTableCell));
1600    }
1601
1602    #[test]
1603    fn smoke_test_tab_group_and_slider_rules() {
1604        let doc = Document {
1605            children: vec![
1606                Node {
1607                    kind: NodeKind::TabGroup,
1608                    span: Some(span(1, 1, 10, 0)),
1609                    children: vec![
1610                        Node {
1611                            kind: NodeKind::TabItem {
1612                                title: "One".to_string(),
1613                            },
1614                            span: Some(span(2, 1, 8, 11)),
1615                            children: vec![],
1616                        },
1617                        Node {
1618                            kind: NodeKind::TabItem {
1619                                title: " one ".to_string(),
1620                            },
1621                            span: Some(span(3, 1, 10, 20)),
1622                            children: vec![],
1623                        },
1624                    ],
1625                },
1626                Node {
1627                    kind: NodeKind::SliderDeck {
1628                        timer_seconds: Some(5),
1629                    },
1630                    span: Some(span(5, 1, 12, 40)),
1631                    children: vec![Node {
1632                        kind: NodeKind::Slide { vertical: false },
1633                        span: Some(span(6, 1, 8, 50)),
1634                        children: vec![Node {
1635                            kind: NodeKind::Text("  ".to_string()),
1636                            span: Some(span(6, 1, 3, 50)),
1637                            children: vec![],
1638                        }],
1639                    }],
1640                },
1641            ],
1642            ..Default::default()
1643        };
1644
1645        let diagnostics = compute_diagnostics(&doc);
1646
1647        assert!(diagnostics
1648            .iter()
1649            .any(|d| d.code == DiagnosticCode::DuplicateTabTitle));
1650        assert!(diagnostics
1651            .iter()
1652            .any(|d| d.code == DiagnosticCode::EmptyTabPanel));
1653        assert!(diagnostics
1654            .iter()
1655            .any(|d| d.code == DiagnosticCode::EmptySlide));
1656    }
1657
1658    #[test]
1659    fn smoke_test_empty_admonition_math_and_mermaid_rules() {
1660        let doc = Document {
1661            children: vec![
1662                Node {
1663                    kind: NodeKind::Admonition {
1664                        kind: crate::parser::AdmonitionKind::Note,
1665                        title: Some("".to_string()),
1666                        icon: None,
1667                        style: crate::parser::AdmonitionStyle::Alert,
1668                    },
1669                    span: Some(span(1, 1, 10, 0)),
1670                    children: vec![],
1671                },
1672                Node {
1673                    kind: NodeKind::Paragraph,
1674                    span: Some(span(3, 1, 12, 20)),
1675                    children: vec![
1676                        Node {
1677                            kind: NodeKind::InlineMath {
1678                                content: "   ".to_string(),
1679                            },
1680                            span: Some(span(3, 2, 6, 21)),
1681                            children: vec![],
1682                        },
1683                        Node {
1684                            kind: NodeKind::DisplayMath {
1685                                content: "\n\t".to_string(),
1686                            },
1687                            span: Some(span(3, 7, 11, 26)),
1688                            children: vec![],
1689                        },
1690                    ],
1691                },
1692                Node {
1693                    kind: NodeKind::MermaidDiagram {
1694                        content: "".to_string(),
1695                    },
1696                    span: Some(span(5, 1, 4, 40)),
1697                    children: vec![],
1698                },
1699            ],
1700            ..Default::default()
1701        };
1702
1703        let diagnostics = compute_diagnostics(&doc);
1704
1705        assert!(diagnostics
1706            .iter()
1707            .any(|d| d.code == DiagnosticCode::EmptyAdmonitionBody));
1708        assert!(diagnostics
1709            .iter()
1710            .any(|d| d.code == DiagnosticCode::EmptyMathExpression));
1711        assert!(diagnostics
1712            .iter()
1713            .any(|d| d.code == DiagnosticCode::EmptyMermaidDiagram));
1714    }
1715
1716    #[test]
1717    fn smoke_test_link_reference_and_html_event_handler_rules() {
1718        let doc = Document {
1719            children: vec![
1720                Node {
1721                    kind: NodeKind::Paragraph,
1722                    span: Some(span(1, 1, 24, 0)),
1723                    children: vec![Node {
1724                        kind: NodeKind::LinkReference {
1725                            label: "missing-ref".to_string(),
1726                            suffix: "[missing-ref]".to_string(),
1727                        },
1728                        span: Some(span(1, 2, 20, 1)),
1729                        children: vec![Node {
1730                            kind: NodeKind::Text("Guide".to_string()),
1731                            span: Some(span(1, 3, 8, 2)),
1732                            children: vec![],
1733                        }],
1734                    }],
1735                },
1736                Node {
1737                    kind: NodeKind::InlineHtml("<a onclick=\"x()\">x</a>".to_string()),
1738                    span: Some(span(2, 1, 22, 25)),
1739                    children: vec![],
1740                },
1741                Node {
1742                    kind: NodeKind::HtmlBlock {
1743                        html: "<img onerror=\"x()\" src=\"/a.png\">".to_string(),
1744                    },
1745                    span: Some(span(3, 1, 30, 48)),
1746                    children: vec![],
1747                },
1748            ],
1749            ..Default::default()
1750        };
1751
1752        let diagnostics = compute_diagnostics(&doc);
1753
1754        assert!(diagnostics
1755            .iter()
1756            .any(|d| d.code == DiagnosticCode::UnresolvedLinkReference));
1757        assert!(diagnostics
1758            .iter()
1759            .any(|d| d.code == DiagnosticCode::InlineHtmlUnsafeEventHandler));
1760        assert!(diagnostics
1761            .iter()
1762            .any(|d| d.code == DiagnosticCode::HtmlBlockUnsafeEventHandler));
1763    }
1764
1765    #[test]
1766    fn smoke_test_task_item_and_platform_mention_rules() {
1767        let doc = Document {
1768            children: vec![
1769                Node {
1770                    kind: NodeKind::ListItem,
1771                    span: Some(span(1, 1, 6, 0)),
1772                    children: vec![Node {
1773                        kind: NodeKind::TaskCheckbox { checked: false },
1774                        span: Some(span(1, 3, 5, 2)),
1775                        children: vec![],
1776                    }],
1777                },
1778                Node {
1779                    kind: NodeKind::PlatformMention {
1780                        username: "   ".to_string(),
1781                        platform: "unknownplatform".to_string(),
1782                        display: Some("   ".to_string()),
1783                    },
1784                    span: Some(span(2, 1, 22, 8)),
1785                    children: vec![],
1786                },
1787            ],
1788            ..Default::default()
1789        };
1790
1791        let diagnostics = compute_diagnostics(&doc);
1792
1793        assert!(diagnostics
1794            .iter()
1795            .any(|d| d.code == DiagnosticCode::EmptyTaskListItem));
1796        assert!(diagnostics
1797            .iter()
1798            .any(|d| d.code == DiagnosticCode::EmptyPlatformMentionUsername));
1799        assert!(diagnostics
1800            .iter()
1801            .any(|d| d.code == DiagnosticCode::UnknownPlatformMentionPlatform));
1802        assert!(diagnostics
1803            .iter()
1804            .any(|d| d.code == DiagnosticCode::EmptyPlatformMentionDisplayName));
1805    }
1806
1807    #[test]
1808    fn smoke_test_malformed_task_unknown_admonition_and_unknown_emoji_rules() {
1809        let doc = Document {
1810            children: vec![
1811                Node {
1812                    kind: NodeKind::ListItem,
1813                    span: Some(span(1, 1, 16, 0)),
1814                    children: vec![Node {
1815                        kind: NodeKind::Paragraph,
1816                        span: Some(span(1, 3, 16, 2)),
1817                        children: vec![Node {
1818                            kind: NodeKind::Text("[maybe] investigate".to_string()),
1819                            span: Some(span(1, 3, 16, 2)),
1820                            children: vec![],
1821                        }],
1822                    }],
1823                },
1824                Node {
1825                    kind: NodeKind::Blockquote,
1826                    span: Some(span(2, 1, 24, 20)),
1827                    children: vec![Node {
1828                        kind: NodeKind::Paragraph,
1829                        span: Some(span(2, 3, 24, 22)),
1830                        children: vec![Node {
1831                            kind: NodeKind::Text("[!CUSTOM] body".to_string()),
1832                            span: Some(span(2, 3, 24, 22)),
1833                            children: vec![],
1834                        }],
1835                    }],
1836                },
1837                Node {
1838                    kind: NodeKind::Paragraph,
1839                    span: Some(span(3, 1, 18, 45)),
1840                    children: vec![Node {
1841                        kind: NodeKind::Text("Status :not_an_emoji:".to_string()),
1842                        span: Some(span(3, 8, 23, 52)),
1843                        children: vec![],
1844                    }],
1845                },
1846            ],
1847            ..Default::default()
1848        };
1849
1850        let diagnostics = compute_diagnostics(&doc);
1851
1852        assert!(diagnostics
1853            .iter()
1854            .any(|d| d.code == DiagnosticCode::MalformedTaskCheckbox));
1855        assert!(diagnostics
1856            .iter()
1857            .any(|d| d.code == DiagnosticCode::UnknownAdmonitionKind));
1858        assert!(diagnostics
1859            .iter()
1860            .any(|d| d.code == DiagnosticCode::UnknownEmojiShortcode));
1861    }
1862
1863    #[test]
1864    fn smoke_test_unknown_emoji_shortcode_avoids_common_false_positives() {
1865        let doc = Document {
1866            children: vec![Node {
1867                kind: NodeKind::Paragraph,
1868                span: Some(span(1, 1, 80, 0)),
1869                children: vec![Node {
1870                    kind: NodeKind::Text(
1871                        "Visit https://example.com:8080/path, ratio a:b:c, and valid :smile:."
1872                            .to_string(),
1873                    ),
1874                    span: Some(span(1, 1, 80, 0)),
1875                    children: vec![],
1876                }],
1877            }],
1878            ..Default::default()
1879        };
1880
1881        let diagnostics = compute_diagnostics(&doc);
1882
1883        assert!(diagnostics
1884            .iter()
1885            .all(|d| d.code != DiagnosticCode::UnknownEmojiShortcode));
1886    }
1887
1888    #[test]
1889    fn smoke_test_unknown_emoji_shortcode_detects_punctuation_wrapped_token() {
1890        let doc = Document {
1891            children: vec![Node {
1892                kind: NodeKind::Paragraph,
1893                span: Some(span(1, 1, 42, 0)),
1894                children: vec![Node {
1895                    kind: NodeKind::Text("Please review (:not_an_emoji:) now.".to_string()),
1896                    span: Some(span(1, 1, 42, 0)),
1897                    children: vec![],
1898                }],
1899            }],
1900            ..Default::default()
1901        };
1902
1903        let diagnostics = compute_diagnostics(&doc);
1904
1905        assert!(diagnostics
1906            .iter()
1907            .any(|d| d.code == DiagnosticCode::UnknownEmojiShortcode));
1908    }
1909
1910    #[test]
1911    fn smoke_test_diagnostics_are_sorted_for_editor_stability() {
1912        let doc = Document {
1913            children: vec![
1914                Node {
1915                    kind: NodeKind::Heading {
1916                        level: 10,
1917                        text: "".to_string(),
1918                        id: None,
1919                    },
1920                    span: Some(span(2, 1, 2, 20)),
1921                    children: vec![],
1922                },
1923                Node {
1924                    kind: NodeKind::Image {
1925                        url: "".to_string(),
1926                        alt: "".to_string(),
1927                    },
1928                    span: Some(span(1, 1, 3, 0)),
1929                    children: vec![],
1930                },
1931            ],
1932            ..Default::default()
1933        };
1934
1935        let diagnostics = compute_diagnostics(&doc);
1936        for window in diagnostics.windows(2) {
1937            let left = &window[0];
1938            let right = &window[1];
1939            let l_key = (
1940                left.span.start.offset,
1941                left.span.end.offset,
1942                left.severity.sort_rank(),
1943                left.code.as_str(),
1944                left.message.as_str(),
1945            );
1946            let r_key = (
1947                right.span.start.offset,
1948                right.span.end.offset,
1949                right.severity.sort_rank(),
1950                right.code.as_str(),
1951                right.message.as_str(),
1952            );
1953            assert!(
1954                l_key <= r_key,
1955                "diagnostics must be sorted for stable editor rendering"
1956            );
1957        }
1958    }
1959
1960    #[test]
1961    fn smoke_test_sort_and_dedup_diagnostics_removes_exact_duplicates() {
1962        let mut diagnostics = vec![
1963            Diagnostic {
1964                code: DiagnosticCode::EmptyImageUrl,
1965                span: span(1, 1, 3, 0),
1966                severity: DiagnosticSeverity::Error,
1967                message: "Empty image URL".to_string(),
1968            },
1969            Diagnostic {
1970                code: DiagnosticCode::EmptyImageUrl,
1971                span: span(1, 1, 3, 0),
1972                severity: DiagnosticSeverity::Error,
1973                message: "Empty image URL".to_string(),
1974            },
1975            Diagnostic {
1976                code: DiagnosticCode::ImageMissingAltText,
1977                span: span(1, 1, 3, 0),
1978                severity: DiagnosticSeverity::Warning,
1979                message: "Image missing alt text".to_string(),
1980            },
1981        ];
1982
1983        sort_and_dedup_diagnostics(&mut diagnostics);
1984
1985        assert_eq!(diagnostics.len(), 2);
1986        assert_eq!(diagnostics[0].code, DiagnosticCode::EmptyImageUrl);
1987        assert_eq!(diagnostics[1].code, DiagnosticCode::ImageMissingAltText);
1988    }
1989}