Skip to main content

marco_core/intelligence/analysis/
diagnostics.rs

1//! Diagnostics for parse, structure, links, and extension validation.
2
3use crate::parser::{Document, Node, NodeKind, Position, Span};
4use std::borrow::Cow;
5use std::collections::{HashMap, HashSet};
6use std::sync::OnceLock;
7
8#[derive(Debug, Clone, PartialEq)]
9/// Diagnostic entry emitted by markdown analysis.
10pub struct Diagnostic {
11    /// Stable diagnostic code identifier.
12    pub code: DiagnosticCode,
13    /// Source span where the issue applies.
14    pub span: Span,
15    /// Severity level.
16    pub severity: DiagnosticSeverity,
17    /// User-facing diagnostic message.
18    pub message: String,
19}
20
/// Policy profile selecting which diagnostics are reported.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DiagnosticsProfile {
    /// Full analysis mode: every diagnostic is reported.
    All,
    /// Only critical diagnostics are reported (currently severity=Error).
    CriticalOnly,
}

/// Runtime options applied while computing diagnostics.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct DiagnosticsOptions {
    /// Profile deciding which severities are reported.
    pub profile: DiagnosticsProfile,
    /// Optional upper bound on the number of reported diagnostics, so
    /// downstream consumers are not flooded. `None` means no cap.
    pub max_diagnostics: Option<usize>,
}

impl DiagnosticsOptions {
    /// Options that report every diagnostic, without a cap.
    pub const fn all() -> Self {
        DiagnosticsOptions {
            max_diagnostics: None,
            profile: DiagnosticsProfile::All,
        }
    }

    /// Options that report only critical diagnostics, without a cap.
    pub const fn critical_only() -> Self {
        DiagnosticsOptions {
            max_diagnostics: None,
            profile: DiagnosticsProfile::CriticalOnly,
        }
    }
}

impl Default for DiagnosticsOptions {
    /// Defaults to [`DiagnosticsOptions::critical_only`].
    fn default() -> Self {
        // Product direction: prefer minimal critical diagnostics by default.
        DiagnosticsOptions::critical_only()
    }
}
63
/// Stable diagnostic codes for markdown intelligence.
///
/// These identifiers are intended to be stable across releases so frontend
/// components (status panes, filters, telemetry) can rely on them.
///
/// The `MDnxx` section comments below group the variants by topic; the
/// external code string itself is resolved through the diagnostics catalog
/// (see `DiagnosticCode::as_str`), keyed by the variant name.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum DiagnosticCode {
    // Parse / ingestion (MD0xx)
    /// Generic parse failure.
    ParseFailure,

    // Headings (MD1xx)
    /// Heading level is invalid.
    InvalidHeadingLevel,
    /// Heading text is empty.
    EmptyHeadingText,
    /// Explicit heading id is duplicated.
    DuplicateHeadingId,
    /// Heading content exceeds recommended size.
    HeadingTooLong,

    // Links (MD2xx)
    /// Link URL is empty.
    EmptyLinkUrl,
    /// Link uses a blocked protocol.
    UnsafeLinkProtocol,
    /// Link uses an insecure protocol.
    InsecureLinkProtocol,
    /// Reference-style link cannot be resolved.
    UnresolvedLinkReference,
    /// Link reference label is empty.
    EmptyLinkReferenceLabel,

    // Code blocks (MD3xx)
    /// Code block has no content.
    EmptyCodeBlock,
    /// Code block is missing language hint.
    MissingCodeBlockLanguage,

    // Images (MD4xx)
    /// Image URL is empty.
    EmptyImageUrl,
    /// Image alt text is missing.
    ImageMissingAltText,
    /// Image uses a blocked protocol.
    UnsafeImageProtocol,

    // Inline HTML (MD5xx)
    /// Inline HTML contains script content.
    InlineHtmlContainsScript,
    /// Inline HTML uses a JavaScript URL.
    InlineHtmlJavascriptUrl,
    /// Inline HTML contains unsafe event handler attributes.
    InlineHtmlUnsafeEventHandler,

    // Block HTML (MD6xx)
    /// HTML block contains script content.
    HtmlBlockContainsScript,
    /// HTML block uses a JavaScript URL.
    HtmlBlockJavascriptUrl,
    /// HTML block has no meaningful content.
    EmptyHtmlBlock,
    /// HTML block has mismatched angle-bracket structure.
    HtmlBlockMismatchedAngles,
    /// HTML block contains unsafe event handler attributes.
    HtmlBlockUnsafeEventHandler,

    // Structural blocks (MD7xx)
    /// List is empty.
    EmptyList,
    /// List item is empty.
    EmptyListItem,
    /// Task checkbox markup is malformed.
    MalformedTaskCheckbox,
    /// Task list item contains no content after checkbox.
    EmptyTaskListItem,
    /// Blockquote contains no meaningful content.
    EmptyBlockquote,
    /// Definition list is empty.
    EmptyDefinitionList,
    /// Definition term is empty.
    EmptyDefinitionTerm,
    /// Definition description is empty.
    EmptyDefinitionDescription,
    /// Table cell is empty.
    EmptyTableCell,

    // Footnotes (MD8xx)
    /// Footnote reference has no matching definition.
    MissingFootnoteDefinition,
    /// Footnote definition label is duplicated.
    DuplicateFootnoteDefinition,
    /// Footnote definition is never referenced.
    UnusedFootnoteDefinition,

    // Extended blocks & rich content (MD9xx)
    /// Tab group has no tab items.
    EmptyTabGroup,
    /// Tab title is empty.
    EmptyTabTitle,
    /// Tab title is duplicated in same tab group.
    DuplicateTabTitle,
    /// Tab panel has no content.
    EmptyTabPanel,
    /// Slider deck has no slides.
    EmptySliderDeck,
    /// Slide has no content.
    EmptySlide,
    /// Admonition body has no content.
    EmptyAdmonitionBody,
    /// Math expression is empty.
    EmptyMathExpression,
    /// Mermaid diagram source is empty.
    EmptyMermaidDiagram,
    /// Admonition title is empty.
    EmptyAdmonitionTitle,
    /// Admonition kind is unknown.
    UnknownAdmonitionKind,
    /// Slider timer value is invalid.
    InvalidSliderTimer,
    /// Platform mention username is empty.
    EmptyPlatformMentionUsername,
    /// Platform mention target platform is unknown.
    UnknownPlatformMentionPlatform,
    /// Emoji shortcode is unknown.
    UnknownEmojiShortcode,
    /// Platform mention display name is empty.
    EmptyPlatformMentionDisplayName,
}
192
193impl DiagnosticCode {
194    /// Internal catalog key (Rust enum variant name).
195    pub fn catalog_key(self) -> String {
196        format!("{self:?}")
197    }
198
199    /// Stable external code value (for example `MD103`).
200    pub fn as_str(self) -> &'static str {
201        self.catalog_entry()
202            .map(|entry| entry.code.as_str())
203            .unwrap_or_else(|| {
204                crate::intelligence::catalog::diagnostics_catalog_settings()
205                    .unknown_code_fallback
206                    .as_str()
207            })
208    }
209
210    /// Default user-facing diagnostic message sourced from embedded catalog metadata.
211    pub fn default_message(self) -> &'static str {
212        self.message_template()
213    }
214
215    /// Catalog-provided message template (or title when template is absent).
216    pub fn message_template(self) -> &'static str {
217        self.catalog_entry()
218            .map(|entry| {
219                entry
220                    .message_template
221                    .as_deref()
222                    .unwrap_or(entry.title.as_str())
223            })
224            .unwrap_or_else(|| {
225                crate::intelligence::catalog::diagnostics_catalog_settings()
226                    .unknown_message_fallback
227                    .as_str()
228            })
229    }
230
231    /// Resolve the default diagnostic severity from catalog metadata.
232    pub fn default_severity(self) -> DiagnosticSeverity {
233        self.catalog_entry()
234            .and_then(|entry| DiagnosticSeverity::from_catalog_str(&entry.default_severity))
235            .unwrap_or(DiagnosticSeverity::Warning)
236    }
237
238    /// Format message template placeholders like `{protocol}` with values.
239    pub fn format_message(self, pairs: &[(&str, String)]) -> String {
240        let mut message = self.message_template().to_string();
241        for (key, value) in pairs {
242            let placeholder = format!("{{{}}}", key);
243            message = message.replace(&placeholder, value);
244        }
245        message
246    }
247
248    /// Optional embedded catalog entry for this diagnostic code.
249    pub fn catalog_entry(
250        self,
251    ) -> Option<&'static crate::intelligence::catalog::DiagnosticsCatalogEntry> {
252        let key = self.catalog_key();
253        crate::intelligence::catalog::find_catalog_entry_by_key(&key)
254    }
255
256    /// Fix suggestion sourced from the embedded diagnostics catalog.
257    pub fn fix_suggestion(self) -> &'static str {
258        self.catalog_entry()
259            .map(|entry| entry.fix_suggestion.as_str())
260            .unwrap_or_else(|| {
261                crate::intelligence::catalog::diagnostics_catalog_settings()
262                    .unknown_fix_suggestion_fallback
263                    .as_str()
264            })
265    }
266
267    /// Resolve fix suggestion as a `Cow` for UI integration.
268    pub fn fix_suggestion_resolved(self) -> Cow<'static, str> {
269        Cow::Borrowed(self.fix_suggestion())
270    }
271}
272
273impl Diagnostic {
274    /// Stable external code id from catalog (e.g. `MD103`) for this diagnostic.
275    pub fn code_id(&self) -> &'static str {
276        self.code.as_str()
277    }
278
279    /// Stable quick fix suggestion associated with this diagnostic code.
280    pub fn fix_suggestion(&self) -> &'static str {
281        self.code.fix_suggestion()
282    }
283
284    /// Optional embedded catalog entry for this diagnostic.
285    pub fn catalog_entry(
286        &self,
287    ) -> Option<&'static crate::intelligence::catalog::DiagnosticsCatalogEntry> {
288        self.code.catalog_entry()
289    }
290
291    /// Human title from embedded catalog if present.
292    pub fn title_resolved(&self) -> Option<&'static str> {
293        self.catalog_entry().map(|entry| entry.title.as_str())
294    }
295
296    /// Rich description from embedded catalog if present.
297    pub fn description_resolved(&self) -> Option<&'static str> {
298        self.catalog_entry().map(|entry| entry.description.as_str())
299    }
300
301    /// Resolve fix suggestion from embedded catalog when available,
302    /// with a stable in-code fallback.
303    pub fn fix_suggestion_resolved(&self) -> Cow<'static, str> {
304        self.code.fix_suggestion_resolved()
305    }
306
307    /// Build a parse-failure diagnostic anchored at a specific position.
308    pub fn parse_error_at(position: Position, message: impl Into<String>) -> Self {
309        let span = Span {
310            start: position,
311            end: Position {
312                line: position.line,
313                column: position.column.saturating_add(1),
314                offset: position.offset.saturating_add(1),
315            },
316        };
317
318        Self {
319            code: DiagnosticCode::ParseFailure,
320            span,
321            severity: DiagnosticCode::ParseFailure.default_severity(),
322            message: message.into(),
323        }
324    }
325
326    /// Build a parse-failure diagnostic at a safe default location (1:1).
327    pub fn parse_error(message: impl Into<String>) -> Self {
328        Self::parse_error_at(
329            Position {
330                line: 1,
331                column: 1,
332                offset: 0,
333            },
334            message,
335        )
336    }
337}
338
/// Diagnostic severity level.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DiagnosticSeverity {
    /// Error severity.
    Error,
    /// Warning severity.
    Warning,
    /// Informational severity.
    Info,
    /// Hint severity.
    Hint,
}

impl DiagnosticSeverity {
    /// Parse severity from catalog string value.
    ///
    /// Accepts `Error`, `Warning`, `Info` and `Hint`, ignoring ASCII case and
    /// surrounding whitespace. Returns `None` for anything else.
    ///
    /// The tolerance is deliberate: callers fall back to a default severity on
    /// `None`, so a catalog entry spelled `error` would otherwise be silently
    /// demoted instead of being treated as an error.
    pub fn from_catalog_str(value: &str) -> Option<Self> {
        const NAMES: [(&str, DiagnosticSeverity); 4] = [
            ("Error", DiagnosticSeverity::Error),
            ("Warning", DiagnosticSeverity::Warning),
            ("Info", DiagnosticSeverity::Info),
            ("Hint", DiagnosticSeverity::Hint),
        ];

        let wanted = value.trim();
        NAMES
            .iter()
            .find(|(name, _)| wanted.eq_ignore_ascii_case(name))
            .map(|&(_, severity)| severity)
    }

    /// Ordering key used when sorting diagnostics; lower ranks sort first.
    fn sort_rank(self) -> u8 {
        match self {
            // Higher severity first when multiple diagnostics target the same span.
            Self::Error => 0,
            Self::Warning => 1,
            Self::Info => 2,
            Self::Hint => 3,
        }
    }
}
374
375fn sort_and_dedup_diagnostics(diagnostics: &mut Vec<Diagnostic>) {
376    diagnostics.sort_by(|a, b| {
377        (
378            a.span.start.offset,
379            a.span.end.offset,
380            a.severity.sort_rank(),
381            a.code.as_str(),
382            a.message.as_str(),
383        )
384            .cmp(&(
385                b.span.start.offset,
386                b.span.end.offset,
387                b.severity.sort_rank(),
388                b.code.as_str(),
389                b.message.as_str(),
390            ))
391    });
392
393    diagnostics.dedup_by(|a, b| {
394        a.span == b.span && a.severity == b.severity && a.code == b.code && a.message == b.message
395    });
396}
397
398fn diag(
399    diagnostics: &mut Vec<Diagnostic>,
400    code: DiagnosticCode,
401    span: Span,
402    severity: DiagnosticSeverity,
403    message: impl Into<String>,
404) {
405    diagnostics.push(Diagnostic {
406        code,
407        span,
408        severity,
409        message: message.into(),
410    });
411}
412
413fn diag_catalog(diagnostics: &mut Vec<Diagnostic>, code: DiagnosticCode, span: Span) {
414    diag(
415        diagnostics,
416        code,
417        span,
418        code.default_severity(),
419        code.default_message(),
420    );
421}
422
423fn diag_catalog_message(
424    diagnostics: &mut Vec<Diagnostic>,
425    code: DiagnosticCode,
426    span: Span,
427    message: impl Into<String>,
428) {
429    diag(
430        diagnostics,
431        code,
432        span,
433        code.default_severity(),
434        message.into(),
435    );
436}
437
/// Report whether `url_lower` starts with one of `disallowed_schemes`.
///
/// `url_lower` is expected to be already lowercased; the scheme is the text
/// before the first `:` and is compared exactly against each listed scheme
/// (which carry no trailing colon). A URL with no `:` or an empty scheme
/// never matches.
///
/// Browsers strip leading C0 control characters and spaces, and drop ASCII
/// tab / newline characters anywhere in a URL before parsing it, so
/// `"  javascript:…"` and `"java\tscript:…"` both execute. The same
/// normalisation is applied here so these spellings cannot bypass the check.
fn has_disallowed_scheme(url_lower: &str, disallowed_schemes: &[String]) -> bool {
    let candidate = url_lower.trim_start_matches(|c: char| c <= ' ');
    let Some((raw_scheme, _)) = candidate.split_once(':') else {
        return false;
    };

    // The scheme as a browser would see it: tab and newline characters removed.
    let scheme_chars = || {
        raw_scheme
            .chars()
            .filter(|c| !matches!(c, '\t' | '\n' | '\r'))
    };

    if scheme_chars().next().is_none() {
        return false;
    }

    disallowed_schemes
        .iter()
        .any(|item| item.chars().eq(scheme_chars()))
}
446
/// Report whether `url_lower` begins with at least one of `prefixes`.
fn starts_with_any_prefix(url_lower: &str, prefixes: &[String]) -> bool {
    for prefix in prefixes {
        if url_lower.starts_with(prefix.as_str()) {
            return true;
        }
    }
    false
}
450
/// Report whether `text_lower` contains `<scheme>:` for any scheme in
/// `protocols`, anywhere in the text.
fn contains_unsafe_protocol_marker(text_lower: &str, protocols: &[String]) -> bool {
    // One scratch buffer holds the `scheme:` needle for every scheme in turn.
    let mut needle = String::new();
    for scheme in protocols {
        needle.clear();
        needle.push_str(scheme);
        needle.push(':');
        if text_lower.contains(needle.as_str()) {
            return true;
        }
    }
    false
}
457
/// Report whether `text_lower` contains at least one of `markers` as a substring.
fn contains_any_marker(text_lower: &str, markers: &[String]) -> bool {
    for marker in markers {
        if text_lower.contains(marker.as_str()) {
            return true;
        }
    }
    false
}
461
/// Report whether lowercased HTML text appears to set an inline event handler
/// attribute (`onclick=`, `onerror=`, …).
///
/// `text_lower` must already be lowercased. An occurrence of a known handler
/// name counts when both of these hold:
///
/// * it is preceded by a character that can end the previous token in a tag:
///   ASCII whitespace, `<`, `/` (as in `<svg/onload=…>`), or a quote closing
///   the previous attribute value (as in `href="x"onclick=…`);
/// * it is followed, after optional ASCII whitespace, by `=`.
///
/// This is a textual heuristic rather than an HTML parser, so it can also
/// match handler-like text inside attribute values. Matching is done on
/// slices of the input without building search strings.
fn contains_unsafe_event_handler_attr(text_lower: &str) -> bool {
    const EVENT_ATTRS: &[&str] = &[
        "onabort",
        "onanimationend",
        "onanimationiteration",
        "onanimationstart",
        "onauxclick",
        "onbeforeinput",
        "onbeforeunload",
        "onblur",
        "oncancel",
        "oncanplay",
        "oncanplaythrough",
        "onchange",
        "onclick",
        "onclose",
        "oncontextmenu",
        "oncopy",
        "oncuechange",
        "oncut",
        "ondblclick",
        "ondrag",
        "ondragend",
        "ondragenter",
        "ondragleave",
        "ondragover",
        "ondragstart",
        "ondrop",
        "ondurationchange",
        "onended",
        "onerror",
        "onfocus",
        "onfocusin",
        "onfocusout",
        "onformdata",
        "oninput",
        "oninvalid",
        "onkeydown",
        "onkeypress",
        "onkeyup",
        "onload",
        "onloadeddata",
        "onloadedmetadata",
        "onloadstart",
        "onmousedown",
        "onmouseenter",
        "onmouseleave",
        "onmousemove",
        "onmouseout",
        "onmouseover",
        "onmouseup",
        "onpaste",
        "onpause",
        "onplay",
        "onplaying",
        "onprogress",
        "onratechange",
        "onreset",
        "onresize",
        "onscroll",
        "onsecuritypolicyviolation",
        "onseeked",
        "onseeking",
        "onselect",
        "onslotchange",
        "onstalled",
        "onsubmit",
        "onsuspend",
        "ontimeupdate",
        "ontoggle",
        "ontransitionend",
        "onunload",
        "onvolumechange",
        "onwaiting",
        "onwheel",
    ];

    // Whitespace characters that may separate tokens inside a tag.
    fn is_tag_whitespace(c: char) -> bool {
        matches!(c, ' ' | '\t' | '\n' | '\r' | '\x0c')
    }

    // Characters after which a new attribute name can begin.
    fn can_precede_attr(c: char) -> bool {
        is_tag_whitespace(c) || matches!(c, '<' | '/' | '"' | '\'')
    }

    EVENT_ATTRS.iter().any(|attr| {
        // `match_indices` skips occurrences overlapping an earlier match.
        // Such an occurrence would be preceded by a letter of the attribute
        // name, which `can_precede_attr` rejects anyway.
        text_lower.match_indices(attr).any(|(start, hit)| {
            let boundary_before = text_lower[..start]
                .chars()
                .next_back()
                .is_some_and(can_precede_attr);
            if !boundary_before {
                return false;
            }

            text_lower[start + hit.len()..]
                .trim_start_matches(is_tag_whitespace)
                .starts_with('=')
        })
    })
}
546
/// Report whether `platform_lower` names a supported mention platform.
///
/// The comparison is exact, so the caller must pass a lowercased name.
fn is_known_platform(platform_lower: &str) -> bool {
    const PLATFORMS: [&str; 17] = [
        "github", "gitlab", "codeberg", "twitter", "x", "mastodon", "bluesky", "linkedin",
        "xing", "medium", "dribbble", "behance", "reddit", "discord", "telegram", "youtube",
        "twitch",
    ];

    PLATFORMS.iter().any(|known| *known == platform_lower)
}
569
570fn list_item_has_malformed_task_marker(node: &Node) -> bool {
571    if node
572        .children
573        .iter()
574        .any(|child| matches!(child.kind, NodeKind::TaskCheckbox { .. }))
575    {
576        return false;
577    }
578
579    let Some(first_child) = node.children.first() else {
580        return false;
581    };
582
583    let candidate_text = match &first_child.kind {
584        NodeKind::Text(text) => Some(text.as_str()),
585        NodeKind::Paragraph => first_child
586            .children
587            .iter()
588            .find_map(|inline| match &inline.kind {
589                NodeKind::Text(text) => Some(text.as_str()),
590                _ => None,
591            }),
592        _ => None,
593    };
594
595    let Some(text) = candidate_text else {
596        return false;
597    };
598
599    let trimmed = text.trim_start();
600    if !trimmed.starts_with('[') {
601        return false;
602    }
603
604    let Some(close_idx) = trimmed.find(']') else {
605        return false;
606    };
607
608    let marker_body = trimmed[1..close_idx].trim();
609    if marker_body.is_empty() {
610        // "[ ]" is valid; empty marker body after trimming means this is a valid checkbox marker.
611        return false;
612    }
613
614    !matches!(marker_body, "x" | "X")
615}
616
/// Report whether `marker_kind_upper` is a recognised admonition kind.
///
/// The comparison is exact, so the caller must pass an uppercased name.
fn known_admonition_kind(marker_kind_upper: &str) -> bool {
    const KINDS: [&str; 5] = ["NOTE", "TIP", "IMPORTANT", "WARNING", "CAUTION"];
    KINDS.iter().any(|kind| *kind == marker_kind_upper)
}
623
624fn blockquote_has_unknown_admonition_marker(node: &Node) -> bool {
625    let Some(first_block) = node.children.first() else {
626        return false;
627    };
628
629    if !matches!(first_block.kind, NodeKind::Paragraph) {
630        return false;
631    }
632
633    let mut raw = String::new();
634    for inline in &first_block.children {
635        match &inline.kind {
636            NodeKind::Text(text) => raw.push_str(text),
637            NodeKind::SoftBreak | NodeKind::HardBreak => break,
638            _ => return false,
639        }
640    }
641
642    let trimmed = raw.trim();
643    if !trimmed.starts_with("[!") {
644        return false;
645    }
646
647    let Some(close_idx) = trimmed.find(']') else {
648        return false;
649    };
650
651    let marker = &trimmed[2..close_idx].trim();
652    if marker.is_empty() {
653        return false;
654    }
655
656    let marker_upper = marker.to_ascii_uppercase();
657    !known_admonition_kind(&marker_upper)
658}
659
660fn known_emoji_shortcodes() -> &'static HashSet<String> {
661    static SHORTCODES: OnceLock<HashSet<String>> = OnceLock::new();
662
663    SHORTCODES.get_or_init(|| {
664        crate::logic::text_completion::emoji_shortcodes_for_completion()
665            .iter()
666            .map(|shortcode| shortcode.to_ascii_lowercase())
667            .collect::<HashSet<_>>()
668    })
669}
670
/// Report whether `ch` may appear inside an emoji shortcode body:
/// ASCII letters and digits plus `_`, `+` and `-`.
fn is_shortcode_body_char(ch: char) -> bool {
    matches!(ch, 'a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '+' | '-')
}
674
/// Strip punctuation, bracket and quote characters from both ends of `token`,
/// so that a shortcode written as `(:smile:),` is reduced to `:smile:`.
fn strip_surrounding_shortcode_wrappers(token: &str) -> &str {
    const WRAPPERS: &[char] = &[
        ',', '.', ';', '!', '?', '(', ')', '[', ']', '{', '}', '<', '>', '"', '\'', '`',
    ];

    token.trim_matches(WRAPPERS)
}
697
698fn shortcode_candidate_from_token(token: &str) -> Option<&str> {
699    let trimmed = strip_surrounding_shortcode_wrappers(token);
700
701    if trimmed.len() < 3 || !trimmed.starts_with(':') || !trimmed.ends_with(':') {
702        return None;
703    }
704
705    let body = &trimmed[1..trimmed.len() - 1];
706    if body.is_empty() || !body.chars().all(is_shortcode_body_char) {
707        return None;
708    }
709
710    if !body.chars().any(|ch| ch.is_ascii_alphabetic()) {
711        return None;
712    }
713
714    // Enforce token-like boundaries inside the candidate itself.
715    // This avoids odd cases like ":-name:" / ":name-:" / ":name--x:".
716    if body.starts_with(['-', '_', '+']) || body.ends_with(['-', '_', '+']) {
717        return None;
718    }
719
720    if body.contains("--") || body.contains("__") || body.contains("++") {
721        return None;
722    }
723
724    Some(trimmed)
725}
726
727fn text_has_unknown_emoji_shortcode(text: &str) -> bool {
728    let known = known_emoji_shortcodes();
729
730    text.split_whitespace().any(|token| {
731        shortcode_candidate_from_token(token)
732            .map(|candidate| !known.contains(&candidate.to_ascii_lowercase()))
733            .unwrap_or(false)
734    })
735}
736
737/// Compute diagnostics for a parsed markdown document.
738pub fn compute_diagnostics(document: &Document) -> Vec<Diagnostic> {
739    compute_diagnostics_with_options(document, DiagnosticsOptions::all())
740}
741
742/// Compute diagnostics using configurable policy controls.
743pub fn compute_diagnostics_with_options(
744    document: &Document,
745    options: DiagnosticsOptions,
746) -> Vec<Diagnostic> {
747    log::debug!(
748        "Computing diagnostics for {} nodes",
749        document.children.len()
750    );
751
752    let mut diagnostics = Vec::new();
753
754    for node in &document.children {
755        collect_diagnostics(node, &mut diagnostics);
756    }
757
758    collect_document_level_diagnostics(document, &mut diagnostics);
759    sort_and_dedup_diagnostics(&mut diagnostics);
760
761    match options.profile {
762        DiagnosticsProfile::All => {}
763        DiagnosticsProfile::CriticalOnly => {
764            diagnostics.retain(|d| matches!(d.severity, DiagnosticSeverity::Error));
765        }
766    }
767
768    if let Some(max) = options.max_diagnostics {
769        diagnostics.truncate(max);
770    }
771
772    log::info!("Found {} diagnostics", diagnostics.len());
773    diagnostics
774}
775
776/// Compute only critical diagnostics (errors) using the default policy profile.
777pub fn compute_diagnostics_critical(document: &Document) -> Vec<Diagnostic> {
778    compute_diagnostics_with_options(document, DiagnosticsOptions::critical_only())
779}
780
781fn collect_document_level_diagnostics(document: &Document, diagnostics: &mut Vec<Diagnostic>) {
782    // Detect duplicate explicit heading IDs (e.g. "{#id}").
783    // We intentionally diagnose the second and subsequent occurrences.
784    let mut seen: HashMap<String, Span> = HashMap::new();
785    collect_duplicate_heading_ids(&document.children, &mut seen, diagnostics);
786
787    collect_footnote_consistency_diagnostics(&document.children, diagnostics);
788    collect_link_reference_consistency_diagnostics(
789        &document.children,
790        &document.references,
791        diagnostics,
792    );
793}
794
/// Normalise a label for comparison: trim it, collapse each run of whitespace
/// into a single space, and lowercase the result.
fn normalize_label_for_diagnostics(label: &str) -> String {
    let mut collapsed = String::with_capacity(label.len());
    for word in label.split_whitespace() {
        if !collapsed.is_empty() {
            collapsed.push(' ');
        }
        collapsed.push_str(word);
    }
    collapsed.to_lowercase()
}
802
803fn collect_footnote_consistency_diagnostics(nodes: &[Node], diagnostics: &mut Vec<Diagnostic>) {
804    let mut definitions: HashMap<String, Span> = HashMap::new();
805    let mut references: Vec<(String, Span)> = Vec::new();
806
807    collect_footnote_nodes(nodes, &mut definitions, &mut references, diagnostics);
808
809    let mut reference_counts: HashMap<String, usize> = HashMap::new();
810    for (normalized_label, span) in references {
811        *reference_counts
812            .entry(normalized_label.clone())
813            .or_insert(0) += 1;
814        if !definitions.contains_key(&normalized_label) {
815            diag_catalog(diagnostics, DiagnosticCode::MissingFootnoteDefinition, span);
816        }
817    }
818
819    for (label, span) in definitions {
820        if !reference_counts.contains_key(&label) {
821            diag_catalog(diagnostics, DiagnosticCode::UnusedFootnoteDefinition, span);
822        }
823    }
824}
825
826fn collect_footnote_nodes(
827    nodes: &[Node],
828    definitions: &mut HashMap<String, Span>,
829    references: &mut Vec<(String, Span)>,
830    diagnostics: &mut Vec<Diagnostic>,
831) {
832    for node in nodes {
833        if let Some(span) = node.span {
834            match &node.kind {
835                NodeKind::FootnoteDefinition { label } => {
836                    let normalized = normalize_label_for_diagnostics(label);
837                    if let std::collections::hash_map::Entry::Vacant(entry) =
838                        definitions.entry(normalized)
839                    {
840                        entry.insert(span);
841                    } else {
842                        diag_catalog(
843                            diagnostics,
844                            DiagnosticCode::DuplicateFootnoteDefinition,
845                            span,
846                        );
847                    }
848                }
849                NodeKind::FootnoteReference { label } => {
850                    references.push((normalize_label_for_diagnostics(label), span));
851                }
852                _ => {}
853            }
854        }
855
856        if !node.children.is_empty() {
857            collect_footnote_nodes(&node.children, definitions, references, diagnostics);
858        }
859    }
860}
861
862fn node_has_meaningful_content(node: &Node) -> bool {
863    match &node.kind {
864        NodeKind::Text(text) => !text.trim().is_empty(),
865        NodeKind::CodeSpan(code) => !code.trim().is_empty(),
866        NodeKind::InlineHtml(html) => !html.trim().is_empty(),
867        _ => node.children.iter().any(node_has_meaningful_content),
868    }
869}
870
871fn collect_duplicate_heading_ids(
872    nodes: &[Node],
873    seen: &mut HashMap<String, Span>,
874    diagnostics: &mut Vec<Diagnostic>,
875) {
876    for node in nodes {
877        if let (NodeKind::Heading { id: Some(id), .. }, Some(span)) = (&node.kind, node.span) {
878            let key = id.trim().to_lowercase();
879            if !key.is_empty() {
880                if let Some(first_span) = seen.get(&key) {
881                    diag(
882                        diagnostics,
883                        DiagnosticCode::DuplicateHeadingId,
884                        span,
885                        DiagnosticCode::DuplicateHeadingId.default_severity(),
886                        DiagnosticCode::DuplicateHeadingId.format_message(&[
887                            ("id", id.clone()),
888                            ("line", first_span.start.line.to_string()),
889                        ]),
890                    );
891                } else {
892                    seen.insert(key, span);
893                }
894            }
895        }
896
897        if !node.children.is_empty() {
898            collect_duplicate_heading_ids(&node.children, seen, diagnostics);
899        }
900    }
901}
902
903// Recursively collect diagnostics from a node and its children
904fn collect_diagnostics(node: &Node, diagnostics: &mut Vec<Diagnostic>) {
905    if let Some(span) = &node.span {
906        match &node.kind {
907            NodeKind::Heading { level, text, .. } => {
908                if *level > 6 {
909                    diag(
910                        diagnostics,
911                        DiagnosticCode::InvalidHeadingLevel,
912                        *span,
913                        DiagnosticCode::InvalidHeadingLevel.default_severity(),
914                        DiagnosticCode::InvalidHeadingLevel
915                            .format_message(&[("level", level.to_string())]),
916                    );
917                }
918
919                if text.trim().is_empty() {
920                    diag_catalog(diagnostics, DiagnosticCode::EmptyHeadingText, *span);
921                }
922
923                // Friendly style guardrail for very long headings.
924                if text.chars().count()
925                    > crate::intelligence::catalog::diagnostics_catalog_settings()
926                        .heading_too_long_threshold
927                {
928                    diag_catalog(diagnostics, DiagnosticCode::HeadingTooLong, *span);
929                }
930            }
931            NodeKind::Link { url, .. } => {
932                if url.trim().is_empty() {
933                    diag_catalog(diagnostics, DiagnosticCode::EmptyLinkUrl, *span);
934                }
935
936                let lower_url = url.to_lowercase();
937                let settings = crate::intelligence::catalog::diagnostics_catalog_settings();
938                if has_disallowed_scheme(&lower_url, &settings.unsafe_protocols) {
939                    let protocol = url
940                        .split_once(':')
941                        .map(|(prefix, _)| prefix)
942                        .unwrap_or(settings.unknown_protocol_label.as_str())
943                        .to_string();
944                    diag_catalog_message(
945                        diagnostics,
946                        DiagnosticCode::UnsafeLinkProtocol,
947                        *span,
948                        DiagnosticCode::UnsafeLinkProtocol
949                            .format_message(&[("protocol", protocol)]),
950                    );
951                }
952
953                if starts_with_any_prefix(&lower_url, &settings.insecure_link_prefixes) {
954                    diag_catalog(diagnostics, DiagnosticCode::InsecureLinkProtocol, *span);
955                }
956            }
957            NodeKind::LinkReference { .. } => {}
958            NodeKind::CodeBlock { language, code } => {
959                if code.trim().is_empty() {
960                    diag_catalog(diagnostics, DiagnosticCode::EmptyCodeBlock, *span);
961                }
962
963                if !code.trim().is_empty() && language.is_none() {
964                    diag_catalog(diagnostics, DiagnosticCode::MissingCodeBlockLanguage, *span);
965                }
966            }
967            NodeKind::Image { url, alt } => {
968                if url.trim().is_empty() {
969                    diag_catalog(diagnostics, DiagnosticCode::EmptyImageUrl, *span);
970                }
971
972                if alt.trim().is_empty() {
973                    diag_catalog(diagnostics, DiagnosticCode::ImageMissingAltText, *span);
974                }
975
976                let lower_url = url.to_lowercase();
977                let settings = crate::intelligence::catalog::diagnostics_catalog_settings();
978                if has_disallowed_scheme(&lower_url, &settings.unsafe_protocols) {
979                    let protocol = url
980                        .split_once(':')
981                        .map(|(prefix, _)| prefix)
982                        .unwrap_or(settings.unknown_protocol_label.as_str())
983                        .to_string();
984                    diag_catalog_message(
985                        diagnostics,
986                        DiagnosticCode::UnsafeImageProtocol,
987                        *span,
988                        DiagnosticCode::UnsafeImageProtocol
989                            .format_message(&[("protocol", protocol)]),
990                    );
991                }
992            }
993            NodeKind::InlineHtml(html) => {
994                let lower_html = html.to_lowercase();
995                let settings = crate::intelligence::catalog::diagnostics_catalog_settings();
996                if contains_any_marker(&lower_html, &settings.script_tag_markers) {
997                    diag_catalog(diagnostics, DiagnosticCode::InlineHtmlContainsScript, *span);
998                }
999
1000                if contains_unsafe_protocol_marker(&lower_html, &settings.unsafe_protocols) {
1001                    diag_catalog(diagnostics, DiagnosticCode::InlineHtmlJavascriptUrl, *span);
1002                }
1003
1004                if contains_unsafe_event_handler_attr(&lower_html) {
1005                    diag_catalog(
1006                        diagnostics,
1007                        DiagnosticCode::InlineHtmlUnsafeEventHandler,
1008                        *span,
1009                    );
1010                }
1011            }
1012            NodeKind::List { .. } => {
1013                if node.children.is_empty() {
1014                    diag_catalog(diagnostics, DiagnosticCode::EmptyList, *span);
1015                }
1016            }
1017            NodeKind::ListItem => {
1018                if node.children.is_empty() {
1019                    diag_catalog(diagnostics, DiagnosticCode::EmptyListItem, *span);
1020                }
1021
1022                let has_task_checkbox = node
1023                    .children
1024                    .iter()
1025                    .any(|child| matches!(child.kind, NodeKind::TaskCheckbox { .. }));
1026
1027                if has_task_checkbox {
1028                    let has_task_content = node.children.iter().any(|child| {
1029                        !matches!(child.kind, NodeKind::TaskCheckbox { .. })
1030                            && node_has_meaningful_content(child)
1031                    });
1032
1033                    if !has_task_content {
1034                        diag_catalog(diagnostics, DiagnosticCode::EmptyTaskListItem, *span);
1035                    }
1036                } else if list_item_has_malformed_task_marker(node) {
1037                    diag_catalog(diagnostics, DiagnosticCode::MalformedTaskCheckbox, *span);
1038                }
1039            }
1040            NodeKind::HtmlBlock { html } => {
1041                let lower_html = html.to_lowercase();
1042                let settings = crate::intelligence::catalog::diagnostics_catalog_settings();
1043
1044                if contains_any_marker(&lower_html, &settings.script_tag_markers) {
1045                    diag_catalog(diagnostics, DiagnosticCode::HtmlBlockContainsScript, *span);
1046                }
1047
1048                if contains_unsafe_protocol_marker(&lower_html, &settings.unsafe_protocols) {
1049                    diag_catalog(diagnostics, DiagnosticCode::HtmlBlockJavascriptUrl, *span);
1050                }
1051
1052                if html.trim().is_empty() {
1053                    diag_catalog(diagnostics, DiagnosticCode::EmptyHtmlBlock, *span);
1054                }
1055
1056                let open_angles = html.matches('<').count();
1057                let close_angles = html.matches('>').count();
1058                if open_angles != close_angles {
1059                    diag_catalog(
1060                        diagnostics,
1061                        DiagnosticCode::HtmlBlockMismatchedAngles,
1062                        *span,
1063                    );
1064                }
1065
1066                if contains_unsafe_event_handler_attr(&lower_html) {
1067                    diag_catalog(
1068                        diagnostics,
1069                        DiagnosticCode::HtmlBlockUnsafeEventHandler,
1070                        *span,
1071                    );
1072                }
1073            }
1074            NodeKind::Blockquote => {
1075                if node.children.is_empty() {
1076                    diag_catalog(diagnostics, DiagnosticCode::EmptyBlockquote, *span);
1077                }
1078
1079                if blockquote_has_unknown_admonition_marker(node) {
1080                    diag_catalog(diagnostics, DiagnosticCode::UnknownAdmonitionKind, *span);
1081                }
1082            }
1083            NodeKind::DefinitionList => {
1084                if node.children.is_empty() {
1085                    diag_catalog(diagnostics, DiagnosticCode::EmptyDefinitionList, *span);
1086                }
1087            }
1088            NodeKind::DefinitionTerm => {
1089                if !node_has_meaningful_content(node) {
1090                    diag_catalog(diagnostics, DiagnosticCode::EmptyDefinitionTerm, *span);
1091                }
1092            }
1093            NodeKind::DefinitionDescription => {
1094                if !node_has_meaningful_content(node) {
1095                    diag_catalog(
1096                        diagnostics,
1097                        DiagnosticCode::EmptyDefinitionDescription,
1098                        *span,
1099                    );
1100                }
1101            }
1102            NodeKind::TableCell { .. } => {
1103                if !node_has_meaningful_content(node) {
1104                    diag_catalog(diagnostics, DiagnosticCode::EmptyTableCell, *span);
1105                }
1106            }
1107            NodeKind::TabGroup => {
1108                if node.children.is_empty() {
1109                    diag_catalog(diagnostics, DiagnosticCode::EmptyTabGroup, *span);
1110                }
1111
1112                let mut seen_titles: HashMap<String, Span> = HashMap::new();
1113                for child in &node.children {
1114                    if let (NodeKind::TabItem { title }, Some(tab_span)) = (&child.kind, child.span)
1115                    {
1116                        let normalized = title.trim().to_lowercase();
1117
1118                        if normalized.is_empty() {
1119                            diag_catalog(diagnostics, DiagnosticCode::EmptyTabTitle, tab_span);
1120                        }
1121
1122                        if !normalized.is_empty() {
1123                            if let std::collections::hash_map::Entry::Vacant(entry) =
1124                                seen_titles.entry(normalized)
1125                            {
1126                                entry.insert(tab_span);
1127                            } else {
1128                                diag_catalog(
1129                                    diagnostics,
1130                                    DiagnosticCode::DuplicateTabTitle,
1131                                    tab_span,
1132                                );
1133                            }
1134                        }
1135
1136                        if !node_has_meaningful_content(child) {
1137                            diag_catalog(diagnostics, DiagnosticCode::EmptyTabPanel, tab_span);
1138                        }
1139                    }
1140                }
1141            }
1142            NodeKind::SliderDeck { timer_seconds } => {
1143                if node.children.is_empty() {
1144                    diag_catalog(diagnostics, DiagnosticCode::EmptySliderDeck, *span);
1145                }
1146
1147                if timer_seconds.is_some_and(|value| value == 0) {
1148                    diag_catalog(diagnostics, DiagnosticCode::InvalidSliderTimer, *span);
1149                }
1150            }
1151            NodeKind::Slide { .. } => {
1152                if !node_has_meaningful_content(node) {
1153                    diag_catalog(diagnostics, DiagnosticCode::EmptySlide, *span);
1154                }
1155            }
1156            NodeKind::Admonition { title, .. } => {
1157                if node.children.is_empty() {
1158                    diag_catalog(diagnostics, DiagnosticCode::EmptyAdmonitionBody, *span);
1159                }
1160
1161                if let Some(custom_title) = title {
1162                    if custom_title.trim().is_empty() {
1163                        diag_catalog(diagnostics, DiagnosticCode::EmptyAdmonitionTitle, *span);
1164                    }
1165                }
1166            }
1167            NodeKind::InlineMath { content } | NodeKind::DisplayMath { content } => {
1168                if content.trim().is_empty() {
1169                    diag_catalog(diagnostics, DiagnosticCode::EmptyMathExpression, *span);
1170                }
1171            }
1172            NodeKind::MermaidDiagram { content } => {
1173                if content.trim().is_empty() {
1174                    diag_catalog(diagnostics, DiagnosticCode::EmptyMermaidDiagram, *span);
1175                }
1176            }
1177            NodeKind::PlatformMention {
1178                username,
1179                platform,
1180                display,
1181            } => {
1182                if username.trim().is_empty() {
1183                    diag_catalog(
1184                        diagnostics,
1185                        DiagnosticCode::EmptyPlatformMentionUsername,
1186                        *span,
1187                    );
1188                }
1189
1190                if !is_known_platform(&platform.trim().to_lowercase()) {
1191                    diag_catalog(
1192                        diagnostics,
1193                        DiagnosticCode::UnknownPlatformMentionPlatform,
1194                        *span,
1195                    );
1196                }
1197
1198                if display.as_ref().is_some_and(|d| d.trim().is_empty()) {
1199                    diag_catalog(
1200                        diagnostics,
1201                        DiagnosticCode::EmptyPlatformMentionDisplayName,
1202                        *span,
1203                    );
1204                }
1205            }
1206            NodeKind::Text(text) => {
1207                if text_has_unknown_emoji_shortcode(text) {
1208                    diag_catalog(diagnostics, DiagnosticCode::UnknownEmojiShortcode, *span);
1209                }
1210            }
1211            _ => {}
1212        }
1213    }
1214
1215    for child in &node.children {
1216        collect_diagnostics(child, diagnostics);
1217    }
1218}
1219
1220fn collect_link_reference_consistency_diagnostics(
1221    nodes: &[Node],
1222    references: &crate::parser::ReferenceMap,
1223    diagnostics: &mut Vec<Diagnostic>,
1224) {
1225    for node in nodes {
1226        if let (NodeKind::LinkReference { label, .. }, Some(span)) = (&node.kind, node.span) {
1227            let normalized = normalize_label_for_diagnostics(label);
1228
1229            if normalized.is_empty() {
1230                diag_catalog(diagnostics, DiagnosticCode::EmptyLinkReferenceLabel, span);
1231            } else if !references.contains(label) {
1232                diag_catalog(diagnostics, DiagnosticCode::UnresolvedLinkReference, span);
1233            }
1234        }
1235
1236        if !node.children.is_empty() {
1237            collect_link_reference_consistency_diagnostics(&node.children, references, diagnostics);
1238        }
1239    }
1240}
1241
1242#[cfg(test)]
1243mod tests {
1244    use super::*;
1245    use crate::parser::Position;
1246
1247    fn span(line: usize, start_col: usize, end_col: usize, start_offset: usize) -> Span {
1248        Span {
1249            start: Position {
1250                line,
1251                column: start_col,
1252                offset: start_offset,
1253            },
1254            end: Position {
1255                line,
1256                column: end_col,
1257                offset: start_offset + (end_col.saturating_sub(start_col)),
1258            },
1259        }
1260    }
1261
1262    #[test]
1263    fn smoke_test_codes_are_stable_strings() {
1264        for code in [
1265            DiagnosticCode::ParseFailure,
1266            DiagnosticCode::InvalidHeadingLevel,
1267            DiagnosticCode::DuplicateHeadingId,
1268            DiagnosticCode::UnresolvedLinkReference,
1269            DiagnosticCode::EmptyLinkReferenceLabel,
1270            DiagnosticCode::MissingCodeBlockLanguage,
1271            DiagnosticCode::ImageMissingAltText,
1272            DiagnosticCode::InlineHtmlUnsafeEventHandler,
1273            DiagnosticCode::HtmlBlockUnsafeEventHandler,
1274            DiagnosticCode::EmptyDefinitionList,
1275            DiagnosticCode::MissingFootnoteDefinition,
1276            DiagnosticCode::EmptyTaskListItem,
1277            DiagnosticCode::InvalidSliderTimer,
1278            DiagnosticCode::EmptyPlatformMentionUsername,
1279            DiagnosticCode::UnknownPlatformMentionPlatform,
1280            DiagnosticCode::UnknownEmojiShortcode,
1281            DiagnosticCode::EmptyPlatformMentionDisplayName,
1282            DiagnosticCode::DuplicateTabTitle,
1283            DiagnosticCode::EmptyMathExpression,
1284            DiagnosticCode::EmptyAdmonitionTitle,
1285            DiagnosticCode::UnknownAdmonitionKind,
1286        ] {
1287            let id = code.as_str();
1288            assert!(
1289                id.starts_with("MD") || id.starts_with("MO") || id.starts_with("MG"),
1290                "unexpected diagnostic namespace for code id: {}",
1291                id
1292            );
1293            assert_eq!(id.len(), 5);
1294        }
1295    }
1296
1297    #[test]
1298    fn smoke_test_all_diagnostic_codes_are_in_catalog() {
1299        let all_codes = [
1300            DiagnosticCode::ParseFailure,
1301            DiagnosticCode::InvalidHeadingLevel,
1302            DiagnosticCode::EmptyHeadingText,
1303            DiagnosticCode::DuplicateHeadingId,
1304            DiagnosticCode::HeadingTooLong,
1305            DiagnosticCode::EmptyLinkUrl,
1306            DiagnosticCode::UnsafeLinkProtocol,
1307            DiagnosticCode::InsecureLinkProtocol,
1308            DiagnosticCode::UnresolvedLinkReference,
1309            DiagnosticCode::EmptyLinkReferenceLabel,
1310            DiagnosticCode::EmptyCodeBlock,
1311            DiagnosticCode::MissingCodeBlockLanguage,
1312            DiagnosticCode::EmptyImageUrl,
1313            DiagnosticCode::ImageMissingAltText,
1314            DiagnosticCode::UnsafeImageProtocol,
1315            DiagnosticCode::InlineHtmlContainsScript,
1316            DiagnosticCode::InlineHtmlJavascriptUrl,
1317            DiagnosticCode::InlineHtmlUnsafeEventHandler,
1318            DiagnosticCode::HtmlBlockContainsScript,
1319            DiagnosticCode::HtmlBlockJavascriptUrl,
1320            DiagnosticCode::EmptyHtmlBlock,
1321            DiagnosticCode::HtmlBlockMismatchedAngles,
1322            DiagnosticCode::HtmlBlockUnsafeEventHandler,
1323            DiagnosticCode::EmptyList,
1324            DiagnosticCode::EmptyListItem,
1325            DiagnosticCode::MalformedTaskCheckbox,
1326            DiagnosticCode::EmptyTaskListItem,
1327            DiagnosticCode::EmptyBlockquote,
1328            DiagnosticCode::EmptyDefinitionList,
1329            DiagnosticCode::EmptyDefinitionTerm,
1330            DiagnosticCode::EmptyDefinitionDescription,
1331            DiagnosticCode::EmptyTableCell,
1332            DiagnosticCode::MissingFootnoteDefinition,
1333            DiagnosticCode::DuplicateFootnoteDefinition,
1334            DiagnosticCode::UnusedFootnoteDefinition,
1335            DiagnosticCode::EmptyTabGroup,
1336            DiagnosticCode::EmptyTabTitle,
1337            DiagnosticCode::DuplicateTabTitle,
1338            DiagnosticCode::EmptyTabPanel,
1339            DiagnosticCode::EmptySliderDeck,
1340            DiagnosticCode::EmptySlide,
1341            DiagnosticCode::EmptyAdmonitionBody,
1342            DiagnosticCode::EmptyMathExpression,
1343            DiagnosticCode::EmptyMermaidDiagram,
1344            DiagnosticCode::EmptyAdmonitionTitle,
1345            DiagnosticCode::UnknownAdmonitionKind,
1346            DiagnosticCode::InvalidSliderTimer,
1347            DiagnosticCode::EmptyPlatformMentionUsername,
1348            DiagnosticCode::UnknownPlatformMentionPlatform,
1349            DiagnosticCode::UnknownEmojiShortcode,
1350            DiagnosticCode::EmptyPlatformMentionDisplayName,
1351        ];
1352
1353        for code in all_codes {
1354            assert!(
1355                code.catalog_entry().is_some(),
1356                "missing catalog entry for {:?}",
1357                code
1358            );
1359        }
1360    }
1361
1362    #[test]
1363    fn smoke_test_fix_suggestions_are_available() {
1364        assert!(DiagnosticCode::DuplicateHeadingId
1365            .fix_suggestion()
1366            .contains("unique"));
1367        assert!(DiagnosticCode::MissingCodeBlockLanguage
1368            .fix_suggestion()
1369            .contains("```"));
1370        assert!(DiagnosticCode::MissingFootnoteDefinition
1371            .fix_suggestion()
1372            .contains("[^label]:"));
1373        assert!(DiagnosticCode::EmptySliderDeck
1374            .fix_suggestion()
1375            .contains("@slidestart"));
1376    }
1377
1378    #[test]
1379    fn smoke_test_diagnostic_methods_expose_fixit_metadata() {
1380        let d = Diagnostic {
1381            code: DiagnosticCode::ImageMissingAltText,
1382            span: span(1, 1, 10, 0),
1383            severity: DiagnosticSeverity::Warning,
1384            message: "Image missing alt text".to_string(),
1385        };
1386
1387        assert!(
1388            d.code_id().starts_with("MD")
1389                || d.code_id().starts_with("MO")
1390                || d.code_id().starts_with("MG")
1391        );
1392        assert!(d.fix_suggestion().contains("alt text"));
1393    }
1394
1395    #[test]
1396    fn smoke_test_resolved_catalog_metadata_available_for_seed_code() {
1397        let d = Diagnostic {
1398            code: DiagnosticCode::EmptyImageUrl,
1399            span: span(1, 1, 5, 0),
1400            severity: DiagnosticSeverity::Error,
1401            message: "Empty image URL".to_string(),
1402        };
1403
1404        assert_eq!(d.title_resolved(), Some("Empty image URL"));
1405        assert!(d
1406            .description_resolved()
1407            .expect("expected embedded catalog description")
1408            .contains("cannot render an image"));
1409    }
1410
1411    #[test]
1412    fn smoke_test_resolved_fix_suggestion_uses_catalog_override_when_present() {
1413        let d = Diagnostic {
1414            code: DiagnosticCode::ImageMissingAltText,
1415            span: span(1, 1, 10, 0),
1416            severity: DiagnosticSeverity::Warning,
1417            message: "Image missing alt text".to_string(),
1418        };
1419
1420        assert_eq!(
1421            d.fix_suggestion_resolved(),
1422            "Add descriptive alt text between '[' and ']' for accessibility and better screen-reader output."
1423        );
1424    }
1425
1426    #[test]
1427    fn smoke_test_parse_error_diagnostic_builder() {
1428        let d = Diagnostic::parse_error("Parse failed");
1429        assert_eq!(d.code, DiagnosticCode::ParseFailure);
1430        assert_eq!(d.severity, DiagnosticSeverity::Error);
1431        assert!(d.code_id().starts_with("MD"));
1432        assert_eq!(d.span.start.line, 1);
1433        assert_eq!(d.span.start.column, 1);
1434    }
1435
1436    #[test]
1437    fn smoke_test_diagnostics_options_critical_only_filters_non_errors() {
1438        let doc = Document {
1439            children: vec![Node {
1440                kind: NodeKind::Heading {
1441                    level: 1,
1442                    text: "This heading is intentionally very long to trigger an informational diagnostic while remaining syntactically valid and useful for filtering checks".to_string(),
1443                    id: None,
1444                },
1445                span: Some(span(1, 1, 20, 0)),
1446                children: vec![],
1447            }],
1448            ..Default::default()
1449        };
1450
1451        let all = compute_diagnostics_with_options(&doc, DiagnosticsOptions::all());
1452        let critical = compute_diagnostics_with_options(&doc, DiagnosticsOptions::critical_only());
1453
1454        assert!(all
1455            .iter()
1456            .any(|d| matches!(d.severity, DiagnosticSeverity::Info)));
1457        assert!(critical.is_empty());
1458    }
1459
1460    #[test]
1461    fn smoke_test_diagnostics_options_max_limit_is_applied() {
1462        let doc = Document {
1463            children: vec![
1464                Node {
1465                    kind: NodeKind::Heading {
1466                        level: 10,
1467                        text: "".to_string(),
1468                        id: None,
1469                    },
1470                    span: Some(span(1, 1, 2, 0)),
1471                    children: vec![],
1472                },
1473                Node {
1474                    kind: NodeKind::Image {
1475                        url: "".to_string(),
1476                        alt: "".to_string(),
1477                    },
1478                    span: Some(span(2, 1, 3, 10)),
1479                    children: vec![],
1480                },
1481            ],
1482            ..Default::default()
1483        };
1484
1485        let diagnostics = compute_diagnostics_with_options(
1486            &doc,
1487            DiagnosticsOptions {
1488                profile: DiagnosticsProfile::All,
1489                max_diagnostics: Some(2),
1490            },
1491        );
1492
1493        assert_eq!(diagnostics.len(), 2);
1494    }
1495
1496    #[test]
1497    fn smoke_test_duplicate_heading_ids_diagnosed() {
1498        let doc = Document {
1499            children: vec![
1500                Node {
1501                    kind: NodeKind::Heading {
1502                        level: 2,
1503                        text: "A".to_string(),
1504                        id: Some("dup-id".to_string()),
1505                    },
1506                    span: Some(span(1, 1, 5, 0)),
1507                    children: vec![],
1508                },
1509                Node {
1510                    kind: NodeKind::Heading {
1511                        level: 2,
1512                        text: "B".to_string(),
1513                        id: Some("dup-id".to_string()),
1514                    },
1515                    span: Some(span(3, 1, 5, 20)),
1516                    children: vec![],
1517                },
1518            ],
1519            ..Default::default()
1520        };
1521
1522        let diagnostics = compute_diagnostics(&doc);
1523        assert!(diagnostics.iter().any(|d| {
1524            d.code == DiagnosticCode::DuplicateHeadingId
1525                && d.severity == DiagnosticSeverity::Warning
1526        }));
1527    }
1528
1529    #[test]
1530    fn smoke_test_missing_language_and_http_link_rules() {
1531        let doc = Document {
1532            children: vec![
1533                Node {
1534                    kind: NodeKind::CodeBlock {
1535                        language: None,
1536                        code: "let x = 1;".to_string(),
1537                    },
1538                    span: Some(span(1, 1, 4, 0)),
1539                    children: vec![],
1540                },
1541                Node {
1542                    kind: NodeKind::Paragraph,
1543                    span: Some(span(3, 1, 30, 30)),
1544                    children: vec![Node {
1545                        kind: NodeKind::Link {
1546                            url: ["http", "://example.com"].concat(),
1547                            title: None,
1548                        },
1549                        span: Some(span(3, 5, 20, 34)),
1550                        children: vec![],
1551                    }],
1552                },
1553            ],
1554            ..Default::default()
1555        };
1556
1557        let diagnostics = compute_diagnostics(&doc);
1558        assert!(diagnostics
1559            .iter()
1560            .any(|d| d.code == DiagnosticCode::MissingCodeBlockLanguage));
1561        assert!(diagnostics
1562            .iter()
1563            .any(|d| d.code == DiagnosticCode::InsecureLinkProtocol));
1564    }
1565
1566    #[test]
1567    fn smoke_test_footnote_consistency_rules() {
1568        let doc = Document {
1569            children: vec![
1570                Node {
1571                    kind: NodeKind::Paragraph,
1572                    span: Some(span(1, 1, 20, 0)),
1573                    children: vec![Node {
1574                        kind: NodeKind::FootnoteReference {
1575                            label: "missing".to_string(),
1576                        },
1577                        span: Some(span(1, 10, 19, 9)),
1578                        children: vec![],
1579                    }],
1580                },
1581                Node {
1582                    kind: NodeKind::FootnoteDefinition {
1583                        label: "dup".to_string(),
1584                    },
1585                    span: Some(span(3, 1, 10, 30)),
1586                    children: vec![Node {
1587                        kind: NodeKind::Paragraph,
1588                        span: Some(span(3, 5, 14, 34)),
1589                        children: vec![Node {
1590                            kind: NodeKind::Text("def one".to_string()),
1591                            span: Some(span(3, 5, 11, 34)),
1592                            children: vec![],
1593                        }],
1594                    }],
1595                },
1596                Node {
1597                    kind: NodeKind::FootnoteDefinition {
1598                        label: "DUP".to_string(),
1599                    },
1600                    span: Some(span(5, 1, 10, 60)),
1601                    children: vec![],
1602                },
1603                Node {
1604                    kind: NodeKind::FootnoteDefinition {
1605                        label: "unused".to_string(),
1606                    },
1607                    span: Some(span(7, 1, 12, 90)),
1608                    children: vec![],
1609                },
1610            ],
1611            ..Default::default()
1612        };
1613
1614        let diagnostics = compute_diagnostics(&doc);
1615
1616        assert!(diagnostics
1617            .iter()
1618            .any(|d| d.code == DiagnosticCode::MissingFootnoteDefinition));
1619        assert!(diagnostics
1620            .iter()
1621            .any(|d| d.code == DiagnosticCode::DuplicateFootnoteDefinition));
1622        assert!(diagnostics
1623            .iter()
1624            .any(|d| d.code == DiagnosticCode::UnusedFootnoteDefinition));
1625    }
1626
1627    #[test]
1628    fn smoke_test_empty_table_cell_and_definition_entries() {
1629        let doc = Document {
1630            children: vec![
1631                Node {
1632                    kind: NodeKind::DefinitionList,
1633                    span: Some(span(1, 1, 4, 0)),
1634                    children: vec![],
1635                },
1636                Node {
1637                    kind: NodeKind::Table {
1638                        alignments: vec![crate::parser::TableAlignment::None],
1639                    },
1640                    span: Some(span(3, 1, 4, 20)),
1641                    children: vec![Node {
1642                        kind: NodeKind::TableRow { header: false },
1643                        span: Some(span(3, 1, 4, 20)),
1644                        children: vec![Node {
1645                            kind: NodeKind::TableCell {
1646                                header: false,
1647                                alignment: crate::parser::TableAlignment::None,
1648                            },
1649                            span: Some(span(3, 2, 3, 21)),
1650                            children: vec![Node {
1651                                kind: NodeKind::Text("   ".to_string()),
1652                                span: Some(span(3, 2, 3, 21)),
1653                                children: vec![],
1654                            }],
1655                        }],
1656                    }],
1657                },
1658            ],
1659            ..Default::default()
1660        };
1661
1662        let diagnostics = compute_diagnostics(&doc);
1663        assert!(diagnostics
1664            .iter()
1665            .any(|d| d.code == DiagnosticCode::EmptyDefinitionList));
1666        assert!(diagnostics
1667            .iter()
1668            .any(|d| d.code == DiagnosticCode::EmptyTableCell));
1669    }
1670
#[test]
fn smoke_test_tab_group_and_slider_rules() {
    // Two childless tab items whose titles differ only in letter case and
    // surrounding whitespace.
    let first_tab = Node {
        kind: NodeKind::TabItem {
            title: String::from("One"),
        },
        span: Some(span(2, 1, 8, 11)),
        children: Vec::new(),
    };
    let second_tab = Node {
        kind: NodeKind::TabItem {
            title: String::from(" one "),
        },
        span: Some(span(3, 1, 10, 20)),
        children: Vec::new(),
    };
    let tab_group = Node {
        kind: NodeKind::TabGroup,
        span: Some(span(1, 1, 10, 0)),
        children: vec![first_tab, second_tab],
    };

    // A slider deck whose only slide contains nothing but whitespace text.
    let blank_text = Node {
        kind: NodeKind::Text(String::from("  ")),
        span: Some(span(6, 1, 3, 50)),
        children: Vec::new(),
    };
    let lone_slide = Node {
        kind: NodeKind::Slide { vertical: false },
        span: Some(span(6, 1, 8, 50)),
        children: vec![blank_text],
    };
    let slider_deck = Node {
        kind: NodeKind::SliderDeck {
            timer_seconds: Some(5),
        },
        span: Some(span(5, 1, 12, 40)),
        children: vec![lone_slide],
    };

    let doc = Document {
        children: vec![tab_group, slider_deck],
        ..Default::default()
    };

    let diagnostics = compute_diagnostics(&doc);

    // True when at least one emitted diagnostic carries the given code.
    let reported = |code: DiagnosticCode| diagnostics.iter().any(|d| d.code == code);

    assert!(reported(DiagnosticCode::DuplicateTabTitle));
    assert!(reported(DiagnosticCode::EmptyTabPanel));
    assert!(reported(DiagnosticCode::EmptySlide));
}
1726
#[test]
fn smoke_test_empty_admonition_math_and_mermaid_rules() {
    // Admonition with an empty title string and no child nodes.
    let bare_admonition = Node {
        kind: NodeKind::Admonition {
            kind: crate::parser::AdmonitionKind::Note,
            title: Some(String::new()),
            icon: None,
            style: crate::parser::AdmonitionStyle::Alert,
        },
        span: Some(span(1, 1, 10, 0)),
        children: Vec::new(),
    };

    // Inline and display math whose contents are whitespace only.
    let blank_inline_math = Node {
        kind: NodeKind::InlineMath {
            content: String::from("   "),
        },
        span: Some(span(3, 2, 6, 21)),
        children: Vec::new(),
    };
    let blank_display_math = Node {
        kind: NodeKind::DisplayMath {
            content: String::from("\n\t"),
        },
        span: Some(span(3, 7, 11, 26)),
        children: Vec::new(),
    };
    let math_paragraph = Node {
        kind: NodeKind::Paragraph,
        span: Some(span(3, 1, 12, 20)),
        children: vec![blank_inline_math, blank_display_math],
    };

    // Mermaid diagram with an empty source string.
    let empty_mermaid = Node {
        kind: NodeKind::MermaidDiagram {
            content: String::new(),
        },
        span: Some(span(5, 1, 4, 40)),
        children: Vec::new(),
    };

    let doc = Document {
        children: vec![bare_admonition, math_paragraph, empty_mermaid],
        ..Default::default()
    };

    let diagnostics = compute_diagnostics(&doc);

    // True when at least one emitted diagnostic carries the given code.
    let reported = |code: DiagnosticCode| diagnostics.iter().any(|d| d.code == code);

    assert!(reported(DiagnosticCode::EmptyAdmonitionBody));
    assert!(reported(DiagnosticCode::EmptyMathExpression));
    assert!(reported(DiagnosticCode::EmptyMermaidDiagram));
}
1784
#[test]
fn smoke_test_link_reference_and_html_event_handler_rules() {
    // Reference-style link whose label has no definition anywhere in this document.
    let link_text = Node {
        kind: NodeKind::Text(String::from("Guide")),
        span: Some(span(1, 3, 8, 2)),
        children: Vec::new(),
    };
    let dangling_reference = Node {
        kind: NodeKind::LinkReference {
            label: String::from("missing-ref"),
            suffix: String::from("[missing-ref]"),
        },
        span: Some(span(1, 2, 20, 1)),
        children: vec![link_text],
    };
    let link_paragraph = Node {
        kind: NodeKind::Paragraph,
        span: Some(span(1, 1, 24, 0)),
        children: vec![dangling_reference],
    };

    // Inline HTML and an HTML block, each carrying an `on*` event-handler attribute.
    let inline_html = Node {
        kind: NodeKind::InlineHtml(String::from("<a onclick=\"x()\">x</a>")),
        span: Some(span(2, 1, 22, 25)),
        children: Vec::new(),
    };
    let html_block = Node {
        kind: NodeKind::HtmlBlock {
            html: String::from("<img onerror=\"x()\" src=\"/a.png\">"),
        },
        span: Some(span(3, 1, 30, 48)),
        children: Vec::new(),
    };

    let doc = Document {
        children: vec![link_paragraph, inline_html, html_block],
        ..Default::default()
    };

    let diagnostics = compute_diagnostics(&doc);

    // True when at least one emitted diagnostic carries the given code.
    let reported = |code: DiagnosticCode| diagnostics.iter().any(|d| d.code == code);

    assert!(reported(DiagnosticCode::UnresolvedLinkReference));
    assert!(reported(DiagnosticCode::InlineHtmlUnsafeEventHandler));
    assert!(reported(DiagnosticCode::HtmlBlockUnsafeEventHandler));
}
1833
#[test]
fn smoke_test_task_item_and_platform_mention_rules() {
    // List item whose only child is an unchecked task checkbox.
    let checkbox = Node {
        kind: NodeKind::TaskCheckbox { checked: false },
        span: Some(span(1, 3, 5, 2)),
        children: Vec::new(),
    };
    let task_item = Node {
        kind: NodeKind::ListItem,
        span: Some(span(1, 1, 6, 0)),
        children: vec![checkbox],
    };

    // Mention with whitespace-only username and display name on the
    // platform name "unknownplatform".
    let mention = Node {
        kind: NodeKind::PlatformMention {
            username: String::from("   "),
            platform: String::from("unknownplatform"),
            display: Some(String::from("   ")),
        },
        span: Some(span(2, 1, 22, 8)),
        children: Vec::new(),
    };

    let doc = Document {
        children: vec![task_item, mention],
        ..Default::default()
    };

    let diagnostics = compute_diagnostics(&doc);

    // True when at least one emitted diagnostic carries the given code.
    let reported = |code: DiagnosticCode| diagnostics.iter().any(|d| d.code == code);

    assert!(reported(DiagnosticCode::EmptyTaskListItem));
    assert!(reported(DiagnosticCode::EmptyPlatformMentionUsername));
    assert!(reported(DiagnosticCode::UnknownPlatformMentionPlatform));
    assert!(reported(DiagnosticCode::EmptyPlatformMentionDisplayName));
}
1875
#[test]
fn smoke_test_malformed_task_unknown_admonition_and_unknown_emoji_rules() {
    // List item whose paragraph text opens with a bracketed "[maybe]" marker.
    let task_text = Node {
        kind: NodeKind::Text(String::from("[maybe] investigate")),
        span: Some(span(1, 3, 16, 2)),
        children: Vec::new(),
    };
    let task_paragraph = Node {
        kind: NodeKind::Paragraph,
        span: Some(span(1, 3, 16, 2)),
        children: vec![task_text],
    };
    let list_item = Node {
        kind: NodeKind::ListItem,
        span: Some(span(1, 1, 16, 0)),
        children: vec![task_paragraph],
    };

    // Blockquote whose paragraph text opens with a "[!CUSTOM]" marker.
    let quote_text = Node {
        kind: NodeKind::Text(String::from("[!CUSTOM] body")),
        span: Some(span(2, 3, 24, 22)),
        children: Vec::new(),
    };
    let quote_paragraph = Node {
        kind: NodeKind::Paragraph,
        span: Some(span(2, 3, 24, 22)),
        children: vec![quote_text],
    };
    let blockquote = Node {
        kind: NodeKind::Blockquote,
        span: Some(span(2, 1, 24, 20)),
        children: vec![quote_paragraph],
    };

    // Plain paragraph containing the colon-delimited token ":not_an_emoji:".
    let status_text = Node {
        kind: NodeKind::Text(String::from("Status :not_an_emoji:")),
        span: Some(span(3, 8, 23, 52)),
        children: Vec::new(),
    };
    let status_paragraph = Node {
        kind: NodeKind::Paragraph,
        span: Some(span(3, 1, 18, 45)),
        children: vec![status_text],
    };

    let doc = Document {
        children: vec![list_item, blockquote, status_paragraph],
        ..Default::default()
    };

    let diagnostics = compute_diagnostics(&doc);

    // True when at least one emitted diagnostic carries the given code.
    let reported = |code: DiagnosticCode| diagnostics.iter().any(|d| d.code == code);

    assert!(reported(DiagnosticCode::MalformedTaskCheckbox));
    assert!(reported(DiagnosticCode::UnknownAdmonitionKind));
    assert!(reported(DiagnosticCode::UnknownEmojiShortcode));
}
1931
#[test]
fn smoke_test_unknown_emoji_shortcode_avoids_common_false_positives() {
    // Text mixing a URL with a port, a colon-separated ratio, and the
    // `:smile:` shortcode; none of these may be reported as unknown emoji.
    let sentence = Node {
        kind: NodeKind::Text(String::from(
            "Visit https://example.com:8080/path, ratio a:b:c, and valid :smile:.",
        )),
        span: Some(span(1, 1, 80, 0)),
        children: Vec::new(),
    };
    let paragraph = Node {
        kind: NodeKind::Paragraph,
        span: Some(span(1, 1, 80, 0)),
        children: vec![sentence],
    };

    let doc = Document {
        children: vec![paragraph],
        ..Default::default()
    };

    let diagnostics = compute_diagnostics(&doc);

    let flagged_unknown_emoji = diagnostics
        .iter()
        .any(|d| d.code == DiagnosticCode::UnknownEmojiShortcode);
    assert!(!flagged_unknown_emoji);
}
1956
#[test]
fn smoke_test_unknown_emoji_shortcode_detects_punctuation_wrapped_token() {
    // The ":not_an_emoji:" token sits directly inside parentheses.
    let sentence = Node {
        kind: NodeKind::Text(String::from("Please review (:not_an_emoji:) now.")),
        span: Some(span(1, 1, 42, 0)),
        children: Vec::new(),
    };
    let paragraph = Node {
        kind: NodeKind::Paragraph,
        span: Some(span(1, 1, 42, 0)),
        children: vec![sentence],
    };

    let doc = Document {
        children: vec![paragraph],
        ..Default::default()
    };

    let diagnostics = compute_diagnostics(&doc);

    let flagged_unknown_emoji = diagnostics
        .iter()
        .any(|d| d.code == DiagnosticCode::UnknownEmojiShortcode);
    assert!(flagged_unknown_emoji);
}
1978
#[test]
fn smoke_test_diagnostics_are_sorted_for_editor_stability() {
    // The heading is listed before the image although its span arguments are
    // larger. NOTE(review): presumably this forces a reorder in the output;
    // confirm against the argument order of the `span` helper.
    let odd_heading = Node {
        kind: NodeKind::Heading {
            level: 10,
            text: String::new(),
            id: None,
        },
        span: Some(span(2, 1, 2, 20)),
        children: Vec::new(),
    };
    let blank_image = Node {
        kind: NodeKind::Image {
            url: String::new(),
            alt: String::new(),
        },
        span: Some(span(1, 1, 3, 0)),
        children: Vec::new(),
    };

    let doc = Document {
        children: vec![odd_heading, blank_image],
        ..Default::default()
    };

    let diagnostics = compute_diagnostics(&doc);

    // Walk every adjacent pair and require a non-decreasing ordering key of
    // (start offset, end offset, severity rank, code string, message).
    for (earlier, later) in diagnostics.iter().zip(diagnostics.iter().skip(1)) {
        let earlier_key = (
            earlier.span.start.offset,
            earlier.span.end.offset,
            earlier.severity.sort_rank(),
            earlier.code.as_str(),
            earlier.message.as_str(),
        );
        let later_key = (
            later.span.start.offset,
            later.span.end.offset,
            later.severity.sort_rank(),
            later.code.as_str(),
            later.message.as_str(),
        );
        assert!(
            earlier_key <= later_key,
            "diagnostics must be sorted for stable editor rendering"
        );
    }
}
2028
#[test]
fn smoke_test_sort_and_dedup_diagnostics_removes_exact_duplicates() {
    // Every fixture diagnostic shares one span; only code, severity, and
    // message vary between entries.
    let at_shared_span =
        |code: DiagnosticCode, severity: DiagnosticSeverity, message: &str| Diagnostic {
            code,
            span: span(1, 1, 3, 0),
            severity,
            message: message.to_string(),
        };

    // The first two entries are identical in every field.
    let mut diagnostics = vec![
        at_shared_span(
            DiagnosticCode::EmptyImageUrl,
            DiagnosticSeverity::Error,
            "Empty image URL",
        ),
        at_shared_span(
            DiagnosticCode::EmptyImageUrl,
            DiagnosticSeverity::Error,
            "Empty image URL",
        ),
        at_shared_span(
            DiagnosticCode::ImageMissingAltText,
            DiagnosticSeverity::Warning,
            "Image missing alt text",
        ),
    ];

    sort_and_dedup_diagnostics(&mut diagnostics);

    assert_eq!(diagnostics.len(), 2);

    let remaining_codes: Vec<&DiagnosticCode> = diagnostics.iter().map(|d| &d.code).collect();
    assert_eq!(
        remaining_codes,
        [
            &DiagnosticCode::EmptyImageUrl,
            &DiagnosticCode::ImageMissingAltText,
        ]
    );
}
2058}