Skip to main content

lex_analysis/
diagnostics.rs

1use crate::inline::extract_references;
2use lex_config::{DiagnosticsRulesConfig, RuleConfig, Severity};
3use lex_core::lex::ast::{
4    Annotation, ContentItem, Document, Range, Session, Table, TableRow, TextContent,
5};
6use lex_core::lex::inlines::ReferenceType;
7use lex_extension_host::Registry;
8use std::borrow::Cow;
9use std::collections::HashSet;
10
11#[derive(Debug, Clone, PartialEq, Eq)]
12pub enum DiagnosticKind {
13    MissingFootnoteDefinition,
14    UnusedFootnoteDefinition,
15    TableInconsistentColumns,
16    /// A label invocation failed schema pre-validation before the
17    /// handler was dispatched. The variant carries which of the
18    /// pre-validation checks tripped.
19    SchemaValidation(SchemaValidationKind),
20    /// A diagnostic emitted by a registered extension handler. The
21    /// `namespace` field is the namespace name (the part before the
22    /// first `.`, e.g., `"acme"` for label `"acme.task"`) — `lex-lsp`
23    /// surfaces it as the diagnostic `source: "lex:<namespace>"` so
24    /// editors can filter by extension.
25    ///
26    /// `code` carries the **bare leaf** the handler supplied (the
27    /// `code` field on `lex_extension::Diagnostic`), *not* the wire
28    /// form. The analyser glues on the namespace prefix in
29    /// [`DiagnosticKind::code`] to produce the wire shape per spec §9
30    /// (`<namespace>.<leaf>`, e.g. `"acme.foo"`; or the per-namespace
31    /// fallback `"acme.diagnostic"` when the handler set `None`).
32    /// Passing an already-prefixed value here would produce a
33    /// double-prefixed wire code (`"acme.acme.foo"`) — handlers should
34    /// supply just the leaf.
35    Handler {
36        namespace: String,
37        code: Option<String>,
38    },
39    /// A label uses the reserved `doc.*` prefix (forbidden under
40    /// `comms/specs/general.lex` §4.1). PR 4 of #584 emits this when
41    /// permissive-mode parse lets the label flow through; the LSP
42    /// then offers a quickfix to rewrite to the blessed shortcut
43    /// (`doc.table` → `table`, `doc.image` → `image`, etc.).
44    ForbiddenLabelPrefix,
45    /// A `lex.*` literal that doesn't match any registered canonical
46    /// in [`lex_core::lex::builtins::CANONICAL_LABELS`]. Typically a
47    /// typo (`lex.fooar`) or a label authored against a future
48    /// version of the core schemas.
49    UnknownLexCanonical,
50    /// A paragraph line that looks like an annotation header (`:: label`)
51    /// but has no closing `::`. There is no "open form" — such a line is
52    /// kept as paragraph text rather than dropped (lex#700) — so this
53    /// warns the author that what looks like metadata is being treated as
54    /// content. The fix is to close the marker: `:: label ::`.
55    UnclosedAnnotation,
56    /// A session reference (`[#2.1]`) whose identifier matches no session
57    /// in the merged document. Emitted only by the opt-in
58    /// [`analyze_references`] pass (`check --references`), never by the
59    /// always-on analyser.
60    MissingSessionTarget,
61    /// A definition reference (`[Title]`) whose subject matches no
62    /// definition in the merged document. Opt-in (`check --references`).
63    MissingDefinitionTarget,
64    /// An annotation reference (`[::label]`) whose label matches no
65    /// annotation in the merged document. Opt-in (`check --references`).
66    MissingAnnotationTarget,
67    /// A citation reference (`[@key]`) whose key matches no annotation
68    /// label or definition subject in the merged document. Opt-in
69    /// (`check --references`).
70    MissingCitationTarget,
71    /// A URL reference (`[http://…]`, `[https://…]`, `[mailto:…]`) that
72    /// is not well-formed (embedded space, empty host, otherwise
73    /// unparseable).
74    /// Opt-in (`check --references`); a pure parse check — network
75    /// reachability is out of scope. Emitted by [`analyze_references`].
76    MalformedUrl,
77    /// A file-path reference — an inline `ReferenceType::File`
78    /// (`[./x.txt]`, `[../y]`, `[/abs]`) or a verbatim block's `src=`
79    /// parameter — that points at no file on disk, or whose target escapes
80    /// the resolution root / is a platform-absolute path. Opt-in:
81    /// emitted only by `check --references` (the existence check is
82    /// IO-bearing, so it runs in the CLI seam, not the pure analyser).
83    /// `lex.include src=` is excluded — its path is validated by the
84    /// base command via include expansion.
85    MissingFileTarget,
86}
87
/// How serious an analysis diagnostic is.
///
/// The analyser fills this in on every diagnostic it produces, and
/// `lex-lsp` reads `diag.severity` directly when mapping onto the LSP
/// wire. (The LSP layer used to derive severity from
/// `DiagnosticKind`; that mapping moved upstream once
/// extension-emitted diagnostics needed per-instance severities.)
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DiagnosticSeverity {
    Error,
    Warning,
    Info,
    Hint,
}
101
/// The schema pre-validation checks the analyser runs itself before
/// handing a label to its handler. Wire spec / proposal §13.2.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SchemaValidationKind {
    /// The namespace is registered, but its schema set does not
    /// declare this exact label. The walker emits this when
    /// `Registry::schema_for(label)` returns `None` while
    /// `is_namespace_healthy(<ns prefix>)` is `true`. This separates
    /// a "typo / out-of-version label" (this variant, surfaced as a
    /// document error) from an "unknown namespace" (silent
    /// pass-through per the bounded-extensibility rule).
    UnknownLabel,
    MissingParam,
    ParamTypeMismatch,
    BadAttachment,
    BodyShapeMismatch,
}
119
120impl SchemaValidationKind {
121    /// The on-the-wire code for this schema-validation kind. Matches
122    /// the `[diagnostics.rules.schema]` field name in `.lex.toml`.
123    pub fn code(&self) -> &'static str {
124        match self {
125            SchemaValidationKind::UnknownLabel => "schema.unknown-label",
126            SchemaValidationKind::MissingParam => "schema.missing-param",
127            SchemaValidationKind::ParamTypeMismatch => "schema.param-type-mismatch",
128            SchemaValidationKind::BadAttachment => "schema.bad-attachment",
129            SchemaValidationKind::BodyShapeMismatch => "schema.body-shape-mismatch",
130        }
131    }
132}
133
134impl DiagnosticKind {
135    /// The on-the-wire code for this diagnostic kind. The same value
136    /// travels in `lsp_types::Diagnostic.code` and is the key the
137    /// `[diagnostics.rules]` block in `.lex.toml` matches against
138    /// (see [`DiagnosticsRulesConfig::lookup_by_code`]).
139    ///
140    /// For the `Handler` variant — extension-emitted diagnostics —
141    /// this returns the namespace-prefixed code: `"acme.foo"` for
142    /// `Handler { namespace: "acme", code: Some("foo") }`, or
143    /// `"acme.diagnostic"` when the handler omitted a code. The
144    /// namespace prefix is what `[diagnostics.rules]` keys match
145    /// against (spec §9), and the per-namespace `.diagnostic` fallback
146    /// gives users one knob per namespace for code-less handler
147    /// diagnostics rather than a single global `"handler.diagnostic"`.
148    ///
149    /// Returns `Cow<'static, str>` so built-in variants borrow a
150    /// static string (no allocation) while the `Handler` variant owns
151    /// the `format!`-produced result. `apply_rules` runs on every
152    /// document change in the LSP, so avoiding per-built-in allocations
153    /// matters.
154    pub fn code(&self) -> Cow<'static, str> {
155        match self {
156            DiagnosticKind::MissingFootnoteDefinition => "missing-footnote".into(),
157            DiagnosticKind::UnusedFootnoteDefinition => "unused-footnote".into(),
158            DiagnosticKind::TableInconsistentColumns => "table-inconsistent-columns".into(),
159            DiagnosticKind::SchemaValidation(kind) => kind.code().into(),
160            DiagnosticKind::Handler { namespace, code } => match code {
161                Some(c) => format!("{namespace}.{c}").into(),
162                None => format!("{namespace}.diagnostic").into(),
163            },
164            DiagnosticKind::ForbiddenLabelPrefix => "forbidden-label-prefix".into(),
165            DiagnosticKind::UnknownLexCanonical => "unknown-lex-canonical".into(),
166            DiagnosticKind::UnclosedAnnotation => "unclosed-annotation".into(),
167            DiagnosticKind::MissingSessionTarget => "missing-session-target".into(),
168            DiagnosticKind::MissingDefinitionTarget => "missing-definition-target".into(),
169            DiagnosticKind::MissingAnnotationTarget => "missing-annotation-target".into(),
170            DiagnosticKind::MissingCitationTarget => "missing-citation-target".into(),
171            DiagnosticKind::MalformedUrl => "malformed-url".into(),
172            DiagnosticKind::MissingFileTarget => "missing-file-target".into(),
173        }
174    }
175}
176
177/// Apply a `[diagnostics.rules]` configuration to a stream of analyser
178/// diagnostics in place. Drops diagnostics whose resolved severity is
179/// `allow`, and remaps the remaining diagnostics' `severity` field:
180///
181/// - `warn` → the diagnostic's intrinsic severity stays unchanged.
182/// - `deny` → severity is upgraded to `Error`.
183///
184/// `lookup_rule` is the resolution function — typically
185/// [`LoadedLexConfig::lookup_diagnostic_rule`](lex_config::LoadedLexConfig::lookup_diagnostic_rule),
186/// which consults the named built-in fields first and the
187/// extension-rules side-channel second. Diagnostics whose code has no
188/// matching entry on either surface pass through untouched at their
189/// intrinsic severity.
190pub fn apply_rules<F>(diagnostics: &mut Vec<AnalysisDiagnostic>, lookup_rule: F)
191where
192    F: Fn(&str) -> Option<RuleConfig>,
193{
194    diagnostics.retain_mut(|diag| {
195        let code = diag.kind.code();
196        let Some(rule) = lookup_rule(&code) else {
197            return true;
198        };
199        match rule.severity() {
200            Severity::Allow => false,
201            Severity::Warn => true,
202            Severity::Deny => {
203                diag.severity = DiagnosticSeverity::Error;
204                true
205            }
206        }
207    });
208}
209
210#[derive(Debug, Clone, PartialEq, Eq)]
211pub struct AnalysisDiagnostic {
212    pub range: Range,
213    /// Severity, set by the analyser for every diagnostic it
214    /// produces. `lex-lsp` reads this directly when mapping onto LSP
215    /// wire severities; the kind-to-severity mapping that lived in
216    /// `to_lsp_diagnostic` is no longer authoritative.
217    pub severity: DiagnosticSeverity,
218    pub kind: DiagnosticKind,
219    pub message: String,
220}
221
222/// Run the analyser without an extension registry — equivalent to
223/// running with an empty registry. Provided for callers that haven't
224/// adopted the extension system yet.
225pub fn analyze(document: &Document) -> Vec<AnalysisDiagnostic> {
226    let registry = Registry::new();
227    analyze_with_registry(document, &registry)
228}
229
230/// Run the analyser with a populated extension registry. Labels whose
231/// namespace is registered get pre-validated against their schema and,
232/// if pre-validation passes, dispatched to the handler's `on_validate`
233/// hook. Handler-emitted diagnostics are merged into the same stream as
234/// the built-in checks.
235pub fn analyze_with_registry(document: &Document, registry: &Registry) -> Vec<AnalysisDiagnostic> {
236    let mut diagnostics = Vec::new();
237    check_footnotes(document, &mut diagnostics);
238    check_tables(document, &mut diagnostics);
239    check_labels(document, &mut diagnostics);
240    check_unclosed_annotations(document, &mut diagnostics);
241    crate::label_dispatch::dispatch_labels(document, registry, &mut diagnostics);
242    diagnostics
243}
244
245/// Warn on paragraph lines that look like an annotation header but never close
246/// the `:: ::` marker (lex#700). There is no "open form": `:: label` with no
247/// closing `::` is not a recognized element, so the parser keeps it as paragraph
248/// text rather than dropping it. This surfaces that — the author likely meant an
249/// annotation and forgot the trailing `::`.
250fn check_unclosed_annotations(document: &Document, diagnostics: &mut Vec<AnalysisDiagnostic>) {
251    fn emit(
252        tl: &lex_core::lex::ast::elements::paragraph::TextLine,
253        out: &mut Vec<AnalysisDiagnostic>,
254    ) {
255        if looks_like_unclosed_annotation(tl.text()) {
256            out.push(AnalysisDiagnostic {
257                range: tl.location.clone(),
258                severity: DiagnosticSeverity::Warning,
259                kind: DiagnosticKind::UnclosedAnnotation,
260                message: "this line looks like an annotation but has no closing `::`, \
261                          so it is treated as text. Close the marker to make it an \
262                          annotation, e.g. `:: label ::`."
263                    .to_string(),
264            });
265        }
266    }
267
268    fn walk(item: &ContentItem, out: &mut Vec<AnalysisDiagnostic>) {
269        if let ContentItem::Paragraph(p) = item {
270            for line in &p.lines {
271                if let ContentItem::TextLine(tl) = line {
272                    emit(tl, out);
273                }
274            }
275        }
276        if let Some(children) = item.children() {
277            for child in children {
278                walk(child, out);
279            }
280        }
281    }
282
283    for child in &document.root.children {
284        walk(child, diagnostics);
285    }
286}
287
/// Does `text` open an annotation header (`:: label …`) without ever
/// closing it with a second `::`?
///
/// This is deliberately a cheap text heuristic. A *closed* annotation
/// has already become its own node by the time content reaches the
/// analyser, so a paragraph line that still starts with `::` can only
/// be the unclosed shape.
fn looks_like_unclosed_annotation(text: &str) -> bool {
    let Some(after_marker) = text.trim().strip_prefix("::") else {
        return false;
    };

    // The opening marker must be followed by whitespace and then a
    // label-shaped token (label.lex: a letter, then
    // letters/digits/`_`/`-`/`.`); only the leading letter is checked.
    let label_shaped = after_marker.starts_with(char::is_whitespace)
        && after_marker.trim_start().starts_with(char::is_alphabetic);
    if !label_shaped {
        return false;
    }

    // A second *structural* `::` closes the marker, which rules out the
    // unclosed shape. The scan tracks double quotes so that a `::`
    // inside a quoted parameter value (e.g. `:: note foo=":: value"`)
    // is not mistaken for a close, matching the lexer's
    // structural-marker detection.
    let mut quoted = false;
    let mut after_colon = false;
    for ch in after_marker.chars() {
        match ch {
            '"' => {
                quoted = !quoted;
                after_colon = false;
            }
            ':' if !quoted => {
                if after_colon {
                    return false;
                }
                after_colon = true;
            }
            _ => after_colon = false,
        }
    }
    true
}
314
315/// Run the analyser with both an extension registry and a
316/// `[diagnostics.rules]` configuration. The configuration is applied
317/// after all checks run, so rule overrides ([`Severity::Allow`] /
318/// [`Severity::Deny`]) take effect uniformly across the diagnostic
319/// stream.
320pub fn analyze_with_rules(
321    document: &Document,
322    registry: &Registry,
323    rules: &DiagnosticsRulesConfig,
324) -> Vec<AnalysisDiagnostic> {
325    let mut diagnostics = analyze_with_registry(document, registry);
326    apply_rules(&mut diagnostics, |code| rules.lookup_by_code(code).cloned());
327    diagnostics
328}
329
330/// Opt-in pass: validate internal cross-references over the (merged)
331/// document and emit a `missing-*-target` diagnostic for each dangling
332/// in-document reference.
333///
334/// **Deliberately separate from [`analyze_with_registry`]** so the
335/// always-on analyser (and thus the LSP, which calls
336/// [`analyze_with_rules`] on every keystroke) does *not* emit these.
337/// `check --references` calls this explicitly; the LSP can opt in later.
338///
339/// Resolution runs over the single merged tree, so it is bidirectional:
340/// a reference resolves against targets defined anywhere in the document
341/// — any included fragment or the master — and a `missing-*` fires only
342/// when the target is absent from the *whole* tree. Each finding's range
343/// carries the reference's origin (via [`extract_references`]), so the
344/// caller blames it on the file the reference was authored in.
345///
346/// Checked kinds and their codes:
347///
348/// - [`ReferenceType::Session`] → `missing-session-target`
349/// - [`ReferenceType::General`] → `missing-definition-target`
350/// - [`ReferenceType::AnnotationReference`] → `missing-annotation-target`
351/// - [`ReferenceType::Citation`] → `missing-citation-target`
352/// - [`ReferenceType::Url`] → `malformed-url` (well-formedness only)
353///
354/// The `Url` arm is *not* a cross-reference check: it validates the URL
355/// is well-formed (a pure, IO-free parse — **no network**, by design;
356/// reachability is out of scope, issue #762). It runs in this pass
357/// because well-formedness is pure and `--references` already gates it.
358///
359/// `ToCome` / `NotSure` are intentional placeholders and never flagged;
360/// `FootnoteNumber` is validated by the always-on analyser
361/// ([`check_footnotes`]); `File` is out of scope here (issue #761). All
362/// emitted diagnostics default to [`DiagnosticSeverity::Warning`] —
363/// callers apply `[diagnostics.rules]` via [`apply_rules`] for per-kind
364/// overrides.
365pub fn analyze_references(document: &Document) -> Vec<AnalysisDiagnostic> {
366    use crate::reference_targets::{targets_from_reference_type, ReferenceTarget};
367    use crate::references::target_resolves;
368
369    let mut diagnostics = Vec::new();
370    crate::utils::for_each_text_content(document, &mut |text| {
371        for reference in extract_references(text) {
372            let (kind, render): (DiagnosticKind, String) = match &reference.reference_type {
373                ReferenceType::Session { target } if !target.trim().is_empty() => (
374                    DiagnosticKind::MissingSessionTarget,
375                    format!(
376                        "Session reference [#{}] has no matching session",
377                        target.trim()
378                    ),
379                ),
380                ReferenceType::General { target } if !target.trim().is_empty() => (
381                    DiagnosticKind::MissingDefinitionTarget,
382                    format!("Reference [{}] has no matching definition", target.trim()),
383                ),
384                ReferenceType::AnnotationReference { label } if !label.trim().is_empty() => (
385                    DiagnosticKind::MissingAnnotationTarget,
386                    format!(
387                        "Annotation reference [::{}] has no matching annotation",
388                        label.trim()
389                    ),
390                ),
391                ReferenceType::Url { target } if !target.trim().is_empty() => {
392                    // URL references are validated for well-formedness
393                    // only — a pure, IO-free parse check (no network: see
394                    // [`url_is_malformed`]). This is self-contained (no
395                    // document resolution), so it emits inline and
396                    // `continue`s like the citation arm rather than
397                    // falling through to the target-resolution tail.
398                    let target = target.trim();
399                    if url_is_malformed(target) {
400                        diagnostics.push(AnalysisDiagnostic {
401                            range: reference.range.clone(),
402                            severity: DiagnosticSeverity::Warning,
403                            kind: DiagnosticKind::MalformedUrl,
404                            message: format!("URL [{target}] is malformed"),
405                        });
406                    }
407                    continue;
408                }
409                ReferenceType::Citation(data) => {
410                    // A citation may carry multiple keys; each is its own
411                    // potential dangling target. Emit per unresolved key.
412                    for key in &data.keys {
413                        if key.trim().is_empty() {
414                            continue;
415                        }
416                        let target = ReferenceTarget::CitationKey(key.trim().to_string());
417                        if !target_resolves(document, &target) {
418                            diagnostics.push(AnalysisDiagnostic {
419                                range: reference.range.clone(),
420                                severity: DiagnosticSeverity::Warning,
421                                kind: DiagnosticKind::MissingCitationTarget,
422                                message: format!(
423                                    "Citation [@{}] has no matching annotation or definition",
424                                    key.trim()
425                                ),
426                            });
427                        }
428                    }
429                    continue;
430                }
431                // Placeholders, footnotes (always-on), URL/File (out of
432                // scope), and empty-target references: skip.
433                _ => continue,
434            };
435
436            // Non-citation kinds: resolve via the reference's targets and
437            // emit when none match anywhere in the merged tree.
438            let resolves = targets_from_reference_type(&reference.reference_type)
439                .iter()
440                .any(|t| target_resolves(document, t));
441            if !resolves {
442                diagnostics.push(AnalysisDiagnostic {
443                    range: reference.range.clone(),
444                    severity: DiagnosticSeverity::Warning,
445                    kind,
446                    message: render,
447                });
448            }
449        }
450    });
451    diagnostics
452}
453
454/// Is `target` a malformed URL? Pure, IO-free well-formedness check —
455/// **never opens a connection**. Classification (`ReferenceType::Url`)
456/// already guarantees one of the `http://` / `https://` / `mailto:`
457/// scheme prefixes, so this catches what classification can't: embedded
458/// spaces, an empty host, and otherwise-unparseable targets.
459///
460/// A bare `url::Url::parse(...).is_err()` is sufficient: under the WHATWG
461/// URL standard the `url` crate implements, the special schemes we
462/// validate (`http`/`https`) require a non-empty host, so a missing host
463/// (`https://`) already parse-fails with `EmptyHost`; `mailto:` is
464/// host-less and parses fine — exactly the behavior we want, with no
465/// scheme-specific host check needed.
466///
467/// A future opt-in `--check-urls-online` would layer network
468/// reachability *on top* of this — deliberately unimplemented here
469/// (issue #762: reachability out of scope).
470fn url_is_malformed(target: &str) -> bool {
471    url::Url::parse(target).is_err()
472}
473
474/// A non-include file-path reference and the range to blame it on.
475///
476/// Produced by [`collect_file_references`] for the opt-in
477/// `check --references` *file-path* pass. The range is origin-stamped
478/// (it comes from the reference's authoring file, via
479/// [`extract_references`] for inline refs or the verbatim node's own
480/// range), so a consumer that resolves `target` relative to that origin
481/// — and blames findings on it — stays origin-faithful across an include
482/// merge.
483#[derive(Debug, Clone)]
484pub struct FileReference {
485    /// The raw path target as authored (`./x.txt`, `../y`, `/abs`).
486    pub target: String,
487    /// Origin-stamped range to resolve against and blame.
488    pub range: Range,
489}
490
491/// Collect every **non-include** file-path reference in the (merged)
492/// `document`: inline [`ReferenceType::File`] (`[./x.txt]`, `[../y]`,
493/// `[/abs]`) and the `src=` parameter of any verbatim block (image,
494/// data, video, …) — `lex.include` excepted (see below).
495///
496/// This is the pure (no-IO) half of the `check --references` file-path
497/// check: it gathers the targets and their origin-stamped ranges; the
498/// caller performs filesystem resolution + existence (which needs a
499/// resolution root and disk access, neither of which belongs in a pure
500/// `&Document` analysis).
501///
502/// `lex.include src=` is intentionally **not** collected: it is an
503/// *annotation*, not a verbatim block, so it never matches the verbatim
504/// `src=` arm — and after include expansion it has been spliced out
505/// entirely (its path already validated by the base command, #759).
506///
507/// Inline refs reuse [`extract_references`], whose ranges are already
508/// origin-stamped (see `inline::ReferenceWalker::make_range`). Verbatim
509/// `src=` carries the verbatim node's own range, which the include
510/// resolver stamps with the authoring file's origin.
511pub fn collect_file_references(document: &Document) -> Vec<FileReference> {
512    use lex_core::lex::ast::traits::AstNode;
513
514    let mut refs = Vec::new();
515
516    // Inline `[./x]` file references — origin-stamped via extract_references.
517    crate::utils::for_each_text_content(document, &mut |text| {
518        for reference in extract_references(text) {
519            if let ReferenceType::File { target } = &reference.reference_type {
520                if !target.trim().is_empty() {
521                    refs.push(FileReference {
522                        target: target.clone(),
523                        range: reference.range.clone(),
524                    });
525                }
526            }
527        }
528    });
529
530    // Any verbatim block's `src=` parameter (image, data, video, …). The
531    // verbatim's own range carries its origin; `lex.include` is an
532    // annotation, not a verbatim block, so it is structurally excluded
533    // here.
534    //
535    // Two normalizations the inline path gets for free but verbatim does
536    // not, because a verbatim `src=` parameter is *not* pre-classified:
537    //
538    // - **Unquote.** `src="./x.png"` stores the raw, still-quoted value
539    //   on `Parameter.value`; we resolve a *path*, so unquote via the
540    //   canonical `Parameter::unquoted_value` (the same path
541    //   `Annotation::include_src` takes) — otherwise existence-checks
542    //   look for a filename that literally includes the quotes.
543    // - **Skip URLs.** The media `src` is documented as "URL or path", so
544    //   `src=https://…/d.png` is a URL, not a local file; checking it on
545    //   disk would be a guaranteed false positive. (Inline refs are
546    //   already classified `Url` vs `File`, so only this arm needs it.)
547    for item in document.root.iter_all_nodes() {
548        if let ContentItem::VerbatimBlock(verbatim) = item {
549            if let Some(param) = verbatim
550                .closing_data
551                .parameters
552                .iter()
553                .find(|p| p.key == "src")
554            {
555                let target = param.unquoted_value();
556                let trimmed = target.trim();
557                if !trimmed.is_empty() && !is_url_like(trimmed) {
558                    refs.push(FileReference {
559                        target: target.clone(),
560                        range: verbatim.range().clone(),
561                    });
562                }
563            }
564        }
565    }
566
567    refs
568}
569
570/// Is `src` a URL rather than a local file path? Mirrors the inline
571/// reference classifier's URL detection (`http://`, `https://`,
572/// `mailto:`) plus a generic `scheme://` catch, so a verbatim
573/// `src=<url>` is excluded from the file-path existence check the same
574/// way an inline `[<url>]` is classified `Url` and skipped.
575///
576/// The generic `scheme://` arm requires a *real* URL scheme rather than
577/// a bare `"://"` substring: the part before `://` must be a valid
578/// RFC 3986 scheme — start with an ASCII letter, then ASCII
579/// alphanumerics / `+` / `-` / `.` — and be at least two characters
580/// long. The length-≥2 floor is the point of the fix: a single-letter
581/// "scheme" is exactly the Windows drive-letter ambiguity, so `C://path`
582/// is *not* treated as a URL (it falls through to be resolved / flagged
583/// as a platform-absolute path), while every real scheme we care about
584/// (`http`, `https`, …) is ≥2 chars and still matches. `mailto:` has no
585/// `//`, so it keeps its own explicit prefix arm.
586fn is_url_like(src: &str) -> bool {
587    src.starts_with("http://")
588        || src.starts_with("https://")
589        || src.starts_with("mailto:")
590        || has_url_scheme(src)
591}
592
/// Report whether `src` starts with a genuine `scheme://`: an RFC 3986
/// scheme at least two characters long. A Windows drive path such as
/// `C://…` does not qualify.
fn has_url_scheme(src: &str) -> bool {
    match src.find("://") {
        None => false,
        Some(separator) => {
            let scheme = &src[..separator];
            let mut rest = scheme.chars();
            // First character must be an ASCII letter; the remaining
            // ones may also be digits or `+` / `-` / `.`.
            let starts_with_letter = rest.next().is_some_and(|c| c.is_ascii_alphabetic());
            starts_with_letter
                && scheme.len() >= 2
                && rest.all(|c| c.is_ascii_alphanumeric() || matches!(c, '+' | '-' | '.'))
        }
    }
}
605
606/// Walk every label site in the document and re-classify via
607/// [`classify_label`](lex_core::lex::assembling::stages::normalize_labels::classify_label).
608/// Emits diagnostics for sites that strict-mode parsing would have
609/// rejected — `doc.*` (forbidden) and unknown `lex.*` (not a
610/// registered canonical). The LSP-side permissive parse keeps the
611/// AST building so these surface as in-place diagnostics rather than
612/// as a wholesale parse failure.
613fn check_labels(document: &Document, diagnostics: &mut Vec<AnalysisDiagnostic>) {
614    use lex_core::lex::assembling::stages::normalize_labels::{
615        classify_label, RejectReason, Resolution,
616    };
617    use lex_core::lex::ast::Label;
618
619    fn emit(label: &Label, diagnostics: &mut Vec<AnalysisDiagnostic>) {
620        if let Resolution::Rejected(reason) = classify_label(&label.value) {
621            // Reuse the normative wording from `RejectReason::message()`
622            // so the strict-mode parser error and the permissive-mode
623            // analysis diagnostic stay literally identical — no chance
624            // of wording drift between the two surfaces.
625            let message = reason.message();
626            let kind = match reason {
627                RejectReason::Forbidden { .. } => DiagnosticKind::ForbiddenLabelPrefix,
628                RejectReason::UnknownCanonical { .. } => DiagnosticKind::UnknownLexCanonical,
629            };
630            diagnostics.push(AnalysisDiagnostic {
631                range: label.location.clone(),
632                severity: DiagnosticSeverity::Error,
633                kind,
634                message,
635            });
636        }
637    }
638
639    // Unified dispatch: every ContentItem flows through `walk_item`,
640    // which emits the type-specific label sites (annotation label,
641    // verbatim closer label, table cells/footnotes) exactly once and
642    // then defers to `attached_annotations` + `item.children()` for
643    // the uniform recursion. The earlier shape had type-specific
644    // walkers (`walk_annotation`, `walk_verbatim`, `walk_table`) that
645    // descended on their own and then `walk_item` descended again —
646    // duplicate-walk regression caught by Copilot's review on PR 589.
647    fn walk_item(item: &ContentItem, diagnostics: &mut Vec<AnalysisDiagnostic>) {
648        match item {
649            ContentItem::Annotation(a) => emit(&a.data.label, diagnostics),
650            ContentItem::VerbatimBlock(v) => emit(&v.closing_data.label, diagnostics),
651            ContentItem::Table(t) => {
652                for row in t.header_rows.iter().chain(t.body_rows.iter()) {
653                    for cell in &row.cells {
654                        for child in cell.children.iter() {
655                            walk_item(child, diagnostics);
656                        }
657                    }
658                }
659                if let Some(footnotes) = t.footnotes.as_ref() {
660                    for ann in footnotes.annotations() {
661                        walk_annotation(ann, diagnostics);
662                    }
663                    for fn_item in footnotes.items.iter() {
664                        walk_item(fn_item, diagnostics);
665                    }
666                }
667            }
668            _ => {}
669        }
670        // Attached annotations (sessions, paragraphs, lists, list
671        // items, verbatim blocks, tables — see `attached_annotations`).
672        if let Some(attached) = attached_annotations(item) {
673            for annotation in attached {
674                walk_annotation(annotation, diagnostics);
675            }
676        }
677        // Generic child descent. For ContentItem::Annotation,
678        // `item.children()` returns the annotation's body children, so
679        // type-specific walking of nested annotations is not needed.
680        if let Some(children) = item.children() {
681            for child in children {
682                walk_item(child, diagnostics);
683            }
684        }
685    }
686
687    fn walk_annotation(annotation: &Annotation, diagnostics: &mut Vec<AnalysisDiagnostic>) {
688        emit(&annotation.data.label, diagnostics);
689        for child in annotation.children.iter() {
690            walk_item(child, diagnostics);
691        }
692    }
693
694    fn walk_session(session: &Session, diagnostics: &mut Vec<AnalysisDiagnostic>) {
695        for annotation in session.annotations() {
696            walk_annotation(annotation, diagnostics);
697        }
698        for child in &session.children {
699            walk_item(child, diagnostics);
700        }
701    }
702
703    fn attached_annotations(item: &ContentItem) -> Option<&[Annotation]> {
704        match item {
705            ContentItem::Session(s) => Some(s.annotations()),
706            ContentItem::Paragraph(p) => Some(p.annotations()),
707            ContentItem::Definition(d) => Some(d.annotations()),
708            ContentItem::List(l) => Some(l.annotations()),
709            ContentItem::ListItem(li) => Some(li.annotations()),
710            ContentItem::VerbatimBlock(v) => Some(v.annotations()),
711            ContentItem::Table(t) => Some(t.annotations()),
712            _ => None,
713        }
714    }
715
716    // Document-level annotations.
717    for annotation in document.annotations() {
718        walk_annotation(annotation, diagnostics);
719    }
720    // Root session walks.
721    walk_session(&document.root, diagnostics);
722}
723
724fn check_footnotes(document: &Document, diagnostics: &mut Vec<AnalysisDiagnostic>) {
725    // Numbered definitions reachable from outside any table: :: notes ::
726    // annotated lists at document or session scope.
727    let outer_defs: HashSet<u32> = crate::utils::collect_footnote_definitions(document)
728        .into_iter()
729        .filter_map(|(label, _)| label.parse::<u32>().ok())
730        .collect();
731
732    // References outside tables resolve to `outer_defs`; references inside a
733    // table resolve first to that table's own positional footnote list
734    // (`table.footnotes`) and then fall back to `outer_defs`.
735    if let Some(title) = &document.title {
736        check_text(&title.content, &outer_defs, diagnostics);
737    }
738    for annotation in document.annotations() {
739        check_annotation(annotation, &outer_defs, diagnostics);
740    }
741    check_session(&document.root, &outer_defs, diagnostics);
742}
743
744fn check_session(
745    session: &Session,
746    defs: &HashSet<u32>,
747    diagnostics: &mut Vec<AnalysisDiagnostic>,
748) {
749    check_text(&session.title, defs, diagnostics);
750    for annotation in session.annotations() {
751        check_annotation(annotation, defs, diagnostics);
752    }
753    for child in session.children.iter() {
754        check_content(child, defs, diagnostics);
755    }
756}
757
758fn check_content(
759    item: &ContentItem,
760    defs: &HashSet<u32>,
761    diagnostics: &mut Vec<AnalysisDiagnostic>,
762) {
763    match item {
764        ContentItem::Paragraph(p) => {
765            for line in &p.lines {
766                if let ContentItem::TextLine(tl) = line {
767                    check_text(&tl.content, defs, diagnostics);
768                }
769            }
770            for annotation in p.annotations() {
771                check_annotation(annotation, defs, diagnostics);
772            }
773        }
774        ContentItem::Session(s) => check_session(s, defs, diagnostics),
775        ContentItem::List(list) => {
776            for annotation in list.annotations() {
777                check_annotation(annotation, defs, diagnostics);
778            }
779            for entry in &list.items {
780                if let ContentItem::ListItem(li) = entry {
781                    for text in &li.text {
782                        check_text(text, defs, diagnostics);
783                    }
784                    for annotation in li.annotations() {
785                        check_annotation(annotation, defs, diagnostics);
786                    }
787                    for child in li.children.iter() {
788                        check_content(child, defs, diagnostics);
789                    }
790                }
791            }
792        }
793        ContentItem::Definition(def) => {
794            check_text(&def.subject, defs, diagnostics);
795            for annotation in def.annotations() {
796                check_annotation(annotation, defs, diagnostics);
797            }
798            for child in def.children.iter() {
799                check_content(child, defs, diagnostics);
800            }
801        }
802        ContentItem::Annotation(a) => check_annotation(a, defs, diagnostics),
803        ContentItem::VerbatimBlock(v) => {
804            check_text(&v.subject, defs, diagnostics);
805            for annotation in v.annotations() {
806                check_annotation(annotation, defs, diagnostics);
807            }
808        }
809        ContentItem::Table(table) => check_table(table, defs, diagnostics),
810        _ => {}
811    }
812}
813
814fn check_annotation(
815    annotation: &Annotation,
816    defs: &HashSet<u32>,
817    diagnostics: &mut Vec<AnalysisDiagnostic>,
818) {
819    for child in annotation.children.iter() {
820        check_content(child, defs, diagnostics);
821    }
822}
823
824fn check_table(
825    table: &Table,
826    outer_defs: &HashSet<u32>,
827    diagnostics: &mut Vec<AnalysisDiagnostic>,
828) {
829    // Extend the in-scope definitions with the table's positional footnote
830    // list. The table's own numbered items shadow nothing — they just add
831    // table-local numbers that references inside this table may resolve to.
832    // Fast path: most tables have no footnotes, so reuse `outer_defs` rather
833    // than cloning it into a new `HashSet` for every such table.
834    let table_defs = table_footnote_numbers(table);
835    if table_defs.is_empty() {
836        check_table_text(table, outer_defs, diagnostics);
837        return;
838    }
839    let mut scope = outer_defs.clone();
840    scope.extend(table_defs);
841    check_table_text(table, &scope, diagnostics);
842}
843
844fn check_table_text(table: &Table, defs: &HashSet<u32>, diagnostics: &mut Vec<AnalysisDiagnostic>) {
845    check_text(&table.subject, defs, diagnostics);
846    for row in table.all_rows() {
847        for cell in &row.cells {
848            check_text(&cell.content, defs, diagnostics);
849        }
850    }
851    for annotation in table.annotations() {
852        check_annotation(annotation, defs, diagnostics);
853    }
854}
855
856fn table_footnote_numbers(table: &Table) -> HashSet<u32> {
857    let Some(list) = &table.footnotes else {
858        return HashSet::new();
859    };
860    let mut numbers = HashSet::new();
861    for entry in &list.items {
862        if let ContentItem::ListItem(li) = entry {
863            let label = li
864                .marker()
865                .trim()
866                .trim_end_matches(['.', ')', ':'].as_ref())
867                .trim();
868            if let Ok(n) = label.parse::<u32>() {
869                numbers.insert(n);
870            }
871        }
872    }
873    numbers
874}
875
876fn check_text(text: &TextContent, defs: &HashSet<u32>, diagnostics: &mut Vec<AnalysisDiagnostic>) {
877    for reference in extract_references(text) {
878        if let ReferenceType::FootnoteNumber { number } = reference.reference_type {
879            if !defs.contains(&number) {
880                diagnostics.push(AnalysisDiagnostic {
881                    range: reference.range,
882                    severity: DiagnosticSeverity::Error,
883                    kind: DiagnosticKind::MissingFootnoteDefinition,
884                    message: format!(
885                        "Footnote [{number}] has no matching footnote definition in scope"
886                    ),
887                });
888            }
889        }
890    }
891}
892
893fn check_tables(document: &Document, diagnostics: &mut Vec<AnalysisDiagnostic>) {
894    visit_tables_in_session(&document.root, diagnostics);
895}
896
897fn visit_tables_in_session(session: &Session, diagnostics: &mut Vec<AnalysisDiagnostic>) {
898    for child in session.children.iter() {
899        visit_tables_in_content(child, diagnostics);
900    }
901}
902
903fn visit_tables_in_content(item: &ContentItem, diagnostics: &mut Vec<AnalysisDiagnostic>) {
904    match item {
905        ContentItem::Table(table) => check_table_columns(table, diagnostics),
906        ContentItem::Session(session) => visit_tables_in_session(session, diagnostics),
907        ContentItem::Definition(def) => {
908            for child in def.children.iter() {
909                visit_tables_in_content(child, diagnostics);
910            }
911        }
912        ContentItem::List(list) => {
913            for entry in &list.items {
914                if let ContentItem::ListItem(li) = entry {
915                    for child in li.children.iter() {
916                        visit_tables_in_content(child, diagnostics);
917                    }
918                }
919            }
920        }
921        ContentItem::Annotation(ann) => {
922            for child in ann.children.iter() {
923                visit_tables_in_content(child, diagnostics);
924            }
925        }
926        _ => {}
927    }
928}
929
930/// Check that all rows in a table have the same effective column count.
931///
932/// The effective width of a row accounts for both colspans of its own cells
933/// and rowspan carry-over from cells in prior rows that extend into it.
934/// Rows with different effective widths indicate a structural error (missing
935/// or extra cells).
936fn check_table_columns(table: &Table, diagnostics: &mut Vec<AnalysisDiagnostic>) {
937    let rows: Vec<_> = table.all_rows().collect();
938    if rows.len() < 2 {
939        return;
940    }
941
942    let widths = compute_row_widths(&rows);
943    let expected = widths[0];
944    for (i, &width) in widths.iter().enumerate().skip(1) {
945        if width != expected {
946            diagnostics.push(AnalysisDiagnostic {
947                range: rows[i].location.clone(),
948                severity: DiagnosticSeverity::Warning,
949                kind: DiagnosticKind::TableInconsistentColumns,
950                message: format!(
951                    "Row has {width} columns, expected {expected} (matching first row)"
952                ),
953            });
954        }
955    }
956}
957
958/// Simulate the virtual table grid to compute each row's effective width.
959///
960/// `carry[col]` tracks how many more rows (including the current one) a cell
961/// placed in a prior row still occupies column `col`. Own cells skip columns
962/// where `carry[col] > 0` (those are held by a cell from above via rowspan).
963fn compute_row_widths(rows: &[&TableRow]) -> Vec<usize> {
964    let mut carry: Vec<usize> = Vec::new();
965    let mut widths = Vec::with_capacity(rows.len());
966
967    for row in rows {
968        let mut col = 0;
969        for cell in &row.cells {
970            while col < carry.len() && carry[col] > 0 {
971                col += 1;
972            }
973            let end = col + cell.colspan;
974            if end > carry.len() {
975                carry.resize(end, 0);
976            }
977            for slot in carry.iter_mut().take(end).skip(col) {
978                *slot = cell.rowspan;
979            }
980            col = end;
981        }
982
983        let width = carry
984            .iter()
985            .rposition(|&r| r > 0)
986            .map(|i| i + 1)
987            .unwrap_or(0);
988        widths.push(width);
989
990        // Columns at or beyond `width` are guaranteed 0 (that's how width is
991        // defined), so limit the decrement to the active range and drop the
992        // trailing zeros to keep `carry` proportional to the live grid.
993        for c in carry.iter_mut().take(width) {
994            if *c > 0 {
995                *c -= 1;
996            }
997        }
998        carry.truncate(width);
999    }
1000
1001    widths
1002}
1003
1004#[cfg(test)]
1005mod tests {
1006    use super::*;
1007    use lex_core::lex::parsing::parse_document_permissive;
1008    use lex_core::lex::testing::lexplore::Lexplore;
1009
1010    fn unclosed_annotation_diags(source: &str) -> Vec<AnalysisDiagnostic> {
1011        let doc = parse_document_permissive(source).expect("permissive parse");
1012        analyze(&doc)
1013            .into_iter()
1014            .filter(|d| d.kind == DiagnosticKind::UnclosedAnnotation)
1015            .collect()
1016    }
1017
1018    #[test]
1019    fn unclosed_annotation_warns_on_open_form() {
1020        // `:: note severity=high` (no closing `::`) parses as a paragraph; the
1021        // analyser flags it so the author knows it isn't an annotation (lex#700).
1022        let diags = unclosed_annotation_diags("Open form:\n\t:: note severity=high\n");
1023        assert_eq!(diags.len(), 1, "expected one unclosed-annotation warning");
1024        assert_eq!(diags[0].severity, DiagnosticSeverity::Warning);
1025        assert_eq!(diags[0].kind.code(), "unclosed-annotation");
1026    }
1027
1028    #[test]
1029    fn unclosed_annotation_silent_on_closed_form_and_prose() {
1030        // A properly closed annotation is its own node, not a flagged paragraph.
1031        assert!(unclosed_annotation_diags(":: note severity=high ::\n\nBody.\n").is_empty());
1032        // Prose that merely mentions `::` is not flagged.
1033        assert!(unclosed_annotation_diags("Use :: to start a marker.\n").is_empty());
1034    }
1035
1036    #[test]
1037    fn looks_like_unclosed_annotation_heuristic() {
1038        assert!(looks_like_unclosed_annotation(":: note"));
1039        assert!(looks_like_unclosed_annotation("    :: note severity=high"));
1040        // A `::` inside a quoted value is not a structural close, so this is still
1041        // an unclosed annotation (lex#704 review).
1042        assert!(looks_like_unclosed_annotation(":: note foo=\":: value\""));
1043        assert!(!looks_like_unclosed_annotation(":: note ::"));
1044        assert!(!looks_like_unclosed_annotation(
1045            ":: note foo=\":: value\" ::"
1046        )); // real close
1047        assert!(!looks_like_unclosed_annotation("::note")); // no whitespace after marker
1048        assert!(!looks_like_unclosed_annotation("::")); // no label
1049        assert!(!looks_like_unclosed_annotation("just prose"));
1050    }
1051
1052    fn footnote_diags(doc: &Document) -> Vec<AnalysisDiagnostic> {
1053        analyze(doc)
1054            .into_iter()
1055            .filter(|d| d.kind == DiagnosticKind::MissingFootnoteDefinition)
1056            .collect()
1057    }
1058
1059    fn label_diags(source: &str) -> Vec<AnalysisDiagnostic> {
1060        let doc = parse_document_permissive(source).expect("permissive parse");
1061        analyze(&doc)
1062            .into_iter()
1063            .filter(|d| {
1064                matches!(
1065                    d.kind,
1066                    DiagnosticKind::ForbiddenLabelPrefix | DiagnosticKind::UnknownLexCanonical
1067                )
1068            })
1069            .collect()
1070    }
1071
1072    #[test]
1073    fn check_labels_emits_for_doc_prefix() {
1074        let diags = label_diags(":: doc.table :: x\n\nBody.\n");
1075        assert_eq!(diags.len(), 1, "expected 1 forbidden-prefix diagnostic");
1076        assert_eq!(diags[0].kind, DiagnosticKind::ForbiddenLabelPrefix);
1077        assert_eq!(diags[0].severity, DiagnosticSeverity::Error);
1078        assert!(
1079            diags[0].message.contains("doc.table") && diags[0].message.contains("reserved"),
1080            "message names the offending prefix; got: {}",
1081            diags[0].message
1082        );
1083    }
1084
1085    #[test]
1086    fn check_labels_emits_for_unknown_lex_canonical() {
1087        let diags = label_diags(":: lex.foobar :: x\n\nBody.\n");
1088        assert_eq!(diags.len(), 1, "expected 1 unknown-canonical diagnostic");
1089        assert_eq!(diags[0].kind, DiagnosticKind::UnknownLexCanonical);
1090        assert_eq!(diags[0].severity, DiagnosticSeverity::Error);
1091        assert!(
1092            diags[0].message.contains("lex.foobar"),
1093            "message names the offending label; got: {}",
1094            diags[0].message
1095        );
1096    }
1097
1098    #[test]
1099    fn check_labels_silent_on_accepted_forms() {
1100        // Shortcut, prefix-stripped, canonical, and community labels
1101        // all accept silently — analysis only flags the two reject
1102        // categories from `classify_label`.
1103        let sources = [
1104            ":: author :: Alice\n\nBody.\n",
1105            ":: metadata.author :: Alice\n\nBody.\n",
1106            ":: lex.metadata.author :: Alice\n\nBody.\n",
1107            ":: acme.task :: x\n\nBody.\n",
1108        ];
1109        for src in sources {
1110            let diags = label_diags(src);
1111            assert!(
1112                diags.is_empty(),
1113                "no label diagnostics expected for {src:?}; got {diags:?}"
1114            );
1115        }
1116    }
1117
1118    #[test]
1119    fn check_labels_finds_verbatim_closer_violations() {
1120        let diags =
1121            label_diags("Table:\n    | a | b |\n    |---|---|\n    | 1 | 2 |\n:: doc.table ::\n");
1122        assert_eq!(diags.len(), 1);
1123        assert_eq!(diags[0].kind, DiagnosticKind::ForbiddenLabelPrefix);
1124    }
1125
1126    #[test]
1127    fn check_labels_emits_each_offending_site_exactly_once() {
1128        // Regression for Copilot's PR 589 callout: the earlier
1129        // walker shape descended into a node's children twice (once
1130        // via the type-specific helper, once via the generic
1131        // `walk_item` fallback), which produced duplicate
1132        // diagnostics for any forbidden label nested inside another
1133        // label-bearing site. Three nested + adjacent forbidden
1134        // labels should produce exactly three diagnostics, not six.
1135        let src = ":: doc.outer ::\n    :: doc.inner :: nested body\n\n:: doc.sibling :: x\n";
1136        let diags = label_diags(src);
1137        assert_eq!(
1138            diags.len(),
1139            3,
1140            "exactly one diagnostic per offending site: {diags:?}"
1141        );
1142        for d in &diags {
1143            assert_eq!(d.kind, DiagnosticKind::ForbiddenLabelPrefix);
1144        }
1145    }
1146
1147    #[test]
1148    fn detects_missing_footnote_definition() {
1149        let doc = Lexplore::footnotes(1).parse().unwrap();
1150        let diags = analyze(&doc);
1151        assert_eq!(diags.len(), 1);
1152        assert_eq!(diags[0].kind, DiagnosticKind::MissingFootnoteDefinition);
1153    }
1154
1155    #[test]
1156    fn ignores_valid_footnote_with_notes_annotation() {
1157        // :: notes :: annotated list at the document root provides the definitions
1158        let doc = Lexplore::footnotes(2).parse().unwrap();
1159        assert!(footnote_diags(&doc).is_empty());
1160    }
1161
1162    #[test]
1163    fn ignores_valid_list_footnote_in_session() {
1164        // :: notes :: inside a session
1165        let doc = Lexplore::footnotes(3).parse().unwrap();
1166        assert!(footnote_diags(&doc).is_empty());
1167    }
1168
1169    #[test]
1170    fn list_without_notes_annotation_is_not_footnotes() {
1171        // A "Notes" session without :: notes :: does NOT define footnotes
1172        let doc = Lexplore::footnotes(4).parse().unwrap();
1173        assert_eq!(footnote_diags(&doc).len(), 1);
1174    }
1175
1176    fn table_diags(doc: &Document) -> Vec<AnalysisDiagnostic> {
1177        analyze(doc)
1178            .into_iter()
1179            .filter(|d| d.kind == DiagnosticKind::TableInconsistentColumns)
1180            .collect()
1181    }
1182
1183    #[test]
1184    fn detects_inconsistent_table_columns() {
1185        // table-13: 3-col header, 2-col row, 3-col row — middle row is short.
1186        let doc = Lexplore::table(13).parse().unwrap();
1187        let diags = table_diags(&doc);
1188        assert_eq!(diags.len(), 1);
1189        assert!(diags[0].message.contains("2 columns"));
1190        assert!(diags[0].message.contains("expected 3"));
1191    }
1192
1193    #[test]
1194    fn consistent_table_no_diagnostic() {
1195        // table-01: minimal 2-column table, all rows consistent.
1196        let doc = Lexplore::table(1).parse().unwrap();
1197        assert!(table_diags(&doc).is_empty());
1198    }
1199
1200    #[test]
1201    fn table_with_rowspan_counts_carry_over() {
1202        // table-17: rowspan via ^^ — effective widths remain consistent across rows.
1203        let doc = Lexplore::table(17).parse().unwrap();
1204        let diags = table_diags(&doc);
1205        assert!(
1206            diags.is_empty(),
1207            "rowspan carry-over should not trigger inconsistent-columns, got: {diags:?}"
1208        );
1209    }
1210
1211    #[test]
1212    fn table_with_colspan_and_rowspan_mixed() {
1213        // table-18: combined >> colspan and ^^ rowspan; effective widths stay consistent.
1214        let doc = Lexplore::table(18).parse().unwrap();
1215        let diags = table_diags(&doc);
1216        assert!(
1217            diags.is_empty(),
1218            "mixed colspan/rowspan should not trigger inconsistent-columns, got: {diags:?}"
1219        );
1220    }
1221
1222    #[test]
1223    fn table_with_colspan_counts_effective_width() {
1224        // table-04: colspan via >> contributes to effective width; all rows consistent.
1225        let doc = Lexplore::table(4).parse().unwrap();
1226        assert!(table_diags(&doc).is_empty());
1227    }
1228
1229    #[test]
1230    fn footnote_ref_in_table_cell_is_checked() {
1231        // footnotes-09: table cell contains [1] but no footnote definition
1232        // anywhere in scope — document, session, or table-local.
1233        let doc = Lexplore::footnotes(9).parse().unwrap();
1234        let diags = footnote_diags(&doc);
1235        assert_eq!(diags.len(), 1);
1236        assert!(diags[0].message.contains("[1]"));
1237    }
1238
1239    #[test]
1240    fn table_scoped_footnotes_resolve_cell_refs() {
1241        // footnotes-11: cell refs [1] and [2] resolve to the table's own
1242        // positional footnote list (no :: notes :: annotation needed).
1243        let doc = Lexplore::footnotes(11).parse().unwrap();
1244        let diags = footnote_diags(&doc);
1245        assert!(
1246            diags.is_empty(),
1247            "table-scoped cell refs should resolve to table.footnotes, got: {diags:?}"
1248        );
1249    }
1250
1251    #[test]
1252    fn table_scoped_footnotes_do_not_leak_out() {
1253        // footnotes-12: a [1] ref in body text outside the table must NOT
1254        // resolve to the table's own positional footnote list even when the
1255        // numbers happen to match. The table's list is table-local.
1256        let doc = Lexplore::footnotes(12).parse().unwrap();
1257        let diags = footnote_diags(&doc);
1258        assert_eq!(
1259            diags.len(),
1260            1,
1261            "only the paragraph ref [1] should be unresolved, got: {diags:?}"
1262        );
1263        assert!(diags[0].message.contains("[1]"));
1264    }
1265
1266    // ─────────────── apply_rules / DiagnosticKind::code ───────────────
1267
1268    fn dummy_diag(kind: DiagnosticKind, severity: DiagnosticSeverity) -> AnalysisDiagnostic {
1269        AnalysisDiagnostic {
1270            range: Range::default(),
1271            severity,
1272            kind,
1273            message: "test".into(),
1274        }
1275    }
1276
1277    #[test]
1278    fn diagnostic_kind_code_matches_lookup_for_every_builtin() {
1279        // Drift test: every built-in DiagnosticKind variant must have a
1280        // matching entry in DiagnosticsRulesConfig::lookup_by_code so
1281        // configuration overrides reach every rule.
1282        let rules = DiagnosticsRulesConfig::default();
1283        for kind in [
1284            DiagnosticKind::MissingFootnoteDefinition,
1285            DiagnosticKind::UnusedFootnoteDefinition,
1286            DiagnosticKind::TableInconsistentColumns,
1287            DiagnosticKind::ForbiddenLabelPrefix,
1288            DiagnosticKind::UnknownLexCanonical,
1289            DiagnosticKind::SchemaValidation(SchemaValidationKind::UnknownLabel),
1290            DiagnosticKind::SchemaValidation(SchemaValidationKind::MissingParam),
1291            DiagnosticKind::SchemaValidation(SchemaValidationKind::ParamTypeMismatch),
1292            DiagnosticKind::SchemaValidation(SchemaValidationKind::BadAttachment),
1293            DiagnosticKind::SchemaValidation(SchemaValidationKind::BodyShapeMismatch),
1294        ] {
1295            let code = kind.code();
1296            assert!(
1297                rules.lookup_by_code(&code).is_some(),
1298                "DiagnosticsRulesConfig is missing a field for built-in code {code:?} \
1299                 — add it to lookup_by_code (and likely as a struct field too)"
1300            );
1301        }
1302    }
1303
1304    #[test]
1305    fn handler_code_carries_namespace_prefix() {
1306        // Wire-shape contract (spec §9): the wire `code` is the
1307        // namespace-prefixed form so a `.lex.toml` rule like
1308        // `"acme.task-stuck" = "deny"` actually matches what the
1309        // handler emitted. The handler supplies the bare leaf (`code`
1310        // field on `Diagnostic`); the analyser glues on the namespace.
1311        let with_code = DiagnosticKind::Handler {
1312            namespace: "acme".into(),
1313            code: Some("task-stuck".into()),
1314        };
1315        assert_eq!(with_code.code(), "acme.task-stuck");
1316        // Code-less handler diagnostic gets a per-namespace fallback
1317        // — users can target it as `"acme.diagnostic" = "warn"` rather
1318        // than a single global literal.
1319        let without_code = DiagnosticKind::Handler {
1320            namespace: "acme".into(),
1321            code: None,
1322        };
1323        assert_eq!(without_code.code(), "acme.diagnostic");
1324    }
1325
1326    #[test]
1327    fn apply_rules_matches_extension_code_via_side_channel() {
1328        // End-to-end: handler emits `acme.foo`, user configured
1329        // `"acme.foo" = "allow"` in `[diagnostics.rules]` (now
1330        // captured into the LSP's `extension_diagnostic_rules`
1331        // side-channel by the `on_unknown_key` callback rather than
1332        // landing in a `#[serde(flatten)] extra` map); diagnostic
1333        // gets dropped.
1334        use std::collections::BTreeMap;
1335        // The closure mirrors `LoadedLexConfig::lookup_diagnostic_rule`:
1336        // built-in first, side-channel second.
1337        let lookup = |code: &str, side: &BTreeMap<String, lex_config::RuleConfig>| {
1338            DiagnosticsRulesConfig::default()
1339                .lookup_by_code(code)
1340                .cloned()
1341                .or_else(|| side.get(code).cloned())
1342        };
1343
1344        let side: BTreeMap<String, lex_config::RuleConfig> = [(
1345            "acme.foo".to_string(),
1346            lex_config::RuleConfig::Bare(Severity::Allow),
1347        )]
1348        .into_iter()
1349        .collect();
1350        let mut diags = vec![dummy_diag(
1351            DiagnosticKind::Handler {
1352                namespace: "acme".into(),
1353                code: Some("foo".into()),
1354            },
1355            DiagnosticSeverity::Error,
1356        )];
1357        apply_rules(&mut diags, |code| lookup(code, &side));
1358        assert!(diags.is_empty(), "allow drops the extension diagnostic");
1359
1360        // `warn` keeps the intrinsic severity (Error stays Error).
1361        let side: BTreeMap<String, lex_config::RuleConfig> = [(
1362            "acme.foo".to_string(),
1363            lex_config::RuleConfig::Bare(Severity::Warn),
1364        )]
1365        .into_iter()
1366        .collect();
1367        let mut diags = vec![dummy_diag(
1368            DiagnosticKind::Handler {
1369                namespace: "acme".into(),
1370                code: Some("foo".into()),
1371            },
1372            DiagnosticSeverity::Error,
1373        )];
1374        apply_rules(&mut diags, |code| lookup(code, &side));
1375        assert_eq!(diags.len(), 1);
1376        assert_eq!(
1377            diags[0].severity,
1378            DiagnosticSeverity::Error,
1379            "warn preserves the handler's intrinsic severity"
1380        );
1381
1382        // `deny` is a no-op when the intrinsic is already Error, but
1383        // still keeps the diagnostic — symmetry with built-ins.
1384        let side: BTreeMap<String, lex_config::RuleConfig> = [(
1385            "acme.foo".to_string(),
1386            lex_config::RuleConfig::Bare(Severity::Deny),
1387        )]
1388        .into_iter()
1389        .collect();
1390        let mut diags = vec![dummy_diag(
1391            DiagnosticKind::Handler {
1392                namespace: "acme".into(),
1393                code: Some("foo".into()),
1394            },
1395            DiagnosticSeverity::Error,
1396        )];
1397        apply_rules(&mut diags, |code| lookup(code, &side));
1398        assert_eq!(diags.len(), 1);
1399        assert_eq!(diags[0].severity, DiagnosticSeverity::Error);
1400
1401        // A configured rule whose code doesn't match the emitted one
1402        // passes the diagnostic through untouched.
1403        let side: BTreeMap<String, lex_config::RuleConfig> = [(
1404            "acme.other".to_string(),
1405            lex_config::RuleConfig::Bare(Severity::Allow),
1406        )]
1407        .into_iter()
1408        .collect();
1409        let mut diags = vec![dummy_diag(
1410            DiagnosticKind::Handler {
1411                namespace: "acme".into(),
1412                code: Some("foo".into()),
1413            },
1414            DiagnosticSeverity::Warning,
1415        )];
1416        apply_rules(&mut diags, |code| lookup(code, &side));
1417        assert_eq!(diags.len(), 1);
1418        assert_eq!(diags[0].severity, DiagnosticSeverity::Warning);
1419    }
1420
1421    #[test]
1422    fn apply_rules_allow_drops_diagnostic() {
1423        let mut diags = vec![dummy_diag(
1424            DiagnosticKind::MissingFootnoteDefinition,
1425            DiagnosticSeverity::Error,
1426        )];
1427        let rules = DiagnosticsRulesConfig {
1428            missing_footnote: lex_config::RuleConfig::Bare(Severity::Allow),
1429            ..Default::default()
1430        };
1431        apply_rules(&mut diags, |code| rules.lookup_by_code(code).cloned());
1432        assert!(diags.is_empty(), "allow should drop the diagnostic");
1433    }
1434
1435    #[test]
1436    fn apply_rules_deny_upgrades_to_error() {
1437        let mut diags = vec![dummy_diag(
1438            DiagnosticKind::TableInconsistentColumns,
1439            DiagnosticSeverity::Warning,
1440        )];
1441        let rules = DiagnosticsRulesConfig {
1442            table_inconsistent_columns: lex_config::RuleConfig::Bare(Severity::Deny),
1443            ..Default::default()
1444        };
1445        apply_rules(&mut diags, |code| rules.lookup_by_code(code).cloned());
1446        assert_eq!(diags.len(), 1);
1447        assert_eq!(diags[0].severity, DiagnosticSeverity::Error);
1448    }
1449
1450    #[test]
1451    fn apply_rules_warn_keeps_intrinsic_severity() {
1452        let mut diags = vec![dummy_diag(
1453            DiagnosticKind::TableInconsistentColumns,
1454            DiagnosticSeverity::Warning,
1455        )];
1456        let rules = DiagnosticsRulesConfig {
1457            table_inconsistent_columns: lex_config::RuleConfig::Bare(Severity::Warn),
1458            ..Default::default()
1459        };
1460        apply_rules(&mut diags, |code| rules.lookup_by_code(code).cloned());
1461        assert_eq!(diags.len(), 1);
1462        assert_eq!(
1463            diags[0].severity,
1464            DiagnosticSeverity::Warning,
1465            "warn should not change the intrinsic severity"
1466        );
1467    }
1468
1469    #[test]
1470    fn apply_rules_unknown_code_is_passthrough() {
1471        // An extension-emitted diagnostic with a code the registry
1472        // does not know about must pass through unmodified. The
1473        // handler's `code` is the bare leaf — the analyser glues on
1474        // `acme.` to produce wire `acme.unknown`.
1475        let mut diags = vec![dummy_diag(
1476            DiagnosticKind::Handler {
1477                namespace: "acme".into(),
1478                code: Some("unknown".into()),
1479            },
1480            DiagnosticSeverity::Warning,
1481        )];
1482        let rules = DiagnosticsRulesConfig::default();
1483        apply_rules(&mut diags, |code| rules.lookup_by_code(code).cloned());
1484        assert_eq!(diags.len(), 1, "unknown codes should pass through");
1485        assert_eq!(diags[0].severity, DiagnosticSeverity::Warning);
1486    }
1487
1488    #[test]
1489    fn apply_rules_preserves_order_of_kept_diagnostics() {
1490        // Mixed stream: one to drop, one to keep, one to upgrade.
1491        let mut diags = vec![
1492            dummy_diag(
1493                DiagnosticKind::MissingFootnoteDefinition,
1494                DiagnosticSeverity::Error,
1495            ),
1496            dummy_diag(
1497                DiagnosticKind::UnusedFootnoteDefinition,
1498                DiagnosticSeverity::Warning,
1499            ),
1500            dummy_diag(
1501                DiagnosticKind::TableInconsistentColumns,
1502                DiagnosticSeverity::Warning,
1503            ),
1504        ];
1505        let rules = DiagnosticsRulesConfig {
1506            missing_footnote: lex_config::RuleConfig::Bare(Severity::Allow),
1507            table_inconsistent_columns: lex_config::RuleConfig::Bare(Severity::Deny),
1508            ..Default::default()
1509        };
1510        apply_rules(&mut diags, |code| rules.lookup_by_code(code).cloned());
1511        assert_eq!(diags.len(), 2);
1512        assert_eq!(diags[0].kind, DiagnosticKind::UnusedFootnoteDefinition);
1513        assert_eq!(diags[0].severity, DiagnosticSeverity::Warning);
1514        assert_eq!(diags[1].kind, DiagnosticKind::TableInconsistentColumns);
1515        assert_eq!(diags[1].severity, DiagnosticSeverity::Error);
1516    }
1517
1518    // ========================================================================
1519    // analyze_references (opt-in `check --references`) unit tests
1520    // ========================================================================
1521
1522    fn reference_diags(source: &str) -> Vec<AnalysisDiagnostic> {
1523        let doc = parse_document_permissive(source).expect("permissive parse");
1524        analyze_references(&doc)
1525    }
1526
1527    fn ref_codes(source: &str) -> Vec<String> {
1528        let mut codes: Vec<String> = reference_diags(source)
1529            .into_iter()
1530            .map(|d| d.kind.code().into_owned())
1531            .collect();
1532        codes.sort();
1533        codes
1534    }
1535
1536    #[test]
1537    fn references_pass_is_not_run_by_the_always_on_analyser() {
1538        // A dangling definition reference produces nothing from `analyze`
1539        // (the always-on path) — only the opt-in pass flags it. This pins
1540        // the separation that keeps the LSP from emitting these unasked.
1541        let doc = parse_document_permissive("Body with a [Dangling] reference.\n")
1542            .expect("permissive parse");
1543        let always_on = analyze(&doc);
1544        assert!(
1545            always_on
1546                .iter()
1547                .all(|d| !d.kind.code().starts_with("missing-")
1548                    || d.kind == DiagnosticKind::MissingFootnoteDefinition),
1549            "always-on analyser must not emit reference-target diagnostics"
1550        );
1551    }
1552
1553    #[test]
1554    fn dangling_definition_reference_flagged() {
1555        let codes = ref_codes("1. Intro\n\n    See [Nope].\n");
1556        assert_eq!(codes, vec!["missing-definition-target"]);
1557    }
1558
1559    #[test]
1560    fn dangling_session_reference_flagged() {
1561        let codes = ref_codes("1. Intro\n\n    See [#9.9].\n");
1562        assert_eq!(codes, vec!["missing-session-target"]);
1563    }
1564
1565    #[test]
1566    fn dangling_annotation_reference_flagged() {
1567        let codes = ref_codes("1. Intro\n\n    See [::ghost].\n");
1568        assert_eq!(codes, vec!["missing-annotation-target"]);
1569    }
1570
1571    #[test]
1572    fn dangling_citation_flagged() {
1573        let codes = ref_codes("1. Intro\n\n    See [@missing2024].\n");
1574        assert_eq!(codes, vec!["missing-citation-target"]);
1575    }
1576
1577    #[test]
1578    fn resolved_references_are_clean() {
1579        // Definition + annotation + session all defined; references to
1580        // each resolve and produce no findings.
1581        let source = ":: mynote ::\n\
1582             \x20   Note body.\n\
1583             \n\
1584             Cache:\n\
1585             \x20   Definition body.\n\
1586             \n\
1587             2. Topic\n\
1588             \n\
1589             \x20   See [Cache] and [::mynote] and [#2].\n";
1590        assert!(
1591            reference_diags(source).is_empty(),
1592            "resolved references must be clean: {:?}",
1593            reference_diags(source)
1594        );
1595    }
1596
1597    #[test]
1598    fn citation_resolves_via_annotation_label() {
1599        // `[@spec]` resolves to a `:: spec ::` annotation (its label is a
1600        // citation key too).
1601        let source = ":: spec ::\n    Body.\n\n1. Intro\n\n    See [@spec].\n";
1602        assert!(reference_diags(source).is_empty());
1603    }
1604
1605    #[test]
1606    fn annotation_matching_is_case_insensitive() {
1607        // `[::MyNote]` resolves to `:: mynote ::` — resolution is
1608        // case-insensitive, mirroring `references::reference_matches`.
1609        let source = ":: mynote ::\n    Body.\n\n1. Intro\n\n    See [::MyNote].\n";
1610        assert!(reference_diags(source).is_empty());
1611    }
1612
1613    #[test]
1614    fn placeholders_never_flagged() {
1615        // `[TK]` / `[TK-id]` and an unclassifiable reference are
1616        // intentional placeholders — never flagged.
1617        assert!(reference_diags("1. Intro\n\n    A [TK] and [TK-later].\n").is_empty());
1618    }
1619
1620    #[test]
1621    fn each_unresolved_citation_key_is_flagged() {
1622        // A multi-key citation flags each unresolved key independently —
1623        // both `@a` and `@b`, not just the first. Exactly two pins the
1624        // per-key behaviour against a regression that reports only one.
1625        let diags = reference_diags("1. Intro\n\n    See [@a; @b].\n");
1626        let citation = diags
1627            .iter()
1628            .filter(|d| d.kind == DiagnosticKind::MissingCitationTarget)
1629            .count();
1630        assert_eq!(
1631            citation, 2,
1632            "both unresolved keys must be flagged: {diags:?}"
1633        );
1634    }
1635
1636    #[test]
1637    fn reference_findings_default_to_warning() {
1638        let diags = reference_diags("1. Intro\n\n    See [Nope].\n");
1639        assert_eq!(diags.len(), 1);
1640        assert_eq!(diags[0].severity, DiagnosticSeverity::Warning);
1641    }
1642
1643    // ========================================================================
1644    // URL well-formedness (issue #762). Validated inside analyze_references;
1645    // pure parse, no network.
1646    // ========================================================================
1647
1648    #[test]
1649    fn malformed_url_embedded_space_flagged() {
1650        // An embedded space makes the URL unparseable.
1651        let codes = ref_codes("1. Intro\n\n    See [https://exa mple.com].\n");
1652        assert_eq!(codes, vec!["malformed-url"]);
1653    }
1654
1655    #[test]
1656    fn malformed_url_empty_host_flagged() {
1657        // `https://` with no host is well-formed-prefix but empty-host.
1658        let codes = ref_codes("1. Intro\n\n    See [https:// ].\n");
1659        assert_eq!(codes, vec!["malformed-url"]);
1660    }
1661
1662    #[test]
1663    fn well_formed_https_url_not_flagged() {
1664        assert!(
1665            reference_diags("1. Intro\n\n    See [https://example.com/path?q=1].\n").is_empty(),
1666            "a well-formed https URL must not be flagged"
1667        );
1668    }
1669
1670    #[test]
1671    fn well_formed_http_url_not_flagged() {
1672        assert!(reference_diags("1. Intro\n\n    See [http://example.com].\n").is_empty());
1673    }
1674
1675    #[test]
1676    fn well_formed_mailto_not_flagged() {
1677        // `mailto:` has no host component — an empty host is expected and
1678        // must not be flagged.
1679        assert!(
1680            reference_diags("1. Intro\n\n    Write [mailto:hi@example.com].\n").is_empty(),
1681            "a well-formed mailto must not be flagged"
1682        );
1683    }
1684
1685    #[test]
1686    fn malformed_url_defaults_to_warning() {
1687        let diags = reference_diags("1. Intro\n\n    See [https://exa mple.com].\n");
1688        assert_eq!(diags.len(), 1);
1689        assert_eq!(diags[0].kind, DiagnosticKind::MalformedUrl);
1690        assert_eq!(diags[0].severity, DiagnosticSeverity::Warning);
1691    }
1692
1693    #[test]
1694    fn url_check_makes_no_network_calls_by_construction() {
1695        // `url_is_malformed` is a pure parse over a string — it borrows no
1696        // socket, client, or runtime, so the default check path cannot
1697        // make a network call. Exercising both a well-formed and a
1698        // malformed URL here documents that the only work done is parsing.
1699        assert!(!url_is_malformed("https://example.com"));
1700        assert!(url_is_malformed("https://exa mple.com"));
1701        assert!(!url_is_malformed("mailto:a@b.com"));
1702    }
1703
1704    // ========================================================================
1705    // collect_file_references (file-path pass, #761) unit tests
1706    // ========================================================================
1707
1708    fn file_ref_targets(source: &str) -> Vec<String> {
1709        let doc = parse_document_permissive(source).expect("permissive parse");
1710        let mut targets: Vec<String> = collect_file_references(&doc)
1711            .into_iter()
1712            .map(|r| r.target)
1713            .collect();
1714        targets.sort();
1715        targets
1716    }
1717
1718    #[test]
1719    fn collects_inline_file_references() {
1720        // The three inline file-reference shapes (`./`, `../`, `/`) are
1721        // all collected; a non-file `[General]` reference is not.
1722        let source = "1. Intro\n\n    See [./a.txt] and [../b] and [/c] but not [Nope].\n";
1723        assert_eq!(
1724            file_ref_targets(source),
1725            vec!["../b".to_string(), "./a.txt".to_string(), "/c".to_string()]
1726        );
1727    }
1728
1729    #[test]
1730    fn collects_verbatim_src_but_not_lex_include() {
1731        // An image verbatim `src=` is collected. `lex.include` is an
1732        // annotation (not a verbatim block) and is structurally excluded
1733        // — collecting it here would double-validate a path the base
1734        // command already checks via expansion.
1735        let source = "Photo:\n    Caption.\n:: image src=./diagram.png ::\n\n";
1736        assert_eq!(file_ref_targets(source), vec!["./diagram.png".to_string()]);
1737    }
1738
1739    #[test]
1740    fn verbatim_src_is_unquoted() {
1741        // A quoted `src="./x.png"` is collected as the bare path, not the
1742        // still-quoted raw value — otherwise the existence check looks for
1743        // a filename that literally includes the quotes.
1744        let source = "Photo:\n    Caption.\n:: image src=\"./diagram.png\" ::\n\n";
1745        assert_eq!(file_ref_targets(source), vec!["./diagram.png".to_string()]);
1746    }
1747
1748    #[test]
1749    fn ignores_url_references() {
1750        // URLs are out of scope for the file-path pass (#762 owns them).
1751        // Inline `[<url>]` is classified `Url` (not `File`); a verbatim
1752        // `src=<url>` is not pre-classified, so the collector filters it.
1753        assert!(file_ref_targets("1. Intro\n\n    See [https://example.com].\n").is_empty());
1754        assert!(file_ref_targets(
1755            "Photo:\n    Caption.\n:: image src=https://example.com/diagram.png ::\n\n"
1756        )
1757        .is_empty());
1758        // Quoted URL form, too.
1759        assert!(file_ref_targets(
1760            "Photo:\n    Caption.\n:: image src=\"https://example.com/diagram.png\" ::\n\n"
1761        )
1762        .is_empty());
1763    }
1764
1765    #[test]
1766    fn is_url_like_matches_real_schemes_not_windows_drives() {
1767        // A genuine `scheme://` URL is URL-like and filtered out.
1768        assert!(is_url_like("https://example.com"));
1769        assert!(is_url_like("http://example.com"));
1770        assert!(is_url_like("mailto:user@example.com"));
1771        // A length-≥2 custom scheme still matches.
1772        assert!(is_url_like("ftp://host/path"));
1773        // A Windows drive path is NOT a URL — its single-letter "scheme"
1774        // is exactly the ambiguity the length-≥2 floor disambiguates.
1775        assert!(!is_url_like("C://path"));
1776        assert!(!is_url_like("C:\\path"));
1777        // A plain relative path is not URL-like.
1778        assert!(!is_url_like("./rel/path"));
1779    }
1780}