Skip to main content

lex_core/lex/
includes.rs

1//! Include resolution for Lex documents.
2//!
3//! This module turns `:: lex.include src="..." ::` annotations into spliced
4//! content from the referenced files. It is *opt-in*: callers that want the
5//! unresolved tree (the formatter, tree-sitter parity, editor tooling that
6//! displays include statements as authored) skip this pass entirely. The
7//! parser itself never touches the filesystem — all I/O goes through the
8//! injected [`Loader`] trait.
9//!
10//! See `comms/specs/proposals/includes.lex` for the full design.
11//!
12//! # Status
13//!
//! This module is being built up across PRs 3–7:
15//!
16//! - PR 3: skeleton — trait, config, errors, stub.
17//! - PR 4: single-pass splice + container-policy validation +
18//!   doc-title/doc-annotation conversion + origin stamping + root-escape
19//!   check.
20//! - PR 5: recursive resolution into included files + cycle detection
21//!   (chain stack) + depth limit. Each loaded file gets walked in its OWN
22//!   directory, so relative paths inside an included file resolve from
23//!   that file's directory, not the entry's.
24//! - PR 6: origin-aware reference helpers. [`resolve_file_reference`]
25//!   resolves a `ReferenceType::File` target from the authoring file's
26//!   directory using `Range.origin_path`.
27//!   `Document::find_annotation_by_label_in_origin` scopes footnote
28//!   lookups to the file the reference was authored in.
29//! - PR 7 (this PR): [`FsLoader`] — production loader that reads from the
30//!   filesystem with `std::fs::read_to_string`. CLI wires the resolver
31//!   into `lex convert` and `lex inspect` (default-on, opt-out via
32//!   `--no-includes`); `lex format` never expands.
33//!
34//! # Layering
35//!
36//! Of all of lex-core, only [`FsLoader`] references `std::fs`. The
37//! resolver itself does no I/O — it always goes through the [`Loader`]
38//! trait. Callers can swap loaders to keep the resolver sandboxed:
39//!
40//! - The LSP wraps [`FsLoader`] with file-watch invalidation (PR 8).
41//! - WASM builds provide a JS-backed loader instead of [`FsLoader`].
42//! - Tests use [`MemoryLoader`] (gated behind `test-support`).
43//!
44//! For tests, lex-core itself ships [`MemoryLoader`] gated behind the
45//! `test-support` cargo feature. It is not intended for production use.
46
47use crate::lex::assembling::AttachAnnotations;
48use crate::lex::ast::elements::container::GeneralContainer;
49use crate::lex::ast::elements::content_item::ContentItem;
50use crate::lex::ast::elements::paragraph::Paragraph;
51use crate::lex::ast::elements::session::Session;
52use crate::lex::ast::range::Range;
53use crate::lex::ast::Document;
54use crate::lex::transforms::Runnable;
55use std::path::{Path, PathBuf};
56use std::sync::Arc;
57
/// Settings that drive the include resolution pass.
#[derive(Debug, Clone)]
pub struct ResolveConfig {
    /// Root directory that every include path must stay under. An include
    /// whose normalized path falls outside this root is reported as an
    /// [`IncludeError::RootEscape`].
    ///
    /// Must be an **absolute** path. The resolver normalizes lexically, and
    /// lexical normalization treats `.` and `..` against an empty buffer as
    /// no-ops, so a relative or unnormalized root weakens the root-escape
    /// prefix check. Callers (CLI, LSP) should canonicalize the root before
    /// building a `ResolveConfig`.
    pub root: PathBuf,
    /// Maximum include depth; defaults to
    /// [`ResolveConfig::DEFAULT_MAX_DEPTH`] (8). Reaching the limit is an
    /// error, never a silent truncation.
    pub max_depth: usize,
}

impl ResolveConfig {
    /// Default maximum include depth. Deep enough for any reasonable
    /// atomization strategy (aggregator → per-chapter → per-section), yet
    /// small enough to keep the resolver's worst-case work predictable.
    pub const DEFAULT_MAX_DEPTH: usize = 8;

    /// Build a config for `root` using the default depth limit.
    pub fn with_root(root: PathBuf) -> Self {
        let max_depth = Self::DEFAULT_MAX_DEPTH;
        Self { root, max_depth }
    }
}
89
/// A pluggable source-text loader.
///
/// Implementations decide where bytes come from (filesystem, in-memory map,
/// virtual filesystem, content-addressed store, …). The resolver performs
/// all of its I/O through this trait and never references `std::fs` itself;
/// that keeps the resolver pure and usable in WASM, sandboxes, and unit
/// tests.
pub trait Loader {
    /// Load the source text for `path`.
    ///
    /// `path` is the absolute path the resolver settled on after applying
    /// the rules in §4 of the proposal (root-relative vs. host-relative
    /// lookup, lexical normalization, root-escape check).
    ///
    /// # Errors
    ///
    /// Returns [`LoadError::NotFound`] when nothing exists at `path`, and
    /// [`LoadError::Io`] for any other failure to read it.
    fn load(&self, path: &Path) -> Result<String, LoadError>;
}
102
/// Failures a [`Loader`] may report.
#[derive(Debug, Clone)]
pub enum LoadError {
    /// Nothing exists at the requested path.
    NotFound { path: PathBuf },
    /// An underlying I/O failure (or the virtual-filesystem equivalent).
    Io { path: PathBuf, message: String },
}

impl std::fmt::Display for LoadError {
    /// Render a single-line, human-readable description of the failure.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let shown = match self {
            Self::NotFound { path } => format!("include not found: {}", path.display()),
            Self::Io { path, message } => {
                format!("io error reading {}: {message}", path.display())
            }
        };
        f.write_str(&shown)
    }
}

impl std::error::Error for LoadError {}
124
125/// Errors the include resolver can produce.
126#[derive(Debug, Clone)]
127pub enum IncludeError {
128    /// An include chain looped back on itself. `chain` is the resolution
129    /// stack at the moment the duplicate `path` was about to be pushed,
130    /// in source-order (entry first, deepest last). `include_site` is the
131    /// range of the offending `lex.include` annotation in its host file —
132    /// useful for diagnostics that highlight the exact line.
133    Cycle {
134        include_site: Range,
135        path: PathBuf,
136        chain: Vec<PathBuf>,
137    },
138    /// The include depth exceeded [`ResolveConfig::max_depth`]. `chain`
139    /// shows the resolution stack at the moment of failure, in source
140    /// order. `include_site` is the range of the offending
141    /// `lex.include` annotation in its host file.
142    DepthExceeded {
143        include_site: Range,
144        limit: usize,
145        chain: Vec<PathBuf>,
146    },
147    /// A path resolved outside the configured [`ResolveConfig::root`].
148    RootEscape { path: PathBuf, root: PathBuf },
149    /// The loader could not find or read the included file. `include_site`
150    /// is the range of the offending `lex.include` annotation in its host
151    /// file, so editors can squiggle the line that asked for the missing
152    /// file rather than the document head.
153    NotFound { include_site: Range, path: PathBuf },
154    /// The loader returned text that the parser rejected.
155    ParseFailed { path: PathBuf, message: String },
156    /// The included file's content is not legal in the include site's
157    /// parent container.
158    ///
159    /// Today this only occurs when an included file has top-level Sessions
160    /// and the include site is inside a `GeneralContainer` (Definition,
161    /// ListItem, or another Annotation's body). The `violation` field
162    /// names the offending content kind (e.g. `"Sessions"`) so future
163    /// container/policy combinations can reuse this variant without a
164    /// breaking change.
165    ContainerPolicy {
166        include_site: Range,
167        container: &'static str,
168        file: PathBuf,
169        violation: &'static str,
170    },
171    /// Loader propagated a non-`NotFound` I/O error.
172    LoaderIo { path: PathBuf, message: String },
173    /// `lex.include` annotation was missing the mandatory `src=` parameter.
174    MissingSrc { include_site: Range },
175}
176
177impl std::fmt::Display for IncludeError {
178    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
179        match self {
180            IncludeError::Cycle { path, chain, .. } => {
181                let chain_display: Vec<String> =
182                    chain.iter().map(|p| p.display().to_string()).collect();
183                write!(
184                    f,
185                    "include cycle: {} (chain: {})",
186                    path.display(),
187                    chain_display.join(" -> ")
188                )
189            }
190            IncludeError::DepthExceeded { limit, chain, .. } => {
191                let chain_display: Vec<String> =
192                    chain.iter().map(|p| p.display().to_string()).collect();
193                write!(
194                    f,
195                    "include depth exceeded limit of {limit} (chain: {})",
196                    chain_display.join(" -> ")
197                )
198            }
199            IncludeError::RootEscape { path, root } => write!(
200                f,
201                "include path {} escapes resolution root {}",
202                path.display(),
203                root.display()
204            ),
205            IncludeError::NotFound { path, .. } => {
206                write!(f, "include not found: {}", path.display())
207            }
208            IncludeError::ParseFailed { path, message } => {
209                write!(f, "failed to parse {}: {message}", path.display())
210            }
211            IncludeError::ContainerPolicy {
212                container,
213                file,
214                violation,
215                ..
216            } => write!(
217                f,
218                "included file {} contains {} but include site is inside {} \
219                 (which does not allow {})",
220                file.display(),
221                violation,
222                container,
223                violation
224            ),
225            IncludeError::LoaderIo { path, message } => {
226                write!(f, "loader error reading {}: {message}", path.display())
227            }
228            IncludeError::MissingSrc { .. } => {
229                write!(f, "lex.include annotation missing required src= parameter")
230            }
231        }
232    }
233}
234
235impl std::error::Error for IncludeError {}
236
237// No `From<LoadError>` impl: `IncludeError::NotFound` carries the include
238// site (the `lex.include` annotation's range), which a loader doesn't know
239// about. Callers map `LoadError` explicitly at the call site, where the
240// site is available.
241
/// The kind of container an include site lives in. It selects the
/// splice-time policy check; the only rule today is "no Sessions inside a
/// `GeneralContainer`".
#[derive(Debug, Clone, Copy)]
enum ContainerKind {
    /// `Document.root.children` or `Session.children` — accepts everything.
    Session,
    /// `Definition.children` — a `GeneralContainer`.
    Definition,
    /// `Annotation.children` — a `GeneralContainer`.
    AnnotationBody,
    /// `ListItem.children` — a `GeneralContainer`.
    ListItem,
}

impl ContainerKind {
    /// Human-readable container name used in diagnostics.
    fn name(self) -> &'static str {
        match self {
            Self::Session => "Session",
            Self::Definition => "Definition",
            Self::AnnotationBody => "Annotation body",
            Self::ListItem => "ListItem",
        }
    }

    /// Whether Sessions may be spliced into this container. Spelled out per
    /// variant so a newly added kind forces an explicit decision.
    fn allows_sessions(self) -> bool {
        match self {
            Self::Session => true,
            Self::Definition | Self::AnnotationBody | Self::ListItem => false,
        }
    }
}
270
271/// Resolve `:: lex.include ::` annotations starting from `source`, recursively.
272///
273/// `source_path` identifies the entry-point file. It is used to (a) resolve
274/// relative include paths against the entry file's directory, (b) stamp
275/// `Range.origin_path` on every node so downstream code (file-ref resolution,
276/// diagnostics, LSP goto) can report locations against the authoring file,
277/// and (c) seed the cycle-detection chain so an include cycle that loops
278/// back to the entry is caught. When `None`, relative paths resolve against
279/// `config.root`, origin stamping is skipped on the entry, and the chain
280/// starts empty.
281///
282/// # Pre/post-attachment
283///
284/// Internally this re-parses each source (entry + every loaded file) *without*
285/// annotation attachment so `lex.include` annotations are visible as standalone
286/// children where the splice can replace them in-place. After all splices,
287/// [`AttachAnnotations`] runs once on the merged tree, which lands the include
288/// annotation on the first spliced node by the standard "attach to next
289/// sibling" rule. This matches the textual paste mental model from the proposal.
290///
291/// # Recursion
292///
293/// Each loaded file is fully resolved (its own includes replaced) *before*
294/// being spliced into the host. The recursion uses each file's own directory
295/// as `host_dir`, so a relative path inside an included file resolves from
296/// that file's location — not the entry's. An active-chain stack of
297/// canonicalized paths gates against cycles; the depth counter gates against
298/// pathological nesting (default 8, configurable via [`ResolveConfig::max_depth`]).
299pub fn resolve_from_source(
300    source: &str,
301    source_path: Option<PathBuf>,
302    config: &ResolveConfig,
303    loader: &dyn Loader,
304) -> Result<Document, IncludeError> {
305    let entry_origin = source_path.as_ref().map(|p| Arc::new(p.clone()));
306    let host_dir = source_path
307        .as_ref()
308        .and_then(|p| p.parent().map(Path::to_path_buf))
309        .unwrap_or_else(|| config.root.clone());
310
311    let mut doc = parse_no_attach(source).map_err(|message| IncludeError::ParseFailed {
312        path: source_path.clone().unwrap_or_default(),
313        message,
314    })?;
315
316    if let Some(origin) = entry_origin.as_ref() {
317        stamp_doc(&mut doc, origin);
318    }
319
320    // Seed the chain with the lexically-normalized entry path (when known)
321    // so an include that loops back to the entry is detected as a cycle.
322    // Normalization here is essential — `target_path` values produced by
323    // `resolve_path` are also lexically normalized, so an unnormalized
324    // entry would never compare equal to its normalized self.
325    let mut chain: Vec<PathBuf> = source_path
326        .as_ref()
327        .map(|p| vec![lexical_normalize(p)])
328        .unwrap_or_default();
329    let mut state = ResolverState {
330        config,
331        loader,
332        chain: &mut chain,
333        depth: 0,
334    };
335
336    splice_in_session_container(doc.root.children.as_mut_vec(), &host_dir, &mut state)?;
337
338    let doc = AttachAnnotations::new()
339        .run(doc)
340        .map_err(|e| IncludeError::ParseFailed {
341            path: source_path.unwrap_or_default(),
342            message: format!("annotation attachment failed: {e}"),
343        })?;
344
345    Ok(doc)
346}
347
348// ============================================================================
349// Splicing
350// ============================================================================
351
/// Per-resolution state threaded through the recursive walker. Keeps the
/// signatures of the splice/process functions short and ensures
/// `chain`/`depth` are updated in lock-step (push/pop, +1/back-out) at
/// each include site.
struct ResolverState<'a> {
    /// Root directory and depth limit for this resolution.
    config: &'a ResolveConfig,
    /// Source of file contents; the only way the walker obtains text.
    loader: &'a dyn Loader,
    /// Active resolution stack: lexically-normalized paths currently being
    /// resolved. Pushed when we begin walking a loaded file and popped when
    /// its tree is fully resolved. An include whose target is already on
    /// the stack is a cycle.
    ///
    /// Normalization (not filesystem canonicalization) is what's used
    /// here: the resolver never touches `std::fs`, so symlink resolution
    /// is out. Two paths that lexically refer to the same file (after
    /// `.`/`..` collapse) compare equal; two paths reaching the same
    /// inode via different routes do not.
    ///
    /// NOTE(review): every entry comes from `lexical_normalize` /
    /// `resolve_path`, and the cycle check runs before `Loader::load`
    /// (which returns only the text), so canonicalization done inside a
    /// loader cannot influence this comparison. A cycle that goes through
    /// aliased paths is therefore stopped by the depth limit rather than
    /// reported as a `Cycle` — confirm that is acceptable.
    chain: &'a mut Vec<PathBuf>,
    /// Number of include hops from the entry point. Each recursion into a
    /// loaded file increments by 1. An include site encountered while this
    /// equals `config.max_depth` is an error.
    depth: usize,
}
376
377fn splice_in_session_container(
378    children: &mut Vec<ContentItem>,
379    host_dir: &Path,
380    state: &mut ResolverState<'_>,
381) -> Result<(), IncludeError> {
382    // Post-order: recurse into nested containers first, splice this
383    // container's includes second. The recurse step walks the *original*
384    // tree; the splice step inserts already-fully-resolved content
385    // (recursion happens inside `process_includes`), which is therefore
386    // never re-walked.
387    recurse_into_children(children, host_dir, state)?;
388    process_includes(children, host_dir, state, ContainerKind::Session)
389}
390
391fn splice_in_general_container(
392    container: &mut GeneralContainer,
393    host_dir: &Path,
394    state: &mut ResolverState<'_>,
395    kind: ContainerKind,
396) -> Result<(), IncludeError> {
397    recurse_into_children(container.as_mut_vec(), host_dir, state)?;
398    process_includes(container.as_mut_vec(), host_dir, state, kind)
399}
400
401// Allow &mut Vec because `splice` needs Vec-specific operations.
402#[allow(clippy::ptr_arg)]
403fn process_includes(
404    children: &mut Vec<ContentItem>,
405    host_dir: &Path,
406    state: &mut ResolverState<'_>,
407    kind: ContainerKind,
408) -> Result<(), IncludeError> {
409    // Collect indices of standalone include annotations in this container.
410    let include_indices: Vec<usize> = children
411        .iter()
412        .enumerate()
413        .filter_map(|(i, item)| match item {
414            ContentItem::Annotation(a) if a.is_include() => Some(i),
415            _ => None,
416        })
417        .collect();
418
419    // Process in reverse order so earlier indices stay valid.
420    for i in include_indices.into_iter().rev() {
421        let annotation = match &children[i] {
422            ContentItem::Annotation(a) => a.clone(),
423            _ => unreachable!("index came from include filter"),
424        };
425
426        let splice_items = resolve_one_include(&annotation, host_dir, state, kind)?;
427
428        // Replace the include annotation with the splice content.
429        // The annotation itself stays in the children list immediately
430        // before the splice, so the post-resolution AttachAnnotations
431        // pass moves it onto the first spliced node by the standard
432        // "attach to next sibling" rule.
433        let mut replacement = Vec::with_capacity(splice_items.len() + 1);
434        replacement.push(ContentItem::Annotation(annotation));
435        replacement.extend(splice_items);
436        children.splice(i..=i, replacement);
437    }
438
439    Ok(())
440}
441
442/// Resolve a single include annotation: path → load → parse → recurse →
443/// stamp → policy-check → splice list.
444///
445/// The recursion happens *here*: after parsing the loaded file, we walk
446/// its tree with the loaded file's own directory as `host_dir`, with the
447/// loaded file pushed onto `state.chain` and `state.depth` bumped by 1.
448/// When this call returns, the splice list is fully resolved and ready to
449/// be inserted into the host container.
450fn resolve_one_include(
451    annotation: &crate::lex::ast::elements::annotation::Annotation,
452    host_dir: &Path,
453    state: &mut ResolverState<'_>,
454    parent_kind: ContainerKind,
455) -> Result<Vec<ContentItem>, IncludeError> {
456    let src = annotation
457        .include_src()
458        .ok_or_else(|| IncludeError::MissingSrc {
459            include_site: annotation.location.clone(),
460        })?;
461
462    let target_path = resolve_path(&src, host_dir, &state.config.root)?;
463
464    // Cycle check before load — keep loader free of duplicate work.
465    if state.chain.iter().any(|p| p == &target_path) {
466        return Err(IncludeError::Cycle {
467            include_site: annotation.location.clone(),
468            path: target_path,
469            chain: state.chain.clone(),
470        });
471    }
472
473    // Depth check before recursing into the loaded file. A site that sits
474    // exactly at `max_depth` is fine; a site that would push us *past* it
475    // is the failure case.
476    if state.depth >= state.config.max_depth {
477        return Err(IncludeError::DepthExceeded {
478            include_site: annotation.location.clone(),
479            limit: state.config.max_depth,
480            chain: state.chain.clone(),
481        });
482    }
483
484    let target_source = state.loader.load(&target_path).map_err(|e| match e {
485        LoadError::NotFound { path } => IncludeError::NotFound {
486            include_site: annotation.location.clone(),
487            path,
488        },
489        LoadError::Io { path, message } => IncludeError::LoaderIo { path, message },
490    })?;
491
492    let mut included =
493        parse_no_attach(&target_source).map_err(|message| IncludeError::ParseFailed {
494            path: target_path.clone(),
495            message,
496        })?;
497
498    let target_origin = Arc::new(target_path.clone());
499    stamp_doc(&mut included, &target_origin);
500
501    // Recursively resolve includes inside the loaded file. The host_dir
502    // for that walk is the loaded file's own parent; the chain gains
503    // this path and depth bumps by 1 — both are popped/restored on the
504    // way back so siblings see the same state we got.
505    let included_dir = target_path
506        .parent()
507        .map(Path::to_path_buf)
508        .unwrap_or_else(|| state.config.root.clone());
509
510    state.chain.push(target_path.clone());
511    let saved_depth = state.depth;
512    state.depth = saved_depth + 1;
513    let recurse_result =
514        splice_in_session_container(included.root.children.as_mut_vec(), &included_dir, state);
515    state.depth = saved_depth;
516    state.chain.pop();
517    recurse_result?;
518
519    let splice_items = prepare_splice_list(included);
520    validate_against_kind(
521        &splice_items,
522        parent_kind,
523        &annotation.location,
524        &target_path,
525    )?;
526
527    Ok(splice_items)
528}
529
530#[allow(clippy::ptr_arg)]
531fn recurse_into_children(
532    children: &mut Vec<ContentItem>,
533    host_dir: &Path,
534    state: &mut ResolverState<'_>,
535) -> Result<(), IncludeError> {
536    for item in children.iter_mut() {
537        match item {
538            ContentItem::Session(s) => {
539                splice_in_session_container(s.children.as_mut_vec(), host_dir, state)?;
540            }
541            ContentItem::Definition(d) => {
542                splice_in_general_container(
543                    &mut d.children,
544                    host_dir,
545                    state,
546                    ContainerKind::Definition,
547                )?;
548            }
549            ContentItem::Annotation(a) if !a.is_include() => {
550                splice_in_general_container(
551                    &mut a.children,
552                    host_dir,
553                    state,
554                    ContainerKind::AnnotationBody,
555                )?;
556            }
557            ContentItem::List(l) => {
558                for li in l.items.as_mut_vec().iter_mut() {
559                    if let ContentItem::ListItem(item) = li {
560                        splice_in_general_container(
561                            &mut item.children,
562                            host_dir,
563                            state,
564                            ContainerKind::ListItem,
565                        )?;
566                    }
567                }
568            }
569            _ => {}
570        }
571    }
572    Ok(())
573}
574
575fn prepare_splice_list(mut included: Document) -> Vec<ContentItem> {
576    let mut items: Vec<ContentItem> = Vec::new();
577
578    // Document title → Paragraph, prepended.
579    // Equivalent to what a textual paste would parse (an unindented line
580    // becomes a paragraph in the host's context). Per the revised
581    // spec §5.2 this is "do nothing" semantics — converting matches what
582    // the parser would do if the included source were inlined and reparsed.
583    if let Some(title) = included.title {
584        let location = title.location.clone();
585        let para = Paragraph::from_line(title.as_str().to_string()).at(location);
586        items.push(ContentItem::Paragraph(para));
587    }
588
589    // Document-level annotations → regular annotations, prepended.
590    for ann in included.annotations {
591        items.push(ContentItem::Annotation(ann));
592    }
593
594    // Body of the included document.
595    items.append(included.root.children.as_mut_vec());
596
597    items
598}
599
600fn validate_against_kind(
601    items: &[ContentItem],
602    kind: ContainerKind,
603    site: &Range,
604    file: &Path,
605) -> Result<(), IncludeError> {
606    if kind.allows_sessions() {
607        return Ok(());
608    }
609    if items.iter().any(|i| matches!(i, ContentItem::Session(_))) {
610        return Err(IncludeError::ContainerPolicy {
611            include_site: site.clone(),
612            container: kind.name(),
613            file: file.to_path_buf(),
614            violation: "Sessions",
615        });
616    }
617    Ok(())
618}
619
620// ============================================================================
621// Path resolution
622// ============================================================================
623
624/// Resolve a file-reference target string the same way the include
625/// resolver resolves include paths.
626///
627/// Use this when consuming `ReferenceType::File { target }` (or any other
628/// node-attached path) so that relative paths resolve from the *authoring*
629/// file's directory, not from wherever the merged document happens to be
630/// rooted. Pass `ref_origin` as the [`Range::origin_path`] of the inline's
631/// containing node (or `None` if the node was never stamped — in that case
632/// the path is treated as if authored at the root).
633///
634/// Behaviour matches the include resolver:
635/// - Root-absolute targets (leading `/`) resolve under `root`.
636/// - Other targets resolve relative to `ref_origin`'s parent (or `root`
637///   when `ref_origin` is `None`).
638/// - The result is lexically normalized and checked against `root` —
639///   paths that escape it return `RootEscape`.
640///
641/// This is a sister to the resolver's internal `resolve_path` and shares
642/// the same lexical-normalization caveat: it does not touch the filesystem.
643pub fn resolve_file_reference(
644    target: &str,
645    ref_origin: Option<&Path>,
646    root: &Path,
647) -> Result<PathBuf, IncludeError> {
648    let host_dir: PathBuf = ref_origin
649        .and_then(|p| p.parent())
650        .map(Path::to_path_buf)
651        .unwrap_or_else(|| root.to_path_buf());
652    resolve_path(target, &host_dir, root)
653}
654
655fn resolve_path(src: &str, host_dir: &Path, root: &Path) -> Result<PathBuf, IncludeError> {
656    let candidate = if let Some(rel) = src.strip_prefix('/') {
657        // Root-absolute: leading slash means "from the resolution root".
658        root.join(rel)
659    } else {
660        // Relative: from the host file's directory.
661        host_dir.join(src)
662    };
663    let normalized = lexical_normalize(&candidate);
664    let canonical_root = lexical_normalize(root);
665    if !normalized.starts_with(&canonical_root) {
666        return Err(IncludeError::RootEscape {
667            path: normalized,
668            root: canonical_root,
669        });
670    }
671    Ok(normalized)
672}
673
/// Normalize a path purely lexically: drop `.` components and collapse
/// `..` against a preceding directory name, without consulting the
/// filesystem.
///
/// `std::fs::canonicalize` needs the path to exist, which breaks tests that
/// run against [`MemoryLoader`]. A lexical pass is enough for include-site
/// resolution: the resolver only needs a stable identity for cycle
/// detection and a uniform shape for the root-escape prefix check.
///
/// A `..` is collapsed only when the component currently at the end of the
/// output is a real directory name (`Component::Normal`). If the output is
/// empty, or ends in another `..` or a root marker, the `..` is *kept*.
///
/// Keeping it is what stops `../../etc/passwd` from shrinking to
/// `etc/passwd` and slipping past the root-escape check. A bare
/// `PathBuf::pop` would happily remove a trailing `..` (since
/// `Path::new("..").parent()` is `Some("")`), silently losing the second
/// `..` and yielding a path that falsely starts with the root prefix. Every
/// unmatched `..` that survives keeps the normalized path outside any sane
/// root, so the escape check fires as it should.
fn lexical_normalize(p: &Path) -> PathBuf {
    use std::path::Component;

    let mut normalized = PathBuf::new();
    for component in p.components() {
        match component {
            Component::CurDir => {}
            Component::ParentDir => {
                let ends_in_dir_name = matches!(
                    normalized.components().next_back(),
                    Some(Component::Normal(_))
                );
                if ends_in_dir_name {
                    normalized.pop();
                } else {
                    normalized.push("..");
                }
            }
            Component::Prefix(_) | Component::RootDir | Component::Normal(_) => {
                normalized.push(component.as_os_str());
            }
        }
    }
    normalized
}
715
716// ============================================================================
717// Origin stamping
718// ============================================================================
719//
720// Walk every node in a Document and set `Range.origin_path` on each
721// `.location` field. The walk only stamps the *block-level* `.location`
722// fields here; finer-grained inline ranges land in PR 6 when file-ref
723// resolution starts consulting them.
724
725fn stamp_doc(doc: &mut Document, origin: &Arc<PathBuf>) {
726    if let Some(title) = doc.title.as_mut() {
727        title.location.origin_path = Some(Arc::clone(origin));
728    }
729    for ann in doc.annotations.iter_mut() {
730        stamp_annotation(ann, origin);
731    }
732    stamp_session(&mut doc.root, origin);
733}
734
735fn stamp_session(s: &mut Session, origin: &Arc<PathBuf>) {
736    s.location.origin_path = Some(Arc::clone(origin));
737    if let Some(loc) = s.title.location.as_mut() {
738        loc.origin_path = Some(Arc::clone(origin));
739    }
740    for ann in s.annotations.iter_mut() {
741        stamp_annotation(ann, origin);
742    }
743    for item in s.children.as_mut_vec().iter_mut() {
744        stamp_item(item, origin);
745    }
746}
747
748fn stamp_annotation(
749    a: &mut crate::lex::ast::elements::annotation::Annotation,
750    origin: &Arc<PathBuf>,
751) {
752    a.location.origin_path = Some(Arc::clone(origin));
753    a.data.location.origin_path = Some(Arc::clone(origin));
754    for item in a.children.as_mut_vec().iter_mut() {
755        stamp_item(item, origin);
756    }
757}
758
759fn stamp_item(item: &mut ContentItem, origin: &Arc<PathBuf>) {
760    match item {
761        ContentItem::Session(s) => stamp_session(s, origin),
762        ContentItem::Annotation(a) => stamp_annotation(a, origin),
763        ContentItem::Paragraph(p) => {
764            p.location.origin_path = Some(Arc::clone(origin));
765            for ann in p.annotations.iter_mut() {
766                stamp_annotation(ann, origin);
767            }
768            for line in p.lines.iter_mut() {
769                stamp_item(line, origin);
770            }
771        }
772        ContentItem::List(l) => {
773            l.location.origin_path = Some(Arc::clone(origin));
774            for li in l.items.as_mut_vec().iter_mut() {
775                stamp_item(li, origin);
776            }
777        }
778        ContentItem::ListItem(li) => {
779            li.location.origin_path = Some(Arc::clone(origin));
780            for ann in li.annotations.iter_mut() {
781                stamp_annotation(ann, origin);
782            }
783            for child in li.children.as_mut_vec().iter_mut() {
784                stamp_item(child, origin);
785            }
786        }
787        ContentItem::Definition(d) => {
788            d.location.origin_path = Some(Arc::clone(origin));
789            for ann in d.annotations.iter_mut() {
790                stamp_annotation(ann, origin);
791            }
792            for child in d.children.as_mut_vec().iter_mut() {
793                stamp_item(child, origin);
794            }
795        }
796        ContentItem::VerbatimBlock(v) => {
797            v.location.origin_path = Some(Arc::clone(origin));
798        }
799        ContentItem::VerbatimLine(vl) => {
800            vl.location.origin_path = Some(Arc::clone(origin));
801        }
802        ContentItem::Table(t) => {
803            t.location.origin_path = Some(Arc::clone(origin));
804        }
805        ContentItem::TextLine(tl) => {
806            tl.location.origin_path = Some(Arc::clone(origin));
807        }
808        ContentItem::BlankLineGroup(b) => {
809            b.location.origin_path = Some(Arc::clone(origin));
810        }
811    }
812}
813
814// ============================================================================
815// Parser glue
816// ============================================================================
817
818/// Parse `source` into a Document but skip the annotation-attachment stage,
819/// so include annotations are findable in container children lists.
820fn parse_no_attach(source: &str) -> Result<Document, String> {
821    crate::lex::testing::parse_without_annotation_attachment(source)
822}
823
824// ============================================================================
825// Filesystem-backed loader
826// ============================================================================
827
/// [`Loader`] that reads files from the filesystem with `std::fs::read_to_string`.
///
/// This is the production loader used by the CLI; the LSP wraps it with a
/// file-watch invalidation layer in PR 8. lex-core's *resolver* code does not
/// reference `std::fs` — `FsLoader` is the one place where it does, isolated
/// behind the [`Loader`] trait so the rest of the crate stays sandbox- and
/// WASM-friendly.
///
/// `FsLoader` is stateless; construct one at the start of a resolution and
/// share it for the duration. Because it holds no data it is `Copy`, so it
/// can also be passed around by value freely. Errors map cleanly:
/// - `std::io::ErrorKind::NotFound` → [`LoadError::NotFound`]
/// - any other I/O error → [`LoadError::Io`]
#[derive(Debug, Clone, Copy)]
pub struct FsLoader;
841
842impl FsLoader {
843    pub fn new() -> Self {
844        Self
845    }
846}
847
848impl Default for FsLoader {
849    fn default() -> Self {
850        Self::new()
851    }
852}
853
854impl Loader for FsLoader {
855    fn load(&self, path: &Path) -> Result<String, LoadError> {
856        std::fs::read_to_string(path).map_err(|e| match e.kind() {
857            std::io::ErrorKind::NotFound => LoadError::NotFound {
858                path: path.to_path_buf(),
859            },
860            _ => LoadError::Io {
861                path: path.to_path_buf(),
862                message: e.to_string(),
863            },
864        })
865    }
866}
867
868// ============================================================================
869// Test fixtures (test-support feature + cfg(test))
870// ============================================================================
871
/// In-memory [`Loader`] backed by a `HashMap<PathBuf, String>`.
///
/// Only compiled for tests and for builds with the `test-support` feature.
/// Lookups use the path exactly as it was registered.
#[cfg(any(test, feature = "test-support"))]
#[derive(Debug, Clone)]
pub struct MemoryLoader {
    /// Registered files: path → full source text.
    files: std::collections::HashMap<PathBuf, String>,
}
877
#[cfg(any(test, feature = "test-support"))]
impl MemoryLoader {
    /// Build a loader with no files registered. Populate it with
    /// [`MemoryLoader::insert`].
    pub fn new() -> Self {
        let files = std::collections::HashMap::new();
        Self { files }
    }

    /// Store `contents` as the source text for `path`, replacing any text
    /// previously registered under the same path. Returns `self` so calls
    /// can be chained.
    pub fn insert<P: Into<PathBuf>, S: Into<String>>(&mut self, path: P, contents: S) -> &mut Self {
        let (key, text) = (path.into(), contents.into());
        self.files.insert(key, text);
        self
    }

    /// Build a loader in one step from any iterator of `(path, contents)`
    /// pairs. When a path appears more than once, the last pair wins.
    pub fn from_pairs<I, P, S>(pairs: I) -> Self
    where
        I: IntoIterator<Item = (P, S)>,
        P: Into<PathBuf>,
        S: Into<String>,
    {
        let files: std::collections::HashMap<PathBuf, String> = pairs
            .into_iter()
            .map(|(path, contents)| (path.into(), contents.into()))
            .collect();
        Self { files }
    }
}
908
#[cfg(any(test, feature = "test-support"))]
impl Default for MemoryLoader {
    /// Equivalent to [`MemoryLoader::new`]: a loader with no files.
    fn default() -> Self {
        MemoryLoader::new()
    }
}
915
#[cfg(any(test, feature = "test-support"))]
impl Loader for MemoryLoader {
    /// Return a copy of the text registered for `path`.
    ///
    /// # Errors
    ///
    /// [`LoadError::NotFound`] when nothing was registered under `path`.
    fn load(&self, path: &Path) -> Result<String, LoadError> {
        match self.files.get(path) {
            Some(contents) => Ok(contents.clone()),
            None => Err(LoadError::NotFound {
                path: path.to_path_buf(),
            }),
        }
    }
}
927
928// ============================================================================
929// Tests
930// ============================================================================
931
932#[cfg(test)]
933mod tests;