lex_core/lex/includes.rs
1//! Include resolution for Lex documents.
2//!
3//! This module turns `:: lex.include src="..." ::` annotations into spliced
4//! content from the referenced files. It is *opt-in*: callers that want the
5//! unresolved tree (the formatter, tree-sitter parity, editor tooling that
6//! displays include statements as authored) skip this pass entirely. The
7//! parser itself never touches the filesystem — all I/O goes through the
8//! injected [`Loader`] trait.
9//!
10//! See `comms/specs/proposals/includes.lex` for the full design.
11//!
12//! # Status
13//!
//! This module is being built up across PRs 3–7:
15//!
16//! - PR 3: skeleton — trait, config, errors, stub.
17//! - PR 4: single-pass splice + container-policy validation +
18//! doc-title/doc-annotation conversion + origin stamping + root-escape
19//! check.
20//! - PR 5: recursive resolution into included files + cycle detection
21//! (chain stack) + depth limit. Each loaded file gets walked in its OWN
22//! directory, so relative paths inside an included file resolve from
23//! that file's directory, not the entry's.
24//! - PR 6: origin-aware reference helpers. [`resolve_file_reference`]
25//! resolves a `ReferenceType::File` target from the authoring file's
26//! directory using `Range.origin_path`.
27//! `Document::find_annotation_by_label_in_origin` scopes footnote
28//! lookups to the file the reference was authored in.
29//! - PR 7 (this PR): [`FsLoader`] — production loader that reads from the
30//! filesystem with `std::fs::read_to_string`. CLI wires the resolver
31//! into `lex convert` and `lex inspect` (default-on, opt-out via
32//! `--no-includes`); `lex format` never expands.
33//!
34//! # Layering
35//!
36//! Of all of lex-core, only [`FsLoader`] references `std::fs`. The
37//! resolver itself does no I/O — it always goes through the [`Loader`]
38//! trait. Callers can swap loaders to keep the resolver sandboxed:
39//!
40//! - The LSP wraps [`FsLoader`] with file-watch invalidation (PR 8).
41//! - WASM builds provide a JS-backed loader instead of [`FsLoader`].
42//! - Tests use [`MemoryLoader`] (gated behind `test-support`).
43//!
44//! For tests, lex-core itself ships [`MemoryLoader`] gated behind the
45//! `test-support` cargo feature. It is not intended for production use.
46
47use crate::lex::assembling::AttachAnnotations;
48use crate::lex::ast::elements::container::GeneralContainer;
49use crate::lex::ast::elements::content_item::ContentItem;
50use crate::lex::ast::elements::paragraph::Paragraph;
51use crate::lex::ast::elements::session::Session;
52use crate::lex::ast::range::Range;
53use crate::lex::ast::Document;
54use crate::lex::transforms::Runnable;
55use std::path::{Path, PathBuf};
56use std::sync::Arc;
57
/// Settings that govern one run of the include resolver.
#[derive(Debug, Clone)]
pub struct ResolveConfig {
    /// Directory every include path must stay under. An include whose
    /// lexically normalized path falls outside this root is rejected with
    /// [`IncludeError::RootEscape`].
    ///
    /// Must be an **absolute** path. Lexical normalization treats `.`
    /// and `..` against an empty buffer as no-ops, so a relative or
    /// unnormalized root weakens the root-escape prefix check. Callers
    /// (CLI, LSP) should canonicalize the root before building a
    /// `ResolveConfig`.
    pub root: PathBuf,
    /// Upper bound on include nesting. Defaults to 8 (see
    /// [`ResolveConfig::DEFAULT_MAX_DEPTH`]). Exceeding it is reported as
    /// an error; content is never silently truncated.
    pub max_depth: usize,
}

impl ResolveConfig {
    /// Nesting limit used by [`ResolveConfig::with_root`]. Generous enough
    /// for any reasonable atomization strategy (aggregator → per-chapter →
    /// per-section) while keeping the resolver's worst-case work bounded.
    pub const DEFAULT_MAX_DEPTH: usize = 8;

    /// Build a config rooted at `root` with the default depth limit.
    pub fn with_root(root: PathBuf) -> Self {
        let max_depth = Self::DEFAULT_MAX_DEPTH;
        Self { root, max_depth }
    }
}
89
/// A pluggable source-text loader.
///
/// Implementations decide where the text comes from (filesystem, in-memory
/// map, virtual filesystem, content-addressed store, …). The resolver in
/// this module performs every read through this trait instead of calling
/// `std::fs` itself, which keeps it usable in WASM, sandboxes, and unit
/// tests.
pub trait Loader {
    /// Load the source text for `path`.
    ///
    /// The resolver passes the path it computed for the include site: joined
    /// against the host file's directory (or the root for a leading `/`),
    /// lexically normalized, and already checked to lie under the configured
    /// root. The resolver itself performs no filesystem canonicalization.
    /// See §4 of the proposal for the resolution rules.
    ///
    /// # Errors
    ///
    /// Implementations report a missing resource as [`LoadError::NotFound`]
    /// and any other failure as [`LoadError::Io`].
    fn load(&self, path: &Path) -> Result<String, LoadError>;
}
102
/// Failure modes a [`Loader`] can report.
#[derive(Debug, Clone)]
pub enum LoadError {
    /// Nothing exists at `path` as far as the loader can tell.
    NotFound { path: PathBuf },
    /// An I/O failure (or the virtual-filesystem equivalent) while reading
    /// `path`; `message` carries the loader's description of it.
    Io { path: PathBuf, message: String },
}

impl std::fmt::Display for LoadError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::NotFound { path } => {
                let shown = path.display();
                write!(f, "include not found: {shown}")
            }
            Self::Io { path, message } => {
                let shown = path.display();
                write!(f, "io error reading {shown}: {message}")
            }
        }
    }
}

impl std::error::Error for LoadError {}
124
/// Errors the include resolver can produce.
#[derive(Debug, Clone)]
pub enum IncludeError {
    /// An include chain looped back on itself. `path` is the target that was
    /// already being resolved; `chain` is the resolution stack at the moment
    /// the duplicate was about to be pushed, in source order (entry first
    /// when the entry path is known, deepest last). `include_site` is the
    /// range of the offending `lex.include` annotation in its host file —
    /// useful for diagnostics that highlight the exact line.
    Cycle {
        include_site: Range,
        path: PathBuf,
        chain: Vec<PathBuf>,
    },
    /// The include depth exceeded [`ResolveConfig::max_depth`]. `limit` is
    /// the configured maximum; `chain` shows the resolution stack at the
    /// moment of failure, in source order. `include_site` is the range of
    /// the offending `lex.include` annotation in its host file.
    DepthExceeded {
        include_site: Range,
        limit: usize,
        chain: Vec<PathBuf>,
    },
    /// A path resolved outside the configured [`ResolveConfig::root`].
    /// Both `path` and `root` are reported in lexically normalized form.
    RootEscape { path: PathBuf, root: PathBuf },
    /// The loader reported that nothing exists at `path`
    /// (converted from [`LoadError::NotFound`]).
    NotFound { path: PathBuf },
    /// The parser rejected the text for `path`. The resolver also uses this
    /// variant when the final annotation-attachment pass over the merged
    /// tree fails.
    ParseFailed { path: PathBuf, message: String },
    /// The included file's content is not legal in the include site's
    /// parent container.
    ///
    /// Today this only occurs when an included file has top-level Sessions
    /// and the include site is inside a `GeneralContainer` (Definition,
    /// ListItem, or another Annotation's body). The `violation` field
    /// names the offending content kind (e.g. `"Sessions"`) so future
    /// container/policy combinations can reuse this variant without a
    /// breaking change.
    ContainerPolicy {
        include_site: Range,
        container: &'static str,
        file: PathBuf,
        violation: &'static str,
    },
    /// The loader failed with something other than "not found"
    /// (converted from [`LoadError::Io`]).
    LoaderIo { path: PathBuf, message: String },
    /// `lex.include` annotation was missing the mandatory `src=` parameter.
    MissingSrc { include_site: Range },
}
173
174impl std::fmt::Display for IncludeError {
175 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
176 match self {
177 IncludeError::Cycle { path, chain, .. } => {
178 let chain_display: Vec<String> =
179 chain.iter().map(|p| p.display().to_string()).collect();
180 write!(
181 f,
182 "include cycle: {} (chain: {})",
183 path.display(),
184 chain_display.join(" -> ")
185 )
186 }
187 IncludeError::DepthExceeded { limit, chain, .. } => {
188 let chain_display: Vec<String> =
189 chain.iter().map(|p| p.display().to_string()).collect();
190 write!(
191 f,
192 "include depth exceeded limit of {limit} (chain: {})",
193 chain_display.join(" -> ")
194 )
195 }
196 IncludeError::RootEscape { path, root } => write!(
197 f,
198 "include path {} escapes resolution root {}",
199 path.display(),
200 root.display()
201 ),
202 IncludeError::NotFound { path } => write!(f, "include not found: {}", path.display()),
203 IncludeError::ParseFailed { path, message } => {
204 write!(f, "failed to parse {}: {message}", path.display())
205 }
206 IncludeError::ContainerPolicy {
207 container,
208 file,
209 violation,
210 ..
211 } => write!(
212 f,
213 "included file {} contains {} but include site is inside {} \
214 (which does not allow {})",
215 file.display(),
216 violation,
217 container,
218 violation
219 ),
220 IncludeError::LoaderIo { path, message } => {
221 write!(f, "loader error reading {}: {message}", path.display())
222 }
223 IncludeError::MissingSrc { .. } => {
224 write!(f, "lex.include annotation missing required src= parameter")
225 }
226 }
227 }
228}
229
230impl std::error::Error for IncludeError {}
231
232impl From<LoadError> for IncludeError {
233 fn from(err: LoadError) -> Self {
234 match err {
235 LoadError::NotFound { path } => IncludeError::NotFound { path },
236 LoadError::Io { path, message } => IncludeError::LoaderIo { path, message },
237 }
238 }
239}
240
/// The kind of container an include site lives in. Drives the splice-time
/// policy check (currently just "no Sessions in a `GeneralContainer`").
#[derive(Debug, Clone, Copy)]
enum ContainerKind {
    /// `Document.root.children` or `Session.children` — accepts everything.
    Session,
    /// `Definition.children` — `GeneralContainer`.
    Definition,
    /// `Annotation.children` — `GeneralContainer`.
    AnnotationBody,
    /// `ListItem.children` — `GeneralContainer`.
    ListItem,
}

impl ContainerKind {
    /// Human-readable label used in [`IncludeError::ContainerPolicy`].
    fn name(self) -> &'static str {
        match self {
            Self::Session => "Session",
            Self::Definition => "Definition",
            Self::AnnotationBody => "Annotation body",
            Self::ListItem => "ListItem",
        }
    }

    /// Whether spliced content may contain Sessions in this container.
    fn allows_sessions(self) -> bool {
        match self {
            Self::Session => true,
            Self::Definition | Self::AnnotationBody | Self::ListItem => false,
        }
    }
}
269
270/// Resolve `:: lex.include ::` annotations starting from `source`, recursively.
271///
272/// `source_path` identifies the entry-point file. It is used to (a) resolve
273/// relative include paths against the entry file's directory, (b) stamp
274/// `Range.origin_path` on every node so downstream code (file-ref resolution,
275/// diagnostics, LSP goto) can report locations against the authoring file,
276/// and (c) seed the cycle-detection chain so an include cycle that loops
277/// back to the entry is caught. When `None`, relative paths resolve against
278/// `config.root`, origin stamping is skipped on the entry, and the chain
279/// starts empty.
280///
281/// # Pre/post-attachment
282///
283/// Internally this re-parses each source (entry + every loaded file) *without*
284/// annotation attachment so `lex.include` annotations are visible as standalone
285/// children where the splice can replace them in-place. After all splices,
286/// [`AttachAnnotations`] runs once on the merged tree, which lands the include
287/// annotation on the first spliced node by the standard "attach to next
288/// sibling" rule. This matches the textual paste mental model from the proposal.
289///
290/// # Recursion
291///
292/// Each loaded file is fully resolved (its own includes replaced) *before*
293/// being spliced into the host. The recursion uses each file's own directory
294/// as `host_dir`, so a relative path inside an included file resolves from
295/// that file's location — not the entry's. An active-chain stack of
296/// canonicalized paths gates against cycles; the depth counter gates against
297/// pathological nesting (default 8, configurable via [`ResolveConfig::max_depth`]).
298pub fn resolve_from_source(
299 source: &str,
300 source_path: Option<PathBuf>,
301 config: &ResolveConfig,
302 loader: &dyn Loader,
303) -> Result<Document, IncludeError> {
304 let entry_origin = source_path.as_ref().map(|p| Arc::new(p.clone()));
305 let host_dir = source_path
306 .as_ref()
307 .and_then(|p| p.parent().map(Path::to_path_buf))
308 .unwrap_or_else(|| config.root.clone());
309
310 let mut doc = parse_no_attach(source).map_err(|message| IncludeError::ParseFailed {
311 path: source_path.clone().unwrap_or_default(),
312 message,
313 })?;
314
315 if let Some(origin) = entry_origin.as_ref() {
316 stamp_doc(&mut doc, origin);
317 }
318
319 // Seed the chain with the lexically-normalized entry path (when known)
320 // so an include that loops back to the entry is detected as a cycle.
321 // Normalization here is essential — `target_path` values produced by
322 // `resolve_path` are also lexically normalized, so an unnormalized
323 // entry would never compare equal to its normalized self.
324 let mut chain: Vec<PathBuf> = source_path
325 .as_ref()
326 .map(|p| vec![lexical_normalize(p)])
327 .unwrap_or_default();
328 let mut state = ResolverState {
329 config,
330 loader,
331 chain: &mut chain,
332 depth: 0,
333 };
334
335 splice_in_session_container(doc.root.children.as_mut_vec(), &host_dir, &mut state)?;
336
337 let doc = AttachAnnotations::new()
338 .run(doc)
339 .map_err(|e| IncludeError::ParseFailed {
340 path: source_path.unwrap_or_default(),
341 message: format!("annotation attachment failed: {e}"),
342 })?;
343
344 Ok(doc)
345}
346
347// ============================================================================
348// Splicing
349// ============================================================================
350
/// Per-resolution state threaded through the recursive walker. Keeps the
/// signatures of the splice/process functions short and ensures
/// `chain`/`depth` are updated in lock-step (push/pop, +1/back-out) at
/// each include site.
struct ResolverState<'a> {
    /// Resolver settings (root directory and depth limit).
    config: &'a ResolveConfig,
    /// Source of file contents; the only way the walker reads anything.
    loader: &'a dyn Loader,
    /// Active resolution stack: lexically-normalized paths currently being
    /// resolved. Pushed when we begin walking a loaded file and popped when
    /// its tree is fully resolved. An include whose target is already on
    /// the stack is a cycle.
    ///
    /// Normalization (not filesystem canonicalization) is what's used
    /// here: the resolver never touches `std::fs`, so symlink resolution
    /// is out. Two paths that lexically refer to the same file (after
    /// `.`/`..` collapse) compare equal; two paths reaching the same
    /// inode via different routes do not. The comparison happens before
    /// the loader is called and `FsLoader` reads the path as given, so
    /// nothing canonicalizes paths on the resolver's behalf; a loop that
    /// goes through differently-spelled routes is stopped by the depth
    /// limit rather than by this chain.
    chain: &'a mut Vec<PathBuf>,
    /// Number of include hops from the entry point. Each recursion into a
    /// loaded file increments by 1. An include site encountered while
    /// `depth` has already reached `config.max_depth` is an error.
    depth: usize,
}
375
376fn splice_in_session_container(
377 children: &mut Vec<ContentItem>,
378 host_dir: &Path,
379 state: &mut ResolverState<'_>,
380) -> Result<(), IncludeError> {
381 // Post-order: recurse into nested containers first, splice this
382 // container's includes second. The recurse step walks the *original*
383 // tree; the splice step inserts already-fully-resolved content
384 // (recursion happens inside `process_includes`), which is therefore
385 // never re-walked.
386 recurse_into_children(children, host_dir, state)?;
387 process_includes(children, host_dir, state, ContainerKind::Session)
388}
389
390fn splice_in_general_container(
391 container: &mut GeneralContainer,
392 host_dir: &Path,
393 state: &mut ResolverState<'_>,
394 kind: ContainerKind,
395) -> Result<(), IncludeError> {
396 recurse_into_children(container.as_mut_vec(), host_dir, state)?;
397 process_includes(container.as_mut_vec(), host_dir, state, kind)
398}
399
400// Allow &mut Vec because `splice` needs Vec-specific operations.
401#[allow(clippy::ptr_arg)]
402fn process_includes(
403 children: &mut Vec<ContentItem>,
404 host_dir: &Path,
405 state: &mut ResolverState<'_>,
406 kind: ContainerKind,
407) -> Result<(), IncludeError> {
408 // Collect indices of standalone include annotations in this container.
409 let include_indices: Vec<usize> = children
410 .iter()
411 .enumerate()
412 .filter_map(|(i, item)| match item {
413 ContentItem::Annotation(a) if a.is_include() => Some(i),
414 _ => None,
415 })
416 .collect();
417
418 // Process in reverse order so earlier indices stay valid.
419 for i in include_indices.into_iter().rev() {
420 let annotation = match &children[i] {
421 ContentItem::Annotation(a) => a.clone(),
422 _ => unreachable!("index came from include filter"),
423 };
424
425 let splice_items = resolve_one_include(&annotation, host_dir, state, kind)?;
426
427 // Replace the include annotation with the splice content.
428 // The annotation itself stays in the children list immediately
429 // before the splice, so the post-resolution AttachAnnotations
430 // pass moves it onto the first spliced node by the standard
431 // "attach to next sibling" rule.
432 let mut replacement = Vec::with_capacity(splice_items.len() + 1);
433 replacement.push(ContentItem::Annotation(annotation));
434 replacement.extend(splice_items);
435 children.splice(i..=i, replacement);
436 }
437
438 Ok(())
439}
440
441/// Resolve a single include annotation: path → load → parse → recurse →
442/// stamp → policy-check → splice list.
443///
444/// The recursion happens *here*: after parsing the loaded file, we walk
445/// its tree with the loaded file's own directory as `host_dir`, with the
446/// loaded file pushed onto `state.chain` and `state.depth` bumped by 1.
447/// When this call returns, the splice list is fully resolved and ready to
448/// be inserted into the host container.
449fn resolve_one_include(
450 annotation: &crate::lex::ast::elements::annotation::Annotation,
451 host_dir: &Path,
452 state: &mut ResolverState<'_>,
453 parent_kind: ContainerKind,
454) -> Result<Vec<ContentItem>, IncludeError> {
455 let src = annotation
456 .include_src()
457 .ok_or_else(|| IncludeError::MissingSrc {
458 include_site: annotation.location.clone(),
459 })?;
460
461 let target_path = resolve_path(&src, host_dir, &state.config.root)?;
462
463 // Cycle check before load — keep loader free of duplicate work.
464 if state.chain.iter().any(|p| p == &target_path) {
465 return Err(IncludeError::Cycle {
466 include_site: annotation.location.clone(),
467 path: target_path,
468 chain: state.chain.clone(),
469 });
470 }
471
472 // Depth check before recursing into the loaded file. A site that sits
473 // exactly at `max_depth` is fine; a site that would push us *past* it
474 // is the failure case.
475 if state.depth >= state.config.max_depth {
476 return Err(IncludeError::DepthExceeded {
477 include_site: annotation.location.clone(),
478 limit: state.config.max_depth,
479 chain: state.chain.clone(),
480 });
481 }
482
483 let target_source = state.loader.load(&target_path)?;
484
485 let mut included =
486 parse_no_attach(&target_source).map_err(|message| IncludeError::ParseFailed {
487 path: target_path.clone(),
488 message,
489 })?;
490
491 let target_origin = Arc::new(target_path.clone());
492 stamp_doc(&mut included, &target_origin);
493
494 // Recursively resolve includes inside the loaded file. The host_dir
495 // for that walk is the loaded file's own parent; the chain gains
496 // this path and depth bumps by 1 — both are popped/restored on the
497 // way back so siblings see the same state we got.
498 let included_dir = target_path
499 .parent()
500 .map(Path::to_path_buf)
501 .unwrap_or_else(|| state.config.root.clone());
502
503 state.chain.push(target_path.clone());
504 let saved_depth = state.depth;
505 state.depth = saved_depth + 1;
506 let recurse_result =
507 splice_in_session_container(included.root.children.as_mut_vec(), &included_dir, state);
508 state.depth = saved_depth;
509 state.chain.pop();
510 recurse_result?;
511
512 let splice_items = prepare_splice_list(included);
513 validate_against_kind(
514 &splice_items,
515 parent_kind,
516 &annotation.location,
517 &target_path,
518 )?;
519
520 Ok(splice_items)
521}
522
523#[allow(clippy::ptr_arg)]
524fn recurse_into_children(
525 children: &mut Vec<ContentItem>,
526 host_dir: &Path,
527 state: &mut ResolverState<'_>,
528) -> Result<(), IncludeError> {
529 for item in children.iter_mut() {
530 match item {
531 ContentItem::Session(s) => {
532 splice_in_session_container(s.children.as_mut_vec(), host_dir, state)?;
533 }
534 ContentItem::Definition(d) => {
535 splice_in_general_container(
536 &mut d.children,
537 host_dir,
538 state,
539 ContainerKind::Definition,
540 )?;
541 }
542 ContentItem::Annotation(a) if !a.is_include() => {
543 splice_in_general_container(
544 &mut a.children,
545 host_dir,
546 state,
547 ContainerKind::AnnotationBody,
548 )?;
549 }
550 ContentItem::List(l) => {
551 for li in l.items.as_mut_vec().iter_mut() {
552 if let ContentItem::ListItem(item) = li {
553 splice_in_general_container(
554 &mut item.children,
555 host_dir,
556 state,
557 ContainerKind::ListItem,
558 )?;
559 }
560 }
561 }
562 _ => {}
563 }
564 }
565 Ok(())
566}
567
568fn prepare_splice_list(mut included: Document) -> Vec<ContentItem> {
569 let mut items: Vec<ContentItem> = Vec::new();
570
571 // Document title → Paragraph, prepended.
572 // Equivalent to what a textual paste would parse (an unindented line
573 // becomes a paragraph in the host's context). Per the revised
574 // spec §5.2 this is "do nothing" semantics — converting matches what
575 // the parser would do if the included source were inlined and reparsed.
576 if let Some(title) = included.title {
577 let location = title.location.clone();
578 let para = Paragraph::from_line(title.as_str().to_string()).at(location);
579 items.push(ContentItem::Paragraph(para));
580 }
581
582 // Document-level annotations → regular annotations, prepended.
583 for ann in included.annotations {
584 items.push(ContentItem::Annotation(ann));
585 }
586
587 // Body of the included document.
588 items.append(included.root.children.as_mut_vec());
589
590 items
591}
592
593fn validate_against_kind(
594 items: &[ContentItem],
595 kind: ContainerKind,
596 site: &Range,
597 file: &Path,
598) -> Result<(), IncludeError> {
599 if kind.allows_sessions() {
600 return Ok(());
601 }
602 if items.iter().any(|i| matches!(i, ContentItem::Session(_))) {
603 return Err(IncludeError::ContainerPolicy {
604 include_site: site.clone(),
605 container: kind.name(),
606 file: file.to_path_buf(),
607 violation: "Sessions",
608 });
609 }
610 Ok(())
611}
612
613// ============================================================================
614// Path resolution
615// ============================================================================
616
617/// Resolve a file-reference target string the same way the include
618/// resolver resolves include paths.
619///
620/// Use this when consuming `ReferenceType::File { target }` (or any other
621/// node-attached path) so that relative paths resolve from the *authoring*
622/// file's directory, not from wherever the merged document happens to be
623/// rooted. Pass `ref_origin` as the [`Range::origin_path`] of the inline's
624/// containing node (or `None` if the node was never stamped — in that case
625/// the path is treated as if authored at the root).
626///
627/// Behaviour matches the include resolver:
628/// - Root-absolute targets (leading `/`) resolve under `root`.
629/// - Other targets resolve relative to `ref_origin`'s parent (or `root`
630/// when `ref_origin` is `None`).
631/// - The result is lexically normalized and checked against `root` —
632/// paths that escape it return `RootEscape`.
633///
634/// This is a sister to the resolver's internal `resolve_path` and shares
635/// the same lexical-normalization caveat: it does not touch the filesystem.
636pub fn resolve_file_reference(
637 target: &str,
638 ref_origin: Option<&Path>,
639 root: &Path,
640) -> Result<PathBuf, IncludeError> {
641 let host_dir: PathBuf = ref_origin
642 .and_then(|p| p.parent())
643 .map(Path::to_path_buf)
644 .unwrap_or_else(|| root.to_path_buf());
645 resolve_path(target, &host_dir, root)
646}
647
648fn resolve_path(src: &str, host_dir: &Path, root: &Path) -> Result<PathBuf, IncludeError> {
649 let candidate = if let Some(rel) = src.strip_prefix('/') {
650 // Root-absolute: leading slash means "from the resolution root".
651 root.join(rel)
652 } else {
653 // Relative: from the host file's directory.
654 host_dir.join(src)
655 };
656 let normalized = lexical_normalize(&candidate);
657 let canonical_root = lexical_normalize(root);
658 if !normalized.starts_with(&canonical_root) {
659 return Err(IncludeError::RootEscape {
660 path: normalized,
661 root: canonical_root,
662 });
663 }
664 Ok(normalized)
665}
666
/// Normalize a path purely lexically: drop `.` components and collapse
/// `..` against a preceding directory name, without consulting the
/// filesystem.
///
/// `std::fs::canonicalize` needs the path to exist, which rules it out for
/// in-memory loaders. A lexical form is enough here: the resolver only
/// needs a stable identity for cycle detection and a uniform shape for the
/// root-escape prefix check.
///
/// A `..` removes the previous component only when that component is a
/// real name (`Component::Normal`). If the buffer is empty, or ends in
/// `..` or a root/prefix marker, the `..` is *kept*. This is deliberate:
/// blindly calling `PathBuf::pop` would let `../../etc/passwd` shrink to
/// `etc/passwd` and slip past the root-escape check. Keeping every
/// unmatched `..` leaves such paths visibly outside any sane root.
fn lexical_normalize(p: &Path) -> PathBuf {
    use std::path::Component;

    p.components().fold(PathBuf::new(), |mut acc, part| {
        match part {
            Component::CurDir => {}
            Component::ParentDir => {
                let ends_in_name =
                    matches!(acc.components().next_back(), Some(Component::Normal(_)));
                if ends_in_name {
                    acc.pop();
                } else {
                    acc.push("..");
                }
            }
            Component::Prefix(_) | Component::RootDir | Component::Normal(_) => {
                acc.push(part.as_os_str());
            }
        }
        acc
    })
}
708
709// ============================================================================
710// Origin stamping
711// ============================================================================
712//
713// Walk every node in a Document and set `Range.origin_path` on each
714// `.location` field. The walk only stamps the *block-level* `.location`
715// fields here; finer-grained inline ranges land in PR 6 when file-ref
716// resolution starts consulting them.
717
718fn stamp_doc(doc: &mut Document, origin: &Arc<PathBuf>) {
719 if let Some(title) = doc.title.as_mut() {
720 title.location.origin_path = Some(Arc::clone(origin));
721 }
722 for ann in doc.annotations.iter_mut() {
723 stamp_annotation(ann, origin);
724 }
725 stamp_session(&mut doc.root, origin);
726}
727
728fn stamp_session(s: &mut Session, origin: &Arc<PathBuf>) {
729 s.location.origin_path = Some(Arc::clone(origin));
730 if let Some(loc) = s.title.location.as_mut() {
731 loc.origin_path = Some(Arc::clone(origin));
732 }
733 for ann in s.annotations.iter_mut() {
734 stamp_annotation(ann, origin);
735 }
736 for item in s.children.as_mut_vec().iter_mut() {
737 stamp_item(item, origin);
738 }
739}
740
741fn stamp_annotation(
742 a: &mut crate::lex::ast::elements::annotation::Annotation,
743 origin: &Arc<PathBuf>,
744) {
745 a.location.origin_path = Some(Arc::clone(origin));
746 a.data.location.origin_path = Some(Arc::clone(origin));
747 for item in a.children.as_mut_vec().iter_mut() {
748 stamp_item(item, origin);
749 }
750}
751
752fn stamp_item(item: &mut ContentItem, origin: &Arc<PathBuf>) {
753 match item {
754 ContentItem::Session(s) => stamp_session(s, origin),
755 ContentItem::Annotation(a) => stamp_annotation(a, origin),
756 ContentItem::Paragraph(p) => {
757 p.location.origin_path = Some(Arc::clone(origin));
758 for ann in p.annotations.iter_mut() {
759 stamp_annotation(ann, origin);
760 }
761 for line in p.lines.iter_mut() {
762 stamp_item(line, origin);
763 }
764 }
765 ContentItem::List(l) => {
766 l.location.origin_path = Some(Arc::clone(origin));
767 for li in l.items.as_mut_vec().iter_mut() {
768 stamp_item(li, origin);
769 }
770 }
771 ContentItem::ListItem(li) => {
772 li.location.origin_path = Some(Arc::clone(origin));
773 for ann in li.annotations.iter_mut() {
774 stamp_annotation(ann, origin);
775 }
776 for child in li.children.as_mut_vec().iter_mut() {
777 stamp_item(child, origin);
778 }
779 }
780 ContentItem::Definition(d) => {
781 d.location.origin_path = Some(Arc::clone(origin));
782 for ann in d.annotations.iter_mut() {
783 stamp_annotation(ann, origin);
784 }
785 for child in d.children.as_mut_vec().iter_mut() {
786 stamp_item(child, origin);
787 }
788 }
789 ContentItem::VerbatimBlock(v) => {
790 v.location.origin_path = Some(Arc::clone(origin));
791 }
792 ContentItem::VerbatimLine(vl) => {
793 vl.location.origin_path = Some(Arc::clone(origin));
794 }
795 ContentItem::Table(t) => {
796 t.location.origin_path = Some(Arc::clone(origin));
797 }
798 ContentItem::TextLine(tl) => {
799 tl.location.origin_path = Some(Arc::clone(origin));
800 }
801 ContentItem::BlankLineGroup(b) => {
802 b.location.origin_path = Some(Arc::clone(origin));
803 }
804 }
805}
806
807// ============================================================================
808// Parser glue
809// ============================================================================
810
/// Parse `source` into a Document but skip the annotation-attachment stage,
/// so include annotations are findable in container children lists.
///
/// Returns the parser's error message as a `String` on failure; callers
/// wrap it in [`IncludeError::ParseFailed`] together with the file path.
///
// NOTE(review): this delegates to a helper under `crate::lex::testing`, yet
// it is reached from the public `resolve_from_source`. Confirm that module
// is compiled in non-test builds, or move the helper to a production module.
fn parse_no_attach(source: &str) -> Result<Document, String> {
    crate::lex::testing::parse_without_annotation_attachment(source)
}
816
817// ============================================================================
818// Filesystem-backed loader
819// ============================================================================
820
/// [`Loader`] that reads files from the filesystem with `std::fs::read_to_string`.
///
/// This is the production loader used by the CLI; the LSP wraps it with a
/// file-watch invalidation layer in PR 8. lex-core's *resolver* code does not
/// reference `std::fs` — `FsLoader` is the one place where it does, isolated
/// behind the [`Loader`] trait so the rest of the crate stays sandbox- and
/// WASM-friendly.
///
/// `FsLoader` is a stateless unit type, so it is `Copy` and can be created
/// freely; construct one at the start of a resolution and share it for the
/// duration. Errors map cleanly:
/// - `std::io::ErrorKind::NotFound` → [`LoadError::NotFound`]
/// - any other I/O error → [`LoadError::Io`]
#[derive(Debug, Clone, Copy, Default)]
pub struct FsLoader;

impl FsLoader {
    /// Create a loader. Equivalent to `FsLoader::default()`.
    pub const fn new() -> Self {
        Self
    }
}
846
847impl Loader for FsLoader {
848 fn load(&self, path: &Path) -> Result<String, LoadError> {
849 std::fs::read_to_string(path).map_err(|e| match e.kind() {
850 std::io::ErrorKind::NotFound => LoadError::NotFound {
851 path: path.to_path_buf(),
852 },
853 _ => LoadError::Io {
854 path: path.to_path_buf(),
855 message: e.to_string(),
856 },
857 })
858 }
859}
860
861// ============================================================================
862// Test fixtures (test-support feature + cfg(test))
863// ============================================================================
864
/// [`Loader`] that serves file contents from an in-memory
/// `HashMap<PathBuf, String>`. Only built for tests or with the
/// `test-support` feature.
#[cfg(any(test, feature = "test-support"))]
pub struct MemoryLoader {
    files: std::collections::HashMap<PathBuf, String>,
}

#[cfg(any(test, feature = "test-support"))]
impl MemoryLoader {
    /// Start with no files registered; add them with [`MemoryLoader::insert`].
    pub fn new() -> Self {
        let files = std::collections::HashMap::new();
        Self { files }
    }

    /// Register `contents` as the text of the file at `path`, replacing any
    /// earlier entry for the same path. Returns `self` for chaining.
    pub fn insert<P: Into<PathBuf>, S: Into<String>>(&mut self, path: P, contents: S) -> &mut Self {
        let (key, text) = (path.into(), contents.into());
        self.files.insert(key, text);
        self
    }

    /// Build a loader from any iterator of `(path, contents)` pairs. When a
    /// path appears more than once, the last pair wins.
    pub fn from_pairs<I, P, S>(pairs: I) -> Self
    where
        I: IntoIterator<Item = (P, S)>,
        P: Into<PathBuf>,
        S: Into<String>,
    {
        Self {
            files: pairs
                .into_iter()
                .map(|(path, contents)| (path.into(), contents.into()))
                .collect(),
        }
    }
}

#[cfg(any(test, feature = "test-support"))]
impl Default for MemoryLoader {
    /// Same as [`MemoryLoader::new`]: an empty loader.
    fn default() -> Self {
        Self::new()
    }
}
908
#[cfg(any(test, feature = "test-support"))]
impl Loader for MemoryLoader {
    /// Return a copy of the text registered for `path`, or
    /// [`LoadError::NotFound`] when no file was registered under exactly
    /// that path.
    fn load(&self, path: &Path) -> Result<String, LoadError> {
        match self.files.get(path) {
            Some(text) => Ok(text.clone()),
            None => Err(LoadError::NotFound {
                path: path.to_path_buf(),
            }),
        }
    }
}
920
921// ============================================================================
922// Tests
923// ============================================================================
924
925#[cfg(test)]
926mod tests;