use crate::lex::assembling::AttachAnnotations;
use crate::lex::ast::elements::container::GeneralContainer;
use crate::lex::ast::elements::content_item::ContentItem;
use crate::lex::ast::elements::paragraph::Paragraph;
use crate::lex::ast::elements::session::Session;
use crate::lex::ast::range::Range;
use crate::lex::ast::Document;
use crate::lex::transforms::Runnable;
use std::path::{Path, PathBuf};
use std::sync::Arc;
/// Root directory and safety limits that govern include resolution.
#[derive(Debug, Clone)]
pub struct ResolveConfig {
    /// Directory that root-absolute (`/…`) include paths are joined onto and
    /// that every lexically resolved include path must stay inside.
    pub root: PathBuf,
    /// Maximum include nesting depth; reaching it yields
    /// `IncludeError::DepthExceeded`.
    pub max_depth: usize,
    /// Maximum number of successfully loaded includes in one resolution run;
    /// reaching it yields `IncludeError::TotalIncludesExceeded`.
    pub max_total_includes: usize,
}

impl ResolveConfig {
    /// Nesting limit used by [`ResolveConfig::with_root`].
    pub const DEFAULT_MAX_DEPTH: usize = 8;
    /// Total-include limit used by [`ResolveConfig::with_root`].
    pub const DEFAULT_MAX_TOTAL_INCLUDES: usize = 1000;

    /// Builds a configuration for `root` with the default limits.
    pub fn with_root(root: PathBuf) -> Self {
        let max_depth = Self::DEFAULT_MAX_DEPTH;
        let max_total_includes = Self::DEFAULT_MAX_TOTAL_INCLUDES;
        Self {
            root,
            max_depth,
            max_total_includes,
        }
    }
}
/// Source of include-file contents, abstracting over where the files live.
pub trait Loader {
    /// Loads the file at `path`, returning its text together with the path the
    /// loader treats as canonical for it.
    ///
    /// # Errors
    /// Returns a [`LoadError`] describing why the file could not be provided.
    fn load(&self, path: &Path) -> Result<LoadedFile, LoadError>;
}
/// Result of a successful [`Loader::load`] call.
#[derive(Debug, Clone)]
pub struct LoadedFile {
    /// Full text of the loaded file.
    pub source: String,
    /// Path the loader reports for the file; the resolver uses it for cycle
    /// detection and as the origin stamped onto the included nodes.
    pub canonical_path: PathBuf,
}
/// Failure reported by a [`Loader`] when it cannot provide a file.
#[derive(Debug, Clone)]
pub enum LoadError {
    /// No file exists at `path`.
    NotFound { path: PathBuf },
    /// `path` lies outside the loader's `root`.
    OutsideRoot { path: PathBuf, root: PathBuf },
    /// The file is `size` bytes, which is more than `limit`.
    TooLarge {
        path: PathBuf,
        size: u64,
        limit: u64,
    },
    /// Any other failure while reading `path`; `message` carries the details.
    Io { path: PathBuf, message: String },
}

impl std::fmt::Display for LoadError {
    /// Writes a one-line, human-readable description of the failure.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::NotFound { path } => {
                write!(f, "include not found: {}", path.display())
            }
            Self::OutsideRoot { path, root } => {
                write!(
                    f,
                    "include path {} resolves outside loader root {}",
                    path.display(),
                    root.display()
                )
            }
            Self::TooLarge { path, size, limit } => {
                write!(
                    f,
                    "include file {} is {size} bytes, exceeds limit of {limit} bytes",
                    path.display()
                )
            }
            Self::Io { path, message } => {
                write!(f, "io error reading {}: {message}", path.display())
            }
        }
    }
}

impl std::error::Error for LoadError {}
/// Failure raised while resolving `lex.include` annotations.
///
/// Variants carrying `include_site` record the location of the offending
/// include annotation.
#[derive(Debug, Clone)]
pub enum IncludeError {
    /// `path` is already on the active include `chain`.
    Cycle {
        include_site: Range,
        path: PathBuf,
        chain: Vec<PathBuf>,
    },
    /// Include nesting reached `limit`; `chain` is the active include chain.
    DepthExceeded {
        include_site: Range,
        limit: usize,
        chain: Vec<PathBuf>,
    },
    /// The number of loaded includes reached `limit`.
    TotalIncludesExceeded { include_site: Range, limit: usize },
    /// The loader refused `path` because its `size` exceeds `limit` bytes.
    FileTooLarge {
        include_site: Range,
        path: PathBuf,
        size: u64,
        limit: u64,
    },
    /// `path` resolves outside `root`.
    RootEscape { path: PathBuf, root: PathBuf },
    /// The include `src` was a platform-absolute filesystem path.
    AbsolutePath { path: PathBuf },
    /// The loader found no file at `path`.
    NotFound { include_site: Range, path: PathBuf },
    /// Parsing (or annotation attachment for) `path` failed with `message`.
    ParseFailed { path: PathBuf, message: String },
    /// `file` contains content (`violation`) that `container` does not allow.
    ContainerPolicy {
        include_site: Range,
        container: &'static str,
        file: PathBuf,
        violation: &'static str,
    },
    /// The loader hit an I/O failure reading `path`.
    LoaderIo { path: PathBuf, message: String },
    /// The include annotation has no `src=` parameter.
    MissingSrc { include_site: Range },
}

impl std::fmt::Display for IncludeError {
    /// Writes a one-line, human-readable description of the failure.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        /// Renders an include chain as `a -> b -> c`.
        fn arrow_joined(chain: &[PathBuf]) -> String {
            chain
                .iter()
                .map(|p| p.display().to_string())
                .collect::<Vec<String>>()
                .join(" -> ")
        }

        match self {
            Self::Cycle { path, chain, .. } => write!(
                f,
                "include cycle: {} (chain: {})",
                path.display(),
                arrow_joined(chain)
            ),
            Self::DepthExceeded { limit, chain, .. } => write!(
                f,
                "include depth exceeded limit of {limit} (chain: {})",
                arrow_joined(chain)
            ),
            Self::TotalIncludesExceeded { limit, .. } => {
                write!(f, "total include count exceeded limit of {limit}")
            }
            Self::FileTooLarge {
                path, size, limit, ..
            } => write!(
                f,
                "included file {} is {size} bytes, exceeds limit of {limit} bytes",
                path.display()
            ),
            Self::RootEscape { path, root } => write!(
                f,
                "include path {} escapes resolution root {}",
                path.display(),
                root.display()
            ),
            Self::AbsolutePath { path } => write!(
                f,
                "include src {} is a platform-absolute path; \
                 the spec forbids absolute filesystem paths — use a relative path \
                 (chapters/01.lex) or a root-absolute path (/shared/01.lex)",
                path.display()
            ),
            Self::NotFound { path, .. } => {
                write!(f, "include not found: {}", path.display())
            }
            Self::ParseFailed { path, message } => {
                write!(f, "failed to parse {}: {message}", path.display())
            }
            Self::ContainerPolicy {
                container,
                file,
                violation,
                ..
            } => write!(
                f,
                "included file {} contains {} but include site is inside {} \
                 (which does not allow {})",
                file.display(),
                violation,
                container,
                violation
            ),
            Self::LoaderIo { path, message } => {
                write!(f, "loader error reading {}: {message}", path.display())
            }
            Self::MissingSrc { .. } => {
                write!(f, "lex.include annotation missing required src= parameter")
            }
        }
    }
}

impl std::error::Error for IncludeError {}
/// Kind of container an include site sits in; decides what an included file
/// may contribute there.
#[derive(Debug, Clone, Copy)]
enum ContainerKind {
    Session,
    Definition,
    AnnotationBody,
    ListItem,
}

impl ContainerKind {
    /// Human-readable container name used in policy error messages.
    fn name(self) -> &'static str {
        match self {
            Self::Session => "Session",
            Self::Definition => "Definition",
            Self::AnnotationBody => "Annotation body",
            Self::ListItem => "ListItem",
        }
    }

    /// Whether spliced content may contain sessions; only session containers
    /// permit that.
    fn allows_sessions(self) -> bool {
        match self {
            Self::Session => true,
            Self::Definition | Self::AnnotationBody | Self::ListItem => false,
        }
    }
}
/// Parses `source`, expands every include it (transitively) contains, and
/// attaches annotations to the merged tree.
///
/// * `source_path` – path of the entry document, if known. It is stamped as
///   the origin of the entry document's nodes, seeds the include chain used
///   for cycle detection, and its parent directory anchors relative includes.
///   Without it, relative includes are anchored at `config.root`.
/// * `config` – resolution root and limits.
/// * `loader` – provider of included file contents.
///
/// # Errors
/// Returns `IncludeError::ParseFailed` when the entry document fails to parse
/// or annotation attachment fails, and otherwise whatever error include
/// expansion reports.
pub fn resolve_from_source(
    source: &str,
    source_path: Option<PathBuf>,
    config: &ResolveConfig,
    loader: &dyn Loader,
) -> Result<Document, IncludeError> {
    let mut doc = match parse_no_attach(source) {
        Ok(parsed) => parsed,
        Err(message) => {
            return Err(IncludeError::ParseFailed {
                path: source_path.unwrap_or_default(),
                message,
            });
        }
    };

    // Relative includes resolve against the entry file's directory, falling
    // back to the configured root when the entry has no usable path.
    let host_dir = match source_path.as_deref().and_then(Path::parent) {
        Some(dir) => dir.to_path_buf(),
        None => config.root.clone(),
    };

    let mut chain: Vec<PathBuf> = Vec::new();
    if let Some(entry) = source_path.as_deref() {
        stamp_doc(&mut doc, &Arc::new(entry.to_path_buf()));
        chain.push(lexical_normalize(entry));
    }

    let mut state = ResolverState {
        config,
        loader,
        chain: &mut chain,
        depth: 0,
        total_resolved: 0,
    };
    splice_in_session_container(doc.root.children.as_mut_vec(), &host_dir, &mut state)?;

    // Attachment runs once, after all includes have been spliced in.
    AttachAnnotations::new()
        .run(doc)
        .map_err(|e| IncludeError::ParseFailed {
            path: source_path.unwrap_or_default(),
            message: format!("annotation attachment failed: {e}"),
        })
}
/// Mutable bookkeeping threaded through one include-resolution run.
struct ResolverState<'a> {
    /// Resolution root and limits.
    config: &'a ResolveConfig,
    /// Provider of included file contents.
    loader: &'a dyn Loader,
    /// Paths of the files currently being expanded, outermost first; used for
    /// cycle detection and error reporting.
    chain: &'a mut Vec<PathBuf>,
    /// Current include nesting depth (0 while processing the entry document).
    depth: usize,
    /// Number of include files loaded successfully so far.
    total_resolved: usize,
}
/// Expands includes in a session-level child list.
///
/// Nested containers are handled first, then the include annotations that are
/// direct children of `children` (with session policy, so included sessions
/// are allowed).
fn splice_in_session_container(
    children: &mut Vec<ContentItem>,
    host_dir: &Path,
    state: &mut ResolverState<'_>,
) -> Result<(), IncludeError> {
    recurse_into_children(children, host_dir, state)
        .and_then(|()| process_includes(children, host_dir, state, ContainerKind::Session))
}
/// Expands includes inside a non-session container of the given `kind`.
///
/// Nested containers are handled first, then the include annotations that are
/// direct children of `container`; `kind` selects the policy applied to the
/// included content.
fn splice_in_general_container(
    container: &mut GeneralContainer,
    host_dir: &Path,
    state: &mut ResolverState<'_>,
    kind: ContainerKind,
) -> Result<(), IncludeError> {
    let items = container.as_mut_vec();
    recurse_into_children(items, host_dir, state)?;
    process_includes(items, host_dir, state, kind)
}
/// Expands every include annotation that is a direct child of `children`.
///
/// Each include annotation stays in place and the items produced for it by
/// `resolve_one_include` are inserted immediately after it. `kind` is the
/// container policy the included content is validated against.
///
/// # Errors
/// Returns the first error reported by `resolve_one_include`. Includes are
/// processed from the end of the list, so content already spliced for later
/// siblings remains in `children` when an earlier one fails.
#[allow(clippy::ptr_arg)]
fn process_includes(
    children: &mut Vec<ContentItem>,
    host_dir: &Path,
    state: &mut ResolverState<'_>,
    kind: ContainerKind,
) -> Result<(), IncludeError> {
    // Walk backwards over the original items: insertions land at `i + 1` and
    // above, so lower indices stay valid and freshly spliced items (already
    // resolved recursively) are never revisited.
    for i in (0..children.len()).rev() {
        // Borrow the annotation in place instead of deep-cloning it; the
        // borrow ends before the list is mutated below.
        let spliced = match &children[i] {
            ContentItem::Annotation(a) if a.is_include() => {
                resolve_one_include(a, host_dir, state, kind)?
            }
            _ => continue,
        };
        children.splice(i + 1..i + 1, spliced);
    }
    Ok(())
}
/// Resolves a single include annotation into the items to splice after it.
///
/// Steps, in order: read `src`, resolve it lexically against `host_dir` and
/// the configured root, enforce the depth and total-include limits, load the
/// file, reject cycles, parse and origin-stamp the file, expand its own
/// includes recursively, then flatten it and validate it against
/// `parent_kind`.
///
/// # Errors
/// Returns the `IncludeError` for whichever step fails first.
///
/// NOTE(review): nested includes are anchored at the directory of the
/// loader's `canonical_path`, while `resolve_path` compares against the
/// lexically normalised `config.root`. If the two differ (for example a
/// relative or symlinked root with a canonicalising loader), nested includes
/// could be rejected as root escapes. Confirm callers pass a canonical root.
fn resolve_one_include(
    annotation: &crate::lex::ast::elements::annotation::Annotation,
    host_dir: &Path,
    state: &mut ResolverState<'_>,
    parent_kind: ContainerKind,
) -> Result<Vec<ContentItem>, IncludeError> {
    let site = &annotation.location;
    let config = state.config;

    let src = match annotation.include_src() {
        Some(src) => src,
        None => {
            return Err(IncludeError::MissingSrc {
                include_site: site.clone(),
            });
        }
    };
    let target_path = resolve_path(&src, host_dir, &config.root)?;

    // Limits are checked before touching the loader.
    if state.depth >= config.max_depth {
        return Err(IncludeError::DepthExceeded {
            include_site: site.clone(),
            limit: config.max_depth,
            chain: state.chain.clone(),
        });
    }
    if state.total_resolved >= config.max_total_includes {
        return Err(IncludeError::TotalIncludesExceeded {
            include_site: site.clone(),
            limit: config.max_total_includes,
        });
    }

    let LoadedFile {
        source: target_source,
        canonical_path,
    } = match state.loader.load(&target_path) {
        Ok(file) => file,
        Err(err) => {
            return Err(match err {
                LoadError::NotFound { path } => IncludeError::NotFound {
                    include_site: site.clone(),
                    path,
                },
                LoadError::OutsideRoot { path, root } => IncludeError::RootEscape { path, root },
                LoadError::TooLarge { path, size, limit } => IncludeError::FileTooLarge {
                    include_site: site.clone(),
                    path,
                    size,
                    limit,
                },
                LoadError::Io { path, message } => IncludeError::LoaderIo { path, message },
            });
        }
    };
    // Only successful loads count towards the total.
    state.total_resolved += 1;

    // The cycle check needs the loader's canonical path, so it runs after the load.
    if state.chain.contains(&canonical_path) {
        return Err(IncludeError::Cycle {
            include_site: site.clone(),
            path: canonical_path,
            chain: state.chain.clone(),
        });
    }

    let mut included = match parse_no_attach(&target_source) {
        Ok(doc) => doc,
        Err(message) => {
            return Err(IncludeError::ParseFailed {
                path: canonical_path.clone(),
                message,
            });
        }
    };
    stamp_doc(&mut included, &Arc::new(canonical_path.clone()));

    let included_dir = match canonical_path.parent() {
        Some(dir) => dir.to_path_buf(),
        None => config.root.clone(),
    };

    // Enter the file, expand its own includes, and always leave again (even
    // on error) so `chain` and `depth` stay balanced for the caller.
    state.chain.push(canonical_path.clone());
    state.depth += 1;
    let nested =
        splice_in_session_container(included.root.children.as_mut_vec(), &included_dir, state);
    state.depth -= 1;
    state.chain.pop();
    nested?;

    let splice_items = prepare_splice_list(included);
    validate_against_kind(&splice_items, parent_kind, site, &canonical_path)?;
    Ok(splice_items)
}
/// Descends into every child that can itself hold content and expands the
/// includes found there.
///
/// Sessions use session policy; definitions, non-include annotation bodies and
/// list items use their respective `ContainerKind`. Include annotations are
/// skipped here: they are expanded by `process_includes`, not descended into.
#[allow(clippy::ptr_arg)]
fn recurse_into_children(
    children: &mut Vec<ContentItem>,
    host_dir: &Path,
    state: &mut ResolverState<'_>,
) -> Result<(), IncludeError> {
    for child in children.iter_mut() {
        match child {
            ContentItem::Session(session) => {
                splice_in_session_container(session.children.as_mut_vec(), host_dir, state)?
            }
            ContentItem::Definition(definition) => splice_in_general_container(
                &mut definition.children,
                host_dir,
                state,
                ContainerKind::Definition,
            )?,
            ContentItem::Annotation(ann) if !ann.is_include() => splice_in_general_container(
                &mut ann.children,
                host_dir,
                state,
                ContainerKind::AnnotationBody,
            )?,
            ContentItem::List(list) => {
                // Only `ListItem` entries carry children; anything else in the
                // list is ignored.
                let entries = list
                    .items
                    .as_mut_vec()
                    .iter_mut()
                    .filter_map(|li| match li {
                        ContentItem::ListItem(entry) => Some(entry),
                        _ => None,
                    });
                for entry in entries {
                    splice_in_general_container(
                        &mut entry.children,
                        host_dir,
                        state,
                        ContainerKind::ListItem,
                    )?;
                }
            }
            _ => {}
        }
    }
    Ok(())
}
fn prepare_splice_list(mut included: Document) -> Vec<ContentItem> {
let mut items: Vec<ContentItem> = Vec::new();
if let Some(title) = included.title {
let location = title.location.clone();
let para = Paragraph::from_line(title.as_str().to_string()).at(location);
items.push(ContentItem::Paragraph(para));
}
for ann in included.annotations {
items.push(ContentItem::Annotation(ann));
}
items.append(included.root.children.as_mut_vec());
items
}
/// Checks that `items` may be spliced into a container of `kind`.
///
/// Only the top-level `items` are inspected: a `Session` among them is
/// rejected unless `kind` allows sessions.
///
/// # Errors
/// Returns `IncludeError::ContainerPolicy` naming the container, the included
/// `file` and the include `site` when the policy is violated.
fn validate_against_kind(
    items: &[ContentItem],
    kind: ContainerKind,
    site: &Range,
    file: &Path,
) -> Result<(), IncludeError> {
    let policy_ok = kind.allows_sessions()
        || !items
            .iter()
            .any(|item| matches!(item, ContentItem::Session(_)));
    if policy_ok {
        Ok(())
    } else {
        Err(IncludeError::ContainerPolicy {
            include_site: site.clone(),
            container: kind.name(),
            file: file.to_path_buf(),
            violation: "Sessions",
        })
    }
}
/// Resolves a file reference `target` the same way include paths are resolved.
///
/// Relative targets are anchored at the parent directory of `ref_origin` (the
/// file containing the reference), or at `root` when no origin or parent is
/// available.
///
/// # Errors
/// Returns `IncludeError::AbsolutePath` or `IncludeError::RootEscape` as
/// reported by `resolve_path`.
pub fn resolve_file_reference(
    target: &str,
    ref_origin: Option<&Path>,
    root: &Path,
) -> Result<PathBuf, IncludeError> {
    let host_dir = ref_origin.and_then(Path::parent).unwrap_or(root);
    resolve_path(target, host_dir, root)
}
/// Turns an include `src` into a lexically normalised path inside `root`.
///
/// * A leading `/` makes `src` root-absolute: the remainder is joined onto
///   `root`.
/// * Any other platform-absolute path is rejected.
/// * Everything else is joined onto `host_dir`.
///
/// The check is purely lexical; the filesystem is not consulted.
///
/// # Errors
/// `IncludeError::AbsolutePath` for platform-absolute paths and
/// `IncludeError::RootEscape` when the normalised path is not under the
/// normalised `root`.
fn resolve_path(src: &str, host_dir: &Path, root: &Path) -> Result<PathBuf, IncludeError> {
    let candidate = match src.strip_prefix('/') {
        Some(rel) => root.join(rel),
        None if Path::new(src).is_absolute() => {
            return Err(IncludeError::AbsolutePath {
                path: PathBuf::from(src),
            });
        }
        None => host_dir.join(src),
    };

    let normalized = lexical_normalize(&candidate);
    let canonical_root = lexical_normalize(root);
    if normalized.starts_with(&canonical_root) {
        Ok(normalized)
    } else {
        Err(IncludeError::RootEscape {
            path: normalized,
            root: canonical_root,
        })
    }
}
/// Normalises `p` purely lexically, without touching the filesystem.
///
/// * `.` components are dropped.
/// * `..` removes the preceding normal component when there is one.
/// * `..` directly under the filesystem root is dropped, because the parent
///   of the root is the root itself (`/../a` becomes `/a`).
/// * Any other `..` (leading, or following another `..`) is kept, so relative
///   paths that climb above their start stay recognisable (`../a`).
///
/// Symlinks are not resolved.
fn lexical_normalize(p: &Path) -> PathBuf {
    use std::path::Component;

    let mut out = PathBuf::new();
    for c in p.components() {
        match c {
            Component::CurDir => {}
            Component::ParentDir => match out.components().next_back() {
                Some(Component::Normal(_)) => {
                    out.pop();
                }
                // Already at the root: there is nothing above it to climb to.
                Some(Component::RootDir) => {}
                _ => out.push(".."),
            },
            other => out.push(other.as_os_str()),
        }
    }
    out
}
/// Records `origin` as the source file of every located node in `doc`: the
/// title, the document-level annotations, and the root session's subtree.
fn stamp_doc(doc: &mut Document, origin: &Arc<PathBuf>) {
    if let Some(title) = &mut doc.title {
        title.location.origin_path = Some(Arc::clone(origin));
    }
    doc.annotations
        .iter_mut()
        .for_each(|ann| stamp_annotation(ann, origin));
    stamp_session(&mut doc.root, origin);
}
/// Records `origin` on a session, its title location (when present), its
/// annotations, and all of its children.
fn stamp_session(s: &mut Session, origin: &Arc<PathBuf>) {
    s.location.origin_path = Some(Arc::clone(origin));
    if let Some(title_loc) = s.title.location.as_mut() {
        title_loc.origin_path = Some(Arc::clone(origin));
    }
    s.annotations
        .iter_mut()
        .for_each(|ann| stamp_annotation(ann, origin));
    s.children
        .as_mut_vec()
        .iter_mut()
        .for_each(|child| stamp_item(child, origin));
}
/// Records `origin` on an annotation, on its data, and on every item in its
/// body.
fn stamp_annotation(
    a: &mut crate::lex::ast::elements::annotation::Annotation,
    origin: &Arc<PathBuf>,
) {
    a.location.origin_path = Some(Arc::clone(origin));
    a.data.location.origin_path = Some(Arc::clone(origin));
    a.children
        .as_mut_vec()
        .iter_mut()
        .for_each(|child| stamp_item(child, origin));
}
/// Records `origin` on `item` and, for variants that hold nested content or
/// annotations, on everything beneath it.
///
/// Verbatim blocks, verbatim lines, tables, text lines and blank-line groups
/// are stamped only on their own location.
fn stamp_item(item: &mut ContentItem, origin: &Arc<PathBuf>) {
    // Fresh handle to the shared origin for each stamped location.
    let mark = || Some(Arc::clone(origin));

    match item {
        ContentItem::Session(s) => stamp_session(s, origin),
        ContentItem::Annotation(a) => stamp_annotation(a, origin),
        ContentItem::Paragraph(p) => {
            p.location.origin_path = mark();
            p.annotations
                .iter_mut()
                .for_each(|ann| stamp_annotation(ann, origin));
            p.lines
                .iter_mut()
                .for_each(|line| stamp_item(line, origin));
        }
        ContentItem::List(l) => {
            l.location.origin_path = mark();
            l.items
                .as_mut_vec()
                .iter_mut()
                .for_each(|li| stamp_item(li, origin));
        }
        ContentItem::ListItem(li) => {
            li.location.origin_path = mark();
            li.annotations
                .iter_mut()
                .for_each(|ann| stamp_annotation(ann, origin));
            li.children
                .as_mut_vec()
                .iter_mut()
                .for_each(|child| stamp_item(child, origin));
        }
        ContentItem::Definition(d) => {
            d.location.origin_path = mark();
            d.annotations
                .iter_mut()
                .for_each(|ann| stamp_annotation(ann, origin));
            d.children
                .as_mut_vec()
                .iter_mut()
                .for_each(|child| stamp_item(child, origin));
        }
        ContentItem::VerbatimBlock(v) => v.location.origin_path = mark(),
        ContentItem::VerbatimLine(vl) => vl.location.origin_path = mark(),
        ContentItem::Table(t) => t.location.origin_path = mark(),
        ContentItem::TextLine(tl) => tl.location.origin_path = mark(),
        ContentItem::BlankLineGroup(b) => b.location.origin_path = mark(),
    }
}
/// Parses `source` into a `Document` by delegating to
/// `crate::lex::testing::parse_without_annotation_attachment`; presumably this
/// leaves annotations unattached, as the helper's name suggests, so attachment
/// can run once after includes are spliced.
///
/// On failure the helper's error message is returned as a `String`.
///
/// NOTE(review): this non-test code path depends on a function in the
/// `testing` module — confirm that module is meant to be available in
/// production builds.
fn parse_no_attach(source: &str) -> Result<Document, String> {
    crate::lex::testing::parse_without_annotation_attachment(source)
}
/// [`Loader`] backed by the real filesystem, confined to a root directory and
/// a maximum file size.
pub struct FsLoader {
    /// Canonicalised root (or the root as given when canonicalisation failed).
    canonical_root: PathBuf,
    /// Largest file size, in bytes, that `load` accepts.
    max_file_size: u64,
}

impl FsLoader {
    /// Size limit applied by [`FsLoader::new`]: 10 MiB.
    pub const DEFAULT_MAX_FILE_SIZE: u64 = 10 * 1024 * 1024;

    /// Creates a loader confined to `root` with the default size limit.
    ///
    /// `root` is canonicalised when possible; if that fails (for example the
    /// directory does not exist) the path is kept as given.
    pub fn new(root: PathBuf) -> Self {
        Self {
            canonical_root: std::fs::canonicalize(&root).unwrap_or(root),
            max_file_size: Self::DEFAULT_MAX_FILE_SIZE,
        }
    }

    /// Returns the loader with its size limit replaced by `max_file_size`.
    pub fn with_max_file_size(self, max_file_size: u64) -> Self {
        Self {
            max_file_size,
            ..self
        }
    }
}
impl Loader for FsLoader {
fn load(&self, path: &Path) -> Result<LoadedFile, LoadError> {
let canonical_path = std::fs::canonicalize(path).map_err(|e| match e.kind() {
std::io::ErrorKind::NotFound => LoadError::NotFound {
path: path.to_path_buf(),
},
_ => LoadError::Io {
path: path.to_path_buf(),
message: e.to_string(),
},
})?;
if !canonical_path.starts_with(&self.canonical_root) {
return Err(LoadError::OutsideRoot {
path: canonical_path,
root: self.canonical_root.clone(),
});
}
let meta = std::fs::metadata(&canonical_path).map_err(|e| LoadError::Io {
path: canonical_path.clone(),
message: e.to_string(),
})?;
if !meta.is_file() {
return Err(LoadError::Io {
path: canonical_path,
message: "include target is not a regular file".to_string(),
});
}
let size = meta.len();
if size > self.max_file_size {
return Err(LoadError::TooLarge {
path: canonical_path,
size,
limit: self.max_file_size,
});
}
let source = std::fs::read_to_string(&canonical_path).map_err(|e| LoadError::Io {
path: canonical_path.clone(),
message: e.to_string(),
})?;
Ok(LoadedFile {
source,
canonical_path,
})
}
}
/// In-memory [`Loader`] for tests: serves file contents from a map keyed by
/// path.
#[cfg(any(test, feature = "test-support"))]
#[derive(Default)]
pub struct MemoryLoader {
    /// Registered file contents, keyed by the exact path they are loaded by.
    files: std::collections::HashMap<PathBuf, String>,
}

#[cfg(any(test, feature = "test-support"))]
impl MemoryLoader {
    /// Creates a loader with no files.
    pub fn new() -> Self {
        Self::default()
    }

    /// Registers `contents` under `path`, replacing any previous entry, and
    /// returns `self` so calls can be chained.
    pub fn insert<P: Into<PathBuf>, S: Into<String>>(&mut self, path: P, contents: S) -> &mut Self {
        let (key, text) = (path.into(), contents.into());
        self.files.insert(key, text);
        self
    }

    /// Builds a loader from `(path, contents)` pairs; later pairs win when a
    /// path repeats.
    pub fn from_pairs<I, P, S>(pairs: I) -> Self
    where
        I: IntoIterator<Item = (P, S)>,
        P: Into<PathBuf>,
        S: Into<String>,
    {
        pairs
            .into_iter()
            .fold(Self::new(), |mut loader, (path, contents)| {
                loader.insert(path, contents);
                loader
            })
    }
}
#[cfg(any(test, feature = "test-support"))]
impl Loader for MemoryLoader {
    /// Returns a copy of the contents registered under exactly `path`; the
    /// requested path doubles as the canonical path.
    ///
    /// # Errors
    /// `LoadError::NotFound` when nothing is registered under `path`.
    fn load(&self, path: &Path) -> Result<LoadedFile, LoadError> {
        match self.files.get(path) {
            Some(contents) => Ok(LoadedFile {
                source: contents.clone(),
                canonical_path: path.to_path_buf(),
            }),
            None => Err(LoadError::NotFound {
                path: path.to_path_buf(),
            }),
        }
    }
}
#[cfg(test)]
mod tests;