use std::collections::BTreeSet;
use std::fmt::Write as _;
use std::path::{Path, PathBuf};
use serde::{Deserialize, Serialize};
use crate::auto::strip_html_comments;
/// Maximum nesting depth for `@`-imports; an import is skipped with a
/// `depth-cap` marker once the current depth has reached this value.
pub const MAX_IMPORT_DEPTH: u8 = 5;
/// Largest size, in bytes, a single imported file may have (64 KiB).
pub const IMPORT_MAX_BYTES: usize = 64 * 1024;
/// Upper bound, in bytes, on the combined raw size of all imports accepted
/// through one `ImportState` (256 KiB).
pub const IMPORT_TOTAL_BUDGET: usize = 256 * 1024;
/// Decision produced for an import that requires approval.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ImportApproval {
/// Approve and record the path in the allowlist (which is also saved to
/// disk when an allowlist path is configured).
AlwaysAllow,
/// Approve and remember the path only in the in-memory
/// `session_allow_once` set of the current `ImportState`.
AllowOnce,
/// Refuse; the import is replaced by a `denied` skip marker.
Deny,
}
/// Callback consulted in interactive mode. It is called with the canonical
/// path of the file to import, followed by the path of the importing file.
pub type ApprovalCallback<'a> = dyn Fn(&Path, &Path) -> ImportApproval + Send + Sync + 'a;
/// Record of import paths that have been approved permanently.
///
/// Serialized as JSON by `ImportAllowlist::save`. Both fields fall back to
/// their defaults when they are missing from the file being loaded.
#[derive(Debug, Serialize, Deserialize)]
pub struct ImportAllowlist {
    /// Schema version of the allowlist file.
    #[serde(default = "default_version")]
    pub version: u32,
    /// Approved entries, in the order they were added.
    #[serde(default)]
    pub approved: Vec<ApprovedEntry>,
}

impl Default for ImportAllowlist {
    /// Builds an empty allowlist stamped with the current schema version.
    ///
    /// Implemented by hand rather than derived: a derived `Default` would set
    /// `version` to 0, disagreeing with the value serde uses when the field is
    /// absent, so a freshly created allowlist would be saved as version 0.
    fn default() -> Self {
        Self {
            version: default_version(),
            approved: Vec::new(),
        }
    }
}

/// Schema version used for new allowlists and for files without a `version`.
fn default_version() -> u32 {
    1
}
/// One permanently approved import path, as stored in the allowlist.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ApprovedEntry {
/// Approved path; `ImportAllowlist::add` stores it in canonical form.
pub path: PathBuf,
/// Time of approval; `ImportAllowlist::add` writes an RFC 3339 UTC timestamp.
pub approved_at: String,
/// Identifier of the session that granted the approval, when one was given.
#[serde(default)]
pub approved_session: Option<String>,
}
impl ImportAllowlist {
    /// Reads an allowlist from the JSON file at `path`.
    ///
    /// A missing file is not an error; it yields `Self::default()`.
    ///
    /// # Errors
    ///
    /// Returns the underlying I/O error for any read failure other than
    /// `NotFound`, and an `InvalidData` error when the contents cannot be
    /// deserialized.
    pub fn load(path: &Path) -> std::io::Result<Self> {
        let bytes = match std::fs::read(path) {
            Ok(bytes) => bytes,
            Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
                return Ok(Self::default());
            }
            Err(err) => return Err(err),
        };
        serde_json::from_slice(&bytes)
            .map_err(|err| std::io::Error::new(std::io::ErrorKind::InvalidData, err.to_string()))
    }

    /// Writes the allowlist to `path` as pretty-printed JSON.
    ///
    /// # Errors
    ///
    /// Returns an `InvalidData` error when serialization fails, or whatever
    /// error the atomic write reports.
    pub fn save(&self, path: &Path) -> std::io::Result<()> {
        let encoded = match serde_json::to_vec_pretty(self) {
            Ok(encoded) => encoded,
            Err(err) => {
                return Err(std::io::Error::new(
                    std::io::ErrorKind::InvalidData,
                    err.to_string(),
                ));
            }
        };
        caliban_common::fs::write_atomic(path, &encoded)
    }

    /// Reports whether `path` is already approved.
    ///
    /// Both `path` and every stored entry are passed through `canonical_or`
    /// before being compared.
    #[must_use]
    pub fn contains(&self, path: &Path) -> bool {
        let wanted = canonical_or(path);
        for entry in &self.approved {
            if canonical_or(&entry.path) == wanted {
                return true;
            }
        }
        false
    }

    /// Approves `path`, recording the current UTC time and the optional
    /// `session_id`. Does nothing when the path is already approved.
    pub fn add(&mut self, path: &Path, session_id: Option<&str>) {
        if !self.contains(path) {
            let entry = ApprovedEntry {
                path: canonical_or(path),
                approved_at: chrono::Utc::now().to_rfc3339(),
                approved_session: session_id.map(str::to_owned),
            };
            self.approved.push(entry);
        }
    }
}
/// How imports that require approval are decided.
pub enum ApprovalMode<'a> {
/// Ask the wrapped callback for a decision.
Interactive(Box<ApprovalCallback<'a>>),
/// Approve without asking; the path is also added to the allowlist.
AutoAllow,
/// Refuse without asking; a warning is logged.
AutoDeny,
}
impl std::fmt::Debug for ApprovalMode<'_> {
    /// Writes the variant name; the interactive callback is shown as the
    /// placeholder `<fn>` because closures have no `Debug` output.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            Self::Interactive(_) => "Interactive(<fn>)",
            Self::AutoAllow => "AutoAllow",
            Self::AutoDeny => "AutoDeny",
        };
        f.write_str(label)
    }
}
/// Mutable bookkeeping shared by every step of one import-resolution run.
pub struct ImportState<'a> {
/// Workspace root; imports that resolve underneath it need no approval.
pub workspace_root: PathBuf,
/// Paths that have been approved permanently.
pub allowlist: ImportAllowlist,
/// Where the allowlist is saved after a new permanent approval; `None`
/// means it is never written to disk.
pub allowlist_path: Option<PathBuf>,
/// How imports that require approval are decided.
pub approval: ApprovalMode<'a>,
/// Canonical paths of files already loaded (top-level importers and inlined
/// imports); importing one again emits an "already loaded" marker instead.
pub loaded: BTreeSet<PathBuf>,
/// Current nesting depth; raised by one while an import's own body is
/// being resolved.
pub depth: u8,
/// Files currently being resolved, outermost first; used to detect cycles.
pub import_stack: Vec<PathBuf>,
/// Total raw size, in bytes, of the imports accepted so far; compared
/// against `IMPORT_TOTAL_BUDGET`.
pub bytes_emitted: usize,
/// Total raw size, in bytes, of the imports rejected for exceeding the budget.
pub bytes_shed: usize,
/// Paths approved with `ImportApproval::AllowOnce`; never written to disk.
pub session_allow_once: BTreeSet<PathBuf>,
}
impl<'a> ImportState<'a> {
    /// Creates a fresh state for `workspace_root` with an empty default
    /// allowlist, no allowlist file, and all counters at zero.
    #[must_use]
    pub fn new(workspace_root: PathBuf, approval: ApprovalMode<'a>) -> Self {
        Self {
            workspace_root,
            approval,
            allowlist: ImportAllowlist::default(),
            allowlist_path: None,
            loaded: BTreeSet::new(),
            session_allow_once: BTreeSet::new(),
            import_stack: Vec::new(),
            depth: 0,
            bytes_emitted: 0,
            bytes_shed: 0,
        }
    }

    /// Replaces the allowlist and the location it is saved to, returning the
    /// updated state (builder style).
    #[must_use]
    pub fn with_allowlist(self, allowlist: ImportAllowlist, path: Option<PathBuf>) -> Self {
        Self {
            allowlist,
            allowlist_path: path,
            ..self
        }
    }
}
/// Extracts the import target from a line that starts with an `@` directive.
///
/// Leading whitespace is ignored and the target is the first
/// whitespace-delimited token after the `@`. To avoid treating mentions such
/// as `@someone` as imports, the token must look like a path: it has to
/// contain `/` or `.`, or start with `~`.
///
/// Returns `None` when the line is not a directive.
#[must_use]
pub fn parse_import_directive(line: &str) -> Option<&str> {
    let after_at = line.trim_start().strip_prefix('@')?;
    after_at
        .split_whitespace()
        .next()
        .filter(|candidate| {
            candidate.starts_with('~') || candidate.contains(|ch: char| ch == '/' || ch == '.')
        })
}
/// Reason an `@`-import was skipped. The `Display` form is the short tag
/// embedded in the skip marker written to the output.
#[derive(Debug)]
enum ImportFailure {
    /// The target is an `http://` or `https://` URL.
    UnsupportedScheme,
    /// The target is missing, is not a regular file, or could not be read.
    NotFound,
    /// The target is larger than the per-file limit; `bytes` is its size.
    TooLarge { bytes: usize },
    /// Accepting the target would exceed the cumulative byte budget.
    BudgetExceeded,
    /// Approval was required and was not granted.
    Denied,
    /// The target is already on the stack of files being resolved.
    Cycle,
    /// The maximum nesting depth has been reached.
    DepthCap,
    /// The target could not be turned into a path.
    InvalidPath,
}

impl std::fmt::Display for ImportFailure {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Only `TooLarge` carries data; every other variant maps to a fixed tag.
        let tag = match self {
            Self::TooLarge { bytes } => return write!(f, "too-large ({bytes} bytes)"),
            Self::UnsupportedScheme => "unsupported-scheme",
            Self::NotFound => "not-found",
            Self::BudgetExceeded => "tier-budget-exceeded",
            Self::Denied => "denied",
            Self::Cycle => "cycle",
            Self::DepthCap => "depth-cap",
            Self::InvalidPath => "invalid-path",
        };
        f.write_str(tag)
    }
}
/// Expands the `@`-import directives in `body`, the contents of `importer`,
/// and returns the resulting text.
///
/// Unless `importer` is already on the import stack, it is pushed for the
/// duration of the call and recorded as loaded, so that imports pointing back
/// at it are reported as cycles.
pub fn resolve_imports(body: &str, importer: &Path, state: &mut ImportState<'_>) -> String {
    let root = canonical_or(importer);
    let already_on_stack = state.import_stack.contains(&root);
    if !already_on_stack {
        state.import_stack.push(root.clone());
        state.loaded.insert(root);
    }

    let resolved = resolve_imports_inner(body, importer, state);

    // Only undo the push this call performed.
    if !already_on_stack {
        state.import_stack.pop();
    }
    resolved
}
fn resolve_imports_inner(body: &str, importer: &Path, state: &mut ImportState<'_>) -> String {
let mut out = String::with_capacity(body.len());
for line in body.lines() {
let Some(token) = parse_import_directive(line) else {
out.push_str(line);
out.push('\n');
continue;
};
if token.starts_with("http://") || token.starts_with("https://") {
push_failure(&mut out, line, token, &ImportFailure::UnsupportedScheme);
continue;
}
let Some(resolved) = resolve_relative(token, importer) else {
push_failure(&mut out, line, token, &ImportFailure::InvalidPath);
continue;
};
let canonical = canonical_or(&resolved);
if state.depth >= MAX_IMPORT_DEPTH {
tracing::warn!(
target: caliban_common::tracing_targets::TARGET_MEMORY,
importer = %importer.display(),
token,
"@-import depth cap reached",
);
push_failure(&mut out, line, token, &ImportFailure::DepthCap);
continue;
}
if state.import_stack.iter().any(|p| p == &canonical) {
push_failure(&mut out, line, token, &ImportFailure::Cycle);
continue;
}
if state.loaded.contains(&canonical) {
let _ = writeln!(out, "[@-import already loaded: {token}]");
continue;
}
if needs_approval(&canonical, &state.workspace_root)
&& !approval_grants(&canonical, importer, state)
{
push_failure(&mut out, line, token, &ImportFailure::Denied);
continue;
}
let raw = match std::fs::metadata(&canonical) {
Ok(md) if md.is_file() => {
let len_usize = usize::try_from(md.len()).unwrap_or(usize::MAX);
if len_usize > IMPORT_MAX_BYTES {
push_failure(
&mut out,
line,
token,
&ImportFailure::TooLarge { bytes: len_usize },
);
continue;
}
if let Ok(bytes) = std::fs::read(&canonical) {
String::from_utf8_lossy(&bytes).into_owned()
} else {
push_failure(&mut out, line, token, &ImportFailure::NotFound);
continue;
}
}
_ => {
push_failure(&mut out, line, token, &ImportFailure::NotFound);
continue;
}
};
let projected = state.bytes_emitted.saturating_add(raw.len());
if projected > IMPORT_TOTAL_BUDGET {
state.bytes_shed = state.bytes_shed.saturating_add(raw.len());
push_failure(&mut out, line, token, &ImportFailure::BudgetExceeded);
continue;
}
state.bytes_emitted = projected;
state.loaded.insert(canonical.clone());
state.depth += 1;
state.import_stack.push(canonical.clone());
let sub = resolve_imports_inner(&raw, &canonical, state);
let sub_stripped = strip_html_comments(&sub);
state.import_stack.pop();
state.depth -= 1;
let _ = writeln!(
out,
"<!-- imported from {} (depth={}) -->",
canonical.display(),
state.depth + 1,
);
out.push_str(&sub_stripped);
if !sub_stripped.ends_with('\n') {
out.push('\n');
}
let _ = writeln!(out, "<!-- end {} -->", canonical.display());
}
out
}
/// Appends a skip marker for `token` to `out`, naming the reason `why`.
///
/// For an unsupported scheme or an invalid path the original `line` is kept
/// after the marker; for every other reason the directive line is dropped.
fn push_failure(out: &mut String, line: &str, token: &str, why: &ImportFailure) {
    let _ = writeln!(out, "[@-import skipped ({why}): {token}]");

    let keep_original_line = match why {
        ImportFailure::UnsupportedScheme | ImportFailure::InvalidPath => true,
        ImportFailure::NotFound
        | ImportFailure::TooLarge { .. }
        | ImportFailure::BudgetExceeded
        | ImportFailure::Denied
        | ImportFailure::Cycle
        | ImportFailure::DepthCap => false,
    };
    if keep_original_line {
        out.push_str(line);
        out.push('\n');
    }
}
/// Reports whether importing `resolved` requires approval.
///
/// No approval is needed for paths under `workspace_root` or under the
/// `caliban` directory inside the user's configuration directory. A path
/// counts as being under a directory when either the canonicalized or the
/// literal forms match.
fn needs_approval(resolved: &Path, workspace_root: &Path) -> bool {
    let target = canonical_or(resolved);
    let is_under =
        |dir: &Path| target.starts_with(canonical_or(dir)) || resolved.starts_with(dir);

    if is_under(workspace_root) {
        return false;
    }
    match dirs::config_dir() {
        Some(base) => !is_under(&base.join("caliban")),
        None => true,
    }
}
/// Decides whether the import of `resolved`, requested by `importer`, may
/// proceed.
///
/// A path already in the allowlist or in the allow-once set is granted
/// without consulting the approval mode. Otherwise the mode decides:
/// `AutoAllow` behaves like an "always allow" answer, `AutoDeny` logs a
/// warning and refuses, and `Interactive` asks the callback.
///
/// An "always allow" decision adds the path to the allowlist and, when an
/// allowlist path is configured, saves it. Saving is best-effort: a failure is
/// logged and the import is still granted, since the in-memory allowlist
/// already holds the approval.
fn approval_grants(resolved: &Path, importer: &Path, state: &mut ImportState<'_>) -> bool {
    let canon = canonical_or(resolved);
    if state.allowlist.contains(&canon) || state.session_allow_once.contains(&canon) {
        return true;
    }

    let decision = match &state.approval {
        ApprovalMode::AutoAllow => ImportApproval::AlwaysAllow,
        ApprovalMode::AutoDeny => {
            tracing::warn!(
                target: caliban_common::tracing_targets::TARGET_MEMORY,
                path = %canon.display(),
                "external @-import auto-denied (non-interactive mode)",
            );
            ImportApproval::Deny
        }
        ApprovalMode::Interactive(cb) => cb(&canon, importer),
    };

    match decision {
        ImportApproval::AlwaysAllow => {
            state.allowlist.add(&canon, None);
            if let Some(p) = state.allowlist_path.as_deref() {
                if let Err(err) = state.allowlist.save(p) {
                    tracing::warn!(
                        target: caliban_common::tracing_targets::TARGET_MEMORY,
                        path = %p.display(),
                        error = %err,
                        "failed to save @-import allowlist; approval kept in memory only",
                    );
                }
            }
            true
        }
        ImportApproval::AllowOnce => {
            state.session_allow_once.insert(canon);
            true
        }
        ImportApproval::Deny => false,
    }
}
/// Turns an import `token` into a path.
///
/// - `~` and `~/rest` are resolved against the user's home directory
///   (`None` when it cannot be determined).
/// - An absolute path is returned unchanged.
/// - Anything else is joined onto the directory of `importer` (or `.` when
///   it has no parent) and normalized lexically.
///
/// Returns `None` for an empty token.
#[must_use]
fn resolve_relative(token: &str, importer: &Path) -> Option<PathBuf> {
    match token {
        "" => None,
        "~" => dirs::home_dir(),
        _ => {
            if let Some(tail) = token.strip_prefix("~/") {
                return dirs::home_dir().map(|home| home.join(tail));
            }
            let candidate = Path::new(token);
            let resolved = if candidate.is_absolute() {
                candidate.to_path_buf()
            } else {
                let base = importer.parent().unwrap_or_else(|| Path::new("."));
                normalize(&base.join(candidate))
            };
            Some(resolved)
        }
    }
}
/// Normalizes `p` lexically, without touching the file system: `.` components
/// are removed and each `..` cancels the preceding named component.
///
/// A `..` that has nothing to cancel is kept when the path is relative
/// (`../a` stays `../a`, `a/../../b` becomes `../b`), because dropping it
/// would silently point at a different location. Directly after a root or
/// prefix, `..` is ignored, since the parent of the root is the root itself.
///
/// Symbolic links are not taken into account.
fn normalize(p: &Path) -> PathBuf {
    let mut out = PathBuf::new();
    for c in p.components() {
        match c {
            std::path::Component::CurDir => {}
            std::path::Component::ParentDir => {
                // Inspect the tail first, then mutate: what `..` does depends
                // on what it follows.
                let tail = out.components().next_back();
                let follows_name = matches!(tail, Some(std::path::Component::Normal(_)));
                let must_keep = matches!(tail, None | Some(std::path::Component::ParentDir));
                if follows_name {
                    out.pop();
                } else if must_keep {
                    out.push("..");
                }
            }
            other => out.push(other.as_os_str()),
        }
    }
    out
}
/// Returns the canonical form of `p`, falling back to a purely lexical
/// normalization when canonicalization fails (for example because the path
/// does not exist).
#[must_use]
pub fn canonical_or(p: &Path) -> PathBuf {
    match std::fs::canonicalize(p) {
        Ok(real) => real,
        Err(_) => normalize(p),
    }
}
// Unit tests for directive parsing, import resolution, approval handling and
// allowlist persistence. Each file-system test works inside its own temporary
// directory.
#[cfg(test)]
mod tests {
use super::*;
use std::fs;
use tempfile::TempDir;
// Approval mode used by tests that only import files inside the workspace.
// Despite the name, this returns a mode (`AutoDeny`), not a callback.
fn deny_cb<'a>() -> ApprovalMode<'a> {
ApprovalMode::AutoDeny
}
// Tokens containing `/` or `.`, or starting with `~`, are directives.
#[test]
fn parse_import_directive_recognizes_path_like_tokens() {
assert_eq!(parse_import_directive("@./foo.md"), Some("./foo.md"));
assert_eq!(
parse_import_directive("@~/notes/api.md"),
Some("~/notes/api.md"),
);
assert_eq!(
parse_import_directive("@/abs/path.md"),
Some("/abs/path.md")
);
assert_eq!(parse_import_directive("@foo.md"), Some("foo.md"));
assert_eq!(parse_import_directive(" @./foo.md"), Some("./foo.md"));
}
// Plain `@name` mentions, and an `@` that is not at the start of the line,
// are not directives.
#[test]
fn parse_import_directive_rejects_user_mentions_and_interface_names() {
assert_eq!(parse_import_directive("@someone"), None);
assert_eq!(parse_import_directive("@MyInterface"), None);
assert_eq!(parse_import_directive("ping @someone here"), None);
assert_eq!(parse_import_directive("@_underscore"), None);
assert_eq!(parse_import_directive("@"), None);
}
// A workspace-local import is inlined between provenance markers, and the
// surrounding lines are preserved.
#[test]
fn resolve_imports_inlines_referenced_file() {
let tmp = TempDir::new().unwrap();
let root = tmp.path();
let importer = root.join("CLAUDE.md");
fs::write(root.join("part.md"), "PART-BODY\n").unwrap();
fs::write(&importer, "header\n@./part.md\nfooter\n").unwrap();
let body = fs::read_to_string(&importer).unwrap();
let mut state = ImportState::new(root.to_path_buf(), deny_cb());
let out = resolve_imports(&body, &importer, &mut state);
assert!(out.contains("header"));
assert!(out.contains("PART-BODY"));
assert!(out.contains("footer"));
assert!(out.contains("imported from"));
assert!(out.contains("end"));
}
// Chain top -> a -> b -> c -> d -> e -> f: the sixth nested import (f) must
// be rejected with a depth-cap marker.
#[test]
fn resolve_imports_enforces_depth_cap_at_five() {
let tmp = TempDir::new().unwrap();
let root = tmp.path();
fs::write(root.join("top.md"), "@./a.md\n").unwrap();
fs::write(root.join("a.md"), "A-LEVEL\n@./b.md\n").unwrap();
fs::write(root.join("b.md"), "B-LEVEL\n@./c.md\n").unwrap();
fs::write(root.join("c.md"), "C-LEVEL\n@./d.md\n").unwrap();
fs::write(root.join("d.md"), "D-LEVEL\n@./e.md\n").unwrap();
fs::write(root.join("e.md"), "E-LEVEL\n@./f.md\n").unwrap();
fs::write(root.join("f.md"), "F-SHOULD-NOT-APPEAR\n").unwrap();
let body = fs::read_to_string(root.join("top.md")).unwrap();
let mut state = ImportState::new(root.to_path_buf(), deny_cb());
let out = resolve_imports(&body, &root.join("top.md"), &mut state);
assert!(out.contains("A-LEVEL"));
assert!(out.contains("E-LEVEL"));
assert!(
!out.contains("F-SHOULD-NOT-APPEAR"),
"depth-6 file should have been rejected: {out}",
);
assert!(out.contains("depth-cap"));
}
// Boundary case: five nested imports (a through e) are all inlined.
#[test]
fn resolve_imports_allows_exactly_five_levels() {
let tmp = TempDir::new().unwrap();
let root = tmp.path();
fs::write(root.join("top.md"), "@./a.md\n").unwrap();
fs::write(root.join("a.md"), "A-LEVEL\n@./b.md\n").unwrap();
fs::write(root.join("b.md"), "B-LEVEL\n@./c.md\n").unwrap();
fs::write(root.join("c.md"), "C-LEVEL\n@./d.md\n").unwrap();
fs::write(root.join("d.md"), "D-LEVEL\n@./e.md\n").unwrap();
fs::write(root.join("e.md"), "E-LEAF\n").unwrap();
let body = fs::read_to_string(root.join("top.md")).unwrap();
let mut state = ImportState::new(root.to_path_buf(), deny_cb());
let out = resolve_imports(&body, &root.join("top.md"), &mut state);
assert!(out.contains("E-LEAF"));
}
// a imports b and b imports a: both bodies appear once and the back-edge
// is reported as a cycle.
#[test]
fn resolve_imports_detects_cycles() {
let tmp = TempDir::new().unwrap();
let root = tmp.path();
fs::write(root.join("a.md"), "A-BODY\n@./b.md\n").unwrap();
fs::write(root.join("b.md"), "B-BODY\n@./a.md\n").unwrap();
let mut state = ImportState::new(root.to_path_buf(), deny_cb());
let body = fs::read_to_string(root.join("a.md")).unwrap();
let out = resolve_imports(&body, &root.join("a.md"), &mut state);
assert!(out.contains("A-BODY"));
assert!(out.contains("B-BODY"));
assert!(out.contains("cycle"), "no cycle marker: {out}");
}
// URL targets are never fetched, even when approvals are granted automatically.
#[test]
fn resolve_imports_rejects_http_urls() {
let tmp = TempDir::new().unwrap();
let root = tmp.path();
let importer = root.join("CLAUDE.md");
fs::write(&importer, "header\n@https://example.com/x.md\nfooter\n").unwrap();
let body = fs::read_to_string(&importer).unwrap();
let mut state = ImportState::new(root.to_path_buf(), ApprovalMode::AutoAllow);
let out = resolve_imports(&body, &importer, &mut state);
assert!(out.contains("unsupported-scheme"));
}
// A file outside the workspace requires approval; with `AutoDeny` it is
// refused and its content must not appear in the output.
#[test]
fn first_time_external_import_prompts_then_denies() {
let tmp = TempDir::new().unwrap();
let external = tmp.path().join("outside");
fs::create_dir_all(&external).unwrap();
fs::write(external.join("rules.md"), "EXTERNAL").unwrap();
let workspace = tmp.path().join("ws");
fs::create_dir_all(&workspace).unwrap();
let importer = workspace.join("CLAUDE.md");
let import_token = format!("@{}", external.join("rules.md").display());
fs::write(&importer, format!("{import_token}\n")).unwrap();
let body = fs::read_to_string(&importer).unwrap();
let mut state = ImportState::new(workspace.clone(), ApprovalMode::AutoDeny);
let out = resolve_imports(&body, &importer, &mut state);
assert!(!out.contains("EXTERNAL"));
assert!(out.contains("denied"));
}
// An interactive "always allow" answer inlines the external file and
// records it in the in-memory allowlist.
#[test]
fn first_time_external_import_can_be_approved() {
let tmp = TempDir::new().unwrap();
let external = tmp.path().join("outside");
fs::create_dir_all(&external).unwrap();
fs::write(external.join("rules.md"), "EXTERNAL").unwrap();
let workspace = tmp.path().join("ws");
fs::create_dir_all(&workspace).unwrap();
let importer = workspace.join("CLAUDE.md");
let import_token = format!("@{}", external.join("rules.md").display());
fs::write(&importer, format!("{import_token}\n")).unwrap();
let body = fs::read_to_string(&importer).unwrap();
let cb: Box<ApprovalCallback<'static>> =
Box::new(|_p: &Path, _i: &Path| ImportApproval::AlwaysAllow);
let mut state = ImportState::new(workspace.clone(), ApprovalMode::Interactive(cb));
let out = resolve_imports(&body, &importer, &mut state);
assert!(out.contains("EXTERNAL"), "expected EXTERNAL inlined: {out}");
assert!(
state.allowlist.contains(&external.join("rules.md")),
"always-allow should add to allowlist",
);
}
// A path already in the allowlist must be accepted without invoking the
// callback; the callback panics to prove it is never called.
#[test]
fn cached_approval_skips_dialog_on_second_load() {
let tmp = TempDir::new().unwrap();
let external = tmp.path().join("outside");
fs::create_dir_all(&external).unwrap();
fs::write(external.join("rules.md"), "EXTERNAL").unwrap();
let workspace = tmp.path().join("ws");
fs::create_dir_all(&workspace).unwrap();
let importer = workspace.join("CLAUDE.md");
let import_token = format!("@{}", external.join("rules.md").display());
fs::write(&importer, format!("{import_token}\n")).unwrap();
let body = fs::read_to_string(&importer).unwrap();
let mut allow = ImportAllowlist::default();
allow.add(&external.join("rules.md"), None);
let cb: Box<ApprovalCallback<'static>> =
Box::new(|_p: &Path, _i: &Path| panic!("dialog should not be invoked"));
let mut state = ImportState::new(workspace.clone(), ApprovalMode::Interactive(cb))
.with_allowlist(allow, None);
let out = resolve_imports(&body, &importer, &mut state);
assert!(out.contains("EXTERNAL"));
}
// Saving and reloading keeps the approved entry.
// NOTE(review): the parent directory `.caliban` is not created here, so this
// presumably relies on `write_atomic` creating it — confirm.
#[test]
fn allowlist_round_trips_through_disk() {
let tmp = TempDir::new().unwrap();
let path = tmp.path().join(".caliban").join("imports-allowlist.json");
let mut allow = ImportAllowlist::default();
allow.add(Path::new("/Users/x/notes/api.md"), Some("session-1"));
allow.save(&path).unwrap();
let loaded = ImportAllowlist::load(&path).unwrap();
assert_eq!(loaded.approved.len(), 1);
assert!(loaded.contains(Path::new("/Users/x/notes/api.md")));
}
// HTML comments inside imported content must not reach the output.
#[test]
fn html_comments_stripped_from_imported_content() {
let tmp = TempDir::new().unwrap();
let root = tmp.path();
let importer = root.join("CLAUDE.md");
fs::write(
root.join("part.md"),
"VISIBLE\n<!-- secret stuff -->\nMORE\n",
)
.unwrap();
fs::write(&importer, "@./part.md\n").unwrap();
let body = fs::read_to_string(&importer).unwrap();
let mut state = ImportState::new(root.to_path_buf(), deny_cb());
let out = resolve_imports(&body, &importer, &mut state);
assert!(out.contains("VISIBLE"));
assert!(out.contains("MORE"));
assert!(
!out.contains("secret stuff"),
"html comment leaked into output: {out}",
);
}
// Smoke test: an import consisting only of a comment must resolve without
// panicking; the output itself is not inspected.
#[test]
fn empty_body_after_stripping_does_not_panic() {
let tmp = TempDir::new().unwrap();
let root = tmp.path();
let importer = root.join("CLAUDE.md");
fs::write(root.join("part.md"), "<!-- nothing -->\n").unwrap();
fs::write(&importer, "@./part.md\n").unwrap();
let body = fs::read_to_string(&importer).unwrap();
let mut state = ImportState::new(root.to_path_buf(), deny_cb());
let _ = resolve_imports(&body, &importer, &mut state);
}
}