use anyhow::{Result, bail};
use std::collections::HashMap;
use std::path::{Component, Path, PathBuf};
use crate::core::file_error::{FileOperation, FileResultExt, LARGE_FILE_SIZE};
/// File extensions (lowercase, without the leading dot) accepted by
/// `validate_content_path`. The candidate extension is lowercased before the
/// lookup, so matching is effectively case-insensitive.
const ALLOWED_EXTENSIONS: &[&str] = &["md", "txt", "json", "toml", "yaml", "yml"];
/// Upper bound on render depth.
///
/// NOTE(review): not referenced by the code visible in this file; presumably
/// consumed by the template renderer to cap recursive rendering — confirm
/// against the caller.
pub const MAX_RENDER_DEPTH: usize = 10;
/// Resolves a template-supplied relative path to a canonical file path that is
/// guaranteed to live inside `project_dir`.
///
/// The checks run in the order below and the first failure is returned:
///
/// 1. `path_str` must not be an absolute path.
/// 2. A lexical walk over the components must never climb above the project
///    root via `..`; root/prefix components are rejected outright.
/// 3. The path must carry a UTF-8 extension listed in `ALLOWED_EXTENSIONS`
///    (compared after lowercasing).
/// 4. `project_dir.join(path_str)` must exist and be a regular file.
/// 5. After canonicalizing both the file and `project_dir`, the file must
///    still be located under the project directory (this is what catches
///    symlinks pointing elsewhere).
/// 6. When `max_size` is `Some(limit)`, the file length must not exceed
///    `limit` bytes.
///
/// # Arguments
///
/// * `path_str` - path as written in the template, relative to the project root.
/// * `project_dir` - root directory the resolved file has to stay inside.
/// * `max_size` - optional size limit in bytes; `None` disables the size check.
///
/// # Errors
///
/// Returns an error describing the first failed check, or the wrapped I/O
/// error if canonicalization or the metadata lookup fails.
pub fn validate_content_path(
    path_str: &str,
    project_dir: &Path,
    max_size: Option<u64>,
) -> Result<PathBuf> {
    let relative = Path::new(path_str);

    if relative.is_absolute() {
        bail!(
            "Absolute paths are not allowed in content filter. \
             Path '{}' must be relative to project root.",
            path_str
        );
    }

    // Purely lexical pre-check: `depth` is how many directory levels below the
    // project root we currently are. A `..` at depth zero would leave the root.
    let mut depth: usize = 0;
    for part in relative.components() {
        match part {
            Component::CurDir => {}
            Component::Normal(_) => depth += 1,
            Component::ParentDir if depth > 0 => depth -= 1,
            Component::ParentDir => bail!(
                "Path traversal outside project directory is not allowed. \
                 Path '{}' attempts to access parent directories beyond project root.",
                path_str
            ),
            // Root or prefix components (e.g. a leading separator or a drive letter).
            _ => bail!("Invalid path component in '{}'. Only relative paths are allowed.", path_str),
        }
    }

    let extension = match relative.extension().and_then(|ext| ext.to_str()) {
        Some(ext) => ext,
        None => bail!(
            "File '{}' has no extension. Allowed extensions: {}",
            path_str,
            ALLOWED_EXTENSIONS.join(", ")
        ),
    };

    let normalized = extension.to_lowercase();
    let is_allowed = ALLOWED_EXTENSIONS.iter().any(|allowed| *allowed == normalized);
    if !is_allowed {
        // The message reports the extension exactly as the caller spelled it.
        bail!(
            "File extension '.{}' is not allowed. \
             Allowed extensions: {}. \
             Path: '{}'",
            extension,
            ALLOWED_EXTENSIONS.join(", "),
            path_str
        );
    }

    let candidate = project_dir.join(relative);

    if !candidate.exists() {
        bail!(
            "File not found: '{}'. \
             The content filter requires files to exist. \
             Full path attempted: {}",
            path_str,
            candidate.display()
        );
    }

    if !candidate.is_file() {
        bail!(
            "Path '{}' is not a regular file. \
             The content filter only works with files, not directories or special files.",
            path_str
        );
    }

    // Canonicalization resolves symlinks and `..`, so the containment check
    // below operates on real filesystem locations rather than on the spelling
    // of the path.
    let resolved = candidate.canonicalize().with_file_context(
        FileOperation::Canonicalize,
        &candidate,
        "resolving absolute path for security validation in content filter",
        "content_filter",
    )?;
    let resolved_root = project_dir.canonicalize().with_file_context(
        FileOperation::Canonicalize,
        project_dir,
        "resolving project directory for security validation in content filter",
        "content_filter",
    )?;

    if !resolved.starts_with(&resolved_root) {
        bail!(
            "Security violation: Path '{}' resolves to '{}' which is outside project directory '{}'",
            path_str,
            resolved.display(),
            resolved_root.display()
        );
    }

    if let Some(limit) = max_size {
        let size = resolved
            .metadata()
            .with_file_context(
                FileOperation::Metadata,
                &resolved,
                "checking file size in content filter",
                "content_filter",
            )?
            .len();

        if size > limit {
            // NOTE(review): the "MB" figures divide by LARGE_FILE_SIZE, so they are
            // only accurate if that constant equals one megabyte — confirm.
            bail!(
                "File '{}' is too large ({} bytes). Maximum allowed size: {} bytes ({:.2} MB vs {:.2} MB limit).",
                path_str,
                size,
                limit,
                size as f64 / (LARGE_FILE_SIZE as f64),
                limit as f64 / (LARGE_FILE_SIZE as f64)
            );
        }
    }

    Ok(resolved)
}
/// Reads `file_path` as UTF-8 text and post-processes it according to its
/// (lowercased) extension.
///
/// * `md` - the text is handed to `crate::markdown::MarkdownDocument::parse`
///   and the parsed document's `content` field is returned.
/// * `json` - the text is parsed and re-serialized with pretty printing.
/// * anything else (including no extension) - returned unchanged.
///
/// Parse failures are not fatal: a warning is logged through `tracing` and the
/// raw file content is returned instead.
///
/// # Errors
///
/// Fails only when the file cannot be read as a UTF-8 string.
pub fn read_and_process_content(file_path: &Path) -> Result<String> {
    let raw = std::fs::read_to_string(file_path).with_file_context(
        FileOperation::Read,
        file_path,
        format!("reading content for template embedding in '{}'", file_path.display()),
        "content_filter",
    )?;

    let kind = file_path.extension().and_then(|ext| ext.to_str()).map(|ext| ext.to_lowercase());

    match kind.as_deref() {
        Some("md") => match crate::markdown::MarkdownDocument::parse(&raw) {
            Ok(document) => Ok(document.content),
            Err(err) => {
                tracing::warn!(
                    "Failed to parse markdown file '{}': {}. Using raw content.",
                    file_path.display(),
                    err
                );
                Ok(raw)
            }
        },
        Some("json") => match serde_json::from_str::<serde_json::Value>(&raw) {
            // If re-serialization fails, fall back to the text as read from disk.
            Ok(parsed) => Ok(serde_json::to_string_pretty(&parsed).unwrap_or(raw)),
            Err(err) => {
                tracing::warn!(
                    "Failed to parse JSON file '{}': {}. Using raw content.",
                    file_path.display(),
                    err
                );
                Ok(raw)
            }
        },
        _ => Ok(raw),
    }
}
pub fn create_content_filter(
project_dir: PathBuf,
max_size: Option<u64>,
) -> impl tera::Filter + 'static {
move |value: &tera::Value, _args: &HashMap<String, tera::Value>| -> tera::Result<tera::Value> {
let path_str = value
.as_str()
.ok_or_else(|| tera::Error::msg("content filter requires a string path"))?;
let file_path = validate_content_path(path_str, &project_dir, max_size)
.map_err(|e| tera::Error::msg(format!("content filter error: {}", e)))?;
let content = read_and_process_content(&file_path)
.map_err(|e| tera::Error::msg(format!("content filter error: {}", e)))?;
Ok(tera::Value::String(content))
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Creates a temporary project containing:
    /// `docs/guide.md`, `docs/notes.txt`, `docs/with-frontmatter.md` and
    /// `project/config.json`. The directory is removed when the returned
    /// `TempDir` is dropped.
    fn create_test_project() -> TempDir {
        let temp = TempDir::new().unwrap();
        let project_dir = temp.path();
        fs::create_dir_all(project_dir.join("docs")).unwrap();
        fs::create_dir_all(project_dir.join("project")).unwrap();
        fs::write(project_dir.join("docs/guide.md"), "# Guide\n\nContent here").unwrap();
        fs::write(project_dir.join("docs/notes.txt"), "Plain text notes").unwrap();
        fs::write(project_dir.join("project/config.json"), r#"{"key": "value"}"#).unwrap();
        fs::write(
            project_dir.join("docs/with-frontmatter.md"),
            "---\ntitle: Test\n---\n\n# Content",
        )
        .unwrap();
        temp
    }

    #[test]
    fn test_validate_valid_path() -> Result<(), Box<dyn std::error::Error>> {
        let temp = create_test_project();
        let project_dir = temp.path();
        let path = validate_content_path("docs/guide.md", project_dir, None)?;
        assert!(path.ends_with("docs/guide.md"));
        assert!(path.is_absolute());
        Ok(())
    }

    #[test]
    fn test_validate_rejects_absolute_path() {
        let temp = create_test_project();
        let project_dir = temp.path();
        #[cfg(windows)]
        let absolute_path = "C:\\Windows\\System32\\config";
        #[cfg(not(windows))]
        let absolute_path = "/etc/passwd";
        let result = validate_content_path(absolute_path, project_dir, None);
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("Absolute paths"));
    }

    #[test]
    fn test_validate_rejects_traversal() {
        let temp = create_test_project();
        let project_dir = temp.path();
        let result = validate_content_path("../../etc/passwd", project_dir, None);
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("traversal"));
    }

    /// A `..` that stays inside the project root is legal and is resolved by
    /// canonicalization.
    #[test]
    fn test_validate_allows_parent_dir_within_project() -> Result<(), Box<dyn std::error::Error>> {
        let temp = create_test_project();
        let project_dir = temp.path();
        let path = validate_content_path("docs/../docs/guide.md", project_dir, None)?;
        assert!(path.ends_with("docs/guide.md"));
        Ok(())
    }

    #[test]
    fn test_validate_rejects_invalid_extension() {
        let temp = create_test_project();
        let project_dir = temp.path();
        fs::write(project_dir.join("script.sh"), "#!/bin/bash").unwrap();
        let result = validate_content_path("script.sh", project_dir, None);
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("not allowed"));
    }

    /// The extension check runs before the existence check, so no file is needed.
    #[test]
    fn test_validate_rejects_missing_extension() {
        let temp = create_test_project();
        let project_dir = temp.path();
        let result = validate_content_path("docs/README", project_dir, None);
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("no extension"));
    }

    #[test]
    fn test_validate_accepts_uppercase_extension() -> Result<(), Box<dyn std::error::Error>> {
        let temp = create_test_project();
        let project_dir = temp.path();
        fs::write(project_dir.join("docs/UPPER.MD"), "# Upper").unwrap();
        let path = validate_content_path("docs/UPPER.MD", project_dir, None)?;
        assert!(path.is_absolute());
        Ok(())
    }

    #[test]
    fn test_validate_rejects_missing_file() {
        let temp = create_test_project();
        let project_dir = temp.path();
        let result = validate_content_path("docs/missing.md", project_dir, None);
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("not found"));
    }

    /// A directory whose name carries an allowed extension must still be rejected.
    #[test]
    fn test_validate_rejects_directory() {
        let temp = create_test_project();
        let project_dir = temp.path();
        fs::create_dir_all(project_dir.join("docs/folder.md")).unwrap();
        let result = validate_content_path("docs/folder.md", project_dir, None);
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("not a regular file"));
    }

    /// A symlink inside the project that points outside of it passes the
    /// lexical checks and must be caught by the canonical containment check.
    #[cfg(unix)]
    #[test]
    fn test_validate_rejects_symlink_escape() {
        let temp = create_test_project();
        let project_dir = temp.path();
        let outside = TempDir::new().unwrap();
        let secret = outside.path().join("secret.md");
        fs::write(&secret, "secret").unwrap();
        std::os::unix::fs::symlink(&secret, project_dir.join("docs/link.md")).unwrap();
        let result = validate_content_path("docs/link.md", project_dir, None);
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("Security violation"));
    }

    #[test]
    fn test_validate_rejects_file_too_large() -> Result<(), Box<dyn std::error::Error>> {
        let temp = create_test_project();
        let project_dir = temp.path();
        let large_file = project_dir.join("large.md");
        fs::write(&large_file, "a".repeat(1000)).unwrap();
        // Just under the limit: accepted.
        validate_content_path("large.md", project_dir, Some(1001))?;
        // Just over the limit: rejected, and both sizes are reported.
        let result = validate_content_path("large.md", project_dir, Some(999));
        assert!(result.is_err());
        let err_msg = result.unwrap_err().to_string();
        assert!(err_msg.contains("too large"));
        assert!(err_msg.contains("1000 bytes"));
        assert!(err_msg.contains("999 bytes"));
        Ok(())
    }

    #[test]
    fn test_read_markdown_strips_frontmatter() {
        let temp = create_test_project();
        let project_dir = temp.path();
        let path = project_dir.join("docs/with-frontmatter.md");
        let content = read_and_process_content(&path).unwrap();
        assert!(!content.contains("---"));
        assert!(!content.contains("title: Test"));
        assert!(content.contains("# Content"));
    }

    #[test]
    fn test_read_json_pretty_prints() {
        let temp = create_test_project();
        let project_dir = temp.path();
        let path = project_dir.join("project/config.json");
        let content = read_and_process_content(&path).unwrap();
        assert!(content.contains('\n'));
        assert!(content.contains("\"key\""));
        assert!(content.contains("\"value\""));
    }

    /// Malformed JSON is not an error: the raw text is returned unchanged.
    #[test]
    fn test_read_invalid_json_returns_raw() {
        let temp = create_test_project();
        let path = temp.path().join("project/broken.json");
        fs::write(&path, "{not valid json").unwrap();
        let content = read_and_process_content(&path).unwrap();
        assert_eq!(content, "{not valid json");
    }

    #[test]
    fn test_read_text_returns_raw() {
        let temp = create_test_project();
        let project_dir = temp.path();
        let path = project_dir.join("docs/notes.txt");
        let content = read_and_process_content(&path).unwrap();
        assert_eq!(content, "Plain text notes");
    }

    #[test]
    fn test_filter_function() {
        use tera::Tera;
        let temp = create_test_project();
        let project_dir = temp.path().to_path_buf();
        let mut tera = Tera::default();
        tera.register_filter("content", create_content_filter(project_dir, None));
        let template = r#"{{ 'docs/guide.md' | content }}"#;
        let context = tera::Context::new();
        let result = tera.render_str(template, &context);
        assert!(result.is_ok(), "Filter should render successfully");
        let content = result.unwrap();
        assert!(content.contains("# Guide"));
        assert!(content.contains("Content here"));
    }

    #[test]
    fn test_filter_rejects_non_string() {
        use tera::Tera;
        let temp = create_test_project();
        let project_dir = temp.path().to_path_buf();
        let mut tera = Tera::default();
        tera.register_filter("content", create_content_filter(project_dir, None));
        let template = r#"{{ 42 | content }}"#;
        let context = tera::Context::new();
        let result = tera.render_str(template, &context);
        assert!(result.is_err(), "Filter should reject non-string values");
    }

    /// Validation failures must surface as render errors, not as empty output.
    #[test]
    fn test_filter_reports_validation_errors() {
        use tera::Tera;
        let temp = create_test_project();
        let project_dir = temp.path().to_path_buf();
        let mut tera = Tera::default();
        tera.register_filter("content", create_content_filter(project_dir, None));
        let template = r#"{{ '../outside.md' | content }}"#;
        let context = tera::Context::new();
        let result = tera.render_str(template, &context);
        assert!(result.is_err(), "Filter should reject paths that leave the project");
    }

    // Placeholder: the body is intentionally empty. It is marked as ignored so
    // that test reports do not count it as passing coverage.
    // TODO: implement once recursive rendering can be exercised from this module.
    #[test]
    #[ignore = "placeholder: recursive template rendering is not exercised yet"]
    fn test_recursive_template_rendering() {}
}