agpm_cli/templating/filters.rs
1//! Custom Tera filters for AGPM templates.
2//!
3//! This module provides template filters that extend Tera's functionality for
4//! AGPM-specific use cases, such as reading project files, content manipulation,
5//! and other template operations.
6//!
7//! # Security
8//!
9//! All file access is restricted to the project directory with the following protections:
10//! - Only relative paths are allowed (no absolute paths)
11//! - Directory traversal outside project root is prevented
12//! - Only text file types are permitted (.md, .txt, .json, .toml, .yaml, .yml)
13//! - Missing files produce hard errors to fail fast
14//!
15//! # Supported File Types
16//!
17//! - **Markdown (.md)**: YAML/TOML frontmatter is automatically stripped
18//! - **JSON (.json)**: Parsed and pretty-printed
19//! - **Text (.txt)**: Raw content
20//! - **TOML (.toml)**: Raw content
21//! - **YAML (.yaml, .yml)**: Raw content
22//!
23//! # Examples
24//!
25//! ## Basic File Reading
26//!
27//! ```markdown
28//! ---
29//! agpm.templating: true
30//! ---
31//! # Code Review Agent
32//!
33//! ## Style Guide
34//! {{ 'project/styleguide.md' | content }}
35//!
36//! ## Best Practices
37//! {{ 'docs/best-practices.txt' | content }}
38//! ```
39//!
40//! ## Combining with Dependency Content Embedding
41//!
42//! Use both `content` filter and dependency `.content` fields together:
43//!
44//! ```markdown
45//! ---
46//! agpm.templating: true
47//! dependencies:
48//! snippets:
49//! - path: snippets/rust-patterns.md
50//! name: rust_patterns
51//! ---
52//! # Rust Code Reviewer
53//!
54//! ## Shared Rust Patterns (versioned, from AGPM)
55//! {{ agpm.deps.snippets.rust_patterns.content }}
56//!
57//! ## Project-Specific Style Guide (local)
58//! {{ 'project/rust-style.md' | content }}
59//! ```
60//!
61//! **When to use each**:
62//! - **`agpm.deps.<type>.<name>.content`**: Versioned content from AGPM repositories
63//! - **`content` filter**: Project-local files (team docs, company standards)
64//!
65//! ## Recursive Templates
66//!
67//! Project files can themselves contain template syntax:
68//!
69//! **project/styleguide.md**:
70//! ```markdown
71//! # Coding Standards
72//!
73//! ## Rust-Specific Rules
74//! {{ 'project/rust-style.md' | content }}
75//!
76//! ## Common Guidelines
77//! {{ 'project/common-style.md' | content }}
78//! ```
79//!
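80//! The template system was designed to render up to 10 levels of nested references (see `MAX_RENDER_DEPTH`). Note, however, that multi-pass rendering was removed in v0.5.0: the `content` filter now embeds file content literally, without processing template syntax inside it. Files that need template processing should be made AGPM dependencies instead.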
81
82use anyhow::{Result, bail};
83use std::collections::HashMap;
84use std::path::{Component, Path, PathBuf};
85
86use crate::core::file_error::{FileOperation, FileResultExt, LARGE_FILE_SIZE};
87
/// File extensions (lowercase, without the leading dot) that the `content`
/// filter is willing to read.
///
/// The list is limited to text formats so that binary data is never embedded
/// into a rendered template. Callers compare against a lowercased extension,
/// which makes the check case-insensitive.
const ALLOWED_EXTENSIONS: &[&str] = &[
    "md",   // Markdown
    "txt",  // plain text
    "json", // JSON
    "toml", // TOML
    "yaml", // YAML
    "yml",  // YAML (short extension)
];
93
/// Upper bound on how deeply templates may be nested when rendered recursively.
///
/// The cap exists to stop cyclic or very deep chains of file references from
/// looping forever or consuming excessive memory.
///
/// NOTE(review): nothing in this module reads the constant; it is presumably
/// consumed by the rendering code elsewhere in the crate — confirm before
/// changing or removing it.
pub const MAX_RENDER_DEPTH: usize = 10;
99
100/// Validates a file path for security and correctness.
101///
102/// This function ensures that:
103/// 1. The path is relative (not absolute)
104/// 2. The path doesn't traverse outside the project directory using `..`
105/// 3. The file extension is in the allowed list
106/// 4. The file exists and is readable
107/// 5. The file size doesn't exceed the maximum allowed
108///
109/// # Arguments
110///
111/// * `path_str` - The path string from the template
112/// * `project_dir` - The project root directory
113/// * `max_size` - Maximum file size in bytes (None for no limit)
114///
115/// # Returns
116///
117/// Returns the canonicalized absolute path to the file if all checks pass.
118///
119/// # Errors
120///
121/// Returns an error if:
122/// - Path is absolute
123/// - Path contains `..` components that escape project directory
124/// - File extension is not in the allowed list
125/// - File doesn't exist
126/// - File is not accessible (permissions, etc.)
127/// - File size exceeds the maximum allowed
128///
129/// # Security
130///
131/// This function is critical for preventing directory traversal attacks.
132/// It validates paths before any file system access occurs.
133///
134/// # Examples
135///
136/// ```rust,no_run
137/// # use std::path::Path;
138/// # use agpm_cli::templating::filters::validate_content_path;
139/// # fn example() -> anyhow::Result<()> {
140/// let project_dir = Path::new("/home/user/project");
141///
142/// // Valid relative path with no size limit
143/// let path = validate_content_path("docs/guide.md", project_dir, None)?;
144///
145/// // With size limit (1 MB)
146/// let path = validate_content_path("docs/guide.md", project_dir, Some(1024 * 1024))?;
147///
148/// // Invalid: absolute path
149/// let result = validate_content_path("/etc/passwd", project_dir, None);
150/// assert!(result.is_err());
151///
152/// // Invalid: directory traversal
153/// let result = validate_content_path("../../etc/passwd", project_dir, None);
154/// assert!(result.is_err());
155///
156/// // Invalid: wrong extension
157/// let result = validate_content_path("script.sh", project_dir, None);
158/// assert!(result.is_err());
159/// # Ok(())
160/// # }
161/// ```
162pub fn validate_content_path(
163 path_str: &str,
164 project_dir: &Path,
165 max_size: Option<u64>,
166) -> Result<PathBuf> {
167 // Parse the path
168 let path = Path::new(path_str);
169
170 // Reject absolute paths
171 if path.is_absolute() {
172 bail!(
173 "Absolute paths are not allowed in content filter. \
174 Path '{}' must be relative to project root.",
175 path_str
176 );
177 }
178
179 // Check for directory traversal attempts
180 // We need to resolve the path and ensure it stays within project_dir
181 let mut components_count: i32 = 0;
182 for component in path.components() {
183 match component {
184 Component::Normal(_) => components_count += 1,
185 Component::ParentDir => {
186 components_count -= 1;
187 // If we go negative, we're trying to escape the project directory
188 if components_count < 0 {
189 bail!(
190 "Path traversal outside project directory is not allowed. \
191 Path '{}' attempts to access parent directories beyond project root.",
192 path_str
193 );
194 }
195 }
196 Component::CurDir => {
197 // `.` is fine, just ignore it
198 }
199 _ => {
200 // Prefix, RootDir shouldn't appear in relative paths
201 bail!("Invalid path component in '{}'. Only relative paths are allowed.", path_str);
202 }
203 }
204 }
205
206 // Validate file extension
207 let extension = path.extension().and_then(|ext| ext.to_str()).ok_or_else(|| {
208 anyhow::anyhow!(
209 "File '{}' has no extension. Allowed extensions: {}",
210 path_str,
211 ALLOWED_EXTENSIONS.join(", ")
212 )
213 })?;
214
215 let extension_lower = extension.to_lowercase();
216 if !ALLOWED_EXTENSIONS.contains(&extension_lower.as_str()) {
217 bail!(
218 "File extension '.{}' is not allowed. \
219 Allowed extensions: {}. \
220 Path: '{}'",
221 extension,
222 ALLOWED_EXTENSIONS.join(", "),
223 path_str
224 );
225 }
226
227 // Construct full path relative to project directory
228 let full_path = project_dir.join(path);
229
230 // Check if file exists
231 if !full_path.exists() {
232 bail!(
233 "File not found: '{}'. \
234 The content filter requires files to exist. \
235 Full path attempted: {}",
236 path_str,
237 full_path.display()
238 );
239 }
240
241 // Check if it's a regular file (not a directory or symlink)
242 if !full_path.is_file() {
243 bail!(
244 "Path '{}' is not a regular file. \
245 The content filter only works with files, not directories or special files.",
246 path_str
247 );
248 }
249
250 // Canonicalize to get absolute path and verify it's still within project_dir
251 let canonical_path = full_path.canonicalize().with_file_context(
252 FileOperation::Canonicalize,
253 &full_path,
254 "resolving absolute path for security validation in content filter",
255 "content_filter",
256 )?;
257
258 let canonical_project = project_dir.canonicalize().with_file_context(
259 FileOperation::Canonicalize,
260 project_dir,
261 "resolving project directory for security validation in content filter",
262 "content_filter",
263 )?;
264
265 // Final security check: ensure canonical path is within project directory
266 if !canonical_path.starts_with(&canonical_project) {
267 bail!(
268 "Security violation: Path '{}' resolves to '{}' which is outside project directory '{}'",
269 path_str,
270 canonical_path.display(),
271 canonical_project.display()
272 );
273 }
274
275 // Check file size if limit is specified
276 if let Some(max_bytes) = max_size {
277 let metadata = canonical_path.metadata().with_file_context(
278 FileOperation::Metadata,
279 &canonical_path,
280 "checking file size in content filter",
281 "content_filter",
282 )?;
283
284 let file_size = metadata.len();
285 if file_size > max_bytes {
286 bail!(
287 "File '{}' is too large ({} bytes). Maximum allowed size: {} bytes ({:.2} MB vs {:.2} MB limit).",
288 path_str,
289 file_size,
290 max_bytes,
291 file_size as f64 / (LARGE_FILE_SIZE as f64),
292 max_bytes as f64 / (LARGE_FILE_SIZE as f64)
293 );
294 }
295 }
296
297 Ok(canonical_path)
298}
299
300/// Reads and processes a project file based on its type.
301///
302/// This function handles different file types appropriately:
303/// - Markdown: Strips YAML/TOML frontmatter
304/// - JSON: Parses and pretty-prints
305/// - Other text files: Returns raw content
306///
307/// # Arguments
308///
309/// * `file_path` - Validated absolute path to the file
310///
311/// # Returns
312///
313/// Returns the processed file content as a string.
314///
315/// # Errors
316///
317/// Returns an error if:
318/// - File cannot be read (I/O error)
319/// - File contains invalid UTF-8
320/// - JSON file has invalid syntax
321/// - Markdown frontmatter is malformed
322///
323/// # Examples
324///
325/// ```rust,no_run
326/// # use std::path::Path;
327/// # use agpm_cli::templating::filters::read_and_process_content;
328/// # fn example() -> anyhow::Result<()> {
329/// let path = Path::new("/home/user/project/docs/guide.md");
330/// let content = read_and_process_content(path)?;
331/// println!("{}", content);
332/// # Ok(())
333/// # }
334/// ```
335pub fn read_and_process_content(file_path: &Path) -> Result<String> {
336 // Read file content with structured context
337 let content = std::fs::read_to_string(file_path).with_file_context(
338 FileOperation::Read,
339 file_path,
340 format!("reading content for template embedding in '{}'", file_path.display()),
341 "content_filter",
342 )?;
343
344 // Process based on file extension
345 let extension = file_path
346 .extension()
347 .and_then(|ext| ext.to_str())
348 .map(|s| s.to_lowercase())
349 .unwrap_or_default();
350
351 let processed_content = match extension.as_str() {
352 "md" => {
353 // Markdown: strip frontmatter
354 match crate::markdown::MarkdownDocument::parse(&content) {
355 Ok(doc) => doc.content,
356 Err(e) => {
357 tracing::warn!(
358 "Failed to parse markdown file '{}': {}. Using raw content.",
359 file_path.display(),
360 e
361 );
362 content
363 }
364 }
365 }
366 "json" => {
367 // JSON: parse and pretty-print
368 match serde_json::from_str::<serde_json::Value>(&content) {
369 Ok(json) => serde_json::to_string_pretty(&json).unwrap_or(content),
370 Err(e) => {
371 tracing::warn!(
372 "Failed to parse JSON file '{}': {}. Using raw content.",
373 file_path.display(),
374 e
375 );
376 content
377 }
378 }
379 }
380 _ => {
381 // Text, TOML, YAML: return raw content
382 content
383 }
384 };
385
386 Ok(processed_content)
387}
388
389/// Creates a Tera filter function for reading and embedding file content.
390///
391/// This function returns a closure that can be registered as a Tera filter.
392/// The closure captures the project directory and uses it to validate and
393/// read files during template rendering.
394///
395/// # Arguments
396///
397/// * `project_dir` - The project root directory for path validation
398///
399/// # Returns
400///
401/// Returns a boxed closure compatible with Tera's filter registration API.
402///
403/// # Filter Usage
404///
405/// In templates, use the filter with a string value containing the relative path:
406///
407/// ```markdown
408/// {{ 'docs/styleguide.md' | content }}
409/// ```
410///
411/// # Errors
412///
413/// The returned filter will produce template rendering errors if:
414/// - The input value is not a string
415/// - Path validation fails (absolute path, traversal, invalid extension, etc.)
416/// - File cannot be read or processed
417///
418/// # Examples
419///
420/// ```rust,no_run
421/// # use std::path::Path;
422/// # use agpm_cli::core::file_error::LARGE_FILE_SIZE;
423/// # use agpm_cli::templating::filters::create_content_filter;
424/// # fn example() -> anyhow::Result<()> {
425/// let project_dir = Path::new("/home/user/project");
426/// let max_size = Some((10 * LARGE_FILE_SIZE) as u64); // 10 MB limit
427/// let filter = create_content_filter(project_dir.to_path_buf(), max_size);
428///
429/// // Filter is registered in Tera:
430/// // tera.register_filter("content", filter);
431/// # Ok(())
432/// # }
433/// ```
434pub fn create_content_filter(
435 project_dir: PathBuf,
436 max_size: Option<u64>,
437) -> impl tera::Filter + 'static {
438 move |value: &tera::Value, _args: &HashMap<String, tera::Value>| -> tera::Result<tera::Value> {
439 // Extract path string from filter input
440 let path_str = value
441 .as_str()
442 .ok_or_else(|| tera::Error::msg("content filter requires a string path"))?;
443
444 // Validate and read the file
445 let file_path = validate_content_path(path_str, &project_dir, max_size)
446 .map_err(|e| tera::Error::msg(format!("content filter error: {}", e)))?;
447
448 let content = read_and_process_content(&file_path)
449 .map_err(|e| tera::Error::msg(format!("content filter error: {}", e)))?;
450
451 // Return content as string value
452 Ok(tera::Value::String(content))
453 }
454}
455
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Builds a throwaway project tree with a few Markdown, text and JSON files.
    ///
    /// The returned `TempDir` must be kept alive for the duration of the test;
    /// the directory is deleted when it is dropped.
    fn create_test_project() -> TempDir {
        let temp = TempDir::new().unwrap();
        let project_dir = temp.path();

        // Create directory structure
        fs::create_dir_all(project_dir.join("docs")).unwrap();
        fs::create_dir_all(project_dir.join("project")).unwrap();

        // Create test files
        fs::write(project_dir.join("docs/guide.md"), "# Guide\n\nContent here").unwrap();
        fs::write(project_dir.join("docs/notes.txt"), "Plain text notes").unwrap();
        fs::write(project_dir.join("project/config.json"), r#"{"key": "value"}"#).unwrap();

        // Create markdown with frontmatter
        fs::write(
            project_dir.join("docs/with-frontmatter.md"),
            "---\ntitle: Test\n---\n\n# Content",
        )
        .unwrap();

        temp
    }

    #[test]
    fn test_validate_valid_path() -> Result<(), Box<dyn std::error::Error>> {
        let temp = create_test_project();
        let project_dir = temp.path();

        let path = validate_content_path("docs/guide.md", project_dir, None)?;
        assert!(path.ends_with("docs/guide.md"));
        assert!(path.is_absolute());
        Ok(())
    }

    /// A `..` that stays inside the project root is legal and is resolved away.
    #[test]
    fn test_validate_allows_internal_parent_dir() -> Result<(), Box<dyn std::error::Error>> {
        let temp = create_test_project();
        let project_dir = temp.path();

        let path = validate_content_path("docs/../docs/guide.md", project_dir, None)?;
        assert!(path.ends_with("docs/guide.md"));
        assert!(path.is_absolute());
        Ok(())
    }

    #[test]
    fn test_validate_rejects_absolute_path() {
        let temp = create_test_project();
        let project_dir = temp.path();

        // Use platform-specific absolute paths
        #[cfg(windows)]
        let absolute_path = "C:\\Windows\\System32\\config";
        #[cfg(not(windows))]
        let absolute_path = "/etc/passwd";

        let result = validate_content_path(absolute_path, project_dir, None);
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("Absolute paths"));
    }

    #[test]
    fn test_validate_rejects_traversal() {
        let temp = create_test_project();
        let project_dir = temp.path();

        let result = validate_content_path("../../etc/passwd", project_dir, None);
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("traversal"));
    }

    #[test]
    fn test_validate_rejects_invalid_extension() {
        let temp = create_test_project();
        let project_dir = temp.path();

        // Create a .sh file
        fs::write(project_dir.join("script.sh"), "#!/bin/bash").unwrap();

        let result = validate_content_path("script.sh", project_dir, None);
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("not allowed"));
    }

    /// A path without any extension is rejected before the file system is consulted.
    #[test]
    fn test_validate_rejects_missing_extension() {
        let temp = create_test_project();
        let project_dir = temp.path();

        let result = validate_content_path("docs/README", project_dir, None);
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("no extension"));
    }

    #[test]
    fn test_validate_rejects_missing_file() {
        let temp = create_test_project();
        let project_dir = temp.path();

        let result = validate_content_path("docs/missing.md", project_dir, None);
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("not found"));
    }

    #[test]
    fn test_validate_rejects_file_too_large() -> Result<(), Box<dyn std::error::Error>> {
        let temp = create_test_project();
        let project_dir = temp.path();

        // Create a file with known size (1000 bytes)
        let large_file = project_dir.join("large.md");
        fs::write(&large_file, "a".repeat(1000)).unwrap();

        // Should succeed with larger limit
        validate_content_path("large.md", project_dir, Some(1001))?;

        // Should fail with smaller limit
        let result = validate_content_path("large.md", project_dir, Some(999));
        assert!(result.is_err());
        let err_msg = result.unwrap_err().to_string();
        assert!(err_msg.contains("too large"));
        assert!(err_msg.contains("1000 bytes"));
        assert!(err_msg.contains("999 bytes"));
        Ok(())
    }

    #[test]
    fn test_read_markdown_strips_frontmatter() {
        let temp = create_test_project();
        let project_dir = temp.path();

        let path = project_dir.join("docs/with-frontmatter.md");
        let content = read_and_process_content(&path).unwrap();

        assert!(!content.contains("---"));
        assert!(!content.contains("title: Test"));
        assert!(content.contains("# Content"));
    }

    #[test]
    fn test_read_json_pretty_prints() {
        let temp = create_test_project();
        let project_dir = temp.path();

        let path = project_dir.join("project/config.json");
        let content = read_and_process_content(&path).unwrap();

        // Should be pretty-printed (contains newlines)
        assert!(content.contains('\n'));
        assert!(content.contains("\"key\""));
        assert!(content.contains("\"value\""));
    }

    #[test]
    fn test_read_text_returns_raw() {
        let temp = create_test_project();
        let project_dir = temp.path();

        let path = project_dir.join("docs/notes.txt");
        let content = read_and_process_content(&path).unwrap();

        assert_eq!(content, "Plain text notes");
    }

    #[test]
    fn test_filter_function() {
        use tera::Tera;

        let temp = create_test_project();
        let project_dir = temp.path().to_path_buf();

        // Register the filter in a Tera instance
        let mut tera = Tera::default();
        tera.register_filter("content", create_content_filter(project_dir, None));

        // Test with valid path using Tera's template rendering
        let template = r#"{{ 'docs/guide.md' | content }}"#;
        let context = tera::Context::new();

        let result = tera.render_str(template, &context);
        assert!(result.is_ok(), "Filter should render successfully");

        let content = result.unwrap();
        assert!(content.contains("# Guide"));
        assert!(content.contains("Content here"));
    }

    #[test]
    fn test_filter_rejects_non_string() {
        use tera::Tera;

        let temp = create_test_project();
        let project_dir = temp.path().to_path_buf();

        // Register the filter in a Tera instance
        let mut tera = Tera::default();
        tera.register_filter("content", create_content_filter(project_dir, None));

        // Test with number instead of string (this will be caught at template render time)
        let template = r#"{{ 42 | content }}"#;
        let context = tera::Context::new();

        let result = tera.render_str(template, &context);
        // The important thing is that it fails - Tera may wrap our error message
        assert!(result.is_err(), "Filter should reject non-string values");
    }

    /// The filter hands the file text back as a plain string value, so template
    /// syntax inside an embedded file appears verbatim in the output rather
    /// than being rendered a second time.
    #[test]
    fn test_filter_returns_template_syntax_literally() {
        use tera::Tera;

        let temp = create_test_project();
        let project_dir = temp.path().to_path_buf();

        // A plain-text file is returned unprocessed, which keeps the expected
        // output exact.
        fs::write(project_dir.join("docs/snippet.txt"), "Hello {{ name }}").unwrap();

        let mut tera = Tera::default();
        tera.register_filter("content", create_content_filter(project_dir, None));

        let template = r#"{{ 'docs/snippet.txt' | content }}"#;
        let context = tera::Context::new();

        let rendered = tera.render_str(template, &context).unwrap();
        assert_eq!(rendered, "Hello {{ name }}");
    }
}
654
655// Integration tests for recursive rendering have been removed since multi-pass rendering was
656// removed in v0.5.0. The content filter now returns literal content without template processing.
657// If you need template processing, make files into AGPM dependencies instead.