agpm_cli/templating/
filters.rs

1//! Custom Tera filters for AGPM templates.
2//!
3//! This module provides template filters that extend Tera's functionality for
4//! AGPM-specific use cases, such as reading project files, content manipulation,
5//! and other template operations.
6//!
7//! # Security
8//!
9//! All file access is restricted to the project directory with the following protections:
10//! - Only relative paths are allowed (no absolute paths)
11//! - Directory traversal outside project root is prevented
//! - Only text file types are permitted (.md, .txt, .json, .toml, .yaml, .yml)
13//! - Missing files produce hard errors to fail fast
14//!
15//! # Supported File Types
16//!
17//! - **Markdown (.md)**: YAML/TOML frontmatter is automatically stripped
18//! - **JSON (.json)**: Parsed and pretty-printed
19//! - **Text (.txt)**: Raw content
20//! - **TOML (.toml)**: Raw content
21//! - **YAML (.yaml, .yml)**: Raw content
22//!
23//! # Examples
24//!
25//! ## Basic File Reading
26//!
27//! ```markdown
28//! ---
29//! agpm.templating: true
30//! ---
31//! # Code Review Agent
32//!
33//! ## Style Guide
34//! {{ 'project/styleguide.md' | content }}
35//!
36//! ## Best Practices
37//! {{ 'docs/best-practices.txt' | content }}
38//! ```
39//!
40//! ## Combining with Dependency Content Embedding
41//!
42//! Use both `content` filter and dependency `.content` fields together:
43//!
44//! ```markdown
45//! ---
46//! agpm.templating: true
47//! dependencies:
48//!   snippets:
49//!     - path: snippets/rust-patterns.md
50//!       name: rust_patterns
51//! ---
52//! # Rust Code Reviewer
53//!
54//! ## Shared Rust Patterns (versioned, from AGPM)
55//! {{ agpm.deps.snippets.rust_patterns.content }}
56//!
57//! ## Project-Specific Style Guide (local)
58//! {{ 'project/rust-style.md' | content }}
59//! ```
60//!
61//! **When to use each**:
62//! - **`agpm.deps.<type>.<name>.content`**: Versioned content from AGPM repositories
63//! - **`content` filter**: Project-local files (team docs, company standards)
64//!
65//! ## Recursive Templates
66//!
67//! Project files can themselves contain template syntax:
68//!
69//! **project/styleguide.md**:
70//! ```markdown
71//! # Coding Standards
72//!
73//! ## Rust-Specific Rules
74//! {{ 'project/rust-style.md' | content }}
75//!
76//! ## Common Guidelines
77//! {{ 'project/common-style.md' | content }}
78//! ```
79//!
80//! The template system will render up to 10 levels of nested references.
81
82use anyhow::{Result, bail};
83use std::collections::HashMap;
84use std::path::{Component, Path, PathBuf};
85
86use crate::core::file_error::{FileOperation, FileResultExt, LARGE_FILE_SIZE};
87
/// File extensions the `content` filter is willing to read.
///
/// Entries are lowercase and written without the leading dot. Restricting
/// access to text-based formats keeps binary files out of rendered templates.
const ALLOWED_EXTENSIONS: &[&str] = &[
    "md",
    "txt",
    "json",
    "toml",
    "yaml",
    "yml",
];

/// Upper bound on the nesting depth of recursive template rendering.
///
/// Intended to stop cyclic or very deep chains of files referencing each
/// other from looping forever or consuming excessive memory.
///
/// NOTE(review): nothing in this module reads this constant, and the note at
/// the end of this file says multi-pass rendering was removed in v0.5.0;
/// confirm that it is still consumed elsewhere.
pub const MAX_RENDER_DEPTH: usize = 10;
99
100/// Validates a file path for security and correctness.
101///
102/// This function ensures that:
103/// 1. The path is relative (not absolute)
104/// 2. The path doesn't traverse outside the project directory using `..`
105/// 3. The file extension is in the allowed list
106/// 4. The file exists and is readable
107/// 5. The file size doesn't exceed the maximum allowed
108///
109/// # Arguments
110///
111/// * `path_str` - The path string from the template
112/// * `project_dir` - The project root directory
113/// * `max_size` - Maximum file size in bytes (None for no limit)
114///
115/// # Returns
116///
117/// Returns the canonicalized absolute path to the file if all checks pass.
118///
119/// # Errors
120///
121/// Returns an error if:
122/// - Path is absolute
123/// - Path contains `..` components that escape project directory
124/// - File extension is not in the allowed list
125/// - File doesn't exist
126/// - File is not accessible (permissions, etc.)
127/// - File size exceeds the maximum allowed
128///
129/// # Security
130///
131/// This function is critical for preventing directory traversal attacks.
132/// It validates paths before any file system access occurs.
133///
134/// # Examples
135///
136/// ```rust,no_run
137/// # use std::path::Path;
138/// # use agpm_cli::templating::filters::validate_content_path;
139/// # fn example() -> anyhow::Result<()> {
140/// let project_dir = Path::new("/home/user/project");
141///
142/// // Valid relative path with no size limit
143/// let path = validate_content_path("docs/guide.md", project_dir, None)?;
144///
145/// // With size limit (1 MB)
146/// let path = validate_content_path("docs/guide.md", project_dir, Some(1024 * 1024))?;
147///
148/// // Invalid: absolute path
149/// let result = validate_content_path("/etc/passwd", project_dir, None);
150/// assert!(result.is_err());
151///
152/// // Invalid: directory traversal
153/// let result = validate_content_path("../../etc/passwd", project_dir, None);
154/// assert!(result.is_err());
155///
156/// // Invalid: wrong extension
157/// let result = validate_content_path("script.sh", project_dir, None);
158/// assert!(result.is_err());
159/// # Ok(())
160/// # }
161/// ```
162pub fn validate_content_path(
163    path_str: &str,
164    project_dir: &Path,
165    max_size: Option<u64>,
166) -> Result<PathBuf> {
167    // Parse the path
168    let path = Path::new(path_str);
169
170    // Reject absolute paths
171    if path.is_absolute() {
172        bail!(
173            "Absolute paths are not allowed in content filter. \
174             Path '{}' must be relative to project root.",
175            path_str
176        );
177    }
178
179    // Check for directory traversal attempts
180    // We need to resolve the path and ensure it stays within project_dir
181    let mut components_count: i32 = 0;
182    for component in path.components() {
183        match component {
184            Component::Normal(_) => components_count += 1,
185            Component::ParentDir => {
186                components_count -= 1;
187                // If we go negative, we're trying to escape the project directory
188                if components_count < 0 {
189                    bail!(
190                        "Path traversal outside project directory is not allowed. \
191                         Path '{}' attempts to access parent directories beyond project root.",
192                        path_str
193                    );
194                }
195            }
196            Component::CurDir => {
197                // `.` is fine, just ignore it
198            }
199            _ => {
200                // Prefix, RootDir shouldn't appear in relative paths
201                bail!("Invalid path component in '{}'. Only relative paths are allowed.", path_str);
202            }
203        }
204    }
205
206    // Validate file extension
207    let extension = path.extension().and_then(|ext| ext.to_str()).ok_or_else(|| {
208        anyhow::anyhow!(
209            "File '{}' has no extension. Allowed extensions: {}",
210            path_str,
211            ALLOWED_EXTENSIONS.join(", ")
212        )
213    })?;
214
215    let extension_lower = extension.to_lowercase();
216    if !ALLOWED_EXTENSIONS.contains(&extension_lower.as_str()) {
217        bail!(
218            "File extension '.{}' is not allowed. \
219             Allowed extensions: {}. \
220             Path: '{}'",
221            extension,
222            ALLOWED_EXTENSIONS.join(", "),
223            path_str
224        );
225    }
226
227    // Construct full path relative to project directory
228    let full_path = project_dir.join(path);
229
230    // Check if file exists
231    if !full_path.exists() {
232        bail!(
233            "File not found: '{}'. \
234             The content filter requires files to exist. \
235             Full path attempted: {}",
236            path_str,
237            full_path.display()
238        );
239    }
240
241    // Check if it's a regular file (not a directory or symlink)
242    if !full_path.is_file() {
243        bail!(
244            "Path '{}' is not a regular file. \
245             The content filter only works with files, not directories or special files.",
246            path_str
247        );
248    }
249
250    // Canonicalize to get absolute path and verify it's still within project_dir
251    let canonical_path = full_path.canonicalize().with_file_context(
252        FileOperation::Canonicalize,
253        &full_path,
254        "resolving absolute path for security validation in content filter",
255        "content_filter",
256    )?;
257
258    let canonical_project = project_dir.canonicalize().with_file_context(
259        FileOperation::Canonicalize,
260        project_dir,
261        "resolving project directory for security validation in content filter",
262        "content_filter",
263    )?;
264
265    // Final security check: ensure canonical path is within project directory
266    if !canonical_path.starts_with(&canonical_project) {
267        bail!(
268            "Security violation: Path '{}' resolves to '{}' which is outside project directory '{}'",
269            path_str,
270            canonical_path.display(),
271            canonical_project.display()
272        );
273    }
274
275    // Check file size if limit is specified
276    if let Some(max_bytes) = max_size {
277        let metadata = canonical_path.metadata().with_file_context(
278            FileOperation::Metadata,
279            &canonical_path,
280            "checking file size in content filter",
281            "content_filter",
282        )?;
283
284        let file_size = metadata.len();
285        if file_size > max_bytes {
286            bail!(
287                "File '{}' is too large ({} bytes). Maximum allowed size: {} bytes ({:.2} MB vs {:.2} MB limit).",
288                path_str,
289                file_size,
290                max_bytes,
291                file_size as f64 / (LARGE_FILE_SIZE as f64),
292                max_bytes as f64 / (LARGE_FILE_SIZE as f64)
293            );
294        }
295    }
296
297    Ok(canonical_path)
298}
299
300/// Reads and processes a project file based on its type.
301///
302/// This function handles different file types appropriately:
303/// - Markdown: Strips YAML/TOML frontmatter
304/// - JSON: Parses and pretty-prints
305/// - Other text files: Returns raw content
306///
307/// # Arguments
308///
309/// * `file_path` - Validated absolute path to the file
310///
311/// # Returns
312///
313/// Returns the processed file content as a string.
314///
315/// # Errors
316///
317/// Returns an error if:
318/// - File cannot be read (I/O error)
319/// - File contains invalid UTF-8
320/// - JSON file has invalid syntax
321/// - Markdown frontmatter is malformed
322///
323/// # Examples
324///
325/// ```rust,no_run
326/// # use std::path::Path;
327/// # use agpm_cli::templating::filters::read_and_process_content;
328/// # fn example() -> anyhow::Result<()> {
329/// let path = Path::new("/home/user/project/docs/guide.md");
330/// let content = read_and_process_content(path)?;
331/// println!("{}", content);
332/// # Ok(())
333/// # }
334/// ```
335pub fn read_and_process_content(file_path: &Path) -> Result<String> {
336    // Read file content with structured context
337    let content = std::fs::read_to_string(file_path).with_file_context(
338        FileOperation::Read,
339        file_path,
340        format!("reading content for template embedding in '{}'", file_path.display()),
341        "content_filter",
342    )?;
343
344    // Process based on file extension
345    let extension = file_path
346        .extension()
347        .and_then(|ext| ext.to_str())
348        .map(|s| s.to_lowercase())
349        .unwrap_or_default();
350
351    let processed_content = match extension.as_str() {
352        "md" => {
353            // Markdown: strip frontmatter
354            match crate::markdown::MarkdownDocument::parse(&content) {
355                Ok(doc) => doc.content,
356                Err(e) => {
357                    tracing::warn!(
358                        "Failed to parse markdown file '{}': {}. Using raw content.",
359                        file_path.display(),
360                        e
361                    );
362                    content
363                }
364            }
365        }
366        "json" => {
367            // JSON: parse and pretty-print
368            match serde_json::from_str::<serde_json::Value>(&content) {
369                Ok(json) => serde_json::to_string_pretty(&json).unwrap_or(content),
370                Err(e) => {
371                    tracing::warn!(
372                        "Failed to parse JSON file '{}': {}. Using raw content.",
373                        file_path.display(),
374                        e
375                    );
376                    content
377                }
378            }
379        }
380        _ => {
381            // Text, TOML, YAML: return raw content
382            content
383        }
384    };
385
386    Ok(processed_content)
387}
388
389/// Creates a Tera filter function for reading and embedding file content.
390///
391/// This function returns a closure that can be registered as a Tera filter.
392/// The closure captures the project directory and uses it to validate and
393/// read files during template rendering.
394///
395/// # Arguments
396///
397/// * `project_dir` - The project root directory for path validation
398///
399/// # Returns
400///
401/// Returns a boxed closure compatible with Tera's filter registration API.
402///
403/// # Filter Usage
404///
405/// In templates, use the filter with a string value containing the relative path:
406///
407/// ```markdown
408/// {{ 'docs/styleguide.md' | content }}
409/// ```
410///
411/// # Errors
412///
413/// The returned filter will produce template rendering errors if:
414/// - The input value is not a string
415/// - Path validation fails (absolute path, traversal, invalid extension, etc.)
416/// - File cannot be read or processed
417///
418/// # Examples
419///
420/// ```rust,no_run
421/// # use std::path::Path;
422/// # use agpm_cli::core::file_error::LARGE_FILE_SIZE;
423/// # use agpm_cli::templating::filters::create_content_filter;
424/// # fn example() -> anyhow::Result<()> {
425/// let project_dir = Path::new("/home/user/project");
426/// let max_size = Some((10 * LARGE_FILE_SIZE) as u64); // 10 MB limit
427/// let filter = create_content_filter(project_dir.to_path_buf(), max_size);
428///
429/// // Filter is registered in Tera:
430/// // tera.register_filter("content", filter);
431/// # Ok(())
432/// # }
433/// ```
434pub fn create_content_filter(
435    project_dir: PathBuf,
436    max_size: Option<u64>,
437) -> impl tera::Filter + 'static {
438    move |value: &tera::Value, _args: &HashMap<String, tera::Value>| -> tera::Result<tera::Value> {
439        // Extract path string from filter input
440        let path_str = value
441            .as_str()
442            .ok_or_else(|| tera::Error::msg("content filter requires a string path"))?;
443
444        // Validate and read the file
445        let file_path = validate_content_path(path_str, &project_dir, max_size)
446            .map_err(|e| tera::Error::msg(format!("content filter error: {}", e)))?;
447
448        let content = read_and_process_content(&file_path)
449            .map_err(|e| tera::Error::msg(format!("content filter error: {}", e)))?;
450
451        // Return content as string value
452        Ok(tera::Value::String(content))
453    }
454}
455
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Directories created inside every fixture project.
    const FIXTURE_DIRS: &[&str] = &["docs", "project"];

    /// Files written into every fixture project, as (relative path, content).
    const FIXTURE_FILES: &[(&str, &str)] = &[
        ("docs/guide.md", "# Guide\n\nContent here"),
        ("docs/notes.txt", "Plain text notes"),
        ("project/config.json", r#"{"key": "value"}"#),
        // Markdown with frontmatter
        ("docs/with-frontmatter.md", "---\ntitle: Test\n---\n\n# Content"),
    ];

    /// Creates a temporary project directory populated with the fixtures.
    fn create_test_project() -> TempDir {
        let temp = TempDir::new().unwrap();
        let root = temp.path();

        for dir in FIXTURE_DIRS {
            fs::create_dir_all(root.join(dir)).unwrap();
        }
        for (relative, body) in FIXTURE_FILES {
            fs::write(root.join(relative), body).unwrap();
        }

        temp
    }

    /// Runs validation expecting a rejection and returns the error text.
    fn rejection_message(path: &str, project: &TempDir, max_size: Option<u64>) -> String {
        validate_content_path(path, project.path(), max_size)
            .expect_err("validation should have been rejected")
            .to_string()
    }

    /// Returns a Tera instance with the `content` filter bound to `project`.
    fn tera_with_content_filter(project: &TempDir) -> tera::Tera {
        let mut tera = tera::Tera::default();
        tera.register_filter("content", create_content_filter(project.path().to_path_buf(), None));
        tera
    }

    #[test]
    fn test_validate_valid_path() -> Result<(), Box<dyn std::error::Error>> {
        let project = create_test_project();

        let resolved = validate_content_path("docs/guide.md", project.path(), None)?;
        assert!(resolved.ends_with("docs/guide.md"));
        assert!(resolved.is_absolute());
        Ok(())
    }

    #[test]
    fn test_validate_rejects_absolute_path() {
        let project = create_test_project();

        // An absolute path in the form the current platform recognizes
        let absolute_path = if cfg!(windows) {
            "C:\\Windows\\System32\\config"
        } else {
            "/etc/passwd"
        };

        let message = rejection_message(absolute_path, &project, None);
        assert!(message.contains("Absolute paths"));
    }

    #[test]
    fn test_validate_rejects_traversal() {
        let project = create_test_project();

        let message = rejection_message("../../etc/passwd", &project, None);
        assert!(message.contains("traversal"));
    }

    #[test]
    fn test_validate_rejects_invalid_extension() {
        let project = create_test_project();

        // The file exists, so only its extension can cause the rejection
        fs::write(project.path().join("script.sh"), "#!/bin/bash").unwrap();

        let message = rejection_message("script.sh", &project, None);
        assert!(message.contains("not allowed"));
    }

    #[test]
    fn test_validate_rejects_missing_file() {
        let project = create_test_project();

        let message = rejection_message("docs/missing.md", &project, None);
        assert!(message.contains("not found"));
    }

    #[test]
    fn test_validate_rejects_file_too_large() -> Result<(), Box<dyn std::error::Error>> {
        let project = create_test_project();

        // A file of exactly 1000 bytes
        fs::write(project.path().join("large.md"), "a".repeat(1000)).unwrap();

        // A limit just above the size is accepted
        validate_content_path("large.md", project.path(), Some(1001))?;

        // A limit just below the size is rejected, and both numbers are reported
        let message = rejection_message("large.md", &project, Some(999));
        for expected in ["too large", "1000 bytes", "999 bytes"] {
            assert!(message.contains(expected));
        }
        Ok(())
    }

    #[test]
    fn test_read_markdown_strips_frontmatter() {
        let project = create_test_project();

        let content =
            read_and_process_content(&project.path().join("docs/with-frontmatter.md")).unwrap();

        assert!(!content.contains("---"));
        assert!(!content.contains("title: Test"));
        assert!(content.contains("# Content"));
    }

    #[test]
    fn test_read_json_pretty_prints() {
        let project = create_test_project();

        let content =
            read_and_process_content(&project.path().join("project/config.json")).unwrap();

        // Pretty-printed output spans several lines
        assert!(content.contains('\n'));
        assert!(content.contains("\"key\""));
        assert!(content.contains("\"value\""));
    }

    #[test]
    fn test_read_text_returns_raw() {
        let project = create_test_project();

        let content = read_and_process_content(&project.path().join("docs/notes.txt")).unwrap();

        assert_eq!(content, "Plain text notes");
    }

    #[test]
    fn test_filter_function() {
        let project = create_test_project();
        let mut tera = tera_with_content_filter(&project);

        // Render a template that embeds an existing file through the filter
        let rendered =
            tera.render_str(r#"{{ 'docs/guide.md' | content }}"#, &tera::Context::new());
        assert!(rendered.is_ok(), "Filter should render successfully");

        let content = rendered.unwrap();
        assert!(content.contains("# Guide"));
        assert!(content.contains("Content here"));
    }

    #[test]
    fn test_filter_rejects_non_string() {
        let project = create_test_project();
        let mut tera = tera_with_content_filter(&project);

        // A number is not a path; the failure surfaces at render time
        let rendered = tera.render_str(r#"{{ 42 | content }}"#, &tera::Context::new());

        // Only the failure matters here - Tera may wrap our error message
        assert!(rendered.is_err(), "Filter should reject non-string values");
    }

    #[test]
    fn test_recursive_template_rendering() {
        // Intentionally empty placeholder.
        // This test is in the templating module tests
        // See test_recursive_content_rendering in mod.rs
    }
}
654
655// Integration tests for recursive rendering have been removed since multi-pass rendering was
656// removed in v0.5.0. The content filter now returns literal content without template processing.
657// If you need template processing, make files into AGPM dependencies instead.