Skip to main content

humanize_cli_core/
fs.rs

1//! File system operations for Humanize.
2//!
3//! This module provides safe file operations with path validation
4//! to prevent security issues like path traversal and symlink attacks.
5
6use std::path::{Component, Path, PathBuf};
7
8use crate::constants::MAX_JSON_DEPTH;
9
10/// Errors that can occur during file operations.
11#[derive(Debug, thiserror::Error)]
12pub enum FsError {
13    #[error("Path is not relative: {0}")]
14    NotRelative(String),
15
16    #[error("Path contains parent directory traversal: {0}")]
17    PathTraversal(String),
18
19    #[error("Path is absolute: {0}")]
20    AbsolutePath(String),
21
22    #[error("Symlink not allowed: {0}")]
23    SymlinkNotAllowed(String),
24
25    #[error("Path points outside repository: {0}")]
26    OutsideRepository(String),
27
28    #[error("Invalid UTF-8 in path")]
29    InvalidUtf8,
30
31    #[error("Null byte in path")]
32    NullByte,
33
34    #[error("IO error: {0}")]
35    IoError(String),
36}
37
38impl From<std::io::Error> for FsError {
39    fn from(e: std::io::Error) -> Self {
40        FsError::IoError(e.to_string())
41    }
42}
43
/// Options for path validation.
///
/// The derived [`Default`] is the strictest configuration: every `allow_*`
/// flag is `false` and no repository root is set (so no boundary check is
/// performed). Use struct-update syntax to relax individual settings:
///
/// ```ignore
/// let options = PathValidationOptions {
///     repo_root: Some(root),
///     ..PathValidationOptions::default()
/// };
/// ```
#[derive(Debug, Clone, Default)]
pub struct PathValidationOptions {
    /// Whether symlinks are allowed. Defaults to `false`.
    pub allow_symlinks: bool,
    /// Whether absolute paths are allowed. Defaults to `false`.
    pub allow_absolute: bool,
    /// Whether parent directory traversal (`..`) is allowed. Defaults to `false`.
    pub allow_parent_traversal: bool,
    /// Repository root for boundary checking. Defaults to `None`.
    pub repo_root: Option<PathBuf>,
}
67
68/// Validate a path for security.
69pub fn validate_path(path: &str, options: &PathValidationOptions) -> Result<PathBuf, FsError> {
70    // Check for null bytes
71    if path.contains('\0') {
72        return Err(FsError::NullByte);
73    }
74
75    // Check for valid UTF-8 (already guaranteed by &str)
76
77    let parsed = PathBuf::from(path);
78
79    // Check for absolute paths
80    if parsed.is_absolute() && !options.allow_absolute {
81        return Err(FsError::AbsolutePath(path.to_string()));
82    }
83
84    // Check for parent directory traversal
85    if !options.allow_parent_traversal {
86        for component in parsed.components() {
87            if matches!(component, Component::ParentDir) {
88                return Err(FsError::PathTraversal(path.to_string()));
89            }
90        }
91    }
92
93    // Check symlinks if we have a repo root
94    if let Some(ref repo_root) = options.repo_root {
95        let full_path = repo_root.join(&parsed);
96        let canonical_repo_root = canonicalize_for_boundary_check(repo_root);
97
98        // Check if path resolves outside repo (symlink check)
99        if full_path.exists() {
100            if let Ok(canonical) = full_path.canonicalize() {
101                if !canonical.starts_with(&canonical_repo_root) {
102                    return Err(FsError::OutsideRepository(path.to_string()));
103                }
104            }
105        }
106    }
107
108    Ok(parsed)
109}
110
/// Resolve `path` to its canonical form for use in a boundary comparison.
///
/// If canonicalization fails (for example because the path does not exist),
/// the path is returned unchanged instead of reporting an error.
fn canonicalize_for_boundary_check(path: &Path) -> PathBuf {
    match path.canonicalize() {
        Ok(resolved) => resolved,
        Err(_) => path.to_path_buf(),
    }
}
114
115/// Validate JSON depth to prevent DoS attacks.
116pub fn validate_json_depth(json: &str) -> Result<(), FsError> {
117    let mut depth: usize = 0;
118    let mut max_depth: usize = 0;
119    let mut in_string = false;
120    let mut escape_next = false;
121
122    for ch in json.chars() {
123        if escape_next {
124            escape_next = false;
125            continue;
126        }
127
128        match ch {
129            '\\' if in_string => escape_next = true,
130            '"' => in_string = !in_string,
131            '{' | '[' if !in_string => {
132                depth += 1;
133                max_depth = max_depth.max(depth);
134                if depth > MAX_JSON_DEPTH {
135                    return Err(FsError::IoError(format!(
136                        "JSON nesting depth exceeds maximum ({})",
137                        MAX_JSON_DEPTH
138                    )));
139                }
140            }
141            '}' | ']' if !in_string => {
142                depth = depth.saturating_sub(1);
143            }
144            _ => {}
145        }
146    }
147
148    Ok(())
149}
150
/// Report whether `path` names a round-specific file that should be blocked
/// from reading.
///
/// The comparison is case-insensitive and looks only at the final
/// `/`-separated segment, which must start with `round-` immediately
/// followed by an ASCII digit and must end with `.md`
/// (e.g. `round-2-review-prompt.md`).
pub fn is_round_specific_file(path: &str) -> bool {
    let lowered = path.to_lowercase();
    let name = match lowered.rfind('/') {
        Some(slash) => &lowered[slash + 1..],
        None => lowered.as_str(),
    };

    if !name.ends_with(".md") {
        return false;
    }

    match name.strip_prefix("round-") {
        // Only the first character after the prefix is inspected.
        Some(after_prefix) => after_prefix.starts_with(|c: char| c.is_ascii_digit()),
        None => false,
    }
}
167
/// Report whether `path` is a protected state file that should be blocked
/// from writing.
///
/// A path is protected when, compared case-insensitively, it contains
/// `.humanize/rlcr/` or `.humanize/pr-loop/` and ends with `/state.md`.
pub fn is_protected_state_file(path: &str) -> bool {
    let lowered = path.to_lowercase();

    let inside_loop_dir = [".humanize/rlcr/", ".humanize/pr-loop/"]
        .iter()
        .any(|marker| lowered.contains(*marker));

    inside_loop_dir && lowered.ends_with("/state.md")
}
181
/// The kinds of per-round files that validators treat specially.
///
/// Each variant corresponds to a file named `round-N-<kind>.md`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RoundFileType {
    /// `round-N-summary.md`
    Summary,
    /// `round-N-prompt.md`
    Prompt,
    /// `round-N-todos.md`
    Todos,
}
192
193/// Check if a path is a specific round file type.
194///
195/// Matches the pattern: round-N-<type>.md where N is a number.
196pub fn is_round_file_type(path: &str, file_type: RoundFileType) -> bool {
197    let path_lower = path.to_lowercase();
198    let type_str = match file_type {
199        RoundFileType::Summary => "summary",
200        RoundFileType::Prompt => "prompt",
201        RoundFileType::Todos => "todos",
202    };
203
204    // Extract filename
205    let filename = path_lower.rsplit('/').next().unwrap_or(&path_lower);
206
207    // Must start with "round-"
208    if !filename.starts_with("round-") {
209        return false;
210    }
211
212    // Must end with -<type>.md
213    let suffix = format!("-{}.md", type_str);
214    if !filename.ends_with(&suffix) {
215        return false;
216    }
217
218    // Extract the part between "round-" and "-<type>.md"
219    let rest = &filename[6..]; // skip "round-"
220    if let Some(num_part) = rest.strip_suffix(&suffix) {
221        // Must be all digits
222        return num_part.chars().all(|c| c.is_ascii_digit()) && !num_part.is_empty();
223    }
224
225    false
226}
227
228/// Check if a path is any round file (summary, prompt, or todos).
229pub fn is_any_round_file(path: &str) -> bool {
230    is_round_file_type(path, RoundFileType::Summary)
231        || is_round_file_type(path, RoundFileType::Prompt)
232        || is_round_file_type(path, RoundFileType::Todos)
233}
234
/// Report whether `path` lies inside a Humanize loop directory.
///
/// This is a case-insensitive substring test for `.humanize/rlcr/` or
/// `.humanize/pr-loop/` anywhere in the path text.
pub fn is_in_humanize_loop_dir(path: &str) -> bool {
    const LOOP_DIR_MARKERS: [&str; 2] = [".humanize/rlcr/", ".humanize/pr-loop/"];

    let lowered = path.to_lowercase();
    LOOP_DIR_MARKERS
        .iter()
        .any(|marker| lowered.contains(*marker))
}
240
/// Extract the round number from a round filename.
///
/// Only the final `/`-separated segment of `filename` is examined, and the
/// comparison is case-insensitive. The segment must have the shape
/// `round-N-*.md`, where `N` is one or more ASCII digits.
///
/// Returns `None` when the shape does not match, when `N` is empty or
/// contains anything other than ASCII digits (including a sign such as
/// `+7`), or when `N` does not fit in a `u32`.
pub fn extract_round_number(filename: &str) -> Option<u32> {
    let lowered = filename.to_lowercase();
    let name = lowered.rsplit('/').next().unwrap_or(&lowered);

    // "N-*" is what remains once the fixed prefix and extension are removed.
    let stem = name.strip_prefix("round-")?.strip_suffix(".md")?;
    let (digits, _rest) = stem.split_once('-')?;

    // `str::parse::<u32>` tolerates a leading `+`, so check the digits
    // explicitly to stay in line with the documented `round-N-*.md` pattern.
    if digits.is_empty() || !digits.bytes().all(|b| b.is_ascii_digit()) {
        return None;
    }

    digits.parse().ok()
}
261
262/// Check if a file path is allowlisted for reading during a loop.
263///
264/// A file is allowlisted if:
265/// - It's the current round's summary/prompt file
266/// - It's a historical summary file (for context)
267/// - It's in the active loop directory and matches current round
268pub fn is_allowlisted_file(
269    file_path: &str,
270    loop_dir: &std::path::Path,
271    current_round: u32,
272) -> bool {
273    let file_path_lower = file_path.to_lowercase();
274    let loop_dir_str = loop_dir.to_string_lossy().to_lowercase();
275
276    // Must be in the loop directory
277    if !file_path_lower.starts_with(&loop_dir_str) {
278        return false;
279    }
280
281    // Extract filename
282    let filename = file_path_lower
283        .rsplit('/')
284        .next()
285        .unwrap_or(&file_path_lower);
286
287    // Allow current round's summary and prompt
288    let current_summary = format!("round-{}-summary.md", current_round);
289    let current_prompt = format!("round-{}-prompt.md", current_round);
290
291    if filename == current_summary || filename == current_prompt {
292        return true;
293    }
294
295    // Allow historical summaries (round-N-summary.md where N < current_round)
296    if is_round_file_type(file_path, RoundFileType::Summary) {
297        if let Some(round) = extract_round_number(file_path) {
298            if round < current_round {
299                return true;
300            }
301        }
302    }
303
304    false
305}
306
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    #[test]
    fn test_validate_path_relative() {
        let strict = PathValidationOptions::default();
        let outcome = validate_path("src/main.rs", &strict);
        assert!(outcome.is_ok());
    }

    #[test]
    fn test_validate_path_absolute_rejected() {
        let strict = PathValidationOptions::default();
        let outcome = validate_path("/etc/passwd", &strict);
        assert!(matches!(outcome, Err(FsError::AbsolutePath(_))));
    }

    #[test]
    fn test_validate_path_traversal_rejected() {
        let strict = PathValidationOptions::default();
        let outcome = validate_path("../../../etc/passwd", &strict);
        assert!(matches!(outcome, Err(FsError::PathTraversal(_))));
    }

    #[test]
    fn test_validate_path_accepts_existing_file_with_noncanonical_repo_root() {
        let scratch = tempfile::tempdir().unwrap();
        let repo = scratch.path().join("repo");
        let docs_dir = repo.join("docs");
        let work_dir = repo.join("work");
        fs::create_dir_all(&docs_dir).unwrap();
        fs::create_dir_all(&work_dir).unwrap();
        fs::write(repo.join("docs/plan.md"), "plan\n").unwrap();

        // `repo/work/..` names the repository root without being canonical.
        let options = PathValidationOptions {
            repo_root: Some(work_dir.join("..")),
            ..PathValidationOptions::default()
        };

        assert!(validate_path("docs/plan.md", &options).is_ok());
    }

    #[test]
    fn test_is_round_specific_file() {
        let blocked = [
            "round-1-summary.md",
            ".humanize/rlcr/test/round-2-review-prompt.md",
        ];
        for path in blocked.iter() {
            assert!(is_round_specific_file(path), "expected match: {}", path);
        }

        let ordinary = ["src/main.rs", "roundup.md"];
        for path in ordinary.iter() {
            assert!(!is_round_specific_file(path), "expected no match: {}", path);
        }
    }

    #[test]
    fn test_is_protected_state_file() {
        let protected = [
            ".humanize/rlcr/2026-03-17/state.md",
            ".humanize/pr-loop/2026-03-17/state.md",
        ];
        for path in protected.iter() {
            assert!(is_protected_state_file(path), "expected protected: {}", path);
        }

        let unprotected = [
            "docs/state.md",
            ".humanize/rlcr/2026-03-17/complete-state.md",
        ];
        for path in unprotected.iter() {
            assert!(
                !is_protected_state_file(path),
                "expected unprotected: {}",
                path
            );
        }
    }

    #[test]
    fn test_json_depth_validation() {
        assert!(validate_json_depth(r#"{"a": 1}"#).is_ok());
        assert!(validate_json_depth(r#"[[[[[[]]]]]]"#).is_ok());

        // 35 nested arrays must exceed the configured maximum depth.
        let deep_json = format!("{}{}", "[".repeat(35), "]".repeat(35));
        assert!(validate_json_depth(&deep_json).is_err());
    }

    #[test]
    fn test_is_round_file_type() {
        let matching = [
            ("round-1-summary.md", RoundFileType::Summary),
            (
                ".humanize/rlcr/test/round-2-summary.md",
                RoundFileType::Summary,
            ),
            ("round-1-prompt.md", RoundFileType::Prompt),
            ("round-1-todos.md", RoundFileType::Todos),
        ];
        for (path, kind) in matching.iter() {
            assert!(
                is_round_file_type(path, *kind),
                "{} should be {:?}",
                path,
                kind
            );
        }

        let mismatching = [
            // Right shape, wrong type.
            ("round-1-prompt.md", RoundFileType::Summary),
            ("round-1-summary.md", RoundFileType::Prompt),
            ("round-1-summary.md", RoundFileType::Todos),
            // Invalid patterns.
            ("roundup.md", RoundFileType::Summary),
            ("round--summary.md", RoundFileType::Summary),
        ];
        for (path, kind) in mismatching.iter() {
            assert!(
                !is_round_file_type(path, *kind),
                "{} should not be {:?}",
                path,
                kind
            );
        }
    }

    #[test]
    fn test_is_in_humanize_loop_dir() {
        let inside = [
            ".humanize/rlcr/2026-03-17/state.md",
            ".humanize/pr-loop/2026-03-17/state.md",
        ];
        for path in inside.iter() {
            assert!(is_in_humanize_loop_dir(path), "expected inside: {}", path);
        }

        let outside = ["src/main.rs", ".humanize/config.json"];
        for path in outside.iter() {
            assert!(!is_in_humanize_loop_dir(path), "expected outside: {}", path);
        }
    }

    #[test]
    fn test_extract_round_number() {
        let cases: [(&str, Option<u32>); 6] = [
            ("round-1-summary.md", Some(1)),
            ("round-42-prompt.md", Some(42)),
            (".humanize/rlcr/test/round-3-todos.md", Some(3)),
            ("roundup.md", None),
            ("round--summary.md", None),
            ("src/main.rs", None),
        ];
        for (input, expected) in cases.iter() {
            assert_eq!(extract_round_number(input), *expected, "input: {}", input);
        }
    }
}