Skip to main content

humanize_cli_core/
fs.rs

1//! File system operations for Humanize.
2//!
3//! This module provides safe file operations with path validation
4//! to prevent security issues like path traversal and symlink attacks.
5
6use std::path::{Component, PathBuf};
7
8use crate::constants::MAX_JSON_DEPTH;
9
/// Errors that can occur during file operations.
///
/// The `Display` text of each variant comes from its `#[error(...)]`
/// attribute. Variants carrying a `String` interpolate it as `{0}`.
#[derive(Debug, thiserror::Error)]
pub enum FsError {
    /// The path was expected to be relative but is not. Carries the path.
    #[error("Path is not relative: {0}")]
    NotRelative(String),

    /// The path contains a `..` component. Carries the path as supplied.
    #[error("Path contains parent directory traversal: {0}")]
    PathTraversal(String),

    /// The path is absolute while absolute paths are disallowed. Carries the
    /// path as supplied.
    #[error("Path is absolute: {0}")]
    AbsolutePath(String),

    /// A symlink was encountered where symlinks are not permitted. Carries
    /// the path.
    #[error("Symlink not allowed: {0}")]
    SymlinkNotAllowed(String),

    /// The path resolves to a location outside the repository root. Carries
    /// the path as supplied.
    #[error("Path points outside repository: {0}")]
    OutsideRepository(String),

    /// The path is not valid UTF-8.
    #[error("Invalid UTF-8 in path")]
    InvalidUtf8,

    /// The path contains an embedded NUL (`\0`) character.
    #[error("Null byte in path")]
    NullByte,

    /// An I/O failure, stored as its display text. `validate_json_depth`
    /// also reports an excessive nesting depth through this variant.
    #[error("IO error: {0}")]
    IoError(String),
}
37
38impl From<std::io::Error> for FsError {
39    fn from(e: std::io::Error) -> Self {
40        FsError::IoError(e.to_string())
41    }
42}
43
/// Options for path validation.
///
/// The [`Default`] value is the most restrictive configuration: every
/// `allow_*` flag is `false` and no repository root is set.
#[derive(Debug, Clone, Default)]
pub struct PathValidationOptions {
    /// Whether symlinks are allowed. Defaults to `false`.
    pub allow_symlinks: bool,
    /// Whether absolute paths are allowed. Defaults to `false`.
    pub allow_absolute: bool,
    /// Whether parent directory traversal (..) is allowed. Defaults to `false`.
    pub allow_parent_traversal: bool,
    /// Repository root for boundary checking. Defaults to `None`, in which
    /// case no file-system based checks are performed.
    pub repo_root: Option<PathBuf>,
}
67
68/// Validate a path for security.
69pub fn validate_path(path: &str, options: &PathValidationOptions) -> Result<PathBuf, FsError> {
70    // Check for null bytes
71    if path.contains('\0') {
72        return Err(FsError::NullByte);
73    }
74
75    // Check for valid UTF-8 (already guaranteed by &str)
76
77    let parsed = PathBuf::from(path);
78
79    // Check for absolute paths
80    if parsed.is_absolute() && !options.allow_absolute {
81        return Err(FsError::AbsolutePath(path.to_string()));
82    }
83
84    // Check for parent directory traversal
85    if !options.allow_parent_traversal {
86        for component in parsed.components() {
87            if matches!(component, Component::ParentDir) {
88                return Err(FsError::PathTraversal(path.to_string()));
89            }
90        }
91    }
92
93    // Check symlinks if we have a repo root
94    if let Some(ref repo_root) = options.repo_root {
95        let full_path = repo_root.join(&parsed);
96
97        // Check if path resolves outside repo (symlink check)
98        if full_path.exists() {
99            if let Ok(canonical) = full_path.canonicalize() {
100                if !canonical.starts_with(repo_root) {
101                    return Err(FsError::OutsideRepository(path.to_string()));
102                }
103            }
104        }
105    }
106
107    Ok(parsed)
108}
109
110/// Validate JSON depth to prevent DoS attacks.
111pub fn validate_json_depth(json: &str) -> Result<(), FsError> {
112    let mut depth: usize = 0;
113    let mut max_depth: usize = 0;
114    let mut in_string = false;
115    let mut escape_next = false;
116
117    for ch in json.chars() {
118        if escape_next {
119            escape_next = false;
120            continue;
121        }
122
123        match ch {
124            '\\' if in_string => escape_next = true,
125            '"' => in_string = !in_string,
126            '{' | '[' if !in_string => {
127                depth += 1;
128                max_depth = max_depth.max(depth);
129                if depth > MAX_JSON_DEPTH {
130                    return Err(FsError::IoError(format!(
131                        "JSON nesting depth exceeds maximum ({})",
132                        MAX_JSON_DEPTH
133                    )));
134                }
135            }
136            '}' | ']' if !in_string => {
137                depth = depth.saturating_sub(1);
138            }
139            _ => {}
140        }
141    }
142
143    Ok(())
144}
145
/// Check if a path is a round-specific file that should be blocked from reading.
///
/// Matches file names of the form `round-N-*.md`, where `N` is one or more
/// ASCII digits and `*` may be empty. Only the part after the last `/` is
/// examined and the comparison is case-insensitive.
pub fn is_round_specific_file(path: &str) -> bool {
    let path_lower = path.to_lowercase();
    let filename = path_lower.rsplit('/').next().unwrap_or(&path_lower);

    // Isolate the "N-*" part between the fixed prefix and extension.
    let middle = match filename
        .strip_prefix("round-")
        .and_then(|rest| rest.strip_suffix(".md"))
    {
        Some(middle) => middle,
        None => return false,
    };

    // The round number is everything up to the first dash and must consist
    // of digits only; a missing dash means there is no "-*" part at all.
    match middle.find('-') {
        Some(dash) => {
            let number = &middle[..dash];
            !number.is_empty() && number.bytes().all(|b| b.is_ascii_digit())
        }
        None => false,
    }
}
162
/// Report whether `path` names a loop state file that must not be written.
///
/// The lower-cased path has to end in `/state.md` and contain either
/// `.humanize/rlcr/` or `.humanize/pr-loop/` somewhere in it.
pub fn is_protected_state_file(path: &str) -> bool {
    const LOOP_DIR_MARKERS: [&str; 2] = [".humanize/rlcr/", ".humanize/pr-loop/"];

    let lowered = path.to_lowercase();
    if !lowered.ends_with("/state.md") {
        return false;
    }

    LOOP_DIR_MARKERS
        .iter()
        .any(|marker| lowered.contains(*marker))
}
176
/// Kinds of per-round files that validators treat specially.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RoundFileType {
    /// `round-N-summary.md`
    Summary,
    /// `round-N-prompt.md`
    Prompt,
    /// `round-N-todos.md`
    Todos,
}

/// Report whether `path` names a round file of the given `file_type`.
///
/// The file name (the part after the last `/`, compared case-insensitively)
/// must be exactly `round-N-<type>.md`, where `N` is one or more ASCII digits.
pub fn is_round_file_type(path: &str, file_type: RoundFileType) -> bool {
    let kind = match file_type {
        RoundFileType::Summary => "summary",
        RoundFileType::Prompt => "prompt",
        RoundFileType::Todos => "todos",
    };
    let tail = format!("-{}.md", kind);

    let lowered = path.to_lowercase();
    let name = match lowered.rfind('/') {
        Some(slash) => &lowered[slash + 1..],
        None => lowered.as_str(),
    };

    // Whatever remains between "round-" and "-<type>.md" is the round number.
    name.strip_prefix("round-")
        .and_then(|rest| rest.strip_suffix(tail.as_str()))
        .map_or(false, |digits| {
            !digits.is_empty() && digits.chars().all(|c| c.is_ascii_digit())
        })
}

/// Report whether `path` is a round summary, prompt, or todos file.
pub fn is_any_round_file(path: &str) -> bool {
    [
        RoundFileType::Summary,
        RoundFileType::Prompt,
        RoundFileType::Todos,
    ]
    .iter()
    .any(|&kind| is_round_file_type(path, kind))
}
229
/// Report whether the lower-cased `path` contains `.humanize/rlcr/` or
/// `.humanize/pr-loop/` anywhere in it.
pub fn is_in_humanize_loop_dir(path: &str) -> bool {
    let lowered = path.to_lowercase();
    [".humanize/rlcr/", ".humanize/pr-loop/"]
        .iter()
        .any(|marker| lowered.contains(*marker))
}
235
/// Extract round number from a round filename.
///
/// Only the part after the last `/` is examined, case-insensitively. It must
/// have the form `round-N-*.md`, where `N` is one or more ASCII digits.
///
/// Returns `None` if the name does not match that pattern or if `N` does not
/// fit in a `u32`.
pub fn extract_round_number(filename: &str) -> Option<u32> {
    let filename_lower = filename.to_lowercase();
    let filename_only = filename_lower.rsplit('/').next().unwrap_or(&filename_lower);

    // Keep the "N-*" part between the fixed prefix and extension.
    let middle = filename_only.strip_prefix("round-")?.strip_suffix(".md")?;

    // The number ends at the first dash.
    let num_end = middle.find('-')?;
    let digits = &middle[..num_end];

    // `str::parse::<u32>` also accepts a leading '+', which is not part of
    // the file-name pattern, so insist on digits before parsing.
    if digits.is_empty() || !digits.bytes().all(|b| b.is_ascii_digit()) {
        return None;
    }

    digits.parse().ok()
}
256
257/// Check if a file path is allowlisted for reading during a loop.
258///
259/// A file is allowlisted if:
260/// - It's the current round's summary/prompt file
261/// - It's a historical summary file (for context)
262/// - It's in the active loop directory and matches current round
263pub fn is_allowlisted_file(
264    file_path: &str,
265    loop_dir: &std::path::Path,
266    current_round: u32,
267) -> bool {
268    let file_path_lower = file_path.to_lowercase();
269    let loop_dir_str = loop_dir.to_string_lossy().to_lowercase();
270
271    // Must be in the loop directory
272    if !file_path_lower.starts_with(&loop_dir_str) {
273        return false;
274    }
275
276    // Extract filename
277    let filename = file_path_lower
278        .rsplit('/')
279        .next()
280        .unwrap_or(&file_path_lower);
281
282    // Allow current round's summary and prompt
283    let current_summary = format!("round-{}-summary.md", current_round);
284    let current_prompt = format!("round-{}-prompt.md", current_round);
285
286    if filename == current_summary || filename == current_prompt {
287        return true;
288    }
289
290    // Allow historical summaries (round-N-summary.md where N < current_round)
291    if is_round_file_type(file_path, RoundFileType::Summary) {
292        if let Some(round) = extract_round_number(file_path) {
293            if round < current_round {
294                return true;
295            }
296        }
297    }
298
299    false
300}
301
/// Unit tests for the path validation and round-file helpers in this module.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_validate_path_relative() {
        let options = PathValidationOptions::default();
        assert!(validate_path("src/main.rs", &options).is_ok());
    }

    #[test]
    fn test_validate_path_absolute_rejected() {
        let options = PathValidationOptions::default();
        assert!(matches!(
            validate_path("/etc/passwd", &options),
            Err(FsError::AbsolutePath(_))
        ));
    }

    #[test]
    fn test_validate_path_traversal_rejected() {
        let options = PathValidationOptions::default();
        assert!(matches!(
            validate_path("../../../etc/passwd", &options),
            Err(FsError::PathTraversal(_))
        ));
    }

    #[test]
    fn test_is_round_specific_file() {
        assert!(is_round_specific_file("round-1-summary.md"));
        assert!(is_round_specific_file(
            ".humanize/rlcr/test/round-2-review-prompt.md"
        ));
        assert!(!is_round_specific_file("src/main.rs"));
        assert!(!is_round_specific_file("roundup.md"));
    }

    #[test]
    fn test_is_protected_state_file() {
        assert!(is_protected_state_file(
            ".humanize/rlcr/2026-03-17/state.md"
        ));
        assert!(is_protected_state_file(
            ".humanize/pr-loop/2026-03-17/state.md"
        ));
        assert!(!is_protected_state_file("docs/state.md"));
        assert!(!is_protected_state_file(
            ".humanize/rlcr/2026-03-17/complete-state.md"
        ));
    }

    #[test]
    fn test_json_depth_validation() {
        assert!(validate_json_depth(r#"{"a": 1}"#).is_ok());
        assert!(validate_json_depth(r#"[[[[[[]]]]]]"#).is_ok());

        // Create deeply nested JSON
        let deep_json = "[".repeat(35) + &"]".repeat(35);
        assert!(validate_json_depth(&deep_json).is_err());
    }

    #[test]
    fn test_is_round_file_type() {
        // Summary files
        assert!(is_round_file_type(
            "round-1-summary.md",
            RoundFileType::Summary
        ));
        assert!(is_round_file_type(
            ".humanize/rlcr/test/round-2-summary.md",
            RoundFileType::Summary
        ));
        assert!(!is_round_file_type(
            "round-1-prompt.md",
            RoundFileType::Summary
        ));

        // Prompt files
        assert!(is_round_file_type(
            "round-1-prompt.md",
            RoundFileType::Prompt
        ));
        assert!(!is_round_file_type(
            "round-1-summary.md",
            RoundFileType::Prompt
        ));

        // Todos files
        assert!(is_round_file_type("round-1-todos.md", RoundFileType::Todos));
        assert!(!is_round_file_type(
            "round-1-summary.md",
            RoundFileType::Todos
        ));

        // Invalid patterns
        assert!(!is_round_file_type("roundup.md", RoundFileType::Summary));
        assert!(!is_round_file_type(
            "round--summary.md",
            RoundFileType::Summary
        ));
    }

    #[test]
    fn test_is_any_round_file() {
        assert!(is_any_round_file("round-1-summary.md"));
        assert!(is_any_round_file(".humanize/rlcr/test/round-2-prompt.md"));
        assert!(is_any_round_file("round-3-todos.md"));

        // Not one of the three recognised types
        assert!(!is_any_round_file("round-1-review-prompt.md"));
        assert!(!is_any_round_file("roundup.md"));
        assert!(!is_any_round_file("src/main.rs"));
    }

    #[test]
    fn test_is_in_humanize_loop_dir() {
        assert!(is_in_humanize_loop_dir(
            ".humanize/rlcr/2026-03-17/state.md"
        ));
        assert!(is_in_humanize_loop_dir(
            ".humanize/pr-loop/2026-03-17/state.md"
        ));
        assert!(!is_in_humanize_loop_dir("src/main.rs"));
        assert!(!is_in_humanize_loop_dir(".humanize/config.json"));
    }

    #[test]
    fn test_extract_round_number() {
        assert_eq!(extract_round_number("round-1-summary.md"), Some(1));
        assert_eq!(extract_round_number("round-42-prompt.md"), Some(42));
        assert_eq!(
            extract_round_number(".humanize/rlcr/test/round-3-todos.md"),
            Some(3)
        );
        assert_eq!(extract_round_number("roundup.md"), None);
        assert_eq!(extract_round_number("round--summary.md"), None);
        assert_eq!(extract_round_number("src/main.rs"), None);
    }

    #[test]
    fn test_is_allowlisted_file() {
        let loop_dir = std::path::Path::new(".humanize/rlcr/2026-03-17");

        // Current round's summary and prompt
        assert!(is_allowlisted_file(
            ".humanize/rlcr/2026-03-17/round-3-summary.md",
            loop_dir,
            3
        ));
        assert!(is_allowlisted_file(
            ".humanize/rlcr/2026-03-17/round-3-prompt.md",
            loop_dir,
            3
        ));

        // Historical summaries are readable, historical prompts are not
        assert!(is_allowlisted_file(
            ".humanize/rlcr/2026-03-17/round-1-summary.md",
            loop_dir,
            3
        ));
        assert!(!is_allowlisted_file(
            ".humanize/rlcr/2026-03-17/round-1-prompt.md",
            loop_dir,
            3
        ));

        // Future rounds and other file types
        assert!(!is_allowlisted_file(
            ".humanize/rlcr/2026-03-17/round-4-summary.md",
            loop_dir,
            3
        ));
        assert!(!is_allowlisted_file(
            ".humanize/rlcr/2026-03-17/round-3-todos.md",
            loop_dir,
            3
        ));

        // Outside the loop directory
        assert!(!is_allowlisted_file(
            ".humanize/rlcr/other/round-3-summary.md",
            loop_dir,
            3
        ));
    }
}
429}