Skip to main content

humanize_cli_core/
fs.rs

//! Path and content validation helpers for Humanize.
//!
//! This module provides path validation to guard against security issues
//! such as path traversal and symlink attacks, a JSON nesting-depth check,
//! and helpers that classify Humanize loop files by their names.
5
6use std::path::{Component, PathBuf};
7
8use crate::constants::MAX_JSON_DEPTH;
9
10/// Errors that can occur during file operations.
11#[derive(Debug, thiserror::Error)]
12pub enum FsError {
13    #[error("Path is not relative: {0}")]
14    NotRelative(String),
15
16    #[error("Path contains parent directory traversal: {0}")]
17    PathTraversal(String),
18
19    #[error("Path is absolute: {0}")]
20    AbsolutePath(String),
21
22    #[error("Symlink not allowed: {0}")]
23    SymlinkNotAllowed(String),
24
25    #[error("Path points outside repository: {0}")]
26    OutsideRepository(String),
27
28    #[error("Invalid UTF-8 in path")]
29    InvalidUtf8,
30
31    #[error("Null byte in path")]
32    NullByte,
33
34    #[error("IO error: {0}")]
35    IoError(String),
36}
37
38impl From<std::io::Error> for FsError {
39    fn from(e: std::io::Error) -> Self {
40        FsError::IoError(e.to_string())
41    }
42}
43
/// Options for path validation.
///
/// The derived [`Default`] is the strictest configuration: every `allow_*`
/// flag is `false` and no repository root is set.
#[derive(Debug, Clone, Default)]
pub struct PathValidationOptions {
    /// Whether symlinks are allowed. Defaults to `false`.
    pub allow_symlinks: bool,
    /// Whether absolute paths are allowed. Defaults to `false`.
    pub allow_absolute: bool,
    /// Whether parent directory traversal (..) is allowed. Defaults to `false`.
    pub allow_parent_traversal: bool,
    /// Repository root for boundary checking. Defaults to `None`.
    pub repo_root: Option<PathBuf>,
}
67
68/// Validate a path for security.
69pub fn validate_path(path: &str, options: &PathValidationOptions) -> Result<PathBuf, FsError> {
70    // Check for null bytes
71    if path.contains('\0') {
72        return Err(FsError::NullByte);
73    }
74
75    // Check for valid UTF-8 (already guaranteed by &str)
76
77    let parsed = PathBuf::from(path);
78
79    // Check for absolute paths
80    if parsed.is_absolute() && !options.allow_absolute {
81        return Err(FsError::AbsolutePath(path.to_string()));
82    }
83
84    // Check for parent directory traversal
85    if !options.allow_parent_traversal {
86        for component in parsed.components() {
87            if matches!(component, Component::ParentDir) {
88                return Err(FsError::PathTraversal(path.to_string()));
89            }
90        }
91    }
92
93    // Check symlinks if we have a repo root
94    if let Some(ref repo_root) = options.repo_root {
95        let full_path = repo_root.join(&parsed);
96
97        // Check if path resolves outside repo (symlink check)
98        if full_path.exists() {
99            if let Ok(canonical) = full_path.canonicalize() {
100                if !canonical.starts_with(repo_root) {
101                    return Err(FsError::OutsideRepository(path.to_string()));
102                }
103            }
104        }
105    }
106
107    Ok(parsed)
108}
109
110/// Validate JSON depth to prevent DoS attacks.
111pub fn validate_json_depth(json: &str) -> Result<(), FsError> {
112    let mut depth: usize = 0;
113    let mut max_depth: usize = 0;
114    let mut in_string = false;
115    let mut escape_next = false;
116
117    for ch in json.chars() {
118        if escape_next {
119            escape_next = false;
120            continue;
121        }
122
123        match ch {
124            '\\' if in_string => escape_next = true,
125            '"' => in_string = !in_string,
126            '{' | '[' if !in_string => {
127                depth += 1;
128                max_depth = max_depth.max(depth);
129                if depth > MAX_JSON_DEPTH {
130                    return Err(FsError::IoError(format!(
131                        "JSON nesting depth exceeds maximum ({})",
132                        MAX_JSON_DEPTH
133                    )));
134                }
135            }
136            '}' | ']' if !in_string => {
137                depth = depth.saturating_sub(1);
138            }
139            _ => {}
140        }
141    }
142
143    Ok(())
144}
145
/// Report whether `path` names a round-specific file that should be blocked from reading.
///
/// Only the last `/`-separated segment is inspected, ignoring case. It matches
/// when it starts with `round-` immediately followed by an ASCII digit and
/// ends with `.md` (for example `round-3-summary.md`).
pub fn is_round_specific_file(path: &str) -> bool {
    let lowered = path.to_lowercase();
    let name = lowered.rsplit('/').next().unwrap_or(&lowered);

    match name.strip_prefix("round-") {
        Some(after_prefix) if name.ends_with(".md") => {
            after_prefix.starts_with(|c: char| c.is_ascii_digit())
        }
        _ => false,
    }
}
162
/// Report whether `path` is a protected state file that should be blocked from writing.
///
/// Ignoring case, the path must contain `.humanize/rlcr/` or
/// `.humanize/pr-loop/` and must end with `/state.md`.
pub fn is_protected_state_file(path: &str) -> bool {
    let lowered = path.to_lowercase();

    let in_loop_dir = [".humanize/rlcr/", ".humanize/pr-loop/"]
        .iter()
        .any(|marker| lowered.contains(*marker));

    in_loop_dir && lowered.ends_with("/state.md")
}
176
/// Kinds of per-round files that validators treat specially.
///
/// Each kind corresponds to a file named `round-N-<kind>.md`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RoundFileType {
    /// A round summary: `round-N-summary.md`.
    Summary,
    /// A round prompt: `round-N-prompt.md`.
    Prompt,
    /// A round to-do list: `round-N-todos.md`.
    Todos,
}
187
188/// Check if a path is a specific round file type.
189///
190/// Matches the pattern: round-N-<type>.md where N is a number.
191pub fn is_round_file_type(path: &str, file_type: RoundFileType) -> bool {
192    let path_lower = path.to_lowercase();
193    let type_str = match file_type {
194        RoundFileType::Summary => "summary",
195        RoundFileType::Prompt => "prompt",
196        RoundFileType::Todos => "todos",
197    };
198
199    // Extract filename
200    let filename = path_lower
201        .rsplit('/')
202        .next()
203        .unwrap_or(&path_lower);
204
205    // Must start with "round-"
206    if !filename.starts_with("round-") {
207        return false;
208    }
209
210    // Must end with -<type>.md
211    let suffix = format!("-{}.md", type_str);
212    if !filename.ends_with(&suffix) {
213        return false;
214    }
215
216    // Extract the part between "round-" and "-<type>.md"
217    let rest = &filename[6..]; // skip "round-"
218    if let Some(num_part) = rest.strip_suffix(&suffix) {
219        // Must be all digits
220        return num_part.chars().all(|c| c.is_ascii_digit()) && !num_part.is_empty();
221    }
222
223    false
224}
225
226/// Check if a path is any round file (summary, prompt, or todos).
227pub fn is_any_round_file(path: &str) -> bool {
228    is_round_file_type(path, RoundFileType::Summary)
229        || is_round_file_type(path, RoundFileType::Prompt)
230        || is_round_file_type(path, RoundFileType::Todos)
231}
232
/// Report whether `path` lies inside `.humanize/rlcr/` or `.humanize/pr-loop/`.
///
/// This is a case-insensitive substring test on the path text.
pub fn is_in_humanize_loop_dir(path: &str) -> bool {
    let lowered = path.to_lowercase();
    [".humanize/rlcr/", ".humanize/pr-loop/"]
        .iter()
        .any(|marker| lowered.contains(*marker))
}
238
/// Extract round number from a round filename.
///
/// Only the last `/`-separated segment is inspected, ignoring case. It must
/// look like `round-N-*.md`, where `N` is one or more ASCII digits.
///
/// Returns `None` if the name does not match that pattern or if `N` does not
/// fit in a `u32`.
pub fn extract_round_number(filename: &str) -> Option<u32> {
    let lowered = filename.to_lowercase();
    let name = lowered.rsplit('/').next().unwrap_or(&lowered);

    // "round-N-rest.md" -> "N-rest"
    let stem = name.strip_prefix("round-")?.strip_suffix(".md")?;

    // The round number is everything before the first dash.
    let digits = &stem[..stem.find('-')?];

    // `str::parse::<u32>` also accepts a leading '+', which is not a valid round
    // number; insist on plain digits, as `is_round_file_type` does.
    if digits.is_empty() || !digits.bytes().all(|b| b.is_ascii_digit()) {
        return None;
    }

    digits.parse().ok()
}
259
260/// Check if a file path is allowlisted for reading during a loop.
261///
262/// A file is allowlisted if:
263/// - It's the current round's summary/prompt file
264/// - It's a historical summary file (for context)
265/// - It's in the active loop directory and matches current round
266pub fn is_allowlisted_file(file_path: &str, loop_dir: &std::path::Path, current_round: u32) -> bool {
267    let file_path_lower = file_path.to_lowercase();
268    let loop_dir_str = loop_dir.to_string_lossy().to_lowercase();
269
270    // Must be in the loop directory
271    if !file_path_lower.starts_with(&loop_dir_str) {
272        return false;
273    }
274
275    // Extract filename
276    let filename = file_path_lower.rsplit('/').next().unwrap_or(&file_path_lower);
277
278    // Allow current round's summary and prompt
279    let current_summary = format!("round-{}-summary.md", current_round);
280    let current_prompt = format!("round-{}-prompt.md", current_round);
281
282    if filename == current_summary || filename == current_prompt {
283        return true;
284    }
285
286    // Allow historical summaries (round-N-summary.md where N < current_round)
287    if is_round_file_type(file_path, RoundFileType::Summary) {
288        if let Some(round) = extract_round_number(file_path) {
289            if round < current_round {
290                return true;
291            }
292        }
293    }
294
295    false
296}
297
#[cfg(test)]
mod tests {
    use super::*;

    /// A plain relative path passes under the default (strict) options.
    #[test]
    fn test_validate_path_relative() {
        let result = validate_path("src/main.rs", &PathValidationOptions::default());
        assert!(result.is_ok());
    }

    /// Absolute paths are rejected by default.
    #[test]
    fn test_validate_path_absolute_rejected() {
        let result = validate_path("/etc/passwd", &PathValidationOptions::default());
        assert!(matches!(result, Err(FsError::AbsolutePath(_))));
    }

    /// `..` components are rejected by default.
    #[test]
    fn test_validate_path_traversal_rejected() {
        let result = validate_path("../../../etc/passwd", &PathValidationOptions::default());
        assert!(matches!(result, Err(FsError::PathTraversal(_))));
    }

    #[test]
    fn test_is_round_specific_file() {
        let accepted = [
            "round-1-summary.md",
            ".humanize/rlcr/test/round-2-review-prompt.md",
        ];
        for path in accepted.iter() {
            assert!(is_round_specific_file(path), "should match: {}", path);
        }

        let rejected = ["src/main.rs", "roundup.md"];
        for path in rejected.iter() {
            assert!(!is_round_specific_file(path), "should not match: {}", path);
        }
    }

    #[test]
    fn test_is_protected_state_file() {
        let protected = [
            ".humanize/rlcr/2026-03-17/state.md",
            ".humanize/pr-loop/2026-03-17/state.md",
        ];
        for path in protected.iter() {
            assert!(is_protected_state_file(path), "should be protected: {}", path);
        }

        let unprotected = [
            "docs/state.md",
            ".humanize/rlcr/2026-03-17/complete-state.md",
        ];
        for path in unprotected.iter() {
            assert!(
                !is_protected_state_file(path),
                "should not be protected: {}",
                path
            );
        }
    }

    #[test]
    fn test_json_depth_validation() {
        assert!(validate_json_depth(r#"{"a": 1}"#).is_ok());
        assert!(validate_json_depth(r#"[[[[[[]]]]]]"#).is_ok());

        // Create deeply nested JSON
        let deep_json = format!("{}{}", "[".repeat(35), "]".repeat(35));
        assert!(validate_json_depth(&deep_json).is_err());
    }

    #[test]
    fn test_is_round_file_type() {
        // (path, type to test against, expected result)
        let cases = [
            // Summary files
            ("round-1-summary.md", RoundFileType::Summary, true),
            (
                ".humanize/rlcr/test/round-2-summary.md",
                RoundFileType::Summary,
                true,
            ),
            ("round-1-prompt.md", RoundFileType::Summary, false),
            // Prompt files
            ("round-1-prompt.md", RoundFileType::Prompt, true),
            ("round-1-summary.md", RoundFileType::Prompt, false),
            // Todos files
            ("round-1-todos.md", RoundFileType::Todos, true),
            ("round-1-summary.md", RoundFileType::Todos, false),
            // Invalid patterns
            ("roundup.md", RoundFileType::Summary, false),
            ("round--summary.md", RoundFileType::Summary, false),
        ];

        for &(path, file_type, expected) in cases.iter() {
            assert_eq!(
                is_round_file_type(path, file_type),
                expected,
                "{} as {:?}",
                path,
                file_type
            );
        }
    }

    #[test]
    fn test_is_in_humanize_loop_dir() {
        let cases = [
            (".humanize/rlcr/2026-03-17/state.md", true),
            (".humanize/pr-loop/2026-03-17/state.md", true),
            ("src/main.rs", false),
            (".humanize/config.json", false),
        ];

        for &(path, expected) in cases.iter() {
            assert_eq!(is_in_humanize_loop_dir(path), expected, "{}", path);
        }
    }

    #[test]
    fn test_extract_round_number() {
        let cases = [
            ("round-1-summary.md", Some(1)),
            ("round-42-prompt.md", Some(42)),
            (".humanize/rlcr/test/round-3-todos.md", Some(3)),
            ("roundup.md", None),
            ("round--summary.md", None),
            ("src/main.rs", None),
        ];

        for &(path, expected) in cases.iter() {
            assert_eq!(extract_round_number(path), expected, "{}", path);
        }
    }
}
397}