// Source: mermaid_cli/agents/filesystem.rs

use anyhow::{Context, Result};
use base64::{Engine as _, engine::general_purpose};
use std::fmt::Write as _;
use std::fs;
use std::path::{Path, PathBuf};

6/// Read a file from the filesystem
7pub fn read_file(path: &str) -> Result<String> {
8    let path = normalize_path_for_read(path)?;
9
10    // Security check: block sensitive files but allow reading outside project
11    validate_path_for_read(&path)?;
12
13    fs::read_to_string(&path).with_context(|| format!("Failed to read file: {}", path.display()))
14}
15
16/// Read a file from the filesystem asynchronously (for parallel operations)
17pub async fn read_file_async(path: String) -> Result<String> {
18    tokio::task::spawn_blocking(move || read_file(&path))
19        .await
20        .context("Failed to spawn blocking task for file read")?
21}
22
/// Check if a file is a binary format that should be base64-encoded.
///
/// The decision is based purely on the file extension, compared
/// case-insensitively against a fixed list of document/image extensions.
/// Paths without an extension return `false`.
pub fn is_binary_file(path: &str) -> bool {
    match Path::new(path).extension() {
        Some(ext) => matches!(
            ext.to_string_lossy().to_lowercase().as_str(),
            "pdf" | "png" | "jpg" | "jpeg" | "gif" | "webp" | "bmp" | "ico" | "tiff"
        ),
        None => false,
    }
}

37/// Read a binary file and encode it as base64
38pub fn read_binary_file(path: &str) -> Result<String> {
39    let path = normalize_path_for_read(path)?;
40
41    // Security check: block sensitive files but allow reading outside project
42    validate_path_for_read(&path)?;
43
44    let bytes = fs::read(&path)
45        .with_context(|| format!("Failed to read binary file: {}", path.display()))?;
46
47    Ok(general_purpose::STANDARD.encode(&bytes))
48}
49
50/// Write content to a file atomically with timestamped backup
51pub fn write_file(path: &str, content: &str) -> Result<()> {
52    let path = normalize_path(path)?;
53
54    // Security check
55    validate_path(&path)?;
56
57    // Create parent directories if they don't exist
58    if let Some(parent) = path.parent() {
59        fs::create_dir_all(parent).with_context(|| {
60            format!(
61                "Failed to create parent directories for: {}",
62                path.display()
63            )
64        })?;
65    }
66
67    // Create timestamped backup if file exists
68    if path.exists() {
69        create_timestamped_backup(&path)?;
70    }
71
72    // Atomic write: write to temporary file, then rename
73    let temp_path = format!("{}.tmp.{}", path.display(), std::process::id());
74    let temp_path = std::path::PathBuf::from(&temp_path);
75
76    // Write to temporary file
77    fs::write(&temp_path, content)
78        .with_context(|| format!("Failed to write to temporary file: {}", temp_path.display()))?;
79
80    // Atomically rename temp file to target
81    fs::rename(&temp_path, &path).with_context(|| {
82        format!(
83            "Failed to finalize write to: {} (temp file: {})",
84            path.display(),
85            temp_path.display()
86        )
87    })?;
88
89    Ok(())
90}
91
92/// Create a timestamped backup of a file
93/// Format: file.txt.backup.2025-10-20-01-45-32
94fn create_timestamped_backup(path: &std::path::Path) -> Result<()> {
95    let timestamp = chrono::Local::now().format("%Y-%m-%d-%H-%M-%S");
96    let backup_path = format!("{}.backup.{}", path.display(), timestamp);
97
98    fs::copy(path, &backup_path).with_context(|| {
99        format!(
100            "Failed to create backup of: {} to {}",
101            path.display(),
102            backup_path
103        )
104    })?;
105
106    Ok(())
107}
108
109/// Edit a file by replacing a unique occurrence of old_string with new_string
110/// Returns a unified diff showing the changes
111pub fn edit_file(path: &str, old_string: &str, new_string: &str) -> Result<String> {
112    let path = normalize_path(path)?;
113
114    // Security check
115    validate_path(&path)?;
116
117    // Read current content
118    let content = fs::read_to_string(&path)
119        .with_context(|| format!("Failed to read file for editing: {}", path.display()))?;
120
121    // Check that old_string occurs exactly once
122    let match_count = content.matches(old_string).count();
123    if match_count == 0 {
124        anyhow::bail!(
125            "old_string not found in {}. Make sure the text matches exactly, including whitespace and indentation.",
126            path.display()
127        );
128    }
129    if match_count > 1 {
130        anyhow::bail!(
131            "old_string appears {} times in {}. It must be unique. Include more surrounding context to make it unique.",
132            match_count,
133            path.display()
134        );
135    }
136
137    // Perform the replacement
138    let new_content = content.replacen(old_string, new_string, 1);
139
140    // Create timestamped backup
141    create_timestamped_backup(&path)?;
142
143    // Atomic write: write to temporary file, then rename
144    let temp_path = format!("{}.tmp.{}", path.display(), std::process::id());
145    let temp_path = std::path::PathBuf::from(&temp_path);
146
147    fs::write(&temp_path, &new_content)
148        .with_context(|| format!("Failed to write to temporary file: {}", temp_path.display()))?;
149
150    fs::rename(&temp_path, &path).with_context(|| {
151        format!(
152            "Failed to finalize edit to: {} (temp file: {})",
153            path.display(),
154            temp_path.display()
155        )
156    })?;
157
158    // Generate diff
159    let diff = generate_diff(&content, &new_content, old_string, new_string);
160    Ok(diff)
161}
162
/// Generate a diff-style summary showing the changed lines with context.
///
/// The output starts with an `Added N lines, removed M lines` header, where
/// N and M are the line counts of `new_string` and `old_string`. It is
/// followed by numbered lines:
///
/// * up to three lines of context before the change (from `old_content`),
/// * the removed lines, marked `-` (from `old_content`),
/// * the added lines, marked `+` (from `new_content`),
/// * up to three lines of context after the change (from `new_content`).
///
/// The change is located at the first occurrence of `old_string` in
/// `old_content`; if it is not found, the change is treated as starting on
/// the first line.
fn generate_diff(
    old_content: &str,
    new_content: &str,
    old_string: &str,
    new_string: &str,
) -> String {
    const CONTEXT_LINES: usize = 3;

    let old_lines: Vec<&str> = old_content.lines().collect();
    let new_lines: Vec<&str> = new_content.lines().collect();

    let removed_count = old_string.lines().count();
    let added_count = new_string.lines().count();

    // Zero-based line on which the change starts: the number of newlines
    // that precede the match in the old content.
    let change_offset = old_content.find(old_string).unwrap_or(0);
    let change_start_line = old_content[..change_offset].matches('\n').count();

    let diff_start = change_start_line.saturating_sub(CONTEXT_LINES);
    let context_after_start = change_start_line + added_count;
    let new_diff_end = (context_after_start + CONTEXT_LINES).min(new_lines.len());

    let mut output = String::new();
    // Writing into a `String` cannot fail, so the result is ignored.
    let _ = writeln!(
        output,
        "Added {} lines, removed {} lines",
        added_count, removed_count
    );

    // Context before
    push_numbered_lines(
        &mut output,
        &old_lines,
        diff_start,
        change_start_line - diff_start,
        ' ',
    );
    // Removed lines
    push_numbered_lines(&mut output, &old_lines, change_start_line, removed_count, '-');
    // Added lines
    push_numbered_lines(&mut output, &new_lines, change_start_line, added_count, '+');
    // Context after
    push_numbered_lines(
        &mut output,
        &new_lines,
        context_after_start,
        new_diff_end.saturating_sub(context_after_start),
        ' ',
    );

    output
}

/// Append up to `count` lines of `lines`, starting at zero-based index
/// `start`, each rendered as `<1-based number, width 4> <marker> <text>`.
/// Indices past the end of `lines` are silently skipped.
fn push_numbered_lines(out: &mut String, lines: &[&str], start: usize, count: usize, marker: char) {
    for (index, line) in lines.iter().enumerate().skip(start).take(count) {
        // Writing into a `String` cannot fail, so the result is ignored.
        let _ = writeln!(out, "{:>4} {} {}", index + 1, marker, line);
    }
}

224/// Delete a file with timestamped backup (for recovery)
225pub fn delete_file(path: &str) -> Result<()> {
226    let path = normalize_path(path)?;
227
228    // Security check
229    validate_path(&path)?;
230
231    // Create timestamped backup before deletion
232    if path.exists() {
233        create_timestamped_backup(&path)?;
234    }
235
236    fs::remove_file(&path).with_context(|| format!("Failed to delete file: {}", path.display()))
237}
238
239/// Create a directory
240pub fn create_directory(path: &str) -> Result<()> {
241    let path = normalize_path(path)?;
242
243    // Security check
244    validate_path(&path)?;
245
246    fs::create_dir_all(&path)
247        .with_context(|| format!("Failed to create directory: {}", path.display()))
248}
249
250/// Normalize a path for reading (allows absolute paths anywhere)
251fn normalize_path_for_read(path: &str) -> Result<PathBuf> {
252    let path = Path::new(path);
253
254    if path.is_absolute() {
255        // For absolute paths, return as-is (user has specified exact location)
256        Ok(path.to_path_buf())
257    } else {
258        // For relative paths, resolve from current directory
259        let current_dir = std::env::current_dir()?;
260        Ok(current_dir.join(path))
261    }
262}
263
264/// Normalize a path (resolve relative paths) - strict version for writes
265fn normalize_path(path: &str) -> Result<PathBuf> {
266    let path = Path::new(path);
267
268    // Reject paths containing ".." to prevent directory traversal.
269    // Symlinks in existing ancestors are resolved by canonicalize() in validate_path,
270    // but ".." in non-existent portions would be silently dropped by file_name().
271    for component in path.components() {
272        if matches!(component, std::path::Component::ParentDir) {
273            anyhow::bail!("Access denied: path contains '..' component");
274        }
275    }
276
277    if path.is_absolute() {
278        // For absolute paths, ensure they're within the current directory
279        let current_dir = std::env::current_dir()?;
280        if !path.starts_with(&current_dir) {
281            anyhow::bail!("Access denied: path outside of project directory");
282        }
283        Ok(path.to_path_buf())
284    } else {
285        // For relative paths, resolve from current directory
286        let current_dir = std::env::current_dir()?;
287        Ok(current_dir.join(path))
288    }
289}
290
/// Check if a path component or filename matches a sensitive pattern.
///
/// Uses path-component matching (not substring) to avoid false positives
/// like ".environment.ts" matching ".env". Checks both directory components
/// and file extensions.
///
/// A path is considered sensitive when any of the following holds:
///
/// * it contains `.git/config` (either separator) as a substring,
/// * it contains `mermaid/config.toml` and also `.config/` (either separator),
/// * any component equals a sensitive directory or filename,
/// * any component is `.env` or starts with `.env.`,
/// * its extension, lower-cased, is `pem` or `key`.
fn is_sensitive_path(path: &Path) -> bool {
    // Directory components that are always sensitive
    const SENSITIVE_DIRS: [&str; 4] = [".ssh", ".aws", ".gnupg", ".docker"];

    // Filenames that are sensitive
    const SENSITIVE_FILENAMES: [&str; 12] = [
        ".npmrc",
        ".pypirc",
        ".netrc",
        "id_rsa",
        "id_ed25519",
        "id_ecdsa",
        "id_dsa",
        "credentials.json",
        "secrets.yaml",
        "secrets.yml",
        "token.json",
        "config.json", // Docker registry auth, various credential stores
    ];

    // File extensions that are sensitive
    const SENSITIVE_EXTENSIONS: [&str; 2] = ["pem", "key"];

    let path_str = path.to_string_lossy();

    // Check for .git/config specifically (substring is fine here, it's unique)
    if path_str.contains(".git/config") || path_str.contains(".git\\config") {
        return true;
    }

    // Check for mermaid config (contains cloud_api_key)
    let names_mermaid_config =
        path_str.contains("mermaid/config.toml") || path_str.contains("mermaid\\config.toml");
    let under_dot_config = path_str.contains(".config/") || path_str.contains(".config\\");
    if names_mermaid_config && under_dot_config {
        return true;
    }

    let has_sensitive_component = path.components().any(|component| {
        let lossy = component.as_os_str().to_string_lossy();
        let name: &str = &lossy;

        // .env files: match ".env" exactly or ".env.*" (like .env.local,
        // .env.production) but NOT files that merely contain "env"
        // (like .environment.ts)
        SENSITIVE_DIRS.iter().any(|&dir| dir == name)
            || name == ".env"
            || name.starts_with(".env.")
            || SENSITIVE_FILENAMES.iter().any(|&filename| filename == name)
    });
    if has_sensitive_component {
        return true;
    }

    // Check sensitive extensions (case-insensitively)
    match path.extension() {
        Some(ext) => {
            let ext_str = ext.to_string_lossy().to_lowercase();
            SENSITIVE_EXTENSIONS
                .iter()
                .any(|&sensitive_ext| sensitive_ext == ext_str)
        }
        None => false,
    }
}

370/// Validate that a path is safe to read from (blocks sensitive files only)
371fn validate_path_for_read(path: &Path) -> Result<()> {
372    if is_sensitive_path(path) {
373        anyhow::bail!(
374            "Security error: attempted to access potentially sensitive file: {}",
375            path.display()
376        );
377    }
378    Ok(())
379}
380
381/// Validate that a path is safe to write to (strict - must be in project)
382fn validate_path(path: &Path) -> Result<()> {
383    let current_dir = std::env::current_dir()?;
384
385    // Resolve the path to handle .. and .
386    // For non-existent paths, walk up to find the first existing ancestor
387    let canonical = if path.exists() {
388        path.canonicalize()?
389    } else {
390        // Walk up the path to find the first existing ancestor
391        let mut ancestors_to_join = Vec::new();
392        let mut current = path;
393
394        while let Some(parent) = current.parent() {
395            if let Some(name) = current.file_name() {
396                ancestors_to_join.push(name.to_os_string());
397            }
398            if parent.as_os_str().is_empty() {
399                // Reached the root of a relative path
400                break;
401            }
402            if parent.exists() {
403                // Found existing ancestor - canonicalize it and join the rest
404                let mut result = parent.canonicalize()?;
405                for component in ancestors_to_join.iter().rev() {
406                    result = result.join(component);
407                }
408                return validate_canonical_path(&result, &current_dir);
409            }
410            current = parent;
411        }
412
413        // No existing ancestor found - use current_dir as base
414        let mut result = current_dir
415            .canonicalize()
416            .unwrap_or_else(|_| current_dir.clone());
417        for component in ancestors_to_join.iter().rev() {
418            result = result.join(component);
419        }
420        result
421    };
422
423    validate_canonical_path(&canonical, &current_dir)
424}
425
426/// Helper to validate a canonical path against the current directory
427fn validate_canonical_path(canonical: &Path, current_dir: &Path) -> Result<()> {
428    // Canonicalize current_dir for consistent comparison (Windows adds \\?\ prefix)
429    let current_dir_canonical = current_dir
430        .canonicalize()
431        .unwrap_or_else(|_| current_dir.to_path_buf());
432
433    // Ensure the path is within the current directory
434    if !canonical.starts_with(&current_dir_canonical) {
435        anyhow::bail!(
436            "Security error: attempted to access path outside of project directory: {}",
437            canonical.display()
438        );
439    }
440
441    // Check for sensitive files using shared path-component matcher
442    if is_sensitive_path(canonical) {
443        anyhow::bail!(
444            "Security error: attempted to access potentially sensitive file: {}",
445            canonical.display()
446        );
447    }
448
449    Ok(())
450}
451
#[cfg(test)]
mod tests {
    use super::*;

    // Phase 2 Test Suite: Filesystem Operations
    //
    // These tests use paths relative to the working directory the test
    // binary runs in and write scratch files under `target/`.

    #[test]
    fn test_read_file_valid() {
        // Test reading an existing file in the current project
        let result = read_file("Cargo.toml");
        assert!(
            result.is_ok(),
            "Should successfully read valid file from project"
        );
        let content = result.unwrap();
        assert!(
            content.contains("[package]") || !content.is_empty(),
            "Content should be reasonable"
        );
    }

    #[test]
    fn test_read_file_not_found() {
        let result = read_file("this_file_definitely_does_not_exist_12345.txt");
        assert!(result.is_err(), "Should fail to read non-existent file");
        let err_msg = result.unwrap_err().to_string();
        assert!(
            err_msg.contains("Failed to read file"),
            "Error message should indicate read failure, got: {}",
            err_msg
        );
    }

    #[test]
    fn test_write_and_read_roundtrip() {
        // Test actual write + read roundtrip in target/ (always within project)
        let test_path = "target/test_write_roundtrip.txt";
        let content = "Hello, Mermaid!";
        let result = write_file(test_path, content);
        assert!(result.is_ok(), "Write should succeed in target/");

        let read_back = read_file(test_path);
        assert!(read_back.is_ok(), "Should read back written file");
        assert_eq!(read_back.unwrap(), content);

        // Cleanup
        let _ = fs::remove_file(test_path);
        // Also clean up backup files. Backups are named
        // `<file>.backup.<timestamp>`, so they are swept by prefix rather
        // than by one fixed name.
        if let Ok(entries) = fs::read_dir("target") {
            for entry in entries.flatten() {
                let name = entry.file_name();
                if name
                    .to_string_lossy()
                    .starts_with("test_write_roundtrip.txt.backup")
                {
                    let _ = fs::remove_file(entry.path());
                }
            }
        }
    }

    #[test]
    fn test_delete_file_not_found() {
        let result = delete_file("this_definitely_should_not_exist_xyz123.txt");
        assert!(result.is_err(), "Should fail to delete non-existent file");
    }

    #[test]
    fn test_create_directory_simple() {
        let dir_path = "target/test_dir_creation";

        let result = create_directory(dir_path);
        assert!(result.is_ok(), "Should successfully create directory");

        let full_path = Path::new(dir_path);
        assert!(full_path.exists(), "Directory should exist");
        assert!(full_path.is_dir(), "Should be a directory");

        // Cleanup
        fs::remove_dir(dir_path).ok();
    }

    #[test]
    fn test_create_nested_directories_all() {
        let nested_path = "target/level1/level2/level3";

        let result = create_directory(nested_path);
        assert!(
            result.is_ok(),
            "Should create nested directories: {}",
            result.unwrap_err()
        );

        let full_path = Path::new(nested_path);
        assert!(full_path.exists(), "Nested directory should exist");
        assert!(full_path.is_dir(), "Should be a directory");

        // Cleanup
        fs::remove_dir_all("target/level1").ok();
    }

    #[test]
    fn test_path_validation_blocks_dotenv() {
        let result = read_file(".env");
        assert!(result.is_err(), "Should reject .env file access");
        let error = result.unwrap_err().to_string();
        assert!(
            error.contains("Security"),
            "Error should mention Security: {}",
            error
        );
    }

    #[test]
    fn test_path_validation_blocks_dotenv_variants() {
        // .env.local, .env.production should be blocked
        assert!(is_sensitive_path(Path::new("/project/.env.local")));
        assert!(is_sensitive_path(Path::new("/project/.env.production")));
        // But .environment.ts should NOT be blocked (path-component matching)
        assert!(!is_sensitive_path(Path::new(
            "/project/src/.environment.ts"
        )));
        assert!(!is_sensitive_path(Path::new("/project/src/environment.rs")));
    }

    #[test]
    fn test_path_validation_blocks_ssh_keys() {
        let result = read_file(".ssh/id_rsa");
        assert!(result.is_err(), "Should reject .ssh/id_rsa access");
        let error = result.unwrap_err().to_string();
        assert!(
            error.contains("Security"),
            "Error should mention Security: {}",
            error
        );
    }

    #[test]
    fn test_path_validation_blocks_aws_credentials() {
        let result = read_file(".aws/credentials");
        assert!(result.is_err(), "Should reject .aws/credentials access");
        let error = result.unwrap_err().to_string();
        assert!(
            error.contains("Security"),
            "Error should mention Security: {}",
            error
        );
    }

    #[test]
    fn test_path_validation_blocks_new_sensitive_patterns() {
        // Verify the expanded blocklist
        assert!(is_sensitive_path(Path::new("/home/user/credentials.json")));
        assert!(is_sensitive_path(Path::new("/project/secrets.yaml")));
        assert!(is_sensitive_path(Path::new("/project/server.pem")));
        assert!(is_sensitive_path(Path::new("/project/private.key")));
        assert!(is_sensitive_path(Path::new("/project/token.json")));
        assert!(is_sensitive_path(Path::new(
            "/home/user/.gnupg/pubring.kbx"
        )));
        // Docker and netrc
        assert!(is_sensitive_path(Path::new(
            "/home/user/.docker/config.json"
        )));
        assert!(is_sensitive_path(Path::new("/home/user/.netrc")));
        // Mermaid config (contains cloud_api_key)
        assert!(is_sensitive_path(Path::new(
            "/home/user/.config/mermaid/config.toml"
        )));
        // But NOT arbitrary config.toml files in project directories
        assert!(!is_sensitive_path(Path::new("/project/config.toml")));
    }
}