Skip to main content

semantic_diff/diff/
untracked.rs

1use std::fmt::Write;
2
/// Maximum file size, in bytes (1_048_576 = 1 MiB), for generating synthetic
/// diffs of untracked files. Files whose reported length exceeds this limit
/// are skipped by `generate_untracked_diff`.
const MAX_FILE_SIZE: u64 = 1_048_576;
5
/// Discover untracked files in the repository (excluding gitignored files).
///
/// Runs `git ls-files --others --exclude-standard -z` in the current working
/// directory and returns one entry per path git reports.
///
/// The `-z` flag matters: without it git C-quotes "unusual" path names
/// (non-ASCII bytes under the default `core.quotePath`, tabs, quotes,
/// backslashes, ...), yielding strings such as `"\346\226\207.txt"` that do
/// not name any file on disk. With `-z` the paths are emitted verbatim and
/// NUL-terminated, so they can be passed straight to the filesystem.
///
/// Path bytes that are not valid UTF-8 are replaced lossily. An empty list is
/// returned when `git` cannot be spawned or exits with a non-zero status.
pub fn discover_untracked_files() -> Vec<String> {
    let output = std::process::Command::new("git")
        .args(["ls-files", "--others", "--exclude-standard", "-z"])
        .output();

    match output {
        Ok(out) if out.status.success() => out
            .stdout
            // Entries are NUL-terminated, so the final split piece is empty.
            .split(|&byte| byte == 0)
            .filter(|raw| !raw.is_empty())
            .map(|raw| String::from_utf8_lossy(raw).into_owned())
            .collect(),
        _ => Vec::new(),
    }
}
23
24/// Async version of discover_untracked_files for use in spawn contexts.
25pub async fn discover_untracked_files_async() -> Vec<String> {
26    let output = tokio::process::Command::new("git")
27        .args(["ls-files", "--others", "--exclude-standard"])
28        .output()
29        .await;
30
31    match output {
32        Ok(out) if out.status.success() => {
33            let text = String::from_utf8_lossy(&out.stdout);
34            text.lines()
35                .filter(|l| !l.is_empty())
36                .map(|l| l.to_string())
37                .collect()
38        }
39        _ => Vec::new(),
40    }
41}
42
43/// Generate synthetic unified diff text for untracked files.
44///
45/// Returns `(diff_text, binary_untracked_paths)`: the unified diff string for
46/// text files, and a list of paths that were detected as binary.
47pub fn generate_untracked_diff(paths: &[String]) -> (String, Vec<String>) {
48    let mut diff_text = String::new();
49    let mut binary_paths = Vec::new();
50
51    for path in paths {
52        // Validate path (reuse same rules as diff parser)
53        if path.starts_with('/') || path.split('/').any(|c| c == "..") || path.contains('\0') {
54            continue;
55        }
56
57        let metadata = match std::fs::metadata(path) {
58            Ok(m) => m,
59            Err(_) => continue,
60        };
61
62        // Skip directories and files that are too large
63        if metadata.is_dir() || metadata.len() > MAX_FILE_SIZE {
64            continue;
65        }
66
67        // Read file content
68        let content = match std::fs::read(path) {
69            Ok(c) => c,
70            Err(_) => continue,
71        };
72
73        // Detect binary: check for null bytes in the first 8KB
74        let check_len = content.len().min(8192);
75        if content[..check_len].contains(&0) {
76            binary_paths.push(path.clone());
77            continue;
78        }
79
80        let text = match String::from_utf8(content) {
81            Ok(t) => t,
82            Err(_) => {
83                binary_paths.push(path.clone());
84                continue;
85            }
86        };
87
88        let lines: Vec<&str> = text.lines().collect();
89        let line_count = lines.len();
90
91        // Generate unified diff header
92        let _ = writeln!(diff_text, "diff --git a/{path} b/{path}");
93        let _ = writeln!(diff_text, "new file mode 100644");
94        let _ = writeln!(diff_text, "--- /dev/null");
95        let _ = writeln!(diff_text, "+++ b/{path}");
96        let _ = writeln!(diff_text, "@@ -0,0 +1,{line_count} @@");
97        for line in &lines {
98            let _ = writeln!(diff_text, "+{line}");
99        }
100    }
101
102    (diff_text, binary_paths)
103}
104
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the generator on `inputs` and asserts that it yields neither diff
    /// text nor binary paths.
    fn assert_produces_nothing(inputs: &[String]) {
        let (diff, binary) = generate_untracked_diff(inputs);
        assert!(diff.is_empty());
        assert!(binary.is_empty());
    }

    /// No input paths means no output at all.
    #[test]
    fn test_generate_untracked_diff_empty() {
        assert_produces_nothing(&[]);
    }

    /// A path that does not exist on disk is skipped.
    #[test]
    fn test_generate_untracked_diff_nonexistent() {
        assert_produces_nothing(&[String::from("nonexistent_file_xyz.rs")]);
    }

    /// Paths containing `..` components are rejected.
    #[test]
    fn test_generate_untracked_diff_rejects_traversal() {
        assert_produces_nothing(&[String::from("../../../etc/passwd")]);
    }

    /// Absolute paths are rejected.
    #[test]
    fn test_generate_untracked_diff_rejects_absolute() {
        assert_produces_nothing(&[String::from("/etc/passwd")]);
    }

    /// A three-line text file is rendered as a new-file diff with every line
    /// added.
    #[test]
    fn test_generate_untracked_diff_format() {
        // The fixture lives under a relative path because absolute paths are
        // rejected by the function under test.
        let test_dir = "target/test_untracked_diff";
        let _ = std::fs::create_dir_all(test_dir);
        let file_path = format!("{test_dir}/test_file.txt");
        std::fs::write(&file_path, "line1\nline2\nline3\n").unwrap();

        let (diff, binary) = generate_untracked_diff(std::slice::from_ref(&file_path));

        // Plain text must not be classified as binary.
        assert!(binary.is_empty());

        // The header and each line (as an addition) must be present.
        let expected_fragments = [
            format!("+++ b/{file_path}"),
            String::from("@@ -0,0 +1,3 @@"),
            String::from("+line1"),
            String::from("+line2"),
            String::from("+line3"),
        ];
        for fragment in expected_fragments.iter() {
            assert!(diff.contains(fragment.as_str()));
        }

        // Remove the fixture directory again.
        let _ = std::fs::remove_dir_all(test_dir);
    }
}