Skip to main content

cgx_engine/
walker.rs

1use std::path::{Path, PathBuf};
2
3use ignore::WalkBuilder;
4
/// Programming language of a source file, as inferred from its file extension.
///
/// The enum is a plain tag with no payload, so it is `Copy`: values can be
/// passed around and stored without explicit clones.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Language {
    /// `.ts` and `.tsx` files.
    TypeScript,
    /// `.js`, `.jsx`, `.mjs` and `.cjs` files.
    JavaScript,
    /// `.py` files.
    Python,
    /// `.rs` files.
    Rust,
    /// `.go` files.
    Go,
    /// `.java` files.
    Java,
    /// `.cs` files.
    CSharp,
    /// `.php` files.
    Php,
    /// Any extension that is not mapped to one of the languages above.
    Unknown,
}
17
18#[derive(Debug, Clone)]
19pub struct SourceFile {
20    pub path: PathBuf,
21    pub relative_path: String,
22    pub language: Language,
23    pub content: String,
24    pub size_bytes: u64,
25}
26
27pub fn walk_repo(repo_path: &Path) -> anyhow::Result<Vec<SourceFile>> {
28    let mut files = Vec::new();
29    let canonical = repo_path.canonicalize()?;
30
31    let mut walker = WalkBuilder::new(&canonical);
32    walker.standard_filters(true);
33    walker.hidden(true);
34    // Explicitly add overrides for common non-source directories
35    let mut override_builder = ignore::overrides::OverrideBuilder::new(&canonical);
36    for pattern in &[
37        "!node_modules/",
38        "!target/",
39        "!dist/",
40        "!__pycache__/",
41        "!.git/",
42    ] {
43        let _ = override_builder.add(pattern);
44    }
45    let overrides = override_builder.build()?;
46    walker.overrides(overrides);
47
48    for entry in walker.build() {
49        let entry = match entry {
50            Ok(e) => e,
51            Err(_) => continue,
52        };
53
54        if !entry.file_type().map(|ft| ft.is_file()).unwrap_or(false) {
55            continue;
56        }
57
58        let path = entry.path().to_path_buf();
59
60        let metadata = match std::fs::metadata(&path) {
61            Ok(m) => m,
62            Err(_) => continue,
63        };
64        let size_bytes = metadata.len();
65
66        if size_bytes > 2 * 1024 * 1024 {
67            continue;
68        }
69
70        let Some(ext) = path.extension().and_then(|e| e.to_str()) else {
71            continue;
72        };
73        let language = detect_language(ext);
74
75        if matches!(language, Language::Unknown) {
76            continue;
77        }
78
79        if is_binary(&path)? {
80            continue;
81        }
82
83        let content = match std::fs::read_to_string(&path) {
84            Ok(c) => c,
85            Err(_) => continue,
86        };
87
88        let relative_path = match path.strip_prefix(&canonical) {
89            Ok(r) => r.to_string_lossy().to_string(),
90            Err(_) => path.to_string_lossy().to_string(),
91        };
92
93        files.push(SourceFile {
94            path,
95            relative_path,
96            language,
97            content,
98            size_bytes,
99        });
100    }
101
102    Ok(files)
103}
104
105fn detect_language(ext: &str) -> Language {
106    match ext {
107        "ts" | "tsx" => Language::TypeScript,
108        "js" | "jsx" | "mjs" | "cjs" => Language::JavaScript,
109        "py" => Language::Python,
110        "rs" => Language::Rust,
111        "go" => Language::Go,
112        "java" => Language::Java,
113        "cs" => Language::CSharp,
114        "php" => Language::Php,
115        _ => Language::Unknown,
116    }
117}
118
119fn is_binary(path: &Path) -> anyhow::Result<bool> {
120    let mut file = std::fs::File::open(path)?;
121    let mut buf = vec![0u8; 8192];
122    let n = std::io::Read::read(&mut file, &mut buf).unwrap_or(0);
123    Ok(buf[..n].contains(&0))
124}