Skip to main content

atomcode_core/setup/
scan.rs

1//! Scan a project directory and produce ProjectSignals. Pure filesystem
2//! introspection — no external commands (no git/npm/cargo CLI), no LLM.
3
4use crate::setup::state::compute_signals_hash;
5use crate::setup::types::*;
6use std::path::{Path, PathBuf};
7
/// Maximum number of leading README bytes kept by `read_readme_head`.
const README_HEAD_BYTES: usize = 2048;
/// Maximum number of root-directory entries kept by `collect_root_tree`.
const ROOT_TREE_MAX_ENTRIES: usize = 50;
10
11pub fn scan(project_root: &Path) -> ProjectSignals {
12    let mut s = ProjectSignals::empty(project_root.to_path_buf());
13    s.markers = collect_markers(project_root);
14    s.stacks = derive_stacks(&s.markers);
15    s.frameworks = derive_frameworks(project_root, &s.markers);
16    s.package_mgrs = derive_pkg_mgrs(project_root, &s.markers);
17    s.vcs = derive_vcs(project_root);
18    s.ci = derive_ci(project_root);
19    s.containerized = s
20        .markers
21        .iter()
22        .any(|m| m.kind == MarkerKind::Dockerfile || m.kind == MarkerKind::K8sManifest);
23    s.test_frameworks = derive_test_frameworks(project_root, &s.markers);
24    s.root_tree = collect_root_tree(project_root);
25    s.readme_head = read_readme_head(project_root);
26    s.signals_hash = compute_signals_hash(
27        &s.markers.iter().map(|m| m.path.clone()).collect::<Vec<_>>(),
28    );
29    s
30}
31
32fn collect_markers(root: &Path) -> Vec<Marker> {
33    let probes: &[(&str, MarkerKind)] = &[
34        ("Cargo.toml", MarkerKind::CargoToml),
35        ("package.json", MarkerKind::PackageJson),
36        ("pom.xml", MarkerKind::PomXml),
37        ("build.gradle", MarkerKind::BuildGradle),
38        ("build.gradle.kts", MarkerKind::BuildGradle),
39        ("pyproject.toml", MarkerKind::PyprojectToml),
40        ("requirements.txt", MarkerKind::RequirementsTxt),
41        ("go.mod", MarkerKind::GoMod),
42        ("Dockerfile", MarkerKind::Dockerfile),
43        (".eslintrc.js", MarkerKind::EslintConfig),
44        (".eslintrc.json", MarkerKind::EslintConfig),
45        (".eslintrc.yml", MarkerKind::EslintConfig),
46        ("rustfmt.toml", MarkerKind::RustfmtToml),
47        ("clippy.toml", MarkerKind::ClippyToml),
48        ("tsconfig.json", MarkerKind::TsConfig),
49    ];
50    let mut found = vec![];
51    for (name, kind) in probes {
52        let p = root.join(name);
53        if p.exists() {
54            found.push(Marker { path: p, kind: *kind });
55        }
56    }
57    if root.join(".git").is_dir() {
58        found.push(Marker { path: root.join(".git"), kind: MarkerKind::GitDir });
59    }
60    if root.join(".github/workflows").is_dir() {
61        found.push(Marker {
62            path: root.join(".github/workflows"),
63            kind: MarkerKind::GhActionsDir,
64        });
65    }
66    if root.join("prisma").is_dir() {
67        found.push(Marker { path: root.join("prisma"), kind: MarkerKind::PrismaDir });
68    }
69    // k8s heuristic — top-level k8s/ or helm/ dir.
70    if root.join("k8s").is_dir() || root.join("helm").is_dir() {
71        let path = if root.join("k8s").is_dir() {
72            root.join("k8s")
73        } else {
74            root.join("helm")
75        };
76        found.push(Marker { path, kind: MarkerKind::K8sManifest });
77    }
78    found
79}
80
81fn derive_stacks(markers: &[Marker]) -> Vec<Stack> {
82    let mut s = vec![];
83    let has = |k: MarkerKind| markers.iter().any(|m| m.kind == k);
84    if has(MarkerKind::CargoToml) {
85        s.push(Stack::Rust);
86    }
87    if has(MarkerKind::PackageJson) {
88        s.push(Stack::Node);
89    }
90    if has(MarkerKind::PomXml) || has(MarkerKind::BuildGradle) {
91        s.push(Stack::Java);
92    }
93    if has(MarkerKind::PyprojectToml) || has(MarkerKind::RequirementsTxt) {
94        s.push(Stack::Python);
95    }
96    if has(MarkerKind::GoMod) {
97        s.push(Stack::Go);
98    }
99    s
100}
101
102fn derive_frameworks(root: &Path, markers: &[Marker]) -> Vec<Framework> {
103    let mut f = vec![];
104    if markers.iter().any(|m| m.kind == MarkerKind::PackageJson) {
105        if let Ok(raw) = std::fs::read_to_string(root.join("package.json")) {
106            if raw.contains("\"react\"") {
107                f.push(Framework::React);
108            }
109            if raw.contains("\"vue\"") {
110                f.push(Framework::Vue);
111            }
112            if raw.contains("\"next\"") {
113                f.push(Framework::Next);
114            }
115        }
116    }
117    if markers.iter().any(|m| m.kind == MarkerKind::CargoToml) {
118        if let Ok(raw) = std::fs::read_to_string(root.join("Cargo.toml")) {
119            if raw.contains("tokio") {
120                f.push(Framework::Tokio);
121            }
122        }
123    }
124    if markers.iter().any(|m| m.kind == MarkerKind::PomXml) {
125        if let Ok(raw) = std::fs::read_to_string(root.join("pom.xml")) {
126            if raw.contains("spring-boot-starter") {
127                f.push(Framework::Spring);
128            }
129        }
130    }
131    for fname in &["pyproject.toml", "requirements.txt"] {
132        if let Ok(raw) = std::fs::read_to_string(root.join(fname)) {
133            let lower = raw.to_lowercase();
134            if lower.contains("django") && !f.contains(&Framework::Django) {
135                f.push(Framework::Django);
136            }
137            if lower.contains("flask") && !f.contains(&Framework::Flask) {
138                f.push(Framework::Flask);
139            }
140        }
141    }
142    f
143}
144
145fn derive_pkg_mgrs(root: &Path, markers: &[Marker]) -> Vec<PkgMgr> {
146    let mut p = vec![];
147    let has = |k: MarkerKind| markers.iter().any(|m| m.kind == k);
148    if has(MarkerKind::CargoToml) {
149        p.push(PkgMgr::Cargo);
150    }
151    if has(MarkerKind::GoMod) {
152        p.push(PkgMgr::GoMod);
153    }
154    if has(MarkerKind::PomXml) {
155        p.push(PkgMgr::Maven);
156    }
157    if has(MarkerKind::BuildGradle) {
158        p.push(PkgMgr::Gradle);
159    }
160    if has(MarkerKind::PyprojectToml) {
161        if root.join("poetry.lock").exists() {
162            p.push(PkgMgr::Poetry);
163        } else {
164            p.push(PkgMgr::Pip);
165        }
166    } else if has(MarkerKind::RequirementsTxt) {
167        p.push(PkgMgr::Pip);
168    }
169    if has(MarkerKind::PackageJson) {
170        if root.join("pnpm-lock.yaml").exists() {
171            p.push(PkgMgr::Pnpm);
172        } else if root.join("yarn.lock").exists() {
173            p.push(PkgMgr::Yarn);
174        } else {
175            p.push(PkgMgr::Npm);
176        }
177    }
178    p
179}
180
181fn derive_vcs(root: &Path) -> VcsInfo {
182    if !root.join(".git").exists() {
183        return VcsInfo::None;
184    }
185    let remote = std::fs::read_to_string(root.join(".git/config")).ok().and_then(|cfg| {
186        cfg.lines()
187            .find(|l| l.trim().starts_with("url"))
188            .and_then(|l| l.split('=').nth(1))
189            .map(|s| s.trim().to_string())
190    });
191    VcsInfo::Git { remote }
192}
193
194fn derive_ci(root: &Path) -> CiInfo {
195    let workflows = root.join(".github/workflows");
196    if workflows.is_dir() {
197        let count = std::fs::read_dir(&workflows)
198            .map(|it| it.filter_map(|e| e.ok()).count())
199            .unwrap_or(0);
200        return CiInfo::GhActions { workflow_count: count };
201    }
202    if root.join(".gitlab-ci.yml").exists() {
203        return CiInfo::GitLab;
204    }
205    if root.join(".circleci").is_dir() || root.join("Jenkinsfile").exists() {
206        return CiInfo::Other;
207    }
208    CiInfo::None
209}
210
211fn derive_test_frameworks(root: &Path, markers: &[Marker]) -> Vec<TestFw> {
212    let mut tfs = vec![];
213    if markers.iter().any(|m| m.kind == MarkerKind::CargoToml) {
214        tfs.push(TestFw::CargoTest);
215    }
216    if let Ok(raw) = std::fs::read_to_string(root.join("package.json")) {
217        if raw.contains("\"jest\"") {
218            tfs.push(TestFw::Jest);
219        }
220        if raw.contains("\"vitest\"") {
221            tfs.push(TestFw::Vitest);
222        }
223    }
224    if root.join("pytest.ini").exists()
225        || std::fs::read_to_string(root.join("pyproject.toml"))
226            .ok()
227            .map_or(false, |s| s.contains("[tool.pytest"))
228    {
229        tfs.push(TestFw::Pytest);
230    }
231    if root.join("pom.xml").exists()
232        && std::fs::read_to_string(root.join("pom.xml"))
233            .ok()
234            .map_or(false, |s| s.contains("junit"))
235    {
236        tfs.push(TestFw::JUnit);
237    }
238    tfs
239}
240
241fn collect_root_tree(root: &Path) -> Vec<PathBuf> {
242    let mut entries: Vec<PathBuf> = std::fs::read_dir(root)
243        .ok()
244        .into_iter()
245        .flatten()
246        .filter_map(|e| e.ok())
247        .map(|e| e.path())
248        .filter(|p| {
249            let name = p.file_name().and_then(|n| n.to_str()).unwrap_or("");
250            !matches!(
251                name,
252                "node_modules" | "target" | ".git" | "dist" | "build" | ".next"
253            )
254        })
255        .collect();
256    entries.sort();
257    entries.truncate(ROOT_TREE_MAX_ENTRIES);
258    entries
259}
260
261fn read_readme_head(root: &Path) -> Option<String> {
262    for name in ["README.md", "README.rst", "README.txt", "README"] {
263        if let Ok(bytes) = std::fs::read(root.join(name)) {
264            let head = if bytes.len() > README_HEAD_BYTES {
265                &bytes[..README_HEAD_BYTES]
266            } else {
267                &bytes
268            };
269            return Some(String::from_utf8_lossy(head).to_string());
270        }
271    }
272    None
273}
274
#[cfg(test)]
mod tests {
    use super::*;

    /// Creates a temporary project directory containing the given
    /// `(relative path, content)` files, creating parent directories as needed.
    fn setup_dir(files: &[(&str, &str)]) -> tempfile::TempDir {
        let dir = tempfile::tempdir().unwrap();
        for &(rel_path, body) in files {
            let target = dir.path().join(rel_path);
            if let Some(parent) = target.parent() {
                std::fs::create_dir_all(parent).unwrap();
            }
            std::fs::write(&target, body).unwrap();
        }
        dir
    }

    /// An empty directory produces no markers, no stacks and no VCS.
    #[test]
    fn scan_empty_dir_returns_empty_signals() {
        let project = tempfile::tempdir().unwrap();
        let signals = scan(project.path());
        assert!(signals.markers.is_empty());
        assert!(signals.stacks.is_empty());
        assert!(matches!(signals.vcs, VcsInfo::None));
    }

    /// A lone `Cargo.toml` yields the Cargo marker, the Rust stack and Cargo.
    #[test]
    fn scan_rust_project_detects_cargo_and_stack() {
        let project = setup_dir(&[("Cargo.toml", "[package]\nname = \"x\"")]);
        let signals = scan(project.path());
        let has_cargo_marker = signals
            .markers
            .iter()
            .any(|marker| marker.kind == MarkerKind::CargoToml);
        assert!(has_cargo_marker);
        assert_eq!(signals.stacks, vec![Stack::Rust]);
        assert!(signals.package_mgrs.contains(&PkgMgr::Cargo));
    }

    /// A `react` dependency is detected; no lockfile means npm.
    #[test]
    fn scan_react_project_detects_framework() {
        let project = setup_dir(&[("package.json", r#"{"dependencies":{"react":"^18"}}"#)]);
        let signals = scan(project.path());
        assert!(signals.frameworks.contains(&Framework::React));
        assert!(signals.package_mgrs.contains(&PkgMgr::Npm));
    }

    /// The remote URL is read straight from `.git/config`.
    #[test]
    fn scan_with_git_dir_marks_vcs() {
        let project = setup_dir(&[(
            ".git/config",
            "[remote \"origin\"]\n\turl = git@x.com:a/b\n",
        )]);
        let signals = scan(project.path());
        if let VcsInfo::Git { remote } = signals.vcs {
            assert!(remote.unwrap().contains("a/b"));
        } else {
            panic!("expected Git");
        }
    }

    /// A `Dockerfile` alone is enough to flag the project as containerized.
    #[test]
    fn scan_docker_marks_containerized() {
        let project = setup_dir(&[("Dockerfile", "FROM rust:1.80")]);
        let signals = scan(project.path());
        assert!(signals.containerized);
    }

    /// The root tree never exceeds 50 entries, however many files exist.
    #[test]
    fn scan_truncates_root_tree_to_50() {
        let project = tempfile::tempdir().unwrap();
        for i in 0..100 {
            let file = project.path().join(format!("file{i}.txt"));
            std::fs::write(file, "x").unwrap();
        }
        let signals = scan(project.path());
        assert!(signals.root_tree.len() <= 50);
    }

    /// The README head is capped at 2048 bytes.
    #[test]
    fn scan_reads_readme_head_2kb_max() {
        let oversized = "x".repeat(5000);
        let project = setup_dir(&[("README.md", &oversized)]);
        let signals = scan(project.path());
        let head = signals.readme_head.unwrap();
        assert!(head.len() <= 2048);
    }

    /// Rewriting a marker file's content must change the signals hash.
    #[test]
    fn scan_signals_hash_changes_when_marker_content_changes() {
        let project = setup_dir(&[("Cargo.toml", "v1")]);
        let before = scan(project.path()).signals_hash;
        std::fs::write(project.path().join("Cargo.toml"), "v2").unwrap();
        let after = scan(project.path()).signals_hash;
        assert_ne!(before, after);
    }
}
363        std::fs::write(dir.path().join("Cargo.toml"), "v2").unwrap();
364        let h2 = scan(dir.path()).signals_hash;
365        assert_ne!(h1, h2);
366    }
367}