ricecoder_research/
codebase_scanner.rs1use crate::error::ResearchError;
4use crate::models::{Framework, Language};
5use ignore::WalkBuilder;
6use std::collections::HashSet;
7use std::path::{Path, PathBuf};
8
9#[derive(Debug, Clone)]
11pub struct FileMetadata {
12 pub path: PathBuf,
14 pub language: Option<Language>,
16 pub size: u64,
18 pub is_test: bool,
20}
21
22#[derive(Debug, Clone)]
24pub struct ScanResult {
25 pub files: Vec<FileMetadata>,
27 pub languages: Vec<Language>,
29 pub frameworks: Vec<Framework>,
31 pub source_dirs: Vec<PathBuf>,
33 pub test_dirs: Vec<PathBuf>,
35}
36
/// Stateless entry point for scanning a project tree.
///
/// All functionality is exposed through associated functions, so the type
/// carries no data. It derives the same basic traits as the other public
/// types in this module so it can be logged, copied, and default-constructed.
#[derive(Debug, Clone, Copy, Default)]
pub struct CodebaseScanner;
39
40impl CodebaseScanner {
41 pub fn scan(root: &Path) -> Result<ScanResult, ResearchError> {
49 if !root.exists() {
50 return Err(ResearchError::ProjectNotFound {
51 path: root.to_path_buf(),
52 reason: "Cannot scan codebase: root directory does not exist".to_string(),
53 });
54 }
55
56 let mut files = Vec::new();
57 let mut languages = HashSet::new();
58 let mut source_dirs = HashSet::new();
59 let mut test_dirs = HashSet::new();
60
61 let walker = WalkBuilder::new(root).hidden(true).git_ignore(true).build();
63
64 for entry in walker {
65 let entry = match entry {
66 Ok(e) => e,
67 Err(_) => continue,
68 };
69
70 let path = entry.path();
71
72 if path.is_dir() {
74 continue;
75 }
76
77 let language = Self::detect_language(path);
79 let is_test = Self::is_test_file(path);
80
81 if let Some(parent) = path.parent() {
83 if is_test {
84 test_dirs.insert(parent.to_path_buf());
85 } else if language.is_some() {
86 source_dirs.insert(parent.to_path_buf());
87 }
88 }
89
90 if let Ok(metadata) = std::fs::metadata(path) {
91 files.push(FileMetadata {
92 path: path.to_path_buf(),
93 language: language.clone(),
94 size: metadata.len(),
95 is_test,
96 });
97
98 if let Some(lang) = language {
99 languages.insert(lang);
100 }
101 }
102 }
103
104 let mut languages_vec: Vec<Language> = languages.into_iter().collect();
106 languages_vec.sort_by(|a, b| format!("{:?}", a).cmp(&format!("{:?}", b)));
107
108 let mut source_dirs_vec: Vec<PathBuf> = source_dirs.into_iter().collect();
109 source_dirs_vec.sort();
110
111 let mut test_dirs_vec: Vec<PathBuf> = test_dirs.into_iter().collect();
112 test_dirs_vec.sort();
113
114 Ok(ScanResult {
115 files,
116 languages: languages_vec,
117 frameworks: Vec::new(), source_dirs: source_dirs_vec,
119 test_dirs: test_dirs_vec,
120 })
121 }
122
123 fn detect_language(path: &Path) -> Option<Language> {
125 let extension = path.extension()?.to_str()?;
126
127 match extension {
128 "rs" => Some(Language::Rust),
129 "ts" | "tsx" | "js" | "jsx" => Some(Language::TypeScript),
130 "py" => Some(Language::Python),
131 "go" => Some(Language::Go),
132 "java" => Some(Language::Java),
133 "kt" | "kts" => Some(Language::Kotlin),
134 "cs" => Some(Language::CSharp),
135 "php" => Some(Language::Php),
136 "rb" => Some(Language::Ruby),
137 "swift" => Some(Language::Swift),
138 "dart" => Some(Language::Dart),
139 _ => None,
140 }
141 }
142
/// Reports whether `path` looks like a test file.
///
/// A path is treated as a test when either:
///
/// * any of its components is named exactly `tests`, `test`, or
///   `__tests__`, or
/// * its file name follows a recognized naming convention: one of the
///   suffixes in `TEST_FILE_SUFFIXES`, or the `test_*.py` prefix form.
///
/// The suffix list covers the languages handled by the extension-based
/// language detection in this file, including the `tsx`/`jsx`, PHP, and Dart
/// conventions. Matching is case-sensitive.
fn is_test_file(path: &Path) -> bool {
    const TEST_DIR_NAMES: [&str; 3] = ["tests", "test", "__tests__"];
    const TEST_FILE_SUFFIXES: [&str; 14] = [
        "_test.rs",
        ".test.ts",
        ".test.tsx",
        ".test.js",
        ".test.jsx",
        "_test.py",
        "_test.go",
        "Test.java",
        "Test.kt",
        "Tests.cs",
        "Test.php",
        "_test.rb",
        "Tests.swift",
        "_test.dart",
    ];

    // Only normal components are compared; root, prefix, `.` and `..`
    // components can never name a test directory.
    let inside_test_dir = path.components().any(|component| match component {
        std::path::Component::Normal(name) => {
            let name = name.to_string_lossy();
            TEST_DIR_NAMES.iter().any(|dir| name == *dir)
        }
        _ => false,
    });
    if inside_test_dir {
        return true;
    }

    // A path without a final component yields an empty name, which matches
    // none of the conventions below.
    let file_name = path.file_name().unwrap_or_default().to_string_lossy();

    TEST_FILE_SUFFIXES
        .iter()
        .any(|suffix| file_name.ends_with(suffix))
        || (file_name.starts_with("test_") && file_name.ends_with(".py"))
}
168}
169
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    // --- detect_language -------------------------------------------------

    /// `.rs` is reported as Rust.
    #[test]
    fn test_detect_language_rust() {
        let detected = CodebaseScanner::detect_language(Path::new("main.rs"));
        assert_eq!(detected, Some(Language::Rust));
    }

    /// `.ts` is reported as TypeScript.
    #[test]
    fn test_detect_language_typescript() {
        let detected = CodebaseScanner::detect_language(Path::new("main.ts"));
        assert_eq!(detected, Some(Language::TypeScript));
    }

    /// `.py` is reported as Python.
    #[test]
    fn test_detect_language_python() {
        let detected = CodebaseScanner::detect_language(Path::new("main.py"));
        assert_eq!(detected, Some(Language::Python));
    }

    /// Extensions outside the known table yield no language.
    #[test]
    fn test_detect_language_unknown() {
        let detected = CodebaseScanner::detect_language(Path::new("README.md"));
        assert_eq!(detected, None);
    }

    // --- is_test_file ----------------------------------------------------

    /// The `_test.rs` suffix marks a Rust test file.
    #[test]
    fn test_is_test_file_rust() {
        assert!(CodebaseScanner::is_test_file(Path::new("src/lib_test.rs")));
    }

    /// The `.test.ts` suffix marks a TypeScript test file.
    #[test]
    fn test_is_test_file_typescript() {
        assert!(CodebaseScanner::is_test_file(Path::new("src/main.test.ts")));
    }

    /// Anything under a `tests` directory counts as a test file.
    #[test]
    fn test_is_test_file_directory() {
        assert!(CodebaseScanner::is_test_file(Path::new(
            "tests/integration.rs"
        )));
    }

    /// An ordinary source file is not a test file.
    #[test]
    fn test_is_test_file_not_test() {
        assert!(!CodebaseScanner::is_test_file(Path::new("src/main.rs")));
    }

    // --- scan ------------------------------------------------------------

    /// Scanning a small on-disk project finds all three files and reports
    /// Rust plus at least one source and one test directory.
    #[test]
    fn test_scan_simple_project() -> Result<(), Box<dyn std::error::Error>> {
        let temp_dir = TempDir::new()?;
        let root = temp_dir.path();

        for dir in &["src", "tests"] {
            fs::create_dir_all(root.join(dir))?;
        }

        let fixtures = [
            ("src/main.rs", "fn main() {}"),
            ("src/lib.rs", "pub fn lib() {}"),
            ("tests/integration_test.rs", "#[test]\nfn test() {}"),
        ];
        for (relative, contents) in &fixtures {
            fs::write(root.join(relative), contents)?;
        }

        let scan = CodebaseScanner::scan(root)?;

        assert_eq!(scan.files.len(), 3);
        assert!(scan.languages.contains(&Language::Rust));
        assert!(!scan.source_dirs.is_empty());
        assert!(!scan.test_dirs.is_empty());

        Ok(())
    }

    /// A root that does not exist is reported as an error.
    #[test]
    fn test_scan_nonexistent_directory() {
        let outcome = CodebaseScanner::scan(Path::new("/nonexistent/path"));
        assert!(outcome.is_err());
    }
}