ricecoder_research/
project_analyzer.rs

1//! Project analyzer for detecting project type and structure
2
3use crate::error::ResearchError;
4use crate::models::{Framework, Language, ProjectStructure, ProjectType};
5use std::path::{Path, PathBuf};
6use walkdir::WalkDir;
7
/// Analyzes project structure and metadata to understand project type and organization.
///
/// The analyzer is a unit struct with no fields; every method receives the
/// project root as an argument.
#[derive(Debug)]
pub struct ProjectAnalyzer;
11
12impl ProjectAnalyzer {
13    /// Create a new ProjectAnalyzer
14    pub fn new() -> Self {
15        ProjectAnalyzer
16    }
17
18    /// Detect the type of project at the given path
19    ///
20    /// Analyzes the project structure to determine if it's a library, application,
21    /// service, monorepo, or unknown type.
22    ///
23    /// # Arguments
24    ///
25    /// * `root` - Root path of the project
26    ///
27    /// # Returns
28    ///
29    /// The detected `ProjectType`, or a `ResearchError`
30    pub fn detect_type(&self, root: &Path) -> Result<ProjectType, ResearchError> {
31        if !root.exists() {
32            return Err(ResearchError::ProjectNotFound {
33                path: root.to_path_buf(),
34                reason: "Cannot detect project type: directory does not exist".to_string(),
35            });
36        }
37
38        // Detect languages first to understand project type
39        let languages = self.detect_languages(root)?;
40
41        // Check for monorepo patterns
42        if self.is_monorepo(root, &languages)? {
43            return Ok(ProjectType::Monorepo);
44        }
45
46        // Check for library vs application
47        if self.is_library(root, &languages)? {
48            Ok(ProjectType::Library)
49        } else if self.is_service(root, &languages)? {
50            Ok(ProjectType::Service)
51        } else {
52            Ok(ProjectType::Application)
53        }
54    }
55
56    /// Analyze the structure of a project
57    ///
58    /// Identifies source directories, test directories, configuration files,
59    /// and entry points.
60    ///
61    /// # Arguments
62    ///
63    /// * `root` - Root path of the project
64    ///
65    /// # Returns
66    ///
67    /// A `ProjectStructure` containing the analysis, or a `ResearchError`
68    pub fn analyze_structure(&self, root: &Path) -> Result<ProjectStructure, ResearchError> {
69        if !root.exists() {
70            return Err(ResearchError::ProjectNotFound {
71                path: root.to_path_buf(),
72                reason: "Cannot analyze structure: directory does not exist".to_string(),
73            });
74        }
75
76        let source_dirs = self.find_source_directories(root)?;
77        let test_dirs = self.find_test_directories(root)?;
78        let config_files = self.find_config_files(root)?;
79        let entry_points = self.find_entry_points(root)?;
80
81        Ok(ProjectStructure {
82            root: root.to_path_buf(),
83            source_dirs,
84            test_dirs,
85            config_files,
86            entry_points,
87        })
88    }
89
90    /// Identify frameworks and libraries used in the project
91    ///
92    /// # Arguments
93    ///
94    /// * `root` - Root path of the project
95    ///
96    /// # Returns
97    ///
98    /// A vector of detected `Framework`s, or a `ResearchError`
99    pub fn identify_frameworks(&self, root: &Path) -> Result<Vec<Framework>, ResearchError> {
100        if !root.exists() {
101            return Err(ResearchError::ProjectNotFound {
102                path: root.to_path_buf(),
103                reason: "Cannot identify frameworks: directory does not exist".to_string(),
104            });
105        }
106
107        let mut frameworks = Vec::new();
108
109        // Check for common frameworks based on manifest files
110        if let Ok(cargo_toml) = std::fs::read_to_string(root.join("Cargo.toml")) {
111            // Parse Cargo.toml for dependencies
112            if cargo_toml.contains("tokio") {
113                frameworks.push(Framework {
114                    name: "tokio".to_string(),
115                    version: self.extract_version(&cargo_toml, "tokio"),
116                });
117            }
118            if cargo_toml.contains("serde") {
119                frameworks.push(Framework {
120                    name: "serde".to_string(),
121                    version: self.extract_version(&cargo_toml, "serde"),
122                });
123            }
124            if cargo_toml.contains("actix") {
125                frameworks.push(Framework {
126                    name: "actix".to_string(),
127                    version: self.extract_version(&cargo_toml, "actix"),
128                });
129            }
130            if cargo_toml.contains("axum") {
131                frameworks.push(Framework {
132                    name: "axum".to_string(),
133                    version: self.extract_version(&cargo_toml, "axum"),
134                });
135            }
136        }
137
138        if let Ok(package_json) = std::fs::read_to_string(root.join("package.json")) {
139            // Parse package.json for dependencies
140            if package_json.contains("\"react\"") {
141                frameworks.push(Framework {
142                    name: "react".to_string(),
143                    version: self.extract_json_version(&package_json, "react"),
144                });
145            }
146            if package_json.contains("\"express\"") {
147                frameworks.push(Framework {
148                    name: "express".to_string(),
149                    version: self.extract_json_version(&package_json, "express"),
150                });
151            }
152            if package_json.contains("\"next\"") {
153                frameworks.push(Framework {
154                    name: "next".to_string(),
155                    version: self.extract_json_version(&package_json, "next"),
156                });
157            }
158        }
159
160        Ok(frameworks)
161    }
162
163    // ========================================================================
164    // Private helper methods
165    // ========================================================================
166
167    /// Detect programming languages used in the project
168    fn detect_languages(&self, root: &Path) -> Result<Vec<Language>, ResearchError> {
169        let mut languages = Vec::new();
170
171        // Check for Rust
172        if root.join("Cargo.toml").exists() {
173            languages.push(Language::Rust);
174        }
175
176        // Check for Node.js
177        if root.join("package.json").exists() {
178            languages.push(Language::TypeScript);
179        }
180
181        // Check for Python
182        if root.join("pyproject.toml").exists() || root.join("requirements.txt").exists() {
183            languages.push(Language::Python);
184        }
185
186        // Check for Go
187        if root.join("go.mod").exists() {
188            languages.push(Language::Go);
189        }
190
191        // Check for Java
192        if root.join("pom.xml").exists() || root.join("build.gradle").exists() {
193            languages.push(Language::Java);
194        }
195
196        // Check for Kotlin
197        if root.join("build.gradle.kts").exists() {
198            languages.push(Language::Kotlin);
199        }
200
201        // Check for .NET
202        if self.has_csproj_files(root)? || root.join("packages.config").exists() {
203            languages.push(Language::CSharp);
204        }
205
206        // Check for PHP
207        if root.join("composer.json").exists() {
208            languages.push(Language::Php);
209        }
210
211        // Check for Ruby
212        if root.join("Gemfile").exists() {
213            languages.push(Language::Ruby);
214        }
215
216        // Check for Swift
217        if root.join("Package.swift").exists() {
218            languages.push(Language::Swift);
219        }
220
221        // Check for Dart
222        if root.join("pubspec.yaml").exists() {
223            languages.push(Language::Dart);
224        }
225
226        Ok(languages)
227    }
228
229    /// Check if the project is a monorepo
230    fn is_monorepo(&self, root: &Path, languages: &[Language]) -> Result<bool, ResearchError> {
231        // Rust workspace
232        if languages.contains(&Language::Rust) {
233            if let Ok(cargo_toml) = std::fs::read_to_string(root.join("Cargo.toml")) {
234                if cargo_toml.contains("[workspace]") {
235                    return Ok(true);
236                }
237            }
238        }
239
240        // Node.js monorepo (lerna, yarn workspaces, npm workspaces)
241        if languages.contains(&Language::TypeScript) {
242            if let Ok(package_json) = std::fs::read_to_string(root.join("package.json")) {
243                if package_json.contains("\"workspaces\"") {
244                    return Ok(true);
245                }
246            }
247            if root.join("lerna.json").exists() {
248                return Ok(true);
249            }
250        }
251
252        // Check for multiple independent projects in subdirectories
253        let mut project_count = 0;
254        for entry in WalkDir::new(root)
255            .max_depth(2)
256            .into_iter()
257            .filter_map(|e| e.ok())
258        {
259            let path = entry.path();
260            if path.join("Cargo.toml").exists()
261                || path.join("package.json").exists()
262                || path.join("pyproject.toml").exists()
263            {
264                project_count += 1;
265            }
266        }
267
268        Ok(project_count > 1)
269    }
270
271    /// Check if the project is a library
272    fn is_library(&self, root: &Path, languages: &[Language]) -> Result<bool, ResearchError> {
273        // Rust library
274        if languages.contains(&Language::Rust) {
275            if let Ok(cargo_toml) = std::fs::read_to_string(root.join("Cargo.toml")) {
276                // Check for [lib] section
277                if cargo_toml.contains("[lib]") {
278                    return Ok(true);
279                }
280                // Check if there's no [[bin]] section
281                if !cargo_toml.contains("[[bin]]") && root.join("src/lib.rs").exists() {
282                    return Ok(true);
283                }
284            }
285        }
286
287        // Node.js library (has "main" or "exports" in package.json)
288        if languages.contains(&Language::TypeScript) {
289            if let Ok(package_json) = std::fs::read_to_string(root.join("package.json")) {
290                if (package_json.contains("\"main\"") || package_json.contains("\"exports\""))
291                    && !package_json.contains("\"bin\"")
292                {
293                    return Ok(true);
294                }
295            }
296        }
297
298        Ok(false)
299    }
300
301    /// Check if the project is a service/microservice
302    fn is_service(&self, root: &Path, _languages: &[Language]) -> Result<bool, ResearchError> {
303        // Check for common service indicators
304        if let Ok(cargo_toml) = std::fs::read_to_string(root.join("Cargo.toml")) {
305            // Web frameworks indicate a service
306            if cargo_toml.contains("actix")
307                || cargo_toml.contains("axum")
308                || cargo_toml.contains("rocket")
309            {
310                return Ok(true);
311            }
312        }
313
314        if let Ok(package_json) = std::fs::read_to_string(root.join("package.json")) {
315            // Express, Fastify, etc. indicate a service
316            if package_json.contains("\"express\"")
317                || package_json.contains("\"fastify\"")
318                || package_json.contains("\"koa\"")
319            {
320                return Ok(true);
321            }
322        }
323
324        // Check for Dockerfile (common in services)
325        if root.join("Dockerfile").exists() {
326            return Ok(true);
327        }
328
329        Ok(false)
330    }
331
332    /// Find source directories in the project
333    fn find_source_directories(&self, root: &Path) -> Result<Vec<PathBuf>, ResearchError> {
334        let mut source_dirs = Vec::new();
335
336        // Common source directory patterns
337        let common_patterns = vec!["src", "lib", "app", "source", "code"];
338
339        for pattern in common_patterns {
340            let path = root.join(pattern);
341            if path.exists() && path.is_dir() {
342                source_dirs.push(path);
343            }
344        }
345
346        // Language-specific patterns
347        if (root.join("src/main.rs").exists() || root.join("src/lib.rs").exists())
348            && !source_dirs.contains(&root.join("src"))
349        {
350            source_dirs.push(root.join("src"));
351        }
352
353        Ok(source_dirs)
354    }
355
356    /// Find test directories in the project
357    fn find_test_directories(&self, root: &Path) -> Result<Vec<PathBuf>, ResearchError> {
358        let mut test_dirs = Vec::new();
359
360        // Common test directory patterns
361        let common_patterns = vec!["tests", "test", "__tests__", "spec", "specs"];
362
363        for pattern in common_patterns {
364            let path = root.join(pattern);
365            if path.exists() && path.is_dir() {
366                test_dirs.push(path);
367            }
368        }
369
370        Ok(test_dirs)
371    }
372
373    /// Find configuration files in the project
374    fn find_config_files(&self, root: &Path) -> Result<Vec<PathBuf>, ResearchError> {
375        let mut config_files = Vec::new();
376
377        // Common configuration files
378        let config_patterns = vec![
379            "Cargo.toml",
380            "package.json",
381            "pyproject.toml",
382            "go.mod",
383            "pom.xml",
384            "build.gradle",
385            "build.gradle.kts",
386            ".csproj",
387            "composer.json",
388            "Gemfile",
389            "Package.swift",
390            "pubspec.yaml",
391            "Dockerfile",
392            ".env",
393            ".env.example",
394            "tsconfig.json",
395            "jest.config.js",
396            "webpack.config.js",
397        ];
398
399        for pattern in config_patterns {
400            let path = root.join(pattern);
401            if path.exists() && path.is_file() {
402                config_files.push(path);
403            }
404        }
405
406        Ok(config_files)
407    }
408
409    /// Find entry points in the project
410    fn find_entry_points(&self, root: &Path) -> Result<Vec<PathBuf>, ResearchError> {
411        let mut entry_points = Vec::new();
412
413        // Rust entry points
414        if root.join("src/main.rs").exists() {
415            entry_points.push(root.join("src/main.rs"));
416        }
417
418        // Node.js entry points
419        if let Ok(package_json) = std::fs::read_to_string(root.join("package.json")) {
420            if let Some(main_start) = package_json.find("\"main\"") {
421                if let Some(colon_pos) = package_json[main_start..].find(':') {
422                    if let Some(quote_start) = package_json[main_start + colon_pos..].find('"') {
423                        if let Some(quote_end) =
424                            package_json[main_start + colon_pos + quote_start + 1..].find('"')
425                        {
426                            let main_file = &package_json[main_start + colon_pos + quote_start + 1
427                                ..main_start + colon_pos + quote_start + 1 + quote_end];
428                            let path = root.join(main_file);
429                            if path.exists() {
430                                entry_points.push(path);
431                            }
432                        }
433                    }
434                }
435            }
436        }
437
438        // Python entry points
439        if root.join("main.py").exists() {
440            entry_points.push(root.join("main.py"));
441        }
442        if root.join("__main__.py").exists() {
443            entry_points.push(root.join("__main__.py"));
444        }
445
446        // Go entry points
447        if root.join("main.go").exists() {
448            entry_points.push(root.join("main.go"));
449        }
450
451        Ok(entry_points)
452    }
453
454    /// Check if project has .csproj files
455    fn has_csproj_files(&self, root: &Path) -> Result<bool, ResearchError> {
456        for entry in WalkDir::new(root)
457            .max_depth(2)
458            .into_iter()
459            .filter_map(|e| e.ok())
460        {
461            if entry.path().extension().is_some_and(|ext| ext == "csproj") {
462                return Ok(true);
463            }
464        }
465        Ok(false)
466    }
467
468    /// Extract version from Cargo.toml
469    fn extract_version(&self, content: &str, package: &str) -> Option<String> {
470        // Simple regex-free version extraction
471        let search_str = format!("{} =", package);
472        if let Some(pos) = content.find(&search_str) {
473            let after = &content[pos + search_str.len()..];
474            if let Some(quote_pos) = after.find('"') {
475                if let Some(end_quote) = after[quote_pos + 1..].find('"') {
476                    let version = &after[quote_pos + 1..quote_pos + 1 + end_quote];
477                    return Some(version.to_string());
478                }
479            }
480        }
481        None
482    }
483
484    /// Extract version from package.json
485    fn extract_json_version(&self, content: &str, package: &str) -> Option<String> {
486        let search_str = format!("\"{}\":", package);
487        if let Some(pos) = content.find(&search_str) {
488            let after = &content[pos + search_str.len()..];
489            if let Some(quote_pos) = after.find('"') {
490                if let Some(end_quote) = after[quote_pos + 1..].find('"') {
491                    let version = &after[quote_pos + 1..quote_pos + 1 + end_quote];
492                    return Some(version.to_string());
493                }
494            }
495        }
496        None
497    }
498}
499
500impl Default for ProjectAnalyzer {
501    fn default() -> Self {
502        Self::new()
503    }
504}
505
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// The analyzer carries no state.
    #[test]
    fn test_project_analyzer_creation() {
        let analyzer = ProjectAnalyzer::new();
        assert_eq!(std::mem::size_of_val(&analyzer), 0);
    }

    #[test]
    fn test_project_analyzer_default() {
        let analyzer = ProjectAnalyzer::default();
        assert_eq!(std::mem::size_of_val(&analyzer), 0);
    }

    #[test]
    fn test_detect_type_nonexistent_path() {
        let analyzer = ProjectAnalyzer::new();
        let result = analyzer.detect_type(Path::new("/nonexistent/path"));
        assert!(matches!(
            result,
            Err(ResearchError::ProjectNotFound { .. })
        ));
    }

    #[test]
    fn test_analyze_structure_nonexistent_path() {
        let analyzer = ProjectAnalyzer::new();
        let result = analyzer.analyze_structure(Path::new("/nonexistent/path"));
        assert!(matches!(
            result,
            Err(ResearchError::ProjectNotFound { .. })
        ));
    }

    #[test]
    fn test_identify_frameworks_nonexistent_path() {
        let analyzer = ProjectAnalyzer::new();
        let result = analyzer.identify_frameworks(Path::new("/nonexistent/path"));
        assert!(matches!(
            result,
            Err(ResearchError::ProjectNotFound { .. })
        ));
    }

    /// A bare `[package]` manifest with no `src/lib.rs`, web framework or
    /// Dockerfile falls through every check to `Application`.
    #[test]
    fn test_detect_rust_project() {
        let temp_dir = TempDir::new().unwrap();
        let cargo_toml = temp_dir.path().join("Cargo.toml");
        std::fs::write(&cargo_toml, "[package]\nname = \"test\"\n").unwrap();

        let analyzer = ProjectAnalyzer::new();
        let result = analyzer.detect_type(temp_dir.path());
        assert!(matches!(result, Ok(ProjectType::Application)));
    }

    #[test]
    fn test_find_source_directories() {
        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path();
        std::fs::create_dir(root.join("src")).unwrap();
        std::fs::create_dir(root.join("lib")).unwrap();

        let analyzer = ProjectAnalyzer::new();
        let result = analyzer.find_source_directories(root).unwrap();
        assert_eq!(result, vec![root.join("src"), root.join("lib")]);
    }

    #[test]
    fn test_find_test_directories() {
        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path();
        std::fs::create_dir(root.join("tests")).unwrap();
        std::fs::create_dir(root.join("test")).unwrap();

        let analyzer = ProjectAnalyzer::new();
        let result = analyzer.find_test_directories(root).unwrap();
        assert_eq!(result, vec![root.join("tests"), root.join("test")]);
    }

    #[test]
    fn test_find_config_files() {
        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path();
        std::fs::write(root.join("Cargo.toml"), "").unwrap();
        std::fs::write(root.join("package.json"), "").unwrap();

        let analyzer = ProjectAnalyzer::new();
        let result = analyzer.find_config_files(root).unwrap();
        assert_eq!(
            result,
            vec![root.join("Cargo.toml"), root.join("package.json")]
        );
    }

    #[test]
    fn test_find_entry_points() {
        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path();
        std::fs::create_dir(root.join("src")).unwrap();
        std::fs::write(root.join("src/main.rs"), "").unwrap();

        let analyzer = ProjectAnalyzer::new();
        let result = analyzer.find_entry_points(root).unwrap();
        assert_eq!(result, vec![root.join("src/main.rs")]);
    }
}