Skip to main content

ralph_workflow/language_detector/
mod.rs

1//! Language and Stack Detection Module
2//!
3//! Detects the primary technology stack of a repository by analyzing file extensions,
4//! configuration files, and common patterns. This enables language-specific review
5//! guidance without requiring an LLM.
6//!
7//! The detection is fast (< 100ms typically) and uses heuristics based on:
8//! - File extension counts
9//! - Signature files (Cargo.toml, package.json, etc.)
10//! - Framework indicators in config files
11//!
12//! # Module Structure
13//!
14//! - [`extensions`] - Extension to language mapping
15//! - [`signatures`] - Signature file detection for frameworks and package managers
16//! - [`scanner`] - File system scanning utilities
17
18#![deny(unsafe_code)]
19
20mod extensions;
21mod scanner;
22mod signatures;
23
24use std::collections::HashMap;
25use std::io;
26use std::path::Path;
27
28use crate::workspace::{Workspace, WorkspaceFs};
29
30pub use extensions::extension_to_language;
31use extensions::is_non_primary_language;
32
/// Upper bound on how many secondary languages are listed in a [`ProjectStack`].
///
/// Polyglot repositories routinely mix more than three relevant languages
/// (for example PHP + TS + JS + SQL); the list is still capped so that the
/// summary stays readable when shown in prompts and banners.
const MAX_SECONDARY_LANGUAGES: usize = 6;

/// Smallest number of files a language needs before it is treated as present.
const MIN_FILES_FOR_DETECTION: usize = 1;
41
/// The technology stack detected for a project.
///
/// The [`Default`] value describes a project about which nothing is known:
/// the primary language is `"Unknown"` and every other field is empty.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ProjectStack {
    /// The most prevalent programming language.
    pub(crate) primary_language: String,
    /// Other languages that are also used in the project.
    pub(crate) secondary_languages: Vec<String>,
    /// Frameworks that were detected (React, Django, Rails, etc.).
    pub(crate) frameworks: Vec<String>,
    /// Whether the project appears to contain tests.
    pub(crate) has_tests: bool,
    /// The detected test framework, if one was found.
    pub(crate) test_framework: Option<String>,
    /// The detected package manager, if one was found.
    pub(crate) package_manager: Option<String>,
}

impl Default for ProjectStack {
    /// Builds the "nothing detected" stack.
    fn default() -> Self {
        let primary_language = String::from("Unknown");

        Self {
            primary_language,
            secondary_languages: Vec::new(),
            frameworks: Vec::new(),
            has_tests: false,
            test_framework: None,
            package_manager: None,
        }
    }
}
71
72impl ProjectStack {
73    /// Returns true if the project uses Rust
74    pub(crate) fn is_rust(&self) -> bool {
75        self.primary_language == "Rust" || self.secondary_languages.iter().any(|l| l == "Rust")
76    }
77
78    /// Returns true if the project uses Python
79    pub(crate) fn is_python(&self) -> bool {
80        self.primary_language == "Python" || self.secondary_languages.iter().any(|l| l == "Python")
81    }
82
83    /// Returns true if the project uses JavaScript or TypeScript
84    pub(crate) fn is_javascript_or_typescript(&self) -> bool {
85        matches!(self.primary_language.as_str(), "JavaScript" | "TypeScript")
86            || self
87                .secondary_languages
88                .iter()
89                .any(|l| l == "JavaScript" || l == "TypeScript")
90    }
91
92    /// Returns true if the project uses Go
93    pub(crate) fn is_go(&self) -> bool {
94        self.primary_language == "Go" || self.secondary_languages.iter().any(|l| l == "Go")
95    }
96
97    /// Format as a summary string for display
98    pub(crate) fn summary(&self) -> String {
99        let mut parts = vec![self.primary_language.clone()];
100
101        if !self.secondary_languages.is_empty() {
102            parts.push(format!("(+{})", self.secondary_languages.join(", ")));
103        }
104
105        if !self.frameworks.is_empty() {
106            parts.push(format!("[{}]", self.frameworks.join(", ")));
107        }
108
109        if self.has_tests {
110            if let Some(ref tf) = self.test_framework {
111                parts.push(format!("tests:{tf}"));
112            } else {
113                parts.push("tests:yes".to_string());
114            }
115        }
116
117        parts.join(" ")
118    }
119}
120
121/// Detect the project stack for a given repository root.
122///
123/// This is a convenience wrapper that creates a [`WorkspaceFs`] and calls
124/// [`detect_stack_with_workspace`].
125pub fn detect_stack(root: &Path) -> io::Result<ProjectStack> {
126    let workspace = WorkspaceFs::new(root.to_path_buf());
127    detect_stack_with_workspace(&workspace, Path::new(""))
128}
129
130/// Detect stack and return a summary string (for display in banner)
131pub fn detect_stack_summary(root: &Path) -> String {
132    detect_stack(root).map_or_else(|_| "Unknown".to_string(), |stack| stack.summary())
133}
134
135#[cfg(test)]
136mod tests;
137
138// =============================================================================
139// Workspace-based variants
140// =============================================================================
141
142/// Detect project stack using workspace abstraction.
143///
144/// This is the testable version of [`detect_stack`] that uses workspace
145/// for all filesystem operations.
146pub fn detect_stack_with_workspace(
147    workspace: &dyn Workspace,
148    root: &Path,
149) -> io::Result<ProjectStack> {
150    // Count file extensions
151    let extension_counts = scanner::count_extensions_with_workspace(workspace, root)?;
152
153    // Convert extensions to languages and aggregate
154    let mut language_counts: HashMap<&str, usize> = HashMap::new();
155    for (ext, count) in &extension_counts {
156        if let Some(lang) = extension_to_language(ext) {
157            *language_counts.entry(lang).or_insert(0) += count;
158        }
159    }
160
161    // Sort languages by count (descending)
162    let mut language_vec: Vec<_> = language_counts
163        .into_iter()
164        .filter(|(_, count)| *count >= MIN_FILES_FOR_DETECTION)
165        .collect();
166    language_vec.sort_by(|a, b| b.1.cmp(&a.1));
167
168    // Determine primary and secondary languages
169    let primary_language = language_vec
170        .iter()
171        .find(|(lang, _)| !is_non_primary_language(lang))
172        .or_else(|| language_vec.first())
173        .map_or_else(|| "Unknown".to_string(), |(lang, _)| (*lang).to_string());
174
175    let secondary_languages: Vec<String> = language_vec
176        .iter()
177        .filter(|(lang, _)| *lang != primary_language.as_str())
178        .take(MAX_SECONDARY_LANGUAGES)
179        .map(|(lang, _)| (*lang).to_string())
180        .collect();
181
182    // Detect signature files for frameworks and test frameworks
183    let (frameworks, test_framework, package_manager) =
184        signatures::detect_signature_files_with_workspace(workspace, root);
185
186    // Detect if tests exist
187    let has_tests = test_framework.is_some()
188        || scanner::detect_tests_with_workspace(workspace, root, &primary_language);
189
190    Ok(ProjectStack {
191        primary_language,
192        secondary_languages,
193        frameworks,
194        has_tests,
195        test_framework,
196        package_manager,
197    })
198}
199
#[cfg(test)]
mod workspace_tests {
    use super::*;
    use crate::workspace::MemoryWorkspace;

    /// Cargo manifest fixture that declares Axum as a dependency.
    const CARGO_MANIFEST: &str = r#"
[package]
name = "test"
[dependencies]
axum = "0.7"
[dev-dependencies]
"#;

    /// `package.json` fixture that declares React and Jest.
    const PACKAGE_JSON: &str = r#"
{
  "dependencies": { "react": "^18.0.0" },
  "devDependencies": { "jest": "^29.0.0" }
}

"#;

    /// An in-memory Rust project is detected as Rust with Axum, tests and Cargo.
    #[test]
    fn test_detect_stack_with_workspace_rust_project() {
        let repo = MemoryWorkspace::new_test()
            .with_file("Cargo.toml", CARGO_MANIFEST)
            .with_file("src/main.rs", "fn main() {}")
            .with_file("src/lib.rs", "pub mod foo;")
            .with_file("tests/integration.rs", "#[test] fn test() {}");

        let detected = detect_stack_with_workspace(&repo, Path::new("")).unwrap();

        assert_eq!(detected.primary_language, "Rust");
        assert!(detected.frameworks.iter().any(|name| name == "Axum"));
        assert!(detected.has_tests);
        assert_eq!(detected.package_manager.as_deref(), Some("Cargo"));
    }

    /// An in-memory JavaScript project is detected as JavaScript with React and Jest.
    #[test]
    fn test_detect_stack_with_workspace_js_project() {
        let repo = MemoryWorkspace::new_test()
            .with_file("package.json", PACKAGE_JSON)
            .with_file("src/index.js", "export default {}")
            .with_file("src/App.jsx", "export function App() {}")
            .with_file("src/utils.js", "export const foo = 1");

        let detected = detect_stack_with_workspace(&repo, Path::new("")).unwrap();

        assert_eq!(detected.primary_language, "JavaScript");
        assert!(detected.frameworks.iter().any(|name| name == "React"));
        assert_eq!(detected.test_framework.as_deref(), Some("Jest"));
    }
}