Skip to main content

ralph_workflow/language_detector/
mod.rs

1//! Language and Stack Detection Module
2//!
3//! Detects the primary technology stack of a repository by analyzing file extensions,
4//! configuration files, and common patterns. This enables language-specific review
5//! guidance without requiring an LLM.
6//!
7//! The detection is fast (< 100ms typically) and uses heuristics based on:
8//! - File extension counts
9//! - Signature files (Cargo.toml, package.json, etc.)
10//! - Framework indicators in config files
11//!
12//! # Module Structure
13//!
14//! - `extensions` - Extension to language mapping
15//! - `signatures` - Signature file detection for frameworks and package managers
16//! - `scanner` - File system scanning utilities
17//! - `io` - Boundary module for filesystem operations
18
19#![deny(unsafe_code)]
20
21mod extensions;
22mod io;
23mod scanner;
24mod signatures;
25
26use std::collections::BTreeMap;
27use std::path::Path;
28
29use itertools::Itertools;
30
31use crate::workspace::Workspace;
32
33pub use extensions::extension_to_language;
34use extensions::is_non_primary_language;
35
/// Maximum number of secondary languages to include in the stack summary.
///
/// Polyglot repos commonly have more than 3 relevant languages (e.g. PHP + TS + JS + SQL),
/// but we still cap this to keep prompts/banners readable.
///
/// The cap is applied after languages have been ranked by file count (highest first),
/// so the languages kept are the most prevalent ones other than the primary language.
const MAX_SECONDARY_LANGUAGES: usize = 6;
41
/// Minimum file count to consider a language as present.
///
/// The threshold is inclusive (`>=`) and is compared against the total number of files
/// for a language, i.e. the sum over every extension that maps to that language.
const MIN_FILES_FOR_DETECTION: usize = 1;
44
/// Represents the detected technology stack of a project.
///
/// The [`Default`] value describes a project for which nothing was detected:
/// the primary language is `"Unknown"`, every list is empty, `has_tests` is
/// `false`, and no test framework or package manager is recorded.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ProjectStack {
    /// Primary programming language (most prevalent)
    pub(crate) primary_language: String,
    /// Secondary languages used in the project
    pub(crate) secondary_languages: Vec<String>,
    /// Detected frameworks (React, Django, Rails, etc.)
    pub(crate) frameworks: Vec<String>,
    /// Whether the project appears to have tests
    pub(crate) has_tests: bool,
    /// Detected test framework (if any)
    pub(crate) test_framework: Option<String>,
    /// Package manager detected
    pub(crate) package_manager: Option<String>,
}

impl Default for ProjectStack {
    /// Builds the "nothing detected" stack with `"Unknown"` as primary language.
    fn default() -> Self {
        Self {
            primary_language: "Unknown".to_string(),
            secondary_languages: Vec::new(),
            frameworks: Vec::new(),
            has_tests: false,
            test_framework: None,
            package_manager: None,
        }
    }
}

impl ProjectStack {
    /// Returns true if `language` is the primary language or one of the
    /// secondary languages.
    ///
    /// The comparison is an exact, case-sensitive string match.
    fn uses_language(&self, language: &str) -> bool {
        self.primary_language == language
            || self.secondary_languages.iter().any(|l| l == language)
    }

    /// Returns true if the project uses Rust
    pub(crate) fn is_rust(&self) -> bool {
        self.uses_language("Rust")
    }

    /// Returns true if the project uses Python
    pub(crate) fn is_python(&self) -> bool {
        self.uses_language("Python")
    }

    /// Returns true if the project uses JavaScript or TypeScript
    pub(crate) fn is_javascript_or_typescript(&self) -> bool {
        self.uses_language("JavaScript") || self.uses_language("TypeScript")
    }

    /// Returns true if the project uses Go
    pub(crate) fn is_go(&self) -> bool {
        self.uses_language("Go")
    }

    /// Format as a summary string for display.
    ///
    /// The result is a space-separated sequence of, in order:
    /// - the primary language (always present),
    /// - `(+A, B, ...)` listing the secondary languages, if any,
    /// - `[X, Y, ...]` listing the frameworks, if any,
    /// - `tests:<framework>` when `has_tests` is set and a test framework is
    ///   known, or `tests:yes` when `has_tests` is set without one.
    ///
    /// A recorded test framework is not shown when `has_tests` is `false`.
    pub(crate) fn summary(&self) -> String {
        // `then(|| ...)` keeps the formatting lazy: a section that is not
        // displayed is never built.
        let secondary = (!self.secondary_languages.is_empty())
            .then(|| format!("(+{})", self.secondary_languages.join(", ")));
        let frameworks =
            (!self.frameworks.is_empty()).then(|| format!("[{}]", self.frameworks.join(", ")));
        let tests = self.has_tests.then(|| {
            self.test_framework
                .as_deref()
                .map_or_else(|| "tests:yes".to_string(), |tf| format!("tests:{tf}"))
        });

        // Each `Option` contributes zero or one item to the chain.
        std::iter::once(self.primary_language.clone())
            .chain(secondary)
            .chain(frameworks)
            .chain(tests)
            .collect::<Vec<_>>()
            .join(" ")
    }
}
121
122/// Detect the project stack for a given repository root.
123///
124/// This is a convenience wrapper that creates a [`WorkspaceFs`] and calls
125/// [`detect_stack_with_workspace`].
126///
127/// # Errors
128///
129/// Returns error if the operation fails.
130pub fn detect_stack(root: &Path) -> std::io::Result<ProjectStack> {
131    use crate::workspace::WorkspaceFs;
132
133    let workspace = WorkspaceFs::new(root.to_path_buf());
134    detect_stack_with_workspace(&workspace, Path::new(""))
135}
136
137/// Detect stack and return a summary string (for display in banner)
138#[must_use]
139pub fn detect_stack_summary(root: &Path) -> String {
140    detect_stack(root).map_or_else(|_| "Unknown".to_string(), |stack| stack.summary())
141}
142
143#[cfg(test)]
144mod tests;
145
146// =============================================================================
147// Workspace-based variants
148// =============================================================================
149
150/// Detect project stack using workspace abstraction.
151///
152/// This is the testable version of [`detect_stack`] that uses workspace
153/// for all filesystem operations.
154///
155/// # Errors
156///
157/// Returns error if the operation fails.
158pub fn detect_stack_with_workspace(
159    workspace: &dyn Workspace,
160    root: &Path,
161) -> std::io::Result<ProjectStack> {
162    let extension_counts = count_extensions_with_workspace(workspace, root)?;
163
164    let lang_pairs: Vec<(String, usize)> = extension_counts
165        .iter()
166        .filter_map(|(ext, count)| {
167            extension_to_language(ext).map(|lang| (lang.to_string(), *count))
168        })
169        .collect();
170
171    let language_counts: BTreeMap<String, usize> = lang_pairs
172        .iter()
173        .map(|(lang, _)| lang.clone())
174        .collect::<std::collections::BTreeSet<_>>()
175        .into_iter()
176        .map(|lang| {
177            let total: usize = lang_pairs
178                .iter()
179                .filter(|(l, _)| *l == lang)
180                .map(|(_, c)| *c)
181                .sum();
182            (lang, total)
183        })
184        .collect();
185
186    let language_vec: Vec<_> = language_counts
187        .into_iter()
188        .filter(|(_, count)| *count >= MIN_FILES_FOR_DETECTION)
189        .map(|(lang, count)| (count, lang))
190        .sorted_by(|a, b| b.0.cmp(&a.0))
191        .map(|(count, lang)| (lang, count))
192        .collect();
193
194    let primary_language = language_vec
195        .iter()
196        .find(|(lang, _)| !is_non_primary_language(lang))
197        .or_else(|| language_vec.first())
198        .map_or_else(|| "Unknown".to_string(), |(lang, _)| (*lang).to_string());
199
200    let secondary_languages: Vec<String> = language_vec
201        .iter()
202        .filter(|(lang, _)| *lang != primary_language.as_str())
203        .take(MAX_SECONDARY_LANGUAGES)
204        .map(|(lang, _)| (*lang).to_string())
205        .collect();
206
207    let (frameworks, test_framework, package_manager) =
208        signatures::detect_signature_files_with_workspace(workspace, root);
209
210    let has_tests =
211        test_framework.is_some() || detect_tests_with_workspace(workspace, root, &primary_language);
212
213    Ok(ProjectStack {
214        primary_language,
215        secondary_languages,
216        frameworks,
217        has_tests,
218        test_framework,
219        package_manager,
220    })
221}
222
/// Count files per extension under `root` using the workspace abstraction.
///
/// Thin forwarder to the `io` boundary module; the returned map is keyed by
/// extension with the number of files as value.
// NOTE(review): whether extension keys carry a leading dot or are case-normalised is
// decided in `io` and is not visible here — confirm before relying on key format.
///
/// # Errors
///
/// Returns the [`std::io::Error`] reported by the underlying `io` function.
pub fn count_extensions_with_workspace(
    workspace: &dyn Workspace,
    root: &Path,
) -> std::io::Result<std::collections::HashMap<String, usize>> {
    io::count_extensions_with_workspace(workspace, root)
}
229
/// Report whether the project under `root` appears to contain tests.
///
/// Thin forwarder to the `io` boundary module. `primary_lang` is passed through
/// unchanged; the detection rules themselves live in `io`.
// NOTE(review): presumably `primary_lang` selects language-specific test conventions —
// confirm against `io::detect_tests_with_workspace`.
pub fn detect_tests_with_workspace(
    workspace: &dyn Workspace,
    root: &Path,
    primary_lang: &str,
) -> bool {
    io::detect_tests_with_workspace(workspace, root, primary_lang)
}
237
/// Collect the signature files found under `root` using the workspace abstraction.
///
/// Thin forwarder to the `io` boundary module; returns its
/// `signatures::SignatureFiles` result unchanged.
// NOTE(review): not called anywhere in the visible part of this file — presumably used
// from a submodule or the test modules; confirm before removing.
fn collect_signature_files_with_workspace(
    workspace: &dyn Workspace,
    root: &Path,
) -> signatures::SignatureFiles {
    io::collect_signature_files_with_workspace(workspace, root)
}
244
#[cfg(test)]
mod workspace_tests {
    use super::*;
    use crate::workspace::MemoryWorkspace;

    /// A Cargo manifest that depends on axum, Rust sources, and a file under `tests/`
    /// must be reported as a Rust project with the Axum framework, tests, and Cargo.
    #[test]
    fn test_detect_stack_with_workspace_rust_project() {
        let cargo_manifest = r#"
[package]
name = "test"
[dependencies]
axum = "0.7"
[dev-dependencies]
"#;
        let ws = MemoryWorkspace::new_test()
            .with_file("Cargo.toml", cargo_manifest)
            .with_file("src/main.rs", "fn main() {}")
            .with_file("src/lib.rs", "pub mod foo;")
            .with_file("tests/integration.rs", "#[test] fn test() {}");

        let detected = detect_stack_with_workspace(&ws, Path::new("")).unwrap();

        assert_eq!(detected.primary_language, "Rust");
        assert!(detected.frameworks.iter().any(|fw| fw == "Axum"));
        assert!(detected.has_tests);
        assert_eq!(detected.package_manager.as_deref(), Some("Cargo"));
    }

    /// A `package.json` with react and jest plus `.js`/`.jsx` sources must be reported
    /// as a JavaScript project with the React framework and Jest as test framework.
    #[test]
    fn test_detect_stack_with_workspace_js_project() {
        let package_manifest = r#"
{
  "dependencies": { "react": "^18.0.0" },
  "devDependencies": { "jest": "^29.0.0" }
}

"#;
        let ws = MemoryWorkspace::new_test()
            .with_file("package.json", package_manifest)
            .with_file("src/index.js", "export default {}")
            .with_file("src/App.jsx", "export function App() {}")
            .with_file("src/utils.js", "export const foo = 1");

        let detected = detect_stack_with_workspace(&ws, Path::new("")).unwrap();

        assert_eq!(detected.primary_language, "JavaScript");
        assert!(detected.frameworks.iter().any(|fw| fw == "React"));
        assert_eq!(detected.test_framework.as_deref(), Some("Jest"));
    }
}