ralph_workflow/language_detector/
mod.rs

1//! Language and Stack Detection Module
2//!
3//! Detects the primary technology stack of a repository by analyzing file extensions,
4//! configuration files, and common patterns. This enables language-specific review
5//! guidance without requiring an LLM.
6//!
7//! The detection is fast (< 100ms typically) and uses heuristics based on:
8//! - File extension counts
9//! - Signature files (Cargo.toml, package.json, etc.)
10//! - Framework indicators in config files
11//!
12//! # Module Structure
13//!
14//! - [`extensions`] - Extension to language mapping
15//! - [`signatures`] - Signature file detection for frameworks and package managers
16//! - [`scanner`] - File system scanning utilities
17
18#![deny(unsafe_code)]
19
20mod extensions;
21mod scanner;
22mod signatures;
23
24use std::collections::HashMap;
25use std::io;
26use std::path::Path;
27
28pub use extensions::extension_to_language;
29use extensions::is_non_primary_language;
30use scanner::{count_extensions, detect_tests};
31use signatures::detect_signature_files;
32
/// Upper bound on how many secondary languages `detect_stack` reports.
///
/// Repositories that mix several languages (for example PHP, TypeScript, JavaScript and SQL)
/// routinely need more than three slots, yet the list is still capped so that prompts and
/// banners built from the stack summary stay readable.
const MAX_SECONDARY_LANGUAGES: usize = 6;
38
/// Smallest number of files a language needs before it is treated as present in the project.
const MIN_FILES_FOR_DETECTION: usize = 1;
41
/// Technology stack detected for a project.
///
/// Values of this type are produced by `detect_stack`; `ProjectStack::default()` describes a
/// project about which nothing is known.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ProjectStack {
    /// The most prevalent programming language, or `"Unknown"` when none was detected.
    pub(crate) primary_language: String,
    /// Other languages found in the project, excluding the primary one.
    pub(crate) secondary_languages: Vec<String>,
    /// Names of detected frameworks (React, Django, Rails, ...).
    pub(crate) frameworks: Vec<String>,
    /// `true` when the project appears to contain tests.
    pub(crate) has_tests: bool,
    /// Name of the detected test framework, when there is one.
    pub(crate) test_framework: Option<String>,
    /// Name of the detected package manager, when there is one.
    pub(crate) package_manager: Option<String>,
}
58
59impl Default for ProjectStack {
60    fn default() -> Self {
61        Self {
62            primary_language: "Unknown".to_string(),
63            secondary_languages: Vec::new(),
64            frameworks: Vec::new(),
65            has_tests: false,
66            test_framework: None,
67            package_manager: None,
68        }
69    }
70}
71
72impl ProjectStack {
73    /// Returns true if the project uses Rust
74    pub(crate) fn is_rust(&self) -> bool {
75        self.primary_language == "Rust" || self.secondary_languages.iter().any(|l| l == "Rust")
76    }
77
78    /// Returns true if the project uses Python
79    pub(crate) fn is_python(&self) -> bool {
80        self.primary_language == "Python" || self.secondary_languages.iter().any(|l| l == "Python")
81    }
82
83    /// Returns true if the project uses JavaScript or TypeScript
84    pub(crate) fn is_javascript_or_typescript(&self) -> bool {
85        matches!(self.primary_language.as_str(), "JavaScript" | "TypeScript")
86            || self
87                .secondary_languages
88                .iter()
89                .any(|l| l == "JavaScript" || l == "TypeScript")
90    }
91
92    /// Returns true if the project uses Go
93    pub(crate) fn is_go(&self) -> bool {
94        self.primary_language == "Go" || self.secondary_languages.iter().any(|l| l == "Go")
95    }
96
97    /// Format as a summary string for display
98    pub(crate) fn summary(&self) -> String {
99        let mut parts = vec![self.primary_language.clone()];
100
101        if !self.secondary_languages.is_empty() {
102            parts.push(format!("(+{})", self.secondary_languages.join(", ")));
103        }
104
105        if !self.frameworks.is_empty() {
106            parts.push(format!("[{}]", self.frameworks.join(", ")));
107        }
108
109        if self.has_tests {
110            if let Some(ref tf) = self.test_framework {
111                parts.push(format!("tests:{tf}"));
112            } else {
113                parts.push("tests:yes".to_string());
114            }
115        }
116
117        parts.join(" ")
118    }
119}
120
121/// Detect the project stack for a given repository root
122pub fn detect_stack(root: &Path) -> io::Result<ProjectStack> {
123    // Count file extensions
124    let extension_counts = count_extensions(root)?;
125
126    // Convert extensions to languages and aggregate
127    let mut language_counts: HashMap<&str, usize> = HashMap::new();
128    for (ext, count) in &extension_counts {
129        if let Some(lang) = extension_to_language(ext) {
130            *language_counts.entry(lang).or_insert(0) += count;
131        }
132    }
133
134    // Sort languages by count (descending)
135    let mut language_vec: Vec<_> = language_counts
136        .into_iter()
137        .filter(|(_, count)| *count >= MIN_FILES_FOR_DETECTION)
138        .collect();
139    language_vec.sort_by(|a, b| b.1.cmp(&a.1));
140
141    // Determine primary and secondary languages.
142    //
143    // Prefer "code" languages as primary when present, even if the repo contains lots of
144    // config/markup files (YAML/JSON/CSS/etc).
145    let primary_language = language_vec
146        .iter()
147        .find(|(lang, _)| !is_non_primary_language(lang))
148        .or_else(|| language_vec.first())
149        .map_or_else(|| "Unknown".to_string(), |(lang, _)| (*lang).to_string());
150
151    let secondary_languages: Vec<String> = language_vec
152        .iter()
153        .filter(|(lang, _)| *lang != primary_language.as_str())
154        .take(MAX_SECONDARY_LANGUAGES)
155        .map(|(lang, _)| (*lang).to_string())
156        .collect();
157
158    // Detect signature files for frameworks and test frameworks
159    let (frameworks, test_framework, package_manager) = detect_signature_files(root);
160
161    // Detect if tests exist
162    let has_tests = test_framework.is_some() || detect_tests(root, &primary_language);
163
164    Ok(ProjectStack {
165        primary_language,
166        secondary_languages,
167        frameworks,
168        has_tests,
169        test_framework,
170        package_manager,
171    })
172}
173
174/// Detect stack and return a summary string (for display in banner)
175pub fn detect_stack_summary(root: &Path) -> String {
176    detect_stack(root).map_or_else(|_| "Unknown".to_string(), |stack| stack.summary())
177}
178
179#[cfg(test)]
180mod tests;