ralph_workflow/language_detector/
mod.rs1#![deny(unsafe_code)]
20
21mod extensions;
22mod io;
23mod scanner;
24mod signatures;
25
26use std::collections::BTreeMap;
27use std::path::Path;
28
29use itertools::Itertools;
30
31use crate::workspace::Workspace;
32
33pub use extensions::extension_to_language;
34use extensions::is_non_primary_language;
35
/// Upper bound on how many secondary languages are kept in a detected stack.
const MAX_SECONDARY_LANGUAGES: usize = 6;

/// Minimum file count a language needs before it is considered at all.
const MIN_FILES_FOR_DETECTION: usize = 1;
44
/// Description of the technology stack detected for a project.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ProjectStack {
    /// Name of the main language; `"Unknown"` when nothing was detected.
    pub(crate) primary_language: String,
    /// Names of the other languages found alongside the primary one.
    pub(crate) secondary_languages: Vec<String>,
    /// Names of the frameworks that were detected.
    pub(crate) frameworks: Vec<String>,
    /// Whether the project appears to contain tests.
    pub(crate) has_tests: bool,
    /// Name of the detected test framework, if any.
    pub(crate) test_framework: Option<String>,
    /// Name of the detected package manager, if any.
    pub(crate) package_manager: Option<String>,
}

impl Default for ProjectStack {
    /// Returns an empty stack whose primary language is `"Unknown"`.
    fn default() -> Self {
        Self {
            primary_language: String::from("Unknown"),
            secondary_languages: Vec::new(),
            frameworks: Vec::new(),
            has_tests: false,
            test_framework: None,
            package_manager: None,
        }
    }
}

impl ProjectStack {
    /// Returns `true` when `name` is the primary language or one of the
    /// secondary languages. The comparison is exact (case-sensitive).
    fn has_language(&self, name: &str) -> bool {
        self.primary_language == name || self.secondary_languages.iter().any(|lang| lang == name)
    }

    /// Returns `true` when Rust is the primary or a secondary language.
    pub(crate) fn is_rust(&self) -> bool {
        self.has_language("Rust")
    }

    /// Returns `true` when Python is the primary or a secondary language.
    pub(crate) fn is_python(&self) -> bool {
        self.has_language("Python")
    }

    /// Returns `true` when JavaScript or TypeScript is the primary or a
    /// secondary language.
    pub(crate) fn is_javascript_or_typescript(&self) -> bool {
        self.has_language("JavaScript") || self.has_language("TypeScript")
    }

    /// Returns `true` when Go is the primary or a secondary language.
    pub(crate) fn is_go(&self) -> bool {
        self.has_language("Go")
    }

    /// Renders the stack as a single space-separated line.
    ///
    /// The line always starts with the primary language, followed by the
    /// optional parts in this order:
    /// - `(+A, B)` when there are secondary languages,
    /// - `[X, Y]` when there are frameworks,
    /// - `tests:<framework>` (or `tests:yes` without a known framework) when
    ///   `has_tests` is set.
    pub(crate) fn summary(&self) -> String {
        // `then` (not `then_some`) so the strings are only built when needed.
        let secondary = (!self.secondary_languages.is_empty())
            .then(|| format!("(+{})", self.secondary_languages.join(", ")));
        let frameworks =
            (!self.frameworks.is_empty()).then(|| format!("[{}]", self.frameworks.join(", ")));
        let tests = self.has_tests.then(|| {
            self.test_framework
                .as_deref()
                .map_or_else(|| "tests:yes".to_string(), |tf| format!("tests:{tf}"))
        });

        // Each `Option` contributes zero or one segment to the chain.
        std::iter::once(self.primary_language.clone())
            .chain(secondary)
            .chain(frameworks)
            .chain(tests)
            .collect::<Vec<_>>()
            .join(" ")
    }
}
121
122pub fn detect_stack(root: &Path) -> std::io::Result<ProjectStack> {
131 use crate::workspace::WorkspaceFs;
132
133 let workspace = WorkspaceFs::new(root.to_path_buf());
134 detect_stack_with_workspace(&workspace, Path::new(""))
135}
136
137#[must_use]
139pub fn detect_stack_summary(root: &Path) -> String {
140 detect_stack(root).map_or_else(|_| "Unknown".to_string(), |stack| stack.summary())
141}
142
143#[cfg(test)]
144mod tests;
145
146pub fn detect_stack_with_workspace(
159 workspace: &dyn Workspace,
160 root: &Path,
161) -> std::io::Result<ProjectStack> {
162 let extension_counts = count_extensions_with_workspace(workspace, root)?;
163
164 let lang_pairs: Vec<(String, usize)> = extension_counts
165 .iter()
166 .filter_map(|(ext, count)| {
167 extension_to_language(ext).map(|lang| (lang.to_string(), *count))
168 })
169 .collect();
170
171 let language_counts: BTreeMap<String, usize> = lang_pairs
172 .iter()
173 .map(|(lang, _)| lang.clone())
174 .collect::<std::collections::BTreeSet<_>>()
175 .into_iter()
176 .map(|lang| {
177 let total: usize = lang_pairs
178 .iter()
179 .filter(|(l, _)| *l == lang)
180 .map(|(_, c)| *c)
181 .sum();
182 (lang, total)
183 })
184 .collect();
185
186 let language_vec: Vec<_> = language_counts
187 .into_iter()
188 .filter(|(_, count)| *count >= MIN_FILES_FOR_DETECTION)
189 .map(|(lang, count)| (count, lang))
190 .sorted_by(|a, b| b.0.cmp(&a.0))
191 .map(|(count, lang)| (lang, count))
192 .collect();
193
194 let primary_language = language_vec
195 .iter()
196 .find(|(lang, _)| !is_non_primary_language(lang))
197 .or_else(|| language_vec.first())
198 .map_or_else(|| "Unknown".to_string(), |(lang, _)| (*lang).to_string());
199
200 let secondary_languages: Vec<String> = language_vec
201 .iter()
202 .filter(|(lang, _)| *lang != primary_language.as_str())
203 .take(MAX_SECONDARY_LANGUAGES)
204 .map(|(lang, _)| (*lang).to_string())
205 .collect();
206
207 let (frameworks, test_framework, package_manager) =
208 signatures::detect_signature_files_with_workspace(workspace, root);
209
210 let has_tests =
211 test_framework.is_some() || detect_tests_with_workspace(workspace, root, &primary_language);
212
213 Ok(ProjectStack {
214 primary_language,
215 secondary_languages,
216 frameworks,
217 has_tests,
218 test_framework,
219 package_manager,
220 })
221}
222
223pub fn count_extensions_with_workspace(
224 workspace: &dyn Workspace,
225 root: &Path,
226) -> std::io::Result<std::collections::HashMap<String, usize>> {
227 io::count_extensions_with_workspace(workspace, root)
228}
229
230pub fn detect_tests_with_workspace(
231 workspace: &dyn Workspace,
232 root: &Path,
233 primary_lang: &str,
234) -> bool {
235 io::detect_tests_with_workspace(workspace, root, primary_lang)
236}
237
238fn collect_signature_files_with_workspace(
239 workspace: &dyn Workspace,
240 root: &Path,
241) -> signatures::SignatureFiles {
242 io::collect_signature_files_with_workspace(workspace, root)
243}
244
#[cfg(test)]
mod workspace_tests {
    use super::*;
    use crate::workspace::MemoryWorkspace;

    /// A Cargo manifest with an `axum` dependency plus Rust sources and an
    /// integration test is detected as a Rust/Axum/Cargo project with tests.
    #[test]
    fn test_detect_stack_with_workspace_rust_project() {
        let cargo_manifest = r#"
[package]
name = "test"
[dependencies]
axum = "0.7"
[dev-dependencies]
"#;
        let ws = MemoryWorkspace::new_test()
            .with_file("Cargo.toml", cargo_manifest)
            .with_file("src/main.rs", "fn main() {}")
            .with_file("src/lib.rs", "pub mod foo;")
            .with_file("tests/integration.rs", "#[test] fn test() {}");

        let detected = detect_stack_with_workspace(&ws, Path::new("")).unwrap();

        assert_eq!(detected.primary_language, "Rust");
        assert!(detected.frameworks.iter().any(|name| name == "Axum"));
        assert!(detected.has_tests);
        assert_eq!(detected.package_manager.as_deref(), Some("Cargo"));
    }

    /// A `package.json` with `react` and `jest` plus JS sources is detected as
    /// a JavaScript/React project that uses Jest.
    #[test]
    fn test_detect_stack_with_workspace_js_project() {
        let package_json = r#"
{
 "dependencies": { "react": "^18.0.0" },
 "devDependencies": { "jest": "^29.0.0" }
}

"#;
        let ws = MemoryWorkspace::new_test()
            .with_file("package.json", package_json)
            .with_file("src/index.js", "export default {}")
            .with_file("src/App.jsx", "export function App() {}")
            .with_file("src/utils.js", "export const foo = 1");

        let detected = detect_stack_with_workspace(&ws, Path::new("")).unwrap();

        assert_eq!(detected.primary_language, "JavaScript");
        assert!(detected.frameworks.iter().any(|name| name == "React"));
        assert_eq!(detected.test_framework.as_deref(), Some("Jest"));
    }
}