use std::collections::HashMap;
use std::path::Path;
#[derive(Debug, Clone)]
pub struct DetectionResult {
pub primary_language: String,
pub languages: Vec<String>,
pub build_system: Option<String>,
pub confidence: f32,
pub frameworks: Vec<String>,
}
pub struct LanguageDetector;
fn detect_rust(lower: &str) -> Option<&'static str> {
if lower.ends_with(".rs") {
Some("rust")
} else {
None
}
}
fn detect_typescript(lower: &str) -> Option<&'static str> {
if lower.ends_with(".tsx") || lower.ends_with(".ts") {
Some("typescript")
} else {
None
}
}
fn detect_javascript(lower: &str) -> Option<&'static str> {
const EXTS: &[&str] = &[".jsx", ".js", ".mjs", ".cjs"];
if EXTS.iter().any(|e| lower.ends_with(e)) {
Some("javascript")
} else {
None
}
}
fn detect_python(lower: &str) -> Option<&'static str> {
if lower.ends_with(".py") || lower.ends_with(".pyi") {
Some("python")
} else {
None
}
}
fn detect_java(lower: &str) -> Option<&'static str> {
if lower.ends_with(".java") {
Some("java")
} else {
None
}
}
fn detect_go(lower: &str) -> Option<&'static str> {
if lower.ends_with(".go") {
Some("go")
} else {
None
}
}
fn detect_cpp(lower: &str) -> Option<&'static str> {
const EXTS: &[&str] = &[".cpp", ".cc", ".cxx", ".hpp", ".hh", ".hxx", ".c", ".h"];
if EXTS.iter().any(|e| lower.ends_with(e)) {
Some("cpp")
} else {
None
}
}
type LanguageDetectorFn = fn(&str) -> Option<&'static str>;
const LANGUAGE_DETECTORS: &[LanguageDetectorFn] = &[
detect_rust,
detect_typescript,
detect_javascript,
detect_python,
detect_java,
detect_go,
detect_cpp,
];
fn build_cargo(lower: &str) -> Option<&'static str> {
matches_basename(lower, &["cargo.toml"]).then_some("cargo")
}
fn build_maven(lower: &str) -> Option<&'static str> {
matches_basename(lower, &["pom.xml"]).then_some("maven")
}
fn build_gradle(lower: &str) -> Option<&'static str> {
matches_basename(lower, &["build.gradle", "build.gradle.kts"]).then_some("gradle")
}
fn build_npm(lower: &str) -> Option<&'static str> {
matches_basename(lower, &["package.json"]).then_some("npm")
}
fn build_pip(lower: &str) -> Option<&'static str> {
matches_basename(lower, &["pyproject.toml", "setup.py", "requirements.txt"]).then_some("pip")
}
fn build_go_mod(lower: &str) -> Option<&'static str> {
matches_basename(lower, &["go.mod"]).then_some("go-mod")
}
const BUILD_SYSTEM_DETECTORS: &[LanguageDetectorFn] = &[
build_cargo,
build_maven,
build_gradle,
build_npm,
build_pip,
build_go_mod,
];
fn matches_basename(lower: &str, names: &[&str]) -> bool {
names
.iter()
.any(|n| lower == *n || lower.ends_with(&format!("/{n}")))
}
impl LanguageDetector {
pub fn detect_file(path: &str) -> Option<String> {
let lower = path.to_lowercase();
LANGUAGE_DETECTORS
.iter()
.find_map(|d| d(&lower))
.map(|s| s.to_string())
}
fn detect_build_system_for(path: &str) -> Option<&'static str> {
let lower = path.to_lowercase();
BUILD_SYSTEM_DETECTORS.iter().find_map(|d| d(&lower))
}
pub fn detect(files: &[&str]) -> DetectionResult {
let mut counts: HashMap<String, usize> = HashMap::new();
let mut build: Option<&'static str> = None;
let total = files.len().max(1);
let mut matched = 0usize;
for f in files {
if let Some(lang) = Self::detect_file(f) {
*counts.entry(lang).or_insert(0) += 1;
matched += 1;
}
if build.is_none() {
if let Some(bs) = Self::detect_build_system_for(f) {
build = Some(bs);
}
}
}
let mut langs: Vec<(String, usize)> = counts.into_iter().collect();
langs.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0)));
let primary = langs
.first()
.map(|(l, _)| l.clone())
.unwrap_or_else(|| "unknown".into());
let all = langs.iter().map(|(l, _)| l.clone()).collect();
DetectionResult {
primary_language: primary,
languages: all,
build_system: build.map(|s| s.to_string()),
confidence: matched as f32 / total as f32,
frameworks: Vec::new(),
}
}
}
pub fn detect_frameworks(project_root: &Path) -> Vec<String> {
let mut found: Vec<&'static str> = Vec::new();
if let Some(text) = read_manifest(project_root, "package.json") {
if let Ok(v) = serde_json::from_str::<serde_json::Value>(&text) {
let mut seen_keys: Vec<String> = Vec::new();
for field in ["dependencies", "devDependencies"] {
if let Some(obj) = v.get(field).and_then(serde_json::Value::as_object) {
for k in obj.keys() {
seen_keys.push(k.clone());
}
}
}
for key in &seen_keys {
if let Some(name) = match key.as_str() {
"next" => Some("Next.js"),
"react" => Some("React"),
"vue" => Some("Vue"),
"svelte" => Some("Svelte"),
"@angular/core" => Some("Angular"),
"vite" => Some("Vite"),
_ => None,
} {
push_unique(&mut found, name);
}
}
}
}
if let Some(text) = read_manifest(project_root, "Cargo.toml") {
if cargo_has_dep(&text, "axum") {
push_unique(&mut found, "Axum");
}
if cargo_has_dep(&text, "actix-web") {
push_unique(&mut found, "Actix-Web");
}
if cargo_has_dep(&text, "rocket") {
push_unique(&mut found, "Rocket");
}
}
if read_manifest(project_root, "pubspec.yaml").is_some() {
push_unique(&mut found, "Flutter");
}
if let Some(text) = read_manifest(project_root, "Gemfile") {
if gem_has(&text, "rails") {
push_unique(&mut found, "Rails");
}
}
let py_text = read_manifest(project_root, "pyproject.toml")
.or_else(|| read_manifest(project_root, "requirements.txt"))
.unwrap_or_default();
if !py_text.is_empty() {
if python_has(&py_text, "django") {
push_unique(&mut found, "Django");
}
if python_has(&py_text, "fastapi") {
push_unique(&mut found, "FastAPI");
}
if python_has(&py_text, "flask") {
push_unique(&mut found, "Flask");
}
}
let java_text = read_manifest(project_root, "pom.xml")
.or_else(|| read_manifest(project_root, "build.gradle"))
.or_else(|| read_manifest(project_root, "build.gradle.kts"))
.unwrap_or_default();
if !java_text.is_empty() && java_text.contains("spring-boot") {
push_unique(&mut found, "Spring Boot");
}
if let Some(text) = read_manifest(project_root, "composer.json") {
if text.contains("laravel/framework") {
push_unique(&mut found, "Laravel");
}
}
let mut out: Vec<String> = found.iter().map(|s| s.to_string()).collect();
out.sort();
out.dedup();
out
}
fn read_manifest(project_root: &Path, name: &str) -> Option<String> {
std::fs::read_to_string(project_root.join(name)).ok()
}
fn push_unique(out: &mut Vec<&'static str>, s: &'static str) {
if !out.contains(&s) {
out.push(s);
}
}
fn cargo_has_dep(text: &str, name: &str) -> bool {
for line in text.lines() {
let trimmed = line.trim();
if let Some(rest) = trimmed.strip_prefix(name) {
if rest.starts_with(|c: char| c.is_whitespace() || c == '=') {
return true;
}
}
}
false
}
fn gem_has(text: &str, name: &str) -> bool {
let needle_single = format!("gem '{name}'");
let needle_double = format!("gem \"{name}\"");
text.contains(&needle_single) || text.contains(&needle_double)
}
fn python_has(text: &str, name: &str) -> bool {
text.to_ascii_lowercase()
.contains(&name.to_ascii_lowercase())
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn detect_file_extension_mapping() {
assert_eq!(
LanguageDetector::detect_file("src/main.rs"),
Some("rust".into())
);
assert_eq!(
LanguageDetector::detect_file("App.tsx"),
Some("typescript".into())
);
assert_eq!(
LanguageDetector::detect_file("foo.ts"),
Some("typescript".into())
);
assert_eq!(
LanguageDetector::detect_file("foo.js"),
Some("javascript".into())
);
assert_eq!(
LanguageDetector::detect_file("module.mjs"),
Some("javascript".into())
);
assert_eq!(
LanguageDetector::detect_file("script.py"),
Some("python".into())
);
assert_eq!(
LanguageDetector::detect_file("Foo.java"),
Some("java".into())
);
assert_eq!(LanguageDetector::detect_file("main.go"), Some("go".into()));
assert_eq!(LanguageDetector::detect_file("README.md"), None);
}
#[test]
fn detect_picks_primary_language() {
let files = ["a.rs", "b.rs", "c.rs", "d.ts", "Cargo.toml"];
let r = LanguageDetector::detect(&files);
assert_eq!(r.primary_language, "rust");
assert!(r.languages.contains(&"rust".to_string()));
assert!(r.languages.contains(&"typescript".to_string()));
assert_eq!(r.build_system.as_deref(), Some("cargo"));
assert!(r.confidence > 0.5);
}
#[test]
fn detect_recognizes_npm_and_python() {
let files = ["a.ts", "package.json", "tsconfig.json"];
let r = LanguageDetector::detect(&files);
assert_eq!(r.build_system.as_deref(), Some("npm"));
let files = ["main.py", "pyproject.toml"];
let r = LanguageDetector::detect(&files);
assert_eq!(r.primary_language, "python");
assert_eq!(r.build_system.as_deref(), Some("pip"));
}
#[test]
fn detect_rust_helper_matches_only_dot_rs() {
assert_eq!(detect_rust("foo.rs"), Some("rust"));
assert_eq!(detect_rust("foo.RS"), None); assert_eq!(detect_rust("foo.rust"), None);
assert_eq!(detect_rust("foo.py"), None);
}
#[test]
fn detect_javascript_helper_covers_all_js_variants() {
assert_eq!(detect_javascript("a.js"), Some("javascript"));
assert_eq!(detect_javascript("a.jsx"), Some("javascript"));
assert_eq!(detect_javascript("a.mjs"), Some("javascript"));
assert_eq!(detect_javascript("a.cjs"), Some("javascript"));
assert_eq!(detect_javascript("a.ts"), None);
}
#[test]
fn detect_cpp_helper_covers_c_and_cpp_extensions() {
for ext in [".cpp", ".cc", ".cxx", ".hpp", ".hh", ".hxx", ".c", ".h"] {
let path = format!("file{ext}");
assert_eq!(detect_cpp(&path), Some("cpp"), "ext={ext}");
}
assert_eq!(detect_cpp("file.txt"), None);
}
#[test]
fn matches_basename_handles_root_and_nested() {
assert!(matches_basename("cargo.toml", &["cargo.toml"]));
assert!(matches_basename("a/b/cargo.toml", &["cargo.toml"]));
assert!(!matches_basename("cargo.toml.bak", &["cargo.toml"]));
assert!(!matches_basename("notcargo.toml", &["cargo.toml"]));
}
fn write_manifest(dir: &Path, name: &str, contents: &str) {
std::fs::write(dir.join(name), contents).expect("write manifest");
}
#[test]
fn detect_frameworks_finds_nextjs_and_react_in_package_json() {
let tmp = tempfile::tempdir().unwrap();
write_manifest(
tmp.path(),
"package.json",
r#"{ "dependencies": { "next": "14", "react": "18" } }"#,
);
let fws = detect_frameworks(tmp.path());
assert!(fws.contains(&"Next.js".to_string()), "got {fws:?}");
assert!(fws.contains(&"React".to_string()), "got {fws:?}");
}
#[test]
fn detect_frameworks_finds_vite_in_dev_dependencies() {
let tmp = tempfile::tempdir().unwrap();
write_manifest(
tmp.path(),
"package.json",
r#"{ "devDependencies": { "vite": "^5" } }"#,
);
let fws = detect_frameworks(tmp.path());
assert_eq!(fws, vec!["Vite".to_string()]);
}
#[test]
fn detect_frameworks_finds_axum_in_cargo_toml() {
let tmp = tempfile::tempdir().unwrap();
write_manifest(
tmp.path(),
"Cargo.toml",
"[dependencies]\naxum = \"0.7\"\nserde = \"1\"\n",
);
let fws = detect_frameworks(tmp.path());
assert!(fws.contains(&"Axum".to_string()), "got {fws:?}");
}
#[test]
fn detect_frameworks_finds_django_and_fastapi_in_requirements() {
let tmp = tempfile::tempdir().unwrap();
write_manifest(
tmp.path(),
"requirements.txt",
"Django==4.2\nfastapi==0.110\nrequests==2.31\n",
);
let fws = detect_frameworks(tmp.path());
assert!(fws.contains(&"Django".to_string()), "got {fws:?}");
assert!(fws.contains(&"FastAPI".to_string()), "got {fws:?}");
}
#[test]
fn detect_frameworks_finds_rails_in_gemfile() {
let tmp = tempfile::tempdir().unwrap();
write_manifest(
tmp.path(),
"Gemfile",
"source 'https://rubygems.org'\ngem 'rails', '~> 7.0'\n",
);
let fws = detect_frameworks(tmp.path());
assert_eq!(fws, vec!["Rails".to_string()]);
}
#[test]
fn detect_frameworks_finds_flutter_via_pubspec_presence() {
let tmp = tempfile::tempdir().unwrap();
write_manifest(tmp.path(), "pubspec.yaml", "name: my_app\n");
let fws = detect_frameworks(tmp.path());
assert_eq!(fws, vec!["Flutter".to_string()]);
}
#[test]
fn detect_frameworks_finds_spring_boot_in_pom() {
let tmp = tempfile::tempdir().unwrap();
write_manifest(
tmp.path(),
"pom.xml",
"<project><dependencies><dependency><groupId>org.springframework.boot</groupId>\
<artifactId>spring-boot-starter-web</artifactId></dependency></dependencies></project>",
);
let fws = detect_frameworks(tmp.path());
assert_eq!(fws, vec!["Spring Boot".to_string()]);
}
#[test]
fn detect_frameworks_finds_laravel_in_composer_json() {
let tmp = tempfile::tempdir().unwrap();
write_manifest(
tmp.path(),
"composer.json",
r#"{ "require": { "laravel/framework": "^10.0" } }"#,
);
let fws = detect_frameworks(tmp.path());
assert_eq!(fws, vec!["Laravel".to_string()]);
}
#[test]
fn detect_frameworks_returns_sorted_deduplicated() {
let tmp = tempfile::tempdir().unwrap();
write_manifest(
tmp.path(),
"package.json",
r#"{
"dependencies": { "react": "18", "next": "14" },
"devDependencies": { "react": "18", "vite": "5" }
}"#,
);
let fws = detect_frameworks(tmp.path());
let mut expected = vec![
"Next.js".to_string(),
"React".to_string(),
"Vite".to_string(),
];
expected.sort();
assert_eq!(fws, expected);
}
#[test]
fn detect_frameworks_returns_empty_for_empty_project() {
let tmp = tempfile::tempdir().unwrap();
let fws = detect_frameworks(tmp.path());
assert!(fws.is_empty(), "got {fws:?}");
}
#[test]
fn cargo_has_dep_avoids_prefix_collisions() {
let toml = "[dependencies]\naxum-extra = \"0.9\"\n";
assert!(!cargo_has_dep(toml, "axum"));
let toml2 = "[dependencies]\naxum = \"0.7\"\n";
assert!(cargo_has_dep(toml2, "axum"));
}
#[test]
fn build_system_helpers_are_independent() {
assert_eq!(build_cargo("cargo.toml"), Some("cargo"));
assert_eq!(build_maven("pom.xml"), Some("maven"));
assert_eq!(build_gradle("build.gradle.kts"), Some("gradle"));
assert_eq!(build_npm("a/package.json"), Some("npm"));
assert_eq!(build_pip("requirements.txt"), Some("pip"));
assert_eq!(build_go_mod("go.mod"), Some("go-mod"));
assert_eq!(build_cargo("pom.xml"), None);
}
}