use std::collections::HashMap;
use std::path::Path;
/// Opening and closing delimiters of one block (multi-line) comment form.
///
/// Both delimiters are `'static` string slices, so values can be placed in
/// `const` tables. The type is comparable with `==`, which lets tests and
/// callers check a pair directly instead of comparing its fields one by one.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CommentPair {
    /// Text that opens the comment, e.g. `/*`.
    pub start: &'static str,
    /// Text that closes the comment, e.g. `*/`.
    pub end: &'static str,
}

impl CommentPair {
    /// Creates a pair from its opening (`start`) and closing (`end`) delimiter.
    ///
    /// This is a `const fn` so that pairs can be built inside constant
    /// expressions.
    pub const fn new(start: &'static str, end: &'static str) -> Self {
        Self { start, end }
    }
}
/// Static description of one language: its display name, the file
/// extensions associated with it, and its comment markers.
///
/// Every field borrows `'static` data, so a value can live in a `const`
/// table.
#[derive(Clone, Debug)]
pub struct LanguageInfo {
    /// Human-readable language name, e.g. `"Rust"`.
    pub name: &'static str,
    /// File extensions for this language, written without a leading dot.
    pub extensions: &'static [&'static str],
    /// Markers that introduce a single-line comment; may be empty.
    pub single_line_comments: &'static [&'static str],
    /// Start/end delimiter pairs for multi-line comments; may be empty.
    pub multi_line_comments: &'static [CommentPair],
}

impl LanguageInfo {
    /// Assembles a `LanguageInfo` from its four parts, storing each argument
    /// in the field of the same name.
    ///
    /// Declared `const` so entries can be created in constant expressions.
    pub const fn new(
        name: &'static str,
        extensions: &'static [&'static str],
        single_line_comments: &'static [&'static str],
        multi_line_comments: &'static [CommentPair],
    ) -> LanguageInfo {
        LanguageInfo {
            name,
            extensions,
            single_line_comments,
            multi_line_comments,
        }
    }
}
pub const LANGUAGES: &[LanguageInfo] = &[
LanguageInfo::new(
"JavaScript",
&["js", "jsx", "mjs", "cjs"],
&["//"],
&[CommentPair::new("/*", "*/")],
),
LanguageInfo::new(
"TypeScript",
&["ts", "tsx"],
&["//"],
&[CommentPair::new("/*", "*/")],
),
LanguageInfo::new(
"Python",
&["py", "pyw"],
&["#"],
&[
CommentPair::new("\"\"\"", "\"\"\""),
CommentPair::new("'''", "'''"),
],
),
LanguageInfo::new(
"Rust",
&["rs"],
&["//", "///", "//!"],
&[CommentPair::new("/*", "*/")],
),
LanguageInfo::new("Go", &["go"], &["//"], &[CommentPair::new("/*", "*/")]),
LanguageInfo::new("Java", &["java"], &["//"], &[CommentPair::new("/*", "*/")]),
LanguageInfo::new("C", &["c", "h"], &["//"], &[CommentPair::new("/*", "*/")]),
LanguageInfo::new(
"C++",
&["cpp", "cc", "cxx", "hpp", "hxx", "hh"],
&["//"],
&[CommentPair::new("/*", "*/")],
),
LanguageInfo::new("C#", &["cs"], &["//"], &[CommentPair::new("/*", "*/")]),
LanguageInfo::new(
"Ruby",
&["rb"],
&["#"],
&[CommentPair::new("=begin", "=end")],
),
LanguageInfo::new("Shell", &["sh", "bash", "zsh"], &["#"], &[]),
LanguageInfo::new(
"HTML",
&["html", "htm"],
&[],
&[CommentPair::new("<!--", "-->")],
),
LanguageInfo::new("CSS", &["css"], &[], &[CommentPair::new("/*", "*/")]),
LanguageInfo::new("SCSS", &["scss"], &["//"], &[CommentPair::new("/*", "*/")]),
LanguageInfo::new("Sass", &["sass"], &["//"], &[CommentPair::new("/*", "*/")]),
LanguageInfo::new(
"Markdown",
&["md", "markdown"],
&[],
&[CommentPair::new("<!--", "-->")],
),
LanguageInfo::new("JSON", &["json"], &[], &[]),
LanguageInfo::new("YAML", &["yaml", "yml"], &["#"], &[]),
LanguageInfo::new("XML", &["xml"], &[], &[CommentPair::new("<!--", "-->")]),
LanguageInfo::new(
"PHP",
&["php"],
&["//", "#"],
&[CommentPair::new("/*", "*/")],
),
LanguageInfo::new(
"Kotlin",
&["kt", "kts"],
&["//"],
&[CommentPair::new("/*", "*/")],
),
LanguageInfo::new(
"Swift",
&["swift"],
&["//"],
&[CommentPair::new("/*", "*/")],
),
LanguageInfo::new(
"Scala",
&["scala"],
&["//"],
&[CommentPair::new("/*", "*/")],
),
LanguageInfo::new("Elixir", &["ex", "exs"], &["#"], &[]),
LanguageInfo::new("Erlang", &["erl", "hrl"], &["%"], &[]),
LanguageInfo::new(
"Haskell",
&["hs", "lhs"],
&["--"],
&[CommentPair::new("{-", "-}")],
),
LanguageInfo::new("Lua", &["lua"], &["--"], &[CommentPair::new("--[[", "]]")]),
LanguageInfo::new(
"Perl",
&["pl", "pm"],
&["#"],
&[CommentPair::new("=pod", "=cut")],
),
LanguageInfo::new("R", &["r", "R"], &["#"], &[]),
LanguageInfo::new("SQL", &["sql"], &["--"], &[CommentPair::new("/*", "*/")]),
LanguageInfo::new("TOML", &["toml"], &["#"], &[]),
LanguageInfo::new("INI", &["ini", "cfg"], &[";", "#"], &[]),
LanguageInfo::new("Vim Script", &["vim"], &["\""], &[]),
];
pub struct LanguageDetector {
extension_map: HashMap<String, &'static LanguageInfo>,
}
impl LanguageDetector {
pub fn new() -> Self {
let mut extension_map = HashMap::new();
for lang in LANGUAGES {
for ext in lang.extensions {
extension_map.insert(ext.to_string(), lang);
}
}
Self { extension_map }
}
pub fn detect_from_path(&self, path: &Path) -> Option<&'static LanguageInfo> {
path.extension()
.and_then(|ext| ext.to_str())
.and_then(|ext| self.extension_map.get(&ext.to_lowercase()))
.copied()
}
}
impl Default for LanguageDetector {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Runs a fresh detector over `file_name` and returns the name of the
    /// detected language, or `None` when nothing was detected.
    fn detected_name(file_name: &str) -> Option<&'static str> {
        let detector = LanguageDetector::new();
        let path = PathBuf::from(file_name);
        detector.detect_from_path(&path).map(|lang| lang.name)
    }

    // The extension map must be populated by construction.
    #[test]
    fn test_language_detector_new() {
        let detector = LanguageDetector::new();
        assert!(!detector.extension_map.is_empty());
    }

    #[test]
    fn test_detect_rust() {
        assert_eq!(detected_name("main.rs"), Some("Rust"));
    }

    #[test]
    fn test_detect_python() {
        assert_eq!(detected_name("script.py"), Some("Python"));
    }

    #[test]
    fn test_detect_javascript() {
        assert_eq!(detected_name("app.js"), Some("JavaScript"));
    }

    #[test]
    fn test_detect_typescript() {
        assert_eq!(detected_name("component.tsx"), Some("TypeScript"));
    }

    // An upper-case extension must resolve to the same language.
    #[test]
    fn test_detect_case_insensitive() {
        assert_eq!(detected_name("main.RS"), Some("Rust"));
    }

    #[test]
    fn test_detect_unknown_extension() {
        assert!(detected_name("file.unknown").is_none());
    }

    #[test]
    fn test_detect_no_extension() {
        assert!(detected_name("Makefile").is_none());
    }

    // `.c` and `.cpp` must not be confused with each other.
    #[test]
    fn test_c_vs_cpp() {
        let detector = LanguageDetector::new();
        let expectations = [("main.c", "C"), ("main.cpp", "C++")];
        for (file_name, expected) in expectations {
            let found = detector.detect_from_path(&PathBuf::from(file_name));
            assert_eq!(found.unwrap().name, expected);
        }
    }

    // Every table row needs at least one extension to be detectable.
    #[test]
    fn test_all_languages_have_extensions() {
        if let Some(lang) = LANGUAGES.iter().find(|lang| lang.extensions.is_empty()) {
            panic!("Language {} has no extensions", lang.name);
        }
    }
}