use std::collections::HashMap;
use crate::FileType;
/// Guesses the programming language of `content` from keyword frequencies.
///
/// The text is split with `tokenize`, and every token that is a known keyword
/// adds a weight to each language that lists it: 3 for Rust, 2 for every other
/// language. The language with the highest total wins.
///
/// Ties are broken in the fixed order Rust, Python, JavaScript, Go, Java, C,
/// Ruby, so the same input always produces the same answer.
///
/// Returns `None` when no keyword is found (for example, for empty input).
pub(crate) fn classify(content: &str) -> Option<FileType> {
    let mut scores: HashMap<FileType, i32> = HashMap::new();

    for token in tokenize(content) {
        // A keyword may count towards several languages (`let`, `class`), so
        // these are independent checks rather than an `else if` chain.
        if matches!(token, "fn" | "let" | "mut" | "impl") {
            *scores.entry(FileType::Rust).or_insert(0) += 3;
        }
        if matches!(token, "def" | "class" | "import" | "from") {
            *scores.entry(FileType::Python).or_insert(0) += 2;
        }
        // The arrow `=>` is not listed: `tokenize` splits on `=`, so an arrow
        // can never arrive here as a single token.
        if matches!(token, "const" | "let" | "function") {
            *scores.entry(FileType::JavaScript).or_insert(0) += 2;
        }
        if matches!(token, "func" | "var" | "type" | "struct") {
            *scores.entry(FileType::Go).or_insert(0) += 2;
        }
        if matches!(token, "public" | "private" | "class" | "interface") {
            *scores.entry(FileType::Java).or_insert(0) += 2;
        }
        if matches!(token, "printf" | "scanf" | "malloc") {
            *scores.entry(FileType::C).or_insert(0) += 2;
        }
        if matches!(token, "end" | "require" | "module") {
            *scores.entry(FileType::Ruby).or_insert(0) += 2;
        }
    }

    // `HashMap` iteration order is arbitrary, so picking the maximum straight
    // from the map would resolve ties differently from run to run. Walk the
    // candidates in a fixed order instead; the strict `>` keeps the earliest
    // candidate when scores are equal.
    let priority = [
        FileType::Rust,
        FileType::Python,
        FileType::JavaScript,
        FileType::Go,
        FileType::Java,
        FileType::C,
        FileType::Ruby,
    ];
    let mut best: Option<(FileType, i32)> = None;
    for candidate in priority {
        if let Some(&score) = scores.get(&candidate) {
            let is_better = best.as_ref().map_or(true, |(_, top)| score > *top);
            if is_better {
                best = Some((candidate, score));
            }
        }
    }
    best.map(|(file_type, _)| file_type)
}
fn tokenize(content: &str) -> Vec<&str> {
content
.split(|c: char| {
c.is_whitespace()
|| c == '('
|| c == ')'
|| c == '{'
|| c == '}'
|| c == '['
|| c == ']'
|| c == ';'
|| c == ','
|| c == '.'
|| c == '='
|| c == ':'
|| c == '"'
})
.filter(|s| !s.is_empty())
.collect()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A snippet containing `fn` and `let` is classified as Rust.
    #[test]
    fn test_classify_rust() {
        let detected = classify("fn main() {\n let x = 42;\n println!(\"{}\");\n}");
        assert_eq!(Some(FileType::Rust), detected);
    }

    /// A snippet containing `def`, `import` and `class` is classified as Python.
    #[test]
    fn test_classify_python() {
        let detected = classify("def main():\n import os\n class MyClass:\n pass");
        assert_eq!(Some(FileType::Python), detected);
    }

    /// Input without any keyword yields no classification.
    #[test]
    fn test_classify_empty() {
        let detected = classify("");
        assert_eq!(None, detected);
    }
}