use crate::assets::{EXTENSION_LOOKUP, FILENAME_LOOKUP};
use crate::result::Lang;
use std::path::Path;
/// Determines the language of a file from its path and leading bytes.
///
/// Checks are tried in order, and the first hit wins:
/// 1. the final path component, looked up verbatim in `FILENAME_LOOKUP`
///    (a missing or non-UTF-8 file name is treated as the empty string);
/// 2. a shebang line, when `content` begins with `#!`;
/// 3. the ASCII-lowercased file extension, looked up in `EXTENSION_LOOKUP`.
///
/// Returns `Lang::None` when none of the checks match.
pub fn classify_file(path: &Path, content: &[u8]) -> Lang {
    let base_name = path
        .file_name()
        .and_then(|name| name.to_str())
        .unwrap_or_default();
    if let Some(lang_name) = FILENAME_LOOKUP.get(base_name) {
        return Lang::Identified(lang_name.clone());
    }

    // Only bother parsing the first line when it actually is a shebang.
    let from_shebang = content
        .starts_with(b"#!")
        .then(|| guess_shebang(content))
        .flatten();
    if let Some(lang) = from_shebang {
        return lang;
    }

    let Some(raw_ext) = path.extension().and_then(|e| e.to_str()) else {
        return Lang::None;
    };
    // The extension is lower-cased before the table lookup.
    let ext = raw_ext.to_ascii_lowercase();
    match EXTENSION_LOOKUP.get(&ext) {
        Some(lang_name) => Lang::Identified((*lang_name).to_string()),
        None => Lang::None,
    }
}
/// Infers a language from the shebang line at the start of `content`.
///
/// Only the first line (up to the first `\n`) is inspected; invalid UTF-8 is
/// replaced lossily. Leading `#!` markers are stripped and the remainder is
/// split on whitespace. The first token names the interpreter; only its final
/// path component, ASCII-lowercased, is considered.
///
/// When the interpreter is `env`, any following option tokens (starting with
/// `-`, e.g. `-S` or `-i`) and `NAME=value` assignments are skipped, and the
/// first remaining token is taken as the real interpreter. Options that take
/// a separate argument (such as `-u NAME`) are not specially handled.
///
/// Recognised interpreters:
/// - `python` optionally followed by a version made of digits and dots
///   (`python3`, `python3.11`, ...) -> `"Python"`
/// - `sh`, `bash`, `zsh`, `dash` -> `"Shell"`
///
/// Returns `None` when the line has no interpreter token or the interpreter
/// is not recognised.
fn guess_shebang(content: &[u8]) -> Option<Lang> {
    let first_line = content.split(|&b| b == b'\n').next()?;
    let first_line = String::from_utf8_lossy(first_line);
    let mut tokens = first_line.trim_start_matches("#!").split_whitespace();

    let mut interp = normalize_interp(tokens.next()?)?;
    if interp == "env" {
        // `env [OPTION]... [NAME=VALUE]... COMMAND`: the command is the first
        // token that is neither an option nor a variable assignment.
        let command = tokens.find(|tok| !tok.starts_with('-') && !tok.contains('='))?;
        interp = normalize_interp(command)?;
    }

    // Accept versioned binaries such as `python3.11` in addition to the
    // plain `python` / `python2` / `python3` names.
    let is_python = interp
        .strip_prefix("python")
        .is_some_and(|version| version.chars().all(|c| c.is_ascii_digit() || c == '.'));
    if is_python {
        return Some(Lang::Identified("Python".to_string()));
    }

    match interp.as_str() {
        "sh" | "bash" | "zsh" | "dash" => Some(Lang::Identified("Shell".to_string())),
        _ => None,
    }
}
/// Reduces an interpreter token to its ASCII-lowercased final path component.
///
/// For example `/usr/bin/Env` becomes `env`. Returns `None` when the token
/// has no final component (e.g. it is empty, `/`, or ends in `..`).
fn normalize_interp(token: &str) -> Option<String> {
    let base = Path::new(token).file_name()?;
    let base = base.to_str()?;
    Some(base.to_ascii_lowercase())
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    /// Builds the `Lang::Identified` value expected for the given language name.
    fn identified(name: &str) -> Lang {
        Lang::Identified(name.to_string())
    }

    /// `env` followed by an interpreter and an interpreter flag resolves to Python.
    #[test]
    fn shebang_env_python_is_detected() {
        let script = b"#!/usr/bin/env python -O\nprint(1)\n";
        assert_eq!(guess_shebang(script), Some(identified("Python")));
    }

    /// `env bash` resolves to Shell.
    #[test]
    fn shebang_env_bash_is_detected() {
        let script = b"#!/usr/bin/env bash\nset -e\n";
        assert_eq!(guess_shebang(script), Some(identified("Shell")));
    }

    /// An absolute interpreter path without `env` resolves to Shell.
    #[test]
    fn shebang_direct_path_is_detected() {
        let script = b"#!/bin/bash\necho hi\n";
        assert_eq!(guess_shebang(script), Some(identified("Shell")));
    }

    /// A `.cpp` file without a shebang is classified by its extension.
    #[test]
    fn cpp_extension_is_detected() {
        let source = b"int main() { return 0; }\n";
        assert_eq!(
            classify_file(Path::new("main.cpp"), source),
            identified("C++")
        );
    }

    /// `CMakeLists.txt` must never be reported as C++.
    #[test]
    fn cmake_lists_is_not_cpp() {
        let source = b"cmake_minimum_required(VERSION 3.20)\n";
        assert_ne!(
            classify_file(Path::new("CMakeLists.txt"), source),
            identified("C++")
        );
    }
}