use std::path::Path;
use std::sync::LazyLock;
use serde::{Deserialize, Serialize};
use zeph_common::treesitter::{
GO_SYM_Q, JS_SYM_Q, PYTHON_SYM_Q, RUST_SYM_Q, TS_SYM_Q, compile_query,
};
/// Tree-sitter query source used by `Lang::method_query` for Rust.
///
/// Matches every `function_item` that sits directly inside the
/// `declaration_list` body of an `impl_item`. Captures:
/// - `@vis`  — the optional `visibility_modifier` of the function,
/// - `@name` — the function's `identifier`,
/// - `@def`  — the whole `function_item` node.
const RUST_METHOD_Q: &str = "
(impl_item body: (declaration_list
(function_item (visibility_modifier)? @vis name: (identifier) @name) @def))
";
/// Tree-sitter query source used by `Lang::method_query` for Python.
///
/// Matches every `function_definition` that sits directly inside the `block`
/// body of a `class_definition`. Captures:
/// - `@name` — the function's `identifier`,
/// - `@def`  — the whole `function_definition` node.
const PYTHON_METHOD_Q: &str = "
(class_definition body: (block
(function_definition name: (identifier) @name) @def))
";
/// A language this module knows how to recognise and map to a tree-sitter grammar.
///
/// With `#[serde(rename_all = "lowercase")]` each variant is (de)serialised as
/// its lowercased name (for example `JavaScript` <-> `"javascript"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum Lang {
/// Rust source code.
Rust,
/// Python source code.
Python,
/// JavaScript source code.
JavaScript,
/// TypeScript source code.
TypeScript,
/// Go source code.
Go,
/// Bash shell scripts.
Bash,
/// TOML documents.
Toml,
/// JSON documents.
Json,
/// Markdown documents.
Markdown,
}
impl Lang {
#[must_use]
pub fn id(self) -> &'static str {
match self {
Self::Rust => "rust",
Self::Python => "python",
Self::JavaScript => "javascript",
Self::TypeScript => "typescript",
Self::Go => "go",
Self::Bash => "bash",
Self::Toml => "toml",
Self::Json => "json",
Self::Markdown => "markdown",
}
}
#[must_use]
pub fn grammar(self) -> Option<tree_sitter::Language> {
match self {
Self::Rust => Some(tree_sitter_rust::LANGUAGE.into()),
Self::Python => Some(tree_sitter_python::LANGUAGE.into()),
Self::JavaScript => Some(tree_sitter_javascript::LANGUAGE.into()),
Self::TypeScript => Some(tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into()),
Self::Go => Some(tree_sitter_go::LANGUAGE.into()),
Self::Bash => Some(tree_sitter_bash::LANGUAGE.into()),
Self::Toml => Some(tree_sitter_toml_ng::LANGUAGE.into()),
Self::Json => Some(tree_sitter_json::LANGUAGE.into()),
Self::Markdown => Some(tree_sitter_md::LANGUAGE.into()),
}
}
#[must_use]
pub fn symbol_query(self) -> Option<&'static tree_sitter::Query> {
match self {
Self::Rust => {
static Q: LazyLock<Option<tree_sitter::Query>> = LazyLock::new(|| {
let lang: tree_sitter::Language = tree_sitter_rust::LANGUAGE.into();
compile_query(&lang, RUST_SYM_Q, "rust symbol")
});
Q.as_ref()
}
Self::Python => {
static Q: LazyLock<Option<tree_sitter::Query>> = LazyLock::new(|| {
let lang: tree_sitter::Language = tree_sitter_python::LANGUAGE.into();
compile_query(&lang, PYTHON_SYM_Q, "python symbol")
});
Q.as_ref()
}
Self::JavaScript => {
static Q: LazyLock<Option<tree_sitter::Query>> = LazyLock::new(|| {
let lang: tree_sitter::Language = tree_sitter_javascript::LANGUAGE.into();
compile_query(&lang, JS_SYM_Q, "js symbol")
});
Q.as_ref()
}
Self::TypeScript => {
static Q: LazyLock<Option<tree_sitter::Query>> = LazyLock::new(|| {
let lang: tree_sitter::Language =
tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into();
compile_query(&lang, TS_SYM_Q, "ts symbol")
});
Q.as_ref()
}
Self::Go => {
static Q: LazyLock<Option<tree_sitter::Query>> = LazyLock::new(|| {
let lang: tree_sitter::Language = tree_sitter_go::LANGUAGE.into();
compile_query(&lang, GO_SYM_Q, "go symbol")
});
Q.as_ref()
}
_ => None,
}
}
#[must_use]
pub fn method_query(self) -> Option<&'static tree_sitter::Query> {
match self {
Self::Rust => {
static Q: LazyLock<Option<tree_sitter::Query>> = LazyLock::new(|| {
let lang: tree_sitter::Language = tree_sitter_rust::LANGUAGE.into();
compile_query(&lang, RUST_METHOD_Q, "rust method")
});
Q.as_ref()
}
Self::Python => {
static Q: LazyLock<Option<tree_sitter::Query>> = LazyLock::new(|| {
let lang: tree_sitter::Language = tree_sitter_python::LANGUAGE.into();
compile_query(&lang, PYTHON_METHOD_Q, "python method")
});
Q.as_ref()
}
_ => None,
}
}
#[must_use]
pub fn entity_node_kinds(self) -> &'static [&'static str] {
match self {
Self::Rust => &[
"function_item",
"struct_item",
"enum_item",
"trait_item",
"impl_item",
"type_item",
"const_item",
"static_item",
"macro_definition",
"mod_item",
],
Self::Python => &[
"function_definition",
"class_definition",
"decorated_definition",
],
Self::JavaScript | Self::TypeScript => &[
"function_declaration",
"class_declaration",
"method_definition",
"arrow_function",
"export_statement",
"lexical_declaration",
],
Self::Go => &[
"function_declaration",
"method_declaration",
"type_declaration",
"const_declaration",
],
_ => &[],
}
}
}
/// Formats a [`Lang`] as the identifier returned by `Lang::id`.
impl std::fmt::Display for Lang {
    /// Writes the language identifier to `f`.
    ///
    /// `Formatter::pad` is used rather than `write_str` so that width, fill,
    /// alignment and precision given in the format spec (for example
    /// `{:>12}`) are applied, exactly as they would be for a plain `&str`.
    /// Output for a bare `{}` is unchanged.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.pad(self.id())
    }
}
/// Determines the [`Lang`] of a file from its extension.
///
/// The extension is compared ASCII case-insensitively, so `README.MD` and
/// `MAIN.RS` are recognised the same way as `README.md` and `main.rs`.
///
/// Returns `None` when the path has no extension, when the extension is not
/// valid UTF-8, or when the extension is not one of the known ones.
#[must_use]
pub fn detect_language(path: &Path) -> Option<Lang> {
    // Normalise once so the match below only has to list lowercase spellings.
    let ext = path.extension()?.to_str()?.to_ascii_lowercase();
    let lang = match ext.as_str() {
        "rs" => Lang::Rust,
        "py" | "pyi" => Lang::Python,
        "js" | "jsx" | "mjs" | "cjs" => Lang::JavaScript,
        "ts" | "tsx" | "mts" | "cts" => Lang::TypeScript,
        "go" => Lang::Go,
        "sh" | "bash" | "zsh" => Lang::Bash,
        "toml" => Lang::Toml,
        "json" | "jsonc" => Lang::Json,
        "md" | "markdown" => Lang::Markdown,
        _ => return None,
    };
    Some(lang)
}
#[must_use]
pub fn is_indexable(path: &Path) -> bool {
detect_language(path).and_then(Lang::grammar).is_some()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Every `Lang` variant, shared by the tests that must cover all of them.
    const ALL_LANGS: [Lang; 9] = [
        Lang::Rust,
        Lang::Python,
        Lang::JavaScript,
        Lang::TypeScript,
        Lang::Go,
        Lang::Bash,
        Lang::Toml,
        Lang::Json,
        Lang::Markdown,
    ];

    /// Runs `detect_language` on a path given as a string slice.
    fn detect(name: &str) -> Option<Lang> {
        detect_language(Path::new(name))
    }

    #[test]
    fn detect_language_rs() {
        assert_eq!(detect("src/main.rs"), Some(Lang::Rust));
    }

    #[test]
    fn detect_language_py() {
        assert_eq!(detect("script.py"), Some(Lang::Python));
    }

    #[test]
    fn detect_language_js_variants() {
        for ext in ["js", "jsx", "mjs", "cjs"] {
            let detected = detect(&format!("file.{ext}"));
            assert_eq!(detected, Some(Lang::JavaScript), "failed for .{ext}");
        }
    }

    #[test]
    fn detect_language_ts_variants() {
        for ext in ["ts", "tsx", "mts", "cts"] {
            let detected = detect(&format!("file.{ext}"));
            assert_eq!(detected, Some(Lang::TypeScript), "failed for .{ext}");
        }
    }

    #[test]
    fn detect_language_unknown_ext_returns_none() {
        for name in ["file.xyz", "file"] {
            assert_eq!(detect(name), None);
        }
    }

    #[test]
    fn entity_node_kinds_rust_includes_function_item() {
        let kinds = Lang::Rust.entity_node_kinds();
        for expected in ["function_item", "impl_item", "struct_item"] {
            assert!(kinds.contains(&expected));
        }
    }

    #[test]
    fn entity_node_kinds_config_empty() {
        for lang in [Lang::Toml, Lang::Json, Lang::Markdown] {
            assert!(lang.entity_node_kinds().is_empty());
        }
    }

    #[test]
    fn grammar_returns_some_for_all_langs() {
        for lang in ALL_LANGS {
            assert!(lang.grammar().is_some(), "no grammar for {lang:?}");
        }
    }

    #[test]
    fn is_indexable_known_extension() {
        let path = Path::new("src/main.rs");
        assert!(is_indexable(path));
    }

    #[test]
    fn is_indexable_unknown_extension() {
        let path = Path::new("file.xyz");
        assert!(!is_indexable(path));
    }

    #[test]
    fn lang_id_roundtrip() {
        for lang in ALL_LANGS {
            let id = lang.id();
            assert!(!id.is_empty());
            assert_eq!(lang.to_string(), id);
        }
    }
}