#![allow(clippy::needless_update)]
use super::{
ChunkType, FieldStyle, InjectionRule, LanguageDef, PostProcessChunkFn, SignatureStyle,
};
/// Baseline `LanguageDef` used as the struct-update tail (`..DEFAULTS`) by
/// every per-language definition in this file, so each language only spells
/// out the fields it customizes.
const DEFAULTS: LanguageDef = LanguageDef {
    name: "",
    grammar: None,
    extensions: &[],
    chunk_query: "",
    call_query: None,
    signature_style: SignatureStyle::UntilBrace,
    doc_nodes: &[],
    method_node_kinds: &[],
    method_containers: &[],
    stopwords: &[],
    // Default return-type extractor reports nothing.
    extract_return_nl: |_| None,
    test_file_suggestion: None,
    test_name_suggestion: None,
    type_query: None,
    common_types: &[],
    container_body_kinds: &[],
    extract_container_name: None,
    extract_qualified_method: None,
    post_process_chunk: None,
    test_markers: &[],
    test_path_patterns: &[],
    structural_matchers: None,
    entry_point_names: &[],
    trait_method_names: &[],
    injections: &[],
    doc_format: "default",
    doc_convention: "",
    field_style: FieldStyle::None,
    skip_line_prefixes: &[],
};
/// Bash language definition. Chunk/call extraction comes from the embedded
/// tree-sitter queries; variables nested in functions are dropped by
/// `post_process_bash_bash`. Unlisted fields inherit `DEFAULTS`.
static LANG_BASH: LanguageDef = LanguageDef {
    name: "bash",
    grammar: Some(|| tree_sitter_bash::LANGUAGE.into()),
    extensions: &["sh", "bash"],
    chunk_query: include_str!("queries/bash.chunks.scm"),
    call_query: Some(include_str!("queries/bash.calls.scm")),
    doc_nodes: &["comment"],
    // Shell keywords and ubiquitous builtins, excluded from indexing.
    stopwords: &[
        "if", "then", "else", "elif", "fi", "for", "do", "done", "while", "until", "case", "esac",
        "in", "function", "return", "exit", "export", "local", "declare", "readonly", "unset",
        "shift", "set", "eval", "exec", "source", "true", "false", "echo", "printf", "read",
        "test",
    ],
    test_path_patterns: &["%/tests/%", "%\\_test.sh", "%.bats"],
    entry_point_names: &["main"],
    post_process_chunk: Some(post_process_bash_bash as PostProcessChunkFn),
    ..DEFAULTS
};
fn post_process_bash_bash(
_name: &mut String,
chunk_type: &mut ChunkType,
node: tree_sitter::Node,
_source: &str,
) -> bool {
if *chunk_type == ChunkType::Variable {
let mut parent = node.parent();
while let Some(p) = parent {
if p.kind() == "function_definition" {
return false;
}
parent = p.parent();
}
}
true
}
/// Returns the static [`LanguageDef`] for Bash.
pub fn definition_bash() -> &'static LanguageDef {
    &LANG_BASH
}
/// Derives a natural-language "Returns ..." phrase from a C function
/// signature by taking the tokens before the parameter list, dropping the
/// function name and storage/qualifier keywords. Yields `None` for `void`,
/// unqualified names, or signatures without a parameter list.
fn extract_return_c(signature: &str) -> Option<String> {
    let paren = signature.find('(')?;
    let mut tokens: Vec<&str> = signature[..paren].trim().split_whitespace().collect();
    if tokens.len() < 2 {
        return None;
    }
    tokens.pop(); // last token is the function name
    let type_words: Vec<&str> = tokens
        .into_iter()
        .filter(|w| !matches!(*w, "static" | "inline" | "extern" | "const" | "volatile"))
        .collect();
    if type_words.is_empty() || type_words == ["void"] {
        return None;
    }
    let ret_words = crate::nl::tokenize_identifier(&type_words.join(" ")).join(" ");
    Some(format!("Returns {}", ret_words))
}
/// C language definition. Doxygen-style documentation conventions; `main`
/// is the entry point. Unlisted fields inherit `DEFAULTS`.
static LANG_C: LanguageDef = LanguageDef {
    name: "c",
    grammar: Some(|| tree_sitter_c::LANGUAGE.into()),
    extensions: &["c", "h"],
    chunk_query: include_str!("queries/c.chunks.scm"),
    call_query: Some(include_str!("queries/c.calls.scm")),
    doc_nodes: &["comment"],
    // Keywords and builtin type names excluded from identifier indexing.
    stopwords: &[
        "if", "else", "for", "while", "do", "switch", "case", "break", "continue", "return",
        "typedef", "struct", "enum", "union", "void", "int", "char", "float", "double", "long",
        "short", "unsigned", "signed", "static", "extern", "const", "volatile", "sizeof", "null",
        "true", "false",
    ],
    extract_return_nl: extract_return_c,
    type_query: Some(include_str!("queries/c.types.scm")),
    common_types: &[
        "int", "char", "float", "double", "void", "long", "short", "unsigned", "size_t",
        "ssize_t", "ptrdiff_t", "FILE", "bool",
    ],
    test_path_patterns: &["%/tests/%", "%\\_test.c"],
    entry_point_names: &["main"],
    doc_format: "javadoc",
    doc_convention: "Use Doxygen format: @param, @return, @throws tags.",
    // C declarations put the type before the field name.
    field_style: FieldStyle::TypeFirst {
        strip_prefixes: "static const volatile extern unsigned signed",
    },
    skip_line_prefixes: &["struct ", "union ", "enum ", "typedef "],
    ..DEFAULTS
};
/// Returns the static [`LanguageDef`] for C.
pub fn definition_c() -> &'static LanguageDef {
    &LANG_C
}
/// Extracts the class/namespace scope of an out-of-line C++ method
/// definition (`Foo::bar`), by descending two `declarator` fields and
/// reading the `scope` of a `qualified_identifier`. `None` for free
/// functions or unexpected tree shapes.
fn extract_qualified_method_cpp(node: tree_sitter::Node, source: &str) -> Option<String> {
    let inner = node
        .child_by_field_name("declarator")?
        .child_by_field_name("declarator")?;
    if inner.kind() != "qualified_identifier" {
        return None;
    }
    inner
        .child_by_field_name("scope")
        .map(|scope| source[scope.byte_range()].to_string())
}
/// Derives a "Returns ..." phrase from a C++ signature. Handles trailing
/// return types (`auto f(...) -> T`) first, then falls back to the leading
/// return type with declaration specifiers stripped. `None` for `void` or
/// when no type can be identified.
fn extract_return_cpp(signature: &str) -> Option<String> {
    // Trailing return type: text after the last ')' containing "->".
    if let Some(close) = signature.rfind(')') {
        let tail = &signature[close + 1..];
        if let Some(arrow) = tail.find("->") {
            let rest = tail[arrow + 2..].trim();
            let ret_type = rest[..rest.find('{').unwrap_or(rest.len())].trim();
            if !ret_type.is_empty() {
                let words = crate::nl::tokenize_identifier(ret_type).join(" ");
                return Some(format!("Returns {}", words));
            }
        }
    }
    // Leading return type: tokens before the parameter list, minus the
    // function name and any declaration specifiers.
    let paren = signature.find('(')?;
    let mut tokens: Vec<&str> = signature[..paren].trim().split_whitespace().collect();
    if tokens.len() < 2 {
        return None;
    }
    tokens.pop(); // function name
    const SPECIFIERS: &[&str] = &[
        "static", "inline", "extern", "const", "volatile", "virtual", "explicit", "friend",
        "constexpr", "consteval", "constinit", "auto",
    ];
    let type_words: Vec<&str> = tokens
        .into_iter()
        .filter(|w| !SPECIFIERS.contains(w))
        .collect();
    if type_words.is_empty() || type_words == ["void"] {
        return None;
    }
    let words = crate::nl::tokenize_identifier(&type_words.join(" ")).join(" ");
    Some(format!("Returns {}", words))
}
#[allow(clippy::ptr_arg)] fn post_process_cpp_cpp(
name: &mut String,
chunk_type: &mut ChunkType,
node: tree_sitter::Node,
_source: &str,
) -> bool {
if !matches!(*chunk_type, ChunkType::Function | ChunkType::Method) {
return true;
}
if name.starts_with('~') {
return true;
}
if node.kind() == "function_definition" {
let has_return_type = node.child_by_field_name("type").is_some();
if !has_return_type {
*chunk_type = ChunkType::Constructor;
}
}
let mut parent = node.parent();
while let Some(p) = parent {
if p.kind() == "linkage_specification" {
*chunk_type = ChunkType::Extern;
break;
}
if p.kind() == "translation_unit" {
break;
}
parent = p.parent();
}
true
}
/// C++ language definition. Classes/structs act as method containers,
/// out-of-line methods are re-qualified via `extract_qualified_method_cpp`,
/// and GoogleTest macros mark tests. Unlisted fields inherit `DEFAULTS`.
static LANG_CPP: LanguageDef = LanguageDef {
    name: "cpp",
    grammar: Some(|| tree_sitter_cpp::LANGUAGE.into()),
    extensions: &["cpp", "cxx", "cc", "hpp", "hxx", "hh", "ipp"],
    chunk_query: include_str!("queries/cpp.chunks.scm"),
    call_query: Some(include_str!("queries/cpp.calls.scm")),
    doc_nodes: &["comment"],
    method_containers: &["class_specifier", "struct_specifier"],
    // Keywords, builtin types, and cast/coroutine operators excluded from indexing.
    stopwords: &[
        "if", "else", "for", "while", "do", "switch", "case", "break", "continue", "return",
        "class", "struct", "enum", "namespace", "template", "typename", "using", "typedef",
        "virtual", "override", "final", "const", "static", "inline", "explicit", "extern",
        "friend", "public", "private", "protected", "void", "int", "char", "float", "double",
        "long", "short", "unsigned", "signed", "auto", "new", "delete", "this", "true", "false",
        "nullptr", "sizeof", "dynamic_cast", "static_cast", "reinterpret_cast", "const_cast",
        "throw", "try", "catch", "noexcept", "operator", "concept", "requires", "constexpr",
        "consteval", "constinit", "mutable", "volatile", "co_await", "co_yield", "co_return",
        "decltype",
    ],
    extract_return_nl: extract_return_cpp,
    test_file_suggestion: Some(|stem, parent| format!("{parent}/tests/{stem}_test.cpp")),
    type_query: Some(include_str!("queries/cpp.types.scm")),
    // Standard-library type names treated as "common" (not project-specific).
    common_types: &[
        "string", "wstring", "string_view", "vector", "map", "unordered_map", "set",
        "unordered_set", "multimap", "multiset", "list", "deque", "array", "forward_list",
        "pair", "tuple", "optional", "variant", "any", "expected", "shared_ptr", "unique_ptr",
        "weak_ptr", "function", "size_t", "ptrdiff_t", "int8_t", "int16_t", "int32_t", "int64_t",
        "uint8_t", "uint16_t", "uint32_t", "uint64_t", "nullptr_t", "span", "basic_string",
        "iterator", "const_iterator", "reverse_iterator", "ostream", "istream", "iostream",
        "fstream", "ifstream", "ofstream", "stringstream", "istringstream", "ostringstream",
        "thread", "mutex", "recursive_mutex", "condition_variable", "atomic", "future",
        "promise", "exception", "runtime_error", "logic_error", "invalid_argument",
        "out_of_range", "overflow_error", "bad_alloc", "type_info", "initializer_list",
        "allocator", "hash", "equal_to", "less", "greater", "reference_wrapper", "bitset",
        "complex", "regex",
    ],
    container_body_kinds: &["field_declaration_list"],
    extract_qualified_method: Some(extract_qualified_method_cpp),
    post_process_chunk: Some(post_process_cpp_cpp as PostProcessChunkFn),
    // GoogleTest macros identify test chunks.
    test_markers: &["TEST(", "TEST_F(", "EXPECT_", "ASSERT_"],
    test_path_patterns: &["%/tests/%", "%\\_test.cpp", "%\\_test.cc"],
    entry_point_names: &["main"],
    doc_format: "javadoc",
    doc_convention: "Use Doxygen format: @param, @return, @throws tags.",
    field_style: FieldStyle::TypeFirst {
        strip_prefixes: "static const volatile mutable virtual inline",
    },
    skip_line_prefixes: &["class ", "struct ", "union ", "enum ", "template"],
    ..DEFAULTS
};
/// Returns the static [`LanguageDef`] for C++.
pub fn definition_cpp() -> &'static LanguageDef {
    &LANG_CPP
}
/// Derives a "Returns ..." phrase from a C# method signature. The token
/// immediately before the method name is taken as the return type; if that
/// token is still a modifier (or `void`) there is nothing to report.
fn extract_return_csharp(signature: &str) -> Option<String> {
    const NOT_A_TYPE: &[&str] = &[
        "void", "public", "private", "protected", "internal", "static", "abstract", "virtual",
        "override", "sealed", "async", "extern", "partial", "new", "unsafe",
    ];
    let paren = signature.find('(')?;
    let words: Vec<&str> = signature[..paren].trim().split_whitespace().collect();
    if words.len() < 2 {
        return None;
    }
    let ret_type = words[words.len() - 2];
    if NOT_A_TYPE.contains(&ret_type) {
        return None;
    }
    let ret_words = crate::nl::tokenize_identifier(ret_type).join(" ");
    Some(format!("Returns {}", ret_words))
}
/// Reclassifies C# chunks from their attributes and node kind:
/// constructor declarations become `Constructor`, test-framework
/// attributes mark tests, ASP.NET routing attributes mark endpoints, and
/// `const`/`static readonly` properties become constants. Always keeps
/// the chunk.
fn post_process_csharp_csharp(
    _name: &mut String,
    chunk_type: &mut ChunkType,
    node: tree_sitter::Node,
    source: &str,
) -> bool {
    if node.kind() == "constructor_declaration"
        && matches!(*chunk_type, ChunkType::Function | ChunkType::Method)
    {
        *chunk_type = ChunkType::Constructor;
    }
    // Re-check the kind: chunks just turned into constructors are skipped.
    if matches!(*chunk_type, ChunkType::Function | ChunkType::Method) {
        let text = &source[node.byte_range()];
        // Only inspect the declaration header: up to the body's `{`, or the
        // first 200 bytes when there is no body.
        let header = match text.find('{') {
            Some(brace) => &text[..brace],
            None => &text[..text.floor_char_boundary(200)],
        };
        const TEST_ATTRS: &[&str] = &["[Test]", "[Fact]", "[Theory]", "[TestMethod]"];
        const ROUTE_ATTRS: &[&str] = &[
            "[HttpGet]", "[HttpPost]", "[HttpPut]", "[HttpDelete]", "[HttpPatch]", "[Route(",
        ];
        if TEST_ATTRS.iter().any(|a| header.contains(a)) {
            *chunk_type = ChunkType::Test;
        } else if ROUTE_ATTRS.iter().any(|a| header.contains(a)) {
            *chunk_type = ChunkType::Endpoint;
        }
    }
    if *chunk_type == ChunkType::Property {
        let text = &source[node.byte_range()];
        if text.contains("const ") || text.contains("static readonly ") {
            *chunk_type = ChunkType::Constant;
        }
    }
    true
}
/// C# language definition. Recognizes NUnit/xUnit/MSTest attributes and
/// ASP.NET routing via `post_process_csharp_csharp`. Unlisted fields
/// inherit `DEFAULTS`.
static LANG_CSHARP: LanguageDef = LanguageDef {
    name: "csharp",
    grammar: Some(|| tree_sitter_c_sharp::LANGUAGE.into()),
    extensions: &["cs"],
    chunk_query: include_str!("queries/csharp.chunks.scm"),
    call_query: Some(include_str!("queries/csharp.calls.scm")),
    doc_nodes: &["comment"],
    method_containers: &[
        "class_declaration",
        "struct_declaration",
        "record_declaration",
        "interface_declaration",
        "declaration_list",
    ],
    // Keywords, modifiers, and builtin type aliases excluded from indexing.
    stopwords: &[
        "public", "private", "protected", "internal", "static", "readonly", "sealed",
        "abstract", "virtual", "override", "async", "await", "class", "struct", "interface",
        "enum", "namespace", "using", "return", "if", "else", "for", "foreach", "while", "do",
        "switch", "case", "break", "continue", "new", "this", "base", "try", "catch", "finally",
        "throw", "var", "void", "int", "string", "bool", "true", "false", "null", "get", "set",
        "value", "where", "partial", "event", "delegate", "record", "yield", "in", "out", "ref",
    ],
    extract_return_nl: extract_return_csharp,
    test_file_suggestion: Some(|stem, parent| format!("{parent}/{stem}Tests.cs")),
    // Suggested test names follow the PascalCase `X_ShouldWork` convention.
    test_name_suggestion: Some(|name| {
        let pn = super::pascal_test_name("", name);
        format!("{pn}_ShouldWork")
    }),
    type_query: Some(include_str!("queries/csharp.types.scm")),
    common_types: &[
        "string", "int", "bool", "object", "void", "double", "float", "long", "byte", "char",
        "decimal", "short", "uint", "ulong", "Task", "ValueTask", "List", "Dictionary",
        "HashSet", "Queue", "Stack", "IEnumerable", "IList", "IDictionary", "ICollection",
        "IQueryable", "Action", "Func", "Predicate", "EventHandler", "EventArgs", "IDisposable",
        "CancellationToken", "ILogger", "StringBuilder", "Exception", "Nullable", "Span",
        "Memory", "ReadOnlySpan", "IServiceProvider", "HttpContext", "IConfiguration",
    ],
    container_body_kinds: &["declaration_list"],
    post_process_chunk: Some(post_process_csharp_csharp as PostProcessChunkFn),
    test_markers: &["[Test]", "[Fact]", "[Theory]", "[TestMethod]"],
    test_path_patterns: &["%/Tests/%", "%/tests/%", "%Tests.cs"],
    entry_point_names: &["Main"],
    // Well-known interface/override method names (not project-specific).
    trait_method_names: &[
        "Equals",
        "GetHashCode",
        "ToString",
        "CompareTo",
        "Dispose",
        "GetEnumerator",
        "MoveNext",
    ],
    doc_format: "javadoc",
    doc_convention: "Use XML doc comments: <summary>, <param>, <returns>, <exception> tags.",
    field_style: FieldStyle::TypeFirst {
        strip_prefixes:
            "private protected public internal static readonly virtual override abstract sealed new",
    },
    skip_line_prefixes: &["class ", "struct ", "interface ", "enum ", "record "],
    ..DEFAULTS
};
/// Returns the static [`LanguageDef`] for C#.
pub fn definition_csharp() -> &'static LanguageDef {
    &LANG_CSHARP
}
/// Maps CSS node kinds to chunk types: rule sets are properties,
/// `@keyframes` and `@media` are sections. Media chunks are renamed to
/// `"@media <query>"`, or plain `"@media"` when no query text is found.
/// Always keeps the chunk.
fn post_process_css_css(
    name: &mut String,
    chunk_type: &mut ChunkType,
    node: tree_sitter::Node,
    source: &str,
) -> bool {
    match node.kind() {
        "rule_set" => *chunk_type = ChunkType::Property,
        "keyframes_statement" => *chunk_type = ChunkType::Section,
        "media_statement" => {
            *chunk_type = ChunkType::Section;
            let text = node.utf8_text(source.as_bytes()).unwrap_or("");
            // The query is the text between the `@media` keyword and the
            // opening brace, when non-empty.
            let query = text.find('{').and_then(|brace| {
                let start = if text.starts_with("@media") { 6 } else { 0 };
                (start < brace)
                    .then(|| text[start..brace].trim())
                    .filter(|q| !q.is_empty())
            });
            *name = match query {
                Some(q) => format!("@media {q}"),
                None => "@media".to_string(),
            };
        }
        _ => {}
    }
    true
}
/// CSS has no functions, hence never a return type to describe.
fn extract_return_css(_signature: &str) -> Option<String> {
    None
}
/// CSS language definition. Signatures are the selector's first line; no
/// call graph or test support. Unlisted fields inherit `DEFAULTS`.
static LANG_CSS: LanguageDef = LanguageDef {
    name: "css",
    grammar: Some(|| tree_sitter_css::LANGUAGE.into()),
    extensions: &["css"],
    chunk_query: include_str!("queries/css.chunks.scm"),
    signature_style: SignatureStyle::FirstLine,
    doc_nodes: &["comment"],
    // Ubiquitous CSS keywords/values, excluded from indexing.
    stopwords: &[
        "auto", "inherit", "initial", "unset", "none", "block", "inline", "flex", "grid",
        "absolute", "relative", "fixed", "sticky", "hidden", "visible", "solid", "dashed",
        "dotted", "normal", "bold", "italic", "center", "left", "right", "top", "bottom",
        "transparent", "currentColor", "important", "media", "keyframes", "from", "to",
    ],
    extract_return_nl: extract_return_css,
    post_process_chunk: Some(post_process_css_css as PostProcessChunkFn),
    ..DEFAULTS
};
/// Returns the static [`LanguageDef`] for CSS.
pub fn definition_css() -> &'static LanguageDef {
    &LANG_CSS
}
/// Derives a "Returns ..." phrase from a CUDA function signature. Checks a
/// trailing return type (`-> T`) first, then falls back to the leading
/// type with CUDA execution-space qualifiers (`__global__` etc.) stripped.
/// `None` for `void` or when no type can be identified.
fn extract_return_cuda(signature: &str) -> Option<String> {
    // Trailing return type after the parameter list.
    if let Some(close) = signature.rfind(')') {
        let tail = &signature[close + 1..];
        if let Some(arrow) = tail.find("->") {
            let rest = tail[arrow + 2..].trim();
            let end = rest.find('{').unwrap_or(rest.len());
            let ret_type = rest[..end].trim();
            if !ret_type.is_empty() {
                return Some(format!(
                    "Returns {}",
                    crate::nl::tokenize_identifier(ret_type).join(" ")
                ));
            }
        }
    }
    // Leading return type: tokens before '(' minus name and qualifiers.
    let paren = signature.find('(')?;
    let mut tokens: Vec<&str> = signature[..paren].trim().split_whitespace().collect();
    if tokens.len() < 2 {
        return None;
    }
    tokens.pop(); // function name
    const QUALIFIERS: &[&str] = &[
        "static", "inline", "extern", "const", "volatile", "virtual", "explicit", "__global__",
        "__device__", "__host__", "__forceinline__", "__noinline__", "auto",
    ];
    let type_words: Vec<&str> = tokens
        .into_iter()
        .filter(|w| !QUALIFIERS.contains(w))
        .collect();
    if type_words.is_empty() || type_words == ["void"] {
        return None;
    }
    Some(format!(
        "Returns {}",
        crate::nl::tokenize_identifier(&type_words.join(" ")).join(" ")
    ))
}
/// Extracts the class/namespace scope of an out-of-line CUDA/C++ method
/// definition (`Foo::bar`) from its nested declarator; `None` for free
/// functions or unexpected tree shapes.
fn extract_qualified_method_cuda(node: tree_sitter::Node, source: &str) -> Option<String> {
    let name_node = node
        .child_by_field_name("declarator")
        .and_then(|d| d.child_by_field_name("declarator"))?;
    if name_node.kind() != "qualified_identifier" {
        return None;
    }
    let scope = name_node.child_by_field_name("scope")?;
    Some(source[scope.byte_range()].to_string())
}
/// CUDA language definition. Largely mirrors the C++ one but adds
/// CUDA-specific qualifiers, builtins, and vector types. Unlisted fields
/// inherit `DEFAULTS`.
static LANG_CUDA: LanguageDef = LanguageDef {
    name: "cuda",
    grammar: Some(|| tree_sitter_cuda::LANGUAGE.into()),
    extensions: &["cu", "cuh"],
    chunk_query: include_str!("queries/cuda.chunks.scm"),
    call_query: Some(include_str!("queries/cuda.calls.scm")),
    doc_nodes: &["comment"],
    method_containers: &["class_specifier", "struct_specifier"],
    // C++ keywords plus CUDA qualifiers, builtin indices, and runtime calls.
    stopwords: &[
        "if", "else", "for", "while", "do", "switch", "case", "break", "continue", "return",
        "class", "struct", "enum", "namespace", "template", "typename", "using", "typedef",
        "virtual", "override", "final", "const", "static", "inline", "explicit", "extern",
        "friend", "public", "private", "protected", "void", "int", "char", "float", "double",
        "long", "short", "unsigned", "signed", "auto", "new", "delete", "this", "true", "false",
        "nullptr", "sizeof", "__global__", "__device__", "__host__", "__shared__",
        "__constant__", "__managed__", "__restrict__", "__noinline__", "__forceinline__",
        "dim3", "blockIdx", "threadIdx", "blockDim", "gridDim", "warpSize", "cudaMalloc",
        "cudaFree", "cudaMemcpy",
    ],
    extract_return_nl: extract_return_cuda,
    common_types: &[
        "int", "char", "float", "double", "void", "long", "short", "unsigned", "size_t",
        "dim3", "cudaError_t", "cudaStream_t", "cudaEvent_t", "float2", "float3", "float4",
        "int2", "int3", "int4", "uint2", "uint3", "uint4", "half", "__half", "__half2",
    ],
    container_body_kinds: &["field_declaration_list"],
    extract_qualified_method: Some(extract_qualified_method_cuda),
    // GoogleTest macros identify test chunks.
    test_markers: &["TEST(", "TEST_F(", "EXPECT_", "ASSERT_"],
    test_path_patterns: &["%/tests/%", "%\\_test.cu"],
    entry_point_names: &["main"],
    doc_format: "javadoc",
    doc_convention: "Use Doxygen format: @param, @return, @throws tags.",
    field_style: FieldStyle::TypeFirst {
        strip_prefixes: "static const volatile mutable virtual inline",
    },
    skip_line_prefixes: &["class ", "struct ", "union ", "enum ", "template"],
    post_process_chunk: Some(post_process_cuda_cuda as PostProcessChunkFn),
    ..DEFAULTS
};
/// Reclassifies CUDA function/method chunks, mirroring the C++ rules:
/// definitions without a `type` field become constructors and chunks under
/// an `extern "C"` linkage block become `Extern`. Destructors and
/// non-callable chunks are untouched. Always keeps the chunk.
#[allow(clippy::ptr_arg)]
fn post_process_cuda_cuda(
    name: &mut String,
    chunk_type: &mut ChunkType,
    node: tree_sitter::Node,
    _source: &str,
) -> bool {
    let is_callable = matches!(*chunk_type, ChunkType::Function | ChunkType::Method);
    if !is_callable || name.starts_with('~') {
        return true;
    }
    // Constructors carry no return-type node in the grammar.
    if node.kind() == "function_definition" && node.child_by_field_name("type").is_none() {
        *chunk_type = ChunkType::Constructor;
    }
    // Scan ancestors up to the translation unit for a linkage block.
    let mut ancestor = node.parent();
    while let Some(a) = ancestor {
        match a.kind() {
            "linkage_specification" => {
                *chunk_type = ChunkType::Extern;
                break;
            }
            "translation_unit" => break,
            _ => ancestor = a.parent(),
        }
    }
    true
}
/// Returns the static [`LanguageDef`] for CUDA.
pub fn definition_cuda() -> &'static LanguageDef {
    &LANG_CUDA
}
/// Dart language definition (behind the `lang-dart` feature). Classes and
/// extensions act as method containers; factory constructors and `test`/
/// `group` calls are reclassified by `post_process_dart_dart`. Unlisted
/// fields inherit `DEFAULTS`.
#[cfg(feature = "lang-dart")]
static LANG_DART: LanguageDef = LanguageDef {
    name: "dart",
    grammar: Some(|| tree_sitter_dart::LANGUAGE.into()),
    extensions: &["dart"],
    chunk_query: include_str!("queries/dart.chunks.scm"),
    // Explicit, though identical to the DEFAULTS value.
    signature_style: SignatureStyle::UntilBrace,
    doc_nodes: &["comment", "documentation_comment"],
    method_node_kinds: &[],
    method_containers: &["class_body", "extension_body"],
    stopwords: &[
        "if", "else", "for", "while", "do", "return", "class", "extends", "implements",
        "import", "void", "var", "final", "const", "static", "this", "super", "new", "null",
        "true", "false", "async", "await", "switch", "case", "break", "continue", "try",
        "catch", "throw", "with", "abstract", "mixin", "enum", "late", "required", "dynamic",
        "override",
    ],
    common_types: &[
        "String", "int", "double", "bool", "List", "Map", "Set", "Future", "Stream", "void",
        "dynamic", "Object", "Iterable", "Function", "Type", "Null", "num", "Never",
    ],
    container_body_kinds: &["class_body", "extension_body", "enum_body"],
    test_markers: &["@test", "test("],
    test_path_patterns: &["%_test.dart", "%/test/%"],
    entry_point_names: &["main"],
    doc_format: "triple_slash",
    doc_convention:
        "Use /// for documentation comments. Follow Effective Dart documentation guidelines.",
    field_style: FieldStyle::NameFirst {
        separators: ":",
        strip_prefixes: "final late var static const",
    },
    extract_return_nl: extract_return_dart,
    post_process_chunk: Some(post_process_dart_dart as PostProcessChunkFn),
    ..DEFAULTS
};
/// Reclassifies Dart chunks: `test*`/`group` callables become tests,
/// `factory` bodies or constructor signatures become constructors, and
/// classes whose text starts with `extension ` become extensions. Always
/// keeps the chunk.
#[cfg(feature = "lang-dart")]
fn post_process_dart_dart(
    name: &mut String,
    chunk_type: &mut ChunkType,
    node: tree_sitter::Node,
    source: &str,
) -> bool {
    let text = &source[node.byte_range()];
    if matches!(*chunk_type, ChunkType::Function | ChunkType::Method)
        && (name.starts_with("test") || name == "group")
    {
        *chunk_type = ChunkType::Test;
    }
    // Re-check the kind: a chunk just marked as a test stays a test.
    if matches!(*chunk_type, ChunkType::Function | ChunkType::Method)
        && (text.trim_start().starts_with("factory ") || node.kind() == "constructor_signature")
    {
        *chunk_type = ChunkType::Constructor;
    }
    if *chunk_type == ChunkType::Class && text.trim_start().starts_with("extension ") {
        *chunk_type = ChunkType::Extension;
    }
    true
}
/// Derives a "Returns ..." phrase from a Dart signature: the token before
/// the member name, unless the signature declares a container, the token
/// is `void`, or it is still a modifier/accessor keyword.
#[cfg(feature = "lang-dart")]
fn extract_return_dart(sig: &str) -> Option<String> {
    let sig = sig.trim();
    const CONTAINER_PREFIXES: &[&str] = &["class ", "enum ", "mixin ", "extension "];
    if CONTAINER_PREFIXES.iter().any(|p| sig.starts_with(p)) {
        return None;
    }
    let before_paren = sig[..sig.find('(')?].trim();
    let parts: Vec<&str> = before_paren.split_whitespace().collect();
    if parts.len() < 2 {
        return None;
    }
    let type_part = parts[parts.len() - 2];
    const NOT_A_TYPE: &[&str] = &[
        "void", "static", "abstract", "external", "factory", "get", "set", "operator",
    ];
    if NOT_A_TYPE.contains(&type_part) {
        return None;
    }
    let ret_words = crate::nl::tokenize_identifier(type_part).join(" ");
    Some(format!("Returns {}", ret_words))
}
/// Returns the static [`LanguageDef`] for Dart (feature `lang-dart`).
#[cfg(feature = "lang-dart")]
pub fn definition_dart() -> &'static LanguageDef {
    &LANG_DART
}
/// Classifies an Elixir `call` node by its `def*` keyword. For `defstruct`,
/// additionally renames the chunk after the enclosing `defmodule`'s alias
/// argument (Elixir structs are named by their module).
///
/// Returns `false` to drop the chunk — only for a `defstruct` whose
/// enclosing module alias cannot be found; `true` keeps it.
fn post_process_elixir_elixir(
    name: &mut String,
    chunk_type: &mut ChunkType,
    node: tree_sitter::Node,
    source: &str,
) -> bool {
    // The `target` field of a `call` node is the invoked keyword/macro name.
    let keyword = node
        .child_by_field_name("target")
        .and_then(|t| t.utf8_text(source.as_bytes()).ok())
        .unwrap_or("");
    match keyword {
        "def" | "defp" | "defguard" | "defguardp" | "defdelegate" => {
            *chunk_type = ChunkType::Function;
        }
        "defmacro" | "defmacrop" => {
            *chunk_type = ChunkType::Macro;
        }
        "defmodule" => {
            *chunk_type = ChunkType::Module;
        }
        "defprotocol" => {
            *chunk_type = ChunkType::Interface;
        }
        "defimpl" => {
            *chunk_type = ChunkType::Object;
        }
        "defstruct" => {
            *chunk_type = ChunkType::Struct;
            // Walk up the ancestor chain looking for a `defmodule` call;
            // note that if a `defmodule` is found but has no `alias`
            // argument, the walk deliberately continues further up.
            let mut parent = node.parent();
            while let Some(p) = parent {
                if p.kind() == "call" {
                    if let Some(target) = p.child_by_field_name("target") {
                        if target.utf8_text(source.as_bytes()).ok() == Some("defmodule") {
                            let mut cursor = p.walk();
                            for child in p.named_children(&mut cursor) {
                                if child.kind() == "arguments" {
                                    let mut inner_cursor = child.walk();
                                    for arg in child.named_children(&mut inner_cursor) {
                                        // The `alias` argument is the module name.
                                        if arg.kind() == "alias" {
                                            if let Ok(mod_name) = arg.utf8_text(source.as_bytes()) {
                                                *name = mod_name.to_string();
                                                return true;
                                            }
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
                parent = p.parent();
            }
            // No module alias anywhere above: drop the struct chunk.
            return false;
        }
        _ => {}
    }
    true
}
/// Elixir signatures carry no syntactic return type; nothing to extract.
fn extract_return_elixir(_signature: &str) -> Option<String> {
    None
}
/// Elixir language definition. Chunks are `def*` calls classified by
/// `post_process_elixir_elixir`; first-line signatures. Unlisted fields
/// inherit `DEFAULTS`.
static LANG_ELIXIR: LanguageDef = LanguageDef {
    name: "elixir",
    grammar: Some(|| tree_sitter_elixir::LANGUAGE.into()),
    extensions: &["ex", "exs"],
    chunk_query: include_str!("queries/elixir.chunks.scm"),
    call_query: Some(include_str!("queries/elixir.calls.scm")),
    signature_style: SignatureStyle::FirstLine,
    doc_nodes: &["comment"],
    // Keywords, `def*` forms, and ubiquitous stdlib module names.
    stopwords: &[
        "def", "defp", "defmodule", "defprotocol", "defimpl", "defmacro", "defmacrop",
        "defstruct", "defguard", "defguardp", "defdelegate", "defexception", "defoverridable",
        "do", "end", "fn", "case", "cond", "if", "else", "unless", "when", "with", "for",
        "receive", "try", "catch", "rescue", "after", "raise", "throw", "import", "require",
        "use", "alias", "nil", "true", "false", "and", "or", "not", "in", "is", "self",
        "super", "send", "spawn", "apply", "Enum", "List", "Map", "String", "IO", "Kernel",
        "Agent", "Task", "GenServer",
    ],
    extract_return_nl: extract_return_elixir,
    test_file_suggestion: Some(|stem, _parent| format!("test/{stem}_test.exs")),
    // ExUnit test names are quoted strings.
    test_name_suggestion: Some(|name| format!("test \"{}\"", name)),
    container_body_kinds: &["do_block"],
    post_process_chunk: Some(post_process_elixir_elixir as PostProcessChunkFn),
    test_markers: &["test ", "describe "],
    test_path_patterns: &["%/test/%", "%_test.exs"],
    entry_point_names: &["start", "init", "handle_call", "handle_cast", "handle_info"],
    // OTP behaviour callbacks.
    trait_method_names: &[
        "init",
        "handle_call",
        "handle_cast",
        "handle_info",
        "terminate",
        "code_change",
    ],
    doc_format: "elixir_doc",
    doc_convention: "Use @doc with ## Examples section per Elixir conventions.",
    field_style: FieldStyle::NameFirst {
        separators: ":",
        strip_prefixes: "",
    },
    skip_line_prefixes: &["defmodule", "defstruct"],
    ..DEFAULTS
};
/// Returns the static [`LanguageDef`] for Elixir.
pub fn definition_elixir() -> &'static LanguageDef {
    &LANG_ELIXIR
}
/// Elm language definition (behind the `lang-elm` feature). Minimal:
/// chunk query plus keyword stopwords; everything else inherits `DEFAULTS`.
#[cfg(feature = "lang-elm")]
static LANG_ELM: LanguageDef = LanguageDef {
    name: "elm",
    grammar: Some(|| tree_sitter_elm::LANGUAGE.into()),
    extensions: &["elm"],
    chunk_query: include_str!("queries/elm.chunks.scm"),
    doc_nodes: &["block_comment", "line_comment"],
    stopwords: &[
        "module", "import", "exposing", "type", "alias", "port", "let", "in", "case", "of", "if",
        "then", "else", "as", "where",
    ],
    ..DEFAULTS
};
/// Returns the static [`LanguageDef`] for Elm (feature `lang-elm`).
#[cfg(feature = "lang-elm")]
pub fn definition_elm() -> &'static LanguageDef {
    &LANG_ELM
}
/// Maps Erlang node kinds onto the generic chunk taxonomy; unknown kinds
/// keep their existing chunk type. Always keeps the chunk.
fn post_process_erlang_erlang(
    _name: &mut String,
    chunk_type: &mut ChunkType,
    node: tree_sitter::Node,
    _source: &str,
) -> bool {
    *chunk_type = match node.kind() {
        "fun_decl" => ChunkType::Function,
        "module_attribute" => ChunkType::Module,
        "type_alias" | "opaque" => ChunkType::TypeAlias,
        "record_decl" => ChunkType::Struct,
        "behaviour_attribute" | "callback" => ChunkType::Interface,
        "pp_define" => ChunkType::Macro,
        _ => return true,
    };
    true
}
/// Erlang function heads carry no syntactic return type; nothing to extract.
fn extract_return_erlang(_signature: &str) -> Option<String> {
    None
}
/// Erlang language definition. Node kinds are classified by
/// `post_process_erlang_erlang`; Common Test / EUnit path patterns mark
/// tests. Unlisted fields inherit `DEFAULTS`.
static LANG_ERLANG: LanguageDef = LanguageDef {
    name: "erlang",
    grammar: Some(|| tree_sitter_erlang::LANGUAGE.into()),
    extensions: &["erl", "hrl"],
    chunk_query: include_str!("queries/erlang.chunks.scm"),
    call_query: Some(include_str!("queries/erlang.calls.scm")),
    signature_style: SignatureStyle::FirstLine,
    doc_nodes: &["comment"],
    // Attributes, keywords, operators, and ubiquitous stdlib/OTP modules.
    stopwords: &[
        "module", "export", "import", "behaviour", "behavior", "callback", "spec", "type",
        "opaque", "record", "define", "ifdef", "ifndef", "endif", "include", "include_lib",
        "fun", "end", "case", "of", "if", "receive", "after", "when", "try", "catch", "throw",
        "begin", "and", "or", "not", "band", "bor", "bxor", "bnot", "bsl", "bsr", "div", "rem",
        "true", "false", "undefined", "ok", "error", "self", "lists", "maps", "io",
        "gen_server", "gen_statem", "supervisor", "application", "ets", "mnesia", "erlang",
        "string", "binary",
    ],
    extract_return_nl: extract_return_erlang,
    test_file_suggestion: Some(|stem, _parent| format!("test/{stem}_SUITE.erl")),
    post_process_chunk: Some(post_process_erlang_erlang as PostProcessChunkFn),
    test_path_patterns: &["%/test/%", "%_SUITE.erl", "%_tests.erl"],
    entry_point_names: &[
        "start",
        "start_link",
        "init",
        "handle_call",
        "handle_cast",
        "handle_info",
    ],
    // OTP behaviour callbacks.
    trait_method_names: &[
        "init",
        "handle_call",
        "handle_cast",
        "handle_info",
        "terminate",
        "code_change",
    ],
    doc_format: "erlang_edoc",
    doc_convention: "Use EDoc format: @param, @returns, @throws tags.",
    skip_line_prefixes: &["-record"],
    ..DEFAULTS
};
/// Returns the static [`LanguageDef`] for Erlang.
pub fn definition_erlang() -> &'static LanguageDef {
    &LANG_ERLANG
}
/// Derives a "Returns ..." phrase from an F# binding such as
/// `let f (x: int) : string = ...`: the return type is what follows the
/// last colon outside parentheses, before the `=`. `None` for `unit`,
/// missing annotations, or bindings without `=`.
fn extract_return_fsharp(signature: &str) -> Option<String> {
    let before_eq = &signature[..signature.find('=')?];
    // Track paren depth so colons inside parameter annotations are skipped.
    let mut depth = 0i32;
    let mut last_colon = None;
    for (i, ch) in before_eq.char_indices() {
        match ch {
            '(' => depth += 1,
            ')' => depth -= 1,
            ':' if depth == 0 => last_colon = Some(i),
            _ => {}
        }
    }
    let ret_type = before_eq[last_colon? + 1..].trim();
    if ret_type.is_empty() || ret_type == "unit" {
        return None;
    }
    Some(format!(
        "Returns {}",
        crate::nl::tokenize_identifier(ret_type).join(" ")
    ))
}
/// Finds the container name of an F# type definition: the first `type_name`
/// child that itself carries a `type_name` field. `None` when no child
/// provides one.
fn extract_container_name_fsharp_fsharp(node: tree_sitter::Node, source: &str) -> Option<String> {
    let mut cursor = node.walk();
    node.children(&mut cursor)
        .filter(|child| child.kind() == "type_name")
        .find_map(|child| child.child_by_field_name("type_name"))
        .map(|name| source[name.byte_range()].to_string())
}
/// F# language definition. Type definitions act as method containers;
/// NUnit/xUnit attributes mark tests via `post_process_fsharp_fsharp`.
/// Unlisted fields inherit `DEFAULTS`.
static LANG_FSHARP: LanguageDef = LanguageDef {
    name: "fsharp",
    grammar: Some(|| tree_sitter_fsharp::LANGUAGE_FSHARP.into()),
    extensions: &["fs", "fsi"],
    chunk_query: include_str!("queries/fsharp.chunks.scm"),
    call_query: Some(include_str!("queries/fsharp.calls.scm")),
    doc_nodes: &["line_comment", "block_comment"],
    method_containers: &[
        "anon_type_defn",
        "interface_type_defn",
        "record_type_defn",
        "union_type_defn",
    ],
    // Keywords and modifiers excluded from identifier indexing.
    stopwords: &[
        "let", "in", "if", "then", "else", "match", "with", "fun", "function", "type",
        "module", "open", "do", "for", "while", "yield", "return", "mutable", "rec", "and",
        "or", "not", "true", "false", "null", "abstract", "member", "override", "static",
        "private", "public", "internal", "val", "new", "inherit", "interface", "end", "begin",
        "of", "as", "when", "upcast", "downcast", "use", "try", "finally", "raise", "async",
        "task",
    ],
    extract_return_nl: extract_return_fsharp,
    test_file_suggestion: Some(|stem, parent| format!("{parent}/{stem}Tests.fs")),
    type_query: Some(include_str!("queries/fsharp.types.scm")),
    common_types: &[
        "string", "int", "bool", "float", "decimal", "byte", "char", "unit", "obj", "int64",
        "uint", "int16", "double", "nativeint", "bigint", "seq", "list", "array", "option",
        "voption", "result", "Map", "Set", "Dictionary", "HashSet", "ResizeArray", "Task",
        "Async", "IDisposable", "IEnumerable", "IComparable", "Exception", "StringBuilder",
        "CancellationToken",
    ],
    extract_container_name: Some(extract_container_name_fsharp_fsharp),
    test_markers: &["[<Test>]", "[<Fact>]", "[<Theory>]"],
    test_path_patterns: &["%/Tests/%", "%/tests/%", "%Tests.fs"],
    entry_point_names: &["main"],
    // Well-known .NET override method names.
    trait_method_names: &["Equals", "GetHashCode", "ToString", "CompareTo", "Dispose"],
    doc_format: "triple_slash",
    doc_convention: "Use XML doc comments: <summary>, <param>, <returns> tags.",
    field_style: FieldStyle::NameFirst {
        separators: ":",
        strip_prefixes: "mutable",
    },
    skip_line_prefixes: &["type "],
    post_process_chunk: Some(post_process_fsharp_fsharp as PostProcessChunkFn),
    ..DEFAULTS
};
/// Marks F# functions/methods as tests when a `[<Test>]`/`[<Fact>]`/
/// `[<Theory>]` attribute appears either among the preceding attribute
/// siblings or inside the first 200 bytes of the binding itself. Always
/// keeps the chunk.
fn post_process_fsharp_fsharp(
    _name: &mut String,
    chunk_type: &mut ChunkType,
    node: tree_sitter::Node,
    source: &str,
) -> bool {
    if !matches!(*chunk_type, ChunkType::Function | ChunkType::Method) {
        return true;
    }
    // Scan the contiguous run of `[<...>]` attribute siblings above the node.
    let mut prev = node.prev_named_sibling();
    while let Some(sib) = prev {
        let sib_text = &source[sib.byte_range()];
        if !sib_text.starts_with("[<") {
            break;
        }
        if ["Test", "Fact", "Theory"].iter().any(|m| sib_text.contains(m)) {
            *chunk_type = ChunkType::Test;
            return true;
        }
        prev = sib.prev_named_sibling();
    }
    // Fallback: the attribute may have been parsed as part of the binding.
    let node_text = &source[node.byte_range()];
    let header = &node_text[..node_text.floor_char_boundary(200)];
    if ["[<Test>]", "[<Fact>]", "[<Theory>]"]
        .iter()
        .any(|a| header.contains(a))
    {
        *chunk_type = ChunkType::Test;
    }
    true
}
/// Returns the static [`LanguageDef`] for F#.
pub fn definition_fsharp() -> &'static LanguageDef {
    &LANG_FSHARP
}
/// Maps Gleam node kinds to chunk types: functions (with `@external`
/// bodies reported as `Extern`), type definitions as enums, type aliases,
/// and constants. Always keeps the chunk.
///
/// Fix: the source parameter was named `_source` (conventionally "unused")
/// even though it is read for the `@external` check; renamed to `source`.
fn post_process_gleam_gleam(
    _name: &mut String,
    chunk_type: &mut ChunkType,
    node: tree_sitter::Node,
    source: &str,
) -> bool {
    match node.kind() {
        "function" => {
            *chunk_type = ChunkType::Function;
            // `@external` functions delegate their body to Erlang/JS code.
            if source[node.byte_range()].contains("@external") {
                *chunk_type = ChunkType::Extern;
            }
        }
        "type_definition" => *chunk_type = ChunkType::Enum,
        "type_alias" => *chunk_type = ChunkType::TypeAlias,
        "constant" => *chunk_type = ChunkType::Constant,
        _ => {}
    }
    true
}
/// Derives a "Returns ..." phrase from a Gleam signature: the text between
/// `->` and the opening brace, lowercased after tokenization. `None` for
/// `Nil`, an empty type, or a signature without `->`.
fn extract_return_gleam(signature: &str) -> Option<String> {
    let sig = signature.trim();
    let (_, tail) = sig.split_once("->")?;
    let ret = match tail.find('{') {
        Some(brace) => tail[..brace].trim(),
        None => tail.trim(),
    };
    match ret {
        "" | "Nil" => None,
        _ => Some(format!(
            "Returns {}",
            crate::nl::tokenize_identifier(ret).join(" ").to_lowercase()
        )),
    }
}
/// Language definition for Gleam (`.gleam`).
static LANG_GLEAM: LanguageDef = LanguageDef {
    name: "gleam",
    grammar: Some(|| tree_sitter_gleam::LANGUAGE.into()),
    extensions: &["gleam"],
    chunk_query: include_str!("queries/gleam.chunks.scm"),
    call_query: Some(include_str!("queries/gleam.calls.scm")),
    signature_style: SignatureStyle::FirstLine,
    doc_nodes: &["module_comment", "statement_comment", "comment"],
    // Keywords and ubiquitous identifiers excluded from name tokenization.
    stopwords: &[
        "fn", "pub", "let", "assert", "case", "if", "else", "use", "import", "type", "const",
        "opaque", "external", "todo", "panic", "as", "try", "Ok", "Error", "True", "False", "Nil",
        "Int", "Float", "String", "Bool", "List", "Result", "Option", "BitArray", "Dict", "io",
        "int", "float", "string", "list", "result", "option", "dict", "map",
    ],
    extract_return_nl: extract_return_gleam,
    // Gleam convention: tests live under test/ with a `_test` suffix.
    test_file_suggestion: Some(|stem, _parent| format!("test/{stem}_test.gleam")),
    common_types: &[
        "Int", "Float", "String", "Bool", "List", "Result", "Option", "Nil", "BitArray", "Dict",
    ],
    post_process_chunk: Some(post_process_gleam_gleam as PostProcessChunkFn),
    test_path_patterns: &["%/test/%", "%_test.gleam"],
    entry_point_names: &["main"],
    doc_convention: "Use /// doc comments describing parameters and return values.",
    field_style: FieldStyle::NameFirst {
        separators: ":",
        strip_prefixes: "pub",
    },
    skip_line_prefixes: &["type ", "pub type"],
    ..DEFAULTS
};
/// Returns the static [`LanguageDef`] for Gleam.
pub fn definition_gleam() -> &'static LanguageDef {
    &LANG_GLEAM
}
/// Derives a "Returns ..." sentence from a GLSL function signature.
/// Takes the words before the parameter list, drops the function name
/// (last word) and storage/precision qualifiers, and treats the rest as
/// the return type. `void` and empty types yield `None`.
fn extract_return_glsl(signature: &str) -> Option<String> {
    let before = signature[..signature.find('(')?].trim();
    let words: Vec<&str> = before.split_whitespace().collect();
    if words.len() < 2 {
        return None;
    }
    const QUALIFIERS: [&str; 7] =
        ["static", "inline", "const", "volatile", "highp", "mediump", "lowp"];
    let type_words: Vec<&str> = words[..words.len() - 1]
        .iter()
        .copied()
        .filter(|w| !QUALIFIERS.contains(w))
        .collect();
    if type_words.is_empty() || type_words == ["void"] {
        return None;
    }
    let ret = type_words.join(" ");
    let ret_words = crate::nl::tokenize_identifier(&ret).join(" ");
    Some(format!("Returns {}", ret_words))
}
/// Language definition for GLSL shaders (vertex/fragment/geometry/compute/
/// tessellation stages).
static LANG_GLSL: LanguageDef = LanguageDef {
    name: "glsl",
    grammar: Some(|| tree_sitter_glsl::LANGUAGE_GLSL.into()),
    extensions: &["glsl", "vert", "frag", "geom", "comp", "tesc", "tese"],
    chunk_query: include_str!("queries/glsl.chunks.scm"),
    call_query: Some(include_str!("queries/glsl.calls.scm")),
    doc_nodes: &["comment"],
    // C-style keywords plus GLSL qualifiers, vector/matrix/sampler types,
    // and common built-ins excluded from name tokenization.
    stopwords: &[
        "if", "else", "for", "while", "do", "switch", "case", "break", "continue", "return",
        "typedef", "struct", "enum", "union", "void", "int", "char", "float", "double", "const",
        "static", "sizeof", "true", "false", "uniform", "varying", "attribute", "in", "out",
        "inout", "flat", "smooth", "noperspective", "centroid", "sample", "patch", "layout",
        "location", "binding", "set", "push_constant", "precision", "lowp", "mediump", "highp",
        "vec2", "vec3", "vec4", "ivec2", "ivec3", "ivec4", "uvec2", "uvec3", "uvec4", "bvec2",
        "bvec3", "bvec4", "mat2", "mat3", "mat4", "mat2x3", "mat3x4", "sampler2D", "sampler3D",
        "samplerCube", "sampler2DShadow", "texture", "discard", "gl_Position", "gl_FragColor",
    ],
    extract_return_nl: extract_return_glsl,
    common_types: &[
        "int", "float", "double", "void", "bool", "vec2", "vec3", "vec4", "ivec2", "ivec3",
        "ivec4", "uvec2", "uvec3", "uvec4", "bvec2", "bvec3", "bvec4", "mat2", "mat3", "mat4",
        "mat2x3", "mat2x4", "mat3x2", "mat3x4", "mat4x2", "mat4x3", "sampler2D", "sampler3D",
        "samplerCube", "sampler2DShadow",
    ],
    // Every shader stage's entry point is main().
    entry_point_names: &["main"],
    doc_format: "javadoc",
    doc_convention: "Use Doxygen format: @param, @return tags.",
    field_style: FieldStyle::TypeFirst {
        strip_prefixes: "static const volatile extern unsigned signed",
    },
    skip_line_prefixes: &["struct "],
    ..DEFAULTS
};
/// Returns the static [`LanguageDef`] for GLSL.
pub fn definition_glsl() -> &'static LanguageDef {
    &LANG_GLSL
}
/// Derives a "Returns ..." sentence from a Go function signature.
///
/// Handles both forms:
/// * parenthesized return lists — `func f(a int) (int, error)` — where the
///   parenthesized list is reproduced verbatim;
/// * single bare return types — `func f(a int) error` — where the type is
///   tokenized into words.
///
/// Returns `None` when no return type is present.
fn extract_return_go(signature: &str) -> Option<String> {
    let sig = signature.trim_end_matches('{').trim();
    if sig.ends_with(')') {
        // The signature ends in a parenthesized group: scan backwards to
        // find that group's opening '(' by tracking nesting depth.
        let mut depth = 0;
        let mut start_idx = None;
        for (i, c) in sig.char_indices().rev() {
            match c {
                ')' => depth += 1,
                '(' => {
                    depth -= 1;
                    if depth == 0 {
                        start_idx = Some(i);
                        break;
                    }
                }
                _ => {}
            }
        }
        let start = start_idx?;
        // Only a return list if the parameter list closes right before it;
        // otherwise the trailing group *is* the parameter list.
        if !sig[..start].trim().ends_with(')') {
            return None;
        }
        let ret = &sig[start..];
        if ret.is_empty() {
            return None;
        }
        Some(format!("Returns {}", ret))
    } else {
        // Bare return type after the parameter list's closing paren.
        let paren = sig.rfind(')')?;
        let ret = sig[paren + 1..].trim();
        if ret.is_empty() {
            return None;
        }
        let ret_words = crate::nl::tokenize_identifier(ret).join(" ");
        Some(format!("Returns {}", ret_words))
    }
}
/// Applies Go naming conventions to function chunks: `NewFoo` is a
/// constructor and `TestFoo` is a test. The two prefixes are mutually
/// exclusive, so an else-if chain is equivalent to the original pair of
/// independent checks. Always keeps the chunk (returns `true`).
#[allow(clippy::ptr_arg)]
fn post_process_go_go(
    name: &mut String,
    chunk_type: &mut ChunkType,
    _node: tree_sitter::Node,
    _source: &str,
) -> bool {
    if *chunk_type == ChunkType::Function {
        // Require something after the prefix: bare "New"/"Test" don't count.
        if name.starts_with("New") && name.len() > 3 {
            *chunk_type = ChunkType::Constructor;
        } else if name.starts_with("Test") && name.len() > 4 {
            *chunk_type = ChunkType::Test;
        }
    }
    true
}
/// Language definition for Go (`.go`).
static LANG_GO: LanguageDef = LanguageDef {
    name: "go",
    grammar: Some(|| tree_sitter_go::LANGUAGE.into()),
    extensions: &["go"],
    chunk_query: include_str!("queries/go.chunks.scm"),
    call_query: Some(include_str!("queries/go.calls.scm")),
    doc_nodes: &["comment"],
    method_node_kinds: &["method_declaration"],
    // Keywords and predeclared identifiers excluded from name tokenization.
    stopwords: &[
        "func", "var", "const", "type", "struct", "interface", "return", "if", "else", "for",
        "range", "switch", "case", "break", "continue", "go", "defer", "select", "chan", "map",
        "package", "import", "true", "false", "nil",
    ],
    extract_return_nl: extract_return_go,
    // Go convention: tests sit next to the source as `foo_test.go`.
    test_file_suggestion: Some(|stem, parent| format!("{parent}/{stem}_test.go")),
    test_name_suggestion: Some(|name| super::pascal_test_name("Test", name)),
    type_query: Some(include_str!("queries/go.types.scm")),
    common_types: &[
        "string", "int", "int8", "int16", "int32", "int64", "uint", "uint8", "uint16", "uint32",
        "uint64", "float32", "float64", "bool", "byte", "rune", "error", "any", "comparable",
        "Context",
    ],
    post_process_chunk: Some(post_process_go_go as PostProcessChunkFn),
    test_path_patterns: &["%\\_test.go"],
    entry_point_names: &["main", "init"],
    // Well-known stdlib interface methods (fmt.Stringer, error, io, sort,
    // http, json) — implementing these is interface fulfillment, not API.
    trait_method_names: &[
        "String",
        "Error",
        "Close",
        "Read",
        "Write",
        "ServeHTTP",
        "Len",
        "Less",
        "Swap",
        "MarshalJSON",
        "UnmarshalJSON",
    ],
    doc_format: "go_comment",
    doc_convention: "Start with the function name per Go conventions.",
    field_style: FieldStyle::NameFirst {
        separators: " ",
        strip_prefixes: "",
    },
    skip_line_prefixes: &["type ", "func "],
    ..DEFAULTS
};
/// Returns the static [`LanguageDef`] for Go.
pub fn definition_go() -> &'static LanguageDef {
    &LANG_GO
}
/// Language definition for GraphQL schema/operation files (`.graphql`, `.gql`).
static LANG_GRAPHQL: LanguageDef = LanguageDef {
    name: "graphql",
    grammar: Some(|| tree_sitter_graphql::LANGUAGE.into()),
    extensions: &["graphql", "gql"],
    chunk_query: include_str!("queries/graphql.chunks.scm"),
    call_query: Some(include_str!("queries/graphql.calls.scm")),
    // GraphQL docs are string descriptions attached to definitions.
    doc_nodes: &["description"],
    stopwords: &[
        "type",
        "interface",
        "enum",
        "union",
        "input",
        "scalar",
        "directive",
        "query",
        "mutation",
        "subscription",
        "fragment",
        "on",
        "extend",
        "implements",
        "schema",
        "true",
        "false",
        "null",
        "repeatable",
    ],
    // The five built-in scalar types.
    common_types: &["String", "Int", "Float", "Boolean", "ID"],
    field_style: FieldStyle::NameFirst {
        separators: ":",
        strip_prefixes: "",
    },
    skip_line_prefixes: &["type ", "input ", "interface ", "enum "],
    ..DEFAULTS
};
/// Returns the static [`LanguageDef`] for GraphQL.
pub fn definition_graphql() -> &'static LanguageDef {
    &LANG_GRAPHQL
}
/// Maps Haskell grammar node kinds onto chunk categories. Always keeps
/// the chunk (returns `true`).
///
/// NOTE(review): "type_synomym" is kept verbatim — it presumably matches
/// the node-kind spelling in the tree-sitter-haskell grammar; verify
/// against that grammar's node-types before "correcting" it.
fn post_process_haskell_haskell(
    _name: &mut String,
    chunk_type: &mut ChunkType,
    node: tree_sitter::Node,
    _source: &str,
) -> bool {
    let mapped = match node.kind() {
        "function" => Some(ChunkType::Function),
        "data_type" => Some(ChunkType::Enum),
        "newtype" => Some(ChunkType::Struct),
        "type_synomym" => Some(ChunkType::TypeAlias),
        "class" => Some(ChunkType::Trait),
        "instance" => Some(ChunkType::Impl),
        _ => None,
    };
    if let Some(t) = mapped {
        *chunk_type = t;
    }
    true
}
/// Derives a "Returns ..." sentence from a Haskell type signature
/// (`name :: a -> b`). The final arrow's right-hand side is the return
/// type; an `IO` wrapper and surrounding parens are stripped, and unit
/// (`()`) or empty results yield `None`.
fn extract_return_haskell(signature: &str) -> Option<String> {
    let type_part = signature.split("::").nth(1)?;
    // No arrow means a zero-argument binding: the whole type is the result.
    let return_type = match type_part.contains("->") {
        true => type_part.rsplit("->").next()?.trim(),
        false => type_part.trim(),
    };
    let return_type = return_type.split("where").next()?.trim();
    if return_type.is_empty() {
        return None;
    }
    let clean = return_type
        .strip_prefix("IO ")
        .unwrap_or(return_type)
        .trim_start_matches('(')
        .trim_end_matches(')')
        .trim();
    if clean.is_empty() || clean == "()" {
        return None;
    }
    let ret_words = crate::nl::tokenize_identifier(clean).join(" ");
    Some(format!("Returns {}", ret_words.to_lowercase()))
}
/// Language definition for Haskell (`.hs`).
static LANG_HASKELL: LanguageDef = LanguageDef {
    name: "haskell",
    grammar: Some(|| tree_sitter_haskell::LANGUAGE.into()),
    extensions: &["hs"],
    chunk_query: include_str!("queries/haskell.chunks.scm"),
    call_query: Some(include_str!("queries/haskell.calls.scm")),
    signature_style: SignatureStyle::FirstLine,
    doc_nodes: &["comment"],
    // Keywords, Prelude types/classes, and common Prelude functions
    // excluded from name tokenization.
    stopwords: &[
        "module", "where", "import", "qualified", "as", "hiding", "data", "type", "newtype",
        "class", "instance", "deriving", "do", "let", "in", "case", "of", "if", "then", "else",
        "forall", "infixl", "infixr", "infix", "default", "foreign", "True", "False", "Nothing",
        "Just", "Maybe", "Either", "Left", "Right", "IO", "Int", "Integer", "Float", "Double",
        "Char", "String", "Bool", "Show", "Read", "Eq", "Ord", "Num", "Monad", "Functor",
        "Applicative", "Foldable", "Traversable", "return", "pure", "putStrLn", "print", "map",
        "filter", "fmap",
    ],
    extract_return_nl: extract_return_haskell,
    // Hspec convention: test/FooSpec.hs.
    test_file_suggestion: Some(|stem, _parent| format!("test/{stem}Spec.hs")),
    common_types: &[
        "Int", "Integer", "Float", "Double", "Char", "String", "Bool", "IO", "Maybe", "Either",
        "Show", "Read", "Eq", "Ord", "Num",
    ],
    container_body_kinds: &["class_declarations", "instance_declarations"],
    post_process_chunk: Some(post_process_haskell_haskell as PostProcessChunkFn),
    test_markers: &["hspec", "describe", "it ", "prop "],
    test_path_patterns: &["%/test/%", "%Spec.hs", "%Test.hs"],
    entry_point_names: &["main"],
    // Standard typeclass methods (Show/Read/Ord/Functor/Monad/Num).
    trait_method_names: &[
        "show",
        "read",
        "readsPrec",
        "showsPrec",
        "compare",
        "fmap",
        "pure",
        "return",
        "fromInteger",
    ],
    doc_format: "haskell_haddock",
    doc_convention: "Use Haddock format with -- | comments.",
    field_style: FieldStyle::NameFirst {
        separators: ":",
        strip_prefixes: "",
    },
    skip_line_prefixes: &["data ", "newtype ", "type "],
    ..DEFAULTS
};
/// Returns the static [`LanguageDef`] for Haskell.
pub fn definition_haskell() -> &'static LanguageDef {
    &LANG_HASKELL
}
/// Heredoc identifiers (compared upper-cased) whose bodies are treated as
/// shell scripts by `detect_heredoc_language_hcl`.
const SHELL_HEREDOC_IDS_HCL: &[&str] = &[
    "BASH",
    "SHELL",
    "SH",
    "SCRIPT",
    "EOT",
    "EOF",
    "USERDATA",
    "USER_DATA",
];
/// Chooses the injection language for an HCL heredoc from its identifier.
///
/// Returns `None` to fall through to the injection rule's default target
/// language (bash), `Some("python")` for Python heredocs, and
/// `Some("_skip")` when the identifier is unrecognized or absent.
fn detect_heredoc_language_hcl(node: tree_sitter::Node, source: &str) -> Option<&'static str> {
    let mut cursor = node.walk();
    for child in node.children(&mut cursor) {
        if child.kind() != "heredoc_identifier" {
            continue;
        }
        let ident = source[child.byte_range()].trim().to_uppercase();
        if SHELL_HEREDOC_IDS_HCL.contains(&ident.as_str()) {
            tracing::debug!(identifier = %ident, "HCL heredoc identified as shell");
            return None;
        }
        if matches!(ident.as_str(), "PYTHON" | "PY") {
            tracing::debug!(identifier = %ident, "HCL heredoc identified as python");
            return Some("python");
        }
        tracing::debug!(identifier = %ident, "HCL heredoc identifier not recognized, skipping");
        return Some("_skip");
    }
    Some("_skip")
}
/// Reclassifies HCL blocks and names them from their labels.
///
/// `resource`/`data` blocks become structs named `type.name`,
/// `variable`/`output` become constants, `module` becomes a module;
/// anything else falls back to a struct named by its last label.
/// Returns `false` to drop nested blocks and blocks with no labels.
fn post_process_hcl_hcl(
    name: &mut String,
    chunk_type: &mut ChunkType,
    node: tree_sitter::Node,
    source: &str,
) -> bool {
    let _span = tracing::debug_span!("post_process_hcl", name = %name).entered();
    // Drop blocks nested inside another block (node -> body -> block).
    if let Some(parent) = node.parent() {
        if parent.kind() == "body" {
            if let Some(grandparent) = parent.parent() {
                if grandparent.kind() == "block" {
                    tracing::debug!("Skipping nested block inside parent block");
                    return false;
                }
            }
        }
    }
    // Collect the block type (first bare identifier) and its string labels.
    let mut block_type = None;
    let mut labels: Vec<String> = Vec::new();
    let mut cursor = node.walk();
    for child in node.children(&mut cursor) {
        match child.kind() {
            "identifier" if block_type.is_none() => {
                block_type = Some(source[child.byte_range()].to_string());
            }
            "string_lit" => {
                // The label's text lives in a template_literal child.
                let mut inner = child.walk();
                let mut found = false;
                for c in child.children(&mut inner) {
                    if c.kind() == "template_literal" {
                        labels.push(source[c.byte_range()].to_string());
                        found = true;
                    }
                }
                if !found {
                    tracing::trace!("string_lit with no template_literal child, skipping label");
                }
            }
            _ => {}
        }
    }
    let bt = block_type.as_deref().unwrap_or("");
    if labels.is_empty() {
        tracing::debug!(block_type = bt, "Skipping block with no labels");
        return false;
    }
    let last_label = &labels[labels.len() - 1];
    match bt {
        // e.g. `resource "aws_instance" "web"` -> "aws_instance.web".
        "resource" | "data" => {
            *chunk_type = ChunkType::Struct;
            *name = if labels.len() >= 2 {
                format!("{}.{}", labels[0], labels[1])
            } else {
                last_label.clone()
            };
        }
        "variable" | "output" => {
            *chunk_type = ChunkType::Constant;
            *name = last_label.clone();
        }
        "module" => {
            *chunk_type = ChunkType::Module;
            *name = last_label.clone();
        }
        _ => {
            *chunk_type = ChunkType::Struct;
            *name = last_label.clone();
        }
    }
    tracing::debug!(
        block_type = bt,
        name = %name,
        chunk_type = ?chunk_type,
        "Reclassified HCL block"
    );
    true
}
/// Language definition for HCL / Terraform (`.tf`, `.tfvars`, `.hcl`).
static LANG_HCL: LanguageDef = LanguageDef {
    name: "hcl",
    grammar: Some(|| tree_sitter_hcl::LANGUAGE.into()),
    extensions: &["tf", "tfvars", "hcl"],
    chunk_query: include_str!("queries/hcl.chunks.scm"),
    call_query: Some(include_str!("queries/hcl.calls.scm")),
    doc_nodes: &["comment"],
    // Terraform block types, meta-arguments, and builtin symbols excluded
    // from name tokenization.
    stopwords: &[
        "resource",
        "data",
        "variable",
        "output",
        "module",
        "provider",
        "terraform",
        "locals",
        "backend",
        "required_providers",
        "required_version",
        "count",
        "for_each",
        "depends_on",
        "lifecycle",
        "provisioner",
        "connection",
        "source",
        "version",
        "type",
        "default",
        "description",
        "sensitive",
        "validation",
        "condition",
        "error_message",
        "true",
        "false",
        "null",
        "each",
        "self",
        "var",
        "local",
        "path",
    ],
    post_process_chunk: Some(post_process_hcl_hcl),
    // Heredoc bodies (user_data scripts etc.) are re-parsed as bash by
    // default; detect_heredoc_language_hcl can redirect or skip them.
    injections: &[
        InjectionRule {
            container_kind: "heredoc_template",
            content_kind: "template_literal",
            target_language: "bash",
            detect_language: Some(detect_heredoc_language_hcl),
            content_scoped_lines: false,
        },
    ],
    ..DEFAULTS
};
/// Returns the static [`LanguageDef`] for HCL.
pub fn definition_hcl() -> &'static LanguageDef {
    &LANG_HCL
}
/// Semantic landmark tags promoted to `ChunkType::Section` during HTML
/// post-processing.
const LANDMARK_TAGS_HTML: &[&str] = &[
    "nav", "main", "header", "footer", "section", "article", "aside", "form",
];
/// Structural/formatting tags dropped during HTML post-processing unless
/// they carry an `id` attribute.
const NOISE_TAGS_HTML: &[&str] = &[
    "html", "head", "body", "div", "span", "p", "ul", "ol", "li", "table", "thead", "tbody",
    "tfoot", "tr", "td", "th", "br", "hr", "img", "a", "em", "strong", "b", "i", "u", "small",
    "sub", "sup", "abbr", "code", "pre", "blockquote", "dl", "dt", "dd", "link", "meta", "title",
    "base",
];
/// Renames and reclassifies HTML element chunks (the incoming `name` is
/// the element's tag). Returns `false` to drop noise elements.
fn post_process_html_html(
    name: &mut String,
    chunk_type: &mut ChunkType,
    node: tree_sitter::Node,
    source: &str,
) -> bool {
    let tag = name.as_str();
    // Headings become sections named after their visible text.
    if matches!(tag, "h1" | "h2" | "h3" | "h4" | "h5" | "h6") {
        *chunk_type = ChunkType::Section;
        if let Some(text) = extract_element_text_html(node, source) {
            if !text.is_empty() {
                *name = text;
            }
        }
        return true;
    }
    // script/style elements are modules, named by src (or type) when present.
    if tag == "script" || tag == "style" {
        *chunk_type = ChunkType::Module;
        let start_tag = find_child_by_kind_html(node, "start_tag");
        if let Some(start) = start_tag {
            if let Some(attr_val) = find_attribute_value_html(start, "src", source) {
                *name = format!("script:{attr_val}");
            } else if let Some(attr_val) = find_attribute_value_html(start, "type", source) {
                *name = format!("{tag}:{attr_val}");
            }
        }
        return true;
    }
    // Landmark elements become sections, qualified by id or aria-label.
    if LANDMARK_TAGS_HTML.contains(&tag) {
        *chunk_type = ChunkType::Section;
        let start_tag = find_child_by_kind_html(node, "start_tag");
        if let Some(start) = start_tag {
            if let Some(id) = find_attribute_value_html(start, "id", source) {
                *name = format!("{tag}#{id}");
            } else if let Some(label) = find_attribute_value_html(start, "aria-label", source) {
                *name = format!("{tag}:{label}");
            }
        }
        return true;
    }
    // Noise tags are dropped unless an id makes them individually addressable.
    if NOISE_TAGS_HTML.contains(&tag) {
        let start_tag = find_child_by_kind_html(node, "start_tag");
        if let Some(start) = start_tag {
            if let Some(id) = find_attribute_value_html(start, "id", source) {
                *name = format!("{tag}#{id}");
                *chunk_type = ChunkType::Property;
                return true;
            }
        }
        return false;
    }
    true
}
/// Thin local alias for [`crate::parser::find_child_by_kind`], used by the
/// HTML post-processing helpers.
pub(crate) fn find_child_by_kind_html<'a>(
    node: tree_sitter::Node<'a>,
    kind: &str,
) -> Option<tree_sitter::Node<'a>> {
    crate::parser::find_child_by_kind(node, kind)
}
/// Returns `true` when the given `start_tag` node carries an attribute
/// named `attr_name` (value, if any, is ignored).
pub(crate) fn has_attribute_html(
    start_tag: tree_sitter::Node,
    attr_name: &str,
    source: &str,
) -> bool {
    let mut cursor = start_tag.walk();
    for attr in start_tag.children(&mut cursor) {
        if attr.kind() != "attribute" {
            continue;
        }
        // An attribute node holds an attribute_name child with the name text.
        let mut inner = attr.walk();
        let matched = attr.children(&mut inner).any(|c| {
            c.kind() == "attribute_name" && c.utf8_text(source.as_bytes()).unwrap_or("") == attr_name
        });
        if matched {
            return true;
        }
    }
    false
}
/// Returns the quote-stripped value of `attr_name` on a `start_tag` node,
/// or `None` when the attribute is absent or valueless.
pub(crate) fn find_attribute_value_html(
    start_tag: tree_sitter::Node,
    attr_name: &str,
    source: &str,
) -> Option<String> {
    let mut cursor = start_tag.walk();
    for child in start_tag.children(&mut cursor) {
        if child.kind() == "attribute" {
            let mut attr_cursor = child.walk();
            // Set once this attribute's name matches; the value child that
            // follows within the same attribute node is then returned.
            let mut found_name = false;
            for attr_child in child.children(&mut attr_cursor) {
                if attr_child.kind() == "attribute_name" {
                    let name_text = attr_child.utf8_text(source.as_bytes()).unwrap_or("");
                    if name_text == attr_name {
                        found_name = true;
                    }
                } else if found_name
                    && (attr_child.kind() == "quoted_attribute_value"
                        || attr_child.kind() == "attribute_value")
                {
                    let val = attr_child.utf8_text(source.as_bytes()).unwrap_or("");
                    // Strip surrounding double or single quotes.
                    let val = val.trim_matches('"').trim_matches('\'');
                    return Some(val.to_string());
                }
            }
        }
    }
    None
}
/// Returns the first non-empty `text` child of an HTML element, trimmed
/// and truncated to at most 80 characters (longer text is cut at a char
/// boundary near 77 and suffixed with "...").
fn extract_element_text_html(node: tree_sitter::Node, source: &str) -> Option<String> {
    let mut cursor = node.walk();
    node.children(&mut cursor).find_map(|child| {
        if child.kind() != "text" {
            return None;
        }
        let text = child.utf8_text(source.as_bytes()).unwrap_or("").trim();
        if text.is_empty() {
            None
        } else if text.len() > 80 {
            Some(format!("{}...", &text[..text.floor_char_boundary(77)]))
        } else {
            Some(text.to_string())
        }
    })
}
/// HTML has no function signatures; no return description can be derived.
fn extract_return_html(_signature: &str) -> Option<String> {
    None
}
/// Chooses the injection language for an HTML `<script>` element.
///
/// Returns `Some("typescript")` for TS (`lang`/`type` attributes),
/// `Some("_skip")` for non-JavaScript script types, and `None` to fall
/// through to the injection rule's default (javascript).
pub(crate) fn detect_script_language_html(
    node: tree_sitter::Node,
    source: &str,
) -> Option<&'static str> {
    let start_tag = find_child_by_kind_html(node, "start_tag")?;
    // A `lang` attribute (Vue/Svelte style) takes precedence.
    if let Some(lang_val) = find_attribute_value_html(start_tag, "lang", source) {
        if matches!(lang_val.to_lowercase().as_str(), "ts" | "typescript") {
            tracing::debug!("Detected TypeScript from lang attribute");
            return Some("typescript");
        }
    }
    // No `type` attribute means a plain JavaScript script.
    let type_val = find_attribute_value_html(start_tag, "type", source)?;
    let lower = type_val.to_lowercase();
    if lower.contains("typescript") {
        tracing::debug!("Detected TypeScript from type attribute");
        return Some("typescript");
    }
    let is_js = lower.is_empty()
        || matches!(
            lower.as_str(),
            "text/javascript"
                | "application/javascript"
                | "module"
                | "text/ecmascript"
                | "application/ecmascript"
        );
    if !is_js {
        tracing::debug!(r#type = %type_val, "Skipping non-JS script type");
        return Some("_skip");
    }
    None
}
/// Language definition for HTML (`.html`, `.htm`, `.xhtml`).
static LANG_HTML: LanguageDef = LanguageDef {
    name: "html",
    grammar: Some(|| tree_sitter_html::LANGUAGE.into()),
    extensions: &["html", "htm", "xhtml"],
    chunk_query: include_str!("queries/html.chunks.scm"),
    signature_style: SignatureStyle::FirstLine,
    doc_nodes: &["comment"],
    // Common tag and attribute names excluded from name tokenization.
    stopwords: &[
        "div",
        "span",
        "class",
        "style",
        "href",
        "src",
        "alt",
        "title",
        "type",
        "value",
        "name",
        "content",
        "http",
        "equiv",
        "charset",
        "viewport",
        "width",
        "height",
        "rel",
        "stylesheet",
    ],
    extract_return_nl: extract_return_html,
    post_process_chunk: Some(post_process_html_html as PostProcessChunkFn),
    // Embedded <script> bodies re-parse as JavaScript (or TypeScript via
    // detect_script_language_html); <style> bodies re-parse as CSS.
    injections: &[
        InjectionRule {
            container_kind: "script_element",
            content_kind: "raw_text",
            target_language: "javascript",
            detect_language: Some(detect_script_language_html),
            content_scoped_lines: false,
        },
        InjectionRule {
            container_kind: "style_element",
            content_kind: "raw_text",
            target_language: "css",
            detect_language: None,
            content_scoped_lines: false,
        },
    ],
    ..DEFAULTS
};
/// Returns the static [`LanguageDef`] for HTML.
pub fn definition_html() -> &'static LanguageDef {
    &LANG_HTML
}
/// INI has no function signatures; no return description can be derived.
fn extract_return_ini(_signature: &str) -> Option<String> {
    None
}
/// Language definition for INI-style configuration files (`.ini`, `.cfg`).
static LANG_INI: LanguageDef = LanguageDef {
    name: "ini",
    grammar: Some(|| tree_sitter_ini::LANGUAGE.into()),
    extensions: &["ini", "cfg"],
    chunk_query: include_str!("queries/ini.chunks.scm"),
    signature_style: SignatureStyle::FirstLine,
    doc_nodes: &["comment"],
    // Common boolean-ish literal values excluded from name tokenization.
    stopwords: &["true", "false", "yes", "no", "on", "off"],
    extract_return_nl: extract_return_ini,
    ..DEFAULTS
};
/// Returns the static [`LanguageDef`] for INI.
pub fn definition_ini() -> &'static LanguageDef {
    &LANG_INI
}
/// Refines Java chunk classification: `static final` fields become
/// constants, constructor declarations become `Constructor`, and methods
/// annotated as JUnit tests or Spring request mappings become `Test` /
/// `Endpoint` respectively. Always keeps the chunk (returns `true`).
fn post_process_java_java(
    _name: &mut String,
    chunk_type: &mut ChunkType,
    node: tree_sitter::Node,
    source: &str,
) -> bool {
    if *chunk_type == ChunkType::Property && node.kind() == "field_declaration" {
        // Substring heuristic: "static"/"final" could in principle also
        // appear inside the initializer expression.
        let field_text = &source[node.byte_range()];
        if field_text.contains("static") && field_text.contains("final") {
            *chunk_type = ChunkType::Constant;
        }
    }
    if node.kind() == "constructor_declaration"
        && matches!(*chunk_type, ChunkType::Function | ChunkType::Method)
    {
        *chunk_type = ChunkType::Constructor;
    }
    if matches!(*chunk_type, ChunkType::Function | ChunkType::Method) {
        let node_text = &source[node.byte_range()];
        // Annotations precede the body: only inspect the header (text up to
        // the first '{', or the first ~200 bytes when there is no body).
        let header = if let Some(brace) = node_text.find('{') {
            &node_text[..brace]
        } else {
            &node_text[..node_text.floor_char_boundary(200)]
        };
        if header.contains("@Test")
            || header.contains("@ParameterizedTest")
            || header.contains("@RepeatedTest")
        {
            *chunk_type = ChunkType::Test;
        } else if header.contains("@GetMapping")
            || header.contains("@PostMapping")
            || header.contains("@PutMapping")
            || header.contains("@DeleteMapping")
            || header.contains("@PatchMapping")
            || header.contains("@RequestMapping")
        {
            *chunk_type = ChunkType::Endpoint;
        }
    }
    true
}
/// Derives a "Returns ..." sentence from a Java method signature.
/// The word immediately before the method name (second-to-last before the
/// parameter list) is taken as the return type; `void` and bare modifiers
/// yield `None`.
fn extract_return_java(signature: &str) -> Option<String> {
    let before = signature[..signature.find('(')?].trim();
    let words: Vec<&str> = before.split_whitespace().collect();
    if words.len() < 2 {
        return None;
    }
    let ret_type = words[words.len() - 2];
    let not_a_type = matches!(
        ret_type,
        "void"
            | "public"
            | "private"
            | "protected"
            | "static"
            | "final"
            | "abstract"
            | "synchronized"
            | "native"
    );
    if not_a_type {
        return None;
    }
    let ret_words = crate::nl::tokenize_identifier(ret_type).join(" ");
    Some(format!("Returns {}", ret_words))
}
/// Language definition for Java (`.java`).
static LANG_JAVA: LanguageDef = LanguageDef {
    name: "java",
    grammar: Some(|| tree_sitter_java::LANGUAGE.into()),
    extensions: &["java"],
    chunk_query: include_str!("queries/java.chunks.scm"),
    call_query: Some(include_str!("queries/java.calls.scm")),
    doc_nodes: &["line_comment", "block_comment"],
    method_containers: &["class_body", "class_declaration"],
    // Keywords and ubiquitous literals excluded from name tokenization.
    stopwords: &[
        "public", "private", "protected", "static", "final", "abstract", "class", "interface",
        "extends", "implements", "return", "if", "else", "for", "while", "do", "switch", "case",
        "break", "continue", "new", "this", "super", "try", "catch", "finally", "throw", "throws",
        "import", "package", "void", "int", "boolean", "string", "true", "false", "null",
    ],
    extract_return_nl: extract_return_java,
    // JUnit convention: FooTest.java next to the source.
    test_file_suggestion: Some(|stem, parent| format!("{parent}/{stem}Test.java")),
    test_name_suggestion: Some(|name| super::pascal_test_name("test", name)),
    type_query: Some(include_str!("queries/java.types.scm")),
    common_types: &[
        "String",
        "Object",
        "Integer",
        "Long",
        "Double",
        "Float",
        "Boolean",
        "Byte",
        "Character",
        "List",
        "ArrayList",
        "Map",
        "HashMap",
        "Set",
        "HashSet",
        "Collection",
        "Iterator",
        "Iterable",
        "Optional",
        "Stream",
        "Exception",
        "RuntimeException",
        "IOException",
        "Class",
        "Void",
        "Comparable",
        "Serializable",
        "Cloneable",
    ],
    container_body_kinds: &["class_body"],
    post_process_chunk: Some(post_process_java_java as PostProcessChunkFn),
    test_markers: &["@Test", "@ParameterizedTest", "@RepeatedTest"],
    test_path_patterns: &["%/test/%", "%/tests/%", "%Test.java"],
    entry_point_names: &["main"],
    // Common Object/interface-contract overrides.
    trait_method_names: &[
        "equals",
        "hashCode",
        "toString",
        "compareTo",
        "clone",
        "iterator",
        "run",
        "call",
        "close",
        "accept",
        "apply",
        "get",
    ],
    doc_format: "javadoc",
    doc_convention: "Use Javadoc format: @param, @return, @throws tags.",
    field_style: FieldStyle::TypeFirst {
        strip_prefixes: "private protected public static final volatile transient",
    },
    skip_line_prefixes: &[
        "class ",
        "interface ",
        "enum ",
        "public class",
        "abstract class",
    ],
    ..DEFAULTS
};
/// Returns the static [`LanguageDef`] for Java.
pub fn definition_java() -> &'static LanguageDef {
    &LANG_JAVA
}
/// Returns `true` when `node` has any function-like ancestor, i.e. it is
/// local to a function rather than a top-level declaration.
fn is_inside_function_javascript(node: tree_sitter::Node) -> bool {
    let mut current = node.parent();
    while let Some(ancestor) = current {
        if matches!(
            ancestor.kind(),
            "function_declaration"
                | "function_expression"
                | "arrow_function"
                | "method_definition"
                | "generator_function_declaration"
                | "generator_function"
        ) {
            return true;
        }
        current = ancestor.parent();
    }
    false
}
/// Filters and refines JavaScript chunks: drops constants/variables that
/// are function-local or whose value is itself a function (those are
/// captured as functions elsewhere), and reclassifies `constructor`
/// methods. Returns `false` to drop the chunk.
fn post_process_javascript_javascript(
    name: &mut String,
    chunk_type: &mut ChunkType,
    node: tree_sitter::Node,
    _source: &str,
) -> bool {
    if *chunk_type == ChunkType::Constant || *chunk_type == ChunkType::Variable {
        // Locals inside a function body are not standalone chunks.
        if is_inside_function_javascript(node) {
            return false;
        }
        // `const f = () => {}` / function expressions are function chunks,
        // not variable chunks.
        if let Some(value) = node.child_by_field_name("value") {
            let kind = value.kind();
            if kind == "arrow_function" || kind == "function_expression" || kind == "function" {
                return false;
            }
        }
    }
    if *chunk_type == ChunkType::Method && name == "constructor" {
        *chunk_type = ChunkType::Constructor;
    }
    true
}
/// JavaScript signatures carry no return type annotation to extract.
fn extract_return_javascript(_signature: &str) -> Option<String> {
    None
}
/// Language definition for JavaScript (`.js`, `.jsx`, `.mjs`, `.cjs`).
static LANG_JAVASCRIPT: LanguageDef = LanguageDef {
    name: "javascript",
    grammar: Some(|| tree_sitter_javascript::LANGUAGE.into()),
    extensions: &["js", "jsx", "mjs", "cjs"],
    chunk_query: include_str!("queries/javascript.chunks.scm"),
    call_query: Some(include_str!("queries/javascript.calls.scm")),
    doc_nodes: &["comment"],
    method_containers: &["class_body", "class_declaration"],
    // Keywords and ubiquitous literals excluded from name tokenization.
    stopwords: &[
        "function", "const", "let", "var", "return", "if", "else", "for", "while", "do", "switch",
        "case", "break", "continue", "new", "this", "class", "extends", "import", "export",
        "from", "default", "try", "catch", "finally", "throw", "async", "await", "true", "false",
        "null", "undefined", "typeof", "instanceof", "void",
    ],
    extract_return_nl: extract_return_javascript,
    // Jest/Vitest convention: foo.test.js next to the source.
    test_file_suggestion: Some(|stem, parent| format!("{parent}/{stem}.test.js")),
    test_name_suggestion: Some(|name| format!("test('{}', ...)", name)),
    common_types: &[
        "Array", "Map", "Set", "Promise", "Date", "Error", "RegExp", "Function", "Object",
        "Symbol", "WeakMap", "WeakSet",
    ],
    container_body_kinds: &["class_body"],
    post_process_chunk: Some(post_process_javascript_javascript as PostProcessChunkFn),
    test_markers: &["describe(", "it(", "test("],
    test_path_patterns: &["%.test.%", "%.spec.%", "%/tests/%"],
    // Framework hook / handler names treated as entry points.
    entry_point_names: &[
        "handler",
        "middleware",
        "beforeEach",
        "afterEach",
        "beforeAll",
        "afterAll",
    ],
    trait_method_names: &["toString", "valueOf", "toJSON"],
    doc_format: "javadoc",
    doc_convention: "Use JSDoc format: @param {type} name, @returns {type}, @throws {type}.",
    field_style: FieldStyle::NameFirst {
        separators: ":=;",
        strip_prefixes: "public private protected readonly static",
    },
    skip_line_prefixes: &["class ", "export "],
    ..DEFAULTS
};
/// Returns the static [`LanguageDef`] for JavaScript.
pub fn definition_javascript() -> &'static LanguageDef {
    &LANG_JAVASCRIPT
}
/// JSON has no function signatures; no return description can be derived.
fn extract_return_json(_signature: &str) -> Option<String> {
    None
}
/// Keeps only members of the top-level JSON object: the node's parent
/// must be an `object` whose own parent is the `document` root. Everything
/// deeper (or detached) is dropped.
fn post_process_json_json(
    _name: &mut String,
    _chunk_type: &mut ChunkType,
    node: tree_sitter::Node,
    _source: &str,
) -> bool {
    node.parent()
        .filter(|parent| parent.kind() == "object")
        .and_then(|parent| parent.parent())
        .map_or(false, |grandparent| grandparent.kind() == "document")
}
/// Language definition for JSON (`.json`, `.jsonc`).
static LANG_JSON: LanguageDef = LanguageDef {
    name: "json",
    grammar: Some(|| tree_sitter_json::LANGUAGE.into()),
    extensions: &["json", "jsonc"],
    chunk_query: include_str!("queries/json.chunks.scm"),
    signature_style: SignatureStyle::FirstLine,
    // JSONC comments only; strict JSON has none.
    doc_nodes: &["comment"],
    stopwords: &["true", "false", "null"],
    extract_return_nl: extract_return_json,
    post_process_chunk: Some(post_process_json_json),
    ..DEFAULTS
};
/// Returns the static [`LanguageDef`] for JSON.
pub fn definition_json() -> &'static LanguageDef {
    &LANG_JSON
}
/// Reclassifies Julia chunks from the tree-sitter node kind, then applies
/// test heuristics (`@test...` macro calls, `test_` name prefix) and
/// reclassifies `const` bindings misreported as structs. Always keeps the
/// chunk (returns `true`).
fn post_process_julia_julia(
    name: &mut String,
    chunk_type: &mut ChunkType,
    node: tree_sitter::Node,
    source: &str,
) -> bool {
    match node.kind() {
        "function_definition" => *chunk_type = ChunkType::Function,
        "struct_definition" => *chunk_type = ChunkType::Struct,
        "abstract_definition" => *chunk_type = ChunkType::TypeAlias,
        "module_definition" => *chunk_type = ChunkType::Module,
        "macro_definition" => *chunk_type = ChunkType::Macro,
        "macrocall_expression" => {
            // Covers both `@test` and `@testset` invocations.
            let node_text = &source[node.byte_range()];
            if node_text.starts_with("@test") {
                *chunk_type = ChunkType::Test;
            }
        }
        _ => {}
    }
    if *chunk_type == ChunkType::Function && name.starts_with("test_") {
        *chunk_type = ChunkType::Test;
    }
    // A `const X = ...` binding captured as a struct is really a constant.
    if *chunk_type == ChunkType::Struct {
        let node_text = &source[node.byte_range()];
        if node_text.trim_start().starts_with("const ") && !node_text.contains("struct") {
            *chunk_type = ChunkType::Constant;
        }
    }
    true
}
/// Derives a "Returns ..." sentence from a Julia signature with a return
/// annotation: `f(x)::T`. `Nothing` and missing annotations yield `None`.
fn extract_return_julia(signature: &str) -> Option<String> {
    let trimmed = signature.trim();
    let after_paren = trimmed[trimmed.rfind(')')? + 1..].trim();
    let annotated = after_paren.strip_prefix("::")?.trim();
    // Only the first whitespace-delimited token is the type.
    let ret = annotated.split_whitespace().next()?;
    if ret.is_empty() || ret == "Nothing" {
        return None;
    }
    let words = crate::nl::tokenize_identifier(ret).join(" ");
    Some(format!("Returns {}", words.to_lowercase()))
}
/// Language definition for Julia (`.jl`).
static LANG_JULIA: LanguageDef = LanguageDef {
    name: "julia",
    grammar: Some(|| tree_sitter_julia::LANGUAGE.into()),
    extensions: &["jl"],
    chunk_query: include_str!("queries/julia.chunks.scm"),
    call_query: Some(include_str!("queries/julia.calls.scm")),
    signature_style: SignatureStyle::FirstLine,
    doc_nodes: &["line_comment", "block_comment"],
    // Keywords, Base types, and common Base functions excluded from name
    // tokenization.
    stopwords: &[
        "function", "end", "module", "struct", "mutable", "abstract", "type", "macro", "begin",
        "let", "const", "if", "elseif", "else", "for", "while", "do", "try", "catch", "finally",
        "return", "break", "continue", "import", "using", "export", "true", "false", "nothing",
        "where", "in", "isa", "typeof", "Int", "Int64", "Float64", "String", "Bool", "Char",
        "Vector", "Array", "Dict", "Set", "Tuple", "Nothing", "Any", "Union", "AbstractFloat",
        "AbstractString", "println", "print", "push!", "pop!", "length", "size", "map", "filter",
    ],
    extract_return_nl: extract_return_julia,
    test_file_suggestion: Some(|stem, _parent| format!("test/{stem}_test.jl")),
    common_types: &[
        "Int", "Int64", "Float64", "String", "Bool", "Char", "Vector", "Array", "Dict", "Set",
        "Tuple", "Nothing", "Any",
    ],
    post_process_chunk: Some(post_process_julia_julia as PostProcessChunkFn),
    test_markers: &["@test", "@testset"],
    test_path_patterns: &["%/test/%", "%_test.jl"],
    entry_point_names: &["main"],
    // Base interface methods commonly extended by user types.
    trait_method_names: &[
        "show",
        "convert",
        "promote_rule",
        "iterate",
        "length",
        "getindex",
        "setindex!",
    ],
    doc_convention: "Use triple-quoted docstrings with # Arguments, # Returns sections.",
    field_style: FieldStyle::NameFirst {
        separators: ":",
        strip_prefixes: "",
    },
    skip_line_prefixes: &["struct ", "mutable struct"],
    ..DEFAULTS
};
/// Returns the static [`LanguageDef`] for Julia.
pub fn definition_julia() -> &'static LanguageDef {
    &LANG_JULIA
}
/// Refines Kotlin chunk classification: secondary constructors and `init`
/// blocks become `Constructor`; methods annotated as JUnit tests or Spring
/// request mappings become `Test`/`Endpoint`; `const` properties become
/// constants; `enum`-modified and `interface` class declarations become
/// `Enum`/`Interface`. Always keeps the chunk (returns `true`).
fn post_process_kotlin_kotlin(
    name: &mut String,
    chunk_type: &mut ChunkType,
    node: tree_sitter::Node,
    source: &str,
) -> bool {
    match node.kind() {
        "secondary_constructor" => {
            *chunk_type = ChunkType::Constructor;
            *name = "constructor".to_string();
            return true;
        }
        "anonymous_initializer" => {
            // `init { ... }` blocks run as part of construction.
            *chunk_type = ChunkType::Constructor;
            *name = "init".to_string();
            return true;
        }
        _ => {}
    }
    if matches!(*chunk_type, ChunkType::Function | ChunkType::Method) {
        let node_text = &source[node.byte_range()];
        // Annotations precede the body: only inspect the header (text up to
        // the first '{', or the first ~200 bytes when there is no body).
        let header = if let Some(brace) = node_text.find('{') {
            &node_text[..brace]
        } else {
            &node_text[..node_text.floor_char_boundary(200)]
        };
        if header.contains("@Test")
            || header.contains("@ParameterizedTest")
            || header.contains("@RepeatedTest")
        {
            *chunk_type = ChunkType::Test;
        } else if header.contains("@GetMapping")
            || header.contains("@PostMapping")
            || header.contains("@PutMapping")
            || header.contains("@DeleteMapping")
            || header.contains("@PatchMapping")
            || header.contains("@RequestMapping")
        {
            *chunk_type = ChunkType::Endpoint;
        }
    }
    if *chunk_type == ChunkType::Property {
        let prop_text = &source[node.byte_range()];
        if prop_text.contains("const ") {
            *chunk_type = ChunkType::Constant;
        }
    }
    // The remaining checks only apply to class declarations.
    if node.kind() != "class_declaration" {
        return true;
    }
    let mut has_enum_modifier = false;
    let mut cursor = node.walk();
    for child in node.children(&mut cursor) {
        match child.kind() {
            "modifiers" => {
                // Look for an `enum` class modifier among the modifiers.
                let mut mod_cursor = child.walk();
                for modifier in child.children(&mut mod_cursor) {
                    if modifier.kind() == "class_modifier" {
                        let text = &source[modifier.byte_range()];
                        if text == "enum" {
                            has_enum_modifier = true;
                        }
                    }
                }
            }
            "interface" => {
                *chunk_type = ChunkType::Interface;
                return true;
            }
            _ => {}
        }
    }
    if has_enum_modifier {
        *chunk_type = ChunkType::Enum;
    }
    true
}
/// Builds a natural-language "Returns ..." phrase from a Kotlin signature's
/// return-type annotation (`fun f(): Foo`). Yields `None` when there is no
/// annotation, or when the type is empty or `Unit`.
fn extract_return_kotlin(signature: &str) -> Option<String> {
    // Everything after the closing parenthesis of the parameter list.
    let (_, tail) = signature.rsplit_once(')')?;
    // Stop at the body brace, or at `=` for expression-bodied functions.
    let cut = tail.find('{').or_else(|| tail.find('=')).unwrap_or(tail.len());
    let header_tail = &tail[..cut];
    let (_, annotated) = header_tail.split_once(':')?;
    let ret_type = annotated.trim();
    if ret_type.is_empty() || ret_type == "Unit" {
        return None;
    }
    let ret_words = crate::nl::tokenize_identifier(ret_type).join(" ");
    Some(format!("Returns {}", ret_words))
}
/// Language definition for Kotlin (`.kt`, `.kts`).
static LANG_KOTLIN: LanguageDef = LanguageDef {
    name: "kotlin",
    grammar: Some(|| tree_sitter_kotlin::LANGUAGE.into()),
    extensions: &["kt", "kts"],
    chunk_query: include_str!("queries/kotlin.chunks.scm"),
    call_query: Some(include_str!("queries/kotlin.calls.scm")),
    doc_nodes: &["line_comment", "multiline_comment"],
    method_containers: &["class_body"],
    stopwords: &[
        "fun",
        "val",
        "var",
        "class",
        "interface",
        "object",
        "companion",
        "data",
        "sealed",
        "enum",
        "abstract",
        "open",
        "override",
        "private",
        "protected",
        "public",
        "internal",
        "return",
        "if",
        "else",
        "when",
        "for",
        "while",
        "do",
        "break",
        "continue",
        "this",
        "super",
        "import",
        "package",
        "is",
        "as",
        "in",
        "null",
        "true",
        "false",
        "typealias",
        "const",
        "lateinit",
        "suspend",
        "inline",
        "reified",
    ],
    extract_return_nl: extract_return_kotlin,
    test_file_suggestion: Some(|stem, parent| format!("{parent}/{stem}Test.kt")),
    test_name_suggestion: Some(|name| super::pascal_test_name("test", name)),
    type_query: Some(include_str!("queries/kotlin.types.scm")),
    common_types: &[
        "String",
        "Int",
        "Long",
        "Double",
        "Float",
        "Boolean",
        "Byte",
        "Short",
        "Char",
        "Unit",
        "Nothing",
        "Any",
        "List",
        "ArrayList",
        "Map",
        "HashMap",
        "Set",
        "HashSet",
        "Collection",
        "MutableList",
        "MutableMap",
        "MutableSet",
        "Sequence",
        "Array",
        "Pair",
        "Triple",
        "Comparable",
        "Iterable",
    ],
    container_body_kinds: &["class_body"],
    post_process_chunk: Some(post_process_kotlin_kotlin),
    test_markers: &["@Test", "@ParameterizedTest"],
    // NOTE(review): patterns look like SQL LIKE globs (`%` wildcard) — confirm
    // against the path matcher.
    test_path_patterns: &["%/test/%", "%/tests/%", "%Test.kt"],
    entry_point_names: &["main"],
    trait_method_names: &["equals", "hashCode", "toString", "compareTo", "iterator"],
    doc_format: "javadoc",
    doc_convention: "Use KDoc format: @param, @return, @throws tags.",
    field_style: FieldStyle::NameFirst {
        separators: ":",
        strip_prefixes: "val var private protected public internal override lateinit",
    },
    skip_line_prefixes: &[
        "class ",
        "data class",
        "sealed class",
        "enum class",
        "interface ",
    ],
    ..DEFAULTS
};
/// Returns the static Kotlin language definition.
pub fn definition_kotlin() -> &'static LanguageDef {
    &LANG_KOTLIN
}
/// Normalizes a LaTeX code-listing language name (case-insensitively) to the
/// canonical name used by this crate's language registry. Unknown names map
/// to `Some("_skip")`, which tells the injector to skip the block.
fn map_code_language_latex(lang: &str) -> Option<&'static str> {
    // Alias table: each spelling on the left maps to the canonical name.
    const ALIASES: &[(&[&str], &str)] = &[
        (&["python", "python3", "py"], "python"),
        (&["rust"], "rust"),
        (&["c"], "c"),
        (&["cpp", "c++"], "cpp"),
        (&["java"], "java"),
        (&["javascript", "js"], "javascript"),
        (&["typescript", "ts"], "typescript"),
        (&["go", "golang"], "go"),
        (&["bash", "sh", "shell"], "bash"),
        (&["ruby", "rb"], "ruby"),
        (&["sql"], "sql"),
        (&["haskell", "hs"], "haskell"),
        (&["lua"], "lua"),
        (&["scala"], "scala"),
        (&["r"], "r"),
    ];
    let key = lang.to_lowercase();
    for &(spellings, canonical) in ALIASES {
        if spellings.contains(&key.as_str()) {
            return Some(canonical);
        }
    }
    tracing::debug!(
        language = lang,
        "Unrecognized code listing language, skipping"
    );
    Some("_skip")
}
/// Detects the language of a LaTeX `minted` environment.
///
/// Looks inside the environment's `begin` node: the first `curly_group_text`
/// child is the environment name (`{minted}`) and is skipped; the second
/// holds the language argument (e.g. `{python}`). Braces are stripped and
/// the value normalized through `map_code_language_latex`. Returns
/// `Some("_skip")` when no language argument is found.
fn detect_minted_language_latex(node: tree_sitter::Node, source: &str) -> Option<&'static str> {
    let mut cursor = node.walk();
    for child in node.children(&mut cursor) {
        if child.kind() == "begin" {
            let mut begin_cursor = child.walk();
            // The first curly group is the environment name itself.
            let mut found_name = false;
            for begin_child in child.children(&mut begin_cursor) {
                if begin_child.kind() == "curly_group_text" {
                    if !found_name {
                        found_name = true;
                        continue;
                    }
                    let text = source[begin_child.byte_range()].trim();
                    // Strip surrounding `{`/`}` if the capture kept them.
                    let lang = text
                        .strip_prefix('{')
                        .and_then(|s| s.strip_suffix('}'))
                        .unwrap_or(text)
                        .trim();
                    if !lang.is_empty() {
                        tracing::debug!(language = lang, "Minted environment language detected");
                        return map_code_language_latex(lang);
                    }
                }
            }
        }
    }
    Some("_skip")
}
/// Detects the language of a LaTeX `lstlisting` environment.
///
/// Scans the environment's `source_code` child for an options block of the
/// form `[..., language=Xyz, ...]`; the `language=` key is matched
/// case-insensitively. Byte offsets found in the lowercased copy are valid
/// in the original text because `to_ascii_lowercase` preserves byte
/// positions. Returns `Some("_skip")` when no language option is present.
fn detect_listing_language_latex(node: tree_sitter::Node, source: &str) -> Option<&'static str> {
    let mut cursor = node.walk();
    for child in node.children(&mut cursor) {
        if child.kind() == "source_code" {
            let text = &source[child.byte_range()];
            let trimmed = text.trim_start();
            if trimmed.starts_with('[') {
                let text_lower = trimmed.to_ascii_lowercase();
                if let Some(pos) = text_lower.find("language=") {
                    // Skip past "language=" (9 bytes) in the original text.
                    let after = &trimmed[pos + 9..];
                    // `+` allows spellings like "c++".
                    let lang: String = after
                        .chars()
                        .take_while(|c| c.is_alphanumeric() || *c == '+')
                        .collect();
                    if !lang.is_empty() {
                        tracing::debug!(
                            language = %lang,
                            "Listing environment language detected"
                        );
                        return map_code_language_latex(&lang);
                    }
                }
            }
        }
    }
    Some("_skip")
}
/// Cleans up LaTeX chunk names: unwraps a surrounding `{...}` group, drops a
/// leading backslash from macro names, and rejects chunks whose name ends up
/// empty (returns `false` to drop them).
fn post_process_latex_latex(
    name: &mut String,
    _chunk_type: &mut ChunkType,
    _node: tree_sitter::Node,
    _source: &str,
) -> bool {
    // `{Section Title}` -> `Section Title` (trimmed).
    let unbraced = name
        .strip_prefix('{')
        .and_then(|rest| rest.strip_suffix('}'))
        .map(|inner| inner.trim().to_string());
    if let Some(inner) = unbraced {
        *name = inner;
    }
    // `\newcommand` -> `newcommand`; the backslash is a single ASCII byte.
    if name.starts_with('\\') {
        name.remove(0);
    }
    !name.is_empty()
}
/// Language definition for LaTeX (`.tex`, `.sty`, `.cls`).
static LANG_LATEX: LanguageDef = LanguageDef {
    name: "latex",
    grammar: Some(|| tree_sitter_latex::LANGUAGE.into()),
    extensions: &["tex", "sty", "cls"],
    chunk_query: include_str!("queries/latex.chunks.scm"),
    signature_style: SignatureStyle::FirstLine,
    doc_nodes: &["comment"],
    stopwords: &[
        "begin",
        "end",
        "documentclass",
        "usepackage",
        "input",
        "include",
        "label",
        "ref",
        "cite",
        "bibliography",
        "maketitle",
        "tableofcontents",
        "textbf",
        "textit",
        "emph",
        "item",
        "hline",
        "vspace",
        "hspace",
        "newline",
        "newpage",
        "par",
    ],
    post_process_chunk: Some(post_process_latex_latex),
    // Embedded code listings: the `target_language` is a fallback; the
    // detect functions override it (or return "_skip" to suppress injection).
    injections: &[
        InjectionRule {
            container_kind: "minted_environment",
            content_kind: "source_code",
            target_language: "python", detect_language: Some(detect_minted_language_latex),
            content_scoped_lines: false,
        },
        InjectionRule {
            container_kind: "listing_environment",
            content_kind: "source_code",
            target_language: "c", detect_language: Some(detect_listing_language_latex),
            content_scoped_lines: false,
        },
    ],
    ..DEFAULTS
};
/// Returns the static LaTeX language definition.
pub fn definition_latex() -> &'static LanguageDef {
    &LANG_LATEX
}
/// Reports whether `name` is `UPPER_SNAKE_CASE`: only ASCII uppercase
/// letters, digits, and underscores, with at least one uppercase letter.
fn is_upper_snake_case_lua(name: &str) -> bool {
    let mut saw_upper = false;
    for b in name.bytes() {
        match b {
            b'A'..=b'Z' => saw_upper = true,
            b'0'..=b'9' | b'_' => {}
            _ => return false,
        }
    }
    saw_upper
}
/// Reports whether `node` has a Lua function ancestor (declaration or
/// definition) anywhere above it in the syntax tree.
fn is_inside_function_lua(node: tree_sitter::Node) -> bool {
    match node.parent() {
        Some(parent) => {
            matches!(parent.kind(), "function_declaration" | "function_definition")
                || is_inside_function_lua(parent)
        }
        None => false,
    }
}
/// Filters and reclassifies Lua `Constant` chunks.
///
/// Drops (returns `false`) constants that are: the `assignment_statement`
/// inside a `variable_declaration` (the declaration itself is the chunk),
/// locals inside a function body, or assignments whose value is a function.
/// Surviving names that are not `UPPER_SNAKE_CASE` are downgraded to
/// `Variable`. All other chunk types pass through untouched.
#[allow(clippy::ptr_arg)] fn post_process_lua_lua(
    name: &mut String,
    chunk_type: &mut ChunkType,
    node: tree_sitter::Node,
    _source: &str,
) -> bool {
    if *chunk_type == ChunkType::Constant {
        // Avoid double-reporting: the enclosing variable_declaration is
        // already captured as its own chunk.
        if node.kind() == "assignment_statement" {
            if let Some(parent) = node.parent() {
                if parent.kind() == "variable_declaration" {
                    return false;
                }
            }
        }
        if is_inside_function_lua(node) {
            return false;
        }
        // `local f = function() ... end` is a function, not a constant.
        if has_function_value_lua(node) {
            return false;
        }
        if !is_upper_snake_case_lua(name) {
            *chunk_type = ChunkType::Variable;
        }
        return true;
    }
    true
}
/// Reports whether a Lua assignment/declaration node carries a function
/// value, looking through `expression_list` / nested `assignment_statement`
/// wrappers for a `function_definition` child.
fn has_function_value_lua(node: tree_sitter::Node) -> bool {
    let mut cursor = node.walk();
    node.children(&mut cursor).any(|child| match child.kind() {
        "function_definition" => true,
        // Recurse through the wrappers the grammar puts around RHS values.
        "expression_list" | "assignment_statement" => has_function_value_lua(child),
        _ => false,
    })
}
/// Lua signatures carry no return-type annotations, so no natural-language
/// return description can be derived.
fn extract_return_lua(_signature: &str) -> Option<String> {
    None
}
/// Language definition for Lua (`.lua`).
static LANG_LUA: LanguageDef = LanguageDef {
    name: "lua",
    grammar: Some(|| tree_sitter_lua::LANGUAGE.into()),
    extensions: &["lua"],
    chunk_query: include_str!("queries/lua.chunks.scm"),
    call_query: Some(include_str!("queries/lua.calls.scm")),
    doc_nodes: &["comment"],
    stopwords: &[
        "function",
        "end",
        "local",
        "return",
        "if",
        "then",
        "else",
        "elseif",
        "for",
        "do",
        "while",
        "repeat",
        "until",
        "break",
        "in",
        "and",
        "or",
        "not",
        "nil",
        "true",
        "false",
        "self",
        "require",
        "module",
        "print",
        "pairs",
        "ipairs",
        "table",
        "string",
        "math",
        "io",
        "os",
        "type",
        "tostring",
        "tonumber",
        "error",
        "pcall",
        "xpcall",
        "setmetatable",
        "getmetatable",
    ],
    extract_return_nl: extract_return_lua,
    post_process_chunk: Some(post_process_lua_lua as PostProcessChunkFn),
    test_path_patterns: &["%/tests/%", "%/test/%", "%_test.lua", "%_spec.lua"],
    doc_format: "lua_ldoc",
    doc_convention: "Use LDoc format: @param, @return tags.",
    field_style: FieldStyle::NameFirst {
        separators: "=",
        strip_prefixes: "local",
    },
    ..DEFAULTS
};
/// Returns the static Lua language definition.
pub fn definition_lua() -> &'static LanguageDef {
    &LANG_LUA
}
/// Language definition for Makefiles (`.mk`, `.mak`).
static LANG_MAKE: LanguageDef = LanguageDef {
    name: "make",
    grammar: Some(|| tree_sitter_make::LANGUAGE.into()),
    extensions: &["mk", "mak"],
    chunk_query: include_str!("queries/make.chunks.scm"),
    signature_style: SignatureStyle::FirstLine,
    doc_nodes: &["comment"],
    stopwords: &[
        "all",
        "clean",
        "install",
        "uninstall",
        "dist",
        "distclean",
        "check",
        "test",
        "phony",
        "default",
        "ifdef",
        "ifndef",
        "ifeq",
        "ifneq",
        "else",
        "endif",
        "include",
        "override",
        "export",
        "unexport",
        "define",
        "endef",
        "wildcard",
        "patsubst",
        "subst",
        "filter",
        "sort",
        "word",
        "words",
        "foreach",
        "call",
        "eval",
        "origin",
        "shell",
        "info",
        "warning",
        "error",
    ],
    entry_point_names: &["all", "default"],
    // Recipe bodies are shell script; inject bash unconditionally.
    injections: &[InjectionRule {
        container_kind: "recipe",
        content_kind: "shell_text",
        target_language: "bash",
        detect_language: None,
        content_scoped_lines: false,
    }],
    ..DEFAULTS
};
/// Returns the static Make language definition.
pub fn definition_make() -> &'static LanguageDef {
    &LANG_MAKE
}
/// Language definition for Markdown (`.md`, `.mdx`).
///
/// NOTE(review): `grammar` is `None` — markdown presumably goes through a
/// dedicated (non-tree-sitter) chunking path; confirm against the chunker.
static LANG_MARKDOWN: LanguageDef = LanguageDef {
    name: "markdown",
    grammar: None, extensions: &["md", "mdx"],
    signature_style: SignatureStyle::Breadcrumb,
    // Stopwords here are English prose words, not language keywords.
    stopwords: &[
        "the",
        "and",
        "for",
        "with",
        "that",
        "this",
        "from",
        "are",
        "was",
        "will",
        "can",
        "has",
        "have",
        "been",
        "being",
        "also",
        "such",
        "each",
        "when",
        "which",
        "would",
        "about",
        "into",
        "over",
        "after",
        "before",
        "more",
        "than",
        "then",
        "only",
        "very",
        "just",
        "may",
        "must",
        "should",
        "could",
        "does",
        "did",
        "had",
        "not",
        "but",
        "all",
        "any",
        "both",
        "its",
        "our",
        "their",
        "there",
        "here",
        "where",
        "what",
        "how",
        "who",
        "see",
        "use",
        "used",
        "using",
        "following",
        "example",
        "note",
        "important",
        "below",
        "above",
        "refer",
        "section",
        "page",
        "chapter",
        "figure",
        "table",
    ],
    ..DEFAULTS
};
/// Returns the static Markdown language definition.
pub fn definition_markdown() -> &'static LanguageDef {
    &LANG_MARKDOWN
}
/// Nix binding names whose indented-string values are shell scripts
/// (derivation phases, hooks, and script-like attributes); used by
/// `detect_nix_shell_context_nix` to decide whether to inject bash.
const SHELL_CONTEXTS_NIX: &[&str] = &[
    "buildPhase",
    "installPhase",
    "configurePhase",
    "checkPhase",
    "unpackPhase",
    "patchPhase",
    "fixupPhase",
    "distPhase",
    "shellHook",
    "preBuild",
    "postBuild",
    "preInstall",
    "postInstall",
    "preCheck",
    "postCheck",
    "preConfigure",
    "postConfigure",
    "preUnpack",
    "postUnpack",
    "prePatch",
    "postPatch",
    "preFixup",
    "postFixup",
    "script",
    "buildCommand",
    "installCommand",
];
/// Decides whether a Nix indented string (`'' ... ''`) should receive a bash
/// injection.
///
/// The string must be the value of a `binding` whose attrpath's last
/// identifier is a known shell phase/hook (see [`SHELL_CONTEXTS_NIX`]).
/// Returning `None` means "use the injection rule's `target_language`"
/// (bash); `Some("_skip")` suppresses the injection.
fn detect_nix_shell_context_nix(node: tree_sitter::Node, source: &str) -> Option<&'static str> {
    let parent = match node.parent() {
        Some(p) if p.kind() == "binding" => p,
        _ => {
            tracing::debug!("Nix indented string not in binding context, skipping injection");
            return Some("_skip");
        }
    };
    let mut cursor = parent.walk();
    for child in parent.children(&mut cursor) {
        if child.kind() == "attrpath" {
            // Take the last identifier so `drv.buildPhase` matches on
            // `buildPhase`.
            let mut inner_cursor = child.walk();
            let mut last_ident = None;
            for attr_child in child.children(&mut inner_cursor) {
                if attr_child.kind() == "identifier" {
                    last_ident = Some(&source[attr_child.byte_range()]);
                }
            }
            if let Some(ident) = last_ident {
                if SHELL_CONTEXTS_NIX.contains(&ident) {
                    tracing::debug!(
                        binding = ident,
                        "Nix shell context detected, injecting bash"
                    );
                    return None; }
                tracing::debug!(binding = ident, "Nix binding not a shell context, skipping");
                return Some("_skip");
            }
        }
    }
    Some("_skip")
}
/// Language definition for Nix (`.nix`).
static LANG_NIX: LanguageDef = LanguageDef {
    name: "nix",
    grammar: Some(|| tree_sitter_nix::LANGUAGE.into()),
    extensions: &["nix"],
    chunk_query: include_str!("queries/nix.chunks.scm"),
    call_query: Some(include_str!("queries/nix.calls.scm")),
    doc_nodes: &["comment"],
    stopwords: &[
        "true", "false", "null", "if", "then", "else", "let", "in", "with", "rec", "inherit",
        "import", "assert", "builtins", "throw", "abort",
    ],
    // Indented strings in derivation phases/hooks are bash; the detector
    // filters out non-shell bindings.
    injections: &[
        InjectionRule {
            container_kind: "indented_string_expression",
            content_kind: "string_fragment",
            target_language: "bash",
            detect_language: Some(detect_nix_shell_context_nix),
            content_scoped_lines: false,
        },
    ],
    ..DEFAULTS
};
/// Returns the static Nix language definition.
pub fn definition_nix() -> &'static LanguageDef {
    &LANG_NIX
}
/// No natural-language return description is derived for Objective-C
/// signatures (not implemented).
fn extract_return_objc(_signature: &str) -> Option<String> {
    None
}
/// Refines Objective-C chunk classification after query capture.
///
/// * Class interfaces/implementations that have a `category` field
///   (`@interface Foo (Bar)`) become `Extension` chunks.
/// * `test…`-prefixed functions/methods (XCTest convention) become `Test`.
/// * `init…`-prefixed functions/methods become `Constructor`.
///
/// Always returns `true` (no chunk is dropped here).
fn post_process_objc_objc(
    // Renamed from `_name`: the underscore prefix signalled "unused", but the
    // binding is read below (clippy::used_underscore_binding).
    name: &mut String,
    chunk_type: &mut ChunkType,
    node: tree_sitter::Node,
    _source: &str,
) -> bool {
    match node.kind() {
        "class_interface" | "class_implementation" => {
            if node.child_by_field_name("category").is_some() {
                *chunk_type = ChunkType::Extension;
            }
        }
        _ => {}
    }
    if matches!(*chunk_type, ChunkType::Function | ChunkType::Method) && name.starts_with("test") {
        *chunk_type = ChunkType::Test;
    }
    if matches!(*chunk_type, ChunkType::Function | ChunkType::Method) && name.starts_with("init") {
        *chunk_type = ChunkType::Constructor;
    }
    true
}
/// Language definition for Objective-C (`.m`, `.mm`).
static LANG_OBJC: LanguageDef = LanguageDef {
    name: "objc",
    grammar: Some(|| tree_sitter_objc::LANGUAGE.into()),
    extensions: &["m", "mm"],
    chunk_query: include_str!("queries/objc.chunks.scm"),
    call_query: Some(include_str!("queries/objc.calls.scm")),
    doc_nodes: &["comment"],
    method_containers: &[
        "class_interface",
        "implementation_definition",
        "protocol_declaration",
    ],
    stopwords: &[
        "self",
        "super",
        "nil",
        "NULL",
        "YES",
        "NO",
        "true",
        "false",
        "if",
        "else",
        "for",
        "while",
        "do",
        "switch",
        "case",
        "break",
        "continue",
        "return",
        "void",
        "int",
        "float",
        "double",
        "char",
        "long",
        "short",
        "unsigned",
        "signed",
        "static",
        "extern",
        "const",
        "typedef",
        "struct",
        "enum",
        "union",
        "id",
        "Class",
        "SEL",
        "IMP",
        "BOOL",
        "NSObject",
        "NSString",
        "NSInteger",
        "NSUInteger",
        "CGFloat",
        "nonatomic",
        "strong",
        "weak",
        "copy",
        "assign",
        "readonly",
        "readwrite",
        "atomic",
        "property",
        "synthesize",
        "dynamic",
        "interface",
        "implementation",
        "protocol",
        "end",
        "optional",
        "required",
        "import",
        "include",
    ],
    extract_return_nl: extract_return_objc,
    test_file_suggestion: Some(|stem, parent| format!("{parent}/{stem}Tests.m")),
    container_body_kinds: &["implementation_definition"],
    post_process_chunk: Some(post_process_objc_objc),
    test_markers: &["- (void)test"],
    test_path_patterns: &["%/Tests/%", "%Tests.m"],
    entry_point_names: &["main"],
    trait_method_names: &[
        "init",
        "dealloc",
        "description",
        "hash",
        "isEqual",
        "copyWithZone",
        "encodeWithCoder",
        "initWithCoder",
    ],
    doc_format: "javadoc",
    doc_convention: "Use Doxygen format: @param, @return, @throws tags.",
    skip_line_prefixes: &["@interface", "@implementation", "@protocol"],
    ..DEFAULTS
};
/// Returns the static Objective-C language definition.
pub fn definition_objc() -> &'static LanguageDef {
    &LANG_OBJC
}
/// Maps OCaml node kinds onto chunk types.
///
/// `value_definition` -> Function, `module_definition` -> Module,
/// `external` -> Extern. A `type_definition` is classified by its RHS:
/// variant bars (`|`) -> Enum, record braces (`{`) -> Struct, otherwise
/// TypeAlias. Always returns `true`.
fn post_process_ocaml_ocaml(
    _name: &mut String,
    chunk_type: &mut ChunkType,
    node: tree_sitter::Node,
    source: &str,
) -> bool {
    let kind = node.kind();
    if kind == "value_definition" {
        *chunk_type = ChunkType::Function;
    } else if kind == "module_definition" {
        *chunk_type = ChunkType::Module;
    } else if kind == "external" {
        *chunk_type = ChunkType::Extern;
    } else if kind == "type_definition" {
        let text = node.utf8_text(source.as_bytes()).unwrap_or("");
        // Classify by the text after the first `=` (the type's body).
        *chunk_type = match text.split_once('=') {
            Some((_, rhs)) if rhs.contains('|') => ChunkType::Enum,
            Some((_, rhs)) if rhs.contains('{') => ChunkType::Struct,
            _ => ChunkType::TypeAlias,
        };
    }
    true
}
/// Builds a "Returns ..." phrase from an OCaml `val` signature
/// (`val f : a -> b -> ret`), taking the type after the last arrow.
/// Yields `None` for non-`val` signatures or an empty result type.
fn extract_return_ocaml(signature: &str) -> Option<String> {
    let trimmed = signature.trim();
    // Only `val name : type` items carry a usable type annotation.
    trimmed.strip_prefix("val ")?;
    let (_, type_part) = trimmed.split_once(':')?;
    let type_part = type_part.trim();
    let ret = match type_part.rsplit_once("->") {
        Some((_, tail)) => tail.trim(),
        None => type_part,
    };
    if ret.is_empty() {
        return None;
    }
    let words = crate::nl::tokenize_identifier(ret).join(" ");
    Some(format!("Returns {}", words.to_lowercase()))
}
/// Language definition for OCaml (`.ml`, `.mli`).
static LANG_OCAML: LanguageDef = LanguageDef {
    name: "ocaml",
    grammar: Some(|| tree_sitter_ocaml::LANGUAGE_OCAML.into()),
    extensions: &["ml", "mli"],
    chunk_query: include_str!("queries/ocaml.chunks.scm"),
    call_query: Some(include_str!("queries/ocaml.calls.scm")),
    signature_style: SignatureStyle::FirstLine,
    doc_nodes: &["comment"],
    stopwords: &[
        "let",
        "in",
        "val",
        "type",
        "module",
        "struct",
        "sig",
        "end",
        "fun",
        "function",
        "match",
        "with",
        "when",
        "if",
        "then",
        "else",
        "begin",
        "do",
        "done",
        "for",
        "to",
        "downto",
        "while",
        "open",
        "include",
        "rec",
        "and",
        "of",
        "mutable",
        "ref",
        "try",
        "raise",
        "exception",
        "external",
        "true",
        "false",
        "unit",
        "int",
        "float",
        "string",
        "bool",
        "char",
        "list",
        "option",
        "array",
        "Some",
        "None",
        "Ok",
        "Error",
        "failwith",
        "Printf",
        "Scanf",
        "List",
        "Array",
        "Map",
        "Set",
        "Hashtbl",
        "Buffer",
        "String",
    ],
    extract_return_nl: extract_return_ocaml,
    test_file_suggestion: Some(|stem, _parent| format!("test/test_{stem}.ml")),
    common_types: &[
        "int", "float", "string", "bool", "char", "unit", "list", "option", "array", "ref",
    ],
    container_body_kinds: &["structure"],
    post_process_chunk: Some(post_process_ocaml_ocaml as PostProcessChunkFn),
    test_markers: &["let%test", "let%expect_test", "let test_"],
    test_path_patterns: &["%/test/%", "%_test.ml"],
    entry_point_names: &["main"],
    trait_method_names: &[
        "compare",
        "equal",
        "hash",
        "pp",
        "show",
        "to_string",
        "of_string",
    ],
    doc_format: "ocaml_doc",
    doc_convention: "Use OCamldoc format with (** *) comments.",
    skip_line_prefixes: &["type "],
    ..DEFAULTS
};
/// Returns the static OCaml language definition.
pub fn definition_ocaml() -> &'static LanguageDef {
    &LANG_OCAML
}
/// Refines Perl chunk classification after query capture.
///
/// * `function_definition` -> Function; `package_statement` -> Module, with
///   the chunk renamed to the package name parsed from the statement text.
/// * Functions/methods named `new` become `Constructor`; `test_`-prefixed
///   ones become `Test`. Always returns `true`.
fn post_process_perl_perl(
    name: &mut String,
    chunk_type: &mut ChunkType,
    node: tree_sitter::Node,
    source: &str,
) -> bool {
    let kind = node.kind();
    if kind == "function_definition" {
        *chunk_type = ChunkType::Function;
    } else if kind == "package_statement" {
        *chunk_type = ChunkType::Module;
        // Pull the name out of `package Foo::Bar;` / `package Foo { ... }`.
        let text = node.utf8_text(source.as_bytes()).unwrap_or("").trim();
        if let Some(rest) = text.strip_prefix("package") {
            let rest = rest.trim();
            let end = rest
                .find(|c: char| c == ';' || c == '{' || c.is_whitespace())
                .unwrap_or(rest.len());
            if end > 0 {
                *name = rest[..end].to_string();
            }
        }
    }
    if matches!(*chunk_type, ChunkType::Function | ChunkType::Method) && name.as_str() == "new" {
        *chunk_type = ChunkType::Constructor;
    }
    if matches!(*chunk_type, ChunkType::Function | ChunkType::Method) && name.starts_with("test_") {
        *chunk_type = ChunkType::Test;
    }
    true
}
/// Perl subs carry no return-type annotations, so no natural-language
/// return description can be derived.
fn extract_return_perl(_signature: &str) -> Option<String> {
    None
}
/// Language definition for Perl (`.pl`, `.pm`).
static LANG_PERL: LanguageDef = LanguageDef {
    name: "perl",
    grammar: Some(|| tree_sitter_perl::LANGUAGE.into()),
    extensions: &["pl", "pm"],
    chunk_query: include_str!("queries/perl.chunks.scm"),
    call_query: Some(include_str!("queries/perl.calls.scm")),
    signature_style: SignatureStyle::FirstLine,
    doc_nodes: &["comments", "pod"],
    stopwords: &[
        "sub", "my", "our", "local", "use", "require", "package", "return", "if", "elsif", "else",
        "unless", "while", "until", "for", "foreach", "do", "eval", "die", "warn", "print", "say",
        "chomp", "chop", "push", "pop", "shift", "unshift", "splice", "join", "split", "map",
        "grep", "sort", "keys", "values", "each", "exists", "delete", "defined", "ref", "bless",
        "new", "BEGIN", "END", "AUTOLOAD", "DESTROY", "open", "close", "read", "write", "seek",
        "tell", "Carp", "Exporter", "Scalar", "List", "File", "IO", "POSIX", "Data", "Dumper",
        "strict", "warnings", "utf8", "Encode", "Getopt", "Test", "More",
    ],
    extract_return_nl: extract_return_perl,
    test_file_suggestion: Some(|stem, _parent| format!("t/{stem}.t")),
    post_process_chunk: Some(post_process_perl_perl as PostProcessChunkFn),
    test_path_patterns: &["%/t/%", "%.t"],
    entry_point_names: &["main"],
    trait_method_names: &["new", "AUTOLOAD", "DESTROY", "import", "BEGIN", "END"],
    doc_format: "hash_comment",
    doc_convention: "Use POD format for documentation sections.",
    field_style: FieldStyle::NameFirst {
        separators: "=",
        strip_prefixes: "my our local",
    },
    ..DEFAULTS
};
/// Returns the static Perl language definition.
pub fn definition_perl() -> &'static LanguageDef {
    &LANG_PERL
}
/// Cleans up PHP chunk names and reclassifies special methods.
///
/// Drops the `$` sigil from variable names, turns `__construct` methods into
/// `Constructor` chunks, and `test…`-prefixed functions/methods into `Test`
/// chunks. Always returns `true`.
fn post_process_php_php(
    name: &mut String,
    chunk_type: &mut ChunkType,
    _node: tree_sitter::Node,
    _source: &str,
) -> bool {
    // `$foo` -> `foo`; the sigil is a single ASCII byte.
    if name.starts_with('$') {
        name.remove(0);
    }
    if *chunk_type == ChunkType::Method && name.as_str() == "__construct" {
        *chunk_type = ChunkType::Constructor;
    }
    if matches!(*chunk_type, ChunkType::Function | ChunkType::Method) && name.starts_with("test") {
        *chunk_type = ChunkType::Test;
    }
    true
}
/// Builds a "Returns ..." phrase from a PHP signature's return-type
/// annotation (`function f(): ?Foo`). Yields `None` when there is no
/// annotation before the body brace, or the type is empty/`void`/`mixed`.
fn extract_return_php(signature: &str) -> Option<String> {
    // Everything after the closing parenthesis of the parameter list.
    let (_, after_paren) = signature.rsplit_once(')')?;
    let colon_pos = after_paren.find(':')?;
    let end_pos = after_paren.find('{').unwrap_or(after_paren.len());
    // The colon must come before the body brace to be a type annotation.
    if colon_pos + 1 >= end_pos {
        return None;
    }
    let ret_type = after_paren[colon_pos + 1..end_pos].trim();
    if matches!(ret_type, "" | "void" | "mixed") {
        return None;
    }
    // `?Foo` (nullable) reads the same as `Foo` in prose.
    let ret_type = ret_type.strip_prefix('?').unwrap_or(ret_type);
    let ret_words = crate::nl::tokenize_identifier(ret_type).join(" ");
    Some(format!("Returns {}", ret_words))
}
/// Language definition for PHP (`.php`).
static LANG_PHP: LanguageDef = LanguageDef {
    name: "php",
    grammar: Some(|| tree_sitter_php::LANGUAGE_PHP.into()),
    extensions: &["php"],
    chunk_query: include_str!("queries/php.chunks.scm"),
    call_query: Some(include_str!("queries/php.calls.scm")),
    doc_nodes: &["comment"],
    method_containers: &["declaration_list"],
    stopwords: &[
        "function",
        "class",
        "interface",
        "trait",
        "enum",
        "namespace",
        "use",
        "extends",
        "implements",
        "abstract",
        "final",
        "static",
        "public",
        "protected",
        "private",
        "return",
        "if",
        "else",
        "elseif",
        "for",
        "foreach",
        "while",
        "do",
        "switch",
        "case",
        "break",
        "continue",
        "new",
        "try",
        "catch",
        "finally",
        "throw",
        "echo",
        "print",
        "var",
        "const",
        "true",
        "false",
        "null",
        "self",
        "parent",
        "this",
        "array",
        "string",
        "int",
        "float",
        "bool",
        "void",
        "mixed",
        "never",
        "callable",
        "iterable",
        "object",
        "isset",
        "unset",
        "empty",
    ],
    extract_return_nl: extract_return_php,
    test_file_suggestion: Some(|stem, parent| format!("{parent}/{stem}Test.php")),
    type_query: Some(include_str!("queries/php.types.scm")),
    common_types: &[
        "string",
        "int",
        "float",
        "bool",
        "array",
        "object",
        "callable",
        "iterable",
        "void",
        "null",
        "mixed",
        "never",
        "self",
        "parent",
        "static",
        "false",
        "true",
        "Closure",
        "Iterator",
        "Generator",
        "Traversable",
        "Countable",
        "Throwable",
        "Exception",
        "RuntimeException",
        "InvalidArgumentException",
        "stdClass",
    ],
    container_body_kinds: &["declaration_list"],
    post_process_chunk: Some(post_process_php_php),
    test_markers: &["@test", "function test"],
    test_path_patterns: &["%/tests/%", "%/Tests/%", "%Test.php"],
    trait_method_names: &[
        "__construct",
        "__destruct",
        "__toString",
        "__get",
        "__set",
        "__call",
        "__isset",
        "__unset",
        "__sleep",
        "__wakeup",
        "__clone",
        "__invoke",
    ],
    // PHP files interleave HTML: both top-level text and `?> ... <?php`
    // interpolations are handed to the HTML chunker.
    injections: &[
        InjectionRule {
            container_kind: "program",
            content_kind: "text",
            target_language: "html",
            detect_language: None,
            content_scoped_lines: true,
        },
        InjectionRule {
            container_kind: "text_interpolation",
            content_kind: "text",
            target_language: "html",
            detect_language: None,
            content_scoped_lines: true,
        },
    ],
    doc_format: "javadoc",
    doc_convention: "Use PHPDoc format: @param, @return, @throws tags.",
    field_style: FieldStyle::NameFirst {
        separators: "=;",
        strip_prefixes: "public private protected static var",
    },
    skip_line_prefixes: &["class ", "interface ", "trait ", "enum "],
    ..DEFAULTS
};
/// Returns the static PHP language definition.
pub fn definition_php() -> &'static LanguageDef {
    &LANG_PHP
}
/// No natural-language return description is derived for PowerShell
/// signatures (not implemented).
fn extract_return_powershell(_signature: &str) -> Option<String> {
    None
}
/// Extracts a PowerShell container's name from its first `simple_name`
/// child (e.g. the class name of a `class_statement`).
fn extract_container_name_ps_powershell(node: tree_sitter::Node, source: &str) -> Option<String> {
    let mut cursor = node.walk();
    node.children(&mut cursor)
        .find(|child| child.kind() == "simple_name")
        .map(|child| source[child.byte_range()].to_string())
}
/// Language definition for PowerShell (`.ps1`, `.psm1`).
static LANG_POWERSHELL: LanguageDef = LanguageDef {
    name: "powershell",
    grammar: Some(|| tree_sitter_powershell::LANGUAGE.into()),
    extensions: &["ps1", "psm1"],
    chunk_query: include_str!("queries/powershell.chunks.scm"),
    call_query: Some(include_str!("queries/powershell.calls.scm")),
    doc_nodes: &["comment"],
    method_containers: &["class_statement"],
    stopwords: &[
        "function",
        "param",
        "begin",
        "process",
        "end",
        "if",
        "else",
        "elseif",
        "switch",
        "for",
        "foreach",
        "while",
        "do",
        "until",
        "try",
        "catch",
        "finally",
        "throw",
        "return",
        "exit",
        "break",
        "continue",
        "class",
        "enum",
        "using",
        "namespace",
        "hidden",
        "static",
        "void",
        "new",
        "true",
        "false",
        "null",
    ],
    extract_return_nl: extract_return_powershell,
    test_file_suggestion: Some(|stem, parent| format!("{parent}/{stem}.Tests.ps1")),
    common_types: &[
        "string",
        "int",
        "bool",
        "object",
        "void",
        "double",
        "float",
        "long",
        "byte",
        "char",
        "decimal",
        "array",
        "hashtable",
        "PSObject",
        "PSCustomObject",
        "ScriptBlock",
        "DateTime",
        "TimeSpan",
        "Guid",
        "IPAddress",
        "SecureString",
        "PSCredential",
        "ErrorRecord",
    ],
    extract_container_name: Some(extract_container_name_ps_powershell),
    // Pester test-framework block names.
    test_markers: &["Describe ", "It ", "Context "],
    test_path_patterns: &["%/Tests/%", "%/tests/%", "%.Tests.ps1"],
    doc_convention: "Use comment-based help: .SYNOPSIS, .PARAMETER, .OUTPUTS sections.",
    post_process_chunk: Some(post_process_powershell_powershell as PostProcessChunkFn),
    ..DEFAULTS
};
/// Marks PowerShell functions/methods as tests when they are Pester blocks
/// (`Describe`/`It`/`Context`) or follow the `Test…` naming convention.
/// Always returns `true`.
fn post_process_powershell_powershell(
    name: &mut String,
    chunk_type: &mut ChunkType,
    _node: tree_sitter::Node,
    _source: &str,
) -> bool {
    let pester_block = matches!(name.as_str(), "Describe" | "It" | "Context");
    if matches!(*chunk_type, ChunkType::Function | ChunkType::Method)
        && (pester_block || name.starts_with("Test"))
    {
        *chunk_type = ChunkType::Test;
    }
    true
}
/// Returns the static PowerShell language definition.
pub fn definition_powershell() -> &'static LanguageDef {
    &LANG_POWERSHELL
}
/// Extracts a protobuf service's name from its first `service_name` child.
fn extract_container_name_protobuf(node: tree_sitter::Node, source: &str) -> Option<String> {
    let mut cursor = node.walk();
    let name_node = node
        .children(&mut cursor)
        .find(|child| child.kind() == "service_name")?;
    Some(source[name_node.byte_range()].to_string())
}
/// Language definition for Protocol Buffers (`.proto`).
static LANG_PROTOBUF: LanguageDef = LanguageDef {
    name: "protobuf",
    grammar: Some(|| tree_sitter_proto::LANGUAGE.into()),
    extensions: &["proto"],
    chunk_query: include_str!("queries/protobuf.chunks.scm"),
    call_query: Some(include_str!("queries/protobuf.calls.scm")),
    doc_nodes: &["comment"],
    method_containers: &["service"],
    stopwords: &[
        "syntax", "package", "import", "option", "message", "service", "rpc", "enum", "oneof",
        "map", "repeated", "optional", "required", "reserved", "returns", "stream", "extend",
        "true", "false", "string", "bytes", "bool", "int32", "int64", "uint32", "uint64", "sint32",
        "sint64", "fixed32", "fixed64", "sfixed32", "sfixed64", "float", "double", "google",
    ],
    extract_container_name: Some(extract_container_name_protobuf),
    field_style: FieldStyle::NameFirst {
        separators: " ",
        strip_prefixes: "optional repeated required",
    },
    skip_line_prefixes: &["message ", "enum ", "service "],
    ..DEFAULTS
};
/// Returns the static protobuf language definition.
pub fn definition_protobuf() -> &'static LanguageDef {
    &LANG_PROTOBUF
}
/// Reports whether `name` is `UPPER_SNAKE_CASE`: only ASCII uppercase
/// letters, digits, and underscores, with at least one uppercase letter.
fn is_upper_snake_case_python(name: &str) -> bool {
    let mut has_upper = false;
    for b in name.bytes() {
        if b.is_ascii_uppercase() {
            has_upper = true;
        } else if !(b.is_ascii_digit() || b == b'_') {
            return false;
        }
    }
    has_upper
}
fn is_inside_function_python(node: tree_sitter::Node) -> bool {
let mut cursor = node.parent();
while let Some(parent) = cursor {
if parent.kind() == "function_definition" {
return true;
}
cursor = parent.parent();
}
false
}
/// Refines Python chunk classification after query capture.
///
/// * `Constant` chunks: dropped (returns `false`) when inside a function
///   body; kept as `Constant` only for `UPPER_SNAKE_CASE` names, otherwise
///   downgraded to `Variable`.
/// * `__init__` methods become `Constructor`.
/// * `test_`-prefixed functions/methods become `Test`.
/// * Classes whose header mentions an enum base (`Enum`, `IntEnum`, …)
///   become `Enum`.
/// * Functions under a decorator that looks like a web-framework route
///   registration (`.route(`, `.get(`, …) become `Endpoint`.
#[allow(clippy::ptr_arg)]
fn post_process_python_python(
    name: &mut String,
    chunk_type: &mut ChunkType,
    node: tree_sitter::Node,
    // Renamed from `_source`: the underscore prefix signalled "unused", but
    // the binding is read below (clippy::used_underscore_binding).
    source: &str,
) -> bool {
    if *chunk_type == ChunkType::Constant {
        // Only module-level assignments count; function locals are noise.
        if is_inside_function_python(node) {
            return false;
        }
        if is_upper_snake_case_python(name) {
            return true;
        }
        *chunk_type = ChunkType::Variable;
        return true;
    }
    if *chunk_type == ChunkType::Method && name == "__init__" {
        *chunk_type = ChunkType::Constructor;
    }
    if (*chunk_type == ChunkType::Function || *chunk_type == ChunkType::Method)
        && name.starts_with("test_")
    {
        *chunk_type = ChunkType::Test;
    }
    if *chunk_type == ChunkType::Class {
        let node_text = &source[node.byte_range()];
        // Inspect only the class header; cap the scan at 200 bytes without
        // splitting a UTF-8 code point (floor_char_boundary, unstable API).
        let header = &node_text[..node_text.floor_char_boundary(200)];
        if header.contains("(Enum)")
            || header.contains("(IntEnum)")
            || header.contains("(StrEnum)")
            || header.contains("(Flag)")
            || header.contains("(IntFlag)")
            || header.contains("enum.Enum")
            || header.contains("enum.IntEnum")
        {
            *chunk_type = ChunkType::Enum;
        }
    }
    if *chunk_type == ChunkType::Function {
        // Walk up to the enclosing decorated_definition (if any) and check
        // for HTTP-route decorators such as `@app.route(...)`.
        let mut parent = node.parent();
        while let Some(p) = parent {
            if p.kind() == "decorated_definition" {
                let text = &source[p.byte_range()];
                if text.starts_with('@')
                    && (text.contains(".route(")
                        || text.contains(".get(")
                        || text.contains(".post(")
                        || text.contains(".put(")
                        || text.contains(".delete(")
                        || text.contains(".patch("))
                {
                    *chunk_type = ChunkType::Endpoint;
                }
                break;
            }
            parent = p.parent();
        }
    }
    true
}
/// Builds a "Returns ..." phrase from a Python signature's `-> Type`
/// annotation. Yields `None` when there is no arrow or the annotation is
/// empty after stripping the trailing colon.
fn extract_return_python(signature: &str) -> Option<String> {
    let arrow = signature.rfind("->")?;
    let ret = signature[arrow + 2..].trim().trim_end_matches(':');
    if ret.is_empty() {
        return None;
    }
    let ret_words = crate::nl::tokenize_identifier(ret).join(" ");
    Some(format!("Returns {}", ret_words))
}
/// Language definition for Python (`.py`, `.pyi`).
static LANG_PYTHON: LanguageDef = LanguageDef {
    name: "python",
    grammar: Some(|| tree_sitter_python::LANGUAGE.into()),
    extensions: &["py", "pyi"],
    chunk_query: include_str!("queries/python.chunks.scm"),
    call_query: Some(include_str!("queries/python.calls.scm")),
    signature_style: SignatureStyle::UntilColon,
    doc_nodes: &["string", "comment"],
    method_containers: &["class_definition"],
    stopwords: &[
        "def", "class", "self", "return", "if", "elif", "else", "for", "while", "import", "from",
        "as", "with", "try", "except", "finally", "raise", "pass", "break", "continue", "and",
        "or", "not", "in", "is", "true", "false", "none", "lambda", "yield", "global", "nonlocal",
    ],
    extract_return_nl: extract_return_python,
    test_file_suggestion: Some(|stem, parent| format!("{parent}/test_{stem}.py")),
    type_query: Some(include_str!("queries/python.types.scm")),
    common_types: &[
        "str",
        "int",
        "float",
        "bool",
        "list",
        "dict",
        "set",
        "tuple",
        "None",
        "Any",
        "Optional",
        "Union",
        "List",
        "Dict",
        "Set",
        "Tuple",
        "Type",
        "Callable",
        "Iterator",
        "Generator",
        "Coroutine",
        "Exception",
        "ValueError",
        "TypeError",
        "KeyError",
        "IndexError",
        "Path",
        "Self",
    ],
    post_process_chunk: Some(post_process_python_python as PostProcessChunkFn),
    test_markers: &["def test_", "pytest"],
    // `\\_` escapes the LIKE `_` wildcard so it matches a literal underscore.
    test_path_patterns: &["%/tests/%", "%\\_test.py", "%/test\\_%"],
    entry_point_names: &["__init__", "setup", "teardown"],
    trait_method_names: &[
        "__str__",
        "__repr__",
        "__eq__",
        "__ne__",
        "__lt__",
        "__le__",
        "__gt__",
        "__ge__",
        "__hash__",
        "__bool__",
        "__len__",
        "__iter__",
        "__next__",
        "__contains__",
        "__getitem__",
        "__setitem__",
        "__delitem__",
        "__call__",
        "__enter__",
        "__exit__",
        "__del__",
        "__new__",
        "__init_subclass__",
        "__class_getitem__",
    ],
    doc_format: "python_docstring",
    doc_convention: "Format as a Google-style docstring (Args/Returns/Raises sections).",
    field_style: FieldStyle::NameFirst {
        separators: ":=",
        strip_prefixes: "",
    },
    skip_line_prefixes: &["class ", "@property", "def "],
    ..DEFAULTS
};
/// Returns the static Python language definition.
pub fn definition_python() -> &'static LanguageDef {
    &LANG_PYTHON
}
/// Reports whether `name` is `UPPER_SNAKE_CASE`: only ASCII uppercase
/// letters, digits, and underscores, with at least one uppercase letter.
fn is_upper_snake_case_r(name: &str) -> bool {
    name.bytes().any(|b| b.is_ascii_uppercase())
        && name.bytes().all(|b| matches!(b, b'A'..=b'Z' | b'0'..=b'9' | b'_'))
}
/// R functions carry no return-type annotations, so no natural-language
/// return description can be derived.
fn extract_return_r(_signature: &str) -> Option<String> {
    None
}
/// Reports whether `node` lies inside an R `function_definition` anywhere
/// up its ancestor chain.
fn is_inside_function_r(node: tree_sitter::Node) -> bool {
    let mut ancestor = node.parent();
    loop {
        match ancestor {
            Some(p) if p.kind() == "function_definition" => return true,
            Some(p) => ancestor = p.parent(),
            None => return false,
        }
    }
}
/// Returns the text of the first string literal among a call node's
/// arguments, e.g. the `Person` in `setClass("Person", ...)`.
///
/// Walks `arguments` -> `argument` -> `string` -> `string_content` with
/// tree-sitter cursors. Returns `None` when the node has no `arguments`
/// field, when no string argument is found, or when the first string
/// argument has no `string_content` child (e.g. an empty string).
fn first_string_arg_r<'a>(node: tree_sitter::Node, source: &'a str) -> Option<&'a str> {
    let args = node.child_by_field_name("arguments")?;
    let mut cursor = args.walk();
    if !cursor.goto_first_child() {
        return None;
    }
    loop {
        let child = cursor.node();
        if child.kind() == "argument" {
            let mut inner = child.walk();
            if inner.goto_first_child() {
                loop {
                    let ic = inner.node();
                    if ic.kind() == "string" {
                        // Found a string literal: dig out its content node,
                        // which excludes the surrounding quote tokens.
                        let mut sc = ic.walk();
                        if sc.goto_first_child() {
                            loop {
                                if sc.node().kind() == "string_content" {
                                    return Some(&source[sc.node().byte_range()]);
                                }
                                if !sc.goto_next_sibling() {
                                    break;
                                }
                            }
                        }
                        // First string argument had no content; give up
                        // rather than fall through to later arguments.
                        return None;
                    }
                    if !inner.goto_next_sibling() {
                        break;
                    }
                }
            }
        }
        if !cursor.goto_next_sibling() {
            break;
        }
    }
    None
}
/// Resolves the called function's name from an R `call` node.
///
/// A plain `foo(...)` yields `"foo"`. For a namespaced call such as
/// `pkg::fn(...)` (a `namespace_operator` function node), the identifier
/// *after* the operator is returned; if no identifier follows, the first
/// identifier (the package part) is used as a fallback.
fn call_function_name_r<'a>(node: tree_sitter::Node, source: &'a str) -> Option<&'a str> {
    let func = node.child_by_field_name("function")?;
    if func.kind() == "identifier" {
        return Some(&source[func.byte_range()]);
    }
    if func.kind() == "namespace_operator" {
        let mut cursor = func.walk();
        if cursor.goto_first_child() {
            loop {
                let child = cursor.node();
                if child.is_named() && child.kind() == "identifier" {
                    // First identifier: the namespace/package part.
                    let text = &source[child.byte_range()];
                    if !cursor.goto_next_sibling() {
                        return Some(text);
                    }
                    // Scan the remaining siblings (past the `::` token) for
                    // the function identifier.
                    loop {
                        let next = cursor.node();
                        if next.is_named() && next.kind() == "identifier" {
                            return Some(&source[next.byte_range()]);
                        }
                        if !cursor.goto_next_sibling() {
                            break;
                        }
                    }
                    // No identifier after the operator: fall back to the
                    // package name.
                    return Some(text);
                }
                if !cursor.goto_next_sibling() {
                    break;
                }
            }
        }
    }
    None
}
// Call names whose results define S4 / Reference classes in R.
const S4_CLASS_FUNCTIONS_R: &[&str] = &["setClass", "setRefClass"];
/// Post-processes R chunks: keeps only `setClass`/`setRefClass` calls (and
/// top-level `R6Class` assignments) as classes, drops constants declared
/// inside function bodies, and demotes non-UPPER_SNAKE_CASE constants to
/// variables.
#[allow(clippy::ptr_arg)]
fn post_process_r_r(
    name: &mut String,
    chunk_type: &mut ChunkType,
    node: tree_sitter::Node,
    source: &str,
) -> bool {
    if *chunk_type == ChunkType::Class {
        // Only the S4/RefClass constructor calls count as class definitions;
        // the class name is the constructor's first string argument.
        if !S4_CLASS_FUNCTIONS_R.contains(&name.as_str()) {
            return false;
        }
        return match first_string_arg_r(node, source) {
            Some(class_name) => {
                *name = class_name.to_string();
                true
            }
            None => false,
        };
    }
    if *chunk_type == ChunkType::Constant {
        // Assignments inside function bodies are locals, not module constants.
        if is_inside_function_r(node) {
            return false;
        }
        // A top-level `Name <- R6Class(...)` is really a class definition.
        if let Some(rhs_node) = node.child_by_field_name("rhs") {
            if rhs_node.kind() == "call"
                && call_function_name_r(rhs_node, source) == Some("R6Class")
            {
                *chunk_type = ChunkType::Class;
                return true;
            }
        }
        // Lower-case names are plain variables rather than constants.
        if !is_upper_snake_case_r(name) {
            *chunk_type = ChunkType::Variable;
        }
        return true;
    }
    true
}
/// Language definition for R: testthat-based test detection, roxygen2 doc
/// convention, and S4/R6 class recognition via `post_process_r_r`.
static LANG_R: LanguageDef = LanguageDef {
    name: "r",
    grammar: Some(|| tree_sitter_r::LANGUAGE.into()),
    extensions: &["r", "R"],
    chunk_query: include_str!("queries/r.chunks.scm"),
    call_query: Some(include_str!("queries/r.calls.scm")),
    doc_nodes: &["comment"],
    stopwords: &[
        "function", "if", "else", "for", "in", "while", "repeat", "break", "next", "return",
        "library", "require", "source", "TRUE", "FALSE", "NULL", "NA", "Inf", "NaN", "print",
        "cat", "paste", "paste0", "sprintf", "message", "warning", "stop", "tryCatch", "c", "list",
        "data", "frame", "matrix", "vector", "length", "nrow", "ncol",
    ],
    extract_return_nl: extract_return_r,
    test_file_suggestion: Some(|stem, parent| format!("{parent}/tests/testthat/test-{stem}.R")),
    post_process_chunk: Some(post_process_r_r as PostProcessChunkFn),
    test_markers: &["test_that", "expect_"],
    test_path_patterns: &["%/tests/%", "%/testthat/%", "test-%.R", "test_%.R"],
    doc_format: "r_roxygen",
    doc_convention: "Use roxygen2 format: @param, @return, @export tags.",
    field_style: FieldStyle::NameFirst {
        separators: "=<",
        strip_prefixes: "",
    },
    ..DEFAULTS
};
/// Returns the static R language definition.
pub fn definition_r() -> &'static LanguageDef {
    &LANG_R
}
/// Guesses the embedded language of a Razor element by sniffing the first
/// ~200 bytes of its text (clamped to a char boundary).
///
/// Returns `Some("typescript")` for TS-flavoured `<script>` tags,
/// `Some("css")` for `<style>`, `None` for plain `<script>` (so the
/// injection rule's default JavaScript applies), and `Some("_skip")` for any
/// other element.
fn detect_razor_element_language_razor(
    node: tree_sitter::Node,
    source: &str,
) -> Option<&'static str> {
    let text = &source[node.byte_range()];
    let lower = text[..text.floor_char_boundary(200)].to_ascii_lowercase();
    if lower.starts_with("<script") {
        if lower.contains("lang=\"ts\"") || lower.contains("type=\"text/typescript\"") {
            tracing::debug!("Razor <script> detected as TypeScript");
            Some("typescript")
        } else {
            tracing::debug!("Razor <script> detected as JavaScript");
            None
        }
    } else if lower.starts_with("<style") {
        tracing::debug!("Razor <style> detected as CSS");
        Some("css")
    } else {
        Some("_skip")
    }
}
/// Extracts the lower-cased tag name from an element node's text, e.g.
/// `<Div class="x">` -> `"div"`. Returns `None` when the text does not start
/// with `<` or when no name characters follow it.
fn extract_tag_name_razor(node: tree_sitter::Node, source: &str) -> Option<String> {
    let text = &source[node.byte_range()];
    let rest = text.strip_prefix('<')?;
    // Name characters: alphanumerics plus '-' and '_' (covers custom elements).
    let end = rest
        .char_indices()
        .find(|&(_, c)| !(c.is_alphanumeric() || c == '-' || c == '_'))
        .map(|(i, _)| i)
        .unwrap_or(rest.len());
    if end == 0 {
        None
    } else {
        Some(rest[..end].to_lowercase())
    }
}
/// Returns the visible text of an element: everything between the first `>`
/// and the last `</`, with any nested `<...>` spans stripped, trimmed.
fn extract_text_content_razor(node: tree_sitter::Node, source: &str) -> String {
    let full = &source[node.byte_range()];
    let inner = match full.find('>') {
        Some(pos) => &full[pos + 1..],
        None => return String::new(),
    };
    // Cut off the closing tag if present; otherwise keep everything.
    let content = match inner.rfind("</") {
        Some(close_start) => &inner[..close_start],
        None => inner,
    };
    // Drop every `<...>` span, keeping only the text between tags.
    let mut text = String::new();
    let mut inside_tag = false;
    for ch in content.chars() {
        match ch {
            '<' => inside_tag = true,
            '>' => inside_tag = false,
            _ if !inside_tag => text.push(ch),
            _ => {}
        }
    }
    text.trim().to_string()
}
/// Finds `attr_name="value"` inside raw tag text (matching the attribute
/// name case-insensitively) and returns the value with its original casing.
/// Empty values and missing attributes yield `None`.
fn extract_attribute_from_text_razor(text: &str, attr_name: &str) -> Option<String> {
    let needle = format!("{attr_name}=\"");
    // to_ascii_lowercase preserves byte offsets, so the index found in the
    // lower-cased copy is valid in `text` as well.
    let start = text.to_ascii_lowercase().find(&needle)? + needle.len();
    let rest = &text[start..];
    let end = rest.find('"')?;
    let value = &rest[..end];
    if value.is_empty() {
        None
    } else {
        Some(value.to_string())
    }
}
// HTML heading tags promoted to named sections in Razor markup.
const HEADING_TAGS_RAZOR: &[&str] = &["h1", "h2", "h3", "h4", "h5", "h6"];
// Landmark-style tags promoted to sections, named by id / aria-label.
const LANDMARK_TAGS_RAZOR: &[&str] = &["header", "nav", "main", "footer", "aside", "article"];
/// Post-processes Razor chunks: names `@code`/`@functions` blocks (dropping
/// anonymous ones), turns heading and landmark elements into named sections,
/// drops all other elements, and reclassifies constructors.
fn post_process_razor_razor(
    name: &mut String,
    chunk_type: &mut ChunkType,
    node: tree_sitter::Node,
    source: &str,
) -> bool {
    let kind = node.kind();
    if kind == "razor_block" {
        let text = &source[node.byte_range()];
        let block_name = if text.starts_with("@code") {
            "code"
        } else if text.starts_with("@functions") {
            "functions"
        } else {
            tracing::debug!("Skipping anonymous razor block");
            return false;
        };
        *name = block_name.to_string();
        *chunk_type = ChunkType::Module;
        return true;
    }
    if kind == "element" {
        let tag = match extract_tag_name_razor(node, source) {
            Some(t) => t,
            None => return false,
        };
        if HEADING_TAGS_RAZOR.contains(&tag.as_str()) {
            // Headings become sections named after their visible text.
            let text = extract_text_content_razor(node, source);
            if text.is_empty() {
                return false;
            }
            *name = text;
            *chunk_type = ChunkType::Section;
            tracing::debug!(tag = %tag, name = %name, "Razor heading element");
            return true;
        }
        if LANDMARK_TAGS_RAZOR.contains(&tag.as_str()) {
            // Landmarks are named by id, then aria-label, then the tag itself.
            let text = &source[node.byte_range()];
            let label = extract_attribute_from_text_razor(text, "id")
                .or_else(|| extract_attribute_from_text_razor(text, "aria-label"));
            *name = label.unwrap_or_else(|| tag.clone());
            *chunk_type = ChunkType::Section;
            tracing::debug!(tag = %tag, name = %name, "Razor landmark element");
            return true;
        }
        return false;
    }
    if kind == "constructor_declaration"
        && matches!(*chunk_type, ChunkType::Function | ChunkType::Method)
    {
        *chunk_type = ChunkType::Constructor;
    }
    true
}
/// Derives a "Returns ..." phrase from a C#-style signature by taking the
/// token immediately before the method name (i.e. the declared return type)
/// and skipping `void` and pure modifier keywords.
fn extract_return_razor(signature: &str) -> Option<String> {
    // Tokens that can sit before a method name but are not return types.
    const NON_TYPES: &[&str] = &[
        "void", "public", "private", "protected", "internal", "static", "abstract",
        "virtual", "override", "sealed", "async", "extern", "partial", "new", "unsafe",
    ];
    let paren = signature.find('(')?;
    let tokens: Vec<&str> = signature[..paren].split_whitespace().collect();
    if tokens.len() < 2 {
        return None;
    }
    let candidate = tokens[tokens.len() - 2];
    if NON_TYPES.contains(&candidate) {
        return None;
    }
    let ret_words = crate::nl::tokenize_identifier(candidate).join(" ");
    Some(format!("Returns {}", ret_words))
}
/// Language definition for Razor (.cshtml/.razor): C#-flavoured chunking
/// plus markup post-processing and embedded script/style injection via
/// `detect_razor_element_language_razor`.
static LANG_RAZOR: LanguageDef = LanguageDef {
    name: "razor",
    grammar: Some(|| tree_sitter_razor::LANGUAGE.into()),
    extensions: &["cshtml", "razor"],
    chunk_query: include_str!("queries/razor.chunks.scm"),
    call_query: Some(include_str!("queries/razor.calls.scm")),
    doc_nodes: &["comment", "razor_comment"],
    method_containers: &[
        "class_declaration",
        "struct_declaration",
        "record_declaration",
        "interface_declaration",
        "declaration_list",
        "razor_block",
    ],
    // C# keywords plus Razor directive names.
    stopwords: &[
        "public", "private", "protected", "internal", "static", "readonly", "sealed",
        "abstract", "virtual", "override", "async", "await", "class", "struct", "interface",
        "enum", "namespace", "using", "return", "if", "else", "for", "foreach", "while", "do",
        "switch", "case", "break", "continue", "new", "this", "base", "try", "catch",
        "finally", "throw", "var", "void", "int", "string", "bool", "true", "false", "null",
        "get", "set", "value", "where", "partial", "event", "delegate", "record", "yield",
        "in", "out", "ref", "page", "model", "inject", "code", "functions", "rendermode",
        "attribute", "layout", "inherits", "implements", "preservewhitespace", "typeparam",
        "section",
    ],
    extract_return_nl: extract_return_razor,
    type_query: Some(include_str!("queries/razor.types.scm")),
    common_types: &[
        "string", "int", "bool", "object", "void", "double", "float", "long", "byte", "char",
        "decimal", "short", "uint", "ulong", "Task", "ValueTask", "List", "Dictionary",
        "HashSet", "Queue", "Stack", "IEnumerable", "IList", "IDictionary", "ICollection",
        "IQueryable", "Action", "Func", "Predicate", "EventHandler", "EventArgs",
        "IDisposable", "CancellationToken", "ILogger", "StringBuilder", "Exception",
        "Nullable", "Span", "Memory", "ReadOnlySpan", "IServiceProvider", "HttpContext",
        "IConfiguration",
    ],
    container_body_kinds: &["declaration_list"],
    post_process_chunk: Some(post_process_razor_razor),
    test_markers: &["[Test]", "[Fact]", "[Theory]", "[TestMethod]"],
    entry_point_names: &["Main", "OnInitializedAsync", "OnParametersSetAsync"],
    trait_method_names: &[
        "Equals", "GetHashCode", "ToString", "Dispose", "OnInitialized", "OnParametersSet",
        "OnAfterRender", "SetParametersAsync",
    ],
    injections: &[
        InjectionRule {
            container_kind: "element",
            content_kind: "_inner",
            target_language: "javascript",
            detect_language: Some(detect_razor_element_language_razor),
            content_scoped_lines: false,
        },
    ],
    ..DEFAULTS
};
/// Returns the static Razor language definition.
pub fn definition_razor() -> &'static LanguageDef {
    &LANG_RAZOR
}
/// Language definition for Ruby: first-line signatures, YARD doc convention,
/// RSpec/minitest detection, and constructor/test reclassification via
/// `post_process_ruby_ruby`.
static LANG_RUBY: LanguageDef = LanguageDef {
    name: "ruby",
    grammar: Some(|| tree_sitter_ruby::LANGUAGE.into()),
    extensions: &["rb", "rake", "gemspec"],
    chunk_query: include_str!("queries/ruby.chunks.scm"),
    call_query: Some(include_str!("queries/ruby.calls.scm")),
    signature_style: SignatureStyle::FirstLine,
    doc_nodes: &["comment"],
    method_node_kinds: &["singleton_method"],
    method_containers: &["class", "module"],
    stopwords: &[
        "def", "class", "module", "end", "if", "elsif", "else", "unless", "case", "when",
        "for", "while", "until", "do", "begin", "rescue", "ensure", "raise", "return",
        "yield", "self", "super", "true", "false", "nil", "and", "or", "not", "in",
        "include", "extend", "prepend", "require", "private", "protected", "public",
        "attr_accessor", "attr_reader", "attr_writer",
    ],
    test_file_suggestion: Some(|stem, parent| format!("{parent}/spec/{stem}_spec.rb")),
    test_markers: &["describe ", "it ", "context "],
    test_path_patterns: &["%/spec/%", "%/test/%", "%\\_spec.rb", "%\\_test.rb"],
    trait_method_names: &[
        "to_s", "to_i", "to_f", "to_a", "to_h", "inspect", "hash", "eql?", "==", "<=>",
        "each", "initialize",
    ],
    doc_format: "hash_comment",
    doc_convention: "Use YARD format: @param, @return, @raise tags.",
    field_style: FieldStyle::NameFirst {
        separators: "=",
        strip_prefixes: "attr_accessor attr_reader attr_writer",
    },
    skip_line_prefixes: &["class ", "module "],
    post_process_chunk: Some(post_process_ruby_ruby as PostProcessChunkFn),
    ..DEFAULTS
};
/// Post-processes Ruby chunks: `initialize` methods become constructors and
/// `test_`-prefixed methods become tests.
fn post_process_ruby_ruby(
    name: &mut String,
    chunk_type: &mut ChunkType,
    _node: tree_sitter::Node,
    _source: &str,
) -> bool {
    let callable = matches!(*chunk_type, ChunkType::Function | ChunkType::Method);
    if callable && name == "initialize" {
        *chunk_type = ChunkType::Constructor;
    } else if callable && name.starts_with("test_") {
        *chunk_type = ChunkType::Test;
    }
    true
}
/// Returns the static Ruby language definition.
pub fn definition_ruby() -> &'static LanguageDef {
    &LANG_RUBY
}
/// Turns the `-> T` portion of a Rust signature into a "Returns ..." phrase;
/// `None` when there is no arrow or nothing follows it.
fn extract_return_rust(signature: &str) -> Option<String> {
    let arrow = signature.find("->")?;
    let ret = signature[arrow + 2..].trim();
    if ret.is_empty() {
        return None;
    }
    Some(format!(
        "Returns {}",
        crate::nl::tokenize_identifier(ret).join(" ")
    ))
}
/// Resolves the display name of a Rust container node: for `impl` blocks the
/// implemented type's identifier (looking one level inside non-identifier
/// type nodes such as generics), otherwise the node's `name` field.
fn extract_container_name_rust_rust(container: tree_sitter::Node, source: &str) -> Option<String> {
    if container.kind() != "impl_item" {
        return container
            .child_by_field_name("name")
            .map(|n| source[n.byte_range()].to_string());
    }
    let ty = container.child_by_field_name("type")?;
    if ty.kind() == "type_identifier" {
        return Some(source[ty.byte_range()].to_string());
    }
    // e.g. `impl Foo<T>`: the identifier is a child of the generic type node.
    let mut cursor = ty.walk();
    ty.children(&mut cursor)
        .find(|child| child.kind() == "type_identifier")
        .map(|child| source[child.byte_range()].to_string())
}
/// Post-processes Rust chunks: `new` methods become constructors, functions
/// preceded by `#[test]`/`#[tokio::test]` attributes become tests,
/// `static mut` items are variables rather than constants, and functions
/// nested under `extern` blocks are classified as extern declarations.
fn post_process_rust_rust(
    name: &mut String,
    chunk_type: &mut ChunkType,
    node: tree_sitter::Node,
    source: &str,
) -> bool {
    if *chunk_type == ChunkType::Method && name == "new" {
        *chunk_type = ChunkType::Constructor;
    }
    if matches!(*chunk_type, ChunkType::Function | ChunkType::Method) {
        // Scan preceding siblings for a test attribute, skipping comments;
        // stop at the first non-attribute, non-comment node.
        let mut prev = node.prev_sibling();
        while let Some(sibling) = prev {
            match sibling.kind() {
                "attribute_item" => {
                    let attr = &source[sibling.byte_range()];
                    if attr.contains("#[test]") || attr.contains("#[tokio::test]") {
                        *chunk_type = ChunkType::Test;
                        break;
                    }
                }
                "line_comment" | "block_comment" => {}
                _ => break,
            }
            prev = sibling.prev_sibling();
        }
    }
    if *chunk_type == ChunkType::Constant
        && node.kind() == "static_item"
        && source[node.byte_range()].contains("static mut ")
    {
        *chunk_type = ChunkType::Variable;
    }
    if *chunk_type == ChunkType::Function {
        // FFI declarations live under extern blocks anywhere up the chain.
        let mut ancestor = node.parent();
        while let Some(p) = ancestor {
            if matches!(p.kind(), "extern_block" | "foreign_mod_item") {
                *chunk_type = ChunkType::Extern;
                break;
            }
            ancestor = p.parent();
        }
    }
    true
}
/// Language definition for Rust: `impl`/`trait` method containers, `#[test]`
/// detection, extern-block classification, and triple-slash doc convention.
static LANG_RUST: LanguageDef = LanguageDef {
    name: "rust",
    grammar: Some(|| tree_sitter_rust::LANGUAGE.into()),
    extensions: &["rs"],
    chunk_query: include_str!("queries/rust.chunks.scm"),
    call_query: Some(include_str!("queries/rust.calls.scm")),
    doc_nodes: &["line_comment", "block_comment"],
    method_containers: &["impl_item", "trait_item"],
    stopwords: &[
        "fn", "let", "mut", "pub", "use", "impl", "mod", "struct", "enum", "trait", "type",
        "where", "const", "static", "unsafe", "async", "await", "move", "ref", "self", "super",
        "crate", "return", "if", "else", "for", "while", "loop", "match", "break", "continue",
        "as", "in", "true", "false", "some", "none", "ok", "err",
    ],
    extract_return_nl: extract_return_rust,
    test_file_suggestion: Some(|stem, parent| format!("{parent}/tests/{stem}_test.rs")),
    type_query: Some(include_str!("queries/rust.types.scm")),
    common_types: &[
        "String", "Vec", "Result", "Option", "Box", "Arc", "Rc", "HashMap", "HashSet",
        "BTreeMap", "BTreeSet", "Path", "PathBuf", "Value", "Error", "Self", "None", "Some",
        "Ok", "Err", "Mutex", "RwLock", "Cow", "Pin", "Future", "Iterator", "Display",
        "Debug", "Clone", "Default", "Send", "Sync", "Sized", "Copy", "From", "Into",
        "AsRef", "AsMut", "Deref", "DerefMut", "Read", "Write", "Seek", "BufRead",
        "ToString", "Serialize", "Deserialize",
    ],
    extract_container_name: Some(extract_container_name_rust_rust),
    post_process_chunk: Some(post_process_rust_rust as PostProcessChunkFn),
    test_markers: &["#[test]", "#[cfg(test)]"],
    test_path_patterns: &["%/tests/%", "%\\_test.rs"],
    entry_point_names: &["main"],
    // Well-known trait method names, down-ranked when scoring identifiers.
    trait_method_names: &[
        "fmt", "from", "into", "try_from", "try_into", "deref", "deref_mut", "drop", "index",
        "index_mut", "add", "sub", "mul", "div", "rem", "neg", "not", "bitor", "bitand",
        "bitxor", "shl", "shr", "eq", "ne", "partial_cmp", "cmp", "hash", "clone",
        "clone_from", "default", "next", "into_iter", "read", "write", "flush", "from_str",
        "as_ref", "as_mut", "borrow", "borrow_mut", "serialize", "deserialize", "source",
        "poll",
    ],
    doc_format: "triple_slash",
    doc_convention:
        "Use `# Arguments`, `# Returns`, `# Errors`, `# Panics` sections as appropriate.",
    field_style: FieldStyle::NameFirst {
        separators: ":",
        strip_prefixes: "pub pub(crate) pub(super)",
    },
    skip_line_prefixes: &[
        "pub struct", "pub enum", "pub union", "struct", "enum", "union",
    ],
    ..DEFAULTS
};
/// Returns the static Rust language definition.
pub fn definition_rust() -> &'static LanguageDef {
    &LANG_RUST
}
/// Post-processes Scala chunks: `@Test`-annotated or `test`-prefixed
/// functions/methods become tests, and `var` declarations are variables
/// rather than constants.
#[allow(clippy::ptr_arg)]
fn post_process_scala_scala(
    name: &mut String,
    chunk_type: &mut ChunkType,
    node: tree_sitter::Node,
    source: &str,
) -> bool {
    let node_text = &source[node.byte_range()];
    if matches!(*chunk_type, ChunkType::Function | ChunkType::Method) {
        // Only sniff the first ~200 bytes (clamped to a char boundary) for
        // the annotation.
        let header = &node_text[..node_text.floor_char_boundary(200)];
        if header.contains("@Test") || name.starts_with("test") {
            *chunk_type = ChunkType::Test;
        }
    }
    if *chunk_type == ChunkType::Constant && node_text.trim_start().starts_with("var ") {
        *chunk_type = ChunkType::Variable;
    }
    true
}
/// Extracts the declared result type from a Scala signature: the text after
/// the `:` that follows the last parameter list, cut short at `=` or `{`.
fn extract_return_scala(signature: &str) -> Option<String> {
    let after_params = &signature[signature.rfind(')')? + 1..];
    let body_start = after_params
        .find('=')
        .or_else(|| after_params.find('{'))
        .unwrap_or(after_params.len());
    let annotation = &after_params[..body_start];
    let colon = annotation.find(':')?;
    let ret_type = annotation[colon + 1..].trim();
    if ret_type.is_empty() {
        None
    } else {
        Some(format!(
            "Returns {}",
            crate::nl::tokenize_identifier(ret_type).join(" ")
        ))
    }
}
/// Language definition for Scala: Scaladoc convention, ScalaTest/JUnit test
/// detection, and var/constant disambiguation via `post_process_scala_scala`.
static LANG_SCALA: LanguageDef = LanguageDef {
    name: "scala",
    grammar: Some(|| tree_sitter_scala::LANGUAGE.into()),
    extensions: &["scala", "sc"],
    chunk_query: include_str!("queries/scala.chunks.scm"),
    call_query: Some(include_str!("queries/scala.calls.scm")),
    doc_nodes: &["comment", "block_comment"],
    method_containers: &["class_definition", "trait_definition", "object_definition"],
    stopwords: &[
        "def", "val", "var", "class", "object", "trait", "sealed", "case", "abstract", "override",
        "implicit", "lazy", "extends", "with", "import", "package", "match", "if", "else", "for",
        "while", "yield", "return", "throw", "try", "catch", "finally", "new", "this", "super",
        "true", "false", "null",
    ],
    extract_return_nl: extract_return_scala,
    test_file_suggestion: Some(|stem, parent| format!("{parent}/src/test/scala/{stem}Spec.scala")),
    type_query: Some(include_str!("queries/scala.types.scm")),
    common_types: &[
        "String", "Int", "Long", "Double", "Float", "Boolean", "Char", "Byte", "Short", "Unit",
        "Any", "AnyRef", "AnyVal", "Nothing", "Null", "Option", "Some", "None", "List", "Map",
        "Set", "Seq", "Vector", "Array", "Future", "Either", "Left", "Right", "Try", "Success",
        "Failure", "Iterator", "Iterable", "Ordering",
    ],
    container_body_kinds: &["template_body"],
    test_markers: &["@Test", "\"should", "it should"],
    test_path_patterns: &["%/test/%", "%/tests/%", "%Spec.scala", "%Test.scala"],
    entry_point_names: &["main"],
    trait_method_names: &[
        "equals", "hashCode", "toString", "compare", "apply", "unapply",
    ],
    doc_format: "javadoc",
    doc_convention: "Use Scaladoc format: @param, @return, @throws tags.",
    field_style: FieldStyle::NameFirst {
        separators: ":",
        strip_prefixes: "val var private protected override lazy",
    },
    skip_line_prefixes: &["class ", "case class", "sealed class", "trait ", "object "],
    post_process_chunk: Some(post_process_scala_scala as PostProcessChunkFn),
    ..DEFAULTS
};
/// Returns the static Scala language definition.
pub fn definition_scala() -> &'static LanguageDef {
    &LANG_SCALA
}
/// Extracts the `returns (...)` clause of a Solidity signature as a
/// "Returns ..." phrase; `None` when there is no clause or it is empty.
fn extract_return_solidity(signature: &str) -> Option<String> {
    let ret_idx = signature.find("returns")?;
    // Peel the surrounding parentheses / trailing brace off the clause.
    let inner = signature[ret_idx + 7..]
        .trim()
        .trim_start_matches('(')
        .trim_end_matches(')')
        .trim_end_matches('{')
        .trim();
    if inner.is_empty() {
        return None;
    }
    Some(format!(
        "Returns {}",
        crate::nl::tokenize_identifier(inner).join(" ")
    ))
}
/// Language definition for Solidity: NatSpec doc convention, Foundry-style
/// test paths, and constructor/constant classification via
/// `post_process_solidity_solidity`.
static LANG_SOLIDITY: LanguageDef = LanguageDef {
    name: "solidity",
    grammar: Some(|| tree_sitter_solidity::LANGUAGE.into()),
    extensions: &["sol"],
    chunk_query: include_str!("queries/solidity.chunks.scm"),
    call_query: Some(include_str!("queries/solidity.calls.scm")),
    doc_nodes: &["comment"],
    method_containers: &["contract_body"],
    stopwords: &[
        "if", "else", "for", "while", "do", "return", "break", "continue", "contract",
        "interface", "library", "struct", "enum", "function", "modifier", "event", "error",
        "mapping", "address", "bool", "string", "bytes", "uint", "int", "uint256", "int256",
        "uint8", "bytes32", "public", "private", "internal", "external", "view", "pure",
        "payable", "memory", "storage", "calldata", "indexed", "virtual", "override",
        "abstract", "immutable", "constant", "emit", "require", "assert", "revert", "this",
        "super", "true", "false", "msg", "block", "tx",
    ],
    extract_return_nl: extract_return_solidity,
    common_types: &[
        "address", "bool", "string", "bytes", "uint256", "int256", "uint8", "uint16", "uint32",
        "uint64", "uint128", "int8", "int16", "int32", "int64", "int128", "bytes32", "bytes4",
        "bytes20",
    ],
    container_body_kinds: &["contract_body"],
    test_path_patterns: &["%/test/%", "%.t.sol"],
    entry_point_names: &["constructor", "receive", "fallback"],
    doc_format: "javadoc",
    doc_convention: "Use NatSpec format: @param, @return, @dev tags.",
    field_style: FieldStyle::NameFirst {
        separators: ";",
        strip_prefixes: "public private internal constant immutable",
    },
    skip_line_prefixes: &["contract ", "struct ", "enum ", "interface "],
    post_process_chunk: Some(post_process_solidity_solidity as PostProcessChunkFn),
    ..DEFAULTS
};
/// Post-processes Solidity chunks: `constructor` functions are classified as
/// constructors, and `constant`/`immutable` state variables as constants.
///
/// Note: the node/source parameters were previously named `_node`/`_source`
/// despite being read below — the underscore prefix falsely signalled them
/// as unused, so they are renamed.
fn post_process_solidity_solidity(
    name: &mut String,
    chunk_type: &mut ChunkType,
    node: tree_sitter::Node,
    source: &str,
) -> bool {
    if matches!(*chunk_type, ChunkType::Function | ChunkType::Method) && name == "constructor" {
        *chunk_type = ChunkType::Constructor;
    }
    if *chunk_type == ChunkType::Property {
        // `constant` / `immutable` state variables never change after
        // deployment, so index them as constants.
        let text = &source[node.byte_range()];
        if text.contains("constant ") || text.contains("immutable ") {
            *chunk_type = ChunkType::Constant;
        }
    }
    true
}
/// Returns the static Solidity language definition.
pub fn definition_solidity() -> &'static LanguageDef {
    &LANG_SOLIDITY
}
/// Extracts the `RETURNS <type>` clause from a SQL routine signature as a
/// "Returns ..." phrase, lower-casing the base type and dropping any size
/// parameters such as `(10,2)`.
///
/// Uses `to_ascii_uppercase` (not `to_uppercase`) so the byte offset found
/// in the upper-cased copy is guaranteed valid in `signature`: Unicode
/// uppercasing can change string length (e.g. 'ß' -> "SS"), which would make
/// `ret_pos` mis-slice or panic on non-ASCII input.
fn extract_return_sql(signature: &str) -> Option<String> {
    let upper = signature.to_ascii_uppercase();
    let ret_pos = upper.find("RETURNS")?;
    // Skip past "RETURNS" (7 bytes) and take the first whitespace token.
    let after = signature[ret_pos + 7..].trim();
    let type_str = after.split_whitespace().next()?;
    let base_type = type_str.split('(').next().unwrap_or(type_str);
    Some(format!("Returns {}", base_type.to_lowercase()))
}
/// Language definition for SQL: signatures truncated at `AS`, T-SQL and
/// generic keywords as stopwords, and `RETURNS <type>` extraction.
static LANG_SQL: LanguageDef = LanguageDef {
    name: "sql",
    grammar: Some(|| tree_sitter_sql::LANGUAGE.into()),
    extensions: &["sql"],
    chunk_query: include_str!("queries/sql.chunks.scm"),
    call_query: Some(include_str!("queries/sql.calls.scm")),
    signature_style: SignatureStyle::UntilAs,
    doc_nodes: &["comment", "marginalia"],
    stopwords: &[
        "create", "alter", "procedure", "function", "view", "trigger", "begin", "end",
        "declare", "set", "select", "from", "where", "insert", "into", "update", "delete",
        "exec", "execute", "as", "returns", "return", "if", "else", "while", "and", "or",
        "not", "null", "int", "varchar", "nvarchar", "decimal", "table", "on", "after",
        "before", "instead", "of", "for", "each", "row", "order", "by", "group", "having",
        "join", "inner", "left", "right", "outer", "go", "with", "nocount", "language",
        "replace",
    ],
    extract_return_nl: extract_return_sql,
    ..DEFAULTS
};
/// Returns the static SQL language definition.
pub fn definition_sql() -> &'static LanguageDef {
    &LANG_SQL
}
/// Language definition for IEC 61131-3 Structured Text: first-line
/// signatures, `(* ... *)` block-comment docs, and PLC keyword/type tables.
static LANG_STRUCTURED_TEXT: LanguageDef = LanguageDef {
    name: "structured_text",
    grammar: Some(|| tree_sitter_structured_text::LANGUAGE.into()),
    extensions: &["st", "stl"],
    chunk_query: include_str!("queries/structured_text.chunks.scm"),
    call_query: Some(include_str!("queries/structured_text.calls.scm")),
    signature_style: SignatureStyle::FirstLine,
    doc_nodes: &["block_comment", "inline_comment"],
    method_node_kinds: &["method_definition"],
    method_containers: &["function_block_definition"],
    stopwords: &[
        "IF", "THEN", "ELSIF", "ELSE", "END_IF", "CASE", "OF", "END_CASE", "FOR", "TO",
        "BY", "DO", "END_FOR", "WHILE", "END_WHILE", "REPEAT", "UNTIL", "END_REPEAT",
        "RETURN", "EXIT", "PROGRAM", "END_PROGRAM", "FUNCTION", "END_FUNCTION",
        "FUNCTION_BLOCK", "END_FUNCTION_BLOCK", "METHOD", "END_METHOD", "ACTION",
        "END_ACTION", "TYPE", "END_TYPE", "STRUCT", "END_STRUCT", "VAR", "VAR_INPUT",
        "VAR_OUTPUT", "VAR_IN_OUT", "VAR_TEMP", "VAR_GLOBAL", "END_VAR", "CONSTANT",
        "RETAIN", "PERSISTENT", "BOOL", "BYTE", "WORD", "DWORD", "LWORD", "SINT", "INT",
        "DINT", "LINT", "USINT", "UINT", "UDINT", "ULINT", "REAL", "LREAL", "STRING",
        "WSTRING", "TIME", "DATE", "DATE_AND_TIME", "TIME_OF_DAY", "ARRAY", "AND", "OR",
        "XOR", "NOT", "MOD", "TRUE", "FALSE", "PUBLIC", "PRIVATE", "PROTECTED", "INTERNAL",
        "FINAL", "ABSTRACT", "EXTENDS",
    ],
    type_query: Some(include_str!("queries/structured_text.types.scm")),
    common_types: &[
        "BOOL", "BYTE", "WORD", "DWORD", "LWORD", "SINT", "INT", "DINT", "LINT", "USINT", "UINT",
        "UDINT", "ULINT", "REAL", "LREAL", "STRING", "WSTRING", "TIME", "DATE", "TON", "TOF", "TP",
        "CTU", "CTD", "CTUD", "R_TRIG", "F_TRIG",
    ],
    entry_point_names: &["Main", "MAIN"],
    doc_format: "block_comment",
    doc_convention: "Use (* ... *) block comments before declarations.",
    field_style: FieldStyle::NameFirst {
        separators: ":",
        strip_prefixes: "",
    },
    skip_line_prefixes: &[
        "VAR", "END_VAR", "FUNCTION", "END_FUNCTION", "PROGRAM", "END_PROGRAM",
    ],
    ..DEFAULTS
};
/// Returns the static Structured Text language definition.
pub fn definition_structured_text() -> &'static LanguageDef {
    &LANG_STRUCTURED_TEXT
}
// HTML heading tags promoted to named sections in Svelte markup.
const HEADING_TAGS_SVELTE: &[&str] = &["h1", "h2", "h3", "h4", "h5", "h6"];
// Landmark tags kept as sections, named by id / aria-label when present.
const LANDMARK_TAGS_SVELTE: &[&str] = &[
    "nav", "main", "header", "footer", "aside", "section", "article", "form",
];
// Generic markup tags that are dropped unless they carry an `id` attribute.
const NOISE_TAGS_SVELTE: &[&str] = &[
    "div", "span", "p", "ul", "ol", "li", "table", "tr", "td", "th", "dl", "dt", "dd",
    "figure", "figcaption", "details", "summary", "blockquote", "pre", "code", "a", "img",
    "button", "input", "label", "select", "textarea", "option",
];
/// Post-processes Svelte chunks: headings become sections named by their
/// inner text, script/style elements become modules named by their src/lang
/// attributes, landmark tags become sections named by id or aria-label, and
/// generic noise tags are kept only when they carry an id.
fn post_process_svelte_svelte(
    name: &mut String,
    chunk_type: &mut ChunkType,
    node: tree_sitter::Node,
    source: &str,
) -> bool {
    let tag = name.to_lowercase();
    if HEADING_TAGS_SVELTE.contains(&tag.as_str()) {
        *chunk_type = ChunkType::Section;
        // The heading text sits between the first '>' and the next '<'.
        let inner = source[node.byte_range()]
            .split('>')
            .nth(1)
            .and_then(|rest| rest.split('<').next())
            .map(str::trim)
            .unwrap_or("");
        if !inner.is_empty() {
            *name = inner.to_string();
        }
        true
    } else if tag == "script" || tag == "style" {
        *chunk_type = ChunkType::Module;
        if let Some(st) = find_child_by_kind_html(node, "start_tag") {
            if let Some(src_val) = find_attribute_value_html(st, "src", source) {
                *name = format!("script:{src_val}");
            } else if let Some(lang_val) = find_attribute_value_html(st, "lang", source) {
                *name = format!("{tag}:{lang_val}");
            }
        }
        true
    } else if LANDMARK_TAGS_SVELTE.contains(&tag.as_str()) {
        *chunk_type = ChunkType::Section;
        if let Some(st) = find_child_by_kind_html(node, "start_tag") {
            if let Some(id) = find_attribute_value_html(st, "id", source) {
                *name = format!("{tag}#{id}");
            } else if let Some(label) = find_attribute_value_html(st, "aria-label", source) {
                *name = format!("{tag}:{label}");
            }
        }
        true
    } else if NOISE_TAGS_SVELTE.contains(&tag.as_str()) {
        // Noise tags survive only when an id gives them a stable anchor.
        let start_tag = find_child_by_kind_html(node, "start_tag")
            .or_else(|| find_child_by_kind_html(node, "self_closing_tag"));
        if let Some(st) = start_tag {
            if let Some(id) = find_attribute_value_html(st, "id", source) {
                *name = format!("{tag}#{id}");
                *chunk_type = ChunkType::Property;
                return true;
            }
        }
        false
    } else {
        true
    }
}
/// Language definition for Svelte single-file components: breadcrumb
/// signatures, markup post-processing, and script/style content injection.
static LANG_SVELTE: LanguageDef = LanguageDef {
    name: "svelte",
    grammar: Some(|| tree_sitter_svelte::LANGUAGE.into()),
    extensions: &["svelte"],
    chunk_query: include_str!("queries/svelte.chunks.scm"),
    signature_style: SignatureStyle::Breadcrumb,
    doc_nodes: &["comment"],
    // HTML tag/attribute names plus Svelte template keywords.
    stopwords: &[
        "div", "span", "p", "a", "img", "ul", "ol", "li", "table", "tr", "td", "th", "form",
        "input", "button", "label", "select", "option", "textarea", "br", "hr", "head",
        "body", "html", "meta", "link", "title", "script", "style", "class", "id", "href",
        "src", "alt", "type", "value", "name", "slot", "each", "if", "else", "await", "then",
        "catch", "key", "let", "const", "export", "import", "bind", "on", "use",
        "transition", "animate", "in", "out",
    ],
    post_process_chunk: Some(post_process_svelte_svelte),
    injections: &[
        InjectionRule {
            container_kind: "script_element",
            content_kind: "raw_text",
            target_language: "javascript",
            detect_language: Some(detect_script_language_html),
            content_scoped_lines: false,
        },
        InjectionRule {
            container_kind: "style_element",
            content_kind: "raw_text",
            target_language: "css",
            detect_language: None,
            content_scoped_lines: false,
        },
    ],
    ..DEFAULTS
};
/// Returns the static Svelte language definition.
pub fn definition_svelte() -> &'static LanguageDef {
    &LANG_SVELTE
}
/// Extracts the `-> T` return type from a Swift signature as a
/// "Returns ..." phrase. `Void` — and its empty-tuple spelling `()` — is
/// treated as no return value.
fn extract_return_swift(signature: &str) -> Option<String> {
    let arrow_pos = signature.find("->")?;
    let after_arrow = &signature[arrow_pos + 2..];
    let end_pos = after_arrow.find('{').unwrap_or(after_arrow.len());
    let ret_type = after_arrow[..end_pos].trim();
    // `()` is Swift's empty tuple, synonymous with Void; previously it
    // slipped through and produced an empty "Returns " phrase.
    if ret_type.is_empty() || ret_type == "Void" || ret_type == "()" {
        return None;
    }
    let ret_words = crate::nl::tokenize_identifier(ret_type).join(" ");
    Some(format!("Returns {}", ret_words))
}
/// Post-processes Swift chunks.
///
/// - `init_declaration` nodes (and methods named `init`) become constructors;
///   anonymous init declarations are renamed to `init`.
/// - `test`-prefixed functions/methods become tests.
/// - `class_declaration` nodes are reclassified by inspecting their children:
///   an `enum_class_body` child means Enum; otherwise the introducing keyword
///   token (`struct`/`actor`/`extension`) decides, since the Swift grammar
///   parses all of these declaration forms as `class_declaration`.
fn post_process_swift_swift(
    name: &mut String,
    chunk_type: &mut ChunkType,
    node: tree_sitter::Node,
    source: &str,
) -> bool {
    if node.kind() == "init_declaration" {
        *chunk_type = ChunkType::Constructor;
        if name == "<anonymous>" {
            *name = "init".to_string();
        }
        return true;
    }
    if matches!(*chunk_type, ChunkType::Function | ChunkType::Method) && name == "init" {
        *chunk_type = ChunkType::Constructor;
        return true;
    }
    if matches!(*chunk_type, ChunkType::Function | ChunkType::Method) && name.starts_with("test") {
        *chunk_type = ChunkType::Test;
        return true;
    }
    if node.kind() != "class_declaration" {
        return true;
    }
    let _span = tracing::debug_span!("post_process_swift", kind = node.kind()).entered();
    let mut cursor = node.walk();
    let mut has_enum_body = false;
    let mut keyword = "";
    for child in node.children(&mut cursor) {
        match child.kind() {
            "enum_class_body" => has_enum_body = true,
            // Unnamed children are raw keyword tokens; a later match
            // overwrites an earlier one.
            _ if !child.is_named() => {
                let text = &source[child.byte_range()];
                match text {
                    "struct" => keyword = "struct",
                    "class" => keyword = "class",
                    "actor" => keyword = "actor",
                    "extension" => keyword = "extension",
                    _ => {}
                }
            }
            _ => {}
        }
    }
    if has_enum_body {
        *chunk_type = ChunkType::Enum;
        tracing::debug!("Reclassified class_declaration as Enum (has enum_class_body)");
    } else {
        match keyword {
            "struct" => {
                *chunk_type = ChunkType::Struct;
                tracing::debug!("Reclassified class_declaration as Struct");
            }
            "actor" => {
                // Actors keep the Class chunk type.
                tracing::debug!("Reclassified class_declaration as Class (actor)");
            }
            "extension" => {
                *chunk_type = ChunkType::Extension;
                tracing::debug!("Reclassified class_declaration as Extension");
            }
            _ => {}
        }
    }
    true
}
/// Language definition for Swift: XCTest detection, Swift doc-comment
/// convention, and struct/enum/actor/extension reclassification via
/// `post_process_swift_swift`.
static LANG_SWIFT: LanguageDef = LanguageDef {
    name: "swift",
    grammar: Some(|| tree_sitter_swift::LANGUAGE.into()),
    extensions: &["swift"],
    chunk_query: include_str!("queries/swift.chunks.scm"),
    call_query: Some(include_str!("queries/swift.calls.scm")),
    doc_nodes: &["comment", "multiline_comment"],
    method_containers: &["class_body"],
    stopwords: &[
        "func", "var", "let", "class", "struct", "enum", "protocol", "extension", "actor",
        "import", "return", "if", "else", "guard", "switch", "case", "for", "while",
        "repeat", "break", "continue", "self", "super", "nil", "true", "false", "is", "as",
        "in", "try", "catch", "throw", "throws", "async", "await", "public", "private",
        "internal", "open", "fileprivate", "static", "final", "override", "mutating",
        "typealias", "where", "some", "any",
    ],
    extract_return_nl: extract_return_swift,
    test_file_suggestion: Some(|stem, parent| format!("{parent}/{stem}Tests.swift")),
    test_name_suggestion: Some(|name| super::pascal_test_name("test", name)),
    type_query: Some(include_str!("queries/swift.types.scm")),
    common_types: &[
        "String", "Int", "Double", "Float", "Bool", "Character", "UInt", "Int8", "Int16",
        "Int32", "Int64", "UInt8", "UInt16", "UInt32", "UInt64", "Optional", "Array",
        "Dictionary", "Set", "Any", "AnyObject", "Void", "Never", "Error", "Codable",
        "Equatable", "Hashable", "Comparable", "Identifiable", "CustomStringConvertible",
    ],
    container_body_kinds: &["class_body", "protocol_body"],
    post_process_chunk: Some(post_process_swift_swift),
    test_markers: &["func test"],
    test_path_patterns: &["%/Tests/%", "%Tests.swift"],
    entry_point_names: &["main"],
    trait_method_names: &["hash", "encode", "init", "deinit", "description"],
    doc_format: "javadoc",
    doc_convention: "Use Swift doc comments: - Parameters:, - Returns:, - Throws: sections.",
    field_style: FieldStyle::NameFirst {
        separators: ":",
        strip_prefixes: "let var private public internal fileprivate open static weak lazy",
    },
    skip_line_prefixes: &["class ", "struct ", "enum ", "protocol "],
    ..DEFAULTS
};
/// Returns the static Swift language definition.
pub fn definition_swift() -> &'static LanguageDef {
    &LANG_SWIFT
}
/// Post-processes a TOML chunk before it is emitted.
///
/// Drops `pair` nodes that sit directly inside a `table` or
/// `table_array_element` (the table itself is the chunk), and strips
/// surrounding double quotes from quoted keys (`"key"` -> `key`).
fn post_process_toml_toml(
    name: &mut String,
    _chunk_type: &mut ChunkType,
    node: tree_sitter::Node,
    _source: &str,
) -> bool {
    if node.kind() == "pair" {
        if let Some(parent) = node.parent() {
            if parent.kind() == "table" || parent.kind() == "table_array_element" {
                return false;
            }
        }
    }
    // Unquote the key name. `strip_prefix` + `strip_suffix` replaces the
    // manual starts_with/ends_with/len slicing and also guards the
    // degenerate single-`"` name (the suffix strip fails on the empty rest).
    if let Some(inner) = name
        .strip_prefix('"')
        .and_then(|rest| rest.strip_suffix('"'))
    {
        *name = inner.to_string();
    }
    true
}
/// TOML entries have no return types; never produces a "Returns ..." phrase.
fn extract_return_toml(_signature: &str) -> Option<String> {
    None
}
/// Language definition for TOML files. Chunks come from
/// `queries/toml_lang.chunks.scm`; no call or type extraction is configured.
static LANG_TOML: LanguageDef = LanguageDef {
    name: "toml",
    grammar: Some(|| tree_sitter_toml::LANGUAGE.into()),
    extensions: &["toml"],
    chunk_query: include_str!("queries/toml_lang.chunks.scm"),
    // The first line (e.g. a table header) serves as the signature.
    signature_style: SignatureStyle::FirstLine,
    doc_nodes: &["comment"],
    stopwords: &["true", "false"],
    extract_return_nl: extract_return_toml,
    post_process_chunk: Some(post_process_toml_toml),
    ..DEFAULTS
};
/// Returns the static language definition for TOML.
pub fn definition_toml() -> &'static LanguageDef {
    &LANG_TOML
}
/// Reports whether `node` has any ancestor that is a TypeScript function-like
/// construct (declaration, expression, arrow, method, or generator).
fn is_inside_function_typescript(node: tree_sitter::Node) -> bool {
    let mut ancestor = node.parent();
    while let Some(current) = ancestor {
        if matches!(
            current.kind(),
            "function_declaration"
                | "function_expression"
                | "arrow_function"
                | "method_definition"
                | "generator_function_declaration"
                | "generator_function"
        ) {
            return true;
        }
        ancestor = current.parent();
    }
    false
}
/// Post-processes a TypeScript/TSX chunk.
///
/// Drops constant/variable chunks that are declared inside a function body,
/// and bindings whose initializer is itself a function (arrow, expression,
/// or `function`). Reclassifies a method named `constructor` as
/// `ChunkType::Constructor`.
fn post_process_typescript_typescript(
    // Renamed from `_name`: the underscore prefix was misleading because the
    // parameter is read below for the `constructor` check.
    name: &mut String,
    chunk_type: &mut ChunkType,
    node: tree_sitter::Node,
    _source: &str,
) -> bool {
    if *chunk_type == ChunkType::Constant || *chunk_type == ChunkType::Variable {
        if is_inside_function_typescript(node) {
            return false;
        }
        if let Some(value) = node.child_by_field_name("value") {
            let kind = value.kind();
            if kind == "arrow_function" || kind == "function_expression" || kind == "function" {
                return false;
            }
        }
    }
    if *chunk_type == ChunkType::Method && name == "constructor" {
        *chunk_type = ChunkType::Constructor;
    }
    true
}
/// Builds a natural-language "Returns ..." phrase from the type annotation
/// that follows the final `):` of a TypeScript signature, if any.
fn extract_return_typescript(signature: &str) -> Option<String> {
    let colon = signature.rfind("):")?;
    let annotation = signature[colon + 2..].trim();
    if annotation.is_empty() {
        None
    } else {
        let words = crate::nl::tokenize_identifier(annotation).join(" ");
        Some(format!("Returns {}", words))
    }
}
/// Language definition for TypeScript and TSX sources.
static LANG_TYPESCRIPT: LanguageDef = LanguageDef {
    name: "typescript",
    grammar: Some(|| tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into()),
    extensions: &["ts", "tsx"],
    chunk_query: include_str!("queries/typescript.chunks.scm"),
    call_query: Some(include_str!("queries/typescript.calls.scm")),
    doc_nodes: &["comment"],
    method_containers: &["class_body", "class_declaration"],
    // TypeScript keywords and common literals.
    stopwords: &[
        "function",
        "const",
        "let",
        "var",
        "return",
        "if",
        "else",
        "for",
        "while",
        "do",
        "switch",
        "case",
        "break",
        "continue",
        "new",
        "this",
        "class",
        "extends",
        "import",
        "export",
        "from",
        "default",
        "try",
        "catch",
        "finally",
        "throw",
        "async",
        "await",
        "true",
        "false",
        "null",
        "undefined",
        "typeof",
        "instanceof",
        "void",
    ],
    extract_return_nl: extract_return_typescript,
    test_file_suggestion: Some(|stem, parent| format!("{parent}/{stem}.test.ts")),
    test_name_suggestion: Some(|name| format!("test('{}', ...)", name)),
    type_query: Some(include_str!("queries/typescript.types.scm")),
    // Built-in primitives plus standard utility/global types.
    common_types: &[
        "string",
        "number",
        "boolean",
        "void",
        "null",
        "undefined",
        "any",
        "never",
        "unknown",
        "Array",
        "Map",
        "Set",
        "Promise",
        "Record",
        "Partial",
        "Required",
        "Readonly",
        "Pick",
        "Omit",
        "Exclude",
        "Extract",
        "NonNullable",
        "ReturnType",
        "Date",
        "Error",
        "RegExp",
        "Function",
        "Object",
        "Symbol",
    ],
    container_body_kinds: &["class_body"],
    post_process_chunk: Some(post_process_typescript_typescript as PostProcessChunkFn),
    // Jest/Vitest/Mocha-style markers and conventional test-file paths.
    test_markers: &["describe(", "it(", "test("],
    test_path_patterns: &["%.test.%", "%.spec.%", "%/tests/%"],
    entry_point_names: &[
        "handler",
        "middleware",
        "beforeEach",
        "afterEach",
        "beforeAll",
        "afterAll",
    ],
    trait_method_names: &["toString", "valueOf", "toJSON"],
    doc_format: "javadoc",
    doc_convention: "Use JSDoc format: @param {type} name, @returns {type}, @throws {type}.",
    field_style: FieldStyle::NameFirst {
        separators: ":=;",
        strip_prefixes: "public private protected readonly static",
    },
    skip_line_prefixes: &["class ", "interface ", "type ", "export "],
    ..DEFAULTS
};
/// Returns the static language definition for TypeScript.
pub fn definition_typescript() -> &'static LanguageDef {
    &LANG_TYPESCRIPT
}
/// Post-processes a VB.NET chunk.
///
/// Constructor declarations are renamed to `New` and classified as
/// constructors. For functions/methods, contiguous preceding attribute
/// siblings (`<Test>`, `<Fact>`, `<HttpGet>`, ...) reclassify the chunk as a
/// test or HTTP endpoint; failing that, the declaration's first line is
/// scanned for the same markers inline.
fn post_process_vbnet_vbnet(
    name: &mut String,
    kind: &mut ChunkType,
    node: tree_sitter::Node,
    // Renamed from `_source`: the underscore prefix was misleading because
    // the parameter is sliced repeatedly below.
    source: &str,
) -> bool {
    if node.kind() == "constructor_declaration" {
        *name = "New".to_string();
        *kind = ChunkType::Constructor;
        return true;
    }
    if matches!(*kind, ChunkType::Function | ChunkType::Method) {
        let node_text = &source[node.byte_range()];
        // First line of the declaration, capped at 300 bytes.
        // NOTE(review): `str::floor_char_boundary` is an unstable std API, so
        // this crate presumably builds on nightly — confirm.
        let header = if let Some(pos) = node_text.find('\n') {
            &node_text[..node_text.floor_char_boundary(pos.min(300))]
        } else {
            &node_text[..node_text.floor_char_boundary(300)]
        };
        // Walk backwards over contiguous attribute siblings only; stop at
        // the first non-attribute node.
        let mut prev = node.prev_named_sibling();
        while let Some(sib) = prev {
            if sib.kind() == "attribute_list" || sib.kind() == "attribute" {
                let attr_text = &source[sib.byte_range()];
                if attr_text.contains("Test")
                    || attr_text.contains("Fact")
                    || attr_text.contains("Theory")
                    || attr_text.contains("TestMethod")
                {
                    *kind = ChunkType::Test;
                    return true;
                }
                if attr_text.contains("HttpGet")
                    || attr_text.contains("HttpPost")
                    || attr_text.contains("HttpPut")
                    || attr_text.contains("HttpDelete")
                    || attr_text.contains("HttpPatch")
                {
                    *kind = ChunkType::Endpoint;
                    return true;
                }
            } else {
                break;
            }
            prev = sib.prev_named_sibling();
        }
        // Fallback: attribute markers appearing inline on the header line.
        if header.contains("<Test>")
            || header.contains("<Fact>")
            || header.contains("<Theory>")
            || header.contains("<TestMethod>")
        {
            *kind = ChunkType::Test;
        } else if header.contains("<HttpGet>")
            || header.contains("<HttpPost>")
            || header.contains("<HttpPut>")
            || header.contains("<HttpDelete>")
        {
            *kind = ChunkType::Endpoint;
        }
    }
    true
}
/// Builds a natural-language "Returns ..." phrase from the `As <Type>`
/// clause following the parameter list of a VB.NET signature.
fn extract_return_vbnet(signature: &str) -> Option<String> {
    if let Some(paren_close) = signature.rfind(')') {
        let after = signature[paren_close + 1..].trim();
        if let Some(rest) = after
            .strip_prefix("As")
            .or_else(|| after.strip_prefix("as"))
        {
            // Bug fix: require a separator after the keyword so tokens that
            // merely start with "As" (e.g. "Async") are not misread as a
            // return clause.
            if rest.starts_with(char::is_whitespace) {
                let ret_type = rest.split_whitespace().next()?;
                if !ret_type.is_empty() {
                    let ret_words = crate::nl::tokenize_identifier(ret_type).join(" ");
                    return Some(format!("Returns {}", ret_words));
                }
            }
        }
    }
    None
}
/// Language definition for VB.NET sources.
static LANG_VBNET: LanguageDef = LanguageDef {
    name: "vbnet",
    grammar: Some(|| tree_sitter_vb_dotnet::LANGUAGE.into()),
    extensions: &["vb"],
    chunk_query: include_str!("queries/vbnet.chunks.scm"),
    call_query: Some(include_str!("queries/vbnet.calls.scm")),
    signature_style: SignatureStyle::FirstLine,
    doc_nodes: &["comment"],
    method_containers: &[
        "class_block",
        "module_block",
        "structure_block",
        "interface_block",
    ],
    // VB.NET keywords, modifiers, primitive type names, and operators.
    stopwords: &[
        "public",
        "private",
        "protected",
        "friend",
        "shared",
        "readonly",
        "mustinherit",
        "notinheritable",
        "mustoverride",
        "overridable",
        "overrides",
        "overloads",
        "shadows",
        "class",
        "module",
        "structure",
        "interface",
        "enum",
        "namespace",
        "imports",
        "return",
        "if",
        "then",
        "else",
        "elseif",
        "end",
        "for",
        "each",
        "next",
        "while",
        "do",
        "loop",
        "select",
        "case",
        "exit",
        "continue",
        "new",
        "me",
        "mybase",
        "myclass",
        "try",
        "catch",
        "finally",
        "throw",
        "dim",
        "as",
        "sub",
        "function",
        "property",
        "event",
        "delegate",
        "integer",
        "string",
        "boolean",
        "double",
        "single",
        "long",
        "byte",
        "char",
        "decimal",
        "short",
        "object",
        "true",
        "false",
        "nothing",
        "void",
        "get",
        "set",
        "value",
        "where",
        "partial",
        "of",
        "in",
        "out",
        "byval",
        "byref",
        "optional",
        "paramarray",
        "handles",
        "withevents",
        "addhandler",
        "removehandler",
        "raiseevent",
        "not",
        "and",
        "or",
        "andalso",
        "orelse",
        "xor",
        "mod",
        "like",
        "is",
        "isnot",
        "with",
        "using",
        "synclock",
        "redim",
        "preserve",
        "goto",
    ],
    extract_return_nl: extract_return_vbnet,
    test_file_suggestion: Some(|stem, parent| format!("{parent}/{stem}Tests.vb")),
    type_query: Some(include_str!("queries/vbnet.types.scm")),
    // Primitive and common .NET framework type names.
    common_types: &[
        "String",
        "Integer",
        "Boolean",
        "Object",
        "Double",
        "Single",
        "Long",
        "Byte",
        "Char",
        "Decimal",
        "Short",
        "UInteger",
        "ULong",
        "Task",
        "ValueTask",
        "List",
        "Dictionary",
        "HashSet",
        "Queue",
        "Stack",
        "IEnumerable",
        "IList",
        "IDictionary",
        "ICollection",
        "IQueryable",
        "Action",
        "Func",
        "Predicate",
        "EventHandler",
        "EventArgs",
        "IDisposable",
        "CancellationToken",
        "ILogger",
        "StringBuilder",
        "Exception",
        "Nullable",
    ],
    post_process_chunk: Some(post_process_vbnet_vbnet),
    // NUnit / xUnit / MSTest attribute markers.
    test_markers: &["<Test>", "<Fact>", "<Theory>", "<TestMethod>"],
    test_path_patterns: &["%/Tests/%", "%/tests/%", "%Tests.vb"],
    entry_point_names: &["Main"],
    trait_method_names: &[
        "Equals",
        "GetHashCode",
        "ToString",
        "CompareTo",
        "Dispose",
        "GetEnumerator",
        "MoveNext",
    ],
    doc_convention: "Use XML doc comments: <summary>, <param>, <returns> tags.",
    skip_line_prefixes: &["Class ", "Structure ", "Interface ", "Enum "],
    ..DEFAULTS
};
/// Returns the static language definition for VB.NET.
pub fn definition_vbnet() -> &'static LanguageDef {
    &LANG_VBNET
}
/// Heading tags promoted to `ChunkType::Section` in Vue templates.
const HEADING_TAGS_VUE: &[&str] = &["h1", "h2", "h3", "h4", "h5", "h6"];
/// Structural/landmark tags that become sections named via `id` or
/// `aria-label` (see `post_process_vue_vue`).
const LANDMARK_TAGS_VUE: &[&str] = &[
    "nav", "main", "header", "footer", "aside", "section", "article", "form",
];
/// Generic markup tags dropped unless they carry an `id` attribute.
const NOISE_TAGS_VUE: &[&str] = &[
    "div",
    "span",
    "p",
    "ul",
    "ol",
    "li",
    "table",
    "tr",
    "td",
    "th",
    "dl",
    "dt",
    "dd",
    "figure",
    "figcaption",
    "details",
    "summary",
    "blockquote",
    "pre",
    "code",
    "a",
    "img",
    "button",
    "input",
    "label",
    "select",
    "textarea",
    "option",
];
/// Post-processes a Vue SFC chunk, deriving a better name from the markup.
///
/// * Headings (`h1`..`h6`) become sections named by their inner text.
/// * SFC top-level blocks (`script`, `style`, `template`) become modules
///   named `tag:src`, `tag:lang`, or `script:setup` by attribute.
/// * Landmark tags become sections named `tag#id` or `tag:aria-label`.
/// * Noise tags are kept (as properties) only when they carry an `id`.
fn post_process_vue_vue(
    name: &mut String,
    chunk_type: &mut ChunkType,
    node: tree_sitter::Node,
    source: &str,
) -> bool {
    let tag = name.to_lowercase();
    if HEADING_TAGS_VUE.contains(&tag.as_str()) {
        *chunk_type = ChunkType::Section;
        // Extract the text between the start and end tags: `<h1>text</h1>`.
        let content = &source[node.byte_range()];
        let text = content
            .split('>')
            .nth(1)
            .and_then(|s| s.split('<').next())
            .map(|s| s.trim().to_string())
            .unwrap_or_default();
        if !text.is_empty() {
            *name = text;
        }
        return true;
    }
    if tag == "script" || tag == "style" || tag == "template" {
        *chunk_type = ChunkType::Module;
        let start_tag = find_child_by_kind_html(node, "start_tag");
        if let Some(st) = start_tag {
            if let Some(src_val) = find_attribute_value_html(st, "src", source) {
                // Bug fix: use the actual tag instead of a hardcoded
                // "script" prefix — Vue SFCs allow `src` imports on
                // `<template>` and `<style>` blocks as well.
                *name = format!("{tag}:{src_val}");
                return true;
            }
            if let Some(lang_val) = find_attribute_value_html(st, "lang", source) {
                *name = format!("{tag}:{lang_val}");
                return true;
            }
            if has_attribute_html(st, "setup", source) {
                *name = "script:setup".to_string();
                return true;
            }
        }
        return true;
    }
    if LANDMARK_TAGS_VUE.contains(&tag.as_str()) {
        *chunk_type = ChunkType::Section;
        let start_tag = find_child_by_kind_html(node, "start_tag");
        if let Some(st) = start_tag {
            if let Some(id) = find_attribute_value_html(st, "id", source) {
                *name = format!("{tag}#{id}");
                return true;
            }
            if let Some(label) = find_attribute_value_html(st, "aria-label", source) {
                *name = format!("{tag}:{label}");
                return true;
            }
        }
        return true;
    }
    if NOISE_TAGS_VUE.contains(&tag.as_str()) {
        // Self-closing noise tags have no `start_tag` child, so fall back.
        let start_tag = find_child_by_kind_html(node, "start_tag")
            .or_else(|| find_child_by_kind_html(node, "self_closing_tag"));
        if let Some(st) = start_tag {
            if let Some(id) = find_attribute_value_html(st, "id", source) {
                *name = format!("{tag}#{id}");
                *chunk_type = ChunkType::Property;
                return true;
            }
        }
        return false;
    }
    true
}
/// Language definition for Vue single-file components. Embedded `<script>`
/// and `<style>` contents are re-parsed via the `injections` rules below.
#[cfg(feature = "lang-vue")]
static LANG_VUE: LanguageDef = LanguageDef {
    name: "vue",
    grammar: Some(|| tree_sitter_vue::LANGUAGE.into()),
    extensions: &["vue"],
    chunk_query: include_str!("queries/vue.chunks.scm"),
    signature_style: SignatureStyle::Breadcrumb,
    doc_nodes: &["comment"],
    // HTML tag names, common attributes, Vue directives, and script keywords.
    stopwords: &[
        "div",
        "span",
        "p",
        "a",
        "img",
        "ul",
        "ol",
        "li",
        "table",
        "tr",
        "td",
        "th",
        "form",
        "input",
        "button",
        "label",
        "select",
        "option",
        "textarea",
        "br",
        "hr",
        "head",
        "body",
        "html",
        "meta",
        "link",
        "title",
        "script",
        "style",
        "class",
        "id",
        "href",
        "src",
        "alt",
        "type",
        "value",
        "name",
        "slot",
        "template",
        "component",
        "transition",
        "keep",
        "alive",
        "teleport",
        "suspense",
        "v-if",
        "v-else",
        "v-for",
        "v-show",
        "v-bind",
        "v-on",
        "v-model",
        "v-slot",
        "v-html",
        "const",
        "let",
        "var",
        "export",
        "import",
        "default",
        "ref",
        "reactive",
        "computed",
        "watch",
        "defineProps",
        "defineEmits",
        "defineExpose",
        "withDefaults",
    ],
    post_process_chunk: Some(post_process_vue_vue),
    // Re-parse embedded script/style blocks with the appropriate grammar.
    injections: &[
        InjectionRule {
            container_kind: "script_element",
            content_kind: "raw_text",
            target_language: "javascript",
            detect_language: Some(detect_script_language_html),
            content_scoped_lines: false,
        },
        InjectionRule {
            container_kind: "style_element",
            content_kind: "raw_text",
            target_language: "css",
            detect_language: None,
            content_scoped_lines: false,
        },
    ],
    ..DEFAULTS
};
/// Returns the static language definition for Vue.
// NOTE(review): `LANG_VUE` is gated on `feature = "lang-vue"` but this
// accessor is not — presumably the whole module is compiled only with that
// feature; confirm, otherwise this fails to build when the feature is off.
pub fn definition_vue() -> &'static LanguageDef {
    &LANG_VUE
}
/// XML elements have no return types; never produces a "Returns ..." phrase.
fn extract_return_xml(_signature: &str) -> Option<String> {
    None
}
/// Keeps XML processing instructions, direct children of the document root,
/// and nodes whose `content` parent belongs to a root-level element;
/// everything nested deeper is dropped.
fn post_process_xml_xml(
    _name: &mut String,
    _chunk_type: &mut ChunkType,
    node: tree_sitter::Node,
    _source: &str,
) -> bool {
    // Processing instructions (`<?...?>`) are always kept.
    if node.kind() == "PI" {
        return true;
    }
    let Some(parent) = node.parent() else {
        return false;
    };
    match parent.kind() {
        // Direct child of the document root.
        "document" => true,
        // `document > element > content > node`: keep second-level nodes.
        "content" => parent
            .parent()
            .filter(|gp| gp.kind() == "element")
            .and_then(|gp| gp.parent())
            .is_some_and(|ggp| ggp.kind() == "document"),
        _ => false,
    }
}
/// Language definition for XML and XML-based formats (XSLT, SVG, plist, and
/// related extensions).
static LANG_XML: LanguageDef = LanguageDef {
    name: "xml",
    grammar: Some(|| tree_sitter_xml::LANGUAGE_XML.into()),
    extensions: &[
        "xml", "xsl", "xslt", "xsd", "svg", "wsdl", "rss", "plist", "l5x", "l5k",
    ],
    chunk_query: include_str!("queries/xml.chunks.scm"),
    signature_style: SignatureStyle::FirstLine,
    // The XML grammar capitalizes its comment node kind.
    doc_nodes: &["Comment"],
    stopwords: &[
        "xml",
        "xmlns",
        "version",
        "encoding",
        "standalone",
        "xsi",
        "xsd",
        "type",
        "name",
        "value",
    ],
    extract_return_nl: extract_return_xml,
    post_process_chunk: Some(post_process_xml_xml),
    ..DEFAULTS
};
/// Returns the static language definition for XML.
pub fn definition_xml() -> &'static LanguageDef {
    &LANG_XML
}
/// YAML entries have no return types; never produces a "Returns ..." phrase.
fn extract_return_yaml(_signature: &str) -> Option<String> {
    None
}
/// Keeps YAML chunks close to the document root: the node's grandparent (or
/// great-grandparent) must be a `stream` or `document` node. Nodes with
/// fewer than two ancestors are always kept.
fn post_process_yaml_yaml(
    _name: &mut String,
    _chunk_type: &mut ChunkType,
    node: tree_sitter::Node,
    _source: &str,
) -> bool {
    let Some(grandparent) = node.parent().and_then(|p| p.parent()) else {
        return true;
    };
    let is_root = |kind: &str| kind == "stream" || kind == "document";
    is_root(grandparent.kind())
        || grandparent
            .parent()
            .is_some_and(|ggp| is_root(ggp.kind()))
}
/// Language definition for YAML files.
static LANG_YAML: LanguageDef = LanguageDef {
    name: "yaml",
    grammar: Some(|| tree_sitter_yaml::LANGUAGE.into()),
    extensions: &["yaml", "yml"],
    chunk_query: include_str!("queries/yaml.chunks.scm"),
    signature_style: SignatureStyle::FirstLine,
    doc_nodes: &["comment"],
    // YAML boolean/null literals (including the legacy yes/no/on/off forms).
    stopwords: &["true", "false", "null", "yes", "no", "on", "off"],
    extract_return_nl: extract_return_yaml,
    post_process_chunk: Some(post_process_yaml_yaml),
    ..DEFAULTS
};
/// Returns the static language definition for YAML.
pub fn definition_yaml() -> &'static LanguageDef {
    &LANG_YAML
}
/// Post-processes a Zig chunk.
///
/// * `test` declarations become `ChunkType::Test`, named after the test's
///   string/identifier label (or `anonymous_test` when unnamed).
/// * `const`/`var` declarations are reclassified by scanning the declaration
///   text for `struct` / `enum` / `union` / `error{...}` keywords.
/// * Functions declared `extern` become `ChunkType::Extern`.
fn post_process_zig_zig(
    name: &mut String,
    chunk_type: &mut ChunkType,
    node: tree_sitter::Node,
    source: &str,
) -> bool {
    let kind = node.kind();
    if kind == "test_declaration" {
        *chunk_type = ChunkType::Test;
        for i in 0..node.named_child_count() {
            if let Some(child) = node.named_child(i as u32) {
                if child.kind() == "string" || child.kind() == "identifier" {
                    // `test "name" { ... }` — strip the quotes from the label.
                    // Slicing via byte_range() for consistency with the
                    // other branches and sibling post-process functions.
                    let text = &source[child.byte_range()];
                    let clean = text.trim_matches('"');
                    *name = clean.to_string();
                    return true;
                }
            }
        }
        *name = "anonymous_test".to_string();
        return true;
    }
    if kind == "variable_declaration" {
        // Classify by the declaration text; first keyword match wins.
        let text = &source[node.byte_range()];
        if text.contains("struct") {
            *chunk_type = ChunkType::Struct;
        } else if text.contains("enum") {
            *chunk_type = ChunkType::Enum;
        } else if text.contains("union") {
            *chunk_type = ChunkType::TypeAlias;
        } else if text.contains("error{") || text.contains("error {") {
            *chunk_type = ChunkType::Enum;
        } else if text.starts_with("pub const ") || text.starts_with("const ") {
            *chunk_type = ChunkType::Constant;
        } else {
            *chunk_type = ChunkType::Variable;
        }
    }
    if matches!(*chunk_type, ChunkType::Function | ChunkType::Method) {
        let fn_text = &source[node.byte_range()];
        if fn_text.starts_with("extern ") || fn_text.starts_with("pub extern ") {
            *chunk_type = ChunkType::Extern;
        }
    }
    true
}
/// Builds a natural-language "Returns ..." phrase from the return type that
/// sits between the closing parameter paren and the opening body brace of a
/// Zig signature. The error-union `!` marker and uninformative types
/// (`void`, `noreturn`, `anytype`) are ignored.
fn extract_return_zig(signature: &str) -> Option<String> {
    let (_, tail) = signature.rsplit_once(')')?;
    // Everything up to the body brace (or the end of the signature).
    let raw = match tail.split_once('{') {
        Some((before_brace, _)) => before_brace,
        None => tail,
    }
    .trim();
    if matches!(raw, "" | "void" | "noreturn" | "anytype") {
        return None;
    }
    // Drop a leading `!` (error union) before tokenizing.
    let unwrapped = raw.strip_prefix('!').unwrap_or(raw).trim();
    if matches!(unwrapped, "" | "void") {
        return None;
    }
    let words = crate::nl::tokenize_identifier(unwrapped).join(" ");
    (!words.is_empty()).then(|| format!("Returns {}", words))
}
/// Language definition for Zig sources.
static LANG_ZIG: LanguageDef = LanguageDef {
    name: "zig",
    grammar: Some(|| tree_sitter_zig::LANGUAGE.into()),
    extensions: &["zig"],
    chunk_query: include_str!("queries/zig.chunks.scm"),
    call_query: Some(include_str!("queries/zig.calls.scm")),
    doc_nodes: &["doc_comment", "line_comment"],
    method_containers: &[
        "struct_declaration",
        "enum_declaration",
        "union_declaration",
    ],
    // Zig keywords, literals, and ubiquitous primitive type names.
    stopwords: &[
        "fn",
        "pub",
        "const",
        "var",
        "return",
        "if",
        "else",
        "for",
        "while",
        "break",
        "continue",
        "switch",
        "unreachable",
        "undefined",
        "null",
        "true",
        "false",
        "and",
        "or",
        "try",
        "catch",
        "comptime",
        "inline",
        "extern",
        "export",
        "struct",
        "enum",
        "union",
        "error",
        "test",
        "defer",
        "errdefer",
        "async",
        "await",
        "suspend",
        "resume",
        "nosuspend",
        "orelse",
        "anytype",
        "anyframe",
        "void",
        "noreturn",
        "type",
        "usize",
        "isize",
        "bool",
    ],
    extract_return_nl: extract_return_zig,
    type_query: Some(include_str!("queries/zig.types.scm")),
    // Built-in primitive and special types.
    common_types: &[
        "void",
        "noreturn",
        "bool",
        "u8",
        "u16",
        "u32",
        "u64",
        "u128",
        "usize",
        "i8",
        "i16",
        "i32",
        "i64",
        "i128",
        "isize",
        "f16",
        "f32",
        "f64",
        "f128",
        "anytype",
        "anyframe",
        "type",
        "anyerror",
        "anyopaque",
    ],
    container_body_kinds: &[
        "struct_declaration",
        "enum_declaration",
        "union_declaration",
    ],
    post_process_chunk: Some(post_process_zig_zig),
    test_markers: &["test "],
    test_path_patterns: &["%/tests/%", "%_test.zig"],
    entry_point_names: &["main"],
    doc_convention: "Use /// doc comments describing parameters and return values.",
    field_style: FieldStyle::NameFirst {
        separators: ":",
        strip_prefixes: "pub",
    },
    // NOTE(review): "pub const" lacks the trailing space that "const " and
    // the other languages' prefixes carry — the broader prefix match is
    // presumably harmless, but confirm it is intentional.
    skip_line_prefixes: &["const ", "pub const"],
    ..DEFAULTS
};
/// Returns the static language definition for Zig.
pub fn definition_zig() -> &'static LanguageDef {
    &LANG_ZIG
}
/// Language definition for ASP.NET Web Forms markup (`.aspx` and related
/// extensions). No tree-sitter grammar is configured, so only metadata
/// fields (extensions, stopwords, entry points) are populated.
static LANG_ASPX: LanguageDef = LanguageDef {
    name: "aspx",
    // Reformatted: `grammar` and `extensions` were packed onto one line.
    grammar: None,
    extensions: &["aspx", "ascx", "asmx", "master"],
    signature_style: SignatureStyle::FirstLine,
    // WebForms-specific vocabulary that carries little search signal.
    stopwords: &[
        "page",
        "control",
        "master",
        "runat",
        "server",
        "autopostback",
        "viewstate",
        "postback",
        "handler",
        "event",
        "sender",
        "eventargs",
        "codebehind",
        "inherits",
        "aspx",
        "ascx",
        "asmx",
    ],
    // Classic WebForms page-lifecycle handlers.
    entry_point_names: &["Page_Load", "Page_Init", "Page_PreRender"],
    ..DEFAULTS
};
/// Returns the static language definition for ASPX markup.
pub fn definition_aspx() -> &'static LanguageDef {
    &LANG_ASPX
}