use std::hash::{Hash, Hasher};
use tree_sitter::Node;
use crate::parser::Language;
/// Maximum number of steps taken when descending through nested C/C++ declarators.
const WALK_LIMIT: usize = 32;
/// Maximum number of ancestors inspected when collecting enclosing template parameter lists.
const MAX_PARENT_WALK_DEPTH: usize = 64;
/// Kind assigned to a syntax node when a language's rules classify it as an item.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub(super) enum ItemKind {
/// Function-like item (e.g. Rust `function_item`, Python/C `function_definition`,
/// Go `function_declaration`).
Function,
/// Method-like item (e.g. Rust `function_signature_item`, JS/TS `method_definition`,
/// Go/Java `method_declaration`).
Method,
/// Rust `impl_item`.
Impl,
/// Rust `mod_item`; also used for class-like and namespace containers in the other languages.
Module,
/// Rust `use_declaration`.
Use,
/// Rust `struct_item` or `union_item`.
Struct,
/// Rust `enum_item`.
Enum,
/// Rust `trait_item`.
Trait,
/// Rust `type_item`.
TypeAlias,
/// Rust `const_item`.
Const,
/// Rust `static_item`.
Static,
}
/// Identity of a Rust `use` declaration as reported by [`use_identity`].
#[derive(Clone, Debug)]
pub(super) enum UseIdentity {
/// Every path imported by the declaration, with its prefix applied, in the order collected.
Plain(Vec<String>),
/// The declaration has no `argument` field, or its shape is not handled by the leaf collector.
Unanalyzable,
}
/// Result of classifying a syntax node as an item.
pub(super) struct Classified<'a> {
/// Category of the item.
pub kind: ItemKind,
/// Name text taken from the source. For Rust `use` items this is the smallest imported
/// path, or [`USE_POISON_KEY`] when the paths cannot be determined.
pub name: String,
/// Body node of a container item; `None` for items classified without a body.
pub container_body: Option<Node<'a>>,
/// Signature hash for callables; `0` for items classified without one.
pub signature_hash: u64,
/// Additional scope components (Go receiver type, C/C++ qualifying scopes); empty otherwise.
pub extra_scope: Vec<String>,
}
/// Describes one node kind that a language lists as leading metadata, together with
/// its binding policy.
pub(super) struct MetadataRule {
/// Syntax node kind this rule applies to.
pub kind: &'static str,
/// Binding policy for nodes of that kind.
pub binding: MetadataBinding,
}
/// Policy attached to a [`MetadataRule`].
// NOTE(review): the policies are interpreted outside this section of the file; the variant
// descriptions below are inferred from the names and usages — confirm against the consumer.
pub(super) enum MetadataBinding {
/// Presumably: the node always binds to the item it precedes.
Always,
/// Presumably: the node binds only when no blank line separates it from the item.
NoBlankLine,
/// Used for Rust `line_comment` and `block_comment` nodes; presumably restricts binding
/// to outer comments.
RustOuterComment,
}
/// Per-language hooks used to classify syntax nodes and describe their leading metadata.
///
/// Implementations must be `Sync`; [`rules_for`] hands them out as `&'static dyn LanguageRules`.
pub(super) trait LanguageRules: Sync {
/// Classifies `node`; returns `None` when the implementation does not recognize it as an item.
fn classify_node<'a>(
&self,
language: Language,
source: &'a str,
node: Node<'a>,
) -> Option<Classified<'a>>;
/// Node kinds this language lists as leading metadata, with their binding policies.
fn leading_metadata_kinds(&self) -> &'static [MetadataRule];
/// Signature hash for `item_node`.
fn signature_hash(&self, language: Language, source: &str, item_node: Node<'_>) -> u64;
/// Extra scope components for `item_node`.
fn extra_scope(&self, language: Language, source: &str, item_node: Node<'_>) -> Vec<String>;
}
/// Default signature hash: hashes the node's `parameters` field, or yields `0` when the
/// node has no such field.
pub(super) fn generic_signature_hash(language: Language, source: &str, item_node: Node<'_>) -> u64 {
    const PARAMETERS_FIELD: &str = "parameters";
    signature_hash_from_field(language, source, item_node, PARAMETERS_FIELD)
}
/// Default `extra_scope` implementation: contributes no scope components.
///
/// All arguments are ignored; they exist so the function mirrors
/// `LanguageRules::extra_scope`.
pub(super) fn no_extra_scope(
    _language: Language,
    _source: &str,
    _item_node: Node<'_>,
) -> Vec<String> {
    vec![]
}
/// Looks up the rule set registered for `language`.
///
/// Returns `None` only for `Language::Unknown`. JavaScript and TypeScript share one rule
/// set, as do C and C++.
pub(super) fn rules_for(language: Language) -> Option<&'static dyn LanguageRules> {
    let rules: &'static dyn LanguageRules = match language {
        Language::Unknown => return None,
        Language::Rust => &RustRules,
        Language::Python => &PythonRules,
        Language::JavaScript | Language::TypeScript => &JsTsRules,
        Language::Go => &GoRules,
        Language::C | Language::Cpp => &CppRules,
        Language::Java => &JavaRules,
    };
    Some(rules)
}
/// Rule set for Rust sources.
pub(super) struct RustRules;
/// Leading-metadata rules for Rust: `attribute_item` uses the `Always` binding, while
/// `line_comment` and `block_comment` use the `RustOuterComment` binding.
static RUST_METADATA: &[MetadataRule] = &[
MetadataRule {
kind: "attribute_item",
binding: MetadataBinding::Always,
},
MetadataRule {
kind: "line_comment",
binding: MetadataBinding::RustOuterComment,
},
MetadataRule {
kind: "block_comment",
binding: MetadataBinding::RustOuterComment,
},
];
impl LanguageRules for RustRules {
fn classify_node<'a>(
&self,
language: Language,
source: &'a str,
node: Node<'a>,
) -> Option<Classified<'a>> {
let kind = node.kind();
match kind {
"function_item" => {
let name = name_from_field(source, node, "name")?;
let signature_hash = self.signature_hash(language, source, node);
Some(Classified {
kind: ItemKind::Function,
name,
container_body: None,
signature_hash,
extra_scope: Vec::new(),
})
}
"function_signature_item" => {
let name = name_from_field(source, node, "name")?;
let signature_hash = self.signature_hash(language, source, node);
Some(Classified {
kind: ItemKind::Method,
name,
container_body: None,
signature_hash,
extra_scope: Vec::new(),
})
}
"impl_item" => {
let name = rust_impl_name(source, node)?;
let container_body = node.child_by_field_name("body");
Some(Classified {
kind: ItemKind::Impl,
name,
container_body,
signature_hash: 0,
extra_scope: Vec::new(),
})
}
"mod_item" => {
let name = name_from_field(source, node, "name")?;
let container_body = node.child_by_field_name("body");
Some(Classified {
kind: ItemKind::Module,
name,
container_body,
signature_hash: 0,
extra_scope: Vec::new(),
})
}
"struct_item" => leaf_item(ItemKind::Struct, source, node, "name"),
"enum_item" => leaf_item(ItemKind::Enum, source, node, "name"),
"trait_item" => {
let name = name_from_field(source, node, "name")?;
let container_body = node.child_by_field_name("body");
Some(Classified {
kind: ItemKind::Trait,
name,
container_body,
signature_hash: 0,
extra_scope: Vec::new(),
})
}
"union_item" => leaf_item(ItemKind::Struct, source, node, "name"),
"type_item" => leaf_item(ItemKind::TypeAlias, source, node, "name"),
"const_item" => leaf_item(ItemKind::Const, source, node, "name"),
"static_item" => leaf_item(ItemKind::Static, source, node, "name"),
"use_declaration" => {
let argument = node.child_by_field_name("argument")?;
let name = rust_use_key(source, argument);
Some(Classified {
kind: ItemKind::Use,
name,
container_body: None,
signature_hash: 0,
extra_scope: Vec::new(),
})
}
_ => None,
}
}
fn leading_metadata_kinds(&self) -> &'static [MetadataRule] {
RUST_METADATA
}
fn signature_hash(&self, language: Language, source: &str, item_node: Node<'_>) -> u64 {
generic_signature_hash(language, source, item_node)
}
fn extra_scope(&self, language: Language, source: &str, item_node: Node<'_>) -> Vec<String> {
no_extra_scope(language, source, item_node)
}
}
/// Builds the name of an `impl` block from its `type` field and optional `trait` field.
///
/// Returns `None` when the node has no `type` field. The result has all whitespace
/// removed.
// NOTE(review): stripping happens after formatting, so the spaces around "for" are removed
// as well (e.g. `Display for Foo` becomes `DisplayforFoo`) — confirm this is intended.
fn rust_impl_name(source: &str, node: Node<'_>) -> Option<String> {
    let implemented_trait = node.child_by_field_name("trait");
    let self_type = &source[node.child_by_field_name("type")?.byte_range()];
    let key = match implemented_trait {
        Some(trait_node) => format!("{} for {}", &source[trait_node.byte_range()], self_type),
        None => self_type.to_string(),
    };
    Some(strip_whitespace(&key))
}
/// Name given to a Rust `use` item whose imported paths cannot be determined.
// NOTE(review): the leading NUL presumably keeps this key from colliding with a real path — confirm.
pub(super) const USE_POISON_KEY: &str = "\u{0}use::poison";
/// Picks the name for a `use` declaration: the smallest imported path, or
/// `USE_POISON_KEY` when the paths cannot be collected.
fn rust_use_key(source: &str, argument: Node<'_>) -> String {
    match rust_use_leaves(source, argument) {
        Some(leaves) => leaves
            .into_iter()
            .min()
            .unwrap_or_else(|| USE_POISON_KEY.to_string()),
        None => USE_POISON_KEY.to_string(),
    }
}
/// Collects every path imported by a `use` argument.
///
/// Returns `None` when the argument has a shape the collector rejects or when no path
/// was collected.
fn rust_use_leaves(source: &str, argument: Node<'_>) -> Option<Vec<String>> {
    let mut collected = Vec::new();
    let analyzable = collect_use_leaves(source, argument, "", &mut collected);
    (analyzable && !collected.is_empty()).then_some(collected)
}
/// Computes the identity of a Rust `use_declaration`.
///
/// Returns `None` for any other language or node kind. Otherwise yields
/// `UseIdentity::Plain` with the imported paths, or `UseIdentity::Unanalyzable` when the
/// declaration has no `argument` field or its paths cannot be collected.
pub(super) fn use_identity(
    language: Language,
    source: &str,
    node: Node<'_>,
) -> Option<UseIdentity> {
    let is_rust_use = language == Language::Rust && node.kind() == "use_declaration";
    if !is_rust_use {
        return None;
    }
    let leaves = node
        .child_by_field_name("argument")
        .and_then(|argument| rust_use_leaves(source, argument));
    Some(leaves.map_or(UseIdentity::Unanalyzable, UseIdentity::Plain))
}
/// Appends the paths imported by `node` to `out`, each prefixed with `prefix`.
///
/// Handles plain and scoped identifiers, flat `use_list`s and `scoped_use_list`s.
/// Returns `false` for any other node kind, for a scoped list without a `list` field, or
/// when the list expansion fails. `out` may already contain partial results when `false`
/// is returned.
fn collect_use_leaves(source: &str, node: Node<'_>, prefix: &str, out: &mut Vec<String>) -> bool {
    let kind = node.kind();
    if kind == "identifier" || kind == "scoped_identifier" {
        let text = strip_whitespace(&source[node.byte_range()]);
        out.push(format!("{prefix}{text}"));
        return true;
    }
    if kind == "use_list" {
        return expand_flat_use_list(source, node, prefix, out);
    }
    if kind != "scoped_use_list" {
        return false;
    }

    // `path::{...}`: extend the prefix with the path (when present) before expanding.
    let mut nested_prefix = prefix.to_string();
    if let Some(path) = node.child_by_field_name("path") {
        nested_prefix.push_str(&strip_whitespace(&source[path.byte_range()]));
        nested_prefix.push_str("::");
    }
    match node.child_by_field_name("list") {
        Some(list) => expand_flat_use_list(source, list, &nested_prefix, out),
        None => false,
    }
}
/// Appends every entry of a use list to `out`, each prefixed with `prefix`.
///
/// Only plain and scoped identifiers are accepted; the first entry of any other kind
/// aborts with `false` (entries pushed before it stay in `out`). Returns `true` only when
/// at least one entry was pushed.
fn expand_flat_use_list(source: &str, list: Node<'_>, prefix: &str, out: &mut Vec<String>) -> bool {
    let mut cursor = list.walk();
    let mut pushed = false;
    for entry in list.named_children(&mut cursor) {
        if !matches!(entry.kind(), "identifier" | "scoped_identifier") {
            return false;
        }
        let text = strip_whitespace(&source[entry.byte_range()]);
        out.push(format!("{prefix}{text}"));
        pushed = true;
    }
    pushed
}
/// Rule set for Python sources.
pub(super) struct PythonRules;

impl LanguageRules for PythonRules {
    /// Classifies Python definitions.
    ///
    /// Functions are `Function` items with a signature hash, classes are `Module` items
    /// exposing their `body`. A `decorated_definition` is classified as its inner
    /// definition, but with the container body cleared.
    fn classify_node<'a>(
        &self,
        language: Language,
        source: &'a str,
        node: Node<'a>,
    ) -> Option<Classified<'a>> {
        match node.kind() {
            "decorated_definition" => {
                let definition = node.child_by_field_name("definition")?;
                let mut classified = self.classify_node(language, source, definition)?;
                classified.container_body = None;
                Some(classified)
            }
            "function_definition" => {
                name_from_field(source, node, "name").map(|name| Classified {
                    kind: ItemKind::Function,
                    name,
                    container_body: None,
                    signature_hash: self.signature_hash(language, source, node),
                    extra_scope: Vec::new(),
                })
            }
            "class_definition" => name_from_field(source, node, "name").map(|name| Classified {
                kind: ItemKind::Module,
                name,
                container_body: node.child_by_field_name("body"),
                signature_hash: 0,
                extra_scope: Vec::new(),
            }),
            _ => None,
        }
    }

    /// Python registers no leading-metadata kinds.
    fn leading_metadata_kinds(&self) -> &'static [MetadataRule] {
        &[]
    }

    /// Delegates to the generic `parameters`-field hash.
    fn signature_hash(&self, language: Language, source: &str, item_node: Node<'_>) -> u64 {
        generic_signature_hash(language, source, item_node)
    }

    /// Python items contribute no extra scope.
    fn extra_scope(&self, language: Language, source: &str, item_node: Node<'_>) -> Vec<String> {
        no_extra_scope(language, source, item_node)
    }
}
/// Rule set shared by JavaScript and TypeScript sources.
pub(super) struct JsTsRules;
/// Leading-metadata rules for JavaScript/TypeScript: `decorator` nodes with the `Always` binding.
static JS_TS_METADATA: &[MetadataRule] = &[
MetadataRule {
kind: "decorator",
binding: MetadataBinding::Always,
},
];
impl LanguageRules for JsTsRules {
    /// Classifies JavaScript/TypeScript declarations.
    ///
    /// Function declarations become `Function`, class and interface declarations become
    /// `Module` (exposing their `body`), and method definitions/signatures become `Method`.
    /// Other node kinds, or nodes without a `name` field, yield `None`.
    fn classify_node<'a>(
        &self,
        language: Language,
        source: &'a str,
        node: Node<'a>,
    ) -> Option<Classified<'a>> {
        let kind = node.kind();
        let is_container = matches!(
            kind,
            "class_declaration" | "abstract_class_declaration" | "interface_declaration"
        );
        let item_kind = if is_container {
            ItemKind::Module
        } else if matches!(
            kind,
            "function_declaration" | "generator_function_declaration"
        ) {
            ItemKind::Function
        } else if matches!(
            kind,
            "method_definition" | "method_signature" | "abstract_method_signature"
        ) {
            ItemKind::Method
        } else {
            return None;
        };

        let name = name_from_field(source, node, "name")?;
        // Containers expose a body and carry no hash; callables are the other way round.
        let (container_body, signature_hash) = if is_container {
            (node.child_by_field_name("body"), 0)
        } else {
            (None, self.signature_hash(language, source, node))
        };
        Some(Classified {
            kind: item_kind,
            name,
            container_body,
            signature_hash,
            extra_scope: Vec::new(),
        })
    }

    /// Returns the JavaScript/TypeScript leading-metadata table.
    fn leading_metadata_kinds(&self) -> &'static [MetadataRule] {
        JS_TS_METADATA
    }

    /// Delegates to the generic `parameters`-field hash.
    fn signature_hash(&self, language: Language, source: &str, item_node: Node<'_>) -> u64 {
        generic_signature_hash(language, source, item_node)
    }

    /// JavaScript/TypeScript items contribute no extra scope.
    fn extra_scope(&self, language: Language, source: &str, item_node: Node<'_>) -> Vec<String> {
        no_extra_scope(language, source, item_node)
    }
}
/// Rule set for Go sources.
pub(super) struct GoRules;
/// Leading-metadata rules for Go: `comment` nodes with the `NoBlankLine` binding.
static GO_METADATA: &[MetadataRule] = &[MetadataRule {
kind: "comment",
binding: MetadataBinding::NoBlankLine,
}];
impl LanguageRules for GoRules {
    /// Classifies Go function and method declarations.
    ///
    /// Both carry a signature hash; only methods receive extra scope (their receiver
    /// type). Other node kinds, or nodes without a `name` field, yield `None`.
    fn classify_node<'a>(
        &self,
        language: Language,
        source: &'a str,
        node: Node<'a>,
    ) -> Option<Classified<'a>> {
        let item_kind = match node.kind() {
            "function_declaration" => ItemKind::Function,
            "method_declaration" => ItemKind::Method,
            _ => return None,
        };
        let name = name_from_field(source, node, "name")?;
        let signature_hash = self.signature_hash(language, source, node);
        let extra_scope = if item_kind == ItemKind::Method {
            self.extra_scope(language, source, node)
        } else {
            Vec::new()
        };
        Some(Classified {
            kind: item_kind,
            name,
            container_body: None,
            signature_hash,
            extra_scope,
        })
    }

    /// Returns the Go leading-metadata table.
    fn leading_metadata_kinds(&self) -> &'static [MetadataRule] {
        GO_METADATA
    }

    /// Delegates to the generic `parameters`-field hash.
    fn signature_hash(&self, language: Language, source: &str, item_node: Node<'_>) -> u64 {
        generic_signature_hash(language, source, item_node)
    }

    /// Returns the receiver type as the single scope component, or nothing when the node
    /// has no receiver type.
    fn extra_scope(&self, _language: Language, source: &str, item_node: Node<'_>) -> Vec<String> {
        match go_receiver_type(source, item_node) {
            Some(receiver) => vec![receiver],
            None => Vec::new(),
        }
    }
}
/// Extracts the whitespace-stripped receiver type text of a Go method.
///
/// Looks at the node's `receiver` field and returns the `type` of the first
/// `parameter_declaration` child that has one; `None` when there is no receiver or no
/// such child.
fn go_receiver_type(source: &str, node: Node<'_>) -> Option<String> {
    let receiver = node.child_by_field_name("receiver")?;
    let mut cursor = receiver.walk();
    let receiver_type = receiver
        .children(&mut cursor)
        .filter(|child| child.kind() == "parameter_declaration")
        .find_map(|child| child.child_by_field_name("type"));
    receiver_type.map(|ty| strip_whitespace(&source[ty.byte_range()]))
}
/// Rule set for Java sources.
pub(super) struct JavaRules;
/// Leading-metadata rules for Java: annotations use the `Always` binding, comments use
/// the `NoBlankLine` binding.
static JAVA_METADATA: &[MetadataRule] = &[
MetadataRule {
kind: "marker_annotation",
binding: MetadataBinding::Always,
},
MetadataRule {
kind: "annotation",
binding: MetadataBinding::Always,
},
MetadataRule {
kind: "line_comment",
binding: MetadataBinding::NoBlankLine,
},
MetadataRule {
kind: "block_comment",
binding: MetadataBinding::NoBlankLine,
},
];
impl LanguageRules for JavaRules {
    /// Classifies Java declarations.
    ///
    /// Methods and constructors become `Method` items with a signature hash; classes and
    /// interfaces become `Module` items exposing their `body`. Other node kinds, or nodes
    /// without a `name` field, yield `None`.
    fn classify_node<'a>(
        &self,
        language: Language,
        source: &'a str,
        node: Node<'a>,
    ) -> Option<Classified<'a>> {
        let kind = node.kind();
        let is_callable = matches!(kind, "method_declaration" | "constructor_declaration");
        let is_container = matches!(kind, "class_declaration" | "interface_declaration");
        if !is_callable && !is_container {
            return None;
        }

        let name = name_from_field(source, node, "name")?;
        let classified = if is_callable {
            Classified {
                kind: ItemKind::Method,
                name,
                container_body: None,
                signature_hash: self.signature_hash(language, source, node),
                extra_scope: Vec::new(),
            }
        } else {
            Classified {
                kind: ItemKind::Module,
                name,
                container_body: node.child_by_field_name("body"),
                signature_hash: 0,
                extra_scope: Vec::new(),
            }
        };
        Some(classified)
    }

    /// Returns the Java leading-metadata table.
    fn leading_metadata_kinds(&self) -> &'static [MetadataRule] {
        JAVA_METADATA
    }

    /// Delegates to the generic `parameters`-field hash.
    fn signature_hash(&self, language: Language, source: &str, item_node: Node<'_>) -> u64 {
        generic_signature_hash(language, source, item_node)
    }

    /// Java items contribute no extra scope.
    fn extra_scope(&self, language: Language, source: &str, item_node: Node<'_>) -> Vec<String> {
        no_extra_scope(language, source, item_node)
    }
}
/// Rule set shared by C and C++ sources.
pub(super) struct CppRules;

impl LanguageRules for CppRules {
    /// Classifies C/C++ definitions.
    ///
    /// Function definitions become `Function` items with a declarator-based signature
    /// hash and qualifying scopes as extra scope. Class, struct and union specifiers
    /// become `Module` items exposing their `body`; namespaces do too, but only when the
    /// language is C++.
    fn classify_node<'a>(
        &self,
        language: Language,
        source: &'a str,
        node: Node<'a>,
    ) -> Option<Classified<'a>> {
        match node.kind() {
            "function_definition" => {
                let name = node
                    .child_by_field_name("declarator")
                    .and_then(|declarator| c_function_name(source, declarator))?;
                let extra_scope = self.extra_scope(language, source, node);
                let signature_hash = self.signature_hash(language, source, node);
                Some(Classified {
                    kind: ItemKind::Function,
                    name,
                    container_body: None,
                    signature_hash,
                    extra_scope,
                })
            }
            kind @ ("class_specifier"
            | "struct_specifier"
            | "union_specifier"
            | "namespace_definition") => {
                // Namespaces are only recognized for C++.
                if kind == "namespace_definition" && language != Language::Cpp {
                    return None;
                }
                let name = stripped_name(source, node, "name")?;
                Some(Classified {
                    kind: ItemKind::Module,
                    name,
                    container_body: node.child_by_field_name("body"),
                    signature_hash: 0,
                    extra_scope: Vec::new(),
                })
            }
            _ => None,
        }
    }

    /// C/C++ register no leading-metadata kinds.
    fn leading_metadata_kinds(&self) -> &'static [MetadataRule] {
        &[]
    }

    /// Hashes the signature found under the node's `declarator` field; `0` when absent.
    fn signature_hash(&self, language: Language, source: &str, item_node: Node<'_>) -> u64 {
        item_node
            .child_by_field_name("declarator")
            .map_or(0, |declarator| c_signature_hash(language, source, declarator))
    }

    /// Returns the qualifying scopes of the function's declarator; empty when the node
    /// has no `declarator` field.
    fn extra_scope(&self, _language: Language, source: &str, item_node: Node<'_>) -> Vec<String> {
        match item_node.child_by_field_name("declarator") {
            Some(declarator) => c_function_scope(source, item_node, declarator),
            None => Vec::new(),
        }
    }
}
/// Event reported by `walk_c_declarator` while descending through a declarator.
enum DeclaratorEvent<'a> {
/// The `scope` field of a `qualified_identifier` encountered on the way down.
Scope(Node<'a>),
/// The node that names the declared entity; the walk ends after reporting it.
Name(Node<'a>),
}
/// Descends from a function declarator towards the declared name, reporting events.
///
/// Starting at the `declarator` field of `function_declarator`, the walk reports a
/// `Scope` event for every qualified identifier that has a `scope` field and finishes
/// with a `Name` event when it reaches a name-like node. It stops silently on an
/// unexpected node kind, a missing field, or after `WALK_LIMIT` steps.
fn walk_c_declarator<'tree, F>(function_declarator: Node<'tree>, mut callback: F)
where
    F: FnMut(DeclaratorEvent<'tree>),
{
    let Some(mut current) = function_declarator.child_by_field_name("declarator") else {
        return;
    };
    for _ in 0..WALK_LIMIT {
        // Decide which field leads one level closer to the name.
        let next_field = match current.kind() {
            "identifier"
            | "field_identifier"
            | "type_identifier"
            | "property_identifier"
            | "operator_name"
            | "destructor_name" => {
                callback(DeclaratorEvent::Name(current));
                return;
            }
            "qualified_identifier" => {
                if let Some(scope_node) = current.child_by_field_name("scope") {
                    callback(DeclaratorEvent::Scope(scope_node));
                }
                "name"
            }
            "template_function" => "name",
            "pointer_declarator"
            | "reference_declarator"
            | "function_declarator"
            | "parenthesized_declarator" => "declarator",
            _ => return,
        };
        match current.child_by_field_name(next_field) {
            Some(next) => current = next,
            None => return,
        }
    }
}
/// Returns the source text of the name reached by walking `function_declarator`, or
/// `None` when the walk never reports a name.
fn c_function_name(source: &str, function_declarator: Node<'_>) -> Option<String> {
    let mut found: Option<String> = None;
    walk_c_declarator(function_declarator, |event| match event {
        DeclaratorEvent::Name(name_node) => {
            found = Some(source[name_node.byte_range()].to_string());
        }
        DeclaratorEvent::Scope(_) => {}
    });
    found
}
/// Collects the qualifying scope components of a C/C++ function definition.
///
/// Walks `function_declarator` and, for every scope reported on the way to the name,
/// records its text as produced by `scope_component_text`. The template parameter lists
/// enclosing `function_definition` are gathered once up front and passed along so that
/// scope components can be normalized against them.
///
/// Returns the components in the order the walk reports them; empty when the declarator
/// has no qualifying scope.
fn c_function_scope(
    source: &str,
    function_definition: Node<'_>,
    function_declarator: Node<'_>,
) -> Vec<String> {
    let param_lists = enclosing_template_param_lists(function_definition, source);
    let mut scope = Vec::new();
    walk_c_declarator(function_declarator, |event| {
        if let DeclaratorEvent::Scope(node) = event {
            // Pass the lists by reference; the original text had a mis-encoded `&param_lists`.
            scope.push(scope_component_text(source, node, &param_lists));
        }
    });
    scope
}
/// Produces the text used for one scope component.
///
/// The default is the whitespace-stripped source text of `scope_node`. When the node is
/// a `template_type` whose arguments match one of `param_lists`, only its stripped
/// `name` is returned instead.
fn scope_component_text(source: &str, scope_node: Node<'_>, param_lists: &[Vec<String>]) -> String {
    let raw = strip_whitespace(&source[scope_node.byte_range()]);
    let is_candidate = scope_node.kind() == "template_type" && !param_lists.is_empty();
    if !is_candidate {
        return raw;
    }
    let name_and_args = scope_node
        .child_by_field_name("name")
        .zip(scope_node.child_by_field_name("arguments"));
    match name_and_args {
        Some((name_node, args_node))
            if template_args_match_any_param_list(source, args_node, param_lists) =>
        {
            strip_whitespace(&source[name_node.byte_range()])
        }
        _ => raw,
    }
}
fn enclosing_template_param_lists(node: Node<'_>, source: &str) -> Vec<Vec<String>> {
let mut lists = Vec::new();
let mut current = node;
for _ in 0..MAX_PARENT_WALK_DEPTH {
let Some(parent) = current.parent() else {
break;
};
if parent.kind() == "template_declaration"
&& let Some(params_node) = parent.child_by_field_name("parameters")
{
let mut names = Vec::new();
let mut cursor = params_node.walk();
let mut all_named = true;
for child in params_node.named_children(&mut cursor) {
match template_param_name(source, child) {
Some(n) => names.push(n),
None => {
all_named = false;
break;
}
}
}
if all_named && !names.is_empty() {
lists.push(names);
}
}
current = parent;
}
lists
}
/// Resolves the name declared by a template parameter node.
///
/// For a `template_template_parameter_declaration` the name is taken, recursively, from
/// its last nested parameter declaration. For any other parameter it is the stripped
/// text of the last `identifier` or `type_identifier` named child. Returns `None` when
/// no such child exists.
fn template_param_name(source: &str, param: Node<'_>) -> Option<String> {
    let mut cursor = param.walk();
    if param.kind() == "template_template_parameter_declaration" {
        let innermost = param
            .named_children(&mut cursor)
            .filter(|child| {
                matches!(
                    child.kind(),
                    "type_parameter_declaration"
                        | "variadic_type_parameter_declaration"
                        | "template_template_parameter_declaration"
                )
            })
            .last();
        return template_param_name(source, innermost?);
    }
    let name_node = param
        .named_children(&mut cursor)
        .filter(|child| matches!(child.kind(), "identifier" | "type_identifier"))
        .last();
    name_node.map(|n| strip_whitespace(&source[n.byte_range()]))
}
/// Reports whether the template arguments in `args` are exactly one of `param_lists`.
///
/// Every named child of `args` must resolve to a plain parameter name; if any does not,
/// the result is `false`.
fn template_args_match_any_param_list(
    source: &str,
    args: Node<'_>,
    param_lists: &[Vec<String>],
) -> bool {
    let mut cursor = args.walk();
    let arg_names: Option<Vec<String>> = args
        .named_children(&mut cursor)
        .map(|arg| parameter_usage_arg_name(source, arg))
        .collect();
    arg_names.is_some_and(|names| param_lists.contains(&names))
}
/// Extracts the name from a template argument that is a bare type identifier.
///
/// A `parameter_pack_expansion` is unwrapped through its `pattern` field. Otherwise the
/// argument must be a `type_descriptor` with exactly one named child of kind
/// `type_identifier`; anything else yields `None`.
fn parameter_usage_arg_name(source: &str, arg: Node<'_>) -> Option<String> {
    match arg.kind() {
        "parameter_pack_expansion" => {
            parameter_usage_arg_name(source, arg.child_by_field_name("pattern")?)
        }
        "type_descriptor" => {
            let mut cursor = arg.walk();
            let mut children = arg.named_children(&mut cursor);
            // Exactly one named child: a first one exists and a second one does not.
            let (Some(only), None) = (children.next(), children.next()) else {
                return None;
            };
            (only.kind() == "type_identifier")
                .then(|| strip_whitespace(&source[only.byte_range()]))
        }
        _ => None,
    }
}
/// Hashes the signature of a C/C++ function declarator.
///
/// The hash covers the parameter-list hash of the name-bearing function declarator (`0`
/// when there is none) followed by each `type_qualifier` / `ref_qualifier` child of that
/// declarator (kind and whitespace-stripped text).
fn c_signature_hash(language: Language, source: &str, declarator: Node<'_>) -> u64 {
    let name_fd = c_name_bearing_function_declarator(declarator);
    let param_hash = match name_fd.and_then(|fd| fd.child_by_field_name("parameters")) {
        Some(params) => signature_hash_from_parameter_list(language, source, params),
        None => 0,
    };

    let mut hasher = std::collections::hash_map::DefaultHasher::new();
    param_hash.hash(&mut hasher);
    let Some(fd) = name_fd else {
        return hasher.finish();
    };

    let mut cursor = fd.walk();
    let qualifiers = fd
        .children(&mut cursor)
        .filter(|child| matches!(child.kind(), "type_qualifier" | "ref_qualifier"));
    for qualifier in qualifiers {
        b"@".hash(&mut hasher);
        qualifier.kind().hash(&mut hasher);
        strip_whitespace(&source[qualifier.byte_range()]).hash(&mut hasher);
    }
    hasher.finish()
}
/// Finds the innermost `function_declarator` on the path from `declarator` to the name.
///
/// The walk follows `declarator` fields through function, pointer, reference and
/// parenthesized declarators and `name` fields through qualified identifiers and
/// template functions. It returns the last function declarator seen before the walk
/// ends (unexpected kind, missing field, or `WALK_LIMIT` steps).
fn c_name_bearing_function_declarator(declarator: Node<'_>) -> Option<Node<'_>> {
    let mut current = declarator;
    let mut last_fd: Option<Node<'_>> = None;
    for _ in 0..WALK_LIMIT {
        let next_field = match current.kind() {
            "function_declarator" => {
                last_fd = Some(current);
                "declarator"
            }
            "pointer_declarator" | "reference_declarator" | "parenthesized_declarator" => {
                "declarator"
            }
            "qualified_identifier" | "template_function" => "name",
            _ => break,
        };
        match current.child_by_field_name(next_field) {
            Some(next) => current = next,
            None => break,
        }
    }
    last_fd
}
/// Appends a textual "shape" of a declarator subtree to `out`.
///
/// Pointers emit `*`, references `&`, arrays `[]`, functions `()`; identifiers and
/// parenthesized declarators emit nothing; any other node kind emits `<kind>`. Named
/// children are then visited recursively in order.
fn emit_c_declarator_shape(node: Node<'_>, out: &mut String) {
    let kind = node.kind();
    let marker = match kind {
        "identifier"
        | "field_identifier"
        | "type_identifier"
        | "parenthesized_declarator"
        | "abstract_parenthesized_declarator" => "",
        "pointer_declarator" | "abstract_pointer_declarator" => "*",
        "reference_declarator" | "abstract_reference_declarator" => "&",
        "array_declarator" | "abstract_array_declarator" => "[]",
        "function_declarator" | "abstract_function_declarator" => "()",
        other => {
            out.push('<');
            out.push_str(other);
            ">"
        }
    };
    out.push_str(marker);

    let mut cursor = node.walk();
    node.named_children(&mut cursor)
        .for_each(|child| emit_c_declarator_shape(child, out));
}
/// Classifies `node` as a leaf item of the given `kind`.
///
/// The name is read from `name_field`; `None` is returned when that field is absent.
/// Leaf items have no container body, a zero signature hash and no extra scope.
fn leaf_item<'a>(
    kind: ItemKind,
    source: &'a str,
    node: Node<'a>,
    name_field: &str,
) -> Option<Classified<'a>> {
    name_from_field(source, node, name_field).map(|name| Classified {
        kind,
        name,
        container_body: None,
        signature_hash: 0,
        extra_scope: Vec::new(),
    })
}
/// Returns the source text of the child stored under `field`, or `None` when the node
/// has no such field.
fn name_from_field(source: &str, node: Node<'_>, field: &str) -> Option<String> {
    node.child_by_field_name(field)
        .map(|name_node| source[name_node.byte_range()].to_string())
}
/// Like `name_from_field`, but with all whitespace removed from the text.
fn stripped_name(source: &str, node: Node<'_>, field: &str) -> Option<String> {
    let raw = name_from_field(source, node, field)?;
    Some(strip_whitespace(&raw))
}
/// Returns `s` with every Unicode whitespace character removed.
fn strip_whitespace(s: &str) -> String {
    // Splitting on whitespace and gluing the pieces back together drops exactly the
    // whitespace characters.
    s.split_whitespace().collect()
}
/// Hashes the parameter list stored under `field` of `node`; `0` when the field is absent.
fn signature_hash_from_field(language: Language, source: &str, node: Node<'_>, field: &str) -> u64 {
    node.child_by_field_name(field).map_or(0, |params| {
        signature_hash_from_parameter_list(language, source, params)
    })
}
/// Hashes a parameter list node.
///
/// For each named child that is not a `comment`, the hash covers the child's kind and
/// the whitespace-stripped text of its `type` field (`_` when absent). For C and C++ it
/// also covers the declarator shape of the child. The number of parameters is hashed
/// last.
fn signature_hash_from_parameter_list(language: Language, source: &str, params: Node<'_>) -> u64 {
    let hash_declarator_shape = matches!(language, Language::C | Language::Cpp);
    let mut hasher = std::collections::hash_map::DefaultHasher::new();
    let mut arity: u64 = 0;

    let mut cursor = params.walk();
    let parameters = params
        .named_children(&mut cursor)
        .filter(|child| child.kind() != "comment");
    for parameter in parameters {
        arity += 1;
        parameter.kind().hash(&mut hasher);
        b":".hash(&mut hasher);

        let type_text = match parameter.child_by_field_name("type") {
            Some(ty) => strip_whitespace(&source[ty.byte_range()]),
            None => "_".to_string(),
        };
        type_text.hash(&mut hasher);

        if hash_declarator_shape {
            b"@".hash(&mut hasher);
            let mut shape = String::new();
            if let Some(decl) = parameter.child_by_field_name("declarator") {
                emit_c_declarator_shape(decl, &mut shape);
            }
            shape.hash(&mut hasher);
        }
        b"|".hash(&mut hasher);
    }

    arity.hash(&mut hasher);
    hasher.finish()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Collects the node kinds a rule set registers as leading metadata.
    fn kinds_set(rules: &dyn LanguageRules) -> std::collections::BTreeSet<&'static str> {
        let mut kinds = std::collections::BTreeSet::new();
        for rule in rules.leading_metadata_kinds() {
            kinds.insert(rule.kind);
        }
        kinds
    }

    #[test]
    fn rust_leading_metadata_includes_attributes_and_outer_comments() {
        let rules = rules_for(Language::Rust).expect("rust rules registered");
        let kinds = kinds_set(rules);
        for expected in ["attribute_item", "line_comment", "block_comment"] {
            assert!(kinds.contains(expected));
        }
        assert_eq!(rules.leading_metadata_kinds().len(), 3);
    }

    #[test]
    fn python_leading_metadata_is_empty() {
        let rules = rules_for(Language::Python).expect("python rules registered");
        assert!(rules.leading_metadata_kinds().is_empty());
    }

    #[test]
    fn javascript_leading_metadata_is_decorator_only() {
        let rules = rules_for(Language::JavaScript).expect("javascript rules registered");
        let kinds = kinds_set(rules);
        assert_eq!(kinds.len(), 1);
        assert!(kinds.contains("decorator"));
    }

    #[test]
    fn typescript_shares_javascript_rules() {
        let js_rules = rules_for(Language::JavaScript).expect("js rules registered");
        let ts_rules = rules_for(Language::TypeScript).expect("ts rules registered");
        let js_kinds = kinds_set(js_rules);
        let ts_kinds = kinds_set(ts_rules);
        assert_eq!(js_kinds, ts_kinds);
    }

    #[test]
    fn go_leading_metadata_is_comment_only() {
        let rules = rules_for(Language::Go).expect("go rules registered");
        let kinds = kinds_set(rules);
        assert_eq!(kinds.len(), 1);
        assert!(kinds.contains("comment"));
    }

    #[test]
    fn c_leading_metadata_is_empty() {
        let rules = rules_for(Language::C).expect("c rules registered");
        assert!(rules.leading_metadata_kinds().is_empty());
    }

    #[test]
    fn cpp_leading_metadata_is_empty() {
        let rules = rules_for(Language::Cpp).expect("cpp rules registered");
        assert!(rules.leading_metadata_kinds().is_empty());
    }

    #[test]
    fn java_leading_metadata_includes_annotations_and_comments() {
        let rules = rules_for(Language::Java).expect("java rules registered");
        let kinds = kinds_set(rules);
        for expected in [
            "marker_annotation",
            "annotation",
            "line_comment",
            "block_comment",
        ] {
            assert!(kinds.contains(expected));
        }
        assert_eq!(rules.leading_metadata_kinds().len(), 4);
    }

    #[test]
    fn unknown_language_has_no_rules() {
        assert!(rules_for(Language::Unknown).is_none());
    }

    #[test]
    fn leading_metadata_kinds_per_language_is_exhaustive() {
        let all_languages = [
            Language::Rust,
            Language::Python,
            Language::JavaScript,
            Language::TypeScript,
            Language::Go,
            Language::C,
            Language::Cpp,
            Language::Java,
            Language::Unknown,
        ];
        for lang in all_languages {
            // The match has no wildcard so that a new `Language` variant fails to compile here.
            let expected_empty = match lang {
                Language::Python | Language::C | Language::Cpp => true,
                Language::Rust
                | Language::JavaScript
                | Language::TypeScript
                | Language::Go
                | Language::Java => false,
                Language::Unknown => continue,
            };
            let rules = rules_for(lang).expect("non-unknown language has rules");
            let actually_empty = rules.leading_metadata_kinds().is_empty();
            assert_eq!(
                actually_empty, expected_empty,
                "leading_metadata_kinds emptiness mismatch for {lang:?}"
            );
        }
    }
}