use crate::code_tree::models::Annotation;
use aho_corasick::AhoCorasick;
use regex::Regex;
use std::collections::HashSet;
use std::path::Path;
use std::sync::OnceLock;
use tree_sitter::Node;
/// Builds a module path of the form `<package><separator><module>`.
///
/// * `module` is the file stem of `filepath` (file name without its last
///   extension).
/// * `package` is the final component of `src_root`.
///
/// Either part falls back to the empty string when it is missing or is not
/// valid UTF-8. When one part is empty the other is returned alone, without
/// a separator; when both are empty the result is the empty string.
pub(super) fn file_to_module_path(filepath: &Path, src_root: &Path, separator: char) -> String {
    let module = filepath
        .file_stem()
        .and_then(|stem| stem.to_str())
        .unwrap_or_default();
    let package = src_root
        .file_name()
        .and_then(|name| name.to_str())
        .unwrap_or_default();

    if package.is_empty() {
        return module.to_owned();
    }
    if module.is_empty() {
        return package.to_owned();
    }

    let mut joined = String::with_capacity(package.len() + separator.len_utf8() + module.len());
    joined.push_str(package);
    joined.push(separator);
    joined.push_str(module);
    joined
}
/// Produces the qualified name for `name`.
///
/// * If `owner_prefix` is non-empty the result is
///   `<owner_prefix><separator><name>`; `module_path` is not consulted in
///   that case.
///   NOTE(review): presumably the owner prefix is already fully qualified by
///   the caller — confirm against call sites.
/// * Otherwise, if `module_path` is non-empty the result is
///   `<module_path><separator><name>`.
/// * Otherwise the bare `name` is returned.
pub(super) fn make_qualified(
    module_path: &str,
    owner_prefix: &str,
    name: &str,
    separator: char,
) -> String {
    // The owner prefix wins whenever it is present.
    let scope = if !owner_prefix.is_empty() {
        owner_prefix
    } else if !module_path.is_empty() {
        module_path
    } else {
        return name.to_owned();
    };

    let mut qualified = String::with_capacity(scope.len() + separator.len_utf8() + name.len());
    qualified.push_str(scope);
    qualified.push(separator);
    qualified.push_str(name);
    qualified
}
/// Cheap pre-filter: reports whether `source` contains any annotation
/// keyword anywhere in its bytes (ASCII case-insensitive).
///
/// The multi-pattern matcher is built once on first use and then shared.
fn has_annotation_keyword(source: &[u8]) -> bool {
    const KEYWORDS: [&str; 8] = [
        "TODO", "FIXME", "HACK", "SAFETY", "XXX", "BUG", "NOTE", "WARNING",
    ];
    static MATCHER: OnceLock<AhoCorasick> = OnceLock::new();

    MATCHER
        .get_or_init(|| {
            AhoCorasick::builder()
                .ascii_case_insensitive(true)
                .build(KEYWORDS)
                .expect("aho-corasick patterns compile")
        })
        .is_match(source)
}
/// Decides whether a file should be treated as test code.
///
/// Returns `true` when either:
/// * `filename` ends with any entry of `suffix_patterns`, or
/// * any component of `rel_path` (split on `/` or `\`) is exactly one of
///   `test`, `tests`, `__tests__`, `spec` or `specs`.
pub fn is_test_path(rel_path: &str, filename: &str, suffix_patterns: &[&str]) -> bool {
    const TEST_DIRS: [&str; 5] = ["test", "tests", "__tests__", "spec", "specs"];

    let suffix_hit = suffix_patterns
        .iter()
        .any(|suffix| filename.ends_with(*suffix));

    // Splitting on both separators is equivalent to normalising `\` to `/`
    // first and then splitting on `/`.
    suffix_hit
        || rel_path
            .split(|c: char| c == '/' || c == '\\')
            .any(|component| TEST_DIRS.contains(&component))
}
/// Returns the slice of `source` covered by `node`, as text.
///
/// Yields the empty string when the covered bytes are not valid UTF-8.
/// Indexing panics if the node's byte range lies outside `source`.
#[inline]
pub fn node_text<'a>(node: Node<'a>, source: &'a [u8]) -> &'a str {
    let span = node.byte_range();
    match std::str::from_utf8(&source[span]) {
        Ok(text) => text,
        Err(_) => "",
    }
}
/// Counts the lines in `source`.
///
/// Every `\n` terminates one line; a non-empty final line without a trailing
/// newline counts as one more. Empty input has zero lines.
pub fn count_lines(source: &[u8]) -> u32 {
    let terminated = bytecount_newlines(source);
    match source.last() {
        // Trailing text after the last newline is an extra, unterminated line.
        Some(&last) if last != b'\n' => terminated + 1,
        _ => terminated,
    }
}
/// Counts the `\n` bytes in `src`.
///
/// The count is narrowed to `u32` with `as`, so it wraps for inputs holding
/// more than `u32::MAX` newlines.
#[inline]
fn bytecount_newlines(src: &[u8]) -> u32 {
    let mut total = 0usize;
    for &byte in src {
        if byte == b'\n' {
            total += 1;
        }
    }
    total as u32
}
/// Extracts the text of the first direct child of `node` whose kind equals
/// `node_type`, with one surrounding `<...>` or `[...]` pair removed.
///
/// The child's text is trimmed, the enclosing delimiter pair (if any) is
/// stripped, and the remainder is trimmed again. Returns `None` when no such
/// child exists or when nothing is left after stripping. Only the first
/// matching child is considered.
pub fn get_type_parameters(node: Node, source: &[u8], node_type: &str) -> Option<String> {
    let mut cursor = node.walk();
    let params = node
        .children(&mut cursor)
        .find(|child| child.kind() == node_type)?;

    let raw = node_text(params, source).trim();
    let inner = raw
        .strip_prefix('<')
        .and_then(|rest| rest.strip_suffix('>'))
        .or_else(|| {
            raw.strip_prefix('[')
                .and_then(|rest| rest.strip_suffix(']'))
        })
        .map_or(raw, |unwrapped| unwrapped.trim());

    if inner.is_empty() {
        None
    } else {
        Some(inner.to_owned())
    }
}
/// Returns the shared, lazily compiled regex for comment annotations.
///
/// Capture group 1 is the keyword (matched case-insensitively on word
/// boundaries); group 2 is the rest of the line after optional `:` /
/// whitespace.
fn annotation_regex() -> &'static Regex {
    const PATTERN: &str = r"(?i)\b(TODO|FIXME|HACK|SAFETY|XXX|BUG|NOTE|WARNING)\b[:\s]*(.*)";
    static COMPILED: OnceLock<Regex> = OnceLock::new();

    COMPILED.get_or_init(|| Regex::new(PATTERN).expect("annotation regex compiles"))
}
/// Node kinds treated as comments by default.
///
/// NOTE(review): presumably meant to be passed as `comment_types` to
/// `extract_comment_annotations` — confirm against callers.
pub const DEFAULT_COMMENT_TYPES: &[&str] = &["line_comment", "block_comment", "comment"];
/// Collects `TODO` / `FIXME` / ... annotations from every comment node under
/// `root`.
///
/// * `root` – node whose subtree is searched (visited in document order).
/// * `source` – the bytes the tree was parsed from.
/// * `comment_types` – node kinds that count as comments.
///
/// Each annotation carries the upper-cased keyword, the trimmed remainder of
/// the line (at most 200 characters) and the 1-based line on which the
/// keyword appears. For a comment spanning several lines, that is the line
/// of the individual keyword, not the first line of the comment.
///
/// Returns `None` when the source contains no annotation keyword at all or
/// when no comment node yields a match.
pub fn extract_comment_annotations(
    root: Node,
    source: &[u8],
    comment_types: &[&str],
) -> Option<Vec<Annotation>> {
    // Fast path: skip the tree walk when no keyword occurs anywhere.
    if !has_annotation_keyword(source) {
        return None;
    }
    let re = annotation_regex();
    let mut found: Vec<Annotation> = Vec::new();
    let mut stack: Vec<Node> = vec![root];

    while let Some(node) = stack.pop() {
        let node_kind = node.kind();
        if comment_types.iter().any(|t| *t == node_kind) {
            let text = node_text(node, source);
            // `line` tracks the line of the current match; `scanned` is the
            // byte offset up to which newlines have already been counted.
            let mut line = node.start_position().row as u32 + 1;
            let mut scanned = 0usize;
            for caps in re.captures_iter(text) {
                // Matches are yielded in increasing, non-overlapping order,
                // so only the bytes since the previous match need scanning.
                let at = caps.get(0).map_or(scanned, |m| m.start());
                line += text.as_bytes()[scanned..at]
                    .iter()
                    .filter(|&&b| b == b'\n')
                    .count() as u32;
                scanned = at;

                let kind = caps
                    .get(1)
                    .map(|m| m.as_str().to_ascii_uppercase())
                    .unwrap_or_default();
                let body = caps.get(2).map(|m| m.as_str().trim()).unwrap_or("");
                found.push(Annotation {
                    kind,
                    text: body.chars().take(200).collect(),
                    line,
                });
            }
        }

        // Push children and reverse just that tail so the first child is
        // popped first, preserving document order.
        let first_child = stack.len();
        let mut cursor = node.walk();
        stack.extend(node.children(&mut cursor));
        stack[first_child..].reverse();
    }

    if found.is_empty() {
        None
    } else {
        Some(found)
    }
}
// Per-language tables of syntax-node kinds that count as branch points.
// NOTE(review): presumably each table is passed as `branch_kinds` to
// `compute_complexity` by the matching language extractor, and the strings
// are node kinds of the corresponding tree-sitter grammar — confirm against
// callers and grammar versions.

/// Branch node kinds for Python.
pub const BRANCH_KINDS_PYTHON: &[&str] = &[
"if_statement",
"elif_clause",
"while_statement",
"for_statement",
"except_clause",
"case_clause",
"conditional_expression", "boolean_operator", ];
/// Branch node kinds for Rust.
pub const BRANCH_KINDS_RUST: &[&str] = &[
"if_expression",
"while_expression",
"while_let_expression",
"for_expression",
"loop_expression",
"match_arm",
"try_expression", ];
/// Branch node kinds for Go.
pub const BRANCH_KINDS_GO: &[&str] = &[
"if_statement",
"for_statement",
"expression_case",
"type_case",
"communication_case",
];
/// Branch node kinds for Java.
pub const BRANCH_KINDS_JAVA: &[&str] = &[
"if_statement",
"while_statement",
"for_statement",
"enhanced_for_statement",
"do_statement",
"switch_label",
"catch_clause",
"ternary_expression",
];
/// Branch node kinds for TypeScript.
pub const BRANCH_KINDS_TS: &[&str] = &[
"if_statement",
"while_statement",
"for_statement",
"for_in_statement",
"for_of_statement",
"do_statement",
"switch_case",
"catch_clause",
"ternary_expression",
];
/// Branch node kinds for C++.
pub const BRANCH_KINDS_CPP: &[&str] = &[
"if_statement",
"while_statement",
"for_statement",
"for_range_loop",
"do_statement",
"case_statement",
"catch_clause",
"conditional_expression",
];
/// Branch node kinds for C#.
pub const BRANCH_KINDS_CSHARP: &[&str] = &[
"if_statement",
"while_statement",
"for_statement",
"for_each_statement",
"do_statement",
"switch_section",
"catch_clause",
"conditional_expression",
];
/// Branch node kinds for Dart.
pub const BRANCH_KINDS_DART: &[&str] = &[
"if_statement",
"while_statement",
"for_statement",
"do_statement",
"switch_statement_case",
"switch_expression_case",
"catch_clause",
"conditional_expression",
"if_null_expression",
"logical_and_expression",
"logical_or_expression",
"if_element",
"for_element",
];
/// Measures branching inside `body`.
///
/// * `branch_kinds` – node kinds that count as a branch.
/// * `nested_scope_kinds` – node kinds whose subtrees are skipped entirely
///   when encountered as a child.
///
/// Returns `(branch_count, max_branch_nesting_depth)`: the number of branch
/// nodes found and the deepest chain of branch nodes nested inside one
/// another. Both are `0` when no branch node is present.
pub fn compute_complexity(
    body: Node,
    branch_kinds: &[&str],
    nested_scope_kinds: &[&str],
) -> (u32, u32) {
    let mut branches: HashSet<&str> = HashSet::with_capacity(branch_kinds.len());
    branches.extend(branch_kinds.iter().copied());
    let mut skipped: HashSet<&str> = HashSet::with_capacity(nested_scope_kinds.len());
    skipped.extend(nested_scope_kinds.iter().copied());

    let (mut branch_total, mut deepest) = (0u32, 0u32);
    walk(body, &branches, &skipped, 0, &mut branch_total, &mut deepest);
    (branch_total, deepest)
}
fn walk(
node: Node,
branches: &HashSet<&str>,
nested: &HashSet<&str>,
depth: u32,
count: &mut u32,
max_depth: &mut u32,
) {
let kind = node.kind();
let is_branch = branches.contains(kind);
let next_depth = if is_branch {
*count += 1;
let d = depth + 1;
if d > *max_depth {
*max_depth = d;
}
d
} else {
depth
};
let mut cursor = node.walk();
for child in node.children(&mut cursor) {
if nested.contains(child.kind()) {
continue;
}
walk(child, branches, nested, next_depth, count, max_depth);
}
}
/// Heuristic for "this identifier is written like a macro": at least two
/// bytes long, starts with an ASCII uppercase letter or `_`, and consists
/// solely of ASCII uppercase letters, digits and `_`.
pub fn looks_like_macro_decorator(text: &str) -> bool {
    // Working on bytes is equivalent here: any non-ASCII character
    // contributes bytes >= 0x80, which fail every check below.
    match text.as_bytes().split_first() {
        Some((&head, tail)) if !tail.is_empty() => {
            (head.is_ascii_uppercase() || head == b'_')
                && tail
                    .iter()
                    .all(|&b| b.is_ascii_uppercase() || b == b'_' || b.is_ascii_digit())
        }
        _ => false,
    }
}
/// Returns the shared, lazily compiled regex for `@procedure` /
/// `@cypher_procedure` lines.
///
/// The tag must be the first non-whitespace text on a line and be followed
/// by `:` or whitespace; capture group 1 is the identifier that follows.
fn procedure_annotation_regex() -> &'static Regex {
    const PATTERN: &str = r"(?m)^\s*@(?:cypher_)?procedure[:\s]+([a-zA-Z_][a-zA-Z0-9_]*)";
    static COMPILED: OnceLock<Regex> = OnceLock::new();

    COMPILED.get_or_init(|| Regex::new(PATTERN).expect("procedure annotation regex compiles"))
}
/// Lists the procedure names declared in `docstring` through `@procedure` /
/// `@cypher_procedure` tags, in order of appearance.
///
/// Returns an empty vector when `docstring` is `None` or holds no tag.
pub fn extract_procedure_annotations(docstring: Option<&str>) -> Vec<String> {
    let mut names = Vec::new();
    if let Some(doc) = docstring {
        for caps in procedure_annotation_regex().captures_iter(doc) {
            if let Some(name) = caps.get(1) {
                names.push(name.as_str().to_owned());
            }
        }
    }
    names
}
/// Returns the shared, lazily compiled, case-insensitive regex that
/// recognises common "this file is generated" markers.
///
/// The final alternative is anchored with `^` and no multi-line flag, so it
/// only matches at the start of the text it is applied to.
fn generated_marker_regex() -> &'static Regex {
    const PATTERN: &str = concat!(
        r"(?i)(",
        r"\bcode\s+generated\b",
        r"|\bdo\s+not\s+edit\b",
        r"|\bautomatically\s+generated\b",
        r"|<\s*auto-generated\s*>",
        r"|@generated\b",
        r"|\bgenerated\s+by\s+openapi",
        r"|^\s*(?://|#|/\*|\*|--|<!--)\s*Generated\s+by\b",
        r")",
    );
    static COMPILED: OnceLock<Regex> = OnceLock::new();

    COMPILED.get_or_init(|| Regex::new(PATTERN).expect("generated marker regex compiles"))
}
/// Reports whether `line`, ignoring leading whitespace, begins with one of
/// the recognised comment openers: `//`, `#`, `/*`, `*`, `--`, `<!--`, `;`.
fn is_comment_line(line: &str) -> bool {
    const OPENERS: [&str; 7] = ["//", "#", "/*", "*", "--", "<!--", ";"];

    let body = line.trim_start();
    OPENERS.iter().any(|opener| body.starts_with(*opener))
}
/// Classifies `source` as machine-generated or minified, if it looks like
/// either.
///
/// Checks, in order:
/// 1. `"generated"` – one of the first 10 non-blank lines within the first
///    4096 bytes is a comment line containing a generated-file marker.
/// 2. `"minified"` – the file is at least 1024 bytes long and has no
///    newline at all.
/// 3. `"minified"` – among the first 50 non-empty lines (at least 5 are
///    required) the average line length exceeds 500 bytes.
///
/// Returns `None` for empty input or when no check fires.
///
/// The head is decoded lossily: invalid UTF-8, including a multi-byte
/// character cut in half by the 4096-byte window, becomes U+FFFD instead of
/// discarding the whole head and silently skipping the marker scan.
pub fn is_generated_or_minified(source: &[u8]) -> Option<&'static str> {
    if source.is_empty() {
        return None;
    }

    let head = String::from_utf8_lossy(&source[..source.len().min(4096)]);
    let re = generated_marker_regex();
    let has_marker = head
        .lines()
        .filter(|line| !line.trim().is_empty())
        .take(10)
        .any(|line| is_comment_line(line) && re.is_match(line));
    if has_marker {
        return Some("generated");
    }

    // A sizeable file without a single newline is one giant line.
    if source.len() >= 1024 && !source.contains(&b'\n') {
        return Some("minified");
    }

    let (lines_seen, total_width) = source
        .split(|&b| b == b'\n')
        .filter(|raw_line| !raw_line.is_empty())
        .take(50)
        .fold((0u64, 0u64), |(lines, width), raw_line| {
            (lines + 1, width + raw_line.len() as u64)
        });
    // Require a few lines so one long line in a tiny file is not decisive.
    if lines_seen >= 5 && total_width / lines_seen > 500 {
        return Some("minified");
    }

    None
}