#![cfg_attr(coverage_nightly, coverage(off))]
use std::path::Path;
use tracing::info;
use super::SimpleDeepContext;
impl SimpleDeepContext {
/// Collects the first capture group of every match of every pattern in `content`.
///
/// Patterns are applied in the order given and names are appended in the order
/// their matches are found, so the same name can appear more than once. A
/// pattern that fails to compile is skipped silently, and matches whose first
/// group did not participate contribute nothing.
pub(super) fn extract_names_by_regex(content: &str, patterns: &[&str]) -> Vec<String> {
    let mut collected = Vec::new();
    for source in patterns {
        // Best-effort: an invalid pattern is ignored rather than reported.
        let Ok(matcher) = regex::Regex::new(source) else {
            continue;
        };
        collected.extend(
            matcher
                .captures_iter(content)
                .filter_map(|caps| caps.get(1))
                .map(|group| group.as_str().to_string()),
        );
    }
    collected
}
/// Extracts function and method names from JavaScript/TypeScript source text.
///
/// A fixed list of regexes is run over `content`; the first capture group of
/// each match is taken as a candidate name. Names are returned de-duplicated,
/// in order of first appearance (pattern order first, then match order).
/// `file_path` is used only for the log message.
///
/// Several patterns match any `name(...) {` shape, which also describes
/// control-flow heads such as `if (x) {` and anonymous `function (x) {`
/// expressions. Those keywords are dropped, mirroring the keyword filter that
/// `extract_function_names_heuristic` applies to its C-family and C# results.
pub(super) fn extract_js_ts_function_names(content: &str, file_path: &Path) -> Vec<String> {
    use std::collections::HashSet;

    // Reserved words the bare `name(...) {` patterns capture by accident.
    const NON_FUNCTION_KEYWORDS: &[&str] = &["if", "for", "while", "switch", "catch", "function"];

    let patterns = [
        r"function\s+(\w+)\s*\(",
        r"(?m)^\s*(?:const|let|var)\s+(\w+)\s*=\s*(?:async\s+)?\([^)]*\)\s*=>",
        r"(?m)^\s*(?:async\s+)?(\w+)\s*\([^)]*\)\s*\{",
        r"(?m)^\s*(?:static\s+)?(\w+)\s*\([^)]*\)\s*\{",
        r"(\w+)\s*:\s*function\s*\([^)]*\)",
        r"(\w+)\s*\([^)]*\)\s*\{",
        r"(?m)^\s*(?:async\s+)?(\w+)\s*\([^)]*\)\s*:",
    ];
    info!(
        "Using comprehensive TypeScript/JavaScript regex patterns for {}",
        file_path.display()
    );

    // `seen` borrows from `content`, giving O(1) duplicate checks while the
    // output vector keeps first-appearance order.
    let mut seen: HashSet<&str> = HashSet::new();
    let mut function_names = Vec::new();
    for pattern in &patterns {
        // Best-effort: a pattern that fails to compile is skipped.
        let Ok(re) = regex::Regex::new(pattern) else {
            continue;
        };
        for cap in re.captures_iter(content) {
            let Some(group) = cap.get(1) else {
                continue;
            };
            let name = group.as_str();
            if NON_FUNCTION_KEYWORDS.contains(&name) {
                continue;
            }
            if seen.insert(name) {
                function_names.push(name.to_string());
            }
        }
    }
    function_names
}
pub(super) async fn extract_function_names_heuristic(
&self,
file_path: &Path,
extension: &str,
) -> anyhow::Result<Vec<String>> {
use tokio::fs;
let content = fs::read_to_string(file_path).await?;
if matches!(extension, "js" | "ts") {
return Ok(Self::extract_js_ts_function_names(&content, file_path));
}
let patterns: &[&str] = match extension {
"py" => &[r"(?m)^\s*(?:async\s+)?def\s+(\w+)\s*\("],
"java" => &[
r"(?:public|private|protected)\s+(?:static\s+)?(?:\w+(?:<[^>]*>)?\s+)+(\w+)\s*\([^)]*\)\s*\{",
],
"go" => &[r"(?m)^func\s+(?:\([^)]*\)\s+)?(\w+)\s*\("],
"c" | "cpp" | "cc" | "cxx" | "cu" | "cuh" => &[r"(?m)^\s*\w+(?:\s*\**)?\s+(\w+)\s*\([^)]*\)\s*\{"],
"rb" | "ruchy" => &[r"(?m)^\s*def\s+(\w+)"],
"kt" => &[r"(?m)^\s*(?:suspend\s+)?fun\s+(\w+)\s*\("],
"cs" => &[
r"(?:public|private|protected|internal)?\s*(?:static|async)?\s*\w+\s+(\w+)\s*\([^)]*\)",
],
"lua" => &[
r"(?m)^\s*function\s+(\w+(?:[.:]\w+)*)\s*\(",
r"(?m)^\s*local\s+function\s+(\w+)\s*\(",
],
"lean" => &[
r"(?m)^\s*(?:noncomputable\s+|partial\s+|private\s+|protected\s+)?def\s+(\w+)",
r"(?m)^\s*(?:private\s+)?theorem\s+(\w+)",
r"(?m)^\s*(?:private\s+)?lemma\s+(\w+)",
r"(?m)^\s*(?:structure|class|inductive)\s+(\w+)",
],
_ => return Ok(vec![]),
};
let mut names = Self::extract_names_by_regex(&content, patterns);
let keywords: &[&str] = match extension {
"c" | "cpp" | "cc" | "cxx" | "cu" | "cuh" => &["if", "for", "while", "switch", "catch"],
"cs" => &["if", "while", "for", "foreach", "switch"],
_ => &[],
};
if !keywords.is_empty() {
names.retain(|n| !keywords.contains(&n.as_str()));
}
Ok(names)
}
pub(super) async fn analyze_file_complexity_heuristic(
&self,
file_path: &Path,
extension: &str,
) -> anyhow::Result<(usize, usize, f64)> {
use tokio::fs;
let content = fs::read_to_string(file_path).await?;
let function_patterns = match extension {
"py" => vec![r"(?m)^\s*def\s+\w+", r"(?m)^\s*async\s+def\s+\w+"],
"js" | "ts" => vec![
r"function\s+\w+",
r"(?m)^\s*const\s+\w+\s*=.*=>",
r"(?m)^\s*\w+\s*\([^)]*\)\s*\{",
],
"java" => vec![r"(public|private|protected)\s+\w+\s+\w+\s*\("],
"go" => vec![r"(?m)^func\s+(\(\w+\s+\*?\w+\)\s+)?\w+\s*\("],
"c" | "cpp" | "cc" | "cxx" | "cu" | "cuh" => vec![r"(?m)^\w+\s+\w+\s*\([^)]*\)\s*\{"],
"cs" => {
vec![r"(public|private|protected|internal)?\s*(static|async)?\s*\w+\s+\w+\s*\("]
}
"kt" => vec![r"(?m)^\s*(?:suspend\s+)?fun\s+\w+\s*\("],
"lua" => vec![r"(?m)^\s*function\s+\w+", r"(?m)^\s*local\s+function\s+\w+"],
"lean" => vec![
r"(?m)^\s*(?:noncomputable\s+|partial\s+|private\s+|protected\s+)?def\s+\w+",
r"(?m)^\s*(?:private\s+)?theorem\s+\w+",
r"(?m)^\s*(?:private\s+)?lemma\s+\w+",
],
_ => vec![],
};
if function_patterns.is_empty() {
return Ok((0, 0, 0.0));
}
let mut function_count = 0;
let mut complexity_sum = 0;
let mut high_complexity_count = 0;
for pattern in function_patterns {
if let Ok(re) = regex::Regex::new(pattern) {
for cap in re.captures_iter(&content) {
function_count += 1;
if let Some(func_match) = cap.get(0) {
let start = func_match.start();
let func_end = self
.find_function_end(content.get(start..).unwrap_or_default(), extension);
if let Some(end) = func_end {
let func_body = content.get(start..start + end).unwrap_or_default();
let complexity = self.estimate_complexity(func_body, extension);
complexity_sum += complexity;
if complexity > 10 {
high_complexity_count += 1;
}
}
}
}
}
}
let avg_complexity = if function_count > 0 {
complexity_sum as f64 / function_count as f64
} else {
0.0
};
Ok((function_count, high_complexity_count, avg_complexity))
}
/// Locates the end of the function that starts at the beginning of `content`.
///
/// Dispatches on `extension`: `py` uses the indentation scanner, `lua` the
/// `end`-keyword scanner, and everything else the brace-matching scanner.
/// Returns whatever the selected scanner returns.
pub(super) fn find_function_end(&self, content: &str, extension: &str) -> Option<usize> {
    let scanner: fn(&str) -> Option<usize> = match extension {
        "py" => Self::find_function_end_python,
        "lua" => Self::find_function_end_lua,
        _ => Self::find_function_end_brace,
    };
    scanner(content)
}
/// Finds where an indentation-delimited (Python) function ends.
///
/// `content` is expected to begin at the function header, optionally preceded
/// by blank lines (a header regex starting with `^\s*` can begin its match on
/// earlier blank lines). The first non-blank line is treated as the header;
/// the function ends just before the first later non-blank line whose
/// indentation is less than or equal to the header's.
///
/// Returns the byte offset of the end of the last line belonging to the
/// function (its line terminator excluded), `Some(content.len())` when no
/// dedented line follows, and `None` for empty input. Offsets are true byte
/// offsets into `content`, for both `\n` and `\r\n` line endings.
pub(super) fn find_function_end_python(content: &str) -> Option<usize> {
    if content.is_empty() {
        return None;
    }

    let mut header_indent: Option<usize> = None;
    // Byte offset at which the current line starts.
    let mut line_start = 0usize;
    // Byte offset at which the previous line's text ends (terminator excluded).
    let mut prev_line_end = 0usize;

    for raw in content.split_inclusive('\n') {
        let line = raw.strip_suffix('\n').unwrap_or(raw);
        let line = line.strip_suffix('\r').unwrap_or(line);

        // Blank lines never terminate the function and never define the header.
        if !line.trim().is_empty() {
            let indent = line.len() - line.trim_start().len();
            match header_indent {
                None => header_indent = Some(indent),
                Some(base) if indent <= base => return Some(prev_line_end),
                Some(_) => {}
            }
        }

        prev_line_end = line_start + line.len();
        line_start += raw.len();
    }
    Some(content.len())
}
/// Finds where an `end`-delimited (Lua) function ends.
///
/// Scans `content` line by line, tracking block depth. A line that starts
/// with `function `, `local function `, `if `, `for `, `while ` or `do` opens
/// a block. A line that is `end`, or starts with `end ` or `end,`, closes
/// one. An opening line that also finishes with ` end` (for example
/// `if x then return end`) opens and closes on the same line.
///
/// Returns the byte offset just past the line (including its terminator, if
/// any) on which the depth drops to zero or below. The offset never exceeds
/// `content.len()`. If no such line is found, `Some(content.len())` is
/// returned.
pub(super) fn find_function_end_lua(content: &str) -> Option<usize> {
    let mut depth = 0i32;
    // Bytes consumed so far, including the current line and its terminator.
    let mut consumed = 0usize;

    for raw in content.split_inclusive('\n') {
        consumed += raw.len();
        let trimmed = raw.trim();

        let opens = trimmed.starts_with("function ")
            || trimmed.starts_with("local function ")
            || trimmed.starts_with("if ")
            || trimmed.starts_with("for ")
            || trimmed.starts_with("while ")
            || trimmed == "do"
            || trimmed.starts_with("do ");
        if opens {
            depth += 1;
        }

        let closes = trimmed == "end"
            || trimmed.starts_with("end ")
            || trimmed.starts_with("end,")
            // One-line block: the opener on this line is closed by a trailing `end`.
            || (opens && trimmed.ends_with(" end"));
        if closes {
            depth -= 1;
            if depth <= 0 {
                return Some(consumed);
            }
        }
    }
    Some(content.len())
}
/// Finds where a brace-delimited function ends.
///
/// Walks `content` counting `{` and `}` outside of double-quoted strings
/// (a backslash inside a string escapes the next character). Returns the
/// byte offset just past the `}` that brings the depth back to zero, or
/// `None` if that never happens.
///
/// The offset is a byte offset suitable for slicing `content`, so it stays
/// correct when the text contains multi-byte characters.
pub(super) fn find_function_end_brace(content: &str) -> Option<usize> {
    let mut depth = 0i32;
    let mut in_string = false;
    let mut escaped = false;

    // `char_indices` yields byte offsets; a plain character count would drift
    // from the byte position as soon as a non-ASCII character appears.
    for (offset, ch) in content.char_indices() {
        if escaped {
            escaped = false;
            continue;
        }
        match ch {
            '\\' if in_string => escaped = true,
            '"' => in_string = !in_string,
            _ if in_string => {}
            '{' => depth += 1,
            '}' => {
                depth -= 1;
                if depth == 0 {
                    return Some(offset + ch.len_utf8());
                }
            }
            _ => {}
        }
    }
    None
}
/// Estimates the complexity of `func_body` by counting branching tokens.
///
/// The estimate starts at 1 and adds one for every occurrence of a
/// control-flow keyword for `extension`, every `&&` and `||`, and (for Lua)
/// every ` and ` / ` or `. Extensions without a keyword list only get the
/// operator counts.
///
/// Matching is plain substring search, so overlapping keywords are counted
/// more than once (for example `elif ` also contains `if `), and occurrences
/// inside strings or comments are counted too.
///
/// All C-family extensions used elsewhere in this impl (`c`, `cpp`, `cc`,
/// `cxx`, `cu`, `cuh`) share one keyword list.
// NOTE(review): `cs`, `kt` and `lean` have header patterns in
// `analyze_file_complexity_heuristic` but no keyword list here, so only the
// boolean operators are counted for them - confirm whether that is intended.
pub(super) fn estimate_complexity(&self, func_body: &str, extension: &str) -> usize {
    let control_flow_keywords: &[&str] = match extension {
        "py" => &[
            "if ", "elif ", "else:", "for ", "while ", "try:", "except:", "finally:",
        ],
        "js" | "ts" | "java" | "go" | "c" | "cpp" | "cc" | "cxx" | "cu" | "cuh" => &[
            "if ", "else ", "for ", "while ", "do ", "switch ", "case ", "catch ", "finally ",
        ],
        "lua" => &[
            "if ", "elseif ", "else", "for ", "while ", "repeat", "until ",
        ],
        _ => &[],
    };

    let keyword_hits: usize = control_flow_keywords
        .iter()
        .map(|&keyword| func_body.matches(keyword).count())
        .sum();

    // Short-circuit operators add a branch each.
    let mut operator_hits = func_body.matches("&&").count() + func_body.matches("||").count();
    if extension == "lua" {
        operator_hits += func_body.matches(" and ").count() + func_body.matches(" or ").count();
    }

    1 + keyword_hits + operator_hits
}
}