use crate::language::get_language_by_extension;
use lazy_static::lazy_static;
use regex::Regex;
use std::collections::HashSet;
use std::path::Path;
// Regexes shared by the extraction helpers in this file. `lazy_static!`
// compiles each one on first access; `expect` panics with the given message
// if a pattern fails to compile.
lazy_static! {
// An identifier followed, after optional whitespace, by `(`.
// Capture group 1 holds the identifier.
static ref CALL_PATTERN: Regex = Regex::new(r"\b([a-zA-Z_][a-zA-Z0-9_]*)\s*\(")
.expect("Failed to compile CALL_PATTERN regex");
// A word-bounded ASCII identifier: a letter or `_`, then letters, digits or `_`.
// Capture group 1 holds the identifier.
static ref IDENTIFIER_RE: Regex =
Regex::new(r"\b([a-zA-Z_][a-zA-Z0-9_]*)\b").expect("Failed to compile IDENTIFIER_RE regex");
}
/// Returns the extension of `file_path` without the leading dot.
///
/// Yields an empty string when the path has no extension or when the
/// extension is not valid UTF-8.
fn get_file_extension(file_path: &str) -> &str {
    match Path::new(file_path).extension() {
        Some(os_ext) => os_ext.to_str().unwrap_or(""),
        None => "",
    }
}
/// Reports whether `name` is one of the fixed set of keywords, literals and
/// common built-in names that the extractors in this file ignore.
///
/// The comparison is exact and case-sensitive.
pub fn is_keyword_or_builtin(name: &str) -> bool {
    // Names drawn from several languages; kept as one flat table.
    const RESERVED_NAMES: &[&str] = &[
        // Control flow
        "if", "else", "for", "while", "loop", "match", "switch", "case", "return", "break",
        "continue",
        // Definitions and declarations
        "fn", "function", "def", "func", "class", "struct", "impl", "trait", "interface",
        "enum", "type", "let", "const", "var", "mut",
        // Visibility and modifiers
        "pub", "public", "private", "protected", "static", "async", "await",
        // Exceptions and construction
        "try", "catch", "throw", "new",
        // Module system
        "import", "export", "use", "from", "require", "include",
        // Literals and receivers
        "true", "false", "null", "None", "nil", "undefined", "self", "this", "super",
        // Output helpers
        "println", "print", "printf", "console",
        // Common types and constructors
        "String", "Vec", "Option", "Result", "Ok", "Err", "Some",
        // Common collection methods
        "len", "append", "push", "pop", "get", "set", "map", "filter",
    ];
    RESERVED_NAMES.contains(&name)
}
pub fn extract_identifier_from_match(caps: ®ex::Captures) -> Option<String> {
let name = caps.get(1).map(|m| m.as_str().to_string()).or_else(|| {
caps.get(0).and_then(|m| {
m.as_str()
.split_whitespace()
.last()
.map(|s| s.trim_matches(|c: char| !c.is_alphanumeric() && c != '_'))
.filter(|s| !s.is_empty())
.map(String::from)
})
})?;
if is_keyword_or_builtin(&name) {
return None;
}
Some(name)
}
/// Finds function definitions in `content` using the function patterns of
/// the language selected by `file_path`'s extension.
///
/// Returns `(name, line_number)` pairs with 1-based line numbers. Results are
/// grouped by pattern, in pattern order, and by line within each pattern.
/// An unrecognised extension yields an empty vector, and patterns that fail
/// to compile are skipped.
pub fn extract_functions(content: &str, file_path: &str) -> Vec<(String, usize)> {
    let mut found = Vec::new();
    let lang = match get_language_by_extension(get_file_extension(file_path)) {
        Some(lang) => lang,
        None => return found,
    };
    for pattern in lang.function_patterns {
        let re = match Regex::new(pattern) {
            Ok(re) => re,
            Err(_) => continue,
        };
        found.extend(content.lines().enumerate().filter_map(|(idx, line)| {
            let caps = re.captures(line)?;
            let name = extract_identifier_from_match(&caps)?;
            Some((name, idx + 1))
        }));
    }
    found
}
/// Finds class-like definitions in `content` using the class patterns of the
/// language selected by `file_path`'s extension.
///
/// Returns `(name, line_number)` pairs with 1-based line numbers. Results are
/// grouped by pattern, in pattern order, and by line within each pattern.
/// An unrecognised extension yields an empty vector, and patterns that fail
/// to compile are skipped.
pub fn extract_classes(content: &str, file_path: &str) -> Vec<(String, usize)> {
    let mut found = Vec::new();
    let lang = match get_language_by_extension(get_file_extension(file_path)) {
        Some(lang) => lang,
        None => return found,
    };
    for pattern in lang.class_patterns {
        let re = match Regex::new(pattern) {
            Ok(re) => re,
            Err(_) => continue,
        };
        found.extend(content.lines().enumerate().filter_map(|(idx, line)| {
            let caps = re.captures(line)?;
            let name = extract_identifier_from_match(&caps)?;
            Some((name, idx + 1))
        }));
    }
    found
}
/// Collects the distinct names on `line` that `CALL_PATTERN` matches, i.e.
/// identifiers followed by `(`.
///
/// Names rejected by `is_keyword_or_builtin` are left out.
pub fn extract_function_calls(line: &str) -> HashSet<String> {
    CALL_PATTERN
        .captures_iter(line)
        .filter_map(|caps| caps.get(1))
        .map(|callee| callee.as_str())
        .filter(|callee| !is_keyword_or_builtin(callee))
        .map(String::from)
        .collect()
}
/// Collects the distinct identifiers in `content` that `IDENTIFIER_RE`
/// matches.
///
/// Names rejected by `is_keyword_or_builtin` are left out.
pub fn extract_identifier_references(content: &str) -> HashSet<String> {
    IDENTIFIER_RE
        .captures_iter(content)
        .filter_map(|caps| caps.get(1))
        .map(|ident| ident.as_str())
        .filter(|ident| !is_keyword_or_builtin(ident))
        .map(String::from)
        .collect()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Keywords are recognised; an ordinary identifier is not.
    #[test]
    fn test_is_keyword_or_builtin() {
        for reserved in ["if", "function"].iter() {
            assert!(is_keyword_or_builtin(reserved));
        }
        assert!(!is_keyword_or_builtin("myFunction"));
    }

    /// Called names are collected; the `let` keyword is not reported.
    #[test]
    fn test_extract_function_calls() {
        let calls = extract_function_calls("let result = calculate(x) + process(y);");
        for expected in ["calculate", "process"].iter() {
            assert!(calls.contains(*expected));
        }
        assert!(!calls.contains("let"));
    }

    /// Plain identifiers are collected; the `let` keyword is filtered out.
    #[test]
    fn test_extract_identifier_references() {
        let refs = extract_identifier_references("let x = 10; let y = x + 5;");
        for expected in ["x", "y"].iter() {
            assert!(refs.contains(*expected));
        }
        assert!(!refs.contains("let"));
    }
}