use std::collections::{HashMap, HashSet};
use argyph_parse::{Import, ParsedFile, Symbol, SymbolKind};
use camino::Utf8PathBuf;
use crate::edge::{Confidence, Edge, EdgeKind};
use crate::resolve::{
python::PythonResolver, rust::RustResolver, typescript::TypeScriptResolver, ImportResolver,
};
/// Turns a set of parsed source files into graph edges.
pub trait GraphBuilder {
/// Builds the edges for `files`, given as `(path, parse result)` pairs.
///
/// # Errors
///
/// Returns a `GraphError` when the implementation cannot build the edges.
fn build_edges(
&self,
files: &[(Utf8PathBuf, ParsedFile)],
) -> Result<Vec<Edge>, crate::error::GraphError>;
}
/// Stateless [`GraphBuilder`] implementation; the unit struct carries no configuration.
pub struct DefaultGraphBuilder;
/// Returns the normalized string form of `path`.
///
/// The result is what this module uses as the key of its per-file symbol table.
fn normalized(path: &Utf8PathBuf) -> String {
    let raw: &str = path.as_str();
    super::resolve::normalize_path(raw)
}
impl GraphBuilder for DefaultGraphBuilder {
    /// Builds every edge kind this builder knows about for `files`.
    ///
    /// For each file that declares at least one symbol, in input order, this emits:
    /// 1. one `Defines` self-edge per symbol (`Resolved` confidence),
    /// 2. the within-file `References` / `Calls` edges,
    /// 3. the `Imports` / `ImportedBy` edges, when a resolver exists for the file's extension,
    /// 4. whatever `build_cross_file_references` contributes.
    ///
    /// Files without symbols contribute nothing. This implementation always returns `Ok`.
    fn build_edges(
        &self,
        files: &[(Utf8PathBuf, ParsedFile)],
    ) -> Result<Vec<Edge>, crate::error::GraphError> {
        // Symbol table keyed by normalized path; import resolution looks targets up here.
        // Files without symbols get no entry at all.
        let mut all_symbols: HashMap<String, Vec<&Symbol>> = HashMap::new();
        for (file_path, parsed) in files {
            if parsed.symbols.is_empty() {
                continue;
            }
            all_symbols
                .entry(normalized(file_path))
                .or_default()
                .extend(parsed.symbols.iter());
        }

        let mut edges = Vec::new();
        for (file_path, parsed) in files {
            if parsed.symbols.is_empty() {
                continue;
            }

            // Every symbol defines itself.
            edges.extend(parsed.symbols.iter().map(|sym| Edge {
                from: sym.id.clone(),
                to: sym.id.clone(),
                kind: EdgeKind::Defines,
                confidence: Confidence::Resolved,
            }));

            let file_symbols: Vec<&Symbol> = parsed.symbols.iter().collect();
            build_within_file_references(
                &file_symbols,
                &parsed.chunks,
                &parsed.symbols,
                &mut edges,
            );

            // Only files with a known extension have an import resolver.
            if let Some(resolver) = resolver_for(file_path) {
                build_import_edges(
                    file_path,
                    &parsed.imports,
                    &all_symbols,
                    &*resolver,
                    &parsed.symbols,
                    &mut edges,
                );
            }

            build_cross_file_references(file_path, &parsed.imports, &all_symbols, &mut edges);
        }

        Ok(edges)
    }
}
/// Picks the import resolver matching the suffix of `file_path`.
///
/// `.rs` selects the Rust resolver, `.ts` / `.tsx` the TypeScript resolver and `.py` the Python
/// resolver. Any other suffix yields `None`. The comparison is a case-sensitive suffix match on
/// the whole path string.
fn resolver_for(file_path: &Utf8PathBuf) -> Option<Box<dyn ImportResolver>> {
    let path = file_path.as_str();
    let has_suffix = |suffix: &str| path.ends_with(suffix);

    if has_suffix(".rs") {
        return Some(Box::new(RustResolver));
    }
    if has_suffix(".ts") || has_suffix(".tsx") {
        return Some(Box::new(TypeScriptResolver));
    }
    if has_suffix(".py") {
        return Some(Box::new(PythonResolver));
    }
    None
}
/// Identifier occurrences collected from a piece of source text.
struct WordIndex {
    /// Every identifier that appears anywhere in the text.
    mentioned: HashSet<String>,
    /// The identifiers that are immediately followed by `(`.
    called: HashSet<String>,
}

/// Scans `text` for identifier-like words and records which of them look like calls.
///
/// A word is a maximal run of `_` and alphanumeric characters (Unicode-aware, so `größe` is one
/// word rather than the fragments `gr` and `e`). Runs that begin with a digit are numeric
/// literals such as `0xff` or `1u8`; they are skipped as a whole so that their tails (`xff`,
/// `u8`) are not mistaken for identifiers.
///
/// Every remaining word lands in `mentioned`; it also lands in `called` when the character
/// right after it is `(` (no whitespace allowed in between).
fn index_words(text: &str) -> WordIndex {
    fn is_word_char(c: char) -> bool {
        c == '_' || c.is_alphanumeric()
    }

    let mut mentioned = HashSet::new();
    let mut called = HashSet::new();

    let mut rest = text;
    while let Some(start) = rest.find(is_word_char) {
        // `run` begins with a word character, so the word below is never empty and `rest`
        // strictly shrinks on every iteration.
        let run = &rest[start..];
        let len = run
            .find(|c: char| !is_word_char(c))
            .unwrap_or(run.len());
        let (word, after) = run.split_at(len);
        rest = after;

        // A leading digit means this run is a numeric literal, not an identifier.
        if word.starts_with(char::is_numeric) {
            continue;
        }

        if after.starts_with('(') {
            called.insert(word.to_string());
        }
        mentioned.insert(word.to_string());
    }

    WordIndex { mentioned, called }
}
fn build_within_file_references(
symbols: &[&Symbol],
chunks: &[argyph_parse::Chunk],
all_file_symbols: &[Symbol],
edges: &mut Vec<Edge>,
) {
for sym in symbols {
if sym.kind == SymbolKind::Variable || sym.kind == SymbolKind::Constant {
continue;
}
let mut index = WordIndex {
mentioned: HashSet::new(),
called: HashSet::new(),
};
for chunk in chunks {
if range_overlap(&sym.range, &chunk.range) {
let wi = index_words(&chunk.text);
index.mentioned.extend(wi.mentioned);
index.called.extend(wi.called);
}
}
for other in all_file_symbols {
if other.id == sym.id {
continue;
}
if index.mentioned.contains(other.name.as_str()) {
edges.push(Edge {
from: sym.id.clone(),
to: other.id.clone(),
kind: EdgeKind::References,
confidence: Confidence::Heuristic,
});
}
let is_callable = matches!(other.kind, SymbolKind::Function | SymbolKind::Method);
if is_callable && index.called.contains(other.name.as_str()) {
edges.push(Edge {
from: sym.id.clone(),
to: other.id.clone(),
kind: EdgeKind::Calls,
confidence: Confidence::Heuristic,
});
}
}
}
}
/// Adds `Imports` / `ImportedBy` edge pairs for the imports of one source file.
///
/// Each import is resolved to a target file through `resolver`; imports that do not resolve,
/// or that resolve to a file with no entry in `all_symbols`, are skipped. Every imported item
/// name is then matched against the target file's symbols (first symbol with that name wins).
///
/// The import is not attributed to a particular symbol: *every* symbol in `source_symbols`
/// gets an `Imports` edge to each matched target symbol, plus the reverse `ImportedBy` edge.
/// All edges carry `Confidence::Heuristic`.
///
/// The name lookup does not depend on the source symbol, so it is done once per import rather
/// than once per (source symbol, item) pair.
fn build_import_edges(
    source_file: &Utf8PathBuf,
    imports: &[Import],
    all_symbols: &HashMap<String, Vec<&Symbol>>,
    resolver: &dyn ImportResolver,
    source_symbols: &[Symbol],
    edges: &mut Vec<Edge>,
) {
    for import in imports {
        let Some(resolved) =
            resolver.resolve_import(source_file, &import.module_path, &import.raw)
        else {
            continue;
        };
        let target_file = super::resolve::normalize_path(&resolved.file_path);
        let Some(target_symbols) = all_symbols.get(&target_file) else {
            continue;
        };

        // Matched targets, kept in the order of `import.items` so edge order is stable.
        let imported: Vec<&Symbol> = import
            .items
            .iter()
            .filter_map(|item_name| {
                target_symbols
                    .iter()
                    .copied()
                    .find(|candidate| candidate.name.as_str() == item_name.as_str())
            })
            .collect();

        for source_sym in source_symbols {
            for target_sym in &imported {
                edges.push(Edge {
                    from: source_sym.id.clone(),
                    to: target_sym.id.clone(),
                    kind: EdgeKind::Imports,
                    confidence: Confidence::Heuristic,
                });
                edges.push(Edge {
                    from: target_sym.id.clone(),
                    to: source_sym.id.clone(),
                    kind: EdgeKind::ImportedBy,
                    confidence: Confidence::Heuristic,
                });
            }
        }
    }
}
/// Placeholder for cross-file reference detection.
///
/// Currently a no-op: the body is empty and every parameter is unused (hence the `_`
/// prefixes), so calling it contributes no edges.
///
/// NOTE(review): `_edges` is a `&mut [Edge]` slice, which cannot grow; an implementation that
/// appends edges will need `&mut Vec<Edge>` here, like the sibling builders take.
fn build_cross_file_references(
_source_file: &Utf8PathBuf,
_imports: &[Import],
_all_symbols: &HashMap<String, Vec<&Symbol>>,
_edges: &mut [Edge],
) {
}
/// Reports whether the two byte ranges share at least one byte.
///
/// Both ranges are treated as half-open (`start..end`): ranges that merely touch do not
/// overlap, and an empty range overlaps nothing.
fn range_overlap(a: &argyph_parse::ByteRange, b: &argyph_parse::ByteRange) -> bool {
    let a_begins_before_b_ends = a.start < b.end;
    let b_begins_before_a_ends = b.start < a.end;
    a_begins_before_b_ends && b_begins_before_a_ends
}
#[cfg(test)]
// Panicking on failure is the desired behavior inside tests.
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
    use super::*;
    use argyph_parse::SymbolId;

    /// Builds a symbol named `name` in `file` covering bytes `start..end`.
    fn make_symbol(name: &str, kind: SymbolKind, file: &str, start: usize, end: usize) -> Symbol {
        use argyph_parse::ByteRange;
        Symbol {
            id: SymbolId::new(&Utf8PathBuf::from(file), name, start),
            name: name.to_string(),
            kind,
            file: Utf8PathBuf::from(file),
            range: ByteRange::new(start, end),
            signature: None,
            parent: None,
        }
    }

    /// Builds a Rust function-body chunk with the given `text` covering bytes `start..end`.
    fn make_chunk(text: &str, file: &str, start: usize, end: usize) -> argyph_parse::Chunk {
        use argyph_parse::{ByteRange, Chunk, ChunkId, ChunkKind};
        Chunk {
            id: ChunkId::from_text(text),
            file: Utf8PathBuf::from(file),
            range: ByteRange::new(start, end),
            text: text.to_string(),
            kind: ChunkKind::FunctionBody,
            language: argyph_fs::Language::Rust,
        }
    }

    #[test]
    fn every_symbol_gets_defines_edge() {
        let sym = make_symbol("foo", SymbolKind::Function, "src/lib.rs", 0, 10);
        let parsed = ParsedFile {
            symbols: vec![sym],
            chunks: vec![],
            imports: vec![],
        };
        let builder = DefaultGraphBuilder;
        let edges = builder
            .build_edges(&[(Utf8PathBuf::from("src/lib.rs"), parsed)])
            .expect("build_edges");
        let defines: Vec<&Edge> = edges
            .iter()
            .filter(|e| e.kind == EdgeKind::Defines)
            .collect();
        assert_eq!(defines.len(), 1);
        assert_eq!(defines[0].from, defines[0].to);
        assert_eq!(defines[0].confidence, Confidence::Resolved);
    }

    #[test]
    fn word_index_matches_whole_identifiers() {
        let wi = index_words("let x = foo + 1");
        assert!(wi.mentioned.contains("foo"));

        // `foo` as a prefix of a longer identifier must not count.
        let wi = index_words("let x = foobar + 1");
        assert!(!wi.mentioned.contains("foo"));
        assert!(wi.mentioned.contains("foobar"));

        // Nor as a suffix.
        let wi = index_words("snafoo()");
        assert!(!wi.mentioned.contains("foo"));
        assert!(wi.mentioned.contains("snafoo"));
    }

    #[test]
    fn word_index_detects_calls() {
        let wi = index_words("void foo(a, b)");
        assert!(wi.called.contains("foo"));

        let wi = index_words("let x = foo(1, 2)");
        assert!(wi.called.contains("foo"));

        // A bare mention is not a call.
        let wi = index_words("let x = foo");
        assert!(wi.mentioned.contains("foo"));
        assert!(!wi.called.contains("foo"));

        let wi = index_words("foo_bar()");
        assert!(wi.called.contains("foo_bar"));
        assert!(!wi.called.contains("foo"));
    }

    #[test]
    fn detect_within_file_reference() {
        let sym_a = make_symbol("helper", SymbolKind::Function, "src/lib.rs", 0, 50);
        let sym_b = make_symbol("main_func", SymbolKind::Function, "src/lib.rs", 60, 200);
        let helper_id = sym_a.id.clone();
        let main_id = sym_b.id.clone();
        // The chunk lies inside `main_func` only, so `helper` itself has no text to scan.
        let chunk = make_chunk("let x = helper(1);", "src/lib.rs", 70, 190);
        let parsed = ParsedFile {
            symbols: vec![sym_a, sym_b],
            chunks: vec![chunk],
            imports: vec![],
        };
        let builder = DefaultGraphBuilder;
        let edges = builder
            .build_edges(&[(Utf8PathBuf::from("src/lib.rs"), parsed)])
            .expect("build_edges");

        let refs: Vec<&Edge> = edges
            .iter()
            .filter(|e| e.kind == EdgeKind::References)
            .collect();
        assert!(!refs.is_empty(), "expected at least one reference edge");
        assert_eq!(refs.len(), 1);
        assert_eq!(refs[0].from, main_id);
        assert_eq!(refs[0].to, helper_id);
        assert_eq!(refs[0].confidence, Confidence::Heuristic);

        // `helper(` is a call site, so the matching call edge must exist too.
        let calls: Vec<&Edge> = edges
            .iter()
            .filter(|e| e.kind == EdgeKind::Calls)
            .collect();
        assert_eq!(calls.len(), 1);
        assert_eq!(calls[0].from, main_id);
        assert_eq!(calls[0].to, helper_id);
        assert_eq!(calls[0].confidence, Confidence::Heuristic);
    }
}