Skip to main content

argyph_graph/
builder.rs

1use std::collections::{HashMap, HashSet};
2
3use argyph_parse::{Import, ParsedFile, Symbol, SymbolKind};
4use camino::Utf8PathBuf;
5
6use crate::edge::{Confidence, Edge, EdgeKind};
7use crate::resolve::{
8    python::PythonResolver, rust::RustResolver, typescript::TypeScriptResolver, ImportResolver,
9};
10
11pub trait GraphBuilder {
12    fn build_edges(
13        &self,
14        files: &[(Utf8PathBuf, ParsedFile)],
15    ) -> Result<Vec<Edge>, crate::error::GraphError>;
16}
17
18pub struct DefaultGraphBuilder;
19
20fn normalized(path: &Utf8PathBuf) -> String {
21    super::resolve::normalize_path(path.as_str())
22}
23
24impl GraphBuilder for DefaultGraphBuilder {
25    fn build_edges(
26        &self,
27        files: &[(Utf8PathBuf, ParsedFile)],
28    ) -> Result<Vec<Edge>, crate::error::GraphError> {
29        let mut edges = Vec::new();
30        let mut all_symbols: HashMap<String, Vec<&Symbol>> = HashMap::new();
31
32        for (file_path, parsed) in files {
33            let key = normalized(file_path);
34            for sym in &parsed.symbols {
35                all_symbols.entry(key.clone()).or_default().push(sym);
36            }
37        }
38
39        for (file_path, parsed) in files {
40            if parsed.symbols.is_empty() {
41                continue;
42            }
43
44            for sym in &parsed.symbols {
45                edges.push(Edge {
46                    from: sym.id.clone(),
47                    to: sym.id.clone(),
48                    kind: EdgeKind::Defines,
49                    confidence: Confidence::Resolved,
50                });
51            }
52
53            let file_symbols: Vec<&Symbol> = parsed.symbols.iter().collect();
54            build_within_file_references(
55                &file_symbols,
56                &parsed.chunks,
57                &parsed.symbols,
58                &mut edges,
59            );
60
61            let resolver = resolver_for(file_path);
62            if let Some(resolver) = resolver {
63                build_import_edges(
64                    file_path,
65                    &parsed.imports,
66                    &all_symbols,
67                    &*resolver,
68                    &parsed.symbols,
69                    &mut edges,
70                );
71            }
72
73            build_cross_file_references(file_path, &parsed.imports, &all_symbols, &mut edges);
74        }
75
76        Ok(edges)
77    }
78}
79
80fn resolver_for(file_path: &Utf8PathBuf) -> Option<Box<dyn ImportResolver>> {
81    let s = file_path.as_str();
82    if s.ends_with(".rs") {
83        Some(Box::new(RustResolver))
84    } else if s.ends_with(".ts") || s.ends_with(".tsx") {
85        Some(Box::new(TypeScriptResolver))
86    } else if s.ends_with(".py") {
87        Some(Box::new(PythonResolver))
88    } else {
89        None
90    }
91}
92
93/// All distinct identifier words in a span of text, plus the subset of
94/// those words that appear immediately followed by `(` (a call site).
95struct WordIndex {
96    mentioned: HashSet<String>,
97    called: HashSet<String>,
98}
99
100/// Tokenize `text` once into its identifier words. An identifier is the
101/// usual `[A-Za-z_][A-Za-z0-9_]*`. A word is recorded as "called" when
102/// the character immediately after it is `(`, matching the previous
103/// `name(` substring heuristic.
104fn index_words(text: &str) -> WordIndex {
105    let mut mentioned = HashSet::new();
106    let mut called = HashSet::new();
107    let bytes = text.as_bytes();
108    let mut i = 0;
109    while i < bytes.len() {
110        let b = bytes[i];
111        let is_ident_start = b == b'_' || b.is_ascii_alphabetic();
112        if !is_ident_start {
113            i += 1;
114            continue;
115        }
116        let start = i;
117        while i < bytes.len() {
118            let c = bytes[i];
119            if c == b'_' || c.is_ascii_alphanumeric() {
120                i += 1;
121            } else {
122                break;
123            }
124        }
125        // `start..i` is ASCII-only by construction, so slicing is safe.
126        let word = &text[start..i];
127        if bytes.get(i) == Some(&b'(') {
128            called.insert(word.to_string());
129        }
130        mentioned.insert(word.to_string());
131    }
132    WordIndex { mentioned, called }
133}
134
135/// Resolve within-file references and calls.
136///
137/// Previously this scanned every symbol's chunk text for every other
138/// symbol's name (`O(symbols² × text-length)`), which did not scale to
139/// symbol-dense files. Now each symbol's owning chunks are tokenized
140/// once into a `WordIndex`, and every cross-symbol check is an O(1)
141/// hash-set membership test.
142fn build_within_file_references(
143    symbols: &[&Symbol],
144    chunks: &[argyph_parse::Chunk],
145    all_file_symbols: &[Symbol],
146    edges: &mut Vec<Edge>,
147) {
148    for sym in symbols {
149        if sym.kind == SymbolKind::Variable || sym.kind == SymbolKind::Constant {
150            continue;
151        }
152
153        // Tokenize this symbol's owning chunks exactly once.
154        let mut index = WordIndex {
155            mentioned: HashSet::new(),
156            called: HashSet::new(),
157        };
158        for chunk in chunks {
159            if range_overlap(&sym.range, &chunk.range) {
160                let wi = index_words(&chunk.text);
161                index.mentioned.extend(wi.mentioned);
162                index.called.extend(wi.called);
163            }
164        }
165
166        for other in all_file_symbols {
167            if other.id == sym.id {
168                continue;
169            }
170
171            if index.mentioned.contains(other.name.as_str()) {
172                edges.push(Edge {
173                    from: sym.id.clone(),
174                    to: other.id.clone(),
175                    kind: EdgeKind::References,
176                    confidence: Confidence::Heuristic,
177                });
178            }
179
180            let is_callable = matches!(other.kind, SymbolKind::Function | SymbolKind::Method);
181            if is_callable && index.called.contains(other.name.as_str()) {
182                edges.push(Edge {
183                    from: sym.id.clone(),
184                    to: other.id.clone(),
185                    kind: EdgeKind::Calls,
186                    confidence: Confidence::Heuristic,
187                });
188            }
189        }
190    }
191}
192
193fn build_import_edges(
194    source_file: &Utf8PathBuf,
195    imports: &[Import],
196    all_symbols: &HashMap<String, Vec<&Symbol>>,
197    resolver: &dyn ImportResolver,
198    source_symbols: &[Symbol],
199    edges: &mut Vec<Edge>,
200) {
201    for import in imports {
202        let resolved = resolver.resolve_import(source_file, &import.module_path, &import.raw);
203        let target_file = match resolved {
204            Some(t) => super::resolve::normalize_path(&t.file_path),
205            None => continue,
206        };
207
208        let target_symbols = all_symbols.get(&target_file);
209        let Some(target_symbols) = target_symbols else {
210            continue;
211        };
212
213        for source_sym in source_symbols {
214            for item_name in &import.items {
215                if let Some(target_sym) = target_symbols
216                    .iter()
217                    .find(|s| s.name.as_str() == item_name.as_str())
218                {
219                    edges.push(Edge {
220                        from: source_sym.id.clone(),
221                        to: target_sym.id.clone(),
222                        kind: EdgeKind::Imports,
223                        confidence: Confidence::Heuristic,
224                    });
225
226                    edges.push(Edge {
227                        from: target_sym.id.clone(),
228                        to: source_sym.id.clone(),
229                        kind: EdgeKind::ImportedBy,
230                        confidence: Confidence::Heuristic,
231                    });
232                }
233            }
234        }
235    }
236}
237
238fn build_cross_file_references(
239    _source_file: &Utf8PathBuf,
240    _imports: &[Import],
241    _all_symbols: &HashMap<String, Vec<&Symbol>>,
242    _edges: &mut [Edge],
243) {
244}
245
246fn range_overlap(a: &argyph_parse::ByteRange, b: &argyph_parse::ByteRange) -> bool {
247    a.start < b.end && b.start < a.end
248}
249
250#[cfg(test)]
251#[allow(clippy::unwrap_used, clippy::expect_used)]
252mod tests {
253    use super::*;
254    use argyph_parse::SymbolId;
255
256    fn make_symbol(name: &str, kind: SymbolKind, file: &str, start: usize, end: usize) -> Symbol {
257        use argyph_parse::ByteRange;
258        Symbol {
259            id: SymbolId::new(&Utf8PathBuf::from(file), name, start),
260            name: name.to_string(),
261            kind,
262            file: Utf8PathBuf::from(file),
263            range: ByteRange::new(start, end),
264            signature: None,
265            parent: None,
266        }
267    }
268
269    fn make_chunk(text: &str, file: &str, start: usize, end: usize) -> argyph_parse::Chunk {
270        use argyph_parse::{ByteRange, Chunk, ChunkId, ChunkKind};
271        Chunk {
272            id: ChunkId::from_text(text),
273            file: Utf8PathBuf::from(file),
274            range: ByteRange::new(start, end),
275            text: text.to_string(),
276            kind: ChunkKind::FunctionBody,
277            language: argyph_fs::Language::Rust,
278        }
279    }
280
281    #[test]
282    fn every_symbol_gets_defines_edge() {
283        let sym = make_symbol("foo", SymbolKind::Function, "src/lib.rs", 0, 10);
284        let parsed = ParsedFile {
285            symbols: vec![sym],
286            chunks: vec![],
287            imports: vec![],
288        };
289        let builder = DefaultGraphBuilder;
290        let edges = builder
291            .build_edges(&[(Utf8PathBuf::from("src/lib.rs"), parsed)])
292            .expect("build_edges");
293
294        let defines: Vec<&Edge> = edges
295            .iter()
296            .filter(|e| e.kind == EdgeKind::Defines)
297            .collect();
298        assert_eq!(defines.len(), 1);
299        assert_eq!(defines[0].from, defines[0].to);
300        assert_eq!(defines[0].confidence, Confidence::Resolved);
301    }
302
303    #[test]
304    fn word_index_matches_whole_identifiers() {
305        let wi = index_words("let x = foo + 1");
306        assert!(wi.mentioned.contains("foo"));
307
308        let wi = index_words("let x = foobar + 1");
309        // `foobar` is one word; `foo` must not be reported.
310        assert!(!wi.mentioned.contains("foo"));
311        assert!(wi.mentioned.contains("foobar"));
312
313        let wi = index_words("snafoo()");
314        assert!(!wi.mentioned.contains("foo"));
315        assert!(wi.mentioned.contains("snafoo"));
316    }
317
318    #[test]
319    fn word_index_detects_calls() {
320        let wi = index_words("void foo(a, b)");
321        assert!(wi.called.contains("foo"));
322
323        let wi = index_words("let x = foo(1, 2)");
324        assert!(wi.called.contains("foo"));
325
326        let wi = index_words("let x = foo");
327        assert!(wi.mentioned.contains("foo"));
328        assert!(!wi.called.contains("foo"));
329
330        // `foo_bar()` is a call to `foo_bar`, not `foo`.
331        let wi = index_words("foo_bar()");
332        assert!(wi.called.contains("foo_bar"));
333        assert!(!wi.called.contains("foo"));
334    }
335
336    #[test]
337    fn detect_within_file_reference() {
338        let sym_a = make_symbol("helper", SymbolKind::Function, "src/lib.rs", 0, 50);
339        let sym_b = make_symbol("main_func", SymbolKind::Function, "src/lib.rs", 60, 200);
340        let chunk = make_chunk("let x = helper(1);", "src/lib.rs", 70, 190);
341        let parsed = ParsedFile {
342            symbols: vec![sym_a, sym_b],
343            chunks: vec![chunk],
344            imports: vec![],
345        };
346        let builder = DefaultGraphBuilder;
347        let edges = builder
348            .build_edges(&[(Utf8PathBuf::from("src/lib.rs"), parsed)])
349            .expect("build_edges");
350
351        let refs: Vec<&Edge> = edges
352            .iter()
353            .filter(|e| e.kind == EdgeKind::References)
354            .collect();
355        assert!(!refs.is_empty(), "expected at least one reference edge");
356    }
357}