Skip to main content

infigraph_core/extract/
mod.rs

1mod entities;
2mod relations;
3pub use entities::extract_entities;
4pub use relations::{extract_relations, extract_relations_with_custom_edges};
5
6use anyhow::Result;
7use sha2::{Digest, Sha256};
8
9use crate::analysis::extract_statements;
10use crate::lang::{LanguagePack, ParserBackend};
11use crate::model::{FileExtraction, Relation, RelationKind, Span, Statement, SymbolKind};
12
13thread_local! {
14    static TS_PARSER: std::cell::RefCell<tree_sitter::Parser> = std::cell::RefCell::new(tree_sitter::Parser::new());
15}
16
17/// Parse a source file and extract all symbols and relationships.
18pub fn extract_file(path: &str, source: &[u8], pack: &LanguagePack) -> Result<FileExtraction> {
19    let (symbols, mut relations, statements) = match &pack.backend {
20        ParserBackend::TreeSitter {
21            grammar,
22            entity_query,
23            relation_query,
24        } => TS_PARSER.with(|cell| -> Result<_> {
25            let mut parser = cell.borrow_mut();
26            parser.set_language(grammar)?;
27
28            let tree = parser
29                .parse(source, None)
30                .ok_or_else(|| anyhow::anyhow!("failed to parse {}", path))?;
31
32            let root = tree.root_node();
33
34            let symbols = extract_entities(path, source, root, entity_query, &pack.name);
35            let relations = if pack.custom_edges.is_empty() {
36                extract_relations(path, source, root, relation_query)
37            } else {
38                extract_relations_with_custom_edges(
39                    path,
40                    source,
41                    root,
42                    relation_query,
43                    &pack.custom_edges,
44                )
45            };
46            let stmts = extract_statements_for_symbols(root, source, &symbols);
47            Ok((symbols, relations, stmts))
48        })?,
49        ParserBackend::Custom(extractor) => {
50            let (s, r) = extractor.extract(path, source, &pack.name)?;
51            (s, r, Vec::new())
52        }
53    };
54
55    // Generate CALLS edges from Route symbols to their handler functions
56    generate_route_handler_edges(path, &symbols, &mut relations);
57
58    let content_hash = {
59        let mut hasher = Sha256::new();
60        hasher.update(source);
61        format!("{:x}", hasher.finalize())
62    };
63
64    Ok(FileExtraction {
65        file: path.to_string(),
66        language: pack.name.clone(),
67        content_hash,
68        symbols,
69        relations,
70        statements,
71    })
72}
73
74fn extract_statements_for_symbols(
75    root: tree_sitter::Node<'_>,
76    source: &[u8],
77    symbols: &[crate::model::Symbol],
78) -> Vec<Statement> {
79    let fn_symbols: Vec<(&str, u32, u32)> = symbols
80        .iter()
81        .filter(|s| {
82            matches!(
83                s.kind,
84                SymbolKind::Function | SymbolKind::Method | SymbolKind::Test
85            )
86        })
87        .map(|s| (s.id.as_str(), s.span.start_line, s.span.end_line))
88        .collect();
89
90    if fn_symbols.is_empty() {
91        return Vec::new();
92    }
93
94    let mut all_stmts = Vec::new();
95    collect_fn_nodes(root, source, &fn_symbols, &mut all_stmts);
96    let mut seen = std::collections::HashSet::new();
97    all_stmts.retain(|s| seen.insert(s.id.clone()));
98    all_stmts
99}
100
101fn collect_fn_nodes<'a>(
102    node: tree_sitter::Node<'a>,
103    source: &'a [u8],
104    fn_symbols: &[(&str, u32, u32)],
105    stmts: &mut Vec<Statement>,
106) {
107    let start = node.start_position().row as u32 + 1;
108    let end = node.end_position().row as u32 + 1;
109
110    if let Some((sym_id, _, _)) = fn_symbols
111        .iter()
112        .find(|(_, sl, el)| start == *sl && end == *el)
113    {
114        let mut extracted = extract_statements(node, source, sym_id, "");
115        stmts.append(&mut extracted);
116    }
117
118    for i in 0..node.child_count() {
119        if let Some(child) = node.child(i as u32) {
120            collect_fn_nodes(child, source, fn_symbols, stmts);
121        }
122    }
123}
124
125/// Create CALLS relations from Route symbols to handler functions in the same file.
126/// Matches route handler names from docstrings OR route names containing function names.
127fn generate_route_handler_edges(
128    file: &str,
129    symbols: &[crate::model::Symbol],
130    relations: &mut Vec<Relation>,
131) {
132    // Collect function/method names for matching
133    let functions: Vec<(&str, &str)> = symbols
134        .iter()
135        .filter(|s| {
136            (s.kind == SymbolKind::Function || s.kind == SymbolKind::Method) && s.span.file == file
137        })
138        .map(|s| (s.name.as_str(), s.id.as_str()))
139        .collect();
140
141    for sym in symbols {
142        if sym.kind != SymbolKind::Route {
143            continue;
144        }
145
146        let mut target_id: Option<String> = None;
147
148        // Method 1: explicit handler= in docstring
149        if let Some(doc) = &sym.docstring {
150            if let Some(handler_name) = doc.split("handler=").nth(1).map(|h| h.trim()) {
151                target_id = functions
152                    .iter()
153                    .find(|(name, _)| *name == handler_name)
154                    .map(|(_, id)| id.to_string());
155            }
156        }
157
158        // Method 2: Route is on the same line range as a function — check for overlap
159        if target_id.is_none() {
160            target_id = symbols
161                .iter()
162                .find(|s| {
163                    (s.kind == SymbolKind::Function || s.kind == SymbolKind::Method)
164                        && s.span.file == file
165                        && s.span.start_line <= sym.span.end_line
166                        && s.span.end_line >= sym.span.start_line
167                })
168                .map(|s| s.id.clone());
169        }
170
171        if let Some(tid) = target_id {
172            if tid != sym.id {
173                relations.push(Relation {
174                    source_id: sym.id.clone(),
175                    target_id: tid,
176                    kind: RelationKind::Calls,
177                    span: Some(Span {
178                        file: file.to_string(),
179                        start_line: sym.span.start_line,
180                        start_col: sym.span.start_col,
181                        end_line: sym.span.end_line,
182                        end_col: sym.span.end_col,
183                    }),
184                    receiver: None,
185                });
186            }
187        }
188    }
189}