Skip to main content

rag_rat_core/query/
graph_meta.rs

1use std::collections::BTreeSet;
2
3use rusqlite::{Connection, OptionalExtension, params};
4use serde::Serialize;
5
6use crate::{query::ReadChunk, search::lexical::SearchHit};
7
/// Caveat pushed into `GraphEvidence::notes` when full graph metadata is requested.
const FULL_GRAPH_NOTE: &str = "Call graph is tree-sitter/syntactic, not compiler-resolved.";
9
/// How much call-graph metadata gets attached to a search hit or read chunk.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum GraphMetaMode {
    /// Attach nothing; the `attach_*` functions return without querying.
    None,
    /// Attach the counts plus the `top_callers` / `top_callees` lists.
    Compact,
    /// Attach the symbol, the `callers` / `callees` lists, imports, referenced types and notes.
    Full,
}
16
17impl GraphMetaMode {
18    pub fn parse(value: &str) -> anyhow::Result<Self> {
19        match value {
20            "none" | "false" => Ok(Self::None),
21            "compact" | "true" => Ok(Self::Compact),
22            "full" => Ok(Self::Full),
23            other => anyhow::bail!(
24                "unknown graph metadata mode `{other}`; expected none, compact, or full"
25            ),
26        }
27    }
28}
29
30#[derive(Debug, Clone, Serialize)]
31pub struct GraphEvidence {
32    #[serde(skip_serializing_if = "Option::is_none")]
33    pub symbol: Option<GraphSymbol>,
34    pub caller_count: u64,
35    pub callee_count: u64,
36    #[serde(skip_serializing_if = "Vec::is_empty")]
37    pub top_callers: Vec<CallerEvidence>,
38    #[serde(skip_serializing_if = "Vec::is_empty")]
39    pub top_callees: Vec<CalleeEvidence>,
40    #[serde(skip_serializing_if = "Vec::is_empty")]
41    pub callers: Vec<CallerEvidence>,
42    #[serde(skip_serializing_if = "Vec::is_empty")]
43    pub callees: Vec<CalleeEvidence>,
44    #[serde(skip_serializing_if = "Vec::is_empty")]
45    pub imports: Vec<ImportEvidence>,
46    #[serde(skip_serializing_if = "Vec::is_empty")]
47    pub referenced_types: Vec<TypeEvidence>,
48    pub truncated: GraphTruncation,
49    #[serde(skip_serializing_if = "Vec::is_empty")]
50    pub notes: Vec<String>,
51}
52
53#[derive(Debug, Clone, Serialize)]
54pub struct GraphSymbol {
55    pub id: i64,
56    pub name: String,
57    pub qualified_name: String,
58    pub kind: String,
59    pub symbol_path: String,
60}
61
62#[derive(Debug, Clone, Serialize)]
63pub struct CallerEvidence {
64    pub symbol_path: String,
65    pub path: String,
66    pub line: i64,
67    pub callsite: CallsiteEvidence,
68    pub edge_kind: String,
69    pub confidence: String,
70}
71
72#[derive(Debug, Clone, Serialize)]
73pub struct CalleeEvidence {
74    pub target: String,
75    #[serde(skip_serializing_if = "Option::is_none")]
76    pub resolved_symbol_path: Option<String>,
77    #[serde(skip_serializing_if = "Option::is_none")]
78    pub path: Option<String>,
79    #[serde(skip_serializing_if = "Option::is_none")]
80    pub line: Option<i64>,
81    pub callsite: CallsiteEvidence,
82    pub edge_kind: String,
83    pub confidence: String,
84}
85
86#[derive(Debug, Clone, Serialize)]
87pub struct CallsiteEvidence {
88    pub path: String,
89    pub line: i64,
90    pub span: [i64; 2],
91}
92
93#[derive(Debug, Clone, Serialize)]
94pub struct ImportEvidence {
95    pub target: String,
96    pub confidence: String,
97}
98
99#[derive(Debug, Clone, Serialize)]
100pub struct TypeEvidence {
101    pub name: String,
102    pub confidence: String,
103}
104
105#[derive(Debug, Clone, Default, Serialize)]
106pub struct GraphTruncation {
107    pub callers: bool,
108    pub callees: bool,
109    #[serde(skip_serializing_if = "is_false")]
110    pub imports: bool,
111    #[serde(skip_serializing_if = "is_false")]
112    pub referenced_types: bool,
113}
114
115pub fn attach_to_search_hits(
116    conn: &Connection,
117    hits: &mut [SearchHit],
118    mode: GraphMetaMode,
119    limit: u32,
120) -> anyhow::Result<()> {
121    if mode == GraphMetaMode::None {
122        return Ok(());
123    }
124    let limit = limit.max(1);
125    for hit in hits {
126        hit.graph = evidence_for_chunk(conn, hit.chunk_id, mode, limit)?;
127    }
128    Ok(())
129}
130
131pub fn attach_to_read_chunk(
132    conn: &Connection,
133    chunk: &mut ReadChunk,
134    mode: GraphMetaMode,
135    limit: u32,
136) -> anyhow::Result<()> {
137    if mode == GraphMetaMode::None {
138        return Ok(());
139    }
140    chunk.graph = evidence_for_chunk(conn, chunk.chunk_id, mode, limit.max(1))?;
141    Ok(())
142}
143
144fn evidence_for_chunk(
145    conn: &Connection,
146    chunk_id: i64,
147    mode: GraphMetaMode,
148    limit: u32,
149) -> anyhow::Result<Option<GraphEvidence>> {
150    let Some(symbol) = primary_symbol(conn, chunk_id)? else {
151        return Ok(None);
152    };
153    let caller_count = count_callers(conn, &symbol)?;
154    let callee_count = count_callees(conn, symbol.id)?;
155    let mut evidence = GraphEvidence {
156        symbol: (mode == GraphMetaMode::Full).then(|| symbol.public.clone()),
157        caller_count,
158        callee_count,
159        top_callers: Vec::new(),
160        top_callees: Vec::new(),
161        callers: Vec::new(),
162        callees: Vec::new(),
163        imports: Vec::new(),
164        referenced_types: Vec::new(),
165        truncated: GraphTruncation::default(),
166        notes: Vec::new(),
167    };
168    let callers = callers(conn, &symbol, limit)?;
169    let callees = callees(conn, symbol.id, limit)?;
170    evidence.truncated.callers = caller_count > u64::try_from(callers.len()).unwrap_or(u64::MAX);
171    evidence.truncated.callees = callee_count > u64::try_from(callees.len()).unwrap_or(u64::MAX);
172    if mode == GraphMetaMode::Full {
173        evidence.callers = callers;
174        evidence.callees = callees;
175        evidence.imports = imports(conn, chunk_id, limit)?;
176        evidence.referenced_types = referenced_types(conn, symbol.id, limit)?;
177        evidence.truncated.imports =
178            count_imports(conn, chunk_id)? > u64::try_from(evidence.imports.len()).unwrap_or(0);
179        evidence.truncated.referenced_types = count_referenced_types(conn, symbol.id)?
180            > u64::try_from(evidence.referenced_types.len()).unwrap_or(0);
181        evidence.notes.push(FULL_GRAPH_NOTE.to_string());
182    } else {
183        evidence.top_callers = callers;
184        evidence.top_callees = callees;
185    }
186    Ok(Some(evidence))
187}
188
189#[derive(Debug, Clone)]
190struct PrimarySymbol {
191    id: i64,
192    name: String,
193    public: GraphSymbol,
194}
195
196fn primary_symbol(conn: &Connection, chunk_id: i64) -> anyhow::Result<Option<PrimarySymbol>> {
197    Ok(conn
198        .query_row(
199            "
200            SELECT symbols.id, symbols.name, symbols.qualified_name, symbols.kind, files.path
201            FROM chunks
202            JOIN symbols ON symbols.file_id = chunks.file_id
203             AND symbols.start_byte < chunks.end_byte
204             AND symbols.end_byte > chunks.start_byte
205            JOIN files ON files.id = symbols.file_id
206            WHERE chunks.id = ?1
207            ORDER BY
208              CASE symbols.kind
209                WHEN 'function' THEN 0
210                WHEN 'method' THEN 1
211                WHEN 'class' THEN 2
212                WHEN 'struct' THEN 3
213                ELSE 9
214              END,
215              symbols.start_byte ASC
216            LIMIT 1
217            ",
218            [chunk_id],
219            |row| {
220                let id = row.get(0)?;
221                let name: String = row.get(1)?;
222                let qualified_name: String = row.get(2)?;
223                let kind = row.get(3)?;
224                let path: String = row.get(4)?;
225                Ok(PrimarySymbol {
226                    id,
227                    name: name.clone(),
228                    public: GraphSymbol {
229                        id,
230                        name,
231                        qualified_name: qualified_name.clone(),
232                        kind,
233                        symbol_path: symbol_path(&path, &qualified_name),
234                    },
235                })
236            },
237        )
238        .optional()?)
239}
240
241fn count_callers(conn: &Connection, symbol: &PrimarySymbol) -> anyhow::Result<u64> {
242    let count = conn.query_row(
243        "
244        SELECT COUNT(DISTINCT COALESCE(from_symbol_id, -id))
245        FROM edges
246        WHERE edge_kind IN ('calls_name', 'constructs', 'uses_macro')
247          AND (to_symbol_id = ?1 OR (to_symbol_id IS NULL AND to_name = ?2))
248        ",
249        params![symbol.id, symbol.name],
250        |row| row.get::<_, i64>(0),
251    )?;
252    Ok(u64::try_from(count).unwrap_or(0))
253}
254
255fn count_callees(conn: &Connection, symbol_id: i64) -> anyhow::Result<u64> {
256    // Mirror the filter in `callees()` so `callee_count` (and thus the `truncated` flag) reflects
257    // the callees actually surfaced — not the unresolved name-only std calls we hide.
258    let count = conn.query_row(
259        "
260        SELECT COUNT(DISTINCT COALESCE(CAST(to_symbol_id AS TEXT), to_name))
261        FROM edges
262        WHERE from_symbol_id = ?1
263          AND edge_kind IN ('calls_name', 'constructs', 'uses_macro')
264          AND (
265              edge_kind != 'calls_name'
266              OR to_symbol_id IS NOT NULL
267              OR (confidence = 'Syntactic' AND target_qualified_name IS NOT NULL)
268          )
269        ",
270        [symbol_id],
271        |row| row.get::<_, i64>(0),
272    )?;
273    Ok(u64::try_from(count).unwrap_or(0))
274}
275
276fn count_imports(conn: &Connection, chunk_id: i64) -> anyhow::Result<u64> {
277    count_edges_for_chunk(conn, chunk_id, &["imports"])
278}
279
280fn count_referenced_types(conn: &Connection, symbol_id: i64) -> anyhow::Result<u64> {
281    count_edges_for_symbol(conn, symbol_id, &["references_type", "implements", "extends"])
282}
283
284fn count_edges_for_symbol(
285    conn: &Connection,
286    symbol_id: i64,
287    edge_kinds: &[&str],
288) -> anyhow::Result<u64> {
289    let count = conn.query_row(
290        &format!(
291            "
292        SELECT COUNT(DISTINCT COALESCE(CAST(to_symbol_id AS TEXT), to_name))
293        FROM edges
294            WHERE from_symbol_id = ?1
295              AND edge_kind IN ({})
296            ",
297            quoted(edge_kinds),
298        ),
299        [symbol_id],
300        |row| row.get::<_, i64>(0),
301    )?;
302    Ok(u64::try_from(count).unwrap_or(0))
303}
304
305fn count_edges_for_chunk(
306    conn: &Connection,
307    chunk_id: i64,
308    edge_kinds: &[&str],
309) -> anyhow::Result<u64> {
310    let count = conn.query_row(
311        &format!(
312            "
313            SELECT COUNT(*)
314            FROM edges
315            JOIN chunks ON chunks.file_id = edges.source_file_id
316            WHERE chunks.id = ?1
317              AND edges.from_symbol_id IS NULL
318              AND edges.edge_kind IN ({})
319            ",
320            quoted(edge_kinds),
321        ),
322        [chunk_id],
323        |row| row.get::<_, i64>(0),
324    )?;
325    Ok(u64::try_from(count).unwrap_or(0))
326}
327
328fn callers(
329    conn: &Connection,
330    symbol: &PrimarySymbol,
331    limit: u32,
332) -> anyhow::Result<Vec<CallerEvidence>> {
333    let mut stmt = conn.prepare(
334        "
335        SELECT DISTINCT
336               source_files.path,
337               COALESCE(source_symbols.qualified_name, edges.from_name, source_files.path),
338               COALESCE(NULLIF(edges.source_start_line, 0), source_chunks.start_line, 1),
339               COALESCE(NULLIF(edges.source_end_line, 0), NULLIF(edges.source_start_line, 0), source_chunks.start_line, 1),
340               edges.edge_kind,
341               edges.confidence
342        FROM edges
343        JOIN files source_files ON source_files.id = edges.source_file_id
344        LEFT JOIN symbols source_symbols ON source_symbols.id = edges.from_symbol_id
345        LEFT JOIN chunks source_chunks ON source_chunks.file_id = edges.source_file_id
346          AND source_symbols.start_byte >= source_chunks.start_byte
347          AND source_symbols.start_byte < source_chunks.end_byte
348        WHERE edges.edge_kind IN ('calls_name', 'constructs', 'uses_macro')
349          AND (edges.to_symbol_id = ?1 OR (edges.to_symbol_id IS NULL AND edges.to_name = ?2))
350        ORDER BY
351          CASE edges.confidence
352            WHEN 'Exact' THEN 0
353            WHEN 'Syntactic' THEN 1
354            WHEN 'NameOnly' THEN 2
355            ELSE 3
356          END,
357          source_files.path,
358          source_chunks.start_line
359        LIMIT ?3
360        ",
361    )?;
362    let rows = stmt.query_map(params![symbol.id, symbol.name, expanded_limit(limit)], |row| {
363        let path: String = row.get(0)?;
364        let qualified_name: String = row.get(1)?;
365        let source_start_line = row.get(2)?;
366        let source_end_line = row.get(3)?;
367        Ok(CallerEvidence {
368            symbol_path: symbol_path(&path, &qualified_name),
369            path: path.clone(),
370            line: source_start_line,
371            callsite: CallsiteEvidence {
372                path,
373                line: source_start_line,
374                span: [source_start_line, source_end_line],
375            },
376            edge_kind: row.get(4)?,
377            confidence: confidence(row.get::<_, String>(5)?.as_str()).to_string(),
378        })
379    })?;
380    let mut seen = BTreeSet::new();
381    let mut callers = collect_rows(rows)?
382        .into_iter()
383        .filter(|caller| seen.insert((caller.symbol_path.clone(), caller.edge_kind.clone())))
384        .collect::<Vec<_>>();
385    callers.truncate(usize::try_from(limit).unwrap_or(usize::MAX));
386    Ok(callers)
387}
388
389fn callees(conn: &Connection, symbol_id: i64, limit: u32) -> anyhow::Result<Vec<CalleeEvidence>> {
390    let mut stmt = conn.prepare(
391        "
392        SELECT DISTINCT
393               edges.to_name,
394               target_files.path,
395               target_symbols.qualified_name,
396               COALESCE(edges.target_start_line, target_chunks.start_line),
397               source_files.path,
398               COALESCE(NULLIF(edges.source_start_line, 0), source_chunks.start_line, 1),
399               COALESCE(NULLIF(edges.source_end_line, 0), NULLIF(edges.source_start_line, 0), source_chunks.start_line, 1),
400               edges.edge_kind,
401               edges.confidence
402        FROM edges
403        JOIN files source_files ON source_files.id = edges.source_file_id
404        LEFT JOIN symbols target_symbols ON target_symbols.id = edges.to_symbol_id
405        LEFT JOIN files target_files ON target_files.id = target_symbols.file_id
406        LEFT JOIN chunks target_chunks ON target_chunks.file_id = target_symbols.file_id
407          AND target_symbols.start_byte >= target_chunks.start_byte
408          AND target_symbols.start_byte < target_chunks.end_byte
409        LEFT JOIN symbols source_symbols ON source_symbols.id = edges.from_symbol_id
410        LEFT JOIN chunks source_chunks ON source_chunks.file_id = edges.source_file_id
411          AND source_symbols.start_byte >= source_chunks.start_byte
412          AND source_symbols.start_byte < source_chunks.end_byte
413        WHERE edges.from_symbol_id = ?1
414          AND edges.edge_kind IN ('calls_name', 'constructs', 'uses_macro')
415          -- Drop unresolved name-only calls (`.map()`, `.var_os()`, std combinators): they
416          -- resolve to nothing in-repo and are pure noise in a chunk's callee summary. Keep
417          -- resolved calls and syntactically-resolvable ones, plus constructs/macros.
418          AND (
419              edges.edge_kind != 'calls_name'
420              OR edges.to_symbol_id IS NOT NULL
421              OR (edges.confidence = 'Syntactic' AND edges.target_qualified_name IS NOT NULL)
422          )
423        ORDER BY
424          CASE edges.confidence
425            WHEN 'Exact' THEN 0
426            WHEN 'Syntactic' THEN 1
427            WHEN 'NameOnly' THEN 2
428            ELSE 3
429          END,
430          source_chunks.start_line,
431          edges.to_name
432        LIMIT ?2
433        ",
434    )?;
435    let rows = stmt.query_map(params![symbol_id, expanded_limit(limit)], |row| {
436        let target: String = row.get(0)?;
437        let path: Option<String> = row.get(1)?;
438        let qualified_name: Option<String> = row.get(2)?;
439        let callsite_path: String = row.get(4)?;
440        let callsite_start_line = row.get(5)?;
441        let callsite_end_line = row.get(6)?;
442        Ok(CalleeEvidence {
443            target,
444            resolved_symbol_path: path
445                .as_ref()
446                .zip(qualified_name.as_ref())
447                .map(|(path, qualified_name)| symbol_path(path, qualified_name)),
448            path,
449            line: row.get(3)?,
450            callsite: CallsiteEvidence {
451                path: callsite_path,
452                line: callsite_start_line,
453                span: [callsite_start_line, callsite_end_line],
454            },
455            edge_kind: row.get(7)?,
456            confidence: confidence(row.get::<_, String>(8)?.as_str()).to_string(),
457        })
458    })?;
459    let mut seen = BTreeSet::new();
460    let mut callees = collect_rows(rows)?
461        .into_iter()
462        .filter(|callee| {
463            seen.insert((
464                callee.target.clone(),
465                callee.resolved_symbol_path.clone(),
466                callee.edge_kind.clone(),
467            ))
468        })
469        .collect::<Vec<_>>();
470    callees.truncate(usize::try_from(limit).unwrap_or(usize::MAX));
471    Ok(callees)
472}
473
474fn imports(conn: &Connection, chunk_id: i64, limit: u32) -> anyhow::Result<Vec<ImportEvidence>> {
475    let mut stmt = conn.prepare(
476        "
477        SELECT edges.to_name, edges.confidence
478        FROM edges
479        JOIN chunks ON chunks.file_id = edges.source_file_id
480        WHERE chunks.id = ?1
481          AND edges.from_symbol_id IS NULL
482          AND edges.edge_kind = 'imports'
483        ORDER BY edges.to_name
484        LIMIT ?2
485        ",
486    )?;
487    let rows = stmt.query_map(params![chunk_id, i64::from(limit)], |row| {
488        Ok(ImportEvidence {
489            target: row.get(0)?,
490            confidence: confidence(row.get::<_, String>(1)?.as_str()).to_string(),
491        })
492    })?;
493    collect_rows(rows)
494}
495
496fn referenced_types(
497    conn: &Connection,
498    symbol_id: i64,
499    limit: u32,
500) -> anyhow::Result<Vec<TypeEvidence>> {
501    let mut stmt = conn.prepare(
502        "
503        SELECT DISTINCT edges.to_name, edges.confidence
504        FROM edges
505        WHERE edges.from_symbol_id = ?1
506          AND edges.edge_kind IN ('references_type', 'implements', 'extends')
507        ORDER BY
508          CASE edges.confidence
509            WHEN 'Exact' THEN 0
510            WHEN 'Syntactic' THEN 1
511            WHEN 'NameOnly' THEN 2
512            ELSE 3
513          END,
514          edges.to_name
515        LIMIT ?2
516        ",
517    )?;
518    let rows = stmt.query_map(params![symbol_id, i64::from(limit)], |row| {
519        Ok(TypeEvidence {
520            name: row.get(0)?,
521            confidence: confidence(row.get::<_, String>(1)?.as_str()).to_string(),
522        })
523    })?;
524    collect_rows(rows)
525}
526
/// Joins a file path and a qualified name into `path::qualified_name`.
///
/// When `qualified_name` already equals `path` or already starts with `path::`, it is returned
/// unchanged so the prefix is never doubled.
fn symbol_path(path: &str, qualified_name: &str) -> String {
    // `strip_prefix` lets the "already prefixed" test run without building a temporary
    // `"{path}::"` string on every call.
    match qualified_name.strip_prefix(path) {
        Some(rest) if rest.is_empty() || rest.starts_with("::") => qualified_name.to_string(),
        _ => format!("{path}::{qualified_name}"),
    }
}
533
534fn confidence(value: &str) -> &'static str {
535    crate::query::graph::normalize_confidence(value)
536}
537
/// Renders `values` as a comma-separated list of SQL string literals, e.g. `'a', 'b'`.
///
/// Embedded single quotes are doubled so every value stays a single well-formed literal.
/// Returns an empty string for an empty slice.
fn quoted(values: &[&str]) -> String {
    values
        .iter()
        .map(|value| format!("'{}'", value.replace('\'', "''")))
        .collect::<Vec<_>>()
        .join(", ")
}
541
/// Row cap for the over-fetching caller/callee queries: four times `limit`, where a `limit`
/// of zero counts as one.
fn expanded_limit(limit: u32) -> i64 {
    let floor = if limit == 0 { 1 } else { limit };
    i64::from(floor).saturating_mul(4)
}
545
/// Serde `skip_serializing_if` predicate: `true` exactly when the flag is `false`.
fn is_false(value: &bool) -> bool {
    matches!(*value, false)
}
549
550fn collect_rows<T>(
551    rows: rusqlite::MappedRows<'_, impl FnMut(&rusqlite::Row<'_>) -> rusqlite::Result<T>>,
552) -> anyhow::Result<Vec<T>> {
553    let mut out = Vec::new();
554    for row in rows {
555        out.push(row?);
556    }
557    Ok(out)
558}