Skip to main content

lean_ctx/core/
cross_source_edges.rs

1//! Cross-source graph edges — connects external data to code via the graph index.
2//!
3//! When provider data (issues, PRs, DB schemas) references code files, this module
4//! creates `IndexEdge` entries that the graph index uses for related-file discovery.
5//!
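//! Edge kinds (forward edges run from the external chunk to the referenced file):
//!   - `mentions`     — issue/PR body references a code file
//!   - `queries`      — code file queries a DB table
//!   - `documents`    — wiki page documents a code module
//!   - `resolves`     — PR resolves/fixes an issue
//!   - `mentioned_in` — reverse edge from the referenced file back to the chunk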
11//!
12//! Scientific basis: Scale-free networks (Barabasi-Albert) — cross-source edges
13//! follow preferential attachment: files mentioned in many issues become graph hubs.
14
15use crate::core::content_chunk::ContentChunk;
16use crate::core::graph_index::IndexEdge;
17
// Edge kind labels for cross-source relationships.

/// Default forward edge kind: an external chunk (e.g. an issue) references a code file.
pub const EDGE_MENTIONS: &str = "mentions";
/// Forward edge kind used for DB-schema chunks.
pub const EDGE_QUERIES: &str = "queries";
/// Forward edge kind used for wiki-page chunks.
pub const EDGE_DOCUMENTS: &str = "documents";
/// Forward edge kind used for pull-request chunks.
pub const EDGE_RESOLVES: &str = "resolves";
23
24/// Extract cross-source edges from a set of ContentChunks.
25///
26/// For each external chunk, creates edges from the chunk's URI to every
27/// file path in its `references` list.
28pub fn extract_cross_source_edges(chunks: &[ContentChunk]) -> Vec<IndexEdge> {
29    let mut edges = Vec::new();
30
31    for chunk in chunks {
32        if !chunk.is_external() || chunk.references.is_empty() {
33            continue;
34        }
35
36        let edge_kind = chunk_to_edge_kind(chunk);
37
38        for ref_path in &chunk.references {
39            edges.push(IndexEdge {
40                from: chunk.file_path.clone(),
41                to: ref_path.clone(),
42                kind: edge_kind.to_string(),
43                weight: edge_weight_for_kind(edge_kind),
44            });
45
46            edges.push(IndexEdge {
47                from: ref_path.clone(),
48                to: chunk.file_path.clone(),
49                kind: "mentioned_in".to_string(),
50                weight: edge_weight_for_kind(edge_kind) * 0.8,
51            });
52        }
53    }
54
55    edges
56}
57
58/// Determine the edge kind based on the chunk's ChunkKind.
59fn chunk_to_edge_kind(chunk: &ContentChunk) -> &'static str {
60    use crate::core::bm25_index::ChunkKind;
61    match chunk.kind {
62        ChunkKind::PullRequest => EDGE_RESOLVES,
63        ChunkKind::WikiPage => EDGE_DOCUMENTS,
64        ChunkKind::DbSchema => EDGE_QUERIES,
65        _ => EDGE_MENTIONS,
66    }
67}
68
69/// Higher weight = stronger relationship. Issues and PRs that reference
70/// code are high-value signals.
71fn edge_weight_for_kind(kind: &str) -> f32 {
72    match kind {
73        EDGE_RESOLVES => 1.5,
74        EDGE_QUERIES => 1.2,
75        EDGE_DOCUMENTS => 0.8,
76        _ => 1.0,
77    }
78}
79
80/// Merge cross-source edges into an existing ProjectIndex edge list.
81/// Deduplicates edges with the same (from, to, kind) triple, keeping
82/// the higher weight.
83pub fn merge_edges(existing: &mut Vec<IndexEdge>, new_edges: Vec<IndexEdge>) -> usize {
84    let mut added = 0usize;
85    for edge in new_edges {
86        let duplicate = existing
87            .iter_mut()
88            .find(|e| e.from == edge.from && e.to == edge.to && e.kind == edge.kind);
89
90        if let Some(existing_edge) = duplicate {
91            if edge.weight > existing_edge.weight {
92                existing_edge.weight = edge.weight;
93            }
94        } else {
95            existing.push(edge);
96            added += 1;
97        }
98    }
99    added
100}
101
#[cfg(test)]
mod tests {
    use super::*;
    use crate::core::bm25_index::ChunkKind;
    use crate::core::content_chunk::ContentChunk;

    /// GitHub issue chunk with the given id and referenced file paths.
    fn issue_chunk(id: &str, refs: Vec<&str>) -> ContentChunk {
        let title = format!("Issue #{id}");
        let body = format!("Body of issue #{id}");
        let references = refs.into_iter().map(String::from).collect();
        ContentChunk::from_provider(
            "github",
            "issues",
            id,
            &title,
            ChunkKind::Issue,
            body,
            references,
            None,
        )
    }

    /// GitHub pull-request chunk with the given id and referenced file paths.
    fn pr_chunk(id: &str, refs: Vec<&str>) -> ContentChunk {
        let title = format!("PR #{id}");
        let body = format!("PR #{id} fixes auth");
        let references = refs.into_iter().map(String::from).collect();
        ContentChunk::from_provider(
            "github",
            "pull_requests",
            id,
            &title,
            ChunkKind::PullRequest,
            body,
            references,
            None,
        )
    }

    /// Confluence wiki-page chunk with the given id and referenced file paths.
    fn wiki_chunk(id: &str, refs: Vec<&str>) -> ContentChunk {
        let title = format!("Wiki {id}");
        let body = format!("Documentation for {id}");
        let references = refs.into_iter().map(String::from).collect();
        ContentChunk::from_provider(
            "confluence",
            "wikis",
            id,
            &title,
            ChunkKind::WikiPage,
            body,
            references,
            None,
        )
    }

    /// `mentions` edge literal used by the merge tests.
    fn mentions_edge(from: &str, to: &str, weight: f32) -> IndexEdge {
        IndexEdge {
            from: from.into(),
            to: to.into(),
            kind: EDGE_MENTIONS.into(),
            weight,
        }
    }

    #[test]
    fn issue_creates_mentions_edges() {
        let edges =
            extract_cross_source_edges(&[issue_chunk("42", vec!["src/auth.rs", "src/db.rs"])]);

        // Two references, each producing one forward and one reverse edge.
        assert_eq!(edges.len(), 4);

        let has_forward = edges.iter().any(|e| {
            e.kind == EDGE_MENTIONS && e.to == "src/auth.rs" && e.from.contains("issues/42")
        });
        assert!(has_forward);

        let has_reverse = edges.iter().any(|e| {
            e.kind == "mentioned_in" && e.from == "src/auth.rs" && e.to.contains("issues/42")
        });
        assert!(has_reverse);
    }

    #[test]
    fn pr_creates_resolves_edges() {
        let edges = extract_cross_source_edges(&[pr_chunk("10", vec!["src/handler.rs"])]);

        let resolves = edges
            .iter()
            .find(|e| e.kind == EDGE_RESOLVES)
            .expect("a PR chunk should yield a `resolves` edge");
        assert_eq!(resolves.weight, 1.5);
    }

    #[test]
    fn wiki_creates_documents_edges() {
        let edges = extract_cross_source_edges(&[wiki_chunk("auth-guide", vec!["src/auth/mod.rs"])]);

        let documented = edges.iter().any(|e| e.kind == EDGE_DOCUMENTS);
        assert!(documented);
    }

    #[test]
    fn no_edges_for_file_source_chunks() {
        let source = crate::core::bm25_index::CodeChunk {
            file_path: "src/main.rs".into(),
            symbol_name: "main".into(),
            kind: ChunkKind::Function,
            start_line: 1,
            end_line: 10,
            content: "fn main() {}".into(),
            tokens: vec![],
            token_count: 0,
        };
        let code_chunk = ContentChunk::from(source);

        let edges = extract_cross_source_edges(&[code_chunk]);
        assert_eq!(edges.len(), 0);
    }

    #[test]
    fn no_edges_for_chunks_without_references() {
        let unreferenced = ContentChunk::from_provider(
            "github",
            "issues",
            "1",
            "Title",
            ChunkKind::Issue,
            "No file refs".into(),
            vec![],
            None,
        );

        let edges = extract_cross_source_edges(&[unreferenced]);
        assert_eq!(edges.len(), 0);
    }

    #[test]
    fn merge_edges_deduplicates() {
        let mut existing = vec![mentions_edge("a", "b", 1.0)];
        let incoming = vec![
            // Lower weight than the stored edge: must not replace it.
            mentions_edge("a", "b", 0.5),
            mentions_edge("a", "c", 1.0),
        ];

        let added = merge_edges(&mut existing, incoming);

        assert_eq!(added, 1);
        assert_eq!(existing.len(), 2);
        let kept = existing.iter().find(|e| e.to == "b").unwrap();
        assert_eq!(kept.weight, 1.0);
    }

    #[test]
    fn merge_edges_upgrades_weight() {
        let mut existing = vec![mentions_edge("a", "b", 0.5)];
        let incoming = vec![mentions_edge("a", "b", 2.0)];

        merge_edges(&mut existing, incoming);

        assert_eq!(existing[0].weight, 2.0);
    }

    #[test]
    fn multiple_issues_referencing_same_file_creates_hub() {
        let chunks: Vec<ContentChunk> = ["1", "2", "3"]
            .iter()
            .map(|id| issue_chunk(id, vec!["src/auth.rs"]))
            .collect();

        let edges = extract_cross_source_edges(&chunks);

        let auth_incoming = edges
            .iter()
            .filter(|e| e.kind == EDGE_MENTIONS && e.to == "src/auth.rs")
            .count();
        assert_eq!(auth_incoming, 3);
    }
}