Skip to main content

codemem_engine/index/
linker.rs

1//! Cross-repo linker: connects symbols across namespaces via
2//! package registry matching and symbol resolution.
3
4use crate::index::manifest::ManifestResult;
5use crate::index::symbol::{Symbol, Visibility};
6
/// One entry of the cross-repo package registry: a package name together
/// with the namespace that provides it.
#[derive(Debug, Clone)]
pub struct RegisteredPackage {
    /// Name of the package; `forward_link` compares it with a pending ref's
    /// `package_hint`.
    pub package_name: String,
    /// Namespace the package was registered from.
    pub namespace: String,
    /// Version string. `extract_packages` leaves this empty when no
    /// dependency entry carries the package's own name.
    pub version: String,
    /// Manifest value paired with the package name in the manifest scan
    /// (`extract_packages` binds it as `manifest_path`).
    pub manifest: String,
}
15
/// A reference that could not be resolved inside its own namespace and is
/// waiting for a cross-repo linking pass.
#[derive(Debug, Clone)]
pub struct PendingRef {
    /// Identifier of this pending ref; reported back in
    /// `LinkResult::resolved_ref_ids` once the ref has been linked.
    pub id: String,
    /// Namespace the reference originates from.
    pub namespace: String,
    /// Node ID of the referencing symbol; becomes the edge's `source`.
    pub source_node: String,
    /// Name of the symbol being referenced (qualified or simple).
    pub target_name: String,
    /// Package the target is expected to live in. Both linkers skip refs
    /// that have no hint.
    pub package_hint: Option<String>,
    /// Kind of reference (e.g. "call", "import"); translated into the edge's
    /// relationship string.
    pub ref_kind: String,
    /// Not read by the linker itself; presumably the file containing the
    /// reference (confirm against the producer of pending refs).
    pub file_path: Option<String>,
    /// Not read by the linker itself; presumably the line of the reference
    /// within `file_path` (confirm against the producer of pending refs).
    pub line: Option<usize>,
}
28
/// An edge that connects a symbol in one namespace to a symbol in another,
/// as emitted by the forward and backward linkers.
#[derive(Debug, Clone)]
pub struct CrossRepoEdge {
    /// Unique edge ID in the form "xref:{src_ns}/{src_sym}->{dst_ns}/{dst_sym}".
    pub id: String,
    /// ID of the referencing node (e.g., "sym:handler.process").
    pub source: String,
    /// ID of the referenced node (e.g., "sym:validate").
    pub target: String,
    /// Relationship type as a string (e.g., "Calls", "Imports").
    pub relationship: String,
    /// How confident the linker is in this resolution.
    pub confidence: f64,
    /// Namespace that contains `source`.
    pub source_namespace: String,
    /// Namespace that contains `target`.
    pub target_namespace: String,
}
47
48/// Result of a cross-repo linking pass.
49#[derive(Debug, Default)]
50pub struct LinkResult {
51    /// Packages registered in this pass.
52    pub packages_registered: usize,
53    /// Cross-namespace edges created via forward linking.
54    pub forward_edges: Vec<CrossRepoEdge>,
55    /// Cross-namespace edges created via backward linking.
56    pub backward_edges: Vec<CrossRepoEdge>,
57    /// IDs of unresolved refs that were resolved (to be deleted).
58    pub resolved_ref_ids: Vec<String>,
59}
60
61/// A matched symbol for cross-repo resolution.
62#[derive(Debug, Clone)]
63pub struct SymbolMatch {
64    pub qualified_name: String,
65    pub visibility: Visibility,
66    pub kind: String,
67}
68
69/// Extract packages from manifests for registry insertion.
70pub fn extract_packages(manifests: &ManifestResult, namespace: &str) -> Vec<RegisteredPackage> {
71    manifests
72        .packages
73        .iter()
74        .map(|(name, manifest_path)| {
75            // Find version from dependencies (self-reference)
76            let version = manifests
77                .dependencies
78                .iter()
79                .find(|d| d.name == *name)
80                .map(|d| d.version.clone())
81                .unwrap_or_default();
82            RegisteredPackage {
83                package_name: name.clone(),
84                namespace: namespace.to_string(),
85                version,
86                manifest: manifest_path.clone(),
87            }
88        })
89        .collect()
90}
91
92/// Forward link: resolve this namespace's pending refs against known packages.
93///
94/// For each pending ref from `namespace` that has a `package_hint` matching
95/// a registered package in another namespace, attempt to match the target_name
96/// against the provided symbols from that namespace.
97pub fn forward_link(
98    namespace: &str,
99    pending_refs: &[PendingRef],
100    registry: &[RegisteredPackage],
101    resolve_fn: &dyn Fn(&str, &str) -> Vec<SymbolMatch>,
102) -> LinkResult {
103    let mut result = LinkResult::default();
104
105    for pending_ref in pending_refs {
106        // Only process refs from this namespace
107        if pending_ref.namespace != namespace {
108            continue;
109        }
110
111        // Skip refs without a package hint
112        let package_hint = match &pending_ref.package_hint {
113            Some(hint) => hint,
114            None => continue,
115        };
116
117        // Find matching registry entries in OTHER namespaces
118        let matching_entries: Vec<&RegisteredPackage> = registry
119            .iter()
120            .filter(|entry| entry.package_name == *package_hint && entry.namespace != namespace)
121            .collect();
122
123        // Try all matching namespaces and pick the best overall match,
124        // rather than stopping at the first namespace that resolves.
125        let mut best_edge: Option<(CrossRepoEdge, f64)> = None;
126        for entry in matching_entries {
127            let matches = resolve_fn(&entry.namespace, &pending_ref.target_name);
128            if let Some(best) = pick_best_match(&matches) {
129                let confidence = match_confidence_for_symbol(best);
130                if best_edge.as_ref().is_none_or(|(_, c)| confidence > *c) {
131                    best_edge = Some((
132                        CrossRepoEdge {
133                            id: make_edge_id(
134                                namespace,
135                                &pending_ref.source_node,
136                                &entry.namespace,
137                                &best.qualified_name,
138                            ),
139                            source: pending_ref.source_node.clone(),
140                            target: format!("sym:{}", best.qualified_name),
141                            relationship: ref_kind_to_relationship(&pending_ref.ref_kind)
142                                .to_string(),
143                            confidence,
144                            source_namespace: namespace.to_string(),
145                            target_namespace: entry.namespace.clone(),
146                        },
147                        confidence,
148                    ));
149                }
150            }
151        }
152        if let Some((edge, _)) = best_edge {
153            result.forward_edges.push(edge);
154            result.resolved_ref_ids.push(pending_ref.id.clone());
155        }
156    }
157
158    result
159}
160
161/// Backward link: resolve OTHER namespaces' pending refs against THIS namespace's symbols.
162///
163/// Only considers refs whose `package_hint` matches one of our `package_names`,
164/// or refs with no package hint (best-effort matching).
165pub fn backward_link(
166    namespace: &str,
167    package_names: &[String],
168    pending_refs_for_packages: &[PendingRef],
169    symbols: &[Symbol],
170) -> LinkResult {
171    let mut result = LinkResult::default();
172
173    for pending_ref in pending_refs_for_packages {
174        // Don't self-link
175        if pending_ref.namespace == namespace {
176            continue;
177        }
178
179        // Only consider refs that target one of our packages.
180        // Skip refs with no package_hint — they would match any symbol and produce
181        // false-positive cross-repo edges for common names.
182        let Some(ref hint) = pending_ref.package_hint else {
183            continue;
184        };
185        if !package_names.iter().any(|p| p == hint) {
186            continue;
187        }
188
189        if let Some((qualified_name, confidence)) = match_symbol(&pending_ref.target_name, symbols)
190        {
191            let edge = CrossRepoEdge {
192                id: make_edge_id(
193                    &pending_ref.namespace,
194                    &pending_ref.source_node,
195                    namespace,
196                    &qualified_name,
197                ),
198                source: pending_ref.source_node.clone(),
199                target: format!("sym:{qualified_name}"),
200                relationship: ref_kind_to_relationship(&pending_ref.ref_kind).to_string(),
201                confidence,
202                source_namespace: pending_ref.namespace.clone(),
203                target_namespace: namespace.to_string(),
204            };
205            result.backward_edges.push(edge);
206            result.resolved_ref_ids.push(pending_ref.id.clone());
207        }
208    }
209
210    result
211}
212
213/// Match a target_name against a set of symbols with confidence scoring.
214///
215/// Strategy:
216/// 1. Exact qualified name match -> confidence 1.0
217/// 2. Suffix match (e.g., "validate" matches "utils.validate") -> confidence 0.85
218/// 3. Simple name match -> confidence 0.7, prefers public symbols and shortest qualified name
219/// 4. None
220pub fn match_symbol(target_name: &str, symbols: &[Symbol]) -> Option<(String, f64)> {
221    // 1. Exact qualified name match
222    if let Some(sym) = symbols.iter().find(|s| s.qualified_name == target_name) {
223        let boost = visibility_boost(sym.visibility);
224        return Some((sym.qualified_name.clone(), (1.0 + boost).min(1.0)));
225    }
226
227    // 2. Suffix match: target matches the last segment(s) of a qualified name
228    let suffix_matches: Vec<&Symbol> = symbols
229        .iter()
230        .filter(|s| {
231            // Check if qualified_name ends with the target after a separator
232            let qn = &s.qualified_name;
233            qn.ends_with(target_name)
234                && (qn.len() == target_name.len()
235                    || qn[..qn.len() - target_name.len()].ends_with('.')
236                    || qn[..qn.len() - target_name.len()].ends_with("::"))
237        })
238        .collect();
239
240    if !suffix_matches.is_empty() {
241        // Prefer public symbols
242        let public_matches: Vec<&&Symbol> = suffix_matches
243            .iter()
244            .filter(|s| s.visibility == Visibility::Public)
245            .collect();
246
247        let best = if !public_matches.is_empty() {
248            public_matches
249                .iter()
250                .min_by_key(|s| s.qualified_name.len())
251                .unwrap()
252        } else {
253            suffix_matches
254                .iter()
255                .min_by_key(|s| s.qualified_name.len())
256                .unwrap()
257        };
258
259        let boost = visibility_boost(best.visibility);
260        return Some((best.qualified_name.clone(), (0.85 + boost).min(1.0)));
261    }
262
263    // 3. Simple name match
264    let simple_name = simple_name_of(target_name);
265    let name_matches: Vec<&Symbol> = symbols.iter().filter(|s| s.name == simple_name).collect();
266
267    if !name_matches.is_empty() {
268        let best = pick_best_by_visibility(&name_matches);
269        let boost = visibility_boost(best.visibility);
270        return Some((best.qualified_name.clone(), (0.7 + boost).min(1.0)));
271    }
272
273    None
274}
275
/// Compose the ID of a cross-repo edge.
///
/// The result has the shape `xref:{src_ns}/{src_sym}->{dst_ns}/{dst_sym}`;
/// the four parts are inserted verbatim, without escaping.
fn make_edge_id(src_ns: &str, src_sym: &str, dst_ns: &str, dst_sym: &str) -> String {
    ["xref:", src_ns, "/", src_sym, "->", dst_ns, "/", dst_sym].concat()
}
280
/// Map a `ref_kind` string to the relationship type string used on edges.
///
/// Known kinds are "call", "import", "inherits", "implements" and
/// "type_usage"; anything else (including the empty string) maps to
/// "RelatesTo". Matching is exact and case-sensitive.
///
/// The result is always a string literal, so it is returned as
/// `&'static str` and does not borrow from `ref_kind`.
fn ref_kind_to_relationship(ref_kind: &str) -> &'static str {
    match ref_kind {
        "call" => "Calls",
        "import" => "Imports",
        "inherits" => "Inherits",
        "implements" => "Implements",
        "type_usage" => "DependsOn",
        _ => "RelatesTo",
    }
}
292
293// ── Internal helpers ─────────────────────────────────────────────────────
294
/// Return the unqualified last segment of a possibly qualified name.
///
/// Everything up to and including the last `::` (Rust-style) is dropped
/// first; from what remains, everything up to and including the last `.`
/// (most other languages) is dropped. A name without separators is returned
/// unchanged, and a name ending in a separator yields the empty string.
fn simple_name_of(name: &str) -> &str {
    let after_path = name.rsplit_once("::").map_or(name, |(_, tail)| tail);
    after_path
        .rsplit_once('.')
        .map_or(after_path, |(_, tail)| tail)
}
305
306/// Visibility boost for scoring: public symbols get a small confidence bump.
307fn visibility_boost(vis: Visibility) -> f64 {
308    match vis {
309        Visibility::Public => 0.05,
310        Visibility::Crate => 0.02,
311        Visibility::Protected => 0.01,
312        Visibility::Private => 0.0,
313    }
314}
315
316/// Pick the best symbol from a set of name matches by visibility, then shortest qualified name.
317fn pick_best_by_visibility<'a>(candidates: &[&'a Symbol]) -> &'a Symbol {
318    candidates
319        .iter()
320        .max_by(|a, b| {
321            let vis_ord = visibility_rank(a.visibility).cmp(&visibility_rank(b.visibility));
322            // If same visibility, prefer shortest qualified name
323            vis_ord.then_with(|| b.qualified_name.len().cmp(&a.qualified_name.len()))
324        })
325        .unwrap()
326}
327
328/// Rank visibility for sorting (higher = better).
329fn visibility_rank(vis: Visibility) -> u8 {
330    match vis {
331        Visibility::Public => 4,
332        Visibility::Crate => 3,
333        Visibility::Protected => 2,
334        Visibility::Private => 1,
335    }
336}
337
338/// Pick the best match from resolved symbols (highest confidence via visibility).
339fn pick_best_match(matches: &[SymbolMatch]) -> Option<&SymbolMatch> {
340    matches.iter().max_by(|a, b| {
341        let va = visibility_rank(a.visibility);
342        let vb = visibility_rank(b.visibility);
343        va.cmp(&vb)
344            .then_with(|| b.qualified_name.len().cmp(&a.qualified_name.len()))
345    })
346}
347
348/// Compute confidence for a `SymbolMatch` from the resolve callback.
349fn match_confidence_for_symbol(m: &SymbolMatch) -> f64 {
350    0.85 + visibility_boost(m.visibility)
351}
352
353#[cfg(test)]
354#[path = "tests/linker_tests.rs"]
355mod tests;