Skip to main content

codemem_engine/index/
linker.rs

1//! Cross-repo linker: connects symbols across namespaces via
2//! package registry matching and symbol resolution.
3
4use crate::index::manifest::ManifestResult;
5use crate::index::symbol::{Symbol, Visibility};
6
/// One package entry in the cross-repo registry.
///
/// Entries are produced by [`extract_packages`] and consulted by
/// [`forward_link`] to find which namespace provides a hinted package.
#[derive(Clone, Debug)]
pub struct RegisteredPackage {
    /// Name of the package; compared against a pending ref's `package_hint`.
    pub package_name: String,
    /// Namespace the package was registered under.
    pub namespace: String,
    /// Package version; empty when `extract_packages` found no version for it.
    pub version: String,
    /// Manifest the package came from (`extract_packages` stores the manifest path here).
    pub manifest: String,
}
15
/// A reference that could not be resolved inside its own namespace and is
/// waiting for cross-repo resolution.
#[derive(Clone, Debug)]
pub struct PendingRef {
    /// Identifier of this pending ref; reported back in
    /// `LinkResult::resolved_ref_ids` once the ref has been linked.
    pub id: String,
    /// Namespace the reference originates from.
    pub namespace: String,
    /// Node ID of the referencing symbol; becomes the `source` of the edge.
    pub source_node: String,
    /// Name being referenced; matched against symbols of the target namespace.
    pub target_name: String,
    /// Package the target is expected to live in, if known.
    pub package_hint: Option<String>,
    /// Kind of reference (e.g. "call", "import"); mapped to the edge relationship.
    pub ref_kind: String,
    /// Presumably the file containing the reference — not read in this module.
    pub file_path: Option<String>,
    /// Presumably the line of the reference within `file_path` — not read in this module.
    pub line: Option<usize>,
}
28
/// An edge that connects a symbol in one namespace to a symbol in another,
/// as emitted by the linker.
#[derive(Clone, Debug)]
pub struct CrossRepoEdge {
    /// Edge identifier of the form `xref:{src_ns}/{src_sym}->{dst_ns}/{dst_sym}`.
    pub id: String,
    /// Node ID of the referencing side (e.g., "sym:handler.process").
    pub source: String,
    /// Node ID of the referenced side (e.g., "sym:validate").
    pub target: String,
    /// Relationship type as a string (e.g., "Calls", "Imports").
    pub relationship: String,
    /// How confident the linker is in this cross-repo resolution.
    pub confidence: f64,
    /// Namespace that contains `source`.
    pub source_namespace: String,
    /// Namespace that contains `target`.
    pub target_namespace: String,
}
47
48/// Result of a cross-repo linking pass.
49#[derive(Debug, Default)]
50pub struct LinkResult {
51    /// Packages registered in this pass.
52    pub packages_registered: usize,
53    /// Cross-namespace edges created via forward linking.
54    pub forward_edges: Vec<CrossRepoEdge>,
55    /// Cross-namespace edges created via backward linking.
56    pub backward_edges: Vec<CrossRepoEdge>,
57    /// IDs of unresolved refs that were resolved (to be deleted).
58    pub resolved_ref_ids: Vec<String>,
59}
60
61/// A matched symbol for cross-repo resolution.
62#[derive(Debug, Clone)]
63pub struct SymbolMatch {
64    pub qualified_name: String,
65    pub visibility: Visibility,
66    pub kind: String,
67}
68
69/// Extract packages from manifests for registry insertion.
70pub fn extract_packages(manifests: &ManifestResult, namespace: &str) -> Vec<RegisteredPackage> {
71    manifests
72        .packages
73        .iter()
74        .map(|(name, manifest_path)| {
75            // Find version from dependencies (self-reference)
76            let version = manifests
77                .dependencies
78                .iter()
79                .find(|d| d.name == *name)
80                .map(|d| d.version.clone())
81                .unwrap_or_default();
82            RegisteredPackage {
83                package_name: name.clone(),
84                namespace: namespace.to_string(),
85                version,
86                manifest: manifest_path.clone(),
87            }
88        })
89        .collect()
90}
91
92/// Forward link: resolve this namespace's pending refs against known packages.
93///
94/// For each pending ref from `namespace` that has a `package_hint` matching
95/// a registered package in another namespace, attempt to match the target_name
96/// against the provided symbols from that namespace.
97pub fn forward_link(
98    namespace: &str,
99    pending_refs: &[PendingRef],
100    registry: &[RegisteredPackage],
101    resolve_fn: &dyn Fn(&str, &str) -> Vec<SymbolMatch>,
102) -> LinkResult {
103    let mut result = LinkResult::default();
104
105    for pending_ref in pending_refs {
106        // Only process refs from this namespace
107        if pending_ref.namespace != namespace {
108            continue;
109        }
110
111        // Skip refs without a package hint
112        let package_hint = match &pending_ref.package_hint {
113            Some(hint) => hint,
114            None => continue,
115        };
116
117        // Find matching registry entries in OTHER namespaces
118        let matching_entries: Vec<&RegisteredPackage> = registry
119            .iter()
120            .filter(|entry| entry.package_name == *package_hint && entry.namespace != namespace)
121            .collect();
122
123        for entry in matching_entries {
124            let matches = resolve_fn(&entry.namespace, &pending_ref.target_name);
125            if let Some(best) = pick_best_match(&matches) {
126                let edge = CrossRepoEdge {
127                    id: make_edge_id(
128                        namespace,
129                        &pending_ref.source_node,
130                        &entry.namespace,
131                        &best.qualified_name,
132                    ),
133                    source: pending_ref.source_node.clone(),
134                    target: format!("sym:{}", best.qualified_name),
135                    relationship: ref_kind_to_relationship(&pending_ref.ref_kind).to_string(),
136                    confidence: match_confidence_for_symbol(best),
137                    source_namespace: namespace.to_string(),
138                    target_namespace: entry.namespace.clone(),
139                };
140                result.forward_edges.push(edge);
141                result.resolved_ref_ids.push(pending_ref.id.clone());
142                break; // Don't match same ref to multiple namespaces
143            }
144        }
145    }
146
147    result
148}
149
150/// Backward link: resolve OTHER namespaces' pending refs against THIS namespace's symbols.
151///
152/// Only considers refs whose `package_hint` matches one of our `package_names`,
153/// or refs with no package hint (best-effort matching).
154pub fn backward_link(
155    namespace: &str,
156    package_names: &[String],
157    pending_refs_for_packages: &[PendingRef],
158    symbols: &[Symbol],
159) -> LinkResult {
160    let mut result = LinkResult::default();
161
162    for pending_ref in pending_refs_for_packages {
163        // Don't self-link
164        if pending_ref.namespace == namespace {
165            continue;
166        }
167
168        // Only consider refs that target one of our packages.
169        // Skip refs with no package_hint — they would match any symbol and produce
170        // false-positive cross-repo edges for common names.
171        let Some(ref hint) = pending_ref.package_hint else {
172            continue;
173        };
174        if !package_names.iter().any(|p| p == hint) {
175            continue;
176        }
177
178        if let Some((qualified_name, confidence)) = match_symbol(&pending_ref.target_name, symbols)
179        {
180            let edge = CrossRepoEdge {
181                id: make_edge_id(
182                    &pending_ref.namespace,
183                    &pending_ref.source_node,
184                    namespace,
185                    &qualified_name,
186                ),
187                source: pending_ref.source_node.clone(),
188                target: format!("sym:{qualified_name}"),
189                relationship: ref_kind_to_relationship(&pending_ref.ref_kind).to_string(),
190                confidence,
191                source_namespace: pending_ref.namespace.clone(),
192                target_namespace: namespace.to_string(),
193            };
194            result.backward_edges.push(edge);
195            result.resolved_ref_ids.push(pending_ref.id.clone());
196        }
197    }
198
199    result
200}
201
202/// Match a target_name against a set of symbols with confidence scoring.
203///
204/// Strategy:
205/// 1. Exact qualified name match -> confidence 1.0
206/// 2. Suffix match (e.g., "validate" matches "utils.validate") -> confidence 0.85
207/// 3. Simple name match -> confidence 0.7, prefers public symbols and shortest qualified name
208/// 4. None
209pub fn match_symbol(target_name: &str, symbols: &[Symbol]) -> Option<(String, f64)> {
210    // 1. Exact qualified name match
211    if let Some(sym) = symbols.iter().find(|s| s.qualified_name == target_name) {
212        let boost = visibility_boost(sym.visibility);
213        return Some((sym.qualified_name.clone(), (1.0 + boost).min(1.0)));
214    }
215
216    // 2. Suffix match: target matches the last segment(s) of a qualified name
217    let suffix_matches: Vec<&Symbol> = symbols
218        .iter()
219        .filter(|s| {
220            // Check if qualified_name ends with the target after a separator
221            let qn = &s.qualified_name;
222            qn.ends_with(target_name)
223                && (qn.len() == target_name.len()
224                    || qn[..qn.len() - target_name.len()].ends_with('.')
225                    || qn[..qn.len() - target_name.len()].ends_with("::"))
226        })
227        .collect();
228
229    if !suffix_matches.is_empty() {
230        // Prefer public symbols
231        let public_matches: Vec<&&Symbol> = suffix_matches
232            .iter()
233            .filter(|s| s.visibility == Visibility::Public)
234            .collect();
235
236        let best = if !public_matches.is_empty() {
237            public_matches
238                .iter()
239                .min_by_key(|s| s.qualified_name.len())
240                .unwrap()
241        } else {
242            suffix_matches
243                .iter()
244                .min_by_key(|s| s.qualified_name.len())
245                .unwrap()
246        };
247
248        let boost = visibility_boost(best.visibility);
249        return Some((best.qualified_name.clone(), (0.85 + boost).min(1.0)));
250    }
251
252    // 3. Simple name match
253    let simple_name = simple_name_of(target_name);
254    let name_matches: Vec<&Symbol> = symbols.iter().filter(|s| s.name == simple_name).collect();
255
256    if !name_matches.is_empty() {
257        let best = pick_best_by_visibility(&name_matches);
258        let boost = visibility_boost(best.visibility);
259        return Some((best.qualified_name.clone(), (0.7 + boost).min(1.0)));
260    }
261
262    None
263}
264
/// Assemble the ID of a cross-repo edge.
///
/// The result has the shape `xref:{src_ns}/{src_sym}->{dst_ns}/{dst_sym}`;
/// the four parts are inserted verbatim, without escaping.
fn make_edge_id(src_ns: &str, src_sym: &str, dst_ns: &str, dst_sym: &str) -> String {
    let mut id = String::from("xref:");
    id.push_str(src_ns);
    id.push('/');
    id.push_str(src_sym);
    id.push_str("->");
    id.push_str(dst_ns);
    id.push('/');
    id.push_str(dst_sym);
    id
}
269
/// Map a `ref_kind` string to the relationship type string stored on an edge.
///
/// Known kinds are "call", "import", "inherits", "implements" and
/// "type_usage"; the comparison is case-sensitive. Any other value maps to
/// the generic "RelatesTo".
///
/// Every result is a string literal, so the return type is `&'static str` and
/// does not keep `ref_kind` borrowed.
fn ref_kind_to_relationship(ref_kind: &str) -> &'static str {
    match ref_kind {
        "call" => "Calls",
        "import" => "Imports",
        "inherits" => "Inherits",
        "implements" => "Implements",
        "type_usage" => "DependsOn",
        _ => "RelatesTo",
    }
}
281
282// ── Internal helpers ─────────────────────────────────────────────────────
283
/// Return the unqualified last segment of a possibly qualified name.
///
/// Everything up to and including the last `::` (Rust-style) is dropped
/// first, then everything up to and including the last `.` of what remains.
/// A name without either separator is returned unchanged.
fn simple_name_of(name: &str) -> &str {
    let after_path = match name.rsplit_once("::") {
        Some((_, tail)) => tail,
        None => name,
    };
    match after_path.rsplit_once('.') {
        Some((_, tail)) => tail,
        None => after_path,
    }
}
294
295/// Visibility boost for scoring: public symbols get a small confidence bump.
296fn visibility_boost(vis: Visibility) -> f64 {
297    match vis {
298        Visibility::Public => 0.05,
299        Visibility::Crate => 0.02,
300        Visibility::Protected => 0.01,
301        Visibility::Private => 0.0,
302    }
303}
304
305/// Pick the best symbol from a set of name matches by visibility, then shortest qualified name.
306fn pick_best_by_visibility<'a>(candidates: &[&'a Symbol]) -> &'a Symbol {
307    candidates
308        .iter()
309        .max_by(|a, b| {
310            let vis_ord = visibility_rank(a.visibility).cmp(&visibility_rank(b.visibility));
311            // If same visibility, prefer shortest qualified name
312            vis_ord.then_with(|| b.qualified_name.len().cmp(&a.qualified_name.len()))
313        })
314        .unwrap()
315}
316
317/// Rank visibility for sorting (higher = better).
318fn visibility_rank(vis: Visibility) -> u8 {
319    match vis {
320        Visibility::Public => 4,
321        Visibility::Crate => 3,
322        Visibility::Protected => 2,
323        Visibility::Private => 1,
324    }
325}
326
327/// Pick the best match from resolved symbols (highest confidence via visibility).
328fn pick_best_match(matches: &[SymbolMatch]) -> Option<&SymbolMatch> {
329    matches.iter().max_by(|a, b| {
330        let va = visibility_rank(a.visibility);
331        let vb = visibility_rank(b.visibility);
332        va.cmp(&vb)
333            .then_with(|| b.qualified_name.len().cmp(&a.qualified_name.len()))
334    })
335}
336
337/// Compute confidence for a `SymbolMatch` from the resolve callback.
338fn match_confidence_for_symbol(m: &SymbolMatch) -> f64 {
339    0.85 + visibility_boost(m.visibility)
340}
341
342#[cfg(test)]
343#[path = "tests/linker_tests.rs"]
344mod tests;