Skip to main content

travsr_core/
lib.rs

1//! travsr-core — graph primitives for the Travsr code-intelligence daemon.
2//!
3//! This crate defines the foundational data model: Kythe-style VNames,
4//! node identifiers, edges, and the multiplex graph types that every other
5//! Travsr crate builds on. It has zero dependencies on any other internal
6//! Travsr crate by design (see crate dependency rules in CLAUDE.md).
7
8#![forbid(unsafe_code)]
9
10use std::path::Path;
11
12use serde::{Deserialize, Serialize};
13
/// Version of the VName signature format baked into every `NodeId` hash.
///
/// This byte is the **first input** to the BLAKE3 hasher in [`VName::id`],
/// where it acts as a domain separator. Changing it produces a disjoint
/// `NodeId` space — any `.travsr/graph.db` built with a different version
/// must be fully re-indexed before it can be queried.
/// See `docs/rfcs/RFC-002-vname-signature-versioning.md`.
///
/// Version history:
///   0 — legacy (no version byte, NUL-separated fields; all pre-RFC-002 databases)
///   1 — current: length-prefixed fields, Tree-sitter vocabulary (`class:X`, `fn:X`, `method:X.Y`, `var:X`)
pub const SIGNATURE_FORMAT_VERSION: u8 = 1;
25
26// ── Corpus derivation (ARCH-102) ─────────────────────────────────────────────
27
/// Derive the canonical Travsr corpus identifier from a git remote URL.
///
/// **Canonical form:** `host/org/repo` — all lowercase, no scheme prefix,
/// no userinfo, no port, no `.git` suffix, no leading or trailing slash.
/// See `docs/rfcs/ARCH-102`.
///
/// Handles all standard git remote URL formats:
///
/// | Input | Output |
/// |---|---|
/// | `https://github.com/acme/foo.git`     | `github.com/acme/foo` |
/// | `git@github.com:acme/foo.git`         | `github.com/acme/foo` |
/// | `deploy@git.example.com:acme/foo.git` | `git.example.com/acme/foo` |
/// | `ssh://git@github.com/acme/foo`       | `github.com/acme/foo` |
/// | `git://github.com/acme/foo.git`       | `github.com/acme/foo` |
/// | `https://github.com:443/acme/foo/`    | `github.com/acme/foo` |
///
/// Userinfo is only looked for in the authority (the part before the first
/// `/`), so an `@` inside the repository path is preserved.
///
/// Inputs that yield no host or no path (e.g. a bare name, or a URL with an
/// empty path) fall back to `local/<sanitised input>`, using the same
/// sanitisation as [`canonical_corpus_local`].
pub fn canonical_corpus(remote_url: &str) -> String {
    let s = remote_url.trim();

    let parts = match s.split_once("://") {
        // URL form: scheme://[userinfo@]host[:port]/path
        Some((_scheme, rest)) => split_url_authority(rest),
        // No scheme: try SCP-style `user@host:path` first, then a bare
        // `[userinfo@]host[:port]/path`.
        None => split_scp_remote(s).or_else(|| split_url_authority(s)),
    };

    if let Some((host, path)) = parts {
        let host = host.to_lowercase();
        let path = normalize_path(path);
        // An empty host or path cannot form `host/org/repo`; emitting it would
        // produce a leading or trailing slash, so use the local fallback.
        if !host.is_empty() && !path.is_empty() {
            return format!("{host}/{path}");
        }
    }

    // No usable host/path — fall back to local name
    format!("local/{}", sanitize_local(s))
}

/// Derive corpus for a local-only repo (no git remote): `local/<basename>`.
///
/// The basename is the final component of `repo_root`; if there is none
/// (e.g. `/`) or it is not valid UTF-8, `unknown` is used instead.
/// Non-alphanumeric characters (except `-` and `_`) are replaced by `-`,
/// and the result is lowercased.
/// Cross-repo `Exports` edges are impossible for local corpora by definition.
pub fn canonical_corpus_local(repo_root: &Path) -> String {
    let basename = repo_root
        .file_name()
        .and_then(|name| name.to_str())
        .unwrap_or("unknown");
    format!("local/{}", sanitize_local(basename))
}

/// Split an SCP-style remote `user@host:path` into `(host, path)`.
///
/// Returns `None` unless a userinfo `@` is present and the first `:` comes
/// before any `/` — otherwise the input is treated as `host[:port]/path`.
fn split_scp_remote(s: &str) -> Option<(&str, &str)> {
    let (head, path) = s.split_once(':')?;
    if head.contains('/') {
        return None;
    }
    // rsplit: everything up to the last `@` is userinfo.
    let (_user, host) = head.rsplit_once('@')?;
    Some((host, path))
}

/// Split `[userinfo@]host[:port]/path` into `(host, path)`.
///
/// Returns `None` when there is no `/`, i.e. no path component at all.
fn split_url_authority(s: &str) -> Option<(&str, &str)> {
    let (authority, path) = s.split_once('/')?;
    // Only the authority may carry userinfo; an `@` in the path is left alone.
    let host_port = authority
        .rsplit_once('@')
        .map_or(authority, |(_userinfo, rest)| rest);
    Some((host_without_port(host_port), path))
}

/// Strip a trailing `:port` from a host, keeping bracketed IPv6 literals intact.
fn host_without_port(host_port: &str) -> &str {
    if host_port.starts_with('[') {
        if let Some(end) = host_port.find(']') {
            // `[::1]:22` → `[::1]`; the colons inside the brackets are not a port.
            return &host_port[..=end];
        }
    }
    host_port.split(':').next().unwrap_or(host_port)
}

/// Lowercase a repository path and strip surrounding slashes and one `.git` suffix.
fn normalize_path(path: &str) -> String {
    // Lowercase first so the suffix check also catches ".GIT".
    let lower = path.to_lowercase();
    let trimmed = lower.trim_matches('/');
    // Strip the suffix exactly once: `foo.git.git` names the repo `foo.git`.
    let without_git = trimmed.strip_suffix(".git").unwrap_or(trimmed);
    // `org/repo/.git` leaves a slash behind once the suffix is gone.
    without_git.trim_end_matches('/').to_string()
}

/// Replace every character that is not alphanumeric, `-` or `_` with `-`, then lowercase.
fn sanitize_local(s: &str) -> String {
    let replaced: String = s
        .chars()
        .map(|c| match c {
            '-' | '_' => c,
            c if c.is_alphanumeric() => c,
            _ => '-',
        })
        .collect();
    replaced.to_lowercase()
}
101
102// ── Language ──────────────────────────────────────────────────────────────────
103
/// Source language of a graph node.
///
/// Used by the indexer dispatcher ([`Language::from_extension`]) and stored on
/// the `nodes.language` column (as the string returned by
/// [`Language::as_str`]).
///
/// The `#[non_exhaustive]` attribute prevents other crates from writing
/// exhaustive matches, so further variants can be added without a breaking
/// change. Note that the attribute is scoped to the *defining crate*: only
/// matches inside `travsr-core` (such as [`Language::as_str`]) may omit a
/// wildcard arm, and so fail to compile when a variant is added. Matches in
/// any other crate — including other crates in the Travsr workspace — must
/// carry a wildcard arm.
///
/// See RFC-003 and ADR-005 for the design rationale.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[non_exhaustive]
pub enum Language {
    TypeScript,
    Rust,
    Python,
    Go,
    Java,
    Kotlin,
    Ruby,
    CSharp,
    Php,
    Scala,
    Cpp,
    C,
    // Swift: grammar crate blocked on tree-sitter version conflict; variant reserved.
}
131
132impl Language {
133    /// Map a file extension to a `Language`.
134    ///
135    /// Returns `None` for unrecognised extensions — callers skip those files.
136    pub fn from_extension(ext: &str) -> Option<Self> {
137        match ext {
138            "ts" | "tsx" | "mts" | "cts" => Some(Self::TypeScript),
139            "rs" => Some(Self::Rust),
140            "py" | "pyi" => Some(Self::Python),
141            "go" => Some(Self::Go),
142            "java" => Some(Self::Java),
143            "kt" | "kts" => Some(Self::Kotlin),
144            "rb" | "rake" | "gemspec" => Some(Self::Ruby),
145            "cs" => Some(Self::CSharp),
146            "php" | "phtml" | "php8" => Some(Self::Php),
147            "scala" | "sc" => Some(Self::Scala),
148            "cpp" | "cc" | "cxx" | "hpp" | "hh" | "hxx" => Some(Self::Cpp),
149            "c" | "h" => Some(Self::C),
150            _ => None,
151        }
152    }
153
154    /// Human-readable string stored in the `nodes.language` column.
155    pub fn as_str(self) -> &'static str {
156        match self {
157            Self::TypeScript => "typescript",
158            Self::Rust => "rust",
159            Self::Python => "python",
160            Self::Go => "go",
161            Self::Java => "java",
162            Self::Kotlin => "kotlin",
163            Self::Ruby => "ruby",
164            Self::CSharp => "csharp",
165            Self::Php => "php",
166            Self::Scala => "scala",
167            Self::Cpp => "cpp",
168            Self::C => "c",
169        }
170    }
171
172    /// Parse from the storage string produced by [`Language::as_str`].
173    #[allow(clippy::should_implement_trait)]
174    pub fn from_str(s: &str) -> Option<Self> {
175        match s {
176            "typescript" => Some(Self::TypeScript),
177            "rust" => Some(Self::Rust),
178            "python" => Some(Self::Python),
179            "go" => Some(Self::Go),
180            "java" => Some(Self::Java),
181            "kotlin" => Some(Self::Kotlin),
182            "ruby" => Some(Self::Ruby),
183            "csharp" => Some(Self::CSharp),
184            "php" => Some(Self::Php),
185            "scala" => Some(Self::Scala),
186            "cpp" => Some(Self::Cpp),
187            "c" => Some(Self::C),
188            _ => None,
189        }
190    }
191}
192
/// Kythe-style globally unique identifier for a code entity.
///
/// VNames are stable across repos, languages, and time — they form the
/// universal address space of the Travsr graph. All five fields feed the
/// hash computed by [`VName::id`], so changing any of them changes the
/// resulting [`NodeId`].
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct VName {
    /// Logical corpus, e.g. `github.com/acme/foo` as produced by
    /// [`canonical_corpus`], or `local/<basename>` as produced by
    /// [`canonical_corpus_local`].
    pub corpus: String,
    /// Root within the corpus (e.g. branch or build root). May be empty.
    pub root: String,
    /// Path within the root (e.g. `src/foo.ts`).
    pub path: String,
    /// Source language identifier (e.g. `typescript`, `rust`).
    // NOTE(review): presumably the value of `Language::as_str`, but the field
    // is a free-form `String` — confirm against the indexers.
    pub language: String,
    /// Symbol signature within the file, in the vocabulary versioned by
    /// [`SIGNATURE_FORMAT_VERSION`] (e.g. `fn:main`, `method:PaymentService.charge`).
    pub signature: String,
}
210
211impl VName {
212    /// Construct a `VName` from its five components.
213    pub fn new(
214        corpus: impl Into<String>,
215        root: impl Into<String>,
216        path: impl Into<String>,
217        language: impl Into<String>,
218        signature: impl Into<String>,
219    ) -> Self {
220        Self {
221            corpus: corpus.into(),
222            root: root.into(),
223            path: path.into(),
224            language: language.into(),
225            signature: signature.into(),
226        }
227    }
228
229    /// Stable 64-bit identifier derived from the five-field VName.
230    ///
231    /// The hash is the first 8 bytes of the BLAKE3 digest of:
232    ///   `[SIGNATURE_FORMAT_VERSION] || [len_u32_le][corpus] || [len_u32_le][root] || ...`
233    ///
234    /// Length-prefix encoding (4-byte little-endian field length before each
235    /// field) replaces the NUL-separator scheme. This guarantees that no two
236    /// distinct VNames share the same byte stream, and that a v0 byte stream
237    /// (which starts with raw corpus bytes) can never equal a v1 stream (which
238    /// starts with `[version_byte][len]`). See RFC-002.
239    pub fn id(&self) -> NodeId {
240        let mut hasher = blake3::Hasher::new();
241        // Version domain separator — must be first. Changing SIGNATURE_FORMAT_VERSION
242        // produces disjoint NodeId spaces; see RFC-002.
243        hasher.update(&[SIGNATURE_FORMAT_VERSION]);
244        // Length-prefix each field so no two distinct VNames share the same byte
245        // stream regardless of field contents (no NUL-injection ambiguity).
246        for field in [
247            self.corpus.as_str(),
248            self.root.as_str(),
249            self.path.as_str(),
250            self.language.as_str(),
251            self.signature.as_str(),
252        ] {
253            let bytes = field.as_bytes();
254            hasher.update(&(bytes.len() as u32).to_le_bytes());
255            hasher.update(bytes);
256        }
257        let digest = hasher.finalize();
258        let mut buf = [0u8; 8];
259        buf.copy_from_slice(&digest.as_bytes()[..8]);
260        NodeId(u64::from_le_bytes(buf))
261    }
262}
263
/// Opaque, content-addressed identifier for a node in the graph.
///
/// `NodeId` is a stable BLAKE3-derived hash of a `VName` (see
/// [`VName::id`]): the first 8 digest bytes read as a little-endian `u64`.
/// It is the SQLite primary key for the `nodes` table.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
pub struct NodeId(pub u64);
270
/// The kinds of edges supported in the Travsr multiplex graph.
///
/// Each variant's `#[serde(rename = ...)]` string is the same string that
/// [`EdgeKind::as_str`] returns and [`EdgeKind::from_str`] accepts; keep the
/// three in sync when adding a variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum EdgeKind {
    /// File / module import. Corresponds to Kythe `%kythe/edge/depends`.
    #[serde(rename = "depends")]
    Depends,
    /// Call-site reference. Corresponds to Kythe `%kythe/edge/ref/call`.
    #[serde(rename = "ref/call")]
    RefCall,
    /// Definition-binding edge (parent → child in the AST).
    #[serde(rename = "defines/binding")]
    DefinesBinding,
    /// A symbol exported from a module.
    #[serde(rename = "exports")]
    Exports,
    /// An import node resolved to the file node it targets.
    /// Connects `import:./foo` → `file:foo.ts`, enabling transitive
    /// caller traversal across file boundaries.
    #[serde(rename = "resolves-to")]
    ResolvesTo,
    /// Named import specifier reference emitted by the LSIF pipeline.
    /// Distinguishes semantic import references from file-level `Depends` edges.
    #[serde(rename = "ref/imports")]
    RefImports,
    /// Class-to-interface implementation edge emitted by the LSIF pipeline.
    #[serde(rename = "is-implementation")]
    IsImplementation,
    /// Method override edge emitted by the LSIF pipeline when a subclass
    /// method shadows a same-named method in the base class.
    #[serde(rename = "overrides")]
    Overrides,
    /// Cross-language FFI call edge (RFC-005). Confidence lives on `Edge.confidence`
    /// so `EdgeKind` stays `Copy`. PPR weight: 0.85 (ADR-003 amendment, 2026-05-24).
    #[serde(rename = "ffi/call")]
    FFICall,
}
307
308impl EdgeKind {
309    /// Stable string representation used as the storage key.
310    pub fn as_str(self) -> &'static str {
311        match self {
312            Self::Depends => "depends",
313            Self::RefCall => "ref/call",
314            Self::DefinesBinding => "defines/binding",
315            Self::Exports => "exports",
316            Self::ResolvesTo => "resolves-to",
317            Self::RefImports => "ref/imports",
318            Self::IsImplementation => "is-implementation",
319            Self::Overrides => "overrides",
320            Self::FFICall => "ffi/call",
321        }
322    }
323
324    /// PPR transition weight for this edge kind.
325    ///
326    /// Weights encode the semantic importance of each edge type for
327    /// Personalized PageRank: a higher weight means PPR mass flows more
328    /// readily across edges of this kind, producing higher scores for
329    /// reachable nodes.
330    ///
331    /// # Rationale (DEBT-016 / ADR-003)
332    ///
333    /// | Kind              | Weight | Reasoning                               |
334    /// |---|---|---|
335    /// | `RefCall`         | 1.00   | Direct call — strongest semantic link   |
336    /// | `DefinesBinding`  | 0.70   | Parent→child definition — strong structural link |
337    /// | `Exports`         | 0.60   | Exported API surface — important for callers |
338    /// | `Depends`         | 0.50   | File import — broad but less targeted   |
339    /// | `ResolvesTo`      | 0.50   | Import→file resolution — same as Depends |
340    /// | `RefImports`      | 0.40   | Named import specifier — narrower than file import |
341    /// | `IsImplementation`| 0.40   | Class implements interface — type-system link |
342    /// | `Overrides`       | 0.30   | Method override — weakest semantic tie  |
343    ///
344    /// Weights are normalised per-node at PPR iteration time so their
345    /// absolute scale does not matter — only the ratios between kinds.
346    pub fn ppr_weight(self) -> f32 {
347        match self {
348            Self::RefCall => 1.00,
349            Self::DefinesBinding => 0.70,
350            Self::Exports => 0.60,
351            Self::Depends => 0.50,
352            Self::ResolvesTo => 0.50,
353            Self::RefImports => 0.40,
354            Self::IsImplementation => 0.40,
355            Self::Overrides => 0.30,
356            Self::FFICall => 0.85,
357        }
358    }
359
360    /// Parse from the stable string representation.
361    #[allow(clippy::should_implement_trait)]
362    pub fn from_str(s: &str) -> Option<Self> {
363        match s {
364            "depends" => Some(Self::Depends),
365            "ref/call" => Some(Self::RefCall),
366            "defines/binding" => Some(Self::DefinesBinding),
367            "exports" => Some(Self::Exports),
368            "resolves-to" => Some(Self::ResolvesTo),
369            "ref/imports" => Some(Self::RefImports),
370            "is-implementation" => Some(Self::IsImplementation),
371            "overrides" => Some(Self::Overrides),
372            "ffi/call" => Some(Self::FFICall),
373            _ => None,
374        }
375    }
376}
377
/// A node in the code graph.
///
/// `PartialEq` compares all fields including `package`. Use `node.id == other.id`
/// for identity-only comparisons (two nodes are the same symbol regardless of
/// their package annotation).
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct Node {
    /// Content-addressed identifier. [`Node::new`] sets it to `vname.id()`;
    /// because the field is public, nothing enforces that afterwards.
    pub id: NodeId,
    /// Full five-field address of the entity this node represents.
    pub vname: VName,
    /// Free-form kind string (e.g. `function`).
    pub kind: String,
    /// Sub-unit identity within the corpus (ADR-005 Rule 2).
    ///
    /// Stored in `nodes.package`; **not** part of the BLAKE3 hash input.
    /// Empty string for nodes where package identity is unknown or irrelevant.
    ///
    /// | Language   | Value                                       |
    /// |------------|---------------------------------------------|
    /// | TypeScript | npm package name from `package.json`        |
    /// | Rust       | Cargo package name from `Cargo.toml`        |
    /// | Python     | top-level package dir (highest `__init__.py`)|
    pub package: String,
    /// 1-based source line of the symbol's definition site.
    /// `None` for file-kind nodes and synthetic import nodes.
    pub line: Option<u32>,
}
403
404impl Node {
405    /// Build a `Node` from a `VName` and a free-form kind string.
406    ///
407    /// The `id` is derived deterministically from the VName. `package`
408    /// defaults to an empty string; use [`Node::with_package`] to set it.
409    /// `line` defaults to `None`; use [`Node::with_line`] to set it.
410    pub fn new(vname: VName, kind: impl Into<String>) -> Self {
411        let id = vname.id();
412        Self {
413            id,
414            vname,
415            kind: kind.into(),
416            package: String::new(),
417            line: None,
418        }
419    }
420
421    /// Set the `package` field and return `self` (builder pattern).
422    ///
423    /// ```
424    /// use travsr_core::{Node, VName};
425    /// let n = Node::new(VName::new("github.com/a/b", "", "src/lib.rs", "rust", "fn:main"), "function")
426    ///     .with_package("my-crate");
427    /// assert_eq!(n.package, "my-crate");
428    /// ```
429    pub fn with_package(mut self, package: impl Into<String>) -> Self {
430        self.package = package.into();
431        self
432    }
433
434    /// Set the `line` field (1-based) and return `self` (builder pattern).
435    pub fn with_line(mut self, line: u32) -> Self {
436        self.line = Some(line);
437        self
438    }
439}
440
/// A directed, typed edge between two nodes.
///
/// When serialized, `confidence` is omitted if it is `None`; when
/// deserialized, a missing `confidence` field becomes `None`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct Edge {
    /// Node the edge starts from.
    pub src: NodeId,
    /// Node the edge points to.
    pub dst: NodeId,
    /// Type of relationship this edge represents.
    pub kind: EdgeKind,
    /// Confidence score 0..=100 for cross-language FFI edges (RFC-005).
    /// `None` for all non-FFI edges. Stored in `edges.confidence` (migration v6).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub confidence: Option<u8>,
}
452
453impl Edge {
454    pub fn new(src: NodeId, dst: NodeId, kind: EdgeKind) -> Self {
455        Self {
456            src,
457            dst,
458            kind,
459            confidence: None,
460        }
461    }
462
463    /// Build a cross-language FFI edge with a confidence score (RFC-005).
464    ///
465    /// `confidence` must be in `0..=100`. Panics in debug builds if violated.
466    pub fn ffi_call(src: NodeId, dst: NodeId, confidence: u8) -> Self {
467        debug_assert!(
468            confidence <= 100,
469            "confidence must be 0..=100, got {confidence}"
470        );
471        Self {
472            src,
473            dst,
474            kind: EdgeKind::FFICall,
475            confidence: Some(confidence),
476        }
477    }
478}
479
#[cfg(test)]
mod tests {
    //! Unit tests for the core data model: VName hashing, edge kinds and PPR
    //! weights, corpus derivation, the `Language` enum, and `Edge` serde.

    use super::*;

    /// Every `Language` variant. Update alongside the enum so the
    /// whole-enum tests below keep covering all of it.
    const ALL_LANGUAGES: [Language; 12] = [
        Language::TypeScript,
        Language::Rust,
        Language::Python,
        Language::Go,
        Language::Java,
        Language::Kotlin,
        Language::Ruby,
        Language::CSharp,
        Language::Php,
        Language::Scala,
        Language::Cpp,
        Language::C,
    ];

    /// Every `EdgeKind` variant. Update alongside the enum.
    const ALL_EDGE_KINDS: [EdgeKind; 9] = [
        EdgeKind::Depends,
        EdgeKind::RefCall,
        EdgeKind::DefinesBinding,
        EdgeKind::Exports,
        EdgeKind::ResolvesTo,
        EdgeKind::RefImports,
        EdgeKind::IsImplementation,
        EdgeKind::Overrides,
        EdgeKind::FFICall,
    ];

    fn sample_vname() -> VName {
        VName::new(
            "github.com/raj-rkv/travsr",
            "main",
            "crates/travsr-core/src/lib.rs",
            "rust",
            "fn:sample",
        )
    }

    #[test]
    fn vname_round_trips_through_serde_json() {
        let v = sample_vname();
        let json = serde_json::to_string(&v).unwrap();
        let back: VName = serde_json::from_str(&json).unwrap();
        assert_eq!(v, back);
    }

    #[test]
    fn vname_id_is_deterministic() {
        assert_eq!(sample_vname().id(), sample_vname().id());
    }

    #[test]
    fn vname_id_differs_on_any_field_change() {
        // One variant per field, so the test checks what its name promises.
        let base = sample_vname();
        let variants = [
            VName {
                corpus: "github.com/raj-rkv/other".into(),
                ..base.clone()
            },
            VName {
                root: "develop".into(),
                ..base.clone()
            },
            VName {
                path: "crates/travsr-core/src/other.rs".into(),
                ..base.clone()
            },
            VName {
                language: "typescript".into(),
                ..base.clone()
            },
            VName {
                signature: "fn:different".into(),
                ..base.clone()
            },
        ];
        for other in &variants {
            assert_ne!(base.id(), other.id(), "id did not change for {other:?}");
        }
    }

    #[test]
    fn vname_id_is_sensitive_to_field_boundaries() {
        // Length-prefixing must keep ("ab", "c") and ("a", "bc") apart even
        // though their concatenated bytes are identical.
        let left = VName::new("ab", "c", "", "", "");
        let right = VName::new("a", "bc", "", "", "");
        assert_ne!(left.id(), right.id());
    }

    #[test]
    fn edge_kind_round_trips_through_string() {
        for kind in ALL_EDGE_KINDS {
            assert_eq!(EdgeKind::from_str(kind.as_str()), Some(kind));
        }
    }

    #[test]
    fn ppr_weights_are_ordered_by_semantic_strength() {
        // RefCall > DefinesBinding > Exports > Depends == ResolvesTo > RefImports == IsImplementation > Overrides
        assert!(EdgeKind::RefCall.ppr_weight() > EdgeKind::DefinesBinding.ppr_weight());
        assert!(EdgeKind::DefinesBinding.ppr_weight() > EdgeKind::Exports.ppr_weight());
        assert!(EdgeKind::Exports.ppr_weight() > EdgeKind::Depends.ppr_weight());
        assert_eq!(
            EdgeKind::Depends.ppr_weight(),
            EdgeKind::ResolvesTo.ppr_weight()
        );
        assert!(EdgeKind::Depends.ppr_weight() > EdgeKind::RefImports.ppr_weight());
        assert_eq!(
            EdgeKind::RefImports.ppr_weight(),
            EdgeKind::IsImplementation.ppr_weight()
        );
        assert!(EdgeKind::IsImplementation.ppr_weight() > EdgeKind::Overrides.ppr_weight());
    }

    #[test]
    fn ppr_weights_are_positive_and_at_most_one() {
        for kind in ALL_EDGE_KINDS {
            let w = kind.ppr_weight();
            assert!(
                w > 0.0 && w <= 1.0,
                "weight {w} for {kind:?} must be in (0, 1]"
            );
        }
    }

    #[test]
    fn node_id_matches_vname_id() {
        let v = sample_vname();
        let node = Node::new(v.clone(), "function");
        assert_eq!(node.id, v.id());
    }

    #[test]
    fn version_byte_produces_different_id_than_unversioned() {
        // Regression guard: confirms the RFC-002 domain separator is actually
        // prepended and that length-prefix encoding is used. The v1 format starts
        // with [0x01][len][corpus...]; the v0 format starts with raw corpus bytes.
        // These byte streams can never be equal regardless of field contents.
        let v = sample_vname();
        let versioned_id = v.id(); // uses SIGNATURE_FORMAT_VERSION byte + length-prefix

        // Compute the legacy (no version byte, NUL-separated) hash directly.
        let mut hasher = blake3::Hasher::new();
        hasher.update(v.corpus.as_bytes());
        hasher.update(b"\0");
        hasher.update(v.root.as_bytes());
        hasher.update(b"\0");
        hasher.update(v.path.as_bytes());
        hasher.update(b"\0");
        hasher.update(v.language.as_bytes());
        hasher.update(b"\0");
        hasher.update(v.signature.as_bytes());
        let digest = hasher.finalize();
        let mut buf = [0u8; 8];
        buf.copy_from_slice(&digest.as_bytes()[..8]);
        let legacy_id = NodeId(u64::from_le_bytes(buf));

        assert_ne!(
            versioned_id, legacy_id,
            "RFC-002 version byte + length-prefix must produce a different NodeId than the legacy NUL-separated hash"
        );
    }

    // ── ARCH-102: canonical_corpus tests ─────────────────────────────────────

    #[test]
    fn canonical_corpus_handles_https_with_git_suffix() {
        assert_eq!(
            canonical_corpus("https://github.com/raj-rkv/travsr.git"),
            "github.com/raj-rkv/travsr"
        );
    }

    #[test]
    fn canonical_corpus_handles_https_without_git_suffix() {
        assert_eq!(
            canonical_corpus("https://github.com/raj-rkv/travsr"),
            "github.com/raj-rkv/travsr"
        );
    }

    #[test]
    fn canonical_corpus_handles_scp_style_ssh() {
        assert_eq!(
            canonical_corpus("git@github.com:raj-rkv/travsr.git"),
            "github.com/raj-rkv/travsr"
        );
        assert_eq!(
            canonical_corpus("git@github.com:raj-rkv/travsr"),
            "github.com/raj-rkv/travsr"
        );
    }

    #[test]
    fn canonical_corpus_handles_ssh_url() {
        assert_eq!(
            canonical_corpus("ssh://git@github.com/raj-rkv/travsr.git"),
            "github.com/raj-rkv/travsr"
        );
    }

    #[test]
    fn canonical_corpus_handles_git_protocol() {
        assert_eq!(
            canonical_corpus("git://github.com/raj-rkv/travsr.git"),
            "github.com/raj-rkv/travsr"
        );
    }

    #[test]
    fn canonical_corpus_lowercases_input() {
        assert_eq!(
            canonical_corpus("HTTPS://GITHUB.COM/Raj-Rkv/Travsr.GIT"),
            "github.com/raj-rkv/travsr"
        );
    }

    #[test]
    fn canonical_corpus_strips_port() {
        assert_eq!(
            canonical_corpus("https://github.com:443/raj-rkv/travsr.git"),
            "github.com/raj-rkv/travsr"
        );
    }

    #[test]
    fn canonical_corpus_strips_trailing_slash() {
        assert_eq!(
            canonical_corpus("https://github.com/raj-rkv/travsr/"),
            "github.com/raj-rkv/travsr"
        );
    }

    #[test]
    fn canonical_corpus_gitlab() {
        assert_eq!(
            canonical_corpus("https://gitlab.com/acme/payments-api.git"),
            "gitlab.com/acme/payments-api"
        );
    }

    #[test]
    fn canonical_corpus_without_path_falls_back_to_local() {
        assert_eq!(canonical_corpus("travsr"), "local/travsr");
    }

    #[test]
    fn canonical_corpus_local_uses_basename() {
        let path = std::path::Path::new("/home/user/my-project");
        assert_eq!(canonical_corpus_local(path), "local/my-project");
    }

    #[test]
    fn canonical_corpus_local_sanitises_special_chars() {
        let path = std::path::Path::new("/tmp/My Project (v2)");
        let result = canonical_corpus_local(path);
        assert!(result.starts_with("local/"));
        assert!(!result.contains(' '), "spaces must be replaced");
        assert!(!result.contains('('), "parens must be replaced");
    }

    #[test]
    fn different_corpus_produces_non_colliding_node_ids() {
        // Regression: same file + same signature in two different repos must
        // produce different NodeIds because corpus is part of the BLAKE3 input.
        let v_repo_a = VName::new(
            "github.com/acme/repo-a",
            "",
            "src/foo.ts",
            "typescript",
            "fn:bar",
        );
        let v_repo_b = VName::new(
            "github.com/acme/repo-b",
            "",
            "src/foo.ts",
            "typescript",
            "fn:bar",
        );
        assert_ne!(
            v_repo_a.id(),
            v_repo_b.id(),
            "different corpora must produce different NodeIds (cross-repo VName collision)"
        );
    }

    // ── Language enum (ADR-005 / RFC-003) ─────────────────────────────────────

    #[test]
    fn language_from_extension_covers_all_variants() {
        let cases: &[(&str, Language)] = &[
            ("ts", Language::TypeScript),
            ("tsx", Language::TypeScript),
            ("mts", Language::TypeScript),
            ("cts", Language::TypeScript),
            ("rs", Language::Rust),
            ("py", Language::Python),
            ("pyi", Language::Python),
            ("go", Language::Go),
            ("java", Language::Java),
            ("kt", Language::Kotlin),
            ("kts", Language::Kotlin),
            ("rb", Language::Ruby),
            ("rake", Language::Ruby),
            ("gemspec", Language::Ruby),
            ("cs", Language::CSharp),
            ("php", Language::Php),
            ("phtml", Language::Php),
            ("php8", Language::Php),
            ("scala", Language::Scala),
            ("sc", Language::Scala),
            ("cpp", Language::Cpp),
            ("cc", Language::Cpp),
            ("cxx", Language::Cpp),
            ("hpp", Language::Cpp),
            ("hh", Language::Cpp),
            ("hxx", Language::Cpp),
            ("c", Language::C),
            ("h", Language::C),
        ];
        for &(ext, expected) in cases {
            assert_eq!(
                Language::from_extension(ext),
                Some(expected),
                "wrong language for extension {ext:?}"
            );
        }
        // Every variant must be reachable from at least one extension.
        for lang in ALL_LANGUAGES {
            assert!(
                cases.iter().any(|&(_, mapped)| mapped == lang),
                "no extension maps to {lang:?}"
            );
        }
        assert_eq!(Language::from_extension("js"), None);
        assert_eq!(Language::from_extension(""), None);
    }

    #[test]
    fn language_as_str_and_from_str_round_trip() {
        for lang in ALL_LANGUAGES {
            let s = lang.as_str();
            assert_eq!(
                Language::from_str(s),
                Some(lang),
                "round-trip failed for {s}"
            );
        }
    }

    #[test]
    fn language_as_str_values_are_lowercase() {
        assert_eq!(Language::TypeScript.as_str(), "typescript");
        assert_eq!(Language::Rust.as_str(), "rust");
        assert_eq!(Language::Python.as_str(), "python");
        assert_eq!(Language::Go.as_str(), "go");
        for lang in ALL_LANGUAGES {
            let s = lang.as_str();
            assert_eq!(s, s.to_lowercase(), "{lang:?} storage string is not lowercase");
        }
    }

    #[test]
    fn language_from_str_returns_none_for_unknown() {
        assert_eq!(Language::from_str("go"), Some(Language::Go));
        assert_eq!(Language::from_str("TypeScript"), None);
        assert_eq!(Language::from_str(""), None);
    }

    // Regression: two symbols in different languages (same file path, same sig)
    // produce different NodeIds because language is part of the BLAKE3 input.
    #[test]
    fn language_field_prevents_cross_language_vname_collision() {
        let ts = VName::new("github.com/a/b", "", "src/main.rs", "typescript", "fn:main");
        let rs = VName::new("github.com/a/b", "", "src/main.rs", "rust", "fn:main");
        assert_ne!(
            ts.id(),
            rs.id(),
            "different language fields must produce different NodeIds"
        );
    }

    // node.with_package() sets package without changing id.
    #[test]
    fn node_with_package_does_not_change_id() {
        let vname = VName::new("github.com/a/b", "", "src/lib.rs", "rust", "fn:open");
        let plain = Node::new(vname.clone(), "function");
        let packaged = Node::new(vname, "function").with_package("my-crate");
        assert_eq!(plain.id, packaged.id, "package must not affect NodeId");
        assert_eq!(packaged.package, "my-crate");
        assert_eq!(plain.package, "");
    }

    #[test]
    fn node_with_line_sets_line_only() {
        let plain = Node::new(sample_vname(), "function");
        let located = Node::new(sample_vname(), "function").with_line(42);
        assert_eq!(plain.line, None);
        assert_eq!(located.line, Some(42));
        assert_eq!(plain.id, located.id, "line must not affect NodeId");
    }

    #[test]
    fn edge_ffi_call_builder_sets_confidence() {
        let e = Edge::ffi_call(NodeId(1), NodeId(2), 90);
        assert_eq!(e.kind, EdgeKind::FFICall);
        assert_eq!(e.confidence, Some(90));
    }

    #[test]
    fn edge_new_has_no_confidence() {
        let e = Edge::new(NodeId(1), NodeId(2), EdgeKind::RefCall);
        assert_eq!(e.confidence, None);
    }

    #[test]
    fn edge_kind_ffi_call_roundtrip() {
        assert_eq!(EdgeKind::FFICall.as_str(), "ffi/call");
        assert_eq!(EdgeKind::from_str("ffi/call"), Some(EdgeKind::FFICall));
    }

    #[test]
    fn ppr_weight_ffi_call_is_between_refcall_and_defines_binding() {
        assert!(EdgeKind::FFICall.ppr_weight() < EdgeKind::RefCall.ppr_weight());
        assert!(EdgeKind::FFICall.ppr_weight() > EdgeKind::DefinesBinding.ppr_weight());
        assert!((EdgeKind::FFICall.ppr_weight() - 0.85_f32).abs() < 1e-6);
    }

    #[test]
    fn edge_serde_roundtrip_with_confidence() {
        let e = Edge::ffi_call(NodeId(42), NodeId(99), 75);
        let json = serde_json::to_string(&e).unwrap();
        assert!(json.contains("\"confidence\":75"));
        let e2: Edge = serde_json::from_str(&json).unwrap();
        assert_eq!(e2.confidence, Some(75));
    }

    #[test]
    fn edge_serde_roundtrip_without_confidence_field() {
        // JSON produced before v6 (no confidence field) must deserialize to None
        let json = r#"{"src":1,"dst":2,"kind":"ref/call"}"#;
        let e: Edge = serde_json::from_str(json).unwrap();
        assert_eq!(e.confidence, None);
    }
}