1#![forbid(unsafe_code)]
9
10use std::path::Path;
11
12use serde::{Deserialize, Serialize};
13
/// Format version byte that `VName::id` feeds into the hash before any of the
/// `VName` fields, so bumping it changes every `NodeId` derived from a `VName`.
pub const SIGNATURE_FORMAT_VERSION: u8 = 1;
25
/// Derives the canonical corpus identifier (`host/path`) from a git remote URL.
///
/// Recognised shapes:
/// - URL form: `scheme://[user[:password]@]host[:port]/path`
/// - scp-like form: `user@host:path` (any user name, not only `git`)
/// - scheme-less form: `host[:port]/path`
///
/// Surrounding whitespace is ignored. Host and path are lower-cased, a port is
/// dropped, and trailing `/` and `.git` are removed from the path. Input that
/// cannot be split into host and path falls back to `local/<sanitised input>`.
pub fn canonical_corpus(remote_url: &str) -> String {
    let s = remote_url.trim();

    // scp-like remotes keep everything after the colon as the path; treating
    // that colon as a port separator would drop the first path segment.
    if let Some((host, path)) = split_scp_like(s) {
        return format!("{}/{}", host.to_lowercase(), normalize_path(path));
    }

    let after_scheme = s.split_once("://").map_or(s, |(_, rest)| rest);

    if let Some((authority, path)) = after_scheme.split_once('/') {
        // Userinfo can only live in the authority, so an `@` inside the path
        // (for example `repo@v1`) must not be mistaken for it.
        let host_port = authority
            .rsplit_once('@')
            .map_or(authority, |(_, host_port)| host_port);
        let host = host_port.split(':').next().unwrap_or(host_port);
        return format!("{}/{}", host.to_lowercase(), normalize_path(path));
    }

    format!("local/{}", sanitize_local(s))
}

/// Splits an scp-like remote (`user@host:path`) into `(host, path)`.
///
/// Returns `None` when the input carries a `scheme://`, has no `user@` part,
/// or has a `/` or `:` before the `@`, or a `/` before the first colon that
/// follows it — none of those are scp-like syntax.
fn split_scp_like(s: &str) -> Option<(&str, &str)> {
    if s.contains("://") {
        return None;
    }
    let (user, rest) = s.split_once('@')?;
    if user.contains(['/', ':']) {
        return None;
    }
    let (host, path) = rest.split_once(':')?;
    if host.contains('/') {
        return None;
    }
    Some((host, path))
}

/// Builds the `local/<name>` corpus for a repository without a usable remote.
///
/// The name is the sanitised final component of `repo_root`; `unknown` is used
/// when the path has no final component or it is not valid UTF-8.
pub fn canonical_corpus_local(repo_root: &Path) -> String {
    let basename = repo_root
        .file_name()
        .and_then(std::ffi::OsStr::to_str)
        .unwrap_or("unknown");
    format!("local/{}", sanitize_local(basename))
}

/// Lower-cases a repository path, then strips trailing `/` and trailing `.git`.
///
/// Lower-casing happens first so that an upper-case `.GIT` suffix is removed too.
fn normalize_path(path: &str) -> String {
    path.to_lowercase()
        .trim_end_matches('/')
        .trim_end_matches(".git")
        .to_owned()
}

/// Replaces every character other than an alphanumeric, `-` or `_` with `-`,
/// then lower-cases the result.
fn sanitize_local(s: &str) -> String {
    let replaced: String = s
        .chars()
        .map(|c| match c {
            '-' | '_' => c,
            _ if c.is_alphanumeric() => c,
            _ => '-',
        })
        .collect();
    replaced.to_lowercase()
}
101
102#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
115#[non_exhaustive]
116pub enum Language {
117 TypeScript,
118 Rust,
119 Python,
120 Go,
121 Java,
122 Kotlin,
123 Ruby,
124 CSharp,
125 Php,
126 Scala,
127 Cpp,
128 C,
129 }
131
132impl Language {
133 pub fn from_extension(ext: &str) -> Option<Self> {
137 match ext {
138 "ts" | "tsx" | "mts" | "cts" => Some(Self::TypeScript),
139 "rs" => Some(Self::Rust),
140 "py" | "pyi" => Some(Self::Python),
141 "go" => Some(Self::Go),
142 "java" => Some(Self::Java),
143 "kt" | "kts" => Some(Self::Kotlin),
144 "rb" | "rake" | "gemspec" => Some(Self::Ruby),
145 "cs" => Some(Self::CSharp),
146 "php" | "phtml" | "php8" => Some(Self::Php),
147 "scala" | "sc" => Some(Self::Scala),
148 "cpp" | "cc" | "cxx" | "hpp" | "hh" | "hxx" => Some(Self::Cpp),
149 "c" | "h" => Some(Self::C),
150 _ => None,
151 }
152 }
153
154 pub fn as_str(self) -> &'static str {
156 match self {
157 Self::TypeScript => "typescript",
158 Self::Rust => "rust",
159 Self::Python => "python",
160 Self::Go => "go",
161 Self::Java => "java",
162 Self::Kotlin => "kotlin",
163 Self::Ruby => "ruby",
164 Self::CSharp => "csharp",
165 Self::Php => "php",
166 Self::Scala => "scala",
167 Self::Cpp => "cpp",
168 Self::C => "c",
169 }
170 }
171
172 #[allow(clippy::should_implement_trait)]
174 pub fn from_str(s: &str) -> Option<Self> {
175 match s {
176 "typescript" => Some(Self::TypeScript),
177 "rust" => Some(Self::Rust),
178 "python" => Some(Self::Python),
179 "go" => Some(Self::Go),
180 "java" => Some(Self::Java),
181 "kotlin" => Some(Self::Kotlin),
182 "ruby" => Some(Self::Ruby),
183 "csharp" => Some(Self::CSharp),
184 "php" => Some(Self::Php),
185 "scala" => Some(Self::Scala),
186 "cpp" => Some(Self::Cpp),
187 "c" => Some(Self::C),
188 _ => None,
189 }
190 }
191}
192
193#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
198pub struct VName {
199 pub corpus: String,
201 pub root: String,
203 pub path: String,
205 pub language: String,
207 pub signature: String,
209}
210
211impl VName {
212 pub fn new(
214 corpus: impl Into<String>,
215 root: impl Into<String>,
216 path: impl Into<String>,
217 language: impl Into<String>,
218 signature: impl Into<String>,
219 ) -> Self {
220 Self {
221 corpus: corpus.into(),
222 root: root.into(),
223 path: path.into(),
224 language: language.into(),
225 signature: signature.into(),
226 }
227 }
228
229 pub fn id(&self) -> NodeId {
240 let mut hasher = blake3::Hasher::new();
241 hasher.update(&[SIGNATURE_FORMAT_VERSION]);
244 for field in [
247 self.corpus.as_str(),
248 self.root.as_str(),
249 self.path.as_str(),
250 self.language.as_str(),
251 self.signature.as_str(),
252 ] {
253 let bytes = field.as_bytes();
254 hasher.update(&(bytes.len() as u32).to_le_bytes());
255 hasher.update(bytes);
256 }
257 let digest = hasher.finalize();
258 let mut buf = [0u8; 8];
259 buf.copy_from_slice(&digest.as_bytes()[..8]);
260 NodeId(u64::from_le_bytes(buf))
261 }
262}
263
264#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
269pub struct NodeId(pub u64);
270
271#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
273pub enum EdgeKind {
274 #[serde(rename = "depends")]
276 Depends,
277 #[serde(rename = "ref/call")]
279 RefCall,
280 #[serde(rename = "defines/binding")]
282 DefinesBinding,
283 #[serde(rename = "exports")]
285 Exports,
286 #[serde(rename = "resolves-to")]
290 ResolvesTo,
291 #[serde(rename = "ref/imports")]
294 RefImports,
295 #[serde(rename = "is-implementation")]
297 IsImplementation,
298 #[serde(rename = "overrides")]
301 Overrides,
302 #[serde(rename = "ffi/call")]
305 FFICall,
306}
307
308impl EdgeKind {
309 pub fn as_str(self) -> &'static str {
311 match self {
312 Self::Depends => "depends",
313 Self::RefCall => "ref/call",
314 Self::DefinesBinding => "defines/binding",
315 Self::Exports => "exports",
316 Self::ResolvesTo => "resolves-to",
317 Self::RefImports => "ref/imports",
318 Self::IsImplementation => "is-implementation",
319 Self::Overrides => "overrides",
320 Self::FFICall => "ffi/call",
321 }
322 }
323
324 pub fn ppr_weight(self) -> f32 {
347 match self {
348 Self::RefCall => 1.00,
349 Self::DefinesBinding => 0.70,
350 Self::Exports => 0.60,
351 Self::Depends => 0.50,
352 Self::ResolvesTo => 0.50,
353 Self::RefImports => 0.40,
354 Self::IsImplementation => 0.40,
355 Self::Overrides => 0.30,
356 Self::FFICall => 0.85,
357 }
358 }
359
360 #[allow(clippy::should_implement_trait)]
362 pub fn from_str(s: &str) -> Option<Self> {
363 match s {
364 "depends" => Some(Self::Depends),
365 "ref/call" => Some(Self::RefCall),
366 "defines/binding" => Some(Self::DefinesBinding),
367 "exports" => Some(Self::Exports),
368 "resolves-to" => Some(Self::ResolvesTo),
369 "ref/imports" => Some(Self::RefImports),
370 "is-implementation" => Some(Self::IsImplementation),
371 "overrides" => Some(Self::Overrides),
372 "ffi/call" => Some(Self::FFICall),
373 _ => None,
374 }
375 }
376}
377
378#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
384pub struct Node {
385 pub id: NodeId,
386 pub vname: VName,
387 pub kind: String,
388 pub package: String,
399 pub line: Option<u32>,
402}
403
404impl Node {
405 pub fn new(vname: VName, kind: impl Into<String>) -> Self {
411 let id = vname.id();
412 Self {
413 id,
414 vname,
415 kind: kind.into(),
416 package: String::new(),
417 line: None,
418 }
419 }
420
421 pub fn with_package(mut self, package: impl Into<String>) -> Self {
430 self.package = package.into();
431 self
432 }
433
434 pub fn with_line(mut self, line: u32) -> Self {
436 self.line = Some(line);
437 self
438 }
439}
440
441#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
443pub struct Edge {
444 pub src: NodeId,
445 pub dst: NodeId,
446 pub kind: EdgeKind,
447 #[serde(default, skip_serializing_if = "Option::is_none")]
450 pub confidence: Option<u8>,
451}
452
453impl Edge {
454 pub fn new(src: NodeId, dst: NodeId, kind: EdgeKind) -> Self {
455 Self {
456 src,
457 dst,
458 kind,
459 confidence: None,
460 }
461 }
462
463 pub fn ffi_call(src: NodeId, dst: NodeId, confidence: u8) -> Self {
467 debug_assert!(
468 confidence <= 100,
469 "confidence must be 0..=100, got {confidence}"
470 );
471 Self {
472 src,
473 dst,
474 kind: EdgeKind::FFICall,
475 confidence: Some(confidence),
476 }
477 }
478}
479
#[cfg(test)]
mod tests {
    use super::*;

    /// Fully populated `VName` shared by the identity tests.
    fn sample_vname() -> VName {
        VName::new(
            "github.com/raj-rkv/travsr",
            "main",
            "crates/travsr-core/src/lib.rs",
            "rust",
            "fn:sample",
        )
    }

    /// Every `Language` variant, so per-variant tests cannot skip one silently.
    fn all_languages() -> [Language; 12] {
        [
            Language::TypeScript,
            Language::Rust,
            Language::Python,
            Language::Go,
            Language::Java,
            Language::Kotlin,
            Language::Ruby,
            Language::CSharp,
            Language::Php,
            Language::Scala,
            Language::Cpp,
            Language::C,
        ]
    }

    /// Every `EdgeKind` variant, so per-variant tests cannot skip one silently.
    fn all_edge_kinds() -> [EdgeKind; 9] {
        [
            EdgeKind::Depends,
            EdgeKind::RefCall,
            EdgeKind::DefinesBinding,
            EdgeKind::Exports,
            EdgeKind::ResolvesTo,
            EdgeKind::RefImports,
            EdgeKind::IsImplementation,
            EdgeKind::Overrides,
            EdgeKind::FFICall,
        ]
    }

    #[test]
    fn vname_round_trips_through_serde_json() {
        let v = sample_vname();
        let json = serde_json::to_string(&v).unwrap();
        let back: VName = serde_json::from_str(&json).unwrap();
        assert_eq!(v, back);
    }

    #[test]
    fn vname_id_is_deterministic() {
        assert_eq!(sample_vname().id(), sample_vname().id());
    }

    #[test]
    fn vname_id_differs_on_any_field_change() {
        let base = sample_vname();
        // One variant per field, each differing from `base` in that field only.
        let variants = [
            VName {
                corpus: "github.com/other/repo".into(),
                ..base.clone()
            },
            VName {
                root: "other-root".into(),
                ..base.clone()
            },
            VName {
                path: "crates/travsr-core/src/other.rs".into(),
                ..base.clone()
            },
            VName {
                language: "typescript".into(),
                ..base.clone()
            },
            VName {
                signature: "fn:different".into(),
                ..base.clone()
            },
        ];
        for changed in &variants {
            assert_ne!(
                base.id(),
                changed.id(),
                "{changed:?} must not share the id of {base:?}"
            );
        }
    }

    #[test]
    fn vname_id_keeps_field_boundaries_unambiguous() {
        // Same concatenated bytes, different split between corpus and root.
        let left = VName::new("ab", "c", "p", "rust", "s");
        let right = VName::new("a", "bc", "p", "rust", "s");
        assert_ne!(left.id(), right.id());
    }

    #[test]
    fn edge_kind_round_trips_through_string() {
        for kind in all_edge_kinds() {
            assert_eq!(EdgeKind::from_str(kind.as_str()), Some(kind));
        }
    }

    #[test]
    fn edge_kind_from_str_returns_none_for_unknown() {
        assert_eq!(EdgeKind::from_str("Depends"), None);
        assert_eq!(EdgeKind::from_str(""), None);
    }

    #[test]
    fn edge_kind_serde_name_matches_as_str() {
        for kind in all_edge_kinds() {
            let json = serde_json::to_string(&kind).unwrap();
            assert_eq!(json, format!("\"{}\"", kind.as_str()));
            let back: EdgeKind = serde_json::from_str(&json).unwrap();
            assert_eq!(back, kind);
        }
    }

    #[test]
    fn ppr_weights_are_ordered_by_semantic_strength() {
        assert!(EdgeKind::RefCall.ppr_weight() > EdgeKind::DefinesBinding.ppr_weight());
        assert!(EdgeKind::DefinesBinding.ppr_weight() > EdgeKind::Exports.ppr_weight());
        assert!(EdgeKind::Exports.ppr_weight() > EdgeKind::Depends.ppr_weight());
        assert_eq!(
            EdgeKind::Depends.ppr_weight(),
            EdgeKind::ResolvesTo.ppr_weight()
        );
        assert!(EdgeKind::Depends.ppr_weight() > EdgeKind::RefImports.ppr_weight());
        assert_eq!(
            EdgeKind::RefImports.ppr_weight(),
            EdgeKind::IsImplementation.ppr_weight()
        );
        assert!(EdgeKind::IsImplementation.ppr_weight() > EdgeKind::Overrides.ppr_weight());
    }

    #[test]
    fn ppr_weights_are_positive_and_at_most_one() {
        for kind in all_edge_kinds() {
            let w = kind.ppr_weight();
            assert!(
                w > 0.0 && w <= 1.0,
                "weight {w} for {kind:?} must be in (0, 1]"
            );
        }
    }

    #[test]
    fn node_id_matches_vname_id() {
        let v = sample_vname();
        let node = Node::new(v.clone(), "function");
        assert_eq!(node.id, v.id());
    }

    #[test]
    fn version_byte_produces_different_id_than_unversioned() {
        let v = sample_vname();
        let versioned_id = v.id();

        // Rebuild the legacy scheme: no version byte, NUL-separated fields.
        let mut hasher = blake3::Hasher::new();
        hasher.update(v.corpus.as_bytes());
        hasher.update(b"\0");
        hasher.update(v.root.as_bytes());
        hasher.update(b"\0");
        hasher.update(v.path.as_bytes());
        hasher.update(b"\0");
        hasher.update(v.language.as_bytes());
        hasher.update(b"\0");
        hasher.update(v.signature.as_bytes());
        let digest = hasher.finalize();
        let mut buf = [0u8; 8];
        buf.copy_from_slice(&digest.as_bytes()[..8]);
        let legacy_id = NodeId(u64::from_le_bytes(buf));

        assert_ne!(
            versioned_id, legacy_id,
            "RFC-002 version byte + length-prefix must produce a different NodeId than the legacy NUL-separated hash"
        );
    }

    #[test]
    fn canonical_corpus_handles_https_with_git_suffix() {
        assert_eq!(
            canonical_corpus("https://github.com/raj-rkv/travsr.git"),
            "github.com/raj-rkv/travsr"
        );
    }

    #[test]
    fn canonical_corpus_handles_https_without_git_suffix() {
        assert_eq!(
            canonical_corpus("https://github.com/raj-rkv/travsr"),
            "github.com/raj-rkv/travsr"
        );
    }

    #[test]
    fn canonical_corpus_handles_scp_style_ssh() {
        assert_eq!(
            canonical_corpus("git@github.com:raj-rkv/travsr.git"),
            "github.com/raj-rkv/travsr"
        );
        assert_eq!(
            canonical_corpus("git@github.com:raj-rkv/travsr"),
            "github.com/raj-rkv/travsr"
        );
    }

    #[test]
    fn canonical_corpus_handles_ssh_url() {
        assert_eq!(
            canonical_corpus("ssh://git@github.com/raj-rkv/travsr.git"),
            "github.com/raj-rkv/travsr"
        );
    }

    #[test]
    fn canonical_corpus_handles_git_protocol() {
        assert_eq!(
            canonical_corpus("git://github.com/raj-rkv/travsr.git"),
            "github.com/raj-rkv/travsr"
        );
    }

    #[test]
    fn canonical_corpus_lowercases_input() {
        assert_eq!(
            canonical_corpus("HTTPS://GITHUB.COM/Raj-Rkv/Travsr.GIT"),
            "github.com/raj-rkv/travsr"
        );
    }

    #[test]
    fn canonical_corpus_strips_port() {
        assert_eq!(
            canonical_corpus("https://github.com:443/raj-rkv/travsr.git"),
            "github.com/raj-rkv/travsr"
        );
    }

    #[test]
    fn canonical_corpus_strips_trailing_slash() {
        assert_eq!(
            canonical_corpus("https://github.com/raj-rkv/travsr/"),
            "github.com/raj-rkv/travsr"
        );
    }

    #[test]
    fn canonical_corpus_ignores_surrounding_whitespace() {
        assert_eq!(
            canonical_corpus("  https://github.com/raj-rkv/travsr.git\n"),
            "github.com/raj-rkv/travsr"
        );
    }

    #[test]
    fn canonical_corpus_gitlab() {
        assert_eq!(
            canonical_corpus("https://gitlab.com/acme/payments-api.git"),
            "gitlab.com/acme/payments-api"
        );
    }

    #[test]
    fn canonical_corpus_falls_back_to_local_for_non_urls() {
        assert_eq!(canonical_corpus("not a url"), "local/not-a-url");
    }

    #[test]
    fn canonical_corpus_local_uses_basename() {
        let path = std::path::Path::new("/home/user/my-project");
        assert_eq!(canonical_corpus_local(path), "local/my-project");
    }

    #[test]
    fn canonical_corpus_local_sanitises_special_chars() {
        let path = std::path::Path::new("/tmp/My Project (v2)");
        let result = canonical_corpus_local(path);
        assert!(result.starts_with("local/"));
        assert!(!result.contains(' '), "spaces must be replaced");
        assert!(!result.contains('('), "parens must be replaced");
        assert_eq!(result, "local/my-project--v2-");
    }

    #[test]
    fn different_corpus_produces_non_colliding_node_ids() {
        let v_repo_a = VName::new(
            "github.com/acme/repo-a",
            "",
            "src/foo.ts",
            "typescript",
            "fn:bar",
        );
        let v_repo_b = VName::new(
            "github.com/acme/repo-b",
            "",
            "src/foo.ts",
            "typescript",
            "fn:bar",
        );
        assert_ne!(
            v_repo_a.id(),
            v_repo_b.id(),
            "different corpora must produce different NodeIds (cross-repo VName collision)"
        );
    }

    #[test]
    fn language_from_extension_covers_all_variants() {
        let cases = [
            ("ts", Language::TypeScript),
            ("tsx", Language::TypeScript),
            ("mts", Language::TypeScript),
            ("cts", Language::TypeScript),
            ("rs", Language::Rust),
            ("py", Language::Python),
            ("pyi", Language::Python),
            ("go", Language::Go),
            ("java", Language::Java),
            ("kt", Language::Kotlin),
            ("kts", Language::Kotlin),
            ("rb", Language::Ruby),
            ("rake", Language::Ruby),
            ("gemspec", Language::Ruby),
            ("cs", Language::CSharp),
            ("php", Language::Php),
            ("phtml", Language::Php),
            ("php8", Language::Php),
            ("scala", Language::Scala),
            ("sc", Language::Scala),
            ("cpp", Language::Cpp),
            ("cc", Language::Cpp),
            ("cxx", Language::Cpp),
            ("hpp", Language::Cpp),
            ("hh", Language::Cpp),
            ("hxx", Language::Cpp),
            ("c", Language::C),
            ("h", Language::C),
        ];
        for (ext, lang) in cases {
            assert_eq!(
                Language::from_extension(ext),
                Some(lang),
                "extension {ext:?}"
            );
        }

        // Guard against a variant that no extension maps to.
        for lang in all_languages() {
            assert!(
                cases.iter().any(|&(_, mapped)| mapped == lang),
                "no extension maps to {lang:?}"
            );
        }

        assert_eq!(Language::from_extension("js"), None);
        assert_eq!(Language::from_extension(""), None);
    }

    #[test]
    fn language_as_str_and_from_str_round_trip() {
        for lang in all_languages() {
            let s = lang.as_str();
            assert_eq!(
                Language::from_str(s),
                Some(lang),
                "round-trip failed for {s}"
            );
        }
    }

    #[test]
    fn language_as_str_values_are_lowercase() {
        assert_eq!(Language::TypeScript.as_str(), "typescript");
        assert_eq!(Language::Rust.as_str(), "rust");
        assert_eq!(Language::Python.as_str(), "python");
        assert_eq!(Language::Go.as_str(), "go");
        for lang in all_languages() {
            let s = lang.as_str();
            assert_eq!(s, s.to_lowercase(), "{lang:?} must have a lower-case name");
        }
    }

    #[test]
    fn language_from_str_returns_none_for_unknown() {
        assert_eq!(Language::from_str("go"), Some(Language::Go));
        assert_eq!(Language::from_str("TypeScript"), None);
        assert_eq!(Language::from_str(""), None);
    }

    #[test]
    fn language_field_prevents_cross_language_vname_collision() {
        let ts = VName::new("github.com/a/b", "", "src/main.rs", "typescript", "fn:main");
        let rs = VName::new("github.com/a/b", "", "src/main.rs", "rust", "fn:main");
        assert_ne!(
            ts.id(),
            rs.id(),
            "different language fields must produce different NodeIds"
        );
    }

    #[test]
    fn node_with_package_does_not_change_id() {
        let vname = VName::new("github.com/a/b", "", "src/lib.rs", "rust", "fn:open");
        let plain = Node::new(vname.clone(), "function");
        let packaged = Node::new(vname, "function").with_package("my-crate");
        assert_eq!(plain.id, packaged.id, "package must not affect NodeId");
        assert_eq!(packaged.package, "my-crate");
        assert_eq!(plain.package, "");
    }

    #[test]
    fn node_with_line_does_not_change_id() {
        let vname = VName::new("github.com/a/b", "", "src/lib.rs", "rust", "fn:open");
        let plain = Node::new(vname.clone(), "function");
        let located = Node::new(vname, "function").with_line(42);
        assert_eq!(plain.id, located.id, "line must not affect NodeId");
        assert_eq!(plain.line, None);
        assert_eq!(located.line, Some(42));
    }

    #[test]
    fn edge_ffi_call_builder_sets_confidence() {
        let e = Edge::ffi_call(NodeId(1), NodeId(2), 90);
        assert_eq!(e.kind, EdgeKind::FFICall);
        assert_eq!(e.confidence, Some(90));
    }

    #[test]
    fn edge_new_has_no_confidence() {
        let e = Edge::new(NodeId(1), NodeId(2), EdgeKind::RefCall);
        assert_eq!(e.confidence, None);
    }

    #[test]
    fn edge_kind_ffi_call_roundtrip() {
        assert_eq!(EdgeKind::FFICall.as_str(), "ffi/call");
        assert_eq!(EdgeKind::from_str("ffi/call"), Some(EdgeKind::FFICall));
    }

    #[test]
    fn ppr_weight_ffi_call_is_between_refcall_and_defines_binding() {
        assert!(EdgeKind::FFICall.ppr_weight() < EdgeKind::RefCall.ppr_weight());
        assert!(EdgeKind::FFICall.ppr_weight() > EdgeKind::DefinesBinding.ppr_weight());
        assert!((EdgeKind::FFICall.ppr_weight() - 0.85_f32).abs() < 1e-6);
    }

    #[test]
    fn edge_serde_roundtrip_with_confidence() {
        let e = Edge::ffi_call(NodeId(42), NodeId(99), 75);
        let json = serde_json::to_string(&e).unwrap();
        assert!(json.contains("\"confidence\":75"));
        let e2: Edge = serde_json::from_str(&json).unwrap();
        assert_eq!(e2.confidence, Some(75));
    }

    #[test]
    fn edge_serde_omits_confidence_when_absent() {
        let e = Edge::new(NodeId(1), NodeId(2), EdgeKind::RefCall);
        let json = serde_json::to_string(&e).unwrap();
        assert!(!json.contains("confidence"));
    }

    #[test]
    fn edge_serde_roundtrip_without_confidence_field() {
        let json = r#"{"src":1,"dst":2,"kind":"ref/call"}"#;
        let e: Edge = serde_json::from_str(json).unwrap();
        assert_eq!(e.confidence, None);
    }
}