#![forbid(unsafe_code)]
use std::path::Path;
use serde::{Deserialize, Serialize};
/// Version byte that `VName::id` feeds into the hasher before any field.
///
/// Because it is part of the hash input, changing this value changes the
/// `NodeId` derived from every `VName`.
pub const SIGNATURE_FORMAT_VERSION: u8 = 1;
/// Derives a canonical corpus identifier of the form `host/path` from a git
/// remote URL.
///
/// Recognised shapes:
/// - scp-style `user@host:path`, for any user name
///   (e.g. `git@github.com:owner/repo.git`);
/// - `scheme://[userinfo@]host[:port]/path` (`https`, `ssh`, `git`, ...);
/// - scheme-less `host[:port]/path`.
///
/// The host is lowercased, userinfo and port are dropped, and the path is
/// passed through `normalize_path`. Surrounding whitespace is ignored.
///
/// Input in which no host/path split can be found is returned as
/// `local/<sanitized input>`.
///
/// Note: a scheme-less `user@host:2222/owner/repo` is read as scp-style, so
/// `2222` stays part of the path; use `ssh://user@host:2222/owner/repo` to
/// express a port.
pub fn canonical_corpus(remote_url: &str) -> String {
    let s = remote_url.trim();

    // scp-style `user@host:path`. The text before the first ':' must contain
    // an '@' and no '/'; a `scheme://` prefix never satisfies this because the
    // scheme itself precedes the first colon.
    if let Some((user_host, path)) = s.split_once(':') {
        if !user_host.contains('/') {
            if let Some((_, host)) = user_host.split_once('@') {
                return format!("{}/{}", host.to_lowercase(), normalize_path(path));
            }
        }
    }

    let after_scheme = s.split_once("://").map_or(s, |(_, rest)| rest);
    if let Some((authority, path)) = after_scheme.split_once('/') {
        // Userinfo can only live inside the authority, so an '@' that appears
        // later in the path (e.g. `owner/repo@v2`) must not be taken for it.
        let host_port = authority
            .rsplit_once('@')
            .map_or(authority, |(_, host_port)| host_port);
        let host = host_port.split(':').next().unwrap_or(host_port);
        return format!("{}/{}", host.to_lowercase(), normalize_path(path));
    }

    format!("local/{}", sanitize_local(s))
}
pub fn canonical_corpus_local(repo_root: &Path) -> String {
let basename = repo_root
.file_name()
.and_then(|n| n.to_str())
.unwrap_or("unknown");
format!("local/{}", sanitize_local(basename))
}
/// Normalises the path part of a remote URL so that equivalent spellings of
/// the same repository compare equal.
///
/// The path is lowercased, leading and trailing `/` are removed, a trailing
/// `.git` is stripped, and any `/` exposed by that strip is removed as well.
/// For example `/Owner/Repo.git/` becomes `owner/repo`.
fn normalize_path(path: &str) -> String {
    // Lowercase first so that `.GIT` is recognised as the suffix too.
    let lower = path.to_lowercase();
    lower
        // A leading '/' (scp-style absolute path, doubled URL slash) would
        // otherwise yield `host//path` once joined with the host.
        .trim_matches('/')
        .trim_end_matches(".git")
        // `repo/.git` leaves a dangling '/' behind after the suffix strip.
        .trim_end_matches('/')
        .to_string()
}
/// Turns an arbitrary string into a lowercase identifier.
///
/// Alphanumeric characters (as defined by `char::is_alphanumeric`), `-` and
/// `_` are kept; every other character is replaced by `-`. The result is then
/// lowercased as a whole.
fn sanitize_local(s: &str) -> String {
    let mut cleaned = String::with_capacity(s.len());
    for ch in s.chars() {
        let keep = ch.is_alphanumeric() || matches!(ch, '-' | '_');
        cleaned.push(if keep { ch } else { '-' });
    }
    // Lowercase the finished string rather than each char, so context-sensitive
    // mappings are applied exactly as `str::to_lowercase` defines them.
    cleaned.to_lowercase()
}
/// Programming languages known to this module.
///
/// `Language::from_extension` maps file extensions to variants, and
/// `Language::as_str` / `Language::from_str` convert to and from the
/// lowercase names.
///
/// The enum is `#[non_exhaustive]`, so matches in other crates need a
/// wildcard arm.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[non_exhaustive]
pub enum Language {
/// Name `typescript`; extensions `ts`, `tsx`, `mts`, `cts`.
TypeScript,
/// Name `rust`; extension `rs`.
Rust,
/// Name `python`; extensions `py`, `pyi`.
Python,
/// Name `go`; extension `go`.
Go,
/// Name `java`; extension `java`.
Java,
/// Name `kotlin`; extensions `kt`, `kts`.
Kotlin,
/// Name `ruby`; extensions `rb`, `rake`, `gemspec`.
Ruby,
/// Name `csharp`; extension `cs`.
CSharp,
/// Name `php`; extensions `php`, `phtml`, `php8`.
Php,
/// Name `scala`; extensions `scala`, `sc`.
Scala,
/// Name `cpp`; extensions `cpp`, `cc`, `cxx`, `hpp`, `hh`, `hxx`.
Cpp,
/// Name `c`; extensions `c`, `h`.
C,
}
impl Language {
    /// Every variant, in declaration order; backs the reverse lookup in
    /// [`Language::from_str`].
    const VARIANTS: [Self; 12] = [
        Self::TypeScript,
        Self::Rust,
        Self::Python,
        Self::Go,
        Self::Java,
        Self::Kotlin,
        Self::Ruby,
        Self::CSharp,
        Self::Php,
        Self::Scala,
        Self::Cpp,
        Self::C,
    ];

    /// Maps a file extension (without the leading dot) to its language.
    ///
    /// Matching is exact and case-sensitive; unrecognised extensions,
    /// including the empty string, yield `None`.
    pub fn from_extension(ext: &str) -> Option<Self> {
        let language = match ext {
            "ts" | "tsx" | "mts" | "cts" => Self::TypeScript,
            "rs" => Self::Rust,
            "py" | "pyi" => Self::Python,
            "go" => Self::Go,
            "java" => Self::Java,
            "kt" | "kts" => Self::Kotlin,
            "rb" | "rake" | "gemspec" => Self::Ruby,
            "cs" => Self::CSharp,
            "php" | "phtml" | "php8" => Self::Php,
            "scala" | "sc" => Self::Scala,
            "cpp" | "cc" | "cxx" | "hpp" | "hh" | "hxx" => Self::Cpp,
            "c" | "h" => Self::C,
            _ => return None,
        };
        Some(language)
    }

    /// Returns the lowercase name of the language.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::C => "c",
            Self::Cpp => "cpp",
            Self::CSharp => "csharp",
            Self::Go => "go",
            Self::Java => "java",
            Self::Kotlin => "kotlin",
            Self::Php => "php",
            Self::Python => "python",
            Self::Ruby => "ruby",
            Self::Rust => "rust",
            Self::Scala => "scala",
            Self::TypeScript => "typescript",
        }
    }

    /// Parses a name produced by [`Language::as_str`].
    ///
    /// Matching is exact and case-sensitive; anything else yields `None`.
    // Inherent method returning `Option`, so it deliberately does not
    // implement `std::str::FromStr`.
    #[allow(clippy::should_implement_trait)]
    pub fn from_str(s: &str) -> Option<Self> {
        Self::VARIANTS
            .iter()
            .copied()
            .find(|language| language.as_str() == s)
    }
}
/// Five-field name identifying a node; `VName::id` hashes all five fields
/// into a `NodeId`.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct VName {
/// Repository identifier, e.g. `github.com/owner/repo` in the tests
/// (presumably produced by `canonical_corpus` — confirm against callers).
pub corpus: String,
/// NOTE(review): the tests pass both `"main"` and `""` here; the exact
/// meaning of this field is not visible in this file.
pub root: String,
/// File path; the tests use repo-relative paths such as `src/lib.rs`.
pub path: String,
/// Language name; the tests use the lowercase names that `Language::as_str`
/// returns (presumably always the case — confirm).
pub language: String,
/// Symbol signature, e.g. `fn:sample` in the tests.
pub signature: String,
}
impl VName {
    /// Builds a `VName`, converting each argument into an owned `String`.
    pub fn new(
        corpus: impl Into<String>,
        root: impl Into<String>,
        path: impl Into<String>,
        language: impl Into<String>,
        signature: impl Into<String>,
    ) -> Self {
        let (corpus, root, path) = (corpus.into(), root.into(), path.into());
        let (language, signature) = (language.into(), signature.into());
        Self {
            corpus,
            root,
            path,
            language,
            signature,
        }
    }

    /// Computes the `NodeId` for this name.
    ///
    /// The BLAKE3 input is the `SIGNATURE_FORMAT_VERSION` byte followed by
    /// each field in the order corpus, root, path, language, signature, each
    /// preceded by its byte length as a little-endian `u32`. The id is the
    /// first 8 digest bytes read as a little-endian `u64`.
    pub fn id(&self) -> NodeId {
        // Field order is part of the hash format; do not reorder.
        let parts = [
            &self.corpus,
            &self.root,
            &self.path,
            &self.language,
            &self.signature,
        ];

        let mut hasher = blake3::Hasher::new();
        hasher.update(&[SIGNATURE_FORMAT_VERSION]);
        for part in parts {
            // The length prefix keeps adjacent fields from running together.
            let len_prefix = (part.len() as u32).to_le_bytes();
            hasher.update(&len_prefix);
            hasher.update(part.as_bytes());
        }

        let digest = hasher.finalize();
        let mut low = [0u8; 8];
        low.copy_from_slice(&digest.as_bytes()[..8]);
        NodeId(u64::from_le_bytes(low))
    }
}
/// 64-bit node identifier.
///
/// `VName::id` produces it from the first 8 bytes of a BLAKE3 digest,
/// interpreted as a little-endian `u64`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
pub struct NodeId(pub u64);
/// Kind of relationship an `Edge` represents.
///
/// Each variant is serialized under the same string that `EdgeKind::as_str`
/// returns for it (see the `serde(rename)` attributes).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum EdgeKind {
/// Serialized as `depends`.
#[serde(rename = "depends")]
Depends,
/// Serialized as `ref/call`.
#[serde(rename = "ref/call")]
RefCall,
/// Serialized as `defines/binding`.
#[serde(rename = "defines/binding")]
DefinesBinding,
/// Serialized as `exports`.
#[serde(rename = "exports")]
Exports,
/// Serialized as `resolves-to`.
#[serde(rename = "resolves-to")]
ResolvesTo,
/// Serialized as `ref/imports`.
#[serde(rename = "ref/imports")]
RefImports,
/// Serialized as `is-implementation`.
#[serde(rename = "is-implementation")]
IsImplementation,
/// Serialized as `overrides`.
#[serde(rename = "overrides")]
Overrides,
/// Serialized as `ffi/call`; `Edge::ffi_call` builds edges of this kind.
#[serde(rename = "ffi/call")]
FFICall,
}
impl EdgeKind {
    /// Every variant, in declaration order; backs the reverse lookup in
    /// [`EdgeKind::from_str`].
    const VARIANTS: [Self; 9] = [
        Self::Depends,
        Self::RefCall,
        Self::DefinesBinding,
        Self::Exports,
        Self::ResolvesTo,
        Self::RefImports,
        Self::IsImplementation,
        Self::Overrides,
        Self::FFICall,
    ];

    /// Returns the string name of the edge kind (the same string used when
    /// the kind is serialized).
    pub fn as_str(self) -> &'static str {
        match self {
            Self::RefCall => "ref/call",
            Self::FFICall => "ffi/call",
            Self::RefImports => "ref/imports",
            Self::DefinesBinding => "defines/binding",
            Self::Exports => "exports",
            Self::Depends => "depends",
            Self::ResolvesTo => "resolves-to",
            Self::IsImplementation => "is-implementation",
            Self::Overrides => "overrides",
        }
    }

    /// Returns the weight of this edge kind, a value in `(0, 1]`.
    ///
    /// Arms are listed from the heaviest kind to the lightest.
    pub fn ppr_weight(self) -> f32 {
        match self {
            Self::RefCall => 1.00,
            Self::FFICall => 0.85,
            Self::DefinesBinding => 0.70,
            Self::Exports => 0.60,
            Self::Depends | Self::ResolvesTo => 0.50,
            Self::RefImports | Self::IsImplementation => 0.40,
            Self::Overrides => 0.30,
        }
    }

    /// Parses a name produced by [`EdgeKind::as_str`].
    ///
    /// Matching is exact; anything else yields `None`.
    // Inherent method returning `Option`, so it deliberately does not
    // implement `std::str::FromStr`.
    #[allow(clippy::should_implement_trait)]
    pub fn from_str(s: &str) -> Option<Self> {
        Self::VARIANTS.iter().copied().find(|kind| kind.as_str() == s)
    }
}
/// A graph node: a `VName` together with its derived id and some metadata.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct Node {
/// Identifier; `Node::new` sets it to `vname.id()`.
pub id: NodeId,
/// The name this node was built from.
pub vname: VName,
/// Free-form kind label (the tests use `"function"`).
pub kind: String,
/// Package name; empty unless set via `Node::with_package`. It does not
/// take part in the id.
pub package: String,
/// Optional line number, set via `Node::with_line`
/// (presumably a source line; numbering base not visible here — confirm).
pub line: Option<u32>,
}
impl Node {
    /// Creates a node for `vname` with the given kind label.
    ///
    /// The id is derived from `vname`; `package` starts empty and `line`
    /// starts as `None`.
    pub fn new(vname: VName, kind: impl Into<String>) -> Self {
        Self {
            // Evaluated before `vname` is moved into the struct below.
            id: vname.id(),
            vname,
            kind: kind.into(),
            package: String::new(),
            line: None,
        }
    }

    /// Returns the node with `package` replaced; the id is left untouched.
    pub fn with_package(self, package: impl Into<String>) -> Self {
        Self {
            package: package.into(),
            ..self
        }
    }

    /// Returns the node with `line` set to `Some(line)`.
    pub fn with_line(self, line: u32) -> Self {
        Self {
            line: Some(line),
            ..self
        }
    }
}
/// A directed, typed edge between two nodes.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct Edge {
/// Id of the source node.
pub src: NodeId,
/// Id of the destination node.
pub dst: NodeId,
/// Relationship this edge represents.
pub kind: EdgeKind,
/// Optional confidence value; `Edge::ffi_call` sets it and debug-asserts
/// that it is in `0..=100`. Left out of the serialized form when `None`,
/// and read back as `None` when the field is absent.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub confidence: Option<u8>,
}
impl Edge {
    /// Creates an edge of the given kind with no confidence value.
    pub fn new(src: NodeId, dst: NodeId, kind: EdgeKind) -> Self {
        let confidence = None;
        Self {
            src,
            dst,
            kind,
            confidence,
        }
    }

    /// Creates an `EdgeKind::FFICall` edge carrying `confidence`.
    ///
    /// `confidence` is expected to be in `0..=100`; this is checked with a
    /// `debug_assert!`, so it is only enforced in builds with debug
    /// assertions enabled.
    pub fn ffi_call(src: NodeId, dst: NodeId, confidence: u8) -> Self {
        debug_assert!(
            confidence <= 100,
            "confidence must be 0..=100, got {confidence}"
        );
        Self {
            confidence: Some(confidence),
            ..Self::new(src, dst, EdgeKind::FFICall)
        }
    }
}
/// Unit tests for corpus canonicalisation, `Language`, `VName`/`NodeId`,
/// `EdgeKind`, `Node` and `Edge`.
#[cfg(test)]
mod tests {
    use super::*;

    /// Every `EdgeKind` variant, shared by the table-driven tests below.
    const ALL_EDGE_KINDS: [EdgeKind; 9] = [
        EdgeKind::Depends,
        EdgeKind::RefCall,
        EdgeKind::DefinesBinding,
        EdgeKind::Exports,
        EdgeKind::ResolvesTo,
        EdgeKind::RefImports,
        EdgeKind::IsImplementation,
        EdgeKind::Overrides,
        EdgeKind::FFICall,
    ];

    /// Every `Language` variant, shared by the table-driven tests below.
    const ALL_LANGUAGES: [Language; 12] = [
        Language::TypeScript,
        Language::Rust,
        Language::Python,
        Language::Go,
        Language::Java,
        Language::Kotlin,
        Language::Ruby,
        Language::CSharp,
        Language::Php,
        Language::Scala,
        Language::Cpp,
        Language::C,
    ];

    /// A representative `VName` used as the baseline in several tests.
    fn sample_vname() -> VName {
        VName::new(
            "github.com/raj-rkv/travsr",
            "main",
            "crates/travsr-core/src/lib.rs",
            "rust",
            "fn:sample",
        )
    }

    #[test]
    fn vname_round_trips_through_serde_json() {
        let v = sample_vname();
        let json = serde_json::to_string(&v).unwrap();
        let back: VName = serde_json::from_str(&json).unwrap();
        assert_eq!(v, back);
    }

    #[test]
    fn vname_id_is_deterministic() {
        assert_eq!(sample_vname().id(), sample_vname().id());
    }

    #[test]
    fn vname_id_differs_on_any_field_change() {
        let base = sample_vname();
        // One mutated copy per field, so every field is shown to feed the id.
        let changed = [
            VName {
                corpus: "github.com/raj-rkv/other".into(),
                ..base.clone()
            },
            VName {
                root: "dev".into(),
                ..base.clone()
            },
            VName {
                path: "crates/travsr-core/src/main.rs".into(),
                ..base.clone()
            },
            VName {
                language: "python".into(),
                ..base.clone()
            },
            VName {
                signature: "fn:different".into(),
                ..base.clone()
            },
        ];
        for other in &changed {
            assert_ne!(
                base.id(),
                other.id(),
                "changing a field must change the id: {other:?}"
            );
        }
    }

    #[test]
    fn vname_id_is_sensitive_to_field_boundaries() {
        // Same concatenated bytes, different split between corpus and root.
        let a = VName::new("ab", "c", "", "", "");
        let b = VName::new("a", "bc", "", "", "");
        assert_ne!(
            a.id(),
            b.id(),
            "length prefixes must keep adjacent fields from running together"
        );
    }

    #[test]
    fn edge_kind_round_trips_through_string() {
        for kind in ALL_EDGE_KINDS {
            assert_eq!(EdgeKind::from_str(kind.as_str()), Some(kind));
        }
    }

    #[test]
    fn ppr_weights_are_ordered_by_semantic_strength() {
        assert!(EdgeKind::RefCall.ppr_weight() > EdgeKind::DefinesBinding.ppr_weight());
        assert!(EdgeKind::DefinesBinding.ppr_weight() > EdgeKind::Exports.ppr_weight());
        assert!(EdgeKind::Exports.ppr_weight() > EdgeKind::Depends.ppr_weight());
        assert_eq!(
            EdgeKind::Depends.ppr_weight(),
            EdgeKind::ResolvesTo.ppr_weight()
        );
        assert!(EdgeKind::Depends.ppr_weight() > EdgeKind::RefImports.ppr_weight());
        assert_eq!(
            EdgeKind::RefImports.ppr_weight(),
            EdgeKind::IsImplementation.ppr_weight()
        );
        assert!(EdgeKind::IsImplementation.ppr_weight() > EdgeKind::Overrides.ppr_weight());
    }

    #[test]
    fn ppr_weights_are_positive_and_at_most_one() {
        for kind in ALL_EDGE_KINDS {
            let w = kind.ppr_weight();
            assert!(
                w > 0.0 && w <= 1.0,
                "weight {w} for {kind:?} must be in (0, 1]"
            );
        }
    }

    #[test]
    fn node_id_matches_vname_id() {
        let v = sample_vname();
        let node = Node::new(v.clone(), "function");
        assert_eq!(node.id, v.id());
    }

    #[test]
    fn version_byte_produces_different_id_than_unversioned() {
        let v = sample_vname();
        let versioned_id = v.id();

        // Re-create the legacy scheme: fields joined by NUL, no version byte.
        let mut hasher = blake3::Hasher::new();
        hasher.update(v.corpus.as_bytes());
        hasher.update(b"\0");
        hasher.update(v.root.as_bytes());
        hasher.update(b"\0");
        hasher.update(v.path.as_bytes());
        hasher.update(b"\0");
        hasher.update(v.language.as_bytes());
        hasher.update(b"\0");
        hasher.update(v.signature.as_bytes());
        let digest = hasher.finalize();
        let mut buf = [0u8; 8];
        buf.copy_from_slice(&digest.as_bytes()[..8]);
        let legacy_id = NodeId(u64::from_le_bytes(buf));

        assert_ne!(
            versioned_id, legacy_id,
            "RFC-002 version byte + length-prefix must produce a different NodeId than the legacy NUL-separated hash"
        );
    }

    #[test]
    fn canonical_corpus_handles_https_with_git_suffix() {
        assert_eq!(
            canonical_corpus("https://github.com/raj-rkv/travsr.git"),
            "github.com/raj-rkv/travsr"
        );
    }

    #[test]
    fn canonical_corpus_handles_https_without_git_suffix() {
        assert_eq!(
            canonical_corpus("https://github.com/raj-rkv/travsr"),
            "github.com/raj-rkv/travsr"
        );
    }

    #[test]
    fn canonical_corpus_handles_scp_style_ssh() {
        assert_eq!(
            canonical_corpus("git@github.com:raj-rkv/travsr.git"),
            "github.com/raj-rkv/travsr"
        );
        assert_eq!(
            canonical_corpus("git@github.com:raj-rkv/travsr"),
            "github.com/raj-rkv/travsr"
        );
    }

    #[test]
    fn canonical_corpus_handles_ssh_url() {
        assert_eq!(
            canonical_corpus("ssh://git@github.com/raj-rkv/travsr.git"),
            "github.com/raj-rkv/travsr"
        );
    }

    #[test]
    fn canonical_corpus_handles_git_protocol() {
        assert_eq!(
            canonical_corpus("git://github.com/raj-rkv/travsr.git"),
            "github.com/raj-rkv/travsr"
        );
    }

    #[test]
    fn canonical_corpus_lowercases_input() {
        assert_eq!(
            canonical_corpus("HTTPS://GITHUB.COM/Raj-Rkv/Travsr.GIT"),
            "github.com/raj-rkv/travsr"
        );
    }

    #[test]
    fn canonical_corpus_strips_port() {
        assert_eq!(
            canonical_corpus("https://github.com:443/raj-rkv/travsr.git"),
            "github.com/raj-rkv/travsr"
        );
    }

    #[test]
    fn canonical_corpus_strips_trailing_slash() {
        assert_eq!(
            canonical_corpus("https://github.com/raj-rkv/travsr/"),
            "github.com/raj-rkv/travsr"
        );
    }

    #[test]
    fn canonical_corpus_gitlab() {
        assert_eq!(
            canonical_corpus("https://gitlab.com/acme/payments-api.git"),
            "gitlab.com/acme/payments-api"
        );
    }

    #[test]
    fn canonical_corpus_local_uses_basename() {
        let path = std::path::Path::new("/home/user/my-project");
        assert_eq!(canonical_corpus_local(path), "local/my-project");
    }

    #[test]
    fn canonical_corpus_local_sanitises_special_chars() {
        let path = std::path::Path::new("/tmp/My Project (v2)");
        let result = canonical_corpus_local(path);
        assert!(result.starts_with("local/"));
        assert!(!result.contains(' '), "spaces must be replaced");
        assert!(!result.contains('('), "parens must be replaced");
    }

    #[test]
    fn different_corpus_produces_non_colliding_node_ids() {
        let v_repo_a = VName::new(
            "github.com/acme/repo-a",
            "",
            "src/foo.ts",
            "typescript",
            "fn:bar",
        );
        let v_repo_b = VName::new(
            "github.com/acme/repo-b",
            "",
            "src/foo.ts",
            "typescript",
            "fn:bar",
        );
        assert_ne!(
            v_repo_a.id(),
            v_repo_b.id(),
            "different corpora must produce different NodeIds (cross-repo VName collision)"
        );
    }

    #[test]
    fn language_from_extension_covers_all_variants() {
        // Every extension accepted by `from_extension`, grouped by language.
        let cases: &[(&str, Language)] = &[
            ("ts", Language::TypeScript),
            ("tsx", Language::TypeScript),
            ("mts", Language::TypeScript),
            ("cts", Language::TypeScript),
            ("rs", Language::Rust),
            ("py", Language::Python),
            ("pyi", Language::Python),
            ("go", Language::Go),
            ("java", Language::Java),
            ("kt", Language::Kotlin),
            ("kts", Language::Kotlin),
            ("rb", Language::Ruby),
            ("rake", Language::Ruby),
            ("gemspec", Language::Ruby),
            ("cs", Language::CSharp),
            ("php", Language::Php),
            ("phtml", Language::Php),
            ("php8", Language::Php),
            ("scala", Language::Scala),
            ("sc", Language::Scala),
            ("cpp", Language::Cpp),
            ("cc", Language::Cpp),
            ("cxx", Language::Cpp),
            ("hpp", Language::Cpp),
            ("hh", Language::Cpp),
            ("hxx", Language::Cpp),
            ("c", Language::C),
            ("h", Language::C),
        ];
        for &(ext, lang) in cases {
            assert_eq!(
                Language::from_extension(ext),
                Some(lang),
                "extension {ext} must map to {lang:?}"
            );
        }

        // The table above must reach every variant at least once.
        for lang in ALL_LANGUAGES {
            assert!(
                cases.iter().any(|&(_, mapped)| mapped == lang),
                "no extension maps to {lang:?}"
            );
        }

        assert_eq!(Language::from_extension("js"), None);
        assert_eq!(Language::from_extension(""), None);
    }

    #[test]
    fn language_as_str_and_from_str_round_trip() {
        for lang in ALL_LANGUAGES {
            let s = lang.as_str();
            assert_eq!(
                Language::from_str(s),
                Some(lang),
                "round-trip failed for {s}"
            );
        }
    }

    #[test]
    fn language_as_str_values_are_lowercase() {
        assert_eq!(Language::TypeScript.as_str(), "typescript");
        assert_eq!(Language::Rust.as_str(), "rust");
        assert_eq!(Language::Python.as_str(), "python");
        assert_eq!(Language::Go.as_str(), "go");
        for lang in ALL_LANGUAGES {
            let s = lang.as_str();
            assert_eq!(s, s.to_lowercase(), "name of {lang:?} must be lowercase");
        }
    }

    #[test]
    fn language_from_str_returns_none_for_unknown() {
        assert_eq!(Language::from_str("go"), Some(Language::Go));
        assert_eq!(Language::from_str("TypeScript"), None);
        assert_eq!(Language::from_str(""), None);
    }

    #[test]
    fn language_field_prevents_cross_language_vname_collision() {
        let ts = VName::new("github.com/a/b", "", "src/main.rs", "typescript", "fn:main");
        let rs = VName::new("github.com/a/b", "", "src/main.rs", "rust", "fn:main");
        assert_ne!(
            ts.id(),
            rs.id(),
            "different language fields must produce different NodeIds"
        );
    }

    #[test]
    fn node_with_package_does_not_change_id() {
        let vname = VName::new("github.com/a/b", "", "src/lib.rs", "rust", "fn:open");
        let plain = Node::new(vname.clone(), "function");
        let packaged = Node::new(vname, "function").with_package("my-crate");
        assert_eq!(plain.id, packaged.id, "package must not affect NodeId");
        assert_eq!(packaged.package, "my-crate");
        assert_eq!(plain.package, "");
    }

    #[test]
    fn edge_ffi_call_builder_sets_confidence() {
        let e = Edge::ffi_call(NodeId(1), NodeId(2), 90);
        assert_eq!(e.kind, EdgeKind::FFICall);
        assert_eq!(e.confidence, Some(90));
    }

    #[test]
    fn edge_new_has_no_confidence() {
        let e = Edge::new(NodeId(1), NodeId(2), EdgeKind::RefCall);
        assert_eq!(e.confidence, None);
    }

    #[test]
    fn edge_kind_ffi_call_roundtrip() {
        assert_eq!(EdgeKind::FFICall.as_str(), "ffi/call");
        assert_eq!(EdgeKind::from_str("ffi/call"), Some(EdgeKind::FFICall));
    }

    #[test]
    fn ppr_weight_ffi_call_is_between_refcall_and_defines_binding() {
        assert!(EdgeKind::FFICall.ppr_weight() < EdgeKind::RefCall.ppr_weight());
        assert!(EdgeKind::FFICall.ppr_weight() > EdgeKind::DefinesBinding.ppr_weight());
        assert!((EdgeKind::FFICall.ppr_weight() - 0.85_f32).abs() < 1e-6);
    }

    #[test]
    fn edge_serde_roundtrip_with_confidence() {
        let e = Edge::ffi_call(NodeId(42), NodeId(99), 75);
        let json = serde_json::to_string(&e).unwrap();
        assert!(json.contains("\"confidence\":75"));
        let e2: Edge = serde_json::from_str(&json).unwrap();
        assert_eq!(e2.confidence, Some(75));
    }

    #[test]
    fn edge_serde_roundtrip_without_confidence_field() {
        let json = r#"{"src":1,"dst":2,"kind":"ref/call"}"#;
        let e: Edge = serde_json::from_str(json).unwrap();
        assert_eq!(e.confidence, None);
    }
}