use sha1::{Digest, Sha1};
/// Derives a deterministic node identifier for a code symbol.
///
/// The four inputs are joined with `:` in the order
/// `project_id:language:file_path:fqn`, the UTF-8 bytes of that key are
/// hashed with SHA-1, and the first 16 lowercase hex characters of the
/// digest are appended to the literal prefix `node-`.
///
/// Identical inputs always produce the identical identifier.
pub fn code_symbol_node_id(
    project_id: &str,
    language: &str,
    file_path: &str,
    fqn: &str,
) -> String {
    // Single colon-separated key; field order is part of the id contract.
    let key = format!("{project_id}:{language}:{file_path}:{fqn}");
    let digest = Sha1::digest(key.as_bytes());
    // Each byte encodes to two hex characters, so the leading 8 digest bytes
    // yield exactly the 16-character suffix.
    let suffix = hex_encode(&digest[..8]);
    format!("node-{suffix}")
}
/// Encodes `bytes` as a lowercase hexadecimal string.
///
/// Every input byte becomes exactly two characters (high nibble first), so
/// the result has length `bytes.len() * 2`. An empty slice yields an empty
/// string.
fn hex_encode(bytes: &[u8]) -> String {
    // Lookup table indexed by nibble value; avoids building a temporary
    // `String` through `format!` for every byte.
    const HEX_DIGITS: &[u8; 16] = b"0123456789abcdef";

    let mut s = String::with_capacity(bytes.len() * 2);
    for &b in bytes {
        s.push(char::from(HEX_DIGITS[usize::from(b >> 4)]));
        s.push(char::from(HEX_DIGITS[usize::from(b & 0x0f)]));
    }
    s
}
#[cfg(test)]
mod tests {
    use super::*;

    // Baseline inputs shared by every test; each divergence test swaps
    // exactly one of them for a different value.
    const PROJECT: &str = "proj1";
    const LANGUAGE: &str = "python";
    const FILE_PATH: &str = "a/b/c.py";
    const FQN: &str = "a.b.c.f";

    /// Identifier produced for the unmodified baseline inputs.
    fn baseline_id() -> String {
        code_symbol_node_id(PROJECT, LANGUAGE, FILE_PATH, FQN)
    }

    #[test]
    fn test_deterministic_for_same_inputs() {
        let first = baseline_id();
        let second = baseline_id();
        assert_eq!(first, second);
    }

    #[test]
    fn test_id_format_is_node_prefix_plus_16_hex() {
        let id = baseline_id();
        assert!(id.starts_with("node-"));

        let hex_part = &id["node-".len()..];
        assert_eq!(hex_part.len(), 16);

        // Digits and lowercase a-f only: uppercase hex must be rejected.
        let only_lowercase_hex = hex_part
            .chars()
            .all(|c| matches!(c, '0'..='9' | 'a'..='f'));
        assert!(
            only_lowercase_hex,
            "expected lowercase hex chars only, got {hex_part}"
        );
    }

    #[test]
    fn test_different_projects_diverge() {
        let other = code_symbol_node_id("proj2", LANGUAGE, FILE_PATH, FQN);
        assert_ne!(baseline_id(), other);
    }

    #[test]
    fn test_different_languages_diverge() {
        let other = code_symbol_node_id(PROJECT, "java", FILE_PATH, FQN);
        assert_ne!(baseline_id(), other);
    }

    #[test]
    fn test_different_files_diverge() {
        let other = code_symbol_node_id(PROJECT, LANGUAGE, "a/b/d.py", FQN);
        assert_ne!(baseline_id(), other);
    }

    #[test]
    fn test_different_fqns_diverge() {
        let other = code_symbol_node_id(PROJECT, LANGUAGE, FILE_PATH, "a.b.c.g");
        assert_ne!(baseline_id(), other);
    }
}