use std::hash::Hasher;
use tree_sitter::Node;
use xxhash_rust::xxh3::Xxh3;
/// Hashes `content` with 64-bit XXH3 and renders the digest as text.
///
/// The input's UTF-8 bytes are hashed in one shot. The result is always
/// 16 lowercase hexadecimal characters, left-padded with zeros.
pub fn content_hash(content: &str) -> String {
    let digest: u64 = xxhash_rust::xxh3::xxh3_64(content.as_bytes());
    format!("{:016x}", digest)
}
/// Returns the leading `length` characters of [`content_hash`] for `content`.
///
/// When `length` is at least as long as the full digest, the whole digest
/// is returned unchanged.
pub fn short_hash(content: &str, length: usize) -> String {
    let mut digest = content_hash(content);
    // The digest is pure ASCII hex, so any cut point is a valid char
    // boundary; `truncate` is a no-op when `length` exceeds the digest size.
    digest.truncate(length);
    digest
}
/// Computes a hash of the syntax subtree rooted at `node`.
///
/// The subtree is streamed into an XXH3 hasher by `hash_structural_tokens`
/// (which skips comment nodes and trims leaf-token whitespace); `source` is
/// the byte buffer the node's byte offsets index into. The 64-bit result is
/// returned as 16 zero-padded lowercase hex characters.
pub fn structural_hash(node: Node, source: &[u8]) -> String {
    let mut state = Xxh3::new();
    hash_structural_tokens(node, source, &mut state);
    let digest = state.finish();
    format!("{:016x}", digest)
}
/// Computes a hash of the syntax subtree rooted at `node`, leaving out leaf
/// tokens that overlap a byte range.
///
/// Works like `structural_hash`, but the traversal is delegated to
/// `hash_structural_tokens_excluding`, which drops every leaf whose byte
/// span overlaps `exclude_start..exclude_end`. The 64-bit result is returned
/// as 16 zero-padded lowercase hex characters.
pub fn structural_hash_excluding_range(
    node: Node,
    source: &[u8],
    exclude_start: usize,
    exclude_end: usize,
) -> String {
    let mut state = Xxh3::new();
    hash_structural_tokens_excluding(node, source, &mut state, exclude_start, exclude_end);
    let digest = state.finish();
    format!("{:016x}", digest)
}
/// Recursively feeds the subtree rooted at `node` into `hasher`.
///
/// * Comment nodes (see `is_comment_node`) contribute nothing, and their
///   children are not visited.
/// * Interior nodes contribute their kind name followed by `:`, then each
///   child in order.
/// * Leaf nodes contribute their whitespace-trimmed source text followed by
///   a single space; leaves that are empty after trimming, or whose byte
///   span does not fit inside `source`, are skipped.
fn hash_structural_tokens(node: Node, source: &[u8], hasher: &mut Xxh3) {
    let kind = node.kind();
    if is_comment_node(kind) {
        return;
    }

    // Interior node: record its kind, then descend into the children.
    if node.child_count() != 0 {
        hasher.write(kind.as_bytes());
        hasher.write(b":");
        let mut cursor = node.walk();
        for child in node.children(&mut cursor) {
            hash_structural_tokens(child, source, hasher);
        }
        return;
    }

    // Leaf node: `get` yields `None` for an inverted or out-of-bounds span,
    // and a zero-length span trims to nothing, so both cases are skipped.
    if let Some(raw) = source.get(node.start_byte()..node.end_byte()) {
        let token = trim_bytes(raw);
        if !token.is_empty() {
            hasher.write(token);
            hasher.write(b" ");
        }
    }
}
/// Recursively feeds the subtree rooted at `node` into `hasher`, omitting
/// leaf tokens that overlap `exclude_start..exclude_end`.
///
/// * Comment nodes (see `is_comment_node`) contribute nothing, and their
///   children are not visited.
/// * Interior nodes always contribute their kind name followed by `:`, then
///   each child in order; the exclusion range is only tested on leaves.
/// * A leaf is dropped when its span satisfies
///   `start < exclude_end && end > exclude_start`. Otherwise it contributes
///   its whitespace-trimmed source text followed by a single space, unless
///   that text is empty or the span does not fit inside `source`.
fn hash_structural_tokens_excluding(
    node: Node,
    source: &[u8],
    hasher: &mut Xxh3,
    exclude_start: usize,
    exclude_end: usize,
) {
    let kind = node.kind();
    if is_comment_node(kind) {
        return;
    }

    // Interior node: record its kind, then descend into the children.
    if node.child_count() != 0 {
        hasher.write(kind.as_bytes());
        hasher.write(b":");
        let mut cursor = node.walk();
        for child in node.children(&mut cursor) {
            hash_structural_tokens_excluding(child, source, hasher, exclude_start, exclude_end);
        }
        return;
    }

    // Leaf node: the overlap test runs before any bounds handling.
    let (start, end) = (node.start_byte(), node.end_byte());
    let overlaps_excluded = start < exclude_end && exclude_start < end;
    if overlaps_excluded {
        return;
    }

    // `get` yields `None` for an inverted or out-of-bounds span, and a
    // zero-length span trims to nothing, so both cases are skipped.
    if let Some(raw) = source.get(start..end) {
        let token = trim_bytes(raw);
        if !token.is_empty() {
            hasher.write(token);
            hasher.write(b" ");
        }
    }
}
/// Strips leading and trailing ASCII whitespace from `bytes`.
///
/// Whitespace is whatever `u8::is_ascii_whitespace` accepts. Interior
/// whitespace is preserved; an all-whitespace or empty input yields an
/// empty slice.
#[inline]
fn trim_bytes(bytes: &[u8]) -> &[u8] {
    let mut rest = bytes;

    // Peel whitespace off the front.
    while let [first, tail @ ..] = rest {
        if !first.is_ascii_whitespace() {
            break;
        }
        rest = tail;
    }

    // Peel whitespace off the back.
    while let [head @ .., last] = rest {
        if !last.is_ascii_whitespace() {
            break;
        }
        rest = head;
    }

    rest
}
/// Reports whether `kind` is one of the node kind names treated as a comment.
///
/// The comparison is exact and case-sensitive.
fn is_comment_node(kind: &str) -> bool {
    const COMMENT_KINDS: [&str; 5] = [
        "comment",
        "line_comment",
        "block_comment",
        "doc_comment",
        "tag_comment",
    ];
    COMMENT_KINDS.iter().any(|&candidate| candidate == kind)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Hashing the same text twice must give the same digest.
    #[test]
    fn test_content_hash_deterministic() {
        let input = "hello world";
        assert_eq!(content_hash(input), content_hash(input));
    }

    /// The digest is exactly 16 characters, all of them hex digits.
    #[test]
    fn test_content_hash_hex_format() {
        let digest = content_hash("test");
        assert_eq!(digest.len(), 16);
        assert!(digest.chars().all(|c| c.is_ascii_hexdigit()));
    }

    /// Requesting 8 characters yields an 8-character prefix.
    #[test]
    fn test_short_hash() {
        assert_eq!(short_hash("test", 8).len(), 8);
    }
}