1use std::hash::Hasher;
2use tree_sitter::Node;
3use xxhash_rust::xxh3::Xxh3;
4
5pub fn content_hash(content: &str) -> String {
6 format!("{:016x}", xxhash_rust::xxh3::xxh3_64(content.as_bytes()))
7}
8
9pub fn short_hash(content: &str, length: usize) -> String {
10 let hash = content_hash(content);
11 hash[..length.min(hash.len())].to_string()
12}
13
14pub fn structural_hash(node: Node, source: &[u8]) -> String {
19 let mut hasher = Xxh3::new();
20 hash_structural_tokens(node, source, &mut hasher);
21 format!("{:016x}", hasher.finish())
22}
23
24pub fn structural_hash_excluding_range(
28 node: Node,
29 source: &[u8],
30 exclude_start: usize,
31 exclude_end: usize,
32) -> String {
33 let mut hasher = Xxh3::new();
34 hash_structural_tokens_excluding(node, source, &mut hasher, exclude_start, exclude_end);
35 format!("{:016x}", hasher.finish())
36}
37
38fn hash_structural_tokens(node: Node, source: &[u8], hasher: &mut Xxh3) {
43 let kind = node.kind();
44
45 if is_comment_node(kind) {
46 return;
47 }
48
49 if node.child_count() == 0 {
50 let start = node.start_byte();
52 let end = node.end_byte();
53 if start < end && end <= source.len() {
54 let bytes = &source[start..end];
55 let trimmed = trim_bytes(bytes);
57 if !trimmed.is_empty() {
58 hasher.write(trimmed);
59 hasher.write(b" ");
60 }
61 }
62 } else {
63 hasher.write(kind.as_bytes());
66 hasher.write(b":");
67 let mut cursor = node.walk();
68 for child in node.children(&mut cursor) {
69 hash_structural_tokens(child, source, hasher);
70 }
71 }
72}
73
74fn hash_structural_tokens_excluding(
77 node: Node,
78 source: &[u8],
79 hasher: &mut Xxh3,
80 exclude_start: usize,
81 exclude_end: usize,
82) {
83 let kind = node.kind();
84
85 if is_comment_node(kind) {
86 return;
87 }
88
89 if node.child_count() == 0 {
90 let start = node.start_byte();
91 let end = node.end_byte();
92 if start < exclude_end && end > exclude_start {
94 return;
95 }
96 if start < end && end <= source.len() {
97 let bytes = &source[start..end];
98 let trimmed = trim_bytes(bytes);
99 if !trimmed.is_empty() {
100 hasher.write(trimmed);
101 hasher.write(b" ");
102 }
103 }
104 } else {
105 hasher.write(kind.as_bytes());
106 hasher.write(b":");
107 let mut cursor = node.walk();
108 for child in node.children(&mut cursor) {
109 hash_structural_tokens_excluding(child, source, hasher, exclude_start, exclude_end);
110 }
111 }
112}
113
114#[inline]
116fn trim_bytes(bytes: &[u8]) -> &[u8] {
117 let start = bytes.iter().position(|b| !b.is_ascii_whitespace()).unwrap_or(bytes.len());
118 let end = bytes.iter().rposition(|b| !b.is_ascii_whitespace()).map_or(start, |p| p + 1);
119 &bytes[start..end]
120}
121
122fn is_comment_node(kind: &str) -> bool {
123 matches!(
124 kind,
125 "comment" | "line_comment" | "block_comment" | "doc_comment" | "tag_comment"
126 )
127}
128
129#[cfg(test)]
130mod tests {
131 use super::*;
132
133 #[test]
134 fn test_content_hash_deterministic() {
135 let h1 = content_hash("hello world");
136 let h2 = content_hash("hello world");
137 assert_eq!(h1, h2);
138 }
139
140 #[test]
141 fn test_content_hash_hex_format() {
142 let h = content_hash("test");
143 assert_eq!(h.len(), 16); assert!(h.chars().all(|c| c.is_ascii_hexdigit()));
145 }
146
147 #[test]
148 fn test_short_hash() {
149 let h = short_hash("test", 8);
150 assert_eq!(h.len(), 8);
151 }
152}