1use std::hash::Hasher;
2use tree_sitter::Node;
3use xxhash_rust::xxh3::Xxh3;
4
5pub fn content_hash(content: &str) -> String {
6 format!("{:016x}", xxhash_rust::xxh3::xxh3_64(content.as_bytes()))
7}
8
9pub fn short_hash(content: &str, length: usize) -> String {
10 let hash = content_hash(content);
11 hash[..length.min(hash.len())].to_string()
12}
13
14pub fn structural_hash(node: Node, source: &[u8]) -> String {
19 let mut hasher = Xxh3::new();
20 hash_structural_tokens(node, source, &mut hasher);
21 format!("{:016x}", hasher.finish())
22}
23
24pub fn structural_hash_excluding_range(
28 node: Node,
29 source: &[u8],
30 exclude_start: usize,
31 exclude_end: usize,
32) -> String {
33 let mut hasher = Xxh3::new();
34 hash_structural_tokens_excluding(node, source, &mut hasher, exclude_start, exclude_end);
35 format!("{:016x}", hasher.finish())
36}
37
38fn hash_structural_tokens(root: Node, source: &[u8], hasher: &mut Xxh3) {
43 let mut worklist = vec![root];
44 while let Some(node) = worklist.pop() {
45 let kind = node.kind();
46
47 if is_comment_node(kind) {
48 continue;
49 }
50
51 if node.child_count() == 0 {
52 let start = node.start_byte();
54 let end = node.end_byte();
55 if start < end && end <= source.len() {
56 let bytes = &source[start..end];
57 let trimmed = trim_bytes(bytes);
59 if !trimmed.is_empty() {
60 hasher.write(trimmed);
61 hasher.write(b" ");
62 }
63 }
64 } else {
65 hasher.write(kind.as_bytes());
68 hasher.write(b":");
69 let mut cursor = node.walk();
70 let children: Vec<_> = node.children(&mut cursor).collect();
71 for child in children.into_iter().rev() {
72 worklist.push(child);
73 }
74 }
75 }
76}
77
78fn hash_structural_tokens_excluding(
81 root: Node,
82 source: &[u8],
83 hasher: &mut Xxh3,
84 exclude_start: usize,
85 exclude_end: usize,
86) {
87 let mut worklist = vec![root];
88 while let Some(node) = worklist.pop() {
89 let kind = node.kind();
90
91 if is_comment_node(kind) {
92 continue;
93 }
94
95 if node.child_count() == 0 {
96 let start = node.start_byte();
97 let end = node.end_byte();
98 if start < exclude_end && end > exclude_start {
100 continue;
101 }
102 if start < end && end <= source.len() {
103 let bytes = &source[start..end];
104 let trimmed = trim_bytes(bytes);
105 if !trimmed.is_empty() {
106 hasher.write(trimmed);
107 hasher.write(b" ");
108 }
109 }
110 } else {
111 hasher.write(kind.as_bytes());
112 hasher.write(b":");
113 let mut cursor = node.walk();
114 let children: Vec<_> = node.children(&mut cursor).collect();
115 for child in children.into_iter().rev() {
116 worklist.push(child);
117 }
118 }
119 }
120}
121
122#[inline]
124fn trim_bytes(bytes: &[u8]) -> &[u8] {
125 let start = bytes.iter().position(|b| !b.is_ascii_whitespace()).unwrap_or(bytes.len());
126 let end = bytes.iter().rposition(|b| !b.is_ascii_whitespace()).map_or(start, |p| p + 1);
127 &bytes[start..end]
128}
129
130fn is_comment_node(kind: &str) -> bool {
131 matches!(
132 kind,
133 "comment" | "line_comment" | "block_comment" | "doc_comment" | "tag_comment"
134 )
135}
136
137#[cfg(test)]
138mod tests {
139 use super::*;
140
141 #[test]
142 fn test_content_hash_deterministic() {
143 let h1 = content_hash("hello world");
144 let h2 = content_hash("hello world");
145 assert_eq!(h1, h2);
146 }
147
148 #[test]
149 fn test_content_hash_hex_format() {
150 let h = content_hash("test");
151 assert_eq!(h.len(), 16); assert!(h.chars().all(|c| c.is_ascii_hexdigit()));
153 }
154
155 #[test]
156 fn test_short_hash() {
157 let h = short_hash("test", 8);
158 assert_eq!(h.len(), 8);
159 }
160}