1use std::hash::Hasher;
2use tree_sitter::Node;
3use xxhash_rust::xxh3::Xxh3;
4
5pub fn content_hash(content: &str) -> String {
6 content_hash_bytes(content.as_bytes())
7}
8
9pub fn content_hash_bytes(content: &[u8]) -> String {
10 format!("{:016x}", xxhash_rust::xxh3::xxh3_64(content))
11}
12
13pub fn short_hash(content: &str, length: usize) -> String {
14 let hash = content_hash(content);
15 hash[..length.min(hash.len())].to_string()
16}
17
18pub fn structural_hash(node: Node, source: &[u8]) -> String {
23 let mut hasher = Xxh3::new();
24 hash_structural_tokens(node, source, &mut hasher);
25 format!("{:016x}", hasher.finish())
26}
27
28pub fn structural_hash_excluding_range(
32 node: Node,
33 source: &[u8],
34 exclude_start: usize,
35 exclude_end: usize,
36) -> String {
37 let mut hasher = Xxh3::new();
38 hash_structural_tokens_excluding(node, source, &mut hasher, exclude_start, exclude_end);
39 format!("{:016x}", hasher.finish())
40}
41
42fn hash_structural_tokens(root: Node, source: &[u8], hasher: &mut Xxh3) {
47 let mut worklist = vec![root];
48 while let Some(node) = worklist.pop() {
49 let kind = node.kind();
50
51 if is_comment_node(kind) {
52 continue;
53 }
54
55 if node.child_count() == 0 {
56 let start = node.start_byte();
58 let end = node.end_byte();
59 if start < end && end <= source.len() {
60 let bytes = &source[start..end];
61 let trimmed = trim_bytes(bytes);
63 if !trimmed.is_empty() {
64 hasher.write(trimmed);
65 hasher.write(b" ");
66 }
67 }
68 } else {
69 hasher.write(kind.as_bytes());
72 hasher.write(b":");
73 let mut cursor = node.walk();
74 let children: Vec<_> = node.children(&mut cursor).collect();
75 for child in children.into_iter().rev() {
76 worklist.push(child);
77 }
78 }
79 }
80}
81
82fn hash_structural_tokens_excluding(
85 root: Node,
86 source: &[u8],
87 hasher: &mut Xxh3,
88 exclude_start: usize,
89 exclude_end: usize,
90) {
91 let mut worklist = vec![root];
92 while let Some(node) = worklist.pop() {
93 let kind = node.kind();
94
95 if is_comment_node(kind) {
96 continue;
97 }
98
99 if node.child_count() == 0 {
100 let start = node.start_byte();
101 let end = node.end_byte();
102 if start < exclude_end && end > exclude_start {
104 continue;
105 }
106 if start < end && end <= source.len() {
107 let bytes = &source[start..end];
108 let trimmed = trim_bytes(bytes);
109 if !trimmed.is_empty() {
110 hasher.write(trimmed);
111 hasher.write(b" ");
112 }
113 }
114 } else {
115 hasher.write(kind.as_bytes());
116 hasher.write(b":");
117 let mut cursor = node.walk();
118 let children: Vec<_> = node.children(&mut cursor).collect();
119 for child in children.into_iter().rev() {
120 worklist.push(child);
121 }
122 }
123 }
124}
125
126#[inline]
128fn trim_bytes(bytes: &[u8]) -> &[u8] {
129 let start = bytes.iter().position(|b| !b.is_ascii_whitespace()).unwrap_or(bytes.len());
130 let end = bytes.iter().rposition(|b| !b.is_ascii_whitespace()).map_or(start, |p| p + 1);
131 &bytes[start..end]
132}
133
134fn is_comment_node(kind: &str) -> bool {
135 matches!(
136 kind,
137 "comment" | "line_comment" | "block_comment" | "doc_comment" | "tag_comment"
138 )
139}
140
141#[cfg(test)]
142mod tests {
143 use super::*;
144
145 #[test]
146 fn test_content_hash_deterministic() {
147 let h1 = content_hash("hello world");
148 let h2 = content_hash("hello world");
149 assert_eq!(h1, h2);
150 }
151
152 #[test]
153 fn test_content_hash_hex_format() {
154 let h = content_hash("test");
155 assert_eq!(h.len(), 16); assert!(h.chars().all(|c| c.is_ascii_hexdigit()));
157 }
158
159 #[test]
160 fn test_content_hash_bytes_matches_string_hash() {
161 assert_eq!(content_hash_bytes(b"test"), content_hash("test"));
162 }
163
164 #[test]
165 fn test_short_hash() {
166 let h = short_hash("test", 8);
167 assert_eq!(h.len(), 8);
168 }
169}