tokmd_redact/lib.rs
1//! # tokmd-redact
2//!
3//! **Tier 0.5 (Utilities)**
4//!
5//! This crate provides redaction utilities for `tokmd` receipts.
6//! It's the canonical source for hashing functions used to redact sensitive
7//! information (paths, patterns) in output while preserving useful structure.
8//!
9//! ## What belongs here
10//! * Path redaction (hash while preserving extension)
11//! * String hashing for redaction
12//!
13//! ## What does NOT belong here
14//! * General-purpose file hashing (see `tokmd-content`)
15//! * Integrity hashing (see `tokmd-analysis`)
16
17use std::path::Path;
18
/// Normalize a path-like string so that equivalent spellings compare equal.
///
/// The following rewrites are applied, in order:
///
/// 1. Every backslash becomes a forward slash.
/// 2. Any number of leading `./` prefixes are dropped.
/// 3. Interior `/./` sequences collapse to a single `/`.
/// 4. A single trailing `/.` is dropped.
///
/// As a result `./src/lib.rs`, `.\src\lib.rs` and `src/./lib.rs` all become
/// `src/lib.rs`. Parent (`..`) segments and a lone `.` are left untouched.
fn clean_path(s: &str) -> String {
    let unified = s.replace('\\', "/");

    // `trim_start_matches` removes the prefix repeatedly, so "././a" -> "a".
    let mut path = unified.trim_start_matches("./").to_owned();

    // One `replace` pass skips overlapping matches ("a/././b" -> "a/./b"),
    // so keep going until no interior dot segment is left.
    while path.contains("/./") {
        path = path.replace("/./", "/");
    }

    // Drop a final "/." in place, reusing the existing allocation.
    if let Some(head) = path.strip_suffix("/.") {
        let keep = head.len();
        path.truncate(keep);
    }

    path
}
39
40/// Compute a short (16-character) BLAKE3 hash of a string.
41///
42/// This is used for redacting sensitive strings like excluded patterns
43/// or module names in receipts.
44///
45/// Path separators are normalized to forward slashes before hashing
46/// to ensure consistent hashes across operating systems. Redundant `.`
47/// segments are also resolved so that logically identical paths hash
48/// identically.
49///
50/// # Example
51///
52/// ```
53/// use tokmd_redact::short_hash;
54///
55/// let hash = short_hash("my-secret-path");
56/// assert_eq!(hash.len(), 16);
57///
58/// // Cross-platform consistency: same hash regardless of separator
59/// assert_eq!(short_hash("src\\lib"), short_hash("src/lib"));
60/// ```
61///
62/// Dot-prefix and interior-dot normalization:
63///
64/// ```
65/// use tokmd_redact::short_hash;
66///
67/// // Leading "./" is stripped before hashing
68/// assert_eq!(short_hash("./src/lib"), short_hash("src/lib"));
69///
70/// // Interior "/." segments are resolved
71/// assert_eq!(short_hash("crates/./foo"), short_hash("crates/foo"));
72///
73/// // Different inputs always produce different hashes
74/// assert_ne!(short_hash("alpha"), short_hash("beta"));
75/// ```
76pub fn short_hash(s: &str) -> String {
77 let cleaned = clean_path(s);
78 let mut hex = blake3::hash(cleaned.as_bytes()).to_hex().to_string();
79 hex.truncate(16);
80 hex
81}
82
83/// Redact a path by hashing it while preserving the file extension.
84///
85/// This allows redacted paths to still be recognizable by file type
86/// while hiding the actual path structure.
87///
88/// Path separators are normalized to forward slashes before hashing
89/// to ensure consistent hashes across operating systems.
90///
91/// # Example
92///
93/// ```
94/// use tokmd_redact::redact_path;
95///
96/// let redacted = redact_path("src/secrets/config.json");
97/// assert!(redacted.ends_with(".json"));
98/// assert_eq!(redacted.len(), 16 + 1 + 4); // hash + dot + "json"
99///
100/// // Cross-platform consistency: same hash regardless of separator
101/// assert_eq!(redact_path("src\\main.rs"), redact_path("src/main.rs"));
102/// ```
103///
104/// Files without an extension produce a bare 16-character hash:
105///
106/// ```
107/// use tokmd_redact::redact_path;
108///
109/// let bare = redact_path("Makefile");
110/// assert_eq!(bare.len(), 16);
111/// assert!(!bare.contains('.'));
112///
113/// // Double extensions: only the final extension is preserved
114/// let gz = redact_path("archive.tar.gz");
115/// assert!(gz.ends_with(".gz"));
116/// ```
117pub fn redact_path(path: &str) -> String {
118 let cleaned = clean_path(path);
119 let ext = Path::new(&cleaned)
120 .extension()
121 .and_then(|e| e.to_str())
122 .unwrap_or("");
123 let mut out = short_hash(&cleaned);
124 if !ext.is_empty() {
125 out.push('.');
126 out.push_str(ext);
127 }
128 out
129}
130
#[cfg(test)]
mod tests {
    //! Unit tests for `short_hash` and `redact_path`.
    //!
    //! These cover output shape (length, preserved extension), determinism,
    //! and the normalization rules: separators, leading `./`, interior `/./`
    //! and trailing `/.`.

    use super::*;

    #[test]
    fn test_short_hash_length() {
        let hash = short_hash("test");
        assert_eq!(hash.len(), 16);
    }

    #[test]
    fn test_short_hash_deterministic() {
        let h1 = short_hash("same input");
        let h2 = short_hash("same input");
        assert_eq!(h1, h2);
    }

    #[test]
    fn test_short_hash_different_inputs() {
        let h1 = short_hash("input1");
        let h2 = short_hash("input2");
        assert_ne!(h1, h2);
    }

    #[test]
    fn test_redact_path_preserves_extension() {
        let redacted = redact_path("src/lib.rs");
        assert!(redacted.ends_with(".rs"));
    }

    #[test]
    fn test_redact_path_no_extension() {
        let redacted = redact_path("Makefile");
        assert_eq!(redacted.len(), 16);
        assert!(!redacted.contains('.'));
    }

    #[test]
    fn test_redact_path_double_extension() {
        // Only preserves final extension
        let redacted = redact_path("archive.tar.gz");
        assert!(redacted.ends_with(".gz"));
    }

    #[test]
    fn test_redact_path_deterministic() {
        let r1 = redact_path("src/main.rs");
        let r2 = redact_path("src/main.rs");
        assert_eq!(r1, r2);
    }

    #[test]
    fn test_short_hash_normalizes_separators() {
        // Same logical path with different separators should hash identically
        let h1 = short_hash("src/lib");
        let h2 = short_hash("src\\lib");
        assert_eq!(h1, h2);
    }

    #[test]
    fn test_short_hash_normalizes_mixed_separators() {
        let h1 = short_hash("crates/foo/src/lib");
        let h2 = short_hash("crates\\foo\\src\\lib");
        let h3 = short_hash("crates/foo\\src/lib");
        assert_eq!(h1, h2);
        assert_eq!(h2, h3);
    }

    #[test]
    fn test_redact_path_normalizes_separators() {
        let r1 = redact_path("src/main.rs");
        let r2 = redact_path("src\\main.rs");
        assert_eq!(r1, r2);
    }

    #[test]
    fn test_redact_path_normalizes_deep_paths() {
        let r1 = redact_path("crates/tokmd/src/commands/run.rs");
        let r2 = redact_path("crates\\tokmd\\src\\commands\\run.rs");
        assert_eq!(r1, r2);
        assert!(r1.ends_with(".rs"));
    }

    #[test]
    fn test_short_hash_normalizes_dot_prefix() {
        assert_eq!(short_hash("src/lib.rs"), short_hash("./src/lib.rs"));
    }

    #[test]
    fn test_short_hash_normalizes_interior_dot_segments() {
        assert_eq!(
            short_hash("crates/foo/./src/lib.rs"),
            short_hash("crates/foo/src/lib.rs")
        );
    }

    #[test]
    fn test_redact_path_normalizes_dot_prefix() {
        assert_eq!(redact_path("src/main.rs"), redact_path("./src/main.rs"));
    }

    #[test]
    fn test_short_hash_normalizes_repeated_dot_prefix() {
        // The leading "./" is stripped as many times as it occurs.
        assert_eq!(short_hash("././src/lib.rs"), short_hash("src/lib.rs"));
    }

    #[test]
    fn test_short_hash_normalizes_consecutive_interior_dots() {
        // Back-to-back dot segments need more than one replacement pass.
        assert_eq!(short_hash("a/././b"), short_hash("a/b"));
    }

    #[test]
    fn test_short_hash_normalizes_trailing_dot_segment() {
        assert_eq!(short_hash("src/."), short_hash("src"));
        assert_eq!(short_hash("src/./."), short_hash("src"));
    }

    #[test]
    fn test_redact_path_normalizes_backslash_dot_prefix() {
        // ".\" becomes "./" after separator normalization and is then stripped.
        assert_eq!(redact_path(".\\src\\main.rs"), redact_path("src/main.rs"));
    }

    #[test]
    fn test_redact_path_dotfile_has_no_extension() {
        // `Path::extension` reports no extension for names like ".gitignore",
        // so nothing of the file name leaks into the output.
        let redacted = redact_path(".gitignore");
        assert_eq!(redacted.len(), 16);
        assert!(!redacted.contains('.'));
    }
}