Skip to main content

tokmd_redact/
lib.rs

1//! # tokmd-redact
2//!
3//! **Tier 0.5 (Utilities)**
4//!
5//! This crate provides redaction utilities for `tokmd` receipts.
6//! It's the canonical source for hashing functions used to redact sensitive
7//! information (paths, patterns) in output while preserving useful structure.
8//!
9//! ## What belongs here
10//! * Path redaction (hash while preserving extension)
11//! * String hashing for redaction
12//!
13//! ## What does NOT belong here
14//! * General-purpose file hashing (see `tokmd-content`)
15//! * Integrity hashing (see `tokmd-analysis`)
16
17use std::path::Path;
18
/// Normalize a path string so that equivalent spellings compare (and hash) equal.
///
/// The steps, applied in order, are:
///
/// 1. every backslash becomes a forward slash;
/// 2. all leading `./` prefixes are dropped;
/// 3. interior `/./` segments collapse to a single `/`;
/// 4. one trailing `/.` is removed.
///
/// `..` segments and repeated slashes are left untouched, so `./src/lib.rs`
/// and `src/lib.rs` normalize to the same string while `a//b` and `../a`
/// pass through unchanged.
fn clean_path(s: &str) -> String {
    let unified = s.replace('\\', "/");

    // Peel leading "./" prefixes off a borrowed view; allocate only once done.
    let mut rest = unified.as_str();
    while let Some(tail) = rest.strip_prefix("./") {
        rest = tail;
    }
    let mut cleaned = rest.to_owned();

    // `replace` matches without overlap, so "/././" needs more than one pass.
    while cleaned.contains("/./") {
        cleaned = cleaned.replace("/./", "/");
    }

    // A final "/." refers to the directory itself; drop it.
    let kept_len = cleaned.strip_suffix("/.").map(str::len);
    if let Some(len) = kept_len {
        cleaned.truncate(len);
    }

    cleaned
}
39
40/// Compute a short (16-character) BLAKE3 hash of a string.
41///
42/// This is used for redacting sensitive strings like excluded patterns
43/// or module names in receipts.
44///
45/// Path separators are normalized to forward slashes before hashing
46/// to ensure consistent hashes across operating systems. Redundant `.`
47/// segments are also resolved so that logically identical paths hash
48/// identically.
49///
50/// # Example
51///
52/// ```
53/// use tokmd_redact::short_hash;
54///
55/// let hash = short_hash("my-secret-path");
56/// assert_eq!(hash.len(), 16);
57///
58/// // Cross-platform consistency: same hash regardless of separator
59/// assert_eq!(short_hash("src\\lib"), short_hash("src/lib"));
60/// ```
61///
62/// Dot-prefix and interior-dot normalization:
63///
64/// ```
65/// use tokmd_redact::short_hash;
66///
67/// // Leading "./" is stripped before hashing
68/// assert_eq!(short_hash("./src/lib"), short_hash("src/lib"));
69///
70/// // Interior "/." segments are resolved
71/// assert_eq!(short_hash("crates/./foo"), short_hash("crates/foo"));
72///
73/// // Different inputs always produce different hashes
74/// assert_ne!(short_hash("alpha"), short_hash("beta"));
75/// ```
76pub fn short_hash(s: &str) -> String {
77    let cleaned = clean_path(s);
78    let mut hex = blake3::hash(cleaned.as_bytes()).to_hex().to_string();
79    hex.truncate(16);
80    hex
81}
82
83/// Redact a path by hashing it while preserving the file extension.
84///
85/// This allows redacted paths to still be recognizable by file type
86/// while hiding the actual path structure.
87///
88/// Path separators are normalized to forward slashes before hashing
89/// to ensure consistent hashes across operating systems.
90///
91/// # Example
92///
93/// ```
94/// use tokmd_redact::redact_path;
95///
96/// let redacted = redact_path("src/secrets/config.json");
97/// assert!(redacted.ends_with(".json"));
98/// assert_eq!(redacted.len(), 16 + 1 + 4); // hash + dot + "json"
99///
100/// // Cross-platform consistency: same hash regardless of separator
101/// assert_eq!(redact_path("src\\main.rs"), redact_path("src/main.rs"));
102/// ```
103///
104/// Files without an extension produce a bare 16-character hash:
105///
106/// ```
107/// use tokmd_redact::redact_path;
108///
109/// let bare = redact_path("Makefile");
110/// assert_eq!(bare.len(), 16);
111/// assert!(!bare.contains('.'));
112///
113/// // Double extensions: only the final extension is preserved
114/// let gz = redact_path("archive.tar.gz");
115/// assert!(gz.ends_with(".gz"));
116/// ```
117pub fn redact_path(path: &str) -> String {
118    let cleaned = clean_path(path);
119    let ext = Path::new(&cleaned)
120        .extension()
121        .and_then(|e| e.to_str())
122        .unwrap_or("");
123    let mut out = short_hash(&cleaned);
124    if !ext.is_empty() {
125        out.push('.');
126        out.push_str(ext);
127    }
128    out
129}
130
#[cfg(test)]
mod tests {
    use super::{redact_path, short_hash};

    /// Number of hex characters every short hash is expected to contain.
    const HASH_LEN: usize = 16;

    // --- short_hash: basic properties -------------------------------------

    #[test]
    fn test_short_hash_length() {
        assert_eq!(short_hash("test").len(), HASH_LEN);
    }

    #[test]
    fn test_short_hash_deterministic() {
        assert_eq!(short_hash("same input"), short_hash("same input"));
    }

    #[test]
    fn test_short_hash_different_inputs() {
        assert_ne!(short_hash("input1"), short_hash("input2"));
    }

    // --- redact_path: extension handling ----------------------------------

    #[test]
    fn test_redact_path_preserves_extension() {
        assert!(redact_path("src/lib.rs").ends_with(".rs"));
    }

    #[test]
    fn test_redact_path_no_extension() {
        let out = redact_path("Makefile");
        assert_eq!(out.len(), HASH_LEN);
        assert!(!out.contains('.'));
    }

    #[test]
    fn test_redact_path_double_extension() {
        // Only the last extension survives redaction.
        assert!(redact_path("archive.tar.gz").ends_with(".gz"));
    }

    #[test]
    fn test_redact_path_deterministic() {
        assert_eq!(redact_path("src/main.rs"), redact_path("src/main.rs"));
    }

    // --- separator normalization ------------------------------------------

    #[test]
    fn test_short_hash_normalizes_separators() {
        // One logical path, two separator styles, one hash.
        assert_eq!(short_hash("src/lib"), short_hash("src\\lib"));
    }

    #[test]
    fn test_short_hash_normalizes_mixed_separators() {
        let forward = short_hash("crates/foo/src/lib");
        let backward = short_hash("crates\\foo\\src\\lib");
        let mixed = short_hash("crates/foo\\src/lib");
        assert_eq!(forward, backward);
        assert_eq!(backward, mixed);
    }

    #[test]
    fn test_redact_path_normalizes_separators() {
        assert_eq!(redact_path("src/main.rs"), redact_path("src\\main.rs"));
    }

    #[test]
    fn test_redact_path_normalizes_deep_paths() {
        let unix_style = redact_path("crates/tokmd/src/commands/run.rs");
        let windows_style = redact_path("crates\\tokmd\\src\\commands\\run.rs");
        assert_eq!(unix_style, windows_style);
        assert!(unix_style.ends_with(".rs"));
    }

    // --- dot-segment normalization ----------------------------------------

    #[test]
    fn test_short_hash_normalizes_dot_prefix() {
        assert_eq!(short_hash("src/lib.rs"), short_hash("./src/lib.rs"));
    }

    #[test]
    fn test_short_hash_normalizes_interior_dot_segments() {
        let with_dot = short_hash("crates/foo/./src/lib.rs");
        let without_dot = short_hash("crates/foo/src/lib.rs");
        assert_eq!(with_dot, without_dot);
    }

    #[test]
    fn test_redact_path_normalizes_dot_prefix() {
        assert_eq!(redact_path("src/main.rs"), redact_path("./src/main.rs"));
    }
}