1use std::io::Write;
7
8use flate2::write::GzEncoder;
9use flate2::Compression;
10use sha2::{Digest, Sha256};
11
/// Returns the longest prefix of `s` that is at most `max_len` bytes long and
/// ends on a UTF-8 character boundary.
///
/// `max_len` is a byte count, not a character count. If the cut would land in
/// the middle of a multi-byte character, the prefix is shortened to the
/// previous boundary, so the result can be shorter than `max_len`. A string
/// that already fits is returned unchanged. The result always borrows from
/// `s`; nothing is allocated.
pub(crate) fn truncate_for_log(s: &str, max_len: usize) -> &str {
    if s.len() <= max_len {
        return s;
    }

    // Walk backwards from the requested cut until a boundary is found.
    // Offset 0 is always a boundary, so the fallback is never actually taken.
    let cut = (0..=max_len)
        .rev()
        .find(|&offset| s.is_char_boundary(offset))
        .unwrap_or(0);

    &s[..cut]
}
30
31pub fn compress_log(scrubbed_text: &str) -> Result<Vec<u8>, std::io::Error> {
52 let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
53 encoder.write_all(scrubbed_text.as_bytes())?;
54 encoder.finish()
55}
56
57pub fn content_hash(compressed_bytes: &[u8]) -> String {
72 let mut hasher = Sha256::new();
73 hasher.update(compressed_bytes);
74 let digest = hasher.finalize();
75 digest.iter().fold(String::with_capacity(64), |mut acc, b| {
76 use std::fmt::Write;
77 let _ = write!(acc, "{b:02x}");
80 acc
81 })
82}
83
#[cfg(test)]
mod tests {
    //! Unit tests for `truncate_for_log`, `compress_log` and `content_hash`.

    use super::*;

    // ---------------------------------------------------------------------
    // Helpers
    // ---------------------------------------------------------------------

    /// Inflates a gzip stream back into raw bytes; a decode failure aborts
    /// the test.
    fn gzip_decompress(data: &[u8]) -> Vec<u8> {
        use flate2::read::GzDecoder;
        use std::io::Read;

        let mut inflated = Vec::new();
        let mut reader = GzDecoder::new(data);
        reader
            .read_to_end(&mut inflated)
            .unwrap_or_else(|_| unreachable!());
        inflated
    }

    /// Runs `compress_log` and treats an error as a test failure.
    fn compress(input: &str) -> Vec<u8> {
        compress_log(input).unwrap_or_else(|_| unreachable!())
    }

    // ---------------------------------------------------------------------
    // truncate_for_log
    // ---------------------------------------------------------------------

    /// Input shorter than the limit is returned as-is.
    #[test]
    fn test_truncate_for_log_short_string_unchanged() {
        let out = truncate_for_log("hello", 10);
        assert_eq!(out, "hello");
    }

    /// Input whose length equals the limit is returned as-is.
    #[test]
    fn test_truncate_for_log_exact_length_unchanged() {
        let out = truncate_for_log("hello", 5);
        assert_eq!(out, "hello");
    }

    /// Input longer than the limit is cut down to the limit.
    #[test]
    fn test_truncate_for_log_long_string_truncated() {
        let out = truncate_for_log("hello world", 5);
        assert_eq!(out, "hello");
    }

    /// A limit that falls inside a multi-byte character backs off to the
    /// previous boundary instead of panicking.
    #[test]
    fn test_truncate_for_log_multibyte_safe() {
        // U+00E9 takes two bytes in UTF-8, so byte offset 4 is mid-character.
        let accented = "caf\u{00e9}";
        let result = truncate_for_log(accented, 4);
        assert_eq!(result, "caf");
    }

    /// The empty string stays empty.
    #[test]
    fn test_truncate_for_log_empty_string() {
        let out = truncate_for_log("", 10);
        assert_eq!(out, "");
    }

    /// A zero-byte limit yields the empty string.
    #[test]
    fn test_truncate_for_log_zero_max_len() {
        let out = truncate_for_log("hello", 0);
        assert_eq!(out, "");
    }

    // ---------------------------------------------------------------------
    // compress_log
    // ---------------------------------------------------------------------

    /// Compressing and then decompressing reproduces the original text.
    #[test]
    fn test_compress_log_round_trip() {
        let input = "Line one\nLine two\nLine three\n";
        let restored_bytes = gzip_decompress(&compress(input));
        let restored = String::from_utf8(restored_bytes).unwrap_or_else(|_| unreachable!());
        assert_eq!(restored, input);
    }

    /// The output starts with the two gzip magic bytes.
    #[test]
    fn test_compress_log_gzip_magic_bytes() {
        let compressed = compress("some log data");
        assert!(
            compressed.len() >= 2,
            "compressed output should be at least 2 bytes"
        );
        assert_eq!(compressed[0], 0x1f, "first magic byte should be 0x1f");
        assert_eq!(compressed[1], 0x8b, "second magic byte should be 0x8b");
    }

    /// Empty input still yields a well-formed gzip stream that inflates to
    /// nothing.
    #[test]
    fn test_compress_log_empty_input_produces_valid_gzip() {
        let compressed = compress("");
        assert!(compressed.len() >= 2);
        assert_eq!(compressed[0], 0x1f);
        assert_eq!(compressed[1], 0x8b);

        let inflated = gzip_decompress(&compressed);
        assert!(inflated.is_empty());
    }

    /// A large input compresses without panicking, shrinks, and inflates
    /// back to the original length.
    #[test]
    fn test_compress_log_large_input_does_not_panic() {
        let line = "Normal log line without sensitive data, repeating to build volume.\n";
        let large_input: String = line.repeat(75_000);
        let raw_len = large_input.len();

        let compressed = compress(&large_input);
        assert!(
            compressed.len() < raw_len,
            "compressed size ({}) should be less than raw size ({})",
            compressed.len(),
            raw_len
        );

        let inflated = gzip_decompress(&compressed);
        assert_eq!(inflated.len(), raw_len);
    }

    /// Highly repetitive text compresses to under half its original size.
    #[test]
    fn test_compress_log_output_smaller_than_input() {
        let input = "repeated data line\n".repeat(1_000);
        let half_of_raw = input.len() / 2;

        let compressed = compress(&input);
        assert!(
            compressed.len() < half_of_raw,
            "repetitive text should compress to less than half its size"
        );
    }

    // ---------------------------------------------------------------------
    // content_hash
    // ---------------------------------------------------------------------

    /// Hash of the empty input matches the expected SHA-256 hex digest.
    #[test]
    fn test_content_hash_known_vector() {
        let expected = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855";
        let actual = content_hash(&[]);
        assert_eq!(actual, expected);
    }

    /// The digest is rendered as exactly 64 lowercase hexadecimal characters.
    #[test]
    fn test_content_hash_format_64_lowercase_hex() {
        let hash = content_hash(b"arbitrary payload bytes");
        assert_eq!(hash.len(), 64, "SHA-256 hex digest must be 64 characters");

        let only_lower_hex = hash.chars().all(|c| matches!(c, '0'..='9' | 'a'..='f'));
        assert!(
            only_lower_hex,
            "hash must be lowercase hex only, got: {hash}"
        );
    }

    /// Hashing the same bytes twice gives the same string.
    #[test]
    fn test_content_hash_deterministic() {
        let data = b"the same input twice";
        let first = content_hash(data);
        let second = content_hash(data);
        assert_eq!(
            first, second,
            "same input must always produce the same hash"
        );
    }

    /// Two distinct payloads hash to distinct strings.
    #[test]
    fn test_content_hash_different_inputs_differ() {
        let hash_a = content_hash(b"payload A");
        let hash_b = content_hash(b"payload B");
        assert_ne!(
            hash_a, hash_b,
            "different inputs should produce different hashes"
        );
    }

    /// Hash of `b"hello"` matches the expected SHA-256 hex digest.
    #[test]
    fn test_content_hash_nonempty_known_value() {
        let expected = "2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824";
        let actual = content_hash(b"hello");
        assert_eq!(actual, expected);
    }
}