1use std::fs::File;
58use std::io::{BufRead, BufReader, BufWriter, Write};
59use std::path::Path;
60
61use sha2::{Digest, Sha256};
62
/// Configuration for hashing a text file after normalizing its line endings.
///
/// Values are built with [`Hasher::new`] and customized through the chained
/// setter methods.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Hasher {
    /// Sequence emitted between consecutive lines, and once more after the
    /// last line unless `no_eof` is set.
    eol: String,
    /// When `true`, every whitespace character is removed from each line
    /// before it is hashed.
    ignore_whitespaces: bool,
    /// When `true`, no end-of-line sequence is emitted after the last line.
    no_eof: bool,
}
68
69impl Default for Hasher {
70 fn default() -> Self {
71 Self {
72 eol: "\n".to_string(),
73 ignore_whitespaces: false,
74 no_eof: false,
75 }
76 }
77}
78
79impl Hasher {
80 pub fn new() -> Self {
107 Default::default()
108 }
109
110 pub fn eol(mut self, eol: impl Into<String>) -> Self {
123 self.eol = eol.into();
124 self
125 }
126
127 pub fn ignore_whitespaces(mut self, ignore_whitespaces: bool) -> Self {
131 self.ignore_whitespaces = ignore_whitespaces;
132 self
133 }
134
135 pub fn no_eof(mut self, no_eof: bool) -> Self {
148 self.no_eof = no_eof;
149 self
150 }
151
152 pub fn hash_file(
174 &self,
175 file_in: impl AsRef<Path>,
176 file_out: Option<impl AsRef<Path>>,
177 ) -> String {
178 let file_in = File::open(file_in).unwrap();
179 let file_in = BufReader::new(file_in);
180
181 let mut file_out = file_out.and_then(|file_out| {
182 let file_out = File::create(file_out).unwrap();
183 let file_out = BufWriter::new(file_out);
184 Some(file_out)
185 });
186
187 let mut hasher = Sha256::new();
188
189 let mut is_first_line = true;
190 for line in file_in.lines() {
191 let line = line.unwrap();
192
193 let line = if self.ignore_whitespaces {
194 line.replace(|c: char| c.is_whitespace(), "")
195 } else {
196 line
197 };
198
199 let line = if !is_first_line {
200 format!("{}{}", &self.eol, line)
201 } else {
202 line
203 };
204
205 hasher.update(&line);
206
207 if let Some(file_out) = &mut file_out {
208 file_out.write_all(line.as_bytes()).unwrap();
209 }
210
211 is_first_line = false;
212 }
213
214 if !self.no_eof {
215 hasher.update(&self.eol);
216
217 if let Some(file_out) = &mut file_out {
218 file_out.write_all(&self.eol.as_bytes()).unwrap();
219 }
220 }
221
222 let hash = hasher.finalize();
223
224 base16ct::lower::encode_string(&hash)
225 }
226}
227
#[cfg(test)]
mod tests {
    use std::error::Error;
    use std::ffi::OsString;
    use std::fs;
    use std::iter::zip;

    use tempfile::NamedTempFile;

    use super::*;

    /// Temporary fixtures shared by the tests: four inputs holding the same
    /// two lines with different line endings, and one empty output file per
    /// input to receive the normalized content.
    struct TestEnv {
        file_with_crlf: NamedTempFile,
        file_with_crlf_noeof: NamedTempFile,
        file_with_lf: NamedTempFile,
        file_with_lf_noeof: NamedTempFile,

        normalized_file_with_crlf: NamedTempFile,
        normalized_file_with_crlf_noeof: NamedTempFile,
        normalized_file_with_lf: NamedTempFile,
        normalized_file_with_lf_noeof: NamedTempFile,
    }

    impl TestEnv {
        /// Creates all temporary files and fills the four inputs.
        fn new() -> Result<Self, std::io::Error> {
            /// Creates a temporary file holding the lines `A B` and `C D`
            /// joined by `eol`, optionally followed by one more `eol`.
            fn sample_file(eol: &str, trailing_eol: bool) -> Result<NamedTempFile, std::io::Error> {
                let mut text = ["A B", "C D"].join(eol);
                if trailing_eol {
                    text.push_str(eol);
                }

                let mut file = NamedTempFile::new()?;
                file.write_all(text.as_bytes())?;
                Ok(file)
            }

            Ok(Self {
                file_with_crlf: sample_file("\r\n", true)?,
                file_with_crlf_noeof: sample_file("\r\n", false)?,
                file_with_lf: sample_file("\n", true)?,
                file_with_lf_noeof: sample_file("\n", false)?,

                normalized_file_with_crlf: NamedTempFile::new()?,
                normalized_file_with_crlf_noeof: NamedTempFile::new()?,
                normalized_file_with_lf: NamedTempFile::new()?,
                normalized_file_with_lf_noeof: NamedTempFile::new()?,
            })
        }

        /// Input fixtures, in the same order as `output_files`.
        fn input_files(&self) -> [&NamedTempFile; 4] {
            [
                &self.file_with_crlf,
                &self.file_with_crlf_noeof,
                &self.file_with_lf,
                &self.file_with_lf_noeof,
            ]
        }

        /// Output fixtures, in the same order as `input_files`.
        fn output_files(&self) -> [&NamedTempFile; 4] {
            [
                &self.normalized_file_with_crlf,
                &self.normalized_file_with_crlf_noeof,
                &self.normalized_file_with_lf,
                &self.normalized_file_with_lf_noeof,
            ]
        }

        /// Hashes every input into its matching output and asserts that all
        /// of them produce the same hash and the same normalized content.
        ///
        /// Returns that common `(hash, normalized content)` pair.
        fn hash_files(&self, hasher: &Hasher) -> Result<(String, String), Box<dyn Error>> {
            // Result of the first pair; every pair is compared against it.
            let mut reference: Option<(String, String)> = None;

            for (input, output) in zip(self.input_files(), self.output_files()) {
                let hash = hasher.hash_file(input, Some(output));

                if reference.is_none() {
                    reference = Some((hash.clone(), fs::read_to_string(output)?));
                }

                if let Some((expected_hash, expected_content)) = &reference {
                    assert_eq!(&hash, expected_hash, "Hashes don't match");
                    assert_eq!(
                        &fs::read_to_string(output)?,
                        expected_content,
                        "Normalized files don't match"
                    );
                }
            }

            match reference {
                Some(pair) => Ok(pair),
                // The fixture list is never empty, so a reference always exists.
                None => unreachable!(),
            }
        }
    }

    #[test]
    fn check_empty_file() -> Result<(), Box<dyn Error>> {
        let empty = NamedTempFile::new()?;

        // With an empty EOL nothing at all is fed to the digest.
        let without_eol = Hasher::new().eol("").hash_file(&empty, None::<OsString>);
        assert_eq!(
            without_eol,
            "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
        );

        // By default only the trailing "\n" is fed to the digest.
        let with_default_eol = Hasher::new().hash_file(&empty, None::<OsString>);
        assert_eq!(
            with_default_eol,
            "01ba4719c80b6fe911b091a7c05124b64eeece964e09c058ef8f9805daca546b"
        );

        Ok(())
    }

    #[test]
    fn check_default_options() -> Result<(), Box<dyn Error>> {
        let hasher = Hasher::new();
        let test_env = TestEnv::new()?;
        let (_, normalized_content) = test_env.hash_files(&hasher)?;

        let expected = fs::read_to_string(&test_env.file_with_lf)?;
        assert_eq!(
            expected, normalized_content,
            "Normalized files do not have LF"
        );

        Ok(())
    }

    #[test]
    fn check_with_custom_eol() -> Result<(), Box<dyn Error>> {
        let hasher = Hasher::new().eol("\r\n");
        let test_env = TestEnv::new()?;
        let (_, normalized_content) = test_env.hash_files(&hasher)?;

        let expected = fs::read_to_string(&test_env.file_with_crlf)?;
        assert_eq!(
            expected, normalized_content,
            "Normalized files do not have CRLF"
        );

        Ok(())
    }

    #[test]
    fn check_without_eof() -> Result<(), Box<dyn Error>> {
        let hasher = Hasher::new().no_eof(true);
        let test_env = TestEnv::new()?;
        let (_, normalized_content) = test_env.hash_files(&hasher)?;

        let expected = fs::read_to_string(&test_env.file_with_lf_noeof)?;
        assert_eq!(
            expected, normalized_content,
            "Normalized files do not have LF without EOF"
        );

        Ok(())
    }

    #[test]
    fn check_ignore_spaces() -> Result<(), Box<dyn Error>> {
        let hasher = Hasher::new().eol("").ignore_whitespaces(true).no_eof(true);
        let test_env = TestEnv::new()?;
        let (normalized_hash, normalized_content) = test_env.hash_files(&hasher)?;

        // A file that already looks like the fully normalized fixtures.
        let mut compact_file = NamedTempFile::new()?;
        let compact_output = NamedTempFile::new()?;
        compact_file.write_all("ABCD".as_bytes())?;

        let hash = hasher.hash_file(&compact_file, Some(compact_output));
        assert_eq!(hash, normalized_hash, "Hashes don't match");

        let compact_content = fs::read_to_string(&compact_file)?;
        assert_eq!(
            compact_content, normalized_content,
            "Normalized files do not ignore white spaces"
        );

        Ok(())
    }
}