wayback_rs/
digest.rs

1//! Utilities for computing digests used by the Wayback Machine.
2//!
//! The Wayback Machine's CDX index provides a digest for each page in its
//! search results. These digests can be computed by taking the SHA-1 hash of
//! the page content and encoding it as a Base32 string.
5
6use data_encoding::BASE32;
7use flate2::read::GzDecoder;
8use sha1::{Digest, Sha1};
9use std::io::{BufWriter, Error, Read};
10
11/// Decode a Base32 string into the SHA-1 bytes, returning an empty value if
12/// the input is not a valid Base2-encoded SHA-1 hash.
13pub fn string_to_bytes(digest: &str) -> Option<[u8; 20]> {
14    if digest.len() == 32 {
15        let mut output = [0; 20];
16        let count = BASE32.decode_mut(digest.as_bytes(), &mut output).ok()?;
17
18        if count == 20 {
19            Some(output)
20        } else {
21            None
22        }
23    } else {
24        None
25    }
26}
27
28/// Encode a SHA-1 hash into a 32-character Base32 string.
29pub fn bytes_to_string(bytes: &[u8; 20]) -> String {
30    BASE32.encode(bytes)
31}
32
33/// Compute the SHA-1 hash for bytes read from a source and encode it as a
34/// Base32 string.
35pub fn compute_digest<R: Read>(input: &mut R) -> Result<String, Error> {
36    let sha1 = Sha1::new();
37
38    let mut buffered = BufWriter::new(sha1);
39    std::io::copy(input, &mut buffered)?;
40
41    let result = buffered.into_inner()?.finalize();
42
43    let mut output = String::new();
44    BASE32.encode_append(&result, &mut output);
45
46    Ok(output)
47}
48
49/// Compute the SHA-1 hash for bytes read from a GZip-compressed source and
50/// encode it as a Base32 string.
51pub fn compute_digest_gz<R: Read>(input: &mut R) -> Result<String, Error> {
52    compute_digest(&mut GzDecoder::new(input))
53}
54
#[cfg(test)]
mod tests {
    use std::fs::File;
    use std::io::BufReader;

    /// Digest shared by the tests; it is also the file name of the example
    /// document under `examples/wayback/`.
    const EXAMPLE_DIGEST: &str = "ZHYT52YPEOCHJD5FZINSDYXGQZI22WJ4";

    /// Hashing the example file must reproduce the digest it is named after.
    #[test]
    fn compute_digest() {
        let file = File::open(format!("examples/wayback/{}", EXAMPLE_DIGEST)).unwrap();
        let computed = super::compute_digest(&mut BufReader::new(file)).unwrap();

        assert_eq!(computed, EXAMPLE_DIGEST);
    }

    /// Decoding a digest and re-encoding the bytes must return the same text.
    #[test]
    fn round_trip() {
        let decoded = super::string_to_bytes(EXAMPLE_DIGEST).unwrap();
        let encoded = super::bytes_to_string(&decoded);

        assert_eq!(EXAMPLE_DIGEST, encoded);
    }
}