use sha2::{Digest, Sha256};
use std::fmt::Write as _;
use std::fs::File;
use std::io::{BufReader, Read};
/// Size in bytes (1 MiB) of the scratch buffer `sha256_file` reads into.
const HASH_BUFFER_SIZE: usize = 1 << 20;
/// Computes the SHA-256 digest of the file at `path` as a lowercase hex string.
///
/// The file is consumed in chunks of at most `HASH_BUFFER_SIZE` bytes, so its
/// content is never held in memory all at once.
///
/// # Errors
///
/// Returns the underlying [`std::io::Error`] if the file cannot be opened or
/// if a read fails. Reads that fail with `ErrorKind::Interrupted` are retried
/// instead of being reported, since that error kind is non-fatal.
pub fn sha256_file(path: &std::path::Path) -> Result<String, std::io::Error> {
    let file = File::open(path)?;
    let mut reader = BufReader::new(file);
    let mut hasher = Sha256::new();
    let mut buffer = vec![0u8; HASH_BUFFER_SIZE];
    loop {
        let bytes_read = match reader.read(&mut buffer) {
            // A zero-length read signals end of file.
            Ok(0) => break,
            Ok(count) => count,
            // An interrupted read transferred nothing; simply try again.
            Err(error) if error.kind() == std::io::ErrorKind::Interrupted => continue,
            Err(error) => return Err(error),
        };
        // Only the prefix that was actually filled belongs to the file.
        hasher.update(&buffer[..bytes_read]);
    }
    let digest = hasher.finalize();
    Ok(hex_lower(digest.as_ref()))
}
/// Derives an identifier for `path` from the SHA-1 digest of its UTF-8 bytes.
///
/// The result is the lowercase hex encoding of the first 10 digest bytes,
/// i.e. a 20-character string.
pub fn partition_id(path: &str) -> String {
    let path_bytes = path.as_bytes();
    sha1_hex(path_bytes)
}
/// Hashes `bytes` with SHA-1 and hex-encodes a truncated prefix of the digest.
///
/// Only the first 10 of the 20 digest bytes are kept, so the returned string
/// is 20 lowercase hex characters long.
fn sha1_hex(bytes: &[u8]) -> String {
    let full_digest: [u8; 20] = sha1(bytes);
    let (prefix, _discarded) = full_digest.split_at(10);
    hex_lower(prefix)
}
/// Encodes `bytes` as lowercase hexadecimal, two characters per byte.
///
/// An empty slice yields an empty string.
fn hex_lower(bytes: &[u8]) -> String {
    // Two hex digits per input byte, so the final length is known up front.
    let encoded = String::with_capacity(bytes.len() * 2);
    bytes.iter().fold(encoded, |mut text, value| {
        write!(text, "{value:02x}").expect("writing to a String cannot fail");
        text
    })
}
/// Computes the SHA-1 digest of `input` and returns it as 20 raw bytes.
///
/// The padded message (input, a `0x80` marker, zero fill, and the 64-bit
/// big-endian bit length) is built in memory first and then processed in
/// 64-byte blocks.
fn sha1(input: &[u8]) -> [u8; 20] {
    const BLOCK_BYTES: usize = 64;
    // One additive constant for each group of 20 rounds.
    const STAGE_CONSTANTS: [u32; 4] = [0x5a82_7999, 0x6ed9_eba1, 0x8f1b_bcdc, 0xca62_c1d6];

    // Running hash state, seeded with the initial values.
    let mut state: [u32; 5] = [
        0x6745_2301,
        0xefcd_ab89,
        0x98ba_dcfe,
        0x1032_5476,
        0xc3d2_e1f0,
    ];

    // Pad so that, after the 8-byte length trailer, the total is a multiple of 64.
    let message_bits = (input.len() as u64) * 8;
    let mut padded = input.to_vec();
    padded.push(0x80);
    let zero_fill = (BLOCK_BYTES + 56 - padded.len() % BLOCK_BYTES) % BLOCK_BYTES;
    padded.resize(padded.len() + zero_fill, 0);
    padded.extend_from_slice(&message_bits.to_be_bytes());

    for block in padded.chunks_exact(BLOCK_BYTES) {
        // The first 16 schedule words come straight from the block (big-endian);
        // the remaining 64 are derived from earlier words.
        let mut schedule = [0u32; 80];
        for (slot, quad) in schedule.iter_mut().zip(block.chunks_exact(4)) {
            *slot = u32::from_be_bytes([quad[0], quad[1], quad[2], quad[3]]);
        }
        for t in 16..80 {
            let mixed = schedule[t - 3] ^ schedule[t - 8] ^ schedule[t - 14] ^ schedule[t - 16];
            schedule[t] = mixed.rotate_left(1);
        }

        // Working registers [a, b, c, d, e] for this block.
        let mut working = state;
        for (t, &scheduled) in schedule.iter().enumerate() {
            let [a, b, c, d, e] = working;
            let (mix, constant) = match t / 20 {
                0 => ((b & c) | (!b & d), STAGE_CONSTANTS[0]),
                1 => (b ^ c ^ d, STAGE_CONSTANTS[1]),
                2 => ((b & c) | (b & d) | (c & d), STAGE_CONSTANTS[2]),
                _ => (b ^ c ^ d, STAGE_CONSTANTS[3]),
            };
            let next_a = a
                .rotate_left(5)
                .wrapping_add(mix)
                .wrapping_add(e)
                .wrapping_add(constant)
                .wrapping_add(scheduled);
            working = [next_a, a, b.rotate_left(30), c, d];
        }

        // Fold this block's result back into the running state.
        for (total, delta) in state.iter_mut().zip(working.iter()) {
            *total = total.wrapping_add(*delta);
        }
    }

    // Serialise the five state words big-endian.
    let mut digest = [0u8; 20];
    for (dest, word) in digest.chunks_exact_mut(4).zip(state.iter()) {
        dest.copy_from_slice(&word.to_be_bytes());
    }
    digest
}
/// Test-only helper: SHA-256 of an in-memory byte slice as lowercase hex.
#[cfg(test)]
fn sha256_hex(input: &[u8]) -> String {
    let mut hasher = Sha256::new();
    hasher.update(input);
    let digest = hasher.finalize();
    hex_lower(digest.as_ref())
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::path::PathBuf;
    use std::time::{SystemTime, UNIX_EPOCH};

    /// Builds a temp-file path that is distinct per process, per call time and
    /// per `label`, so tests running in parallel do not share a file.
    fn unique_temp_path(label: &str) -> PathBuf {
        let nanos = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap()
            .as_nanos();
        let pid = std::process::id();
        std::env::temp_dir().join(format!("codebase_graph_hash_{pid}_{nanos}_{label}.txt"))
    }

    /// Writes `contents` to a fresh temp file and hashes it with `sha256_file`.
    ///
    /// The file is removed before the hash result is unwrapped, so a hashing
    /// failure does not leave the file behind.
    fn sha256_of_temp_file(label: &str, contents: &[u8]) -> String {
        let path = unique_temp_path(label);
        fs::write(&path, contents).unwrap();
        let outcome = sha256_file(&path);
        fs::remove_file(&path).unwrap();
        outcome.unwrap()
    }

    #[test]
    fn stable_hashes_match_reference_values() {
        assert_eq!(sha1_hex(b"abc"), "a9993e364706816aba3e");
        assert_eq!(partition_id("src/lib.rs"), "b24749917179fb5e3e61");
        assert_eq!(
            sha256_hex(b"abc"),
            "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad"
        );
    }

    #[test]
    fn sha1_handles_empty_and_multi_block_input() {
        assert_eq!(sha1_hex(b""), "da39a3ee5e6b4b0d3255");
        // 56 bytes of input forces the padding to spill into a second block.
        assert_eq!(
            sha1_hex(b"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq"),
            "84983e441c3bd26ebaae"
        );
    }

    #[test]
    fn sha256_file_streams_file_content() {
        assert_eq!(
            sha256_of_temp_file("abc", b"abc"),
            "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad"
        );
    }

    #[test]
    fn sha256_file_handles_empty_and_multi_buffer_files() {
        assert_eq!(
            sha256_of_temp_file("empty", b""),
            "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
        );
        // Larger than one read buffer, so the read loop must iterate more than once.
        let large = vec![0xa5u8; HASH_BUFFER_SIZE + 17];
        assert_eq!(sha256_of_temp_file("large", &large), sha256_hex(&large));
    }
}