use std::path::{Path, PathBuf};
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};
/// Reference to a dataset: where it lives, what it is, how large it is, and
/// a digest of its contents.
///
/// Two references compare equal only when every field matches, so equality
/// covers both the location and the content digest.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct DatasetRef {
    /// Path associated with the dataset, stored exactly as supplied.
    pub path: PathBuf,
    /// Free-form, caller-chosen label describing the dataset.
    pub kind: String,
    /// Length of the dataset contents in bytes.
    pub bytes: u64,
    /// SHA-256 digest of the contents; the constructors in this file store
    /// it as a hex string.
    pub sha256: String,
}
impl DatasetRef {
    /// Builds a reference by hashing the file at `path`.
    ///
    /// The file is streamed through the hasher in fixed-size chunks, so
    /// memory use stays constant regardless of the dataset size. The
    /// resulting `bytes` and `sha256` are the same as hashing the complete
    /// file contents in one go.
    ///
    /// # Errors
    ///
    /// Returns the underlying [`std::io::Error`] if the file cannot be
    /// opened or read.
    pub fn from_file(path: impl AsRef<Path>, kind: impl Into<String>) -> std::io::Result<Self> {
        /// Size of the scratch buffer used for each read call.
        const CHUNK_LEN: usize = 64 * 1024;

        let path = path.as_ref().to_path_buf();
        let mut file = std::fs::File::open(&path)?;
        let mut hasher = Sha256::new();
        // Heap-allocated so the function does not need a large stack frame.
        let mut chunk = vec![0u8; CHUNK_LEN];
        let mut bytes: u64 = 0;

        loop {
            let read = match std::io::Read::read(&mut file, &mut chunk[..]) {
                Ok(0) => break,
                Ok(read) => read,
                // An interrupted read is transient; retry it, as
                // `std::fs::read` does.
                Err(err) if err.kind() == std::io::ErrorKind::Interrupted => continue,
                Err(err) => return Err(err),
            };
            hasher.update(&chunk[..read]);
            // `read` is at most CHUNK_LEN, so widening to u64 is lossless.
            bytes += read as u64;
        }

        Ok(Self {
            path,
            kind: kind.into(),
            bytes,
            sha256: hex::encode(hasher.finalize()),
        })
    }

    /// Builds a reference from contents that are already in memory.
    ///
    /// `path` is recorded as-is and is never opened; only `data` determines
    /// the `bytes` and `sha256` fields.
    pub fn from_bytes(path: impl Into<PathBuf>, kind: impl Into<String>, data: &[u8]) -> Self {
        Self {
            path: path.into(),
            kind: kind.into(),
            // usize -> u64 is a widening conversion on supported targets.
            bytes: data.len() as u64,
            sha256: hex::encode(Sha256::digest(data)),
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns a per-process path inside the system temp directory so that
    /// concurrent test runs do not collide on the same file.
    fn scratch_path(name: &str) -> PathBuf {
        std::env::temp_dir().join(format!("siderust_dataset_{}_{}", std::process::id(), name))
    }

    /// Hashing the same bytes twice yields equal references, and the digest
    /// matches the known SHA-256 of "hello world".
    #[test]
    fn from_bytes_hashes_deterministically() {
        let a = DatasetRef::from_bytes("/tmp/a", "test", b"hello world");
        let b = DatasetRef::from_bytes("/tmp/a", "test", b"hello world");
        assert_eq!(a, b);
        assert_eq!(
            a.sha256,
            "b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9"
        );
        assert_eq!(a.bytes, 11);
    }

    /// A file on disk and the same bytes in memory produce identical references.
    #[test]
    fn from_file_hashes_like_from_bytes() {
        let path = scratch_path("hello_world.bin");
        std::fs::write(&path, b"hello world").unwrap();
        let from_file = DatasetRef::from_file(&path, "test");
        // Clean up before asserting so a failure does not leak the temp file.
        let _ = std::fs::remove_file(&path);
        let from_bytes = DatasetRef::from_bytes(&path, "test", b"hello world");
        assert_eq!(from_file.unwrap(), from_bytes);
    }

    /// Input of a few hundred kilobytes still hashes and counts the same
    /// from disk as from memory.
    #[test]
    fn from_file_handles_larger_input() {
        let data: Vec<u8> = (0..200_000u32).map(|i| (i % 251) as u8).collect();
        let path = scratch_path("larger_input.bin");
        std::fs::write(&path, &data).unwrap();
        let from_file = DatasetRef::from_file(&path, "test");
        let _ = std::fs::remove_file(&path);
        let from_file = from_file.unwrap();
        assert_eq!(from_file.bytes, data.len() as u64);
        assert_eq!(from_file, DatasetRef::from_bytes(&path, "test", &data));
    }

    /// A path that does not exist surfaces as a `NotFound` I/O error.
    #[test]
    fn from_file_reports_missing_file() {
        let path = scratch_path("does_not_exist.bin");
        let _ = std::fs::remove_file(&path);
        let err = DatasetRef::from_file(&path, "test").unwrap_err();
        assert_eq!(err.kind(), std::io::ErrorKind::NotFound);
    }
}