use crate::checksum::Ctx;
use crate::error::Error::SumsFileError;
use crate::error::{Error, Result};
use crate::io::S3Client;
use crate::io::sums::{ObjectSums, ObjectSumsBuilder};
use serde::{Deserialize, Serialize};
use serde_json::{from_slice, to_string};
use std::cmp::Ordering;
use std::collections::BTreeMap;
use std::fmt::{Debug, Formatter};
use std::hash::{Hash, Hasher};
/// Version string stored in the `version` field of every [`SumsFile`] built by [`SumsFile::new`].
pub const OUTPUT_FILE_VERSION: &str = "1";
/// Suffix that marks a file name as a sums file; appended by
/// [`SumsFile::format_sums_file`] and stripped by [`SumsFile::format_target_file`].
pub const SUMS_FILE_ENDING: &str = ".sums";
/// A name paired with the boxed [`ObjectSums`] handle that was built for it.
///
/// The trait implementations for `State` in this file (equality, hashing,
/// ordering and `Debug`) look at `name` only.
pub struct State {
/// The name this state was created with.
pub(crate) name: String,
/// Handle built through [`ObjectSumsBuilder`] for the target file derived from `name`.
pub(crate) object_sums: Box<dyn ObjectSums + Send>,
}
impl State {
    /// Build a `State` for `name`, constructing its [`ObjectSums`] handle.
    ///
    /// The handle is built for the target file name, i.e. `name` with a trailing
    /// `.sums` suffix removed if present. `client` is handed to the builder as is.
    ///
    /// # Errors
    ///
    /// Propagates any error produced while building the handle.
    pub async fn try_from(name: String, client: Option<S3Client>) -> Result<Self> {
        // Derive the target before `name` is moved into the struct.
        let target = SumsFile::format_target_file(&name);
        let object_sums = ObjectSumsBuilder::default()
            .set_client(client)
            .build(target)
            .await?;

        Ok(Self { name, object_sums })
    }

    /// Consume the state and hand back its name together with its
    /// [`ObjectSums`] handle.
    pub fn into_inner(self) -> (String, Box<dyn ObjectSums + Send>) {
        let Self { name, object_sums } = self;
        (name, object_sums)
    }
}
impl Debug for State {
    /// Render as `State { name: .. }`; the `object_sums` handle is left out.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let mut repr = f.debug_struct("State");
        repr.field("name", &self.name);
        repr.finish()
    }
}
// Equality is plain `String` equality on `name`, which is a full equivalence relation.
impl Eq for State {}

impl PartialEq for State {
    /// Two states are equal when their names are equal; `object_sums` is not compared.
    fn eq(&self, other: &Self) -> bool {
        let (lhs, rhs) = (&self.name, &other.name);
        lhs == rhs
    }
}
impl Hash for State {
    /// Feed only `name` into the hasher; `object_sums` does not contribute.
    fn hash<H: Hasher>(&self, state: &mut H) {
        Hash::hash(&self.name, state);
    }
}
impl Ord for State {
    /// Order states by comparing their names.
    fn cmp(&self, other: &Self) -> Ordering {
        Ord::cmp(&self.name, &other.name)
    }
}
impl PartialOrd for State {
    /// Defer to the total order given by `Ord`, so the result is always `Some`.
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(Ord::cmp(self, other))
    }
}
impl Clone for State {
    /// Copy the name and duplicate the boxed `object_sums` via `dyn_clone::clone_box`.
    fn clone(&self) -> Self {
        let name = self.name.clone();
        let object_sums = dyn_clone::clone_box(&*self.object_sums);
        Self { name, object_sums }
    }
}
/// In-memory form of a sums file: a format version, an optional size and a
/// map of checksums keyed by [`Ctx`].
///
/// With the serde attributes below, `size` is left out of the output when it
/// is `None`, and the checksum map is flattened so that each entry appears as
/// a top-level key alongside `version` and `size`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Ord, PartialOrd, Hash)]
#[serde(rename_all = "kebab-case")]
pub struct SumsFile {
/// Format version; [`SumsFile::new`] sets it to [`OUTPUT_FILE_VERSION`].
pub(crate) version: String,
/// Recorded size, if any — presumably the byte size of the file the checksums
/// describe (TODO confirm against callers).
#[serde(skip_serializing_if = "Option::is_none")]
pub(crate) size: Option<u64>,
/// Checksum values keyed by the context that describes how each was computed.
#[serde(flatten)]
pub(crate) checksums: BTreeMap<Ctx, Checksum>,
}
impl Default for SumsFile {
    /// An empty sums file: no size, no checksums, version as set by `SumsFile::new`.
    fn default() -> Self {
        let no_checksums = BTreeMap::new();
        Self::new(None, no_checksums)
    }
}
impl SumsFile {
    /// Create a sums file holding `size` and `checksums`, with `version` set to
    /// [`OUTPUT_FILE_VERSION`].
    pub fn new(size: Option<u64>, checksums: BTreeMap<Ctx, Checksum>) -> Self {
        Self {
            version: OUTPUT_FILE_VERSION.to_string(),
            size,
            checksums,
        }
    }

    /// Return `name` as a sums file name: [`SUMS_FILE_ENDING`] is appended unless
    /// `name` already ends with it.
    pub fn format_sums_file(name: &str) -> String {
        if name.ends_with(SUMS_FILE_ENDING) {
            name.to_string()
        } else {
            format!("{}{}", name, SUMS_FILE_ENDING)
        }
    }

    /// Return the name of the file a sums file refers to: one trailing
    /// [`SUMS_FILE_ENDING`] is removed if present, otherwise `name` is returned
    /// unchanged.
    pub fn format_target_file(name: &str) -> String {
        name.strip_suffix(SUMS_FILE_ENDING)
            .unwrap_or(name)
            .to_string()
    }

    /// Serialize this sums file to a JSON string.
    ///
    /// # Errors
    ///
    /// Returns the converted `serde_json` error if serialization fails.
    pub fn to_json_string(&self) -> Result<String> {
        Ok(to_string(self)?)
    }

    /// Parse a sums file from JSON bytes via the `TryFrom<&[u8]>` implementation.
    ///
    /// The body performs no awaiting; the function stays `async` so existing
    /// callers that `.await` it keep compiling.
    ///
    /// # Errors
    ///
    /// Returns an error if `slice` cannot be deserialized into a `SumsFile`.
    pub async fn read_from_slice(slice: &[u8]) -> Result<Self> {
        slice.try_into()
    }

    /// Merge `other` into `self`, consuming both and returning the result.
    ///
    /// Checksums from `other` overwrite checksums in `self` that share a key.
    /// The result keeps the `size` and `version` of `self`.
    ///
    /// # Errors
    ///
    /// Returns [`SumsFileError`] when both files contain checksums and their
    /// sizes differ. If either side has no checksums the size check is skipped.
    pub fn merge(mut self, other: Self) -> Result<Self> {
        if self.size != other.size && !self.checksums.is_empty() && !other.checksums.is_empty() {
            return Err(SumsFileError(
                "the size of output files do not match".to_string(),
            ));
        }

        self.merge_mut(other);
        Ok(self)
    }

    /// Merge the checksums of `other` into `self` in place, overwriting entries
    /// that share a key. No size check is performed and `self.size` is left
    /// untouched.
    pub fn merge_mut(&mut self, other: Self) {
        // `extend` inserts every pair, replacing the value of an existing key.
        self.checksums.extend(other.checksums);
    }

    /// Split into one sums file per checksum, each carrying the same `size` as
    /// `self`. An empty sums file yields an empty vector.
    pub fn split(self) -> Vec<SumsFile> {
        let size = self.size;

        // `self` is owned, so the entries can be moved out instead of cloned.
        self.checksums
            .into_iter()
            .map(|(ctx, checksum)| {
                let mut single = Self::default().with_size(size);
                single.add_checksum(ctx, checksum);
                single
            })
            .collect()
    }

    /// Find a checksum that proves `self` and `other` describe the same content.
    ///
    /// Returns the first entry of `self` (in key order) whose key is also present
    /// in `other` with an equal value, or `None` if the sizes differ or no such
    /// entry exists.
    pub fn is_same(&self, other: &Self) -> Option<(&Ctx, &Checksum)> {
        if self.size != other.size {
            return None;
        }

        self.checksums
            .iter()
            .find(|(ctx, checksum)| other.checksums.get(*ctx) == Some(*checksum))
    }

    /// Find a checksum context that both files have in common.
    ///
    /// Returns the first entry of `self` (in key order) whose key is also present
    /// in `other`, regardless of the value, or `None` if the sizes differ or no
    /// key is shared.
    pub fn comparable(&self, other: &Self) -> Option<(&Ctx, &Checksum)> {
        if self.size != other.size {
            return None;
        }

        self.checksums
            .iter()
            .find(|(ctx, _)| other.checksums.contains_key(*ctx))
    }

    /// Builder-style variant of [`SumsFile::set_size`].
    pub fn with_size(mut self, size: Option<u64>) -> Self {
        self.set_size(size);
        self
    }

    /// Replace the recorded size.
    pub fn set_size(&mut self, size: Option<u64>) {
        self.size = size;
    }

    /// Insert `checksum` under `ctx`, replacing any value already stored for it.
    pub fn add_checksum(&mut self, ctx: Ctx, checksum: Checksum) {
        self.checksums.insert(ctx, checksum);
    }

    /// `true` when no checksums are stored; the size is not considered.
    pub fn is_empty(&self) -> bool {
        self.checksums.is_empty()
    }
}
impl TryFrom<&[u8]> for SumsFile {
    type Error = Error;

    /// Deserialize a sums file from JSON bytes.
    ///
    /// # Errors
    ///
    /// Returns the converted `serde_json` error if `value` is not a valid
    /// serialized `SumsFile`.
    fn try_from(value: &[u8]) -> Result<Self> {
        let parsed: Self = from_slice(value)?;
        Ok(parsed)
    }
}
/// A checksum value stored as a string.
///
/// As a single-field tuple struct it serializes as the inner string, which is
/// how the values appear in the serialized [`SumsFile`].
#[derive(Debug, Default, Clone, Serialize, Deserialize, PartialEq, Eq, Ord, PartialOrd, Hash)]
#[serde(rename_all = "kebab-case")]
pub struct Checksum(String);
impl Checksum {
pub fn new(checksum: String) -> Self {
Self(checksum)
}
}
// Unit tests for `SumsFile` serialization, comparison and merging.
//
// The fixtures rely on `Ctx` parsing/ordering and on checksum constants that
// live in other modules, so several notes below are marked as assumptions.
#[cfg(test)]
pub(crate) mod test {
use super::*;
use crate::checksum::aws_etag::test::expected_md5_1gib;
use crate::checksum::standard::test::EXPECTED_MD5_SUM;
use serde_json::{Value, from_value, json, to_value};
// Checksum value stored under the "md5-aws-123b" context in the shared fixture.
const EXPECTED_ETAG: &str = "1c3490f45b0cdc4299a128410def3a1d-b";
// Serializing the fixture file must produce exactly the fixture JSON.
#[test]
fn serialize_output_file() -> Result<()> {
let value = expected_output_file();
let result = to_value(&value)?;
let expected = expected_output_json();
assert_eq!(result, expected);
Ok(())
}
// Deserializing the fixture JSON must produce exactly the fixture file.
#[test]
fn deserialize_output_file() -> Result<()> {
let value = expected_output_json();
let result: SumsFile = from_value(value)?;
let expected = expected_output_file();
assert_eq!(result, expected);
Ok(())
}
// `is_same` needs a shared key with an equal value.
#[test]
fn is_same() -> Result<()> {
let file_one = expected_output_file();
let mut file_two = file_one.clone();
// Re-inserting the fixture's own context and value keeps a matching entry.
let mut aws: Ctx = "md5-aws-123b".parse()?;
aws.set_file_size(Some(123));
file_two
.checksums
.insert(aws, Checksum::new(EXPECTED_ETAG.to_string()));
assert!(file_one.is_same(&file_two).is_some());
// Replacing all checksums with a single differently-parameterised context
// leaves nothing that matches.
let mut file_two = file_one.clone();
let mut aws: Ctx = "aws-etag-1b".parse()?;
aws.set_file_size(Some(1));
set_checksums(&mut file_two, aws);
assert!(file_one.is_same(&file_two).is_none());
Ok(())
}
// `comparable` only needs a shared key; the values may differ.
#[test]
fn comparable() -> Result<()> {
let file_one = expected_output_file();
let mut file_two = file_one.clone();
// `file_two` still holds the fixture's context after this insert, so a
// shared key exists — assuming "md5-aws-1b" is a different key (TODO confirm).
let mut aws: Ctx = "md5-aws-1b".parse()?;
aws.set_file_size(Some(1));
file_two
.checksums
.insert(aws, Checksum::new(expected_md5_1gib().to_string()));
assert!(file_one.comparable(&file_two).is_some());
// With only the "aws-etag-1b" context left there is no shared key.
let mut file_two = file_one.clone();
let mut aws: Ctx = "aws-etag-1b".parse()?;
aws.set_file_size(Some(1));
set_checksums(&mut file_two, aws);
assert!(file_one.comparable(&file_two).is_none());
Ok(())
}
// Merging keeps the size of the receiver and lets the other file's
// checksums overwrite entries with the same key.
#[test]
fn merge() -> Result<()> {
let expected_md5 = EXPECTED_MD5_SUM;
let mut file_one = expected_output_file();
let mut aws_one: Ctx = "aws-etag-123b".parse()?;
aws_one.set_file_size(Some(123));
file_one
.checksums
.insert(aws_one.clone(), Checksum::new(expected_md5.to_string()));
let mut file_two = expected_output_file();
let mut aws_two: Ctx = "md5-aws-123b".parse()?;
aws_two.set_file_size(Some(123));
set_checksums(&mut file_two, aws_two.clone());
let result = file_one.clone().merge(file_two)?;
assert_eq!(result.size, file_one.size);
// NOTE(review): `aws_one` was inserted with `EXPECTED_MD5_SUM`, yet the
// expectation pairs it with `expected_md5_1gib()`. This presumably relies on
// "aws-etag-123b" and "md5-aws-123b" parsing to equal `Ctx` keys (so the
// merge overwrites the entry), or on both constants being the same value —
// confirm against the `Ctx` implementation.
assert_eq!(
result.checksums,
BTreeMap::from_iter(vec![
(aws_two, Checksum::new(expected_md5_1gib().to_string()),),
(aws_one, Checksum::new(expected_md5_1gib().to_string())),
])
);
Ok(())
}
// Replace every checksum in `file_two` with a single `aws` entry holding the
// value returned by `expected_md5_1gib()`.
fn set_checksums(file_two: &mut SumsFile, aws: Ctx) {
file_two.checksums =
BTreeMap::from_iter(vec![(aws, Checksum::new(expected_md5_1gib().to_string()))]);
}
// Shared fixture: size 123 with one "md5-aws-123b" checksum set to `EXPECTED_ETAG`.
fn expected_output_file() -> SumsFile {
let mut aws: Ctx = "md5-aws-123b".parse().unwrap();
aws.set_file_size(Some(123));
let checksums = vec![(aws, Checksum::new(EXPECTED_ETAG.to_string()))];
SumsFile::new(Some(123), BTreeMap::from_iter(checksums))
}
// JSON counterpart of `expected_output_file`; the checksum entry sits at the
// top level because the map is flattened.
fn expected_output_json() -> Value {
json!({
"version": OUTPUT_FILE_VERSION,
"size": 123,
"md5-aws-123b": EXPECTED_ETAG,
})
}
}