use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use super::digest::{ByteDigest, ContentDigest, MetaDigest};
/// Fingerprint describing one observed file.
///
/// Only `size` is mandatory. Each optional field is left out of the serialized
/// form when it is `None` and falls back to `None` when missing on input.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileFingerprint {
    /// Digest stored under the serialized key `file_hash`.
    ///
    /// NOTE(review): presumably a hash over the raw file bytes — confirm against
    /// the `digest` module.
    #[serde(rename = "file_hash", default, skip_serializing_if = "Option::is_none")]
    pub byte_digest: Option<ByteDigest>,

    /// Digest stored under the serialized key `content_hash`.
    ///
    /// NOTE(review): presumably a hash of the decoded/semantic content — confirm.
    #[serde(rename = "content_hash", default, skip_serializing_if = "Option::is_none")]
    pub content_digest: Option<ContentDigest>,

    /// Digest stored under the serialized key `meta_hash`.
    ///
    /// NOTE(review): presumably a hash of embedded metadata — confirm.
    #[serde(rename = "meta_hash", default, skip_serializing_if = "Option::is_none")]
    pub meta_digest: Option<MetaDigest>,

    /// File size (presumably in bytes — TODO confirm). Always serialized.
    pub size: u64,

    /// Modification timestamp in UTC, when one was recorded.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub modified_at: Option<DateTime<Utc>>,
}
/// How strong the evidence carried by a fingerprint is.
///
/// Variants are declared from weakest to strongest, so the derived `Ord`
/// (and the explicit discriminants) rank `SizeOnly` lowest and `Semantic`
/// highest.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum FingerprintPrecision {
    /// Nothing but the size is known.
    SizeOnly = 0,
    /// Size plus a modification timestamp.
    Metadata = 1,
    /// A byte digest is available.
    ByteLevel = 2,
    /// A meta digest is available.
    MetaLevel = 3,
    /// A content digest is available.
    Semantic = 4,
}
impl FileFingerprint {
    /// Decides whether `self` and `other` count as the same file, using the
    /// first piece of evidence that both fingerprints carry.
    ///
    /// The checks run in this order and the first applicable one decides:
    ///
    /// 1. Byte digests, when both are present and `matches_same_algo` yields
    ///    `Ok`. An `Err` is not treated as a mismatch; evaluation simply
    ///    continues with the weaker checks.
    /// 2. Content digests, when both are present (plain equality).
    /// 3. Meta digests, when both are present (plain equality).
    /// 4. Sizes, which must be equal.
    /// 5. Modification times, when both are present (plain equality).
    ///
    /// If the sizes agree and at least one side has no modification time, the
    /// fingerprints are considered matching.
    pub fn matches_within_location(&self, other: &FileFingerprint) -> bool {
        // A byte-level verdict only exists when both digests are present *and*
        // comparable; otherwise fall through to the remaining tiers.
        let byte_verdict = self
            .byte_digest
            .as_ref()
            .zip(other.byte_digest.as_ref())
            .and_then(|(mine, theirs)| mine.matches_same_algo(theirs).ok());
        if let Some(verdict) = byte_verdict {
            return verdict;
        }

        let content_pair = self.content_digest.as_ref().zip(other.content_digest.as_ref());
        if let Some((mine, theirs)) = content_pair {
            return mine == theirs;
        }

        let meta_pair = self.meta_digest.as_ref().zip(other.meta_digest.as_ref());
        if let Some((mine, theirs)) = meta_pair {
            return mine == theirs;
        }

        if self.size != other.size {
            return false;
        }

        // With equal sizes, a timestamp can only veto the match when both
        // sides actually recorded one.
        match (&self.modified_at, &other.modified_at) {
            (Some(mine), Some(theirs)) => mine == theirs,
            _ => true,
        }
    }

    /// Returns the strongest precision tier this fingerprint can offer.
    ///
    /// Fields are inspected from strongest to weakest tier: content digest,
    /// meta digest, byte digest, modification time; with none of them present
    /// the result is `SizeOnly`.
    pub fn precision(&self) -> FingerprintPrecision {
        if self.content_digest.is_some() {
            return FingerprintPrecision::Semantic;
        }
        if self.meta_digest.is_some() {
            return FingerprintPrecision::MetaLevel;
        }
        if self.byte_digest.is_some() {
            return FingerprintPrecision::ByteLevel;
        }
        if self.modified_at.is_some() {
            return FingerprintPrecision::Metadata;
        }
        FingerprintPrecision::SizeOnly
    }

    /// Returns the lower of the two fingerprints' individual precisions.
    ///
    /// NOTE(review): this is derived purely from `precision()` on each side and
    /// is not guaranteed to be the tier `matches_within_location` ends up using
    /// (e.g. when the two sides carry different kinds of digest).
    pub fn effective_precision(&self, other: &FileFingerprint) -> FingerprintPrecision {
        self.precision().min(other.precision())
    }
}
/// Renders each precision tier as a lowercase, hyphenated label.
impl std::fmt::Display for FingerprintPrecision {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Pick the label first so the formatter is written to in one place.
        let label = match self {
            Self::SizeOnly => "size-only",
            Self::Metadata => "metadata",
            Self::ByteLevel => "byte-level",
            Self::MetaLevel => "meta-level",
            Self::Semantic => "semantic",
        };
        f.write_str(label)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    // ---- fixture builders -------------------------------------------------

    /// Shorthand for a `Djb2` byte digest built from a literal.
    fn djb2(value: &'static str) -> ByteDigest {
        ByteDigest::Djb2(value.into())
    }

    /// Parses an RFC 3339 timestamp into UTC; panics on malformed input.
    fn utc(rfc3339: &str) -> DateTime<Utc> {
        DateTime::parse_from_rfc3339(rfc3339)
            .unwrap()
            .with_timezone(&Utc)
    }

    /// Builds a fingerprint from optional digests; `modified_at` is never set.
    fn digest_fp(
        byte_digest: Option<ByteDigest>,
        content_digest: Option<&str>,
        meta_digest: Option<&str>,
        size: u64,
    ) -> FileFingerprint {
        FileFingerprint {
            byte_digest,
            content_digest: content_digest.map(|c| ContentDigest(c.to_owned())),
            meta_digest: meta_digest.map(|m| MetaDigest(m.to_owned())),
            size,
            modified_at: None,
        }
    }

    /// Fingerprint with a byte digest and an optional content digest.
    fn hash_fp(
        byte_digest: ByteDigest,
        content_digest: Option<&str>,
        size: u64,
    ) -> FileFingerprint {
        digest_fp(Some(byte_digest), content_digest, None, size)
    }

    /// Fingerprint with a byte digest plus optional content and meta digests.
    fn hash_fp_with_meta(
        byte_digest: ByteDigest,
        content_digest: Option<&str>,
        meta_digest: Option<&str>,
        size: u64,
    ) -> FileFingerprint {
        digest_fp(Some(byte_digest), content_digest, meta_digest, size)
    }

    /// Fingerprint without any digest: just a size and an optional mtime.
    fn metadata_fp(size: u64, mtime: Option<DateTime<Utc>>) -> FileFingerprint {
        FileFingerprint {
            byte_digest: None,
            content_digest: None,
            meta_digest: None,
            size,
            modified_at: mtime,
        }
    }

    // ---- matches_within_location: byte digest tier ------------------------

    #[test]
    fn matches_byte_level_trumps_semantic() {
        // Equal byte digests win even though content digest and size differ.
        let left = hash_fp(djb2("h1"), Some("c1"), 100);
        let right = hash_fp(djb2("h1"), Some("c2"), 200);
        assert!(left.matches_within_location(&right));
    }

    #[test]
    fn matches_byte_level_different_trumps_semantic() {
        // Differing byte digests lose even though content digest and size agree.
        let left = hash_fp(djb2("h1"), Some("c1"), 100);
        let right = hash_fp(djb2("h2"), Some("c1"), 100);
        assert!(!left.matches_within_location(&right));
    }

    // ---- matches_within_location: content digest tier ---------------------

    #[test]
    fn matches_semantic_fallback_same() {
        let left = digest_fp(None, Some("c1"), None, 100);
        let right = digest_fp(None, Some("c1"), None, 200);
        assert!(left.matches_within_location(&right));
    }

    #[test]
    fn matches_semantic_fallback_different() {
        let left = digest_fp(None, Some("c1"), None, 100);
        let right = digest_fp(None, Some("c2"), None, 100);
        assert!(!left.matches_within_location(&right));
    }

    // ---- matches_within_location: size / mtime tier -----------------------

    #[test]
    fn matches_metadata_same() {
        let stamp = Utc::now();
        let left = metadata_fp(1024, Some(stamp));
        let right = metadata_fp(1024, Some(stamp));
        assert!(left.matches_within_location(&right));
    }

    #[test]
    fn matches_metadata_size_differs() {
        let stamp = Utc::now();
        let left = metadata_fp(1024, Some(stamp));
        let right = metadata_fp(2048, Some(stamp));
        assert!(!left.matches_within_location(&right));
    }

    #[test]
    fn matches_metadata_mtime_differs() {
        let left = metadata_fp(1024, Some(utc("2024-01-01T00:00:00Z")));
        let right = metadata_fp(1024, Some(utc("2024-06-01T00:00:00Z")));
        assert!(!left.matches_within_location(&right));
    }

    #[test]
    fn matches_size_only_same() {
        let left = metadata_fp(1024, None);
        let right = metadata_fp(1024, None);
        assert!(left.matches_within_location(&right));
    }

    #[test]
    fn matches_size_only_different() {
        let left = metadata_fp(1024, None);
        let right = metadata_fp(2048, None);
        assert!(!left.matches_within_location(&right));
    }

    // ---- matches_within_location: mixed evidence --------------------------

    #[test]
    fn matches_hash_vs_metadata_size_match() {
        let hashed = hash_fp(djb2("h1"), None, 1024);
        let plain = metadata_fp(1024, None);
        assert!(hashed.matches_within_location(&plain));
    }

    #[test]
    fn matches_hash_vs_metadata_size_differs() {
        let hashed = hash_fp(djb2("h1"), None, 1024);
        let plain = metadata_fp(2048, None);
        assert!(!hashed.matches_within_location(&plain));
    }

    #[test]
    fn cross_algorithm_falls_back_to_size() {
        let left = hash_fp(djb2("h1"), None, 1024);
        let right = hash_fp(ByteDigest::Sha256("h2".into()), None, 1024);
        assert!(left.matches_within_location(&right));
    }

    // ---- precision --------------------------------------------------------

    #[test]
    fn precision_semantic() {
        let fingerprint = hash_fp(djb2("h"), Some("c"), 100);
        assert_eq!(fingerprint.precision(), FingerprintPrecision::Semantic);
    }

    #[test]
    fn precision_byte_level() {
        let fingerprint = hash_fp(djb2("h"), None, 100);
        assert_eq!(fingerprint.precision(), FingerprintPrecision::ByteLevel);
    }

    #[test]
    fn precision_metadata() {
        let fingerprint = metadata_fp(100, Some(Utc::now()));
        assert_eq!(fingerprint.precision(), FingerprintPrecision::Metadata);
    }

    #[test]
    fn precision_size_only() {
        let fingerprint = metadata_fp(100, None);
        assert_eq!(fingerprint.precision(), FingerprintPrecision::SizeOnly);
    }

    // ---- effective_precision ----------------------------------------------

    #[test]
    fn effective_precision_downgrades() {
        let hashed = hash_fp(djb2("h"), Some("c"), 100);
        let plain = metadata_fp(100, Some(Utc::now()));
        assert_eq!(
            hashed.effective_precision(&plain),
            FingerprintPrecision::Metadata
        );
    }

    #[test]
    fn effective_precision_same_level() {
        let left = hash_fp(djb2("h1"), None, 100);
        let right = hash_fp(djb2("h2"), None, 200);
        assert_eq!(
            left.effective_precision(&right),
            FingerprintPrecision::ByteLevel
        );
    }

    // ---- Display ----------------------------------------------------------

    #[test]
    fn precision_display() {
        let expected = [
            (FingerprintPrecision::Semantic, "semantic"),
            (FingerprintPrecision::MetaLevel, "meta-level"),
            (FingerprintPrecision::ByteLevel, "byte-level"),
            (FingerprintPrecision::Metadata, "metadata"),
            (FingerprintPrecision::SizeOnly, "size-only"),
        ];
        for (level, label) in expected {
            assert_eq!(level.to_string(), label);
        }
    }

    // ---- matches_within_location: meta digest tier ------------------------

    #[test]
    fn matches_meta_digest_fallback_same() {
        let left = digest_fp(None, None, Some("m1"), 100);
        let right = digest_fp(None, None, Some("m1"), 200);
        assert!(left.matches_within_location(&right));
    }

    #[test]
    fn matches_meta_digest_fallback_different() {
        let left = digest_fp(None, None, Some("m1"), 100);
        let right = digest_fp(None, None, Some("m2"), 100);
        assert!(!left.matches_within_location(&right));
    }

    #[test]
    fn content_same_meta_different_means_meta_only_change() {
        // Byte digests differ, so the byte tier reports a mismatch first.
        let left = hash_fp_with_meta(djb2("h1"), Some("c1"), Some("m1"), 1024);
        let right = hash_fp_with_meta(djb2("h2"), Some("c1"), Some("m2"), 1024);
        assert!(!left.matches_within_location(&right));
    }

    // ---- precision with meta digests --------------------------------------

    #[test]
    fn precision_meta_level() {
        let fingerprint = digest_fp(None, None, Some("m1"), 100);
        assert_eq!(fingerprint.precision(), FingerprintPrecision::MetaLevel);
    }

    #[test]
    fn precision_semantic_trumps_meta() {
        let fingerprint = digest_fp(None, Some("c1"), Some("m1"), 100);
        assert_eq!(fingerprint.precision(), FingerprintPrecision::Semantic);
    }

    #[test]
    fn precision_ordering() {
        // Listed weakest to strongest; every neighbour pair must be strictly increasing.
        let ladder = [
            FingerprintPrecision::SizeOnly,
            FingerprintPrecision::Metadata,
            FingerprintPrecision::ByteLevel,
            FingerprintPrecision::MetaLevel,
            FingerprintPrecision::Semantic,
        ];
        for pair in ladder.windows(2) {
            assert!(pair[0] < pair[1]);
        }
    }

    // ---- entity-model scenarios -------------------------------------------

    #[test]
    fn entity_model_meta_change_does_not_break_identity() {
        // Same content digest, different meta digest and size: still a match.
        let before = digest_fp(None, Some("pixel_hash_abc"), Some("meta_v1"), 10240);
        let after = digest_fp(None, Some("pixel_hash_abc"), Some("meta_v2"), 10300);
        assert!(before.matches_within_location(&after));
    }

    #[test]
    fn entity_model_content_change_is_detected() {
        // Content digest differs while meta digest and size stay the same.
        let v1 = digest_fp(None, Some("pixel_v1"), Some("meta_v1"), 10240);
        let v2 = digest_fp(None, Some("pixel_v2"), Some("meta_v1"), 10240);
        assert!(!v1.matches_within_location(&v2));
    }

    #[test]
    fn entity_model_reexport_with_ts_in_meta() {
        let original = hash_fp_with_meta(
            djb2("file_h1"),
            Some("pixel_abc"),
            Some("meta_ts1"),
            10240,
        );
        let reexport = hash_fp_with_meta(
            djb2("file_h2"),
            Some("pixel_abc"),
            Some("meta_ts2"),
            10300,
        );

        // The byte digests differ, so the fingerprints do not match ...
        assert!(!original.matches_within_location(&reexport));

        // ... yet the content digests are still identical.
        let original_content = original.content_digest.as_ref().map(|cd| cd.as_str());
        let reexport_content = reexport.content_digest.as_ref().map(|cd| cd.as_str());
        assert_eq!(original_content, reexport_content);
    }
}