use bytes::Bytes;
use crate::domain::types::CorpusEntry;
/// Counts the bit positions at which two equal-length byte slices differ.
///
/// Returns `None` when the slices have different lengths; otherwise returns
/// the total number of differing bits (`Some(0)` for two empty slices).
///
/// # Panics
///
/// Panics if the accumulated bit count overflows `u64`.
#[must_use]
pub fn hamming_distance(a: &[u8], b: &[u8]) -> Option<u64> {
    (a.len() == b.len()).then(|| {
        a.iter().zip(b).fold(0u64, |total, (lhs, rhs)| {
            // XOR leaves a 1 in every position where the two bytes disagree.
            total.strict_add(u64::from((lhs ^ rhs).count_ones()))
        })
    })
}
/// Packs the least-significant bit of every input byte into a compact bit string.
///
/// Bits are written most-significant-bit first: the LSB of `pixel_bytes[0]`
/// lands in bit 7 of the first output byte, the LSB of `pixel_bytes[1]` in
/// bit 6, and so on. The result is `ceil(pixel_bytes.len() / 8)` bytes long;
/// when the input length is not a multiple of eight, the unused low bits of
/// the final output byte stay zero.
#[must_use]
pub fn extract_lsb_pattern(pixel_bytes: &[u8]) -> Bytes {
    let packed: Vec<u8> = pixel_bytes
        .chunks(8)
        .map(|group| {
            // `pos` is at most 7 because a chunk never holds more than 8 bytes.
            group
                .iter()
                .enumerate()
                .fold(0u8, |acc, (pos, &px)| acc | ((px & 1) << (7 - pos)))
        })
        .collect();
    Bytes::from(packed)
}
/// Converts a payload into a byte pattern, optionally sized to a bit count.
///
/// * `target_bits == None` — the payload is copied unchanged.
/// * `target_bits == Some(n)` — the result is exactly `ceil(n / 8)` bytes
///   long: shorter payloads are right-padded with zero bytes, longer payloads
///   are truncated.
///
/// Sizing is byte-granular. When `n` is not a multiple of eight, the bits of
/// the final byte that lie beyond `n` are copied from the payload as-is; they
/// are not masked off.
#[must_use]
pub fn payload_to_bit_pattern(payload: &[u8], target_bits: Option<usize>) -> Bytes {
    target_bits.map_or_else(
        || Bytes::copy_from_slice(payload),
        |target| {
            let needed_bytes = target.div_ceil(8);
            // Copy only the prefix that survives. `get` yields `None` when the
            // payload is shorter than the target, in which case all of it is kept.
            // This avoids copying (and reallocating for) bytes that would be
            // truncated away immediately.
            let kept = payload.get(..needed_bytes).unwrap_or(payload);
            let mut result = Vec::with_capacity(needed_bytes);
            result.extend_from_slice(kept);
            // Zero-pad up to the requested size; a no-op when `kept` already fills it.
            result.resize(needed_bytes, 0);
            Bytes::from(result)
        },
    )
}
/// Scores a corpus pattern against a payload pattern by Hamming distance.
///
/// Only the common prefix is compared: the first `min(len)` bytes of each
/// slice. A score of `0` means those bytes are identical.
///
/// Returns `u64::MAX` when either slice is empty, i.e. when there is nothing
/// to compare.
#[must_use]
pub fn score_match(corpus_pattern: &[u8], payload_pattern: &[u8]) -> u64 {
    let overlap = corpus_pattern.len().min(payload_pattern.len());
    if overlap == 0 {
        return u64::MAX;
    }
    // Both prefixes exist because `overlap` never exceeds either length; the
    // `u64::MAX` fallback only keeps the slicing panic-free.
    corpus_pattern
        .get(..overlap)
        .zip(payload_pattern.get(..overlap))
        .and_then(|(corpus_prefix, payload_prefix)| {
            hamming_distance(corpus_prefix, payload_prefix)
        })
        .unwrap_or(u64::MAX)
}
/// Reports whether `distance` is at most 5% of `total_bits`.
///
/// Returns `false` when `total_bits` is zero, since no bits were compared.
///
/// The check is mathematically `distance * 20 <= total_bits`, evaluated as
/// `distance <= total_bits / 20`. For unsigned integers the two forms are
/// equivalent, but the division form cannot overflow, so very large
/// distances (including `u64::MAX`) yield `false` instead of panicking.
#[must_use]
pub const fn is_close_match(distance: u64, total_bits: u64) -> bool {
    /// A match is "close" when at most 1 bit in this many differs (1/20 = 5%).
    const CLOSE_MATCH_DIVISOR: u64 = 20;

    if total_bits == 0 {
        return false;
    }
    distance <= total_bits / CLOSE_MATCH_DIVISOR
}
/// Selects the corpus entries whose spectral key matches a model and resolution.
///
/// An entry is kept only when it has a `spectral_key` and that key's
/// `model_id` and `resolution` both equal the requested values. Entries
/// without a spectral key are never returned. The result borrows from
/// `entries` and preserves their original order.
#[must_use]
pub fn filter_by_model<'a>(
    entries: &'a [CorpusEntry],
    model_id: &str,
    resolution: (u32, u32),
) -> Vec<&'a CorpusEntry> {
    entries
        .iter()
        .filter(|entry| {
            let Some(key) = entry.spectral_key.as_ref() else {
                return false;
            };
            key.model_id == model_id && key.resolution == resolution
        })
        .collect()
}
/// Unit tests for the bit-pattern helpers and corpus filtering in this module.
#[cfg(test)]
mod tests {
    use super::*;

    // --- hamming_distance -------------------------------------------------

    #[test]
    fn hamming_distance_identical() {
        let a = [0xAA, 0x55, 0xFF];
        let b = [0xAA, 0x55, 0xFF];
        assert_eq!(hamming_distance(&a, &b), Some(0));
    }

    #[test]
    fn hamming_distance_one_bit() {
        let a = [0b0000_0000];
        let b = [0b0000_0001];
        assert_eq!(hamming_distance(&a, &b), Some(1));
    }

    #[test]
    fn hamming_distance_all_bits() {
        let a = [0x00];
        let b = [0xFF];
        assert_eq!(hamming_distance(&a, &b), Some(8));
    }

    #[test]
    fn hamming_distance_unequal_lengths() {
        let a = [0x00, 0x00];
        let b = [0xFF];
        assert_eq!(hamming_distance(&a, &b), None);
    }

    #[test]
    fn hamming_distance_empty_inputs() {
        // Two empty slices have equal length, so the distance is defined and zero.
        let a: [u8; 0] = [];
        let b: [u8; 0] = [];
        assert_eq!(hamming_distance(&a, &b), Some(0));
    }

    // --- extract_lsb_pattern ----------------------------------------------

    #[test]
    fn extract_lsb_pattern_basic() {
        // Alternating LSBs 1,0,1,0,... pack MSB-first into 0b1010_1010.
        let pixels = [0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00];
        let pattern = extract_lsb_pattern(&pixels);
        assert_eq!(pattern.as_ref(), &[0xAA]);
    }

    #[test]
    fn extract_lsb_pattern_partial_byte() {
        // Three inputs fill only the top three bits; the rest stay zero.
        let pixels = [0x01, 0x03, 0x00];
        let pattern = extract_lsb_pattern(&pixels);
        assert_eq!(pattern.len(), 1);
        assert_eq!(pattern.as_ref(), &[0b1100_0000]);
    }

    #[test]
    fn extract_lsb_pattern_empty() {
        let pixels: [u8; 0] = [];
        let pattern = extract_lsb_pattern(&pixels);
        assert!(pattern.is_empty());
    }

    // --- payload_to_bit_pattern -------------------------------------------

    #[test]
    fn payload_to_bit_pattern_no_target() {
        let payload = b"hello";
        let result = payload_to_bit_pattern(payload, None);
        assert_eq!(result.as_ref(), b"hello");
    }

    #[test]
    fn payload_to_bit_pattern_with_padding() {
        let payload = b"\xFF";
        let result = payload_to_bit_pattern(payload, Some(16));
        assert_eq!(result.len(), 2);
        assert_eq!(result.as_ref(), &[0xFF, 0x00]);
    }

    #[test]
    fn payload_to_bit_pattern_truncates_long_payload() {
        // 9 bits round up to 2 bytes, so the third payload byte is dropped.
        let payload = b"\xAB\xCD\xEF";
        let result = payload_to_bit_pattern(payload, Some(9));
        assert_eq!(result.len(), 2);
        assert_eq!(result.as_ref(), &[0xAB, 0xCD]);
    }

    // --- score_match ------------------------------------------------------

    #[test]
    fn score_match_identical() {
        let a = [0xAA, 0x55];
        let b = [0xAA, 0x55];
        assert_eq!(score_match(&a, &b), 0);
    }

    #[test]
    fn score_match_empty() {
        let a: [u8; 0] = [];
        let b: [u8; 0] = [];
        assert_eq!(score_match(&a, &b), u64::MAX);
    }

    #[test]
    fn score_match_one_side_empty() {
        let a: [u8; 0] = [];
        let b = [0xFF];
        assert_eq!(score_match(&a, &b), u64::MAX);
        assert_eq!(score_match(&b, &a), u64::MAX);
    }

    #[test]
    fn score_match_compares_common_prefix() {
        // Only the first byte is compared: 0xFF ^ 0x0F = 0xF0, i.e. 4 set bits.
        let a = [0xFF, 0x00];
        let b = [0x0F];
        assert_eq!(score_match(&a, &b), 4);
    }

    // --- is_close_match ---------------------------------------------------

    #[test]
    fn is_close_match_zero_distance() {
        assert!(is_close_match(0, 100));
    }

    #[test]
    fn is_close_match_exactly_five_percent() {
        assert!(is_close_match(5, 100));
    }

    #[test]
    fn is_close_match_above_threshold() {
        assert!(!is_close_match(6, 100));
    }

    #[test]
    fn is_close_match_zero_total() {
        assert!(!is_close_match(0, 0));
    }

    // --- filter_by_model --------------------------------------------------

    /// Builds a corpus entry for tests; the spectral key is set only when both
    /// `model_id` and `resolution` are provided.
    fn make_entry(model_id: Option<&str>, resolution: Option<(u32, u32)>) -> CorpusEntry {
        use crate::domain::types::{CoverMediaKind, SpectralKey};
        CorpusEntry {
            file_hash: [0u8; 32],
            path: "test.png".to_string(),
            cover_kind: CoverMediaKind::PngImage,
            precomputed_bit_pattern: Bytes::new(),
            spectral_key: model_id.zip(resolution).map(|(id, res)| SpectralKey {
                model_id: id.to_string(),
                resolution: res,
            }),
        }
    }

    #[test]
    fn filter_by_model_returns_matching_entries() {
        let entries = vec![
            make_entry(Some("gemini"), Some((1024, 1024))),
            make_entry(Some("gemini"), Some((512, 512))),
            make_entry(Some("other"), Some((1024, 1024))),
            make_entry(None, None),
        ];
        let result = filter_by_model(&entries, "gemini", (1024, 1024));
        assert_eq!(result.len(), 1);
    }

    #[test]
    fn filter_by_model_returns_empty_when_no_match() {
        let entries = vec![
            make_entry(Some("gemini"), Some((512, 512))),
            make_entry(None, None),
        ];
        let result = filter_by_model(&entries, "gemini", (1024, 1024));
        assert!(result.is_empty());
    }

    #[test]
    fn filter_by_model_excludes_no_key_entries() {
        let entries = vec![make_entry(None, None), make_entry(None, None)];
        let result = filter_by_model(&entries, "gemini", (1024, 1024));
        assert!(result.is_empty());
    }
}