use llama_cpp_2::token::LlamaToken;
/// One element of the entry sequences compared by `get_common_prefix`.
///
/// An entry is either a single text token or one position belonging to an image.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum SlotEntry {
/// A text token.
Text(LlamaToken),
/// One position of an image.
///
/// `get_common_prefix` treats a contiguous run of `Image` entries that share both `hash`
/// and `group_id` as a single unit: the run is either matched in full or not at all.
// NOTE(review): without the `mtmd` feature the dead-code lint is silenced for this variant,
// presumably because nothing constructs it in that configuration — confirm.
#[cfg_attr(not(feature = "mtmd"), allow(dead_code))]
Image {
/// Hash identifying the image; two image entries can only match when their hashes are
/// equal. NOTE(review): presumably produced by `fnv1a_64` — confirm against the caller.
hash: u64,
/// Identifier that delimits one run of image entries from an adjacent run, even when
/// both runs carry the same `hash`.
group_id: u32,
},
}
/// Returns how many leading entries of `cur` and `new` can be considered identical.
///
/// Text entries match one at a time when their tokens are equal. Image entries are matched
/// as whole runs: when both sides hold an image with the same hash at the current position,
/// the run of entries sharing that hash and group id is measured in each slice, and the run
/// is accepted only if both runs have the same length. A partially matching image run is
/// never counted, so the returned prefix always ends on a run boundary.
///
/// Group ids are not compared between the two slices; each group id only delimits the run
/// inside its own slice.
pub(crate) fn get_common_prefix(cur: &[SlotEntry], new: &[SlotEntry]) -> usize {
    let limit = cur.len().min(new.len());
    let mut matched = 0;

    while matched < limit {
        // Number of entries the current position lets us advance by; any mismatch returns
        // the prefix length accumulated so far.
        let step = match (cur[matched], new[matched]) {
            (SlotEntry::Text(lhs), SlotEntry::Text(rhs)) if lhs == rhs => 1,
            (
                SlotEntry::Image {
                    hash: cur_hash,
                    group_id: cur_group,
                },
                SlotEntry::Image {
                    hash: new_hash,
                    group_id: new_group,
                },
            ) if cur_hash == new_hash => {
                let cur_run = group_run_len(cur, matched, cur_hash, cur_group);
                let new_run = group_run_len(new, matched, new_hash, new_group);
                // Runs of different length (or a run reaching past the shared range) mean
                // the image cannot be reused as a whole, so stop in front of it.
                if cur_run != new_run || matched + cur_run > limit {
                    return matched;
                }
                cur_run
            }
            _ => return matched,
        };
        matched += step;
    }

    matched
}
/// Counts the consecutive entries, beginning at index `start`, that are image entries
/// carrying exactly the given `hash` and `group_id`.
///
/// Counting stops at the first entry that is a text token, has a different hash or group
/// id, or at the end of `entries`. Returns 0 when `start` is at or past the end.
fn group_run_len(entries: &[SlotEntry], start: usize, hash: u64, group_id: u32) -> usize {
    let target = SlotEntry::Image { hash, group_id };
    entries
        .iter()
        .skip(start)
        .take_while(|&&entry| entry == target)
        .count()
}
/// Computes the 64-bit FNV-1a hash of `bytes`.
///
/// Starting from the FNV offset basis, each byte is XOR-ed into the running hash, which is
/// then multiplied by the FNV prime with wrapping arithmetic. An empty input yields the
/// offset basis itself.
// NOTE(review): the dead-code lint is silenced when the `mtmd` feature is off, presumably
// because the only callers are behind that feature — confirm.
#[cfg_attr(not(feature = "mtmd"), allow(dead_code))]
pub(crate) fn fnv1a_64(bytes: &[u8]) -> u64 {
    const OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const PRIME: u64 = 0x0000_0100_0000_01b3;

    bytes.iter().fold(OFFSET_BASIS, |acc, &byte| {
        (acc ^ u64::from(byte)).wrapping_mul(PRIME)
    })
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a text entry wrapping the token with the given id.
    fn text(id: i32) -> SlotEntry {
        SlotEntry::Text(LlamaToken::new(id))
    }

    /// Builds an image entry with the given hash and group id.
    fn image(hash: u64, group_id: u32) -> SlotEntry {
        SlotEntry::Image { hash, group_id }
    }

    /// An empty slice on either side shares nothing.
    #[test]
    fn empty_inputs() {
        let single = [text(1)];
        assert_eq!(get_common_prefix(&[], &[]), 0);
        assert_eq!(get_common_prefix(&single, &[]), 0);
        assert_eq!(get_common_prefix(&[], &single), 0);
    }

    /// A text sequence compared with itself matches in full.
    #[test]
    fn identical_text() {
        let entries = [text(1), text(2), text(3)];
        assert_eq!(get_common_prefix(&entries, &entries), 3);
    }

    /// The prefix ends at the first differing token.
    #[test]
    fn text_divergence() {
        let cur = [text(1), text(2), text(3)];
        let new = [text(1), text(2), text(99)];
        assert_eq!(get_common_prefix(&cur, &new), 2);
    }

    /// When `new` only appends to `cur`, all of `cur` is shared.
    #[test]
    fn new_extends_cur() {
        let cur = [text(1), text(2)];
        let new = [text(1), text(2), text(3), text(4)];
        assert_eq!(get_common_prefix(&cur, &new), 2);
    }

    /// An identical image run between text tokens is matched as a whole.
    #[test]
    fn identical_image() {
        let cur = [
            text(1),
            image(0xabcd, 0),
            image(0xabcd, 0),
            image(0xabcd, 0),
            text(9),
        ];
        let new = cur;
        assert_eq!(get_common_prefix(&cur, &new), 5);
    }

    /// Images with different hashes stop the prefix right before the image.
    #[test]
    fn image_hash_mismatch_at_offset() {
        let cur = [text(1), image(0xabcd, 0), image(0xabcd, 0)];
        let new = [text(1), image(0xbeef, 0), image(0xbeef, 0)];
        assert_eq!(get_common_prefix(&cur, &new), 1);
    }

    /// Same hash but different run lengths: the image is not matched at all.
    #[test]
    fn image_size_mismatch_same_hash() {
        let cur = [image(0xabcd, 0), image(0xabcd, 0), image(0xabcd, 0)];
        let new = [image(0xabcd, 0), image(0xabcd, 0)];
        assert_eq!(get_common_prefix(&cur, &new), 0);
    }

    /// An image never matches a text token.
    #[test]
    fn image_vs_text_at_offset() {
        let cur = [text(1), image(0xabcd, 0)];
        let new = [text(1), text(2)];
        assert_eq!(get_common_prefix(&cur, &new), 1);
    }

    /// Adjacent runs with the same hash are separated by their group ids.
    #[test]
    fn group_boundary_respects_group_id() {
        let two_groups = [image(0x1, 0), image(0x1, 0), image(0x1, 1), image(0x1, 1)];
        assert_eq!(get_common_prefix(&two_groups, &two_groups), 4);

        // The second group is truncated, so only the first group is shared.
        let truncated = [image(0x1, 0), image(0x1, 0), image(0x1, 1)];
        assert_eq!(get_common_prefix(&two_groups, &truncated), 2);
    }

    /// Reference values for the 64-bit FNV-1a hash.
    #[test]
    fn fnv1a_64_known_vectors() {
        assert_eq!(fnv1a_64(b""), 0xcbf2_9ce4_8422_2325);
        assert_eq!(fnv1a_64(b"a"), 0xaf63_dc4c_8601_ec8c);
        assert_eq!(fnv1a_64(b"foobar"), 0x8594_4171_f739_67e8);
    }

    /// Different payloads of equal length produce different hashes.
    #[test]
    fn fnv1a_64_image_distinct() {
        let ones = [1u8; 1024];
        let twos = [2u8; 1024];
        assert_ne!(fnv1a_64(&ones), fnv1a_64(&twos));
    }
}