use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
use super::types::ContextFingerprint;
const MAX_SUMMARY_LENGTH: usize = 50;
#[derive(Debug, Clone, Default)]
pub struct ContextFingerprintGenerator {
seed: u64,
}
impl ContextFingerprintGenerator {
#[must_use]
pub fn new() -> Self {
Self::default()
}
#[must_use]
pub fn with_seed(seed: u64) -> Self {
Self { seed }
}
#[must_use]
pub fn generate(&self, content: &str) -> ContextFingerprint {
let hash = self.compute_hash(content);
let prefix_length = content.len();
let content_summary = Self::create_summary(content);
ContextFingerprint::new(hash, prefix_length, content_summary)
}
#[must_use]
pub fn generate_with_length(&self, content: &str, prefix_length: usize) -> ContextFingerprint {
let hash = self.compute_hash(content);
let content_summary = Self::create_summary(content);
ContextFingerprint::new(hash, prefix_length, content_summary)
}
#[must_use]
pub fn generate_prefix(&self, content: &str, prefix_len: usize) -> ContextFingerprint {
let prefix = if prefix_len >= content.len() {
content
} else {
let mut end = prefix_len;
while end > 0 && !content.is_char_boundary(end) {
end -= 1;
}
&content[..end]
};
let hash = self.compute_hash(prefix);
let content_summary = Self::create_summary(prefix);
ContextFingerprint::new(hash, prefix.len(), content_summary)
}
#[must_use]
pub fn generate_prefix_hierarchy(
&self,
content: &str,
step: usize,
min_length: usize,
) -> Vec<ContextFingerprint> {
let mut fingerprints = Vec::new();
let mut current_len = min_length.min(content.len());
while current_len <= content.len() {
fingerprints.push(self.generate_prefix(content, current_len));
if current_len == content.len() {
break;
}
current_len = (current_len + step).min(content.len());
}
fingerprints
}
fn compute_hash(&self, content: &str) -> u64 {
let mut hasher = DefaultHasher::new();
self.seed.hash(&mut hasher);
content.hash(&mut hasher);
hasher.finish()
}
fn create_summary(content: &str) -> String {
if content.len() <= MAX_SUMMARY_LENGTH {
content.to_string()
} else {
let mut end = MAX_SUMMARY_LENGTH - 3;
while end > 0 && !content.is_char_boundary(end) {
end -= 1;
}
format!("{}...", &content[..end])
}
}
#[must_use]
pub fn matches(&self, fp1: &ContextFingerprint, fp2: &ContextFingerprint) -> bool {
fp1.hash == fp2.hash && fp1.prefix_length == fp2.prefix_length
}
#[must_use]
pub fn could_be_prefix(&self, fp1: &ContextFingerprint, fp2: &ContextFingerprint) -> bool {
fp1.prefix_length <= fp2.prefix_length
}
}
pub trait Fingerprintable {
fn fingerprint(&self) -> ContextFingerprint;
fn fingerprint_with(&self, generator: &ContextFingerprintGenerator) -> ContextFingerprint;
}
impl Fingerprintable for str {
fn fingerprint(&self) -> ContextFingerprint {
ContextFingerprintGenerator::new().generate(self)
}
fn fingerprint_with(&self, generator: &ContextFingerprintGenerator) -> ContextFingerprint {
generator.generate(self)
}
}
impl Fingerprintable for String {
fn fingerprint(&self) -> ContextFingerprint {
ContextFingerprintGenerator::new().generate(self)
}
fn fingerprint_with(&self, generator: &ContextFingerprintGenerator) -> ContextFingerprint {
generator.generate(self)
}
}
#[derive(Debug, Clone)]
pub struct RollingHasher {
base: u64,
current_hash: u64,
base_power: u64,
length: usize,
}
impl RollingHasher {
#[must_use]
pub fn new() -> Self {
Self {
base: 31,
current_hash: 0,
base_power: 1,
length: 0,
}
}
#[must_use]
pub fn with_base(base: u64) -> Self {
Self {
base,
current_hash: 0,
base_power: 1,
length: 0,
}
}
pub fn append(&mut self, c: char) {
let char_value = c as u64;
self.current_hash = self
.current_hash
.wrapping_mul(self.base)
.wrapping_add(char_value);
self.base_power = self.base_power.wrapping_mul(self.base);
self.length += 1;
}
pub fn append_str(&mut self, s: &str) {
for c in s.chars() {
self.append(c);
}
}
#[must_use]
pub fn hash(&self) -> u64 {
self.current_hash
}
#[must_use]
pub fn length(&self) -> usize {
self.length
}
pub fn reset(&mut self) {
self.current_hash = 0;
self.base_power = 1;
self.length = 0;
}
#[must_use]
pub fn to_fingerprint(&self, content_summary: impl Into<String>) -> ContextFingerprint {
ContextFingerprint::new(self.current_hash, self.length, content_summary)
}
}
impl Default for RollingHasher {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_fingerprint_generator_basic() {
let generator = ContextFingerprintGenerator::new();
let fp = generator.generate("Hello, world!");
assert_eq!(fp.prefix_length, 13);
assert_eq!(fp.content_summary, "Hello, world!");
}
#[test]
fn test_fingerprint_generator_with_seed() {
let generator1 = ContextFingerprintGenerator::with_seed(42);
let generator2 = ContextFingerprintGenerator::with_seed(43);
let fp1 = generator1.generate("test");
let fp2 = generator2.generate("test");
assert_ne!(fp1.hash, fp2.hash);
}
#[test]
fn test_fingerprint_generator_long_content() {
let generator = ContextFingerprintGenerator::new();
let content = "A".repeat(100);
let fp = generator.generate(&content);
assert_eq!(fp.prefix_length, 100);
assert!(fp.content_summary.ends_with("..."));
assert!(fp.content_summary.len() <= MAX_SUMMARY_LENGTH);
}
#[test]
fn test_fingerprint_generator_deterministic() {
let generator = ContextFingerprintGenerator::new();
let fp1 = generator.generate("test content");
let fp2 = generator.generate("test content");
assert_eq!(fp1.hash, fp2.hash);
assert_eq!(fp1.prefix_length, fp2.prefix_length);
}
#[test]
fn test_fingerprint_generator_different_content() {
let generator = ContextFingerprintGenerator::new();
let fp1 = generator.generate("content A");
let fp2 = generator.generate("content B");
assert_ne!(fp1.hash, fp2.hash);
}
#[test]
fn test_generate_with_length() {
let generator = ContextFingerprintGenerator::new();
let fp = generator.generate_with_length("test content", 50);
assert_eq!(fp.prefix_length, 50);
assert_eq!(fp.content_summary, "test content");
}
#[test]
fn test_generate_prefix() {
let generator = ContextFingerprintGenerator::new();
let full = generator.generate("Hello, world!");
let prefix = generator.generate_prefix("Hello, world!", 5);
assert_ne!(full.hash, prefix.hash);
assert_eq!(prefix.prefix_length, 5);
}
#[test]
fn test_generate_prefix_utf8_safe() {
let generator = ContextFingerprintGenerator::new();
let content = "\u{3042}\u{3044}\u{3046}"; let prefix = generator.generate_prefix(content, 4);
assert!(prefix.prefix_length <= 4);
}
#[test]
fn test_generate_prefix_hierarchy() {
let generator = ContextFingerprintGenerator::new();
let fps = generator.generate_prefix_hierarchy("Hello world!", 3, 3);
assert!(!fps.is_empty());
for i in 1..fps.len() {
assert!(fps[i].prefix_length >= fps[i - 1].prefix_length);
}
assert_eq!(fps.last().unwrap().prefix_length, 12);
}
#[test]
fn test_matches() {
let generator = ContextFingerprintGenerator::new();
let fp1 = generator.generate("test");
let fp2 = generator.generate("test");
let fp3 = generator.generate("other");
assert!(generator.matches(&fp1, &fp2));
assert!(!generator.matches(&fp1, &fp3));
}
#[test]
fn test_could_be_prefix() {
let generator = ContextFingerprintGenerator::new();
let short = generator.generate("short");
let long = generator.generate("longer text");
assert!(generator.could_be_prefix(&short, &long));
assert!(!generator.could_be_prefix(&long, &short));
}
#[test]
fn test_fingerprintable_str() {
let fp = "test content".fingerprint();
assert_eq!(fp.prefix_length, 12);
}
#[test]
fn test_fingerprintable_string() {
let s = String::from("test content");
let fp = s.fingerprint();
assert_eq!(fp.prefix_length, 12);
}
#[test]
fn test_fingerprintable_with_generator() {
let generator = ContextFingerprintGenerator::with_seed(42);
let fp1 = "test".fingerprint_with(&generator);
let fp2 = "test".fingerprint();
assert_ne!(fp1.hash, fp2.hash);
}
#[test]
fn test_rolling_hasher_basic() {
let mut hasher = RollingHasher::new();
hasher.append('a');
hasher.append('b');
hasher.append('c');
assert_eq!(hasher.length(), 3);
assert_ne!(hasher.hash(), 0);
}
#[test]
fn test_rolling_hasher_append_str() {
let mut hasher1 = RollingHasher::new();
hasher1.append_str("abc");
let mut hasher2 = RollingHasher::new();
hasher2.append('a');
hasher2.append('b');
hasher2.append('c');
assert_eq!(hasher1.hash(), hasher2.hash());
assert_eq!(hasher1.length(), hasher2.length());
}
#[test]
fn test_rolling_hasher_reset() {
let mut hasher = RollingHasher::new();
hasher.append_str("test");
assert_ne!(hasher.hash(), 0);
assert_eq!(hasher.length(), 4);
hasher.reset();
assert_eq!(hasher.hash(), 0);
assert_eq!(hasher.length(), 0);
}
#[test]
fn test_rolling_hasher_to_fingerprint() {
let mut hasher = RollingHasher::new();
hasher.append_str("test");
let fp = hasher.to_fingerprint("test summary");
assert_eq!(fp.hash, hasher.hash());
assert_eq!(fp.prefix_length, 4);
assert_eq!(fp.content_summary, "test summary");
}
#[test]
fn test_rolling_hasher_with_base() {
let hasher1 = RollingHasher::new();
let hasher2 = RollingHasher::with_base(37);
let mut h1 = hasher1;
let mut h2 = hasher2;
h1.append_str("test");
h2.append_str("test");
assert_ne!(h1.hash(), h2.hash());
}
#[test]
fn test_rolling_hasher_incremental() {
let mut hasher = RollingHasher::new();
hasher.append_str("Hello");
let hash1 = hasher.hash();
hasher.append_str(", world!");
let hash2 = hasher.hash();
assert_ne!(hash1, hash2);
}
}