use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
fn main() {
println!("๐งช Demonstrating Content-Based Deduplication Logic");
println!("==================================================");
let content1 = "This is an EC2 instance with standard configuration";
let content2 = "This is an EC2 instance with standard configuration"; let content3 = "This is a completely different RDS database instance";
let hash1 = generate_content_hash(content1);
let hash2 = generate_content_hash(content2);
let hash3 = generate_content_hash(content3);
let doc_id1 = format!("aws_estate-{:x}", hash1);
let doc_id2 = format!("aws_estate-{:x}", hash2);
let doc_id3 = format!("aws_estate-{:x}", hash3);
println!("\n๐ Content 1: \"{}\"", content1);
println!("๐ Document ID 1: {}", doc_id1);
println!("\n๐ Content 2: \"{}\"", content2);
println!("๐ Document ID 2: {}", doc_id2);
println!("\n๐ Content 3: \"{}\"", content3);
println!("๐ Document ID 3: {}", doc_id3);
println!("\nโ
**Deduplication Results:**");
if doc_id1 == doc_id2 {
println!("โ
Same content produces SAME document ID โ Will be deduplicated!");
println!(" Content 1 and 2 will share the same document (upsert behavior)");
} else {
println!("โ Same content produces DIFFERENT document IDs โ Would create duplicates!");
}
if doc_id1 != doc_id3 {
println!("โ
Different content produces DIFFERENT document ID โ Will create new document!");
println!(" Content 1 and 3 will be stored as separate documents");
} else {
println!("โ Different content produces SAME document ID โ Would cause collisions!");
}
println!("\n๐ **Hash Distribution:**");
println!("Hash 1: {:016x}", hash1);
println!("Hash 2: {:016x}", hash2);
println!("Hash 3: {:016x}", hash3);
println!("\n๐ **How This Solves Your Problem:**");
println!("1. ๐ Same content โ Same hash โ Same doc ID โ Upsert (delete old + add new)");
println!("2. ๐ Same content + different metadata โ Same hash โ Updates existing document");
println!("3. ๐ Different content โ Different hash โ Different doc ID โ Creates new document");
println!("4. ๐ Scales to 2M+ documents with O(1) hash lookup");
}
/// Returns a 64-bit hash of `content`, computed by feeding the string through
/// a freshly constructed `DefaultHasher`.
///
/// Equal strings always yield equal hashes within one build of the program.
///
/// NOTE(review): the standard library does not guarantee `DefaultHasher`'s
/// algorithm across Rust releases, so hashes that are persisted (for example
/// as part of a stored document ID) may change after a toolchain upgrade —
/// confirm whether that matters for the callers.
fn generate_content_hash(content: &str) -> u64 {
    let mut state = DefaultHasher::new();
    Hash::hash(content, &mut state);
    Hasher::finish(&state)
}