/// Minimum content length for the similarity check.
///
/// Exists so that short words do not end up matching everything.
// NOTE(review): the unit (bytes vs. characters) is not visible here; confirm
// against the code that compares content lengths to this threshold.
pub const MIN_SIMILARITY_LENGTH: usize = 10;