use serde_json::json;
use super::*;
/// Builds a `ConversationMessage` fixture for the tests in this file.
///
/// `id`, `content` and `created` are copied into the message's `id`,
/// `content` and `created_at` fields. The message type is fixed to `"text"`,
/// the sender to `"user"`, and the extra metadata to an empty JSON object.
fn msg(id: &str, content: &str, created: &str) -> ConversationMessage {
    let id = String::from(id);
    let content = String::from(content);
    let created_at = String::from(created);
    ConversationMessage {
        id,
        content,
        message_type: String::from("text"),
        extra_metadata: json!({}),
        sender: String::from("user"),
        created_at,
    }
}
/// A query that occurs only in the middle of a longer word must still hit:
/// "cat" is searched against a message containing "concatenate".
#[test]
fn substring_inside_word_matches() {
    let message = msg("m1", "concatenate the strings", "2026-04-10T10:00:00Z");
    let mut index = InvertedIndex::new();
    index.insert("t1", message);

    let results = index.search("cat", 10, None);

    assert_eq!(results.len(), 1);
    let only = &results[0];
    assert_eq!(only.message_id, "m1");
}
/// A lowercase ASCII query ("krakow") must match Polish text that contains
/// the capitalised, inflected form "Krakowa".
///
/// NOTE(review): the indexed sentence contains diacritics ("Lecę", "dziś"),
/// but the query term itself has none — confirm whether a query such as
/// "lece" should also be covered here.
#[test]
fn polish_diacritics_normalized_both_sides() {
    let message = msg("m1", "Lecę dziś do Krakowa", "2026-04-10T10:00:00Z");
    let mut index = InvertedIndex::new();
    index.insert("t1", message);

    let results = index.search("krakow", 10, None);

    assert_eq!(results.len(), 1, "krakow should match Krakowa");
}
/// A two-character CJK query ("東京") must find a Japanese sentence that
/// contains it.
#[test]
fn japanese_bigram_match() {
    let message = msg("m1", "東京タワーが見える", "2026-04-10T10:00:00Z");
    let mut index = InvertedIndex::new();
    index.insert("t1", message);

    let results = index.search("東京", 10, None);

    assert_eq!(results.len(), 1, "two-char CJK query should match");
}
/// An Arabic query written without vowel marks ("كتب") must match indexed
/// text that carries them ("كَتَبَ").
#[test]
fn arabic_harakat_stripped() {
    let message = msg("m1", "كَتَبَ الطالب", "2026-04-10T10:00:00Z");
    let mut index = InvertedIndex::new();
    index.insert("t1", message);

    let results = index.search("كتب", 10, None);

    assert_eq!(results.len(), 1, "harakat stripping should equalize forms");
}
/// When a thread id is passed as the third `search` argument, messages from
/// that thread are left out: two threads hold the same content, and only the
/// hit from the other thread comes back.
#[test]
fn excludes_active_thread() {
    let in_active = msg("ma", "postgres deploy", "2026-04-10T10:00:00Z");
    let in_other = msg("mo", "postgres deploy", "2026-04-10T10:01:00Z");

    let mut index = InvertedIndex::new();
    index.insert("active", in_active);
    index.insert("other", in_other);

    let results = index.search("postgres", 10, Some("active"));

    assert_eq!(results.len(), 1);
    let survivor = &results[0];
    assert_eq!(survivor.thread_id, "other");
}
/// Searching with an empty query string yields no hits, even though the
/// index holds a message.
#[test]
fn empty_query_returns_empty() {
    let message = msg("m", "anything here", "2026-04-10T10:00:00Z");
    let mut index = InvertedIndex::new();
    index.insert("t", message);

    let results = index.search("", 10, None);

    assert!(results.is_empty());
}
/// A query made up solely of one- and two-letter words ("a is on") yields no
/// hits against an index holding "Postgres".
#[test]
fn short_terms_only_returns_empty() {
    let message = msg("m", "Postgres", "2026-04-10T10:00:00Z");
    let mut index = InvertedIndex::new();
    index.insert("t", message);

    let results = index.search("a is on", 10, None);

    assert!(results.is_empty());
}
/// A limit of zero yields no hits, even for a query that matches the indexed
/// message's content.
#[test]
fn limit_zero_returns_empty() {
    let message = msg("m", "Postgres", "2026-04-10T10:00:00Z");
    let mut index = InvertedIndex::new();
    index.insert("t", message);

    let results = index.search("postgres", 0, None);

    assert!(results.is_empty());
}
/// Pins the score of a natural-language query against a single message: the
/// one returned hit must score 0.4, within a 1e-9 tolerance.
#[test]
fn score_matches_legacy_semantics() {
    let message = msg(
        "m1",
        "Remember: my project is called Phoenix and uses Go and PostgreSQL.",
        "2026-04-10T10:00:00Z",
    );
    let mut index = InvertedIndex::new();
    index.insert("t1", message);

    let results = index.search("What database does my project use", 10, None);

    assert_eq!(results.len(), 1);
    // Compare with a tolerance rather than exact float equality.
    let deviation = (results[0].score - 0.4).abs();
    assert!(deviation < 1e-9, "score = {}", results[0].score);
}
/// Removing a thread takes all of its messages out of the search results and
/// leaves the other thread's message in place.
#[test]
fn remove_thread_drops_all_messages() {
    // (thread id, message id, content, created-at)
    let fixtures = [
        ("t1", "m1", "postgres deploy", "2026-04-10T10:00:00Z"),
        ("t1", "m2", "postgres backup", "2026-04-10T10:01:00Z"),
        ("t2", "m3", "postgres replica", "2026-04-10T10:02:00Z"),
    ];

    let mut index = InvertedIndex::new();
    for (thread, id, content, created) in fixtures {
        index.insert(thread, msg(id, content, created));
    }

    // All three messages match before the removal.
    let before = index.search("postgres", 10, None);
    assert_eq!(before.len(), 3);

    index.remove_thread("t1");

    let after = index.search("postgres", 10, None);
    assert_eq!(after.len(), 1);
    let remaining = &after[0];
    assert_eq!(remaining.thread_id, "t2");
}
/// Inserting the very same message twice into the same thread must produce
/// only one search hit.
#[test]
fn duplicate_message_id_is_idempotent() {
    let original = msg("m1", "postgres deploy", "2026-04-10T10:00:00Z");
    let duplicate = original.clone();

    let mut index = InvertedIndex::new();
    index.insert("t1", duplicate);
    index.insert("t1", original);

    let results = index.search("postgres", 10, None);

    assert_eq!(results.len(), 1, "duplicate insert must not duplicate hits");
}
/// Indexes `LARGE_CANDIDATE_LIMIT + 50` messages that all contain the query
/// term, then checks that the search still honours the requested limit and
/// that every returned hit carries a score of exactly 0.0.
///
/// NOTE(review): the test name refers to a recency fallback, but the order
/// of the hits is not asserted here.
/// NOTE(review): the minute field of the generated timestamp is `i / 60`, so
/// it exceeds 59 once more than 3600 messages are inserted — confirm that
/// `LARGE_CANDIDATE_LIMIT` is small enough, or that this does not matter.
#[test]
fn pathological_query_short_circuits_to_recency() {
    let total = LARGE_CANDIDATE_LIMIT + 50;

    let mut index = InvertedIndex::new();
    for seq in 0..total {
        let (minute, second) = (seq / 60, seq % 60);
        let id = format!("m{seq}");
        let created = format!("2026-04-10T10:{minute:02}:{second:02}Z");
        index.insert("bulk", msg(&id, "common payload", &created));
    }

    let results = index.search("common", 5, None);

    assert_eq!(results.len(), 5, "fallback must still respect limit");
    let has_nonzero_score = results.iter().any(|hit| hit.score != 0.0);
    assert!(!has_nonzero_score);
}
/// Five messages inserted under one thread id (all built by `msg`, so all
/// sharing one sender) must leave exactly one entry in `thread_id_pool` and
/// one in `role_pool`. Removing the thread must empty `thread_id_pool`.
#[test]
fn intern_pool_dedupes_repeated_thread_id_and_role() {
    let mut index = InvertedIndex::new();
    for second in 0..5 {
        let id = format!("m{second}");
        let created = format!("2026-04-10T10:00:{second:02}Z");
        index.insert("shared-thread", msg(&id, "payload", &created));
    }

    let threads_interned = index.thread_id_pool.len();
    let roles_interned = index.role_pool.len();
    assert_eq!(threads_interned, 1);
    assert_eq!(roles_interned, 1);

    index.remove_thread("shared-thread");

    let threads_after_removal = index.thread_id_pool.len();
    assert_eq!(threads_after_removal, 0);
}
/// Intersecting the vector `[1, 2, 3, 5, 7, 9]` in place with the set
/// `{2, 4, 5, 9}` must leave `[2, 5, 9]`.
#[test]
fn intersect_sorted_with_btreeset_basic() {
    let mut kept: Vec<u32> = Vec::from([1, 2, 3, 5, 7, 9]);
    let filter: BTreeSet<u32> = BTreeSet::from([2, 4, 5, 9]);

    intersect_sorted_with_btreeset(&mut kept, &filter);

    assert_eq!(kept, [2, 5, 9]);
}
/// Intersecting a non-empty vector with an empty set must leave the vector
/// empty.
#[test]
fn intersect_sorted_with_btreeset_empty_other() {
    let mut kept: Vec<u32> = Vec::from([1, 2, 3]);
    let nothing = BTreeSet::<u32>::new();

    intersect_sorted_with_btreeset(&mut kept, &nothing);

    assert!(kept.is_empty());
}