// Internal modules. All of them are private; only the items re-exported below are
// reachable from outside this module.
mod base;
mod index;
mod ngram;
mod pipeline;
mod search;
mod tokenizer;
mod types;
// Public re-exports of selected items from the private `types` module.
pub use types::{
DocData, InMemoryIndex, PositionEncoding, SNAPSHOT_VERSION, SearchHit, SearchMode,
SnapshotData, TermDomain,
};
// Public re-exports of selected items from `tokenizer::dictionary`.
pub use tokenizer::dictionary::{
DictionaryConfig, DictionaryLanguage, DictionarySegmenter, ScriptDictionary,
train_dictionary_config,
};
#[cfg(test)]
mod tests {
// `MatchedTerm` is not among the parent's re-exports, so it is imported from `types` directly.
use super::types::MatchedTerm;
use super::*;
use std::collections::HashSet;
use tempfile::tempdir;
// Index name shared by the tests in this module.
const INDEX: &str = "test-index";
// Document ids used for the Chinese, English and Japanese fixtures respectively.
const DOC_CN: &str = "doc-cn";
const DOC_EN: &str = "doc-en";
const DOC_JP: &str = "doc-jp";
/// Panics unless `results` contains an entry whose id equals `doc_id`.
///
/// `results` is a list of `(doc_id, score)` pairs; the score is ignored. On failure the
/// panic message names the missing id and dumps the full result list.
fn assert_contains_doc(results: &[(String, f64)], doc_id: &str) {
    let found = results
        .iter()
        .map(|(id, _)| id.as_str())
        .any(|id| id == doc_id);
    assert!(
        found,
        "expected results to contain doc {doc_id}, got {:?}",
        results
    );
}
/// A document containing "你好世界" must be returned for the full-pinyin query `nihao`.
#[test]
fn chinese_full_pinyin_search() {
    let mut idx = InMemoryIndex::default();
    idx.add_doc(INDEX, DOC_CN, "你好世界", true);

    assert_contains_doc(&idx.search(INDEX, "nihao"), DOC_CN);
}
/// A document containing "你好世界" must be returned for the pinyin-initials query `nh`.
#[test]
fn chinese_initials_search() {
    let mut idx = InMemoryIndex::default();
    idx.add_doc(INDEX, DOC_CN, "你好世界", true);

    assert_contains_doc(&idx.search(INDEX, "nh"), DOC_CN);
}
/// The initials prefix `nhs` must hit the document, and at least one highlight span derived
/// from that hit's matched terms must start where a span for the full initials `nhsj` starts.
#[test]
fn chinese_initials_prefix_search() {
    let mut idx = InMemoryIndex::default();
    idx.add_doc(INDEX, DOC_CN, "你好世界", true);
    assert_contains_doc(&idx.search(INDEX, "nhs"), DOC_CN);

    // Reference spans: the complete initials string.
    let exact = idx.get_matches(INDEX, DOC_CN, "nhsj");
    assert!(!exact.is_empty());

    let Some(prefix_hit) = idx
        .search_hits(INDEX, "nhs")
        .into_iter()
        .find(|candidate| candidate.doc_id == DOC_CN)
    else {
        panic!("expected hit for prefix query");
    };
    let prefix_matches =
        idx.get_matches_for_matched_terms(INDEX, DOC_CN, &prefix_hit.matched_terms);
    assert!(!prefix_matches.is_empty());

    // Only the start offsets (tuple field 0) are compared.
    let shares_start = prefix_matches
        .iter()
        .any(|prefix| exact.iter().any(|full| full.0 == prefix.0));
    assert!(
        shares_start,
        "prefix highlight should align to original start"
    );
}
/// The full-pinyin prefix `nih` must hit the document, and at least one highlight span derived
/// from that hit's matched terms must start where a span for `nihaoshijie` starts.
#[test]
fn chinese_full_pinyin_prefix_search() {
    let mut idx = InMemoryIndex::default();
    idx.add_doc(INDEX, DOC_CN, "你好世界", true);
    assert_contains_doc(&idx.search(INDEX, "nih"), DOC_CN);

    // Reference spans: the complete pinyin string.
    let exact = idx.get_matches(INDEX, DOC_CN, "nihaoshijie");
    assert!(!exact.is_empty());

    let Some(prefix_hit) = idx
        .search_hits(INDEX, "nih")
        .into_iter()
        .find(|candidate| candidate.doc_id == DOC_CN)
    else {
        panic!("expected hit for prefix query");
    };
    let prefix_matches =
        idx.get_matches_for_matched_terms(INDEX, DOC_CN, &prefix_hit.matched_terms);
    assert!(!prefix_matches.is_empty());

    // Only the start offsets (tuple field 0) are compared.
    let shares_start = prefix_matches
        .iter()
        .any(|prefix| exact.iter().any(|full| full.0 == prefix.0));
    assert!(
        shares_start,
        "prefix highlight should align to original start"
    );
}
/// The misspelled pinyin query `nihap` must still yield a hit with matched terms through
/// `search_hits`, while an explicit `SearchMode::Fuzzy` search for it must return nothing.
#[test]
fn pinyin_fuzzy_search() {
    let mut idx = InMemoryIndex::default();
    idx.add_doc(INDEX, DOC_CN, "你好世界", true);

    let auto_hits = idx.search_hits(INDEX, "nihap");
    let has_pinyin_match = auto_hits
        .iter()
        .any(|hit| hit.doc_id == DOC_CN && !hit.matched_terms.is_empty());
    assert!(
        has_pinyin_match,
        "expected matched pinyin term in fuzzy hits: {:?}",
        auto_hits
            .iter()
            .map(|hit| (&hit.doc_id, &hit.matched_terms))
            .collect::<Vec<_>>()
    );

    let fuzzy_only = idx.search_with_mode(INDEX, "nihap", SearchMode::Fuzzy);
    assert!(
        fuzzy_only.is_empty(),
        "expected SearchMode::Fuzzy to only search original domain, got {:?}",
        fuzzy_only
    );
}
// The misspelled query "fuzze" must hit the English document, and the hit must report
// "fuzzy" as a matched term in the `TermDomain::Original` domain.
#[test]
fn english_fuzzy_search() {
let mut index = InMemoryIndex::default();
index.add_doc(INDEX, DOC_EN, "fuzzy search handles typos", true);
let hits = index.search_hits(INDEX, "fuzze");
// No custom message here: on failure the panic text is the stringified condition itself.
assert!(hits.iter().any(|h| {
h.doc_id == DOC_EN
&& h.matched_terms
.iter()
.any(|t| t.term == "fuzzy" && t.domain == TermDomain::Original)
}));
}
/// An upper-case, hyphenated document ("MEMORY-INDEXER") must be found by the lower-case
/// query "memory-indexer" in exact mode.
#[test]
fn english_query_splits_separators_and_lowercases() {
    let mut idx = InMemoryIndex::default();
    idx.add_doc(INDEX, DOC_EN, "MEMORY-INDEXER", true);

    let exact_hits = idx.search_with_mode(INDEX, "memory-indexer", SearchMode::Exact);
    assert_contains_doc(&exact_hits, DOC_EN);
}
/// A single Cyrillic word must match exactly when it is part of a space-separated phrase.
#[test]
fn cyrillic_term_matches_inside_phrase() {
    const DOC_RU: &str = "doc-ru";

    let mut idx = InMemoryIndex::default();
    idx.add_doc(INDEX, DOC_RU, "привет мир", true);

    assert_contains_doc(
        &idx.search_with_mode(INDEX, "привет", SearchMode::Exact),
        DOC_RU,
    );
}
/// A single Greek word must match exactly when it is part of a space-separated phrase.
#[test]
fn greek_term_matches_inside_phrase() {
    const DOC_GR: &str = "doc-gr";

    let mut idx = InMemoryIndex::default();
    idx.add_doc(INDEX, DOC_GR, "γειά σου κόσμε", true);

    assert_contains_doc(
        &idx.search_with_mode(INDEX, "γειά", SearchMode::Exact),
        DOC_GR,
    );
}
/// A Cyrillic word must match exactly even when the document separates words with a comma.
#[test]
fn cyrillic_term_matches_with_punctuation() {
    const DOC_RU_PUNCT: &str = "doc-ru-punct";

    let mut idx = InMemoryIndex::default();
    idx.add_doc(INDEX, DOC_RU_PUNCT, "привет, привет", true);

    assert_contains_doc(
        &idx.search_with_mode(INDEX, "привет", SearchMode::Exact),
        DOC_RU_PUNCT,
    );
}
/// An Armenian word must match exactly even when the document separates words with a comma.
#[test]
fn armenian_term_matches_with_punctuation() {
    const DOC_HY_PUNCT: &str = "doc-hy-punct";

    let mut idx = InMemoryIndex::default();
    idx.add_doc(INDEX, DOC_HY_PUNCT, "բարեւ, աշխարհ", true);

    assert_contains_doc(
        &idx.search_with_mode(INDEX, "բարեւ", SearchMode::Exact),
        DOC_HY_PUNCT,
    );
}
/// Fuzzy mode must match a term that mixes letters and digits: the transposed query
/// "versoin2" has to find a document containing "version2".
#[test]
fn fuzzy_search_allows_alphanumeric_terms() {
    let mut idx = InMemoryIndex::default();
    idx.add_doc(INDEX, DOC_EN, "version2 stable", true);

    let fuzzy_hits = idx.search_with_mode(INDEX, "versoin2", SearchMode::Fuzzy);
    assert_contains_doc(&fuzzy_hits, DOC_EN);
}
/// Fuzzy mode must cope with a hyphenated query in which one part is misspelled
/// ("memry-indexer" against the document "memory-indexer").
#[test]
fn fuzzy_search_handles_separated_query_terms() {
    let mut idx = InMemoryIndex::default();
    idx.add_doc(INDEX, DOC_EN, "memory-indexer", true);

    let fuzzy_hits = idx.search_with_mode(INDEX, "memry-indexer", SearchMode::Fuzzy);
    assert_contains_doc(&fuzzy_hits, DOC_EN);
}
/// Fuzzy mode must still return a document for a two-letter term that matches verbatim.
#[test]
fn fuzzy_search_handles_short_terms() {
    let mut idx = InMemoryIndex::default();
    idx.add_doc(INDEX, DOC_EN, "go go", true);

    let fuzzy_hits = idx.search_with_mode(INDEX, "go", SearchMode::Fuzzy);
    assert_contains_doc(&fuzzy_hits, DOC_EN);
}
/// Highlight spans for the pinyin query `nihao` must equal the spans returned for the
/// original-script query "你好".
#[test]
fn pinyin_highlight_uses_original_positions() {
    let mut idx = InMemoryIndex::default();
    idx.add_doc(INDEX, DOC_CN, "你好世界", true);

    let hanzi_spans = idx.get_matches(INDEX, DOC_CN, "你好");
    let has_hanzi_spans = !hanzi_spans.is_empty();
    assert!(
        has_hanzi_spans,
        "expected direct chinese match to have positions"
    );

    let pinyin_spans = idx.get_matches(INDEX, DOC_CN, "nihao");
    assert_eq!(pinyin_spans, hanzi_spans);
}
/// For the mixed query "hello shi" against "hello 世界", the highlighted slices must include
/// "hello"; if any slice contains non-ASCII text, one of the slices must be exactly "世界".
#[test]
fn highlight_prefers_original_for_mixed_scripts() {
    let mut idx = InMemoryIndex::default();
    idx.add_doc(INDEX, DOC_CN, "hello 世界", true);

    let all_hits = idx.search_hits(INDEX, "hello shi");
    let hit = all_hits
        .iter()
        .find(|candidate| candidate.doc_id == DOC_CN)
        .expect("expected hit for mixed script query");

    let spans = idx.get_matches_for_matched_terms(INDEX, DOC_CN, &hit.matched_terms);
    let content = idx.get_doc(INDEX, DOC_CN).unwrap();

    // Turn every (start, end) span into the text it covers.
    let mut slices: Vec<String> = Vec::new();
    for (start, end) in spans.iter() {
        slices.push(utf16_slice(&content, *start, *end));
    }

    let has_ascii_word = slices.iter().any(|s| s == "hello");
    assert!(
        has_ascii_word,
        "expected original spans for mixed script matches, got {:?}",
        slices
    );

    // The CJK check is conditional: it only applies when some slice is not pure ASCII.
    let has_non_ascii_slice = slices.iter().any(|s| !s.is_ascii());
    if has_non_ascii_slice {
        let has_cjk_word = slices.iter().any(|s| s == "世界");
        assert!(
            has_cjk_word,
            "expected CJK spans for mixed script matches, got {:?}",
            slices
        );
    }
}
/// Highlight spans derived from the matched terms of the pinyin prefix query `nih` must
/// equal the spans returned for the original-script query "你好".
#[test]
fn pinyin_prefix_highlight_uses_original_spans() {
    let mut idx = InMemoryIndex::default();
    idx.add_doc(INDEX, DOC_CN, "你好世界", true);

    let all_hits = idx.search_hits(INDEX, "nih");
    let hit = all_hits
        .iter()
        .find(|candidate| candidate.doc_id == DOC_CN)
        .expect("expected prefix pinyin hit");

    let prefix_spans = idx.get_matches_for_matched_terms(INDEX, DOC_CN, &hit.matched_terms);
    let hanzi_spans = idx.get_matches(INDEX, DOC_CN, "你好");
    assert_eq!(
        prefix_spans, hanzi_spans,
        "prefix highlight should map back to original spans"
    );
}
/// With UTF-16 position encoding, the pinyin query `neicun` against a long Chinese sentence
/// must produce highlight spans, and every span must cover exactly "内存".
#[test]
fn pinyin_highlight_handles_trailing_ascii() {
    let mut idx = InMemoryIndex::with_position_encoding(PositionEncoding::Utf16);
    idx.add_doc(
        INDEX,
        DOC_CN,
        "美光将在全球内存供应短缺之际退出消费级内存业务",
        true,
    );

    let hits = idx.search_hits(INDEX, "neicun");
    let Some(hit) = hits.iter().find(|candidate| candidate.doc_id == DOC_CN) else {
        panic!("expected hit for neicun, got {:?}", hits);
    };

    let spans = idx.get_matches_for_matched_terms(INDEX, DOC_CN, &hit.matched_terms);
    let has_spans = !spans.is_empty();
    assert!(
        has_spans,
        "expected highlight spans for pinyin match, got none"
    );

    let content = idx.get_doc(INDEX, DOC_CN).unwrap();
    let mut slices: Vec<String> = Vec::new();
    for (start, end) in spans.iter() {
        slices.push(utf16_slice(&content, *start, *end));
    }

    let only_original_term = slices.iter().all(|s| s == "内存");
    assert!(
        only_original_term,
        "expected highlights to stay on original term, got {:?}",
        slices
    );
}
/// Returns the part of `content` covered by the half-open UTF-16 code-unit range
/// `start..end`.
///
/// Offsets count UTF-16 code units, so a character outside the Basic Multilingual Plane
/// occupies two units. Edge cases:
///
/// * an empty or inverted range (`start >= end`) yields an empty string;
/// * a `start` at or beyond the end of `content` yields an empty string;
/// * an `end` beyond the end of `content` is clamped to the end of `content`;
/// * an offset that falls inside a surrogate pair is rounded up to the next character
///   boundary.
///
/// Returning an empty string for degenerate spans matters for the highlight tests: a bogus
/// span must not silently expand to a prefix of the document and satisfy an assertion.
fn utf16_slice(content: &str, start: u32, end: u32) -> String {
    if start >= end {
        return String::new();
    }

    let mut utf16_pos = 0u32;
    let mut start_byte: Option<usize> = None;
    let mut end_byte = content.len();

    for (idx, ch) in content.char_indices() {
        // `utf16_pos` is the offset at which `ch` begins.
        if start_byte.is_none() && utf16_pos >= start {
            start_byte = Some(idx);
        }
        // `len_utf16` is always 1 or 2, so the cast cannot truncate.
        utf16_pos += ch.len_utf16() as u32;
        // `utf16_pos` is now the offset just past `ch`.
        if utf16_pos >= end {
            end_byte = idx + ch.len_utf8();
            break;
        }
    }

    match start_byte {
        Some(first) if first <= end_byte => content[first..end_byte].to_string(),
        _ => String::new(),
    }
}
/// With one document that literally contains "nihao" and one Chinese document whose pinyin
/// is "nihao": exact mode and the default search must return only the literal document,
/// while pinyin mode must return the Chinese one.
#[test]
fn exact_search_prefers_original_terms() {
    let mut idx = InMemoryIndex::default();
    idx.add_doc(INDEX, DOC_EN, "nihao greeting", true);
    idx.add_doc(INDEX, DOC_CN, "你好世界", true);

    let exact_hits = idx.search_with_mode(INDEX, "nihao", SearchMode::Exact);
    assert_contains_doc(&exact_hits, DOC_EN);
    let exact_has_other_docs = exact_hits.iter().any(|(id, _)| id != DOC_EN);
    assert!(
        !exact_has_other_docs,
        "expected exact search to ignore pinyin matches, got {:?}",
        exact_hits
    );

    let auto_hits = idx.search(INDEX, "nihao");
    assert_contains_doc(&auto_hits, DOC_EN);
    let auto_has_chinese_doc = auto_hits.iter().any(|(id, _)| id == DOC_CN);
    assert!(
        !auto_has_chinese_doc,
        "auto search should stop at exact matches"
    );

    assert_contains_doc(
        &idx.search_with_mode(INDEX, "nihao", SearchMode::Pinyin),
        DOC_CN,
    );
}
/// The two-character kana query "こん" must hit the Japanese document and produce
/// highlight offsets.
#[test]
fn japanese_ngram_search() {
    let mut idx = InMemoryIndex::default();
    idx.add_doc(INDEX, DOC_JP, "こんにちは世界", true);

    assert_contains_doc(&idx.search(INDEX, "こん"), DOC_JP);

    let spans = idx.get_matches(INDEX, DOC_JP, "こん");
    let has_spans = !spans.is_empty();
    assert!(has_spans, "expected offsets for japanese ngram matches");
}
/// Kanji that appear next to kana must not be searchable by pinyin: `dongjing` in pinyin
/// mode has to return nothing for "東京へようこそ".
#[test]
fn kanji_adjacent_to_kana_skips_pinyin() {
    let mut idx = InMemoryIndex::default();
    idx.add_doc(INDEX, DOC_JP, "東京へようこそ", true);

    let pinyin_hits = idx.search_with_mode(INDEX, "dongjing", SearchMode::Pinyin);
    assert!(
        pinyin_hits.is_empty(),
        "kanji near kana should not derive pinyin, got {:?}",
        pinyin_hits
    );
}
/// For the three-term exact query "apple banana cherry": documents matching two or three
/// terms must be returned, the single-term document must be dropped, and the three-term
/// document must score strictly higher than the two-term one.
#[test]
fn exact_search_applies_minimum_should_match() {
    let mut idx = InMemoryIndex::default();
    idx.add_doc(INDEX, "doc-2-terms", "apple banana", true);
    idx.add_doc(INDEX, "doc-3-terms", "apple banana cherry", true);
    idx.add_doc(INDEX, "doc-1-term", "apple", true);

    let hits = idx.search_with_mode(INDEX, "apple banana cherry", SearchMode::Exact);
    assert_contains_doc(&hits, "doc-2-terms");
    assert_contains_doc(&hits, "doc-3-terms");

    let single_term_absent = hits.iter().all(|(id, _)| id != "doc-1-term");
    assert!(
        single_term_absent,
        "docs below minimum_should_match should be filtered out"
    );

    // Looks up the score of a document that is known to be in `hits`.
    let score_of = |wanted: &str| {
        hits.iter()
            .find(|(id, _)| id == wanted)
            .map(|(_, score)| *score)
            .unwrap()
    };
    let score_two = score_of("doc-2-terms");
    let score_three = score_of("doc-3-terms");
    assert!(
        score_three > score_two,
        "more matched terms should score higher: {} vs {}",
        score_three,
        score_two
    );
}
/// "重庆火锅" must be reachable in pinyin mode through both readings of 重 (`zhongqing` and
/// `chongqing`), and the matched terms of the `zhongqing` hit must contain one of them.
#[test]
fn pinyin_polyphonic_variants_for_short_tokens() {
    let mut idx = InMemoryIndex::default();
    idx.add_doc(INDEX, DOC_CN, "重庆火锅", true);

    let zhong_hits = idx.search_with_mode_hits(INDEX, "zhongqing", SearchMode::Pinyin);
    let zhong_found = zhong_hits.iter().any(|hit| hit.doc_id == DOC_CN);
    assert!(zhong_found, "expected zhongqing variant to hit");

    let chong_hits = idx.search_with_mode_hits(INDEX, "chongqing", SearchMode::Pinyin);
    let chong_found = chong_hits.iter().any(|hit| hit.doc_id == DOC_CN);
    assert!(chong_found, "expected chongqing variant to hit");

    // Falls back to an empty list if the document is missing from the hits.
    let matched_terms: Vec<MatchedTerm> =
        match zhong_hits.into_iter().find(|hit| hit.doc_id == DOC_CN) {
            Some(hit) => hit.matched_terms,
            None => Vec::new(),
        };
    let has_variant = matched_terms
        .iter()
        .any(|term| term.term.contains("zhongqing") || term.term.contains("chongqing"));
    assert!(
        has_variant,
        "expected polyphonic pinyin variants in matched_terms, got {:?}",
        matched_terms
    );
}
/// The misspelled query "memryindexer" must hit a document containing "memoryIndexer",
/// report the lower-cased original term as matched, and that matched term must resolve to
/// highlight spans.
#[test]
fn get_matches_for_terms_uses_matched_terms() {
    let mut idx = InMemoryIndex::default();
    idx.add_doc(INDEX, DOC_EN, "memoryIndexer", true);

    let all_hits = idx.search_hits(INDEX, "memryindexer");
    let hit = all_hits
        .iter()
        .find(|candidate| candidate.doc_id == DOC_EN)
        .expect("expected hit for doc");

    let has_original_term = hit
        .matched_terms
        .iter()
        .any(|term| term.term == "memoryindexer" && term.domain == TermDomain::Original);
    assert!(
        has_original_term,
        "expected matched term memoryIndexer, got {:?}",
        hit.matched_terms
    );

    let spans = idx.get_matches_for_matched_terms(INDEX, DOC_EN, &hit.matched_terms);
    let has_spans = !spans.is_empty();
    assert!(has_spans, "expected matches from matched_terms");
}
/// A pinyin query typed with full-width Latin capitals must still hit the Chinese document
/// and report a matched term in the `TermDomain::PinyinFull` domain.
#[test]
fn fullwidth_pinyin_query_hits() {
    // Full-width capitals N, I, H, A, O (U+FF2E, U+FF29, U+FF28, U+FF21, U+FF2F).
    // Written as escapes on purpose: a literal that has been width-normalized to ASCII
    // "NIHAO" no longer exercises full-width handling, which is what this test is about.
    const FULLWIDTH_NIHAO: &str = "\u{FF2E}\u{FF29}\u{FF28}\u{FF21}\u{FF2F}";

    let mut index = InMemoryIndex::default();
    index.add_doc(INDEX, DOC_CN, "你好世界", true);

    let hits = index.search_hits(INDEX, FULLWIDTH_NIHAO);
    let doc_hit = hits.iter().find(|h| h.doc_id == DOC_CN);
    assert!(
        doc_hit.is_some(),
        "expected full-width pinyin query to hit, got {:?}",
        hits.iter()
            .map(|h| (&h.doc_id, &h.matched_terms))
            .collect::<Vec<_>>()
    );

    let matched = doc_hit.and_then(|h| {
        h.matched_terms
            .iter()
            .find(|t| t.domain == TermDomain::PinyinFull)
    });
    assert!(
        matched.is_some(),
        "expected matched pinyin full term, got {:?}",
        doc_hit.map(|h| h.matched_terms.clone())
    );
}
/// The truncated pinyin query `niha` must hit a document containing only "你好", and the
/// hit must report a matched term in the `TermDomain::PinyinFull` domain.
#[test]
fn short_pinyin_fuzzy_hits() {
    let mut idx = InMemoryIndex::default();
    idx.add_doc(INDEX, DOC_CN, "你好", true);

    let hits = idx.search_hits(INDEX, "niha");
    let doc_hit = hits.iter().find(|hit| hit.doc_id == DOC_CN);
    assert!(
        doc_hit.is_some(),
        "expected fuzzy pinyin hit for short query, got {:?}",
        hits.iter()
            .map(|hit| (&hit.doc_id, &hit.matched_terms))
            .collect::<Vec<_>>()
    );

    let has_full_pinyin_term = doc_hit
        .into_iter()
        .flat_map(|hit| hit.matched_terms.iter())
        .any(|term| matches!(term.domain, TermDomain::PinyinFull));
    assert!(
        has_full_pinyin_term,
        "expected matched pinyin term, got {:?}",
        doc_hit.map(|hit| hit.matched_terms.clone())
    );
}
/// A Chinese query with one wrong character ("北景大学" for "北京大学") must still hit the
/// document through the default search.
#[test]
fn non_ascii_auto_fuzzy_fallback() {
    let mut idx = InMemoryIndex::default();
    idx.add_doc(INDEX, DOC_CN, "北京大学", true);

    let hits = idx.search_hits(INDEX, "北景大学");
    let found = hits.iter().any(|hit| hit.doc_id == DOC_CN);
    assert!(
        found,
        "expected non-ascii fuzzy fallback to hit, got {:?}",
        hits.iter()
            .map(|hit| (&hit.doc_id, &hit.matched_terms))
            .collect::<Vec<_>>()
    );
}
/// Querying "hello 世界" against the identical document must hit it and report both
/// "hello" (in the original domain) and "世界" among the matched terms.
#[test]
fn mixed_script_query_hits_all_tokens() {
    let mut idx = InMemoryIndex::default();
    idx.add_doc(INDEX, DOC_CN, "hello 世界", true);

    let hits = idx.search_hits(INDEX, "hello 世界");
    let doc_hit = hits.iter().find(|hit| hit.doc_id == DOC_CN);
    assert!(
        doc_hit.is_some(),
        "expected mixed-script query to hit doc, got {:?}",
        hits.iter()
            .map(|hit| (&hit.doc_id, &hit.matched_terms))
            .collect::<Vec<_>>()
    );

    let matched = doc_hit
        .map(|hit| hit.matched_terms.clone())
        .unwrap_or_default();

    let has_latin_term = matched
        .iter()
        .any(|term| term.term == "hello" && term.domain == TermDomain::Original);
    assert!(
        has_latin_term,
        "expected matched original term hello, got {:?}",
        matched
    );

    let has_cjk_term = matched.iter().any(|term| term.term == "世界");
    assert!(
        has_cjk_term,
        "expected matched CJK term 世界, got {:?}",
        matched
    );
}
/// A two-character Chinese query with one wrong character ("明搏" for "明博") must still
/// hit the document through the default search.
#[test]
fn chinese_oov_fuzzy_recall() {
    let mut idx = InMemoryIndex::default();
    idx.add_doc(INDEX, DOC_CN, "明博", true);

    let hits = idx.search_hits(INDEX, "明搏");
    let found = hits.iter().any(|hit| hit.doc_id == DOC_CN);
    assert!(
        found,
        "expected OOV chinese fuzzy to hit, got {:?}",
        hits.iter()
            .map(|hit| (&hit.doc_id, &hit.matched_terms))
            .collect::<Vec<_>>()
    );
}
/// Loading a snapshot into a fresh index must make it answer the fuzzy pinyin query `nihap`
/// and must restore `total_len` and the original-domain entry of `domain_total_len`.
#[test]
fn load_snapshot_restores_domains_and_lengths() {
    let mut source = InMemoryIndex::default();
    source.add_doc(INDEX, DOC_CN, "你好世界", true);

    let snapshot = source
        .get_snapshot_data(INDEX)
        .expect("snapshot should exist");
    // Capture the expected values before the snapshot is moved into the new index.
    let want_total_len = snapshot.total_len;
    let want_original_len = snapshot.domain_total_len.get(TermDomain::Original);

    let mut restored = InMemoryIndex::default();
    restored.load_snapshot(INDEX, snapshot);

    let serves_fuzzy_pinyin = restored
        .search_hits(INDEX, "nihap")
        .iter()
        .any(|hit| hit.doc_id == DOC_CN);
    assert!(
        serves_fuzzy_pinyin,
        "expected restored index to serve pinyin fuzzy hits"
    );

    let state = restored
        .indexes
        .get(INDEX)
        .expect("restored index state should exist");
    assert_eq!(state.total_len, want_total_len);
    assert_eq!(
        state.domain_total_len.get(TermDomain::Original),
        want_original_len
    );
}
// Walks through "fresh -> add_doc -> take_dirty_and_deleted -> remove_doc" and checks
// `has_unpersisted_changes` after each step, both for the named index and for `None`.
// NOTE(review): `None` presumably means "any index"; confirm against the implementation.
#[test]
fn has_unpersisted_changes_tracks_dirty_and_deleted() {
let mut index = InMemoryIndex::default();
// A fresh index reports no pending changes.
assert!(!index.has_unpersisted_changes(None));
// Adding a document is reported as a pending change.
index.add_doc(INDEX, DOC_EN, "pending doc", true);
assert!(index.has_unpersisted_changes(Some(INDEX)));
assert!(index.has_unpersisted_changes(None));
// After `take_dirty_and_deleted` (return value ignored here) nothing is reported as pending.
index.take_dirty_and_deleted();
assert!(!index.has_unpersisted_changes(Some(INDEX)));
assert!(!index.has_unpersisted_changes(None));
// Removing a document is reported as a pending change as well.
index.remove_doc(INDEX, DOC_EN);
assert!(index.has_unpersisted_changes(Some(INDEX)));
assert!(index.has_unpersisted_changes(None));
}
// Loading a snapshot back into the index it was taken from must leave the index with no
// pending changes, even though the preceding `add_doc` had marked it as changed.
#[test]
fn load_snapshot_clears_pending_flags() {
let mut index = InMemoryIndex::default();
index.add_doc(INDEX, DOC_EN, "snapshot doc", true);
let snapshot = index
.get_snapshot_data(INDEX)
.expect("snapshot should exist");
// Taking the snapshot alone does not clear the pending state.
assert!(index.has_unpersisted_changes(Some(INDEX)));
index.load_snapshot(INDEX, snapshot);
assert!(
!index.has_unpersisted_changes(Some(INDEX)),
"loading a snapshot should reset pending persistence markers"
);
}
/// On an index with no changes, `persist_if_dirty` must return `Ok(false)` and must not
/// invoke the persistence callback.
#[test]
fn persist_if_dirty_skips_when_clean() {
    let mut idx = InMemoryIndex::default();
    let mut callback_ran = false;

    let outcome = idx.persist_if_dirty(INDEX, |_snapshot| -> Result<(), ()> {
        callback_ran = true;
        Ok(())
    });
    let did_persist = outcome.unwrap();

    assert!(!did_persist, "clean index should skip persistence");
    assert!(!callback_ran, "callback should not run when skipped");
}
/// On an index with a pending document, `persist_if_dirty` must call the callback with a
/// snapshot holding that document, return `Ok(true)`, and leave the index clean.
#[test]
fn persist_if_dirty_persists_and_marks_clean_on_success() {
    let mut idx = InMemoryIndex::default();
    idx.add_doc(INDEX, DOC_EN, "persist me", true);
    let mut callback_ran = false;

    let outcome = idx.persist_if_dirty(INDEX, |snapshot| -> Result<(), ()> {
        callback_ran = true;
        assert_eq!(snapshot.docs.len(), 1, "snapshot should include doc");
        Ok(())
    });
    let did_persist = outcome.unwrap();

    assert!(did_persist, "dirty index should persist");
    assert!(callback_ran, "callback should run on persistence");

    let still_pending = idx.has_unpersisted_changes(Some(INDEX));
    assert!(!still_pending, "successful persist should mark index clean");
}
/// When the persistence callback fails, `persist_if_dirty` must hand the callback's error
/// back to the caller and keep the index marked as having pending changes.
#[test]
fn persist_if_dirty_keeps_pending_on_error() {
    let mut idx = InMemoryIndex::default();
    idx.add_doc(INDEX, DOC_EN, "persist error", true);

    let outcome = idx.persist_if_dirty(INDEX, |_snapshot| -> Result<(), &'static str> {
        Err("boom")
    });
    let err = outcome.unwrap_err();
    assert_eq!(err, "boom");

    let still_pending = idx.has_unpersisted_changes(Some(INDEX));
    assert!(still_pending, "failed persist should leave index dirty");
}
/// For the misspelled two-term fuzzy query "applr banaan", the document containing both
/// terms must be returned and the document containing only "apple" must be dropped.
#[test]
fn fuzzy_msm_filters_insufficient_matches() {
    let mut idx = InMemoryIndex::default();
    idx.add_doc(INDEX, "doc-long", "apple banana", true);
    idx.add_doc(INDEX, "doc-short", "apple", true);

    let hits = idx.search_with_mode_hits(INDEX, "applr banaan", SearchMode::Fuzzy);

    let keeps_full_match = hits.iter().any(|hit| hit.doc_id == "doc-long");
    assert!(
        keeps_full_match,
        "expected fuzzy msm to keep doc with both terms, got {:?}",
        hits
    );

    let includes_partial_match = hits.iter().any(|hit| hit.doc_id == "doc-short");
    assert!(
        !includes_partial_match,
        "docs below min_should_match should be filtered out: {:?}",
        hits
    );
}
/// A two-character CJK query with one wrong character ("方桉" for "方案") must still hit
/// the document through the default search.
#[test]
fn short_cjk_fuzzy_recall_uses_2gram() {
    const DOC_SHORT_CJK: &str = "doc-short-cjk";

    let mut idx = InMemoryIndex::default();
    idx.add_doc(INDEX, DOC_SHORT_CJK, "方案", true);

    let hits = idx.search_hits(INDEX, "方桉");
    let found = hits.iter().any(|hit| hit.doc_id == DOC_SHORT_CJK);
    assert!(
        found,
        "expected 2-gram fuzzy recall for short CJK tokens, got {:?}",
        hits
    );
}
/// Round-trips a `DictionaryConfig` through a JSON file, then checks that an exact search
/// for "こんにちは" hits the Japanese document both with an index built from the loaded
/// dictionary and with a default index that has no dictionary configured.
#[test]
fn dictionary_load_and_fallback() {
    // The temporary directory is removed when `dir` is dropped at the end of the test.
    let dir = tempdir().unwrap();
    let path = dir.path().join("dict.json");

    let mut entries = HashSet::new();
    entries.insert("こんにちは".to_string());
    let config = DictionaryConfig {
        japanese: Some(ScriptDictionary {
            version: Some("v1".to_string()),
            entries,
        }),
        hangul: None,
    };

    // Serialize to disk and read it back so the serde round trip is exercised too.
    std::fs::write(&path, serde_json::to_vec(&config).unwrap()).unwrap();
    let loaded: DictionaryConfig =
        serde_json::from_slice(&std::fs::read(&path).unwrap()).expect("should deserialize");

    // `loaded` is not needed afterwards, so it is moved into the index instead of cloned.
    let mut index = InMemoryIndex::with_dictionary_config(loaded);
    index.add_doc(INDEX, DOC_JP, "こんにちは世界", true);
    let hits = index.search_with_mode_hits(INDEX, "こんにちは", SearchMode::Exact);
    assert!(
        hits.iter().any(|h| h.doc_id == DOC_JP),
        "expected dictionary-backed search hit, got {:?}",
        hits
    );

    // Same document and query against an index without a dictionary.
    let mut fallback_index = InMemoryIndex::default();
    fallback_index.add_doc(INDEX, DOC_JP, "こんにちは世界", true);
    let fallback_hits =
        fallback_index.search_with_mode_hits(INDEX, "こんにちは", SearchMode::Exact);
    assert!(
        fallback_hits.iter().any(|h| h.doc_id == DOC_JP),
        "expected fallback tokenization to still recall doc, got {:?}",
        fallback_hits
    );
}
/// A mixed-case identifier containing digits and an underscore must be found by an exact
/// search for the very same string.
#[test]
fn id_like_tokens_match_exact() {
    const DOC_WITH_ID: &str = "doc-id";
    const ID_LIKE: &str = "IKPeA9Zu9eo_pXlKWVFcf";

    let mut idx = InMemoryIndex::default();
    idx.add_doc(INDEX, DOC_WITH_ID, ID_LIKE, true);

    let hits = idx.search_with_mode_hits(INDEX, ID_LIKE, SearchMode::Exact);
    let found = hits.iter().any(|hit| hit.doc_id == DOC_WITH_ID);
    assert!(
        found,
        "expected exact search to hit id-like token, got {:?}",
        hits
    );
}
}