use std::path::Path;
use tantivy::{
collector::TopDocs,
doc,
query::{BooleanQuery, FuzzyTermQuery, Occur, Query, QueryParser},
schema::{Term, Value},
};
use crate::models::{Event, Location, Party};
use crate::Result;
pub mod index;
pub mod query;
pub use index::{EventIndex, EventIndexSchema, IndexStats};
/// Search facade over an [`EventIndex`]: indexes events, removes them, and
/// answers queries with the ids of matching events.
pub struct SearchEngine {
    /// Underlying index; supplies the schema, the writers and the reader
    /// used by every method of this type.
    index: EventIndex,
}
impl SearchEngine {
pub fn new<P: AsRef<Path>>(index_path: P) -> Result<Self> {
Ok(Self {
index: EventIndex::create_or_open(index_path)?,
})
}
pub fn index_event(&self, event: &Event) -> Result<()> {
let mut writer = self.index.writer(50)?;
let s = self.index.schema();
let d = build_doc(event, s);
writer
.add_document(d)
.map_err(|e| crate::Error::Search(format!("add_document: {e}")))?;
writer
.commit()
.map_err(|e| crate::Error::Search(format!("commit: {e}")))?;
Ok(())
}
pub fn index_events(&self, events: &[Event]) -> Result<()> {
let mut writer = self.index.writer(100)?;
let s = self.index.schema();
for event in events {
writer
.add_document(build_doc(event, s))
.map_err(|e| crate::Error::Search(format!("add_document: {e}")))?;
}
writer
.commit()
.map_err(|e| crate::Error::Search(format!("commit: {e}")))?;
Ok(())
}
pub fn search(&self, query_str: &str, limit: usize) -> Result<Vec<String>> {
let searcher = self.index.reader().searcher();
let s = self.index.schema();
let parser = QueryParser::for_index(
self.index.index(),
vec![
s.name,
s.alternate_names,
s.description,
s.keywords,
s.organizer_name,
s.performer_name,
s.identifier_value,
],
);
let query = parser
.parse_query(query_str)
.map_err(|e| crate::Error::Search(format!("parse_query: {e}")))?;
let top = searcher
.search(&query, &TopDocs::with_limit(limit))
.map_err(|e| crate::Error::Search(format!("search: {e}")))?;
Ok(extract_ids(&searcher, s.id, &top))
}
pub fn fuzzy_search(&self, query_str: &str, limit: usize) -> Result<Vec<String>> {
let searcher = self.index.reader().searcher();
let s = self.index.schema();
let term = Term::from_field_text(s.name, query_str);
let query = FuzzyTermQuery::new(term, 2, true);
let top = searcher
.search(&query, &TopDocs::with_limit(limit))
.map_err(|e| crate::Error::Search(format!("fuzzy_search: {e}")))?;
Ok(extract_ids(&searcher, s.id, &top))
}
pub fn search_by_date_range(
&self,
from_yyyy_mm_dd: Option<&str>,
to_yyyy_mm_dd: Option<&str>,
limit: usize,
) -> Result<Vec<String>> {
let searcher = self.index.reader().searcher();
let s = self.index.schema();
let from = from_yyyy_mm_dd.unwrap_or("*");
let to = to_yyyy_mm_dd.unwrap_or("*");
let query_str = format!("start_date:[{from} TO {to}]");
let parser = QueryParser::for_index(self.index.index(), vec![s.start_date]);
let query = parser
.parse_query(&query_str)
.map_err(|e| crate::Error::Search(format!("parse range: {e}")))?;
let top = searcher
.search(&query, &TopDocs::with_limit(limit))
.map_err(|e| crate::Error::Search(format!("search range: {e}")))?;
Ok(extract_ids(&searcher, s.id, &top))
}
pub fn search_by_name_and_date(
&self,
name: &str,
date_yyyy_mm_dd: Option<&str>,
limit: usize,
) -> Result<Vec<String>> {
let searcher = self.index.reader().searcher();
let s = self.index.schema();
let name_term = Term::from_field_text(s.name, name);
let name_q: Box<dyn Query> = Box::new(FuzzyTermQuery::new(name_term, 2, true));
let final_q: Box<dyn Query> = if let Some(date) = date_yyyy_mm_dd {
let parser = QueryParser::for_index(self.index.index(), vec![s.start_date]);
if let Ok(date_q) = parser.parse_query(date) {
Box::new(BooleanQuery::new(vec![
(Occur::Must, name_q),
(Occur::Should, date_q),
]))
} else {
name_q
}
} else {
name_q
};
let top = searcher
.search(final_q.as_ref(), &TopDocs::with_limit(limit))
.map_err(|e| crate::Error::Search(format!("search name+date: {e}")))?;
Ok(extract_ids(&searcher, s.id, &top))
}
pub fn delete_event(&self, event_id: &str) -> Result<()> {
let mut writer = self.index.writer(50)?;
let s = self.index.schema();
let term = Term::from_field_text(s.id, event_id);
writer.delete_term(term);
writer
.commit()
.map_err(|e| crate::Error::Search(format!("commit delete: {e}")))?;
Ok(())
}
pub fn stats(&self) -> Result<IndexStats> {
self.index.stats()
}
pub fn optimize(&self) -> Result<()> {
self.index.optimize()
}
pub fn reload(&self) -> Result<()> {
self.index.reload()
}
}
/// Flattens `event` into the tantivy document stored in the index.
///
/// List-valued attributes (alternate names, keywords, languages, identifier
/// values, locations, organizer and performer names) are each collapsed into
/// one space-separated string. Dates are rendered as `YYYY-MM-DD`; a missing
/// end date or description is stored as an empty string, and the `active`
/// flag as the text `"true"` or `"false"`.
fn build_doc(event: &Event, s: &EventIndexSchema) -> tantivy::TantivyDocument {
    const DATE_FMT: &str = "%Y-%m-%d";

    // Location and party details, each flattened to space-separated text.
    let (loc_name, loc_city, loc_country, loc_url) = summarize_locations(&event.location);
    let organizers = parties_names(&event.organizers);
    let performers = parties_names(&event.performers);

    let id_values: Vec<&str> = event
        .identifiers
        .iter()
        .map(|ident| ident.value.as_str())
        .collect();

    let starts_on = event.start_date.format(DATE_FMT).to_string();
    let ends_on = match event.end_date {
        Some(end) => end.format(DATE_FMT).to_string(),
        None => String::new(),
    };

    let active_flag = if event.active { "true" } else { "false" };

    doc!(
        s.id => event.id.to_string(),
        s.name => event.name.clone(),
        s.alternate_names => event.alternate_names.join(" "),
        s.description => event.description.clone().unwrap_or_default(),
        s.keywords => event.keywords.join(" "),
        s.start_date => starts_on,
        s.end_date => ends_on,
        s.event_status => serde_plain(&event.event_status),
        s.event_attendance_mode => serde_plain(&event.event_attendance_mode),
        s.event_type => serde_plain(&event.event_type),
        s.in_language => event.in_language.join(" "),
        s.location_name => loc_name,
        s.location_city => loc_city,
        s.location_country => loc_country,
        s.location_url => loc_url,
        s.organizer_name => organizers,
        s.performer_name => performers,
        s.identifier_value => id_values.join(" "),
        s.active => active_flag,
    )
}
/// Concatenates the names of `parties`, separated by single spaces.
///
/// Returns an empty string for an empty slice.
fn parties_names(parties: &[Party]) -> String {
    let mut joined = String::new();
    for (position, party) in parties.iter().enumerate() {
        // Separator goes between names only, never before the first one.
        if position > 0 {
            joined.push(' ');
        }
        joined.push_str(party.name.as_str());
    }
    joined
}
/// Collapses `locs` into four space-separated strings, returned as
/// `(names, cities, countries, urls)`.
///
/// * `Place` contributes its name, the city/country of its address when
///   present, and its URL when present.
/// * `PostalAddress` contributes its city and country when present.
/// * `Virtual` contributes its name when present and always its URL.
/// * `Text` contributes its value as a name.
fn summarize_locations(locs: &[Location]) -> (String, String, String, String) {
    let mut place_names = Vec::new();
    let mut city_names = Vec::new();
    let mut country_names = Vec::new();
    let mut links = Vec::new();

    for location in locs {
        match location {
            Location::Place(place) => {
                place_names.push(place.name.clone());
                if let Some(address) = place.address.as_ref() {
                    city_names.extend(address.city.iter().cloned());
                    country_names.extend(address.country.iter().cloned());
                }
                links.extend(place.url.iter().cloned());
            }
            Location::PostalAddress(address) => {
                city_names.extend(address.city.iter().cloned());
                country_names.extend(address.country.iter().cloned());
            }
            Location::Virtual(venue) => {
                place_names.extend(venue.name.iter().cloned());
                links.push(venue.url.clone());
            }
            Location::Text { value } => place_names.push(value.clone()),
        }
    }

    let names = place_names.join(" ");
    let cities = city_names.join(" ");
    let countries = country_names.join(" ");
    let urls = links.join(" ");
    (names, cities, countries, urls)
}
/// Returns the plain text `v` serializes to when its JSON form is a string.
///
/// Any other outcome — a serialization error or a non-string JSON value —
/// yields an empty string.
fn serde_plain<T: serde::Serialize>(v: &T) -> String {
    match serde_json::to_value(v) {
        Ok(serde_json::Value::String(text)) => text,
        _ => String::new(),
    }
}
/// Loads each hit in `docs` and collects the text stored in `id_field`,
/// preserving the order of `docs`.
///
/// Hits whose document cannot be loaded, that have no value for `id_field`,
/// or whose first value is not text are skipped.
fn extract_ids(
    searcher: &tantivy::Searcher,
    id_field: tantivy::schema::Field,
    docs: &[(f32, tantivy::DocAddress)],
) -> Vec<String> {
    docs.iter()
        .filter_map(|&(_score, address)| {
            let stored: tantivy::TantivyDocument = searcher.doc(address).ok()?;
            let value = stored.get_first(id_field)?;
            let id = value.as_str()?;
            Some(id.to_string())
        })
        .collect()
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::models::{Event, EventType, Party, PartyKind};
    use chrono::{TimeZone, Utc};
    use tempfile::TempDir;

    /// Timestamp shared by every fixture event.
    fn sample_time() -> chrono::DateTime<Utc> {
        Utc.with_ymd_and_hms(2026, 3, 1, 9, 0, 0).unwrap()
    }

    /// Fixture event called `name`, starting at [`sample_time`].
    fn evt(name: &str) -> Event {
        Event::new(name, sample_time())
    }

    /// Engine over a fresh temporary directory. The directory handle is
    /// returned too so that it outlives the engine within each test.
    fn fresh_engine() -> (TempDir, SearchEngine) {
        let dir = TempDir::new().unwrap();
        let engine = SearchEngine::new(dir.path()).unwrap();
        (dir, engine)
    }

    /// Indexes `event` and reloads the engine before the test queries it.
    fn index_and_reload(engine: &SearchEngine, event: &Event) {
        engine.index_event(event).unwrap();
        engine.reload().unwrap();
    }

    #[test]
    fn index_and_search_event_by_name() {
        let (_dir, engine) = fresh_engine();
        let event = evt("Annual Conference");
        index_and_reload(&engine, &event);

        let ids = engine.search("Conference", 10).unwrap();
        assert_eq!(ids.len(), 1);
        assert_eq!(ids[0], event.id.to_string());
    }

    #[test]
    fn fuzzy_search_finds_typo() {
        let (_dir, engine) = fresh_engine();
        let event = evt("Hackathon");
        index_and_reload(&engine, &event);

        let ids = engine.fuzzy_search("Hakathon", 10).unwrap();
        assert_eq!(ids.len(), 1);
        assert_eq!(ids[0], event.id.to_string());
    }

    #[test]
    fn search_finds_organizer_name() {
        let (_dir, engine) = fresh_engine();
        let mut event = evt("Talk");
        event.event_type = EventType::Conference;
        event.organizers.push(Party {
            kind: PartyKind::Organization,
            id: None,
            name: "Cal Performances".into(),
            email: None,
            url: None,
        });
        index_and_reload(&engine, &event);

        let ids = engine.search("Performances", 10).unwrap();
        assert_eq!(ids.len(), 1);
    }

    #[test]
    fn delete_event_drops_from_index() {
        let (_dir, engine) = fresh_engine();
        let event = evt("Workshop");
        index_and_reload(&engine, &event);
        assert_eq!(engine.stats().unwrap().num_docs, 1);

        engine.delete_event(&event.id.to_string()).unwrap();
        engine.reload().unwrap();
        assert_eq!(engine.search("Workshop", 10).unwrap().len(), 0);
    }

    #[test]
    fn bulk_index() {
        let (_dir, engine) = fresh_engine();
        let events = vec![evt("One"), evt("Two"), evt("Three")];

        engine.index_events(&events).unwrap();
        engine.reload().unwrap();
        assert_eq!(engine.stats().unwrap().num_docs, 3);
    }
}