use std::collections::HashMap;
use serde::{Deserialize, Serialize};
/// Internal record for one indexed message, stored in `SearchIndex::messages`
/// under its `event_id`.
#[derive(Clone, Debug, Serialize, Deserialize)]
struct IndexedMessage {
/// Identifier of the room the message was indexed under.
room_id: String,
/// Identifier of the event; also used as the key in the message map.
event_id: String,
/// Sender as supplied by the caller of `index_message`.
sender: String,
/// Original message text, returned verbatim in search results.
body: String,
/// `body.to_lowercase()`, precomputed at index time so that searching can
/// compare against a lowercased query without re-lowercasing every message.
body_lower: String,
/// Caller-supplied timestamp; search results are ordered by it, largest first.
/// NOTE(review): unit (seconds vs. milliseconds) is not visible here — confirm with callers.
timestamp: u64,
}
/// One hit returned by `SearchIndex::search`.
#[derive(Clone, Debug, PartialEq)]
pub struct SearchResult {
/// Room the matching message was indexed under.
pub room_id: String,
/// Event identifier of the matching message.
pub event_id: String,
/// Sender recorded for the matching message.
pub sender: String,
/// Full, unmodified message body.
pub body: String,
/// Timestamp recorded for the message (same value that was passed to `index_message`).
pub timestamp: u64,
/// Excerpt produced by `build_snippet`: the matched text wrapped in
/// `<mark>…</mark>` with surrounding context and `...` where text was cut.
pub snippet: String,
}
/// In-memory message search index that can be serialized to / deserialized from JSON.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct SearchIndex {
/// All indexed messages, keyed by event id.
messages: HashMap<String, IndexedMessage>,
/// Room id -> event ids indexed for that room, in insertion order.
room_index: HashMap<String, Vec<String>>,
/// Threshold consulted by `needs_save`; `0` means "save whenever dirty".
/// Falls back to `default_auto_save_threshold()` when absent from the serialized data.
#[serde(default = "default_auto_save_threshold")]
pub auto_save_threshold: usize,
/// Number of messages present at the last successful save or load.
#[serde(default)]
last_save_count: usize,
/// Set when the index has been modified since the last save/load; never serialized.
#[serde(skip)]
dirty: bool,
}
/// Default value for `SearchIndex::auto_save_threshold`.
///
/// Used both by `SearchIndex::new` and as the serde fallback when the field is
/// missing from a serialized index.
fn default_auto_save_threshold() -> usize {
    const DEFAULT_THRESHOLD: usize = 100;
    DEFAULT_THRESHOLD
}
impl Default for SearchIndex {
fn default() -> Self {
Self::new()
}
}
impl SearchIndex {
    /// Creates an empty index with the default auto-save threshold and a clean
    /// (not dirty) state.
    pub fn new() -> Self {
        Self {
            messages: HashMap::new(),
            room_index: HashMap::new(),
            auto_save_threshold: default_auto_save_threshold(),
            last_save_count: 0,
            dirty: false,
        }
    }

    /// Adds a message to the index, replacing any existing entry with the same
    /// `event_id`.
    ///
    /// Bodies that are empty or whitespace-only are ignored. On success the
    /// index is marked dirty.
    pub fn index_message(
        &mut self,
        room_id: &str,
        event_id: &str,
        sender: &str,
        body: &str,
        timestamp: u64,
    ) {
        if body.trim().is_empty() {
            return;
        }

        // If this event was indexed before (e.g. an edit), unlink it from the
        // room list it used to live in so the id is never listed twice.
        // `messages` and `room_index` are distinct fields, so no clone of the
        // old room id is needed to satisfy the borrow checker.
        if let Some(old) = self.messages.get(event_id) {
            if let Some(ids) = self.room_index.get_mut(&old.room_id) {
                ids.retain(|id| id != event_id);
            }
        }

        self.room_index
            .entry(room_id.to_string())
            .or_default()
            .push(event_id.to_string());

        let entry = IndexedMessage {
            room_id: room_id.to_string(),
            event_id: event_id.to_string(),
            sender: sender.to_string(),
            body: body.to_string(),
            body_lower: body.to_lowercase(),
            timestamp,
        };
        self.messages.insert(event_id.to_string(), entry);
        self.dirty = true;
    }

    /// Case-insensitive substring search.
    ///
    /// * `query` – text to look for; an empty or whitespace-only query yields
    ///   no results.
    /// * `room_id` – when `Some`, only messages indexed under that room are
    ///   considered; an unknown room yields no results.
    ///
    /// Results are ordered by timestamp, newest first. Messages with equal
    /// timestamps are ordered by event id so that repeated searches return
    /// the same order regardless of hash-map iteration order.
    pub fn search(&self, query: &str, room_id: Option<&str>) -> Vec<SearchResult> {
        if query.trim().is_empty() {
            return Vec::new();
        }
        let query_lower = query.to_lowercase();

        let candidates: Box<dyn Iterator<Item = &IndexedMessage> + '_> = match room_id {
            Some(rid) => match self.room_index.get(rid) {
                Some(ids) => Box::new(ids.iter().filter_map(|eid| self.messages.get(eid))),
                None => return Vec::new(),
            },
            None => Box::new(self.messages.values()),
        };

        let mut results: Vec<SearchResult> = candidates
            .filter(|msg| msg.body_lower.contains(&query_lower))
            .map(|msg| SearchResult {
                room_id: msg.room_id.clone(),
                event_id: msg.event_id.clone(),
                sender: msg.sender.clone(),
                body: msg.body.clone(),
                timestamp: msg.timestamp,
                snippet: build_snippet(&msg.body, &query_lower),
            })
            .collect();

        results.sort_by(|a, b| {
            b.timestamp
                .cmp(&a.timestamp)
                .then_with(|| a.event_id.cmp(&b.event_id))
        });
        results
    }

    /// Removes every message indexed under `room_id`.
    ///
    /// Marks the index dirty only if the room was known to the index.
    pub fn clear_room(&mut self, room_id: &str) {
        if let Some(event_ids) = self.room_index.remove(room_id) {
            for eid in &event_ids {
                self.messages.remove(eid);
            }
            self.dirty = true;
        }
    }

    /// Number of messages currently held in the index.
    pub fn message_count(&self) -> usize {
        self.messages.len()
    }

    /// Ids of all rooms that currently have at least one indexed message.
    ///
    /// The order of the returned ids is unspecified.
    pub fn indexed_room_ids(&self) -> Vec<String> {
        self.room_index
            .iter()
            .filter(|(_, ids)| !ids.is_empty())
            .map(|(rid, _)| rid.clone())
            .collect()
    }

    /// Serializes the index as JSON and stores it at `path`, creating parent
    /// directories as needed.
    ///
    /// The JSON is first written to a sibling `<path>.tmp` file and then
    /// renamed over `path`, so an interrupted save cannot leave a truncated
    /// index behind. On success the dirty flag is cleared and the saved
    /// message count is recorded.
    ///
    /// # Errors
    ///
    /// Returns a human-readable message if serialization, directory creation,
    /// writing, or the final rename fails. The in-memory state is left
    /// untouched in that case.
    pub fn save_to_file(&mut self, path: &std::path::Path) -> Result<(), String> {
        let json = serde_json::to_string(self)
            .map_err(|e| format!("Failed to serialize search index: {}", e))?;

        if let Some(parent) = path.parent() {
            std::fs::create_dir_all(parent)
                .map_err(|e| format!("Failed to create directory for search index: {}", e))?;
        }

        // Append ".tmp" to the full file name (rather than replacing the
        // extension) so the temp file always lives next to the target and
        // the rename stays on the same filesystem.
        let mut tmp_name = path.as_os_str().to_os_string();
        tmp_name.push(".tmp");
        let tmp_path = std::path::PathBuf::from(tmp_name);

        if let Err(e) = std::fs::write(&tmp_path, json) {
            // Best-effort cleanup of a partially written temp file.
            let _ = std::fs::remove_file(&tmp_path);
            return Err(format!("Failed to write search index file: {}", e));
        }
        if let Err(e) = std::fs::rename(&tmp_path, path) {
            let _ = std::fs::remove_file(&tmp_path);
            return Err(format!("Failed to replace search index file: {}", e));
        }

        self.last_save_count = self.messages.len();
        self.dirty = false;
        tracing::debug!("Search index saved to {:?} ({} messages)", path, self.last_save_count);
        Ok(())
    }

    /// Loads an index previously written by [`SearchIndex::save_to_file`].
    ///
    /// If `path` does not exist, a fresh empty index is returned instead of an
    /// error. A loaded index starts out clean, with its saved-count baseline
    /// set to the number of messages it contains.
    ///
    /// # Errors
    ///
    /// Returns a human-readable message if the file cannot be read or does not
    /// contain a valid serialized index.
    pub fn load_from_file(path: &std::path::Path) -> Result<Self, String> {
        if !path.exists() {
            tracing::debug!("No search index file at {:?}, starting fresh", path);
            return Ok(Self::new());
        }

        let json = std::fs::read_to_string(path)
            .map_err(|e| format!("Failed to read search index file: {}", e))?;
        let mut index: SearchIndex = serde_json::from_str(&json)
            .map_err(|e| format!("Failed to deserialize search index: {}", e))?;

        // Whatever count was persisted, the on-disk state now equals the
        // in-memory state, so reset the baseline to match.
        index.last_save_count = index.messages.len();
        index.dirty = false;
        tracing::debug!("Search index loaded from {:?} ({} messages)", path, index.messages.len());
        Ok(index)
    }

    /// Flags the index as modified without changing its contents.
    pub fn mark_dirty(&mut self) {
        self.dirty = true;
    }

    /// Reports whether the index should be persisted now.
    ///
    /// Returns `false` when nothing changed since the last save/load. When
    /// dirty, returns `true` if `auto_save_threshold` is `0`, or if the
    /// message count has moved away from the last saved count by at least the
    /// threshold. Growth and shrinkage both count, so clearing a large room
    /// triggers a save just like indexing many new messages does.
    pub fn needs_save(&self) -> bool {
        if !self.dirty {
            return false;
        }
        if self.auto_save_threshold == 0 {
            return true;
        }
        let current = self.messages.len();
        let changed = current.max(self.last_save_count) - current.min(self.last_save_count);
        changed >= self.auto_save_threshold
    }
}
/// Builds a display excerpt of `body` around the first case-insensitive
/// occurrence of `query_lower`.
///
/// * `body` – original message text.
/// * `query_lower` – the search term, already lowercased by the caller.
///
/// The matched text (in its original casing) is wrapped in `<mark>…</mark>`.
/// Up to 40 bytes of context are kept on each side, trimmed to a space where
/// one is available inside the window, and `...` is added on any side where
/// text was cut. If the query does not occur in `body`, the whole body is
/// returned unchanged.
///
/// All slicing is done on UTF-8 character boundaries of the original text, so
/// non-ASCII bodies never cause a panic, including characters whose lowercase
/// form has a different byte length.
///
/// NOTE(review): `body` is inserted verbatim next to the `<mark>` tags. If a
/// caller renders the snippet as HTML, the text must be escaped or sanitised
/// there — confirm against the rendering code.
fn build_snippet(body: &str, query_lower: &str) -> String {
    const CONTEXT_BYTES: usize = 40;

    let body_lower = body.to_lowercase();
    let lower_start = match body_lower.find(query_lower) {
        Some(pos) => pos,
        None => return body.to_string(),
    };
    let lower_end = lower_start + query_lower.len();

    // Offsets found in `body_lower` are not valid offsets into `body` when
    // lowercasing changed a character's byte length. Walk the original text
    // while tracking how many lowercased bytes each character contributes and
    // translate the match range back to original character boundaries.
    let mut match_start = body.len();
    let mut match_end = body.len();
    let mut start_found = false;
    let mut lower_pos = 0usize;
    for (orig_pos, ch) in body.char_indices() {
        let lower_len: usize = ch.to_lowercase().map(|c| c.len_utf8()).sum();
        let lower_next = lower_pos + lower_len;
        if !start_found && lower_start < lower_next {
            match_start = orig_pos;
            start_found = true;
        }
        if start_found && lower_end <= lower_next {
            match_end = orig_pos + ch.len_utf8();
            break;
        }
        lower_pos = lower_next;
    }
    if query_lower.is_empty() {
        // An empty needle matches an empty range; do not swallow a character.
        match_end = match_start;
    }

    let snippet_start = if match_start > CONTEXT_BYTES {
        let mut desired = match_start - CONTEXT_BYTES;
        // Move forward to a character boundary; `match_start` is one, so this
        // always terminates inside the window.
        while !body.is_char_boundary(desired) {
            desired += 1;
        }
        body[desired..match_start]
            .find(' ')
            .map(|pos| desired + pos + 1)
            .unwrap_or(desired)
    } else {
        0
    };

    let snippet_end = if match_end + CONTEXT_BYTES < body.len() {
        let mut desired = match_end + CONTEXT_BYTES;
        // Move backward to a character boundary; `match_end` is one.
        while !body.is_char_boundary(desired) {
            desired -= 1;
        }
        body[match_end..desired]
            .rfind(' ')
            .map(|pos| match_end + pos)
            .unwrap_or(desired)
    } else {
        body.len()
    };

    let prefix = if snippet_start > 0 { "..." } else { "" };
    let suffix = if snippet_end < body.len() { "..." } else { "" };
    let before = &body[snippet_start..match_start];
    let matched = &body[match_start..match_end];
    let after = &body[match_end..snippet_end];
    format!(
        "{}{}<mark>{}</mark>{}{}",
        prefix, before, matched, after, suffix
    )
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Indexing several messages and searching across all rooms returns the
    /// matches newest-first.
    #[test]
    fn test_index_and_search() {
        let mut index = SearchIndex::new();
        index.index_message("!room1:example.com", "$evt1", "@alice:example.com", "Hello world", 1000);
        index.index_message("!room1:example.com", "$evt2", "@bob:example.com", "Goodbye world", 2000);
        index.index_message("!room2:example.com", "$evt3", "@alice:example.com", "Hello again", 3000);
        assert_eq!(index.message_count(), 3);
        let results = index.search("hello", None);
        assert_eq!(results.len(), 2);
        assert_eq!(results[0].event_id, "$evt3");
        assert_eq!(results[1].event_id, "$evt1");
    }

    /// A room filter restricts results to that room only.
    #[test]
    fn test_search_by_room() {
        let mut index = SearchIndex::new();
        index.index_message("!room1:example.com", "$evt1", "@alice:example.com", "Hello world", 1000);
        index.index_message("!room2:example.com", "$evt2", "@bob:example.com", "Hello there", 2000);
        let results = index.search("hello", Some("!room1:example.com"));
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].room_id, "!room1:example.com");
    }

    /// Clearing a room removes exactly that room's messages.
    #[test]
    fn test_clear_room() {
        let mut index = SearchIndex::new();
        index.index_message("!room1:example.com", "$evt1", "@alice:example.com", "Hello", 1000);
        index.index_message("!room1:example.com", "$evt2", "@bob:example.com", "World", 2000);
        index.index_message("!room2:example.com", "$evt3", "@alice:example.com", "Test", 3000);
        assert_eq!(index.message_count(), 3);
        index.clear_room("!room1:example.com");
        assert_eq!(index.message_count(), 1);
    }

    /// An empty query never matches anything.
    #[test]
    fn test_empty_query_returns_nothing() {
        let mut index = SearchIndex::new();
        index.index_message("!room1:example.com", "$evt1", "@alice:example.com", "Hello", 1000);
        let results = index.search("", None);
        assert!(results.is_empty());
    }

    /// Empty and whitespace-only bodies are skipped at index time.
    #[test]
    fn test_empty_body_not_indexed() {
        let mut index = SearchIndex::new();
        index.index_message("!room1:example.com", "$evt1", "@alice:example.com", "", 1000);
        index.index_message("!room1:example.com", "$evt2", "@alice:example.com", "   ", 2000);
        assert_eq!(index.message_count(), 0);
    }

    /// The snippet wraps the matched text in `<mark>` tags.
    #[test]
    fn test_snippet_has_mark_tags() {
        let mut index = SearchIndex::new();
        index.index_message("!room1:example.com", "$evt1", "@alice:example.com", "Hello world", 1000);
        let results = index.search("world", None);
        assert_eq!(results.len(), 1);
        assert!(results[0].snippet.contains("<mark>world</mark>"));
    }

    /// Matching ignores case, and the snippet keeps the body's original casing.
    #[test]
    fn test_case_insensitive() {
        let mut index = SearchIndex::new();
        index.index_message("!room1:example.com", "$evt1", "@alice:example.com", "Hello World", 1000);
        let results = index.search("HELLO", None);
        assert_eq!(results.len(), 1);
        assert!(results[0].snippet.contains("<mark>Hello</mark>"));
    }

    /// Re-indexing the same event id replaces the earlier entry.
    #[test]
    fn test_deduplication() {
        let mut index = SearchIndex::new();
        index.index_message("!room1:example.com", "$evt1", "@alice:example.com", "Hello v1", 1000);
        index.index_message("!room1:example.com", "$evt1", "@alice:example.com", "Hello v2", 2000);
        assert_eq!(index.message_count(), 1);
        let results = index.search("Hello", None);
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].body, "Hello v2");
    }

    /// `needs_save` requires both the dirty flag and the threshold to be met.
    #[test]
    fn test_dirty_tracking() {
        let mut index = SearchIndex::new();
        assert!(!index.needs_save());
        index.index_message("!room1:example.com", "$evt1", "@alice:example.com", "Hello", 1000);
        assert!(index.dirty);
        // One change is below the default threshold.
        assert!(!index.needs_save());
        index.auto_save_threshold = 1;
        assert!(index.needs_save());
    }

    /// `mark_dirty` sets the dirty flag on an otherwise untouched index.
    #[test]
    fn test_mark_dirty() {
        let mut index = SearchIndex::new();
        assert!(!index.dirty);
        index.mark_dirty();
        assert!(index.dirty);
    }

    /// A saved index can be loaded back with its messages intact.
    #[test]
    fn test_save_and_load() {
        // Include the process id so concurrent test processes on the same
        // machine do not create and delete each other's files.
        let dir = std::env::temp_dir()
            .join(format!("synpad_test_search_index_{}", std::process::id()));
        let _ = std::fs::create_dir_all(&dir);
        let path = dir.join("test_index.json");
        let mut index = SearchIndex::new();
        index.index_message("!room1:example.com", "$evt1", "@alice:example.com", "Hello world", 1000);
        index.index_message("!room2:example.com", "$evt2", "@bob:example.com", "Test message", 2000);
        index.save_to_file(&path).expect("save should succeed");
        assert!(!index.needs_save());
        assert!(!index.dirty);
        let loaded = SearchIndex::load_from_file(&path).expect("load should succeed");
        assert_eq!(loaded.message_count(), 2);
        let results = loaded.search("hello", None);
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].event_id, "$evt1");
        let _ = std::fs::remove_file(&path);
        let _ = std::fs::remove_dir(&dir);
    }

    /// Loading from a path that does not exist yields an empty index, not an error.
    #[test]
    fn test_load_nonexistent_file() {
        // Use the platform temp directory instead of a hard-coded `/tmp`.
        let path = std::env::temp_dir().join("nonexistent_synpad_search_index_xyz.json");
        let index = SearchIndex::load_from_file(&path).expect("should return empty index");
        assert_eq!(index.message_count(), 0);
    }
}