#[cfg(feature = "lex")]
use std::collections::{BTreeSet, HashSet};
#[cfg(feature = "lex")]
use std::time::Instant;
use crate::memvid::lifecycle::Memvid;
use crate::types::{FrameId, SearchEngineKind, SearchParams, SearchRequest, SearchResponse};
use crate::{MemvidError, Result};
mod api;
mod builders;
#[cfg(feature = "lex")]
mod fallback;
pub(crate) mod helpers;
#[cfg(feature = "lex")]
mod tantivy;
#[cfg(any(feature = "lex", feature = "temporal_track"))]
mod time_filter;
pub use api::{
DEFAULT_MAX_INDEX_PAYLOAD, is_frame_text_indexable, is_text_indexable_mime, max_index_payload,
};
#[cfg(feature = "lex")]
use fallback::{search_with_filters_only, search_with_lex_fallback};
use helpers::{build_context, empty_search_response};
#[cfg(feature = "lex")]
pub use tantivy::parse_content_date_to_timestamp;
#[cfg(feature = "lex")]
use tantivy::try_tantivy_search;
#[cfg(feature = "temporal_track")]
pub use time_filter::frame_ids_for_temporal_filter;
#[cfg(feature = "lex")]
use time_filter::frame_ids_in_date_range;
#[cfg(feature = "lex")]
impl Memvid {
    /// Runs a lexical search for `request` and returns the resulting hits.
    ///
    /// Steps, in order:
    /// 1. Initialise the Tantivy engine if it has not been created yet.
    /// 2. Parse the query; free-text tokens are lowercased, de-duplicated and
    ///    sorted (via a `BTreeSet`).
    /// 3. Build an optional candidate frame set from the hard constraints:
    ///    the query's required date range, the request's temporal filter
    ///    (only with the `temporal_track` feature) and the replay point
    ///    (`as_of_frame` / `as_of_ts`). The constraints are intersected; an
    ///    empty result short-circuits to an empty response.
    /// 4. Optionally narrow the candidates with the sketch pre-filter. Sketch
    ///    candidates never widen the hard constraints from step 3.
    /// 5. Search through Tantivy, or through the lex-index fallback when
    ///    `try_tantivy_search` yields `None`.
    /// 6. Apply ACL filtering, entity enrichment and (with the `replay`
    ///    feature) action recording.
    ///
    /// # Errors
    ///
    /// * [`MemvidError::LexNotEnabled`] when `lex_enabled` is false.
    /// * [`MemvidError::InvalidQuery`] when the parsed query has neither text
    ///   terms nor field terms.
    /// * Any error propagated from query parsing, index initialisation, the
    ///   filter lookups, the search back ends or ACL enforcement.
    pub fn search(&mut self, request: SearchRequest) -> Result<SearchResponse> {
        if !self.lex_enabled {
            return Err(MemvidError::LexNotEnabled);
        }
        if self.tantivy.is_none() {
            self.init_tantivy()?;
        }

        let start_time = Instant::now();
        let parsed = crate::search::parse_query(&request.query)?;

        // Drop blank tokens, then lowercase and de-duplicate the rest. The
        // BTreeSet round-trip also leaves the tokens in sorted order.
        let mut query_tokens = parsed.text_tokens();
        query_tokens.retain(|token| !token.trim().is_empty());
        query_tokens = query_tokens
            .into_iter()
            .map(|s| s.as_str().to_ascii_lowercase())
            .collect::<BTreeSet<_>>()
            .into_iter()
            .collect();

        let has_text_terms = !query_tokens.is_empty();
        let has_field_terms = parsed.contains_field_terms();
        if !has_text_terms && !has_field_terms {
            return Err(MemvidError::InvalidQuery {
                reason: "query must include at least one search term or field filter".into(),
            });
        }

        let params = SearchParams {
            top_k: request.top_k,
            snippet_chars: request.snippet_chars,
            cursor: request.cursor.clone(),
        };

        // Every short-circuit below returns the same empty response; build it
        // in one place so the early exits cannot drift apart.
        let empty_response = || {
            let elapsed = start_time.elapsed().as_millis();
            empty_search_response(
                request.query.clone(),
                params.clone(),
                elapsed,
                SearchEngineKind::Tantivy,
            )
        };

        // `None` means "no restriction"; `Some(set)` restricts the search to
        // exactly those frames.
        let mut candidate_filter: Option<HashSet<FrameId>> = None;

        // Hard constraint 1: date range required by the query itself.
        let date_range = parsed.required_date_range();
        if let Some(range) = &date_range {
            if range.is_empty() {
                return Ok(empty_response());
            }
            if let Some(ids) = frame_ids_in_date_range(self, range)? {
                if ids.is_empty() {
                    return Ok(empty_response());
                }
                candidate_filter = Some(ids.into_iter().collect());
            }
        }

        // Hard constraint 2: temporal filter carried by the request.
        #[cfg(feature = "temporal_track")]
        if let Some(ref temporal_filter) = request.temporal {
            if !temporal_filter.is_empty() {
                if let Some(ids) =
                    time_filter::frame_ids_for_temporal_filter(self, temporal_filter)?
                {
                    if ids.is_empty() {
                        return Ok(empty_response());
                    }
                    let narrowed =
                        intersect_candidates(candidate_filter.take(), ids.into_iter().collect());
                    if narrowed.is_empty() {
                        return Ok(empty_response());
                    }
                    candidate_filter = Some(narrowed);
                }
            }
        }

        // Hard constraint 3: replay view (`as_of_frame` / `as_of_ts`).
        if request.as_of_frame.is_some() || request.as_of_ts.is_some() {
            let replay_ids = self.get_replay_frame_ids(&request)?;
            if replay_ids.is_empty() {
                return Ok(empty_response());
            }
            let narrowed =
                intersect_candidates(candidate_filter.take(), replay_ids.into_iter().collect());
            if narrowed.is_empty() {
                return Ok(empty_response());
            }
            candidate_filter = Some(narrowed);
        }

        // Sketch pre-filter: only applied when it produced candidates.
        if self.has_sketches() && has_text_terms && !request.no_sketch {
            let sketch_start = Instant::now();
            let sketch_options = crate::SketchSearchOptions {
                hamming_threshold: 32,
                // Saturate so a huge caller-supplied `top_k` cannot overflow.
                max_candidates: params.top_k.saturating_mul(10).max(500),
                min_score: 0.0,
            };
            let sketch_candidates =
                self.find_sketch_candidates(&request.query, Some(sketch_options));
            if !sketch_candidates.is_empty() {
                let sketch_set: HashSet<FrameId> =
                    sketch_candidates.iter().map(|c| c.frame_id).collect();
                tracing::debug!(
                    sketch_candidates = sketch_candidates.len(),
                    sketch_time_us = sketch_start.elapsed().as_micros(),
                    "sketch pre-filter applied"
                );
                candidate_filter = Some(match candidate_filter.take() {
                    // No overlap with the hard constraints: keep the hard
                    // filter untouched. Substituting the sketch set here
                    // would admit frames outside the requested date range,
                    // temporal window or replay point.
                    Some(hard) if hard.is_disjoint(&sketch_set) => hard,
                    Some(hard) => hard
                        .into_iter()
                        .filter(|id| sketch_set.contains(id))
                        .collect(),
                    None => sketch_set,
                });
            }
        }

        // Primary engine first; `None` from Tantivy selects the fallback path.
        let tantivy_response = try_tantivy_search(
            self,
            &parsed,
            &query_tokens,
            &request,
            &params,
            start_time,
            candidate_filter.as_ref(),
        )?;
        let mut response = match tantivy_response {
            Some(found) => found,
            None => {
                self.ensure_lex_index()?;
                if has_text_terms {
                    search_with_lex_fallback(
                        self,
                        &parsed,
                        &query_tokens,
                        &request,
                        &params,
                        start_time,
                        candidate_filter.as_ref(),
                    )?
                } else {
                    search_with_filters_only(
                        self,
                        &parsed,
                        &request,
                        &params,
                        start_time,
                        candidate_filter.as_ref(),
                    )?
                }
            }
        };

        self.apply_acl_to_search_hits(
            &mut response.hits,
            request.acl_context.as_ref(),
            request.acl_enforcement_mode,
        )?;
        // In enforce mode the totals and context are recomputed from the hits
        // that remain after the ACL pass.
        if request.acl_enforcement_mode == crate::types::AclEnforcementMode::Enforce {
            response.total_hits = response.hits.len();
            response.context = build_context(&response.hits);
        }

        if self.has_logic_mesh() {
            helpers::enrich_hits_with_entities(&mut response.hits, self);
        }

        #[cfg(feature = "replay")]
        {
            let result_frames: Vec<u64> = response.hits.iter().map(|h| h.frame_id).collect();
            self.record_find_action(
                &request.query,
                &format!("{:?}", response.engine),
                response.hits.len(),
                result_frames,
            );
        }

        Ok(response)
    }
}

/// Intersects `incoming` with the accumulated candidate set.
///
/// When no set has been accumulated yet (`existing` is `None`), `incoming`
/// becomes the candidate set as-is. The result may be empty; callers decide
/// how to handle that.
#[cfg(feature = "lex")]
fn intersect_candidates(
    existing: Option<HashSet<FrameId>>,
    incoming: HashSet<FrameId>,
) -> HashSet<FrameId> {
    match existing {
        Some(current) => current
            .into_iter()
            .filter(|id| incoming.contains(id))
            .collect(),
        None => incoming,
    }
}
#[cfg(not(feature = "lex"))]
impl Memvid {
    /// Stub `search` compiled when the `lex` feature is disabled.
    ///
    /// The request is ignored.
    ///
    /// # Errors
    ///
    /// Always returns [`MemvidError::LexNotEnabled`].
    pub fn search(&mut self, _request: SearchRequest) -> Result<SearchResponse> {
        let unavailable = MemvidError::LexNotEnabled;
        Err(unavailable)
    }
}