Skip to main content

FtsIndex

Struct FtsIndex 

Source
pub struct FtsIndex<B>
where B: FtsBackend,
{ /* private fields */ }
Expand description

Full-text search index generic over storage backend.

Provides identical indexing, search, and highlighting logic for Origin (redb), Lite (in-memory), and WASM deployments.

Writes accumulate in an in-memory Memtable. When the memtable exceeds its threshold, it is flushed to an immutable segment stored via the backend. Queries merge the active memtable with all persisted segments.

An optional MemoryGovernor can be injected via FtsIndex::set_governor to enforce per-engine memory budgets on large allocations (compaction, segment merge, query term collection). When no governor is set, allocations proceed without budget enforcement — which is the correct behaviour for NodeDB-Lite and WASM deployments where nodedb-mem is not available.

Implementations§

Source§

impl<B> FtsIndex<B>
where B: FtsBackend,

Source

pub fn highlight( &self, text: &str, query: &str, prefix: &str, suffix: &str, ) -> String

Generate highlighted text with matched query terms wrapped in tags.

Returns the original text with each occurrence of a matched query term surrounded by prefix and suffix (e.g., <b> and </b>).

Source

pub fn offsets(&self, text: &str, query: &str) -> Vec<MatchOffset>

Return byte offsets of matched query terms in the original text.

Source§

impl<B> FtsIndex<B>
where B: FtsBackend,

Source

pub fn set_collection_analyzer( &self, tid: u64, collection: &str, analyzer_name: &str, ) -> Result<(), <B as FtsBackend>::Error>

Set the analyzer for a collection. Persists to backend metadata.

Source

pub fn set_collection_language( &self, tid: u64, collection: &str, lang_code: &str, ) -> Result<(), <B as FtsBackend>::Error>

Set the language for a collection. Persists to backend metadata.

Source

pub fn get_collection_analyzer( &self, tid: u64, collection: &str, ) -> Result<Option<String>, <B as FtsBackend>::Error>

Get the configured analyzer name for a collection.

Source

pub fn get_collection_language( &self, tid: u64, collection: &str, ) -> Result<Option<String>, <B as FtsBackend>::Error>

Get the configured language for a collection.

Source

pub fn analyze_for_collection( &self, tid: u64, collection: &str, text: &str, ) -> Result<Vec<String>, <B as FtsBackend>::Error>

Analyze text using the collection’s configured analyzer.

Falls back to the standard English analyzer if no analyzer is configured.

Source

pub fn tokenize_raw_for_collection( &self, tid: u64, collection: &str, text: &str, ) -> Result<Vec<String>, <B as FtsBackend>::Error>

Tokenize text WITHOUT stemming for fuzzy matching.

Source§

impl<B> FtsIndex<B>
where B: FtsBackend,

Source

pub fn read_fieldnorm( &self, tid: u64, collection: &str, doc_id: Surrogate, ) -> Result<Option<u32>, <B as FtsBackend>::Error>

Get the fieldnorm (SmallFloat-encoded doc length) for a doc.

Returns the decoded approximate u32 length, or None if not stored.

Source

pub fn write_fieldnorm( &self, tid: u64, collection: &str, doc_id: Surrogate, doc_length: u32, ) -> Result<(), <B as FtsBackend>::Error>

Write a fieldnorm byte for a surrogate. Grows the array if needed.

Source§

impl<B> FtsIndex<B>
where B: FtsBackend,

Source

pub fn index_stats( &self, tid: u64, collection: &str, ) -> Result<(u32, f32), <B as FtsBackend>::Error>

Get total document count and average document length for a collection.

Returns (total_docs, avg_doc_len). If the collection is empty, returns (0, 1.0) to avoid division by zero.

Source§

impl<B> FtsIndex<B>
where B: FtsBackend,

Source

pub fn put_synonym_group( &self, tid: u64, record: &SynonymGroupRecord, ) -> Result<(), <B as FtsBackend>::Error>

Persist a synonym group. Overwrites any existing group with the same name.

Source

pub fn delete_synonym_group( &self, tid: u64, name: &str, ) -> Result<bool, <B as FtsBackend>::Error>

Delete a synonym group. Returns true if it existed.

Source

pub fn get_synonym_group( &self, tid: u64, name: &str, ) -> Result<Option<SynonymGroupRecord>, <B as FtsBackend>::Error>

Read a single synonym group by name. Returns None if not found or tombstoned.

Source

pub fn list_synonym_groups( &self, tid: u64, ) -> Result<Vec<SynonymGroupRecord>, <B as FtsBackend>::Error>

List all synonym group records for a tenant.

Source

pub fn build_synonym_map_for_tenant( &self, _tid: u64, all_groups: &[SynonymGroupRecord], ) -> SynonymMap

Build an in-memory SynonymMap from a slice of synonym group records.

Each term in every group maps to all other terms in that group (bidirectional OR-expansion). Terms are analyzed with the default analyzer so synonym keys match the stemmed tokens produced at query time by search_with_mode.

Source

pub fn expand_query_with_synonyms( &self, tid: u64, tokens: Vec<String>, ) -> Result<Vec<String>, <B as FtsBackend>::Error>

Expand query tokens with a tenant’s synonyms: loads all synonym groups for the tenant, builds the expansion map, and applies it to tokens.

Called at FTS query time inside search_with_mode to expand query tokens before BM25 scoring.

Source§

impl<B> FtsIndex<B>
where B: FtsBackend,

Source

pub fn new(backend: B) -> FtsIndex<B>

Create a new FTS index with the given backend and default BM25 params.

Source

pub fn with_params(backend: B, params: Bm25Params) -> FtsIndex<B>

Create a new FTS index with custom BM25 parameters.

Source

pub fn set_governor(&mut self, governor: Arc<MemoryGovernor>)

Inject a MemoryGovernor to enforce per-engine memory budgets on large allocations (compaction, merge, query). When not set, all allocations proceed without budget enforcement.

Injecting a governor is the correct pattern for Origin deployments. NodeDB-Lite and WASM builds should leave the governor unset, because they have no nodedb-mem dependency.

Source

pub fn backend(&self) -> &B

Access the underlying backend.

Source

pub fn backend_mut(&mut self) -> &mut B

Mutable access to the underlying backend.

Source

pub fn memtable(&self) -> &Memtable

Access the active memtable (for LSM query merging).

Source

pub fn index_document( &self, tid: u64, collection: &str, doc_id: Surrogate, text: &str, ) -> Result<(), FtsIndexError<<B as FtsBackend>::Error>>

Index a document’s text content.

Returns Err(FtsIndexError::SurrogateOutOfRange) if doc_id is Surrogate::ZERO (the unassigned sentinel) or exceeds MAX_INDEXABLE_SURROGATE. The FTS memtable uses the surrogate’s raw u32 value as a direct array index into per-doc fieldnorm storage; values near u32::MAX would cause multi-GiB allocations. Rejecting out-of-range surrogates at this boundary with an explicit error is the correct fix. A debug_assert! would not be, because it is disabled in release builds by default and the invalid value would then pass through silently.

Source

pub fn remove_document( &self, tid: u64, collection: &str, doc_id: Surrogate, ) -> Result<(), <B as FtsBackend>::Error>

Remove a document from the index.

Source

pub fn purge_collection( &self, tid: u64, collection: &str, ) -> Result<usize, <B as FtsBackend>::Error>

Purge all entries for a collection. Returns the count of removed entries.

Source

pub fn purge_tenant(&self, tid: u64) -> Result<usize, <B as FtsBackend>::Error>

Purge all entries for a tenant across every collection.

Source§

impl<B> FtsIndex<B>
where B: FtsBackend,

Source

pub fn search( &self, tid: u64, collection: &str, query: &str, top_k: usize, fuzzy_enabled: bool, prefilter: Option<&SurrogateBitmap>, ) -> Result<Vec<TextSearchResult>, FtsIndexError<<B as FtsBackend>::Error>>

Search the index using BM25 scoring.

Supports NOT <term> and -<term> negation in the query string. Returns Err(FtsIndexError::InvalidQuery) for ill-formed queries such as NOT-only queries or unsupported parenthesised groups.

Source

pub fn search_with_mode( &self, tid: u64, collection: &str, query: &str, top_k: usize, fuzzy_enabled: bool, mode: QueryMode, prefilter: Option<&SurrogateBitmap>, ) -> Result<Vec<TextSearchResult>, FtsIndexError<<B as FtsBackend>::Error>>

Search with explicit boolean mode (AND or OR).

Supports NOT <term> and -<term> negation in the query string.

Auto Trait Implementations§

§

impl<B> !Freeze for FtsIndex<B>

§

impl<B> !RefUnwindSafe for FtsIndex<B>

§

impl<B> Send for FtsIndex<B>
where B: Send,

§

impl<B> !Sync for FtsIndex<B>

§

impl<B> Unpin for FtsIndex<B>
where B: Unpin,

§

impl<B> UnsafeUnpin for FtsIndex<B>
where B: UnsafeUnpin,

§

impl<B> UnwindSafe for FtsIndex<B>
where B: UnwindSafe,

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T> ArchivePointee for T

Source§

type ArchivedMetadata = ()

The archived version of the pointer metadata for this type.
Source§

fn pointer_metadata( _: &<T as ArchivePointee>::ArchivedMetadata, ) -> <T as Pointee>::Metadata

Converts some archived metadata to the pointer metadata for itself.
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T> Instrument for T

Source§

fn instrument(self, span: Span) -> Instrumented<Self>

Instruments this type with the provided Span, returning an Instrumented wrapper. Read more
Source§

fn in_current_span(self) -> Instrumented<Self>

Instruments this type with the current Span, returning an Instrumented wrapper. Read more
Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T> IntoEither for T

Source§

fn into_either(self, into_left: bool) -> Either<Self, Self>

Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§

impl<T> LayoutRaw for T

Source§

fn layout_raw(_: <T as Pointee>::Metadata) -> Result<Layout, LayoutError>

Returns the layout of the type.
Source§

impl<T, N1, N2> Niching<NichedOption<T, N1>> for N2
where T: SharedNiching<N1, N2>, N1: Niching<T>, N2: Niching<T>,

Source§

unsafe fn is_niched(niched: *const NichedOption<T, N1>) -> bool

Returns whether the given value has been niched. Read more
Source§

fn resolve_niched(out: Place<NichedOption<T, N1>>)

Writes data to out indicating that a T is niched.
Source§

impl<T> Pointee for T

Source§

type Metadata = ()

The metadata type for pointers and references to this type.
Source§

impl<T> Same for T

Source§

type Output = T

Should always be Self
Source§

impl<SS, SP> SupersetOf<SS> for SP
where SS: SubsetOf<SP>,

Source§

fn to_subset(&self) -> Option<SS>

The inverse inclusion map: attempts to construct self from the equivalent element of its superset. Read more
Source§

fn is_in_subset(&self) -> bool

Checks if self is actually part of its subset T (and can be converted to it).
Source§

fn to_subset_unchecked(&self) -> SS

Use with care! Same as self.to_subset but without any property checks. Always succeeds.
Source§

fn from_subset(element: &SS) -> SP

The inclusion map: converts self to the equivalent element of its superset.
Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
Source§

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

Source§

fn vzip(self) -> V

Source§

impl<T> WithSubscriber for T

Source§

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,

Attaches the provided Subscriber to this type, returning a WithDispatch wrapper. Read more
Source§

fn with_current_subscriber(self) -> WithDispatch<Self>

Attaches the current default Subscriber to this type, returning a WithDispatch wrapper. Read more
Source§

impl<T> ErasedDestructor for T
where T: 'static,

Source§

impl<T> MaybeSendSync for T