Skip to main content

nodedb_fts/backend/
traits.rs

1// SPDX-License-Identifier: Apache-2.0
2
3use nodedb_types::Surrogate;
4
5use crate::posting::Posting;
6
7/// Storage backend abstraction for the full-text search engine.
8///
9/// Origin implements this with redb (persistent). Lite implements with
10/// in-memory HashMap. All scoring, BMW, compression, and analysis logic
11/// works identically over any backend.
12///
13/// Every tenant-partitioned method takes `tid: u64` as a first-class
14/// parameter. Backends are required to isolate tenants structurally —
15/// no tenant boundary may depend on lexical-prefix ordering of a
16/// composed string key.
17///
18/// Write methods take `&self` (not `&mut self`) because:
19/// - Redb provides transactional isolation internally — concurrent writes
20///   are safe through redb's MVCC.
21/// - MemoryBackend uses interior mutability (`RefCell`) to match the same
22///   trait signature, keeping the trait uniform.
23pub trait FtsBackend {
24    /// Error type for backend operations.
25    type Error: std::fmt::Display;
26
27    /// Read the posting list for a term in a collection.
28    fn read_postings(
29        &self,
30        tid: u64,
31        collection: &str,
32        term: &str,
33    ) -> Result<Vec<Posting>, Self::Error>;
34
35    /// Write/replace the posting list for a term in a collection.
36    fn write_postings(
37        &self,
38        tid: u64,
39        collection: &str,
40        term: &str,
41        postings: &[Posting],
42    ) -> Result<(), Self::Error>;
43
44    /// Remove a term's posting list entirely.
45    fn remove_postings(&self, tid: u64, collection: &str, term: &str) -> Result<(), Self::Error>;
46
47    /// Read the document length (token count) for a document.
48    fn read_doc_length(
49        &self,
50        tid: u64,
51        collection: &str,
52        doc_id: Surrogate,
53    ) -> Result<Option<u32>, Self::Error>;
54
55    /// Write/replace the document length for a document.
56    fn write_doc_length(
57        &self,
58        tid: u64,
59        collection: &str,
60        doc_id: Surrogate,
61        length: u32,
62    ) -> Result<(), Self::Error>;
63
64    /// Remove a document's length entry.
65    fn remove_doc_length(
66        &self,
67        tid: u64,
68        collection: &str,
69        doc_id: Surrogate,
70    ) -> Result<(), Self::Error>;
71
72    /// Get all term names in a collection (for fuzzy matching).
73    fn collection_terms(&self, tid: u64, collection: &str) -> Result<Vec<String>, Self::Error>;
74
75    /// Get total document count and sum of all document lengths for a collection.
76    /// Returns `(doc_count, total_token_sum)`.
77    ///
78    /// Implementations should maintain these incrementally for O(1) lookup.
79    fn collection_stats(&self, tid: u64, collection: &str) -> Result<(u32, u64), Self::Error>;
80
81    /// Increment collection stats after indexing a document.
82    /// `doc_len` is the number of tokens in the newly indexed document.
83    fn increment_stats(&self, tid: u64, collection: &str, doc_len: u32) -> Result<(), Self::Error>;
84
85    /// Decrement collection stats after removing a document.
86    /// `doc_len` is the token count of the removed document.
87    fn decrement_stats(&self, tid: u64, collection: &str, doc_len: u32) -> Result<(), Self::Error>;
88
89    /// Read a metadata blob by sub-key (e.g., "docmap", "fieldnorms",
90    /// "analyzer", "language").
91    fn read_meta(
92        &self,
93        tid: u64,
94        collection: &str,
95        subkey: &str,
96    ) -> Result<Option<Vec<u8>>, Self::Error>;
97
98    /// Write a metadata blob by sub-key.
99    fn write_meta(
100        &self,
101        tid: u64,
102        collection: &str,
103        subkey: &str,
104        value: &[u8],
105    ) -> Result<(), Self::Error>;
106
107    /// Write a segment blob. `segment_id` is a stable per-collection
108    /// identifier (e.g., `"L{level}:{id:016x}"`).
109    fn write_segment(
110        &self,
111        tid: u64,
112        collection: &str,
113        segment_id: &str,
114        data: &[u8],
115    ) -> Result<(), Self::Error>;
116
117    /// Read a segment blob. Returns None if not found.
118    fn read_segment(
119        &self,
120        tid: u64,
121        collection: &str,
122        segment_id: &str,
123    ) -> Result<Option<Vec<u8>>, Self::Error>;
124
125    /// List all segment ids for a collection.
126    fn list_segments(&self, tid: u64, collection: &str) -> Result<Vec<String>, Self::Error>;
127
128    /// Remove a segment blob.
129    fn remove_segment(
130        &self,
131        tid: u64,
132        collection: &str,
133        segment_id: &str,
134    ) -> Result<(), Self::Error>;
135
136    /// Remove all entries for a collection. Returns count of removed entries.
137    fn purge_collection(&self, tid: u64, collection: &str) -> Result<usize, Self::Error>;
138
139    /// Remove all entries for a tenant across every collection. Returns
140    /// count of removed entries. Implementations MUST use a structural
141    /// drop (e.g., tuple range `(tid, ..)..(tid+1, ..)`) rather than a
142    /// lexical-prefix scan.
143    fn purge_tenant(&self, tid: u64) -> Result<usize, Self::Error>;
144}