// nodedb_fts/backend/traits.rs
// SPDX-License-Identifier: Apache-2.0

use nodedb_types::Surrogate;

use crate::posting::Posting;

7/// Storage backend abstraction for the full-text search engine.
8///
9/// Origin implements this with redb (persistent). Lite implements with
10/// in-memory HashMap. All scoring, BMW, compression, and analysis logic
11/// works identically over any backend.
12///
13/// Every tenant-partitioned method takes `tid: u64` as a first-class
14/// parameter. Backends are required to isolate tenants structurally —
15/// no tenant boundary may depend on lexical-prefix ordering of a
16/// composed string key.
17///
18/// Write methods take `&self` (not `&mut self`) because:
19/// - Redb provides transactional isolation internally — concurrent writes
20/// are safe through redb's MVCC.
21/// - MemoryBackend uses interior mutability (`RefCell`) to match the same
22/// trait signature, keeping the trait uniform.
23pub trait FtsBackend {
24 /// Error type for backend operations.
25 type Error: std::fmt::Display;
26
27 /// Read the posting list for a term in a collection.
28 fn read_postings(
29 &self,
30 tid: u64,
31 collection: &str,
32 term: &str,
33 ) -> Result<Vec<Posting>, Self::Error>;
34
35 /// Write/replace the posting list for a term in a collection.
36 fn write_postings(
37 &self,
38 tid: u64,
39 collection: &str,
40 term: &str,
41 postings: &[Posting],
42 ) -> Result<(), Self::Error>;
43
44 /// Remove a term's posting list entirely.
45 fn remove_postings(&self, tid: u64, collection: &str, term: &str) -> Result<(), Self::Error>;
46
47 /// Read the document length (token count) for a document.
48 fn read_doc_length(
49 &self,
50 tid: u64,
51 collection: &str,
52 doc_id: Surrogate,
53 ) -> Result<Option<u32>, Self::Error>;
54
55 /// Write/replace the document length for a document.
56 fn write_doc_length(
57 &self,
58 tid: u64,
59 collection: &str,
60 doc_id: Surrogate,
61 length: u32,
62 ) -> Result<(), Self::Error>;
63
64 /// Remove a document's length entry.
65 fn remove_doc_length(
66 &self,
67 tid: u64,
68 collection: &str,
69 doc_id: Surrogate,
70 ) -> Result<(), Self::Error>;
71
72 /// Get all term names in a collection (for fuzzy matching).
73 fn collection_terms(&self, tid: u64, collection: &str) -> Result<Vec<String>, Self::Error>;
74
75 /// Get total document count and sum of all document lengths for a collection.
76 /// Returns `(doc_count, total_token_sum)`.
77 ///
78 /// Implementations should maintain these incrementally for O(1) lookup.
79 fn collection_stats(&self, tid: u64, collection: &str) -> Result<(u32, u64), Self::Error>;
80
81 /// Increment collection stats after indexing a document.
82 /// `doc_len` is the number of tokens in the newly indexed document.
83 fn increment_stats(&self, tid: u64, collection: &str, doc_len: u32) -> Result<(), Self::Error>;
84
85 /// Decrement collection stats after removing a document.
86 /// `doc_len` is the token count of the removed document.
87 fn decrement_stats(&self, tid: u64, collection: &str, doc_len: u32) -> Result<(), Self::Error>;
88
89 /// Read a metadata blob by sub-key (e.g., "docmap", "fieldnorms",
90 /// "analyzer", "language").
91 fn read_meta(
92 &self,
93 tid: u64,
94 collection: &str,
95 subkey: &str,
96 ) -> Result<Option<Vec<u8>>, Self::Error>;
97
98 /// Write a metadata blob by sub-key.
99 fn write_meta(
100 &self,
101 tid: u64,
102 collection: &str,
103 subkey: &str,
104 value: &[u8],
105 ) -> Result<(), Self::Error>;
106
107 /// Write a segment blob. `segment_id` is a stable per-collection
108 /// identifier (e.g., `"L{level}:{id:016x}"`).
109 fn write_segment(
110 &self,
111 tid: u64,
112 collection: &str,
113 segment_id: &str,
114 data: &[u8],
115 ) -> Result<(), Self::Error>;
116
117 /// Read a segment blob. Returns None if not found.
118 fn read_segment(
119 &self,
120 tid: u64,
121 collection: &str,
122 segment_id: &str,
123 ) -> Result<Option<Vec<u8>>, Self::Error>;
124
125 /// List all segment ids for a collection.
126 fn list_segments(&self, tid: u64, collection: &str) -> Result<Vec<String>, Self::Error>;
127
128 /// Remove a segment blob.
129 fn remove_segment(
130 &self,
131 tid: u64,
132 collection: &str,
133 segment_id: &str,
134 ) -> Result<(), Self::Error>;
135
136 /// Remove all entries for a collection. Returns count of removed entries.
137 fn purge_collection(&self, tid: u64, collection: &str) -> Result<usize, Self::Error>;
138
139 /// Remove all entries for a tenant across every collection. Returns
140 /// count of removed entries. Implementations MUST use a structural
141 /// drop (e.g., tuple range `(tid, ..)..(tid+1, ..)`) rather than a
142 /// lexical-prefix scan.
143 fn purge_tenant(&self, tid: u64) -> Result<usize, Self::Error>;
144}