1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
// SPDX-License-Identifier: Apache-2.0 OR MIT
//! Public `Query` for `Memory::search`.
//!
//! Three modes:
//! - `Query::semantic(text)` — top-K cosine over vector indices.
//! - `Query::text(text)` — Tantivy parsed query over lexical indices.
//! - `Query::hybrid(text)` — reciprocal-rank fusion (RRF) of the two, weighted by `alpha`.
use crate::memory::{MemoryId, MemoryRef};
use crate::partition::PartitionPath;
/// Which retrieval strategy a [`Query`] runs.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub enum QueryMode {
    /// Vector (semantic) retrieval only.
    Semantic,
    /// Lexical (text) retrieval only.
    Text,
    /// Reciprocal-rank fusion of the semantic and lexical result lists.
    /// The semantic side is weighted by `alpha` and the text side by
    /// `1 - alpha`. Default `0.6`.
    Hybrid {
        /// Weight of the semantic side, in `[0, 1]`.
        alpha: f32,
    },
}
/// A search request: the query text, the retrieval mode, and optional tuning
/// knobs set through the builder methods on this type.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub struct Query {
    /// Raw query text as supplied by the caller.
    pub(crate) text: String,
    /// Retrieval strategy to run.
    pub(crate) mode: QueryMode,
    /// Partition the search is restricted to; `None` means unrestricted.
    pub(crate) within: Option<PartitionPath>,
    /// Caller-supplied query vector; `None` means none was provided.
    pub(crate) precomputed_embedding: Option<Vec<f32>>,
    /// Plan 10: whether hierarchical descent was opted into.
    pub(crate) hierarchical: bool,
    /// Plan 10: lowest cosine score (after dedup) at which a child node is
    /// still descended into during hierarchical search. `None` disables
    /// pruning.
    pub(crate) prune_threshold: Option<f32>,
    /// Plan 10: each level descends into its top `k * descend_factor`
    /// children.
    pub(crate) descend_factor: u32,
}
impl Query {
    /// Semantic weight a freshly built hybrid query starts with.
    const DEFAULT_HYBRID_ALPHA: f32 = 0.6;

    /// Beam multiplier every freshly built query starts with.
    const DEFAULT_DESCEND_FACTOR: u32 = 4;

    /// Shared constructor behind `semantic`, `text` and `hybrid`: stores the
    /// text and mode and leaves every optional knob at its default (no
    /// partition restriction, no precomputed vector, flat search, no pruning).
    fn with_mode(text: impl Into<String>, mode: QueryMode) -> Self {
        Self {
            text: text.into(),
            mode,
            within: None,
            precomputed_embedding: None,
            hierarchical: false,
            prune_threshold: None,
            descend_factor: Self::DEFAULT_DESCEND_FACTOR,
        }
    }

    /// Pure semantic search.
    #[must_use]
    pub fn semantic(text: impl Into<String>) -> Self {
        Self::with_mode(text, QueryMode::Semantic)
    }

    /// Pure lexical search.
    #[must_use]
    pub fn text(text: impl Into<String>) -> Self {
        Self::with_mode(text, QueryMode::Text)
    }

    /// Hybrid (RRF) search; default `alpha = 0.6`.
    #[must_use]
    pub fn hybrid(text: impl Into<String>) -> Self {
        Self::with_mode(
            text,
            QueryMode::Hybrid {
                alpha: Self::DEFAULT_HYBRID_ALPHA,
            },
        )
    }

    /// Plan 10: opt into hierarchical descent. The search starts at the
    /// configured scope (or the tenant root if unset) and descends through
    /// internal nodes, scoring child summaries at each level and pruning
    /// branches whose top scores fall below [`Query::prune_threshold`].
    /// Leaves are searched only after the parent's index has identified
    /// them as relevant. Defaults to flat (whole-tree) search when off.
    #[must_use]
    pub fn hierarchical(mut self) -> Self {
        self.hierarchical = true;
        self
    }

    /// Plan 10: prune children whose mid-level summary score is below `t`.
    /// Default is no pruning — every child whose score made the top-K of the
    /// parent's index is descended into. Clamped to `[-1, 1]` (cosine range).
    ///
    /// A NaN `t` is ignored and the previous setting is kept.
    #[must_use]
    pub fn prune_threshold(mut self, t: f32) -> Self {
        // `f32::clamp` hands NaN straight back, and every ordered comparison
        // against NaN is false, so a stored NaN could never act as a cutoff.
        if !t.is_nan() {
            self.prune_threshold = Some(t.clamp(-1.0, 1.0));
        }
        self
    }

    /// Plan 10: at each internal level, descend into the top
    /// `k * descend_factor` children. Higher = wider beam = slower but more
    /// recall; lower = narrower beam = faster but more risk of pruning the
    /// right answer. Default 4. Clamped to `>= 1`.
    #[must_use]
    pub fn descend_factor(mut self, n: u32) -> Self {
        self.descend_factor = n.max(1);
        self
    }

    /// Whether this query is in hierarchical mode.
    #[must_use]
    pub fn is_hierarchical(&self) -> bool {
        self.hierarchical
    }

    /// Override hybrid alpha, clamped to `[0, 1]`. No effect on non-hybrid
    /// queries.
    ///
    /// A NaN `alpha` is ignored and the previous weight is kept.
    #[must_use]
    pub fn alpha(mut self, alpha: f32) -> Self {
        if let QueryMode::Hybrid { alpha: current } = &mut self.mode {
            // `f32::clamp` returns NaN for a NaN receiver, which would break
            // the documented `[0, 1]` range of the stored weight.
            if !alpha.is_nan() {
                *current = alpha.clamp(0.0, 1.0);
            }
        }
        self
    }

    /// Restrict the search to a partition (and its descendants).
    #[must_use]
    pub fn within(mut self, path: PartitionPath) -> Self {
        self.within = Some(path);
        self
    }

    /// Caller hands the engine a pre-computed query vector. Used by
    /// caller-owned-models pathways (Apple Foundation Models, OpenAI proxies,
    /// Swift FFI consumers) where the model runs outside the library on the
    /// query side.
    ///
    /// When set, the engine bypasses its `Embedder` (if any) for the query
    /// step. The vector's length must match `schema_meta.embedder_dims`;
    /// mismatch surfaces from the underlying vector index. Has no effect on
    /// `QueryMode::Text` (lexical search ignores the vector).
    ///
    /// The vector is stored as given; this method does not validate it.
    ///
    /// See spec § 12 (caller-owned models) and § 12.13.
    #[must_use]
    pub fn with_embedding(mut self, vector: Vec<f32>) -> Self {
        self.precomputed_embedding = Some(vector);
        self
    }

    /// Borrow the precomputed query vector, if any. `None` means the engine
    /// must invoke its configured `Embedder` to derive the query vector.
    #[must_use]
    pub fn precomputed_embedding(&self) -> Option<&[f32]> {
        self.precomputed_embedding.as_deref()
    }

    /// Borrow the query text.
    #[must_use]
    pub fn text_str(&self) -> &str {
        &self.text
    }

    /// Borrow the mode.
    #[must_use]
    pub fn mode(&self) -> &QueryMode {
        &self.mode
    }

    /// Borrow the partition restriction (if any).
    #[must_use]
    pub fn scope(&self) -> Option<&PartitionPath> {
        self.within.as_ref()
    }
}
/// A single entry in a search result. How to read `score` depends on the
/// query mode:
/// - semantic: cosine similarity in `[-1, 1]` (mock embedder is unit-norm so usually `[0, 1]`).
/// - text: Tantivy BM25-ish raw score.
/// - hybrid: RRF score (sum of `alpha / (60 + rank)`).
#[derive(Debug, Clone, PartialEq, serde::Serialize)]
#[non_exhaustive]
pub struct SearchHit {
    /// Reference to the memory that matched.
    pub r#ref: MemoryRef,
    /// Relevance score; higher is better.
    pub score: f32,
}
impl SearchHit {
    /// Build a hit from a memory id, the partition it belongs to, and its
    /// score. The id and partition are bundled into the hit's `MemoryRef`.
    #[must_use]
    pub fn new(id: MemoryId, partition: PartitionPath, score: f32) -> Self {
        let memory = MemoryRef { id, partition };
        Self {
            r#ref: memory,
            score,
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Pull the alpha out of a hybrid query; any other mode fails the test.
    fn hybrid_alpha(query: &Query) -> f32 {
        if let QueryMode::Hybrid { alpha } = query.mode() {
            *alpha
        } else {
            panic!("expected hybrid")
        }
    }

    /// Out-of-range alphas are pulled back to the nearest bound of `[0, 1]`.
    #[test]
    fn hybrid_clamps_alpha() {
        let too_high = Query::hybrid("x").alpha(2.0);
        assert!((hybrid_alpha(&too_high) - 1.0).abs() < f32::EPSILON);

        let too_low = Query::hybrid("x").alpha(-0.5);
        assert!(hybrid_alpha(&too_low).abs() < f32::EPSILON);
    }

    /// Setting alpha on a semantic query leaves the mode untouched.
    #[test]
    fn alpha_on_non_hybrid_is_noop() {
        let query = Query::semantic("x").alpha(0.1);
        assert_eq!(*query.mode(), QueryMode::Semantic);
    }
}