1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
//! Search tool output types.
use serde::{Deserialize, Serialize};
use crate::contracts::{ContainerPath, VaultPath};
use super::input::SearchMode;
/// Provenance of a search result: the kind of match that surfaced it.
///
/// Variant names are serialized in kebab-case.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub enum MatchKind {
    /// The result came from a full-text match.
    Fulltext,
    /// The result came from a semantic (vector) match.
    Semantic,
    /// The result came from a match on the note title.
    Title,
    /// The result came from a match on a note alias.
    Alias,
    /// The result came from a related-note match.
    Related,
}
/// Retrieval strategy responsible for producing a [`MatchAnchor`].
///
/// Variant names are serialized in camelCase.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub enum AnchorKind {
    /// Lexical (BM25) match: fragment-level and positionally precise.
    Bm25,
    /// Vector (semantic) match: chunk-level, carrying char offsets.
    Semantic,
}
/// Anchor identifying one block inside the source note, usable for
/// scroll-to and highlight.
///
/// Ports `MatchAnchor` from `obsidian-hybrid-search` (MIT licensed).
///
/// Field names are serialized in camelCase; optional fields are left out of
/// the serialized form when they are `None`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct MatchAnchor {
    /// Which retrieval strategy produced this anchor.
    pub kind: AnchorKind,
    /// Chain of headings above the matching block (e.g. `"Section > Sub"`).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub heading_path: Option<String>,
    /// Text derived from the block for DOM matching (first 80 chars, syntax
    /// stripped).
    pub match_text: String,
    /// Start of the block as a UTF-8 char offset relative to the note body.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub char_start: Option<u32>,
    /// End of the block as a UTF-8 char offset relative to the note body.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub char_end: Option<u32>,
}
/// A single search hit.
///
/// Field names are serialized in camelCase. Optional fields are omitted from
/// the serialized form when `None`, list fields when empty, and `is_index`
/// when `false`; omitted list fields and `is_index` fall back to their
/// defaults on deserialization.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct SearchResult {
    /// Path of the note, relative to the vault.
    pub vault_path: VaultPath,
    /// Title shown for the result.
    pub title: String,
    /// Snippet for the result; the heading breadcrumb is prepended when one
    /// is available.
    pub snippet: String,
    /// Score of the result after the scope multiplier has been applied.
    pub score: f64,
    /// Score before the scope boost (pre-multiplier).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub raw_score: Option<f64>,
    /// Provenance of the match.
    pub match_kind: MatchKind,
    /// Name of the resolved scope, if applicable.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub scope: Option<String>,
    /// Modification time of the file as RFC 3339 / ISO 8601
    /// (`"2026-04-25T10:23:00Z"`).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub mtime: Option<String>,
    /// Set when the result looks like an index/overview page.
    #[serde(skip_serializing_if = "std::ops::Not::not", default)]
    pub is_index: bool,
    /// Source paths from the `sources:` frontmatter of this note.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    pub citations: Vec<String>,
    /// Notes that this result links to.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    pub links: Vec<String>,
    /// Notes linking to this result.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    pub backlinks: Vec<String>,
    /// Tags attached to the note.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    pub tags: Vec<String>,
    /// Aliases attached to the note.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    pub aliases: Vec<String>,
    /// Match anchors for this result (populated when
    /// `SearchInput.anchors == true`).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub preview_anchors: Option<Vec<MatchAnchor>>,
}
/// Diagnostics for the individual pipeline stages; only populated when
/// verbose output is requested.
///
/// Every field is optional, so a stage that was short-circuited (fast mode,
/// decisive BM25 probe, missing rerank inference) omits its entry instead of
/// reporting zeroes. This is a structured counterpart of qmd's stderr stage
/// timings (qmd `cli/qmd.ts:2407`).
///
/// Field names are serialized in camelCase, and `None` fields are left out
/// of the serialized form.
#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct SearchDiagnostics {
    /// Wall-clock milliseconds spent on LLM query expansion.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub expansion_ms: Option<u64>,
    /// Top score of the BM25 probe that bypassed expansion. When present,
    /// the LLM expansion stage was skipped entirely.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub strong_signal_score: Option<f64>,
    /// How many candidates were sent to the cross-encoder reranker.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub rerank_candidates: Option<u32>,
    /// Wall-clock milliseconds spent reranking.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub rerank_ms: Option<u64>,
    /// Impact of graph refinement applied after lexical/semantic retrieval.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub graph: Option<GraphSearchDiagnostics>,
}
impl SearchDiagnostics {
    /// Reports whether every diagnostic field is `None`, i.e. no stage
    /// recorded a measurement.
    #[must_use]
    pub const fn is_empty(&self) -> bool {
        // The pattern names every field (no `..`), so adding a field to the
        // struct without extending this check is a compile error.
        matches!(
            self,
            Self {
                expansion_ms: None,
                strong_signal_score: None,
                rerank_candidates: None,
                rerank_ms: None,
                graph: None,
            }
        )
    }
}
/// Diagnostics describing the graph-refinement step of a search.
///
/// Field names are serialized in camelCase.
#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct GraphSearchDiagnostics {
    /// Count of existing retrieval candidates whose score graph evidence
    /// boosted.
    pub boosted_results: u32,
    /// Count of new candidates added from the persisted graph artifact.
    pub expanded_results: u32,
    /// Total graph score contribution, summed before final
    /// sorting/truncation.
    pub score_contribution: f64,
}
/// Response returned by the search tool.
///
/// Field names are serialized in camelCase. `vault` and `diagnostics` are
/// omitted from the serialized form when `None`, and `expanded_queries` when
/// empty; `query` carries no such attribute.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct SearchResponse {
    /// Root of the vault as an absolute container path.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub vault: Option<ContainerPath>,
    /// The effective query.
    pub query: Option<String>,
    /// The effective search mode.
    pub mode: SearchMode,
    /// `true` when lexical-only mode was used.
    pub fast: bool,
    /// `true` when query expansion ran.
    pub expanded: bool,
    /// Query variants, either produced by expansion or supplied explicitly.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    pub expanded_queries: Vec<String>,
    /// `true` when reranking ran.
    pub reranked: bool,
    /// Version of the index.
    pub index_version: String,
    /// Number of results.
    pub total: u32,
    /// The search results themselves.
    pub results: Vec<SearchResult>,
    /// Per-stage pipeline diagnostics; only populated when verbose output is
    /// requested.
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub diagnostics: Option<SearchDiagnostics>,
}
impl SearchResponse {
    /// Creates the response used for a blank input query.
    ///
    /// The value has no vault, query, results or diagnostics, a `total` of
    /// zero, every stage flag cleared, mode [`SearchMode::Hybrid`] and index
    /// version `"1"`.
    #[must_use]
    pub fn empty_input() -> Self {
        Self {
            // Nothing to report: no vault, query, results or diagnostics.
            vault: None,
            query: None,
            diagnostics: None,
            results: Vec::new(),
            expanded_queries: Vec::new(),
            total: 0,
            // All stage flags are reported as not having run.
            fast: false,
            expanded: false,
            reranked: false,
            mode: SearchMode::Hybrid,
            index_version: String::from("1"),
        }
    }
}