1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
// Hybrid scoring algorithm
use serde::{Deserialize, Serialize};
/// Relevance score for a single search hit: the blended value plus the four
/// signals it was computed from.
///
/// The derived `Default` is the all-zero score. Every field is public, so this
/// type does not enforce that `overall` agrees with the components; build
/// values through [`Score::new_hybrid`] or [`HybridScorer::score_hybrid`] to
/// get a consistent one.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Default)]
pub struct Score {
/// Overall score: the weighted blend of the four components.
/// [`HybridScorer::score_hybrid`] clamps it to `0.0..=1.0`.
pub overall: f32,
/// TF-IDF similarity component (keyword-based semantic signal).
pub tfidf: f32,
/// Neural/remote similarity component (deep semantic signal).
pub neural: f32,
/// Structural relevance component.
pub structural: f32,
/// Text match component.
pub text_match: f32,
}
impl Score {
    /// Legacy three-signal constructor, kept for source compatibility.
    ///
    /// `semantic` is stored as the TF-IDF component and the neural component
    /// is fixed at `0.0`; everything else behaves like [`Score::new_hybrid`].
    #[deprecated(
        since = "1.6.4",
        note = "Use new_hybrid instead for TF-IDF + neural scoring"
    )]
    pub fn new(semantic: f32, structural: f32, text_match: f32) -> Self {
        // The legacy API had no neural signal, so it contributes nothing.
        let neural = 0.0;
        Self::new_hybrid(semantic, neural, structural, text_match)
    }

    /// Build a score from its four components.
    ///
    /// The components are stored exactly as given. `overall` is the value the
    /// default scorer ([`HybridScorer::new`]) computes for them.
    pub fn new_hybrid(tfidf: f32, neural: f32, structural: f32, text_match: f32) -> Self {
        let blended = HybridScorer::new().score_hybrid(tfidf, neural, structural, text_match);

        Self {
            tfidf,
            neural,
            structural,
            text_match,
            overall: blended.overall,
        }
    }

    /// Return the overall (blended) score.
    pub fn value(&self) -> f32 {
        let Self { overall, .. } = *self;
        overall
    }
}
/// Hybrid scorer combining TF-IDF, neural, structural, and text signals.
///
/// A score is the weighted sum of the four components, clamped to
/// `0.0..=1.0`. The weights are used exactly as supplied: nothing in this type
/// validates them or forces them to sum to `1.0` (the built-in presets do).
///
/// `Default` is implemented by hand so that `HybridScorer::default()` is the
/// same scorer as [`HybridScorer::new`]. A derived `Default` would zero every
/// weight and therefore score every input as `0.0`.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct HybridScorer {
    /// Weight for TF-IDF component
    tfidf_weight: f32,
    /// Weight for neural component
    neural_weight: f32,
    /// Weight for structural component
    structural_weight: f32,
    /// Weight for text match component
    text_weight: f32,
}

impl Default for HybridScorer {
    /// Same scorer as [`HybridScorer::new`].
    fn default() -> Self {
        Self::new()
    }
}

impl HybridScorer {
    /// Create a new hybrid scorer
    ///
    /// Default weights are optimized for code search with neural embeddings:
    /// - tfidf: 0.30 (keyword-based semantic)
    /// - neural: 0.40 (deep semantic understanding)
    /// - structural: 0.15 (moderate complexity signal)
    /// - text: 0.15 (exact keyword matching)
    pub fn new() -> Self {
        Self::for_code()
    }

    /// Scorer tuned for code symbol search (with neural embeddings)
    ///
    /// Optimized for finding code symbols where semantic understanding
    /// and keyword overlap are both important.
    pub fn for_code() -> Self {
        Self {
            tfidf_weight: 0.30,
            neural_weight: 0.40,
            structural_weight: 0.15,
            text_weight: 0.15,
        }
    }

    /// Scorer tuned for code search without neural embeddings (TF-IDF only)
    ///
    /// When neural embeddings are unavailable, TF-IDF gets higher weight.
    pub fn for_code_without_neural() -> Self {
        Self {
            tfidf_weight: 0.60,
            neural_weight: 0.00,
            structural_weight: 0.20,
            text_weight: 0.20,
        }
    }

    /// Scorer tuned for natural-language/prose search
    ///
    /// Optimized for searching documentation, READMEs, and other
    /// prose where semantic understanding is more valuable.
    pub fn for_prose() -> Self {
        Self {
            tfidf_weight: 0.25,
            neural_weight: 0.55,
            structural_weight: 0.10,
            text_weight: 0.10,
        }
    }

    /// Set custom weights (legacy method for compatibility)
    ///
    /// `semantic` becomes the TF-IDF weight and the neural weight is reset to
    /// `0.0`, whatever it was before.
    #[deprecated(since = "1.6.4", note = "Use with_weights_hybrid instead")]
    pub fn with_weights(self, semantic: f32, structural: f32, text: f32) -> Self {
        // Map legacy semantic to tfidf for backward compatibility
        self.with_weights_hybrid(semantic, 0.0, structural, text)
    }

    /// Set custom hybrid weights
    ///
    /// All four weights are replaced; the values are stored without
    /// validation or normalisation.
    pub fn with_weights_hybrid(
        mut self,
        tfidf: f32,
        neural: f32,
        structural: f32,
        text: f32,
    ) -> Self {
        self.tfidf_weight = tfidf;
        self.neural_weight = neural;
        self.structural_weight = structural;
        self.text_weight = text;
        self
    }

    /// Calculate combined score (legacy method for compatibility)
    ///
    /// `semantic` is scored as the TF-IDF component with a neural component
    /// of `0.0`.
    #[deprecated(
        since = "1.6.4",
        note = "Use score_hybrid instead for TF-IDF + neural scoring"
    )]
    pub fn score(&self, semantic: f32, structural: f32, text_match: f32) -> Score {
        self.score_hybrid(semantic, 0.0, structural, text_match)
    }

    /// Calculate combined hybrid score with TF-IDF and neural components
    ///
    /// Returns a [`Score`] whose components are the inputs unchanged and whose
    /// `overall` is their weighted sum clamped to `0.0..=1.0`. A NaN input
    /// yields a NaN `overall`.
    pub fn score_hybrid(&self, tfidf: f32, neural: f32, structural: f32, text_match: f32) -> Score {
        let overall = self.weighted_sum(tfidf, neural, structural, text_match);
        Score {
            overall: overall.clamp(0.0, 1.0),
            tfidf,
            neural,
            structural,
            text_match,
        }
    }

    /// Re-rank results based on query type (legacy method for compatibility)
    #[deprecated(
        since = "1.6.4",
        note = "Use rerank_hybrid instead for TF-IDF + neural reranking"
    )]
    pub fn rerank(&self, results: Vec<ScoreResult>, query_type: QueryType) -> Vec<ScoreResult> {
        self.rerank_hybrid(results, query_type)
    }

    /// Re-rank hybrid results based on query type
    ///
    /// Every result has the components relevant to `query_type` boosted (see
    /// [`QueryType`]), its `overall` recomputed with this scorer's weights,
    /// and the list is returned sorted best-first.
    ///
    /// * Boosted components are written back to each result, so they may
    ///   exceed their original range and re-ranking twice boosts twice.
    /// * Ordering uses the unclamped boosted value, so results that would all
    ///   saturate at `1.0` are still ranked against each other; the stored
    ///   `overall` is then clamped to `0.0..=1.0`, as in
    ///   [`HybridScorer::score_hybrid`].
    /// * Results with equal scores keep their input order, and results whose
    ///   score is NaN are placed last.
    /// * The `query_type` stored on each result is not consulted; only the
    ///   `query_type` argument matters.
    pub fn rerank_hybrid(
        &self,
        results: Vec<ScoreResult>,
        query_type: QueryType,
    ) -> Vec<ScoreResult> {
        let (tfidf_boost, neural_boost, structural_boost, text_boost) =
            Self::boost_factors(query_type);

        let mut ranked = results;
        for result in &mut ranked {
            let score = &mut result.score;
            score.tfidf *= tfidf_boost;
            score.neural *= neural_boost;
            score.structural *= structural_boost;
            score.text_match *= text_boost;
            // Left unclamped until after the sort: clamping first would turn
            // every saturated result into a tie.
            score.overall =
                self.weighted_sum(score.tfidf, score.neural, score.structural, score.text_match);
        }

        // With NaN mapped to the lowest key the comparison is a proper total
        // order, which `sort_by` requires; the `unwrap_or` is only a fallback.
        ranked.sort_by(|a, b| {
            Self::ranking_key(b.score.overall)
                .partial_cmp(&Self::ranking_key(a.score.overall))
                .unwrap_or(std::cmp::Ordering::Equal)
        });

        for result in &mut ranked {
            result.score.overall = result.score.overall.clamp(0.0, 1.0);
        }
        ranked
    }

    /// Weighted sum of the four components, before any clamping.
    fn weighted_sum(&self, tfidf: f32, neural: f32, structural: f32, text_match: f32) -> f32 {
        tfidf * self.tfidf_weight
            + neural * self.neural_weight
            + structural * self.structural_weight
            + text_match * self.text_weight
    }

    /// Per-component multipliers `(tfidf, neural, structural, text_match)`
    /// applied when re-ranking for `query_type`; `1.0` leaves a component
    /// untouched.
    fn boost_factors(query_type: QueryType) -> (f32, f32, f32, f32) {
        match query_type {
            // Boost neural and TF-IDF scores
            QueryType::Semantic => (1.1, 1.2, 1.0, 1.0),
            // Boost structural scores
            QueryType::Structural => (1.0, 1.0, 1.2, 1.0),
            // Boost text match scores
            QueryType::Text => (1.0, 1.0, 1.0, 1.2),
        }
    }

    /// Sort key for an overall score: NaN ranks below every real score.
    fn ranking_key(overall: f32) -> f32 {
        if overall.is_nan() {
            f32::NEG_INFINITY
        } else {
            overall
        }
    }
}
/// Query type for adaptive ranking.
///
/// Passed to [`HybridScorer::rerank_hybrid`] to choose which score components
/// are boosted before results are re-sorted.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum QueryType {
/// Semantic-heavy query: re-ranking multiplies the neural component by 1.2
/// and the TF-IDF component by 1.1.
Semantic,
/// Structural-heavy query: re-ranking multiplies the structural component
/// by 1.2.
Structural,
/// Text-heavy query: re-ranking multiplies the text-match component by 1.2.
Text,
}
/// Score result with metadata: an identifier, its [`Score`], and a query type.
///
/// This is the element type that [`HybridScorer::rerank_hybrid`] consumes and
/// returns.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScoreResult {
/// Node ID of the item this score belongs to.
// NOTE(review): what a "node" is (graph node, symbol, document?) is not
// visible in this file — confirm against the callers that build these.
pub node_id: String,
/// Calculated score. Re-ranking updates its components and overall value.
pub score: Score,
/// Query type detected.
///
/// Informational as far as this file goes: [`HybridScorer::rerank_hybrid`]
/// boosts according to its own `query_type` argument and never reads this
/// field.
pub query_type: QueryType,
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Tolerance shared by every blended-score comparison below.
    const TOLERANCE: f32 = 0.01;

    /// Assert that `actual` lies within [`TOLERANCE`] of `expected`.
    fn assert_close(actual: f32, expected: f32) {
        assert!((actual - expected).abs() < TOLERANCE);
    }

    /// The legacy constructor stores `semantic` as TF-IDF and zeroes neural.
    #[test]
    #[allow(deprecated)]
    fn test_score_creation_legacy() {
        let legacy = Score::new(0.9, 0.7, 0.5);
        assert_eq!(legacy.tfidf, 0.9); // Legacy semantic mapped to tfidf
        assert_eq!(legacy.neural, 0.0);
        assert_eq!(legacy.structural, 0.7);
        assert_eq!(legacy.text_match, 0.5);
    }

    /// The hybrid constructor stores all four components unchanged.
    #[test]
    fn test_score_creation_hybrid() {
        let hybrid = Score::new_hybrid(0.7, 0.9, 0.6, 0.5);
        assert_eq!(hybrid.tfidf, 0.7);
        assert_eq!(hybrid.neural, 0.9);
        assert_eq!(hybrid.structural, 0.6);
        assert_eq!(hybrid.text_match, 0.5);
    }

    /// Legacy scoring with the default weights and no neural signal.
    #[test]
    #[allow(deprecated)]
    fn test_hybrid_scorer_legacy() {
        let result = HybridScorer::new().score(0.8, 0.6, 0.4);
        // Default weights: 0.30 * 0.8 + 0.40 * 0.0 + 0.15 * 0.6 + 0.15 * 0.4 = 0.39
        assert_close(result.overall, 0.39);
    }

    /// Hybrid scoring with the default weights.
    #[test]
    fn test_hybrid_scorer_new() {
        let result = HybridScorer::new().score_hybrid(0.8, 0.9, 0.6, 0.4);
        // Default weights: 0.30 * 0.8 + 0.40 * 0.9 + 0.15 * 0.6 + 0.15 * 0.4 = 0.75
        assert_close(result.overall, 0.75);
    }

    /// Legacy custom weights map `semantic` to TF-IDF and zero the neural weight.
    #[test]
    #[allow(deprecated)]
    fn test_custom_weights_legacy() {
        let custom = HybridScorer::new().with_weights(0.3, 0.5, 0.2);
        let result = custom.score(0.8, 0.6, 0.4);
        // Custom weights (mapped): 0.3 * 0.8 + 0.0 * 0.0 + 0.5 * 0.6 + 0.2 * 0.4 = 0.62
        assert_close(result.overall, 0.62);
    }

    /// Custom hybrid weights replace all four defaults.
    #[test]
    fn test_custom_weights_hybrid() {
        let custom = HybridScorer::new().with_weights_hybrid(0.3, 0.4, 0.2, 0.1);
        let result = custom.score_hybrid(0.8, 0.9, 0.6, 0.4);
        // Custom hybrid weights: 0.3 * 0.8 + 0.4 * 0.9 + 0.2 * 0.6 + 0.1 * 0.4 = 0.76
        assert_close(result.overall, 0.76);
    }

    /// Code-search preset with a neural signal.
    #[test]
    fn test_for_code_scorer_with_neural() {
        let result = HybridScorer::for_code().score_hybrid(0.8, 0.9, 0.6, 0.4);
        // Code weights with neural: 0.30 * 0.8 + 0.40 * 0.9 + 0.15 * 0.6 + 0.15 * 0.4 = 0.75
        assert_close(result.overall, 0.75);
    }

    /// Code-search preset for when no neural signal is available.
    #[test]
    fn test_for_code_scorer_without_neural() {
        let result = HybridScorer::for_code_without_neural().score_hybrid(0.8, 0.0, 0.6, 0.4);
        // Code weights without neural: 0.60 * 0.8 + 0.00 * 0.0 + 0.20 * 0.6 + 0.20 * 0.4 = 0.68
        assert_close(result.overall, 0.68);
    }

    /// Prose preset, which leans on the neural signal.
    #[test]
    fn test_for_prose_scorer() {
        let result = HybridScorer::for_prose().score_hybrid(0.8, 0.9, 0.6, 0.4);
        // Prose weights: 0.25 * 0.8 + 0.55 * 0.9 + 0.10 * 0.6 + 0.10 * 0.4 = 0.795
        assert_close(result.overall, 0.795);
    }
}