1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
// BM25 Search Engine (trueno-rag integration)
// Contains: Bm25SearchEngine struct, impl, and Default trait.
// TRUENO-RAG-1-BM25: Replace ripgrep+RRF with true BM25 scoring.
/// BM25-based keyword search engine using trueno-rag.
///
/// Provides true BM25 scoring instead of rank-based RRF heuristics. The engine
/// pairs a `BM25Index` with a side table so that the chunk ids returned by the
/// index can be mapped back to the file location they were indexed from.
pub struct Bm25SearchEngine {
/// Underlying BM25 index; chunks are added to it and queries are scored by it.
index: BM25Index,
/// Maps each indexed `ChunkId` to the file path, line number, and line text
/// recorded for that chunk.
chunk_metadata: HashMap<ChunkId, KeywordMatch>,
}
impl Bm25SearchEngine {
    /// Build an engine around a `BM25Index::new()` index with no chunk
    /// metadata recorded yet.
    pub fn new() -> Self {
        let index = BM25Index::new();
        let chunk_metadata = HashMap::new();
        Self {
            index,
            chunk_metadata,
        }
    }

    /// Build an engine whose index uses caller-supplied BM25 parameters.
    ///
    /// # Arguments
    /// * `k1` - Term frequency saturation (default 1.2)
    /// * `b` - Length normalization (default 0.75)
    pub fn with_params(k1: f32, b: f32) -> Self {
        let index = BM25Index::with_params(k1, b);
        let chunk_metadata = HashMap::new();
        Self {
            index,
            chunk_metadata,
        }
    }

    /// Add every non-blank line of a file to the index, one chunk per line.
    ///
    /// Lines that are empty after trimming whitespace are skipped. Each
    /// remaining line is indexed under a freshly created `DocumentId`, and its
    /// file path, 1-based line number, and text are remembered so search hits
    /// can be mapped back to their location.
    ///
    /// # Arguments
    /// * `file_path` - Path to the file; stored verbatim in the metadata
    /// * `content` - Full file content, split on line boundaries
    /// * `_language` - Programming language; currently unused
    pub fn index_file(&mut self, file_path: &str, content: &str, _language: &str) {
        let non_blank_lines = content
            .lines()
            .enumerate()
            .filter(|(_, text)| !text.trim().is_empty());

        for (zero_based, text) in non_blank_lines {
            // The chunk span uses the 0-based line index as its start and the
            // 1-based line number as its end.
            let one_based = zero_based + 1;
            let chunk = Chunk::new(DocumentId::new(), text.to_string(), zero_based, one_based);
            self.index.add(&chunk);

            let location = KeywordMatch {
                file_path: file_path.to_string(),
                line_number: one_based,
                content: text.to_string(),
            };
            self.chunk_metadata.insert(chunk.id, location);
        }
    }

    /// Run a query against the index and resolve each hit to its location.
    ///
    /// # Arguments
    /// * `query` - Search query
    /// * `limit` - Maximum number of results requested from the index
    ///
    /// # Returns
    /// `(match, score)` pairs in the order the index produced them, carrying
    /// the index's BM25 scores unchanged (not rank-based). Hits whose chunk id
    /// has no recorded metadata are dropped.
    pub fn search(&self, query: &str, limit: usize) -> Vec<(KeywordMatch, f32)> {
        let mut hits = Vec::new();
        for (chunk_id, score) in self.index.search(query, limit) {
            if let Some(location) = self.chunk_metadata.get(&chunk_id) {
                hits.push((location.clone(), score));
            }
        }
        hits
    }

    /// Number of entries held by the underlying index.
    pub fn len(&self) -> usize {
        self.index.len()
    }

    /// Whether the underlying index reports itself as empty.
    pub fn is_empty(&self) -> bool {
        self.index.is_empty()
    }
}
impl Default for Bm25SearchEngine {
fn default() -> Self {
Self::new()
}
}