1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
//! Text Index Operations (Full-Text Search)
//!
//! Extracted from database_legacy.rs
//! Provides full-text search with BM25 ranking
use crate::database::core::MoteDB;
use crate::types::RowId;
use crate::{Result, StorageError};
use crate::index::text_fts::{TextFTSIndex, TextFTSStats};
use parking_lot::RwLock;
use std::sync::Arc;
impl MoteDB {
/// Create and register an empty text index for full-text search.
///
/// Ensures the `indexes` directory exists under the database path, builds a
/// [`TextFTSIndex`] via `TextFTSIndex::new` at `indexes/text_{name}`, and stores
/// it in `self.text_indexes` under `name`. Existing rows are NOT indexed here.
///
/// # Errors
/// Propagates the error if the `indexes` directory cannot be created or if
/// `TextFTSIndex::new` fails.
///
/// # Example
/// ```ignore
/// db.create_text_index("articles_content")?;
/// ```
pub fn create_text_index(&self, name: &str) -> Result<()> {
    // Unified path layout: {db}.mote/indexes/text_{name}/
    let indexes_dir = self.path.join("indexes");
    std::fs::create_dir_all(&indexes_dir)?;
    let index_path = indexes_dir.join(format!("text_{}", name));
    let index = TextFTSIndex::new(index_path)?;

    // Move the shared handle straight into the map: nothing else in this
    // function uses it afterwards, so cloning the `Arc` first is unnecessary.
    // NOTE(review): if `name` is already registered, `insert` presumably
    // replaces the previous handle -- confirm that this is intended.
    self.text_indexes
        .insert(name.to_string(), Arc::new(RwLock::new(index)));

    // P0 FIX: only create an empty index here; do not backfill data.
    // Reasons:
    // 1. Avoid a double scan (create_text_index and the executor each scanning once)
    // 2. Avoid a memory blow-up (loading everything into a Vec)
    // 3. Avoid a lock storm (one million write-lock acquisitions)
    // Backfill is handled by executor.rs (using batched, streaming processing).
    Ok(())
}
/// Add the text of one row to a named text index.
///
/// Looks up `index_name` in `self.text_indexes`, takes that index's write lock
/// and forwards `row_id` and `text` to the index's `insert`.
///
/// # Errors
/// Returns `StorageError::Index` when no index is registered under
/// `index_name`; any error from the underlying `insert` is propagated.
///
/// # Example
/// ```ignore
/// db.insert_text(row_id, "articles_content", "The quick brown fox...")?;
/// ```
pub fn insert_text(&self, row_id: RowId, index_name: &str, text: &str) -> Result<()> {
    // Evaluated lazily: the message is only formatted when the lookup misses.
    let missing = || StorageError::Index(format!("Text index '{}' not found", index_name));
    let entry = self.text_indexes.get(index_name).ok_or_else(missing)?;
    let lock = entry.value();
    lock.write().insert(row_id, text)?;
    Ok(())
}
/// Remove the text of one row from a named text index.
///
/// Looks up `index_name` in `self.text_indexes`, takes that index's write lock
/// and forwards `row_id` and `text` to the index's `delete`.
///
/// NOTE(review): `text` is presumably the text that was originally indexed for
/// the row -- confirm against `TextFTSIndex::delete`.
///
/// # Errors
/// Returns `StorageError::Index` when no index is registered under
/// `index_name`; any error from the underlying `delete` is propagated.
///
/// # Example
/// ```ignore
/// db.delete_text(row_id, "articles_content", "The quick brown fox...")?;
/// ```
pub fn delete_text(&self, row_id: RowId, index_name: &str, text: &str) -> Result<()> {
    // Evaluated lazily: the message is only formatted when the lookup misses.
    let missing = || StorageError::Index(format!("Text index '{}' not found", index_name));
    let entry = self.text_indexes.get(index_name).ok_or_else(missing)?;
    let lock = entry.value();
    lock.write().delete(row_id, text)?;
    Ok(())
}
/// Replace the indexed text of one row in a named text index.
///
/// Looks up `index_name` in `self.text_indexes`, takes that index's write lock
/// and forwards `row_id`, `old_text` and `new_text` to the index's `update`,
/// so the whole update happens under a single lock acquisition.
///
/// # Errors
/// Returns `StorageError::Index` when no index is registered under
/// `index_name`; any error from the underlying `update` is propagated.
///
/// # Example
/// ```ignore
/// db.update_text(row_id, "articles_content", "old text", "new text")?;
/// ```
pub fn update_text(
    &self,
    row_id: RowId,
    index_name: &str,
    old_text: &str,
    new_text: &str,
) -> Result<()> {
    // Evaluated lazily: the message is only formatted when the lookup misses.
    let missing = || StorageError::Index(format!("Text index '{}' not found", index_name));
    let entry = self.text_indexes.get(index_name).ok_or_else(missing)?;
    let lock = entry.value();
    lock.write().update(row_id, old_text, new_text)?;
    Ok(())
}
/// Insert the texts of many rows into a named text index in one call.
///
/// The index's write lock is taken once for the whole batch instead of once
/// per row, and the texts are passed as borrowed `&str` slices rather than
/// owned `String`s. The original documentation reports this as 10-100x faster
/// than individual inserts (not verified here).
///
/// An empty `texts` slice returns `Ok(0)` immediately, before the index lookup,
/// so a missing index is not reported in that case.
///
/// # Returns
/// The number of `(row_id, text)` pairs submitted, i.e. `texts.len()`.
///
/// # Errors
/// Returns `StorageError::Index` when no index is registered under
/// `index_name`; any error from the underlying `batch_insert` is propagated.
///
/// # Example
/// ```ignore
/// let texts: Vec<(u64, &str)> = vec![
///     (1, "The quick brown fox"),
///     (2, "jumps over the lazy dog"),
///     (3, "The lazy cat"),
/// ];
/// db.batch_insert_texts("description", &texts)?;
/// ```
pub fn batch_insert_texts(&self, index_name: &str, texts: &[(RowId, &str)]) -> Result<usize> {
    let submitted = texts.len();
    if submitted == 0 {
        // Nothing to do; skip the lookup and the lock entirely.
        return Ok(0);
    }

    let missing = || StorageError::Index(format!("Text index '{}' not found", index_name));
    let entry = self.text_indexes.get(index_name).ok_or_else(missing)?;
    entry.value().write().batch_insert(texts)?;
    Ok(submitted)
}
/// Search a named text index for rows matching `query`.
///
/// Looks up `index_name` in `self.text_indexes`, takes that index's read lock
/// and delegates to the index's `search`. Matching is documented as boolean
/// AND over the query terms; the actual logic lives in `TextFTSIndex::search`.
///
/// # Errors
/// Returns `StorageError::Index` when no index is registered under
/// `index_name`; any error from the underlying `search` is propagated.
///
/// # Example
/// ```ignore
/// let doc_ids = db.text_search("articles_content", "rust database")?;
/// ```
pub fn text_search(&self, index_name: &str, query: &str) -> Result<Vec<RowId>> {
    let missing = || StorageError::Index(format!("Text index '{}' not found", index_name));
    let entry = self.text_indexes.get(index_name).ok_or_else(missing)?;
    // Shared (read) access is enough for a query.
    let reader = entry.value().read();
    let matches = reader.search(query)?;
    Ok(matches)
}
/// Search a named text index and return scored results.
///
/// Looks up `index_name` in `self.text_indexes`, takes that index's read lock
/// and delegates to the index's `search_ranked` with `query` and `top_k`.
/// Scoring (BM25, per the module documentation), ordering and the `top_k`
/// cut-off are implemented by `TextFTSIndex::search_ranked`.
///
/// # Returns
/// `(row_id, score)` pairs as produced by `search_ranked`.
///
/// # Errors
/// Returns `StorageError::Index` when no index is registered under
/// `index_name`; any error from the underlying `search_ranked` is propagated.
///
/// # Example
/// ```ignore
/// // Get top 10 most relevant documents
/// let results = db.text_search_ranked("articles_content", "rust database", 10)?;
/// for (row_id, score) in results {
///     println!("Document {}: score {:.3}", row_id, score);
/// }
/// ```
pub fn text_search_ranked(
    &self,
    index_name: &str,
    query: &str,
    top_k: usize,
) -> Result<Vec<(RowId, f32)>> {
    let missing = || StorageError::Index(format!("Text index '{}' not found", index_name));
    let entry = self.text_indexes.get(index_name).ok_or_else(missing)?;
    // Shared (read) access is enough for a query.
    let reader = entry.value().read();
    let scored = reader.search_ranked(query, top_k)?;
    Ok(scored)
}
/// Return statistics for a named text index.
///
/// Looks up `name` in `self.text_indexes`, takes that index's read lock and
/// returns whatever the index's `stats` reports.
///
/// # Errors
/// Returns `StorageError::Index` when no index is registered under `name`.
///
/// # Example
/// ```ignore
/// let stats = db.text_index_stats("articles_content")?;
/// println!("Total documents: {}", stats.total_documents);
/// println!("Unique terms: {}", stats.unique_terms);
/// ```
pub fn text_index_stats(&self, name: &str) -> Result<TextFTSStats> {
    let missing = || StorageError::Index(format!("Text index '{}' not found", name));
    let entry = self.text_indexes.get(name).ok_or_else(missing)?;
    // The read lock is only held while the snapshot is taken.
    let snapshot = entry.value().read().stats();
    Ok(snapshot)
}
/// Flush every registered text index.
///
/// Walks `self.text_indexes` and, for each entry, takes the index's write lock
/// and calls its `flush` (documented as persisting the in-memory inverted
/// lists to disk). Iteration stops at the first failure, so indexes that have
/// not been visited yet are left unflushed.
///
/// Note from the original documentation: index metadata is managed by
/// IndexRegistry (`index_metadata.bin`), so there is no need to save
/// `text_indexes_metadata.bin` separately.
///
/// # Errors
/// Propagates the first error returned by an index's `flush`.
pub fn flush_text_indexes(&self) -> Result<()> {
    // DashMap: iterate the map directly and flush each index in turn.
    self.text_indexes.iter().try_for_each(|entry| -> Result<()> {
        entry.value().write().flush()?;
        Ok(())
    })
}
}