1use crate::tokenizer::{TokenizerMode, ensure_jieba_tokenizer_registered, tokenize_text};
2use rusqlite::{Connection, params};
3use serde::{Deserialize, Serialize};
4
5#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
8pub struct EnsureFtsIndexResult {
9 pub success: bool,
12 pub message: String,
15 pub index_name: String,
18 pub tokenizer_mode: String,
21}
22
23#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
26pub struct RebuildFtsIndexResult {
27 pub success: bool,
30 pub message: String,
33 pub index_name: String,
36 pub tokenizer_mode: String,
39 pub reindexed_rows: u64,
42}
43
44#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
47pub struct FtsMutationResult {
48 pub success: bool,
51 pub message: String,
54 pub affected_rows: u64,
57 pub index_name: String,
60}
61
62#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
65pub struct SearchFtsHit {
66 pub id: String,
69 pub file_path: String,
72 pub title: String,
75 pub title_highlight: String,
78 pub content_snippet: String,
81 pub score: f64,
84 pub rank: u64,
87 pub raw_score: f64,
90}
91
92#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
95pub struct SearchFtsResult {
96 pub success: bool,
99 pub message: String,
102 pub index_name: String,
105 pub tokenizer_mode: String,
108 pub normalized_query: String,
111 pub fts_query: String,
114 pub source: String,
117 pub query_mode: String,
120 pub total: u64,
123 pub hits: Vec<SearchFtsHit>,
126}
127
128pub fn ensure_fts_index(
131 connection: &Connection,
132 index_name: &str,
133 tokenizer_mode: TokenizerMode,
134) -> rusqlite::Result<EnsureFtsIndexResult> {
135 if tokenizer_mode == TokenizerMode::Jieba {
136 ensure_jieba_tokenizer_registered(connection)?;
137 }
138
139 let index_name = sanitize_index_name(index_name)?;
140 let quoted_index_name = quote_identifier(&index_name);
141 let tokenizer_sql = tokenizer_sql(tokenizer_mode);
142
143 connection.execute_batch(&format!(
144 "CREATE VIRTUAL TABLE IF NOT EXISTS {index_name} USING fts5(
145 id UNINDEXED,
146 file_path UNINDEXED,
147 title,
148 content,
149 tokenize={tokenizer_sql}
150 );",
151 index_name = quoted_index_name,
152 tokenizer_sql = tokenizer_sql,
153 ))?;
154
155 Ok(EnsureFtsIndexResult {
156 success: true,
157 message: "fts index ensured / FTS 索引已确认存在".to_string(),
158 index_name,
159 tokenizer_mode: tokenizer_mode.as_str().to_string(),
160 })
161}
162
163pub fn rebuild_fts_index(
166 connection: &Connection,
167 index_name: &str,
168 tokenizer_mode: TokenizerMode,
169) -> rusqlite::Result<RebuildFtsIndexResult> {
170 if tokenizer_mode == TokenizerMode::Jieba {
171 ensure_jieba_tokenizer_registered(connection)?;
172 }
173
174 let index_name = sanitize_index_name(index_name)?;
175 let quoted_index_name = quote_identifier(&index_name);
176 let exists: i64 = connection.query_row(
177 "SELECT COUNT(*) FROM sqlite_master WHERE type = 'table' AND name = ?1",
178 params![index_name.as_str()],
179 |row| row.get(0),
180 )?;
181
182 if exists == 0 {
183 let ensured = ensure_fts_index(connection, index_name.as_str(), tokenizer_mode)?;
184 return Ok(RebuildFtsIndexResult {
185 success: true,
186 message: "fts index created during rebuild / FTS 索引在重建过程中已创建".to_string(),
187 index_name: ensured.index_name,
188 tokenizer_mode: ensured.tokenizer_mode,
189 reindexed_rows: 0,
190 });
191 }
192
193 let mut statement = connection.prepare(&format!(
194 "SELECT id, file_path, title, content FROM {index_name} ORDER BY rowid ASC",
195 index_name = quoted_index_name
196 ))?;
197 let mut rows = statement.query([])?;
198 let mut documents = Vec::new();
199 while let Some(row) = rows.next()? {
200 documents.push((
201 row.get::<_, Option<String>>(0)?.unwrap_or_default(),
202 row.get::<_, Option<String>>(1)?.unwrap_or_default(),
203 row.get::<_, Option<String>>(2)?.unwrap_or_default(),
204 row.get::<_, Option<String>>(3)?.unwrap_or_default(),
205 ));
206 }
207 drop(rows);
208 drop(statement);
209
210 connection.execute_batch("BEGIN IMMEDIATE TRANSACTION;")?;
211 let rebuild_result = (|| -> rusqlite::Result<RebuildFtsIndexResult> {
212 connection.execute_batch(&format!(
213 "DROP TABLE IF EXISTS {index_name};",
214 index_name = quoted_index_name
215 ))?;
216 let ensured = ensure_fts_index(connection, index_name.as_str(), tokenizer_mode)?;
217 let mut reindexed_rows = 0_u64;
218 for (id, file_path, title, content) in documents {
219 upsert_fts_document(
220 connection,
221 ensured.index_name.as_str(),
222 tokenizer_mode,
223 id.as_str(),
224 file_path.as_str(),
225 title.as_str(),
226 content.as_str(),
227 )?;
228 reindexed_rows += 1;
229 }
230 Ok(RebuildFtsIndexResult {
231 success: true,
232 message: format!(
233 "fts index rebuilt (rows={}) / FTS 索引已重建",
234 reindexed_rows
235 ),
236 index_name: ensured.index_name,
237 tokenizer_mode: ensured.tokenizer_mode,
238 reindexed_rows,
239 })
240 })();
241
242 match rebuild_result {
243 Ok(result) => {
244 connection.execute_batch("COMMIT;")?;
245 Ok(result)
246 }
247 Err(error) => {
248 let _ = connection.execute_batch("ROLLBACK;");
249 Err(error)
250 }
251 }
252}
253
254pub fn upsert_fts_document(
257 connection: &Connection,
258 index_name: &str,
259 tokenizer_mode: TokenizerMode,
260 id: &str,
261 file_path: &str,
262 title: &str,
263 content: &str,
264) -> rusqlite::Result<FtsMutationResult> {
265 let ensured = ensure_fts_index(connection, index_name, tokenizer_mode)?;
266 let quoted_index_name = quote_identifier(&ensured.index_name);
267
268 let mut affected_rows = 0_u64;
269 affected_rows += connection.execute(
270 &format!("DELETE FROM {index_name} WHERE id = ?1", index_name = quoted_index_name),
271 params![id],
272 )? as u64;
273 affected_rows += connection.execute(
274 &format!(
275 "INSERT INTO {index_name} (id, file_path, title, content) VALUES (?1, ?2, ?3, ?4)",
276 index_name = quoted_index_name
277 ),
278 params![id, file_path, title, content],
279 )? as u64;
280
281 Ok(FtsMutationResult {
282 success: true,
283 message: "fts document upserted / FTS 文档已写入".to_string(),
284 affected_rows,
285 index_name: ensured.index_name,
286 })
287}
288
289pub fn delete_fts_document(
292 connection: &Connection,
293 index_name: &str,
294 id: &str,
295) -> rusqlite::Result<FtsMutationResult> {
296 let index_name = sanitize_index_name(index_name)?;
297 let quoted_index_name = quote_identifier(&index_name);
298 let affected_rows = connection.execute(
299 &format!("DELETE FROM {index_name} WHERE id = ?1", index_name = quoted_index_name),
300 params![id],
301 )? as u64;
302
303 Ok(FtsMutationResult {
304 success: true,
305 message: if affected_rows > 0 {
306 "fts document removed / FTS 文档已删除".to_string()
307 } else {
308 "fts document not found / FTS 文档不存在".to_string()
309 },
310 affected_rows,
311 index_name,
312 })
313}
314
315pub fn search_fts(
318 connection: &Connection,
319 index_name: &str,
320 tokenizer_mode: TokenizerMode,
321 query: &str,
322 limit: u32,
323 offset: u32,
324) -> rusqlite::Result<SearchFtsResult> {
325 let ensured = ensure_fts_index(connection, index_name, tokenizer_mode)?;
326 let tokenized_query = tokenize_text(Some(connection), tokenizer_mode, query, true)?;
327 let quoted_index_name = quote_identifier(&ensured.index_name);
328 let effective_limit = limit.clamp(1, 200);
329
330 let total: u64 = connection.query_row(
331 &format!(
332 "SELECT COUNT(*) FROM {index_name} WHERE {index_name} MATCH ?1",
333 index_name = quoted_index_name,
334 ),
335 params![tokenized_query.fts_query.as_str()],
336 |row| row.get::<_, i64>(0),
337 )? as u64;
338
339 let mut statement = connection.prepare(&format!(
340 "SELECT
341 id,
342 file_path,
343 title,
344 highlight({index_name}, 2, '<mark>', '</mark>') AS title_highlight,
345 snippet({index_name}, 3, '<mark>', '</mark>', '...', 12) AS content_snippet,
346 bm25({index_name}, 2.0, 1.0) AS raw_score
347 FROM {index_name}
348 WHERE {index_name} MATCH ?1
349 ORDER BY raw_score ASC, file_path ASC, id ASC
350 LIMIT ?2 OFFSET ?3",
351 index_name = quoted_index_name,
352 ))?;
353
354 let mut rows = statement.query(params![
355 tokenized_query.fts_query.as_str(),
356 effective_limit as i64,
357 offset as i64
358 ])?;
359 let mut hits = Vec::new();
360 let mut rank = offset as u64 + 1;
361 while let Some(row) = rows.next()? {
362 let raw_score = row.get::<_, f64>(5)?;
363 hits.push(SearchFtsHit {
364 id: row.get(0)?,
365 file_path: row.get(1)?,
366 title: row.get::<_, Option<String>>(2)?.unwrap_or_default(),
367 title_highlight: row.get::<_, Option<String>>(3)?.unwrap_or_default(),
368 content_snippet: row.get::<_, Option<String>>(4)?.unwrap_or_default(),
369 score: -raw_score,
370 rank,
371 raw_score,
372 });
373 rank += 1;
374 }
375
376 Ok(SearchFtsResult {
377 success: true,
378 message: format!("fts search completed (hits={}) / FTS 检索完成", hits.len()),
379 index_name: ensured.index_name,
380 tokenizer_mode: ensured.tokenizer_mode,
381 normalized_query: tokenized_query.normalized_text,
382 fts_query: tokenized_query.fts_query,
383 source: "sqlite_fts".to_string(),
384 query_mode: "fts".to_string(),
385 total,
386 hits,
387 })
388}
389
390fn sanitize_index_name(index_name: &str) -> rusqlite::Result<String> {
393 let trimmed = index_name.trim();
394 if trimmed.is_empty() {
395 return Err(rusqlite::Error::InvalidParameterName(
396 "index_name must not be empty / index_name 不能为空".to_string(),
397 ));
398 }
399
400 let mut chars = trimmed.chars();
401 let Some(first) = chars.next() else {
402 return Err(rusqlite::Error::InvalidParameterName(
403 "index_name must not be empty / index_name 不能为空".to_string(),
404 ));
405 };
406 if !(first.is_ascii_alphabetic() || first == '_') {
407 return Err(rusqlite::Error::InvalidParameterName(
408 "index_name must start with [A-Za-z_] / index_name 必须以字母或下划线开头".to_string(),
409 ));
410 }
411 if !trimmed
412 .chars()
413 .all(|ch| ch.is_ascii_alphanumeric() || ch == '_')
414 {
415 return Err(rusqlite::Error::InvalidParameterName(
416 "index_name only supports [A-Za-z0-9_] / index_name 仅支持字母数字下划线".to_string(),
417 ));
418 }
419 if trimmed.starts_with("_vulcan_") {
420 return Err(rusqlite::Error::InvalidParameterName(
421 "reserved index_name prefix / 保留索引名前缀".to_string(),
422 ));
423 }
424
425 Ok(trimmed.to_string())
426}
427
/// Wraps `identifier` in double quotes for use as an SQL identifier,
/// doubling every embedded `"` so the result is always a single token.
fn quote_identifier(identifier: &str) -> String {
    let mut quoted = String::with_capacity(identifier.len() + 2);
    quoted.push('"');
    for ch in identifier.chars() {
        if ch == '"' {
            // An embedded quote is escaped by writing it twice.
            quoted.push('"');
        }
        quoted.push(ch);
    }
    quoted.push('"');
    quoted
}
433
434fn tokenizer_sql(tokenizer_mode: TokenizerMode) -> &'static str {
437 match tokenizer_mode {
438 TokenizerMode::None => "'unicode61 remove_diacritics 2'",
439 TokenizerMode::Jieba => "'jieba'",
440 }
441}
442
#[cfg(test)]
mod tests {
    use super::*;

    /// Index used by every test in this module.
    const INDEX: &str = "memory_docs";
    /// Custom dictionary word that Jieba only emits as a single token after
    /// it has been registered.
    const CUSTOM_WORD: &str = "田-女士";

    /// Writes the one sample document used by the tests.
    fn index_sample_document(connection: &Connection) -> rusqlite::Result<FtsMutationResult> {
        upsert_fts_document(
            connection,
            INDEX,
            TokenizerMode::Jieba,
            "doc-1",
            "/demo/file.md",
            "测试标题",
            "市民田-女士急匆匆",
        )
    }

    /// Creates the `fts5vocab` companion table used to inspect indexed terms.
    fn create_vocab_table(connection: &Connection) -> rusqlite::Result<()> {
        connection.execute_batch(
            "CREATE VIRTUAL TABLE IF NOT EXISTS memory_docs_vocab USING fts5vocab(
                memory_docs,
                'instance'
            );",
        )
    }

    /// Counts how many times `term` occurs in the index vocabulary.
    fn count_vocab_term(connection: &Connection, term: &str) -> rusqlite::Result<i64> {
        connection.query_row(
            "SELECT count(*) FROM memory_docs_vocab WHERE term = ?1",
            params![term],
            |row| row.get(0),
        )
    }

    /// End-to-end check: create the index, write a document, register a
    /// custom word, re-write the document and find it through the new word.
    #[test]
    fn ensure_upsert_and_search_fts() -> rusqlite::Result<()> {
        let connection = Connection::open_in_memory()?;

        let ensured = ensure_fts_index(&connection, INDEX, TokenizerMode::Jieba)?;
        assert!(ensured.success);
        assert_eq!(ensured.index_name, "memory_docs");

        index_sample_document(&connection)?;
        let _ = crate::tokenizer::upsert_custom_word(&connection, CUSTOM_WORD, 42)?;
        // Second write so the document is tokenized with the new word known.
        index_sample_document(&connection)?;

        let result = search_fts(
            &connection,
            INDEX,
            TokenizerMode::Jieba,
            CUSTOM_WORD,
            10,
            0,
        )?;
        assert!(result.success);
        assert_eq!(result.total, 1);
        assert_eq!(result.hits.len(), 1);

        let top_hit = &result.hits[0];
        assert_eq!(top_hit.id, "doc-1");
        assert_eq!(top_hit.file_path, "/demo/file.md");
        assert_eq!(top_hit.rank, 1);
        assert!(top_hit.content_snippet.contains("mark"));

        assert_eq!(result.source, "sqlite_fts");
        assert_eq!(result.query_mode, "fts");

        Ok(())
    }

    /// A rebuild must re-tokenize existing documents so that a custom word
    /// registered after indexing shows up in the vocabulary.
    #[test]
    fn rebuild_fts_index_reindexes_existing_documents() -> rusqlite::Result<()> {
        let connection = Connection::open_in_memory()?;
        ensure_fts_index(&connection, INDEX, TokenizerMode::Jieba)?;
        index_sample_document(&connection)?;

        create_vocab_table(&connection)?;
        let before_count = count_vocab_term(&connection, CUSTOM_WORD)?;
        assert_eq!(before_count, 0);

        crate::tokenizer::upsert_custom_word(&connection, CUSTOM_WORD, 42)?;
        let rebuild = rebuild_fts_index(&connection, INDEX, TokenizerMode::Jieba)?;
        assert!(rebuild.success);
        assert_eq!(rebuild.reindexed_rows, 1);

        // The vocab table is bound to the dropped index; recreate it.
        connection.execute_batch("DROP TABLE IF EXISTS memory_docs_vocab;")?;
        create_vocab_table(&connection)?;
        let after_count = count_vocab_term(&connection, CUSTOM_WORD)?;
        assert_eq!(after_count, 1);

        Ok(())
    }
}