Skip to main content

khive_db/stores/
text.rs

1//! FTS5-backed `TextSearch` implementation.
2//!
3//! Each `Fts5TextSearch` manages a single FTS5 virtual table for full-text
4//! search. The table stores document metadata alongside the indexed text
5//! columns (`title` and `body`), with non-searchable columns marked
6//! `UNINDEXED`.
7//!
8//! # FTS5 table layout
9//!
10//! ```sql
11//! CREATE VIRTUAL TABLE fts_{key} USING fts5(
12//!     subject_id UNINDEXED,
13//!     kind UNINDEXED,
14//!     title,
15//!     body,
16//!     tags UNINDEXED,
17//!     namespace UNINDEXED,
18//!     metadata UNINDEXED,
19//!     updated_at UNINDEXED
20//! );
21//! ```
22//!
23//! Only `title` and `body` are full-text indexed. The remaining columns are
24//! stored for retrieval and filtering but do not participate in FTS ranking.
25//!
26//! # Connection strategy
27//!
28//! Follows the same dual-mode pattern as `SqliteVecStore`:
29//! - **File-backed**: Opens standalone connections per operation.
30//! - **In-memory**: Acquires pool connections via `spawn_blocking`.
31//!
32//! # Score normalization
33//!
//! FTS5 `rank` values are negative (more negative = more relevant). We
//! normalize within each result set so scores span `[0.05, 1.0]`, preserving
//! relative ordering across all tokenizers (including trigram, which produces
//! a narrow absolute range that the old `1/(1+|rank|)` formula collapsed into
//! near-uniform noise). The best hit in a result set receives score `1.0`;
//! the worst receives `0.05`. When all hits have the same rank (single hit or
//! degenerate match), every hit scores `1.0`.
41
42use std::sync::Arc;
43
44use async_trait::async_trait;
45use chrono::{DateTime, TimeZone, Utc};
46use uuid::Uuid;
47
48use khive_score::DeterministicScore;
49use khive_storage::error::StorageError;
50use khive_storage::types::{
51    BatchWriteSummary, IndexRebuildScope, TextDocument, TextFilter, TextIndexStats, TextQueryMode,
52    TextSearchHit, TextSearchRequest,
53};
54use khive_storage::StorageCapability;
55use khive_storage::TextSearch;
56use khive_types::SubstrateKind;
57
58use crate::error::SqliteError;
59use crate::pool::ConnectionPool;
60
/// Create the `fts_{table_key}` FTS5 virtual table if it is not already present.
///
/// Test-only helper: lets a test prepare an in-memory FTS5 table without going
/// through the full `StorageBackend`.
#[cfg(test)]
pub(crate) fn ensure_fts5_schema(
    conn: &rusqlite::Connection,
    table_key: &str,
) -> Result<(), rusqlite::Error> {
    // Column declarations in schema order; only `title` and `body` are indexed.
    const COLUMNS: [&str; 8] = [
        "subject_id UNINDEXED",
        "kind UNINDEXED",
        "title",
        "body",
        "tags UNINDEXED",
        "namespace UNINDEXED",
        "metadata UNINDEXED",
        "updated_at UNINDEXED",
    ];

    let statement = format!(
        "CREATE VIRTUAL TABLE IF NOT EXISTS fts_{} USING fts5({})",
        table_key,
        COLUMNS.join(", ")
    );
    conn.execute_batch(&statement)
}
85
86fn map_err(e: rusqlite::Error, op: &'static str) -> StorageError {
87    StorageError::driver(StorageCapability::Text, op, e)
88}
89
90fn map_sqlite_err(e: SqliteError, op: &'static str) -> StorageError {
91    StorageError::driver(StorageCapability::Text, op, e)
92}
93
/// A TextSearch backed by SQLite FTS5 virtual tables.
///
/// Each instance manages one table: `fts_{table_key}`. Documents are stored
/// with their metadata in UNINDEXED columns; only `title` and `body` are
/// full-text indexed.
pub struct Fts5TextSearch {
    // Shared pool: supplies connections in in-memory mode and the database
    // path / busy timeout configuration in file-backed mode.
    pool: Arc<ConnectionPool>,
    // When true, each operation opens its own standalone connection instead
    // of borrowing one from `pool`.
    is_file_backed: bool,
    // Fully-qualified table name, i.e. `fts_{table_key}`.
    table_name: String,
}
104
105impl Fts5TextSearch {
106    /// Create a new FTS5 text search instance.
107    ///
108    /// The FTS5 virtual table must already exist (created by `StorageBackend::text()`).
109    pub(crate) fn new(pool: Arc<ConnectionPool>, is_file_backed: bool, table_key: String) -> Self {
110        let table_name = format!("fts_{}", table_key);
111        Self {
112            pool,
113            is_file_backed,
114            table_name,
115        }
116    }
117
118    fn open_standalone_writer(&self) -> Result<rusqlite::Connection, StorageError> {
119        let config = self.pool.config();
120        let path = config.path.as_ref().ok_or_else(|| StorageError::Pool {
121            operation: "fts_writer".into(),
122            message: "in-memory databases do not support standalone connections".into(),
123        })?;
124
125        let conn = rusqlite::Connection::open_with_flags(
126            path,
127            rusqlite::OpenFlags::SQLITE_OPEN_READ_WRITE
128                | rusqlite::OpenFlags::SQLITE_OPEN_NO_MUTEX
129                | rusqlite::OpenFlags::SQLITE_OPEN_URI,
130        )
131        .map_err(|e| map_err(e, "open_fts_writer"))?;
132
133        conn.busy_timeout(config.busy_timeout)
134            .map_err(|e| map_err(e, "open_fts_writer"))?;
135        conn.pragma_update(None, "foreign_keys", "ON")
136            .map_err(|e| map_err(e, "open_fts_writer"))?;
137        conn.pragma_update(None, "synchronous", "NORMAL")
138            .map_err(|e| map_err(e, "open_fts_writer"))?;
139
140        Ok(conn)
141    }
142
143    fn open_standalone_reader(&self) -> Result<rusqlite::Connection, StorageError> {
144        let config = self.pool.config();
145        let path = config.path.as_ref().ok_or_else(|| StorageError::Pool {
146            operation: "fts_reader".into(),
147            message: "in-memory databases do not support standalone connections".into(),
148        })?;
149
150        let conn = rusqlite::Connection::open_with_flags(
151            path,
152            rusqlite::OpenFlags::SQLITE_OPEN_READ_ONLY
153                | rusqlite::OpenFlags::SQLITE_OPEN_NO_MUTEX
154                | rusqlite::OpenFlags::SQLITE_OPEN_URI,
155        )
156        .map_err(|e| map_err(e, "open_fts_reader"))?;
157
158        conn.busy_timeout(config.busy_timeout)
159            .map_err(|e| map_err(e, "open_fts_reader"))?;
160        conn.pragma_update(None, "foreign_keys", "ON")
161            .map_err(|e| map_err(e, "open_fts_reader"))?;
162        conn.pragma_update(None, "synchronous", "NORMAL")
163            .map_err(|e| map_err(e, "open_fts_reader"))?;
164
165        Ok(conn)
166    }
167
168    async fn with_writer<F, R>(&self, op: &'static str, f: F) -> Result<R, StorageError>
169    where
170        F: FnOnce(&rusqlite::Connection) -> Result<R, rusqlite::Error> + Send + 'static,
171        R: Send + 'static,
172    {
173        if self.is_file_backed {
174            let conn = self.open_standalone_writer()?;
175            tokio::task::spawn_blocking(move || f(&conn).map_err(|e| map_err(e, op)))
176                .await
177                .map_err(|e| StorageError::driver(StorageCapability::Text, op, e))?
178        } else {
179            let pool = Arc::clone(&self.pool);
180            tokio::task::spawn_blocking(move || {
181                let guard = pool.try_writer().map_err(|e| map_sqlite_err(e, op))?;
182                f(guard.conn()).map_err(|e| map_err(e, op))
183            })
184            .await
185            .map_err(|e| StorageError::driver(StorageCapability::Text, op, e))?
186        }
187    }
188
189    async fn with_reader<F, R>(&self, op: &'static str, f: F) -> Result<R, StorageError>
190    where
191        F: FnOnce(&rusqlite::Connection) -> Result<R, rusqlite::Error> + Send + 'static,
192        R: Send + 'static,
193    {
194        if self.is_file_backed {
195            let conn = self.open_standalone_reader()?;
196            tokio::task::spawn_blocking(move || f(&conn).map_err(|e| map_err(e, op)))
197                .await
198                .map_err(|e| StorageError::driver(StorageCapability::Text, op, e))?
199        } else {
200            let pool = Arc::clone(&self.pool);
201            tokio::task::spawn_blocking(move || {
202                let guard = pool.reader().map_err(|e| map_sqlite_err(e, op))?;
203                f(guard.conn()).map_err(|e| map_err(e, op))
204            })
205            .await
206            .map_err(|e| StorageError::driver(StorageCapability::Text, op, e))?
207        }
208    }
209}
210
211// -- Helper functions --
212
213fn tags_to_json(tags: &[String]) -> String {
214    serde_json::to_string(tags).unwrap_or_else(|_| "[]".to_string())
215}
216
217fn tags_from_json(s: &str) -> Vec<String> {
218    serde_json::from_str(s).unwrap_or_default()
219}
220
221fn dt_to_micros(dt: &DateTime<Utc>) -> i64 {
222    dt.timestamp_micros()
223}
224
225fn micros_to_dt(micros: i64) -> DateTime<Utc> {
226    Utc.timestamp_micros(micros)
227        .single()
228        .unwrap_or_else(Utc::now)
229}
230
/// Sanitize an FTS5 query string to prevent driver errors from special chars.
///
/// Each character of `query` is classified once:
///
/// 1. **Whitespace** (including tab and newline) becomes a single space, so
///    multi-line input keeps its token boundaries.
/// 2. **Removed** outright, merging its neighbours: `*`, `"`, `'`, `+`, `-`,
///    `^`, `.`, `~`, `!`, and every non-whitespace control character
///    (including `\0`).
/// 3. **Replaced with a space**: every other ASCII character that is not a
///    letter, digit or underscore. This covers the grouping/separator chars
///    `(`, `)`, `,`, `:` (so `NEAR(smile,5)` does not become `NEARsmile5` and
///    `tenant:isolation` becomes `tenant isolation`) as well as punctuation
///    such as `?`, `@`, `/`, `{`, `}`, which FTS5 rejects with a syntax error
///    when it appears in an unquoted query.
/// 4. Everything else (ASCII alphanumerics, `_`, non-ASCII text) is kept.
///
/// After character processing, the text is split on whitespace and the FTS5
/// keyword tokens AND, OR, NOT, NEAR are dropped (ASCII case-insensitively).
/// The surviving tokens are joined with single spaces; the result may be empty.
///
/// For Phrase mode, the caller wraps the result in double quotes.
fn sanitize_fts5_query(query: &str) -> String {
    let cleaned: String = query
        .chars()
        .filter_map(|c| {
            if c.is_whitespace() {
                // Checked before `is_control` so tab/newline separate tokens
                // instead of being deleted.
                Some(' ')
            } else if c.is_control()
                || matches!(c, '*' | '"' | '\'' | '+' | '-' | '^' | '.' | '~' | '!')
            {
                None
            } else if c.is_ascii() && !c.is_ascii_alphanumeric() && c != '_' {
                // Remaining ASCII punctuation is not legal in an FTS5 bareword.
                Some(' ')
            } else {
                Some(c)
            }
        })
        .collect();

    cleaned
        .split_whitespace()
        .filter(|token| {
            !["AND", "OR", "NOT", "NEAR"]
                .iter()
                .any(|keyword| token.eq_ignore_ascii_case(keyword))
        })
        .collect::<Vec<_>>()
        .join(" ")
}
284
285/// Build a WHERE clause fragment and params for a `TextFilter`.
286///
287/// Returns `(clause, params)` where clause is empty if no filters are active.
288/// Parameter indices start at `?{start_idx}`.
289fn build_filter_clause(
290    filter: &TextFilter,
291    table: &str,
292    start_idx: usize,
293) -> (String, Vec<Box<dyn rusqlite::types::ToSql>>) {
294    let mut conditions: Vec<String> = Vec::new();
295    let mut params: Vec<Box<dyn rusqlite::types::ToSql>> = Vec::new();
296    let mut idx = start_idx;
297
298    if !filter.ids.is_empty() {
299        let placeholders: Vec<String> = filter
300            .ids
301            .iter()
302            .map(|_| {
303                let p = format!("?{}", idx);
304                idx += 1;
305                p
306            })
307            .collect();
308        conditions.push(format!(
309            "{}.subject_id IN ({})",
310            table,
311            placeholders.join(", ")
312        ));
313        for id in &filter.ids {
314            params.push(Box::new(id.to_string()));
315        }
316    }
317
318    if !filter.kinds.is_empty() {
319        let placeholders: Vec<String> = filter
320            .kinds
321            .iter()
322            .map(|_| {
323                let p = format!("?{}", idx);
324                idx += 1;
325                p
326            })
327            .collect();
328        conditions.push(format!("{}.kind IN ({})", table, placeholders.join(", ")));
329        for kind in &filter.kinds {
330            params.push(Box::new(kind.to_string()));
331        }
332    }
333
334    if !filter.namespaces.is_empty() {
335        let placeholders: Vec<String> = filter
336            .namespaces
337            .iter()
338            .map(|_| {
339                let p = format!("?{}", idx);
340                idx += 1;
341                p
342            })
343            .collect();
344        conditions.push(format!(
345            "{}.namespace IN ({})",
346            table,
347            placeholders.join(", ")
348        ));
349        for ns in &filter.namespaces {
350            params.push(Box::new(ns.clone()));
351        }
352    }
353
354    if conditions.is_empty() {
355        (String::new(), params)
356    } else {
357        (format!(" AND {}", conditions.join(" AND ")), params)
358    }
359}
360
361#[async_trait]
362impl TextSearch for Fts5TextSearch {
363    async fn upsert_document(&self, document: TextDocument) -> Result<(), StorageError> {
364        let table = self.table_name.clone();
365        let namespace = document.namespace.clone();
366
367        self.with_writer("fts_upsert", move |conn| {
368            conn.execute_batch("BEGIN IMMEDIATE")?;
369
370            let del_sql = format!(
371                "DELETE FROM {} WHERE namespace = ?1 AND subject_id = ?2",
372                table
373            );
374            if let Err(e) = conn.execute(
375                &del_sql,
376                rusqlite::params![&namespace, document.subject_id.to_string()],
377            ) {
378                let _ = conn.execute_batch("ROLLBACK");
379                return Err(e);
380            }
381
382            let ins_sql = format!(
383                "INSERT INTO {} \
384                 (subject_id, kind, title, body, tags, namespace, metadata, updated_at) \
385                 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)",
386                table
387            );
388            let tags_json = tags_to_json(&document.tags);
389            let metadata_json: Option<String> = document.metadata.as_ref().map(|v| v.to_string());
390
391            if let Err(e) = conn.execute(
392                &ins_sql,
393                rusqlite::params![
394                    document.subject_id.to_string(),
395                    document.kind.to_string(),
396                    document.title.as_deref().unwrap_or(""),
397                    document.body,
398                    tags_json,
399                    &namespace,
400                    metadata_json,
401                    dt_to_micros(&document.updated_at),
402                ],
403            ) {
404                let _ = conn.execute_batch("ROLLBACK");
405                return Err(e);
406            }
407
408            conn.execute_batch("COMMIT")?;
409            Ok(())
410        })
411        .await
412    }
413
414    async fn upsert_documents(
415        &self,
416        documents: Vec<TextDocument>,
417    ) -> Result<BatchWriteSummary, StorageError> {
418        let table = self.table_name.clone();
419        let attempted = documents.len() as u64;
420
421        self.with_writer("fts_upsert_batch", move |conn| {
422            let del_sql = format!(
423                "DELETE FROM {} WHERE namespace = ?1 AND subject_id = ?2",
424                table
425            );
426            let ins_sql = format!(
427                "INSERT INTO {} \
428                 (subject_id, kind, title, body, tags, namespace, metadata, updated_at) \
429                 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)",
430                table
431            );
432
433            conn.execute_batch("BEGIN IMMEDIATE")?;
434            let mut affected = 0u64;
435            let mut failed = 0u64;
436
437            for doc in &documents {
438                conn.execute_batch("SAVEPOINT fts_upsert_doc")?;
439                let id_str = doc.subject_id.to_string();
440                let namespace = &doc.namespace;
441                let result = (|| {
442                    conn.execute(&del_sql, rusqlite::params![namespace, &id_str])?;
443
444                    let tags_json = tags_to_json(&doc.tags);
445                    let metadata_json: Option<String> =
446                        doc.metadata.as_ref().map(|v| v.to_string());
447
448                    conn.execute(
449                        &ins_sql,
450                        rusqlite::params![
451                            &id_str,
452                            &doc.kind.to_string(),
453                            doc.title.as_deref().unwrap_or(""),
454                            &doc.body,
455                            &tags_json,
456                            namespace,
457                            &metadata_json,
458                            dt_to_micros(&doc.updated_at),
459                        ],
460                    )?;
461                    Ok::<(), rusqlite::Error>(())
462                })();
463
464                match result {
465                    Ok(()) => {
466                        conn.execute_batch("RELEASE SAVEPOINT fts_upsert_doc")?;
467                        affected += 1;
468                    }
469                    Err(_) => {
470                        let _ = conn.execute_batch("ROLLBACK TO SAVEPOINT fts_upsert_doc");
471                        let _ = conn.execute_batch("RELEASE SAVEPOINT fts_upsert_doc");
472                        failed += 1;
473                    }
474                }
475            }
476
477            conn.execute_batch("COMMIT")?;
478
479            Ok(BatchWriteSummary {
480                attempted,
481                affected,
482                failed,
483                first_error: String::new(),
484            })
485        })
486        .await
487    }
488
489    async fn delete_document(
490        &self,
491        namespace: &str,
492        subject_id: Uuid,
493    ) -> Result<bool, StorageError> {
494        let namespace = namespace.to_string();
495        let table = self.table_name.clone();
496
497        self.with_writer("fts_delete", move |conn| {
498            let sql = format!(
499                "DELETE FROM {} WHERE namespace = ?1 AND subject_id = ?2",
500                table
501            );
502            let deleted =
503                conn.execute(&sql, rusqlite::params![namespace, subject_id.to_string()])?;
504            Ok(deleted > 0)
505        })
506        .await
507    }
508
509    async fn get_document(
510        &self,
511        namespace: &str,
512        subject_id: Uuid,
513    ) -> Result<Option<TextDocument>, StorageError> {
514        let namespace = namespace.to_string();
515        let table = self.table_name.clone();
516
517        self.with_reader("fts_get", move |conn| {
518            let sql = format!(
519                "SELECT subject_id, kind, title, body, tags, namespace, metadata, updated_at \
520                 FROM {} WHERE namespace = ?1 AND subject_id = ?2",
521                table
522            );
523            let mut stmt = conn.prepare(&sql)?;
524            let mut rows = stmt.query(rusqlite::params![namespace, subject_id.to_string()])?;
525
526            match rows.next()? {
527                Some(row) => {
528                    let id_str: String = row.get(0)?;
529                    let kind_str: String = row.get(1)?;
530                    let title: String = row.get(2)?;
531                    let body: String = row.get(3)?;
532                    let tags_json: String = row.get(4)?;
533                    let ns: String = row.get(5)?;
534                    let metadata_json: Option<String> = row.get(6)?;
535                    let updated_at_micros: i64 = row.get(7)?;
536
537                    let sid = Uuid::parse_str(&id_str).map_err(|e| {
538                        rusqlite::Error::FromSqlConversionFailure(
539                            0,
540                            rusqlite::types::Type::Text,
541                            Box::new(e),
542                        )
543                    })?;
544
545                    let kind = kind_str.parse::<SubstrateKind>().map_err(|e| {
546                        rusqlite::Error::FromSqlConversionFailure(
547                            1,
548                            rusqlite::types::Type::Text,
549                            Box::new(e),
550                        )
551                    })?;
552
553                    Ok(Some(TextDocument {
554                        subject_id: sid,
555                        kind,
556                        title: if title.is_empty() { None } else { Some(title) },
557                        body,
558                        tags: tags_from_json(&tags_json),
559                        namespace: ns,
560                        metadata: metadata_json.and_then(|s| serde_json::from_str(&s).ok()),
561                        updated_at: micros_to_dt(updated_at_micros),
562                    }))
563                }
564                None => Ok(None),
565            }
566        })
567        .await
568    }
569
570    async fn search(&self, request: TextSearchRequest) -> Result<Vec<TextSearchHit>, StorageError> {
571        let table = self.table_name.clone();
572
573        self.with_reader("fts_search", move |conn| {
574            let sanitized = sanitize_fts5_query(&request.query);
575            if sanitized.is_empty() {
576                return Ok(Vec::new());
577            }
578
579            let match_expr = match request.mode {
580                TextQueryMode::Phrase => format!("\"{}\"", sanitized),
581                TextQueryMode::Plain => sanitized,
582            };
583
584            // Snippet column index 3 = body in the FTS5 schema.
585            let snippet_chars = request.snippet_chars.max(1) as i32;
586
587            let (filter_clause, filter_params) = if let Some(ref filter) = request.filter {
588                build_filter_clause(filter, &table, 3)
589            } else {
590                (String::new(), Vec::new())
591            };
592
593            let sql = format!(
594                "SELECT subject_id, rank, title, snippet({table}, 3, '', '', '...', {snippet_chars}) \
595                 FROM {table} WHERE {table} MATCH ?1{filter_clause} \
596                 ORDER BY rank LIMIT ?2",
597            );
598
599            let mut stmt = conn.prepare(&sql)?;
600            stmt.raw_bind_parameter(1, &match_expr)?;
601            stmt.raw_bind_parameter(2, request.top_k as i64)?;
602
603            for (i, param) in filter_params.iter().enumerate() {
604                param
605                    .to_sql()
606                    .map(|val| stmt.raw_bind_parameter(3 + i, val))
607                    .map_err(|e| rusqlite::Error::ToSqlConversionFailure(Box::new(e)))??;
608            }
609
610            let mut hits = Vec::new();
611            let mut rows = stmt.raw_query();
612            let mut rank_idx = 0u32;
613
614            while let Some(row) = rows.next()? {
615                let id_str: String = row.get(0)?;
616                let fts_rank: f64 = row.get(1)?;
617                let title: String = row.get(2)?;
618                let snippet: String = row.get(3)?;
619
620                let subject_id = Uuid::parse_str(&id_str).map_err(|e| {
621                    rusqlite::Error::FromSqlConversionFailure(
622                        0,
623                        rusqlite::types::Type::Text,
624                        Box::new(e),
625                    )
626                })?;
627
628                rank_idx += 1;
629                hits.push((subject_id, fts_rank, rank_idx, title, snippet));
630            }
631
632            // Normalize scores within the result set to (0.05, 1.0].
633            // Best rank (most negative) maps to 1.0, worst to 0.05.
634            let min_rank = hits.iter().map(|h| h.1).fold(f64::INFINITY, f64::min);
635            let max_rank = hits.iter().map(|h| h.1).fold(f64::NEG_INFINITY, f64::max);
636            let range = max_rank - min_rank;
637
638            let results = hits
639                .into_iter()
640                .map(|(subject_id, raw_rank, rank, title, snippet)| {
641                    let score = if range.abs() < 1e-12 {
642                        1.0
643                    } else {
644                        let t = (max_rank - raw_rank) / range;
645                        0.05 + 0.95 * t
646                    };
647                    TextSearchHit {
648                        subject_id,
649                        score: DeterministicScore::from_f64(score),
650                        rank,
651                        title: if title.is_empty() { None } else { Some(title) },
652                        snippet: if snippet.is_empty() { None } else { Some(snippet) },
653                    }
654                })
655                .collect();
656
657            Ok(results)
658        })
659        .await
660    }
661
662    async fn count(&self, filter: TextFilter) -> Result<u64, StorageError> {
663        let table = self.table_name.clone();
664
665        self.with_reader("fts_count", move |conn| {
666            let (filter_clause, filter_params) = build_filter_clause(&filter, &table, 1);
667
668            let sql = if filter_clause.is_empty() {
669                format!("SELECT COUNT(*) FROM {}", table)
670            } else {
671                let where_part = filter_clause.trim_start_matches(" AND ");
672                format!("SELECT COUNT(*) FROM {} WHERE {}", table, where_part)
673            };
674
675            let mut stmt = conn.prepare(&sql)?;
676
677            for (i, param) in filter_params.iter().enumerate() {
678                param
679                    .to_sql()
680                    .map(|val| stmt.raw_bind_parameter(1 + i, val))
681                    .map_err(|e| rusqlite::Error::ToSqlConversionFailure(Box::new(e)))??;
682            }
683
684            let mut rows = stmt.raw_query();
685            match rows.next()? {
686                Some(row) => {
687                    let count: i64 = row.get(0)?;
688                    Ok(count as u64)
689                }
690                None => Ok(0),
691            }
692        })
693        .await
694    }
695
696    async fn stats(&self) -> Result<TextIndexStats, StorageError> {
697        let table = self.table_name.clone();
698
699        self.with_reader("fts_stats", move |conn| {
700            let sql = format!("SELECT COUNT(*) FROM {}", table);
701            let count: i64 = conn.query_row(&sql, [], |row| row.get(0))?;
702
703            Ok(TextIndexStats {
704                document_count: count as u64,
705                needs_rebuild: false,
706                last_rebuild_at: None,
707            })
708        })
709        .await
710    }
711
712    async fn rebuild(&self, _scope: IndexRebuildScope) -> Result<TextIndexStats, StorageError> {
713        let table = self.table_name.clone();
714
715        self.with_writer("fts_rebuild", move |conn| {
716            // FTS5 rebuild command: repopulates the internal index structures.
717            let sql = format!("INSERT INTO {}({}) VALUES('rebuild')", table, table);
718            conn.execute(&sql, [])?;
719
720            let count_sql = format!("SELECT COUNT(*) FROM {}", table);
721            let count: i64 = conn.query_row(&count_sql, [], |row| row.get(0))?;
722
723            Ok(TextIndexStats {
724                document_count: count as u64,
725                needs_rebuild: false,
726                last_rebuild_at: Some(Utc::now()),
727            })
728        })
729        .await
730    }
731}
732
733impl Fts5TextSearch {
734    /// Move all FTS5 documents from `old_namespace` to `new_namespace` in a
735    /// single transaction.
736    ///
737    /// FTS5 virtual tables do not support updating indexed columns (`title`,
738    /// `body`) via UPDATE. The correct approach is read-then-delete-then-reinsert.
739    ///
740    /// Callers must invoke this after any SQL-level namespace change on the
741    /// backing entity table so that FTS5 keyword search stays consistent with
742    /// the entity store.
743    #[allow(dead_code)]
744    pub(crate) async fn rename_namespace(
745        &self,
746        old_namespace: &str,
747        new_namespace: &str,
748    ) -> Result<u64, StorageError> {
749        if old_namespace == new_namespace {
750            return Ok(0);
751        }
752        let table = self.table_name.clone();
753        let old_ns = old_namespace.to_string();
754        let new_ns = new_namespace.to_string();
755
756        self.with_writer("fts_rename_namespace", move |conn| {
757            let sel_sql = format!(
758                "SELECT subject_id, kind, title, body, tags, metadata, updated_at \
759                 FROM {} WHERE namespace = ?1",
760                table
761            );
762            struct Row {
763                subject_id: String,
764                kind: String,
765                title: String,
766                body: String,
767                tags: String,
768                metadata: Option<String>,
769                updated_at: i64,
770            }
771            let rows: Vec<Row> = {
772                let mut stmt = conn.prepare(&sel_sql)?;
773                let iter = stmt.query_map(rusqlite::params![&old_ns], |row| {
774                    Ok(Row {
775                        subject_id: row.get(0)?,
776                        kind: row.get(1)?,
777                        title: row.get(2)?,
778                        body: row.get(3)?,
779                        tags: row.get(4)?,
780                        metadata: row.get(5)?,
781                        updated_at: row.get(6)?,
782                    })
783                })?;
784                iter.collect::<Result<Vec<_>, _>>()?
785            };
786            let moved = rows.len() as u64;
787            if moved == 0 {
788                return Ok(0u64);
789            }
790
791            conn.execute_batch("BEGIN IMMEDIATE")?;
792
793            let del_sql = format!("DELETE FROM {} WHERE namespace = ?1", table);
794            if let Err(e) = conn.execute(&del_sql, rusqlite::params![&old_ns]) {
795                let _ = conn.execute_batch("ROLLBACK");
796                return Err(e);
797            }
798
799            let ins_sql = format!(
800                "INSERT INTO {} \
801                 (subject_id, kind, title, body, tags, namespace, metadata, updated_at) \
802                 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)",
803                table
804            );
805            for row in &rows {
806                if let Err(e) = conn.execute(
807                    &ins_sql,
808                    rusqlite::params![
809                        row.subject_id,
810                        row.kind,
811                        row.title,
812                        row.body,
813                        row.tags,
814                        &new_ns,
815                        row.metadata,
816                        row.updated_at,
817                    ],
818                ) {
819                    let _ = conn.execute_batch("ROLLBACK");
820                    return Err(e);
821                }
822            }
823
824            conn.execute_batch("COMMIT")?;
825            Ok(moved)
826        })
827        .await
828    }
829}
830
831#[cfg(test)]
832mod tests {
833    use super::*;
834    use crate::pool::PoolConfig;
835
836    fn setup_memory_store(table_key: &str) -> Fts5TextSearch {
837        let config = PoolConfig {
838            path: None,
839            ..PoolConfig::default()
840        };
841        let pool = Arc::new(ConnectionPool::new(config).unwrap());
842
843        {
844            let writer = pool.writer().unwrap();
845            ensure_fts5_schema(writer.conn(), table_key).unwrap();
846        }
847
848        Fts5TextSearch::new(pool, false, table_key.to_string())
849    }
850
851    fn make_document(subject_id: Uuid, title: &str, body: &str) -> TextDocument {
852        TextDocument {
853            subject_id,
854            kind: SubstrateKind::Note,
855            title: if title.is_empty() {
856                None
857            } else {
858                Some(title.to_string())
859            },
860            body: body.to_string(),
861            tags: vec![],
862            namespace: "test_ns".to_string(),
863            metadata: None,
864            updated_at: Utc::now(),
865        }
866    }
867
868    fn ns_filter(namespace: &str) -> TextFilter {
869        TextFilter {
870            namespaces: vec![namespace.to_string()],
871            ..TextFilter::default()
872        }
873    }
874
875    #[tokio::test]
876    async fn test_upsert_and_search() {
877        let store = setup_memory_store("upsert_search");
878
879        let id = Uuid::new_v4();
880        let doc = TextDocument {
881            subject_id: id,
882            kind: SubstrateKind::Entity,
883            title: Some("Rust Programming".to_string()),
884            body: "Rust is a systems programming language focused on safety and performance."
885                .to_string(),
886            tags: vec!["rust".to_string(), "programming".to_string()],
887            namespace: "tech".to_string(),
888            metadata: None,
889            updated_at: Utc::now(),
890        };
891
892        store.upsert_document(doc).await.unwrap();
893
894        let hits = store
895            .search(TextSearchRequest {
896                query: "Rust programming".to_string(),
897                mode: TextQueryMode::Plain,
898                filter: Some(ns_filter("tech")),
899                top_k: 10,
900                snippet_chars: 64,
901            })
902            .await
903            .unwrap();
904
905        assert_eq!(hits.len(), 1);
906        assert_eq!(hits[0].subject_id, id);
907        assert_eq!(hits[0].rank, 1);
908        assert!(hits[0].score.to_f64() > 0.0);
909        assert!(hits[0].title.is_some());
910    }
911
912    #[tokio::test]
913    async fn test_phrase_search() {
914        let store = setup_memory_store("phrase");
915
916        let id1 = Uuid::new_v4();
917        let id2 = Uuid::new_v4();
918
919        store
920            .upsert_document(make_document(
921                id1,
922                "Animals",
923                "The quick brown fox jumps over the lazy dog.",
924            ))
925            .await
926            .unwrap();
927
928        store
929            .upsert_document(make_document(
930                id2,
931                "Colors",
932                "The brown paint was quick to dry, unlike the fox.",
933            ))
934            .await
935            .unwrap();
936
937        let hits = store
938            .search(TextSearchRequest {
939                query: "quick brown fox".to_string(),
940                mode: TextQueryMode::Phrase,
941                filter: Some(ns_filter("test_ns")),
942                top_k: 10,
943                snippet_chars: 64,
944            })
945            .await
946            .unwrap();
947
948        assert_eq!(hits.len(), 1);
949        assert_eq!(hits[0].subject_id, id1);
950
951        let hits = store
952            .search(TextSearchRequest {
953                query: "quick brown fox".to_string(),
954                mode: TextQueryMode::Plain,
955                filter: Some(ns_filter("test_ns")),
956                top_k: 10,
957                snippet_chars: 64,
958            })
959            .await
960            .unwrap();
961
962        assert_eq!(hits.len(), 2);
963    }
964
965    #[tokio::test]
966    async fn test_delete_document() {
967        let store = setup_memory_store("delete");
968
969        let id1 = Uuid::new_v4();
970        let id2 = Uuid::new_v4();
971
972        store
973            .upsert_document(make_document(id1, "Doc One", "First document content."))
974            .await
975            .unwrap();
976        store
977            .upsert_document(make_document(id2, "Doc Two", "Second document content."))
978            .await
979            .unwrap();
980
981        let stats = store.stats().await.unwrap();
982        assert_eq!(stats.document_count, 2);
983
984        let deleted = store.delete_document("test_ns", id1).await.unwrap();
985        assert!(deleted);
986
987        let stats = store.stats().await.unwrap();
988        assert_eq!(stats.document_count, 1);
989
990        let deleted_again = store.delete_document("test_ns", id1).await.unwrap();
991        assert!(!deleted_again);
992
993        let doc = store.get_document("test_ns", id2).await.unwrap();
994        assert!(doc.is_some());
995
996        let doc = store.get_document("test_ns", id1).await.unwrap();
997        assert!(doc.is_none());
998    }
999
1000    #[tokio::test]
1001    async fn test_count_with_filter() {
1002        let store = setup_memory_store("count_filter");
1003        let ns = "test_ns".to_string();
1004
1005        for i in 0..5 {
1006            let kind = if i % 2 == 0 {
1007                SubstrateKind::Entity
1008            } else {
1009                SubstrateKind::Note
1010            };
1011            let doc = TextDocument {
1012                subject_id: Uuid::new_v4(),
1013                kind,
1014                title: Some(format!("Doc {}", i)),
1015                body: format!("Content for document number {}", i),
1016                tags: vec![],
1017                namespace: ns.clone(),
1018                metadata: None,
1019                updated_at: Utc::now(),
1020            };
1021            store.upsert_document(doc).await.unwrap();
1022        }
1023
1024        let total = store
1025            .count(TextFilter {
1026                namespaces: vec![ns.clone()],
1027                ..TextFilter::default()
1028            })
1029            .await
1030            .unwrap();
1031        assert_eq!(total, 5);
1032
1033        let entities = store
1034            .count(TextFilter {
1035                namespaces: vec![ns.clone()],
1036                kinds: vec![SubstrateKind::Entity],
1037                ..TextFilter::default()
1038            })
1039            .await
1040            .unwrap();
1041        assert_eq!(entities, 3);
1042
1043        let notes = store
1044            .count(TextFilter {
1045                namespaces: vec![ns.clone()],
1046                kinds: vec![SubstrateKind::Note],
1047                ..TextFilter::default()
1048            })
1049            .await
1050            .unwrap();
1051        assert_eq!(notes, 2);
1052    }
1053
1054    #[tokio::test]
1055    async fn test_get_document_roundtrip() {
1056        let store = setup_memory_store("get_roundtrip");
1057
1058        let id = Uuid::new_v4();
1059        let original = TextDocument {
1060            subject_id: id,
1061            kind: SubstrateKind::Note,
1062            title: Some("Important Memo".to_string()),
1063            body: "This memo contains critical information.".to_string(),
1064            tags: vec!["important".to_string(), "memo".to_string()],
1065            namespace: "work".to_string(),
1066            metadata: Some(serde_json::json!({"priority": "high"})),
1067            updated_at: Utc::now(),
1068        };
1069
1070        store.upsert_document(original.clone()).await.unwrap();
1071
1072        let retrieved = store.get_document("work", id).await.unwrap().unwrap();
1073        assert_eq!(retrieved.subject_id, id);
1074        assert_eq!(retrieved.kind, SubstrateKind::Note);
1075        assert_eq!(retrieved.title, Some("Important Memo".to_string()));
1076        assert_eq!(retrieved.body, "This memo contains critical information.");
1077        assert_eq!(retrieved.tags, vec!["important", "memo"]);
1078        assert_eq!(retrieved.namespace, "work");
1079    }
1080
1081    #[tokio::test]
1082    async fn test_upsert_replaces_existing() {
1083        let store = setup_memory_store("replace");
1084
1085        let id = Uuid::new_v4();
1086        store
1087            .upsert_document(make_document(id, "Original", "Original body text."))
1088            .await
1089            .unwrap();
1090
1091        store
1092            .upsert_document(make_document(id, "Updated", "Updated body text."))
1093            .await
1094            .unwrap();
1095
1096        let stats = store.stats().await.unwrap();
1097        assert_eq!(stats.document_count, 1);
1098
1099        let doc = store.get_document("test_ns", id).await.unwrap().unwrap();
1100        assert_eq!(doc.title, Some("Updated".to_string()));
1101        assert_eq!(doc.body, "Updated body text.");
1102    }
1103
1104    #[tokio::test]
1105    async fn test_batch_upsert() {
1106        let store = setup_memory_store("batch");
1107
1108        let docs: Vec<TextDocument> = (0..50)
1109            .map(|i| TextDocument {
1110                subject_id: Uuid::new_v4(),
1111                kind: SubstrateKind::Entity,
1112                title: Some(format!("Item {}", i)),
1113                body: format!("This is the body content for item number {}", i),
1114                tags: vec![format!("tag_{}", i % 5)],
1115                namespace: "batch_ns".to_string(),
1116                metadata: None,
1117                updated_at: Utc::now(),
1118            })
1119            .collect();
1120
1121        let summary = store.upsert_documents(docs).await.unwrap();
1122        assert_eq!(summary.attempted, 50);
1123        assert_eq!(summary.affected, 50);
1124        assert_eq!(summary.failed, 0);
1125
1126        let stats = store.stats().await.unwrap();
1127        assert_eq!(stats.document_count, 50);
1128    }
1129
1130    #[tokio::test]
1131    async fn test_empty_search() {
1132        let store = setup_memory_store("empty");
1133
1134        let hits = store
1135            .search(TextSearchRequest {
1136                query: "nonexistent".to_string(),
1137                mode: TextQueryMode::Plain,
1138                filter: Some(ns_filter("test_ns")),
1139                top_k: 10,
1140                snippet_chars: 64,
1141            })
1142            .await
1143            .unwrap();
1144
1145        assert!(hits.is_empty());
1146    }
1147
1148    #[tokio::test]
1149    async fn test_rebuild() {
1150        let store = setup_memory_store("rebuild");
1151
1152        store
1153            .upsert_document(make_document(
1154                Uuid::new_v4(),
1155                "Test",
1156                "Test document for rebuild.",
1157            ))
1158            .await
1159            .unwrap();
1160
1161        let stats = store.rebuild(IndexRebuildScope::Full).await.unwrap();
1162        assert_eq!(stats.document_count, 1);
1163        assert!(!stats.needs_rebuild);
1164        assert!(stats.last_rebuild_at.is_some());
1165    }
1166
1167    #[tokio::test]
1168    async fn test_search_with_kind_filter() {
1169        let store = setup_memory_store("filter_kind");
1170
1171        let id_entity = Uuid::new_v4();
1172        let id_note = Uuid::new_v4();
1173
1174        store
1175            .upsert_document(TextDocument {
1176                subject_id: id_entity,
1177                kind: SubstrateKind::Entity,
1178                title: Some("Rust Guide".to_string()),
1179                body: "A comprehensive guide to Rust programming.".to_string(),
1180                tags: vec![],
1181                namespace: "test_ns".to_string(),
1182                metadata: None,
1183                updated_at: Utc::now(),
1184            })
1185            .await
1186            .unwrap();
1187
1188        store
1189            .upsert_document(TextDocument {
1190                subject_id: id_note,
1191                kind: SubstrateKind::Note,
1192                title: Some("Rust Notes".to_string()),
1193                body: "Quick notes about Rust concepts.".to_string(),
1194                tags: vec![],
1195                namespace: "test_ns".to_string(),
1196                metadata: None,
1197                updated_at: Utc::now(),
1198            })
1199            .await
1200            .unwrap();
1201
1202        let hits = store
1203            .search(TextSearchRequest {
1204                query: "Rust".to_string(),
1205                mode: TextQueryMode::Plain,
1206                filter: Some(TextFilter {
1207                    kinds: vec![SubstrateKind::Entity],
1208                    namespaces: vec!["test_ns".to_string()],
1209                    ..TextFilter::default()
1210                }),
1211                top_k: 10,
1212                snippet_chars: 64,
1213            })
1214            .await
1215            .unwrap();
1216
1217        assert_eq!(hits.len(), 1);
1218        assert_eq!(hits[0].subject_id, id_entity);
1219    }
1220
1221    #[tokio::test]
1222    async fn test_sanitize_fts5_query() {
1223        assert_eq!(sanitize_fts5_query("hello world"), "hello world");
1224        assert_eq!(sanitize_fts5_query("hello*world"), "helloworld");
1225        assert_eq!(sanitize_fts5_query("\"quoted\""), "quoted");
1226        assert_eq!(sanitize_fts5_query("(parens)"), "parens");
1227        assert_eq!(sanitize_fts5_query("a + b - c"), "a b c");
1228        assert_eq!(sanitize_fts5_query("col:value"), "col value");
1229        assert_eq!(sanitize_fts5_query(""), "");
1230        assert_eq!(sanitize_fts5_query("***"), "");
1231        // M-C4: decimal numbers must not produce "syntax error near '.'"
1232        assert_eq!(
1233            sanitize_fts5_query("salience 0.9 vs 0.3"),
1234            "salience 09 vs 03"
1235        );
1236        assert_eq!(sanitize_fts5_query("version 1.2.3"), "version 123");
1237        // H1: tilde and comma must be stripped to prevent FTS5 syntax errors
1238        assert_eq!(sanitize_fts5_query("~hello"), "hello");
1239        assert_eq!(sanitize_fts5_query("\"+_~!\""), "_");
1240        assert_eq!(sanitize_fts5_query("NEAR(smile, 5)"), "smile 5");
1241        assert_eq!(sanitize_fts5_query("a,b,c"), "a b c");
1242        // #570: full operator-class matrix
1243        // Apostrophe fix: single quote is an FTS5 string-literal delimiter in Plain mode.
1244        assert_eq!(sanitize_fts5_query("Bob's tenant"), "Bobs tenant");
1245        assert_eq!(
1246            sanitize_fts5_query("tenant AND isolation"),
1247            "tenant isolation"
1248        );
1249        assert_eq!(
1250            sanitize_fts5_query("tenant OR isolation"),
1251            "tenant isolation"
1252        );
1253        assert_eq!(
1254            sanitize_fts5_query("tenant NOT isolation"),
1255            "tenant isolation"
1256        );
1257        assert_eq!(
1258            sanitize_fts5_query("tenant NEAR(isolation, 5)"),
1259            "tenant isolation 5"
1260        );
1261        assert_eq!(sanitize_fts5_query("tenant:isolation"), "tenant isolation");
1262        assert_eq!(
1263            sanitize_fts5_query("tenant ^ isolation"),
1264            "tenant isolation"
1265        );
1266        assert_eq!(
1267            sanitize_fts5_query("(tenant isolation)"),
1268            "tenant isolation"
1269        );
1270        // whitespace-only becomes empty
1271        assert_eq!(sanitize_fts5_query("   "), "");
1272        // operator-only after stripping becomes empty
1273        assert_eq!(sanitize_fts5_query("AND OR NOT"), "");
1274    }
1275
1276    /// H1 regression: queries with tilde (~) must not produce "fts5: syntax error near '~'".
1277    #[tokio::test]
1278    async fn test_search_with_tilde_does_not_crash() {
1279        let store = setup_memory_store("tilde_query");
1280
1281        store
1282            .upsert_document(make_document(Uuid::new_v4(), "smile", "smiling face"))
1283            .await
1284            .unwrap();
1285
1286        let result = store
1287            .search(TextSearchRequest {
1288                query: "~smile".to_string(),
1289                mode: TextQueryMode::Plain,
1290                filter: Some(ns_filter("test_ns")),
1291                top_k: 10,
1292                snippet_chars: 64,
1293            })
1294            .await;
1295        assert!(
1296            result.is_ok(),
1297            "tilde query must not crash FTS5, got: {:?}",
1298            result.err()
1299        );
1300    }
1301
1302    /// H1 regression: NEAR() queries must not produce "fts5: syntax error near ','".
1303    #[tokio::test]
1304    async fn test_search_with_near_operator_does_not_crash() {
1305        let store = setup_memory_store("near_query");
1306
1307        store
1308            .upsert_document(make_document(Uuid::new_v4(), "smile", "quokka smile happy"))
1309            .await
1310            .unwrap();
1311
1312        let result = store
1313            .search(TextSearchRequest {
1314                query: "quokka NEAR(smile, 5)".to_string(),
1315                mode: TextQueryMode::Plain,
1316                filter: Some(ns_filter("test_ns")),
1317                top_k: 10,
1318                snippet_chars: 64,
1319            })
1320            .await;
1321        assert!(
1322            result.is_ok(),
1323            "NEAR() query must not crash FTS5, got: {:?}",
1324            result.err()
1325        );
1326    }
1327
1328    /// M-C4 regression: searching with decimal numbers must succeed (not crash FTS5).
1329    ///
1330    /// Previously `.` was not stripped, causing FTS5 to return
1331    /// "fts5: syntax error near '.'" when queries contained decimal literals like "0.9".
1332    #[tokio::test]
1333    async fn test_search_with_decimal_query_does_not_crash() {
1334        let store = setup_memory_store("decimal_query");
1335
1336        // Insert a document that contains decimal-like content.
1337        store
1338            .upsert_document(make_document(
1339                Uuid::new_v4(),
1340                "salience thresholds",
1341                "salience 09 vs 03 comparison",
1342            ))
1343            .await
1344            .unwrap();
1345
1346        // Must not return an error — previously "fts5: syntax error near '.'"
1347        let result = store
1348            .search(TextSearchRequest {
1349                query: "salience 0.9 vs 0.3".to_string(),
1350                mode: TextQueryMode::Plain,
1351                filter: Some(ns_filter("test_ns")),
1352                top_k: 10,
1353                snippet_chars: 64,
1354            })
1355            .await;
1356        assert!(
1357            result.is_ok(),
1358            "decimal query must succeed, got error: {:?}",
1359            result.err()
1360        );
1361
1362        // Also test with version strings.
1363        let result2 = store
1364            .search(TextSearchRequest {
1365                query: "salience 0.9 vs version 1.2.3".to_string(),
1366                mode: TextQueryMode::Plain,
1367                filter: Some(ns_filter("test_ns")),
1368                top_k: 10,
1369                snippet_chars: 64,
1370            })
1371            .await;
1372        assert!(
1373            result2.is_ok(),
1374            "version-string query must succeed, got error: {:?}",
1375            result2.err()
1376        );
1377    }
1378
1379    /// #570: all FTS5 operator classes must not crash the generic text search surface.
1380    #[tokio::test]
1381    async fn test_search_with_fts_operator_matrix_does_not_crash() {
1382        let store = setup_memory_store("fts_operator_matrix");
1383
1384        store
1385            .upsert_document(make_document(
1386                Uuid::new_v4(),
1387                "tenant isolation",
1388                "multi-tenant isolation operator regression anchor content",
1389            ))
1390            .await
1391            .unwrap();
1392
1393        let cases: &[&str] = &[
1394            "\"tenant isolation\"",
1395            "Bob \"quoted\" tenant",
1396            "tenant AND isolation",
1397            "tenant OR isolation",
1398            "tenant NOT isolation",
1399            "tenant NEAR(isolation, 5)",
1400            "tenant*",
1401            "***",
1402            "tenant:isolation",
1403            "tenant ^ isolation",
1404            "(tenant isolation)",
1405            "(\"+_~!\")",
1406            "tenant:foo^bar*",
1407            "multi-tenant isolation",
1408            "   ",
1409            "",
1410        ];
1411
1412        for query in cases {
1413            let result = store
1414                .search(TextSearchRequest {
1415                    query: query.to_string(),
1416                    mode: TextQueryMode::Plain,
1417                    filter: Some(ns_filter("test_ns")),
1418                    top_k: 10,
1419                    snippet_chars: 64,
1420                })
1421                .await;
1422            assert!(
1423                result.is_ok(),
1424                "#570 DB search query {query:?} must not crash FTS5, got: {:?}",
1425                result.err()
1426            );
1427        }
1428    }
1429
1430    #[tokio::test]
1431    async fn test_score_is_bounded() {
1432        let store = setup_memory_store("score_bounds");
1433
1434        for i in 0..5 {
1435            store
1436                .upsert_document(make_document(
1437                    Uuid::new_v4(),
1438                    &format!("Doc {}", i),
1439                    &format!("This document discusses topic number {}", i),
1440                ))
1441                .await
1442                .unwrap();
1443        }
1444
1445        let hits = store
1446            .search(TextSearchRequest {
1447                query: "document topic".to_string(),
1448                mode: TextQueryMode::Plain,
1449                filter: Some(ns_filter("test_ns")),
1450                top_k: 10,
1451                snippet_chars: 64,
1452            })
1453            .await
1454            .unwrap();
1455
1456        for hit in &hits {
1457            let score = hit.score.to_f64();
1458            assert!(
1459                score > 0.0 && score <= 1.0,
1460                "score out of (0, 1] range: {}",
1461                score
1462            );
1463        }
1464
1465        for (i, hit) in hits.iter().enumerate() {
1466            assert_eq!(hit.rank, (i + 1) as u32);
1467        }
1468    }
1469
1470    #[tokio::test]
1471    async fn test_rename_namespace() {
1472        let store = setup_memory_store("rename_ns");
1473
1474        let id = Uuid::new_v4();
1475        let doc = TextDocument {
1476            subject_id: id,
1477            kind: SubstrateKind::Note,
1478            title: Some("Rename test".to_string()),
1479            body: "keyword_unique_xyz".to_string(),
1480            tags: vec![],
1481            namespace: "old_ns".to_string(),
1482            metadata: None,
1483            updated_at: Utc::now(),
1484        };
1485        store.upsert_document(doc).await.unwrap();
1486
1487        let before = store
1488            .search(TextSearchRequest {
1489                query: "keyword_unique_xyz".to_string(),
1490                mode: TextQueryMode::Plain,
1491                filter: Some(ns_filter("old_ns")),
1492                top_k: 10,
1493                snippet_chars: 64,
1494            })
1495            .await
1496            .unwrap();
1497        assert_eq!(before.len(), 1);
1498
1499        let moved = store.rename_namespace("old_ns", "new_ns").await.unwrap();
1500        assert_eq!(moved, 1);
1501
1502        let after_new = store
1503            .search(TextSearchRequest {
1504                query: "keyword_unique_xyz".to_string(),
1505                mode: TextQueryMode::Plain,
1506                filter: Some(ns_filter("new_ns")),
1507                top_k: 10,
1508                snippet_chars: 64,
1509            })
1510            .await
1511            .unwrap();
1512        assert_eq!(after_new.len(), 1);
1513
1514        let after_old = store
1515            .search(TextSearchRequest {
1516                query: "keyword_unique_xyz".to_string(),
1517                mode: TextQueryMode::Plain,
1518                filter: Some(ns_filter("old_ns")),
1519                top_k: 10,
1520                snippet_chars: 64,
1521            })
1522            .await
1523            .unwrap();
1524        assert!(after_old.is_empty());
1525    }
1526
1527    #[tokio::test]
1528    async fn test_metadata_none_roundtrip() {
1529        let store = setup_memory_store("meta_none");
1530        let id = uuid::Uuid::new_v4();
1531        let doc = TextDocument {
1532            subject_id: id,
1533            kind: SubstrateKind::Note,
1534            namespace: "test_ns".to_string(),
1535            title: None,
1536            body: "no metadata".to_string(),
1537            tags: vec![],
1538            metadata: None,
1539            updated_at: Utc::now(),
1540        };
1541        store.upsert_document(doc).await.unwrap();
1542        let fetched = store.get_document("test_ns", id).await.unwrap().unwrap();
1543        assert!(fetched.metadata.is_none());
1544    }
1545
1546    #[tokio::test]
1547    async fn test_rename_namespace_noop() {
1548        let store = setup_memory_store("rename_noop");
1549
1550        let id = Uuid::new_v4();
1551        let doc = TextDocument {
1552            subject_id: id,
1553            kind: SubstrateKind::Note,
1554            title: None,
1555            body: "noop_test_content".to_string(),
1556            tags: vec![],
1557            namespace: "same_ns".to_string(),
1558            metadata: None,
1559            updated_at: Utc::now(),
1560        };
1561        store.upsert_document(doc).await.unwrap();
1562
1563        let moved = store.rename_namespace("same_ns", "same_ns").await.unwrap();
1564        assert_eq!(moved, 0);
1565
1566        let hits = store
1567            .search(TextSearchRequest {
1568                query: "noop_test_content".to_string(),
1569                mode: TextQueryMode::Plain,
1570                filter: Some(ns_filter("same_ns")),
1571                top_k: 10,
1572                snippet_chars: 64,
1573            })
1574            .await
1575            .unwrap();
1576        assert_eq!(hits.len(), 1);
1577    }
1578
1579    /// Score normalization: all scores stay in (0, 1], and a single-hit result
1580    /// scores ≈ 1.0. This validates the normalization formula independent of
1581    /// FTS5 rank ordering guarantees (which are already tested via `rank` field).
1582    #[tokio::test]
1583    async fn test_score_normalization_range() {
1584        let store = setup_memory_store("score_range");
1585
1586        // Insert three documents; only two match the query.
1587        let id1 = Uuid::new_v4();
1588        let id2 = Uuid::new_v4();
1589        let id3 = Uuid::new_v4();
1590        store
1591            .upsert_document(make_document(
1592                id1,
1593                "normtest topic",
1594                "normtest normtest normtest",
1595            ))
1596            .await
1597            .unwrap();
1598        store
1599            .upsert_document(make_document(
1600                id2,
1601                "normtest light",
1602                "other content without the keyword",
1603            ))
1604            .await
1605            .unwrap();
1606        store
1607            .upsert_document(make_document(
1608                id3,
1609                "irrelevant title",
1610                "completely different document content",
1611            ))
1612            .await
1613            .unwrap();
1614
1615        let hits = store
1616            .search(TextSearchRequest {
1617                query: "normtest".to_string(),
1618                mode: TextQueryMode::Plain,
1619                filter: Some(ns_filter("test_ns")),
1620                top_k: 10,
1621                snippet_chars: 64,
1622            })
1623            .await
1624            .unwrap();
1625
1626        // id3 must not match; id1 and id2 should.
1627        assert!(!hits.is_empty(), "at least one doc must match");
1628        assert!(
1629            hits.iter().all(|h| h.subject_id != id3),
1630            "id3 must not appear"
1631        );
1632
1633        // All scores must be in (0, 1].
1634        for h in &hits {
1635            let s = h.score.to_f64();
1636            assert!(s > 0.0 && s <= 1.0, "score out of (0,1]: {s}");
1637        }
1638        // Rank field must be 1-indexed and contiguous.
1639        for (i, h) in hits.iter().enumerate() {
1640            assert_eq!(h.rank, (i + 1) as u32, "rank must equal position+1");
1641        }
1642        // Best hit (rank=1) must score ≈ 1.0 — normalization anchors the best
1643        // rank to 1.0 regardless of absolute BM25 magnitude.
1644        assert!(
1645            hits[0].score.to_f64() > 0.99,
1646            "top hit must score ≈ 1.0, got {}",
1647            hits[0].score.to_f64()
1648        );
1649
1650        // Single-hit result: the only match scores ≈ 1.0 (degenerate case:
1651        // range == 0 → all hits get 1.0).
1652        let single_id = Uuid::new_v4();
1653        store
1654            .upsert_document(make_document(
1655                single_id,
1656                "xqzplurp_unique_marker",
1657                "xqzplurp_unique_marker body",
1658            ))
1659            .await
1660            .unwrap();
1661        let single = store
1662            .search(TextSearchRequest {
1663                query: "xqzplurp_unique_marker".to_string(),
1664                mode: TextQueryMode::Plain,
1665                filter: Some(ns_filter("test_ns")),
1666                top_k: 10,
1667                snippet_chars: 64,
1668            })
1669            .await
1670            .unwrap();
1671        assert_eq!(single.len(), 1);
1672        assert!(
1673            single[0].score.to_f64() > 0.99,
1674            "single-hit must score ≈ 1.0, got {}",
1675            single[0].score.to_f64()
1676        );
1677    }
1678}