Skip to main content

sqlite_graphrag/storage/
memories.rs

1//! Persistence layer for the `memories` table and its vector companion.
2//!
3//! Functions here encapsulate every SQL statement touching `memories`,
4//! `vec_memories` and the FTS5 `fts_memories` shadow table. Callers receive
5//! typed [`MemoryRow`] or [`NewMemory`] values and never build SQL strings.
6
7use crate::embedder::f32_to_bytes;
8use crate::errors::AppError;
9use crate::storage::utils::with_busy_retry;
10use rusqlite::{params, Connection};
11use serde::{Deserialize, Serialize};
12
/// Input payload for inserting or updating a memory.
///
/// `body_hash` must be the BLAKE3 digest of `body`. The `metadata` field is
/// stored as a TEXT column containing JSON.
#[derive(Debug, Serialize, Deserialize)]
pub struct NewMemory {
    /// Namespace the memory belongs to. Written by [`insert`]; [`update`]
    /// does not touch this column.
    pub namespace: String,
    /// Memory name. Written by [`insert`]; [`update`] does not touch this
    /// column.
    pub name: String,
    /// Memory category, persisted in the SQL column named `type`.
    pub memory_type: String,
    /// Value stored in the `description` column.
    pub description: String,
    /// Full text stored in the `body` column.
    pub body: String,
    /// Digest of `body`; [`find_by_hash`] matches on this value to detect
    /// duplicates inside a namespace.
    pub body_hash: String,
    /// Optional value for the `session_id` column.
    pub session_id: Option<String>,
    /// Value stored in the `source` column.
    // NOTE(review): presumably identifies who produced the memory (the unit
    // tests use "agent") — confirm the allowed values against the schema.
    pub source: String,
    /// Arbitrary JSON; serialized with `serde_json::to_string` before being
    /// bound to the `metadata` column.
    pub metadata: serde_json::Value,
}
29
/// Fully materialized row from the `memories` table.
///
/// Returned by [`read_by_name`], [`read_full`], [`list`] and [`fts_search`].
/// The `metadata` field is kept as a JSON string to avoid double parsing.
#[derive(Debug, Serialize)]
pub struct MemoryRow {
    /// Value of the `id` column; the same value is used as `memory_id` when
    /// addressing `vec_memories`.
    pub id: i64,
    /// Value of the `namespace` column.
    pub namespace: String,
    /// Value of the `name` column.
    pub name: String,
    /// Value of the SQL column named `type`.
    pub memory_type: String,
    /// Value of the `description` column.
    pub description: String,
    /// Value of the `body` column.
    pub body: String,
    /// Value of the `body_hash` column.
    pub body_hash: String,
    /// Value of the `session_id` column; `None` when the column is NULL.
    pub session_id: Option<String>,
    /// Value of the `source` column.
    pub source: String,
    /// Raw JSON text exactly as stored in the `metadata` column.
    pub metadata: String,
    /// Value of the `created_at` column.
    // NOTE(review): presumably Unix epoch seconds like `deleted_at` — confirm
    // against the schema.
    pub created_at: i64,
    /// Value of the `updated_at` column; this is the value [`update`] compares
    /// against when an optimistic-concurrency token is supplied.
    pub updated_at: i64,
    /// Unix epoch when the memory was soft-deleted, or `None` for active memories.
    /// Surfaced in `list --include-deleted --json` so LLM consumers can distinguish
    /// active from soft-deleted rows without a second SQL query (v1.0.37 H7+M9 fix).
    /// The field is omitted from serialized output when it is `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub deleted_at: Option<i64>,
}
54
55/// Finds a live memory by `(namespace, name)` and returns key metadata.
56///
57/// # Arguments
58///
59/// - `conn` — open SQLite connection configured with the project pragmas.
60/// - `namespace` — resolved namespace for the lookup.
61/// - `name` — kebab-case memory name.
62///
63/// # Returns
64///
65/// `Ok(Some((id, updated_at, max_version)))` when the memory exists and is
66/// not soft-deleted, `Ok(None)` otherwise.
67///
68/// # Errors
69///
70/// Returns `Err(AppError::Database)` on any `rusqlite` failure.
71pub fn find_by_name(
72    conn: &Connection,
73    namespace: &str,
74    name: &str,
75) -> Result<Option<(i64, i64, i64)>, AppError> {
76    let mut stmt = conn.prepare_cached(
77        "SELECT m.id, m.updated_at, COALESCE(MAX(v.version), 0)
78         FROM memories m
79         LEFT JOIN memory_versions v ON v.memory_id = m.id
80         WHERE m.namespace = ?1 AND m.name = ?2 AND m.deleted_at IS NULL
81         GROUP BY m.id",
82    )?;
83    let result = stmt.query_row(params![namespace, name], |r| {
84        Ok((
85            r.get::<_, i64>(0)?,
86            r.get::<_, i64>(1)?,
87            r.get::<_, i64>(2)?,
88        ))
89    });
90    match result {
91        Ok(row) => Ok(Some(row)),
92        Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
93        Err(e) => Err(AppError::Database(e)),
94    }
95}
96
97/// Looks up a live memory by exact `body_hash` within a namespace.
98///
99/// Used during `remember` to short-circuit semantic duplicates before
100/// spending an embedding call.
101///
102/// # Returns
103///
104/// `Ok(Some(id))` when a live memory with the same hash exists,
105/// `Ok(None)` otherwise.
106///
107/// # Errors
108///
109/// Returns `Err(AppError::Database)` on any `rusqlite` failure.
110pub fn find_by_hash(
111    conn: &Connection,
112    namespace: &str,
113    body_hash: &str,
114) -> Result<Option<i64>, AppError> {
115    let mut stmt = conn.prepare_cached(
116        "SELECT id FROM memories WHERE namespace = ?1 AND body_hash = ?2 AND deleted_at IS NULL",
117    )?;
118    match stmt.query_row(params![namespace, body_hash], |r| r.get(0)) {
119        Ok(id) => Ok(Some(id)),
120        Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
121        Err(e) => Err(AppError::Database(e)),
122    }
123}
124
125/// Inserts a new row into the `memories` table.
126///
127/// # Arguments
128///
129/// - `conn` — active SQLite connection, typically inside a transaction.
130/// - `m` — validated payload including `body_hash` and serialized metadata.
131///
132/// # Returns
133///
134/// The `rowid` assigned to the newly inserted memory.
135///
136/// # Errors
137///
138/// Returns `Err(AppError::Database)` on insertion failure and
139/// `Err(AppError::Json)` if metadata serialization fails.
140pub fn insert(conn: &Connection, m: &NewMemory) -> Result<i64, AppError> {
141    conn.execute(
142        "INSERT INTO memories (namespace, name, type, description, body, body_hash, session_id, source, metadata)
143         VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9)",
144        params![
145            m.namespace, m.name, m.memory_type, m.description, m.body,
146            m.body_hash, m.session_id, m.source,
147            serde_json::to_string(&m.metadata)?
148        ],
149    )?;
150    Ok(conn.last_insert_rowid())
151}
152
153/// Updates an existing memory optionally guarded by optimistic concurrency.
154///
155/// When `expected_updated_at` is `Some(ts)` the row is only updated if its
156/// current `updated_at` equals `ts`. This protects concurrent `edit` calls
157/// from silently clobbering each other.
158///
159/// # Returns
160///
161/// `Ok(true)` when exactly one row was updated, `Ok(false)` when the
162/// optimistic check failed or the memory does not exist.
163///
164/// # Errors
165///
166/// Returns `Err(AppError::Database)` on any `rusqlite` failure.
167pub fn update(
168    conn: &Connection,
169    id: i64,
170    m: &NewMemory,
171    expected_updated_at: Option<i64>,
172) -> Result<bool, AppError> {
173    let affected = if let Some(ts) = expected_updated_at {
174        conn.execute(
175            "UPDATE memories SET type=?2, description=?3, body=?4, body_hash=?5,
176             session_id=?6, source=?7, metadata=?8
177             WHERE id=?1 AND updated_at=?9 AND deleted_at IS NULL",
178            params![
179                id,
180                m.memory_type,
181                m.description,
182                m.body,
183                m.body_hash,
184                m.session_id,
185                m.source,
186                serde_json::to_string(&m.metadata)?,
187                ts
188            ],
189        )?
190    } else {
191        conn.execute(
192            "UPDATE memories SET type=?2, description=?3, body=?4, body_hash=?5,
193             session_id=?6, source=?7, metadata=?8
194             WHERE id=?1 AND deleted_at IS NULL",
195            params![
196                id,
197                m.memory_type,
198                m.description,
199                m.body,
200                m.body_hash,
201                m.session_id,
202                m.source,
203                serde_json::to_string(&m.metadata)?
204            ],
205        )?
206    };
207    Ok(affected == 1)
208}
209
210/// Replaces the vector row for a memory in `vec_memories`.
211///
212/// `sqlite-vec` virtual tables do not implement `INSERT OR REPLACE`, so the
213/// existing row is deleted first and a fresh vector is inserted. Callers
214/// must pass an `embedding` with length [`crate::constants::EMBEDDING_DIM`].
215///
216/// # Errors
217///
218/// Returns `Err(AppError::Database)` on any `rusqlite` failure.
219pub fn upsert_vec(
220    conn: &Connection,
221    memory_id: i64,
222    namespace: &str,
223    memory_type: &str,
224    embedding: &[f32],
225    name: &str,
226    snippet: &str,
227) -> Result<(), AppError> {
228    // sqlite-vec virtual tables do not support INSERT OR REPLACE semantics.
229    // Must delete the existing row first, then insert.  Both statements are
230    // wrapped in `with_busy_retry` because WAL-mode concurrent writers can
231    // cause SQLITE_BUSY on vec0 virtual table writes.
232    let embedding_bytes = f32_to_bytes(embedding);
233    with_busy_retry(|| {
234        conn.execute(
235            "DELETE FROM vec_memories WHERE memory_id = ?1",
236            params![memory_id],
237        )?;
238        conn.execute(
239            "INSERT INTO vec_memories(memory_id, namespace, type, embedding, name, snippet)
240             VALUES (?1, ?2, ?3, ?4, ?5, ?6)",
241            params![
242                memory_id,
243                namespace,
244                memory_type,
245                &embedding_bytes,
246                name,
247                snippet
248            ],
249        )?;
250        Ok(())
251    })
252}
253
254/// Deletes the vector row for `memory_id` from `vec_memories`.
255///
256/// Called during `forget` and `purge` to keep the vector table consistent
257/// with the logical state of `memories`.
258///
259/// # Errors
260///
261/// Returns `Err(AppError::Database)` on any `rusqlite` failure.
262pub fn delete_vec(conn: &Connection, memory_id: i64) -> Result<(), AppError> {
263    conn.execute(
264        "DELETE FROM vec_memories WHERE memory_id = ?1",
265        params![memory_id],
266    )?;
267    Ok(())
268}
269
270/// Fetches a live memory by `(namespace, name)` and returns all columns.
271///
272/// # Returns
273///
274/// `Ok(Some(row))` when found, `Ok(None)` when missing or soft-deleted.
275///
276/// # Errors
277///
278/// Returns `Err(AppError::Database)` on any `rusqlite` failure.
279pub fn read_by_name(
280    conn: &Connection,
281    namespace: &str,
282    name: &str,
283) -> Result<Option<MemoryRow>, AppError> {
284    let mut stmt = conn.prepare_cached(
285        "SELECT id, namespace, name, type, description, body, body_hash,
286                session_id, source, metadata, created_at, updated_at, deleted_at
287         FROM memories WHERE namespace=?1 AND name=?2 AND deleted_at IS NULL",
288    )?;
289    match stmt.query_row(params![namespace, name], |r| {
290        Ok(MemoryRow {
291            id: r.get(0)?,
292            namespace: r.get(1)?,
293            name: r.get(2)?,
294            memory_type: r.get(3)?,
295            description: r.get(4)?,
296            body: r.get(5)?,
297            body_hash: r.get(6)?,
298            session_id: r.get(7)?,
299            source: r.get(8)?,
300            metadata: r.get(9)?,
301            created_at: r.get(10)?,
302            updated_at: r.get(11)?,
303            deleted_at: r.get(12)?,
304        })
305    }) {
306        Ok(m) => Ok(Some(m)),
307        Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
308        Err(e) => Err(AppError::Database(e)),
309    }
310}
311
312/// Soft-deletes a memory by setting `deleted_at = unixepoch()`.
313///
314/// Versions and chunks are preserved so `restore` can undo the operation
315/// until a subsequent `purge` reclaims the storage permanently.
316///
317/// # Returns
318///
319/// `Ok(true)` when a live memory was soft-deleted, `Ok(false)` when no
320/// matching live row existed.
321///
322/// # Errors
323///
324/// Returns `Err(AppError::Database)` on any `rusqlite` failure.
325pub fn soft_delete(conn: &Connection, namespace: &str, name: &str) -> Result<bool, AppError> {
326    let affected = conn.execute(
327        "UPDATE memories SET deleted_at = unixepoch() WHERE namespace=?1 AND name=?2 AND deleted_at IS NULL",
328        params![namespace, name],
329    )?;
330    Ok(affected == 1)
331}
332
333/// Lists live memories in a namespace ordered by `updated_at` descending.
334///
335/// # Arguments
336///
337/// - `memory_type` — optional filter on the `type` column.
338/// - `limit` / `offset` — standard pagination controls in rows.
339///
340/// # Errors
341///
342/// Returns `Err(AppError::Database)` on any `rusqlite` failure.
343pub fn list(
344    conn: &Connection,
345    namespace: &str,
346    memory_type: Option<&str>,
347    limit: usize,
348    offset: usize,
349    include_deleted: bool,
350) -> Result<Vec<MemoryRow>, AppError> {
351    let deleted_clause = if include_deleted {
352        ""
353    } else {
354        " AND deleted_at IS NULL"
355    };
356    if let Some(mt) = memory_type {
357        let sql = format!(
358            "SELECT id, namespace, name, type, description, body, body_hash,
359                    session_id, source, metadata, created_at, updated_at, deleted_at
360             FROM memories WHERE namespace=?1 AND type=?2{deleted_clause}
361             ORDER BY updated_at DESC LIMIT ?3 OFFSET ?4"
362        );
363        let mut stmt = conn.prepare(&sql)?;
364        let rows = stmt
365            .query_map(params![namespace, mt, limit as i64, offset as i64], |r| {
366                Ok(MemoryRow {
367                    id: r.get(0)?,
368                    namespace: r.get(1)?,
369                    name: r.get(2)?,
370                    memory_type: r.get(3)?,
371                    description: r.get(4)?,
372                    body: r.get(5)?,
373                    body_hash: r.get(6)?,
374                    session_id: r.get(7)?,
375                    source: r.get(8)?,
376                    metadata: r.get(9)?,
377                    created_at: r.get(10)?,
378                    updated_at: r.get(11)?,
379                    deleted_at: r.get(12)?,
380                })
381            })?
382            .collect::<Result<Vec<_>, _>>()?;
383        Ok(rows)
384    } else {
385        let sql = format!(
386            "SELECT id, namespace, name, type, description, body, body_hash,
387                    session_id, source, metadata, created_at, updated_at, deleted_at
388             FROM memories WHERE namespace=?1{deleted_clause}
389             ORDER BY updated_at DESC LIMIT ?2 OFFSET ?3"
390        );
391        let mut stmt = conn.prepare(&sql)?;
392        let rows = stmt
393            .query_map(params![namespace, limit as i64, offset as i64], |r| {
394                Ok(MemoryRow {
395                    id: r.get(0)?,
396                    namespace: r.get(1)?,
397                    name: r.get(2)?,
398                    memory_type: r.get(3)?,
399                    description: r.get(4)?,
400                    body: r.get(5)?,
401                    body_hash: r.get(6)?,
402                    session_id: r.get(7)?,
403                    source: r.get(8)?,
404                    metadata: r.get(9)?,
405                    created_at: r.get(10)?,
406                    updated_at: r.get(11)?,
407                    deleted_at: r.get(12)?,
408                })
409            })?
410            .collect::<Result<Vec<_>, _>>()?;
411        Ok(rows)
412    }
413}
414
415/// Runs a KNN search over `vec_memories`, optionally restricted to namespaces.
416///
417/// # Arguments
418///
419/// - `embedding` — query vector of length [`crate::constants::EMBEDDING_DIM`].
420/// - `namespaces` — namespaces to search. Empty slice means "all namespaces".
421/// - `memory_type` — optional filter on the `type` column.
422/// - `k` — maximum number of hits to return.
423///
424/// # Returns
425///
426/// A vector of `(memory_id, distance)` pairs sorted by ascending distance.
427///
428/// # Errors
429///
430/// Returns `Err(AppError::Database)` on any `rusqlite` failure.
431pub fn knn_search(
432    conn: &Connection,
433    embedding: &[f32],
434    namespaces: &[String],
435    memory_type: Option<&str>,
436    k: usize,
437) -> Result<Vec<(i64, f32)>, AppError> {
438    let bytes = f32_to_bytes(embedding);
439
440    match namespaces.len() {
441        0 => {
442            // No namespace filter — search all namespaces.
443            if let Some(mt) = memory_type {
444                let mut stmt = conn.prepare(
445                    "SELECT memory_id, distance FROM vec_memories \
446                     WHERE embedding MATCH ?1 AND type = ?2 \
447                     ORDER BY distance LIMIT ?3",
448                )?;
449                let rows = stmt
450                    .query_map(params![bytes, mt, k as i64], |r| {
451                        Ok((r.get::<_, i64>(0)?, r.get::<_, f32>(1)?))
452                    })?
453                    .collect::<Result<Vec<_>, _>>()?;
454                Ok(rows)
455            } else {
456                let mut stmt = conn.prepare(
457                    "SELECT memory_id, distance FROM vec_memories \
458                     WHERE embedding MATCH ?1 \
459                     ORDER BY distance LIMIT ?2",
460                )?;
461                let rows = stmt
462                    .query_map(params![bytes, k as i64], |r| {
463                        Ok((r.get::<_, i64>(0)?, r.get::<_, f32>(1)?))
464                    })?
465                    .collect::<Result<Vec<_>, _>>()?;
466                Ok(rows)
467            }
468        }
469        1 => {
470            // Fast single-namespace path (preserved from previous implementation).
471            let ns = &namespaces[0];
472            if let Some(mt) = memory_type {
473                let mut stmt = conn.prepare(
474                    "SELECT memory_id, distance FROM vec_memories \
475                     WHERE embedding MATCH ?1 AND namespace = ?2 AND type = ?3 \
476                     ORDER BY distance LIMIT ?4",
477                )?;
478                let rows = stmt
479                    .query_map(params![bytes, ns, mt, k as i64], |r| {
480                        Ok((r.get::<_, i64>(0)?, r.get::<_, f32>(1)?))
481                    })?
482                    .collect::<Result<Vec<_>, _>>()?;
483                Ok(rows)
484            } else {
485                let mut stmt = conn.prepare(
486                    "SELECT memory_id, distance FROM vec_memories \
487                     WHERE embedding MATCH ?1 AND namespace = ?2 \
488                     ORDER BY distance LIMIT ?3",
489                )?;
490                let rows = stmt
491                    .query_map(params![bytes, ns, k as i64], |r| {
492                        Ok((r.get::<_, i64>(0)?, r.get::<_, f32>(1)?))
493                    })?
494                    .collect::<Result<Vec<_>, _>>()?;
495                Ok(rows)
496            }
497        }
498        _ => {
499            // Multiple explicit namespaces: build IN clause with positional placeholders.
500            // rusqlite does not support array binding, so we generate "?,?,..." manually.
501            let placeholders = (0..namespaces.len())
502                .map(|_| "?")
503                .collect::<Vec<_>>()
504                .join(",");
505            if let Some(mt) = memory_type {
506                let query = format!(
507                    "SELECT memory_id, distance FROM vec_memories \
508                     WHERE embedding MATCH ? AND type = ? AND namespace IN ({placeholders}) \
509                     ORDER BY distance LIMIT ?"
510                );
511                let mut stmt = conn.prepare(&query)?;
512                // Params: [bytes, mt, ns0, ns1, ..., k]
513                let mut raw_params: Vec<Box<dyn rusqlite::ToSql>> =
514                    vec![Box::new(bytes), Box::new(mt.to_string())];
515                for ns in namespaces {
516                    raw_params.push(Box::new(ns.clone()));
517                }
518                raw_params.push(Box::new(k as i64));
519                let param_refs: Vec<&dyn rusqlite::ToSql> =
520                    raw_params.iter().map(|b| b.as_ref()).collect();
521                let rows = stmt
522                    .query_map(param_refs.as_slice(), |r| {
523                        Ok((r.get::<_, i64>(0)?, r.get::<_, f32>(1)?))
524                    })?
525                    .collect::<Result<Vec<_>, _>>()?;
526                Ok(rows)
527            } else {
528                let query = format!(
529                    "SELECT memory_id, distance FROM vec_memories \
530                     WHERE embedding MATCH ? AND namespace IN ({placeholders}) \
531                     ORDER BY distance LIMIT ?"
532                );
533                let mut stmt = conn.prepare(&query)?;
534                // Params: [bytes, ns0, ns1, ..., k]
535                let mut raw_params: Vec<Box<dyn rusqlite::ToSql>> = vec![Box::new(bytes)];
536                for ns in namespaces {
537                    raw_params.push(Box::new(ns.clone()));
538                }
539                raw_params.push(Box::new(k as i64));
540                let param_refs: Vec<&dyn rusqlite::ToSql> =
541                    raw_params.iter().map(|b| b.as_ref()).collect();
542                let rows = stmt
543                    .query_map(param_refs.as_slice(), |r| {
544                        Ok((r.get::<_, i64>(0)?, r.get::<_, f32>(1)?))
545                    })?
546                    .collect::<Result<Vec<_>, _>>()?;
547                Ok(rows)
548            }
549        }
550    }
551}
552
553/// Fetches a live memory by primary key and returns all columns.
554///
555/// Mirrors [`read_by_name`] but keyed on `rowid` for use after a KNN search.
556///
557/// # Errors
558///
559/// Returns `Err(AppError::Database)` on any `rusqlite` failure.
560pub fn read_full(conn: &Connection, memory_id: i64) -> Result<Option<MemoryRow>, AppError> {
561    let mut stmt = conn.prepare_cached(
562        "SELECT id, namespace, name, type, description, body, body_hash,
563                session_id, source, metadata, created_at, updated_at, deleted_at
564         FROM memories WHERE id=?1 AND deleted_at IS NULL",
565    )?;
566    match stmt.query_row(params![memory_id], |r| {
567        Ok(MemoryRow {
568            id: r.get(0)?,
569            namespace: r.get(1)?,
570            name: r.get(2)?,
571            memory_type: r.get(3)?,
572            description: r.get(4)?,
573            body: r.get(5)?,
574            body_hash: r.get(6)?,
575            session_id: r.get(7)?,
576            source: r.get(8)?,
577            metadata: r.get(9)?,
578            created_at: r.get(10)?,
579            updated_at: r.get(11)?,
580            deleted_at: r.get(12)?,
581        })
582    }) {
583        Ok(m) => Ok(Some(m)),
584        Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
585        Err(e) => Err(AppError::Database(e)),
586    }
587}
588
589/// Fetches all memory_ids in a namespace that are soft-deleted and whose
590/// `deleted_at` is older than `before_ts` (unix epoch seconds).
591///
592/// Used by `purge` to collect stale rows for permanent deletion.
593///
594/// # Errors
595///
596/// Returns `Err(AppError::Database)` on any `rusqlite` failure.
597pub fn list_deleted_before(
598    conn: &Connection,
599    namespace: &str,
600    before_ts: i64,
601) -> Result<Vec<i64>, AppError> {
602    let mut stmt = conn.prepare_cached(
603        "SELECT id FROM memories WHERE namespace = ?1 AND deleted_at IS NOT NULL AND deleted_at < ?2",
604    )?;
605    let ids = stmt
606        .query_map(params![namespace, before_ts], |r| r.get::<_, i64>(0))?
607        .collect::<Result<Vec<_>, _>>()?;
608    Ok(ids)
609}
610
/// Preprocesses a raw user query for FTS5 `MATCH`.
///
/// Processing steps:
///
/// 1. Characters with FTS5 syntax meaning (`"`, `*`, `(`, `)`, `^`, `:`) are
///    removed from the input.
/// 2. Tokens equal (ASCII case-insensitively) to `OR`, `AND`, `NOT` or `NEAR`
///    are dropped so they cannot be parsed as operators.
/// 3. Technical separators (`-`, `.`, `_`, `/`) are treated as word boundaries
///    by the `unicode61` tokenizer. When the query contains any of them the
///    function builds a compound FTS5 expression:
///    - a phrase query with the separated tokens (exact compound matching),
///    - followed by the individual prefix terms joined with `OR` (broader
///      recall).
/// 4. Queries without separators become space-separated `term*` prefix terms.
///
/// A prefix term is written as a bareword (`term*`) only when every character
/// is legal in an FTS5 bareword (ASCII letters and digits, `_`, the substitute
/// character, or any non-ASCII character). Tokens containing anything else —
/// for example `hello,`, `what's` or `c++` — are emitted as quoted strings
/// (`"hello,"*`), because FTS5 rejects such characters outside quotes with a
/// syntax error. Quoting is safe without escaping because step 1 already
/// removed every `"`.
///
/// Returns an empty string when nothing searchable remains.
fn preprocess_fts_query(raw: &str) -> String {
    const SEPARATORS: &[char] = &['-', '.', '_', '/'];
    const FTS5_SYNTAX: &[char] = &['"', '*', '(', ')', '^', ':'];
    const FTS5_KEYWORDS: &[&str] = &["OR", "AND", "NOT", "NEAR"];

    /// True when every character of `token` may appear in an FTS5 bareword.
    fn is_bareword(token: &str) -> bool {
        token
            .chars()
            .all(|c| !c.is_ascii() || c.is_ascii_alphanumeric() || c == '_' || c == '\u{1a}')
    }

    /// Renders `token` as an FTS5 prefix term, quoting it when required.
    fn prefix_term(token: &str) -> String {
        if is_bareword(token) {
            format!("{token}*")
        } else {
            format!("\"{token}\"*")
        }
    }

    let sanitized: String = raw.chars().filter(|c| !FTS5_SYNTAX.contains(c)).collect();
    let trimmed = sanitized.trim();
    if trimmed.is_empty() {
        return String::new();
    }

    let is_fts_keyword = |t: &str| FTS5_KEYWORDS.iter().any(|kw| kw.eq_ignore_ascii_case(t));

    if !trimmed.chars().any(|c| SEPARATORS.contains(&c)) {
        return trimmed
            .split_whitespace()
            .filter(|t| !is_fts_keyword(t))
            .map(prefix_term)
            .collect::<Vec<_>>()
            .join(" ");
    }

    let tokens: Vec<&str> = trimmed
        .split(|c: char| SEPARATORS.contains(&c) || c.is_whitespace())
        .filter(|t| !t.is_empty() && !is_fts_keyword(t))
        .collect();
    if tokens.is_empty() {
        return String::new();
    }
    let phrase = format!("\"{}\"", tokens.join(" "));
    let prefix_terms: Vec<String> = tokens.iter().map(|t| prefix_term(t)).collect();
    format!("{phrase} OR {}", prefix_terms.join(" OR "))
}
652
653/// Executes an FTS5 search against `fts_memories` with query preprocessing.
654///
655/// Technical separators in the query are converted to phrase + prefix OR
656/// expressions so compound terms like `graphrag-precompact.sh` match correctly.
657///
658/// # Errors
659///
660/// Returns `Err(AppError::Database)` on any `rusqlite` failure.
661pub fn fts_search(
662    conn: &Connection,
663    query: &str,
664    namespace: &str,
665    memory_type: Option<&str>,
666    limit: usize,
667) -> Result<Vec<MemoryRow>, AppError> {
668    let fts_query = preprocess_fts_query(query);
669    if let Some(mt) = memory_type {
670        let mut stmt = conn.prepare(
671            "SELECT m.id, m.namespace, m.name, m.type, m.description, m.body, m.body_hash,
672                    m.session_id, m.source, m.metadata, m.created_at, m.updated_at, m.deleted_at
673             FROM fts_memories fts
674             JOIN memories m ON m.id = fts.rowid
675             WHERE fts_memories MATCH ?1 AND m.namespace = ?2 AND m.type = ?3 AND m.deleted_at IS NULL
676             ORDER BY rank LIMIT ?4",
677        )?;
678        let rows = stmt
679            .query_map(params![fts_query, namespace, mt, limit as i64], |r| {
680                Ok(MemoryRow {
681                    id: r.get(0)?,
682                    namespace: r.get(1)?,
683                    name: r.get(2)?,
684                    memory_type: r.get(3)?,
685                    description: r.get(4)?,
686                    body: r.get(5)?,
687                    body_hash: r.get(6)?,
688                    session_id: r.get(7)?,
689                    source: r.get(8)?,
690                    metadata: r.get(9)?,
691                    created_at: r.get(10)?,
692                    updated_at: r.get(11)?,
693                    deleted_at: r.get(12)?,
694                })
695            })?
696            .collect::<Result<Vec<_>, _>>()?;
697        Ok(rows)
698    } else {
699        let mut stmt = conn.prepare(
700            "SELECT m.id, m.namespace, m.name, m.type, m.description, m.body, m.body_hash,
701                    m.session_id, m.source, m.metadata, m.created_at, m.updated_at, m.deleted_at
702             FROM fts_memories fts
703             JOIN memories m ON m.id = fts.rowid
704             WHERE fts_memories MATCH ?1 AND m.namespace = ?2 AND m.deleted_at IS NULL
705             ORDER BY rank LIMIT ?3",
706        )?;
707        let rows = stmt
708            .query_map(params![fts_query, namespace, limit as i64], |r| {
709                Ok(MemoryRow {
710                    id: r.get(0)?,
711                    namespace: r.get(1)?,
712                    name: r.get(2)?,
713                    memory_type: r.get(3)?,
714                    description: r.get(4)?,
715                    body: r.get(5)?,
716                    body_hash: r.get(6)?,
717                    session_id: r.get(7)?,
718                    source: r.get(8)?,
719                    metadata: r.get(9)?,
720                    created_at: r.get(10)?,
721                    updated_at: r.get(11)?,
722                    deleted_at: r.get(12)?,
723                })
724            })?
725            .collect::<Result<Vec<_>, _>>()?;
726        Ok(rows)
727    }
728}
729
730#[cfg(test)]
731mod tests {
732    use super::*;
733    use rusqlite::Connection;
734
735    type TestResult = Result<(), Box<dyn std::error::Error>>;
736
737    fn setup_conn() -> Result<Connection, Box<dyn std::error::Error>> {
738        crate::storage::connection::register_vec_extension();
739        let mut conn = Connection::open_in_memory()?;
740        conn.execute_batch(
741            "PRAGMA foreign_keys = ON;
742             PRAGMA temp_store = MEMORY;",
743        )?;
744        crate::migrations::runner().run(&mut conn)?;
745        Ok(conn)
746    }
747
748    fn new_memory(name: &str) -> NewMemory {
749        NewMemory {
750            namespace: "global".to_string(),
751            name: name.to_string(),
752            memory_type: "user".to_string(),
753            description: "descricao de teste".to_string(),
754            body: "test memory body".to_string(),
755            body_hash: format!("hash-{name}"),
756            session_id: None,
757            source: "agent".to_string(),
758            metadata: serde_json::json!({}),
759        }
760    }
761
762    #[test]
763    fn insert_and_find_by_name_return_id() -> TestResult {
764        let conn = setup_conn()?;
765        let m = new_memory("mem-alpha");
766        let id = insert(&conn, &m)?;
767        assert!(id > 0);
768
769        let found = find_by_name(&conn, "global", "mem-alpha")?;
770        assert!(found.is_some());
771        let (found_id, _, _) = found.ok_or("mem-alpha should exist")?;
772        assert_eq!(found_id, id);
773        Ok(())
774    }
775
776    #[test]
777    fn find_by_name_returns_none_when_not_found() -> TestResult {
778        let conn = setup_conn()?;
779        let result = find_by_name(&conn, "global", "inexistente")?;
780        assert!(result.is_none());
781        Ok(())
782    }
783
784    #[test]
785    fn find_by_hash_returns_correct_id() -> TestResult {
786        let conn = setup_conn()?;
787        let m = new_memory("mem-hash");
788        let id = insert(&conn, &m)?;
789
790        let found = find_by_hash(&conn, "global", "hash-mem-hash")?;
791        assert_eq!(found, Some(id));
792        Ok(())
793    }
794
795    #[test]
796    fn find_by_hash_returns_none_when_hash_not_found() -> TestResult {
797        let conn = setup_conn()?;
798        let result = find_by_hash(&conn, "global", "hash-inexistente")?;
799        assert!(result.is_none());
800        Ok(())
801    }
802
803    #[test]
804    fn find_by_hash_ignores_different_namespace() -> TestResult {
805        let conn = setup_conn()?;
806        let m = new_memory("mem-ns");
807        insert(&conn, &m)?;
808
809        let result = find_by_hash(&conn, "outro-namespace", "hash-mem-ns")?;
810        assert!(result.is_none());
811        Ok(())
812    }
813
814    #[test]
815    fn read_by_name_returns_full_memory() -> TestResult {
816        let conn = setup_conn()?;
817        let m = new_memory("mem-read");
818        let id = insert(&conn, &m)?;
819
820        let row = read_by_name(&conn, "global", "mem-read")?.ok_or("mem-read should exist")?;
821        assert_eq!(row.id, id);
822        assert_eq!(row.name, "mem-read");
823        assert_eq!(row.memory_type, "user");
824        assert_eq!(row.body, "test memory body");
825        assert_eq!(row.namespace, "global");
826        Ok(())
827    }
828
829    #[test]
830    fn read_by_name_returns_none_for_missing() -> TestResult {
831        let conn = setup_conn()?;
832        let result = read_by_name(&conn, "global", "nao-existe")?;
833        assert!(result.is_none());
834        Ok(())
835    }
836
837    #[test]
838    fn read_full_by_id_returns_memory() -> TestResult {
839        let conn = setup_conn()?;
840        let m = new_memory("mem-full");
841        let id = insert(&conn, &m)?;
842
843        let row = read_full(&conn, id)?.ok_or("mem-full should exist")?;
844        assert_eq!(row.id, id);
845        assert_eq!(row.name, "mem-full");
846        Ok(())
847    }
848
849    #[test]
850    fn read_full_returns_none_for_missing_id() -> TestResult {
851        let conn = setup_conn()?;
852        let result = read_full(&conn, 9999)?;
853        assert!(result.is_none());
854        Ok(())
855    }
856
857    #[test]
858    fn update_without_optimism_modifies_fields() -> TestResult {
859        let conn = setup_conn()?;
860        let m = new_memory("mem-upd");
861        let id = insert(&conn, &m)?;
862
863        let mut m2 = new_memory("mem-upd");
864        m2.body = "updated body".to_string();
865        m2.body_hash = "hash-novo".to_string();
866        let ok = update(&conn, id, &m2, None)?;
867        assert!(ok);
868
869        let row = read_full(&conn, id)?.ok_or("mem-upd should exist")?;
870        assert_eq!(row.body, "updated body");
871        assert_eq!(row.body_hash, "hash-novo");
872        Ok(())
873    }
874
875    #[test]
876    fn update_with_correct_expected_updated_at_succeeds() -> TestResult {
877        let conn = setup_conn()?;
878        let m = new_memory("mem-opt");
879        let id = insert(&conn, &m)?;
880
881        let (_, updated_at, _) =
882            find_by_name(&conn, "global", "mem-opt")?.ok_or("mem-opt should exist")?;
883
884        let mut m2 = new_memory("mem-opt");
885        m2.body = "optimistic body".to_string();
886        m2.body_hash = "hash-optimistic".to_string();
887        let ok = update(&conn, id, &m2, Some(updated_at))?;
888        assert!(ok);
889
890        let row = read_full(&conn, id)?.ok_or("mem-opt should exist after update")?;
891        assert_eq!(row.body, "optimistic body");
892        Ok(())
893    }
894
895    #[test]
896    fn update_with_wrong_expected_updated_at_returns_false() -> TestResult {
897        let conn = setup_conn()?;
898        let m = new_memory("mem-conflict");
899        let id = insert(&conn, &m)?;
900
901        let mut m2 = new_memory("mem-conflict");
902        m2.body = "must not appear".to_string();
903        m2.body_hash = "hash-x".to_string();
904        let ok = update(&conn, id, &m2, Some(0))?;
905        assert!(!ok);
906
907        let row = read_full(&conn, id)?.ok_or("mem-conflict should exist")?;
908        assert_eq!(row.body, "test memory body");
909        Ok(())
910    }
911
912    #[test]
913    fn update_missing_id_returns_false() -> TestResult {
914        let conn = setup_conn()?;
915        let m = new_memory("fantasma");
916        let ok = update(&conn, 9999, &m, None)?;
917        assert!(!ok);
918        Ok(())
919    }
920
921    #[test]
922    fn soft_delete_marks_deleted_at() -> TestResult {
923        let conn = setup_conn()?;
924        let m = new_memory("mem-del");
925        insert(&conn, &m)?;
926
927        let ok = soft_delete(&conn, "global", "mem-del")?;
928        assert!(ok);
929
930        let result = find_by_name(&conn, "global", "mem-del")?;
931        assert!(result.is_none());
932
933        let result_read = read_by_name(&conn, "global", "mem-del")?;
934        assert!(result_read.is_none());
935        Ok(())
936    }
937
938    #[test]
939    fn soft_delete_returns_false_when_not_found() -> TestResult {
940        let conn = setup_conn()?;
941        let ok = soft_delete(&conn, "global", "nao-existe")?;
942        assert!(!ok);
943        Ok(())
944    }
945
946    #[test]
947    fn double_soft_delete_returns_false_on_second_call() -> TestResult {
948        let conn = setup_conn()?;
949        let m = new_memory("mem-del2");
950        insert(&conn, &m)?;
951
952        soft_delete(&conn, "global", "mem-del2")?;
953        let ok = soft_delete(&conn, "global", "mem-del2")?;
954        assert!(!ok);
955        Ok(())
956    }
957
958    #[test]
959    fn list_returns_memories_from_namespace() -> TestResult {
960        let conn = setup_conn()?;
961        insert(&conn, &new_memory("mem-list-a"))?;
962        insert(&conn, &new_memory("mem-list-b"))?;
963
964        let rows = list(&conn, "global", None, 10, 0, false)?;
965        assert!(rows.len() >= 2);
966        let nomes: Vec<_> = rows.iter().map(|r| r.name.as_str()).collect();
967        assert!(nomes.contains(&"mem-list-a"));
968        assert!(nomes.contains(&"mem-list-b"));
969        Ok(())
970    }
971
972    #[test]
973    fn list_with_type_filter_returns_only_correct_type() -> TestResult {
974        let conn = setup_conn()?;
975        insert(&conn, &new_memory("mem-user"))?;
976
977        let mut m2 = new_memory("mem-feedback");
978        m2.memory_type = "feedback".to_string();
979        insert(&conn, &m2)?;
980
981        let rows_user = list(&conn, "global", Some("user"), 10, 0, false)?;
982        assert!(rows_user.iter().all(|r| r.memory_type == "user"));
983
984        let rows_fb = list(&conn, "global", Some("feedback"), 10, 0, false)?;
985        assert!(rows_fb.iter().all(|r| r.memory_type == "feedback"));
986        Ok(())
987    }
988
989    #[test]
990    fn list_exclui_soft_deleted() -> TestResult {
991        let conn = setup_conn()?;
992        let m = new_memory("mem-excluida");
993        insert(&conn, &m)?;
994        soft_delete(&conn, "global", "mem-excluida")?;
995
996        let rows = list(&conn, "global", None, 10, 0, false)?;
997        assert!(rows.iter().all(|r| r.name != "mem-excluida"));
998        Ok(())
999    }
1000
1001    #[test]
1002    fn list_pagination_works() -> TestResult {
1003        let conn = setup_conn()?;
1004        for i in 0..5 {
1005            insert(&conn, &new_memory(&format!("mem-pag-{i}")))?;
1006        }
1007
1008        let pagina1 = list(&conn, "global", None, 2, 0, false)?;
1009        let pagina2 = list(&conn, "global", None, 2, 2, false)?;
1010        assert!(pagina1.len() <= 2);
1011        assert!(pagina2.len() <= 2);
1012        if !pagina1.is_empty() && !pagina2.is_empty() {
1013            assert_ne!(pagina1[0].id, pagina2[0].id);
1014        }
1015        Ok(())
1016    }
1017
1018    #[test]
1019    fn upsert_vec_and_delete_vec_work() -> TestResult {
1020        let conn = setup_conn()?;
1021        let m = new_memory("mem-vec");
1022        let id = insert(&conn, &m)?;
1023
1024        let embedding: Vec<f32> = vec![0.1; 384];
1025        upsert_vec(
1026            &conn, id, "global", "user", &embedding, "mem-vec", "snippet",
1027        )?;
1028
1029        let count: i64 = conn.query_row(
1030            "SELECT COUNT(*) FROM vec_memories WHERE memory_id = ?1",
1031            params![id],
1032            |r| r.get(0),
1033        )?;
1034        assert_eq!(count, 1);
1035
1036        delete_vec(&conn, id)?;
1037
1038        let count_after: i64 = conn.query_row(
1039            "SELECT COUNT(*) FROM vec_memories WHERE memory_id = ?1",
1040            params![id],
1041            |r| r.get(0),
1042        )?;
1043        assert_eq!(count_after, 0);
1044        Ok(())
1045    }
1046
1047    #[test]
1048    fn upsert_vec_replaces_existing_vector() -> TestResult {
1049        let conn = setup_conn()?;
1050        let m = new_memory("mem-vec-upsert");
1051        let id = insert(&conn, &m)?;
1052
1053        let emb1: Vec<f32> = vec![0.1; 384];
1054        upsert_vec(&conn, id, "global", "user", &emb1, "mem-vec-upsert", "s1")?;
1055
1056        let emb2: Vec<f32> = vec![0.9; 384];
1057        upsert_vec(&conn, id, "global", "user", &emb2, "mem-vec-upsert", "s2")?;
1058
1059        let count: i64 = conn.query_row(
1060            "SELECT COUNT(*) FROM vec_memories WHERE memory_id = ?1",
1061            params![id],
1062            |r| r.get(0),
1063        )?;
1064        assert_eq!(count, 1);
1065        Ok(())
1066    }
1067
1068    #[test]
1069    fn knn_search_returns_results_by_distance() -> TestResult {
1070        let conn = setup_conn()?;
1071
1072        // emb_a: predominantemente positivo — cosseno alto com query [1.0; 384]
1073        let ma = new_memory("mem-knn-a");
1074        let id_a = insert(&conn, &ma)?;
1075        let emb_a: Vec<f32> = vec![1.0; 384];
1076        upsert_vec(&conn, id_a, "global", "user", &emb_a, "mem-knn-a", "s")?;
1077
1078        // emb_b: predominantemente negativo — cosseno baixo com query [1.0; 384]
1079        let mb = new_memory("mem-knn-b");
1080        let id_b = insert(&conn, &mb)?;
1081        let emb_b: Vec<f32> = vec![-1.0; 384];
1082        upsert_vec(&conn, id_b, "global", "user", &emb_b, "mem-knn-b", "s")?;
1083
1084        let query: Vec<f32> = vec![1.0; 384];
1085        let results = knn_search(&conn, &query, &["global".to_string()], None, 2)?;
1086        assert!(!results.is_empty());
1087        assert_eq!(results[0].0, id_a);
1088        Ok(())
1089    }
1090
1091    #[test]
1092    fn knn_search_with_type_filter_restricts_result() -> TestResult {
1093        let conn = setup_conn()?;
1094
1095        let ma = new_memory("mem-knn-tipo-user");
1096        let id_a = insert(&conn, &ma)?;
1097        let emb: Vec<f32> = vec![1.0; 384];
1098        upsert_vec(
1099            &conn,
1100            id_a,
1101            "global",
1102            "user",
1103            &emb,
1104            "mem-knn-tipo-user",
1105            "s",
1106        )?;
1107
1108        let mut mb = new_memory("mem-knn-tipo-fb");
1109        mb.memory_type = "feedback".to_string();
1110        let id_b = insert(&conn, &mb)?;
1111        upsert_vec(
1112            &conn,
1113            id_b,
1114            "global",
1115            "feedback",
1116            &emb,
1117            "mem-knn-tipo-fb",
1118            "s",
1119        )?;
1120
1121        let query: Vec<f32> = vec![1.0; 384];
1122        let results_user = knn_search(&conn, &query, &["global".to_string()], Some("user"), 5)?;
1123        assert!(results_user.iter().all(|(id, _)| *id == id_a));
1124
1125        let results_fb = knn_search(&conn, &query, &["global".to_string()], Some("feedback"), 5)?;
1126        assert!(results_fb.iter().all(|(id, _)| *id == id_b));
1127        Ok(())
1128    }
1129
1130    #[test]
1131    fn fts_search_finds_by_prefix_in_body() -> TestResult {
1132        let conn = setup_conn()?;
1133        let mut m = new_memory("mem-fts");
1134        m.body = "linguagem de programacao rust".to_string();
1135        insert(&conn, &m)?;
1136
1137        conn.execute_batch(
1138            "INSERT INTO fts_memories(rowid, name, description, body)
1139             SELECT id, name, description, body FROM memories WHERE deleted_at IS NULL",
1140        )?;
1141
1142        let rows = fts_search(&conn, "programacao", "global", None, 10)?;
1143        assert!(!rows.is_empty());
1144        assert!(rows.iter().any(|r| r.name == "mem-fts"));
1145        Ok(())
1146    }
1147
1148    #[test]
1149    fn fts_search_with_type_filter() -> TestResult {
1150        let conn = setup_conn()?;
1151        let mut m = new_memory("mem-fts-tipo");
1152        m.body = "linguagem especial para filtro".to_string();
1153        insert(&conn, &m)?;
1154
1155        let mut m2 = new_memory("mem-fts-feedback");
1156        m2.memory_type = "feedback".to_string();
1157        m2.body = "linguagem especial para filtro".to_string();
1158        insert(&conn, &m2)?;
1159
1160        conn.execute_batch(
1161            "INSERT INTO fts_memories(rowid, name, description, body)
1162             SELECT id, name, description, body FROM memories WHERE deleted_at IS NULL",
1163        )?;
1164
1165        let rows_user = fts_search(&conn, "especial", "global", Some("user"), 10)?;
1166        assert!(rows_user.iter().all(|r| r.memory_type == "user"));
1167
1168        let rows_fb = fts_search(&conn, "especial", "global", Some("feedback"), 10)?;
1169        assert!(rows_fb.iter().all(|r| r.memory_type == "feedback"));
1170        Ok(())
1171    }
1172
1173    #[test]
1174    fn fts_search_excludes_deleted() -> TestResult {
1175        let conn = setup_conn()?;
1176        let mut m = new_memory("mem-fts-del");
1177        m.body = "deleted fts content".to_string();
1178        insert(&conn, &m)?;
1179
1180        conn.execute_batch(
1181            "INSERT INTO fts_memories(rowid, name, description, body)
1182             SELECT id, name, description, body FROM memories WHERE deleted_at IS NULL",
1183        )?;
1184
1185        soft_delete(&conn, "global", "mem-fts-del")?;
1186
1187        let rows = fts_search(&conn, "deleted", "global", None, 10)?;
1188        assert!(rows.iter().all(|r| r.name != "mem-fts-del"));
1189        Ok(())
1190    }
1191
1192    #[test]
1193    fn list_deleted_before_returns_correct_ids() -> TestResult {
1194        let conn = setup_conn()?;
1195        let m = new_memory("mem-purge");
1196        insert(&conn, &m)?;
1197        soft_delete(&conn, "global", "mem-purge")?;
1198
1199        let ids = list_deleted_before(&conn, "global", i64::MAX)?;
1200        assert!(!ids.is_empty());
1201
1202        let ids_antes = list_deleted_before(&conn, "global", 0)?;
1203        assert!(ids_antes.is_empty());
1204        Ok(())
1205    }
1206
1207    #[test]
1208    fn find_by_name_returns_correct_max_version() -> TestResult {
1209        let conn = setup_conn()?;
1210        let m = new_memory("mem-ver");
1211        let id = insert(&conn, &m)?;
1212
1213        let (_, _, v0) = find_by_name(&conn, "global", "mem-ver")?.ok_or("mem-ver should exist")?;
1214        assert_eq!(v0, 0);
1215
1216        conn.execute(
1217            "INSERT INTO memory_versions (memory_id, version, name, type, description, body, metadata, change_reason)
1218             VALUES (?1, 1, 'mem-ver', 'user', 'desc', 'body', '{}', 'create')",
1219            params![id],
1220        )?;
1221
1222        let (_, _, v1) =
1223            find_by_name(&conn, "global", "mem-ver")?.ok_or("mem-ver should exist after insert")?;
1224        assert_eq!(v1, 1);
1225        Ok(())
1226    }
1227
1228    #[test]
1229    fn insert_com_metadata_json() -> TestResult {
1230        let conn = setup_conn()?;
1231        let mut m = new_memory("mem-meta");
1232        m.metadata = serde_json::json!({"chave": "valor", "numero": 42});
1233        let id = insert(&conn, &m)?;
1234
1235        let row = read_full(&conn, id)?.ok_or("mem-meta should exist")?;
1236        let meta: serde_json::Value = serde_json::from_str(&row.metadata)?;
1237        assert_eq!(meta["chave"], "valor");
1238        assert_eq!(meta["numero"], 42);
1239        Ok(())
1240    }
1241
1242    #[test]
1243    fn insert_com_session_id() -> TestResult {
1244        let conn = setup_conn()?;
1245        let mut m = new_memory("mem-session");
1246        m.session_id = Some("sessao-xyz".to_string());
1247        let id = insert(&conn, &m)?;
1248
1249        let row = read_full(&conn, id)?.ok_or("mem-session should exist")?;
1250        assert_eq!(row.session_id, Some("sessao-xyz".to_string()));
1251        Ok(())
1252    }
1253
1254    #[test]
1255    fn delete_vec_for_nonexistent_id_does_not_fail() -> TestResult {
1256        let conn = setup_conn()?;
1257        let result = delete_vec(&conn, 99999);
1258        assert!(result.is_ok());
1259        Ok(())
1260    }
1261
1262    #[test]
1263    fn preprocess_fts_query_no_separators() {
1264        assert_eq!(preprocess_fts_query("hello"), "hello*");
1265        assert_eq!(preprocess_fts_query("hello world"), "hello* world*");
1266    }
1267
1268    #[test]
1269    fn preprocess_fts_query_with_hyphens() {
1270        let result = preprocess_fts_query("graphrag-precompact");
1271        assert!(result.contains("\"graphrag precompact\""));
1272        assert!(result.contains("graphrag*"));
1273        assert!(result.contains("precompact*"));
1274    }
1275
1276    #[test]
1277    fn preprocess_fts_query_with_dots() {
1278        let result = preprocess_fts_query("v1.0.44");
1279        assert!(result.contains("\"v1 0 44\""));
1280        assert!(result.contains("v1*"));
1281        assert!(result.contains("44*"));
1282    }
1283
1284    #[test]
1285    fn preprocess_fts_query_with_mixed_separators() {
1286        let result = preprocess_fts_query("graphrag-precompact.sh");
1287        assert!(result.contains("\"graphrag precompact sh\""));
1288        assert!(result.contains("graphrag*"));
1289    }
1290
1291    #[test]
1292    fn preprocess_fts_query_empty_and_whitespace() {
1293        assert_eq!(preprocess_fts_query(""), "");
1294        assert_eq!(preprocess_fts_query("  "), "");
1295    }
1296
1297    #[test]
1298    fn preprocess_fts_query_strips_quotes() {
1299        let result = preprocess_fts_query(r#"hello "world"#);
1300        assert!(result.contains("hello*"));
1301        assert!(result.contains("world*"));
1302    }
1303
1304    #[test]
1305    fn preprocess_fts_query_strips_asterisks() {
1306        assert_eq!(preprocess_fts_query("test*"), "test*");
1307    }
1308
1309    #[test]
1310    fn preprocess_fts_query_strips_parens() {
1311        let result = preprocess_fts_query("(hello)");
1312        assert!(result.contains("hello*"));
1313        assert!(!result.contains('('));
1314    }
1315
1316    #[test]
1317    fn preprocess_fts_query_filters_fts_keywords() {
1318        let result = preprocess_fts_query("foo OR bar");
1319        assert!(result.contains("foo*"));
1320        assert!(result.contains("bar*"));
1321        assert!(!result.contains("OR*"));
1322    }
1323
1324    #[test]
1325    fn preprocess_fts_query_only_fts_keywords() {
1326        assert_eq!(preprocess_fts_query("OR AND NOT"), "");
1327    }
1328
1329    #[test]
1330    fn preprocess_fts_query_keywords_with_separators() {
1331        let result = preprocess_fts_query("hello-OR-world");
1332        assert!(result.contains("hello*"));
1333        assert!(result.contains("world*"));
1334        assert!(!result.contains("OR*"));
1335    }
1336
1337    #[test]
1338    fn fts_search_finds_compound_term_with_hyphen() -> TestResult {
1339        let conn = setup_conn()?;
1340        let mut m = new_memory("mem-compound");
1341        m.body = "the graphrag-precompact script runs daily".to_string();
1342        insert(&conn, &m)?;
1343        conn.execute_batch(
1344            "INSERT INTO fts_memories(rowid, name, description, body)
1345             SELECT id, name, description, body FROM memories WHERE deleted_at IS NULL",
1346        )?;
1347        let rows = fts_search(&conn, "graphrag-precompact", "global", None, 10)?;
1348        assert!(!rows.is_empty(), "should find compound hyphenated term");
1349        Ok(())
1350    }
1351}