Skip to main content

tj_core/
fts.rs

1//! FTS5 query sanitization.
2//!
3//! SQLite FTS5 treats `-`, `:`, `*`, `(`, `)`, `"`, and `/` as syntax.
4//! A raw query like `OPS-306` parses as column-prefix `OPS-` followed by
5//! reference `306`, raising `no such column: 306` at runtime.
6//!
7//! [`sanitize_query`] wraps queries containing any metacharacter in
8//! double quotes, turning them into FTS5 phrase queries that match the
9//! sequence of tokens as the unicode61 tokenizer split them. Internal
10//! `"` characters are doubled per FTS5 escape rules.
11
/// Make a free-text query safe to hand to an FTS5 `MATCH` expression.
///
/// FTS5 only accepts unquoted "barewords" built from ASCII letters, ASCII
/// digits, `_`, and non-ASCII code points. Every other character is either
/// query syntax (`-`, `:`, `*`, `(`, `)`, `"`, `+`, `^`, `{`, `}`) or an
/// outright syntax error (`.`, `/`, `'`, `@`, `,`, ...). Rather than
/// enumerate the bad characters, this function allows only the safe ones:
///
/// * If `query` consists solely of bareword characters and the whitespace
///   the FTS5 parser skips (space, tab, LF, CR), it is returned unchanged,
///   so multi-term queries keep their default AND semantics. The empty
///   string falls into this case and stays empty.
/// * Otherwise the whole query is wrapped in double quotes, turning it into
///   a single FTS5 phrase, with every embedded `"` doubled per the FTS5
///   string escape rule.
///
/// Note that words such as `AND`, `OR`, `NOT` and `NEAR` contain only
/// letters and therefore pass through unquoted.
pub fn sanitize_query(query: &str) -> String {
    // `all` is vacuously true for "", so empty input is returned as-is.
    if query.chars().all(is_fts5_plain_char) {
        return query.to_string();
    }
    let escaped = query.replace('"', "\"\"");
    format!("\"{escaped}\"")
}

/// Report whether `c` may appear in an unquoted FTS5 query without being
/// interpreted as syntax: a bareword character or parser whitespace.
///
/// The check is deliberately conservative: anything not listed here forces
/// the caller to phrase-quote the query, which is always valid FTS5.
fn is_fts5_plain_char(c: char) -> bool {
    c.is_ascii_alphanumeric()
        || c == '_'
        || !c.is_ascii()
        || matches!(c, ' ' | '\t' | '\n' | '\r')
}
29
/// Produce a "contains" pattern for SQL `LIKE` by surrounding `query`
/// with `%` on both sides.
///
/// Intended as the last-resort fallback for a free-text search: when the
/// FTS5 lookup yields nothing, the caller can match the raw substring
/// against `search_fts.text` instead.
///
/// No `LIKE` escaping is performed, so any `%` or `_` inside `query` is
/// copied through verbatim. The returned string MUST be supplied as a
/// bound parameter and never spliced into the SQL text.
pub fn like_pattern(query: &str) -> String {
    // Exactly two extra bytes are needed: one `%` on each side.
    let mut pattern = String::with_capacity(query.len() + 2);
    pattern.push('%');
    pattern.push_str(query);
    pattern.push('%');
    pattern
}
38
#[cfg(test)]
mod tests {
    use super::{like_pattern, sanitize_query};

    /// Assert that sanitizing `raw` produces exactly `expected`.
    /// `#[track_caller]` makes a failure point at the calling test.
    #[track_caller]
    fn check(raw: &str, expected: &str) {
        assert_eq!(sanitize_query(raw), expected);
    }

    // --- inputs that must be returned unchanged ---

    #[test]
    fn plain_word_passes_through() {
        check("hello", "hello");
    }

    #[test]
    fn multi_word_passes_through_for_default_and_search() {
        check("bulk repack", "bulk repack");
    }

    #[test]
    fn cyrillic_passes_through() {
        check("слим модели", "слим модели");
    }

    #[test]
    fn empty_query_stays_empty() {
        check("", "");
    }

    // --- inputs that must come back as a quoted FTS5 phrase ---

    #[test]
    fn hyphenated_id_gets_phrase_quoted() {
        check("OPS-306", r#""OPS-306""#);
    }

    #[test]
    fn slash_path_gets_phrase_quoted() {
        check("src/main.rs", r#""src/main.rs""#);
    }

    #[test]
    fn colon_gets_phrase_quoted() {
        check("ttl:30s", r#""ttl:30s""#);
    }

    #[test]
    fn star_gets_phrase_quoted() {
        check("foo*bar", r#""foo*bar""#);
    }

    #[test]
    fn parens_get_phrase_quoted() {
        check("func()", r#""func()""#);
    }

    #[test]
    fn embedded_quote_is_doubled() {
        // Each inner `"` becomes `""`, then the whole phrase is wrapped.
        check(r#"say "hi""#, r#""say ""hi""""#);
    }

    // --- LIKE fallback ---

    #[test]
    fn like_pattern_wraps_with_percent_signs() {
        let pattern = like_pattern("OPS-306");
        assert_eq!(pattern, "%OPS-306%");
    }
}