intent_engine/
search.rs

//! Search utilities for intent-engine
//!
//! This module provides:
//! 1. CJK (Chinese, Japanese, Korean) search utilities for detecting when to use
//!    the LIKE fallback instead of FTS5 trigram search
//! 2. Unified search across tasks and events
//!
//! **Background**: SQLite FTS5 with the trigram tokenizer requires at least 3 consecutive
//! characters to match. This is problematic for CJK languages, where single-character
//! or two-character searches are common (e.g., "用户", "认证").
//!
//! **Solution**: For short CJK queries, we fall back to a LIKE search, which supports
//! substring matching of any length, albeit more slowly.
14
/// Check whether a character belongs to a CJK (Chinese, Japanese, Korean) script.
///
/// The check is a pure code-point range test over whole Unicode blocks:
/// Han ideographs (unified, compatibility and extensions A-H), Japanese kana
/// (including the phonetic extensions and halfwidth Katakana) and Korean Hangul
/// (syllables, Jamo and compatibility Jamo).
///
/// Because whole blocks are matched, a few unassigned or punctuation-like code
/// points inside those blocks (e.g. the Katakana middle dot) also return `true`.
/// CJK punctuation outside these blocks and fullwidth Latin letters return `false`.
pub fn is_cjk_char(c: char) -> bool {
    matches!(u32::from(c),
        // --- Han ideographs ---
        // CJK Unified Ideographs Extension A
        0x3400..=0x4DBF |
        // CJK Unified Ideographs (most common Chinese characters)
        0x4E00..=0x9FFF |
        // CJK Compatibility Ideographs
        0xF900..=0xFAFF |
        // CJK Unified Ideographs Extensions B-F (rare, included for completeness)
        0x20000..=0x2A6DF |
        0x2A700..=0x2B73F |
        0x2B740..=0x2B81F |
        0x2B820..=0x2CEAF |
        0x2CEB0..=0x2EBEF |
        // CJK Compatibility Ideographs Supplement
        0x2F800..=0x2FA1F |
        // CJK Unified Ideographs Extensions G and H
        0x30000..=0x3134F |
        0x31350..=0x323AF |
        // --- Japanese kana ---
        // Hiragana
        0x3040..=0x309F |
        // Katakana
        0x30A0..=0x30FF |
        // Katakana Phonetic Extensions
        0x31F0..=0x31FF |
        // Halfwidth Katakana letters and sound marks
        0xFF66..=0xFF9F |
        // --- Korean Hangul ---
        // Hangul Jamo
        0x1100..=0x11FF |
        // Hangul Compatibility Jamo (standalone consonants/vowels such as 'ㄱ')
        0x3130..=0x318F |
        // Hangul Syllables
        0xAC00..=0xD7AF
    )
}
37
38/// Determine if a query should use LIKE fallback instead of FTS5 trigram
39///
40/// Returns `true` if:
41/// - Query is a single CJK character, OR
42/// - Query is two CJK characters
43///
44/// Trigram tokenizer requires 3+ characters for matching, so we use LIKE
45/// for shorter CJK queries to ensure they work.
46pub fn needs_like_fallback(query: &str) -> bool {
47    let chars: Vec<char> = query.chars().collect();
48
49    // Single-character CJK
50    if chars.len() == 1 && is_cjk_char(chars[0]) {
51        return true;
52    }
53
54    // Two-character all-CJK
55    // This is optional - could also let trigram handle it, but trigram
56    // needs minimum 3 chars so two-char CJK won't work well
57    if chars.len() == 2 && chars.iter().all(|c| is_cjk_char(*c)) {
58        return true;
59    }
60
61    false
62}
63
#[cfg(test)]
mod tests {
    use super::*;

    /// Samples from each supported script are CJK; ASCII letters, digits,
    /// whitespace and punctuation are not.
    #[test]
    fn test_is_cjk_char() {
        let cjk_samples = [
            // Chinese characters
            '中', '文', '认', '证',
            // Japanese Hiragana
            'あ', 'い',
            // Japanese Katakana
            'ア', 'イ',
            // Korean Hangul
            '가', '나',
        ];
        for &c in cjk_samples.iter() {
            assert!(is_cjk_char(c), "{:?} should be classified as CJK", c);
        }

        let non_cjk_samples = ['a', 'A', '1', ' ', '.'];
        for &c in non_cjk_samples.iter() {
            assert!(!is_cjk_char(c), "{:?} should not be classified as CJK", c);
        }
    }

    /// Only one- and two-character all-CJK queries are routed to LIKE.
    #[test]
    fn test_needs_like_fallback() {
        let fallback_queries = [
            // Single CJK character
            "中", "认", "あ", "가",
            // Two CJK characters
            "中文", "认证", "用户",
        ];
        for query in fallback_queries.iter() {
            assert!(
                needs_like_fallback(query),
                "{:?} should use the LIKE fallback",
                query
            );
        }

        let fts5_queries = [
            // Three or more CJK characters
            "用户认", "用户认证",
            // English, including a single ASCII character (not CJK)
            "JWT", "auth", "a",
            // Mixed scripts
            "JWT认证", "API接口",
        ];
        for query in fts5_queries.iter() {
            assert!(
                !needs_like_fallback(query),
                "{:?} should be handled by FTS5",
                query
            );
        }
    }
}
123
124// ============================================================================
125// Unified Search
126// ============================================================================
127
128use crate::db::models::UnifiedSearchResult;
129use crate::error::Result;
130use crate::events::EventManager;
131use crate::tasks::TaskManager;
132use sqlx::SqlitePool;
133
134pub struct SearchManager<'a> {
135    pool: &'a SqlitePool,
136}
137
138impl<'a> SearchManager<'a> {
139    pub fn new(pool: &'a SqlitePool) -> Self {
140        Self { pool }
141    }
142
143    /// Unified search across tasks and events
144    ///
145    /// # Parameters
146    /// - `query`: FTS5 search query string
147    /// - `include_tasks`: Whether to search in tasks
148    /// - `include_events`: Whether to search in events
149    /// - `limit`: Maximum number of total results (default: 20)
150    ///
151    /// # Returns
152    /// A mixed vector of task and event search results, ordered by relevance (FTS5 rank)
153    pub async fn unified_search(
154        &self,
155        query: &str,
156        include_tasks: bool,
157        include_events: bool,
158        limit: Option<i64>,
159    ) -> Result<Vec<UnifiedSearchResult>> {
160        let total_limit = limit.unwrap_or(20);
161        let mut results = Vec::new();
162
163        // Calculate limits for each source
164        let (task_limit, event_limit) = match (include_tasks, include_events) {
165            (true, true) => (total_limit / 2, total_limit / 2),
166            (true, false) => (total_limit, 0),
167            (false, true) => (0, total_limit),
168            (false, false) => return Ok(results), // Early return if nothing to search
169        };
170
171        // Search tasks if enabled
172        if include_tasks && task_limit > 0 {
173            let task_mgr = TaskManager::new(self.pool);
174            let mut task_results = task_mgr.search_tasks(query).await?;
175
176            // Apply limit
177            task_results.truncate(task_limit as usize);
178
179            for task_result in task_results {
180                // Determine which field matched based on snippet content
181                let match_field = if task_result
182                    .match_snippet
183                    .to_lowercase()
184                    .contains(&task_result.task.name.to_lowercase())
185                {
186                    "name".to_string()
187                } else {
188                    "spec".to_string()
189                };
190
191                results.push(UnifiedSearchResult::Task {
192                    task: task_result.task,
193                    match_snippet: task_result.match_snippet,
194                    match_field,
195                });
196            }
197        }
198
199        // Search events if enabled
200        if include_events && event_limit > 0 {
201            let event_mgr = EventManager::new(self.pool);
202            let event_results = event_mgr
203                .search_events_fts5(query, Some(event_limit))
204                .await?;
205
206            let task_mgr = TaskManager::new(self.pool);
207            for event_result in event_results {
208                // Get task ancestry chain for this event
209                let task_chain = task_mgr
210                    .get_task_ancestry(event_result.event.task_id)
211                    .await?;
212
213                results.push(UnifiedSearchResult::Event {
214                    event: event_result.event,
215                    task_chain,
216                    match_snippet: event_result.match_snippet,
217                });
218            }
219        }
220
221        // Limit to total_limit (in case we got more from both sources)
222        results.truncate(total_limit as usize);
223
224        Ok(results)
225    }
226}
227
#[cfg(test)]
mod unified_search_tests {
    use super::*;
    use crate::test_utils::test_helpers::TestContext;

    /// `true` when the result is the `Task` variant.
    fn is_task(result: &UnifiedSearchResult) -> bool {
        matches!(result, UnifiedSearchResult::Task { .. })
    }

    /// `true` when the result is the `Event` variant.
    fn is_event(result: &UnifiedSearchResult) -> bool {
        matches!(result, UnifiedSearchResult::Event { .. })
    }

    /// A term present in a task and in one of its events surfaces both kinds.
    #[tokio::test]
    async fn test_unified_search_basic() {
        let ctx = TestContext::new().await;
        let search_mgr = SearchManager::new(ctx.pool());
        let event_mgr = EventManager::new(ctx.pool());
        let task_mgr = TaskManager::new(ctx.pool());

        // Fixture: one task mentioning JWT plus one event on it mentioning JWT
        let task = task_mgr
            .add_task("JWT Authentication", Some("Implement JWT auth"), None)
            .await
            .unwrap();
        event_mgr
            .add_event(task.id, "decision", "Chose JWT over OAuth")
            .await
            .unwrap();

        // Searching both sources for "JWT" should find the task and the event
        let results = search_mgr
            .unified_search("JWT", true, true, None)
            .await
            .unwrap();

        assert!(results.len() >= 2);

        // Both variants must be represented
        let has_task = results.iter().any(is_task);
        let has_event = results.iter().any(is_event);
        assert!(has_task);
        assert!(has_event);
    }

    /// With events disabled, only task results come back.
    #[tokio::test]
    async fn test_unified_search_tasks_only() {
        let ctx = TestContext::new().await;
        let search_mgr = SearchManager::new(ctx.pool());
        let task_mgr = TaskManager::new(ctx.pool());

        // Fixture: a single matching task
        task_mgr
            .add_task("OAuth Implementation", None, None)
            .await
            .unwrap();

        let results = search_mgr
            .unified_search("OAuth", true, false, None)
            .await
            .unwrap();

        assert!(!results.is_empty());
        // All results should be tasks
        assert!(results.iter().all(is_task));
    }

    /// With tasks disabled, only event results come back.
    #[tokio::test]
    async fn test_unified_search_events_only() {
        let ctx = TestContext::new().await;
        let search_mgr = SearchManager::new(ctx.pool());
        let event_mgr = EventManager::new(ctx.pool());
        let task_mgr = TaskManager::new(ctx.pool());

        // Fixture: a task whose name does not match, carrying a matching event
        let task = task_mgr.add_task("Test task", None, None).await.unwrap();
        event_mgr
            .add_event(task.id, "blocker", "OAuth library missing")
            .await
            .unwrap();

        let results = search_mgr
            .unified_search("OAuth", false, true, None)
            .await
            .unwrap();

        assert!(!results.is_empty());
        // All results should be events
        assert!(results.iter().all(is_event));
    }

    /// The overall limit caps the number of returned results.
    #[tokio::test]
    async fn test_unified_search_with_limit() {
        let ctx = TestContext::new().await;
        let search_mgr = SearchManager::new(ctx.pool());
        let task_mgr = TaskManager::new(ctx.pool());

        // Fixture: ten tasks that all match "Test"
        for i in 0..10 {
            let name = format!("Test task {}", i);
            task_mgr.add_task(&name, None, None).await.unwrap();
        }

        // Ask for at most three results across both sources
        let results = search_mgr
            .unified_search("Test", true, true, Some(3))
            .await
            .unwrap();

        assert!(results.len() <= 3);
    }
}