typstify_search_wasm/
query.rs

1//! Query parsing and search execution.
2//!
3//! Provides query parsing and search functionality for the WASM runtime.
4
5use serde::{Deserialize, Serialize};
6use wasm_bindgen::prelude::*;
7
/// A search query together with the terms extracted from it.
///
/// Two queries compare equal when their raw text, terms and limit are all
/// equal.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SearchQuery {
    /// Raw query string.
    pub raw: String,

    /// Parsed and normalized terms.
    pub terms: Vec<String>,

    /// Maximum number of results.
    pub limit: usize,
}
20
21impl SearchQuery {
22    /// Parse a query string.
23    pub fn parse(query: &str, limit: usize) -> Self {
24        let terms = tokenize_query(query);
25
26        Self {
27            raw: query.to_string(),
28            terms,
29            limit,
30        }
31    }
32
33    /// Check if the query is empty.
34    pub fn is_empty(&self) -> bool {
35        self.terms.is_empty()
36    }
37}
38
39/// A single search result.
40#[derive(Debug, Clone, Serialize, Deserialize)]
41pub struct SearchResult {
42    /// Document URL.
43    pub url: String,
44
45    /// Document title.
46    pub title: String,
47
48    /// Document summary/description.
49    #[serde(skip_serializing_if = "Option::is_none")]
50    pub description: Option<String>,
51
52    /// Relevance score (higher is better).
53    pub score: f32,
54
55    /// Highlighted snippet showing matches.
56    #[serde(skip_serializing_if = "Option::is_none")]
57    pub snippet: Option<String>,
58}
59
60/// Search results container.
61#[derive(Debug, Clone, Serialize, Deserialize)]
62pub struct SearchResults {
63    /// Query that was executed.
64    pub query: String,
65
66    /// Total number of matches.
67    pub total: usize,
68
69    /// Result items.
70    pub results: Vec<SearchResult>,
71
72    /// Search duration in milliseconds.
73    pub duration_ms: u32,
74}
75
76impl SearchResults {
77    /// Create empty results.
78    pub fn empty(query: &str) -> Self {
79        Self {
80            query: query.to_string(),
81            total: 0,
82            results: Vec::new(),
83            duration_ms: 0,
84        }
85    }
86
87    /// Convert to JavaScript value.
88    pub fn to_js(&self) -> Result<JsValue, JsValue> {
89        serde_wasm_bindgen::to_value(self).map_err(|e| JsValue::from_str(&e.to_string()))
90    }
91}
92
/// Split `query` into lowercase terms.
///
/// The text is cut at every non-alphanumeric character. Pieces shorter than
/// two bytes are discarded, which drops empty pieces and single ASCII
/// characters; the length is measured in bytes, so a lone multi-byte character
/// is kept. The surviving pieces are lowercased and returned in input order.
fn tokenize_query(query: &str) -> Vec<String> {
    let mut terms = Vec::new();

    for piece in query.split(|ch: char| !ch.is_alphanumeric()) {
        if piece.len() < 2 {
            continue;
        }
        terms.push(piece.to_lowercase());
    }

    terms
}
101
102/// Score a document against a query.
103///
104/// Returns a relevance score based on term frequency and position.
105pub fn score_document(query_terms: &[String], title: &str, body_terms: &[String]) -> f32 {
106    let title_lower = title.to_lowercase();
107    let title_terms: Vec<String> = tokenize_query(&title_lower);
108
109    let mut score = 0.0f32;
110
111    for query_term in query_terms {
112        // Title matches are worth more
113        for title_term in &title_terms {
114            if title_term.contains(query_term) {
115                score += 10.0;
116            }
117            if title_term == query_term {
118                score += 5.0; // Exact match bonus
119            }
120        }
121
122        // Body matches
123        for body_term in body_terms {
124            if body_term == query_term {
125                score += 1.0;
126            } else if body_term.contains(query_term) {
127                score += 0.5;
128            }
129        }
130    }
131
132    score
133}
134
/// Generate a plain-text snippet of `text` around the first query-term match.
///
/// The earliest occurrence of any of `query_terms` in the lowercased text
/// anchors the snippet; terms are matched as given, so they must already be
/// lowercase. If that match begins more than 50 bytes into `text`, the snippet
/// starts at the word containing it; otherwise it starts at the beginning of
/// `text` (so with a small `max_length` the match itself may fall outside the
/// snippet). When no term matches, the snippet is taken from the start.
///
/// At most `max_length` bytes of `text` are included, never splitting a
/// character. When text remains after the window, the cut is moved back to the
/// last whitespace inside the window if there is one. `"..."` is added on each
/// side where text was left out. No highlight markup is inserted.
///
/// Returns `None` when `text` or `query_terms` is empty.
pub fn generate_snippet(text: &str, query_terms: &[String], max_length: usize) -> Option<String> {
    // Matches beginning further than this many bytes into the text move the
    // start of the window forward to the matched word.
    const LEAD_THRESHOLD: usize = 50;

    if text.is_empty() || query_terms.is_empty() {
        return None;
    }

    let text_lower = text.to_lowercase();

    // `find` reports a byte offset into the lowercased copy. Lowercasing can
    // change how many bytes a character occupies, so the offset is translated
    // back before it is used to slice `text`.
    let start_pos = query_terms
        .iter()
        .filter_map(|term| text_lower.find(term.as_str()))
        .min()
        .map_or(0, |pos| offset_before_lowercasing(text, pos));

    let snippet_start = if start_pos > LEAD_THRESHOLD {
        // Start right after the last whitespace character preceding the match
        // (whitespace may be wider than one byte).
        text[..start_pos]
            .char_indices()
            .rev()
            .find(|(_, ch)| ch.is_whitespace())
            .map(|(idx, ch)| idx + ch.len_utf8())
            .unwrap_or_else(|| floor_to_char_boundary(text, start_pos - LEAD_THRESHOLD))
    } else {
        0
    };

    // Furthest the window may reach, clamped to the text and to a character
    // boundary.
    let hard_end = floor_to_char_boundary(text, snippet_start.saturating_add(max_length));

    let snippet_end = if hard_end < text.len() {
        // Text remains beyond the window: prefer ending on a word boundary, but
        // only look inside the window so the end never precedes the start, and
        // ignore a boundary that would leave the snippet empty.
        text[snippet_start..hard_end]
            .rfind(char::is_whitespace)
            .filter(|&offset| offset > 0)
            .map_or(hard_end, |offset| snippet_start + offset)
    } else {
        // Everything up to the end of the text fits; keep the last word.
        hard_end
    };

    let cut_front = snippet_start > 0;
    let cut_back = snippet_end < text.len();

    let body = &text[snippet_start..snippet_end];
    let body = if cut_front { body.trim_start() } else { body };
    let body = if cut_back { body.trim_end() } else { body };

    let mut snippet = String::with_capacity(body.len() + 6);
    if cut_front {
        snippet.push_str("...");
    }
    snippet.push_str(body);
    if cut_back {
        snippet.push_str("...");
    }

    Some(snippet)
}

/// Map a byte offset in `text.to_lowercase()` back to the byte offset in `text`
/// of the character that produced it.
///
/// Returns `text.len()` when the offset lies past the lowercased text.
fn offset_before_lowercasing(text: &str, lowered_offset: usize) -> usize {
    let mut lowered_len = 0usize;
    for (idx, ch) in text.char_indices() {
        lowered_len += ch.to_lowercase().map(char::len_utf8).sum::<usize>();
        if lowered_len > lowered_offset {
            return idx;
        }
    }
    text.len()
}

/// Largest character boundary of `text` that is not greater than `index`
/// (with `index` first clamped to `text.len()`).
fn floor_to_char_boundary(text: &str, index: usize) -> usize {
    let mut boundary = index.min(text.len());
    // Offset 0 is always a boundary, so this terminates.
    while !text.is_char_boundary(boundary) {
        boundary -= 1;
    }
    boundary
}
185
#[cfg(test)]
mod tests {
    use super::*;

    /// Two ordinary words become two terms and the limit is carried through.
    #[test]
    fn test_parse_query() {
        let parsed = SearchQuery::parse("hello world", 10);

        assert_eq!(parsed.terms, ["hello", "world"]);
        assert_eq!(parsed.limit, 10);
    }

    /// Single-letter words are dropped by the tokenizer.
    #[test]
    fn test_parse_query_filters_short() {
        let parsed = SearchQuery::parse("a test b query c", 10);

        assert_eq!(parsed.terms, ["test", "query"]);
    }

    /// A query is empty when no term survives tokenization.
    #[test]
    fn test_empty_query() {
        // Nothing to tokenize at all.
        assert!(SearchQuery::parse("", 10).is_empty());

        // Only single characters, all of which are filtered out.
        assert!(SearchQuery::parse("a b c", 10).is_empty());
    }

    /// A document whose title matches outranks one that matches in the body only.
    #[test]
    fn test_score_document() {
        let query_terms = ["rust".to_string()];
        let body_terms = ["rust".to_string(), "programming".to_string()];

        let with_title_hit = score_document(&query_terms, "Learning Rust", &body_terms);
        let body_hit_only = score_document(&query_terms, "Programming Guide", &body_terms);

        assert!(with_title_hit > body_hit_only);
    }

    /// The snippet for a matching term exists and contains that term.
    #[test]
    fn test_generate_snippet() {
        let text = "Rust is a systems programming language. It provides memory safety without garbage collection.";
        let terms = ["rust".to_string()];

        let snippet = generate_snippet(text, &terms, 50);

        assert!(snippet.is_some());
        assert!(snippet.unwrap().to_lowercase().contains("rust"));
    }

    /// `SearchResults::empty` reports zero matches and holds no items.
    #[test]
    fn test_search_results_empty() {
        let empty = SearchResults::empty("test");

        assert_eq!(empty.total, 0);
        assert!(empty.results.is_empty());
    }

    /// Title and score show up in the JSON form of a result.
    #[test]
    fn test_search_result_serialization() {
        let result = SearchResult {
            url: String::from("/test"),
            title: String::from("Test Page"),
            description: Some(String::from("A test page")),
            score: 10.5,
            snippet: None,
        };

        let json = serde_json::to_string(&result).unwrap();

        for expected in ["Test Page", "10.5"] {
            assert!(json.contains(expected));
        }
    }
}