typstify_search_wasm/
simple.rs

1//! Simple search implementation for small indices.
2//!
3//! Provides a lightweight search engine that loads the entire index into memory.
4//! Suitable for sites with fewer than a few hundred pages.
5
6use std::collections::HashMap;
7
8use gloo_net::http::Request;
9use serde::{Deserialize, Serialize};
10use wasm_bindgen::prelude::*;
11
12use crate::query::{SearchQuery, SearchResult, SearchResults, generate_snippet, score_document};
13
14/// A simple search index document.
15#[derive(Debug, Clone, Serialize, Deserialize)]
16pub struct SimpleDocument {
17    /// Document URL.
18    pub url: String,
19
20    /// Document title.
21    pub title: String,
22
23    /// Document description/summary.
24    #[serde(skip_serializing_if = "Option::is_none")]
25    pub description: Option<String>,
26
27    /// Language code.
28    #[serde(skip_serializing_if = "Option::is_none")]
29    pub lang: Option<String>,
30
31    /// Tags.
32    #[serde(default, skip_serializing_if = "Vec::is_empty")]
33    pub tags: Vec<String>,
34
35    /// Publication date as ISO string.
36    #[serde(skip_serializing_if = "Option::is_none")]
37    pub date: Option<String>,
38
39    /// Pre-tokenized terms from title and body.
40    pub terms: Vec<String>,
41}
42
43/// A simple JSON-based search index.
44#[derive(Debug, Clone, Serialize, Deserialize)]
45pub struct SimpleSearchIndex {
46    /// Index format version.
47    pub version: u32,
48
49    /// All indexed documents.
50    pub documents: Vec<SimpleDocument>,
51
52    /// Inverted index: term -> document indices.
53    pub index: HashMap<String, Vec<usize>>,
54}
55
56impl SimpleSearchIndex {
57    /// Create an empty index.
58    pub fn empty() -> Self {
59        Self {
60            version: 1,
61            documents: Vec::new(),
62            index: HashMap::new(),
63        }
64    }
65
66    /// Parse index from JSON string.
67    pub fn from_json(json: &str) -> Result<Self, String> {
68        serde_json::from_str(json).map_err(|e| e.to_string())
69    }
70
71    /// Search the index.
72    pub fn search(&self, query: &SearchQuery) -> SearchResults {
73        #[cfg(target_arch = "wasm32")]
74        let start = js_sys::Date::now();
75
76        if query.is_empty() {
77            return SearchResults::empty(&query.raw);
78        }
79
80        // Find documents containing any query term
81        let mut doc_scores: HashMap<usize, f32> = HashMap::new();
82
83        for term in &query.terms {
84            if let Some(postings) = self.index.get(term) {
85                for &doc_idx in postings {
86                    let doc = &self.documents[doc_idx];
87                    let score = score_document(&query.terms, &doc.title, &doc.terms);
88                    let entry = doc_scores.entry(doc_idx).or_insert(0.0);
89                    *entry = entry.max(score);
90                }
91            }
92        }
93
94        // Sort by score
95        let mut scored: Vec<_> = doc_scores.into_iter().collect();
96        scored.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
97
98        // Take top results
99        let results: Vec<SearchResult> = scored
100            .into_iter()
101            .take(query.limit)
102            .map(|(doc_idx, score)| {
103                let doc = &self.documents[doc_idx];
104                let snippet = doc
105                    .description
106                    .as_ref()
107                    .and_then(|d| generate_snippet(d, &query.terms, 150));
108
109                SearchResult {
110                    url: doc.url.clone(),
111                    title: doc.title.clone(),
112                    description: doc.description.clone(),
113                    score,
114                    snippet,
115                }
116            })
117            .collect();
118
119        #[cfg(target_arch = "wasm32")]
120        let duration_ms = (js_sys::Date::now() - start) as u32;
121        #[cfg(not(target_arch = "wasm32"))]
122        let duration_ms = 0u32;
123
124        SearchResults {
125            query: query.raw.clone(),
126            total: results.len(),
127            results,
128            duration_ms,
129        }
130    }
131
132    /// Get document count.
133    pub fn document_count(&self) -> usize {
134        self.documents.len()
135    }
136
137    /// Get term count.
138    pub fn term_count(&self) -> usize {
139        self.index.len()
140    }
141}
142
143/// Simple search engine for WASM.
144#[wasm_bindgen]
145pub struct SimpleSearchEngine {
146    index: SimpleSearchIndex,
147}
148
149#[wasm_bindgen]
150impl SimpleSearchEngine {
151    /// Load a simple search index from a URL.
152    #[wasm_bindgen(js_name = load)]
153    pub async fn load(index_url: &str) -> Result<SimpleSearchEngine, JsValue> {
154        let response = Request::get(index_url)
155            .send()
156            .await
157            .map_err(|e| JsValue::from_str(&format!("Network error: {e}")))?;
158
159        if !response.ok() {
160            return Err(JsValue::from_str(&format!(
161                "Failed to load index: HTTP {}",
162                response.status()
163            )));
164        }
165
166        let json = response
167            .text()
168            .await
169            .map_err(|e| JsValue::from_str(&format!("Failed to read response: {e}")))?;
170
171        let index = SimpleSearchIndex::from_json(&json)
172            .map_err(|e| JsValue::from_str(&format!("Failed to parse index: {e}")))?;
173
174        Ok(Self { index })
175    }
176
177    /// Create from a JSON string (for testing).
178    #[wasm_bindgen(js_name = fromJson)]
179    pub fn from_json(json: &str) -> Result<SimpleSearchEngine, JsValue> {
180        let index = SimpleSearchIndex::from_json(json)
181            .map_err(|e| JsValue::from_str(&format!("Failed to parse index: {e}")))?;
182
183        Ok(Self { index })
184    }
185
186    /// Search the index.
187    pub fn search(&self, query: &str, limit: Option<usize>) -> Result<JsValue, JsValue> {
188        let limit = limit.unwrap_or(10);
189        let parsed_query = SearchQuery::parse(query, limit);
190        let results = self.index.search(&parsed_query);
191        results.to_js()
192    }
193
194    /// Get the number of indexed documents.
195    #[wasm_bindgen(js_name = documentCount)]
196    pub fn document_count(&self) -> usize {
197        self.index.document_count()
198    }
199
200    /// Get the number of indexed terms.
201    #[wasm_bindgen(js_name = termCount)]
202    pub fn term_count(&self) -> usize {
203        self.index.term_count()
204    }
205}
206
#[cfg(test)]
mod tests {
    use super::*;

    /// Build an English "Learning <language>" fixture document whose URL,
    /// tag and middle term are all derived from `slug`.
    fn guide_doc(slug: &str, language: &str) -> SimpleDocument {
        SimpleDocument {
            url: format!("/{slug}"),
            title: format!("Learning {language}"),
            description: Some(format!("A guide to {language} programming")),
            lang: Some("en".to_string()),
            tags: vec![slug.to_string()],
            date: None,
            terms: vec![
                "learning".to_string(),
                slug.to_string(),
                "programming".to_string(),
            ],
        }
    }

    /// Two-document fixture: `/rust` at position 0 and `/go` at position 1.
    fn create_test_index() -> SimpleSearchIndex {
        let postings: [(&str, Vec<usize>); 4] = [
            ("learning", vec![0, 1]),
            ("rust", vec![0]),
            ("go", vec![1]),
            ("programming", vec![0, 1]),
        ];

        let index: HashMap<String, Vec<usize>> = postings
            .iter()
            .map(|(term, docs)| (term.to_string(), docs.clone()))
            .collect();

        SimpleSearchIndex {
            version: 1,
            documents: vec![guide_doc("rust", "Rust"), guide_doc("go", "Go")],
            index,
        }
    }

    #[test]
    fn test_simple_search() {
        let results = create_test_index().search(&SearchQuery::parse("rust", 10));

        assert_eq!(results.total, 1);
        assert_eq!(results.results[0].url, "/rust");
    }

    #[test]
    fn test_simple_search_multiple_results() {
        let results = create_test_index().search(&SearchQuery::parse("programming", 10));

        assert_eq!(results.total, 2);
    }

    #[test]
    fn test_simple_search_no_results() {
        let results = create_test_index().search(&SearchQuery::parse("python", 10));

        assert_eq!(results.total, 0);
    }

    #[test]
    fn test_simple_search_empty_query() {
        let results = create_test_index().search(&SearchQuery::parse("", 10));

        assert_eq!(results.total, 0);
    }

    #[test]
    fn test_index_from_json() {
        let json = r#"{
            "version": 1,
            "documents": [{
                "url": "/test",
                "title": "Test",
                "terms": ["test"]
            }],
            "index": {"test": [0]}
        }"#;

        let parsed = SimpleSearchIndex::from_json(json).unwrap();
        assert_eq!(parsed.documents.len(), 1);
        assert_eq!(parsed.index.len(), 1);
    }

    #[test]
    fn test_document_and_term_count() {
        let fixture = create_test_index();
        assert_eq!(fixture.document_count(), 2);
        assert_eq!(fixture.term_count(), 4);
    }
}