Skip to main content

lago_knowledge/
search.rs

//! Scored search over the knowledge index.
//!
//! Scoring: +2 per query term match in note name, +1 per term match
//! in body, +1 per tag match. Excerpts are extracted from matching lines.
5
6use serde::Serialize;
7
8use crate::index::KnowledgeIndex;
9
10/// A search result with relevance scoring.
11#[derive(Debug, Clone, Serialize)]
12pub struct SearchResult {
13    /// Relative path in the manifest.
14    pub path: String,
15    /// Note name (filename without `.md`).
16    pub name: String,
17    /// Parsed YAML frontmatter.
18    #[serde(with = "yaml_value_as_json")]
19    pub frontmatter: serde_yaml::Value,
20    /// Matching lines from the body (up to 5).
21    pub excerpts: Vec<String>,
22    /// Outgoing wikilinks.
23    pub links: Vec<String>,
24    /// Relevance score (higher is better).
25    pub score: f64,
26}
27
28impl KnowledgeIndex {
29    /// Search notes by query terms with scoring.
30    ///
31    /// Scoring:
32    /// - +2 per term match in note name
33    /// - +1 per term match in body text
34    /// - +1 per tag match in frontmatter
35    ///
36    /// Returns results sorted by score descending, limited to `max_results`.
37    pub fn search(&self, query: &str, max_results: usize) -> Vec<SearchResult> {
38        let terms: Vec<String> = query
39            .to_lowercase()
40            .split_whitespace()
41            .filter(|t| !t.is_empty())
42            .map(String::from)
43            .collect();
44
45        if terms.is_empty() {
46            return Vec::new();
47        }
48
49        let mut results: Vec<SearchResult> = Vec::new();
50
51        for note in self.notes.values() {
52            let name_lower = note.name.to_lowercase();
53            let body_lower = note.body.to_lowercase();
54
55            // Base score: count term matches in body
56            let mut score: f64 = 0.0;
57            for term in &terms {
58                if body_lower.contains(term.as_str()) {
59                    score += 1.0;
60                }
61            }
62
63            // Must match at least one term somewhere
64            let name_matches: f64 = terms
65                .iter()
66                .filter(|t| name_lower.contains(t.as_str()))
67                .count() as f64;
68
69            if score == 0.0 && name_matches == 0.0 {
70                continue;
71            }
72
73            // Boost name matches
74            score += name_matches * 2.0;
75
76            // Boost tag matches
77            if let Some(tags) = note.frontmatter.get("tags") {
78                if let Some(tag_seq) = tags.as_sequence() {
79                    for tag_val in tag_seq {
80                        if let Some(tag_str) = tag_val.as_str() {
81                            let tag_lower = tag_str.to_lowercase();
82                            for term in &terms {
83                                if tag_lower == *term {
84                                    score += 1.0;
85                                }
86                            }
87                        }
88                    }
89                }
90            }
91
92            // Extract matching excerpt lines (up to 5)
93            let mut excerpts = Vec::new();
94            for line in note.body.lines() {
95                if excerpts.len() >= 5 {
96                    break;
97                }
98                let line_lower = line.to_lowercase();
99                let trimmed = line.trim();
100                if !trimmed.is_empty() && terms.iter().any(|t| line_lower.contains(t.as_str())) {
101                    excerpts.push(trimmed.to_string());
102                }
103            }
104
105            results.push(SearchResult {
106                path: note.path.clone(),
107                name: note.name.clone(),
108                frontmatter: note.frontmatter.clone(),
109                excerpts,
110                links: note.links.clone(),
111                score,
112            });
113        }
114
115        // Sort by score descending
116        results.sort_by(|a, b| {
117            b.score
118                .partial_cmp(&a.score)
119                .unwrap_or(std::cmp::Ordering::Equal)
120        });
121        results.truncate(max_results);
122        results
123    }
124}
125
126/// Serde helper to serialize `serde_yaml::Value` as JSON-compatible values.
127mod yaml_value_as_json {
128    use serde::{Serialize, Serializer};
129
130    pub fn serialize<S>(value: &serde_yaml::Value, serializer: S) -> Result<S::Ok, S::Error>
131    where
132        S: Serializer,
133    {
134        let json_value = yaml_to_json(value);
135        json_value.serialize(serializer)
136    }
137
138    fn yaml_to_json(value: &serde_yaml::Value) -> serde_json::Value {
139        match value {
140            serde_yaml::Value::Null => serde_json::Value::Null,
141            serde_yaml::Value::Bool(b) => serde_json::Value::Bool(*b),
142            serde_yaml::Value::Number(n) => {
143                if let Some(i) = n.as_i64() {
144                    serde_json::Value::Number(i.into())
145                } else if let Some(f) = n.as_f64() {
146                    serde_json::json!(f)
147                } else {
148                    serde_json::Value::Null
149                }
150            }
151            serde_yaml::Value::String(s) => serde_json::Value::String(s.clone()),
152            serde_yaml::Value::Sequence(seq) => {
153                serde_json::Value::Array(seq.iter().map(yaml_to_json).collect())
154            }
155            serde_yaml::Value::Mapping(map) => {
156                let mut obj = serde_json::Map::new();
157                for (k, v) in map {
158                    let key = match k {
159                        serde_yaml::Value::String(s) => s.clone(),
160                        _ => format!("{k:?}"),
161                    };
162                    obj.insert(key, yaml_to_json(v));
163                }
164                serde_json::Value::Object(obj)
165            }
166            serde_yaml::Value::Tagged(tagged) => yaml_to_json(&tagged.value),
167        }
168    }
169}
170
#[cfg(test)]
mod tests {
    use crate::index::KnowledgeIndex;
    use lago_core::ManifestEntry;
    use lago_store::BlobStore;
    use tempfile::TempDir;

    /// Builds a `KnowledgeIndex` over the given `(path, content)` pairs.
    ///
    /// Every file is written into a blob store rooted in a fresh temporary
    /// directory. The `TempDir` is handed back alongside the index so the
    /// directory is not removed while the test is still running.
    fn build_index(files: &[(&str, &str)]) -> (TempDir, KnowledgeIndex) {
        let tmp = TempDir::new().unwrap();
        let store = BlobStore::open(tmp.path()).unwrap();

        let entries: Vec<ManifestEntry> = files
            .iter()
            .map(|(path, content)| ManifestEntry {
                path: path.to_string(),
                blob_hash: store.put(content.as_bytes()).unwrap(),
                size_bytes: content.len() as u64,
                content_type: Some("text/markdown".to_string()),
                updated_at: 0,
            })
            .collect();

        let index = KnowledgeIndex::build(&entries, &store).unwrap();
        (tmp, index)
    }

    /// A term that appears in a note's name finds that note.
    #[test]
    fn search_by_name() {
        let files = [
            ("/architecture.md", "# Architecture\n\nSystem design docs."),
            ("/readme.md", "# Readme\n\nGeneral info."),
        ];
        let (_tmp, index) = build_index(&files);

        let hits = index.search("architecture", 10);

        assert_eq!(hits.len(), 1);
        assert_eq!(hits[0].name, "architecture");
        assert!(hits[0].score > 0.0);
    }

    /// A term that appears only in the body still finds the note.
    #[test]
    fn search_by_body() {
        let files = [
            (
                "/note.md",
                "# Note\n\nThis talks about consciousness and awareness.",
            ),
            ("/other.md", "# Other\n\nNothing relevant here."),
        ];
        let (_tmp, index) = build_index(&files);

        let hits = index.search("consciousness", 10);

        assert_eq!(hits.len(), 1);
        assert_eq!(hits[0].name, "note");
    }

    /// A name match outranks a body-only match.
    #[test]
    fn search_name_boost() {
        let files = [
            ("/lago.md", "# Lago\n\nPersistence."),
            ("/other.md", "# Other\n\nMentions lago in the body."),
        ];
        let (_tmp, index) = build_index(&files);

        let hits = index.search("lago", 10);

        assert_eq!(hits.len(), 2);
        let (top, runner_up) = (&hits[0], &hits[1]);
        assert_eq!(top.name, "lago");
        assert!(top.score > runner_up.score);
    }

    /// A matching tag lifts a note above one that matches in the body only.
    #[test]
    fn search_tag_boost() {
        let files = [
            (
                "/tagged.md",
                "---\ntags:\n  - rust\n  - agent\n---\n# Tagged\n\nSome content about rust.",
            ),
            ("/untagged.md", "# Untagged\n\nAlso about rust."),
        ];
        let (_tmp, index) = build_index(&files);

        let hits = index.search("rust", 10);

        assert_eq!(hits.len(), 2);
        assert_eq!(hits[0].name, "tagged");
    }

    /// The note matching more of the query terms ranks first.
    #[test]
    fn search_multi_term() {
        let files = [
            ("/a.md", "# A\n\nEvent sourcing and persistence."),
            ("/b.md", "# B\n\nJust persistence."),
        ];
        let (_tmp, index) = build_index(&files);

        let hits = index.search("event persistence", 10);

        // "a" contains both terms, "b" only one of them.
        assert_eq!(hits[0].name, "a");
    }

    /// A term that occurs nowhere produces no results.
    #[test]
    fn search_no_matches() {
        let (_tmp, index) = build_index(&[("/a.md", "# A\n\nSome content.")]);

        let hits = index.search("nonexistent", 10);

        assert!(hits.is_empty());
    }

    /// An empty query produces no results.
    #[test]
    fn search_empty_query() {
        let (_tmp, index) = build_index(&[("/a.md", "# A")]);

        let hits = index.search("", 10);

        assert!(hits.is_empty());
    }

    /// `max_results` caps the number of returned hits.
    #[test]
    fn search_max_results() {
        let files = [
            ("/a.md", "# A\n\nRust code."),
            ("/b.md", "# B\n\nRust code."),
            ("/c.md", "# C\n\nRust code."),
        ];
        let (_tmp, index) = build_index(&files);

        let hits = index.search("rust", 2);

        assert_eq!(hits.len(), 2);
    }

    /// Excerpts contain exactly the body lines that mention the term.
    #[test]
    fn search_excerpts() {
        let files = [(
            "/note.md",
            "# Note\n\nLine one.\nLine about rust.\nLine three.\nAnother rust line.",
        )];
        let (_tmp, index) = build_index(&files);

        let hits = index.search("rust", 10);
        let excerpts = &hits[0].excerpts;

        assert_eq!(excerpts.len(), 2);
        assert!(excerpts[0].contains("rust"));
    }
}