1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
use anyhow::Result;
use super::core::{KnowledgeManager, PageInfo, SearchMatch};
impl KnowledgeManager {
    /// Search knowledge page content using word-level fuzzy matching.
    ///
    /// Tokenizes the query into whitespace-separated terms and matches lines
    /// containing any term (case-insensitive). Pages are ranked by the number
    /// of distinct query terms they contain — pages matching more terms appear
    /// first. Within a page, nearby matching lines are grouped together with
    /// `context` lines of surrounding context.
    ///
    /// # Errors
    /// Returns an error if the cache directory cannot be listed or a cached
    /// page file cannot be read.
    pub fn search_content(&self, query: &str, context: usize) -> Result<Vec<SearchMatch>> {
        if !self.cache_dir.exists() {
            return Ok(Vec::new());
        }
        let query_lower = query.to_lowercase();
        let terms: Vec<&str> = query_lower.split_whitespace().collect();
        if terms.is_empty() {
            return Ok(Vec::new());
        }
        // Process files in name-sorted order so results are deterministic
        // across runs (read_dir order is platform-dependent).
        let mut entries: Vec<_> = std::fs::read_dir(&self.cache_dir)?
            .filter_map(std::result::Result::ok)
            .filter(|e| e.path().extension().is_some_and(|ext| ext == "md"))
            .collect();
        entries.sort_by_key(std::fs::DirEntry::file_name);
        // (term_match_count, matches) per file, used for ranking below.
        let mut scored_results: Vec<(usize, Vec<SearchMatch>)> = Vec::new();
        for entry in entries {
            let path = entry.path();
            let slug = path
                .file_stem()
                .unwrap_or_default()
                .to_string_lossy()
                .to_string();
            let page_text = std::fs::read_to_string(&path)?;
            let lines: Vec<&str> = page_text.lines().collect();
            // Lowercase each line once and reuse it for both per-line matching
            // and distinct-term counting (avoids lowercasing the whole page
            // text a second time).
            let lines_lower: Vec<String> = lines.iter().map(|l| l.to_lowercase()).collect();
            // Lines matching any query term.
            let matching_indices: Vec<usize> = lines_lower
                .iter()
                .enumerate()
                .filter(|(_, line_lower)| terms.iter().any(|term| line_lower.contains(term)))
                .map(|(i, _)| i)
                .collect();
            // No line matched ⇒ no term appears anywhere in this page; skip
            // before doing the per-term scan below.
            if matching_indices.is_empty() {
                continue;
            }
            // Count how many distinct query terms appear anywhere in this page
            // (>= 1 here, since at least one line matched some term).
            let term_hits = terms
                .iter()
                .filter(|term| lines_lower.iter().any(|ll| ll.contains(**term)))
                .count();
            let mut file_matches = Vec::new();
            for group in group_matches(&matching_indices, context) {
                let first_match = group[0];
                let last_match = *group.last().expect("groups are never empty");
                // Clamp the context window to the page bounds.
                let start = first_match.saturating_sub(context);
                let end = (last_match + context + 1).min(lines.len());
                let context_lines: Vec<(usize, String)> = (start..end)
                    .map(|i| (i + 1, lines[i].to_string()))
                    .collect();
                file_matches.push(SearchMatch {
                    slug: slug.clone(),
                    line_number: first_match + 1, // 1-based for display
                    context_lines,
                });
            }
            // A non-empty matching_indices always yields at least one group,
            // so file_matches is guaranteed non-empty here.
            scored_results.push((term_hits, file_matches));
        }
        // Stable sort by term-hit count descending: pages matching more terms
        // first; ties preserve the name-sorted file order.
        scored_results.sort_by_key(|(hits, _)| std::cmp::Reverse(*hits));
        Ok(scored_results
            .into_iter()
            .flat_map(|(_, matches)| matches)
            .collect())
    }

    /// Search knowledge pages by source URL domain.
    ///
    /// Finds pages that have at least one source whose URL contains the given
    /// domain string (case-insensitive substring match).
    ///
    /// # Errors
    /// Returns an error if listing pages fails.
    pub fn search_sources(&self, domain: &str) -> Result<Vec<PageInfo>> {
        let domain_lower = domain.to_lowercase();
        let pages = self.list_pages()?;
        let matches: Vec<PageInfo> = pages
            .into_iter()
            .filter(|page| {
                page.frontmatter
                    .sources
                    .iter()
                    .any(|src| src.url.to_lowercase().contains(&domain_lower))
            })
            .collect();
        Ok(matches)
    }
}
/// Group matching line indices into contiguous groups based on context overlap.
///
/// Two matches land in the same group when their context windows overlap or
/// abut, i.e. the distance between them is `<= 2 * context + 1`. (With
/// `context == 0`, directly adjacent lines still merge into one group.)
/// `indices` is expected to be in ascending order.
pub(super) fn group_matches(indices: &[usize], context: usize) -> Vec<Vec<usize>> {
    let mut groups: Vec<Vec<usize>> = Vec::new();
    for &idx in indices {
        // Merge when this index is close enough to the current group's last
        // member that their context windows would overlap or touch.
        let should_merge = groups
            .last()
            .and_then(|g| g.last())
            .is_some_and(|&last_idx| idx <= last_idx + 2 * context + 1);
        if should_merge {
            if let Some(last_group) = groups.last_mut() {
                last_group.push(idx);
            }
        } else {
            groups.push(vec![idx]);
        }
    }
    groups
}