// agentroot_core/search/snippet.rs
/// A window of text cut out of a larger document.
///
/// `PartialEq`/`Eq` are derived so that snippets can be compared directly,
/// e.g. with `assert_eq!` in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Snippet {
    /// The snippet text as it should be displayed.
    pub snippet: String,
    /// Byte offset into the source content where the window starts.
    pub start_pos: usize,
    /// Byte offset into the source content where the window ends (exclusive).
    pub end_pos: usize,
}
10
11pub fn extract_snippet(
13 content: &str,
14 query: &str,
15 max_length: Option<usize>,
16 chunk_pos: Option<usize>,
17) -> Snippet {
18 let max_len = max_length.unwrap_or(500);
19
20 if content.len() <= max_len {
22 return Snippet {
23 snippet: content.to_string(),
24 start_pos: 0,
25 end_pos: content.len(),
26 };
27 }
28
29 let center = chunk_pos.unwrap_or_else(|| find_query_position(content, query));
31
32 let half_len = max_len / 2;
34 let start = center.saturating_sub(half_len);
35 let end = (start + max_len).min(content.len());
36 let start = if end == content.len() {
37 end.saturating_sub(max_len)
38 } else {
39 start
40 };
41
42 let (start, end) = adjust_to_word_boundaries(content, start, end);
44
45 let mut snippet = content[start..end].to_string();
46
47 if start > 0 {
49 snippet = format!("...{}", snippet.trim_start());
50 }
51 if end < content.len() {
52 snippet = format!("{}...", snippet.trim_end());
53 }
54
55 Snippet {
56 snippet,
57 start_pos: start,
58 end_pos: end,
59 }
60}
61
/// Finds the byte offset in `content` where `query` first occurs, ignoring case.
///
/// The whole query is tried first. If it does not occur, each
/// whitespace-separated query term of at least three bytes is tried in order
/// and the first hit wins. Returns `0` when nothing matches.
///
/// The search runs on lowercased copies of both strings, but the returned
/// offset refers to the original `content`.
fn find_query_position(content: &str, query: &str) -> usize {
    let content_lower = content.to_lowercase();
    let query_lower = query.to_lowercase();

    let lowered_pos = content_lower.find(&query_lower).or_else(|| {
        query_lower
            .split_whitespace()
            .filter(|term| term.len() >= 3)
            .find_map(|term| content_lower.find(term))
    });

    let lowered_pos = match lowered_pos {
        Some(pos) => pos,
        None => return 0,
    };

    // Lowercasing can change a character's encoded length, so an offset into
    // `content_lower` is not necessarily a valid offset into `content`. Walk
    // the original characters, accumulating their lowercased lengths, until
    // the lowercased offset is reached.
    let mut lowered_len = 0;
    for (offset, ch) in content.char_indices() {
        if lowered_len >= lowered_pos {
            return offset;
        }
        lowered_len += ch.to_lowercase().map(char::len_utf8).sum::<usize>();
    }
    content.len()
}
87
/// Widens the byte range `start..end` outwards so it does not cut a word.
///
/// `start` moves left until it is at `0` or directly after an ASCII whitespace
/// byte; `end` moves right until it is at the end of `content` or on an ASCII
/// whitespace byte. An offset that lies beyond the end of `content` is
/// returned unchanged.
fn adjust_to_word_boundaries(content: &str, start: usize, end: usize) -> (usize, usize) {
    let raw = content.as_bytes();

    let word_start = match raw.get(..start) {
        // Resume just after the last whitespace before `start`, or at 0 if
        // there is none.
        Some(prefix) => prefix
            .iter()
            .rposition(|byte| byte.is_ascii_whitespace())
            .map_or(0, |idx| idx + 1),
        // `start` is past the end of the content: nothing to scan.
        None => start,
    };

    let word_end = match raw.get(end..) {
        // Stop on the first whitespace at or after `end`, or at the end of
        // the content if there is none.
        Some(suffix) => {
            let advance = suffix
                .iter()
                .position(|byte| byte.is_ascii_whitespace())
                .unwrap_or(suffix.len());
            end + advance
        }
        // `end` is past the end of the content: nothing to scan.
        None => end,
    };

    (word_start, word_end)
}
116
#[cfg(test)]
mod tests {
    use super::*;

    /// Content shorter than the default limit is returned verbatim.
    #[test]
    fn test_short_content() {
        let result = extract_snippet("Hello world", "hello", None, None);
        assert_eq!(result.snippet, "Hello world");
    }

    /// Long content is cut down to about the requested size; the slack above
    /// 100 allows for word-boundary widening and the `...` marker.
    #[test]
    fn test_long_content() {
        let long_text = "a ".repeat(500);
        let result = extract_snippet(&long_text, "test", Some(100), None);
        let produced_len = result.snippet.len();
        assert!(produced_len <= 110);
    }
}