1use super::TextExtractor;
6use crate::core::{FolioError, Result};
7use crate::cos::CosDoc;
8use crate::doc::PdfDoc;
9use regex::Regex;
10
/// Options controlling how [`TextSearch::search`] matches a pattern and which
/// pages it scans.
///
/// All fields are public, so a value can be written as a struct literal,
/// obtained from [`Default`], or assembled with the builder-style methods on
/// this type. The type is comparable with `==` so configurations can be checked
/// in tests and by callers.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SearchOptions {
    /// When `true` the pattern is compiled as a regular expression; when
    /// `false` it is escaped first so that it matches literally.
    pub regex: bool,
    /// When `true` matching is case-sensitive; when `false` the compiled
    /// expression is prefixed with the `(?i)` flag.
    pub case_sensitive: bool,
    /// When `true` the pattern is surrounded by `\b` word-boundary assertions.
    pub whole_word: bool,
    /// First page to search. The search never starts below page 1: smaller
    /// values are raised to 1.
    pub start_page: u32,
    /// Last page to search (inclusive). `0`, or any value greater than the
    /// document's page count, means "through the last page".
    pub end_page: u32,
    /// Maximum number of results to collect before the search stops early.
    /// `0` means no limit.
    pub max_results: usize,
}
27
28impl Default for SearchOptions {
29 fn default() -> Self {
30 Self {
31 regex: false,
32 case_sensitive: false,
33 whole_word: false,
34 start_page: 1,
35 end_page: 0,
36 max_results: 0,
37 }
38 }
39}
40
41impl SearchOptions {
42 pub fn new() -> Self {
43 Self::default()
44 }
45
46 pub fn regex(mut self, enabled: bool) -> Self {
47 self.regex = enabled;
48 self
49 }
50
51 pub fn case_sensitive(mut self, enabled: bool) -> Self {
52 self.case_sensitive = enabled;
53 self
54 }
55
56 pub fn whole_word(mut self, enabled: bool) -> Self {
57 self.whole_word = enabled;
58 self
59 }
60
61 pub fn pages(mut self, start: u32, end: u32) -> Self {
62 self.start_page = start;
63 self.end_page = end;
64 self
65 }
66
67 pub fn max_results(mut self, max: usize) -> Self {
68 self.max_results = max;
69 self
70 }
71}
72
/// A single match produced by [`TextSearch::search`].
///
/// The type is comparable with `==` so results can be checked in tests and
/// de-duplicated by callers.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SearchResult {
    /// Number of the page the match was found on (the same number that was
    /// passed to `PdfDoc::get_page`).
    pub page_num: u32,
    /// The exact text that matched the pattern.
    pub match_text: String,
    /// Byte offset of the start of the match within the text extracted from
    /// the page. This is a byte index, not a character index.
    pub offset: usize,
    /// The match together with a short window of surrounding text (roughly
    /// 40 characters on each side) taken from the same page.
    pub context: String,
}
85
86pub struct TextSearch;
88
89impl TextSearch {
90 pub fn search(
94 doc: &mut PdfDoc,
95 pattern: &str,
96 options: &SearchOptions,
97 ) -> Result<Vec<SearchResult>> {
98 let compiled = compile_pattern(pattern, options)?;
99 let page_count = doc.page_count()?;
100
101 let start = options.start_page.max(1);
102 let end = if options.end_page == 0 || options.end_page > page_count {
103 page_count
104 } else {
105 options.end_page
106 };
107
108 let mut results = Vec::new();
109
110 for page_num in start..=end {
111 let page = match doc.get_page(page_num) {
112 Ok(p) => p,
113 Err(_) => continue,
114 };
115
116 let text = match TextExtractor::extract_from_page(&page, doc.cos_mut()) {
117 Ok(t) => t,
118 Err(_) => continue,
119 };
120
121 if text.is_empty() {
122 continue;
123 }
124
125 for mat in compiled.find_iter(&text) {
126 let match_text = mat.as_str().to_string();
127 let offset = mat.start();
128
129 let ctx_start = text[..offset]
131 .char_indices()
132 .rev()
133 .nth(40)
134 .map(|(i, _)| i)
135 .unwrap_or(0);
136 let ctx_end = text[mat.end()..]
137 .char_indices()
138 .nth(40)
139 .map(|(i, _)| mat.end() + i)
140 .unwrap_or(text.len());
141 let context = text[ctx_start..ctx_end].to_string();
142
143 results.push(SearchResult {
144 page_num,
145 match_text,
146 offset,
147 context,
148 });
149
150 if options.max_results > 0 && results.len() >= options.max_results {
151 return Ok(results);
152 }
153 }
154 }
155
156 Ok(results)
157 }
158
159 pub fn contains(doc: &mut PdfDoc, pattern: &str) -> Result<bool> {
161 let options = SearchOptions::new().max_results(1);
162 let results = Self::search(doc, pattern, &options)?;
163 Ok(!results.is_empty())
164 }
165
166 pub fn count(doc: &mut PdfDoc, pattern: &str) -> Result<usize> {
168 let results = Self::search(doc, pattern, &SearchOptions::new())?;
169 Ok(results.len())
170 }
171
172 pub fn search_regex(
174 doc: &mut PdfDoc,
175 pattern: &str,
176 options: &SearchOptions,
177 ) -> Result<Vec<SearchResult>> {
178 let mut opts = options.clone();
179 opts.regex = true;
180 Self::search(doc, pattern, &opts)
181 }
182}
183
184fn compile_pattern(pattern: &str, options: &SearchOptions) -> Result<Regex> {
186 let regex_pattern = if options.regex {
187 pattern.to_string()
188 } else {
189 regex::escape(pattern)
191 };
192
193 let regex_pattern = if options.whole_word {
195 format!(r"\b{}\b", regex_pattern)
196 } else {
197 regex_pattern
198 };
199
200 let regex_pattern = if options.case_sensitive {
202 regex_pattern
203 } else {
204 format!("(?i){}", regex_pattern)
205 };
206
207 Regex::new(®ex_pattern)
208 .map_err(|e| FolioError::InvalidArgument(format!("Invalid search pattern: {}", e)))
209}
210
#[cfg(test)]
mod tests {
    use super::*;

    /// Default options match literally and ignore case.
    #[test]
    fn test_compile_literal() {
        let matcher = compile_pattern("hello world", &SearchOptions::new()).unwrap();
        assert!(matcher.is_match("HELLO WORLD"));
        assert!(matcher.is_match("hello world"));
    }

    /// With case sensitivity on, only the exact casing matches.
    #[test]
    fn test_compile_case_sensitive() {
        let options = SearchOptions::new().case_sensitive(true);
        let matcher = compile_pattern("Hello", &options).unwrap();
        assert!(matcher.is_match("Hello"));
        assert!(!matcher.is_match("hello"));
    }

    /// Whole-word mode rejects occurrences embedded in a longer word.
    #[test]
    fn test_compile_whole_word() {
        let options = SearchOptions::new().whole_word(true);
        let matcher = compile_pattern("the", &options).unwrap();
        assert!(matcher.is_match("the cat"));
        assert!(!matcher.is_match("other"));
    }

    /// Regex mode passes the pattern through unescaped.
    #[test]
    fn test_compile_regex() {
        let options = SearchOptions::new().regex(true);
        let matcher = compile_pattern(r"\d{3}-\d{4}", &options).unwrap();
        assert!(matcher.is_match("Call 555-1234 now"));
        assert!(!matcher.is_match("no numbers here"));
    }

    /// Regex metacharacters in a literal pattern are escaped.
    #[test]
    fn test_escape_special_chars() {
        let matcher = compile_pattern("price: $10.00", &SearchOptions::new()).unwrap();
        assert!(matcher.is_match("The price: $10.00 is final"));
    }
}