cs/search/text_search.rs
//! # Builder Pattern and Concurrency - Rust Book Chapters 5, 10, 13, 16
2//!
3//! This module demonstrates the builder pattern and concurrent programming from
4//! [The Rust Book](https://doc.rust-lang.org/book/).
5//!
6//! ## Key Concepts Demonstrated
7//!
8//! 1. **Builder Pattern** (Chapters 5.3, 10.2)
9//! - Method chaining by consuming and returning `Self`
10//! - Ergonomic API design with sensible defaults
//!    - Private fields, so a searcher can only be configured through the builder methods
12//!
13//! 2. **Message Passing with Channels** (Chapter 16.2)
14//! - Using `mpsc::channel()` for thread communication
15//! - The critical `drop(tx)` pattern for channel termination
16//! - Collecting results from parallel workers
17//!
18//! 3. **Closures Capturing Environment** (Chapter 13.1)
19//! - `move` closures transferring ownership to threads
20//! - Cloning for shared access across threads
21//! - Nested closures with different capture modes
22//!
23//! ## Learning Notes
24//!
25//! **Why the builder pattern?**
26//! - Provides a fluent, readable API: `TextSearcher::new(dir).case_sensitive(true).search("text")`
27//! - Allows optional configuration without many constructors
28//! - Makes defaults explicit and overridable
29//!
30//! **Why channels for concurrency?**
31//! - Safe message passing between threads (no shared mutable state)
32//! - Natural fit for parallel file searching (many producers, one consumer)
33//! - Rust's ownership prevents data races at compile time
34
35use crate::error::{Result, SearchError};
36use grep_regex::RegexMatcherBuilder;
37use grep_searcher::sinks::UTF8;
38use grep_searcher::SearcherBuilder;
39use ignore::overrides::OverrideBuilder;
40use ignore::WalkBuilder;
41use std::path::PathBuf;
42use std::sync::mpsc;
43
/// A single matching line produced by a text search.
///
/// # Rust Book Reference
///
/// **Chapter 5.1: Defining and Instantiating Structs**
/// https://doc.rust-lang.org/book/ch05-01-defining-structs.html
///
/// This is a simple data-carrying struct with public fields. The derived
/// `PartialEq`/`Eq` compare all three fields, so two matches are equal only
/// when file, line, and content all agree.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Match {
    /// Path of the file where the match was found, as reported by the directory walker
    pub file: PathBuf,
    /// Line number of the matching line (1-indexed)
    pub line: usize,
    /// Content of the matching line, with trailing whitespace (including the newline) removed
    pub content: String,
}
61
/// Text searcher that uses ripgrep as a library for fast text searching.
///
/// # Rust Book Reference
///
/// **Chapter 5.3: Method Syntax**
/// https://doc.rust-lang.org/book/ch05-03-method-syntax.html
///
/// **Chapter 10.2: Traits as Parameters**
/// https://doc.rust-lang.org/book/ch10-02-traits.html
///
/// # Educational Notes - The Builder Pattern
///
/// This struct demonstrates the builder pattern, a common Rust idiom for
/// constructing complex objects with many optional parameters.
///
/// **Key characteristics:**
/// 1. Private fields prevent direct construction outside this module
/// 2. `new()` provides sensible defaults
/// 3. Builder methods take `self` by value and return `Self`
/// 4. Final `search()` method takes `&self` (doesn't consume), so one
///    configured searcher can run several queries
///
/// **Why this pattern?**
/// - Avoids constructors with many parameters
/// - Makes optional configuration explicit
/// - Enables method chaining for readability
/// - Every option has a concrete type (`bool`, `Vec<String>`, `PathBuf`);
///   note that glob patterns themselves are only validated when `search()` runs
pub struct TextSearcher {
    /// Whether to respect .gitignore files (default: true)
    respect_gitignore: bool,
    /// Whether search is case-sensitive (default: false)
    case_sensitive: bool,
    /// Whether to match whole words only (default: false)
    word_match: bool,
    /// Whether to treat the query as a regex rather than a literal string (default: false)
    is_regex: bool,
    /// Glob patterns to include (default: empty, meaning no glob filtering)
    globs: Vec<String>,
    /// Patterns to exclude from search (default: empty)
    exclusions: Vec<String>,
    /// The base directory to search in
    base_dir: PathBuf,
}
104
105impl TextSearcher {
106 /// Create a new TextSearcher with default settings.
107 ///
108 /// # Rust Book Reference
109 ///
110 /// **Chapter 5.3: Method Syntax - Associated Functions**
111 /// https://doc.rust-lang.org/book/ch05-03-method-syntax.html#associated-functions
112 ///
113 /// # Educational Notes - Builder Constructor
114 ///
115 /// This is an associated function (not a method) that creates a new instance.
116 /// It's called with `TextSearcher::new(...)` rather than on an instance.
117 ///
118 /// **Design decisions:**
119 /// - Takes only required parameter (`base_dir`)
120 /// - Sets sensible defaults for all optional fields
121 /// - Returns owned `Self` (not `&Self`)
122 ///
123 /// **Usage pattern:**
124 /// ```rust,ignore
125 /// let searcher = TextSearcher::new(PathBuf::from("/path"))
126 /// .case_sensitive(true) // Optional: override default
127 /// .respect_gitignore(false); // Optional: override default
128 /// ```
129 pub fn new(base_dir: PathBuf) -> Self {
130 Self {
131 respect_gitignore: true,
132 case_sensitive: false,
133 word_match: false,
134 is_regex: false,
135 globs: Vec::new(),
136 exclusions: Vec::new(),
137 base_dir,
138 }
139 }
140
141 /// Set whether to respect .gitignore files (default: true).
142 ///
143 /// # Rust Book Reference
144 ///
145 /// **Chapter 5.3: Method Syntax**
146 /// https://doc.rust-lang.org/book/ch05-03-method-syntax.html
147 ///
148 /// # Educational Notes - Builder Method Pattern
149 ///
150 /// This method demonstrates the builder pattern's key technique:
151 ///
152 /// ```rust,ignore
153 /// pub fn respect_gitignore(mut self, value: bool) -> Self {
154 /// // ^^^^^^^^ ^^^^^^
155 /// // Takes ownership Returns ownership
156 /// self.respect_gitignore = value;
157 /// self // Return modified self for chaining
158 /// }
159 /// ```
160 ///
161 /// **Why `mut self` instead of `&mut self`?**
162 /// - `mut self` takes ownership, allowing method chaining
163 /// - `&mut self` would require explicit returns and be less ergonomic
164 /// - Ownership transfer prevents using partially-configured builders
165 ///
166 /// **Method chaining:**
167 /// ```rust,ignore
168 /// TextSearcher::new(dir)
169 /// .respect_gitignore(false) // Consumes and returns Self
170 /// .case_sensitive(true) // Consumes and returns Self
171 /// .search("text") // Final method takes &self
172 /// ```
173 pub fn respect_gitignore(mut self, value: bool) -> Self {
174 self.respect_gitignore = value;
175 self
176 }
177
178 /// Set whether search is case-sensitive (default: false).
179 ///
180 /// # Educational Notes
181 ///
182 /// Same builder pattern as `respect_gitignore()`. Each builder method:
183 /// 1. Takes ownership of `self`
184 /// 2. Modifies the field
185 /// 3. Returns ownership for chaining
186 pub fn case_sensitive(mut self, value: bool) -> Self {
187 self.case_sensitive = value;
188 self
189 }
190
191 /// Set whether to match whole words only (default: false)
192 pub fn word_match(mut self, value: bool) -> Self {
193 self.word_match = value;
194 self
195 }
196
197 /// Set whether to treat the query as a regex (default: false)
198 pub fn is_regex(mut self, value: bool) -> Self {
199 self.is_regex = value;
200 self
201 }
202
203 /// Add glob patterns to include
204 pub fn add_globs(mut self, globs: Vec<String>) -> Self {
205 self.globs.extend(globs);
206 self
207 }
208
209 /// Add exclusion patterns
210 pub fn add_exclusions(mut self, exclusions: Vec<String>) -> Self {
211 self.exclusions.extend(exclusions);
212 self
213 }
214
215 /// Search for text and return all matches.
216 ///
217 /// # Rust Book Reference
218 ///
219 /// **Chapter 16.2: Message Passing with Channels**
220 /// https://doc.rust-lang.org/book/ch16-02-message-passing.html
221 ///
222 /// **Chapter 13.1: Closures**
223 /// https://doc.rust-lang.org/book/ch13-01-closures.html
224 ///
225 /// # Educational Notes - Concurrent Search with Channels
226 ///
227 /// This method demonstrates concurrent programming using message passing:
228 ///
229 /// 1. **Create channel**: `let (tx, rx) = mpsc::channel()`
230 /// 2. **Spawn workers**: Each thread gets a cloned sender (`tx.clone()`)
231 /// 3. **Send results**: Workers send matches through the channel
232 /// 4. **Drop original sender**: Critical for terminating the receiver
233 /// 5. **Collect results**: Main thread receives all matches
234 ///
235 /// **Why channels instead of shared state?**
236 /// - No locks needed (no `Mutex`)
237 /// - Ownership prevents data races
238 /// - Natural producer-consumer pattern
239 /// - Rust's type system ensures thread safety
240 ///
241 /// # Arguments
242 /// * `text` - The text to search for
243 ///
244 /// # Returns
245 /// A vector of Match structs containing file path, line number, and content
246 pub fn search(&self, text: &str) -> Result<Vec<Match>> {
247 // Build the regex matcher with fixed string (literal) matching
248 let matcher = RegexMatcherBuilder::new()
249 .case_insensitive(!self.case_sensitive)
250 .word(self.word_match)
251 .fixed_strings(!self.is_regex) // Use fixed strings unless regex is enabled
252 .build(text)
253 .map_err(|e| SearchError::Generic(format!("Failed to build matcher: {}", e)))?;
254
255 // CHANNEL CREATION: Create a channel for collecting matches from parallel threads
256 // Chapter 16.2: mpsc = "multiple producer, single consumer"
257 // tx (transmitter) can be cloned for each thread
258 // rx (receiver) stays in the main thread
259 let (tx, rx) = mpsc::channel();
260
261 // Build parallel walker with .gitignore support
262 // Build overrides if any globs are provided
263 let mut builder = WalkBuilder::new(&self.base_dir);
264 let mut walk_builder = builder
265 .git_ignore(self.respect_gitignore)
266 .git_global(self.respect_gitignore)
267 .git_exclude(self.respect_gitignore)
268 .hidden(false); // Don't skip hidden files by default
269
270 if !self.globs.is_empty() {
271 let mut override_builder = OverrideBuilder::new(&self.base_dir);
272 for glob in &self.globs {
273 if let Err(e) = override_builder.add(glob) {
274 return Err(SearchError::Generic(format!(
275 "Invalid glob pattern '{}': {}",
276 glob, e
277 )));
278 }
279 }
280 if let Ok(overrides) = override_builder.build() {
281 walk_builder = walk_builder.overrides(overrides);
282 }
283 }
284
285 walk_builder.build_parallel().run(|| {
286 // CLONING FOR THREADS: Each thread gets its own sender and matcher
287 // Chapter 16.2: Clone tx so each thread can send messages
288 // Chapter 13.1: These clones will be moved into the closure below
289 let tx = tx.clone();
290 let matcher = matcher.clone();
291
292 // MOVE CLOSURE: Transfer ownership of tx and matcher to this thread
293 // Chapter 13.1: The `move` keyword forces the closure to take ownership
294 // Without `move`, the closure would try to borrow, which doesn't work across threads
295 Box::new(move |entry| {
296 use ignore::WalkState;
297
298 let entry = match entry {
299 Ok(e) => e,
300 Err(_) => return WalkState::Continue,
301 };
302
303 // Skip directories
304 if entry.file_type().is_none_or(|ft| ft.is_dir()) {
305 return WalkState::Continue;
306 }
307
308 let path = entry.path();
309 let path_buf = path.to_path_buf();
310
311 // THREAD-LOCAL ACCUMULATOR: Each thread collects its own matches
312 // This avoids contention - no need for Mutex or Arc
313 let mut file_matches = Vec::new();
314
315 // Build searcher
316 let mut searcher = SearcherBuilder::new().line_number(true).build();
317
318 // NESTED CLOSURE: Search the file with another closure
319 // Chapter 13.1: This closure captures `file_matches` and `path_buf`
320 // Note: This is NOT a `move` closure - it borrows from the outer closure
321 let result = searcher.search_path(
322 &matcher,
323 path,
324 UTF8(|line_num, line_content| {
325 file_matches.push(Match {
326 file: path_buf.clone(),
327 line: line_num as usize,
328 content: line_content.trim_end().to_string(),
329 });
330 Ok(true) // Continue searching
331 }),
332 );
333
334 // SEND THROUGH CHANNEL: Send matches to main thread
335 // Chapter 16.2: tx.send() transfers ownership of file_matches
336 // The `let _ =` ignores send errors (receiver might be dropped)
337 if result.is_ok() && !file_matches.is_empty() {
338 let _ = tx.send(file_matches);
339 }
340
341 WalkState::Continue
342 })
343 });
344
345 // CRITICAL: Drop the original sender so rx.iter() will terminate
346 // Chapter 16.2: The receiver's iterator only ends when ALL senders are dropped
347 // We cloned tx for each thread, but we still have the original here
348 // Without this drop, rx would wait forever!
349 drop(tx);
350
351 // COLLECT RESULTS: Receive all matches from worker threads
352 // Chapter 16.2: The for loop iterates until all senders are dropped
353 // This blocks until all threads finish and send their results
354 let mut all_matches = Vec::new();
355 for file_matches in rx {
356 all_matches.extend(file_matches);
357 }
358
359 Ok(all_matches)
360 }
361}
362
363impl Default for TextSearcher {
364 fn default() -> Self {
365 Self::new(std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")))
366 }
367}
368
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Create a temporary directory holding the given `(file name, contents)` pairs.
    fn dir_with_files(files: &[(&str, &str)]) -> TempDir {
        let dir = TempDir::new().unwrap();
        for &(name, contents) in files {
            fs::write(dir.path().join(name), contents).unwrap();
        }
        dir
    }

    /// Create a temporary "repository": a `.git` directory (required for
    /// .gitignore to work), a .gitignore listing `ignored.txt`, and two files
    /// that both contain the word "target".
    fn git_repo_with_ignored_file() -> TempDir {
        let dir = TempDir::new().unwrap();
        fs::create_dir(dir.path().join(".git")).unwrap();
        fs::write(dir.path().join(".gitignore"), "ignored.txt\n").unwrap();
        fs::write(dir.path().join("ignored.txt"), "target content").unwrap();
        fs::write(dir.path().join("tracked.txt"), "target content").unwrap();
        dir
    }

    /// Searcher with default settings rooted at `dir`.
    fn searcher_in(dir: &TempDir) -> TextSearcher {
        TextSearcher::new(dir.path().to_path_buf())
    }

    #[test]
    fn test_basic_search() {
        let dir = dir_with_files(&[("test.txt", "hello world\nfoo bar\nhello again")]);

        let found = searcher_in(&dir).search("hello").unwrap();

        assert_eq!(found.len(), 2);
        assert_eq!(found[0].line, 1);
        assert_eq!(found[0].content, "hello world");
        assert_eq!(found[1].line, 3);
        assert_eq!(found[1].content, "hello again");
    }

    #[test]
    fn test_case_insensitive_default() {
        let dir = dir_with_files(&[("test.txt", "Hello World\nHELLO\nhello")]);

        let found = searcher_in(&dir).search("hello").unwrap();

        // Every capitalisation should match
        assert_eq!(found.len(), 3);
    }

    #[test]
    fn test_case_sensitive() {
        let dir = dir_with_files(&[("test.txt", "Hello World\nHELLO\nhello")]);

        let found = searcher_in(&dir)
            .case_sensitive(true)
            .search("hello")
            .unwrap();

        // Only the exact-case line should match
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].content, "hello");
    }

    #[test]
    fn test_no_matches() {
        let dir = dir_with_files(&[("test.txt", "foo bar baz")]);

        let found = searcher_in(&dir).search("notfound").unwrap();

        assert_eq!(found.len(), 0);
    }

    #[test]
    fn test_multiple_files() {
        let dir = dir_with_files(&[
            ("file1.txt", "target line 1"),
            ("file2.txt", "target line 2"),
            ("file3.txt", "other content"),
        ]);

        let found = searcher_in(&dir).search("target").unwrap();

        assert_eq!(found.len(), 2);
    }

    #[test]
    fn test_gitignore_respected() {
        let dir = git_repo_with_ignored_file();

        let found = searcher_in(&dir)
            .respect_gitignore(true)
            .search("target")
            .unwrap();

        // Only tracked.txt should be searched
        assert_eq!(found.len(), 1);
        assert!(found[0].file.ends_with("tracked.txt"));
    }

    #[test]
    fn test_gitignore_disabled() {
        let dir = git_repo_with_ignored_file();

        let found = searcher_in(&dir)
            .respect_gitignore(false)
            .search("target")
            .unwrap();

        // Both files should be searched
        assert_eq!(found.len(), 2);
    }

    #[test]
    fn test_builder_pattern() {
        let cwd = std::env::current_dir().unwrap();
        let searcher = TextSearcher::new(cwd)
            .case_sensitive(true)
            .respect_gitignore(false);

        assert!(searcher.case_sensitive);
        assert!(!searcher.respect_gitignore);
    }

    #[test]
    fn test_default() {
        let searcher = TextSearcher::default();

        assert!(!searcher.case_sensitive);
        assert!(searcher.respect_gitignore);
    }

    #[test]
    fn test_special_characters() {
        let dir = dir_with_files(&[("test.txt", "price: $19.99\nurl: http://example.com")]);
        let searcher = searcher_in(&dir);

        // Regex metacharacters in the query must be treated as literals
        for query in ["$19.99", "http://"] {
            let found = searcher.search(query).unwrap();
            assert_eq!(found.len(), 1);
        }
    }

    #[test]
    fn test_line_numbers_accurate() {
        let dir = dir_with_files(&[(
            "test.txt",
            "line 1\nline 2\ntarget line 3\nline 4\ntarget line 5\nline 6",
        )]);

        let found = searcher_in(&dir).search("target").unwrap();

        assert_eq!(found.len(), 2);
        assert_eq!(found[0].line, 3);
        assert_eq!(found[1].line, 5);
    }
}