cs/search/
text_search.rs

//! # Builder Pattern and Concurrency - Rust Book Chapters 5, 10, 13, 16
2//!
3//! This module demonstrates the builder pattern and concurrent programming from
4//! [The Rust Book](https://doc.rust-lang.org/book/).
5//!
6//! ## Key Concepts Demonstrated
7//!
8//! 1. **Builder Pattern** (Chapters 5.3, 10.2)
9//!    - Method chaining by consuming and returning `Self`
10//!    - Ergonomic API design with sensible defaults
//!    - Setters consume and return the builder (the type-state pattern, which
//!      adds compile-time guarantees, is a further refinement not used here)
12//!
13//! 2. **Message Passing with Channels** (Chapter 16.2)
14//!    - Using `mpsc::channel()` for thread communication
15//!    - The critical `drop(tx)` pattern for channel termination
16//!    - Collecting results from parallel workers
17//!
18//! 3. **Closures Capturing Environment** (Chapter 13.1)
19//!    - `move` closures transferring ownership to threads
20//!    - Cloning for shared access across threads
21//!    - Nested closures with different capture modes
22//!
23//! ## Learning Notes
24//!
25//! **Why the builder pattern?**
26//! - Provides a fluent, readable API: `TextSearcher::new(dir).case_sensitive(true).search("text")`
27//! - Allows optional configuration without many constructors
28//! - Makes defaults explicit and overridable
29//!
30//! **Why channels for concurrency?**
31//! - Safe message passing between threads (no shared mutable state)
32//! - Natural fit for parallel file searching (many producers, one consumer)
33//! - Rust's ownership prevents data races at compile time
34
35use crate::error::{Result, SearchError};
36use grep_regex::RegexMatcherBuilder;
37use grep_searcher::sinks::UTF8;
38use grep_searcher::SearcherBuilder;
39use ignore::overrides::OverrideBuilder;
40use ignore::WalkBuilder;
41use std::path::PathBuf;
42use std::sync::mpsc;
43
/// A single matching line produced by a text search.
///
/// # Rust Book Reference
///
/// **Chapter 5.1: Defining and Instantiating Structs**
/// https://doc.rust-lang.org/book/ch05-01-defining-structs.html
///
/// This is a plain data-carrying struct with public fields. Every field is
/// hashable, so `Hash` is derived alongside `Eq`; that lets callers
/// de-duplicate matches in a `HashSet` or use them as map keys.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Match {
    /// Path of the file in which the match was found.
    pub file: PathBuf,
    /// Line number of the match (1-indexed).
    pub line: usize,
    /// Text of the matching line with trailing whitespace removed.
    pub content: String,
}
61
/// Text searcher that uses ripgrep's library crates for fast text searching.
///
/// # Rust Book Reference
///
/// **Chapter 5.3: Method Syntax**
/// https://doc.rust-lang.org/book/ch05-03-method-syntax.html
///
/// **Chapter 10.2: Traits as Parameters**
/// https://doc.rust-lang.org/book/ch10-02-traits.html
///
/// # Educational Notes - The Builder Pattern
///
/// This struct demonstrates the builder pattern, a common Rust idiom for
/// constructing objects with many optional parameters.
///
/// **Key characteristics:**
/// 1. Private fields prevent direct construction outside this module
/// 2. `new()` provides the defaults
/// 3. Builder methods take `mut self` and return `Self`
/// 4. The final `search()` method takes `&self` (doesn't consume)
///
/// **Why this pattern?**
/// - Avoids constructors with many parameters
/// - Makes optional configuration explicit
/// - Enables method chaining for readability
/// - Every option has a typed setter, so the compiler checks each call site
///
/// `Debug` and `Clone` are derived so a configured searcher can be logged
/// and reused as a template for further variations.
#[derive(Debug, Clone)]
pub struct TextSearcher {
    /// Whether to respect .gitignore files
    respect_gitignore: bool,
    /// Whether search is case-sensitive
    case_sensitive: bool,
    /// Whether to match whole words only
    word_match: bool,
    /// Whether to treat the query as a regex
    is_regex: bool,
    /// Glob patterns to include
    globs: Vec<String>,
    /// Patterns to exclude from search
    exclusions: Vec<String>,
    /// The base directory to search in
    base_dir: PathBuf,
}
104
105impl TextSearcher {
106    /// Create a new TextSearcher with default settings.
107    ///
108    /// # Rust Book Reference
109    ///
110    /// **Chapter 5.3: Method Syntax - Associated Functions**
111    /// https://doc.rust-lang.org/book/ch05-03-method-syntax.html#associated-functions
112    ///
113    /// # Educational Notes - Builder Constructor
114    ///
115    /// This is an associated function (not a method) that creates a new instance.
116    /// It's called with `TextSearcher::new(...)` rather than on an instance.
117    ///
118    /// **Design decisions:**
119    /// - Takes only required parameter (`base_dir`)
120    /// - Sets sensible defaults for all optional fields
121    /// - Returns owned `Self` (not `&Self`)
122    ///
123    /// **Usage pattern:**
124    /// ```rust,ignore
125    /// let searcher = TextSearcher::new(PathBuf::from("/path"))
126    ///     .case_sensitive(true)    // Optional: override default
127    ///     .respect_gitignore(false); // Optional: override default
128    /// ```
129    pub fn new(base_dir: PathBuf) -> Self {
130        Self {
131            respect_gitignore: true,
132            case_sensitive: false,
133            word_match: false,
134            is_regex: false,
135            globs: Vec::new(),
136            exclusions: Vec::new(),
137            base_dir,
138        }
139    }
140
141    /// Set whether to respect .gitignore files (default: true).
142    ///
143    /// # Rust Book Reference
144    ///
145    /// **Chapter 5.3: Method Syntax**
146    /// https://doc.rust-lang.org/book/ch05-03-method-syntax.html
147    ///
148    /// # Educational Notes - Builder Method Pattern
149    ///
150    /// This method demonstrates the builder pattern's key technique:
151    ///
152    /// ```rust,ignore
153    /// pub fn respect_gitignore(mut self, value: bool) -> Self {
154    /// //                       ^^^^^^^^              ^^^^^^
155    /// //                       Takes ownership       Returns ownership
156    ///     self.respect_gitignore = value;
157    ///     self  // Return modified self for chaining
158    /// }
159    /// ```
160    ///
161    /// **Why `mut self` instead of `&mut self`?**
162    /// - `mut self` takes ownership, allowing method chaining
163    /// - `&mut self` would require explicit returns and be less ergonomic
164    /// - Ownership transfer prevents using partially-configured builders
165    ///
166    /// **Method chaining:**
167    /// ```rust,ignore
168    /// TextSearcher::new(dir)
169    ///     .respect_gitignore(false)  // Consumes and returns Self
170    ///     .case_sensitive(true)      // Consumes and returns Self
171    ///     .search("text")            // Final method takes &self
172    /// ```
173    pub fn respect_gitignore(mut self, value: bool) -> Self {
174        self.respect_gitignore = value;
175        self
176    }
177
178    /// Set whether search is case-sensitive (default: false).
179    ///
180    /// # Educational Notes
181    ///
182    /// Same builder pattern as `respect_gitignore()`. Each builder method:
183    /// 1. Takes ownership of `self`
184    /// 2. Modifies the field
185    /// 3. Returns ownership for chaining
186    pub fn case_sensitive(mut self, value: bool) -> Self {
187        self.case_sensitive = value;
188        self
189    }
190
191    /// Set whether to match whole words only (default: false)
192    pub fn word_match(mut self, value: bool) -> Self {
193        self.word_match = value;
194        self
195    }
196
197    /// Set whether to treat the query as a regex (default: false)
198    pub fn is_regex(mut self, value: bool) -> Self {
199        self.is_regex = value;
200        self
201    }
202
203    /// Add glob patterns to include
204    pub fn add_globs(mut self, globs: Vec<String>) -> Self {
205        self.globs.extend(globs);
206        self
207    }
208
209    /// Add exclusion patterns
210    pub fn add_exclusions(mut self, exclusions: Vec<String>) -> Self {
211        self.exclusions.extend(exclusions);
212        self
213    }
214
215    /// Search for text and return all matches.
216    ///
217    /// # Rust Book Reference
218    ///
219    /// **Chapter 16.2: Message Passing with Channels**
220    /// https://doc.rust-lang.org/book/ch16-02-message-passing.html
221    ///
222    /// **Chapter 13.1: Closures**
223    /// https://doc.rust-lang.org/book/ch13-01-closures.html
224    ///
225    /// # Educational Notes - Concurrent Search with Channels
226    ///
227    /// This method demonstrates concurrent programming using message passing:
228    ///
229    /// 1. **Create channel**: `let (tx, rx) = mpsc::channel()`
230    /// 2. **Spawn workers**: Each thread gets a cloned sender (`tx.clone()`)
231    /// 3. **Send results**: Workers send matches through the channel
232    /// 4. **Drop original sender**: Critical for terminating the receiver
233    /// 5. **Collect results**: Main thread receives all matches
234    ///
235    /// **Why channels instead of shared state?**
236    /// - No locks needed (no `Mutex`)
237    /// - Ownership prevents data races
238    /// - Natural producer-consumer pattern
239    /// - Rust's type system ensures thread safety
240    ///
241    /// # Arguments
242    /// * `text` - The text to search for
243    ///
244    /// # Returns
245    /// A vector of Match structs containing file path, line number, and content
246    pub fn search(&self, text: &str) -> Result<Vec<Match>> {
247        // Build the regex matcher with fixed string (literal) matching
248        let matcher = RegexMatcherBuilder::new()
249            .case_insensitive(!self.case_sensitive)
250            .word(self.word_match)
251            .fixed_strings(!self.is_regex) // Use fixed strings unless regex is enabled
252            .build(text)
253            .map_err(|e| SearchError::Generic(format!("Failed to build matcher: {}", e)))?;
254
255        // CHANNEL CREATION: Create a channel for collecting matches from parallel threads
256        // Chapter 16.2: mpsc = "multiple producer, single consumer"
257        // tx (transmitter) can be cloned for each thread
258        // rx (receiver) stays in the main thread
259        let (tx, rx) = mpsc::channel();
260
261        // Build parallel walker with .gitignore support
262        // Build overrides if any globs are provided
263        let mut builder = WalkBuilder::new(&self.base_dir);
264        let mut walk_builder = builder
265            .git_ignore(self.respect_gitignore)
266            .git_global(self.respect_gitignore)
267            .git_exclude(self.respect_gitignore)
268            .hidden(false); // Don't skip hidden files by default
269
270        if !self.globs.is_empty() {
271            let mut override_builder = OverrideBuilder::new(&self.base_dir);
272            for glob in &self.globs {
273                if let Err(e) = override_builder.add(glob) {
274                    return Err(SearchError::Generic(format!(
275                        "Invalid glob pattern '{}': {}",
276                        glob, e
277                    )));
278                }
279            }
280            if let Ok(overrides) = override_builder.build() {
281                walk_builder = walk_builder.overrides(overrides);
282            }
283        }
284
285        walk_builder.build_parallel().run(|| {
286            // CLONING FOR THREADS: Each thread gets its own sender and matcher
287            // Chapter 16.2: Clone tx so each thread can send messages
288            // Chapter 13.1: These clones will be moved into the closure below
289            let tx = tx.clone();
290            let matcher = matcher.clone();
291
292            // MOVE CLOSURE: Transfer ownership of tx and matcher to this thread
293            // Chapter 13.1: The `move` keyword forces the closure to take ownership
294            // Without `move`, the closure would try to borrow, which doesn't work across threads
295            Box::new(move |entry| {
296                use ignore::WalkState;
297
298                let entry = match entry {
299                    Ok(e) => e,
300                    Err(_) => return WalkState::Continue,
301                };
302
303                // Skip directories
304                if entry.file_type().is_none_or(|ft| ft.is_dir()) {
305                    return WalkState::Continue;
306                }
307
308                let path = entry.path();
309                let path_buf = path.to_path_buf();
310
311                // THREAD-LOCAL ACCUMULATOR: Each thread collects its own matches
312                // This avoids contention - no need for Mutex or Arc
313                let mut file_matches = Vec::new();
314
315                // Build searcher
316                let mut searcher = SearcherBuilder::new().line_number(true).build();
317
318                // NESTED CLOSURE: Search the file with another closure
319                // Chapter 13.1: This closure captures `file_matches` and `path_buf`
320                // Note: This is NOT a `move` closure - it borrows from the outer closure
321                let result = searcher.search_path(
322                    &matcher,
323                    path,
324                    UTF8(|line_num, line_content| {
325                        file_matches.push(Match {
326                            file: path_buf.clone(),
327                            line: line_num as usize,
328                            content: line_content.trim_end().to_string(),
329                        });
330                        Ok(true) // Continue searching
331                    }),
332                );
333
334                // SEND THROUGH CHANNEL: Send matches to main thread
335                // Chapter 16.2: tx.send() transfers ownership of file_matches
336                // The `let _ =` ignores send errors (receiver might be dropped)
337                if result.is_ok() && !file_matches.is_empty() {
338                    let _ = tx.send(file_matches);
339                }
340
341                WalkState::Continue
342            })
343        });
344
345        // CRITICAL: Drop the original sender so rx.iter() will terminate
346        // Chapter 16.2: The receiver's iterator only ends when ALL senders are dropped
347        // We cloned tx for each thread, but we still have the original here
348        // Without this drop, rx would wait forever!
349        drop(tx);
350
351        // COLLECT RESULTS: Receive all matches from worker threads
352        // Chapter 16.2: The for loop iterates until all senders are dropped
353        // This blocks until all threads finish and send their results
354        let mut all_matches = Vec::new();
355        for file_matches in rx {
356            all_matches.extend(file_matches);
357        }
358
359        Ok(all_matches)
360    }
361}
362
363impl Default for TextSearcher {
364    fn default() -> Self {
365        Self::new(std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")))
366    }
367}
368
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Create a temporary directory populated with the given `(name, contents)` files.
    fn dir_with(files: &[(&str, &str)]) -> TempDir {
        let dir = TempDir::new().unwrap();
        for &(name, contents) in files {
            fs::write(dir.path().join(name), contents).unwrap();
        }
        dir
    }

    /// Create a searcher with default settings rooted at `dir`.
    fn searcher_for(dir: &TempDir) -> TextSearcher {
        TextSearcher::new(dir.path().to_path_buf())
    }

    /// Create a directory that looks like a git repository with one ignored
    /// and one tracked file, both containing the word "target".
    fn repo_with_ignored_file() -> TempDir {
        let dir = TempDir::new().unwrap();

        // A `.git` directory is required for .gitignore rules to apply.
        fs::create_dir(dir.path().join(".git")).unwrap();
        fs::write(dir.path().join(".gitignore"), "ignored.txt\n").unwrap();
        fs::write(dir.path().join("ignored.txt"), "target content").unwrap();
        fs::write(dir.path().join("tracked.txt"), "target content").unwrap();

        dir
    }

    #[test]
    fn test_basic_search() {
        let temp_dir = dir_with(&[("test.txt", "hello world\nfoo bar\nhello again")]);

        let matches = searcher_for(&temp_dir).search("hello").unwrap();

        assert_eq!(matches.len(), 2);
        assert_eq!(matches[0].line, 1);
        assert_eq!(matches[0].content, "hello world");
        assert_eq!(matches[1].line, 3);
        assert_eq!(matches[1].content, "hello again");
    }

    #[test]
    fn test_case_insensitive_default() {
        let temp_dir = dir_with(&[("test.txt", "Hello World\nHELLO\nhello")]);

        let matches = searcher_for(&temp_dir).search("hello").unwrap();

        assert_eq!(matches.len(), 3); // Should match all variations
    }

    #[test]
    fn test_case_sensitive() {
        let temp_dir = dir_with(&[("test.txt", "Hello World\nHELLO\nhello")]);

        let searcher = searcher_for(&temp_dir).case_sensitive(true);
        let matches = searcher.search("hello").unwrap();

        assert_eq!(matches.len(), 1); // Should only match exact case
        assert_eq!(matches[0].content, "hello");
    }

    #[test]
    fn test_no_matches() {
        let temp_dir = dir_with(&[("test.txt", "foo bar baz")]);

        let matches = searcher_for(&temp_dir).search("notfound").unwrap();

        assert_eq!(matches.len(), 0);
    }

    #[test]
    fn test_multiple_files() {
        let temp_dir = dir_with(&[
            ("file1.txt", "target line 1"),
            ("file2.txt", "target line 2"),
            ("file3.txt", "other content"),
        ]);

        let matches = searcher_for(&temp_dir).search("target").unwrap();

        assert_eq!(matches.len(), 2);
    }

    #[test]
    fn test_gitignore_respected() {
        let temp_dir = repo_with_ignored_file();

        let searcher = searcher_for(&temp_dir).respect_gitignore(true);
        let matches = searcher.search("target").unwrap();

        // Should only find in tracked.txt
        assert_eq!(matches.len(), 1);
        assert!(matches[0].file.ends_with("tracked.txt"));
    }

    #[test]
    fn test_gitignore_disabled() {
        let temp_dir = repo_with_ignored_file();

        let searcher = searcher_for(&temp_dir).respect_gitignore(false);
        let matches = searcher.search("target").unwrap();

        // Should find in both files
        assert_eq!(matches.len(), 2);
    }

    #[test]
    fn test_builder_pattern() {
        let searcher = TextSearcher::new(std::env::current_dir().unwrap())
            .case_sensitive(true)
            .respect_gitignore(false);

        assert!(searcher.case_sensitive);
        assert!(!searcher.respect_gitignore);
    }

    #[test]
    fn test_default() {
        let searcher = TextSearcher::default();

        assert!(!searcher.case_sensitive);
        assert!(searcher.respect_gitignore);
    }

    #[test]
    fn test_special_characters() {
        let temp_dir = dir_with(&[("test.txt", "price: $19.99\nurl: http://example.com")]);

        let searcher = searcher_for(&temp_dir);

        // Test with special regex characters (should be treated as literals)
        let matches = searcher.search("$19.99").unwrap();
        assert_eq!(matches.len(), 1);

        let matches = searcher.search("http://").unwrap();
        assert_eq!(matches.len(), 1);
    }

    #[test]
    fn test_line_numbers_accurate() {
        let content = "line 1\nline 2\ntarget line 3\nline 4\ntarget line 5\nline 6";
        let temp_dir = dir_with(&[("test.txt", content)]);

        let matches = searcher_for(&temp_dir).search("target").unwrap();

        assert_eq!(matches.len(), 2);
        assert_eq!(matches[0].line, 3);
        assert_eq!(matches[1].line, 5);
    }

    #[test]
    fn test_word_match() {
        let temp_dir = dir_with(&[("test.txt", "hello world\nhelloworld\nsay hello")]);

        let searcher = searcher_for(&temp_dir).word_match(true);
        let matches = searcher.search("hello").unwrap();

        // "helloworld" contains the text but not as a whole word
        assert_eq!(matches.len(), 2);
        assert_eq!(matches[0].line, 1);
        assert_eq!(matches[1].line, 3);
    }

    #[test]
    fn test_regex_search() {
        let temp_dir = dir_with(&[("test.txt", "hello\nhallo\nhxllo world\nhllo")]);

        // As a regex, `.` matches any single character
        let searcher = searcher_for(&temp_dir).is_regex(true);
        let matches = searcher.search("h.llo").unwrap();
        assert_eq!(matches.len(), 3);

        // As a literal, the same query matches nothing in this file
        let matches = searcher_for(&temp_dir).search("h.llo").unwrap();
        assert_eq!(matches.len(), 0);
    }

    #[test]
    fn test_glob_filter() {
        let temp_dir = dir_with(&[("keep.rs", "target here"), ("skip.txt", "target here")]);

        let searcher = searcher_for(&temp_dir).add_globs(vec!["*.rs".to_string()]);
        let matches = searcher.search("target").unwrap();

        // Only the file matching the include glob is searched
        assert_eq!(matches.len(), 1);
        assert!(matches[0].file.ends_with("keep.rs"));
    }
}