cs/search/
text_search.rs

//! # Builder Pattern and Concurrency - Rust Book Chapters 5, 10, 13, 16
2//!
3//! This module demonstrates the builder pattern and concurrent programming from
4//! [The Rust Book](https://doc.rust-lang.org/book/).
5//!
6//! ## Key Concepts Demonstrated
7//!
8//! 1. **Builder Pattern** (Chapters 5.3, 10.2)
9//!    - Method chaining by consuming and returning `Self`
10//!    - Ergonomic API design with sensible defaults
//!    - Private fields, so configuration is only changed through builder methods
12//!
13//! 2. **Message Passing with Channels** (Chapter 16.2)
14//!    - Using `mpsc::channel()` for thread communication
15//!    - The critical `drop(tx)` pattern for channel termination
16//!    - Collecting results from parallel workers
17//!
18//! 3. **Closures Capturing Environment** (Chapter 13.1)
19//!    - `move` closures transferring ownership to threads
20//!    - Cloning for shared access across threads
21//!    - Nested closures with different capture modes
22//!
23//! ## Learning Notes
24//!
25//! **Why the builder pattern?**
26//! - Provides a fluent, readable API: `TextSearcher::new(dir).case_sensitive(true).search("text")`
27//! - Allows optional configuration without many constructors
28//! - Makes defaults explicit and overridable
29//!
30//! **Why channels for concurrency?**
31//! - Safe message passing between threads (no shared mutable state)
32//! - Natural fit for parallel file searching (many producers, one consumer)
33//! - Rust's ownership prevents data races at compile time
34
35use crate::error::{Result, SearchError};
36use grep_matcher::Matcher;
37use grep_regex::RegexMatcherBuilder;
38use grep_searcher::sinks::UTF8;
39use grep_searcher::SearcherBuilder;
40use ignore::overrides::OverrideBuilder;
41use ignore::WalkBuilder;
42use std::path::PathBuf;
43use std::sync::mpsc;
44
/// One matching line produced by a text search.
///
/// # Rust Book Reference
///
/// **Chapter 5.1: Defining and Instantiating Structs**
/// <https://doc.rust-lang.org/book/ch05-01-defining-structs.html>
///
/// A plain data carrier: every field is public so callers can read or build
/// values directly.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Match {
    /// Path of the file that contains the matching line.
    pub file: PathBuf,
    /// 1-indexed line number of the matching line.
    pub line: usize,
    /// Text of the matching line.
    pub content: String,
    /// Lines immediately preceding the match, in file order.
    pub context_before: Vec<String>,
    /// Lines immediately following the match, in file order.
    pub context_after: Vec<String>,
}
66
/// Text searcher that uses ripgrep as a library for fast text searching.
///
/// # Rust Book Reference
///
/// **Chapter 5.3: Method Syntax**
/// <https://doc.rust-lang.org/book/ch05-03-method-syntax.html>
///
/// **Chapter 10.2: Traits as Parameters**
/// <https://doc.rust-lang.org/book/ch10-02-traits.html>
///
/// # Educational Notes - The Builder Pattern
///
/// This struct demonstrates the builder pattern, a common Rust idiom for
/// constructing complex objects with many optional parameters.
///
/// **Key characteristics:**
/// 1. Private fields prevent direct construction from outside this module
/// 2. `new()` provides sensible defaults
/// 3. Builder methods take `mut self` and return `Self`
/// 4. Final `search()` method takes `&self` (doesn't consume)
///
/// **Why this pattern?**
/// - Avoids constructors with many parameters
/// - Makes optional configuration explicit
/// - Enables method chaining for readability
///
/// Note that the configuration itself is validated at run time: invalid glob
/// or regex patterns are reported by `search()`, not by the compiler.
///
/// `Debug` and `Clone` are derived so a configured searcher can be logged and
/// reused as a template for several differently-tweaked searches.
#[derive(Debug, Clone)]
pub struct TextSearcher {
    /// Whether to respect .gitignore files
    respect_gitignore: bool,
    /// Whether search is case-sensitive
    case_sensitive: bool,
    /// Whether to match whole words only
    word_match: bool,
    /// Whether to treat the query as a regex
    is_regex: bool,
    /// Glob patterns to include
    globs: Vec<String>,
    /// Patterns to exclude from search
    exclusions: Vec<String>,
    /// The base directory to search in
    base_dir: PathBuf,
    /// Number of context lines to show before and after matches
    context_lines: usize,
}
111
112impl TextSearcher {
113    /// Create a new TextSearcher with default settings.
114    ///
115    /// # Rust Book Reference
116    ///
117    /// **Chapter 5.3: Method Syntax - Associated Functions**
118    /// https://doc.rust-lang.org/book/ch05-03-method-syntax.html#associated-functions
119    ///
120    /// # Educational Notes - Builder Constructor
121    ///
122    /// This is an associated function (not a method) that creates a new instance.
123    /// It's called with `TextSearcher::new(...)` rather than on an instance.
124    ///
125    /// **Design decisions:**
126    /// - Takes only required parameter (`base_dir`)
127    /// - Sets sensible defaults for all optional fields
128    /// - Returns owned `Self` (not `&Self`)
129    ///
130    /// **Usage pattern:**
131    /// ```rust,ignore
132    /// let searcher = TextSearcher::new(PathBuf::from("/path"))
133    ///     .case_sensitive(true)    // Optional: override default
134    ///     .respect_gitignore(false); // Optional: override default
135    /// ```
136    pub fn new(base_dir: PathBuf) -> Self {
137        Self {
138            respect_gitignore: true,
139            case_sensitive: false,
140            word_match: false,
141            is_regex: false,
142            globs: Vec::new(),
143            exclusions: Vec::new(),
144            base_dir,
145            context_lines: 2, // Default: 2 lines before and after
146        }
147    }
148
149    /// Set whether to respect .gitignore files (default: true).
150    ///
151    /// # Rust Book Reference
152    ///
153    /// **Chapter 5.3: Method Syntax**
154    /// https://doc.rust-lang.org/book/ch05-03-method-syntax.html
155    ///
156    /// # Educational Notes - Builder Method Pattern
157    ///
158    /// This method demonstrates the builder pattern's key technique:
159    ///
160    /// ```rust,ignore
161    /// pub fn respect_gitignore(mut self, value: bool) -> Self {
162    /// //                       ^^^^^^^^              ^^^^^^
163    /// //                       Takes ownership       Returns ownership
164    ///     self.respect_gitignore = value;
165    ///     self  // Return modified self for chaining
166    /// }
167    /// ```
168    ///
169    /// **Why `mut self` instead of `&mut self`?**
170    /// - `mut self` takes ownership, allowing method chaining
171    /// - `&mut self` would require explicit returns and be less ergonomic
172    /// - Ownership transfer prevents using partially-configured builders
173    ///
174    /// **Method chaining:**
175    /// ```rust,ignore
176    /// TextSearcher::new(dir)
177    ///     .respect_gitignore(false)  // Consumes and returns Self
178    ///     .case_sensitive(true)      // Consumes and returns Self
179    ///     .search("text")            // Final method takes &self
180    /// ```
181    pub fn respect_gitignore(mut self, value: bool) -> Self {
182        self.respect_gitignore = value;
183        self
184    }
185
186    /// Set whether search is case-sensitive (default: false).
187    ///
188    /// # Educational Notes
189    ///
190    /// Same builder pattern as `respect_gitignore()`. Each builder method:
191    /// 1. Takes ownership of `self`
192    /// 2. Modifies the field
193    /// 3. Returns ownership for chaining
194    pub fn case_sensitive(mut self, value: bool) -> Self {
195        self.case_sensitive = value;
196        self
197    }
198
199    /// Set whether to match whole words only (default: false)
200    pub fn word_match(mut self, value: bool) -> Self {
201        self.word_match = value;
202        self
203    }
204
205    /// Set whether to treat the query as a regex (default: false)
206    pub fn is_regex(mut self, value: bool) -> Self {
207        self.is_regex = value;
208        self
209    }
210
211    /// Add glob patterns to include
212    pub fn add_globs(mut self, globs: Vec<String>) -> Self {
213        self.globs.extend(globs);
214        self
215    }
216
217    /// Add exclusion patterns
218    pub fn add_exclusions(mut self, exclusions: Vec<String>) -> Self {
219        self.exclusions.extend(exclusions);
220        self
221    }
222
223    /// Set number of context lines to show before and after matches (default: 2)
224    pub fn context_lines(mut self, lines: usize) -> Self {
225        self.context_lines = lines;
226        self
227    }
228
229    /// Search for text and return all matches.
230    ///
231    /// # Rust Book Reference
232    ///
233    /// **Chapter 16.2: Message Passing with Channels**
234    /// https://doc.rust-lang.org/book/ch16-02-message-passing.html
235    ///
236    /// **Chapter 13.1: Closures**
237    /// https://doc.rust-lang.org/book/ch13-01-closures.html
238    ///
239    /// # Educational Notes - Concurrent Search with Channels
240    ///
241    /// This method demonstrates concurrent programming using message passing:
242    ///
243    /// 1. **Create channel**: `let (tx, rx) = mpsc::channel()`
244    /// 2. **Spawn workers**: Each thread gets a cloned sender (`tx.clone()`)
245    /// 3. **Send results**: Workers send matches through the channel
246    /// 4. **Drop original sender**: Critical for terminating the receiver
247    /// 5. **Collect results**: Main thread receives all matches
248    ///
249    /// **Why channels instead of shared state?**
250    /// - No locks needed (no `Mutex`)
251    /// - Ownership prevents data races
252    /// - Natural producer-consumer pattern
253    /// - Rust's type system ensures thread safety
254    ///
255    /// # Arguments
256    /// * `text` - The text to search for
257    ///
258    /// # Returns
259    /// A vector of Match structs containing file path, line number, and content
260    pub fn search(&self, text: &str) -> Result<Vec<Match>> {
261        // Build the regex matcher with fixed string (literal) matching
262        let matcher = RegexMatcherBuilder::new()
263            .case_insensitive(!self.case_sensitive)
264            .word(self.word_match)
265            .fixed_strings(!self.is_regex) // Use fixed strings unless regex is enabled
266            .build(text)
267            .map_err(|e| SearchError::Generic(format!("Failed to build matcher: {}", e)))?;
268
269        // Build searcher with context lines (for reference, but we use manual context capture)
270        let _searcher = SearcherBuilder::new()
271            .before_context(self.context_lines)
272            .after_context(self.context_lines)
273            .line_number(true)
274            .build();
275
276        // CHANNEL CREATION: Create a channel for collecting matches from parallel threads
277        // Chapter 16.2: mpsc = "multiple producer, single consumer"
278        // tx (transmitter) can be cloned for each thread
279        // rx (receiver) stays in the main thread
280        let (tx, rx) = mpsc::channel();
281
282        // Build parallel walker with .gitignore support
283        // Build overrides if any globs are provided
284        let mut builder = WalkBuilder::new(&self.base_dir);
285        let mut walk_builder = builder
286            .git_ignore(self.respect_gitignore)
287            .git_global(self.respect_gitignore)
288            .git_exclude(self.respect_gitignore)
289            .hidden(false); // Don't skip hidden files by default
290
291        if !self.globs.is_empty() {
292            let mut override_builder = OverrideBuilder::new(&self.base_dir);
293            for glob in &self.globs {
294                if let Err(e) = override_builder.add(glob) {
295                    return Err(SearchError::Generic(format!(
296                        "Invalid glob pattern '{}': {}",
297                        glob, e
298                    )));
299                }
300            }
301            if let Ok(overrides) = override_builder.build() {
302                walk_builder = walk_builder.overrides(overrides);
303            }
304        }
305
306        walk_builder.build_parallel().run(|| {
307            // CLONING FOR THREADS: Each thread gets its own sender and matcher
308            // Chapter 16.2: Clone tx so each thread can send messages
309            // Chapter 13.1: These clones will be moved into the closure below
310            let tx = tx.clone();
311            let matcher = matcher.clone();
312            let context_lines = self.context_lines;
313
314            // MOVE CLOSURE: Transfer ownership of tx and matcher to this thread
315            // Chapter 13.1: The `move` keyword forces the closure to take ownership
316            // Without `move`, the closure would try to borrow, which doesn't work across threads
317            Box::new(move |entry| {
318                use ignore::WalkState;
319
320                let entry = match entry {
321                    Ok(e) => e,
322                    Err(_) => return WalkState::Continue,
323                };
324
325                // Skip directories
326                if entry.file_type().is_none_or(|ft| ft.is_dir()) {
327                    return WalkState::Continue;
328                }
329
330                let path = entry.path();
331                let path_buf = path.to_path_buf();
332
333                // THREAD-LOCAL ACCUMULATOR: Each thread collects its own matches
334                // This avoids contention - no need for Mutex or Arc
335                let mut file_matches = Vec::new();
336
337                // Use grep-searcher to search the file with context
338                let mut searcher = SearcherBuilder::new()
339                    .before_context(context_lines)
340                    .after_context(context_lines)
341                    .line_number(true)
342                    .build();
343
344                let result = searcher.search_path(
345                    &matcher,
346                    path,
347                    UTF8(|line_num, line_content| {
348                        // line_content is already a &str from UTF8 sink
349                        let line_str = line_content;
350
351                        // For now, we'll collect all matches and handle context parsing later
352                        // The grep library provides context in the output, but we need to parse it
353                        file_matches.push(Match {
354                            file: path_buf.clone(),
355                            line: line_num as usize,
356                            content: line_str.trim_end().to_string(),
357                            context_before: Vec::new(), // Will be populated by post-processing
358                            context_after: Vec::new(),  // Will be populated by post-processing
359                        });
360
361                        Ok(true) // Continue searching
362                    }),
363                );
364
365                // SEND THROUGH CHANNEL: Send matches to main thread
366                // Chapter 16.2: tx.send() transfers ownership of file_matches
367                // The `let _ =` ignores send errors (receiver might be dropped)
368                if result.is_ok() && !file_matches.is_empty() {
369                    let _ = tx.send(file_matches);
370                }
371
372                WalkState::Continue
373            })
374        });
375
376        // CRITICAL: Drop the original sender so rx.iter() will terminate
377        // Chapter 16.2: The receiver's iterator only ends when ALL senders are dropped
378        // We cloned tx for each thread, but we still have the original here
379        // Without this drop, rx would wait forever!
380        drop(tx);
381
382        // COLLECT RESULTS: Receive all matches from worker threads
383        // Chapter 16.2: The for loop iterates until all senders are dropped
384        // This blocks until all threads finish and send their results
385        let mut all_matches = Vec::new();
386        for file_matches in rx {
387            all_matches.extend(file_matches);
388        }
389
390        // Post-process to add context lines using a second pass
391        self.add_context_to_matches(&mut all_matches, &matcher)?;
392
393        Ok(all_matches)
394    }
395
396    /// Add context lines to matches by re-reading files
397    fn add_context_to_matches(&self, matches: &mut [Match], _matcher: &impl Matcher) -> Result<()> {
398        use std::collections::HashMap;
399
400        // Group matches by file to minimize file reads
401        let mut matches_by_file: HashMap<PathBuf, Vec<usize>> = HashMap::new();
402        for (idx, m) in matches.iter().enumerate() {
403            matches_by_file.entry(m.file.clone()).or_default().push(idx);
404        }
405
406        // Process each file
407        for (file_path, match_indices) in matches_by_file {
408            if let Ok(content) = std::fs::read_to_string(&file_path) {
409                let lines: Vec<&str> = content.lines().collect();
410
411                for &match_idx in &match_indices {
412                    let match_ref = &mut matches[match_idx];
413                    let line_idx = match_ref.line.saturating_sub(1); // Convert to 0-indexed
414
415                    if line_idx < lines.len() {
416                        // Capture context lines
417                        let context_start = line_idx.saturating_sub(self.context_lines);
418                        let context_end =
419                            std::cmp::min(line_idx + self.context_lines + 1, lines.len());
420
421                        match_ref.context_before = lines[context_start..line_idx]
422                            .iter()
423                            .map(|s| s.to_string())
424                            .collect();
425
426                        match_ref.context_after = lines[line_idx + 1..context_end]
427                            .iter()
428                            .map(|s| s.to_string())
429                            .collect();
430                    }
431                }
432            }
433        }
434
435        Ok(())
436    }
437}
438
439impl Default for TextSearcher {
440    fn default() -> Self {
441        Self::new(std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")))
442    }
443}
444
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Creates a temporary directory containing the given `(name, content)` files.
    fn workspace(files: &[(&str, &str)]) -> TempDir {
        let dir = TempDir::new().unwrap();
        for &(name, content) in files {
            fs::write(dir.path().join(name), content).unwrap();
        }
        dir
    }

    /// Temporary "repository": a `.git` directory, a `.gitignore` that ignores
    /// `ignored.txt`, and two files that both contain the word `target`.
    fn git_workspace() -> TempDir {
        let dir = workspace(&[
            (".gitignore", "ignored.txt\n"),
            ("ignored.txt", "target content"),
            ("tracked.txt", "target content"),
        ]);
        // A `.git` directory is required for .gitignore to be honoured.
        fs::create_dir(dir.path().join(".git")).unwrap();
        dir
    }

    /// Default-configured searcher rooted at `dir`.
    fn searcher_in(dir: &TempDir) -> TextSearcher {
        TextSearcher::new(dir.path().to_path_buf())
    }

    #[test]
    fn test_basic_search() {
        let dir = workspace(&[("test.txt", "hello world\nfoo bar\nhello again")]);

        let found = searcher_in(&dir).search("hello").unwrap();

        assert_eq!(found.len(), 2);
        let (first, second) = (&found[0], &found[1]);
        assert_eq!(first.line, 1);
        assert_eq!(first.content, "hello world");
        assert_eq!(second.line, 3);
        assert_eq!(second.content, "hello again");
    }

    #[test]
    fn test_case_insensitive_default() {
        let dir = workspace(&[("test.txt", "Hello World\nHELLO\nhello")]);

        let found = searcher_in(&dir).search("hello").unwrap();

        // Every casing variant matches by default.
        assert_eq!(found.len(), 3);
    }

    #[test]
    fn test_case_sensitive() {
        let dir = workspace(&[("test.txt", "Hello World\nHELLO\nhello")]);

        let found = searcher_in(&dir)
            .case_sensitive(true)
            .search("hello")
            .unwrap();

        // Only the exact-case line matches.
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].content, "hello");
    }

    #[test]
    fn test_no_matches() {
        let dir = workspace(&[("test.txt", "foo bar baz")]);

        let found = searcher_in(&dir).search("notfound").unwrap();

        assert_eq!(found.len(), 0);
    }

    #[test]
    fn test_multiple_files() {
        let dir = workspace(&[
            ("file1.txt", "target line 1"),
            ("file2.txt", "target line 2"),
            ("file3.txt", "other content"),
        ]);

        let found = searcher_in(&dir).search("target").unwrap();

        assert_eq!(found.len(), 2);
    }

    #[test]
    fn test_gitignore_respected() {
        let dir = git_workspace();

        let found = searcher_in(&dir)
            .respect_gitignore(true)
            .search("target")
            .unwrap();

        // Only tracked.txt is searched.
        assert_eq!(found.len(), 1);
        assert!(found[0].file.ends_with("tracked.txt"));
    }

    #[test]
    fn test_gitignore_disabled() {
        let dir = git_workspace();

        let found = searcher_in(&dir)
            .respect_gitignore(false)
            .search("target")
            .unwrap();

        // Both the ignored and the tracked file are searched.
        assert_eq!(found.len(), 2);
    }

    #[test]
    fn test_builder_pattern() {
        let cwd = std::env::current_dir().unwrap();
        let searcher = TextSearcher::new(cwd)
            .case_sensitive(true)
            .respect_gitignore(false);

        assert!(searcher.case_sensitive);
        assert!(!searcher.respect_gitignore);
    }

    #[test]
    fn test_default() {
        let searcher = TextSearcher::default();

        assert!(!searcher.case_sensitive);
        assert!(searcher.respect_gitignore);
        assert_eq!(searcher.context_lines, 2);
    }

    #[test]
    fn test_special_characters() {
        let dir = workspace(&[("test.txt", "price: $19.99\nurl: http://example.com")]);
        let searcher = searcher_in(&dir);

        // Regex metacharacters in the query are treated as literals.
        let price_hits = searcher.search("$19.99").unwrap();
        assert_eq!(price_hits.len(), 1);

        let url_hits = searcher.search("http://").unwrap();
        assert_eq!(url_hits.len(), 1);
    }

    #[test]
    fn test_line_numbers_accurate() {
        let dir = workspace(&[(
            "test.txt",
            "line 1\nline 2\ntarget line 3\nline 4\ntarget line 5\nline 6",
        )]);

        let found = searcher_in(&dir).search("target").unwrap();

        assert_eq!(found.len(), 2);
        assert_eq!(found[0].line, 3);
        assert_eq!(found[1].line, 5);
    }
}