Skip to main content

ix/
scanner.rs

1//! Fallback scanner (no index, competitive with ripgrep).
2//!
3//! Used when .ix index is missing or explicitly disabled.
4
5use crate::decompress::maybe_decompress;
6use crate::error::Result;
7use crate::format::is_binary;
8use crate::executor::{Match, QueryOptions};
9use ignore::WalkBuilder;
10use memmap2::Mmap;
11use rayon::prelude::*;
12use regex::Regex;
13use std::fs::File;
14use std::io::{BufRead, BufReader, Cursor, Read};
15use std::path::{Path, PathBuf};
16use std::sync::atomic::{AtomicU32, Ordering};
17
/// Index-free fallback searcher.
///
/// Holds only the directory that a scan starts from; all matching state is
/// created per call, so one `Scanner` can be reused (or cloned) freely.
#[derive(Debug, Clone)]
pub struct Scanner {
    /// Directory the file walk starts from.
    root: PathBuf,
}
21
22impl Scanner {
23    pub fn new(root: &Path) -> Self {
24        Self {
25            root: root.to_owned(),
26        }
27    }
28
29    pub fn scan(
30        &self,
31        pattern: &str,
32        is_regex: bool,
33        ignore_case: bool,
34        options: &QueryOptions,
35    ) -> Result<Vec<Match>> {
36        let raw = if is_regex {
37            pattern.to_string()
38        } else {
39            regex::escape(pattern)
40        };
41        let regex_pat = if ignore_case { format!("(?i){raw}") } else { raw };
42        let regex = Regex::new(&regex_pat)?;
43
44        let walker = WalkBuilder::new(&self.root)
45            .hidden(false)
46            .git_ignore(true)
47            .build();
48
49        let paths: Vec<PathBuf> = walker
50            .filter_map(|result| result.ok())
51            .filter(|entry| entry.file_type().map(|t| t.is_file()).unwrap_or(false))
52            .map(|entry| entry.path().to_owned())
53            .collect();
54
55        let matches_found = AtomicU32::new(0);
56        let mut matches: Vec<Match> = paths
57            .into_par_iter()
58            .filter_map(|path| {
59                if options.max_results > 0
60                    && matches_found.load(Ordering::Relaxed) >= options.max_results as u32
61                {
62                    return None;
63                }
64
65                // Filter by extension
66                if !options.type_filter.is_empty() {
67                    let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
68                    if !options.type_filter.iter().any(|e: &String| e == ext) {
69                        return None;
70                    }
71                }
72
73                // Archive support
74                if options.archive {
75                    let _ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
76                    let _is_tar_gz = path.to_str().map(|s| s.ends_with(".tar.gz")).unwrap_or(false);
77
78                    #[cfg(feature = "archive")]
79                    {
80                        if _ext == "zip"
81                            && let Ok(archive_matches) = crate::archive::scan_zip(&path, &regex, options)
82                        {
83                            matches_found.fetch_add(archive_matches.len() as u32, Ordering::Relaxed);
84                            return Some(archive_matches);
85                        } else if _is_tar_gz
86                            && let Ok(archive_matches) = crate::archive::scan_tar_gz(&path, &regex, options)
87                        {
88                            matches_found.fetch_add(archive_matches.len() as u32, Ordering::Relaxed);
89                            return Some(archive_matches);
90                        }
91                    }
92                }
93
94                let file_matches = self.scan_file(&path, &regex, options).ok()?;
95                matches_found.fetch_add(file_matches.len() as u32, Ordering::Relaxed);
96                Some(file_matches)
97            })
98            .flatten()
99            .collect();
100
101        if options.max_results > 0 && matches.len() > options.max_results {
102            matches.truncate(options.max_results);
103        }
104
105        Ok(matches)
106    }
107
108    fn scan_stream<R: Read>(
109        &self,
110        reader: R,
111        path: &Path,
112        regex: &Regex,
113        options: &QueryOptions,
114    ) -> Result<Vec<Match>> {
115        let mut buf_reader = BufReader::new(reader);
116        let mut matches = Vec::new();
117        let mut line_number = 0u32;
118        let mut byte_offset = 0u64;
119
120        // Binary check on first 8KB
121        {
122            let buffer = buf_reader.fill_buf()?;
123            if buffer.is_empty() {
124                return Ok(vec![]);
125            }
126            let is_bin = is_binary(buffer);
127            if is_bin && !options.binary {
128                return Ok(vec![]);
129            }
130        }
131
132        let mut line = String::new();
133        let mut context_before = std::collections::VecDeque::new();
134        let mut pending_matches: Vec<Match> = Vec::new();
135
136        while buf_reader.read_line(&mut line)? > 0 {
137            line_number += 1;
138            let line_len = line.len() as u64;
139            let trimmed_line = line.trim_end().to_string();
140
141            // Fill context_after for pending matches
142            for m in &mut pending_matches {
143                if m.context_after.len() < options.context_lines {
144                    m.context_after.push(trimmed_line.clone());
145                }
146            }
147
148            // Move completed matches to final list
149            let (completed, still_pending): (Vec<_>, Vec<_>) = pending_matches
150                .into_iter()
151                .partition(|m| m.context_after.len() >= options.context_lines);
152            matches.extend(completed);
153            pending_matches = still_pending;
154
155            if let Some(m) = regex.find(&line) {
156                let context_before_vec: Vec<String> =
157                    context_before.iter().map(|s: &String| s.trim_end().to_string()).collect();
158
159                let new_match = Match {
160                    file_path: path.to_owned(),
161                    line_number,
162                    col: (m.start() + 1) as u32,
163                    line_content: if options.count_only {
164                        String::new()
165                    } else {
166                        trimmed_line.clone()
167                    },
168                    byte_offset: byte_offset + m.start() as u64,
169                    context_before: context_before_vec,
170                    context_after: vec![],
171                    is_binary: false,
172                };
173
174                if options.context_lines > 0 {
175                    pending_matches.push(new_match);
176                } else {
177                    matches.push(new_match);
178                }
179
180                if options.max_results > 0
181                    && (matches.len() + pending_matches.len()) >= options.max_results
182                    && (pending_matches.is_empty() || matches.len() >= options.max_results)
183                {
184                    break;
185                }
186            }
187
188            if options.context_lines > 0 {
189                context_before.push_back(line.clone());
190                if context_before.len() > options.context_lines {
191                    context_before.pop_front();
192                }
193            }
194
195            byte_offset += line_len;
196            line.clear();
197        }
198
199        matches.extend(pending_matches);
200        Ok(matches)
201    }
202
203    fn scan_file(
204        &self,
205        path: &Path,
206        regex: &Regex,
207        options: &QueryOptions,
208    ) -> Result<Vec<Match>> {
209        let file = File::open(path)?;
210        let metadata = file.metadata()?;
211        if metadata.len() > 100 * 1024 * 1024 && !options.decompress {
212            // Keep 100MB limit for raw files to avoid huge mmaps in parallel
213            return Ok(vec![]);
214        }
215
216        let mmap = unsafe { Mmap::map(&file)? };
217
218        if options.decompress
219            && let Some(reader) = maybe_decompress(path, &mmap)? {
220            return self.scan_stream(reader, path, regex, options);
221        }
222
223        // Default to streaming via Cursor for uncompressed files to ensure constant memory (R-02)
224        self.scan_stream(Cursor::new(&mmap[..]), path, regex, options)
225    }
226}