grep_searcher/sink.rs
1use std::io;
2
3use grep_matcher::LineTerminator;
4
5use crate::{
6 lines::LineIter,
7 searcher::{ConfigError, Searcher},
8};
9
10/// A trait that describes errors that can be reported by searchers and
11/// implementations of `Sink`.
12///
13/// Unless you have a specialized use case, you probably don't need to
14/// implement this trait explicitly. It's likely that using `std::io::Error`
15/// (which implements this trait) for your error type is good enough,
16/// largely because most errors that occur during search will likely be an
17/// `std::io::Error`.
18pub trait SinkError: Sized {
19 /// A constructor for converting any value that satisfies the
20 /// `std::fmt::Display` trait into an error.
21 fn error_message<T: std::fmt::Display>(message: T) -> Self;
22
23 /// A constructor for converting I/O errors that occur while searching into
24 /// an error of this type.
25 ///
26 /// By default, this is implemented via the `error_message` constructor.
27 fn error_io(err: io::Error) -> Self {
28 Self::error_message(err)
29 }
30
31 /// A constructor for converting configuration errors that occur while
32 /// building a searcher into an error of this type.
33 ///
34 /// By default, this is implemented via the `error_message` constructor.
35 fn error_config(err: ConfigError) -> Self {
36 Self::error_message(err)
37 }
38}
39
40/// An `std::io::Error` can be used as an error for `Sink` implementations out
41/// of the box.
42impl SinkError for io::Error {
43 fn error_message<T: std::fmt::Display>(message: T) -> io::Error {
44 io::Error::new(io::ErrorKind::Other, message.to_string())
45 }
46
47 fn error_io(err: io::Error) -> io::Error {
48 err
49 }
50}
51
52/// A `Box<dyn std::error::Error>` can be used as an error for `Sink`
53/// implementations out of the box.
54impl SinkError for Box<dyn std::error::Error> {
55 fn error_message<T: std::fmt::Display>(
56 message: T,
57 ) -> Box<dyn std::error::Error> {
58 Box::<dyn std::error::Error>::from(message.to_string())
59 }
60}
61
62/// A trait that defines how results from searchers are handled.
63///
64/// In this crate, a searcher follows the "push" model. What that means is that
65/// the searcher drives execution, and pushes results back to the caller. This
66/// is in contrast to a "pull" model where the caller drives execution and
67/// takes results as they need them. These are also known as "internal" and
68/// "external" iteration strategies, respectively.
69///
70/// For a variety of reasons, including the complexity of the searcher
71/// implementation, this crate chooses the "push" or "internal" model of
72/// execution. Thus, in order to act on search results, callers must provide
73/// an implementation of this trait to a searcher, and the searcher is then
74/// responsible for calling the methods on this trait.
75///
76/// This trait defines several behaviors:
77///
78/// * What to do when a match is found. Callers must provide this.
79/// * What to do when an error occurs. Callers must provide this via the
80/// [`SinkError`] trait. Generally, callers can just use `std::io::Error` for
81/// this, which already implements `SinkError`.
82/// * What to do when a contextual line is found. By default, these are
83/// ignored.
84/// * What to do when a gap between contextual lines has been found. By
85/// default, this is ignored.
86/// * What to do when a search has started. By default, this does nothing.
87/// * What to do when a search has finished successfully. By default, this does
88/// nothing.
89///
90/// Callers must, at minimum, specify the behavior when an error occurs and
91/// the behavior when a match occurs. The rest is optional. For each behavior,
92/// callers may report an error (say, if writing the result to another
93/// location failed) or simply return `false` if they want the search to stop
94/// (e.g., when implementing a cap on the number of search results to show).
95///
96/// When errors are reported (whether in the searcher or in the implementation
97/// of `Sink`), then searchers quit immediately without calling `finish`.
98///
99/// For simpler uses of `Sink`, callers may elect to use one of
100/// the more convenient but less flexible implementations in the
101/// [`sinks`] module.
102pub trait Sink {
103 /// The type of an error that should be reported by a searcher.
104 ///
105 /// Errors of this type are not only returned by the methods on this
106 /// trait, but the constructors defined in `SinkError` are also used in
107 /// the searcher implementation itself. e.g., When a I/O error occurs when
108 /// reading data from a file.
109 type Error: SinkError;
110
111 /// This method is called whenever a match is found.
112 ///
113 /// If multi line is enabled on the searcher, then the match reported here
114 /// may span multiple lines and it may include multiple matches. When multi
115 /// line is disabled, then the match is guaranteed to span exactly one
116 /// non-empty line (where a single line is, at minimum, a line terminator).
117 ///
118 /// If this returns `true`, then searching continues. If this returns
119 /// `false`, then searching is stopped immediately and `finish` is called.
120 ///
121 /// If this returns an error, then searching is stopped immediately,
122 /// `finish` is not called and the error is bubbled back up to the caller
123 /// of the searcher.
124 fn matched(
125 &mut self,
126 _searcher: &Searcher,
127 _mat: &SinkMatch<'_>,
128 ) -> Result<bool, Self::Error>;
129
130 /// This method is called whenever a context line is found, and is optional
131 /// to implement. By default, it does nothing and returns `true`.
132 ///
133 /// In all cases, the context given is guaranteed to span exactly one
134 /// non-empty line (where a single line is, at minimum, a line terminator).
135 ///
136 /// If this returns `true`, then searching continues. If this returns
137 /// `false`, then searching is stopped immediately and `finish` is called.
138 ///
139 /// If this returns an error, then searching is stopped immediately,
140 /// `finish` is not called and the error is bubbled back up to the caller
141 /// of the searcher.
142 #[inline]
143 fn context(
144 &mut self,
145 _searcher: &Searcher,
146 _context: &SinkContext<'_>,
147 ) -> Result<bool, Self::Error> {
148 Ok(true)
149 }
150
151 /// This method is called whenever a break in contextual lines is found,
152 /// and is optional to implement. By default, it does nothing and returns
153 /// `true`.
154 ///
155 /// A break can only occur when context reporting is enabled (that is,
156 /// either or both of `before_context` or `after_context` are greater than
157 /// `0`). More precisely, a break occurs between non-contiguous groups of
158 /// lines.
159 ///
160 /// If this returns `true`, then searching continues. If this returns
161 /// `false`, then searching is stopped immediately and `finish` is called.
162 ///
163 /// If this returns an error, then searching is stopped immediately,
164 /// `finish` is not called and the error is bubbled back up to the caller
165 /// of the searcher.
166 #[inline]
167 fn context_break(
168 &mut self,
169 _searcher: &Searcher,
170 ) -> Result<bool, Self::Error> {
171 Ok(true)
172 }
173
174 /// This method is called whenever binary detection is enabled and binary
175 /// data is found. If binary data is found, then this is called at least
176 /// once for the first occurrence with the absolute byte offset at which
177 /// the binary data begins.
178 ///
179 /// If this returns `true`, then searching continues. If this returns
180 /// `false`, then searching is stopped immediately and `finish` is called.
181 ///
182 /// If this returns an error, then searching is stopped immediately,
183 /// `finish` is not called and the error is bubbled back up to the caller
184 /// of the searcher.
185 ///
186 /// By default, it does nothing and returns `true`.
187 #[inline]
188 fn binary_data(
189 &mut self,
190 _searcher: &Searcher,
191 _binary_byte_offset: u64,
192 ) -> Result<bool, Self::Error> {
193 Ok(true)
194 }
195
196 /// This method is called when a search has begun, before any search is
197 /// executed. By default, this does nothing.
198 ///
199 /// If this returns `true`, then searching continues. If this returns
200 /// `false`, then searching is stopped immediately and `finish` is called.
201 ///
202 /// If this returns an error, then searching is stopped immediately,
203 /// `finish` is not called and the error is bubbled back up to the caller
204 /// of the searcher.
205 #[inline]
206 fn begin(&mut self, _searcher: &Searcher) -> Result<bool, Self::Error> {
207 Ok(true)
208 }
209
210 /// This method is called when a search has completed. By default, this
211 /// does nothing.
212 ///
213 /// If this returns an error, the error is bubbled back up to the caller of
214 /// the searcher.
215 #[inline]
216 fn finish(
217 &mut self,
218 _searcher: &Searcher,
219 _: &SinkFinish,
220 ) -> Result<(), Self::Error> {
221 Ok(())
222 }
223}
224
225impl<'a, S: Sink> Sink for &'a mut S {
226 type Error = S::Error;
227
228 #[inline]
229 fn matched(
230 &mut self,
231 searcher: &Searcher,
232 mat: &SinkMatch<'_>,
233 ) -> Result<bool, S::Error> {
234 (**self).matched(searcher, mat)
235 }
236
237 #[inline]
238 fn context(
239 &mut self,
240 searcher: &Searcher,
241 context: &SinkContext<'_>,
242 ) -> Result<bool, S::Error> {
243 (**self).context(searcher, context)
244 }
245
246 #[inline]
247 fn context_break(
248 &mut self,
249 searcher: &Searcher,
250 ) -> Result<bool, S::Error> {
251 (**self).context_break(searcher)
252 }
253
254 #[inline]
255 fn binary_data(
256 &mut self,
257 searcher: &Searcher,
258 binary_byte_offset: u64,
259 ) -> Result<bool, S::Error> {
260 (**self).binary_data(searcher, binary_byte_offset)
261 }
262
263 #[inline]
264 fn begin(&mut self, searcher: &Searcher) -> Result<bool, S::Error> {
265 (**self).begin(searcher)
266 }
267
268 #[inline]
269 fn finish(
270 &mut self,
271 searcher: &Searcher,
272 sink_finish: &SinkFinish,
273 ) -> Result<(), S::Error> {
274 (**self).finish(searcher, sink_finish)
275 }
276}
277
278impl<S: Sink + ?Sized> Sink for Box<S> {
279 type Error = S::Error;
280
281 #[inline]
282 fn matched(
283 &mut self,
284 searcher: &Searcher,
285 mat: &SinkMatch<'_>,
286 ) -> Result<bool, S::Error> {
287 (**self).matched(searcher, mat)
288 }
289
290 #[inline]
291 fn context(
292 &mut self,
293 searcher: &Searcher,
294 context: &SinkContext<'_>,
295 ) -> Result<bool, S::Error> {
296 (**self).context(searcher, context)
297 }
298
299 #[inline]
300 fn context_break(
301 &mut self,
302 searcher: &Searcher,
303 ) -> Result<bool, S::Error> {
304 (**self).context_break(searcher)
305 }
306
307 #[inline]
308 fn binary_data(
309 &mut self,
310 searcher: &Searcher,
311 binary_byte_offset: u64,
312 ) -> Result<bool, S::Error> {
313 (**self).binary_data(searcher, binary_byte_offset)
314 }
315
316 #[inline]
317 fn begin(&mut self, searcher: &Searcher) -> Result<bool, S::Error> {
318 (**self).begin(searcher)
319 }
320
321 #[inline]
322 fn finish(
323 &mut self,
324 searcher: &Searcher,
325 sink_finish: &SinkFinish,
326 ) -> Result<(), S::Error> {
327 (**self).finish(searcher, sink_finish)
328 }
329}
330
/// Summary information handed to a sink once a search is over.
///
/// It carries the total number of bytes searched and, if any binary data
/// was found, the absolute offset of its first occurrence.
///
/// `finish` is not called when a searcher stops early because of an error.
/// It is still called when a searcher stops early because the `Sink`
/// implementor told it to.
#[derive(Debug, Clone)]
pub struct SinkFinish {
    pub(crate) byte_count: u64,
    pub(crate) binary_byte_offset: Option<u64>,
}

impl SinkFinish {
    /// Returns how many bytes were searched in total.
    #[inline]
    pub fn byte_count(&self) -> u64 {
        let Self { byte_count, .. } = *self;
        byte_count
    }

    /// Returns the absolute byte offset of the first detected byte of
    /// binary data, provided binary detection is enabled and binary data
    /// was found.
    ///
    /// Because the offset is absolute, it cannot be relied upon to index
    /// into any addressable memory.
    #[inline]
    pub fn binary_byte_offset(&self) -> Option<u64> {
        let Self { binary_byte_offset, .. } = *self;
        binary_byte_offset
    }
}
363
364/// A type that describes a match reported by a searcher.
365#[derive(Clone, Debug)]
366pub struct SinkMatch<'b> {
367 pub(crate) line_term: LineTerminator,
368 pub(crate) bytes: &'b [u8],
369 pub(crate) absolute_byte_offset: u64,
370 pub(crate) line_number: Option<u64>,
371 pub(crate) buffer: &'b [u8],
372 pub(crate) bytes_range_in_buffer: std::ops::Range<usize>,
373}
374
375impl<'b> SinkMatch<'b> {
376 /// Returns the bytes for all matching lines, including the line
377 /// terminators, if they exist.
378 #[inline]
379 pub fn bytes(&self) -> &'b [u8] {
380 self.bytes
381 }
382
383 /// Return an iterator over the lines in this match.
384 ///
385 /// If multi line search is enabled, then this may yield more than one
386 /// line (but always at least one line). If multi line search is disabled,
387 /// then this always reports exactly one line (but may consist of just
388 /// the line terminator).
389 ///
390 /// Lines yielded by this iterator include their terminators.
391 #[inline]
392 pub fn lines(&self) -> LineIter<'b> {
393 LineIter::new(self.line_term.as_byte(), self.bytes)
394 }
395
396 /// Returns the absolute byte offset of the start of this match. This
397 /// offset is absolute in that it is relative to the very beginning of the
398 /// input in a search, and can never be relied upon to be a valid index
399 /// into an in-memory slice.
400 #[inline]
401 pub fn absolute_byte_offset(&self) -> u64 {
402 self.absolute_byte_offset
403 }
404
405 /// Returns the line number of the first line in this match, if available.
406 ///
407 /// Line numbers are only available when the search builder is instructed
408 /// to compute them.
409 #[inline]
410 pub fn line_number(&self) -> Option<u64> {
411 self.line_number
412 }
413
414 /// Exposes as much of the underlying buffer that was search as possible.
415 #[inline]
416 pub fn buffer(&self) -> &'b [u8] {
417 self.buffer
418 }
419
420 /// Returns a range that corresponds to where [`SinkMatch::bytes`] appears
421 /// in [`SinkMatch::buffer`].
422 #[inline]
423 pub fn bytes_range_in_buffer(&self) -> std::ops::Range<usize> {
424 self.bytes_range_in_buffer.clone()
425 }
426}
427
/// The kind of contextual line reported by a searcher.
///
/// This is a plain fieldless enum, so it is `Copy` and `Hash` in addition to
/// being comparable: callers holding a `&SinkContextKind` can dereference it
/// instead of cloning, and can use it as a key in hashed collections.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum SinkContextKind {
    /// The reported line occurred before a match.
    Before,
    /// The reported line occurred after a match.
    After,
    /// Any other kind of reported context, e.g., as a result of a
    /// searcher's "passthru" mode.
    Other,
}
439
440/// A type that describes a contextual line reported by a searcher.
441#[derive(Clone, Debug)]
442pub struct SinkContext<'b> {
443 #[cfg(test)]
444 pub(crate) line_term: LineTerminator,
445 pub(crate) bytes: &'b [u8],
446 pub(crate) kind: SinkContextKind,
447 pub(crate) absolute_byte_offset: u64,
448 pub(crate) line_number: Option<u64>,
449}
450
451impl<'b> SinkContext<'b> {
452 /// Returns the context bytes, including line terminators.
453 #[inline]
454 pub fn bytes(&self) -> &'b [u8] {
455 self.bytes
456 }
457
458 /// Returns the type of context.
459 #[inline]
460 pub fn kind(&self) -> &SinkContextKind {
461 &self.kind
462 }
463
464 /// Return an iterator over the lines in this match.
465 ///
466 /// This always yields exactly one line (and that one line may contain just
467 /// the line terminator).
468 ///
469 /// Lines yielded by this iterator include their terminators.
470 #[cfg(test)]
471 pub(crate) fn lines(&self) -> LineIter<'b> {
472 LineIter::new(self.line_term.as_byte(), self.bytes)
473 }
474
475 /// Returns the absolute byte offset of the start of this context. This
476 /// offset is absolute in that it is relative to the very beginning of the
477 /// input in a search, and can never be relied upon to be a valid index
478 /// into an in-memory slice.
479 #[inline]
480 pub fn absolute_byte_offset(&self) -> u64 {
481 self.absolute_byte_offset
482 }
483
484 /// Returns the line number of the first line in this context, if
485 /// available.
486 ///
487 /// Line numbers are only available when the search builder is instructed
488 /// to compute them.
489 #[inline]
490 pub fn line_number(&self) -> Option<u64> {
491 self.line_number
492 }
493}
494
495/// A collection of convenience implementations of `Sink`.
496///
497/// Each implementation in this module makes some kind of sacrifice in the name
498/// of making common cases easier to use. Most frequently, each type is a
499/// wrapper around a closure specified by the caller that provides limited
500/// access to the full suite of information available to implementors of
501/// `Sink`.
502///
503/// For example, the `UTF8` sink makes the following sacrifices:
504///
505/// * All matches must be UTF-8. An arbitrary `Sink` does not have this
506/// restriction and can deal with arbitrary data. If this sink sees invalid
507/// UTF-8, then an error is returned and searching stops. (Use the `Lossy`
508/// sink instead to suppress this error.)
509/// * The searcher must be configured to report line numbers. If it isn't,
510/// an error is reported at the first match and searching stops.
511/// * Context lines, context breaks and summary data reported at the end of
512/// a search are all ignored.
513/// * Implementors are forced to use `std::io::Error` as their error type.
514///
515/// If you need more flexibility, then you're advised to implement the `Sink`
516/// trait directly.
517pub mod sinks {
518 use std::io;
519
520 use crate::searcher::Searcher;
521
522 use super::{Sink, SinkError, SinkMatch};
523
524 /// A sink that provides line numbers and matches as strings while ignoring
525 /// everything else.
526 ///
527 /// This implementation will return an error if a match contains invalid
528 /// UTF-8 or if the searcher was not configured to count lines. Errors
529 /// on invalid UTF-8 can be suppressed by using the `Lossy` sink instead
530 /// of this one.
531 ///
532 /// The closure accepts two parameters: a line number and a UTF-8 string
533 /// containing the matched data. The closure returns a
534 /// `Result<bool, std::io::Error>`. If the `bool` is `false`, then the
535 /// search stops immediately. Otherwise, searching continues.
536 ///
537 /// If multi line mode was enabled, the line number refers to the line
538 /// number of the first line in the match.
539 #[derive(Clone, Debug)]
540 pub struct UTF8<F>(pub F)
541 where
542 F: FnMut(u64, &str) -> Result<bool, io::Error>;
543
544 impl<F> Sink for UTF8<F>
545 where
546 F: FnMut(u64, &str) -> Result<bool, io::Error>,
547 {
548 type Error = io::Error;
549
550 fn matched(
551 &mut self,
552 _searcher: &Searcher,
553 mat: &SinkMatch<'_>,
554 ) -> Result<bool, io::Error> {
555 let matched = match std::str::from_utf8(mat.bytes()) {
556 Ok(matched) => matched,
557 Err(err) => return Err(io::Error::error_message(err)),
558 };
559 let line_number = match mat.line_number() {
560 Some(line_number) => line_number,
561 None => {
562 let msg = "line numbers not enabled";
563 return Err(io::Error::error_message(msg));
564 }
565 };
566 (self.0)(line_number, &matched)
567 }
568 }
569
570 /// A sink that provides line numbers and matches as (lossily converted)
571 /// strings while ignoring everything else.
572 ///
573 /// This is like `UTF8`, except that if a match contains invalid UTF-8,
574 /// then it will be lossily converted to valid UTF-8 by substituting
575 /// invalid UTF-8 with Unicode replacement characters.
576 ///
577 /// This implementation will return an error on the first match if the
578 /// searcher was not configured to count lines.
579 ///
580 /// The closure accepts two parameters: a line number and a UTF-8 string
581 /// containing the matched data. The closure returns a
582 /// `Result<bool, std::io::Error>`. If the `bool` is `false`, then the
583 /// search stops immediately. Otherwise, searching continues.
584 ///
585 /// If multi line mode was enabled, the line number refers to the line
586 /// number of the first line in the match.
587 #[derive(Clone, Debug)]
588 pub struct Lossy<F>(pub F)
589 where
590 F: FnMut(u64, &str) -> Result<bool, io::Error>;
591
592 impl<F> Sink for Lossy<F>
593 where
594 F: FnMut(u64, &str) -> Result<bool, io::Error>,
595 {
596 type Error = io::Error;
597
598 fn matched(
599 &mut self,
600 _searcher: &Searcher,
601 mat: &SinkMatch<'_>,
602 ) -> Result<bool, io::Error> {
603 use std::borrow::Cow;
604
605 let matched = match std::str::from_utf8(mat.bytes()) {
606 Ok(matched) => Cow::Borrowed(matched),
607 // TODO: In theory, it should be possible to amortize
608 // allocation here, but `std` doesn't provide such an API.
609 // Regardless, this only happens on matches with invalid UTF-8,
610 // which should be pretty rare.
611 Err(_) => String::from_utf8_lossy(mat.bytes()),
612 };
613 let line_number = match mat.line_number() {
614 Some(line_number) => line_number,
615 None => {
616 let msg = "line numbers not enabled";
617 return Err(io::Error::error_message(msg));
618 }
619 };
620 (self.0)(line_number, &matched)
621 }
622 }
623
624 /// A sink that provides line numbers and matches as raw bytes while
625 /// ignoring everything else.
626 ///
627 /// This implementation will return an error on the first match if the
628 /// searcher was not configured to count lines.
629 ///
630 /// The closure accepts two parameters: a line number and a raw byte string
631 /// containing the matched data. The closure returns a
632 /// `Result<bool, std::io::Error>`. If the `bool` is `false`, then the
633 /// search stops immediately. Otherwise, searching continues.
634 ///
635 /// If multi line mode was enabled, the line number refers to the line
636 /// number of the first line in the match.
637 #[derive(Clone, Debug)]
638 pub struct Bytes<F>(pub F)
639 where
640 F: FnMut(u64, &[u8]) -> Result<bool, io::Error>;
641
642 impl<F> Sink for Bytes<F>
643 where
644 F: FnMut(u64, &[u8]) -> Result<bool, io::Error>,
645 {
646 type Error = io::Error;
647
648 fn matched(
649 &mut self,
650 _searcher: &Searcher,
651 mat: &SinkMatch<'_>,
652 ) -> Result<bool, io::Error> {
653 let line_number = match mat.line_number() {
654 Some(line_number) => line_number,
655 None => {
656 let msg = "line numbers not enabled";
657 return Err(io::Error::error_message(msg));
658 }
659 };
660 (self.0)(line_number, mat.bytes())
661 }
662 }
663}