// grep_searcher/sink.rs

1use std::io;
2
3use grep_matcher::LineTerminator;
4
5use crate::{
6    lines::LineIter,
7    searcher::{ConfigError, Searcher},
8};
9
10/// A trait that describes errors that can be reported by searchers and
11/// implementations of `Sink`.
12///
13/// Unless you have a specialized use case, you probably don't need to
14/// implement this trait explicitly. It's likely that using `std::io::Error`
15/// (which implements this trait) for your error type is good enough,
16/// largely because most errors that occur during search will likely be an
17/// `std::io::Error`.
18pub trait SinkError: Sized {
19    /// A constructor for converting any value that satisfies the
20    /// `std::fmt::Display` trait into an error.
21    fn error_message<T: std::fmt::Display>(message: T) -> Self;
22
23    /// A constructor for converting I/O errors that occur while searching into
24    /// an error of this type.
25    ///
26    /// By default, this is implemented via the `error_message` constructor.
27    fn error_io(err: io::Error) -> Self {
28        Self::error_message(err)
29    }
30
31    /// A constructor for converting configuration errors that occur while
32    /// building a searcher into an error of this type.
33    ///
34    /// By default, this is implemented via the `error_message` constructor.
35    fn error_config(err: ConfigError) -> Self {
36        Self::error_message(err)
37    }
38}
39
40/// An `std::io::Error` can be used as an error for `Sink` implementations out
41/// of the box.
42impl SinkError for io::Error {
43    fn error_message<T: std::fmt::Display>(message: T) -> io::Error {
44        io::Error::new(io::ErrorKind::Other, message.to_string())
45    }
46
47    fn error_io(err: io::Error) -> io::Error {
48        err
49    }
50}
51
52/// A `Box<dyn std::error::Error>` can be used as an error for `Sink`
53/// implementations out of the box.
54impl SinkError for Box<dyn std::error::Error> {
55    fn error_message<T: std::fmt::Display>(
56        message: T,
57    ) -> Box<dyn std::error::Error> {
58        Box::<dyn std::error::Error>::from(message.to_string())
59    }
60}
61
62/// A trait that defines how results from searchers are handled.
63///
64/// In this crate, a searcher follows the "push" model. What that means is that
65/// the searcher drives execution, and pushes results back to the caller. This
66/// is in contrast to a "pull" model where the caller drives execution and
67/// takes results as they need them. These are also known as "internal" and
68/// "external" iteration strategies, respectively.
69///
70/// For a variety of reasons, including the complexity of the searcher
71/// implementation, this crate chooses the "push" or "internal" model of
72/// execution. Thus, in order to act on search results, callers must provide
73/// an implementation of this trait to a searcher, and the searcher is then
74/// responsible for calling the methods on this trait.
75///
76/// This trait defines several behaviors:
77///
78/// * What to do when a match is found. Callers must provide this.
79/// * What to do when an error occurs. Callers must provide this via the
80///   [`SinkError`] trait. Generally, callers can just use `std::io::Error` for
81///   this, which already implements `SinkError`.
82/// * What to do when a contextual line is found. By default, these are
83///   ignored.
84/// * What to do when a gap between contextual lines has been found. By
85///   default, this is ignored.
86/// * What to do when a search has started. By default, this does nothing.
87/// * What to do when a search has finished successfully. By default, this does
88///   nothing.
89///
90/// Callers must, at minimum, specify the behavior when an error occurs and
91/// the behavior when a match occurs. The rest is optional. For each behavior,
92/// callers may report an error (say, if writing the result to another
93/// location failed) or simply return `false` if they want the search to stop
94/// (e.g., when implementing a cap on the number of search results to show).
95///
96/// When errors are reported (whether in the searcher or in the implementation
97/// of `Sink`), then searchers quit immediately without calling `finish`.
98///
99/// For simpler uses of `Sink`, callers may elect to use one of
100/// the more convenient but less flexible implementations in the
101/// [`sinks`] module.
102pub trait Sink {
103    /// The type of an error that should be reported by a searcher.
104    ///
105    /// Errors of this type are not only returned by the methods on this
106    /// trait, but the constructors defined in `SinkError` are also used in
107    /// the searcher implementation itself. e.g., When a I/O error occurs when
108    /// reading data from a file.
109    type Error: SinkError;
110
111    /// This method is called whenever a match is found.
112    ///
113    /// If multi line is enabled on the searcher, then the match reported here
114    /// may span multiple lines and it may include multiple matches. When multi
115    /// line is disabled, then the match is guaranteed to span exactly one
116    /// non-empty line (where a single line is, at minimum, a line terminator).
117    ///
118    /// If this returns `true`, then searching continues. If this returns
119    /// `false`, then searching is stopped immediately and `finish` is called.
120    ///
121    /// If this returns an error, then searching is stopped immediately,
122    /// `finish` is not called and the error is bubbled back up to the caller
123    /// of the searcher.
124    fn matched(
125        &mut self,
126        _searcher: &Searcher,
127        _mat: &SinkMatch<'_>,
128    ) -> Result<bool, Self::Error>;
129
130    /// This method is called whenever a context line is found, and is optional
131    /// to implement. By default, it does nothing and returns `true`.
132    ///
133    /// In all cases, the context given is guaranteed to span exactly one
134    /// non-empty line (where a single line is, at minimum, a line terminator).
135    ///
136    /// If this returns `true`, then searching continues. If this returns
137    /// `false`, then searching is stopped immediately and `finish` is called.
138    ///
139    /// If this returns an error, then searching is stopped immediately,
140    /// `finish` is not called and the error is bubbled back up to the caller
141    /// of the searcher.
142    #[inline]
143    fn context(
144        &mut self,
145        _searcher: &Searcher,
146        _context: &SinkContext<'_>,
147    ) -> Result<bool, Self::Error> {
148        Ok(true)
149    }
150
151    /// This method is called whenever a break in contextual lines is found,
152    /// and is optional to implement. By default, it does nothing and returns
153    /// `true`.
154    ///
155    /// A break can only occur when context reporting is enabled (that is,
156    /// either or both of `before_context` or `after_context` are greater than
157    /// `0`). More precisely, a break occurs between non-contiguous groups of
158    /// lines.
159    ///
160    /// If this returns `true`, then searching continues. If this returns
161    /// `false`, then searching is stopped immediately and `finish` is called.
162    ///
163    /// If this returns an error, then searching is stopped immediately,
164    /// `finish` is not called and the error is bubbled back up to the caller
165    /// of the searcher.
166    #[inline]
167    fn context_break(
168        &mut self,
169        _searcher: &Searcher,
170    ) -> Result<bool, Self::Error> {
171        Ok(true)
172    }
173
174    /// This method is called whenever binary detection is enabled and binary
175    /// data is found. If binary data is found, then this is called at least
176    /// once for the first occurrence with the absolute byte offset at which
177    /// the binary data begins.
178    ///
179    /// If this returns `true`, then searching continues. If this returns
180    /// `false`, then searching is stopped immediately and `finish` is called.
181    ///
182    /// If this returns an error, then searching is stopped immediately,
183    /// `finish` is not called and the error is bubbled back up to the caller
184    /// of the searcher.
185    ///
186    /// By default, it does nothing and returns `true`.
187    #[inline]
188    fn binary_data(
189        &mut self,
190        _searcher: &Searcher,
191        _binary_byte_offset: u64,
192    ) -> Result<bool, Self::Error> {
193        Ok(true)
194    }
195
196    /// This method is called when a search has begun, before any search is
197    /// executed. By default, this does nothing.
198    ///
199    /// If this returns `true`, then searching continues. If this returns
200    /// `false`, then searching is stopped immediately and `finish` is called.
201    ///
202    /// If this returns an error, then searching is stopped immediately,
203    /// `finish` is not called and the error is bubbled back up to the caller
204    /// of the searcher.
205    #[inline]
206    fn begin(&mut self, _searcher: &Searcher) -> Result<bool, Self::Error> {
207        Ok(true)
208    }
209
210    /// This method is called when a search has completed. By default, this
211    /// does nothing.
212    ///
213    /// If this returns an error, the error is bubbled back up to the caller of
214    /// the searcher.
215    #[inline]
216    fn finish(
217        &mut self,
218        _searcher: &Searcher,
219        _: &SinkFinish,
220    ) -> Result<(), Self::Error> {
221        Ok(())
222    }
223}
224
225impl<'a, S: Sink> Sink for &'a mut S {
226    type Error = S::Error;
227
228    #[inline]
229    fn matched(
230        &mut self,
231        searcher: &Searcher,
232        mat: &SinkMatch<'_>,
233    ) -> Result<bool, S::Error> {
234        (**self).matched(searcher, mat)
235    }
236
237    #[inline]
238    fn context(
239        &mut self,
240        searcher: &Searcher,
241        context: &SinkContext<'_>,
242    ) -> Result<bool, S::Error> {
243        (**self).context(searcher, context)
244    }
245
246    #[inline]
247    fn context_break(
248        &mut self,
249        searcher: &Searcher,
250    ) -> Result<bool, S::Error> {
251        (**self).context_break(searcher)
252    }
253
254    #[inline]
255    fn binary_data(
256        &mut self,
257        searcher: &Searcher,
258        binary_byte_offset: u64,
259    ) -> Result<bool, S::Error> {
260        (**self).binary_data(searcher, binary_byte_offset)
261    }
262
263    #[inline]
264    fn begin(&mut self, searcher: &Searcher) -> Result<bool, S::Error> {
265        (**self).begin(searcher)
266    }
267
268    #[inline]
269    fn finish(
270        &mut self,
271        searcher: &Searcher,
272        sink_finish: &SinkFinish,
273    ) -> Result<(), S::Error> {
274        (**self).finish(searcher, sink_finish)
275    }
276}
277
278impl<S: Sink + ?Sized> Sink for Box<S> {
279    type Error = S::Error;
280
281    #[inline]
282    fn matched(
283        &mut self,
284        searcher: &Searcher,
285        mat: &SinkMatch<'_>,
286    ) -> Result<bool, S::Error> {
287        (**self).matched(searcher, mat)
288    }
289
290    #[inline]
291    fn context(
292        &mut self,
293        searcher: &Searcher,
294        context: &SinkContext<'_>,
295    ) -> Result<bool, S::Error> {
296        (**self).context(searcher, context)
297    }
298
299    #[inline]
300    fn context_break(
301        &mut self,
302        searcher: &Searcher,
303    ) -> Result<bool, S::Error> {
304        (**self).context_break(searcher)
305    }
306
307    #[inline]
308    fn binary_data(
309        &mut self,
310        searcher: &Searcher,
311        binary_byte_offset: u64,
312    ) -> Result<bool, S::Error> {
313        (**self).binary_data(searcher, binary_byte_offset)
314    }
315
316    #[inline]
317    fn begin(&mut self, searcher: &Searcher) -> Result<bool, S::Error> {
318        (**self).begin(searcher)
319    }
320
321    #[inline]
322    fn finish(
323        &mut self,
324        searcher: &Searcher,
325        sink_finish: &SinkFinish,
326    ) -> Result<(), S::Error> {
327        (**self).finish(searcher, sink_finish)
328    }
329}
330
/// Summary data reported at the end of a search.
///
/// This reports data such as the total number of bytes searched and the
/// absolute offset of the first occurrence of binary data, if any were found.
///
/// A searcher that stops early because of an error does not call `finish`.
/// A searcher that stops early because the `Sink` implementor instructed it
/// to will still call `finish`.
#[derive(Clone, Debug)]
pub struct SinkFinish {
    pub(crate) byte_count: u64,
    pub(crate) binary_byte_offset: Option<u64>,
}

impl SinkFinish {
    /// Return the total number of bytes searched.
    #[inline]
    pub fn byte_count(&self) -> u64 {
        self.byte_count
    }

    /// If binary detection is enabled and if binary data was found, then this
    /// returns the absolute byte offset of the first detected byte of binary
    /// data.
    ///
    /// Note that since this is an absolute byte offset, it cannot be relied
    /// upon to index into any addressable memory.
    #[inline]
    pub fn binary_byte_offset(&self) -> Option<u64> {
        self.binary_byte_offset
    }
}
363
364/// A type that describes a match reported by a searcher.
365#[derive(Clone, Debug)]
366pub struct SinkMatch<'b> {
367    pub(crate) line_term: LineTerminator,
368    pub(crate) bytes: &'b [u8],
369    pub(crate) absolute_byte_offset: u64,
370    pub(crate) line_number: Option<u64>,
371    pub(crate) buffer: &'b [u8],
372    pub(crate) bytes_range_in_buffer: std::ops::Range<usize>,
373}
374
375impl<'b> SinkMatch<'b> {
376    /// Returns the bytes for all matching lines, including the line
377    /// terminators, if they exist.
378    #[inline]
379    pub fn bytes(&self) -> &'b [u8] {
380        self.bytes
381    }
382
383    /// Return an iterator over the lines in this match.
384    ///
385    /// If multi line search is enabled, then this may yield more than one
386    /// line (but always at least one line). If multi line search is disabled,
387    /// then this always reports exactly one line (but may consist of just
388    /// the line terminator).
389    ///
390    /// Lines yielded by this iterator include their terminators.
391    #[inline]
392    pub fn lines(&self) -> LineIter<'b> {
393        LineIter::new(self.line_term.as_byte(), self.bytes)
394    }
395
396    /// Returns the absolute byte offset of the start of this match. This
397    /// offset is absolute in that it is relative to the very beginning of the
398    /// input in a search, and can never be relied upon to be a valid index
399    /// into an in-memory slice.
400    #[inline]
401    pub fn absolute_byte_offset(&self) -> u64 {
402        self.absolute_byte_offset
403    }
404
405    /// Returns the line number of the first line in this match, if available.
406    ///
407    /// Line numbers are only available when the search builder is instructed
408    /// to compute them.
409    #[inline]
410    pub fn line_number(&self) -> Option<u64> {
411        self.line_number
412    }
413
414    /// Exposes as much of the underlying buffer that was search as possible.
415    #[inline]
416    pub fn buffer(&self) -> &'b [u8] {
417        self.buffer
418    }
419
420    /// Returns a range that corresponds to where [`SinkMatch::bytes`] appears
421    /// in [`SinkMatch::buffer`].
422    #[inline]
423    pub fn bytes_range_in_buffer(&self) -> std::ops::Range<usize> {
424        self.bytes_range_in_buffer.clone()
425    }
426}
427
/// The type of context reported by a searcher.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum SinkContextKind {
    /// The line reported occurred before a match.
    Before,
    /// The line reported occurred after a match.
    After,
    /// Any other type of context reported, e.g., as a result of a searcher's
    /// "passthru" mode.
    Other,
}
439
440/// A type that describes a contextual line reported by a searcher.
441#[derive(Clone, Debug)]
442pub struct SinkContext<'b> {
443    #[cfg(test)]
444    pub(crate) line_term: LineTerminator,
445    pub(crate) bytes: &'b [u8],
446    pub(crate) kind: SinkContextKind,
447    pub(crate) absolute_byte_offset: u64,
448    pub(crate) line_number: Option<u64>,
449}
450
451impl<'b> SinkContext<'b> {
452    /// Returns the context bytes, including line terminators.
453    #[inline]
454    pub fn bytes(&self) -> &'b [u8] {
455        self.bytes
456    }
457
458    /// Returns the type of context.
459    #[inline]
460    pub fn kind(&self) -> &SinkContextKind {
461        &self.kind
462    }
463
464    /// Return an iterator over the lines in this match.
465    ///
466    /// This always yields exactly one line (and that one line may contain just
467    /// the line terminator).
468    ///
469    /// Lines yielded by this iterator include their terminators.
470    #[cfg(test)]
471    pub(crate) fn lines(&self) -> LineIter<'b> {
472        LineIter::new(self.line_term.as_byte(), self.bytes)
473    }
474
475    /// Returns the absolute byte offset of the start of this context. This
476    /// offset is absolute in that it is relative to the very beginning of the
477    /// input in a search, and can never be relied upon to be a valid index
478    /// into an in-memory slice.
479    #[inline]
480    pub fn absolute_byte_offset(&self) -> u64 {
481        self.absolute_byte_offset
482    }
483
484    /// Returns the line number of the first line in this context, if
485    /// available.
486    ///
487    /// Line numbers are only available when the search builder is instructed
488    /// to compute them.
489    #[inline]
490    pub fn line_number(&self) -> Option<u64> {
491        self.line_number
492    }
493}
494
495/// A collection of convenience implementations of `Sink`.
496///
497/// Each implementation in this module makes some kind of sacrifice in the name
498/// of making common cases easier to use. Most frequently, each type is a
499/// wrapper around a closure specified by the caller that provides limited
500/// access to the full suite of information available to implementors of
501/// `Sink`.
502///
503/// For example, the `UTF8` sink makes the following sacrifices:
504///
505/// * All matches must be UTF-8. An arbitrary `Sink` does not have this
506///   restriction and can deal with arbitrary data. If this sink sees invalid
507///   UTF-8, then an error is returned and searching stops. (Use the `Lossy`
508///   sink instead to suppress this error.)
509/// * The searcher must be configured to report line numbers. If it isn't,
510///   an error is reported at the first match and searching stops.
511/// * Context lines, context breaks and summary data reported at the end of
512///   a search are all ignored.
513/// * Implementors are forced to use `std::io::Error` as their error type.
514///
515/// If you need more flexibility, then you're advised to implement the `Sink`
516/// trait directly.
517pub mod sinks {
518    use std::io;
519
520    use crate::searcher::Searcher;
521
522    use super::{Sink, SinkError, SinkMatch};
523
524    /// A sink that provides line numbers and matches as strings while ignoring
525    /// everything else.
526    ///
527    /// This implementation will return an error if a match contains invalid
528    /// UTF-8 or if the searcher was not configured to count lines. Errors
529    /// on invalid UTF-8 can be suppressed by using the `Lossy` sink instead
530    /// of this one.
531    ///
532    /// The closure accepts two parameters: a line number and a UTF-8 string
533    /// containing the matched data. The closure returns a
534    /// `Result<bool, std::io::Error>`. If the `bool` is `false`, then the
535    /// search stops immediately. Otherwise, searching continues.
536    ///
537    /// If multi line mode was enabled, the line number refers to the line
538    /// number of the first line in the match.
539    #[derive(Clone, Debug)]
540    pub struct UTF8<F>(pub F)
541    where
542        F: FnMut(u64, &str) -> Result<bool, io::Error>;
543
544    impl<F> Sink for UTF8<F>
545    where
546        F: FnMut(u64, &str) -> Result<bool, io::Error>,
547    {
548        type Error = io::Error;
549
550        fn matched(
551            &mut self,
552            _searcher: &Searcher,
553            mat: &SinkMatch<'_>,
554        ) -> Result<bool, io::Error> {
555            let matched = match std::str::from_utf8(mat.bytes()) {
556                Ok(matched) => matched,
557                Err(err) => return Err(io::Error::error_message(err)),
558            };
559            let line_number = match mat.line_number() {
560                Some(line_number) => line_number,
561                None => {
562                    let msg = "line numbers not enabled";
563                    return Err(io::Error::error_message(msg));
564                }
565            };
566            (self.0)(line_number, &matched)
567        }
568    }
569
570    /// A sink that provides line numbers and matches as (lossily converted)
571    /// strings while ignoring everything else.
572    ///
573    /// This is like `UTF8`, except that if a match contains invalid UTF-8,
574    /// then it will be lossily converted to valid UTF-8 by substituting
575    /// invalid UTF-8 with Unicode replacement characters.
576    ///
577    /// This implementation will return an error on the first match if the
578    /// searcher was not configured to count lines.
579    ///
580    /// The closure accepts two parameters: a line number and a UTF-8 string
581    /// containing the matched data. The closure returns a
582    /// `Result<bool, std::io::Error>`. If the `bool` is `false`, then the
583    /// search stops immediately. Otherwise, searching continues.
584    ///
585    /// If multi line mode was enabled, the line number refers to the line
586    /// number of the first line in the match.
587    #[derive(Clone, Debug)]
588    pub struct Lossy<F>(pub F)
589    where
590        F: FnMut(u64, &str) -> Result<bool, io::Error>;
591
592    impl<F> Sink for Lossy<F>
593    where
594        F: FnMut(u64, &str) -> Result<bool, io::Error>,
595    {
596        type Error = io::Error;
597
598        fn matched(
599            &mut self,
600            _searcher: &Searcher,
601            mat: &SinkMatch<'_>,
602        ) -> Result<bool, io::Error> {
603            use std::borrow::Cow;
604
605            let matched = match std::str::from_utf8(mat.bytes()) {
606                Ok(matched) => Cow::Borrowed(matched),
607                // TODO: In theory, it should be possible to amortize
608                // allocation here, but `std` doesn't provide such an API.
609                // Regardless, this only happens on matches with invalid UTF-8,
610                // which should be pretty rare.
611                Err(_) => String::from_utf8_lossy(mat.bytes()),
612            };
613            let line_number = match mat.line_number() {
614                Some(line_number) => line_number,
615                None => {
616                    let msg = "line numbers not enabled";
617                    return Err(io::Error::error_message(msg));
618                }
619            };
620            (self.0)(line_number, &matched)
621        }
622    }
623
624    /// A sink that provides line numbers and matches as raw bytes while
625    /// ignoring everything else.
626    ///
627    /// This implementation will return an error on the first match if the
628    /// searcher was not configured to count lines.
629    ///
630    /// The closure accepts two parameters: a line number and a raw byte string
631    /// containing the matched data. The closure returns a
632    /// `Result<bool, std::io::Error>`. If the `bool` is `false`, then the
633    /// search stops immediately. Otherwise, searching continues.
634    ///
635    /// If multi line mode was enabled, the line number refers to the line
636    /// number of the first line in the match.
637    #[derive(Clone, Debug)]
638    pub struct Bytes<F>(pub F)
639    where
640        F: FnMut(u64, &[u8]) -> Result<bool, io::Error>;
641
642    impl<F> Sink for Bytes<F>
643    where
644        F: FnMut(u64, &[u8]) -> Result<bool, io::Error>,
645    {
646        type Error = io::Error;
647
648        fn matched(
649            &mut self,
650            _searcher: &Searcher,
651            mat: &SinkMatch<'_>,
652        ) -> Result<bool, io::Error> {
653            let line_number = match mat.line_number() {
654                Some(line_number) => line_number,
655                None => {
656                    let msg = "line numbers not enabled";
657                    return Err(io::Error::error_message(msg));
658                }
659            };
660            (self.0)(line_number, mat.bytes())
661        }
662    }
663}