grep_reader/
grep.rs

1// [[file:../grep-reader.note::1abc6d68][1abc6d68]]
2use super::*;
3
4use std::fs::File;
5use std::io::prelude::*;
6use std::io::BufReader;
7// 1abc6d68 ends here
8
9// [[file:../grep-reader.note::480b544e][480b544e]]
10mod grep_lib;
11// mod grep_bin;
12// 480b544e ends here
13
14// [[file:../grep-reader.note::b3c30bcf][b3c30bcf]]
15use std::io::SeekFrom;
16use std::path::{Path, PathBuf};
17
18/// Quick grep text by marking the line that matching a pattern,
19/// suitable for very large text file.
20#[derive(Debug)]
21pub struct GrepReader {
22    src: PathBuf,
23    // A BufReader for File
24    reader: BufReader<File>,
25    // marked positions
26    position_markers: Vec<u64>,
27    // current position
28    marker_index: usize,
29}
30
31impl GrepReader {
32    /// Build from file in path
33    pub fn try_from_path(p: &Path) -> Result<Self> {
34        let f = File::open(p)?;
35        let reader = BufReader::new(f);
36        let grep = Self {
37            reader,
38            src: p.to_owned(),
39            position_markers: vec![],
40            marker_index: 0,
41        };
42        Ok(grep)
43    }
44
45    /// Mark positions that matching `pattern`, so that we can seek
46    /// these positions later. Regex can be used in `pattern`. Return
47    /// the number of marked positions.
48    ///
49    /// # Paramters
50    /// * max_count: exits search if max_count matches reached.
51    pub fn mark(&mut self, pattern: &str, max_count: impl Into<Option<usize>>) -> Result<usize> {
52        use self::grep_lib::mark_matched_positions_with_ripgrep;
53
54        let max_count = max_count.into();
55        self.position_markers = mark_matched_positions_with_ripgrep(pattern, &self.src, max_count)?;
56
57        self.marker_index = 0;
58        Ok(self.position_markers.len())
59    }
60
61    /// Goto the start of inner file.
62    pub fn goto_start(&mut self) {
63        let _ = self.reader.rewind();
64    }
65
66    /// Goto the end of inner file.
67    pub fn goto_end(&mut self) {
68        let _ = self.reader.seek(SeekFrom::End(0));
69    }
70
71    /// Return the number of marked positions.
72    pub fn num_markers(&self) -> usize {
73        self.position_markers.len()
74    }
75
76    /// Goto the next position that marked. Return marker position on success.
77    /// Return Err if already reached the last marker or other errors.
78    pub fn goto_next_marker(&mut self) -> Result<u64> {
79        let n = self.position_markers.len();
80        if self.marker_index < n {
81            let pos = self.position_markers[self.marker_index];
82            self.marker_index += 1;
83            let _ = self.reader.seek(SeekFrom::Start(pos))?;
84            Ok(pos)
85        } else {
86            bail!("Already reached the last marker or no marker at all!");
87        }
88    }
89
90    /// Goto the marked position in `marker_index`. Will panic if marker_index
91    /// out of range.
92    pub fn goto_marker(&mut self, marker_index: usize) -> Result<u64> {
93        let pos = self.position_markers[marker_index];
94        let _ = self.reader.seek(SeekFrom::Start(pos))?;
95        self.marker_index = marker_index + 1;
96        Ok(pos)
97    }
98
99    /// Return current marker index
100    pub fn current_marker(&mut self) -> usize {
101        self.marker_index
102    }
103
104    /// Return `n` lines in string on success from current
105    /// position. Return error if reached EOF early.
106    pub fn read_lines(&mut self, n: usize, buffer: &mut String) -> Result<()> {
107        for i in 0..n {
108            let nbytes = self.reader.read_line(buffer)?;
109            if nbytes == 0 {
110                bail!("The stream has reached EOF. Required {} lines, but filled {} lines", n, i);
111            }
112        }
113        Ok(())
114    }
115
116    /// Gets a mutable reference to the underlying reader.
117    pub fn get_mut(&mut self) -> &mut BufReader<File> {
118        &mut self.reader
119    }
120
121    /// Return text from current position to the next marker or file
122    /// end. It method will forward the cursor to the next marker.
123    pub fn read_until_next_marker(&mut self, s: &mut String) -> Result<()> {
124        ensure!(!self.position_markers.is_empty(), "No markers present!");
125
126        let i = self.marker_index;
127
128        // read until EOF?
129        let pos_cur = self.reader.stream_position()?;
130        if i < self.position_markers.len() {
131            let pos_mark = self.position_markers[i];
132            ensure!(pos_cur <= pos_mark, "cannot continue: cursor is behind current marker");
133            let delta = pos_mark - pos_cur;
134            let mut nsum = 0;
135            for _ in 0.. {
136                let n = self.reader.read_line(s)?;
137                assert_ne!(n, 0);
138                nsum += n as u64;
139                if nsum >= delta {
140                    break;
141                }
142            }
143            self.marker_index += 1;
144        } else {
145            // when already in the end, we refuse to continue
146            let pos_mark = self.position_markers.last().expect("no marker");
147            ensure!(pos_cur <= *pos_mark, "cannot continue: cursor is behind current marker");
148            while self.reader.read_line(s)? != 0 {
149                //
150            }
151        }
152        Ok(())
153    }
154}
155// b3c30bcf ends here