1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
use super::{entry::Entry, seek, Result};
use chrono::prelude::*;
use rand::distributions::{Distribution, Uniform};
use std::convert::TryInto;
use std::io::{BufRead, Read, Seek, SeekFrom};

pub struct Entries<T: Seek + Read + BufRead> {
    f: T,
    buf: String,
}

impl<T: Seek + Read + BufRead> Entries<T> {
    pub fn new(f: T) -> Self {
        Entries {
            f,
            buf: String::with_capacity(4096),
        }
    }

    pub fn len(&mut self) -> Result<u64> {
        let prev = self.f.seek(SeekFrom::Current(0))?;
        let len = self.f.seek(SeekFrom::End(0))?;
        self.f.seek(SeekFrom::Start(prev))?;
        Ok(len)
    }

    pub fn is_empty(&mut self) -> Result<bool> {
        Ok(self.len()? == 0)
    }

    pub fn at(&mut self, pos: u64) -> Result<Option<Entry>> {
        if pos > self.len()? {
            return Ok(None);
        }

        self.f.seek(SeekFrom::Start(pos))?;
        seek::start_of_current_line(&mut self.f)?;
        self.next_entry()
    }

    pub fn seek_to_end(&mut self) -> Result<()> {
        let len = self.len()?;
        self.at(len)?;
        Ok(())
    }

    pub fn seek_to_next(&mut self) -> Result<Option<u64>> {
        seek::start_of_next_line(&mut self.f)
    }

    pub fn seek_to_prev(&mut self) -> Result<Option<u64>> {
        seek::start_of_prev_line(&mut self.f)
    }

    pub fn next_entry(&mut self) -> Result<Option<Entry>> {
        self.buf.clear();
        self.f.read_line(&mut self.buf)?;

        // read_line will leave the buffer empty if it was attempting to read
        // past the end of the file. We set the file cursor to past the end of
        // the file so that we can check later on when trying to come back and
        // read a previous line we can read the last line instead of skipping
        // over it, because prev_line() by default skips the line that was just
        // read.
        if self.buf.is_empty() {
            self.f.seek(SeekFrom::End(1))?;
            return Ok(None);
        }

        let row = quick_csv::Csv::from_reader(self.buf.as_bytes())
            .next()
            .unwrap()?;
        Ok(Some((&row).try_into()?))
    }

    pub fn rand_entry(&mut self) -> Result<Option<Entry>> {
        let mut rng = rand::thread_rng();
        let range = Uniform::new(0, self.len()?);
        self.at(range.sample(&mut rng))
    }

    pub fn prev_entry(&mut self) -> Result<Option<Entry>> {
        // This seek takes us to the start of the line that was just read. It
        // will sometimes be None if we're already at the start of the file but
        // that's fine. We don't do this seek if we've previously read past the
        // end of the file, so that when we do read past the end of the file we
        // can again go back and read the last line.
        if self.f.seek(SeekFrom::Current(0))? <= self.len()? {
            self.seek_to_prev()?;
        }

        // This seek takes us to the actual previous entry. If this one returns None
        // it means we're trying to go past the start of the file, and there is no
        // previous entry.
        if self.seek_to_prev()?.is_none() {
            return Ok(None);
        }

        self.next_entry()
    }

    pub fn seek_to_first(&mut self, date: &chrono::DateTime<FixedOffset>) -> Result<()> {
        let file_size = self.len()?;
        let mut end = file_size;
        let mut start = self.f.seek(SeekFrom::Start(0))?;

        while start < end {
            let cur = start + (end - start) / 2;

            let entry = match self.at(cur)? {
                Some(entry) => entry,
                // If we get none back from at() it means we've tried to seek past
                // the end of the file. We break out of the loop in this case and
                // ultimately return to the caller with the file cursor at end of
                // file. This allows people to seek backwards from the end if they
                // want to.
                None => break,
            };

            if entry.datetime() >= date {
                end = cur - 1;
            } else {
                start = cur + 1;
            }
        }

        // When we exit the binary search loop we know that we're in one of the following
        // states:
        //
        //   - We're at the very start of the file.
        //   - We're at or past the end of the file.
        //   - We're somewhere in the middle, potentially on the row before the row we
        //     want to return.
        //
        // We need to navigate to the line that is exactly after the line before us that
        // is less than the given time.

        // If we're at the end of the file, it means that there are no lines in the file
        // that can be less than the given date, so we return with the file cursor at the
        // end of the file.
        if end >= file_size {
            return Ok(());
        }

        // We have to move forward one line at first, as we could have exited the binary
        // search loop on the entry before the one that we need to return.
        self.next_entry()?;

        loop {
            match self.prev_entry()? {
                None => break,
                Some(entry) => {
                    if entry.datetime() < date {
                        break;
                    }
                }
            }
        }

        Ok(())
    }
}

impl<T: Seek + Read + BufRead> Iterator for Entries<T> {
    type Item = Result<Entry>;

    fn next(&mut self) -> Option<Self::Item> {
        match self.next_entry() {
            Ok(opt) => match opt {
                Some(entry) => Some(Ok(entry)),
                None => None,
            },
            Err(e) => Some(Err(e)),
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;
    use test_case::test_case;

    // Each TESTDATA line is 43 characters long, 44 if you count the newline.
    const TESTDATA: &str = "2020-01-01T00:01:00.899849209+00:00,\"\"\"1\"\"\"
2020-02-12T23:08:40.987613062+00:00,\"\"\"2\"\"\"
2020-03-12T00:00:00.000000000+00:00,\"\"\"3\"\"\"
2020-04-12T23:28:45.726598931+00:00,\"\"\"4\"\"\"
2020-05-12T23:28:48.495151445+00:00,\"\"\"5\"\"\"
2020-06-13T10:12:53.353050231+00:00,\"\"\"6\"\"\"
";

    // Clippy isn't a big fan of mathematics that can be represented simpler
    // or evaluates to zero, but in these tests it helps make clear that we're
    // searching in to offsets of each line, so we allow it.
    #[allow(clippy::identity_op, clippy::erasing_op)]
    #[test_case(44 * 0 + 0  => Some("1".to_owned()))]
    #[test_case(44 * 0 + 10 => Some("1".to_owned()))]
    #[test_case(44 * 0 + 43 => Some("1".to_owned()))]
    #[test_case(44 * 1 + 0  => Some("2".to_owned()))]
    #[test_case(44 * 1 + 10 => Some("2".to_owned()))]
    #[test_case(44 * 1 + 43 => Some("2".to_owned()))]
    #[test_case(44 * 2 + 0  => Some("3".to_owned()))]
    #[test_case(44 * 2 + 10 => Some("3".to_owned()))]
    #[test_case(44 * 2 + 43 => Some("3".to_owned()))]
    #[test_case(44 * 3 + 0  => Some("4".to_owned()))]
    #[test_case(44 * 3 + 10 => Some("4".to_owned()))]
    #[test_case(44 * 3 + 43 => Some("4".to_owned()))]
    #[test_case(44 * 4 + 0  => Some("5".to_owned()))]
    #[test_case(44 * 4 + 10 => Some("5".to_owned()))]
    #[test_case(44 * 4 + 43 => Some("5".to_owned()))]
    #[test_case(44 * 5 + 0  => Some("6".to_owned()))]
    #[test_case(44 * 5 + 10 => Some("6".to_owned()))]
    #[test_case(44 * 5 + 43 => Some("6".to_owned()))]
    #[test_case(44 * 6 + 0  => None)]
    #[test_case(44 * 7 + 0  => None)]
    #[test_case(44 * 8 + 0  => None)]
    fn test_entry_at(pos: u64) -> Option<String> {
        let r = Cursor::new(Vec::from(TESTDATA.as_bytes()));
        Entries::new(r)
            .at(pos)
            .unwrap()
            .map(|e| e.message().to_owned())
    }

    // Test cases for exact date matches on each line.
    #[test_case("2020-01-01T00:01:00.899849209+00:00" => Some("1".to_owned()))]
    #[test_case("2020-02-12T23:08:40.987613062+00:00" => Some("2".to_owned()))]
    #[test_case("2020-03-12T00:00:00.000000000+00:00" => Some("3".to_owned()))]
    #[test_case("2020-04-12T23:28:45.726598931+00:00" => Some("4".to_owned()))]
    #[test_case("2020-05-12T23:28:48.495151445+00:00" => Some("5".to_owned()))]
    #[test_case("2020-06-13T10:12:53.353050231+00:00" => Some("6".to_owned()))]
    // Testing dates before and after the dates in the file.
    #[test_case("2000-01-01T00:01:00.000000000+00:00" => Some("1".to_owned()))]
    #[test_case("2021-01-01T00:00:00.000000000+00:00" => None)]
    // Testing dates that aren't exact matches but land us in the middle of the
    // file.
    #[test_case("2020-02-12T23:08:00+00:00" => Some("2".to_owned()))]
    #[test_case("2020-02-12T23:59:00+00:00" => Some("3".to_owned()))]
    #[test_case("2020-04-12T23:27:00+00:00" => Some("4".to_owned()))]
    #[test_case("2020-05-12T23:27:00+00:00" => Some("5".to_owned()))]
    #[test_case("2020-06-13T10:00:00+00:00" => Some("6".to_owned()))]
    fn test_seek_to_first(date_str: &str) -> Option<String> {
        let date = DateTime::parse_from_rfc3339(date_str).unwrap();
        let r = Cursor::new(Vec::from(TESTDATA.as_bytes()));
        let mut entries = Entries::new(r);
        entries.seek_to_first(&date).unwrap();
        entries
            .next_entry()
            .unwrap()
            .map(|e| e.message().to_owned())
    }

    #[test]
    fn test_navigating_entries() -> Result<()> {
        let r = Cursor::new(Vec::from(TESTDATA.as_bytes()));
        let mut entries = Entries::new(r);

        assert_eq!(entries.next_entry()?.unwrap().message(), "1");
        assert_eq!(entries.next_entry()?.unwrap().message(), "2");
        assert_eq!(entries.next_entry()?.unwrap().message(), "3");
        assert_eq!(entries.next_entry()?.unwrap().message(), "4");
        assert_eq!(entries.next_entry()?.unwrap().message(), "5");
        assert_eq!(entries.next_entry()?.unwrap().message(), "6");
        assert_eq!(entries.next_entry()?.is_none(), true);
        assert_eq!(entries.prev_entry()?.unwrap().message(), "6");
        assert_eq!(entries.prev_entry()?.unwrap().message(), "5");
        assert_eq!(entries.prev_entry()?.unwrap().message(), "4");
        assert_eq!(entries.prev_entry()?.unwrap().message(), "3");
        assert_eq!(entries.prev_entry()?.unwrap().message(), "2");
        assert_eq!(entries.prev_entry()?.unwrap().message(), "1");
        assert_eq!(entries.prev_entry()?.is_none(), true);
        assert_eq!(entries.prev_entry()?.is_none(), true);
        assert_eq!(entries.prev_entry()?.is_none(), true);
        assert_eq!(entries.next_entry()?.unwrap().message(), "1");
        assert_eq!(entries.next_entry()?.unwrap().message(), "2");
        assert_eq!(entries.next_entry()?.unwrap().message(), "3");
        assert_eq!(entries.next_entry()?.unwrap().message(), "4");
        assert_eq!(entries.next_entry()?.unwrap().message(), "5");
        assert_eq!(entries.next_entry()?.unwrap().message(), "6");
        assert_eq!(entries.next_entry()?.is_none(), true);
        Ok(())
    }

    #[test]
    fn test_seek_to_end() -> Result<()> {
        let r = Cursor::new(Vec::from(TESTDATA.as_bytes()));
        let mut entries = Entries::new(r);

        entries.seek_to_end()?;
        assert_eq!(entries.prev_entry()?.unwrap().message(), "6");
        Ok(())
    }

    #[test]
    fn test_iterator() -> Result<()> {
        let r = Cursor::new(Vec::from(TESTDATA.as_bytes()));
        let mut entries = Entries::new(r);

        assert_eq!(entries.next().unwrap().unwrap().message(), "1");
        assert_eq!(entries.next().unwrap().unwrap().message(), "2");
        assert_eq!(entries.next().unwrap().unwrap().message(), "3");
        assert_eq!(entries.next().unwrap().unwrap().message(), "4");
        assert_eq!(entries.next().unwrap().unwrap().message(), "5");
        assert_eq!(entries.next().unwrap().unwrap().message(), "6");
        assert_eq!(entries.next().is_none(), true);
        Ok(())
    }
}