tokenizers/utils/
iter.rs

//! The `ResultShunt` adapter originally comes from the Rust libcore and is duplicated here
//! because it is not exported
//! (cf <https://github.com/rust-lang/rust/blob/25091ed9b7739e12466fb2490baa1e8a2815121c/src/libcore/iter/adapters/mod.rs#L2664>).
//! We are now using the version from <https://stackoverflow.com/questions/44544323/how-to-unzip-a-sequence-of-resulta-b-e-to-a-veca-vecb-and-stop-on-f>
//! because the one from libcore seems to cause stack overflows in some cases.
//! This module also contains `lines_with_ending`, which copies `std::io::BufRead::lines` but
//! keeps the line endings.
6use std::io::BufRead;
7
/// Iterator adapter that yields the `Ok` payloads of an inner iterator of
/// `Result`s and remembers the first `Err` it encounters.
///
/// Use [`ResultShunt::process`] to run a closure over the adapter and get the
/// stored error (if any) back as the overall result.
#[derive(Debug)]
pub struct ResultShunt<I, E> {
    /// Underlying iterator of `Result` items.
    iter: I,
    /// First error produced by `iter`, if one has been seen.
    error: Option<E>,
}

impl<I, T, E> ResultShunt<I, E>
where
    I: Iterator<Item = Result<T, E>>,
{
    /// Process the given iterator as if it yielded a `T` instead of a
    /// `Result<T, _>`. Any errors will stop the inner iterator and
    /// the overall result will be an error.
    ///
    /// `f` is called exactly once with the adapter. Its return value is
    /// wrapped in `Ok` when no error was met while iterating; otherwise the
    /// first error is returned and the closure's value is dropped.
    pub fn process<F, U>(iter: I, f: F) -> Result<U, E>
    where
        F: FnOnce(&mut Self) -> U,
    {
        let mut shunt = Self::new(iter);
        let value = f(&mut shunt);
        shunt.reconstruct(value)
    }

    /// Wrap `iter` in an adapter that has not recorded any error yet.
    fn new(iter: I) -> Self {
        ResultShunt { iter, error: None }
    }

    /// Consume the adapter and rebuild a `Result` value. This should
    /// *always* be called, otherwise any potential error would be
    /// lost.
    fn reconstruct<U>(self, val: U) -> Result<U, E> {
        match self.error {
            Some(err) => Err(err),
            None => Ok(val),
        }
    }
}

impl<I, T, E> Iterator for ResultShunt<I, E>
where
    I: Iterator<Item = Result<T, E>>,
{
    type Item = T;

    /// Yield the next `Ok` payload, or `None` when the inner iterator is
    /// exhausted or has produced an error.
    fn next(&mut self) -> Option<Self::Item> {
        // Once an error is recorded the inner iterator must not be polled
        // again: a consumer calling `next` after `None` would otherwise
        // resume the stream and could overwrite the first error.
        if self.error.is_some() {
            return None;
        }
        match self.iter.next()? {
            Ok(value) => Some(value),
            Err(err) => {
                self.error = Some(err);
                None
            }
        }
    }

    /// The lower bound is always 0 because any remaining item may be an
    /// `Err`; the upper bound is the inner iterator's, or 0 after an error.
    fn size_hint(&self) -> (usize, Option<usize>) {
        if self.error.is_some() {
            (0, Some(0))
        } else {
            let (_, upper) = self.iter.size_hint();
            (0, upper)
        }
    }
}
61
/// Iterator over the lines of a buffered reader that keeps newline characters.
///
/// Copied from the `Lines` iterator of `std::io::BufRead`, except that the
/// trailing `\n` (or `\r\n`) is left on every yielded string.
#[derive(Debug)]
pub struct Lines<B> {
    /// Reader the lines are pulled from.
    buf: B,
}

/// Extension trait that adds `lines_with_ending` to every `BufRead` reader.
pub trait LinesWithEnding<B> {
    /// Consume the reader and return an iterator over its lines, with the
    /// line endings preserved.
    fn lines_with_ending(self) -> Lines<B>;
}

impl<B> LinesWithEnding<B> for B
where
    B: BufRead,
{
    fn lines_with_ending(self) -> Lines<B> {
        Lines { buf: self }
    }
}

impl<B: BufRead> Iterator for Lines<B> {
    type Item = std::io::Result<String>;

    /// Read the next line into a fresh `String`.
    ///
    /// Returns `None` once `read_line` reports zero bytes read, and passes
    /// any I/O error through as `Some(Err(_))`.
    fn next(&mut self) -> Option<Self::Item> {
        let mut line = String::new();
        match self.buf.read_line(&mut line) {
            Ok(0) => None,
            // The line ending is deliberately left in place.
            Ok(_) => Some(Ok(line)),
            Err(err) => Some(Err(err)),
        }
    }
}