stringmetrics/
iter.rs

1//! Tools to help with processing iterators
2
3use std::cmp::min;
4
5#[cfg(not(feature = "bench"))]
6#[derive(Debug, PartialEq)]
7pub(crate) struct IterPairInfo {
8    pub(crate) a_len: u32,
9    pub(crate) b_len: u32,
10    pub(crate) start_same: u32,
11    pub(crate) end_same: u32,
12}
13
14#[cfg(feature = "bench")]
15#[derive(Debug, PartialEq)]
16pub struct IterPairInfo {
17    pub(crate) a_len: u32,
18    pub(crate) b_len: u32,
19    pub(crate) start_same: u32,
20    pub(crate) end_same: u32,
21}
22
23impl IterPairInfo {
24    #[allow(dead_code)]
25    pub(crate) const fn new(a_len: u32, b_len: u32, start_same: u32, end_same: u32) -> Self {
26        Self {
27            a_len,
28            b_len,
29            start_same,
30            end_same,
31        }
32    }
33
34    /// Length of `a` that is different from `b`
35    #[inline]
36    pub const fn a_diff_len(&self) -> u32 {
37        self.a_len - self.start_same - self.end_same
38    }
39
40    /// Length of `b` that is different from `a`
41    #[inline]
42    pub const fn b_diff_len(&self) -> u32 {
43        self.b_len - self.start_same - self.end_same
44    }
45}
46
47/// This function has an unstable API
48///
49/// It is designed to identify the longest similar start and end sequences in a
50/// pair of iterators, as well as their total length.
51#[inline]
52pub(crate) fn find_eq_end_items<I, T, D>(a: I, b: I) -> IterPairInfo
53where
54    I: IntoIterator<IntoIter = D>,
55    D: DoubleEndedIterator<Item = T> + Clone,
56    T: PartialEq,
57{
58    let a_len;
59    let b_len;
60
61    let mut start_same = 0usize;
62    // let mut end_same = 0usize;
63    let mut counting = true;
64
65    // Weirdly, this outperformed manual += 1 counting
66    let mut r_iter = 0..;
67    let mut a_iter = a.into_iter();
68    let mut b_iter = b.into_iter();
69
70    // We need to recreate the iterator here so we can use it later
71    // Note: this is likely doable without cloning if we work from the front,
72    // then work from the back, and add the difference.
73    let a_iter2 = a_iter.clone();
74    let b_iter2 = b_iter.clone();
75
76    loop {
77        match (a_iter.next(), b_iter.next()) {
78            (Some(a_v), Some(b_v)) => {
79                r_iter.next();
80                if counting && a_v == b_v {
81                    start_same += 1;
82                } else if counting {
83                    counting = false;
84                }
85            }
86            (Some(_), None) => {
87                let common_len = r_iter.next().unwrap();
88                a_len = common_len + 1 + a_iter.count();
89                b_len = common_len;
90                break;
91            }
92            (None, Some(_)) => {
93                let common_len = r_iter.next().unwrap();
94                b_len = common_len + 1 + b_iter.count();
95                a_len = common_len;
96                break;
97            }
98            (None, None) => {
99                let common_len = r_iter.next().unwrap();
100                a_len = common_len;
101                b_len = common_len;
102                break;
103            }
104        }
105    }
106
107    // Get characters at the end that are the same using iterators
108    // #[allow(clippy::pattern_type_mismatch)]
109    let end_same = end_same(a_iter2, b_iter2, a_len, b_len, start_same);
110
111    IterPairInfo {
112        a_len: a_len.try_into().expect("> u32::MAX items"),
113        b_len: b_len.try_into().expect("> u32::MAX items"),
114        start_same: start_same as u32,
115        end_same: end_same as u32,
116    }
117}
118
119// Get characters the same at the end of an iterator
120#[inline]
121#[allow(clippy::pattern_type_mismatch)]
122fn end_same<D, T>(a_iter: D, b_iter: D, a_len: usize, b_len: usize, start_same: usize) -> usize
123where
124    D: DoubleEndedIterator<Item = T> + Clone,
125    T: PartialEq,
126{
127    a_iter
128        .rev()
129        .zip(b_iter.rev())
130        // Limit to this difference
131        .take(min(a_len, b_len) - start_same)
132        // Count if items are equal, break if not
133        .take_while(|(a_item, b_item)| a_item == b_item)
134        .count()
135}
136
137#[inline]
138#[cfg(feature = "bench")]
139pub fn find_eq_end_items_bench<I, T, D>(a: I, b: I) -> IterPairInfo
140where
141    I: IntoIterator<IntoIter = D>,
142    D: DoubleEndedIterator<Item = T> + Clone,
143    T: PartialEq,
144{
145    find_eq_end_items(a, b)
146}
147
148#[cfg(test)]
149mod tests {
150    use super::*;
151
152    #[test]
153    fn test_iterpair_difflen() {
154        let info = IterPairInfo::new(10, 20, 5, 4);
155        assert_eq!(info.a_diff_len(), 1);
156        assert_eq!(info.b_diff_len(), 11);
157    }
158
159    #[test]
160    fn test_find_eq_items() {
161        assert_eq!(
162            find_eq_end_items("".chars(), "".chars()),
163            IterPairInfo::new(0, 0, 0, 0)
164        );
165        assert_eq!(
166            find_eq_end_items("aaaa".chars(), "".chars()),
167            IterPairInfo::new(4, 0, 0, 0)
168        );
169        assert_eq!(
170            find_eq_end_items("".chars(), "aaaa".chars()),
171            IterPairInfo::new(0, 4, 0, 0)
172        );
173        assert_eq!(
174            find_eq_end_items("abcd".chars(), "abcd".chars()),
175            IterPairInfo::new(4, 4, 4, 0)
176        );
177        assert_eq!(
178            find_eq_end_items("aaaa".chars(), "aa".chars()),
179            IterPairInfo::new(4, 2, 2, 0)
180        );
181        assert_eq!(
182            find_eq_end_items("aaaa".chars(), "aabbbb".chars()),
183            IterPairInfo::new(4, 6, 2, 0)
184        );
185        assert_eq!(
186            find_eq_end_items("xxaa".chars(), "yyyyaa".chars()),
187            IterPairInfo::new(4, 6, 0, 2)
188        );
189        assert_eq!(
190            find_eq_end_items("aaxxxxbb".chars(), "aaaabbbb".chars()),
191            IterPairInfo::new(8, 8, 2, 2)
192        );
193        assert_eq!(
194            find_eq_end_items("aaaa".chars(), "bbbb".chars()),
195            IterPairInfo::new(4, 4, 0, 0)
196        );
197    }
198
199    #[test]
200    fn test_tricky() {
201        assert_eq!(
202            find_eq_end_items("notate".chars(), "to ate".chars()),
203            IterPairInfo::new(6, 6, 0, 3)
204        );
205        assert_eq!(
206            find_eq_end_items("to ate".chars(), "notate".chars()),
207            IterPairInfo::new(6, 6, 0, 3)
208        );
209        assert_eq!(
210            find_eq_end_items("to be a".chars(), "not to".chars()),
211            IterPairInfo::new(7, 6, 0, 0)
212        );
213        assert_eq!(
214            find_eq_end_items("not to".chars(), "to be a".chars()),
215            IterPairInfo::new(6, 7, 0, 0)
216        );
217        assert_eq!(
218            find_eq_end_items("abccc".chars(), "accc".chars()),
219            IterPairInfo::new(5, 4, 1, 3)
220        );
221    }
222}