tracexec_core/primitives/
regex.rs

1use std::sync::LazyLock;
2
3/// Regex and regex-cursor related code
4pub use regex_cursor::*;
5
6use crate::{
7  cache::ArcStr,
8  event::OutputMsg,
9};
10
11pub trait BidirectionalIterator: Iterator {
12  fn prev(&mut self) -> Option<Self::Item>;
13}
14
15pub trait IntoBidirectionalIterator {
16  type Iter: BidirectionalIterator;
17
18  fn into_bidirectional_iter(self) -> Self::Iter;
19}
20
21pub struct BidirectionalIter<'a, T> {
22  slice: &'a [T],
23  index: BidirectionalIterIndex,
24}
25
26impl<'a, T> BidirectionalIter<'a, T> {
27  fn new(slice: &'a [T]) -> Self {
28    Self {
29      slice,
30      index: BidirectionalIterIndex::Start,
31    }
32  }
33}
34
35impl<T> BidirectionalIter<'_, T> {
36  pub fn by_ref(&mut self) -> &mut Self {
37    self
38  }
39}
40
41impl<'a, T> Iterator for BidirectionalIter<'a, T> {
42  type Item = &'a T;
43
44  fn next(&mut self) -> Option<Self::Item> {
45    self.index.next(self.slice.len());
46    self.index.get(self.slice)
47  }
48}
49
50impl<T> BidirectionalIterator for BidirectionalIter<'_, T> {
51  fn prev(&mut self) -> Option<Self::Item> {
52    self.index.prev(self.slice.len());
53    self.index.get(self.slice)
54  }
55}
56
57enum BidirectionalIterIndex {
58  Start,
59  Index(usize),
60  End,
61}
62
63impl BidirectionalIterIndex {
64  fn next(&mut self, len: usize) {
65    match self {
66      Self::Start => {
67        *self = if len > 0 { Self::Index(0) } else { Self::Start };
68      }
69      Self::Index(index) => {
70        if *index + 1 < len {
71          *index += 1;
72        } else {
73          *self = Self::End;
74        }
75      }
76      Self::End => {}
77    }
78  }
79
80  fn prev(&mut self, len: usize) {
81    match self {
82      Self::Start => {}
83      Self::Index(index) => {
84        if *index > 0 {
85          *index -= 1;
86        } else {
87          *self = Self::Start;
88        }
89      }
90      Self::End => {
91        *self = Self::Index(len.saturating_sub(1));
92      }
93    }
94  }
95
96  fn get<'a, T>(&self, slice: &'a [T]) -> Option<&'a T> {
97    match self {
98      Self::Start => None,
99      Self::Index(index) => slice.get(*index),
100      Self::End => None,
101    }
102  }
103}
104
105impl<'a, T> IntoBidirectionalIterator for &'a [T] {
106  type Iter = BidirectionalIter<'a, T>;
107
108  fn into_bidirectional_iter(self) -> Self::Iter {
109    BidirectionalIter::new(self)
110  }
111}
112
113enum BidirectionalInterspersedIterIndex {
114  Start,
115  Index(usize),
116  // The separator after the index
117  Separator(usize),
118  End,
119}
120
121pub struct BidirectionalInterspersedIter<'a, T> {
122  slice: &'a [T],
123  index: BidirectionalInterspersedIterIndex,
124  separator: &'a T,
125}
126
127impl<'a, T> BidirectionalInterspersedIter<'a, T> {
128  fn new(slice: &'a [T], separator: &'a T) -> Self {
129    Self {
130      slice,
131      index: BidirectionalInterspersedIterIndex::Start,
132      separator,
133    }
134  }
135}
136
137impl BidirectionalInterspersedIterIndex {
138  fn next(&mut self, len: usize) {
139    match self {
140      Self::Start => {
141        *self = if len > 0 { Self::Index(0) } else { Self::Start };
142      }
143      Self::Index(index) => {
144        if *index + 1 < len {
145          // Not last one
146          *self = Self::Separator(*index)
147        } else {
148          *self = Self::End;
149        }
150      }
151      Self::End => {}
152      Self::Separator(index) => *self = Self::Index(*index + 1),
153    }
154  }
155
156  fn prev(&mut self, len: usize) {
157    match self {
158      Self::Start => {}
159      Self::Index(index) => {
160        if *index > 0 {
161          // Not first one
162          *self = Self::Separator(*index - 1)
163        } else {
164          *self = Self::Start;
165        }
166      }
167      Self::End => {
168        if len > 0 {
169          *self = Self::Index(len.saturating_sub(1));
170        } else {
171          *self = Self::Start;
172        }
173      }
174      Self::Separator(index) => *self = Self::Index(*index),
175    }
176  }
177
178  fn get<'a, T>(&self, slice: &'a [T], separator: &'a T) -> Option<&'a T> {
179    match self {
180      Self::Start => None,
181      Self::Index(index) => slice.get(*index),
182      Self::Separator(_) => Some(separator),
183      Self::End => None,
184    }
185  }
186}
187
188impl<'a, T> Iterator for BidirectionalInterspersedIter<'a, T> {
189  type Item = &'a T;
190
191  fn next(&mut self) -> Option<Self::Item> {
192    self.index.next(self.slice.len());
193    self.index.get(self.slice, self.separator)
194  }
195}
196
197impl<T> BidirectionalIterator for BidirectionalInterspersedIter<'_, T> {
198  fn prev(&mut self) -> Option<Self::Item> {
199    self.index.prev(self.slice.len());
200    self.index.get(self.slice, self.separator)
201  }
202}
203
204#[cfg(test)]
205mod iter_tests {
206  use super::{
207    BidirectionalInterspersedIter,
208    BidirectionalIterator,
209  };
210
211  #[test]
212  fn biter_interspersed_roundtrip() {
213    let slice = [1, 2, 3, 4];
214    let separator = 0;
215    let mut biter = BidirectionalInterspersedIter::new(&slice, &separator);
216    assert_eq!(biter.next(), Some(&1));
217    assert_eq!(biter.next(), Some(&0));
218    assert_eq!(biter.next(), Some(&2));
219    assert_eq!(biter.next(), Some(&0));
220    assert_eq!(biter.next(), Some(&3));
221    assert_eq!(biter.next(), Some(&0));
222    assert_eq!(biter.next(), Some(&4));
223    assert_eq!(biter.next(), None);
224    assert_eq!(biter.prev(), Some(&4));
225    assert_eq!(biter.prev(), Some(&0));
226    assert_eq!(biter.prev(), Some(&3));
227    assert_eq!(biter.prev(), Some(&0));
228    assert_eq!(biter.prev(), Some(&2));
229    assert_eq!(biter.prev(), Some(&0));
230    assert_eq!(biter.prev(), Some(&1));
231    assert_eq!(biter.prev(), None);
232  }
233
234  #[test]
235  fn biter_interspersed_two_items() {
236    let slice = [1, 2];
237    let separator = 0;
238    let mut biter = BidirectionalInterspersedIter::new(&slice, &separator);
239    assert_eq!(biter.next(), Some(&1));
240    assert_eq!(biter.next(), Some(&0));
241    assert_eq!(biter.next(), Some(&2));
242    assert_eq!(biter.prev(), Some(&0));
243    assert_eq!(biter.next(), Some(&2));
244    assert_eq!(biter.next(), None);
245    assert_eq!(biter.next(), None);
246    assert_eq!(biter.prev(), Some(&2));
247    assert_eq!(biter.prev(), Some(&0));
248    assert_eq!(biter.prev(), Some(&1));
249    assert_eq!(biter.prev(), None);
250    assert_eq!(biter.prev(), None);
251    assert_eq!(biter.next(), Some(&1));
252    assert_eq!(biter.next(), Some(&0));
253    assert_eq!(biter.prev(), Some(&1));
254  }
255
256  #[test]
257  fn biter_interspersed_single_item() {
258    let slice = [1];
259    let separator = 0;
260    let mut biter = BidirectionalInterspersedIter::new(&slice, &separator);
261    assert_eq!(biter.next(), Some(&1));
262    assert_eq!(biter.next(), None);
263    assert_eq!(biter.next(), None);
264    assert_eq!(biter.prev(), Some(&1));
265    assert_eq!(biter.prev(), None);
266  }
267}
268
269// Original Copyright Notice for the following code:
270
271// Copyright (c) 2024 Pascal Kuthe
272
273// Permission is hereby granted, free of charge, to any
274// person obtaining a copy of this software and associated
275// documentation files (the "Software"), to deal in the
276// Software without restriction, including without
277// limitation the rights to use, copy, modify, merge,
278// publish, distribute, sublicense, and/or sell copies of
279// the Software, and to permit persons to whom the Software
280// is furnished to do so, subject to the following
281// conditions:
282
283// The above copyright notice and this permission notice
284// shall be included in all copies or substantial portions
285// of the Software.
286
287// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
288// ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
289// TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
290// PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
291// SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
292// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
293// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
294// IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
295// DEALINGS IN THE SOFTWARE.
296
297#[derive(Debug, Clone, Copy, PartialEq, Eq)]
298enum CursorPosition {
299  ChunkStart,
300  ChunkEnd,
301}
302
303/// Argv joined by a single white space between arguments
304pub struct ArgvCursor<'a, T> {
305  iter: BidirectionalInterspersedIter<'a, T>,
306  current: &'a [u8],
307  position: CursorPosition,
308  len: usize,
309  offset: usize,
310}
311
312pub static SPACE: LazyLock<OutputMsg> = LazyLock::new(|| OutputMsg::Ok(ArcStr::from(" ")));
313
314impl<'a, T> ArgvCursor<'a, T>
315where
316  T: AsRef<str>,
317{
318  pub fn new(slice: &'a [T], separator: &'a T) -> Self {
319    let mut res = Self {
320      iter: BidirectionalInterspersedIter::new(slice, separator),
321      current: &[],
322      position: CursorPosition::ChunkEnd,
323      len: slice.iter().map(|s| s.as_ref().len()).sum::<usize>()
324        + (slice.len().saturating_sub(1)) * separator.as_ref().len(),
325      offset: 0,
326    };
327    res.advance();
328    res
329  }
330}
331
332impl<T> Cursor for ArgvCursor<'_, T>
333where
334  T: AsRef<str>,
335{
336  fn chunk(&self) -> &[u8] {
337    self.current
338  }
339
340  fn advance(&mut self) -> bool {
341    match self.position {
342      CursorPosition::ChunkStart => {
343        self.iter.next();
344        self.position = CursorPosition::ChunkEnd;
345      }
346      CursorPosition::ChunkEnd => (),
347    }
348    for next in self.iter.by_ref() {
349      if next.as_ref().is_empty() {
350        continue;
351      }
352      self.offset += self.current.len();
353      self.current = next.as_ref().as_bytes();
354      return true;
355    }
356    false
357  }
358
359  fn backtrack(&mut self) -> bool {
360    match self.position {
361      CursorPosition::ChunkStart => {}
362      CursorPosition::ChunkEnd => {
363        self.iter.prev();
364        self.position = CursorPosition::ChunkStart;
365      }
366    }
367    while let Some(prev) = self.iter.prev() {
368      if prev.as_ref().is_empty() {
369        continue;
370      }
371      self.offset -= prev.as_ref().len();
372      self.current = prev.as_ref().as_bytes();
373      return true;
374    }
375    false
376  }
377
378  fn total_bytes(&self) -> Option<usize> {
379    Some(self.len)
380  }
381
382  fn offset(&self) -> usize {
383    self.offset
384  }
385}
386
387#[cfg(test)]
388mod cursor_tests {
389  use super::*;
390  use crate::cache::ArcStr;
391
392  #[test]
393  fn smoke_test() {
394    let single = vec![ArcStr::from("abc")];
395    let separator = ArcStr::from(" ");
396    let mut cursor = ArgvCursor::new(single.as_slice(), &separator);
397    assert_eq!(cursor.chunk(), b"abc");
398    assert!(!cursor.advance());
399    assert_eq!(cursor.chunk(), b"abc");
400    assert!(!cursor.backtrack());
401    assert_eq!(cursor.chunk(), b"abc");
402    // 2
403    let two = vec![ArcStr::from("abc"); 2];
404    let mut cursor = ArgvCursor::new(two.as_slice(), &separator);
405    let mut offset = 0;
406    loop {
407      assert_eq!(cursor.offset(), offset);
408      offset += cursor.chunk().len();
409      if !cursor.advance() {
410        break;
411      }
412    }
413    loop {
414      offset -= cursor.chunk().len();
415      assert_eq!(cursor.offset(), offset);
416      if !cursor.backtrack() {
417        break;
418      }
419    }
420    assert_eq!(cursor.offset(), 0);
421    assert_eq!(offset, 0);
422    // multi
423    let multi = vec![ArcStr::from("abc"); 100];
424    let mut cursor = ArgvCursor::new(multi.as_slice(), &separator);
425    let mut offset = 0;
426    loop {
427      assert_eq!(cursor.offset(), offset);
428      offset += cursor.chunk().len();
429      if !cursor.advance() {
430        break;
431      }
432    }
433    loop {
434      offset -= cursor.chunk().len();
435      assert_eq!(cursor.offset(), offset);
436      if !cursor.backtrack() {
437        break;
438      }
439    }
440    assert_eq!(cursor.offset(), 0);
441    assert_eq!(offset, 0);
442  }
443}