pattern_3/omgwtf8/
mod.rs

1use haystack::{Hay, Span};
2use std::ops::Range;
3
4pub mod wtf8;
5pub use self::wtf8::Wtf8;
6
7unsafe impl Hay for Wtf8 {
8    type Index = usize;
9
10    #[inline]
11    fn empty<'a>() -> &'a Self {
12        Wtf8::from_str("")
13    }
14
15    #[inline]
16    fn start_index(&self) -> usize {
17        0
18    }
19
20    #[inline]
21    fn end_index(&self) -> usize {
22        self.len()
23    }
24
25    #[inline]
26    unsafe fn slice_unchecked(&self, range: Range<usize>) -> &Self {
27        &self[range]
28    }
29
30    #[inline]
31    unsafe fn next_index(&self, index: usize) -> usize {
32        let offset = match *self.as_inner().get_unchecked(index) {
33            0x00..=0x7f => 1,
34            0x80..=0xbf => if index == 0 { 3 } else { 2 },
35            0xc0..=0xdf => 2,
36            0xe0..=0xef => 3,
37            0xf0..=0xff => if index + 3 == self.len() { 3 } else { 2 },
38            _ => unreachable!(),
39        };
40        index + offset
41    }
42
43    #[inline]
44    unsafe fn prev_index(&self, index: usize) -> usize {
45        let bytes = self.as_inner();
46        let mut e = index - 1;
47
48        let mut c = *bytes.get_unchecked(e);
49        if c < 0x80 {
50            return e;
51        }
52        e -= 1;
53        c = *bytes.get_unchecked(e);
54        if c >= 0xc0 {
55            return e;
56        }
57        e -= 1;
58        c = *bytes.get_unchecked(e);
59        if c < 0xc0 && e != 0 {
60            e += 1;
61        }
62        e
63    }
64}
65
/// Checks that `next_index` and `prev_index` are inverses of each other on
/// every pair of adjacent boundaries of a mixed-width sample string.
#[test]
fn test_wtf8_next_last_index() {
    // Bytes: "a" (1), U+00E9 (2), " " (1), U+D83D encoded in 3 bytes,
    // then U+1F4A9 in 4 bytes, which is expected to be stepped over in two
    // halves (7 -> 9 -> 11).
    let string = unsafe { Wtf8::from_bytes_unchecked(b"a\xc3\xa9 \xed\xa0\xbd\xf0\x9f\x92\xa9") };
    let boundaries: [usize; 7] = [0, 1, 3, 4, 7, 9, 11];

    for (&i, &j) in boundaries.iter().zip(&boundaries[1..]) {
        unsafe {
            assert_eq!(string.next_index(i), j);
            assert_eq!(string.prev_index(j), i);
        }
    }
}
78
79impl<'h> Span<&'h Wtf8> {
80    pub fn as_bytes(self) -> Span<&'h [u8]> {
81        let (haystack, range) = self.into_parts();
82        unsafe {
83            Span::from_parts(haystack.as_inner(), range)
84        }
85    }
86}
87
88mod wtf8_pat;