#[derive(Clone, Copy, Debug)]
// NOTE(review): no rationale is recorded for suppressing `dead_code`; presumably
// some variants are not constructed anywhere yet — confirm and document why.
#[allow(dead_code)]
/// A single operation, stored by value (the type is `Copy`).
///
/// NOTE(review): the code that builds and executes these operations is not
/// visible here. The per-variant notes below are inferred from variant names
/// and payload types only (they look like the instruction set of a regex-style
/// matcher) — confirm each against the interpreter before relying on it.
pub enum Op {
/// Presumably: match one exact byte.
Byte(u8),
/// Presumably: match one byte within a `(low, high)` range — bound order and
/// inclusivity are not established here; verify.
ByteRange(u8, u8),
/// Presumably: match one byte that falls in none of the listed ranges.
/// `ranges` has room for four pairs; `count` presumably says how many are in use.
NotByteRanges { count: u8, ranges: [(u8, u8); 4] },
/// Presumably: match one byte that falls in any of the listed ranges.
/// `ranges` has room for four pairs; `count` presumably says how many are in use.
ByteRanges { count: u8, ranges: [(u8, u8); 4] },
/// Presumably: test a byte against a class stored elsewhere and looked up by
/// `index`; `negated` presumably inverts the result.
ByteClassRef { index: u16, negated: bool },
/// Presumably: match one code point within a `(low, high)` range.
CpRange(u32, u32),
/// Presumably: match one code point outside a `(low, high)` range.
NotCpRange(u32, u32),
/// Presumably: test a code point against a class stored elsewhere and looked
/// up by `index`; `negated` presumably inverts the result.
CpClassRef { index: u16, negated: bool },
/// Presumably: match any single input unit (byte vs. code point, and newline
/// handling, are not established here).
Any,
/// Presumably: fork execution, with the `u32` naming an alternative target.
Split(u32),
/// Presumably: continue execution at the target named by the `u32`.
Jump(u32),
/// Presumably: record the current input position in the slot named by the `u16`.
Save(u16),
/// Presumably: report a successful match.
Match,
/// Presumably: assert that the current position is the start of the input.
StartAnchor,
/// Presumably: assert that the current position is the end of the input.
EndAnchor,
/// Presumably: assert a word boundary at the current position.
WordBoundary,
/// Presumably: assert that the current position is not a word boundary.
NotWordBoundary,
/// Presumably: match the text previously captured by the group named by the `u16`.
Backref(u16),
}
/// Decodes the UTF-8 sequence at the start of `bytes`.
///
/// Returns `Some((code_point, len))`, where `len` is the number of bytes the
/// encoded scalar value occupies (1 to 4), or `None` when `bytes` is empty,
/// ends in the middle of a sequence, or does not start with a well-formed
/// UTF-8 sequence.
///
/// Malformed input is rejected rather than decoded to a bogus value:
/// * a trailing byte that is not a continuation byte (`0b10xx_xxxx`),
/// * overlong encodings (a value stored in more bytes than it needs),
/// * UTF-16 surrogates (`U+D800..=U+DFFF`),
/// * values above `U+10FFFF`,
/// * a stray continuation byte or an invalid lead byte (`0xF8..=0xFF`).
#[inline]
pub fn decode_utf8(bytes: &[u8]) -> Option<(u32, usize)> {
    // Extracts the six payload bits of a continuation byte, or `None` if the
    // byte does not carry the `10` continuation tag.
    #[inline]
    fn continuation(byte: u8) -> Option<u32> {
        if (byte & 0xC0) == 0x80 {
            Some(u32::from(byte & 0x3F))
        } else {
            None
        }
    }

    match bytes {
        // ASCII fast path.
        &[b0, ..] if b0 < 0x80 => Some((u32::from(b0), 1)),
        // Two-byte sequence: 110xxxxx 10xxxxxx.
        &[b0, b1, ..] if (b0 & 0xE0) == 0xC0 => {
            let cp = (u32::from(b0 & 0x1F) << 6) | continuation(b1)?;
            // Anything below U+0080 fits in one byte, so this would be overlong.
            if cp >= 0x80 {
                Some((cp, 2))
            } else {
                None
            }
        }
        // Three-byte sequence: 1110xxxx 10xxxxxx 10xxxxxx.
        &[b0, b1, b2, ..] if (b0 & 0xF0) == 0xE0 => {
            let cp = (u32::from(b0 & 0x0F) << 12) | (continuation(b1)? << 6) | continuation(b2)?;
            // Reject overlong forms and surrogates, which are not scalar values.
            if cp >= 0x800 && !(0xD800..=0xDFFF).contains(&cp) {
                Some((cp, 3))
            } else {
                None
            }
        }
        // Four-byte sequence: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
        &[b0, b1, b2, b3, ..] if (b0 & 0xF8) == 0xF0 => {
            let cp = (u32::from(b0 & 0x07) << 18)
                | (continuation(b1)? << 12)
                | (continuation(b2)? << 6)
                | continuation(b3)?;
            // Reject overlong forms and anything beyond the Unicode range.
            if (0x1_0000..=0x10_FFFF).contains(&cp) {
                Some((cp, 4))
            } else {
                None
            }
        }
        // Empty input, truncated sequence, stray continuation byte, or an
        // invalid lead byte.
        _ => None,
    }
}
/// Reports whether `b` is an ASCII word byte: a decimal digit, an upper- or
/// lower-case ASCII letter, or an underscore.
#[inline]
pub fn is_word_byte(b: u8) -> bool {
    matches!(b, b'_' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z')
}