use crate::hir::CodepointClass;
/// Matcher that scans input bytes for a single character whose code point
/// belongs to a [`CodepointClass`].
#[derive(Debug, Clone)]
pub struct CodepointClassMatcher {
/// Class whose `contains` method is queried with each decoded code point.
class: CodepointClass,
}
impl CodepointClassMatcher {
pub fn new(class: CodepointClass) -> Self {
Self { class }
}
pub fn is_match(&self, input: &[u8]) -> bool {
self.find(input).is_some()
}
pub fn find(&self, input: &[u8]) -> Option<(usize, usize)> {
let s = std::str::from_utf8(input).ok()?;
for (byte_idx, ch) in s.char_indices() {
let cp = ch as u32;
if self.class.contains(cp) {
return Some((byte_idx, byte_idx + ch.len_utf8()));
}
}
None
}
pub fn captures(&self, input: &[u8]) -> Option<Vec<Option<(usize, usize)>>> {
self.find(input).map(|m| vec![Some(m)])
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Non-negated class over 0x61..0x7A (ASCII `a`..`z`).
    #[test]
    fn test_ascii_class() {
        let lowercase = CodepointClassMatcher::new(CodepointClass::new(vec![(0x61, 0x7A)], false));

        assert!(lowercase.is_match(b"hello"));
        assert!(lowercase.is_match(b"123abc"));
        assert!(!lowercase.is_match(b"123"));

        // The three digits are skipped; `a` sits at byte 3 and is one byte wide.
        let span = lowercase.find(b"123abc");
        assert_eq!(span, Some((3, 4)));
    }

    /// Non-negated class over U+03B1..U+03C9 (Greek `α`..`ω`).
    #[test]
    fn test_greek_class() {
        let greek = CodepointClassMatcher::new(CodepointClass::new(vec![(0x03B1, 0x03C9)], false));

        let all_greek = "αβγ".as_bytes();
        let mixed = "hello α world".as_bytes();
        assert!(greek.is_match(all_greek));
        assert!(greek.is_match(mixed));
        assert!(!greek.is_match(b"hello world"));

        // "hello " occupies bytes 0..6; `α` is encoded in two bytes.
        let span = greek.find("hello α".as_bytes());
        assert_eq!(span, Some((6, 8)));
    }

    /// Negated class over U+03B1..U+03C9: everything except Greek `α`..`ω`.
    #[test]
    fn test_negated_greek_class() {
        let not_greek =
            CodepointClassMatcher::new(CodepointClass::new(vec![(0x03B1, 0x03C9)], true));

        assert!(not_greek.is_match(b"hello"));
        assert!(not_greek.is_match("αβγhello".as_bytes()));
        assert!(!not_greek.is_match("αβγ".as_bytes()));

        // Three two-byte Greek letters precede `h`, which therefore starts at byte 6.
        let span = not_greek.find("αβγhello".as_bytes());
        assert_eq!(span, Some((6, 7)));
    }

    /// Empty input has no characters, so nothing can match.
    #[test]
    fn test_empty_input() {
        let lowercase = CodepointClassMatcher::new(CodepointClass::new(vec![(0x61, 0x7A)], false));

        assert!(!lowercase.is_match(b""));
        assert_eq!(lowercase.find(b""), None);
    }

    /// Bytes that are not valid UTF-8 produce no match.
    #[test]
    fn test_invalid_utf8() {
        let lowercase = CodepointClassMatcher::new(CodepointClass::new(vec![(0x61, 0x7A)], false));

        assert!(!lowercase.is_match(&[0xFF, 0xFE]));
        assert_eq!(lowercase.find(&[0xFF, 0xFE]), None);
    }
}