use eregex::{MatchStatus, Regex};
/// Compiles `p` into a [`Regex`], panicking with the offending pattern if
/// compilation fails.
///
/// Intended for patterns that are fixed in this file, where a compile failure
/// is a bug in the test itself rather than a recoverable condition.
fn re(p: &str) -> Regex {
    match Regex::new(p) {
        Ok(compiled) => compiled,
        Err(_) => panic!("failed to compile {p:?}"),
    }
}
/// Strict, contiguous part-number pattern with three capture groups:
/// 1. the literal family prefix `STM32`,
/// 2. `F` followed by exactly three digits,
/// 3. exactly four characters, each an uppercase letter or a digit.
const PAT: &str = r"(STM32)(F[0-9]{3})([A-Z0-9]{4})";
/// One piece of a pattern that `gap_find` matches in sequence.
struct Segment {
    /// Compiled pattern that locates this segment in the haystack.
    re: Regex,
    /// Fixed name copied into `GapMatch::segments` next to the hit's offsets.
    label: &'static str,
}
/// Result of a successful `gap_find` run.
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so that a whole result can be
/// printed in assertion failures and compared with `assert_eq!`.
#[derive(Debug, Clone, PartialEq, Eq)]
struct GapMatch {
    /// Text of every segment hit, concatenated in segment order with the
    /// skipped text left out.
    reconstructed: String,
    /// One `(label, start, end)` entry per segment, in segment order.
    segments: Vec<(&'static str, usize, usize)>,
    /// `(start, end)` ranges of the non-empty gaps that were skipped between
    /// consecutive segments.
    skipped: Vec<(usize, usize)>,
}
/// Matches `segments` in order against `haystack`, allowing at most `max_gap`
/// of unrelated text between one segment and the next.
///
/// Every segment is looked up with `find_at`, starting where the previous
/// segment ended (the first lookup starts at offset 0). The hit returned by
/// that single call is used as-is; no alternative hit is tried if a later
/// check fails. The text in front of the first segment is not limited by
/// `max_gap` and is not recorded as skipped.
///
/// Returns `None` as soon as a segment cannot be found or the distance between
/// the previous segment's end and the current segment's start exceeds
/// `max_gap`. Otherwise returns a [`GapMatch`] holding the concatenated segment
/// texts, the `(label, start, end)` of every segment, and the non-empty gaps
/// that were skipped. An empty `segments` slice yields an empty `GapMatch`.
fn gap_find(haystack: &str, segments: &[Segment], max_gap: usize) -> Option<GapMatch> {
    let mut found = GapMatch {
        reconstructed: String::new(),
        segments: Vec::with_capacity(segments.len()),
        skipped: Vec::new(),
    };
    // Offset at which the search for the next segment resumes.
    let mut resume_at = 0usize;

    for (index, segment) in segments.iter().enumerate() {
        let hit = segment.re.find_at(haystack, resume_at)?;
        let start = hit.start();
        let end = hit.end();

        // Only gaps *between* segments are policed; the first segment may sit
        // anywhere in the haystack.
        if index > 0 {
            // NOTE(review): relies on `find_at` never reporting a hit that
            // starts before `resume_at`; otherwise this subtraction would
            // underflow — confirm against the eregex documentation.
            match start - resume_at {
                gap if gap > max_gap => return None,
                0 => {}
                _ => found.skipped.push((resume_at, start)),
            }
        }

        found.segments.push((segment.label, start, end));
        found.reconstructed.push_str(hit.as_str());
        resume_at = end;
    }

    Some(found)
}
/// Builds the two-segment description of an STM32 F-series part number used
/// by the gap-matching tests.
///
/// The first segment is the literal `STM32F`. The second accepts any three
/// digits followed by four uppercase alphanumerics; its label is the fixed
/// string `"407VGT6"` regardless of what the pattern actually matched.
fn stm32_segments() -> Vec<Segment> {
    // (pattern, label) pairs, in the order they must appear in the haystack.
    const SPECS: [(&str, &str); 2] = [
        (r"STM32F", "STM32F"),
        (r"[0-9]{3}[A-Z0-9]{4}", "407VGT6"),
    ];

    SPECS
        .iter()
        .map(|&(pattern, label)| Segment {
            re: re(pattern),
            label,
        })
        .collect()
}
#[test]
fn stm32_contiguous_full_match() {
    // Clean input: the strict pattern must match the part number in one piece.
    let text = "Microcontroller STM32F407VGT6";
    let strict = re(PAT);

    let whole = strict.find(text).expect("full match");
    assert_eq!(whole.as_str(), "STM32F407VGT6");
    assert_eq!(whole.start(), 16);
    assert_eq!(whole.group(1), Some("STM32"));
    assert_eq!(whole.group(2), Some("F407"));
    assert_eq!(whole.group(3), Some("VGT6"));

    // The partial-match API must report the same text as a full match.
    let via_partial = strict.find_partial(text).expect("partial-or-full");
    assert!(via_partial.is_full());
    assert_eq!(via_partial.matched, "STM32F407VGT6");
}
#[test]
fn stm32_noisy_input_strict_regex_no_match() {
    // Noise between `STM32F` and `407VGT6` must defeat the strict pattern in
    // every lookup mode, including partial matching.
    let text = "Microcontroller STM32F dutyu7 8 407VGT6 ";
    let strict = re(PAT);

    assert!(strict.find(text).is_none());
    assert!(!strict.is_match(text));
    assert!(strict.find_partial(text).is_none());
}
#[test]
fn stm32_partial_after_series_letter() {
    // Input stops right after the series letter: group 1 is complete, group 2
    // has only its first character, group 3 has not started.
    let text = "Microcontroller STM32F";
    let strict = re(PAT);
    let partial = strict.find_partial(text).expect("partial");

    assert!(partial.is_partial());
    assert_eq!(partial.matched, "STM32F");
    assert_eq!(partial.start, 16);
    assert_eq!(partial.end, 22);

    assert_eq!(partial.group(1), Some("STM32"));
    assert!(partial.group_matched(1));

    assert_eq!(partial.group(2), Some("F"));
    assert!(partial.group_partial(2));

    assert!(partial.group_none(3));
}
#[test]
fn stm32_partial_inside_series_digits() {
    // Input stops two digits into the series number: group 2 is still partial.
    let text = "Microcontroller STM32F40";
    let strict = re(PAT);
    let partial = strict.find_partial(text).expect("partial");

    assert!(partial.is_partial());
    assert_eq!(partial.matched, "STM32F40");

    assert_eq!(partial.group(1), Some("STM32"));
    assert!(partial.group_matched(1));

    assert_eq!(partial.group(2), Some("F40"));
    assert!(partial.group_partial(2));

    assert!(partial.group_none(3));
}
#[test]
fn stm32_partial_suffix() {
    // Input stops two characters into the suffix: groups 1 and 2 are complete,
    // group 3 is partial.
    let text = "Microcontroller STM32F407VG";
    let strict = re(PAT);
    let partial = strict.find_partial(text).expect("partial");

    assert!(partial.is_partial());
    assert_eq!(partial.matched, "STM32F407VG");

    assert_eq!(partial.group(1), Some("STM32"));
    assert!(partial.group_matched(1));

    assert_eq!(partial.group(2), Some("F407"));
    assert!(partial.group_matched(2));

    assert_eq!(partial.group(3), Some("VG"));
    assert!(partial.group_partial(3));
}
#[test]
fn stm32_wrong_char_inside_series() {
    // `X` where the first series digit belongs: neither a full nor a partial
    // match is expected.
    let text = "Microcontroller STM32FX07VGT6";
    let strict = re(PAT);

    assert!(strict.find(text).is_none());
    assert!(strict.find_partial(text).is_none());
}
#[test]
fn stm32_wrong_family_no_match() {
    // `STM8` instead of `STM32`: the family prefix itself does not match.
    let text = "Microcontroller STM8F407VGT6";
    let strict = re(PAT);

    assert!(strict.find(text).is_none());
    assert!(strict.find_partial(text).is_none());
}
#[test]
fn stm32_split_by_noise_between_f_and_407() {
    let text = "Microcontroller STM32F dutyu7 8 407VGT6 ";
    let segments = stm32_segments();
    let gapped = gap_find(text, &segments, 16).expect("gap match");

    // The two halves are found around the noise and stitched back together.
    assert_eq!(gapped.reconstructed, "STM32F407VGT6");
    assert_eq!(
        gapped.segments,
        vec![("STM32F", 16, 22), ("407VGT6", 32, 39)]
    );
    assert_eq!(&text[16..22], "STM32F");
    assert_eq!(&text[32..39], "407VGT6");

    // Exactly one gap was skipped, and it is the noise itself.
    assert_eq!(gapped.skipped, vec![(22, 32)]);
    assert_eq!(&text[22..32], " dutyu7 8 ");

    // The stitched text must satisfy the strict pattern in full.
    let strict = re(PAT);
    let recheck = strict
        .find_partial(&gapped.reconstructed)
        .expect("reconstruction re-matches");
    assert_eq!(recheck.status, MatchStatus::Full);
    assert_eq!(recheck.group(1), Some("STM32"));
    assert_eq!(recheck.group(2), Some("F407"));
    assert_eq!(recheck.group(3), Some("VGT6"));

    // The split sits between the series letter and the series digits.
    let (_, _, first_end) = gapped.segments[0];
    let (_, second_start, _) = gapped.segments[1];
    assert_eq!(&text[first_end - 1..first_end], "F");
    assert_eq!(&text[second_start..second_start + 3], "407");
}
#[test]
fn stm32_gap_too_long_no_match() {
    // The same haystack is rejected with a tight gap budget and accepted with
    // a generous one.
    let text = "Microcontroller STM32F very very very long unrelated text 407VGT6";

    let tight = gap_find(text, &stm32_segments(), 8);
    assert!(tight.is_none());

    let generous = gap_find(text, &stm32_segments(), 64);
    assert!(generous.is_some());
}
#[test]
fn stm32_gap_allowed_between_f_and_digits() {
    // Same noisy input as the split test, but with no trailing space.
    let text = "Microcontroller STM32F dutyu7 8 407VGT6";
    let segments = stm32_segments();
    let gapped = gap_find(text, &segments, 16).expect("gap match");

    assert_eq!(gapped.reconstructed, "STM32F407VGT6");
    assert_eq!(
        gapped.segments,
        vec![("STM32F", 16, 22), ("407VGT6", 32, 39)]
    );
    assert_eq!(gapped.skipped, vec![(22, 32)]);

    // The stitched text must satisfy the strict pattern in full.
    let strict = re(PAT);
    let recheck = strict
        .find_partial(&gapped.reconstructed)
        .expect("reconstruction re-matches");
    assert_eq!(recheck.status, MatchStatus::Full);
    assert_eq!(recheck.group(1), Some("STM32"));
    assert_eq!(recheck.group(2), Some("F407"));
    assert_eq!(recheck.group(3), Some("VGT6"));
}