use eregex::{MatchStatus, Regex};
/// One ordered piece of a pattern that `gap_find` locates in a haystack.
struct Segment {
/// Compiled pattern searched for with `find_at`, starting where the
/// previous segment's match ended.
re: Regex,
/// Static name stored alongside the match offsets in `GapMatch::segments`.
label: &'static str,
}
/// Result of a successful `gap_find` run.
///
/// Derives the common traits so results can be printed with `{:?}`, cloned,
/// and compared directly in assertions.
#[derive(Debug, Clone, PartialEq, Eq)]
struct GapMatch {
    /// Matched text of every segment, concatenated in segment order with the
    /// skipped gaps left out.
    reconstructed: String,
    /// `(label, start, end)` for each segment; `start..end` are offsets used
    /// to slice the original haystack.
    segments: Vec<(&'static str, usize, usize)>,
    /// `(start, end)` haystack ranges of the non-empty gaps that were skipped
    /// between consecutive segments.
    skipped: Vec<(usize, usize)>,
}
/// Finds `segments` in order inside `haystack`, allowing up to `max_gap`
/// bytes of unrelated text between one segment's end and the next one's start.
///
/// The first segment may start anywhere. Every later segment is searched for
/// from the end of the previous match, and the distance to its start must not
/// exceed `max_gap`.
///
/// If a gap turns out to be too long, the search is retried with the first
/// segment anchored at its next occurrence (one character past the failed
/// anchor). Without that retry, an early stray occurrence of the first
/// segment would hide a valid match later in the haystack. Segments after the
/// first always take their leftmost match; no further backtracking is done.
///
/// Returns `None` when any segment cannot be found at all, or when no anchor
/// position satisfies the gap limit. With an empty `segments` slice the result
/// is an empty `GapMatch`.
///
/// NOTE(review): relies on `find_at` reporting offsets relative to the whole
/// haystack and never before the requested start, as the gap arithmetic and
/// the callers' slicing assume — confirm against the `eregex` docs.
fn gap_find(haystack: &str, segments: &[Segment], max_gap: usize) -> Option<GapMatch> {
    // Offset from which the first segment is searched on the current attempt.
    let mut anchor = 0usize;

    loop {
        let mut reconstructed = String::new();
        let mut hits = Vec::with_capacity(segments.len());
        let mut skipped = Vec::new();
        let mut cursor = anchor;
        let mut gap_exceeded = false;

        for (i, seg) in segments.iter().enumerate() {
            // A segment that is absent from `cursor` onward is absent for
            // every later anchor too, so giving up here is final.
            let m = seg.re.find_at(haystack, cursor)?;
            let (s, e) = (m.start(), m.end());
            if i > 0 {
                let gap = s - cursor;
                if gap > max_gap {
                    gap_exceeded = true;
                    break;
                }
                if gap > 0 {
                    skipped.push((cursor, s));
                }
            }
            hits.push((seg.label, s, e));
            reconstructed.push_str(m.as_str());
            cursor = e;
        }

        if !gap_exceeded {
            return Some(GapMatch {
                reconstructed,
                segments: hits,
                skipped,
            });
        }

        // The gap check only runs for i > 0, so the first segment is recorded.
        // Step one whole character past its start so the next attempt picks a
        // later occurrence and the loop always makes progress.
        let first_start = hits.first()?.1;
        let step = haystack.get(first_start..)?.chars().next()?.len_utf8();
        anchor = first_start + step;
    }
}
/// Builds the two-segment description of an STM32F part number used by the
/// demo: the literal `STM32F` prefix followed by three digits and four
/// uppercase-alphanumeric characters.
///
/// # Panics
///
/// Panics if either pattern is rejected by `Regex::new`.
fn stm32_segments() -> Vec<Segment> {
    // (label, pattern) pairs in the order the segments must appear.
    let table = [
        ("STM32F", r"STM32F"),
        ("407VGT6", r"[0-9]{3}[A-Z0-9]{4}"),
    ];

    table
        .iter()
        .map(|&(label, pattern)| Segment {
            re: Regex::new(pattern).unwrap(),
            label,
        })
        .collect()
}
/// Contiguous part-number pattern with three capture groups: `STM32`, then
/// `F` plus three digits, then four uppercase-alphanumeric characters.
/// Compiled by `report` and `main` for the `find` / `find_partial` checks.
const FULL: &str = r"(STM32)(F[0-9]{3})([A-Z0-9]{4})";
/// Runs one demo scenario against `hay` and prints the outcome of each
/// matching strategy.
///
/// Printed in order: the scenario header, the plain `find` result for
/// `FULL`, the `find_partial` result for `FULL`, and the result of
/// `gap_find` over `stm32_segments()` with the given `max_gap`. When the gap
/// search succeeds, the reconstructed text is re-matched against `FULL` and
/// its capture groups are printed. Scenarios whose `name` contains `"#2"`
/// additionally print where the second capture group is split across the two
/// segments.
///
/// # Panics
///
/// Panics if `FULL` fails to compile, or if the reconstructed text does not
/// produce a `MatchStatus::Full` result from `find_partial`.
fn report(name: &str, hay: &str, max_gap: usize) {
    println!("\n=== {name} ===");
    println!("haystack: {hay:?}");

    let full = Regex::new(FULL).unwrap();

    // Plain contiguous search.
    let contiguous = full.find(hay).map(|m| m.as_str().to_string());
    println!(" find : {:?}", contiguous);

    // Partial search: report both the status and the matched text.
    let partial = match full.find_partial(hay) {
        Some(p) => format!("{:?} matched={:?}", p.status, p.matched),
        None => "None".to_string(),
    };
    println!(" find_partial : {:?}", partial);

    // Gap-tolerant search; nothing more to print when it finds nothing.
    let gm = match gap_find(hay, &stm32_segments(), max_gap) {
        Some(found) => found,
        None => {
            println!(" gap_find : None (NoMatch)");
            return;
        }
    };

    println!(" gap_find : OK");
    println!(" reconstructed = {:?}", gm.reconstructed);
    for &(label, s, e) in &gm.segments {
        println!(" segment {label:<8} = [{s}..{e}] {:?}", &hay[s..e]);
    }
    for &(s, e) in &gm.skipped {
        println!(" skipped = [{s}..{e}] {:?}", &hay[s..e]);
    }

    // The stitched-together text must satisfy the contiguous pattern.
    let rematch = full
        .find_partial(&gm.reconstructed)
        .expect("reconstruction must re-match the full pattern");
    assert_eq!(
        rematch.status,
        MatchStatus::Full,
        "reconstruction must be Full"
    );
    println!(
        " groups = [STM32={:?}, F407={:?}, VGT6={:?}]",
        rematch.group(1),
        rematch.group(2),
        rematch.group(3),
    );

    if name.contains("#2") {
        // Last byte of the first segment and first three bytes of the second
        // segment together make up the second capture group.
        let seg1_end = gm.segments[0].2;
        let seg2_start = gm.segments[1].1;
        println!(
            " group F407 split = [{}..{}] + [{}..{}] (\"F\" + \"407\")",
            seg1_end - 1,
            seg1_end,
            seg2_start,
            seg2_start + 3
        );
    }
}
/// Demo driver: runs every `report` scenario, then shows what `find_partial`
/// alone returns for haystacks that end in the middle of the part number.
///
/// # Panics
///
/// Panics if `FULL` fails to compile, if `find_partial` returns `None` for
/// one of the truncated haystacks, or if any check inside `report` fails.
fn main() {
    // (scenario name, haystack, max_gap), run in this order.
    let scenarios: [(&str, &str, usize); 5] = [
        (
            "#2 split by noise (max_gap=16)",
            "Microcontroller STM32F dutyu7 8 407VGT6 ",
            16,
        ),
        (
            "#9 gap too long (max_gap=8)",
            "Microcontroller STM32F very very very long unrelated text 407VGT6",
            8,
        ),
        (
            "#7 wrong char inside series",
            "Microcontroller STM32FX07VGT6",
            16,
        ),
        ("#8 wrong family", "Microcontroller STM8F407VGT6", 16),
        (
            "#1 contiguous full match",
            "Microcontroller STM32F407VGT6",
            16,
        ),
    ];
    for &(name, hay, max_gap) in &scenarios {
        report(name, hay, max_gap);
    }

    println!("\n--- end-of-input partials (find_partial alone, no gap mode) ---");
    let full = Regex::new(FULL).unwrap();

    // Haystacks cut off at successive points inside the part number.
    let truncated = [
        "Microcontroller STM32F",
        "Microcontroller STM32F40",
        "Microcontroller STM32F407VG",
    ];
    for &hay in &truncated {
        let partial = full.find_partial(hay).unwrap();
        println!(
            " {:<32} -> {:?} matched={:?} g1={:?} g2={:?} g3={:?}",
            hay,
            partial.status,
            partial.matched,
            partial.group(1),
            partial.group(2),
            partial.group(3)
        );
    }

    println!("\nAll reconstruction checks passed.");
}