use vyre_foundation::match_result::Match;
use vyre_primitives::matching::region::{dedup_regions_inplace, RegionTriple};
/// Failure reported by the reference post-processing path.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PostProcessError {
    /// A match range could not be sliced out of the haystack, i.e.
    /// `start > end` or `end > haystack_len`.
    InvalidRange {
        /// Pattern identifier carried by the offending hit.
        pattern_id: u32,
        /// Start offset of the offending hit.
        start: u32,
        /// End offset of the offending hit.
        end: u32,
        /// Length in bytes of the haystack the hit was checked against.
        haystack_len: usize,
    },
}

impl std::fmt::Display for PostProcessError {
    /// Renders the diagnostic together with its remediation hint.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // The enum has exactly one variant, so this destructuring is
        // irrefutable; adding a variant later turns it into a compile error
        // here instead of a silently missing message.
        let Self::InvalidRange {
            pattern_id,
            start,
            end,
            haystack_len,
        } = self;
        write!(
            f,
            "match range is outside the scanned haystack: pattern_id={pattern_id}, start={start}, end={end}, haystack_len={haystack_len}. Fix: preserve matcher readback bounds and reject corrupt hit triples before scoring."
        )
    }
}

impl std::error::Error for PostProcessError {}
/// A deduplicated match annotated with the scores produced by post-processing.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct PostProcessedMatch {
    /// Identifier of the pattern that produced the hit.
    pub pattern_id: u32,
    /// Start offset of the hit; the reference path slices the haystack as
    /// `haystack[start..end]`.
    pub start: u32,
    /// End offset of the hit (exclusive in the reference path's slice).
    pub end: u32,
    /// Entropy score of the matched bytes; the reference path fills this from
    /// `shannon_entropy_bits_per_byte`.
    pub entropy_bits_per_byte: f32,
    /// Combined length/entropy score; the reference path clamps it to
    /// `0.0..=1.0`.
    pub confidence: f32,
}
/// Allocating convenience wrapper around [`try_reference_post_process_into`].
///
/// Creates fresh scratch and output vectors, runs the reference
/// post-processing over `matches` against `haystack`, and returns the scored
/// matches.
///
/// # Errors
///
/// Propagates any [`PostProcessError`] reported by
/// [`try_reference_post_process_into`].
#[cfg(any(test, feature = "cpu-parity"))]
pub fn try_reference_post_process(
    matches: &[Match],
    haystack: &[u8],
) -> Result<Vec<PostProcessedMatch>, PostProcessError> {
    let mut region_scratch = Vec::new();
    let mut scored = Vec::new();
    // The scratch triples are only needed while the call runs; just the
    // scored output is handed back to the caller.
    try_reference_post_process_into(matches, haystack, &mut region_scratch, &mut scored)
        .map(|()| scored)
}
/// Reference post-processing that writes into caller-provided buffers.
///
/// Both `triples` and `out` are cleared first, so their previous contents are
/// never observed. Each match is converted to a [`RegionTriple`], the triples
/// are passed through `dedup_regions_inplace`, and every remaining triple is
/// scored against the bytes `haystack[start..end]`:
///
/// * `entropy_bits_per_byte` comes from [`shannon_entropy_bits_per_byte`];
/// * `confidence` is `min(len / 16, 1) * (entropy / 8)`, clamped to
///   `0.0..=1.0`.
///
/// An empty `matches` slice yields `Ok(())` with both buffers empty.
///
/// # Errors
///
/// Returns [`PostProcessError::InvalidRange`] for the first triple (in the
/// order left by `dedup_regions_inplace`) whose range has `start > end` or
/// `end > haystack.len()`. `out` is emptied before returning the error;
/// `triples` keeps the deduplicated regions.
#[cfg(any(test, feature = "cpu-parity"))]
pub fn try_reference_post_process_into(
    matches: &[Match],
    haystack: &[u8],
    triples: &mut Vec<RegionTriple>,
    out: &mut Vec<PostProcessedMatch>,
) -> Result<(), PostProcessError> {
    out.clear();
    triples.clear();
    if matches.is_empty() {
        return Ok(());
    }

    // Stage every hit as a region triple, then collapse duplicates in place.
    triples.reserve(matches.len());
    triples.extend(
        matches
            .iter()
            .map(|hit| RegionTriple::new(hit.pattern_id, hit.start, hit.end)),
    );
    dedup_regions_inplace(triples);

    out.reserve(triples.len());
    for region in triples.iter() {
        let lo = region.start as usize;
        let hi = region.end as usize;
        // `get` yields `None` exactly when `lo > hi` or `hi > haystack.len()`,
        // which is the corrupt-range condition.
        let window = match haystack.get(lo..hi) {
            Some(window) => window,
            None => {
                // Never leave partially scored output behind on failure.
                out.clear();
                return Err(PostProcessError::InvalidRange {
                    pattern_id: region.pid,
                    start: region.start,
                    end: region.end,
                    haystack_len: haystack.len(),
                });
            }
        };

        let entropy_bits = shannon_entropy_bits_per_byte(window);
        // Length saturates at 16 bytes; entropy is normalised by 8 bits.
        let length_factor = f32::min(window.len() as f32 / 16.0, 1.0);
        let entropy_factor = entropy_bits / 8.0;
        out.push(PostProcessedMatch {
            pattern_id: region.pid,
            start: region.start,
            end: region.end,
            entropy_bits_per_byte: entropy_bits,
            confidence: (length_factor * entropy_factor).clamp(0.0, 1.0),
        });
    }
    Ok(())
}
/// Infallible variant of [`try_reference_post_process`].
///
/// # Panics
///
/// Panics with the formatted [`PostProcessError`] when
/// [`try_reference_post_process`] returns an error.
#[must_use]
#[cfg(any(test, feature = "cpu-parity"))]
pub fn reference_post_process(matches: &[Match], haystack: &[u8]) -> Vec<PostProcessedMatch> {
    match try_reference_post_process(matches, haystack) {
        Ok(scored) => scored,
        Err(error) => {
            panic!("vyre-libs scan Reference oracle post-process contract failed: {error}")
        }
    }
}
/// Computes an `f32` Shannon-entropy score for `bytes`.
///
/// Returns `0.0` for an empty slice. Otherwise the counts returned by
/// `reference_byte_histogram` are turned into frequencies `p = count / len`
/// and the result is the sum of `-p * log2(p)` over the non-zero counts,
/// accumulated in iteration order.
// NOTE(review): presumably the histogram has one bucket per byte value, which
// would bound the result by 8.0 — confirm against `reference_byte_histogram`.
#[must_use]
#[cfg(any(test, feature = "cpu-parity"))]
pub fn shannon_entropy_bits_per_byte(bytes: &[u8]) -> f32 {
    if bytes.is_empty() {
        return 0.0;
    }

    let total = bytes.len() as f32;
    let histogram = vyre_primitives::text::byte_histogram::reference_byte_histogram(bytes);

    let mut entropy = 0.0_f32;
    for count in histogram {
        // Empty buckets are skipped: `log2(0)` is not finite.
        if count != 0 {
            let probability = count as f32 / total;
            entropy -= probability * probability.log2();
        }
    }
    entropy
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A single hit whose range `10..12` lies past the end of `b"short"`.
    fn out_of_range_hit() -> [Match; 1] {
        [Match::new(1, 10, 12)]
    }

    /// The buffer-reusing entry point must agree with the allocating one and
    /// must not reallocate buffers that already have enough capacity.
    #[test]
    fn into_reuses_scratch_and_matches_allocating_api() {
        let haystack = b"AKIA1234567890ZZ";
        // The first two hits are identical on purpose.
        let hits = [
            Match::new(7, 0, 8),
            Match::new(7, 0, 8),
            Match::new(8, 4, 12),
        ];
        let expected = try_reference_post_process(&hits, haystack).unwrap();

        let mut region_scratch: Vec<RegionTriple> = Vec::with_capacity(16);
        let region_ptr_before = region_scratch.as_ptr();
        let mut scored: Vec<PostProcessedMatch> = Vec::with_capacity(16);
        let scored_ptr_before = scored.as_ptr();

        try_reference_post_process_into(&hits, haystack, &mut region_scratch, &mut scored)
            .unwrap();

        assert_eq!(scored, expected);
        // Unchanged data pointers mean no reallocation happened.
        assert_eq!(region_scratch.as_ptr(), region_ptr_before);
        assert_eq!(scored.as_ptr(), scored_ptr_before);
    }

    /// Stale buffer contents must be dropped even when there is nothing to do.
    #[test]
    fn into_clears_outputs_on_empty_input() {
        let mut region_scratch = vec![RegionTriple::new(1, 0, 1)];
        let stale = PostProcessedMatch {
            pattern_id: 1,
            start: 0,
            end: 1,
            entropy_bits_per_byte: 0.0,
            confidence: 0.0,
        };
        let mut scored = vec![stale];

        try_reference_post_process_into(&[], b"", &mut region_scratch, &mut scored).unwrap();

        assert!(region_scratch.is_empty());
        assert!(scored.is_empty());
    }

    /// A corrupt range is reported with its details and leaves `out` empty.
    #[test]
    fn into_reports_invalid_ranges_without_partial_output() {
        let mut region_scratch = Vec::new();
        let mut scored = Vec::new();

        let failure = try_reference_post_process_into(
            &out_of_range_hit(),
            b"short",
            &mut region_scratch,
            &mut scored,
        )
        .unwrap_err();

        let expected = PostProcessError::InvalidRange {
            pattern_id: 1,
            start: 10,
            end: 12,
            haystack_len: 5,
        };
        assert_eq!(failure, expected);
        assert!(scored.is_empty());
    }

    /// The infallible wrapper turns the contract violation into a panic.
    #[test]
    #[should_panic(expected = "post-process contract failed")]
    fn infallible_wrapper_panics_on_corrupt_ranges() {
        let _ = reference_post_process(&out_of_range_hit(), b"short");
    }
}