pub fn find_best_anchor_sequence(bytes: &[u8], mask: &[bool]) -> Option<Vec<(usize, u8)>> {
let mut best_sequence: Option<Vec<(usize, u8)>> = None;
let mut best_score = u32::MAX;
let mut current_seq = Vec::new();
for (i, &byte) in bytes.iter().enumerate() {
if mask[i] {
current_seq.push((i, byte));
if current_seq.len() >= 2 && current_seq.len() <= 4 {
let score = calculate_sequence_rarity(bytes, mask, ¤t_seq);
if score < best_score {
best_score = score;
best_sequence = Some(current_seq.clone());
}
}
if current_seq.len() == 4 {
current_seq.remove(0);
}
} else {
current_seq.clear();
}
}
best_sequence
}
/// Scores a candidate sequence by adding up how often each of its bytes occurs.
///
/// For every `(index, byte)` pair in `sequence` the index is ignored and the
/// byte's count, as reported by `get_byte_frequency(bytes, mask, byte)`, is
/// added to the total. An empty sequence scores 0. A byte that appears several
/// times in `sequence` contributes its count each time.
fn calculate_sequence_rarity(bytes: &[u8], mask: &[bool], sequence: &[(usize, u8)]) -> u32 {
    let mut total: u32 = 0;
    for &(_, value) in sequence {
        total += get_byte_frequency(bytes, mask, value);
    }
    total
}
/// Counts the positions where `bytes` holds `byte` and the matching `mask` entry is `true`.
///
/// The two slices are walked in lockstep, so only positions present in both
/// are considered. The count is accumulated as `usize` and then cast to `u32`.
fn get_byte_frequency(bytes: &[u8], mask: &[bool], byte: u8) -> u32 {
    let mut hits: usize = 0;
    for (&candidate, &selected) in bytes.iter().zip(mask) {
        if selected && candidate == byte {
            hits += 1;
        }
    }
    hits as u32
}
/// Returns the index of the masked byte whose value occurs least often among masked bytes.
///
/// A 256-entry histogram is built from the bytes whose `mask` entry is `true`.
/// The masked positions are then scanned in order, and the position whose byte
/// has the smallest histogram count is returned. When several positions share
/// the smallest count, the earliest one wins.
///
/// `bytes` and `mask` are paired element by element; if their lengths differ,
/// positions past the end of the shorter slice are ignored.
///
/// Returns `None` when there is no masked position, which includes empty input.
pub fn find_rarest_byte_index(bytes: &[u8], mask: &[bool]) -> Option<usize> {
    // Histogram of masked byte values. `zip` bounds the walk to the shorter
    // slice, so a short mask cannot index out of range.
    let mut freq = [0u32; 256];
    for (&byte, &masked) in bytes.iter().zip(mask) {
        if masked {
            freq[usize::from(byte)] += 1;
        }
    }

    // Second pass over the same pairing, so a mask longer than `bytes` never
    // reads past the end of `bytes`. `min_by_key` keeps the first minimum.
    bytes
        .iter()
        .zip(mask)
        .enumerate()
        .filter(|&(_, (_, &masked))| masked)
        .min_by_key(|&(_, (&byte, _))| freq[usize::from(byte)])
        .map(|(i, _)| i)
}