/// Length, in bytes, of an atom: the fixed-size window of a literal that is ranked and picked.
pub const ATOM_SIZE: usize = 4;
/// Selects the best-ranked `ATOM_SIZE`-byte window inside `lit`.
///
/// Returns `(start, trailing)`, where `start` is the offset of the chosen window and
/// `trailing` is the number of bytes of `lit` that follow it. Literals that are not longer
/// than `ATOM_SIZE` are used whole, which is reported as `(0, 0)`.
///
/// When several windows share the highest rank, the last one is chosen.
pub fn pick_atom_in_literal(lit: &[u8]) -> (usize, usize) {
    let total = lit.len();
    if total <= ATOM_SIZE {
        return (0, 0);
    }

    // Track the offset and rank of the best window seen so far.
    let mut best: Option<(usize, u32)> = None;
    for (start, window) in lit.windows(ATOM_SIZE).enumerate() {
        let rank = atom_rank(window);
        let keep_current = matches!(best, Some((_, best_rank)) if best_rank > rank);
        // A later window replaces the current best on equal rank, so ties go to the last one.
        if !keep_current {
            best = Some((start, rank));
        }
    }

    match best {
        Some((start, _)) => (start, total - start - ATOM_SIZE),
        None => (0, 0),
    }
}
/// Computes the quality of the best atom that can be extracted from `lit`.
///
/// A literal that is not longer than `ATOM_SIZE` is ranked as a whole. Otherwise, the
/// result is the highest rank among all of its `ATOM_SIZE`-byte windows.
pub fn atom_quality_from_literal(lit: &[u8]) -> u32 {
    if lit.len() <= ATOM_SIZE {
        return atom_rank(lit);
    }

    // Folding from 0 yields the maximum rank, or 0 if there were no windows.
    lit.windows(ATOM_SIZE)
        .fold(0_u32, |best, window| best.max(atom_rank(window)))
}
/// Ranks a set of literals by the quality of its weakest member.
///
/// Each literal is scored as the best rank among its `ATOM_SIZE`-byte windows, or as the
/// rank of the whole literal when it is not longer than `ATOM_SIZE`. The smallest of those
/// scores is returned, or 0 when `literals` is empty.
pub fn atoms_rank(literals: &[Vec<u8>]) -> u32 {
    literals
        .iter()
        .map(|lit| {
            // Compare against `ATOM_SIZE` rather than a literal `4`, so that the length check
            // always agrees with the window size used below.
            if lit.len() > ATOM_SIZE {
                // There is at least one window here; `unwrap_or` simply avoids a panic path.
                lit.windows(ATOM_SIZE).map(atom_rank).max().unwrap_or(0)
            } else {
                atom_rank(lit)
            }
        })
        .min()
        .unwrap_or(0)
}
/// Scores a candidate atom; higher is better.
///
/// The score starts as the sum of [`byte_rank`] over every byte. If the atom consists of a
/// single repeated byte that is `0x00`, `0x20`, `0xCC` or `0xFF`, a penalty of 10 per byte is
/// subtracted. Otherwise, a bonus of 2 per distinct byte value is added.
fn atom_rank(atom: &[u8]) -> u32 {
    // One flag per possible byte value, used to count distinct bytes.
    let mut seen = [false; 256];
    let mut distinct = 0_u32;
    let mut score = 0_u32;

    for &byte in atom {
        score += byte_rank(byte);
        let flag = &mut seen[usize::from(byte)];
        if !*flag {
            *flag = true;
            distinct += 1;
        }
    }

    // With exactly one distinct value, a set flag identifies the byte that fills the atom.
    let is_penalised_filler =
        distinct == 1 && [0x00_usize, 0x20, 0xCC, 0xFF].iter().any(|&idx| seen[idx]);

    if is_penalised_filler {
        score - 10 * u32::try_from(atom.len()).unwrap_or(30)
    } else {
        score + 2 * distinct
    }
}
/// Scores a single byte; higher is better.
///
/// `0x00`, `0xCC` and `0xFF` score 10, ASCII lowercase letters score 18, and every other
/// byte scores 20.
pub fn byte_rank(b: u8) -> u32 {
    if matches!(b, 0x00 | 0xCC | 0xFF) {
        10
    } else if b.is_ascii_lowercase() {
        18
    } else {
        20
    }
}