use alloc::{collections::VecDeque, rc::Rc, vec::Vec};
use core::marker::PhantomData;
use bitvec::{bitvec, vec::BitVec};
use libafl_bolts::{Error, impl_serdeany};
use serde::{Deserialize, Serialize};
use crate::{
HasMetadata,
inputs::{BytesInput, HasTargetBytes},
stages::{Restartable, Stage},
state::{HasCorpus, HasCurrentTestcase},
};
/// Metadata describing which parts of an input were identified as valid UTF-8.
#[derive(Debug, Default, Serialize, Deserialize, Clone)]
pub struct UnicodeIdentificationMetadata {
/// Identified regions as `(start, boundaries)` pairs, as built by `extract_metadata`:
/// `start` is a byte offset into the input and `boundaries` has one bit per byte of the
/// region, set where a character begins.
ranges: Rc<Vec<(usize, BitVec)>>,
}
// Invokes `impl_serdeany!` from `libafl_bolts` for this type.
// NOTE(review): presumably this is what allows the type to be stored via `add_metadata` —
// confirm against the macro's documentation.
impl_serdeany!(UnicodeIdentificationMetadata);
impl UnicodeIdentificationMetadata {
    /// Borrows the identified regions.
    ///
    /// Every element is a `(start, boundaries)` pair: a byte offset into the input together
    /// with a bit vector covering the region that begins there.
    #[must_use]
    pub fn ranges(&self) -> &Vec<(usize, BitVec)> {
        // Deref coercion turns `&Rc<Vec<_>>` into the `&Vec<_>` the signature promises.
        &self.ranges
    }
}
/// Locates the valid UTF-8 regions of `bytes` and records where their characters start.
///
/// The result holds one `(start, boundaries)` pair per discovered region: `start` is the
/// absolute byte offset at which the region begins, and `boundaries` has one bit for every
/// byte of the region, set exactly at the offsets (relative to `start`) where a character
/// begins. Empty input yields no regions.
pub(crate) fn extract_metadata(bytes: &[u8]) -> UnicodeIdentificationMetadata {
    let mut ranges = Vec::new();

    if !bytes.is_empty() {
        // Work list of absolute byte offsets at which a UTF-8 string might begin.
        let mut queue = VecDeque::new();
        // Offsets that were already examined or that start a character of a recorded region.
        let mut visited = bitvec![0; bytes.len()];
        queue.push_back(0);

        while let Some(i) = queue.pop_front() {
            if i >= bytes.len() || visited[i] {
                // Out of bounds, or this offset has already been accounted for.
                continue;
            }
            visited.set(i, true);

            // Take the longest valid UTF-8 prefix of `bytes[i..]`.
            let s = match core::str::from_utf8(&bytes[i..]) {
                Ok(all) => all,
                Err(e) => {
                    let valid_len = e.valid_up_to();
                    // Resume scanning just past the byte that stopped the decoder.
                    queue.push_back(i + valid_len + 1);
                    core::str::from_utf8(&bytes[i..i + valid_len])
                        .expect("bytes before `valid_up_to` are valid UTF-8")
                }
            };

            if !s.is_empty() {
                let mut entries = bitvec![0; s.len()];
                for (c_idx, _) in s.char_indices() {
                    entries.set(c_idx, true);
                    visited.set(i + c_idx, true);
                }
                // `entries` is indexed relative to `i`, whereas `queue` holds absolute
                // offsets, so non-boundary bytes are shifted by `i` before being re-queued.
                queue.extend(entries.iter_zeros().map(|rel| i + rel));
                ranges.push((i, entries));
            }
        }
    }

    UnicodeIdentificationMetadata {
        ranges: Rc::new(ranges),
    }
}
/// Stage that attaches [`UnicodeIdentificationMetadata`] to the current testcase.
///
/// It stores no data of its own; `I` and `S` only select the input and state types used by
/// the stage's implementations.
#[derive(Debug)]
pub struct UnicodeIdentificationStage<I, S> {
/// Zero-sized marker that ties the otherwise unused `I` and `S` parameters to the type.
phantom: PhantomData<(I, S)>,
}
impl<I, S> Default for UnicodeIdentificationStage<I, S> {
fn default() -> Self {
Self::new()
}
}
impl<I, S> UnicodeIdentificationStage<I, S> {
    /// Creates a new stage; the only field is a zero-sized marker.
    #[must_use]
    pub fn new() -> Self {
        Self {
            phantom: PhantomData,
        }
    }

    /// Attaches [`UnicodeIdentificationMetadata`] to the current testcase of `state`, unless
    /// the testcase already carries that metadata.
    ///
    /// # Errors
    ///
    /// Propagates any error from fetching the current testcase or from loading its input.
    fn identify_unicode_in_current_testcase(state: &mut S) -> Result<(), Error>
    where
        S: HasCurrentTestcase<I>,
        I: HasTargetBytes,
    {
        let mut testcase = state.current_testcase_mut()?;
        if !testcase.has_metadata::<UnicodeIdentificationMetadata>() {
            let input = testcase.load_input(state.corpus())?;
            let target = input.target_bytes();
            // Computed in its own statement so `target` is no longer in use by the time
            // the testcase is mutated below.
            let metadata = extract_metadata(&target);
            testcase.add_metadata(metadata);
        }
        Ok(())
    }
}
/// Runs unicode identification on the current testcase for states whose inputs are
/// [`BytesInput`]; the fuzzer, executor and manager arguments are not used.
impl<E, EM, S, Z> Stage<E, EM, S, Z> for UnicodeIdentificationStage<BytesInput, S>
where
    S: HasCorpus<BytesInput> + HasCurrentTestcase<BytesInput>,
{
    /// Delegates to `identify_unicode_in_current_testcase` and returns its result unchanged.
    fn perform(
        &mut self,
        _fuzzer: &mut Z,
        _executor: &mut E,
        state: &mut S,
        _manager: &mut EM,
    ) -> Result<(), Error> {
        Self::identify_unicode_in_current_testcase(state)
    }
}
// Restart handling for the stage: it keeps no progress information of its own.
// NOTE(review): re-running looks harmless because `identify_unicode_in_current_testcase`
// returns early when the metadata is already present — confirm against the `Restartable`
// contract.
impl<S> Restartable<S> for UnicodeIdentificationStage<BytesInput, S> {
/// Always returns `Ok(true)`; `_state` is not inspected.
#[inline]
fn should_restart(&mut self, _state: &mut S) -> Result<bool, Error> {
Ok(true)
}
/// Does nothing and returns `Ok(())`; `_state` is not modified.
#[inline]
fn clear_progress(&mut self, _state: &mut S) -> Result<(), Error> {
Ok(())
}
}