libafl/stages/
generalization.rs

1//! The tracing stage can trace the target and enrich a [`crate::corpus::Testcase`] with metadata, for example for `CmpLog`.
2
3use alloc::{
4    borrow::{Cow, ToOwned},
5    vec::Vec,
6};
7use core::{fmt::Debug, marker::PhantomData};
8
9use libafl_bolts::{
10    AsSlice, Named,
11    tuples::{Handle, Handled},
12};
13
14#[cfg(feature = "introspection")]
15use crate::monitors::stats::PerfFeature;
16use crate::{
17    Error, HasMetadata, HasNamedMetadata,
18    corpus::{Corpus, HasCurrentCorpusId},
19    executors::{Executor, HasObservers},
20    feedbacks::map::MapNoveltiesMetadata,
21    inputs::{
22        BytesInput, GeneralizedInputMetadata, GeneralizedItem, HasMutatorBytes, ResizableMutator,
23    },
24    mark_feature_time,
25    observers::{CanTrack, MapObserver, ObserversTuple},
26    require_novelties_tracking,
27    stages::{Restartable, RetryCountRestartHelper, Stage},
28    start_timer,
29    state::{HasCorpus, HasExecutions, MaybeHasClientPerfMonitor},
30};
31
32const MAX_GENERALIZED_LEN: usize = 8192;
33
34const fn increment_by_offset(_list: &[Option<u8>], idx: usize, off: u8) -> usize {
35    idx + 1 + off as usize
36}
37
38fn find_next_char(list: &[Option<u8>], mut idx: usize, ch: u8) -> usize {
39    while idx < list.len() {
40        if list[idx] == Some(ch) {
41            return idx + 1;
42        }
43        idx += 1;
44    }
45    idx
46}
47
48/// The name for generalization stage
49pub static GENERALIZATION_STAGE_NAME: &str = "generalization";
50
51/// A stage that runs a tracer executor
52#[derive(Clone, Debug)]
53pub struct GeneralizationStage<C, EM, I, O, OT, S, Z> {
54    name: Cow<'static, str>,
55    map_observer_handle: Handle<C>,
56    phantom: PhantomData<(EM, I, O, OT, S, Z)>,
57}
58
59impl<C, EM, I, O, OT, S, Z> Named for GeneralizationStage<C, EM, I, O, OT, S, Z> {
60    fn name(&self) -> &Cow<'static, str> {
61        &self.name
62    }
63}
64
65impl<C, EM, I, O, OT, S, Z> Restartable<S> for GeneralizationStage<C, EM, I, O, OT, S, Z>
66where
67    S: HasMetadata + HasNamedMetadata + HasCurrentCorpusId,
68{
69    #[inline]
70    fn should_restart(&mut self, state: &mut S) -> Result<bool, Error> {
71        // TODO: We need to be able to resume better if something crashes or times out
72        RetryCountRestartHelper::should_restart::<S>(state, &self.name, 3)
73    }
74
75    #[inline]
76    fn clear_progress(&mut self, state: &mut S) -> Result<(), Error> {
77        // TODO: We need to be able to resume better if something crashes or times out
78        RetryCountRestartHelper::clear_progress::<S>(state, &self.name)
79    }
80}
81
82impl<C, E, EM, O, S, Z> Stage<E, EM, S, Z>
83    for GeneralizationStage<C, EM, BytesInput, O, E::Observers, S, Z>
84where
85    C: CanTrack + AsRef<O> + Named,
86    E: Executor<EM, BytesInput, S, Z> + HasObservers,
87    E::Observers: ObserversTuple<BytesInput, S>,
88    O: MapObserver,
89    S: HasExecutions
90        + HasMetadata
91        + HasCorpus<BytesInput>
92        + HasNamedMetadata
93        + HasCurrentCorpusId
94        + MaybeHasClientPerfMonitor,
95{
96    #[inline]
97    #[expect(clippy::too_many_lines)]
98    fn perform(
99        &mut self,
100        fuzzer: &mut Z,
101        executor: &mut E,
102        state: &mut S,
103        manager: &mut EM,
104    ) -> Result<(), Error> {
105        let Some(corpus_id) = state.current_corpus_id()? else {
106            return Err(Error::illegal_state(
107                "state is not currently processing a corpus index",
108            ));
109        };
110
111        let (mut payload, original, novelties) = {
112            start_timer!(state);
113            {
114                let corpus = state.corpus();
115                let mut testcase = corpus.get(corpus_id)?.borrow_mut();
116                if testcase.scheduled_count() > 0 {
117                    return Ok(());
118                }
119
120                corpus.load_input_into(&mut testcase)?;
121            }
122            mark_feature_time!(state, PerfFeature::GetInputFromCorpus);
123            let mut entry = state.corpus().get(corpus_id)?.borrow_mut();
124            let input = entry.input_mut().as_mut().unwrap();
125
126            let payload: Vec<_> = input.mutator_bytes().iter().map(|&x| Some(x)).collect();
127
128            if payload.len() > MAX_GENERALIZED_LEN {
129                return Ok(());
130            }
131
132            let original = input.clone();
133            let meta = entry.metadata_map().get::<MapNoveltiesMetadata>().ok_or_else(|| {
134                    Error::key_not_found(format!(
135                        "MapNoveltiesMetadata needed for GeneralizationStage not found in testcase #{corpus_id} (check the arguments of MapFeedback::new(...))"
136                    ))
137                })?;
138            if meta.as_slice().is_empty() {
139                return Ok(()); // don't generalise inputs which don't have novelties
140            }
141            (payload, original, meta.as_slice().to_vec())
142        };
143
144        // Do not generalized unstable inputs
145        if !self.verify_input(fuzzer, executor, state, manager, &novelties, &original)? {
146            return Ok(());
147        }
148
149        self.find_gaps(
150            fuzzer,
151            executor,
152            state,
153            manager,
154            &mut payload,
155            &novelties,
156            increment_by_offset,
157            255,
158        )?;
159        self.find_gaps(
160            fuzzer,
161            executor,
162            state,
163            manager,
164            &mut payload,
165            &novelties,
166            increment_by_offset,
167            127,
168        )?;
169        self.find_gaps(
170            fuzzer,
171            executor,
172            state,
173            manager,
174            &mut payload,
175            &novelties,
176            increment_by_offset,
177            63,
178        )?;
179        self.find_gaps(
180            fuzzer,
181            executor,
182            state,
183            manager,
184            &mut payload,
185            &novelties,
186            increment_by_offset,
187            31,
188        )?;
189        self.find_gaps(
190            fuzzer,
191            executor,
192            state,
193            manager,
194            &mut payload,
195            &novelties,
196            increment_by_offset,
197            0,
198        )?;
199
200        self.find_gaps(
201            fuzzer,
202            executor,
203            state,
204            manager,
205            &mut payload,
206            &novelties,
207            find_next_char,
208            b'.',
209        )?;
210        self.find_gaps(
211            fuzzer,
212            executor,
213            state,
214            manager,
215            &mut payload,
216            &novelties,
217            find_next_char,
218            b';',
219        )?;
220        self.find_gaps(
221            fuzzer,
222            executor,
223            state,
224            manager,
225            &mut payload,
226            &novelties,
227            find_next_char,
228            b',',
229        )?;
230        self.find_gaps(
231            fuzzer,
232            executor,
233            state,
234            manager,
235            &mut payload,
236            &novelties,
237            find_next_char,
238            b'\n',
239        )?;
240        self.find_gaps(
241            fuzzer,
242            executor,
243            state,
244            manager,
245            &mut payload,
246            &novelties,
247            find_next_char,
248            b'\r',
249        )?;
250        self.find_gaps(
251            fuzzer,
252            executor,
253            state,
254            manager,
255            &mut payload,
256            &novelties,
257            find_next_char,
258            b'#',
259        )?;
260        self.find_gaps(
261            fuzzer,
262            executor,
263            state,
264            manager,
265            &mut payload,
266            &novelties,
267            find_next_char,
268            b' ',
269        )?;
270
271        self.find_gaps_in_closures(
272            fuzzer,
273            executor,
274            state,
275            manager,
276            &mut payload,
277            &novelties,
278            b'(',
279            b')',
280        )?;
281        self.find_gaps_in_closures(
282            fuzzer,
283            executor,
284            state,
285            manager,
286            &mut payload,
287            &novelties,
288            b'[',
289            b']',
290        )?;
291        self.find_gaps_in_closures(
292            fuzzer,
293            executor,
294            state,
295            manager,
296            &mut payload,
297            &novelties,
298            b'{',
299            b'}',
300        )?;
301        self.find_gaps_in_closures(
302            fuzzer,
303            executor,
304            state,
305            manager,
306            &mut payload,
307            &novelties,
308            b'<',
309            b'>',
310        )?;
311        self.find_gaps_in_closures(
312            fuzzer,
313            executor,
314            state,
315            manager,
316            &mut payload,
317            &novelties,
318            b'\'',
319            b'\'',
320        )?;
321        self.find_gaps_in_closures(
322            fuzzer,
323            executor,
324            state,
325            manager,
326            &mut payload,
327            &novelties,
328            b'"',
329            b'"',
330        )?;
331
332        // Save the modified input in the corpus
333        {
334            let meta = GeneralizedInputMetadata::generalized_from_options(&payload);
335
336            assert!(meta.generalized().first() == Some(&GeneralizedItem::Gap));
337            assert!(meta.generalized().last() == Some(&GeneralizedItem::Gap));
338
339            let mut entry = state.corpus().get(corpus_id)?.borrow_mut();
340            entry.metadata_map_mut().insert(meta);
341        }
342
343        Ok(())
344    }
345}
346
347impl<C, EM, O, OT, S, Z> GeneralizationStage<C, EM, BytesInput, O, OT, S, Z>
348where
349    O: MapObserver,
350    C: CanTrack + AsRef<O> + Named,
351    S: HasExecutions + HasMetadata + HasCorpus<BytesInput> + MaybeHasClientPerfMonitor,
352    OT: ObserversTuple<BytesInput, S>,
353{
354    /// Create a new [`GeneralizationStage`].
355    #[must_use]
356    pub fn new(map_observer: &C) -> Self {
357        require_novelties_tracking!("GeneralizationStage", C);
358        let name = map_observer.name().clone();
359        Self {
360            name: Cow::Owned(
361                GENERALIZATION_STAGE_NAME.to_owned() + ":" + name.into_owned().as_str(),
362            ),
363            map_observer_handle: map_observer.handle(),
364            phantom: PhantomData,
365        }
366    }
367
368    fn verify_input<E>(
369        &self,
370        fuzzer: &mut Z,
371        executor: &mut E,
372        state: &mut S,
373        manager: &mut EM,
374        novelties: &[usize],
375        input: &BytesInput,
376    ) -> Result<bool, Error>
377    where
378        E: Executor<EM, BytesInput, S, Z> + HasObservers,
379        E::Observers: ObserversTuple<BytesInput, S>,
380    {
381        start_timer!(state);
382        executor.observers_mut().pre_exec_all(state, input)?;
383        mark_feature_time!(state, PerfFeature::PreExecObservers);
384
385        start_timer!(state);
386        let exit_kind = executor.run_target(fuzzer, state, manager, input)?;
387        mark_feature_time!(state, PerfFeature::TargetExecution);
388
389        start_timer!(state);
390        executor
391            .observers_mut()
392            .post_exec_all(state, input, &exit_kind)?;
393        mark_feature_time!(state, PerfFeature::PostExecObservers);
394
395        let cnt = executor.observers()[&self.map_observer_handle]
396            .as_ref()
397            .how_many_set(novelties);
398
399        Ok(cnt == novelties.len())
400    }
401
402    fn trim_payload(payload: &mut Vec<Option<u8>>) {
403        let mut previous = false;
404        payload.retain(|&x| !(x.is_none() & core::mem::replace(&mut previous, x.is_none())));
405    }
406
407    #[expect(clippy::too_many_arguments)]
408    fn find_gaps<E>(
409        &self,
410        fuzzer: &mut Z,
411        executor: &mut E,
412        state: &mut S,
413        manager: &mut EM,
414        payload: &mut Vec<Option<u8>>,
415        novelties: &[usize],
416        find_next_index: fn(&[Option<u8>], usize, u8) -> usize,
417        split_char: u8,
418    ) -> Result<(), Error>
419    where
420        E: Executor<EM, BytesInput, S, Z> + HasObservers<Observers = OT>,
421    {
422        let mut start = 0;
423        while start < payload.len() {
424            let mut end = find_next_index(payload, start, split_char);
425            if end > payload.len() {
426                end = payload.len();
427            }
428            let mut candidate = BytesInput::new(vec![]);
429            candidate.extend(payload[..start].iter().flatten());
430            candidate.extend(payload[end..].iter().flatten());
431
432            if self.verify_input(fuzzer, executor, state, manager, novelties, &candidate)? {
433                for item in &mut payload[start..end] {
434                    *item = None;
435                }
436            }
437
438            start = end;
439        }
440
441        Self::trim_payload(payload);
442        Ok(())
443    }
444
445    #[expect(clippy::too_many_arguments)]
446    fn find_gaps_in_closures<E>(
447        &self,
448        fuzzer: &mut Z,
449        executor: &mut E,
450        state: &mut S,
451        manager: &mut EM,
452        payload: &mut Vec<Option<u8>>,
453        novelties: &[usize],
454        opening_char: u8,
455        closing_char: u8,
456    ) -> Result<(), Error>
457    where
458        E: Executor<EM, BytesInput, S, Z> + HasObservers<Observers = OT>,
459    {
460        let mut index = 0;
461        while index < payload.len() {
462            // Find start index
463            while index < payload.len() {
464                if payload[index] == Some(opening_char) {
465                    break;
466                }
467                index += 1;
468            }
469            let mut start = index;
470            let mut end = payload.len() - 1;
471            let mut endings = 0;
472            // Process every ending
473            while end > start {
474                if payload[end] == Some(closing_char) {
475                    endings += 1;
476                    let mut candidate = BytesInput::new(vec![]);
477                    candidate.extend(payload[..start].iter().flatten());
478                    candidate.extend(payload[end..].iter().flatten());
479
480                    if self.verify_input(fuzzer, executor, state, manager, novelties, &candidate)? {
481                        for item in &mut payload[start..end] {
482                            *item = None;
483                        }
484                    }
485                    start = end;
486                }
487                end -= 1;
488                index += 1;
489            }
490
491            if endings == 0 {
492                break;
493            }
494        }
495
496        Self::trim_payload(payload);
497        Ok(())
498    }
499}