1use alloc::{
4 borrow::{Cow, ToOwned},
5 vec::Vec,
6};
7use core::{fmt::Debug, marker::PhantomData};
8
9use libafl_bolts::{
10 AsSlice, Named,
11 tuples::{Handle, Handled},
12};
13
14#[cfg(feature = "introspection")]
15use crate::monitors::stats::PerfFeature;
16use crate::{
17 Error, HasMetadata, HasNamedMetadata,
18 corpus::{Corpus, HasCurrentCorpusId},
19 executors::{Executor, HasObservers},
20 feedbacks::map::MapNoveltiesMetadata,
21 inputs::{
22 BytesInput, GeneralizedInputMetadata, GeneralizedItem, HasMutatorBytes, ResizableMutator,
23 },
24 mark_feature_time,
25 observers::{CanTrack, MapObserver, ObserversTuple},
26 require_novelties_tracking,
27 stages::{Restartable, RetryCountRestartHelper, Stage},
28 start_timer,
29 state::{HasCorpus, HasExecutions, MaybeHasClientPerfMonitor},
30};
31
32const MAX_GENERALIZED_LEN: usize = 8192;
33
34const fn increment_by_offset(_list: &[Option<u8>], idx: usize, off: u8) -> usize {
35 idx + 1 + off as usize
36}
37
38fn find_next_char(list: &[Option<u8>], mut idx: usize, ch: u8) -> usize {
39 while idx < list.len() {
40 if list[idx] == Some(ch) {
41 return idx + 1;
42 }
43 idx += 1;
44 }
45 idx
46}
47
48pub static GENERALIZATION_STAGE_NAME: &str = "generalization";
50
51#[derive(Clone, Debug)]
53pub struct GeneralizationStage<C, EM, I, O, OT, S, Z> {
54 name: Cow<'static, str>,
55 map_observer_handle: Handle<C>,
56 phantom: PhantomData<(EM, I, O, OT, S, Z)>,
57}
58
59impl<C, EM, I, O, OT, S, Z> Named for GeneralizationStage<C, EM, I, O, OT, S, Z> {
60 fn name(&self) -> &Cow<'static, str> {
61 &self.name
62 }
63}
64
65impl<C, EM, I, O, OT, S, Z> Restartable<S> for GeneralizationStage<C, EM, I, O, OT, S, Z>
66where
67 S: HasMetadata + HasNamedMetadata + HasCurrentCorpusId,
68{
69 #[inline]
70 fn should_restart(&mut self, state: &mut S) -> Result<bool, Error> {
71 RetryCountRestartHelper::should_restart::<S>(state, &self.name, 3)
73 }
74
75 #[inline]
76 fn clear_progress(&mut self, state: &mut S) -> Result<(), Error> {
77 RetryCountRestartHelper::clear_progress::<S>(state, &self.name)
79 }
80}
81
82impl<C, E, EM, O, S, Z> Stage<E, EM, S, Z>
83 for GeneralizationStage<C, EM, BytesInput, O, E::Observers, S, Z>
84where
85 C: CanTrack + AsRef<O> + Named,
86 E: Executor<EM, BytesInput, S, Z> + HasObservers,
87 E::Observers: ObserversTuple<BytesInput, S>,
88 O: MapObserver,
89 S: HasExecutions
90 + HasMetadata
91 + HasCorpus<BytesInput>
92 + HasNamedMetadata
93 + HasCurrentCorpusId
94 + MaybeHasClientPerfMonitor,
95{
96 #[inline]
97 #[expect(clippy::too_many_lines)]
98 fn perform(
99 &mut self,
100 fuzzer: &mut Z,
101 executor: &mut E,
102 state: &mut S,
103 manager: &mut EM,
104 ) -> Result<(), Error> {
105 let Some(corpus_id) = state.current_corpus_id()? else {
106 return Err(Error::illegal_state(
107 "state is not currently processing a corpus index",
108 ));
109 };
110
111 let (mut payload, original, novelties) = {
112 start_timer!(state);
113 {
114 let corpus = state.corpus();
115 let mut testcase = corpus.get(corpus_id)?.borrow_mut();
116 if testcase.scheduled_count() > 0 {
117 return Ok(());
118 }
119
120 corpus.load_input_into(&mut testcase)?;
121 }
122 mark_feature_time!(state, PerfFeature::GetInputFromCorpus);
123 let mut entry = state.corpus().get(corpus_id)?.borrow_mut();
124 let input = entry.input_mut().as_mut().unwrap();
125
126 let payload: Vec<_> = input.mutator_bytes().iter().map(|&x| Some(x)).collect();
127
128 if payload.len() > MAX_GENERALIZED_LEN {
129 return Ok(());
130 }
131
132 let original = input.clone();
133 let meta = entry.metadata_map().get::<MapNoveltiesMetadata>().ok_or_else(|| {
134 Error::key_not_found(format!(
135 "MapNoveltiesMetadata needed for GeneralizationStage not found in testcase #{corpus_id} (check the arguments of MapFeedback::new(...))"
136 ))
137 })?;
138 if meta.as_slice().is_empty() {
139 return Ok(()); }
141 (payload, original, meta.as_slice().to_vec())
142 };
143
144 if !self.verify_input(fuzzer, executor, state, manager, &novelties, &original)? {
146 return Ok(());
147 }
148
149 self.find_gaps(
150 fuzzer,
151 executor,
152 state,
153 manager,
154 &mut payload,
155 &novelties,
156 increment_by_offset,
157 255,
158 )?;
159 self.find_gaps(
160 fuzzer,
161 executor,
162 state,
163 manager,
164 &mut payload,
165 &novelties,
166 increment_by_offset,
167 127,
168 )?;
169 self.find_gaps(
170 fuzzer,
171 executor,
172 state,
173 manager,
174 &mut payload,
175 &novelties,
176 increment_by_offset,
177 63,
178 )?;
179 self.find_gaps(
180 fuzzer,
181 executor,
182 state,
183 manager,
184 &mut payload,
185 &novelties,
186 increment_by_offset,
187 31,
188 )?;
189 self.find_gaps(
190 fuzzer,
191 executor,
192 state,
193 manager,
194 &mut payload,
195 &novelties,
196 increment_by_offset,
197 0,
198 )?;
199
200 self.find_gaps(
201 fuzzer,
202 executor,
203 state,
204 manager,
205 &mut payload,
206 &novelties,
207 find_next_char,
208 b'.',
209 )?;
210 self.find_gaps(
211 fuzzer,
212 executor,
213 state,
214 manager,
215 &mut payload,
216 &novelties,
217 find_next_char,
218 b';',
219 )?;
220 self.find_gaps(
221 fuzzer,
222 executor,
223 state,
224 manager,
225 &mut payload,
226 &novelties,
227 find_next_char,
228 b',',
229 )?;
230 self.find_gaps(
231 fuzzer,
232 executor,
233 state,
234 manager,
235 &mut payload,
236 &novelties,
237 find_next_char,
238 b'\n',
239 )?;
240 self.find_gaps(
241 fuzzer,
242 executor,
243 state,
244 manager,
245 &mut payload,
246 &novelties,
247 find_next_char,
248 b'\r',
249 )?;
250 self.find_gaps(
251 fuzzer,
252 executor,
253 state,
254 manager,
255 &mut payload,
256 &novelties,
257 find_next_char,
258 b'#',
259 )?;
260 self.find_gaps(
261 fuzzer,
262 executor,
263 state,
264 manager,
265 &mut payload,
266 &novelties,
267 find_next_char,
268 b' ',
269 )?;
270
271 self.find_gaps_in_closures(
272 fuzzer,
273 executor,
274 state,
275 manager,
276 &mut payload,
277 &novelties,
278 b'(',
279 b')',
280 )?;
281 self.find_gaps_in_closures(
282 fuzzer,
283 executor,
284 state,
285 manager,
286 &mut payload,
287 &novelties,
288 b'[',
289 b']',
290 )?;
291 self.find_gaps_in_closures(
292 fuzzer,
293 executor,
294 state,
295 manager,
296 &mut payload,
297 &novelties,
298 b'{',
299 b'}',
300 )?;
301 self.find_gaps_in_closures(
302 fuzzer,
303 executor,
304 state,
305 manager,
306 &mut payload,
307 &novelties,
308 b'<',
309 b'>',
310 )?;
311 self.find_gaps_in_closures(
312 fuzzer,
313 executor,
314 state,
315 manager,
316 &mut payload,
317 &novelties,
318 b'\'',
319 b'\'',
320 )?;
321 self.find_gaps_in_closures(
322 fuzzer,
323 executor,
324 state,
325 manager,
326 &mut payload,
327 &novelties,
328 b'"',
329 b'"',
330 )?;
331
332 {
334 let meta = GeneralizedInputMetadata::generalized_from_options(&payload);
335
336 assert!(meta.generalized().first() == Some(&GeneralizedItem::Gap));
337 assert!(meta.generalized().last() == Some(&GeneralizedItem::Gap));
338
339 let mut entry = state.corpus().get(corpus_id)?.borrow_mut();
340 entry.metadata_map_mut().insert(meta);
341 }
342
343 Ok(())
344 }
345}
346
347impl<C, EM, O, OT, S, Z> GeneralizationStage<C, EM, BytesInput, O, OT, S, Z>
348where
349 O: MapObserver,
350 C: CanTrack + AsRef<O> + Named,
351 S: HasExecutions + HasMetadata + HasCorpus<BytesInput> + MaybeHasClientPerfMonitor,
352 OT: ObserversTuple<BytesInput, S>,
353{
354 #[must_use]
356 pub fn new(map_observer: &C) -> Self {
357 require_novelties_tracking!("GeneralizationStage", C);
358 let name = map_observer.name().clone();
359 Self {
360 name: Cow::Owned(
361 GENERALIZATION_STAGE_NAME.to_owned() + ":" + name.into_owned().as_str(),
362 ),
363 map_observer_handle: map_observer.handle(),
364 phantom: PhantomData,
365 }
366 }
367
368 fn verify_input<E>(
369 &self,
370 fuzzer: &mut Z,
371 executor: &mut E,
372 state: &mut S,
373 manager: &mut EM,
374 novelties: &[usize],
375 input: &BytesInput,
376 ) -> Result<bool, Error>
377 where
378 E: Executor<EM, BytesInput, S, Z> + HasObservers,
379 E::Observers: ObserversTuple<BytesInput, S>,
380 {
381 start_timer!(state);
382 executor.observers_mut().pre_exec_all(state, input)?;
383 mark_feature_time!(state, PerfFeature::PreExecObservers);
384
385 start_timer!(state);
386 let exit_kind = executor.run_target(fuzzer, state, manager, input)?;
387 mark_feature_time!(state, PerfFeature::TargetExecution);
388
389 start_timer!(state);
390 executor
391 .observers_mut()
392 .post_exec_all(state, input, &exit_kind)?;
393 mark_feature_time!(state, PerfFeature::PostExecObservers);
394
395 let cnt = executor.observers()[&self.map_observer_handle]
396 .as_ref()
397 .how_many_set(novelties);
398
399 Ok(cnt == novelties.len())
400 }
401
402 fn trim_payload(payload: &mut Vec<Option<u8>>) {
403 let mut previous = false;
404 payload.retain(|&x| !(x.is_none() & core::mem::replace(&mut previous, x.is_none())));
405 }
406
407 #[expect(clippy::too_many_arguments)]
408 fn find_gaps<E>(
409 &self,
410 fuzzer: &mut Z,
411 executor: &mut E,
412 state: &mut S,
413 manager: &mut EM,
414 payload: &mut Vec<Option<u8>>,
415 novelties: &[usize],
416 find_next_index: fn(&[Option<u8>], usize, u8) -> usize,
417 split_char: u8,
418 ) -> Result<(), Error>
419 where
420 E: Executor<EM, BytesInput, S, Z> + HasObservers<Observers = OT>,
421 {
422 let mut start = 0;
423 while start < payload.len() {
424 let mut end = find_next_index(payload, start, split_char);
425 if end > payload.len() {
426 end = payload.len();
427 }
428 let mut candidate = BytesInput::new(vec![]);
429 candidate.extend(payload[..start].iter().flatten());
430 candidate.extend(payload[end..].iter().flatten());
431
432 if self.verify_input(fuzzer, executor, state, manager, novelties, &candidate)? {
433 for item in &mut payload[start..end] {
434 *item = None;
435 }
436 }
437
438 start = end;
439 }
440
441 Self::trim_payload(payload);
442 Ok(())
443 }
444
445 #[expect(clippy::too_many_arguments)]
446 fn find_gaps_in_closures<E>(
447 &self,
448 fuzzer: &mut Z,
449 executor: &mut E,
450 state: &mut S,
451 manager: &mut EM,
452 payload: &mut Vec<Option<u8>>,
453 novelties: &[usize],
454 opening_char: u8,
455 closing_char: u8,
456 ) -> Result<(), Error>
457 where
458 E: Executor<EM, BytesInput, S, Z> + HasObservers<Observers = OT>,
459 {
460 let mut index = 0;
461 while index < payload.len() {
462 while index < payload.len() {
464 if payload[index] == Some(opening_char) {
465 break;
466 }
467 index += 1;
468 }
469 let mut start = index;
470 let mut end = payload.len() - 1;
471 let mut endings = 0;
472 while end > start {
474 if payload[end] == Some(closing_char) {
475 endings += 1;
476 let mut candidate = BytesInput::new(vec![]);
477 candidate.extend(payload[..start].iter().flatten());
478 candidate.extend(payload[end..].iter().flatten());
479
480 if self.verify_input(fuzzer, executor, state, manager, novelties, &candidate)? {
481 for item in &mut payload[start..end] {
482 *item = None;
483 }
484 }
485 start = end;
486 }
487 end -= 1;
488 index += 1;
489 }
490
491 if endings == 0 {
492 break;
493 }
494 }
495
496 Self::trim_payload(payload);
497 Ok(())
498 }
499}