feroxfuzz 1.0.0-rc.8

Structure-aware, black box HTTP fuzzing library
Documentation
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};

use super::{set_states_corpus_index, CorpusIndex, Scheduler};
use crate::error::FeroxFuzzError;
use crate::state::SharedState;
use crate::std_ext::ops::Len;
use crate::std_ext::tuple::Named;

use tracing::{error, instrument, trace};

/// Cartesian product of multiple [`Corpus`] entries
///
/// [`Corpus`]: crate::corpora::Corpus
///
/// roughly equivalent to nested for-loops
///
/// # Examples
///
/// if you have a corpus with the following entries:
///
/// Users: ["user1", "user2", "user3"]
/// Passwords: ["pass1", "pass2", "pass3"]
///
/// then the Cartesian product of the two corpora would be:
///
///   user1: pass1
///   user1: pass2
///   user1: pass3
///   user2: pass1
///   user2: pass2
///   user2: pass3
///   user3: pass1
///   user3: pass2
///   user3: pass3
///
#[derive(Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct ProductScheduler {
    pub(super) current: usize,
    pub(super) indices: Vec<CorpusIndex>,

    #[cfg_attr(feature = "serde", serde(skip))]
    pub(super) state: SharedState,
}

impl std::fmt::Debug for ProductScheduler {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("ProductScheduler")
            .field("current", &self.current)
            .field("indices", &self.indices)
            .finish()
    }
}

impl Scheduler for ProductScheduler {
    #[instrument(skip(self), fields(%self.current, ?self.indices), level = "trace")]
    fn next(&mut self) -> Result<(), FeroxFuzzError> {
        // increment the zeroth index on every call to scheduler.next(); the zeroth
        // index is the innermost loop (since we reversed the vec) and should be
        // incremented every time.
        //
        // raises an error if the zeroth index has reached the total number of
        // expected iterations
        //
        // since we check the length of the incoming corpus iterator in the
        // constructor, we know we have at least one entry in `self.indices`
        let num_indices = self.indices.len();

        // this shouldn't ever error, since we have at least one corpus; at worst, inner and outer
        // are the same corpus
        let outermost = self.indices.last().ok_or_else(|| {
            error!("scheduler has no associated CorpusIndex");
            FeroxFuzzError::EmptyCorpusIndices
        })?;

        if self.current > 0 && outermost.should_reset(self.current) {
            // when the outermost loop reaches the end of its loop, the entire
            // nested loop structure has run to completion
            trace!("scheduler has run to completion");
            return Err(FeroxFuzzError::IterationStopped);
        }

        let innermost = &mut self.indices[0];

        innermost.next()?;

        // if we have a single entry, we're done, just need to set the state's view
        // of the current index to what was calculated here
        if num_indices == 1 {
            set_states_corpus_index(&self.state, innermost.name(), innermost.current())?;

            self.current += 1; // update the total number of times .next has been called

            return Ok(());
        }

        if !innermost.should_reset(self.current) {
            // if the current scheduler.next iteration is not a modulo value
            // for the innermost loop, we don't need to progress any further
            //
            // i.e. if the innermost loop has 3 entries, we need to increment
            // innermost until it reaches 3, then reset it to 0 before moving
            // on to the next outer loop
            set_states_corpus_index(&self.state, innermost.name(), innermost.current())?;

            self.current += 1; // update the total number of times .next has been called

            return Ok(());
        }

        // innermost loop has completed a full iteration, so we need to reset
        innermost.reset();

        set_states_corpus_index(&self.state, innermost.name(), innermost.current())?;

        // the for loop below iterates over an unknown number of corpora lengths
        // and increments each scheduled index if their previous index's modulo
        // value was 0. In the case of the first index, this will always be true
        // since we just determined the innermost loop's modulo value above.

        // The pattern is that when an index reaches its modulo value, it is
        // reset to 0 and the next greater loop is incremented.
        for index in self.indices[1..].iter_mut() {
            // due to len==1 check above, the slice is ok
            index.next()?;

            if !index.should_reset(self.current) {
                // if the current index is not yet at its modulo value,
                // continue to the next iteration, since it's not time to
                // increment any further indices
                //
                // recall that the innermost loop is the first index in the vec
                // and the outermost loop is the last index in the vec. so we
                // only progress to each outer-more loop when the current inner-more
                // loop has completed a full iteration.
                set_states_corpus_index(&self.state, index.name(), index.current())?;
                break;
            }

            index.reset();

            set_states_corpus_index(&self.state, index.name(), index.current())?;
        }

        self.current += 1; // update the total number of times .next has been called

        Ok(())
    }

    /// resets all indexes that are tracked by the scheduler as well as their associated atomic
    /// indexes in the [`SharedState`] instance
    #[instrument(skip(self), level = "trace")]
    fn reset(&mut self) {
        self.current = 0;

        let mut total_iterations = 1;

        self.indices.iter_mut().for_each(|index| {
            // first, we get the corpus associated with the current corpus_index
            let corpus = self.state.corpus_by_name(index.name()).unwrap();

            // and then get its length
            let len = corpus.len();

            // if any items were added to the corpus, we'll need to update the length/expected iterations
            // accordingly
            //
            // note: self.indices is in the same order as what ::new() produced initially, so
            // we can use the same strategy to update the total_iterations here, in the event
            // that we add items to any of the corpora
            total_iterations *= len;

            index.update_length(len);
            index.update_iterations(total_iterations);

            // we'll also reset the current index as well
            index.reset();

            // finally, we get the SharedState's view of the index in sync with the Scheduler's
            //
            // i.e. at this point, the state and local indices should all be 0, and any items
            // added to the corpus should be reflected in each index's length/iterations
            set_states_corpus_index(&self.state, index.name(), 0).unwrap();
        });

        trace!("scheduler has been reset");
    }

    fn update_length(&mut self) {
        let mut total_iterations = 1;

        self.indices.iter_mut().for_each(|index| {
            // first, we get the corpus associated with the current corpus_index
            let corpus = self.state.corpus_by_name(index.name()).unwrap();

            // and then get its length
            let len = corpus.len();
            let difference = len - index.len();

            if difference == 0 {
                // if the length of the corpus hasn't changed, we'll reset the index
                // so that it will receive a full iteration on the next pass through
                // the scheduler
                index.reset();
            }

            // if any items were added to the corpus, we'll need to update the length/expected iterations
            // accordingly
            //
            // note: self.indices is in the same order as what ::new() produced initially, so
            // we can use the same strategy to update the total_iterations here, in the event
            // that we add items to any of the corpora
            total_iterations *= len;

            index.update_length(len);
            index.update_iterations(total_iterations);
        });
    }
}

impl ProductScheduler {
    /// create a new `ProductScheduler` scheduler
    ///
    /// # Errors
    ///
    /// This function will return an error for the following reasons
    /// - if the `corpus_order` parameter is empty
    /// - if any of the corpora in the `corpus_order` parameter is not found in the `SharedState`'s `corpora` map
    /// - if any of the found corpora found in the `SharedState`'s `corpora` map are empty
    ///
    /// # Examples
    ///
    /// see `examples/cartesian-product.rs` for a more robust example
    /// and explanation
    ///
    /// ```
    /// use feroxfuzz::schedulers::{Scheduler, ProductScheduler};
    /// use feroxfuzz::prelude::*;
    /// use feroxfuzz::corpora::{RangeCorpus, Wordlist};
    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
    /// // create two corpora, one with a set of user names, and one with a range of ids
    /// // where only even ids are considered
    /// let users = Wordlist::new().words(["user", "admin"]).name("users").build();
    /// let ids = RangeCorpus::with_stop(5).name("ids").build()?;
    ///
    /// let state = SharedState::with_corpora([ids, users]);
    ///
    /// let order = ["users", "ids"];
    /// let mut scheduler = ProductScheduler::new(order, state.clone())?;
    ///
    /// let mut counter = 0;
    ///
    /// while Scheduler::next(&mut scheduler).is_ok() {
    ///     counter += 1;
    /// }
    ///
    /// // users.len() * ids.len() = 2 * 5 = 10
    /// assert_eq!(counter, 10);
    ///
    /// # Ok(())
    /// # }
    #[inline]
    #[instrument(skip(state), level = "trace")]
    #[allow(clippy::cast_possible_truncation)]
    #[allow(clippy::cast_sign_loss)]
    pub fn new<'a, I>(corpus_order: I, state: SharedState) -> Result<Self, FeroxFuzzError>
    where
        I: IntoIterator<Item = &'a str> + std::fmt::Debug,
    {
        let corpora = state.corpora();

        let mut current = state
            .stats()
            .read()
            .map_or(0, |stats| stats.requests() as usize);

        let mut lengths = Vec::with_capacity(corpora.len());

        for name in corpus_order {
            let corpus = state.corpus_by_name(name)?;

            let length = corpus.len();

            if length == 0 {
                // one of the corpora was empty
                error!(%name, "corpus is empty");

                return Err(FeroxFuzzError::EmptyCorpus {
                    name: name.to_string(),
                });
            }

            lengths.push((name, length));
        }

        if lengths.is_empty() {
            // empty iterator passed in
            error!("no corpora were found");
            return Err(FeroxFuzzError::EmptyCorpusMap);
        }

        let mut indices = Vec::with_capacity(lengths.len());
        let mut total_iterations = 1;

        for (name, length) in lengths.iter().rev() {
            // iterate over the lengths of the corpora, in reverse, in order to
            // determine the modulo value for each outer loop. the modulo value
            // determines when the index should be reset to 0.
            //
            // when the outermost loop reaches its modulo value, the entire
            // nested loop structure has run to completion
            total_iterations *= *length;
            indices.push(CorpusIndex::new(name, *length, total_iterations));
        }

        let mut scheduler = Self {
            indices,
            state,
            current: 0,
        };

        while current > 0 {
            Scheduler::next(&mut scheduler)?;
            current -= 1;
        }

        Ok(scheduler)
    }
}

#[allow(clippy::copy_iterator)]
impl Iterator for ProductScheduler {
    type Item = ();

    fn next(&mut self) -> Option<Self::Item> {
        Scheduler::next(self).ok()
    }
}

impl Named for ProductScheduler {
    fn name(&self) -> &str {
        "ProductScheduler"
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::atomic_load;
    use crate::corpora::{RangeCorpus, Wordlist};
    use crate::requests::{Request, ShouldFuzz};
    use std::sync::atomic::Ordering;

    /// test that the call to `set_states_corpus_index` behaves as expected
    #[test]
    fn test_corpus_index_is_set_in_state() {
        let users = Wordlist::new()
            .name("users")
            .word("user")
            .word("admin")
            .build();
        let state = SharedState::with_corpus(users);

        set_states_corpus_index(&state, "users", 1).unwrap();
        assert_eq!(
            atomic_load!(state.corpus_index_by_name("users").unwrap()),
            1
        );

        assert!(set_states_corpus_index(&state, "non-existent", 1).is_err());

        set_states_corpus_index(&state, "users", 43278).unwrap();
        assert_eq!(
            atomic_load!(state.corpus_index_by_name("users").unwrap()),
            43278
        );
    }

    /// test that a non-existent corpus name returns an error when passed as the ordering iterable
    #[test]
    fn test_product_with_non_existent_corpus() {
        let users = Wordlist::new()
            .name("users")
            .words(["user", "admin"])
            .build();

        let state = SharedState::with_corpus(users);

        let result = ProductScheduler::new(["derp"], state);

        assert!(result.is_err());
    }

    /// test that an empty corpus returns an error when passed to the constructor
    #[test]
    fn test_product_with_empty_corpus() {
        let range = RangeCorpus::with_stop(0).name("range").build();
        assert!(range.is_err());

        let state = SharedState::with_corpus(
            Wordlist::with_words(Vec::<String>::new())
                .name("empty")
                .build(),
        );

        let result = ProductScheduler::new(["range"], state);
        assert!(result.is_err());
    }

    /// test that an empty set of corpora returns an error when passed to the constructor
    #[test]
    fn test_product_with_empty_corpora() {
        let state = SharedState::with_corpora([]);

        let result = ProductScheduler::new(["users"], state);

        assert!(result.is_err());
    }

    /// test that the iterator works as expected
    #[test]
    fn test_product_iterator() {
        let users = Wordlist::new()
            .words(["user", "admin"])
            .name("users")
            .build();
        let ids = RangeCorpus::with_stop(5).name("ids").build().unwrap();

        let state = SharedState::with_corpora([ids, users]);

        let order = ["users", "ids"];
        let mut scheduler = ProductScheduler::new(order, state).unwrap();

        let mut counter = 0;

        while Scheduler::next(&mut scheduler).is_ok() {
            counter += 1;
        }

        // users.len() * ids.len() = 2 * 5 = 10
        assert_eq!(counter, 10);
    }

    /// test that the iterator works as expected when there is only one corpus
    #[test]
    fn test_product_iterator_with_single_corpus() {
        let users = Wordlist::new()
            .words(["user", "admin"])
            .name("users")
            .build();

        let state = SharedState::with_corpus(users);

        let order = ["users"];
        let mut scheduler = ProductScheduler::new(order, state).unwrap();

        let mut counter = 0;

        while Iterator::next(&mut scheduler).is_some() {
            counter += 1;
        }

        // users.len() == 2
        assert_eq!(counter, 2);

        scheduler.reset();
    }

    /// test that the iterator works as expected when a corpus has an entry added to it
    #[test]
    fn test_product_iterator_with_add_to_corpus() {
        let users = Wordlist::new()
            .words(["user", "admin"])
            .name("users")
            .build();

        let ids = RangeCorpus::with_stop(5).name("ids").build().unwrap();

        let state = SharedState::with_corpora([ids, users]);

        let order = ["users", "ids"];
        let mut scheduler = ProductScheduler::new(order, state.clone()).unwrap();

        let mut counter = 0;

        while Scheduler::next(&mut scheduler).is_ok() {
            counter += 1;
        }

        // users.len() * ids.len() = 2 * 5 = 10
        assert_eq!(counter, 10);

        // request with a fuzzable field, that should propogate to the corpus as an additional entry
        let request = Request::from_url(
            "http://localhost",
            Some(&[ShouldFuzz::RequestBody(b"administrator")]),
        )
        .unwrap();

        state
            .add_request_fields_to_corpus("users", &request)
            .unwrap();

        let users_corpus = state.corpus_by_name("users").unwrap();
        assert_eq!(users_corpus.len(), 3); // new user made it to the corpus

        scheduler.reset();

        counter = 0;

        while Scheduler::next(&mut scheduler).is_ok() {
            counter += 1;
        }

        // users.len() * ids.len() = 3 * 5 = 15
        assert_eq!(counter, 15);

        // try it again with both corpora having an entry added to them

        state.add_request_fields_to_corpus("ids", &request).unwrap();
        state
            .add_request_fields_to_corpus("users", &request)
            .unwrap();

        let ids_corpus = state.corpus_by_name("ids").unwrap();

        assert_eq!(users_corpus.len(), 4);
        assert_eq!(ids_corpus.len(), 6);

        scheduler.reset();

        counter = 0;

        while Scheduler::next(&mut scheduler).is_ok() {
            counter += 1;
        }

        // users.len() * ids.len() = 4 * 6 = 24
        assert_eq!(counter, 24);
    }

    /// test that the iterator works as expected when a corpus has an entry added to it
    ///
    /// differs from teh test above in that we'll use the set of values i used to build
    /// the product scheduler
    ///
    /// given the corpus lengths `[4, 3, 3, 3]`, and a `ProductScheduler`, the
    /// expected iterations would be `[4, 12, 36, 108]`
    #[test]
    fn test_product_iterator_with_add_to_corpus_complex() {
        let outer = RangeCorpus::with_stop(2).name("outer").build().unwrap();
        let first_middle = RangeCorpus::with_stop(1)
            .name("first_middle")
            .build()
            .unwrap();
        let second_middle = RangeCorpus::with_stop(1)
            .name("second_middle")
            .build()
            .unwrap();
        let inner = RangeCorpus::with_stop(1).name("inner").build().unwrap();

        assert_eq!(outer.len(), 2);
        assert_eq!(first_middle.len(), 1);
        assert_eq!(second_middle.len(), 1);
        assert_eq!(inner.len(), 1);

        let state = SharedState::with_corpora([outer, first_middle, second_middle, inner]);

        let order = ["outer", "first_middle", "second_middle", "inner"];
        let mut scheduler = ProductScheduler::new(order, state.clone()).unwrap();

        let mut counter = 0;

        while Scheduler::next(&mut scheduler).is_ok() {
            counter += 1;
        }

        // 2 * 1 * 1 * 1 = 1
        assert_eq!(counter, 2);

        // request with 2 fuzzable fields
        let request = Request::from_url(
            "http://localhost/admin",
            Some(&[
                ShouldFuzz::RequestBody(b"administrator"),
                ShouldFuzz::URLPath,
            ]),
        )
        .unwrap();

        // when added to each corpus their legnth should increase by 2
        for name in order {
            state.add_request_fields_to_corpus(name, &request).unwrap();
        }

        assert_eq!(state.corpus_by_name("outer").unwrap().len(), 4);
        assert_eq!(state.corpus_by_name("first_middle").unwrap().len(), 3);
        assert_eq!(state.corpus_by_name("second_middle").unwrap().len(), 3);
        assert_eq!(state.corpus_by_name("inner").unwrap().len(), 3);

        scheduler.reset();

        counter = 0;

        while Scheduler::next(&mut scheduler).is_ok() {
            counter += 1;
        }

        // 4 * 3 * 3 * 3 = 108
        assert_eq!(counter, 108);
    }
}