use std::collections::HashSet;
use std::fmt::{self, Debug, Display, Formatter};
use std::fs::File;
use std::io::{BufRead, BufReader};
use std::marker::PhantomData;
use std::ops::{Index, IndexMut};
use std::path::Path;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use tracing::{error, instrument};
use super::{Corpus, CorpusType, Named};
use crate::corpora::typestate::{
CorpusBuildState, HasItems, HasName, NoItems, NoName, NotUnique, Unique,
};
use crate::error::FeroxFuzzError;
use crate::input::Data;
use crate::std_ext::convert::AsInner;
use crate::std_ext::fmt::DisplayExt;
use crate::std_ext::ops::Len;
/// A named corpus that stores its [`Data`] entries in a `Vec`.
///
/// Instances are produced by the builder returned from [`Wordlist::new`],
/// [`Wordlist::with_words`], or [`Wordlist::from_file`].
#[derive(Clone, Default, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct Wordlist {
/// The corpus entries; `Corpus::add` appends new values to the end.
items: Vec<Data>,
/// `Some` when the wordlist was built as unique, in which case
/// `Corpus::add` consults the set to skip values that are already present;
/// `None` when duplicates are allowed.
unique_items: Option<HashSet<Data>>,
/// The name reported by `Named::name`.
corpus_name: String,
}
/// Shared iteration: `for item in &wordlist` yields `&Data` in stored order.
impl<'i> IntoIterator for &'i Wordlist {
    type Item = &'i Data;
    type IntoIter = std::slice::Iter<'i, Data>;

    #[inline]
    fn into_iter(self) -> Self::IntoIter {
        self.items.as_slice().iter()
    }
}
/// Mutable iteration: `for item in &mut wordlist` yields `&mut Data` in stored order.
impl<'i> IntoIterator for &'i mut Wordlist {
    type Item = &'i mut Data;
    type IntoIter = std::slice::IterMut<'i, Data>;

    #[inline]
    fn into_iter(self) -> Self::IntoIter {
        self.items.as_mut_slice().iter_mut()
    }
}
/// Consuming iteration: `for item in wordlist` yields owned `Data` in stored order.
impl IntoIterator for Wordlist {
    type Item = Data;
    type IntoIter = std::vec::IntoIter<Data>;

    #[inline]
    fn into_iter(self) -> Self::IntoIter {
        // Only the item vector is iterated; the remaining fields are dropped.
        let Self { items, .. } = self;
        items.into_iter()
    }
}
impl Wordlist {
    /// Starts a [`WordlistBuilder`] with no items, no name, and no uniqueness
    /// requirement.
    ///
    /// Note that this returns the builder rather than a `Wordlist`; the
    /// builder's `build` method becomes available once a name has been set.
    #[must_use]
    #[allow(clippy::new_ret_no_self)]
    pub const fn new() -> WordlistBuilder<NoItems, NoName, NotUnique> {
        WordlistBuilder {
            items: Vec::new(),
            corpus_name: None,
            unique_items: None,
            _item_state: PhantomData,
            _name_state: PhantomData,
            _unique_state: PhantomData,
        }
    }

    /// Starts a [`WordlistBuilder`] pre-populated with `words`.
    ///
    /// Every element of `words` is converted with `Data::from` and stored in
    /// iteration order.
    #[inline]
    pub fn with_words<I, T>(words: I) -> WordlistBuilder<HasItems, NoName, NotUnique>
    where
        Data: From<T>,
        I: IntoIterator<Item = T>,
    {
        WordlistBuilder {
            items: words.into_iter().map(Data::from).collect(),
            corpus_name: None,
            unique_items: None,
            _item_state: PhantomData,
            _name_state: PhantomData,
            _unique_state: PhantomData,
        }
    }

    /// Starts a [`WordlistBuilder`] populated from the lines of the file at
    /// `file_path`.
    ///
    /// Empty lines and lines beginning with `#` are skipped; every other line
    /// becomes one item, in file order.
    ///
    /// If a line cannot be read (for example because it is not valid UTF-8),
    /// the failure is logged, reading stops, and the lines collected up to
    /// that point are kept.
    ///
    /// # Errors
    ///
    /// Returns [`FeroxFuzzError::CorpusFileOpenError`] when the file cannot be
    /// opened.
    #[instrument(skip_all, level = "trace")]
    pub fn from_file<P>(
        file_path: P,
    ) -> Result<WordlistBuilder<HasItems, NoName, NotUnique>, FeroxFuzzError>
    where
        P: AsRef<Path>,
        Self: Corpus,
    {
        let path = file_path.as_ref();

        let file = File::open(path).map_err(|source| {
            error!(
                file = path.to_string_lossy().to_string(),
                "could not open file while populating the corpus: {}", source
            );

            FeroxFuzzError::CorpusFileOpenError {
                source,
                path: path.to_string_lossy().to_string(),
            }
        })?;

        let reader = BufReader::new(file);
        let mut items = Vec::new();

        for line in reader.lines() {
            let line = match line {
                Ok(line) => line,
                Err(source) => {
                    // Stop at the first unreadable line, as before, but leave
                    // a trace of it instead of silently truncating the corpus.
                    error!(
                        file = path.to_string_lossy().to_string(),
                        "could not read line while populating the corpus, remaining lines skipped: {}",
                        source
                    );
                    break;
                }
            };

            if line.is_empty() || line.starts_with('#') {
                // blank lines and comments carry no corpus entry
                continue;
            }

            items.push(line.into());
        }

        Ok(WordlistBuilder {
            items,
            unique_items: None,
            corpus_name: None,
            _item_state: PhantomData,
            _name_state: PhantomData,
            _unique_state: PhantomData,
        })
    }

    /// Returns the stored items as a mutable slice.
    ///
    /// Edits made through the slice touch only the item vector; the set used
    /// by `Corpus::add` to detect duplicates is not updated.
    #[inline]
    pub fn items_mut(&mut self) -> &mut [Data] {
        &mut self.items
    }

    /// Returns an iterator over mutable references to the stored items.
    #[must_use]
    pub fn iter_mut(&mut self) -> <&mut [Data] as IntoIterator>::IntoIter {
        <&mut Self as IntoIterator>::into_iter(self)
    }

    /// Returns an iterator over shared references to the stored items.
    #[must_use]
    pub fn iter(&self) -> <&[Data] as IntoIterator>::IntoIter {
        <&Self as IntoIterator>::into_iter(self)
    }
}
/// Exposes the underlying item vector.
impl AsInner for Wordlist {
    type Type = Vec<Data>;

    fn inner(&self) -> &Self::Type {
        let Self { items, .. } = self;
        items
    }
}
/// Formats the wordlist using whatever `display_top(3)` produces.
impl Display for Wordlist {
    #[inline]
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        let preview = self.display_top(3);
        write!(f, "{}", preview)
    }
}
impl Corpus for Wordlist {
    /// Appends `value` to the wordlist.
    ///
    /// When the wordlist tracks uniqueness, a value that is already present
    /// is ignored; otherwise it is recorded in the set before being appended.
    #[inline]
    fn add(&mut self, value: Data) {
        match self.unique_items.as_mut() {
            // already known: leave both the set and the vector untouched
            Some(seen) if seen.contains(&value) => return,
            Some(seen) => {
                seen.insert(value.clone());
            }
            None => {}
        }

        self.items.push(value);
    }

    /// Returns the item at `index`, or `None` when `index` is out of range.
    fn get(&self, index: usize) -> Option<&Data> {
        self.items.as_slice().get(index)
    }

    /// Returns every stored item as a slice.
    #[inline]
    fn items(&self) -> &[Data] {
        self.items.as_slice()
    }
}
impl Named for Wordlist {
    /// Returns the name given to this wordlist when it was built.
    #[inline]
    fn name(&self) -> &str {
        self.corpus_name.as_str()
    }
}
/// Positional read access: `wordlist[i]`.
///
/// # Panics
///
/// Panics when `index` is out of bounds.
impl Index<usize> for Wordlist {
    type Output = Data;

    fn index(&self, index: usize) -> &Self::Output {
        &self.items[index]
    }
}
/// Positional write access: `wordlist[i] = value`.
///
/// # Panics
///
/// Panics when `index` is out of bounds.
impl IndexMut<usize> for Wordlist {
    fn index_mut(&mut self, index: usize) -> &mut Self::Output {
        &mut self.items[index]
    }
}
impl Len for Wordlist {
    /// Returns the number of stored items.
    #[inline]
    fn len(&self) -> usize {
        Vec::len(&self.items)
    }
}
/// Typestate builder for [`Wordlist`].
///
/// The three type parameters record, at compile time, whether items have been
/// added (`ItemState`), whether a name has been set (`NameState`), and whether
/// `unique()` has been requested (`UniqueNess`). `build` is only implemented
/// for states in which the name has been set.
pub struct WordlistBuilder<ItemState, NameState, UniqueNess>
where
ItemState: CorpusBuildState,
NameState: CorpusBuildState,
UniqueNess: CorpusBuildState,
{
/// Items collected so far, in the order they were supplied.
items: Vec<Data>,
/// Carried through the builder unchanged; the `build` methods decide what
/// the finished wordlist's uniqueness set contains.
unique_items: Option<HashSet<Data>>,
/// `Some` once `name` has been called.
corpus_name: Option<String>,
// zero-sized markers that carry the typestate parameters
_item_state: PhantomData<ItemState>,
_name_state: PhantomData<NameState>,
_unique_state: PhantomData<UniqueNess>,
}
impl<ItemState, UniqueNess> WordlistBuilder<ItemState, NoName, UniqueNess>
where
    ItemState: CorpusBuildState,
    UniqueNess: CorpusBuildState,
{
    /// Sets the corpus name and moves the builder into the `HasName` state.
    ///
    /// Only available while no name has been set; items and the uniqueness
    /// state are carried over unchanged.
    pub fn name(self, corpus_name: &str) -> WordlistBuilder<ItemState, HasName, UniqueNess> {
        let Self {
            items,
            unique_items,
            ..
        } = self;

        WordlistBuilder {
            items,
            unique_items,
            corpus_name: Some(String::from(corpus_name)),
            _item_state: PhantomData,
            _name_state: PhantomData,
            _unique_state: PhantomData,
        }
    }
}
impl<ItemState, NameState> WordlistBuilder<ItemState, NameState, NotUnique>
where
    ItemState: CorpusBuildState,
    NameState: CorpusBuildState,
{
    /// Moves the builder into the `Unique` state.
    ///
    /// Only the typestate changes here; items, name, and the stored
    /// uniqueness set are passed through untouched.
    #[allow(clippy::missing_const_for_fn)]
    pub fn unique(self) -> WordlistBuilder<ItemState, NameState, Unique> {
        let Self {
            items,
            unique_items,
            corpus_name,
            ..
        } = self;

        WordlistBuilder {
            items,
            unique_items,
            corpus_name,
            _item_state: PhantomData,
            _name_state: PhantomData,
            _unique_state: PhantomData,
        }
    }
}
impl<ItemState, NameState, UniqueNess> WordlistBuilder<ItemState, NameState, UniqueNess>
where
    ItemState: CorpusBuildState,
    NameState: CorpusBuildState,
    UniqueNess: CorpusBuildState,
{
    /// Appends a single word and moves the builder into the `HasItems` state.
    ///
    /// `word` is converted with `Data::from`; name and uniqueness state are
    /// carried over unchanged.
    pub fn word<T>(self, word: T) -> WordlistBuilder<HasItems, NameState, UniqueNess>
    where
        Data: From<T>,
    {
        let Self {
            mut items,
            unique_items,
            corpus_name,
            ..
        } = self;

        items.push(Data::from(word));

        WordlistBuilder {
            items,
            unique_items,
            corpus_name,
            _item_state: PhantomData,
            _name_state: PhantomData,
            _unique_state: PhantomData,
        }
    }

    /// Appends every element of `words`, in iteration order, and moves the
    /// builder into the `HasItems` state.
    ///
    /// Each element is converted with `Data::from`; name and uniqueness state
    /// are carried over unchanged.
    pub fn words<I, T>(self, words: I) -> WordlistBuilder<HasItems, NameState, UniqueNess>
    where
        Data: From<T>,
        I: IntoIterator<Item = T>,
    {
        let Self {
            mut items,
            unique_items,
            corpus_name,
            ..
        } = self;

        let converted = words.into_iter().map(Data::from);
        items.extend(converted);

        WordlistBuilder {
            items,
            unique_items,
            corpus_name,
            _item_state: PhantomData,
            _name_state: PhantomData,
            _unique_state: PhantomData,
        }
    }
}
impl WordlistBuilder<HasItems, HasName, Unique> {
pub fn build(mut self) -> CorpusType {
self.items.sort_unstable();
self.items.dedup();
let mut unique_items = HashSet::with_capacity(self.items.len());
unique_items.extend(self.items.iter().cloned());
CorpusType::Wordlist(Wordlist {
items: self.items,
unique_items: Some(unique_items),
corpus_name: self.corpus_name.unwrap(),
})
}
}
impl WordlistBuilder<HasItems, HasName, NotUnique> {
    /// Finishes the builder, producing a [`Wordlist`] wrapped in
    /// [`CorpusType::Wordlist`].
    ///
    /// Items are kept exactly as supplied, duplicates included, and the
    /// finished wordlist has no uniqueness set.
    ///
    /// # Panics
    ///
    /// Panics if no name is stored; within this file the `HasName` state is
    /// only produced by `name`, which always stores one.
    pub fn build(self) -> CorpusType {
        let Self {
            items, corpus_name, ..
        } = self;

        let wordlist = Wordlist {
            items,
            unique_items: None,
            corpus_name: corpus_name.unwrap(),
        };

        CorpusType::Wordlist(wordlist)
    }
}
impl WordlistBuilder<NoItems, HasName, Unique> {
    /// Finishes the builder, producing an empty [`Wordlist`] wrapped in
    /// [`CorpusType::Wordlist`].
    ///
    /// The wordlist starts with no items and an empty uniqueness set, so
    /// values added later through `Corpus::add` are de-duplicated.
    ///
    /// # Panics
    ///
    /// Panics if no name is stored; within this file the `HasName` state is
    /// only produced by `name`, which always stores one.
    pub fn build(self) -> CorpusType {
        let corpus_name = self.corpus_name.unwrap();

        let wordlist = Wordlist {
            items: Vec::new(),
            unique_items: Some(HashSet::new()),
            corpus_name,
        };

        CorpusType::Wordlist(wordlist)
    }
}
impl WordlistBuilder<NoItems, HasName, NotUnique> {
    /// Finishes the builder, producing an empty [`Wordlist`] wrapped in
    /// [`CorpusType::Wordlist`].
    ///
    /// The wordlist starts with no items and no uniqueness set.
    ///
    /// # Panics
    ///
    /// Panics if no name is stored; within this file the `HasName` state is
    /// only produced by `name`, which always stores one.
    pub fn build(self) -> CorpusType {
        let corpus_name = self.corpus_name.unwrap();

        let wordlist = Wordlist {
            items: Vec::new(),
            unique_items: None,
            corpus_name,
        };

        CorpusType::Wordlist(wordlist)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// `word` and `words` can be mixed, with `name` called in between, and
    /// the items keep the order in which they were supplied.
    #[test]
    fn test_wordlist_builder_with_single_word() {
        let wordlist = Wordlist::new()
            .word("one")
            .word("two")
            .word("three")
            .name("words")
            .words(["four", "five", "six"])
            .build();

        assert_eq!(wordlist.len(), 6);
        assert_eq!(
            wordlist.items(),
            &["one", "two", "three", "four", "five", "six"]
        );
        assert_eq!(wordlist.name(), "words");
    }

    /// `words` followed by `word` appends in call order.
    #[test]
    fn test_wordlist_builder_with_both_word_methods() {
        let wordlist = Wordlist::new()
            .words(["one", "two", "three"])
            .word("four")
            .name("words")
            .build();

        assert_eq!(wordlist.len(), 4);
        assert_eq!(wordlist.items(), &["one", "two", "three", "four"]);
        assert_eq!(wordlist.name(), "words");
    }

    /// `name` may be called before any items are added.
    #[test]
    fn test_wordlist_builder_with_name_first() {
        let wordlist = Wordlist::new()
            .name("words")
            .words(["one", "two", "three"])
            .word("four")
            .build();

        assert_eq!(wordlist.len(), 4);
        assert_eq!(wordlist.items(), &["one", "two", "three", "four"]);
        assert_eq!(wordlist.name(), "words");
    }

    /// `unique` called first, before any items or the name are supplied.
    #[test]
    fn test_wordlist_with_unique_items_first() {
        let wordlist = Wordlist::new()
            .unique()
            .words(["one", "two", "three", "one", "two", "three"])
            .name("words")
            .build();

        assert_eq!(wordlist.len(), 3);

        // membership only: the unique build does not keep insertion order
        for item in wordlist.items() {
            assert!([Data::from("one"), Data::from("two"), Data::from("three")].contains(item));
        }

        assert_eq!(wordlist.name(), "words");
    }

    /// `unique` called between adding the items and setting the name.
    #[test]
    fn test_wordlist_with_unique_items_second() {
        let wordlist = Wordlist::new()
            .words(["one", "two", "three", "one", "two", "three"])
            .unique()
            .name("words")
            .build();

        assert_eq!(wordlist.len(), 3);

        for item in wordlist.items() {
            assert!([Data::from("one"), Data::from("two"), Data::from("three")].contains(item));
        }

        assert_eq!(wordlist.name(), "words");
    }

    /// `unique` called last, right before `build`.
    #[test]
    fn test_wordlist_with_unique_items_last() {
        let wordlist = Wordlist::new()
            .words(["one", "two", "three", "one", "two", "three"])
            .name("words")
            .unique()
            .build();

        assert_eq!(wordlist.len(), 3);

        for item in wordlist.items() {
            assert!([Data::from("one"), Data::from("two"), Data::from("three")].contains(item));
        }

        assert_eq!(wordlist.name(), "words");
    }

    /// A wordlist built as unique rejects duplicates passed to `Corpus::add`.
    #[test]
    fn test_unique_wordlist_remains_unique_after_using_corpus_add() {
        let mut wordlist = Wordlist::new()
            .words(["one", "two"])
            .name("words")
            .unique()
            .build();

        wordlist.add("one".into());
        wordlist.add("two".into());
        wordlist.add("three".into());
        wordlist.add("three".into());

        assert_eq!(wordlist.len(), 3);

        for item in wordlist.items() {
            assert!([Data::from("one"), Data::from("two"), Data::from("three"),].contains(item));
        }

        assert_eq!(wordlist.name(), "words");
    }
}