use crate::config::RegExpConfig;
use crate::regexp::RegExp;
use itertools::Itertools;
use std::io::ErrorKind;
use std::path::PathBuf;
/// Panic message emitted when a builder is created from an empty set of test cases.
pub(crate) const MISSING_TEST_CASES_MESSAGE: &str =
    "No test cases have been provided for regular expression generation";

/// Panic message emitted when a minimum repetition quantity of zero is requested.
pub(crate) const MINIMUM_REPETITIONS_MESSAGE: &str =
    "Quantity of minimum repetitions must be greater than zero";

/// Panic message emitted when a minimum substring length of zero is requested.
pub(crate) const MINIMUM_SUBSTRING_LENGTH_MESSAGE: &str =
    "Minimum substring length must be greater than zero";
/// Builder that gathers test cases together with a [`RegExpConfig`] and
/// turns them into a regular expression string via its `build` method.
///
/// When the `python` feature is enabled, the type is additionally exposed
/// through `pyo3` as a Python class.
#[derive(Clone)]
#[cfg_attr(feature = "python", pyo3::prelude::pyclass)]
pub struct RegExpBuilder {
    /// The input strings the regular expression is generated from.
    pub(crate) test_cases: Vec<String>,
    /// The settings toggled by the builder's `with_*` / `without_*` methods.
    pub(crate) config: RegExpConfig,
}
impl RegExpBuilder {
    /// Creates a builder from a slice of test cases.
    ///
    /// Every element is cloned and converted into an owned `String`. The
    /// configuration starts out as `RegExpConfig::new()`.
    ///
    /// # Panics
    ///
    /// Panics with `MISSING_TEST_CASES_MESSAGE` if `test_cases` is empty.
    pub fn from<T: Clone + Into<String>>(test_cases: &[T]) -> Self {
        assert!(!test_cases.is_empty(), "{}", MISSING_TEST_CASES_MESSAGE);

        let owned_cases: Vec<String> = test_cases
            .iter()
            .map(|case| case.clone().into())
            .collect_vec();

        Self {
            test_cases: owned_cases,
            config: RegExpConfig::new(),
        }
    }

    /// Creates a builder whose test cases are the lines of the file at `file_path`.
    ///
    /// The whole file is read as UTF-8 text and each line becomes one test
    /// case. Unlike [`RegExpBuilder::from`], an empty file is not rejected
    /// here; it simply yields an empty list of test cases.
    ///
    /// # Panics
    ///
    /// Panics if the file cannot be read: a dedicated message is used when the
    /// file is not found, when its content is not valid UTF-8, and when
    /// permission is denied; any other I/O error is reported using the error's
    /// own display text.
    pub fn from_file<T: Into<PathBuf>>(file_path: T) -> Self {
        let path: PathBuf = file_path.into();

        // Every arm diverges, so the closure never has to produce a fallback value.
        let content = std::fs::read_to_string(path).unwrap_or_else(|error| match error.kind() {
            ErrorKind::NotFound => panic!("The specified file could not be found"),
            ErrorKind::InvalidData => {
                panic!("The specified file's encoding is not valid UTF-8")
            }
            ErrorKind::PermissionDenied => {
                panic!("Permission denied: The specified file could not be opened")
            }
            _ => panic!("{}", error),
        });

        Self {
            test_cases: content.lines().map(String::from).collect_vec(),
            config: RegExpConfig::new(),
        }
    }

    /// Enables the `is_digit_converted` setting and returns the builder for chaining.
    pub fn with_conversion_of_digits(&mut self) -> &mut Self {
        self.config.is_digit_converted = true;
        self
    }

    /// Enables the `is_non_digit_converted` setting and returns the builder for chaining.
    pub fn with_conversion_of_non_digits(&mut self) -> &mut Self {
        self.config.is_non_digit_converted = true;
        self
    }

    /// Enables the `is_space_converted` setting and returns the builder for chaining.
    pub fn with_conversion_of_whitespace(&mut self) -> &mut Self {
        self.config.is_space_converted = true;
        self
    }

    /// Enables the `is_non_space_converted` setting and returns the builder for chaining.
    pub fn with_conversion_of_non_whitespace(&mut self) -> &mut Self {
        self.config.is_non_space_converted = true;
        self
    }

    /// Enables the `is_word_converted` setting and returns the builder for chaining.
    pub fn with_conversion_of_words(&mut self) -> &mut Self {
        self.config.is_word_converted = true;
        self
    }

    /// Enables the `is_non_word_converted` setting and returns the builder for chaining.
    pub fn with_conversion_of_non_words(&mut self) -> &mut Self {
        self.config.is_non_word_converted = true;
        self
    }

    /// Enables the `is_repetition_converted` setting and returns the builder for chaining.
    pub fn with_conversion_of_repetitions(&mut self) -> &mut Self {
        self.config.is_repetition_converted = true;
        self
    }

    /// Enables the `is_case_insensitive_matching` setting and returns the builder for chaining.
    pub fn with_case_insensitive_matching(&mut self) -> &mut Self {
        self.config.is_case_insensitive_matching = true;
        self
    }

    /// Enables the `is_capturing_group_enabled` setting and returns the builder for chaining.
    pub fn with_capturing_groups(&mut self) -> &mut Self {
        self.config.is_capturing_group_enabled = true;
        self
    }

    /// Stores `quantity` as the configured minimum number of repetitions.
    ///
    /// # Panics
    ///
    /// Panics with `MINIMUM_REPETITIONS_MESSAGE` if `quantity` is zero.
    pub fn with_minimum_repetitions(&mut self, quantity: u32) -> &mut Self {
        assert!(quantity != 0, "{}", MINIMUM_REPETITIONS_MESSAGE);
        self.config.minimum_repetitions = quantity;
        self
    }

    /// Stores `length` as the configured minimum substring length.
    ///
    /// # Panics
    ///
    /// Panics with `MINIMUM_SUBSTRING_LENGTH_MESSAGE` if `length` is zero.
    pub fn with_minimum_substring_length(&mut self, length: u32) -> &mut Self {
        assert!(length != 0, "{}", MINIMUM_SUBSTRING_LENGTH_MESSAGE);
        self.config.minimum_substring_length = length;
        self
    }

    /// Enables escaping of non-ASCII characters.
    ///
    /// `use_surrogate_pairs` is stored in the
    /// `is_astral_code_point_converted_to_surrogate` setting; it overwrites
    /// whatever value a previous call may have set.
    pub fn with_escaping_of_non_ascii_chars(&mut self, use_surrogate_pairs: bool) -> &mut Self {
        let config = &mut self.config;
        config.is_non_ascii_char_escaped = true;
        config.is_astral_code_point_converted_to_surrogate = use_surrogate_pairs;
        self
    }

    /// Enables the `is_verbose_mode_enabled` setting and returns the builder for chaining.
    pub fn with_verbose_mode(&mut self) -> &mut Self {
        self.config.is_verbose_mode_enabled = true;
        self
    }

    /// Disables the start anchor by setting `is_start_anchor_disabled`.
    pub fn without_start_anchor(&mut self) -> &mut Self {
        self.config.is_start_anchor_disabled = true;
        self
    }

    /// Disables the end anchor by setting `is_end_anchor_disabled`.
    pub fn without_end_anchor(&mut self) -> &mut Self {
        self.config.is_end_anchor_disabled = true;
        self
    }

    /// Disables both the start and the end anchor.
    pub fn without_anchors(&mut self) -> &mut Self {
        self.without_start_anchor().without_end_anchor()
    }

    /// Enables the `is_output_colorized` setting.
    ///
    /// Only compiled with the `cli` feature and hidden from the generated docs.
    #[cfg(feature = "cli")]
    #[doc(hidden)]
    pub fn with_syntax_highlighting(&mut self) -> &mut Self {
        self.config.is_output_colorized = true;
        self
    }

    /// Builds the regular expression from the collected test cases and
    /// settings and returns it as a `String`.
    ///
    /// The test cases are handed to `RegExp::from` by mutable reference, which
    /// is why this method takes `&mut self`.
    pub fn build(&mut self) -> String {
        let regexp = RegExp::from(&mut self.test_cases, &self.config);
        regexp.to_string()
    }
}