#![doc(html_root_url = "https://docs.rs/bulk_examples_generator/0.1.0")]
use aho_corasick::AhoCorasick;
use indicatif::{ProgressBar, ProgressStyle};
use pest::error::{Error, ErrorVariant, InputLocation};
use pest_meta::ast::Rule as AstRule;
use pest_meta::parser::{self, Rule};
use pest_meta::{optimizer, validator};
use pest_vm::Vm;
use rayon::prelude::*;
use std::collections::HashMap;
use std::fs::File;
use std::io::Write;
use std::path::Path;
pub mod config;
mod generator;
pub use pest;
pub use pest_meta;
use crate::config::*;
use crate::generator::*;
pub fn compile_grammar(grammar: String) -> Result<Grammar, Vec<HashMap<String, String>>> {
let grammar = replace_builtin_rules(&grammar).unwrap();
let result = parser::parse(Rule::grammar_rules, &grammar).map_err(|error| {
error.renamed_rules(|rule| match *rule {
Rule::grammar_rule => "rule".to_owned(),
Rule::_push => "push".to_owned(),
Rule::assignment_operator => "`=`".to_owned(),
Rule::silent_modifier => "`_`".to_owned(),
Rule::atomic_modifier => "`@`".to_owned(),
Rule::compound_atomic_modifier => "`$`".to_owned(),
Rule::non_atomic_modifier => "`!`".to_owned(),
Rule::opening_brace => "`{`".to_owned(),
Rule::closing_brace => "`}`".to_owned(),
Rule::opening_paren => "`(`".to_owned(),
Rule::positive_predicate_operator => "`&`".to_owned(),
Rule::negative_predicate_operator => "`!`".to_owned(),
Rule::sequence_operator => "`&`".to_owned(),
Rule::choice_operator => "`|`".to_owned(),
Rule::optional_operator => "`?`".to_owned(),
Rule::repeat_operator => "`*`".to_owned(),
Rule::repeat_once_operator => "`+`".to_owned(),
Rule::comma => "`,`".to_owned(),
Rule::closing_paren => "`)`".to_owned(),
Rule::quote => "`\"`".to_owned(),
Rule::insensitive_string => "`^`".to_owned(),
Rule::range_operator => "`..`".to_owned(),
Rule::single_quote => "`'`".to_owned(),
other_rule => format!("{:?}", other_rule),
})
});
let pairs = match result {
Ok(pairs) => pairs,
Err(error) => {
return Err(vec![convert_error(error, &grammar)]);
}
};
if let Err(errors) = validator::validate_pairs(pairs.clone()) {
return Err(errors
.into_iter()
.map(|e| convert_error(e, &grammar))
.collect());
}
let ast = match parser::consume_rules(pairs) {
Ok(ast) => ast,
Err(errors) => {
return Err(errors
.into_iter()
.map(|e| convert_error(e, &grammar))
.collect());
}
};
let hashmap_ast_rules: HashMap<String, AstRule> = ast
.iter()
.map(|rule| (rule.name.to_string(), rule.clone()))
.collect();
Ok(Grammar {
rules: hashmap_ast_rules,
})
}
/// Generates `quantity` examples in parallel from `grammar_string`, starting
/// at rule `start`.
///
/// What happens to each example depends on the flags:
///
/// * `config.benchmark_mode`: examples are generated and discarded.
/// * `print_stdout`: each example is printed to stdout (prefixed with its
///   1-based number when `print_progress` is set); a failed generation is
///   reported on stderr and the remaining examples are still produced.
/// * otherwise: the results are collected and returned; `print_progress`
///   prints a line per generated example.
///
/// # Returns
///
/// One `Result` per example in the collecting mode, and an empty vector in
/// the benchmark and stdout modes.
pub fn parallel_generate_examples(
    grammar_string: String,
    quantity: u32,
    start: String,
    config: &GeneratorConfig,
    print_progress: bool,
    print_stdout: bool,
) -> Vec<Result<String, String>> {
    let input_data = InputData::new(grammar_string);
    // Iterate over `0..quantity` and add one when a 1-based number is needed:
    // `quantity + 1` would overflow for `u32::MAX`, while `index + 1` cannot
    // because `index < quantity`.
    let indices = (0..quantity).into_par_iter();

    if config.benchmark_mode {
        indices.for_each(|_| {
            let _r = generator::generate_example(input_data.clone(), start.clone(), config);
        });
        Vec::new()
    } else if print_stdout {
        indices.for_each(|index| {
            let number = index + 1;
            match generator::generate_example(input_data.clone(), start.clone(), config) {
                Ok(example) if print_progress => {
                    println!("Example #{} generated:\r\n{}", number, example)
                }
                Ok(example) => println!("{}", example),
                // A single failed example must not abort the whole batch.
                Err(error) => eprintln!("Example #{} failed: {}", number, error),
            }
        });
        Vec::new()
    } else {
        indices
            .map(|index| {
                let result =
                    generator::generate_example(input_data.clone(), start.clone(), config);
                if print_progress {
                    println!("Example #{} generated", index + 1);
                }
                result
            })
            .collect()
    }
}
/// Generates `quantity` examples in parallel and writes each one to its own
/// file inside `path`, showing a progress bar while doing so.
///
/// The file name of example `i` (counting from 0) is `name_format` with every
/// `{}` replaced by `i`. A failed generation is printed and skipped; the
/// progress bar only advances for examples that were written.
///
/// # Panics
///
/// Panics if a file cannot be created or written.
pub fn parallel_generate_save_examples<S>(
    grammar_string: String,
    quantity: u32,
    start: String,
    path: S,
    name_format: String,
    config: &GeneratorConfig,
) where
    S: AsRef<Path>,
{
    let output_dir = path.as_ref();

    let progress_bar = ProgressBar::new(u64::from(quantity));
    let bar_style = ProgressStyle::default_bar()
        .template("[{elapsed_precise}] {wide_bar} {pos:>3}/{len:3} {msg} {percent}% {eta_precise}")
        .progress_chars("██░");
    progress_bar.set_style(bar_style);
    progress_bar.tick();

    let input_data = InputData::new(grammar_string);
    (0..quantity).into_par_iter().for_each(|index| {
        let example = match generator::generate_example(input_data.clone(), start.clone(), config)
        {
            Ok(example) => example,
            Err(error) => {
                println!("{}", error);
                return;
            }
        };

        let file_name = name_format.replace("{}", &index.to_string());
        let mut file = File::create(output_dir.join(file_name)).expect("Unable to create file");
        file.write_all(example.as_bytes())
            .expect("Unable to write data");
        progress_bar.inc(1);
    });

    progress_bar.finish();
}
/// Checks whether `input` can be parsed by `rule` of a compiled `grammar`.
///
/// The grammar's rules are optimized and loaded into a pest VM, which then
/// parses the input.
///
/// # Errors
///
/// Returns the formatted parse error when the input does not match.
pub fn parse_input<'a>(grammar: Grammar, rule: String, input: String) -> Result<(), String> {
    let optimized = optimizer::optimize(grammar.rules.values().cloned().collect());
    parse_input_with_vm(Vm::new(optimized), rule, input)
}
/// Parses `input` with `rule` on an already constructed VM.
///
/// The parsed pairs are discarded; only success or failure is reported. On
/// failure the error is rendered to a string.
fn parse_input_with_vm<'a>(vm: Vm, rule: String, input: String) -> Result<(), String> {
    vm.parse(&rule, &input)
        .map(|_pairs| ())
        .map_err(|failure| failure.renamed_rules(|name| name.to_string()).to_string())
}
fn convert_error(error: Error<Rule>, grammar: &str) -> HashMap<String, String> {
let message = match error.variant {
ErrorVariant::CustomError { message } => message,
_ => unreachable!(),
};
match error.location {
InputLocation::Pos(pos) => {
let mut map = HashMap::new();
map.insert("from".to_owned(), line_col(pos, grammar));
map.insert("to".to_owned(), line_col(pos, grammar));
map.insert("message".to_owned(), format!("{}", message));
map
}
InputLocation::Span((start, end)) => {
let mut map = HashMap::new();
map.insert("from".to_owned(), line_col(start, grammar));
map.insert("to".to_owned(), line_col(end, grammar));
map.insert("message".to_owned(), format!("{}", message));
map
}
}
}
/// Renders the byte offset `pos` of `input` as a zero-based `"(line, column)"`
/// string.
///
/// `"\n"` and `"\r\n"` each end a line; a `'\r'` that is not followed by
/// `'\n'` counts as an ordinary column. Columns are counted in characters,
/// not bytes.
///
/// # Panics
///
/// Panics if `pos` is past the end of `input` or not on a character boundary.
fn line_col(pos: usize, input: &str) -> String {
    let mut line = 0;
    let mut col = 0;
    let mut chars = input[..pos].chars().peekable();

    while let Some(current) = chars.next() {
        let ends_line = match current {
            '\n' => true,
            '\r' => chars.peek() == Some(&'\n'),
            _ => false,
        };

        if ends_line {
            if current == '\r' {
                // Swallow the '\n' half of a "\r\n" pair.
                chars.next();
            }
            line += 1;
            col = 0;
        } else {
            col += 1;
        }
    }

    format!("({}, {})", line, col)
}
/// Expands a fixed set of pest built-in rules into explicit expressions.
///
/// Only whole identifiers are expanded: a rule name that merely contains a
/// built-in name (for example `COMPANY`) is left alone, and the contents of
/// string literals, character literals and comments are copied verbatim.
///
/// # Errors
///
/// This implementation never fails; the `Result` is kept so existing callers
/// do not need to change.
fn replace_builtin_rules(grammar: &str) -> Result<String, std::io::Error> {
    // Built-in rule name paired with the expression substituted for it.
    const BUILTINS: [(&str, &str); 11] = [
        ("ANY", "('\u{00}'..'\u{10FFFF}')"),
        ("ASCII_DIGIT", "('0'..'9')"),
        ("ASCII_NONZERO_DIGIT", "('1'..'9')"),
        ("ASCII_BIN_DIGIT", "('0'..'1')"),
        ("ASCII_OCT_DIGIT", "('0'..'7')"),
        ("ASCII_HEX_DIGIT", "('0'..'9' | 'a'..'f' | 'A'..'F')"),
        ("ASCII_ALPHA_LOWER", "('a'..'z')"),
        ("ASCII_ALPHA_UPPER", "('A'..'Z')"),
        ("ASCII_ALPHANUMERIC", "('0'..'9' | 'a'..'z' | 'A'..'Z')"),
        ("NEWLINE", r#"("\n" | "\r\n" | "\r")"#),
        ("ASCII_ALPHA", "('a'..'z' | 'A'..'Z')"),
    ];

    let mut expanded = String::with_capacity(grammar.len());
    let mut rest = grammar;

    while let Some(first) = rest.chars().next() {
        let consumed = if first == '_' || first.is_ascii_alphabetic() {
            // Take the whole identifier so built-in names only match exactly.
            let end = rest
                .find(|c: char| c != '_' && !c.is_ascii_alphanumeric())
                .unwrap_or(rest.len());
            let word = &rest[..end];
            let replacement = BUILTINS
                .iter()
                .find(|(name, _)| *name == word)
                .map_or(word, |(_, expansion)| *expansion);
            expanded.push_str(replacement);
            end
        } else {
            // Literals and comments are opaque; everything else is copied
            // one character at a time.
            let end = if first == '"' {
                string_literal_len(rest)
            } else if first == '\'' {
                char_literal_len(rest).unwrap_or(1)
            } else if rest.starts_with("//") {
                rest.find('\n').unwrap_or(rest.len())
            } else if rest.starts_with("/*") {
                block_comment_len(rest)
            } else {
                first.len_utf8()
            };
            expanded.push_str(&rest[..end]);
            end
        };
        rest = &rest[consumed..];
    }

    Ok(expanded)
}

/// Byte length of the double-quoted literal at the start of `text`, or the
/// whole of `text` when the literal is not terminated.
fn string_literal_len(text: &str) -> usize {
    let mut chars = text.char_indices().skip(1);
    while let Some((idx, ch)) = chars.next() {
        match ch {
            // Skip the escaped character so `\"` does not end the literal.
            '\\' => {
                chars.next();
            }
            '"' => return idx + 1,
            _ => {}
        }
    }
    text.len()
}

/// Byte length of the single-quoted character literal at the start of `text`,
/// or `None` when the text there does not have the shape of one.
fn char_literal_len(text: &str) -> Option<usize> {
    let mut chars = text.char_indices().skip(1).peekable();
    let (_, first) = chars.next()?;
    if first == '\\' {
        // The character right after the backslash (e.g. the quote in `'\''`).
        chars.next()?;
        // Remaining payload of `\x..` / `\u{..}` escapes.
        while let Some(&(_, c)) = chars.peek() {
            if c.is_ascii_hexdigit() || c == '{' || c == '}' {
                chars.next();
            } else {
                break;
            }
        }
    }
    match chars.next() {
        Some((idx, '\'')) => Some(idx + 1),
        _ => None,
    }
}

/// Byte length of the (possibly nested) `/* ... */` comment at the start of
/// `text`, or the whole of `text` when the comment is not terminated.
fn block_comment_len(text: &str) -> usize {
    let bytes = text.as_bytes();
    let mut depth = 0usize;
    let mut i = 0;
    while i < bytes.len() {
        if bytes[i..].starts_with(b"/*") {
            depth += 1;
            i += 2;
        } else if bytes[i..].starts_with(b"*/") {
            depth -= 1;
            i += 2;
            if depth == 0 {
                return i;
            }
        } else {
            i += 1;
        }
    }
    text.len()
}