mo/lib.rs
1#![doc(html_root_url = "https://docs.rs/mo/0.1.0")]
2//! # Usage
3//!
//! mo comes in two flavors: a binary or a crate (library)
5//!
6//! For a basic/medium/advanced usage of the binary/library, please see the [Readme](https://github.com/yuulive/mo).
7//!
8//! ## Frequently Asked Questions? (FAQ)
9//!
10//! See the [Readme](https://github.com/yuulive/mo).
11//!
12//! ## I just want to see how this code works
13//!
//! Please read the Readme first. If you really want to see the code, go ahead
15//!
16use aho_corasick::AhoCorasick;
17use indicatif::{ProgressBar, ProgressStyle};
18use pest::error::{Error, ErrorVariant, InputLocation};
19use pest_meta::ast::Rule as AstRule;
20use pest_meta::parser::{self, Rule};
21use pest_meta::{optimizer, validator};
22use pest_vm::Vm;
23use rayon::prelude::*;
24use std::collections::HashMap;
25use std::fs::File;
26use std::io::Write;
27use std::path::Path;
28
29pub mod config;
30mod generator;
31
32// Re-exports
33pub use pest;
34pub use pest_meta;
35
36use crate::config::*;
37use crate::generator::*;
38
39/// Compile a grammar string and creates a `HashMap` with rules found as keys and their components (AST) as entries
40///
41/// In this step, the grammar is validated with the pest reference grammar, and the built-in rules are replaced for
42/// their equivalents
43/// ```
44/// use mo::compile_grammar;
45///
46/// // Grammar string
47/// let mut grammar = r#"
48/// language = {"Rust" | "Python" | "Go" | "Java" | "PHP" | "Haskell"}
49/// one = {"1"}
50/// daysNumber = {one ~ " day" | !one ~ ASCII_NONZERO_DIGIT ~ " days"}
51/// sentence = {"I have been programming in " ~ language ~ " for " ~ daysNumber ~ "."}
52/// "#;
53///
54/// // Generate the ast
55/// let grammar_ast = compile_grammar(grammar.to_string());
56///
57/// println!("{:?}", grammar_ast);
58/// ```
59pub fn compile_grammar(grammar: String) -> Result<Grammar, Vec<HashMap<String, String>>> {
60 // Replace builtin pest rules for their equivalents
61 let grammar = replace_builtin_rules(&grammar).unwrap();
62
63 // Print grammar after replaces builtin rules
64 // println!("{}", grammar.clone());
65
66 let result = parser::parse(Rule::grammar_rules, &grammar).map_err(|error| {
67 error.renamed_rules(|rule| match *rule {
68 Rule::grammar_rule => "rule".to_owned(),
69 Rule::_push => "push".to_owned(),
70 Rule::assignment_operator => "`=`".to_owned(),
71 Rule::silent_modifier => "`_`".to_owned(),
72 Rule::atomic_modifier => "`@`".to_owned(),
73 Rule::compound_atomic_modifier => "`$`".to_owned(),
74 Rule::non_atomic_modifier => "`!`".to_owned(),
75 Rule::opening_brace => "`{`".to_owned(),
76 Rule::closing_brace => "`}`".to_owned(),
77 Rule::opening_paren => "`(`".to_owned(),
78 Rule::positive_predicate_operator => "`&`".to_owned(),
79 Rule::negative_predicate_operator => "`!`".to_owned(),
80 Rule::sequence_operator => "`&`".to_owned(),
81 Rule::choice_operator => "`|`".to_owned(),
82 Rule::optional_operator => "`?`".to_owned(),
83 Rule::repeat_operator => "`*`".to_owned(),
84 Rule::repeat_once_operator => "`+`".to_owned(),
85 Rule::comma => "`,`".to_owned(),
86 Rule::closing_paren => "`)`".to_owned(),
87 Rule::quote => "`\"`".to_owned(),
88 Rule::insensitive_string => "`^`".to_owned(),
89 Rule::range_operator => "`..`".to_owned(),
90 Rule::single_quote => "`'`".to_owned(),
91 other_rule => format!("{:?}", other_rule),
92 })
93 });
94
95 let pairs = match result {
96 Ok(pairs) => pairs,
97 Err(error) => {
98 // add_rules_to_select(vec![]);
99 return Err(vec![convert_error(error, &grammar)]);
100 }
101 };
102
103 if let Err(errors) = validator::validate_pairs(pairs.clone()) {
104 // add_rules_to_select(vec![]);
105 return Err(errors
106 .into_iter()
107 .map(|e| convert_error(e, &grammar))
108 .collect());
109 }
110
111 let ast = match parser::consume_rules(pairs) {
112 Ok(ast) => ast,
113 Err(errors) => {
114 // add_rules_to_select(vec![]);
115 return Err(errors
116 .into_iter()
117 .map(|e| convert_error(e, &grammar))
118 .collect());
119 }
120 };
121
122 let hashmap_ast_rules: HashMap<String, AstRule> = ast
123 .iter()
124 .map(|rule| (rule.name.to_string(), rule.clone()))
125 .collect();
126
127 Ok(Grammar {
128 rules: hashmap_ast_rules,
129 })
130}
131
132/// Generate a number of examples with the grammar,start rule and config provided
133///
134/// ```
135/// use mo::config::GeneratorConfig;
136/// use mo::parallel_generate_examples;
137///
138/// // Default configuration for the generator
139/// let mut config: GeneratorConfig = Default::default();
140///
141/// // Grammar string
142/// let mut grammar = r#"
143/// language = {"Rust" | "Python" | "Go" | "Java" | "PHP" | "Haskell"}
144/// one = {"1"}
145/// daysNumber = {one ~ " day" | !one ~ ASCII_NONZERO_DIGIT ~ " days"}
146/// sentence = {"I have been programming in " ~ language ~ " for " ~ daysNumber ~ "."}
147/// "#;
148///
149/// // Generate the examples
150/// let results = parallel_generate_examples(
151/// grammar.to_string(), // The grammar
152/// 5, // Quantity of examples
153/// "sentence".to_string(), // Start rule
154/// &config, // Config of the generator
155/// false, // Print progress
156/// false, // Print in stdout, false return a vector with the examples
157/// );
158///
159/// println!("{:?}", results);
160/// ```
161///
162pub fn parallel_generate_examples(
163 grammar_string: String,
164 quantity: u32,
165 start: String,
166 config: &GeneratorConfig,
167 print_progress: bool,
168 print_stdout: bool,
169) -> Vec<Result<String, String>> {
170 let input_data = InputData::new(grammar_string);
171 let mut vec = vec![];
172
173 // This mode is for avoid printing the examples, nothing special
174 // Nobody wants to generate examples and then discard all of them... right?
175 if config.benchmark_mode {
176 // Print examples as they are generated
177 (1..quantity + 1).into_par_iter().for_each(|_| {
178 // This isn't optimized by the compiler... right?
179 let _r = generator::generate_example(input_data.clone(), start.clone(), config);
180 });
181 } else if print_stdout {
182 // Print examples as they are generated
183 (1..quantity + 1).into_par_iter().for_each(|i| {
184 let r = generator::generate_example(input_data.clone(), start.clone(), config);
185 if print_progress {
186 println!("Example #{} generated:\r\n{}", i, r.unwrap());
187 } else {
188 println!("{}", r.unwrap());
189 }
190 });
191 } else {
192 vec = Vec::with_capacity(quantity as usize);
193 vec.par_extend((1..quantity + 1).into_par_iter().map(|i| {
194 let r = generator::generate_example(input_data.clone(), start.clone(), config);
195 if print_progress {
196 println!("Example #{} generated", i);
197 }
198 r
199 }));
200 }
201
202 vec
203}
204
205// pub fn gen_random_html_and_save(examples: u32,)
206// where>
207// S: AsRef<Path>,
// Creates examples given a grammar, the number of examples, the start rule, the folder to save them in and the file name in the format "example-{}.ext"
209/// Generate and save a number of examples with the grammar,start rule and config provided
210///
211/// ```ignore
212/// # // This doc_test is ignored because have side effects (the files)
213/// use std::path::Path;
214/// use mo::config::GeneratorConfig;
215/// use mo::parallel_generate_save_examples;
216///
217/// // Default configuration for the generator
218/// let mut config: GeneratorConfig = Default::default();
219///
220/// // Grammar string
221/// let mut grammar = r#"
222/// language = {"Rust" | "Python" | "Go" | "Java" | "PHP" | "Haskell"}
223/// one = {"1"}
224/// daysNumber = {one ~ " day" | !one ~ ASCII_NONZERO_DIGIT ~ " days"}
225/// sentence = {"I have been programming in " ~ language ~ " for " ~ daysNumber ~ "."}
226/// "#;
227///
228/// // Folder to save the examples
229/// let path = Path::new("./my-examples/");
230///
231/// let template_name = "relevant-example-{}.txt".to_string();
232///
233/// // Generate and save the examples
234/// let results = parallel_generate_save_examples(
235/// grammar.to_string(), // The grammar
236/// 5, // Quantity of examples
237/// "sentence".to_string(), // Start rule
238/// path, // The folder to save the examples
239/// template_name, // The name of the files saved
240/// &config, // Config of the generator
241/// );
242///
243/// ```
244pub fn parallel_generate_save_examples<S>(
245 grammar_string: String,
246 quantity: u32,
247 start: String,
248 path: S,
249 name_format: String,
250 config: &GeneratorConfig,
251) where
252 S: AsRef<Path>,
253{
254 // use env_logger;
255 // env_logger::init();
256
257 let path_cloned = path.as_ref();
258
259 // Creación de la barra de progreso
260 let progress_bar = ProgressBar::new(quantity.into());
261 progress_bar.set_style(
262 ProgressStyle::default_bar()
263 .template(
264 "[{elapsed_precise}] {wide_bar} {pos:>3}/{len:3} {msg} {percent}% {eta_precise}",
265 )
266 .progress_chars("██░"),
267 );
268
269 // Forzar el pintado inicial de la barra
270 progress_bar.tick();
271
272 let input_data = InputData::new(grammar_string);
273
274 (0..quantity).into_par_iter().for_each(|i| {
275 // Generar el ejemplo
276 let r = generator::generate_example(input_data.clone(), start.clone(), config);
277
278 match r {
279 Ok(example) => {
280 let new_path = path_cloned.join(name_format.replace("{}", &i.to_string()));
281 // println!("for {:?}", new_path);
282
283 // Guardar el archivo
284 let mut f = File::create(new_path).expect("Unable to create file");
285 f.write_all(example.as_bytes())
286 .expect("Unable to write data");
287
288 // Modificando la barra de progreso
289 progress_bar.inc(1);
290 // println!(
291 // "HTML {} completado y guardado, total esperado {}",
292 // i, examples
293 // );
294 }
295 Err(error) => {
296 println!("{}", error);
297 }
298 }
299 });
300
301 // Terminando la barra de progreso
302 progress_bar.finish();
303}
304
// Parses `input` using the grammar `grammar`, starting the parse from `rule`
// returns Ok if the parse succeeds, Err if the input cannot be parsed
// It is mostly used as a shortcut in the tests to check that a generated string can be parsed by the same grammar that generated it
308/// Parse input with the provided grammar and start rule returns `Ok` if the parse is sucessfull, `Err` otherwise
309///
310/// It's used for validate the examples generated with the original grammar
311///
312/// ```
313/// use mo::config::GeneratorConfig;
314/// use mo::{compile_grammar, parse_input, parallel_generate_examples};
315///
316/// // Default configuration for the generator
317/// let mut config: GeneratorConfig = Default::default();
318///
319/// // Grammar string
320/// let mut grammar = r#"
321/// language = {"Rust" | "Python" | "Go" | "Java" | "PHP" | "Haskell"}
322/// one = {"1"}
323/// daysNumber = {one ~ " day" | !one ~ ASCII_NONZERO_DIGIT ~ " days"}
324/// sentence = {"I have been programming in " ~ language ~ " for " ~ daysNumber ~ "."}
325/// "#;
326///
327/// // Generate the ast
328/// let grammar_ast = compile_grammar(grammar.to_string());
329///
330/// // Generate the examples
331/// let results = parallel_generate_examples(
332/// grammar.to_string(), // The grammar
333/// 1, // Quantity of examples
334/// "sentences".to_string(), // Start rule
335/// &config, // Config of the generator
336/// false, // Print progress
337/// false, // Print in stdout, false return a vector with the examples
338/// );
339///
340/// let one_example = results[0].as_ref().unwrap();
341///
342/// // Validate the generated example with the grammar
343/// let validate = parse_input(grammar_ast.unwrap(), "sentence".to_string(), one_example.to_string());
344///
345/// println!("{:?}", validate);
346/// ```
347///
348pub fn parse_input<'a>(grammar: Grammar, rule: String, input: String) -> Result<(), String> {
349 // Es necesario entregar una copia entera de las reglas al vm
350 let vm = Vm::new(optimizer::optimize(
351 grammar.rules.values().map(|r| r.clone()).collect(),
352 ));
353
354 parse_input_with_vm(vm, rule, input)
355}
356
357/// Parsea `input` usando la máquina `Vm`, iniciando el parseo desde `rule`
358/// retorna Ok si es exitoso el parseo, Err si no es posible parsear
359fn parse_input_with_vm<'a>(vm: Vm, rule: String, input: String) -> Result<(), String> {
360 match vm.parse(&rule, &input) {
361 Ok(_pairs) => {
362 // let lines: Vec<_> = pairs.map(|pair| format_pair(pair, 0, true)).collect();
363 // let lines = lines.join("\n");
364
365 // output.set_value(&format!("{}", lines));
366 Ok(())
367 }
368 Err(error) => {
369 // output.set_value(&format!("{}", error.renamed_rules(|r| r.to_string())))
370 // FIXME: Eliminar el string para usar un tipo de error más "Rustacean"
371 Err(format!("{}", error.renamed_rules(|r| r.to_string())))
372 }
373 }
374 // }
375}
376
377fn convert_error(error: Error<Rule>, grammar: &str) -> HashMap<String, String> {
378 let message = match error.variant {
379 ErrorVariant::CustomError { message } => message,
380 _ => unreachable!(),
381 };
382
383 match error.location {
384 InputLocation::Pos(pos) => {
385 let mut map = HashMap::new();
386
387 map.insert("from".to_owned(), line_col(pos, grammar));
388 map.insert("to".to_owned(), line_col(pos, grammar));
389 map.insert("message".to_owned(), format!("{}", message));
390
391 map
392 }
393 InputLocation::Span((start, end)) => {
394 let mut map = HashMap::new();
395
396 map.insert("from".to_owned(), line_col(start, grammar));
397 map.insert("to".to_owned(), line_col(end, grammar));
398 map.insert("message".to_owned(), format!("{}", message));
399
400 map
401 }
402 }
403}
404
/// Formats the byte offset `pos` of `input` as a zero-based `"(line, column)"` string
///
/// `"\n"` and `"\r\n"` start a new line; a `'\r'` that is not followed by `'\n'` counts as a
/// regular column. Columns are counted in characters, not bytes
///
/// Panics if `pos` is out of bounds or is not on a UTF-8 character boundary
fn line_col(pos: usize, input: &str) -> String {
    // Only the text before `pos` decides the position
    let mut remaining = input[..pos].chars().peekable();

    let mut line = 0;
    let mut col = 0;

    while let Some(current) = remaining.next() {
        match current {
            '\n' => {
                line += 1;
                col = 0;
            }
            '\r' if remaining.peek() == Some(&'\n') => {
                // "\r\n" is a single line break, consume both characters
                remaining.next();
                line += 1;
                col = 0;
            }
            _ => col += 1,
        }
    }

    format!("({}, {})", line, col)
}
449
/// Replace builtin pest rules for their equivalents
///
/// For example in a grammar like this:
/// ```text
/// small_number = ASCII_DIGIT{3}
/// ```
///
/// the replaced equivalent will be this:
/// ```text
/// small_number = ('0'..'9'){3}
/// ```
///
/// A builtin is only replaced when it is a whole identifier: names that merely contain a builtin
/// (`COMPANY`, `ASCII_DIGITS`) and the contents of string literals, character literals and comments
/// are left untouched
///
/// **Note:** currently only `ANY`, `NEWLINE` and the `ASCII_*` rules listed in
/// `builtin_rule_equivalent` are replaced
///
/// For the list of equivalent rules see <https://pest.rs/book/grammars/built-ins.html>
///
/// The replacement itself cannot fail; the `Result` is kept so the callers do not change
fn replace_builtin_rules(grammar: &str) -> Result<String, std::io::Error> {
    //TODO: Add the Unicode rules from https://pest.rs/book/grammars/built-ins.html

    // Every delimiter of interest is ASCII, so the grammar is scanned byte by byte and is only
    // sliced at identifier limits, which are always character boundaries
    let bytes = grammar.as_bytes();
    let mut replaced = String::with_capacity(grammar.len());
    // Everything before `copied_upto` is already in `replaced`
    let mut copied_upto = 0;
    let mut cursor = 0;

    while cursor < bytes.len() {
        let byte = bytes[cursor];

        match byte {
            // The text inside of a literal is data, not rule names
            b'"' | b'\'' => cursor = skip_quoted_literal(bytes, cursor),
            // Line comment: skipped so that a quote inside of it is not taken as a literal
            b'/' if bytes.get(cursor + 1) == Some(&b'/') => {
                let line_end = bytes[cursor..]
                    .iter()
                    .position(|&b| b == b'\n')
                    .map_or(bytes.len(), |offset| cursor + offset);
                cursor = line_end;
            }
            b'/' if bytes.get(cursor + 1) == Some(&b'*') => {
                cursor = skip_block_comment(bytes, cursor)
            }
            _ if byte == b'_' || byte.is_ascii_alphabetic() => {
                // Take the whole identifier, this way `ASCII_ALPHA` is never confused with
                // `ASCII_ALPHA_LOWER` nor `ANY` with `COMPANY`
                let start = cursor;
                while cursor < bytes.len()
                    && (bytes[cursor] == b'_' || bytes[cursor].is_ascii_alphanumeric())
                {
                    cursor += 1;
                }

                if let Some(equivalent) = builtin_rule_equivalent(&grammar[start..cursor]) {
                    replaced.push_str(&grammar[copied_upto..start]);
                    replaced.push_str(equivalent);
                    copied_upto = cursor;
                }
            }
            _ => cursor += 1,
        }
    }

    replaced.push_str(&grammar[copied_upto..]);
    Ok(replaced)
}

/// Returns the expression equivalent to the builtin rule `identifier`, `None` if it is not one of
/// the supported builtin rules
fn builtin_rule_equivalent(identifier: &str) -> Option<&'static str> {
    // Parentheses are kept to facilitate things like ASCII_ALPHA{1,5}
    let equivalent = match identifier {
        "ANY" => "('\u{00}'..'\u{10FFFF}')",
        "ASCII_DIGIT" => "('0'..'9')",
        "ASCII_NONZERO_DIGIT" => "('1'..'9')",
        "ASCII_BIN_DIGIT" => "('0'..'1')",
        "ASCII_OCT_DIGIT" => "('0'..'7')",
        "ASCII_HEX_DIGIT" => "('0'..'9' | 'a'..'f' | 'A'..'F')",
        "ASCII_ALPHA_LOWER" => "('a'..'z')",
        "ASCII_ALPHA_UPPER" => "('A'..'Z')",
        "ASCII_ALPHA" => "('a'..'z' | 'A'..'Z')",
        "ASCII_ALPHANUMERIC" => "('0'..'9' | 'a'..'z' | 'A'..'Z')",
        "NEWLINE" => r#"("\n" | "\r\n" | "\r")"#,
        _ => return None,
    };

    Some(equivalent)
}

/// Returns the index right after the literal that starts with the quote at `bytes[start]`, or
/// `bytes.len()` if the literal is never closed
fn skip_quoted_literal(bytes: &[u8], start: usize) -> usize {
    let quote = bytes[start];
    let mut cursor = start + 1;

    while cursor < bytes.len() {
        match bytes[cursor] {
            // An escaped character never closes the literal
            b'\\' => cursor += 2,
            b if b == quote => return cursor + 1,
            _ => cursor += 1,
        }
    }

    bytes.len()
}

/// Returns the index right after the block comment that starts with the `/*` at `bytes[start]`,
/// or `bytes.len()` if the comment is never closed. Nested block comments are skipped as a whole
fn skip_block_comment(bytes: &[u8], start: usize) -> usize {
    let mut depth = 0usize;
    let mut cursor = start;

    while cursor < bytes.len() {
        match (bytes[cursor], bytes.get(cursor + 1)) {
            (b'/', Some(&b'*')) => {
                depth += 1;
                cursor += 2;
            }
            (b'*', Some(&b'/')) => {
                // `depth` is at least 1 here because the scan starts on a `/*`
                depth -= 1;
                cursor += 2;
                if depth == 0 {
                    return cursor;
                }
            }
            _ => cursor += 1,
        }
    }

    bytes.len()
}