mo/
lib.rs

1#![doc(html_root_url = "https://docs.rs/mo/0.1.0")]
2//! # Usage
3//!
//! mo comes in two flavors: a binary and a crate (library).
5//!
6//! For a basic/medium/advanced usage of the binary/library, please see the [Readme](https://github.com/yuulive/mo).
7//!
8//! ## Frequently Asked Questions? (FAQ)
9//!
10//! See the [Readme](https://github.com/yuulive/mo).
11//!
12//! ## I just want to see how this code works
13//!
//! Please see the Readme first. If you really want to read the code, go ahead.
15//!
16use aho_corasick::AhoCorasick;
17use indicatif::{ProgressBar, ProgressStyle};
18use pest::error::{Error, ErrorVariant, InputLocation};
19use pest_meta::ast::Rule as AstRule;
20use pest_meta::parser::{self, Rule};
21use pest_meta::{optimizer, validator};
22use pest_vm::Vm;
23use rayon::prelude::*;
24use std::collections::HashMap;
25use std::fs::File;
26use std::io::Write;
27use std::path::Path;
28
29pub mod config;
30mod generator;
31
32// Re-exports
33pub use pest;
34pub use pest_meta;
35
36use crate::config::*;
37use crate::generator::*;
38
39/// Compile a grammar string and creates a `HashMap` with rules found as keys and their components (AST) as entries
40///
41/// In this step, the grammar is validated with the pest reference grammar, and the built-in rules are replaced for
42/// their equivalents
43/// ```
44/// use mo::compile_grammar;
45///
46/// // Grammar string
47/// let mut grammar = r#"
48///         language = {"Rust" | "Python" | "Go" | "Java" | "PHP" | "Haskell"}
49///         one = {"1"}
50///         daysNumber = {one ~ " day" | !one ~ ASCII_NONZERO_DIGIT ~ " days"}
51///         sentence = {"I have been programming in " ~ language ~ " for " ~ daysNumber ~ "."}
52///     "#;
53///
54/// // Generate the ast
55/// let grammar_ast = compile_grammar(grammar.to_string());
56///
57/// println!("{:?}", grammar_ast);
58/// ```
59pub fn compile_grammar(grammar: String) -> Result<Grammar, Vec<HashMap<String, String>>> {
60    // Replace builtin pest rules for their equivalents
61    let grammar = replace_builtin_rules(&grammar).unwrap();
62
63    // Print grammar after replaces builtin rules
64    // println!("{}", grammar.clone());
65
66    let result = parser::parse(Rule::grammar_rules, &grammar).map_err(|error| {
67        error.renamed_rules(|rule| match *rule {
68            Rule::grammar_rule => "rule".to_owned(),
69            Rule::_push => "push".to_owned(),
70            Rule::assignment_operator => "`=`".to_owned(),
71            Rule::silent_modifier => "`_`".to_owned(),
72            Rule::atomic_modifier => "`@`".to_owned(),
73            Rule::compound_atomic_modifier => "`$`".to_owned(),
74            Rule::non_atomic_modifier => "`!`".to_owned(),
75            Rule::opening_brace => "`{`".to_owned(),
76            Rule::closing_brace => "`}`".to_owned(),
77            Rule::opening_paren => "`(`".to_owned(),
78            Rule::positive_predicate_operator => "`&`".to_owned(),
79            Rule::negative_predicate_operator => "`!`".to_owned(),
80            Rule::sequence_operator => "`&`".to_owned(),
81            Rule::choice_operator => "`|`".to_owned(),
82            Rule::optional_operator => "`?`".to_owned(),
83            Rule::repeat_operator => "`*`".to_owned(),
84            Rule::repeat_once_operator => "`+`".to_owned(),
85            Rule::comma => "`,`".to_owned(),
86            Rule::closing_paren => "`)`".to_owned(),
87            Rule::quote => "`\"`".to_owned(),
88            Rule::insensitive_string => "`^`".to_owned(),
89            Rule::range_operator => "`..`".to_owned(),
90            Rule::single_quote => "`'`".to_owned(),
91            other_rule => format!("{:?}", other_rule),
92        })
93    });
94
95    let pairs = match result {
96        Ok(pairs) => pairs,
97        Err(error) => {
98            // add_rules_to_select(vec![]);
99            return Err(vec![convert_error(error, &grammar)]);
100        }
101    };
102
103    if let Err(errors) = validator::validate_pairs(pairs.clone()) {
104        // add_rules_to_select(vec![]);
105        return Err(errors
106            .into_iter()
107            .map(|e| convert_error(e, &grammar))
108            .collect());
109    }
110
111    let ast = match parser::consume_rules(pairs) {
112        Ok(ast) => ast,
113        Err(errors) => {
114            // add_rules_to_select(vec![]);
115            return Err(errors
116                .into_iter()
117                .map(|e| convert_error(e, &grammar))
118                .collect());
119        }
120    };
121
122    let hashmap_ast_rules: HashMap<String, AstRule> = ast
123        .iter()
124        .map(|rule| (rule.name.to_string(), rule.clone()))
125        .collect();
126
127    Ok(Grammar {
128        rules: hashmap_ast_rules,
129    })
130}
131
132/// Generate a number of examples with the grammar,start rule and config provided
133///
134/// ```
135/// use mo::config::GeneratorConfig;
136/// use mo::parallel_generate_examples;
137///
138/// // Default configuration for the generator
139/// let mut config: GeneratorConfig = Default::default();
140///
141/// // Grammar string
142/// let mut grammar = r#"
143///         language = {"Rust" | "Python" | "Go" | "Java" | "PHP" | "Haskell"}
144///         one = {"1"}
145///         daysNumber = {one ~ " day" | !one ~ ASCII_NONZERO_DIGIT ~ " days"}
146///         sentence = {"I have been programming in " ~ language ~ " for " ~ daysNumber ~ "."}
147///     "#;
148///
149/// // Generate the examples
150/// let results = parallel_generate_examples(
151///             grammar.to_string(),        // The grammar
152///             5,                          // Quantity of examples
153///             "sentence".to_string(),    // Start rule
154///             &config,                    // Config of the generator
155///             false,                      // Print progress
156///             false,                      // Print in stdout, false return a vector with the examples
157///         );
158///
159/// println!("{:?}", results);
160/// ```
161///
162pub fn parallel_generate_examples(
163    grammar_string: String,
164    quantity: u32,
165    start: String,
166    config: &GeneratorConfig,
167    print_progress: bool,
168    print_stdout: bool,
169) -> Vec<Result<String, String>> {
170    let input_data = InputData::new(grammar_string);
171    let mut vec = vec![];
172
173    // This mode is for avoid printing the examples, nothing special
174    // Nobody wants to generate examples and then discard all of them... right?
175    if config.benchmark_mode {
176        // Print examples as they are generated
177        (1..quantity + 1).into_par_iter().for_each(|_| {
178            // This isn't optimized by the compiler... right?
179            let _r = generator::generate_example(input_data.clone(), start.clone(), config);
180        });
181    } else if print_stdout {
182        // Print examples as they are generated
183        (1..quantity + 1).into_par_iter().for_each(|i| {
184            let r = generator::generate_example(input_data.clone(), start.clone(), config);
185            if print_progress {
186                println!("Example #{} generated:\r\n{}", i, r.unwrap());
187            } else {
188                println!("{}", r.unwrap());
189            }
190        });
191    } else {
192        vec = Vec::with_capacity(quantity as usize);
193        vec.par_extend((1..quantity + 1).into_par_iter().map(|i| {
194            let r = generator::generate_example(input_data.clone(), start.clone(), config);
195            if print_progress {
196                println!("Example #{} generated", i);
197            }
198            r
199        }));
200    }
201
202    vec
203}
204
205// pub fn gen_random_html_and_save(examples: u32,)
206// where>
207//     S: AsRef<Path>,
// Creates examples from a grammar, a number of examples, a start rule, a save path and a file-name template of the form "example-{}.ext"
209/// Generate and save a number of examples with the grammar,start rule and config provided
210///
211/// ```ignore
212/// # // This doc_test is ignored because have side effects (the files)
213/// use std::path::Path;
214/// use mo::config::GeneratorConfig;
215/// use mo::parallel_generate_save_examples;
216///
217/// // Default configuration for the generator
218/// let mut config: GeneratorConfig = Default::default();
219///
220/// // Grammar string
221/// let mut grammar = r#"
222///         language = {"Rust" | "Python" | "Go" | "Java" | "PHP" | "Haskell"}
223///         one = {"1"}
224///         daysNumber = {one ~ " day" | !one ~ ASCII_NONZERO_DIGIT ~ " days"}
225///         sentence = {"I have been programming in " ~ language ~ " for " ~ daysNumber ~ "."}
226///     "#;
227///
228/// // Folder to save the examples
229/// let path = Path::new("./my-examples/");
230///
231/// let template_name = "relevant-example-{}.txt".to_string();
232///
233/// // Generate and save the examples
234/// let results = parallel_generate_save_examples(
235///             grammar.to_string(),       // The grammar
236///             5,                         // Quantity of examples
237///             "sentence".to_string(),   // Start rule
238///             path,                      // The folder to save the examples
239///             template_name,             // The name of the files saved
240///             &config,                   // Config of the generator
241///         );
242///
243/// ```
244pub fn parallel_generate_save_examples<S>(
245    grammar_string: String,
246    quantity: u32,
247    start: String,
248    path: S,
249    name_format: String,
250    config: &GeneratorConfig,
251) where
252    S: AsRef<Path>,
253{
254    // use env_logger;
255    // env_logger::init();
256
257    let path_cloned = path.as_ref();
258
259    // Creación de la barra de progreso
260    let progress_bar = ProgressBar::new(quantity.into());
261    progress_bar.set_style(
262        ProgressStyle::default_bar()
263            .template(
264                "[{elapsed_precise}] {wide_bar} {pos:>3}/{len:3} {msg} {percent}% {eta_precise}",
265            )
266            .progress_chars("██░"),
267    );
268
269    // Forzar el pintado inicial de la barra
270    progress_bar.tick();
271
272    let input_data = InputData::new(grammar_string);
273
274    (0..quantity).into_par_iter().for_each(|i| {
275        // Generar el ejemplo
276        let r = generator::generate_example(input_data.clone(), start.clone(), config);
277
278        match r {
279            Ok(example) => {
280                let new_path = path_cloned.join(name_format.replace("{}", &i.to_string()));
281                // println!("for {:?}", new_path);
282
283                // Guardar el archivo
284                let mut f = File::create(new_path).expect("Unable to create file");
285                f.write_all(example.as_bytes())
286                    .expect("Unable to write data");
287
288                // Modificando la barra de progreso
289                progress_bar.inc(1);
290                // println!(
291                //     "HTML {} completado y guardado, total esperado {}",
292                //     i, examples
293                // );
294            }
295            Err(error) => {
296                println!("{}", error);
297            }
298        }
299    });
300
301    // Terminando la barra de progreso
302    progress_bar.finish();
303}
304
305// Parsea `input` usando la gramática `grammar`, iniciando el parseo desde `rule`
306// retorna Ok si es exitoso el parseo, Err si no es posible parsear
307// Es usado en términos generales como shorcut en los tests para validar si una cadena generada, puede ser parseada por la misma gramatica que la genero
308/// Parse input with the provided grammar and start rule returns `Ok` if the parse is sucessfull, `Err` otherwise
309///
310/// It's used for validate the examples generated with the original grammar
311///
312/// ```
313/// use mo::config::GeneratorConfig;
314/// use mo::{compile_grammar, parse_input, parallel_generate_examples};
315///
316/// // Default configuration for the generator
317/// let mut config: GeneratorConfig = Default::default();
318///
319/// // Grammar string
320/// let mut grammar = r#"
321///         language = {"Rust" | "Python" | "Go" | "Java" | "PHP" | "Haskell"}
322///         one = {"1"}
323///         daysNumber = {one ~ " day" | !one ~ ASCII_NONZERO_DIGIT ~ " days"}
324///         sentence = {"I have been programming in " ~ language ~ " for " ~ daysNumber ~ "."}
325///     "#;
326///
327/// // Generate the ast
328/// let grammar_ast = compile_grammar(grammar.to_string());
329///
330/// // Generate the examples
331/// let results = parallel_generate_examples(
332///             grammar.to_string(),        // The grammar
333///             1,                          // Quantity of examples
334///             "sentences".to_string(),    // Start rule
335///             &config,                    // Config of the generator
336///             false,                      // Print progress
337///             false,                      // Print in stdout, false return a vector with the examples
338///         );
339///
340/// let one_example = results[0].as_ref().unwrap();
341///
342/// // Validate the generated example with the grammar
343/// let validate = parse_input(grammar_ast.unwrap(), "sentence".to_string(), one_example.to_string());
344///
345/// println!("{:?}", validate);
346/// ```
347///
348pub fn parse_input<'a>(grammar: Grammar, rule: String, input: String) -> Result<(), String> {
349    // Es necesario entregar una copia entera de las reglas al vm
350    let vm = Vm::new(optimizer::optimize(
351        grammar.rules.values().map(|r| r.clone()).collect(),
352    ));
353
354    parse_input_with_vm(vm, rule, input)
355}
356
357/// Parsea `input` usando la máquina `Vm`, iniciando el parseo desde `rule`
358/// retorna Ok si es exitoso el parseo, Err si no es posible parsear
359fn parse_input_with_vm<'a>(vm: Vm, rule: String, input: String) -> Result<(), String> {
360    match vm.parse(&rule, &input) {
361        Ok(_pairs) => {
362            // let lines: Vec<_> = pairs.map(|pair| format_pair(pair, 0, true)).collect();
363            // let lines = lines.join("\n");
364
365            // output.set_value(&format!("{}", lines));
366            Ok(())
367        }
368        Err(error) => {
369            // output.set_value(&format!("{}", error.renamed_rules(|r| r.to_string())))
370            // FIXME: Eliminar el string para usar un tipo de error más "Rustacean"
371            Err(format!("{}", error.renamed_rules(|r| r.to_string())))
372        }
373    }
374    // }
375}
376
377fn convert_error(error: Error<Rule>, grammar: &str) -> HashMap<String, String> {
378    let message = match error.variant {
379        ErrorVariant::CustomError { message } => message,
380        _ => unreachable!(),
381    };
382
383    match error.location {
384        InputLocation::Pos(pos) => {
385            let mut map = HashMap::new();
386
387            map.insert("from".to_owned(), line_col(pos, grammar));
388            map.insert("to".to_owned(), line_col(pos, grammar));
389            map.insert("message".to_owned(), format!("{}", message));
390
391            map
392        }
393        InputLocation::Span((start, end)) => {
394            let mut map = HashMap::new();
395
396            map.insert("from".to_owned(), line_col(start, grammar));
397            map.insert("to".to_owned(), line_col(end, grammar));
398            map.insert("message".to_owned(), format!("{}", message));
399
400            map
401        }
402    }
403}
404
/// Formats the byte offset `pos` of `input` as a zero-based `"(line, column)"` string.
///
/// Both `"\n"` and `"\r\n"` end a line; a `'\r'` that is not followed by `'\n'` counts as an
/// ordinary character. Columns are counted in characters, not bytes.
///
/// `pos` must lie on a UTF-8 character boundary inside `input`, otherwise slicing panics.
fn line_col(pos: usize, input: &str) -> String {
    let mut row = 0;
    let mut column = 0;

    // Only the text before `pos` determines the result.
    let mut preceding = input[..pos].chars().peekable();

    while let Some(current) = preceding.next() {
        let ends_line = match current {
            '\n' => true,
            '\r' if preceding.peek() == Some(&'\n') => {
                // Consume the `'\n'` so that "\r\n" counts as a single line break.
                preceding.next();
                true
            }
            _ => false,
        };

        if ends_line {
            row += 1;
            column = 0;
        } else {
            column += 1;
        }
    }

    format!("({}, {})", row, column)
}
449
/// Replace builtin pest rules for their equivalents
///
/// For example in a grammar like this:
/// ```text
/// small_number = ASCII_DIGIT{3}
/// ```
///
/// the replaced equivalent will be this:
/// ```text
/// small_number = ('0'..'9'){3}
/// ```
///
/// A builtin is only replaced when it is a whole identifier: rule names that merely contain
/// the name of a builtin (`COMPANY`, `MY_NEWLINE`), string and character literals, and
/// comments are copied unchanged.
///
/// **Note:** currently only `ANY`, `NEWLINE` and the ASCII rules are replaced
///
/// For the list of equivalent rules see https://pest.rs/book/grammars/built-ins.html
///
/// The current implementation never fails; the `Result` is kept so that callers do not
/// have to change.
fn replace_builtin_rules(grammar: &str) -> Result<String, std::io::Error> {
    //TODO: Add the Unicode rules from https://pest.rs/book/grammars/built-ins.html

    let mut output = String::with_capacity(grammar.len());
    let mut rest = grammar;

    while let Some(first) = rest.chars().next() {
        // Length in bytes of the token at the start of `rest` that is copied unchanged.
        let token_len = if first == '"' || first == '\'' {
            quoted_literal_len(rest, first)
        } else if rest.starts_with("//") {
            rest.find('\n').unwrap_or_else(|| rest.len())
        } else if rest.starts_with("/*") {
            block_comment_len(rest)
        } else if is_identifier_char(first) {
            let word_len = rest
                .find(|c: char| !is_identifier_char(c))
                .unwrap_or_else(|| rest.len());

            if let Some(equivalent) = builtin_equivalent(&rest[..word_len]) {
                output.push_str(equivalent);
                rest = &rest[word_len..];
                continue;
            }

            word_len
        } else {
            first.len_utf8()
        };

        output.push_str(&rest[..token_len]);
        rest = &rest[token_len..];
    }

    Ok(output)
}

/// Returns the explicit equivalent of the builtin rule `name`, if `name` is a supported builtin.
fn builtin_equivalent(name: &str) -> Option<&'static str> {
    // Parentheses are kept to facilitate things like ASCII_ALPHA{1,5}
    let equivalent = match name {
        "ANY" => "('\u{00}'..'\u{10FFFF}')",
        "ASCII_DIGIT" => "('0'..'9')",
        "ASCII_NONZERO_DIGIT" => "('1'..'9')",
        "ASCII_BIN_DIGIT" => "('0'..'1')",
        "ASCII_OCT_DIGIT" => "('0'..'7')",
        "ASCII_HEX_DIGIT" => "('0'..'9' | 'a'..'f' | 'A'..'F')",
        "ASCII_ALPHA_LOWER" => "('a'..'z')",
        "ASCII_ALPHA_UPPER" => "('A'..'Z')",
        "ASCII_ALPHA" => "('a'..'z' | 'A'..'Z')",
        "ASCII_ALPHANUMERIC" => "('0'..'9' | 'a'..'z' | 'A'..'Z')",
        "NEWLINE" => r#"("\n" | "\r\n" | "\r")"#,
        _ => return None,
    };

    Some(equivalent)
}

/// Returns whether `c` can be part of an identifier of a pest grammar.
fn is_identifier_char(c: char) -> bool {
    c.is_ascii_alphanumeric() || c == '_'
}

/// Returns the length in bytes of the literal at the start of `text` that is delimited by
/// `quote`, honouring backslash escapes; an unterminated literal extends to the end of `text`.
fn quoted_literal_len(text: &str, quote: char) -> usize {
    let mut escaped = false;

    // The first character is the opening quote.
    for (index, c) in text.char_indices().skip(1) {
        if escaped {
            escaped = false;
        } else if c == '\\' {
            escaped = true;
        } else if c == quote {
            return index + c.len_utf8();
        }
    }

    text.len()
}

/// Returns the length in bytes of the (possibly nested) block comment at the start of `text`;
/// an unterminated comment extends to the end of `text`.
fn block_comment_len(text: &str) -> usize {
    let mut depth = 0usize;
    let mut index = 0;

    while index < text.len() {
        let tail = &text[index..];

        if tail.starts_with("/*") {
            depth += 1;
            index += 2;
        } else if tail.starts_with("*/") {
            depth -= 1;
            index += 2;

            if depth == 0 {
                return index;
            }
        } else {
            index += tail.chars().next().map_or(1, char::len_utf8);
        }
    }

    text.len()
}