nameless_peg_parser/lib.rs
1#[allow(unused)]
2use crate::peg::transformer::Transformer;
3#[allow(unused)]
4use log::LevelFilter;
5#[allow(unused)]
6use std::fs::create_dir;
7#[allow(unused)]
8use std::fs::File;
9#[allow(unused)]
10use std::io::ErrorKind::AlreadyExists;
11#[allow(unused)]
12use std::io::Write;
13
14pub mod peg;
15
16// ```
17// PEG = (NT, T, R, S)
18// NT = Set of NonTerminals
19// T = Set of Terminals // may not be necessary to define in practice,
20// since it's just "all characters" (ASCII) unless we want to restrict it
21// R = Rules i.e. R(NT) -> e with e = parsing expression
22// S = Start Symbol i.e. S elem NT
23//
24// Parser Input: ASCII String
25// Parser Output: List of captured tokens. Terminals do not contain other tokens, Non Terminals are a list of tokens
26//
27// parse(rule, text) => (cost, cursor, Result<Token, ()>) // Should Error contain more information?
28// cursor = new position in input text
29// Token = NonTerminal(Capture, Tokens) | Terminal(Capture)
30// Capture = (start, end) | index into the input string or should this just be the input string? using slices this would
31// probably work without immense storage costs
32//
33// Parsing expression
34// if e, e1 and e2 are parsing expressions then the following are also parsing expressions
35// - e1e2 | sequence
36// - e1/e2 | prio choice
37// - !e | not predicate
38// - &e | and predicate
39// - (e) | group
40// - e* | zero or more
41// - e+ | one or more (same as ee*)
42// - 'x' with x elem T^* | literal
43// - "x" with x elem T^* | literal
44// - [xyz] with x,y,z elem T | class
45// - [x-z] with x,z elem T | range
46// - . | any
47// - empty | the empty string
48//
49// Parsing Rules
50// EMPTY:
51// parse(empty, x) => (1, x, Ok(empty))
52// ANY:
53// parse(any, xy) => (1, y, Ok(x)) if x elem T
54// parse(any, x) => (1, x, Err(())) if x is empty
55// RANGE:
56// parse(range(a, b), xy) => (1, y, Ok(x)) if x elem T and a <= x <= b (in ASCII)
57// parse(range(a, b), x) => (1, x, Err(())) if x is empty
58// CLASS:
59// parse(class(s), xy) => (1, y, Ok(x)) if s subset of T^* and x elem s
60// parse(class(s), x) => (1, x, Err(())) if s subset of T^* and x not elem s
61// LITERAL || define recursive?
62// parse(literal(x), xy) => (len(x) + 1, y, Ok(x)) with x elem T^*
63// parse(literal(x), y) => (nr of matches + 1, y, Err(())) with x elem T^* and x not prefix of y
64// ZERO OR MORE
65// parse(e*, xyz) => (n1+n2+1, z, Ok(xy)) if parse(e, xy) => (n1, y, Ok(x)) and parse(e*, yz) => (n2, z, Ok(y))
66// parse(e*, x) => (n1+1, x, Ok(empty)) if parse(e, x) => (n1, x, Err(()))
67// ONE OR MORE
68// parse(e+, x) => parse(ee*, x)
69// GROUP
70// parse((e), x) => parse(e, x)
71// AND
72// parse(&e, xy) => (n+1, x, Ok(empty)) if parse(e, xy) => (n, y, Ok(x))
73// parse(&e, xy) => (n+1, x, Err(())) if parse(e, xy) => (n, x, Err(()))
74// NOT
75// parse(!e, xy) => (n+1, x, Ok(empty)) if parse(e, xy) => (n, x, Err(()))
76// parse(!e, xy) => (n+1, x, Err(())) if parse(e, xy) => (n, y, Ok(x))
77// CHOICE
78// parse(e1/e2, xy) => (n+1, y, Ok(x)) if parse(e1, xy) => (n, y, Ok(x))
79// parse(e1/e2, xy) => (n1+n2+1, y, Ok(x)) if parse(e1, xy) => (n1, x, Err(())) and parse(e2, xy) => (n2, y, Ok(x))
80// parse(e1/e2, xy) => (n1+n2+1, x, Err(())) if neither e1 nor e2 parse xy
81// SEQUENCE
82// parse(e1e2, xyz) => (n1+n2+1, z, Ok(xy)) if parse(e1, xy) => (n1, y, Ok(x)) and parse(e2, yz) => (n2, z, Ok(y))
83// parse(e1e2, xyz) => (n1+1, x, Err(())) if e1 does not parse xyz
84// parse(e1e2, xyz) => (n1+n2+1, x, Err(())) if parse(e1, xy) => (n1, y, Ok(x)) and e2 does not parse yz
85//
86// ```