mod peg {
#![allow(unused_imports, dead_code, clippy::all)]
use std::collections::HashMap;
use regex::Regex;
use unicode_xid::UnicodeXID;
#[derive(Clone, Debug)]
pub struct Node {
pub rule: Rule,
pub start: usize,
pub end: usize,
pub children: Vec<Node>,
pub alternative: Option<u16>
}
impl Node {
fn new(rule: Rule, start: usize, end: usize, alternative: Option<u16>) -> Self {
Self {
rule,
start,
end,
children: Vec::new(),
alternative,
}
}
pub fn print_to_string(&self, input: &str) -> String {
let children = if self.children.is_empty() {
String::new()
} else {
format!(", {}",
self
.children
.iter()
.map(|node| node.print_to_string(input))
.collect::<Vec<String>>()
.join(", "))
};
format!(
"({:?}, {}\"{}\"{})",
self.rule,
if let Some(alt) = self.alternative {
format!("alt={}, ", alt)
} else {
String::new()
},
escape_string(&input[self.start..self.end]),
children,
)
}
pub fn is_empty(&self) -> bool {
self.start == self.end
}
pub fn as_str<'s>(&self, input: &'s str) -> &'s str {
&input[self.start..self.end]
}
}
fn escape_string(str: &str) -> String {
let mut res = String::new();
for ch in str.chars() {
match ch {
'\\' => res.push_str("\\\\"),
'\t' => res.push_str("\\t"),
'\n' => res.push_str("\\n"),
'\r' => res.push_str("\\r"),
'"' => res.push_str("\\\""),
ch => res.push(ch),
}
}
res
}
#[derive(Clone, Copy, Hash, PartialEq, Eq)]
#[allow(non_camel_case_types)]
enum Terminal {
re,
Literal_9,
Regex,
Literal_7,
Literal_5,
Literal_0,
Literal_6,
Literal_1,
Literal_8,
Literal_2,
Literal,
Literal_4,
Regex_0,
Literal_3,
}
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
#[allow(non_camel_case_types)]
pub enum Rule {
Terminal,
grammar,
definition,
expression,
sequence,
alternative,
primary,
string_literal,
regex,
ws,
WHITESPACE,
EOI,
XID_IDENTIFIER,
}
#[allow(non_snake_case)]
pub struct PEG {
terminal_memo: HashMap<(usize, Terminal), Option<usize>>,
rule_memo: HashMap<(usize, Rule), Result<Node, usize>>,
regex_re: Regex,
regex_Regex: Regex,
regex_Regex_0: Regex,
}
impl PEG {
pub fn new() -> Self {
Self {
terminal_memo: HashMap::new(),
rule_memo: HashMap::new(),
regex_re: Regex::new(r########"^re#([^#\\]|\\.)*#"########).unwrap(),
regex_Regex: Regex::new(r########"^"([^"\\]|\\.)*""########).unwrap(),
regex_Regex_0: Regex::new(r########"^'([^'\\]|\\.)*'"########).unwrap(),
}
}
pub fn parse(&mut self, input: &str) -> Result<Node, (usize, usize)> {
self.rule_memo.clear();
self.terminal_memo.clear();
self.rule_grammar(0, input)
.map_err(|pos| {
let mut line_no = 0;
let mut col_no = pos;
for l in input
.char_indices()
.filter_map(|(index, c)| if c == '\n' { Some(index + 1) } else { None })
{
if pos < l {
break;
}
if pos == l {
col_no -= 1;
break;
}
col_no = pos - l;
line_no += 1;
}
(line_no, col_no)
})
}
#[allow(non_snake_case)]
fn rule_grammar(&mut self, pos: usize, input: &str) -> Result<Node, usize> {
let key = (pos, Rule::grammar);
if let Some(res) = self.rule_memo.get(&key) {
return res.clone();
}
let res = {
let mut list = Vec::new();
let start = pos;
self.rule_ws(pos, input)
.and_then(|node| {
let pos = node.end;
list.push(node);
{
let mut list = Vec::new();
let start = pos;
let mut pos = pos;
while let Ok(node) = self.rule_definition(pos, input) {
if pos == node.end {
break;
}
pos = node.end;
list.push(node);
}
if list.is_empty() {
Err(start)
} else {
Ok(Node {
rule: Rule::Terminal,
start,
end: pos,
children: list,
alternative: None,
})
}
}
})
.and_then(|node| {
let pos = node.end;
list.push(node);
self.builtin_eoi(pos, input, None)
})
.map(|node| {
let end = node.end;
list.push(node);
Node {
rule: Rule::grammar,
start,
end,
children: list,
alternative: None,
}
})
};
self.rule_memo.insert(key, res.clone());
res
}
#[allow(non_snake_case)]
fn rule_definition(&mut self, pos: usize, input: &str) -> Result<Node, usize> {
let key = (pos, Rule::definition);
if let Some(res) = self.rule_memo.get(&key) {
return res.clone();
}
let res = {
let mut list = Vec::new();
let start = pos;
self.builtin_xid_identifier(pos, input, None)
.and_then(|node| {
let pos = node.end;
list.push(node);
self.rule_ws(pos, input)
})
.and_then(|node| {
let pos = node.end;
list.push(node);
self.match_terminal(pos, input, Terminal::Literal)
.map(|end| Node::new(Rule::Terminal, pos, end, None))
.ok_or(pos)
})
.and_then(|node| {
let pos = node.end;
list.push(node);
self.rule_ws(pos, input)
})
.and_then(|node| {
let pos = node.end;
list.push(node);
self.rule_expression(pos, input)
})
.and_then(|node| {
let pos = node.end;
list.push(node);
self.match_terminal(pos, input, Terminal::Literal_0)
.map(|end| Node::new(Rule::Terminal, pos, end, None))
.ok_or(pos).or_else(|_| Ok(Node::new(Rule::Terminal, pos, pos, None)))
})
.and_then(|node| {
let pos = node.end;
list.push(node);
self.rule_ws(pos, input)
})
.map(|node| {
let end = node.end;
list.push(node);
Node {
rule: Rule::definition,
start,
end,
children: list,
alternative: None,
}
})
};
self.rule_memo.insert(key, res.clone());
res
}
#[allow(non_snake_case)]
fn rule_expression(&mut self, pos: usize, input: &str) -> Result<Node, usize> {
let key = (pos, Rule::expression);
if let Some(res) = self.rule_memo.get(&key) {
return res.clone();
}
let res = {
let mut list = Vec::new();
let start = pos;
self.rule_sequence(pos, input)
.and_then(|node| {
let pos = node.end;
list.push(node);
{
let mut list = Vec::new();
let start = pos;
let mut pos = pos;
while let Ok(node) = {
let mut list = Vec::new();
let start = pos;
self.match_terminal(pos, input, Terminal::Literal_1)
.map(|end| Node::new(Rule::Terminal, pos, end, None))
.ok_or(pos)
.and_then(|node| {
let pos = node.end;
list.push(node);
self.rule_ws(pos, input)
})
.and_then(|node| {
let pos = node.end;
list.push(node);
self.rule_sequence(pos, input)
})
.map(|node| {
let end = node.end;
list.push(node);
Node {
rule: Rule::Terminal,
start,
end,
children: list,
alternative: None,
}
})
} {
if pos == node.end {
break;
}
pos = node.end;
list.push(node);
}
Ok(Node {
rule: Rule::Terminal,
start,
end: pos,
children: list,
alternative: None,
})
}
})
.map(|node| {
let end = node.end;
list.push(node);
Node {
rule: Rule::expression,
start,
end,
children: list,
alternative: None,
}
})
};
self.rule_memo.insert(key, res.clone());
res
}
#[allow(non_snake_case)]
fn rule_sequence(&mut self, pos: usize, input: &str) -> Result<Node, usize> {
let key = (pos, Rule::sequence);
if let Some(res) = self.rule_memo.get(&key) {
return res.clone();
}
let res = {
let mut list = Vec::new();
let start = pos;
let mut pos = pos;
while let Ok(node) = self.rule_alternative(pos, input)
.map(|node| { Node {
rule: Rule::sequence,
start: node.start,
end: node.end,
children: vec![node],
alternative: None,
}
}) {
if pos == node.end {
break;
}
pos = node.end;
list.push(node);
}
if list.is_empty() {
Err(start)
} else {
Ok(Node {
rule: Rule::sequence,
start,
end: pos,
children: list,
alternative: None,
})
}
};
self.rule_memo.insert(key, res.clone());
res
}
#[allow(non_snake_case)]
fn rule_alternative(&mut self, pos: usize, input: &str) -> Result<Node, usize> {
let key = (pos, Rule::alternative);
if let Some(res) = self.rule_memo.get(&key) {
return res.clone();
}
self.rule_memo.insert(key, Err(pos));
let mut res = Err(pos);
let mut next_pos = pos;
loop {
let r = {
let mut list = Vec::new();
let start = pos;
match self.rule_memo.get(&(pos, Rule::alternative)) {
Some(e) => e.clone(),
None => Err(pos),
}
.and_then(|node| {
let pos = node.end;
list.push(node);
self.match_terminal(pos, input, Terminal::Literal_2)
.map(|end| Node::new(Rule::Terminal, pos, end, None))
.ok_or(pos)
})
.and_then(|node| {
let pos = node.end;
list.push(node);
self.rule_ws(pos, input)
})
.map(|node| {
let end = node.end;
list.push(node);
Node {
rule: Rule::alternative,
start,
end,
children: list,
alternative: Some(0),
}
})
}
.or_else(|_| {
let mut list = Vec::new();
let start = pos;
match self.rule_memo.get(&(pos, Rule::alternative)) {
Some(e) => e.clone(),
None => Err(pos),
}
.and_then(|node| {
let pos = node.end;
list.push(node);
self.match_terminal(pos, input, Terminal::Literal_3)
.map(|end| Node::new(Rule::Terminal, pos, end, None))
.ok_or(pos)
})
.and_then(|node| {
let pos = node.end;
list.push(node);
self.rule_ws(pos, input)
})
.map(|node| {
let end = node.end;
list.push(node);
Node {
rule: Rule::alternative,
start,
end,
children: list,
alternative: Some(1),
}
})
})
.or_else(|_| {
let mut list = Vec::new();
let start = pos;
match self.rule_memo.get(&(pos, Rule::alternative)) {
Some(e) => e.clone(),
None => Err(pos),
}
.and_then(|node| {
let pos = node.end;
list.push(node);
self.match_terminal(pos, input, Terminal::Literal_4)
.map(|end| Node::new(Rule::Terminal, pos, end, None))
.ok_or(pos)
})
.and_then(|node| {
let pos = node.end;
list.push(node);
self.rule_ws(pos, input)
})
.map(|node| {
let end = node.end;
list.push(node);
Node {
rule: Rule::alternative,
start,
end,
children: list,
alternative: Some(2),
}
})
})
.or_else(|_| {
let mut list = Vec::new();
let start = pos;
self.match_terminal(pos, input, Terminal::Literal_5)
.map(|end| Node::new(Rule::Terminal, pos, end, None))
.ok_or(pos)
.and_then(|node| {
let pos = node.end;
list.push(node);
self.rule_ws(pos, input)
})
.and_then(|node| {
let pos = node.end;
list.push(node);
self.rule_expression(pos, input)
})
.and_then(|node| {
let pos = node.end;
list.push(node);
self.match_terminal(pos, input, Terminal::Literal_6)
.map(|end| Node::new(Rule::Terminal, pos, end, None))
.ok_or(pos)
})
.and_then(|node| {
let pos = node.end;
list.push(node);
self.rule_ws(pos, input)
})
.map(|node| {
let end = node.end;
list.push(node);
Node {
rule: Rule::alternative,
start,
end,
children: list,
alternative: Some(3),
}
})
})
.or_else(|_| {
let mut list = Vec::new();
let start = pos;
self.rule_primary(pos, input)
.and_then(|node| {
let pos = node.end;
list.push(node);
self.rule_ws(pos, input)
})
.map(|node| {
let end = node.end;
list.push(node);
Node {
rule: Rule::alternative,
start,
end,
children: list,
alternative: Some(4),
}
})
});
if let Ok(new_node) = &r {
if new_node.end > next_pos {
next_pos = new_node.end;
res = r;
self.rule_memo.insert(key, res.clone());
continue;
}
}
break;
}
self.rule_memo.insert(key, res.clone());
res
}
#[allow(non_snake_case)]
fn rule_primary(&mut self, pos: usize, input: &str) -> Result<Node, usize> {
let key = (pos, Rule::primary);
if let Some(res) = self.rule_memo.get(&key) {
return res.clone();
}
let res = {
let mut list = Vec::new();
let start = pos;
self.match_terminal(pos, input, Terminal::Literal_7)
.map(|end| Node::new(Rule::Terminal, pos, end, None))
.ok_or(pos)
.and_then(|node| {
let pos = node.end;
list.push(node);
self.rule_ws(pos, input)
})
.and_then(|node| {
let pos = node.end;
list.push(node);
self.rule_primary(pos, input)
})
.map(|node| {
let end = node.end;
list.push(node);
Node {
rule: Rule::primary,
start,
end,
children: list,
alternative: Some(0),
}
})
}
.or_else(|_| {
let mut list = Vec::new();
let start = pos;
self.match_terminal(pos, input, Terminal::Literal_8)
.map(|end| Node::new(Rule::Terminal, pos, end, None))
.ok_or(pos)
.and_then(|node| {
let pos = node.end;
list.push(node);
self.rule_ws(pos, input)
})
.and_then(|node| {
let pos = node.end;
list.push(node);
self.rule_primary(pos, input)
})
.map(|node| {
let end = node.end;
list.push(node);
Node {
rule: Rule::primary,
start,
end,
children: list,
alternative: Some(1),
}
})
})
.or_else(|_| {
let mut list = Vec::new();
let start = pos;
self.rule_regex(pos, input)
.and_then(|node| {
let pos = node.end;
list.push(node);
self.rule_ws(pos, input)
})
.map(|node| {
let end = node.end;
list.push(node);
Node {
rule: Rule::primary,
start,
end,
children: list,
alternative: Some(2),
}
})
})
.or_else(|_| {
let mut list = Vec::new();
let start = pos;
self.builtin_xid_identifier(pos, input, None)
.and_then(|node| {
let pos = node.end;
list.push(node);
self.rule_ws(pos, input)
})
.map(|node| {
let end = node.end;
list.push(node);
Node {
rule: Rule::primary,
start,
end,
children: list,
alternative: Some(3),
}
})
})
.or_else(|_| {
let mut list = Vec::new();
let start = pos;
self.rule_string_literal(pos, input)
.and_then(|node| {
let pos = node.end;
list.push(node);
self.rule_ws(pos, input)
})
.map(|node| {
let end = node.end;
list.push(node);
Node {
rule: Rule::primary,
start,
end,
children: list,
alternative: Some(4),
}
})
})
.or_else(|_| {
let mut list = Vec::new();
let start = pos;
self.match_terminal(pos, input, Terminal::Literal_9)
.map(|end| Node::new(Rule::Terminal, pos, end, None))
.ok_or(pos)
.and_then(|node| {
let pos = node.end;
list.push(node);
self.rule_ws(pos, input)
})
.map(|node| {
let end = node.end;
list.push(node);
Node {
rule: Rule::primary,
start,
end,
children: list,
alternative: Some(5),
}
})
});
self.rule_memo.insert(key, res.clone());
res
}
#[allow(non_snake_case)]
fn rule_string_literal(&mut self, pos: usize, input: &str) -> Result<Node, usize> {
let key = (pos, Rule::string_literal);
if let Some(res) = self.rule_memo.get(&key) {
return res.clone();
}
let res = self.match_terminal(pos, input, Terminal::Regex)
.map(|end| Node::new(Rule::string_literal, pos, end, Some(0)))
.ok_or(pos)
.or_else(|_| self.match_terminal(pos, input, Terminal::Regex_0)
.map(|end| Node::new(Rule::string_literal, pos, end, Some(1)))
.ok_or(pos));
self.rule_memo.insert(key, res.clone());
res
}
#[allow(non_snake_case)]
fn rule_regex(&mut self, pos: usize, input: &str) -> Result<Node, usize> {
let key = (pos, Rule::regex);
if let Some(res) = self.rule_memo.get(&key) {
return res.clone();
}
let res = self.match_terminal(pos, input, Terminal::re)
.map(|end| Node::new(Rule::regex, pos, end, None))
.ok_or(pos);
self.rule_memo.insert(key, res.clone());
res
}
#[allow(non_snake_case)]
fn rule_ws(&mut self, pos: usize, input: &str) -> Result<Node, usize> {
let key = (pos, Rule::ws);
if let Some(res) = self.rule_memo.get(&key) {
return res.clone();
}
let res = self.builtin_whitespace(pos, input, None)
.map(|node| { Node {
rule: Rule::ws,
start: node.start,
end: node.end,
children: vec![node],
alternative: None,
}
});
self.rule_memo.insert(key, res.clone());
res
}
fn builtin_whitespace(&self, pos: usize, input: &str, alt: Option<u16>) -> Result<Node, usize> {
let mut chars = input[pos..].char_indices();
let mut next_pos;
loop {
if let Some((off, ch)) = chars.next() {
next_pos = pos + off;
if !ch.is_whitespace() {
break;
}
} else {
next_pos = input.len();
break;
}
}
Ok(Node::new(Rule::WHITESPACE, pos, next_pos, alt))
}
fn builtin_eoi(&self, pos: usize, input: &str, alt: Option<u16>) -> Result<Node, usize> {
if pos == input.len() {
Ok(Node::new(Rule::EOI, pos, pos, alt))
} else {
Err(pos)
}
}
fn builtin_xid_identifier(&mut self, pos: usize, input: &str, alt: Option<u16>) -> Result<Node, usize> {
let key = (pos, Rule::XID_IDENTIFIER);
if let Some(res) = self.rule_memo.get(&key) {{
let mut res = res.clone();
if let Ok(res) = &mut res {
res.alternative = alt;
}
return res;
}}
let mut chars = input[pos..].char_indices();
let mut end = pos;
let mut res = if let Some((_, ch)) = chars.next() {
if UnicodeXID::is_xid_start(ch) {
while {
if let Some((off, ch)) = chars.next() {
end = pos + off;
UnicodeXID::is_xid_continue(ch)
} else {
false
}
} {}
Ok(Node::new(Rule::XID_IDENTIFIER, pos, end, alt))
} else {
Err(pos)
}
} else {
Err(pos)
};
self.rule_memo.insert(key, res.clone());
if let Ok(res) = &mut res {
res.alternative = alt;
}
res
}
fn match_terminal(&mut self, pos: usize, input: &str, terminal: Terminal) -> Option<usize> {
let key = (pos, terminal);
if let Some(res) = self.terminal_memo.get(&key) {
return *res;
}
let res = if pos > input.len() {
None
} else {
match terminal {
Terminal::re => {
self.regex_re.find(&input[pos..]).map(|m| {
m.end() + pos
})
}
Terminal::Literal_9 => {
if input[pos..].starts_with('.') {
Some(pos + ".".len())
} else {
None
}
}
Terminal::Regex => {
self.regex_Regex.find(&input[pos..]).map(|m| {
m.end() + pos
})
}
Terminal::Literal_7 => {
if input[pos..].starts_with('&') {
Some(pos + "&".len())
} else {
None
}
}
Terminal::Literal_5 => {
if input[pos..].starts_with('(') {
Some(pos + "(".len())
} else {
None
}
}
Terminal::Literal_0 => {
if input[pos..].starts_with(';') {
Some(pos + ";".len())
} else {
None
}
}
Terminal::Literal_6 => {
if input[pos..].starts_with(')') {
Some(pos + ")".len())
} else {
None
}
}
Terminal::Literal_1 => {
if input[pos..].starts_with('/') {
Some(pos + "/".len())
} else {
None
}
}
Terminal::Literal_8 => {
if input[pos..].starts_with('!') {
Some(pos + "!".len())
} else {
None
}
}
Terminal::Literal_2 => {
if input[pos..].starts_with('?') {
Some(pos + "?".len())
} else {
None
}
}
Terminal::Literal => {
if input[pos..].starts_with("<-") {
Some(pos + "<-".len())
} else {
None
}
}
Terminal::Literal_4 => {
if input[pos..].starts_with('+') {
Some(pos + "+".len())
} else {
None
}
}
Terminal::Regex_0 => {
self.regex_Regex_0.find(&input[pos..]).map(|m| {
m.end() + pos
})
}
Terminal::Literal_3 => {
if input[pos..].starts_with('*') {
Some(pos + "*".len())
} else {
None
}
}
}
};
self.terminal_memo.insert(key, res);
res
}
}
}