use regex::Regex;
/// The matching rule that a [`SyntacticalInstance`] applies to the input.
///
/// Every payload is `'static`; the constructors in this file obtain those
/// references by leaking heap allocations.
#[derive(Clone, Copy, Debug)]
#[allow(missing_docs)]
pub enum SyntacticalInstanceKind {
/// Parse the inner instance, then re-parse the text it matched using the
/// variable whose name is given by the first field.
Questioned(&'static str, &'static SyntacticalInstance),
/// Match the inner instance repeatedly until it fails; zero matches still succeed.
ZeroOrMore(&'static SyntacticalInstance),
/// Like `ZeroOrMore`, but the first attempt must succeed.
OneOrMore(&'static SyntacticalInstance),
/// Try the alternatives in order; the first one that parses wins.
Either(&'static [SyntacticalInstance]),
/// Match every instance in order; the first failure fails the whole set.
Set(&'static [SyntacticalInstance]),
/// Succeeds only when the inner instance fails to parse.
Not(&'static SyntacticalInstance),
/// Literal text that the remaining input must start with.
Regular(&'static str),
/// Name of a variable that is looked up in the parser's `vars` at parse time.
Search(&'static str),
/// Compiled regular expression searched for starting at the current position.
Regex(&'static Regex)
}
/// A span of a pattern string found while scanning it in `parse_instance`.
///
/// Both fields are offsets into the pattern: for delimited variants the first
/// is the position right after the opening delimiter and the second is the
/// position of the closing delimiter, so `pattern[first..second]` is the
/// content between the delimiters.
#[derive(Debug)]
enum Section {
/// `#name#` - inclusion of a previously declared variable.
Hashtagged(usize, usize),
/// `?name:pattern?` - parse statement.
Questioned(usize, usize),
/// `*pattern*` - zero-or-more statement.
Starred(usize, usize),
/// Plain text outside of any delimiter pair.
Regular(usize, usize),
/// `+pattern+` - one-or-more statement.
Plussed(usize, usize),
/// `|a$b|` - either statement, alternatives separated by `$`.
Barred(usize, usize),
/// `&name&` - search statement.
Anded(usize, usize),
/// `/regex/` - regular expression.
Regex(usize, usize),
/// `^pattern^` - not statement.
Not(usize, usize),
}
/// Conversion of an owned container into a `'static` reference to its contents.
///
/// The implementations in this file never free the backing allocation.
trait Leak<T: ?Sized> {
/// Consumes `self` and returns a reference that is valid for the rest of the program.
fn leak(self) -> &'static T;
}
/// Method-call form of leaking a boxed value into a `'static` reference.
trait LeakBox<T: ?Sized> {
/// Consumes `self` and returns a reference that is valid for the rest of the program.
fn leak_box(self) -> &'static T;
}
impl<T: ?Sized> LeakBox<T> for Box<T> {
    /// Gives up ownership of the box; its contents are never dropped or freed.
    fn leak_box(self) -> &'static T {
        let leaked: &'static mut T = Box::leak(self);
        leaked
    }
}
impl Leak<str> for String {
    /// Shrinks the string into a boxed `str` and leaks that allocation.
    fn leak(self) -> &'static str {
        let boxed: Box<str> = self.into_boxed_str();
        Box::leak(boxed)
    }
}
impl<T> Leak<[T]> for Vec<T> {
    /// Shrinks the vector into a boxed slice and leaks that allocation.
    fn leak(self) -> &'static [T] {
        let boxed: Box<[T]> = self.into_boxed_slice();
        Box::leak(boxed)
    }
}
/// A single named grammar rule.
#[derive(Clone, Copy, Debug)]
pub struct SyntacticalInstance {
/// Name of the rule; printed when tracing and used to look the rule up by name.
pub name: &'static str,
/// What the rule matches.
pub kind: SyntacticalInstanceKind,
/// Message associated with a failure of this rule; `process_spdl` sets it
/// through the `seterror` command.
pub error_on_fail: &'static str
}
/// A parser built from an SPDL description.
#[derive(Clone, Copy, Debug)]
pub struct SPDLParser {
/// The rule that `get_syntax_tree` applies to the input, over and over.
pub root: SyntacticalInstance,
/// All declared rules; searched by name while parsing.
pub vars: &'static [SyntacticalInstance]
}
/// A node of the tree produced by parsing.
#[derive(Clone, Copy, Debug)]
pub struct SyntaxTree {
/// Label of the node (the name of the rule that produced it, or `"Root"`).
pub branch: &'static str,
/// Text associated with the node.
pub branch_value: &'static str,
/// Child nodes, in the order they were matched.
pub branches: &'static [SyntaxTree]
}
/// A parse error together with the position recorded for it.
#[derive(Clone, Copy, PartialEq)]
pub struct SPDLError {
/// Human-readable description of the failure.
pub value: &'static str,
/// Value of the parser's line counter when the error was recorded.
pub line: usize,
/// Value of the parser's column counter when the error was recorded.
pub column: usize
}
impl std::fmt::Debug for SPDLError {
    /// Renders the error as `On line <line>, column <column>: <message>`.
    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { value, line, column } = self;
        formatter.write_fmt(format_args!("On line {}, column {}: {}", line, column, value))
    }
}
impl SPDLParser {
pub fn get_syntax_tree(&self, input: &'static str, trace: bool) -> Result<SyntaxTree, Vec<SPDLError>> {
let mut cpos = 0usize;
let mut line = 1usize;
let mut column = 0usize;
let mut branches = Vec::new();
let mut last_line_pos = 0usize;
let mut parsed = SyntaxTree {
branch: "Root",
branch_value: input,
branches: &[]
};
while cpos < input.len() - 1 {
let now_parsed = self.root.parse_on(input, &mut cpos, &self.root.name, &mut line, &mut last_line_pos, &mut column, self, trace);
if now_parsed.is_ok() {
branches.push(now_parsed.unwrap());
} else {
return now_parsed;
}
}
parsed.branches = branches.leak();
Ok(parsed)
}
}
impl SyntacticalInstance {
    /// Tries to match this rule against `input` at byte offset `*cpos`.
    ///
    /// * `branch` - label given to the resulting tree node.
    /// * `line`, `last_lpos`, `column` - position counters shared by the whole
    ///   parse. After a successful match whose text contains a newline (and
    ///   that did not end at the very end of `input`) `*line` is incremented,
    ///   `*column` is reset to 0 and `*last_lpos` is set to the cursor;
    ///   otherwise `*column` becomes the number of bytes this call consumed.
    /// * `top_level` - parser whose `vars` are consulted for `Search` and
    ///   `Questioned` rules.
    /// * `trace` - print the rule name and position before matching.
    ///
    /// On success the cursor is advanced past the matched text and the tree
    /// node is returned. A rule that produced exactly one direct child
    /// (`Regular`, `Regex`, `Search`, `Not`) is collapsed into that child,
    /// relabelled with `branch`.
    ///
    /// # Errors
    ///
    /// On failure the cursor and all position counters are restored to the
    /// values they had on entry and the collected errors are returned.
    ///
    /// # Panics
    ///
    /// Panics when a `Search` rule names a variable that is not present in
    /// `top_level.vars`.
    fn parse_on(&self, input: &'static str, cpos: &mut usize, branch: &'static str, line: &mut usize, last_lpos: &mut usize, column: &mut usize, top_level: &SPDLParser, trace: bool) -> Result<SyntaxTree, Vec<SPDLError>> {
        let mut branches: Vec<SyntaxTree> = Vec::new();
        // Messages without a position yet; they get one just before returning.
        let mut errors: Vec<&'static str> = Vec::new();
        let mut positioned_errors: Vec<SPDLError> = Vec::new();
        let mut parsed = SyntaxTree {
            branch,
            branch_value: "",
            branches: &[],
        };
        if trace {
            println!("At position {}: {}", cpos, self.name);
        }
        let orig_cpos = *cpos;
        let orig_line = *line;
        let orig_last_lpos = *last_lpos;
        let orig_column = *column;
        // Text matched directly by this rule (used when it has no children).
        let mut branch_value = String::new();
        match self.kind {
            SyntacticalInstanceKind::ZeroOrMore(instance) => loop {
                let before = *cpos;
                match instance.parse_on(input, cpos, instance.name, line, last_lpos, column, top_level, trace) {
                    Ok(tree) => {
                        branches.push(tree);
                        // A zero-width match would repeat identically forever.
                        if *cpos == before {
                            break;
                        }
                    }
                    // The first failure simply ends the repetition.
                    Err(_) => break,
                }
            },
            SyntacticalInstanceKind::OneOrMore(instance) => {
                match instance.parse_on(input, cpos, instance.name, line, last_lpos, column, top_level, trace) {
                    Err(errs) => positioned_errors = errs,
                    Ok(first) => {
                        branches.push(first);
                        // Only keep repeating while the cursor actually moves.
                        let mut progressed = *cpos != orig_cpos;
                        while progressed {
                            let before = *cpos;
                            match instance.parse_on(input, cpos, instance.name, line, last_lpos, column, top_level, trace) {
                                Ok(tree) => {
                                    branches.push(tree);
                                    progressed = *cpos != before;
                                }
                                Err(_) => break,
                            }
                        }
                    }
                }
            },
            SyntacticalInstanceKind::Either(instances) => {
                let mut matched = false;
                // Errors of the alternative whose failure was ranked furthest.
                let mut furthest_errs: Option<Vec<SPDLError>> = None;
                let mut furthest = 0usize;
                for instance in instances.iter() {
                    match instance.parse_on(input, cpos, instance.name, line, last_lpos, column, top_level, trace) {
                        Ok(tree) => {
                            branches.push(tree);
                            matched = true;
                            break;
                        }
                        Err(errs) => {
                            let base = *last_lpos;
                            let reach = errs.iter().map(|err| base + err.column).max().unwrap_or(0);
                            // `>=` lets a later alternative win ties.
                            if reach >= furthest {
                                furthest = reach;
                                furthest_errs = Some(errs);
                            }
                        }
                    }
                }
                if !matched {
                    match furthest_errs {
                        Some(errs) => positioned_errors.extend(errs),
                        // Only reachable for a hand-built rule with no alternatives.
                        None => errors.push("Expected one of the alternatives of an either statement, but it has none!"),
                    }
                }
            },
            SyntacticalInstanceKind::Set(instances) => {
                for instance in instances.iter() {
                    match instance.parse_on(input, cpos, instance.name, line, last_lpos, column, top_level, trace) {
                        Ok(tree) => branches.push(tree),
                        Err(errs) => {
                            positioned_errors = errs;
                            break;
                        }
                    }
                }
            },
            SyntacticalInstanceKind::Regular(section) => {
                if input[*cpos..].starts_with(section) {
                    branch_value.push_str(section);
                    parsed.branches = Box::new([SyntaxTree {
                        branch: "Raw Syntax",
                        branch_value: section,
                        branches: &[],
                    }]).leak_box();
                    *cpos += section.len();
                } else {
                    errors.push(format!("Expected this: \"{}\"", section).leak());
                }
            },
            SyntacticalInstanceKind::Questioned(top, section) => {
                match section.parse_on(input, cpos, section.name, line, last_lpos, column, top_level, trace) {
                    Err(errs) => positioned_errors = errs,
                    Ok(matched) => match top_level.vars.iter().find(|x| x.name == top) {
                        None => errors.push(format!("The variable {} does not exist in this file!", top).leak()),
                        Some(var) => {
                            // Re-parse the matched text with the named variable as
                            // root. The declared variables are passed along so that
                            // search statements inside it can still be resolved.
                            let sub_parser = SPDLParser {
                                root: *var,
                                vars: top_level.vars,
                            };
                            match sub_parser.get_syntax_tree(matched.branch_value, trace) {
                                Ok(sub_tree) => parsed.branches = Box::new([matched, sub_tree]).leak_box(),
                                Err(errs) => positioned_errors = errs,
                            }
                        }
                    },
                }
            },
            SyntacticalInstanceKind::Search(name) => {
                match top_level.vars.iter().find(|x| x.name == name) {
                    None => panic!("Internal Parser Error: The syntactical instance \"{}\" doesn't exist in the parsing variable, but it was searched for!", name),
                    Some(syntax) => match syntax.parse_on(input, cpos, name, line, last_lpos, column, top_level, trace) {
                        Ok(tree) => {
                            // Keep the matched text: when the variable is a leaf (no
                            // children) the value below is taken from `branch_value`.
                            branch_value.push_str(tree.branch_value);
                            parsed.branches = Box::new([tree]).leak_box();
                        }
                        Err(errs) => positioned_errors = errs,
                    },
                }
            },
            SyntacticalInstanceKind::Not(section) => {
                match section.parse_on(input, cpos, section.name, line, last_lpos, column, top_level, trace) {
                    // The inner rule restored the cursor itself, so nothing is consumed.
                    Err(_) => {
                        parsed.branches = Box::new([SyntaxTree {
                            branch: "Not Statement",
                            // This node is collapsed into its parent below and its value
                            // is replaced by the (empty) matched text, so no description
                            // of the inner rule is rendered here.
                            branch_value: "",
                            branches: &[],
                        }]).leak_box();
                    }
                    Ok(_) => errors.push(format!("Expected something thats isn't this syntactical instance: {}", section.name).leak()),
                }
            },
            SyntacticalInstanceKind::Regex(regex) => {
                let start = *cpos;
                // `find_at` searches forward from `start`; only a match that begins
                // exactly at the cursor is a match of this rule.
                match regex.find_at(input, start).filter(|found| found.start() == start) {
                    None => errors.push(format!("Expected something that fits this regex: {:?}", regex).leak()),
                    Some(found) => {
                        // The match borrows from `input`, which is already `'static`.
                        let finding: &'static str = found.as_str();
                        branch_value.push_str(finding);
                        parsed.branches = Box::new([SyntaxTree {
                            branch: "Regex Finding",
                            branch_value: finding,
                            branches: &[],
                        }]).leak_box();
                        *cpos += finding.len();
                    }
                }
            },
        }
        // Collapse a single direct child into this node, keeping the caller's label.
        if parsed.branches.len() == 1 {
            parsed = parsed.branches[0];
            parsed.branch = branch;
        }
        if !errors.is_empty() || !positioned_errors.is_empty() {
            let (err_line, err_column) = (*line, *column);
            positioned_errors.extend(errors.into_iter().map(|value| SPDLError {
                value,
                line: err_line,
                column: err_column,
            }));
            // Backtrack: leave every piece of shared state as it was on entry.
            *cpos = orig_cpos;
            *line = orig_line;
            *last_lpos = orig_last_lpos;
            *column = orig_column;
            return Err(positioned_errors);
        }
        if parsed.branches.is_empty() {
            parsed.branches = branches.leak();
        }
        if parsed.branches.is_empty() {
            parsed.branch_value = branch_value.leak();
        } else {
            parsed.branch_value = parsed.branches.iter().map(|x| x.branch_value).collect::<String>().leak();
        }
        if *cpos < input.len() && input[orig_cpos..*cpos].contains('\n') {
            *last_lpos = *cpos;
            *column = 0;
            *line += 1;
        } else {
            *column = *cpos - orig_cpos;
        }
        Ok(parsed)
    }
}
/// Compiles one SPDL pattern string into a [`SyntacticalInstance`].
///
/// The pattern is split into plain text and delimited sections:
///
/// | syntax           | meaning                                             |
/// |------------------|-----------------------------------------------------|
/// | `#name#`         | copy of the already declared variable `name`        |
/// | `&name&`         | lookup of variable `name` at parse time             |
/// | `?name:pattern?` | match `pattern`, then re-parse its text with `name` |
/// | `*pattern*`      | zero or more repetitions                            |
/// | `+pattern+`      | one or more repetitions                             |
/// | `a$b` inside `|` | either `a` or `b` (any number of `$` alternatives)  |
/// | `/regex/`        | regular expression (`\/` stands for `/`)            |
/// | `^pattern^`      | succeeds only if `pattern` does not match           |
///
/// A backslash makes the following character part of the surrounding text
/// instead of a delimiter. Sections do not nest at this level: the content of
/// a delimited section is compiled by a recursive call.
///
/// * `instances` - variables declared so far (for `#name#`).
/// * `name`, `error_on_fail` - stored on the returned instance.
/// * `instnames` - every variable name of the file (for validating `&name&`).
/// * `freeform` - when true, optional whitespace is allowed around plain text.
///
/// A pattern consisting of a single section yields that section's instance;
/// anything else yields a `Set` of all sections in order.
///
/// # Errors
///
/// Returns a message for a trailing backslash, an unclosed section, an invalid
/// regex, an unknown variable, or a malformed either/parse statement.
fn parse_instance(instances: &[SyntacticalInstance], inp: &'static str, name: &'static str, error_on_fail: &'static str, instnames: &'static [&'static str], freeform: bool) -> Result<SyntacticalInstance, &'static str> {
    const PUNCTUATION: [char; 9] = ['*', '|', '+', '\\', '#', '/', '^', '?', '&'];
    let valid_name = |candidate: &str| candidate.chars().all(char::is_alphanumeric);
    let unescape = |val: &str| val.replace("\\*", "*").replace("\\/", "/").replace("\\#", "#").replace("\\+", "+")
        .replace("\\?", "?").replace("\\|", "|").replace("\\^", "^").replace("\\&", "&").replace("\\b", "\\");

    // ---- Pass 1: split the pattern into sections, in source order. ----
    let mut sections: Vec<Section> = Vec::new();
    // Delimiter of the section that is currently open and its byte offset.
    // At most one section is open at a time; other delimiters inside it
    // belong to its content.
    let mut open: Option<(char, usize)> = None;
    let mut current_start = 0usize;
    let mut skip_next = false;
    // Byte offsets (not char counts) are required because they index `inp`.
    for (n, x) in inp.char_indices() {
        if skip_next {
            // This character was escaped by the preceding backslash.
            skip_next = false;
            continue;
        }
        if !PUNCTUATION.contains(&x) {
            continue;
        }
        if current_start < n && open.is_none() {
            sections.push(Section::Regular(current_start, n));
        }
        // Every punctuation character is a single byte.
        current_start = n + 1;
        if x == '\\' {
            if n + 1 == inp.len() {
                return Err("Invalid escape!");
            }
            skip_next = true;
            continue;
        }
        match open {
            None => open = Some((x, n)),
            Some((delimiter, start)) if delimiter == x => {
                let (from, to) = (start + 1, n);
                sections.push(match x {
                    '#' => Section::Hashtagged(from, to),
                    '?' => Section::Questioned(from, to),
                    '&' => Section::Anded(from, to),
                    '+' => Section::Plussed(from, to),
                    '/' => Section::Regex(from, to),
                    '*' => Section::Starred(from, to),
                    '|' => Section::Barred(from, to),
                    '^' => Section::Not(from, to),
                    _ => unreachable!(),
                });
                open = None;
            }
            Some(_) => {}
        }
    }
    if let Some((delimiter, _)) = open {
        return Err(match delimiter {
            '*' => "Unclosed zero-or-more statement!",
            '+' => "Unclosed one-or-more statement!",
            '|' => "Unclosed either statement!",
            '/' => "Unclosed regex!",
            '&' => "Unclosed search statement!",
            '#' => "Unclosed variable inclusion!",
            '?' => "Unclosed parse statement!",
            '^' => "Unclosed not statement!",
            _ => unreachable!(),
        });
    }
    // Text after the last delimiter, including a trailing escaped character.
    if current_start < inp.len() {
        sections.push(Section::Regular(current_start, inp.len()));
    }

    // ---- Pass 2: turn every section into an instance. ----
    // The whitespace rule is only needed (and only allocated) in freeform mode.
    let optional_whitespace: Option<SyntacticalInstance> = if freeform {
        Some(SyntacticalInstance {
            name: "Freeform Whitespace",
            error_on_fail: "",
            kind: SyntacticalInstanceKind::Regex(
                Box::new(Regex::new(r"\s*").expect("the whitespace regex is valid")).leak_box()
            ),
        })
    } else {
        None
    };
    let mut set: Vec<SyntacticalInstance> = Vec::new();
    for sect in sections {
        match sect {
            Section::Starred(start, end) => {
                let inner = parse_instance(instances, &inp[start..end], name, "", instnames, freeform)?;
                set.push(SyntacticalInstance {
                    name: "Zero or More Statement",
                    error_on_fail: "",
                    kind: SyntacticalInstanceKind::ZeroOrMore(Box::new(inner).leak_box()),
                });
            },
            Section::Plussed(start, end) => {
                let inner = parse_instance(instances, &inp[start..end], name, "", instnames, freeform)?;
                set.push(SyntacticalInstance {
                    name: "One or More Statement",
                    error_on_fail: "",
                    kind: SyntacticalInstanceKind::OneOrMore(Box::new(inner).leak_box()),
                });
            },
            Section::Barred(start, end) => {
                let section = &inp[start..end];
                if section.split('$').count() < 2 {
                    return Err("Invalid either statement! There must be a dollar sign splitting it.");
                }
                // Stops at, and returns, the first alternative that fails to compile.
                let alternatives = section
                    .split('$')
                    .enumerate()
                    .map(|(n, slice)| {
                        parse_instance(instances, slice, format!("Argument #{} of Either Statement", n).leak(), "", instnames, freeform)
                    })
                    .collect::<Result<Vec<SyntacticalInstance>, &'static str>>()?;
                set.push(SyntacticalInstance {
                    name: "Either Statement",
                    error_on_fail: "",
                    kind: SyntacticalInstanceKind::Either(alternatives.leak()),
                });
            },
            Section::Regex(start, end) => {
                let section = inp[start..end].replace("\\/", "/");
                match Regex::new(&section) {
                    Err(err) => {
                        return Err(format!("Invalid regex due to error \"{}\": {}", err, section).leak());
                    }
                    Ok(regex) => set.push(SyntacticalInstance {
                        name: format!("Regex Statement: {:?}", regex).leak(),
                        error_on_fail: "",
                        kind: SyntacticalInstanceKind::Regex(Box::new(regex).leak_box()),
                    }),
                }
            },
            Section::Hashtagged(start, end) => {
                let section = &inp[start..end];
                if !valid_name(section) {
                    return Err(format!("Invalid variable name in variable inclusion: \"{}\"", section).leak());
                }
                match instances.iter().find(|x| x.name == section) {
                    Some(found) => set.push(*found),
                    None => {
                        return Err(format!("No such valid variable has been declared yet in variable inclusion: {}", section).leak());
                    }
                }
            },
            Section::Questioned(start, end) => {
                let section = &inp[start..end];
                let split: Vec<&'static str> = section.split(':').collect();
                if split.len() != 2 {
                    return Err(format!("Invalid parse statement, expected a colon to split it: {}", section).leak());
                }
                let inner = parse_instance(instances, split[1], "Parse Statement Innards", "", instnames, freeform)?;
                set.push(SyntacticalInstance {
                    name: format!("Raw Syntax for Parse Statement: {}", section).leak(),
                    error_on_fail: "",
                    kind: SyntacticalInstanceKind::Questioned(split[0], Box::new(inner).leak_box()),
                });
            },
            Section::Anded(start, end) => {
                let section = &inp[start..end];
                if !instnames.contains(&section) {
                    return Err(format!("Invalid search statement: The variable \"{}\" is never declared!", section).leak());
                }
                set.push(SyntacticalInstance {
                    name: format!("Search Statement for \"{}\"", section).leak(),
                    error_on_fail: "",
                    kind: SyntacticalInstanceKind::Search(section),
                });
            },
            Section::Regular(start, end) => {
                let section = &inp[start..end];
                if let Some(whitespace) = optional_whitespace {
                    // Pure-whitespace text gets no leading whitespace rule.
                    if !section.chars().all(char::is_whitespace) {
                        set.push(whitespace);
                    }
                }
                set.push(SyntacticalInstance {
                    name: format!("Raw Syntax: {}", section).leak(),
                    error_on_fail: "",
                    kind: SyntacticalInstanceKind::Regular(unescape(section).leak()),
                });
                if let Some(whitespace) = optional_whitespace {
                    set.push(whitespace);
                }
            },
            Section::Not(start, end) => {
                let section = &inp[start..end];
                let inner = parse_instance(instances, section, "Not Statement Innards", "", instnames, freeform)?;
                set.push(SyntacticalInstance {
                    name: format!("Raw Syntax for Not Statement: {}", section).leak(),
                    error_on_fail: "",
                    kind: SyntacticalInstanceKind::Not(Box::new(inner).leak_box()),
                });
            },
        }
    }
    if set.len() == 1 {
        // A single section needs no wrapping set; it just takes over the name.
        let mut only = set[0];
        only.name = name;
        only.error_on_fail = error_on_fail;
        Ok(only)
    } else {
        Ok(SyntacticalInstance {
            name,
            error_on_fail,
            kind: SyntacticalInstanceKind::Set(set.leak()),
        })
    }
}
/// Builds an [`SPDLParser`] from an SPDL description.
///
/// The description is processed line by line; surrounding whitespace is
/// trimmed and every whitespace character is treated as a plain space.
/// Recognised lines:
///
/// * `# ...` - comment, ignored (as are blank lines);
/// * `name = pattern` - declares the variable `name` (ASCII letters only);
///   the pattern is compiled by `parse_instance`;
/// * `seterror name [message]` - sets the failure message of an already
///   declared variable; without a message a default one is generated;
/// * `freeform true|false` - switches freeform mode for the declarations
///   that follow;
/// * `parse name` - selects the already declared variable used as root.
///
/// # Errors
///
/// Returns every problem found, each prefixed with its line number, plus a
/// final fatal error when no `parse` line selected a root.
pub fn process_spdl(spdl: &str) -> Result<SPDLParser, Vec<String>> {
    let mut errors: Vec<String> = Vec::new();
    let mut instances: Vec<SyntacticalInstance> = Vec::new();
    let mut parsing: Option<SyntacticalInstance> = None;

    // First pass: the first token of every non-command line is a candidate
    // variable name, so search statements may refer to later declarations.
    let instnames: Vec<&'static str> = spdl
        .lines()
        .filter_map(|line| line.split_whitespace().next())
        .filter(|first| !matches!(*first, "seterror" | "parse" | "freeform"))
        .map(|first| -> &'static str { Box::leak(first.to_string().into_boxed_str()) })
        .collect();
    let instnames: &'static [&'static str] = Box::leak(instnames.into_boxed_slice());

    let mut freeform = false;
    for (n, raw) in spdl.lines().enumerate() {
        let normalized: String = raw
            .trim()
            .chars()
            .map(|character| if character.is_whitespace() { ' ' } else { character })
            .collect();
        // Skip blank lines and comments before anything gets leaked for them.
        if normalized.is_empty() || normalized.starts_with('#') {
            continue;
        }
        // Compiled instances keep slices of the line, so it must live forever.
        let line: &'static str = Box::leak(normalized.into_boxed_str());
        let mut line_err = |err: &str| errors.push(format!("Error on line {}: {}", n + 1, err));
        let valid_name = |name: &str| name.chars().all(|x| x.is_ascii_alphabetic());
        let split: Vec<&'static str> = line.split_whitespace().collect();
        // inds[k] is the byte offset just past token k and the single space
        // that follows it. Tokens are located in the line itself, so runs of
        // several spaces between tokens do not shift the offsets.
        let mut inds: Vec<usize> = Vec::with_capacity(split.len());
        let mut pos = 0usize;
        for token in split.iter() {
            let rest = &line[pos..];
            pos += rest.len() - rest.trim_start_matches(' ').len();
            pos += token.len();
            inds.push((pos + 1).min(line.len()));
        }
        match split[0] {
            "seterror" => {
                if split.len() < 2 {
                    line_err("Expected variable name after seterror keyword, got nothing!");
                    continue;
                }
                let name = split[1];
                if !valid_name(name) {
                    line_err("Invalid variable name for seterror!");
                    continue;
                }
                match instances.iter_mut().find(|x| x.name == name) {
                    None => line_err(&format!("No such valid variable has been declared yet in seterror command: {}", name)),
                    Some(found) => {
                        let message: &'static str = if split.len() > 2 {
                            // Everything after the variable name is the message.
                            &line[inds[1]..]
                        } else {
                            let fallback: &'static str = Box::leak(
                                format!("Expected syntactical structure \"{}\"!", found.name).into_boxed_str(),
                            );
                            fallback
                        };
                        found.error_on_fail = message;
                    }
                }
            },
            "parse" => {
                if split.len() < 2 {
                    line_err("Nothing specified after parse keyword!");
                    continue;
                }
                // Exactly one name may follow the keyword.
                let target = if split.len() == 2 {
                    instances.iter().find(|x| x.name == split[1])
                } else {
                    None
                };
                match target {
                    Some(found) => parsing = Some(*found),
                    None => line_err("Expected a variable after parse keyword!"),
                }
            },
            "freeform" => {
                if split.len() != 2 {
                    line_err("Expected a boolean and nothing else after the freeform keyword!");
                }
                // `get` instead of indexing: a bare `freeform` has no second token.
                match split.get(1).copied() {
                    Some("true") => freeform = true,
                    Some("false") => freeform = false,
                    Some(_) => line_err("Invalid boolean after freeform keyword!"),
                    None => {}
                }
            },
            x => {
                if split.len() < 3 {
                    line_err("Too short of a variable declaration!");
                    continue;
                }
                if !valid_name(x) {
                    line_err(&format!("Invalid variable name for declaration: {}", x));
                    continue;
                }
                if split[1] != "=" {
                    line_err("Expected \"=\" after variable name in declaration with spaces around it.");
                    continue;
                }
                match parse_instance(&instances, &line[inds[1]..], x, "", instnames, freeform) {
                    Ok(mut instance) => {
                        instance.name = x;
                        instances.push(instance);
                    }
                    Err(err) => line_err(err),
                }
            }
        }
    }
    if parsing.is_none() {
        errors.push("Fatal Error: Nothing specified for parsing!".to_string());
    }
    match parsing {
        Some(root) if errors.is_empty() => Ok(SPDLParser {
            root,
            vars: instances.leak(),
        }),
        _ => Err(errors),
    }
}
#[cfg(test)]
mod tests {
    /// Builds a parser for a tiny "print" language, runs it over a large
    /// generated program and prints how long the parse took.
    #[test]
    fn printing_language() {
        let code = r#"
print "Hello, world!";
"#.repeat(300_000);
        let spdl = r##"
freeform true
string = /"[^"]*"/
printStmt = print #string#;
seterror printStmt Invalid print statement!
parse printStmt
"##;
        let parser = match crate::process_spdl(spdl) {
            Ok(parser) => parser,
            Err(errors) => {
                // Show every problem of the description before failing.
                for err in errors {
                    println!("{}", err);
                }
                panic!("Failed test!");
            }
        };
        let started = std::time::Instant::now();
        // The parser requires `'static` input, so the program text is leaked.
        let input: &'static str = Box::leak(code.into_boxed_str());
        parser.get_syntax_tree(input, false).unwrap();
        println!("{:?}", started.elapsed());
    }
}