use std::collections::HashMap;
use std::fmt::Display;
use std::sync::OnceLock;
use std::{borrow::Cow, sync::Arc};
use pest::iterators::Pair;
use pest::Parser;
use pest_derive::Parser;
use rand::distributions::{Alphanumeric, DistString};
use rand::rngs::StdRng;
use rand::SeedableRng;
use regex::Regex;
use crate::ast::Node;
use crate::compiler::rule_argument::Arg;
use crate::types::{LibError, RuleMap};
/// Parser for rule files, generated by `pest_derive` from the grammar in
/// `lib/compiler/rule_grammar.pest`. The derive also generates the `Rule`
/// enum that the parsing functions in this file match on.
#[derive(Parser)]
#[grammar = "lib/compiler/rule_grammar.pest"]
struct RawRuleParser;
/// A single parsed rule: the path pattern it was declared with, the regex
/// compiled from that pattern, and the instructions and settings attached to it.
#[derive(Debug, Clone)]
pub(crate) struct LangRule {
/// Path pattern text exactly as written in the rule file. Within this file
/// it is only read by the tests, hence the `allow(unused)`.
#[allow(unused)]
path: String,
/// Regex compiled from `path`; used by `matches`.
regex: Regex,
/// Instructions listed under the rule, in source order.
pub instructions: Vec<LangInstr>,
/// Settings parsed from the rule's settings pair, or the default settings
/// when the rule has none.
pub settings: LangSettings,
}
impl LangRule {
    /// Returns `true` when this rule's compiled regex finds a match in `path`.
    ///
    /// The regex is not anchored by this module, so a match anywhere in
    /// `path` counts.
    pub fn matches(&self, path: &str) -> bool {
        let pattern = &self.regex;
        pattern.is_match(path)
    }
}
/// One instruction of a rule: an operation name followed by its arguments.
#[derive(Debug, Eq, PartialEq, Clone)]
pub(crate) struct LangInstr {
/// Operation name exactly as written in the rule file (e.g. `PUSH`, `WRITE`).
pub op: String,
/// Arguments that follow the operation, in source order.
pub args: Vec<Arg>,
}
impl Display for LangInstr {
    /// Renders the instruction as its operation name followed by each
    /// argument, all separated by single spaces (no trailing space when
    /// there are no arguments).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.op)?;
        // Emitting the separator in front of every argument yields the same
        // text as joining the arguments with spaces after the operation.
        for arg in &self.args {
            write!(f, " {arg}")?;
        }
        Ok(())
    }
}
/// Per-rule flags and defaults. In this file they are populated from the
/// text of a rule's settings pair.
#[derive(Debug, Copy, Clone, Default, PartialEq)]
pub struct LangSettings {
    /// Set when the settings text contains `COMPOSABLE`.
    pub is_composable: bool,
    /// Set when the settings text contains `PARAGRAPHABLE`.
    pub is_paragraphable: bool,
    /// Set when the settings text contains `UNESCAPED_VALUE`.
    pub is_unescaped_value: bool,
    /// `Some("exec")` for `SRC-BY-EXEC`, `Some("copy")` for `SRC-BY-COPY`,
    /// otherwise `None`.
    pub default_src: Option<&'static str>,
    /// `Some("copy")` for `REF-BY-COPY`, otherwise `None`.
    pub default_ref: Option<&'static str>,
}

impl LangSettings {
    /// Folds the boolean flags of `other` into `self`: a flag ends up set
    /// when it is set on either side.
    ///
    /// `default_src` and `default_ref` are left exactly as they were on
    /// `self`; the values on `other` are ignored.
    pub fn merge(&mut self, other: &LangSettings) {
        let LangSettings {
            is_composable,
            is_paragraphable,
            is_unescaped_value,
            ..
        } = *other;
        self.is_composable |= is_composable;
        self.is_paragraphable |= is_paragraphable;
        self.is_unescaped_value |= is_unescaped_value;
    }
}
/// Mutable state bundle: named stacks of strings, an index, values copied
/// from a parent node, and a string generator.
///
/// NOTE(review): how the rest of the compiler consumes these fields is not
/// visible in this file; the field notes below only state what this file shows.
#[derive(Debug, Clone, Default)]
pub struct Context<'a> {
/// String stacks looked up by key; presumably the key is the stack name
/// used in rule instructions (confirm against the instruction executor).
pub stacks: HashMap<Arc<str>, Vec<Cow<'a, str>>>,
/// Value last stored through `set_index` (0 by default).
pub index: usize,
/// Clone of `Node::value` taken by `set_parent`.
pub parent_value: Option<Arc<str>>,
/// Clone of `Node::headers` taken by `set_parent`.
pub parent_headers: Option<Vec<Arc<str>>>,
/// Generator of short random strings; its `Default` uses a fixed seed.
pub rng: StringRng,
}
/// Newtype around `StdRng` that hands out short random strings via
/// `sample_string`.
#[derive(Debug, Clone)]
pub struct StringRng(StdRng);
impl Default for StringRng {
    /// Builds the generator from a fixed seed, so every default-constructed
    /// instance starts from the same generator state.
    fn default() -> Self {
        const SEED: u64 = 1234;
        let inner = StdRng::seed_from_u64(SEED);
        Self(inner)
    }
}
impl StringRng {
pub fn sample_string(&mut self) -> String {
Alphanumeric.sample_string(&mut self.0, 6).to_lowercase()
}
}
// The explicit `'a` is kept on purpose; the allow silences clippy's
// suggestion to elide it.
#[allow(clippy::needless_lifetimes)]
impl<'a> Context<'a> {
    /// Replaces `parent_value` and `parent_headers` with clones of the
    /// node's `value` and `headers`.
    pub fn set_parent(&mut self, node: &Node) {
        let (value, headers) = (node.value.clone(), node.headers.clone());
        self.parent_value = value;
        self.parent_headers = headers;
    }

    /// Overwrites the stored index with `idx`.
    pub fn set_index(&mut self, idx: usize) {
        self.index = idx;
    }
}
pub fn parse(input: &str) -> Result<(String, String, RuleMap), LibError> {
RawRuleParser::parse(Rule::Root, input)
.map_err(|e| LibError::from(Box::new(e)))
.and_then(parse_root)
}
/// Walks the children of the root pair and collects the preamble fields and
/// the rules of each section.
///
/// `Preamble` pairs supply the name and media type (both stay empty strings
/// when no preamble is present). `Section` pairs are dispatched on their
/// heading: `COMPILE` and `PREPROCESS` rules are appended to their respective
/// lists. Pairs of any other kind are ignored.
///
/// # Errors
/// Returns `LibError::unknown_rule_section` for any other section heading, and
/// forwards errors from `parse_recursive`.
///
/// # Panics
/// Panics if the iterator is empty, or if a section or preamble is missing
/// the children read here.
fn parse_root<'a>(
    mut pairs: impl Iterator<Item = Pair<'a, Rule>> + 'a,
) -> Result<(String, String, RuleMap), LibError> {
    let mut name = String::new();
    let mut media_type = String::new();
    let mut compile_rules = Vec::new();
    let mut preprocess_rules = Vec::new();

    let root = pairs.next().unwrap();
    for child in root.into_inner() {
        match child.as_rule() {
            Rule::Section => {
                let mut body = child.into_inner();
                // The first child is the section heading; the rest are its rules.
                let heading = body.next().unwrap().as_str();
                match heading {
                    "COMPILE" => compile_rules.extend(parse_recursive(body)?),
                    "PREPROCESS" => preprocess_rules.extend(parse_recursive(body)?),
                    unknown => return Err(LibError::unknown_rule_section(unknown)),
                }
            }
            Rule::Preamble => {
                let mut fields = child.into_inner();
                name = fields.next().unwrap().as_str().to_string();
                media_type = fields.next().unwrap().as_str().to_string();
            }
            _ => {}
        }
    }

    let rules = HashMap::from([("COMPILE", compile_rules), ("PREPROCESS", preprocess_rules)]);
    Ok((name, media_type, rules))
}
fn parse_recursive<'a>(
pairs: impl Iterator<Item = Pair<'a, Rule>>,
) -> Result<Vec<LangRule>, LibError> {
let mut result = Vec::new();
static DEFAULT_SCHEMAS: OnceLock<Regex> = OnceLock::new();
let re = DEFAULT_SCHEMAS.get_or_init(|| Regex::new(r"(\w+)-BY-(\w+)").unwrap());
for pair in pairs {
if pair.as_rule() == Rule::Rule {
let mut pairs = pair.into_inner().peekable();
let path = pairs.next().unwrap().as_str().to_string();
let settings = match pairs.peek().unwrap().as_rule() {
Rule::Settings => {
let settings = pairs.next().unwrap().as_str();
let mut default_src = None;
let mut default_ref = None;
re.captures_iter(settings).for_each(|c| {
match (c.get(1).map(|k| k.as_str()), c.get(2).map(|v| v.as_str())) {
(Some("SRC"), Some("EXEC")) => default_src = Some("exec"),
(Some("SRC"), Some("COPY")) => default_src = Some("copy"),
(Some("REF"), Some("COPY")) => default_ref = Some("copy"),
(Some(key), Some(value)) => panic!("unknown default {key} {value}"),
(_, _) => panic!("unknown default"),
}
});
LangSettings {
is_composable: settings.contains("COMPOSABLE"),
is_paragraphable: settings.contains("PARAGRAPHABLE"),
is_unescaped_value: settings.contains("UNESCAPED_VALUE"),
default_src,
default_ref,
}
}
_ => LangSettings::default(),
};
let regex = Regex::new(
&path
.replace('[', r"\[ ?")
.replace(']', r" ?\]")
.replace("...", r"[^]]*"),
)?;
let mut instructions = Vec::new();
for mut pairs in pairs.map(Pair::into_inner) {
let op = pairs.next().unwrap().as_str().to_string();
let args = pairs.map(Arg::try_from).collect::<Result<_, _>>()?;
instructions.push(LangInstr { op, args });
}
result.push(LangRule { path, regex, instructions, settings });
}
}
Ok(result)
}
#[cfg(test)]
mod tests {
use indoc::indoc;
use pretty_assertions::assert_eq;
use Arg::*;
use super::*;
use crate::compiler::lang::Lang;
// Loads a minimal rule file through `Lang::new` and checks that its single
// COMPILE rule carries the expected path, regex source, instructions and
// settings.
#[test]
fn test_parse_rule() {
let input = indoc! {
r#"
RULES FOR test PRODUCE text/plain
COMPILE RULES:
[rule...]
IS COMPOSABLE PARAGRAPHABLE REF-BY-COPY SRC-BY-EXEC
PUSH foo "bar"
WRITE "this=\"that\"\n"
"#
};
let lang = Lang::new(input).unwrap();
let compile_rules = lang.rules.get("COMPILE").unwrap();
assert_eq!(compile_rules.len(), 1);
let rule = &compile_rules[0];
let expected = LangRule {
path: "[rule...]".to_string(),
regex: Regex::new(r#"\[ ?rule[^]]* ?\]"#).unwrap(),
instructions: vec![
LangInstr {
op: "PUSH".into(),
args: vec![StackRef("foo".into()), Str("bar".into())],
},
LangInstr {
op: "WRITE".into(),
args: vec![Str(r#"this=\"that\"\n"#.into())],
},
],
settings: LangSettings {
is_composable: true,
is_paragraphable: true,
is_unescaped_value: false,
default_src: Some("exec"),
default_ref: Some("copy"),
},
};
// `LangRule` does not derive `PartialEq`, so the rule is compared field by
// field and the regexes by their pattern source.
assert_eq!(rule.path, expected.path);
assert_eq!(rule.instructions, expected.instructions);
assert_eq!(rule.regex.as_str(), expected.regex.as_str());
assert_eq!(rule.settings, expected.settings);
}
}