pub mod error;
mod modifiers;
mod parser;
use std::{collections::HashMap, rc::Rc, str::FromStr};
pub use crate::error::Error;
use itertools::Itertools;
use ordered_float::OrderedFloat;
#[macro_use]
extern crate lazy_static;
/// Crate-wide result alias whose error type is fixed to [`Error`].
pub type Result<T> = ::std::result::Result<T, Error>;
/// One concrete text produced by expanding a grammar node, together with the
/// names of the variables that were substituted to build it.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Default)]
struct Expansion {
    /// Names of the variables whose values were substituted into `text`.
    varrefs: Vec<String>,
    /// The expanded text.
    text: String,
}

impl Expansion {
    /// Appends `expansion` to `self`, returning the combined expansion.
    ///
    /// The variable references of `expansion` are placed after those of
    /// `self`, and its text is appended to the text of `self`.
    fn concat(mut self, expansion: Expansion) -> Self {
        // `self` is owned, so its buffers are reused instead of being cloned.
        self.varrefs.extend(expansion.varrefs);
        self.text.push_str(&expansion.text);
        self
    }
}
/// A reference to a data variable, optionally piped through a named modifier.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct VarRef {
    /// Name of the variable to look up in the data map.
    var: String,
    /// Name of the modifier to apply to the variable's value, if any.
    modifier: Option<String>,
}

impl VarRef {
    /// Builds a reference to `var` with no modifier attached.
    #[allow(dead_code)]
    fn with_variable(var: &str) -> Self {
        Self {
            var: String::from(var),
            modifier: None,
        }
    }

    /// Builds a reference to `var` whose value is passed through `modifier`.
    #[allow(dead_code)]
    fn with_variable_and_modifier(var: &str, modifier: &str) -> Self {
        let var = String::from(var);
        let modifier = Some(String::from(modifier));
        Self { var, modifier }
    }
}
/// A node in the right-hand side of a grammar rule.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
enum Node {
/// Children whose expansions are combined (every combination, concatenated in order).
Sequence(Vec<Node>),
/// A node that may be present or absent: its expansions plus one empty expansion.
Optional(Box<Node>),
/// Alternatives: the expansions of all children are pooled together.
Choice(Vec<Node>),
/// Literal text.
Text(String),
/// A variable looked up in the data map, optionally with a modifier.
VarRef(VarRef),
/// A reference to another rule, by name.
NonTerminal(String),
}
impl Node {
fn expand(&self, grammar: &Grammar, data: &HashMap<String, String>) -> Result<Vec<Expansion>> {
match self {
Node::Text(text) => Ok(vec![Expansion {
varrefs: vec![],
text: text.clone(),
}]),
Node::VarRef(var) => match data.get(&var.var) {
Some(value) => {
let text = match &var.modifier {
Some(modifier) => match grammar.get_modifier(modifier) {
Some(modifier) => Ok(modifier(value)),
None => Err(Error::UnknownModifierError(modifier.to_string())),
},
None => Ok(value.clone()),
}?;
Ok(vec![Expansion {
varrefs: vec![var.var.clone()],
text,
}])
}
None => Ok(vec![]),
},
Node::NonTerminal(lhs) => match grammar.rules.get(lhs) {
Some(rhs) => rhs.expand(grammar, data),
None => Err(Error::UnknownNonTerminalError(lhs.clone())),
},
Node::Sequence(nodes) => {
let x: Vec<Vec<Expansion>> = nodes
.iter()
.map(|n| n.expand(grammar, data))
.collect::<Result<Vec<_>>>()?;
let y: Vec<Expansion> = x
.iter()
.multi_cartesian_product()
.map(|c| {
c.into_iter()
.fold(Expansion::default(), |a, b| a.concat(b.clone()))
})
.collect();
Ok(y)
}
Node::Optional(node) => {
let mut expansions = node.expand(grammar, data)?;
expansions.push(Expansion::default());
Ok(expansions)
}
Node::Choice(nodes) => {
let expansions: Vec<Expansion> = nodes
.iter()
.map(|n| n.expand(grammar, data))
.flat_map(|result| match result {
Ok(vec) => vec.into_iter().map(Ok).collect(),
Err(e) => vec![Err(e)],
})
.collect::<Result<Vec<_>>>()?;
Ok(expansions)
}
}
}
}
/// Renders the node in the grammar's textual notation.
///
/// `Display` is implemented rather than `ToString` directly; `to_string()`
/// remains available through the standard blanket implementation and yields
/// the same text as before.
impl std::fmt::Display for Node {
    /// Writes the node as:
    ///
    /// * `Text` — the literal text,
    /// * `Sequence` — `[` children back to back `]`,
    /// * `VarRef` — `#var#` or `#var|modifier#`,
    /// * `NonTerminal` — `<name>`,
    /// * `Optional` — `?:[` inner `]`,
    /// * `Choice` — `[` alternatives separated by `|` `]`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Node::Text(text) => f.write_str(text),
            Node::Sequence(children) => {
                f.write_str("[")?;
                for child in children {
                    write!(f, "{}", child)?;
                }
                f.write_str("]")
            }
            Node::VarRef(var) => match &var.modifier {
                Some(modifier) => write!(f, "#{}|{}#", var.var, modifier),
                None => write!(f, "#{}#", var.var),
            },
            Node::NonTerminal(id) => write!(f, "<{}>", id),
            Node::Optional(node) => write!(f, "?:[{}]", node),
            Node::Choice(nodes) => {
                f.write_str("[")?;
                for (index, alternative) in nodes.iter().enumerate() {
                    // Separator goes between alternatives, never before the first.
                    if index > 0 {
                        f.write_str("|")?;
                    }
                    write!(f, "{}", alternative)?;
                }
                f.write_str("]")
            }
        }
    }
}
/// A set of named rules, plus the modifiers and default weights used when expanding them.
#[derive(Clone)]
pub struct Grammar {
/// Rule name mapped to the node forming its right-hand side.
rules: HashMap<String, Node>,
/// Modifier name mapped to the function applied to a variable's value.
modifiers: HashMap<String, Rc<dyn Fn(&str) -> String>>,
/// Per-variable weights passed to the scoring by `generate` and `generate_all`.
default_weights: HashMap<String, f64>,
}
impl Grammar {
    /// Creates a grammar with no rules, no modifiers and no default weights.
    fn new() -> Grammar {
        Grammar {
            rules: HashMap::new(),
            modifiers: HashMap::new(),
            default_weights: HashMap::new(),
        }
    }

    /// Registers `node` as the right-hand side of rule `name`, replacing any
    /// rule previously stored under that name.
    fn add_rule(&mut self, name: &str, node: Node) {
        self.rules.insert(name.to_string(), node);
    }

    /// Looks up the right-hand side of rule `name`.
    fn get_rule(&self, name: &str) -> Option<&Node> {
        self.rules.get(name)
    }

    /// Looks up the modifier function registered under `modifier`.
    fn get_modifier(&self, modifier: &str) -> Option<&dyn Fn(&str) -> String> {
        self.modifiers.get(modifier).map(|x| x.as_ref())
    }

    /// Generates the best-scoring text for rule `name` using the grammar's
    /// default weights.
    ///
    /// See [`Grammar::generate_with_weights`] for the scoring and error rules.
    ///
    /// # Errors
    ///
    /// Same as [`Grammar::generate_with_weights`].
    pub fn generate(&self, name: &str, data: &HashMap<String, String>) -> Result<Option<String>> {
        self.generate_with_weights(name, data, &self.default_weights)
    }

    /// Generates every text for rule `name`, best-scoring first, using the
    /// grammar's default weights.
    ///
    /// # Errors
    ///
    /// Same as [`Grammar::generate_all_with_weights`].
    pub fn generate_all(&self, name: &str, data: &HashMap<String, String>) -> Result<Vec<String>> {
        self.generate_all_with_weights(name, data, &self.default_weights)
    }

    /// Generates the best-scoring text for rule `name`.
    ///
    /// An expansion's score is the sum of the weights of the variables it
    /// references; variables absent from `weights` weigh `1.0`. Returns
    /// `Ok(None)` when the rule has no expansion for `data`.
    ///
    /// # Errors
    ///
    /// Returns `Error::UnknownNonTerminalError` when `name` (or a rule it
    /// references) does not exist, and `Error::UnknownModifierError` when a
    /// variable names an unknown modifier.
    pub fn generate_with_weights(
        &self,
        name: &str,
        data: &HashMap<String, String>,
        weights: &HashMap<String, f64>,
    ) -> Result<Option<String>> {
        // An unknown rule name is a recoverable caller error, not a panic;
        // this mirrors `generate_all_with_weights`.
        let node = self
            .get_rule(name)
            .ok_or_else(|| Error::UnknownNonTerminalError(name.to_string()))?;
        let expansions = node.expand(self, data)?;
        // `max_by_key` returns the last of several equally-maximal elements,
        // which is the element a stable ascending sort would leave in final
        // position, without sorting or cloning.
        Ok(expansions
            .into_iter()
            .max_by_key(|e| OrderedFloat(score_by_varref_weights(e, weights)))
            .map(|e| e.text))
    }

    /// Generates every text for rule `name`, ordered from highest to lowest
    /// score.
    ///
    /// Scoring is the same as in [`Grammar::generate_with_weights`].
    ///
    /// # Errors
    ///
    /// Returns `Error::UnknownNonTerminalError` when `name` (or a rule it
    /// references) does not exist, and `Error::UnknownModifierError` when a
    /// variable names an unknown modifier.
    pub fn generate_all_with_weights(
        &self,
        name: &str,
        data: &HashMap<String, String>,
        weights: &HashMap<String, f64>,
    ) -> Result<Vec<String>> {
        let node = self
            .get_rule(name)
            .ok_or_else(|| Error::UnknownNonTerminalError(name.to_string()))?;
        let mut expansions = node.expand(self, data)?;
        // Sort ascending by score, then reverse so the best comes first.
        expansions.sort_by_cached_key(|e| OrderedFloat(score_by_varref_weights(e, weights)));
        Ok(expansions.into_iter().rev().map(|e| e.text).collect())
    }
}
/// Scores `expansion` as the sum of the weights of the variables it references.
///
/// A variable that has no entry in `weights` contributes `1.0`.
fn score_by_varref_weights(expansion: &Expansion, weights: &HashMap<String, f64>) -> f64 {
    const FALLBACK_WEIGHT: f64 = 1.0;
    let weight_of = |name: &String| weights.get(name).copied().unwrap_or(FALLBACK_WEIGHT);
    expansion.varrefs.iter().map(weight_of).sum()
}
impl Default for Grammar {
    /// Returns a grammar without rules that has the default modifiers installed.
    fn default() -> Self {
        Self {
            modifiers: modifiers::get_default_modifiers(),
            ..Self::new()
        }
    }
}
/// Renders the grammar as one `name = right-hand side` line per rule.
///
/// `Display` is implemented rather than `ToString` directly; `to_string()`
/// remains available through the standard blanket implementation and yields
/// the same text as before.
impl std::fmt::Display for Grammar {
    /// Writes each rule followed by a newline. Rules are written in the
    /// iteration order of the underlying `HashMap`, so their relative order
    /// is not fixed.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        for (id, node) in &self.rules {
            let rhs = match node {
                // A top-level sequence is written without the enclosing
                // brackets that a nested sequence gets.
                Node::Sequence(children) => {
                    children.iter().map(|n| n.to_string()).collect::<String>()
                }
                other => other.to_string(),
            };
            writeln!(f, "{} = {}", id, rhs)?;
        }
        Ok(())
    }
}
impl FromStr for Grammar {
type Err = Error;
fn from_str(s: &str) -> Result<Self> {
let mut grammar = parser::parse_grammar(s)?;
grammar.modifiers = modifiers::get_default_modifiers();
Ok(grammar)
}
}
// Unit tests for node expansion, grammar rendering and text generation.
#[cfg(test)]
mod tests {
use std::collections::HashSet;
use super::*;
use maplit::hashmap;
// Shared fixture: a default grammar with one rule, `location = #city|capitalize#`,
// and data providing `name` and `city`.
fn grammar_and_data() -> (Grammar, HashMap<String, String>) {
let mut grammar = Grammar::default();
grammar.add_rule(
"location",
Node::VarRef(VarRef::with_variable_and_modifier("city", "capitalize")),
);
let data = hashmap! {
"name".to_string() => "John".to_string(),
"city".to_string() => "london".to_string(),
};
(grammar, data)
}
// Literal text expands to exactly itself and references no variables.
#[test]
fn test_expand_text() {
let (grammar, data) = grammar_and_data();
let node = Node::Text("hello".to_string());
let expansions = node.expand(&grammar, &data).unwrap();
assert_eq!(
expansions,
vec![Expansion {
varrefs: vec![],
text: "hello".to_string(),
}]
);
}
// A variable reference expands to the variable's value and records the variable name.
#[test]
fn test_expand_varref() {
let (grammar, data) = grammar_and_data();
let node = Node::VarRef(VarRef::with_variable("name"));
let expansions = node.expand(&grammar, &data).unwrap();
assert_eq!(
expansions,
vec![Expansion {
varrefs: vec!["name".to_string()],
text: "John".to_string(),
}]
);
}
// A non-terminal expands through the referenced rule; the `capitalize` modifier
// is expected to turn "london" into "London".
#[test]
fn test_expand_nonterminal() {
let (grammar, data) = grammar_and_data();
let node = Node::NonTerminal("location".to_string());
let expansions = node.expand(&grammar, &data).unwrap();
assert_eq!(
expansions,
vec![Expansion {
varrefs: vec!["city".to_string()],
text: "London".to_string(),
}]
);
}
// A sequence concatenates its children's expansions in order.
#[test]
fn test_expand_sequence() {
let (grammar, data) = grammar_and_data();
let c1 = Node::Text("in ".to_string());
let c2 = Node::NonTerminal("location".to_string());
let node = Node::Sequence(vec![c1, c2]);
let expansions = node.expand(&grammar, &data).unwrap();
assert_eq!(
expansions,
vec![Expansion {
varrefs: vec!["city".to_string()],
text: "in London".to_string(),
}]
);
}
// An optional node inside a sequence yields both the "present" and the "absent" variant.
// Compared as sets because the order of expansions is not asserted here.
#[test]
fn test_expand_optional() {
let (grammar, data) = grammar_and_data();
let hello = Node::Text("Hello ".to_string());
let dear = Node::Text("dear ".to_string());
let maybe_dear = Node::Optional(Box::new(dear));
let friend = Node::Text("friend".to_string());
let seq = Node::Sequence(vec![hello, maybe_dear, friend]);
let expansions = seq.expand(&grammar, &data).unwrap();
assert_eq!(
HashSet::<_>::from_iter(expansions),
HashSet::from_iter(vec![
Expansion {
varrefs: vec![],
text: "Hello friend".to_string(),
},
Expansion {
varrefs: vec![],
text: "Hello dear friend".to_string(),
}
])
);
}
// A choice yields one expansion per alternative (compared as sets).
#[test]
fn test_expand_choice() {
let (grammar, data) = grammar_and_data();
let snoopy = Node::Text("Snoopy".to_string());
let name = Node::VarRef(VarRef::with_variable("name"));
let linus = Node::Text("Linus".to_string());
let choice = Node::Choice(vec![snoopy, name, linus]);
let expansions = choice.expand(&grammar, &data).unwrap();
assert_eq!(
HashSet::<_>::from_iter(expansions),
HashSet::from_iter(vec![
Expansion {
varrefs: vec![],
text: "Snoopy".to_string(),
},
Expansion {
varrefs: vec!["name".to_string()],
text: "John".to_string(),
},
Expansion {
varrefs: vec![],
text: "Linus".to_string(),
},
])
);
}
// Rendering a grammar writes one `name = rhs` line per rule; top-level sequences
// appear without enclosing brackets. Lines are compared as a set because rule
// order follows HashMap iteration order.
#[test]
fn test_to_string() {
let mut grammar = Grammar::default();
grammar.add_rule(
"top",
Node::Sequence(vec![
Node::Text("hi ".to_string()),
Node::VarRef(VarRef::with_variable("name")),
Node::Text(" in ".to_string()),
Node::NonTerminal("location".to_string()),
]),
);
grammar.add_rule(
"location",
Node::Sequence(vec![
Node::Text("city of ".to_string()),
Node::VarRef(VarRef::with_variable("city")),
]),
);
assert_eq!(
HashSet::<_>::from_iter(grammar.to_string().split('\n').filter(|s| !s.is_empty())),
HashSet::from_iter(vec![
"top = hi #name# in <location>",
"location = city of #city#",
])
);
}
// End-to-end: parse a grammar from text, then generate the best and all texts.
// `gender` is absent from the data, so only texts without the optional clause are expected.
#[test]
fn test_generate() {
let grammar = Grammar::from_str(
r#"
top = Hi <name>?:[, my dear #gender#,] in <location>.
name = #name#
location = [city of #city#|#city# in #county# county]
"#,
)
.unwrap();
let data = hashmap! {
"name".to_string() => "John".to_string(),
"city".to_string() => "Janesville".to_string(),
"county".to_string() => "Rock".to_string(),
};
// The county variant references three variables versus two for the other one;
// presumably the parsed grammar carries no custom weights, so it scores highest.
let r = grammar.generate("top", &data).unwrap().unwrap();
assert_eq!(r, "Hi John in Janesville in Rock county.");
let exps = HashSet::<_>::from_iter(grammar.generate_all("top", &data).unwrap());
assert_eq!(
exps,
HashSet::from_iter(vec![
"Hi John in Janesville in Rock county.".to_string(),
"Hi John in city of Janesville.".to_string(),
])
);
}
}