mod pgf_json;
pub use pgf_json::*;
use regex::Regex;
use serde::Serialize;
use std::collections::HashMap;
use std::sync::atomic::{AtomicBool, Ordering};
// Process-wide debug flag consulted by the `debug_println!` macro.
// Relaxed ordering is sufficient: this is a best-effort toggle, not a
// synchronization point.
static DEBUG_ENABLED: AtomicBool = AtomicBool::new(false);

/// Turns diagnostic output on or off for the whole process.
pub fn set_debug(enabled: bool) {
    DEBUG_ENABLED.store(enabled, Ordering::Relaxed);
}

/// Reports whether diagnostic output is currently enabled.
pub fn is_debug_enabled() -> bool {
    DEBUG_ENABLED.load(Ordering::Relaxed)
}
/// Prints a `[DEBUG]`-prefixed line to stdout, but only when the global
/// debug flag is enabled (see `set_debug`). Accepts `format!`-style arguments.
macro_rules! debug_println {
    ($($arg:tt)*) => {
        if crate::is_debug_enabled() {
            println!("[DEBUG] {}", format!($($arg)*));
        }
    };
}
/// Signature of an abstract function: its argument categories and result category.
#[derive(Serialize, Debug, Clone)]
pub struct Type {
    pub args: Vec<String>,
    pub cat: String,
}
// Translation result shape: parse-tree name -> (target language -> linearization).
type HMS3 = HashMap<String, HashMap<String, String>>;
// Forest/category identifier used throughout the runtime.
pub type FId = i32;
/// Holds the active items gathered during a completion query
/// (`None` when nothing was collected).
#[derive(Debug, Clone)]
pub struct CompletionAccumulator {
    pub value: Option<Vec<ActiveItem>>,
}
impl Default for CompletionAccumulator {
    /// Delegates to `new` so `default()` and `new()` stay in sync.
    fn default() -> Self {
        Self::new()
    }
}
impl CompletionAccumulator {
pub fn new() -> Self {
CompletionAccumulator { value: None }
}
}
/// Result of `GFConcrete::complete`: the tokens accepted so far and the
/// suggested next tokens.
#[derive(Debug, Clone)]
pub struct CompletionResult {
    pub consumed: Vec<String>,
    pub suggestions: Vec<String>,
}
/// A full grammar: one abstract syntax plus per-language concrete syntaxes.
pub struct GFGrammar {
    pub abstract_grammar: GFAbstract,
    pub concretes: HashMap<String, GFConcrete>,
}
impl GFGrammar {
    /// Combines an abstract grammar with its concrete (per-language) grammars.
    pub fn new(
        abstract_: GFAbstract,
        concretes: HashMap<String, GFConcrete>,
    ) -> Self {
        GFGrammar { abstract_grammar: abstract_, concretes }
    }

    /// Builds a grammar from its deserialized PGF JSON representation.
    pub fn from_json(json: PGF) -> Self {
        debug_println!("Loading GFGrammar from JSON with start category: {}", json.abstract_.startcat);
        debug_println!("Found {} concrete grammars: {:?}", json.concretes.len(), json.concretes.keys().collect::<Vec<_>>());
        let cncs: HashMap<String, GFConcrete> = json
            .concretes
            .into_iter()
            .map(|(key, concrete)| {
                debug_println!("Loading concrete grammar: {}", key);
                (key, GFConcrete::from_json(concrete))
            })
            .collect();
        let abstract_grammar = GFAbstract::from_json(json.abstract_);
        debug_println!("GFGrammar loaded successfully with {} concrete grammars", cncs.len());
        GFGrammar {
            abstract_grammar,
            concretes: cncs,
        }
    }

    /// Concrete grammars to use for `lang`: a single-entry map when the
    /// language is known, an empty map when it is unknown, or a copy of all
    /// concretes when `lang` is `None`.
    fn select_concretes(&self, lang: Option<&str>) -> HashMap<String, GFConcrete> {
        match lang {
            Some(l) => {
                let mut map = HashMap::new();
                if let Some(concrete) = self.concretes.get(l) {
                    map.insert(l.to_string(), concrete.clone());
                }
                map
            }
            None => self.concretes.clone(),
        }
    }

    /// Parses `input` with every selected source language and linearizes each
    /// resulting tree into every selected target language.
    ///
    /// Returns one map per source language that produced at least one parse:
    /// tree name -> (target language -> linearization).
    pub fn translate(
        &self,
        input: &str,
        from_lang: Option<&str>,
        to_lang: Option<&str>,
    ) -> Vec<HMS3> {
        debug_println!("Starting translation of input: '{}'", input);
        debug_println!("From language: {:?}, To language: {:?}", from_lang, to_lang);
        let mut outputs: Vec<HMS3> = Vec::new();
        // The source/target selection logic used to be duplicated inline;
        // both sides now share `select_concretes`.
        let from_cncs = self.select_concretes(from_lang);
        let to_cncs = self.select_concretes(to_lang);
        for (lang_code, concrete) in &from_cncs {
            debug_println!("Attempting to parse with language: {}", lang_code);
            let trees =
                concrete.parse_string(input, &self.abstract_grammar.startcat);
            debug_println!("Found {} parse tree(s) for language {}", trees.len(), lang_code);
            if !trees.is_empty() {
                let mut c1_outputs: HashMap<String, HashMap<String, String>> =
                    HashMap::new();
                for tree in trees {
                    debug_println!("Processing parse tree: {}", tree.name);
                    let mut translations: HashMap<String, String> =
                        HashMap::new();
                    for (c2, to_concrete) in &to_cncs {
                        let linearized = to_concrete.linearize(&tree);
                        debug_println!("Linearized to {}: '{}'", c2, linearized);
                        translations
                            .insert(c2.clone(), linearized);
                    }
                    c1_outputs.insert(tree.name.clone(), translations);
                }
                outputs.push(c1_outputs);
            }
        }
        debug_println!("Translation completed with {} result set(s)", outputs.len());
        outputs
    }
}
/// Abstract syntax: the start category plus every function's signature.
#[derive(Debug, Clone)]
pub struct GFAbstract {
    pub startcat: String,
    types: HashMap<String, Type>,
}
/// Tiny token-stream parser for textual abstract-syntax expressions
/// (e.g. `MakeS (NP) (VP)`), driven by a cursor into a token vector.
struct AbstractSyntaxParser {
    tokens: Vec<String>,
    position: usize,
}
impl AbstractSyntaxParser {
    /// Tokenizes `input` into structural punctuation (`(`, `)`, `?`, `:`, `,`),
    /// identifier-like runs, and single-character tokens for anything else.
    fn new(input: &str) -> Self {
        let mut tokens = Vec::new();
        let mut current_token = String::new();
        for ch in input.chars() {
            match ch {
                // Structural punctuation ends the current token and is
                // emitted as a one-character token of its own.
                '(' | ')' | '?' | ':' | ',' => {
                    if !current_token.is_empty() {
                        tokens.push(current_token.clone());
                        current_token.clear();
                    }
                    tokens.push(ch.to_string());
                }
                // Whitespace only terminates the current token.
                c if c.is_whitespace() => {
                    if !current_token.is_empty() {
                        tokens.push(current_token.clone());
                        current_token.clear();
                    }
                }
                // Identifier characters (plus ', . and ") accumulate.
                c if c.is_alphanumeric() || c == '_' || c == '\'' || c == '.' || c == '"' => {
                    current_token.push(c);
                }
                // Any other character becomes its own token.
                _ => {
                    if !current_token.is_empty() {
                        tokens.push(current_token.clone());
                        current_token.clear();
                    }
                    tokens.push(ch.to_string());
                }
            }
        }
        // Flush a trailing token at end of input.
        if !current_token.is_empty() {
            tokens.push(current_token);
        }
        Self {
            tokens,
            position: 0,
        }
    }

    /// Peeks at the token under the cursor without consuming it.
    fn current_token(&self) -> Option<&String> {
        self.tokens.get(self.position)
    }

    /// Consumes and returns the token under the cursor, if any.
    fn advance(&mut self) -> Option<String> {
        if self.position < self.tokens.len() {
            let token = self.tokens[self.position].clone();
            self.position += 1;
            Some(token)
        } else {
            None
        }
    }

    /// Parses a whole expression at precedence 0 (juxtaposed arguments allowed).
    fn parse_expression(&mut self) -> Result<Fun, &'static str> {
        self.parse_tree_with_prec(0)
    }

    /// Recursive-descent parse. `prec == 0` allows juxtaposed arguments
    /// (`F X Y`); `prec == 1` parses a single atom or call only.
    fn parse_tree_with_prec(&mut self, prec: usize) -> Result<Fun, &'static str> {
        if self.tokens.is_empty() || self.position >= self.tokens.len() {
            return Err("No more tokens");
        }
        let current = self.current_token().cloned();
        // A ')' here belongs to an enclosing call; fail without consuming it.
        if current.as_deref() == Some(")") {
            return Err("Unexpected ')'");
        }
        let t = self.advance().ok_or("No token available")?;
        if t == "(" {
            // Parenthesized sub-expression; the closing ')' is consumed if present.
            let tree = self.parse_tree_with_prec(0)?;
            if self.current_token().map(|s| s.as_str()) == Some(")") {
                self.advance();
            }
            Ok(tree)
        } else if t == "?" {
            // Metavariable leaf.
            Ok(Fun::new("?".to_string(), vec![]))
        } else {
            let mut tree = Fun::new(t, vec![]);
            if self.current_token().map(|s| s.as_str()) == Some("(") {
                // Call syntax: F(a, b, ...), comma-separated.
                self.advance();
                if self.current_token().map(|s| s.as_str()) != Some(")") {
                    loop {
                        let arg = self.parse_tree_with_prec(1)?;
                        tree.args.push(arg);
                        match self.current_token().map(|s| s.as_str()) {
                            Some(",") => {
                                self.advance();
                                continue;
                            }
                            Some(")") => break,
                            _ => return Err("Expected ',' or ')'"),
                        }
                    }
                }
                if self.current_token().map(|s| s.as_str()) == Some(")") {
                    self.advance();
                }
            } else if prec == 0 {
                // Juxtaposition: keep absorbing argument atoms until one fails.
                loop {
                    match self.parse_tree_with_prec(1) {
                        Ok(child) => tree.args.push(child),
                        Err(_) => break,
                    }
                }
            }
            Ok(tree)
        }
    }
}
impl GFAbstract {
    /// Wraps a start category and a function-signature table.
    pub fn new(startcat: String, types: HashMap<String, Type>) -> Self {
        Self { startcat, types }
    }

    /// Converts the deserialized JSON abstract syntax into runtime form.
    pub fn from_json(json: Abstract) -> Self {
        let mut types = HashMap::new();
        for (key, fun) in json.funs {
            types.insert(key, Type::new(fun.args, fun.cat));
        }
        Self { startcat: json.startcat, types }
    }

    /// Registers (or replaces) the signature of function `fun`.
    pub fn add_type(&mut self, fun: String, args: Vec<String>, cat: String) {
        self.types.insert(fun, Type::new(args, cat));
    }

    /// Argument categories of `fun`, when its signature is known.
    pub fn get_args(&self, fun: &str) -> Option<&Vec<String>> {
        self.types.get(fun).map(|t| &t.args)
    }

    /// Result category of `fun`, when its signature is known.
    pub fn get_cat(&self, fun: &str) -> Option<&String> {
        self.types.get(fun).map(|t| &t.cat)
    }

    /// Attaches expected types: metavariables receive `r#type`, and children
    /// of known functions are annotated with their declared argument types.
    fn annotate(&self, mut tree: Fun, r#type: Option<&String>) -> Fun {
        if tree.is_meta() {
            tree.type_ = r#type.cloned();
            return tree;
        }
        if let Some(sig) = self.types.get(&tree.name) {
            // Pair each child with its declared type; extra children (if any)
            // are left untouched, matching zip semantics.
            let n = tree.args.len().min(sig.args.len());
            for idx in 0..n {
                let child = tree.args[idx].clone();
                tree.args[idx] = self.annotate(child, Some(&sig.args[idx]));
            }
        }
        tree
    }

    /// Renames literal leaves at `String`/`Int`/`Float` positions to their
    /// `<Type>_Literal_<name>` form, recursing through known functions.
    pub fn handle_literals(&self, mut tree: Fun, r#type: &str) -> Fun {
        if tree.name == "?" {
            return tree;
        }
        if matches!(r#type, "String" | "Int" | "Float") {
            tree.name = format!("{}_Literal_{}", r#type, tree.name);
        } else if let Some(sig) = self.types.get(&tree.name) {
            let n = tree.args.len().min(sig.args.len());
            for idx in 0..n {
                let child = tree.args[idx].clone();
                tree.args[idx] = self.handle_literals(child, &sig.args[idx]);
            }
        }
        tree
    }

    /// Deep copy of a tree, including its type annotation.
    #[allow(clippy::only_used_in_recursion)]
    pub fn copy_tree(&self, x: &Fun) -> Fun {
        let mut dup = Fun::new(x.name.clone(), Vec::new());
        dup.type_ = x.type_.clone();
        dup.args = x.args.iter().map(|a| self.copy_tree(a)).collect();
        dup
    }

    /// Parses a textual abstract-syntax expression and annotates it.
    /// Returns `None` when the input does not parse.
    pub fn parse_tree(
        &self,
        str: &str,
        r#type: Option<&String>,
    ) -> Option<Fun> {
        AbstractSyntaxParser::new(str)
            .parse_expression()
            .ok()
            .map(|tree| self.annotate(tree, r#type))
    }
}
/// Runtime concrete grammar for one language.
#[derive(Clone, Debug)]
pub struct GFConcrete {
    pub flags: HashMap<String, String>,
    // Linearization functions, indexed by function id.
    functions: Vec<RuntimeCncFun>,
    // Category name -> inclusive (start, end) fid range.
    pub start_cats: HashMap<String, (i32, i32)>,
    pub total_fids: i32,
    // Productions keyed by result fid (source of the parse forest).
    pub pproductions: HashMap<i32, Vec<Production>>,
    // Index built in `new`: mangled "fun_argFid..." key -> linearizable productions.
    lproductions: HashMap<String, Vec<LProduction>>,
}
impl GFConcrete {
/// Builds a concrete grammar. Besides storing the inputs, this pre-computes
/// `lproductions`: an index from mangled keys of the form
/// `funName_argFid_argFid...` to the productions that can linearize them,
/// consumed later by `linearize_syms`.
pub fn new(
    flags: HashMap<String, String>,
    functions: Vec<RuntimeCncFun>,
    productions: HashMap<i32, Vec<Production>>,
    start_cats: HashMap<String, (i32, i32)>,
    total_fids: i32,
) -> Self {
    let mut lproductions = HashMap::new();
    // Walks an apply-rule's arguments, expanding coercions into alternate
    // keys, and records one LProduction per fully-expanded key.
    // `depth` caps recursion to guard against cyclic coercion chains.
    #[allow(clippy::too_many_arguments)]
    fn register_recursive(
        args: &[PArg],
        key: String,
        i: usize,
        lproductions: &mut HashMap<String, Vec<LProduction>>,
        productions: &HashMap<i32, Vec<Production>>,
        fun: &RuntimeCncFun,
        fid: FId,
        depth: usize,
    ) {
        if depth > 100 {
            return;
        }
        if i < args.len() {
            let arg = args[i].fid;
            let mut count = 0;
            // Follow every coercion reachable from this argument's fid.
            if let Some(rules) = productions.get(&arg) {
                for rule in rules {
                    if let Production::Coerce(ref coerce_rule) = rule {
                        let new_key =
                            format!("{}_{}", key, coerce_rule.arg);
                        register_recursive(
                            args,
                            new_key,
                            i + 1,
                            lproductions,
                            productions,
                            fun,
                            fid,
                            depth + 1,
                        );
                        count += 1;
                    }
                }
            }
            // No coercions found: use the argument fid directly.
            if count == 0 {
                let new_key = format!("{key}_{arg}");
                register_recursive(
                    args,
                    new_key,
                    i + 1,
                    lproductions,
                    productions,
                    fun,
                    fid,
                    depth + 1,
                );
            }
        } else {
            // All arguments expanded: record the production under this key.
            lproductions
                .entry(key)
                .or_default()
                .push(LProduction { fun: fun.clone(), fid });
        }
    }
    for (&fid, rules) in &productions {
        for rule in rules {
            if let Production::Apply(ref apply_rule) = rule {
                match apply_rule.to_apply_fun() {
                    // The rule references a function by index.
                    ApplyFun::FId(fun_id) => {
                        if (fun_id as usize) < functions.len() {
                            let fun = &functions[fun_id as usize];
                            register_recursive(
                                &apply_rule.args,
                                fun.name.clone(),
                                0,
                                &mut lproductions,
                                &productions,
                                fun,
                                fid,
                                0,
                            );
                        }
                    }
                    // The rule embeds the function directly; wrap it in a
                    // runtime function first.
                    ApplyFun::CncFun(json_fun) => {
                        let runtime_fun = RuntimeCncFun::new(
                            json_fun.name.clone(),
                            LinType::FId(json_fun.lins.clone()),
                        );
                        register_recursive(
                            &apply_rule.args,
                            runtime_fun.name.clone(),
                            0,
                            &mut lproductions,
                            &productions,
                            &runtime_fun,
                            fid,
                            0,
                        );
                    }
                }
            }
        }
    }
    GFConcrete {
        flags,
        pproductions: productions,
        functions,
        start_cats,
        total_fids,
        lproductions,
    }
}
pub fn from_json(json: Concrete) -> Self {
let productions: HashMap<i32, Vec<Production>> = json.productions;
let sequences: Vec<Vec<Sym>> = json.sequences;
let functions: Vec<RuntimeCncFun> = json
.functions
.into_iter()
.map(|f| {
let lins = if f.lins.is_empty() {
LinType::Sym(vec![])
} else {
let symbol_sequences: Vec<Vec<Sym>> = f
.lins
.into_iter()
.map(|seq_idx| {
if seq_idx as usize >= sequences.len() {
vec![]
} else {
sequences[seq_idx as usize].clone()
}
})
.collect();
LinType::Sym(symbol_sequences)
};
RuntimeCncFun { name: f.name, lins }
})
.collect();
let start_cats = json
.categories
.into_iter()
.map(|(key, cat)| (key, (cat.start, cat.end)))
.collect();
GFConcrete::new(
json.flags,
functions,
productions,
start_cats,
json.totalfids,
)
}
/// Bottom-up linearization: returns one `LinearizedSym` per candidate
/// concrete category for `tree`. `tag` records the node's path from the
/// root ("0", "0-1", ...) and is attached to every emitted token.
pub fn linearize_syms(&self, tree: &Fun, tag: &str) -> Vec<LinearizedSym> {
    let mut res = Vec::new();
    if tree.is_string() {
        // String literal: pseudo-fid -1.
        let mut sym = SymKS::new(vec![tree.name.clone()]);
        sym.tag = Some(tag.to_string());
        res.push(LinearizedSym {
            fid: -1,
            table: vec![vec![Sym::SymKS(sym)]],
        });
    } else if tree.is_int() {
        // Integer literal: pseudo-fid -2.
        let mut sym = SymKS::new(vec![tree.name.clone()]);
        sym.tag = Some(tag.to_string());
        res.push(LinearizedSym {
            fid: -2,
            table: vec![vec![Sym::SymKS(sym)]],
        });
    } else if tree.is_float() {
        // Float literal: pseudo-fid -3.
        let mut sym = SymKS::new(vec![tree.name.clone()]);
        sym.tag = Some(tag.to_string());
        res.push(LinearizedSym {
            fid: -3,
            table: vec![vec![Sym::SymKS(sym)]],
        });
    } else if tree.is_meta() {
        // Metavariable: emit its name once for every fid in the range of
        // its annotated category (or (0, 0) when unknown).
        let cat = self
            .start_cats
            .get(tree.type_.as_ref().unwrap_or(&String::new()))
            .cloned()
            .unwrap_or((0, 0));
        let sym = Sym::SymKS(SymKS {
            id: "KS".to_string(),
            tokens: vec![tree.name.clone()],
            tag: Some(tag.to_string()),
        });
        for fid in cat.0..=cat.1 {
            res.push(LinearizedSym {
                fid,
                table: vec![vec![sym.clone()]],
            });
        }
    } else {
        // Function application: linearize children first, keeping only each
        // child's first candidate.
        let cs: Vec<LinearizedSym> = tree
            .args
            .iter()
            .enumerate()
            .filter_map(|(i, arg)| {
                let syms = self.linearize_syms(arg, &format!("{tag}-{i}"));
                syms.first().cloned()
            })
            .collect();
        // Build the lproductions key "name_fid_fid...". A child fid of -5
        // switches to substring search over the index instead.
        // NOTE(review): the origin of the -5 sentinel is not visible in this
        // file — presumably set elsewhere for unresolved children; confirm.
        let mut key = tree.name.clone();
        for c in &cs {
            if c.fid == -5 {
                if let Some((matched_key, _)) = self
                    .lproductions
                    .iter()
                    .find(|(k, _)| k.contains(&tree.name))
                {
                    key = matched_key.clone();
                }
                break;
            } else {
                key = format!("{}_{}", key, c.fid);
            }
        }
        if let Some(rules) = self.lproductions.get(&key) {
            for rule in rules {
                let mut row =
                    LinearizedSym { fid: rule.fid, table: Vec::new() };
                match &rule.fun.lins {
                    LinType::Sym(lins) => {
                        for (j, lin) in lins.iter().enumerate() {
                            let mut toks: Vec<Sym> = Vec::new();
                            if j >= row.table.len() {
                                row.table.push(Vec::new());
                            }
                            for sym0 in lin {
                                match sym0 {
                                    // Argument references: splice in the
                                    // child's row for this label.
                                    Sym::SymCat { i, .. }
                                    | Sym::SymLit { i, .. } => {
                                        if *i < cs.len()
                                            && j < cs[*i].table.len()
                                        {
                                            let ts = &cs[*i].table[j];
                                            toks.extend_from_slice(ts);
                                        }
                                    }
                                    // Terminals: re-tag with this node's tag.
                                    Sym::SymKS(ks) => {
                                        toks.push(Sym::SymKS(
                                            ks.tag_with(tag),
                                        ));
                                    }
                                    Sym::SymKP(kp) => {
                                        toks.push(Sym::SymKP(
                                            kp.tag_with(tag),
                                        ));
                                    }
                                }
                            }
                            row.table[j] = toks;
                        }
                    }
                    // Unresolved linearization: keep a single empty row.
                    LinType::FId(_) => {
                        row.table.push(Vec::new());
                    }
                }
                res.push(row);
            }
        }
    }
    res
}
/// Flattens a symbol row into tagged surface tokens. For `SymKP`
/// (prefix-dependent variants) the alternative whose prefix matches the
/// following token wins; otherwise the default tokens are used.
/// Symbols without a tag are skipped entirely.
pub fn syms2toks(&self, syms: &[Sym]) -> Vec<TaggedString> {
    let mut ts = Vec::new();
    for i in 0..syms.len() {
        match &syms[i] {
            Sym::SymKS(sym) => {
                if let Some(tag) = &sym.tag {
                    for token in &sym.tokens {
                        ts.push(TaggedString::new(token, tag));
                    }
                }
            }
            Sym::SymKP(sym) => {
                let mut added_alt = false;
                // Look ahead one symbol to decide which alternative applies.
                if i + 1 < syms.len() {
                    if let Sym::SymKS(next_sym) = &syms[i + 1] {
                        if let Some(next_token) = next_sym.tokens.first() {
                            for alt in &sym.alts {
                                if alt
                                    .prefixes
                                    .iter()
                                    .any(|p| next_token.starts_with(p))
                                {
                                    for symks in &alt.tokens {
                                        if let Some(tag) = &sym.tag {
                                            for token in &symks.tokens {
                                                ts.push(
                                                    TaggedString::new(
                                                        token, tag,
                                                    ),
                                                );
                                            }
                                        }
                                    }
                                    added_alt = true;
                                    break;
                                }
                            }
                        }
                    }
                }
                // No alternative matched: fall back to the default tokens.
                if !added_alt {
                    if let Some(tag) = &sym.tag {
                        for symks in &sym.tokens {
                            for token in &symks.tokens {
                                ts.push(TaggedString::new(token, tag));
                            }
                        }
                    }
                }
            }
            // SymCat/SymLit carry no surface tokens at this stage.
            _ => {}
        }
    }
    ts
}
/// Linearizes `tree` once per candidate concrete category.
///
/// Rows whose table is empty are skipped instead of panicking: the old
/// `r.table[0]` indexed unconditionally and panicked on a row produced
/// from a function with zero linearization sequences.
pub fn linearize_all(&self, tree: &Fun) -> Vec<String> {
    self.linearize_syms(tree, "0")
        .into_iter()
        .filter_map(|r| {
            r.table.first().map(|row| self.unlex(&self.syms2toks(row)))
        })
        .collect()
}
/// Linearizes `tree` using the first candidate row; returns an empty
/// string when nothing can be linearized.
///
/// Uses checked access instead of `res[0].table[0]`, which panicked when
/// the first row carried an empty table.
pub fn linearize(&self, tree: &Fun) -> String {
    debug_println!("Linearizing tree: {}", tree.name);
    let res = self.linearize_syms(tree, "0");
    debug_println!("Generated {} linearized symbol row(s)", res.len());
    match res.first().and_then(|row| row.table.first()) {
        Some(syms) => {
            let result = self.unlex(&self.syms2toks(syms));
            debug_println!("Linearization result: '{}'", result);
            result
        }
        None => {
            debug_println!("No linearization result generated");
            String::new()
        }
    }
}
/// Like `linearize`, but returns the tagged tokens instead of the joined
/// string. Empty result when there is nothing to linearize.
///
/// Uses checked access instead of `res[0].table[0]`, which panicked when
/// the first row carried an empty table.
pub fn tag_and_linearize(&self, tree: &Fun) -> Vec<TaggedString> {
    let res = self.linearize_syms(tree, "0");
    res.first()
        .and_then(|row| row.table.first())
        .map(|syms| self.syms2toks(syms))
        .unwrap_or_default()
}
fn unlex(&self, ts: &[TaggedString]) -> String {
if ts.is_empty() {
return String::new();
}
let no_space_after = Regex::new(r"^[\(\-\[]").unwrap();
let no_space_before = Regex::new(r"^[\.\,\?\!\)\:\;\-\]]").unwrap();
let mut s = String::new();
for i in 0..ts.len() {
let t = &ts[i].token;
s.push_str(t);
if i + 1 < ts.len() {
let after = &ts[i + 1].token;
if !no_space_after.is_match(t)
&& !no_space_before.is_match(after)
{
s.push(' ');
}
}
}
s
}
/// Splits the input on Unicode whitespace into owned tokens.
fn tokenize(&self, input: &str) -> Vec<String> {
    input.split_whitespace().map(|t| t.to_owned()).collect()
}
/// Parses `input` against start category `cat`, feeding the parser one
/// whitespace token at a time. Returns the extracted trees, or an empty
/// vector as soon as a token cannot be consumed.
pub fn parse_string(&self, input: &str, cat: &str) -> Vec<Fun> {
    debug_println!("Parsing string '{}' with start category '{}'", input, cat);
    let tokens = self.tokenize(input);
    debug_println!("Tokenized input into {} tokens: {:?}", tokens.len(), tokens);
    let mut state = ParseState::new(self.clone(), cat.to_string());
    let total = tokens.len();
    for (idx, tok) in tokens.iter().enumerate() {
        debug_println!("Processing token {} of {}: '{}'", idx + 1, total, tok);
        if !state.next(tok) {
            debug_println!("Parsing failed at token '{}'", tok);
            return Vec::new();
        }
    }
    let trees = state.extract_trees();
    debug_println!("Extracted {} parse tree(s)", trees.len());
    trees
}
/// Computes completions for `input`: every whitespace token except the
/// last must parse; the last (possibly empty) token is treated as a
/// prefix to complete. Returns the consumed tokens plus suggestions.
///
/// Fixes mojibake from an encoding round-trip: `&current` had been
/// corrupted into `¤t` (the HTML entity `&curren;` plus `t`), which
/// does not compile. Also drops the redundant `!t.is_empty()` filter —
/// `split_whitespace` never yields empty tokens.
pub fn complete(&self, input: &str, cat: &str) -> CompletionResult {
    let mut tokens: Vec<String> =
        input.split_whitespace().map(|t| t.to_string()).collect();
    // The trailing (possibly partial) token is completed, not parsed.
    let mut current = tokens.pop().unwrap_or_default();
    let mut ps = ParseState::new(self.clone(), cat.to_string());
    let mut ps2 = ParseState::new(self.clone(), cat.to_string());
    for token in &tokens {
        if !ps.next(token) {
            // A prefix token failed to parse: nothing can be suggested.
            return CompletionResult {
                consumed: vec![],
                suggestions: vec![],
            };
        }
        ps2.next(token);
    }
    // If the "partial" token is actually a complete word, consume it and
    // complete from the empty prefix instead.
    if ps2.next(&current) {
        ps.next(&current);
        tokens.push(current.clone());
        current = String::new();
    }
    let acc = ps.complete(&current);
    // Collect every terminal token the surviving items could accept next.
    let mut suggestions = Vec::new();
    if let Some(items) = acc.value {
        for a in items {
            for s in &a.seq {
                match s {
                    Sym::SymKS(sym) => {
                        for t in &sym.tokens {
                            suggestions.push(t.clone());
                        }
                    }
                    Sym::SymKP(sym) => {
                        for symks in &sym.tokens {
                            for t in &symks.tokens {
                                suggestions.push(t.clone());
                            }
                        }
                    }
                    _ => {}
                }
            }
        }
    }
    CompletionResult { consumed: tokens, suggestions }
}
}
/// Prefix tree keyed by token strings; each node may carry a payload list.
#[derive(Debug, Clone)]
pub struct Trie<T> {
    value: Option<Vec<T>>,
    items: HashMap<String, Trie<T>>,
}
impl<T: Clone> Default for Trie<T> {
    fn default() -> Self {
        Self::new()
    }
}
impl<T: Clone> Trie<T> {
    /// Creates an empty node with no payload and no children.
    pub fn new() -> Self {
        Self { value: None, items: HashMap::new() }
    }

    /// Walks/creates the chain of nodes named by `keys` and REPLACES the
    /// payload at the final node with `obj`.
    pub fn insert_chain(&mut self, keys: &[String], obj: Vec<T>) {
        self.descend(keys).value = Some(obj);
    }

    /// Walks/creates the chain of nodes named by `keys` and APPENDS `obj`
    /// to the payload at the final node.
    pub fn insert_chain1(&mut self, keys: &[String], obj: T) {
        let node = self.descend(keys);
        match node.value.as_mut() {
            Some(list) => list.push(obj),
            None => node.value = Some(vec![obj]),
        }
    }

    /// Returns the immediate child node for `key`, if present.
    pub fn lookup(&self, key: &str) -> Option<&Trie<T>> {
        self.items.get(key)
    }

    /// True when the node has neither a payload nor children.
    pub fn is_empty(&self) -> bool {
        self.value.is_none() && self.items.is_empty()
    }

    /// Shared traversal helper: follows `keys`, creating nodes as needed.
    fn descend(&mut self, keys: &[String]) -> &mut Trie<T> {
        let mut node = self;
        for key in keys {
            node = node.items.entry(key.clone()).or_default();
        }
        node
    }
}
/// Chart for the incremental parser: active items per (fid, label),
/// archived per input offset, plus passive nodes and the derivation forest.
#[derive(Debug, Clone)]
pub struct Chart {
    // Active items at the current offset: fid -> label -> items.
    active: HashMap<FId, HashMap<i32, Vec<ActiveItem>>>,
    // Active sets archived by `shift`, indexed by offset.
    actives: Vec<HashMap<FId, HashMap<i32, Vec<ActiveItem>>>>,
    // Passive nodes keyed by the string "fid.label-offset".
    passive: HashMap<String, FId>,
    pub forest: HashMap<FId, Vec<Production>>,
    // Next fresh fid to mint (starts past the grammar's own fids).
    pub next_id: FId,
    pub offset: usize,
}
impl Chart {
    /// Creates a chart seeded with the grammar's production forest.
    pub fn new(concrete: GFConcrete) -> Self {
        // `concrete` is owned here, so move its production table into the
        // forest instead of deep-cloning every rule vector (the previous
        // version copied the entire map entry by entry).
        Chart {
            active: HashMap::new(),
            actives: vec![],
            passive: HashMap::new(),
            next_id: concrete.total_fids,
            forest: concrete.pproductions,
            offset: 0,
        }
    }

    /// Active items waiting on `(fid, label)` at the current offset.
    pub fn lookup_ac(&self, fid: FId, label: i32) -> Option<&Vec<ActiveItem>> {
        self.active.get(&fid).and_then(|m| m.get(&label))
    }

    /// Like `lookup_ac`, but for an arbitrary (possibly archived) offset.
    pub fn lookup_aco(
        &self,
        offset: usize,
        fid: FId,
        label: i32,
    ) -> Option<&Vec<ActiveItem>> {
        if offset == self.offset {
            self.lookup_ac(fid, label)
        } else {
            self.actives.get(offset)?.get(&fid).and_then(|m| m.get(&label))
        }
    }

    /// All labels currently active on `fid`, if any.
    pub fn labels_ac(&self, fid: FId) -> Option<Vec<i32>> {
        self.active.get(&fid).map(|m| m.keys().cloned().collect())
    }

    /// Replaces the active-item list for `(fid, label)` at the current offset.
    pub fn insert_ac(&mut self, fid: FId, label: i32, items: Vec<ActiveItem>) {
        self.active.entry(fid).or_default().insert(label, items);
    }

    /// Passive node recorded for `(fid, label)` at `offset`, if any.
    pub fn lookup_pc(
        &self,
        fid: FId,
        label: i32,
        offset: usize,
    ) -> Option<FId> {
        let key = format!("{fid}.{label}-{offset}");
        self.passive.get(&key).cloned()
    }

    /// Records `fid2` as the passive node for `(fid, label)` at `offset`.
    pub fn insert_pc(
        &mut self,
        fid: FId,
        label: i32,
        offset: usize,
        fid2: FId,
    ) {
        let key = format!("{fid}.{label}-{offset}");
        self.passive.insert(key, fid2);
    }

    /// Advances to the next input position: archives the current active set
    /// and clears the per-position tables.
    pub fn shift(&mut self) {
        // `mem::take` moves the map out and leaves an empty one behind,
        // avoiding the clone-then-clear of the previous version.
        self.actives.push(std::mem::take(&mut self.active));
        self.passive.clear();
        self.offset += 1;
    }

    /// Flattens the productions reachable from `fid`, following coercions
    /// transitively; `Apply` and `Const` rules are returned as-is.
    pub fn expand_forest(&self, fid: FId) -> Vec<Production> {
        let mut rules = Vec::new();
        fn go(
            forest: &HashMap<FId, Vec<Production>>,
            fid: FId,
            rules: &mut Vec<Production>,
        ) {
            if let Some(prods) = forest.get(&fid) {
                for prod in prods {
                    match prod {
                        Production::Apply(apply) => {
                            rules.push(Production::Apply(apply.clone()))
                        }
                        Production::Coerce(coerce) => {
                            go(forest, coerce.arg, rules)
                        }
                        Production::Const(const_) => {
                            rules.push(Production::Const(const_.clone()))
                        }
                    }
                }
            }
        }
        go(&self.forest, fid, &mut rules);
        rules
    }
}
/// Incremental chart-parser state for one concrete grammar.
#[derive(Debug, Clone)]
pub struct ParseState {
    // Grammar being parsed against.
    concrete: GFConcrete,
    // Abstract start category name.
    start_cat: String,
    // Active items, keyed by the remaining tokens of multi-token terminals.
    items: Trie<ActiveItem>,
    // Active/passive chart spanning all input positions so far.
    chart: Chart,
}
impl ParseState {
/// Core chart-engine loop: drains `agenda`, advancing each active item
/// over the symbol under its dot.
///
/// * `literal_callback` — given a literal category fid, may mint a `Const`
///   rule for the current input token (used by `next`).
/// * `token_callback` — receives the terminal tokens a shifted item
///   expects next; the caller decides whether they match the input.
///
/// Items with the dot mid-sequence are advanced over `SymCat`
/// (predict/complete against the chart), terminals (`SymKS`/`SymKP`), or
/// `SymLit`; items whose dot reached the end become passive chart nodes
/// and wake up any items waiting on them.
pub fn process<F, G>(
    &mut self,
    agenda: &mut Vec<ActiveItem>,
    literal_callback: F,
    mut token_callback: G,
) where
    F: Fn(FId) -> Option<Const>,
    G: FnMut(&[String], ActiveItem),
{
    while let Some(item) = agenda.pop() {
        if item.dot < item.seq.len() {
            let sym = &item.seq[item.dot];
            match sym {
                Sym::SymCat { i, label } => {
                    let fid = item.args[*i].fid;
                    if let Some(items) = self.chart.lookup_ac(fid, *label as i32) {
                        // Category already predicted here: register this
                        // item as waiting, and complete it at once if a
                        // passive node already exists.
                        if !items.contains(&item) {
                            let mut items = items.clone();
                            items.push(item.clone());
                            self.chart.insert_ac(fid, *label as i32, items);
                            if let Some(fid2) = self.chart.lookup_pc(fid, *label as i32, self.chart.offset) {
                                agenda.push(item.shift_over_arg(*i, fid2));
                            }
                        }
                    } else {
                        // First time (fid, label) is needed at this offset:
                        // predict fresh items from the category's productions.
                        let rules = self.chart.expand_forest(fid);
                        for rule in rules {
                            if let Production::Apply(apply) = rule {
                                match apply.to_apply_fun() {
                                    ApplyFun::CncFun(json_fun) => {
                                        for &lin_idx in &json_fun.lins {
                                            if (lin_idx as usize) < self.concrete.functions.len() {
                                                let runtime_fun = self.concrete.functions[lin_idx as usize].clone();
                                                match &runtime_fun.lins {
                                                    LinType::Sym(lins) => {
                                                        for (lbl, lin) in lins.iter().enumerate() {
                                                            // Only the requested label is predicted.
                                                            if lbl == *label {
                                                                let new_item = ActiveItem::new(
                                                                    self.chart.offset,
                                                                    0,
                                                                    runtime_fun.clone(),
                                                                    lin.clone(),
                                                                    apply.args.clone(),
                                                                    fid,
                                                                    lbl as i32,
                                                                );
                                                                if !agenda.contains(&new_item) {
                                                                    agenda.push(new_item);
                                                                }
                                                            }
                                                        }
                                                    }
                                                    LinType::FId(_) => {
                                                        let new_item = ActiveItem::new(
                                                            self.chart.offset,
                                                            0,
                                                            runtime_fun.clone(),
                                                            vec![],
                                                            apply.args.clone(),
                                                            fid,
                                                            0,
                                                        );
                                                        if !agenda.contains(&new_item) {
                                                            agenda.push(new_item);
                                                        }
                                                    }
                                                }
                                            }
                                        }
                                    }
                                    ApplyFun::FId(fun_id) => {
                                        if (fun_id as usize) < self.concrete.functions.len() {
                                            let runtime_fun = self.concrete.functions[fun_id as usize].clone();
                                            match &runtime_fun.lins {
                                                LinType::Sym(lins) => {
                                                    for (lbl, lin) in lins.iter().enumerate() {
                                                        if lbl == *label {
                                                            let new_item = ActiveItem::new(
                                                                self.chart.offset,
                                                                0,
                                                                runtime_fun.clone(),
                                                                lin.clone(),
                                                                apply.args.clone(),
                                                                fid,
                                                                lbl as i32,
                                                            );
                                                            if !agenda.contains(&new_item) {
                                                                agenda.push(new_item);
                                                            }
                                                        }
                                                    }
                                                }
                                                LinType::FId(_) => {
                                                    let new_item = ActiveItem::new(
                                                        self.chart.offset,
                                                        0,
                                                        runtime_fun.clone(),
                                                        vec![],
                                                        apply.args.clone(),
                                                        fid,
                                                        0,
                                                    );
                                                    if !agenda.contains(&new_item) {
                                                        agenda.push(new_item);
                                                    }
                                                }
                                            }
                                        }
                                    }
                                }
                            }
                        }
                        self.chart.insert_ac(fid, *label as i32, vec![item.clone()]);
                    }
                }
                Sym::SymKS(sym) => {
                    // Plain terminal: hand its tokens to the caller.
                    token_callback(&sym.tokens, item.shift_over_token())
                }
                Sym::SymKP(sym) => {
                    // Prefix-dependent terminal: offer the default tokens
                    // and every alternative; the caller filters by match.
                    let pitem = item.shift_over_token();
                    for symks in &sym.tokens {
                        token_callback(&symks.tokens, pitem.clone());
                    }
                    for alt in &sym.alts {
                        for symks in &alt.tokens {
                            token_callback(&symks.tokens, pitem.clone());
                        }
                    }
                }
                Sym::SymLit { i, .. } => {
                    let fid = item.args[*i].fid;
                    if let Some(rules) = self.chart.forest.get(&fid) {
                        // Literal already materialized in the forest.
                        if let Some(Production::Const(const_rule)) =
                            rules.first()
                        {
                            token_callback(
                                &const_rule.toks,
                                item.shift_over_token(),
                            );
                        }
                    } else if let Some(rule) = literal_callback(fid) {
                        // Mint a fresh fid for the new literal constant.
                        let new_fid = self.chart.next_id;
                        self.chart.next_id += 1;
                        self.chart.forest.insert(
                            new_fid,
                            vec![Production::Const(rule.clone())],
                        );
                        token_callback(
                            &rule.toks,
                            item.shift_over_arg(*i, new_fid),
                        );
                    }
                }
            }
        } else {
            // Dot at the end: the item is complete. Reuse (or create) the
            // passive node for (fid, lbl, offset)...
            let fid = self
                .chart
                .lookup_pc(item.fid, item.lbl, item.offset)
                .unwrap_or_else(|| {
                    let new_fid = self.chart.next_id;
                    self.chart.next_id += 1;
                    self.chart.insert_pc(
                        item.fid,
                        item.lbl,
                        item.offset,
                        new_fid,
                    );
                    let apply = Apply {
                        fid: None,
                        fun: Some(CncFun::new(
                            item.fun.name.clone(),
                            vec![],
                        )),
                        args: item.args.clone(),
                    };
                    self.chart
                        .forest
                        .insert(new_fid, vec![Production::Apply(apply)]);
                    new_fid
                });
            // ...then resume every item that was waiting on this category...
            if let Some(waiting_items) = self.chart.lookup_aco(item.offset, item.fid, item.lbl) {
                for waiting_item in waiting_items.clone() {
                    if let Some(sym_cat) = waiting_item.seq.get(waiting_item.dot) {
                        if let Sym::SymCat { i, .. } = sym_cat {
                            agenda.push(waiting_item.shift_over_arg(*i, fid));
                        }
                    }
                }
            }
            // ...and re-seed items for every label already active on the
            // completed node.
            if let Some(labels) = self.chart.labels_ac(fid) {
                for lbl in labels {
                    let seq = match &item.fun.lins {
                        LinType::Sym(syms) => {
                            if (lbl as usize) < syms.len() {
                                syms[lbl as usize].clone()
                            } else {
                                vec![]
                            }
                        }
                        LinType::FId(_) => vec![],
                    };
                    agenda.push(ActiveItem::new(
                        self.chart.offset,
                        0,
                        item.fun.clone(),
                        seq,
                        item.args.clone(),
                        fid,
                        lbl,
                    ));
                }
            }
        }
    }
}
/// Seeds a parse state: every production of every fid in `start_cat`'s
/// range becomes an initial active item at offset 0, dot 0.
pub fn new(concrete: GFConcrete, start_cat: String) -> Self {
    let mut items = Trie::new();
    let chart = Chart::new(concrete.clone());
    let mut active_items = Vec::new();
    debug_println!("ParseState::new: Looking for start category '{}'", start_cat);
    debug_println!("Available start categories: {:?}", concrete.start_cats.keys().collect::<Vec<_>>());
    if let Some((start, end)) = concrete.start_cats.get(&start_cat) {
        debug_println!("Found start category '{}' with FID range: {} to {}", start_cat, start, end);
        for fid in *start..=*end {
            let rules = chart.expand_forest(fid);
            debug_println!(" FID {}: found {} rules", fid, rules.len());
            for rule in rules {
                if let Production::Apply(apply) = rule {
                    match apply.to_apply_fun() {
                        // Function given as an embedded CncFun with a
                        // list of linearization indices.
                        ApplyFun::CncFun(json_fun) => {
                            for &lin_idx in &json_fun.lins {
                                if (lin_idx as usize) < concrete.functions.len() {
                                    let runtime_fun = concrete.functions[lin_idx as usize].clone();
                                    match &runtime_fun.lins {
                                        LinType::Sym(lins) => {
                                            // One item per linearization row.
                                            for (lbl, lin) in lins.iter().enumerate() {
                                                active_items.push(ActiveItem::new(
                                                    0,
                                                    0,
                                                    runtime_fun.clone(),
                                                    lin.clone(),
                                                    apply.args.clone(),
                                                    fid,
                                                    lbl as i32,
                                                ));
                                            }
                                        }
                                        LinType::FId(_) => {
                                            active_items.push(ActiveItem::new(
                                                0,
                                                0,
                                                runtime_fun.clone(),
                                                vec![],
                                                apply.args.clone(),
                                                fid,
                                                0,
                                            ));
                                        }
                                    }
                                }
                            }
                        }
                        // Function given by index into the function table.
                        ApplyFun::FId(fun_id) => {
                            if (fun_id as usize) < concrete.functions.len()
                            {
                                let runtime_fun = concrete.functions
                                    [fun_id as usize]
                                    .clone();
                                match &runtime_fun.lins {
                                    LinType::Sym(lins) => {
                                        for (lbl, lin) in
                                            lins.iter().enumerate()
                                        {
                                            active_items.push(
                                                ActiveItem::new(
                                                    0,
                                                    0,
                                                    runtime_fun.clone(),
                                                    lin.clone(),
                                                    apply.args.clone(),
                                                    fid,
                                                    lbl as i32,
                                                ),
                                            );
                                        }
                                    }
                                    LinType::FId(_) => {
                                        active_items.push(
                                            ActiveItem::new(
                                                0,
                                                0,
                                                runtime_fun,
                                                vec![],
                                                apply.args.clone(),
                                                fid,
                                                0,
                                            ),
                                        );
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }
    debug_println!("ParseState::new: Created {} initial active items", active_items.len());
    for (i, item) in active_items.iter().enumerate() {
        debug_println!(" Item {}: fun={}, seq_len={}, fid={}, lbl={}",
            i, item.fun.name, item.seq.len(), item.fid, item.lbl);
        if !item.seq.is_empty() {
            debug_println!(" First symbol: {:?}", item.seq[0]);
        }
    }
    // All initial items live at the trie root (no pending terminal tokens).
    items.insert_chain(&[], active_items);
    ParseState { concrete, start_cat, items, chart }
}
/// Feeds one input token to the parser. Returns `false` when the token
/// cannot extend any active item (parse failure at this position).
pub fn next(&mut self, token: &str) -> bool {
    // Items that already matched `token` as part of a multi-token
    // terminal live one level down the trie.
    let mut acc =
        self.items.lookup(token).cloned().unwrap_or_else(Trie::new);
    let mut agenda = self.items.value.clone().unwrap_or_default();
    let mut completed_items = Vec::new();
    self.process(
        &mut agenda,
        // Literal categories: -1 String (quoted), -2 Int, -3 Float.
        |fid| match fid {
            -1 => Some(Const::new(
                Fun::new(format!("\"{token}\""), vec![]),
                vec![token.to_string()],
            )),
            -2 if token.parse::<i32>().is_ok() => Some(Const::new(
                Fun::new(token.to_string(), vec![]),
                vec![token.to_string()],
            )),
            -3 if token.parse::<f64>().is_ok() => Some(Const::new(
                Fun::new(token.to_string(), vec![]),
                vec![token.to_string()],
            )),
            _ => None,
        },
        // Keep items whose next expected token is exactly the input; the
        // remaining tokens of a multi-token terminal become the trie path.
        |tokens, item| {
            if tokens.first() == Some(&token.to_string()) {
                let tokens1 = tokens[1..].to_vec();
                if item.dot >= item.seq.len() {
                    completed_items.push(item.clone());
                }
                acc.insert_chain1(&tokens1, item);
            }
        },
    );
    // Run completion once more so fully-matched items create passive
    // chart entries before the position advances.
    if !completed_items.is_empty() {
        agenda.extend(completed_items);
        self.process(
            &mut agenda,
            |_| None, |_, _| {}, );
    }
    self.items = acc;
    self.chart.shift();
    !self.items.is_empty()
}
/// Collects the active items whose next terminal extends the typed prefix
/// `current_token`. Works on a clone because `process` needs `&mut self`
/// for chart bookkeeping; `self` itself is left untouched.
pub fn complete(&self, current_token: &str) -> CompletionAccumulator {
    let mut acc = self
        .items
        .lookup(current_token)
        .cloned()
        .unwrap_or_else(Trie::new);
    let mut agenda = self.items.value.clone().unwrap_or_default();
    let mut clone_self = self.clone();
    clone_self.process(
        &mut agenda,
        |_| None,
        // An empty prefix matches everything; otherwise require the next
        // expected token to start with the prefix.
        |tokens, item| {
            if current_token.is_empty()
                || tokens
                    .first()
                    .is_some_and(|t| t.starts_with(current_token))
            {
                let tokens1 = tokens[1..].to_vec();
                acc.insert_chain1(&tokens1, item);
            }
        },
    );
    CompletionAccumulator { value: acc.value }
}
/// Reads complete parse trees out of the chart's forest for the start
/// category, deduplicating results.
pub fn extract_trees(&self) -> Vec<Fun> {
    let total_fids = self.concrete.total_fids;
    let forest = &self.chart.forest;
    // Enumerates every tree derivable from `fid`; fids below `total_fids`
    // belong to the grammar itself and become `?` placeholders.
    fn go(
        fid: FId,
        total_fids: FId,
        forest: &HashMap<FId, Vec<Production>>,
    ) -> Vec<Fun> {
        if fid < total_fids {
            vec![Fun::new("?".to_string(), vec![])]
        } else if let Some(rules) = forest.get(&fid) {
            let mut trees = Vec::new();
            for rule in rules {
                match rule {
                    Production::Const(c) => trees.push(c.lit.clone()),
                    Production::Apply(a) => {
                        let arg_trees: Vec<Vec<Fun>> = a
                            .args
                            .iter()
                            .map(|arg| go(arg.fid, total_fids, forest))
                            .collect();
                        // Odometer walk over the cartesian product of the
                        // argument alternatives.
                        let mut indices = vec![0; a.args.len()];
                        loop {
                            let mut t = Fun::new(a.get_name(), vec![]);
                            for (k, idx) in indices.iter().enumerate() {
                                t.args.push(arg_trees[k][*idx].clone());
                            }
                            trees.push(t);
                            let mut carry = true;
                            for i in 0..indices.len() {
                                if carry {
                                    indices[i] += 1;
                                    if indices[i] < arg_trees[i].len() {
                                        carry = false;
                                    } else {
                                        indices[i] = 0;
                                    }
                                }
                            }
                            // Odometer wrapped all the way: done.
                            if carry {
                                break;
                            }
                        }
                    }
                    _ => {}
                }
            }
            trees
        } else {
            vec![]
        }
    }
    let mut trees = Vec::new();
    if let Some((start, end)) =
        self.concrete.start_cats.get(&self.start_cat)
    {
        for fid0 in *start..=*end {
            // Collect every linearization label this start fid can produce.
            let rules = self.chart.expand_forest(fid0);
            let mut labels = vec![];
            for rule in &rules {
                if let Production::Apply(a) = rule {
                    match a.to_apply_fun() {
                        ApplyFun::CncFun(fun) => {
                            labels.extend(0..fun.lins.len() as i32);
                        }
                        ApplyFun::FId(fun_id) => {
                            if (fun_id as usize)
                                < self.concrete.functions.len()
                            {
                                let runtime_fun = &self.concrete.functions
                                    [fun_id as usize];
                                match &runtime_fun.lins {
                                    LinType::Sym(lins) => {
                                        labels
                                            .extend(0..lins.len() as i32);
                                    }
                                    LinType::FId(indices) => {
                                        labels.extend(
                                            0..indices.len() as i32,
                                        );
                                    }
                                }
                            }
                        }
                    }
                }
            }
            for lbl in labels {
                // Prefer a passive node recorded at any offset.
                let mut found = false;
                for try_offset in 0..=self.chart.offset {
                    if let Some(fid) = self.chart.lookup_pc(fid0, lbl, try_offset) {
                        let arg_trees = go(fid, total_fids, forest);
                        for tree in arg_trees {
                            if !trees.contains(&tree) {
                                trees.push(tree);
                            }
                        }
                        found = true;
                        break;
                    }
                }
                // Fallback: scan the whole forest for named (non-numeric)
                // applications among freshly-minted fids.
                if !found {
                    for (&forest_fid, productions) in forest.iter() {
                        if forest_fid >= total_fids {
                            for production in productions {
                                if let Production::Apply(apply) = production {
                                    let name = apply.get_name();
                                    if !name.is_empty() && !name.chars().all(|c| c.is_ascii_digit()) {
                                        let arg_trees = go(forest_fid, total_fids, forest);
                                        for tree in arg_trees {
                                            if !trees.contains(&tree) {
                                                trees.push(tree);
                                            }
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }
    trees
}
}
/// A concrete function together with its linearization data.
#[derive(Debug, Clone, PartialEq)]
pub struct RuntimeCncFun {
    pub name: String,
    // Either resolved symbol sequences or raw sequence indices.
    pub lins: LinType,
}
impl RuntimeCncFun {
    /// Pairs a function name with its linearization data.
    pub fn new(name: String, lins: LinType) -> Self {
        RuntimeCncFun { name, lins }
    }
}
/// Entry of the `lproductions` index: a function plus the fid it produces.
#[derive(Debug, Clone)]
struct LProduction {
    fid: FId,
    fun: RuntimeCncFun,
}
/// A surface token paired with the tree-path tag of the node that produced it.
#[derive(Debug, Clone)]
pub struct TaggedString {
    pub token: String,
    pub tag: String,
}
impl TaggedString {
    /// Builds a tagged token from borrowed string slices.
    pub fn new(token: &str, tag: &str) -> Self {
        Self { token: token.to_owned(), tag: tag.to_owned() }
    }
}
/// One linearization candidate: the concrete fid it belongs to and its
/// table of symbol rows (one row per linearization field).
#[derive(Debug, Clone)]
pub struct LinearizedSym {
    pub fid: i32,
    pub table: Vec<Vec<Sym>>,
}
/// Dotted chart item: function `fun`'s row `lbl` for category `fid`,
/// with `dot` marking progress through `seq`, started at input `offset`.
#[derive(Debug, Clone, PartialEq)]
pub struct ActiveItem {
    pub offset: usize,
    pub dot: usize,
    pub fun: RuntimeCncFun,
    pub seq: Vec<Sym>,
    pub args: Vec<PArg>,
    pub fid: FId,
    pub lbl: i32,
}
impl ActiveItem {
pub fn new(
offset: usize,
dot: usize,
fun: RuntimeCncFun,
seq: Vec<Sym>,
args: Vec<PArg>,
fid: FId,
lbl: i32,
) -> Self {
ActiveItem { offset, dot, fun, seq, args, fid, lbl }
}
pub fn is_equal(&self, other: &ActiveItem) -> bool {
self.offset == other.offset
&& self.dot == other.dot
&& self.fun.name == other.fun.name
&& self.seq == other.seq
&& self.args == other.args
&& self.fid == other.fid
&& self.lbl == other.lbl
}
pub fn shift_over_arg(&self, i: usize, fid: FId) -> ActiveItem {
let mut args = self.args.clone();
args[i].fid = fid;
ActiveItem {
offset: self.offset,
dot: self.dot + 1,
fun: self.fun.clone(),
seq: self.seq.clone(),
args,
fid: self.fid,
lbl: self.lbl,
}
}
pub fn shift_over_token(&self) -> ActiveItem {
ActiveItem {
offset: self.offset,
dot: self.dot + 1,
fun: self.fun.clone(),
seq: self.seq.clone(),
args: self.args.clone(),
fid: self.fid,
lbl: self.lbl,
}
}
}
/// Mirrors JavaScript's `undefined` check: true when the option holds nothing.
pub fn is_undefined<T>(value: &Option<T>) -> bool {
    matches!(value, None)
}
/// Applies `fun` to every value of `obj`, producing a new map with the same
/// keys (analogous to mapping over a JavaScript object's values).
pub fn map_object<K: Eq + std::hash::Hash + Clone, V, F: Fn(&V) -> U, U>(
    obj: &HashMap<K, V>,
    fun: F,
) -> HashMap<K, U> {
    let mut mapped = HashMap::with_capacity(obj.len());
    for (key, val) in obj {
        mapped.insert(key.clone(), fun(val));
    }
    mapped
}
impl Type {
pub fn new(args: Vec<String>, cat: String) -> Self {
Type { args, cat }
}
}
impl Apply {
    /// Renders this production as `"<cat> -> <fun> [<args>]"` for debugging.
    pub fn show(&self, cat: &str) -> String {
        let name = self.get_name();
        format!("{} -> {} [{:?}]", cat, name, self.args)
    }

    /// True when both the function reference and the argument list match.
    pub fn is_equal(&self, obj: &Apply) -> bool {
        if self.fun != obj.fun {
            return false;
        }
        self.args == obj.args
    }
}
impl Coerce {
    /// Renders this coercion as `"<cat> -> _ [<arg>]"` for debugging.
    pub fn show(&self, cat: &str) -> String {
        let arg = &self.arg;
        format!("{} -> _ [{}]", cat, arg)
    }
}
impl PArg {
pub fn new(type_: String, hypos: Vec<FId>, fid: FId) -> Self {
PArg { type_, hypos, fid }
}
}
impl Const {
    /// Renders this constant production as `"<cat> -> <literal>"`.
    pub fn show(&self, cat: &str) -> String {
        let lit = self.lit.print();
        format!("{} -> {}", cat, lit)
    }

    /// True when the literal matches and the token lists are identical.
    /// (Literal comparison runs first, preserving short-circuit order.)
    pub fn is_equal(&self, obj: &Const) -> bool {
        if !self.lit.is_equal(&obj.lit) {
            return false;
        }
        self.toks == obj.toks
    }
}
#[cfg(test)]
// Unit and integration tests. The JSON-backed tests read grammar fixtures
// from the `grammars/` directory relative to the crate root.
mod tests {
use super::*;
use std::fs;
// Direct construction preserves the abstract grammar's start category.
#[test]
fn test_gfgrammar_new() {
let abstract_ = GFAbstract::new("S".to_string(), HashMap::new());
let concretes = HashMap::new();
let grammar = GFGrammar::new(abstract_, concretes);
assert_eq!(grammar.abstract_grammar.startcat, "S");
}
// add_type records both the result category and the argument-category list.
#[test]
fn test_gfabstract_add_type() {
let mut abstract_ = GFAbstract::new("S".to_string(), HashMap::new());
abstract_.add_type(
"MakeS".to_string(),
vec!["NP".to_string(), "VP".to_string()],
"S".to_string(),
);
assert_eq!(abstract_.get_cat("MakeS"), Some(&"S".to_string()));
assert_eq!(
abstract_.get_args("MakeS"),
Some(&vec!["NP".to_string(), "VP".to_string()])
);
}
// A textual tree expression parses into a root node with its children.
#[test]
fn test_parse_tree() {
let abstract_ = GFAbstract::new("S".to_string(), HashMap::new());
let tree = abstract_.parse_tree("MakeS (NP) (VP)", None);
assert!(tree.is_some());
let tree = tree.unwrap();
assert_eq!(tree.name, "MakeS");
assert_eq!(tree.args.len(), 2);
}
// A quoted-literal node is mangled into the `<Cat>_Literal_<name>` form.
#[test]
fn test_handle_literals() {
let abstract_ = GFAbstract::new("S".to_string(), HashMap::new());
let tree = Fun::new("\"test\"".to_string(), vec![]);
let handled = abstract_.handle_literals(tree, "String");
assert_eq!(handled.name, "String_Literal_\"test\"");
}
// Tree equality is structural: names must match recursively through args.
#[test]
fn test_tree_equality() {
let tree1 = Fun::new(
"Test".to_string(),
vec![Fun::new("Arg1".to_string(), vec![])],
);
let tree2 = Fun::new(
"Test".to_string(),
vec![Fun::new("Arg1".to_string(), vec![])],
);
assert!(tree1.is_equal(&tree2));
let tree3 = Fun::new(
"Test".to_string(),
vec![Fun::new("Arg2".to_string(), vec![])],
);
assert!(!tree1.is_equal(&tree3));
}
// End-to-end: the Hello fixture deserializes into a PGF and loads as a
// GFGrammar with the expected start category.
#[test]
fn test_import_from_json() {
let json_content = fs::read_to_string("grammars/Hello/Hello.json")
.expect("Failed to read Hello.json");
let json: serde_json::Value =
serde_json::from_str(&json_content).expect("Failed to parse JSON");
let pgf: PGF =
serde_json::from_value(json).expect("Failed to deserialize PGF");
let grammar = GFGrammar::from_json(pgf);
assert_eq!(grammar.abstract_grammar.startcat, "Greeting");
}
// Parsing a tree expression against a loaded grammar yields the expected
// root function and argument.
#[test]
fn test_parse_tree_from_grammar() {
let json_content = fs::read_to_string("grammars/Hello/Hello.json")
.expect("Failed to read Hello.json");
let json: serde_json::Value =
serde_json::from_str(&json_content).expect("Failed to parse JSON");
let pgf: PGF =
serde_json::from_value(json).expect("Failed to deserialize PGF");
let grammar = GFGrammar::from_json(pgf);
let tree = grammar.abstract_grammar.parse_tree("hello world", None);
assert!(tree.is_some(), "Should be able to parse 'hello world'");
let tree = tree.unwrap();
assert_eq!(tree.name, "hello");
assert_eq!(tree.args.len(), 1);
assert_eq!(tree.args[0].name, "world");
}
// A hand-built abstract tree linearizes to the expected English sentence
// via the FoodEng concrete grammar.
#[test]
fn test_linearize_english() {
let json_content = fs::read_to_string("grammars/Food/gf_make_generated.json")
.expect("Failed to read gf_make_generated.json");
let json: serde_json::Value =
serde_json::from_str(&json_content).expect("Failed to parse JSON");
let pgf: PGF =
serde_json::from_value(json).expect("Failed to deserialize PGF");
let grammar = GFGrammar::from_json(pgf);
let fish = Fun { name: "Fish".to_string(), args: vec![], type_: None };
let this_fish = Fun { name: "This".to_string(), args: vec![fish], type_: None };
let delicious = Fun { name: "Delicious".to_string(), args: vec![], type_: None };
let tree = Fun { name: "Is".to_string(), args: vec![this_fish, delicious], type_: None };
let linearized = grammar.concretes["FoodEng"].linearize(&tree);
assert_eq!(linearized, "this fish is delicious");
}
// NOTE(review): despite its name, this ignored test linearizes with the
// "FoodEng" concrete and asserts the English sentence — it currently
// duplicates test_linearize_english. Presumably it should target the
// Italian concrete (e.g. "FoodIta") with the Italian expected string;
// TODO confirm the grammar key before un-ignoring.
#[test]
#[ignore] fn test_linearize_italian() {
let json_content = fs::read_to_string("grammars/Food/gf_make_generated.json")
.expect("Failed to read gf_make_generated.json");
let json: serde_json::Value =
serde_json::from_str(&json_content).expect("Failed to parse JSON");
let pgf: PGF =
serde_json::from_value(json).expect("Failed to deserialize PGF");
let grammar = GFGrammar::from_json(pgf);
let tree = grammar
.abstract_grammar
.parse_tree("this fish is delicious", None)
.expect("Failed to parse tree");
let linearized = grammar.concretes["FoodEng"].linearize(&tree);
assert_eq!(linearized, "this fish is delicious");
}
}