use crate::symbols::*;
use anyhow::{Ok, Result};
use nar_dev_utils::list;
use narsese::{
api::NarseseOptions,
conversion::inter_type::lexical_fold::TryFoldInto,
lexical::{Budget, Narsese, Punctuation, Stamp, Term, Truth},
};
use pest::{iterators::Pair, Parser};
use pest_derive::Parser;
/// Intermediate accumulator used while folding a pest parse tree:
/// a [`NarseseOptions`] instantiated with the lexical budget, term,
/// punctuation, stamp and truth types.
type MidParseResult = NarseseOptions<Budget, Term, Punctuation, Stamp, Truth>;
/// Parser for the dialect; its implementation is generated by `pest_derive`
/// from the grammar file named in the `grammar` attribute below.
#[derive(Parser)] #[grammar = "src/language/dialect/narust_dialect.pest"]
pub struct DialectParser;
/// Parses `input` with the dialect grammar (entry rule `Rule::narsese`) and
/// folds the resulting parse tree into lexical Narsese.
///
/// # Errors
/// Returns an error when pest rejects the input, or when folding the parse
/// tree fails.
///
/// # Panics
/// Panics if a successful parse yields no pair at all.
pub fn parse_lexical(input: &str) -> Result<Narsese> {
    let mut pairs = DialectParser::parse(Rule::narsese, input)?;
    // Only the first pair produced by the entry rule is folded.
    let root = pairs.next().unwrap();
    fold_pest(root)
}
pub fn parse_term(input: &str) -> Result<super::Term> {
let pair = DialectParser::parse(Rule::narsese, input)?.next().unwrap();
let folded = fold_pest(pair)?.try_into_term()?;
let term = folded.try_fold_into(&())?;
Ok(term)
}
/// Folds a pest parse tree into lexical Narsese.
///
/// An empty [`MidParseResult`] is filled in by [`fold_pest_procedural`] and
/// then collapsed with `fold()`.
///
/// # Errors
/// Propagates errors from the procedural fold, and reports an error when the
/// collected parts cannot be folded into a Narsese value.
fn fold_pest(pest_parsed: Pair<Rule>) -> Result<Narsese> {
    // Start with every slot empty; the procedural fold fills in what it finds.
    let mut collected = MidParseResult {
        truth: None,
        stamp: None,
        punctuation: None,
        term: None,
        budget: None,
    };
    fold_pest_procedural(pest_parsed, &mut collected)?;
    collected
        .fold()
        .ok_or_else(|| anyhow::anyhow!("无效的中间结果"))
}
/// Walks a parse-tree node and records what it finds into `result`.
///
/// * `task` and `sentence` nodes are containers: each child is folded
///   recursively, in order, stopping at the first error.
/// * `budget`, `truth`, `stamp` and `punctuation` nodes fill the matching slot.
/// * Term nodes (atom or any compound) are folded and then passed through
///   [`reform_term`] before being stored.
///
/// # Panics
/// Panics on any other rule, which is expected to appear only inside the
/// nodes handled above.
fn fold_pest_procedural(pair: Pair<Rule>, result: &mut MidParseResult) -> Result<()> {
    match pair.as_rule() {
        // Containers: descend into every child.
        Rule::task | Rule::sentence => pair
            .into_inner()
            .try_for_each(|inner| fold_pest_procedural(inner, result))?,
        // Terms: fold, then normalise sets / pseudo-statements.
        Rule::atom | Rule::compound_unary | Rule::compound_binary | Rule::compound_multi => {
            result.term = Some(reform_term(fold_pest_term(pair)?));
        }
        Rule::budget => result.budget = Some(fold_pest_budget(pair)?),
        Rule::truth => result.truth = Some(fold_pest_truth(pair)?),
        Rule::stamp => result.stamp = Some(pair.as_str().into()),
        Rule::punctuation => result.punctuation = Some(pair.as_str().into()),
        _ => unreachable!("仅出现在内部解析的不可达规则!{:?} {pair}", pair.as_rule()),
    }
    Ok(())
}
/// Collects the text of every child of a `truth` node, in order, into a
/// lexical [`Truth`].
#[inline]
fn fold_pest_truth(pair: Pair<Rule>) -> Result<Truth> {
    let values = pair.into_inner().fold(Truth::new(), |mut acc, value| {
        acc.push(value.as_str().to_string());
        acc
    });
    Ok(values)
}
/// Collects the text of every child of a `budget` node, in order, into a
/// lexical [`Budget`].
#[inline]
fn fold_pest_budget(pair: Pair<Rule>) -> Result<Budget> {
    let values = pair.into_inner().fold(Budget::new(), |mut acc, value| {
        acc.push(value.as_str().to_string());
        acc
    });
    Ok(values)
}
fn fold_pest_term(pair: Pair<Rule>) -> Result<Term> {
match pair.as_rule() {
Rule::atom => fold_pest_atom(pair),
Rule::compound_unary => fold_pest_compound_unary(pair),
Rule::compound_binary => fold_pest_compound_binary(pair),
Rule::compound_multi => fold_pest_compound_multi(pair),
_ => unreachable!("词项只有可能是原子与复合 | {pair}"),
}
}
/// Folds an `atom` node into a lexical atom term (`prefix` + `name`).
///
/// Symbol children are appended to the prefix verbatim. For name children,
/// every leading `_` is moved into the prefix and the remainder is appended
/// to the name.
///
/// # Panics
/// Panics when a child is neither a symbol rule nor a name rule.
#[inline]
fn fold_pest_atom(pair: Pair<Rule>) -> Result<Term> {
    let (mut prefix, mut name) = (String::new(), String::new());
    for pair in pair.into_inner() {
        let text = pair.as_str();
        match pair.as_rule() {
            Rule::name_normal | Rule::name_raw_value => {
                // Split the text into its run of leading underscores and the rest.
                let body = text.trim_start_matches('_');
                let (underscores, _) = text.split_at(text.len() - body.len());
                prefix.push_str(underscores);
                name.push_str(body);
            }
            Rule::symbol_normal | Rule::symbol_raw_value => prefix.push_str(text),
            _ => unreachable!("不可达规则 @ 原子词项 {:?} {pair}", pair.as_rule()),
        }
    }
    Ok(Term::Atom { prefix, name })
}
/// Folds a `compound_unary` node: the first child is the connecter, the
/// second is the single component term.
///
/// # Panics
/// Panics when the node has fewer than two children.
fn fold_pest_compound_unary(pair: Pair<Rule>) -> Result<Term> {
    let mut parts = pair.into_inner();
    let connecter = String::from(parts.next().unwrap().as_str());
    let inner = fold_pest_term(parts.next().unwrap())?;
    Ok(Term::Compound {
        connecter,
        terms: vec![inner],
    })
}
/// Folds a `compound_binary` node, whose children are read in the order
/// subject, copula, predicate, into a lexical statement term.
///
/// # Panics
/// Panics when the node has fewer than three children.
fn fold_pest_compound_binary(pair: Pair<Rule>) -> Result<Term> {
    let mut parts = pair.into_inner();
    let mut next_part = || parts.next().unwrap();
    let subject = fold_pest_term(next_part())?;
    let copula = next_part().as_str();
    let predicate = fold_pest_term(next_part())?;
    Ok(Term::new_statement(copula, subject, predicate))
}
fn fold_pest_compound_multi(pair: Pair<Rule>) -> Result<Term> {
let mut pairs = pair.into_inner();
let connecter = pairs.next().unwrap().as_str().to_string();
let terms = list![
(fold_pest_term(pair)?)
for pair in (pairs)
];
Ok(Term::Compound { connecter, terms })
}
fn reform_term(original: Term) -> Term {
use Term::*;
match original {
Atom { prefix, name } => Atom { prefix, name },
Set { .. } => unreachable!("集合词项不应出现在此处!"),
Statement {
copula,
subject,
predicate,
} => match super::Term::is_statement_identifier(&copula) {
true => Statement {
copula,
subject: Box::new(reform_term(*subject)),
predicate: Box::new(reform_term(*predicate)),
},
false => match copula.as_str() {
SET_EXT_OPERATOR => Set {
left_bracket: SET_EXT_OPENER.into(),
terms: vec![reform_term(*subject), reform_term(*predicate)],
right_bracket: SET_EXT_CLOSER.into(),
},
SET_INT_OPERATOR => Set {
left_bracket: SET_INT_OPENER.into(),
terms: vec![reform_term(*subject), reform_term(*predicate)],
right_bracket: SET_INT_CLOSER.into(),
},
_ => Compound {
connecter: copula,
terms: vec![reform_term(*subject), reform_term(*predicate)],
},
},
},
Compound { connecter, terms } => match connecter.as_str() {
SET_EXT_OPERATOR => Set {
left_bracket: SET_EXT_OPENER.into(),
terms: terms.into_iter().map(reform_term).collect(),
right_bracket: SET_EXT_CLOSER.into(),
},
SET_INT_OPERATOR => Set {
left_bracket: SET_INT_OPENER.into(),
terms: terms.into_iter().map(reform_term).collect(),
right_bracket: SET_INT_CLOSER.into(),
},
_ => Compound {
connecter,
terms: terms.into_iter().map(reform_term).collect(),
},
},
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{util::AResult, util::ToDisplayAndBrief};
    use narsese::{
        api::NarseseValue, conversion::string::impl_lexical::format_instances::FORMAT_ASCII,
    };

    /// Smoke test: every non-empty sample line must parse with the dialect
    /// parser. Term results are additionally converted into the crate's own
    /// term type; other results are formatted with the ASCII format. Both
    /// are printed.
    #[test]
    fn test_dialect_parser() -> AResult {
        // One sample per line; the lines are trimmed before parsing, and the
        // raw string is kept flush-left so the literal is unchanged.
        let samples = r#"
word
$i_var
#d_var
?q_var
137
go-to
{}(SELF)
[](good)
&(a b)
|(a b)
(a - b)
(a ~ b)
*({}(SELF) [](good))
\(a _ b)
/(D _ D)
&&(a b ||(a b c))
(-- neg)
(swam --> bird)
('文字,文字'-->'/* ~标点 --> 符号! */')
(a`<=>`b)
((a ==> b)<->( a <=> b ))
((a {-- b) {-] (a --] b))
(a`一段文字,但实际上是陈述系词`b)!
$$ &/(('@v@'-->b) (b`继承`c) *(b (^c <=> d)) +1 (-- n)). :|: %1.0; 0.9%
"#;
        let inputs = samples
            .split('\n')
            .map(str::trim)
            .filter(|line| !line.is_empty());
        for narsese in inputs {
            match parse_lexical(narsese).expect("pest解析失败!") {
                NarseseValue::Term(term) => {
                    let parsed_term = crate::language::Term::from_lexical(term)?;
                    println!(" {narsese:?}\n => {:?}", parsed_term.to_display_long());
                }
                other => {
                    let parsed_str = FORMAT_ASCII.format_narsese(&other);
                    println!(" {narsese:?}\n => {:?}", parsed_str);
                }
            }
        }
        println!("测试完毕!");
        Ok(())
    }
}