/// A minimal English noun phrase: a head noun plus grammatical number and an
/// optional modifier.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct EnglishNp {
    /// Head noun of the phrase.
    pub head: String,
    /// Grammatical number tag stored as a string.
    pub number: String,
    /// Optional adjective attached to the head; `None` for a bare phrase.
    pub adjective: Option<String>,
}

impl EnglishNp {
    /// Builds a noun phrase that carries no adjective.
    fn bare(head: String, number: String) -> Self {
        Self {
            adjective: None,
            head,
            number,
        }
    }
}
/// Flat subject / verb / object analysis of a single clause, as filled in by
/// `analyze`.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct EnglishClause {
/// Subject noun phrase. `analyze` leaves this `None` when the clause opens
/// with a subject pronoun (the pronoun is reflected in `verb_person` only).
pub subject: Option<EnglishNp>,
/// Verb in the base form produced by `delemmatize_verb`, if one was found.
pub verb: Option<String>,
/// Grammatical person of the verb as a string. The derived `Default` yields
/// an empty string; `analyze` starts from `"3"` and overrides it with the
/// person of a leading subject pronoun.
pub verb_person: String,
/// Object noun phrase, if one was found.
pub object: Option<EnglishNp>,
}
/// Returns `true` when `t` is exactly one of the articles "the", "a" or "an".
///
/// The comparison is case-sensitive, so callers are expected to pass an
/// already lowercased token.
fn is_article(t: &str) -> bool {
    ["the", "a", "an"].contains(&t)
}
/// Looks up a lowercase subject pronoun.
///
/// Returns `Some((person, plural))` where `person` is `"1"`, `"2"` or `"3"`
/// and `plural` tells whether the pronoun is plural, or `None` when `t` is
/// not one of the known pronouns. Matching is case-sensitive.
fn subject_pronoun(t: &str) -> Option<(&'static str, bool)> {
    // (pronoun, person, plural)
    const PRONOUNS: [(&str, &str, bool); 7] = [
        ("i", "1", false),
        ("we", "1", true),
        ("you", "2", false),
        ("he", "3", false),
        ("she", "3", false),
        ("it", "3", false),
        ("they", "3", true),
    ];

    PRONOUNS
        .iter()
        .find(|(word, _, _)| *word == t)
        .map(|&(_, person, plural)| (person, plural))
}
/// Result of `prepare`: the tokenized input plus what was learned from a
/// leading subject pronoun, if any.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Prepared {
/// Grammatical person as a string; `prepare` uses `"3"` unless the first
/// token is a subject pronoun, in which case it is that pronoun's person.
pub person: String,
/// `true` when the first token was a subject pronoun (and was removed from
/// `tokens`).
pub pronoun_subject: bool,
/// Remaining lowercased tokens with articles filtered out.
pub tokens: Vec<String>,
}
/// Tokenizes `text` and peels off a leading subject pronoun.
///
/// The text is split on every character that is neither alphanumeric nor an
/// apostrophe; empty pieces are discarded, the rest are lowercased and
/// articles are dropped. If the first surviving token is a subject pronoun it
/// is removed from the token list and its person is recorded; otherwise the
/// person defaults to `"3"`.
pub fn prepare(text: &str) -> Prepared {
    let is_separator = |c: char| !(c.is_alphanumeric() || c == '\'');

    let mut tokens = Vec::new();
    for piece in text.split(is_separator) {
        if piece.is_empty() {
            continue;
        }
        let word = piece.to_lowercase();
        if !is_article(&word) {
            tokens.push(word);
        }
    }

    let leading = tokens.first().and_then(|word| subject_pronoun(word));
    let (person, pronoun_subject) = match leading {
        Some((p, _plural)) => {
            // The pronoun only contributes its person; it is not kept as a token.
            tokens.remove(0);
            (p.to_string(), true)
        }
        None => ("3".to_string(), false),
    };

    Prepared { person, pronoun_subject, tokens }
}
/// Strips a regular English plural ending from `noun`.
///
/// Returns `(lemma, was_plural)`. The rules are purely orthographic and are
/// tried in this order:
///
/// 1. `-ies` → `-y` when at least two bytes precede the ending
///    ("cities" → "city", "flies" → "fly"); shorter words such as "ties"
///    fall through to rule 4 and become "tie".
/// 2. `-ses`, `-xes`, `-zes`, `-ches`, `-shes` → drop the final "es"
///    ("boxes" → "box").
/// 3. Words ending in `-ss` are treated as singular ("grass").
/// 4. A final `-s` is dropped when at least two bytes remain.
///
/// Anything else is returned unchanged and reported as singular. Matching is
/// case-sensitive, so pass a lowercased token.
///
/// Known limitation: rule 2 cannot tell "buses" from "horses", so the latter
/// comes back as "hors".
pub fn depluralize(noun: &str) -> (String, bool) {
    const ES_ENDINGS: [&str; 5] = ["ses", "xes", "zes", "ches", "shes"];

    if let Some(stem) = noun.strip_suffix("ies") {
        // Two-letter stems ("fl", "sk", "cr") are consonant + y words; only
        // one-letter stems ("t", "p", "l") belong to "-ie" nouns.
        if stem.len() >= 2 {
            return (format!("{stem}y"), true);
        }
    }

    if ES_ENDINGS.iter().any(|&ending| noun.ends_with(ending)) {
        // Every ending finishes in ASCII "es", so cutting two bytes is safe.
        return (noun[..noun.len() - 2].to_string(), true);
    }

    if noun.ends_with("ss") {
        return (noun.to_string(), false);
    }

    match noun.strip_suffix('s') {
        Some(stem) if stem.len() >= 2 => (stem.to_string(), true),
        _ => (noun.to_string(), false),
    }
}
/// Reduces a third-person-singular present verb form to its base form.
///
/// Despite the name, this function strips inflection (it returns the lemma).
/// The rules are purely orthographic and are tried in this order:
///
/// 1. `-ies` → `-y` when at least two bytes precede the ending
///    ("tries" → "try", "flies" → "fly"); shorter words such as "dies"
///    fall through to rule 4 and become "die".
/// 2. `-ses`, `-xes`, `-zes`, `-ches`, `-shes` → drop the final "es"
///    ("watches" → "watch").
/// 3. Words ending in `-ss` are returned unchanged ("kiss").
/// 4. A final `-s` is dropped when at least two bytes remain
///    ("sees" → "see").
///
/// Anything else is returned unchanged. Matching is case-sensitive, so pass a
/// lowercased token.
///
/// Known limitation: rule 2 also fires on verbs whose base form ends in
/// "-se", so "uses" comes back as "us".
pub fn delemmatize_verb(verb: &str) -> String {
    const ES_ENDINGS: [&str; 5] = ["ses", "xes", "zes", "ches", "shes"];

    if let Some(stem) = verb.strip_suffix("ies") {
        // Two-letter stems ("tr", "cr", "fl") are consonant + y verbs; only
        // one-letter stems ("d", "l", "t") belong to "-ie" verbs.
        if stem.len() >= 2 {
            return format!("{stem}y");
        }
    }

    if ES_ENDINGS.iter().any(|&ending| verb.ends_with(ending)) {
        // Every ending finishes in ASCII "es", so cutting two bytes is safe.
        return verb[..verb.len() - 2].to_string();
    }

    if verb.ends_with("ss") {
        return verb.to_string();
    }

    match verb.strip_suffix('s') {
        Some(stem) if stem.len() >= 2 => stem.to_string(),
        _ => verb.to_string(),
    }
}
pub fn analyze(text: &str) -> EnglishClause {
let tokens: Vec<String> = text
.split(|c: char| !c.is_alphanumeric() && c != '\'')
.filter(|t| !t.is_empty())
.map(|t| t.to_lowercase())
.filter(|t| !is_article(t))
.collect();
let mut clause = EnglishClause { verb_person: "3".into(), ..Default::default() };
if tokens.is_empty() {
return clause;
}
let mut rest = tokens.as_slice();
if let Some((person, _plural)) = subject_pronoun(&tokens[0]) {
clause.verb_person = person.to_string();
rest = &tokens[1..];
match rest {
[v] => clause.verb = Some(delemmatize_verb(v)),
[v, o] => {
clause.verb = Some(delemmatize_verb(v));
let (lemma, plural) = depluralize(o);
clause.object = Some(EnglishNp::bare(lemma, number(plural)));
}
[v, rest_obj @ ..] if !rest_obj.is_empty() => {
clause.verb = Some(delemmatize_verb(v));
let o = rest_obj.last().unwrap();
let (lemma, plural) = depluralize(o);
clause.object = Some(EnglishNp::bare(lemma, number(plural)));
}
_ => {}
}
return clause;
}
match rest {
[s, v] => {
let (lemma, plural) = depluralize(s);
clause.subject = Some(EnglishNp::bare(lemma, number(plural)));
clause.verb = Some(delemmatize_verb(v));
}
[s, v, o, ..] => {
let (slemma, splural) = depluralize(s);
clause.subject = Some(EnglishNp::bare(slemma, number(splural)));
clause.verb = Some(delemmatize_verb(v));
let (olemma, oplural) = depluralize(o);
clause.object = Some(EnglishNp::bare(olemma, number(oplural)));
}
[s] => {
let (lemma, plural) = depluralize(s);
clause.subject = Some(EnglishNp::bare(lemma, number(plural)));
}
_ => {}
}
clause
}
/// Maps a plurality flag to its string tag: `"pl"` for plural, `"sg"` for
/// singular.
fn number(plural: bool) -> String {
    String::from(if plural { "pl" } else { "sg" })
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Expected value for a clause slot holding an adjective-less noun phrase.
    fn np(head: &str, num: &str) -> Option<EnglishNp> {
        Some(EnglishNp::bare(head.to_string(), num.to_string()))
    }

    /// Articles are dropped and a noun subject implies third person.
    #[test]
    fn svo_with_articles() {
        let clause = analyze("the bird sees the stone");
        assert_eq!(clause.subject, np("bird", "sg"));
        assert_eq!(clause.verb.as_deref(), Some("see"));
        assert_eq!(clause.object, np("stone", "sg"));
        assert_eq!(clause.verb_person, "3");
    }

    /// Plural marking is detected on both noun slots.
    #[test]
    fn plural_subject_and_object() {
        let clause = analyze("birds see stones");
        assert_eq!(clause.subject, np("bird", "pl"));
        assert_eq!(clause.verb.as_deref(), Some("see"));
        assert_eq!(clause.object, np("stone", "pl"));
    }

    /// A leading pronoun sets the person and leaves the subject slot empty.
    #[test]
    fn pronoun_subject_sets_person() {
        let clause = analyze("I see the stone");
        assert!(clause.subject.is_none());
        assert_eq!(clause.verb_person, "1");
        assert_eq!(clause.verb.as_deref(), Some("see"));
        assert_eq!(clause.object, np("stone", "sg"));
    }

    /// Subject + verb with nothing after the verb has no object.
    #[test]
    fn intransitive() {
        let clause = analyze("the warrior sleeps");
        assert_eq!(clause.subject, np("warrior", "sg"));
        assert_eq!(clause.verb.as_deref(), Some("sleep"));
        assert!(clause.object.is_none());
    }

    /// Spot checks for the non-trivial plural rules.
    #[test]
    fn tricky_plurals() {
        let cases = [
            ("boxes", "box", true),
            ("cities", "city", true),
            ("grass", "grass", false),
            ("stone", "stone", false),
        ];
        for (input, lemma, plural) in cases {
            assert_eq!(depluralize(input), (lemma.to_string(), plural));
        }
    }

    /// Separator-only input produces an empty clause.
    #[test]
    fn empty_is_empty() {
        let clause = analyze(" ");
        assert!(clause.subject.is_none());
        assert!(clause.verb.is_none());
        assert!(clause.object.is_none());
    }
}