use crate::constants::*;
use crate::transform::{DependencyGraph, Transform};
/// Lemma transformation that keeps only the part of a lemma that precedes
/// the first `|` character.
pub struct RemoveAlternatives;

impl Transform for RemoveAlternatives {
    /// Returns the lemma of the token at `node`, truncated before its first `|`.
    ///
    /// The lemma is returned unchanged when the token's XPOS tag starts with
    /// `PUNCTUATION_PREFIX` or equals `NON_WORD_TAG` or `FOREIGN_WORD_TAG`,
    /// and also when it contains no `|` at all.
    fn transform(&self, graph: &dyn DependencyGraph, node: usize) -> String {
        let tok = graph.token(node);
        let full_lemma = tok.lemma();
        let xpos = tok.xpos();

        // Tokens with these tags are exempt from truncation.
        let keep_verbatim = xpos.starts_with(PUNCTUATION_PREFIX)
            || xpos == NON_WORD_TAG
            || xpos == FOREIGN_WORD_TAG;

        let kept = if keep_verbatim {
            full_lemma
        } else {
            // `split` always yields at least one piece; the first piece is
            // everything before the first `|` (or the whole lemma).
            full_lemma.split('|').next().unwrap_or(full_lemma)
        };

        kept.to_owned()
    }
}
/// Lemma transformation that substitutes the lowercased word form for the
/// lemma of tokens tagged `REFLEXIVE_PERSONAL_PRONOUN_TAG`.
pub struct RemoveReflexiveTag;

impl Transform for RemoveReflexiveTag {
    /// Returns the lowercased form of the token at `node` when its XPOS tag
    /// equals `REFLEXIVE_PERSONAL_PRONOUN_TAG`; otherwise returns the token's
    /// lemma unchanged.
    fn transform(&self, graph: &dyn DependencyGraph, node: usize) -> String {
        let tok = graph.token(node);

        if tok.xpos() == REFLEXIVE_PERSONAL_PRONOUN_TAG {
            tok.form().to_lowercase()
        } else {
            tok.lemma().to_owned()
        }
    }
}
/// Lemma transformation that drops everything up to and including the last
/// `#` in the lemma of tokens whose XPOS tag satisfies `is_verb`.
pub struct RemoveSepVerbPrefix;

impl Transform for RemoveSepVerbPrefix {
    /// Returns the lemma of the token at `node`.
    ///
    /// When `is_verb` accepts the token's XPOS tag and the lemma contains a
    /// `#`, only the text after the last `#` is returned. In every other case
    /// the lemma is returned unchanged.
    fn transform(&self, graph: &dyn DependencyGraph, node: usize) -> String {
        let tok = graph.token(node);
        let full_lemma = tok.lemma();

        // Tokens that are not verbs are never modified.
        if !is_verb(tok.xpos()) {
            return full_lemma.to_owned();
        }

        let stem = match full_lemma.rfind('#') {
            // `#` is a single byte, so `marker + 1` is a valid char boundary.
            Some(marker) => &full_lemma[marker + 1..],
            None => full_lemma,
        };

        stem.to_owned()
    }
}
/// Lemma transformation that replaces the lemma of tokens tagged
/// `TRUNCATED_TAG` with their word form.
pub struct RemoveTruncMarker;

impl Transform for RemoveTruncMarker {
    /// Returns the replacement lemma for the token at `node`.
    ///
    /// * XPOS tag differs from `TRUNCATED_TAG`: the lemma is returned unchanged.
    /// * XPOS tag equals `TRUNCATED_TAG` and UPOS is `"NOUN"`: the word form is
    ///   returned with its casing intact.
    /// * XPOS tag equals `TRUNCATED_TAG` and UPOS is anything else: the word
    ///   form is returned lowercased.
    fn transform(&self, graph: &dyn DependencyGraph, node: usize) -> String {
        let tok = graph.token(node);

        if tok.xpos() == TRUNCATED_TAG {
            let surface = tok.form();
            // Only nouns keep the casing of the surface form.
            if tok.upos() == "NOUN" {
                surface.to_owned()
            } else {
                surface.to_lowercase()
            }
        } else {
            tok.lemma().to_owned()
        }
    }
}
#[cfg(test)]
mod tests {
    use super::{RemoveSepVerbPrefix, RemoveTruncMarker};
    use crate::transform::test_helpers::run_test_cases;

    /// Hands the `remove-sep-verb-prefix` fixture file and
    /// `RemoveSepVerbPrefix` to `run_test_cases`.
    #[test]
    pub fn remove_sep_verb_prefix() {
        let fixture = "testdata/remove-sep-verb-prefix.test";
        run_test_cases(fixture, RemoveSepVerbPrefix);
    }

    /// Hands the `remove-trunc-marker` fixture file and `RemoveTruncMarker`
    /// to `run_test_cases`.
    #[test]
    pub fn remove_trunc_marker() {
        let fixture = "testdata/remove-trunc-marker.test";
        run_test_cases(fixture, RemoveTruncMarker);
    }
}