use std::path::PathBuf;
use std::fs;
use super::chardrip::CharDrip;
use super::tokenisers;
/// Reads a token stream three tokens at a time through a sliding window.
///
/// Both `prestrings` and `tokens` are kept at exactly three entries, ordered
/// oldest first; an empty string marks a slot that has not been filled yet.
pub struct TrigramReader {
    /// Source of tokens; each call to `read_token` yields the next token or
    /// `None` once the input is exhausted.
    token_reader: Box<dyn tokenisers::TokenReader>,
    /// The `prestring` reported with each of the three most recent tokens
    /// (presumably the text preceding the token -- confirm against the
    /// tokenisers module).
    pub prestrings: Vec<String>,
    /// The three most recently read tokens, oldest first.
    pub tokens: Vec<String>,
}
impl TrigramReader {
    /// Creates a reader over the contents of `file`.
    ///
    /// The whole file is read into memory as UTF-8 text, and the file's
    /// extension is handed to `tokenisers::make_token_reader` to obtain the
    /// token reader. Both windows start out as three empty strings.
    ///
    /// # Panics
    ///
    /// Panics if the file cannot be read as a UTF-8 string, or if the path has
    /// no extension. Both messages name the offending path.
    pub fn new(file: &PathBuf) -> TrigramReader {
        let contents = fs::read_to_string(file.as_path()).unwrap_or_else(|err| {
            // Keep the original message as a prefix, but say which file and why.
            panic!("Could not open file {}: {}", file.display(), err)
        });
        let dripper = CharDrip::new(contents.chars().collect());
        let extn = file.extension().unwrap_or_else(|| {
            panic!(
                "Cannot choose a tokeniser: file {} has no extension",
                file.display()
            )
        });
        TrigramReader {
            token_reader: tokenisers::make_token_reader(extn, dripper),
            prestrings: vec![String::new(); 3],
            tokens: vec![String::new(); 3],
        }
    }

    /// Returns the current token window as one string, with the three tokens
    /// separated by single spaces.
    ///
    /// Slots that have not been filled yet contribute empty strings, so the
    /// result is only a real trigram after `read_trigram` has returned `true`.
    ///
    /// # Panics
    ///
    /// Panics if `tokens` holds fewer than three entries.
    pub fn last_trigram(&self) -> String {
        // Only the first three entries are used, exactly as with explicit indexing.
        self.tokens[..3].join(" ")
    }

    /// Advances the window until it holds a complete trigram.
    ///
    /// Each token read is shifted into `tokens`, and its prestring into
    /// `prestrings`. Reading continues until the oldest token slot is
    /// non-empty, so the initial tokens that only partially fill the window
    /// do not produce a result.
    ///
    /// Returns `true` when a trigram is available via `last_trigram`, or
    /// `false` once the token reader reports no more tokens.
    pub fn read_trigram(&mut self) -> bool {
        while let Some(result) = self.token_reader.read_token() {
            push_to_triple(&mut self.prestrings, result.prestring.clone());
            push_to_triple(&mut self.tokens, result.token.clone());
            // An empty oldest slot means the window is still filling up.
            if !self.tokens[0].is_empty() {
                return true;
            }
        }
        false
    }
}
/// Slides a three-entry window forward by one: the oldest entry (index 0) is
/// discarded, the remaining entries move down, and `item` becomes the last
/// entry. The length of `items` is unchanged.
///
/// Debug builds assert that `items` holds exactly three entries.
fn push_to_triple(items: &mut Vec<String>, item: String) {
    debug_assert!(items.len() == 3, "Prestrings/Tokens must be length 3");
    // Rotating parks the oldest entry in the final slot, where the new item
    // then overwrites (and drops) it.
    items.rotate_left(1);
    let newest = items.len() - 1;
    items[newest] = item;
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::ffi::OsStr;

    /// Tokenises `text` with the tokeniser chosen for the extension `extn`
    /// and returns every trigram the reader produces, in order.
    fn collect_trigrams(text: &str, extn: &str) -> Vec<String> {
        let dripper = CharDrip::new(text.chars().collect());
        let mut reader = TrigramReader {
            token_reader: tokenisers::make_token_reader(OsStr::new(extn), dripper),
            prestrings: vec![String::new(); 3],
            tokens: vec![String::new(); 3],
        };
        let mut found = Vec::new();
        while reader.read_trigram() {
            found.push(reader.last_trigram());
        }
        found
    }

    /// Checks each `(text, expected trigrams)` pair: the reader must produce
    /// exactly the expected trigrams, in the same order.
    fn check_trigram_reader(cases: &[(&str, Vec<&str>)], extn: &str) {
        for (text, expected) in cases {
            let found = collect_trigrams(text, extn);
            // Vector equality covers both the count and each element.
            assert_eq!(found, *expected);
        }
    }

    #[test]
    fn test_java_trigram_reader() {
        let cases = vec![("int x+=3;", vec!["int x +=", "x += 3", "+= 3 ;"])];
        check_trigram_reader(&cases, "java");
    }

    #[test]
    fn test_lisp_trigram_reader() {
        let cases = vec![
            ("(a)", vec!["( a )"]),
            ("( a )", vec!["( a )"]),
            ("( a b )", vec!["( a b", "a b )"]),
            ("(abc b)", vec!["( abc b", "abc b )"]),
        ];
        check_trigram_reader(&cases, "ss");
    }

    #[test]
    fn test_text_trigram_reader() {
        let cases = vec![
            ("", vec![]),
            ("a", vec![]),
            ("a b", vec![]),
            ("a b c", vec!["a b c"]),
            (
                "some. words, with, punctuation 123 Numbers",
                vec![
                    "some words with",
                    "words with punctuation",
                    "with punctuation numbers",
                ],
            ),
        ];
        check_trigram_reader(&cases, "txt");
    }
}