1use std::path::PathBuf;
4use std::fs;
5
6use super::chardrip::CharDrip;
7use super::tokenisers;
8
9pub struct TrigramReader {
12 token_reader : Box<dyn tokenisers::TokenReader>,
13 pub prestrings : Vec<String>,
14 pub tokens : Vec<String>,
15}
16
17impl TrigramReader {
18 pub fn new (file : &PathBuf) -> TrigramReader {
20 if let Ok(contents) = fs::read_to_string (file.as_path ()) {
21 let dripper = CharDrip::new (contents.chars().collect ());
22 let extn = file.extension().unwrap ();
23 TrigramReader {
24 token_reader: tokenisers::make_token_reader (extn, dripper),
25 prestrings: vec![String::from(""), String::from(""), String::from("")],
26 tokens: vec![String::from(""), String::from(""), String::from("")]
27 }
28 } else {
29 panic! ("Could not open file");
30 }
31 }
32
33 pub fn last_trigram (&self) -> String {
35 format!("{} {} {}", &self.tokens[0], &self.tokens[1], &self.tokens[2])
36 }
37
38 pub fn read_trigram (&mut self) -> bool {
41 loop {
42 match &self.token_reader.read_token () {
43 Some(result) => {
44 push_to_triple (&mut self.prestrings, result.prestring.clone ());
45 push_to_triple (&mut self.tokens, result.token.clone ());
46
47 if &self.tokens[0] != "" { return true;
49 }
50 },
51 None => return false,
52 }
53 }
54 }
55}
56
/// Shifts `item` into the three-slot window `items`: the oldest entry
/// (index 0) is discarded, the remaining entries move down one place and
/// `item` becomes the last entry.
fn push_to_triple(items: &mut Vec<String>, item: String) {
    debug_assert!(items.len() == 3, "Prestrings/Tokens must be length 3");
    // Rotate the oldest entry round to the back, then overwrite it there.
    items.rotate_left(1);
    if let Some(newest) = items.last_mut() {
        *newest = item;
    }
}
64
65
#[cfg(test)]
mod tests {
    use std::ffi::OsStr;
    use super::*;

    /// Trigrams produced for Java-style source (extension `java`).
    #[test]
    fn test_java_trigram_reader() {
        let cases = vec![
            ("int x+=3;", vec!["int x +=", "x += 3", "+= 3 ;"]),
        ];
        test_trigram_reader(&cases, "java");
    }

    /// Trigrams produced for Lisp-style source (extension `ss`).
    #[test]
    fn test_lisp_trigram_reader() {
        let cases = vec![
            ("(a)", vec!["( a )"]),
            ("( a )", vec!["( a )"]),
            ("( a b )", vec!["( a b", "a b )"]),
            ("(abc b)", vec!["( abc b", "abc b )"]),
        ];
        test_trigram_reader(&cases, "ss");
    }

    /// Trigrams produced for plain text (extension `txt`), including inputs
    /// with fewer than three tokens, which yield no trigram at all.
    #[test]
    fn test_text_trigram_reader() {
        let cases = vec![
            ("", vec![]),
            ("a", vec![]),
            ("a b", vec![]),
            ("a b c", vec!["a b c"]),
            (
                "some. words, with, punctuation 123 Numbers",
                vec!["some words with", "words with punctuation", "with punctuation numbers"],
            ),
        ];
        test_trigram_reader(&cases, "txt");
    }

    /// Runs every `(input, expected trigrams)` pair through a reader built
    /// for the extension `extn` and checks that the trigrams come out in
    /// exactly the expected order.
    fn test_trigram_reader(tests: &[(&str, Vec<&str>)], extn: &str) {
        for (text, targets) in tests {
            let dripper = CharDrip::new(text.chars().collect());
            let mut reader = TrigramReader {
                token_reader: tokenisers::make_token_reader(OsStr::new(extn), dripper),
                prestrings: vec![String::new(), String::new(), String::new()],
                tokens: vec![String::new(), String::new(), String::new()],
            };

            let mut produced: Vec<String> = Vec::new();
            while reader.read_trigram() {
                produced.push(reader.last_trigram());
            }

            // Whole-vector equality covers both the count and each trigram.
            assert_eq!(produced, *targets);
        }
    }
}