1
2extern crate regex;
3use std::collections::HashMap;
4use std::io::BufRead;
5use std::io::Lines;
/// Returns the version of this library as an owned string.
///
/// The value is the fixed literal `"1.1.3"`.
pub fn version() -> String {
    String::from("1.1.3")
}
16
17pub fn get_reserved_matchers() -> Vec<(String,regex::Regex)>
70{
71 let mut retvals:HashMap<String,regex::Regex> = HashMap::new();
72 retvals.insert(
73 "Date".to_string(),
74 regex::Regex::new(r"^\d{4}-\d{2}-\d{2}$").unwrap());
75 retvals.into_iter().collect()
76}
77
78pub fn read_kvc_line_default( input_line: &String ) ->
80(
81 Vec<(String,f32)>,
82 Vec<(String,String)>
83)
84{
85 read_kvc_line( input_line, &get_reserved_matchers(),&"")
86}
87
88pub fn read_kvc_line( line: &String, keywords: &Vec<(String,regex::Regex)>, start_sequence: &str) ->
90(
91 Vec<(String,f32)>,
92 Vec<(String,String)>
93)
94{
95 if line.len()==0 {
96 return (
97 vec![],
98 vec![]
99 );
100 }
101 let mut line_strings: HashMap<String,String> = HashMap::new();
102 let mut line_counter: HashMap<String,f32> = HashMap::new();
103 let input_line = match start_sequence.len()>0{
104 true=>{
105 let mut strings = line.split(start_sequence);
106 let _ = strings.next();
107 strings.collect()
108 },
109 false=>line.clone(),
110 };
111 let mut tok_iter = input_line.split_whitespace();
112 'nexttok: while let Some(kvpair) = tok_iter.next(){
113
114 assert!(kvpair.len() > 0);
116 if kvpair.chars().next().unwrap()=='#'{
117 break;
118 }
119 let mut kvitr = kvpair.split(":");
120 if let Some(key)=kvitr.next(){
121 for (name,matcher) in keywords{
124 if matcher.is_match(key)
125 {
126 line_strings.insert(name.clone(),key.to_string().clone());
127 continue 'nexttok;
128 }
129 }
130
131 let val=match kvitr.next(){
137 None=>1.0,
138 Some(s)=>{
139 if let Ok(f_val) = s.parse::<f32>(){
140 f_val
141 } else {
142 eprintln!("Got a non-accumulator (int/float) here: {}:{}",key,s);
143 continue 'nexttok;
144 }
145 },
146 };
147 let countref = line_counter.entry(key.to_string()).or_insert(0.0);
148 *countref = *countref + val;
149 } else {
150 panic!("Bug! Cannot process: '{}' from '{}'",kvpair,input_line);
151 }
152 }
153 return (
154 line_counter.into_iter().collect(),
155 line_strings.into_iter().collect(),
156 );
157}
158
159pub fn load_table_from_kvc_stream<B:BufRead> (
160 lines_input:Lines<B>,
161 keywords :&Vec<(String,regex::Regex)> ,
162 start_sequence: &str
163)->
164(
165 (usize,usize), Vec<((usize,usize),String)> , Vec<String> )
169{
170 let mut rows = 0;
171 let mut col_to_name: HashMap<usize,String> = HashMap::new();
172 let mut name_to_col: HashMap<String,usize> = HashMap::new();
173 let mut string_entries: HashMap< (usize,usize), String> = HashMap::new();
174
175 for line_res in lines_input{
176 let line = match line_res{
178 Ok(l)=>l,
179 Err(_)=> continue,
180 };
181 let (key_counts,key_strings)=read_kvc_line(&line,&keywords,start_sequence);
183
184 if key_counts.len() + key_strings.len()==0
186 {
187 continue;
188 }
189
190 for (key,val) in key_strings{
192 let colsize = name_to_col.len();
193 let colidx = name_to_col.entry(key.to_string()).or_insert(colsize);
194 col_to_name.insert(*colidx,key.to_string());
195 string_entries.insert( (rows,*colidx), val);
196 }
197 for (key,count) in key_counts{
198 let colsize = name_to_col.len();
199 let colidx = name_to_col.entry(key.to_string()).or_insert(colsize);
200 col_to_name.insert(*colidx,key.to_string());
201 string_entries.insert( (rows,*colidx), count.to_string());
202 }
203 rows+=1;
205 }
206
207 let cols = col_to_name.len();
209 let mut col_names:Vec<String> = vec!["".to_string(); cols];
210 for (idx,name) in col_to_name{
211 assert!(col_names[idx].len()==0,"Found non-zero column name! Error in read_kvc_line?");
212 col_names[idx]+=&name.to_string();
213 }
214 for idx in 0..cols{
215 assert!(col_names[idx].len()!=0,"Found zero-length column name! Error in read_kvc_line?")
216 }
217
218 return (
219 (rows,cols),
220 string_entries.into_iter().collect(),
221 col_names
222 )
223}
224
225pub fn load_table_from_kvc_stream_default<B:BufRead> (lines_input:Lines<B>)->
226(
227 (usize,usize),
228 Vec<((usize,usize),String)> , Vec<String> )
231{
232 return load_table_from_kvc_stream(lines_input, &get_reserved_matchers(),&"");
233}
234
#[cfg(test)]
mod tests{
    use super::*;
    use std::io::Cursor;

    /// The default matcher list has exactly one entry and it is named "Date".
    #[test]
    fn keywords_are_returned(){
        let matchers = get_reserved_matchers();
        assert_eq!(matchers.len(),1);
        let first_name = matchers.into_iter().map(|(name,_)| name).next().unwrap();
        assert_eq!(first_name,"Date");
    }

    /// Tokens matching the built-in or a caller-supplied matcher come back as
    /// strings under the matcher's name and are not counted.
    #[test]
    fn line_accepts_keywords(){
        let mut keywords = get_reserved_matchers();
        let one_plus_one = regex::Regex::new(r"^\d{1}\+\d{1}$").unwrap();
        keywords.push(("One-plus-one".to_string(), one_plus_one));

        let input = String::from(" 2021-01-01 \n 1+1 ");
        let (counts,strs) = read_kvc_line(&input,&keywords,"");
        assert_eq!(counts.len(),0);
        assert_eq!(strs.len(),2);
        for (name,val) in strs{
            if name == "One-plus-one" {
                assert_eq!(val,"1+1");
            } else if name == "Date" {
                assert_eq!(val,"2021-01-01");
            } else {
                panic!("Unknown value!");
            }
        }
    }

    /// A lone date token is reported as the "Date" string and nothing is counted.
    #[test]
    fn line_gets_date(){
        let input = String::from(" 2021-01-01 ");
        let (counts,strs) = read_kvc_line_default(&input);
        assert_eq!(strs.len(),1);
        assert_eq!(counts.len(),0);
        let expected = ("Date".to_string(),"2021-01-01".to_string());
        assert_eq!(strs[0],expected);
    }

    /// Bare keys count one each and `key:value` adds the value; repeated keys
    /// accumulate.
    #[test]
    fn line_counts_tokens(){
        let input = String::from(" A A A B B C Z:4 Y:2 Y:3 ");
        let (counts,strs) = read_kvc_line_default(&input);
        assert_eq!(strs.len(),0);
        assert_eq!(counts.len(),5);
        for (key,val) in counts{
            let expected = match key.as_str(){
                "A"=>3.0,
                "B"=>2.0,
                "C"=>1.0,
                "Y"=>5.0,
                "Z"=>4.0,
                _=>panic!("Found unexpected token:{}",key)
            };
            assert_eq!(val,expected);
        }
    }

    /// Everything from the first `#` token onward is ignored.
    #[test]
    fn line_ignores_comments(){
        let input = String::from(" A # A A B B C Z:4 Y:2 Y:3 ");
        let (counts,strs) = read_kvc_line_default(&input);
        assert_eq!(strs.len(),0);
        assert_eq!(counts.len(),1);
        for (key,val) in counts{
            let expected = match key.as_str(){
                "A"=>1.0,
                _=>panic!("Found unexpected token:{}",key)
            };
            assert_eq!(val,expected);
        }
    }

    /// Empty lines add no row; three non-empty lines sharing one key give a
    /// 3x1 table.
    #[test]
    fn table_size(){
        let data = Cursor::new( "A # NO\n A A # \n A A A\n\n" );
        let (shape,_entries,names) = load_table_from_kvc_stream_default(data.lines());
        let (r,c) = shape;
        assert_eq!(r,3);
        assert_eq!(c,1);
        assert_eq!(names[0],"A");
        assert_eq!(names.len(),c);
    }

    /// Only an exact date token is treated as a "Date"; tokens that merely
    /// contain a date are counted as ordinary keys.
    #[test]
    fn date_matches_only_date(){
        let data = Cursor::new(" 2021-01-01AAAAAA \n 2021-01-012021-01-02 \n 2021-02-02 ");
        let (shape,entries,names) = load_table_from_kvc_stream_default(data.lines());
        let (r,c) = shape;
        assert_eq!(r,3);
        assert_eq!(c,3);
        let expected_names = ["2021-01-01AAAAAA","2021-01-012021-01-02","Date"];
        for (idx,expected) in expected_names.iter().enumerate(){
            assert_eq!(names[idx],*expected);
        }
        assert_eq!(entries.len(),3);

        for ((i,j),entry) in entries{
            eprintln!("Checking {}",entry);
            match (i,j){
                (0,0)|(1,1)=>assert_eq!(entry,(1.0).to_string()),
                (2,2)=>assert_eq!(entry,"2021-02-02"),
                _=>panic!("Found unexpected entry: ({},{}) {}",i,j,entry),
            }
        }
    }
}