1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
use crate::{
cli_options::IndexingOptions,
score::{Score, ScoreCalculator, ScoringAlgo},
PresentationOptions,
};
use std::collections::HashMap;
/// Module-private contract for something that can build an index.
///
/// It declares a single associated function that takes no receiver and no
/// arguments and returns nothing.
trait Indexer {
    /// Builds the index.
    fn create_index();
}
/// Zero-based position of a line within the content being indexed.
pub type LineNumber = usize;
/// Scores recorded for one base line, keyed by the number of the line it was compared with.
pub type LineScores = HashMap<LineNumber, Score>;
/// The complete index: maps a base line number to the scores of the lines compared against it.
pub type IndexContent = HashMap<LineNumber, LineScores>;
/// Similarity index over a sequence of lines.
///
/// Holds the scores recorded so far together with the `ScoreCalculator`
/// that produces them.
#[derive(Debug)]
pub struct Index {
/// Recorded scores, keyed by base line number and then by compared line number.
index: IndexContent,
/// Scoring backend, constructed from the `ScoringAlgo` given to `Index::new`.
calculator: ScoreCalculator,
}
impl Index {
    /// Creates an empty index whose scores are computed with `algo`.
    pub fn new(algo: ScoringAlgo) -> Index {
        Index {
            index: IndexContent::new(),
            calculator: ScoreCalculator::new(algo),
        }
    }

    /// Scores every line of `content` against the lines that follow it.
    ///
    /// Each line is compared with at most `opts.scope` subsequent lines, after
    /// both sides have been passed through [`Index::normalize`]. Results are
    /// added to the scores already stored in this index. A line that has no
    /// following line to compare with (for example the last one) gets no entry.
    pub fn create_index(&mut self, content: &Vec<&str>, opts: &IndexingOptions) {
        // Normalize every line exactly once; each line takes part in up to
        // `scope + 1` comparisons, so normalizing per comparison repeats work.
        let normalized: Vec<String> = content
            .iter()
            .map(|line| Index::normalize(String::from(*line), opts))
            .collect();

        for (n, base) in normalized.iter().enumerate() {
            // `skip`/`take` bound the window by the content length without any
            // index arithmetic, so a very large `scope` cannot overflow.
            let window = normalized.iter().enumerate().skip(n + 1).take(opts.scope);
            for (i, candidate) in window {
                let score = self.calculator.get_score(base, candidate);
                self.index.entry(n).or_default().insert(i, score);
            }
        }
    }

    /// Returns `str` prepared for comparison according to `opts`.
    ///
    /// * When `opts.case_sensitive` is false the text is lowercased.
    /// * When `opts.ignore_spaces` is true every `' '` character is removed
    ///   (other whitespace is left untouched).
    pub fn normalize(mut str: String, opts: &IndexingOptions) -> String {
        if !opts.case_sensitive {
            str = str.to_lowercase();
        }
        if opts.ignore_spaces {
            // Remove spaces in place instead of allocating a second string.
            str.retain(|c| c != ' ');
        }
        str
    }

    /// Scores line `n` of `content` against up to `opts.scope` following lines.
    ///
    /// Both the base line and each compared line are normalized with
    /// [`Index::normalize`] first. Scores are stored under key `n`; an entry
    /// for `n` is only created if at least one comparison takes place.
    ///
    /// If `n` is not a valid position in `content` (including when `content`
    /// is empty) nothing is recorded and the call returns without panicking.
    pub fn process_line(&mut self, content: &Vec<&str>, n: LineNumber, opts: &IndexingOptions) {
        let base = match content.get(n) {
            Some(line) => Index::normalize(String::from(*line), opts),
            None => return,
        };

        // `n` is a valid index here, so `n + 1` cannot overflow; the window's
        // upper end is bounded by `take`, not by adding `scope` to `n`.
        let window = content.iter().enumerate().skip(n + 1).take(opts.scope);
        for (i, line) in window {
            let candidate = Index::normalize(String::from(*line), opts);
            let score = self.calculator.get_score(&base, &candidate);
            self.index.entry(n).or_default().insert(i, score);
        }
    }

    /// Returns a read-only view of everything recorded in the index.
    pub fn get_index_content(&self) -> &IndexContent {
        &self.index
    }

    /// Returns how many lines compared against line `n` have a score that is
    /// less than or equal to `opts.threshold`.
    ///
    /// Yields `0` when the index has no entry for `n`.
    pub fn get_match_count(&self, n: &LineNumber, opts: &PresentationOptions) -> usize {
        // Count directly instead of building the list of matches first.
        self.index.get(n).map_or(0, |line_scores| {
            line_scores
                .values()
                .filter(|score| **score <= opts.threshold)
                .count()
        })
    }

    /// Returns the numbers of the lines compared against line `n` whose score
    /// is less than or equal to `opts.threshold`.
    ///
    /// Returns `None` when the index has no entry for `n`, and `Some` of a
    /// possibly empty vector otherwise. The order of the returned line numbers
    /// is unspecified because they come from a `HashMap`.
    pub fn get_matches(&self, n: &LineNumber, opts: &PresentationOptions) -> Option<Vec<LineNumber>> {
        self.index.get(n).map(|line_scores| {
            line_scores
                .iter()
                .filter(|(_, score)| **score <= opts.threshold)
                .map(|(line, _)| *line)
                .collect()
        })
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Input lines shared by the tests below.
    const SAMPLE_LINES: [&str; 5] = ["Line1", "Line2", "LIne3", "LINe4", "MEGA JUNK"];

    /// Smoke test: builds an index with default options and prints it.
    #[test]
    fn test_full() {
        let data = SAMPLE_LINES.join("\n");
        let content: Vec<&str> = data.split('\n').collect();

        let mut index = Index::new(ScoringAlgo::Levenshtein);
        index.create_index(&content, &IndexingOptions::default());

        println!("{:#?}", index.get_index_content());
    }

    /// With a scope covering the whole input, line 0 matches lines 1 to 3
    /// under the default presentation options.
    #[test]
    fn test_matches() {
        let data = SAMPLE_LINES.join("\n");
        let content: Vec<&str> = data.split('\n').collect();

        let index_options = IndexingOptions {
            scope: 10,
            ..IndexingOptions::default()
        };
        let mut index = Index::new(ScoringAlgo::Levenshtein);
        index.create_index(&content, &index_options);

        let presentation = PresentationOptions::default();
        let mut matches = index.get_matches(&0, &presentation).unwrap();
        // Match order comes from a HashMap, so sort before comparing.
        matches.sort();

        assert_eq!(matches, vec![1, 2, 3]);
        assert_eq!(index.get_match_count(&0, &presentation), 3);
    }
}