1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
//! The indexer parses the input and builds an index that can be reused.
//! This is useful for big files.

use crate::{
    cli_options::IndexingOptions,
    score::{Score, ScoreCalculator, ScoringAlgo},
    PresentationOptions,
};
use std::collections::HashMap;

/// Abstraction for something that can build an index.
///
/// NOTE(review): nothing visible in this file implements or calls this trait;
/// `Index` exposes its own inherent `create_index` method instead. Confirm
/// whether the trait is still needed.
trait Indexer {
    /// Builds an index. As declared, it takes no receiver and no arguments
    /// and returns nothing.
    fn create_index();
}

/// Zero-based position of a line within the indexed content slice.
pub type LineNumber = usize;
/// Scores of one line against other lines, keyed by the other line's number.
pub type LineScores = HashMap<LineNumber, Score>;
/// The whole index: for each line number, the scores against the lines it was
/// compared with.
pub type IndexContent = HashMap<LineNumber, LineScores>;

/// Index of pairwise line scores, together with the calculator used to
/// produce them.
#[derive(Debug)]
pub struct Index {
    /// Recorded scores: outer key is the base line, inner key is the line it
    /// was compared against.
    index: IndexContent,
    /// Computes the score between two (normalized) lines; built from the
    /// `ScoringAlgo` passed to `Index::new`.
    calculator: ScoreCalculator,
}

impl Index {
    /// Creates an empty index whose scores will be computed with `algo`.
    pub fn new(algo: ScoringAlgo) -> Index {
        Index {
            index: IndexContent::new(),
            calculator: ScoreCalculator::new(algo),
        }
    }

    /// Indexes every line of `content` by calling [`Index::process_line`] for
    /// each position in order.
    ///
    /// Accepts any slice of lines; a `&Vec<&str>` coerces automatically.
    pub fn create_index(&mut self, content: &[&str], opts: &IndexingOptions) {
        for n in 0..content.len() {
            self.process_line(content, n, opts);
        }
    }

    /// Normalizes a line before scoring.
    ///
    /// * Lower-cases the text unless `opts.case_sensitive` is set.
    /// * Removes every `' '` (space) character when `opts.ignore_spaces` is
    ///   set. Other whitespace such as tabs is left untouched.
    pub fn normalize(mut str: String, opts: &IndexingOptions) -> String {
        if !opts.case_sensitive {
            str = str.to_lowercase();
        }

        if opts.ignore_spaces {
            // In-place removal avoids allocating a second string.
            str.retain(|c| c != ' ');
        }

        str
    }

    /// Scores line `n` against the (at most) `opts.scope` lines that follow it
    /// and records each score in the index under key `n`.
    ///
    /// No entry is created for `n` when there is nothing to compare it with
    /// (it is the last line, or `opts.scope` is zero).
    ///
    /// # Panics
    ///
    /// Panics if `n` is not a valid position in `content`.
    pub fn process_line(&mut self, content: &[&str], n: LineNumber, opts: &IndexingOptions) {
        // TODO: we could optimize by not calling normalize at all if not required
        let base = Index::normalize(String::from(content[n]), opts);

        // `skip`/`take` bound the window without computing `n + opts.scope`,
        // which would overflow for a very large scope.
        for (i, other) in content.iter().enumerate().skip(n + 1).take(opts.scope) {
            let candidate = Index::normalize(String::from(*other), opts);
            let score = self.calculator.get_score(&base, &candidate);
            self.index.entry(n).or_default().insert(i, score);
        }
    }

    /// Returns a read-only view of everything recorded so far.
    pub fn get_index_content(&self) -> &IndexContent {
        &self.index
    }

    /// Returns the number of matches for a given line.
    ///
    /// A match is a recorded score that is less than or equal to
    /// `opts.threshold`. Returns `0` when nothing is recorded for `n`.
    pub fn get_match_count(&self, n: &LineNumber, opts: &PresentationOptions) -> usize {
        self.index.get(n).map_or(0, |line_scores| {
            line_scores
                .values()
                .filter(|score| *score <= &opts.threshold)
                .count()
        })
    }

    /// Returns the numbers of the lines matching line `n`.
    ///
    /// A match is a recorded score that is less than or equal to
    /// `opts.threshold`. Returns `None` when nothing is recorded for `n`;
    /// otherwise `Some` with the matching line numbers (possibly empty). The
    /// order follows the hash map's iteration order and is therefore
    /// unspecified; sort the result if a stable order is needed.
    pub fn get_matches(&self, n: &LineNumber, opts: &PresentationOptions) -> Option<Vec<LineNumber>> {
        self.index.get(n).map(|line_scores| {
            line_scores
                .iter()
                .filter(|(_, score)| *score <= &opts.threshold)
                .map(|(line, _)| *line)
                .collect()
        })
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Lines shared by every test: four near-identical lines that differ in
    /// case and last digit, followed by one unrelated line.
    const SAMPLE_LINES: [&str; 5] = ["Line1", "Line2", "LIne3", "LINe4", "MEGA JUNK"];

    /// Smoke test: indexing with default options runs and the result can be
    /// pretty-printed.
    #[test]
    fn test_full() {
        let data = SAMPLE_LINES.join("\n");
        let content: Vec<&str> = data.split('\n').collect();

        let mut index = Index::new(ScoringAlgo::Levenshtein);
        index.create_index(&content, &IndexingOptions::default());

        println!("{:#?}", index.get_index_content());
    }

    /// With a scope wide enough to cover the whole sample, the first line
    /// matches the three similar lines under the default presentation options.
    #[test]
    fn test_matches() {
        let data = SAMPLE_LINES.join("\n");
        let content: Vec<&str> = data.split('\n').collect();

        let index_options = IndexingOptions {
            scope: 10,
            ..IndexingOptions::default()
        };
        let mut index = Index::new(ScoringAlgo::Levenshtein);
        index.create_index(&content, &index_options);

        let presentation = PresentationOptions::default();

        // Match order is unspecified, so sort before comparing.
        let mut matches = index.get_matches(&0, &presentation).unwrap();
        matches.sort_unstable();

        assert_eq!(matches, vec![1, 2, 3]);
        assert_eq!(index.get_match_count(&0, &presentation), 3);
    }
}