use std::collections;
use regex::Regex;
use sequence_trie::SequenceTrie;
use super::alphabet::Alphabet;
use super::raw_span::RawSpan;
use super::regexes::{NamedPattern, EXCLUDE_PATTERNS, PATTERNS};
use super::span::Span;
/// Result of searching a set of lines: the spans that were matched, each
/// carrying a hint, plus a trie for looking spans up by the characters of
/// their hint.
pub struct Model<'a> {
/// Lines that were searched; borrowed from the caller for `'a`.
pub lines: &'a [&'a str],
/// The `reverse` flag that was passed to `Model::new`, stored unchanged.
pub reverse: bool,
/// Matched spans, each one with the hint that was associated to it.
pub spans: Vec<Span<'a>>,
/// Maps the character sequence of a hint to an index into `spans`.
pub lookup_trie: SequenceTrie<char, usize>,
}
impl<'a> Model<'a> {
    /// Builds a model by searching `lines`, assigning a hint to every span
    /// found, and indexing the spans by hint.
    ///
    /// * `lines` - text to search, one entry per line.
    /// * `alphabet` - source of the hints (`Alphabet::make_hints`).
    /// * `use_all_patterns` - when true the built-in `PATTERNS` are used and
    ///   `named_patterns` is ignored.
    /// * `named_patterns` - patterns used when `use_all_patterns` is false.
    /// * `custom_patterns` - extra regular expressions, always used.
    /// * `reverse` - when true, hints are handed out starting from the last
    ///   span instead of the first. The resulting `spans` are still stored in
    ///   the order in which they were found.
    /// * `unique_hint` - when true, spans with identical text share one hint.
    pub fn new(
        lines: &'a [&'a str],
        alphabet: &'a Alphabet,
        use_all_patterns: bool,
        named_patterns: &'a [NamedPattern],
        custom_patterns: &'a [String],
        reverse: bool,
        unique_hint: bool,
    ) -> Model<'a> {
        // Hints are associated in iteration order, so flipping the raw spans
        // first is what makes the hint assignment start from the end.
        let raw_spans = {
            let mut found =
                find_raw_spans(lines, named_patterns, custom_patterns, use_all_patterns);
            if reverse {
                found.reverse();
            }
            found
        };

        // Flip back so that `spans` is in the order the spans were found.
        let spans = {
            let mut hinted = associate_hints(&raw_spans, alphabet, unique_hint);
            if reverse {
                hinted.reverse();
            }
            hinted
        };

        // The trie stores indices into the final ordering of `spans`.
        let lookup_trie = build_lookup_trie(&spans);

        Model {
            lines,
            reverse,
            spans,
            lookup_trie,
        }
    }
}
fn find_raw_spans<'a>(
lines: &'a [&'a str],
named_patterns: &'a [NamedPattern],
custom_patterns: &'a [String],
use_all_patterns: bool,
) -> Vec<RawSpan<'a>> {
let exclude_regexes = EXCLUDE_PATTERNS
.iter()
.map(|&(name, pattern)| (name, Regex::new(pattern).unwrap()))
.collect::<Vec<_>>();
let custom_regexes = custom_patterns
.iter()
.map(|pattern| {
(
"custom",
Regex::new(pattern).expect("Invalid custom regexp"),
)
})
.collect::<Vec<_>>();
let regexes = if use_all_patterns {
PATTERNS
.iter()
.map(|&(name, pattern)| (name, Regex::new(pattern).unwrap()))
.collect::<Vec<(&str, regex::Regex)>>()
} else {
named_patterns
.iter()
.map(|NamedPattern(name, pattern)| (name.as_str(), Regex::new(pattern).unwrap()))
.collect::<Vec<(&str, regex::Regex)>>()
};
let all_regexes = [exclude_regexes, custom_regexes, regexes].concat();
let mut raw_spans = Vec::new();
for (index, line) in lines.iter().enumerate() {
let mut chunk: &str = line;
let mut offset: i32 = 0;
loop {
let chunk_matches = all_regexes
.iter()
.filter_map(|(pat_name, reg)| {
reg.find_iter(chunk)
.next()
.map(|reg_match| (pat_name, reg, reg_match))
})
.collect::<Vec<_>>();
if chunk_matches.is_empty() {
break;
}
let (pat_name, reg, reg_match) = chunk_matches
.iter()
.min_by_key(|element| element.2.start())
.unwrap();
if **pat_name != "ansi_colors" {
let text = reg_match.as_str();
let capture = reg
.captures_iter(text)
.next()
.expect("This regex is guaranteed to match.")
.get(1)
.expect("This regex should have a capture group.");
let (subtext, substart) = (capture.as_str(), capture.start());
raw_spans.push(RawSpan {
x: offset + reg_match.start() as i32 + substart as i32,
y: index as i32,
pattern: pat_name,
text: subtext,
});
}
chunk = chunk
.get(reg_match.end()..)
.expect("The chunk must be larger than the regex match.");
offset += reg_match.end() as i32;
}
}
raw_spans
}
/// Pairs every raw span with a hint taken from `alphabet`, keeping the order
/// of `raw_spans`.
///
/// As many hints are requested from the alphabet as there are raw spans.
/// When `unique` is false each span consumes the next hint. When `unique` is
/// true a hint is consumed only the first time a given text is seen, and
/// later spans with the same text reuse it.
///
/// # Panics
///
/// If the alphabet yields fewer hints than are needed.
fn associate_hints<'a>(
    raw_spans: &[RawSpan<'a>],
    alphabet: &'a Alphabet,
    unique: bool,
) -> Vec<Span<'a>> {
    const OUT_OF_HINTS: &str = "We should have as many hints as necessary, even invisible ones.";

    /// Copies the raw span and attaches an owned copy of the hint.
    fn hinted<'s>(raw: &RawSpan<'s>, hint: &str) -> Span<'s> {
        Span {
            x: raw.x,
            y: raw.y,
            pattern: raw.pattern,
            text: raw.text,
            hint: hint.to_string(),
        }
    }

    let hints = alphabet.make_hints(raw_spans.len());
    let mut unused = hints.iter();

    if !unique {
        // One fresh hint per span.
        return raw_spans
            .iter()
            .map(|raw| {
                let hint = unused.next().expect(OUT_OF_HINTS);
                hinted(raw, hint)
            })
            .collect();
    }

    // Remember which hint was given to which text so duplicates share it.
    let mut by_text: collections::HashMap<&str, &str> = collections::HashMap::new();
    let spans = raw_spans
        .iter()
        .map(|raw| {
            let hint: &str = by_text
                .entry(raw.text)
                .or_insert_with(|| unused.next().expect(OUT_OF_HINTS));
            hinted(raw, hint)
        })
        .collect();
    spans
}
/// Builds a trie keyed by the characters of each span's hint, whose values
/// are indices into `spans`.
///
/// When several spans carry the same hint, only the first one (lowest index)
/// is registered.
fn build_lookup_trie<'a>(spans: &'a [Span<'a>]) -> SequenceTrie<char, usize> {
    let mut lookup = SequenceTrie::new();

    for (position, span) in spans.iter().enumerate() {
        let key: Vec<char> = span.hint.chars().collect();

        // An earlier span already owns this hint: leave its entry untouched.
        if lookup.get(&key).is_some() {
            continue;
        }

        lookup.insert_owned(key, position);
    }

    lookup
}