use super::{CharUnit, Dictionary, DictionaryNode};
/// A substring occurrence inside a dictionary term.
///
/// `position` and `length` are measured in `char`s, not bytes; the accessor
/// methods translate them to byte offsets before slicing `term`.
#[derive(Debug, Clone)]
pub struct SubstringMatch<N>
where
    N: DictionaryNode,
{
    /// Dictionary node supplied by whoever produced this match.
    pub node: N,
    /// The full term in which the match was found.
    pub term: String,
    /// Char index within `term` at which the match starts.
    pub position: usize,
    /// Number of chars covered by the match.
    pub length: usize,
}

impl<N: DictionaryNode> SubstringMatch<N> {
    /// Creates a match from its four components without validating them.
    pub fn new(node: N, term: String, position: usize, length: usize) -> Self {
        SubstringMatch {
            node,
            term,
            position,
            length,
        }
    }

    /// Converts a char index into a byte offset into `term`.
    ///
    /// Indices at or beyond the end of the term map to `term.len()`, so the
    /// result is always a valid slice boundary and is monotonic in
    /// `char_index`.
    fn byte_offset(&self, char_index: usize) -> usize {
        self.term
            .char_indices()
            .nth(char_index)
            .map(|(offset, _)| offset)
            .unwrap_or(self.term.len())
    }

    /// Char index one past the end of the match.
    ///
    /// Saturates instead of overflowing so that an oversized `length` is
    /// treated as "to the end of the term" rather than panicking or wrapping.
    fn end_position(&self) -> usize {
        self.position.saturating_add(self.length)
    }

    /// Returns the matched part of `term`.
    ///
    /// A range that extends past the end of the term is clamped to the end.
    pub fn matched_substring(&self) -> &str {
        let start_byte = self.byte_offset(self.position);
        let end_byte = self.byte_offset(self.end_position());
        &self.term[start_byte..end_byte]
    }

    /// Returns the part of `term` that precedes the match.
    ///
    /// When `position` is at or past the end of the term the whole term is
    /// returned, so `prefix() + matched_substring() + suffix()` always
    /// reassembles `term`.
    pub fn prefix(&self) -> &str {
        let start_byte = self.byte_offset(self.position);
        &self.term[..start_byte]
    }

    /// Returns the part of `term` that follows the match.
    ///
    /// Empty when the match reaches (or would exceed) the end of the term.
    pub fn suffix(&self) -> &str {
        let end_byte = self.byte_offset(self.end_position());
        &self.term[end_byte..]
    }

    /// Number of chars before the match; this is `position` itself.
    #[inline]
    pub fn left_context_len(&self) -> usize {
        self.position
    }

    /// Number of chars after the match, or `0` if the match range reaches or
    /// exceeds the end of the term.
    #[inline]
    pub fn right_context_len(&self) -> usize {
        self.term
            .chars()
            .count()
            .saturating_sub(self.end_position())
    }
}
/// Extension of [`Dictionary`] with exact substring lookups.
///
/// Implementors supply [`find_exact_substring`](Self::find_exact_substring);
/// the remaining methods have default implementations built on top of it.
pub trait SubstringDictionary: Dictionary {
    /// Returns the matches for `pattern`.
    ///
    /// The matching semantics and result order are defined by the implementor.
    fn find_exact_substring(&self, pattern: &str) -> Vec<SubstringMatch<Self::Node>>;

    /// Returns at most `limit` matches for `pattern`.
    ///
    /// The default implementation computes every match first and keeps only
    /// the leading `limit` entries.
    fn find_exact_substring_limited(
        &self,
        pattern: &str,
        limit: usize,
    ) -> Vec<SubstringMatch<Self::Node>> {
        self.find_exact_substring(pattern)
            .into_iter()
            .take(limit)
            .collect()
    }

    /// Returns `true` if a lookup limited to one result finds anything.
    fn contains_substring(&self, pattern: &str) -> bool {
        let first_hit = self.find_exact_substring_limited(pattern, 1);
        !first_hit.is_empty()
    }

    /// Returns how many matches `find_exact_substring` reports for `pattern`.
    fn count_substring_matches(&self, pattern: &str) -> usize {
        let all_matches = self.find_exact_substring(pattern);
        all_matches.len()
    }
}
/// A [`DictionaryNode`] that can also be navigated towards its parent.
///
/// The required methods are implementor-defined; the provided methods are
/// built only from `parent` and `parent_label`.
pub trait BidirectionalDictionaryNode: DictionaryNode {
    /// Returns the parent node, or `None` if there is none.
    fn parent(&self) -> Option<Self>;

    /// Returns the label associated with the step from the parent to this
    /// node, or `None` if there is none.
    fn parent_label(&self) -> Option<Self::Unit>;

    /// Iterates over `(label, node)` pairs for incoming edges.
    // NOTE(review): presumably each pair is a predecessor and the label on its
    // edge into this node — confirm against implementors.
    fn reverse_edges(&self) -> Box<dyn Iterator<Item = (Self::Unit, Self)> + '_>;

    /// Returns the nodes reached by following `label` backwards.
    // NOTE(review): semantics are implementor-defined — confirm.
    fn reverse_transition(&self, label: Self::Unit) -> Vec<Self>;

    /// Returns the depth of this node.
    // NOTE(review): presumably the distance from the root — confirm.
    fn depth(&self) -> usize;

    /// Returns `true` when this node has no parent.
    fn is_root(&self) -> bool {
        self.parent().is_none()
    }

    /// Collects the labels on the way up through successive parents and
    /// returns them in top-down order.
    ///
    /// The walk stops as soon as a node reports no parent label. If a node
    /// reports a label but no parent, that label is still included and the
    /// walk ends there.
    fn path_from_root(&self) -> Vec<Self::Unit> {
        let mut bottom_up = Vec::new();
        let mut cursor = self.clone();
        loop {
            // Label first, then parent: same call order on every step.
            match cursor.parent_label() {
                Some(label) => bottom_up.push(label),
                None => break,
            }
            match cursor.parent() {
                Some(up) => cursor = up,
                None => break,
            }
        }
        bottom_up.reverse();
        bottom_up
    }

    /// Renders the labels of [`path_from_root`](Self::path_from_root) as a
    /// `String` via the unit type's `to_string` conversion.
    fn path_string(&self) -> String {
        let labels = self.path_from_root();
        Self::Unit::to_string(&labels)
    }
}
/// Plain record describing the outcome of an extension step.
// NOTE(review): the field meanings below are inferred from their names only;
// confirm against the code that produces these values.
#[derive(Clone, Debug)]
pub struct ExtensionResult {
    /// The term this result refers to.
    pub term: String,
    /// Presumably the edit distance associated with `term` — confirm.
    pub distance: usize,
    /// Presumably where the covered region of the query begins — confirm.
    pub query_start: usize,
    /// Presumably where the covered region of the query ends — confirm.
    pub query_end: usize,
}

impl ExtensionResult {
    /// Bundles the four values into an `ExtensionResult` without validation.
    pub fn new(term: String, distance: usize, query_start: usize, query_end: usize) -> Self {
        Self {
            term,
            distance,
            query_start,
            query_end,
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Minimal node with no edges, used only to instantiate `SubstringMatch`.
    ///
    /// It has no fields, so it is `Send` and `Sync` automatically; no manual
    /// `unsafe impl` is needed.
    #[derive(Clone)]
    struct MockNode;

    impl DictionaryNode for MockNode {
        type Unit = char;

        fn is_final(&self) -> bool {
            false
        }

        fn transition(&self, _label: char) -> Option<Self> {
            None
        }

        fn edges(&self) -> Box<dyn Iterator<Item = (char, Self)> + '_> {
            Box::new(std::iter::empty())
        }
    }

    /// The match `"thedr"` inside `"cathedral"` shared by several tests.
    fn cathedral_match() -> SubstringMatch<MockNode> {
        SubstringMatch::new(MockNode, "cathedral".to_string(), 2, 5)
    }

    #[test]
    fn test_substring_match_creation() {
        let m = cathedral_match();
        assert_eq!(m.term, "cathedral");
        assert_eq!(m.position, 2);
        assert_eq!(m.length, 5);
    }

    #[test]
    fn test_substring_match_matched_substring() {
        assert_eq!(cathedral_match().matched_substring(), "thedr");
    }

    #[test]
    fn test_substring_match_prefix() {
        assert_eq!(cathedral_match().prefix(), "ca");
    }

    #[test]
    fn test_substring_match_suffix() {
        assert_eq!(cathedral_match().suffix(), "al");
    }

    #[test]
    fn test_substring_match_context_lengths() {
        let m = cathedral_match();
        assert_eq!(m.left_context_len(), 2);
        assert_eq!(m.right_context_len(), 2);
    }

    #[test]
    fn test_substring_match_unicode() {
        let m = SubstringMatch::new(MockNode, "café".to_string(), 1, 2);
        assert_eq!(m.matched_substring(), "af");
        assert_eq!(m.prefix(), "c");
        assert_eq!(m.suffix(), "é");
    }

    #[test]
    fn test_substring_match_unicode_before_match() {
        // The multi-byte char precedes the match, so char and byte offsets
        // differ for both the start and the end of the matched range.
        let m = SubstringMatch::new(MockNode, "éclair".to_string(), 1, 3);
        assert_eq!(m.matched_substring(), "cla");
        assert_eq!(m.prefix(), "é");
        assert_eq!(m.suffix(), "ir");
        assert_eq!(m.left_context_len(), 1);
        assert_eq!(m.right_context_len(), 2);
    }

    #[test]
    fn test_substring_match_full_term() {
        let m = SubstringMatch::new(MockNode, "hello".to_string(), 0, 5);
        assert_eq!(m.matched_substring(), "hello");
        assert_eq!(m.prefix(), "");
        assert_eq!(m.suffix(), "");
        assert_eq!(m.left_context_len(), 0);
        assert_eq!(m.right_context_len(), 0);
    }

    #[test]
    fn test_extension_result_creation() {
        let result = ExtensionResult::new("cathedral".to_string(), 2, 0, 9);
        assert_eq!(result.term, "cathedral");
        assert_eq!(result.distance, 2);
        assert_eq!(result.query_start, 0);
        assert_eq!(result.query_end, 9);
    }
}