yass/wrapped_strsim/seq_strsim.rs
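//! Wraps a sequence-based string similarity metric together with the tokenizer
//! that feeds it, checking at construction time that the two are compatible.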

use std::fmt::Display;

use super::super::{ExpectTokenizerType, StrSim, StrSimWithTokenizer, Tokenizer};
use crate::{
    error::StrSimError, helper::ByValue, BaseTokenizer, MutTokenizer, StrSimWithMutTokenizer,
};
use anyhow::Result;

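/// Pairs a sequence-based string similarity metric with the tokenizer that
/// produces its token sequences, so callers can score raw strings directly.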
pub struct SeqStrSim<
    't,
    T,
    SS: StrSim<T> + Display + ExpectTokenizerType,
    TK: BaseTokenizer<T, Return = ByValue> + Display,
> {
    pub tokenizer: &'t mut TK,
    pub strsim: SS,
    phantom: std::marker::PhantomData<T>,
}

impl<
        't,
        T,
        SS: StrSim<T> + Display + ExpectTokenizerType,
        TK: BaseTokenizer<T, Return = ByValue> + Display,
    > SeqStrSim<'t, T, SS, TK>
{
    /// Builds the wrapper, rejecting the pair when the metric does not expect a
    /// sequence-shaped tokenizer output or the tokenizer cannot produce it.
    pub fn new(tokenizer: &'t mut TK, strsim: SS) -> Result<Self, StrSimError> {
        let expect_tok_type = strsim.get_expected_tokenizer_type();
        if !tokenizer.is_compatible(&expect_tok_type) || !expect_tok_type.is_outer_seq() {
            Err(StrSimError::InvalidConfigData(format!(
                "StrSim {} expects a {:?} tokenizer, but got {}, which is not compatible",
                strsim, expect_tok_type, tokenizer,
            )))
        } else {
            Ok(Self {
                tokenizer,
                strsim,
                phantom: std::marker::PhantomData,
            })
        }
    }
}

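// Immutable-tokenizer path: tokenization is delegated to the shared tokenizer
// and scoring to the wrapped metric.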
impl<
        't,
        T,
        SS: StrSim<T> + Display + ExpectTokenizerType,
        TK: Tokenizer<T, Return = ByValue> + Display,
    > StrSimWithTokenizer<T> for SeqStrSim<'t, T, SS, TK>
{
    fn similarity(&self, key: &str, query: &str) -> Result<f64, StrSimError> {
        let (s1, s2) = self.tokenizer.tokenize_pair(key, query);
        self.strsim.similarity_pre_tok2(&s1, &s2)
    }

    fn similarity_pre_tok1(&self, key: &str, tokenized_query: &T) -> Result<f64, StrSimError> {
        let s1 = self.tokenizer.tokenize(key);
        self.strsim.similarity_pre_tok2(&s1, tokenized_query)
    }

    fn tokenize(&self, str: &str) -> T {
        self.tokenizer.tokenize(str)
    }

    fn tokenize_list(&self, strs: &[&str]) -> Vec<T> {
        strs.iter()
            .map(|s| self.tokenizer.tokenize(s))
            .collect::<Vec<T>>()
    }
}

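// Mutable-tokenizer path: the same delegation, for tokenizers that need mutable
// access to internal state while tokenizing.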
impl<
        't,
        T,
        SS: StrSim<T> + Display + ExpectTokenizerType,
        TK: MutTokenizer<T, Return = ByValue> + Display,
    > StrSimWithMutTokenizer<T> for SeqStrSim<'t, T, SS, TK>
{
    fn similarity(&mut self, key: &str, query: &str) -> Result<f64, StrSimError> {
        let (s1, s2) = self.tokenizer.tokenize_pair(key, query);
        self.strsim.similarity_pre_tok2(&s1, &s2)
    }

    fn similarity_pre_tok1(&mut self, key: &str, tokenized_query: &T) -> Result<f64, StrSimError> {
        let s1 = self.tokenizer.tokenize(key);
        self.strsim.similarity_pre_tok2(&s1, tokenized_query)
    }

    fn tokenize(&mut self, str: &str) -> T {
        self.tokenizer.tokenize(str)
    }

    fn tokenize_list(&mut self, strs: &[&str]) -> Vec<T> {
        strs.iter()
            .map(|s| self.tokenizer.tokenize(s))
            .collect::<Vec<T>>()
    }
}

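// Pre-tokenized path: when both sides are already tokenized, the tokenizer is
// bypassed and scoring goes straight to the wrapped metric.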
impl<
        't,
        T,
        SS: StrSim<T> + Display + ExpectTokenizerType,
        TK: BaseTokenizer<T, Return = ByValue> + Display,
    > StrSim<T> for SeqStrSim<'t, T, SS, TK>
{
    fn similarity_pre_tok2(
        &self,
        tokenized_key: &T,
        tokenized_query: &T,
    ) -> Result<f64, StrSimError> {
        self.strsim
            .similarity_pre_tok2(tokenized_key, tokenized_query)
    }
}
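
// A minimal usage sketch, assuming a sequence tokenizer and a sequence-based
// metric from the surrounding crate; `CharSeqTokenizer` and `Levenshtein` are
// hypothetical placeholders, not names defined in this module:
//
//     let mut tokenizer = CharSeqTokenizer::default();
//     let strsim = SeqStrSim::new(&mut tokenizer, Levenshtein::default())?;
//     let score = StrSimWithTokenizer::similarity(&strsim, "apple", "appel")?;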