1use std::fmt::Display;
2
3use super::super::{ExpectTokenizerType, StrSim, StrSimWithTokenizer, Tokenizer};
4use crate::{
5 error::StrSimError, helper::ByValue, BaseTokenizer, MutTokenizer, StrSimWithMutTokenizer,
6};
7use anyhow::Result;
8
9pub struct SeqStrSim<
10 't,
11 T,
12 SS: StrSim<T> + Display + ExpectTokenizerType,
13 TK: BaseTokenizer<T, Return = ByValue> + Display,
14> {
15 pub tokenizer: &'t mut TK,
16 pub strsim: SS,
17 phantom: std::marker::PhantomData<T>,
18}
19
20impl<
21 't,
22 T,
23 SS: StrSim<T> + Display + ExpectTokenizerType,
24 TK: BaseTokenizer<T, Return = ByValue> + Display,
25 > SeqStrSim<'t, T, SS, TK>
26{
27 pub fn new(tokenizer: &'t mut TK, strsim: SS) -> Result<Self, StrSimError> {
28 let expect_tok_type = strsim.get_expected_tokenizer_type();
29 if !tokenizer.is_compatible(&expect_tok_type) || !expect_tok_type.is_outer_seq() {
30 Err(StrSimError::InvalidConfigData(format!(
31 "StrSim {} expect {:?} tokenizer, but get {} which is not compatible",
32 strsim, expect_tok_type, tokenizer,
33 )))
34 } else {
35 Ok(Self {
36 tokenizer,
37 strsim,
38 phantom: std::marker::PhantomData,
39 })
40 }
41 }
42}
43
44impl<
45 't,
46 T,
47 SS: StrSim<T> + Display + ExpectTokenizerType,
48 TK: Tokenizer<T, Return = ByValue> + Display,
49 > StrSimWithTokenizer<T> for SeqStrSim<'t, T, SS, TK>
50{
51 fn similarity(&self, key: &str, query: &str) -> Result<f64, StrSimError> {
52 let (s1, s2) = self.tokenizer.tokenize_pair(key, query);
53 self.strsim.similarity_pre_tok2(&s1, &s2)
54 }
55
56 fn similarity_pre_tok1(&self, key: &str, tokenized_query: &T) -> Result<f64, StrSimError> {
57 let s1 = self.tokenizer.tokenize(key);
58 self.strsim.similarity_pre_tok2(&s1, tokenized_query)
59 }
60
61 fn tokenize(&self, str: &str) -> T {
62 self.tokenizer.tokenize(str)
63 }
64
65 fn tokenize_list(&self, strs: &[&str]) -> Vec<T> {
66 strs.iter()
67 .map(|s| self.tokenizer.tokenize(s))
68 .collect::<Vec<T>>()
69 }
70}
71
72impl<
73 't,
74 T,
75 SS: StrSim<T> + Display + ExpectTokenizerType,
76 TK: MutTokenizer<T, Return = ByValue> + Display,
77 > StrSimWithMutTokenizer<T> for SeqStrSim<'t, T, SS, TK>
78{
79 fn similarity(&mut self, key: &str, query: &str) -> Result<f64, StrSimError> {
80 let (s1, s2) = self.tokenizer.tokenize_pair(key, query);
81 self.strsim.similarity_pre_tok2(&s1, &s2)
82 }
83
84 fn similarity_pre_tok1(&mut self, key: &str, tokenized_query: &T) -> Result<f64, StrSimError> {
85 let s1 = self.tokenizer.tokenize(key);
86 self.strsim.similarity_pre_tok2(&s1, tokenized_query)
87 }
88
89 fn tokenize(&mut self, str: &str) -> T {
90 self.tokenizer.tokenize(str)
91 }
92
93 fn tokenize_list(&mut self, strs: &[&str]) -> Vec<T> {
94 strs.iter()
95 .map(|s| self.tokenizer.tokenize(s))
96 .collect::<Vec<T>>()
97 }
98}
99
100impl<
101 't,
102 T,
103 SS: StrSim<T> + Display + ExpectTokenizerType,
104 TK: BaseTokenizer<T, Return = ByValue> + Display,
105 > StrSim<T> for SeqStrSim<'t, T, SS, TK>
106{
107 fn similarity_pre_tok2(
108 &self,
109 tokenized_key: &T,
110 tokenized_query: &T,
111 ) -> Result<f64, StrSimError> {
112 self.strsim
113 .similarity_pre_tok2(tokenized_key, tokenized_query)
114 }
115}