tangram_text/
ngram.rs

1use std::{borrow::Cow, fmt::Display, hash::Hash};
2
3#[derive(serde::Serialize, serde::Deserialize, Debug, Clone, Eq, PartialOrd, Ord)]
4#[serde(untagged)]
5pub enum NGram {
6	Unigram(String),
7	Bigram(String, String),
8}
9
10impl PartialEq for NGram {
11	fn eq(&self, other: &Self) -> bool {
12		match (self, other) {
13			(NGram::Unigram(self_token), NGram::Unigram(other_token)) => self_token == other_token,
14			(
15				NGram::Bigram(self_token_a, self_token_b),
16				NGram::Bigram(other_token_a, other_token_b),
17			) => self_token_a == other_token_a && self_token_b == other_token_b,
18			_ => false,
19		}
20	}
21}
22
23impl Hash for NGram {
24	fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
25		match self {
26			NGram::Unigram(token) => {
27				0usize.hash(state);
28				token.hash(state)
29			}
30			NGram::Bigram(token_a, token_b) => {
31				1usize.hash(state);
32				token_a.hash(state);
33				token_b.hash(state);
34			}
35		}
36	}
37}
38
39impl Display for NGram {
40	fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
41		match self {
42			NGram::Unigram(token) => {
43				write!(f, "{}", token)
44			}
45			NGram::Bigram(token_a, token_b) => {
46				write!(f, "{} {}", token_a, token_b)
47			}
48		}
49	}
50}
51
52#[derive(serde::Serialize, serde::Deserialize, Debug, Clone, Eq, PartialOrd, Ord)]
53pub enum NGramRef<'a> {
54	Unigram(Cow<'a, str>),
55	Bigram(Cow<'a, str>, Cow<'a, str>),
56}
57
58impl<'a> PartialEq for NGramRef<'a> {
59	fn eq(&self, other: &Self) -> bool {
60		match (self, other) {
61			(NGramRef::Unigram(self_token), NGramRef::Unigram(other_token)) => {
62				self_token == other_token
63			}
64			(
65				NGramRef::Bigram(self_token_a, self_token_b),
66				NGramRef::Bigram(other_token_a, other_token_b),
67			) => self_token_a == other_token_a && self_token_b == other_token_b,
68			_ => false,
69		}
70	}
71}
72
73impl<'a> Hash for NGramRef<'a> {
74	fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
75		match self {
76			NGramRef::Unigram(token) => {
77				0usize.hash(state);
78				token.hash(state)
79			}
80			NGramRef::Bigram(token_a, token_b) => {
81				1usize.hash(state);
82				token_a.hash(state);
83				token_b.hash(state);
84			}
85		}
86	}
87}
88
89impl<'a> indexmap::Equivalent<NGram> for NGramRef<'a> {
90	fn equivalent(&self, key: &NGram) -> bool {
91		match (self, key) {
92			(NGramRef::Unigram(unigram_ref), NGram::Unigram(unigram)) => unigram_ref == unigram,
93			(NGramRef::Bigram(bigram_a_ref, bigram_b_ref), NGram::Bigram(bigram_a, bigram_b)) => {
94				bigram_a_ref == bigram_a && bigram_b_ref == bigram_b
95			}
96			_ => false,
97		}
98	}
99}
100
101impl<'a> NGramRef<'a> {
102	pub fn to_ngram(&self) -> NGram {
103		match self {
104			NGramRef::Unigram(token) => NGram::Unigram(token.as_ref().to_owned()),
105			NGramRef::Bigram(token_a, token_b) => {
106				NGram::Bigram(token_a.as_ref().to_owned(), token_b.as_ref().to_owned())
107			}
108		}
109	}
110}
111
112#[derive(Clone, Debug, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
113pub enum NGramType {
114	Unigram,
115	Bigram,
116}