1use std::{borrow::Cow, fmt::Display, hash::Hash};
2
3#[derive(serde::Serialize, serde::Deserialize, Debug, Clone, Eq, PartialOrd, Ord)]
4#[serde(untagged)]
5pub enum NGram {
6 Unigram(String),
7 Bigram(String, String),
8}
9
10impl PartialEq for NGram {
11 fn eq(&self, other: &Self) -> bool {
12 match (self, other) {
13 (NGram::Unigram(self_token), NGram::Unigram(other_token)) => self_token == other_token,
14 (
15 NGram::Bigram(self_token_a, self_token_b),
16 NGram::Bigram(other_token_a, other_token_b),
17 ) => self_token_a == other_token_a && self_token_b == other_token_b,
18 _ => false,
19 }
20 }
21}
22
23impl Hash for NGram {
24 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
25 match self {
26 NGram::Unigram(token) => {
27 0usize.hash(state);
28 token.hash(state)
29 }
30 NGram::Bigram(token_a, token_b) => {
31 1usize.hash(state);
32 token_a.hash(state);
33 token_b.hash(state);
34 }
35 }
36 }
37}
38
39impl Display for NGram {
40 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
41 match self {
42 NGram::Unigram(token) => {
43 write!(f, "{}", token)
44 }
45 NGram::Bigram(token_a, token_b) => {
46 write!(f, "{} {}", token_a, token_b)
47 }
48 }
49 }
50}
51
52#[derive(serde::Serialize, serde::Deserialize, Debug, Clone, Eq, PartialOrd, Ord)]
53pub enum NGramRef<'a> {
54 Unigram(Cow<'a, str>),
55 Bigram(Cow<'a, str>, Cow<'a, str>),
56}
57
58impl<'a> PartialEq for NGramRef<'a> {
59 fn eq(&self, other: &Self) -> bool {
60 match (self, other) {
61 (NGramRef::Unigram(self_token), NGramRef::Unigram(other_token)) => {
62 self_token == other_token
63 }
64 (
65 NGramRef::Bigram(self_token_a, self_token_b),
66 NGramRef::Bigram(other_token_a, other_token_b),
67 ) => self_token_a == other_token_a && self_token_b == other_token_b,
68 _ => false,
69 }
70 }
71}
72
73impl<'a> Hash for NGramRef<'a> {
74 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
75 match self {
76 NGramRef::Unigram(token) => {
77 0usize.hash(state);
78 token.hash(state)
79 }
80 NGramRef::Bigram(token_a, token_b) => {
81 1usize.hash(state);
82 token_a.hash(state);
83 token_b.hash(state);
84 }
85 }
86 }
87}
88
89impl<'a> indexmap::Equivalent<NGram> for NGramRef<'a> {
90 fn equivalent(&self, key: &NGram) -> bool {
91 match (self, key) {
92 (NGramRef::Unigram(unigram_ref), NGram::Unigram(unigram)) => unigram_ref == unigram,
93 (NGramRef::Bigram(bigram_a_ref, bigram_b_ref), NGram::Bigram(bigram_a, bigram_b)) => {
94 bigram_a_ref == bigram_a && bigram_b_ref == bigram_b
95 }
96 _ => false,
97 }
98 }
99}
100
101impl<'a> NGramRef<'a> {
102 pub fn to_ngram(&self) -> NGram {
103 match self {
104 NGramRef::Unigram(token) => NGram::Unigram(token.as_ref().to_owned()),
105 NGramRef::Bigram(token_a, token_b) => {
106 NGram::Bigram(token_a.as_ref().to_owned(), token_b.as_ref().to_owned())
107 }
108 }
109 }
110}
111
112#[derive(Clone, Debug, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
113pub enum NGramType {
114 Unigram,
115 Bigram,
116}