1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
use std::{borrow::Cow, fmt::Display, hash::Hash};

/// An owned n-gram: either a single token or an ordered pair of tokens.
///
/// `Eq`, `PartialOrd` and `Ord` are derived, while `PartialEq` and `Hash` are
/// written by hand further down in this file.
///
/// `#[serde(untagged)]` asks serde to (de)serialize values without a variant
/// tag, so the variant is determined by the shape of the data alone.
#[derive(serde::Serialize, serde::Deserialize, Debug, Clone, Eq, PartialOrd, Ord)]
#[serde(untagged)]
pub enum NGram {
	/// A single token.
	Unigram(String),
	/// Two tokens, in order.
	Bigram(String, String),
}

/// Structural equality: two n-grams are equal when they are the same variant
/// and their tokens compare equal position by position.
impl PartialEq for NGram {
	fn eq(&self, other: &Self) -> bool {
		// Dispatch on `self` so every variant is handled explicitly; `other`
		// only matches when it is the same variant with equal tokens.
		match self {
			NGram::Unigram(lhs) => matches!(other, NGram::Unigram(rhs) if lhs == rhs),
			NGram::Bigram(lhs_first, lhs_second) => matches!(
				other,
				NGram::Bigram(rhs_first, rhs_second)
					if lhs_first == rhs_first && lhs_second == rhs_second
			),
		}
	}
}

/// Hashes a variant tag (`0usize` for a unigram, `1usize` for a bigram)
/// followed by each token in order.
///
/// `NGramRef` in this file hashes the same sequence and implements
/// `indexmap::Equivalent<NGram>`, so the two implementations need to stay
/// in step with each other.
impl Hash for NGram {
	fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
		// Decompose into (tag, first token, optional second token) up front,
		// then feed the pieces to the hasher in a single fixed order.
		let (tag, first, second): (usize, &str, Option<&str>) = match self {
			NGram::Unigram(token) => (0, token.as_str(), None),
			NGram::Bigram(token_a, token_b) => (1, token_a.as_str(), Some(token_b.as_str())),
		};

		tag.hash(state);
		first.hash(state);
		if let Some(second) = second {
			second.hash(state);
		}
	}
}

/// Renders a unigram as its token and a bigram as its two tokens separated
/// by a single space.
impl Display for NGram {
	fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
		match self {
			NGram::Unigram(token) => f.write_str(token),
			NGram::Bigram(first, second) => {
				// Emit the pieces one after another, stopping at the first
				// write error.
				f.write_str(first)?;
				f.write_str(" ")?;
				f.write_str(second)
			}
		}
	}
}

/// Counterpart of `NGram` whose tokens are `Cow<'a, str>`, so each token may
/// be either borrowed for `'a` or owned.
///
/// `Eq`, `PartialOrd` and `Ord` are derived, while `PartialEq` and `Hash` are
/// written by hand further down in this file. The type also implements
/// `indexmap::Equivalent<NGram>` and can be converted with `to_ngram`.
///
/// Unlike `NGram`, this enum carries no `#[serde(untagged)]` attribute, so
/// serde's default enum representation applies.
#[derive(serde::Serialize, serde::Deserialize, Debug, Clone, Eq, PartialOrd, Ord)]
pub enum NGramRef<'a> {
	/// A single token.
	Unigram(Cow<'a, str>),
	/// Two tokens, in order.
	Bigram(Cow<'a, str>, Cow<'a, str>),
}

/// Structural equality: same variant and equal token text in every position.
/// Tokens are compared as `Cow` values, so a borrowed token equals an owned
/// one with the same contents.
impl<'a> PartialEq for NGramRef<'a> {
	fn eq(&self, other: &Self) -> bool {
		match self {
			NGramRef::Unigram(lhs) => match other {
				NGramRef::Unigram(rhs) => lhs == rhs,
				NGramRef::Bigram(..) => false,
			},
			NGramRef::Bigram(lhs_first, lhs_second) => match other {
				NGramRef::Bigram(rhs_first, rhs_second) => {
					lhs_first == rhs_first && lhs_second == rhs_second
				}
				NGramRef::Unigram(_) => false,
			},
		}
	}
}

/// Hashes a variant tag (`0usize` for a unigram, `1usize` for a bigram)
/// followed by each token's text in order.
///
/// This is the same sequence `NGram`'s hand-written `Hash` feeds to the
/// hasher; the `indexmap::Equivalent<NGram>` implementation in this file
/// relies on the two staying in step.
impl<'a> Hash for NGramRef<'a> {
	fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
		// Decompose into (tag, first token, optional second token) up front,
		// then feed the pieces to the hasher in a single fixed order.
		let (tag, first, second): (usize, &str, Option<&str>) = match self {
			NGramRef::Unigram(token) => (0, &**token, None),
			NGramRef::Bigram(token_a, token_b) => (1, &**token_a, Some(&**token_b)),
		};

		tag.hash(state);
		first.hash(state);
		if let Some(second) = second {
			second.hash(state);
		}
	}
}

/// Compares an `NGramRef` against an owned `NGram` key: they are equivalent
/// when they are the same variant and every token has the same text.
///
/// Both types hash a variant tag followed by their tokens, which keeps this
/// equivalence consistent with their `Hash` implementations.
impl<'a> indexmap::Equivalent<NGram> for NGramRef<'a> {
	fn equivalent(&self, key: &NGram) -> bool {
		// Dispatch on `self`; `key` only matches when it is the same variant
		// with equal tokens.
		match self {
			NGramRef::Unigram(token) => {
				matches!(key, NGram::Unigram(stored) if token == stored)
			}
			NGramRef::Bigram(first, second) => matches!(
				key,
				NGram::Bigram(stored_first, stored_second)
					if first == stored_first && second == stored_second
			),
		}
	}
}

impl<'a> NGramRef<'a> {
	/// Builds an owned `NGram` of the same variant by copying every token
	/// into a fresh `String`. `self` is left untouched.
	pub fn to_ngram(&self) -> NGram {
		match self {
			Self::Unigram(token) => NGram::Unigram(String::from(&**token)),
			Self::Bigram(first, second) => {
				let owned_first = String::from(&**first);
				let owned_second = String::from(&**second);
				NGram::Bigram(owned_first, owned_second)
			}
		}
	}
}

/// Names an n-gram kind without carrying any tokens; the variants mirror the
/// variant names of `NGram` and `NGramRef`.
// NOTE(review): nothing in this part of the file uses this type — confirm the
// intended consumers before changing its derives or representation.
#[derive(Clone, Debug, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
pub enum NGramType {
	/// The single-token kind.
	Unigram,
	/// The two-token kind.
	Bigram,
}