tangram_text 0.7.0

Tangram makes it easy for programmers to train, deploy, and monitor machine learning models.
Documentation
use std::{borrow::Cow, fmt::Display, hash::Hash};

/// An n-gram that owns its tokens.
///
/// `Unigram` holds a single token and `Bigram` holds two tokens, each stored as a `String`.
/// Because of `#[serde(untagged)]`, serde reads and writes the variant contents directly,
/// with no variant name attached.
#[derive(serde::Serialize, serde::Deserialize, Debug, Clone, Eq, PartialOrd, Ord)]
#[serde(untagged)]
pub enum NGram {
	/// A single token.
	Unigram(String),
	/// Two tokens; `Display` prints them separated by a single space.
	Bigram(String, String),
}

impl PartialEq for NGram {
	/// Two n-grams are equal when they are the same variant and every token matches.
	fn eq(&self, other: &Self) -> bool {
		match self {
			NGram::Unigram(lhs) => matches!(other, NGram::Unigram(rhs) if lhs == rhs),
			NGram::Bigram(lhs_first, lhs_second) => matches!(
				other,
				NGram::Bigram(rhs_first, rhs_second)
					if lhs_first == rhs_first && lhs_second == rhs_second
			),
		}
	}
}

impl Hash for NGram {
	/// Feeds a `usize` variant tag (0 for `Unigram`, 1 for `Bigram`) into the hasher,
	/// followed by the token or tokens in order.
	fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
		let variant_tag: usize = match self {
			NGram::Unigram(_) => 0,
			NGram::Bigram(_, _) => 1,
		};
		variant_tag.hash(state);
		match self {
			NGram::Unigram(token) => token.hash(state),
			NGram::Bigram(first, second) => {
				first.hash(state);
				second.hash(state);
			}
		}
	}
}

impl Display for NGram {
	/// Writes a unigram as its token, and a bigram as its two tokens separated by one space.
	fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
		match self {
			NGram::Unigram(token) => f.write_str(token),
			NGram::Bigram(first, second) => {
				f.write_str(first)?;
				f.write_str(" ")?;
				f.write_str(second)
			}
		}
	}
}

/// An n-gram whose tokens are `Cow<'a, str>`, so each token may be borrowed or owned.
///
/// It has the same two variants as [`NGram`]. Unlike `NGram`, this enum carries no
/// `#[serde(untagged)]` attribute, so serde's default enum representation applies.
#[derive(serde::Serialize, serde::Deserialize, Debug, Clone, Eq, PartialOrd, Ord)]
pub enum NGramRef<'a> {
	/// A single token.
	Unigram(Cow<'a, str>),
	/// Two tokens.
	Bigram(Cow<'a, str>, Cow<'a, str>),
}

impl<'a> PartialEq for NGramRef<'a> {
	/// Two n-gram references are equal when they are the same variant and every token matches.
	fn eq(&self, other: &Self) -> bool {
		match self {
			NGramRef::Unigram(lhs) => matches!(other, NGramRef::Unigram(rhs) if lhs == rhs),
			NGramRef::Bigram(lhs_first, lhs_second) => matches!(
				other,
				NGramRef::Bigram(rhs_first, rhs_second)
					if lhs_first == rhs_first && lhs_second == rhs_second
			),
		}
	}
}

impl<'a> Hash for NGramRef<'a> {
	/// Feeds a `usize` variant tag (0 for `Unigram`, 1 for `Bigram`) into the hasher,
	/// followed by the token or tokens in order.
	///
	/// This is the same sequence of `hash` calls that the `Hash` impl for `NGram` makes.
	fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
		let variant_tag: usize = match self {
			NGramRef::Unigram(_) => 0,
			NGramRef::Bigram(_, _) => 1,
		};
		variant_tag.hash(state);
		match self {
			NGramRef::Unigram(token) => token.hash(state),
			NGramRef::Bigram(first, second) => {
				first.hash(state);
				second.hash(state);
			}
		}
	}
}

impl<'a> indexmap::Equivalent<NGram> for NGramRef<'a> {
	/// Returns `true` when `key` is the same variant as `self` and holds the same token text.
	fn equivalent(&self, key: &NGram) -> bool {
		match self {
			NGramRef::Unigram(token) => matches!(key, NGram::Unigram(owned) if token == owned),
			NGramRef::Bigram(first, second) => matches!(
				key,
				NGram::Bigram(owned_first, owned_second)
					if first == owned_first && second == owned_second
			),
		}
	}
}

impl<'a> NGramRef<'a> {
	/// Builds an owned [`NGram`] of the same variant by copying each token into a new `String`.
	pub fn to_ngram(&self) -> NGram {
		match self {
			NGramRef::Unigram(token) => {
				let text: &str = token.as_ref();
				NGram::Unigram(text.to_owned())
			}
			NGramRef::Bigram(token_a, token_b) => {
				let first: &str = token_a.as_ref();
				let second: &str = token_b.as_ref();
				NGram::Bigram(first.to_owned(), second.to_owned())
			}
		}
	}
}

/// Names the two n-gram kinds without carrying any token data.
#[derive(Clone, Debug, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
pub enum NGramType {
	/// The single-token kind.
	Unigram,
	/// The two-token kind.
	Bigram,
}