use std::borrow::Borrow;
/// A document whose contents are described in terms of a single term type.
///
/// This is the base trait the other document traits in this file build on;
/// it only names the term type and declares no behaviour of its own.
pub trait Document {
/// The type used to represent one term of the document.
type Term;
}
/// A [`Document`] that can answer whether a given term occurs in it.
pub trait NaiveDocument: Document {
    /// Reports whether `term` is present in this document.
    ///
    /// `term` may be supplied owned or borrowed: any type implementing
    /// `Borrow<Self::Term>` is accepted.
    fn term_exists<K: Borrow<Self::Term>>(&self, term: K) -> bool;
}
/// A [`Document`] that can report how often a term occurs in it.
pub trait ProcessedDocument: Document {
    /// Returns the frequency of `term` in this document as a count.
    ///
    /// `term` may be supplied owned or borrowed: any type implementing
    /// `Borrow<Self::Term>` is accepted.
    fn term_frequency<K: Borrow<Self::Term>>(&self, term: K) -> usize;

    /// Returns a reference to one of the document's terms, or `None`.
    ///
    /// NOTE(review): presumably the term with the highest frequency, with
    /// `None` for an empty document — the trait itself does not pin this
    /// down; confirm against implementors.
    fn max(&self) -> Option<&Self::Term>;
}
/// A [`Document`] that can hand out an iterator over references to its terms.
///
/// The lifetime `'a` is the lifetime of the yielded term references; the
/// where-clause requires the term type to outlive it.
pub trait ExpandableDocument<'a>: Document
where
<Self as Document>::Term: 'a,
{
/// The iterator type returned by `terms`; it yields `&'a Self::Term`.
type TermIterator: Iterator<Item = &'a Self::Term>;
/// Returns an iterator over this document's terms.
// NOTE(review): whether a repeated term is yielded once or once per
// occurrence is not specified by this trait — confirm with implementors.
fn terms(&self) -> Self::TermIterator;
}
impl<D, T> NaiveDocument for D
where
D: ProcessedDocument<Term = T>,
{
#[inline]
fn term_exists<K>(&self, term: K) -> bool
where
K: Borrow<T>,
{
self.term_frequency(term) > 0
}
}
/// A strategy for weighting a term against a single document of type `T`.
///
/// `tf` takes no receiver, so implementors act as type-level strategies
/// rather than as values.
pub trait Tf<T: NaiveDocument> {
    /// Computes the term-frequency weight of `term` within `doc`.
    ///
    /// `term` may be supplied owned or borrowed: any type implementing
    /// `Borrow<T::Term>` is accepted.
    fn tf<K: Borrow<T::Term>>(term: K, doc: &T) -> f64;
}
/// A strategy for weighting a term against a collection of documents of
/// type `T`.
///
/// `idf` takes no receiver, so implementors act as type-level strategies
/// rather than as values.
pub trait Idf<T: NaiveDocument> {
    /// Computes the inverse-document-frequency weight of `term` over `docs`.
    ///
    /// `docs` is an iterator of document references and is taken by value.
    /// `term` may be supplied owned or borrowed: any type implementing
    /// `Borrow<T::Term>` is accepted.
    fn idf<'a, I: Iterator<Item = &'a T>, K: Borrow<T::Term>>(term: K, docs: I) -> f64
    where
        T: 'a;
}
/// Supplies an `f64` factor through a receiver-less associated function.
///
/// NOTE(review): presumably consumed by normalized weighting strategies
/// defined elsewhere — no user of this trait is visible in this file; confirm.
pub trait NormalizationFactor {
/// Returns the factor. There is no receiver, so the value is chosen per
/// implementing type rather than per instance.
fn factor() -> f64;
}
/// Supplies an `f64` factor through a receiver-less associated function.
///
/// NOTE(review): presumably consumed by smoothed weighting strategies
/// defined elsewhere — no user of this trait is visible in this file; confirm.
pub trait SmoothingFactor {
/// Returns the factor. There is no receiver, so the value is chosen per
/// implementing type rather than per instance.
fn factor() -> f64;
}
/// Combines a [`Tf`] strategy and an [`Idf`] strategy into a single TF-IDF
/// weighting for documents of type `T`.
pub trait TfIdf<T: NaiveDocument> {
    /// The term-frequency strategy applied to the single document.
    type Tf: Tf<T>;
    /// The inverse-document-frequency strategy applied to the collection.
    type Idf: Idf<T>;

    /// Computes the weight of `term` for `doc` relative to `docs`.
    ///
    /// The default implementation multiplies `Self::Tf::tf(term, doc)` by
    /// `Self::Idf::idf(term, docs)`. Both strategies receive the term by
    /// reference, so `term` is never moved into either of them.
    fn tfidf<'a, K, I>(term: K, doc: &T, docs: I) -> f64
    where
        K: Borrow<T::Term>,
        I: Iterator<Item = &'a T>,
        T: 'a,
    {
        // Borrow once; the shared reference is `Copy` and serves both calls.
        let term_ref: &T::Term = term.borrow();
        let tf_weight = <Self::Tf as Tf<T>>::tf(term_ref, doc);
        let idf_weight = <Self::Idf as Idf<T>>::idf(term_ref, docs);
        tf_weight * idf_weight
    }
}