1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
use std::borrow::Borrow;
use prelude::{NaiveDocument, NormalizationFactor, ProcessedDocument, Tf};
/// Binary term-frequency weighting.
///
/// The weight only records whether the document reports the term as
/// present; how often it occurs is ignored.
#[derive(Clone, Copy)]
pub struct BinaryTf;

impl<T> Tf<T> for BinaryTf
where
    T: NaiveDocument,
{
    /// Returns `1.0` when `doc.term_exists(term)` is true and `0.0` otherwise.
    #[inline]
    fn tf<K>(term: K, doc: &T) -> f64
    where
        K: Borrow<T::Term>,
    {
        let present = doc.term_exists(term);
        if present {
            1.0
        } else {
            0.0
        }
    }
}
/// Raw-frequency term weighting: the weight is the term's count in the
/// document, unchanged apart from conversion to `f64`.
///
/// NOTE(review): the private `f64` field is never read by the code in this
/// section; confirm whether it is intentional before relying on it.
#[derive(Clone, Copy)]
pub struct RawFrequencyTf(f64);

impl<T> Tf<T> for RawFrequencyTf
where
    T: ProcessedDocument,
{
    /// Returns `doc.term_frequency(term)` converted to `f64`.
    #[inline]
    fn tf<K>(term: K, doc: &T) -> f64
    where
        K: Borrow<T::Term>,
    {
        let occurrences = doc.term_frequency(term);
        occurrences as f64
    }
}
/// Log-normalized term-frequency weighting.
///
/// Dampens the raw count `f` of a term with the natural logarithm:
/// the weight is `1 + ln(f)` when `f > 0` and `0` when the term does not
/// occur.
#[derive(Copy, Clone)]
pub struct LogNormalizationTf;

impl<T> Tf<T> for LogNormalizationTf
where
    T: ProcessedDocument,
{
    /// Returns `1 + ln(f)` for a term with frequency `f > 0`, otherwise `0.0`.
    ///
    /// `f` is `doc.term_frequency(term)` converted to `f64`.
    #[inline]
    fn tf<K>(term: K, doc: &T) -> f64
    where
        K: Borrow<T::Term>,
    {
        let frequency = doc.term_frequency(term) as f64;

        // `ln(0)` is negative infinity, which would give an absent term an
        // infinitely negative weight and poison any sum or product built on
        // it. A term that does not occur contributes nothing instead.
        if frequency > 0f64 {
            1f64 + frequency.ln()
        } else {
            0f64
        }
    }
}
/// Marker trait for "double normalization K" term-frequency weighting.
///
/// Any type implementing this trait (and therefore `NormalizationFactor`)
/// receives the blanket `Tf` implementation below, with `K` supplied by
/// `NormalizationFactor::factor()`.
pub trait DoubleKNormalizationTf: NormalizationFactor {}

impl<T, S> Tf<T> for S
where
    S: DoubleKNormalizationTf,
    T: ProcessedDocument,
{
    /// Returns `K + (1 - K) * f / f_max`.
    ///
    /// * `K` is `S::factor()`.
    /// * `f` is `doc.term_frequency(term)`.
    /// * `f_max` is the frequency of the term returned by `doc.max()`, or
    ///   `1` when `doc.max()` returns `None`.
    ///
    /// NOTE(review): presumably `doc.max()` yields the most frequent term and
    /// its frequency is never zero; if it can be zero the division below
    /// produces `inf`/`NaN`. Confirm against `ProcessedDocument`.
    #[inline]
    fn tf<K>(term: K, doc: &T) -> f64
    where
        K: Borrow<T::Term>,
    {
        // Denominator: frequency of the document's `max()` term, defaulting
        // to 1 so the ratio stays well-defined when there is no such term.
        let peak = doc.max().map_or(1f64, |m| doc.term_frequency(m) as f64);

        let k = S::factor();
        let relative = (doc.term_frequency(term) as f64) / peak;

        k + ((1f64 - k) * relative)
    }
}
/// Double normalization with a fixed factor of one half.
///
/// Opts into `DoubleKNormalizationTf` and supplies `0.5` as the
/// normalization factor.
#[derive(Clone, Copy)]
pub struct DoubleHalfNormalizationTf;

impl DoubleKNormalizationTf for DoubleHalfNormalizationTf {}

impl NormalizationFactor for DoubleHalfNormalizationTf {
    /// Returns the constant normalization factor `0.5`.
    #[inline]
    fn factor() -> f64 {
        0.5
    }
}