1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
/// Result of a distance/similarity algorithm.
///
/// The type parameter `R` is the type of the raw metric value. Accessor
/// methods are provided for `Result<usize>` (absolute metrics) and
/// `Result<f64>` (metrics whose raw value is a floating-point number).
///
/// Note that inside this crate the name shadows the prelude's
/// `std::result::Result` wherever this type is in scope.
#[derive(Debug, Clone, PartialEq)]
pub struct Result<R> {
    /// Indicates if it is a distance or a similarity metric.
    ///
    /// When `true`, `abs` is reported as-is by the distance accessors and the
    /// similarity is derived as `max - abs`; when `false`, it is the other
    /// way around.
    pub(crate) is_distance: bool,

    /// Absolute raw value of the metric.
    pub(crate) abs: R,

    /// Maximum possible value for the input of the given length.
    pub(crate) max: R,

    /// Length of the first analyzed sequence.
    pub(crate) len1: usize,

    /// Length of the second analyzed sequence.
    pub(crate) len2: usize,
}
impl Result<usize> {
    /// Raw value of the metric.
    ///
    /// It is equivalent to `dist` for distance metrics
    /// and to `sim` for similarity metrics.
    ///
    /// # Examples
    ///
    /// ```
    /// use textdistance::{Algorithm, Hamming};
    /// let h = Hamming::default();
    /// let res = h.for_str("abc", "acbd");
    /// assert!(res.val() == 3);
    /// ```
    pub fn val(&self) -> usize {
        self.abs
    }

    /// Absolute distance.
    ///
    /// A non-negative number showing how different the two sequences are.
    /// Two exactly the same sequences have the distance 0.
    ///
    /// The highest possible number varies based on the length of the input strings.
    /// Most often, each increment of this value indicates one symbol that differs
    /// in the input sequences.
    ///
    /// For similarity metrics the distance is derived as `max - abs`,
    /// saturating at 0 if the raw value exceeds the maximum.
    ///
    /// # Examples
    ///
    /// ```
    /// use textdistance::{Algorithm, Hamming};
    /// let h = Hamming::default();
    /// let res = h.for_str("abc", "acbd");
    /// assert!(res.dist() == 3);
    /// ```
    pub fn dist(&self) -> usize {
        if self.is_distance {
            self.abs
        } else {
            // A plain `-` would panic in debug builds and wrap around in
            // release builds if `abs` ever exceeded `max`.
            self.max.saturating_sub(self.abs)
        }
    }

    /// Absolute similarity.
    ///
    /// A non-negative number showing how similar the two sequences are.
    /// Two absolutely different sequences have the similarity 0.
    ///
    /// The highest possible number varies based on the length of the input strings.
    /// Most often, each increment of this value indicates one symbol that is the same
    /// in both sequences.
    ///
    /// For distance metrics the similarity is derived as `max - abs`,
    /// saturating at 0 if the raw value exceeds the maximum.
    ///
    /// # Examples
    ///
    /// ```
    /// use textdistance::{Algorithm, Hamming};
    /// let h = Hamming::default();
    /// let res = h.for_str("abc", "acbd");
    /// assert!(res.sim() == 1); // "a"
    /// ```
    pub fn sim(&self) -> usize {
        if self.is_distance {
            // Same underflow guard as in `dist`.
            self.max.saturating_sub(self.abs)
        } else {
            self.abs
        }
    }

    /// Normalized raw value of the metric.
    ///
    /// It is equivalent to `ndist` for distance metrics
    /// and to `nsim` for similarity metrics.
    ///
    /// # Examples
    ///
    /// ```
    /// use textdistance::{Algorithm, Hamming};
    /// let h = Hamming::default();
    /// let res = h.for_str("abc", "acbd");
    /// assert!(res.nval() == 3.0 / 4.0);
    /// ```
    pub fn nval(&self) -> f64 {
        if self.is_distance {
            self.ndist()
        } else {
            self.nsim()
        }
    }

    /// Normalized distance.
    ///
    /// A number from 0.0 to 1.0 showing how different the two sequences are.
    /// 0.0 indicates that the sequences are the same,
    /// and 1.0 indicates that the sequences are very different.
    ///
    /// When the maximum possible value is 0, the result is 0.0,
    /// the complement of the 1.0 returned by `nsim` in that case.
    ///
    /// # Examples
    ///
    /// ```
    /// use textdistance::{Algorithm, Hamming};
    /// let h = Hamming::default();
    /// let res = h.for_str("abc", "acbd");
    /// assert!(res.ndist() == 3.0 / 4.0);
    /// ```
    pub fn ndist(&self) -> f64 {
        if self.max == 0 {
            // Avoid dividing by zero and keep `ndist() + nsim() == 1.0`.
            0.0
        } else {
            self.dist() as f64 / self.max as f64
        }
    }

    /// Normalized similarity.
    ///
    /// A number from 0.0 to 1.0 showing how similar the two sequences are.
    /// 0.0 indicates that the sequences are very different,
    /// and 1.0 indicates that the sequences are the same.
    ///
    /// When the maximum possible value is 0, the result is 1.0.
    ///
    /// # Examples
    ///
    /// ```
    /// use textdistance::{Algorithm, Hamming};
    /// let h = Hamming::default();
    /// let res = h.for_str("abc", "acbd");
    /// assert!(res.nsim() == 1.0 / 4.0);
    /// ```
    pub fn nsim(&self) -> f64 {
        if self.max == 0 {
            // Avoid dividing by zero.
            1.0
        } else {
            self.sim() as f64 / self.max as f64
        }
    }
}
impl Result<f64> {
    /// Normalized raw value of the metric.
    ///
    /// This is the stored raw value returned unchanged: it matches `ndist`
    /// for distance metrics and `nsim` for similarity metrics.
    ///
    /// # Examples
    ///
    /// ```
    /// use textdistance::{Algorithm, Jaro};
    /// let h = Jaro::default();
    /// let res = h.for_str("test", "tset");
    /// assert_eq!(res.nval(), 0.9166666666666666);
    /// ```
    pub fn nval(&self) -> f64 {
        self.abs
    }

    /// Normalized distance.
    ///
    /// A number from 0.0 to 1.0 showing how different the two sequences are.
    /// 0.0 indicates that the sequences are the same,
    /// and 1.0 indicates that the sequences are very different.
    ///
    /// For similarity metrics the value is derived as `max - abs`.
    ///
    /// # Examples
    ///
    /// ```
    /// use textdistance::{Algorithm, Jaro};
    /// let h = Jaro::default();
    /// let res = h.for_str("test", "tset");
    /// assert_eq!(res.ndist(), 0.08333333333333337);
    /// ```
    pub fn ndist(&self) -> f64 {
        if !self.is_distance {
            // The raw value is a similarity: report its complement.
            return self.max - self.abs;
        }
        self.abs
    }

    /// Normalized similarity.
    ///
    /// A number from 0.0 to 1.0 showing how similar the two sequences are.
    /// 0.0 indicates that the sequences are very different,
    /// and 1.0 indicates that the sequences are the same.
    ///
    /// For distance metrics the value is derived as `max - abs`.
    ///
    /// # Examples
    ///
    /// ```
    /// use textdistance::{Algorithm, Jaro};
    /// let h = Jaro::default();
    /// let res = h.for_str("test", "tset");
    /// assert_eq!(res.nsim(), 0.9166666666666666);
    /// ```
    pub fn nsim(&self) -> f64 {
        if !self.is_distance {
            // The raw value already is a similarity.
            return self.abs;
        }
        let complement = self.max - self.abs;
        complement
    }
}