similarity_trait/lib.rs
1//! # Similarity trait
2//!
3//! The Similarity trait defines one function with one input and one output.
4//!
5//! ```no_run
6//! pub trait Similarity<InputType, OutputType> {
7//! fn similarity(input: InputType) -> OutputType;
8//! }
9//! ```
10//!
11//! This trait is purposefully very generic so you can use it as you wish.
12//!
13//! We use this trait in our programs to create multiple kinds of similarity
14//! functionality, such as for trying various similarity algorithms that we want
15//! to use with the same input type and same output type.
16//!
17//! ## Similarity of a pair
18//!
19//! One way to use this trait is to calculate the similarity of a pair of
20//! values, such as two numbers, or two strings, or two images.
21//!
22//! This is sometimes known as pairwise similarity or pair matching.
23//!
//! Example: given two numbers, return the percent change from the first to the second.
25//!
26//! ```rust
27//! use similarity_trait::Similarity;
28//! struct MyStruct;
29//!
30//! impl Similarity<(i32, i32), f64> for MyStruct {
31//! /// Similarity of numbers via percent change.
32//! fn similarity(input: (i32, i32)) -> f64 {
33//! (100.0 * (input.1 - input.0) as f64) / i32::abs(input.0) as f64
34//! }
35//! }
36//!
37//! let percent_change = MyStruct::similarity((100, 120));
38//! assert_eq!(percent_change, 20.0);
39//! ```
40//!
41//! ## Similarity of a collection
42//!
43//! One way to use this trait is to calculate the similarity of a collection of
44//! values, such as an array of numbers, or vector of strings, or set of images.
45//!
//! This is sometimes called intra-group similarity or statistical dispersion.
47//!
48//! Example: given numbers, then return the population standard deviation.
49//!
50//! ```rust
51//! use similarity_trait::Similarity;
52//! struct MyStruct;
53//!
54//! impl Similarity<Vec<f64>, Option<f64>> for MyStruct {
55//! /// Similarity of numbers via population standard deviation
56//! fn similarity(numbers: Vec<f64>) -> Option<f64> {
57//! if numbers.is_empty() { return None }
58//! let mean = numbers.iter().sum::<f64>() / numbers.len() as f64;
59//! let variance = numbers.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / numbers.len() as f64;
60//! Some(variance.sqrt())
61//! }
62//! }
63//!
64//! let numbers = vec![2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
65//! let standard_deviation = MyStruct::similarity(numbers).expect("similarity");
66//! assert!(standard_deviation > 1.999 && standard_deviation < 2.001);
67//! ```
68//!
69//! ## Similarity of a pair or a collection
70//!
71//! You may want to choose whether you prefer to calculate the similarity of a
72//! pair (such as two strings) or a collection (such as a vector of strings).
73//!
74//! Example: given a pair of strings, then return the Hamming distance.
75//!
76//! ```rust
77//! use similarity_trait::Similarity;
78//! struct MyStruct;
79//!
80//! impl Similarity<(&str, &str), usize> for MyStruct {
81//! /// Similarity of a pair of strings via Hamming distance.
82//! fn similarity(pair: (&str, &str)) -> usize {
83//! pair.0.chars().zip(pair.1.chars()).filter(|(c1, c2)| c1 != c2).count()
84//! }
85//! }
86//!
87//! let pair = ("information", "informatics");
88//! let hamming_distance = MyStruct::similarity(pair);
89//! assert_eq!(hamming_distance, 2);
90//! ```
91//!
92//! Example: given a collection of strings, then return the maximum Hamming
93//! distance.
94//!
95//! ```rust
96//! use similarity_trait::Similarity;
97//! struct MyStruct;
98//!
99//! impl Similarity<Vec<&str>, usize> for MyStruct {
100//! /// Similarity of a collection of strings via maximum Hamming distance.
101//! fn similarity(collection: Vec<&str>) -> usize {
102//! let mut max = 0;
103//! for i in 0..collection.len() {
104//! for j in (i + 1)..collection.len() {
105//! max = std::cmp::max(max, collection[i].chars().zip(collection[j].chars()).filter(|(c1, c2)| c1 != c2).count())
106//! }
107//! }
108//! max
109//! }
110//! }
111//!
112//! let collection = vec!["information", "informatics", "affirmation"];
113//! let maximum_hamming_distance = MyStruct::similarity(collection);
114//! assert_eq!(maximum_hamming_distance, 5);
115//! ```
116
/// A similarity measure that maps one input value to one output value.
///
/// `InputType` is whatever is being measured (for example a pair or a
/// collection of values) and `OutputType` is whatever the measure produces
/// (for example a number or an `Option`). The trait places no bounds on
/// either type parameter.
///
/// `similarity` is an associated function without a `self` receiver, so an
/// implementation is invoked through the implementing type, e.g.
/// `MyStruct::similarity(input)`. Because the trait is generic, a single type
/// may implement it for several input/output combinations.
pub trait Similarity<InputType, OutputType> {
    /// Computes the similarity of `input`.
    fn similarity(input: InputType) -> OutputType;
}
120
121#[cfg(test)]
122mod tests {
123 use super::*;
/// Field-less marker type that carries the example `Similarity`
/// implementations defined in the sub-modules of this test module.
struct MyStruct {}
125
126 mod percent_change {
127 use super::*;
128
129 impl Similarity<(i32, i32), f64> for MyStruct {
130 /// Similarity of two numbers via percent change.
131 fn similarity(input: (i32, i32)) -> f64 {
132 (100.0 * (input.1 - input.0) as f64) / i32::abs(input.0) as f64
133 }
134 }
135
136 #[test]
137 fn test() {
138 let percent_change = MyStruct::similarity((100, 120));
139 assert_eq!(percent_change, 20.0);
140 }
141
142 }
143
144 mod population_standard_deviation {
145 use super::*;
146
147 impl Similarity<Vec<f64>, Option<f64>> for MyStruct {
148 /// Similarity of numbers via population standard deviation.
149 fn similarity(numbers: Vec<f64>) -> Option<f64> {
150 if numbers.is_empty() { return None }
151 let mean = numbers.iter().sum::<f64>() / numbers.len() as f64;
152 let variance = numbers.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / numbers.len() as f64;
153 Some(variance.sqrt())
154 }
155 }
156
157 #[test]
158 fn test() {
159 let numbers = vec![2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
160 let standard_deviation = MyStruct::similarity(numbers).expect("similarity");
161 assert!(standard_deviation > 1.999 && standard_deviation < 2.001);
162 }
163
164 }
165
166 mod hamming_distance_for_a_pair_of_strings {
167 use super::*;
168
169 impl Similarity<(&str, &str), usize> for MyStruct {
170 /// Similarity of two strings via Hamming distance.
171 fn similarity(input: (&str, &str)) -> usize {
172 input.0.chars().zip(input.1.chars()).filter(|(c1, c2)| c1 != c2).count()
173 }
174 }
175
176 #[test]
177 fn test() {
178 let pair = ("information", "informatics");
179 let hamming_distance = MyStruct::similarity(pair);
180 assert_eq!(hamming_distance, 2);
181 }
182
183 }
184
185 mod hamming_distance_for_a_collection_of_strings {
186 use super::*;
187
188 impl Similarity<Vec<&str>, usize> for MyStruct {
189 /// Similarity of a collection strings via maximum Hamming distance.
190 fn similarity(strings: Vec<&str>) -> usize {
191 let mut max = 0;
192 for i in 0..strings.len() {
193 for j in (i + 1)..strings.len() {
194 max = std::cmp::max(max, strings[i].chars().zip(strings[j].chars()).filter(|(c1, c2)| c1 != c2).count())
195 }
196 }
197 max
198 }
199 }
200
201 #[test]
202 fn test() {
203 let collection = vec!["information", "informatics", "affirmation"];
204 let maximum_hamming_distance = MyStruct::similarity(collection);
205 assert_eq!(maximum_hamming_distance, 5);
206 }
207
208 }
209
210}