similarity_trait/
lib.rs

1//! # Similarity trait
2//!
3//! The Similarity trait defines one function with one input and one output.
4//!
5//! ```no_run
6//! pub trait Similarity<InputType, OutputType> {
7//!     fn similarity(input: InputType) -> OutputType;
8//! }
9//! ```
10//! 
11//! This trait is purposefully very generic so you can use it as you wish.
12//!
13//! We use this trait in our programs to create multiple kinds of similarity
14//! functionality, such as for trying various similarity algorithms that we want
15//! to use with the same input type and same output type.
16//!
17//! ## Similarity of a pair
18//!
19//! One way to use this trait is to calculate the similarity of a pair of
20//! values, such as two numbers, or two strings, or two images.
21//! 
22//! This is sometimes known as pairwise similarity or pair matching.
23//! 
24//! Example: given two numbers, then return the percent change.
25//!
26//! ```rust
27//! use similarity_trait::Similarity;
28//! struct MyStruct;
29//!
30//! impl Similarity<(i32, i32), f64> for MyStruct {
31//!     /// Similarity of numbers via percent change.
32//!     fn similarity(input: (i32, i32)) -> f64 {
33//!         (100.0 * (input.1 - input.0) as f64) / i32::abs(input.0) as f64
34//!     }
35//! }
36//!
37//! let percent_change = MyStruct::similarity((100, 120));
38//! assert_eq!(percent_change, 20.0);
39//! ```
40//!
41//! ## Similarity of a collection
42//!
43//! One way to use this trait is to calculate the similarity of a collection of
44//! values, such as an array of numbers, or vector of strings, or set of images.
45//! 
//! This is sometimes called intra-group similarity; statistical measures of
//! dispersion, such as standard deviation, are one way to quantify it.
47//!
48//! Example: given numbers, then return the population standard deviation.
49//!
50//! ```rust
51//! use similarity_trait::Similarity;
52//! struct MyStruct;
53//!
54//! impl Similarity<Vec<f64>, Option<f64>> for MyStruct {
55//!     /// Similarity of numbers via population standard deviation
56//!     fn similarity(numbers: Vec<f64>) -> Option<f64> {
57//!         if numbers.is_empty() { return None }
58//!         let mean = numbers.iter().sum::<f64>() / numbers.len() as f64;
59//!         let variance = numbers.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / numbers.len() as f64;
60//!         Some(variance.sqrt())
61//!     }
62//! }
63//!
64//! let numbers = vec![2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
65//! let standard_deviation = MyStruct::similarity(numbers).expect("similarity");
66//! assert!(standard_deviation > 1.999 && standard_deviation < 2.001);
67//! ```
68//!
69//! ## Similarity of a pair or a collection
70//!
71//! You may want to choose whether you prefer to calculate the similarity of a
72//! pair (such as two strings) or a collection (such as a vector of strings).
73//!
74//! Example: given a pair of strings, then return the Hamming distance.
75//!
76//! ```rust
77//! use similarity_trait::Similarity;
78//! struct MyStruct;
79//!
80//! impl Similarity<(&str, &str), usize> for MyStruct {
81//!     /// Similarity of a pair of strings via Hamming distance.
82//!     fn similarity(pair: (&str, &str)) -> usize {
83//!         pair.0.chars().zip(pair.1.chars()).filter(|(c1, c2)| c1 != c2).count()
84//!     }
85//! }
86//!
87//! let pair = ("information", "informatics");
88//! let hamming_distance = MyStruct::similarity(pair);
89//! assert_eq!(hamming_distance, 2);
90//! ```
91//!
92//! Example: given a collection of strings, then return the maximum Hamming
93//! distance.
94//!
95//! ```rust
96//! use similarity_trait::Similarity;
97//! struct MyStruct;
98//!
99//! impl Similarity<Vec<&str>, usize> for MyStruct {
100//!     /// Similarity of a collection of strings via maximum Hamming distance.
101//!     fn similarity(collection: Vec<&str>) -> usize {
102//!         let mut max = 0;
103//!         for i in 0..collection.len() {
104//!             for j in (i + 1)..collection.len() {
105//!                 max = std::cmp::max(max, collection[i].chars().zip(collection[j].chars()).filter(|(c1, c2)| c1 != c2).count())
106//!             }
107//!         }
108//!         max
109//!     }
110//! }
111//!
112//! let collection = vec!["information", "informatics", "affirmation"];
113//! let maximum_hamming_distance = MyStruct::similarity(collection);
114//! assert_eq!(maximum_hamming_distance, 5);
115//! ```
116
/// A similarity calculation from one input value to one output value.
///
/// The trait is deliberately unconstrained: `InputType` and `OutputType` carry
/// no bounds, so an implementor decides what is being compared (a pair, a
/// collection, ...) and what the result looks like (a number, an `Option`, ...).
/// Because both are type parameters rather than associated types, a single
/// type may implement `Similarity` several times with different parameter
/// combinations.
///
/// See the crate-level documentation for worked examples.
pub trait Similarity<InputType, OutputType> {
    /// Calculates the similarity of `input`.
    ///
    /// This is an associated function with no `self` receiver, so it is called
    /// as `Implementor::similarity(input)`. The `input` is taken by value.
    fn similarity(input: InputType) -> OutputType;
}
120
121#[cfg(test)]
122mod tests {
123    use super::*;
124    struct MyStruct {}
125
126    mod percent_change {
127        use super::*;
128
129        impl Similarity<(i32, i32), f64> for MyStruct {
130            /// Similarity of two numbers via percent change.
131            fn similarity(input: (i32, i32)) -> f64 {
132                (100.0 * (input.1 - input.0) as f64) / i32::abs(input.0) as f64
133            }
134        }
135
136        #[test]
137        fn test() {
138            let percent_change = MyStruct::similarity((100, 120));
139            assert_eq!(percent_change, 20.0);
140        }
141
142    }
143
144    mod population_standard_deviation {
145        use super::*;
146
147        impl Similarity<Vec<f64>, Option<f64>> for MyStruct {
148            /// Similarity of numbers via population standard deviation.
149            fn similarity(numbers: Vec<f64>) -> Option<f64> {
150                if numbers.is_empty() { return None }
151                let mean = numbers.iter().sum::<f64>() / numbers.len() as f64;
152                let variance = numbers.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / numbers.len() as f64;
153                Some(variance.sqrt())
154            }
155        }
156
157        #[test]
158        fn test() {
159            let numbers = vec![2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
160            let standard_deviation = MyStruct::similarity(numbers).expect("similarity");
161            assert!(standard_deviation > 1.999 && standard_deviation < 2.001);
162        }
163
164    }
165
166    mod hamming_distance_for_a_pair_of_strings {
167        use super::*;
168
169        impl Similarity<(&str, &str), usize> for MyStruct {
170            /// Similarity of two strings via Hamming distance.
171            fn similarity(input: (&str, &str)) -> usize {
172                input.0.chars().zip(input.1.chars()).filter(|(c1, c2)| c1 != c2).count()
173            }
174        }
175
176        #[test]
177        fn test() {
178            let pair = ("information", "informatics");
179            let hamming_distance = MyStruct::similarity(pair);
180            assert_eq!(hamming_distance, 2);
181        }
182
183    }
184
185    mod hamming_distance_for_a_collection_of_strings {
186        use super::*;
187
188        impl Similarity<Vec<&str>, usize> for MyStruct {
189            /// Similarity of a collection strings via maximum Hamming distance.
190            fn similarity(strings: Vec<&str>) -> usize {
191                let mut max = 0;
192                for i in 0..strings.len() {
193                    for j in (i + 1)..strings.len() {
194                        max = std::cmp::max(max, strings[i].chars().zip(strings[j].chars()).filter(|(c1, c2)| c1 != c2).count())
195                    }
196                }
197                max
198            }
199        }
200
201        #[test]
202        fn test() {
203            let collection = vec!["information", "informatics", "affirmation"];
204            let maximum_hamming_distance = MyStruct::similarity(collection);
205            assert_eq!(maximum_hamming_distance, 5);
206        }
207
208    }
209
210}