b_k_tree/
metrics.rs

1ham_int!(i128,i16,i32,i64,i8,isize,u128,u16,u32,u64,u8,usize);
2impl CeilL2<Coords>{
3	/// creates a new metric that is the ceiling function of the euclidean metric
4	pub fn new()->Self{Self::with_factor(1.0)}
5	/// creates a new ceiling euclidean metric that rounds up to the given factor
6	pub fn with_factor(factor:f64)->Self{Self::with_factor_for(factor,Coords)}
7}
8impl Hamming<Bits>{
9	/// creates a new hamming distance metric
10	pub fn new()->Self{Self::new_for(Bits)}
11}
12impl Levenshtein<Symbols>{
13	/// creates a new levenshtein distance metric
14	pub fn new()->Self{Self::new_for(Symbols)}
15}
16impl SymbolIter for String{
17	fn symbol_count(&self)->usize{self.chars().count()}
18	fn symbol_iter(&self)->Self::Iter<'_>{self.chars()}
19	type Item=char;
20	type Iter<'a>=Chars<'a> where Self:'a;
21}
22impl SymbolIter for str{
23	fn symbol_count(&self)->usize{self.chars().count()}
24	fn symbol_iter(&self)->Self::Iter<'_>{self.chars()}
25	type Item=char;
26	type Iter<'a>=Chars<'a> where Self:'a;
27}
28impl<E:Add<Output=E>+Copy+Into<f64>+Mul<Output=E>+Sub<Output=E>,const N:usize> CoordIter for [E;N]{
29	fn coord_iter(&self)->Self::Iter<'_>{self.iter().copied()}
30	type Item=E;
31	type Iter<'a>=Copied<SliceIter<'a,E>> where Self:'a;
32}
33impl<E:Add<Output=E>+Copy+Into<f64>+Mul<Output=E>+Sub<Output=E>> CoordIter for [E]{
34	fn coord_iter(&self)->Self::Iter<'_>{self.iter().copied()}
35	type Item=E;
36	type Iter<'a>=Copied<SliceIter<'a,E>> where Self:'a;
37}
38impl<E:Add<Output=E>+Copy+Into<f64>+Mul<Output=E>+Sub<Output=E>> CoordIter for Vec<E>{
39	fn coord_iter(&self)->Self::Iter<'_>{self.iter().copied()}
40	type Item=E;
41	type Iter<'a>=Copied<SliceIter<'a,E>> where Self:'a;
42}
43impl<E:Copy+Eq,const N:usize> SymbolIter for [E;N]{
44	fn symbol_count(&self)->usize{self.len()}
45	fn symbol_iter(&self)->Self::Iter<'_>{self.iter().copied()}
46	type Item=E;
47	type Iter<'a>=Copied<SliceIter<'a,E>> where Self:'a;
48}
49impl<E:Copy+Eq> SymbolIter for [E]{
50	fn symbol_count(&self)->usize{self.len()}
51	fn symbol_iter(&self)->Self::Iter<'_>{self.iter().copied()}
52	type Item=E;
53	type Iter<'a>=Copied<SliceIter<'a,E>> where Self:'a;
54}
55impl<E:Copy+Eq> SymbolIter for Vec<E>{
56	fn symbol_count(&self)->usize{self.len()}
57	fn symbol_iter(&self)->Self::Iter<'_>{self.iter().copied()}
58	type Item=E;
59	type Iter<'a>=Copied<SliceIter<'a,E>> where Self:'a;
60}
61impl<T:?Sized+CoordIter> CoordIter for &T{
62	fn coord_iter(&self)->Self::Iter<'_>{(*self).coord_iter()}
63	type Item=T::Item;
64	type Iter<'a>=T::Iter<'a> where Self:'a;
65}
66impl<T:Clone> Clone for Levenshtein<T>{
67	fn clone(&self)->Self{Self::new_for(self._marker.clone())}
68}
69impl<T:Default> Default for CeilL2<T>{
70	fn default()->Self{Self::with_factor_for(1.0,Default::default())}
71}
72
73impl<T:?Sized+SymbolIter> SymbolIter for &T{
74	fn symbol_count(&self)->usize{(*self).symbol_count()}
75	fn symbol_iter(&self)->Self::Iter<'_>{(*self).symbol_iter()}
76	type Item=T::Item;
77	type Iter<'a>=T::Iter<'a> where Self:'a;
78}
79impl<T> AsMut<Self> for CeilL2<T>{
80	fn as_mut(&mut self)->&mut Self{self}
81}
82impl<T> AsMut<Self> for Hamming<T>{
83	fn as_mut(&mut self)->&mut Self{self}
84}
85impl<T> AsMut<Self> for Levenshtein<T>{
86	fn as_mut(&mut self)->&mut Self{self}
87}
88impl<T> AsRef<Self> for CeilL2<T>{
89	fn as_ref(&self)->&Self{self}
90}
91impl<T> AsRef<Self> for Hamming<T>{
92	fn as_ref(&self)->&Self{self}
93}
94impl<T> AsRef<Self> for Levenshtein<T>{
95	fn as_ref(&self)->&Self{self}
96}
97impl<T> CeilL2<T>{
98	/// creates a new metric that is the ceiling function of the euclidean metric
99	pub fn new_for(t:T)->Self{Self::with_factor_for(1.0,t)}
100	/// creates a new ceiling euclidean metric that rounds up to the given factor
101	pub fn with_factor_for(factor:f64,t:T)->Self{
102		Self{factor,_marker:t}
103	}
104}
105impl<T> Hamming<T>{
106	/// internal symbol based calculation
107	fn _symbolic_distance<U:?Sized+SymbolIter,V:?Sized+SymbolIter<Item=U::Item>>(&self,x:&U,y:&V)->usize{
108		let (mut x,mut y)=(x.symbol_iter(),y.symbol_iter());
109		let mut count=0;
110
111		loop {
112			match (x.next(),y.next()){
113				(Some(a),Some(b))=>if a!=b{count+=1},
114				(Some(_), None) | (None, Some(_))=>count+=1,
115				(None, None)=>break,
116			}
117		}
118		count
119	}
120	/// creates a new hamming distance metric
121	pub fn new_for(t:T)->Self{
122		Self{_marker:t}
123	}
124}
125impl<T> Levenshtein<T>{
126	/// internal symbol based calculation
127	fn _symbolic_distance<U:?Sized+SymbolIter,V:?Sized+SymbolIter<Item=U::Item>>(&self,x:&U,y:&V)->usize{
128		let (xl,yl)=(x.symbol_count(),y.symbol_count());
129		if xl==0{return yl}
130		if yl==0{return xl}
131
132		let mut distances:Vec<usize>=if let Ok(mut c)=self.cache.try_lock(){take(&mut c)}else{Vec::new()};
133
134		distances.clear();
135		distances.reserve(xl);
136		for n in 0..xl{distances.push(n+1)}
137		for (k,y) in y.symbol_iter().enumerate(){
138			let mut diagonal=k;
139			let mut left=k+1;
140			let mut up;
141			for (n,x) in x.symbol_iter().enumerate(){
142				up=distances[n];
143				let d=(diagonal+if x==y{0}else{1}).min(left+1).min(up+1);
144				(diagonal,left)=(up,d);
145				distances[n]=d;
146			}
147		}
148		let distance=*distances.last().unwrap();
149
150		if let Ok(mut c)=self.cache.try_lock(){*c=distances}
151		distance
152	}
153	/// creates a new levenshtein distance metric
154	pub fn new_for(t:T)->Self{
155		Self{cache:Mutex::new(Vec::new()),_marker:t}
156	}
157}
158impl<U:?Sized+AsRef<str>,V:?Sized+AsRef<str>> DiscreteMetric<U,V> for Hamming<StrRef>{
159	fn distance(&self,u:&U,v:&V)->usize{self._symbolic_distance(u.as_ref(),v.as_ref())}
160}
161impl<U:?Sized+AsRef<str>,V:?Sized+AsRef<str>> DiscreteMetric<U,V> for Levenshtein<StrRef>{
162	fn distance(&self,u:&U,v:&V)->usize{
163		let (mut u,mut v)=(u.as_ref(),v.as_ref());
164
165		if u.len()>v.len(){swap(&mut u,&mut v)}
166		self._symbolic_distance(u,v)
167	}
168}
169impl<U:?Sized+CoordIter,V:?Sized+CoordIter<Item=U::Item>> DiscreteMetric<U,V> for CeilL2{
170	fn distance(&self,u:&U,v:&V)->usize{
171		let (mut u,mut v)=(u.coord_iter(),v.coord_iter());
172		let factor=self.factor;
173		let mut r2=None;
174
175		loop{
176			let d=if let (Some(u),Some(v))=(u.next(),v.next()){u-v}else{break};
177			let d2=d*d;
178			r2=Some(if let Some(r2)=r2{d2+r2}else{d2})
179		}
180		let r2=r2.map(Into::into).unwrap_or(0.0);
181		(r2.sqrt()/factor).ceil() as usize
182	}
183}
184impl<U:?Sized+InherentDiscreteMetric<V>,V:?Sized> DiscreteMetric<U,V> for Inherent{
185	fn distance(&self,u:&U,v:&V)->usize{u.distance_to(v)}
186}
187impl<U:?Sized+SymbolIter,V:?Sized+SymbolIter<Item=U::Item>> DiscreteMetric<U,V> for Hamming<Symbols>{
188	fn distance(&self,u:&U,v:&V)->usize{self._symbolic_distance(u,v)}
189}
190impl<U:?Sized+SymbolIter,V:?Sized+SymbolIter<Item=U::Item>> DiscreteMetric<U,V> for Levenshtein<Symbols>{
191	fn distance(&self,u:&U,v:&V)->usize{self._symbolic_distance(u,v)}
192}
193/// implements hamming distance for integers
194macro_rules! ham_int{
195	($($int:ty),*)=>($(
196		impl DiscreteMetric<&$int,&$int> for Hamming<Bits>{
197			fn distance(&self,x:&&$int,y:&&$int)->usize{(*x^*y).count_ones() as usize}
198		}
199		impl DiscreteMetric<&$int,$int> for Hamming<Bits>{
200			fn distance(&self,x:&&$int,y:&$int)->usize{(*x^*y).count_ones() as usize}
201		}
202		impl DiscreteMetric<$int,&$int> for Hamming<Bits>{
203			fn distance(&self,x:&$int,y:&&$int)->usize{(*x^*y).count_ones() as usize}
204		}
205		impl DiscreteMetric<$int,$int> for Hamming<Bits>{
206			fn distance(&self,x:&$int,y:&$int)->usize{(*x^*y).count_ones() as usize}
207		}
208	)*)
209}
210#[cfg(test)]
211mod tests{
212	#[test]
213	fn ceil_l2_2d_int(){
214		let m=CeilL2::new();
215		assert_eq!(m.distance(&[1,2],&[2,1]),2);
216		assert_eq!(m.distance(&[3,3],&[3,4]),1);
217		assert_eq!(m.distance(&[0,0],&[3,4]),5);
218	}
219	#[test]
220	fn ham_int(){
221		let m=Hamming::new();
222		assert_eq!(m.distance(&1,&0),1);
223		assert_eq!(m.distance(&0b1011101,&0b1001001),2);
224		assert_eq!(m.distance(&9999,&9999),0);
225		assert_eq!(m.distance(&-1_i32,&0_i32),32);
226	}
227	#[test]
228	fn ham_string() {
229		let m=Hamming::new_for(StrRef);
230		assert_eq!(m.distance("rust","rust"),0);
231		assert_eq!(m.distance("karolin","kathrin"),3);
232		assert_eq!(m.distance("1011101","1001001"),2);
233		assert_eq!(m.distance("2173896","2233796"),3);
234	}
235	#[test]
236	fn lev(){
237		let metric=Levenshtein::new();
238		assert_eq!(metric.distance("here","there"),1);
239		assert_eq!(metric.distance("hi","hello"),4);
240		assert_eq!(metric.distance("kitten","sitting"),3);
241		assert_eq!(metric.distance("saturday","sunday"),3);
242		assert_eq!(metric.distance("there","there"),0);
243		assert_eq!(metric.distance("there","there's"),2);
244	}
245
246	use super::*;
247}
248#[derive(Clone,Copy,Debug,Default,Eq,Hash,Ord,PartialEq,PartialOrd)]
249/// marker for bitwise distance computation
250pub struct Bits;
251#[derive(Clone,Debug)]
252/// distance metric that is the usual euclidean metric, divided by a scale factor and rounded up. behavior on length mismatch is currently unspecified
253pub struct CeilL2<T=Coords>{factor:f64,_marker:T}
254#[derive(Clone,Copy,Debug,Default,Eq,Hash,Ord,PartialEq,PartialOrd)]
255/// marker for coordinate based distance computation
256pub struct Coords;
257#[derive(Clone,Copy,Debug,Default,Eq,Hash,Ord,PartialEq,PartialOrd)]
258/// metric for types that have an inherent discrete distance metric
259pub struct Inherent;
260#[derive(Clone,Debug,Default)]
261/// hamming distance metric that is bitwise on integers and charwise on strings. behavior on length mismatch is currently unspecified
262pub struct Hamming<T=Bits>{_marker:T}
263#[derive(Debug,Default)]
264/// levenshtein distance metric for strings
265pub struct Levenshtein<T=Symbols>{cache:Mutex<Vec<usize>>,_marker:T}
266#[derive(Clone,Copy,Debug,Default,Eq,Hash,Ord,PartialEq,PartialOrd)]
267/// marker for usage of string references for edit distance computation
268pub struct StrRef;
269#[derive(Clone,Copy,Debug,Default,Eq,Hash,Ord,PartialEq,PartialOrd)]
270/// marker for usage of the symbol iter trait for edit distance computation
271pub struct Symbols;
272/// helper trait for computing edit distances by iteration by iteration over primitives
273pub trait CoordIter{
274	/// returns an iterator over coordinates
275	fn coord_iter(&self)->Self::Iter<'_>;
276	/// the item type
277	type Item:Add<Output=Self::Item>+Copy+Into<f64>+Mul<Output=Self::Item>+Sub<Output=Self::Item>;
278	/// the iterator type
279	type Iter<'a>:Iterator<Item=Self::Item> where Self:'a;
280}
281/// allows metrics inherent to type
282pub trait InherentDiscreteMetric<V:?Sized>{
283	/// finds the distance between self and v
284	fn distance_to(&self,v:&V)->usize;
285}
286/// helper trait for computing edit distances by iteration
287pub trait SymbolIter{
288	/// returns the length of the sequence
289	fn symbol_count(&self)->usize;
290	/// returns an iterator for comparing characters for an edit distance. Since the computation may require multiple iterations, the returned iterator shouldn't change length or sequence without self being mutated
291	fn symbol_iter(&self)->Self::Iter<'_>;
292	/// the item type
293	type Item:Eq;
294	/// the iterator type
295	type Iter<'a>:Iterator<Item=Self::Item> where Self:'a;
296}
297use {
298	crate::DiscreteMetric,
299	ham_int,
300	std::{
301		cmp::Eq,iter::Copied,mem::{swap,take},ops::{Add,Mul,Sub},slice::Iter as SliceIter,str::Chars,sync::Mutex
302	}
303};