rapidfuzz 0.1.0

rapid string matching library
Documentation
use crate::string_utils;
use std::cmp::min;


pub fn weighted_levenshtein(a: &str, b: &str) -> usize {
    // check which is the bigger string for later operations
	let (mut longer, mut shorter) = if a.len() >= b.len() {
		(a, b)
	} else {
		(b, a)
	};

	let string_affix = string_utils::StringAffix::find(&longer, &shorter);

	let longer_len = string_affix.first_string_len;
	let shorter_len = string_affix.second_string_len;

	if shorter_len == 0 {
		return longer_len;
	}

	let prefix_len = string_affix.prefix_len;
	longer = &longer[prefix_len..prefix_len + longer_len];
	shorter = &shorter[prefix_len..prefix_len + shorter_len];
  
  	// calculate edit distance
	let mut cache: Vec<usize> = (1..=longer_len).collect();
	let mut result = longer_len;
  
	for (i, a_elem) in shorter.chars().enumerate() {
		result = i + 1;
		let mut distance_b = i;
  
		for (j, b_elem) in longer.chars().enumerate() {
			if a_elem == b_elem {
				result = distance_b - 1;
			} else {
				result += 1;
			}
			distance_b = cache[j] + 1;
			result = result.min(distance_b);
			cache[j] = result;
		}
	}
	result
}


pub fn normalized_weighted_levenshtein(a: &str, b: &str) -> f32 {
	if a == b {
		return 1.0;
	}

	if a.is_empty() || b.is_empty() {
		return 0.0;
  	}
   
	let lensum = a.chars().count() + b.chars().count();
	let distance = weighted_levenshtein(a, b);

	1.0 - distance as f32 / lensum as f32
}


pub fn levenshtein(first_string: &str, second_string: &str) -> usize {
    // check which is the bigger string for later operations
	let (mut longer, mut shorter) = if first_string.len() >= second_string.len() {
		(first_string, second_string)
	} else {
		(second_string, first_string)
	};
	
	let string_affix = string_utils::StringAffix::find(&longer, &shorter);

	let longer_len = string_affix.first_string_len;
	let shorter_len = string_affix.second_string_len;

	if shorter_len == 0 {
		return longer_len;
	}

	let prefix_len = string_affix.prefix_len;
	longer = &longer[prefix_len..prefix_len + longer_len];
	shorter = &shorter[prefix_len..prefix_len + shorter_len];
  
  	// calculate edit distance
	let mut cache: Vec<usize> = (1..=longer_len).collect();
	let mut result = longer_len;
  
	for (i, a_elem) in shorter.chars().enumerate() {
		result = i + 1;
		let mut distance_b = i;
  
		for (j, b_elem) in longer.chars().enumerate() {
			let distance_a =  distance_b + (a_elem != b_elem) as usize;
			distance_b = cache[j];
			result = min(result + 1, min(distance_a, distance_b + 1));
			cache[j] = result;
		}
	}
	result
}


pub fn normalized_levenshtein(a: &str, b: &str) -> f32 {
	if a == b {
		return 1.0;
	}

	if a.is_empty() || b.is_empty() {
		return 0.0;
  	}
   
	let max_len = a.chars().count().max(b.chars().count());
	let distance = levenshtein(a, b);

	(1 - distance) as f32 / max_len as f32
}