use crate::levenshtein::{normalized_weighted_levenshtein, weighted_levenshtein};
use crate::editops;
pub fn ratio(query: &str, choice: &str, limit: u8) -> u8 {
if query == choice {
return 100;
}
if query.is_empty() || choice.is_empty() || limit == 100 {
return 0;
}
let len_a = query.len() as f32;
let len_b = choice.len() as f32;
let len_ratio = if len_a > len_b {
len_a / len_b
} else {
len_b / len_a
};
if len_ratio < 1.5 {
full_ratio(query, choice, limit)
} else if len_ratio < 8.0 {
partial_ratio(query, choice, 0.9, limit)
} else {
partial_ratio(query, choice, 0.6, limit)
}
}
fn partial_ratio(query: &str, choice: &str, partial_scale: f32, limit: u8) -> u8 {
let mut sratio = normalized_weighted_levenshtein(query, choice);
const UNBASE_SCALE: f32 = 0.95;
let mut min_ratio = (limit as f32 / 100.0).max(sratio);
if min_ratio < UNBASE_SCALE {
sratio = sratio.max(partial_string_ratio(query, choice) * UNBASE_SCALE);
min_ratio = min_ratio.max(sratio);
if min_ratio < UNBASE_SCALE * partial_scale {
sratio = sratio.max(partial_token_ratio(query, choice) * UNBASE_SCALE * partial_scale )
}
}
(sratio * 100.0).round() as u8
}
fn full_ratio(query: &str, choice: &str, limit: u8) -> u8{
let mut sratio = normalized_weighted_levenshtein(query, choice);
const UNBASE_SCALE: f32 = 0.95;
let min_ratio = (limit as f32 / 100.0).max(sratio);
if min_ratio < UNBASE_SCALE {
sratio = sratio.max(token_ratio(query, choice) * UNBASE_SCALE);
}
(sratio * 100.0).round() as u8
}
fn partial_string_ratio(query: &str, choice: &str) -> f32 {
if query.is_empty() || choice.is_empty() {
return 0.0;
}
let shorter;
let longer;
if query.len() <= choice.len() {
shorter = query;
longer = choice;
} else {
longer = query;
shorter = choice;
}
let edit_ops = editops::editops_find(shorter, longer);
let blocks = editops::editops_matching_blocks(shorter.len(), longer.len(), &edit_ops);
let mut scores: Vec<f32> = vec![];
for block in blocks {
let long_start = if block.second_start > block.first_start {
block.second_start - block.first_start
} else {
0
};
let long_end = long_start + shorter.chars().count();
let long_substr = &longer[long_start..long_end];
let ls_ratio = normalized_weighted_levenshtein(shorter, long_substr);
if ls_ratio > 0.995 {
return 1.0;
} else {
scores.push(ls_ratio)
}
}
scores.iter().fold(0.0f32, |max, &val| max.max(val))
}
fn intersection_count_sorted_vec(a: &Vec<&str>, b: &mut Vec<&str> ) -> (String, String, String) {
let mut sorted_sect: Vec<&str> = vec![];
let mut sorted_1to2: Vec<&str> = vec![];
for current_a in a {
match b.binary_search(¤t_a) {
Ok(index) => {
b.remove(index);
sorted_sect.push(current_a)
},
_ => sorted_1to2.push(current_a),
}
}
(sorted_sect.join(" "), sorted_1to2.join(" "), b.join(" "))
}
fn token_ratio(query: &str, choice: &str) -> f32 {
let mut query_tokens: Vec<_> = query.split_whitespace().collect();
query_tokens.sort_unstable();
let mut choice_tokens: Vec<_> = choice.split_whitespace().collect();
choice_tokens.sort_unstable();
let sorted_query = query_tokens.join(" ");
let sorted_choice = choice_tokens.join(" ");
query_tokens.dedup();
choice_tokens.dedup();
let (sorted_sect, sorted_1to2, sorted_2to1) = intersection_count_sorted_vec(&query_tokens, &mut choice_tokens);
if sorted_1to2.is_empty() || sorted_2to1.is_empty() {
return 1.0;
}
let double_prefix = 2 * sorted_sect.chars().count();
let mut len_1to2 = sorted_1to2.chars().count();
let mut len_2to1 = sorted_2to1.chars().count();
if !sorted_sect.is_empty() {
len_1to2 += 1;
len_2to1 += 1;
}
normalized_weighted_levenshtein(&sorted_query, &sorted_choice)
.max(1.0 - len_1to2 as f32 / (len_1to2 + double_prefix) as f32)
.max(1.0 - len_2to1 as f32 / (len_2to1 + double_prefix) as f32)
.max(1.0 - weighted_levenshtein(&sorted_2to1, &sorted_1to2) as f32 / (len_1to2 + len_2to1 + double_prefix) as f32)
}
fn partial_token_ratio(query: &str, choice: &str) -> f32 {
let mut query_tokens: Vec<_> = query.split_whitespace().collect();
query_tokens.sort_unstable();
let mut choice_tokens: Vec<_> = choice.split_whitespace().collect();
choice_tokens.sort_unstable();
let sorted_query = query_tokens.join(" ");
let sorted_choice = choice_tokens.join(" ");
query_tokens.dedup();
choice_tokens.dedup();
let (sorted_sect, mut sorted_1to2, mut sorted_2to1) = intersection_count_sorted_vec(&query_tokens, &mut choice_tokens);
if sorted_sect.is_empty() == false {
if sorted_1to2.is_empty() == false {
sorted_1to2.insert_str(0, " ");
}
if sorted_2to1.is_empty() == false {
sorted_2to1.insert_str(0, " ");
}
}
let combined_1to2 = String::from(&sorted_sect) + &sorted_1to2;
let combined_2to1 = String::from(&sorted_sect) + &sorted_2to1;
partial_string_ratio(&sorted_query, &sorted_choice)
.max(partial_string_ratio(&sorted_sect, &combined_1to2))
.max(partial_string_ratio(&sorted_sect, &combined_2to1))
.max(partial_string_ratio(&combined_1to2, &combined_2to1))
}