use crate::types::TaskContext;
/// Returns a normalized similarity score between two sequences of strings.
///
/// The score is `1.0 - distance / max_len`, where `distance` is the value
/// returned by `edit_distance` and `max_len` is the length of the longer
/// sequence.
///
/// Two empty sequences score `1.0`; exactly one empty sequence scores `0.0`.
pub(super) fn sequence_similarity(seq1: &[String], seq2: &[String]) -> f32 {
    match (seq1.is_empty(), seq2.is_empty()) {
        // Nothing to compare on either side: treat as identical.
        (true, true) => 1.0,
        // One side has content and the other has none: nothing in common.
        (true, false) | (false, true) => 0.0,
        (false, false) => {
            let longest = seq1.len().max(seq2.len());
            let edits = edit_distance(seq1, seq2);
            1.0 - (edits as f32 / longest as f32)
        }
    }
}
/// Computes the Levenshtein edit distance between two sequences of strings.
///
/// Each element is treated as an atomic symbol; insertion, deletion and
/// substitution of one element each cost 1.
///
/// Returns the minimum number of single-element edits needed to turn `seq1`
/// into `seq2`. If either sequence is empty the result is the length of the
/// other one.
fn edit_distance(seq1: &[String], seq2: &[String]) -> usize {
    if seq1.is_empty() {
        return seq2.len();
    }
    if seq2.is_empty() {
        return seq1.len();
    }

    // Each DP cell only reads the current and the previous row, so two rows
    // are kept instead of the full (len1 + 1) x (len2 + 1) matrix.
    // `prev[j]` is the distance between the already-consumed prefix of `seq1`
    // and the first `j` elements of `seq2`.
    let mut prev: Vec<usize> = (0..=seq2.len()).collect();
    let mut curr: Vec<usize> = vec![0; seq2.len() + 1];

    for (i, left) in seq1.iter().enumerate() {
        // Turning the first `i + 1` elements into an empty prefix costs
        // `i + 1` deletions.
        curr[0] = i + 1;
        for (j, right) in seq2.iter().enumerate() {
            let cost = usize::from(left != right);
            curr[j + 1] = (prev[j + 1] + 1) // deletion
                .min(curr[j] + 1) // insertion
                .min(prev[j] + cost); // substitution or match
        }
        std::mem::swap(&mut prev, &mut curr);
    }

    // After the final swap `prev` holds the row for the whole of `seq1`.
    prev[seq2.len()]
}
/// Returns a normalized similarity score between two strings.
///
/// Both strings are split into `char`s and compared with
/// `char_edit_distance`; the score is `1.0 - distance / max_len`, where
/// `max_len` is the larger of the two `char` counts.
///
/// Two empty strings score `1.0`; exactly one empty string scores `0.0`.
pub(super) fn string_similarity(s1: &str, s2: &str) -> f32 {
    match (s1.is_empty(), s2.is_empty()) {
        // Both empty: treat as identical.
        (true, true) => 1.0,
        // Only one side has content: nothing in common.
        (true, false) | (false, true) => 0.0,
        (false, false) => {
            // Compare by `char` rather than by byte so that a multi-byte
            // character counts as a single symbol.
            let left: Vec<char> = s1.chars().collect();
            let right: Vec<char> = s2.chars().collect();
            let longest = left.len().max(right.len());
            let edits = char_edit_distance(&left, &right);
            1.0 - (edits as f32 / longest as f32)
        }
    }
}
/// Computes the Levenshtein edit distance between two `char` slices.
///
/// Insertion, deletion and substitution of one `char` each cost 1.
///
/// Returns the minimum number of single-`char` edits needed to turn `chars1`
/// into `chars2`. If either slice is empty the result is the length of the
/// other one.
fn char_edit_distance(chars1: &[char], chars2: &[char]) -> usize {
    if chars1.is_empty() {
        return chars2.len();
    }
    if chars2.is_empty() {
        return chars1.len();
    }

    // Each DP cell only reads the current and the previous row, so two rows
    // are kept instead of the full (len1 + 1) x (len2 + 1) matrix.
    // `prev[j]` is the distance between the already-consumed prefix of
    // `chars1` and the first `j` chars of `chars2`.
    let mut prev: Vec<usize> = (0..=chars2.len()).collect();
    let mut curr: Vec<usize> = vec![0; chars2.len() + 1];

    for (i, left) in chars1.iter().enumerate() {
        // Turning the first `i + 1` chars into an empty prefix costs
        // `i + 1` deletions.
        curr[0] = i + 1;
        for (j, right) in chars2.iter().enumerate() {
            let cost = usize::from(left != right);
            curr[j + 1] = (prev[j + 1] + 1) // deletion
                .min(curr[j] + 1) // insertion
                .min(prev[j] + cost); // substitution or match
        }
        std::mem::swap(&mut prev, &mut curr);
    }

    // After the final swap `prev` holds the row for the whole of `chars1`.
    prev[chars2.len()]
}
/// Returns a weighted similarity score between two task contexts.
///
/// Three components contribute to the score:
///
/// * **Domain** (weight 0.4): full credit when the domains are equal.
/// * **Language** (weight 0.3): full credit when both languages are set and
///   equal; a credit of 0.15 when neither is set; nothing otherwise.
/// * **Tags** (weight 0.3): the Jaccard index of the two tag sets
///   (`|intersection| / |union|`), scaled by the weight. Contributes nothing
///   when both tag lists are empty.
///
/// The sum is divided by the total weight. Tags are compared as sets, so a
/// tag repeated within one context is counted once and the tag component
/// cannot exceed its weight.
pub(super) fn context_similarity(ctx1: &TaskContext, ctx2: &TaskContext) -> f32 {
    const DOMAIN_WEIGHT: f32 = 0.4;
    const LANGUAGE_WEIGHT: f32 = 0.3;
    const TAG_WEIGHT: f32 = 0.3;
    // Credit given when neither context specifies a language.
    const LANGUAGE_BOTH_UNSET_CREDIT: f32 = 0.15;

    let mut score = 0.0;

    if ctx1.domain == ctx2.domain {
        score += DOMAIN_WEIGHT;
    }

    match (&ctx1.language, &ctx2.language) {
        (Some(l1), Some(l2)) if l1 == l2 => score += LANGUAGE_WEIGHT,
        (None, None) => score += LANGUAGE_BOTH_UNSET_CREDIT,
        _ => {}
    }

    // Deduplicate before comparing: counting raw list entries against a
    // unique-tag denominator would let repeated tags push the ratio above 1.
    let tags1: std::collections::HashSet<_> = ctx1.tags.iter().collect();
    let tags2: std::collections::HashSet<_> = ctx2.tags.iter().collect();
    let total_unique_tags = tags1.union(&tags2).count();
    if total_unique_tags > 0 {
        let common_tags = tags1.intersection(&tags2).count();
        let jaccard = common_tags as f32 / total_unique_tags as f32;
        score += jaccard * TAG_WEIGHT;
    }

    let weight_sum = DOMAIN_WEIGHT + LANGUAGE_WEIGHT + TAG_WEIGHT;
    score / weight_sum
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an owned `Vec<String>` from string literals.
    fn strings(items: &[&str]) -> Vec<String> {
        items.iter().map(|&item| item.to_string()).collect()
    }

    /// Identical sequences score 1.0; one substitution out of three elements
    /// scores roughly 2/3.
    #[test]
    fn test_sequence_similarity() {
        let base = strings(&["a", "b", "c"]);
        let same = strings(&["a", "b", "c"]);
        assert_eq!(sequence_similarity(&base, &same), 1.0);

        let one_off = strings(&["a", "b", "d"]);
        let score = sequence_similarity(&base, &one_off);
        assert!(score > 0.6 && score < 0.7);
    }

    /// Covers identical strings, the empty-input special cases, and a single
    /// substitution in a five-character word.
    #[test]
    fn test_string_similarity() {
        assert_eq!(string_similarity("hello", "hello"), 1.0);
        assert_eq!(string_similarity("", ""), 1.0);
        assert_eq!(string_similarity("abc", ""), 0.0);

        let score = string_similarity("hello", "hallo");
        assert!(score > 0.7 && score < 0.9);
    }

    /// Same domain and language with partially overlapping tags should score
    /// above 0.7.
    #[test]
    fn test_context_similarity() {
        let left = TaskContext {
            domain: "web-api".to_string(),
            language: Some("rust".to_string()),
            tags: strings(&["async", "http"]),
            ..Default::default()
        };
        let right = TaskContext {
            domain: "web-api".to_string(),
            language: Some("rust".to_string()),
            tags: strings(&["async", "rest"]),
            ..Default::default()
        };

        let score = context_similarity(&left, &right);
        assert!(score > 0.7);
    }
}