// talon_cli/mcp/session/fingerprint.rs
use std::collections::HashSet;
2
/// A normalized form of a query message together with its set of distinct
/// word tokens.
///
/// Two fingerprints compare equal when both the normalized text and the
/// token set are equal.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct QueryFingerprint {
    /// The message text after normalization.
    pub normalized: String,
    /// The distinct whitespace-separated tokens of `normalized`.
    pub token_set: HashSet<String>,
}
9
10impl QueryFingerprint {
11 #[must_use]
12 pub fn from_message(message: &str) -> Self {
13 let normalized = normalize(message);
14 let token_set = tokenize(&normalized);
15 Self {
16 normalized,
17 token_set,
18 }
19 }
20
21 #[must_use]
23 pub fn similarity(&self, other: &Self) -> f64 {
24 if self.token_set.is_empty() && other.token_set.is_empty() {
25 return 1.0;
26 }
27 let intersection = self.token_set.intersection(&other.token_set).count();
28 let union = self.token_set.union(&other.token_set).count();
29 if union == 0 {
30 1.0
31 } else {
32 #[expect(
33 clippy::cast_precision_loss,
34 reason = "precision loss is acceptable for word-token similarity scores"
35 )]
36 let result = intersection as f64 / union as f64;
37 result
38 }
39 }
40
41 #[must_use]
42 pub fn as_str(&self) -> &str {
43 &self.normalized
44 }
45}
46
/// Produces the canonical text form used for fingerprinting.
///
/// Characters that are neither alphanumeric nor whitespace are dropped (not
/// replaced by a space, so `"recall-work"` becomes `"recallwork"`), the
/// remaining characters are lowercased, and every run of whitespace is
/// collapsed to a single ASCII space with no leading or trailing space.
fn normalize(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    // Set when whitespace has been seen after some output; the separator is
    // only written once the next word actually starts, which trims the end.
    let mut pending_space = false;
    for ch in s.chars() {
        if ch.is_whitespace() {
            pending_space = !out.is_empty();
        } else if ch.is_alphanumeric() {
            if pending_space {
                out.push(' ');
                pending_space = false;
            }
            // Lowercasing may expand one char into several.
            out.extend(ch.to_lowercase());
        }
    }
    out
}
56
/// Splits `s` on whitespace and returns the distinct tokens as owned strings.
///
/// Duplicate tokens collapse into one entry; an empty or all-whitespace input
/// yields an empty set.
fn tokenize(s: &str) -> HashSet<String> {
    s.split_whitespace().map(str::to_owned).collect()
}
60
#[cfg(test)]
mod tests {
    use super::QueryFingerprint;

    /// The same message fingerprinted twice must score exactly 1.0.
    #[test]
    fn identical_messages_have_similarity_one() {
        let a = QueryFingerprint::from_message("how does recall work");
        let b = QueryFingerprint::from_message("how does recall work");
        let sim = a.similarity(&b);
        assert!(
            (sim - 1.0).abs() < f64::EPSILON,
            "expected similarity 1.0 for identical messages, got {sim}"
        );
    }

    /// Two empty messages have no tokens and are defined to be identical.
    #[test]
    fn empty_message_similarity() {
        let a = QueryFingerprint::from_message("");
        let b = QueryFingerprint::from_message("");
        let sim = a.similarity(&b);
        assert!(
            (sim - 1.0).abs() < f64::EPSILON,
            "expected similarity 1.0 for two empty messages, got {sim}"
        );
    }

    /// Messages that share no words must score well below the midpoint.
    #[test]
    fn different_messages_have_lower_similarity() {
        let a = QueryFingerprint::from_message("how does recall work in talon");
        let b = QueryFingerprint::from_message("what is the vault indexing strategy");
        let sim = a.similarity(&b);
        assert!(
            sim < 0.5,
            "expected similarity < 0.5 for very different messages, got {sim}"
        );
    }

    /// An empty message shares nothing with a non-empty one.
    #[test]
    fn one_empty_message_has_zero_similarity() {
        let a = QueryFingerprint::from_message("");
        let b = QueryFingerprint::from_message("how does recall work");
        let sim = a.similarity(&b);
        assert!(
            sim.abs() < f64::EPSILON,
            "expected similarity 0.0 when only one message is empty, got {sim}"
        );
    }

    /// Case, punctuation and repeated whitespace must not affect the result.
    #[test]
    fn normalization_ignores_case_and_punctuation() {
        let a = QueryFingerprint::from_message("How does   RECALL work?");
        let b = QueryFingerprint::from_message("how does recall work");
        assert_eq!(a.as_str(), b.as_str());
        let sim = a.similarity(&b);
        assert!(
            (sim - 1.0).abs() < f64::EPSILON,
            "expected similarity 1.0 after normalization, got {sim}"
        );
    }
}