Skip to main content

talon_cli/mcp/session/
fingerprint.rs

1use std::collections::HashSet;
2
3/// Normalized query fingerprint for turn deduplication.
4#[derive(Debug, Clone, PartialEq, Eq)]
5pub struct QueryFingerprint {
6    pub normalized: String,
7    pub token_set: HashSet<String>,
8}
9
10impl QueryFingerprint {
11    #[must_use]
12    pub fn from_message(message: &str) -> Self {
13        let normalized = normalize(message);
14        let token_set = tokenize(&normalized);
15        Self {
16            normalized,
17            token_set,
18        }
19    }
20
21    /// Jaccard similarity in [0.0, 1.0].
22    #[must_use]
23    pub fn similarity(&self, other: &Self) -> f64 {
24        if self.token_set.is_empty() && other.token_set.is_empty() {
25            return 1.0;
26        }
27        let intersection = self.token_set.intersection(&other.token_set).count();
28        let union = self.token_set.union(&other.token_set).count();
29        if union == 0 {
30            1.0
31        } else {
32            #[expect(
33                clippy::cast_precision_loss,
34                reason = "precision loss is acceptable for word-token similarity scores"
35            )]
36            let result = intersection as f64 / union as f64;
37            result
38        }
39    }
40
41    #[must_use]
42    pub fn as_str(&self) -> &str {
43        &self.normalized
44    }
45}
46
/// Canonicalizes `s` for fingerprinting.
///
/// Characters that are neither alphanumeric nor whitespace are dropped, the
/// remaining characters are lowercased, and every run of whitespace is
/// collapsed to a single space with no leading or trailing space.
fn normalize(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    // Set when whitespace has been seen after at least one emitted word; the
    // separator is only written once the next word actually starts.
    let mut gap_pending = false;

    for ch in s.chars() {
        if ch.is_whitespace() {
            gap_pending = !out.is_empty();
        } else if ch.is_alphanumeric() {
            if gap_pending {
                out.push(' ');
                gap_pending = false;
            }
            out.extend(ch.to_lowercase());
        }
        // Anything else (punctuation, symbols) is discarded without
        // breaking the current word.
    }

    out
}
56
/// Splits `s` on whitespace and returns the distinct words as owned strings.
fn tokenize(s: &str) -> HashSet<String> {
    let mut tokens = HashSet::new();
    for word in s.split_whitespace() {
        tokens.insert(word.to_owned());
    }
    tokens
}
60
#[cfg(test)]
mod tests {
    use super::QueryFingerprint;

    /// Fingerprints both messages and returns their similarity score.
    fn score(left: &str, right: &str) -> f64 {
        let lhs = QueryFingerprint::from_message(left);
        let rhs = QueryFingerprint::from_message(right);
        lhs.similarity(&rhs)
    }

    #[test]
    fn identical_messages_have_similarity_one() {
        let sim = score("how does recall work", "how does recall work");
        assert!(
            (1.0 - sim).abs() < f64::EPSILON,
            "expected similarity 1.0 for identical messages, got {sim}"
        );
    }

    #[test]
    fn empty_message_similarity() {
        // Two token-less fingerprints are defined to be identical.
        let sim = score("", "");
        assert!(
            (1.0 - sim).abs() < f64::EPSILON,
            "expected similarity 1.0 for two empty messages, got {sim}"
        );
    }

    #[test]
    fn different_messages_have_lower_similarity() {
        let sim = score(
            "how does recall work in talon",
            "what is the vault indexing strategy",
        );
        assert!(
            sim < 0.5,
            "expected similarity < 0.5 for very different messages, got {sim}"
        );
    }
}