ck_core/
heatmap.rs

1use std::collections::HashSet;
2
/// Colour band assigned to a semantic heatmap score.
///
/// `None` means "no highlight"; `Step1` is the weakest band and `Step8` the
/// strongest. See [`HeatmapBucket::from_score`] for the score ranges.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HeatmapBucket {
    None,
    Step1,
    Step2,
    Step3,
    Step4,
    Step5,
    Step6,
    Step7,
    Step8,
}

impl HeatmapBucket {
    /// Pick the bucket for a similarity score.
    ///
    /// Scores are split into eighths: anything at or above `0.875` is
    /// `Step8`, at or above `0.75` is `Step7`, and so on down to `0.125` for
    /// `Step2`. Any other strictly positive score is `Step1`. Zero, negative
    /// and NaN scores map to `None`.
    pub fn from_score(score: f32) -> Self {
        // Inclusive lower bounds, highest band first so the first hit wins.
        const LOWER_BOUNDS: [(f32, HeatmapBucket); 7] = [
            (0.875, HeatmapBucket::Step8),
            (0.75, HeatmapBucket::Step7),
            (0.625, HeatmapBucket::Step6),
            (0.5, HeatmapBucket::Step5),
            (0.375, HeatmapBucket::Step4),
            (0.25, HeatmapBucket::Step3),
            (0.125, HeatmapBucket::Step2),
        ];

        for &(floor, bucket) in LOWER_BOUNDS.iter() {
            if score >= floor {
                return bucket;
            }
        }

        // Below the lowest table entry: only a strictly positive score is
        // highlighted at all. NaN fails every comparison and lands on `None`.
        if score > 0.0 {
            Self::Step1
        } else {
            Self::None
        }
    }

    /// RGB triple used to render the bucket, or `None` (the `Option`) when
    /// the bucket is `HeatmapBucket::None` and nothing should be coloured.
    pub fn rgb(self) -> Option<(u8, u8, u8)> {
        let colour = match self {
            Self::None => return None,
            Self::Step1 => (180, 180, 180),
            Self::Step2 => (140, 140, 140),
            Self::Step3 => (100, 130, 100),
            Self::Step4 => (50, 120, 80),
            Self::Step5 => (0, 140, 60),
            Self::Step6 => (0, 160, 70),
            Self::Step7 => (0, 180, 80),
            Self::Step8 => (0, 255, 100),
        };
        Some(colour)
    }

    /// Returns `true` only for `Step8`, the one band that is additionally
    /// rendered in bold.
    pub fn is_bold(self) -> bool {
        self == Self::Step8
    }
}
61
/// Split text into tokens for heatmap highlighting.
///
/// Every whitespace character (space, tab, carriage return, newline) and every
/// delimiter from the set `( ) [ ] { } , ; : . ! ?` becomes its own
/// single-character token. Each maximal run of any other characters becomes
/// one token. No character is dropped, so concatenating the returned tokens
/// reproduces `text` exactly and coloured output lines up with the input.
///
/// Returns an empty vector for empty input.
pub fn split_into_tokens(text: &str) -> Vec<String> {
    let mut tokens = Vec::new();
    let mut current_token = String::new();

    for ch in text.chars() {
        match ch {
            // '\r' is listed so CRLF line endings do not leave a stray
            // carriage return glued to the last word of a line.
            ' ' | '\t' | '\r' | '\n' | '(' | ')' | '[' | ']' | '{' | '}' | ',' | ';' | ':'
            | '.' | '!' | '?' => {
                if !current_token.is_empty() {
                    // Move the buffered word out rather than cloning it;
                    // `take` leaves an empty String behind for the next word.
                    tokens.push(std::mem::take(&mut current_token));
                }
                tokens.push(ch.to_string());
            }
            _ => current_token.push(ch),
        }
    }

    // Flush a trailing word that was not followed by a delimiter.
    if !current_token.is_empty() {
        tokens.push(current_token);
    }

    tokens
}
94
/// Calculate a similarity score between an individual token and the raw query text.
///
/// The comparison is case-insensitive (both sides are lowercased first).
///
/// * Tokens without any alphanumeric character (whitespace, punctuation, the
///   empty string) score `0.0`.
/// * A token equal to the whole pattern scores `1.0`.
/// * Otherwise the pattern is split on whitespace and every word of at least
///   three bytes is scored against the token; the best score wins:
///   * token equals the word: `0.9`
///   * token contains the word: `0.6 * word_len / token_len`
///   * word contains the token (token at least three bytes):
///     `0.5 * token_len / word_len`
///   * anything else: `0.4 *` the shared-character ratio from
///     `calculate_fuzzy_similarity`
///
/// The lengths used in the ratios are character counts, so multi-byte
/// characters weigh the same as ASCII ones. Returns `0.0` when no pattern
/// word qualifies.
pub fn calculate_token_similarity(token: &str, pattern: &str) -> f32 {
    // Covers empty, whitespace-only and punctuation-only tokens alike.
    if !token.chars().any(char::is_alphanumeric) {
        return 0.0;
    }

    let token_lower = token.to_lowercase();
    let pattern_lower = pattern.to_lowercase();

    if token_lower == pattern_lower {
        return 1.0;
    }

    // Loop-invariant: the token's length in characters (not UTF-8 bytes).
    let token_chars = token_lower.chars().count() as f32;

    pattern_lower
        .split_whitespace()
        // Very short words would match almost everything; skip them.
        .filter(|word| word.len() >= 3)
        .map(|word| {
            if token_lower == word {
                0.9
            } else if token_lower.contains(word) {
                0.6 * (word.chars().count() as f32 / token_chars)
            } else if token_lower.len() >= 3 && word.contains(token_lower.as_str()) {
                0.5 * (token_chars / word.chars().count() as f32)
            } else {
                0.4 * calculate_fuzzy_similarity(&token_lower, word)
            }
        })
        .fold(0.0, f32::max)
}

/// Shared-character ratio of two strings.
///
/// Counts the distinct characters that occur in both strings and divides by
/// the character count of the longer string. Character order and repetition
/// are ignored. Returns `0.0` when either string is shorter than three bytes.
fn calculate_fuzzy_similarity(s1: &str, s2: &str) -> f32 {
    if s1.len() < 3 || s2.len() < 3 {
        return 0.0;
    }

    let s1_chars: HashSet<char> = s1.chars().collect();
    let s2_chars: HashSet<char> = s2.chars().collect();
    let common_chars = s1_chars.intersection(&s2_chars).count();

    // Measure the denominator in characters, the same unit as the numerator;
    // a byte length would deflate the score for non-ASCII text.
    let longest = s1.chars().count().max(s2.chars().count());

    common_chars as f32 / longest as f32
}
149
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn tokenisation_preserves_spacing_and_punctuation() {
        let source = "fn main() {\n    println!(\"hello\");\n}";

        // Every whitespace character is a separate token (the four-space
        // indent yields four tokens) so each can be highlighted on its own.
        let expected = vec![
            "fn", " ", "main", "(", ")", " ", "{", "\n", " ", " ", " ", " ", "println", "!", "(",
            "\"hello\"", ")", ";", "\n", "}",
        ];

        assert_eq!(split_into_tokens(source), expected);
    }

    #[test]
    fn similarity_scores_expected_patterns() {
        // Identical token and pattern is a perfect match.
        assert_eq!(calculate_token_similarity("hello", "hello"), 1.0);
        // A token that merely contains the pattern still scores above zero.
        assert!(calculate_token_similarity("hello", "hell") > 0.0);
        // Punctuation never scores.
        assert_eq!(calculate_token_similarity("{", "hello"), 0.0);
    }

    #[test]
    fn heatmap_bucket_mapping_matches_thresholds() {
        // One representative score per bucket.
        let cases = [
            (0.0, HeatmapBucket::None),
            (0.01, HeatmapBucket::Step1),
            (0.2, HeatmapBucket::Step2),
            (0.3, HeatmapBucket::Step3),
            (0.4, HeatmapBucket::Step4),
            (0.5, HeatmapBucket::Step5),
            (0.7, HeatmapBucket::Step6),
            (0.8, HeatmapBucket::Step7),
            (0.9, HeatmapBucket::Step8),
        ];

        for &(score, expected) in cases.iter() {
            assert_eq!(HeatmapBucket::from_score(score), expected);
        }
    }

    #[test]
    fn bucket_rgb_matches_expected_values() {
        assert_eq!(HeatmapBucket::Step1.rgb(), Some((180, 180, 180)));
        assert_eq!(HeatmapBucket::Step8.rgb(), Some((0, 255, 100)));
        assert_eq!(HeatmapBucket::None.rgb(), None);
    }
}
211}