1use std::collections::HashSet;
2
/// Discrete highlight level for a heatmap cell.
///
/// `None` carries no colour; `Step1` through `Step8` are the levels that
/// [`HeatmapBucket::from_score`] assigns to progressively larger scores.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HeatmapBucket {
    /// No highlight: the score was not greater than zero.
    None,
    /// Any positive score below `0.125`.
    Step1,
    /// Scores in `[0.125, 0.25)`.
    Step2,
    /// Scores in `[0.25, 0.375)`.
    Step3,
    /// Scores in `[0.375, 0.5)`.
    Step4,
    /// Scores in `[0.5, 0.625)`.
    Step5,
    /// Scores in `[0.625, 0.75)`.
    Step6,
    /// Scores in `[0.75, 0.875)`.
    Step7,
    /// Scores of `0.875` and above.
    Step8,
}

impl HeatmapBucket {
    /// Picks the bucket for `score`.
    ///
    /// From `0.125` upwards the buckets are `0.125` wide with inclusive lower
    /// bounds. Any other positive score is `Step1`. Zero, negative values and
    /// NaN (which fails every comparison) all yield `None`.
    pub fn from_score(score: f32) -> Self {
        // Inclusive lower bounds, highest first, so the first hit is the answer.
        let floors = [
            (0.875, Self::Step8),
            (0.75, Self::Step7),
            (0.625, Self::Step6),
            (0.5, Self::Step5),
            (0.375, Self::Step4),
            (0.25, Self::Step3),
            (0.125, Self::Step2),
        ];

        for &(floor, bucket) in floors.iter() {
            if score >= floor {
                return bucket;
            }
        }

        if score > 0.0 {
            Self::Step1
        } else {
            Self::None
        }
    }

    /// Returns the `(red, green, blue)` colour of this bucket, or `None`
    /// (the `Option` variant) for [`HeatmapBucket::None`].
    pub fn rgb(self) -> Option<(u8, u8, u8)> {
        let colour = match self {
            // Bare `None` here is `Option::None`, not the enum variant.
            Self::None => return None,
            Self::Step1 => (180, 180, 180),
            Self::Step2 => (140, 140, 140),
            Self::Step3 => (100, 130, 100),
            Self::Step4 => (50, 120, 80),
            Self::Step5 => (0, 140, 60),
            Self::Step6 => (0, 160, 70),
            Self::Step7 => (0, 180, 80),
            Self::Step8 => (0, 255, 100),
        };
        Some(colour)
    }

    /// Reports whether this bucket is bold; only `Step8` is.
    pub fn is_bold(self) -> bool {
        self == Self::Step8
    }
}
61
/// Splits `text` into word, whitespace and punctuation tokens.
///
/// Each whitespace character (`' '`, `'\t'`, `'\n'`, `'\r'`) and each of the
/// punctuation characters `( ) [ ] { } , ; : . ! ?` becomes a token of its
/// own. Every maximal run of other characters becomes one word token.
/// Nothing is dropped, so concatenating the returned tokens reproduces
/// `text` exactly.
///
/// Returns an empty vector for an empty input.
pub fn split_into_tokens(text: &str) -> Vec<String> {
    let mut tokens = Vec::new();
    let mut word = String::new();

    for ch in text.chars() {
        // `'\r'` is a delimiter so that CRLF line endings do not leave a
        // carriage return glued to the last word of each line.
        let is_delimiter = matches!(
            ch,
            ' ' | '\t'
                | '\n'
                | '\r'
                | '('
                | ')'
                | '['
                | ']'
                | '{'
                | '}'
                | ','
                | ';'
                | ':'
                | '.'
                | '!'
                | '?'
        );

        if !is_delimiter {
            word.push(ch);
            continue;
        }

        // Flush the pending word first; `take` moves it out without copying
        // and leaves an empty buffer behind.
        if !word.is_empty() {
            tokens.push(std::mem::take(&mut word));
        }
        tokens.push(ch.to_string());
    }

    if !word.is_empty() {
        tokens.push(word);
    }

    tokens
}
94
95pub fn calculate_token_similarity(token: &str, pattern: &str) -> f32 {
98 if token.trim().is_empty() || token.chars().all(|c| !c.is_alphanumeric()) {
99 return 0.0;
100 }
101
102 let token_lower = token.to_lowercase();
103 let pattern_lower = pattern.to_lowercase();
104
105 if token_lower == pattern_lower {
106 return 1.0;
107 }
108
109 let pattern_words: Vec<&str> = pattern_lower.split_whitespace().collect();
110 let mut max_score: f32 = 0.0;
111
112 for pattern_word in &pattern_words {
113 if pattern_word.len() < 3 {
114 continue;
115 }
116
117 if token_lower == *pattern_word {
118 max_score = max_score.max(0.9);
119 } else if token_lower.contains(pattern_word) {
120 let ratio = pattern_word.len() as f32 / token_lower.len() as f32;
121 max_score = max_score.max(0.6 * ratio);
122 } else if pattern_word.contains(&token_lower) && token_lower.len() >= 3 {
123 let ratio = token_lower.len() as f32 / pattern_word.len() as f32;
124 max_score = max_score.max(0.5 * ratio);
125 } else {
126 let similarity = calculate_fuzzy_similarity(&token_lower, pattern_word);
127 max_score = max_score.max(similarity * 0.4);
128 }
129 }
130
131 max_score
132}
133
/// Character-overlap similarity between `s1` and `s2`, in `[0.0, 1.0]`.
///
/// The result is the number of distinct characters the two strings share,
/// divided by the character count of the longer string. Repeated characters
/// count once in the numerator, and character order is ignored.
///
/// Returns `0.0` when either string is shorter than three bytes (`str::len`),
/// which also covers empty input.
fn calculate_fuzzy_similarity(s1: &str, s2: &str) -> f32 {
    if s1.len() < 3 || s2.len() < 3 {
        return 0.0;
    }

    // Measure the denominator in characters, the same unit as the shared-char
    // count; a byte length would deflate scores for multi-byte text.
    let longest = s1.chars().count().max(s2.chars().count());

    let s1_chars: HashSet<char> = s1.chars().collect();
    let s2_chars: HashSet<char> = s2.chars().collect();
    let shared = s1_chars.intersection(&s2_chars).count();

    shared as f32 / longest as f32
}
149
#[cfg(test)]
mod tests {
    use super::*;

    /// Words, each individual space, newlines and punctuation must all come
    /// back as separate tokens, in input order.
    #[test]
    fn tokenisation_preserves_spacing_and_punctuation() {
        // Four spaces of indentation, matching the four `" "` tokens below.
        let tokens = split_into_tokens("fn main() {\n    println!(\"hello\");\n}");
        let expected = [
            "fn", " ", "main", "(", ")", " ", "{", "\n", " ", " ", " ", " ", "println", "!", "(",
            "\"hello\"", ")", ";", "\n", "}",
        ];
        assert_eq!(tokens, expected);
    }

    /// Exact match scores 1.0, a partial match scores above zero, and a
    /// punctuation-only token scores zero.
    #[test]
    fn similarity_scores_expected_patterns() {
        assert_eq!(calculate_token_similarity("hello", "hello"), 1.0);
        assert!(calculate_token_similarity("hello", "hell") > 0.0);
        assert_eq!(calculate_token_similarity("{", "hello"), 0.0);
    }

    /// One representative score per bucket.
    #[test]
    fn heatmap_bucket_mapping_matches_thresholds() {
        let cases = [
            (0.0, HeatmapBucket::None),
            (0.01, HeatmapBucket::Step1),
            (0.2, HeatmapBucket::Step2),
            (0.3, HeatmapBucket::Step3),
            (0.4, HeatmapBucket::Step4),
            (0.5, HeatmapBucket::Step5),
            (0.7, HeatmapBucket::Step6),
            (0.8, HeatmapBucket::Step7),
            (0.9, HeatmapBucket::Step8),
        ];

        for &(score, expected) in cases.iter() {
            assert_eq!(HeatmapBucket::from_score(score), expected, "score {}", score);
        }
    }

    /// Spot-checks both ends of the colour table and the colourless bucket.
    #[test]
    fn bucket_rgb_matches_expected_values() {
        assert_eq!(HeatmapBucket::Step1.rgb(), Some((180, 180, 180)));
        assert_eq!(HeatmapBucket::Step8.rgb(), Some((0, 255, 100)));
        assert!(HeatmapBucket::None.rgb().is_none());
    }
}