1use crate::Kind;
2
/// Scoring pipeline configuration: an optional set of weight overrides plus
/// the function that combines weights and inputs into a relevance score.
#[derive(Debug)]
pub struct ScoringConfig {
    /// Weight overrides; `None` presumably means "fall back to
    /// `ScoringWeights::default()`" at the call site — TODO confirm (the
    /// fallback is not visible in this file).
    pub weights: Option<ScoringWeights>,
    /// Scoring function invoked per candidate; defaults to [`compute_score`].
    pub scoring_fn: fn(&ScoringWeights, &ScoringInputs) -> f64,
}
11
12impl Default for ScoringConfig {
13 fn default() -> Self {
14 Self {
15 weights: Default::default(),
16 scoring_fn: compute_score,
17 }
18 }
19}
20
/// Additive weights consumed by [`compute_score`]. Bonuses are added to and
/// penalties subtracted from a running score; larger values weigh more.
#[derive(Debug, Clone, Copy)]
pub struct ScoringWeights {
    /// Maximum bonus for covering the path's words with token matches;
    /// scaled by coverage ratio, with the path length capped at 8 words.
    pub token_coverage: f64,
    /// Bonus when a token match is word-aligned on both sides.
    pub exact_match: f64,
    /// Bonus when a token equals the entire final path component.
    pub exact_filename_match: f64,
    /// Bonus when a token equals the filename stem (the remainder after the
    /// match starts with '.').
    pub exact_stem_match: f64,
    /// Bonus when a token matches at a word boundary inside the filename,
    /// but not at its start.
    pub filename_match: f64,
    /// Bonus when a token is a word-aligned prefix of the filename.
    pub filename_prefix_match: f64,
    /// Bonus when a token matches at a word boundary in the directory part.
    pub path_prefix_match: f64,
    /// Penalty applied when a token only matches mid-word.
    pub midword_penalty: f64,
    /// Base recency bonus, from which the decay term is subtracted.
    pub recency_boost: f64,
    /// Logarithmic decay factor: `recency_decay * ln(1 + age_days)`.
    pub recency_decay: f64,
    /// Bonus for regular files (symlinks receive half of this).
    pub kind_file_boost: f64,
    /// Bonus for directories.
    pub kind_dir_boost: f64,
    /// Maximum bonus for multi-token queries whose matches cluster; scaled
    /// by token density over the matched span.
    pub proximity_bonus: f64,
    /// Bonus when the raw query tokens appear in the path in query order.
    pub ordering_bonus: f64,
}
53
54impl Default for ScoringWeights {
55 fn default() -> Self {
56 Self {
57 token_coverage: 30.0,
58 exact_match: 10.0,
59 exact_filename_match: 100.0,
60 exact_stem_match: 70.0,
61 filename_match: 15.0,
62 filename_prefix_match: 50.0,
63 path_prefix_match: 20.0,
64 midword_penalty: 30.0,
65 recency_boost: 10.0,
66 recency_decay: 2.0,
67 kind_file_boost: 2.0,
68 kind_dir_boost: 2.0,
69 proximity_bonus: 20.0,
70 ordering_bonus: 15.0,
71 }
72 }
73}
74
/// Per-candidate inputs to the scoring function.
#[derive(Debug)]
pub struct ScoringInputs<'a> {
    /// Candidate path; case-folded internally before matching.
    pub path: &'a str,
    /// Query tokens matched against the folded path — presumably already
    /// lower-cased/folded by the tokenizer; confirm against the caller.
    pub query_tokens: &'a [String],
    /// Query tokens in original order, used for the in-order bonus.
    pub raw_query_tokens: &'a [&'a str],
    /// Last-modified timestamp, microseconds since the epoch.
    pub last_modified: u64,
    /// Last-accessed timestamp, microseconds since the epoch.
    pub last_accessed: u64,
    /// Entry kind (file / directory / symlink).
    pub kind: Kind,
    /// Current time in microseconds, as `f64`, for recency scoring.
    pub now_micros: f64,
}
85
86pub(crate) fn compute_score(weights: &ScoringWeights, inputs: &ScoringInputs) -> f64 {
87 let normalized = if inputs.path.is_ascii() {
88 inputs.path.to_lowercase()
89 } else {
90 crate::tokenizer::fold_path(inputs.path)
91 };
92
93 let trimmed_path = normalized.trim_end_matches(std::path::MAIN_SEPARATOR);
94
95 let file_name_start_idx = trimmed_path
96 .rfind(std::path::MAIN_SEPARATOR)
97 .map(|i| i + 1)
98 .unwrap_or(0);
99 let mut score = 0.0;
100
101 let mut unique_matched_indices = Vec::new();
102
103 for token in inputs.query_tokens {
105 let t_str = token.as_str();
106
107 let mut is_exact_filename = false;
108 let mut is_exact_stem = false;
109 let mut is_filename_start = false;
110 let mut is_in_filename = false;
111 let mut is_in_path = false;
112 let mut is_exact_word = false;
113 let mut has_any_match = false;
114
115 for (idx, _) in normalized.match_indices(t_str) {
116 has_any_match = true;
117 unique_matched_indices.push(idx); let start_boundary =
120 idx == 0 || !normalized[..idx].chars().last().unwrap().is_alphanumeric();
121 let end_boundary = idx + t_str.len() == normalized.len()
122 || !normalized[idx + t_str.len()..]
123 .chars()
124 .next()
125 .unwrap()
126 .is_alphanumeric();
127
128 if start_boundary {
129 if idx == file_name_start_idx {
130 let end_idx = idx + t_str.len();
131 if end_idx <= trimmed_path.len() {
132 let remainder = &trimmed_path[end_idx..];
133 if remainder.is_empty() {
134 is_exact_filename = true;
135 } else if remainder.starts_with('.') {
136 is_exact_stem = true;
137 } else {
138 is_filename_start = true;
139 }
140 }
141 } else if idx >= file_name_start_idx {
142 is_in_filename = true;
143 } else {
144 is_in_path = true;
145 }
146
147 if end_boundary {
148 is_exact_word = true;
149 }
150 }
151 }
152
153 if is_exact_filename {
154 score += weights.exact_filename_match;
155 } else if is_exact_stem {
156 score += weights.exact_stem_match;
157 } else if is_filename_start {
158 score += weights.filename_prefix_match;
159 } else if is_in_filename {
160 score += weights.filename_match;
161 } else if is_in_path {
162 score += weights.path_prefix_match;
163 } else if has_any_match {
164 score -= weights.midword_penalty;
165 }
166
167 if is_exact_word {
168 score += weights.exact_match;
169 }
170
171 if normalized.ends_with(&format!(".{}", t_str)) {
172 score -= 30.0;
173 }
174 }
175
176 unique_matched_indices.sort_unstable();
178 unique_matched_indices.dedup();
179
180 let path_word_count = trimmed_path
181 .split(|c: char| !c.is_alphanumeric())
182 .filter(|s| !s.is_empty())
183 .count();
184
185 if path_word_count > 0 {
186 let effective_length = (path_word_count as f64).min(8.0);
187 let coverage_ratio = (unique_matched_indices.len() as f64 / effective_length).min(1.0);
188 score += weights.token_coverage * coverage_ratio;
189 }
190
191 let recent_date = inputs.last_modified.max(inputs.last_accessed);
193 let age_days = (inputs.now_micros - recent_date as f64) / (1_000_000.0 * 86_400.0);
194 score += weights.recency_boost - weights.recency_decay * (1.0 + age_days.max(0.0)).ln();
195
196 score += match inputs.kind {
197 Kind::Directory => weights.kind_dir_boost,
198 Kind::File => weights.kind_file_boost,
199 Kind::Symlink => weights.kind_file_boost * 0.5,
200 };
201
202 if inputs.query_tokens.len() > 1 {
204 let mut min_pos = usize::MAX;
205 let mut max_pos = 0;
206 let mut total_token_len = 0;
207 let mut matched_count = 0;
208
209 for q in inputs.query_tokens {
210 if let Some(pos) = normalized.find(q.as_str()) {
211 min_pos = min_pos.min(pos);
212 max_pos = max_pos.max(pos + q.len());
213 total_token_len += q.len();
214 matched_count += 1;
215 }
216 }
217
218 if matched_count > 1 && max_pos > min_pos {
219 let span = max_pos - min_pos;
220 let density = (total_token_len as f64 / span as f64).min(1.0);
221 score += weights.proximity_bonus * density;
222 }
223 }
224
225 if inputs.raw_query_tokens.len() > 1 {
226 let mut last_pos = 0;
227 let mut is_ordered = true;
228
229 for raw_token in inputs.raw_query_tokens {
230 if let Some(pos) = normalized[last_pos..].find(raw_token) {
231 last_pos += pos + raw_token.len();
232 } else {
233 is_ordered = false;
234 break;
235 }
236 }
237
238 if is_ordered {
239 score += weights.ordering_bonus;
240 }
241 }
242
243 score
244}
245
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the `ScoringInputs` shape every test below shares: a plain
    /// file whose modified and accessed timestamps carry the same value.
    fn file_inputs<'a>(
        path: &'a str,
        query_tokens: &'a [String],
        raw_query_tokens: &'a [&'a str],
        timestamp: u64,
        now_micros: f64,
    ) -> ScoringInputs<'a> {
        ScoringInputs {
            path,
            query_tokens,
            raw_query_tokens,
            last_modified: timestamp,
            last_accessed: timestamp,
            kind: Kind::File,
            now_micros,
        }
    }

    #[test]
    fn test_compute_score_basic() {
        // A path containing the query token must outscore one that doesn't.
        let weights = ScoringWeights::default();
        let tokens = vec!["abc".to_string()];
        let raw = vec!["abc"];
        let now = 1_000_000.0;

        let hit = compute_score(&weights, &file_inputs("abc.txt", &tokens, &raw, 1_000_000, now));
        let miss =
            compute_score(&weights, &file_inputs("other.txt", &tokens, &raw, 1_000_000, now));

        assert!(hit > miss);
    }

    #[test]
    fn test_compute_score_filename_boost() {
        // A token matching the filename beats the same token matching only
        // a directory component.
        let config = ScoringWeights::default();
        let tokens = vec!["abc".to_string()];
        let raw = vec!["abc"];
        let now = 1_000_000.0;
        let sep = std::path::MAIN_SEPARATOR_STR;

        let in_dir = format!("{sep}foo{sep}abc{sep}file.txt");
        let in_name = format!("{sep}foo{sep}bar{sep}abc.txt");

        let dir_score =
            compute_score(&config, &file_inputs(&in_dir, &tokens, &raw, 1_000_000, now));
        let name_score =
            compute_score(&config, &file_inputs(&in_name, &tokens, &raw, 1_000_000, now));

        assert!(name_score > dir_score);
    }

    #[test]
    fn test_compute_score_depth_penalty() {
        // Shallow paths outscore deep ones for the same filename match,
        // via the token-coverage term.
        let config = ScoringWeights::default();
        let tokens = vec!["abc".to_string()];
        let raw = vec!["abc"];
        let now = 1_000_000.0;
        let sep = std::path::MAIN_SEPARATOR_STR;

        let shallow = format!("{sep}abc.txt");
        let deep = format!("{sep}foo{sep}bar{sep}baz{sep}abc.txt");

        let shallow_score =
            compute_score(&config, &file_inputs(&shallow, &tokens, &raw, 1_000_000, now));
        let deep_score =
            compute_score(&config, &file_inputs(&deep, &tokens, &raw, 1_000_000, now));

        assert!(shallow_score > deep_score);
    }

    #[test]
    fn test_compute_score_recency() {
        // More recently touched files score higher, all else equal.
        let config = ScoringWeights::default();
        let tokens = vec!["abc".to_string()];
        let raw = vec!["abc"];
        let now = 2_000_000_000_000.0;

        let recent = compute_score(
            &config,
            &file_inputs("abc.txt", &tokens, &raw, 1_900_000_000_000, now),
        );
        let old = compute_score(
            &config,
            &file_inputs("abc.txt", &tokens, &raw, 1_000_000_000_000, now),
        );

        assert!(recent > old);
    }

    #[test]
    fn test_compute_score_ordering() {
        // Tokens appearing in query order earn the ordering bonus.
        let config = ScoringWeights::default();
        let tokens = vec!["foo".to_string(), "bar".to_string()];
        let raw = vec!["foo", "bar"];
        let now = 1_000_000.0;

        let ordered =
            compute_score(&config, &file_inputs("foo_bar.txt", &tokens, &raw, 1_000_000, now));
        let unordered =
            compute_score(&config, &file_inputs("bar_foo.txt", &tokens, &raw, 1_000_000, now));

        assert!(ordered > unordered);
    }

    #[test]
    #[cfg(windows)]
    fn test_compute_score_windows_paths() {
        // Drive-letter, verbatim (\\?\), and UNC paths all score strongly
        // when the query token is the filename stem.
        let weights = ScoringWeights::default();
        let tokens = vec!["report".to_string()];
        let raw = vec!["report"];
        let now = 1_000_000.0;

        for path in [
            "C:\\Users\\joao\\report.pdf",
            "\\\\?\\D:\\Backup\\report.pdf",
            "\\\\server\\share\\finance\\report.pdf",
        ] {
            let score =
                compute_score(&weights, &file_inputs(path, &tokens, &raw, 1_000_000, now));
            assert!(score > 50.0);
        }
    }
}