1use crate::db_healthcheck::DbHealthChecker;
2use crate::file_picker::FFFMode;
3use crate::{SharedFrecency, error::Error, git::is_modified_status};
4use heed::{Database, Env, EnvOpenOptions};
5use heed::{
6 EnvFlags,
7 types::{Bytes, SerdeBincode},
8};
9use std::fs;
10use std::path::PathBuf;
11use std::time::{SystemTime, UNIX_EPOCH};
12use std::{collections::VecDeque, path::Path};
13
14const DECAY_CONSTANT: f64 = 0.0693; const SECONDS_PER_DAY: f64 = 86400.0;
16const MAX_HISTORY_DAYS: f64 = 30.0; const AI_DECAY_CONSTANT: f64 = 0.231; const AI_MAX_HISTORY_DAYS: f64 = 7.0; #[derive(Debug)]
23pub struct FrecencyTracker {
24 env: Env,
25 db: Database<Bytes, SerdeBincode<VecDeque<u64>>>,
26}
27
28const MODIFICATION_THRESHOLDS: [(i64, u64); 5] = [
29 (16, 60 * 2), (8, 60 * 15), (4, 60 * 60), (2, 60 * 60 * 24), (1, 60 * 60 * 24 * 7), ];
35
36const AI_MODIFICATION_THRESHOLDS: [(i64, u64); 5] = [
38 (16, 30), (8, 60 * 5), (4, 60 * 15), (2, 60 * 60), (1, 60 * 60 * 4), ];
44
45impl DbHealthChecker for FrecencyTracker {
46 fn get_env(&self) -> &heed::Env {
47 &self.env
48 }
49
50 fn count_entries(&self) -> Result<Vec<(&'static str, u64)>, Error> {
51 let rtxn = self.env.read_txn().map_err(Error::DbStartReadTxn)?;
52 let count = self.db.len(&rtxn).map_err(Error::DbRead)?;
53
54 Ok(vec![("absolute_frecency_entries", count)])
55 }
56}
57
58impl FrecencyTracker {
59 pub fn new(db_path: &str, use_unsafe_no_lock: bool) -> Result<Self, Error> {
60 fs::create_dir_all(db_path).map_err(Error::CreateDir)?;
61 let env = unsafe {
62 let mut opts = EnvOpenOptions::new();
63 opts.map_size(24 * 1024 * 1024); if use_unsafe_no_lock {
65 opts.flags(EnvFlags::NO_LOCK | EnvFlags::NO_SYNC | EnvFlags::NO_META_SYNC);
66 }
67 opts.open(db_path).map_err(Error::EnvOpen)?
68 };
69 env.clear_stale_readers()
70 .map_err(Error::DbClearStaleReaders)?;
71
72 let mut wtxn = env.write_txn().map_err(Error::DbStartWriteTxn)?;
74 let db = env
75 .create_database(&mut wtxn, None)
76 .map_err(Error::DbCreate)?;
77
78 Ok(FrecencyTracker {
79 db,
80 env: env.clone(),
81 })
82 }
83
84 pub fn spawn_gc(shared: SharedFrecency, db_path: String, use_unsafe_no_lock: bool) {
91 std::thread::Builder::new()
92 .name("fff-frecency-gc".into())
93 .spawn(move || Self::run_frecency_gc(shared, db_path, use_unsafe_no_lock))
94 .ok();
95 }
96
97 #[tracing::instrument(skip(shared), fields(db_path = %db_path))]
98 fn run_frecency_gc(shared: SharedFrecency, db_path: String, use_unsafe_no_lock: bool) {
99 let start = std::time::Instant::now();
100 let data_path = PathBuf::from(&db_path).join("data.mdb");
101
102 let (deleted, pruned) = {
106 let guard = match shared.read() {
107 Ok(g) => g,
108 Err(e) => {
109 tracing::debug!("Failed to acquire read lock: {e}");
110 return;
111 }
112 };
113 let Some(ref tracker) = *guard else {
114 return;
115 };
116 match tracker.purge_stale_entries() {
117 Ok(result) => result,
118 Err(e) => {
119 tracing::debug!("Purge failed: {e}");
120 return;
121 }
122 }
123 };
124
125 if deleted > 0 || pruned > 0 {
126 tracing::info!(deleted, pruned, elapsed = ?start.elapsed(), "Frecency GC purged entries");
127 }
128
129 let file_size = fs::metadata(&data_path).map(|m| m.len()).unwrap_or(0);
131 if deleted == 0 && pruned == 0 && file_size <= 512 * 1024 {
132 return;
133 }
134
135 let mut guard = match shared.write() {
137 Ok(g) => g,
138 Err(e) => {
139 tracing::debug!("Failed to acquire write lock: {e}");
140 return;
141 }
142 };
143
144 let entries: Vec<(Vec<u8>, VecDeque<u64>)> = match guard.as_ref() {
146 Some(tracker) => {
147 let rtxn = match tracker.env.read_txn() {
148 Ok(t) => t,
149 Err(e) => {
150 tracing::debug!("Compaction read_txn failed: {e}");
151 return;
152 }
153 };
154 let iter = match tracker.db.iter(&rtxn) {
155 Ok(i) => i,
156 Err(e) => {
157 tracing::debug!("Compaction iter failed: {e}");
158 return;
159 }
160 };
161 let mut entries = Vec::new();
162 let mut read_errors = 0u32;
163 for result in iter {
164 match result {
165 Ok((key, value)) => entries.push((key.to_vec(), value)),
166 Err(_) => read_errors += 1,
167 }
168 }
169 if read_errors > 0 {
170 tracing::warn!(
171 read_errors,
172 "Skipped corrupted entries during compaction read"
173 );
174 }
175 entries
176 }
177 None => return,
178 };
179
180 *guard = None;
182
183 let lock_path = PathBuf::from(&db_path).join("lock.mdb");
184 let _ = fs::remove_file(&data_path);
185 let _ = fs::remove_file(&lock_path);
186
187 let tracker = match FrecencyTracker::new(&db_path, use_unsafe_no_lock) {
188 Ok(t) => t,
189 Err(e) => {
190 tracing::error!("Compaction reopen failed, frecency disabled: {e}");
191 return;
192 }
193 };
194
195 let write_result = (|| -> std::result::Result<(), heed::Error> {
196 let mut wtxn = tracker.env.write_txn()?;
197 for (key, value) in &entries {
198 tracker.db.put(&mut wtxn, key.as_slice(), value)?;
199 }
200 wtxn.commit()?;
201 Ok(())
202 })();
203
204 match write_result {
205 Ok(()) => {
206 let new_size = fs::metadata(&data_path).map(|m| m.len()).unwrap_or(0);
207 *guard = Some(tracker);
208 tracing::debug!(
209 entries = entries.len(),
210 old_size = file_size,
211 new_size,
212 elapsed = ?start.elapsed(),
213 "Frecency DB compacted"
214 );
215 }
216 Err(e) => {
217 tracing::error!("Compaction write failed, frecency data may be incomplete: {e}");
218 *guard = Some(tracker);
219 }
220 }
221 }
222
223 fn purge_stale_entries(&self) -> Result<(usize, usize), Error> {
227 let now = self.get_now();
228 let cutoff_time = now.saturating_sub((MAX_HISTORY_DAYS * SECONDS_PER_DAY) as u64);
229
230 let rtxn = self.env.read_txn().map_err(Error::DbStartReadTxn)?;
232 let mut to_delete: Vec<Vec<u8>> = Vec::new();
233 let mut to_update: Vec<(Vec<u8>, VecDeque<u64>)> = Vec::new();
234
235 let iter = self.db.iter(&rtxn).map_err(Error::DbRead)?;
236 for result in iter {
237 let (key, accesses) = result.map_err(Error::DbRead)?;
238
239 let fresh_start = accesses.iter().position(|&ts| ts >= cutoff_time);
242 match fresh_start {
243 None => {
244 to_delete.push(key.to_vec());
246 }
247 Some(0) => {
248 }
250 Some(start) => {
251 let pruned: VecDeque<u64> = accesses.iter().skip(start).copied().collect();
253 to_update.push((key.to_vec(), pruned));
254 }
255 }
256 }
257 drop(rtxn);
258
259 if to_delete.is_empty() && to_update.is_empty() {
260 return Ok((0, 0));
261 }
262
263 let mut wtxn = self.env.write_txn().map_err(Error::DbStartWriteTxn)?;
265 for key in &to_delete {
266 self.db.delete(&mut wtxn, key).map_err(Error::DbWrite)?;
267 }
268 for (key, accesses) in &to_update {
269 self.db
270 .put(&mut wtxn, key, accesses)
271 .map_err(Error::DbWrite)?;
272 }
273 wtxn.commit().map_err(Error::DbCommit)?;
274
275 Ok((to_delete.len(), to_update.len()))
276 }
277
278 fn get_accesses(&self, path: &Path) -> Result<Option<VecDeque<u64>>, Error> {
279 let rtxn = self.env.read_txn().map_err(Error::DbStartReadTxn)?;
280
281 let key_hash = Self::path_to_hash_bytes(path)?;
282 self.db.get(&rtxn, &key_hash).map_err(Error::DbRead)
283 }
284
285 fn get_now(&self) -> u64 {
286 SystemTime::now()
287 .duration_since(UNIX_EPOCH)
288 .unwrap()
289 .as_secs()
290 }
291
292 fn path_to_hash_bytes(path: &Path) -> Result<[u8; 32], Error> {
293 let Some(key) = path.to_str() else {
294 return Err(Error::InvalidPath(path.to_path_buf()));
295 };
296
297 Ok(*blake3::hash(key.as_bytes()).as_bytes())
298 }
299
300 pub fn seconds_since_last_access(&self, path: &Path) -> Result<Option<u64>, Error> {
303 let accesses = self.get_accesses(path)?;
304 let last = accesses.and_then(|a| a.back().copied());
305 Ok(last.map(|ts| self.get_now().saturating_sub(ts)))
306 }
307
308 pub fn track_access(&self, path: &Path) -> Result<(), Error> {
309 let mut wtxn = self.env.write_txn().map_err(Error::DbStartWriteTxn)?;
310
311 let key_hash = Self::path_to_hash_bytes(path)?;
312 let mut accesses = self.get_accesses(path)?.unwrap_or_default();
313
314 let now = self.get_now();
315 let cutoff_time = now.saturating_sub((MAX_HISTORY_DAYS * SECONDS_PER_DAY) as u64);
316 while let Some(&front_time) = accesses.front() {
317 if front_time < cutoff_time {
318 accesses.pop_front();
319 } else {
320 break;
321 }
322 }
323
324 accesses.push_back(now);
325 tracing::debug!(?path, accesses = accesses.len(), "Tracking access");
326
327 self.db
328 .put(&mut wtxn, &key_hash, &accesses)
329 .map_err(Error::DbWrite)?;
330
331 wtxn.commit().map_err(Error::DbCommit)?;
332
333 Ok(())
334 }
335
336 pub fn get_access_score(&self, file_path: &Path, mode: FFFMode) -> i64 {
337 let accesses = self
338 .get_accesses(file_path)
339 .ok()
340 .flatten()
341 .unwrap_or_default();
342
343 if accesses.is_empty() {
344 return 0;
345 }
346
347 let decay_constant = if mode.is_ai() {
348 AI_DECAY_CONSTANT
349 } else {
350 DECAY_CONSTANT
351 };
352 let max_history_days = if mode.is_ai() {
353 AI_MAX_HISTORY_DAYS
354 } else {
355 MAX_HISTORY_DAYS
356 };
357
358 let now = self.get_now();
359 let mut total_frecency = 0.0;
360
361 let cutoff_time = now.saturating_sub((max_history_days * SECONDS_PER_DAY) as u64);
362
363 for &access_time in accesses.iter().rev() {
364 if access_time < cutoff_time {
365 break; }
367
368 let days_ago = (now.saturating_sub(access_time) as f64) / SECONDS_PER_DAY;
369 let decay_factor = (-decay_constant * days_ago).exp();
370 total_frecency += decay_factor;
371 }
372
373 let normalized_frecency = if total_frecency <= 10.0 {
374 total_frecency
375 } else {
376 10.0 + (total_frecency - 10.0).sqrt() };
378
379 normalized_frecency.round() as i64
380 }
381
382 pub fn get_modification_score(
384 &self,
385 modified_time: u64,
386 git_status: Option<git2::Status>,
387 mode: FFFMode,
388 ) -> i64 {
389 let is_modified_git_status = git_status.is_some_and(is_modified_status);
390 if !is_modified_git_status {
391 return 0;
392 }
393
394 let thresholds = if mode.is_ai() {
395 &AI_MODIFICATION_THRESHOLDS
396 } else {
397 &MODIFICATION_THRESHOLDS
398 };
399
400 let now = self.get_now();
401 let duration_since = now.saturating_sub(modified_time);
402
403 for i in 0..thresholds.len() {
404 let (current_points, current_threshold) = thresholds[i];
405
406 if duration_since <= current_threshold {
407 if i == 0 || duration_since == current_threshold {
408 return current_points;
409 }
410
411 let (prev_points, prev_threshold) = thresholds[i - 1];
412
413 let time_range = current_threshold - prev_threshold;
414 let time_offset = duration_since - prev_threshold;
415 let points_diff = prev_points - current_points;
416
417 let interpolated_score =
418 prev_points - (points_diff * time_offset as i64) / time_range as i64;
419
420 return interpolated_score;
421 }
422 }
423
424 0
425 }
426}
427
428#[cfg(test)]
429mod tests {
430 use super::*;
431 use crate::file_picker::FFFMode;
432
433 fn calculate_test_frecency_score(access_timestamps: &[u64], current_time: u64) -> i64 {
434 let mut total_frecency = 0.0;
435
436 for &access_time in access_timestamps {
437 let days_ago = (current_time.saturating_sub(access_time) as f64) / SECONDS_PER_DAY;
438 let decay_factor = (-DECAY_CONSTANT * days_ago).exp();
439 total_frecency += decay_factor;
440 }
441
442 let normalized_frecency = if total_frecency <= 20.0 {
443 total_frecency
444 } else {
445 20.0 + (total_frecency - 10.0).sqrt()
446 };
447
448 normalized_frecency.round() as i64
449 }
450
451 #[test]
452 fn test_frecency_calculation() {
453 let current_time = 1000000000; let score = calculate_test_frecency_score(&[], current_time);
456 assert_eq!(score, 0);
457
458 let accesses = [current_time]; let score = calculate_test_frecency_score(&accesses, current_time);
460 assert_eq!(score, 1); let ten_days_seconds = 10 * 86400; let accesses = [current_time - ten_days_seconds];
464 let score = calculate_test_frecency_score(&accesses, current_time);
465 assert_eq!(score, 1); let accesses = [
468 current_time, current_time - 86400, current_time - 172800, ];
472 let score = calculate_test_frecency_score(&accesses, current_time);
473 assert!(score > 2 && score < 4, "Score: {}", score); let thirty_days = 30 * 86400;
476 let accesses = [current_time - thirty_days]; let score = calculate_test_frecency_score(&accesses, current_time);
478 assert!(
479 score < 2,
480 "Old access should have minimal score, got: {}",
481 score
482 );
483
484 let recent_frequent = [current_time, current_time - 86400, current_time - 172800];
485 let old_single = [current_time - ten_days_seconds];
486
487 let recent_score = calculate_test_frecency_score(&recent_frequent, current_time);
488 let old_score = calculate_test_frecency_score(&old_single, current_time);
489
490 assert!(
491 recent_score > old_score,
492 "Recent frequent access ({}) should score higher than old single access ({})",
493 recent_score,
494 old_score
495 );
496 }
497
498 #[test]
499 fn test_modification_score_interpolation() {
500 let temp_dir = std::env::temp_dir().join("fff_test_interpolation");
501 let _ = std::fs::remove_dir_all(&temp_dir);
502 let tracker = FrecencyTracker::new(temp_dir.to_str().unwrap(), true).unwrap();
503
504 let current_time = tracker.get_now();
505 let git_status = Some(git2::Status::WT_MODIFIED);
506
507 let five_minutes_ago = current_time - (5 * 60);
509 let score = tracker.get_modification_score(five_minutes_ago, git_status, FFFMode::Neovim);
510
511 assert_eq!(score, 15, "5 minutes should interpolate to 15 points");
514
515 let two_minutes_ago = current_time - (2 * 60);
516 let score = tracker.get_modification_score(two_minutes_ago, git_status, FFFMode::Neovim);
517 assert_eq!(score, 16, "2 minutes should be exactly 16 points");
518
519 let fifteen_minutes_ago = current_time - (15 * 60);
520 let score =
521 tracker.get_modification_score(fifteen_minutes_ago, git_status, FFFMode::Neovim);
522 assert_eq!(score, 8, "15 minutes should be exactly 8 points");
523
524 let twelve_hours_ago = current_time - (12 * 60 * 60);
526 let score = tracker.get_modification_score(twelve_hours_ago, git_status, FFFMode::Neovim);
527 assert_eq!(score, 4, "12 hours should interpolate to 4 points");
530
531 let eighteen_hours_ago = current_time - (18 * 60 * 60);
533 let score = tracker.get_modification_score(eighteen_hours_ago, git_status, FFFMode::Neovim);
534 assert_eq!(score, 3, "18 hours should interpolate to 3 points");
536
537 let score = tracker.get_modification_score(five_minutes_ago, None, FFFMode::Neovim);
538 assert_eq!(score, 0, "No git status should return 0");
539
540 let _ = std::fs::remove_dir_all(&temp_dir);
541 }
542}