1use crate::db_healthcheck::DbHealthChecker;
2use crate::error::{Error, Result};
3use crate::file_picker::FFFMode;
4use crate::git::is_modified_status;
5use crate::shared::SharedFrecency;
6use heed::{Database, Env, EnvOpenOptions};
7use heed::{
8 EnvFlags,
9 types::{Bytes, SerdeBincode},
10};
11use std::fs;
12use std::path::PathBuf;
13use std::time::{SystemTime, UNIX_EPOCH};
14use std::{collections::VecDeque, path::Path};
15
16const DECAY_CONSTANT: f64 = 0.0693; const SECONDS_PER_DAY: f64 = 86400.0;
18const MAX_HISTORY_DAYS: f64 = 30.0; const AI_DECAY_CONSTANT: f64 = 0.231; const AI_MAX_HISTORY_DAYS: f64 = 7.0; #[derive(Debug)]
25pub struct FrecencyTracker {
26 env: Env,
27 db: Database<Bytes, SerdeBincode<VecDeque<u64>>>,
28}
29
30const MODIFICATION_THRESHOLDS: [(i64, u64); 5] = [
31 (16, 60 * 2), (8, 60 * 15), (4, 60 * 60), (2, 60 * 60 * 24), (1, 60 * 60 * 24 * 7), ];
37
38const AI_MODIFICATION_THRESHOLDS: [(i64, u64); 5] = [
40 (16, 30), (8, 60 * 5), (4, 60 * 15), (2, 60 * 60), (1, 60 * 60 * 4), ];
46
47impl DbHealthChecker for FrecencyTracker {
48 fn get_env(&self) -> &heed::Env {
49 &self.env
50 }
51
52 fn count_entries(&self) -> Result<Vec<(&'static str, u64)>> {
53 let rtxn = self.env.read_txn().map_err(Error::DbStartReadTxn)?;
54 let count = self.db.len(&rtxn).map_err(Error::DbRead)?;
55
56 Ok(vec![("absolute_frecency_entries", count)])
57 }
58}
59
60impl FrecencyTracker {
61 pub fn db_path(&self) -> &Path {
63 self.env.path()
64 }
65
66 pub fn new(db_path: impl AsRef<Path>, use_unsafe_no_lock: bool) -> Result<Self> {
67 let db_path = db_path.as_ref();
68 fs::create_dir_all(db_path).map_err(Error::CreateDir)?;
69
70 let env = unsafe {
71 let mut opts = EnvOpenOptions::new();
72 opts.map_size(24 * 1024 * 1024); if use_unsafe_no_lock {
74 opts.flags(EnvFlags::NO_LOCK | EnvFlags::NO_SYNC | EnvFlags::NO_META_SYNC);
75 }
76 opts.open(db_path).map_err(Error::EnvOpen)?
77 };
78 env.clear_stale_readers()
79 .map_err(Error::DbClearStaleReaders)?;
80
81 let rtxn = env.read_txn().map_err(Error::DbStartReadTxn)?;
86 let maybe_db: Option<Database<Bytes, SerdeBincode<VecDeque<u64>>>> =
87 env.open_database(&rtxn, None).map_err(Error::DbOpen)?;
88
89 drop(rtxn);
90
91 let db = match maybe_db {
92 Some(db) => db,
93 None => {
94 let mut wtxn = env.write_txn().map_err(Error::DbStartWriteTxn)?;
96 let db = env
97 .create_database(&mut wtxn, None)
98 .map_err(Error::DbCreate)?;
99 wtxn.commit().map_err(Error::DbCommit)?;
100 db
101 }
102 };
103
104 Ok(FrecencyTracker {
105 db,
106 env: env.clone(),
107 })
108 }
109
110 pub fn spawn_gc(
122 shared: SharedFrecency,
123 db_path: String,
124 use_unsafe_no_lock: bool,
125 ) -> Result<std::thread::JoinHandle<()>> {
126 Ok(std::thread::Builder::new()
127 .name("fff-frecency-gc".into())
128 .spawn(move || Self::run_frecency_gc(shared, db_path, use_unsafe_no_lock))?)
129 }
130
131 #[tracing::instrument(skip(shared), fields(db_path = %db_path))]
132 fn run_frecency_gc(shared: SharedFrecency, db_path: String, use_unsafe_no_lock: bool) {
133 let start = std::time::Instant::now();
134 let data_path = PathBuf::from(&db_path).join("data.mdb");
135
136 let (deleted, pruned) = {
140 let guard = match shared.read() {
141 Ok(g) => g,
142 Err(e) => {
143 tracing::debug!("Failed to acquire read lock: {e}");
144 return;
145 }
146 };
147 let Some(ref tracker) = *guard else {
148 return;
149 };
150 match tracker.purge_stale_entries() {
151 Ok(result) => result,
152 Err(e) => {
153 tracing::debug!("Purge failed: {e}");
154 return;
155 }
156 }
157 };
158
159 if deleted > 0 || pruned > 0 {
160 tracing::info!(deleted, pruned, elapsed = ?start.elapsed(), "Frecency GC purged entries");
161 }
162
163 let file_size = fs::metadata(&data_path).map(|m| m.len()).unwrap_or(0);
165 if deleted == 0 && pruned == 0 && file_size <= 512 * 1024 {
166 return;
167 }
168
169 let mut guard = match shared.write() {
171 Ok(g) => g,
172 Err(e) => {
173 tracing::debug!("Failed to acquire write lock: {e}");
174 return;
175 }
176 };
177
178 let entries: Vec<(Vec<u8>, VecDeque<u64>)> = match guard.as_ref() {
180 Some(tracker) => {
181 let rtxn = match tracker.env.read_txn() {
182 Ok(t) => t,
183 Err(e) => {
184 tracing::debug!("Compaction read_txn failed: {e}");
185 return;
186 }
187 };
188 let iter = match tracker.db.iter(&rtxn) {
189 Ok(i) => i,
190 Err(e) => {
191 tracing::debug!("Compaction iter failed: {e}");
192 return;
193 }
194 };
195 let mut entries = Vec::new();
196 let mut read_errors = 0u32;
197 for result in iter {
198 match result {
199 Ok((key, value)) => entries.push((key.to_vec(), value)),
200 Err(_) => read_errors += 1,
201 }
202 }
203 if read_errors > 0 {
204 tracing::warn!(
205 read_errors,
206 "Skipped corrupted entries during compaction read"
207 );
208 }
209 entries
210 }
211 None => return,
212 };
213
214 *guard = None;
216
217 let lock_path = PathBuf::from(&db_path).join("lock.mdb");
218 let _ = fs::remove_file(&data_path);
219 let _ = fs::remove_file(&lock_path);
220
221 let tracker = match FrecencyTracker::new(&db_path, use_unsafe_no_lock) {
222 Ok(t) => t,
223 Err(e) => {
224 tracing::error!("Compaction reopen failed, frecency disabled: {e}");
225 return;
226 }
227 };
228
229 let write_result = (|| -> std::result::Result<(), heed::Error> {
230 let mut wtxn = tracker.env.write_txn()?;
231 for (key, value) in &entries {
232 tracker.db.put(&mut wtxn, key.as_slice(), value)?;
233 }
234 wtxn.commit()?;
235 Ok(())
236 })();
237
238 match write_result {
239 Ok(()) => {
240 let new_size = fs::metadata(&data_path).map(|m| m.len()).unwrap_or(0);
241 *guard = Some(tracker);
242 tracing::debug!(
243 entries = entries.len(),
244 old_size = file_size,
245 new_size,
246 elapsed = ?start.elapsed(),
247 "Frecency DB compacted"
248 );
249 }
250 Err(e) => {
251 tracing::error!("Compaction write failed, frecency data may be incomplete: {e}");
252 *guard = Some(tracker);
253 }
254 }
255 }
256
257 fn purge_stale_entries(&self) -> Result<(usize, usize)> {
261 let now = self.get_now();
262 let cutoff_time = now.saturating_sub((MAX_HISTORY_DAYS * SECONDS_PER_DAY) as u64);
263
264 let rtxn = self.env.read_txn().map_err(Error::DbStartReadTxn)?;
266 let mut to_delete: Vec<Vec<u8>> = Vec::new();
267 let mut to_update: Vec<(Vec<u8>, VecDeque<u64>)> = Vec::new();
268
269 let iter = self.db.iter(&rtxn).map_err(Error::DbRead)?;
270 for result in iter {
271 let (key, accesses) = result.map_err(Error::DbRead)?;
272
273 let fresh_start = accesses.iter().position(|&ts| ts >= cutoff_time);
276 match fresh_start {
277 None => {
278 to_delete.push(key.to_vec());
280 }
281 Some(0) => {
282 }
284 Some(start) => {
285 let pruned: VecDeque<u64> = accesses.iter().skip(start).copied().collect();
287 to_update.push((key.to_vec(), pruned));
288 }
289 }
290 }
291 drop(rtxn);
292
293 if to_delete.is_empty() && to_update.is_empty() {
294 return Ok((0, 0));
295 }
296
297 let mut wtxn = self.env.write_txn().map_err(Error::DbStartWriteTxn)?;
299 for key in &to_delete {
300 self.db.delete(&mut wtxn, key).map_err(Error::DbWrite)?;
301 }
302 for (key, accesses) in &to_update {
303 self.db
304 .put(&mut wtxn, key, accesses)
305 .map_err(Error::DbWrite)?;
306 }
307 wtxn.commit().map_err(Error::DbCommit)?;
308
309 Ok((to_delete.len(), to_update.len()))
310 }
311
312 fn get_accesses(&self, path: &Path) -> Result<Option<VecDeque<u64>>> {
313 let rtxn = self.env.read_txn().map_err(Error::DbStartReadTxn)?;
314
315 let key_hash = Self::path_to_hash_bytes(path)?;
316 self.db.get(&rtxn, &key_hash).map_err(Error::DbRead)
317 }
318
319 fn get_now(&self) -> u64 {
320 SystemTime::now()
321 .duration_since(UNIX_EPOCH)
322 .unwrap()
323 .as_secs()
324 }
325
326 fn path_to_hash_bytes(path: &Path) -> Result<[u8; 32]> {
327 let Some(key) = path.to_str() else {
328 return Err(Error::InvalidPath(path.to_path_buf()));
329 };
330
331 Ok(*blake3::hash(key.as_bytes()).as_bytes())
332 }
333
334 pub fn seconds_since_last_access(&self, path: &Path) -> Result<Option<u64>> {
337 let accesses = self.get_accesses(path)?;
338 let last = accesses.and_then(|a| a.back().copied());
339 Ok(last.map(|ts| self.get_now().saturating_sub(ts)))
340 }
341
342 pub fn track_access(&self, path: &Path) -> Result<()> {
343 let mut wtxn = self.env.write_txn().map_err(Error::DbStartWriteTxn)?;
344
345 let key_hash = Self::path_to_hash_bytes(path)?;
346 let mut accesses = self.get_accesses(path)?.unwrap_or_default();
347
348 let now = self.get_now();
349 let cutoff_time = now.saturating_sub((MAX_HISTORY_DAYS * SECONDS_PER_DAY) as u64);
350 while let Some(&front_time) = accesses.front() {
351 if front_time < cutoff_time {
352 accesses.pop_front();
353 } else {
354 break;
355 }
356 }
357
358 accesses.push_back(now);
359 tracing::debug!(?path, accesses = accesses.len(), "Tracking access");
360
361 self.db
362 .put(&mut wtxn, &key_hash, &accesses)
363 .map_err(Error::DbWrite)?;
364
365 wtxn.commit().map_err(Error::DbCommit)?;
366
367 Ok(())
368 }
369
370 pub fn get_access_score(&self, file_path: &Path, mode: FFFMode) -> i64 {
371 let accesses = self
372 .get_accesses(file_path)
373 .ok()
374 .flatten()
375 .unwrap_or_default();
376
377 if accesses.is_empty() {
378 return 0;
379 }
380
381 let decay_constant = if mode.is_ai() {
382 AI_DECAY_CONSTANT
383 } else {
384 DECAY_CONSTANT
385 };
386 let max_history_days = if mode.is_ai() {
387 AI_MAX_HISTORY_DAYS
388 } else {
389 MAX_HISTORY_DAYS
390 };
391
392 let now = self.get_now();
393 let mut total_frecency = 0.0;
394
395 let cutoff_time = now.saturating_sub((max_history_days * SECONDS_PER_DAY) as u64);
396
397 for &access_time in accesses.iter().rev() {
398 if access_time < cutoff_time {
399 break; }
401
402 let days_ago = (now.saturating_sub(access_time) as f64) / SECONDS_PER_DAY;
403 let decay_factor = (-decay_constant * days_ago).exp();
404 total_frecency += decay_factor;
405 }
406
407 let normalized_frecency = if total_frecency <= 10.0 {
408 total_frecency
409 } else {
410 10.0 + (total_frecency - 10.0).sqrt() };
412
413 normalized_frecency.round() as i64
414 }
415
416 pub fn get_modification_score(
418 &self,
419 modified_time: u64,
420 git_status: Option<git2::Status>,
421 mode: FFFMode,
422 ) -> i64 {
423 let is_modified_git_status = git_status.is_some_and(is_modified_status);
424 if !is_modified_git_status {
425 return 0;
426 }
427
428 let thresholds = if mode.is_ai() {
429 &AI_MODIFICATION_THRESHOLDS
430 } else {
431 &MODIFICATION_THRESHOLDS
432 };
433
434 let now = self.get_now();
435 let duration_since = now.saturating_sub(modified_time);
436
437 for i in 0..thresholds.len() {
438 let (current_points, current_threshold) = thresholds[i];
439
440 if duration_since <= current_threshold {
441 if i == 0 || duration_since == current_threshold {
442 return current_points;
443 }
444
445 let (prev_points, prev_threshold) = thresholds[i - 1];
446
447 let time_range = current_threshold - prev_threshold;
448 let time_offset = duration_since - prev_threshold;
449 let points_diff = prev_points - current_points;
450
451 let interpolated_score =
452 prev_points - (points_diff * time_offset as i64) / time_range as i64;
453
454 return interpolated_score;
455 }
456 }
457
458 0
459 }
460}
461
462#[cfg(test)]
463mod tests {
464 use super::*;
465 use crate::file_picker::FFFMode;
466
467 fn calculate_test_frecency_score(access_timestamps: &[u64], current_time: u64) -> i64 {
468 let mut total_frecency = 0.0;
469
470 for &access_time in access_timestamps {
471 let days_ago = (current_time.saturating_sub(access_time) as f64) / SECONDS_PER_DAY;
472 let decay_factor = (-DECAY_CONSTANT * days_ago).exp();
473 total_frecency += decay_factor;
474 }
475
476 let normalized_frecency = if total_frecency <= 20.0 {
477 total_frecency
478 } else {
479 20.0 + (total_frecency - 10.0).sqrt()
480 };
481
482 normalized_frecency.round() as i64
483 }
484
485 #[test]
486 fn test_frecency_calculation() {
487 let current_time = 1000000000; let score = calculate_test_frecency_score(&[], current_time);
490 assert_eq!(score, 0);
491
492 let accesses = [current_time]; let score = calculate_test_frecency_score(&accesses, current_time);
494 assert_eq!(score, 1); let ten_days_seconds = 10 * 86400; let accesses = [current_time - ten_days_seconds];
498 let score = calculate_test_frecency_score(&accesses, current_time);
499 assert_eq!(score, 1); let accesses = [
502 current_time, current_time - 86400, current_time - 172800, ];
506 let score = calculate_test_frecency_score(&accesses, current_time);
507 assert!(score > 2 && score < 4, "Score: {}", score); let thirty_days = 30 * 86400;
510 let accesses = [current_time - thirty_days]; let score = calculate_test_frecency_score(&accesses, current_time);
512 assert!(
513 score < 2,
514 "Old access should have minimal score, got: {}",
515 score
516 );
517
518 let recent_frequent = [current_time, current_time - 86400, current_time - 172800];
519 let old_single = [current_time - ten_days_seconds];
520
521 let recent_score = calculate_test_frecency_score(&recent_frequent, current_time);
522 let old_score = calculate_test_frecency_score(&old_single, current_time);
523
524 assert!(
525 recent_score > old_score,
526 "Recent frequent access ({}) should score higher than old single access ({})",
527 recent_score,
528 old_score
529 );
530 }
531
532 #[test]
533 fn test_modification_score_interpolation() {
534 let temp_dir = std::env::temp_dir().join("fff_test_interpolation");
535 let _ = std::fs::remove_dir_all(&temp_dir);
536 let tracker = FrecencyTracker::new(temp_dir.to_str().unwrap(), true).unwrap();
537
538 let current_time = tracker.get_now();
539 let git_status = Some(git2::Status::WT_MODIFIED);
540
541 let five_minutes_ago = current_time - (5 * 60);
543 let score = tracker.get_modification_score(five_minutes_ago, git_status, FFFMode::Neovim);
544
545 assert_eq!(score, 15, "5 minutes should interpolate to 15 points");
548
549 let two_minutes_ago = current_time - (2 * 60);
550 let score = tracker.get_modification_score(two_minutes_ago, git_status, FFFMode::Neovim);
551 assert_eq!(score, 16, "2 minutes should be exactly 16 points");
552
553 let fifteen_minutes_ago = current_time - (15 * 60);
554 let score =
555 tracker.get_modification_score(fifteen_minutes_ago, git_status, FFFMode::Neovim);
556 assert_eq!(score, 8, "15 minutes should be exactly 8 points");
557
558 let twelve_hours_ago = current_time - (12 * 60 * 60);
560 let score = tracker.get_modification_score(twelve_hours_ago, git_status, FFFMode::Neovim);
561 assert_eq!(score, 4, "12 hours should interpolate to 4 points");
564
565 let eighteen_hours_ago = current_time - (18 * 60 * 60);
567 let score = tracker.get_modification_score(eighteen_hours_ago, git_status, FFFMode::Neovim);
568 assert_eq!(score, 3, "18 hours should interpolate to 3 points");
570
571 let score = tracker.get_modification_score(five_minutes_ago, None, FFFMode::Neovim);
572 assert_eq!(score, 0, "No git status should return 0");
573
574 let _ = std::fs::remove_dir_all(&temp_dir);
575 }
576}