//! fff_search/frecency.rs — frecency (frequency + recency) tracking for the
//! file picker, persisted in an LMDB database via `heed`.
use crate::db_healthcheck::DbHealthChecker;
use crate::file_picker::FFFMode;
use crate::{SharedFrecency, error::Error, git::is_modified_status};
use heed::{Database, Env, EnvOpenOptions};
use heed::{
    EnvFlags,
    types::{Bytes, SerdeBincode},
};
use std::fs;
use std::path::PathBuf;
use std::time::{SystemTime, UNIX_EPOCH};
use std::{collections::VecDeque, path::Path};

// Exponential-decay parameters for the frecency score: each access contributes
// e^(-DECAY_CONSTANT * days_ago), so the contribution halves every half-life.
const DECAY_CONSTANT: f64 = 0.0693; // ln(2)/10 for 10-day half-life
const SECONDS_PER_DAY: f64 = 86400.0;
const MAX_HISTORY_DAYS: f64 = 30.0; // Only consider accesses within 30 days

// AI mode: faster decay since AI sessions are shorter and more intense
const AI_DECAY_CONSTANT: f64 = 0.231; // ln(2)/3 for 3-day half-life
const AI_MAX_HISTORY_DAYS: f64 = 7.0; // Only consider accesses within 7 days

/// Persistent access-history tracker backed by an LMDB environment (via heed).
///
/// Keys are blake3 hashes of file paths; values are chronologically ordered
/// queues of unix-second access timestamps (oldest at the front).
#[derive(Debug)]
pub struct FrecencyTracker {
    env: Env,
    db: Database<Bytes, SerdeBincode<VecDeque<u64>>>,
}

// Modification-recency score table: (points, max age in seconds), ordered from
// newest to oldest. `get_modification_score` linearly interpolates between
// adjacent rows for ages that fall between two thresholds.
const MODIFICATION_THRESHOLDS: [(i64, u64); 5] = [
    (16, 60 * 2),          // 2 minutes
    (8, 60 * 15),          // 15 minutes
    (4, 60 * 60),          // 1 hour
    (2, 60 * 60 * 24),     // 1 day
    (1, 60 * 60 * 24 * 7), // 1 week
];

// AI mode: compressed thresholds since AI edits happen in rapid bursts
const AI_MODIFICATION_THRESHOLDS: [(i64, u64); 5] = [
    (16, 30),         // 30 seconds
    (8, 60 * 5),      // 5 minutes
    (4, 60 * 15),     // 15 minutes
    (2, 60 * 60),     // 1 hour
    (1, 60 * 60 * 4), // 4 hours
];

45impl DbHealthChecker for FrecencyTracker {
46    fn get_env(&self) -> &heed::Env {
47        &self.env
48    }
49
50    fn count_entries(&self) -> Result<Vec<(&'static str, u64)>, Error> {
51        let rtxn = self.env.read_txn().map_err(Error::DbStartReadTxn)?;
52        let count = self.db.len(&rtxn).map_err(Error::DbRead)?;
53
54        Ok(vec![("absolute_frecency_entries", count)])
55    }
56}
57
58impl FrecencyTracker {
59    pub fn new(db_path: &str, use_unsafe_no_lock: bool) -> Result<Self, Error> {
60        fs::create_dir_all(db_path).map_err(Error::CreateDir)?;
61        let env = unsafe {
62            let mut opts = EnvOpenOptions::new();
63            opts.map_size(24 * 1024 * 1024); // 24 MiB
64            if use_unsafe_no_lock {
65                opts.flags(EnvFlags::NO_LOCK | EnvFlags::NO_SYNC | EnvFlags::NO_META_SYNC);
66            }
67            opts.open(db_path).map_err(Error::EnvOpen)?
68        };
69        env.clear_stale_readers()
70            .map_err(Error::DbClearStaleReaders)?;
71
72        // we will open the default unnamed database
73        let mut wtxn = env.write_txn().map_err(Error::DbStartWriteTxn)?;
74        let db = env
75            .create_database(&mut wtxn, None)
76            .map_err(Error::DbCreate)?;
77
78        Ok(FrecencyTracker {
79            db,
80            env: env.clone(),
81        })
82    }
83
84    /// Spawns a background thread to purge stale frecency entries and compact the database.
85    ///
86    /// Phase 1 (read lock): purge stale entries — deletes expired entries and prunes old timestamps.
87    /// Phase 2 (write lock): compact the database by re-writing entries into a fresh LMDB env.
88    ///   We can't use LMDB's copy_to_path with NO_LOCK envs (MDB_INCOMPATIBLE),
89    ///   so instead we: read all entries → drop env → delete files → reopen → write back.
90    pub fn spawn_gc(shared: SharedFrecency, db_path: String, use_unsafe_no_lock: bool) {
91        std::thread::Builder::new()
92            .name("fff-frecency-gc".into())
93            .spawn(move || Self::run_frecency_gc(shared, db_path, use_unsafe_no_lock))
94            .ok();
95    }
96
97    #[tracing::instrument(skip(shared), fields(db_path = %db_path))]
98    fn run_frecency_gc(shared: SharedFrecency, db_path: String, use_unsafe_no_lock: bool) {
99        let start = std::time::Instant::now();
100        let data_path = PathBuf::from(&db_path).join("data.mdb");
101
102        // Phase 1: Purge stale entries.
103        // The RwLock protects the Option<FrecencyTracker> (not the DB itself),
104        // so a read lock is sufficient — LMDB handles its own write serialization.
105        let (deleted, pruned) = {
106            let guard = match shared.read() {
107                Ok(g) => g,
108                Err(e) => {
109                    tracing::debug!("Failed to acquire read lock: {e}");
110                    return;
111                }
112            };
113            let Some(ref tracker) = *guard else {
114                return;
115            };
116            match tracker.purge_stale_entries() {
117                Ok(result) => result,
118                Err(e) => {
119                    tracing::debug!("Purge failed: {e}");
120                    return;
121                }
122            }
123        };
124
125        if deleted > 0 || pruned > 0 {
126            tracing::info!(deleted, pruned, elapsed = ?start.elapsed(), "Frecency GC purged entries");
127        }
128
129        // Compact if we purged entries OR the file has significant freelist bloat
130        let file_size = fs::metadata(&data_path).map(|m| m.len()).unwrap_or(0);
131        if deleted == 0 && pruned == 0 && file_size <= 512 * 1024 {
132            return;
133        }
134
135        // Phase 2: Manual compaction under a single write lock
136        let mut guard = match shared.write() {
137            Ok(g) => g,
138            Err(e) => {
139                tracing::debug!("Failed to acquire write lock: {e}");
140                return;
141            }
142        };
143
144        // Read all entries from current env
145        let entries: Vec<(Vec<u8>, VecDeque<u64>)> = match guard.as_ref() {
146            Some(tracker) => {
147                let rtxn = match tracker.env.read_txn() {
148                    Ok(t) => t,
149                    Err(e) => {
150                        tracing::debug!("Compaction read_txn failed: {e}");
151                        return;
152                    }
153                };
154                let iter = match tracker.db.iter(&rtxn) {
155                    Ok(i) => i,
156                    Err(e) => {
157                        tracing::debug!("Compaction iter failed: {e}");
158                        return;
159                    }
160                };
161                let mut entries = Vec::new();
162                let mut read_errors = 0u32;
163                for result in iter {
164                    match result {
165                        Ok((key, value)) => entries.push((key.to_vec(), value)),
166                        Err(_) => read_errors += 1,
167                    }
168                }
169                if read_errors > 0 {
170                    tracing::warn!(
171                        read_errors,
172                        "Skipped corrupted entries during compaction read"
173                    );
174                }
175                entries
176            }
177            None => return,
178        };
179
180        // Drop old tracker, delete files, create fresh env, write back
181        *guard = None;
182
183        let lock_path = PathBuf::from(&db_path).join("lock.mdb");
184        let _ = fs::remove_file(&data_path);
185        let _ = fs::remove_file(&lock_path);
186
187        let tracker = match FrecencyTracker::new(&db_path, use_unsafe_no_lock) {
188            Ok(t) => t,
189            Err(e) => {
190                tracing::error!("Compaction reopen failed, frecency disabled: {e}");
191                return;
192            }
193        };
194
195        let write_result = (|| -> std::result::Result<(), heed::Error> {
196            let mut wtxn = tracker.env.write_txn()?;
197            for (key, value) in &entries {
198                tracker.db.put(&mut wtxn, key.as_slice(), value)?;
199            }
200            wtxn.commit()?;
201            Ok(())
202        })();
203
204        match write_result {
205            Ok(()) => {
206                let new_size = fs::metadata(&data_path).map(|m| m.len()).unwrap_or(0);
207                *guard = Some(tracker);
208                tracing::debug!(
209                    entries = entries.len(),
210                    old_size = file_size,
211                    new_size,
212                    elapsed = ?start.elapsed(),
213                    "Frecency DB compacted"
214                );
215            }
216            Err(e) => {
217                tracing::error!("Compaction write failed, frecency data may be incomplete: {e}");
218                *guard = Some(tracker);
219            }
220        }
221    }
222
223    /// Removes entries where all timestamps are older than MAX_HISTORY_DAYS,
224    /// and prunes stale timestamps from entries that still have recent ones.
225    /// Returns (deleted_count, pruned_count).
226    fn purge_stale_entries(&self) -> Result<(usize, usize), Error> {
227        let now = self.get_now();
228        let cutoff_time = now.saturating_sub((MAX_HISTORY_DAYS * SECONDS_PER_DAY) as u64);
229
230        // Collect entries to delete or update
231        let rtxn = self.env.read_txn().map_err(Error::DbStartReadTxn)?;
232        let mut to_delete: Vec<Vec<u8>> = Vec::new();
233        let mut to_update: Vec<(Vec<u8>, VecDeque<u64>)> = Vec::new();
234
235        let iter = self.db.iter(&rtxn).map_err(Error::DbRead)?;
236        for result in iter {
237            let (key, accesses) = result.map_err(Error::DbRead)?;
238
239            // Timestamps are chronologically ordered (oldest at front).
240            // Find the first timestamp that is still within the retention window.
241            let fresh_start = accesses.iter().position(|&ts| ts >= cutoff_time);
242            match fresh_start {
243                None => {
244                    // All timestamps are stale — delete the entire entry
245                    to_delete.push(key.to_vec());
246                }
247                Some(0) => {
248                    // All timestamps are fresh — nothing to do
249                }
250                Some(start) => {
251                    // Some timestamps are stale — keep only the fresh ones
252                    let pruned: VecDeque<u64> = accesses.iter().skip(start).copied().collect();
253                    to_update.push((key.to_vec(), pruned));
254                }
255            }
256        }
257        drop(rtxn);
258
259        if to_delete.is_empty() && to_update.is_empty() {
260            return Ok((0, 0));
261        }
262
263        // Apply all changes in a single write transaction
264        let mut wtxn = self.env.write_txn().map_err(Error::DbStartWriteTxn)?;
265        for key in &to_delete {
266            self.db.delete(&mut wtxn, key).map_err(Error::DbWrite)?;
267        }
268        for (key, accesses) in &to_update {
269            self.db
270                .put(&mut wtxn, key, accesses)
271                .map_err(Error::DbWrite)?;
272        }
273        wtxn.commit().map_err(Error::DbCommit)?;
274
275        Ok((to_delete.len(), to_update.len()))
276    }
277
278    fn get_accesses(&self, path: &Path) -> Result<Option<VecDeque<u64>>, Error> {
279        let rtxn = self.env.read_txn().map_err(Error::DbStartReadTxn)?;
280
281        let key_hash = Self::path_to_hash_bytes(path)?;
282        self.db.get(&rtxn, &key_hash).map_err(Error::DbRead)
283    }
284
285    fn get_now(&self) -> u64 {
286        SystemTime::now()
287            .duration_since(UNIX_EPOCH)
288            .unwrap()
289            .as_secs()
290    }
291
292    fn path_to_hash_bytes(path: &Path) -> Result<[u8; 32], Error> {
293        let Some(key) = path.to_str() else {
294            return Err(Error::InvalidPath(path.to_path_buf()));
295        };
296
297        Ok(*blake3::hash(key.as_bytes()).as_bytes())
298    }
299
300    /// Returns seconds since the most recent tracked access, or `None` if the
301    /// file has never been tracked.
302    pub fn seconds_since_last_access(&self, path: &Path) -> Result<Option<u64>, Error> {
303        let accesses = self.get_accesses(path)?;
304        let last = accesses.and_then(|a| a.back().copied());
305        Ok(last.map(|ts| self.get_now().saturating_sub(ts)))
306    }
307
308    pub fn track_access(&self, path: &Path) -> Result<(), Error> {
309        let mut wtxn = self.env.write_txn().map_err(Error::DbStartWriteTxn)?;
310
311        let key_hash = Self::path_to_hash_bytes(path)?;
312        let mut accesses = self.get_accesses(path)?.unwrap_or_default();
313
314        let now = self.get_now();
315        let cutoff_time = now.saturating_sub((MAX_HISTORY_DAYS * SECONDS_PER_DAY) as u64);
316        while let Some(&front_time) = accesses.front() {
317            if front_time < cutoff_time {
318                accesses.pop_front();
319            } else {
320                break;
321            }
322        }
323
324        accesses.push_back(now);
325        tracing::debug!(?path, accesses = accesses.len(), "Tracking access");
326
327        self.db
328            .put(&mut wtxn, &key_hash, &accesses)
329            .map_err(Error::DbWrite)?;
330
331        wtxn.commit().map_err(Error::DbCommit)?;
332
333        Ok(())
334    }
335
336    pub fn get_access_score(&self, file_path: &Path, mode: FFFMode) -> i64 {
337        let accesses = self
338            .get_accesses(file_path)
339            .ok()
340            .flatten()
341            .unwrap_or_default();
342
343        if accesses.is_empty() {
344            return 0;
345        }
346
347        let decay_constant = if mode.is_ai() {
348            AI_DECAY_CONSTANT
349        } else {
350            DECAY_CONSTANT
351        };
352        let max_history_days = if mode.is_ai() {
353            AI_MAX_HISTORY_DAYS
354        } else {
355            MAX_HISTORY_DAYS
356        };
357
358        let now = self.get_now();
359        let mut total_frecency = 0.0;
360
361        let cutoff_time = now.saturating_sub((max_history_days * SECONDS_PER_DAY) as u64);
362
363        for &access_time in accesses.iter().rev() {
364            if access_time < cutoff_time {
365                break; // All remaining entries are older, stop processing
366            }
367
368            let days_ago = (now.saturating_sub(access_time) as f64) / SECONDS_PER_DAY;
369            let decay_factor = (-decay_constant * days_ago).exp();
370            total_frecency += decay_factor;
371        }
372
373        let normalized_frecency = if total_frecency <= 10.0 {
374            total_frecency
375        } else {
376            10.0 + (total_frecency - 10.0).sqrt() // Diminishing: >10 accesses grow slowly
377        };
378
379        normalized_frecency.round() as i64
380    }
381
382    /// Calculating modification score but only if the file is modified in the current git dir
383    pub fn get_modification_score(
384        &self,
385        modified_time: u64,
386        git_status: Option<git2::Status>,
387        mode: FFFMode,
388    ) -> i64 {
389        let is_modified_git_status = git_status.is_some_and(is_modified_status);
390        if !is_modified_git_status {
391            return 0;
392        }
393
394        let thresholds = if mode.is_ai() {
395            &AI_MODIFICATION_THRESHOLDS
396        } else {
397            &MODIFICATION_THRESHOLDS
398        };
399
400        let now = self.get_now();
401        let duration_since = now.saturating_sub(modified_time);
402
403        for i in 0..thresholds.len() {
404            let (current_points, current_threshold) = thresholds[i];
405
406            if duration_since <= current_threshold {
407                if i == 0 || duration_since == current_threshold {
408                    return current_points;
409                }
410
411                let (prev_points, prev_threshold) = thresholds[i - 1];
412
413                let time_range = current_threshold - prev_threshold;
414                let time_offset = duration_since - prev_threshold;
415                let points_diff = prev_points - current_points;
416
417                let interpolated_score =
418                    prev_points - (points_diff * time_offset as i64) / time_range as i64;
419
420                return interpolated_score;
421            }
422        }
423
424        0
425    }
426}
427
#[cfg(test)]
mod tests {
    use super::*;
    use crate::file_picker::FFFMode;

433    fn calculate_test_frecency_score(access_timestamps: &[u64], current_time: u64) -> i64 {
434        let mut total_frecency = 0.0;
435
436        for &access_time in access_timestamps {
437            let days_ago = (current_time.saturating_sub(access_time) as f64) / SECONDS_PER_DAY;
438            let decay_factor = (-DECAY_CONSTANT * days_ago).exp();
439            total_frecency += decay_factor;
440        }
441
442        let normalized_frecency = if total_frecency <= 20.0 {
443            total_frecency
444        } else {
445            20.0 + (total_frecency - 10.0).sqrt()
446        };
447
448        normalized_frecency.round() as i64
449    }
450
451    #[test]
452    fn test_frecency_calculation() {
453        let current_time = 1000000000; // Base timestamp
454
455        let score = calculate_test_frecency_score(&[], current_time);
456        assert_eq!(score, 0);
457
458        let accesses = [current_time]; // Accessed right now
459        let score = calculate_test_frecency_score(&accesses, current_time);
460        assert_eq!(score, 1); // 1.0 decay factor = 1
461
462        let ten_days_seconds = 10 * 86400; // 10 days in seconds
463        let accesses = [current_time - ten_days_seconds];
464        let score = calculate_test_frecency_score(&accesses, current_time);
465        assert_eq!(score, 1); // ~0.5 decay factor rounds to 1
466
467        let accesses = [
468            current_time,          // Today
469            current_time - 86400,  // 1 day ago
470            current_time - 172800, // 2 days ago
471        ];
472        let score = calculate_test_frecency_score(&accesses, current_time);
473        assert!(score > 2 && score < 4, "Score: {}", score); // About 3 accesses with decay
474
475        let thirty_days = 30 * 86400;
476        let accesses = [current_time - thirty_days]; // 30 days ago
477        let score = calculate_test_frecency_score(&accesses, current_time);
478        assert!(
479            score < 2,
480            "Old access should have minimal score, got: {}",
481            score
482        );
483
484        let recent_frequent = [current_time, current_time - 86400, current_time - 172800];
485        let old_single = [current_time - ten_days_seconds];
486
487        let recent_score = calculate_test_frecency_score(&recent_frequent, current_time);
488        let old_score = calculate_test_frecency_score(&old_single, current_time);
489
490        assert!(
491            recent_score > old_score,
492            "Recent frequent access ({}) should score higher than old single access ({})",
493            recent_score,
494            old_score
495        );
496    }
497
    #[test]
    fn test_modification_score_interpolation() {
        // Use a real tracker against a throwaway temp-dir DB; `true` enables
        // NO_LOCK so no lock.mdb is needed for the test environment.
        let temp_dir = std::env::temp_dir().join("fff_test_interpolation");
        let _ = std::fs::remove_dir_all(&temp_dir);
        let tracker = FrecencyTracker::new(temp_dir.to_str().unwrap(), true).unwrap();

        let current_time = tracker.get_now();
        let git_status = Some(git2::Status::WT_MODIFIED);

        // At 5 minutes: should interpolate between 16 and 8 points
        let five_minutes_ago = current_time - (5 * 60);
        let score = tracker.get_modification_score(five_minutes_ago, git_status, FFFMode::Neovim);

        // Expected: 16 - (8 * 3 / 13) = 16 - 1 = 15 points
        // (time_offset = 5-2 = 3, time_range = 15-2 = 13, points_diff = 16-8 = 8)
        assert_eq!(score, 15, "5 minutes should interpolate to 15 points");

        // Exactly on a threshold: no interpolation, exact table value.
        let two_minutes_ago = current_time - (2 * 60);
        let score = tracker.get_modification_score(two_minutes_ago, git_status, FFFMode::Neovim);
        assert_eq!(score, 16, "2 minutes should be exactly 16 points");

        let fifteen_minutes_ago = current_time - (15 * 60);
        let score =
            tracker.get_modification_score(fifteen_minutes_ago, git_status, FFFMode::Neovim);
        assert_eq!(score, 8, "15 minutes should be exactly 8 points");

        // At 12 hours: should interpolate between 4 and 2 points
        let twelve_hours_ago = current_time - (12 * 60 * 60);
        let score = tracker.get_modification_score(twelve_hours_ago, git_status, FFFMode::Neovim);
        // Expected: 4 - (2 * 11 / 23) = 4 - 0 = 4 points (integer division)
        // (time_offset = 12-1 = 11 hours, time_range = 24-1 = 23 hours, points_diff = 4-2 = 2)
        assert_eq!(score, 4, "12 hours should interpolate to 4 points");

        // at 18 hours for more significant interpolation
        let eighteen_hours_ago = current_time - (18 * 60 * 60);
        let score = tracker.get_modification_score(eighteen_hours_ago, git_status, FFFMode::Neovim);
        // Expected: 4 - (2 * 17 / 23) = 4 - 1 = 3 points
        assert_eq!(score, 3, "18 hours should interpolate to 3 points");

        // Without a git status the file is not "modified", so score is 0.
        let score = tracker.get_modification_score(five_minutes_ago, None, FFFMode::Neovim);
        assert_eq!(score, 0, "No git status should return 0");

        // Clean up the temp DB so reruns start fresh.
        let _ = std::fs::remove_dir_all(&temp_dir);
    }
542}