Skip to main content

fff_search/
frecency.rs

1use crate::db_healthcheck::DbHealthChecker;
2use crate::error::{Error, Result};
3use crate::file_picker::FFFMode;
4use crate::git::is_modified_status;
5use crate::shared::SharedFrecency;
6use heed::{Database, Env, EnvOpenOptions};
7use heed::{
8    EnvFlags,
9    types::{Bytes, SerdeBincode},
10};
11use std::fs;
12use std::path::PathBuf;
13use std::time::{SystemTime, UNIX_EPOCH};
14use std::{collections::VecDeque, path::Path};
15
16const DECAY_CONSTANT: f64 = 0.0693; // ln(2)/10 for 10-day half-life
17const SECONDS_PER_DAY: f64 = 86400.0;
18const MAX_HISTORY_DAYS: f64 = 30.0; // Only consider accesses within 30 days
19
20// AI mode: faster decay since AI sessions are shorter and more intense
21const AI_DECAY_CONSTANT: f64 = 0.231; // ln(2)/3 for 3-day half-life
22const AI_MAX_HISTORY_DAYS: f64 = 7.0; // Only consider accesses within 7 days
23
24#[derive(Debug)]
25pub struct FrecencyTracker {
26    env: Env,
27    db: Database<Bytes, SerdeBincode<VecDeque<u64>>>,
28}
29
30const MODIFICATION_THRESHOLDS: [(i64, u64); 5] = [
31    (16, 60 * 2),          // 2 minutes
32    (8, 60 * 15),          // 15 minutes
33    (4, 60 * 60),          // 1 hour
34    (2, 60 * 60 * 24),     // 1 day
35    (1, 60 * 60 * 24 * 7), // 1 week
36];
37
38// AI mode: compressed thresholds since AI edits happen in rapid bursts
39const AI_MODIFICATION_THRESHOLDS: [(i64, u64); 5] = [
40    (16, 30),         // 30 seconds
41    (8, 60 * 5),      // 5 minutes
42    (4, 60 * 15),     // 15 minutes
43    (2, 60 * 60),     // 1 hour
44    (1, 60 * 60 * 4), // 4 hours
45];
46
47impl DbHealthChecker for FrecencyTracker {
48    fn get_env(&self) -> &heed::Env {
49        &self.env
50    }
51
52    fn count_entries(&self) -> Result<Vec<(&'static str, u64)>> {
53        let rtxn = self.env.read_txn().map_err(Error::DbStartReadTxn)?;
54        let count = self.db.len(&rtxn).map_err(Error::DbRead)?;
55
56        Ok(vec![("absolute_frecency_entries", count)])
57    }
58}
59
60impl FrecencyTracker {
61    /// Returns the on-disk path of the LMDB environment directory.
62    pub fn db_path(&self) -> &Path {
63        self.env.path()
64    }
65
66    pub fn new(db_path: impl AsRef<Path>, use_unsafe_no_lock: bool) -> Result<Self> {
67        let db_path = db_path.as_ref();
68        fs::create_dir_all(db_path).map_err(Error::CreateDir)?;
69
70        let env = unsafe {
71            let mut opts = EnvOpenOptions::new();
72            opts.map_size(24 * 1024 * 1024); // 24 MiB
73            if use_unsafe_no_lock {
74                opts.flags(EnvFlags::NO_LOCK | EnvFlags::NO_SYNC | EnvFlags::NO_META_SYNC);
75            }
76            opts.open(db_path).map_err(Error::EnvOpen)?
77        };
78        env.clear_stale_readers()
79            .map_err(Error::DbClearStaleReaders)?;
80
81        // Try read-only open first — avoids blocking on the LMDB write lock
82        // when another process (Neovim, another fff-mcp) already has it.
83        // Only fall back to create_database (which needs a write txn) if the
84        // database doesn't exist yet.
85        let rtxn = env.read_txn().map_err(Error::DbStartReadTxn)?;
86        let maybe_db: Option<Database<Bytes, SerdeBincode<VecDeque<u64>>>> =
87            env.open_database(&rtxn, None).map_err(Error::DbOpen)?;
88
89        drop(rtxn);
90
91        let db = match maybe_db {
92            Some(db) => db,
93            None => {
94                // First time: create the database (requires write lock).
95                let mut wtxn = env.write_txn().map_err(Error::DbStartWriteTxn)?;
96                let db = env
97                    .create_database(&mut wtxn, None)
98                    .map_err(Error::DbCreate)?;
99                wtxn.commit().map_err(Error::DbCommit)?;
100                db
101            }
102        };
103
104        Ok(FrecencyTracker {
105            db,
106            env: env.clone(),
107        })
108    }
109
110    /// Spawns a background thread to purge stale frecency entries and compact the database.
111    /// Run it once in a while to purge old pages and keep DB file size reasonable.
112    ///
113    /// It's okay to not join this thread since it acquires locks for the db access
114    ///
115    /// ```
116    /// use fff_search::frecency::FrecencyTracker;
117    /// use fff_search::SharedFrecency;
118    /// let shared_frecency: SharedFrecency = Default::default();
119    /// let _ = FrecencyTracker::spawn_gc(shared_frecency, "/path/to/frecency_db".into(), true).ok();
120    /// ```
121    pub fn spawn_gc(
122        shared: SharedFrecency,
123        db_path: String,
124        use_unsafe_no_lock: bool,
125    ) -> Result<std::thread::JoinHandle<()>> {
126        Ok(std::thread::Builder::new()
127            .name("fff-frecency-gc".into())
128            .spawn(move || Self::run_frecency_gc(shared, db_path, use_unsafe_no_lock))?)
129    }
130
131    #[tracing::instrument(skip(shared), fields(db_path = %db_path))]
132    fn run_frecency_gc(shared: SharedFrecency, db_path: String, use_unsafe_no_lock: bool) {
133        let start = std::time::Instant::now();
134        let data_path = PathBuf::from(&db_path).join("data.mdb");
135
136        // Phase 1: Purge stale entries.
137        // The RwLock protects the Option<FrecencyTracker> (not the DB itself),
138        // so a read lock is sufficient — LMDB handles its own write serialization.
139        let (deleted, pruned) = {
140            let guard = match shared.read() {
141                Ok(g) => g,
142                Err(e) => {
143                    tracing::debug!("Failed to acquire read lock: {e}");
144                    return;
145                }
146            };
147            let Some(ref tracker) = *guard else {
148                return;
149            };
150            match tracker.purge_stale_entries() {
151                Ok(result) => result,
152                Err(e) => {
153                    tracing::debug!("Purge failed: {e}");
154                    return;
155                }
156            }
157        };
158
159        if deleted > 0 || pruned > 0 {
160            tracing::info!(deleted, pruned, elapsed = ?start.elapsed(), "Frecency GC purged entries");
161        }
162
163        // Compact if we purged entries OR the file has significant freelist bloat
164        let file_size = fs::metadata(&data_path).map(|m| m.len()).unwrap_or(0);
165        if deleted == 0 && pruned == 0 && file_size <= 512 * 1024 {
166            return;
167        }
168
169        // Phase 2: Manual compaction under a single write lock
170        let mut guard = match shared.write() {
171            Ok(g) => g,
172            Err(e) => {
173                tracing::debug!("Failed to acquire write lock: {e}");
174                return;
175            }
176        };
177
178        // Read all entries from current env
179        let entries: Vec<(Vec<u8>, VecDeque<u64>)> = match guard.as_ref() {
180            Some(tracker) => {
181                let rtxn = match tracker.env.read_txn() {
182                    Ok(t) => t,
183                    Err(e) => {
184                        tracing::debug!("Compaction read_txn failed: {e}");
185                        return;
186                    }
187                };
188                let iter = match tracker.db.iter(&rtxn) {
189                    Ok(i) => i,
190                    Err(e) => {
191                        tracing::debug!("Compaction iter failed: {e}");
192                        return;
193                    }
194                };
195                let mut entries = Vec::new();
196                let mut read_errors = 0u32;
197                for result in iter {
198                    match result {
199                        Ok((key, value)) => entries.push((key.to_vec(), value)),
200                        Err(_) => read_errors += 1,
201                    }
202                }
203                if read_errors > 0 {
204                    tracing::warn!(
205                        read_errors,
206                        "Skipped corrupted entries during compaction read"
207                    );
208                }
209                entries
210            }
211            None => return,
212        };
213
214        // Drop old tracker, delete files, create fresh env, write back
215        *guard = None;
216
217        let lock_path = PathBuf::from(&db_path).join("lock.mdb");
218        let _ = fs::remove_file(&data_path);
219        let _ = fs::remove_file(&lock_path);
220
221        let tracker = match FrecencyTracker::new(&db_path, use_unsafe_no_lock) {
222            Ok(t) => t,
223            Err(e) => {
224                tracing::error!("Compaction reopen failed, frecency disabled: {e}");
225                return;
226            }
227        };
228
229        let write_result = (|| -> std::result::Result<(), heed::Error> {
230            let mut wtxn = tracker.env.write_txn()?;
231            for (key, value) in &entries {
232                tracker.db.put(&mut wtxn, key.as_slice(), value)?;
233            }
234            wtxn.commit()?;
235            Ok(())
236        })();
237
238        match write_result {
239            Ok(()) => {
240                let new_size = fs::metadata(&data_path).map(|m| m.len()).unwrap_or(0);
241                *guard = Some(tracker);
242                tracing::debug!(
243                    entries = entries.len(),
244                    old_size = file_size,
245                    new_size,
246                    elapsed = ?start.elapsed(),
247                    "Frecency DB compacted"
248                );
249            }
250            Err(e) => {
251                tracing::error!("Compaction write failed, frecency data may be incomplete: {e}");
252                *guard = Some(tracker);
253            }
254        }
255    }
256
257    /// Removes entries where all timestamps are older than MAX_HISTORY_DAYS,
258    /// and prunes stale timestamps from entries that still have recent ones.
259    /// Returns (deleted_count, pruned_count).
260    fn purge_stale_entries(&self) -> Result<(usize, usize)> {
261        let now = self.get_now();
262        let cutoff_time = now.saturating_sub((MAX_HISTORY_DAYS * SECONDS_PER_DAY) as u64);
263
264        // Collect entries to delete or update
265        let rtxn = self.env.read_txn().map_err(Error::DbStartReadTxn)?;
266        let mut to_delete: Vec<Vec<u8>> = Vec::new();
267        let mut to_update: Vec<(Vec<u8>, VecDeque<u64>)> = Vec::new();
268
269        let iter = self.db.iter(&rtxn).map_err(Error::DbRead)?;
270        for result in iter {
271            let (key, accesses) = result.map_err(Error::DbRead)?;
272
273            // Timestamps are chronologically ordered (oldest at front).
274            // Find the first timestamp that is still within the retention window.
275            let fresh_start = accesses.iter().position(|&ts| ts >= cutoff_time);
276            match fresh_start {
277                None => {
278                    // All timestamps are stale — delete the entire entry
279                    to_delete.push(key.to_vec());
280                }
281                Some(0) => {
282                    // All timestamps are fresh — nothing to do
283                }
284                Some(start) => {
285                    // Some timestamps are stale — keep only the fresh ones
286                    let pruned: VecDeque<u64> = accesses.iter().skip(start).copied().collect();
287                    to_update.push((key.to_vec(), pruned));
288                }
289            }
290        }
291        drop(rtxn);
292
293        if to_delete.is_empty() && to_update.is_empty() {
294            return Ok((0, 0));
295        }
296
297        // Apply all changes in a single write transaction
298        let mut wtxn = self.env.write_txn().map_err(Error::DbStartWriteTxn)?;
299        for key in &to_delete {
300            self.db.delete(&mut wtxn, key).map_err(Error::DbWrite)?;
301        }
302        for (key, accesses) in &to_update {
303            self.db
304                .put(&mut wtxn, key, accesses)
305                .map_err(Error::DbWrite)?;
306        }
307        wtxn.commit().map_err(Error::DbCommit)?;
308
309        Ok((to_delete.len(), to_update.len()))
310    }
311
312    fn get_accesses(&self, path: &Path) -> Result<Option<VecDeque<u64>>> {
313        let rtxn = self.env.read_txn().map_err(Error::DbStartReadTxn)?;
314
315        let key_hash = Self::path_to_hash_bytes(path)?;
316        self.db.get(&rtxn, &key_hash).map_err(Error::DbRead)
317    }
318
319    fn get_now(&self) -> u64 {
320        SystemTime::now()
321            .duration_since(UNIX_EPOCH)
322            .unwrap()
323            .as_secs()
324    }
325
326    fn path_to_hash_bytes(path: &Path) -> Result<[u8; 32]> {
327        let Some(key) = path.to_str() else {
328            return Err(Error::InvalidPath(path.to_path_buf()));
329        };
330
331        Ok(*blake3::hash(key.as_bytes()).as_bytes())
332    }
333
334    /// Returns seconds since the most recent tracked access, or `None` if the
335    /// file has never been tracked.
336    pub fn seconds_since_last_access(&self, path: &Path) -> Result<Option<u64>> {
337        let accesses = self.get_accesses(path)?;
338        let last = accesses.and_then(|a| a.back().copied());
339        Ok(last.map(|ts| self.get_now().saturating_sub(ts)))
340    }
341
342    pub fn track_access(&self, path: &Path) -> Result<()> {
343        let mut wtxn = self.env.write_txn().map_err(Error::DbStartWriteTxn)?;
344
345        let key_hash = Self::path_to_hash_bytes(path)?;
346        let mut accesses = self.get_accesses(path)?.unwrap_or_default();
347
348        let now = self.get_now();
349        let cutoff_time = now.saturating_sub((MAX_HISTORY_DAYS * SECONDS_PER_DAY) as u64);
350        while let Some(&front_time) = accesses.front() {
351            if front_time < cutoff_time {
352                accesses.pop_front();
353            } else {
354                break;
355            }
356        }
357
358        accesses.push_back(now);
359        tracing::debug!(?path, accesses = accesses.len(), "Tracking access");
360
361        self.db
362            .put(&mut wtxn, &key_hash, &accesses)
363            .map_err(Error::DbWrite)?;
364
365        wtxn.commit().map_err(Error::DbCommit)?;
366
367        Ok(())
368    }
369
370    pub fn get_access_score(&self, file_path: &Path, mode: FFFMode) -> i64 {
371        let accesses = self
372            .get_accesses(file_path)
373            .ok()
374            .flatten()
375            .unwrap_or_default();
376
377        if accesses.is_empty() {
378            return 0;
379        }
380
381        let decay_constant = if mode.is_ai() {
382            AI_DECAY_CONSTANT
383        } else {
384            DECAY_CONSTANT
385        };
386        let max_history_days = if mode.is_ai() {
387            AI_MAX_HISTORY_DAYS
388        } else {
389            MAX_HISTORY_DAYS
390        };
391
392        let now = self.get_now();
393        let mut total_frecency = 0.0;
394
395        let cutoff_time = now.saturating_sub((max_history_days * SECONDS_PER_DAY) as u64);
396
397        for &access_time in accesses.iter().rev() {
398            if access_time < cutoff_time {
399                break; // All remaining entries are older, stop processing
400            }
401
402            let days_ago = (now.saturating_sub(access_time) as f64) / SECONDS_PER_DAY;
403            let decay_factor = (-decay_constant * days_ago).exp();
404            total_frecency += decay_factor;
405        }
406
407        let normalized_frecency = if total_frecency <= 10.0 {
408            total_frecency
409        } else {
410            10.0 + (total_frecency - 10.0).sqrt() // Diminishing: >10 accesses grow slowly
411        };
412
413        normalized_frecency.round() as i64
414    }
415
416    /// Calculating modification score but only if the file is modified in the current git dir
417    pub fn get_modification_score(
418        &self,
419        modified_time: u64,
420        git_status: Option<git2::Status>,
421        mode: FFFMode,
422    ) -> i64 {
423        let is_modified_git_status = git_status.is_some_and(is_modified_status);
424        if !is_modified_git_status {
425            return 0;
426        }
427
428        let thresholds = if mode.is_ai() {
429            &AI_MODIFICATION_THRESHOLDS
430        } else {
431            &MODIFICATION_THRESHOLDS
432        };
433
434        let now = self.get_now();
435        let duration_since = now.saturating_sub(modified_time);
436
437        for i in 0..thresholds.len() {
438            let (current_points, current_threshold) = thresholds[i];
439
440            if duration_since <= current_threshold {
441                if i == 0 || duration_since == current_threshold {
442                    return current_points;
443                }
444
445                let (prev_points, prev_threshold) = thresholds[i - 1];
446
447                let time_range = current_threshold - prev_threshold;
448                let time_offset = duration_since - prev_threshold;
449                let points_diff = prev_points - current_points;
450
451                let interpolated_score =
452                    prev_points - (points_diff * time_offset as i64) / time_range as i64;
453
454                return interpolated_score;
455            }
456        }
457
458        0
459    }
460}
461
462#[cfg(test)]
463mod tests {
464    use super::*;
465    use crate::file_picker::FFFMode;
466
467    fn calculate_test_frecency_score(access_timestamps: &[u64], current_time: u64) -> i64 {
468        let mut total_frecency = 0.0;
469
470        for &access_time in access_timestamps {
471            let days_ago = (current_time.saturating_sub(access_time) as f64) / SECONDS_PER_DAY;
472            let decay_factor = (-DECAY_CONSTANT * days_ago).exp();
473            total_frecency += decay_factor;
474        }
475
476        let normalized_frecency = if total_frecency <= 20.0 {
477            total_frecency
478        } else {
479            20.0 + (total_frecency - 10.0).sqrt()
480        };
481
482        normalized_frecency.round() as i64
483    }
484
485    #[test]
486    fn test_frecency_calculation() {
487        let current_time = 1000000000; // Base timestamp
488
489        let score = calculate_test_frecency_score(&[], current_time);
490        assert_eq!(score, 0);
491
492        let accesses = [current_time]; // Accessed right now
493        let score = calculate_test_frecency_score(&accesses, current_time);
494        assert_eq!(score, 1); // 1.0 decay factor = 1
495
496        let ten_days_seconds = 10 * 86400; // 10 days in seconds
497        let accesses = [current_time - ten_days_seconds];
498        let score = calculate_test_frecency_score(&accesses, current_time);
499        assert_eq!(score, 1); // ~0.5 decay factor rounds to 1
500
501        let accesses = [
502            current_time,          // Today
503            current_time - 86400,  // 1 day ago
504            current_time - 172800, // 2 days ago
505        ];
506        let score = calculate_test_frecency_score(&accesses, current_time);
507        assert!(score > 2 && score < 4, "Score: {}", score); // About 3 accesses with decay
508
509        let thirty_days = 30 * 86400;
510        let accesses = [current_time - thirty_days]; // 30 days ago
511        let score = calculate_test_frecency_score(&accesses, current_time);
512        assert!(
513            score < 2,
514            "Old access should have minimal score, got: {}",
515            score
516        );
517
518        let recent_frequent = [current_time, current_time - 86400, current_time - 172800];
519        let old_single = [current_time - ten_days_seconds];
520
521        let recent_score = calculate_test_frecency_score(&recent_frequent, current_time);
522        let old_score = calculate_test_frecency_score(&old_single, current_time);
523
524        assert!(
525            recent_score > old_score,
526            "Recent frequent access ({}) should score higher than old single access ({})",
527            recent_score,
528            old_score
529        );
530    }
531
532    #[test]
533    fn test_modification_score_interpolation() {
534        let temp_dir = std::env::temp_dir().join("fff_test_interpolation");
535        let _ = std::fs::remove_dir_all(&temp_dir);
536        let tracker = FrecencyTracker::new(temp_dir.to_str().unwrap(), true).unwrap();
537
538        let current_time = tracker.get_now();
539        let git_status = Some(git2::Status::WT_MODIFIED);
540
541        // At 5 minutes: should interpolate between 16 and 8 points
542        let five_minutes_ago = current_time - (5 * 60);
543        let score = tracker.get_modification_score(five_minutes_ago, git_status, FFFMode::Neovim);
544
545        // Expected: 16 - (8 * 3 / 13) = 16 - 1 = 15 points
546        // (time_offset = 5-2 = 3, time_range = 15-2 = 13, points_diff = 16-8 = 8)
547        assert_eq!(score, 15, "5 minutes should interpolate to 15 points");
548
549        let two_minutes_ago = current_time - (2 * 60);
550        let score = tracker.get_modification_score(two_minutes_ago, git_status, FFFMode::Neovim);
551        assert_eq!(score, 16, "2 minutes should be exactly 16 points");
552
553        let fifteen_minutes_ago = current_time - (15 * 60);
554        let score =
555            tracker.get_modification_score(fifteen_minutes_ago, git_status, FFFMode::Neovim);
556        assert_eq!(score, 8, "15 minutes should be exactly 8 points");
557
558        // At 12 hours: should interpolate between 4 and 2 points
559        let twelve_hours_ago = current_time - (12 * 60 * 60);
560        let score = tracker.get_modification_score(twelve_hours_ago, git_status, FFFMode::Neovim);
561        // Expected: 4 - (2 * 11 / 23) = 4 - 0 = 4 points (integer division)
562        // (time_offset = 12-1 = 11 hours, time_range = 24-1 = 23 hours, points_diff = 4-2 = 2)
563        assert_eq!(score, 4, "12 hours should interpolate to 4 points");
564
565        // at 18 hours for more significant interpolation
566        let eighteen_hours_ago = current_time - (18 * 60 * 60);
567        let score = tracker.get_modification_score(eighteen_hours_ago, git_status, FFFMode::Neovim);
568        // Expected: 4 - (2 * 17 / 23) = 4 - 1 = 3 points
569        assert_eq!(score, 3, "18 hours should interpolate to 3 points");
570
571        let score = tracker.get_modification_score(five_minutes_ago, None, FFFMode::Neovim);
572        assert_eq!(score, 0, "No git status should return 0");
573
574        let _ = std::fs::remove_dir_all(&temp_dir);
575    }
576}