use super::db_healthcheck::DbHealthChecker;
use super::lmdb::{LmdbStore, is_map_full};
use crate::error::{Error, Result};
use crate::file_picker::FFFMode;
use crate::git::is_modified_status;
use crate::shared::SharedFrecency;
use heed::types::{Bytes, SerdeBincode};
use heed::{Database, Env};
use std::fs;
use std::path::PathBuf;
use std::time::{SystemTime, UNIX_EPOCH};
use std::{collections::VecDeque, path::Path};
/// Per-day exponential decay rate applied to each access in the default mode.
/// `0.0693` is approximately `ln(2) / 10`, so an access loses half of its
/// weight roughly every ten days.
const DECAY_CONSTANT: f64 = 0.0693;

/// Number of seconds in one day; used to convert second deltas into days.
const SECONDS_PER_DAY: f64 = 86_400.0;

/// Accesses older than this many days are ignored when scoring in the default
/// mode and are pruned from storage by `track_access` / `purge_stale_entries`.
const MAX_HISTORY_DAYS: f64 = 30.0;

/// Upper bound on the number of timestamps kept per file by `track_access`.
const MAX_TIMESTAMPS_PER_FILE: usize = 128;

/// Per-day exponential decay rate used when the mode reports `is_ai()`.
/// `0.231` is approximately `ln(2) / 3`, i.e. roughly a three-day half-life.
const AI_DECAY_CONSTANT: f64 = 0.231;

/// Scoring look-back window, in days, used when the mode reports `is_ai()`.
const AI_MAX_HISTORY_DAYS: f64 = 7.0;
/// Per-file access history stored in an LMDB database (via `heed`).
///
/// Each record is keyed by the 32-byte BLAKE3 hash of the file path string
/// (see `path_to_hash_bytes`) and holds a queue of access timestamps in
/// seconds since the UNIX epoch; `track_access` appends new timestamps at the
/// back, so the oldest entry sits at the front.
#[derive(Debug)]
pub struct FrecencyTracker {
/// LMDB environment that owns the on-disk data and hands out transactions.
env: Env,
/// Handle to the database inside `env`: raw byte keys, bincode-encoded
/// `VecDeque<u64>` values.
db: Database<Bytes, SerdeBincode<VecDeque<u64>>>,
}
/// `(points, max_age_seconds)` buckets used by `get_modification_score` in the
/// default mode, ordered from most recent to oldest. Ages that fall between
/// two bucket limits are linearly interpolated; ages beyond the last limit
/// score zero.
const MODIFICATION_THRESHOLDS: [(i64, u64); 5] = [
    (16, 2 * 60),           // up to 2 minutes
    (8, 15 * 60),           // up to 15 minutes
    (4, 60 * 60),           // up to 1 hour
    (2, 24 * 60 * 60),      // up to 1 day
    (1, 7 * 24 * 60 * 60),  // up to 1 week
];

/// Same layout as [`MODIFICATION_THRESHOLDS`], with the shorter limits that
/// are selected when the mode reports `is_ai()`.
const AI_MODIFICATION_THRESHOLDS: [(i64, u64); 5] = [
    (16, 30),               // up to 30 seconds
    (8, 5 * 60),            // up to 5 minutes
    (4, 15 * 60),           // up to 15 minutes
    (2, 60 * 60),           // up to 1 hour
    (1, 4 * 60 * 60),       // up to 4 hours
];
impl DbHealthChecker for FrecencyTracker {
fn get_env(&self) -> &heed::Env {
&self.env
}
fn count_entries(&self) -> Result<Vec<(&'static str, u64)>> {
let rtxn = self.env.read_txn().map_err(Error::DbStartReadTxn)?;
let count = self.db.len(&rtxn).map_err(Error::DbRead)?;
Ok(vec![("absolute_frecency_entries", count)])
}
}
/// LMDB sizing parameters for the frecency store, consumed by the `LmdbStore`
/// trait (defined in `super::lmdb`, not visible here).
impl LmdbStore for FrecencyTracker {
// NOTE(review): presumably the number of *named* databases to reserve; zero
// would fit `open`, which passes `None` as the database name — confirm
// against the `LmdbStore` definition.
const MAX_DBS: u32 = 0;
// 10 MiB. NOTE(review): presumably the LMDB map size used when opening the
// environment — confirm against `LmdbStore::open_env`.
const MAP_SIZE: usize = 10 * 1024 * 1024;
// 8 MiB. `run_frecency_gc` compares the size of `data.mdb` against this
// value and logs a warning when the file is larger.
const SIZE_CAP_BYTES: u64 = 8 * 1024 * 1024;
}
impl FrecencyTracker {
/// Returns the path of the underlying LMDB environment, as reported by
/// `Env::path`.
pub fn db_path(&self) -> &Path {
self.env.path()
}
pub fn open(db_path: impl AsRef<Path>) -> Result<Self> {
let db_path = db_path.as_ref();
let env = Self::open_env(db_path)?;
let db = Self::open_database_safe(&env, None)?;
Ok(FrecencyTracker { db, env })
}
/// Deprecated constructor kept for backward compatibility; it simply forwards
/// to [`FrecencyTracker::open`].
///
/// `_use_unsafe_no_lock` is accepted so existing call sites keep compiling,
/// but its value is never read.
#[deprecated(
since = "0.7.0",
note = "LMDB unsafe no-lock mode is no longer supported; use `FrecencyTracker::open` instead. \
The `_use_unsafe_no_lock` argument is ignored."
)]
pub fn new(db_path: impl AsRef<Path>, _use_unsafe_no_lock: bool) -> Result<Self> {
Self::open(db_path)
}
/// Starts a thread named `fff-frecency-gc` that runs one garbage-collection
/// pass (`run_frecency_gc`) over the shared tracker.
///
/// Returns the thread's join handle.
///
/// # Errors
/// Returns the converted I/O error if the thread cannot be spawned.
pub fn spawn_gc(
    shared: SharedFrecency,
    db_path: String,
) -> Result<std::thread::JoinHandle<()>> {
    let worker = std::thread::Builder::new().name(String::from("fff-frecency-gc"));
    let handle = worker.spawn(move || Self::run_frecency_gc(shared, db_path))?;
    Ok(handle)
}
/// One garbage-collection pass over the shared frecency store.
///
/// Steps, in order:
/// 1. Take the read lock on `shared`; give up if the lock cannot be taken or
///    no tracker is installed.
/// 2. Ask LMDB to clear stale reader slots (a failure is only logged).
/// 3. Purge expired history via `purge_stale_entries`; give up on error.
/// 4. After releasing the lock, log a warning if `<db_path>/data.mdb` is
///    larger than `SIZE_CAP_BYTES`. A file whose metadata cannot be read is
///    treated as having size zero.
///
/// Nothing is returned; every failure is reported through `tracing` only.
#[tracing::instrument(skip(shared), fields(db_path = %db_path))]
fn run_frecency_gc(shared: SharedFrecency, db_path: String) {
    let started_at = std::time::Instant::now();

    // The read guard lives only inside this block so the lock is released
    // before the filesystem check below.
    let (deleted, pruned) = {
        let guard = match shared.read() {
            Err(e) => {
                tracing::debug!("Failed to acquire read lock: {e}");
                return;
            }
            Ok(guard) => guard,
        };
        let tracker = match &*guard {
            Some(tracker) => tracker,
            None => return,
        };

        if let Err(e) = tracker.env.clear_stale_readers() {
            tracing::debug!("clear_stale_readers failed: {e}");
        }

        match tracker.purge_stale_entries() {
            Err(e) => {
                tracing::debug!("Purge failed: {e}");
                return;
            }
            Ok(counts) => counts,
        }
    };

    let purged_anything = deleted > 0 || pruned > 0;
    if purged_anything {
        tracing::info!(deleted, pruned, elapsed = ?started_at.elapsed(), "Frecency GC purged entries");
    }

    let size_cap = <Self as LmdbStore>::SIZE_CAP_BYTES;
    let data_file = Path::new(&db_path).join("data.mdb");
    let file_size = match fs::metadata(&data_file) {
        Ok(meta) => meta.len(),
        Err(_) => 0,
    };
    if file_size > size_cap {
        tracing::warn!(
            size = file_size,
            cap = size_cap,
            "Frecency DB exceeds size cap — will be erased on next open"
        );
    }
}
fn purge_stale_entries(&self) -> Result<(usize, usize)> {
let now = self.get_now();
let cutoff_time = now.saturating_sub((MAX_HISTORY_DAYS * SECONDS_PER_DAY) as u64);
let rtxn = self.env.read_txn().map_err(Error::DbStartReadTxn)?;
let mut to_delete: Vec<Vec<u8>> = Vec::new();
let mut to_update: Vec<(Vec<u8>, VecDeque<u64>)> = Vec::new();
let iter = self.db.iter(&rtxn).map_err(Error::DbRead)?;
for result in iter {
let (key, accesses) = result.map_err(Error::DbRead)?;
let fresh_start = accesses.iter().position(|&ts| ts >= cutoff_time);
match fresh_start {
None => {
to_delete.push(key.to_vec());
}
Some(0) => {
}
Some(start) => {
let pruned: VecDeque<u64> = accesses.iter().skip(start).copied().collect();
to_update.push((key.to_vec(), pruned));
}
}
}
drop(rtxn);
if to_delete.is_empty() && to_update.is_empty() {
return Ok((0, 0));
}
let mut wtxn = self.env.write_txn().map_err(Error::DbStartWriteTxn)?;
for key in &to_delete {
self.db.delete(&mut wtxn, key).map_err(Error::DbWrite)?;
}
for (key, accesses) in &to_update {
self.db
.put(&mut wtxn, key, accesses)
.map_err(Error::DbWrite)?;
}
wtxn.commit().map_err(Error::DbCommit)?;
Ok((to_delete.len(), to_update.len()))
}
fn get_accesses(&self, path: &Path) -> Result<Option<VecDeque<u64>>> {
let key_hash = Self::path_to_hash_bytes(path)?;
let rtxn = self.env.read_txn().map_err(Error::DbStartReadTxn)?;
let result = self.db.get(&rtxn, &key_hash).map_err(Error::DbRead)?;
rtxn.commit().map_err(Error::DbCommit)?;
Ok(result)
}
/// Current wall-clock time as whole seconds since the UNIX epoch.
///
/// If the system clock is set before the epoch, `duration_since` fails; in
/// that case `0` is returned instead of panicking. With `now == 0` every
/// cutoff computed by the callers saturates to `0`, so no history is
/// considered stale and nothing is purged because of a broken clock.
fn get_now(&self) -> u64 {
    SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map_or(0, |elapsed| elapsed.as_secs())
}
fn path_to_hash_bytes(path: &Path) -> Result<[u8; 32]> {
let Some(key) = path.to_str() else {
return Err(Error::InvalidPath(path.to_path_buf()));
};
Ok(*blake3::hash(key.as_bytes()).as_bytes())
}
/// Returns how many seconds ago `path` was last tracked.
///
/// Yields `Ok(None)` when the path has no record or its history is empty.
/// A last-access timestamp in the future is reported as `0` seconds.
///
/// # Errors
/// Propagates any error from `get_accesses`.
pub fn seconds_since_last_access(&self, path: &Path) -> Result<Option<u64>> {
    let Some(history) = self.get_accesses(path)? else {
        return Ok(None);
    };
    // The most recent access is stored at the back of the queue.
    Ok(history
        .back()
        .map(|&latest| self.get_now().saturating_sub(latest)))
}
pub fn track_access(&self, path: &Path) -> Result<()> {
let key_hash = Self::path_to_hash_bytes(path)?;
let mut accesses = self.get_accesses(path)?.unwrap_or_default();
let now = self.get_now();
let cutoff_time = now.saturating_sub((MAX_HISTORY_DAYS * SECONDS_PER_DAY) as u64);
while let Some(&front_time) = accesses.front() {
if front_time < cutoff_time || accesses.len() >= MAX_TIMESTAMPS_PER_FILE {
accesses.pop_front();
} else {
break;
}
}
accesses.push_back(now);
tracing::debug!(?path, accesses = accesses.len(), "Tracking access");
let mut wtxn = self.env.write_txn().map_err(Error::DbStartWriteTxn)?;
if let Err(e) = self.db.put(&mut wtxn, &key_hash, &accesses) {
if is_map_full(&e) {
tracing::error!(
?path,
"Frecency DB hit MDB_MAP_FULL; dropping write — db will be \
erased on next open via LmdbStore::erase_if_oversized"
);
return Ok(());
}
return Err(Error::DbWrite(e));
}
wtxn.commit()
.inspect_err(|e| {
if is_map_full(e) {
tracing::error!(
?path,
"Frecency DB hit MDB_MAP_FULL on commit; dropping write"
);
}
})
.map_err(Error::DbCommit)
}
/// Computes the access-based frecency score for `file_path`.
///
/// Every stored access inside the look-back window contributes
/// `exp(-decay * age_in_days)`. The decay rate and window are the `AI_*`
/// constants when `mode.is_ai()` is true and the default constants
/// otherwise. Raw totals above 10 are dampened to `10 + sqrt(total - 10)`,
/// and the result is rounded to the nearest integer.
///
/// Returns `0` when the history is missing or empty, or when it cannot be
/// read (lookup errors are deliberately ignored).
pub fn get_access_score(&self, file_path: &Path, mode: FFFMode) -> i64 {
    let history = match self.get_accesses(file_path) {
        Ok(Some(history)) if !history.is_empty() => history,
        _ => return 0,
    };

    let (decay_rate, horizon_days) = if mode.is_ai() {
        (AI_DECAY_CONSTANT, AI_MAX_HISTORY_DAYS)
    } else {
        (DECAY_CONSTANT, MAX_HISTORY_DAYS)
    };

    let now = self.get_now();
    let oldest_counted = now.saturating_sub((horizon_days * SECONDS_PER_DAY) as u64);

    // Walk from newest to oldest and stop at the first access outside the
    // window; everything before it in the queue is older still.
    let raw_score = history
        .iter()
        .rev()
        .take_while(|&&stamp| stamp >= oldest_counted)
        .fold(0.0_f64, |sum, &stamp| {
            let age_days = now.saturating_sub(stamp) as f64 / SECONDS_PER_DAY;
            sum + (-decay_rate * age_days).exp()
        });

    let dampened = if raw_score <= 10.0 {
        raw_score
    } else {
        10.0 + (raw_score - 10.0).sqrt()
    };
    dampened.round() as i64
}
/// Scores how recently a git-modified file was changed.
///
/// Returns `0` unless `git_status` is `Some` and `is_modified_status`
/// accepts it. Otherwise the age `now - modified_time` (saturating at zero)
/// is looked up in the threshold table for the mode (`AI_*` table when
/// `mode.is_ai()`):
/// - inside the first bucket, or exactly on a bucket limit, the bucket's
///   points are returned;
/// - between two limits the points are linearly interpolated with integer
///   arithmetic;
/// - beyond the last limit the score is `0`.
pub fn get_modification_score(
    &self,
    modified_time: u64,
    git_status: Option<git2::Status>,
    mode: FFFMode,
) -> i64 {
    if !git_status.is_some_and(is_modified_status) {
        return 0;
    }

    let table = if mode.is_ai() {
        &AI_MODIFICATION_THRESHOLDS
    } else {
        &MODIFICATION_THRESHOLDS
    };

    let elapsed = self.get_now().saturating_sub(modified_time);

    // `newer` remembers the bucket preceding the one being examined, which is
    // the upper anchor for interpolation.
    let mut newer: Option<(i64, u64)> = None;
    for &(points, limit) in table.iter() {
        if elapsed <= limit {
            return match newer {
                Some((newer_points, newer_limit)) if elapsed != limit => {
                    let span = (limit - newer_limit) as i64;
                    let into_span = (elapsed - newer_limit) as i64;
                    newer_points - ((newer_points - points) * into_span) / span
                }
                _ => points,
            };
        }
        newer = Some((points, limit));
    }
    0
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::file_picker::FFFMode;
fn calculate_test_frecency_score(access_timestamps: &[u64], current_time: u64) -> i64 {
let mut total_frecency = 0.0;
for &access_time in access_timestamps {
let days_ago = (current_time.saturating_sub(access_time) as f64) / SECONDS_PER_DAY;
let decay_factor = (-DECAY_CONSTANT * days_ago).exp();
total_frecency += decay_factor;
}
let normalized_frecency = if total_frecency <= 20.0 {
total_frecency
} else {
20.0 + (total_frecency - 10.0).sqrt()
};
normalized_frecency.round() as i64
}
/// Exercises the reference scoring helper on a handful of hand-checked
/// histories: empty, a single access now, a single old access, and a short
/// run of recent accesses.
#[test]
fn test_frecency_calculation() {
    const DAY: u64 = 86400;
    let now: u64 = 1000000000;
    let score_of = |stamps: &[u64]| calculate_test_frecency_score(stamps, now);

    // No history scores zero.
    assert_eq!(score_of(&[]), 0);

    // One access right now has full weight.
    assert_eq!(score_of(&[now]), 1);

    // Ten days old: weight is about one half, which rounds up to 1.
    let ten_days_ago = now - 10 * DAY;
    assert_eq!(score_of(&[ten_days_ago]), 1);

    // Three accesses on consecutive days add up to just under 3.
    let last_three_days = [now, now - DAY, now - 2 * DAY];
    let score = score_of(&last_three_days);
    assert!(score > 2 && score < 4, "Score: {}", score);

    // A single thirty-day-old access has almost no weight left.
    let score = score_of(&[now - 30 * DAY]);
    assert!(
        score < 2,
        "Old access should have minimal score, got: {}",
        score
    );

    // Recent, repeated use must outrank one old access.
    let recent_score = score_of(&last_three_days);
    let old_score = score_of(&[ten_days_ago]);
    assert!(
        recent_score > old_score,
        "Recent frequent access ({}) should score higher than old single access ({})",
        recent_score,
        old_score
    );
}
/// Checks `get_modification_score` in the default mode at bucket limits and
/// at points between them, plus the "no git status" short-circuit.
#[test]
fn test_modification_score_interpolation() {
    let db_dir = std::env::temp_dir().join("fff_test_interpolation");
    let _ = std::fs::remove_dir_all(&db_dir);
    let tracker = FrecencyTracker::open(db_dir.to_str().unwrap()).unwrap();

    let now = tracker.get_now();
    let modified = Some(git2::Status::WT_MODIFIED);

    // (age in seconds, expected score, failure message)
    let cases: [(u64, i64, &str); 5] = [
        (5 * 60, 15, "5 minutes should interpolate to 15 points"),
        (2 * 60, 16, "2 minutes should be exactly 16 points"),
        (15 * 60, 8, "15 minutes should be exactly 8 points"),
        (12 * 60 * 60, 4, "12 hours should interpolate to 4 points"),
        (18 * 60 * 60, 3, "18 hours should interpolate to 3 points"),
    ];
    for &(age_secs, expected, message) in &cases {
        let score = tracker.get_modification_score(now - age_secs, modified, FFFMode::Neovim);
        assert_eq!(score, expected, "{}", message);
    }

    // Without a git status the score is always zero, whatever the age.
    let score = tracker.get_modification_score(now - 5 * 60, None, FFFMode::Neovim);
    assert_eq!(score, 0, "No git status should return 0");

    let _ = std::fs::remove_dir_all(&db_dir);
}
}