use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use std::time::Duration as StdDuration;
/// Default value for `EpisodeRetentionPolicy::max_age_days` (in days).
pub const DEFAULT_MAX_AGE_DAYS: i64 = 90;
/// Default value for `EpisodeRetentionPolicy::min_reward_threshold`.
pub const DEFAULT_MIN_REWARD_THRESHOLD: f64 = 0.3;
/// Default value for `EpisodeRetentionPolicy::max_episodes`.
pub const DEFAULT_MAX_EPISODES: usize = 50_000;
/// Default time between cleanup runs: 24 hours, expressed as a `Duration`.
pub const DEFAULT_CLEANUP_INTERVAL: StdDuration = StdDuration::from_secs(24 * 60 * 60);
/// Default value for `EpisodeRetentionPolicy::cleanup_batch_size`.
pub const DEFAULT_CLEANUP_BATCH_SIZE: usize = 100;
/// Selects which checks `EpisodeRetentionPolicy::should_retain` applies to an episode.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum RetentionCriterion {
/// Every episode is retained; no check is run.
KeepAll,
/// Retain an episode only if its age in whole days does not exceed `max_age_days`.
AgeBased,
/// Retain an episode only if it has no reward yet or its reward total meets
/// `min_reward_threshold`.
RewardBased,
/// Retain an episode only if it has patterns or heuristics attached and the
/// matching `keep_*_sources` flag is enabled.
Unreferenced,
/// Episodes without an outcome, or with a non-failure outcome, are always
/// retained; failed episodes are retained only if the reference check passes.
FailedOnly,
/// Retain an episode only if the age, reward and reference checks all pass.
Combined,
}
/// Reason a cleanup run was started; stored in `CleanupResult::trigger`.
///
/// Nothing in this file inspects the variants, so the field descriptions below
/// are inferred from names only.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum RetentionTrigger {
/// Cleanup started by a periodic schedule.
Scheduled,
/// Cleanup started because storage usage exceeded a limit.
StorageExceeded {
/// Presumably the current number of stored episodes — TODO confirm unit with the caller.
current: usize,
/// Presumably the configured limit that `current` exceeded — TODO confirm.
max: usize,
},
/// Cleanup started because of memory pressure.
MemoryPressure {
/// Pressure level; the scale is not established in this file — TODO confirm.
level: f64,
},
/// Cleanup requested explicitly.
Manual,
}
/// Configuration that decides which episodes are kept and how cleanup is paced.
///
/// All fields are public; the `with_*` builder methods additionally clamp some
/// values, and `validate` reports values that are out of range.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EpisodeRetentionPolicy {
/// Maximum episode age in whole days that still passes the age check.
/// `validate` rejects negative values.
pub max_age_days: i64,
/// Minimum reward total that passes the reward check; episodes without a
/// reward always pass. `validate` requires a value in `0.0..=1.0`.
pub min_reward_threshold: f64,
/// Episode count limit. Not read by any method in this file — presumably
/// enforced by the component that runs cleanup; TODO confirm.
pub max_episodes: usize,
/// Time between cleanup runs, in seconds (see `cleanup_interval`).
pub cleanup_interval_secs: u64,
/// Batch size for cleanup; `validate` rejects zero. Presumably the number of
/// episodes processed per batch — TODO confirm against the caller.
pub cleanup_batch_size: usize,
/// Which checks `should_retain` applies.
pub criterion: RetentionCriterion,
/// When true, an episode with a non-empty `patterns` list passes the reference check.
pub keep_pattern_sources: bool,
/// When true, an episode with a non-empty `heuristics` list passes the reference check.
pub keep_heuristic_sources: bool,
/// Not read by any method in this file — presumably makes cleanup report what
/// it would delete without deleting; TODO confirm against the caller.
pub dry_run: bool,
}
/// Default policy: every tunable comes from the `DEFAULT_*` constants, the
/// `Combined` criterion is used, pattern/heuristic sources are kept, and
/// dry-run is off.
impl Default for EpisodeRetentionPolicy {
    fn default() -> Self {
        // The interval constant is a `Duration`, while the policy stores whole seconds.
        let cleanup_interval_secs = DEFAULT_CLEANUP_INTERVAL.as_secs();

        Self {
            // What to check.
            criterion: RetentionCriterion::Combined,
            max_age_days: DEFAULT_MAX_AGE_DAYS,
            min_reward_threshold: DEFAULT_MIN_REWARD_THRESHOLD,
            keep_pattern_sources: true,
            keep_heuristic_sources: true,
            // How cleanup is sized and paced.
            max_episodes: DEFAULT_MAX_EPISODES,
            cleanup_interval_secs,
            cleanup_batch_size: DEFAULT_CLEANUP_BATCH_SIZE,
            dry_run: false,
        }
    }
}
impl EpisodeRetentionPolicy {
    /// Builds a policy with the default settings (identical to `Default::default()`).
    #[must_use]
    pub fn new() -> Self {
        Default::default()
    }

    /// Preset with tighter limits: 30-day age limit, 0.5 reward floor, at most
    /// 5,000 episodes, cleanup every 12 hours in batches of 50, `Combined`
    /// criterion, pattern and heuristic sources kept.
    #[must_use]
    pub fn storage_limited() -> Self {
        const TWELVE_HOURS_SECS: u64 = 12 * 60 * 60;

        Self {
            criterion: RetentionCriterion::Combined,
            max_age_days: 30,
            min_reward_threshold: 0.5,
            keep_pattern_sources: true,
            keep_heuristic_sources: true,
            max_episodes: 5_000,
            cleanup_interval_secs: TWELVE_HOURS_SECS,
            cleanup_batch_size: 50,
            dry_run: false,
        }
    }

    /// Preset that never discards anything: the criterion is `KeepAll`, so the
    /// remaining limits (365 days, 0.0 reward floor, `usize::MAX` episodes,
    /// weekly cleanup in batches of 100) do not affect `should_retain`.
    #[must_use]
    pub fn archival() -> Self {
        const ONE_WEEK_SECS: u64 = 7 * 24 * 60 * 60;

        Self {
            criterion: RetentionCriterion::KeepAll,
            max_age_days: 365,
            min_reward_threshold: 0.0,
            keep_pattern_sources: true,
            keep_heuristic_sources: true,
            max_episodes: usize::MAX,
            cleanup_interval_secs: ONE_WEEK_SECS,
            cleanup_batch_size: 100,
            dry_run: false,
        }
    }

    /// Preset with the strictest limits: 7-day age limit, 0.6 reward floor, at
    /// most 1,000 episodes, cleanup every 6 hours in batches of 200, `Combined`
    /// criterion, and neither pattern nor heuristic sources kept.
    ///
    /// NOTE(review): with both keep flags off the reference check never passes,
    /// and `Combined` requires it to pass, so `should_retain` returns `false`
    /// for every episode under this preset — confirm this is intended.
    #[must_use]
    pub fn aggressive() -> Self {
        const SIX_HOURS_SECS: u64 = 6 * 60 * 60;

        Self {
            criterion: RetentionCriterion::Combined,
            max_age_days: 7,
            min_reward_threshold: 0.6,
            keep_pattern_sources: false,
            keep_heuristic_sources: false,
            max_episodes: 1_000,
            cleanup_interval_secs: SIX_HOURS_SECS,
            cleanup_batch_size: 200,
            dry_run: false,
        }
    }

    /// Returns the policy with `max_age_days` replaced by `days`.
    ///
    /// The value is stored as given; negative values are reported by `validate`.
    #[must_use]
    pub fn with_max_age_days(self, days: i64) -> Self {
        Self {
            max_age_days: days,
            ..self
        }
    }

    /// Returns the policy with `min_reward_threshold` set to `threshold`,
    /// clamped into `0.0..=1.0`.
    #[must_use]
    pub fn with_min_reward_threshold(self, threshold: f64) -> Self {
        Self {
            min_reward_threshold: threshold.clamp(0.0, 1.0),
            ..self
        }
    }

    /// Returns the policy with `max_episodes` replaced by `max`.
    #[must_use]
    pub fn with_max_episodes(self, max: usize) -> Self {
        Self {
            max_episodes: max,
            ..self
        }
    }

    /// Returns the policy with the cleanup interval set from `interval`.
    ///
    /// Only whole seconds are stored; any sub-second part is dropped.
    #[must_use]
    pub fn with_cleanup_interval(self, interval: StdDuration) -> Self {
        Self {
            cleanup_interval_secs: interval.as_secs(),
            ..self
        }
    }

    /// Returns the policy with `cleanup_batch_size` set to `size`, raised to 1
    /// when `size` is zero.
    #[must_use]
    pub fn with_cleanup_batch_size(self, size: usize) -> Self {
        Self {
            cleanup_batch_size: std::cmp::max(size, 1),
            ..self
        }
    }

    /// Returns the policy with `criterion` replaced.
    #[must_use]
    pub fn with_criterion(self, criterion: RetentionCriterion) -> Self {
        Self { criterion, ..self }
    }

    /// Returns the policy with the `dry_run` flag replaced.
    #[must_use]
    pub fn with_dry_run(self, dry_run: bool) -> Self {
        Self { dry_run, ..self }
    }

    /// Checks that the policy's values are in range.
    ///
    /// # Errors
    ///
    /// The first failing rule wins, checked in this order:
    /// - `InvalidMaxAge` when `max_age_days` is negative,
    /// - `InvalidThreshold` when `min_reward_threshold` is outside `0.0..=1.0`
    ///   (this includes NaN),
    /// - `InvalidBatchSize` when `cleanup_batch_size` is zero.
    pub fn validate(&self) -> Result<(), RetentionPolicyError> {
        let threshold_in_range = (0.0..=1.0).contains(&self.min_reward_threshold);

        if self.max_age_days < 0 {
            Err(RetentionPolicyError::InvalidMaxAge(self.max_age_days))
        } else if !threshold_in_range {
            Err(RetentionPolicyError::InvalidThreshold(
                self.min_reward_threshold,
            ))
        } else if self.cleanup_batch_size == 0 {
            Err(RetentionPolicyError::InvalidBatchSize)
        } else {
            Ok(())
        }
    }

    /// Decides whether `episode` is kept under this policy, evaluated at `now`.
    ///
    /// The configured `criterion` selects which of the private checks run;
    /// `Combined` requires the age, reward and reference checks to all pass.
    ///
    /// NOTE(review): because the reference check is ANDed in, `Combined` drops
    /// an episode that is recent and well rewarded but has no patterns or
    /// heuristics attached — confirm this is the intended meaning.
    #[must_use]
    pub fn should_retain(&self, episode: &crate::Episode, now: DateTime<Utc>) -> bool {
        match self.criterion {
            RetentionCriterion::KeepAll => true,
            RetentionCriterion::AgeBased => self.check_age(episode, now),
            RetentionCriterion::RewardBased => self.check_reward(episode),
            RetentionCriterion::Unreferenced => self.check_references(episode),
            RetentionCriterion::FailedOnly => self.check_failed(episode),
            RetentionCriterion::Combined => {
                let recent_enough = self.check_age(episode, now);
                recent_enough && self.check_reward(episode) && self.check_references(episode)
            }
        }
    }

    /// Age check: passes when the whole days elapsed between the episode's
    /// `start_time` and `now` do not exceed `max_age_days`.
    fn check_age(&self, episode: &crate::Episode, now: DateTime<Utc>) -> bool {
        let elapsed = now - episode.start_time;
        elapsed.num_days() <= self.max_age_days
    }

    /// Reward check: passes when the episode has no reward, or when its reward
    /// total is at least the threshold (compared after narrowing the threshold
    /// to `f32`).
    fn check_reward(&self, episode: &crate::Episode) -> bool {
        let floor = self.min_reward_threshold as f32;
        episode
            .reward
            .as_ref()
            .map_or(true, |reward| reward.total >= floor)
    }

    /// Reference check: passes when the episode has patterns and pattern sources
    /// are kept, or has heuristics and heuristic sources are kept.
    fn check_references(&self, episode: &crate::Episode) -> bool {
        let protected_by_patterns = self.keep_pattern_sources && !episode.patterns.is_empty();
        let protected_by_heuristics =
            self.keep_heuristic_sources && !episode.heuristics.is_empty();

        protected_by_patterns || protected_by_heuristics
    }

    /// Failure check: episodes with no outcome, or with any outcome other than
    /// `TaskOutcome::Failure`, pass; failed episodes pass only when the
    /// reference check does.
    fn check_failed(&self, episode: &crate::Episode) -> bool {
        let failed = matches!(
            episode.outcome.as_ref(),
            Some(crate::types::TaskOutcome::Failure { .. })
        );

        !failed || self.check_references(episode)
    }

    /// Returns the cleanup interval as a `Duration` built from
    /// `cleanup_interval_secs`.
    #[must_use]
    pub fn cleanup_interval(&self) -> StdDuration {
        StdDuration::from_secs(self.cleanup_interval_secs)
    }
}
/// Reasons `EpisodeRetentionPolicy::validate` rejects a policy.
#[derive(Debug, Clone, PartialEq)]
pub enum RetentionPolicyError {
    /// `max_age_days` was negative; carries the rejected value.
    InvalidMaxAge(i64),
    /// `min_reward_threshold` was outside `0.0..=1.0`; carries the rejected value.
    InvalidThreshold(f64),
    /// `cleanup_batch_size` was zero.
    InvalidBatchSize,
}

/// Human-readable message for each validation failure.
impl std::fmt::Display for RetentionPolicyError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Payloads are `Copy`, so match by value.
        match *self {
            Self::InvalidBatchSize => f.write_str("Invalid batch size: must be > 0"),
            Self::InvalidMaxAge(days) => {
                write!(f, "Invalid max age: {days} (must be >= 0)")
            }
            Self::InvalidThreshold(threshold) => {
                write!(f, "Invalid threshold: {threshold} (must be 0.0 - 1.0)")
            }
        }
    }
}

/// No underlying source error; the default `Error` methods are sufficient.
impl std::error::Error for RetentionPolicyError {}
/// Summary of one cleanup run.
///
/// `Default` yields all counters at zero, empty lists and no trigger.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct CleanupResult {
/// Number of episodes evaluated. The `add_*` helpers do not update this
/// field; it is used as the denominator of `success_rate`.
pub evaluated: usize,
/// Number of deleted episodes; incremented by `add_deleted`.
pub deleted: usize,
/// Number of kept episodes; incremented by `add_kept`.
pub kept: usize,
/// IDs recorded through `add_deleted`.
pub deleted_ids: Vec<uuid::Uuid>,
/// IDs recorded through `add_kept`.
pub kept_ids: Vec<uuid::Uuid>,
/// What started the run, if recorded.
pub trigger: Option<RetentionTrigger>,
/// Presumably the run's elapsed time in milliseconds; not written by any
/// method in this file — TODO confirm with the caller.
pub duration_ms: u64,
/// Error messages recorded through `add_error`.
pub errors: Vec<String>,
}
impl CleanupResult {
    /// Creates an empty result (all counters zero, no IDs, no errors, no trigger).
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Records `id` as deleted: bumps the `deleted` counter and stores the ID.
    ///
    /// Does not change `evaluated`.
    pub fn add_deleted(&mut self, id: uuid::Uuid) {
        self.deleted += 1;
        self.deleted_ids.push(id);
    }

    /// Records `id` as kept: bumps the `kept` counter and stores the ID.
    ///
    /// Does not change `evaluated`.
    pub fn add_kept(&mut self, id: uuid::Uuid) {
        self.kept += 1;
        self.kept_ids.push(id);
    }

    /// Appends an error message to the result.
    ///
    /// Does not change `evaluated`.
    pub fn add_error(&mut self, error: String) {
        self.errors.push(error);
    }

    /// Returns `true` when at least one error was recorded.
    #[must_use]
    pub fn has_errors(&self) -> bool {
        !self.errors.is_empty()
    }

    /// Fraction of evaluated episodes that did not produce an error, in `0.0..=1.0`.
    ///
    /// Returns `1.0` when nothing was evaluated. If more errors were recorded
    /// than episodes evaluated, the result is `0.0`.
    #[must_use]
    pub fn success_rate(&self) -> f64 {
        if self.evaluated == 0 {
            return 1.0;
        }

        // `errors` grows independently of `evaluated` (see `add_error`), so the
        // error count can exceed it. A plain subtraction would then panic in
        // debug builds and wrap to a huge value in release builds.
        let successful = self.evaluated.saturating_sub(self.errors.len());
        successful as f64 / self.evaluated as f64
    }
}
#[cfg(test)]
mod tests;