Skip to main content

chasm/agency/
archival.rs

1// Copyright (c) 2024-2027 Nervosys LLC
2// SPDX-License-Identifier: AGPL-3.0-only
3//! Autonomous Session Archival Agent
4//!
5//! An AI agent that automatically archives old or inactive sessions based on
6//! configurable rules and policies.
7
8use chrono::{DateTime, Duration, Utc};
9use serde::{Deserialize, Serialize};
10use std::collections::HashMap;
11use std::sync::Arc;
12use tokio::sync::RwLock;
13
14use crate::agency::{Agent, AgentBuilder, AgentConfig, AgentRole, AgentStatus};
15use crate::database::ChatDatabase;
16
17/// Archival policy configuration
18#[derive(Debug, Clone, Serialize, Deserialize)]
19pub struct ArchivalPolicy {
20    /// Policy name
21    pub name: String,
22    /// Whether policy is enabled
23    pub enabled: bool,
24    /// Days of inactivity before archival
25    pub inactive_days: u32,
26    /// Minimum message count to archive (skip small sessions)
27    pub min_messages: u32,
28    /// Maximum message count (archive large sessions sooner)
29    pub max_messages: Option<u32>,
30    /// Providers to include (empty = all)
31    pub providers: Vec<String>,
32    /// Workspaces to include (empty = all)
33    pub workspace_ids: Vec<String>,
34    /// Tags that prevent archival
35    pub exclude_tags: Vec<String>,
36    /// Tags that trigger immediate archival
37    pub include_tags: Vec<String>,
38    /// Whether to compress archived sessions
39    pub compress: bool,
40    /// Whether to notify on archival
41    pub notify: bool,
42}
43
44impl Default for ArchivalPolicy {
45    fn default() -> Self {
46        Self {
47            name: "default".to_string(),
48            enabled: true,
49            inactive_days: 30,
50            min_messages: 5,
51            max_messages: None,
52            providers: vec![],
53            workspace_ids: vec![],
54            exclude_tags: vec!["pinned".to_string(), "important".to_string()],
55            include_tags: vec!["archive".to_string()],
56            compress: true,
57            notify: true,
58        }
59    }
60}
61
62/// Session candidate for archival
63#[derive(Debug, Clone, Serialize, Deserialize)]
64pub struct ArchivalCandidate {
65    /// Session ID
66    pub session_id: String,
67    /// Session title
68    pub title: String,
69    /// Provider
70    pub provider: String,
71    /// Workspace ID
72    pub workspace_id: Option<String>,
73    /// Message count
74    pub message_count: u32,
75    /// Last activity
76    pub last_activity: DateTime<Utc>,
77    /// Days inactive
78    pub days_inactive: u32,
79    /// Matching policy
80    pub policy: String,
81    /// Reason for archival
82    pub reason: String,
83    /// Priority (higher = archive sooner)
84    pub priority: u8,
85}
86
87/// Archival decision
88#[derive(Debug, Clone, Serialize, Deserialize)]
89pub struct ArchivalDecision {
90    /// Session ID
91    pub session_id: String,
92    /// Whether to archive
93    pub should_archive: bool,
94    /// Confidence (0.0 - 1.0)
95    pub confidence: f64,
96    /// Reasoning
97    pub reasoning: String,
98    /// Matched policies
99    pub policies: Vec<String>,
100}
101
102/// Archival result
103#[derive(Debug, Clone, Serialize, Deserialize)]
104pub struct ArchivalResult {
105    /// Sessions archived
106    pub archived_count: u32,
107    /// Sessions skipped
108    pub skipped_count: u32,
109    /// Total size saved (bytes)
110    pub bytes_saved: u64,
111    /// Errors
112    pub errors: Vec<String>,
113    /// Timestamp
114    pub timestamp: DateTime<Utc>,
115    /// Duration (ms)
116    pub duration_ms: u64,
117}
118
119/// Archival agent state
120pub struct ArchivalAgentState {
121    /// Policies
122    policies: Vec<ArchivalPolicy>,
123    /// Last run time
124    last_run: Option<DateTime<Utc>>,
125    /// Statistics
126    stats: ArchivalStats,
127    /// Pending candidates
128    pending: Vec<ArchivalCandidate>,
129}
130
131/// Archival statistics
132#[derive(Debug, Clone, Default, Serialize, Deserialize)]
133pub struct ArchivalStats {
134    /// Total runs
135    pub total_runs: u64,
136    /// Total archived
137    pub total_archived: u64,
138    /// Total skipped
139    pub total_skipped: u64,
140    /// Total bytes saved
141    pub total_bytes_saved: u64,
142    /// Average confidence
143    pub avg_confidence: f64,
144}
145
146/// Autonomous session archival agent
147pub struct ArchivalAgent {
148    /// Agent configuration
149    config: AgentConfig,
150    /// Agent state
151    state: Arc<RwLock<ArchivalAgentState>>,
152    /// Database reference
153    db: Option<Arc<ChatDatabase>>,
154    /// Whether agent is running
155    running: Arc<RwLock<bool>>,
156}
157
158impl ArchivalAgent {
159    /// Create a new archival agent
160    pub fn new() -> Self {
161        let config = AgentConfig {
162            name: "archival-agent".to_string(),
163            description: "Autonomous session archival agent".to_string(),
164            instruction: ARCHIVAL_SYSTEM_PROMPT.to_string(),
165            ..Default::default()
166        };
167
168        let state = ArchivalAgentState {
169            policies: vec![ArchivalPolicy::default()],
170            last_run: None,
171            stats: ArchivalStats::default(),
172            pending: vec![],
173        };
174
175        Self {
176            config,
177            state: Arc::new(RwLock::new(state)),
178            db: None,
179            running: Arc::new(RwLock::new(false)),
180        }
181    }
182
183    /// Create with custom policies
184    pub fn with_policies(policies: Vec<ArchivalPolicy>) -> Self {
185        let agent = Self::new();
186        let mut state = agent.state.blocking_write();
187        state.policies = policies;
188        drop(state);
189        agent
190    }
191
192    /// Set database reference
193    pub fn with_database(mut self, db: Arc<ChatDatabase>) -> Self {
194        self.db = Some(db);
195        self
196    }
197
198    /// Add a policy
199    pub async fn add_policy(&self, policy: ArchivalPolicy) {
200        let mut state = self.state.write().await;
201        state.policies.push(policy);
202    }
203
204    /// Remove a policy by name
205    pub async fn remove_policy(&self, name: &str) -> bool {
206        let mut state = self.state.write().await;
207        let len_before = state.policies.len();
208        state.policies.retain(|p| p.name != name);
209        state.policies.len() < len_before
210    }
211
212    /// Get all policies
213    pub async fn get_policies(&self) -> Vec<ArchivalPolicy> {
214        let state = self.state.read().await;
215        state.policies.clone()
216    }
217
218    /// Scan for archival candidates
219    pub async fn scan_candidates(&self) -> Vec<ArchivalCandidate> {
220        let state = self.state.read().await;
221        let candidates = Vec::new();
222        let _now = Utc::now();
223
224        // In real implementation, query database for sessions
225        // For now, return placeholder logic
226        for policy in &state.policies {
227            if !policy.enabled {
228                continue;
229            }
230
231            // Would query: SELECT * FROM sessions WHERE
232            // - updated_at < now - inactive_days
233            // - message_count >= min_messages
234            // - NOT archived
235            // - provider IN policies.providers (if specified)
236            // - workspace_id IN policies.workspace_ids (if specified)
237            // - tags NOT IN exclude_tags
238        }
239
240        candidates
241    }
242
243    /// Evaluate a session for archival
244    pub async fn evaluate_session(&self, session_id: &str) -> ArchivalDecision {
245        let state = self.state.read().await;
246        let mut matched_policies = Vec::new();
247        let mut reasons = Vec::new();
248        let mut should_archive = false;
249        let mut confidence = 0.0;
250
251        // Check against all enabled policies
252        for policy in &state.policies {
253            if !policy.enabled {
254                continue;
255            }
256
257            // In real implementation:
258            // 1. Fetch session from database
259            // 2. Check each policy condition
260            // 3. Use LLM for nuanced decisions if needed
261
262            // Placeholder decision logic
263            matched_policies.push(policy.name.clone());
264        }
265
266        if !matched_policies.is_empty() {
267            should_archive = true;
268            confidence = 0.85;
269            reasons.push("Matched archival policies".to_string());
270        }
271
272        ArchivalDecision {
273            session_id: session_id.to_string(),
274            should_archive,
275            confidence,
276            reasoning: reasons.join("; "),
277            policies: matched_policies,
278        }
279    }
280
281    /// Archive a single session
282    pub async fn archive_session(&self, _session_id: &str) -> Result<bool, String> {
283        // In real implementation:
284        // 1. Mark session as archived in database
285        // 2. Optionally compress/export
286        // 3. Update statistics
287
288        let mut state = self.state.write().await;
289        state.stats.total_archived += 1;
290
291        Ok(true)
292    }
293
294    /// Run the archival agent
295    pub async fn run(&self) -> ArchivalResult {
296        let start = std::time::Instant::now();
297        let mut result = ArchivalResult {
298            archived_count: 0,
299            skipped_count: 0,
300            bytes_saved: 0,
301            errors: vec![],
302            timestamp: Utc::now(),
303            duration_ms: 0,
304        };
305
306        // Set running flag
307        {
308            let mut running = self.running.write().await;
309            if *running {
310                result.errors.push("Agent already running".to_string());
311                return result;
312            }
313            *running = true;
314        }
315
316        // Scan for candidates
317        let candidates = self.scan_candidates().await;
318
319        // Evaluate and archive each candidate
320        for candidate in candidates {
321            let decision = self.evaluate_session(&candidate.session_id).await;
322
323            if decision.should_archive && decision.confidence >= 0.7 {
324                match self.archive_session(&candidate.session_id).await {
325                    Ok(true) => {
326                        result.archived_count += 1;
327                    }
328                    Ok(false) => {
329                        result.skipped_count += 1;
330                    }
331                    Err(e) => {
332                        result
333                            .errors
334                            .push(format!("Failed to archive {}: {}", candidate.session_id, e));
335                    }
336                }
337            } else {
338                result.skipped_count += 1;
339            }
340        }
341
342        // Update state
343        {
344            let mut state = self.state.write().await;
345            state.last_run = Some(Utc::now());
346            state.stats.total_runs += 1;
347            state.stats.total_bytes_saved += result.bytes_saved;
348        }
349
350        // Clear running flag
351        {
352            let mut running = self.running.write().await;
353            *running = false;
354        }
355
356        result.duration_ms = start.elapsed().as_millis() as u64;
357        result
358    }
359
360    /// Get agent statistics
361    pub async fn get_stats(&self) -> ArchivalStats {
362        let state = self.state.read().await;
363        state.stats.clone()
364    }
365
366    /// Get last run time
367    pub async fn get_last_run(&self) -> Option<DateTime<Utc>> {
368        let state = self.state.read().await;
369        state.last_run
370    }
371
372    /// Check if agent is running
373    pub async fn is_running(&self) -> bool {
374        let running = self.running.read().await;
375        *running
376    }
377
378    /// Stop the agent
379    pub async fn stop(&self) {
380        let mut running = self.running.write().await;
381        *running = false;
382    }
383}
384
385impl Default for ArchivalAgent {
386    fn default() -> Self {
387        Self::new()
388    }
389}
390
/// Instruction text installed as the archival agent's system prompt.
///
/// It states the agent's role, the criteria for choosing sessions to archive,
/// the questions to weigh per session, and asks for explicit reasoning.
const ARCHIVAL_SYSTEM_PROMPT: &str = r#"You are an autonomous session archival agent for Chasm.

Your role is to analyze chat sessions and determine which should be archived based on:
1. Inactivity period (days since last message)
2. Session size and importance
3. Content relevance and quality
4. User-defined policies and tags

When evaluating a session for archival, consider:
- Is the conversation complete or ongoing?
- Does it contain important information that should be preserved?
- Are there pinned or important tags?
- How much space would archiving save?

Provide clear reasoning for your archival decisions.
"#;
408
409/// Archival scheduler for periodic runs
410pub struct ArchivalScheduler {
411    /// Agent reference
412    agent: Arc<ArchivalAgent>,
413    /// Run interval
414    interval: Duration,
415    /// Whether scheduler is active
416    active: Arc<RwLock<bool>>,
417}
418
419impl ArchivalScheduler {
420    /// Create a new scheduler
421    pub fn new(agent: Arc<ArchivalAgent>, interval_hours: u32) -> Self {
422        Self {
423            agent,
424            interval: Duration::hours(interval_hours as i64),
425            active: Arc::new(RwLock::new(false)),
426        }
427    }
428
429    /// Start the scheduler
430    /// Note: This currently logs a start message. Full background scheduling
431    /// requires a LocalSet or refactoring ChatDatabase for Send+Sync.
432    pub async fn start(&self) {
433        let mut active = self.active.write().await;
434        *active = true;
435        drop(active);
436
437        // TODO: Implement background scheduling with LocalSet
438        // For now, just mark as active - call run() manually
439        println!(
440            "[ArchivalScheduler] Started with interval {:?}. Call run() to execute.",
441            self.interval
442        );
443    }
444
445    /// Stop the scheduler
446    pub async fn stop(&self) {
447        let mut active = self.active.write().await;
448        *active = false;
449    }
450
451    /// Check if scheduler is active
452    pub async fn is_active(&self) -> bool {
453        let active = self.active.read().await;
454        *active
455    }
456}
457
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly created agent carries exactly one policy, named "default".
    #[tokio::test]
    async fn test_archival_agent_creation() {
        let agent = ArchivalAgent::new();
        let policies = agent.get_policies().await;
        assert_eq!(policies.len(), 1);
        assert_eq!(policies[0].name, "default");
    }

    /// Adding a policy grows the list; removing it by name shrinks it again and
    /// reports whether anything was removed.
    #[tokio::test]
    async fn test_add_remove_policy() {
        let agent = ArchivalAgent::new();

        let custom_policy = ArchivalPolicy {
            name: "aggressive".to_string(),
            inactive_days: 7,
            ..Default::default()
        };

        agent.add_policy(custom_policy).await;
        let policies = agent.get_policies().await;
        assert_eq!(policies.len(), 2);

        // First removal hits the policy that was just added.
        assert!(agent.remove_policy("aggressive").await);
        let policies = agent.get_policies().await;
        assert_eq!(policies.len(), 1);
        assert_eq!(policies[0].name, "default");

        // Removing a name that is no longer present reports `false`.
        assert!(!agent.remove_policy("aggressive").await);
    }

    /// The decision echoes the session id and keeps confidence in its documented range.
    #[tokio::test]
    async fn test_evaluate_session() {
        let agent = ArchivalAgent::new();
        let decision = agent.evaluate_session("test-session-123").await;
        assert_eq!(decision.session_id, "test-session-123");
        assert!((0.0..=1.0).contains(&decision.confidence));
    }
}