Skip to main content

mdvault_core/index/
derived.rs

1//! Derived index computation.
2//!
3//! This module builds secondary indices from the primary note and link data:
4//! - `temporal_activity`: When notes are referenced in daily notes
5//! - `activity_summary`: Aggregated activity metrics per note
6//! - `note_cooccurrence`: Notes that appear together in daily notes
7
8use chrono::{Duration, NaiveDate, Utc};
9use thiserror::Error;
10
11use super::IndexError;
12use super::db::IndexDb;
13
/// Errors that can occur during derived index computation.
#[derive(Debug, Error)]
pub enum DerivedError {
    /// A call into the index database failed. The `#[from]` attribute lets
    /// `?` convert an [`IndexError`] into this variant.
    #[error("Index database error: {0}")]
    Index(#[from] IndexError),

    /// A date string could not be parsed; the `String` payload is rendered
    /// in the error message.
    // NOTE(review): nothing visible in this file constructs this variant —
    // confirm whether it is produced elsewhere.
    #[error("Failed to parse date: {0}")]
    DateParse(String),
}
23
/// Statistics from derived index computation.
///
/// Returned by `DerivedIndexBuilder::compute_all`; every field defaults to
/// zero through the derived `Default`.
#[derive(Debug, Clone, Default)]
pub struct DerivedStats {
    /// Number of daily notes processed while building temporal activity.
    pub dailies_processed: usize,
    /// Number of temporal activity records counted in the database after the build step.
    pub activity_records: usize,
    /// Number of activity summaries computed (one per aggregated row written).
    pub summaries_computed: usize,
    /// Number of cooccurrence pairs found and written.
    pub cooccurrence_pairs: usize,
    /// Wall-clock duration of the whole computation, in milliseconds.
    pub duration_ms: u64,
}
38
/// Builder for computing derived indices.
///
/// Holds only a borrowed database handle, so the builder cannot outlive the
/// `IndexDb` it was created from.
pub struct DerivedIndexBuilder<'a> {
    /// Index database that is read from and written to by every step.
    db: &'a IndexDb,
}
43
44impl<'a> DerivedIndexBuilder<'a> {
45    /// Create a new derived index builder.
46    pub fn new(db: &'a IndexDb) -> Self {
47        Self { db }
48    }
49
50    /// Compute all derived indices.
51    ///
52    /// This should be called after the primary index is built/updated.
53    pub fn compute_all(&self) -> Result<DerivedStats, DerivedError> {
54        let start = std::time::Instant::now();
55        let mut stats = DerivedStats::default();
56
57        // Clear existing derived data
58        self.db.clear_derived_tables()?;
59
60        // Step 1: Build temporal activity from daily notes
61        stats.dailies_processed = self.build_temporal_activity()?;
62
63        // Step 2: Count activity records
64        stats.activity_records = self.db.count_temporal_activity()? as usize;
65
66        // Step 3: Compute activity summaries
67        stats.summaries_computed = self.compute_activity_summaries()?;
68
69        // Step 4: Compute cooccurrence matrix
70        stats.cooccurrence_pairs = self.compute_cooccurrence()?;
71
72        stats.duration_ms = start.elapsed().as_millis() as u64;
73        Ok(stats)
74    }
75
76    /// Build temporal activity records from daily notes.
77    ///
78    /// For each daily note, finds all outgoing links and creates
79    /// temporal_activity records linking the referenced note to the daily.
80    fn build_temporal_activity(&self) -> Result<usize, DerivedError> {
81        // Get all daily notes
82        let dailies = self.db.get_notes_by_type("daily")?;
83        let mut count = 0;
84
85        for daily in &dailies {
86            let daily_id = match daily.id {
87                Some(id) => id,
88                None => continue,
89            };
90
91            // Extract date from the daily note
92            // Daily notes typically have date in frontmatter or path
93            let activity_date = self.extract_daily_date(daily)?;
94
95            // Get all outgoing links from this daily
96            let links = self.db.get_outgoing_links(daily_id)?;
97
98            for link in &links {
99                // Skip self-references and unresolved links
100                if link.target_id.is_none() {
101                    continue;
102                }
103
104                let target_id = link.target_id.unwrap();
105                if target_id == daily_id {
106                    continue; // Skip self-links
107                }
108
109                // Create temporal activity record
110                self.db.insert_temporal_activity(
111                    target_id,
112                    daily_id,
113                    &activity_date,
114                    link.context.as_deref(),
115                )?;
116            }
117
118            count += 1;
119        }
120
121        Ok(count)
122    }
123
124    /// Extract the date from a daily note.
125    fn extract_daily_date(
126        &self,
127        daily: &super::types::IndexedNote,
128    ) -> Result<String, DerivedError> {
129        // Try to get date from frontmatter first
130        if let Some(ref fm_json) = daily.frontmatter_json
131            && let Ok(fm) = serde_json::from_str::<serde_json::Value>(fm_json)
132            && let Some(date) = fm.get("date").and_then(|v| v.as_str())
133        {
134            return Ok(date.to_string());
135        }
136
137        // Fall back to extracting date from path (e.g., "daily/2025-01-15.md")
138        let path_str = daily.path.to_string_lossy();
139        if let Some(date_str) = extract_date_from_path(&path_str) {
140            return Ok(date_str);
141        }
142
143        // Fall back to modified date
144        Ok(daily.modified.format("%Y-%m-%d").to_string())
145    }
146
147    /// Compute activity summaries for all notes.
148    fn compute_activity_summaries(&self) -> Result<usize, DerivedError> {
149        let today = Utc::now().date_naive();
150        let thirty_days_ago = today - Duration::days(30);
151        let ninety_days_ago = today - Duration::days(90);
152
153        // Get aggregated activity data
154        let summaries = self.db.aggregate_activity(
155            &thirty_days_ago.to_string(),
156            &ninety_days_ago.to_string(),
157        )?;
158
159        let mut count = 0;
160        for summary in summaries {
161            // Compute staleness score
162            let staleness = self.compute_staleness_score(
163                summary.last_seen.as_deref(),
164                summary.access_count_30d,
165                summary.access_count_90d,
166            );
167
168            self.db.upsert_activity_summary(
169                summary.note_id,
170                summary.last_seen.as_deref(),
171                summary.access_count_30d,
172                summary.access_count_90d,
173                staleness,
174            )?;
175            count += 1;
176        }
177
178        Ok(count)
179    }
180
181    /// Compute staleness score based on activity patterns.
182    ///
183    /// Score ranges from 0.0 (very active) to 1.0 (very stale).
184    fn compute_staleness_score(
185        &self,
186        last_seen: Option<&str>,
187        count_30d: i32,
188        count_90d: i32,
189    ) -> f64 {
190        let today = Utc::now().date_naive();
191
192        // Days since last seen (default to 365 if never seen)
193        let days_since = last_seen
194            .and_then(|s| NaiveDate::parse_from_str(s, "%Y-%m-%d").ok())
195            .map(|d| (today - d).num_days() as f64)
196            .unwrap_or(365.0);
197
198        // Base staleness from recency (0.0 = today, 1.0 = 90+ days)
199        let recency_score = (days_since / 90.0).min(1.0);
200
201        // Activity factor (more activity = less stale)
202        let activity_factor = if count_30d > 0 {
203            0.0 // Active in last 30 days - not stale
204        } else if count_90d > 0 {
205            0.3 // Active in last 90 days - slightly stale
206        } else {
207            0.6 // No recent activity - more stale
208        };
209
210        // Combined score
211        (recency_score * 0.6 + activity_factor * 0.4).min(1.0)
212    }
213
214    /// Compute note cooccurrence matrix.
215    ///
216    /// Finds pairs of notes that are referenced together in daily notes.
217    fn compute_cooccurrence(&self) -> Result<usize, DerivedError> {
218        // Get cooccurrence data from temporal activity
219        let pairs = self.db.compute_cooccurrence_pairs()?;
220        let mut count = 0;
221
222        for pair in pairs {
223            self.db.upsert_cooccurrence(
224                pair.note_a_id,
225                pair.note_b_id,
226                pair.shared_count,
227                pair.most_recent.as_deref(),
228            )?;
229            count += 1;
230        }
231
232        Ok(count)
233    }
234}
235
/// Extract a date string (YYYY-MM-DD) from a file path.
///
/// Returns the first ten-byte run in `path` shaped like `DDDD-DD-DD` (ASCII
/// digits only) whose month is `01..=12` and whose day is `01..=31`, or
/// `None` when there is no such run. Candidates with an impossible month or
/// day are skipped and the scan continues.
///
/// The scan works on bytes with no pattern compilation, so it is cheap to
/// call once per note.
fn extract_date_from_path(path: &str) -> Option<String> {
    /// Length in bytes of a `YYYY-MM-DD` date.
    const DATE_LEN: usize = 10;

    /// Whether `window` is exactly a plausible `YYYY-MM-DD` date.
    fn looks_like_date(window: &[u8]) -> bool {
        let [y1, y2, y3, y4, b'-', m1, m2, b'-', d1, d2] = window else {
            return false;
        };
        if ![y1, y2, y3, y4, m1, m2, d1, d2]
            .iter()
            .all(|b| b.is_ascii_digit())
        {
            return false;
        }
        // Both bytes are ASCII digits here, so the subtraction cannot underflow.
        let two_digit = |tens: &u8, ones: &u8| (tens - b'0') * 10 + (ones - b'0');
        (1..=12).contains(&two_digit(m1, m2)) && (1..=31).contains(&two_digit(d1, d2))
    }

    path.as_bytes()
        .windows(DATE_LEN)
        .find(|window| looks_like_date(window))
        // An accepted window is pure ASCII, so it is always valid UTF-8.
        .and_then(|window| std::str::from_utf8(window).ok())
        .map(str::to_owned)
}
242
#[cfg(test)]
mod tests {
    use super::*;

    /// Paths containing a `YYYY-MM-DD` run yield that date; others yield `None`.
    #[test]
    fn test_extract_date_from_path() {
        let cases = [
            ("daily/2025-01-15.md", Some("2025-01-15")),
            ("2025-01-15-meeting.md", Some("2025-01-15")),
            ("notes/random.md", None),
        ];

        for (path, expected) in cases {
            assert_eq!(extract_date_from_path(path), expected.map(String::from));
        }
    }

    /// Checks both ends of the staleness scale.
    #[test]
    fn test_staleness_score() {
        let db = IndexDb::open_in_memory().unwrap();
        let builder = DerivedIndexBuilder::new(&db);

        // Seen today with recent activity: recency and activity factor are both 0.
        let today = Utc::now().format("%Y-%m-%d").to_string();
        let active = builder.compute_staleness_score(Some(today.as_str()), 5, 10);
        assert!(active < 0.1, "Active notes should have low staleness");

        // Never seen: 365 days caps recency at 1.0 and the activity factor is 0.6,
        // giving 1.0 * 0.6 + 0.6 * 0.4 = 0.84.
        let never_seen = builder.compute_staleness_score(None, 0, 0);
        assert!(never_seen > 0.8, "Never-seen notes should be stale (score: {})", never_seen);
    }
}