mdvault_core/index/
derived.rs1use chrono::{Duration, NaiveDate, Utc};
9use thiserror::Error;
10
11use super::IndexError;
12use super::db::IndexDb;
13
14#[derive(Debug, Error)]
16pub enum DerivedError {
17 #[error("Index database error: {0}")]
18 Index(#[from] IndexError),
19
20 #[error("Failed to parse date: {0}")]
21 DateParse(String),
22}
23
/// Counters describing one derived-index computation run.
///
/// Every field starts at zero via [`Default`].
#[derive(Clone, Debug, Default)]
pub struct DerivedStats {
    /// Number of daily notes processed.
    pub dailies_processed: usize,
    /// Number of temporal activity records.
    pub activity_records: usize,
    /// Number of activity summaries computed.
    pub summaries_computed: usize,
    /// Number of co-occurrence pairs.
    pub cooccurrence_pairs: usize,
    /// Duration of the run, in milliseconds.
    pub duration_ms: u64,
}
38
39pub struct DerivedIndexBuilder<'a> {
41 db: &'a IndexDb,
42}
43
44impl<'a> DerivedIndexBuilder<'a> {
45 pub fn new(db: &'a IndexDb) -> Self {
47 Self { db }
48 }
49
50 pub fn compute_all(&self) -> Result<DerivedStats, DerivedError> {
54 let start = std::time::Instant::now();
55 let mut stats = DerivedStats::default();
56
57 self.db.clear_derived_tables()?;
59
60 stats.dailies_processed = self.build_temporal_activity()?;
62
63 stats.activity_records = self.db.count_temporal_activity()? as usize;
65
66 stats.summaries_computed = self.compute_activity_summaries()?;
68
69 stats.cooccurrence_pairs = self.compute_cooccurrence()?;
71
72 stats.duration_ms = start.elapsed().as_millis() as u64;
73 Ok(stats)
74 }
75
76 fn build_temporal_activity(&self) -> Result<usize, DerivedError> {
81 let dailies = self.db.get_notes_by_type("daily")?;
83 let mut count = 0;
84
85 for daily in &dailies {
86 let daily_id = match daily.id {
87 Some(id) => id,
88 None => continue,
89 };
90
91 let activity_date = self.extract_daily_date(daily)?;
94
95 let links = self.db.get_outgoing_links(daily_id)?;
97
98 for link in &links {
99 if link.target_id.is_none() {
101 continue;
102 }
103
104 let target_id = link.target_id.unwrap();
105 if target_id == daily_id {
106 continue; }
108
109 self.db.insert_temporal_activity(
111 target_id,
112 daily_id,
113 &activity_date,
114 link.context.as_deref(),
115 )?;
116 }
117
118 count += 1;
119 }
120
121 Ok(count)
122 }
123
124 fn extract_daily_date(
126 &self,
127 daily: &super::types::IndexedNote,
128 ) -> Result<String, DerivedError> {
129 if let Some(ref fm_json) = daily.frontmatter_json
131 && let Ok(fm) = serde_json::from_str::<serde_json::Value>(fm_json)
132 && let Some(date) = fm.get("date").and_then(|v| v.as_str())
133 {
134 return Ok(date.to_string());
135 }
136
137 let path_str = daily.path.to_string_lossy();
139 if let Some(date_str) = extract_date_from_path(&path_str) {
140 return Ok(date_str);
141 }
142
143 Ok(daily.modified.format("%Y-%m-%d").to_string())
145 }
146
147 fn compute_activity_summaries(&self) -> Result<usize, DerivedError> {
149 let today = Utc::now().date_naive();
150 let thirty_days_ago = today - Duration::days(30);
151 let ninety_days_ago = today - Duration::days(90);
152
153 let summaries = self.db.aggregate_activity(
155 &thirty_days_ago.to_string(),
156 &ninety_days_ago.to_string(),
157 )?;
158
159 let mut count = 0;
160 for summary in summaries {
161 let staleness = self.compute_staleness_score(
163 summary.last_seen.as_deref(),
164 summary.access_count_30d,
165 summary.access_count_90d,
166 );
167
168 self.db.upsert_activity_summary(
169 summary.note_id,
170 summary.last_seen.as_deref(),
171 summary.access_count_30d,
172 summary.access_count_90d,
173 staleness,
174 )?;
175 count += 1;
176 }
177
178 Ok(count)
179 }
180
181 fn compute_staleness_score(
185 &self,
186 last_seen: Option<&str>,
187 count_30d: i32,
188 count_90d: i32,
189 ) -> f64 {
190 let today = Utc::now().date_naive();
191
192 let days_since = last_seen
194 .and_then(|s| NaiveDate::parse_from_str(s, "%Y-%m-%d").ok())
195 .map(|d| (today - d).num_days() as f64)
196 .unwrap_or(365.0);
197
198 let recency_score = (days_since / 90.0).min(1.0);
200
201 let activity_factor = if count_30d > 0 {
203 0.0 } else if count_90d > 0 {
205 0.3 } else {
207 0.6 };
209
210 (recency_score * 0.6 + activity_factor * 0.4).min(1.0)
212 }
213
214 fn compute_cooccurrence(&self) -> Result<usize, DerivedError> {
218 let pairs = self.db.compute_cooccurrence_pairs()?;
220 let mut count = 0;
221
222 for pair in pairs {
223 self.db.upsert_cooccurrence(
224 pair.note_a_id,
225 pair.note_b_id,
226 pair.shared_count,
227 pair.most_recent.as_deref(),
228 )?;
229 count += 1;
230 }
231
232 Ok(count)
233 }
234}
235
/// Returns the first `YYYY-MM-DD`-shaped substring of `path`, if any.
///
/// Only the shape is checked (four ASCII digits, `-`, two digits, `-`, two
/// digits); the value is not validated as a real calendar date. Matching is
/// restricted to ASCII digits so the result can be parsed with `%Y-%m-%d`.
///
/// A plain byte scan is used so that no pattern has to be compiled on every
/// call.
fn extract_date_from_path(path: &str) -> Option<String> {
    /// Byte length of the `YYYY-MM-DD` shape.
    const DATE_LEN: usize = 10;

    path.as_bytes()
        .windows(DATE_LEN)
        .find(|window| {
            window.iter().enumerate().all(|(idx, &byte)| match idx {
                4 | 7 => byte == b'-',
                _ => byte.is_ascii_digit(),
            })
        })
        // A matching window is pure ASCII, so it is always valid UTF-8.
        .and_then(|window| std::str::from_utf8(window).ok())
        .map(String::from)
}
242
#[cfg(test)]
mod tests {
    use super::*;

    /// A date is found wherever it sits in the path; paths without one
    /// produce `None`.
    #[test]
    fn test_extract_date_from_path() {
        let cases = [
            ("daily/2025-01-15.md", Some("2025-01-15")),
            ("2025-01-15-meeting.md", Some("2025-01-15")),
            ("notes/random.md", None),
        ];

        for (path, expected) in cases {
            assert_eq!(extract_date_from_path(path), expected.map(String::from));
        }
    }

    /// A note seen today with recent activity scores low; a note that was
    /// never seen scores high.
    #[test]
    fn test_staleness_score() {
        let db = IndexDb::open_in_memory().unwrap();
        let builder = DerivedIndexBuilder::new(&db);

        let today = Utc::now().format("%Y-%m-%d").to_string();
        let active = builder.compute_staleness_score(Some(today.as_str()), 5, 10);
        assert!(active < 0.1, "Active notes should have low staleness");

        let never_seen = builder.compute_staleness_score(None, 0, 0);
        assert!(never_seen > 0.8, "Never-seen notes should be stale (score: {})", never_seen);
    }
}