do_memory_core/indexing/mod.rs
1//! Hierarchical indexing module for spatiotemporal episode organization.
2//!
3//! This module provides efficient indexing structures for episode retrieval:
4//!
5//! - **Spatiotemporal Index**: Time-based hierarchical index (year/month/day/hour)
6//! - **Hierarchical Index**: Multi-level index combining domain, task type, and time
7//!
8//! # Architecture
9//!
10//! ```text
11//! HierarchicalIndex
12//! ├── DomainLevelIndex (e.g., "web-api")
13//! │ ├── TaskTypeLevelIndex (e.g., CodeGeneration)
14//! │ │ └── SpatiotemporalIndex (time hierarchy)
15//! │ └── TaskTypeLevelIndex (e.g., Debugging)
16//! │ └── SpatiotemporalIndex
17//! └── DomainLevelIndex (e.g., "data-processing")
18//! └── ...
19//! ```
20//!
21//! # Performance
22//!
23//! | Operation | Time Complexity | Space Complexity |
24//! |-----------|----------------|------------------|
25//! | Insert | O(log n) | O(1) amortized |
26//! | Query | O(log n + k) | O(k) |
27//! | Remove | O(log n) | O(1) |
28//!
29//! Where n is total episodes and k is results count.
30//!
31//! # Usage Examples
32//!
33//! ## Basic Spatiotemporal Index
34//!
35//! ```
36//! use do_memory_core::indexing::{SpatiotemporalIndex, TimeBucket};
37//! use do_memory_core::{Episode, TaskContext, TaskType};
38//!
39//! let mut index = SpatiotemporalIndex::new();
40//!
41//! // Insert episode
42//! let episode = Episode::new(
43//! "Test task".to_string(),
44//! TaskContext::default(),
45//! TaskType::CodeGeneration,
46//! );
47//! index.insert(&episode);
48//!
49//! // Query by time range
50//! let start = chrono::Utc::now() - chrono::Duration::hours(1);
51//! let end = chrono::Utc::now();
52//! let results = index.query_range(start, end, 100);
53//!
54//! // Query by time bucket
55//! let bucket = TimeBucket::Month { year: 2024, month: 3 };
56//! let results = index.query_bucket(&bucket);
57//! ```
58//!
59//! ## Hierarchical Index
60//!
61//! ```
62//! use do_memory_core::indexing::{HierarchicalIndex, HierarchicalQuery};
63//! use do_memory_core::{Episode, TaskContext, TaskType};
64//!
65//! let mut index = HierarchicalIndex::new();
66//!
67//! // Insert episode
68//! let episode = Episode::new(
69//! "Test task".to_string(),
70//! TaskContext::default(),
71//! TaskType::CodeGeneration,
72//! );
73//! index.insert(&episode);
74//!
75//! // Query with filters
76//! let query = HierarchicalQuery::new()
77//! .with_domain("web-api")
78//! .with_task_type(TaskType::CodeGeneration)
79//! .with_limit(10);
80//! let results = index.query(&query);
81//! ```
82
83pub mod hierarchical;
84pub mod spatiotemporal;
85
86pub use hierarchical::{HierarchicalIndex, HierarchicalIndexStats, HierarchicalQuery};
87pub use spatiotemporal::{IndexStats, QueryOptions, SpatiotemporalIndex, TimeBucket};
88
89use chrono::{DateTime, Utc};
90use uuid::Uuid;
91
92/// Extension trait for integrating indexing with `SelfLearningMemory`.
93///
94/// Provides methods to build and query hierarchical indexes from memory.
95pub trait IndexableMemory {
96 /// Build a hierarchical index from all episodes in memory.
97 ///
98 /// # Returns
99 ///
100 /// A hierarchical index containing all episodes.
101 fn build_hierarchical_index(
102 &self,
103 ) -> impl std::future::Future<Output = HierarchicalIndex> + Send;
104
105 /// Build a spatiotemporal index from all episodes in memory.
106 ///
107 /// # Returns
108 ///
109 /// A spatiotemporal index containing all episodes.
110 fn build_spatiotemporal_index(
111 &self,
112 ) -> impl std::future::Future<Output = SpatiotemporalIndex> + Send;
113
114 /// Query episodes by time range using the index.
115 ///
116 /// # Arguments
117 ///
118 /// * `start` - Start of the time range
119 /// * `end` - End of the time range
120 /// * `limit` - Maximum number of results
121 ///
122 /// # Returns
123 ///
124 /// Vector of episode IDs within the time range.
125 fn query_by_time_range(
126 &self,
127 start: DateTime<Utc>,
128 end: DateTime<Utc>,
129 limit: usize,
130 ) -> impl std::future::Future<Output = Vec<Uuid>> + Send;
131}
132
133/// Performance comparison between indexed and linear scan queries.
134#[derive(Debug, Clone, Copy, PartialEq)]
135pub struct QueryPerformance {
136 /// Time taken by indexed query in microseconds
137 pub indexed_time_us: f64,
138 /// Time taken by linear scan in microseconds
139 pub linear_time_us: f64,
140 /// Speedup factor (`linear_time` / `indexed_time`)
141 pub speedup_factor: f64,
142 /// Number of episodes scanned
143 pub episodes_scanned: usize,
144 /// Number of episodes returned
145 pub results_count: usize,
146}
147
148impl QueryPerformance {
149 /// Calculate the speedup factor.
150 #[must_use]
151 pub fn calculate_speedup(indexed_time_us: f64, linear_time_us: f64) -> f64 {
152 if indexed_time_us > 0.0 {
153 linear_time_us / indexed_time_us
154 } else {
155 1.0
156 }
157 }
158}
159
160/// Index metrics for monitoring and optimization.
161#[derive(Debug, Clone, PartialEq)]
162pub struct IndexMetrics {
163 /// Total number of episodes indexed
164 pub total_episodes: usize,
165 /// Number of domains in the index
166 pub domain_count: usize,
167 /// Number of task types in the index
168 pub task_type_count: usize,
169 /// Memory usage in bytes
170 pub memory_usage_bytes: usize,
171 /// Average query time in microseconds
172 pub avg_query_time_us: f64,
173 /// Query speedup factor vs linear scan
174 pub speedup_factor: f64,
175 /// Cache hit rate (0.0 to 1.0)
176 pub cache_hit_rate: f64,
177}
178
179/// Benchmark result comparing indexed vs linear scan performance.
180#[derive(Debug, Clone, PartialEq)]
181pub struct BenchmarkResult {
182 /// Number of episodes in the dataset
183 pub episode_count: usize,
184 /// Number of queries executed
185 pub query_count: usize,
186 /// Average indexed query time
187 pub avg_indexed_time_us: f64,
188 /// Average linear scan time
189 pub avg_linear_time_us: f64,
190 /// Overall speedup factor
191 pub overall_speedup: f64,
192 /// Memory overhead percentage
193 pub memory_overhead_percent: f64,
194}
195
196#[cfg(test)]
197mod tests {
198 use super::*;
199 use crate::Episode;
200 use crate::types::{ComplexityLevel, TaskContext, TaskType};
201 use chrono::{Datelike, Timelike};
202
203 fn create_test_episode(domain: &str, task_type: TaskType) -> Episode {
204 let context = TaskContext {
205 domain: domain.to_string(),
206 complexity: ComplexityLevel::Simple,
207 tags: vec![],
208 ..Default::default()
209 };
210 Episode::new("Test episode".to_string(), context, task_type)
211 }
212
213 #[test]
214 fn test_query_performance_calculation() {
215 let perf = QueryPerformance {
216 indexed_time_us: 10.0,
217 linear_time_us: 100.0,
218 speedup_factor: QueryPerformance::calculate_speedup(10.0, 100.0),
219 episodes_scanned: 1000,
220 results_count: 10,
221 };
222
223 assert_eq!(perf.speedup_factor, 10.0);
224 }
225
226 #[test]
227 fn test_hierarchical_query_builder() {
228 let query = HierarchicalQuery::new()
229 .with_domain("web-api")
230 .with_task_type(TaskType::CodeGeneration)
231 .with_limit(50);
232
233 assert_eq!(query.domain, Some("web-api".to_string()));
234 assert_eq!(query.task_type, Some(TaskType::CodeGeneration));
235 assert_eq!(query.limit, 50);
236 }
237
238 #[test]
239 fn test_time_bucket_variants() {
240 let year_bucket = TimeBucket::Year(2024);
241 assert_eq!(year_bucket, TimeBucket::Year(2024));
242
243 let month_bucket = TimeBucket::Month {
244 year: 2024,
245 month: 3,
246 };
247 assert_eq!(
248 month_bucket,
249 TimeBucket::Month {
250 year: 2024,
251 month: 3
252 }
253 );
254
255 let day_bucket = TimeBucket::Day {
256 year: 2024,
257 month: 3,
258 day: 15,
259 };
260 assert_eq!(
261 day_bucket,
262 TimeBucket::Day {
263 year: 2024,
264 month: 3,
265 day: 15,
266 }
267 );
268
269 let hour_bucket = TimeBucket::Hour {
270 year: 2024,
271 month: 3,
272 day: 15,
273 hour: 10,
274 };
275 assert_eq!(
276 hour_bucket,
277 TimeBucket::Hour {
278 year: 2024,
279 month: 3,
280 day: 15,
281 hour: 10,
282 }
283 );
284 }
285
286 #[test]
287 fn test_index_metrics() {
288 let metrics = IndexMetrics {
289 total_episodes: 1000,
290 domain_count: 5,
291 task_type_count: 10,
292 memory_usage_bytes: 102_400,
293 avg_query_time_us: 5.5,
294 speedup_factor: 10.0,
295 cache_hit_rate: 0.85,
296 };
297
298 assert_eq!(metrics.total_episodes, 1000);
299 assert_eq!(metrics.speedup_factor, 10.0);
300 assert!((metrics.cache_hit_rate - 0.85).abs() < f64::EPSILON);
301 }
302
303 #[test]
304 fn test_benchmark_result() {
305 let result = BenchmarkResult {
306 episode_count: 10000,
307 query_count: 100,
308 avg_indexed_time_us: 5.0,
309 avg_linear_time_us: 100.0,
310 overall_speedup: 20.0,
311 memory_overhead_percent: 8.5,
312 };
313
314 assert_eq!(result.episode_count, 10000);
315 assert_eq!(result.overall_speedup, 20.0);
316 assert!((result.memory_overhead_percent - 8.5).abs() < f64::EPSILON);
317 }
318
319 #[test]
320 fn test_spatiotemporal_index_basic() {
321 let mut index = SpatiotemporalIndex::new();
322 let episode = create_test_episode("web-api", TaskType::CodeGeneration);
323
324 index.insert(&episode);
325
326 assert_eq!(index.len(), 1);
327 assert!(!index.is_empty());
328
329 let year = episode.start_time.year() as u32;
330 let month = episode.start_time.month() as u8;
331 let day = episode.start_time.day() as u8;
332 let hour = episode.start_time.hour() as u8;
333
334 let results = index.query_hour(year, month, day, hour);
335 assert_eq!(results.len(), 1);
336 assert_eq!(results[0], episode.episode_id);
337 }
338
339 #[test]
340 fn test_hierarchical_index_basic() {
341 let mut index = HierarchicalIndex::new();
342 let episode = create_test_episode("web-api", TaskType::CodeGeneration);
343
344 index.insert(&episode);
345
346 assert_eq!(index.len(), 1);
347 assert_eq!(index.domain_count(), 1);
348
349 let results = index.query_by_domain("web-api", 10);
350 assert_eq!(results.len(), 1);
351 assert_eq!(results[0], episode.episode_id);
352 }
353}