Skip to main content

do_memory_core/indexing/
mod.rs

1//! Hierarchical indexing module for spatiotemporal episode organization.
2//!
3//! This module provides efficient indexing structures for episode retrieval:
4//!
5//! - **Spatiotemporal Index**: Time-based hierarchical index (year/month/day/hour)
6//! - **Hierarchical Index**: Multi-level index combining domain, task type, and time
7//!
8//! # Architecture
9//!
10//! ```text
11//! HierarchicalIndex
12//! ├── DomainLevelIndex (e.g., "web-api")
13//! │   ├── TaskTypeLevelIndex (e.g., CodeGeneration)
14//! │   │   └── SpatiotemporalIndex (time hierarchy)
15//! │   └── TaskTypeLevelIndex (e.g., Debugging)
16//! │       └── SpatiotemporalIndex
17//! └── DomainLevelIndex (e.g., "data-processing")
18//!     └── ...
19//! ```
20//!
21//! # Performance
22//!
23//! | Operation | Time Complexity | Space Complexity |
24//! |-----------|----------------|------------------|
25//! | Insert    | O(log n)       | O(1) amortized   |
26//! | Query     | O(log n + k)   | O(k)             |
27//! | Remove    | O(log n)       | O(1)             |
28//!
29//! Where n is total episodes and k is results count.
30//!
31//! # Usage Examples
32//!
33//! ## Basic Spatiotemporal Index
34//!
35//! ```
36//! use do_memory_core::indexing::{SpatiotemporalIndex, TimeBucket};
37//! use do_memory_core::{Episode, TaskContext, TaskType};
38//!
39//! let mut index = SpatiotemporalIndex::new();
40//!
41//! // Insert episode
42//! let episode = Episode::new(
43//!     "Test task".to_string(),
44//!     TaskContext::default(),
45//!     TaskType::CodeGeneration,
46//! );
47//! index.insert(&episode);
48//!
49//! // Query by time range
50//! let start = chrono::Utc::now() - chrono::Duration::hours(1);
51//! let end = chrono::Utc::now();
52//! let results = index.query_range(start, end, 100);
53//!
54//! // Query by time bucket
55//! let bucket = TimeBucket::Month { year: 2024, month: 3 };
56//! let results = index.query_bucket(&bucket);
57//! ```
58//!
59//! ## Hierarchical Index
60//!
61//! ```
62//! use do_memory_core::indexing::{HierarchicalIndex, HierarchicalQuery};
63//! use do_memory_core::{Episode, TaskContext, TaskType};
64//!
65//! let mut index = HierarchicalIndex::new();
66//!
67//! // Insert episode
68//! let episode = Episode::new(
69//!     "Test task".to_string(),
70//!     TaskContext::default(),
71//!     TaskType::CodeGeneration,
72//! );
73//! index.insert(&episode);
74//!
75//! // Query with filters
76//! let query = HierarchicalQuery::new()
77//!     .with_domain("web-api")
78//!     .with_task_type(TaskType::CodeGeneration)
79//!     .with_limit(10);
80//! let results = index.query(&query);
81//! ```
82
83pub mod hierarchical;
84pub mod spatiotemporal;
85
86pub use hierarchical::{HierarchicalIndex, HierarchicalIndexStats, HierarchicalQuery};
87pub use spatiotemporal::{IndexStats, QueryOptions, SpatiotemporalIndex, TimeBucket};
88
89use chrono::{DateTime, Utc};
90use uuid::Uuid;
91
92/// Extension trait for integrating indexing with `SelfLearningMemory`.
93///
94/// Provides methods to build and query hierarchical indexes from memory.
95pub trait IndexableMemory {
96    /// Build a hierarchical index from all episodes in memory.
97    ///
98    /// # Returns
99    ///
100    /// A hierarchical index containing all episodes.
101    fn build_hierarchical_index(
102        &self,
103    ) -> impl std::future::Future<Output = HierarchicalIndex> + Send;
104
105    /// Build a spatiotemporal index from all episodes in memory.
106    ///
107    /// # Returns
108    ///
109    /// A spatiotemporal index containing all episodes.
110    fn build_spatiotemporal_index(
111        &self,
112    ) -> impl std::future::Future<Output = SpatiotemporalIndex> + Send;
113
114    /// Query episodes by time range using the index.
115    ///
116    /// # Arguments
117    ///
118    /// * `start` - Start of the time range
119    /// * `end` - End of the time range
120    /// * `limit` - Maximum number of results
121    ///
122    /// # Returns
123    ///
124    /// Vector of episode IDs within the time range.
125    fn query_by_time_range(
126        &self,
127        start: DateTime<Utc>,
128        end: DateTime<Utc>,
129        limit: usize,
130    ) -> impl std::future::Future<Output = Vec<Uuid>> + Send;
131}
132
133/// Performance comparison between indexed and linear scan queries.
134#[derive(Debug, Clone, Copy, PartialEq)]
135pub struct QueryPerformance {
136    /// Time taken by indexed query in microseconds
137    pub indexed_time_us: f64,
138    /// Time taken by linear scan in microseconds
139    pub linear_time_us: f64,
140    /// Speedup factor (`linear_time` / `indexed_time`)
141    pub speedup_factor: f64,
142    /// Number of episodes scanned
143    pub episodes_scanned: usize,
144    /// Number of episodes returned
145    pub results_count: usize,
146}
147
148impl QueryPerformance {
149    /// Calculate the speedup factor.
150    #[must_use]
151    pub fn calculate_speedup(indexed_time_us: f64, linear_time_us: f64) -> f64 {
152        if indexed_time_us > 0.0 {
153            linear_time_us / indexed_time_us
154        } else {
155            1.0
156        }
157    }
158}
159
160/// Index metrics for monitoring and optimization.
161#[derive(Debug, Clone, PartialEq)]
162pub struct IndexMetrics {
163    /// Total number of episodes indexed
164    pub total_episodes: usize,
165    /// Number of domains in the index
166    pub domain_count: usize,
167    /// Number of task types in the index
168    pub task_type_count: usize,
169    /// Memory usage in bytes
170    pub memory_usage_bytes: usize,
171    /// Average query time in microseconds
172    pub avg_query_time_us: f64,
173    /// Query speedup factor vs linear scan
174    pub speedup_factor: f64,
175    /// Cache hit rate (0.0 to 1.0)
176    pub cache_hit_rate: f64,
177}
178
179/// Benchmark result comparing indexed vs linear scan performance.
180#[derive(Debug, Clone, PartialEq)]
181pub struct BenchmarkResult {
182    /// Number of episodes in the dataset
183    pub episode_count: usize,
184    /// Number of queries executed
185    pub query_count: usize,
186    /// Average indexed query time
187    pub avg_indexed_time_us: f64,
188    /// Average linear scan time
189    pub avg_linear_time_us: f64,
190    /// Overall speedup factor
191    pub overall_speedup: f64,
192    /// Memory overhead percentage
193    pub memory_overhead_percent: f64,
194}
195
196#[cfg(test)]
197mod tests {
198    use super::*;
199    use crate::Episode;
200    use crate::types::{ComplexityLevel, TaskContext, TaskType};
201    use chrono::{Datelike, Timelike};
202
203    fn create_test_episode(domain: &str, task_type: TaskType) -> Episode {
204        let context = TaskContext {
205            domain: domain.to_string(),
206            complexity: ComplexityLevel::Simple,
207            tags: vec![],
208            ..Default::default()
209        };
210        Episode::new("Test episode".to_string(), context, task_type)
211    }
212
213    #[test]
214    fn test_query_performance_calculation() {
215        let perf = QueryPerformance {
216            indexed_time_us: 10.0,
217            linear_time_us: 100.0,
218            speedup_factor: QueryPerformance::calculate_speedup(10.0, 100.0),
219            episodes_scanned: 1000,
220            results_count: 10,
221        };
222
223        assert_eq!(perf.speedup_factor, 10.0);
224    }
225
226    #[test]
227    fn test_hierarchical_query_builder() {
228        let query = HierarchicalQuery::new()
229            .with_domain("web-api")
230            .with_task_type(TaskType::CodeGeneration)
231            .with_limit(50);
232
233        assert_eq!(query.domain, Some("web-api".to_string()));
234        assert_eq!(query.task_type, Some(TaskType::CodeGeneration));
235        assert_eq!(query.limit, 50);
236    }
237
238    #[test]
239    fn test_time_bucket_variants() {
240        let year_bucket = TimeBucket::Year(2024);
241        assert_eq!(year_bucket, TimeBucket::Year(2024));
242
243        let month_bucket = TimeBucket::Month {
244            year: 2024,
245            month: 3,
246        };
247        assert_eq!(
248            month_bucket,
249            TimeBucket::Month {
250                year: 2024,
251                month: 3
252            }
253        );
254
255        let day_bucket = TimeBucket::Day {
256            year: 2024,
257            month: 3,
258            day: 15,
259        };
260        assert_eq!(
261            day_bucket,
262            TimeBucket::Day {
263                year: 2024,
264                month: 3,
265                day: 15,
266            }
267        );
268
269        let hour_bucket = TimeBucket::Hour {
270            year: 2024,
271            month: 3,
272            day: 15,
273            hour: 10,
274        };
275        assert_eq!(
276            hour_bucket,
277            TimeBucket::Hour {
278                year: 2024,
279                month: 3,
280                day: 15,
281                hour: 10,
282            }
283        );
284    }
285
286    #[test]
287    fn test_index_metrics() {
288        let metrics = IndexMetrics {
289            total_episodes: 1000,
290            domain_count: 5,
291            task_type_count: 10,
292            memory_usage_bytes: 102_400,
293            avg_query_time_us: 5.5,
294            speedup_factor: 10.0,
295            cache_hit_rate: 0.85,
296        };
297
298        assert_eq!(metrics.total_episodes, 1000);
299        assert_eq!(metrics.speedup_factor, 10.0);
300        assert!((metrics.cache_hit_rate - 0.85).abs() < f64::EPSILON);
301    }
302
303    #[test]
304    fn test_benchmark_result() {
305        let result = BenchmarkResult {
306            episode_count: 10000,
307            query_count: 100,
308            avg_indexed_time_us: 5.0,
309            avg_linear_time_us: 100.0,
310            overall_speedup: 20.0,
311            memory_overhead_percent: 8.5,
312        };
313
314        assert_eq!(result.episode_count, 10000);
315        assert_eq!(result.overall_speedup, 20.0);
316        assert!((result.memory_overhead_percent - 8.5).abs() < f64::EPSILON);
317    }
318
319    #[test]
320    fn test_spatiotemporal_index_basic() {
321        let mut index = SpatiotemporalIndex::new();
322        let episode = create_test_episode("web-api", TaskType::CodeGeneration);
323
324        index.insert(&episode);
325
326        assert_eq!(index.len(), 1);
327        assert!(!index.is_empty());
328
329        let year = episode.start_time.year() as u32;
330        let month = episode.start_time.month() as u8;
331        let day = episode.start_time.day() as u8;
332        let hour = episode.start_time.hour() as u8;
333
334        let results = index.query_hour(year, month, day, hour);
335        assert_eq!(results.len(), 1);
336        assert_eq!(results[0], episode.episode_id);
337    }
338
339    #[test]
340    fn test_hierarchical_index_basic() {
341        let mut index = HierarchicalIndex::new();
342        let episode = create_test_episode("web-api", TaskType::CodeGeneration);
343
344        index.insert(&episode);
345
346        assert_eq!(index.len(), 1);
347        assert_eq!(index.domain_count(), 1);
348
349        let results = index.query_by_domain("web-api", 10);
350        assert_eq!(results.len(), 1);
351        assert_eq!(results[0], episode.episode_id);
352    }
353}