organizational_intelligence_plugin/
storage.rs

1//! Feature Storage with trueno-db
2//!
3//! Implements Section 4.4: Trueno-DB Storage Layer
4//! GPU-first columnar storage using Arrow/Parquet
5//!
6//! UAT: Added JSON persistence for Phase 1
7
8use anyhow::Result;
9use std::path::Path;
10
11use crate::features::CommitFeatures;
12
13/// Feature storage using trueno-db
14///
15/// Phase 1: Basic in-memory storage with file persistence
16/// Phase 2: Full trueno-db integration with GPU-resident data
17pub struct FeatureStore {
18    features: Vec<CommitFeatures>,
19}
20
21impl FeatureStore {
22    /// Create new feature store
23    pub fn new() -> Result<Self> {
24        Ok(Self {
25            features: Vec::new(),
26        })
27    }
28
29    /// Insert single feature
30    pub fn insert(&mut self, feature: CommitFeatures) -> Result<()> {
31        self.features.push(feature);
32        Ok(())
33    }
34
35    /// Bulk insert features (optimized for GPU batch processing)
36    pub fn bulk_insert(&mut self, features: Vec<CommitFeatures>) -> Result<()> {
37        self.features.extend(features);
38        Ok(())
39    }
40
41    /// Query features by defect category
42    pub fn query_by_category(&self, category: u8) -> Result<Vec<&CommitFeatures>> {
43        Ok(self
44            .features
45            .iter()
46            .filter(|f| f.defect_category == category)
47            .collect())
48    }
49
50    /// Query features by time range (for sliding window correlation)
51    ///
52    /// Returns features where start_time <= timestamp < end_time
53    /// Time is in Unix epoch seconds (f64)
54    pub fn query_by_time_range(
55        &self,
56        start_time: f64,
57        end_time: f64,
58    ) -> Result<Vec<&CommitFeatures>> {
59        Ok(self
60            .features
61            .iter()
62            .filter(|f| f.timestamp >= start_time && f.timestamp < end_time)
63            .collect())
64    }
65
66    /// Get all features (for compatibility)
67    pub fn all_features(&self) -> &[CommitFeatures] {
68        &self.features
69    }
70
71    /// Get all features as vectors (for GPU transfer)
72    pub fn to_vectors(&self) -> Vec<Vec<f32>> {
73        self.features.iter().map(|f| f.to_vector()).collect()
74    }
75
76    /// Get feature count
77    pub fn len(&self) -> usize {
78        self.features.len()
79    }
80
81    /// Check if store is empty
82    pub fn is_empty(&self) -> bool {
83        self.features.is_empty()
84    }
85
86    /// Save to file (JSON in Phase 1, Parquet in Phase 2)
87    pub async fn save(&self, path: &Path) -> Result<()> {
88        // Phase 1: JSON persistence
89        let json = serde_json::to_string(&self.features)?;
90        std::fs::write(path, json)?;
91        Ok(())
92    }
93
94    /// Load from file (JSON in Phase 1, Parquet in Phase 2)
95    pub async fn load(path: &Path) -> Result<Self> {
96        // Phase 1: JSON persistence
97        if !path.exists() {
98            return Self::new();
99        }
100        let json = std::fs::read_to_string(path)?;
101        let features: Vec<CommitFeatures> = serde_json::from_str(&json)?;
102        Ok(Self { features })
103    }
104}
105
106impl Default for FeatureStore {
107    fn default() -> Self {
108        Self::new().unwrap()
109    }
110}
111
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a feature with the given category and file count; every other
    /// explicitly-set field uses a fixed value so tests stay deterministic.
    fn make_test_feature(category: u8, files: usize) -> CommitFeatures {
        CommitFeatures {
            defect_category: category,
            files_changed: files as f32,
            lines_added: 100.0,
            lines_deleted: 50.0,
            complexity_delta: 0.0,
            timestamp: 1700000000.0,
            hour_of_day: 14,
            day_of_week: 2,
            ..Default::default()
        }
    }

    /// Returns a per-process file path under the OS temp directory so tests
    /// neither litter the working directory nor collide across processes.
    fn temp_store_path(tag: &str) -> std::path::PathBuf {
        std::env::temp_dir().join(format!(
            "feature_store_{}_{}.json",
            tag,
            std::process::id()
        ))
    }

    #[test]
    fn test_store_creation() {
        let store = FeatureStore::new();
        assert!(store.is_ok());
        assert!(store.unwrap().is_empty());
    }

    #[test]
    fn test_insert_single() {
        let mut store = FeatureStore::new().unwrap();
        let feature = make_test_feature(1, 3);

        store.insert(feature).unwrap();

        assert_eq!(store.len(), 1);
        assert!(!store.is_empty());
    }

    #[test]
    fn test_bulk_insert() {
        let mut store = FeatureStore::new().unwrap();

        let features = vec![
            make_test_feature(1, 2),
            make_test_feature(2, 3),
            make_test_feature(1, 1),
        ];

        store.bulk_insert(features).unwrap();

        assert_eq!(store.len(), 3);
    }

    #[test]
    fn test_query_by_category() {
        let mut store = FeatureStore::new().unwrap();

        store
            .bulk_insert(vec![
                make_test_feature(1, 2),
                make_test_feature(2, 3),
                make_test_feature(1, 1),
                make_test_feature(3, 5),
            ])
            .unwrap();

        let category1 = store.query_by_category(1).unwrap();
        assert_eq!(category1.len(), 2);

        let category2 = store.query_by_category(2).unwrap();
        assert_eq!(category2.len(), 1);

        let category9 = store.query_by_category(9).unwrap();
        assert_eq!(category9.len(), 0);
    }

    #[test]
    fn test_to_vectors() {
        let mut store = FeatureStore::new().unwrap();

        store
            .bulk_insert(vec![make_test_feature(1, 2), make_test_feature(2, 3)])
            .unwrap();

        let vectors = store.to_vectors();

        assert_eq!(vectors.len(), 2);
        assert_eq!(vectors[0].len(), CommitFeatures::DIMENSION);
        assert_eq!(vectors[0][0], 1.0); // category
        assert_eq!(vectors[0][1], 2.0); // files
        assert_eq!(vectors[1][0], 2.0); // category
        assert_eq!(vectors[1][1], 3.0); // files
    }

    #[test]
    fn test_query_by_time_range() {
        let mut store = FeatureStore::new().unwrap();

        // Insert features with different timestamps
        let mut f1 = make_test_feature(1, 2);
        f1.timestamp = 1000.0;
        let mut f2 = make_test_feature(2, 3);
        f2.timestamp = 2000.0;
        let mut f3 = make_test_feature(3, 4);
        f3.timestamp = 3000.0;
        let mut f4 = make_test_feature(4, 5);
        f4.timestamp = 4000.0;

        store.bulk_insert(vec![f1, f2, f3, f4]).unwrap();

        // Query range [2000, 4000) - should get f2 and f3
        let range_result = store.query_by_time_range(2000.0, 4000.0).unwrap();
        assert_eq!(range_result.len(), 2);
        assert_eq!(range_result[0].timestamp, 2000.0);
        assert_eq!(range_result[1].timestamp, 3000.0);

        // Query range [1000, 2500) - should get f1 and f2
        let range_result2 = store.query_by_time_range(1000.0, 2500.0).unwrap();
        assert_eq!(range_result2.len(), 2);

        // Query empty range
        let empty_range = store.query_by_time_range(5000.0, 6000.0).unwrap();
        assert_eq!(empty_range.len(), 0);
    }

    #[tokio::test]
    async fn test_save_load() {
        let mut store = FeatureStore::new().unwrap();
        store
            .bulk_insert(vec![make_test_feature(1, 2), make_test_feature(2, 3)])
            .unwrap();

        // Phase 1 persists JSON, so round-trip real data through a temp file.
        let path = temp_store_path("save_load");
        store.save(&path).await.unwrap();
        let loaded = FeatureStore::load(&path).await;

        // Remove the file before asserting so a failed assertion leaves nothing behind.
        let _ = std::fs::remove_file(&path);

        let loaded = loaded.unwrap();
        assert_eq!(loaded.len(), 2);
        assert_eq!(loaded.all_features()[0].defect_category, 1);
        assert_eq!(loaded.all_features()[1].defect_category, 2);
    }

    #[tokio::test]
    async fn test_load_missing_file_is_empty() {
        let path = temp_store_path("missing");
        // Make sure no stale file from an earlier run exists.
        let _ = std::fs::remove_file(&path);

        let loaded = FeatureStore::load(&path).await.unwrap();
        assert!(loaded.is_empty());
    }
}