1use crate::hnsw::VectorIndex;
7use std::time::{Duration, Instant};
8
/// Overall health classification of an index, as assigned by `diagnose_index`.
///
/// The status mirrors the most severe issue found during a diagnostic run.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HealthStatus {
    /// No issue of `Warning` severity or above was recorded.
    Healthy,
    /// At least one `Warning` issue was recorded, but nothing more severe.
    Warning,
    /// At least one `Error` issue was recorded, but nothing `Critical`.
    Degraded,
    /// At least one `Critical` issue was recorded.
    Critical,
}
21
22#[derive(Debug, Clone)]
24pub struct DiagnosticReport {
25 pub status: HealthStatus,
27 pub size: usize,
29 pub memory_usage: usize,
31 pub issues: Vec<DiagnosticIssue>,
33 pub recommendations: Vec<String>,
35 pub performance: PerformanceMetrics,
37}
38
39#[derive(Debug, Clone)]
41pub struct DiagnosticIssue {
42 pub severity: IssueSeverity,
44 pub category: IssueCategory,
46 pub description: String,
48 pub suggested_fix: Option<String>,
50}
51
/// Severity of a diagnostic issue.
///
/// Variants are declared from least to most severe, so the derived ordering
/// gives `Info < Warning < Error < Critical`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum IssueSeverity {
    /// Informational note; lowest severity.
    Info,
    /// Something worth attention.
    Warning,
    /// A problem more severe than a warning.
    Error,
    /// Highest severity.
    Critical,
}
64
/// Area of the index that a diagnostic issue relates to.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum IssueCategory {
    /// Memory footprint (for example, a high estimated memory usage).
    Memory,
    /// Search or indexing performance (for example, a very large or very
    /// high-dimensional index).
    Performance,
    /// Index configuration.
    Configuration,
    /// Quality of the stored data.
    DataQuality,
    /// Structure of the index itself (for example, an empty index).
    IndexStructure,
}
79
/// Optional runtime performance figures attached to a diagnostic report.
///
/// Each field is `None` when the corresponding figure was not measured.
#[derive(Debug, Clone)]
pub struct PerformanceMetrics {
    /// Average latency of a query.
    pub avg_query_latency: Option<Duration>,
    /// Cache hit rate. NOTE(review): scale (0..=1 vs. percent) is not
    /// established anywhere in this file; confirm with the producer.
    pub cache_hit_rate: Option<f32>,
    /// Estimated throughput in queries per second.
    pub estimated_qps: Option<f32>,
}
90
91pub fn diagnose_index(index: &VectorIndex) -> DiagnosticReport {
93 let mut issues = Vec::new();
94 let mut recommendations = Vec::new();
95
96 let size = index.len();
97 let dimension = index.dimension();
98
99 let vector_memory = size * dimension * 4;
103 let graph_memory = size * 16 * 8; let overhead = size * 100; let memory_usage = vector_memory + graph_memory + overhead;
106
107 if size == 0 {
109 issues.push(DiagnosticIssue {
110 severity: IssueSeverity::Warning,
111 category: IssueCategory::IndexStructure,
112 description: "Index is empty".to_string(),
113 suggested_fix: Some("Add vectors to the index before querying".to_string()),
114 });
115 } else if size > 10_000_000 {
116 issues.push(DiagnosticIssue {
117 severity: IssueSeverity::Warning,
118 category: IssueCategory::Performance,
119 description: format!("Very large index ({} vectors)", size),
120 suggested_fix: Some("Consider using DiskANN for datasets > 10M vectors".to_string()),
121 });
122 recommendations
123 .push("Consider partitioning the index or using distributed search".to_string());
124 }
125
126 if memory_usage > 10 * 1024 * 1024 * 1024 {
128 issues.push(DiagnosticIssue {
130 severity: IssueSeverity::Warning,
131 category: IssueCategory::Memory,
132 description: format!("High memory usage: ~{:.2} GB", memory_usage as f64 / 1e9),
133 suggested_fix: Some("Consider using quantization or DiskANN".to_string()),
134 });
135 }
136
137 if dimension > 2048 {
139 issues.push(DiagnosticIssue {
140 severity: IssueSeverity::Info,
141 category: IssueCategory::Performance,
142 description: format!("High dimensionality: {}", dimension),
143 suggested_fix: Some("Consider dimensionality reduction or PCA".to_string()),
144 });
145 recommendations
146 .push("High-dimensional vectors may benefit from dimensionality reduction".to_string());
147 }
148
149 let status = if issues.iter().any(|i| i.severity == IssueSeverity::Critical) {
151 HealthStatus::Critical
152 } else if issues.iter().any(|i| i.severity == IssueSeverity::Error) {
153 HealthStatus::Degraded
154 } else if issues.iter().any(|i| i.severity == IssueSeverity::Warning) {
155 HealthStatus::Warning
156 } else {
157 HealthStatus::Healthy
158 };
159
160 DiagnosticReport {
161 status,
162 size,
163 memory_usage,
164 issues,
165 recommendations,
166 performance: PerformanceMetrics {
167 avg_query_latency: None,
168 cache_hit_rate: None,
169 estimated_qps: None,
170 },
171 }
172}
173
174pub struct SearchProfiler {
176 start_time: Instant,
177 query_count: usize,
178 total_duration: Duration,
179 min_latency: Option<Duration>,
180 max_latency: Option<Duration>,
181}
182
183impl SearchProfiler {
184 pub fn new() -> Self {
186 Self {
187 start_time: Instant::now(),
188 query_count: 0,
189 total_duration: Duration::from_secs(0),
190 min_latency: None,
191 max_latency: None,
192 }
193 }
194
195 pub fn record_query(&mut self, duration: Duration) {
197 self.query_count += 1;
198 self.total_duration += duration;
199
200 self.min_latency = Some(match self.min_latency {
201 Some(min) => min.min(duration),
202 None => duration,
203 });
204
205 self.max_latency = Some(match self.max_latency {
206 Some(max) => max.max(duration),
207 None => duration,
208 });
209 }
210
211 pub fn stats(&self) -> ProfilerStats {
213 let avg_latency = if self.query_count > 0 {
214 self.total_duration / self.query_count as u32
215 } else {
216 Duration::from_secs(0)
217 };
218
219 let elapsed = self.start_time.elapsed();
220 let qps = if elapsed.as_secs() > 0 {
221 self.query_count as f64 / elapsed.as_secs_f64()
222 } else {
223 0.0
224 };
225
226 ProfilerStats {
227 total_queries: self.query_count,
228 avg_latency,
229 min_latency: self.min_latency,
230 max_latency: self.max_latency,
231 qps,
232 elapsed,
233 }
234 }
235
236 pub fn reset(&mut self) {
238 self.start_time = Instant::now();
239 self.query_count = 0;
240 self.total_duration = Duration::from_secs(0);
241 self.min_latency = None;
242 self.max_latency = None;
243 }
244}
245
246impl Default for SearchProfiler {
247 fn default() -> Self {
248 Self::new()
249 }
250}
251
/// Snapshot of the statistics gathered by a `SearchProfiler`.
#[derive(Debug, Clone)]
pub struct ProfilerStats {
    /// Number of queries recorded in the profiling window.
    pub total_queries: usize,
    /// Mean latency per recorded query; zero when no query was recorded.
    pub avg_latency: Duration,
    /// Shortest recorded query, `None` when no query was recorded.
    pub min_latency: Option<Duration>,
    /// Longest recorded query, `None` when no query was recorded.
    pub max_latency: Option<Duration>,
    /// Throughput in queries per second over `elapsed`, as computed by
    /// `SearchProfiler::stats`.
    pub qps: f64,
    /// Wall-clock time since the profiling window started.
    pub elapsed: Duration,
}
268
269pub struct HealthMonitor {
271 last_report: Option<DiagnosticReport>,
273 last_check: Option<Instant>,
275 check_interval: Duration,
277}
278
279impl HealthMonitor {
280 pub fn new(check_interval: Duration) -> Self {
282 Self {
283 last_report: None,
284 last_check: None,
285 check_interval,
286 }
287 }
288
289 pub fn should_check(&self) -> bool {
291 match self.last_check {
292 Some(last) => last.elapsed() >= self.check_interval,
293 None => true,
294 }
295 }
296
297 pub fn check(&mut self, index: &VectorIndex) -> &DiagnosticReport {
299 self.last_report = Some(diagnose_index(index));
300 self.last_check = Some(Instant::now());
301 self.last_report.as_ref().unwrap()
302 }
303
304 pub fn last_report(&self) -> Option<&DiagnosticReport> {
306 self.last_report.as_ref()
307 }
308
309 pub fn time_since_last_check(&self) -> Option<Duration> {
311 self.last_check.map(|t| t.elapsed())
312 }
313}
314
315impl Default for HealthMonitor {
316 fn default() -> Self {
317 Self::new(Duration::from_secs(300)) }
319}
320
#[cfg(test)]
mod tests {
    use super::*;

    /// An index without vectors must be reported with an `IndexStructure` issue.
    #[test]
    fn test_diagnose_empty_index() {
        let empty = VectorIndex::with_defaults(128).unwrap();
        let report = diagnose_index(&empty);

        assert_eq!(report.size, 0);
        assert!(!report.issues.is_empty());

        let has_structure_issue = report
            .issues
            .iter()
            .any(|issue| issue.category == IssueCategory::IndexStructure);
        assert!(has_structure_issue);
    }

    /// A small populated index is at worst in the `Warning` state.
    #[test]
    fn test_diagnose_normal_index() {
        let cid: ipfrs_core::Cid = "bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi"
            .parse()
            .unwrap();
        let mut index = VectorIndex::with_defaults(128).unwrap();
        index.insert(&cid, &vec![0.1; 128]).unwrap();

        let report = diagnose_index(&index);

        assert_eq!(report.size, 1);
        assert!(matches!(
            report.status,
            HealthStatus::Healthy | HealthStatus::Warning
        ));
    }

    /// Count, average, minimum and maximum reflect the recorded samples.
    #[test]
    fn test_search_profiler() {
        let mut profiler = SearchProfiler::new();
        for millis in [10, 20, 15] {
            profiler.record_query(Duration::from_millis(millis));
        }

        let stats = profiler.stats();
        let avg_millis = stats.avg_latency.as_millis();

        assert_eq!(stats.total_queries, 3);
        assert!(avg_millis >= 10);
        assert!(avg_millis <= 20);
        assert_eq!(stats.min_latency, Some(Duration::from_millis(10)));
        assert_eq!(stats.max_latency, Some(Duration::from_millis(20)));
    }

    /// A check is due initially, not right after a check, and again once the
    /// interval has elapsed.
    #[test]
    fn test_health_monitor() {
        let index = VectorIndex::with_defaults(128).unwrap();
        let mut monitor = HealthMonitor::new(Duration::from_millis(100));

        assert!(monitor.should_check());

        monitor.check(&index);

        assert!(!monitor.should_check());
        assert!(monitor.last_report().is_some());

        // Wait past the 100 ms interval so the next check becomes due.
        std::thread::sleep(Duration::from_millis(150));
        assert!(monitor.should_check());
    }

    /// Resetting the profiler discards every recorded sample.
    #[test]
    fn test_profiler_reset() {
        let mut profiler = SearchProfiler::new();
        profiler.record_query(Duration::from_millis(10));
        profiler.record_query(Duration::from_millis(20));

        assert_eq!(profiler.stats().total_queries, 2);

        profiler.reset();

        let cleared = profiler.stats();
        assert_eq!(cleared.total_queries, 0);
        assert_eq!(cleared.min_latency, None);
        assert_eq!(cleared.max_latency, None);
    }
}