1use prometheus::{
13 Histogram, HistogramOpts, HistogramVec, IntCounter, IntCounterVec, IntGauge, IntGaugeVec, Opts,
14 Registry,
15};
16use std::sync::{LazyLock, OnceLock};
17
18static METRICS_INIT: OnceLock<Result<(), MetricsError>> = OnceLock::new();
20
/// Error describing why metric registration failed.
#[derive(Debug, Clone)]
pub struct MetricsError {
    /// Human-readable description of the failure(s).
    pub message: String,
}

impl std::fmt::Display for MetricsError {
    /// Writes the message prefixed with `Metrics initialization failed: `.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { message } = self;
        write!(f, "Metrics initialization failed: {message}")
    }
}

// No underlying source error is tracked, so the default trait methods suffice.
impl std::error::Error for MetricsError {}
34
35fn latency_histogram_opts(name: &str, help: &str) -> HistogramOpts {
37 HistogramOpts::new(name, help).buckets(vec![
38 0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0,
39 ])
40}
41
42fn fast_histogram_opts(name: &str, help: &str) -> HistogramOpts {
44 HistogramOpts::new(name, help).buckets(vec![
45 0.0001, 0.0005, 0.001, 0.0025, 0.005, 0.01, 0.025, 0.05,
46 ])
47}
48
49pub static METRICS_REGISTRY: LazyLock<Registry> = LazyLock::new(Registry::new);
51
52pub static HTTP_REQUEST_DURATION: LazyLock<HistogramVec> = LazyLock::new(|| {
58 HistogramVec::new(
59 latency_histogram_opts(
60 "shodh_http_request_duration_seconds",
61 "HTTP request duration in seconds",
62 ),
63 &["method", "endpoint", "status"],
64 )
65 .expect("HTTP_REQUEST_DURATION metric must be valid at compile time")
66});
67
68pub static HTTP_REQUESTS_TOTAL: LazyLock<IntCounterVec> = LazyLock::new(|| {
70 IntCounterVec::new(
71 Opts::new("shodh_http_requests_total", "Total HTTP requests"),
72 &["method", "endpoint", "status"],
73 )
74 .expect("HTTP_REQUESTS_TOTAL metric must be valid at compile time")
75});
76
77pub static MEMORY_STORE_TOTAL: LazyLock<IntCounterVec> = LazyLock::new(|| {
84 IntCounterVec::new(
85 Opts::new("shodh_memory_store_total", "Total memory store operations"),
86 &["result"],
87 )
88 .expect("MEMORY_STORE_TOTAL metric must be valid at compile time")
89});
90
91pub static MEMORY_STORE_DURATION: LazyLock<Histogram> = LazyLock::new(|| {
93 Histogram::with_opts(
94 HistogramOpts::new(
95 "shodh_memory_store_duration_seconds",
96 "Memory store operation duration",
97 )
98 .buckets(vec![0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5]),
99 )
100 .expect("MEMORY_STORE_DURATION metric must be valid at compile time")
101});
102
103pub static MEMORY_RETRIEVE_TOTAL: LazyLock<IntCounterVec> = LazyLock::new(|| {
105 IntCounterVec::new(
106 Opts::new(
107 "shodh_memory_retrieve_total",
108 "Total memory retrieve operations",
109 ),
110 &["retrieval_mode", "result"],
111 )
112 .expect("MEMORY_RETRIEVE_TOTAL metric must be valid at compile time")
113});
114
115pub static MEMORY_RETRIEVE_DURATION: LazyLock<HistogramVec> = LazyLock::new(|| {
117 HistogramVec::new(
118 HistogramOpts::new(
119 "shodh_memory_retrieve_duration_seconds",
120 "Memory retrieve operation duration",
121 )
122 .buckets(vec![0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0]),
123 &["retrieval_mode"],
124 )
125 .expect("MEMORY_RETRIEVE_DURATION metric must be valid at compile time")
126});
127
128pub static MEMORY_RETRIEVE_RESULTS: LazyLock<HistogramVec> = LazyLock::new(|| {
130 HistogramVec::new(
131 HistogramOpts::new(
132 "shodh_memory_retrieve_results",
133 "Number of results returned per query",
134 )
135 .buckets(vec![0.0, 1.0, 5.0, 10.0, 25.0, 50.0, 100.0]),
136 &["retrieval_mode"],
137 )
138 .expect("MEMORY_RETRIEVE_RESULTS metric must be valid at compile time")
139});
140
141pub static EMBEDDING_GENERATE_TOTAL: LazyLock<IntCounterVec> = LazyLock::new(|| {
147 IntCounterVec::new(
148 Opts::new(
149 "shodh_embedding_generate_total",
150 "Total embedding generations",
151 ),
152 &["mode", "result"], )
154 .expect("EMBEDDING_GENERATE_TOTAL metric must be valid at compile time")
155});
156
157pub static EMBEDDING_GENERATE_DURATION: LazyLock<HistogramVec> = LazyLock::new(|| {
159 HistogramVec::new(
160 HistogramOpts::new(
161 "shodh_embedding_generate_duration_seconds",
162 "Embedding generation duration",
163 )
164 .buckets(vec![
165 0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 5.0,
166 ]),
167 &["mode"],
168 )
169 .expect("EMBEDDING_GENERATE_DURATION metric must be valid at compile time")
170});
171
172pub static EMBEDDING_TIMEOUT_TOTAL: LazyLock<IntCounter> = LazyLock::new(|| {
174 IntCounter::new(
175 "shodh_embedding_timeout_total",
176 "Total embedding generation timeouts",
177 )
178 .expect("EMBEDDING_TIMEOUT_TOTAL metric must be valid at compile time")
179});
180
181pub static NER_LOCK_TIMEOUT_TOTAL: LazyLock<IntCounter> = LazyLock::new(|| {
183 IntCounter::new(
184 "shodh_ner_lock_timeout_total",
185 "Total NER session lock timeouts (degraded entity extraction)",
186 )
187 .expect("NER_LOCK_TIMEOUT_TOTAL metric must be valid at compile time")
188});
189
190pub static ACTIVE_USERS: LazyLock<IntGauge> = LazyLock::new(|| {
196 IntGauge::new(
197 "shodh_active_users",
198 "Number of users with active memory sessions",
199 )
200 .expect("ACTIVE_USERS metric must be valid at compile time")
201});
202
203pub static MEMORIES_BY_TIER: LazyLock<IntGaugeVec> = LazyLock::new(|| {
205 IntGaugeVec::new(
206 Opts::new("shodh_memories_by_tier", "Total memories by tier"),
207 &["tier"], )
209 .expect("MEMORIES_BY_TIER metric must be valid at compile time")
210});
211
212pub static MEMORY_HEAP_BYTES_TOTAL: LazyLock<IntGauge> = LazyLock::new(|| {
214 IntGauge::new(
215 "shodh_memory_heap_bytes_total",
216 "Total estimated heap usage across all users",
217 )
218 .expect("MEMORY_HEAP_BYTES_TOTAL metric must be valid at compile time")
219});
220
221pub static VECTOR_INDEX_SIZE_TOTAL: LazyLock<IntGauge> = LazyLock::new(|| {
227 IntGauge::new(
228 "shodh_vector_index_size_total",
229 "Total number of vectors in all indices",
230 )
231 .expect("VECTOR_INDEX_SIZE_TOTAL metric must be valid at compile time")
232});
233
234pub static VECTOR_SEARCH_TOTAL: LazyLock<IntCounterVec> = LazyLock::new(|| {
236 IntCounterVec::new(
237 Opts::new(
238 "shodh_vector_search_total",
239 "Total vector search operations",
240 ),
241 &["result"],
242 )
243 .expect("VECTOR_SEARCH_TOTAL metric must be valid at compile time")
244});
245
246pub static VECTOR_SEARCH_DURATION: LazyLock<Histogram> = LazyLock::new(|| {
248 Histogram::with_opts(fast_histogram_opts(
249 "shodh_vector_search_duration_seconds",
250 "Vector search duration",
251 ))
252 .expect("VECTOR_SEARCH_DURATION metric must be valid at compile time")
253});
254
255pub static ROCKSDB_OPS_TOTAL: LazyLock<IntCounterVec> = LazyLock::new(|| {
261 IntCounterVec::new(
262 Opts::new("shodh_rocksdb_ops_total", "Total RocksDB operations"),
263 &["operation", "result"], )
265 .expect("ROCKSDB_OPS_TOTAL metric must be valid at compile time")
266});
267
268pub static ROCKSDB_OPS_DURATION: LazyLock<HistogramVec> = LazyLock::new(|| {
270 HistogramVec::new(
271 fast_histogram_opts(
272 "shodh_rocksdb_ops_duration_seconds",
273 "RocksDB operation duration",
274 ),
275 &["operation"],
276 )
277 .expect("ROCKSDB_OPS_DURATION metric must be valid at compile time")
278});
279
280pub static LEGACY_FALLBACK_BRANCH_TOTAL: LazyLock<IntCounterVec> = LazyLock::new(|| {
282 IntCounterVec::new(
283 Opts::new(
284 "shodh_legacy_fallback_branch_total",
285 "Total fallback deserialization branch hits",
286 ),
287 &["branch"],
288 )
289 .expect("LEGACY_FALLBACK_BRANCH_TOTAL metric must be valid at compile time")
290});
291
292pub static ERRORS_TOTAL: LazyLock<IntCounterVec> = LazyLock::new(|| {
298 IntCounterVec::new(
299 Opts::new("shodh_errors_total", "Total errors by type"),
300 &["error_type", "endpoint"],
301 )
302 .expect("ERRORS_TOTAL metric must be valid at compile time")
303});
304
305pub static RESOURCE_LIMIT_REJECTIONS: LazyLock<IntCounterVec> = LazyLock::new(|| {
307 IntCounterVec::new(
308 Opts::new(
309 "shodh_resource_limit_rejections",
310 "Requests rejected due to resource limits",
311 ),
312 &["resource"],
313 )
314 .expect("RESOURCE_LIMIT_REJECTIONS metric must be valid at compile time")
315});
316
317pub static CONCURRENT_REQUESTS: LazyLock<IntGauge> = LazyLock::new(|| {
323 IntGauge::new(
324 "shodh_concurrent_requests",
325 "Current number of concurrent requests",
326 )
327 .expect("CONCURRENT_REQUESTS metric must be valid at compile time")
328});
329
330pub static REQUEST_QUEUE_SIZE: LazyLock<IntGauge> = LazyLock::new(|| {
332 IntGauge::new("shodh_request_queue_size", "Number of queued requests")
333 .expect("REQUEST_QUEUE_SIZE metric must be valid at compile time")
334});
335
336pub static HEBBIAN_REINFORCE_TOTAL: LazyLock<IntCounterVec> = LazyLock::new(|| {
342 IntCounterVec::new(
343 Opts::new(
344 "shodh_hebbian_reinforce_total",
345 "Total Hebbian reinforcement operations",
346 ),
347 &["outcome", "result"], )
349 .expect("HEBBIAN_REINFORCE_TOTAL metric must be valid at compile time")
350});
351
352pub static HEBBIAN_REINFORCE_DURATION: LazyLock<HistogramVec> = LazyLock::new(|| {
354 HistogramVec::new(
355 HistogramOpts::new(
356 "shodh_hebbian_reinforce_duration_seconds",
357 "Hebbian reinforcement operation duration",
358 )
359 .buckets(vec![0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5]),
360 &["outcome"],
361 )
362 .expect("HEBBIAN_REINFORCE_DURATION metric must be valid at compile time")
363});
364
365pub static CONSOLIDATE_TOTAL: LazyLock<IntCounterVec> = LazyLock::new(|| {
371 IntCounterVec::new(
372 Opts::new(
373 "shodh_consolidate_total",
374 "Total memory consolidation operations",
375 ),
376 &["result"],
377 )
378 .expect("CONSOLIDATE_TOTAL metric must be valid at compile time")
379});
380
381pub static CONSOLIDATE_DURATION: LazyLock<Histogram> = LazyLock::new(|| {
383 Histogram::with_opts(
384 HistogramOpts::new(
385 "shodh_consolidate_duration_seconds",
386 "Memory consolidation operation duration",
387 )
388 .buckets(vec![0.01, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0]),
389 )
390 .expect("CONSOLIDATE_DURATION metric must be valid at compile time")
391});
392
393pub static BATCH_STORE_DURATION: LazyLock<Histogram> = LazyLock::new(|| {
399 Histogram::with_opts(
400 HistogramOpts::new(
401 "shodh_batch_store_duration_seconds",
402 "Batch memory store operation duration",
403 )
404 .buckets(vec![0.01, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0, 30.0]),
405 )
406 .expect("BATCH_STORE_DURATION metric must be valid at compile time")
407});
408
409pub static BATCH_STORE_SIZE: LazyLock<Histogram> = LazyLock::new(|| {
411 Histogram::with_opts(
412 HistogramOpts::new(
413 "shodh_batch_store_size",
414 "Number of memories in batch store operations",
415 )
416 .buckets(vec![
417 1.0, 5.0, 10.0, 25.0, 50.0, 100.0, 250.0, 500.0, 1000.0,
418 ]),
419 )
420 .expect("BATCH_STORE_SIZE metric must be valid at compile time")
421});
422
423pub static EMBEDDING_CACHE_QUERY: LazyLock<IntCounterVec> = LazyLock::new(|| {
429 IntCounterVec::new(
430 Opts::new(
431 "shodh_embedding_cache_query_total",
432 "Query embedding cache operations",
433 ),
434 &["result"], )
436 .expect("EMBEDDING_CACHE_QUERY metric must be valid at compile time")
437});
438
439pub static EMBEDDING_CACHE_CONTENT: LazyLock<IntCounterVec> = LazyLock::new(|| {
441 IntCounterVec::new(
442 Opts::new(
443 "shodh_embedding_cache_content_total",
444 "Content embedding cache operations",
445 ),
446 &["result"], )
448 .expect("EMBEDDING_CACHE_CONTENT metric must be valid at compile time")
449});
450
451pub static EMBEDDING_CACHE_QUERY_SIZE: LazyLock<IntGauge> = LazyLock::new(|| {
453 IntGauge::new(
454 "shodh_embedding_cache_query_size",
455 "Current number of entries in query embedding cache",
456 )
457 .expect("EMBEDDING_CACHE_QUERY_SIZE metric must be valid at compile time")
458});
459
460pub static EMBEDDING_CACHE_CONTENT_SIZE: LazyLock<IntGauge> = LazyLock::new(|| {
462 IntGauge::new(
463 "shodh_embedding_cache_content_size",
464 "Current number of entries in content embedding cache",
465 )
466 .expect("EMBEDDING_CACHE_CONTENT_SIZE metric must be valid at compile time")
467});
468
469pub fn register_metrics() -> Result<(), MetricsError> {
480 if let Some(result) = METRICS_INIT.get() {
482 return result.clone();
483 }
484
485 let result = do_register_metrics();
486 let _ = METRICS_INIT.set(result.clone());
487 result
488}
489
490fn do_register_metrics() -> Result<(), MetricsError> {
491 let mut errors = Vec::new();
492
493 macro_rules! register {
495 ($metric:expr, $name:expr) => {
496 if let Err(e) = METRICS_REGISTRY.register(Box::new($metric.clone())) {
497 errors.push(format!("{}: {}", $name, e));
498 }
499 };
500 }
501
502 register!(HTTP_REQUEST_DURATION, "HTTP_REQUEST_DURATION");
504 register!(HTTP_REQUESTS_TOTAL, "HTTP_REQUESTS_TOTAL");
505
506 register!(MEMORY_STORE_TOTAL, "MEMORY_STORE_TOTAL");
508 register!(MEMORY_STORE_DURATION, "MEMORY_STORE_DURATION");
509 register!(MEMORY_RETRIEVE_TOTAL, "MEMORY_RETRIEVE_TOTAL");
510 register!(MEMORY_RETRIEVE_DURATION, "MEMORY_RETRIEVE_DURATION");
511 register!(MEMORY_RETRIEVE_RESULTS, "MEMORY_RETRIEVE_RESULTS");
512
513 register!(EMBEDDING_GENERATE_TOTAL, "EMBEDDING_GENERATE_TOTAL");
515 register!(EMBEDDING_GENERATE_DURATION, "EMBEDDING_GENERATE_DURATION");
516 register!(EMBEDDING_TIMEOUT_TOTAL, "EMBEDDING_TIMEOUT_TOTAL");
517 register!(NER_LOCK_TIMEOUT_TOTAL, "NER_LOCK_TIMEOUT_TOTAL");
518
519 register!(ACTIVE_USERS, "ACTIVE_USERS");
521 register!(MEMORIES_BY_TIER, "MEMORIES_BY_TIER");
522 register!(MEMORY_HEAP_BYTES_TOTAL, "MEMORY_HEAP_BYTES_TOTAL");
523
524 register!(VECTOR_INDEX_SIZE_TOTAL, "VECTOR_INDEX_SIZE_TOTAL");
526 register!(VECTOR_SEARCH_TOTAL, "VECTOR_SEARCH_TOTAL");
527 register!(VECTOR_SEARCH_DURATION, "VECTOR_SEARCH_DURATION");
528
529 register!(ROCKSDB_OPS_TOTAL, "ROCKSDB_OPS_TOTAL");
531 register!(ROCKSDB_OPS_DURATION, "ROCKSDB_OPS_DURATION");
532 register!(LEGACY_FALLBACK_BRANCH_TOTAL, "LEGACY_FALLBACK_BRANCH_TOTAL");
533
534 register!(ERRORS_TOTAL, "ERRORS_TOTAL");
536 register!(RESOURCE_LIMIT_REJECTIONS, "RESOURCE_LIMIT_REJECTIONS");
537
538 register!(CONCURRENT_REQUESTS, "CONCURRENT_REQUESTS");
540 register!(REQUEST_QUEUE_SIZE, "REQUEST_QUEUE_SIZE");
541
542 register!(HEBBIAN_REINFORCE_TOTAL, "HEBBIAN_REINFORCE_TOTAL");
544 register!(HEBBIAN_REINFORCE_DURATION, "HEBBIAN_REINFORCE_DURATION");
545
546 register!(CONSOLIDATE_TOTAL, "CONSOLIDATE_TOTAL");
548 register!(CONSOLIDATE_DURATION, "CONSOLIDATE_DURATION");
549
550 register!(BATCH_STORE_DURATION, "BATCH_STORE_DURATION");
552 register!(BATCH_STORE_SIZE, "BATCH_STORE_SIZE");
553
554 register!(EMBEDDING_CACHE_QUERY, "EMBEDDING_CACHE_QUERY");
556 register!(EMBEDDING_CACHE_CONTENT, "EMBEDDING_CACHE_CONTENT");
557 register!(EMBEDDING_CACHE_QUERY_SIZE, "EMBEDDING_CACHE_QUERY_SIZE");
558 register!(EMBEDDING_CACHE_CONTENT_SIZE, "EMBEDDING_CACHE_CONTENT_SIZE");
559
560 if errors.is_empty() {
561 Ok(())
562 } else {
563 Err(MetricsError {
564 message: errors.join("; "),
565 })
566 }
567}
568
569pub struct Timer {
572 histogram: Histogram,
573 start: std::time::Instant,
574}
575
576impl Timer {
577 pub fn new(histogram: Histogram) -> Self {
579 Self {
580 histogram,
581 start: std::time::Instant::now(),
582 }
583 }
584}
585
586impl Drop for Timer {
587 fn drop(&mut self) {
588 let duration = self.start.elapsed().as_secs_f64();
589 self.histogram.observe(duration);
590 }
591}
592
#[cfg(test)]
mod tests {
    use super::*;
    use prometheus::core::Metric;

    /// Two consecutive `register_metrics` calls must agree on success or failure.
    #[test]
    fn test_metrics_registration_is_idempotent() {
        let first = register_metrics();
        let second = register_metrics();

        assert_eq!(first.is_ok(), second.is_ok());
    }

    /// Dropping a `Timer` must add one sample that covers at least the time slept.
    #[test]
    fn test_timer_records_duration() {
        let opts = HistogramOpts::new("test_timer_histogram", "Test histogram for timer");
        let histogram = Histogram::with_opts(opts).unwrap();

        let timer = Timer::new(histogram.clone());
        std::thread::sleep(std::time::Duration::from_millis(10));
        // Dropping the guard is what records the observation.
        drop(timer);

        let snapshot = histogram.metric();
        let recorded = snapshot.get_histogram();
        assert_eq!(recorded.get_sample_count(), 1);
        assert!(recorded.get_sample_sum() >= 0.01);
    }
}