1use std::sync::OnceLock;
13use std::sync::atomic::{AtomicU64, Ordering};
14
15use prometheus::{
16 Encoder, HistogramOpts, HistogramVec, IntCounter, IntCounterVec, IntGauge, Registry,
17 TextEncoder,
18};
19
/// Process-wide running total of HNSW evictions; advanced by
/// [`record_hnsw_eviction`] and read back by [`hnsw_evictions_total`].
static HNSW_EVICTIONS_TOTAL: AtomicU64 = AtomicU64::new(0);
/// Timestamp (nanoseconds, as supplied by the caller) of the most recently
/// recorded HNSW eviction. Stays at 0 until [`record_hnsw_eviction`] is called.
static HNSW_LAST_EVICTION_AT_NANOS: AtomicU64 = AtomicU64::new(0);
44
45pub fn record_hnsw_eviction(count: u64, now_nanos: u64) {
51 HNSW_EVICTIONS_TOTAL.fetch_add(count, Ordering::Relaxed);
52 HNSW_LAST_EVICTION_AT_NANOS.store(now_nanos, Ordering::Relaxed);
53 let r = registry();
54 r.hnsw_evictions_total.inc_by(count);
55 #[allow(clippy::cast_possible_wrap)]
59 let nanos_i64 = i64::try_from(now_nanos).unwrap_or(i64::MAX);
60 r.hnsw_last_eviction_at_nanos.set(nanos_i64);
61}
62
63#[must_use]
67pub fn hnsw_evictions_total() -> u64 {
68 HNSW_EVICTIONS_TOTAL.load(Ordering::Relaxed)
69}
70
71#[must_use]
76pub fn hnsw_last_eviction_at_nanos() -> u64 {
77 HNSW_LAST_EVICTION_AT_NANOS.load(Ordering::Relaxed)
78}
79
80#[doc(hidden)]
91pub fn reset_hnsw_eviction_counters_for_test() {
92 HNSW_EVICTIONS_TOTAL.store(0, Ordering::Relaxed);
93 HNSW_LAST_EVICTION_AT_NANOS.store(0, Ordering::Relaxed);
94 registry().hnsw_last_eviction_at_nanos.set(0);
97}
98
/// Bundle of the Prometheus [`Registry`] and every collector registered on it.
///
/// Instances are built by `Metrics::try_new`; the process-wide instance is
/// obtained through [`registry()`]. Each field is a handle to a collector that
/// has also been registered (as a clone) on `registry`.
#[allow(dead_code)]
pub struct Metrics {
    /// Registry the collectors below are registered on; gathered by `render`.
    pub registry: Registry,
    /// `ai_memory_store_total` — store calls, labels `tier`, `result`.
    pub store_total: IntCounterVec,
    /// `ai_memory_recall_total` — recall calls, label `mode`.
    pub recall_total: IntCounterVec,
    /// `ai_memory_recall_latency_seconds` — recall latency histogram, label `mode`.
    pub recall_latency_seconds: HistogramVec,
    /// `ai_memory_autonomy_hook_total` — post-store autonomy hook invocations,
    /// labels `kind`, `result`.
    pub autonomy_hook_total: IntCounterVec,
    /// `ai_memory_contradiction_detected_total` — confirmed contradictions.
    pub contradiction_detected_total: IntCounter,
    /// `ai_memory_webhook_dispatched_total` — webhook deliveries attempted.
    pub webhook_dispatched_total: IntCounter,
    /// `ai_memory_webhook_failed_total` — webhook deliveries that failed after
    /// all retries.
    pub webhook_failed_total: IntCounter,
    /// `ai_memory_memories` — current count of non-archived memories.
    pub memories_gauge: IntGauge,
    /// `ai_memory_hnsw_size` — current HNSW vector index population.
    pub hnsw_size_gauge: IntGauge,
    /// `ai_memory_subscriptions_active` — current active webhook subscriptions.
    pub subscriptions_active_gauge: IntGauge,
    /// `ai_memory_curator_cycles_total` — curator sweep cycles completed.
    pub curator_cycles_total: IntCounter,
    /// `ai_memory_curator_operations_total` — curator operations, labels
    /// `kind`, `result`.
    pub curator_operations_total: IntCounterVec,
    /// `ai_memory_curator_cycle_duration_seconds` — sweep duration histogram,
    /// label `dry_run`.
    pub curator_cycle_duration_seconds: HistogramVec,
    /// `ai_memory_federation_fanout_dropped_total` — post-quorum fanout tasks
    /// whose outcome could not be observed, label `reason`.
    pub federation_fanout_dropped_total: IntCounterVec,
    /// `ai_memory_federation_fanout_retry_total` — peer POSTs retried once
    /// after a transient failure, label `outcome`.
    pub federation_fanout_retry_total: IntCounterVec,
    /// `ai_memory_federation_partial_quorum_total` — quorum writes that
    /// succeeded while at least one peer did not ack inside the deadline.
    pub federation_partial_quorum_total: IntCounter,
    /// `ai_memory_corrupt_provenance_rows_total` — rows whose provenance JSON
    /// failed to deserialise, label `column`.
    pub corrupt_provenance_rows_total: IntCounterVec,
    /// `ai_memory_auto_export_spawn_failed_total` — auto-export worker
    /// invocations that panicked or returned `Err`.
    pub auto_export_spawn_failed_total: IntCounter,
    /// `ai_memory_federation_push_dlq_depth` — current count of pending
    /// `federation_push_dlq` rows.
    pub federation_push_dlq_depth: IntGauge,

    /// `ai_memory_federation_push_dlq_quarantined_total` — DLQ rows skipped by
    /// the replay worker for exceeding the attempt limit.
    pub federation_push_dlq_quarantined: IntCounter,

    /// `ai_memory_hnsw_evictions_total` — cumulative HNSW evictions since
    /// process start.
    pub hnsw_evictions_total: IntCounter,

    /// `ai_memory_hnsw_last_eviction_at_nanos` — timestamp of the most recent
    /// HNSW eviction (0 if none).
    pub hnsw_last_eviction_at_nanos: IntGauge,

    /// `ai_memory_subscription_dlq_overflow_total` — `subscription_dlq`
    /// inserts refused because the per-subscription depth limit was reached.
    pub subscription_dlq_overflow_total: IntCounter,

    /// `ai_memory_federation_cred_verify_total` — credential-verification
    /// outcomes on the receiver path, label `result`.
    pub federation_cred_verify_total: IntCounterVec,

    /// `ai_memory_federation_inbound_cred_total` — inbound requests by
    /// credential presence, label `presence`.
    pub federation_inbound_cred_total: IntCounterVec,

    /// `ai_memory_federation_cred_max_age_seconds` — age in seconds of the
    /// local outbound leaf credential.
    pub federation_cred_max_age_seconds: IntGauge,

    /// `ai_memory_federation_renewal_lag_seconds` — seconds since the last
    /// successful outbound-credential renewal.
    pub federation_renewal_lag_seconds: IntGauge,
}
249
250pub fn registry() -> &'static Metrics {
252 static HANDLE: OnceLock<Metrics> = OnceLock::new();
253 HANDLE.get_or_init(Metrics::new_or_panic)
254}
255
256impl Metrics {
257 fn new_or_panic() -> Self {
258 Self::try_new().expect("prometheus registry init failed")
263 }
264
265 #[allow(clippy::too_many_lines)]
290 pub(crate) fn try_new() -> prometheus::Result<Self> {
291 let registry = Registry::new();
292
293 let store_total = IntCounterVec::new(
294 prometheus::Opts::new(
295 "ai_memory_store_total",
296 "Total memory_store calls, labeled by tier and result.",
297 ),
298 &["tier", "result"],
299 )?;
300 registry.register(Box::new(store_total.clone()))?;
301
302 let recall_total = IntCounterVec::new(
303 prometheus::Opts::new(
304 "ai_memory_recall_total",
305 "Total memory_recall calls, labeled by mode.",
306 ),
307 &["mode"],
308 )?;
309 registry.register(Box::new(recall_total.clone()))?;
310
311 let recall_latency_seconds = HistogramVec::new(
312 HistogramOpts::new(
313 "ai_memory_recall_latency_seconds",
314 "Recall latency in seconds, labeled by mode.",
315 )
316 .buckets(vec![
317 0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0,
318 ]),
319 &["mode"],
320 )?;
321 registry.register(Box::new(recall_latency_seconds.clone()))?;
322
323 let autonomy_hook_total = IntCounterVec::new(
324 prometheus::Opts::new(
325 "ai_memory_autonomy_hook_total",
326 "Post-store autonomy hook invocations, labeled by kind and result.",
327 ),
328 &["kind", "result"],
329 )?;
330 registry.register(Box::new(autonomy_hook_total.clone()))?;
331
332 let contradiction_detected_total = IntCounter::new(
333 "ai_memory_contradiction_detected_total",
334 "Count of contradictions the LLM hook confirmed.",
335 )?;
336 registry.register(Box::new(contradiction_detected_total.clone()))?;
337
338 let webhook_dispatched_total = IntCounter::new(
339 "ai_memory_webhook_dispatched_total",
340 "Total webhook deliveries attempted.",
341 )?;
342 registry.register(Box::new(webhook_dispatched_total.clone()))?;
343
344 let webhook_failed_total = IntCounter::new(
345 "ai_memory_webhook_failed_total",
346 "Webhook deliveries that failed after all retries.",
347 )?;
348 registry.register(Box::new(webhook_failed_total.clone()))?;
349
350 let memories_gauge = IntGauge::new(
351 "ai_memory_memories",
352 "Current count of non-archived memories.",
353 )?;
354 registry.register(Box::new(memories_gauge.clone()))?;
355
356 let hnsw_size_gauge = IntGauge::new(
357 "ai_memory_hnsw_size",
358 "Current HNSW vector index population.",
359 )?;
360 registry.register(Box::new(hnsw_size_gauge.clone()))?;
361
362 let subscriptions_active_gauge = IntGauge::new(
363 "ai_memory_subscriptions_active",
364 "Current count of active webhook subscriptions.",
365 )?;
366 registry.register(Box::new(subscriptions_active_gauge.clone()))?;
367
368 let curator_cycles_total = IntCounter::new(
369 "ai_memory_curator_cycles_total",
370 "Total curator sweep cycles completed.",
371 )?;
372 registry.register(Box::new(curator_cycles_total.clone()))?;
373
374 let curator_operations_total = IntCounterVec::new(
375 prometheus::Opts::new(
376 "ai_memory_curator_operations_total",
377 "Curator operations, labeled by kind (auto_tag|contradiction|persist) and result.",
378 ),
379 &["kind", "result"],
380 )?;
381 registry.register(Box::new(curator_operations_total.clone()))?;
382
383 let curator_cycle_duration_seconds = HistogramVec::new(
384 HistogramOpts::new(
385 "ai_memory_curator_cycle_duration_seconds",
386 "Curator sweep cycle wall-clock duration, labeled by dry_run.",
387 )
388 .buckets(vec![
389 0.1,
390 0.5,
391 1.0,
392 5.0,
393 15.0,
394 60.0,
395 300.0,
396 900.0,
397 crate::SECS_PER_HOUR as f64,
398 ]),
399 &["dry_run"],
400 )?;
401 registry.register(Box::new(curator_cycle_duration_seconds.clone()))?;
402
403 let federation_fanout_dropped_total = IntCounterVec::new(
404 prometheus::Opts::new(
405 "ai_memory_federation_fanout_dropped_total",
406 "Post-quorum fanout tasks whose outcome could not be observed. \
407 reason=shutdown|panic|join_error. Non-zero indicates mesh divergence risk.",
408 ),
409 &["reason"],
410 )?;
411 registry.register(Box::new(federation_fanout_dropped_total.clone()))?;
412
413 let federation_fanout_retry_total = IntCounterVec::new(
414 prometheus::Opts::new(
415 "ai_memory_federation_fanout_retry_total",
416 "Peer POSTs that hit a transient failure on first attempt and \
417 were retried once via the Idempotency-Key path. \
418 outcome=ok|fail|id_drift. Non-zero ok indicates the retry \
419 recovered a row that would otherwise be missing on a peer.",
420 ),
421 &["outcome"],
422 )?;
423 registry.register(Box::new(federation_fanout_retry_total.clone()))?;
424
425 let federation_partial_quorum_total = IntCounter::new(
427 "ai_memory_federation_partial_quorum_total",
428 "Quorum writes that succeeded (W met) but where at least one \
429 configured peer did not ack inside the deadline.",
430 )?;
431 registry.register(Box::new(federation_partial_quorum_total.clone()))?;
432
433 let corrupt_provenance_rows_total = IntCounterVec::new(
435 prometheus::Opts::new(
436 "ai_memory_corrupt_provenance_rows_total",
437 "Memory rows whose Form 4 fact-provenance JSON columns \
438 failed to deserialise and were silently defaulted. \
439 Non-zero indicates schema drift, writer-side corruption, \
440 or a migration leaving malformed JSON.",
441 ),
442 &["column"],
443 )?;
444 registry.register(Box::new(corrupt_provenance_rows_total.clone()))?;
445
446 let auto_export_spawn_failed_total = IntCounter::new(
449 "ai_memory_auto_export_spawn_failed_total",
450 "Detached post_reflect.auto_export worker invocations whose \
451 outcome was a panic or returned Err. Non-zero means at \
452 least one reflection was committed to the DB but its \
453 on-disk markdown/json artefact did not land — operators \
454 use this to alert on otherwise-silent disk-write failures.",
455 )?;
456 registry.register(Box::new(auto_export_spawn_failed_total.clone()))?;
457
458 let federation_push_dlq_depth = IntGauge::new(
460 "ai_memory_federation_push_dlq_depth",
461 "Current count of pending federation_push_dlq rows \
462 (replayed_at IS NULL). Refreshed on every replay tick. \
463 Non-zero sustained depth indicates one or more peers are \
464 persistently unreachable; healthy meshes drain back to 0 \
465 within one replay interval after peer recovery.",
466 )?;
467 registry.register(Box::new(federation_push_dlq_depth.clone()))?;
468
469 let federation_push_dlq_quarantined = IntCounter::new(
471 "ai_memory_federation_push_dlq_quarantined_total",
472 "Monotonic counter of federation_push_dlq rows the replay \
473 worker has skipped because their attempt_count exceeded \
474 MAX_REPLAY_ATTEMPTS (currently 100). Non-zero sustained \
475 rate indicates poison-message rows that need operator \
476 intervention via `ai-memory federation dlq drain \
477 --quarantined`. Pre-#1032 the worker retried these \
478 forever, amplifying network load against rejecting peers.",
479 )?;
480 registry.register(Box::new(federation_push_dlq_quarantined.clone()))?;
481
482 let hnsw_evictions_total = IntCounter::new(
490 "ai_memory_hnsw_evictions_total",
491 "Cumulative HNSW oldest-eviction count since process start. \
492 Non-zero indicates the in-memory vector index has hit \
493 MAX_ENTRIES and dropped older embeddings; recall quality \
494 may have degraded for evicted ids until they are \
495 re-inserted on next access.",
496 )?;
497 registry.register(Box::new(hnsw_evictions_total.clone()))?;
498
499 let hnsw_last_eviction_at_nanos = IntGauge::new(
500 "ai_memory_hnsw_last_eviction_at_nanos",
501 "Wall-clock UNIX nanoseconds of the most recent HNSW \
502 eviction (0 if none). Capabilities derives \
503 hnsw.evicted_recently from this with a 60s rolling window.",
504 )?;
505 registry.register(Box::new(hnsw_last_eviction_at_nanos.clone()))?;
506
507 let subscription_dlq_overflow_total = IntCounter::new(
509 "ai_memory_subscription_dlq_overflow_total",
510 "Monotonic counter of subscription_dlq inserts refused \
511 because the per-subscription DLQ depth had already hit \
512 MAX_SUBSCRIPTION_DLQ_ROWS (10_000). Non-zero indicates a \
513 hostile or persistently-broken webhook target that would \
514 otherwise fill the operator's disk with quarantined rows. \
515 Operators drain the queue via `ai-memory subscription dlq \
516 drain <subscription_id>` before resetting.",
517 )?;
518 registry.register(Box::new(subscription_dlq_overflow_total.clone()))?;
519
520 let federation_cred_verify_total = IntCounterVec::new(
524 prometheus::Opts::new(
525 "ai_memory_federation_cred_verify_total",
526 "Federation credential-verification outcomes on the \
527 receiver path, labeled result (ok|fail). \
528 verify-failure-rate SLO = fail / (ok + fail). Non-zero \
529 sustained fail rate means peers present credentials the \
530 local trust bundle cannot verify (expired leaf, revoked \
531 issuer, clock skew, or a chain that fails to anchor).",
532 ),
533 &["result"],
534 )?;
535 registry.register(Box::new(federation_cred_verify_total.clone()))?;
536
537 let federation_inbound_cred_total = IntCounterVec::new(
538 prometheus::Opts::new(
539 "ai_memory_federation_inbound_cred_total",
540 "Inbound federation requests bucketed by whether they \
541 presented a signed credential, labeled presence \
542 (signed|unsigned). signed-vs-unsigned-ratio SLO = \
543 signed / (signed + unsigned). Climbs toward 1.0 as \
544 peers upgrade to credential-presenting builds.",
545 ),
546 &["presence"],
547 )?;
548 registry.register(Box::new(federation_inbound_cred_total.clone()))?;
549
550 let federation_cred_max_age_seconds = IntGauge::new(
551 "ai_memory_federation_cred_max_age_seconds",
552 "Age in seconds of the local outbound leaf credential \
553 (now - issued_at), refreshed on every renewal tick. \
554 max-cred-age SLO alerts when this approaches the leaf TTL \
555 — a credential aging past its TTL without a renewal means \
556 the refresh worker has stalled and outbound sync will \
557 start failing peer verification.",
558 )?;
559 registry.register(Box::new(federation_cred_max_age_seconds.clone()))?;
560
561 let federation_renewal_lag_seconds = IntGauge::new(
562 "ai_memory_federation_renewal_lag_seconds",
563 "Seconds since the last successful outbound-credential \
564 renewal (now - last-renew wall clock), refreshed on every \
565 renewal tick. renewal-lag SLO alerts when this exceeds the \
566 configured refresh interval by a safety margin: a lag \
567 larger than the interval means renewals are silently \
568 failing even though the worker thread is still alive.",
569 )?;
570 registry.register(Box::new(federation_renewal_lag_seconds.clone()))?;
571
572 Ok(Self {
573 registry,
574 store_total,
575 recall_total,
576 recall_latency_seconds,
577 autonomy_hook_total,
578 contradiction_detected_total,
579 webhook_dispatched_total,
580 webhook_failed_total,
581 memories_gauge,
582 hnsw_size_gauge,
583 subscriptions_active_gauge,
584 curator_cycles_total,
585 curator_operations_total,
586 curator_cycle_duration_seconds,
587 federation_fanout_dropped_total,
588 federation_fanout_retry_total,
589 federation_partial_quorum_total,
590 corrupt_provenance_rows_total,
591 auto_export_spawn_failed_total,
592 federation_push_dlq_depth,
593 federation_push_dlq_quarantined,
594 hnsw_evictions_total,
595 hnsw_last_eviction_at_nanos,
596 subscription_dlq_overflow_total,
597 federation_cred_verify_total,
598 federation_inbound_cred_total,
599 federation_cred_max_age_seconds,
600 federation_renewal_lag_seconds,
601 })
602 }
603}
604
605pub fn record_subscription_dlq_overflow() {
611 registry().subscription_dlq_overflow_total.inc();
612}
613
614#[must_use]
618pub fn subscription_dlq_overflow_count() -> u64 {
619 registry().subscription_dlq_overflow_total.get()
620}
621
622pub fn record_federation_cred_verify(ok: bool) {
628 let result = if ok { "ok" } else { "fail" };
629 registry()
630 .federation_cred_verify_total
631 .with_label_values(&[result])
632 .inc();
633}
634
635#[must_use]
638pub fn federation_cred_verify_count(result: &str) -> u64 {
639 registry()
640 .federation_cred_verify_total
641 .with_label_values(&[result])
642 .get()
643}
644
645pub fn record_federation_inbound_cred(signed: bool) {
651 let presence = if signed { "signed" } else { "unsigned" };
652 registry()
653 .federation_inbound_cred_total
654 .with_label_values(&[presence])
655 .inc();
656}
657
658#[must_use]
662pub fn federation_inbound_cred_count(presence: &str) -> u64 {
663 registry()
664 .federation_inbound_cred_total
665 .with_label_values(&[presence])
666 .get()
667}
668
669pub fn set_federation_cred_max_age_seconds(secs: i64) {
673 registry().federation_cred_max_age_seconds.set(secs);
674}
675
676pub fn set_federation_renewal_lag_seconds(secs: i64) {
680 registry().federation_renewal_lag_seconds.set(secs);
681}
682
683pub fn record_corrupt_provenance(column: &str) {
689 registry()
690 .corrupt_provenance_rows_total
691 .with_label_values(&[column])
692 .inc();
693}
694
695pub fn record_auto_export_spawn_failed() {
703 registry().auto_export_spawn_failed_total.inc();
704}
705
706#[must_use]
712pub fn auto_export_spawn_failed_count() -> u64 {
713 registry().auto_export_spawn_failed_total.get()
714}
715
716#[must_use]
721pub fn render() -> String {
722 let encoder = TextEncoder::new();
723 let mut buf = Vec::new();
724 let _ = encoder.encode(®istry().registry.gather(), &mut buf);
725 String::from_utf8(buf).unwrap_or_default()
726}
727
728#[allow(dead_code)]
730pub fn record_store(tier: &str, ok: bool) {
731 let result = if ok { "ok" } else { "err" };
732 registry()
733 .store_total
734 .with_label_values(&[tier, result])
735 .inc();
736}
737
738#[allow(dead_code)]
740pub fn record_recall(mode: &str, latency_seconds: f64) {
741 registry().recall_total.with_label_values(&[mode]).inc();
742 registry()
743 .recall_latency_seconds
744 .with_label_values(&[mode])
745 .observe(latency_seconds);
746}
747
748#[allow(dead_code)]
750pub fn record_autonomy_hook(kind: &str, ok: bool) {
751 let result = if ok { "ok" } else { "err" };
752 registry()
753 .autonomy_hook_total
754 .with_label_values(&[kind, result])
755 .inc();
756}
757
758#[allow(dead_code)]
760pub fn curator_cycle_completed(
761 operations_attempted: usize,
762 auto_tagged: usize,
763 contradictions_found: usize,
764 errors: usize,
765) {
766 let r = registry();
767 r.curator_cycles_total.inc();
768 if auto_tagged > 0 {
769 r.curator_operations_total
770 .with_label_values(&["auto_tag", "ok"])
771 .inc_by(auto_tagged as u64);
772 }
773 if contradictions_found > 0 {
774 r.curator_operations_total
775 .with_label_values(&["contradiction", "ok"])
776 .inc_by(contradictions_found as u64);
777 }
778 let failed = operations_attempted.saturating_sub(auto_tagged + contradictions_found);
779 if failed > 0 || errors > 0 {
780 r.curator_operations_total
781 .with_label_values(&["any", "err"])
782 .inc_by(errors as u64);
783 }
784}
785
// Unit tests for the metrics module.
//
// Most tests go through the process-wide `registry()` singleton, which is
// shared by every test in this module. Counter assertions therefore use
// `>= before + 1` rather than exact equality, since other tests bump the same
// collectors.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::models::Tier;

    // Two calls to `registry()` must hand back the very same instance.
    #[test]
    fn registry_is_singleton() {
        let r1 = registry();
        let r2 = registry();
        assert!(std::ptr::eq(std::ptr::from_ref(r1), std::ptr::from_ref(r2)));
    }

    // Touches a representative set of collectors, then checks that their
    // metric names appear in the rendered text output.
    #[test]
    fn render_includes_registered_names() {
        record_store(Tier::Short.as_str(), true);
        record_recall("hybrid", 0.042);
        record_autonomy_hook("auto_tag", true);
        registry().contradiction_detected_total.inc();
        registry().webhook_dispatched_total.inc();
        registry().memories_gauge.set(42);
        registry().hnsw_size_gauge.set(42);
        registry().subscriptions_active_gauge.set(3);
        registry().federation_push_dlq_depth.set(0);
        record_federation_cred_verify(true);
        record_federation_inbound_cred(true);
        set_federation_cred_max_age_seconds(0);
        set_federation_renewal_lag_seconds(0);

        let text = render();
        for name in [
            "ai_memory_store_total",
            "ai_memory_recall_total",
            "ai_memory_recall_latency_seconds",
            "ai_memory_autonomy_hook_total",
            "ai_memory_contradiction_detected_total",
            "ai_memory_webhook_dispatched_total",
            "ai_memory_webhook_failed_total",
            "ai_memory_memories",
            "ai_memory_hnsw_size",
            "ai_memory_subscriptions_active",
            "ai_memory_federation_push_dlq_depth",
            "ai_memory_federation_cred_verify_total",
            "ai_memory_federation_inbound_cred_total",
            "ai_memory_federation_cred_max_age_seconds",
            "ai_memory_federation_renewal_lag_seconds",
        ] {
            assert!(text.contains(name), "/metrics missing {name}\n\n{text}");
        }
    }

    // Both `result` label values must advance and show up in the output.
    #[test]
    fn federation_cred_verify_labels_outcome() {
        let before_ok = federation_cred_verify_count("ok");
        let before_fail = federation_cred_verify_count("fail");
        record_federation_cred_verify(true);
        record_federation_cred_verify(false);
        assert!(federation_cred_verify_count("ok") >= before_ok + 1);
        assert!(federation_cred_verify_count("fail") >= before_fail + 1);
        let text = render();
        assert!(text.contains("ai_memory_federation_cred_verify_total{result=\"ok\"}"));
        assert!(text.contains("ai_memory_federation_cred_verify_total{result=\"fail\"}"));
    }

    // Both `presence` label values must advance.
    #[test]
    fn federation_inbound_cred_labels_presence() {
        let before_signed = federation_inbound_cred_count("signed");
        let before_unsigned = federation_inbound_cred_count("unsigned");
        record_federation_inbound_cred(true);
        record_federation_inbound_cred(false);
        assert!(federation_inbound_cred_count("signed") >= before_signed + 1);
        assert!(federation_inbound_cred_count("unsigned") >= before_unsigned + 1);
    }

    // The two credential gauges report back the value they were set to.
    #[test]
    fn federation_cred_age_and_lag_gauges_settable() {
        set_federation_cred_max_age_seconds(1234);
        set_federation_renewal_lag_seconds(56);
        assert_eq!(registry().federation_cred_max_age_seconds.get(), 1234);
        assert_eq!(registry().federation_renewal_lag_seconds.get(), 56);
    }

    // A successful store is rendered with `result="ok"` and the tier label.
    #[test]
    fn record_store_labels_tier() {
        record_store(Tier::Long.as_str(), true);
        let text = render();
        assert!(text.contains("ai_memory_store_total{result=\"ok\",tier=\"long\"}"));
    }

    // Every completed cycle advances the cycle counter, even with no work.
    #[test]
    fn curator_cycle_completed_increments_total() {
        let before = registry().curator_cycles_total.get();
        curator_cycle_completed(0, 0, 0, 0);
        let after = registry().curator_cycles_total.get();
        assert!(
            after >= before + 1,
            "curator_cycles_total did not advance (before={before}, after={after})"
        );
    }

    // Exercises the auto_tag branch; only checks the metric family is rendered.
    #[test]
    fn curator_cycle_completed_records_auto_tag_ok() {
        curator_cycle_completed(5, 3, 0, 0);
        let text = render();
        assert!(
            text.contains("ai_memory_curator_operations_total"),
            "curator_operations_total counter missing from /metrics output"
        );
    }

    // Exercises the contradiction branch; only checks the family is rendered.
    #[test]
    fn curator_cycle_completed_records_contradiction_ok() {
        curator_cycle_completed(2, 0, 2, 0);
        let text = render();
        assert!(text.contains("ai_memory_curator_operations_total"));
    }

    // Exercises the error branch (shortfall plus a non-zero error count).
    #[test]
    fn curator_cycle_completed_records_errors() {
        curator_cycle_completed(5, 2, 1, 1);
        let text = render();
        assert!(text.contains("ai_memory_curator_operations_total"));
    }

    // All-zero arguments must not panic and still count the cycle.
    #[test]
    fn curator_cycle_completed_with_zero_args_is_safe() {
        let before = registry().curator_cycles_total.get();
        curator_cycle_completed(0, 0, 0, 0);
        let after = registry().curator_cycles_total.get();
        assert!(after >= before + 1);
    }

    // A failed store is rendered with `result="err"`.
    #[test]
    fn record_store_err_path() {
        record_store(Tier::Short.as_str(), false);
        let text = render();
        assert!(text.contains("ai_memory_store_total{result=\"err\",tier=\"short\""));
    }

    // A recall updates both the counter and the latency histogram.
    #[test]
    fn record_recall_emits_latency_histogram() {
        record_recall("keyword", 0.5);
        let text = render();
        assert!(text.contains("ai_memory_recall_total{mode=\"keyword\""));
        assert!(text.contains("ai_memory_recall_latency_seconds"));
    }

    // A failed hook is rendered with `result="err"`.
    #[test]
    fn record_autonomy_hook_err_path() {
        record_autonomy_hook("contradiction", false);
        let text = render();
        assert!(
            text.contains("ai_memory_autonomy_hook_total{kind=\"contradiction\",result=\"err\"")
        );
    }

    // The text output carries HELP and TYPE header lines for a counter.
    #[test]
    fn render_emits_help_and_type_lines() {
        record_store(Tier::Mid.as_str(), true);
        let text = render();
        assert!(text.contains("# HELP ai_memory_store_total"));
        assert!(text.contains("# TYPE ai_memory_store_total counter"));
    }

    // The fanout-dropped counter is rendered with its `reason` label.
    #[test]
    fn fanout_dropped_counter_increments() {
        registry()
            .federation_fanout_dropped_total
            .with_label_values(&["shutdown"])
            .inc();
        let text = render();
        assert!(text.contains("ai_memory_federation_fanout_dropped_total{reason=\"shutdown\""));
    }

    // Each documented `outcome` label value can be incremented.
    #[test]
    fn fanout_retry_counter_outcome_labels() {
        for outcome in ["ok", "fail", "id_drift"] {
            registry()
                .federation_fanout_retry_total
                .with_label_values(&[outcome])
                .inc();
        }
        let text = render();
        assert!(text.contains("ai_memory_federation_fanout_retry_total"));
    }

    // Observing a duration makes the curator histogram appear in the output.
    #[test]
    fn curator_cycle_duration_histogram_buckets() {
        registry()
            .curator_cycle_duration_seconds
            .with_label_values(&["false"])
            .observe(0.42);
        let text = render();
        assert!(text.contains("ai_memory_curator_cycle_duration_seconds"));
    }

    // Builds a standalone handle (not the singleton) and touches a range of
    // its collectors to make sure each one is usable.
    #[test]
    fn try_new_builds_a_fresh_metrics_handle() {
        let m = super::Metrics::try_new().expect("fresh registry must succeed");
        m.store_total
            .with_label_values(&[Tier::Short.as_str(), "ok"])
            .inc();
        m.recall_total.with_label_values(&["hybrid"]).inc();
        m.recall_latency_seconds
            .with_label_values(&["hybrid"])
            .observe(0.001);
        m.autonomy_hook_total.with_label_values(&["x", "ok"]).inc();
        m.contradiction_detected_total.inc();
        m.webhook_dispatched_total.inc();
        m.webhook_failed_total.inc();
        m.memories_gauge.set(1);
        m.hnsw_size_gauge.set(1);
        m.subscriptions_active_gauge.set(1);
        m.curator_cycles_total.inc();
        m.curator_operations_total
            .with_label_values(&["auto_tag", "ok"])
            .inc();
        m.curator_cycle_duration_seconds
            .with_label_values(&["true"])
            .observe(1.0);
        m.federation_fanout_dropped_total
            .with_label_values(&["panic"])
            .inc();
        m.federation_fanout_retry_total
            .with_label_values(&["ok"])
            .inc();
        m.federation_partial_quorum_total.inc();
        m.auto_export_spawn_failed_total.inc();
    }

    // Two handles built by `try_new` do not conflict: each owns its registry
    // and each renders its own copy of the metric.
    #[test]
    fn try_new_can_build_two_isolated_registries() {
        let a = super::Metrics::try_new().expect("first");
        let b = super::Metrics::try_new().expect("second");
        a.store_total
            .with_label_values(&[Tier::Short.as_str(), "ok"])
            .inc();
        b.store_total
            .with_label_values(&[Tier::Short.as_str(), "ok"])
            .inc();
        let mut buf_a = Vec::new();
        let mut buf_b = Vec::new();
        let enc = TextEncoder::new();
        enc.encode(&a.registry.gather(), &mut buf_a).unwrap();
        enc.encode(&b.registry.gather(), &mut buf_b).unwrap();
        assert!(String::from_utf8_lossy(&buf_a).contains("ai_memory_store_total"));
        assert!(String::from_utf8_lossy(&buf_b).contains("ai_memory_store_total"));
    }

    // The public recorder advances the singleton's counter and the metric is
    // visible in the rendered output.
    #[test]
    fn record_auto_export_spawn_failed_increments_singleton() {
        let before = auto_export_spawn_failed_count();
        record_auto_export_spawn_failed();
        let after = auto_export_spawn_failed_count();
        assert!(
            after >= before + 1,
            "auto_export_spawn_failed_total did not advance \
             (before={before}, after={after})"
        );
        let text = render();
        assert!(
            text.contains("ai_memory_auto_export_spawn_failed_total"),
            "/metrics output missing auto_export counter\n\n{text}"
        );
    }

    // With nothing attempted and no errors the err branch is not entered;
    // this test only asserts that the cycle counter still advances.
    #[test]
    fn curator_cycle_completed_no_progress_branch_skips_err_increment() {
        let before = registry().curator_cycles_total.get();
        curator_cycle_completed(0, 0, 0, 0);
        let after = registry().curator_cycles_total.get();
        assert!(after >= before + 1);
    }
}