use std::sync::OnceLock;
use opentelemetry::global;
use opentelemetry::metrics::{Counter, Histogram, UpDownCounter};
/// Bundle of the OpenTelemetry instruments used by the crawler.
///
/// Each field holds one instrument handle; the per-field notes below give the
/// instrument name and description it is registered with in
/// `MetricRegistry::new`.
pub struct MetricRegistry {
/// `crawl_pages_total`: pages fetched by the crawl engine, partitioned by terminal status.
pub pages_total: Counter<u64>,
/// `crawl_documents_discovered_total`: documents (PDF, DOCX, …) discovered during
/// crawling, partitioned by mime type.
pub documents_discovered_total: Counter<u64>,
/// `crawl_robots_blocked_total`: page fetches blocked by robots.txt.
pub robots_blocked_total: Counter<u64>,
/// `crawl_waf_blocks_total`: WAF / antibot challenges detected, partitioned by vendor.
pub waf_blocks_total: Counter<u64>,
/// `crawl_backend_escalations_total`: tier escalations in the dispatch chain.
pub backend_escalations_total: Counter<u64>,
/// `crawl_bypass_requests_total`: requests routed through a bypass provider.
pub bypass_requests_total: Counter<u64>,
/// `crawl_bypass_failures_total`: bypass provider failures, partitioned by reason.
pub bypass_failures_total: Counter<u64>,
/// `crawl_duration_seconds`: end-to-end crawl duration, registered with unit `s`.
pub duration_seconds: Histogram<f64>,
/// `crawl_pages_duration_seconds`: per-page fetch duration, registered with unit `s`.
pub pages_duration_seconds: Histogram<f64>,
/// `crawl_browser_sessions_active`: active headless-browser sessions.
pub browser_sessions_active: UpDownCounter<i64>,
}
// Backing storage for the shared `MetricRegistry`; starts empty and is filled
// by `registry()` through `OnceLock::get_or_init`.
static REGISTRY: OnceLock<MetricRegistry> = OnceLock::new();
impl MetricRegistry {
    /// Builds every crawler instrument on the global meter named `"crawlberg"`.
    ///
    /// Instruments are created in the order they appear in the struct literal
    /// below: seven `u64` counters, two `f64` histograms with unit `"s"`, and
    /// one `i64` up/down counter.
    fn new() -> Self {
        let meter = global::meter("crawlberg");

        // Every counter shares the same builder chain: name + description only.
        let counter = |name: &'static str, description: &'static str| {
            meter
                .u64_counter(name)
                .with_description(description)
                .build()
        };

        // Both histograms additionally declare the unit "s".
        let seconds_histogram = |name: &'static str, description: &'static str| {
            meter
                .f64_histogram(name)
                .with_description(description)
                .with_unit("s")
                .build()
        };

        Self {
            pages_total: counter(
                "crawl_pages_total",
                "Pages fetched by the crawl engine, partitioned by terminal status",
            ),
            documents_discovered_total: counter(
                "crawl_documents_discovered_total",
                "Documents (PDF, DOCX, \u{2026}) discovered during crawling, partitioned by mime type",
            ),
            robots_blocked_total: counter(
                "crawl_robots_blocked_total",
                "Page fetches blocked by robots.txt",
            ),
            waf_blocks_total: counter(
                "crawl_waf_blocks_total",
                "WAF / antibot challenges detected, partitioned by vendor",
            ),
            backend_escalations_total: counter(
                "crawl_backend_escalations_total",
                "Tier escalations in the dispatch chain",
            ),
            bypass_requests_total: counter(
                "crawl_bypass_requests_total",
                "Requests routed through a bypass provider",
            ),
            bypass_failures_total: counter(
                "crawl_bypass_failures_total",
                "Bypass provider failures, partitioned by reason",
            ),
            duration_seconds: seconds_histogram(
                "crawl_duration_seconds",
                "End-to-end crawl duration in seconds",
            ),
            pages_duration_seconds: seconds_histogram(
                "crawl_pages_duration_seconds",
                "Per-page fetch duration in seconds",
            ),
            // The only up/down counter, so it is built inline rather than via a helper.
            browser_sessions_active: meter
                .i64_up_down_counter("crawl_browser_sessions_active")
                .with_description("Active headless-browser sessions")
                .build(),
        }
    }
}
/// Returns the shared [`MetricRegistry`].
///
/// The registry lives in the `REGISTRY` static; if that cell has not been
/// initialised yet, it is filled by calling `MetricRegistry::new` before the
/// reference is returned.
pub fn registry() -> &'static MetricRegistry {
REGISTRY.get_or_init(MetricRegistry::new)
}