halldyll-core 0.1.0

Core scraping engine for Halldyll - high-performance async web scraper for AI agents
Documentation
//! Prometheus Metrics Exporter
//!
//! Exports metrics in Prometheus text format for monitoring.
//!
//! ## Usage
//!
//! ```rust,ignore
//! let collector = MetricsCollector::new();
//! let exporter = PrometheusExporter::new(&collector);
//! 
//! // Get metrics in Prometheus format
//! let output = exporter.export();
//! // Output:
//! // # HELP halldyll_requests_total Total number of HTTP requests
//! // # TYPE halldyll_requests_total counter
//! // halldyll_requests_total 1234
//! ```

use crate::observe::metrics::{MetricsCollector, MetricsSnapshot};

/// Prometheus metrics exporter.
///
/// Borrows a [`MetricsCollector`] and renders a snapshot of its global
/// metrics as Prometheus text-format output.
pub struct PrometheusExporter<'a> {
    /// Collector whose global metrics are snapshotted on every export call.
    collector: &'a MetricsCollector,
    /// Prefix prepended (followed by `_`) to every exported metric name.
    prefix: String,
}

impl<'a> PrometheusExporter<'a> {
    /// Create a new exporter with the default prefix `"halldyll"`.
    pub fn new(collector: &'a MetricsCollector) -> Self {
        Self::with_prefix(collector, "halldyll")
    }

    /// Create an exporter whose metric names start with a custom prefix.
    ///
    /// The prefix is joined to each metric name with an underscore
    /// (`<prefix>_requests_total`, ...). It is emitted verbatim, so the caller
    /// is responsible for passing a valid Prometheus metric-name prefix.
    pub fn with_prefix(collector: &'a MetricsCollector, prefix: impl Into<String>) -> Self {
        Self {
            collector,
            prefix: prefix.into(),
        }
    }

    /// Export metrics in Prometheus text format.
    ///
    /// Takes a fresh snapshot of the collector's global metrics and renders
    /// every metric without labels.
    pub fn export(&self) -> String {
        let snapshot = self.collector.global().snapshot();
        self.format_metrics(&snapshot)
    }

    /// Export metrics in Prometheus text format, attaching `labels` to every sample.
    ///
    /// Each `(name, value)` pair is rendered as `name="value"`, in the order
    /// given. Label values are escaped as the exposition format requires
    /// (`\` -> `\\`, `"` -> `\"`, line feed -> `\n`). Label names are emitted
    /// verbatim. An empty slice yields exactly the same output as
    /// [`export`](Self::export).
    pub fn export_with_labels(&self, labels: &[(&str, &str)]) -> String {
        let snapshot = self.collector.global().snapshot();
        self.format_metrics_with_labels(&snapshot, labels)
    }

    /// Render `snapshot` without labels.
    ///
    /// Delegates to the labelled formatter with an empty label set so the two
    /// code paths cannot drift apart.
    fn format_metrics(&self, snapshot: &MetricsSnapshot) -> String {
        self.format_metrics_with_labels(snapshot, &[])
    }

    /// Render every metric of `snapshot`, attaching `labels` to each sample line.
    ///
    /// Metrics are separated by one blank line and the output ends with a
    /// single newline.
    fn format_metrics_with_labels(&self, snapshot: &MetricsSnapshot, labels: &[(&str, &str)]) -> String {
        let label_str = Self::format_label_set(labels);
        let mut output = String::with_capacity(2048);

        self.push_metric(
            &mut output,
            "requests_total",
            "counter",
            "Total number of HTTP requests",
            &label_str,
            &snapshot.requests_total,
        );
        self.push_metric(
            &mut output,
            "requests_success_total",
            "counter",
            "Total number of successful requests",
            &label_str,
            &snapshot.requests_success,
        );
        self.push_metric(
            &mut output,
            "requests_failed_total",
            "counter",
            "Total number of failed requests",
            &label_str,
            &snapshot.requests_failed,
        );
        self.push_metric(
            &mut output,
            "requests_rate_limited_total",
            "counter",
            "Total number of rate-limited requests",
            &label_str,
            &snapshot.requests_rate_limited,
        );
        self.push_metric(
            &mut output,
            "bytes_downloaded_total",
            "counter",
            "Total bytes downloaded",
            &label_str,
            &snapshot.bytes_downloaded,
        );
        self.push_metric(
            &mut output,
            "retries_total",
            "counter",
            "Total number of retries performed",
            &label_str,
            &snapshot.retries_total,
        );
        self.push_metric(
            &mut output,
            "documents_extracted_total",
            "counter",
            "Total number of documents extracted",
            &label_str,
            &snapshot.documents_extracted,
        );
        // Gauges keep their fixed precision: four decimals for the rate,
        // two for the latency.
        self.push_metric(
            &mut output,
            "success_rate",
            "gauge",
            "Current success rate (0.0-1.0)",
            &label_str,
            format_args!("{:.4}", snapshot.success_rate),
        );
        self.push_metric(
            &mut output,
            "avg_latency_ms",
            "gauge",
            "Average request latency in milliseconds",
            &label_str,
            format_args!("{:.2}", snapshot.avg_latency_ms),
        );

        output
    }

    /// Append one metric (HELP line, TYPE line, sample line) to `output`.
    ///
    /// A blank separator line is inserted before every metric except the
    /// first, so the final metric is followed by exactly one newline.
    fn push_metric(
        &self,
        output: &mut String,
        name: &str,
        kind: &str,
        help: &str,
        label_str: &str,
        value: impl std::fmt::Display,
    ) {
        if !output.is_empty() {
            output.push('\n');
        }
        let p = &self.prefix;
        output.push_str(&format!(
            "# HELP {p}_{name} {help}\n\
             # TYPE {p}_{name} {kind}\n\
             {p}_{name}{label_str} {value}\n"
        ));
    }

    /// Build the `{k1="v1",k2="v2"}` label block, or an empty string when
    /// there are no labels.
    fn format_label_set(labels: &[(&str, &str)]) -> String {
        if labels.is_empty() {
            return String::new();
        }
        let pairs: Vec<String> = labels
            .iter()
            .map(|(name, value)| format!("{name}=\"{}\"", Self::escape_label_value(value)))
            .collect();
        format!("{{{}}}", pairs.join(","))
    }

    /// Escape a label value for the Prometheus text format.
    ///
    /// Backslash, double quote and line feed are the only characters the
    /// format requires to be escaped; everything else is copied through.
    fn escape_label_value(value: &str) -> String {
        let mut escaped = String::with_capacity(value.len());
        for ch in value.chars() {
            match ch {
                '\\' => escaped.push_str("\\\\"),
                '"' => escaped.push_str("\\\""),
                '\n' => escaped.push_str("\\n"),
                other => escaped.push(other),
            }
        }
        escaped
    }
}

/// Convenience wrapper: render `collector`'s metrics in Prometheus text
/// format using an exporter built with [`PrometheusExporter::new`].
pub fn export_prometheus(collector: &MetricsCollector) -> String {
    let exporter = PrometheusExporter::new(collector);
    exporter.export()
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Counters bumped on the global metrics appear as samples, along with
    /// the TYPE line of the request counter.
    #[test]
    fn test_prometheus_export() {
        let collector = MetricsCollector::new();
        collector.global().inc_requests();
        collector.global().inc_success();
        collector.global().add_bytes(1024);

        let output = PrometheusExporter::new(&collector).export();

        let expected_fragments = [
            "halldyll_requests_total 1",
            "halldyll_requests_success_total 1",
            "halldyll_bytes_downloaded_total 1024",
            "# TYPE halldyll_requests_total counter",
        ];
        for fragment in expected_fragments {
            assert!(output.contains(fragment));
        }
    }

    /// Labels are rendered inside braces, comma-separated, in the order given.
    #[test]
    fn test_prometheus_with_labels() {
        let collector = MetricsCollector::new();
        collector.global().inc_requests();

        let labels = [("instance", "scraper-1"), ("env", "prod")];
        let output = PrometheusExporter::new(&collector).export_with_labels(&labels);

        let expected_sample = "halldyll_requests_total{instance=\"scraper-1\",env=\"prod\"} 1";
        assert!(output.contains(expected_sample));
    }

    /// A custom prefix replaces the default one in metric names.
    #[test]
    fn test_custom_prefix() {
        let collector = MetricsCollector::new();
        collector.global().inc_requests();

        let output = PrometheusExporter::with_prefix(&collector, "myapp_scraper").export();

        assert!(output.contains("myapp_scraper_requests_total 1"));
    }
}