alloy-telemetry 0.2.0

Shared telemetry facade for the Alloy workspace
Documentation
//! Stable public API surface for telemetry contracts.

/// Returns the label describing this crate's current Sprint 4 scope.
#[must_use]
pub const fn scope() -> &'static str {
    // Named so the reported label is easy to locate when the scope changes.
    const CURRENT_SCOPE: &str = "phase-2-v1";
    CURRENT_SCOPE
}

/// Machine-checkable health state for shared platform surfaces.
///
/// [`HealthSnapshot`] uses this one enum for both its readiness and its
/// liveness signal.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum HealthState {
    /// The component is ready to serve its intended workload.
    Ready,
    /// The component is live but not yet ready for full workload.
    Live,
    /// The component is not yet ready.
    NotReady,
}

/// Shared health snapshot shape for embedded and service-intent consumers.
///
/// Every field is a borrowed string or a plain value, so the snapshot is
/// `Copy` (like the other borrowed spec types in this file) and can be handed
/// to several consumers without an explicit `clone()`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct HealthSnapshot<'a> {
    /// Component identifier.
    pub component: &'a str,
    /// Current readiness signal.
    pub readiness: HealthState,
    /// Current liveness signal.
    pub liveness: HealthState,
    /// Optional degradation reason.
    pub degradation_reason: Option<&'a str>,
    /// Monotonic or wall-clock checkpoint provided by the producer.
    pub checked_at_epoch_seconds: u64,
}

impl<'a> HealthSnapshot<'a> {
    /// Builds a degraded snapshot while keeping the process live.
    ///
    /// The result reports [`HealthState::NotReady`] for readiness and
    /// [`HealthState::Live`] for liveness, and always carries
    /// `degradation_reason` wrapped in `Some`. `component` and
    /// `checked_at_epoch_seconds` are stored exactly as given.
    #[must_use]
    pub const fn degraded(
        component: &'a str,
        degradation_reason: &'a str,
        checked_at_epoch_seconds: u64,
    ) -> Self {
        Self {
            component,
            readiness: HealthState::NotReady,
            liveness: HealthState::Live,
            degradation_reason: Some(degradation_reason),
            checked_at_epoch_seconds,
        }
    }
}

/// Metric families that an entry in the shared catalog can declare.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MetricKind {
    /// A monotonic count.
    Counter,
    /// An instantaneous or sampled value.
    Gauge,
    /// A distribution over time or sizes.
    Histogram,
}

/// Declaration of one shared metric, published for downstream adoption.
///
/// All fields are `'static`, so a spec can live in a constant table.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct MetricSpec {
    /// Stable name of the metric.
    pub name: &'static str,
    /// Family the metric belongs to.
    pub kind: MetricKind,
    /// Unit used for display or storage.
    pub unit: &'static str,
    /// Short description for human readers.
    pub description: &'static str,
    /// Provenance or scoping tags that are required for this metric.
    pub required_tags: &'static [&'static str],
}

/// Backing table for the baseline metric catalog.
const BASELINE_METRICS: [MetricSpec; 3] = [
    // Build and version provenance.
    MetricSpec {
        name: "alloy.build.info",
        description: "Build and version provenance for the emitting surface.",
        kind: MetricKind::Gauge,
        unit: "build",
        required_tags: &["crate", "version", "commit", "maturity_tier"],
    },
    // Operation latency.
    MetricSpec {
        name: "alloy.operation.duration_ms",
        description: "Duration for externally visible operations.",
        kind: MetricKind::Histogram,
        unit: "milliseconds",
        required_tags: &["surface", "operation", "outcome"],
    },
    // Cache traffic.
    MetricSpec {
        name: "alloy.cache.requests",
        description: "Cache interactions for artifact and storage seams.",
        kind: MetricKind::Counter,
        unit: "requests",
        required_tags: &["surface", "cache_name", "outcome"],
    },
];

/// Backing table for the service-path metric catalog.
const SERVICE_METRICS: [MetricSpec; 4] = {
    // The request counter and the request latency histogram declare the same tag set.
    const REQUEST_TAGS: &[&str] = &["service", "route", "method", "outcome"];

    [
        // Request volume.
        MetricSpec {
            name: "alloy.service.requests_total",
            description: "Total externally visible service requests handled by the surface.",
            kind: MetricKind::Counter,
            unit: "requests",
            required_tags: REQUEST_TAGS,
        },
        // Request latency.
        MetricSpec {
            name: "alloy.service.request_duration_ms",
            description: "Latency distribution for externally visible service requests.",
            kind: MetricKind::Histogram,
            unit: "milliseconds",
            required_tags: REQUEST_TAGS,
        },
        // Request concurrency.
        MetricSpec {
            name: "alloy.service.in_flight_requests",
            description: "Current concurrent externally visible requests for the service surface.",
            kind: MetricKind::Gauge,
            unit: "requests",
            required_tags: &["service"],
        },
        // Exporter failures.
        MetricSpec {
            name: "alloy.service.exporter_failures_total",
            description: "Exporter failures observed while publishing telemetry from the surface.",
            kind: MetricKind::Counter,
            unit: "failures",
            required_tags: &["service", "exporter", "failure_kind"],
        },
    ]
};

/// Exposes the baseline metrics required by the platform contract as a static slice.
#[must_use]
pub const fn baseline_metric_catalog() -> &'static [MetricSpec] {
    // The explicit type performs the array-to-slice conversion at the binding.
    let catalog: &'static [MetricSpec] = &BASELINE_METRICS;
    catalog
}

/// Exposes the service-path metrics required by the Sprint 4 contract as a static slice.
#[must_use]
pub const fn service_metric_catalog() -> &'static [MetricSpec] {
    // The explicit type performs the array-to-slice conversion at the binding.
    let catalog: &'static [MetricSpec] = &SERVICE_METRICS;
    catalog
}

/// Supported exporter modes for service telemetry.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ExporterMode {
    /// Export telemetry to structured logs only.
    Log,
    /// Export telemetry to a Prometheus-compatible scrape surface.
    Prometheus,
    /// Export telemetry to an OpenTelemetry collector or equivalent sink.
    OpenTelemetry,
}

impl ExporterMode {
    /// Returns the stable wire spelling for the exporter mode.
    ///
    /// The spellings are lowercase ASCII: `"log"`, `"prometheus"` and
    /// `"opentelemetry"`.
    #[must_use]
    pub const fn as_str(self) -> &'static str {
        match self {
            Self::Log => "log",
            Self::Prometheus => "prometheus",
            Self::OpenTelemetry => "opentelemetry",
        }
    }
}

/// Shared exporter configuration helper for service-intent consumers.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct ExporterSpec<'a> {
    /// Exporter mode in use.
    pub mode: ExporterMode,
    /// Logical endpoint or sink identifier; an empty string means no sink is configured.
    pub endpoint: &'a str,
    /// Stable service identifier that owns the exporter.
    pub service_name: &'a str,
}

impl<'a> ExporterSpec<'a> {
    /// Builds a stable exporter helper spec.
    ///
    /// The arguments are stored as given; no validation is performed.
    #[must_use]
    pub const fn new(mode: ExporterMode, endpoint: &'a str, service_name: &'a str) -> Self {
        Self {
            mode,
            endpoint,
            service_name,
        }
    }

    /// Returns whether the exporter is configured to emit to a concrete sink.
    ///
    /// This is `true` exactly when `endpoint` is non-empty; the mode is not
    /// consulted. The check is a `const fn`, like [`ExporterSpec::new`], so a
    /// spec can be built and queried in constant context.
    #[must_use]
    pub const fn is_enabled(&self) -> bool {
        !self.endpoint.is_empty()
    }
}

/// Naming helper for the trace spans of service request flows.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ServiceTraceSpec<'a> {
    /// Stable identifier of the service.
    pub service_name: &'a str,
    /// Route, or logical endpoint, being served.
    pub route: &'a str,
    /// Method, or operation category, of the request.
    pub method: &'a str,
}

impl<'a> ServiceTraceSpec<'a> {
    /// Creates a tracing specification from its three parts, stored as given.
    #[must_use]
    pub const fn new(service_name: &'a str, route: &'a str, method: &'a str) -> Self {
        Self { service_name, route, method }
    }

    /// Produces the canonical span name for service request traces.
    ///
    /// The parts are dot-separated in the order service name, method, route;
    /// note that the method precedes the route.
    #[must_use]
    pub fn span_name(&self) -> String {
        let segments = [self.service_name, self.method, self.route];
        segments.join(".")
    }
}

/// Trace and correlation context propagated through evaluator and scenario flows.
///
/// Every field is a borrowed string or an optional borrowed string, so the
/// context is `Copy` (like the other borrowed spec types in this file) and can
/// be passed down a call chain without an explicit `clone()`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct TraceContext<'a> {
    /// Logical operation name.
    pub operation: &'a str,
    /// Stable trace identifier.
    pub trace_id: &'a str,
    /// Current span identifier.
    pub span_id: &'a str,
    /// Optional parent span.
    pub parent_span_id: Option<&'a str>,
    /// Optional higher-level scenario or correlation identifier.
    pub correlation_id: Option<&'a str>,
}

impl<'a> TraceContext<'a> {
    /// Builds a child span context.
    ///
    /// `parent_span_id` is always stored as `Some`, which is what marks the
    /// result as a child; the other arguments are stored exactly as given.
    #[must_use]
    pub const fn child(
        operation: &'a str,
        trace_id: &'a str,
        span_id: &'a str,
        parent_span_id: &'a str,
        correlation_id: Option<&'a str>,
    ) -> Self {
        Self {
            operation,
            trace_id,
            span_id,
            parent_span_id: Some(parent_span_id),
            correlation_id,
        }
    }
}

#[cfg(test)]
mod tests {
    use super::{
        ExporterMode, ExporterSpec, HealthSnapshot, HealthState, MetricKind, MetricSpec,
        ServiceTraceSpec, TraceContext, baseline_metric_catalog, scope, service_metric_catalog,
    };

    /// Returns whether `catalog` declares a metric with exactly this name.
    fn declares(catalog: &[MetricSpec], name: &str) -> bool {
        catalog.iter().any(|metric| metric.name == name)
    }

    #[test]
    fn sprint_4_scope_is_exposed() {
        assert_eq!(scope(), "phase-2-v1");
    }

    #[test]
    fn baseline_catalog_covers_required_metric_groups() {
        let catalog = baseline_metric_catalog();
        let required = [
            "alloy.build.info",
            "alloy.operation.duration_ms",
            "alloy.cache.requests",
        ];

        for name in required {
            assert!(declares(catalog, name), "baseline catalog is missing {name}");
        }
        assert!(
            catalog
                .iter()
                .any(|metric| metric.kind == MetricKind::Histogram)
        );
    }

    #[test]
    fn degraded_health_snapshot_carries_reason() {
        let snapshot =
            HealthSnapshot::degraded("iridium-evaluator", "cache warmup in progress", 42);

        assert_eq!(snapshot.component, "iridium-evaluator");
        assert_eq!(snapshot.readiness, HealthState::NotReady);
        assert_eq!(snapshot.liveness, HealthState::Live);
        assert_eq!(
            snapshot.degradation_reason,
            Some("cache warmup in progress")
        );
        // The checkpoint must be passed through untouched.
        assert_eq!(snapshot.checked_at_epoch_seconds, 42);
    }

    #[test]
    fn trace_context_formats_parent_relationship() {
        let trace = TraceContext::child(
            "embedded-query",
            "trace-001",
            "span-002",
            "span-001",
            Some("scenario-abc"),
        );

        assert_eq!(trace.operation, "embedded-query");
        // Guards against the trace, span and parent identifiers being swapped.
        assert_eq!(trace.trace_id, "trace-001");
        assert_eq!(trace.span_id, "span-002");
        assert_eq!(trace.parent_span_id, Some("span-001"));
        assert_eq!(trace.correlation_id, Some("scenario-abc"));
    }

    #[test]
    fn service_metric_catalog_covers_request_and_exporter_paths() {
        let catalog = service_metric_catalog();
        let required = [
            "alloy.service.requests_total",
            "alloy.service.request_duration_ms",
            "alloy.service.in_flight_requests",
            "alloy.service.exporter_failures_total",
        ];

        for name in required {
            assert!(declares(catalog, name), "service catalog is missing {name}");
        }
    }

    #[test]
    fn catalog_metric_names_are_unique() {
        let names: Vec<&str> = baseline_metric_catalog()
            .iter()
            .chain(service_metric_catalog())
            .map(|metric| metric.name)
            .collect();

        for (index, name) in names.iter().enumerate() {
            assert!(
                !names[index + 1..].contains(name),
                "metric {name} is declared more than once"
            );
        }
    }

    #[test]
    fn exporter_spec_tracks_enabled_state() {
        let enabled = ExporterSpec::new(
            ExporterMode::Prometheus,
            "127.0.0.1:9090",
            "iridium-service",
        );
        let disabled = ExporterSpec::new(ExporterMode::Log, "", "iridium-service");

        assert!(enabled.is_enabled());
        assert!(!disabled.is_enabled());
        assert_eq!(enabled.mode.as_str(), "prometheus");
    }

    #[test]
    fn exporter_mode_wire_spellings_are_stable() {
        assert_eq!(ExporterMode::Log.as_str(), "log");
        assert_eq!(ExporterMode::Prometheus.as_str(), "prometheus");
        assert_eq!(ExporterMode::OpenTelemetry.as_str(), "opentelemetry");
    }

    #[test]
    fn service_trace_spec_builds_canonical_span_name() {
        let trace = ServiceTraceSpec::new("iridium", "query", "post");

        assert_eq!(trace.span_name(), "iridium.post.query");
    }
}