etl-unit 0.1.0

Semantic data model for ETL units — qualities and measurements over subjects and time. Built on Polars.
//! Data Transfer Objects for schema TOML deserialization.
//!
//! Qualities, measurements, and derivations are keyed by name as TOML tables —
//! the map key *is* the canonical name; the DTO carries everything else.
//! These types are converted to runtime types by `convert.rs`.

use indexmap::IndexMap;
use serde::{Deserialize, Serialize};

use crate::{MeasurementKind, PointwiseExpr};

/// Root DTO for schema TOML files.
///
/// `name`, `subject`, and `time` are required keys. The three keyed tables
/// (`qualities`, `measurements`, `derivations`) are optional: each one
/// deserializes to an empty map when it is absent from the file.
///
/// # Example TOML
///
/// ```toml
/// name    = "pump_station"
/// subject = "station"
/// time    = "timestamp"
///
/// [qualities.region]
///
/// [measurements.water_level]
/// kind = "measure"
///
/// [measurements.water_level.signal_policy]
/// max_staleness = "60s"
/// windowing = { type = "instant" }
///
/// [measurements.engine_status]
/// kind = "categorical"
/// components = ["engine_id"]
///
/// [measurements.engine_status.signal_policy]
/// max_staleness = "60s"
/// windowing = { type = "instant" }
/// ```
///
/// The empty `[qualities.region]` table above is a complete quality entry:
/// the only field of [`QualityDto`] is optional, so declaring the key is enough.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SchemaDto {
    /// Schema name (required).
    pub name: String,

    /// Canonical subject name (required).
    pub subject: String,

    /// Canonical time name (required).
    pub time: String,

    /// Qualities, keyed by canonical name. Empty when the `[qualities]`
    /// table is missing.
    #[serde(default)]
    pub qualities: IndexMap<String, QualityDto>,

    /// Measurements, keyed by canonical name. Empty when the
    /// `[measurements]` table is missing.
    #[serde(default)]
    pub measurements: IndexMap<String, MeasurementDto>,

    /// Derivations, keyed by output name. Empty when the `[derivations]`
    /// table is missing.
    #[serde(default)]
    pub derivations: IndexMap<String, DerivationDto>,
}

// =============================================================================
// Quality DTO
// =============================================================================

/// DTO for a single quality entry.
///
/// The quality's canonical name is the map key under `qualities` in
/// [`SchemaDto`], so it is not repeated here. Every field is optional, which
/// means an empty TOML table (`[qualities.region]`) is a valid entry.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QualityDto {
    /// Optional chart hints; `None` when the key is absent.
    #[serde(default)]
    pub chart_hints: Option<ChartHintsDto>,
}

// =============================================================================
// Measurement DTO
// =============================================================================

/// DTO for a single measurement entry.
///
/// The measurement's canonical name is the map key under `measurements` in
/// [`SchemaDto`]. Only `kind` is required; every other field falls back to
/// an empty / `None` default when omitted.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MeasurementDto {
    /// Measurement kind (required), e.g. `"measure"` or `"categorical"` as
    /// used in the schema example; the full set of accepted values is
    /// defined by `MeasurementKind`.
    pub kind: MeasurementKind,

    /// Component canonical names (optional). Defaults to an empty list when
    /// the key is absent.
    #[serde(default)]
    pub components: Vec<String>,

    /// Optional signal policy; `None` when the sub-table is absent.
    #[serde(default)]
    pub signal_policy: Option<SignalPolicyDto>,

    /// Native sample rate, e.g. `"60s"`, `"1h"`. Required for measurements
    /// loaded into a Universe; optional in the DTO so partial / draft TOML
    /// files still deserialize.
    ///
    /// Parsed from (and written back as) a human-readable duration string via
    /// `humantime_serde::option`; `default` is needed alongside `with` so a
    /// missing key yields `None` instead of a deserialization error.
    #[serde(default, with = "humantime_serde::option")]
    pub sample_rate: Option<std::time::Duration>,

    /// Optional chart hints; `None` when the key is absent.
    #[serde(default)]
    pub chart_hints: Option<ChartHintsDto>,
}

// =============================================================================
// Signal Policy DTO
// =============================================================================

/// DTO for a measurement's signal policy (the `signal_policy` sub-table of a
/// measurement).
///
/// `max_staleness` and `windowing` are both required once the table is
/// present; `time_format` is optional.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SignalPolicyDto {
    /// Maximum staleness duration (e.g., "60s", "5m"). Required; parsed from a
    /// human-readable duration string via `humantime_serde`.
    #[serde(with = "humantime_serde")]
    pub max_staleness: std::time::Duration,

    /// Windowing strategy (required). See [`WindowStrategyDto`] for the
    /// accepted `type` values.
    pub windowing: WindowStrategyDto,

    /// Time format string (e.g., "%Y-%m-%d %H:%M:%S"). `None` when absent.
    /// NOTE(review): presumably a strftime-style pattern used when parsing the
    /// time column — the consumer is not visible in this file; confirm.
    #[serde(default)]
    pub time_format: Option<String>,
}

/// Windowing strategy for a signal policy.
///
/// Internally tagged: the variant is selected by a `type` key whose value is
/// the snake_case variant name, and any variant fields sit next to it in the
/// same table:
///
/// ```toml
/// windowing = { type = "instant" }
/// windowing = { type = "sliding", duration = "5m", min_samples = 3 }
/// ```
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum WindowStrategyDto {
    /// Instant value (no windowing). Selected with `type = "instant"`.
    Instant,

    /// Sliding window with duration and minimum samples. Selected with
    /// `type = "sliding"`; both fields are required.
    Sliding {
        /// Window length, given as a human-readable duration string
        /// (parsed via `humantime_serde`).
        #[serde(with = "humantime_serde")]
        duration: std::time::Duration,
        /// Minimum number of samples for the window (interpretation is up to
        /// the runtime conversion, which is not visible here).
        min_samples: u32,
    },
}

// =============================================================================
// Chart Hints DTO
// =============================================================================

/// Optional presentation hints attached to a quality, measurement, or
/// derivation.
///
/// Every field is optional and deserializes to `None` when its key is absent,
/// so an empty `chart_hints` table is valid. String-valued fields are not
/// validated by this DTO.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChartHintsDto {
    /// Label text for the series (presumably a display-name override —
    /// confirm against the chart consumer).
    #[serde(default)]
    pub label: Option<String>,

    /// Series color, as a free-form string (accepted formats are decided by
    /// the chart consumer, not by this DTO).
    #[serde(default)]
    pub color: Option<String>,

    /// Whether the series is drawn stepped (presumably step interpolation
    /// instead of straight segments — confirm against the chart consumer).
    #[serde(default)]
    pub stepped: Option<bool>,

    /// Line tension (presumably curve smoothing for line charts — confirm
    /// against the chart consumer).
    #[serde(default)]
    pub tension: Option<f32>,

    /// Axis identifier: "y", "y1", or "y2". Stored as a plain string; other
    /// values are not rejected at this layer.
    #[serde(default)]
    pub axis: Option<String>,

    /// Chart type: "line", "bar", "scatter", or { "bubble": { "size": "column" } }
    #[serde(default)]
    pub chart_type: Option<ChartTypeDto>,

    /// Index type: "time", "subject", or { "quality": "column" }, { "component": "column" }
    #[serde(default)]
    pub index: Option<IndexDto>,

    /// Series grouping: "subject", or { "component": "column" }, { "quality": "column" },
    /// { "subject_and_component": "column" }
    #[serde(default)]
    pub series: Option<SeriesDto>,
}

/// Chart type hint.
///
/// Uses serde's default (externally tagged) enum representation with
/// snake_case names: unit variants are written as a bare string, while
/// `Bubble` is a single-key table wrapping its fields.
///
/// ```toml
/// chart_type = "line"
/// chart_type = { bubble = { size = "some_column" } }
/// ```
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ChartTypeDto {
    /// Written as `"line"`.
    Line,
    /// Written as `"bar"`.
    Bar,
    /// Written as `"scatter"`.
    Scatter,
    /// Written as `{ bubble = { size = "..." } }`. `size` is presumably the
    /// name of the column that drives bubble size — confirm against the
    /// runtime conversion.
    Bubble { size: String },
}

/// Chart index hint.
///
/// Externally tagged with snake_case names: `Time` and `Subject` are written
/// as bare strings, while `Quality` / `Component` are single-key tables whose
/// value is the carried string.
///
/// ```toml
/// index = "time"
/// index = { quality = "region" }
/// ```
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum IndexDto {
    /// Written as `"time"`.
    Time,
    /// Written as `"subject"`.
    Subject,
    /// Written as `{ quality = "..." }`; the string is presumably a quality's
    /// canonical name.
    Quality(String),
    /// Written as `{ component = "..." }`; the string is presumably a
    /// component's canonical name.
    Component(String),
}

/// Chart series-grouping hint.
///
/// Externally tagged with snake_case names: `Subject` is written as a bare
/// string, while the other variants are single-key tables whose value is the
/// carried string.
///
/// ```toml
/// series = "subject"
/// series = { component = "engine_id" }
/// series = { subject_and_component = "engine_id" }
/// ```
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum SeriesDto {
    /// Written as `"subject"`.
    Subject,
    /// Written as `{ quality = "..." }`; the string is presumably a quality's
    /// canonical name.
    Quality(String),
    /// Written as `{ component = "..." }`; the string is presumably a
    /// component's canonical name.
    Component(String),
    /// Written as `{ subject_and_component = "..." }`; the string is
    /// presumably the component's canonical name, combined with the subject.
    SubjectAndComponent(String),
}

// =============================================================================
// Derivation DTO
// =============================================================================

/// DTO for a single derivation entry.
///
/// The derivation's output name is the map key under `derivations` in
/// [`SchemaDto`], so it is not repeated here. `computation` and `kind` are
/// required; `chart_hints` is optional.
///
/// NOTE(review): serde emits fields in declaration order, so on serialization
/// `kind` follows the `computation` table. If `MeasurementKind` serializes as
/// a plain value, some TOML serializers reject a value written after a table —
/// confirm a serialize round-trip with the `toml` version this crate uses.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DerivationDto {
    /// Computation definition (required).
    pub computation: ComputationDto,

    /// Output measurement kind (required).
    pub kind: MeasurementKind,

    /// Optional chart hints; `None` when the key is absent.
    #[serde(default)]
    pub chart_hints: Option<ChartHintsDto>,
}

/// Computation definition for a derivation (the `computation` sub-table of a
/// derivation entry).
///
/// Currently carries a single required key, `pointwise`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ComputationDto {
    /// Pointwise expression - uses the runtime type directly since it's serde-compatible
    /// (so there is no separate DTO for it; its TOML shape is whatever
    /// `PointwiseExpr`'s own serde implementation defines).
    pub pointwise: PointwiseExpr,
}