// datafusion-dist 0.3.0
//
// A distributed streaming execution library for Apache DataFusion.
// Documentation
use std::{
    collections::HashMap,
    fmt::{Debug, Display},
};

use serde::{Deserialize, Serialize};

use crate::{DistError, DistResult};

/// Cluster-membership abstraction: nodes report themselves through heartbeats
/// and callers can ask which nodes are alive.
///
/// Implementors must be `Debug + Send + Sync`. The methods are `async` and are
/// made usable in a trait through the `async_trait` attribute.
#[async_trait::async_trait]
pub trait DistCluster: Debug + Send + Sync {
    /// Sends a heartbeat for the node identified by `node_id`, carrying its
    /// current `state`.
    ///
    /// # Errors
    ///
    /// Returns whatever error the implementor produces; this trait does not
    /// constrain the failure cases.
    async fn heartbeat(&self, node_id: NodeId, state: NodeState) -> DistResult<()>;
    /// Returns the alive nodes as a map from [`NodeId`] to [`NodeState`].
    ///
    /// NOTE(review): presumably each state is the one reported by that node's
    /// most recent heartbeat, and "alive" is decided by the implementor —
    /// confirm against the concrete implementations.
    ///
    /// # Errors
    ///
    /// Returns whatever error the implementor produces.
    async fn alive_nodes(&self) -> DistResult<HashMap<NodeId, NodeState>>;
}

/// Identifies a node by its host and port.
///
/// The derives make it usable as a `HashMap` key (`Eq + Hash`), orderable
/// (`Ord`, comparing `host` first and then `port`, following field order), and
/// serializable with serde. Its `Display` form is `host:port`.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
pub struct NodeId {
    /// Host part of the node address (stored as given; no validation happens here).
    pub host: String,
    /// Port part of the node address.
    pub port: u16,
}

/// Renders a [`NodeId`] as `host:port`.
impl Display for NodeId {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Pull both fields out once so the format string can name them directly.
        let Self { host, port } = self;
        write!(f, "{host}:{port}")
    }
}

/// Lifecycle status a node reports about itself.
///
/// `Available` is the `Default` value. The variant names are also the exact
/// strings produced by `Display` and accepted by `FromStr` for this type.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
pub enum NodeStatus {
    /// Default status.
    /// NOTE(review): presumably means the node can accept new work — confirm with the scheduler.
    #[default]
    Available,
    /// NOTE(review): presumably means the node is shutting down and should not
    /// receive new work — confirm with the scheduler.
    Terminating,
}

impl Display for NodeStatus {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            NodeStatus::Available => write!(f, "Available"),
            NodeStatus::Terminating => write!(f, "Terminating"),
        }
    }
}

/// Parses a [`NodeStatus`] from its exact variant name.
///
/// Matching is case-sensitive and performs no trimming: only `"Available"` and
/// `"Terminating"` are accepted.
impl std::str::FromStr for NodeStatus {
    type Err = DistError;

    /// # Errors
    ///
    /// Returns `DistError::internal` with the message `Unknown NodeStatus: <input>`
    /// for any other input.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let parsed = match s {
            "Available" => Self::Available,
            "Terminating" => Self::Terminating,
            // Anything else is rejected up front; the offending text is echoed back.
            _ => return Err(DistError::internal(format!("Unknown NodeStatus: {s}"))),
        };
        Ok(parsed)
    }
}

/// Snapshot of a node's status and resource usage, as carried by a heartbeat
/// and returned by `DistCluster::alive_nodes`.
///
/// `Default` yields `NodeStatus::Available` with every numeric field set to zero.
///
/// NOTE(review): the memory/CPU field names mirror common system-info
/// accessors; units are not established in this file — presumably bytes for the
/// memory fields and a percentage for CPU usage. Confirm against the code that
/// populates this struct.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct NodeState {
    /// Lifecycle status reported by the node.
    pub status: NodeStatus,
    /// Total memory on the node (unit unconfirmed, presumably bytes).
    pub total_memory: u64,
    /// Memory currently in use (unit unconfirmed, presumably bytes).
    pub used_memory: u64,
    /// Memory reported as free (unit unconfirmed, presumably bytes).
    pub free_memory: u64,
    /// Memory reported as available; how this differs from `free_memory`
    /// depends on the producer — TODO confirm.
    pub available_memory: u64,
    /// Overall CPU usage of the node (scale unconfirmed, presumably a percentage).
    pub global_cpu_usage: f32,
    /// Number of tasks currently running on the node.
    pub num_running_tasks: u32,
}