use async_trait::async_trait;
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use uuid::Uuid;
use crate::error::LatticeError;
use crate::types::{
AllocId, Allocation, AllocationState, GroupId, ImageMetadata, ImageRef, ImageType, Node,
NodeId, NodeOwnership, NodeState, SchedulerType, TenantId, UserId, VClusterId,
};
/// Error type returned by the methods of [`ImageResolver`].
///
/// The `Display` text of each variant is generated by `thiserror` from the
/// `#[error(...)]` attribute attached to it.
#[derive(Debug, Clone, thiserror::Error)]
pub enum ImageResolveError {
/// No image was found for `spec`.
#[error("image not found: {spec}")]
NotFound { spec: String },
/// The registry at `url` could not be used; `reason` carries the explanation.
#[error("registry unavailable: {url}: {reason}")]
RegistryUnavailable { url: String, reason: String },
/// `spec` is not a valid image specification; `reason` says why.
#[error("invalid image spec: {spec}: {reason}")]
InvalidSpec { spec: String, reason: String },
/// The requested `view` does not exist in `image`.
#[error("view not found in image {image}: {view}")]
ViewNotFound { image: String, view: String },
/// A cycle was detected while following EDF inheritance.
///
/// `chain` is rendered with `{:?}` in the message.
/// NOTE(review): presumably the names visited along the cycle — confirm the
/// ordering against the resolver implementation.
#[error("EDF inheritance cycle: {chain:?}")]
InheritanceCycle { chain: Vec<String> },
/// EDF inheritance went deeper than allowed; reported as `depth/max`.
#[error("EDF inheritance depth exceeded: {depth}/{max}")]
InheritanceDepthExceeded { depth: usize, max: usize },
/// The signature of `image` was judged invalid.
#[error("image signature invalid: {image}")]
SignatureInvalid { image: String },
/// `image` needs a vulnerability scan before it can be used.
#[error("vulnerability scan required: {image}")]
ScanRequired { image: String },
}
/// Asynchronous interface for turning an image specification into an
/// [`ImageRef`] and for fetching the [`ImageMetadata`] of a resolved image.
///
/// Implementors must be `Send + Sync`.
#[async_trait]
pub trait ImageResolver: Send + Sync {
/// Resolves `spec`, interpreted as an image of kind `image_type`.
///
/// # Errors
/// Returns an [`ImageResolveError`] on failure.
///
/// NOTE(review): the success value is an `Option`, and the error enum also
/// has a `NotFound` variant. When an implementation should answer `Ok(None)`
/// rather than `Err(NotFound { .. })` is not defined in this file — confirm
/// against the implementations.
async fn resolve(
&self,
spec: &str,
image_type: ImageType,
) -> Result<Option<ImageRef>, ImageResolveError>;
/// Returns the [`ImageMetadata`] for an already resolved `image`.
///
/// # Errors
/// Returns an [`ImageResolveError`] on failure.
async fn metadata(&self, image: &ImageRef) -> Result<ImageMetadata, ImageResolveError>;
}
/// Asynchronous interface to the storage backend.
///
/// Implementors must be `Send + Sync`. Every method reports failure as a
/// [`LatticeError`].
///
/// NOTE(review): sources, targets and paths are plain `&str`; their format
/// (URL, filesystem path, ...) is not defined in this file.
#[async_trait]
pub trait StorageService: Send + Sync {
/// Returns a readiness value for the data at `source`.
///
/// NOTE(review): presumably a fraction or score; the range and meaning of
/// the `f64` are not visible here — confirm with the implementations.
async fn data_readiness(&self, source: &str) -> Result<f64, LatticeError>;
/// Stages the data identified by `source` to `target`.
async fn stage_data(&self, source: &str, target: &str) -> Result<(), LatticeError>;
/// Sets a QoS floor of `floor_gbps` on `path`.
///
/// NOTE(review): the unit is presumably gigabits (or gigabytes) per second,
/// judging by the parameter name — confirm.
async fn set_qos(&self, path: &str, floor_gbps: f64) -> Result<(), LatticeError>;
/// Wipes the data at `path`.
async fn wipe_data(&self, path: &str) -> Result<(), LatticeError>;
}
/// Asynchronous interface to node-level infrastructure operations.
///
/// Implementors must be `Send + Sync`. Every method reports failure as a
/// [`LatticeError`].
#[async_trait]
pub trait InfrastructureService: Send + Sync {
/// Boots the node `node_id` with `image`.
///
/// NOTE(review): `image` is a plain `&str`; whether it is a name, a path or
/// a reference string is not visible here.
async fn boot_node(&self, node_id: &NodeId, image: &str) -> Result<(), LatticeError>;
/// Wipes the node `node_id`.
async fn wipe_node(&self, node_id: &NodeId) -> Result<(), LatticeError>;
/// Queries the health of node `node_id` and returns a [`NodeHealthReport`].
async fn query_node_health(&self, node_id: &NodeId) -> Result<NodeHealthReport, LatticeError>;
}
/// Asynchronous interface to the accounting backend.
///
/// Implementors must be `Send + Sync`. Every method reports failure as a
/// [`LatticeError`].
#[async_trait]
pub trait AccountingService: Send + Sync {
/// Reports that `allocation` has started.
async fn report_start(&self, allocation: &Allocation) -> Result<(), LatticeError>;
/// Reports that `allocation` has completed.
async fn report_completion(&self, allocation: &Allocation) -> Result<(), LatticeError>;
/// Returns the remaining budget of `tenant`, if any value is available.
///
/// NOTE(review): the unit of the `f64` and the meaning of `Ok(None)`
/// (presumably "no budget configured / unlimited") are not defined in this
/// file — confirm with the implementations.
async fn remaining_budget(&self, tenant: &TenantId) -> Result<Option<f64>, LatticeError>;
}
/// Asynchronous registry of [`Node`]s: lookup, listing, state updates and
/// ownership claims.
///
/// Implementors must be `Send + Sync`. Every method reports failure as a
/// [`LatticeError`].
#[async_trait]
pub trait NodeRegistry: Send + Sync {
/// Returns the node identified by `id`.
///
/// The return type carries no `Option`, so an unknown `id` can only be
/// reported through the error.
async fn get_node(&self, id: &NodeId) -> Result<Node, LatticeError>;
/// Returns the nodes selected by `filter`.
async fn list_nodes(&self, filter: &NodeFilter) -> Result<Vec<Node>, LatticeError>;
/// Sets the state of node `id` to `state`.
async fn update_node_state(&self, id: &NodeId, state: NodeState) -> Result<(), LatticeError>;
/// Claims node `id` with the given `ownership`.
async fn claim_node(&self, id: &NodeId, ownership: NodeOwnership) -> Result<(), LatticeError>;
/// Releases node `id`.
///
/// NOTE(review): presumably the inverse of `claim_node` — confirm.
async fn release_node(&self, id: &NodeId) -> Result<(), LatticeError>;
}
/// Asynchronous store of [`Allocation`] records.
///
/// Implementors must be `Send + Sync`. Every method reports failure as a
/// [`LatticeError`].
#[async_trait]
pub trait AllocationStore: Send + Sync {
/// Inserts `allocation`, taking ownership of it.
async fn insert(&self, allocation: Allocation) -> Result<(), LatticeError>;
/// Returns the allocation identified by `id`.
///
/// The return type carries no `Option`, so an unknown `id` can only be
/// reported through the error.
async fn get(&self, id: &AllocId) -> Result<Allocation, LatticeError>;
/// Sets the state of allocation `id` to `state`.
async fn update_state(&self, id: &AllocId, state: AllocationState) -> Result<(), LatticeError>;
/// Returns the allocations selected by `filter`.
async fn list(&self, filter: &AllocationFilter) -> Result<Vec<Allocation>, LatticeError>;
/// Returns a `u32` count for `tenant`.
///
/// NOTE(review): by its name, the number of that tenant's allocations that
/// are currently running — which states count as "running" is not visible
/// here.
async fn count_running(&self, tenant: &TenantId) -> Result<u32, LatticeError>;
}
/// Summary of one audit archive, as returned by [`AuditLog::archive_info`].
///
/// Serializable with serde.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AuditArchiveInfo {
/// Key of the archive object.
/// NOTE(review): presumably an object-store key — confirm.
pub object_key: String,
/// Number of entries in the archive.
pub entry_count: usize,
/// UTC timestamp of the first entry.
/// NOTE(review): presumably the earliest entry in the archive — confirm.
pub first_timestamp: DateTime<Utc>,
/// UTC timestamp of the last entry.
/// NOTE(review): presumably the latest entry in the archive — confirm.
pub last_timestamp: DateTime<Utc>,
}
/// Asynchronous audit log: records [`AuditEntry`] values and answers queries
/// over them.
///
/// Implementors must be `Send + Sync`. Every method reports failure as a
/// [`LatticeError`].
#[async_trait]
pub trait AuditLog: Send + Sync {
/// Records `entry`, taking ownership of it.
async fn record(&self, entry: AuditEntry) -> Result<(), LatticeError>;
/// Returns the entries selected by `filter`.
async fn query(&self, filter: &AuditFilter) -> Result<Vec<AuditEntry>, LatticeError>;
/// Returns one [`AuditArchiveInfo`] per known archive.
async fn archive_info(&self) -> Result<Vec<AuditArchiveInfo>, LatticeError>;
/// Returns the total number of entries.
///
/// NOTE(review): whether archived entries are included in the total is not
/// visible here — confirm.
async fn total_entry_count(&self) -> Result<usize, LatticeError>;
}
/// Scheduling policy: maps pending allocations onto nodes.
///
/// Implementors must be `Send + Sync`.
#[async_trait]
pub trait VClusterScheduler: Send + Sync {
/// Computes placements for the `pending` allocations using `nodes`.
///
/// Returns a list of [`Placement`]s, each pairing an allocation id with
/// node ids.
///
/// NOTE(review): whether every pending allocation must appear in the result
/// is not defined here — presumably unplaceable allocations are omitted.
///
/// # Errors
/// Returns a [`LatticeError`] on failure.
async fn schedule(
&self,
pending: &[Allocation],
nodes: &[Node],
) -> Result<Vec<Placement>, LatticeError>;
/// Returns the [`SchedulerType`] of this scheduler (synchronous, infallible).
fn scheduler_type(&self) -> SchedulerType;
}
/// Asynchronous interface for deciding on and triggering checkpoints of
/// allocations.
///
/// Implementors must be `Send + Sync`. Every method reports failure as a
/// [`LatticeError`].
#[async_trait]
pub trait CheckpointBroker: Send + Sync {
/// Returns whether `allocation` should be checkpointed.
async fn should_checkpoint(&self, allocation: &Allocation) -> Result<bool, LatticeError>;
/// Initiates a checkpoint of the allocation identified by `id`.
///
/// NOTE(review): the name says "initiate"; whether the future resolves
/// before or after the checkpoint has finished is not visible here.
async fn initiate_checkpoint(&self, id: &AllocId) -> Result<(), LatticeError>;
}
/// Health report for a node, as returned by
/// [`InfrastructureService::query_node_health`].
///
/// Serializable with serde.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NodeHealthReport {
/// Overall health verdict.
pub healthy: bool,
/// Free-form issue descriptions.
/// NOTE(review): presumably empty when `healthy` is `true` — not enforced by
/// this type.
pub issues: Vec<String>,
}
/// Selection criteria passed to [`NodeRegistry::list_nodes`].
///
/// Every field is optional, and `NodeFilter::default()` leaves all of them
/// `None`.
/// NOTE(review): presumably a `None` field means "do not filter on this
/// attribute" and set fields are combined with AND — the matching logic lives
/// in the registry implementations.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct NodeFilter {
/// Node state to select.
pub state: Option<NodeState>,
/// Group to select.
pub group: Option<GroupId>,
/// Tenant to select.
pub tenant: Option<TenantId>,
}
/// Selection criteria passed to [`AllocationStore::list`].
///
/// Every field is optional, and `AllocationFilter::default()` leaves all of
/// them `None`.
/// NOTE(review): presumably a `None` field means "do not filter on this
/// attribute" and set fields are combined with AND — the matching logic lives
/// in the store implementations.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct AllocationFilter {
/// User to select.
pub user: Option<UserId>,
/// Tenant to select.
pub tenant: Option<TenantId>,
/// Allocation state to select.
pub state: Option<AllocationState>,
/// vCluster to select.
pub vcluster: Option<VClusterId>,
}
/// One audit-log record: an `hpc_audit::AuditEvent` plus two string fields,
/// `previous_hash` and `signature`.
///
/// Both extra fields are marked `#[serde(default)]`, so input that omits them
/// deserializes with empty strings.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AuditEntry {
/// The audited event itself.
pub event: hpc_audit::AuditEvent,
/// NOTE(review): presumably the hash of the preceding entry (hash chaining);
/// the hashing scheme is not visible in this file. Empty when not set.
#[serde(default)]
pub previous_hash: String,
/// NOTE(review): presumably a signature over this entry; the signing scheme
/// is not visible in this file. Empty when not set.
#[serde(default)]
pub signature: String,
}
impl AuditEntry {
    /// Wraps `event` in an entry whose `previous_hash` and `signature` are
    /// both empty strings.
    pub fn new(event: hpc_audit::AuditEvent) -> Self {
        let previous_hash = String::new();
        let signature = String::new();
        Self {
            event,
            previous_hash,
            signature,
        }
    }

    /// Borrows the `action` string of the wrapped event.
    pub fn action(&self) -> &str {
        self.event.action.as_str()
    }

    /// Borrows the `identity` string of the wrapped event's principal.
    pub fn principal_identity(&self) -> &str {
        self.event.principal.identity.as_str()
    }
}
/// Action-name string constants.
///
/// Re-exports everything from `hpc_audit::actions` and adds the constants
/// below, whose values all start with `lattice.`.
/// NOTE(review): presumably intended as the `action` argument of
/// `lattice_audit_event` — nothing in this file enforces that.
pub mod audit_actions {
pub use hpc_audit::actions::*;
// Node ownership.
pub const NODE_CLAIM: &str = "lattice.node.claim";
pub const NODE_RELEASE: &str = "lattice.node.release";
// Allocation lifecycle.
pub const ALLOCATION_COMPLETE: &str = "lattice.allocation.complete";
pub const ALLOCATION_FAILED: &str = "lattice.allocation.failed";
pub const ALLOCATION_CANCELLED: &str = "lattice.allocation.cancelled";
pub const ALLOCATION_REQUEUED: &str = "lattice.allocation.requeued";
pub const ALLOCATION_SUSPENDED: &str = "lattice.allocation.suspended";
// Data, session, log and metrics access.
pub const DATA_ACCESS: &str = "lattice.data.access";
pub const ATTACH_SESSION: &str = "lattice.attach.session";
pub const LOG_ACCESS: &str = "lattice.log.access";
pub const METRICS_QUERY: &str = "lattice.metrics.query";
// Wipe progress.
pub const WIPE_STARTED: &str = "lattice.wipe.started";
pub const WIPE_COMPLETED: &str = "lattice.wipe.completed";
pub const WIPE_FAILED: &str = "lattice.wipe.failed";
// Scheduling decisions.
pub const PROPOSAL_COMMITTED: &str = "lattice.scheduling.proposal_committed";
pub const PROPOSAL_REJECTED: &str = "lattice.scheduling.proposal_rejected";
pub const PREEMPTION_INITIATED: &str = "lattice.scheduling.preemption_initiated";
// Quota.
pub const QUOTA_EXCEEDED: &str = "lattice.quota.exceeded";
pub const QUOTA_UPDATED: &str = "lattice.quota.updated";
// DAG workflows.
pub const DAG_SUBMITTED: &str = "lattice.dag.submitted";
pub const DAG_COMPLETED: &str = "lattice.dag.completed";
// Network VNI assignment.
pub const VNI_ASSIGNED: &str = "lattice.network.vni_assigned";
pub const VNI_RELEASED: &str = "lattice.network.vni_released";
}
/// Builds an `hpc_audit::AuditEvent` from the given parts.
///
/// The function fills in three things itself:
/// - `id`: a freshly generated v4 UUID, rendered as a string;
/// - `timestamp`: the current UTC time;
/// - `principal`: built from `principal_identity`, always with
///   `PrincipalType::Human` and an empty `role`.
///
/// `action`, `detail` and `principal_identity` are copied into owned strings;
/// `scope`, `outcome`, `metadata` and `source` are moved into the event as-is.
///
/// NOTE(review): because the principal type is hard-coded to `Human`, events
/// for non-human principals built through this helper are still labelled
/// `Human` — confirm that this is intended.
pub fn lattice_audit_event(
    action: &str,
    principal_identity: &str,
    scope: hpc_audit::AuditScope,
    outcome: hpc_audit::AuditOutcome,
    detail: &str,
    metadata: serde_json::Value,
    source: hpc_audit::AuditSource,
) -> hpc_audit::AuditEvent {
    // Generated values first, in the same order as the fields they populate.
    let id = Uuid::new_v4().to_string();
    let timestamp = Utc::now();

    let principal = hpc_audit::AuditPrincipal {
        identity: String::from(principal_identity),
        principal_type: hpc_audit::PrincipalType::Human,
        role: String::new(),
    };

    hpc_audit::AuditEvent {
        id,
        timestamp,
        principal,
        action: String::from(action),
        scope,
        outcome,
        detail: String::from(detail),
        metadata,
        source,
    }
}
/// Selection criteria passed to [`AuditLog::query`].
///
/// Every field is optional, and `AuditFilter::default()` leaves all of them
/// `None`.
/// NOTE(review): presumably a `None` field means "do not filter on this
/// attribute" and set fields are combined with AND — the matching logic lives
/// in the log implementations.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct AuditFilter {
/// Principal to select.
/// NOTE(review): presumably compared with the event's principal identity —
/// confirm.
pub principal: Option<String>,
/// Allocation to select.
pub allocation: Option<AllocId>,
/// Action string to select.
pub action: Option<String>,
/// Start of the time window (UTC).
/// NOTE(review): inclusive or exclusive bound is not visible here.
pub since: Option<DateTime<Utc>>,
/// End of the time window (UTC).
/// NOTE(review): inclusive or exclusive bound is not visible here.
pub until: Option<DateTime<Utc>>,
}
/// One scheduling decision produced by [`VClusterScheduler::schedule`]: an
/// allocation id paired with a list of node ids.
///
/// Serializable with serde.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Placement {
/// The allocation being placed.
pub allocation_id: AllocId,
/// The nodes chosen for that allocation.
pub nodes: Vec<NodeId>,
}
/// Describes a rejected transition by naming where it started and where it
/// tried to go.
///
/// Implements [`std::fmt::Display`] and [`std::error::Error`], so a value can
/// be boxed as `Box<dyn Error>`, propagated with `?` into dynamic error types,
/// or used as the source of another error.
///
/// NOTE(review): which state machine this belongs to (node or allocation
/// state, judging by the surrounding types) is not visible in this file.
#[derive(Debug, Clone)]
pub struct InvalidTransition {
    /// Name of the state the transition started from.
    pub from: String,
    /// Name of the state the transition tried to reach.
    pub to: String,
}

impl std::fmt::Display for InvalidTransition {
    /// Renders the error as `invalid transition: <from> -> <to>`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "invalid transition: {} -> {}", self.from, self.to)
    }
}

// No underlying cause to expose, so the default `source()` (`None`) is correct.
impl std::error::Error for InvalidTransition {}