// scirs2_core/distributed/mod.rs

//! Production-grade distributed computing infrastructure
//!
//! This module provides comprehensive distributed computing capabilities
//! for SciRS2 Core 1.0, including distributed arrays, cluster management,
//! fault tolerance, and scalable computation orchestration.
//!
//! ## Distributed Primitives
//!
//! Low-level local primitives (work queues, worker pools, map-reduce) are
//! provided in [`primitives`].  These complement the cluster-management
//! machinery with simple, channel-based concurrency helpers.

13pub mod array;
14pub mod cluster;
15pub mod communication;
16pub mod fault_tolerance;
17pub mod load_balancing;
18pub mod lock_free;
19pub mod orchestration;
20pub mod par_iter;
21pub mod parallel_scan;
22pub mod primitives;
23pub mod scheduler;
24pub mod task_graph;
25
26// Array operations
27pub use array::{DistributedArray, DistributedArrayManager};
28
29// Cluster management
30pub use cluster::{
31    initialize_cluster_manager, BackoffStrategy, ClusterConfiguration, ClusterEventLog,
32    ClusterHealth, ClusterManager, ClusterState, ComputeCapacity, DistributedTask,
33    NodeCapabilities, NodeInfo as ClusterNodeInfo, NodeMetadata, NodeStatus, NodeType,
34    ResourceRequirements, RetryPolicy, TaskId, TaskParameters, TaskPriority as ClusterTaskPriority,
35    TaskType,
36};
37
38// Communication
39pub use communication::{
40    CommunicationEndpoint, CommunicationManager, DistributedMessage, HeartbeatHandler,
41    MessageHandler,
42};
43
44// Fault tolerance
45pub use fault_tolerance::{
46    initialize_fault_tolerance, ClusterHealthSummary, FaultDetectionStrategy,
47    FaultToleranceManager, NodeHealth as FaultNodeHealth, NodeInfo as FaultNodeInfo,
48    RecoveryStrategy,
49};
50
51// Load balancing
52pub use load_balancing::{
53    LoadBalancer as DistributedLoadBalancer, LoadBalancingStats, LoadBalancingStrategy,
54    NodeLoad as LoadBalancerNodeLoad, TaskAssignment as LoadBalancerTaskAssignment,
55};
56
57// Orchestration
58pub use orchestration::{
59    OrchestrationEngine, OrchestrationStats, OrchestratorNode, Task as OrchestrationTask,
60    TaskPriority as OrchestrationTaskPriority, TaskStatus as OrchestrationTaskStatus, Workflow,
61    WorkflowStatus,
62};
63
64// Scheduler
65pub use scheduler::{
66    initialize_distributed_scheduler, CompletedTask, DistributedScheduler, ExecutionTracker,
67    FailedTask, LoadBalancer as SchedulerLoadBalancer,
68    LoadBalancingStrategy as SchedulerLoadBalancingStrategy, NodeLoad as SchedulerNodeLoad,
69    SchedulingAlgorithm, SchedulingPolicies, TaskAssignment as SchedulerTaskAssignment, TaskQueue,
70};
71
72// Distributed primitives (work queue, worker pool, map-reduce, resource monitor)
73pub use primitives::{
74    chunked_parallel_process, distributed_map, distributed_map_reduce, try_distributed_map,
75    try_distributed_map_reduce, DistributedError, DistributedSliceExt, ResourceMonitor, WorkQueue,
76    WorkReceiver, WorkerPool,
77};
78
79// Lock-free data structures
80pub use lock_free::{LockFreeCounter, LockFreeQueue, LockFreeStack};
81
82// Task graph executor
83pub use task_graph::TaskGraph;
84
85// Parallel scan / prefix sum
86pub use parallel_scan::{
87    parallel_prefix_max, parallel_prefix_min, parallel_prefix_sum, parallel_prefix_sum_exclusive,
88    parallel_prefix_sum_f64, parallel_prefix_sum_i64, parallel_scan, parallel_scan_exclusive,
89    segmented_prefix_sum, try_parallel_prefix_sum, try_parallel_scan,
90};
91
92// Parallel iterator combinators
93pub use par_iter::{
94    par_all, par_any, par_filter, par_filter_map, par_fold, par_for_each, par_map, par_sort,
95    par_sort_by, try_par_fold, try_par_map,
96};
97
98/// Initialize distributed computing infrastructure
99#[allow(dead_code)]
100pub fn initialize_distributed_computing() -> crate::error::CoreResult<()> {
101    cluster::initialize_cluster_manager()?;
102    scheduler::initialize_distributed_scheduler()?;
103    fault_tolerance::initialize_fault_tolerance()?;
104    Ok(())
105}
106
107/// Get distributed system status
108#[allow(dead_code)]
109pub fn get_distributed_status() -> crate::error::CoreResult<DistributedSystemStatus> {
110    let cluster_manager = cluster::ClusterManager::global()?;
111    let scheduler = scheduler::DistributedScheduler::global()?;
112
113    Ok(DistributedSystemStatus {
114        cluster_health: cluster_manager.get_health()?,
115        active_nodes: cluster_manager.get_active_nodes()?.len(),
116        pending_tasks: scheduler.get_pending_task_count()?,
117        total_capacity: cluster_manager.get_total_capacity()?,
118    })
119}
120
121#[derive(Debug, Clone)]
122pub struct DistributedSystemStatus {
123    pub cluster_health: ClusterHealth,
124    pub active_nodes: usize,
125    pub pending_tasks: usize,
126    pub total_capacity: ComputeCapacity,
127}