scirs2-io 0.4.2

Input/Output utilities module for SciRS2 (scirs2-io)

//! Auto-generated module
//!
//! 🤖 Generated with [SplitRS](https://github.com/cool-japan/splitrs)
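//!
//! A minimal end-to-end sketch, assuming the surrounding crate exposes these
//! types as written (task fields and config values are illustrative, hence
//! the `ignore` fence):
//!
//! ```ignore
//! let task = Task {
//!     id: "ingest".to_string(),
//!     name: "Ingest CSV".to_string(),
//!     task_type: TaskType::DataIngestion,
//!     config: serde_json::json!({}),
//!     inputs: vec![],
//!     outputs: vec!["raw".to_string()],
//!     resources: ResourceRequirements::default(),
//! };
//! let workflow = WorkflowBuilder::new("etl", "ETL pipeline")
//!     .add_task(task)
//!     .build()?;
//! let executor = WorkflowExecutor::new(ExecutorConfig {
//!     max_concurrentworkflows: 1,
//!     task_timeout: Duration::minutes(10),
//!     checkpoint_interval: Duration::seconds(30),
//! });
//! let execution_id = executor.execute(&workflow)?;
//! ```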

use crate::error::{IoError, Result};
use crate::metadata::Metadata;
use chrono::{DateTime, Duration, Utc};
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet};
use std::path::PathBuf;
use std::sync::{Arc, Mutex};

/// Schedule configuration for periodic execution
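///
/// A sketch of a daily-at-midnight schedule using the cron form:
///
/// ```ignore
/// let schedule = ScheduleConfig {
///     cron: Some("0 0 * * *".to_string()),
///     interval: None,
///     start_time: None,
///     end_time: None,
/// };
/// ```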
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScheduleConfig {
    /// Cron expression for scheduling (e.g., "0 0 * * *")
    pub cron: Option<String>,
    /// Fixed interval between executions
    pub interval: Option<Duration>,
    /// Earliest time to start execution
    pub start_time: Option<DateTime<Utc>>,
    /// Latest time to stop execution
    pub end_time: Option<DateTime<Utc>>,
}
/// Retry policy for failed tasks
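///
/// With `exponential_backoff`, the delay is intended to double after each
/// failed attempt (e.g. 2s, 4s, 8s for the policy below):
///
/// ```ignore
/// let policy = RetryPolicy {
///     max_retries: 3,
///     backoff_seconds: 2,
///     exponential_backoff: true,
/// };
/// ```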
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RetryPolicy {
    /// Maximum number of retry attempts
    pub max_retries: usize,
    /// Initial backoff delay in seconds
    pub backoff_seconds: u64,
    /// Whether to use exponential backoff
    pub exponential_backoff: bool,
}
/// Workflow builder
pub struct WorkflowBuilder {
    workflow: Workflow,
}
impl WorkflowBuilder {
    /// Create a new workflow builder
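    ///
    /// A minimal sketch of the builder chain (`ingest_task` and
    /// `transform_task` are assumed `Task` values built elsewhere):
    ///
    /// ```ignore
    /// let workflow = WorkflowBuilder::new("pipeline", "Nightly pipeline")
    ///     .description("Ingest, then transform")
    ///     .add_task(ingest_task)
    ///     .add_task(transform_task)
    ///     .add_dependency("transform", "ingest")
    ///     .build()?; // validates task IDs and rejects dependency cycles
    /// ```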
    pub fn new(id: impl Into<String>, name: impl Into<String>) -> Self {
        Self {
            workflow: Workflow {
                id: id.into(),
                name: name.into(),
                description: None,
                tasks: Vec::new(),
                dependencies: HashMap::new(),
                config: WorkflowConfig::default(),
                metadata: Metadata::new(),
            },
        }
    }
    /// Set description
    pub fn description(mut self, desc: impl Into<String>) -> Self {
        self.workflow.description = Some(desc.into());
        self
    }
    /// Add a task
    pub fn add_task(mut self, task: Task) -> Self {
        self.workflow.tasks.push(task);
        self
    }
    /// Add a dependency
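    ///
    /// The first argument is the dependent task; the second must complete
    /// first (matching the `dependencies` map of task_id -> prerequisites):
    ///
    /// ```ignore
    /// // "export" runs only after "transform" has finished
    /// builder.add_dependency("export", "transform")
    /// ```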
    pub fn add_dependency(
        mut self,
        task_id: impl Into<String>,
        depends_on: impl Into<String>,
    ) -> Self {
        let task_id = task_id.into();
        let depends_on = depends_on.into();
        self.workflow
            .dependencies
            .entry(task_id)
            .or_default()
            .push(depends_on);
        self
    }
    /// Configure workflow
    pub fn configure<F>(mut self, f: F) -> Self
    where
        F: FnOnce(&mut WorkflowConfig),
    {
        f(&mut self.workflow.config);
        self
    }
    /// Build the workflow
    pub fn build(self) -> Result<Workflow> {
        self.validate()?;
        Ok(self.workflow)
    }
    fn validate(&self) -> Result<()> {
        if self.has_cycles() {
            return Err(IoError::ValidationError(
                "Workflow contains dependency cycles".to_string(),
            ));
        }
        let mut ids = HashSet::new();
        for task in &self.workflow.tasks {
            if !ids.insert(&task.id) {
                return Err(IoError::ValidationError(format!(
                    "Duplicate task ID: {}",
                    task.id
                )));
            }
        }
        for (task_id, deps) in &self.workflow.dependencies {
            if !ids.contains(&task_id) {
                return Err(IoError::ValidationError(format!(
                    "Unknown task in dependencies: {}",
                    task_id
                )));
            }
            for dep in deps {
                if !ids.contains(&dep) {
                    return Err(IoError::ValidationError(format!(
                        "Unknown dependency: {}",
                        dep
                    )));
                }
            }
        }
        Ok(())
    }
    fn has_cycles(&self) -> bool {
        let mut visited = HashSet::new();
        let mut rec_stack = HashSet::new();
        for task in &self.workflow.tasks {
            if !visited.contains(&task.id)
                && self.has_cycle_dfs(&task.id, &mut visited, &mut rec_stack)
            {
                return true;
            }
        }
        false
    }
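    /// Depth-first search helper for cycle detection: `rec_stack` holds the
    /// nodes on the current recursion path, so reaching a dependency that is
    /// still on the path indicates a back edge, i.e. a cycle.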
    fn has_cycle_dfs(
        &self,
        node: &str,
        visited: &mut HashSet<String>,
        rec_stack: &mut HashSet<String>,
    ) -> bool {
        visited.insert(node.to_string());
        rec_stack.insert(node.to_string());
        if let Some(deps) = self.workflow.dependencies.get(node) {
            for dep in deps {
                if !visited.contains(dep) {
                    if self.has_cycle_dfs(dep, visited, rec_stack) {
                        return true;
                    }
                } else if rec_stack.contains(dep) {
                    return true;
                }
            }
        }
        rec_stack.remove(node);
        false
    }
}
/// Workflow execution state
#[derive(Debug, Clone)]
pub struct WorkflowState {
    /// Workflow identifier
    pub workflowid: String,
    /// Unique execution identifier
    pub executionid: String,
    /// Current workflow status
    pub status: WorkflowStatus,
    /// State of each task in the workflow
    pub task_states: HashMap<String, TaskState>,
    /// When the workflow started
    pub start_time: Option<DateTime<Utc>>,
    /// When the workflow completed
    pub end_time: Option<DateTime<Utc>>,
    /// Error message if workflow failed
    pub error: Option<String>,
}
/// Workflow configuration
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WorkflowConfig {
    /// Maximum number of tasks to run in parallel
    pub max_parallel_tasks: usize,
    /// Retry policy for failed tasks
    pub retry_policy: RetryPolicy,
    /// Maximum workflow execution time
    pub timeout: Option<Duration>,
    /// Directory for workflow checkpoints
    pub checkpoint_dir: Option<PathBuf>,
    /// Notification settings
    pub notifications: NotificationConfig,
    /// Scheduling configuration for periodic execution
    pub scheduling: Option<ScheduleConfig>,
}
/// Runtime state of a task execution
#[derive(Debug, Clone)]
pub struct TaskState {
    /// Current execution status
    pub status: TaskStatus,
    /// When the task started executing
    pub start_time: Option<DateTime<Utc>>,
    /// When the task finished executing
    pub end_time: Option<DateTime<Utc>>,
    /// Number of execution attempts
    pub attempts: usize,
    /// Error message if task failed
    pub error: Option<String>,
    /// Task outputs as key-value pairs
    pub outputs: HashMap<String, serde_json::Value>,
}
/// Resource requirements for tasks
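///
/// Derives `Default`, so partial requirements can use struct update syntax
/// (the numbers are illustrative):
///
/// ```ignore
/// let resources = ResourceRequirements {
///     cpu_cores: Some(4),
///     memorygb: Some(8.0),
///     ..Default::default() // no GPU or disk-space constraints
/// };
/// ```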
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct ResourceRequirements {
    /// Number of CPU cores required
    pub cpu_cores: Option<usize>,
    /// Memory requirement in GB
    pub memorygb: Option<f64>,
    /// GPU requirements if needed
    pub gpu: Option<GpuRequirement>,
    /// Disk space requirement in GB
    pub disk_space_gb: Option<f64>,
}
/// Notification delivery channels
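///
/// With `rename_all = "snake_case"`, variants serialize under a lower-case
/// external tag; a sketch of the resulting JSON shape:
///
/// ```ignore
/// let channel = NotificationChannel::Webhook {
///     url: "https://example.com/hook".to_string(),
/// };
/// // serializes as: {"webhook":{"url":"https://example.com/hook"}}
/// ```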
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum NotificationChannel {
    /// Send email notifications
    Email {
        /// Email addresses to notify
        to: Vec<String>,
    },
    /// Send webhook notifications
    Webhook {
        /// Webhook URL
        url: String,
    },
    /// Write notifications to file
    File {
        /// File path for notifications
        path: PathBuf,
    },
}
/// Workflow definition
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Workflow {
    /// Unique workflow identifier
    pub id: String,
    /// Human-readable workflow name
    pub name: String,
    /// Optional workflow description
    pub description: Option<String>,
    /// List of tasks in the workflow
    pub tasks: Vec<Task>,
    /// Task dependencies (task_id -> list of prerequisite task_ids)
    pub dependencies: HashMap<String, Vec<String>>,
    /// Workflow configuration
    pub config: WorkflowConfig,
    /// Workflow metadata
    pub metadata: Metadata,
}
/// Notification configuration
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NotificationConfig {
    /// Send notification on successful completion
    pub on_success: bool,
    /// Send notification on failure
    pub on_failure: bool,
    /// Send notification when workflow starts
    pub on_start: bool,
    /// List of notification channels to use
    pub channels: Vec<NotificationChannel>,
}
/// Workflow executor
pub struct WorkflowExecutor {
    config: ExecutorConfig,
    state: Arc<Mutex<HashMap<String, WorkflowState>>>,
}
impl WorkflowExecutor {
    /// Create a new workflow executor
    pub fn new(config: ExecutorConfig) -> Self {
        Self {
            config,
            state: Arc::new(Mutex::new(HashMap::new())),
        }
    }
    /// Execute a workflow
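    ///
    /// Runs the workflow synchronously and returns an execution ID that can
    /// be used to inspect progress; a usage sketch:
    ///
    /// ```ignore
    /// let execution_id = executor.execute(&workflow)?;
    /// if let Some(state) = executor.get_state(&execution_id) {
    ///     println!("{:?}", state.status);
    /// }
    /// ```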
    pub fn execute(&self, workflow: &Workflow) -> Result<String> {
        let executionid = format!("{}-{}", workflow.id, Utc::now().timestamp());
        let mut state = WorkflowState {
            workflowid: workflow.id.clone(),
            executionid: executionid.clone(),
            status: WorkflowStatus::Pending,
            task_states: HashMap::new(),
            start_time: None,
            end_time: None,
            error: None,
        };
        for task in &workflow.tasks {
            state.task_states.insert(
                task.id.clone(),
                TaskState {
                    status: TaskStatus::Pending,
                    start_time: None,
                    end_time: None,
                    attempts: 0,
                    error: None,
                    outputs: HashMap::new(),
                },
            );
        }
        self.state
            .lock()
            .expect("workflow state mutex poisoned")
            .insert(executionid.clone(), state);
        self.executeworkflow_internal(workflow.clone(), executionid.clone())?;
        Ok(executionid)
    }
    /// Internal workflow execution logic
    fn executeworkflow_internal(&self, workflow: Workflow, executionid: String) -> Result<()> {
        {
            let mut states = self.state.lock().expect("workflow state mutex poisoned");
            if let Some(state) = states.get_mut(&executionid) {
                state.status = WorkflowStatus::Running;
                state.start_time = Some(Utc::now());
            }
        }
        let execution_result = self.execute_tasks_in_order(&workflow, &executionid);
        {
            let mut states = self.state.lock().expect("workflow state mutex poisoned");
            if let Some(state) = states.get_mut(&executionid) {
                state.end_time = Some(Utc::now());
                match execution_result {
                    Ok(_) => state.status = WorkflowStatus::Success,
                    Err(ref e) => {
                        state.status = WorkflowStatus::Failed;
                        state.error = Some(e.to_string());
                    }
                }
            }
        }
        execution_result
    }
    /// Execute tasks in dependency order
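    ///
    /// Tasks whose prerequisites have all completed are gathered into batches
    /// of at most `max_parallel_tasks`; tasks within a batch currently run
    /// sequentially, so the limit only bounds scheduling granularity.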
    fn execute_tasks_in_order(&self, workflow: &Workflow, executionid: &str) -> Result<()> {
        let mut executed_tasks = HashSet::new();
        let mut remaining_tasks: HashSet<String> =
            workflow.tasks.iter().map(|t| t.id.clone()).collect();
        while !remaining_tasks.is_empty() {
            let mut tasks_to_execute = Vec::new();
            for task_id in &remaining_tasks {
                let can_execute = workflow
                    .dependencies
                    .get(task_id)
                    .is_none_or(|deps| deps.iter().all(|dep| executed_tasks.contains(dep)));
                if can_execute {
                    tasks_to_execute.push(task_id.clone());
                }
            }
            if tasks_to_execute.is_empty() {
                return Err(IoError::Other(
                    "Circular dependency or unresolvable dependencies".to_string(),
                ));
            }
            let batch_size = workflow
                .config
                .max_parallel_tasks
                .min(tasks_to_execute.len());
            for batch in tasks_to_execute.chunks(batch_size) {
                for task_id in batch {
                    let task = workflow
                        .tasks
                        .iter()
                        .find(|t| &t.id == task_id)
                        .ok_or_else(|| IoError::Other(format!("Task not found: {task_id}")))?;
                    self.execute_single_task(task, executionid)?;
                    executed_tasks.insert(task_id.clone());
                    remaining_tasks.remove(task_id);
                }
            }
        }
        Ok(())
    }
    /// Execute a single task with retry logic
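    ///
    /// Currently retries up to a fixed 3 attempts with exponential backoff
    /// (1s, then 2s between attempts); the workflow's `RetryPolicy` is not
    /// consulted here.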
    fn execute_single_task(&self, task: &Task, executionid: &str) -> Result<()> {
        let mut attempt = 0;
        let max_retries = 3;
        loop {
            attempt += 1;
            {
                let mut states = self.state.lock().expect("workflow state mutex poisoned");
                if let Some(state) = states.get_mut(executionid) {
                    if let Some(task_state) = state.task_states.get_mut(&task.id) {
                        task_state.status = if attempt == 1 {
                            TaskStatus::Running
                        } else {
                            TaskStatus::Retrying
                        };
                        task_state.start_time = Some(Utc::now());
                        task_state.attempts = attempt;
                    }
                }
            }
            let result = self.execute_task_by_type(task);
            {
                let mut states = self.state.lock().expect("workflow state mutex poisoned");
                if let Some(state) = states.get_mut(executionid) {
                    if let Some(task_state) = state.task_states.get_mut(&task.id) {
                        task_state.end_time = Some(Utc::now());
                        match result {
                            Ok(outputs) => {
                                task_state.status = TaskStatus::Success;
                                task_state.outputs = outputs;
                                task_state.error = None;
                                return Ok(());
                            }
                            Err(e) => {
                                if attempt >= max_retries {
                                    task_state.status = TaskStatus::Failed;
                                    task_state.error = Some(e.to_string());
                                    return Err(e);
                                } else {
                                    task_state.error = Some(format!("Attempt {attempt}: {e}"));
                                }
                            }
                        }
                    }
                }
            }
            if attempt < max_retries {
                std::thread::sleep(std::time::Duration::from_secs(1 << (attempt - 1)));
            }
        }
    }
    /// Execute task based on its type
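    ///
    /// The current implementation simulates execution: each task type returns
    /// fixed placeholder outputs instead of performing real work.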
    fn execute_task_by_type(&self, task: &Task) -> Result<HashMap<String, serde_json::Value>> {
        let mut outputs = HashMap::new();
        match task.task_type {
            TaskType::DataIngestion => {
                outputs.insert("status".to_string(), serde_json::json!("completed"));
                outputs.insert("records_processed".to_string(), serde_json::json!(1000));
            }
            TaskType::Transform => {
                outputs.insert("status".to_string(), serde_json::json!("completed"));
                outputs.insert("rows_transformed".to_string(), serde_json::json!(1000));
            }
            TaskType::Validation => {
                outputs.insert("status".to_string(), serde_json::json!("completed"));
                outputs.insert("validation_errors".to_string(), serde_json::json!(0));
            }
            TaskType::MLTraining => {
                outputs.insert("status".to_string(), serde_json::json!("completed"));
                outputs.insert("model_accuracy".to_string(), serde_json::json!(0.95));
            }
            TaskType::MLInference => {
                outputs.insert("status".to_string(), serde_json::json!("completed"));
                outputs.insert("predictions_generated".to_string(), serde_json::json!(500));
            }
            TaskType::Export => {
                outputs.insert("status".to_string(), serde_json::json!("completed"));
                outputs.insert("files_written".to_string(), serde_json::json!(1));
            }
            TaskType::Script => {
                outputs.insert("status".to_string(), serde_json::json!("completed"));
                outputs.insert("exit_code".to_string(), serde_json::json!(0));
            }
            TaskType::SubWorkflow => {
                outputs.insert("status".to_string(), serde_json::json!("completed"));
                outputs.insert(
                    "subworkflowid".to_string(),
                    serde_json::json!(format!("sub-{}", task.id)),
                );
            }
            TaskType::Conditional => {
                let condition_met = true;
                outputs.insert(
                    "condition_met".to_string(),
                    serde_json::json!(condition_met),
                );
                outputs.insert("status".to_string(), serde_json::json!("completed"));
            }
            TaskType::Parallel => {
                outputs.insert("status".to_string(), serde_json::json!("completed"));
                outputs.insert("parallel_tasks_completed".to_string(), serde_json::json!(4));
            }
        }
        outputs.insert("execution_time_ms".to_string(), serde_json::json!(100));
        outputs.insert(
            "execution_timestamp".to_string(),
            serde_json::json!(Utc::now().to_rfc3339()),
        );
        Ok(outputs)
    }
    /// Get workflow state
    pub fn get_state(&self, executionid: &str) -> Option<WorkflowState> {
        self.state
            .lock()
            .expect("workflow state mutex poisoned")
            .get(executionid)
            .cloned()
    }
    /// Cancel a workflow execution
    pub fn cancel(&self, executionid: &str) -> Result<()> {
        let mut states = self.state.lock().expect("workflow state mutex poisoned");
        if let Some(state) = states.get_mut(executionid) {
            state.status = WorkflowStatus::Cancelled;
            state.end_time = Some(Utc::now());
            Ok(())
        } else {
            Err(IoError::Other(format!("Execution {executionid} not found")))
        }
    }
}
/// Configuration for the workflow executor
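///
/// A sketch of a typical configuration (`Duration` here is `chrono::Duration`;
/// the values are illustrative):
///
/// ```ignore
/// let executor = WorkflowExecutor::new(ExecutorConfig {
///     max_concurrentworkflows: 4,
///     task_timeout: Duration::minutes(30),
///     checkpoint_interval: Duration::seconds(60),
/// });
/// ```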
#[derive(Debug, Clone)]
pub struct ExecutorConfig {
    /// Maximum number of workflows that can run concurrently
    pub max_concurrentworkflows: usize,
    /// Maximum time allowed for a single task execution
    pub task_timeout: Duration,
    /// Interval between workflow state checkpoints
    pub checkpoint_interval: Duration,
}
/// GPU resource requirements for workflow execution
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GpuRequirement {
    /// Number of GPUs required
    pub count: usize,
    /// Memory requirement in GB per GPU
    pub memorygb: Option<f64>,
    /// Required CUDA compute capability (e.g., "7.5")
    pub compute_capability: Option<String>,
}
/// Task types
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "snake_case")]
pub enum TaskType {
    /// Data ingestion from files or databases
    DataIngestion,
    /// Data transformation using pipeline
    Transform,
    /// Data validation
    Validation,
    /// Machine learning training
    MLTraining,
    /// Model inference
    MLInference,
    /// Data export
    Export,
    /// Custom script execution
    Script,
    /// Sub-workflow execution
    SubWorkflow,
    /// Conditional execution
    Conditional,
    /// Parallel execution
    Parallel,
}
/// Workflow execution status
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum WorkflowStatus {
    /// Workflow is waiting to start
    Pending,
    /// Workflow is currently executing
    Running,
    /// Workflow completed successfully
    Success,
    /// Workflow failed with errors
    Failed,
    /// Workflow was cancelled by user
    Cancelled,
    /// Workflow is temporarily paused
    Paused,
}
/// Task definition
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Task {
    /// Unique task identifier
    pub id: String,
    /// Human-readable task name
    pub name: String,
    /// Type of task to execute
    pub task_type: TaskType,
    /// Task-specific configuration parameters
    pub config: serde_json::Value,
    /// Input data identifiers
    pub inputs: Vec<String>,
    /// Output data identifiers
    pub outputs: Vec<String>,
    /// Resource requirements for this task
    pub resources: ResourceRequirements,
}
/// Task execution status
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TaskStatus {
    /// Task is waiting to execute
    Pending,
    /// Task is currently running
    Running,
    /// Task completed successfully
    Success,
    /// Task failed with errors
    Failed,
    /// Task was skipped due to conditions
    Skipped,
    /// Task is being retried after failure
    Retrying,
}