use serde::{Deserialize, Serialize};
use crate::models::{
ComputeNodeModel, EventModel, FailureHandlerModel, FileModel, JobModel, LocalSchedulerModel,
ResourceRequirementsModel, ResultModel, RoCrateEntityModel, SlurmSchedulerModel, UserDataModel,
WorkflowActionModel, WorkflowModel,
};
/// Format version written into every export; bump when the schema changes
/// in a way importers must distinguish.
pub const EXPORT_VERSION: &str = "1.0";
/// Self-contained, serializable snapshot of a workflow and all of its
/// associated database entities, used for export/import round-trips.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WorkflowExport {
    // Schema version of this export (see EXPORT_VERSION).
    pub export_version: String,
    // RFC 3339 UTC timestamp recorded at export time.
    pub exported_at: String,
    pub workflow: WorkflowModel,
    pub files: Vec<FileModel>,
    pub user_data: Vec<UserDataModel>,
    pub resource_requirements: Vec<ResourceRequirementsModel>,
    pub slurm_schedulers: Vec<SlurmSchedulerModel>,
    pub local_schedulers: Vec<LocalSchedulerModel>,
    // `default` keeps imports of older exports (written before this field
    // existed) deserializable.
    #[serde(default)]
    pub failure_handlers: Vec<FailureHandlerModel>,
    #[serde(default)]
    pub ro_crate_entities: Vec<RoCrateEntityModel>,
    pub jobs: Vec<JobModel>,
    pub workflow_actions: Vec<WorkflowActionModel>,
    // Optional sections: omitted from the JSON entirely when None.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub compute_nodes: Option<Vec<ComputeNodeModel>>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub results: Option<Vec<ResultModel>>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub events: Option<Vec<EventModel>>,
}
impl WorkflowExport {
    /// Builds an empty export envelope around `workflow`, stamped with the
    /// current [`EXPORT_VERSION`] and an RFC 3339 UTC export timestamp.
    /// All entity collections start empty; optional sections start as `None`.
    pub fn new(workflow: WorkflowModel) -> Self {
        let exported_at = chrono::Utc::now().to_rfc3339();
        Self {
            export_version: String::from(EXPORT_VERSION),
            exported_at,
            workflow,
            files: vec![],
            user_data: vec![],
            resource_requirements: vec![],
            slurm_schedulers: vec![],
            local_schedulers: vec![],
            failure_handlers: vec![],
            ro_crate_entities: vec![],
            jobs: vec![],
            workflow_actions: vec![],
            compute_nodes: None,
            results: None,
            events: None,
        }
    }
}
/// Per-entity record counts for an export or import, used for summary
/// reporting. All counts default to zero.
#[derive(Debug, Clone, Default)]
pub struct ExportImportStats {
    pub jobs: usize,
    pub files: usize,
    pub user_data: usize,
    pub resource_requirements: usize,
    pub slurm_schedulers: usize,
    pub local_schedulers: usize,
    pub failure_handlers: usize,
    pub ro_crate_entities: usize,
    pub workflow_actions: usize,
    pub compute_nodes: usize,
    pub results: usize,
    pub events: usize,
}
impl ExportImportStats {
    /// Derives entity counts from an export bundle. Optional sections
    /// (`compute_nodes`, `results`, `events`) that are absent count as zero.
    pub fn from_export(export: &WorkflowExport) -> Self {
        Self {
            jobs: export.jobs.len(),
            files: export.files.len(),
            user_data: export.user_data.len(),
            resource_requirements: export.resource_requirements.len(),
            slurm_schedulers: export.slurm_schedulers.len(),
            local_schedulers: export.local_schedulers.len(),
            failure_handlers: export.failure_handlers.len(),
            ro_crate_entities: export.ro_crate_entities.len(),
            workflow_actions: export.workflow_actions.len(),
            compute_nodes: export.compute_nodes.as_ref().map_or(0, |v| v.len()),
            results: export.results.as_ref().map_or(0, |v| v.len()),
            events: export.events.as_ref().map_or(0, |v| v.len()),
        }
    }
}
use std::collections::HashMap;
/// Maps entity ids from an imported export file (old ids) to the ids newly
/// assigned by the local database (new ids), keyed per entity kind.
#[derive(Debug, Default)]
pub struct IdMappings {
    pub files: HashMap<i64, i64>,
    pub user_data: HashMap<i64, i64>,
    pub resource_requirements: HashMap<i64, i64>,
    pub slurm_schedulers: HashMap<i64, i64>,
    pub local_schedulers: HashMap<i64, i64>,
    pub failure_handlers: HashMap<i64, i64>,
    pub jobs: HashMap<i64, i64>,
    pub compute_nodes: HashMap<i64, i64>,
}
impl IdMappings {
    /// Creates an empty set of mappings.
    pub fn new() -> Self {
        Self::default()
    }

    /// Returns the new file id for `old_id`, if one was recorded.
    pub fn remap_file_id(&self, old_id: i64) -> Option<i64> {
        self.files.get(&old_id).copied()
    }

    /// Returns the new user-data id for `old_id`, if one was recorded.
    pub fn remap_user_data_id(&self, old_id: i64) -> Option<i64> {
        self.user_data.get(&old_id).copied()
    }

    /// Returns the new resource-requirements id for `old_id`, if one was recorded.
    pub fn remap_resource_requirements_id(&self, old_id: i64) -> Option<i64> {
        self.resource_requirements.get(&old_id).copied()
    }

    /// Returns the new scheduler id for `old_id`, checking Slurm schedulers
    /// first and falling back to local schedulers.
    pub fn remap_scheduler_id(&self, old_id: i64) -> Option<i64> {
        self.slurm_schedulers
            .get(&old_id)
            .or_else(|| self.local_schedulers.get(&old_id))
            .copied()
    }

    /// Returns the new failure-handler id for `old_id`, if one was recorded.
    pub fn remap_failure_handler_id(&self, old_id: i64) -> Option<i64> {
        self.failure_handlers.get(&old_id).copied()
    }

    /// Returns the new job id for `old_id`, if one was recorded.
    pub fn remap_job_id(&self, old_id: i64) -> Option<i64> {
        self.jobs.get(&old_id).copied()
    }

    /// Returns the new compute-node id for `old_id`, if one was recorded.
    pub fn remap_compute_node_id(&self, old_id: i64) -> Option<i64> {
        self.compute_nodes.get(&old_id).copied()
    }

    /// Remaps a slice of file ids, silently dropping ids with no mapping.
    pub fn remap_file_ids(&self, old_ids: &[i64]) -> Vec<i64> {
        old_ids
            .iter()
            .filter_map(|id| self.remap_file_id(*id))
            .collect()
    }

    /// Remaps a slice of user-data ids, silently dropping ids with no mapping.
    pub fn remap_user_data_ids(&self, old_ids: &[i64]) -> Vec<i64> {
        old_ids
            .iter()
            .filter_map(|id| self.remap_user_data_id(*id))
            .collect()
    }

    /// Remaps a slice of job ids, silently dropping ids with no mapping.
    pub fn remap_job_ids(&self, old_ids: &[i64]) -> Vec<i64> {
        old_ids
            .iter()
            .filter_map(|id| self.remap_job_id(*id))
            .collect()
    }

    /// Rewrites RO-Crate job references of the form `#job-<id>-attempt-<n>`
    /// in both `entity_id` and `metadata`, replacing each old job id with its
    /// new id. References whose job id has no mapping are left unchanged.
    /// Returns the rewritten `(entity_id, metadata)` pair.
    pub fn remap_ro_crate_job_ids(&self, entity_id: &str, metadata: &str) -> (String, String) {
        // The pattern is a fixed literal, so compilation cannot fail.
        let job_id_pattern = regex::Regex::new(r"#job-(\d+)-attempt-(\d+)").unwrap();
        // Shared rewriter for both inputs.
        // NOTE: the closure type annotation was previously garbled to
        // `®ex::Captures` (mojibake for `&regex::Captures`), which did not compile.
        let remap = |text: &str| -> String {
            job_id_pattern
                .replace_all(text, |caps: &regex::Captures| {
                    let old_job_id: i64 = caps[1].parse().unwrap_or(0);
                    match self.remap_job_id(old_job_id) {
                        Some(new_job_id) => {
                            format!("#job-{}-attempt-{}", new_job_id, &caps[2])
                        }
                        // No mapping: keep the original reference verbatim.
                        None => caps[0].to_string(),
                    }
                })
                .into_owned()
        };
        (remap(entity_id), remap(metadata))
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_remap_ro_crate_job_ids_entity_id() {
        // Mapped job ids in the entity id are rewritten to their new values.
        let mut ids = IdMappings::new();
        ids.jobs.insert(42, 99);
        ids.jobs.insert(43, 100);

        let (entity, _) = ids.remap_ro_crate_job_ids("#job-42-attempt-1", "{}");
        assert_eq!(entity, "#job-99-attempt-1");

        let (entity, _) = ids.remap_ro_crate_job_ids("#job-43-attempt-3", "{}");
        assert_eq!(entity, "#job-100-attempt-3");
    }

    #[test]
    fn test_remap_ro_crate_job_ids_metadata() {
        // Job references embedded inside metadata JSON are rewritten too.
        let mut ids = IdMappings::new();
        ids.jobs.insert(42, 99);

        let metadata = r##"{"@id": "output.csv", "wasGeneratedBy": {"@id": "#job-42-attempt-1"}}"##;
        let (_, rewritten) = ids.remap_ro_crate_job_ids("output.csv", metadata);
        assert!(rewritten.contains("#job-99-attempt-1"));
        assert!(!rewritten.contains("#job-42-attempt-1"));
    }

    #[test]
    fn test_remap_ro_crate_job_ids_no_mapping() {
        // Unmapped job references are left untouched.
        let ids = IdMappings::new();
        let (entity, _) = ids.remap_ro_crate_job_ids("#job-42-attempt-1", "{}");
        assert_eq!(entity, "#job-42-attempt-1");
    }

    #[test]
    fn test_remap_ro_crate_job_ids_non_job_entity() {
        // Entity ids that do not match the job pattern pass through unchanged.
        let mut ids = IdMappings::new();
        ids.jobs.insert(42, 99);
        let (entity, _) =
            ids.remap_ro_crate_job_ids("data/output.csv", r#"{"@id": "data/output.csv"}"#);
        assert_eq!(entity, "data/output.csv");
    }
}