Skip to main content

scirs2_cluster/serialization/
mod.rs

1//! Model serialization and deserialization
2//!
3//! This module provides comprehensive serialization capabilities for clustering models,
4//! including metadata enrichment, cross-platform compatibility, and workflow management.
5//!
6//! # Examples
7//!
8//! ## Basic Model Serialization
9//!
10//! ```rust
11//! use scirs2_cluster::serialization::{SerializableModel, EnhancedModel};
12//! use scirs2_core::ndarray::Array2;
13//!
14//! // Pretend we trained a KMeans and have centroids
15//! let centroids = Array2::from_shape_vec((3, 2), vec![0.0, 0.0, 1.0, 1.0, 2.0, 2.0]).expect("Operation failed");
16//! let model = scirs2_cluster::KMeansModel::new(centroids, 3, 10, 0.5, None);
17//!
18//! // Save model with enhanced metadata
19//! let enhanced_model = EnhancedModel::with_auto_metadata(model, "kmeans");
20//! enhanced_model.save_to_file("model.json").expect("Operation failed");
21//!
22//! // Load model back
23//! let loaded_model: EnhancedModel<scirs2_cluster::KMeansModel> = EnhancedModel::load_from_file("model.json").expect("Operation failed");
24//! ```
25//!
26//! ## Advanced Export with Multiple Formats
27//!
28//! ```rust
29//! use scirs2_cluster::serialization::{AdvancedExport, ExportFormat, ModelMetadata};
30//! use scirs2_core::ndarray::Array2;
31//! use scirs2_cluster::serialization::utils::create_default_metadata;
32//!
33//! // Export model in different formats
34//! let centroids = Array2::from_shape_vec((2, 2), vec![0.0, 0.0, 1.0, 1.0]).expect("Operation failed");
35//! let model = scirs2_cluster::KMeansModel::new(centroids, 2, 10, 0.1, None);
36//! let metadata = create_default_metadata("kmeans");
37//! let json_data = model.export_with_metadata(ExportFormat::Json, Some(metadata)).expect("Operation failed");
38//! // YAML export requires enabling the `yaml` feature
39//!
40//! // Export for compatibility with other libraries
41//! let sklearn_format = model.export_compatible("sklearn").expect("Operation failed");
42//! let pytorch_format = model.export_compatible("pytorch").expect("Operation failed");
43//! ```
44//!
45//! ## Workflow Management
46//!
47//! ```rust
48//! use scirs2_cluster::serialization::{ClusteringWorkflow, WorkflowConfig, TrainingStep};
49//!
50//! // Create and manage clustering workflows
51//! let config = WorkflowConfig::default();
52//! let mut workflow = ClusteringWorkflow::new("my_experiment".to_string(), config);
53//!
54//! // Add and execute a simple step
55//! workflow.add_step(TrainingStep {
56//!     name: "data_preprocessing".to_string(),
57//!     algorithm: "kmeans".to_string(),
58//!     parameters: Default::default(),
59//!     dependencies: vec![],
60//!     completed: false,
61//!     execution_time: None,
62//!     results: None,
63//! });
64//! workflow.execute().expect("Operation failed");
65//!
66//! // Saving and resuming workflow state:
67//! // checkpoints are saved and loaded using the default path from the config.
68//! // Note: to actually create a checkpoint file, set `checkpoint_dir` in `WorkflowConfig`.
69//! ```
70
71pub mod compatibility;
72pub mod core;
73pub mod export;
74pub mod models;
75pub mod workflow;
76
77use scirs2_core::ndarray::Array1;
78
79// Re-export main types for convenience
80pub use core::{
81    format_timestamp, DataCharacteristics, EnhancedModel, EnhancedModelMetadata, PlatformInfo,
82    SerializableModel, TrainingMetrics,
83};
84
85pub use export::{
86    AdvancedExport, AlgorithmConfig, ConvergenceInfo, ExportFormat, ExportSettings, FeatureStats,
87    ModelDataCharacteristics, ModelInfo, ModelMetadata, PerformanceMetrics,
88};
89
90pub use models::{
91    // Conversion functions
92    affinity_propagation_to_model,
93    birch_to_model,
94    dbscan_to_model,
95    gmm_to_model,
96    hierarchy_to_model,
97    kmeans_to_model,
98    leader_to_model,
99    leadertree_to_model,
100    meanshift_to_model,
101    // Save functions
102    save_affinity_propagation,
103    save_birch,
104    save_dbscan,
105    save_gmm,
106    save_hierarchy,
107    save_kmeans,
108    save_leader,
109    save_leadertree,
110    save_meanshift,
111    save_spectral_clustering,
112    spectral_clustering_to_model,
113    AffinityPropagationModel,
114    BirchModel,
115    ClusteringModel,
116    DBSCANModel,
117    GMMModel,
118    HierarchicalModel,
119    KMeansModel,
120    LeaderModel,
121    LeaderTreeModel,
122    MeanShiftModel,
123    SpectralClusteringModel,
124    SpectralModel,
125};
126
127pub use workflow::{
128    AlgorithmState, AutoSaveConfig, ClusteringWorkflow, ClusteringWorkflowManager, ExecutionRecord,
129    StepResult, TrainingStep, WorkflowConfig, WorkflowState, WorkflowStep,
130};
131
132// Re-export utility modules
133pub use export::utils;
134
135// Re-export compatibility functions
136pub use compatibility::*;
137
138/// Convenience function to create a serializable K-means model
139pub fn create_kmeans_model(
140    centroids: scirs2_core::ndarray::Array2<f64>,
141    n_clusters: usize,
142    n_iter: usize,
143    inertia: f64,
144    labels: Option<scirs2_core::ndarray::Array1<usize>>,
145) -> KMeansModel {
146    KMeansModel::new(centroids, n_clusters, n_iter, inertia, labels)
147}
148
149/// Convenience function to create a serializable DBSCAN model
150pub fn create_dbscan_model(
151    core_sample_indices: Vec<usize>,
152    components: scirs2_core::ndarray::Array2<f64>,
153    labels: scirs2_core::ndarray::Array1<i32>,
154    eps: f64,
155    min_samples: usize,
156) -> DBSCANModel {
157    DBSCANModel::new(
158        Array1::from_vec(core_sample_indices),
159        labels,
160        eps,
161        min_samples,
162    )
163}
164
165/// Convenience function to create a serializable hierarchical clustering model
166pub fn create_hierarchical_model(
167    n_clusters: usize,
168    labels: scirs2_core::ndarray::Array1<usize>,
169    linkage_matrix: scirs2_core::ndarray::Array2<f64>,
170    distances: Vec<f64>,
171) -> HierarchicalModel {
172    HierarchicalModel::new(linkage_matrix, n_clusters, "ward".to_string(), None)
173}
174
175/// Convenience function to create enhanced model metadata
176pub fn create_enhanced_metadata(algorithm_name: &str) -> EnhancedModelMetadata {
177    let mut metadata = EnhancedModelMetadata::default();
178    metadata.algorithm_signature = algorithm_name.to_string();
179    metadata
180}
181
182/// Convenience function to create default export settings
183pub fn default_export_settings() -> ExportSettings {
184    ExportSettings::default()
185}
186
187/// Convenience function to export model to multiple formats
188pub fn export_model_multi_format<T: AdvancedExport>(
189    model: &T,
190    base_path: &str,
191    formats: &[ExportFormat],
192    metadata: Option<ModelMetadata>,
193) -> crate::error::Result<()> {
194    for format in formats {
195        let extension = match format {
196            ExportFormat::Json => "json",
197            ExportFormat::JsonGz => "json.gz",
198            ExportFormat::Yaml => "yaml",
199            ExportFormat::Csv => "csv",
200            ExportFormat::Xml => "xml",
201            ExportFormat::Hdf5 => "h5",
202            ExportFormat::Binary => "bin",
203            ExportFormat::Custom(ext) => ext,
204        };
205
206        let file_path = format!("{}.{}", base_path, extension);
207        let data = model.export_with_metadata(format.clone(), metadata.clone())?;
208        std::fs::write(file_path, data).map_err(|e| {
209            crate::error::ClusteringError::InvalidInput(format!("Failed to write file: {}", e))
210        })?;
211    }
212    Ok(())
213}
214
215/// Convenience function to validate model before serialization
216pub fn validate_model_for_serialization<T: AdvancedExport>(model: &T) -> crate::error::Result<()> {
217    model.validate_for_export()
218}
219
220/// Convenience function to create a workflow with default configuration
221pub fn create_default_workflow(name: String) -> ClusteringWorkflow {
222    ClusteringWorkflow::new(name, WorkflowConfig::default())
223}
224
225/// Batch export multiple models with different formats
226pub fn batch_export_models<T: AdvancedExport>(
227    models: &[(String, &T)],
228    base_directory: &str,
229    format: ExportFormat,
230    metadata_fn: Option<fn(&str) -> ModelMetadata>,
231) -> crate::error::Result<()> {
232    std::fs::create_dir_all(base_directory).map_err(|e| {
233        crate::error::ClusteringError::InvalidInput(format!("Failed to create directory: {}", e))
234    })?;
235
236    for (name, model) in models {
237        let metadata = metadata_fn.map(|f| f(name));
238        let file_path = std::path::Path::new(base_directory).join(name);
239        let data = model.export_with_metadata(format.clone(), metadata)?;
240        std::fs::write(file_path, data).map_err(|e| {
241            crate::error::ClusteringError::InvalidInput(format!(
242                "Failed to write model {}: {}",
243                name, e
244            ))
245        })?;
246    }
247
248    Ok(())
249}
250
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::Array2;

    /// The K-means helper must carry the cluster and iteration counts through.
    #[test]
    fn test_create_kmeans_model() {
        let centers = Array2::from_shape_vec((2, 2), vec![0.0, 0.0, 1.0, 1.0])
            .expect("Operation failed");

        let kmeans = create_kmeans_model(centers, 2, 10, 0.5, None);

        assert_eq!(kmeans.n_clusters, 2);
        assert_eq!(kmeans.n_iter, 10);
    }

    /// The DBSCAN helper must carry `eps` and `min_samples` through.
    #[test]
    fn test_create_dbscan_model() {
        let core_indices = vec![0, 1, 2];
        let point_matrix = Array2::from_shape_vec((3, 2), vec![0.0, 0.0, 1.0, 1.0, 2.0, 2.0])
            .expect("Operation failed");
        let assignments = scirs2_core::ndarray::Array1::from_vec(vec![0, 0, 1]);

        let dbscan = create_dbscan_model(core_indices, point_matrix, assignments, 0.5, 2);

        assert_eq!(dbscan.eps, 0.5);
        assert_eq!(dbscan.min_samples, 2);
    }

    /// Metadata must record the algorithm name and have a non-empty OS field.
    #[test]
    fn test_enhanced_metadata_creation() {
        let enriched = create_enhanced_metadata("test_algorithm");

        assert_eq!(enriched.algorithm_signature, "test_algorithm");
        assert!(!enriched.platform_info.os.is_empty());
    }

    /// Pins the default values of the export settings.
    #[test]
    fn test_default_export_settings() {
        let defaults = default_export_settings();

        assert!(defaults.include_raw_data);
        assert!(!defaults.include_training_data);
        assert_eq!(defaults.float_precision, Some(6));
    }

    /// A freshly created workflow keeps its id and has not started yet.
    #[test]
    fn test_create_default_workflow() {
        let fresh = create_default_workflow("test_workflow".to_string());

        assert_eq!(fresh.workflow_id, "test_workflow");
        assert!(matches!(fresh.current_state, AlgorithmState::NotStarted));
    }
}
297}