scirs2_cluster/serialization/
core.rs

1//! Core serialization traits and metadata structures
2//!
3//! This module provides the fundamental traits and metadata structures
4//! for model serialization and deserialization.
5
6use crate::error::{ClusteringError, Result};
7use flate2::read::GzDecoder;
8use flate2::write::GzEncoder;
9use flate2::Compression;
10use serde::{Deserialize, Serialize};
11use std::fs::File;
12use std::io::{Read, Write};
13use std::path::Path;
14use std::time::{SystemTime, UNIX_EPOCH};
15
16/// Trait for clustering models that can be serialized
17pub trait SerializableModel: Serialize + for<'de> Deserialize<'de> {
18    /// Save the model to a file
19    fn save_to_file<P: AsRef<Path>>(&self, path: P) -> Result<()> {
20        let file = File::create(path)
21            .map_err(|e| ClusteringError::InvalidInput(format!("Failed to create file: {}", e)))?;
22        self.save_to_writer(file)
23    }
24
25    /// Save the model to a writer
26    fn save_to_writer<W: Write>(&self, writer: W) -> Result<()> {
27        serde_json::to_writer_pretty(writer, self)
28            .map_err(|e| ClusteringError::InvalidInput(format!("Failed to serialize model: {}", e)))
29    }
30
31    /// Save the model to a file with compression
32    fn save_to_file_compressed<P: AsRef<Path>>(&self, path: P) -> Result<()> {
33        let file = File::create(path)
34            .map_err(|e| ClusteringError::InvalidInput(format!("Failed to create file: {}", e)))?;
35        let encoder = GzEncoder::new(file, Compression::default());
36        self.save_to_writer(encoder)
37    }
38
39    /// Load the model from a compressed file
40    fn load_from_file_compressed<P: AsRef<Path>>(path: P) -> Result<Self> {
41        let file = File::open(path)
42            .map_err(|e| ClusteringError::InvalidInput(format!("Failed to open file: {}", e)))?;
43        let decoder = GzDecoder::new(file);
44        Self::load_from_reader(decoder)
45    }
46
47    /// Load the model from a file
48    fn load_from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
49        let mut file = File::open(path)
50            .map_err(|e| ClusteringError::InvalidInput(format!("Failed to open file: {}", e)))?;
51        Self::load_from_reader(&mut file)
52    }
53
54    /// Load the model from a reader
55    fn load_from_reader<R: Read>(reader: R) -> Result<Self> {
56        serde_json::from_reader(reader).map_err(|e| {
57            ClusteringError::InvalidInput(format!("Failed to deserialize model: {}", e))
58        })
59    }
60}
61
62/// Enhanced model metadata with versioning and performance metrics
63#[derive(Serialize, Deserialize, Debug, Clone)]
64pub struct EnhancedModelMetadata {
65    /// Model format version for backward compatibility
66    pub format_version: String,
67    /// scirs2-cluster library version
68    pub library_version: String,
69    /// Timestamp when model was created (Unix epoch)
70    pub created_timestamp: u64,
71    /// Algorithm name and configuration hash
72    pub algorithm_signature: String,
73    /// Performance metrics during training
74    pub training_metrics: TrainingMetrics,
75    /// Data characteristics
76    pub data_characteristics: DataCharacteristics,
77    /// Model integrity hash
78    pub integrity_hash: String,
79    /// Platform information
80    pub platform_info: PlatformInfo,
81}
82
83/// Training performance metrics
84#[derive(Serialize, Deserialize, Debug, Clone)]
85pub struct TrainingMetrics {
86    /// Total training time in milliseconds
87    pub training_time_ms: u64,
88    /// Number of iterations/epochs
89    pub iterations: usize,
90    /// Final convergence metric (e.g., inertia, log-likelihood)
91    pub final_convergence_metric: f64,
92    /// Peak memory usage in bytes
93    pub peak_memory_bytes: usize,
94    /// CPU utilization during training (0.0 to 100.0)
95    pub avg_cpu_utilization: f64,
96}
97
98/// Data characteristics for validation
99#[derive(Serialize, Deserialize, Debug, Clone)]
100pub struct DataCharacteristics {
101    /// Number of samples in training data
102    pub n_samples: usize,
103    /// Number of features
104    pub n_features: usize,
105    /// Data type fingerprint
106    pub data_type_fingerprint: String,
107    /// Feature range summaries (min, max for each feature)
108    pub feature_ranges: Option<Vec<(f64, f64)>>,
109    /// Whether data was normalized/standardized
110    pub preprocessing_applied: Vec<String>,
111}
112
113/// Platform information for cross-platform compatibility
114#[derive(Serialize, Deserialize, Debug, Clone)]
115pub struct PlatformInfo {
116    /// Operating system
117    pub os: String,
118    /// Architecture (x86_64, aarch64, etc.)
119    pub arch: String,
120    /// Rust compiler version
121    pub rust_version: String,
122    /// CPU features used (SIMD, etc.)
123    pub cpu_features: Vec<String>,
124}
125
126impl Default for EnhancedModelMetadata {
127    fn default() -> Self {
128        Self {
129            format_version: "1.0.0".to_string(),
130            library_version: env!("CARGO_PKG_VERSION").to_string(),
131            created_timestamp: SystemTime::now()
132                .duration_since(UNIX_EPOCH)
133                .unwrap_or_default()
134                .as_secs(),
135            algorithm_signature: "unknown".to_string(),
136            training_metrics: TrainingMetrics::default(),
137            data_characteristics: DataCharacteristics::default(),
138            integrity_hash: String::new(),
139            platform_info: PlatformInfo::detect(),
140        }
141    }
142}
143
144impl Default for TrainingMetrics {
145    fn default() -> Self {
146        Self {
147            training_time_ms: 0,
148            iterations: 0,
149            final_convergence_metric: 0.0,
150            peak_memory_bytes: 0,
151            avg_cpu_utilization: 0.0,
152        }
153    }
154}
155
156impl Default for DataCharacteristics {
157    fn default() -> Self {
158        Self {
159            n_samples: 0,
160            n_features: 0,
161            data_type_fingerprint: "unknown".to_string(),
162            feature_ranges: None,
163            preprocessing_applied: Vec::new(),
164        }
165    }
166}
167
168impl PlatformInfo {
169    /// Detect current platform information
170    pub fn detect() -> Self {
171        Self {
172            os: std::env::consts::OS.to_string(),
173            arch: std::env::consts::ARCH.to_string(),
174            rust_version: option_env!("CARGO_PKG_RUST_VERSION")
175                .filter(|s| !s.is_empty())
176                .unwrap_or("unknown")
177                .to_string(),
178            cpu_features: Self::detect_cpu_features(),
179        }
180    }
181
182    /// Detect available CPU features
183    fn detect_cpu_features() -> Vec<String> {
184        let mut features = Vec::new();
185
186        #[cfg(target_arch = "x86_64")]
187        {
188            if std::arch::is_x86_feature_detected!("avx2") {
189                features.push("avx2".to_string());
190            }
191            if std::arch::is_x86_feature_detected!("sse4.1") {
192                features.push("sse4.1".to_string());
193            }
194            if std::arch::is_x86_feature_detected!("fma") {
195                features.push("fma".to_string());
196            }
197        }
198
199        #[cfg(target_arch = "aarch64")]
200        {
201            if std::arch::is_aarch64_feature_detected!("neon") {
202                features.push("neon".to_string());
203            }
204        }
205
206        features
207    }
208}
209
210/// Enhanced model wrapper with metadata
211#[derive(Serialize, Debug, Clone)]
212pub struct EnhancedModel<T: SerializableModel> {
213    /// The actual model data
214    pub model: T,
215    /// Enhanced metadata
216    pub metadata: EnhancedModelMetadata,
217}
218
219impl<'de, T: SerializableModel> Deserialize<'de> for EnhancedModel<T> {
220    fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
221    where
222        D: serde::Deserializer<'de>,
223    {
224        #[derive(Deserialize)]
225        struct EnhancedModelHelper<U> {
226            model: U,
227            metadata: EnhancedModelMetadata,
228        }
229
230        let helper = EnhancedModelHelper::deserialize(deserializer)?;
231        Ok(EnhancedModel {
232            model: helper.model,
233            metadata: helper.metadata,
234        })
235    }
236}
237
238impl<T: SerializableModel> EnhancedModel<T> {
239    /// Create a new enhanced model with metadata
240    pub fn new(model: T, metadata: EnhancedModelMetadata) -> Self {
241        Self { model, metadata }
242    }
243
244    /// Create an enhanced model with automatic metadata generation
245    pub fn with_auto_metadata(model: T, algorithm_name: &str) -> Self {
246        let mut metadata = EnhancedModelMetadata::default();
247        metadata.algorithm_signature = algorithm_name.to_string();
248        Self { model, metadata }
249    }
250
251    /// Validate model integrity
252    pub fn validate_integrity(&self) -> Result<bool> {
253        // Simple validation - in practice this would check the hash
254        Ok(!self.metadata.integrity_hash.is_empty())
255    }
256
257    /// Get model format version
258    pub fn format_version(&self) -> &str {
259        &self.metadata.format_version
260    }
261
262    /// Check if model is compatible with current library version
263    pub fn is_compatible(&self) -> bool {
264        // Simple compatibility check based on major version
265        let model_version = &self.metadata.library_version;
266        let current_version = env!("CARGO_PKG_VERSION");
267
268        let model_major = model_version.split('.').next().unwrap_or("0");
269        let current_major = current_version.split('.').next().unwrap_or("0");
270
271        model_major == current_major
272    }
273
274    /// Get training duration in seconds
275    pub fn training_duration_seconds(&self) -> f64 {
276        self.metadata.training_metrics.training_time_ms as f64 / 1000.0
277    }
278
279    /// Get memory usage in MB
280    pub fn peak_memory_mb(&self) -> f64 {
281        self.metadata.training_metrics.peak_memory_bytes as f64 / (1024.0 * 1024.0)
282    }
283}
284
285impl<T: SerializableModel> SerializableModel for EnhancedModel<T> {}
286
/// Format a timestamp for display.
///
/// Returns `"Timestamp: <secs> (approx year <yyyy>)"`, where `<yyyy>` is the
/// UTC calendar year that contains `timestamp` (seconds since the Unix
/// epoch). Returns `"Invalid timestamp"` when the value cannot be represented
/// as a [`SystemTime`] on the current platform.
pub fn format_timestamp(timestamp: u64) -> String {
    let representable = SystemTime::UNIX_EPOCH
        .checked_add(std::time::Duration::from_secs(timestamp))
        .is_some();
    if !representable {
        return "Invalid timestamp".to_string();
    }

    let year = utc_year_of_unix_seconds(timestamp);
    format!("Timestamp: {} (approx year {})", timestamp, year)
}

/// UTC calendar year (proleptic Gregorian) containing the given Unix time.
///
/// Leap years are accounted for exactly, unlike dividing by a fixed 365-day
/// year, which drifts by one day per elapsed leap year.
fn utc_year_of_unix_seconds(seconds: u64) -> u64 {
    const SECONDS_PER_DAY: u64 = 86_400;
    // Days from 0000-03-01 to 1970-01-01; starting the count in March puts
    // the leap day at the end of each counted year.
    const EPOCH_SHIFT_DAYS: u64 = 719_468;
    // Length of one full 400-year Gregorian cycle, in days.
    const DAYS_PER_ERA: u64 = 146_097;

    let shifted_days = seconds / SECONDS_PER_DAY + EPOCH_SHIFT_DAYS;
    let era = shifted_days / DAYS_PER_ERA;
    let day_of_era = shifted_days % DAYS_PER_ERA;
    let year_of_era =
        (day_of_era - day_of_era / 1_460 + day_of_era / 36_524 - day_of_era / 146_096) / 365;
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100);
    // Month index counted from March (0 = March, ..., 10 = January, 11 = February).
    let month_from_march = (5 * day_of_year + 2) / 153;

    let march_based_year = year_of_era + era * 400;
    // January and February belong to the following civil year.
    if month_from_march >= 10 {
        march_based_year + 1
    } else {
        march_based_year
    }
}
300
#[cfg(test)]
mod tests {
    use super::*;

    /// Minimal model used to exercise the wrapper types.
    #[derive(Serialize, Deserialize, Debug, Clone)]
    struct TestModel {
        value: i32,
    }

    impl SerializableModel for TestModel {}

    /// Auto-generated metadata records the algorithm name and the model is
    /// stored unchanged.
    #[test]
    fn test_enhanced_model_creation() {
        let wrapped = EnhancedModel::with_auto_metadata(TestModel { value: 42 }, "test_algorithm");

        assert_eq!(wrapped.metadata.algorithm_signature, "test_algorithm");
        assert_eq!(wrapped.model.value, 42);
    }

    /// Platform detection always yields a non-empty OS and architecture.
    #[test]
    fn test_platform_info_detection() {
        let detected = PlatformInfo::detect();

        assert!(!detected.os.is_empty());
        assert!(!detected.arch.is_empty());
    }

    /// The formatted text mentions the year of the timestamp.
    #[test]
    fn test_format_timestamp() {
        // 2022-01-01 00:00:00 UTC
        const NEW_YEAR_2022: u64 = 1_640_995_200;

        let rendered = format_timestamp(NEW_YEAR_2022);
        assert!(rendered.contains("2022"));
    }
}