scirs2_cluster/serialization/
core.rs1use crate::error::{ClusteringError, Result};
7use flate2::read::GzDecoder;
8use flate2::write::GzEncoder;
9use flate2::Compression;
10use serde::{Deserialize, Serialize};
11use std::fs::File;
12use std::io::{Read, Write};
13use std::path::Path;
14use std::time::{SystemTime, UNIX_EPOCH};
15
16pub trait SerializableModel: Serialize + for<'de> Deserialize<'de> {
18 fn save_to_file<P: AsRef<Path>>(&self, path: P) -> Result<()> {
20 let file = File::create(path)
21 .map_err(|e| ClusteringError::InvalidInput(format!("Failed to create file: {}", e)))?;
22 self.save_to_writer(file)
23 }
24
25 fn save_to_writer<W: Write>(&self, writer: W) -> Result<()> {
27 serde_json::to_writer_pretty(writer, self)
28 .map_err(|e| ClusteringError::InvalidInput(format!("Failed to serialize model: {}", e)))
29 }
30
31 fn save_to_file_compressed<P: AsRef<Path>>(&self, path: P) -> Result<()> {
33 let file = File::create(path)
34 .map_err(|e| ClusteringError::InvalidInput(format!("Failed to create file: {}", e)))?;
35 let encoder = GzEncoder::new(file, Compression::default());
36 self.save_to_writer(encoder)
37 }
38
39 fn load_from_file_compressed<P: AsRef<Path>>(path: P) -> Result<Self> {
41 let file = File::open(path)
42 .map_err(|e| ClusteringError::InvalidInput(format!("Failed to open file: {}", e)))?;
43 let decoder = GzDecoder::new(file);
44 Self::load_from_reader(decoder)
45 }
46
47 fn load_from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
49 let mut file = File::open(path)
50 .map_err(|e| ClusteringError::InvalidInput(format!("Failed to open file: {}", e)))?;
51 Self::load_from_reader(&mut file)
52 }
53
54 fn load_from_reader<R: Read>(reader: R) -> Result<Self> {
56 serde_json::from_reader(reader).map_err(|e| {
57 ClusteringError::InvalidInput(format!("Failed to deserialize model: {}", e))
58 })
59 }
60}
61
/// Descriptive metadata stored alongside a serialized model.
///
/// The `Default` implementation fills in the format and library versions, the
/// current time and the detected platform; the remaining fields start out as
/// placeholders.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct EnhancedModelMetadata {
    /// Version string of the serialization format (`"1.0.0"` by default).
    pub format_version: String,
    /// Version of the library that created the metadata; the default is this
    /// crate's `CARGO_PKG_VERSION`. `EnhancedModel::is_compatible` compares
    /// its leading component with the running crate's version.
    pub library_version: String,
    /// Creation time in whole seconds since the Unix epoch.
    pub created_timestamp: u64,
    /// Name of the algorithm that produced the model (`"unknown"` by default;
    /// `EnhancedModel::with_auto_metadata` stores the given algorithm name).
    pub algorithm_signature: String,
    /// Metrics recorded for the training run.
    pub training_metrics: TrainingMetrics,
    /// Summary of the data the model was trained on.
    pub data_characteristics: DataCharacteristics,
    /// Integrity hash of the model; empty by default.
    /// NOTE(review): the hashing scheme is not defined in this file —
    /// `EnhancedModel::validate_integrity` only checks that it is non-empty.
    pub integrity_hash: String,
    /// Platform description (`PlatformInfo::detect()` by default).
    pub platform_info: PlatformInfo,
}
82
/// Resource and convergence figures recorded for a training run.
///
/// All fields default to zero.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct TrainingMetrics {
    /// Training time in milliseconds
    /// (`EnhancedModel::training_duration_seconds` divides it by 1000).
    pub training_time_ms: u64,
    /// Number of iterations performed.
    pub iterations: usize,
    /// Value of the convergence metric at the end of training.
    /// NOTE(review): which metric this is depends on the algorithm — confirm with the producer.
    pub final_convergence_metric: f64,
    /// Peak memory usage in bytes
    /// (`EnhancedModel::peak_memory_mb` divides it by 1024 * 1024).
    pub peak_memory_bytes: usize,
    /// Average CPU utilization during training.
    /// NOTE(review): scale (fraction vs. percent) is not established in this file.
    pub avg_cpu_utilization: f64,
}
97
/// Summary of the data set a model was trained on.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct DataCharacteristics {
    /// Number of samples (0 by default).
    pub n_samples: usize,
    /// Number of features (0 by default).
    pub n_features: usize,
    /// Free-form fingerprint of the data type (`"unknown"` by default).
    pub data_type_fingerprint: String,
    /// Optional pair of values per feature.
    /// NOTE(review): presumably `(min, max)` — confirm against the code that fills it.
    pub feature_ranges: Option<Vec<(f64, f64)>>,
    /// Names of the preprocessing steps that were applied (empty by default).
    pub preprocessing_applied: Vec<String>,
}
112
/// Description of the platform a model was created on.
///
/// Usually obtained through `PlatformInfo::detect()`.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct PlatformInfo {
    /// Operating system name (`std::env::consts::OS` when detected).
    pub os: String,
    /// CPU architecture name (`std::env::consts::ARCH` when detected).
    pub arch: String,
    /// Value of the compile-time `CARGO_PKG_RUST_VERSION` variable when
    /// detected, or `"unknown"` if it is unset or empty.
    pub rust_version: String,
    /// CPU features found at runtime; detection probes `avx2`, `sse4.1` and
    /// `fma` on x86_64 and `neon` on aarch64.
    pub cpu_features: Vec<String>,
}
125
126impl Default for EnhancedModelMetadata {
127 fn default() -> Self {
128 Self {
129 format_version: "1.0.0".to_string(),
130 library_version: env!("CARGO_PKG_VERSION").to_string(),
131 created_timestamp: SystemTime::now()
132 .duration_since(UNIX_EPOCH)
133 .unwrap_or_default()
134 .as_secs(),
135 algorithm_signature: "unknown".to_string(),
136 training_metrics: TrainingMetrics::default(),
137 data_characteristics: DataCharacteristics::default(),
138 integrity_hash: String::new(),
139 platform_info: PlatformInfo::detect(),
140 }
141 }
142}
143
144impl Default for TrainingMetrics {
145 fn default() -> Self {
146 Self {
147 training_time_ms: 0,
148 iterations: 0,
149 final_convergence_metric: 0.0,
150 peak_memory_bytes: 0,
151 avg_cpu_utilization: 0.0,
152 }
153 }
154}
155
156impl Default for DataCharacteristics {
157 fn default() -> Self {
158 Self {
159 n_samples: 0,
160 n_features: 0,
161 data_type_fingerprint: "unknown".to_string(),
162 feature_ranges: None,
163 preprocessing_applied: Vec::new(),
164 }
165 }
166}
167
168impl PlatformInfo {
169 pub fn detect() -> Self {
171 Self {
172 os: std::env::consts::OS.to_string(),
173 arch: std::env::consts::ARCH.to_string(),
174 rust_version: option_env!("CARGO_PKG_RUST_VERSION")
175 .filter(|s| !s.is_empty())
176 .unwrap_or("unknown")
177 .to_string(),
178 cpu_features: Self::detect_cpu_features(),
179 }
180 }
181
182 fn detect_cpu_features() -> Vec<String> {
184 let mut features = Vec::new();
185
186 #[cfg(target_arch = "x86_64")]
187 {
188 if std::arch::is_x86_feature_detected!("avx2") {
189 features.push("avx2".to_string());
190 }
191 if std::arch::is_x86_feature_detected!("sse4.1") {
192 features.push("sse4.1".to_string());
193 }
194 if std::arch::is_x86_feature_detected!("fma") {
195 features.push("fma".to_string());
196 }
197 }
198
199 #[cfg(target_arch = "aarch64")]
200 {
201 if std::arch::is_aarch64_feature_detected!("neon") {
202 features.push("neon".to_string());
203 }
204 }
205
206 features
207 }
208}
209
/// A model bundled with the metadata that describes it.
///
/// `Serialize` is derived here; `Deserialize` is implemented by hand
/// elsewhere in this file.
#[derive(Serialize, Debug, Clone)]
pub struct EnhancedModel<T: SerializableModel> {
    /// The wrapped model.
    pub model: T,
    /// Metadata describing the model.
    pub metadata: EnhancedModelMetadata,
}
218
219impl<'de, T: SerializableModel> Deserialize<'de> for EnhancedModel<T> {
220 fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
221 where
222 D: serde::Deserializer<'de>,
223 {
224 #[derive(Deserialize)]
225 struct EnhancedModelHelper<U> {
226 model: U,
227 metadata: EnhancedModelMetadata,
228 }
229
230 let helper = EnhancedModelHelper::deserialize(deserializer)?;
231 Ok(EnhancedModel {
232 model: helper.model,
233 metadata: helper.metadata,
234 })
235 }
236}
237
238impl<T: SerializableModel> EnhancedModel<T> {
239 pub fn new(model: T, metadata: EnhancedModelMetadata) -> Self {
241 Self { model, metadata }
242 }
243
244 pub fn with_auto_metadata(model: T, algorithm_name: &str) -> Self {
246 let mut metadata = EnhancedModelMetadata::default();
247 metadata.algorithm_signature = algorithm_name.to_string();
248 Self { model, metadata }
249 }
250
251 pub fn validate_integrity(&self) -> Result<bool> {
253 Ok(!self.metadata.integrity_hash.is_empty())
255 }
256
257 pub fn format_version(&self) -> &str {
259 &self.metadata.format_version
260 }
261
262 pub fn is_compatible(&self) -> bool {
264 let model_version = &self.metadata.library_version;
266 let current_version = env!("CARGO_PKG_VERSION");
267
268 let model_major = model_version.split('.').next().unwrap_or("0");
269 let current_major = current_version.split('.').next().unwrap_or("0");
270
271 model_major == current_major
272 }
273
274 pub fn training_duration_seconds(&self) -> f64 {
276 self.metadata.training_metrics.training_time_ms as f64 / 1000.0
277 }
278
279 pub fn peak_memory_mb(&self) -> f64 {
281 self.metadata.training_metrics.peak_memory_bytes as f64 / (1024.0 * 1024.0)
282 }
283}
284
/// Gives `EnhancedModel<T>` the default save/load methods of
/// `SerializableModel`; no method is overridden.
impl<T: SerializableModel> SerializableModel for EnhancedModel<T> {}
286
/// Renders a Unix timestamp (seconds since 1970-01-01 00:00:00 UTC) as
/// `Timestamp: <seconds> (approx year <year>)`.
///
/// The year is the UTC Gregorian calendar year that contains the timestamp,
/// with leap years taken into account. The wording of the output is kept
/// unchanged for callers that match on it.
///
/// Returns `"Invalid timestamp"` when the value cannot be represented as a
/// `SystemTime` on the current platform.
pub fn format_timestamp(timestamp: u64) -> String {
    const SECONDS_PER_DAY: u64 = 24 * 3600;
    // Days in one 400-year Gregorian cycle.
    const DAYS_PER_ERA: u64 = 146_097;
    // Days from 0000-03-01 to 1970-01-01. Counting years from 1 March puts the
    // leap day at the end of the year, which keeps the arithmetic branch-free.
    const EPOCH_SHIFT_DAYS: u64 = 719_468;
    // Day-of-year index (counted from 1 March) of 1 January.
    const JANUARY_FIRST: u64 = 306;

    if SystemTime::UNIX_EPOCH
        .checked_add(std::time::Duration::from_secs(timestamp))
        .is_none()
    {
        return "Invalid timestamp".to_string();
    }

    // Days-to-civil-year conversion (Howard Hinnant's `civil_from_days`),
    // restricted to non-negative day counts so unsigned arithmetic suffices.
    let shifted_days = timestamp / SECONDS_PER_DAY + EPOCH_SHIFT_DAYS;
    let era = shifted_days / DAYS_PER_ERA;
    let day_of_era = shifted_days % DAYS_PER_ERA;
    let year_of_era =
        (day_of_era - day_of_era / 1_460 + day_of_era / 36_524 - day_of_era / 146_096) / 365;
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100);

    // January and February belong to the following calendar year.
    let march_based_year = era * 400 + year_of_era;
    let year = if day_of_year >= JANUARY_FIRST {
        march_based_year + 1
    } else {
        march_based_year
    };

    format!("Timestamp: {} (approx year {})", timestamp, year)
}
300
#[cfg(test)]
mod tests {
    use super::*;

    /// Minimal serializable payload used to exercise `EnhancedModel`.
    #[derive(Serialize, Deserialize, Debug, Clone)]
    struct TestModel {
        value: i32,
    }

    impl SerializableModel for TestModel {}

    /// `with_auto_metadata` keeps the model and records the algorithm name.
    #[test]
    fn test_enhanced_model_creation() {
        let EnhancedModel { model, metadata } =
            EnhancedModel::with_auto_metadata(TestModel { value: 42 }, "test_algorithm");

        assert_eq!(metadata.algorithm_signature, "test_algorithm");
        assert_eq!(model.value, 42);
    }

    /// Detection always yields a non-empty OS and architecture name.
    #[test]
    fn test_platform_info_detection() {
        let PlatformInfo { os, arch, .. } = PlatformInfo::detect();

        assert!(!os.is_empty());
        assert!(!arch.is_empty());
    }

    /// 1640995200 is 2022-01-01 00:00:00 UTC.
    #[test]
    fn test_format_timestamp() {
        let rendered = format_timestamp(1_640_995_200);

        assert!(rendered.contains("2022"));
    }
}