// helix/dna/mds/serializer.rs

1use super::codegen::HelixIR;
2use crate::dna::hel::binary::{
3    HelixBinary, BinaryFlags, BinaryMetadata, DataSection, SectionType, SymbolTable,
4    Instruction, Value, CompressionMethod,
5};
6use std::path::Path;
7use std::fs::File;
8use std::io::{Write, Read};
9use bincode;
10pub use crate::dna::atp::types::*;
11
12
/// Converts between the compiler's `HelixIR` and the `HelixBinary` container format,
/// and reads/writes that container to disk.
pub struct BinarySerializer {
    /// When `true`, `serialize` compresses every data section and sets the
    /// `compressed` flag on the produced binary.
    enable_compression: bool,
    /// Method passed to `DataSection::compress`; `new` initializes it to
    /// `CompressionMethod::Lz4`.
    compression_method: CompressionMethod,
}
17impl BinarySerializer {
18    pub fn new(enable_compression: bool) -> Self {
19        Self {
20            enable_compression,
21            compression_method: CompressionMethod::Lz4,
22        }
23    }
24    pub fn with_compression_method(mut self, method: CompressionMethod) -> Self {
25        self.compression_method = method;
26        self
27    }
28    pub fn serialize(
29        &self,
30        ir: HelixIR,
31        source_path: Option<&Path>,
32    ) -> Result<HelixBinary, SerializationError> {
33        let mut binary = HelixBinary::new();
34        binary.metadata = BinaryMetadata {
35            created_at: std::time::SystemTime::now()
36                .duration_since(std::time::UNIX_EPOCH)
37                .unwrap()
38                .as_secs(),
39            compiler_version: env!("CARGO_PKG_VERSION").to_string(),
40            source_hash: self.calculate_source_hash(&ir),
41            optimization_level: 2,
42            platform: format!("{}-{}", std::env::consts::OS, std::env::consts::ARCH),
43            source_path: source_path.map(|p| p.display().to_string()),
44            extra: Default::default(),
45        };
46        binary.flags = BinaryFlags {
47            compressed: self.enable_compression,
48            optimized: true,
49            encrypted: false,
50            signed: false,
51            custom: 0,
52        };
53        binary.symbol_table = self.convert_symbol_table(&ir);
54        binary.data_sections = self.create_data_sections(&ir)?;
55        if self.enable_compression {
56            for section in &mut binary.data_sections {
57                section.compress(self.compression_method.clone())?;
58            }
59        }
60        binary.checksum = binary.calculate_checksum();
61        Ok(binary)
62    }
63    pub fn write_to_file(
64        &self,
65        binary: &HelixBinary,
66        path: &Path,
67    ) -> Result<(), SerializationError> {
68        let data = bincode::serialize(binary)
69            .map_err(|e| SerializationError::BincodeError(e.to_string()))?;
70        let mut file = File::create(path)
71            .map_err(|e| SerializationError::IoError(e.to_string()))?;
72        file.write_all(&data).map_err(|e| SerializationError::IoError(e.to_string()))?;
73        Ok(())
74    }
75    pub fn read_from_file(
76        &self,
77        path: &Path,
78    ) -> Result<HelixBinary, SerializationError> {
79        let mut file = File::open(path)
80            .map_err(|e| SerializationError::IoError(e.to_string()))?;
81        let mut data = Vec::new();
82        file.read_to_end(&mut data)
83            .map_err(|e| SerializationError::IoError(e.to_string()))?;
84        let binary: HelixBinary = bincode::deserialize(&data)
85            .map_err(|e| SerializationError::BincodeError(e.to_string()))?;
86        binary.validate().map_err(|e| SerializationError::ValidationError(e))?;
87        Ok(binary)
88    }
89    pub fn deserialize_to_ir(
90        &self,
91        binary: &HelixBinary,
92    ) -> Result<HelixIR, SerializationError> {
93        let mut ir = HelixIR {
94            version: binary.version,
95            metadata: self.convert_metadata(&binary.metadata),
96            symbol_table: self.convert_symbol_table_to_ir(&binary.symbol_table),
97            instructions: Vec::new(),
98            string_pool: super::codegen::StringPool {
99                strings: binary.symbol_table.strings.clone(),
100                index: binary.symbol_table.string_map.clone(),
101            },
102            constants: super::codegen::ConstantPool::new(),
103        };
104        for section in &binary.data_sections {
105            let mut section_clone = section.clone();
106            if section.compression.is_some() {
107                section_clone
108                    .decompress()
109                    .map_err(|e| SerializationError::DecompressionError(e))?;
110            }
111            match section.section_type {
112                SectionType::Instructions => {
113                    ir.instructions = self
114                        .deserialize_instructions(&section_clone.data)?;
115                }
116                _ => {}
117            }
118        }
119        Ok(ir)
120    }
121    fn convert_symbol_table(&self, ir: &HelixIR) -> SymbolTable {
122        let mut table = SymbolTable::default();
123        table.strings = ir.string_pool.strings.clone();
124        for (i, s) in table.strings.iter().enumerate() {
125            table.string_map.insert(s.clone(), i as u32);
126        }
127        for (id, agent) in &ir.symbol_table.agents {
128            if let Some(name) = ir.string_pool.get(agent.name_idx) {
129                table.agents.insert(name.clone(), *id);
130            }
131        }
132        for (id, workflow) in &ir.symbol_table.workflows {
133            if let Some(name) = ir.string_pool.get(workflow.name_idx) {
134                table.workflows.insert(name.clone(), *id);
135            }
136        }
137        for (id, context) in &ir.symbol_table.contexts {
138            if let Some(name) = ir.string_pool.get(context.name_idx) {
139                table.contexts.insert(name.clone(), *id);
140            }
141        }
142        for (id, crew) in &ir.symbol_table.crews {
143            if let Some(name) = ir.string_pool.get(crew.name_idx) {
144                table.crews.insert(name.clone(), *id);
145            }
146        }
147        table
148    }
149    fn convert_symbol_table_to_ir(
150        &self,
151        table: &SymbolTable,
152    ) -> super::codegen::SymbolTable {
153        use crate::dna::mds::codegen::{AgentSymbol, WorkflowSymbol, ContextSymbol, CrewSymbol};
154        use std::collections::HashMap;
155        let mut symbol_table = crate::dna::mds::codegen::SymbolTable::default();
156        for (name, id) in &table.agents {
157            let name_idx = table.string_map.get(name).copied().unwrap_or(0);
158            symbol_table
159                .agents
160                .insert(
161                    *id,
162                    AgentSymbol {
163                        id: *id,
164                        name_idx,
165                        model_idx: 0,
166                        role_idx: 0,
167                        temperature: None,
168                        max_tokens: None,
169                        capabilities: Vec::new(),
170                        backstory_idx: None,
171                    },
172                );
173        }
174        for (name, id) in &table.workflows {
175            let name_idx = table.string_map.get(name).copied().unwrap_or(0);
176            symbol_table
177                .workflows
178                .insert(
179                    *id,
180                    WorkflowSymbol {
181                        id: *id,
182                        name_idx,
183                        trigger_type: super::codegen::TriggerType::Manual,
184                        steps: Vec::new(),
185                        pipeline: None,
186                    },
187                );
188        }
189        for (name, id) in &table.contexts {
190            let name_idx = table.string_map.get(name).copied().unwrap_or(0);
191            symbol_table
192                .contexts
193                .insert(
194                    *id,
195                    ContextSymbol {
196                        id: *id,
197                        name_idx,
198                        environment_idx: 0,
199                        debug: false,
200                        max_tokens: None,
201                        secrets: HashMap::new(),
202                    },
203                );
204        }
205        for (name, id) in &table.crews {
206            let name_idx = table.string_map.get(name).copied().unwrap_or(0);
207            symbol_table
208                .crews
209                .insert(
210                    *id,
211                    CrewSymbol {
212                        id: *id,
213                        name_idx,
214                        agent_ids: Vec::new(),
215                        process_type: super::codegen::ProcessTypeIR::Sequential,
216                        manager_id: None,
217                    },
218                );
219        }
220        symbol_table
221    }
222    fn convert_metadata(&self, metadata: &BinaryMetadata) -> super::codegen::Metadata {
223        super::codegen::Metadata {
224            source_file: metadata.source_path.clone(),
225            compile_time: metadata.created_at,
226            compiler_version: metadata.compiler_version.clone(),
227            checksum: None,
228        }
229    }
230    fn create_data_sections(
231        &self,
232        ir: &HelixIR,
233    ) -> Result<Vec<DataSection>, SerializationError> {
234        let mut sections = Vec::new();
235        if !ir.instructions.is_empty() {
236            let instruction_data = self.serialize_instructions(&ir.instructions)?;
237            sections.push(DataSection::new(SectionType::Instructions, instruction_data));
238        }
239        if !ir.symbol_table.agents.is_empty() {
240            let agent_data = bincode::serialize(&ir.symbol_table.agents)
241                .map_err(|e| SerializationError::BincodeError(e.to_string()))?;
242            sections.push(DataSection::new(SectionType::Agents, agent_data));
243        }
244        if !ir.symbol_table.workflows.is_empty() {
245            let workflow_data = bincode::serialize(&ir.symbol_table.workflows)
246                .map_err(|e| SerializationError::BincodeError(e.to_string()))?;
247            sections.push(DataSection::new(SectionType::Workflows, workflow_data));
248        }
249        Ok(sections)
250    }
251    fn serialize_instructions(
252        &self,
253        instructions: &[super::codegen::Instruction],
254    ) -> Result<Vec<u8>, SerializationError> {
255        let binary_instructions: Vec<Instruction> = instructions
256            .iter()
257            .map(|inst| self.convert_instruction(inst))
258            .collect();
259        bincode::serialize(&binary_instructions)
260            .map_err(|e| SerializationError::BincodeError(e.to_string()))
261    }
262    fn deserialize_instructions(
263        &self,
264        data: &[u8],
265    ) -> Result<Vec<super::codegen::Instruction>, SerializationError> {
266        let binary_instructions: Vec<Instruction> = bincode::deserialize(data)
267            .map_err(|e| SerializationError::BincodeError(e.to_string()))?;
268        Ok(
269            binary_instructions
270                .iter()
271                .map(|inst| self.convert_instruction_to_ir(inst))
272                .collect(),
273        )
274    }
275    fn convert_instruction(&self, inst: &super::codegen::Instruction) -> Instruction {
276        match inst {
277            crate::dna::mds::codegen::Instruction::DeclareAgent(id) => {
278                Instruction::InvokeAgent(*id)
279            }
280            crate::dna::mds::codegen::Instruction::DeclareWorkflow(_id) => Instruction::Nop,
281            crate::dna::mds::codegen::Instruction::DeclareContext(_id) => Instruction::Nop,
282            crate::dna::mds::codegen::Instruction::DeclareCrew(id) => Instruction::InvokeCrew(*id),
283            crate::dna::mds::codegen::Instruction::SetProperty { .. } => Instruction::Nop,
284            crate::dna::mds::codegen::Instruction::SetCapability { .. } => Instruction::Nop,
285            crate::dna::mds::codegen::Instruction::SetSecret { .. } => Instruction::Nop,
286            crate::dna::mds::codegen::Instruction::DefineStep { .. } => Instruction::Nop,
287            crate::dna::mds::codegen::Instruction::DefinePipeline { workflow, .. } => {
288                Instruction::Pipeline(*workflow)
289            }
290            crate::dna::mds::codegen::Instruction::ResolveReference { .. } => Instruction::Nop,
291            crate::dna::mds::codegen::Instruction::SetMetadata { .. } => Instruction::Nop,
292        }
293    }
294    fn convert_instruction_to_ir(
295        &self,
296        inst: &Instruction,
297    ) -> crate::dna::mds::codegen::Instruction {
298        match inst {
299            Instruction::InvokeAgent(id) => {
300                crate::dna::mds::codegen::Instruction::DeclareAgent(*id)
301            }
302            Instruction::InvokeCrew(id) => crate::dna::mds::codegen::Instruction::DeclareCrew(*id),
303            Instruction::Pipeline(id) => {
304                crate::dna::mds::codegen::Instruction::DeclareWorkflow(*id)
305            }
306            _ => crate::dna::mds::codegen::Instruction::DeclareAgent(0),
307        }
308    }
309    #[allow(dead_code)]
310    fn convert_value(&self, val: &Value) -> Value {
311        match val {
312            Value::Bool(b) => Value::Bool(*b),
313            Value::Int(i) => Value::Int(*i),
314            Value::Float(n) => Value::Float(*n),
315            Value::String(_s) => {
316                let id = 0;
317                Value::String(id)
318            }
319            Value::Duration(secs) => {
320                Value::Duration(*secs)
321            }
322            Value::Array(_) => Value::Null,
323            Value::Object(_) => Value::Null,
324            Value::Reference(_) => Value::Null,
325            Value::Null => Value::Null,
326        }
327    }
328    #[allow(dead_code)]
329    fn convert_value_to_ir(&self, val: &Value) -> crate::dna::atp::types::Value {
330        match val {
331            Value::Null => crate::dna::atp::types::Value::String(String::new()),
332            Value::Bool(b) => crate::dna::atp::types::Value::Bool(*b),
333            Value::Int(i) => crate::dna::atp::types::Value::Number(*i as f64),
334            Value::Float(f) => crate::dna::atp::types::Value::Number(*f),
335            Value::String(_id) => crate::dna::atp::types::Value::String(String::new()),
336            Value::Duration(secs) => {
337                crate::dna::atp::types::Value::Duration(crate::dna::atp::types::Duration {
338                    value: (*secs / 60) as u64,
339                    unit: crate::dna::atp::types::TimeUnit::Minutes,
340                })
341            }
342            Value::Reference(_id) => crate::dna::atp::types::Value::Reference(String::new()),
343            Value::Array(arr) => {
344                crate::dna::atp::types::Value::Array(
345                    arr.iter().map(|v| self.convert_value_to_ir(v)).collect(),
346                )
347            }
348            Value::Object(obj) => {
349                let mut map = std::collections::HashMap::new();
350                for (key_idx, value) in obj {
351                    let key = format!("key_{}", key_idx);
352                    map.insert(key, self.convert_value_to_ir(value));
353                }
354                crate::dna::atp::types::Value::Object(map)
355            }
356        }
357    }
358    fn calculate_source_hash(&self, ir: &HelixIR) -> String {
359        use std::collections::hash_map::DefaultHasher;
360        use std::hash::{Hash, Hasher};
361        let mut hasher = DefaultHasher::new();
362        ir.version.hash(&mut hasher);
363        ir.string_pool.strings.len().hash(&mut hasher);
364        ir.instructions.len().hash(&mut hasher);
365        format!("{:x}", hasher.finish())
366    }
367}
/// Errors produced while converting, reading or writing Helix binaries.
///
/// Every variant carries the underlying failure rendered as a string.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SerializationError {
    /// A file could not be created, opened, read or written.
    IoError(String),
    /// bincode failed to encode or decode a value.
    BincodeError(String),
    /// Compressing a data section failed.
    CompressionError(String),
    /// Decompressing a data section failed.
    DecompressionError(String),
    /// A decoded binary did not pass validation.
    ValidationError(String),
}

impl std::fmt::Display for SerializationError {
    /// Writes the variant's category followed by the wrapped message.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::IoError(e) => write!(f, "I/O error: {}", e),
            Self::BincodeError(e) => write!(f, "Bincode error: {}", e),
            Self::CompressionError(e) => write!(f, "Compression error: {}", e),
            Self::DecompressionError(e) => write!(f, "Decompression error: {}", e),
            Self::ValidationError(e) => write!(f, "Validation error: {}", e),
        }
    }
}

impl std::error::Error for SerializationError {}

/// Lets `?` lift a bare `String` error into a [`SerializationError`].
///
/// The string is always classified as `CompressionError`, so this conversion is
/// only appropriate for string errors that really come from compression.
impl From<String> for SerializationError {
    fn from(s: String) -> Self {
        Self::CompressionError(s)
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::dna::mds::codegen::{StringPool, Metadata, ConstantPool};

    /// Serializes a small IR without compression and converts it back, checking that
    /// the version, the instruction count and the pooled strings survive the trip.
    /// Instruction payloads are not compared because the binary conversion is lossy.
    #[test]
    fn test_serialization_roundtrip() {
        let mut string_pool = StringPool::new();
        string_pool.intern("test");
        let ir = HelixIR {
            version: 1,
            metadata: Metadata::default(),
            symbol_table: crate::dna::mds::codegen::SymbolTable::default(),
            instructions: vec![
                crate::dna::mds::codegen::Instruction::DeclareAgent(1),
                crate::dna::mds::codegen::Instruction::DeclareWorkflow(2),
            ],
            string_pool,
            constants: ConstantPool::default(),
        };
        let serializer = BinarySerializer::new(false);
        let binary = serializer.serialize(ir.clone(), None).unwrap();
        let deserialized = serializer.deserialize_to_ir(&binary).unwrap();
        assert_eq!(ir.version, deserialized.version);
        assert_eq!(ir.instructions.len(), deserialized.instructions.len());
        assert_eq!(ir.string_pool.strings, deserialized.string_pool.strings);
    }
}