struct_compression_analyzer/comparison/compare_groups/generate_bytes/
mod.rs

1//! Generates byte streams from schema-defined field groups for compression analysis.
2//!
3//! This module handles the core functionality of converting schema field group definitions
4//! into analyzable byte streams. It provides specialized handling for different group
5//! component types and manages bit-level operations for field transformations.
6//!
7//! # Core Types
8//!
9//! - [`GenerateBytesError`]: Comprehensive error handling for byte generation
10//! - [`GenerateBytesResult`]: Type alias for Result with GenerateBytesError
11//!
12//! # Internal Functions
13//!
14//! Two primary internal functions handle byte generation:
15//!
16//! - [`generate_group_bytes`]: Creates a [`Vec<u8>`] from group components
17//! - [`generate_output_for_compare_groups_entry`]: Writes directly to a provided bitstream
18//!
19//! # Component Types
20//!
21//! The module handles two primary component types:
22//!
23//! - Arrays: Sequential field values with optional bit slicing
24//! - Structs: Grouped fields with padding and alignment
25//!
26//! # Error Handling
27//!
28//! Comprehensive error handling covers:
29//! - Field lookup failures
30//! - Bit alignment issues
31//! - Read/write operations
32//! - Invalid component configurations
33//!
34//! # Implementation Notes
35//!
36//! - Handles both MSB and LSB bit ordering
37//! - Supports partial field reads via offset/bits
38//!
39//! # Submodules
40//!
41//! - [`write_array`]: Array component processing
42//! - [`write_struct`]: Struct component processing
43//!
44//! [`GenerateBytesError`]: crate::comparison::compare_groups::generate_bytes::GenerateBytesError
45//! [`GenerateBytesResult`]: crate::comparison::compare_groups::generate_bytes::GenerateBytesResult
46//! [`write_array`]: crate::comparison::compare_groups::generate_bytes::write_array
47//! [`write_struct`]: crate::comparison::compare_groups::generate_bytes::write_struct
48use thiserror::Error;
49mod write_array;
50mod write_struct;
51
52pub(crate) type GenerateBytesResult<T> = std::result::Result<T, GenerateBytesError>;
53use crate::comparison::compare_groups::generate_bytes::write_array::write_array;
54use crate::comparison::compare_groups::generate_bytes::write_struct::write_struct;
55use crate::{analyzer::AnalyzerFieldState, schema::GroupComponent};
56use ahash::AHashMap;
57use bitstream_io::{BigEndian, BitWrite, BitWriter, Endianness};
58use std::io::Cursor;
59
/// Errors that can occur while generating bytes from a schema for analysis.
///
/// Display messages are produced from the `#[error(...)]` attributes. Variants that wrap a
/// [`std::io::Error`] mark it with `#[source]`, so it is reported as the underlying cause.
#[derive(Error, Debug)]
pub enum GenerateBytesError {
    /// A group component of a kind that is not accepted at this position was encountered.
    /// The payload is a human-readable description of what is allowed.
    #[error("Invalid component type - {0}")]
    InvalidComponentType(String),

    /// Aligning the bit writer to a byte boundary failed with the wrapped I/O error.
    #[error("Failed to byte align writer: {0}")]
    ByteAlignmentFailed(#[source] std::io::Error),

    /// The named field has no entry in the field stats. The payload is the field name.
    #[error("Field '{0}' not found in field stats")]
    FieldNotFound(String),

    /// A read failed. `context` describes what was being done when the read failed.
    #[error("Read error while {context}: {source}")]
    ReadError {
        #[source]
        source: std::io::Error,
        context: String,
    },

    /// A write failed. `context` describes what was being done when the write failed.
    #[error("Write error while {context}: {source}")]
    WriteError {
        #[source]
        source: std::io::Error,
        context: String,
    },

    /// A seek failed. `operation` names the operation during which the seek was attempted.
    #[error("Seek error during {operation}: {source}")]
    SeekError {
        #[source]
        source: std::io::Error,
        operation: String,
    },

    /// A struct component contained a nested array or struct, which is not supported.
    #[error("Nested structure contains unsupported component type. Nested arrays and structs are not allowed within structs.")]
    UnsupportedNestedComponent,
}
96
97/// Processes group components and writes them to a bitstream writer
98///
99/// # Parameters
100/// - `field_stats`: Mutable reference to field statistics map
101/// - `writer`: Bitstream writer implementing `std::io::Write`
102/// - `components`: Slice of group components to process
103///
104/// # Panics
105/// - If encountering any component type other than Array or Struct
106pub(crate) fn generate_output_for_compare_groups_entry<
107    TWrite: std::io::Write,
108    TEndian: Endianness,
109>(
110    field_stats: &mut AHashMap<String, AnalyzerFieldState>,
111    writer: &mut BitWriter<TWrite, TEndian>,
112    components: &[GroupComponent],
113) -> GenerateBytesResult<()> {
114    for component in components {
115        match component {
116            GroupComponent::Array(array) => write_array(field_stats, writer, array)?,
117            GroupComponent::Struct(struct_) => write_struct(field_stats, writer, struct_)?,
118            _ => {
119                return Err(GenerateBytesError::InvalidComponentType(
120                    "Only arrays and structs are allowed at top level".into(),
121                ))
122            }
123        }
124    }
125    Ok(())
126}
127
128pub(crate) fn generate_group_bytes(
129    components: &[GroupComponent],
130    field_stats: &mut AHashMap<String, AnalyzerFieldState>,
131) -> GenerateBytesResult<Vec<u8>> {
132    let mut output = Vec::new();
133    let mut writer = BitWriter::endian(Cursor::new(&mut output), BigEndian);
134
135    generate_output_for_compare_groups_entry(field_stats, &mut writer, components)?;
136    writer
137        .byte_align()
138        .map_err(GenerateBytesError::ByteAlignmentFailed)?;
139    Ok(output)
140}
141
#[cfg(test)]
mod generate_output_tests {
    //! Tests for [`generate_output_for_compare_groups_entry`] covering array, struct and
    //! mixed top-level components, plus rejection of unsupported top-level components.

    use super::*;
    use crate::comparison::compare_groups::test_helpers::{
        create_mock_field_states, TEST_FIELD_NAME,
    };
    use crate::schema::{
        default_entropy_multiplier, default_lz_match_multiplier, BitOrder, GroupComponentArray,
        GroupComponentField, GroupComponentStruct,
    };
    use ahash::AHashMap;
    use bitstream_io::{BitWriter, LittleEndian};
    use std::io::Cursor;

    /// Raw bytes backing the mock field used by the round-trip tests.
    const INPUT_DATA: [u8; 2] = [0b0010_0001, 0b1000_0100];

    /// Field component referring to the test field, 4 bits wide.
    fn test_field_component() -> GroupComponent {
        GroupComponent::Field(GroupComponentField {
            field: TEST_FIELD_NAME.to_string(),
            bits: 4,
        })
    }

    /// Array component reading the test field from offset 0, 4 bits wide.
    fn test_array_component() -> GroupComponent {
        GroupComponent::Array(GroupComponentArray {
            field: TEST_FIELD_NAME.to_string(),
            offset: 0,
            bits: 4,
            ..Default::default()
        })
    }

    /// Struct component holding a single 4-bit field with default multipliers.
    fn test_struct_component() -> GroupComponent {
        GroupComponent::Struct(GroupComponentStruct {
            fields: vec![test_field_component()],
            lz_match_multiplier: default_lz_match_multiplier(),
            entropy_multiplier: default_entropy_multiplier(),
        })
    }

    #[test]
    fn can_write_array_component() {
        let mut field_stats = create_mock_field_states(
            TEST_FIELD_NAME,
            &INPUT_DATA,
            4,
            BitOrder::Lsb,
            BitOrder::Lsb,
        );
        let mut output = Vec::new();
        let mut writer = BitWriter::endian(Cursor::new(&mut output), LittleEndian);

        let components = vec![test_array_component()];

        generate_output_for_compare_groups_entry(&mut field_stats, &mut writer, &components)
            .unwrap();
        assert_eq!(INPUT_DATA, output.as_slice());
    }

    #[test]
    fn can_write_struct_component() {
        let mut field_stats = create_mock_field_states(
            TEST_FIELD_NAME,
            &INPUT_DATA,
            4,
            BitOrder::Lsb,
            BitOrder::Lsb,
        );
        let mut output = Vec::new();
        let mut writer = BitWriter::endian(Cursor::new(&mut output), LittleEndian);

        let components = vec![test_struct_component()];

        generate_output_for_compare_groups_entry(&mut field_stats, &mut writer, &components)
            .unwrap();
        assert_eq!(INPUT_DATA, output.as_slice());
    }

    #[test]
    fn can_write_multiple_components() {
        let mut field_stats = create_mock_field_states(
            TEST_FIELD_NAME,
            &INPUT_DATA,
            4,
            BitOrder::Lsb,
            BitOrder::Lsb,
        );
        let mut output = Vec::new();
        let mut writer = BitWriter::endian(Cursor::new(&mut output), LittleEndian);

        let components = vec![test_array_component(), test_struct_component()];

        generate_output_for_compare_groups_entry(&mut field_stats, &mut writer, &components)
            .unwrap();
        // The array output comes first, followed by the struct output.
        assert_eq!(
            &[INPUT_DATA[0], INPUT_DATA[1], INPUT_DATA[0], INPUT_DATA[1]],
            output.as_slice()
        );
    }

    /// A bare field at top level must be reported as `InvalidComponentType`.
    ///
    /// The error variant is asserted directly instead of relying on `#[should_panic]`
    /// around `unwrap()`, which would also pass on any unrelated panic.
    #[test]
    fn returns_error_on_invalid_component_type() {
        let mut field_stats = AHashMap::new();
        let mut output = Vec::new();
        let mut writer = BitWriter::endian(Cursor::new(&mut output), LittleEndian);

        let components = vec![test_field_component()];

        let result =
            generate_output_for_compare_groups_entry(&mut field_stats, &mut writer, &components);
        assert!(matches!(
            result,
            Err(GenerateBytesError::InvalidComponentType(_))
        ));
    }
}