struct_compression_analyzer/
schema.rs

1//! # Bit-Packed Structure Analysis Schema
2//!
3//! This module provides a structured way to define and analyze bit-packed data formats.
4//!
5//! The schema allows specifying:
6//!
7//! - Root structure definition
8//! - Conditional offsets for data alignment
9//! - Analysis configuration for comparing field groupings
10//!
11//! ## Public API
12//!
13//! ### Main Types
14//!
15//! - [Schema]: Main schema definition containing root structure and analysis configuration
16//! - [Group]: Represents a group of fields or nested groups
17//! - [FieldDefinition]: Defines a field or nested group
18//! - [AnalysisConfig]: Configuration for analysis operations
19//!
20//! ### Public Methods
21//!
22//! - [`Schema::from_yaml()`]: Parse schema from YAML string
23//! - [`Schema::load_from_file()`]: Load and parse schema from file path
24//!
25//! ### Group Component Methods
26//!
27//! - [`GroupComponentArray::get_bits()`]: Get number of bits to read from field
28//!
29//! ### Comparison Types
30//!
31//! - [SplitComparison]: Configuration for comparing two field group layouts
32//! - [CustomComparison]: Configuration for comparing custom field group transformations
33//!
34//! ### Group Components
35//!
//! - [GroupComponent]: Enum representing different types of group components
//!   - [GroupComponentArray]: Array of field values
//!   - [GroupComponentStruct]: Structured group of components
//!   - [GroupComponentPadding]: Padding bits
//!   - [GroupComponentField]: Single read of a field value
//!   - [GroupComponentSkip]: Skip bits
41//!
42//! ### Error Handling
43//!
44//! - [SchemaError]: Error types for schema parsing and validation
45//!
46//! ## Main Components
47//!
48//! - **Schema**: The root configuration containing:
49//!   - Version
50//!   - Metadata
51//!   - Root group definition
52//!   - Bit order configuration
53//!   - Conditional offsets
54//!   - Analysis configuration
55//!
56//! - **Group**: Hierarchical structure representing:
57//!   - Group description
58//!   - Nested fields/components
59//!   - Bit order inheritance
60//!   - Skip conditions
61//!
62//! - **FieldDefinition**: Represents either a:
63//!   - [Field]: Single field with bit properties
64//!   - [Group]: Nested group of fields
65//!
66//! ## Example Usage
67//!
68//! ```rust no_run
69//! use struct_compression_analyzer::schema::*;
70//! use std::path::Path;
71//!
72//! let yaml = r#"
73//! version: '1.0'
74//! metadata: { name: Test }
75//! root: { type: group, fields: {} }
76//! "#;
77//!
78//! // Load schema from YAML
79//! let schema_from_file = Schema::load_from_file(Path::new("schema.yaml")).unwrap();
80//! let schema_from_str = Schema::from_yaml(&yaml).unwrap();
81//! ```
82
83use indexmap::IndexMap;
84use serde::Deserialize;
85use std::path::Path;
86
87use crate::analyzer::{AnalyzerFieldState, CompressionOptions};
88
89/// Represents the complete schema configuration for a bit-packed structure to analyze.
90///
91/// The schema defines the layout and structure of the bit-packed data format.
92/// It includes versioning, metadata, bit order configuration, and the root group definition.
93///
94/// # Examples
95///
96/// ```rust no_run
97/// use struct_compression_analyzer::schema::Schema;
98/// use std::path::Path;
99///
100/// let schema = Schema::load_from_file(Path::new("schema.yaml")).unwrap();
101/// ```
102#[derive(Debug, Deserialize, Default)]
103pub struct Schema {
104    /// Schema version. Currently only `1.0` is supported
105    pub version: String,
106    /// Contains user-provided metadata about the schema
107    #[serde(default)]
108    pub metadata: Metadata,
109    /// Determines whether the bytes are read from the most significant bit (MSB)
110    /// or least significant bit (LSB) first.
111    ///
112    /// - `Msb`: First bit is the high bit (7)
113    /// - `Lsb`: First bit is the low bit (0)
114    #[serde(default)]
115    pub bit_order: BitOrder,
116    /// Conditional offsets for the schema
117    #[serde(default)]
118    pub conditional_offsets: Vec<ConditionalOffset>,
119    /// Configuration for analysis operations and output grouping
120    #[serde(default)]
121    pub analysis: AnalysisConfig,
122    /// The root group of the schema
123    pub root: Group,
124}
125
126/// Metadata about the schema
127///
128/// Contains user-provided information about the schema's purpose and structure.
129#[derive(Clone, Debug, Deserialize, Default)]
130pub struct Metadata {
131    /// Name of the schema
132    #[serde(default)]
133    pub name: String,
134    /// Description of the schema
135    #[serde(default)]
136    pub description: String,
137}
138
139/// Configuration for analysis operations and output grouping.
140///
141/// Defines how field groups should be compared and analyzed between each other,
142/// to find the most optimal bit layout to use for the data.
143#[derive(Debug, Deserialize, Default)]
144pub struct AnalysisConfig {
145    /// Compare structural equivalence between different field groups. Each comparison
146    /// verifies that the compared groups have identical total bits and field structure.
147    ///
148    /// # Example
149    /// ```yaml
150    /// split_groups:
151    ///   - name: colors
152    ///     group_1: [colors]                       # Original interleaved (structure of array) RGB layout
153    ///     group_2: [color_r, color_g, color_b]    # array of structure layout (e.g. RRRGGGBBB)
154    ///     description: Compare compression ratio of original interleaved format against grouping of colour components.
155    /// ```
156    #[serde(default)]
157    pub split_groups: Vec<SplitComparison>,
158
159    /// Compare arbitrary field groups defined through custom transformations.
160    /// Each comparison defines a baseline and one or more comparison groups
161    /// that should be structurally equivalent but may have different bit layouts.
162    ///
163    /// # Example: Converting 7-bit colors to 8-bit
164    ///
165    /// ```yaml
166    /// compare_groups:
167    /// - name: convert_7_to_8_bit
168    ///   description: "Adjust 7-bit color channel to 8-bit by appending a padding bit."
169    ///   baseline: # R, R, R
170    ///     - { type: array, field: color7 } # reads all '7-bit' colours from input
171    ///   comparisons:
172    ///     padded_8bit: # R+0, R+0, R+0
173    ///       - type: struct
174    ///         fields:
175    ///           - { type: field, field: color7 } # reads 1 '7-bit' colour from input
176    ///           - { type: padding, bits: 1, value: 0 } # appends 1 padding bit
177    /// ```
178    #[serde(default)]
179    pub compare_groups: Vec<CustomComparison>,
180}
181
182/// Parameters for estimating compression size
183#[derive(Debug, Deserialize, Clone)]
184pub struct CompressionEstimationParams {
185    /// Multiplier for LZ matches in size estimation (default: 0.375)
186    #[serde(default = "default_lz_match_multiplier")]
187    pub lz_match_multiplier: f64,
188    /// Multiplier for entropy in size estimation (default: 1.0)
189    #[serde(default = "default_entropy_multiplier")]
190    pub entropy_multiplier: f64,
191}
192
193impl CompressionEstimationParams {
194    pub fn new(options: &CompressionOptions) -> Self {
195        Self {
196            lz_match_multiplier: options.lz_match_multiplier,
197            entropy_multiplier: options.entropy_multiplier,
198        }
199    }
200}
201
202/// Configuration for comparing field groups
203#[derive(Debug, Deserialize)]
204pub struct SplitComparison {
205    /// Friendly name for this comparison.
206    pub name: String,
207    /// First group path to compare. This is the 'baseline'.
208    pub group_1: Vec<String>,
209    /// Second group path to compare. This is the group compared against the baseline (group_1).
210    pub group_2: Vec<String>,
211    /// Optional description of the comparison
212    #[serde(default)]
213    pub description: String,
214    /// Compression estimation parameters for group 1
215    #[serde(default)]
216    pub compression_estimation_group_1: Option<CompressionEstimationParams>,
217    /// Compression estimation parameters for group 2
218    #[serde(default)]
219    pub compression_estimation_group_2: Option<CompressionEstimationParams>,
220}
221
222/// Configuration for custom field group comparisons
223#[derive(Debug, Deserialize)]
224pub struct CustomComparison {
225    /// Unique identifier for this comparison
226    pub name: String,
227
228    /// Baseline group definition
229    pub baseline: Vec<GroupComponent>,
230
231    /// Comparison group definitions with names
232    pub comparisons: IndexMap<String, Vec<GroupComponent>>,
233
234    /// Human-readable description
235    #[serde(default)]
236    pub description: String,
237}
238
/// Default multiplier for LZ matches in size estimation, used by serde when
/// the key is missing from the schema.
pub(crate) fn default_lz_match_multiplier() -> f64 {
    const DEFAULT_LZ_MATCH_MULTIPLIER: f64 = 0.375;
    DEFAULT_LZ_MATCH_MULTIPLIER
}
242
/// Default multiplier for entropy in size estimation, used by serde when the
/// key is missing from the schema.
pub(crate) fn default_entropy_multiplier() -> f64 {
    const DEFAULT_ENTROPY_MULTIPLIER: f64 = 1.0;
    DEFAULT_ENTROPY_MULTIPLIER
}
246
247#[derive(Debug, Deserialize, Clone)]
248#[serde(tag = "type")] // Use "type" field as variant discriminant
249pub enum GroupComponent {
250    /// Array of field values
251    #[serde(rename = "array")]
252    Array(GroupComponentArray),
253
254    /// Structured group of components
255    #[serde(rename = "struct")]
256    Struct(GroupComponentStruct),
257
258    /// Padding bits.
259    /// This should only be used from within structs.
260    #[serde(rename = "padding")]
261    Padding(GroupComponentPadding),
262
263    /// Read the data from a field, once.
264    /// This should only be used from within structs.
265    #[serde(rename = "field")]
266    Field(GroupComponentField),
267
268    /// Skip a number of bits from a field.
269    /// This should only be used from within structs.
270    #[serde(rename = "skip")]
271    Skip(GroupComponentSkip),
272}
273
274/// Reads all values of a single field until end of input.
275/// i.e. `R0`, `R0`, `R0` etc. until all R0 values are read.
276///
277/// ```yaml
278/// - { type: array, field: R } # reads all 'R' values from input
279/// ```
280///
281/// This is read in a loop until no more bytes are written to output.  
282/// Alternatively, you can read only some bits at a time using the `bits` field.  
283///
284/// ```yaml
285/// - { type: array, field: R, offset: 2, bits: 4 } # read slice [2-6] for 'R' values from input
286/// ```
287///
288/// Allowed properties:
289///
290/// - `offset`: Number of bits to skip before reading `bits`.
291/// - `bits`: Number of bits to read (default: size of field)
292/// - `field`: Field name
293///
294/// The `offset` and `bits` properties allow you to read a slice of a field.
295/// Regardless of the slice read however, after each read is done, the stream will be advanced to the
296/// next field.
297///
298/// Note: The `Array` type can be represented as `Struct` technically speaking, this is
299/// actually a shorthand.
300#[derive(Debug, Deserialize, Clone)]
301pub struct GroupComponentArray {
302    /// Name of the field to pull the data from.
303    pub field: String,
304    /// Offset in the field from which to read.
305    #[serde(default)]
306    pub offset: u32,
307    /// The number of bits to read from the field.
308    #[serde(default)]
309    pub bits: u32,
310    /// Multiplier for LZ matches in size estimation
311    #[serde(default = "default_lz_match_multiplier")]
312    pub lz_match_multiplier: f64,
313    /// Multiplier for entropy in size estimation
314    #[serde(default = "default_entropy_multiplier")]
315    pub entropy_multiplier: f64,
316}
317
318impl Default for GroupComponentArray {
319    fn default() -> Self {
320        Self {
321            field: String::new(),
322            offset: 0,
323            bits: 0,
324            lz_match_multiplier: default_lz_match_multiplier(),
325            entropy_multiplier: default_entropy_multiplier(),
326        }
327    }
328}
329
330impl GroupComponentArray {
331    /// Retrieve the number of bits to read from the field.
332    /// Either directly from the [`GroupComponentArray`] or if not specified, from the [`AnalyzerFieldState`].
333    pub fn get_bits(&self, field: &AnalyzerFieldState) -> u32 {
334        if self.bits == 0 {
335            field.lenbits
336        } else {
337            self.bits
338        }
339    }
340}
341
342/// Structured group of components
343///
344/// ```yaml
345/// - type: struct # R0 G0 B0. Repeats until no data written.
346///   fields:
347///     - { type: field, field: R } # reads 1 'R' value from input
348///     - { type: field, field: G } # reads 1 'G' value from input
349///     - { type: field, field: B } # reads 1 'B' value from input
350/// ```
351///
352/// Allowed properties:
353///
354/// - `fields`: Array of field names
355#[derive(Debug, Deserialize, Clone)]
356pub struct GroupComponentStruct {
357    /// Array of field names
358    pub fields: Vec<GroupComponent>,
359    /// Multiplier for LZ matches in size estimation
360    #[serde(default = "default_lz_match_multiplier")]
361    pub lz_match_multiplier: f64,
362    /// Multiplier for entropy in size estimation
363    #[serde(default = "default_entropy_multiplier")]
364    pub entropy_multiplier: f64,
365}
366
367/// Padding bits  
368/// This should only be used from within structs.
369///
370/// ```yaml
371/// - { type: padding, bits: 4, value: 0 } # appends 4 padding bits
372/// ```
373///
374/// Allowed properties:
375///
376/// - `bits`: Number of bits to insert
377/// - `value`: Value to insert in those bits
378#[derive(Debug, Deserialize, Clone)]
379pub struct GroupComponentPadding {
380    /// Number of bits to insert
381    pub bits: u8,
382    /// Value to insert in those bits
383    #[serde(default)]
384    pub value: u8,
385}
386
387/// Skip a number of bits from a field.
388/// This should only be used from within structs.
389///
390/// ```yaml
391/// - { type: skip, field: R, bits: 4 } # skips 4 bits from the 'R' field
392/// ```
393///
394/// Allowed properties:
395///
396/// - `field`: Field name
397/// - `bits`: Number of bits to skip
398#[derive(Debug, Deserialize, Clone)]
399pub struct GroupComponentSkip {
400    /// Name of the field to skip bits from.
401    pub field: String,
402    /// Number of bits to skip from the field.
403    pub bits: u32,
404}
405
406/// Read the data from a field, once.
407/// This should only be used from within structs.
408///
409/// ```yaml
410/// - { type: field, field: R } # reads 1 'R' value from input
411/// ```
412///
413/// Allowed properties:
414///
415/// - `field`: Field name
416/// - `bits`: Number of bits to read (default: size of field)
417#[derive(Debug, Deserialize, Clone)]
418pub struct GroupComponentField {
419    /// Name of the field
420    pub field: String,
421    /// Number of bits to read from the field
422    #[serde(default)]
423    pub bits: u32,
424}
425
426impl GroupComponentField {
427    /// Assign the number of bits to read from the field.
428    /// Either keep value from [`GroupComponentField`] if manually specified, or override from the parameter.
429    pub fn set_bits(&mut self, default: u32) {
430        if self.bits == 0 {
431            self.bits = default
432        }
433    }
434}
435
436/// Allows us to define a nested item as either a field or group
437#[derive(Debug, Deserialize)]
438#[serde(untagged)]
439#[non_exhaustive]
440pub enum FieldDefinition {
441    Field(Field),
442    Group(Group),
443}
444
445/// A single field definition
446#[derive(Debug)]
447pub struct Field {
448    pub bits: u32,
449    pub description: String,
450    pub bit_order: BitOrder,
451    pub skip_if_not: Vec<Condition>,
452    pub skip_frequency_analysis: bool,
453}
454
455impl<'de> Deserialize<'de> for Field {
456    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
457    where
458        D: serde::Deserializer<'de>,
459    {
460        #[derive(Deserialize)]
461        #[serde(untagged)]
462        enum FieldRepr {
463            Shorthand(u32),
464            Extended {
465                bits: u32,
466                #[serde(default)]
467                description: String,
468                #[serde(default)]
469                #[serde(rename = "bit_order")]
470                bit_order: BitOrder,
471                #[serde(default)]
472                skip_if_not: Vec<Condition>,
473                #[serde(default)]
474                skip_frequency_analysis: bool,
475            },
476        }
477
478        // The magic that allows for either shorthand or extended notation
479        match FieldRepr::deserialize(deserializer)? {
480            FieldRepr::Shorthand(size) => Ok(Field {
481                bits: size,
482                description: String::new(),
483                bit_order: BitOrder::default(),
484                skip_if_not: Vec::new(),
485                skip_frequency_analysis: false,
486            }),
487            FieldRepr::Extended {
488                bits,
489                description,
490                bit_order,
491                skip_if_not,
492                skip_frequency_analysis,
493            } => Ok(Field {
494                bits,
495                description,
496                bit_order,
497                skip_if_not,
498                skip_frequency_analysis,
499            }),
500        }
501    }
502}
503
504/// Group of related fields or components
505///
506/// Represents a logical grouping of fields in the bit-packed structure.
507/// Groups can contain both individual fields and nested sub-groups.
508///
509/// # Fields
510/// - `_type`: Must be "group" (validated during parsing)
511/// - `description`: Optional description of the group's purpose
512/// - `fields`: Map of field names to their definitions (fields or sub-groups)
513///
514/// # Examples
515/// ```yaml
516/// root:
517///   type: group
518///   description: Main structure
519///   fields:
520///     header:
521///       type: group
522///       fields:
523///         mode: 2
524///         partition: 4
525///     colors:
526///       type: group
527///       fields:
528///         r:
529///           type: group
530///           fields:
531///             R0: 5
532///             R1: 5
533/// ```
534#[derive(Debug, Default)]
535pub struct Group {
536    _type: String,
537    pub description: String,
538    pub fields: IndexMap<String, FieldDefinition>,
539    /// Total bits calculated from children fields/groups
540    pub bits: u32,
541    /// The bit order of this group.
542    /// Inherited by all the children unless explicitly overwritten.
543    pub bit_order: BitOrder,
544    pub skip_if_not: Vec<Condition>,
545    pub skip_frequency_analysis: bool,
546}
547
548impl<'de> Deserialize<'de> for Group {
549    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
550    where
551        D: serde::Deserializer<'de>,
552    {
553        #[derive(Deserialize)]
554        struct GroupRepr {
555            #[serde(rename = "type")]
556            _type: String,
557            #[serde(default)]
558            description: String,
559            #[serde(default)]
560            bit_order: BitOrder,
561            #[serde(default)]
562            fields: IndexMap<String, FieldDefinition>,
563            #[serde(default)]
564            skip_if_not: Vec<Condition>,
565            #[serde(default)]
566            skip_frequency_analysis: bool,
567        }
568
569        let group = GroupRepr::deserialize(deserializer)?;
570        if group._type != "group" {
571            return Err(serde::de::Error::custom(format!(
572                "Invalid group type: {} (must be 'group')",
573                group._type
574            )));
575        }
576
577        // Calculate total bits from children
578        // This is recursive. Deserialize of child would have calculated this for the child.
579        let bits = group
580            .fields
581            .values()
582            .map(|fd| match fd {
583                FieldDefinition::Field(f) => f.bits,
584                FieldDefinition::Group(g) => g.bits,
585            })
586            .sum();
587
588        // Create the group with its own bit_order
589        let mut group = Group {
590            _type: group._type,
591            description: group.description,
592            fields: group.fields,
593            bits,
594            bit_order: group.bit_order,
595            skip_if_not: group.skip_if_not,
596            skip_frequency_analysis: group.skip_frequency_analysis,
597        };
598
599        // Propagate bit_order to children if not explicitly set
600        let bit_order = group.bit_order;
601        propagate_bit_order(&mut group, bit_order);
602
603        Ok(group)
604    }
605}
606
607impl Group {
608    /// Collects a list of field paths in schema order
609    /// This includes both fields and groups
610    fn collect_field_paths(&self, paths: &mut Vec<String>, parent_path: &str) {
611        for (name, item) in &self.fields {
612            match item {
613                FieldDefinition::Field(_) => {
614                    let full_path = if parent_path.is_empty() {
615                        name
616                    } else {
617                        &format!("{}.{}", parent_path, name)
618                    };
619                    paths.push(full_path.clone());
620                }
621                FieldDefinition::Group(g) => {
622                    let new_parent = if parent_path.is_empty() {
623                        name
624                    } else {
625                        &format!("{}.{}", parent_path, name)
626                    };
627                    paths.push(new_parent.clone());
628                    g.collect_field_paths(paths, new_parent);
629                }
630            }
631        }
632    }
633}
634
635/// Bit ordering specification for field values
636///
637/// Determines how bits are interpreted within a field:
638/// - `Msb`: Most significant bit first (default)
639/// - `Lsb`: Least significant bit first
640///
641/// # Examples
642///
643/// ```yaml
644/// bit_order: msb  # Default, bits are read left-to-right
645/// bit_order: lsb  # Bits are read right-to-left
646/// ```
647#[derive(Debug, Deserialize, Default, PartialEq, Eq, Clone, Copy)]
648#[serde(rename_all = "snake_case")]
649pub enum BitOrder {
650    /// Not initialized. If not set down the road, defaults to [Msb](BitOrder::Msb)
651    #[default]
652    Default,
653    Msb,
654    Lsb,
655}
656
657impl BitOrder {
658    pub fn get_with_default_resolve(self) -> BitOrder {
659        if self == BitOrder::Default {
660            BitOrder::Msb
661        } else {
662            self
663        }
664    }
665}
666
667/// Recursively propagates bit_order to child fields and groups
668fn propagate_bit_order(group: &mut Group, parent_bit_order: BitOrder) {
669    for (_, field_def) in group.fields.iter_mut() {
670        match field_def {
671            FieldDefinition::Field(field) => {
672                // Only inherit if field has default bit_order
673                if field.bit_order == BitOrder::Default {
674                    field.bit_order = parent_bit_order;
675                }
676            }
677            FieldDefinition::Group(child_group) => {
678                // Only inherit if child group has default bit_order
679                if child_group.bit_order == BitOrder::Default {
680                    child_group.bit_order = parent_bit_order;
681                }
682                // Recursively propagate to nested groups
683                propagate_bit_order(child_group, child_group.bit_order);
684            }
685        }
686    }
687}
688
689/// Defines a single condition for offset selection
690///
691/// # Examples
692///
693/// ```yaml
694/// byte_offset: 0x00
695/// bit_offset: 0
696/// bits: 32
697/// value: 0x44445320  # DDS magic
698/// ```
699#[derive(Debug, PartialEq, Clone, serde::Deserialize)]
700pub struct Condition {
701    /// Byte offset from start of structure
702    pub byte_offset: u64,
703    /// Bit offset within the byte (0-7, left to right)
704    pub bit_offset: u8,
705    /// Number of bits to compare (1-32)
706    pub bits: u8,
707    /// Expected value in big-endian byte order
708    pub value: u64,
709    /// Bit order of the condition
710    #[serde(default)]
711    pub bit_order: BitOrder,
712}
713
714/// Defines conditional offset selection rules
715///
716/// # Examples
717///
718/// ```yaml
719/// - offset: 0x94  # BC7 data offset
720///   conditions:
721///     - byte_offset: 0x00
722///       bit_offset: 0
723///       bits: 32
724///       value: 0x44445320
725///     - byte_offset: 0x54
726///       bit_offset: 0
727///       bits: 32
728///       value: 0x44583130
729/// ```
730#[derive(Debug, Clone, Deserialize)]
731pub struct ConditionalOffset {
732    /// Target offset to use if conditions match
733    pub offset: u64,
734    /// List of conditions that must all be satisfied
735    pub conditions: Vec<Condition>,
736}
737
738#[derive(thiserror::Error, Debug)]
739pub enum SchemaError {
740    #[error("Invalid schema version (expected 1.0)")]
741    InvalidVersion,
742    #[error("YAML parsing error: {0}")]
743    YamlError(#[from] serde_yaml::Error),
744    #[error("I/O error: {0}")]
745    Io(#[from] std::io::Error),
746    #[error("Invalid group type: {0} (must be 'group')")]
747    InvalidGroupType(String),
748}
749
750impl Schema {
751    /// Creates a new Schema from a YAML string.
752    ///
753    /// # Arguments
754    /// * `content` - YAML string containing the schema definition
755    ///
756    /// # Returns
757    /// * `Result<Self, SchemaError>` - Resulting schema or error
758    pub fn from_yaml(content: &str) -> Result<Self, SchemaError> {
759        let schema: Schema = serde_yaml::from_str(content)?;
760
761        if schema.version != "1.0" {
762            return Err(SchemaError::InvalidVersion);
763        }
764
765        Ok(schema)
766    }
767
768    /// Loads and parses a schema from a YAML file.
769    ///
770    /// # Arguments
771    /// * `path` - Path to the schema YAML file
772    ///
773    /// # Returns
774    /// * `Result<Self, SchemaError>` - Resulting schema or error
775    pub fn load_from_file(path: &Path) -> Result<Self, SchemaError> {
776        let content = std::fs::read_to_string(path)?;
777        Self::from_yaml(&content)
778    }
779
780    /// Collects a list of field (and group) paths in schema order.
781    ///
782    /// # Examples
783    ///
784    /// Given the following schema:
785    ///
786    /// ```yaml
787    /// root:
788    ///   type: group
789    ///   fields:
790    ///     header:
791    ///       type: group
792    ///       fields:
793    ///         mode: 2
794    ///         partition: 4
795    ///     colors:
796    ///       type: group
797    ///       fields:
798    ///         r:
799    ///           type: array
800    ///           field: R
801    ///         g:
802    ///           type: array
803    ///           field: G
804    ///         b:
805    ///           type: array
806    ///           field: B
807    /// ```
808    ///
809    /// The resulting field paths would be:
810    /// - "header"
811    /// - "colors"
812    /// - "colors.r"
813    /// - "colors.g"
814    /// - "colors.b"
815    ///
816    /// # Returns
817    /// * `Vec<String>` - List of field paths in schema order
818    pub fn ordered_field_and_group_paths(&self) -> Vec<String> {
819        let mut paths = Vec::new();
820        self.root.collect_field_paths(&mut paths, "");
821        paths
822    }
823}
824
825#[cfg(test)]
826mod tests {
827    use super::*;
828
829    macro_rules! test_schema {
830        ($yaml:expr, $test:expr) => {{
831            let schema = Schema::from_yaml($yaml).expect("Failed to parse schema");
832            $test(schema);
833        }};
834    }
835
836    // Version Tests
837    mod version_tests {
838        use super::*;
839
840        #[test]
841        fn supports_version_10() {
842            let yaml = r#"
843version: '1.0'
844metadata: { name: Test }
845root: { type: group, fields: {} }
846bit_order: msb
847"#;
848            test_schema!(yaml, |schema: Schema| {
849                assert_eq!(schema.version, "1.0");
850                assert_eq!(schema.bit_order, BitOrder::Msb);
851            });
852        }
853
854        #[test]
855        fn rejects_unsupported_version() {
856            let yaml = r#"
857version: '2.0'
858metadata: { name: Test }
859root: { type: group, fields: {} }
860"#;
861            assert!(Schema::from_yaml(yaml).is_err());
862        }
863    }
864
865    // Metadata Tests
866    mod metadata_tests {
867        use super::*;
868
869        #[test]
870        fn parses_full_metadata() {
871            let yaml = r#"
872version: '1.0'
873metadata:
874    name: BC7 Mode4
875    description: Test description
876root: { type: group, fields: {} }
877"#;
878            test_schema!(yaml, |schema: Schema| {
879                assert_eq!(schema.metadata.name, "BC7 Mode4");
880                assert_eq!(schema.metadata.description, "Test description");
881            });
882        }
883
884        #[test]
885        fn handles_empty_metadata() {
886            let yaml = r#"
887version: '1.0'
888root: { type: group, fields: {} }
889"#;
890            test_schema!(yaml, |schema: Schema| {
891                assert_eq!(schema.metadata.name, "");
892                assert_eq!(schema.metadata.description, "");
893            });
894        }
895    }
896
897    // Fields Section Tests
898    mod fields_tests {
899        use super::*;
900
901        #[test]
902        fn supports_shorthand_field() {
903            let yaml = r#"
904version: '1.0'
905metadata: { name: Test }
906root:
907    type: group
908    fields:
909        mode: 2
910        partition: 4
911"#;
912            test_schema!(yaml, |schema: Schema| {
913                let mode = match schema.root.fields.get("mode") {
914                    Some(FieldDefinition::Field(f)) => f,
915                    _ => panic!("Expected field"),
916                };
917                assert_eq!(mode.bits, 2);
918
919                let partition = match schema.root.fields.get("partition") {
920                    Some(FieldDefinition::Field(f)) => f,
921                    _ => panic!("Expected field"),
922                };
923                assert_eq!(partition.bits, 4);
924            });
925        }
926
927        #[test]
928        fn supports_extended_field() {
929            let yaml = r#"
930version: '1.0'
931metadata: { name: Test }
932root:
933    type: group
934    fields:
935        mode:
936            type: field
937            bits: 3
938            description: Mode selector
939            bit_order: lsb
940bit_order: msb
941"#;
942            test_schema!(yaml, |schema: Schema| {
943                let field = match schema.root.fields.get("mode") {
944                    Some(FieldDefinition::Field(f)) => f,
945                    _ => panic!("Expected field"),
946                };
947                assert_eq!(field.bits, 3);
948                assert_eq!(field.description, "Mode selector");
949                assert_eq!(field.bit_order, BitOrder::Lsb);
950                assert_eq!(schema.bit_order, BitOrder::Msb);
951            });
952        }
953
#[test]
fn supports_nested_groups() {
    // Two top-level groups; `colors` nests a further group `r` one level down.
    let yaml = r#"
version: '1.0'
metadata: { name: Test }
root:
    type: group
    fields:
        header:
            type: group
            fields:
                mode: 2
                partition: 4
        colors:
            type: group
            fields:
                r:
                    type: group
                    fields:
                        R0: 5
                        R1: 5
bit_order: msb
"#;
    test_schema!(yaml, |schema: Schema| {
        let top_level = &schema.root.fields;

        // First-level group holding two shorthand fields.
        let header = if let Some(FieldDefinition::Group(group)) = top_level.get("header") {
            group
        } else {
            panic!("Expected group")
        };
        assert_eq!(header.fields.len(), 2);

        // Walk colors -> r and count the entries of the innermost group.
        let colors = if let Some(FieldDefinition::Group(group)) = top_level.get("colors") {
            group
        } else {
            panic!("Expected group")
        };
        let red = if let Some(FieldDefinition::Group(group)) = colors.fields.get("r") {
            group
        } else {
            panic!("Expected group")
        };
        assert_eq!(red.fields.len(), 2);
        assert_eq!(schema.bit_order, BitOrder::Msb);
    });
}
996
#[test]
fn calculates_group_bits_from_children() {
    // NOTE: every fixture line below is indented by one space and padded with trailing
    // blanks; the literal is kept as it was found.
    let yaml = r#"                                                                                                                                                
 version: '1.0'                                                                                                                                                    
 root:                                                                                                                                                             
     type: group                                                                                                                                                   
     fields:                                                                                                                                                       
         a: 4                                                                                                                                                      
         b: 8                                                                                                                                                      
         subgroup:                                                                                                                                                 
             type: group                                                                                                                                           
             fields:                                                                                                                                               
                 c: 2                                                                                                                                              
                 d: 2                                                                                                                                              
 "#;
    test_schema!(yaml, |schema: Schema| {
        // Root total: a (4) + b (8) + subgroup (2 + 2) = 16 bits.
        assert_eq!(schema.root.bits, 16);
        // The nested group on its own: c (2) + d (2) = 4 bits.
        if let Some(FieldDefinition::Group(subgroup)) = schema.root.fields.get("subgroup") {
            assert_eq!(subgroup.bits, 4);
        } else {
            panic!("Expected subgroup");
        }
    });
}
1022    }
1023
1024    // Bit Order Tests
mod bit_order_tests {
    use super::*;

    #[test]
    fn inherits_bit_order_from_parent() {
        // The root group declares `lsb`; nothing beneath it names an order of its own.
        let yaml = r#"
version: '1.0'
root:
    type: group
    bit_order: lsb
    fields:
        a: 4
        b: 8
        subgroup:
            type: group
            fields:
                c: 2
                d: 2
bit_order: msb
"#;
        test_schema!(yaml, |schema: Schema| {
            // Fields sitting directly under the root report `lsb`.
            for name in ["a", "b"].iter() {
                if let Some(FieldDefinition::Field(field)) = schema.root.fields.get(*name) {
                    assert_eq!(field.bit_order, BitOrder::Lsb);
                } else {
                    panic!("Expected field");
                }
            }

            // So does the nested group, together with each field inside it.
            let lookup = schema.root.fields.get("subgroup");
            let subgroup = if let Some(FieldDefinition::Group(group)) = lookup {
                group
            } else {
                panic!("Expected subgroup")
            };
            assert_eq!(subgroup.bit_order, BitOrder::Lsb);
            for name in ["c", "d"].iter() {
                if let Some(FieldDefinition::Field(field)) = subgroup.fields.get(*name) {
                    assert_eq!(field.bit_order, BitOrder::Lsb);
                } else {
                    panic!("Expected field");
                }
            }
        });
    }

    #[test]
    fn preserves_explicit_bit_order_in_children() {
        // Root is `lsb`, but field `b` and group `subgroup` each state `msb` themselves.
        let yaml = r#"
version: '1.0'
root:
    type: group
    bit_order: lsb
    fields:
        a: 4
        b:
            type: field
            bits: 8
            bit_order: msb
        subgroup:
            type: group
            bit_order: msb
            fields:
                c: 2
                d: 2
bit_order: msb
"#;
        test_schema!(yaml, |schema: Schema| {
            let root_fields = &schema.root.fields;

            // `a` has no order of its own and reports the root group's `lsb`.
            let a = if let Some(FieldDefinition::Field(field)) = root_fields.get("a") {
                field
            } else {
                panic!("Expected field")
            };
            assert_eq!(a.bit_order, BitOrder::Lsb);

            // `b` keeps the `msb` it declared.
            let b = if let Some(FieldDefinition::Field(field)) = root_fields.get("b") {
                field
            } else {
                panic!("Expected field")
            };
            assert_eq!(b.bit_order, BitOrder::Msb);

            // The nested group keeps its `msb`, and its fields report `msb` too.
            let subgroup = if let Some(FieldDefinition::Group(group)) = root_fields.get("subgroup")
            {
                group
            } else {
                panic!("Expected subgroup")
            };
            assert_eq!(subgroup.bit_order, BitOrder::Msb);
            for name in ["c", "d"].iter() {
                if let Some(FieldDefinition::Field(field)) = subgroup.fields.get(*name) {
                    assert_eq!(field.bit_order, BitOrder::Msb);
                } else {
                    panic!("Expected field");
                }
            }
        });
    }

    #[test]
    fn uses_default_bit_order_when_not_specified() {
        // No `bit_order` key appears anywhere in this document.
        let yaml = r#"
version: '1.0'
root:
    type: group
    fields:
        a: 4
        b: 8
"#;
        test_schema!(yaml, |schema: Schema| {
            // Both fields are expected to report `BitOrder::Default`.
            for name in ["a", "b"].iter() {
                if let Some(FieldDefinition::Field(field)) = schema.root.fields.get(*name) {
                    assert_eq!(field.bit_order, BitOrder::Default);
                } else {
                    panic!("Expected field");
                }
            }
        });
    }
}
1154
1155    // Edge Cases
mod edge_cases {
    use super::*;

    #[test]
    fn accepts_minimal_valid_schema() {
        // Smallest document exercised here: a version plus an empty root group.
        let yaml = r#"
version: '1.0'
root: { type: group, fields: {} }
"#;
        test_schema!(yaml, |schema: Schema| {
            assert_eq!(schema.version, "1.0");
            assert!(schema.root.fields.is_empty());
        });
    }

    #[test]
    fn handles_empty_analysis() {
        // `analysis` is present but carries no keys at all.
        let yaml = r#"
version: '1.0'
metadata: { name: Test }
analysis: {}
root: { type: group, fields: {} }
"#;
        test_schema!(yaml, |schema: Schema| {
            // Neither comparison list may be populated by an empty `analysis` map.
            assert!(schema.analysis.split_groups.is_empty());
            assert!(schema.analysis.compare_groups.is_empty());
        });
    }
}
1184
1185    // Conditional Offset Tests
mod conditional_offset_tests {
    use super::*;

    #[test]
    fn parses_basic_conditional_offset() {
        // One conditional offset guarded by two conditions.
        let yaml = r#"
version: '1.0'
metadata:
  name: Test Schema
conditional_offsets:
  - offset: 0x94
    conditions:
      - byte_offset: 0x00
        bit_offset: 0
        bits: 32
        value: 0x44445320  # DDS magic
      - byte_offset: 0x54
        bit_offset: 0
        bits: 32
        value: 0x44583130
root:
  type: group
  fields: {}
"#;

        // Deserialized directly through serde_yaml rather than `Schema::from_yaml`.
        let schema: Schema = serde_yaml::from_str(yaml).unwrap();
        assert_eq!(schema.conditional_offsets.len(), 1);

        let offset = &schema.conditional_offsets[0];
        assert_eq!(offset.offset, 0x94);
        assert_eq!(offset.conditions.len(), 2);

        // First condition: the value the fixture labels "DDS magic", at byte 0.
        let cond1 = &offset.conditions[0];
        assert_eq!(cond1.byte_offset, 0x00);
        assert_eq!(cond1.bit_offset, 0);
        assert_eq!(cond1.bits, 32);
        assert_eq!(cond1.value, 0x44445320);

        // Second condition: checked as well so a regression that only affects
        // later list entries cannot slip through.
        let cond2 = &offset.conditions[1];
        assert_eq!(cond2.byte_offset, 0x54);
        assert_eq!(cond2.bit_offset, 0);
        assert_eq!(cond2.bits, 32);
        assert_eq!(cond2.value, 0x44583130);
    }

    #[test]
    fn handles_missing_optional_fields() {
        // `conditional_offsets` is omitted entirely from this document.
        let yaml = r#"
version: '1.0'
metadata:
  name: Minimal Schema
root:
  type: group
  fields: {}
"#;

        let schema: Schema = serde_yaml::from_str(yaml).unwrap();
        assert!(schema.conditional_offsets.is_empty());
    }

    #[test]
    fn supports_skip_if_not_conditions() {
        // `skip_if_not` appears twice: on the `header` group and on its `magic` field.
        let yaml = r#"
version: '1.0'
metadata:
  name: Minimal Schema
root:
  type: group
  fields:
    header:
      type: group
      skip_if_not:
        - byte_offset: 0x00
          bit_offset: 0
          bits: 32
          value: 0x44445320
      fields:
        magic:
          type: field
          bits: 32
          skip_if_not:
            - byte_offset: 0x54
              bit_offset: 0
              bits: 32  
              value: 0x44583130
bit_order: msb
"#;

        let schema = Schema::from_yaml(yaml).unwrap();
        // Both matches are exhaustive over `FieldDefinition`, so a new variant
        // would surface here as a compile error.
        let header_group = match &schema.root.fields["header"] {
            FieldDefinition::Field(_field) => panic!("Expected group, got field"),
            FieldDefinition::Group(group) => group,
        };
        let magic_field = match &header_group.fields["magic"] {
            FieldDefinition::Field(field) => field,
            FieldDefinition::Group(_group) => panic!("Expected field, got group"),
        };

        // Test group-level conditions
        assert_eq!(header_group.skip_if_not.len(), 1);
        assert_eq!(header_group.skip_if_not[0].byte_offset, 0x00);
        assert_eq!(header_group.skip_if_not[0].value, 0x44445320);

        // Test field-level conditions
        assert_eq!(magic_field.skip_if_not.len(), 1);
        assert_eq!(magic_field.skip_if_not[0].byte_offset, 0x54);
        assert_eq!(magic_field.skip_if_not[0].value, 0x44583130);
        assert_eq!(schema.bit_order, BitOrder::Msb);
    }
}
1290
mod split_compare_tests {
    use super::*;

    #[test]
    fn parses_basic_comparison() {
        // A fully specified split comparison, including both estimator overrides.
        let yaml = r#"
version: '1.0'
analysis:
  split_groups:
    - name: color_layouts
      group_1: [colors]
      group_2: [color_r, color_g, color_b]
      description: Compare interleaved vs planar layouts
      compression_estimation_group_1:
        lz_match_multiplier: 0.5
        entropy_multiplier: 1.2
      compression_estimation_group_2:
        lz_match_multiplier: 0.7
        entropy_multiplier: 1.5
root:
  type: group
  fields: {}
"#;

        let schema = Schema::from_yaml(yaml).unwrap();
        let split_groups = &schema.analysis.split_groups;
        assert_eq!(split_groups.len(), 1);

        // Name, both field lists and the description.
        let entry = &split_groups[0];
        assert_eq!(entry.name, "color_layouts");
        assert_eq!(entry.group_1, vec!["colors"]);
        assert_eq!(entry.group_2, vec!["color_r", "color_g", "color_b"]);
        assert_eq!(entry.description, "Compare interleaved vs planar layouts");

        // Both estimator overrides must be present before their values are read.
        assert!(entry.compression_estimation_group_1.is_some());
        assert!(entry.compression_estimation_group_2.is_some());

        let first = entry.compression_estimation_group_1.as_ref().unwrap();
        assert_eq!(first.lz_match_multiplier, 0.5);
        assert_eq!(first.entropy_multiplier, 1.2);

        let second = entry.compression_estimation_group_2.as_ref().unwrap();
        assert_eq!(second.lz_match_multiplier, 0.7);
        assert_eq!(second.entropy_multiplier, 1.5);
    }

    #[test]
    fn handles_minimal_comparison() {
        // Only the name and the two field lists are given.
        let yaml = r#"
version: '1.0'
analysis:
  split_groups:
    - name: basic
      group_1: [a]
      group_2: [b]
root:
  type: group
  fields: {}
"#;

        let schema = Schema::from_yaml(yaml).unwrap();
        let split_groups = &schema.analysis.split_groups;
        assert_eq!(split_groups.len(), 1);

        let entry = &split_groups[0];
        assert_eq!(entry.name, "basic");
        assert!(entry.description.is_empty());
        // With no estimator overrides in the document, both slots stay `None`.
        assert!(entry.compression_estimation_group_1.is_none());
        assert!(entry.compression_estimation_group_2.is_none());
    }
}
1375
1376    mod group_compare_tests {
1377        use crate::schema::{GroupComponent, Schema};
1378
1379        #[test]
1380        fn parses_custom_comparison() {
1381            let yaml = r#"
1382version: '1.0'
1383analysis:
1384  compare_groups:
1385    - name: convert_7_to_8_bit
1386      description: "Adjust 7-bit color channel to 8-bit by appending a padding bit."
1387      lz_match_multiplier: 0.45
1388      entropy_multiplier: 1.1
1389      baseline: # R, R, R
1390        - type: array
1391          field: color7
1392          bits: 7
1393          lz_match_multiplier: 0.5
1394          entropy_multiplier: 1.2
1395      comparisons:
1396        padded_8bit:
1397          - type: struct # R+0, R+0, R+0
1398            lz_match_multiplier: 0.6
1399            entropy_multiplier: 1.3
1400            fields:
1401              - { type: field, field: color7, bits: 7 } 
1402              - { type: padding, bits: 1, value: 0 } 
1403              - { type: skip, field: color7, bits: 0 } 
1404root:
1405  type: group
1406  fields: {}
1407"#;
1408
1409            let schema = Schema::from_yaml(yaml).unwrap();
1410            let comparisons = &schema.analysis.compare_groups;
1411
1412            assert_eq!(comparisons.len(), 1);
1413            assert_eq!(comparisons[0].name, "convert_7_to_8_bit");
1414
1415            // Verify baseline
1416            let baseline = &comparisons[0].baseline;
1417            assert_eq!(baseline.len(), 1);
1418            match baseline.first().unwrap() {
1419                GroupComponent::Array(array) => {
1420                    assert_eq!(array.field, "color7");
1421                    assert_eq!(array.bits, 7);
1422                    // Verify the array component's multipliers
1423                    assert_eq!(array.lz_match_multiplier, 0.5);
1424                    assert_eq!(array.entropy_multiplier, 1.2);
1425                }
1426                _ => unreachable!("Expected an array type"),
1427            }
1428
1429            // Verify comparisons
1430            let comps = &comparisons[0].comparisons;
1431            assert_eq!(comps.len(), 1);
1432            assert!(comps.contains_key("padded_8bit"));
1433
1434            let padded = &comps["padded_8bit"];
1435            assert_eq!(padded.len(), 1);
1436            match padded.first().unwrap() {
1437                GroupComponent::Struct(group) => {
1438                    // Verify the struct component's multipliers
1439                    assert_eq!(group.lz_match_multiplier, 0.6);
1440                    assert_eq!(group.entropy_multiplier, 1.3);
1441                    assert_eq!(group.fields.len(), 3);
1442
1443                    // Assert fields
1444                    match &group.fields[0] {
1445                        GroupComponent::Field(field) => {
1446                            assert_eq!(field.field, "color7");
1447                            assert_eq!(field.bits, 7);
1448                        }
1449                        _ => unreachable!("Expected a field type"),
1450                    }
1451                    match &group.fields[1] {
1452                        GroupComponent::Padding(padding) => {
1453                            assert_eq!(padding.bits, 1);
1454                            assert_eq!(padding.value, 0);
1455                        }
1456                        _ => unreachable!("Expected a padding type"),
1457                    }
1458                    match &group.fields[2] {
1459                        GroupComponent::Skip(skip) => {
1460                            assert_eq!(skip.bits, 0);
1461                        }
1462                        _ => unreachable!("Expected a skip type"),
1463                    }
1464                }
1465                _ => unreachable!("Expected a struct type"),
1466            }
1467        }
1468
1469        #[test]
1470        fn rejects_invalid_custom_comparison() {
1471            let yaml = r#"
1472
1473version: '1.0'
1474root:
1475  type: group
1476  fields: {}
1477analysis:
1478  compare_groups:
1479    - name: missing_fields
1480      group_1: [field_a]
1481"#;
1482
1483            let result = Schema::from_yaml(yaml);
1484            assert!(result.is_err());
1485        }
1486
1487        #[test]
1488        fn preserves_comparison_order() {
1489            let yaml = r#"
1490version: '1.0'
1491analysis:
1492  compare_groups:
1493    - name: bit_expansion
1494      description: "Test multiple comparison order preservation"
1495      baseline:
1496        - { type: array, field: original }
1497      comparisons:
1498        comparison_c:
1499          - { type: padding, bits: 1 }
1500        comparison_a: 
1501          - { type: padding, bits: 2 }
1502        comparison_b:
1503          - { type: padding, bits: 3 }
1504root:
1505  type: group
1506  fields: {}
1507"#;
1508
1509            let schema = Schema::from_yaml(yaml).unwrap();
1510            let comparison = &schema.analysis.compare_groups[0];
1511
1512            // Verify IndexMap preserves insertion order
1513            let keys: Vec<&str> = comparison.comparisons.keys().map(|s| s.as_str()).collect();
1514            assert_eq!(keys, vec!["comparison_c", "comparison_a", "comparison_b"]);
1515
1516            // Verify basic parsing
1517            assert_eq!(comparison.name, "bit_expansion");
1518            assert_eq!(
1519                comparison.description,
1520                "Test multiple comparison order preservation"
1521            );
1522            assert_eq!(comparison.comparisons.len(), 3);
1523        }
1524
1525        #[test]
1526        fn handles_minimal_custom_comparison() {
1527            let yaml = r#"
1528version: '1.0'
1529analysis:
1530  compare_groups:
1531    - name: minimal_test
1532      baseline: 
1533        - { type: array, field: test_field, bits: 8 } 
1534      comparisons:
1535        simple:
1536          - { type: array, field: test_field, bits: 8 } 
1537root:
1538  type: group
1539  fields: {}
1540"#;
1541
1542            let schema = Schema::from_yaml(yaml).unwrap();
1543            let comparisons = &schema.analysis.compare_groups;
1544
1545            assert_eq!(comparisons.len(), 1);
1546            assert_eq!(comparisons[0].name, "minimal_test");
1547            assert!(comparisons[0].description.is_empty());
1548        }
1549    }
1550}