Skip to main content

ironsbe_schema/
parser.rs

1//! SBE XML schema parser.
2//!
3//! This module provides functionality to parse FIX SBE XML schema files
4//! into the internal schema representation.
5
6use crate::error::ParseError;
7use crate::messages::{DataFieldDef, FieldDef, GroupDef, MessageDef};
8use crate::types::{
9    ByteOrder, CompositeDef, CompositeField, EnumDef, EnumValue, Presence, PrimitiveDef,
10    PrimitiveType, Schema, SetChoice, SetDef, TypeDef,
11};
12use quick_xml::Reader;
13use quick_xml::events::{BytesStart, Event};
14
15/// Parses an SBE XML schema from a string.
16///
17/// # Arguments
18/// * `xml` - XML schema content
19///
20/// # Returns
21/// Parsed schema or parse error.
22///
23/// # Errors
24/// Returns `ParseError` if the XML is malformed or contains invalid schema elements.
25pub fn parse_schema(xml: &str) -> Result<Schema, ParseError> {
26    let mut reader = Reader::from_str(xml);
27    reader.config_mut().trim_text(true);
28
29    let mut schema: Option<Schema> = None;
30    let mut buf = Vec::new();
31
32    loop {
33        match reader.read_event_into(&mut buf) {
34            Ok(Event::Start(ref e)) => {
35                let name_bytes = e.name().as_ref().to_vec();
36                let name = std::str::from_utf8(&name_bytes)?;
37                match name {
38                    "messageSchema" | "sbe:messageSchema" => {
39                        schema = Some(parse_message_schema(e)?);
40                    }
41                    "types" if schema.is_some() => {
42                        parse_types(&mut reader, schema.as_mut().unwrap())?;
43                    }
44                    "message" | "sbe:message" if schema.is_some() => {
45                        let msg = parse_message(&mut reader, e, schema.as_ref().unwrap())?;
46                        schema.as_mut().unwrap().messages.push(msg);
47                    }
48                    _ => {}
49                }
50            }
51            Ok(Event::Eof) => break,
52            Err(e) => return Err(ParseError::Xml(e)),
53            _ => {}
54        }
55        buf.clear();
56    }
57
58    schema.ok_or_else(|| ParseError::InvalidStructure {
59        message: "No messageSchema element found".to_string(),
60    })
61}
62
63/// Parses the messageSchema element attributes.
64fn parse_message_schema(e: &BytesStart<'_>) -> Result<Schema, ParseError> {
65    let mut package = String::new();
66    let mut id: u16 = 0;
67    let mut version: u16 = 0;
68    let mut semantic_version = String::new();
69    let mut description = None;
70    let mut byte_order = ByteOrder::LittleEndian;
71    let mut header_type = "messageHeader".to_string();
72
73    for attr in e.attributes().flatten() {
74        let key = std::str::from_utf8(attr.key.as_ref())?;
75        let value = std::str::from_utf8(&attr.value)?;
76
77        match key {
78            "package" => package = value.to_string(),
79            "id" => {
80                id = value
81                    .parse()
82                    .map_err(|_| ParseError::invalid_attr("messageSchema", "id", value))?
83            }
84            "version" => {
85                version = value
86                    .parse()
87                    .map_err(|_| ParseError::invalid_attr("messageSchema", "version", value))?
88            }
89            "semanticVersion" => semantic_version = value.to_string(),
90            "description" => description = Some(value.to_string()),
91            "byteOrder" => {
92                byte_order = ByteOrder::parse(value)
93                    .ok_or_else(|| ParseError::invalid_attr("messageSchema", "byteOrder", value))?
94            }
95            "headerType" => header_type = value.to_string(),
96            _ => {}
97        }
98    }
99
100    let mut schema = Schema::new(package, id, version);
101    schema.semantic_version = semantic_version;
102    schema.description = description;
103    schema.byte_order = byte_order;
104    schema.header_type = header_type;
105
106    Ok(schema)
107}
108
109/// Parses the types section.
110fn parse_types(reader: &mut Reader<&[u8]>, schema: &mut Schema) -> Result<(), ParseError> {
111    let mut buf = Vec::new();
112    let mut depth = 1;
113
114    loop {
115        match reader.read_event_into(&mut buf) {
116            Ok(Event::Start(ref e)) => {
117                depth += 1;
118                let name_bytes = e.name().as_ref().to_vec();
119                let name = std::str::from_utf8(&name_bytes)?;
120                match name {
121                    "type" => {
122                        let type_def = parse_primitive_type(reader, e)?;
123                        schema.add_type(TypeDef::Primitive(type_def));
124                        depth -= 1; // parse_primitive_type consumes the end tag
125                    }
126                    "composite" => {
127                        let composite = parse_composite(reader, e)?;
128                        schema.add_type(TypeDef::Composite(composite));
129                        depth -= 1;
130                    }
131                    "enum" => {
132                        let enum_def = parse_enum(reader, e)?;
133                        schema.add_type(TypeDef::Enum(enum_def));
134                        depth -= 1;
135                    }
136                    "set" => {
137                        let set_def = parse_set(reader, e)?;
138                        schema.add_type(TypeDef::Set(set_def));
139                        depth -= 1;
140                    }
141                    _ => {}
142                }
143            }
144            Ok(Event::Empty(ref e)) => {
145                let name_bytes = e.name().as_ref().to_vec();
146                let name = std::str::from_utf8(&name_bytes)?;
147                if name == "type" {
148                    let type_def = parse_primitive_type_empty(e)?;
149                    schema.add_type(TypeDef::Primitive(type_def));
150                }
151            }
152            Ok(Event::End(_)) => {
153                depth -= 1;
154                if depth == 0 {
155                    break;
156                }
157            }
158            Ok(Event::Eof) => break,
159            Err(e) => return Err(ParseError::Xml(e)),
160            _ => {}
161        }
162        buf.clear();
163    }
164
165    Ok(())
166}
167
168/// Parses a primitive type definition (with content).
169fn parse_primitive_type(
170    reader: &mut Reader<&[u8]>,
171    e: &BytesStart<'_>,
172) -> Result<PrimitiveDef, ParseError> {
173    let mut type_def = parse_primitive_type_empty(e)?;
174    let mut buf = Vec::new();
175
176    // Read until end tag, capturing any constant value
177    loop {
178        match reader.read_event_into(&mut buf) {
179            Ok(Event::Text(ref t)) => {
180                let text = std::str::from_utf8(t.as_ref())?.trim();
181                if !text.is_empty() {
182                    type_def.constant_value = Some(text.to_string());
183                }
184            }
185            Ok(Event::End(_)) => break,
186            Ok(Event::Eof) => break,
187            Err(e) => return Err(ParseError::Xml(e)),
188            _ => {}
189        }
190        buf.clear();
191    }
192
193    Ok(type_def)
194}
195
196/// Parses a primitive type definition (empty element).
197fn parse_primitive_type_empty(e: &BytesStart<'_>) -> Result<PrimitiveDef, ParseError> {
198    let mut name = String::new();
199    let mut primitive_type: Option<PrimitiveType> = None;
200    let mut length: Option<usize> = None;
201    let mut null_value = None;
202    let mut min_value = None;
203    let mut max_value = None;
204    let mut character_encoding = None;
205    let mut semantic_type = None;
206    let mut description = None;
207
208    for attr in e.attributes().flatten() {
209        let key = std::str::from_utf8(attr.key.as_ref())?;
210        let value = std::str::from_utf8(&attr.value)?;
211
212        match key {
213            "name" => name = value.to_string(),
214            "primitiveType" => {
215                primitive_type = Some(
216                    PrimitiveType::from_sbe_name(value)
217                        .ok_or_else(|| ParseError::invalid_attr("type", "primitiveType", value))?,
218                )
219            }
220            "length" => {
221                length = Some(
222                    value
223                        .parse()
224                        .map_err(|_| ParseError::invalid_attr("type", "length", value))?,
225                )
226            }
227            "nullValue" => null_value = Some(value.to_string()),
228            "minValue" => min_value = Some(value.to_string()),
229            "maxValue" => max_value = Some(value.to_string()),
230            "characterEncoding" => character_encoding = Some(value.to_string()),
231            "semanticType" => semantic_type = Some(value.to_string()),
232            "description" => description = Some(value.to_string()),
233            _ => {}
234        }
235    }
236
237    let primitive_type =
238        primitive_type.ok_or_else(|| ParseError::missing_attr("type", "primitiveType"))?;
239
240    let mut type_def = PrimitiveDef::new(name, primitive_type);
241    type_def.length = length;
242    type_def.null_value = null_value;
243    type_def.min_value = min_value;
244    type_def.max_value = max_value;
245    type_def.character_encoding = character_encoding;
246    type_def.semantic_type = semantic_type;
247    type_def.description = description;
248
249    Ok(type_def)
250}
251
252/// Parses a composite type definition.
253fn parse_composite(
254    reader: &mut Reader<&[u8]>,
255    e: &BytesStart<'_>,
256) -> Result<CompositeDef, ParseError> {
257    let mut name = String::new();
258    let mut description = None;
259    let mut semantic_type = None;
260
261    for attr in e.attributes().flatten() {
262        let key = std::str::from_utf8(attr.key.as_ref())?;
263        let value = std::str::from_utf8(&attr.value)?;
264
265        match key {
266            "name" => name = value.to_string(),
267            "description" => description = Some(value.to_string()),
268            "semanticType" => semantic_type = Some(value.to_string()),
269            _ => {}
270        }
271    }
272
273    let mut composite = CompositeDef::new(name);
274    composite.description = description;
275    composite.semantic_type = semantic_type;
276
277    let mut buf = Vec::new();
278    let mut current_offset = 0;
279
280    loop {
281        match reader.read_event_into(&mut buf) {
282            Ok(Event::Start(ref e)) | Ok(Event::Empty(ref e)) => {
283                let name_bytes = e.name().as_ref().to_vec();
284                let tag_name = std::str::from_utf8(&name_bytes)?;
285                if tag_name == "type" {
286                    let field = parse_composite_field(e, current_offset)?;
287                    current_offset += field.encoded_length;
288                    composite.add_field(field);
289                }
290            }
291            Ok(Event::End(_)) => break,
292            Ok(Event::Eof) => break,
293            Err(e) => return Err(ParseError::Xml(e)),
294            _ => {}
295        }
296        buf.clear();
297    }
298
299    Ok(composite)
300}
301
302/// Parses a field within a composite type.
303fn parse_composite_field(
304    e: &BytesStart<'_>,
305    default_offset: usize,
306) -> Result<CompositeField, ParseError> {
307    let mut name = String::new();
308    let mut primitive_type: Option<PrimitiveType> = None;
309    let mut offset = None;
310    let mut semantic_type = None;
311    let mut description = None;
312    let constant_value = None;
313
314    for attr in e.attributes().flatten() {
315        let key = std::str::from_utf8(attr.key.as_ref())?;
316        let value = std::str::from_utf8(&attr.value)?;
317
318        match key {
319            "name" => name = value.to_string(),
320            "primitiveType" => {
321                primitive_type = Some(
322                    PrimitiveType::from_sbe_name(value)
323                        .ok_or_else(|| ParseError::invalid_attr("type", "primitiveType", value))?,
324                )
325            }
326            "offset" => {
327                offset = Some(
328                    value
329                        .parse()
330                        .map_err(|_| ParseError::invalid_attr("type", "offset", value))?,
331                )
332            }
333            "semanticType" => semantic_type = Some(value.to_string()),
334            "description" => description = Some(value.to_string()),
335            "presence" if value == "constant" => {}
336            _ => {}
337        }
338    }
339
340    let prim = primitive_type.ok_or_else(|| ParseError::missing_attr("type", "primitiveType"))?;
341    let type_name = prim.sbe_name().to_string();
342    let encoded_length = prim.size();
343
344    let mut field = CompositeField::new(name, type_name, encoded_length);
345    field.primitive_type = Some(prim);
346    field.offset = offset.or(Some(default_offset));
347    field.semantic_type = semantic_type;
348    field.description = description;
349    field.constant_value = constant_value;
350
351    Ok(field)
352}
353
354/// Parses an enum type definition.
355fn parse_enum(reader: &mut Reader<&[u8]>, e: &BytesStart<'_>) -> Result<EnumDef, ParseError> {
356    let mut name = String::new();
357    let mut encoding_type: Option<PrimitiveType> = None;
358    let mut null_value = None;
359    let mut description = None;
360
361    for attr in e.attributes().flatten() {
362        let key = std::str::from_utf8(attr.key.as_ref())?;
363        let value = std::str::from_utf8(&attr.value)?;
364
365        match key {
366            "name" => name = value.to_string(),
367            "encodingType" => {
368                encoding_type = Some(
369                    PrimitiveType::from_sbe_name(value)
370                        .ok_or_else(|| ParseError::invalid_attr("enum", "encodingType", value))?,
371                )
372            }
373            "nullValue" => null_value = Some(value.to_string()),
374            "description" => description = Some(value.to_string()),
375            _ => {}
376        }
377    }
378
379    let encoding_type =
380        encoding_type.ok_or_else(|| ParseError::missing_attr("enum", "encodingType"))?;
381
382    let mut enum_def = EnumDef::new(name, encoding_type);
383    enum_def.null_value = null_value;
384    enum_def.description = description;
385
386    let mut buf = Vec::new();
387
388    loop {
389        match reader.read_event_into(&mut buf) {
390            Ok(Event::Start(ref e)) | Ok(Event::Empty(ref e)) => {
391                let name_bytes = e.name().as_ref().to_vec();
392                let tag_name = std::str::from_utf8(&name_bytes)?;
393                if tag_name == "validValue" {
394                    let value = parse_enum_value(reader, e)?;
395                    enum_def.add_value(value);
396                }
397            }
398            Ok(Event::End(_)) => break,
399            Ok(Event::Eof) => break,
400            Err(e) => return Err(ParseError::Xml(e)),
401            _ => {}
402        }
403        buf.clear();
404    }
405
406    Ok(enum_def)
407}
408
409/// Parses an enum valid value.
410fn parse_enum_value(
411    reader: &mut Reader<&[u8]>,
412    e: &BytesStart<'_>,
413) -> Result<EnumValue, ParseError> {
414    let mut name = String::new();
415    let mut description = None;
416    let mut since_version = None;
417    let mut deprecated = None;
418
419    for attr in e.attributes().flatten() {
420        let key = std::str::from_utf8(attr.key.as_ref())?;
421        let value = std::str::from_utf8(&attr.value)?;
422
423        match key {
424            "name" => name = value.to_string(),
425            "description" => description = Some(value.to_string()),
426            "sinceVersion" => since_version = value.parse().ok(),
427            "deprecated" => deprecated = value.parse().ok(),
428            _ => {}
429        }
430    }
431
432    // Read the value content
433    let mut buf = Vec::new();
434    let mut value_str = String::new();
435
436    loop {
437        match reader.read_event_into(&mut buf) {
438            Ok(Event::Text(ref t)) => {
439                value_str = std::str::from_utf8(t.as_ref())?.trim().to_string();
440            }
441            Ok(Event::End(_)) => break,
442            Ok(Event::Eof) => break,
443            Err(e) => return Err(ParseError::Xml(e)),
444            _ => {}
445        }
446        buf.clear();
447    }
448
449    let mut enum_value = EnumValue::new(name, value_str);
450    enum_value.description = description;
451    enum_value.since_version = since_version;
452    enum_value.deprecated = deprecated;
453
454    Ok(enum_value)
455}
456
457/// Parses a set (bitfield) type definition.
458fn parse_set(reader: &mut Reader<&[u8]>, e: &BytesStart<'_>) -> Result<SetDef, ParseError> {
459    let mut name = String::new();
460    let mut encoding_type: Option<PrimitiveType> = None;
461    let mut description = None;
462
463    for attr in e.attributes().flatten() {
464        let key = std::str::from_utf8(attr.key.as_ref())?;
465        let value = std::str::from_utf8(&attr.value)?;
466
467        match key {
468            "name" => name = value.to_string(),
469            "encodingType" => {
470                encoding_type = Some(
471                    PrimitiveType::from_sbe_name(value)
472                        .ok_or_else(|| ParseError::invalid_attr("set", "encodingType", value))?,
473                )
474            }
475            "description" => description = Some(value.to_string()),
476            _ => {}
477        }
478    }
479
480    let encoding_type =
481        encoding_type.ok_or_else(|| ParseError::missing_attr("set", "encodingType"))?;
482
483    let mut set_def = SetDef::new(name, encoding_type);
484    set_def.description = description;
485
486    let mut buf = Vec::new();
487
488    loop {
489        match reader.read_event_into(&mut buf) {
490            Ok(Event::Start(ref e)) | Ok(Event::Empty(ref e)) => {
491                let name_bytes = e.name().as_ref().to_vec();
492                let tag_name = std::str::from_utf8(&name_bytes)?;
493                if tag_name == "choice" {
494                    let choice = parse_set_choice(reader, e)?;
495                    set_def.add_choice(choice);
496                }
497            }
498            Ok(Event::End(_)) => break,
499            Ok(Event::Eof) => break,
500            Err(e) => return Err(ParseError::Xml(e)),
501            _ => {}
502        }
503        buf.clear();
504    }
505
506    Ok(set_def)
507}
508
509/// Parses a set choice.
510fn parse_set_choice(
511    reader: &mut Reader<&[u8]>,
512    e: &BytesStart<'_>,
513) -> Result<SetChoice, ParseError> {
514    let mut name = String::new();
515    let mut description = None;
516    let mut since_version = None;
517    let mut deprecated = None;
518
519    for attr in e.attributes().flatten() {
520        let key = std::str::from_utf8(attr.key.as_ref())?;
521        let value = std::str::from_utf8(&attr.value)?;
522
523        match key {
524            "name" => name = value.to_string(),
525            "description" => description = Some(value.to_string()),
526            "sinceVersion" => since_version = value.parse().ok(),
527            "deprecated" => deprecated = value.parse().ok(),
528            _ => {}
529        }
530    }
531
532    // Read the bit position content
533    let mut buf = Vec::new();
534    let mut bit_position: u8 = 0;
535
536    loop {
537        match reader.read_event_into(&mut buf) {
538            Ok(Event::Text(ref t)) => {
539                let text = std::str::from_utf8(t.as_ref())?.trim();
540                bit_position = text
541                    .parse()
542                    .map_err(|_| ParseError::invalid_attr("choice", "value", text))?;
543            }
544            Ok(Event::End(_)) => break,
545            Ok(Event::Eof) => break,
546            Err(e) => return Err(ParseError::Xml(e)),
547            _ => {}
548        }
549        buf.clear();
550    }
551
552    let mut choice = SetChoice::new(name, bit_position);
553    choice.description = description;
554    choice.since_version = since_version;
555    choice.deprecated = deprecated;
556
557    Ok(choice)
558}
559
560/// Parses a message definition.
561fn parse_message(
562    reader: &mut Reader<&[u8]>,
563    e: &BytesStart<'_>,
564    schema: &Schema,
565) -> Result<MessageDef, ParseError> {
566    let mut name = String::new();
567    let mut id: u16 = 0;
568    let mut block_length: u16 = 0;
569    let mut semantic_type = None;
570    let mut description = None;
571    let mut since_version = None;
572    let mut deprecated = None;
573
574    for attr in e.attributes().flatten() {
575        let key = std::str::from_utf8(attr.key.as_ref())?;
576        let value = std::str::from_utf8(&attr.value)?;
577
578        match key {
579            "name" => name = value.to_string(),
580            "id" => {
581                id = value
582                    .parse()
583                    .map_err(|_| ParseError::invalid_attr("message", "id", value))?
584            }
585            "blockLength" => {
586                block_length = value
587                    .parse()
588                    .map_err(|_| ParseError::invalid_attr("message", "blockLength", value))?
589            }
590            "semanticType" => semantic_type = Some(value.to_string()),
591            "description" => description = Some(value.to_string()),
592            "sinceVersion" => since_version = value.parse().ok(),
593            "deprecated" => deprecated = value.parse().ok(),
594            _ => {}
595        }
596    }
597
598    let mut msg = MessageDef::new(name, id, block_length);
599    msg.semantic_type = semantic_type;
600    msg.description = description;
601    msg.since_version = since_version;
602    msg.deprecated = deprecated;
603
604    let mut buf = Vec::new();
605
606    loop {
607        match reader.read_event_into(&mut buf) {
608            Ok(Event::Start(ref e)) | Ok(Event::Empty(ref e)) => {
609                let name_bytes = e.name().as_ref().to_vec();
610                let tag_name = std::str::from_utf8(&name_bytes)?;
611
612                match tag_name {
613                    "field" => {
614                        let field = parse_field(e, schema)?;
615                        msg.add_field(field);
616                    }
617                    "group" => {
618                        let group = parse_group(reader, e, schema)?;
619                        msg.add_group(group);
620                    }
621                    "data" => {
622                        let data = parse_data_field(e)?;
623                        msg.add_data_field(data);
624                    }
625                    _ => {}
626                }
627            }
628            Ok(Event::End(_)) => break,
629            Ok(Event::Eof) => break,
630            Err(e) => return Err(ParseError::Xml(e)),
631            _ => {}
632        }
633        buf.clear();
634    }
635
636    auto_compute_field_offsets(&mut msg.fields);
637
638    Ok(msg)
639}
640
641/// Parses a field definition.
642fn parse_field(e: &BytesStart<'_>, schema: &Schema) -> Result<FieldDef, ParseError> {
643    let mut name = String::new();
644    let mut id: u16 = 0;
645    let mut type_name = String::new();
646    let mut offset: usize = 0;
647    let mut presence = Presence::Required;
648    let mut semantic_type = None;
649    let mut description = None;
650    let mut since_version = None;
651    let mut deprecated = None;
652    let mut value_ref = None;
653
654    for attr in e.attributes().flatten() {
655        let key = std::str::from_utf8(attr.key.as_ref())?;
656        let value = std::str::from_utf8(&attr.value)?;
657
658        match key {
659            "name" => name = value.to_string(),
660            "id" => {
661                id = value
662                    .parse()
663                    .map_err(|_| ParseError::invalid_attr("field", "id", value))?
664            }
665            "type" => type_name = value.to_string(),
666            "offset" => {
667                offset = value
668                    .parse()
669                    .map_err(|_| ParseError::invalid_attr("field", "offset", value))?
670            }
671            "presence" => {
672                presence = Presence::parse(value)
673                    .ok_or_else(|| ParseError::invalid_attr("field", "presence", value))?
674            }
675            "semanticType" => semantic_type = Some(value.to_string()),
676            "description" => description = Some(value.to_string()),
677            "sinceVersion" => since_version = value.parse().ok(),
678            "deprecated" => deprecated = value.parse().ok(),
679            "valueRef" => value_ref = Some(value.to_string()),
680            _ => {}
681        }
682    }
683
684    let mut field = FieldDef::new(name, id, type_name.clone(), offset);
685    field.presence = presence;
686    field.semantic_type = semantic_type;
687    field.description = description;
688    field.since_version = since_version;
689    field.deprecated = deprecated;
690    field.value_ref = value_ref;
691
692    // Resolve encoded length from type
693    if let Some(type_def) = schema.get_type(&type_name) {
694        field.encoded_length = type_def.encoded_length();
695    }
696
697    Ok(field)
698}
699
700/// Parses a group definition.
701fn parse_group(
702    reader: &mut Reader<&[u8]>,
703    e: &BytesStart<'_>,
704    schema: &Schema,
705) -> Result<GroupDef, ParseError> {
706    let mut name = String::new();
707    let mut id: u16 = 0;
708    let mut block_length: u16 = 0;
709    let mut dimension_type = "groupSizeEncoding".to_string();
710    let mut description = None;
711    let mut since_version = None;
712    let mut deprecated = None;
713
714    for attr in e.attributes().flatten() {
715        let key = std::str::from_utf8(attr.key.as_ref())?;
716        let value = std::str::from_utf8(&attr.value)?;
717
718        match key {
719            "name" => name = value.to_string(),
720            "id" => {
721                id = value
722                    .parse()
723                    .map_err(|_| ParseError::invalid_attr("group", "id", value))?
724            }
725            "blockLength" => {
726                block_length = value
727                    .parse()
728                    .map_err(|_| ParseError::invalid_attr("group", "blockLength", value))?
729            }
730            "dimensionType" => dimension_type = value.to_string(),
731            "description" => description = Some(value.to_string()),
732            "sinceVersion" => since_version = value.parse().ok(),
733            "deprecated" => deprecated = value.parse().ok(),
734            _ => {}
735        }
736    }
737
738    let mut group = GroupDef::new(name, id, block_length);
739    group.dimension_type = dimension_type;
740    group.description = description;
741    group.since_version = since_version;
742    group.deprecated = deprecated;
743
744    let mut buf = Vec::new();
745
746    loop {
747        match reader.read_event_into(&mut buf) {
748            Ok(Event::Start(ref e)) | Ok(Event::Empty(ref e)) => {
749                let name_bytes = e.name().as_ref().to_vec();
750                let tag_name = std::str::from_utf8(&name_bytes)?;
751                match tag_name {
752                    "field" => {
753                        let field = parse_field(e, schema)?;
754                        group.add_field(field);
755                    }
756                    "group" => {
757                        let nested = parse_group(reader, e, schema)?;
758                        group.add_nested_group(nested);
759                    }
760                    "data" => {
761                        let data = parse_data_field(e)?;
762                        group.add_data_field(data);
763                    }
764                    _ => {}
765                }
766            }
767            Ok(Event::End(_)) => break,
768            Ok(Event::Eof) => break,
769            Err(e) => return Err(ParseError::Xml(e)),
770            _ => {}
771        }
772        buf.clear();
773    }
774
775    auto_compute_field_offsets(&mut group.fields);
776
777    Ok(group)
778}
779
780/// Auto-computes field offsets for fields that did not specify an explicit offset.
781///
782/// In SBE, the `offset` attribute on `<field>` elements is optional. When absent
783/// the parser defaults the offset to 0, which is only correct for the first field.
784/// This function walks the field list and, for any non-first field whose offset is
785/// still 0, assigns it the byte position immediately after the previous field.
786fn auto_compute_field_offsets(fields: &mut [FieldDef]) {
787    let mut running_offset = 0usize;
788    for field in fields.iter_mut() {
789        if running_offset > 0 && field.offset == 0 {
790            field.offset = running_offset;
791        }
792        running_offset = field.offset + field.encoded_length;
793    }
794}
795
796/// Parses a data (variable-length) field definition.
797fn parse_data_field(e: &BytesStart<'_>) -> Result<DataFieldDef, ParseError> {
798    let mut name = String::new();
799    let mut id: u16 = 0;
800    let mut type_name = String::new();
801    let mut description = None;
802    let mut since_version = None;
803    let mut deprecated = None;
804
805    for attr in e.attributes().flatten() {
806        let key = std::str::from_utf8(attr.key.as_ref())?;
807        let value = std::str::from_utf8(&attr.value)?;
808
809        match key {
810            "name" => name = value.to_string(),
811            "id" => {
812                id = value
813                    .parse()
814                    .map_err(|_| ParseError::invalid_attr("data", "id", value))?
815            }
816            "type" => type_name = value.to_string(),
817            "description" => description = Some(value.to_string()),
818            "sinceVersion" => since_version = value.parse().ok(),
819            "deprecated" => deprecated = value.parse().ok(),
820            _ => {}
821        }
822    }
823
824    let mut data = DataFieldDef::new(name, id, type_name);
825    data.description = description;
826    data.since_version = since_version;
827    data.deprecated = deprecated;
828
829    Ok(data)
830}
831
832/// Skips to the end of the current element.
833#[allow(dead_code)]
834fn skip_to_end(reader: &mut Reader<&[u8]>, _tag_name: &str) -> Result<(), ParseError> {
835    let mut buf = Vec::new();
836    let mut depth = 1;
837
838    loop {
839        match reader.read_event_into(&mut buf) {
840            Ok(Event::Start(_)) => depth += 1,
841            Ok(Event::End(_)) => {
842                depth -= 1;
843                if depth == 0 {
844                    break;
845                }
846            }
847            Ok(Event::Eof) => break,
848            Err(e) => return Err(ParseError::Xml(e)),
849            _ => {}
850        }
851        buf.clear();
852    }
853
854    Ok(())
855}
856
#[cfg(test)]
mod tests {
    use super::*;

    /// Minimal schema with two primitive types, one enum and one message.
    const SIMPLE_SCHEMA: &str = r#"<?xml version="1.0" encoding="UTF-8"?>
<sbe:messageSchema xmlns:sbe="http://fixprotocol.io/2016/sbe"
                   package="test"
                   id="1"
                   version="1"
                   semanticVersion="1.0.0"
                   byteOrder="littleEndian">
    <types>
        <type name="uint64" primitiveType="uint64"/>
        <type name="Symbol" primitiveType="char" length="8"/>
        <enum name="Side" encodingType="uint8">
            <validValue name="Buy">1</validValue>
            <validValue name="Sell">2</validValue>
        </enum>
    </types>
    <sbe:message name="TestMessage" id="1" blockLength="16">
        <field name="price" id="1" type="uint64" offset="0"/>
        <field name="symbol" id="2" type="Symbol" offset="8"/>
    </sbe:message>
</sbe:messageSchema>"#;

    /// Schema whose group only gives an explicit offset for its first field.
    const IMPLICIT_OFFSETS_SCHEMA: &str = r#"<?xml version="1.0" encoding="UTF-8"?>
<sbe:messageSchema xmlns:sbe="http://fixprotocol.io/2016/sbe"
                   package="test" id="1" version="1" byteOrder="littleEndian">
    <types>
        <type name="uint64" primitiveType="uint64"/>
        <type name="uint32" primitiveType="uint32"/>
    </types>
    <sbe:message name="TestMsg" id="1" blockLength="0">
        <group name="entries" id="100" dimensionType="groupSizeEncoding" blockLength="20">
            <field name="orderId" id="1" type="uint64" offset="0"/>
            <field name="instrumentId" id="2" type="uint32"/>
            <field name="quantity" id="3" type="uint64"/>
        </group>
    </sbe:message>
</sbe:messageSchema>"#;

    /// Schema whose group spells out every field offset, including padding gaps.
    const EXPLICIT_OFFSETS_SCHEMA: &str = r#"<?xml version="1.0" encoding="UTF-8"?>
<sbe:messageSchema xmlns:sbe="http://fixprotocol.io/2016/sbe"
                   package="test" id="1" version="1" byteOrder="littleEndian">
    <types>
        <type name="uint64" primitiveType="uint64"/>
        <type name="uint32" primitiveType="uint32"/>
    </types>
    <sbe:message name="TestMsg" id="1" blockLength="0">
        <group name="entries" id="100" dimensionType="groupSizeEncoding" blockLength="24">
            <field name="orderId" id="1" type="uint64" offset="0"/>
            <field name="instrumentId" id="2" type="uint32" offset="8"/>
            <field name="quantity" id="3" type="uint64" offset="16"/>
        </group>
    </sbe:message>
</sbe:messageSchema>"#;

    /// Parses `xml`, failing the calling test if the parser rejects it.
    fn parse_ok(xml: &str) -> Schema {
        parse_schema(xml).expect("Failed to parse schema")
    }

    /// The attributes of the root element end up on the schema itself.
    #[test]
    fn test_parse_simple_schema() {
        let parsed = parse_ok(SIMPLE_SCHEMA);

        assert_eq!(parsed.package, "test");
        assert_eq!(parsed.id, 1);
        assert_eq!(parsed.version, 1);
        assert_eq!(parsed.byte_order, ByteOrder::LittleEndian);
    }

    /// Every entry of the `<types>` section is registered with the right kind.
    #[test]
    fn test_parse_types() {
        let parsed = parse_ok(SIMPLE_SCHEMA);

        for type_name in ["uint64", "Symbol", "Side"] {
            assert!(parsed.has_type(type_name));
        }

        let symbol_type = parsed.get_type("Symbol").unwrap();
        assert!(symbol_type.is_primitive());
        assert_eq!(symbol_type.encoded_length(), 8);

        let side_type = parsed.get_type("Side").unwrap();
        assert!(side_type.is_enum());
    }

    /// The single message is parsed together with its header attributes and fields.
    #[test]
    fn test_parse_message() {
        let parsed = parse_ok(SIMPLE_SCHEMA);

        assert_eq!(parsed.messages.len(), 1);
        let message = &parsed.messages[0];
        assert_eq!(message.name, "TestMessage");
        assert_eq!(message.id, 1);
        assert_eq!(message.block_length, 16);
        assert_eq!(message.fields.len(), 2);
    }

    /// Group fields without an `offset` attribute are laid out back to back.
    #[test]
    fn test_group_field_offsets_auto_computed() {
        let parsed = parse_ok(IMPLICIT_OFFSETS_SCHEMA);
        let entries = &parsed.messages[0].groups[0];

        let expected = [("orderId", 0), ("instrumentId", 8), ("quantity", 12)];
        // Slicing keeps the out-of-bounds panic if fewer than three fields were parsed.
        for (field, (name, offset)) in entries.fields[..3].iter().zip(expected) {
            assert_eq!(field.name, name);
            assert_eq!(field.offset, offset);
        }
    }

    /// Offsets given explicitly in the schema are kept as written.
    #[test]
    fn test_group_field_offsets_explicit_preserved() {
        let parsed = parse_ok(EXPLICIT_OFFSETS_SCHEMA);
        let entries = &parsed.messages[0].groups[0];

        // Slicing keeps the out-of-bounds panic if fewer than three fields were parsed.
        let offsets: Vec<_> = entries.fields[..3].iter().map(|field| field.offset).collect();
        assert_eq!(offsets, [0, 8, 16]);
    }
}