Skip to main content

ironfix_tagvalue/
decoder.rs

1/******************************************************************************
2   Author: Joaquín Béjar García
3   Email: jb@taunais.com
4   Date: 27/1/26
5******************************************************************************/
6
7//! Zero-copy FIX message decoder.
8//!
9//! This module provides a high-performance decoder that parses FIX messages
10//! without allocating memory for field values. Field values are returned as
11//! references to the original buffer.
12
13use crate::checksum::{calculate_checksum, parse_checksum};
14use ironfix_core::error::DecodeError;
15use ironfix_core::field::FieldRef;
16use ironfix_core::message::{MsgType, RawMessage};
17use memchr::memchr;
18use smallvec::SmallVec;
19
/// SOH (Start of Header) byte, `0x01`.
///
/// The decoder treats this byte as the terminator of every `tag=value` field.
pub const SOH: u8 = 0x01;

/// Equals sign (`=`) byte.
///
/// The decoder treats the first occurrence of this byte in a field as the
/// separator between the numeric tag and the field value.
pub const EQUALS: u8 = b'=';
25
/// Zero-copy FIX message decoder.
///
/// The decoder walks a borrowed byte buffer and yields fields whose values
/// are sub-slices of that buffer, so no field data is copied. The lifetime
/// `'a` ties every returned field and message to the input buffer.
#[derive(Debug)]
pub struct Decoder<'a> {
    /// Input buffer the decoder reads from; never modified.
    input: &'a [u8],
    /// Current position in the buffer: index of the first byte that has not
    /// been consumed yet. Advanced past the trailing SOH of each parsed field.
    offset: usize,
    /// Whether `decode` verifies the CheckSum (tag 10) field. Enabled by
    /// default in `new`.
    validate_checksum: bool,
}
39
40impl<'a> Decoder<'a> {
41    /// Creates a new decoder for the given input buffer.
42    ///
43    /// # Arguments
44    /// * `input` - The FIX message bytes to decode
45    #[inline]
46    #[must_use]
47    pub const fn new(input: &'a [u8]) -> Self {
48        Self {
49            input,
50            offset: 0,
51            validate_checksum: true,
52        }
53    }
54
55    /// Sets whether to validate checksums during decoding.
56    ///
57    /// # Arguments
58    /// * `validate` - Whether to validate checksums
59    #[inline]
60    #[must_use]
61    pub const fn with_checksum_validation(mut self, validate: bool) -> Self {
62        self.validate_checksum = validate;
63        self
64    }
65
66    /// Decodes a complete FIX message from the buffer.
67    ///
68    /// # Returns
69    /// A `RawMessage` containing zero-copy references to the parsed fields.
70    ///
71    /// # Errors
72    /// Returns `DecodeError` if the message is malformed or incomplete.
73    pub fn decode(&mut self) -> Result<RawMessage<'a>, DecodeError> {
74        let start_offset = self.offset;
75
76        // Parse BeginString (tag 8)
77        let begin_string_field = self.next_field().ok_or(DecodeError::Incomplete)?;
78        if begin_string_field.tag != 8 {
79            return Err(DecodeError::InvalidBeginString);
80        }
81        let begin_string_start =
82            begin_string_field.value.as_ptr() as usize - self.input.as_ptr() as usize;
83        let begin_string_end = begin_string_start + begin_string_field.value.len();
84        let begin_string = begin_string_start..begin_string_end;
85
86        // Parse BodyLength (tag 9)
87        let body_length_field = self.next_field().ok_or(DecodeError::MissingBodyLength)?;
88        if body_length_field.tag != 9 {
89            return Err(DecodeError::MissingBodyLength);
90        }
91        let body_length: usize = body_length_field
92            .as_str()?
93            .parse()
94            .map_err(|_| DecodeError::InvalidBodyLength)?;
95
96        // Record body start position
97        let body_start = self.offset;
98
99        // Parse MsgType (tag 35) - should be first field in body
100        let msg_type_field = self.next_field().ok_or(DecodeError::MissingMsgType)?;
101        if msg_type_field.tag != 35 {
102            return Err(DecodeError::MissingMsgType);
103        }
104        let msg_type: MsgType = msg_type_field.as_str()?.parse().unwrap();
105
106        // Collect all fields
107        let mut fields: SmallVec<[FieldRef<'a>; 32]> = SmallVec::new();
108        fields.push(begin_string_field);
109        fields.push(body_length_field);
110        fields.push(msg_type_field);
111
112        // Parse remaining fields until checksum
113        let mut checksum_field: Option<FieldRef<'a>> = None;
114        while let Some(field) = self.next_field() {
115            if field.tag == 10 {
116                checksum_field = Some(field);
117                break;
118            }
119            fields.push(field);
120        }
121
122        // Validate checksum if enabled
123        if self.validate_checksum {
124            let checksum_ref = checksum_field.ok_or(DecodeError::Incomplete)?;
125            let declared = parse_checksum(checksum_ref.value).ok_or_else(|| {
126                DecodeError::InvalidFieldValue {
127                    tag: 10,
128                    reason: "invalid checksum format".to_string(),
129                }
130            })?;
131
132            // Calculate checksum of everything before the checksum field
133            let checksum_start =
134                checksum_ref.value.as_ptr() as usize - self.input.as_ptr() as usize - 3; // "10="
135            let calculated = calculate_checksum(&self.input[start_offset..checksum_start]);
136
137            if calculated != declared {
138                return Err(DecodeError::ChecksumMismatch {
139                    calculated,
140                    declared,
141                });
142            }
143        }
144
145        let body_end = body_start + body_length;
146        let body = body_start..body_end;
147
148        Ok(RawMessage::new(
149            &self.input[start_offset..self.offset],
150            begin_string,
151            body,
152            msg_type,
153            fields,
154        ))
155    }
156
157    /// Parses the next field from the buffer.
158    ///
159    /// # Returns
160    /// The next field, or `None` if the buffer is exhausted.
161    #[inline]
162    pub fn next_field(&mut self) -> Option<FieldRef<'a>> {
163        if self.offset >= self.input.len() {
164            return None;
165        }
166
167        let remaining = &self.input[self.offset..];
168
169        // Find '=' delimiter using SIMD-accelerated search
170        let eq_pos = memchr(EQUALS, remaining)?;
171        let tag_bytes = &remaining[..eq_pos];
172
173        // Parse tag number
174        let tag = parse_tag(tag_bytes)?;
175
176        // Find SOH delimiter
177        let value_start = eq_pos + 1;
178        let soh_pos = memchr(SOH, &remaining[value_start..])?;
179        let value = &remaining[value_start..value_start + soh_pos];
180
181        self.offset += value_start + soh_pos + 1;
182
183        Some(FieldRef::new(tag, value))
184    }
185
186    /// Returns the current offset in the buffer.
187    #[inline]
188    #[must_use]
189    pub const fn offset(&self) -> usize {
190        self.offset
191    }
192
193    /// Returns the remaining bytes in the buffer.
194    #[inline]
195    #[must_use]
196    pub fn remaining(&self) -> &'a [u8] {
197        &self.input[self.offset..]
198    }
199
200    /// Returns true if the buffer has been fully consumed.
201    #[inline]
202    #[must_use]
203    pub fn is_empty(&self) -> bool {
204        self.offset >= self.input.len()
205    }
206
207    /// Resets the decoder to the beginning of the buffer.
208    #[inline]
209    pub fn reset(&mut self) {
210        self.offset = 0;
211    }
212}
213
/// Converts the ASCII decimal digits of a FIX tag into its numeric value.
///
/// # Arguments
/// * `bytes` - The ASCII bytes representing the tag number
///
/// # Returns
/// The tag as a `u32`, or `None` when `bytes` is empty, longer than ten
/// bytes, contains a byte that is not an ASCII digit, or encodes a number
/// that does not fit in a `u32`.
#[inline]
fn parse_tag(bytes: &[u8]) -> Option<u32> {
    // A u32 never needs more than ten decimal digits.
    if !(1..=10).contains(&bytes.len()) {
        return None;
    }

    bytes.iter().try_fold(0u32, |acc, &digit| {
        if digit.is_ascii_digit() {
            // Checked arithmetic turns an overflowing tag into `None`.
            acc.checked_mul(10)?.checked_add(u32::from(digit - b'0'))
        } else {
            None
        }
    })
}
237
#[cfg(test)]
mod tests {
    use super::*;

    /// Valid digit strings parse to their value; empty or non-numeric input
    /// is rejected.
    #[test]
    fn test_parse_tag() {
        let cases: [(&[u8], Option<u32>); 6] = [
            (b"8", Some(8)),
            (b"35", Some(35)),
            (b"12345", Some(12345)),
            (b"", None),
            (b"abc", None),
            (b"12a", None),
        ];

        for &(raw, expected) in cases.iter() {
            assert_eq!(parse_tag(raw), expected);
        }
    }

    /// Fields come out in buffer order and the decoder reports exhaustion
    /// after the last one.
    #[test]
    fn test_next_field() {
        let mut decoder = Decoder::new(b"8=FIX.4.4\x019=5\x0135=0\x01");
        let expected = [(8, "FIX.4.4"), (9, "5"), (35, "0")];

        for &(tag, value) in expected.iter() {
            let field = decoder.next_field().unwrap();
            assert_eq!(field.tag, tag);
            assert_eq!(field.as_str().unwrap(), value);
        }

        assert!(decoder.next_field().is_none());
    }

    /// An empty buffer yields no field and counts as fully consumed.
    #[test]
    fn test_decoder_empty() {
        let mut decoder = Decoder::new(b"");
        assert!(decoder.next_field().is_none());
        assert!(decoder.is_empty());
    }

    /// A field without its terminating SOH is not returned.
    #[test]
    fn test_decoder_incomplete() {
        let mut decoder = Decoder::new(b"8=FIX.4.4");
        assert!(decoder.next_field().is_none());
    }
}
285}