nmea0183_parser/nmea0183/
mod.rs

1//! # NMEA 0183 Message Parser
2//!
3//! This module provides the main parsing functionality for NMEA 0183-style messages.
4//! It handles the standard NMEA 0183 format: `$HHH,D1,D2,...,Dn*CC\r\n`
5//!
6//! The parser is configurable to handle variations in:
7//! - Checksum requirements (required or optional)
8//! - Line ending requirements (CRLF required or forbidden)
9
10// TODO: Consider converting to Nmea0183 Builder pattern for more flexibility
11
12use nom::{
13    AsBytes, AsChar, Compare, Err, FindSubstring, Input, Parser,
14    branch::alt,
15    bytes::complete::{tag, take, take_until},
16    character::complete::{char, hex_digit0},
17    combinator::{opt, rest, rest_len, verify},
18    error::{ErrorKind, ParseError},
19    number::complete::hex_u32,
20    sequence::terminated,
21};
22
23use crate::{Error, IResult};
24
/// Selects how strictly the parser treats the trailing `*CC` checksum.
///
/// An NMEA 0183 sentence may carry a checksum written as `*CC`, where `CC` is a
/// two-digit hexadecimal number holding the XOR of every byte of the message
/// content (the leading `$` and the `*` delimiter are not part of the XOR).
///
/// The type is a plain, field-less enum, so it is cheap to copy, compare and
/// print (`Debug`), which makes it convenient in assertions and log output.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ChecksumMode {
    /// A checksum must be present, and it must match the calculated value.
    ///
    /// Parsing fails when the message has no `*CC` suffix, and also when the
    /// suffix is present but does not match the checksum calculated from the
    /// message content.
    ///
    /// Choose this for strict NMEA 0183 compliance or whenever data integrity matters.
    Required,

    /// A checksum may be omitted, but is still verified when it is present.
    ///
    /// - Without a `*CC` suffix the message is accepted as-is.
    /// - With a `*CC` suffix the value must match, otherwise parsing fails.
    ///
    /// Choose this for mixed message sources or legacy equipment that does not
    /// always emit checksums.
    Optional,
}
50
/// Selects how the parser treats the `\r\n` (CRLF) line ending.
///
/// NMEA 0183 sentences normally end with a carriage return followed by a line
/// feed, but some systems strip those characters before handing the sentence on.
///
/// The type is a plain, field-less enum, so it is cheap to copy, compare and
/// print (`Debug`), which makes it convenient in assertions and log output.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LineEndingMode {
    /// The message must be terminated by `\r\n`.
    ///
    /// Parsing fails when the terminator is missing. This is the standard
    /// NMEA 0183 framing used on serial links and in log files.
    ///
    /// Choose this when reading standard NMEA log files or serial port data.
    Required,

    /// The message must not contain a `\r\n` line ending.
    ///
    /// Parsing fails when a `\r\n` sequence is found. This suits sentences that
    /// passed through a system which already removed the line endings.
    ///
    /// Choose this when reading messages from APIs, databases, or other sources
    /// where line endings have been stripped.
    Forbidden,
}
76
/// Builder for a configurable NMEA 0183-style framing parser.
///
/// The builder stores two settings — how to treat the checksum and how to treat
/// the line ending — and turns them into a parser once
/// [`build`](Nmea0183ParserBuilder::build) is called with a parser for the
/// message content.
///
/// A freshly created builder requires both a checksum and a CRLF line ending;
/// use the setter methods to relax either requirement.
///
/// # Examples
///
/// ```rust
/// use nmea0183_parser::{IResult, Nmea0183ParserBuilder};
///
/// fn content_parser(input: &str) -> IResult<&str, Vec<&str>> {
///     Ok(("", input.split(',').collect()))
/// }
///
/// // Create a parser with required checksum and CRLF
/// let parser_factory = Nmea0183ParserBuilder::new();
/// let mut parser = parser_factory.build(content_parser);
/// ```
///
/// ## Configuration
///
/// ```rust
/// use nmea0183_parser::{ChecksumMode, IResult, LineEndingMode, Nmea0183ParserBuilder};
/// use nom::Parser;
///
/// fn content_parser(i: &str) -> IResult<&str, bool> {
///     Ok((i, true))
/// }
///
/// // Strict: checksum and CRLF both required
/// let mut strict_parser = Nmea0183ParserBuilder::new()
///     .checksum_mode(ChecksumMode::Required)
///     .line_ending_mode(LineEndingMode::Required)
///     .build(content_parser);
/// assert!(strict_parser.parse("$GPGGA,data*6A\r\n").is_ok());
/// assert!(strict_parser.parse("$GPGGA,data*6A").is_err()); // (missing CRLF)
/// assert!(strict_parser.parse("$GPGGA,data\r\n").is_err()); // (missing checksum)
///
/// // Checksum required, no CRLF allowed
/// let mut no_crlf_parser = Nmea0183ParserBuilder::new()
///     .checksum_mode(ChecksumMode::Required)
///     .line_ending_mode(LineEndingMode::Forbidden)
///     .build(content_parser);
/// assert!(no_crlf_parser.parse("$GPGGA,data*6A").is_ok());
/// assert!(no_crlf_parser.parse("$GPGGA,data*6A\r\n").is_err()); // (CRLF present)
/// assert!(no_crlf_parser.parse("$GPGGA,data").is_err()); // (missing checksum)
///
/// // Checksum optional, CRLF required
/// let mut optional_checksum_parser = Nmea0183ParserBuilder::new()
///     .checksum_mode(ChecksumMode::Optional)
///     .line_ending_mode(LineEndingMode::Required)
///     .build(content_parser);
/// assert!(optional_checksum_parser.parse("$GPGGA,data*6A\r\n").is_ok()); // (with valid checksum)
/// assert!(optional_checksum_parser.parse("$GPGGA,data\r\n").is_ok()); // (without checksum)
/// assert!(optional_checksum_parser.parse("$GPGGA,data*99\r\n").is_err()); // (invalid checksum)
/// assert!(optional_checksum_parser.parse("$GPGGA,data*6A").is_err()); // (missing CRLF)
///
/// // Lenient: checksum optional, CRLF forbidden
/// let mut lenient_parser = Nmea0183ParserBuilder::new()
///     .checksum_mode(ChecksumMode::Optional)
///     .line_ending_mode(LineEndingMode::Forbidden)
///     .build(content_parser);
/// assert!(lenient_parser.parse("$GPGGA,data*6A").is_ok()); // (with valid checksum)
/// assert!(lenient_parser.parse("$GPGGA,data").is_ok()); // (without checksum)
/// assert!(lenient_parser.parse("$GPGGA,data*99").is_err()); // (invalid checksum)
/// assert!(lenient_parser.parse("$GPGGA,data\r\n").is_err()); // (CRLF present)
/// ```
#[must_use]
pub struct Nmea0183ParserBuilder {
    /// Whether a `*CC` checksum suffix is required or merely allowed.
    checksum_mode: ChecksumMode,

    /// Whether a trailing `\r\n` is required or forbidden.
    line_ending_mode: LineEndingMode,
}
154
155impl Nmea0183ParserBuilder {
156    /// Creates a new NMEA 0183 parser builder with default settings.
157    ///
158    /// The default settings are:
159    /// - Checksum mode: [`ChecksumMode::Required`]
160    /// - Line ending mode: [`LineEndingMode::Required`]
161    pub fn new() -> Self {
162        Nmea0183ParserBuilder {
163            checksum_mode: ChecksumMode::Required,
164            line_ending_mode: LineEndingMode::Required,
165        }
166    }
167
168    /// Sets the checksum mode for the parser.
169    ///
170    /// # Arguments
171    ///
172    /// * `mode` - The desired checksum mode:
173    ///   - [`ChecksumMode::Required`]: Checksum must be present and valid
174    ///   - [`ChecksumMode::Optional`]: Checksum may be absent or must be valid if present
175    pub fn checksum_mode(mut self, mode: ChecksumMode) -> Self {
176        self.checksum_mode = mode;
177        self
178    }
179
180    /// Sets the line ending mode for the parser.
181    ///
182    /// # Arguments
183    ///
184    /// * `mode` - The desired line ending mode:
185    ///   - [`LineEndingMode::Required`]: Message must end with `\r\n`
186    ///   - [`LineEndingMode::Forbidden`]: Message must not end with `\r\n`
187    pub fn line_ending_mode(mut self, mode: LineEndingMode) -> Self {
188        self.line_ending_mode = mode;
189        self
190    }
191
192    /// Builds the NMEA 0183-style parser with the configured settings.
193    ///
194    /// This method takes a user-provided parser function that will handle the
195    /// content of the message after the framing has been processed.
196    ///
197    /// The returned parser will:
198    /// * Validate that the input is ASCII-only
199    /// * Expect the message to start with `$`
200    /// * Extract the message content (everything before `*CC` or `\r\n`)
201    /// * Parse and validate the checksum using the provided checksum parser
202    /// * Call the user-provided parser on the message content
203    ///
204    /// # Arguments
205    ///
206    /// * `content_parser` - User-provided parser for the message content.
207    ///
208    /// # Returns
209    ///
210    /// A parser function that takes an input and returns a result containing the parsed content
211    /// or an error if the input does not conform to the expected NMEA 0183 format.
212    pub fn build<'a, I, O, F, E>(self, mut content_parser: F) -> impl FnMut(I) -> IResult<I, O, E>
213    where
214        I: Input + AsBytes + Compare<&'a str> + FindSubstring<&'a str>,
215        <I as Input>::Item: AsChar,
216        F: Parser<I, Output = O, Error = Error<I, E>>,
217        E: ParseError<I>,
218    {
219        move |i: I| {
220            if !i.as_bytes().is_ascii() {
221                return Err(nom::Err::Error(Error::NonAscii));
222            }
223
224            let (i, _) = char('$').parse(i)?;
225            let (cc, data) = alt((take_until("*"), take_until("\r\n"), rest)).parse(i)?;
226            let (_, cc) = checksum_crlf(self.checksum_mode, self.line_ending_mode).parse(cc)?;
227            let (data, calc_cc) = checksum(data);
228
229            if let Some(cc) = cc
230                && cc != calc_cc
231            {
232                return Err(nom::Err::Error(Error::ChecksumMismatch {
233                    expected: calc_cc,
234                    found: cc,
235                }));
236            }
237
238            content_parser.parse(data)
239        }
240    }
241}
242
243impl Default for Nmea0183ParserBuilder {
244    fn default() -> Self {
245        Nmea0183ParserBuilder::new()
246    }
247}
248
249/// Creates a parser for checksum and CRLF based on configuration.
250///
251/// This function returns a parser that can handle the end portion of NMEA messages,
252/// specifically the checksum (if present) and line ending (if present).
253///
254/// # Arguments
255///
256/// * `cc` - Checksum requirement:
257///   - [`ChecksumMode::Required`]: Parser will fail if no '*CC' is present
258///   - [`ChecksumMode::Optional`]: Parser accepts messages with or without '*CC',
259///     but validates checksum if present
260/// * `crlf` - CRLF requirement:
261///   - [`LineEndingMode::Required`]: Parser will fail if message doesn't end with `\r\n`
262///   - [`LineEndingMode::Forbidden`]: Parser will fail if message ends with `\r\n`
263///
264/// # Returns
265///
266/// A parser that extracts the checksum value ([`None`] if no checksum present).
267///
268/// # Message Format Expectations
269///
270/// - cc=[`ChecksumMode::Required`], crlf=[`LineEndingMode::Required`]: Expects `*CC\r\n`
271/// - cc=[`ChecksumMode::Required`], crlf=[`LineEndingMode::Forbidden`]: Expects `*CC`
272/// - cc=[`ChecksumMode::Optional`], crlf=[`LineEndingMode::Required`]: Expects `\r\n` or `*CC\r\n`
273/// - cc=[`ChecksumMode::Optional`], crlf=[`LineEndingMode::Forbidden`]: Expects nothing or `*CC`
274///
275/// # Examples
276///
277/// ```rust,ignore
278/// use nmea0183_parser::{ChecksumMode, IResult, LineEndingMode, checksum_crlf};
279/// use nom::Parser;
280///
281/// // Required checksum, required CRLF
282/// let mut parser = checksum_crlf(ChecksumMode::Required, LineEndingMode::Required);
283/// let result: IResult<_, _> = parser.parse("*51\r\n");
284/// assert_eq!(result, Ok(("", Some(0x51))));
285///
286/// // Optional checksum, forbidden CRLF
287/// let mut parser = checksum_crlf(ChecksumMode::Optional, LineEndingMode::Forbidden);
288/// let result1: IResult<_, _> = parser.parse("*51");     // With checksum
289/// let result2: IResult<_, _> = parser.parse("");        // Without checksum
290/// assert!(result1.is_ok());
291/// assert!(result2.is_ok());
292/// ```
293fn checksum_crlf<'a, I, E: ParseError<I>>(
294    cc: ChecksumMode,
295    le: LineEndingMode,
296) -> impl FnMut(I) -> nom::IResult<I, Option<u8>, E>
297where
298    I: Input + AsBytes + Compare<&'a str> + FindSubstring<&'a str>,
299    <I as Input>::Item: AsChar,
300{
301    move |i: I| {
302        let (i, _) = crlf(le).parse(i)?;
303
304        let (cc, parse_cc) = match cc {
305            ChecksumMode::Required => char('*').map(|_| true).parse(i)?,
306            ChecksumMode::Optional => opt(char('*')).map(|asterisk| asterisk.is_some()).parse(i)?,
307        };
308
309        if parse_cc {
310            let (_, cc) = consumed(take(2u8), ErrorKind::Count).parse(cc)?;
311            let (_, cc) = consumed(hex_digit0, ErrorKind::IsA).parse(cc)?;
312
313            hex_u32.map(|cc| Some(cc as u8)).parse(cc)
314        } else if cc.input_len() != 0 {
315            Err(Err::Error(E::from_error_kind(cc, ErrorKind::Count)))
316        } else {
317            Ok((cc, None))
318        }
319    }
320}
321
322/// Parses CRLF line endings based on configuration.
323///
324/// This function handles the parsing of carriage return and line feed characters
325/// at the end of NMEA messages, with support for both required and forbidden modes.
326///
327/// # Arguments
328///
329/// * `crlf` - CRLF requirement:
330///   - [`LineEndingMode::Required`]: Parser will fail if message doesn't end with `\r\n`
331///   - [`LineEndingMode::Forbidden`]: Parser will fail if message ends with `\r\n`
332///
333/// # Returns
334///
335/// A parser function that validates CRLF presence according to the configuration.
336///
337/// # Examples
338///
339/// ```rust,ignore
340/// use nmea0183_parser::{IResult, LineEndingMode, crlf};
341/// use nom::Parser;
342///
343/// // CRLF required
344/// let mut parser = crlf(LineEndingMode::Required);
345/// let result: IResult<_, _> = parser.parse("data\r\n");
346/// assert_eq!(result, Ok(("data", ())));
347///
348/// // CRLF forbidden
349/// let mut parser = crlf(LineEndingMode::Forbidden);
350/// let result: IResult<_, _> = parser.parse("data");
351/// assert_eq!(result, Ok(("data", ())));
352/// ```
353fn crlf<'a, I, E: ParseError<I>>(crlf: LineEndingMode) -> impl Fn(I) -> nom::IResult<I, (), E>
354where
355    I: Input + Compare<&'a str> + FindSubstring<&'a str>,
356{
357    move |i: I| {
358        let (i, data) = opt(take_until("\r\n")).parse(i)?;
359
360        let data = if crlf == LineEndingMode::Required {
361            match data {
362                Some(data) => {
363                    let (_, _) = consumed(tag("\r\n"), ErrorKind::CrLf).parse(i)?;
364                    data
365                }
366                None => {
367                    return Err(Err::Error(E::from_error_kind(i, ErrorKind::CrLf)));
368                }
369            }
370        } else if data.is_some() {
371            return Err(Err::Error(E::from_error_kind(i, ErrorKind::CrLf)));
372        } else {
373            i
374        };
375
376        Ok((data, ()))
377    }
378}
379
380/// Calculates the NMEA 0183 checksum for the given message content.
381///
382/// The NMEA 0183 checksum is calculated by performing an XOR (exclusive OR) operation
383/// on all bytes in the message content. This includes everything between the '$' prefix
384/// and the '*' checksum delimiter, but excludes both the '$' and '*' characters themselves.
385///
386/// # Algorithm
387///
388/// 1. Initialize checksum to 0
389/// 2. For each byte in the message content:
390///    - XOR the current checksum with the byte value
391/// 3. The final result is an 8-bit value (0-255)
392///
393/// # Arguments
394///
395/// * `input` - The message content to calculate checksum for (without '$' prefix or '*' delimiter)
396///
397/// # Returns
398///
399/// A tuple of (input, checksum) where:
400/// - `input` is returned unchanged (zero-copy)
401/// - `checksum` is the calculated XOR value as a u8
402///
403/// # NMEA 0183 Standard
404///
405/// According to the NMEA 0183 standard:
406/// - The checksum is represented as a two-digit hexadecimal number
407/// - It appears after the '*' character at the end of the sentence
408/// - Example: `$GPGGA,123456,data*41` where '41' is the hex representation of the checksum
409///
410/// # Performance Notes
411///
412/// This function uses `fold()` with XOR operation, which is:
413/// - Efficient for small to medium message sizes (typical NMEA messages are < 100 bytes)
414/// - Single-pass algorithm with O(n) time complexity
415/// - No memory allocation (zero-copy input handling)
416fn checksum<I>(input: I) -> (I, u8)
417where
418    I: Input + AsBytes,
419{
420    let calculated_checksum = input
421        .as_bytes()
422        .iter()
423        .fold(0u8, |accumulated_xor, &byte| accumulated_xor ^ byte);
424
425    (input, calculated_checksum)
426}
427
428/// Ensures that the parser consumes all input.
429///
430/// This is a convenience function for the common case of wanting to ensure that
431/// a parser consumes the entire input with no remainder.
432///
433/// # Arguments
434///
435/// * `f` - The parser to run
436/// * `e` - Error kind to return if input is not fully consumed
437///
438/// # Examples
439///
440/// ```ignore
441/// use nmea0183_parser::nmea0183::consumed;
442/// use nom::{IResult, Parser, bytes::complete::take, error::ErrorKind};
443///
444/// // Parse all 3 bytes
445/// let mut parser = consumed(take(3u8), ErrorKind::Count);
446/// let result: IResult<_, _> = parser.parse("abc");
447/// assert!(result.is_ok());
448///
449/// // This would fail because not all input is consumed
450/// let result = parser.parse("abcd");
451/// assert!(result.is_err());
452/// ```
453fn consumed<I, E: ParseError<I>, F>(
454    f: F,
455    e: ErrorKind,
456) -> impl Parser<I, Output = <F as Parser<I>>::Output, Error = E>
457where
458    I: Input,
459    F: Parser<I, Error = E>,
460{
461    terminated(
462        f,
463        verify(rest_len, |len| len == &0)
464            .or(move |i| Err(Err::Error(nom::error::make_error(i, e)))),
465    )
466}
467
// Unit tests live in separate files, one sub-module per file; they are only
// compiled for test builds.
#[cfg(test)]
mod tests {
    mod cc_crlf00;
    mod cc_crlf01;
    mod cc_crlf10;
    mod cc_crlf11;
    mod crlf;
}