Skip to main content

libmagic_rs/parser/
grammar.rs

1// Copyright (c) 2025-2026 the libmagic-rs contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! Grammar parsing for magic files using nom parser combinators
5//!
6//! This module implements the parsing logic for magic file syntax, converting
7//! text-based magic rules into the AST representation defined in ast.rs.
8
9use nom::{
10    IResult, Parser,
11    branch::alt,
12    bytes::complete::{tag, take_while},
13    character::complete::{char, digit1, hex_digit1, multispace0, none_of, one_of},
14    combinator::{map, opt, recognize},
15    error::Error as NomError,
16    multi::many0,
17    sequence::pair,
18};
19
20use crate::parser::ast::{
21    Endianness, MagicRule, OffsetSpec, Operator, StrengthModifier, TypeKind, Value,
22};
23
24/// Parse a decimal number with overflow protection
25fn parse_decimal_number(input: &str) -> IResult<&str, i64> {
26    let (input, digits) = digit1(input)?;
27
28    // Check for potential overflow before parsing
29    if digits.len() > 19 {
30        // i64::MAX has 19 digits, so anything longer will definitely overflow
31        return Err(nom::Err::Error(nom::error::Error::new(
32            input,
33            nom::error::ErrorKind::MapRes,
34        )));
35    }
36
37    let number = digits.parse::<i64>().map_err(|_| {
38        nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::MapRes))
39    })?;
40    Ok((input, number))
41}
42
43/// Parse a hexadecimal number (with 0x prefix) with overflow protection
44fn parse_hex_number(input: &str) -> IResult<&str, i64> {
45    let (input, _) = tag("0x")(input)?;
46    let (input, hex_str) = hex_digit1(input)?;
47
48    // Check for potential overflow - i64 can hold up to 16 hex digits (0x7FFFFFFFFFFFFFFF)
49    if hex_str.len() > 16 {
50        return Err(nom::Err::Error(nom::error::Error::new(
51            input,
52            nom::error::ErrorKind::MapRes,
53        )));
54    }
55
56    let number = i64::from_str_radix(hex_str, 16).map_err(|_| {
57        nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::MapRes))
58    })?;
59
60    Ok((input, number))
61}
62
63/// Parse a decimal or hexadecimal number
64///
65/// Supports both decimal (123, -456) and hexadecimal (0x1a2b, -0xFF) formats.
66///
67/// # Examples
68///
69/// ```
70/// use libmagic_rs::parser::grammar::parse_number;
71///
72/// assert_eq!(parse_number("123"), Ok(("", 123)));
73/// assert_eq!(parse_number("0x1a"), Ok(("", 26)));
74/// assert_eq!(parse_number("-42"), Ok(("", -42)));
75/// assert_eq!(parse_number("-0xFF"), Ok(("", -255)));
76/// ```
77///
78/// # Errors
79///
80/// Returns a nom parsing error if:
81/// - Input is empty or contains no valid digits
82/// - Hexadecimal number lacks proper "0x" prefix or contains invalid hex digits
83/// - Number cannot be parsed as a valid `i64` value
84/// - Input contains invalid characters for the detected number format
85pub fn parse_number(input: &str) -> IResult<&str, i64> {
86    let (input, sign) = opt(char('-')).parse(input)?;
87    let is_negative = sign.is_some();
88
89    // Check if input starts with "0x" - if so, it must be a valid hex number
90    let (input, number) = if input.starts_with("0x") {
91        parse_hex_number(input)?
92    } else {
93        parse_decimal_number(input)?
94    };
95
96    // Apply sign with overflow checking
97    let result = if is_negative {
98        number.checked_neg().ok_or_else(|| {
99            nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::MapRes))
100        })?
101    } else {
102        number
103    };
104
105    Ok((input, result))
106}
107
108/// Parse an offset specification for absolute offsets
109///
110/// Supports decimal and hexadecimal formats, both positive and negative.
111///
112/// # Examples
113///
114/// ```
115/// use libmagic_rs::parser::grammar::parse_offset;
116/// use libmagic_rs::parser::ast::OffsetSpec;
117///
118/// assert_eq!(parse_offset("0"), Ok(("", OffsetSpec::Absolute(0))));
119/// assert_eq!(parse_offset("123"), Ok(("", OffsetSpec::Absolute(123))));
120/// assert_eq!(parse_offset("0x10"), Ok(("", OffsetSpec::Absolute(16))));
121/// assert_eq!(parse_offset("-4"), Ok(("", OffsetSpec::Absolute(-4))));
122/// assert_eq!(parse_offset("-0xFF"), Ok(("", OffsetSpec::Absolute(-255))));
123/// ```
124///
125/// # Errors
126///
127/// Returns a nom parsing error if:
128/// - The input contains invalid number format (propagated from `parse_number`)
129/// - Input is empty or contains no parseable offset value
130/// - The offset value cannot be represented as a valid `i64`
131pub fn parse_offset(input: &str) -> IResult<&str, OffsetSpec> {
132    let (input, _) = multispace0(input)?;
133    let (input, offset_value) = parse_number(input)?;
134    let (input, _) = multispace0(input)?;
135
136    Ok((input, OffsetSpec::Absolute(offset_value)))
137}
138
139/// Parse comparison operators for magic rules
140///
141/// Supports both symbolic and text representations of operators:
142/// - `=` or `==` for equality
143/// - `!=` or `<>` for inequality
144/// - `&` for bitwise AND
145///
146/// # Examples
147///
148/// ```
149/// use libmagic_rs::parser::grammar::parse_operator;
150/// use libmagic_rs::parser::ast::Operator;
151///
152/// assert_eq!(parse_operator("="), Ok(("", Operator::Equal)));
153/// assert_eq!(parse_operator("=="), Ok(("", Operator::Equal)));
154/// assert_eq!(parse_operator("!="), Ok(("", Operator::NotEqual)));
155/// assert_eq!(parse_operator("<>"), Ok(("", Operator::NotEqual)));
156/// assert_eq!(parse_operator("&"), Ok(("", Operator::BitwiseAnd)));
157/// ```
158///
159/// # Errors
160///
161/// Returns a nom parsing error if:
162/// - Input does not start with a recognized operator symbol
163/// - Input is empty or contains no valid operator
164/// - Operator syntax is incomplete (e.g., just `!` without `=`)
165pub fn parse_operator(input: &str) -> IResult<&str, Operator> {
166    let (input, _) = multispace0(input)?;
167
168    // Try to parse each operator, starting with longer ones first
169    if let Ok((remaining, _)) = tag::<&str, &str, nom::error::Error<&str>>("==")(input) {
170        // Check that we don't have another '=' following (to reject "===")
171        if remaining.starts_with('=') {
172            return Err(nom::Err::Error(nom::error::Error::new(
173                input,
174                nom::error::ErrorKind::Tag,
175            )));
176        }
177        let (remaining, _) = multispace0(remaining)?;
178        return Ok((remaining, Operator::Equal));
179    }
180
181    if let Ok((remaining, _)) = tag::<&str, &str, nom::error::Error<&str>>("!=")(input) {
182        let (remaining, _) = multispace0(remaining)?;
183        return Ok((remaining, Operator::NotEqual));
184    }
185
186    if let Ok((remaining, _)) = tag::<&str, &str, nom::error::Error<&str>>("<>")(input) {
187        let (remaining, _) = multispace0(remaining)?;
188        return Ok((remaining, Operator::NotEqual));
189    }
190
191    if let Ok((remaining, _)) = tag::<&str, &str, nom::error::Error<&str>>("=")(input) {
192        // Check that we don't have another '=' following (to reject "==")
193        if remaining.starts_with('=') {
194            return Err(nom::Err::Error(nom::error::Error::new(
195                input,
196                nom::error::ErrorKind::Tag,
197            )));
198        }
199        let (remaining, _) = multispace0(remaining)?;
200        return Ok((remaining, Operator::Equal));
201    }
202
203    if let Ok((remaining, _)) = tag::<&str, &str, nom::error::Error<&str>>("&")(input) {
204        // Check that we don't have another '&' following (to reject "&&")
205        if remaining.starts_with('&') {
206            return Err(nom::Err::Error(nom::error::Error::new(
207                input,
208                nom::error::ErrorKind::Tag,
209            )));
210        }
211        let (remaining, _) = multispace0(remaining)?;
212        return Ok((remaining, Operator::BitwiseAnd));
213    }
214
215    // If no operator matches, return an error
216    Err(nom::Err::Error(nom::error::Error::new(
217        input,
218        nom::error::ErrorKind::Tag,
219    )))
220}
221
222/// Parse a single hex byte with \x prefix
223fn parse_hex_byte_with_prefix(input: &str) -> IResult<&str, u8> {
224    let (input, _) = tag("\\x")(input)?;
225    let (input, hex_str) = recognize(pair(
226        one_of("0123456789abcdefABCDEF"),
227        one_of("0123456789abcdefABCDEF"),
228    ))
229    .parse(input)?;
230    let byte_val = u8::from_str_radix(hex_str, 16)
231        .map_err(|_| nom::Err::Error(NomError::new(input, nom::error::ErrorKind::MapRes)))?;
232    Ok((input, byte_val))
233}
234
235/// Parse a hex byte sequence starting with \x prefix
236fn parse_hex_bytes_with_prefix(input: &str) -> IResult<&str, Vec<u8>> {
237    if input.starts_with("\\x") {
238        many0(parse_hex_byte_with_prefix).parse(input)
239    } else {
240        Err(nom::Err::Error(NomError::new(
241            input,
242            nom::error::ErrorKind::Tag,
243        )))
244    }
245}
246
247/// Parse a mixed hex and ASCII sequence (like \x7fELF)
248fn parse_mixed_hex_ascii(input: &str) -> IResult<&str, Vec<u8>> {
249    // Must start with \ to be considered an escape sequence
250    if !input.starts_with('\\') {
251        return Err(nom::Err::Error(NomError::new(
252            input,
253            nom::error::ErrorKind::Tag,
254        )));
255    }
256
257    let mut bytes = Vec::new();
258    let mut remaining = input;
259
260    while !remaining.is_empty() {
261        // Try to parse escape sequences first (hex, octal, etc.)
262        if let Ok((new_remaining, escaped_char)) = parse_escape_sequence(remaining) {
263            bytes.push(escaped_char as u8);
264            remaining = new_remaining;
265        } else if let Ok((new_remaining, hex_byte)) = parse_hex_byte_with_prefix(remaining) {
266            bytes.push(hex_byte);
267            remaining = new_remaining;
268        } else if let Ok((new_remaining, ascii_char)) =
269            none_of::<&str, &str, NomError<&str>>(" \t\n\r")(remaining)
270        {
271            // Parse regular ASCII character (not whitespace)
272            bytes.push(ascii_char as u8);
273            remaining = new_remaining;
274        } else {
275            // Stop if we can't parse anything more
276            break;
277        }
278    }
279
280    if bytes.is_empty() {
281        Err(nom::Err::Error(NomError::new(
282            input,
283            nom::error::ErrorKind::Tag,
284        )))
285    } else {
286        Ok((remaining, bytes))
287    }
288}
289
290/// Parse a hex byte sequence without prefix (only if it looks like pure hex bytes)
291fn parse_hex_bytes_no_prefix(input: &str) -> IResult<&str, Vec<u8>> {
292    // Only parse as hex bytes if:
293    // 1. Input has even number of hex digits (pairs)
294    // 2. All characters are hex digits
295    // 3. Doesn't start with 0x (that's a number)
296    // 4. Contains at least one non-decimal digit (a-f, A-F)
297
298    if input.starts_with("0x") || input.starts_with('-') {
299        return Err(nom::Err::Error(NomError::new(
300            input,
301            nom::error::ErrorKind::Tag,
302        )));
303    }
304
305    let hex_chars: String = input.chars().take_while(char::is_ascii_hexdigit).collect();
306
307    if hex_chars.is_empty() || hex_chars.len() % 2 != 0 {
308        return Err(nom::Err::Error(NomError::new(
309            input,
310            nom::error::ErrorKind::Tag,
311        )));
312    }
313
314    // Check if it contains non-decimal hex digits (a-f, A-F)
315    let has_hex_letters = hex_chars
316        .chars()
317        .any(|c| matches!(c, 'a'..='f' | 'A'..='F'));
318    if !has_hex_letters {
319        return Err(nom::Err::Error(NomError::new(
320            input,
321            nom::error::ErrorKind::Tag,
322        )));
323    }
324
325    // Parse pairs of hex digits
326    let mut bytes = Vec::with_capacity(hex_chars.len() / 2);
327    let mut chars = hex_chars.chars();
328    while let (Some(c1), Some(c2)) = (chars.next(), chars.next()) {
329        // Avoid format! allocation by parsing digits directly
330        let digit1 = c1
331            .to_digit(16)
332            .ok_or_else(|| nom::Err::Error(NomError::new(input, nom::error::ErrorKind::MapRes)))?;
333        let digit2 = c2
334            .to_digit(16)
335            .ok_or_else(|| nom::Err::Error(NomError::new(input, nom::error::ErrorKind::MapRes)))?;
336        let byte_val = u8::try_from((digit1 << 4) | digit2)
337            .map_err(|_| nom::Err::Error(NomError::new(input, nom::error::ErrorKind::MapRes)))?;
338        bytes.push(byte_val);
339    }
340
341    let remaining = &input[hex_chars.len()..];
342    Ok((remaining, bytes))
343}
344
345/// Parse a hex byte sequence (e.g., "\\x7f\\x45\\x4c\\x46", "7f454c46", or "\\x7fELF")
346fn parse_hex_bytes(input: &str) -> IResult<&str, Vec<u8>> {
347    alt((
348        parse_mixed_hex_ascii,
349        parse_hex_bytes_with_prefix,
350        parse_hex_bytes_no_prefix,
351    ))
352    .parse(input)
353}
354
355/// Parse escape sequences in strings
356fn parse_escape_sequence(input: &str) -> IResult<&str, char> {
357    let (input, _) = char('\\')(input)?;
358
359    // Try to parse octal escape sequence first (\377, \123, etc.)
360    if let Ok((remaining, octal_str)) = recognize(pair(
361        one_of::<&str, &str, NomError<&str>>("0123"),
362        pair(
363            one_of::<&str, &str, NomError<&str>>("01234567"),
364            one_of::<&str, &str, NomError<&str>>("01234567"),
365        ),
366    ))
367    .parse(input)
368    {
369        if let Ok(octal_value) = u8::from_str_radix(octal_str, 8) {
370            return Ok((remaining, octal_value as char));
371        }
372    }
373
374    // Parse standard escape sequences
375    let (input, escaped_char) = one_of("nrt\\\"'0")(input)?;
376
377    let result_char = match escaped_char {
378        'n' => '\n',
379        'r' => '\r',
380        't' => '\t',
381        '\\' => '\\',
382        '"' => '"',
383        '\'' => '\'',
384        '0' => '\0',
385        _ => escaped_char, // Fallback for other characters
386    };
387
388    Ok((input, result_char))
389}
390
391/// Parse a quoted string with escape sequences
392fn parse_quoted_string(input: &str) -> IResult<&str, String> {
393    let (input, _) = multispace0(input)?;
394    let (input, _) = char('"')(input)?;
395
396    let mut result = String::new();
397    let mut remaining = input;
398
399    loop {
400        // Try to parse an escape sequence first
401        if let Ok((new_remaining, escaped_char)) = parse_escape_sequence(remaining) {
402            result.push(escaped_char);
403            remaining = new_remaining;
404            continue;
405        }
406
407        // If no escape sequence, try to parse a regular character (not quote or backslash)
408        if let Ok((new_remaining, regular_char)) =
409            none_of::<&str, &str, NomError<&str>>("\"\\")(remaining)
410        {
411            result.push(regular_char);
412            remaining = new_remaining;
413            continue;
414        }
415
416        // If neither worked, we should be at the closing quote
417        break;
418    }
419
420    let (remaining, _) = char('"')(remaining)?;
421    let (remaining, _) = multispace0(remaining)?;
422
423    Ok((remaining, result))
424}
425
426/// Parse a numeric value (integer)
427fn parse_numeric_value(input: &str) -> IResult<&str, Value> {
428    let (input, _) = multispace0(input)?;
429    let (input, number) = parse_number(input)?;
430    let (input, _) = multispace0(input)?;
431
432    // Convert to appropriate Value variant based on sign
433    let value = if number >= 0 {
434        Value::Uint(number.unsigned_abs())
435    } else {
436        Value::Int(number)
437    };
438
439    Ok((input, value))
440}
441
442/// Parse string and numeric literals for magic rule values
443///
444/// Supports:
445/// - Quoted strings with escape sequences: "Hello\nWorld", "ELF\0"
446/// - Numeric literals (decimal): 123, -456
447/// - Numeric literals (hexadecimal): 0x1a2b, -0xFF
448/// - Hex byte sequences: \\x7f\\x45\\x4c\\x46 or 7f454c46
449///
450/// # Examples
451///
452/// ```
453/// use libmagic_rs::parser::grammar::parse_value;
454/// use libmagic_rs::parser::ast::Value;
455///
456/// // String values
457/// assert_eq!(parse_value("\"Hello\""), Ok(("", Value::String("Hello".to_string()))));
458/// assert_eq!(parse_value("\"Line1\\nLine2\""), Ok(("", Value::String("Line1\nLine2".to_string()))));
459///
460/// // Numeric values
461/// assert_eq!(parse_value("123"), Ok(("", Value::Uint(123))));
462/// assert_eq!(parse_value("-456"), Ok(("", Value::Int(-456))));
463/// assert_eq!(parse_value("0x1a"), Ok(("", Value::Uint(26))));
464/// assert_eq!(parse_value("-0xFF"), Ok(("", Value::Int(-255))));
465///
466/// // Hex byte sequences
467/// assert_eq!(parse_value("\\x7f\\x45"), Ok(("", Value::Bytes(vec![0x7f, 0x45]))));
468/// ```
469///
470/// # Errors
471///
472/// Returns a nom parsing error if:
473/// - Input is empty or contains no valid value
474/// - Quoted string is not properly terminated
475/// - Numeric value cannot be parsed as a valid integer
476/// - Hex byte sequence contains invalid hex digits
477/// - Input contains invalid characters for the detected value format
478pub fn parse_value(input: &str) -> IResult<&str, Value> {
479    let (input, _) = multispace0(input)?;
480
481    // Handle empty input case - should fail for magic rules
482    if input.is_empty() {
483        return Err(nom::Err::Error(NomError::new(
484            input,
485            nom::error::ErrorKind::Tag,
486        )));
487    }
488
489    // Try to parse different value types in order of specificity
490    let (input, value) = alt((
491        // Try quoted string first
492        map(parse_quoted_string, Value::String),
493        // Try hex byte sequence before numeric (to catch patterns like "7f", "ab", "\\x7fELF", etc.)
494        map(parse_hex_bytes, Value::Bytes),
495        // Try numeric value last (for pure numbers like 0x123, 1, etc.)
496        parse_numeric_value,
497    ))
498    .parse(input)?;
499
500    Ok((input, value))
501}
502
503#[cfg(test)]
504mod tests {
505    use super::*;
506
507    /// Helper function to test parsing with various whitespace patterns
508    #[allow(dead_code)] // TODO: Use this helper in future whitespace tests
509    fn test_with_whitespace_variants<T, F>(input: &str, expected: &T, parser: F)
510    where
511        T: Clone + PartialEq + std::fmt::Debug,
512        F: Fn(&str) -> IResult<&str, T>,
513    {
514        // Test with various whitespace patterns - pre-allocate Vec with known capacity
515        let mut whitespace_variants = Vec::with_capacity(9);
516        whitespace_variants.extend([
517            format!(" {input}"),    // Leading space
518            format!("  {input}"),   // Leading spaces
519            format!("\t{input}"),   // Leading tab
520            format!("{input} "),    // Trailing space
521            format!("{input}  "),   // Trailing spaces
522            format!("{input}\t"),   // Trailing tab
523            format!(" {input} "),   // Both leading and trailing space
524            format!("  {input}  "), // Both leading and trailing spaces
525            format!("\t{input}\t"), // Both leading and trailing tabs
526        ]);
527
528        for variant in whitespace_variants {
529            assert_eq!(
530                parser(&variant),
531                Ok(("", expected.clone())),
532                "Failed to parse with whitespace: '{variant}'"
533            );
534        }
535    }
536
537    /// Helper function to test number parsing with remaining input
538    fn test_number_with_remaining_input() {
539        // Pre-allocate with known capacity for better performance
540        let test_cases = [
541            ("123abc", 123, "abc"),
542            ("0xFF rest", 255, " rest"),
543            ("-42 more", -42, " more"),
544            ("0x10,next", 16, ",next"),
545        ];
546
547        for (input, expected_num, expected_remaining) in test_cases {
548            assert_eq!(
549                parse_number(input),
550                Ok((expected_remaining, expected_num)),
551                "Failed to parse number with remaining input: '{input}'"
552            );
553        }
554    }
555
556    #[test]
557    fn test_parse_decimal_number() {
558        assert_eq!(parse_decimal_number("123"), Ok(("", 123)));
559        assert_eq!(parse_decimal_number("0"), Ok(("", 0)));
560        assert_eq!(parse_decimal_number("999"), Ok(("", 999)));
561
562        // Should fail on non-digits
563        assert!(parse_decimal_number("abc").is_err());
564        assert!(parse_decimal_number("").is_err());
565    }
566
567    #[test]
568    fn test_parse_hex_number() {
569        assert_eq!(parse_hex_number("0x0"), Ok(("", 0)));
570        assert_eq!(parse_hex_number("0x10"), Ok(("", 16)));
571        assert_eq!(parse_hex_number("0xFF"), Ok(("", 255)));
572        assert_eq!(parse_hex_number("0xabc"), Ok(("", 2748)));
573        assert_eq!(parse_hex_number("0xABC"), Ok(("", 2748)));
574
575        // Should fail without 0x prefix
576        assert!(parse_hex_number("FF").is_err());
577        assert!(parse_hex_number("10").is_err());
578
579        // Should fail on invalid hex digits
580        assert!(parse_hex_number("0xGG").is_err());
581    }
582
583    #[test]
584    fn test_parse_number_positive() {
585        // Decimal numbers
586        assert_eq!(parse_number("0"), Ok(("", 0)));
587        assert_eq!(parse_number("123"), Ok(("", 123)));
588        assert_eq!(parse_number("999"), Ok(("", 999)));
589
590        // Hexadecimal numbers
591        assert_eq!(parse_number("0x0"), Ok(("", 0)));
592        assert_eq!(parse_number("0x10"), Ok(("", 16)));
593        assert_eq!(parse_number("0xFF"), Ok(("", 255)));
594        assert_eq!(parse_number("0xabc"), Ok(("", 2748)));
595    }
596
597    #[test]
598    fn test_parse_number_negative() {
599        // Negative decimal numbers
600        assert_eq!(parse_number("-1"), Ok(("", -1)));
601        assert_eq!(parse_number("-123"), Ok(("", -123)));
602        assert_eq!(parse_number("-999"), Ok(("", -999)));
603
604        // Negative hexadecimal numbers
605        assert_eq!(parse_number("-0x1"), Ok(("", -1)));
606        assert_eq!(parse_number("-0x10"), Ok(("", -16)));
607        assert_eq!(parse_number("-0xFF"), Ok(("", -255)));
608        assert_eq!(parse_number("-0xabc"), Ok(("", -2748)));
609    }
610
611    #[test]
612    fn test_parse_number_edge_cases() {
613        // Zero with different formats
614        assert_eq!(parse_number("0"), Ok(("", 0)));
615        assert_eq!(parse_number("-0"), Ok(("", 0)));
616        assert_eq!(parse_number("0x0"), Ok(("", 0)));
617        assert_eq!(parse_number("-0x0"), Ok(("", 0)));
618
619        // Large numbers
620        assert_eq!(parse_number("2147483647"), Ok(("", 2_147_483_647))); // i32::MAX
621        assert_eq!(parse_number("-2147483648"), Ok(("", -2_147_483_648))); // i32::MIN
622        assert_eq!(parse_number("0x7FFFFFFF"), Ok(("", 2_147_483_647))); // i32::MAX in hex
623
624        // Should fail on invalid input
625        assert!(parse_number("").is_err());
626        assert!(parse_number("abc").is_err());
627        assert!(parse_number("0xGG").is_err());
628        assert!(parse_number("--123").is_err());
629    }
630
631    #[test]
632    fn test_parse_number_with_remaining_input() {
633        // Use helper function to reduce code duplication
634        test_number_with_remaining_input();
635    }
636
637    #[test]
638    fn test_parse_offset_absolute_positive() {
639        assert_eq!(parse_offset("0"), Ok(("", OffsetSpec::Absolute(0))));
640        assert_eq!(parse_offset("123"), Ok(("", OffsetSpec::Absolute(123))));
641        assert_eq!(parse_offset("999"), Ok(("", OffsetSpec::Absolute(999))));
642
643        // Hexadecimal offsets
644        assert_eq!(parse_offset("0x0"), Ok(("", OffsetSpec::Absolute(0))));
645        assert_eq!(parse_offset("0x10"), Ok(("", OffsetSpec::Absolute(16))));
646        assert_eq!(parse_offset("0xFF"), Ok(("", OffsetSpec::Absolute(255))));
647        assert_eq!(parse_offset("0xabc"), Ok(("", OffsetSpec::Absolute(2748))));
648    }
649
650    #[test]
651    fn test_parse_offset_absolute_negative() {
652        assert_eq!(parse_offset("-1"), Ok(("", OffsetSpec::Absolute(-1))));
653        assert_eq!(parse_offset("-123"), Ok(("", OffsetSpec::Absolute(-123))));
654        assert_eq!(parse_offset("-999"), Ok(("", OffsetSpec::Absolute(-999))));
655
656        // Negative hexadecimal offsets
657        assert_eq!(parse_offset("-0x1"), Ok(("", OffsetSpec::Absolute(-1))));
658        assert_eq!(parse_offset("-0x10"), Ok(("", OffsetSpec::Absolute(-16))));
659        assert_eq!(parse_offset("-0xFF"), Ok(("", OffsetSpec::Absolute(-255))));
660        assert_eq!(
661            parse_offset("-0xabc"),
662            Ok(("", OffsetSpec::Absolute(-2748)))
663        );
664    }
665
666    #[test]
667    fn test_parse_offset_with_whitespace() {
668        // Leading whitespace
669        assert_eq!(parse_offset(" 123"), Ok(("", OffsetSpec::Absolute(123))));
670        assert_eq!(parse_offset("  0x10"), Ok(("", OffsetSpec::Absolute(16))));
671        assert_eq!(parse_offset("\t-42"), Ok(("", OffsetSpec::Absolute(-42))));
672
673        // Trailing whitespace
674        assert_eq!(parse_offset("123 "), Ok(("", OffsetSpec::Absolute(123))));
675        assert_eq!(parse_offset("0x10  "), Ok(("", OffsetSpec::Absolute(16))));
676        assert_eq!(parse_offset("-42\t"), Ok(("", OffsetSpec::Absolute(-42))));
677
678        // Both leading and trailing whitespace
679        assert_eq!(parse_offset(" 123 "), Ok(("", OffsetSpec::Absolute(123))));
680        assert_eq!(parse_offset("  0x10  "), Ok(("", OffsetSpec::Absolute(16))));
681        assert_eq!(parse_offset("\t-42\t"), Ok(("", OffsetSpec::Absolute(-42))));
682    }
683
684    #[test]
685    fn test_parse_offset_with_remaining_input() {
686        // Should parse offset and leave remaining input
687        assert_eq!(
688            parse_offset("123 byte"),
689            Ok(("byte", OffsetSpec::Absolute(123)))
690        );
691        assert_eq!(parse_offset("0xFF ="), Ok(("=", OffsetSpec::Absolute(255))));
692        assert_eq!(
693            parse_offset("-42,next"),
694            Ok((",next", OffsetSpec::Absolute(-42)))
695        );
696        assert_eq!(
697            parse_offset("0x10\tlong"),
698            Ok(("long", OffsetSpec::Absolute(16)))
699        );
700    }
701
702    #[test]
703    fn test_parse_offset_edge_cases() {
704        // Zero with different formats
705        assert_eq!(parse_offset("0"), Ok(("", OffsetSpec::Absolute(0))));
706        assert_eq!(parse_offset("-0"), Ok(("", OffsetSpec::Absolute(0))));
707        assert_eq!(parse_offset("0x0"), Ok(("", OffsetSpec::Absolute(0))));
708        assert_eq!(parse_offset("-0x0"), Ok(("", OffsetSpec::Absolute(0))));
709
710        // Large offsets
711        assert_eq!(
712            parse_offset("2147483647"),
713            Ok(("", OffsetSpec::Absolute(2_147_483_647)))
714        );
715        assert_eq!(
716            parse_offset("-2147483648"),
717            Ok(("", OffsetSpec::Absolute(-2_147_483_648)))
718        );
719        assert_eq!(
720            parse_offset("0x7FFFFFFF"),
721            Ok(("", OffsetSpec::Absolute(2_147_483_647)))
722        );
723
724        // Should fail on invalid input
725        assert!(parse_offset("").is_err());
726        assert!(parse_offset("abc").is_err());
727        assert!(parse_offset("0xGG").is_err());
728        assert!(parse_offset("--123").is_err());
729    }
730
731    #[test]
732    fn test_parse_offset_common_magic_file_values() {
733        // Common offsets found in magic files
734        assert_eq!(parse_offset("0"), Ok(("", OffsetSpec::Absolute(0)))); // File start
735        assert_eq!(parse_offset("4"), Ok(("", OffsetSpec::Absolute(4)))); // After magic number
736        assert_eq!(parse_offset("16"), Ok(("", OffsetSpec::Absolute(16)))); // Common header offset
737        assert_eq!(parse_offset("0x10"), Ok(("", OffsetSpec::Absolute(16)))); // Same as above in hex
738        assert_eq!(parse_offset("512"), Ok(("", OffsetSpec::Absolute(512)))); // Sector boundary
739        assert_eq!(parse_offset("0x200"), Ok(("", OffsetSpec::Absolute(512)))); // Same in hex
740
741        // Negative offsets (from end of file)
742        assert_eq!(parse_offset("-4"), Ok(("", OffsetSpec::Absolute(-4)))); // 4 bytes from end
743        assert_eq!(parse_offset("-16"), Ok(("", OffsetSpec::Absolute(-16)))); // 16 bytes from end
744        assert_eq!(parse_offset("-0x10"), Ok(("", OffsetSpec::Absolute(-16)))); // Same in hex
745    }
746
747    #[test]
748    fn test_parse_offset_boundary_values() {
749        // Test boundary values that might cause issues
750        assert_eq!(parse_offset("1"), Ok(("", OffsetSpec::Absolute(1))));
751        assert_eq!(parse_offset("-1"), Ok(("", OffsetSpec::Absolute(-1))));
752
753        // Powers of 2 (common in binary formats)
754        assert_eq!(parse_offset("256"), Ok(("", OffsetSpec::Absolute(256))));
755        assert_eq!(parse_offset("0x100"), Ok(("", OffsetSpec::Absolute(256))));
756        assert_eq!(parse_offset("1024"), Ok(("", OffsetSpec::Absolute(1024))));
757        assert_eq!(parse_offset("0x400"), Ok(("", OffsetSpec::Absolute(1024))));
758
759        // Large but reasonable file offsets
760        assert_eq!(
761            parse_offset("1048576"),
762            Ok(("", OffsetSpec::Absolute(1_048_576)))
763        ); // 1MB
764        assert_eq!(
765            parse_offset("0x100000"),
766            Ok(("", OffsetSpec::Absolute(1_048_576)))
767        );
768    }
769
770    // Operator parsing tests
771    #[test]
772    fn test_parse_operator_equality() {
773        // Single equals sign
774        assert_eq!(parse_operator("="), Ok(("", Operator::Equal)));
775
776        // Double equals sign
777        assert_eq!(parse_operator("=="), Ok(("", Operator::Equal)));
778
779        // With whitespace
780        assert_eq!(parse_operator(" = "), Ok(("", Operator::Equal)));
781        assert_eq!(parse_operator("  ==  "), Ok(("", Operator::Equal)));
782        assert_eq!(parse_operator("\t=\t"), Ok(("", Operator::Equal)));
783    }
784
785    #[test]
786    fn test_parse_operator_inequality() {
787        // Not equals
788        assert_eq!(parse_operator("!="), Ok(("", Operator::NotEqual)));
789
790        // Alternative not equals syntax
791        assert_eq!(parse_operator("<>"), Ok(("", Operator::NotEqual)));
792
793        // With whitespace
794        assert_eq!(parse_operator(" != "), Ok(("", Operator::NotEqual)));
795        assert_eq!(parse_operator("  <>  "), Ok(("", Operator::NotEqual)));
796        assert_eq!(parse_operator("\t!=\t"), Ok(("", Operator::NotEqual)));
797    }
798
799    #[test]
800    fn test_parse_operator_bitwise_and() {
801        // Bitwise AND
802        assert_eq!(parse_operator("&"), Ok(("", Operator::BitwiseAnd)));
803
804        // With whitespace
805        assert_eq!(parse_operator(" & "), Ok(("", Operator::BitwiseAnd)));
806        assert_eq!(parse_operator("  &  "), Ok(("", Operator::BitwiseAnd)));
807        assert_eq!(parse_operator("\t&\t"), Ok(("", Operator::BitwiseAnd)));
808    }
809
810    #[test]
811    fn test_parse_operator_with_remaining_input() {
812        // Should parse operator and leave remaining input
813        assert_eq!(parse_operator("= 123"), Ok(("123", Operator::Equal)));
814        assert_eq!(
815            parse_operator("!= value"),
816            Ok(("value", Operator::NotEqual))
817        );
818        assert_eq!(parse_operator("& 0xFF"), Ok(("0xFF", Operator::BitwiseAnd)));
819        assert_eq!(
820            parse_operator("== \"string\""),
821            Ok(("\"string\"", Operator::Equal))
822        );
823        assert_eq!(parse_operator("<> test"), Ok(("test", Operator::NotEqual)));
824    }
825
826    #[test]
827    fn test_parse_operator_precedence() {
828        // Test that longer operators are matched first
829        // This ensures "==" is parsed as Equal, not "=" followed by "="
830        assert_eq!(parse_operator("=="), Ok(("", Operator::Equal)));
831        assert_eq!(parse_operator("== extra"), Ok(("extra", Operator::Equal)));
832
833        // Test that "!=" is parsed correctly, not as "!" followed by "="
834        assert_eq!(parse_operator("!="), Ok(("", Operator::NotEqual)));
835        assert_eq!(
836            parse_operator("!= extra"),
837            Ok(("extra", Operator::NotEqual))
838        );
839
840        // Test that "<>" is parsed correctly
841        assert_eq!(parse_operator("<>"), Ok(("", Operator::NotEqual)));
842        assert_eq!(
843            parse_operator("<> extra"),
844            Ok(("extra", Operator::NotEqual))
845        );
846    }
847
848    #[test]
849    fn test_parse_operator_invalid_input() {
850        // Should fail on invalid operators
851        assert!(parse_operator("").is_err());
852        assert!(parse_operator("abc").is_err());
853        assert!(parse_operator("123").is_err());
854        assert!(parse_operator(">").is_err());
855        assert!(parse_operator("<").is_err());
856        assert!(parse_operator("!").is_err());
857        assert!(parse_operator("===").is_err()); // Too many equals
858        assert!(parse_operator("&&").is_err()); // Double ampersand not supported
859    }
860
861    #[test]
862    fn test_parse_operator_edge_cases() {
863        // Test operators at start of various contexts - multispace0 consumes all whitespace
864        assert_eq!(parse_operator("=\n"), Ok(("", Operator::Equal)));
865        assert_eq!(parse_operator("!=\r\n"), Ok(("", Operator::NotEqual)));
866        assert_eq!(parse_operator("&\t\t"), Ok(("", Operator::BitwiseAnd)));
867
868        // Test with mixed whitespace
869        assert_eq!(parse_operator(" \t = \t "), Ok(("", Operator::Equal)));
870        assert_eq!(parse_operator("\t != \t"), Ok(("", Operator::NotEqual)));
871        assert_eq!(parse_operator(" \t& \t "), Ok(("", Operator::BitwiseAnd)));
872    }
873
874    #[test]
875    fn test_parse_operator_common_magic_file_patterns() {
876        // Test patterns commonly found in magic files
877        assert_eq!(
878            parse_operator("= 0x7f454c46"),
879            Ok(("0x7f454c46", Operator::Equal))
880        );
881        assert_eq!(parse_operator("!= 0"), Ok(("0", Operator::NotEqual)));
882        assert_eq!(
883            parse_operator("& 0xFF00"),
884            Ok(("0xFF00", Operator::BitwiseAnd))
885        );
886        assert_eq!(
887            parse_operator("== \"ELF\""),
888            Ok(("\"ELF\"", Operator::Equal))
889        );
890        assert_eq!(parse_operator("<> \"\""), Ok(("\"\"", Operator::NotEqual)));
891
892        // Test with various spacing patterns found in real magic files
893        assert_eq!(
894            parse_operator("=\t0x504b0304"),
895            Ok(("0x504b0304", Operator::Equal))
896        );
897        assert_eq!(parse_operator("!=  0"), Ok(("0", Operator::NotEqual)));
898        assert_eq!(
899            parse_operator("&   0xFFFF"),
900            Ok(("0xFFFF", Operator::BitwiseAnd))
901        );
902    }
903
904    #[test]
905    fn test_parse_operator_all_variants() {
906        // Ensure all operator variants are tested
907        let test_cases = [
908            ("=", Operator::Equal),
909            ("==", Operator::Equal),
910            ("!=", Operator::NotEqual),
911            ("<>", Operator::NotEqual),
912            ("&", Operator::BitwiseAnd),
913        ];
914
915        for (input, expected) in test_cases {
916            assert_eq!(
917                parse_operator(input),
918                Ok(("", expected)),
919                "Failed to parse operator: '{input}'"
920            );
921        }
922    }
923
924    // Value parsing tests
925    #[test]
926    fn test_parse_hex_bytes_with_backslash_x() {
927        // Single hex byte with \x prefix
928        assert_eq!(parse_hex_bytes("\\x7f"), Ok(("", vec![0x7f])));
929        assert_eq!(parse_hex_bytes("\\x45"), Ok(("", vec![0x45])));
930        assert_eq!(parse_hex_bytes("\\x00"), Ok(("", vec![0x00])));
931        assert_eq!(parse_hex_bytes("\\xFF"), Ok(("", vec![0xFF])));
932
933        // Multiple hex bytes with \x prefix
934        assert_eq!(
935            parse_hex_bytes("\\x7f\\x45\\x4c\\x46"),
936            Ok(("", vec![0x7f, 0x45, 0x4c, 0x46]))
937        );
938        assert_eq!(
939            parse_hex_bytes("\\x50\\x4b\\x03\\x04"),
940            Ok(("", vec![0x50, 0x4b, 0x03, 0x04]))
941        );
942    }
943
944    #[test]
945    fn test_parse_hex_bytes_without_prefix() {
946        // Single hex byte without prefix (only works if it contains hex letters)
947        assert_eq!(parse_hex_bytes("7f"), Ok(("", vec![0x7f])));
948        assert_eq!(
949            parse_hex_bytes("45"),
950            Err(nom::Err::Error(NomError::new(
951                "45",
952                nom::error::ErrorKind::Tag
953            )))
954        ); // No hex letters
955        assert_eq!(parse_hex_bytes("ab"), Ok(("", vec![0xab])));
956        assert_eq!(parse_hex_bytes("FF"), Ok(("", vec![0xFF])));
957
958        // Multiple hex bytes without prefix
959        assert_eq!(
960            parse_hex_bytes("7f454c46"),
961            Ok(("", vec![0x7f, 0x45, 0x4c, 0x46]))
962        );
963        assert_eq!(
964            parse_hex_bytes("504b0304"),
965            Ok(("", vec![0x50, 0x4b, 0x03, 0x04]))
966        );
967    }
968
969    #[test]
970    fn test_parse_hex_bytes_mixed_case() {
971        // Test mixed case hex digits
972        assert_eq!(parse_hex_bytes("aB"), Ok(("", vec![0xab])));
973        assert_eq!(parse_hex_bytes("Cd"), Ok(("", vec![0xcd])));
974        assert_eq!(parse_hex_bytes("\\xEf"), Ok(("", vec![0xef])));
975        assert_eq!(parse_hex_bytes("\\x1A"), Ok(("", vec![0x1a])));
976    }
977
978    #[test]
979    fn test_parse_hex_bytes_empty() {
980        // Empty input should return error (no hex bytes to parse)
981        assert_eq!(
982            parse_hex_bytes(""),
983            Err(nom::Err::Error(NomError::new(
984                "",
985                nom::error::ErrorKind::Tag
986            )))
987        );
988    }
989
990    #[test]
991    fn test_parse_hex_bytes_with_remaining_input() {
992        // Should parse hex bytes and leave remaining input
993        assert_eq!(
994            parse_hex_bytes("7f45 rest"),
995            Ok((" rest", vec![0x7f, 0x45]))
996        );
997        assert_eq!(
998            parse_hex_bytes("\\x50\\x4b next"),
999            Ok((" next", vec![0x50, 0x4b]))
1000        );
1001        assert_eq!(parse_hex_bytes("ab\""), Ok(("\"", vec![0xab])));
1002    }
1003
1004    #[test]
1005    fn test_parse_escape_sequence() {
1006        // Standard escape sequences
1007        assert_eq!(parse_escape_sequence("\\n"), Ok(("", '\n')));
1008        assert_eq!(parse_escape_sequence("\\r"), Ok(("", '\r')));
1009        assert_eq!(parse_escape_sequence("\\t"), Ok(("", '\t')));
1010        assert_eq!(parse_escape_sequence("\\\\"), Ok(("", '\\')));
1011        assert_eq!(parse_escape_sequence("\\\""), Ok(("", '"')));
1012        assert_eq!(parse_escape_sequence("\\'"), Ok(("", '\'')));
1013        assert_eq!(parse_escape_sequence("\\0"), Ok(("", '\0')));
1014    }
1015
1016    #[test]
1017    fn test_parse_escape_sequence_with_remaining() {
1018        // Should parse escape and leave remaining input
1019        assert_eq!(parse_escape_sequence("\\n rest"), Ok((" rest", '\n')));
1020        assert_eq!(parse_escape_sequence("\\t\""), Ok(("\"", '\t')));
1021    }
1022
1023    #[test]
1024    fn test_parse_escape_sequence_invalid() {
1025        // Should fail on invalid escape sequences
1026        assert!(parse_escape_sequence("n").is_err()); // Missing backslash
1027        assert!(parse_escape_sequence("\\").is_err()); // Incomplete escape
1028        assert!(parse_escape_sequence("").is_err()); // Empty input
1029    }
1030
1031    #[test]
1032    fn test_parse_quoted_string_simple() {
1033        // Simple quoted strings
1034        assert_eq!(
1035            parse_quoted_string("\"hello\""),
1036            Ok(("", "hello".to_string()))
1037        );
1038        assert_eq!(
1039            parse_quoted_string("\"world\""),
1040            Ok(("", "world".to_string()))
1041        );
1042        assert_eq!(parse_quoted_string("\"\""), Ok(("", String::new())));
1043    }
1044
1045    #[test]
1046    fn test_parse_quoted_string_with_escapes() {
1047        // Strings with escape sequences
1048        assert_eq!(
1049            parse_quoted_string("\"Hello\\nWorld\""),
1050            Ok(("", "Hello\nWorld".to_string()))
1051        );
1052        assert_eq!(
1053            parse_quoted_string("\"Tab\\tSeparated\""),
1054            Ok(("", "Tab\tSeparated".to_string()))
1055        );
1056        assert_eq!(
1057            parse_quoted_string("\"Quote: \\\"text\\\"\""),
1058            Ok(("", "Quote: \"text\"".to_string()))
1059        );
1060        assert_eq!(
1061            parse_quoted_string("\"Backslash: \\\\\""),
1062            Ok(("", "Backslash: \\".to_string()))
1063        );
1064        assert_eq!(
1065            parse_quoted_string("\"Null\\0terminated\""),
1066            Ok(("", "Null\0terminated".to_string()))
1067        );
1068    }
1069
1070    #[test]
1071    fn test_parse_quoted_string_with_whitespace() {
1072        // Strings with leading/trailing whitespace
1073        assert_eq!(
1074            parse_quoted_string(" \"hello\" "),
1075            Ok(("", "hello".to_string()))
1076        );
1077        assert_eq!(
1078            parse_quoted_string("\t\"world\"\t"),
1079            Ok(("", "world".to_string()))
1080        );
1081        assert_eq!(
1082            parse_quoted_string("  \"test\"  "),
1083            Ok(("", "test".to_string()))
1084        );
1085    }
1086
1087    #[test]
1088    fn test_parse_quoted_string_with_remaining_input() {
1089        // Should parse string and leave remaining input
1090        assert_eq!(
1091            parse_quoted_string("\"hello\" world"),
1092            Ok(("world", "hello".to_string()))
1093        );
1094        assert_eq!(
1095            parse_quoted_string("\"test\" = 123"),
1096            Ok(("= 123", "test".to_string()))
1097        );
1098    }
1099
1100    #[test]
1101    fn test_parse_quoted_string_invalid() {
1102        // Should fail on invalid quoted strings
1103        assert!(parse_quoted_string("hello").is_err()); // No quotes
1104        assert!(parse_quoted_string("\"hello").is_err()); // Missing closing quote
1105        assert!(parse_quoted_string("hello\"").is_err()); // Missing opening quote
1106        assert!(parse_quoted_string("").is_err()); // Empty input
1107    }
1108
1109    #[test]
1110    fn test_parse_numeric_value_positive() {
1111        // Positive integers
1112        assert_eq!(parse_numeric_value("0"), Ok(("", Value::Uint(0))));
1113        assert_eq!(parse_numeric_value("123"), Ok(("", Value::Uint(123))));
1114        assert_eq!(parse_numeric_value("999"), Ok(("", Value::Uint(999))));
1115
1116        // Positive hex values
1117        assert_eq!(parse_numeric_value("0x0"), Ok(("", Value::Uint(0))));
1118        assert_eq!(parse_numeric_value("0x10"), Ok(("", Value::Uint(16))));
1119        assert_eq!(parse_numeric_value("0xFF"), Ok(("", Value::Uint(255))));
1120        assert_eq!(parse_numeric_value("0xabc"), Ok(("", Value::Uint(2748))));
1121    }
1122
1123    #[test]
1124    fn test_parse_numeric_value_negative() {
1125        // Negative integers
1126        assert_eq!(parse_numeric_value("-1"), Ok(("", Value::Int(-1))));
1127        assert_eq!(parse_numeric_value("-123"), Ok(("", Value::Int(-123))));
1128        assert_eq!(parse_numeric_value("-999"), Ok(("", Value::Int(-999))));
1129
1130        // Negative hex values
1131        assert_eq!(parse_numeric_value("-0x1"), Ok(("", Value::Int(-1))));
1132        assert_eq!(parse_numeric_value("-0x10"), Ok(("", Value::Int(-16))));
1133        assert_eq!(parse_numeric_value("-0xFF"), Ok(("", Value::Int(-255))));
1134        assert_eq!(parse_numeric_value("-0xabc"), Ok(("", Value::Int(-2748))));
1135    }
1136
1137    #[test]
1138    fn test_parse_numeric_value_with_whitespace() {
1139        // With leading/trailing whitespace
1140        assert_eq!(parse_numeric_value(" 123 "), Ok(("", Value::Uint(123))));
1141        assert_eq!(parse_numeric_value("\t-456\t"), Ok(("", Value::Int(-456))));
1142        assert_eq!(parse_numeric_value("  0xFF  "), Ok(("", Value::Uint(255))));
1143    }
1144
1145    #[test]
1146    fn test_parse_numeric_value_with_remaining_input() {
1147        // Should parse number and leave remaining input (numeric parser consumes trailing whitespace)
1148        assert_eq!(
1149            parse_numeric_value("123 rest"),
1150            Ok(("rest", Value::Uint(123)))
1151        );
1152        assert_eq!(
1153            parse_numeric_value("-456 more"),
1154            Ok(("more", Value::Int(-456)))
1155        );
1156        assert_eq!(parse_numeric_value("0xFF)"), Ok((")", Value::Uint(255))));
1157    }
1158
1159    #[test]
1160    fn test_parse_value_string_literals() {
1161        // String value parsing
1162        assert_eq!(
1163            parse_value("\"hello\""),
1164            Ok(("", Value::String("hello".to_string())))
1165        );
1166        assert_eq!(
1167            parse_value("\"ELF\""),
1168            Ok(("", Value::String("ELF".to_string())))
1169        );
1170        assert_eq!(parse_value("\"\""), Ok(("", Value::String(String::new()))));
1171
1172        // String with escape sequences
1173        assert_eq!(
1174            parse_value("\"Line1\\nLine2\""),
1175            Ok(("", Value::String("Line1\nLine2".to_string())))
1176        );
1177        assert_eq!(
1178            parse_value("\"Tab\\tSeparated\""),
1179            Ok(("", Value::String("Tab\tSeparated".to_string())))
1180        );
1181        assert_eq!(
1182            parse_value("\"Null\\0Term\""),
1183            Ok(("", Value::String("Null\0Term".to_string())))
1184        );
1185    }
1186
1187    #[test]
1188    fn test_parse_value_numeric_literals() {
1189        // Positive integers
1190        assert_eq!(parse_value("0"), Ok(("", Value::Uint(0))));
1191        assert_eq!(parse_value("123"), Ok(("", Value::Uint(123))));
1192        assert_eq!(parse_value("999"), Ok(("", Value::Uint(999))));
1193
1194        // Negative integers
1195        assert_eq!(parse_value("-1"), Ok(("", Value::Int(-1))));
1196        assert_eq!(parse_value("-123"), Ok(("", Value::Int(-123))));
1197        assert_eq!(parse_value("-999"), Ok(("", Value::Int(-999))));
1198
1199        // Hexadecimal values
1200        assert_eq!(parse_value("0x0"), Ok(("", Value::Uint(0))));
1201        assert_eq!(parse_value("0x10"), Ok(("", Value::Uint(16))));
1202        assert_eq!(parse_value("0xFF"), Ok(("", Value::Uint(255))));
1203        assert_eq!(parse_value("-0xFF"), Ok(("", Value::Int(-255))));
1204    }
1205
1206    #[test]
1207    fn test_parse_value_hex_byte_sequences() {
1208        // Hex bytes with \x prefix
1209        assert_eq!(parse_value("\\x7f"), Ok(("", Value::Bytes(vec![0x7f]))));
1210        assert_eq!(
1211            parse_value("\\x7f\\x45\\x4c\\x46"),
1212            Ok(("", Value::Bytes(vec![0x7f, 0x45, 0x4c, 0x46])))
1213        );
1214
1215        // Hex bytes without prefix
1216        assert_eq!(parse_value("7f"), Ok(("", Value::Bytes(vec![0x7f]))));
1217        assert_eq!(
1218            parse_value("7f454c46"),
1219            Ok(("", Value::Bytes(vec![0x7f, 0x45, 0x4c, 0x46])))
1220        );
1221
1222        // Mixed case
1223        assert_eq!(parse_value("aB"), Ok(("", Value::Bytes(vec![0xab]))));
1224        assert_eq!(parse_value("\\xCd"), Ok(("", Value::Bytes(vec![0xcd]))));
1225    }
1226
1227    #[test]
1228    fn test_parse_value_with_whitespace() {
1229        // All value types with whitespace - trailing whitespace is consumed by individual parsers
1230        assert_eq!(
1231            parse_value(" \"hello\" "),
1232            Ok(("", Value::String("hello".to_string())))
1233        );
1234        assert_eq!(parse_value("  123  "), Ok(("", Value::Uint(123))));
1235        assert_eq!(parse_value("\t-456\t"), Ok(("", Value::Int(-456))));
1236        // Hex bytes don't consume trailing whitespace by themselves
1237        assert_eq!(
1238            parse_value("  \\x7f\\x45  "),
1239            Ok(("  ", Value::Bytes(vec![0x7f, 0x45])))
1240        );
1241    }
1242
1243    #[test]
1244    fn test_parse_value_with_remaining_input() {
1245        // Should parse value and leave remaining input
1246        // Note: Individual parsers handle whitespace differently
1247        assert_eq!(
1248            parse_value("\"hello\" world"),
1249            Ok(("world", Value::String("hello".to_string())))
1250        );
1251        assert_eq!(
1252            parse_value("123 rest"),
1253            Ok(("rest", Value::Uint(123))) // Numeric parser consumes trailing space
1254        );
1255        assert_eq!(
1256            parse_value("-456 more"),
1257            Ok(("more", Value::Int(-456))) // Numeric parser consumes trailing space
1258        );
1259        assert_eq!(
1260            parse_value("\\x7f\\x45 next"),
1261            Ok((" next", Value::Bytes(vec![0x7f, 0x45]))) // Hex bytes don't consume trailing space
1262        );
1263    }
1264
1265    #[test]
1266    fn test_parse_value_edge_cases() {
1267        // Zero values in different formats
1268        assert_eq!(parse_value("0"), Ok(("", Value::Uint(0))));
1269        assert_eq!(parse_value("-0"), Ok(("", Value::Uint(0))));
1270        assert_eq!(parse_value("0x0"), Ok(("", Value::Uint(0))));
1271        assert_eq!(parse_value("-0x0"), Ok(("", Value::Uint(0))));
1272
1273        // Large values
1274        assert_eq!(
1275            parse_value("2147483647"),
1276            Ok(("", Value::Uint(2_147_483_647)))
1277        );
1278        assert_eq!(
1279            parse_value("-2147483648"),
1280            Ok(("", Value::Int(-2_147_483_648)))
1281        );
1282        assert_eq!(
1283            parse_value("0x7FFFFFFF"),
1284            Ok(("", Value::Uint(2_147_483_647)))
1285        );
1286
1287        // Empty input should fail
1288        assert!(parse_value("").is_err());
1289    }
1290
1291    #[test]
1292    fn test_parse_value_invalid_input() {
1293        // Should fail on completely invalid input
1294        assert!(parse_value("xyz").is_err()); // Not a valid value format
1295        assert!(parse_value("0xGG").is_err()); // Invalid hex digits
1296        assert!(parse_value("\"unclosed").is_err()); // Unclosed string
1297        assert!(parse_value("--123").is_err()); // Invalid number format
1298    }
1299
1300    #[test]
1301    fn test_parse_value_common_magic_file_patterns() {
1302        // Test patterns commonly found in magic files
1303        assert_eq!(
1304            parse_value("0x7f454c46"),
1305            Ok(("", Value::Uint(0x7f45_4c46)))
1306        );
1307        assert_eq!(
1308            parse_value("\"ELF\""),
1309            Ok(("", Value::String("ELF".to_string())))
1310        );
1311        assert_eq!(
1312            parse_value("\\x50\\x4b\\x03\\x04"),
1313            Ok(("", Value::Bytes(vec![0x50, 0x4b, 0x03, 0x04])))
1314        );
1315        assert_eq!(
1316            parse_value("\"\\377ELF\""),
1317            Ok(("", Value::String("\u{00ff}ELF".to_string())))
1318        );
1319        assert_eq!(parse_value("0"), Ok(("", Value::Uint(0))));
1320        assert_eq!(parse_value("-1"), Ok(("", Value::Int(-1))));
1321    }
1322
1323    #[test]
1324    fn test_parse_value_type_precedence() {
1325        // Test that parsing precedence works correctly
1326        // Quoted strings should be parsed as strings, not hex bytes
1327        assert_eq!(
1328            parse_value("\"7f\""),
1329            Ok(("", Value::String("7f".to_string())))
1330        );
1331
1332        // Hex patterns should be parsed as bytes when not quoted
1333        assert_eq!(parse_value("7f"), Ok(("", Value::Bytes(vec![0x7f]))));
1334
1335        // Numbers should be parsed as numbers when they don't look like hex bytes
1336        assert_eq!(parse_value("123"), Ok(("", Value::Uint(123))));
1337        assert_eq!(parse_value("-123"), Ok(("", Value::Int(-123))));
1338
1339        // Hex numbers with 0x prefix should be parsed as numbers
1340        assert_eq!(parse_value("0x123"), Ok(("", Value::Uint(0x123))));
1341    }
1342
1343    #[test]
1344    fn test_parse_value_boundary_conditions() {
1345        // Test boundary conditions for different value types
1346
1347        // Single character strings
1348        assert_eq!(
1349            parse_value("\"a\""),
1350            Ok(("", Value::String("a".to_string())))
1351        );
1352        assert_eq!(
1353            parse_value("\"1\""),
1354            Ok(("", Value::String("1".to_string())))
1355        );
1356
1357        // Single hex byte
1358        assert_eq!(parse_value("ab"), Ok(("", Value::Bytes(vec![0xab]))));
1359        assert_eq!(parse_value("\\x00"), Ok(("", Value::Bytes(vec![0x00]))));
1360
1361        // Minimum and maximum values
1362        assert_eq!(parse_value("1"), Ok(("", Value::Uint(1))));
1363        assert_eq!(parse_value("-1"), Ok(("", Value::Int(-1))));
1364
1365        // Powers of 2 (common in binary formats)
1366        assert_eq!(parse_value("256"), Ok(("", Value::Uint(256))));
1367        assert_eq!(parse_value("0x100"), Ok(("", Value::Uint(256))));
1368        assert_eq!(parse_value("1024"), Ok(("", Value::Uint(1024))));
1369        assert_eq!(parse_value("0x400"), Ok(("", Value::Uint(1024))));
1370    }
1371
1372    #[test]
1373    fn test_parse_operator_whitespace_handling() {
1374        // Test comprehensive whitespace handling
1375        let operators = ["=", "==", "!=", "<>", "&"];
1376        let whitespace_patterns = [
1377            "",     // No whitespace
1378            " ",    // Single space
1379            "  ",   // Multiple spaces
1380            "\t",   // Tab
1381            "\t\t", // Multiple tabs
1382            " \t",  // Mixed space and tab
1383            "\t ",  // Mixed tab and space
1384        ];
1385
1386        for op in operators {
1387            for leading_ws in whitespace_patterns {
1388                for trailing_ws in whitespace_patterns {
1389                    let input = format!("{leading_ws}{op}{trailing_ws}");
1390                    let result = parse_operator(&input);
1391
1392                    assert!(
1393                        result.is_ok(),
1394                        "Failed to parse operator with whitespace: '{input}'"
1395                    );
1396
1397                    let (remaining, _) = result.unwrap();
1398                    assert_eq!(remaining, "", "Unexpected remaining input for: '{input}'");
1399                }
1400            }
1401        }
1402    }
1403}
1404/// Parse a type specification (byte, short, long, string, etc.)
1405///
1406/// Supports various type formats found in magic files:
1407/// - `byte` - single byte
1408/// - `short` - 16-bit integer (native endian)
1409/// - `leshort` - 16-bit little-endian integer
1410/// - `beshort` - 16-bit big-endian integer
1411/// - `long` - 32-bit integer (native endian)
1412/// - `lelong` - 32-bit little-endian integer
1413/// - `belong` - 32-bit big-endian integer
1414/// - `string` - null-terminated string
1415///
1416/// # Examples
1417///
1418/// ```
1419/// use libmagic_rs::parser::grammar::parse_type;
1420/// use libmagic_rs::parser::ast::{TypeKind, Endianness};
1421///
1422/// assert_eq!(parse_type("byte"), Ok(("", TypeKind::Byte)));
1423/// assert_eq!(parse_type("leshort"), Ok(("", TypeKind::Short { endian: Endianness::Little, signed: false })));
1424/// assert_eq!(parse_type("string"), Ok(("", TypeKind::String { max_length: None })));
1425/// ```
1426/// Parse a type specification with optional attached operator
1427/// Parse a type specification followed by an optional operator
1428///
1429/// # Errors
1430/// Returns a nom parsing error if the input doesn't match the expected format
1431pub fn parse_type_and_operator(input: &str) -> IResult<&str, (TypeKind, Option<Operator>)> {
1432    let (input, _) = multispace0(input)?;
1433
1434    let (input, type_name) = alt((
1435        tag("lelong"),
1436        tag("belong"),
1437        tag("leshort"),
1438        tag("beshort"),
1439        tag("long"),
1440        tag("short"),
1441        tag("byte"),
1442        tag("string"),
1443    ))
1444    .parse(input)?;
1445
1446    // Check for attached operator with mask (like &0xf0000000)
1447    let (input, attached_op) = opt(alt((
1448        // Parse &mask format
1449        map(pair(char('&'), parse_number), |(_, mask)| {
1450            Operator::BitwiseAndMask(mask.unsigned_abs())
1451        }),
1452        // Parse standalone & (for backward compatibility)
1453        map(char('&'), |_| Operator::BitwiseAnd),
1454        // Add more operators as needed
1455    )))
1456    .parse(input)?;
1457
1458    let (input, _) = multispace0(input)?;
1459
1460    let type_kind = match type_name {
1461        "byte" => TypeKind::Byte,
1462        "short" => TypeKind::Short {
1463            endian: Endianness::Native,
1464            signed: false,
1465        },
1466        "leshort" => TypeKind::Short {
1467            endian: Endianness::Little,
1468            signed: false,
1469        },
1470        "beshort" => TypeKind::Short {
1471            endian: Endianness::Big,
1472            signed: false,
1473        },
1474        "long" => TypeKind::Long {
1475            endian: Endianness::Native,
1476            signed: false,
1477        },
1478        "lelong" => TypeKind::Long {
1479            endian: Endianness::Little,
1480            signed: false,
1481        },
1482        "belong" => TypeKind::Long {
1483            endian: Endianness::Big,
1484            signed: false,
1485        },
1486        "string" => TypeKind::String { max_length: None },
1487        _ => unreachable!("Parser should only match known types"),
1488    };
1489
1490    Ok((input, (type_kind, attached_op)))
1491}
1492
1493/// Parse a type specification (backward compatibility)
1494/// Parse a type specification (byte, short, long, string, etc.)
1495///
1496/// # Errors
1497/// Returns a nom parsing error if the input doesn't match any known type
1498pub fn parse_type(input: &str) -> IResult<&str, TypeKind> {
1499    let (input, (type_kind, _)) = parse_type_and_operator(input)?;
1500    Ok((input, type_kind))
1501}
1502
1503/// Parse the indentation level and offset for magic rules
1504///
1505/// Handles both absolute offsets and hierarchical child rules with `>` prefix.
1506/// Child rules can be nested multiple levels deep with multiple `>` characters.
1507///
1508/// # Examples
1509///
1510/// ```
1511/// use libmagic_rs::parser::grammar::parse_rule_offset;
1512/// use libmagic_rs::parser::ast::OffsetSpec;
1513///
1514/// // Absolute offset
1515/// assert_eq!(parse_rule_offset("0"), Ok(("", (0, OffsetSpec::Absolute(0)))));
1516/// assert_eq!(parse_rule_offset("16"), Ok(("", (0, OffsetSpec::Absolute(16)))));
1517///
1518/// // Child rule (level 1)
1519/// assert_eq!(parse_rule_offset(">4"), Ok(("", (1, OffsetSpec::Absolute(4)))));
1520///
1521/// // Nested child rule (level 2)
1522/// assert_eq!(parse_rule_offset(">>8"), Ok(("", (2, OffsetSpec::Absolute(8)))));
1523/// ```
1524/// Parse rule offset with hierarchy level (> prefixes) and offset specification
1525///
1526/// # Errors
1527/// Returns a nom parsing error if the input doesn't match the expected offset format
1528pub fn parse_rule_offset(input: &str) -> IResult<&str, (u32, OffsetSpec)> {
1529    let (input, _) = multispace0(input)?;
1530
1531    // Count the number of '>' characters for nesting level
1532    let (input, level_chars) = many0(char('>')).parse(input)?;
1533    let level = u32::try_from(level_chars.len()).unwrap_or(0);
1534
1535    // Parse the offset after the '>' characters
1536    let (input, offset_spec) = parse_offset(input)?;
1537
1538    Ok((input, (level, offset_spec)))
1539}
1540
1541/// Parse the message part of a magic rule
1542///
1543/// The message is everything after the value until the end of the line.
1544/// It may contain format specifiers and can be empty.
1545///
1546/// # Examples
1547///
1548/// ```
1549/// use libmagic_rs::parser::grammar::parse_message;
1550///
1551/// assert_eq!(parse_message("ELF executable"), Ok(("", "ELF executable".to_string())));
1552/// assert_eq!(parse_message(""), Ok(("", "".to_string())));
1553/// assert_eq!(parse_message("  \tPDF document  "), Ok(("", "PDF document".to_string())));
1554/// ```
1555/// Parse the message/description part of a magic rule
1556///
1557/// # Errors
1558/// Returns a nom parsing error if the input cannot be parsed as a message
1559pub fn parse_message(input: &str) -> IResult<&str, String> {
1560    let (input, _) = multispace0(input)?;
1561
1562    // Take everything until end of line, trimming whitespace
1563    // Use take_while instead of take_while1 to handle empty messages
1564    let (input, message_text) = take_while(|c: char| c != '\n' && c != '\r').parse(input)?;
1565    let message = message_text.trim().to_string();
1566
1567    Ok((input, message))
1568}
1569
1570/// Parse a strength directive (`!:strength` line)
1571///
1572/// Parses the `!:strength` directive that modifies rule strength.
1573/// Format: `!:strength [+|-|*|/|=]N` or `!:strength N`
1574///
1575/// # Examples
1576///
1577/// ```
1578/// use libmagic_rs::parser::grammar::parse_strength_directive;
1579/// use libmagic_rs::parser::ast::StrengthModifier;
1580///
1581/// assert_eq!(parse_strength_directive("!:strength +10"), Ok(("", StrengthModifier::Add(10))));
1582/// assert_eq!(parse_strength_directive("!:strength -5"), Ok(("", StrengthModifier::Subtract(5))));
1583/// assert_eq!(parse_strength_directive("!:strength *2"), Ok(("", StrengthModifier::Multiply(2))));
1584/// assert_eq!(parse_strength_directive("!:strength /2"), Ok(("", StrengthModifier::Divide(2))));
1585/// assert_eq!(parse_strength_directive("!:strength =50"), Ok(("", StrengthModifier::Set(50))));
1586/// assert_eq!(parse_strength_directive("!:strength 50"), Ok(("", StrengthModifier::Set(50))));
1587/// ```
1588///
1589/// # Errors
1590///
1591/// Returns a nom parsing error if:
1592/// - Input doesn't start with `!:strength`
1593/// - The modifier value cannot be parsed as a valid integer
1594/// - The operator is invalid
1595pub fn parse_strength_directive(input: &str) -> IResult<&str, StrengthModifier> {
1596    // Helper to safely convert i64 to i32 with clamping to valid strength range.
1597    // This prevents silent truncation to 0 on overflow while keeping values in bounds.
1598    fn clamp_to_i32(n: i64) -> i32 {
1599        // Use i64::from for lossless conversion, then clamp and convert back
1600        let clamped = n.clamp(i64::from(i32::MIN), i64::from(i32::MAX));
1601        // Safe to unwrap: clamped value is guaranteed to be in i32 range
1602        i32::try_from(clamped).unwrap()
1603    }
1604
1605    let (input, _) = multispace0(input)?;
1606    let (input, _) = tag("!:strength")(input)?;
1607    let (input, _) = multispace0(input)?;
1608
1609    // Parse the operator: +, -, *, /, = or bare number (implies =)
1610    let (input, modifier) = alt((
1611        // +N -> Add
1612        map(pair(char('+'), parse_number), |(_, n)| {
1613            StrengthModifier::Add(clamp_to_i32(n))
1614        }),
1615        // -N -> Subtract (note: parse_number handles negative, so we need special handling)
1616        map(pair(char('-'), parse_decimal_number), |(_, n)| {
1617            StrengthModifier::Subtract(clamp_to_i32(n))
1618        }),
1619        // *N -> Multiply
1620        map(pair(char('*'), parse_number), |(_, n)| {
1621            StrengthModifier::Multiply(clamp_to_i32(n))
1622        }),
1623        // /N -> Divide
1624        map(pair(char('/'), parse_number), |(_, n)| {
1625            StrengthModifier::Divide(clamp_to_i32(n))
1626        }),
1627        // =N -> Set
1628        map(pair(char('='), parse_number), |(_, n)| {
1629            StrengthModifier::Set(clamp_to_i32(n))
1630        }),
1631        // Bare number -> Set
1632        map(parse_number, |n| StrengthModifier::Set(clamp_to_i32(n))),
1633    ))
1634    .parse(input)?;
1635
1636    Ok((input, modifier))
1637}
1638
/// Report whether a line holds a `!:strength` directive
///
/// Leading whitespace is ignored; the line qualifies as soon as its first
/// non-whitespace characters spell `!:strength`. Whatever follows the keyword
/// is not validated here.
///
/// # Examples
///
/// ```
/// use libmagic_rs::parser::grammar::is_strength_directive;
///
/// assert!(is_strength_directive("!:strength +10"));
/// assert!(is_strength_directive("  !:strength -5"));
/// assert!(!is_strength_directive("0 byte 1"));
/// ```
#[must_use]
pub fn is_strength_directive(input: &str) -> bool {
    const KEYWORD: &str = "!:strength";
    input.trim_start().starts_with(KEYWORD)
}
1654
1655/// Parse a complete magic rule line from text format
1656///
1657/// Parses a complete magic rule in the format:
1658/// `[>...]offset type [operator] value [message]`
1659///
1660/// Where:
1661/// - `>...` indicates child rule nesting level (optional)
1662/// - `offset` is the byte offset to read from
1663/// - `type` is the data type (byte, short, long, string, etc.)
1664/// - `operator` is the comparison operator (=, !=, &) - defaults to = if omitted
1665/// - `value` is the expected value to compare against
1666/// - `message` is the human-readable description (optional)
1667///
1668/// # Examples
1669///
1670/// ```
1671/// use libmagic_rs::parser::grammar::parse_magic_rule;
1672/// use libmagic_rs::parser::ast::{MagicRule, OffsetSpec, TypeKind, Operator, Value};
1673///
1674/// // Basic rule
1675/// let input = "0 string \\x7fELF ELF executable";
1676/// let (_, rule) = parse_magic_rule(input).unwrap();
1677/// assert_eq!(rule.level, 0);
1678/// assert_eq!(rule.message, "ELF executable");
1679///
1680/// // Child rule
1681/// let input = ">4 byte 1 32-bit";
1682/// let (_, rule) = parse_magic_rule(input).unwrap();
1683/// assert_eq!(rule.level, 1);
1684/// assert_eq!(rule.message, "32-bit");
1685/// ```
1686///
1687/// # Errors
1688///
1689/// Returns a nom parsing error if:
1690/// - The offset specification is invalid
1691/// - The type specification is not recognized
1692/// - The operator is invalid (if present)
1693/// - The value cannot be parsed
1694/// - The input format doesn't match the expected magic rule syntax
1695pub fn parse_magic_rule(input: &str) -> IResult<&str, MagicRule> {
1696    let (input, _) = multispace0(input)?;
1697
1698    // Parse the offset with nesting level
1699    let (input, (level, offset)) = parse_rule_offset(input)?;
1700
1701    // Parse the type and any attached operator
1702    let (input, (typ, attached_op)) = parse_type_and_operator(input)?;
1703
1704    // Try to parse a separate operator (optional - use attached operator if present)
1705    let (input, separate_op) = opt(parse_operator).parse(input)?;
1706    let op = attached_op.or(separate_op).unwrap_or(Operator::Equal);
1707
1708    // Parse the value
1709    let (input, value) = parse_value(input)?;
1710
1711    // Parse the message (optional - everything remaining on the line)
1712    let (input, message) = if input.trim().is_empty() {
1713        (input, String::new())
1714    } else {
1715        parse_message(input)?
1716    };
1717
1718    let rule = MagicRule {
1719        offset,
1720        typ,
1721        op,
1722        value,
1723        message,
1724        children: vec![], // Children will be added during hierarchical parsing
1725        level,
1726        strength_modifier: None, // Will be set during directive parsing
1727    };
1728
1729    Ok((input, rule))
1730}
1731
1732/// Parse a comment line (starts with #)
1733///
1734/// Comments in magic files start with '#' and continue to the end of the line.
1735/// This function consumes the entire comment line.
1736///
1737/// # Examples
1738///
1739/// ```
1740/// use libmagic_rs::parser::grammar::parse_comment;
1741///
1742/// assert_eq!(parse_comment("# This is a comment"), Ok(("", "This is a comment".to_string())));
1743/// assert_eq!(parse_comment("#"), Ok(("", "".to_string())));
1744/// ```
1745/// Parse a comment line (starting with #)
1746///
1747/// # Errors
1748/// Returns a nom parsing error if the input is not a valid comment
1749pub fn parse_comment(input: &str) -> IResult<&str, String> {
1750    let (input, _) = multispace0(input)?;
1751    let (input, _) = char('#').parse(input)?;
1752    let (input, comment_text) = take_while(|c: char| c != '\n' && c != '\r').parse(input)?;
1753    let comment = comment_text.trim().to_string();
1754    Ok((input, comment))
1755}
1756
/// Report whether a line is blank, i.e. has no characters other than whitespace
///
/// # Examples
///
/// ```
/// use libmagic_rs::parser::grammar::is_empty_line;
///
/// assert!(is_empty_line(""));
/// assert!(is_empty_line("   "));
/// assert!(is_empty_line("\t\t"));
/// assert!(!is_empty_line("0 byte 1"));
/// ```
#[must_use]
pub fn is_empty_line(input: &str) -> bool {
    // Vacuously true for the empty string.
    input.chars().all(char::is_whitespace)
}
1773
/// Report whether a line is a comment, i.e. its first non-whitespace character is `#`
///
/// # Examples
///
/// ```
/// use libmagic_rs::parser::grammar::is_comment_line;
///
/// assert!(is_comment_line("# This is a comment"));
/// assert!(is_comment_line("#"));
/// assert!(is_comment_line("  # Indented comment"));
/// assert!(!is_comment_line("0 byte 1"));
/// ```
#[must_use]
pub fn is_comment_line(input: &str) -> bool {
    matches!(input.trim_start().chars().next(), Some('#'))
}
1790
/// Report whether a line ends with the continuation character (`\`)
///
/// Magic files support line continuation with a backslash at the end of a
/// line. Trailing whitespace after the backslash is ignored.
///
/// # Examples
///
/// ```
/// use libmagic_rs::parser::grammar::has_continuation;
///
/// assert!(has_continuation("0 string test \\"));
/// assert!(has_continuation("message continues \\"));
/// assert!(!has_continuation("0 string test"));
/// ```
#[must_use]
pub fn has_continuation(input: &str) -> bool {
    // Walk backwards to the last non-whitespace character, if there is one.
    let last_visible = input.chars().rev().find(|ch| !ch.is_whitespace());
    matches!(last_visible, Some('\\'))
}
1808// Tests for new magic rule parsing functions
1809
#[test]
fn test_parse_type_basic() {
    // Each bare type keyword must be consumed entirely and yield the listed kind.
    let cases = [
        ("byte", TypeKind::Byte),
        (
            "short",
            TypeKind::Short {
                endian: Endianness::Native,
                signed: false,
            },
        ),
        (
            "long",
            TypeKind::Long {
                endian: Endianness::Native,
                signed: false,
            },
        ),
        ("string", TypeKind::String { max_length: None }),
    ];

    for (keyword, expected) in cases {
        assert_eq!(parse_type(keyword), Ok(("", expected)), "keyword: {keyword:?}");
    }
}
1838
#[test]
fn test_parse_type_endianness() {
    // `le`/`be` prefixes select the endianness of short and long types.
    let cases = [
        (
            "leshort",
            TypeKind::Short {
                endian: Endianness::Little,
                signed: false,
            },
        ),
        (
            "beshort",
            TypeKind::Short {
                endian: Endianness::Big,
                signed: false,
            },
        ),
        (
            "lelong",
            TypeKind::Long {
                endian: Endianness::Little,
                signed: false,
            },
        ),
        (
            "belong",
            TypeKind::Long {
                endian: Endianness::Big,
                signed: false,
            },
        ),
    ];

    for (keyword, expected) in cases {
        assert_eq!(parse_type(keyword), Ok(("", expected)), "keyword: {keyword:?}");
    }
}
1882
#[test]
fn test_parse_type_with_whitespace() {
    // Whitespace on either side of the keyword is consumed along with it.
    let cases = [
        (" byte ", TypeKind::Byte),
        ("\tstring\t", TypeKind::String { max_length: None }),
        (
            "  lelong  ",
            TypeKind::Long {
                endian: Endianness::Little,
                signed: false,
            },
        ),
    ];

    for (padded, expected) in cases {
        assert_eq!(parse_type(padded), Ok(("", expected)), "input: {padded:?}");
    }
}
1901
#[test]
fn test_parse_type_with_remaining_input() {
    // Text after the type (and its separating whitespace) is handed back untouched.
    let cases = [
        ("byte =", "=", TypeKind::Byte),
        ("string \\x7f", "\\x7f", TypeKind::String { max_length: None }),
    ];

    for (input, rest, expected) in cases {
        assert_eq!(parse_type(input), Ok((rest, expected)), "input: {input:?}");
    }
}
1910
#[test]
fn test_parse_type_invalid() {
    // Empty input and unknown keywords must all be rejected.
    for rejected in ["", "invalid", "int", "float"] {
        assert!(parse_type(rejected).is_err(), "input: {rejected:?}");
    }
}
1918
#[test]
fn test_parse_rule_offset_absolute() {
    // Top-level offsets (level 0) in decimal, hex and negative form.
    let cases = [("0", 0), ("16", 16), ("0x10", 16), ("-4", -4)];

    for (input, offset) in cases {
        assert_eq!(
            parse_rule_offset(input),
            Ok(("", (0, OffsetSpec::Absolute(offset)))),
            "input: {input:?}"
        );
    }
}
1938
#[test]
fn test_parse_rule_offset_child_rules() {
    // The number of leading `>` markers becomes the nesting level.
    let cases = [(">4", 1, 4), (">>8", 2, 8), (">>>12", 3, 12)];

    for (input, level, offset) in cases {
        assert_eq!(
            parse_rule_offset(input),
            Ok(("", (level, OffsetSpec::Absolute(offset)))),
            "input: {input:?}"
        );
    }
}
1954
#[test]
fn test_parse_rule_offset_with_whitespace() {
    // Surrounding spaces and tabs are consumed together with the offset.
    let cases = [(" 0 ", 0, 0), ("  >4  ", 1, 4), ("\t>>0x10\t", 2, 16)];

    for (input, level, offset) in cases {
        assert_eq!(
            parse_rule_offset(input),
            Ok(("", (level, OffsetSpec::Absolute(offset)))),
            "input: {input:?}"
        );
    }
}
1970
#[test]
fn test_parse_rule_offset_with_remaining_input() {
    // The text following the offset is returned as the remaining input.
    let cases = [("0 byte", "byte", 0, 0), (">4 string", "string", 1, 4)];

    for (input, rest, level, offset) in cases {
        assert_eq!(
            parse_rule_offset(input),
            Ok((rest, (level, OffsetSpec::Absolute(offset)))),
            "input: {input:?}"
        );
    }
}
1982
#[test]
fn test_parse_message_basic() {
    // Plain messages come back verbatim; empty input yields an empty message.
    for text in ["ELF executable", "PDF document", ""] {
        assert_eq!(
            parse_message(text),
            Ok(("", text.to_string())),
            "input: {text:?}"
        );
    }
}
1995
#[test]
fn test_parse_message_with_whitespace() {
    // Leading and trailing whitespace is stripped from the message.
    let cases = [
        ("  ELF executable  ", "ELF executable"),
        ("\tPDF document\t", "PDF document"),
        ("   ", ""),
    ];

    for (input, expected) in cases {
        assert_eq!(
            parse_message(input),
            Ok(("", expected.to_string())),
            "input: {input:?}"
        );
    }
}
2008
#[test]
fn test_parse_message_complex() {
    // Punctuation, digits and printf-style placeholders pass through unchanged.
    for text in ["ELF 64-bit LSB executable", "ZIP archive, version %d.%d"] {
        assert_eq!(
            parse_message(text),
            Ok(("", text.to_string())),
            "input: {text:?}"
        );
    }
}
2020
#[test]
fn test_parse_magic_rule_basic() {
    // Top-level string rule with an escaped byte in the value and a message.
    let (rest, parsed) = parse_magic_rule("0 string \\x7fELF ELF executable")
        .expect("basic string rule should parse");

    assert!(rest.is_empty());
    assert_eq!(parsed.offset, OffsetSpec::Absolute(0));
    assert_eq!(parsed.level, 0);
    assert_eq!(parsed.typ, TypeKind::String { max_length: None });
    assert_eq!(parsed.op, Operator::Equal);
    assert_eq!(parsed.value, Value::Bytes(vec![0x7f, 0x45, 0x4c, 0x46]));
    assert_eq!(parsed.message, "ELF executable");
    assert!(parsed.children.is_empty());
}
2035
#[test]
fn test_parse_magic_rule_child() {
    // A single `>` marks a level-1 child rule.
    let (rest, parsed) =
        parse_magic_rule(">4 byte 1 32-bit").expect("child rule should parse");

    assert!(rest.is_empty());
    assert_eq!(parsed.offset, OffsetSpec::Absolute(4));
    assert_eq!(parsed.level, 1);
    assert_eq!(parsed.typ, TypeKind::Byte);
    assert_eq!(parsed.op, Operator::Equal);
    assert_eq!(parsed.value, Value::Uint(1));
    assert_eq!(parsed.message, "32-bit");
}
2049
#[test]
fn test_parse_magic_rule_with_operator() {
    // A mask attached directly to the type becomes the rule's operator.
    let (rest, parsed) = parse_magic_rule("0 lelong&0xf0000000 0x10000000 MIPS-II")
        .expect("rule with attached mask should parse");
    let little_endian_long = TypeKind::Long {
        endian: Endianness::Little,
        signed: false,
    };

    assert!(rest.is_empty());
    assert_eq!(parsed.offset, OffsetSpec::Absolute(0));
    assert_eq!(parsed.level, 0);
    assert_eq!(parsed.typ, little_endian_long);
    assert_eq!(parsed.op, Operator::BitwiseAndMask(0xf000_0000));
    assert_eq!(parsed.value, Value::Uint(0x1000_0000));
    assert_eq!(parsed.message, "MIPS-II");
}
2069
#[test]
fn test_parse_magic_rule_no_message() {
    // A rule that stops after the value gets an empty message.
    let (rest, parsed) =
        parse_magic_rule("0 byte 0x7f").expect("rule without message should parse");

    assert!(rest.is_empty());
    assert_eq!(parsed.offset, OffsetSpec::Absolute(0));
    assert_eq!(parsed.level, 0);
    assert_eq!(parsed.typ, TypeKind::Byte);
    assert_eq!(parsed.op, Operator::Equal);
    assert_eq!(parsed.value, Value::Uint(0x7f));
    assert!(parsed.message.is_empty());
}
2083
#[test]
fn test_parse_magic_rule_nested() {
    // Two `>` markers give a level-2 rule.
    let (rest, parsed) = parse_magic_rule(">>8 leshort 0x014c Microsoft COFF")
        .expect("nested rule should parse");
    let little_endian_short = TypeKind::Short {
        endian: Endianness::Little,
        signed: false,
    };

    assert!(rest.is_empty());
    assert_eq!(parsed.offset, OffsetSpec::Absolute(8));
    assert_eq!(parsed.level, 2);
    assert_eq!(parsed.typ, little_endian_short);
    assert_eq!(parsed.op, Operator::Equal);
    assert_eq!(parsed.value, Value::Uint(0x014c));
    assert_eq!(parsed.message, "Microsoft COFF");
}
2103
#[test]
fn test_parse_magic_rule_with_whitespace() {
    // Generous spacing between every component must not affect the result.
    let (rest, parsed) = parse_magic_rule("  >  4   byte   =   1   32-bit  ")
        .expect("rule with extra whitespace should parse");

    assert!(rest.is_empty());
    assert_eq!(parsed.offset, OffsetSpec::Absolute(4));
    assert_eq!(parsed.level, 1);
    assert_eq!(parsed.typ, TypeKind::Byte);
    assert_eq!(parsed.op, Operator::Equal);
    assert_eq!(parsed.value, Value::Uint(1));
    assert_eq!(parsed.message, "32-bit");
}
2117
#[test]
fn test_parse_magic_rule_string_value() {
    // A double-quoted value is parsed as a string value.
    let (rest, parsed) = parse_magic_rule("0 string \"PK\" ZIP archive")
        .expect("rule with quoted string should parse");

    assert!(rest.is_empty());
    assert_eq!(parsed.offset, OffsetSpec::Absolute(0));
    assert_eq!(parsed.level, 0);
    assert_eq!(parsed.typ, TypeKind::String { max_length: None });
    assert_eq!(parsed.op, Operator::Equal);
    assert_eq!(parsed.value, Value::String("PK".to_string()));
    assert_eq!(parsed.message, "ZIP archive");
}
2131
#[test]
fn test_parse_magic_rule_hex_offset() {
    // A `0x`-prefixed offset is read as hexadecimal.
    let (rest, parsed) = parse_magic_rule("0x10 belong 0x12345678 Test data")
        .expect("rule with hex offset should parse");
    let big_endian_long = TypeKind::Long {
        endian: Endianness::Big,
        signed: false,
    };

    assert!(rest.is_empty());
    assert_eq!(parsed.offset, OffsetSpec::Absolute(16));
    assert_eq!(parsed.level, 0);
    assert_eq!(parsed.typ, big_endian_long);
    assert_eq!(parsed.op, Operator::Equal);
    assert_eq!(parsed.value, Value::Uint(0x1234_5678));
    assert_eq!(parsed.message, "Test data");
}
2151
#[test]
fn test_parse_magic_rule_negative_offset() {
    // A negative offset is kept as a negative absolute offset.
    let (rest, parsed) = parse_magic_rule("-4 byte 0 End marker")
        .expect("rule with negative offset should parse");

    assert!(rest.is_empty());
    assert_eq!(parsed.offset, OffsetSpec::Absolute(-4));
    assert_eq!(parsed.level, 0);
    assert_eq!(parsed.typ, TypeKind::Byte);
    assert_eq!(parsed.op, Operator::Equal);
    assert_eq!(parsed.value, Value::Uint(0));
    assert_eq!(parsed.message, "End marker");
}
2165
#[test]
fn test_parse_comment() {
    // The `#` marker is dropped and the remaining text is trimmed.
    let cases = [
        ("# This is a comment", "This is a comment"),
        ("#", ""),
        ("# ELF executables", "ELF executables"),
    ];

    for (line, text) in cases {
        assert_eq!(parse_comment(line), Ok(("", text.to_string())), "line: {line:?}");
    }
}
2178
#[test]
fn test_parse_comment_with_whitespace() {
    // Indentation before `#` and padding around the text are both ignored.
    let cases = [
        ("  # Indented comment  ", "Indented comment"),
        ("\t#\tTabbed comment\t", "Tabbed comment"),
    ];

    for (line, text) in cases {
        assert_eq!(parse_comment(line), Ok(("", text.to_string())), "line: {line:?}");
    }
}
2190
#[test]
fn test_is_empty_line() {
    // Lines made only of whitespace (or nothing) count as empty.
    for blank in ["", "   ", "\t\t", " \t \t "] {
        assert!(is_empty_line(blank), "expected empty: {blank:?}");
    }

    // Anything with visible content does not.
    for filled in ["0 byte 1", "  # comment"] {
        assert!(!is_empty_line(filled), "expected non-empty: {filled:?}");
    }
}
2200
#[test]
fn test_is_comment_line() {
    // `#` may be preceded by spaces or tabs.
    let comments = [
        "# This is a comment",
        "#",
        "  # Indented comment",
        "\t# Tabbed comment",
    ];
    for line in comments {
        assert!(is_comment_line(line), "expected comment: {line:?}");
    }

    for line in ["0 byte 1", "string test"] {
        assert!(!is_comment_line(line), "expected non-comment: {line:?}");
    }
}
2210
#[test]
fn test_has_continuation() {
    // A trailing backslash, optionally followed by whitespace, continues the line.
    let continued = [
        "0 string test \\",
        "message continues \\",
        "line ends with backslash\\",
        "  trailing whitespace  \\  ",
    ];
    for line in continued {
        assert!(has_continuation(line), "expected continuation: {line:?}");
    }

    // No backslash, or a backslash that is not last, is not a continuation.
    let complete = [
        "0 string test",
        "no continuation",
        "backslash in middle \\ here",
    ];
    for line in complete {
        assert!(!has_continuation(line), "expected no continuation: {line:?}");
    }
}
2221
#[test]
fn test_parse_magic_rule_real_world_examples() {
    // Real examples from /usr/share/file/magic/elf
    const ELF_RULES: [&str; 6] = [
        "0 string \\177ELF ELF",
        ">4 byte 1 32-bit",
        ">4 byte 2 64-bit",
        ">5 byte 1 LSB",
        ">5 byte 2 MSB",
        ">>0 lelong&0xf0000000 0x10000000 MIPS-II",
    ];

    for example in ELF_RULES {
        // Every line must parse, be consumed completely, and carry a message.
        let (remaining, rule) = parse_magic_rule(example)
            .unwrap_or_else(|_| panic!("Failed to parse real-world example: '{example}'"));

        assert_eq!(remaining, "", "Unexpected remaining input for: '{example}'");
        assert!(
            !rule.message.is_empty() || example.contains("\\177ELF"),
            "Empty message for: '{example}'"
        );
    }
}
2249
#[test]
fn test_parse_magic_rule_edge_cases() {
    // Columns: rule line, expected level, expected type, expected value, expected message.
    let table = [
        // Minimal rule without a message.
        ("0 byte 0", 0, TypeKind::Byte, Value::Uint(0), ""),
        // Deep nesting combined with an empty quoted string.
        (
            ">>>16 string \"\" Empty string",
            3,
            TypeKind::String { max_length: None },
            Value::String(String::new()),
            "Empty string",
        ),
        // Hex offset with the largest 32-bit value.
        (
            "0x100 lelong 0xFFFFFFFF Max value",
            0,
            TypeKind::Long {
                endian: Endianness::Little,
                signed: false,
            },
            Value::Uint(0xFFFF_FFFF),
            "Max value",
        ),
    ];

    for (line, level, typ, value, message) in table {
        let (rest, parsed) = parse_magic_rule(line).expect("edge case should parse");

        assert!(rest.is_empty(), "line: {line:?}");
        assert_eq!(parsed.level, level, "line: {line:?}");
        assert_eq!(parsed.typ, typ, "line: {line:?}");
        assert_eq!(parsed.value, value, "line: {line:?}");
        assert_eq!(parsed.message, message, "line: {line:?}");
    }
}
2283
#[test]
fn test_parse_magic_rule_invalid_input() {
    // Each of these lines is missing or mangling a mandatory component.
    let rejected = [
        "",               // Empty input
        "invalid format", // No valid offset
        "0",              // Missing type
        "0 invalid_type", // Invalid type
        "0 byte",         // Missing value
    ];

    for line in rejected {
        assert!(
            parse_magic_rule(line).is_err(),
            "Should fail to parse invalid input: '{line}'"
        );
    }
}
2302
2303// Strength directive tests
#[test]
fn test_parse_strength_directive_add() {
    // `+N` maps to `StrengthModifier::Add(N)`.
    let cases = [
        ("!:strength +10", 10),
        ("!:strength +0", 0),
        ("!:strength +100", 100),
    ];

    for (line, amount) in cases {
        assert_eq!(
            parse_strength_directive(line),
            Ok(("", StrengthModifier::Add(amount))),
            "line: {line:?}"
        );
    }
}
2319
#[test]
fn test_parse_strength_directive_subtract() {
    // `-N` maps to `StrengthModifier::Subtract(N)` with a positive magnitude.
    let cases = [
        ("!:strength -5", 5),
        ("!:strength -0", 0),
        ("!:strength -50", 50),
    ];

    for (line, amount) in cases {
        assert_eq!(
            parse_strength_directive(line),
            Ok(("", StrengthModifier::Subtract(amount))),
            "line: {line:?}"
        );
    }
}
2335
#[test]
fn test_parse_strength_directive_multiply() {
    // `*N` maps to `StrengthModifier::Multiply(N)`.
    for (line, factor) in [("!:strength *2", 2), ("!:strength *10", 10)] {
        assert_eq!(
            parse_strength_directive(line),
            Ok(("", StrengthModifier::Multiply(factor))),
            "line: {line:?}"
        );
    }
}
2347
#[test]
fn test_parse_strength_directive_divide() {
    // `/N` maps to `StrengthModifier::Divide(N)`.
    for (line, divisor) in [("!:strength /2", 2), ("!:strength /10", 10)] {
        assert_eq!(
            parse_strength_directive(line),
            Ok(("", StrengthModifier::Divide(divisor))),
            "line: {line:?}"
        );
    }
}
2359
#[test]
fn test_parse_strength_directive_set_explicit() {
    // `=N` maps to `StrengthModifier::Set(N)`.
    let cases = [
        ("!:strength =50", 50),
        ("!:strength =0", 0),
        ("!:strength =100", 100),
    ];

    for (line, strength) in cases {
        assert_eq!(
            parse_strength_directive(line),
            Ok(("", StrengthModifier::Set(strength))),
            "line: {line:?}"
        );
    }
}
2375
#[test]
fn test_parse_strength_directive_set_bare() {
    // Bare number implies Set
    let cases = [
        ("!:strength 50", 50),
        ("!:strength 0", 0),
        ("!:strength 100", 100),
    ];

    for (line, strength) in cases {
        assert_eq!(
            parse_strength_directive(line),
            Ok(("", StrengthModifier::Set(strength))),
            "line: {line:?}"
        );
    }
}
2392
#[test]
fn test_parse_strength_directive_with_whitespace() {
    // Whitespace before the keyword and between keyword and operator is skipped.
    let cases = [
        ("  !:strength +10", StrengthModifier::Add(10)),
        ("\t!:strength -5", StrengthModifier::Subtract(5)),
        ("!:strength  *2", StrengthModifier::Multiply(2)),
        ("!:strength   50", StrengthModifier::Set(50)),
    ];

    for (line, expected) in cases {
        assert_eq!(
            parse_strength_directive(line),
            Ok(("", expected)),
            "line: {line:?}"
        );
    }
}
2412
#[test]
fn test_parse_strength_directive_with_remaining_input() {
    // Should leave remaining content after the directive
    let cases = [
        ("!:strength +10 extra", " extra", StrengthModifier::Add(10)),
        ("!:strength 50\n", "\n", StrengthModifier::Set(50)),
    ];

    for (line, rest, expected) in cases {
        assert_eq!(
            parse_strength_directive(line),
            Ok((rest, expected)),
            "line: {line:?}"
        );
    }
}
2425
#[test]
fn test_parse_strength_directive_invalid() {
    // Should fail on invalid input
    for line in ["", "!:invalid", "strength +10", "0 byte 1"] {
        assert!(parse_strength_directive(line).is_err(), "line: {line:?}");
    }
}
2434
#[test]
fn test_is_strength_directive() {
    // Any operator form counts, with or without leading whitespace.
    let directives = [
        "!:strength +10",
        "!:strength -5",
        "!:strength 50",
        "  !:strength +10",
        "\t!:strength *2",
    ];
    for line in directives {
        assert!(is_strength_directive(line), "expected directive: {line:?}");
    }

    // Rules, comments, blank lines and other `!:` directives do not count.
    let others = ["0 byte 1", "# comment", "", "!:mime application/pdf"];
    for line in others {
        assert!(!is_strength_directive(line), "expected non-directive: {line:?}");
    }
}