Skip to main content

libmagic_rs/parser/
grammar.rs

1// Copyright (c) 2025-2026 the libmagic-rs contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! Grammar parsing for magic files using nom parser combinators
5//!
6//! This module implements the parsing logic for magic file syntax, converting
7//! text-based magic rules into the AST representation defined in ast.rs.
8
9use nom::{
10    IResult, Parser,
11    branch::alt,
12    bytes::complete::{tag, take_while},
13    character::complete::{char, digit1, hex_digit1, multispace0, none_of, one_of},
14    combinator::{map, opt, recognize},
15    error::Error as NomError,
16    multi::many0,
17    sequence::pair,
18};
19
20use crate::parser::ast::{MagicRule, OffsetSpec, Operator, StrengthModifier, TypeKind, Value};
21
22#[cfg(test)]
23use crate::parser::ast::Endianness;
24
25/// Parse a decimal number with overflow protection
26fn parse_decimal_number(input: &str) -> IResult<&str, i64> {
27    let (input, digits) = digit1(input)?;
28
29    // Check for potential overflow before parsing
30    if digits.len() > 19 {
31        // i64::MAX has 19 digits, so anything longer will definitely overflow
32        return Err(nom::Err::Error(nom::error::Error::new(
33            input,
34            nom::error::ErrorKind::MapRes,
35        )));
36    }
37
38    let number = digits.parse::<i64>().map_err(|_| {
39        nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::MapRes))
40    })?;
41    Ok((input, number))
42}
43
44/// Parse a decimal number as unsigned `u64` with overflow protection
45fn parse_unsigned_decimal_number(input: &str) -> IResult<&str, u64> {
46    let (input, digits) = digit1(input)?;
47
48    // u64::MAX (18446744073709551615) has 20 digits
49    if digits.len() > 20 {
50        return Err(nom::Err::Error(nom::error::Error::new(
51            input,
52            nom::error::ErrorKind::MapRes,
53        )));
54    }
55
56    let number = digits.parse::<u64>().map_err(|_| {
57        nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::MapRes))
58    })?;
59    Ok((input, number))
60}
61
62/// Parse a hexadecimal number (with 0x prefix) with overflow protection
63fn parse_hex_number(input: &str) -> IResult<&str, i64> {
64    let (input, _) = tag("0x")(input)?;
65    let (input, hex_str) = hex_digit1(input)?;
66
67    // Check for potential overflow - i64 can hold up to 16 hex digits (0x7FFFFFFFFFFFFFFF)
68    if hex_str.len() > 16 {
69        return Err(nom::Err::Error(nom::error::Error::new(
70            input,
71            nom::error::ErrorKind::MapRes,
72        )));
73    }
74
75    let number = i64::from_str_radix(hex_str, 16).map_err(|_| {
76        nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::MapRes))
77    })?;
78
79    Ok((input, number))
80}
81
82/// Parse a hexadecimal number (with 0x prefix) as unsigned `u64`
83fn parse_unsigned_hex_number(input: &str) -> IResult<&str, u64> {
84    let (input, _) = tag("0x")(input)?;
85    let (input, hex_str) = hex_digit1(input)?;
86
87    // u64 can hold up to 16 hex digits (0xFFFFFFFFFFFFFFFF)
88    if hex_str.len() > 16 {
89        return Err(nom::Err::Error(nom::error::Error::new(
90            input,
91            nom::error::ErrorKind::MapRes,
92        )));
93    }
94
95    let number = u64::from_str_radix(hex_str, 16).map_err(|_| {
96        nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::MapRes))
97    })?;
98
99    Ok((input, number))
100}
101
102/// Parse a non-negative number as unsigned `u64`
103///
104/// Supports both decimal and hexadecimal (0x prefix) formats.
105/// Does not handle a leading minus sign -- callers handle sign detection.
106fn parse_unsigned_number(input: &str) -> IResult<&str, u64> {
107    if input.starts_with("0x") {
108        parse_unsigned_hex_number(input)
109    } else {
110        parse_unsigned_decimal_number(input)
111    }
112}
113
114/// Parse a decimal or hexadecimal number
115///
116/// Supports both decimal (123, -456) and hexadecimal (0x1a2b, -0xFF) formats.
117///
118/// # Examples
119///
120/// ```
121/// use libmagic_rs::parser::grammar::parse_number;
122///
123/// assert_eq!(parse_number("123"), Ok(("", 123)));
124/// assert_eq!(parse_number("0x1a"), Ok(("", 26)));
125/// assert_eq!(parse_number("-42"), Ok(("", -42)));
126/// assert_eq!(parse_number("-0xFF"), Ok(("", -255)));
127/// ```
128///
129/// # Errors
130///
131/// Returns a nom parsing error if:
132/// - Input is empty or contains no valid digits
133/// - Hexadecimal number lacks proper "0x" prefix or contains invalid hex digits
134/// - Number cannot be parsed as a valid `i64` value
135/// - Input contains invalid characters for the detected number format
136pub fn parse_number(input: &str) -> IResult<&str, i64> {
137    let (input, sign) = opt(char('-')).parse(input)?;
138    let is_negative = sign.is_some();
139
140    // Check if input starts with "0x" - if so, it must be a valid hex number
141    let (input, number) = if input.starts_with("0x") {
142        parse_hex_number(input)?
143    } else {
144        parse_decimal_number(input)?
145    };
146
147    // Apply sign with overflow checking
148    let result = if is_negative {
149        number.checked_neg().ok_or_else(|| {
150            nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::MapRes))
151        })?
152    } else {
153        number
154    };
155
156    Ok((input, result))
157}
158
159/// Parse an offset specification for absolute offsets
160///
161/// Supports decimal and hexadecimal formats, both positive and negative.
162///
163/// # Examples
164///
165/// ```
166/// use libmagic_rs::parser::grammar::parse_offset;
167/// use libmagic_rs::parser::ast::OffsetSpec;
168///
169/// assert_eq!(parse_offset("0"), Ok(("", OffsetSpec::Absolute(0))));
170/// assert_eq!(parse_offset("123"), Ok(("", OffsetSpec::Absolute(123))));
171/// assert_eq!(parse_offset("0x10"), Ok(("", OffsetSpec::Absolute(16))));
172/// assert_eq!(parse_offset("-4"), Ok(("", OffsetSpec::Absolute(-4))));
173/// assert_eq!(parse_offset("-0xFF"), Ok(("", OffsetSpec::Absolute(-255))));
174/// ```
175///
176/// # Errors
177///
178/// Returns a nom parsing error if:
179/// - The input contains invalid number format (propagated from `parse_number`)
180/// - Input is empty or contains no parseable offset value
181/// - The offset value cannot be represented as a valid `i64`
182pub fn parse_offset(input: &str) -> IResult<&str, OffsetSpec> {
183    let (input, _) = multispace0(input)?;
184    let (input, offset_value) = parse_number(input)?;
185    let (input, _) = multispace0(input)?;
186
187    Ok((input, OffsetSpec::Absolute(offset_value)))
188}
189
190/// Parse comparison operators for magic rules
191///
192/// Supports both symbolic and text representations of operators:
193/// - `=` or `==` for equality
194/// - `!=` or `<>` for inequality
195/// - `<` for less-than
196/// - `>` for greater-than
197/// - `<=` for less-than-or-equal
198/// - `>=` for greater-than-or-equal
199/// - `&` for bitwise AND
200///
201/// # Examples
202///
203/// ```
204/// use libmagic_rs::parser::grammar::parse_operator;
205/// use libmagic_rs::parser::ast::Operator;
206///
207/// assert_eq!(parse_operator("="), Ok(("", Operator::Equal)));
208/// assert_eq!(parse_operator("=="), Ok(("", Operator::Equal)));
209/// assert_eq!(parse_operator("!="), Ok(("", Operator::NotEqual)));
210/// assert_eq!(parse_operator("<>"), Ok(("", Operator::NotEqual)));
211/// assert_eq!(parse_operator("<"), Ok(("", Operator::LessThan)));
212/// assert_eq!(parse_operator(">"), Ok(("", Operator::GreaterThan)));
213/// assert_eq!(parse_operator("<="), Ok(("", Operator::LessEqual)));
214/// assert_eq!(parse_operator(">="), Ok(("", Operator::GreaterEqual)));
215/// assert_eq!(parse_operator("&"), Ok(("", Operator::BitwiseAnd)));
216/// ```
217///
218/// # Errors
219///
220/// Returns a nom parsing error if:
221/// - Input does not start with a recognized operator symbol
222/// - Input is empty or contains no valid operator
223/// - Operator syntax is incomplete (e.g., just `!` without `=`)
224pub fn parse_operator(input: &str) -> IResult<&str, Operator> {
225    let (input, _) = multispace0(input)?;
226
227    // Try to parse each operator, starting with longer ones first
228    if let Ok((remaining, _)) = tag::<&str, &str, nom::error::Error<&str>>("==")(input) {
229        // Check that we don't have another '=' following (to reject "===")
230        if remaining.starts_with('=') {
231            return Err(nom::Err::Error(nom::error::Error::new(
232                input,
233                nom::error::ErrorKind::Tag,
234            )));
235        }
236        let (remaining, _) = multispace0(remaining)?;
237        return Ok((remaining, Operator::Equal));
238    }
239
240    if let Ok((remaining, _)) = tag::<&str, &str, nom::error::Error<&str>>("!=")(input) {
241        let (remaining, _) = multispace0(remaining)?;
242        return Ok((remaining, Operator::NotEqual));
243    }
244
245    if let Ok((remaining, _)) = tag::<&str, &str, nom::error::Error<&str>>("<>")(input) {
246        let (remaining, _) = multispace0(remaining)?;
247        return Ok((remaining, Operator::NotEqual));
248    }
249
250    if let Ok((remaining, _)) = tag::<&str, &str, nom::error::Error<&str>>("<=")(input) {
251        let (remaining, _) = multispace0(remaining)?;
252        return Ok((remaining, Operator::LessEqual));
253    }
254
255    if let Ok((remaining, _)) = tag::<&str, &str, nom::error::Error<&str>>(">=")(input) {
256        let (remaining, _) = multispace0(remaining)?;
257        return Ok((remaining, Operator::GreaterEqual));
258    }
259
260    if let Ok((remaining, _)) = tag::<&str, &str, nom::error::Error<&str>>("=")(input) {
261        // Check that we don't have another '=' following (to reject "==")
262        if remaining.starts_with('=') {
263            return Err(nom::Err::Error(nom::error::Error::new(
264                input,
265                nom::error::ErrorKind::Tag,
266            )));
267        }
268        let (remaining, _) = multispace0(remaining)?;
269        return Ok((remaining, Operator::Equal));
270    }
271
272    if let Ok((remaining, _)) = tag::<&str, &str, nom::error::Error<&str>>("&")(input) {
273        // Check that we don't have another '&' following (to reject "&&")
274        if remaining.starts_with('&') {
275            return Err(nom::Err::Error(nom::error::Error::new(
276                input,
277                nom::error::ErrorKind::Tag,
278            )));
279        }
280        let (remaining, _) = multispace0(remaining)?;
281        return Ok((remaining, Operator::BitwiseAnd));
282    }
283
284    if let Ok((remaining, _)) = tag::<&str, &str, nom::error::Error<&str>>("<")(input) {
285        let (remaining, _) = multispace0(remaining)?;
286        return Ok((remaining, Operator::LessThan));
287    }
288
289    if let Ok((remaining, _)) = tag::<&str, &str, nom::error::Error<&str>>(">")(input) {
290        let (remaining, _) = multispace0(remaining)?;
291        return Ok((remaining, Operator::GreaterThan));
292    }
293
294    // If no operator matches, return an error
295    Err(nom::Err::Error(nom::error::Error::new(
296        input,
297        nom::error::ErrorKind::Tag,
298    )))
299}
300
301/// Parse a single hex byte with \x prefix
302fn parse_hex_byte_with_prefix(input: &str) -> IResult<&str, u8> {
303    let (input, _) = tag("\\x")(input)?;
304    let (input, hex_str) = recognize(pair(
305        one_of("0123456789abcdefABCDEF"),
306        one_of("0123456789abcdefABCDEF"),
307    ))
308    .parse(input)?;
309    let byte_val = u8::from_str_radix(hex_str, 16)
310        .map_err(|_| nom::Err::Error(NomError::new(input, nom::error::ErrorKind::MapRes)))?;
311    Ok((input, byte_val))
312}
313
314/// Parse a hex byte sequence starting with \x prefix
315fn parse_hex_bytes_with_prefix(input: &str) -> IResult<&str, Vec<u8>> {
316    if input.starts_with("\\x") {
317        many0(parse_hex_byte_with_prefix).parse(input)
318    } else {
319        Err(nom::Err::Error(NomError::new(
320            input,
321            nom::error::ErrorKind::Tag,
322        )))
323    }
324}
325
326/// Parse a mixed hex and ASCII sequence (like \x7fELF)
327fn parse_mixed_hex_ascii(input: &str) -> IResult<&str, Vec<u8>> {
328    // Must start with \ to be considered an escape sequence
329    if !input.starts_with('\\') {
330        return Err(nom::Err::Error(NomError::new(
331            input,
332            nom::error::ErrorKind::Tag,
333        )));
334    }
335
336    let mut bytes = Vec::new();
337    let mut remaining = input;
338
339    while !remaining.is_empty() {
340        // Try to parse escape sequences first (hex, octal, etc.)
341        if let Ok((new_remaining, escaped_char)) = parse_escape_sequence(remaining) {
342            bytes.push(escaped_char as u8);
343            remaining = new_remaining;
344        } else if let Ok((new_remaining, hex_byte)) = parse_hex_byte_with_prefix(remaining) {
345            bytes.push(hex_byte);
346            remaining = new_remaining;
347        } else if let Ok((new_remaining, ascii_char)) =
348            none_of::<&str, &str, NomError<&str>>(" \t\n\r")(remaining)
349        {
350            // Parse regular ASCII character (not whitespace)
351            bytes.push(ascii_char as u8);
352            remaining = new_remaining;
353        } else {
354            // Stop if we can't parse anything more
355            break;
356        }
357    }
358
359    if bytes.is_empty() {
360        Err(nom::Err::Error(NomError::new(
361            input,
362            nom::error::ErrorKind::Tag,
363        )))
364    } else {
365        Ok((remaining, bytes))
366    }
367}
368
369/// Parse a hex byte sequence without prefix (only if it looks like pure hex bytes)
370fn parse_hex_bytes_no_prefix(input: &str) -> IResult<&str, Vec<u8>> {
371    // Only parse as hex bytes if:
372    // 1. Input has even number of hex digits (pairs)
373    // 2. All characters are hex digits
374    // 3. Doesn't start with 0x (that's a number)
375    // 4. Contains at least one non-decimal digit (a-f, A-F)
376
377    if input.starts_with("0x") || input.starts_with('-') {
378        return Err(nom::Err::Error(NomError::new(
379            input,
380            nom::error::ErrorKind::Tag,
381        )));
382    }
383
384    let hex_chars: String = input.chars().take_while(char::is_ascii_hexdigit).collect();
385
386    if hex_chars.is_empty() || !hex_chars.len().is_multiple_of(2) {
387        return Err(nom::Err::Error(NomError::new(
388            input,
389            nom::error::ErrorKind::Tag,
390        )));
391    }
392
393    // Check if it contains non-decimal hex digits (a-f, A-F)
394    let has_hex_letters = hex_chars
395        .chars()
396        .any(|c| matches!(c, 'a'..='f' | 'A'..='F'));
397    if !has_hex_letters {
398        return Err(nom::Err::Error(NomError::new(
399            input,
400            nom::error::ErrorKind::Tag,
401        )));
402    }
403
404    // Parse pairs of hex digits
405    let mut bytes = Vec::with_capacity(hex_chars.len() / 2);
406    let mut chars = hex_chars.chars();
407    while let (Some(c1), Some(c2)) = (chars.next(), chars.next()) {
408        // Avoid format! allocation by parsing digits directly
409        let digit1 = c1
410            .to_digit(16)
411            .ok_or_else(|| nom::Err::Error(NomError::new(input, nom::error::ErrorKind::MapRes)))?;
412        let digit2 = c2
413            .to_digit(16)
414            .ok_or_else(|| nom::Err::Error(NomError::new(input, nom::error::ErrorKind::MapRes)))?;
415        let byte_val = u8::try_from((digit1 << 4) | digit2)
416            .map_err(|_| nom::Err::Error(NomError::new(input, nom::error::ErrorKind::MapRes)))?;
417        bytes.push(byte_val);
418    }
419
420    let remaining = &input[hex_chars.len()..];
421    Ok((remaining, bytes))
422}
423
424/// Parse a hex byte sequence (e.g., "\\x7f\\x45\\x4c\\x46", "7f454c46", or "\\x7fELF")
425fn parse_hex_bytes(input: &str) -> IResult<&str, Vec<u8>> {
426    alt((
427        parse_mixed_hex_ascii,
428        parse_hex_bytes_with_prefix,
429        parse_hex_bytes_no_prefix,
430    ))
431    .parse(input)
432}
433
434/// Parse escape sequences in strings
435fn parse_escape_sequence(input: &str) -> IResult<&str, char> {
436    let (input, _) = char('\\')(input)?;
437
438    // Try to parse octal escape sequence first (\377, \123, etc.)
439    if let Ok((remaining, octal_str)) = recognize(pair(
440        one_of::<&str, &str, NomError<&str>>("0123"),
441        pair(
442            one_of::<&str, &str, NomError<&str>>("01234567"),
443            one_of::<&str, &str, NomError<&str>>("01234567"),
444        ),
445    ))
446    .parse(input)
447        && let Ok(octal_value) = u8::from_str_radix(octal_str, 8)
448    {
449        return Ok((remaining, octal_value as char));
450    }
451
452    // Parse standard escape sequences
453    let (input, escaped_char) = one_of("nrt\\\"'0")(input)?;
454
455    let result_char = match escaped_char {
456        'n' => '\n',
457        'r' => '\r',
458        't' => '\t',
459        '\\' => '\\',
460        '"' => '"',
461        '\'' => '\'',
462        '0' => '\0',
463        _ => unreachable!("one_of constrains input to known escape characters"),
464    };
465
466    Ok((input, result_char))
467}
468
469/// Parse a quoted string with escape sequences
470fn parse_quoted_string(input: &str) -> IResult<&str, String> {
471    let (input, _) = multispace0(input)?;
472    let (input, _) = char('"')(input)?;
473
474    let mut result = String::new();
475    let mut remaining = input;
476
477    loop {
478        // Try to parse an escape sequence first
479        if let Ok((new_remaining, escaped_char)) = parse_escape_sequence(remaining) {
480            result.push(escaped_char);
481            remaining = new_remaining;
482            continue;
483        }
484
485        // If no escape sequence, try to parse a regular character (not quote or backslash)
486        if let Ok((new_remaining, regular_char)) =
487            none_of::<&str, &str, NomError<&str>>("\"\\")(remaining)
488        {
489            result.push(regular_char);
490            remaining = new_remaining;
491            continue;
492        }
493
494        // If neither worked, we should be at the closing quote
495        break;
496    }
497
498    let (remaining, _) = char('"')(remaining)?;
499    let (remaining, _) = multispace0(remaining)?;
500
501    Ok((remaining, result))
502}
503
504/// Parse a numeric value (integer)
505///
506/// Non-negative literals are parsed directly as `u64` so the full unsigned
507/// 64-bit range is representable (required for `uquad` values above `i64::MAX`).
508/// Negative literals go through the signed `i64` path.
509fn parse_numeric_value(input: &str) -> IResult<&str, Value> {
510    let (input, _) = multispace0(input)?;
511
512    let (input, value) = if input.starts_with('-') {
513        // Negative: parse as i64
514        let (input, number) = parse_number(input)?;
515        (input, Value::Int(number))
516    } else {
517        // Non-negative: parse as u64 to support full unsigned 64-bit range
518        let (input, number) = parse_unsigned_number(input)?;
519        (input, Value::Uint(number))
520    };
521
522    let (input, _) = multispace0(input)?;
523    Ok((input, value))
524}
525
526/// Parse string and numeric literals for magic rule values
527///
528/// Supports:
529/// - Quoted strings with escape sequences: "Hello\nWorld", "ELF\0"
530/// - Numeric literals (decimal): 123, -456
531/// - Numeric literals (hexadecimal): 0x1a2b, -0xFF
532/// - Hex byte sequences: \\x7f\\x45\\x4c\\x46 or 7f454c46
533///
534/// # Examples
535///
536/// ```
537/// use libmagic_rs::parser::grammar::parse_value;
538/// use libmagic_rs::parser::ast::Value;
539///
540/// // String values
541/// assert_eq!(parse_value("\"Hello\""), Ok(("", Value::String("Hello".to_string()))));
542/// assert_eq!(parse_value("\"Line1\\nLine2\""), Ok(("", Value::String("Line1\nLine2".to_string()))));
543///
544/// // Numeric values
545/// assert_eq!(parse_value("123"), Ok(("", Value::Uint(123))));
546/// assert_eq!(parse_value("-456"), Ok(("", Value::Int(-456))));
547/// assert_eq!(parse_value("0x1a"), Ok(("", Value::Uint(26))));
548/// assert_eq!(parse_value("-0xFF"), Ok(("", Value::Int(-255))));
549///
550/// // Hex byte sequences
551/// assert_eq!(parse_value("\\x7f\\x45"), Ok(("", Value::Bytes(vec![0x7f, 0x45]))));
552/// ```
553///
554/// # Errors
555///
556/// Returns a nom parsing error if:
557/// - Input is empty or contains no valid value
558/// - Quoted string is not properly terminated
559/// - Numeric value cannot be parsed as a valid integer
560/// - Hex byte sequence contains invalid hex digits
561/// - Input contains invalid characters for the detected value format
562pub fn parse_value(input: &str) -> IResult<&str, Value> {
563    let (input, _) = multispace0(input)?;
564
565    // Handle empty input case - should fail for magic rules
566    if input.is_empty() {
567        return Err(nom::Err::Error(NomError::new(
568            input,
569            nom::error::ErrorKind::Tag,
570        )));
571    }
572
573    // Try to parse different value types in order of specificity
574    let (input, value) = alt((
575        // Try quoted string first
576        map(parse_quoted_string, Value::String),
577        // Try hex byte sequence before numeric (to catch patterns like "7f", "ab", "\\x7fELF", etc.)
578        map(parse_hex_bytes, Value::Bytes),
579        // Try numeric value last (for pure numbers like 0x123, 1, etc.)
580        parse_numeric_value,
581    ))
582    .parse(input)?;
583
584    Ok((input, value))
585}
586
587#[cfg(test)]
588mod tests {
589    use super::*;
590
591    /// Helper function to test parsing with various whitespace patterns
592    #[allow(dead_code)] // TODO: Use this helper in future whitespace tests
593    fn test_with_whitespace_variants<T, F>(input: &str, expected: &T, parser: F)
594    where
595        T: Clone + PartialEq + std::fmt::Debug,
596        F: Fn(&str) -> IResult<&str, T>,
597    {
598        // Test with various whitespace patterns - pre-allocate Vec with known capacity
599        let mut whitespace_variants = Vec::with_capacity(9);
600        whitespace_variants.extend([
601            format!(" {input}"),    // Leading space
602            format!("  {input}"),   // Leading spaces
603            format!("\t{input}"),   // Leading tab
604            format!("{input} "),    // Trailing space
605            format!("{input}  "),   // Trailing spaces
606            format!("{input}\t"),   // Trailing tab
607            format!(" {input} "),   // Both leading and trailing space
608            format!("  {input}  "), // Both leading and trailing spaces
609            format!("\t{input}\t"), // Both leading and trailing tabs
610        ]);
611
612        for variant in whitespace_variants {
613            assert_eq!(
614                parser(&variant),
615                Ok(("", expected.clone())),
616                "Failed to parse with whitespace: '{variant}'"
617            );
618        }
619    }
620
621    /// Helper function to test number parsing with remaining input
622    fn test_number_with_remaining_input() {
623        // Pre-allocate with known capacity for better performance
624        let test_cases = [
625            ("123abc", 123, "abc"),
626            ("0xFF rest", 255, " rest"),
627            ("-42 more", -42, " more"),
628            ("0x10,next", 16, ",next"),
629        ];
630
631        for (input, expected_num, expected_remaining) in test_cases {
632            assert_eq!(
633                parse_number(input),
634                Ok((expected_remaining, expected_num)),
635                "Failed to parse number with remaining input: '{input}'"
636            );
637        }
638    }
639
640    #[test]
641    fn test_parse_decimal_number() {
642        assert_eq!(parse_decimal_number("123"), Ok(("", 123)));
643        assert_eq!(parse_decimal_number("0"), Ok(("", 0)));
644        assert_eq!(parse_decimal_number("999"), Ok(("", 999)));
645
646        // Should fail on non-digits
647        assert!(parse_decimal_number("abc").is_err());
648        assert!(parse_decimal_number("").is_err());
649    }
650
651    #[test]
652    fn test_parse_hex_number() {
653        assert_eq!(parse_hex_number("0x0"), Ok(("", 0)));
654        assert_eq!(parse_hex_number("0x10"), Ok(("", 16)));
655        assert_eq!(parse_hex_number("0xFF"), Ok(("", 255)));
656        assert_eq!(parse_hex_number("0xabc"), Ok(("", 2748)));
657        assert_eq!(parse_hex_number("0xABC"), Ok(("", 2748)));
658
659        // Should fail without 0x prefix
660        assert!(parse_hex_number("FF").is_err());
661        assert!(parse_hex_number("10").is_err());
662
663        // Should fail on invalid hex digits
664        assert!(parse_hex_number("0xGG").is_err());
665    }
666
667    #[test]
668    fn test_parse_number_positive() {
669        // Decimal numbers
670        assert_eq!(parse_number("0"), Ok(("", 0)));
671        assert_eq!(parse_number("123"), Ok(("", 123)));
672        assert_eq!(parse_number("999"), Ok(("", 999)));
673
674        // Hexadecimal numbers
675        assert_eq!(parse_number("0x0"), Ok(("", 0)));
676        assert_eq!(parse_number("0x10"), Ok(("", 16)));
677        assert_eq!(parse_number("0xFF"), Ok(("", 255)));
678        assert_eq!(parse_number("0xabc"), Ok(("", 2748)));
679    }
680
681    #[test]
682    fn test_parse_number_negative() {
683        // Negative decimal numbers
684        assert_eq!(parse_number("-1"), Ok(("", -1)));
685        assert_eq!(parse_number("-123"), Ok(("", -123)));
686        assert_eq!(parse_number("-999"), Ok(("", -999)));
687
688        // Negative hexadecimal numbers
689        assert_eq!(parse_number("-0x1"), Ok(("", -1)));
690        assert_eq!(parse_number("-0x10"), Ok(("", -16)));
691        assert_eq!(parse_number("-0xFF"), Ok(("", -255)));
692        assert_eq!(parse_number("-0xabc"), Ok(("", -2748)));
693    }
694
695    #[test]
696    fn test_parse_number_edge_cases() {
697        // Zero with different formats
698        assert_eq!(parse_number("0"), Ok(("", 0)));
699        assert_eq!(parse_number("-0"), Ok(("", 0)));
700        assert_eq!(parse_number("0x0"), Ok(("", 0)));
701        assert_eq!(parse_number("-0x0"), Ok(("", 0)));
702
703        // Large numbers
704        assert_eq!(parse_number("2147483647"), Ok(("", 2_147_483_647))); // i32::MAX
705        assert_eq!(parse_number("-2147483648"), Ok(("", -2_147_483_648))); // i32::MIN
706        assert_eq!(parse_number("0x7FFFFFFF"), Ok(("", 2_147_483_647))); // i32::MAX in hex
707
708        // Should fail on invalid input
709        assert!(parse_number("").is_err());
710        assert!(parse_number("abc").is_err());
711        assert!(parse_number("0xGG").is_err());
712        assert!(parse_number("--123").is_err());
713    }
714
715    #[test]
716    fn test_parse_number_with_remaining_input() {
717        // Use helper function to reduce code duplication
718        test_number_with_remaining_input();
719    }
720
721    #[test]
722    fn test_parse_offset_absolute_positive() {
723        assert_eq!(parse_offset("0"), Ok(("", OffsetSpec::Absolute(0))));
724        assert_eq!(parse_offset("123"), Ok(("", OffsetSpec::Absolute(123))));
725        assert_eq!(parse_offset("999"), Ok(("", OffsetSpec::Absolute(999))));
726
727        // Hexadecimal offsets
728        assert_eq!(parse_offset("0x0"), Ok(("", OffsetSpec::Absolute(0))));
729        assert_eq!(parse_offset("0x10"), Ok(("", OffsetSpec::Absolute(16))));
730        assert_eq!(parse_offset("0xFF"), Ok(("", OffsetSpec::Absolute(255))));
731        assert_eq!(parse_offset("0xabc"), Ok(("", OffsetSpec::Absolute(2748))));
732    }
733
734    #[test]
735    fn test_parse_offset_absolute_negative() {
736        assert_eq!(parse_offset("-1"), Ok(("", OffsetSpec::Absolute(-1))));
737        assert_eq!(parse_offset("-123"), Ok(("", OffsetSpec::Absolute(-123))));
738        assert_eq!(parse_offset("-999"), Ok(("", OffsetSpec::Absolute(-999))));
739
740        // Negative hexadecimal offsets
741        assert_eq!(parse_offset("-0x1"), Ok(("", OffsetSpec::Absolute(-1))));
742        assert_eq!(parse_offset("-0x10"), Ok(("", OffsetSpec::Absolute(-16))));
743        assert_eq!(parse_offset("-0xFF"), Ok(("", OffsetSpec::Absolute(-255))));
744        assert_eq!(
745            parse_offset("-0xabc"),
746            Ok(("", OffsetSpec::Absolute(-2748)))
747        );
748    }
749
750    #[test]
751    fn test_parse_offset_with_whitespace() {
752        // Leading whitespace
753        assert_eq!(parse_offset(" 123"), Ok(("", OffsetSpec::Absolute(123))));
754        assert_eq!(parse_offset("  0x10"), Ok(("", OffsetSpec::Absolute(16))));
755        assert_eq!(parse_offset("\t-42"), Ok(("", OffsetSpec::Absolute(-42))));
756
757        // Trailing whitespace
758        assert_eq!(parse_offset("123 "), Ok(("", OffsetSpec::Absolute(123))));
759        assert_eq!(parse_offset("0x10  "), Ok(("", OffsetSpec::Absolute(16))));
760        assert_eq!(parse_offset("-42\t"), Ok(("", OffsetSpec::Absolute(-42))));
761
762        // Both leading and trailing whitespace
763        assert_eq!(parse_offset(" 123 "), Ok(("", OffsetSpec::Absolute(123))));
764        assert_eq!(parse_offset("  0x10  "), Ok(("", OffsetSpec::Absolute(16))));
765        assert_eq!(parse_offset("\t-42\t"), Ok(("", OffsetSpec::Absolute(-42))));
766    }
767
768    #[test]
769    fn test_parse_offset_with_remaining_input() {
770        // Should parse offset and leave remaining input
771        assert_eq!(
772            parse_offset("123 byte"),
773            Ok(("byte", OffsetSpec::Absolute(123)))
774        );
775        assert_eq!(parse_offset("0xFF ="), Ok(("=", OffsetSpec::Absolute(255))));
776        assert_eq!(
777            parse_offset("-42,next"),
778            Ok((",next", OffsetSpec::Absolute(-42)))
779        );
780        assert_eq!(
781            parse_offset("0x10\tlong"),
782            Ok(("long", OffsetSpec::Absolute(16)))
783        );
784    }
785
786    #[test]
787    fn test_parse_offset_edge_cases() {
788        // Zero with different formats
789        assert_eq!(parse_offset("0"), Ok(("", OffsetSpec::Absolute(0))));
790        assert_eq!(parse_offset("-0"), Ok(("", OffsetSpec::Absolute(0))));
791        assert_eq!(parse_offset("0x0"), Ok(("", OffsetSpec::Absolute(0))));
792        assert_eq!(parse_offset("-0x0"), Ok(("", OffsetSpec::Absolute(0))));
793
794        // Large offsets
795        assert_eq!(
796            parse_offset("2147483647"),
797            Ok(("", OffsetSpec::Absolute(2_147_483_647)))
798        );
799        assert_eq!(
800            parse_offset("-2147483648"),
801            Ok(("", OffsetSpec::Absolute(-2_147_483_648)))
802        );
803        assert_eq!(
804            parse_offset("0x7FFFFFFF"),
805            Ok(("", OffsetSpec::Absolute(2_147_483_647)))
806        );
807
808        // Should fail on invalid input
809        assert!(parse_offset("").is_err());
810        assert!(parse_offset("abc").is_err());
811        assert!(parse_offset("0xGG").is_err());
812        assert!(parse_offset("--123").is_err());
813    }
814
#[test]
fn test_parse_offset_common_magic_file_values() {
    // (offset text, expected absolute offset) for values that show up often in magic files.
    let typical = [
        ("0", 0),       // File start
        ("4", 4),       // After magic number
        ("16", 16),     // Common header offset
        ("0x10", 16),   // Same as above in hex
        ("512", 512),   // Sector boundary
        ("0x200", 512), // Same in hex
        // Negative offsets (from end of file)
        ("-4", -4),     // 4 bytes from end
        ("-16", -16),   // 16 bytes from end
        ("-0x10", -16), // Same in hex
    ];

    for (text, offset) in typical {
        assert_eq!(
            parse_offset(text),
            Ok(("", OffsetSpec::Absolute(offset))),
            "offset text: {text:?}"
        );
    }
}
830
#[test]
fn test_parse_offset_boundary_values() {
    // (offset text, expected absolute offset)
    let boundaries = [
        // Smallest non-zero magnitudes on either side of zero
        ("1", 1),
        ("-1", -1),
        // Powers of two, in decimal and hex (common in binary formats)
        ("256", 256),
        ("0x100", 256),
        ("1024", 1024),
        ("0x400", 1024),
        // 1 MB: large but still a realistic file offset
        ("1048576", 1_048_576),
        ("0x100000", 1_048_576),
    ];

    for (text, offset) in boundaries {
        assert_eq!(
            parse_offset(text),
            Ok(("", OffsetSpec::Absolute(offset))),
            "offset text: {text:?}"
        );
    }
}
853
854    // Operator parsing tests
#[test]
fn test_parse_operator_equality() {
    // "=" and "==" both mean equality, bare or wrapped in spaces/tabs.
    let spellings = ["=", "==", " = ", "  ==  ", "\t=\t"];

    for spelling in spellings {
        assert_eq!(
            parse_operator(spelling),
            Ok(("", Operator::Equal)),
            "equality spelling: {spelling:?}"
        );
    }
}
868
#[test]
fn test_parse_operator_inequality() {
    // "!=" and the alternative "<>" both mean inequality, bare or wrapped in spaces/tabs.
    let spellings = ["!=", "<>", " != ", "  <>  ", "\t!=\t"];

    for spelling in spellings {
        assert_eq!(
            parse_operator(spelling),
            Ok(("", Operator::NotEqual)),
            "inequality spelling: {spelling:?}"
        );
    }
}
882
#[test]
fn test_parse_operator_bitwise_and() {
    // A lone "&" is bitwise AND, bare or wrapped in spaces/tabs.
    let spellings = ["&", " & ", "  &  ", "\t&\t"];

    for spelling in spellings {
        assert_eq!(
            parse_operator(spelling),
            Ok(("", Operator::BitwiseAnd)),
            "bitwise-and spelling: {spelling:?}"
        );
    }
}
893
#[test]
fn test_parse_operator_with_remaining_input() {
    // (input, text left for the next parser, expected operator)
    let cases = [
        ("= 123", "123", Operator::Equal),
        ("!= value", "value", Operator::NotEqual),
        ("& 0xFF", "0xFF", Operator::BitwiseAnd),
        ("== \"string\"", "\"string\"", Operator::Equal),
        ("<> test", "test", Operator::NotEqual),
    ];

    for (input, rest, expected) in cases {
        assert_eq!(
            parse_operator(input),
            Ok((rest, expected)),
            "input: {input:?}"
        );
    }
}
909
#[test]
fn test_parse_operator_precedence() {
    // Two-character operators must win over their one-character prefixes.
    // Each operator is checked bare and followed by further input.
    // (input, text left over, expected operator)
    let cases = [
        // "==" is Equal, not "=" followed by "="
        ("==", "", Operator::Equal),
        ("== extra", "extra", Operator::Equal),
        // "!=" is NotEqual, not "!" followed by "="
        ("!=", "", Operator::NotEqual),
        ("!= extra", "extra", Operator::NotEqual),
        // "<>" is NotEqual, not "<" followed by ">"
        ("<>", "", Operator::NotEqual),
        ("<> extra", "extra", Operator::NotEqual),
        // "<=" is LessEqual, not "<" followed by "="
        ("<=", "", Operator::LessEqual),
        ("<= extra", "extra", Operator::LessEqual),
        // ">=" is GreaterEqual, not ">" followed by "="
        (">=", "", Operator::GreaterEqual),
        (">= extra", "extra", Operator::GreaterEqual),
    ];

    for (input, rest, expected) in cases {
        assert_eq!(
            parse_operator(input),
            Ok((rest, expected)),
            "input: {input:?}"
        );
    }
}
945
#[test]
fn test_parse_operator_invalid_input() {
    // None of these is an operator, so each must be rejected.
    let rejected = [
        "",
        "abc",
        "123",
        "!",
        "===", // Too many equals
        "&&",  // Double ampersand not supported
    ];

    for input in rejected {
        assert!(
            parse_operator(input).is_err(),
            "expected rejection of {input:?}"
        );
    }
}
956
#[test]
fn test_parse_operator_edge_cases() {
    // Every kind of surrounding whitespace is swallowed, so nothing is left over.
    // (input, expected operator)
    let cases = [
        // Trailing newline, CRLF and repeated tabs
        ("=\n", Operator::Equal),
        ("!=\r\n", Operator::NotEqual),
        ("&\t\t", Operator::BitwiseAnd),
        // Spaces and tabs mixed on both sides
        (" \t = \t ", Operator::Equal),
        ("\t != \t", Operator::NotEqual),
        (" \t& \t ", Operator::BitwiseAnd),
    ];

    for (input, expected) in cases {
        assert_eq!(
            parse_operator(input),
            Ok(("", expected)),
            "input: {input:?}"
        );
    }
}
969
#[test]
fn test_parse_operator_common_magic_file_patterns() {
    // Operator-plus-operand fragments in the shapes magic files tend to use.
    // (input, operand text left over, expected operator)
    let cases = [
        ("= 0x7f454c46", "0x7f454c46", Operator::Equal),
        ("!= 0", "0", Operator::NotEqual),
        ("& 0xFF00", "0xFF00", Operator::BitwiseAnd),
        ("== \"ELF\"", "\"ELF\"", Operator::Equal),
        ("<> \"\"", "\"\"", Operator::NotEqual),
        // Tab or several spaces between operator and operand
        ("=\t0x504b0304", "0x504b0304", Operator::Equal),
        ("!=  0", "0", Operator::NotEqual),
        ("&   0xFFFF", "0xFFFF", Operator::BitwiseAnd),
    ];

    for (input, rest, expected) in cases {
        assert_eq!(
            parse_operator(input),
            Ok((rest, expected)),
            "input: {input:?}"
        );
    }
}
999
#[test]
fn test_parse_operator_all_variants() {
    // One row per accepted spelling, paired with the variant it must produce.
    let spellings_and_variants = [
        ("=", Operator::Equal),
        ("==", Operator::Equal),
        ("!=", Operator::NotEqual),
        ("<>", Operator::NotEqual),
        ("<", Operator::LessThan),
        (">", Operator::GreaterThan),
        ("<=", Operator::LessEqual),
        (">=", Operator::GreaterEqual),
        ("&", Operator::BitwiseAnd),
    ];

    spellings_and_variants
        .into_iter()
        .for_each(|(input, expected)| {
            assert_eq!(
                parse_operator(input),
                Ok(("", expected)),
                "Failed to parse operator: '{input}'"
            );
        });
}
1023
#[test]
fn test_parse_operator_less_than() {
    // (input, text left over): bare, whitespace-padded, then followed by an operand.
    let cases = [
        ("<", ""),
        (" < ", ""),
        ("  <  ", ""),
        ("\t<\t", ""),
        ("< 42", "42"),
    ];

    for (input, rest) in cases {
        assert_eq!(
            parse_operator(input),
            Ok((rest, Operator::LessThan)),
            "input: {input:?}"
        );
    }
}
1037
#[test]
fn test_parse_operator_greater_than() {
    // (input, text left over): bare, whitespace-padded, then followed by an operand.
    let cases = [
        (">", ""),
        (" > ", ""),
        ("  >  ", ""),
        ("\t>\t", ""),
        ("> 42", "42"),
    ];

    for (input, rest) in cases {
        assert_eq!(
            parse_operator(input),
            Ok((rest, Operator::GreaterThan)),
            "input: {input:?}"
        );
    }
}
1051
#[test]
fn test_parse_operator_less_equal() {
    // (input, text left over): bare, whitespace-padded, then followed by an operand.
    let cases = [
        ("<=", ""),
        (" <= ", ""),
        ("  <=  ", ""),
        ("\t<=\t", ""),
        ("<= 42", "42"),
    ];

    for (input, rest) in cases {
        assert_eq!(
            parse_operator(input),
            Ok((rest, Operator::LessEqual)),
            "input: {input:?}"
        );
    }
}
1065
#[test]
fn test_parse_operator_greater_equal() {
    // (input, text left over): bare, whitespace-padded, then followed by an operand.
    let cases = [
        (">=", ""),
        (" >= ", ""),
        ("  >=  ", ""),
        ("\t>=\t", ""),
        (">= 42", "42"),
    ];

    for (input, rest) in cases {
        assert_eq!(
            parse_operator(input),
            Ok((rest, Operator::GreaterEqual)),
            "input: {input:?}"
        );
    }
}
1079
#[test]
fn test_parse_operator_comparison_disambiguation() {
    // Adjacent characters fuse into a two-character operator; a space between them
    // keeps the first character as its own operator and leaves the second unparsed.
    // (input, text left over, expected operator)
    let cases = [
        // "<>" is still NotEqual
        ("<>", "", Operator::NotEqual),
        // "<=" is LessEqual, not LessThan with "=" left over
        ("<=", "", Operator::LessEqual),
        // ">=" is GreaterEqual, not GreaterThan with "=" left over
        (">=", "", Operator::GreaterEqual),
        // "< >" (with a space) is LessThan with ">" left over
        ("< >", ">", Operator::LessThan),
        // "> =" (with a space) is GreaterThan with "=" left over
        ("> =", "=", Operator::GreaterThan),
    ];

    for (input, rest, expected) in cases {
        assert_eq!(
            parse_operator(input),
            Ok((rest, expected)),
            "input: {input:?}"
        );
    }
}
1097
1098    // Value parsing tests
#[test]
fn test_parse_hex_bytes_with_backslash_x() {
    // (input, decoded bytes): each "\x" escape contributes exactly one byte.
    let cases = [
        // A single escape
        ("\\x7f", vec![0x7f]),
        ("\\x45", vec![0x45]),
        ("\\x00", vec![0x00]),
        ("\\xFF", vec![0xFF]),
        // Several escapes back to back
        ("\\x7f\\x45\\x4c\\x46", vec![0x7f, 0x45, 0x4c, 0x46]),
        ("\\x50\\x4b\\x03\\x04", vec![0x50, 0x4b, 0x03, 0x04]),
    ];

    for (input, decoded) in cases {
        assert_eq!(
            parse_hex_bytes(input),
            Ok(("", decoded)),
            "input: {input:?}"
        );
    }
}
1117
#[test]
fn test_parse_hex_bytes_without_prefix() {
    // (input, decoded bytes): unprefixed digit pairs decode one byte per pair.
    let accepted = [
        // A single pair
        ("7f", vec![0x7f]),
        ("ab", vec![0xab]),
        ("FF", vec![0xFF]),
        // Several pairs back to back
        ("7f454c46", vec![0x7f, 0x45, 0x4c, 0x46]),
        ("504b0304", vec![0x50, 0x4b, 0x03, 0x04]),
    ];

    for (input, decoded) in accepted {
        assert_eq!(
            parse_hex_bytes(input),
            Ok(("", decoded)),
            "input: {input:?}"
        );
    }

    // A pair with no hex letters is refused with a Tag error at the start of the input.
    let digits_only = "45";
    let tag_failure = nom::Err::Error(NomError::new(digits_only, nom::error::ErrorKind::Tag));
    assert_eq!(parse_hex_bytes(digits_only), Err(tag_failure));
}
1142
#[test]
fn test_parse_hex_bytes_mixed_case() {
    // (input, decoded byte): upper- and lower-case hex digits may be mixed freely.
    let cases = [
        ("aB", 0xab),
        ("Cd", 0xcd),
        ("\\xEf", 0xef),
        ("\\x1A", 0x1a),
    ];

    for (input, byte) in cases {
        assert_eq!(
            parse_hex_bytes(input),
            Ok(("", vec![byte])),
            "input: {input:?}"
        );
    }
}
1151
#[test]
fn test_parse_hex_bytes_empty() {
    // With nothing to consume, the parser reports a Tag error on the empty input.
    let nothing = "";
    let tag_failure = nom::Err::Error(NomError::new(nothing, nom::error::ErrorKind::Tag));

    assert_eq!(parse_hex_bytes(nothing), Err(tag_failure));
}
1163
#[test]
fn test_parse_hex_bytes_with_remaining_input() {
    // Parsing stops at the first non-hex character, which stays in the remainder.
    // (input, text left over, decoded bytes)
    let cases = [
        ("7f45 rest", " rest", vec![0x7f, 0x45]),
        ("\\x50\\x4b next", " next", vec![0x50, 0x4b]),
        ("ab\"", "\"", vec![0xab]),
    ];

    for (input, rest, decoded) in cases {
        assert_eq!(
            parse_hex_bytes(input),
            Ok((rest, decoded)),
            "input: {input:?}"
        );
    }
}
1177
#[test]
fn test_parse_escape_sequence() {
    // (escape as written in a magic file, character it stands for)
    let escapes = [
        ("\\n", '\n'),
        ("\\r", '\r'),
        ("\\t", '\t'),
        ("\\\\", '\\'),
        ("\\\"", '"'),
        ("\\'", '\''),
        ("\\0", '\0'),
    ];

    for (written, decoded) in escapes {
        assert_eq!(
            parse_escape_sequence(written),
            Ok(("", decoded)),
            "escape: {written:?}"
        );
    }
}
1189
#[test]
fn test_parse_escape_sequence_with_remaining() {
    // Only the escape itself is consumed; whatever follows is handed back untouched.
    // (input, text left over, decoded character)
    let cases = [("\\n rest", " rest", '\n'), ("\\t\"", "\"", '\t')];

    for (input, rest, decoded) in cases {
        assert_eq!(
            parse_escape_sequence(input),
            Ok((rest, decoded)),
            "input: {input:?}"
        );
    }
}
1196
#[test]
fn test_parse_escape_sequence_invalid() {
    // Inputs that are not a complete escape sequence must be rejected.
    let rejected = [
        "n",  // Missing backslash
        "\\", // Incomplete escape
        "",   // Empty input
    ];

    for input in rejected {
        assert!(
            parse_escape_sequence(input).is_err(),
            "expected rejection of {input:?}"
        );
    }
}
1204
#[test]
fn test_parse_quoted_string_simple() {
    // (quoted input, unquoted contents); the last row is the empty string.
    let cases = [
        ("\"hello\"", "hello"),
        ("\"world\"", "world"),
        ("\"\"", ""),
    ];

    for (quoted, contents) in cases {
        assert_eq!(
            parse_quoted_string(quoted),
            Ok(("", contents.to_string())),
            "input: {quoted:?}"
        );
    }
}
1218
#[test]
fn test_parse_quoted_string_with_escapes() {
    // (quoted input with escapes, contents after the escapes are decoded)
    let cases = [
        ("\"Hello\\nWorld\"", "Hello\nWorld"),
        ("\"Tab\\tSeparated\"", "Tab\tSeparated"),
        ("\"Quote: \\\"text\\\"\"", "Quote: \"text\""),
        ("\"Backslash: \\\\\"", "Backslash: \\"),
        ("\"Null\\0terminated\"", "Null\0terminated"),
    ];

    for (quoted, decoded) in cases {
        assert_eq!(
            parse_quoted_string(quoted),
            Ok(("", decoded.to_string())),
            "input: {quoted:?}"
        );
    }
}
1243
#[test]
fn test_parse_quoted_string_with_whitespace() {
    // Whitespace outside the quotes is consumed on both sides and is not part of the string.
    // (padded input, unquoted contents)
    let cases = [
        (" \"hello\" ", "hello"),
        ("\t\"world\"\t", "world"),
        ("  \"test\"  ", "test"),
    ];

    for (padded, contents) in cases {
        assert_eq!(
            parse_quoted_string(padded),
            Ok(("", contents.to_string())),
            "input: {padded:?}"
        );
    }
}
1260
#[test]
fn test_parse_quoted_string_with_remaining_input() {
    // Text after the closing quote (past any whitespace) is handed back to the caller.
    // (input, text left over, unquoted contents)
    let cases = [
        ("\"hello\" world", "world", "hello"),
        ("\"test\" = 123", "= 123", "test"),
    ];

    for (input, rest, contents) in cases {
        assert_eq!(
            parse_quoted_string(input),
            Ok((rest, contents.to_string())),
            "input: {input:?}"
        );
    }
}
1273
#[test]
fn test_parse_quoted_string_invalid() {
    // A string needs both an opening and a closing quote.
    let rejected = [
        "hello",   // No quotes
        "\"hello", // Missing closing quote
        "hello\"", // Missing opening quote
        "",        // Empty input
    ];

    for input in rejected {
        assert!(
            parse_quoted_string(input).is_err(),
            "expected rejection of {input:?}"
        );
    }
}
1282
#[test]
fn test_parse_numeric_value_positive() {
    // Unsigned literals come back as `Value::Uint`.
    // (literal, expected magnitude)
    let cases = [
        // Decimal
        ("0", 0),
        ("123", 123),
        ("999", 999),
        // Hexadecimal
        ("0x0", 0),
        ("0x10", 16),
        ("0xFF", 255),
        ("0xabc", 2748),
    ];

    for (literal, magnitude) in cases {
        assert_eq!(
            parse_numeric_value(literal),
            Ok(("", Value::Uint(magnitude))),
            "literal: {literal:?}"
        );
    }
}
1296
#[test]
fn test_parse_numeric_value_negative() {
    // Literals with a leading minus come back as `Value::Int`.
    // (literal, expected signed value)
    let cases = [
        // Decimal
        ("-1", -1),
        ("-123", -123),
        ("-999", -999),
        // Hexadecimal
        ("-0x1", -1),
        ("-0x10", -16),
        ("-0xFF", -255),
        ("-0xabc", -2748),
    ];

    for (literal, signed) in cases {
        assert_eq!(
            parse_numeric_value(literal),
            Ok(("", Value::Int(signed))),
            "literal: {literal:?}"
        );
    }
}
1310
#[test]
fn test_parse_numeric_value_with_whitespace() {
    // Leading and trailing whitespace around a number is consumed.
    // (padded literal, expected value)
    let cases = [
        (" 123 ", Value::Uint(123)),
        ("\t-456\t", Value::Int(-456)),
        ("  0xFF  ", Value::Uint(255)),
    ];

    for (padded, expected) in cases {
        assert_eq!(
            parse_numeric_value(padded),
            Ok(("", expected)),
            "literal: {padded:?}"
        );
    }
}
1318
#[test]
fn test_parse_numeric_value_with_remaining_input() {
    // The numeric parser consumes trailing whitespace, so the remainder starts at the
    // next non-whitespace character.
    // (input, text left over, expected value)
    let cases = [
        ("123 rest", "rest", Value::Uint(123)),
        ("-456 more", "more", Value::Int(-456)),
        ("0xFF)", ")", Value::Uint(255)),
    ];

    for (input, rest, expected) in cases {
        assert_eq!(
            parse_numeric_value(input),
            Ok((rest, expected)),
            "input: {input:?}"
        );
    }
}
1332
#[test]
fn test_parse_numeric_value_large_unsigned_quad() {
    // uquad needs the full u64 range, including values above i64::MAX.
    // (literal, expected unsigned magnitude)
    let magnitudes = [
        // u64::MAX, hex then decimal
        ("0xffffffffffffffff", u64::MAX),
        ("18446744073709551615", u64::MAX),
        // i64::MAX + 1 (the first value that no longer fits in i64), hex then decimal
        ("0x8000000000000000", 0x8000_0000_0000_0000),
        ("9223372036854775808", 9_223_372_036_854_775_808),
        // i64::MAX itself is still reported as unsigned
        ("0x7fffffffffffffff", i64::MAX as u64),
        ("9223372036854775807", i64::MAX as u64),
        // Common magic constant patterns
        ("0xDEADBEEFDEADBEEF", 0xDEAD_BEEF_DEAD_BEEF),
        ("0xCAFEBABECAFEBABE", 0xCAFE_BABE_CAFE_BABE),
    ];

    for (input, magnitude) in magnitudes {
        assert_eq!(
            parse_numeric_value(input),
            Ok(("", Value::Uint(magnitude))),
            "Failed to parse large unsigned quad literal: '{input}'"
        );
    }
}
1364
#[test]
fn test_parse_value_string_literals() {
    // Quoted input becomes `Value::String` holding the decoded contents.
    // (quoted input, decoded contents)
    let cases = [
        // Plain strings, including the empty one
        ("\"hello\"", "hello"),
        ("\"ELF\"", "ELF"),
        ("\"\"", ""),
        // Strings containing escape sequences
        ("\"Line1\\nLine2\"", "Line1\nLine2"),
        ("\"Tab\\tSeparated\"", "Tab\tSeparated"),
        ("\"Null\\0Term\"", "Null\0Term"),
    ];

    for (quoted, decoded) in cases {
        assert_eq!(
            parse_value(quoted),
            Ok(("", Value::String(decoded.to_string()))),
            "input: {quoted:?}"
        );
    }
}
1392
#[test]
fn test_parse_value_numeric_literals() {
    // Unsigned literals map to `Value::Uint`; a leading minus yields `Value::Int`.
    // (literal, expected value)
    let cases = [
        // Positive decimal
        ("0", Value::Uint(0)),
        ("123", Value::Uint(123)),
        ("999", Value::Uint(999)),
        // Negative decimal
        ("-1", Value::Int(-1)),
        ("-123", Value::Int(-123)),
        ("-999", Value::Int(-999)),
        // Hexadecimal, both signs
        ("0x0", Value::Uint(0)),
        ("0x10", Value::Uint(16)),
        ("0xFF", Value::Uint(255)),
        ("-0xFF", Value::Int(-255)),
    ];

    for (literal, expected) in cases {
        assert_eq!(
            parse_value(literal),
            Ok(("", expected)),
            "literal: {literal:?}"
        );
    }
}
1411
#[test]
fn test_parse_value_hex_byte_sequences() {
    // Hex byte input becomes `Value::Bytes`.
    // (input, decoded bytes)
    let cases = [
        // With the \x prefix
        ("\\x7f", vec![0x7f]),
        ("\\x7f\\x45\\x4c\\x46", vec![0x7f, 0x45, 0x4c, 0x46]),
        // Without a prefix
        ("7f", vec![0x7f]),
        ("7f454c46", vec![0x7f, 0x45, 0x4c, 0x46]),
        // Mixed-case digits
        ("aB", vec![0xab]),
        ("\\xCd", vec![0xcd]),
    ];

    for (input, decoded) in cases {
        assert_eq!(
            parse_value(input),
            Ok(("", Value::Bytes(decoded))),
            "input: {input:?}"
        );
    }
}
1432
#[test]
fn test_parse_value_with_whitespace() {
    // Leading whitespace is always skipped. Trailing whitespace is consumed for strings
    // and numbers, but hex bytes leave it in the remainder.
    // (input, text left over, expected value)
    let cases = [
        (" \"hello\" ", "", Value::String("hello".to_string())),
        ("  123  ", "", Value::Uint(123)),
        ("\t-456\t", "", Value::Int(-456)),
        ("  \\x7f\\x45  ", "  ", Value::Bytes(vec![0x7f, 0x45])),
    ];

    for (input, rest, expected) in cases {
        assert_eq!(
            parse_value(input),
            Ok((rest, expected)),
            "input: {input:?}"
        );
    }
}
1448
#[test]
fn test_parse_value_with_remaining_input() {
    // What is left over depends on the value kind: strings and numbers also consume the
    // separating space, hex bytes do not.
    // (input, text left over, expected value)
    let cases = [
        ("\"hello\" world", "world", Value::String("hello".to_string())),
        // Numeric parser consumes trailing space
        ("123 rest", "rest", Value::Uint(123)),
        ("-456 more", "more", Value::Int(-456)),
        // Hex bytes don't consume trailing space
        ("\\x7f\\x45 next", " next", Value::Bytes(vec![0x7f, 0x45])),
    ];

    for (input, rest, expected) in cases {
        assert_eq!(
            parse_value(input),
            Ok((rest, expected)),
            "input: {input:?}"
        );
    }
}
1470
#[test]
fn test_parse_value_edge_cases() {
    // (literal, expected value)
    let cases = [
        // Zero: the sign decides between Uint and Int even though the magnitude is the same
        ("0", Value::Uint(0)),
        ("-0", Value::Int(0)),
        ("0x0", Value::Uint(0)),
        ("-0x0", Value::Int(0)),
        // Both ends of the signed 32-bit range, plus the hex form of the upper end
        ("2147483647", Value::Uint(2_147_483_647)),
        ("-2147483648", Value::Int(-2_147_483_648)),
        ("0x7FFFFFFF", Value::Uint(2_147_483_647)),
    ];

    for (literal, expected) in cases {
        assert_eq!(
            parse_value(literal),
            Ok(("", expected)),
            "literal: {literal:?}"
        );
    }

    // Nothing to parse is an error, not a default value.
    assert!(parse_value("").is_err());
}
1496
#[test]
fn test_parse_value_invalid_input() {
    // None of these matches any value syntax, so each must be rejected.
    let rejected = [
        "xyz",        // Not a valid value format
        "0xGG",       // Invalid hex digits
        "\"unclosed", // Unclosed string
        "--123",      // Invalid number format
    ];

    for input in rejected {
        assert!(
            parse_value(input).is_err(),
            "expected rejection of {input:?}"
        );
    }
}
1505
#[test]
fn test_parse_value_common_magic_file_patterns() {
    // Value spellings in the shapes magic files tend to use.
    // (input, expected value)
    let cases = [
        ("0x7f454c46", Value::Uint(0x7f45_4c46)),
        ("\"ELF\"", Value::String("ELF".to_string())),
        ("\\x50\\x4b\\x03\\x04", Value::Bytes(vec![0x50, 0x4b, 0x03, 0x04])),
        // Octal escape inside a string decodes to U+00FF
        ("\"\\377ELF\"", Value::String("\u{00ff}ELF".to_string())),
        ("0", Value::Uint(0)),
        ("-1", Value::Int(-1)),
    ];

    for (input, expected) in cases {
        assert_eq!(
            parse_value(input),
            Ok(("", expected)),
            "input: {input:?}"
        );
    }
}
1528
#[test]
fn test_parse_value_type_precedence() {
    // The same characters can mean different things depending on how they are written.
    // (input, expected value)
    let cases = [
        // Quoted: always a string, even if the contents look like hex
        ("\"7f\"", Value::String("7f".to_string())),
        // Unquoted hex pair: raw bytes
        ("7f", Value::Bytes(vec![0x7f])),
        // Digits that do not look like hex bytes: a number
        ("123", Value::Uint(123)),
        ("-123", Value::Int(-123)),
        // 0x prefix: a number, not bytes
        ("0x123", Value::Uint(0x123)),
    ];

    for (input, expected) in cases {
        assert_eq!(
            parse_value(input),
            Ok(("", expected)),
            "input: {input:?}"
        );
    }
}
1548
#[test]
fn test_parse_value_boundary_conditions() {
    // Smallest interesting inputs for each value kind.
    // (input, expected value)
    let cases = [
        // Single-character strings
        ("\"a\"", Value::String("a".to_string())),
        ("\"1\"", Value::String("1".to_string())),
        // Single hex byte
        ("ab", Value::Bytes(vec![0xab])),
        ("\\x00", Value::Bytes(vec![0x00])),
        // Smallest non-zero magnitudes on either side of zero
        ("1", Value::Uint(1)),
        ("-1", Value::Int(-1)),
        // Powers of two, in decimal and hex (common in binary formats)
        ("256", Value::Uint(256)),
        ("0x100", Value::Uint(256)),
        ("1024", Value::Uint(1024)),
        ("0x400", Value::Uint(1024)),
    ];

    for (input, expected) in cases {
        assert_eq!(
            parse_value(input),
            Ok(("", expected)),
            "input: {input:?}"
        );
    }
}
1577
#[test]
fn test_parse_operator_whitespace_handling() {
    // Every operator spelling must tolerate any mix of spaces and tabs on either side.
    // The comparison operators (<, >, <=, >=) are included so the whole operator set
    // is covered, not only the equality and bitwise ones.
    let operators = ["=", "==", "!=", "<>", "<", ">", "<=", ">=", "&"];
    let whitespace_patterns = [
        "",     // No whitespace
        " ",    // Single space
        "  ",   // Multiple spaces
        "\t",   // Tab
        "\t\t", // Multiple tabs
        " \t",  // Mixed space and tab
        "\t ",  // Mixed tab and space
    ];

    for op in operators {
        // Baseline: the variant the bare symbol parses to, used to check that padding
        // never changes which operator is recognized.
        let bare = parse_operator(op).map(|(_, parsed)| parsed);
        assert!(bare.is_ok(), "Failed to parse bare operator: '{op}'");

        for leading_ws in whitespace_patterns {
            for trailing_ws in whitespace_patterns {
                let input = format!("{leading_ws}{op}{trailing_ws}");
                let result = parse_operator(&input);

                assert!(
                    result.is_ok(),
                    "Failed to parse operator with whitespace: '{input}'"
                );

                let (remaining, parsed) = result.unwrap();
                assert_eq!(remaining, "", "Unexpected remaining input for: '{input}'");
                assert_eq!(
                    Some(&parsed),
                    bare.as_ref().ok(),
                    "Whitespace changed the parsed operator for: '{input}'"
                );
            }
        }
    }
}
1609}
1610/// Parse a type specification with an optional attached bitwise-AND mask operator
1611/// (e.g., `lelong&0xf0000000`).
1612///
1613/// Returns the `TypeKind` and an optional `Operator`.
1614///
1615/// # Errors
1616/// Returns a nom parsing error if the input doesn't match the expected format
1617pub fn parse_type_and_operator(input: &str) -> IResult<&str, (TypeKind, Option<Operator>)> {
1618    let (input, _) = multispace0(input)?;
1619
1620    let (input, type_name) = crate::parser::types::parse_type_keyword(input)?;
1621
1622    // Check for attached operator with mask (like &0xf0000000)
1623    // Uses unsigned parsing so full u64 masks (e.g. 0xffffffffffffffff) are supported.
1624    // If '&' is followed by digits/0x but the mask parse fails (overflow, etc.),
1625    // we return a hard error instead of silently falling back to standalone '&'.
1626    let (input, attached_op) = if let Some(after_amp) = input.strip_prefix('&') {
1627        if after_amp.starts_with("0x") || after_amp.starts_with(|c: char| c.is_ascii_digit()) {
1628            // '&' followed by what looks like a number -- must parse as mask
1629            let (rest, mask) = parse_unsigned_number(after_amp).map_err(|_| {
1630                nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::MapRes))
1631            })?;
1632            (rest, Some(Operator::BitwiseAndMask(mask)))
1633        } else if after_amp.starts_with('&') {
1634            // Reject '&&' -- not valid operator syntax
1635            return Err(nom::Err::Error(nom::error::Error::new(
1636                input,
1637                nom::error::ErrorKind::Tag,
1638            )));
1639        } else {
1640            // Standalone '&' (no digits following)
1641            (after_amp, Some(Operator::BitwiseAnd))
1642        }
1643    } else {
1644        (input, None)
1645    };
1646
1647    let (input, _) = multispace0(input)?;
1648
1649    let type_kind = crate::parser::types::type_keyword_to_kind(type_name);
1650
1651    Ok((input, (type_kind, attached_op)))
1652}
1653
1654/// Parse a type specification (byte, short, long, quad, string, etc.)
1655///
1656/// Supports various type formats found in magic files:
1657/// - `byte` / `ubyte` - single byte (signed / unsigned)
1658/// - `short` / `ushort` - 16-bit integer (native endian, signed / unsigned)
1659/// - `leshort` / `uleshort` - 16-bit little-endian integer
1660/// - `beshort` / `ubeshort` - 16-bit big-endian integer
1661/// - `long` / `ulong` - 32-bit integer (native endian, signed / unsigned)
1662/// - `lelong` / `ulelong` - 32-bit little-endian integer
1663/// - `belong` / `ubelong` - 32-bit big-endian integer
1664/// - `quad` / `uquad` - 64-bit integer (native endian, signed / unsigned)
1665/// - `lequad` / `ulequad` - 64-bit little-endian integer
1666/// - `bequad` / `ubequad` - 64-bit big-endian integer
1667/// - `string` - null-terminated string
1668///
1669/// # Examples
1670///
1671/// ```
1672/// use libmagic_rs::parser::grammar::parse_type;
1673/// use libmagic_rs::parser::ast::{TypeKind, Endianness};
1674///
1675/// assert_eq!(parse_type("byte"), Ok(("", TypeKind::Byte { signed: true })));
1676/// assert_eq!(parse_type("leshort"), Ok(("", TypeKind::Short { endian: Endianness::Little, signed: true })));
1677/// assert_eq!(parse_type("bequad"), Ok(("", TypeKind::Quad { endian: Endianness::Big, signed: true })));
1678/// assert_eq!(parse_type("string"), Ok(("", TypeKind::String { max_length: None })));
1679/// ```
1680///
1681/// # Errors
1682/// Returns a nom parsing error if the input doesn't match any known type
1683pub fn parse_type(input: &str) -> IResult<&str, TypeKind> {
1684    let (input, (type_kind, _)) = parse_type_and_operator(input)?;
1685    Ok((input, type_kind))
1686}
1687
1688/// Parse the indentation level and offset for magic rules
1689///
1690/// Handles both absolute offsets and hierarchical child rules with `>` prefix.
1691/// Child rules can be nested multiple levels deep with multiple `>` characters.
1692///
1693/// # Examples
1694///
1695/// ```
1696/// use libmagic_rs::parser::grammar::parse_rule_offset;
1697/// use libmagic_rs::parser::ast::OffsetSpec;
1698///
1699/// // Absolute offset
1700/// assert_eq!(parse_rule_offset("0"), Ok(("", (0, OffsetSpec::Absolute(0)))));
1701/// assert_eq!(parse_rule_offset("16"), Ok(("", (0, OffsetSpec::Absolute(16)))));
1702///
1703/// // Child rule (level 1)
1704/// assert_eq!(parse_rule_offset(">4"), Ok(("", (1, OffsetSpec::Absolute(4)))));
1705///
1706/// // Nested child rule (level 2)
1707/// assert_eq!(parse_rule_offset(">>8"), Ok(("", (2, OffsetSpec::Absolute(8)))));
1708/// ```
1709/// Parse rule offset with hierarchy level (> prefixes) and offset specification
1710///
1711/// # Errors
1712/// Returns a nom parsing error if the input doesn't match the expected offset format
1713pub fn parse_rule_offset(input: &str) -> IResult<&str, (u32, OffsetSpec)> {
1714    let (input, _) = multispace0(input)?;
1715
1716    // Count the number of '>' characters for nesting level
1717    let (input, level_chars) = many0(char('>')).parse(input)?;
1718    let level = u32::try_from(level_chars.len()).unwrap_or(0);
1719
1720    // Parse the offset after the '>' characters
1721    let (input, offset_spec) = parse_offset(input)?;
1722
1723    Ok((input, (level, offset_spec)))
1724}
1725
1726/// Parse the message part of a magic rule
1727///
1728/// The message is everything after the value until the end of the line.
1729/// It may contain format specifiers and can be empty.
1730///
1731/// # Examples
1732///
1733/// ```
1734/// use libmagic_rs::parser::grammar::parse_message;
1735///
1736/// assert_eq!(parse_message("ELF executable"), Ok(("", "ELF executable".to_string())));
1737/// assert_eq!(parse_message(""), Ok(("", "".to_string())));
1738/// assert_eq!(parse_message("  \tPDF document  "), Ok(("", "PDF document".to_string())));
1739/// ```
1740/// Parse the message/description part of a magic rule
1741///
1742/// # Errors
1743/// Returns a nom parsing error if the input cannot be parsed as a message
1744pub fn parse_message(input: &str) -> IResult<&str, String> {
1745    let (input, _) = multispace0(input)?;
1746
1747    // Take everything until end of line, trimming whitespace
1748    // Use take_while instead of take_while1 to handle empty messages
1749    let (input, message_text) = take_while(|c: char| c != '\n' && c != '\r').parse(input)?;
1750    let message = message_text.trim().to_string();
1751
1752    Ok((input, message))
1753}
1754
1755/// Parse a strength directive (`!:strength` line)
1756///
1757/// Parses the `!:strength` directive that modifies rule strength.
1758/// Format: `!:strength [+|-|*|/|=]N` or `!:strength N`
1759///
1760/// # Examples
1761///
1762/// ```
1763/// use libmagic_rs::parser::grammar::parse_strength_directive;
1764/// use libmagic_rs::parser::ast::StrengthModifier;
1765///
1766/// assert_eq!(parse_strength_directive("!:strength +10"), Ok(("", StrengthModifier::Add(10))));
1767/// assert_eq!(parse_strength_directive("!:strength -5"), Ok(("", StrengthModifier::Subtract(5))));
1768/// assert_eq!(parse_strength_directive("!:strength *2"), Ok(("", StrengthModifier::Multiply(2))));
1769/// assert_eq!(parse_strength_directive("!:strength /2"), Ok(("", StrengthModifier::Divide(2))));
1770/// assert_eq!(parse_strength_directive("!:strength =50"), Ok(("", StrengthModifier::Set(50))));
1771/// assert_eq!(parse_strength_directive("!:strength 50"), Ok(("", StrengthModifier::Set(50))));
1772/// ```
1773///
1774/// # Errors
1775///
1776/// Returns a nom parsing error if:
1777/// - Input doesn't start with `!:strength`
1778/// - The modifier value cannot be parsed as a valid integer
1779/// - The operator is invalid
1780pub fn parse_strength_directive(input: &str) -> IResult<&str, StrengthModifier> {
1781    // Helper to safely convert i64 to i32 with clamping to valid strength range.
1782    // This prevents silent truncation to 0 on overflow while keeping values in bounds.
1783    fn clamp_to_i32(n: i64) -> i32 {
1784        // Use i64::from for lossless conversion, then clamp and convert back
1785        let clamped = n.clamp(i64::from(i32::MIN), i64::from(i32::MAX));
1786        // Safe to unwrap: clamped value is guaranteed to be in i32 range
1787        i32::try_from(clamped).unwrap()
1788    }
1789
1790    let (input, _) = multispace0(input)?;
1791    let (input, _) = tag("!:strength")(input)?;
1792    let (input, _) = multispace0(input)?;
1793
1794    // Parse the operator: +, -, *, /, = or bare number (implies =)
1795    let (input, modifier) = alt((
1796        // +N -> Add
1797        map(pair(char('+'), parse_number), |(_, n)| {
1798            StrengthModifier::Add(clamp_to_i32(n))
1799        }),
1800        // -N -> Subtract (note: parse_number handles negative, so we need special handling)
1801        map(pair(char('-'), parse_decimal_number), |(_, n)| {
1802            StrengthModifier::Subtract(clamp_to_i32(n))
1803        }),
1804        // *N -> Multiply
1805        map(pair(char('*'), parse_number), |(_, n)| {
1806            StrengthModifier::Multiply(clamp_to_i32(n))
1807        }),
1808        // /N -> Divide
1809        map(pair(char('/'), parse_number), |(_, n)| {
1810            StrengthModifier::Divide(clamp_to_i32(n))
1811        }),
1812        // =N -> Set
1813        map(pair(char('='), parse_number), |(_, n)| {
1814            StrengthModifier::Set(clamp_to_i32(n))
1815        }),
1816        // Bare number -> Set
1817        map(parse_number, |n| StrengthModifier::Set(clamp_to_i32(n))),
1818    ))
1819    .parse(input)?;
1820
1821    Ok((input, modifier))
1822}
1823
/// Report whether a line is a strength directive, i.e. its first
/// non-whitespace text is `!:strength`
///
/// # Examples
///
/// ```
/// use libmagic_rs::parser::grammar::is_strength_directive;
///
/// assert!(is_strength_directive("!:strength +10"));
/// assert!(is_strength_directive("  !:strength -5"));
/// assert!(!is_strength_directive("0 byte 1"));
/// ```
#[must_use]
pub fn is_strength_directive(input: &str) -> bool {
    input.trim_start().strip_prefix("!:strength").is_some()
}
1839
1840/// Parse a complete magic rule line from text format
1841///
1842/// Parses a complete magic rule in the format:
1843/// `[>...]offset type [operator] value [message]`
1844///
1845/// Where:
1846/// - `>...` indicates child rule nesting level (optional)
1847/// - `offset` is the byte offset to read from
1848/// - `type` is the data type (byte, short, long, string, etc.)
1849/// - `operator` is the comparison operator (=, !=, &) - defaults to = if omitted
1850/// - `value` is the expected value to compare against
1851/// - `message` is the human-readable description (optional)
1852///
1853/// # Examples
1854///
1855/// ```
1856/// use libmagic_rs::parser::grammar::parse_magic_rule;
1857/// use libmagic_rs::parser::ast::{MagicRule, OffsetSpec, TypeKind, Operator, Value};
1858///
1859/// // Basic rule
1860/// let input = "0 string \\x7fELF ELF executable";
1861/// let (_, rule) = parse_magic_rule(input).unwrap();
1862/// assert_eq!(rule.level, 0);
1863/// assert_eq!(rule.message, "ELF executable");
1864///
1865/// // Child rule
1866/// let input = ">4 byte 1 32-bit";
1867/// let (_, rule) = parse_magic_rule(input).unwrap();
1868/// assert_eq!(rule.level, 1);
1869/// assert_eq!(rule.message, "32-bit");
1870/// ```
1871///
1872/// # Errors
1873///
1874/// Returns a nom parsing error if:
1875/// - The offset specification is invalid
1876/// - The type specification is not recognized
1877/// - The operator is invalid (if present)
1878/// - The value cannot be parsed
1879/// - The input format doesn't match the expected magic rule syntax
1880pub fn parse_magic_rule(input: &str) -> IResult<&str, MagicRule> {
1881    let (input, _) = multispace0(input)?;
1882
1883    // Parse the offset with nesting level
1884    let (input, (level, offset)) = parse_rule_offset(input)?;
1885
1886    // Parse the type and any attached operator
1887    let (input, (typ, attached_op)) = parse_type_and_operator(input)?;
1888
1889    // Try to parse a separate operator (optional - use attached operator if present)
1890    let (input, separate_op) = opt(parse_operator).parse(input)?;
1891    let op = attached_op.or(separate_op).unwrap_or(Operator::Equal);
1892
1893    // Parse the value
1894    let (input, value) = parse_value(input)?;
1895
1896    // Parse the message (optional - everything remaining on the line)
1897    let (input, message) = if input.trim().is_empty() {
1898        (input, String::new())
1899    } else {
1900        parse_message(input)?
1901    };
1902
1903    let rule = MagicRule {
1904        offset,
1905        typ,
1906        op,
1907        value,
1908        message,
1909        children: vec![], // Children will be added during hierarchical parsing
1910        level,
1911        strength_modifier: None, // Will be set during directive parsing
1912    };
1913
1914    Ok((input, rule))
1915}
1916
1917/// Parse a comment line (starts with #)
1918///
1919/// Comments in magic files start with '#' and continue to the end of the line.
1920/// This function consumes the entire comment line.
1921///
1922/// # Examples
1923///
1924/// ```
1925/// use libmagic_rs::parser::grammar::parse_comment;
1926///
1927/// assert_eq!(parse_comment("# This is a comment"), Ok(("", "This is a comment".to_string())));
1928/// assert_eq!(parse_comment("#"), Ok(("", "".to_string())));
1929/// ```
1930/// Parse a comment line (starting with #)
1931///
1932/// # Errors
1933/// Returns a nom parsing error if the input is not a valid comment
1934pub fn parse_comment(input: &str) -> IResult<&str, String> {
1935    let (input, _) = multispace0(input)?;
1936    let (input, _) = char('#').parse(input)?;
1937    let (input, comment_text) = take_while(|c: char| c != '\n' && c != '\r').parse(input)?;
1938    let comment = comment_text.trim().to_string();
1939    Ok((input, comment))
1940}
1941
/// Report whether a line contains nothing but whitespace (or nothing at all)
///
/// # Examples
///
/// ```
/// use libmagic_rs::parser::grammar::is_empty_line;
///
/// assert!(is_empty_line(""));
/// assert!(is_empty_line("   "));
/// assert!(is_empty_line("\t\t"));
/// assert!(!is_empty_line("0 byte 1"));
/// ```
#[must_use]
pub fn is_empty_line(input: &str) -> bool {
    input.chars().all(char::is_whitespace)
}
1958
/// Report whether a line is a comment, i.e. its first non-whitespace
/// character is `#`
///
/// # Examples
///
/// ```
/// use libmagic_rs::parser::grammar::is_comment_line;
///
/// assert!(is_comment_line("# This is a comment"));
/// assert!(is_comment_line("#"));
/// assert!(is_comment_line("  # Indented comment"));
/// assert!(!is_comment_line("0 byte 1"));
/// ```
#[must_use]
pub fn is_comment_line(input: &str) -> bool {
    let first_visible = input.chars().find(|c| !c.is_whitespace());
    first_visible == Some('#')
}
1975
/// Report whether a line ends with the continuation character (`\`),
/// ignoring trailing whitespace
///
/// Magic files support line continuation with backslash at the end of lines.
///
/// # Examples
///
/// ```
/// use libmagic_rs::parser::grammar::has_continuation;
///
/// assert!(has_continuation("0 string test \\"));
/// assert!(has_continuation("message continues \\"));
/// assert!(!has_continuation("0 string test"));
/// ```
#[must_use]
pub fn has_continuation(input: &str) -> bool {
    let last_visible = input.chars().rev().find(|c| !c.is_whitespace());
    last_visible == Some('\\')
}
1993// Tests for new magic rule parsing functions
1994
#[test]
fn test_parse_type_basic() {
    // Plain keywords map to signed, native-endian kinds.
    let cases = [
        ("byte", TypeKind::Byte { signed: true }),
        (
            "short",
            TypeKind::Short {
                endian: Endianness::Native,
                signed: true,
            },
        ),
        (
            "long",
            TypeKind::Long {
                endian: Endianness::Native,
                signed: true,
            },
        ),
        ("string", TypeKind::String { max_length: None }),
    ];

    for (keyword, expected) in cases {
        assert_eq!(parse_type(keyword), Ok(("", expected)), "keyword: '{keyword}'");
    }
}
2026
#[test]
fn test_parse_type_endianness() {
    // The `le` / `be` prefixes select the byte order; signedness stays signed.
    let short = |endian| TypeKind::Short {
        endian,
        signed: true,
    };
    let long = |endian| TypeKind::Long {
        endian,
        signed: true,
    };

    let cases = [
        ("leshort", short(Endianness::Little)),
        ("beshort", short(Endianness::Big)),
        ("lelong", long(Endianness::Little)),
        ("belong", long(Endianness::Big)),
    ];

    for (keyword, expected) in cases {
        assert_eq!(parse_type(keyword), Ok(("", expected)), "keyword: '{keyword}'");
    }
}
2070
#[test]
fn test_parse_type_with_whitespace() {
    // Surrounding spaces and tabs are consumed along with the keyword.
    let cases = [
        (" byte ", TypeKind::Byte { signed: true }),
        ("\tstring\t", TypeKind::String { max_length: None }),
        (
            "  lelong  ",
            TypeKind::Long {
                endian: Endianness::Little,
                signed: true,
            },
        ),
    ];

    for (input, expected) in cases {
        assert_eq!(parse_type(input), Ok(("", expected)), "input: '{input}'");
    }
}
2092
#[test]
fn test_parse_type_with_remaining_input() {
    // Text after the keyword (and its trailing blanks) is handed back untouched.
    let cases = [
        ("byte =", "=", TypeKind::Byte { signed: true }),
        (
            "string \\x7f",
            "\\x7f",
            TypeKind::String { max_length: None },
        ),
    ];

    for (input, rest, expected) in cases {
        assert_eq!(parse_type(input), Ok((rest, expected)), "input: '{input}'");
    }
}
2104
#[test]
fn test_parse_type_invalid() {
    // Empty input and unknown keywords must all be rejected.
    for bad in ["", "invalid", "int", "float"] {
        assert!(parse_type(bad).is_err(), "expected an error for: '{bad}'");
    }
}
2112
#[test]
fn test_parse_type_unsigned_variants() {
    // A leading `u` flips the kind to unsigned without changing the byte order.
    let short = |endian| TypeKind::Short {
        endian,
        signed: false,
    };
    let long = |endian| TypeKind::Long {
        endian,
        signed: false,
    };
    let quad = |endian| TypeKind::Quad {
        endian,
        signed: false,
    };

    let cases = [
        ("ubyte", TypeKind::Byte { signed: false }),
        ("ushort", short(Endianness::Native)),
        ("ubeshort", short(Endianness::Big)),
        ("uleshort", short(Endianness::Little)),
        ("ulong", long(Endianness::Native)),
        ("ubelong", long(Endianness::Big)),
        ("ulelong", long(Endianness::Little)),
        ("uquad", quad(Endianness::Native)),
        ("ubequad", quad(Endianness::Big)),
        ("ulequad", quad(Endianness::Little)),
    ];

    for (keyword, expected) in cases {
        assert_eq!(parse_type(keyword), Ok(("", expected)), "keyword: '{keyword}'");
    }
}
2210
#[test]
fn test_parse_type_signed_defaults() {
    // In libmagic, unprefixed types are signed by default
    let short = |endian| TypeKind::Short {
        endian,
        signed: true,
    };
    let long = |endian| TypeKind::Long {
        endian,
        signed: true,
    };
    let quad = |endian| TypeKind::Quad {
        endian,
        signed: true,
    };

    let cases = [
        ("byte", TypeKind::Byte { signed: true }),
        ("short", short(Endianness::Native)),
        ("long", long(Endianness::Native)),
        ("beshort", short(Endianness::Big)),
        ("belong", long(Endianness::Big)),
        ("quad", quad(Endianness::Native)),
        ("bequad", quad(Endianness::Big)),
        ("lequad", quad(Endianness::Little)),
    ];

    for (keyword, expected) in cases {
        assert_eq!(parse_type(keyword), Ok(("", expected)), "keyword: '{keyword}'");
    }
}
2289
#[test]
fn test_parse_rule_offset_absolute() {
    // Without a '>' prefix the level is 0; decimal, hex and negative offsets all parse.
    let cases = [("0", 0), ("16", 16), ("0x10", 16), ("-4", -4)];

    for (input, offset) in cases {
        assert_eq!(
            parse_rule_offset(input),
            Ok(("", (0, OffsetSpec::Absolute(offset)))),
            "input: '{input}'"
        );
    }
}
2309
#[test]
fn test_parse_rule_offset_child_rules() {
    // Each '>' in front of the offset raises the nesting level by one.
    let cases = [(">4", 1, 4), (">>8", 2, 8), (">>>12", 3, 12)];

    for (input, level, offset) in cases {
        assert_eq!(
            parse_rule_offset(input),
            Ok(("", (level, OffsetSpec::Absolute(offset)))),
            "input: '{input}'"
        );
    }
}
2325
#[test]
fn test_parse_rule_offset_with_whitespace() {
    // Blanks around the level markers and the offset are swallowed.
    let cases = [(" 0 ", 0, 0), ("  >4  ", 1, 4), ("\t>>0x10\t", 2, 16)];

    for (input, level, offset) in cases {
        assert_eq!(
            parse_rule_offset(input),
            Ok(("", (level, OffsetSpec::Absolute(offset)))),
            "input: '{input}'"
        );
    }
}
2341
#[test]
fn test_parse_rule_offset_with_remaining_input() {
    // The type keyword that follows the offset is left for the next parser.
    let cases = [("0 byte", "byte", 0, 0), (">4 string", "string", 1, 4)];

    for (input, rest, level, offset) in cases {
        assert_eq!(
            parse_rule_offset(input),
            Ok((rest, (level, OffsetSpec::Absolute(offset)))),
            "input: '{input}'"
        );
    }
}
2353
#[test]
fn test_parse_message_basic() {
    // Plain messages come back verbatim; an empty input yields an empty message.
    let cases = [
        ("ELF executable", "ELF executable"),
        ("PDF document", "PDF document"),
        ("", ""),
    ];

    for (input, expected) in cases {
        assert_eq!(
            parse_message(input),
            Ok(("", expected.to_string())),
            "input: '{input}'"
        );
    }
}
2366
#[test]
fn test_parse_message_with_whitespace() {
    // Leading and trailing blanks are stripped; a blank-only input becomes empty.
    let cases = [
        ("  ELF executable  ", "ELF executable"),
        ("\tPDF document\t", "PDF document"),
        ("   ", ""),
    ];

    for (input, expected) in cases {
        assert_eq!(
            parse_message(input),
            Ok(("", expected.to_string())),
            "input: '{input}'"
        );
    }
}
2379
#[test]
fn test_parse_message_complex() {
    // Punctuation and printf-style format specifiers pass through unchanged.
    for text in ["ELF 64-bit LSB executable", "ZIP archive, version %d.%d"] {
        assert_eq!(parse_message(text), Ok(("", text.to_string())));
    }
}
2391
#[test]
fn test_parse_magic_rule_basic() {
    // Top-level string rule with an escaped leading byte and a message.
    let (rest, parsed) =
        parse_magic_rule("0 string \\x7fELF ELF executable").expect("basic rule should parse");

    assert!(rest.is_empty());
    assert_eq!(parsed.level, 0);
    assert_eq!(parsed.offset, OffsetSpec::Absolute(0));
    assert_eq!(parsed.typ, TypeKind::String { max_length: None });
    assert_eq!(parsed.op, Operator::Equal);
    assert_eq!(parsed.value, Value::Bytes(vec![0x7f, 0x45, 0x4c, 0x46]));
    assert_eq!(parsed.message, "ELF executable");
    assert!(parsed.children.is_empty());
}
2406
#[test]
fn test_parse_magic_rule_child() {
    // A single '>' prefix marks a first-level child rule.
    let (rest, parsed) = parse_magic_rule(">4 byte 1 32-bit").expect("child rule should parse");

    assert!(rest.is_empty());
    assert_eq!(parsed.level, 1);
    assert_eq!(parsed.offset, OffsetSpec::Absolute(4));
    assert_eq!(parsed.typ, TypeKind::Byte { signed: true });
    assert_eq!(parsed.op, Operator::Equal);
    assert_eq!(parsed.value, Value::Uint(1));
    assert_eq!(parsed.message, "32-bit");
}
2420
#[test]
fn test_parse_magic_rule_with_operator() {
    // A mask glued to the type becomes the rule's operator.
    let (rest, parsed) = parse_magic_rule("0 lelong&0xf0000000 0x10000000 MIPS-II")
        .expect("masked rule should parse");

    let expected_type = TypeKind::Long {
        endian: Endianness::Little,
        signed: true,
    };

    assert!(rest.is_empty());
    assert_eq!(parsed.level, 0);
    assert_eq!(parsed.offset, OffsetSpec::Absolute(0));
    assert_eq!(parsed.typ, expected_type);
    assert_eq!(parsed.op, Operator::BitwiseAndMask(0xf000_0000));
    assert_eq!(parsed.value, Value::Uint(0x1000_0000));
    assert_eq!(parsed.message, "MIPS-II");
}
2440
#[test]
fn test_parse_magic_rule_no_message() {
    // The message is optional; leaving it out yields an empty string.
    let (rest, parsed) = parse_magic_rule("0 byte 0x7f").expect("message-less rule should parse");

    assert!(rest.is_empty());
    assert_eq!(parsed.level, 0);
    assert_eq!(parsed.offset, OffsetSpec::Absolute(0));
    assert_eq!(parsed.typ, TypeKind::Byte { signed: true });
    assert_eq!(parsed.op, Operator::Equal);
    assert_eq!(parsed.value, Value::Uint(0x7f));
    assert!(parsed.message.is_empty());
}
2454
#[test]
fn test_parse_magic_rule_nested() {
    // Two '>' prefixes give a second-level child rule.
    let (rest, parsed) = parse_magic_rule(">>8 leshort 0x014c Microsoft COFF")
        .expect("nested rule should parse");

    let expected_type = TypeKind::Short {
        endian: Endianness::Little,
        signed: true,
    };

    assert!(rest.is_empty());
    assert_eq!(parsed.level, 2);
    assert_eq!(parsed.offset, OffsetSpec::Absolute(8));
    assert_eq!(parsed.typ, expected_type);
    assert_eq!(parsed.op, Operator::Equal);
    assert_eq!(parsed.value, Value::Uint(0x014c));
    assert_eq!(parsed.message, "Microsoft COFF");
}
2474
#[test]
fn test_parse_magic_rule_with_whitespace() {
    // Generous spacing between every field must not change the result.
    let (rest, parsed) = parse_magic_rule("  >  4   byte   =   1   32-bit  ")
        .expect("padded rule should parse");

    assert!(rest.is_empty());
    assert_eq!(parsed.level, 1);
    assert_eq!(parsed.offset, OffsetSpec::Absolute(4));
    assert_eq!(parsed.typ, TypeKind::Byte { signed: true });
    assert_eq!(parsed.op, Operator::Equal);
    assert_eq!(parsed.value, Value::Uint(1));
    assert_eq!(parsed.message, "32-bit");
}
2488
#[test]
fn test_parse_magic_rule_string_value() {
    // A double-quoted value is parsed as a string value.
    let (rest, parsed) = parse_magic_rule("0 string \"PK\" ZIP archive")
        .expect("quoted-string rule should parse");

    assert!(rest.is_empty());
    assert_eq!(parsed.level, 0);
    assert_eq!(parsed.offset, OffsetSpec::Absolute(0));
    assert_eq!(parsed.typ, TypeKind::String { max_length: None });
    assert_eq!(parsed.op, Operator::Equal);
    assert_eq!(parsed.value, Value::String("PK".to_string()));
    assert_eq!(parsed.message, "ZIP archive");
}
2502
#[test]
fn test_parse_magic_rule_hex_offset() {
    // Offsets may be written in hexadecimal.
    let (rest, parsed) = parse_magic_rule("0x10 belong 0x12345678 Test data")
        .expect("hex-offset rule should parse");

    let expected_type = TypeKind::Long {
        endian: Endianness::Big,
        signed: true,
    };

    assert!(rest.is_empty());
    assert_eq!(parsed.level, 0);
    assert_eq!(parsed.offset, OffsetSpec::Absolute(16));
    assert_eq!(parsed.typ, expected_type);
    assert_eq!(parsed.op, Operator::Equal);
    assert_eq!(parsed.value, Value::Uint(0x1234_5678));
    assert_eq!(parsed.message, "Test data");
}
2522
#[test]
fn test_parse_magic_rule_negative_offset() {
    // A negative offset is kept as a negative absolute offset.
    let (rest, parsed) =
        parse_magic_rule("-4 byte 0 End marker").expect("negative-offset rule should parse");

    assert!(rest.is_empty());
    assert_eq!(parsed.level, 0);
    assert_eq!(parsed.offset, OffsetSpec::Absolute(-4));
    assert_eq!(parsed.typ, TypeKind::Byte { signed: true });
    assert_eq!(parsed.op, Operator::Equal);
    assert_eq!(parsed.value, Value::Uint(0));
    assert_eq!(parsed.message, "End marker");
}
2536
#[test]
fn test_parse_comment() {
    // The '#' marker is dropped and the remaining text is trimmed.
    let cases = [
        ("# This is a comment", "This is a comment"),
        ("#", ""),
        ("# ELF executables", "ELF executables"),
    ];

    for (input, expected) in cases {
        assert_eq!(
            parse_comment(input),
            Ok(("", expected.to_string())),
            "input: '{input}'"
        );
    }
}
2549
#[test]
fn test_parse_comment_with_whitespace() {
    // Indentation before '#' and blanks around the text are ignored.
    let cases = [
        ("  # Indented comment  ", "Indented comment"),
        ("\t#\tTabbed comment\t", "Tabbed comment"),
    ];

    for (input, expected) in cases {
        assert_eq!(
            parse_comment(input),
            Ok(("", expected.to_string())),
            "input: '{input}'"
        );
    }
}
2561
// Only lines made up entirely of whitespace (or nothing at all) count as
// empty; rules and comment lines do not.
#[test]
fn test_is_empty_line() {
    let blank = ["", "   ", "\t\t", " \t \t "];
    let not_blank = ["0 byte 1", "  # comment"];

    for line in blank {
        assert!(is_empty_line(line));
    }
    for line in not_blank {
        assert!(!is_empty_line(line));
    }
}
2571
// A line is a comment when `#` is its first non-whitespace character.
#[test]
fn test_is_comment_line() {
    let comments = [
        "# This is a comment",
        "#",
        "  # Indented comment",
        "\t# Tabbed comment",
    ];
    let non_comments = ["0 byte 1", "string test"];

    for line in comments {
        assert!(is_comment_line(line));
    }
    for line in non_comments {
        assert!(!is_comment_line(line));
    }
}
2581
// A continuation is a backslash at the end of the line, optionally followed
// by whitespace; a backslash in the middle of a line does not count.
#[test]
fn test_has_continuation() {
    let continued = [
        "0 string test \\",
        "message continues \\",
        "line ends with backslash\\",
        "  trailing whitespace  \\  ",
    ];
    let complete = [
        "0 string test",
        "no continuation",
        "backslash in middle \\ here",
    ];

    for line in continued {
        assert!(has_continuation(line));
    }
    for line in complete {
        assert!(!has_continuation(line));
    }
}
2592
// Every sample rule must parse completely and carry a message.
#[test]
fn test_parse_magic_rule_real_world_examples() {
    // Real examples from /usr/share/file/magic/elf
    const ELF_RULES: [&str; 6] = [
        "0 string \\177ELF ELF",
        ">4 byte 1 32-bit",
        ">4 byte 2 64-bit",
        ">5 byte 1 LSB",
        ">5 byte 2 MSB",
        ">>0 lelong&0xf0000000 0x10000000 MIPS-II",
    ];

    for example in ELF_RULES {
        let Ok((remaining, rule)) = parse_magic_rule(example) else {
            panic!("Failed to parse real-world example: '{example}'");
        };

        assert_eq!(remaining, "", "Unexpected remaining input for: '{example}'");

        // The ELF signature line is exempt from the non-empty message check.
        let message_ok = example.contains("\\177ELF") || !rule.message.is_empty();
        assert!(message_ok, "Empty message for: '{example}'");
    }
}
2620
// Table-driven check of a few boundary inputs: a rule without a message, a
// deeply nested rule with an empty quoted string, and a 32-bit all-ones value.
#[test]
fn test_parse_magic_rule_edge_cases() {
    // Each entry: (input line, level, type, value, message)
    let table = [
        (
            "0 byte 0",
            0,
            TypeKind::Byte { signed: true },
            Value::Uint(0),
            "",
        ),
        (
            ">>>16 string \"\" Empty string",
            3,
            TypeKind::String { max_length: None },
            Value::String(String::new()),
            "Empty string",
        ),
        (
            "0x100 lelong 0xFFFFFFFF Max value",
            0,
            TypeKind::Long {
                endian: Endianness::Little,
                signed: true,
            },
            Value::Uint(0xFFFF_FFFF),
            "Max value",
        ),
    ];

    for case in table {
        let (line, level, typ, value, message) = case;
        let (rest, parsed) = parse_magic_rule(line).unwrap();

        assert_eq!(rest, "");
        assert_eq!(parsed.level, level);
        assert_eq!(parsed.typ, typ);
        assert_eq!(parsed.value, value);
        assert_eq!(parsed.message, message);
    }
}
2660
// Incomplete or malformed rule lines must be rejected with an error.
#[test]
fn test_parse_magic_rule_invalid_input() {
    for invalid_input in [
        "",               // Empty input
        "invalid format", // No valid offset
        "0",              // Missing type
        "0 invalid_type", // Invalid type
        "0 byte",         // Missing value
    ] {
        let rejected = parse_magic_rule(invalid_input).is_err();
        assert!(
            rejected,
            "Should fail to parse invalid input: '{invalid_input}'"
        );
    }
}
2679
2680// Strength directive tests
// `+N` after `!:strength` is expected to yield `StrengthModifier::Add(N)`.
#[test]
fn test_parse_strength_directive_add() {
    let cases = [
        ("!:strength +10", StrengthModifier::Add(10)),
        ("!:strength +0", StrengthModifier::Add(0)),
        ("!:strength +100", StrengthModifier::Add(100)),
    ];

    for (directive, expected) in cases {
        assert_eq!(parse_strength_directive(directive), Ok(("", expected)));
    }
}
2696
// `-N` after `!:strength` is expected to yield `StrengthModifier::Subtract(N)`.
#[test]
fn test_parse_strength_directive_subtract() {
    let cases = [
        ("!:strength -5", StrengthModifier::Subtract(5)),
        ("!:strength -0", StrengthModifier::Subtract(0)),
        ("!:strength -50", StrengthModifier::Subtract(50)),
    ];

    for (directive, expected) in cases {
        assert_eq!(parse_strength_directive(directive), Ok(("", expected)));
    }
}
2712
// `*N` after `!:strength` is expected to yield `StrengthModifier::Multiply(N)`.
#[test]
fn test_parse_strength_directive_multiply() {
    let cases = [
        ("!:strength *2", StrengthModifier::Multiply(2)),
        ("!:strength *10", StrengthModifier::Multiply(10)),
    ];

    for (directive, expected) in cases {
        assert_eq!(parse_strength_directive(directive), Ok(("", expected)));
    }
}
2724
// `/N` after `!:strength` is expected to yield `StrengthModifier::Divide(N)`.
#[test]
fn test_parse_strength_directive_divide() {
    let cases = [
        ("!:strength /2", StrengthModifier::Divide(2)),
        ("!:strength /10", StrengthModifier::Divide(10)),
    ];

    for (directive, expected) in cases {
        assert_eq!(parse_strength_directive(directive), Ok(("", expected)));
    }
}
2736
// `=N` after `!:strength` is expected to yield `StrengthModifier::Set(N)`.
#[test]
fn test_parse_strength_directive_set_explicit() {
    let cases = [
        ("!:strength =50", StrengthModifier::Set(50)),
        ("!:strength =0", StrengthModifier::Set(0)),
        ("!:strength =100", StrengthModifier::Set(100)),
    ];

    for (directive, expected) in cases {
        assert_eq!(parse_strength_directive(directive), Ok(("", expected)));
    }
}
2752
// A number with no operator prefix is expected to behave like `=N`.
#[test]
fn test_parse_strength_directive_set_bare() {
    // Bare number implies Set
    let cases = [
        ("!:strength 50", StrengthModifier::Set(50)),
        ("!:strength 0", StrengthModifier::Set(0)),
        ("!:strength 100", StrengthModifier::Set(100)),
    ];

    for (directive, expected) in cases {
        assert_eq!(parse_strength_directive(directive), Ok(("", expected)));
    }
}
2769
// Leading indentation before `!:` and extra spaces between the keyword and
// its operand must not affect the parsed modifier.
#[test]
fn test_parse_strength_directive_with_whitespace() {
    let cases = [
        ("  !:strength +10", StrengthModifier::Add(10)),
        ("\t!:strength -5", StrengthModifier::Subtract(5)),
        ("!:strength  *2", StrengthModifier::Multiply(2)),
        ("!:strength   50", StrengthModifier::Set(50)),
    ];

    for (directive, expected) in cases {
        assert_eq!(parse_strength_directive(directive), Ok(("", expected)));
    }
}
2789
// Input following the directive's number is handed back untouched.
#[test]
fn test_parse_strength_directive_with_remaining_input() {
    // Should leave remaining content after the directive
    // (input, expected leftover, expected modifier)
    let cases = [
        ("!:strength +10 extra", " extra", StrengthModifier::Add(10)),
        ("!:strength 50\n", "\n", StrengthModifier::Set(50)),
    ];

    for (directive, leftover, expected) in cases {
        assert_eq!(
            parse_strength_directive(directive),
            Ok((leftover, expected))
        );
    }
}
2802
// Empty input, other `!:` directives, a missing `!:` prefix, and ordinary
// rule lines are all rejected.
#[test]
fn test_parse_strength_directive_invalid() {
    // Should fail on invalid input
    let rejected = ["", "!:invalid", "strength +10", "0 byte 1"];

    for input in rejected {
        assert!(parse_strength_directive(input).is_err());
    }
}
2811
// Strength directives are recognised with or without leading whitespace;
// rules, comments, empty lines and other `!:` directives are not.
#[test]
fn test_is_strength_directive() {
    let directives = [
        "!:strength +10",
        "!:strength -5",
        "!:strength 50",
        "  !:strength +10",
        "\t!:strength *2",
    ];
    let others = ["0 byte 1", "# comment", "", "!:mime application/pdf"];

    for line in directives {
        assert!(is_strength_directive(line));
    }
    for line in others {
        assert!(!is_strength_directive(line));
    }
}
2825
// Full u64 mask (0xffffffffffffffff) must parse successfully, not silently
// fall back to standalone '&' leaving the mask as leftover input.
#[test]
fn test_parse_type_and_operator_quad_full_width_mask() {
    let (rest, (kind, operator)) = parse_type_and_operator("uquad&0xffffffffffffffff").unwrap();
    let expected_kind = TypeKind::Quad {
        endian: Endianness::Native,
        signed: false,
    };

    assert_eq!(rest, "");
    assert_eq!(kind, expected_kind);
    assert_eq!(operator, Some(Operator::BitwiseAndMask(u64::MAX)));
}
2841
// Masks attached to a quad type: hex, decimal, and a bare `&` with no mask.
#[test]
fn test_parse_type_and_operator_quad_mask_various() {
    let cases = [
        // Hex mask within i64 range
        (
            "quad&0x7fffffffffffffff",
            Operator::BitwiseAndMask(i64::MAX as u64),
        ),
        // Decimal mask
        ("uquad&255", Operator::BitwiseAndMask(255)),
        // Standalone '&' (no digits following) still works
        ("uquad& ", Operator::BitwiseAnd),
    ];

    for (input, expected_op) in cases {
        let (rest, (_, operator)) = parse_type_and_operator(input).unwrap();
        assert_eq!(rest, "");
        assert_eq!(operator, Some(expected_op));
    }
}
2859
// Masks too large for a u64 must be reported as parse errors.
#[test]
fn test_parse_type_and_operator_mask_overflow_fails() {
    // (input, failure message shown if the parser wrongly accepts it)
    let cases = [
        // Decimal value exceeding u64::MAX must fail, not silently reinterpret
        (
            "uquad&99999999999999999999",
            "overflowing mask should produce a parse error",
        ),
        // Hex value exceeding u64 (17 hex digits) must fail
        (
            "uquad&0x1ffffffffffffffff",
            "overflowing hex mask should produce a parse error",
        ),
    ];

    for (input, why) in cases {
        assert!(parse_type_and_operator(input).is_err(), "{why}");
    }
}