Skip to main content

libmagic_rs/parser/
grammar.rs

// Copyright (c) 2025-2026 the libmagic-rs contributors
// SPDX-License-Identifier: Apache-2.0

//! Grammar parsing for magic files using nom parser combinators
//!
//! This module implements the parsing logic for magic file syntax, converting
//! text-based magic rules into the AST representation defined in ast.rs.
8
9use nom::{
10    IResult, Parser,
11    branch::alt,
12    bytes::complete::{tag, take_while},
13    character::complete::{char, digit1, hex_digit1, multispace0, none_of, one_of},
14    combinator::{map, opt, recognize},
15    error::Error as NomError,
16    multi::many0,
17    sequence::pair,
18};
19
20use crate::parser::ast::{MagicRule, OffsetSpec, Operator, StrengthModifier, TypeKind, Value};
21
22#[cfg(test)]
23use crate::parser::ast::Endianness;
24
25/// Parse a decimal number with overflow protection
26fn parse_decimal_number(input: &str) -> IResult<&str, i64> {
27    let (input, digits) = digit1(input)?;
28
29    // Check for potential overflow before parsing
30    if digits.len() > 19 {
31        // i64::MAX has 19 digits, so anything longer will definitely overflow
32        return Err(nom::Err::Error(nom::error::Error::new(
33            input,
34            nom::error::ErrorKind::MapRes,
35        )));
36    }
37
38    let number = digits.parse::<i64>().map_err(|_| {
39        nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::MapRes))
40    })?;
41    Ok((input, number))
42}
43
44/// Parse a decimal number as unsigned `u64` with overflow protection
45fn parse_unsigned_decimal_number(input: &str) -> IResult<&str, u64> {
46    let (input, digits) = digit1(input)?;
47
48    // u64::MAX (18446744073709551615) has 20 digits
49    if digits.len() > 20 {
50        return Err(nom::Err::Error(nom::error::Error::new(
51            input,
52            nom::error::ErrorKind::MapRes,
53        )));
54    }
55
56    let number = digits.parse::<u64>().map_err(|_| {
57        nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::MapRes))
58    })?;
59    Ok((input, number))
60}
61
62/// Parse a hexadecimal number (with 0x prefix) with overflow protection
63fn parse_hex_number(input: &str) -> IResult<&str, i64> {
64    let (input, _) = tag("0x")(input)?;
65    let (input, hex_str) = hex_digit1(input)?;
66
67    // Check for potential overflow - i64 can hold up to 16 hex digits (0x7FFFFFFFFFFFFFFF)
68    if hex_str.len() > 16 {
69        return Err(nom::Err::Error(nom::error::Error::new(
70            input,
71            nom::error::ErrorKind::MapRes,
72        )));
73    }
74
75    let number = i64::from_str_radix(hex_str, 16).map_err(|_| {
76        nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::MapRes))
77    })?;
78
79    Ok((input, number))
80}
81
82/// Parse a hexadecimal number (with 0x prefix) as unsigned `u64`
83fn parse_unsigned_hex_number(input: &str) -> IResult<&str, u64> {
84    let (input, _) = tag("0x")(input)?;
85    let (input, hex_str) = hex_digit1(input)?;
86
87    // u64 can hold up to 16 hex digits (0xFFFFFFFFFFFFFFFF)
88    if hex_str.len() > 16 {
89        return Err(nom::Err::Error(nom::error::Error::new(
90            input,
91            nom::error::ErrorKind::MapRes,
92        )));
93    }
94
95    let number = u64::from_str_radix(hex_str, 16).map_err(|_| {
96        nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::MapRes))
97    })?;
98
99    Ok((input, number))
100}
101
102/// Parse a non-negative number as unsigned `u64`
103///
104/// Supports both decimal and hexadecimal (0x prefix) formats.
105/// Does not handle a leading minus sign -- callers handle sign detection.
106fn parse_unsigned_number(input: &str) -> IResult<&str, u64> {
107    if input.starts_with("0x") {
108        parse_unsigned_hex_number(input)
109    } else {
110        parse_unsigned_decimal_number(input)
111    }
112}
113
114/// Parse a decimal or hexadecimal number
115///
116/// Supports both decimal (123, -456) and hexadecimal (0x1a2b, -0xFF) formats.
117///
118/// # Examples
119///
120/// ```
121/// use libmagic_rs::parser::grammar::parse_number;
122///
123/// assert_eq!(parse_number("123"), Ok(("", 123)));
124/// assert_eq!(parse_number("0x1a"), Ok(("", 26)));
125/// assert_eq!(parse_number("-42"), Ok(("", -42)));
126/// assert_eq!(parse_number("-0xFF"), Ok(("", -255)));
127/// ```
128///
129/// # Errors
130///
131/// Returns a nom parsing error if:
132/// - Input is empty or contains no valid digits
133/// - Hexadecimal number lacks proper "0x" prefix or contains invalid hex digits
134/// - Number cannot be parsed as a valid `i64` value
135/// - Input contains invalid characters for the detected number format
136pub fn parse_number(input: &str) -> IResult<&str, i64> {
137    let (input, sign) = opt(char('-')).parse(input)?;
138    let is_negative = sign.is_some();
139
140    // Check if input starts with "0x" - if so, it must be a valid hex number
141    let (input, number) = if input.starts_with("0x") {
142        parse_hex_number(input)?
143    } else {
144        parse_decimal_number(input)?
145    };
146
147    // Apply sign with overflow checking
148    let result = if is_negative {
149        number.checked_neg().ok_or_else(|| {
150            nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::MapRes))
151        })?
152    } else {
153        number
154    };
155
156    Ok((input, result))
157}
158
159/// Parse an offset specification for absolute offsets
160///
161/// Supports decimal and hexadecimal formats, both positive and negative.
162///
163/// # Examples
164///
165/// ```
166/// use libmagic_rs::parser::grammar::parse_offset;
167/// use libmagic_rs::parser::ast::OffsetSpec;
168///
169/// assert_eq!(parse_offset("0"), Ok(("", OffsetSpec::Absolute(0))));
170/// assert_eq!(parse_offset("123"), Ok(("", OffsetSpec::Absolute(123))));
171/// assert_eq!(parse_offset("0x10"), Ok(("", OffsetSpec::Absolute(16))));
172/// assert_eq!(parse_offset("-4"), Ok(("", OffsetSpec::Absolute(-4))));
173/// assert_eq!(parse_offset("-0xFF"), Ok(("", OffsetSpec::Absolute(-255))));
174/// ```
175///
176/// # Errors
177///
178/// Returns a nom parsing error if:
179/// - The input contains invalid number format (propagated from `parse_number`)
180/// - Input is empty or contains no parseable offset value
181/// - The offset value cannot be represented as a valid `i64`
182pub fn parse_offset(input: &str) -> IResult<&str, OffsetSpec> {
183    let (input, _) = multispace0(input)?;
184    let (input, offset_value) = parse_number(input)?;
185    let (input, _) = multispace0(input)?;
186
187    Ok((input, OffsetSpec::Absolute(offset_value)))
188}
189
190/// Parse comparison operators for magic rules
191///
192/// Supports both symbolic and text representations of operators:
193/// - `=` or `==` for equality
194/// - `!=` or `<>` for inequality
195/// - `<` for less-than
196/// - `>` for greater-than
197/// - `<=` for less-than-or-equal
198/// - `>=` for greater-than-or-equal
199/// - `&` for bitwise AND
200///
201/// # Examples
202///
203/// ```
204/// use libmagic_rs::parser::grammar::parse_operator;
205/// use libmagic_rs::parser::ast::Operator;
206///
207/// assert_eq!(parse_operator("="), Ok(("", Operator::Equal)));
208/// assert_eq!(parse_operator("=="), Ok(("", Operator::Equal)));
209/// assert_eq!(parse_operator("!="), Ok(("", Operator::NotEqual)));
210/// assert_eq!(parse_operator("<>"), Ok(("", Operator::NotEqual)));
211/// assert_eq!(parse_operator("<"), Ok(("", Operator::LessThan)));
212/// assert_eq!(parse_operator(">"), Ok(("", Operator::GreaterThan)));
213/// assert_eq!(parse_operator("<="), Ok(("", Operator::LessEqual)));
214/// assert_eq!(parse_operator(">="), Ok(("", Operator::GreaterEqual)));
215/// assert_eq!(parse_operator("&"), Ok(("", Operator::BitwiseAnd)));
216/// ```
217///
218/// # Errors
219///
220/// Returns a nom parsing error if:
221/// - Input does not start with a recognized operator symbol
222/// - Input is empty or contains no valid operator
223/// - Operator syntax is incomplete (e.g., just `!` without `=`)
224pub fn parse_operator(input: &str) -> IResult<&str, Operator> {
225    let (input, _) = multispace0(input)?;
226
227    // Try to parse each operator, starting with longer ones first
228    if let Ok((remaining, _)) = tag::<&str, &str, nom::error::Error<&str>>("==")(input) {
229        // Check that we don't have another '=' following (to reject "===")
230        if remaining.starts_with('=') {
231            return Err(nom::Err::Error(nom::error::Error::new(
232                input,
233                nom::error::ErrorKind::Tag,
234            )));
235        }
236        let (remaining, _) = multispace0(remaining)?;
237        return Ok((remaining, Operator::Equal));
238    }
239
240    if let Ok((remaining, _)) = tag::<&str, &str, nom::error::Error<&str>>("!=")(input) {
241        let (remaining, _) = multispace0(remaining)?;
242        return Ok((remaining, Operator::NotEqual));
243    }
244
245    if let Ok((remaining, _)) = tag::<&str, &str, nom::error::Error<&str>>("<>")(input) {
246        let (remaining, _) = multispace0(remaining)?;
247        return Ok((remaining, Operator::NotEqual));
248    }
249
250    if let Ok((remaining, _)) = tag::<&str, &str, nom::error::Error<&str>>("<=")(input) {
251        let (remaining, _) = multispace0(remaining)?;
252        return Ok((remaining, Operator::LessEqual));
253    }
254
255    if let Ok((remaining, _)) = tag::<&str, &str, nom::error::Error<&str>>(">=")(input) {
256        let (remaining, _) = multispace0(remaining)?;
257        return Ok((remaining, Operator::GreaterEqual));
258    }
259
260    if let Ok((remaining, _)) = tag::<&str, &str, nom::error::Error<&str>>("=")(input) {
261        // Check that we don't have another '=' following (to reject "==")
262        if remaining.starts_with('=') {
263            return Err(nom::Err::Error(nom::error::Error::new(
264                input,
265                nom::error::ErrorKind::Tag,
266            )));
267        }
268        let (remaining, _) = multispace0(remaining)?;
269        return Ok((remaining, Operator::Equal));
270    }
271
272    if let Ok((remaining, _)) = tag::<&str, &str, nom::error::Error<&str>>("&")(input) {
273        // Check that we don't have another '&' following (to reject "&&")
274        if remaining.starts_with('&') {
275            return Err(nom::Err::Error(nom::error::Error::new(
276                input,
277                nom::error::ErrorKind::Tag,
278            )));
279        }
280        let (remaining, _) = multispace0(remaining)?;
281        return Ok((remaining, Operator::BitwiseAnd));
282    }
283
284    if let Ok((remaining, _)) = tag::<&str, &str, nom::error::Error<&str>>("<")(input) {
285        let (remaining, _) = multispace0(remaining)?;
286        return Ok((remaining, Operator::LessThan));
287    }
288
289    if let Ok((remaining, _)) = tag::<&str, &str, nom::error::Error<&str>>(">")(input) {
290        let (remaining, _) = multispace0(remaining)?;
291        return Ok((remaining, Operator::GreaterThan));
292    }
293
294    // If no operator matches, return an error
295    Err(nom::Err::Error(nom::error::Error::new(
296        input,
297        nom::error::ErrorKind::Tag,
298    )))
299}
300
301/// Parse a single hex byte with \x prefix
302fn parse_hex_byte_with_prefix(input: &str) -> IResult<&str, u8> {
303    let (input, _) = tag("\\x")(input)?;
304    let (input, hex_str) = recognize(pair(
305        one_of("0123456789abcdefABCDEF"),
306        one_of("0123456789abcdefABCDEF"),
307    ))
308    .parse(input)?;
309    let byte_val = u8::from_str_radix(hex_str, 16)
310        .map_err(|_| nom::Err::Error(NomError::new(input, nom::error::ErrorKind::MapRes)))?;
311    Ok((input, byte_val))
312}
313
314/// Parse a hex byte sequence starting with \x prefix
315fn parse_hex_bytes_with_prefix(input: &str) -> IResult<&str, Vec<u8>> {
316    if input.starts_with("\\x") {
317        many0(parse_hex_byte_with_prefix).parse(input)
318    } else {
319        Err(nom::Err::Error(NomError::new(
320            input,
321            nom::error::ErrorKind::Tag,
322        )))
323    }
324}
325
326/// Parse a mixed hex and ASCII sequence (like \x7fELF)
327fn parse_mixed_hex_ascii(input: &str) -> IResult<&str, Vec<u8>> {
328    // Must start with \ to be considered an escape sequence
329    if !input.starts_with('\\') {
330        return Err(nom::Err::Error(NomError::new(
331            input,
332            nom::error::ErrorKind::Tag,
333        )));
334    }
335
336    let mut bytes = Vec::new();
337    let mut remaining = input;
338
339    while !remaining.is_empty() {
340        // Try to parse escape sequences first (hex, octal, etc.)
341        if let Ok((new_remaining, escaped_char)) = parse_escape_sequence(remaining) {
342            bytes.push(escaped_char as u8);
343            remaining = new_remaining;
344        } else if let Ok((new_remaining, hex_byte)) = parse_hex_byte_with_prefix(remaining) {
345            bytes.push(hex_byte);
346            remaining = new_remaining;
347        } else if let Ok((new_remaining, ascii_char)) =
348            none_of::<&str, &str, NomError<&str>>(" \t\n\r")(remaining)
349        {
350            // Parse regular ASCII character (not whitespace)
351            bytes.push(ascii_char as u8);
352            remaining = new_remaining;
353        } else {
354            // Stop if we can't parse anything more
355            break;
356        }
357    }
358
359    if bytes.is_empty() {
360        Err(nom::Err::Error(NomError::new(
361            input,
362            nom::error::ErrorKind::Tag,
363        )))
364    } else {
365        Ok((remaining, bytes))
366    }
367}
368
369/// Parse a hex byte sequence without prefix (only if it looks like pure hex bytes)
370fn parse_hex_bytes_no_prefix(input: &str) -> IResult<&str, Vec<u8>> {
371    // Only parse as hex bytes if:
372    // 1. Input has even number of hex digits (pairs)
373    // 2. All characters are hex digits
374    // 3. Doesn't start with 0x (that's a number)
375    // 4. Contains at least one non-decimal digit (a-f, A-F)
376
377    if input.starts_with("0x") || input.starts_with('-') {
378        return Err(nom::Err::Error(NomError::new(
379            input,
380            nom::error::ErrorKind::Tag,
381        )));
382    }
383
384    let hex_chars: String = input.chars().take_while(char::is_ascii_hexdigit).collect();
385
386    if hex_chars.is_empty() || hex_chars.len() % 2 != 0 {
387        return Err(nom::Err::Error(NomError::new(
388            input,
389            nom::error::ErrorKind::Tag,
390        )));
391    }
392
393    // Check if it contains non-decimal hex digits (a-f, A-F)
394    let has_hex_letters = hex_chars
395        .chars()
396        .any(|c| matches!(c, 'a'..='f' | 'A'..='F'));
397    if !has_hex_letters {
398        return Err(nom::Err::Error(NomError::new(
399            input,
400            nom::error::ErrorKind::Tag,
401        )));
402    }
403
404    // Parse pairs of hex digits
405    let mut bytes = Vec::with_capacity(hex_chars.len() / 2);
406    let mut chars = hex_chars.chars();
407    while let (Some(c1), Some(c2)) = (chars.next(), chars.next()) {
408        // Avoid format! allocation by parsing digits directly
409        let digit1 = c1
410            .to_digit(16)
411            .ok_or_else(|| nom::Err::Error(NomError::new(input, nom::error::ErrorKind::MapRes)))?;
412        let digit2 = c2
413            .to_digit(16)
414            .ok_or_else(|| nom::Err::Error(NomError::new(input, nom::error::ErrorKind::MapRes)))?;
415        let byte_val = u8::try_from((digit1 << 4) | digit2)
416            .map_err(|_| nom::Err::Error(NomError::new(input, nom::error::ErrorKind::MapRes)))?;
417        bytes.push(byte_val);
418    }
419
420    let remaining = &input[hex_chars.len()..];
421    Ok((remaining, bytes))
422}
423
424/// Parse a hex byte sequence (e.g., "\\x7f\\x45\\x4c\\x46", "7f454c46", or "\\x7fELF")
425fn parse_hex_bytes(input: &str) -> IResult<&str, Vec<u8>> {
426    alt((
427        parse_mixed_hex_ascii,
428        parse_hex_bytes_with_prefix,
429        parse_hex_bytes_no_prefix,
430    ))
431    .parse(input)
432}
433
434/// Parse escape sequences in strings
435fn parse_escape_sequence(input: &str) -> IResult<&str, char> {
436    let (input, _) = char('\\')(input)?;
437
438    // Try to parse octal escape sequence first (\377, \123, etc.)
439    if let Ok((remaining, octal_str)) = recognize(pair(
440        one_of::<&str, &str, NomError<&str>>("0123"),
441        pair(
442            one_of::<&str, &str, NomError<&str>>("01234567"),
443            one_of::<&str, &str, NomError<&str>>("01234567"),
444        ),
445    ))
446    .parse(input)
447    {
448        if let Ok(octal_value) = u8::from_str_radix(octal_str, 8) {
449            return Ok((remaining, octal_value as char));
450        }
451    }
452
453    // Parse standard escape sequences
454    let (input, escaped_char) = one_of("nrt\\\"'0")(input)?;
455
456    let result_char = match escaped_char {
457        'n' => '\n',
458        'r' => '\r',
459        't' => '\t',
460        '\\' => '\\',
461        '"' => '"',
462        '\'' => '\'',
463        '0' => '\0',
464        _ => unreachable!("one_of constrains input to known escape characters"),
465    };
466
467    Ok((input, result_char))
468}
469
470/// Parse a quoted string with escape sequences
471fn parse_quoted_string(input: &str) -> IResult<&str, String> {
472    let (input, _) = multispace0(input)?;
473    let (input, _) = char('"')(input)?;
474
475    let mut result = String::new();
476    let mut remaining = input;
477
478    loop {
479        // Try to parse an escape sequence first
480        if let Ok((new_remaining, escaped_char)) = parse_escape_sequence(remaining) {
481            result.push(escaped_char);
482            remaining = new_remaining;
483            continue;
484        }
485
486        // If no escape sequence, try to parse a regular character (not quote or backslash)
487        if let Ok((new_remaining, regular_char)) =
488            none_of::<&str, &str, NomError<&str>>("\"\\")(remaining)
489        {
490            result.push(regular_char);
491            remaining = new_remaining;
492            continue;
493        }
494
495        // If neither worked, we should be at the closing quote
496        break;
497    }
498
499    let (remaining, _) = char('"')(remaining)?;
500    let (remaining, _) = multispace0(remaining)?;
501
502    Ok((remaining, result))
503}
504
505/// Parse a numeric value (integer)
506///
507/// Non-negative literals are parsed directly as `u64` so the full unsigned
508/// 64-bit range is representable (required for `uquad` values above `i64::MAX`).
509/// Negative literals go through the signed `i64` path.
510fn parse_numeric_value(input: &str) -> IResult<&str, Value> {
511    let (input, _) = multispace0(input)?;
512
513    let (input, value) = if input.starts_with('-') {
514        // Negative: parse as i64
515        let (input, number) = parse_number(input)?;
516        (input, Value::Int(number))
517    } else {
518        // Non-negative: parse as u64 to support full unsigned 64-bit range
519        let (input, number) = parse_unsigned_number(input)?;
520        (input, Value::Uint(number))
521    };
522
523    let (input, _) = multispace0(input)?;
524    Ok((input, value))
525}
526
527/// Parse string and numeric literals for magic rule values
528///
529/// Supports:
530/// - Quoted strings with escape sequences: "Hello\nWorld", "ELF\0"
531/// - Numeric literals (decimal): 123, -456
532/// - Numeric literals (hexadecimal): 0x1a2b, -0xFF
533/// - Hex byte sequences: \\x7f\\x45\\x4c\\x46 or 7f454c46
534///
535/// # Examples
536///
537/// ```
538/// use libmagic_rs::parser::grammar::parse_value;
539/// use libmagic_rs::parser::ast::Value;
540///
541/// // String values
542/// assert_eq!(parse_value("\"Hello\""), Ok(("", Value::String("Hello".to_string()))));
543/// assert_eq!(parse_value("\"Line1\\nLine2\""), Ok(("", Value::String("Line1\nLine2".to_string()))));
544///
545/// // Numeric values
546/// assert_eq!(parse_value("123"), Ok(("", Value::Uint(123))));
547/// assert_eq!(parse_value("-456"), Ok(("", Value::Int(-456))));
548/// assert_eq!(parse_value("0x1a"), Ok(("", Value::Uint(26))));
549/// assert_eq!(parse_value("-0xFF"), Ok(("", Value::Int(-255))));
550///
551/// // Hex byte sequences
552/// assert_eq!(parse_value("\\x7f\\x45"), Ok(("", Value::Bytes(vec![0x7f, 0x45]))));
553/// ```
554///
555/// # Errors
556///
557/// Returns a nom parsing error if:
558/// - Input is empty or contains no valid value
559/// - Quoted string is not properly terminated
560/// - Numeric value cannot be parsed as a valid integer
561/// - Hex byte sequence contains invalid hex digits
562/// - Input contains invalid characters for the detected value format
563pub fn parse_value(input: &str) -> IResult<&str, Value> {
564    let (input, _) = multispace0(input)?;
565
566    // Handle empty input case - should fail for magic rules
567    if input.is_empty() {
568        return Err(nom::Err::Error(NomError::new(
569            input,
570            nom::error::ErrorKind::Tag,
571        )));
572    }
573
574    // Try to parse different value types in order of specificity
575    let (input, value) = alt((
576        // Try quoted string first
577        map(parse_quoted_string, Value::String),
578        // Try hex byte sequence before numeric (to catch patterns like "7f", "ab", "\\x7fELF", etc.)
579        map(parse_hex_bytes, Value::Bytes),
580        // Try numeric value last (for pure numbers like 0x123, 1, etc.)
581        parse_numeric_value,
582    ))
583    .parse(input)?;
584
585    Ok((input, value))
586}
587
588#[cfg(test)]
589mod tests {
590    use super::*;
591
592    /// Helper function to test parsing with various whitespace patterns
593    #[allow(dead_code)] // TODO: Use this helper in future whitespace tests
594    fn test_with_whitespace_variants<T, F>(input: &str, expected: &T, parser: F)
595    where
596        T: Clone + PartialEq + std::fmt::Debug,
597        F: Fn(&str) -> IResult<&str, T>,
598    {
599        // Test with various whitespace patterns - pre-allocate Vec with known capacity
600        let mut whitespace_variants = Vec::with_capacity(9);
601        whitespace_variants.extend([
602            format!(" {input}"),    // Leading space
603            format!("  {input}"),   // Leading spaces
604            format!("\t{input}"),   // Leading tab
605            format!("{input} "),    // Trailing space
606            format!("{input}  "),   // Trailing spaces
607            format!("{input}\t"),   // Trailing tab
608            format!(" {input} "),   // Both leading and trailing space
609            format!("  {input}  "), // Both leading and trailing spaces
610            format!("\t{input}\t"), // Both leading and trailing tabs
611        ]);
612
613        for variant in whitespace_variants {
614            assert_eq!(
615                parser(&variant),
616                Ok(("", expected.clone())),
617                "Failed to parse with whitespace: '{variant}'"
618            );
619        }
620    }
621
622    /// Helper function to test number parsing with remaining input
623    fn test_number_with_remaining_input() {
624        // Pre-allocate with known capacity for better performance
625        let test_cases = [
626            ("123abc", 123, "abc"),
627            ("0xFF rest", 255, " rest"),
628            ("-42 more", -42, " more"),
629            ("0x10,next", 16, ",next"),
630        ];
631
632        for (input, expected_num, expected_remaining) in test_cases {
633            assert_eq!(
634                parse_number(input),
635                Ok((expected_remaining, expected_num)),
636                "Failed to parse number with remaining input: '{input}'"
637            );
638        }
639    }
640
641    #[test]
642    fn test_parse_decimal_number() {
643        assert_eq!(parse_decimal_number("123"), Ok(("", 123)));
644        assert_eq!(parse_decimal_number("0"), Ok(("", 0)));
645        assert_eq!(parse_decimal_number("999"), Ok(("", 999)));
646
647        // Should fail on non-digits
648        assert!(parse_decimal_number("abc").is_err());
649        assert!(parse_decimal_number("").is_err());
650    }
651
652    #[test]
653    fn test_parse_hex_number() {
654        assert_eq!(parse_hex_number("0x0"), Ok(("", 0)));
655        assert_eq!(parse_hex_number("0x10"), Ok(("", 16)));
656        assert_eq!(parse_hex_number("0xFF"), Ok(("", 255)));
657        assert_eq!(parse_hex_number("0xabc"), Ok(("", 2748)));
658        assert_eq!(parse_hex_number("0xABC"), Ok(("", 2748)));
659
660        // Should fail without 0x prefix
661        assert!(parse_hex_number("FF").is_err());
662        assert!(parse_hex_number("10").is_err());
663
664        // Should fail on invalid hex digits
665        assert!(parse_hex_number("0xGG").is_err());
666    }
667
668    #[test]
669    fn test_parse_number_positive() {
670        // Decimal numbers
671        assert_eq!(parse_number("0"), Ok(("", 0)));
672        assert_eq!(parse_number("123"), Ok(("", 123)));
673        assert_eq!(parse_number("999"), Ok(("", 999)));
674
675        // Hexadecimal numbers
676        assert_eq!(parse_number("0x0"), Ok(("", 0)));
677        assert_eq!(parse_number("0x10"), Ok(("", 16)));
678        assert_eq!(parse_number("0xFF"), Ok(("", 255)));
679        assert_eq!(parse_number("0xabc"), Ok(("", 2748)));
680    }
681
682    #[test]
683    fn test_parse_number_negative() {
684        // Negative decimal numbers
685        assert_eq!(parse_number("-1"), Ok(("", -1)));
686        assert_eq!(parse_number("-123"), Ok(("", -123)));
687        assert_eq!(parse_number("-999"), Ok(("", -999)));
688
689        // Negative hexadecimal numbers
690        assert_eq!(parse_number("-0x1"), Ok(("", -1)));
691        assert_eq!(parse_number("-0x10"), Ok(("", -16)));
692        assert_eq!(parse_number("-0xFF"), Ok(("", -255)));
693        assert_eq!(parse_number("-0xabc"), Ok(("", -2748)));
694    }
695
696    #[test]
697    fn test_parse_number_edge_cases() {
698        // Zero with different formats
699        assert_eq!(parse_number("0"), Ok(("", 0)));
700        assert_eq!(parse_number("-0"), Ok(("", 0)));
701        assert_eq!(parse_number("0x0"), Ok(("", 0)));
702        assert_eq!(parse_number("-0x0"), Ok(("", 0)));
703
704        // Large numbers
705        assert_eq!(parse_number("2147483647"), Ok(("", 2_147_483_647))); // i32::MAX
706        assert_eq!(parse_number("-2147483648"), Ok(("", -2_147_483_648))); // i32::MIN
707        assert_eq!(parse_number("0x7FFFFFFF"), Ok(("", 2_147_483_647))); // i32::MAX in hex
708
709        // Should fail on invalid input
710        assert!(parse_number("").is_err());
711        assert!(parse_number("abc").is_err());
712        assert!(parse_number("0xGG").is_err());
713        assert!(parse_number("--123").is_err());
714    }
715
716    #[test]
717    fn test_parse_number_with_remaining_input() {
718        // Use helper function to reduce code duplication
719        test_number_with_remaining_input();
720    }
721
722    #[test]
723    fn test_parse_offset_absolute_positive() {
724        assert_eq!(parse_offset("0"), Ok(("", OffsetSpec::Absolute(0))));
725        assert_eq!(parse_offset("123"), Ok(("", OffsetSpec::Absolute(123))));
726        assert_eq!(parse_offset("999"), Ok(("", OffsetSpec::Absolute(999))));
727
728        // Hexadecimal offsets
729        assert_eq!(parse_offset("0x0"), Ok(("", OffsetSpec::Absolute(0))));
730        assert_eq!(parse_offset("0x10"), Ok(("", OffsetSpec::Absolute(16))));
731        assert_eq!(parse_offset("0xFF"), Ok(("", OffsetSpec::Absolute(255))));
732        assert_eq!(parse_offset("0xabc"), Ok(("", OffsetSpec::Absolute(2748))));
733    }
734
735    #[test]
736    fn test_parse_offset_absolute_negative() {
737        assert_eq!(parse_offset("-1"), Ok(("", OffsetSpec::Absolute(-1))));
738        assert_eq!(parse_offset("-123"), Ok(("", OffsetSpec::Absolute(-123))));
739        assert_eq!(parse_offset("-999"), Ok(("", OffsetSpec::Absolute(-999))));
740
741        // Negative hexadecimal offsets
742        assert_eq!(parse_offset("-0x1"), Ok(("", OffsetSpec::Absolute(-1))));
743        assert_eq!(parse_offset("-0x10"), Ok(("", OffsetSpec::Absolute(-16))));
744        assert_eq!(parse_offset("-0xFF"), Ok(("", OffsetSpec::Absolute(-255))));
745        assert_eq!(
746            parse_offset("-0xabc"),
747            Ok(("", OffsetSpec::Absolute(-2748)))
748        );
749    }
750
751    #[test]
752    fn test_parse_offset_with_whitespace() {
753        // Leading whitespace
754        assert_eq!(parse_offset(" 123"), Ok(("", OffsetSpec::Absolute(123))));
755        assert_eq!(parse_offset("  0x10"), Ok(("", OffsetSpec::Absolute(16))));
756        assert_eq!(parse_offset("\t-42"), Ok(("", OffsetSpec::Absolute(-42))));
757
758        // Trailing whitespace
759        assert_eq!(parse_offset("123 "), Ok(("", OffsetSpec::Absolute(123))));
760        assert_eq!(parse_offset("0x10  "), Ok(("", OffsetSpec::Absolute(16))));
761        assert_eq!(parse_offset("-42\t"), Ok(("", OffsetSpec::Absolute(-42))));
762
763        // Both leading and trailing whitespace
764        assert_eq!(parse_offset(" 123 "), Ok(("", OffsetSpec::Absolute(123))));
765        assert_eq!(parse_offset("  0x10  "), Ok(("", OffsetSpec::Absolute(16))));
766        assert_eq!(parse_offset("\t-42\t"), Ok(("", OffsetSpec::Absolute(-42))));
767    }
768
769    #[test]
770    fn test_parse_offset_with_remaining_input() {
771        // Should parse offset and leave remaining input
772        assert_eq!(
773            parse_offset("123 byte"),
774            Ok(("byte", OffsetSpec::Absolute(123)))
775        );
776        assert_eq!(parse_offset("0xFF ="), Ok(("=", OffsetSpec::Absolute(255))));
777        assert_eq!(
778            parse_offset("-42,next"),
779            Ok((",next", OffsetSpec::Absolute(-42)))
780        );
781        assert_eq!(
782            parse_offset("0x10\tlong"),
783            Ok(("long", OffsetSpec::Absolute(16)))
784        );
785    }
786
787    #[test]
788    fn test_parse_offset_edge_cases() {
789        // Zero with different formats
790        assert_eq!(parse_offset("0"), Ok(("", OffsetSpec::Absolute(0))));
791        assert_eq!(parse_offset("-0"), Ok(("", OffsetSpec::Absolute(0))));
792        assert_eq!(parse_offset("0x0"), Ok(("", OffsetSpec::Absolute(0))));
793        assert_eq!(parse_offset("-0x0"), Ok(("", OffsetSpec::Absolute(0))));
794
795        // Large offsets
796        assert_eq!(
797            parse_offset("2147483647"),
798            Ok(("", OffsetSpec::Absolute(2_147_483_647)))
799        );
800        assert_eq!(
801            parse_offset("-2147483648"),
802            Ok(("", OffsetSpec::Absolute(-2_147_483_648)))
803        );
804        assert_eq!(
805            parse_offset("0x7FFFFFFF"),
806            Ok(("", OffsetSpec::Absolute(2_147_483_647)))
807        );
808
809        // Should fail on invalid input
810        assert!(parse_offset("").is_err());
811        assert!(parse_offset("abc").is_err());
812        assert!(parse_offset("0xGG").is_err());
813        assert!(parse_offset("--123").is_err());
814    }
815
816    #[test]
817    fn test_parse_offset_common_magic_file_values() {
818        // Common offsets found in magic files
819        assert_eq!(parse_offset("0"), Ok(("", OffsetSpec::Absolute(0)))); // File start
820        assert_eq!(parse_offset("4"), Ok(("", OffsetSpec::Absolute(4)))); // After magic number
821        assert_eq!(parse_offset("16"), Ok(("", OffsetSpec::Absolute(16)))); // Common header offset
822        assert_eq!(parse_offset("0x10"), Ok(("", OffsetSpec::Absolute(16)))); // Same as above in hex
823        assert_eq!(parse_offset("512"), Ok(("", OffsetSpec::Absolute(512)))); // Sector boundary
824        assert_eq!(parse_offset("0x200"), Ok(("", OffsetSpec::Absolute(512)))); // Same in hex
825
826        // Negative offsets (from end of file)
827        assert_eq!(parse_offset("-4"), Ok(("", OffsetSpec::Absolute(-4)))); // 4 bytes from end
828        assert_eq!(parse_offset("-16"), Ok(("", OffsetSpec::Absolute(-16)))); // 16 bytes from end
829        assert_eq!(parse_offset("-0x10"), Ok(("", OffsetSpec::Absolute(-16)))); // Same in hex
830    }
831
832    #[test]
833    fn test_parse_offset_boundary_values() {
834        // Test boundary values that might cause issues
835        assert_eq!(parse_offset("1"), Ok(("", OffsetSpec::Absolute(1))));
836        assert_eq!(parse_offset("-1"), Ok(("", OffsetSpec::Absolute(-1))));
837
838        // Powers of 2 (common in binary formats)
839        assert_eq!(parse_offset("256"), Ok(("", OffsetSpec::Absolute(256))));
840        assert_eq!(parse_offset("0x100"), Ok(("", OffsetSpec::Absolute(256))));
841        assert_eq!(parse_offset("1024"), Ok(("", OffsetSpec::Absolute(1024))));
842        assert_eq!(parse_offset("0x400"), Ok(("", OffsetSpec::Absolute(1024))));
843
844        // Large but reasonable file offsets
845        assert_eq!(
846            parse_offset("1048576"),
847            Ok(("", OffsetSpec::Absolute(1_048_576)))
848        ); // 1MB
849        assert_eq!(
850            parse_offset("0x100000"),
851            Ok(("", OffsetSpec::Absolute(1_048_576)))
852        );
853    }
854
855    // Operator parsing tests
856    #[test]
857    fn test_parse_operator_equality() {
858        // Single equals sign
859        assert_eq!(parse_operator("="), Ok(("", Operator::Equal)));
860
861        // Double equals sign
862        assert_eq!(parse_operator("=="), Ok(("", Operator::Equal)));
863
864        // With whitespace
865        assert_eq!(parse_operator(" = "), Ok(("", Operator::Equal)));
866        assert_eq!(parse_operator("  ==  "), Ok(("", Operator::Equal)));
867        assert_eq!(parse_operator("\t=\t"), Ok(("", Operator::Equal)));
868    }
869
870    #[test]
871    fn test_parse_operator_inequality() {
872        // Not equals
873        assert_eq!(parse_operator("!="), Ok(("", Operator::NotEqual)));
874
875        // Alternative not equals syntax
876        assert_eq!(parse_operator("<>"), Ok(("", Operator::NotEqual)));
877
878        // With whitespace
879        assert_eq!(parse_operator(" != "), Ok(("", Operator::NotEqual)));
880        assert_eq!(parse_operator("  <>  "), Ok(("", Operator::NotEqual)));
881        assert_eq!(parse_operator("\t!=\t"), Ok(("", Operator::NotEqual)));
882    }
883
884    #[test]
885    fn test_parse_operator_bitwise_and() {
886        // Bitwise AND
887        assert_eq!(parse_operator("&"), Ok(("", Operator::BitwiseAnd)));
888
889        // With whitespace
890        assert_eq!(parse_operator(" & "), Ok(("", Operator::BitwiseAnd)));
891        assert_eq!(parse_operator("  &  "), Ok(("", Operator::BitwiseAnd)));
892        assert_eq!(parse_operator("\t&\t"), Ok(("", Operator::BitwiseAnd)));
893    }
894
895    #[test]
896    fn test_parse_operator_with_remaining_input() {
897        // Should parse operator and leave remaining input
898        assert_eq!(parse_operator("= 123"), Ok(("123", Operator::Equal)));
899        assert_eq!(
900            parse_operator("!= value"),
901            Ok(("value", Operator::NotEqual))
902        );
903        assert_eq!(parse_operator("& 0xFF"), Ok(("0xFF", Operator::BitwiseAnd)));
904        assert_eq!(
905            parse_operator("== \"string\""),
906            Ok(("\"string\"", Operator::Equal))
907        );
908        assert_eq!(parse_operator("<> test"), Ok(("test", Operator::NotEqual)));
909    }
910
911    #[test]
912    fn test_parse_operator_precedence() {
913        // Test that longer operators are matched first
914        // This ensures "==" is parsed as Equal, not "=" followed by "="
915        assert_eq!(parse_operator("=="), Ok(("", Operator::Equal)));
916        assert_eq!(parse_operator("== extra"), Ok(("extra", Operator::Equal)));
917
918        // Test that "!=" is parsed correctly, not as "!" followed by "="
919        assert_eq!(parse_operator("!="), Ok(("", Operator::NotEqual)));
920        assert_eq!(
921            parse_operator("!= extra"),
922            Ok(("extra", Operator::NotEqual))
923        );
924
925        // Test that "<>" is parsed correctly, not as "<" followed by ">"
926        assert_eq!(parse_operator("<>"), Ok(("", Operator::NotEqual)));
927        assert_eq!(
928            parse_operator("<> extra"),
929            Ok(("extra", Operator::NotEqual))
930        );
931
932        // Test that "<=" is parsed as LessEqual, not "<" followed by "="
933        assert_eq!(parse_operator("<="), Ok(("", Operator::LessEqual)));
934        assert_eq!(
935            parse_operator("<= extra"),
936            Ok(("extra", Operator::LessEqual))
937        );
938
939        // Test that ">=" is parsed as GreaterEqual, not ">" followed by "="
940        assert_eq!(parse_operator(">="), Ok(("", Operator::GreaterEqual)));
941        assert_eq!(
942            parse_operator(">= extra"),
943            Ok(("extra", Operator::GreaterEqual))
944        );
945    }
946
947    #[test]
948    fn test_parse_operator_invalid_input() {
949        // Should fail on invalid operators
950        assert!(parse_operator("").is_err());
951        assert!(parse_operator("abc").is_err());
952        assert!(parse_operator("123").is_err());
953        assert!(parse_operator("!").is_err());
954        assert!(parse_operator("===").is_err()); // Too many equals
955        assert!(parse_operator("&&").is_err()); // Double ampersand not supported
956    }
957
958    #[test]
959    fn test_parse_operator_edge_cases() {
960        // Test operators at start of various contexts - multispace0 consumes all whitespace
961        assert_eq!(parse_operator("=\n"), Ok(("", Operator::Equal)));
962        assert_eq!(parse_operator("!=\r\n"), Ok(("", Operator::NotEqual)));
963        assert_eq!(parse_operator("&\t\t"), Ok(("", Operator::BitwiseAnd)));
964
965        // Test with mixed whitespace
966        assert_eq!(parse_operator(" \t = \t "), Ok(("", Operator::Equal)));
967        assert_eq!(parse_operator("\t != \t"), Ok(("", Operator::NotEqual)));
968        assert_eq!(parse_operator(" \t& \t "), Ok(("", Operator::BitwiseAnd)));
969    }
970
971    #[test]
972    fn test_parse_operator_common_magic_file_patterns() {
973        // Test patterns commonly found in magic files
974        assert_eq!(
975            parse_operator("= 0x7f454c46"),
976            Ok(("0x7f454c46", Operator::Equal))
977        );
978        assert_eq!(parse_operator("!= 0"), Ok(("0", Operator::NotEqual)));
979        assert_eq!(
980            parse_operator("& 0xFF00"),
981            Ok(("0xFF00", Operator::BitwiseAnd))
982        );
983        assert_eq!(
984            parse_operator("== \"ELF\""),
985            Ok(("\"ELF\"", Operator::Equal))
986        );
987        assert_eq!(parse_operator("<> \"\""), Ok(("\"\"", Operator::NotEqual)));
988
989        // Test with various spacing patterns found in real magic files
990        assert_eq!(
991            parse_operator("=\t0x504b0304"),
992            Ok(("0x504b0304", Operator::Equal))
993        );
994        assert_eq!(parse_operator("!=  0"), Ok(("0", Operator::NotEqual)));
995        assert_eq!(
996            parse_operator("&   0xFFFF"),
997            Ok(("0xFFFF", Operator::BitwiseAnd))
998        );
999    }
1000
1001    #[test]
1002    fn test_parse_operator_all_variants() {
1003        // Ensure all operator variants are tested
1004        let test_cases = [
1005            ("=", Operator::Equal),
1006            ("==", Operator::Equal),
1007            ("!=", Operator::NotEqual),
1008            ("<>", Operator::NotEqual),
1009            ("<", Operator::LessThan),
1010            (">", Operator::GreaterThan),
1011            ("<=", Operator::LessEqual),
1012            (">=", Operator::GreaterEqual),
1013            ("&", Operator::BitwiseAnd),
1014        ];
1015
1016        for (input, expected) in test_cases {
1017            assert_eq!(
1018                parse_operator(input),
1019                Ok(("", expected)),
1020                "Failed to parse operator: '{input}'"
1021            );
1022        }
1023    }
1024
1025    #[test]
1026    fn test_parse_operator_less_than() {
1027        // Basic less-than
1028        assert_eq!(parse_operator("<"), Ok(("", Operator::LessThan)));
1029
1030        // With whitespace
1031        assert_eq!(parse_operator(" < "), Ok(("", Operator::LessThan)));
1032        assert_eq!(parse_operator("  <  "), Ok(("", Operator::LessThan)));
1033        assert_eq!(parse_operator("\t<\t"), Ok(("", Operator::LessThan)));
1034
1035        // With remaining input
1036        assert_eq!(parse_operator("< 42"), Ok(("42", Operator::LessThan)));
1037    }
1038
1039    #[test]
1040    fn test_parse_operator_greater_than() {
1041        // Basic greater-than
1042        assert_eq!(parse_operator(">"), Ok(("", Operator::GreaterThan)));
1043
1044        // With whitespace
1045        assert_eq!(parse_operator(" > "), Ok(("", Operator::GreaterThan)));
1046        assert_eq!(parse_operator("  >  "), Ok(("", Operator::GreaterThan)));
1047        assert_eq!(parse_operator("\t>\t"), Ok(("", Operator::GreaterThan)));
1048
1049        // With remaining input
1050        assert_eq!(parse_operator("> 42"), Ok(("42", Operator::GreaterThan)));
1051    }
1052
1053    #[test]
1054    fn test_parse_operator_less_equal() {
1055        // Basic less-or-equal
1056        assert_eq!(parse_operator("<="), Ok(("", Operator::LessEqual)));
1057
1058        // With whitespace
1059        assert_eq!(parse_operator(" <= "), Ok(("", Operator::LessEqual)));
1060        assert_eq!(parse_operator("  <=  "), Ok(("", Operator::LessEqual)));
1061        assert_eq!(parse_operator("\t<=\t"), Ok(("", Operator::LessEqual)));
1062
1063        // With remaining input
1064        assert_eq!(parse_operator("<= 42"), Ok(("42", Operator::LessEqual)));
1065    }
1066
1067    #[test]
1068    fn test_parse_operator_greater_equal() {
1069        // Basic greater-or-equal
1070        assert_eq!(parse_operator(">="), Ok(("", Operator::GreaterEqual)));
1071
1072        // With whitespace
1073        assert_eq!(parse_operator(" >= "), Ok(("", Operator::GreaterEqual)));
1074        assert_eq!(parse_operator("  >=  "), Ok(("", Operator::GreaterEqual)));
1075        assert_eq!(parse_operator("\t>=\t"), Ok(("", Operator::GreaterEqual)));
1076
1077        // With remaining input
1078        assert_eq!(parse_operator(">= 42"), Ok(("42", Operator::GreaterEqual)));
1079    }
1080
1081    #[test]
1082    fn test_parse_operator_comparison_disambiguation() {
1083        // <> still parses as NotEqual
1084        assert_eq!(parse_operator("<>"), Ok(("", Operator::NotEqual)));
1085
1086        // <= parses as LessEqual, not LessThan with "=" remaining
1087        assert_eq!(parse_operator("<="), Ok(("", Operator::LessEqual)));
1088
1089        // >= parses as GreaterEqual, not GreaterThan with "=" remaining
1090        assert_eq!(parse_operator(">="), Ok(("", Operator::GreaterEqual)));
1091
1092        // "< >" (with space) parses as LessThan with "> " remaining
1093        assert_eq!(parse_operator("< >"), Ok((">", Operator::LessThan)));
1094
1095        // "> =" (with space) parses as GreaterThan with "= " remaining
1096        assert_eq!(parse_operator("> ="), Ok(("=", Operator::GreaterThan)));
1097    }
1098
1099    // Value parsing tests
1100    #[test]
1101    fn test_parse_hex_bytes_with_backslash_x() {
1102        // Single hex byte with \x prefix
1103        assert_eq!(parse_hex_bytes("\\x7f"), Ok(("", vec![0x7f])));
1104        assert_eq!(parse_hex_bytes("\\x45"), Ok(("", vec![0x45])));
1105        assert_eq!(parse_hex_bytes("\\x00"), Ok(("", vec![0x00])));
1106        assert_eq!(parse_hex_bytes("\\xFF"), Ok(("", vec![0xFF])));
1107
1108        // Multiple hex bytes with \x prefix
1109        assert_eq!(
1110            parse_hex_bytes("\\x7f\\x45\\x4c\\x46"),
1111            Ok(("", vec![0x7f, 0x45, 0x4c, 0x46]))
1112        );
1113        assert_eq!(
1114            parse_hex_bytes("\\x50\\x4b\\x03\\x04"),
1115            Ok(("", vec![0x50, 0x4b, 0x03, 0x04]))
1116        );
1117    }
1118
1119    #[test]
1120    fn test_parse_hex_bytes_without_prefix() {
1121        // Single hex byte without prefix (only works if it contains hex letters)
1122        assert_eq!(parse_hex_bytes("7f"), Ok(("", vec![0x7f])));
1123        assert_eq!(
1124            parse_hex_bytes("45"),
1125            Err(nom::Err::Error(NomError::new(
1126                "45",
1127                nom::error::ErrorKind::Tag
1128            )))
1129        ); // No hex letters
1130        assert_eq!(parse_hex_bytes("ab"), Ok(("", vec![0xab])));
1131        assert_eq!(parse_hex_bytes("FF"), Ok(("", vec![0xFF])));
1132
1133        // Multiple hex bytes without prefix
1134        assert_eq!(
1135            parse_hex_bytes("7f454c46"),
1136            Ok(("", vec![0x7f, 0x45, 0x4c, 0x46]))
1137        );
1138        assert_eq!(
1139            parse_hex_bytes("504b0304"),
1140            Ok(("", vec![0x50, 0x4b, 0x03, 0x04]))
1141        );
1142    }
1143
1144    #[test]
1145    fn test_parse_hex_bytes_mixed_case() {
1146        // Test mixed case hex digits
1147        assert_eq!(parse_hex_bytes("aB"), Ok(("", vec![0xab])));
1148        assert_eq!(parse_hex_bytes("Cd"), Ok(("", vec![0xcd])));
1149        assert_eq!(parse_hex_bytes("\\xEf"), Ok(("", vec![0xef])));
1150        assert_eq!(parse_hex_bytes("\\x1A"), Ok(("", vec![0x1a])));
1151    }
1152
1153    #[test]
1154    fn test_parse_hex_bytes_empty() {
1155        // Empty input should return error (no hex bytes to parse)
1156        assert_eq!(
1157            parse_hex_bytes(""),
1158            Err(nom::Err::Error(NomError::new(
1159                "",
1160                nom::error::ErrorKind::Tag
1161            )))
1162        );
1163    }
1164
1165    #[test]
1166    fn test_parse_hex_bytes_with_remaining_input() {
1167        // Should parse hex bytes and leave remaining input
1168        assert_eq!(
1169            parse_hex_bytes("7f45 rest"),
1170            Ok((" rest", vec![0x7f, 0x45]))
1171        );
1172        assert_eq!(
1173            parse_hex_bytes("\\x50\\x4b next"),
1174            Ok((" next", vec![0x50, 0x4b]))
1175        );
1176        assert_eq!(parse_hex_bytes("ab\""), Ok(("\"", vec![0xab])));
1177    }
1178
1179    #[test]
1180    fn test_parse_escape_sequence() {
1181        // Standard escape sequences
1182        assert_eq!(parse_escape_sequence("\\n"), Ok(("", '\n')));
1183        assert_eq!(parse_escape_sequence("\\r"), Ok(("", '\r')));
1184        assert_eq!(parse_escape_sequence("\\t"), Ok(("", '\t')));
1185        assert_eq!(parse_escape_sequence("\\\\"), Ok(("", '\\')));
1186        assert_eq!(parse_escape_sequence("\\\""), Ok(("", '"')));
1187        assert_eq!(parse_escape_sequence("\\'"), Ok(("", '\'')));
1188        assert_eq!(parse_escape_sequence("\\0"), Ok(("", '\0')));
1189    }
1190
1191    #[test]
1192    fn test_parse_escape_sequence_with_remaining() {
1193        // Should parse escape and leave remaining input
1194        assert_eq!(parse_escape_sequence("\\n rest"), Ok((" rest", '\n')));
1195        assert_eq!(parse_escape_sequence("\\t\""), Ok(("\"", '\t')));
1196    }
1197
1198    #[test]
1199    fn test_parse_escape_sequence_invalid() {
1200        // Should fail on invalid escape sequences
1201        assert!(parse_escape_sequence("n").is_err()); // Missing backslash
1202        assert!(parse_escape_sequence("\\").is_err()); // Incomplete escape
1203        assert!(parse_escape_sequence("").is_err()); // Empty input
1204    }
1205
1206    #[test]
1207    fn test_parse_quoted_string_simple() {
1208        // Simple quoted strings
1209        assert_eq!(
1210            parse_quoted_string("\"hello\""),
1211            Ok(("", "hello".to_string()))
1212        );
1213        assert_eq!(
1214            parse_quoted_string("\"world\""),
1215            Ok(("", "world".to_string()))
1216        );
1217        assert_eq!(parse_quoted_string("\"\""), Ok(("", String::new())));
1218    }
1219
1220    #[test]
1221    fn test_parse_quoted_string_with_escapes() {
1222        // Strings with escape sequences
1223        assert_eq!(
1224            parse_quoted_string("\"Hello\\nWorld\""),
1225            Ok(("", "Hello\nWorld".to_string()))
1226        );
1227        assert_eq!(
1228            parse_quoted_string("\"Tab\\tSeparated\""),
1229            Ok(("", "Tab\tSeparated".to_string()))
1230        );
1231        assert_eq!(
1232            parse_quoted_string("\"Quote: \\\"text\\\"\""),
1233            Ok(("", "Quote: \"text\"".to_string()))
1234        );
1235        assert_eq!(
1236            parse_quoted_string("\"Backslash: \\\\\""),
1237            Ok(("", "Backslash: \\".to_string()))
1238        );
1239        assert_eq!(
1240            parse_quoted_string("\"Null\\0terminated\""),
1241            Ok(("", "Null\0terminated".to_string()))
1242        );
1243    }
1244
1245    #[test]
1246    fn test_parse_quoted_string_with_whitespace() {
1247        // Strings with leading/trailing whitespace
1248        assert_eq!(
1249            parse_quoted_string(" \"hello\" "),
1250            Ok(("", "hello".to_string()))
1251        );
1252        assert_eq!(
1253            parse_quoted_string("\t\"world\"\t"),
1254            Ok(("", "world".to_string()))
1255        );
1256        assert_eq!(
1257            parse_quoted_string("  \"test\"  "),
1258            Ok(("", "test".to_string()))
1259        );
1260    }
1261
1262    #[test]
1263    fn test_parse_quoted_string_with_remaining_input() {
1264        // Should parse string and leave remaining input
1265        assert_eq!(
1266            parse_quoted_string("\"hello\" world"),
1267            Ok(("world", "hello".to_string()))
1268        );
1269        assert_eq!(
1270            parse_quoted_string("\"test\" = 123"),
1271            Ok(("= 123", "test".to_string()))
1272        );
1273    }
1274
1275    #[test]
1276    fn test_parse_quoted_string_invalid() {
1277        // Should fail on invalid quoted strings
1278        assert!(parse_quoted_string("hello").is_err()); // No quotes
1279        assert!(parse_quoted_string("\"hello").is_err()); // Missing closing quote
1280        assert!(parse_quoted_string("hello\"").is_err()); // Missing opening quote
1281        assert!(parse_quoted_string("").is_err()); // Empty input
1282    }
1283
1284    #[test]
1285    fn test_parse_numeric_value_positive() {
1286        // Positive integers
1287        assert_eq!(parse_numeric_value("0"), Ok(("", Value::Uint(0))));
1288        assert_eq!(parse_numeric_value("123"), Ok(("", Value::Uint(123))));
1289        assert_eq!(parse_numeric_value("999"), Ok(("", Value::Uint(999))));
1290
1291        // Positive hex values
1292        assert_eq!(parse_numeric_value("0x0"), Ok(("", Value::Uint(0))));
1293        assert_eq!(parse_numeric_value("0x10"), Ok(("", Value::Uint(16))));
1294        assert_eq!(parse_numeric_value("0xFF"), Ok(("", Value::Uint(255))));
1295        assert_eq!(parse_numeric_value("0xabc"), Ok(("", Value::Uint(2748))));
1296    }
1297
1298    #[test]
1299    fn test_parse_numeric_value_negative() {
1300        // Negative integers
1301        assert_eq!(parse_numeric_value("-1"), Ok(("", Value::Int(-1))));
1302        assert_eq!(parse_numeric_value("-123"), Ok(("", Value::Int(-123))));
1303        assert_eq!(parse_numeric_value("-999"), Ok(("", Value::Int(-999))));
1304
1305        // Negative hex values
1306        assert_eq!(parse_numeric_value("-0x1"), Ok(("", Value::Int(-1))));
1307        assert_eq!(parse_numeric_value("-0x10"), Ok(("", Value::Int(-16))));
1308        assert_eq!(parse_numeric_value("-0xFF"), Ok(("", Value::Int(-255))));
1309        assert_eq!(parse_numeric_value("-0xabc"), Ok(("", Value::Int(-2748))));
1310    }
1311
1312    #[test]
1313    fn test_parse_numeric_value_with_whitespace() {
1314        // With leading/trailing whitespace
1315        assert_eq!(parse_numeric_value(" 123 "), Ok(("", Value::Uint(123))));
1316        assert_eq!(parse_numeric_value("\t-456\t"), Ok(("", Value::Int(-456))));
1317        assert_eq!(parse_numeric_value("  0xFF  "), Ok(("", Value::Uint(255))));
1318    }
1319
1320    #[test]
1321    fn test_parse_numeric_value_with_remaining_input() {
1322        // Should parse number and leave remaining input (numeric parser consumes trailing whitespace)
1323        assert_eq!(
1324            parse_numeric_value("123 rest"),
1325            Ok(("rest", Value::Uint(123)))
1326        );
1327        assert_eq!(
1328            parse_numeric_value("-456 more"),
1329            Ok(("more", Value::Int(-456)))
1330        );
1331        assert_eq!(parse_numeric_value("0xFF)"), Ok((")", Value::Uint(255))));
1332    }
1333
1334    #[test]
1335    fn test_parse_numeric_value_large_unsigned_quad() {
1336        // Full u64 range -- values above i64::MAX required for uquad
1337        let test_cases = [
1338            // u64::MAX in hex
1339            ("0xffffffffffffffff", Value::Uint(u64::MAX)),
1340            // u64::MAX in decimal
1341            ("18446744073709551615", Value::Uint(u64::MAX)),
1342            // Exactly i64::MAX + 1 (first value that overflows i64)
1343            ("0x8000000000000000", Value::Uint(0x8000_0000_0000_0000)),
1344            // i64::MAX + 1 in decimal
1345            (
1346                "9223372036854775808",
1347                Value::Uint(9_223_372_036_854_775_808),
1348            ),
1349            // i64::MAX still works as Uint
1350            ("0x7fffffffffffffff", Value::Uint(i64::MAX as u64)),
1351            ("9223372036854775807", Value::Uint(i64::MAX as u64)),
1352            // Common magic constant patterns
1353            ("0xDEADBEEFDEADBEEF", Value::Uint(0xDEAD_BEEF_DEAD_BEEF)),
1354            ("0xCAFEBABECAFEBABE", Value::Uint(0xCAFE_BABE_CAFE_BABE)),
1355        ];
1356
1357        for (input, expected) in test_cases {
1358            assert_eq!(
1359                parse_numeric_value(input),
1360                Ok(("", expected)),
1361                "Failed to parse large unsigned quad literal: '{input}'"
1362            );
1363        }
1364    }
1365
1366    #[test]
1367    fn test_parse_value_string_literals() {
1368        // String value parsing
1369        assert_eq!(
1370            parse_value("\"hello\""),
1371            Ok(("", Value::String("hello".to_string())))
1372        );
1373        assert_eq!(
1374            parse_value("\"ELF\""),
1375            Ok(("", Value::String("ELF".to_string())))
1376        );
1377        assert_eq!(parse_value("\"\""), Ok(("", Value::String(String::new()))));
1378
1379        // String with escape sequences
1380        assert_eq!(
1381            parse_value("\"Line1\\nLine2\""),
1382            Ok(("", Value::String("Line1\nLine2".to_string())))
1383        );
1384        assert_eq!(
1385            parse_value("\"Tab\\tSeparated\""),
1386            Ok(("", Value::String("Tab\tSeparated".to_string())))
1387        );
1388        assert_eq!(
1389            parse_value("\"Null\\0Term\""),
1390            Ok(("", Value::String("Null\0Term".to_string())))
1391        );
1392    }
1393
1394    #[test]
1395    fn test_parse_value_numeric_literals() {
1396        // Positive integers
1397        assert_eq!(parse_value("0"), Ok(("", Value::Uint(0))));
1398        assert_eq!(parse_value("123"), Ok(("", Value::Uint(123))));
1399        assert_eq!(parse_value("999"), Ok(("", Value::Uint(999))));
1400
1401        // Negative integers
1402        assert_eq!(parse_value("-1"), Ok(("", Value::Int(-1))));
1403        assert_eq!(parse_value("-123"), Ok(("", Value::Int(-123))));
1404        assert_eq!(parse_value("-999"), Ok(("", Value::Int(-999))));
1405
1406        // Hexadecimal values
1407        assert_eq!(parse_value("0x0"), Ok(("", Value::Uint(0))));
1408        assert_eq!(parse_value("0x10"), Ok(("", Value::Uint(16))));
1409        assert_eq!(parse_value("0xFF"), Ok(("", Value::Uint(255))));
1410        assert_eq!(parse_value("-0xFF"), Ok(("", Value::Int(-255))));
1411    }
1412
1413    #[test]
1414    fn test_parse_value_hex_byte_sequences() {
1415        // Hex bytes with \x prefix
1416        assert_eq!(parse_value("\\x7f"), Ok(("", Value::Bytes(vec![0x7f]))));
1417        assert_eq!(
1418            parse_value("\\x7f\\x45\\x4c\\x46"),
1419            Ok(("", Value::Bytes(vec![0x7f, 0x45, 0x4c, 0x46])))
1420        );
1421
1422        // Hex bytes without prefix
1423        assert_eq!(parse_value("7f"), Ok(("", Value::Bytes(vec![0x7f]))));
1424        assert_eq!(
1425            parse_value("7f454c46"),
1426            Ok(("", Value::Bytes(vec![0x7f, 0x45, 0x4c, 0x46])))
1427        );
1428
1429        // Mixed case
1430        assert_eq!(parse_value("aB"), Ok(("", Value::Bytes(vec![0xab]))));
1431        assert_eq!(parse_value("\\xCd"), Ok(("", Value::Bytes(vec![0xcd]))));
1432    }
1433
1434    #[test]
1435    fn test_parse_value_with_whitespace() {
1436        // All value types with whitespace - trailing whitespace is consumed by individual parsers
1437        assert_eq!(
1438            parse_value(" \"hello\" "),
1439            Ok(("", Value::String("hello".to_string())))
1440        );
1441        assert_eq!(parse_value("  123  "), Ok(("", Value::Uint(123))));
1442        assert_eq!(parse_value("\t-456\t"), Ok(("", Value::Int(-456))));
1443        // Hex bytes don't consume trailing whitespace by themselves
1444        assert_eq!(
1445            parse_value("  \\x7f\\x45  "),
1446            Ok(("  ", Value::Bytes(vec![0x7f, 0x45])))
1447        );
1448    }
1449
1450    #[test]
1451    fn test_parse_value_with_remaining_input() {
1452        // Should parse value and leave remaining input
1453        // Note: Individual parsers handle whitespace differently
1454        assert_eq!(
1455            parse_value("\"hello\" world"),
1456            Ok(("world", Value::String("hello".to_string())))
1457        );
1458        assert_eq!(
1459            parse_value("123 rest"),
1460            Ok(("rest", Value::Uint(123))) // Numeric parser consumes trailing space
1461        );
1462        assert_eq!(
1463            parse_value("-456 more"),
1464            Ok(("more", Value::Int(-456))) // Numeric parser consumes trailing space
1465        );
1466        assert_eq!(
1467            parse_value("\\x7f\\x45 next"),
1468            Ok((" next", Value::Bytes(vec![0x7f, 0x45]))) // Hex bytes don't consume trailing space
1469        );
1470    }
1471
1472    #[test]
1473    fn test_parse_value_edge_cases() {
1474        // Zero values in different formats
1475        assert_eq!(parse_value("0"), Ok(("", Value::Uint(0))));
1476        assert_eq!(parse_value("-0"), Ok(("", Value::Int(0))));
1477        assert_eq!(parse_value("0x0"), Ok(("", Value::Uint(0))));
1478        assert_eq!(parse_value("-0x0"), Ok(("", Value::Int(0))));
1479
1480        // Large values
1481        assert_eq!(
1482            parse_value("2147483647"),
1483            Ok(("", Value::Uint(2_147_483_647)))
1484        );
1485        assert_eq!(
1486            parse_value("-2147483648"),
1487            Ok(("", Value::Int(-2_147_483_648)))
1488        );
1489        assert_eq!(
1490            parse_value("0x7FFFFFFF"),
1491            Ok(("", Value::Uint(2_147_483_647)))
1492        );
1493
1494        // Empty input should fail
1495        assert!(parse_value("").is_err());
1496    }
1497
1498    #[test]
1499    fn test_parse_value_invalid_input() {
1500        // Should fail on completely invalid input
1501        assert!(parse_value("xyz").is_err()); // Not a valid value format
1502        assert!(parse_value("0xGG").is_err()); // Invalid hex digits
1503        assert!(parse_value("\"unclosed").is_err()); // Unclosed string
1504        assert!(parse_value("--123").is_err()); // Invalid number format
1505    }
1506
1507    #[test]
1508    fn test_parse_value_common_magic_file_patterns() {
1509        // Test patterns commonly found in magic files
1510        assert_eq!(
1511            parse_value("0x7f454c46"),
1512            Ok(("", Value::Uint(0x7f45_4c46)))
1513        );
1514        assert_eq!(
1515            parse_value("\"ELF\""),
1516            Ok(("", Value::String("ELF".to_string())))
1517        );
1518        assert_eq!(
1519            parse_value("\\x50\\x4b\\x03\\x04"),
1520            Ok(("", Value::Bytes(vec![0x50, 0x4b, 0x03, 0x04])))
1521        );
1522        assert_eq!(
1523            parse_value("\"\\377ELF\""),
1524            Ok(("", Value::String("\u{00ff}ELF".to_string())))
1525        );
1526        assert_eq!(parse_value("0"), Ok(("", Value::Uint(0))));
1527        assert_eq!(parse_value("-1"), Ok(("", Value::Int(-1))));
1528    }
1529
1530    #[test]
1531    fn test_parse_value_type_precedence() {
1532        // Test that parsing precedence works correctly
1533        // Quoted strings should be parsed as strings, not hex bytes
1534        assert_eq!(
1535            parse_value("\"7f\""),
1536            Ok(("", Value::String("7f".to_string())))
1537        );
1538
1539        // Hex patterns should be parsed as bytes when not quoted
1540        assert_eq!(parse_value("7f"), Ok(("", Value::Bytes(vec![0x7f]))));
1541
1542        // Numbers should be parsed as numbers when they don't look like hex bytes
1543        assert_eq!(parse_value("123"), Ok(("", Value::Uint(123))));
1544        assert_eq!(parse_value("-123"), Ok(("", Value::Int(-123))));
1545
1546        // Hex numbers with 0x prefix should be parsed as numbers
1547        assert_eq!(parse_value("0x123"), Ok(("", Value::Uint(0x123))));
1548    }
1549
1550    #[test]
1551    fn test_parse_value_boundary_conditions() {
1552        // Test boundary conditions for different value types
1553
1554        // Single character strings
1555        assert_eq!(
1556            parse_value("\"a\""),
1557            Ok(("", Value::String("a".to_string())))
1558        );
1559        assert_eq!(
1560            parse_value("\"1\""),
1561            Ok(("", Value::String("1".to_string())))
1562        );
1563
1564        // Single hex byte
1565        assert_eq!(parse_value("ab"), Ok(("", Value::Bytes(vec![0xab]))));
1566        assert_eq!(parse_value("\\x00"), Ok(("", Value::Bytes(vec![0x00]))));
1567
1568        // Minimum and maximum values
1569        assert_eq!(parse_value("1"), Ok(("", Value::Uint(1))));
1570        assert_eq!(parse_value("-1"), Ok(("", Value::Int(-1))));
1571
1572        // Powers of 2 (common in binary formats)
1573        assert_eq!(parse_value("256"), Ok(("", Value::Uint(256))));
1574        assert_eq!(parse_value("0x100"), Ok(("", Value::Uint(256))));
1575        assert_eq!(parse_value("1024"), Ok(("", Value::Uint(1024))));
1576        assert_eq!(parse_value("0x400"), Ok(("", Value::Uint(1024))));
1577    }
1578
1579    #[test]
1580    fn test_parse_operator_whitespace_handling() {
1581        // Test comprehensive whitespace handling
1582        let operators = ["=", "==", "!=", "<>", "&"];
1583        let whitespace_patterns = [
1584            "",     // No whitespace
1585            " ",    // Single space
1586            "  ",   // Multiple spaces
1587            "\t",   // Tab
1588            "\t\t", // Multiple tabs
1589            " \t",  // Mixed space and tab
1590            "\t ",  // Mixed tab and space
1591        ];
1592
1593        for op in operators {
1594            for leading_ws in whitespace_patterns {
1595                for trailing_ws in whitespace_patterns {
1596                    let input = format!("{leading_ws}{op}{trailing_ws}");
1597                    let result = parse_operator(&input);
1598
1599                    assert!(
1600                        result.is_ok(),
1601                        "Failed to parse operator with whitespace: '{input}'"
1602                    );
1603
1604                    let (remaining, _) = result.unwrap();
1605                    assert_eq!(remaining, "", "Unexpected remaining input for: '{input}'");
1606                }
1607            }
1608        }
1609    }
1610}
1611/// Parse a type specification with an optional attached bitwise-AND mask operator
1612/// (e.g., `lelong&0xf0000000`).
1613///
1614/// Returns the `TypeKind` and an optional `Operator`.
1615///
1616/// # Errors
1617/// Returns a nom parsing error if the input doesn't match the expected format
1618pub fn parse_type_and_operator(input: &str) -> IResult<&str, (TypeKind, Option<Operator>)> {
1619    let (input, _) = multispace0(input)?;
1620
1621    let (input, type_name) = crate::parser::types::parse_type_keyword(input)?;
1622
1623    // Check for attached operator with mask (like &0xf0000000)
1624    // Uses unsigned parsing so full u64 masks (e.g. 0xffffffffffffffff) are supported.
1625    // If '&' is followed by digits/0x but the mask parse fails (overflow, etc.),
1626    // we return a hard error instead of silently falling back to standalone '&'.
1627    let (input, attached_op) = if let Some(after_amp) = input.strip_prefix('&') {
1628        if after_amp.starts_with("0x") || after_amp.starts_with(|c: char| c.is_ascii_digit()) {
1629            // '&' followed by what looks like a number -- must parse as mask
1630            let (rest, mask) = parse_unsigned_number(after_amp).map_err(|_| {
1631                nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::MapRes))
1632            })?;
1633            (rest, Some(Operator::BitwiseAndMask(mask)))
1634        } else if after_amp.starts_with('&') {
1635            // Reject '&&' -- not valid operator syntax
1636            return Err(nom::Err::Error(nom::error::Error::new(
1637                input,
1638                nom::error::ErrorKind::Tag,
1639            )));
1640        } else {
1641            // Standalone '&' (no digits following)
1642            (after_amp, Some(Operator::BitwiseAnd))
1643        }
1644    } else {
1645        (input, None)
1646    };
1647
1648    let (input, _) = multispace0(input)?;
1649
1650    let type_kind = crate::parser::types::type_keyword_to_kind(type_name);
1651
1652    Ok((input, (type_kind, attached_op)))
1653}
1654
1655/// Parse a type specification (byte, short, long, quad, string, etc.)
1656///
1657/// Supports various type formats found in magic files:
1658/// - `byte` / `ubyte` - single byte (signed / unsigned)
1659/// - `short` / `ushort` - 16-bit integer (native endian, signed / unsigned)
1660/// - `leshort` / `uleshort` - 16-bit little-endian integer
1661/// - `beshort` / `ubeshort` - 16-bit big-endian integer
1662/// - `long` / `ulong` - 32-bit integer (native endian, signed / unsigned)
1663/// - `lelong` / `ulelong` - 32-bit little-endian integer
1664/// - `belong` / `ubelong` - 32-bit big-endian integer
1665/// - `quad` / `uquad` - 64-bit integer (native endian, signed / unsigned)
1666/// - `lequad` / `ulequad` - 64-bit little-endian integer
1667/// - `bequad` / `ubequad` - 64-bit big-endian integer
1668/// - `string` - null-terminated string
1669///
1670/// # Examples
1671///
1672/// ```
1673/// use libmagic_rs::parser::grammar::parse_type;
1674/// use libmagic_rs::parser::ast::{TypeKind, Endianness};
1675///
1676/// assert_eq!(parse_type("byte"), Ok(("", TypeKind::Byte { signed: true })));
1677/// assert_eq!(parse_type("leshort"), Ok(("", TypeKind::Short { endian: Endianness::Little, signed: true })));
1678/// assert_eq!(parse_type("bequad"), Ok(("", TypeKind::Quad { endian: Endianness::Big, signed: true })));
1679/// assert_eq!(parse_type("string"), Ok(("", TypeKind::String { max_length: None })));
1680/// ```
1681///
1682/// # Errors
1683/// Returns a nom parsing error if the input doesn't match any known type
1684pub fn parse_type(input: &str) -> IResult<&str, TypeKind> {
1685    let (input, (type_kind, _)) = parse_type_and_operator(input)?;
1686    Ok((input, type_kind))
1687}
1688
1689/// Parse the indentation level and offset for magic rules
1690///
1691/// Handles both absolute offsets and hierarchical child rules with `>` prefix.
1692/// Child rules can be nested multiple levels deep with multiple `>` characters.
1693///
1694/// # Examples
1695///
1696/// ```
1697/// use libmagic_rs::parser::grammar::parse_rule_offset;
1698/// use libmagic_rs::parser::ast::OffsetSpec;
1699///
1700/// // Absolute offset
1701/// assert_eq!(parse_rule_offset("0"), Ok(("", (0, OffsetSpec::Absolute(0)))));
1702/// assert_eq!(parse_rule_offset("16"), Ok(("", (0, OffsetSpec::Absolute(16)))));
1703///
1704/// // Child rule (level 1)
1705/// assert_eq!(parse_rule_offset(">4"), Ok(("", (1, OffsetSpec::Absolute(4)))));
1706///
1707/// // Nested child rule (level 2)
1708/// assert_eq!(parse_rule_offset(">>8"), Ok(("", (2, OffsetSpec::Absolute(8)))));
1709/// ```
1710/// Parse rule offset with hierarchy level (> prefixes) and offset specification
1711///
1712/// # Errors
1713/// Returns a nom parsing error if the input doesn't match the expected offset format
1714pub fn parse_rule_offset(input: &str) -> IResult<&str, (u32, OffsetSpec)> {
1715    let (input, _) = multispace0(input)?;
1716
1717    // Count the number of '>' characters for nesting level
1718    let (input, level_chars) = many0(char('>')).parse(input)?;
1719    let level = u32::try_from(level_chars.len()).unwrap_or(0);
1720
1721    // Parse the offset after the '>' characters
1722    let (input, offset_spec) = parse_offset(input)?;
1723
1724    Ok((input, (level, offset_spec)))
1725}
1726
1727/// Parse the message part of a magic rule
1728///
1729/// The message is everything after the value until the end of the line.
1730/// It may contain format specifiers and can be empty.
1731///
1732/// # Examples
1733///
1734/// ```
1735/// use libmagic_rs::parser::grammar::parse_message;
1736///
1737/// assert_eq!(parse_message("ELF executable"), Ok(("", "ELF executable".to_string())));
1738/// assert_eq!(parse_message(""), Ok(("", "".to_string())));
1739/// assert_eq!(parse_message("  \tPDF document  "), Ok(("", "PDF document".to_string())));
1740/// ```
1741/// Parse the message/description part of a magic rule
1742///
1743/// # Errors
1744/// Returns a nom parsing error if the input cannot be parsed as a message
1745pub fn parse_message(input: &str) -> IResult<&str, String> {
1746    let (input, _) = multispace0(input)?;
1747
1748    // Take everything until end of line, trimming whitespace
1749    // Use take_while instead of take_while1 to handle empty messages
1750    let (input, message_text) = take_while(|c: char| c != '\n' && c != '\r').parse(input)?;
1751    let message = message_text.trim().to_string();
1752
1753    Ok((input, message))
1754}
1755
1756/// Parse a strength directive (`!:strength` line)
1757///
1758/// Parses the `!:strength` directive that modifies rule strength.
1759/// Format: `!:strength [+|-|*|/|=]N` or `!:strength N`
1760///
1761/// # Examples
1762///
1763/// ```
1764/// use libmagic_rs::parser::grammar::parse_strength_directive;
1765/// use libmagic_rs::parser::ast::StrengthModifier;
1766///
1767/// assert_eq!(parse_strength_directive("!:strength +10"), Ok(("", StrengthModifier::Add(10))));
1768/// assert_eq!(parse_strength_directive("!:strength -5"), Ok(("", StrengthModifier::Subtract(5))));
1769/// assert_eq!(parse_strength_directive("!:strength *2"), Ok(("", StrengthModifier::Multiply(2))));
1770/// assert_eq!(parse_strength_directive("!:strength /2"), Ok(("", StrengthModifier::Divide(2))));
1771/// assert_eq!(parse_strength_directive("!:strength =50"), Ok(("", StrengthModifier::Set(50))));
1772/// assert_eq!(parse_strength_directive("!:strength 50"), Ok(("", StrengthModifier::Set(50))));
1773/// ```
1774///
1775/// # Errors
1776///
1777/// Returns a nom parsing error if:
1778/// - Input doesn't start with `!:strength`
1779/// - The modifier value cannot be parsed as a valid integer
1780/// - The operator is invalid
1781pub fn parse_strength_directive(input: &str) -> IResult<&str, StrengthModifier> {
1782    // Helper to safely convert i64 to i32 with clamping to valid strength range.
1783    // This prevents silent truncation to 0 on overflow while keeping values in bounds.
1784    fn clamp_to_i32(n: i64) -> i32 {
1785        // Use i64::from for lossless conversion, then clamp and convert back
1786        let clamped = n.clamp(i64::from(i32::MIN), i64::from(i32::MAX));
1787        // Safe to unwrap: clamped value is guaranteed to be in i32 range
1788        i32::try_from(clamped).unwrap()
1789    }
1790
1791    let (input, _) = multispace0(input)?;
1792    let (input, _) = tag("!:strength")(input)?;
1793    let (input, _) = multispace0(input)?;
1794
1795    // Parse the operator: +, -, *, /, = or bare number (implies =)
1796    let (input, modifier) = alt((
1797        // +N -> Add
1798        map(pair(char('+'), parse_number), |(_, n)| {
1799            StrengthModifier::Add(clamp_to_i32(n))
1800        }),
1801        // -N -> Subtract (note: parse_number handles negative, so we need special handling)
1802        map(pair(char('-'), parse_decimal_number), |(_, n)| {
1803            StrengthModifier::Subtract(clamp_to_i32(n))
1804        }),
1805        // *N -> Multiply
1806        map(pair(char('*'), parse_number), |(_, n)| {
1807            StrengthModifier::Multiply(clamp_to_i32(n))
1808        }),
1809        // /N -> Divide
1810        map(pair(char('/'), parse_number), |(_, n)| {
1811            StrengthModifier::Divide(clamp_to_i32(n))
1812        }),
1813        // =N -> Set
1814        map(pair(char('='), parse_number), |(_, n)| {
1815            StrengthModifier::Set(clamp_to_i32(n))
1816        }),
1817        // Bare number -> Set
1818        map(parse_number, |n| StrengthModifier::Set(clamp_to_i32(n))),
1819    ))
1820    .parse(input)?;
1821
1822    Ok((input, modifier))
1823}
1824
/// Report whether a line begins with the `!:strength` directive keyword
///
/// Leading whitespace is ignored. Only the prefix is checked; whatever follows
/// the keyword is not validated here.
///
/// # Examples
///
/// ```
/// use libmagic_rs::parser::grammar::is_strength_directive;
///
/// assert!(is_strength_directive("!:strength +10"));
/// assert!(is_strength_directive("  !:strength -5"));
/// assert!(!is_strength_directive("0 byte 1"));
/// ```
#[must_use]
pub fn is_strength_directive(input: &str) -> bool {
    input.trim_start().strip_prefix("!:strength").is_some()
}
1840
1841/// Parse a complete magic rule line from text format
1842///
1843/// Parses a complete magic rule in the format:
1844/// `[>...]offset type [operator] value [message]`
1845///
1846/// Where:
1847/// - `>...` indicates child rule nesting level (optional)
1848/// - `offset` is the byte offset to read from
1849/// - `type` is the data type (byte, short, long, string, etc.)
1850/// - `operator` is the comparison operator (=, !=, &) - defaults to = if omitted
1851/// - `value` is the expected value to compare against
1852/// - `message` is the human-readable description (optional)
1853///
1854/// # Examples
1855///
1856/// ```
1857/// use libmagic_rs::parser::grammar::parse_magic_rule;
1858/// use libmagic_rs::parser::ast::{MagicRule, OffsetSpec, TypeKind, Operator, Value};
1859///
1860/// // Basic rule
1861/// let input = "0 string \\x7fELF ELF executable";
1862/// let (_, rule) = parse_magic_rule(input).unwrap();
1863/// assert_eq!(rule.level, 0);
1864/// assert_eq!(rule.message, "ELF executable");
1865///
1866/// // Child rule
1867/// let input = ">4 byte 1 32-bit";
1868/// let (_, rule) = parse_magic_rule(input).unwrap();
1869/// assert_eq!(rule.level, 1);
1870/// assert_eq!(rule.message, "32-bit");
1871/// ```
1872///
1873/// # Errors
1874///
1875/// Returns a nom parsing error if:
1876/// - The offset specification is invalid
1877/// - The type specification is not recognized
1878/// - The operator is invalid (if present)
1879/// - The value cannot be parsed
1880/// - The input format doesn't match the expected magic rule syntax
1881pub fn parse_magic_rule(input: &str) -> IResult<&str, MagicRule> {
1882    let (input, _) = multispace0(input)?;
1883
1884    // Parse the offset with nesting level
1885    let (input, (level, offset)) = parse_rule_offset(input)?;
1886
1887    // Parse the type and any attached operator
1888    let (input, (typ, attached_op)) = parse_type_and_operator(input)?;
1889
1890    // Try to parse a separate operator (optional - use attached operator if present)
1891    let (input, separate_op) = opt(parse_operator).parse(input)?;
1892    let op = attached_op.or(separate_op).unwrap_or(Operator::Equal);
1893
1894    // Parse the value
1895    let (input, value) = parse_value(input)?;
1896
1897    // Parse the message (optional - everything remaining on the line)
1898    let (input, message) = if input.trim().is_empty() {
1899        (input, String::new())
1900    } else {
1901        parse_message(input)?
1902    };
1903
1904    let rule = MagicRule {
1905        offset,
1906        typ,
1907        op,
1908        value,
1909        message,
1910        children: vec![], // Children will be added during hierarchical parsing
1911        level,
1912        strength_modifier: None, // Will be set during directive parsing
1913    };
1914
1915    Ok((input, rule))
1916}
1917
1918/// Parse a comment line (starts with #)
1919///
1920/// Comments in magic files start with '#' and continue to the end of the line.
1921/// This function consumes the entire comment line.
1922///
1923/// # Examples
1924///
1925/// ```
1926/// use libmagic_rs::parser::grammar::parse_comment;
1927///
1928/// assert_eq!(parse_comment("# This is a comment"), Ok(("", "This is a comment".to_string())));
1929/// assert_eq!(parse_comment("#"), Ok(("", "".to_string())));
1930/// ```
1931/// Parse a comment line (starting with #)
1932///
1933/// # Errors
1934/// Returns a nom parsing error if the input is not a valid comment
1935pub fn parse_comment(input: &str) -> IResult<&str, String> {
1936    let (input, _) = multispace0(input)?;
1937    let (input, _) = char('#').parse(input)?;
1938    let (input, comment_text) = take_while(|c: char| c != '\n' && c != '\r').parse(input)?;
1939    let comment = comment_text.trim().to_string();
1940    Ok((input, comment))
1941}
1942
/// Report whether a line contains nothing but whitespace (or nothing at all)
///
/// # Examples
///
/// ```
/// use libmagic_rs::parser::grammar::is_empty_line;
///
/// assert!(is_empty_line(""));
/// assert!(is_empty_line("   "));
/// assert!(is_empty_line("\t\t"));
/// assert!(!is_empty_line("0 byte 1"));
/// ```
#[must_use]
pub fn is_empty_line(input: &str) -> bool {
    input.chars().all(char::is_whitespace)
}
1959
/// Report whether the first non-whitespace character of a line is `#`
///
/// # Examples
///
/// ```
/// use libmagic_rs::parser::grammar::is_comment_line;
///
/// assert!(is_comment_line("# This is a comment"));
/// assert!(is_comment_line("#"));
/// assert!(is_comment_line("  # Indented comment"));
/// assert!(!is_comment_line("0 byte 1"));
/// ```
#[must_use]
pub fn is_comment_line(input: &str) -> bool {
    input.chars().find(|ch| !ch.is_whitespace()) == Some('#')
}
1976
/// Report whether a line ends with a continuation character (`\`)
///
/// Trailing whitespace is ignored before the check. Any trailing backslash
/// counts, including one that is itself preceded by another backslash.
///
/// # Examples
///
/// ```
/// use libmagic_rs::parser::grammar::has_continuation;
///
/// assert!(has_continuation("0 string test \\"));
/// assert!(has_continuation("message continues \\"));
/// assert!(!has_continuation("0 string test"));
/// ```
#[must_use]
pub fn has_continuation(input: &str) -> bool {
    input.trim_end().strip_suffix('\\').is_some()
}
1994// Tests for new magic rule parsing functions
1995
#[test]
fn test_parse_type_basic() {
    // Unprefixed keywords and the kinds they are expected to resolve to.
    let expectations = [
        ("byte", TypeKind::Byte { signed: true }),
        (
            "short",
            TypeKind::Short {
                endian: Endianness::Native,
                signed: true,
            },
        ),
        (
            "long",
            TypeKind::Long {
                endian: Endianness::Native,
                signed: true,
            },
        ),
        ("string", TypeKind::String { max_length: None }),
    ];

    for (keyword, expected) in expectations {
        assert_eq!(parse_type(keyword), Ok(("", expected)), "keyword: {keyword:?}");
    }
}
2027
#[test]
fn test_parse_type_endianness() {
    // Builders for the expected (signed) kinds keep each assertion on one line.
    let short = |endian: Endianness| TypeKind::Short {
        endian,
        signed: true,
    };
    let long = |endian: Endianness| TypeKind::Long {
        endian,
        signed: true,
    };

    assert_eq!(parse_type("leshort"), Ok(("", short(Endianness::Little))));
    assert_eq!(parse_type("beshort"), Ok(("", short(Endianness::Big))));
    assert_eq!(parse_type("lelong"), Ok(("", long(Endianness::Little))));
    assert_eq!(parse_type("belong"), Ok(("", long(Endianness::Big))));
}
2071
#[test]
fn test_parse_type_with_whitespace() {
    // Surrounding spaces and tabs must be consumed along with the keyword.
    let padded = [
        (" byte ", TypeKind::Byte { signed: true }),
        ("\tstring\t", TypeKind::String { max_length: None }),
        (
            "  lelong  ",
            TypeKind::Long {
                endian: Endianness::Little,
                signed: true,
            },
        ),
    ];

    for (text, expected) in padded {
        assert_eq!(parse_type(text), Ok(("", expected)), "input: {text:?}");
    }
}
2093
#[test]
fn test_parse_type_with_remaining_input() {
    // (input, text left unparsed, expected kind)
    let cases = [
        ("byte =", "=", TypeKind::Byte { signed: true }),
        ("string \\x7f", "\\x7f", TypeKind::String { max_length: None }),
    ];

    for (text, leftover, expected) in cases {
        assert_eq!(parse_type(text), Ok((leftover, expected)), "input: {text:?}");
    }
}
2105
#[test]
fn test_parse_type_invalid() {
    // None of these is a known type keyword.
    for rejected in ["", "invalid", "int", "float"] {
        assert!(
            parse_type(rejected).is_err(),
            "expected {rejected:?} to be rejected"
        );
    }
}
2113
#[test]
fn test_parse_type_unsigned_variants() {
    // Builders for the expected unsigned kinds; only the endianness varies.
    let short = |endian: Endianness| TypeKind::Short {
        endian,
        signed: false,
    };
    let long = |endian: Endianness| TypeKind::Long {
        endian,
        signed: false,
    };
    let quad = |endian: Endianness| TypeKind::Quad {
        endian,
        signed: false,
    };

    assert_eq!(parse_type("ubyte"), Ok(("", TypeKind::Byte { signed: false })));

    assert_eq!(parse_type("ushort"), Ok(("", short(Endianness::Native))));
    assert_eq!(parse_type("ubeshort"), Ok(("", short(Endianness::Big))));
    assert_eq!(parse_type("uleshort"), Ok(("", short(Endianness::Little))));

    assert_eq!(parse_type("ulong"), Ok(("", long(Endianness::Native))));
    assert_eq!(parse_type("ubelong"), Ok(("", long(Endianness::Big))));
    assert_eq!(parse_type("ulelong"), Ok(("", long(Endianness::Little))));

    assert_eq!(parse_type("uquad"), Ok(("", quad(Endianness::Native))));
    assert_eq!(parse_type("ubequad"), Ok(("", quad(Endianness::Big))));
    assert_eq!(parse_type("ulequad"), Ok(("", quad(Endianness::Little))));
}
2211
#[test]
fn test_parse_type_signed_defaults() {
    // In libmagic, unprefixed types are signed by default
    let short = |endian: Endianness| TypeKind::Short {
        endian,
        signed: true,
    };
    let long = |endian: Endianness| TypeKind::Long {
        endian,
        signed: true,
    };
    let quad = |endian: Endianness| TypeKind::Quad {
        endian,
        signed: true,
    };

    assert_eq!(parse_type("byte"), Ok(("", TypeKind::Byte { signed: true })));

    assert_eq!(parse_type("short"), Ok(("", short(Endianness::Native))));
    assert_eq!(parse_type("long"), Ok(("", long(Endianness::Native))));
    assert_eq!(parse_type("beshort"), Ok(("", short(Endianness::Big))));
    assert_eq!(parse_type("belong"), Ok(("", long(Endianness::Big))));

    assert_eq!(parse_type("quad"), Ok(("", quad(Endianness::Native))));
    assert_eq!(parse_type("bequad"), Ok(("", quad(Endianness::Big))));
    assert_eq!(parse_type("lequad"), Ok(("", quad(Endianness::Little))));
}
2290
#[test]
fn test_parse_rule_offset_absolute() {
    // (input, expected absolute offset); all of these are top-level (level 0).
    let cases = [("0", 0), ("16", 16), ("0x10", 16), ("-4", -4)];

    for (text, offset) in cases {
        assert_eq!(
            parse_rule_offset(text),
            Ok(("", (0, OffsetSpec::Absolute(offset)))),
            "input: {text:?}"
        );
    }
}
2310
#[test]
fn test_parse_rule_offset_child_rules() {
    // (input, expected nesting level, expected absolute offset)
    let cases = [(">4", 1, 4), (">>8", 2, 8), (">>>12", 3, 12)];

    for (text, level, offset) in cases {
        assert_eq!(
            parse_rule_offset(text),
            Ok(("", (level, OffsetSpec::Absolute(offset)))),
            "input: {text:?}"
        );
    }
}
2326
#[test]
fn test_parse_rule_offset_with_whitespace() {
    // (input, expected nesting level, expected absolute offset)
    let cases = [(" 0 ", 0, 0), ("  >4  ", 1, 4), ("\t>>0x10\t", 2, 16)];

    for (text, level, offset) in cases {
        assert_eq!(
            parse_rule_offset(text),
            Ok(("", (level, OffsetSpec::Absolute(offset)))),
            "input: {text:?}"
        );
    }
}
2342
#[test]
fn test_parse_rule_offset_with_remaining_input() {
    // (input, text left unparsed, expected nesting level, expected absolute offset)
    let cases = [("0 byte", "byte", 0, 0), (">4 string", "string", 1, 4)];

    for (text, leftover, level, offset) in cases {
        assert_eq!(
            parse_rule_offset(text),
            Ok((leftover, (level, OffsetSpec::Absolute(offset)))),
            "input: {text:?}"
        );
    }
}
2354
#[test]
fn test_parse_message_basic() {
    // Messages without surrounding whitespace come back unchanged; empty stays empty.
    for text in ["ELF executable", "PDF document", ""] {
        assert_eq!(parse_message(text), Ok(("", text.to_string())), "input: {text:?}");
    }
}
2367
#[test]
fn test_parse_message_with_whitespace() {
    // (input, expected trimmed message)
    let cases = [
        ("  ELF executable  ", "ELF executable"),
        ("\tPDF document\t", "PDF document"),
        ("   ", ""),
    ];

    for (text, trimmed) in cases {
        assert_eq!(parse_message(text), Ok(("", trimmed.to_string())), "input: {text:?}");
    }
}
2380
#[test]
fn test_parse_message_complex() {
    // Punctuation, digits and format specifiers must pass through untouched.
    for text in ["ELF 64-bit LSB executable", "ZIP archive, version %d.%d"] {
        assert_eq!(parse_message(text), Ok(("", text.to_string())), "input: {text:?}");
    }
}
2392
#[test]
fn test_parse_magic_rule_basic() {
    let (remaining, rule) = parse_magic_rule("0 string \\x7fELF ELF executable").unwrap();
    let MagicRule {
        level,
        offset,
        typ,
        op,
        value,
        message,
        children,
        ..
    } = rule;

    assert_eq!(remaining, "");
    assert_eq!(level, 0);
    assert_eq!(offset, OffsetSpec::Absolute(0));
    assert_eq!(typ, TypeKind::String { max_length: None });
    assert_eq!(op, Operator::Equal);
    assert_eq!(value, Value::Bytes(vec![0x7f, 0x45, 0x4c, 0x46]));
    assert_eq!(message, "ELF executable");
    assert!(children.is_empty());
}
2407
#[test]
fn test_parse_magic_rule_child() {
    let (remaining, rule) = parse_magic_rule(">4 byte 1 32-bit").unwrap();
    let MagicRule {
        level,
        offset,
        typ,
        op,
        value,
        message,
        ..
    } = rule;

    assert_eq!(remaining, "");
    assert_eq!(level, 1);
    assert_eq!(offset, OffsetSpec::Absolute(4));
    assert_eq!(typ, TypeKind::Byte { signed: true });
    assert_eq!(op, Operator::Equal);
    assert_eq!(value, Value::Uint(1));
    assert_eq!(message, "32-bit");
}
2421
#[test]
fn test_parse_magic_rule_with_operator() {
    // The mask is glued to the type keyword and must end up in the operator.
    let (remaining, rule) = parse_magic_rule("0 lelong&0xf0000000 0x10000000 MIPS-II").unwrap();
    let MagicRule {
        level,
        offset,
        typ,
        op,
        value,
        message,
        ..
    } = rule;
    let expected_type = TypeKind::Long {
        endian: Endianness::Little,
        signed: true,
    };

    assert_eq!(remaining, "");
    assert_eq!(level, 0);
    assert_eq!(offset, OffsetSpec::Absolute(0));
    assert_eq!(typ, expected_type);
    assert_eq!(op, Operator::BitwiseAndMask(0xf000_0000));
    assert_eq!(value, Value::Uint(0x1000_0000));
    assert_eq!(message, "MIPS-II");
}
2441
#[test]
fn test_parse_magic_rule_no_message() {
    // A rule may stop right after the value; the message is then empty.
    let (remaining, rule) = parse_magic_rule("0 byte 0x7f").unwrap();
    let MagicRule {
        level,
        offset,
        typ,
        op,
        value,
        message,
        ..
    } = rule;

    assert_eq!(remaining, "");
    assert_eq!(level, 0);
    assert_eq!(offset, OffsetSpec::Absolute(0));
    assert_eq!(typ, TypeKind::Byte { signed: true });
    assert_eq!(op, Operator::Equal);
    assert_eq!(value, Value::Uint(0x7f));
    assert_eq!(message, "");
}
2455
#[test]
fn test_parse_magic_rule_nested() {
    let (remaining, rule) = parse_magic_rule(">>8 leshort 0x014c Microsoft COFF").unwrap();
    let MagicRule {
        level,
        offset,
        typ,
        op,
        value,
        message,
        ..
    } = rule;
    let expected_type = TypeKind::Short {
        endian: Endianness::Little,
        signed: true,
    };

    assert_eq!(remaining, "");
    assert_eq!(level, 2);
    assert_eq!(offset, OffsetSpec::Absolute(8));
    assert_eq!(typ, expected_type);
    assert_eq!(op, Operator::Equal);
    assert_eq!(value, Value::Uint(0x014c));
    assert_eq!(message, "Microsoft COFF");
}
2475
#[test]
fn test_parse_magic_rule_with_whitespace() {
    // Generous spacing between every token, including after the '>' marker.
    let (remaining, rule) = parse_magic_rule("  >  4   byte   =   1   32-bit  ").unwrap();
    let MagicRule {
        level,
        offset,
        typ,
        op,
        value,
        message,
        ..
    } = rule;

    assert_eq!(remaining, "");
    assert_eq!(level, 1);
    assert_eq!(offset, OffsetSpec::Absolute(4));
    assert_eq!(typ, TypeKind::Byte { signed: true });
    assert_eq!(op, Operator::Equal);
    assert_eq!(value, Value::Uint(1));
    assert_eq!(message, "32-bit");
}
2489
#[test]
fn test_parse_magic_rule_string_value() {
    // A double-quoted value is parsed as a string rather than raw bytes.
    let (remaining, rule) = parse_magic_rule("0 string \"PK\" ZIP archive").unwrap();
    let MagicRule {
        level,
        offset,
        typ,
        op,
        value,
        message,
        ..
    } = rule;

    assert_eq!(remaining, "");
    assert_eq!(level, 0);
    assert_eq!(offset, OffsetSpec::Absolute(0));
    assert_eq!(typ, TypeKind::String { max_length: None });
    assert_eq!(op, Operator::Equal);
    assert_eq!(value, Value::String("PK".to_string()));
    assert_eq!(message, "ZIP archive");
}
2503
#[test]
fn test_parse_magic_rule_hex_offset() {
    let (remaining, rule) = parse_magic_rule("0x10 belong 0x12345678 Test data").unwrap();
    let MagicRule {
        level,
        offset,
        typ,
        op,
        value,
        message,
        ..
    } = rule;
    let expected_type = TypeKind::Long {
        endian: Endianness::Big,
        signed: true,
    };

    assert_eq!(remaining, "");
    assert_eq!(level, 0);
    assert_eq!(offset, OffsetSpec::Absolute(16));
    assert_eq!(typ, expected_type);
    assert_eq!(op, Operator::Equal);
    assert_eq!(value, Value::Uint(0x1234_5678));
    assert_eq!(message, "Test data");
}
2523
#[test]
fn test_parse_magic_rule_negative_offset() {
    let (remaining, rule) = parse_magic_rule("-4 byte 0 End marker").unwrap();
    let MagicRule {
        level,
        offset,
        typ,
        op,
        value,
        message,
        ..
    } = rule;

    assert_eq!(remaining, "");
    assert_eq!(level, 0);
    assert_eq!(offset, OffsetSpec::Absolute(-4));
    assert_eq!(typ, TypeKind::Byte { signed: true });
    assert_eq!(op, Operator::Equal);
    assert_eq!(value, Value::Uint(0));
    assert_eq!(message, "End marker");
}
2537
#[test]
fn test_parse_comment() {
    // (input, expected comment text with the marker and padding removed)
    let cases = [
        ("# This is a comment", "This is a comment"),
        ("#", ""),
        ("# ELF executables", "ELF executables"),
    ];

    for (text, body) in cases {
        assert_eq!(parse_comment(text), Ok(("", body.to_string())), "input: {text:?}");
    }
}
2550
#[test]
fn test_parse_comment_with_whitespace() {
    // (input, expected comment text); padding on both sides of the marker is dropped.
    let cases = [
        ("  # Indented comment  ", "Indented comment"),
        ("\t#\tTabbed comment\t", "Tabbed comment"),
    ];

    for (text, body) in cases {
        assert_eq!(parse_comment(text), Ok(("", body.to_string())), "input: {text:?}");
    }
}
2562
/// `is_empty_line` accepts empty and whitespace-only lines and rejects lines
/// holding a rule or a comment.
#[test]
fn test_is_empty_line() {
    for blank in ["", "   ", "\t\t", " \t \t "] {
        assert!(is_empty_line(blank), "expected empty line: {blank:?}");
    }

    for occupied in ["0 byte 1", "  # comment"] {
        assert!(!is_empty_line(occupied), "expected non-empty line: {occupied:?}");
    }
}
2572
/// `is_comment_line` recognises `#` lines, including indented ones, and
/// rejects lines that do not start with `#` after leading whitespace.
#[test]
fn test_is_comment_line() {
    // (line, whether it should be classified as a comment)
    let cases = [
        ("# This is a comment", true),
        ("#", true),
        ("  # Indented comment", true),
        ("\t# Tabbed comment", true),
        ("0 byte 1", false),
        ("string test", false),
    ];

    for (line, expected) in cases {
        assert_eq!(is_comment_line(line), expected, "line: {line:?}");
    }
}
2582
/// A line continues when its last non-whitespace character is a backslash;
/// a backslash in the middle of the line does not count.
#[test]
fn test_has_continuation() {
    let continued = [
        "0 string test \\",
        "message continues \\",
        "line ends with backslash\\",
        // Whitespace after the backslash is tolerated.
        "  trailing whitespace  \\  ",
    ];
    for line in continued {
        assert!(has_continuation(line), "expected continuation: {line:?}");
    }

    let complete = [
        "0 string test",
        "no continuation",
        "backslash in middle \\ here",
    ];
    for line in complete {
        assert!(!has_continuation(line), "expected no continuation: {line:?}");
    }
}
2593
/// Every sample rule must parse completely (no leftover input) and carry a
/// message; the `\177ELF` line is exempt from the message check.
#[test]
fn test_parse_magic_rule_real_world_examples() {
    // Real examples from /usr/share/file/magic/elf
    for example in [
        "0 string \\177ELF ELF",
        ">4 byte 1 32-bit",
        ">4 byte 2 64-bit",
        ">5 byte 1 LSB",
        ">5 byte 2 MSB",
        ">>0 lelong&0xf0000000 0x10000000 MIPS-II",
    ] {
        let (remaining, rule) = match parse_magic_rule(example) {
            Ok(parsed) => parsed,
            Err(_) => panic!("Failed to parse real-world example: '{example}'"),
        };

        assert_eq!(remaining, "", "Unexpected remaining input for: '{example}'");

        let is_elf_magic_line = example.contains("\\177ELF");
        assert!(
            is_elf_magic_line || !rule.message.is_empty(),
            "Empty message for: '{example}'"
        );
    }
}
2621
/// Table-driven check of boundary inputs for `parse_magic_rule`.
///
/// Each row is `(input, expected level, expected type, expected value,
/// expected message)`. Every assertion names the offending input, so a failure
/// points at the exact row instead of only a line inside the loop.
#[test]
fn test_parse_magic_rule_edge_cases() {
    let edge_cases = [
        // Rule with no message at all: the message comes back empty.
        (
            "0 byte 0",
            0,
            TypeKind::Byte { signed: true },
            Value::Uint(0),
            "",
        ),
        // Three `>` markers (level 3) and an empty quoted string value.
        (
            ">>>16 string \"\" Empty string",
            3,
            TypeKind::String { max_length: None },
            Value::String(String::new()),
            "Empty string",
        ),
        // Hex offset combined with an all-ones 32-bit value.
        (
            "0x100 lelong 0xFFFFFFFF Max value",
            0,
            TypeKind::Long {
                endian: Endianness::Little,
                signed: true,
            },
            Value::Uint(0xFFFF_FFFF),
            "Max value",
        ),
    ];

    for (input, expected_level, expected_type, expected_value, expected_message) in edge_cases {
        let (remaining, rule) = parse_magic_rule(input)
            .unwrap_or_else(|err| panic!("Failed to parse edge case '{input}': {err:?}"));

        assert_eq!(remaining, "", "Unexpected remaining input for: '{input}'");
        assert_eq!(rule.level, expected_level, "Wrong level for: '{input}'");
        assert_eq!(rule.typ, expected_type, "Wrong type for: '{input}'");
        assert_eq!(rule.value, expected_value, "Wrong value for: '{input}'");
        assert_eq!(
            rule.message, expected_message,
            "Wrong message for: '{input}'"
        );
    }
}
2661
/// Malformed or incomplete rule lines must be rejected by `parse_magic_rule`.
#[test]
fn test_parse_magic_rule_invalid_input() {
    for invalid_input in [
        "",               // Empty input
        "invalid format", // No valid offset
        "0",              // Missing type
        "0 invalid_type", // Invalid type
        "0 byte",         // Missing value
    ] {
        assert!(
            parse_magic_rule(invalid_input).is_err(),
            "Should fail to parse invalid input: '{invalid_input}'"
        );
    }
}
2680
2681// Strength directive tests
/// `!:strength +N` parses to `StrengthModifier::Add(N)`.
#[test]
fn test_parse_strength_directive_add() {
    // (directive line, expected amount)
    let cases = [
        ("!:strength +10", 10),
        ("!:strength +0", 0),
        ("!:strength +100", 100),
    ];

    for (directive, amount) in cases {
        assert_eq!(
            parse_strength_directive(directive),
            Ok(("", StrengthModifier::Add(amount)))
        );
    }
}
2697
/// `!:strength -N` parses to `StrengthModifier::Subtract(N)`.
#[test]
fn test_parse_strength_directive_subtract() {
    // (directive line, expected amount)
    let cases = [
        ("!:strength -5", 5),
        ("!:strength -0", 0),
        ("!:strength -50", 50),
    ];

    for (directive, amount) in cases {
        assert_eq!(
            parse_strength_directive(directive),
            Ok(("", StrengthModifier::Subtract(amount)))
        );
    }
}
2713
/// `!:strength *N` parses to `StrengthModifier::Multiply(N)`.
#[test]
fn test_parse_strength_directive_multiply() {
    // (directive line, expected factor)
    let cases = [("!:strength *2", 2), ("!:strength *10", 10)];

    for (directive, factor) in cases {
        assert_eq!(
            parse_strength_directive(directive),
            Ok(("", StrengthModifier::Multiply(factor)))
        );
    }
}
2725
/// `!:strength /N` parses to `StrengthModifier::Divide(N)`.
#[test]
fn test_parse_strength_directive_divide() {
    // (directive line, expected divisor)
    let cases = [("!:strength /2", 2), ("!:strength /10", 10)];

    for (directive, divisor) in cases {
        assert_eq!(
            parse_strength_directive(directive),
            Ok(("", StrengthModifier::Divide(divisor)))
        );
    }
}
2737
/// `!:strength =N` parses to `StrengthModifier::Set(N)`.
#[test]
fn test_parse_strength_directive_set_explicit() {
    // (directive line, expected absolute strength)
    let cases = [
        ("!:strength =50", 50),
        ("!:strength =0", 0),
        ("!:strength =100", 100),
    ];

    for (directive, strength) in cases {
        assert_eq!(
            parse_strength_directive(directive),
            Ok(("", StrengthModifier::Set(strength)))
        );
    }
}
2753
/// A number with no operator prefix is treated the same as `=N`.
#[test]
fn test_parse_strength_directive_set_bare() {
    // Bare number implies Set
    let cases = [
        ("!:strength 50", 50),
        ("!:strength 0", 0),
        ("!:strength 100", 100),
    ];

    for (directive, strength) in cases {
        assert_eq!(
            parse_strength_directive(directive),
            Ok(("", StrengthModifier::Set(strength)))
        );
    }
}
2770
/// Leading whitespace before `!:strength` and extra spaces before the
/// operator or number are accepted.
#[test]
fn test_parse_strength_directive_with_whitespace() {
    // (directive line, expected modifier)
    let cases = [
        ("  !:strength +10", StrengthModifier::Add(10)),
        ("\t!:strength -5", StrengthModifier::Subtract(5)),
        ("!:strength  *2", StrengthModifier::Multiply(2)),
        ("!:strength   50", StrengthModifier::Set(50)),
    ];

    for (directive, expected) in cases {
        assert_eq!(parse_strength_directive(directive), Ok(("", expected)));
    }
}
2790
/// The parser stops after the number and hands back whatever follows it.
#[test]
fn test_parse_strength_directive_with_remaining_input() {
    // Should leave remaining content after the directive
    // (directive line, expected leftover, expected modifier)
    let cases = [
        ("!:strength +10 extra", " extra", StrengthModifier::Add(10)),
        ("!:strength 50\n", "\n", StrengthModifier::Set(50)),
    ];

    for (directive, leftover, expected) in cases {
        assert_eq!(
            parse_strength_directive(directive),
            Ok((leftover, expected))
        );
    }
}
2803
/// Inputs that are not a `!:strength` directive must produce a parse error.
#[test]
fn test_parse_strength_directive_invalid() {
    // Should fail on invalid input
    for rejected in ["", "!:invalid", "strength +10", "0 byte 1"] {
        assert!(
            parse_strength_directive(rejected).is_err(),
            "expected parse error for: {rejected:?}"
        );
    }
}
2812
/// `is_strength_directive` detects `!:strength` lines (optionally indented)
/// and rejects rules, comments, empty lines and other `!:` directives.
#[test]
fn test_is_strength_directive() {
    let directives = [
        "!:strength +10",
        "!:strength -5",
        "!:strength 50",
        "  !:strength +10",
        "\t!:strength *2",
    ];
    for line in directives {
        assert!(
            is_strength_directive(line),
            "expected strength directive: {line:?}"
        );
    }

    let others = ["0 byte 1", "# comment", "", "!:mime application/pdf"];
    for line in others {
        assert!(
            !is_strength_directive(line),
            "expected non-directive: {line:?}"
        );
    }
}
2826
/// A mask using all 64 bits must be accepted as a `BitwiseAndMask`.
#[test]
fn test_parse_type_and_operator_quad_full_width_mask() {
    // Full u64 mask (0xffffffffffffffff) must parse successfully, not silently
    // fall back to standalone '&' leaving the mask as leftover input.
    let parsed = parse_type_and_operator("uquad&0xffffffffffffffff");
    let (rest, (kind, operator)) = parsed.unwrap();

    let native_unsigned_quad = TypeKind::Quad {
        endian: Endianness::Native,
        signed: false,
    };

    assert_eq!(rest, "");
    assert_eq!(kind, native_unsigned_quad);
    assert_eq!(operator, Some(Operator::BitwiseAndMask(u64::MAX)));
}
2842
/// Mask forms accepted after a quad type: hex, decimal, and a bare `&`.
#[test]
fn test_parse_type_and_operator_quad_mask_various() {
    // (input, expected operator)
    let cases = [
        // Hex mask within i64 range (0x7fff_ffff_ffff_ffff == i64::MAX)
        (
            "quad&0x7fffffffffffffff",
            Operator::BitwiseAndMask(0x7fff_ffff_ffff_ffff),
        ),
        // Decimal mask
        ("uquad&255", Operator::BitwiseAndMask(255)),
        // Standalone '&' (no digits following) still works
        ("uquad& ", Operator::BitwiseAnd),
    ];

    for (input, expected_op) in cases {
        let (remaining, (_, op)) = parse_type_and_operator(input).unwrap();
        assert_eq!(remaining, "");
        assert_eq!(op, Some(expected_op));
    }
}
2860
/// Masks that do not fit in a `u64` must be rejected with a parse error.
#[test]
fn test_parse_type_and_operator_mask_overflow_fails() {
    // (input, failure message shown when the parser wrongly accepts it)
    let overflowing = [
        // Decimal value exceeding u64::MAX must fail, not silently reinterpret
        (
            "uquad&99999999999999999999",
            "overflowing mask should produce a parse error",
        ),
        // Hex value exceeding u64 (17 hex digits) must fail
        (
            "uquad&0x1ffffffffffffffff",
            "overflowing hex mask should produce a parse error",
        ),
    ];

    for (input, reason) in overflowing {
        assert!(parse_type_and_operator(input).is_err(), "{reason}");
    }
}