Skip to main content

libmagic_rs/parser/
grammar.rs

1// Copyright (c) 2025-2026 the libmagic-rs contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! Grammar parsing for magic files using nom parser combinators
5//!
6//! This module implements the parsing logic for magic file syntax, converting
7//! text-based magic rules into the AST representation defined in ast.rs.
8
9use nom::{
10    IResult, Parser,
11    branch::alt,
12    bytes::complete::{tag, take_while},
13    character::complete::{char, digit1, hex_digit1, multispace0, none_of, one_of},
14    combinator::{map, opt, recognize},
15    error::Error as NomError,
16    multi::many0,
17    sequence::pair,
18};
19
20use crate::parser::ast::{
21    Endianness, MagicRule, OffsetSpec, Operator, StrengthModifier, TypeKind, Value,
22};
23
24/// Parse a decimal number with overflow protection
25fn parse_decimal_number(input: &str) -> IResult<&str, i64> {
26    let (input, digits) = digit1(input)?;
27
28    // Check for potential overflow before parsing
29    if digits.len() > 19 {
30        // i64::MAX has 19 digits, so anything longer will definitely overflow
31        return Err(nom::Err::Error(nom::error::Error::new(
32            input,
33            nom::error::ErrorKind::MapRes,
34        )));
35    }
36
37    let number = digits.parse::<i64>().map_err(|_| {
38        nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::MapRes))
39    })?;
40    Ok((input, number))
41}
42
43/// Parse a hexadecimal number (with 0x prefix) with overflow protection
44fn parse_hex_number(input: &str) -> IResult<&str, i64> {
45    let (input, _) = tag("0x")(input)?;
46    let (input, hex_str) = hex_digit1(input)?;
47
48    // Check for potential overflow - i64 can hold up to 16 hex digits (0x7FFFFFFFFFFFFFFF)
49    if hex_str.len() > 16 {
50        return Err(nom::Err::Error(nom::error::Error::new(
51            input,
52            nom::error::ErrorKind::MapRes,
53        )));
54    }
55
56    let number = i64::from_str_radix(hex_str, 16).map_err(|_| {
57        nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::MapRes))
58    })?;
59
60    Ok((input, number))
61}
62
63/// Parse a decimal or hexadecimal number
64///
65/// Supports both decimal (123, -456) and hexadecimal (0x1a2b, -0xFF) formats.
66///
67/// # Examples
68///
69/// ```
70/// use libmagic_rs::parser::grammar::parse_number;
71///
72/// assert_eq!(parse_number("123"), Ok(("", 123)));
73/// assert_eq!(parse_number("0x1a"), Ok(("", 26)));
74/// assert_eq!(parse_number("-42"), Ok(("", -42)));
75/// assert_eq!(parse_number("-0xFF"), Ok(("", -255)));
76/// ```
77///
78/// # Errors
79///
80/// Returns a nom parsing error if:
81/// - Input is empty or contains no valid digits
82/// - Hexadecimal number lacks proper "0x" prefix or contains invalid hex digits
83/// - Number cannot be parsed as a valid `i64` value
84/// - Input contains invalid characters for the detected number format
85pub fn parse_number(input: &str) -> IResult<&str, i64> {
86    let (input, sign) = opt(char('-')).parse(input)?;
87    let is_negative = sign.is_some();
88
89    // Check if input starts with "0x" - if so, it must be a valid hex number
90    let (input, number) = if input.starts_with("0x") {
91        parse_hex_number(input)?
92    } else {
93        parse_decimal_number(input)?
94    };
95
96    // Apply sign with overflow checking
97    let result = if is_negative {
98        number.checked_neg().ok_or_else(|| {
99            nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::MapRes))
100        })?
101    } else {
102        number
103    };
104
105    Ok((input, result))
106}
107
108/// Parse an offset specification for absolute offsets
109///
110/// Supports decimal and hexadecimal formats, both positive and negative.
111///
112/// # Examples
113///
114/// ```
115/// use libmagic_rs::parser::grammar::parse_offset;
116/// use libmagic_rs::parser::ast::OffsetSpec;
117///
118/// assert_eq!(parse_offset("0"), Ok(("", OffsetSpec::Absolute(0))));
119/// assert_eq!(parse_offset("123"), Ok(("", OffsetSpec::Absolute(123))));
120/// assert_eq!(parse_offset("0x10"), Ok(("", OffsetSpec::Absolute(16))));
121/// assert_eq!(parse_offset("-4"), Ok(("", OffsetSpec::Absolute(-4))));
122/// assert_eq!(parse_offset("-0xFF"), Ok(("", OffsetSpec::Absolute(-255))));
123/// ```
124///
125/// # Errors
126///
127/// Returns a nom parsing error if:
128/// - The input contains invalid number format (propagated from `parse_number`)
129/// - Input is empty or contains no parseable offset value
130/// - The offset value cannot be represented as a valid `i64`
131pub fn parse_offset(input: &str) -> IResult<&str, OffsetSpec> {
132    let (input, _) = multispace0(input)?;
133    let (input, offset_value) = parse_number(input)?;
134    let (input, _) = multispace0(input)?;
135
136    Ok((input, OffsetSpec::Absolute(offset_value)))
137}
138
139/// Parse comparison operators for magic rules
140///
141/// Supports both symbolic and text representations of operators:
142/// - `=` or `==` for equality
143/// - `!=` or `<>` for inequality
144/// - `<` for less-than
145/// - `>` for greater-than
146/// - `<=` for less-than-or-equal
147/// - `>=` for greater-than-or-equal
148/// - `&` for bitwise AND
149///
150/// # Examples
151///
152/// ```
153/// use libmagic_rs::parser::grammar::parse_operator;
154/// use libmagic_rs::parser::ast::Operator;
155///
156/// assert_eq!(parse_operator("="), Ok(("", Operator::Equal)));
157/// assert_eq!(parse_operator("=="), Ok(("", Operator::Equal)));
158/// assert_eq!(parse_operator("!="), Ok(("", Operator::NotEqual)));
159/// assert_eq!(parse_operator("<>"), Ok(("", Operator::NotEqual)));
160/// assert_eq!(parse_operator("<"), Ok(("", Operator::LessThan)));
161/// assert_eq!(parse_operator(">"), Ok(("", Operator::GreaterThan)));
162/// assert_eq!(parse_operator("<="), Ok(("", Operator::LessEqual)));
163/// assert_eq!(parse_operator(">="), Ok(("", Operator::GreaterEqual)));
164/// assert_eq!(parse_operator("&"), Ok(("", Operator::BitwiseAnd)));
165/// ```
166///
167/// # Errors
168///
169/// Returns a nom parsing error if:
170/// - Input does not start with a recognized operator symbol
171/// - Input is empty or contains no valid operator
172/// - Operator syntax is incomplete (e.g., just `!` without `=`)
173pub fn parse_operator(input: &str) -> IResult<&str, Operator> {
174    let (input, _) = multispace0(input)?;
175
176    // Try to parse each operator, starting with longer ones first
177    if let Ok((remaining, _)) = tag::<&str, &str, nom::error::Error<&str>>("==")(input) {
178        // Check that we don't have another '=' following (to reject "===")
179        if remaining.starts_with('=') {
180            return Err(nom::Err::Error(nom::error::Error::new(
181                input,
182                nom::error::ErrorKind::Tag,
183            )));
184        }
185        let (remaining, _) = multispace0(remaining)?;
186        return Ok((remaining, Operator::Equal));
187    }
188
189    if let Ok((remaining, _)) = tag::<&str, &str, nom::error::Error<&str>>("!=")(input) {
190        let (remaining, _) = multispace0(remaining)?;
191        return Ok((remaining, Operator::NotEqual));
192    }
193
194    if let Ok((remaining, _)) = tag::<&str, &str, nom::error::Error<&str>>("<>")(input) {
195        let (remaining, _) = multispace0(remaining)?;
196        return Ok((remaining, Operator::NotEqual));
197    }
198
199    if let Ok((remaining, _)) = tag::<&str, &str, nom::error::Error<&str>>("<=")(input) {
200        let (remaining, _) = multispace0(remaining)?;
201        return Ok((remaining, Operator::LessEqual));
202    }
203
204    if let Ok((remaining, _)) = tag::<&str, &str, nom::error::Error<&str>>(">=")(input) {
205        let (remaining, _) = multispace0(remaining)?;
206        return Ok((remaining, Operator::GreaterEqual));
207    }
208
209    if let Ok((remaining, _)) = tag::<&str, &str, nom::error::Error<&str>>("=")(input) {
210        // Check that we don't have another '=' following (to reject "==")
211        if remaining.starts_with('=') {
212            return Err(nom::Err::Error(nom::error::Error::new(
213                input,
214                nom::error::ErrorKind::Tag,
215            )));
216        }
217        let (remaining, _) = multispace0(remaining)?;
218        return Ok((remaining, Operator::Equal));
219    }
220
221    if let Ok((remaining, _)) = tag::<&str, &str, nom::error::Error<&str>>("&")(input) {
222        // Check that we don't have another '&' following (to reject "&&")
223        if remaining.starts_with('&') {
224            return Err(nom::Err::Error(nom::error::Error::new(
225                input,
226                nom::error::ErrorKind::Tag,
227            )));
228        }
229        let (remaining, _) = multispace0(remaining)?;
230        return Ok((remaining, Operator::BitwiseAnd));
231    }
232
233    if let Ok((remaining, _)) = tag::<&str, &str, nom::error::Error<&str>>("<")(input) {
234        let (remaining, _) = multispace0(remaining)?;
235        return Ok((remaining, Operator::LessThan));
236    }
237
238    if let Ok((remaining, _)) = tag::<&str, &str, nom::error::Error<&str>>(">")(input) {
239        let (remaining, _) = multispace0(remaining)?;
240        return Ok((remaining, Operator::GreaterThan));
241    }
242
243    // If no operator matches, return an error
244    Err(nom::Err::Error(nom::error::Error::new(
245        input,
246        nom::error::ErrorKind::Tag,
247    )))
248}
249
250/// Parse a single hex byte with \x prefix
251fn parse_hex_byte_with_prefix(input: &str) -> IResult<&str, u8> {
252    let (input, _) = tag("\\x")(input)?;
253    let (input, hex_str) = recognize(pair(
254        one_of("0123456789abcdefABCDEF"),
255        one_of("0123456789abcdefABCDEF"),
256    ))
257    .parse(input)?;
258    let byte_val = u8::from_str_radix(hex_str, 16)
259        .map_err(|_| nom::Err::Error(NomError::new(input, nom::error::ErrorKind::MapRes)))?;
260    Ok((input, byte_val))
261}
262
263/// Parse a hex byte sequence starting with \x prefix
264fn parse_hex_bytes_with_prefix(input: &str) -> IResult<&str, Vec<u8>> {
265    if input.starts_with("\\x") {
266        many0(parse_hex_byte_with_prefix).parse(input)
267    } else {
268        Err(nom::Err::Error(NomError::new(
269            input,
270            nom::error::ErrorKind::Tag,
271        )))
272    }
273}
274
275/// Parse a mixed hex and ASCII sequence (like \x7fELF)
276fn parse_mixed_hex_ascii(input: &str) -> IResult<&str, Vec<u8>> {
277    // Must start with \ to be considered an escape sequence
278    if !input.starts_with('\\') {
279        return Err(nom::Err::Error(NomError::new(
280            input,
281            nom::error::ErrorKind::Tag,
282        )));
283    }
284
285    let mut bytes = Vec::new();
286    let mut remaining = input;
287
288    while !remaining.is_empty() {
289        // Try to parse escape sequences first (hex, octal, etc.)
290        if let Ok((new_remaining, escaped_char)) = parse_escape_sequence(remaining) {
291            bytes.push(escaped_char as u8);
292            remaining = new_remaining;
293        } else if let Ok((new_remaining, hex_byte)) = parse_hex_byte_with_prefix(remaining) {
294            bytes.push(hex_byte);
295            remaining = new_remaining;
296        } else if let Ok((new_remaining, ascii_char)) =
297            none_of::<&str, &str, NomError<&str>>(" \t\n\r")(remaining)
298        {
299            // Parse regular ASCII character (not whitespace)
300            bytes.push(ascii_char as u8);
301            remaining = new_remaining;
302        } else {
303            // Stop if we can't parse anything more
304            break;
305        }
306    }
307
308    if bytes.is_empty() {
309        Err(nom::Err::Error(NomError::new(
310            input,
311            nom::error::ErrorKind::Tag,
312        )))
313    } else {
314        Ok((remaining, bytes))
315    }
316}
317
318/// Parse a hex byte sequence without prefix (only if it looks like pure hex bytes)
319fn parse_hex_bytes_no_prefix(input: &str) -> IResult<&str, Vec<u8>> {
320    // Only parse as hex bytes if:
321    // 1. Input has even number of hex digits (pairs)
322    // 2. All characters are hex digits
323    // 3. Doesn't start with 0x (that's a number)
324    // 4. Contains at least one non-decimal digit (a-f, A-F)
325
326    if input.starts_with("0x") || input.starts_with('-') {
327        return Err(nom::Err::Error(NomError::new(
328            input,
329            nom::error::ErrorKind::Tag,
330        )));
331    }
332
333    let hex_chars: String = input.chars().take_while(char::is_ascii_hexdigit).collect();
334
335    if hex_chars.is_empty() || hex_chars.len() % 2 != 0 {
336        return Err(nom::Err::Error(NomError::new(
337            input,
338            nom::error::ErrorKind::Tag,
339        )));
340    }
341
342    // Check if it contains non-decimal hex digits (a-f, A-F)
343    let has_hex_letters = hex_chars
344        .chars()
345        .any(|c| matches!(c, 'a'..='f' | 'A'..='F'));
346    if !has_hex_letters {
347        return Err(nom::Err::Error(NomError::new(
348            input,
349            nom::error::ErrorKind::Tag,
350        )));
351    }
352
353    // Parse pairs of hex digits
354    let mut bytes = Vec::with_capacity(hex_chars.len() / 2);
355    let mut chars = hex_chars.chars();
356    while let (Some(c1), Some(c2)) = (chars.next(), chars.next()) {
357        // Avoid format! allocation by parsing digits directly
358        let digit1 = c1
359            .to_digit(16)
360            .ok_or_else(|| nom::Err::Error(NomError::new(input, nom::error::ErrorKind::MapRes)))?;
361        let digit2 = c2
362            .to_digit(16)
363            .ok_or_else(|| nom::Err::Error(NomError::new(input, nom::error::ErrorKind::MapRes)))?;
364        let byte_val = u8::try_from((digit1 << 4) | digit2)
365            .map_err(|_| nom::Err::Error(NomError::new(input, nom::error::ErrorKind::MapRes)))?;
366        bytes.push(byte_val);
367    }
368
369    let remaining = &input[hex_chars.len()..];
370    Ok((remaining, bytes))
371}
372
373/// Parse a hex byte sequence (e.g., "\\x7f\\x45\\x4c\\x46", "7f454c46", or "\\x7fELF")
374fn parse_hex_bytes(input: &str) -> IResult<&str, Vec<u8>> {
375    alt((
376        parse_mixed_hex_ascii,
377        parse_hex_bytes_with_prefix,
378        parse_hex_bytes_no_prefix,
379    ))
380    .parse(input)
381}
382
383/// Parse escape sequences in strings
384fn parse_escape_sequence(input: &str) -> IResult<&str, char> {
385    let (input, _) = char('\\')(input)?;
386
387    // Try to parse octal escape sequence first (\377, \123, etc.)
388    if let Ok((remaining, octal_str)) = recognize(pair(
389        one_of::<&str, &str, NomError<&str>>("0123"),
390        pair(
391            one_of::<&str, &str, NomError<&str>>("01234567"),
392            one_of::<&str, &str, NomError<&str>>("01234567"),
393        ),
394    ))
395    .parse(input)
396    {
397        if let Ok(octal_value) = u8::from_str_radix(octal_str, 8) {
398            return Ok((remaining, octal_value as char));
399        }
400    }
401
402    // Parse standard escape sequences
403    let (input, escaped_char) = one_of("nrt\\\"'0")(input)?;
404
405    let result_char = match escaped_char {
406        'n' => '\n',
407        'r' => '\r',
408        't' => '\t',
409        '\\' => '\\',
410        '"' => '"',
411        '\'' => '\'',
412        '0' => '\0',
413        _ => unreachable!("one_of constrains input to known escape characters"),
414    };
415
416    Ok((input, result_char))
417}
418
419/// Parse a quoted string with escape sequences
420fn parse_quoted_string(input: &str) -> IResult<&str, String> {
421    let (input, _) = multispace0(input)?;
422    let (input, _) = char('"')(input)?;
423
424    let mut result = String::new();
425    let mut remaining = input;
426
427    loop {
428        // Try to parse an escape sequence first
429        if let Ok((new_remaining, escaped_char)) = parse_escape_sequence(remaining) {
430            result.push(escaped_char);
431            remaining = new_remaining;
432            continue;
433        }
434
435        // If no escape sequence, try to parse a regular character (not quote or backslash)
436        if let Ok((new_remaining, regular_char)) =
437            none_of::<&str, &str, NomError<&str>>("\"\\")(remaining)
438        {
439            result.push(regular_char);
440            remaining = new_remaining;
441            continue;
442        }
443
444        // If neither worked, we should be at the closing quote
445        break;
446    }
447
448    let (remaining, _) = char('"')(remaining)?;
449    let (remaining, _) = multispace0(remaining)?;
450
451    Ok((remaining, result))
452}
453
454/// Parse a numeric value (integer)
455fn parse_numeric_value(input: &str) -> IResult<&str, Value> {
456    let (input, _) = multispace0(input)?;
457    let (input, number) = parse_number(input)?;
458    let (input, _) = multispace0(input)?;
459
460    // Convert to appropriate Value variant based on sign
461    let value = if number >= 0 {
462        Value::Uint(number.unsigned_abs())
463    } else {
464        Value::Int(number)
465    };
466
467    Ok((input, value))
468}
469
470/// Parse string and numeric literals for magic rule values
471///
472/// Supports:
473/// - Quoted strings with escape sequences: "Hello\nWorld", "ELF\0"
474/// - Numeric literals (decimal): 123, -456
475/// - Numeric literals (hexadecimal): 0x1a2b, -0xFF
476/// - Hex byte sequences: \\x7f\\x45\\x4c\\x46 or 7f454c46
477///
478/// # Examples
479///
480/// ```
481/// use libmagic_rs::parser::grammar::parse_value;
482/// use libmagic_rs::parser::ast::Value;
483///
484/// // String values
485/// assert_eq!(parse_value("\"Hello\""), Ok(("", Value::String("Hello".to_string()))));
486/// assert_eq!(parse_value("\"Line1\\nLine2\""), Ok(("", Value::String("Line1\nLine2".to_string()))));
487///
488/// // Numeric values
489/// assert_eq!(parse_value("123"), Ok(("", Value::Uint(123))));
490/// assert_eq!(parse_value("-456"), Ok(("", Value::Int(-456))));
491/// assert_eq!(parse_value("0x1a"), Ok(("", Value::Uint(26))));
492/// assert_eq!(parse_value("-0xFF"), Ok(("", Value::Int(-255))));
493///
494/// // Hex byte sequences
495/// assert_eq!(parse_value("\\x7f\\x45"), Ok(("", Value::Bytes(vec![0x7f, 0x45]))));
496/// ```
497///
498/// # Errors
499///
500/// Returns a nom parsing error if:
501/// - Input is empty or contains no valid value
502/// - Quoted string is not properly terminated
503/// - Numeric value cannot be parsed as a valid integer
504/// - Hex byte sequence contains invalid hex digits
505/// - Input contains invalid characters for the detected value format
506pub fn parse_value(input: &str) -> IResult<&str, Value> {
507    let (input, _) = multispace0(input)?;
508
509    // Handle empty input case - should fail for magic rules
510    if input.is_empty() {
511        return Err(nom::Err::Error(NomError::new(
512            input,
513            nom::error::ErrorKind::Tag,
514        )));
515    }
516
517    // Try to parse different value types in order of specificity
518    let (input, value) = alt((
519        // Try quoted string first
520        map(parse_quoted_string, Value::String),
521        // Try hex byte sequence before numeric (to catch patterns like "7f", "ab", "\\x7fELF", etc.)
522        map(parse_hex_bytes, Value::Bytes),
523        // Try numeric value last (for pure numbers like 0x123, 1, etc.)
524        parse_numeric_value,
525    ))
526    .parse(input)?;
527
528    Ok((input, value))
529}
530
531#[cfg(test)]
532mod tests {
533    use super::*;
534
535    /// Helper function to test parsing with various whitespace patterns
536    #[allow(dead_code)] // TODO: Use this helper in future whitespace tests
537    fn test_with_whitespace_variants<T, F>(input: &str, expected: &T, parser: F)
538    where
539        T: Clone + PartialEq + std::fmt::Debug,
540        F: Fn(&str) -> IResult<&str, T>,
541    {
542        // Test with various whitespace patterns - pre-allocate Vec with known capacity
543        let mut whitespace_variants = Vec::with_capacity(9);
544        whitespace_variants.extend([
545            format!(" {input}"),    // Leading space
546            format!("  {input}"),   // Leading spaces
547            format!("\t{input}"),   // Leading tab
548            format!("{input} "),    // Trailing space
549            format!("{input}  "),   // Trailing spaces
550            format!("{input}\t"),   // Trailing tab
551            format!(" {input} "),   // Both leading and trailing space
552            format!("  {input}  "), // Both leading and trailing spaces
553            format!("\t{input}\t"), // Both leading and trailing tabs
554        ]);
555
556        for variant in whitespace_variants {
557            assert_eq!(
558                parser(&variant),
559                Ok(("", expected.clone())),
560                "Failed to parse with whitespace: '{variant}'"
561            );
562        }
563    }
564
565    /// Helper function to test number parsing with remaining input
566    fn test_number_with_remaining_input() {
567        // Pre-allocate with known capacity for better performance
568        let test_cases = [
569            ("123abc", 123, "abc"),
570            ("0xFF rest", 255, " rest"),
571            ("-42 more", -42, " more"),
572            ("0x10,next", 16, ",next"),
573        ];
574
575        for (input, expected_num, expected_remaining) in test_cases {
576            assert_eq!(
577                parse_number(input),
578                Ok((expected_remaining, expected_num)),
579                "Failed to parse number with remaining input: '{input}'"
580            );
581        }
582    }
583
584    #[test]
585    fn test_parse_decimal_number() {
586        assert_eq!(parse_decimal_number("123"), Ok(("", 123)));
587        assert_eq!(parse_decimal_number("0"), Ok(("", 0)));
588        assert_eq!(parse_decimal_number("999"), Ok(("", 999)));
589
590        // Should fail on non-digits
591        assert!(parse_decimal_number("abc").is_err());
592        assert!(parse_decimal_number("").is_err());
593    }
594
595    #[test]
596    fn test_parse_hex_number() {
597        assert_eq!(parse_hex_number("0x0"), Ok(("", 0)));
598        assert_eq!(parse_hex_number("0x10"), Ok(("", 16)));
599        assert_eq!(parse_hex_number("0xFF"), Ok(("", 255)));
600        assert_eq!(parse_hex_number("0xabc"), Ok(("", 2748)));
601        assert_eq!(parse_hex_number("0xABC"), Ok(("", 2748)));
602
603        // Should fail without 0x prefix
604        assert!(parse_hex_number("FF").is_err());
605        assert!(parse_hex_number("10").is_err());
606
607        // Should fail on invalid hex digits
608        assert!(parse_hex_number("0xGG").is_err());
609    }
610
611    #[test]
612    fn test_parse_number_positive() {
613        // Decimal numbers
614        assert_eq!(parse_number("0"), Ok(("", 0)));
615        assert_eq!(parse_number("123"), Ok(("", 123)));
616        assert_eq!(parse_number("999"), Ok(("", 999)));
617
618        // Hexadecimal numbers
619        assert_eq!(parse_number("0x0"), Ok(("", 0)));
620        assert_eq!(parse_number("0x10"), Ok(("", 16)));
621        assert_eq!(parse_number("0xFF"), Ok(("", 255)));
622        assert_eq!(parse_number("0xabc"), Ok(("", 2748)));
623    }
624
625    #[test]
626    fn test_parse_number_negative() {
627        // Negative decimal numbers
628        assert_eq!(parse_number("-1"), Ok(("", -1)));
629        assert_eq!(parse_number("-123"), Ok(("", -123)));
630        assert_eq!(parse_number("-999"), Ok(("", -999)));
631
632        // Negative hexadecimal numbers
633        assert_eq!(parse_number("-0x1"), Ok(("", -1)));
634        assert_eq!(parse_number("-0x10"), Ok(("", -16)));
635        assert_eq!(parse_number("-0xFF"), Ok(("", -255)));
636        assert_eq!(parse_number("-0xabc"), Ok(("", -2748)));
637    }
638
639    #[test]
640    fn test_parse_number_edge_cases() {
641        // Zero with different formats
642        assert_eq!(parse_number("0"), Ok(("", 0)));
643        assert_eq!(parse_number("-0"), Ok(("", 0)));
644        assert_eq!(parse_number("0x0"), Ok(("", 0)));
645        assert_eq!(parse_number("-0x0"), Ok(("", 0)));
646
647        // Large numbers
648        assert_eq!(parse_number("2147483647"), Ok(("", 2_147_483_647))); // i32::MAX
649        assert_eq!(parse_number("-2147483648"), Ok(("", -2_147_483_648))); // i32::MIN
650        assert_eq!(parse_number("0x7FFFFFFF"), Ok(("", 2_147_483_647))); // i32::MAX in hex
651
652        // Should fail on invalid input
653        assert!(parse_number("").is_err());
654        assert!(parse_number("abc").is_err());
655        assert!(parse_number("0xGG").is_err());
656        assert!(parse_number("--123").is_err());
657    }
658
    #[test]
    fn test_parse_number_with_remaining_input() {
        // The assertions live in the helper of the same name (minus the
        // `parse_` part); this wrapper only registers them as a test.
        test_number_with_remaining_input();
    }
664
665    #[test]
666    fn test_parse_offset_absolute_positive() {
667        assert_eq!(parse_offset("0"), Ok(("", OffsetSpec::Absolute(0))));
668        assert_eq!(parse_offset("123"), Ok(("", OffsetSpec::Absolute(123))));
669        assert_eq!(parse_offset("999"), Ok(("", OffsetSpec::Absolute(999))));
670
671        // Hexadecimal offsets
672        assert_eq!(parse_offset("0x0"), Ok(("", OffsetSpec::Absolute(0))));
673        assert_eq!(parse_offset("0x10"), Ok(("", OffsetSpec::Absolute(16))));
674        assert_eq!(parse_offset("0xFF"), Ok(("", OffsetSpec::Absolute(255))));
675        assert_eq!(parse_offset("0xabc"), Ok(("", OffsetSpec::Absolute(2748))));
676    }
677
678    #[test]
679    fn test_parse_offset_absolute_negative() {
680        assert_eq!(parse_offset("-1"), Ok(("", OffsetSpec::Absolute(-1))));
681        assert_eq!(parse_offset("-123"), Ok(("", OffsetSpec::Absolute(-123))));
682        assert_eq!(parse_offset("-999"), Ok(("", OffsetSpec::Absolute(-999))));
683
684        // Negative hexadecimal offsets
685        assert_eq!(parse_offset("-0x1"), Ok(("", OffsetSpec::Absolute(-1))));
686        assert_eq!(parse_offset("-0x10"), Ok(("", OffsetSpec::Absolute(-16))));
687        assert_eq!(parse_offset("-0xFF"), Ok(("", OffsetSpec::Absolute(-255))));
688        assert_eq!(
689            parse_offset("-0xabc"),
690            Ok(("", OffsetSpec::Absolute(-2748)))
691        );
692    }
693
694    #[test]
695    fn test_parse_offset_with_whitespace() {
696        // Leading whitespace
697        assert_eq!(parse_offset(" 123"), Ok(("", OffsetSpec::Absolute(123))));
698        assert_eq!(parse_offset("  0x10"), Ok(("", OffsetSpec::Absolute(16))));
699        assert_eq!(parse_offset("\t-42"), Ok(("", OffsetSpec::Absolute(-42))));
700
701        // Trailing whitespace
702        assert_eq!(parse_offset("123 "), Ok(("", OffsetSpec::Absolute(123))));
703        assert_eq!(parse_offset("0x10  "), Ok(("", OffsetSpec::Absolute(16))));
704        assert_eq!(parse_offset("-42\t"), Ok(("", OffsetSpec::Absolute(-42))));
705
706        // Both leading and trailing whitespace
707        assert_eq!(parse_offset(" 123 "), Ok(("", OffsetSpec::Absolute(123))));
708        assert_eq!(parse_offset("  0x10  "), Ok(("", OffsetSpec::Absolute(16))));
709        assert_eq!(parse_offset("\t-42\t"), Ok(("", OffsetSpec::Absolute(-42))));
710    }
711
712    #[test]
713    fn test_parse_offset_with_remaining_input() {
714        // Should parse offset and leave remaining input
715        assert_eq!(
716            parse_offset("123 byte"),
717            Ok(("byte", OffsetSpec::Absolute(123)))
718        );
719        assert_eq!(parse_offset("0xFF ="), Ok(("=", OffsetSpec::Absolute(255))));
720        assert_eq!(
721            parse_offset("-42,next"),
722            Ok((",next", OffsetSpec::Absolute(-42)))
723        );
724        assert_eq!(
725            parse_offset("0x10\tlong"),
726            Ok(("long", OffsetSpec::Absolute(16)))
727        );
728    }
729
730    #[test]
731    fn test_parse_offset_edge_cases() {
732        // Zero with different formats
733        assert_eq!(parse_offset("0"), Ok(("", OffsetSpec::Absolute(0))));
734        assert_eq!(parse_offset("-0"), Ok(("", OffsetSpec::Absolute(0))));
735        assert_eq!(parse_offset("0x0"), Ok(("", OffsetSpec::Absolute(0))));
736        assert_eq!(parse_offset("-0x0"), Ok(("", OffsetSpec::Absolute(0))));
737
738        // Large offsets
739        assert_eq!(
740            parse_offset("2147483647"),
741            Ok(("", OffsetSpec::Absolute(2_147_483_647)))
742        );
743        assert_eq!(
744            parse_offset("-2147483648"),
745            Ok(("", OffsetSpec::Absolute(-2_147_483_648)))
746        );
747        assert_eq!(
748            parse_offset("0x7FFFFFFF"),
749            Ok(("", OffsetSpec::Absolute(2_147_483_647)))
750        );
751
752        // Should fail on invalid input
753        assert!(parse_offset("").is_err());
754        assert!(parse_offset("abc").is_err());
755        assert!(parse_offset("0xGG").is_err());
756        assert!(parse_offset("--123").is_err());
757    }
758
759    #[test]
760    fn test_parse_offset_common_magic_file_values() {
761        // Common offsets found in magic files
762        assert_eq!(parse_offset("0"), Ok(("", OffsetSpec::Absolute(0)))); // File start
763        assert_eq!(parse_offset("4"), Ok(("", OffsetSpec::Absolute(4)))); // After magic number
764        assert_eq!(parse_offset("16"), Ok(("", OffsetSpec::Absolute(16)))); // Common header offset
765        assert_eq!(parse_offset("0x10"), Ok(("", OffsetSpec::Absolute(16)))); // Same as above in hex
766        assert_eq!(parse_offset("512"), Ok(("", OffsetSpec::Absolute(512)))); // Sector boundary
767        assert_eq!(parse_offset("0x200"), Ok(("", OffsetSpec::Absolute(512)))); // Same in hex
768
769        // Negative offsets (from end of file)
770        assert_eq!(parse_offset("-4"), Ok(("", OffsetSpec::Absolute(-4)))); // 4 bytes from end
771        assert_eq!(parse_offset("-16"), Ok(("", OffsetSpec::Absolute(-16)))); // 16 bytes from end
772        assert_eq!(parse_offset("-0x10"), Ok(("", OffsetSpec::Absolute(-16)))); // Same in hex
773    }
774
775    #[test]
776    fn test_parse_offset_boundary_values() {
777        // Test boundary values that might cause issues
778        assert_eq!(parse_offset("1"), Ok(("", OffsetSpec::Absolute(1))));
779        assert_eq!(parse_offset("-1"), Ok(("", OffsetSpec::Absolute(-1))));
780
781        // Powers of 2 (common in binary formats)
782        assert_eq!(parse_offset("256"), Ok(("", OffsetSpec::Absolute(256))));
783        assert_eq!(parse_offset("0x100"), Ok(("", OffsetSpec::Absolute(256))));
784        assert_eq!(parse_offset("1024"), Ok(("", OffsetSpec::Absolute(1024))));
785        assert_eq!(parse_offset("0x400"), Ok(("", OffsetSpec::Absolute(1024))));
786
787        // Large but reasonable file offsets
788        assert_eq!(
789            parse_offset("1048576"),
790            Ok(("", OffsetSpec::Absolute(1_048_576)))
791        ); // 1MB
792        assert_eq!(
793            parse_offset("0x100000"),
794            Ok(("", OffsetSpec::Absolute(1_048_576)))
795        );
796    }
797
798    // Operator parsing tests
799    #[test]
800    fn test_parse_operator_equality() {
801        // Single equals sign
802        assert_eq!(parse_operator("="), Ok(("", Operator::Equal)));
803
804        // Double equals sign
805        assert_eq!(parse_operator("=="), Ok(("", Operator::Equal)));
806
807        // With whitespace
808        assert_eq!(parse_operator(" = "), Ok(("", Operator::Equal)));
809        assert_eq!(parse_operator("  ==  "), Ok(("", Operator::Equal)));
810        assert_eq!(parse_operator("\t=\t"), Ok(("", Operator::Equal)));
811    }
812
#[test]
fn test_parse_operator_inequality() {
    // `!=` and its alternative spelling `<>`, bare and with surrounding
    // whitespace, must be fully consumed and yield `Operator::NotEqual`.
    for text in ["!=", "<>", " != ", "  <>  ", "\t!=\t"] {
        assert_eq!(
            parse_operator(text),
            Ok(("", Operator::NotEqual)),
            "inequality operator: {text:?}"
        );
    }
}
826
#[test]
fn test_parse_operator_bitwise_and() {
    // A single ampersand, bare and with surrounding whitespace.
    for text in ["&", " & ", "  &  ", "\t&\t"] {
        assert_eq!(
            parse_operator(text),
            Ok(("", Operator::BitwiseAnd)),
            "bitwise-and operator: {text:?}"
        );
    }
}
837
#[test]
fn test_parse_operator_with_remaining_input() {
    // (input, unparsed remainder, operator): the operator and the whitespace
    // after it are consumed, everything else is handed back untouched.
    let cases = [
        ("= 123", "123", Operator::Equal),
        ("!= value", "value", Operator::NotEqual),
        ("& 0xFF", "0xFF", Operator::BitwiseAnd),
        ("== \"string\"", "\"string\"", Operator::Equal),
        ("<> test", "test", Operator::NotEqual),
    ];

    for (text, rest, expected) in cases {
        assert_eq!(
            parse_operator(text),
            Ok((rest, expected)),
            "operator with trailing input: {text:?}"
        );
    }
}
853
#[test]
fn test_parse_operator_precedence() {
    // Two-character operators must win over their one-character prefixes.
    // Every operator is checked on its own and followed by extra input.
    let cases = [
        // "==" is Equal, not "=" followed by "="
        ("==", "", Operator::Equal),
        ("== extra", "extra", Operator::Equal),
        // "!=" is NotEqual, not "!" followed by "="
        ("!=", "", Operator::NotEqual),
        ("!= extra", "extra", Operator::NotEqual),
        // "<>" is NotEqual, not "<" followed by ">"
        ("<>", "", Operator::NotEqual),
        ("<> extra", "extra", Operator::NotEqual),
        // "<=" is LessEqual, not "<" followed by "="
        ("<=", "", Operator::LessEqual),
        ("<= extra", "extra", Operator::LessEqual),
        // ">=" is GreaterEqual, not ">" followed by "="
        (">=", "", Operator::GreaterEqual),
        (">= extra", "extra", Operator::GreaterEqual),
    ];

    for (text, rest, expected) in cases {
        assert_eq!(
            parse_operator(text),
            Ok((rest, expected)),
            "operator precedence: {text:?}"
        );
    }
}
889
#[test]
fn test_parse_operator_invalid_input() {
    // None of these is an operator: empty input, plain words and numbers,
    // a lone "!", one "=" too many, and the unsupported "&&".
    for text in ["", "abc", "123", "!", "===", "&&"] {
        assert!(
            parse_operator(text).is_err(),
            "expected operator parse failure for {text:?}"
        );
    }
}
900
#[test]
fn test_parse_operator_edge_cases() {
    // Newlines, CRLF and mixed space/tab runs around the operator are all
    // swallowed, so nothing is left over in any of these cases.
    let cases = [
        ("=\n", Operator::Equal),
        ("!=\r\n", Operator::NotEqual),
        ("&\t\t", Operator::BitwiseAnd),
        (" \t = \t ", Operator::Equal),
        ("\t != \t", Operator::NotEqual),
        (" \t& \t ", Operator::BitwiseAnd),
    ];

    for (text, expected) in cases {
        assert_eq!(
            parse_operator(text),
            Ok(("", expected)),
            "operator edge case: {text:?}"
        );
    }
}
913
#[test]
fn test_parse_operator_common_magic_file_patterns() {
    // (input, unparsed remainder, operator) for operator/value pairs in the
    // shapes that show up in magic files.
    let cases = [
        ("= 0x7f454c46", "0x7f454c46", Operator::Equal),
        ("!= 0", "0", Operator::NotEqual),
        ("& 0xFF00", "0xFF00", Operator::BitwiseAnd),
        ("== \"ELF\"", "\"ELF\"", Operator::Equal),
        ("<> \"\"", "\"\"", Operator::NotEqual),
        // Tab or several spaces between the operator and the value
        ("=\t0x504b0304", "0x504b0304", Operator::Equal),
        ("!=  0", "0", Operator::NotEqual),
        ("&   0xFFFF", "0xFFFF", Operator::BitwiseAnd),
    ];

    for (text, rest, expected) in cases {
        assert_eq!(
            parse_operator(text),
            Ok((rest, expected)),
            "magic-file operator pattern: {text:?}"
        );
    }
}
943
#[test]
fn test_parse_operator_all_variants() {
    // One row per accepted spelling, so every operator variant produced by
    // `parse_operator` is covered at least once.
    [
        ("=", Operator::Equal),
        ("==", Operator::Equal),
        ("!=", Operator::NotEqual),
        ("<>", Operator::NotEqual),
        ("<", Operator::LessThan),
        (">", Operator::GreaterThan),
        ("<=", Operator::LessEqual),
        (">=", Operator::GreaterEqual),
        ("&", Operator::BitwiseAnd),
    ]
    .into_iter()
    .for_each(|(input, expected)| {
        assert_eq!(
            parse_operator(input),
            Ok(("", expected)),
            "Failed to parse operator: '{input}'"
        );
    });
}
967
#[test]
fn test_parse_operator_less_than() {
    // (input, unparsed remainder): bare, padded with whitespace, and
    // followed by a value.
    let cases = [
        ("<", ""),
        (" < ", ""),
        ("  <  ", ""),
        ("\t<\t", ""),
        ("< 42", "42"),
    ];

    for (text, rest) in cases {
        assert_eq!(
            parse_operator(text),
            Ok((rest, Operator::LessThan)),
            "less-than operator: {text:?}"
        );
    }
}
981
#[test]
fn test_parse_operator_greater_than() {
    // (input, unparsed remainder): bare, padded with whitespace, and
    // followed by a value.
    let cases = [
        (">", ""),
        (" > ", ""),
        ("  >  ", ""),
        ("\t>\t", ""),
        ("> 42", "42"),
    ];

    for (text, rest) in cases {
        assert_eq!(
            parse_operator(text),
            Ok((rest, Operator::GreaterThan)),
            "greater-than operator: {text:?}"
        );
    }
}
995
#[test]
fn test_parse_operator_less_equal() {
    // (input, unparsed remainder): bare, padded with whitespace, and
    // followed by a value.
    let cases = [
        ("<=", ""),
        (" <= ", ""),
        ("  <=  ", ""),
        ("\t<=\t", ""),
        ("<= 42", "42"),
    ];

    for (text, rest) in cases {
        assert_eq!(
            parse_operator(text),
            Ok((rest, Operator::LessEqual)),
            "less-or-equal operator: {text:?}"
        );
    }
}
1009
#[test]
fn test_parse_operator_greater_equal() {
    // (input, unparsed remainder): bare, padded with whitespace, and
    // followed by a value.
    let cases = [
        (">=", ""),
        (" >= ", ""),
        ("  >=  ", ""),
        ("\t>=\t", ""),
        (">= 42", "42"),
    ];

    for (text, rest) in cases {
        assert_eq!(
            parse_operator(text),
            Ok((rest, Operator::GreaterEqual)),
            "greater-or-equal operator: {text:?}"
        );
    }
}
1023
#[test]
fn test_parse_operator_comparison_disambiguation() {
    // (input, unparsed remainder, operator)
    let cases = [
        // Adjacent characters form a single two-character operator.
        ("<>", "", Operator::NotEqual),
        ("<=", "", Operator::LessEqual),
        (">=", "", Operator::GreaterEqual),
        // A space splits them: only the first character is the operator, the
        // space is consumed, and the second character is left as ">" / "=".
        ("< >", ">", Operator::LessThan),
        ("> =", "=", Operator::GreaterThan),
    ];

    for (text, rest, expected) in cases {
        assert_eq!(
            parse_operator(text),
            Ok((rest, expected)),
            "comparison disambiguation: {text:?}"
        );
    }
}
1041
1042    // Value parsing tests
#[test]
fn test_parse_hex_bytes_with_backslash_x() {
    // `\xNN` escapes: first single bytes, then four-byte runs.
    let cases = [
        ("\\x7f", vec![0x7f]),
        ("\\x45", vec![0x45]),
        ("\\x00", vec![0x00]),
        ("\\xFF", vec![0xFF]),
        ("\\x7f\\x45\\x4c\\x46", vec![0x7f, 0x45, 0x4c, 0x46]),
        ("\\x50\\x4b\\x03\\x04", vec![0x50, 0x4b, 0x03, 0x04]),
    ];

    for (text, bytes) in cases {
        assert_eq!(
            parse_hex_bytes(text),
            Ok(("", bytes)),
            "escaped hex bytes: {text:?}"
        );
    }
}
1061
#[test]
fn test_parse_hex_bytes_without_prefix() {
    // Bare hex digit pairs that are accepted as byte sequences.
    let accepted = [
        ("7f", vec![0x7f]),
        ("ab", vec![0xab]),
        ("FF", vec![0xFF]),
        ("7f454c46", vec![0x7f, 0x45, 0x4c, 0x46]),
        ("504b0304", vec![0x50, 0x4b, 0x03, 0x04]),
    ];
    for (text, bytes) in accepted {
        assert_eq!(
            parse_hex_bytes(text),
            Ok(("", bytes)),
            "bare hex bytes: {text:?}"
        );
    }

    // A digits-only pair has no hex letters and is rejected with a Tag
    // error that points at the untouched input.
    assert_eq!(
        parse_hex_bytes("45"),
        Err(nom::Err::Error(NomError::new(
            "45",
            nom::error::ErrorKind::Tag
        )))
    );
}
1086
#[test]
fn test_parse_hex_bytes_mixed_case() {
    // Upper- and lower-case hex digits may be mixed, with or without `\x`.
    let cases = [
        ("aB", 0xab),
        ("Cd", 0xcd),
        ("\\xEf", 0xef),
        ("\\x1A", 0x1a),
    ];

    for (text, byte) in cases {
        assert_eq!(
            parse_hex_bytes(text),
            Ok(("", vec![byte])),
            "mixed-case hex byte: {text:?}"
        );
    }
}
1095
#[test]
fn test_parse_hex_bytes_empty() {
    // With nothing to parse, the parser reports a Tag error on the empty
    // input instead of returning an empty byte vector.
    let expected = Err(nom::Err::Error(NomError::new(
        "",
        nom::error::ErrorKind::Tag,
    )));

    assert_eq!(parse_hex_bytes(""), expected);
}
1107
#[test]
fn test_parse_hex_bytes_with_remaining_input() {
    // (input, unparsed remainder, bytes): parsing stops at the first
    // character that cannot continue the sequence, and that character
    // (whitespace included) stays in the remainder.
    let cases = [
        ("7f45 rest", " rest", vec![0x7f, 0x45]),
        ("\\x50\\x4b next", " next", vec![0x50, 0x4b]),
        ("ab\"", "\"", vec![0xab]),
    ];

    for (text, rest, bytes) in cases {
        assert_eq!(
            parse_hex_bytes(text),
            Ok((rest, bytes)),
            "hex bytes with trailing input: {text:?}"
        );
    }
}
1121
#[test]
fn test_parse_escape_sequence() {
    // Each backslash escape and the single character it decodes to.
    let cases = [
        ("\\n", '\n'),
        ("\\r", '\r'),
        ("\\t", '\t'),
        ("\\\\", '\\'),
        ("\\\"", '"'),
        ("\\'", '\''),
        ("\\0", '\0'),
    ];

    for (text, decoded) in cases {
        assert_eq!(
            parse_escape_sequence(text),
            Ok(("", decoded)),
            "escape sequence: {text:?}"
        );
    }
}
1133
#[test]
fn test_parse_escape_sequence_with_remaining() {
    // Only the escape itself is consumed; whatever follows is returned.
    let cases = [("\\n rest", " rest", '\n'), ("\\t\"", "\"", '\t')];

    for (text, rest, decoded) in cases {
        assert_eq!(
            parse_escape_sequence(text),
            Ok((rest, decoded)),
            "escape sequence with trailing input: {text:?}"
        );
    }
}
1140
#[test]
fn test_parse_escape_sequence_invalid() {
    // Rejected inputs: no leading backslash, a backslash with nothing after
    // it, and empty input.
    for text in ["n", "\\", ""] {
        assert!(
            parse_escape_sequence(text).is_err(),
            "expected escape parse failure for {text:?}"
        );
    }
}
1148
#[test]
fn test_parse_quoted_string_simple() {
    // Plain quoted strings, including the empty string.
    let cases = [
        ("\"hello\"", "hello"),
        ("\"world\"", "world"),
        ("\"\"", ""),
    ];

    for (text, contents) in cases {
        assert_eq!(
            parse_quoted_string(text),
            Ok(("", contents.to_string())),
            "quoted string: {text:?}"
        );
    }
}
1162
#[test]
fn test_parse_quoted_string_with_escapes() {
    // Left: source text containing backslash escapes.
    // Right: the decoded string the parser must return.
    let cases = [
        ("\"Hello\\nWorld\"", "Hello\nWorld"),
        ("\"Tab\\tSeparated\"", "Tab\tSeparated"),
        ("\"Quote: \\\"text\\\"\"", "Quote: \"text\""),
        ("\"Backslash: \\\\\"", "Backslash: \\"),
        ("\"Null\\0terminated\"", "Null\0terminated"),
    ];

    for (text, decoded) in cases {
        assert_eq!(
            parse_quoted_string(text),
            Ok(("", decoded.to_string())),
            "quoted string with escapes: {text:?}"
        );
    }
}
1187
#[test]
fn test_parse_quoted_string_with_whitespace() {
    // Whitespace before the opening quote and after the closing quote is
    // consumed and does not become part of the string.
    let cases = [
        (" \"hello\" ", "hello"),
        ("\t\"world\"\t", "world"),
        ("  \"test\"  ", "test"),
    ];

    for (text, contents) in cases {
        assert_eq!(
            parse_quoted_string(text),
            Ok(("", contents.to_string())),
            "padded quoted string: {text:?}"
        );
    }
}
1204
#[test]
fn test_parse_quoted_string_with_remaining_input() {
    // (input, unparsed remainder, string contents)
    let cases = [
        ("\"hello\" world", "world", "hello"),
        ("\"test\" = 123", "= 123", "test"),
    ];

    for (text, rest, contents) in cases {
        assert_eq!(
            parse_quoted_string(text),
            Ok((rest, contents.to_string())),
            "quoted string with trailing input: {text:?}"
        );
    }
}
1217
#[test]
fn test_parse_quoted_string_invalid() {
    // Rejected inputs: no quotes at all, missing closing quote, missing
    // opening quote, and empty input.
    for text in ["hello", "\"hello", "hello\"", ""] {
        assert!(
            parse_quoted_string(text).is_err(),
            "expected quoted-string parse failure for {text:?}"
        );
    }
}
1226
#[test]
fn test_parse_numeric_value_positive() {
    // Non-negative literals, decimal then hex, all come back as `Uint`.
    let cases = [
        ("0", 0),
        ("123", 123),
        ("999", 999),
        ("0x0", 0),
        ("0x10", 16),
        ("0xFF", 255),
        ("0xabc", 2748),
    ];

    for (text, number) in cases {
        assert_eq!(
            parse_numeric_value(text),
            Ok(("", Value::Uint(number))),
            "positive numeric literal: {text:?}"
        );
    }
}
1240
#[test]
fn test_parse_numeric_value_negative() {
    // Negative literals, decimal then hex, all come back as `Int`.
    let cases = [
        ("-1", -1),
        ("-123", -123),
        ("-999", -999),
        ("-0x1", -1),
        ("-0x10", -16),
        ("-0xFF", -255),
        ("-0xabc", -2748),
    ];

    for (text, number) in cases {
        assert_eq!(
            parse_numeric_value(text),
            Ok(("", Value::Int(number))),
            "negative numeric literal: {text:?}"
        );
    }
}
1254
#[test]
fn test_parse_numeric_value_with_whitespace() {
    // Whitespace on either side of the number is consumed.
    let cases = [
        (" 123 ", Value::Uint(123)),
        ("\t-456\t", Value::Int(-456)),
        ("  0xFF  ", Value::Uint(255)),
    ];

    for (text, expected) in cases {
        assert_eq!(
            parse_numeric_value(text),
            Ok(("", expected)),
            "padded numeric literal: {text:?}"
        );
    }
}
1262
#[test]
fn test_parse_numeric_value_with_remaining_input() {
    // (input, unparsed remainder, value): whitespace after the number is
    // consumed, so the remainder starts at the next non-space character.
    let cases = [
        ("123 rest", "rest", Value::Uint(123)),
        ("-456 more", "more", Value::Int(-456)),
        ("0xFF)", ")", Value::Uint(255)),
    ];

    for (text, rest, expected) in cases {
        assert_eq!(
            parse_numeric_value(text),
            Ok((rest, expected)),
            "numeric literal with trailing input: {text:?}"
        );
    }
}
1276
#[test]
fn test_parse_value_string_literals() {
    // Left: quoted source text. Right: contents of the `Value::String`.
    let cases = [
        ("\"hello\"", "hello"),
        ("\"ELF\"", "ELF"),
        ("\"\"", ""),
        // Escape sequences are decoded inside the string
        ("\"Line1\\nLine2\"", "Line1\nLine2"),
        ("\"Tab\\tSeparated\"", "Tab\tSeparated"),
        ("\"Null\\0Term\"", "Null\0Term"),
    ];

    for (text, contents) in cases {
        assert_eq!(
            parse_value(text),
            Ok(("", Value::String(contents.to_string()))),
            "string value: {text:?}"
        );
    }
}
1304
#[test]
fn test_parse_value_numeric_literals() {
    let cases = [
        // Positive decimal integers
        ("0", Value::Uint(0)),
        ("123", Value::Uint(123)),
        ("999", Value::Uint(999)),
        // Negative decimal integers
        ("-1", Value::Int(-1)),
        ("-123", Value::Int(-123)),
        ("-999", Value::Int(-999)),
        // Hexadecimal, both signs
        ("0x0", Value::Uint(0)),
        ("0x10", Value::Uint(16)),
        ("0xFF", Value::Uint(255)),
        ("-0xFF", Value::Int(-255)),
    ];

    for (text, expected) in cases {
        assert_eq!(
            parse_value(text),
            Ok(("", expected)),
            "numeric value: {text:?}"
        );
    }
}
1323
#[test]
fn test_parse_value_hex_byte_sequences() {
    let cases = [
        // `\x`-prefixed bytes
        ("\\x7f", vec![0x7f]),
        ("\\x7f\\x45\\x4c\\x46", vec![0x7f, 0x45, 0x4c, 0x46]),
        // Bare hex digit pairs
        ("7f", vec![0x7f]),
        ("7f454c46", vec![0x7f, 0x45, 0x4c, 0x46]),
        // Mixed-case digits
        ("aB", vec![0xab]),
        ("\\xCd", vec![0xcd]),
    ];

    for (text, bytes) in cases {
        assert_eq!(
            parse_value(text),
            Ok(("", Value::Bytes(bytes))),
            "hex byte value: {text:?}"
        );
    }
}
1344
#[test]
fn test_parse_value_with_whitespace() {
    // (input, unparsed remainder, value). Strings and numbers swallow the
    // whitespace after them; a hex byte sequence leaves it in the remainder.
    let cases = [
        (" \"hello\" ", "", Value::String("hello".to_string())),
        ("  123  ", "", Value::Uint(123)),
        ("\t-456\t", "", Value::Int(-456)),
        ("  \\x7f\\x45  ", "  ", Value::Bytes(vec![0x7f, 0x45])),
    ];

    for (text, rest, expected) in cases {
        assert_eq!(
            parse_value(text),
            Ok((rest, expected)),
            "padded value: {text:?}"
        );
    }
}
1360
#[test]
fn test_parse_value_with_remaining_input() {
    // (input, unparsed remainder, value). The separating space is consumed
    // after strings and numbers but kept after a hex byte sequence.
    let cases = [
        ("\"hello\" world", "world", Value::String("hello".to_string())),
        ("123 rest", "rest", Value::Uint(123)),
        ("-456 more", "more", Value::Int(-456)),
        ("\\x7f\\x45 next", " next", Value::Bytes(vec![0x7f, 0x45])),
    ];

    for (text, rest, expected) in cases {
        assert_eq!(
            parse_value(text),
            Ok((rest, expected)),
            "value with trailing input: {text:?}"
        );
    }
}
1382
#[test]
fn test_parse_value_edge_cases() {
    let cases = [
        // Every spelling of zero, including "negative zero", is `Uint(0)`
        ("0", Value::Uint(0)),
        ("-0", Value::Uint(0)),
        ("0x0", Value::Uint(0)),
        ("-0x0", Value::Uint(0)),
        // The 32-bit signed extremes
        ("2147483647", Value::Uint(2_147_483_647)),
        ("-2147483648", Value::Int(-2_147_483_648)),
        ("0x7FFFFFFF", Value::Uint(2_147_483_647)),
    ];

    for (text, expected) in cases {
        assert_eq!(
            parse_value(text),
            Ok(("", expected)),
            "value edge case: {text:?}"
        );
    }

    // There is no value to parse in empty input.
    assert!(parse_value("").is_err());
}
1408
#[test]
fn test_parse_value_invalid_input() {
    // Rejected inputs: not a value at all, invalid hex digits after `0x`,
    // an unterminated string, and a doubled minus sign.
    for text in ["xyz", "0xGG", "\"unclosed", "--123"] {
        assert!(
            parse_value(text).is_err(),
            "expected value parse failure for {text:?}"
        );
    }
}
1417
#[test]
fn test_parse_value_common_magic_file_patterns() {
    // Values in the shapes that show up in magic files.
    let cases = [
        ("0x7f454c46", Value::Uint(0x7f45_4c46)),
        ("\"ELF\"", Value::String("ELF".to_string())),
        (
            "\\x50\\x4b\\x03\\x04",
            Value::Bytes(vec![0x50, 0x4b, 0x03, 0x04]),
        ),
        // The octal escape \377 decodes to U+00FF
        ("\"\\377ELF\"", Value::String("\u{00ff}ELF".to_string())),
        ("0", Value::Uint(0)),
        ("-1", Value::Int(-1)),
    ];

    for (text, expected) in cases {
        assert_eq!(
            parse_value(text),
            Ok(("", expected)),
            "magic-file value pattern: {text:?}"
        );
    }
}
1440
#[test]
fn test_parse_value_type_precedence() {
    // The same characters must map to different value kinds depending on
    // how they are written.
    let cases = [
        // Quoted text is always a string, even if it looks like hex
        ("\"7f\"", Value::String("7f".to_string())),
        // The same digits unquoted are a byte sequence
        ("7f", Value::Bytes(vec![0x7f])),
        // Plain decimal digits are numbers, not bytes
        ("123", Value::Uint(123)),
        ("-123", Value::Int(-123)),
        // A 0x prefix selects a number, not a byte sequence
        ("0x123", Value::Uint(0x123)),
    ];

    for (text, expected) in cases {
        assert_eq!(
            parse_value(text),
            Ok(("", expected)),
            "value type precedence: {text:?}"
        );
    }
}
1460
#[test]
fn test_parse_value_boundary_conditions() {
    let cases = [
        // Single-character strings
        ("\"a\"", Value::String("a".to_string())),
        ("\"1\"", Value::String("1".to_string())),
        // Single hex byte, bare and escaped
        ("ab", Value::Bytes(vec![0xab])),
        ("\\x00", Value::Bytes(vec![0x00])),
        // Smallest non-zero magnitudes on either side of zero
        ("1", Value::Uint(1)),
        ("-1", Value::Int(-1)),
        // Powers of two, in decimal and hex spelling
        ("256", Value::Uint(256)),
        ("0x100", Value::Uint(256)),
        ("1024", Value::Uint(1024)),
        ("0x400", Value::Uint(1024)),
    ];

    for (text, expected) in cases {
        assert_eq!(
            parse_value(text),
            Ok(("", expected)),
            "value boundary condition: {text:?}"
        );
    }
}
1489
#[test]
fn test_parse_operator_whitespace_handling() {
    // Every accepted operator spelling, paired with the variant it must
    // produce. The comparison operators are included so the whitespace
    // matrix covers the whole operator set, and the parsed variant is
    // checked so padding can never change which operator is recognised.
    let operators = [
        ("=", Operator::Equal),
        ("==", Operator::Equal),
        ("!=", Operator::NotEqual),
        ("<>", Operator::NotEqual),
        ("<", Operator::LessThan),
        (">", Operator::GreaterThan),
        ("<=", Operator::LessEqual),
        (">=", Operator::GreaterEqual),
        ("&", Operator::BitwiseAnd),
    ];
    let whitespace_patterns = [
        "",     // No whitespace
        " ",    // Single space
        "  ",   // Multiple spaces
        "\t",   // Tab
        "\t\t", // Multiple tabs
        " \t",  // Mixed space and tab
        "\t ",  // Mixed tab and space
    ];

    for (op, expected) in &operators {
        for leading_ws in whitespace_patterns {
            for trailing_ws in whitespace_patterns {
                let input = format!("{leading_ws}{op}{trailing_ws}");
                let result = parse_operator(&input);

                assert!(
                    result.is_ok(),
                    "Failed to parse operator with whitespace: '{input}'"
                );

                let (remaining, parsed) = result.unwrap();
                assert_eq!(remaining, "", "Unexpected remaining input for: '{input}'");
                assert_eq!(&parsed, expected, "Wrong operator parsed for: '{input}'");
            }
        }
    }
}
1521}
1522/// Parse a type specification (byte, short, long, string, etc.)
1523///
1524/// Supports various type formats found in magic files:
1525/// - `byte` - single byte
1526/// - `short` - 16-bit integer (native endian)
1527/// - `leshort` - 16-bit little-endian integer
1528/// - `beshort` - 16-bit big-endian integer
1529/// - `long` - 32-bit integer (native endian)
1530/// - `lelong` - 32-bit little-endian integer
1531/// - `belong` - 32-bit big-endian integer
1532/// - `string` - null-terminated string
1533///
1534/// # Examples
1535///
1536/// ```
1537/// use libmagic_rs::parser::grammar::parse_type;
1538/// use libmagic_rs::parser::ast::{TypeKind, Endianness};
1539///
1540/// assert_eq!(parse_type("byte"), Ok(("", TypeKind::Byte { signed: true })));
1541/// assert_eq!(parse_type("leshort"), Ok(("", TypeKind::Short { endian: Endianness::Little, signed: true })));
1542/// assert_eq!(parse_type("string"), Ok(("", TypeKind::String { max_length: None })));
1543/// ```
1544/// Parse a type specification with optional attached operator
1545/// Parse a type specification followed by an optional operator
1546///
1547/// # Errors
1548/// Returns a nom parsing error if the input doesn't match the expected format
1549pub fn parse_type_and_operator(input: &str) -> IResult<&str, (TypeKind, Option<Operator>)> {
1550    let (input, _) = multispace0(input)?;
1551
1552    let (input, type_name) = alt((
1553        // Unsigned variants (longer names first to avoid partial matches)
1554        tag("ubelong"),
1555        tag("ulelong"),
1556        tag("ubeshort"),
1557        tag("uleshort"),
1558        tag("ulong"),
1559        tag("ushort"),
1560        tag("ubyte"),
1561        // Signed variants (default in libmagic)
1562        tag("lelong"),
1563        tag("belong"),
1564        tag("leshort"),
1565        tag("beshort"),
1566        tag("long"),
1567        tag("short"),
1568        tag("byte"),
1569        tag("string"),
1570    ))
1571    .parse(input)?;
1572
1573    // Check for attached operator with mask (like &0xf0000000)
1574    let (input, attached_op) = opt(alt((
1575        // Parse &mask format
1576        map(pair(char('&'), parse_number), |(_, mask)| {
1577            Operator::BitwiseAndMask(mask.unsigned_abs())
1578        }),
1579        // Parse standalone & (for backward compatibility)
1580        map(char('&'), |_| Operator::BitwiseAnd),
1581        // Add more operators as needed
1582    )))
1583    .parse(input)?;
1584
1585    let (input, _) = multispace0(input)?;
1586
1587    let type_kind = match type_name {
1588        "byte" => TypeKind::Byte { signed: true },
1589        "ubyte" => TypeKind::Byte { signed: false },
1590        "short" => TypeKind::Short {
1591            endian: Endianness::Native,
1592            signed: true,
1593        },
1594        "ushort" => TypeKind::Short {
1595            endian: Endianness::Native,
1596            signed: false,
1597        },
1598        "leshort" => TypeKind::Short {
1599            endian: Endianness::Little,
1600            signed: true,
1601        },
1602        "uleshort" => TypeKind::Short {
1603            endian: Endianness::Little,
1604            signed: false,
1605        },
1606        "beshort" => TypeKind::Short {
1607            endian: Endianness::Big,
1608            signed: true,
1609        },
1610        "ubeshort" => TypeKind::Short {
1611            endian: Endianness::Big,
1612            signed: false,
1613        },
1614        "long" => TypeKind::Long {
1615            endian: Endianness::Native,
1616            signed: true,
1617        },
1618        "ulong" => TypeKind::Long {
1619            endian: Endianness::Native,
1620            signed: false,
1621        },
1622        "lelong" => TypeKind::Long {
1623            endian: Endianness::Little,
1624            signed: true,
1625        },
1626        "ulelong" => TypeKind::Long {
1627            endian: Endianness::Little,
1628            signed: false,
1629        },
1630        "belong" => TypeKind::Long {
1631            endian: Endianness::Big,
1632            signed: true,
1633        },
1634        "ubelong" => TypeKind::Long {
1635            endian: Endianness::Big,
1636            signed: false,
1637        },
1638        "string" => TypeKind::String { max_length: None },
1639        _ => unreachable!("Parser should only match known types"),
1640    };
1641
1642    Ok((input, (type_kind, attached_op)))
1643}
1644
1645/// Parse a type specification (backward compatibility)
1646/// Parse a type specification (byte, short, long, string, etc.)
1647///
1648/// # Errors
1649/// Returns a nom parsing error if the input doesn't match any known type
1650pub fn parse_type(input: &str) -> IResult<&str, TypeKind> {
1651    let (input, (type_kind, _)) = parse_type_and_operator(input)?;
1652    Ok((input, type_kind))
1653}
1654
1655/// Parse the indentation level and offset for magic rules
1656///
1657/// Handles both absolute offsets and hierarchical child rules with `>` prefix.
1658/// Child rules can be nested multiple levels deep with multiple `>` characters.
1659///
1660/// # Examples
1661///
1662/// ```
1663/// use libmagic_rs::parser::grammar::parse_rule_offset;
1664/// use libmagic_rs::parser::ast::OffsetSpec;
1665///
1666/// // Absolute offset
1667/// assert_eq!(parse_rule_offset("0"), Ok(("", (0, OffsetSpec::Absolute(0)))));
1668/// assert_eq!(parse_rule_offset("16"), Ok(("", (0, OffsetSpec::Absolute(16)))));
1669///
1670/// // Child rule (level 1)
1671/// assert_eq!(parse_rule_offset(">4"), Ok(("", (1, OffsetSpec::Absolute(4)))));
1672///
1673/// // Nested child rule (level 2)
1674/// assert_eq!(parse_rule_offset(">>8"), Ok(("", (2, OffsetSpec::Absolute(8)))));
1675/// ```
1676/// Parse rule offset with hierarchy level (> prefixes) and offset specification
1677///
1678/// # Errors
1679/// Returns a nom parsing error if the input doesn't match the expected offset format
1680pub fn parse_rule_offset(input: &str) -> IResult<&str, (u32, OffsetSpec)> {
1681    let (input, _) = multispace0(input)?;
1682
1683    // Count the number of '>' characters for nesting level
1684    let (input, level_chars) = many0(char('>')).parse(input)?;
1685    let level = u32::try_from(level_chars.len()).unwrap_or(0);
1686
1687    // Parse the offset after the '>' characters
1688    let (input, offset_spec) = parse_offset(input)?;
1689
1690    Ok((input, (level, offset_spec)))
1691}
1692
1693/// Parse the message part of a magic rule
1694///
1695/// The message is everything after the value until the end of the line.
1696/// It may contain format specifiers and can be empty.
1697///
1698/// # Examples
1699///
1700/// ```
1701/// use libmagic_rs::parser::grammar::parse_message;
1702///
1703/// assert_eq!(parse_message("ELF executable"), Ok(("", "ELF executable".to_string())));
1704/// assert_eq!(parse_message(""), Ok(("", "".to_string())));
1705/// assert_eq!(parse_message("  \tPDF document  "), Ok(("", "PDF document".to_string())));
1706/// ```
1707/// Parse the message/description part of a magic rule
1708///
1709/// # Errors
1710/// Returns a nom parsing error if the input cannot be parsed as a message
1711pub fn parse_message(input: &str) -> IResult<&str, String> {
1712    let (input, _) = multispace0(input)?;
1713
1714    // Take everything until end of line, trimming whitespace
1715    // Use take_while instead of take_while1 to handle empty messages
1716    let (input, message_text) = take_while(|c: char| c != '\n' && c != '\r').parse(input)?;
1717    let message = message_text.trim().to_string();
1718
1719    Ok((input, message))
1720}
1721
1722/// Parse a strength directive (`!:strength` line)
1723///
1724/// Parses the `!:strength` directive that modifies rule strength.
1725/// Format: `!:strength [+|-|*|/|=]N` or `!:strength N`
1726///
1727/// # Examples
1728///
1729/// ```
1730/// use libmagic_rs::parser::grammar::parse_strength_directive;
1731/// use libmagic_rs::parser::ast::StrengthModifier;
1732///
1733/// assert_eq!(parse_strength_directive("!:strength +10"), Ok(("", StrengthModifier::Add(10))));
1734/// assert_eq!(parse_strength_directive("!:strength -5"), Ok(("", StrengthModifier::Subtract(5))));
1735/// assert_eq!(parse_strength_directive("!:strength *2"), Ok(("", StrengthModifier::Multiply(2))));
1736/// assert_eq!(parse_strength_directive("!:strength /2"), Ok(("", StrengthModifier::Divide(2))));
1737/// assert_eq!(parse_strength_directive("!:strength =50"), Ok(("", StrengthModifier::Set(50))));
1738/// assert_eq!(parse_strength_directive("!:strength 50"), Ok(("", StrengthModifier::Set(50))));
1739/// ```
1740///
1741/// # Errors
1742///
1743/// Returns a nom parsing error if:
1744/// - Input doesn't start with `!:strength`
1745/// - The modifier value cannot be parsed as a valid integer
1746/// - The operator is invalid
1747pub fn parse_strength_directive(input: &str) -> IResult<&str, StrengthModifier> {
1748    // Helper to safely convert i64 to i32 with clamping to valid strength range.
1749    // This prevents silent truncation to 0 on overflow while keeping values in bounds.
1750    fn clamp_to_i32(n: i64) -> i32 {
1751        // Use i64::from for lossless conversion, then clamp and convert back
1752        let clamped = n.clamp(i64::from(i32::MIN), i64::from(i32::MAX));
1753        // Safe to unwrap: clamped value is guaranteed to be in i32 range
1754        i32::try_from(clamped).unwrap()
1755    }
1756
1757    let (input, _) = multispace0(input)?;
1758    let (input, _) = tag("!:strength")(input)?;
1759    let (input, _) = multispace0(input)?;
1760
1761    // Parse the operator: +, -, *, /, = or bare number (implies =)
1762    let (input, modifier) = alt((
1763        // +N -> Add
1764        map(pair(char('+'), parse_number), |(_, n)| {
1765            StrengthModifier::Add(clamp_to_i32(n))
1766        }),
1767        // -N -> Subtract (note: parse_number handles negative, so we need special handling)
1768        map(pair(char('-'), parse_decimal_number), |(_, n)| {
1769            StrengthModifier::Subtract(clamp_to_i32(n))
1770        }),
1771        // *N -> Multiply
1772        map(pair(char('*'), parse_number), |(_, n)| {
1773            StrengthModifier::Multiply(clamp_to_i32(n))
1774        }),
1775        // /N -> Divide
1776        map(pair(char('/'), parse_number), |(_, n)| {
1777            StrengthModifier::Divide(clamp_to_i32(n))
1778        }),
1779        // =N -> Set
1780        map(pair(char('='), parse_number), |(_, n)| {
1781            StrengthModifier::Set(clamp_to_i32(n))
1782        }),
1783        // Bare number -> Set
1784        map(parse_number, |n| StrengthModifier::Set(clamp_to_i32(n))),
1785    ))
1786    .parse(input)?;
1787
1788    Ok((input, modifier))
1789}
1790
/// Report whether a line is a strength directive
///
/// A line qualifies when, after skipping leading whitespace, it begins with
/// the literal text `!:strength`.
///
/// # Examples
///
/// ```
/// use libmagic_rs::parser::grammar::is_strength_directive;
///
/// assert!(is_strength_directive("!:strength +10"));
/// assert!(is_strength_directive("  !:strength -5"));
/// assert!(!is_strength_directive("0 byte 1"));
/// ```
#[must_use]
pub fn is_strength_directive(input: &str) -> bool {
    // Only the start of the line matters, so trailing whitespace is irrelevant.
    input.trim_start().strip_prefix("!:strength").is_some()
}
1806
1807/// Parse a complete magic rule line from text format
1808///
1809/// Parses a complete magic rule in the format:
1810/// `[>...]offset type [operator] value [message]`
1811///
1812/// Where:
1813/// - `>...` indicates child rule nesting level (optional)
1814/// - `offset` is the byte offset to read from
1815/// - `type` is the data type (byte, short, long, string, etc.)
1816/// - `operator` is the comparison operator (=, !=, &) - defaults to = if omitted
1817/// - `value` is the expected value to compare against
1818/// - `message` is the human-readable description (optional)
1819///
1820/// # Examples
1821///
1822/// ```
1823/// use libmagic_rs::parser::grammar::parse_magic_rule;
1824/// use libmagic_rs::parser::ast::{MagicRule, OffsetSpec, TypeKind, Operator, Value};
1825///
1826/// // Basic rule
1827/// let input = "0 string \\x7fELF ELF executable";
1828/// let (_, rule) = parse_magic_rule(input).unwrap();
1829/// assert_eq!(rule.level, 0);
1830/// assert_eq!(rule.message, "ELF executable");
1831///
1832/// // Child rule
1833/// let input = ">4 byte 1 32-bit";
1834/// let (_, rule) = parse_magic_rule(input).unwrap();
1835/// assert_eq!(rule.level, 1);
1836/// assert_eq!(rule.message, "32-bit");
1837/// ```
1838///
1839/// # Errors
1840///
1841/// Returns a nom parsing error if:
1842/// - The offset specification is invalid
1843/// - The type specification is not recognized
1844/// - The operator is invalid (if present)
1845/// - The value cannot be parsed
1846/// - The input format doesn't match the expected magic rule syntax
1847pub fn parse_magic_rule(input: &str) -> IResult<&str, MagicRule> {
1848    let (input, _) = multispace0(input)?;
1849
1850    // Parse the offset with nesting level
1851    let (input, (level, offset)) = parse_rule_offset(input)?;
1852
1853    // Parse the type and any attached operator
1854    let (input, (typ, attached_op)) = parse_type_and_operator(input)?;
1855
1856    // Try to parse a separate operator (optional - use attached operator if present)
1857    let (input, separate_op) = opt(parse_operator).parse(input)?;
1858    let op = attached_op.or(separate_op).unwrap_or(Operator::Equal);
1859
1860    // Parse the value
1861    let (input, value) = parse_value(input)?;
1862
1863    // Parse the message (optional - everything remaining on the line)
1864    let (input, message) = if input.trim().is_empty() {
1865        (input, String::new())
1866    } else {
1867        parse_message(input)?
1868    };
1869
1870    let rule = MagicRule {
1871        offset,
1872        typ,
1873        op,
1874        value,
1875        message,
1876        children: vec![], // Children will be added during hierarchical parsing
1877        level,
1878        strength_modifier: None, // Will be set during directive parsing
1879    };
1880
1881    Ok((input, rule))
1882}
1883
1884/// Parse a comment line (starts with #)
1885///
1886/// Comments in magic files start with '#' and continue to the end of the line.
1887/// This function consumes the entire comment line.
1888///
1889/// # Examples
1890///
1891/// ```
1892/// use libmagic_rs::parser::grammar::parse_comment;
1893///
1894/// assert_eq!(parse_comment("# This is a comment"), Ok(("", "This is a comment".to_string())));
1895/// assert_eq!(parse_comment("#"), Ok(("", "".to_string())));
1896/// ```
1897/// Parse a comment line (starting with #)
1898///
1899/// # Errors
1900/// Returns a nom parsing error if the input is not a valid comment
1901pub fn parse_comment(input: &str) -> IResult<&str, String> {
1902    let (input, _) = multispace0(input)?;
1903    let (input, _) = char('#').parse(input)?;
1904    let (input, comment_text) = take_while(|c: char| c != '\n' && c != '\r').parse(input)?;
1905    let comment = comment_text.trim().to_string();
1906    Ok((input, comment))
1907}
1908
/// Report whether a line is empty or consists solely of whitespace
///
/// # Examples
///
/// ```
/// use libmagic_rs::parser::grammar::is_empty_line;
///
/// assert!(is_empty_line(""));
/// assert!(is_empty_line("   "));
/// assert!(is_empty_line("\t\t"));
/// assert!(!is_empty_line("0 byte 1"));
/// ```
#[must_use]
pub fn is_empty_line(input: &str) -> bool {
    // Vacuously true for the empty string.
    input.chars().all(char::is_whitespace)
}
1925
/// Report whether a line is a comment
///
/// A line is a comment when its first non-whitespace character is `#`.
///
/// # Examples
///
/// ```
/// use libmagic_rs::parser::grammar::is_comment_line;
///
/// assert!(is_comment_line("# This is a comment"));
/// assert!(is_comment_line("#"));
/// assert!(is_comment_line("  # Indented comment"));
/// assert!(!is_comment_line("0 byte 1"));
/// ```
#[must_use]
pub fn is_comment_line(input: &str) -> bool {
    let first_visible = input.chars().find(|c| !c.is_whitespace());
    first_visible == Some('#')
}
1942
/// Report whether a line ends with a continuation character (\)
///
/// Magic files support line continuation with backslash at the end of lines.
/// Trailing whitespace after the backslash is ignored.
///
/// # Examples
///
/// ```
/// use libmagic_rs::parser::grammar::has_continuation;
///
/// assert!(has_continuation("0 string test \\"));
/// assert!(has_continuation("message continues \\"));
/// assert!(!has_continuation("0 string test"));
/// ```
#[must_use]
pub fn has_continuation(input: &str) -> bool {
    let last_visible = input.trim_end().chars().next_back();
    last_visible == Some('\\')
}
1960// Tests for new magic rule parsing functions
1961
/// The unprefixed keywords parse to signed, native-endian kinds; `string` has
/// no length limit.
#[test]
fn test_parse_type_basic() {
    let cases = [
        ("byte", TypeKind::Byte { signed: true }),
        (
            "short",
            TypeKind::Short {
                endian: Endianness::Native,
                signed: true,
            },
        ),
        (
            "long",
            TypeKind::Long {
                endian: Endianness::Native,
                signed: true,
            },
        ),
        ("string", TypeKind::String { max_length: None }),
    ];

    for (keyword, expected) in cases {
        assert_eq!(parse_type(keyword), Ok(("", expected)));
    }
}
1993
/// The `le`/`be` prefixes select the byte order and leave the type signed.
#[test]
fn test_parse_type_endianness() {
    let short = |endian| TypeKind::Short {
        endian,
        signed: true,
    };
    let long = |endian| TypeKind::Long {
        endian,
        signed: true,
    };

    let cases = [
        ("leshort", short(Endianness::Little)),
        ("beshort", short(Endianness::Big)),
        ("lelong", long(Endianness::Little)),
        ("belong", long(Endianness::Big)),
    ];

    for (keyword, expected) in cases {
        assert_eq!(parse_type(keyword), Ok(("", expected)));
    }
}
2037
/// Whitespace on either side of the keyword is consumed by the parser.
#[test]
fn test_parse_type_with_whitespace() {
    let cases = [
        (" byte ", TypeKind::Byte { signed: true }),
        ("\tstring\t", TypeKind::String { max_length: None }),
        (
            "  lelong  ",
            TypeKind::Long {
                endian: Endianness::Little,
                signed: true,
            },
        ),
    ];

    for (padded, expected) in cases {
        assert_eq!(parse_type(padded), Ok(("", expected)));
    }
}
2059
/// Input following the type keyword is handed back as the remainder.
#[test]
fn test_parse_type_with_remaining_input() {
    let cases = [
        ("byte =", "=", TypeKind::Byte { signed: true }),
        (
            "string \\x7f",
            "\\x7f",
            TypeKind::String { max_length: None },
        ),
    ];

    for (source, leftover, expected) in cases {
        assert_eq!(parse_type(source), Ok((leftover, expected)));
    }
}
2071
/// Empty input and unknown keywords are rejected.
#[test]
fn test_parse_type_invalid() {
    for rejected in ["", "invalid", "int", "float"] {
        assert!(parse_type(rejected).is_err());
    }
}
2079
/// Every `u`-prefixed keyword parses to the unsigned form of its base type.
#[test]
fn test_parse_type_unsigned_variants() {
    let short = |endian| TypeKind::Short {
        endian,
        signed: false,
    };
    let long = |endian| TypeKind::Long {
        endian,
        signed: false,
    };

    let cases = [
        ("ubyte", TypeKind::Byte { signed: false }),
        ("ushort", short(Endianness::Native)),
        ("ubeshort", short(Endianness::Big)),
        ("uleshort", short(Endianness::Little)),
        ("ulong", long(Endianness::Native)),
        ("ubelong", long(Endianness::Big)),
        ("ulelong", long(Endianness::Little)),
    ];

    for (keyword, expected) in cases {
        assert_eq!(parse_type(keyword), Ok(("", expected)));
    }
}
2147
/// In libmagic, unprefixed types are signed by default.
#[test]
fn test_parse_type_signed_defaults() {
    let short = |endian| TypeKind::Short {
        endian,
        signed: true,
    };
    let long = |endian| TypeKind::Long {
        endian,
        signed: true,
    };

    let cases = [
        ("byte", TypeKind::Byte { signed: true }),
        ("short", short(Endianness::Native)),
        ("long", long(Endianness::Native)),
        ("beshort", short(Endianness::Big)),
        ("belong", long(Endianness::Big)),
    ];

    for (keyword, expected) in cases {
        assert_eq!(parse_type(keyword), Ok(("", expected)));
    }
}
2196
/// Offsets without a `>` prefix are level 0; decimal, hex and negative
/// spellings are all accepted.
#[test]
fn test_parse_rule_offset_absolute() {
    let cases = [("0", 0), ("16", 16), ("0x10", 16), ("-4", -4)];

    for (text, offset) in cases {
        assert_eq!(
            parse_rule_offset(text),
            Ok(("", (0, OffsetSpec::Absolute(offset))))
        );
    }
}
2216
/// Each leading `>` raises the nesting level by one.
#[test]
fn test_parse_rule_offset_child_rules() {
    let cases = [(">4", 1, 4), (">>8", 2, 8), (">>>12", 3, 12)];

    for (text, level, offset) in cases {
        assert_eq!(
            parse_rule_offset(text),
            Ok(("", (level, OffsetSpec::Absolute(offset))))
        );
    }
}
2232
/// Surrounding spaces and tabs do not affect the parsed level or offset.
#[test]
fn test_parse_rule_offset_with_whitespace() {
    let cases = [(" 0 ", 0, 0), ("  >4  ", 1, 4), ("\t>>0x10\t", 2, 16)];

    for (text, level, offset) in cases {
        assert_eq!(
            parse_rule_offset(text),
            Ok(("", (level, OffsetSpec::Absolute(offset))))
        );
    }
}
2248
/// Text after the offset (here the type keyword) is returned as the remainder.
#[test]
fn test_parse_rule_offset_with_remaining_input() {
    let cases = [("0 byte", "byte", 0, 0), (">4 string", "string", 1, 4)];

    for (text, leftover, level, offset) in cases {
        assert_eq!(
            parse_rule_offset(text),
            Ok((leftover, (level, OffsetSpec::Absolute(offset))))
        );
    }
}
2260
/// Plain messages are returned verbatim; an empty input gives an empty message.
#[test]
fn test_parse_message_basic() {
    for (text, expected) in [
        ("ELF executable", "ELF executable"),
        ("PDF document", "PDF document"),
        ("", ""),
    ] {
        assert_eq!(parse_message(text), Ok(("", expected.to_string())));
    }
}
2273
/// Leading and trailing whitespace is stripped from the message.
#[test]
fn test_parse_message_with_whitespace() {
    for (text, expected) in [
        ("  ELF executable  ", "ELF executable"),
        ("\tPDF document\t", "PDF document"),
        ("   ", ""),
    ] {
        assert_eq!(parse_message(text), Ok(("", expected.to_string())));
    }
}
2286
/// Punctuation and printf-style format specifiers pass through unchanged.
#[test]
fn test_parse_message_complex() {
    for text in ["ELF 64-bit LSB executable", "ZIP archive, version %d.%d"] {
        assert_eq!(parse_message(text), Ok(("", text.to_string())));
    }
}
2298
/// A top-level string rule with an escaped byte value and a message.
#[test]
fn test_parse_magic_rule_basic() {
    let (rest, parsed) = parse_magic_rule("0 string \\x7fELF ELF executable").unwrap();

    assert!(rest.is_empty());
    assert!(parsed.children.is_empty());
    assert_eq!(parsed.level, 0);
    assert_eq!(parsed.offset, OffsetSpec::Absolute(0));
    assert_eq!(parsed.typ, TypeKind::String { max_length: None });
    assert_eq!(parsed.op, Operator::Equal);
    assert_eq!(parsed.value, Value::Bytes(vec![0x7f, b'E', b'L', b'F']));
    assert_eq!(parsed.message, "ELF executable");
}
2313
/// A single `>` prefix produces a level-1 child rule.
#[test]
fn test_parse_magic_rule_child() {
    let (rest, parsed) = parse_magic_rule(">4 byte 1 32-bit").unwrap();

    assert!(rest.is_empty());
    assert_eq!(parsed.level, 1);
    assert_eq!(parsed.offset, OffsetSpec::Absolute(4));
    assert_eq!(parsed.typ, TypeKind::Byte { signed: true });
    assert_eq!(parsed.op, Operator::Equal);
    assert_eq!(parsed.value, Value::Uint(1));
    assert_eq!(parsed.message, "32-bit");
}
2327
/// A mask attached to the type keyword becomes the rule's operator.
#[test]
fn test_parse_magic_rule_with_operator() {
    let (rest, parsed) = parse_magic_rule("0 lelong&0xf0000000 0x10000000 MIPS-II").unwrap();
    let expected_type = TypeKind::Long {
        endian: Endianness::Little,
        signed: true,
    };

    assert!(rest.is_empty());
    assert_eq!(parsed.level, 0);
    assert_eq!(parsed.offset, OffsetSpec::Absolute(0));
    assert_eq!(parsed.typ, expected_type);
    assert_eq!(parsed.op, Operator::BitwiseAndMask(0xf000_0000));
    assert_eq!(parsed.value, Value::Uint(0x1000_0000));
    assert_eq!(parsed.message, "MIPS-II");
}
2347
/// A rule that ends after its value has an empty message.
#[test]
fn test_parse_magic_rule_no_message() {
    let (rest, parsed) = parse_magic_rule("0 byte 0x7f").unwrap();

    assert!(rest.is_empty());
    assert!(parsed.message.is_empty());
    assert_eq!(parsed.level, 0);
    assert_eq!(parsed.offset, OffsetSpec::Absolute(0));
    assert_eq!(parsed.typ, TypeKind::Byte { signed: true });
    assert_eq!(parsed.op, Operator::Equal);
    assert_eq!(parsed.value, Value::Uint(0x7f));
}
2361
/// Two `>` prefixes produce a level-2 rule.
#[test]
fn test_parse_magic_rule_nested() {
    let (rest, parsed) = parse_magic_rule(">>8 leshort 0x014c Microsoft COFF").unwrap();
    let expected_type = TypeKind::Short {
        endian: Endianness::Little,
        signed: true,
    };

    assert!(rest.is_empty());
    assert_eq!(parsed.level, 2);
    assert_eq!(parsed.offset, OffsetSpec::Absolute(8));
    assert_eq!(parsed.typ, expected_type);
    assert_eq!(parsed.op, Operator::Equal);
    assert_eq!(parsed.value, Value::Uint(0x014c));
    assert_eq!(parsed.message, "Microsoft COFF");
}
2381
/// Generous spacing between every field, and an explicit `=`, still parse.
#[test]
fn test_parse_magic_rule_with_whitespace() {
    let (rest, parsed) = parse_magic_rule("  >  4   byte   =   1   32-bit  ").unwrap();

    assert!(rest.is_empty());
    assert_eq!(parsed.level, 1);
    assert_eq!(parsed.offset, OffsetSpec::Absolute(4));
    assert_eq!(parsed.typ, TypeKind::Byte { signed: true });
    assert_eq!(parsed.op, Operator::Equal);
    assert_eq!(parsed.value, Value::Uint(1));
    assert_eq!(parsed.message, "32-bit");
}
2395
/// A double-quoted value is parsed as a string value.
#[test]
fn test_parse_magic_rule_string_value() {
    let (rest, parsed) = parse_magic_rule("0 string \"PK\" ZIP archive").unwrap();

    assert!(rest.is_empty());
    assert_eq!(parsed.level, 0);
    assert_eq!(parsed.offset, OffsetSpec::Absolute(0));
    assert_eq!(parsed.typ, TypeKind::String { max_length: None });
    assert_eq!(parsed.op, Operator::Equal);
    assert_eq!(parsed.value, Value::String(String::from("PK")));
    assert_eq!(parsed.message, "ZIP archive");
}
2409
/// A hexadecimal offset is converted to its numeric value.
#[test]
fn test_parse_magic_rule_hex_offset() {
    let (rest, parsed) = parse_magic_rule("0x10 belong 0x12345678 Test data").unwrap();
    let expected_type = TypeKind::Long {
        endian: Endianness::Big,
        signed: true,
    };

    assert!(rest.is_empty());
    assert_eq!(parsed.level, 0);
    assert_eq!(parsed.offset, OffsetSpec::Absolute(16));
    assert_eq!(parsed.typ, expected_type);
    assert_eq!(parsed.op, Operator::Equal);
    assert_eq!(parsed.value, Value::Uint(0x1234_5678));
    assert_eq!(parsed.message, "Test data");
}
2429
/// A negative offset is preserved as a negative absolute offset.
#[test]
fn test_parse_magic_rule_negative_offset() {
    let (rest, parsed) = parse_magic_rule("-4 byte 0 End marker").unwrap();

    assert!(rest.is_empty());
    assert_eq!(parsed.level, 0);
    assert_eq!(parsed.offset, OffsetSpec::Absolute(-4));
    assert_eq!(parsed.typ, TypeKind::Byte { signed: true });
    assert_eq!(parsed.op, Operator::Equal);
    assert_eq!(parsed.value, Value::Uint(0));
    assert_eq!(parsed.message, "End marker");
}
2443
/// The text after `#` is returned trimmed; a bare `#` gives an empty comment.
#[test]
fn test_parse_comment() {
    for (line, expected) in [
        ("# This is a comment", "This is a comment"),
        ("#", ""),
        ("# ELF executables", "ELF executables"),
    ] {
        assert_eq!(parse_comment(line), Ok(("", expected.to_string())));
    }
}
2456
/// Indentation before `#` and padding around the text are both ignored.
#[test]
fn test_parse_comment_with_whitespace() {
    for (line, expected) in [
        ("  # Indented comment  ", "Indented comment"),
        ("\t#\tTabbed comment\t", "Tabbed comment"),
    ] {
        assert_eq!(parse_comment(line), Ok(("", expected.to_string())));
    }
}
2468
/// Only lines made up entirely of whitespace (or nothing) count as empty.
#[test]
fn test_is_empty_line() {
    for blank in ["", "   ", "\t\t", " \t \t "] {
        assert!(is_empty_line(blank));
    }
    for populated in ["0 byte 1", "  # comment"] {
        assert!(!is_empty_line(populated));
    }
}
2478
/// A line is a comment when `#` is its first non-whitespace character.
#[test]
fn test_is_comment_line() {
    for comment in [
        "# This is a comment",
        "#",
        "  # Indented comment",
        "\t# Tabbed comment",
    ] {
        assert!(is_comment_line(comment));
    }
    for other in ["0 byte 1", "string test"] {
        assert!(!is_comment_line(other));
    }
}
2488
/// A backslash counts as a continuation only when it is the last
/// non-whitespace character on the line.
#[test]
fn test_has_continuation() {
    for continued in [
        "0 string test \\",
        "message continues \\",
        "line ends with backslash\\",
        "  trailing whitespace  \\  ",
    ] {
        assert!(has_continuation(continued));
    }
    for complete in [
        "0 string test",
        "no continuation",
        "backslash in middle \\ here",
    ] {
        assert!(!has_continuation(complete));
    }
}
2499
/// Rules in the style of the ELF magic database parse completely.
#[test]
fn test_parse_magic_rule_real_world_examples() {
    // Real examples from /usr/share/file/magic/elf
    const EXAMPLES: [&str; 6] = [
        "0 string \\177ELF ELF",
        ">4 byte 1 32-bit",
        ">4 byte 2 64-bit",
        ">5 byte 1 LSB",
        ">5 byte 2 MSB",
        ">>0 lelong&0xf0000000 0x10000000 MIPS-II",
    ];

    for example in EXAMPLES {
        let (remaining, rule) = match parse_magic_rule(example) {
            Ok(parsed) => parsed,
            Err(_) => panic!("Failed to parse real-world example: '{example}'"),
        };

        assert_eq!(remaining, "", "Unexpected remaining input for: '{example}'");
        assert!(
            !rule.message.is_empty() || example.contains("\\177ELF"),
            "Empty message for: '{example}'"
        );
    }
}
2527
/// Edge cases: no message, an empty quoted string at depth 3, and a 32-bit
/// all-ones value.
#[test]
fn test_parse_magic_rule_edge_cases() {
    // Parses `input` and checks that it is fully consumed and yields the given fields.
    let check = |input: &str, level: u32, typ: TypeKind, value: Value, message: &str| {
        let (remaining, rule) = parse_magic_rule(input).unwrap();
        assert_eq!(remaining, "");
        assert_eq!(rule.level, level);
        assert_eq!(rule.typ, typ);
        assert_eq!(rule.value, value);
        assert_eq!(rule.message, message);
    };

    check(
        "0 byte 0",
        0,
        TypeKind::Byte { signed: true },
        Value::Uint(0),
        "",
    );
    check(
        ">>>16 string \"\" Empty string",
        3,
        TypeKind::String { max_length: None },
        Value::String(String::new()),
        "Empty string",
    );
    check(
        "0x100 lelong 0xFFFFFFFF Max value",
        0,
        TypeKind::Long {
            endian: Endianness::Little,
            signed: true,
        },
        Value::Uint(0xFFFF_FFFF),
        "Max value",
    );
}
2567
/// Incomplete or malformed rule lines are rejected.
#[test]
fn test_parse_magic_rule_invalid_input() {
    for invalid_input in [
        "",               // Empty input
        "invalid format", // No valid offset
        "0",              // Missing type
        "0 invalid_type", // Invalid type
        "0 byte",         // Missing value
    ] {
        assert!(
            parse_magic_rule(invalid_input).is_err(),
            "Should fail to parse invalid input: '{invalid_input}'"
        );
    }
}
2586
2587// Strength directive tests
/// A `+N` operand parses to `StrengthModifier::Add(N)` with nothing left over.
#[test]
fn test_parse_strength_directive_add() {
    // (directive text, expected operand)
    let cases = [
        ("!:strength +10", 10),
        ("!:strength +0", 0),
        ("!:strength +100", 100),
    ];

    for (directive, amount) in cases {
        let parsed = parse_strength_directive(directive);
        assert_eq!(parsed, Ok(("", StrengthModifier::Add(amount))));
    }
}
2603
/// A `-N` operand parses to `StrengthModifier::Subtract(N)` with nothing left over.
#[test]
fn test_parse_strength_directive_subtract() {
    // (directive text, expected operand)
    let cases = [
        ("!:strength -5", 5),
        ("!:strength -0", 0),
        ("!:strength -50", 50),
    ];

    for (directive, amount) in cases {
        let parsed = parse_strength_directive(directive);
        assert_eq!(parsed, Ok(("", StrengthModifier::Subtract(amount))));
    }
}
2619
/// A `*N` operand parses to `StrengthModifier::Multiply(N)` with nothing left over.
#[test]
fn test_parse_strength_directive_multiply() {
    // (directive text, expected operand)
    let cases = [("!:strength *2", 2), ("!:strength *10", 10)];

    for (directive, factor) in cases {
        let parsed = parse_strength_directive(directive);
        assert_eq!(parsed, Ok(("", StrengthModifier::Multiply(factor))));
    }
}
2631
/// A `/N` operand parses to `StrengthModifier::Divide(N)` with nothing left over.
#[test]
fn test_parse_strength_directive_divide() {
    // (directive text, expected operand)
    let cases = [("!:strength /2", 2), ("!:strength /10", 10)];

    for (directive, divisor) in cases {
        let parsed = parse_strength_directive(directive);
        assert_eq!(parsed, Ok(("", StrengthModifier::Divide(divisor))));
    }
}
2643
/// An explicit `=N` operand parses to `StrengthModifier::Set(N)` with nothing left over.
#[test]
fn test_parse_strength_directive_set_explicit() {
    // (directive text, expected operand)
    let cases = [
        ("!:strength =50", 50),
        ("!:strength =0", 0),
        ("!:strength =100", 100),
    ];

    for (directive, target) in cases {
        let parsed = parse_strength_directive(directive);
        assert_eq!(parsed, Ok(("", StrengthModifier::Set(target))));
    }
}
2659
/// A number with no operator prefix is treated as `StrengthModifier::Set`.
#[test]
fn test_parse_strength_directive_set_bare() {
    // (directive text, expected operand) -- no operator character before the number
    let cases = [
        ("!:strength 50", 50),
        ("!:strength 0", 0),
        ("!:strength 100", 100),
    ];

    for (directive, target) in cases {
        let parsed = parse_strength_directive(directive);
        assert_eq!(parsed, Ok(("", StrengthModifier::Set(target))));
    }
}
2676
/// Leading spaces or a tab before `!:strength`, and runs of spaces between the
/// keyword and its operand, do not change the parsed modifier.
#[test]
fn test_parse_strength_directive_with_whitespace() {
    // (directive text, expected modifier)
    let cases = [
        ("  !:strength +10", StrengthModifier::Add(10)),
        ("\t!:strength -5", StrengthModifier::Subtract(5)),
        ("!:strength  *2", StrengthModifier::Multiply(2)),
        ("!:strength   50", StrengthModifier::Set(50)),
    ];

    for (directive, modifier) in cases {
        let parsed = parse_strength_directive(directive);
        assert_eq!(parsed, Ok(("", modifier)));
    }
}
2696
/// Input following the directive's operand is returned untouched as the
/// remaining slice rather than being consumed.
#[test]
fn test_parse_strength_directive_with_remaining_input() {
    // (directive text, expected unconsumed tail, expected modifier)
    let cases = [
        ("!:strength +10 extra", " extra", StrengthModifier::Add(10)),
        ("!:strength 50\n", "\n", StrengthModifier::Set(50)),
    ];

    for (directive, tail, modifier) in cases {
        let parsed = parse_strength_directive(directive);
        assert_eq!(parsed, Ok((tail, modifier)));
    }
}
2709
/// `parse_strength_directive` must return an error for an empty line, a
/// different `!:` keyword, a missing `!:` prefix, and an ordinary magic rule.
#[test]
fn test_parse_strength_directive_invalid() {
    let rejected = ["", "!:invalid", "strength +10", "0 byte 1"];

    for line in rejected {
        assert!(parse_strength_directive(line).is_err());
    }
}
2718
/// `is_strength_directive` answers true for `!:strength` lines (optionally
/// preceded by spaces or a tab) and false for everything else tried here.
#[test]
fn test_is_strength_directive() {
    let accepted = [
        "!:strength +10",
        "!:strength -5",
        "!:strength 50",
        "  !:strength +10",
        "\t!:strength *2",
    ];
    for line in accepted {
        assert!(is_strength_directive(line));
    }

    // A magic rule, a comment, an empty line, and a different `!:` directive.
    let rejected = ["0 byte 1", "# comment", "", "!:mime application/pdf"];
    for line in rejected {
        assert!(!is_strength_directive(line));
    }
}