Skip to main content

libmagic_rs/parser/
ast.rs

1// Copyright (c) 2025-2026 the libmagic-rs contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! Abstract Syntax Tree definitions for magic rules
5//!
6//! This module contains the core data structures that represent parsed magic rules
7//! and their components, including offset specifications, type kinds, operators, and values.
8
9use serde::{Deserialize, Serialize};
10use std::num::{NonZeroU32, NonZeroUsize};
11
/// The width of the length prefix for Pascal strings.
///
/// Each variant selects how many bytes the length prefix occupies and, for
/// the multi-byte widths, the byte order the prefix is stored in. The
/// one-byte form (`/B`) is the default and carries no byte order.
///
/// Uppercase suffix letters (`/H`, `/L`) indicate big-endian byte order.
/// Lowercase suffix letters (`/h`, `/l`) indicate little-endian byte order.
///
/// Use [`PStringLengthWidth::byte_count`] to obtain the prefix size in bytes.
///
/// # Examples
///
/// ```
/// use libmagic_rs::parser::ast::PStringLengthWidth;
/// let width = PStringLengthWidth::OneByte;
/// assert_eq!(width.byte_count(), 1);
///
/// let width = PStringLengthWidth::TwoByteBE;
/// assert_eq!(width.byte_count(), 2);
///
/// let width = PStringLengthWidth::FourByteLE;
/// assert_eq!(width.byte_count(), 4);
/// ```
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
// NOTE(review): the reason for this allow is not recorded here; presumably
// the `…ByteBE` / `…ByteLE` naming scheme trips the lint -- confirm it still
// fires before keeping the attribute.
#[allow(clippy::enum_variant_names)]
#[non_exhaustive]
pub enum PStringLengthWidth {
    /// 1-byte length prefix (default, `/B` suffix)
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::PStringLengthWidth;
    /// let width = PStringLengthWidth::OneByte;
    /// assert_eq!(width.byte_count(), 1);
    /// ```
    OneByte,
    /// 2-byte big-endian length prefix (`/H` suffix)
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::PStringLengthWidth;
    /// let width = PStringLengthWidth::TwoByteBE;
    /// assert_eq!(width.byte_count(), 2);
    /// ```
    TwoByteBE,
    /// 2-byte little-endian length prefix (`/h` suffix)
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::PStringLengthWidth;
    /// let width = PStringLengthWidth::TwoByteLE;
    /// assert_eq!(width.byte_count(), 2);
    /// ```
    TwoByteLE,
    /// 4-byte big-endian length prefix (`/L` suffix)
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::PStringLengthWidth;
    /// let width = PStringLengthWidth::FourByteBE;
    /// assert_eq!(width.byte_count(), 4);
    /// ```
    FourByteBE,
    /// 4-byte little-endian length prefix (`/l` suffix)
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::PStringLengthWidth;
    /// let width = PStringLengthWidth::FourByteLE;
    /// assert_eq!(width.byte_count(), 4);
    /// ```
    FourByteLE,
}
85
86impl PStringLengthWidth {
87    /// Returns the number of bytes used for the length prefix.
88    #[must_use]
89    pub fn byte_count(&self) -> usize {
90        match self {
91            Self::OneByte => 1,
92            Self::TwoByteBE | Self::TwoByteLE => 2,
93            Self::FourByteBE | Self::FourByteLE => 4,
94        }
95    }
96}
97
/// Arithmetic operation applied to the value read at an indirect offset's
/// `base_offset` before the result is used as the final file offset.
///
/// magic(5) supports `+`, `-`, `*`, `/`, `%`, `&`, `|`, and `^` between the
/// pointer-type specifier and the operand inside the parentheses. Addition
/// and subtraction collapse to [`IndirectAdjustmentOp::Add`] with a signed
/// `adjustment` (so `(N.X-1)` is `Add(-1)` rather than a separate `Sub`
/// variant); the remaining operators each have a dedicated variant.
///
/// The default is [`IndirectAdjustmentOp::Add`]; an indirect offset with no
/// arithmetic — just `(base.type)` — is encoded as `Add` with `adjustment:
/// 0`, preserving backwards compatibility.
///
/// This enum only names the operation; the operand itself is stored in the
/// `adjustment` field of [`OffsetSpec::Indirect`].
///
/// # Examples
///
/// ```
/// use libmagic_rs::parser::ast::IndirectAdjustmentOp;
///
/// assert_eq!(IndirectAdjustmentOp::default(), IndirectAdjustmentOp::Add);
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
#[non_exhaustive]
pub enum IndirectAdjustmentOp {
    /// Addition (also covers subtraction via negative `adjustment`).
    ///
    /// Corresponds to the `+` and `-` operators, and to an indirect offset
    /// written with no operator at all (`adjustment` is then `0`).
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::IndirectAdjustmentOp;
    /// assert_eq!(IndirectAdjustmentOp::default(), IndirectAdjustmentOp::Add);
    /// ```
    #[default]
    Add,
    /// Multiplication: `pointer_value * adjustment` (the `*` operator).
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::IndirectAdjustmentOp;
    /// let op = IndirectAdjustmentOp::Mul;
    /// assert_eq!(op, IndirectAdjustmentOp::Mul);
    /// ```
    Mul,
    /// Truncating integer division: `pointer_value / adjustment` (the `/`
    /// operator). Division by zero is rejected by the evaluator with an
    /// error.
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::IndirectAdjustmentOp;
    /// let op = IndirectAdjustmentOp::Div;
    /// assert_eq!(op, IndirectAdjustmentOp::Div);
    /// ```
    Div,
    /// Remainder: `pointer_value % adjustment` (the `%` operator). Modulo
    /// by zero is rejected by the evaluator with an error.
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::IndirectAdjustmentOp;
    /// let op = IndirectAdjustmentOp::Mod;
    /// assert_eq!(op, IndirectAdjustmentOp::Mod);
    /// ```
    Mod,
    /// Bitwise AND: `pointer_value & adjustment` (the `&` operator).
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::IndirectAdjustmentOp;
    /// let op = IndirectAdjustmentOp::And;
    /// assert_eq!(op, IndirectAdjustmentOp::And);
    /// ```
    And,
    /// Bitwise OR: `pointer_value | adjustment` (the `|` operator).
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::IndirectAdjustmentOp;
    /// let op = IndirectAdjustmentOp::Or;
    /// assert_eq!(op, IndirectAdjustmentOp::Or);
    /// ```
    Or,
    /// Bitwise XOR: `pointer_value ^ adjustment` (the `^` operator).
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::IndirectAdjustmentOp;
    /// let op = IndirectAdjustmentOp::Xor;
    /// assert_eq!(op, IndirectAdjustmentOp::Xor);
    /// ```
    Xor,
}
194
/// Offset specification for locating data in files
///
/// Describes where a rule reads from: a fixed position ([`Absolute`],
/// [`FromEnd`]), a position relative to the previous match ([`Relative`]),
/// or a position computed from a pointer stored in the file itself
/// ([`Indirect`]).
///
/// [`Absolute`]: OffsetSpec::Absolute
/// [`FromEnd`]: OffsetSpec::FromEnd
/// [`Relative`]: OffsetSpec::Relative
/// [`Indirect`]: OffsetSpec::Indirect
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[non_exhaustive]
pub enum OffsetSpec {
    /// Absolute offset from file start (or from file end if negative)
    ///
    /// Positive values are offsets from the start of the file.
    /// Negative values are offsets from the end of the file (same as `FromEnd`).
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::OffsetSpec;
    ///
    /// let offset = OffsetSpec::Absolute(0x10); // Read at byte 16 from start
    /// let from_end = OffsetSpec::Absolute(-4); // 4 bytes before end of file
    /// ```
    Absolute(i64),

    /// Indirect offset through pointer dereferencing
    ///
    /// Reads a pointer value at `base_offset`, interprets it according to `pointer_type`
    /// and `endian`, then combines `adjustment` with the pointer value using
    /// `adjustment_op` to get the final offset. The default `adjustment_op`
    /// is [`IndirectAdjustmentOp::Add`], so `(base.type)` and
    /// `(base.type+N)` / `(base.type-N)` use addition (subtraction is
    /// encoded as `Add` with a negative `adjustment`). magic(5) also
    /// supports multiplicative and bitwise forms inside the parens, e.g.
    /// `(0x200.s*2)` ([`IndirectAdjustmentOp::Mul`]).
    ///
    /// The three `#[serde(default)]` fields (`base_relative`,
    /// `adjustment_op`, `result_relative`) may be absent from serialized
    /// input; they then take their `Default` values (`false` / `Add`).
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::{OffsetSpec, TypeKind, Endianness, IndirectAdjustmentOp};
    ///
    /// let indirect = OffsetSpec::Indirect {
    ///     base_offset: 0x20,
    ///     base_relative: false,
    ///     pointer_type: TypeKind::Long { endian: Endianness::Little, signed: false },
    ///     adjustment: 4,
    ///     adjustment_op: IndirectAdjustmentOp::Add,
    ///     result_relative: false,
    ///     endian: Endianness::Little,
    /// };
    /// ```
    Indirect {
        /// Base offset to read pointer from. When `base_relative` is
        /// `true`, this value is added to the current anchor (last-match
        /// position) rather than being treated as an absolute file
        /// position.
        base_offset: i64,
        /// If `true`, `base_offset` is relative to the current anchor
        /// (i.e., `(&N.X)` syntax in magic files). Defaults to `false`
        /// for backwards compatibility with existing AST snapshots; the
        /// serde `default` attribute lets older serialized AST round-trip.
        #[serde(default)]
        base_relative: bool,
        /// Type of pointer value
        pointer_type: TypeKind,
        /// Operand combined with the pointer value via `adjustment_op`.
        ///
        /// For `IndirectAdjustmentOp::Add`, the operand is signed (negative
        /// values encode subtraction). For multiplicative and bitwise ops
        /// the operand is interpreted as `i64` but typically magic files
        /// supply non-negative literals.
        adjustment: i64,
        /// Arithmetic operation applied to the pointer value with
        /// `adjustment` as the operand. Defaults to
        /// [`IndirectAdjustmentOp::Add`] for legacy AST consumers via
        /// serde's `default` attribute.
        #[serde(default)]
        adjustment_op: IndirectAdjustmentOp,
        /// If `true`, the resolved offset is added to the current anchor
        /// instead of being treated as an absolute file position. This
        /// corresponds to magic-file `&(...)` syntax wrapping an indirect
        /// spec, e.g., `&(0x10.l)`.
        #[serde(default)]
        result_relative: bool,
        // NOTE(review): multi-byte `pointer_type` variants (e.g.
        // `TypeKind::Long`) carry their own `endian` as well; which of the
        // two the evaluator honors is not visible from this file -- confirm
        // and keep them in sync when constructing this variant.
        /// Endianness for pointer reading
        endian: Endianness,
    },

    /// Relative offset from previous match position
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::OffsetSpec;
    ///
    /// let relative = OffsetSpec::Relative(8); // 8 bytes after previous match
    /// ```
    Relative(i64),

    /// Offset from end of file (negative values move towards start)
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::OffsetSpec;
    ///
    /// let from_end = OffsetSpec::FromEnd(-16); // 16 bytes before end of file
    /// ```
    FromEnd(i64),
}
299
/// Control-flow directive carried by [`TypeKind::Meta`].
///
/// These are not value-reading types -- they correspond to magic(5)
/// control-flow keywords (`default`, `clear`, `name`, `use`, `indirect`,
/// `offset`) that modify how a rule set is traversed rather than reading
/// bytes from the buffer. All six variants are fully evaluated by the
/// engine: `default`/`clear` manage per-level sibling-matched state;
/// `name`/`use` implement subroutine dispatch; `indirect` re-applies the
/// root rule database at a resolved offset; and `offset` emits the
/// current file position as `Value::Uint` for printf-style formatting.
///
/// Only [`MetaType::Name`] and [`MetaType::Use`] carry a payload (the
/// subroutine identifier); the other variants are plain markers.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[non_exhaustive]
pub enum MetaType {
    /// `default` directive: fires when no sibling at the same indentation
    /// level has matched at the current offset. See magic(5) for the
    /// "default" type semantics.
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::MetaType;
    /// let meta = MetaType::Default;
    /// assert_eq!(meta, MetaType::Default);
    /// ```
    Default,
    /// `clear` directive: resets the sibling-matched flag so a later
    /// `default` sibling can fire even if an earlier sibling matched.
    /// See magic(5) for the "clear" type semantics.
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::MetaType;
    /// let meta = MetaType::Clear;
    /// assert_eq!(meta, MetaType::Clear);
    /// ```
    Clear,
    /// `name <identifier>` directive: declares a named subroutine that
    /// can be invoked later via [`MetaType::Use`]. See magic(5) for the
    /// "name" type semantics.
    ///
    /// The `String` payload is the subroutine identifier.
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::MetaType;
    /// let meta = MetaType::Name("part2".to_string());
    /// assert_eq!(meta, MetaType::Name("part2".to_string()));
    /// ```
    Name(String),
    /// `use <identifier>` directive: invokes a named subroutine
    /// previously declared via [`MetaType::Name`]. See magic(5) for the
    /// "use" type semantics.
    ///
    /// The `String` payload is the identifier of the subroutine to invoke.
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::MetaType;
    /// let meta = MetaType::Use("part2".to_string());
    /// assert_eq!(meta, MetaType::Use("part2".to_string()));
    /// ```
    Use(String),
    /// `indirect` directive: re-applies the entire magic database at the
    /// resolved offset. See magic(5) for the "indirect" type semantics.
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::MetaType;
    /// let meta = MetaType::Indirect;
    /// assert_eq!(meta, MetaType::Indirect);
    /// ```
    Indirect,
    /// `offset` type keyword: reports the current file offset rather than
    /// reading a typed value from the buffer. See magic(5) for the
    /// "offset" type semantics.
    ///
    /// Evaluation: the engine resolves the rule's offset specification
    /// to an absolute position and emits a `RuleMatch` whose `value` is
    /// `Value::Uint(position)`. Message templates can reference that
    /// value through printf-style format specifiers (e.g. `%lld`),
    /// which are substituted by
    /// [`crate::output::format::format_magic_message`] at description-
    /// assembly time. The only supported operator is `x` (`AnyValue`);
    /// any other operator is `debug!`-logged and skipped.
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::MetaType;
    /// let meta = MetaType::Offset;
    /// assert_eq!(meta, MetaType::Offset);
    /// ```
    Offset,
}
394
/// Data type specifications for interpreting bytes
///
/// Each variant names how the bytes at a rule's offset are read and
/// compared: fixed-width integers ([`Byte`](TypeKind::Byte),
/// [`Short`](TypeKind::Short), [`Long`](TypeKind::Long),
/// [`Quad`](TypeKind::Quad)), floating-point values
/// ([`Float`](TypeKind::Float), [`Double`](TypeKind::Double)), timestamps
/// ([`Date`](TypeKind::Date), [`QDate`](TypeKind::QDate)), string forms
/// ([`String`](TypeKind::String), [`String16`](TypeKind::String16),
/// [`PString`](TypeKind::PString)), pattern scans
/// ([`Regex`](TypeKind::Regex), [`Search`](TypeKind::Search)), and the
/// control-flow directives wrapped by [`Meta`](TypeKind::Meta), which read
/// no bytes at all.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[non_exhaustive]
pub enum TypeKind {
    /// Single byte
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::TypeKind;
    ///
    /// let byte = TypeKind::Byte { signed: true };
    /// assert_eq!(byte, TypeKind::Byte { signed: true });
    /// ```
    Byte {
        /// Whether value is signed
        signed: bool,
    },
    /// 16-bit integer
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::{TypeKind, Endianness};
    ///
    /// let short = TypeKind::Short { endian: Endianness::Little, signed: true };
    /// assert_eq!(short, TypeKind::Short { endian: Endianness::Little, signed: true });
    /// ```
    Short {
        /// Byte order
        endian: Endianness,
        /// Whether value is signed
        signed: bool,
    },
    /// 32-bit integer
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::{TypeKind, Endianness};
    ///
    /// let long = TypeKind::Long { endian: Endianness::Big, signed: false };
    /// assert_eq!(long, TypeKind::Long { endian: Endianness::Big, signed: false });
    /// ```
    Long {
        /// Byte order
        endian: Endianness,
        /// Whether value is signed
        signed: bool,
    },
    /// 64-bit integer
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::{TypeKind, Endianness};
    ///
    /// let quad = TypeKind::Quad { endian: Endianness::Big, signed: true };
    /// assert_eq!(quad, TypeKind::Quad { endian: Endianness::Big, signed: true });
    /// ```
    Quad {
        /// Byte order
        endian: Endianness,
        /// Whether value is signed
        signed: bool,
    },
    /// 32-bit IEEE 754 floating-point
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::{TypeKind, Endianness};
    ///
    /// let float = TypeKind::Float { endian: Endianness::Big };
    /// assert_eq!(float, TypeKind::Float { endian: Endianness::Big });
    /// ```
    Float {
        /// Byte order
        endian: Endianness,
    },
    /// 64-bit IEEE 754 double-precision floating-point
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::{TypeKind, Endianness};
    ///
    /// let double = TypeKind::Double { endian: Endianness::Big };
    /// assert_eq!(double, TypeKind::Double { endian: Endianness::Big });
    /// ```
    Double {
        /// Byte order
        endian: Endianness,
    },
    /// 32-bit Unix timestamp (seconds since epoch)
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::{TypeKind, Endianness};
    ///
    /// let date = TypeKind::Date { endian: Endianness::Big, utc: true };
    /// assert_eq!(date, TypeKind::Date { endian: Endianness::Big, utc: true });
    /// ```
    Date {
        /// Byte order
        endian: Endianness,
        /// true = UTC, false = local time
        utc: bool,
    },
    /// 64-bit Unix timestamp (seconds since epoch)
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::{TypeKind, Endianness};
    ///
    /// let qdate = TypeKind::QDate { endian: Endianness::Little, utc: false };
    /// assert_eq!(qdate, TypeKind::QDate { endian: Endianness::Little, utc: false });
    /// ```
    QDate {
        /// Byte order
        endian: Endianness,
        /// true = UTC, false = local time
        utc: bool,
    },
    /// String data
    ///
    /// The `flags` field carries the modifier flags parsed from the
    /// `/[cCwWtTbf]` suffix on a `string` rule. Default flags (all
    /// `false`) preserve the existing byte-exact comparison path; any
    /// non-default flag routes the rule through
    /// `compare_string_with_flags` in `src/evaluator/types/string.rs`.
    /// See [`StringFlags`] for per-flag semantics.
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::{StringFlags, TypeKind};
    ///
    /// let s = TypeKind::String { max_length: None, flags: StringFlags::default() };
    /// assert_eq!(s, TypeKind::String { max_length: None, flags: StringFlags::default() });
    ///
    /// let case_insensitive = TypeKind::String {
    ///     max_length: None,
    ///     flags: StringFlags::default().with_ignore_lowercase(true),
    /// };
    /// assert!(matches!(case_insensitive, TypeKind::String { flags, .. } if flags.ignore_lowercase));
    /// ```
    String {
        /// Maximum length to read
        max_length: Option<usize>,
        /// Modifier flags from the `/[cCwWtTbf]` suffix
        flags: StringFlags,
    },
    /// UCS-2 (16-bit Unicode) string with explicit byte order.
    ///
    /// Backs the magic(5) `lestring16` (little-endian) and `bestring16`
    /// (big-endian) keywords. Each character occupies two bytes in the
    /// file; the reader stops at a U+0000 terminator (encoded as the
    /// 2-byte sequence `0x00 0x00`) or at the end of the buffer. The
    /// decoded value is returned as a Rust `String` (so non-ASCII
    /// characters are preserved when valid UCS-2).
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::{TypeKind, Endianness};
    ///
    /// let le = TypeKind::String16 { endian: Endianness::Little };
    /// assert_eq!(le, TypeKind::String16 { endian: Endianness::Little });
    ///
    /// let be = TypeKind::String16 { endian: Endianness::Big };
    /// assert_eq!(be, TypeKind::String16 { endian: Endianness::Big });
    /// ```
    String16 {
        /// Endianness for the 16-bit code units.
        endian: Endianness,
    },
    /// Pascal string (length-prefixed, supports 1/2/4-byte prefix, with optional max length)
    ///
    /// Pascal strings store the length as a prefix (1, 2, or 4 bytes, with configurable endianness), followed by
    /// that many bytes of string data. Unlike C strings, they are not null-terminated.
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::{TypeKind, PStringLengthWidth};
    ///
    /// let pstring = TypeKind::PString { max_length: None, length_width: PStringLengthWidth::OneByte, length_includes_itself: false };
    /// assert_eq!(pstring, TypeKind::PString { max_length: None, length_width: PStringLengthWidth::OneByte, length_includes_itself: false });
    ///
    /// let limited = TypeKind::PString { max_length: Some(64), length_width: PStringLengthWidth::TwoByteBE, length_includes_itself: false };
    /// assert_eq!(limited, TypeKind::PString { max_length: Some(64), length_width: PStringLengthWidth::TwoByteBE, length_includes_itself: false });
    ///
    /// // /J flag: stored length includes the length field itself
    /// let jpeg = TypeKind::PString { max_length: None, length_width: PStringLengthWidth::TwoByteBE, length_includes_itself: true };
    /// assert_eq!(jpeg, TypeKind::PString { max_length: None, length_width: PStringLengthWidth::TwoByteBE, length_includes_itself: true });
    /// ```
    PString {
        /// Maximum length to read (caps the length value)
        max_length: Option<usize>,
        /// Width of the length prefix (see [`PStringLengthWidth`] for the
        /// suffix letters and byte orders)
        length_width: PStringLengthWidth,
        /// Whether the stored length includes the length field itself (`/J` flag)
        length_includes_itself: bool,
    },
    /// Regular expression matching against file contents
    ///
    /// Regex rules match a POSIX-extended regular expression pattern against the
    /// file buffer. Patterns are compiled with multi-line mode always enabled
    /// (matching libmagic's unconditional `REG_NEWLINE`), so `^` and `$` match
    /// at line boundaries and `.` does not match `\n`. The `flags` control
    /// case sensitivity and anchor advance semantics; the `count` field
    /// controls the scan window (byte or line bounds). The scan window is
    /// always capped at 8192 bytes (matching GNU `file`'s `FILE_REGEX_MAX`;
    /// enforced in the evaluator).
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::{RegexCount, RegexFlags, TypeKind};
    /// use std::num::NonZeroU32;
    ///
    /// // Plain `regex` -- no flags, default 8192-byte scan window.
    /// let plain = TypeKind::Regex {
    ///     flags: RegexFlags::default(),
    ///     count: RegexCount::Default,
    /// };
    ///
    /// // `regex/1l` -- scan the first line only.
    /// // `NonZeroU32::new` returns an `Option`, which is passed to
    /// // `RegexCount::Lines` as-is (no `unwrap`).
    /// let first_line = TypeKind::Regex {
    ///     flags: RegexFlags::default(),
    ///     count: RegexCount::Lines(NonZeroU32::new(1)),
    /// };
    ///
    /// // `regex/cs` -- case-insensitive, anchor advances to match-start.
    /// let case_insensitive_start = TypeKind::Regex {
    ///     flags: RegexFlags::default()
    ///         .with_case_insensitive(true)
    ///         .with_start_offset(true),
    ///     count: RegexCount::Default,
    /// };
    /// ```
    Regex {
        /// Modifier flags from the `/[cs]` suffix (`/c` case-insensitive,
        /// `/s` start-offset anchor). Line-mode is encoded by the
        /// [`RegexCount::Lines`] variant of `count`, not a flag.
        flags: RegexFlags,
        /// Scan window specifier: default 8192 bytes, explicit byte
        /// count, or explicit line count. See [`RegexCount`] for the
        /// three cases.
        count: RegexCount,
    },
    /// Multi-byte pattern search within a bounded range
    ///
    /// Search rules look for a literal byte pattern within `range` bytes of
    /// the offset. Unlike [`TypeKind::String`], which only matches at the
    /// exact offset, `search` scans forward up to `range` bytes for the
    /// first occurrence. The range is **mandatory** per GNU `file`'s
    /// magic(5) specification and is stored as a [`NonZeroUsize`] so a
    /// zero-range search is unrepresentable.
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::TypeKind;
    /// use std::num::NonZeroUsize;
    ///
    /// // `search/256` -- scan up to 256 bytes for the literal pattern.
    /// let bounded = TypeKind::Search {
    ///     range: NonZeroUsize::new(256).unwrap(),
    ///     flags: libmagic_rs::parser::ast::SearchFlags::default(),
    /// };
    /// ```
    Search {
        /// Scan window width in bytes, starting at the rule's offset.
        range: NonZeroUsize,
        // NOTE(review): the suffix class below lists nine letters besides
        // `s` (it includes `B`), while the prose says eight letters are
        // shared with `StringFlags`, whose own class is `/[cCwWtTbf]`.
        // Confirm against `SearchFlags` which of the two is accurate.
        /// Modifier flags from the `/[sCcWwTtBbf]` suffix on a `search`
        /// rule. The `/s` flag controls anchor advance (match-START vs
        /// match-END); the eight `StringFlags`-shared letters alter how
        /// the literal pattern is compared against the file bytes. See
        /// [`SearchFlags`] for the per-flag semantics.
        flags: SearchFlags,
    },
    /// Control-flow directive (`default`, `clear`, `name`, `use`,
    /// `indirect`, `offset`).
    ///
    /// These magic(5) keywords do not read or compare bytes; they modify
    /// how a rule set is traversed. All six variants are fully evaluated:
    /// `default` fires as a fallback when no sibling at the same level
    /// has matched; `clear` resets that flag; `name`/`use` support
    /// subroutine definition and invocation; `indirect` re-enters the
    /// rule set at a resolved offset; `offset` emits the resolved file
    /// position as `Value::Uint` for printf-style message substitution.
    /// See [`MetaType`] for the individual variants.
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::{MetaType, TypeKind};
    /// let default_rule = TypeKind::Meta(MetaType::Default);
    /// assert_eq!(default_rule, TypeKind::Meta(MetaType::Default));
    /// ```
    Meta(MetaType),
}
701
/// Regex modifier flags parsed from the `/[cs]` suffix on a `regex` rule.
///
/// The `/l` "line-based window" modifier is **not** represented here; it
/// lives on [`RegexCount::Lines`] so that the type-level encoding makes
/// "line count" and "byte count" mutually exclusive. An earlier design
/// used two separate fields (`line_based: bool` + `count: Option<u32>`)
/// which admitted the cross-field state `line_based: true, count: None`;
/// under the current encoding that case is expressed explicitly as
/// [`RegexCount::Lines(None)`](RegexCount::Lines) -- the `regex/l`
/// shorthand -- and is behaviorally equivalent to [`RegexCount::Default`]
/// (both walk the full 8192-byte capped window).
///
/// All flags default to `false` via [`RegexFlags::default`], equivalent
/// to a plain `regex` with no `/c` or `/s` suffix.
///
/// The struct is `#[non_exhaustive]`, so code outside this crate cannot
/// build it with a struct literal; start from [`RegexFlags::default`] and
/// chain the `with_*` builders instead, as the example below does.
///
/// # Examples
///
/// ```
/// use libmagic_rs::parser::ast::RegexFlags;
///
/// let plain = RegexFlags::default();
/// assert!(!plain.case_insensitive);
/// assert!(!plain.start_offset);
///
/// let case_and_start = RegexFlags::default()
///     .with_case_insensitive(true)
///     .with_start_offset(true);
/// assert!(case_and_start.case_insensitive);
/// assert!(case_and_start.start_offset);
/// ```
#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq)]
#[non_exhaustive]
pub struct RegexFlags {
    /// `/c` -- case-insensitive matching. When `true`, ASCII letter
    /// casing is ignored during pattern matching.
    pub case_insensitive: bool,
    /// `/s` -- advance the GNU `file` previous-match anchor to the start
    /// of the matched region instead of its end. Matches libmagic's
    /// `REGEX_OFFSET_START` flag, which zeros the length contribution in
    /// `moffset()` for `FILE_REGEX`. Useful for chaining child rules that
    /// need to re-match from the position where the parent regex began.
    pub start_offset: bool,
}
745
746impl RegexFlags {
747    /// Builder-style setter for [`RegexFlags::case_insensitive`] (`/c`).
748    ///
749    /// Chain after [`RegexFlags::default()`] to construct `RegexFlags`
750    /// values without exhaustive struct literals. If a new flag is
751    /// added to `RegexFlags` in the future, callers using the builder
752    /// form keep compiling; callers using struct literals would need
753    /// an update.
754    #[must_use]
755    pub const fn with_case_insensitive(mut self, value: bool) -> Self {
756        self.case_insensitive = value;
757        self
758    }
759
760    /// Builder-style setter for [`RegexFlags::start_offset`] (`/s`).
761    ///
762    /// Chain after [`RegexFlags::default()`] to construct `RegexFlags`
763    /// values without exhaustive struct literals.
764    #[must_use]
765    pub const fn with_start_offset(mut self, value: bool) -> Self {
766        self.start_offset = value;
767        self
768    }
769}
770
771/// String modifier flags parsed from the `/[cCwWtTbf]` suffix on a `string`
772/// rule.
773///
774/// Mirrors libmagic's `STRING_*` flag bits from `src/file.h`. Each flag
775/// alters how `compare_string_with_flags` walks the pattern and buffer in
776/// parallel. The default (all `false`) preserves byte-exact comparison.
777///
778/// **`/c` vs `/C` are asymmetric**: the pattern character controls
779/// direction. With `/c`, only lowercase pattern chars trigger case-folding
780/// (the file byte is `tolower`'d). With `/C`, only uppercase pattern chars
/// trigger folding (the file byte is `toupper`'d). Mixed-case patterns
/// fold per character: `/c FoO` matches `FoO` and `FOO` but not `Foo`
/// or `fOO` (the uppercase `F` and `O` are literal). See GOTCHAS S6.5 for the
784/// rationale and `src/softmagic.c` for the canonical libmagic contract.
785///
786/// **`/B` is NOT a string flag** -- it is the `pstring` 1-byte length-width
787/// letter (`PSTRING_1_BE`). `string/B` is rejected at parse time. See
788/// GOTCHAS S6.6.
789///
790/// # Examples
791///
792/// ```
793/// use libmagic_rs::parser::ast::StringFlags;
794///
795/// let plain = StringFlags::default();
796/// assert!(!plain.ignore_lowercase);
797///
798/// let case_insensitive = StringFlags::default().with_ignore_lowercase(true);
799/// assert!(case_insensitive.ignore_lowercase);
800///
801/// let compound = StringFlags::default()
802///     .with_ignore_lowercase(true)
803///     .with_compact_optional_whitespace(true);
804/// assert!(compound.ignore_lowercase);
805/// assert!(compound.compact_optional_whitespace);
806/// ```
807#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq)]
808// libmagic's contract is naturally a bitfield: each flag is a distinct
809// magic(5) letter (/c, /C, /w, /W, /t, /T, /b, /f) with its own STRING_*
810// constant in libmagic src/file.h. Flags compose freely (string/cw is
811// /c plus /w; string/wcCtTbf sets all eight). Folding pairs into enums
812// is possible (whitespace: none|optional|required; case: none|lower|upper)
813// but would obscure the libmagic mapping and produce verbose match arms
814// in every consumer. The bool-per-flag layout mirrors `RegexFlags` and
815// the libmagic source -- the clippy lint is overruled by the design.
816#[allow(clippy::struct_excessive_bools)]
817#[non_exhaustive]
818pub struct StringFlags {
819    /// `/W` -- `STRING_COMPACT_WHITESPACE`. Pattern whitespace requires at
820    /// least one whitespace byte in the file, then any further whitespace
821    /// in the file is consumed greedily.
822    pub compact_whitespace: bool,
823    /// `/w` -- `STRING_COMPACT_OPTIONAL_WHITESPACE`. Pattern whitespace
824    /// matches zero or more whitespace bytes in the file.
825    pub compact_optional_whitespace: bool,
826    /// `/c` -- `STRING_IGNORE_LOWERCASE`. When the pattern char is
827    /// lowercase, the file byte is `to_ascii_lowercase`'d before
828    /// comparison. Uppercase pattern chars are compared literally.
829    pub ignore_lowercase: bool,
830    /// `/C` -- `STRING_IGNORE_UPPERCASE`. When the pattern char is
831    /// uppercase, the file byte is `to_ascii_uppercase`'d before
832    /// comparison. Lowercase pattern chars are compared literally.
833    pub ignore_uppercase: bool,
834    /// `/t` -- `STRING_TEXTTEST`. Hint that this rule applies to text
835    /// files. Captured for MIME-output integration; does not currently
836    /// alter comparison.
837    pub text_test: bool,
838    /// `/T` -- `STRING_TRIM`. Trim leading and trailing ASCII whitespace
839    /// from the pattern before comparison. The trim is applied at
840    /// evaluation time (in `read_pattern_match`) so the AST keeps the
841    /// original pattern bytes; the comparison function receives the
842    /// trimmed slice.
843    pub trim: bool,
844    /// `/b` -- `STRING_BINTEST`. Hint that this rule applies to binary
845    /// files. Captured for MIME-output integration; does not currently
846    /// alter comparison.
847    pub bin_test: bool,
848    /// `/f` -- `STRING_FULL_WORD`. Post-match check that the byte after
849    /// the matched region is either end-of-buffer or a non-word
850    /// character (ASCII alphanumeric or `_`).
851    pub full_word: bool,
852}
853
854impl StringFlags {
855    /// Returns `true` when every flag is `false` (the byte-exact fast
856    /// path). The evaluator dispatcher uses this to skip the
857    /// parallel-walk comparison when no flags are set.
858    #[must_use]
859    pub const fn is_empty(self) -> bool {
860        !self.compact_whitespace
861            && !self.compact_optional_whitespace
862            && !self.ignore_lowercase
863            && !self.ignore_uppercase
864            && !self.text_test
865            && !self.trim
866            && !self.bin_test
867            && !self.full_word
868    }
869
870    /// Builder-style setter for `compact_whitespace` (`/W`).
871    #[must_use]
872    pub const fn with_compact_whitespace(mut self, value: bool) -> Self {
873        self.compact_whitespace = value;
874        self
875    }
876
877    /// Builder-style setter for `compact_optional_whitespace` (`/w`).
878    #[must_use]
879    pub const fn with_compact_optional_whitespace(mut self, value: bool) -> Self {
880        self.compact_optional_whitespace = value;
881        self
882    }
883
884    /// Builder-style setter for `ignore_lowercase` (`/c`).
885    #[must_use]
886    pub const fn with_ignore_lowercase(mut self, value: bool) -> Self {
887        self.ignore_lowercase = value;
888        self
889    }
890
891    /// Builder-style setter for `ignore_uppercase` (`/C`).
892    #[must_use]
893    pub const fn with_ignore_uppercase(mut self, value: bool) -> Self {
894        self.ignore_uppercase = value;
895        self
896    }
897
898    /// Builder-style setter for `text_test` (`/t`).
899    #[must_use]
900    pub const fn with_text_test(mut self, value: bool) -> Self {
901        self.text_test = value;
902        self
903    }
904
905    /// Builder-style setter for `trim` (`/T`).
906    #[must_use]
907    pub const fn with_trim(mut self, value: bool) -> Self {
908        self.trim = value;
909        self
910    }
911
912    /// Builder-style setter for `bin_test` (`/b`).
913    #[must_use]
914    pub const fn with_bin_test(mut self, value: bool) -> Self {
915        self.bin_test = value;
916        self
917    }
918
919    /// Builder-style setter for `full_word` (`/f`).
920    #[must_use]
921    pub const fn with_full_word(mut self, value: bool) -> Self {
922        self.full_word = value;
923        self
924    }
925}
926
927/// Search modifier flags parsed from the `/[sCcWwTtBbf]` suffix on a
928/// `search` rule.
929///
930/// Mirrors [`StringFlags`] for the eight `STRING_*` letters that alter
931/// the literal-pattern comparison (`/c`, `/C`, `/w`, `/W`, `/t`, `/T`,
932/// `/b`, `/f`), plus a search-only `start_anchor` field for `/s` which
933/// shifts the GNU `file` previous-match anchor to the START of the
934/// matched region. The default (all `false`) preserves byte-exact
935/// comparison and match-END anchor advance.
936///
937/// `SearchFlags` is structurally parallel to `StringFlags`: when one
938/// struct grows a field, the other gains the same field in lockstep
939/// so that [`SearchFlags::to_string_flags`] can keep handing off to
940/// `compare_string_with_flags` without a generic refactor. The
941/// search-only `start_anchor` field has no analog in `string` rules.
942///
943/// **`/c` vs `/C` are asymmetric** in the same way as [`StringFlags`]:
944/// the pattern character controls fold direction. See [`StringFlags`]
945/// and GOTCHAS S6.5 for the rationale.
946///
947/// # Examples
948///
949/// ```
950/// use libmagic_rs::parser::ast::SearchFlags;
951///
952/// let plain = SearchFlags::default();
953/// assert!(!plain.start_anchor);
954/// assert!(plain.is_empty());
955/// assert!(!plain.needs_byte_compare());
956///
957/// let start = SearchFlags::default().with_start_anchor(true);
958/// assert!(start.start_anchor);
959/// assert!(!start.is_empty());
960/// // /s is anchor-only -- does not force the byte-compare slow path.
961/// assert!(!start.needs_byte_compare());
962///
963/// let case = SearchFlags::default().with_ignore_lowercase(true);
964/// assert!(case.needs_byte_compare());
965/// ```
966#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq)]
967// libmagic's contract is naturally a bitfield: each flag is a distinct
968// magic(5) letter with its own STRING_*/SEARCH_* constant in libmagic
969// src/file.h. Flags compose freely (search/cs is /c plus /s; search/sWcT
970// sets four). Folding pairs into enums is possible but would obscure
971// the libmagic mapping and produce verbose match arms in every consumer.
972// The bool-per-flag layout mirrors `StringFlags` and `RegexFlags` and the
973// libmagic source -- the clippy lint is overruled by the design.
974#[allow(clippy::struct_excessive_bools)]
975#[non_exhaustive]
976pub struct SearchFlags {
977    /// `/W` -- `STRING_COMPACT_WHITESPACE`. Pattern whitespace requires at
978    /// least one whitespace byte in the file, then any further whitespace
979    /// in the file is consumed greedily.
980    pub compact_whitespace: bool,
981    /// `/w` -- `STRING_COMPACT_OPTIONAL_WHITESPACE`. Pattern whitespace
982    /// matches zero or more whitespace bytes in the file.
983    pub compact_optional_whitespace: bool,
984    /// `/c` -- `STRING_IGNORE_LOWERCASE`. When the pattern char is
985    /// lowercase, the file byte is `to_ascii_lowercase`'d before
986    /// comparison. Uppercase pattern chars are compared literally.
987    pub ignore_lowercase: bool,
988    /// `/C` -- `STRING_IGNORE_UPPERCASE`. When the pattern char is
989    /// uppercase, the file byte is `to_ascii_uppercase`'d before
990    /// comparison. Lowercase pattern chars are compared literally.
991    pub ignore_uppercase: bool,
992    /// `/t` -- `STRING_TEXTTEST`. Hint that this rule applies to text
993    /// files. Captured for MIME-output integration; does not currently
994    /// alter comparison.
995    pub text_test: bool,
996    /// `/T` -- `STRING_TRIM`. Trim leading and trailing ASCII whitespace
997    /// from the pattern before comparison.
998    pub trim: bool,
999    /// `/b` -- `STRING_BINTEST`. Hint that this rule applies to binary
1000    /// files. Captured for MIME-output integration; does not currently
1001    /// alter comparison.
1002    pub bin_test: bool,
1003    /// `/f` -- `STRING_FULL_WORD`. Post-match check that the byte after
1004    /// the matched region is either end-of-buffer or a non-word
1005    /// character (ASCII alphanumeric or `_`).
1006    pub full_word: bool,
1007    /// `/s` -- magic(5) "search-start" flag. When `true`, the GNU `file`
1008    /// previous-match anchor advance lands on the match-START index
1009    /// rather than match-END (the default). Mirrors libmagic's
1010    /// `FILE_SEARCH` anchor handling in `src/softmagic.c::moffset`. The
1011    /// dispatch happens in
1012    /// `src/evaluator/types/search.rs::search_bytes_consumed`.
1013    pub start_anchor: bool,
1014}
1015
1016impl SearchFlags {
1017    /// Returns `true` when every flag is `false` (default-constructed).
1018    #[must_use]
1019    pub const fn is_empty(self) -> bool {
1020        !self.compact_whitespace
1021            && !self.compact_optional_whitespace
1022            && !self.ignore_lowercase
1023            && !self.ignore_uppercase
1024            && !self.text_test
1025            && !self.trim
1026            && !self.bin_test
1027            && !self.full_word
1028            && !self.start_anchor
1029    }
1030
1031    /// Returns `true` when any flag alters the literal-pattern
1032    /// comparison, forcing the byte-walk slow path through
1033    /// `compare_string_with_flags`. The anchor-only / metadata-only
1034    /// flags (`/s`, `/t`, `/b`) do **not** trigger byte-compare;
1035    /// they preserve the `memchr::memmem::find` fast path.
1036    #[must_use]
1037    pub const fn needs_byte_compare(self) -> bool {
1038        self.compact_whitespace
1039            || self.compact_optional_whitespace
1040            || self.ignore_lowercase
1041            || self.ignore_uppercase
1042            || self.trim
1043            || self.full_word
1044    }
1045
1046    /// Project the eight shared flag fields onto a [`StringFlags`] for
1047    /// handoff to `compare_string_with_flags`. The search-only
1048    /// `start_anchor` field is dropped (it is anchor-advance policy,
1049    /// not comparison policy).
1050    ///
1051    /// # Examples
1052    ///
1053    /// ```
1054    /// use libmagic_rs::parser::ast::SearchFlags;
1055    ///
1056    /// let sf = SearchFlags::default()
1057    ///     .with_ignore_lowercase(true)
1058    ///     .with_trim(true)
1059    ///     .with_start_anchor(true);
1060    /// let projected = sf.to_string_flags();
1061    /// assert!(projected.ignore_lowercase);
1062    /// assert!(projected.trim);
1063    /// // /s has no analog in StringFlags.
1064    /// ```
1065    #[must_use]
1066    pub const fn to_string_flags(self) -> StringFlags {
1067        StringFlags {
1068            compact_whitespace: self.compact_whitespace,
1069            compact_optional_whitespace: self.compact_optional_whitespace,
1070            ignore_lowercase: self.ignore_lowercase,
1071            ignore_uppercase: self.ignore_uppercase,
1072            text_test: self.text_test,
1073            trim: self.trim,
1074            bin_test: self.bin_test,
1075            full_word: self.full_word,
1076        }
1077    }
1078
1079    /// Builder-style setter for `compact_whitespace` (`/W`).
1080    #[must_use]
1081    pub const fn with_compact_whitespace(mut self, value: bool) -> Self {
1082        self.compact_whitespace = value;
1083        self
1084    }
1085
1086    /// Builder-style setter for `compact_optional_whitespace` (`/w`).
1087    #[must_use]
1088    pub const fn with_compact_optional_whitespace(mut self, value: bool) -> Self {
1089        self.compact_optional_whitespace = value;
1090        self
1091    }
1092
1093    /// Builder-style setter for `ignore_lowercase` (`/c`).
1094    #[must_use]
1095    pub const fn with_ignore_lowercase(mut self, value: bool) -> Self {
1096        self.ignore_lowercase = value;
1097        self
1098    }
1099
1100    /// Builder-style setter for `ignore_uppercase` (`/C`).
1101    #[must_use]
1102    pub const fn with_ignore_uppercase(mut self, value: bool) -> Self {
1103        self.ignore_uppercase = value;
1104        self
1105    }
1106
1107    /// Builder-style setter for `text_test` (`/t`).
1108    #[must_use]
1109    pub const fn with_text_test(mut self, value: bool) -> Self {
1110        self.text_test = value;
1111        self
1112    }
1113
1114    /// Builder-style setter for `trim` (`/T`).
1115    #[must_use]
1116    pub const fn with_trim(mut self, value: bool) -> Self {
1117        self.trim = value;
1118        self
1119    }
1120
1121    /// Builder-style setter for `bin_test` (`/b`).
1122    #[must_use]
1123    pub const fn with_bin_test(mut self, value: bool) -> Self {
1124        self.bin_test = value;
1125        self
1126    }
1127
1128    /// Builder-style setter for `full_word` (`/f`).
1129    #[must_use]
1130    pub const fn with_full_word(mut self, value: bool) -> Self {
1131        self.full_word = value;
1132        self
1133    }
1134
1135    /// Builder-style setter for `start_anchor` (`/s`).
1136    #[must_use]
1137    pub const fn with_start_anchor(mut self, value: bool) -> Self {
1138        self.start_anchor = value;
1139        self
1140    }
1141}
1142
1143/// Scan window specifier for a [`TypeKind::Regex`] rule.
1144///
1145/// Encodes the three mutually-exclusive scan modes in a single enum so
1146/// that the "byte count" and "line count" cases cannot be confused. The
1147/// `regex/l` shorthand (line mode with no explicit count) is represented
1148/// explicitly as [`RegexCount::Lines(None)`](RegexCount::Lines), which
1149/// is behaviorally equivalent to [`RegexCount::Default`] -- both walk
1150/// the full 8192-byte capped window -- but preserves the magic-file
1151/// surface syntax of the original rule. The 8192-byte hard cap
1152/// (matching GNU `file`'s `FILE_REGEX_MAX`) is applied by the evaluator
1153/// on every variant.
1154///
1155/// # Examples
1156///
1157/// ```
1158/// use libmagic_rs::parser::ast::RegexCount;
1159/// use std::num::NonZeroU32;
1160///
1161/// // Plain `regex` (no suffix): default 8192-byte window.
1162/// assert_eq!(RegexCount::default(), RegexCount::Default);
1163///
1164/// // `regex/100`: scan at most 100 bytes.
1165/// let hundred_bytes = RegexCount::Bytes(NonZeroU32::new(100).unwrap());
1166///
1167/// // `regex/1l`: scan the first line.
1168/// let one_line = RegexCount::Lines(NonZeroU32::new(1));
1169///
1170/// // `regex/l`: line-mode with no explicit count (walks terminators
1171/// // to the end of the 8192-byte capped window).
1172/// let unbounded_lines = RegexCount::Lines(None);
1173/// ```
1174#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq)]
1175pub enum RegexCount {
1176    /// No scan bound (plain `regex` with no suffix). Scans the default
1177    /// 8192-byte window from the rule's offset.
1178    #[default]
1179    Default,
1180    /// Byte-bounded scan (`regex/N` with no `/l` flag). The window is
1181    /// `min(n, 8192, remaining_buffer)` bytes long. `NonZeroU32` makes
1182    /// a zero-byte scan unrepresentable.
1183    Bytes(NonZeroU32),
1184    /// Line-bounded scan (`regex/Nl` or `regex/l`). The window walks
1185    /// LF / CRLF / bare CR line terminators from the offset. With
1186    /// `Some(n)`, the walk stops after the Nth terminator (inclusive).
1187    /// With `None` (the `regex/l` shorthand), the walk continues to
1188    /// the end of the 8192-byte capped window. Either way the
1189    /// effective byte window is capped at 8192.
1190    Lines(Option<NonZeroU32>),
1191}
1192
1193impl TypeKind {
1194    /// Returns the bit width of integer types, or `None` for non-integer types (e.g., String).
1195    ///
1196    /// # Examples
1197    ///
1198    /// ```
1199    /// use libmagic_rs::parser::ast::{Endianness, StringFlags, TypeKind};
1200    ///
1201    /// assert_eq!(TypeKind::Byte { signed: false }.bit_width(), Some(8));
1202    /// assert_eq!(TypeKind::Short { endian: Endianness::Native, signed: true }.bit_width(), Some(16));
1203    /// assert_eq!(TypeKind::Long { endian: Endianness::Native, signed: true }.bit_width(), Some(32));
1204    /// assert_eq!(TypeKind::Quad { endian: Endianness::Native, signed: true }.bit_width(), Some(64));
1205    /// assert_eq!(TypeKind::Float { endian: Endianness::Native }.bit_width(), Some(32));
1206    /// assert_eq!(TypeKind::Double { endian: Endianness::Native }.bit_width(), Some(64));
1207    /// assert_eq!(TypeKind::String { max_length: None, flags: StringFlags::default() }.bit_width(), None);
1208    /// ```
1209    #[must_use]
1210    pub const fn bit_width(&self) -> Option<u32> {
1211        match self {
1212            Self::Byte { .. } => Some(8),
1213            Self::Short { .. } => Some(16),
1214            Self::Long { .. } | Self::Float { .. } | Self::Date { .. } => Some(32),
1215            Self::Quad { .. } | Self::Double { .. } | Self::QDate { .. } => Some(64),
1216            Self::String { .. }
1217            | Self::String16 { .. }
1218            | Self::PString { .. }
1219            | Self::Regex { .. }
1220            | Self::Search { .. }
1221            | Self::Meta(_) => None,
1222        }
1223    }
1224}
1225
1226/// Comparison and bitwise operators
1227#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
1228#[non_exhaustive]
1229pub enum Operator {
1230    /// Equality comparison (`=` or `==`)
1231    ///
1232    /// # Examples
1233    ///
1234    /// ```
1235    /// use libmagic_rs::parser::ast::Operator;
1236    ///
1237    /// let op = Operator::Equal;
1238    /// assert_eq!(op, Operator::Equal);
1239    /// ```
1240    Equal,
1241    /// Inequality comparison (`!=` or `<>`)
1242    ///
1243    /// # Examples
1244    ///
1245    /// ```
1246    /// use libmagic_rs::parser::ast::Operator;
1247    ///
1248    /// let op = Operator::NotEqual;
1249    /// assert_eq!(op, Operator::NotEqual);
1250    /// ```
1251    NotEqual,
1252    /// Less-than comparison (`<`)
1253    ///
1254    /// # Examples
1255    ///
1256    /// ```
1257    /// use libmagic_rs::parser::ast::Operator;
1258    ///
1259    /// let op = Operator::LessThan;
1260    /// assert_eq!(op, Operator::LessThan);
1261    /// ```
1262    LessThan,
1263    /// Greater-than comparison (`>`)
1264    ///
1265    /// # Examples
1266    ///
1267    /// ```
1268    /// use libmagic_rs::parser::ast::Operator;
1269    ///
1270    /// let op = Operator::GreaterThan;
1271    /// assert_eq!(op, Operator::GreaterThan);
1272    /// ```
1273    GreaterThan,
1274    /// Less-than-or-equal comparison (`<=`)
1275    ///
1276    /// # Examples
1277    ///
1278    /// ```
1279    /// use libmagic_rs::parser::ast::Operator;
1280    ///
1281    /// let op = Operator::LessEqual;
1282    /// assert_eq!(op, Operator::LessEqual);
1283    /// ```
1284    LessEqual,
1285    /// Greater-than-or-equal comparison (`>=`)
1286    ///
1287    /// # Examples
1288    ///
1289    /// ```
1290    /// use libmagic_rs::parser::ast::Operator;
1291    ///
1292    /// let op = Operator::GreaterEqual;
1293    /// assert_eq!(op, Operator::GreaterEqual);
1294    /// ```
1295    GreaterEqual,
1296    /// Bitwise AND operation without mask (`&`)
1297    ///
1298    /// # Examples
1299    ///
1300    /// ```
1301    /// use libmagic_rs::parser::ast::Operator;
1302    ///
1303    /// let op = Operator::BitwiseAnd;
1304    /// assert_eq!(op, Operator::BitwiseAnd);
1305    /// ```
1306    BitwiseAnd,
1307    /// Bitwise AND operation with mask value (`&` with a mask operand)
1308    ///
1309    /// # Examples
1310    ///
1311    /// ```
1312    /// use libmagic_rs::parser::ast::Operator;
1313    ///
1314    /// let op = Operator::BitwiseAndMask(0xFF00);
1315    /// assert_eq!(op, Operator::BitwiseAndMask(0xFF00));
1316    /// ```
1317    BitwiseAndMask(u64),
1318    /// Bitwise XOR operation (`^`)
1319    ///
1320    /// # Examples
1321    ///
1322    /// ```
1323    /// use libmagic_rs::parser::ast::Operator;
1324    ///
1325    /// let op = Operator::BitwiseXor;
1326    /// assert_eq!(op, Operator::BitwiseXor);
1327    /// ```
1328    BitwiseXor,
1329    /// Bitwise NOT/complement operation (`~`)
1330    ///
1331    /// # Examples
1332    ///
1333    /// ```
1334    /// use libmagic_rs::parser::ast::Operator;
1335    ///
1336    /// let op = Operator::BitwiseNot;
1337    /// assert_eq!(op, Operator::BitwiseNot);
1338    /// ```
1339    BitwiseNot,
1340    /// Match any value; condition always succeeds (`x`)
1341    ///
1342    /// # Examples
1343    ///
1344    /// ```
1345    /// use libmagic_rs::parser::ast::Operator;
1346    ///
1347    /// let op = Operator::AnyValue;
1348    /// assert_eq!(op, Operator::AnyValue);
1349    /// ```
1350    AnyValue,
1351}
1352
1353/// Value types for rule matching
1354#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
1355#[non_exhaustive]
1356pub enum Value {
1357    /// Unsigned integer value
1358    ///
1359    /// # Examples
1360    ///
1361    /// ```
1362    /// use libmagic_rs::parser::ast::Value;
1363    ///
1364    /// let val = Value::Uint(0xDEAD_BEEF);
1365    /// assert_eq!(val, Value::Uint(0xDEAD_BEEF));
1366    /// ```
1367    Uint(u64),
1368    /// Signed integer value
1369    ///
1370    /// # Examples
1371    ///
1372    /// ```
1373    /// use libmagic_rs::parser::ast::Value;
1374    ///
1375    /// let val = Value::Int(-42);
1376    /// assert_eq!(val, Value::Int(-42));
1377    /// ```
1378    Int(i64),
1379    /// Floating-point value (used for `float` and `double` types)
1380    ///
1381    /// # Examples
1382    ///
1383    /// ```
1384    /// use libmagic_rs::parser::ast::Value;
1385    ///
1386    /// let val = Value::Float(3.14);
1387    /// assert_eq!(val, Value::Float(3.14));
1388    /// ```
1389    Float(f64),
1390    /// Byte sequence
1391    ///
1392    /// # Examples
1393    ///
1394    /// ```
1395    /// use libmagic_rs::parser::ast::Value;
1396    ///
1397    /// let val = Value::Bytes(vec![0x7f, 0x45, 0x4c, 0x46]);
1398    /// assert_eq!(val, Value::Bytes(vec![0x7f, 0x45, 0x4c, 0x46]));
1399    /// ```
1400    Bytes(Vec<u8>),
1401    /// String value
1402    ///
1403    /// # Examples
1404    ///
1405    /// ```
1406    /// use libmagic_rs::parser::ast::Value;
1407    ///
1408    /// let val = Value::String("MZ".to_string());
1409    /// assert_eq!(val, Value::String("MZ".to_string()));
1410    /// ```
1411    String(String),
1412}
1413
1414/// Endianness specification for multi-byte values
1415#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
1416pub enum Endianness {
1417    /// Little-endian byte order (least significant byte first)
1418    ///
1419    /// # Examples
1420    ///
1421    /// ```
1422    /// use libmagic_rs::parser::ast::Endianness;
1423    ///
1424    /// let e = Endianness::Little;
1425    /// assert_eq!(e, Endianness::Little);
1426    /// ```
1427    Little,
1428    /// Big-endian byte order (most significant byte first)
1429    ///
1430    /// # Examples
1431    ///
1432    /// ```
1433    /// use libmagic_rs::parser::ast::Endianness;
1434    ///
1435    /// let e = Endianness::Big;
1436    /// assert_eq!(e, Endianness::Big);
1437    /// ```
1438    Big,
1439    /// Native system byte order (matches target architecture)
1440    ///
1441    /// # Examples
1442    ///
1443    /// ```
1444    /// use libmagic_rs::parser::ast::Endianness;
1445    ///
1446    /// let e = Endianness::Native;
1447    /// assert_eq!(e, Endianness::Native);
1448    /// ```
1449    Native,
1450}
1451
1452/// Strength modifier for magic rules
1453///
1454/// Strength modifiers adjust the default strength calculation for a rule.
1455/// They are specified using the `!:strength` directive in magic files.
1456///
1457/// # Examples
1458///
1459/// ```
1460/// use libmagic_rs::parser::ast::StrengthModifier;
1461///
1462/// let add = StrengthModifier::Add(10);      // !:strength +10
1463/// let sub = StrengthModifier::Subtract(5);  // !:strength -5
1464/// let mul = StrengthModifier::Multiply(2);  // !:strength *2
1465/// let div = StrengthModifier::Divide(2);    // !:strength /2
1466/// let set = StrengthModifier::Set(50);      // !:strength =50
1467/// ```
1468#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
1469pub enum StrengthModifier {
1470    /// Add to the default strength: `!:strength +N`
1471    Add(i32),
1472    /// Subtract from the default strength: `!:strength -N`
1473    Subtract(i32),
1474    /// Multiply the default strength: `!:strength *N`
1475    Multiply(i32),
1476    /// Divide the default strength: `!:strength /N`
1477    Divide(i32),
1478    /// Set strength to an absolute value: `!:strength =N` or `!:strength N`
1479    Set(i32),
1480}
1481
1482/// Arithmetic operation applied to a value read from the file *before* the
1483/// rule's comparison operator is evaluated.
1484///
1485/// magic(5) supports `+`, `-`, `*`, `/`, `%`, `|`, and `^` between the type
1486/// keyword and the comparison value (e.g., `lelong+1 x volume %d` reads a
1487/// long, adds 1, and formats the transformed value into the message).
1488/// Bitwise AND (`&MASK`) is *not* part of this enum because it is already
1489/// represented at the operator level via [`Operator::BitwiseAndMask`].
1490///
1491/// The operand is signed (`i64`) so that subtraction and negative multipliers
1492/// round-trip cleanly. Bitwise ops reinterpret the operand as a `u64` bit
1493/// pattern at evaluation time, matching libmagic's `apprentice.c::mconvert`.
1494#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
1495#[non_exhaustive]
1496pub enum ValueTransformOp {
1497    /// Addition (`type+N`).
1498    Add,
1499    /// Subtraction (`type-N`).
1500    Sub,
1501    /// Multiplication (`type*N`).
1502    Mul,
1503    /// Truncating integer division (`type/N`). Division by zero is rejected
1504    /// at evaluation time.
1505    Div,
1506    /// Remainder (`type%N`). Modulo by zero is rejected at evaluation time.
1507    Mod,
1508    /// Bitwise AND (`type&N`).
1509    ///
1510    /// magic(5) `&MASK` was historically encoded at the operator level
1511    /// via [`Operator::BitwiseAndMask`] (which combines mask+equal in
1512    /// one step). That encoding cannot represent rules like `lelong&0xff
1513    /// x %d` (mask + any-value, with the masked value used in format
1514    /// substitution). The parser promotes `&MASK` to this `BitAnd`
1515    /// transform when followed by another operator (`x`, `>`, `!=`, ...)
1516    /// so the read value is masked before comparison and before printf
1517    /// substitution. The legacy `&MASK VALUE` form (mask + implicit
1518    /// equal) keeps using `Operator::BitwiseAndMask` for backwards
1519    /// compatibility.
1520    BitAnd,
1521    /// Bitwise OR (`type|N`).
1522    Or,
1523    /// Bitwise XOR (`type^N`).
1524    Xor,
1525}
1526
1527/// A pre-comparison value transform: `(op, operand)`.
1528///
1529/// Applied to the value read from the file before the rule's comparison
1530/// operator runs. See [`ValueTransformOp`] for the supported operations.
1531///
1532/// # Examples
1533///
1534/// ```
1535/// use libmagic_rs::parser::ast::{ValueTransform, ValueTransformOp};
1536///
1537/// // `lelong+1` -> add 1 to the read value
1538/// let t = ValueTransform::new(ValueTransformOp::Add, 1);
1539/// assert_eq!(t.op, ValueTransformOp::Add);
1540/// assert_eq!(t.operand, 1);
1541/// ```
1542#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
1543#[non_exhaustive]
1544pub struct ValueTransform {
1545    /// Operation to apply.
1546    pub op: ValueTransformOp,
1547    /// Operand to combine with the read value.
1548    pub operand: i64,
1549}
1550
1551impl ValueTransform {
1552    /// Construct a new `ValueTransform` from an op and an operand.
1553    #[must_use]
1554    pub const fn new(op: ValueTransformOp, operand: i64) -> Self {
1555        Self { op, operand }
1556    }
1557}
1558
1559/// Magic rule representation in the AST
1560#[derive(Debug, Clone, Serialize, Deserialize)]
1561#[non_exhaustive]
1562pub struct MagicRule {
1563    /// Offset specification for where to read data
1564    pub offset: OffsetSpec,
1565    /// Type of data to read and interpret
1566    pub typ: TypeKind,
1567    /// Comparison operator to apply
1568    pub op: Operator,
1569    /// Expected value for comparison
1570    pub value: Value,
1571    /// Human-readable message for this rule
1572    pub message: String,
1573    /// Child rules that are evaluated if this rule matches
1574    pub children: Vec<MagicRule>,
1575    /// Indentation level for hierarchical rules
1576    pub level: u32,
1577    /// Optional strength modifier from `!:strength` directive
1578    pub strength_modifier: Option<StrengthModifier>,
1579    /// Optional pre-comparison value transform from a magic-file
1580    /// type-suffix like `lelong+1` or `ulequad/1073741824`. When set,
1581    /// the read value is transformed *before* `op` is evaluated and
1582    /// before the message's `%`-format substitution, so format
1583    /// specifiers see the post-transform number.
1584    ///
1585    /// `#[serde(default)]` keeps existing serialized AST snapshots
1586    /// (which never had this field) round-tripping correctly: missing
1587    /// fields deserialize to `None`, which means "no transform" --
1588    /// the historical behavior.
1589    #[serde(default)]
1590    pub value_transform: Option<ValueTransform>,
1591}
1592
1593/// Validation errors returned by [`MagicRule::validate`].
1594#[derive(Debug, thiserror::Error, PartialEq, Eq)]
1595#[non_exhaustive]
1596pub enum MagicRuleValidationError {
1597    /// Rule message is empty. Messages are user-facing and required
1598    /// for meaningful output.
1599    #[error("rule message must not be empty")]
1600    EmptyMessage,
1601
1602    /// The child rule at `child_index` has `level <= self.level`,
1603    /// violating the "children must nest deeper than the parent"
1604    /// invariant of the hierarchical indentation-based DSL.
1605    #[error(
1606        "child rule at index {child_index} has level {child_level}, \
1607         must be greater than parent level {parent_level}"
1608    )]
1609    InvalidChildLevel {
1610        /// Index of the offending child in `self.children`.
1611        child_index: usize,
1612        /// Level of the child rule.
1613        child_level: u32,
1614        /// Level of the parent rule.
1615        parent_level: u32,
1616    },
1617
1618    /// Rule `level` exceeds the maximum supported depth. The limit is a
1619    /// hardening mechanism against stack overflow during deep recursion;
1620    /// libmagic files in the wild rarely go beyond 10 levels.
1621    #[error("rule level {level} exceeds maximum supported depth {max}")]
1622    LevelTooDeep {
1623        /// The invalid level value.
1624        level: u32,
1625        /// The maximum allowed depth.
1626        max: u32,
1627    },
1628}
1629
1630impl MagicRule {
1631    /// Hard structural ceiling on rule `level`.
1632    ///
1633    /// This is a conservative upper bound enforced by
1634    /// [`MagicRule::validate`] to keep the AST shape sane: real
1635    /// magic files in the wild rarely exceed ~10 levels of nesting,
1636    /// so rejecting rules with `level > 1000` catches obviously
1637    /// pathological input at construction time without constraining
1638    /// any legitimate rule.
1639    ///
1640    /// This ceiling is **independent of** the evaluator's
1641    /// `EvaluationConfig::max_recursion_depth` (default 20), which
1642    /// is the *runtime* recursion guard applied during rule
1643    /// evaluation. The evaluator limit is the first one that fires
1644    /// in practice -- a rule tree with 50 levels passes this
1645    /// structural check but is aborted by the evaluator long before
1646    /// reaching `MAX_LEVEL`. The two limits serve different purposes:
1647    /// `MAX_LEVEL` is an AST-shape sanity check, and
1648    /// `max_recursion_depth` is a per-evaluation resource bound.
1649    pub const MAX_LEVEL: u32 = 1000;
1650
1651    /// Construct a top-level rule with no children and no strength
1652    /// modifier.
1653    ///
1654    /// This is the most common constructor for programmatically building
1655    /// rules outside the parser. To add children, mutate
1656    /// [`MagicRule::children`] directly, or use [`MagicRule::with_children`].
1657    /// To set a strength modifier, use
1658    /// [`MagicRule::with_strength_modifier`].
1659    ///
1660    /// # Examples
1661    ///
1662    /// ```rust
1663    /// use libmagic_rs::{MagicRule, OffsetSpec, Operator, TypeKind, Value};
1664    ///
1665    /// let rule = MagicRule::new(
1666    ///     OffsetSpec::Absolute(0),
1667    ///     TypeKind::Byte { signed: false },
1668    ///     Operator::Equal,
1669    ///     Value::Uint(0x7f),
1670    ///     "ELF magic byte".to_string(),
1671    /// );
1672    /// assert_eq!(rule.level, 0);
1673    /// assert!(rule.children.is_empty());
1674    /// assert!(rule.validate().is_ok());
1675    /// ```
1676    #[must_use]
1677    pub fn new(
1678        offset: OffsetSpec,
1679        typ: TypeKind,
1680        op: Operator,
1681        value: Value,
1682        message: String,
1683    ) -> Self {
1684        Self {
1685            offset,
1686            typ,
1687            op,
1688            value,
1689            message,
1690            children: vec![],
1691            level: 0,
1692            strength_modifier: None,
1693            value_transform: None,
1694        }
1695    }
1696
1697    /// Replace `self.children` with the given children and return the
1698    /// modified rule. Builder-style for chaining.
1699    #[must_use]
1700    pub fn with_children(mut self, children: Vec<MagicRule>) -> Self {
1701        self.children = children;
1702        self
1703    }
1704
1705    /// Set `self.strength_modifier` to the given value and return the
1706    /// modified rule. Builder-style for chaining.
1707    #[must_use]
1708    pub const fn with_strength_modifier(mut self, modifier: StrengthModifier) -> Self {
1709        self.strength_modifier = Some(modifier);
1710        self
1711    }
1712
1713    /// Set `self.level` to the given value and return the modified rule.
1714    /// Builder-style for chaining; typically used only when constructing
1715    /// child rules programmatically.
1716    #[must_use]
1717    pub const fn with_level(mut self, level: u32) -> Self {
1718        self.level = level;
1719        self
1720    }
1721
1722    /// Validate structural invariants of the rule.
1723    ///
1724    /// This checks invariants that the parser enforces automatically but
1725    /// that programmatic constructors (especially via serde deserialize)
1726    /// can violate:
1727    ///
1728    /// * Message must not be empty.
1729    /// * `level` must not exceed [`Self::MAX_LEVEL`].
1730    /// * Every child's `level` must be strictly greater than
1731    ///   `self.level`, and each child must recursively validate.
1732    ///
1733    /// This does *not* validate that `value` is shape-compatible with
1734    /// `typ` (e.g., a `Value::Uint` against a `TypeKind::String`); such
1735    /// mismatches are coerced or rejected by the evaluator at match time.
1736    ///
1737    /// # Errors
1738    ///
1739    /// Returns [`MagicRuleValidationError`] describing the first
1740    /// invariant violation encountered.
1741    ///
1742    /// # Examples
1743    ///
1744    /// ```rust
1745    /// use libmagic_rs::{MagicRule, OffsetSpec, Operator, TypeKind, Value};
1746    ///
1747    /// let rule = MagicRule::new(
1748    ///     OffsetSpec::Absolute(0),
1749    ///     TypeKind::Byte { signed: false },
1750    ///     Operator::Equal,
1751    ///     Value::Uint(0),
1752    ///     "zero byte".to_string(),
1753    /// );
1754    /// assert!(rule.validate().is_ok());
1755    /// ```
1756    pub fn validate(&self) -> Result<(), MagicRuleValidationError> {
1757        if self.message.is_empty() {
1758            return Err(MagicRuleValidationError::EmptyMessage);
1759        }
1760        if self.level > Self::MAX_LEVEL {
1761            return Err(MagicRuleValidationError::LevelTooDeep {
1762                level: self.level,
1763                max: Self::MAX_LEVEL,
1764            });
1765        }
1766        for (child_index, child) in self.children.iter().enumerate() {
1767            if child.level <= self.level {
1768                return Err(MagicRuleValidationError::InvalidChildLevel {
1769                    child_index,
1770                    child_level: child.level,
1771                    parent_level: self.level,
1772                });
1773            }
1774            child.validate()?;
1775        }
1776        Ok(())
1777    }
1778}
1779
1780#[cfg(test)]
1781mod tests {
1782    use super::*;
1783
1784    #[test]
1785    fn test_magic_rule_new_defaults() {
1786        let rule = MagicRule::new(
1787            OffsetSpec::Absolute(0),
1788            TypeKind::Byte { signed: false },
1789            Operator::Equal,
1790            Value::Uint(0x7f),
1791            "ELF".to_string(),
1792        );
1793        assert_eq!(rule.level, 0);
1794        assert!(rule.children.is_empty());
1795        assert!(rule.strength_modifier.is_none());
1796        assert!(rule.validate().is_ok());
1797    }
1798
1799    #[test]
1800    fn test_magic_rule_builder_chain() {
1801        let child = MagicRule::new(
1802            OffsetSpec::Absolute(4),
1803            TypeKind::Byte { signed: false },
1804            Operator::Equal,
1805            Value::Uint(2),
1806            "64-bit".to_string(),
1807        )
1808        .with_level(1);
1809        let parent = MagicRule::new(
1810            OffsetSpec::Absolute(0),
1811            TypeKind::Byte { signed: false },
1812            Operator::Equal,
1813            Value::Uint(0x7f),
1814            "ELF".to_string(),
1815        )
1816        .with_children(vec![child])
1817        .with_strength_modifier(StrengthModifier::Add(10));
1818        assert_eq!(parent.children.len(), 1);
1819        assert_eq!(parent.strength_modifier, Some(StrengthModifier::Add(10)));
1820        assert!(parent.validate().is_ok());
1821    }
1822
1823    #[test]
1824    fn test_magic_rule_validate_empty_message_rejected() {
1825        let rule = MagicRule::new(
1826            OffsetSpec::Absolute(0),
1827            TypeKind::Byte { signed: false },
1828            Operator::Equal,
1829            Value::Uint(0),
1830            String::new(),
1831        );
1832        assert_eq!(rule.validate(), Err(MagicRuleValidationError::EmptyMessage));
1833    }
1834
1835    #[test]
1836    fn test_magic_rule_validate_child_level_must_be_deeper() {
1837        let child_same_level = MagicRule::new(
1838            OffsetSpec::Absolute(4),
1839            TypeKind::Byte { signed: false },
1840            Operator::Equal,
1841            Value::Uint(2),
1842            "child".to_string(),
1843        ); // level = 0, same as parent
1844        let parent = MagicRule::new(
1845            OffsetSpec::Absolute(0),
1846            TypeKind::Byte { signed: false },
1847            Operator::Equal,
1848            Value::Uint(0x7f),
1849            "parent".to_string(),
1850        )
1851        .with_children(vec![child_same_level]);
1852        assert_eq!(
1853            parent.validate(),
1854            Err(MagicRuleValidationError::InvalidChildLevel {
1855                child_index: 0,
1856                child_level: 0,
1857                parent_level: 0,
1858            })
1859        );
1860    }
1861
1862    #[test]
1863    fn test_magic_rule_validate_level_too_deep() {
1864        let rule = MagicRule::new(
1865            OffsetSpec::Absolute(0),
1866            TypeKind::Byte { signed: false },
1867            Operator::Equal,
1868            Value::Uint(0),
1869            "deep".to_string(),
1870        )
1871        .with_level(MagicRule::MAX_LEVEL + 1);
1872        assert_eq!(
1873            rule.validate(),
1874            Err(MagicRuleValidationError::LevelTooDeep {
1875                level: MagicRule::MAX_LEVEL + 1,
1876                max: MagicRule::MAX_LEVEL,
1877            })
1878        );
1879    }
1880
1881    #[test]
1882    fn test_offset_spec_absolute() {
1883        let offset = OffsetSpec::Absolute(42);
1884        assert_eq!(offset, OffsetSpec::Absolute(42));
1885
1886        // Test negative offset
1887        let negative = OffsetSpec::Absolute(-10);
1888        assert_eq!(negative, OffsetSpec::Absolute(-10));
1889    }
1890
1891    #[test]
1892    fn test_offset_spec_indirect() {
1893        let indirect = OffsetSpec::Indirect {
1894            base_offset: 0x20,
1895            base_relative: false,
1896            pointer_type: TypeKind::Long {
1897                endian: Endianness::Little,
1898                signed: false,
1899            },
1900            adjustment: 4,
1901            adjustment_op: IndirectAdjustmentOp::Add,
1902            result_relative: false,
1903            endian: Endianness::Little,
1904        };
1905
1906        match indirect {
1907            OffsetSpec::Indirect {
1908                base_offset,
1909                adjustment,
1910                ..
1911            } => {
1912                assert_eq!(base_offset, 0x20);
1913                assert_eq!(adjustment, 4);
1914            }
1915            _ => panic!("Expected Indirect variant"),
1916        }
1917    }
1918
1919    #[test]
1920    fn test_offset_spec_relative() {
1921        let relative = OffsetSpec::Relative(8);
1922        assert_eq!(relative, OffsetSpec::Relative(8));
1923
1924        // Test negative relative offset
1925        let negative_relative = OffsetSpec::Relative(-4);
1926        assert_eq!(negative_relative, OffsetSpec::Relative(-4));
1927    }
1928
1929    #[test]
1930    fn test_offset_spec_from_end() {
1931        let from_end = OffsetSpec::FromEnd(-16);
1932        assert_eq!(from_end, OffsetSpec::FromEnd(-16));
1933
1934        // Test positive from_end (though unusual)
1935        let positive_from_end = OffsetSpec::FromEnd(8);
1936        assert_eq!(positive_from_end, OffsetSpec::FromEnd(8));
1937    }
1938
1939    #[test]
1940    fn test_offset_spec_debug() {
1941        let offset = OffsetSpec::Absolute(100);
1942        let debug_str = format!("{offset:?}");
1943        assert!(debug_str.contains("Absolute"));
1944        assert!(debug_str.contains("100"));
1945    }
1946
1947    #[test]
1948    fn test_offset_spec_clone() {
1949        let original = OffsetSpec::Indirect {
1950            base_offset: 0x10,
1951            base_relative: false,
1952            pointer_type: TypeKind::Short {
1953                endian: Endianness::Big,
1954                signed: true,
1955            },
1956            adjustment: -2,
1957            adjustment_op: IndirectAdjustmentOp::Add,
1958            result_relative: false,
1959            endian: Endianness::Big,
1960        };
1961
1962        let cloned = original.clone();
1963        assert_eq!(original, cloned);
1964    }
1965
1966    #[test]
1967    fn test_offset_spec_serialization() {
1968        let offset = OffsetSpec::Absolute(42);
1969
1970        // Test JSON serialization
1971        let json = serde_json::to_string(&offset).expect("Failed to serialize");
1972        let deserialized: OffsetSpec = serde_json::from_str(&json).expect("Failed to deserialize");
1973
1974        assert_eq!(offset, deserialized);
1975    }
1976
1977    #[test]
1978    fn test_offset_spec_indirect_serialization() {
1979        let indirect = OffsetSpec::Indirect {
1980            base_offset: 0x100,
1981            base_relative: false,
1982            pointer_type: TypeKind::Long {
1983                endian: Endianness::Native,
1984                signed: false,
1985            },
1986            adjustment: 12,
1987            adjustment_op: IndirectAdjustmentOp::Add,
1988            result_relative: false,
1989            endian: Endianness::Native,
1990        };
1991
1992        // Test JSON serialization for complex variant
1993        let json = serde_json::to_string(&indirect).expect("Failed to serialize");
1994        let deserialized: OffsetSpec = serde_json::from_str(&json).expect("Failed to deserialize");
1995
1996        assert_eq!(indirect, deserialized);
1997    }
1998
1999    #[test]
2000    fn test_all_offset_spec_variants() {
2001        let variants = [
2002            OffsetSpec::Absolute(0),
2003            OffsetSpec::Absolute(-100),
2004            OffsetSpec::Indirect {
2005                base_offset: 0x20,
2006                base_relative: false,
2007                pointer_type: TypeKind::Byte { signed: true },
2008                adjustment: 0,
2009                adjustment_op: IndirectAdjustmentOp::Add,
2010                result_relative: false,
2011                endian: Endianness::Little,
2012            },
2013            OffsetSpec::Relative(50),
2014            OffsetSpec::Relative(-25),
2015            OffsetSpec::FromEnd(-8),
2016            OffsetSpec::FromEnd(4),
2017        ];
2018
2019        // Test that all variants can be created and are distinct
2020        for (i, variant) in variants.iter().enumerate() {
2021            for (j, other) in variants.iter().enumerate() {
2022                if i != j {
2023                    assert_ne!(
2024                        variant, other,
2025                        "Variants at indices {i} and {j} should be different"
2026                    );
2027                }
2028            }
2029        }
2030    }
2031
2032    #[test]
2033    fn test_endianness_variants() {
2034        let endianness_values = vec![Endianness::Little, Endianness::Big, Endianness::Native];
2035
2036        for endian in endianness_values {
2037            let indirect = OffsetSpec::Indirect {
2038                base_offset: 0,
2039                base_relative: false,
2040                pointer_type: TypeKind::Long {
2041                    endian,
2042                    signed: false,
2043                },
2044                adjustment: 0,
2045                adjustment_op: IndirectAdjustmentOp::Add,
2046                result_relative: false,
2047                endian,
2048            };
2049
2050            // Verify the endianness is preserved
2051            match indirect {
2052                OffsetSpec::Indirect {
2053                    endian: actual_endian,
2054                    ..
2055                } => {
2056                    assert_eq!(endian, actual_endian);
2057                }
2058                _ => panic!("Expected Indirect variant"),
2059            }
2060        }
2061    }
2062
2063    // Value enum tests
2064    #[test]
2065    fn test_value_uint() {
2066        let value = Value::Uint(42);
2067        assert_eq!(value, Value::Uint(42));
2068
2069        // Test large values
2070        let large_value = Value::Uint(u64::MAX);
2071        assert_eq!(large_value, Value::Uint(u64::MAX));
2072    }
2073
2074    #[test]
2075    fn test_value_int() {
2076        let positive = Value::Int(100);
2077        assert_eq!(positive, Value::Int(100));
2078
2079        let negative = Value::Int(-50);
2080        assert_eq!(negative, Value::Int(-50));
2081
2082        // Test extreme values
2083        let max_int = Value::Int(i64::MAX);
2084        let min_int = Value::Int(i64::MIN);
2085        assert_eq!(max_int, Value::Int(i64::MAX));
2086        assert_eq!(min_int, Value::Int(i64::MIN));
2087    }
2088
2089    #[test]
2090    fn test_value_bytes() {
2091        let empty_bytes = Value::Bytes(vec![]);
2092        assert_eq!(empty_bytes, Value::Bytes(vec![]));
2093
2094        let some_bytes = Value::Bytes(vec![0x7f, 0x45, 0x4c, 0x46]);
2095        assert_eq!(some_bytes, Value::Bytes(vec![0x7f, 0x45, 0x4c, 0x46]));
2096
2097        // Test that different byte sequences are not equal
2098        let other_bytes = Value::Bytes(vec![0x50, 0x4b, 0x03, 0x04]);
2099        assert_ne!(some_bytes, other_bytes);
2100    }
2101
2102    #[test]
2103    fn test_value_string() {
2104        let empty_string = Value::String(String::new());
2105        assert_eq!(empty_string, Value::String(String::new()));
2106
2107        let hello = Value::String("Hello, World!".to_string());
2108        assert_eq!(hello, Value::String("Hello, World!".to_string()));
2109
2110        // Test Unicode strings
2111        let unicode = Value::String("🦀 Rust".to_string());
2112        assert_eq!(unicode, Value::String("🦀 Rust".to_string()));
2113    }
2114
2115    #[test]
2116    fn test_value_comparison() {
2117        // Test that different value types are not equal
2118        let uint_val = Value::Uint(42);
2119        let int_val = Value::Int(42);
2120        let float_val = Value::Float(42.0);
2121        let bytes_val = Value::Bytes(vec![42]);
2122        let string_val = Value::String("42".to_string());
2123
2124        assert_ne!(uint_val, int_val);
2125        assert_ne!(uint_val, float_val);
2126        assert_ne!(uint_val, bytes_val);
2127        assert_ne!(uint_val, string_val);
2128        assert_ne!(int_val, float_val);
2129        assert_ne!(int_val, bytes_val);
2130        assert_ne!(int_val, string_val);
2131        assert_ne!(float_val, bytes_val);
2132        assert_ne!(float_val, string_val);
2133        assert_ne!(bytes_val, string_val);
2134    }
2135
2136    #[test]
2137    fn test_value_debug() {
2138        let uint_val = Value::Uint(123);
2139        let debug_str = format!("{uint_val:?}");
2140        assert!(debug_str.contains("Uint"));
2141        assert!(debug_str.contains("123"));
2142
2143        let string_val = Value::String("test".to_string());
2144        let debug_str = format!("{string_val:?}");
2145        assert!(debug_str.contains("String"));
2146        assert!(debug_str.contains("test"));
2147    }
2148
2149    #[test]
2150    fn test_value_clone() {
2151        let original = Value::Bytes(vec![1, 2, 3, 4]);
2152        let cloned = original.clone();
2153        assert_eq!(original, cloned);
2154
2155        // Verify they are independent copies
2156        match (original, cloned) {
2157            (Value::Bytes(orig_bytes), Value::Bytes(cloned_bytes)) => {
2158                assert_eq!(orig_bytes, cloned_bytes);
2159                // They should have the same content but be different Vec instances
2160            }
2161            _ => panic!("Expected Bytes variants"),
2162        }
2163    }
2164
2165    #[test]
2166    fn test_value_float() {
2167        let value = Value::Float(3.125);
2168        assert_eq!(value, Value::Float(3.125));
2169
2170        let negative = Value::Float(-1.5);
2171        assert_eq!(negative, Value::Float(-1.5));
2172
2173        let zero = Value::Float(0.0);
2174        assert_eq!(zero, Value::Float(0.0));
2175    }
2176
2177    #[test]
2178    fn test_value_serialization() {
2179        let values = vec![
2180            Value::Uint(42),
2181            Value::Int(-100),
2182            Value::Float(3.125),
2183            Value::Bytes(vec![0x7f, 0x45, 0x4c, 0x46]),
2184            Value::String("ELF executable".to_string()),
2185        ];
2186
2187        for value in values {
2188            // Test JSON serialization
2189            let json = serde_json::to_string(&value).expect("Failed to serialize Value");
2190            let deserialized: Value =
2191                serde_json::from_str(&json).expect("Failed to deserialize Value");
2192            assert_eq!(value, deserialized);
2193        }
2194    }
2195
2196    #[test]
2197    fn test_value_serialization_edge_cases() {
2198        // Test empty collections
2199        let empty_bytes = Value::Bytes(vec![]);
2200        let json = serde_json::to_string(&empty_bytes).expect("Failed to serialize empty bytes");
2201        let deserialized: Value =
2202            serde_json::from_str(&json).expect("Failed to deserialize empty bytes");
2203        assert_eq!(empty_bytes, deserialized);
2204
2205        let empty_string = Value::String(String::new());
2206        let json = serde_json::to_string(&empty_string).expect("Failed to serialize empty string");
2207        let deserialized: Value =
2208            serde_json::from_str(&json).expect("Failed to deserialize empty string");
2209        assert_eq!(empty_string, deserialized);
2210
2211        // Test extreme values
2212        let max_uint = Value::Uint(u64::MAX);
2213        let json = serde_json::to_string(&max_uint).expect("Failed to serialize max uint");
2214        let deserialized: Value =
2215            serde_json::from_str(&json).expect("Failed to deserialize max uint");
2216        assert_eq!(max_uint, deserialized);
2217
2218        let min_int = Value::Int(i64::MIN);
2219        let json = serde_json::to_string(&min_int).expect("Failed to serialize min int");
2220        let deserialized: Value =
2221            serde_json::from_str(&json).expect("Failed to deserialize min int");
2222        assert_eq!(min_int, deserialized);
2223    }
2224
2225    // TypeKind tests
2226    #[test]
2227    fn test_type_kind_byte() {
2228        let byte_type = TypeKind::Byte { signed: true };
2229        assert_eq!(byte_type, TypeKind::Byte { signed: true });
2230    }
2231
2232    #[test]
2233    fn test_type_kind_short() {
2234        let short_little_endian = TypeKind::Short {
2235            endian: Endianness::Little,
2236            signed: false,
2237        };
2238        let short_big_endian = TypeKind::Short {
2239            endian: Endianness::Big,
2240            signed: true,
2241        };
2242
2243        assert_ne!(short_little_endian, short_big_endian);
2244        assert_eq!(short_little_endian, short_little_endian.clone());
2245    }
2246
2247    #[test]
2248    fn test_type_kind_long() {
2249        let long_native = TypeKind::Long {
2250            endian: Endianness::Native,
2251            signed: true,
2252        };
2253
2254        match long_native {
2255            TypeKind::Long { endian, signed } => {
2256                assert_eq!(endian, Endianness::Native);
2257                assert!(signed);
2258            }
2259            _ => panic!("Expected Long variant"),
2260        }
2261    }
2262
2263    #[test]
2264    fn test_type_kind_string() {
2265        let unlimited_string = TypeKind::String {
2266            max_length: None,
2267            flags: StringFlags::default(),
2268        };
2269        let limited_string = TypeKind::String {
2270            max_length: Some(256),
2271            flags: StringFlags::default(),
2272        };
2273
2274        assert_ne!(unlimited_string, limited_string);
2275        assert_eq!(unlimited_string, unlimited_string.clone());
2276    }
2277
2278    #[test]
2279    fn test_type_kind_serialization() {
2280        let types = vec![
2281            TypeKind::Byte { signed: true },
2282            TypeKind::Short {
2283                endian: Endianness::Little,
2284                signed: false,
2285            },
2286            TypeKind::Long {
2287                endian: Endianness::Big,
2288                signed: true,
2289            },
2290            TypeKind::Quad {
2291                endian: Endianness::Little,
2292                signed: false,
2293            },
2294            TypeKind::Quad {
2295                endian: Endianness::Big,
2296                signed: true,
2297            },
2298            TypeKind::Float {
2299                endian: Endianness::Native,
2300            },
2301            TypeKind::Float {
2302                endian: Endianness::Big,
2303            },
2304            TypeKind::Double {
2305                endian: Endianness::Little,
2306            },
2307            TypeKind::Double {
2308                endian: Endianness::Native,
2309            },
2310            TypeKind::Date {
2311                endian: Endianness::Big,
2312                utc: true,
2313            },
2314            TypeKind::Date {
2315                endian: Endianness::Little,
2316                utc: false,
2317            },
2318            TypeKind::QDate {
2319                endian: Endianness::Native,
2320                utc: true,
2321            },
2322            TypeKind::QDate {
2323                endian: Endianness::Big,
2324                utc: false,
2325            },
2326            TypeKind::String {
2327                max_length: None,
2328                flags: StringFlags::default(),
2329            },
2330            TypeKind::String {
2331                max_length: Some(128),
2332                flags: StringFlags::default(),
2333            },
2334            TypeKind::PString {
2335                max_length: None,
2336                length_width: PStringLengthWidth::OneByte,
2337                length_includes_itself: false,
2338            },
2339            TypeKind::PString {
2340                max_length: Some(64),
2341                length_width: PStringLengthWidth::OneByte,
2342                length_includes_itself: false,
2343            },
2344            TypeKind::PString {
2345                max_length: None,
2346                length_width: PStringLengthWidth::TwoByteBE,
2347                length_includes_itself: true,
2348            },
2349            TypeKind::PString {
2350                max_length: Some(128),
2351                length_width: PStringLengthWidth::FourByteLE,
2352                length_includes_itself: false,
2353            },
2354        ];
2355
2356        for typ in types {
2357            let json = serde_json::to_string(&typ).expect("Failed to serialize TypeKind");
2358            let deserialized: TypeKind =
2359                serde_json::from_str(&json).expect("Failed to deserialize TypeKind");
2360            assert_eq!(typ, deserialized);
2361        }
2362    }
2363
2364    // Operator tests
2365    #[test]
2366    fn test_operator_variants() {
2367        let operators = [
2368            Operator::Equal,
2369            Operator::NotEqual,
2370            Operator::BitwiseAnd,
2371            Operator::BitwiseXor,
2372            Operator::BitwiseNot,
2373            Operator::AnyValue,
2374        ];
2375
2376        for (i, op) in operators.iter().enumerate() {
2377            for (j, other) in operators.iter().enumerate() {
2378                if i == j {
2379                    assert_eq!(op, other);
2380                } else {
2381                    assert_ne!(op, other);
2382                }
2383            }
2384        }
2385    }
2386
2387    #[test]
2388    fn test_operator_serialization() {
2389        let operators = vec![
2390            Operator::Equal,
2391            Operator::NotEqual,
2392            Operator::BitwiseAnd,
2393            Operator::BitwiseXor,
2394            Operator::BitwiseNot,
2395            Operator::AnyValue,
2396        ];
2397
2398        for op in operators {
2399            let json = serde_json::to_string(&op).expect("Failed to serialize Operator");
2400            let deserialized: Operator =
2401                serde_json::from_str(&json).expect("Failed to deserialize Operator");
2402            assert_eq!(op, deserialized);
2403        }
2404    }
2405
2406    // MagicRule tests
2407    #[test]
2408    fn test_magic_rule_creation() {
2409        let rule = MagicRule {
2410            offset: OffsetSpec::Absolute(0),
2411            typ: TypeKind::Byte { signed: true },
2412            op: Operator::Equal,
2413            value: Value::Uint(0x7f),
2414            message: "ELF magic".to_string(),
2415            children: vec![],
2416            level: 0,
2417            strength_modifier: None,
2418            value_transform: None,
2419        };
2420
2421        assert_eq!(rule.message, "ELF magic");
2422        assert_eq!(rule.level, 0);
2423        assert!(rule.children.is_empty());
2424    }
2425
2426    #[test]
2427    fn test_magic_rule_with_children() {
2428        let child_rule = MagicRule {
2429            offset: OffsetSpec::Absolute(4),
2430            typ: TypeKind::Byte { signed: true },
2431            op: Operator::Equal,
2432            value: Value::Uint(1),
2433            message: "32-bit".to_string(),
2434            children: vec![],
2435            level: 1,
2436            strength_modifier: None,
2437            value_transform: None,
2438        };
2439
2440        let parent_rule = MagicRule {
2441            offset: OffsetSpec::Absolute(0),
2442            typ: TypeKind::Long {
2443                endian: Endianness::Little,
2444                signed: false,
2445            },
2446            op: Operator::Equal,
2447            value: Value::Bytes(vec![0x7f, 0x45, 0x4c, 0x46]),
2448            message: "ELF executable".to_string(),
2449            children: vec![child_rule],
2450            level: 0,
2451            strength_modifier: None,
2452            value_transform: None,
2453        };
2454
2455        assert_eq!(parent_rule.children.len(), 1);
2456        assert_eq!(parent_rule.children[0].level, 1);
2457        assert_eq!(parent_rule.children[0].message, "32-bit");
2458    }
2459
2460    #[test]
2461    fn test_magic_rule_serialization() {
2462        let rule = MagicRule {
2463            offset: OffsetSpec::Absolute(16),
2464            typ: TypeKind::Short {
2465                endian: Endianness::Little,
2466                signed: false,
2467            },
2468            op: Operator::NotEqual,
2469            value: Value::Uint(0),
2470            message: "Non-zero short value".to_string(),
2471            children: vec![],
2472            level: 2,
2473            strength_modifier: None,
2474            value_transform: None,
2475        };
2476
2477        let json = serde_json::to_string(&rule).expect("Failed to serialize MagicRule");
2478        let deserialized: MagicRule =
2479            serde_json::from_str(&json).expect("Failed to deserialize MagicRule");
2480
2481        assert_eq!(rule.message, deserialized.message);
2482        assert_eq!(rule.level, deserialized.level);
2483        assert_eq!(rule.children.len(), deserialized.children.len());
2484    }
2485
2486    // StrengthModifier tests
/// Builds one `StrengthModifier` of each kind and checks that every value
/// equals an identically constructed one while differing from other kinds.
#[test]
fn test_strength_modifier_variants() {
    let [add, sub, mul, div, set] = [
        StrengthModifier::Add(10),
        StrengthModifier::Subtract(5),
        StrengthModifier::Multiply(2),
        StrengthModifier::Divide(2),
        StrengthModifier::Set(50),
    ];

    // Each value compares equal to a freshly built twin with the same payload.
    let twins = [
        (add, StrengthModifier::Add(10)),
        (sub, StrengthModifier::Subtract(5)),
        (mul, StrengthModifier::Multiply(2)),
        (div, StrengthModifier::Divide(2)),
        (set, StrengthModifier::Set(50)),
    ];
    for (built, twin) in twins {
        assert_eq!(built, twin);
    }

    // Values of different kinds never compare equal.
    for (left, right) in [(add, sub), (mul, div), (set, add)] {
        assert_ne!(left, right);
    }
}
2507
/// Checks that `Add`, `Subtract`, and `Set` accept negative payloads and that
/// such values compare equal to identically constructed ones.
#[test]
fn test_strength_modifier_negative_values() {
    let cases = [
        (StrengthModifier::Add(-10), StrengthModifier::Add(-10)),
        (
            StrengthModifier::Subtract(-5),
            StrengthModifier::Subtract(-5),
        ),
        (StrengthModifier::Set(-50), StrengthModifier::Set(-50)),
    ];

    for (built, expected) in cases {
        assert_eq!(built, expected);
    }
}
2518
/// Round-trips one `StrengthModifier` of each kind through JSON and checks
/// that the decoded value equals the one that was encoded.
#[test]
fn test_strength_modifier_serialization() {
    for original in [
        StrengthModifier::Add(10),
        StrengthModifier::Subtract(5),
        StrengthModifier::Multiply(2),
        StrengthModifier::Divide(3),
        StrengthModifier::Set(100),
    ] {
        let encoded =
            serde_json::to_string(&original).expect("Failed to serialize StrengthModifier");
        let decoded: StrengthModifier =
            serde_json::from_str(&encoded).expect("Failed to deserialize StrengthModifier");
        assert_eq!(original, decoded);
    }
}
2537
/// Checks that the `Debug` rendering of `StrengthModifier::Add(25)` mentions
/// both the variant name and its payload.
#[test]
fn test_strength_modifier_debug() {
    let rendered = format!("{:?}", StrengthModifier::Add(25));

    for needle in ["Add", "25"] {
        assert!(rendered.contains(needle));
    }
}
2545
/// Verifies that a `StrengthModifier` can be duplicated both by an implicit
/// copy and by an explicit `Clone::clone` call, and that each duplicate
/// compares equal to the source value.
#[test]
// `StrengthModifier` is `Copy`, so Clippy would normally flag the explicit
// `.clone()` below; calling `Clone` directly is the purpose of this test.
#[allow(clippy::clone_on_copy)]
fn test_strength_modifier_clone() {
    let original = StrengthModifier::Multiply(4);

    // Implicit copy: `original` remains usable after the assignment.
    let copied = original;
    assert_eq!(original, copied);

    // Explicit clone: goes through the `Clone` implementation itself.
    let cloned = original.clone();
    assert_eq!(original, cloned);
}
2552
/// Builds a `MagicRule` carrying a strength modifier and checks that the
/// modifier is stored on the rule and survives a JSON round trip.
#[test]
fn test_magic_rule_with_strength_modifier() {
    let expected_modifier = Some(StrengthModifier::Add(20));

    let rule = MagicRule {
        offset: OffsetSpec::Absolute(0),
        typ: TypeKind::Byte { signed: true },
        op: Operator::Equal,
        value: Value::Uint(0x7f),
        message: String::from("ELF magic"),
        children: Vec::new(),
        level: 0,
        strength_modifier: expected_modifier,
        value_transform: None,
    };

    assert_eq!(rule.strength_modifier, expected_modifier);

    // The modifier must also come back unchanged after encode + decode.
    let encoded = serde_json::to_string(&rule).expect("Failed to serialize MagicRule");
    let decoded: MagicRule =
        serde_json::from_str(&encoded).expect("Failed to deserialize MagicRule");
    assert_eq!(rule.strength_modifier, decoded.strength_modifier);
}
2575
/// Builds a `MagicRule` with no strength modifier and checks that the field
/// reads back as `None`.
#[test]
fn test_magic_rule_without_strength_modifier() {
    let plain_rule = MagicRule {
        offset: OffsetSpec::Absolute(0),
        typ: TypeKind::Byte { signed: true },
        op: Operator::Equal,
        value: Value::Uint(0x7f),
        message: String::from("ELF magic"),
        children: Vec::new(),
        level: 0,
        strength_modifier: None,
        value_transform: None,
    };

    assert_eq!(plain_rule.strength_modifier, None);
}
2592
2593    // MetaType tests
/// For a sample of `MetaType` values, checks that `Debug` output is
/// non-empty, that a clone equals its source, and that each sample is equal
/// only to itself within the sample list.
#[test]
fn test_meta_type_variants_debug_clone_eq() {
    let cases = [
        MetaType::Default,
        MetaType::Clear,
        MetaType::Indirect,
        MetaType::Offset,
        MetaType::Name(String::from("part2")),
        MetaType::Use(String::from("part2")),
    ];

    for (i, variant) in cases.iter().enumerate() {
        // Debug rendering must produce some text.
        assert!(
            !format!("{variant:?}").is_empty(),
            "Debug format must be non-empty for variant at index {i}"
        );

        // A clone must compare equal to the value it was made from.
        assert_eq!(
            variant,
            &variant.clone(),
            "Clone must preserve equality for variant at index {i}"
        );

        // Pairwise comparison: equal on the diagonal, unequal everywhere else.
        for (j, other) in cases.iter().enumerate() {
            if i != j {
                assert_ne!(
                    variant, other,
                    "Variants at indices {i} and {j} must differ"
                );
            } else {
                assert_eq!(variant, other);
            }
        }
    }
}
2633
/// Round-trips a sample of `MetaType` values through JSON and checks that
/// each decoded value equals the one that was encoded.
#[test]
fn test_meta_type_serde_roundtrip() {
    for original in [
        MetaType::Default,
        MetaType::Clear,
        MetaType::Indirect,
        MetaType::Offset,
        MetaType::Name(String::from("foo")),
        MetaType::Use(String::from("bar")),
    ] {
        let encoded = serde_json::to_string(&original).expect("serialize MetaType");
        let decoded: MetaType = serde_json::from_str(&encoded).expect("deserialize MetaType");
        assert_eq!(original, decoded);
    }
}
2651
/// Wraps a sample of `MetaType` values in `TypeKind::Meta` and checks that
/// `bit_width()` returns `None` for each of them.
#[test]
fn test_type_kind_meta_bit_width_is_none() {
    // Wrap every sample up front so the loop only deals with `TypeKind`.
    let kinds = [
        MetaType::Default,
        MetaType::Clear,
        MetaType::Indirect,
        MetaType::Offset,
        MetaType::Name(String::from("x")),
        MetaType::Use(String::from("x")),
    ]
    .map(TypeKind::Meta);

    for kind in kinds {
        assert_eq!(
            kind.bit_width(),
            None,
            "TypeKind::Meta must have no bit width: {kind:?}"
        );
    }
}
2671}