Skip to main content

libmagic_rs/parser/
ast.rs

1// Copyright (c) 2025-2026 the libmagic-rs contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! Abstract Syntax Tree definitions for magic rules
5//!
6//! This module contains the core data structures that represent parsed magic rules
7//! and their components, including offset specifications, type kinds, operators, and values.
8
9use serde::{Deserialize, Serialize};
10use std::num::{NonZeroU32, NonZeroUsize};
11
12/// The width of the length prefix for Pascal strings.
13///
14/// Uppercase suffix letters (`/H`, `/L`) indicate big-endian byte order.
15/// Lowercase suffix letters (`/h`, `/l`) indicate little-endian byte order.
16///
17/// # Examples
18///
19/// ```
20/// use libmagic_rs::parser::ast::PStringLengthWidth;
21/// let width = PStringLengthWidth::OneByte;
22/// assert_eq!(width.byte_count(), 1);
23///
24/// let width = PStringLengthWidth::TwoByteBE;
25/// assert_eq!(width.byte_count(), 2);
26///
27/// let width = PStringLengthWidth::FourByteLE;
28/// assert_eq!(width.byte_count(), 4);
29/// ```
30#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
31#[allow(clippy::enum_variant_names)]
32#[non_exhaustive]
33pub enum PStringLengthWidth {
34    /// 1-byte length prefix (default, `/B` suffix)
35    ///
36    /// # Examples
37    ///
38    /// ```
39    /// use libmagic_rs::parser::ast::PStringLengthWidth;
40    /// let width = PStringLengthWidth::OneByte;
41    /// assert_eq!(width.byte_count(), 1);
42    /// ```
43    OneByte,
44    /// 2-byte big-endian length prefix (`/H` suffix)
45    ///
46    /// # Examples
47    ///
48    /// ```
49    /// use libmagic_rs::parser::ast::PStringLengthWidth;
50    /// let width = PStringLengthWidth::TwoByteBE;
51    /// assert_eq!(width.byte_count(), 2);
52    /// ```
53    TwoByteBE,
54    /// 2-byte little-endian length prefix (`/h` suffix)
55    ///
56    /// # Examples
57    ///
58    /// ```
59    /// use libmagic_rs::parser::ast::PStringLengthWidth;
60    /// let width = PStringLengthWidth::TwoByteLE;
61    /// assert_eq!(width.byte_count(), 2);
62    /// ```
63    TwoByteLE,
64    /// 4-byte big-endian length prefix (`/L` suffix)
65    ///
66    /// # Examples
67    ///
68    /// ```
69    /// use libmagic_rs::parser::ast::PStringLengthWidth;
70    /// let width = PStringLengthWidth::FourByteBE;
71    /// assert_eq!(width.byte_count(), 4);
72    /// ```
73    FourByteBE,
74    /// 4-byte little-endian length prefix (`/l` suffix)
75    ///
76    /// # Examples
77    ///
78    /// ```
79    /// use libmagic_rs::parser::ast::PStringLengthWidth;
80    /// let width = PStringLengthWidth::FourByteLE;
81    /// assert_eq!(width.byte_count(), 4);
82    /// ```
83    FourByteLE,
84}
85
86impl PStringLengthWidth {
87    /// Returns the number of bytes used for the length prefix.
88    #[must_use]
89    pub fn byte_count(&self) -> usize {
90        match self {
91            Self::OneByte => 1,
92            Self::TwoByteBE | Self::TwoByteLE => 2,
93            Self::FourByteBE | Self::FourByteLE => 4,
94        }
95    }
96}
97
98/// Arithmetic operation applied to the value read at an indirect offset's
99/// `base_offset` before the result is used as the final file offset.
100///
101/// magic(5) supports `+`, `-`, `*`, `/`, `%`, `&`, `|`, and `^` between the
102/// pointer-type specifier and the operand inside the parentheses. Addition
103/// and subtraction collapse to [`IndirectAdjustmentOp::Add`] with a signed
104/// `adjustment` (so `(N.X-1)` is `Add(-1)` rather than a separate `Sub`
105/// variant); the remaining operators each have a dedicated variant.
106///
107/// The default is [`IndirectAdjustmentOp::Add`]; an indirect offset with no
108/// arithmetic — just `(base.type)` — is encoded as `Add` with `adjustment:
109/// 0`, preserving backwards compatibility.
110///
111/// # Examples
112///
113/// ```
114/// use libmagic_rs::parser::ast::IndirectAdjustmentOp;
115///
116/// assert_eq!(IndirectAdjustmentOp::default(), IndirectAdjustmentOp::Add);
117/// ```
118#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
119#[non_exhaustive]
120pub enum IndirectAdjustmentOp {
121    /// Addition (also covers subtraction via negative `adjustment`).
122    ///
123    /// # Examples
124    ///
125    /// ```
126    /// use libmagic_rs::parser::ast::IndirectAdjustmentOp;
127    /// assert_eq!(IndirectAdjustmentOp::default(), IndirectAdjustmentOp::Add);
128    /// ```
129    #[default]
130    Add,
131    /// Multiplication: `pointer_value * adjustment`.
132    ///
133    /// # Examples
134    ///
135    /// ```
136    /// use libmagic_rs::parser::ast::IndirectAdjustmentOp;
137    /// let op = IndirectAdjustmentOp::Mul;
138    /// assert_eq!(op, IndirectAdjustmentOp::Mul);
139    /// ```
140    Mul,
141    /// Truncating integer division: `pointer_value / adjustment`. Division
142    /// by zero is rejected by the evaluator with an error.
143    ///
144    /// # Examples
145    ///
146    /// ```
147    /// use libmagic_rs::parser::ast::IndirectAdjustmentOp;
148    /// let op = IndirectAdjustmentOp::Div;
149    /// assert_eq!(op, IndirectAdjustmentOp::Div);
150    /// ```
151    Div,
152    /// Remainder: `pointer_value % adjustment`. Modulo by zero is rejected
153    /// by the evaluator with an error.
154    ///
155    /// # Examples
156    ///
157    /// ```
158    /// use libmagic_rs::parser::ast::IndirectAdjustmentOp;
159    /// let op = IndirectAdjustmentOp::Mod;
160    /// assert_eq!(op, IndirectAdjustmentOp::Mod);
161    /// ```
162    Mod,
163    /// Bitwise AND: `pointer_value & adjustment`.
164    ///
165    /// # Examples
166    ///
167    /// ```
168    /// use libmagic_rs::parser::ast::IndirectAdjustmentOp;
169    /// let op = IndirectAdjustmentOp::And;
170    /// assert_eq!(op, IndirectAdjustmentOp::And);
171    /// ```
172    And,
173    /// Bitwise OR: `pointer_value | adjustment`.
174    ///
175    /// # Examples
176    ///
177    /// ```
178    /// use libmagic_rs::parser::ast::IndirectAdjustmentOp;
179    /// let op = IndirectAdjustmentOp::Or;
180    /// assert_eq!(op, IndirectAdjustmentOp::Or);
181    /// ```
182    Or,
183    /// Bitwise XOR: `pointer_value ^ adjustment`.
184    ///
185    /// # Examples
186    ///
187    /// ```
188    /// use libmagic_rs::parser::ast::IndirectAdjustmentOp;
189    /// let op = IndirectAdjustmentOp::Xor;
190    /// assert_eq!(op, IndirectAdjustmentOp::Xor);
191    /// ```
192    Xor,
193}
194
195/// Offset specification for locating data in files
196#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
197#[non_exhaustive]
198pub enum OffsetSpec {
199    /// Absolute offset from file start (or from file end if negative)
200    ///
201    /// Positive values are offsets from the start of the file.
202    /// Negative values are offsets from the end of the file (same as `FromEnd`).
203    ///
204    /// # Examples
205    ///
206    /// ```
207    /// use libmagic_rs::parser::ast::OffsetSpec;
208    ///
209    /// let offset = OffsetSpec::Absolute(0x10); // Read at byte 16 from start
210    /// let from_end = OffsetSpec::Absolute(-4); // 4 bytes before end of file
211    /// ```
212    Absolute(i64),
213
214    /// Indirect offset through pointer dereferencing
215    ///
216    /// Reads a pointer value at `base_offset`, interprets it according to `pointer_type`
217    /// and `endian`, then combines `adjustment` with the pointer value using
218    /// `adjustment_op` to get the final offset. The default `adjustment_op`
219    /// is [`IndirectAdjustmentOp::Add`], so `(base.type)` and
220    /// `(base.type+N)` / `(base.type-N)` use addition (subtraction is
221    /// encoded as `Add` with a negative `adjustment`). magic(5) also
222    /// supports multiplicative and bitwise forms inside the parens, e.g.
223    /// `(0x200.s*2)` ([`IndirectAdjustmentOp::Mul`]).
224    ///
225    /// # Examples
226    ///
227    /// ```
228    /// use libmagic_rs::parser::ast::{OffsetSpec, TypeKind, Endianness, IndirectAdjustmentOp};
229    ///
230    /// let indirect = OffsetSpec::Indirect {
231    ///     base_offset: 0x20,
232    ///     base_relative: false,
233    ///     pointer_type: TypeKind::Long { endian: Endianness::Little, signed: false },
234    ///     adjustment: 4,
235    ///     adjustment_op: IndirectAdjustmentOp::Add,
236    ///     result_relative: false,
237    ///     endian: Endianness::Little,
238    /// };
239    /// ```
240    Indirect {
241        /// Base offset to read pointer from. When `base_relative` is
242        /// `true`, this value is added to the current anchor (last-match
243        /// position) rather than being treated as an absolute file
244        /// position.
245        base_offset: i64,
246        /// If `true`, `base_offset` is relative to the current anchor
247        /// (i.e., `(&N.X)` syntax in magic files). Defaults to `false`
248        /// for backwards compatibility with existing AST snapshots; the
249        /// serde `default` attribute lets older serialized AST round-trip.
250        #[serde(default)]
251        base_relative: bool,
252        /// Type of pointer value
253        pointer_type: TypeKind,
254        /// Operand combined with the pointer value via `adjustment_op`.
255        ///
256        /// For `IndirectAdjustmentOp::Add`, the operand is signed (negative
257        /// values encode subtraction). For multiplicative and bitwise ops
258        /// the operand is interpreted as `i64` but typically magic files
259        /// supply non-negative literals.
260        adjustment: i64,
261        /// Arithmetic operation applied to the pointer value with
262        /// `adjustment` as the operand. Defaults to
263        /// [`IndirectAdjustmentOp::Add`] for legacy AST consumers via
264        /// serde's `default` attribute.
265        #[serde(default)]
266        adjustment_op: IndirectAdjustmentOp,
267        /// If `true`, the resolved offset is added to the current anchor
268        /// instead of being treated as an absolute file position. This
269        /// corresponds to magic-file `&(...)` syntax wrapping an indirect
270        /// spec, e.g., `&(0x10.l)`.
271        #[serde(default)]
272        result_relative: bool,
273        /// Endianness for pointer reading
274        endian: Endianness,
275    },
276
277    /// Relative offset from previous match position
278    ///
279    /// # Examples
280    ///
281    /// ```
282    /// use libmagic_rs::parser::ast::OffsetSpec;
283    ///
284    /// let relative = OffsetSpec::Relative(8); // 8 bytes after previous match
285    /// ```
286    Relative(i64),
287
288    /// Offset from end of file (negative values move towards start)
289    ///
290    /// # Examples
291    ///
292    /// ```
293    /// use libmagic_rs::parser::ast::OffsetSpec;
294    ///
295    /// let from_end = OffsetSpec::FromEnd(-16); // 16 bytes before end of file
296    /// ```
297    FromEnd(i64),
298}
299
300/// Control-flow directive carried by [`TypeKind::Meta`].
301///
302/// These are not value-reading types -- they correspond to magic(5)
303/// control-flow keywords (`default`, `clear`, `name`, `use`, `indirect`,
304/// `offset`) that modify how a rule set is traversed rather than reading
305/// bytes from the buffer. All six variants are fully evaluated by the
306/// engine: `default`/`clear` manage per-level sibling-matched state;
307/// `name`/`use` implement subroutine dispatch; `indirect` re-applies the
308/// root rule database at a resolved offset; and `offset` emits the
309/// current file position as `Value::Uint` for printf-style formatting.
310#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
311#[non_exhaustive]
312pub enum MetaType {
313    /// `default` directive: fires when no sibling at the same indentation
314    /// level has matched at the current offset. See magic(5) for the
315    /// "default" type semantics.
316    ///
317    /// # Examples
318    ///
319    /// ```
320    /// use libmagic_rs::parser::ast::MetaType;
321    /// let meta = MetaType::Default;
322    /// assert_eq!(meta, MetaType::Default);
323    /// ```
324    Default,
325    /// `clear` directive: resets the sibling-matched flag so a later
326    /// `default` sibling can fire even if an earlier sibling matched.
327    /// See magic(5) for the "clear" type semantics.
328    ///
329    /// # Examples
330    ///
331    /// ```
332    /// use libmagic_rs::parser::ast::MetaType;
333    /// let meta = MetaType::Clear;
334    /// assert_eq!(meta, MetaType::Clear);
335    /// ```
336    Clear,
337    /// `name <identifier>` directive: declares a named subroutine that
338    /// can be invoked later via [`MetaType::Use`]. See magic(5) for the
339    /// "name" type semantics.
340    ///
341    /// # Examples
342    ///
343    /// ```
344    /// use libmagic_rs::parser::ast::MetaType;
345    /// let meta = MetaType::Name("part2".to_string());
346    /// assert_eq!(meta, MetaType::Name("part2".to_string()));
347    /// ```
348    Name(String),
349    /// `use <identifier>` directive: invokes a named subroutine
350    /// previously declared via [`MetaType::Name`]. See magic(5) for the
351    /// "use" type semantics.
352    ///
353    /// # Examples
354    ///
355    /// ```
356    /// use libmagic_rs::parser::ast::MetaType;
357    /// let meta = MetaType::Use("part2".to_string());
358    /// assert_eq!(meta, MetaType::Use("part2".to_string()));
359    /// ```
360    Use(String),
361    /// `indirect` directive: re-applies the entire magic database at the
362    /// resolved offset. See magic(5) for the "indirect" type semantics.
363    ///
364    /// # Examples
365    ///
366    /// ```
367    /// use libmagic_rs::parser::ast::MetaType;
368    /// let meta = MetaType::Indirect;
369    /// assert_eq!(meta, MetaType::Indirect);
370    /// ```
371    Indirect,
372    /// `offset` type keyword: reports the current file offset rather than
373    /// reading a typed value from the buffer. See magic(5) for the
374    /// "offset" type semantics.
375    ///
376    /// Evaluation: the engine resolves the rule's offset specification
377    /// to an absolute position and emits a `RuleMatch` whose `value` is
378    /// `Value::Uint(position)`. Message templates can reference that
379    /// value through printf-style format specifiers (e.g. `%lld`),
380    /// which are substituted by
381    /// [`crate::output::format::format_magic_message`] at description-
382    /// assembly time. The only supported operator is `x` (`AnyValue`);
383    /// any other operator is `debug!`-logged and skipped.
384    ///
385    /// # Examples
386    ///
387    /// ```
388    /// use libmagic_rs::parser::ast::MetaType;
389    /// let meta = MetaType::Offset;
390    /// assert_eq!(meta, MetaType::Offset);
391    /// ```
392    Offset,
393}
394
395/// Data type specifications for interpreting bytes
396#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
397#[non_exhaustive]
398pub enum TypeKind {
399    /// Single byte
400    ///
401    /// # Examples
402    ///
403    /// ```
404    /// use libmagic_rs::parser::ast::TypeKind;
405    ///
406    /// let byte = TypeKind::Byte { signed: true };
407    /// assert_eq!(byte, TypeKind::Byte { signed: true });
408    /// ```
409    Byte {
410        /// Whether value is signed
411        signed: bool,
412    },
413    /// 16-bit integer
414    ///
415    /// # Examples
416    ///
417    /// ```
418    /// use libmagic_rs::parser::ast::{TypeKind, Endianness};
419    ///
420    /// let short = TypeKind::Short { endian: Endianness::Little, signed: true };
421    /// assert_eq!(short, TypeKind::Short { endian: Endianness::Little, signed: true });
422    /// ```
423    Short {
424        /// Byte order
425        endian: Endianness,
426        /// Whether value is signed
427        signed: bool,
428    },
429    /// 32-bit integer
430    ///
431    /// # Examples
432    ///
433    /// ```
434    /// use libmagic_rs::parser::ast::{TypeKind, Endianness};
435    ///
436    /// let long = TypeKind::Long { endian: Endianness::Big, signed: false };
437    /// assert_eq!(long, TypeKind::Long { endian: Endianness::Big, signed: false });
438    /// ```
439    Long {
440        /// Byte order
441        endian: Endianness,
442        /// Whether value is signed
443        signed: bool,
444    },
445    /// 64-bit integer
446    ///
447    /// # Examples
448    ///
449    /// ```
450    /// use libmagic_rs::parser::ast::{TypeKind, Endianness};
451    ///
452    /// let quad = TypeKind::Quad { endian: Endianness::Big, signed: true };
453    /// assert_eq!(quad, TypeKind::Quad { endian: Endianness::Big, signed: true });
454    /// ```
455    Quad {
456        /// Byte order
457        endian: Endianness,
458        /// Whether value is signed
459        signed: bool,
460    },
461    /// 32-bit IEEE 754 floating-point
462    ///
463    /// # Examples
464    ///
465    /// ```
466    /// use libmagic_rs::parser::ast::{TypeKind, Endianness};
467    ///
468    /// let float = TypeKind::Float { endian: Endianness::Big };
469    /// assert_eq!(float, TypeKind::Float { endian: Endianness::Big });
470    /// ```
471    Float {
472        /// Byte order
473        endian: Endianness,
474    },
475    /// 64-bit IEEE 754 double-precision floating-point
476    ///
477    /// # Examples
478    ///
479    /// ```
480    /// use libmagic_rs::parser::ast::{TypeKind, Endianness};
481    ///
482    /// let double = TypeKind::Double { endian: Endianness::Big };
483    /// assert_eq!(double, TypeKind::Double { endian: Endianness::Big });
484    /// ```
485    Double {
486        /// Byte order
487        endian: Endianness,
488    },
489    /// 32-bit Unix timestamp (seconds since epoch)
490    ///
491    /// # Examples
492    ///
493    /// ```
494    /// use libmagic_rs::parser::ast::{TypeKind, Endianness};
495    ///
496    /// let date = TypeKind::Date { endian: Endianness::Big, utc: true };
497    /// assert_eq!(date, TypeKind::Date { endian: Endianness::Big, utc: true });
498    /// ```
499    Date {
500        /// Byte order
501        endian: Endianness,
502        /// true = UTC, false = local time
503        utc: bool,
504    },
505    /// 64-bit Unix timestamp (seconds since epoch)
506    ///
507    /// # Examples
508    ///
509    /// ```
510    /// use libmagic_rs::parser::ast::{TypeKind, Endianness};
511    ///
512    /// let qdate = TypeKind::QDate { endian: Endianness::Little, utc: false };
513    /// assert_eq!(qdate, TypeKind::QDate { endian: Endianness::Little, utc: false });
514    /// ```
515    QDate {
516        /// Byte order
517        endian: Endianness,
518        /// true = UTC, false = local time
519        utc: bool,
520    },
521    /// String data
522    ///
523    /// The `flags` field carries the modifier flags parsed from the
524    /// `/[cCwWtTbf]` suffix on a `string` rule. Default flags (all
525    /// `false`) preserve the existing byte-exact comparison path; any
526    /// non-default flag routes the rule through
527    /// `compare_string_with_flags` in `src/evaluator/types/string.rs`.
528    /// See [`StringFlags`] for per-flag semantics.
529    ///
530    /// # Examples
531    ///
532    /// ```
533    /// use libmagic_rs::parser::ast::{StringFlags, TypeKind};
534    ///
535    /// let s = TypeKind::String { max_length: None, flags: StringFlags::default() };
536    /// assert_eq!(s, TypeKind::String { max_length: None, flags: StringFlags::default() });
537    ///
538    /// let case_insensitive = TypeKind::String {
539    ///     max_length: None,
540    ///     flags: StringFlags::default().with_ignore_lowercase(true),
541    /// };
542    /// assert!(matches!(case_insensitive, TypeKind::String { flags, .. } if flags.ignore_lowercase));
543    /// ```
544    String {
545        /// Maximum length to read
546        max_length: Option<usize>,
547        /// Modifier flags from the `/[cCwWtTbf]` suffix
548        flags: StringFlags,
549    },
550    /// UCS-2 (16-bit Unicode) string with explicit byte order.
551    ///
552    /// Backs the magic(5) `lestring16` (little-endian) and `bestring16`
553    /// (big-endian) keywords. Each character occupies two bytes in the
554    /// file; the reader stops at a U+0000 terminator (encoded as the
555    /// 2-byte sequence `0x00 0x00`) or at the end of the buffer. The
556    /// decoded value is returned as a Rust `String` (so non-ASCII
557    /// characters are preserved when valid UCS-2).
558    ///
559    /// # Examples
560    ///
561    /// ```
562    /// use libmagic_rs::parser::ast::{TypeKind, Endianness};
563    ///
564    /// let le = TypeKind::String16 { endian: Endianness::Little };
565    /// assert_eq!(le, TypeKind::String16 { endian: Endianness::Little });
566    ///
567    /// let be = TypeKind::String16 { endian: Endianness::Big };
568    /// assert_eq!(be, TypeKind::String16 { endian: Endianness::Big });
569    /// ```
570    String16 {
571        /// Endianness for the 16-bit code units.
572        endian: Endianness,
573    },
574    /// Pascal string (length-prefixed, supports 1/2/4-byte prefix, with optional max length)
575    ///
576    /// Pascal strings store the length as a prefix (1, 2, or 4 bytes, with configurable endianness), followed by
577    /// that many bytes of string data. Unlike C strings, they are not null-terminated.
578    ///
579    /// # Examples
580    ///
581    /// ```
582    /// use libmagic_rs::parser::ast::{TypeKind, PStringLengthWidth};
583    ///
584    /// let pstring = TypeKind::PString { max_length: None, length_width: PStringLengthWidth::OneByte, length_includes_itself: false };
585    /// assert_eq!(pstring, TypeKind::PString { max_length: None, length_width: PStringLengthWidth::OneByte, length_includes_itself: false });
586    ///
587    /// let limited = TypeKind::PString { max_length: Some(64), length_width: PStringLengthWidth::TwoByteBE, length_includes_itself: false };
588    /// assert_eq!(limited, TypeKind::PString { max_length: Some(64), length_width: PStringLengthWidth::TwoByteBE, length_includes_itself: false });
589    ///
590    /// // /J flag: stored length includes the length field itself
591    /// let jpeg = TypeKind::PString { max_length: None, length_width: PStringLengthWidth::TwoByteBE, length_includes_itself: true };
592    /// assert_eq!(jpeg, TypeKind::PString { max_length: None, length_width: PStringLengthWidth::TwoByteBE, length_includes_itself: true });
593    /// ```
594    PString {
595        /// Maximum length to read (caps the length value)
596        max_length: Option<usize>,
597        /// Width of the length prefix
598        length_width: PStringLengthWidth,
599        /// Whether the stored length includes the length field itself (`/J` flag)
600        length_includes_itself: bool,
601    },
602    /// Regular expression matching against file contents
603    ///
604    /// Regex rules match a POSIX-extended regular expression pattern against the
605    /// file buffer. Patterns are compiled with multi-line mode always enabled
606    /// (matching libmagic's unconditional `REG_NEWLINE`), so `^` and `$` match
607    /// at line boundaries and `.` does not match `\n`. The `flags` control
608    /// case sensitivity and anchor advance semantics; the `count` field
609    /// controls the scan window (byte or line bounds). The scan window is
610    /// always capped at 8192 bytes (matching GNU `file`'s `FILE_REGEX_MAX`;
611    /// enforced in the evaluator).
612    ///
613    /// # Examples
614    ///
615    /// ```
616    /// use libmagic_rs::parser::ast::{RegexCount, RegexFlags, TypeKind};
617    /// use std::num::NonZeroU32;
618    ///
619    /// // Plain `regex` -- no flags, default 8192-byte scan window.
620    /// let plain = TypeKind::Regex {
621    ///     flags: RegexFlags::default(),
622    ///     count: RegexCount::Default,
623    /// };
624    ///
625    /// // `regex/1l` -- scan the first line only.
626    /// let first_line = TypeKind::Regex {
627    ///     flags: RegexFlags::default(),
628    ///     count: RegexCount::Lines(NonZeroU32::new(1)),
629    /// };
630    ///
631    /// // `regex/cs` -- case-insensitive, anchor advances to match-start.
632    /// let case_insensitive_start = TypeKind::Regex {
633    ///     flags: RegexFlags {
634    ///         case_insensitive: true,
635    ///         start_offset: true,
636    ///     },
637    ///     count: RegexCount::Default,
638    /// };
639    /// ```
640    Regex {
641        /// Modifier flags from the `/[cs]` suffix (`/c` case-insensitive,
642        /// `/s` start-offset anchor). Line-mode is encoded by the
643        /// [`RegexCount::Lines`] variant of `count`, not a flag.
644        flags: RegexFlags,
645        /// Scan window specifier: default 8192 bytes, explicit byte
646        /// count, or explicit line count. See [`RegexCount`] for the
647        /// three cases.
648        count: RegexCount,
649    },
650    /// Multi-byte pattern search within a bounded range
651    ///
652    /// Search rules look for a literal byte pattern within `range` bytes of
653    /// the offset. Unlike [`TypeKind::String`], which only matches at the
654    /// exact offset, `search` scans forward up to `range` bytes for the
655    /// first occurrence. The range is **mandatory** per GNU `file`'s
656    /// magic(5) specification and is stored as a [`NonZeroUsize`] so a
657    /// zero-range search is unrepresentable.
658    ///
659    /// # Examples
660    ///
661    /// ```
662    /// use libmagic_rs::parser::ast::TypeKind;
663    /// use std::num::NonZeroUsize;
664    ///
665    /// // `search/256` -- scan up to 256 bytes for the literal pattern.
666    /// let bounded = TypeKind::Search {
667    ///     range: NonZeroUsize::new(256).unwrap(),
668    /// };
669    /// ```
670    Search {
671        /// Scan window width in bytes, starting at the rule's offset.
672        range: NonZeroUsize,
673    },
674    /// Control-flow directive (`default`, `clear`, `name`, `use`,
675    /// `indirect`, `offset`).
676    ///
677    /// These magic(5) keywords do not read or compare bytes; they modify
678    /// how a rule set is traversed. All six variants are fully evaluated:
679    /// `default` fires as a fallback when no sibling at the same level
680    /// has matched; `clear` resets that flag; `name`/`use` support
681    /// subroutine definition and invocation; `indirect` re-enters the
682    /// rule set at a resolved offset; `offset` emits the resolved file
683    /// position as `Value::Uint` for printf-style message substitution.
684    /// See [`MetaType`] for the individual variants.
685    ///
686    /// # Examples
687    ///
688    /// ```
689    /// use libmagic_rs::parser::ast::{MetaType, TypeKind};
690    /// let default_rule = TypeKind::Meta(MetaType::Default);
691    /// assert_eq!(default_rule, TypeKind::Meta(MetaType::Default));
692    /// ```
693    Meta(MetaType),
694}
695
696/// Regex modifier flags parsed from the `/[cs]` suffix on a `regex` rule.
697///
698/// The `/l` "line-based window" modifier is **not** represented here; it
699/// lives on [`RegexCount::Lines`] so that the type-level encoding makes
700/// "line count" and "byte count" mutually exclusive. An earlier design
701/// used two separate fields (`line_based: bool` + `count: Option<u32>`)
702/// which admitted the cross-field state `line_based: true, count: None`;
703/// under the current encoding that case is expressed explicitly as
704/// [`RegexCount::Lines(None)`](RegexCount::Lines) -- the `regex/l`
705/// shorthand -- and is behaviorally equivalent to [`RegexCount::Default`]
706/// (both walk the full 8192-byte capped window).
707///
708/// All flags default to `false` via [`RegexFlags::default`], equivalent
709/// to a plain `regex` with no `/c` or `/s` suffix.
710///
711/// # Examples
712///
713/// ```
714/// use libmagic_rs::parser::ast::RegexFlags;
715///
716/// let plain = RegexFlags::default();
717/// assert!(!plain.case_insensitive);
718/// assert!(!plain.start_offset);
719///
720/// let case_and_start = RegexFlags::default()
721///     .with_case_insensitive(true)
722///     .with_start_offset(true);
723/// assert!(case_and_start.case_insensitive);
724/// assert!(case_and_start.start_offset);
725/// ```
726#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq)]
727pub struct RegexFlags {
728    /// `/c` -- case-insensitive matching. When `true`, ASCII letter
729    /// casing is ignored during pattern matching.
730    pub case_insensitive: bool,
731    /// `/s` -- advance the GNU `file` previous-match anchor to the start
732    /// of the matched region instead of its end. Matches libmagic's
733    /// `REGEX_OFFSET_START` flag, which zeros the length contribution in
734    /// `moffset()` for `FILE_REGEX`. Useful for chaining child rules that
735    /// need to re-match from the position where the parent regex began.
736    pub start_offset: bool,
737}
738
739impl RegexFlags {
740    /// Builder-style setter for [`RegexFlags::case_insensitive`] (`/c`).
741    ///
742    /// Chain after [`RegexFlags::default()`] to construct `RegexFlags`
743    /// values without exhaustive struct literals. If a new flag is
744    /// added to `RegexFlags` in the future, callers using the builder
745    /// form keep compiling; callers using struct literals would need
746    /// an update.
747    #[must_use]
748    pub const fn with_case_insensitive(mut self, value: bool) -> Self {
749        self.case_insensitive = value;
750        self
751    }
752
753    /// Builder-style setter for [`RegexFlags::start_offset`] (`/s`).
754    ///
755    /// Chain after [`RegexFlags::default()`] to construct `RegexFlags`
756    /// values without exhaustive struct literals.
757    #[must_use]
758    pub const fn with_start_offset(mut self, value: bool) -> Self {
759        self.start_offset = value;
760        self
761    }
762}
763
764/// String modifier flags parsed from the `/[cCwWtTbf]` suffix on a `string`
765/// rule.
766///
767/// Mirrors libmagic's `STRING_*` flag bits from `src/file.h`. Each flag
768/// alters how `compare_string_with_flags` walks the pattern and buffer in
769/// parallel. The default (all `false`) preserves byte-exact comparison.
770///
/// **`/c` vs `/C` are asymmetric**: the pattern character controls
/// direction. With `/c`, only lowercase pattern chars trigger case-folding
/// (the file byte is `tolower`'d). With `/C`, only uppercase pattern chars
/// trigger folding (the file byte is `toupper`'d). Mixed-case patterns
/// fold per character: `/c FoO` matches `FoO` and `FOO` but not `fOO`
/// or `Foo` (the uppercase `F` and `O` are literal). See GOTCHAS S6.5 for
/// the rationale and `src/softmagic.c` for the canonical libmagic contract.
778///
779/// **`/B` is NOT a string flag** -- it is the `pstring` 1-byte length-width
780/// letter (`PSTRING_1_BE`). `string/B` is rejected at parse time. See
781/// GOTCHAS S6.6.
782///
783/// # Examples
784///
785/// ```
786/// use libmagic_rs::parser::ast::StringFlags;
787///
788/// let plain = StringFlags::default();
789/// assert!(!plain.ignore_lowercase);
790///
791/// let case_insensitive = StringFlags::default().with_ignore_lowercase(true);
792/// assert!(case_insensitive.ignore_lowercase);
793///
794/// let compound = StringFlags::default()
795///     .with_ignore_lowercase(true)
796///     .with_compact_optional_whitespace(true);
797/// assert!(compound.ignore_lowercase);
798/// assert!(compound.compact_optional_whitespace);
799/// ```
800#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq)]
801// libmagic's contract is naturally a bitfield: each flag is a distinct
802// magic(5) letter (/c, /C, /w, /W, /t, /T, /b, /f) with its own STRING_*
803// constant in libmagic src/file.h. Flags compose freely (string/cw is
804// /c plus /w; string/wcCtTbf sets all eight). Folding pairs into enums
805// is possible (whitespace: none|optional|required; case: none|lower|upper)
806// but would obscure the libmagic mapping and produce verbose match arms
807// in every consumer. The bool-per-flag layout mirrors `RegexFlags` and
808// the libmagic source -- the clippy lint is overruled by the design.
809#[allow(clippy::struct_excessive_bools)]
810pub struct StringFlags {
811    /// `/W` -- `STRING_COMPACT_WHITESPACE`. Pattern whitespace requires at
812    /// least one whitespace byte in the file, then any further whitespace
813    /// in the file is consumed greedily.
814    pub compact_whitespace: bool,
815    /// `/w` -- `STRING_COMPACT_OPTIONAL_WHITESPACE`. Pattern whitespace
816    /// matches zero or more whitespace bytes in the file.
817    pub compact_optional_whitespace: bool,
818    /// `/c` -- `STRING_IGNORE_LOWERCASE`. When the pattern char is
819    /// lowercase, the file byte is `to_ascii_lowercase`'d before
820    /// comparison. Uppercase pattern chars are compared literally.
821    pub ignore_lowercase: bool,
822    /// `/C` -- `STRING_IGNORE_UPPERCASE`. When the pattern char is
823    /// uppercase, the file byte is `to_ascii_uppercase`'d before
824    /// comparison. Lowercase pattern chars are compared literally.
825    pub ignore_uppercase: bool,
826    /// `/t` -- `STRING_TEXTTEST`. Hint that this rule applies to text
827    /// files. Captured for MIME-output integration; does not currently
828    /// alter comparison.
829    pub text_test: bool,
830    /// `/T` -- `STRING_TRIM`. Trim leading and trailing ASCII whitespace
831    /// from the pattern before comparison. The trim is applied at
832    /// evaluation time (in `read_pattern_match`) so the AST keeps the
833    /// original pattern bytes; the comparison function receives the
834    /// trimmed slice.
835    pub trim: bool,
836    /// `/b` -- `STRING_BINTEST`. Hint that this rule applies to binary
837    /// files. Captured for MIME-output integration; does not currently
838    /// alter comparison.
839    pub bin_test: bool,
840    /// `/f` -- `STRING_FULL_WORD`. Post-match check that the byte after
841    /// the matched region is either end-of-buffer or a non-word
842    /// character (ASCII alphanumeric or `_`).
843    pub full_word: bool,
844}
845
846impl StringFlags {
847    /// Returns `true` when every flag is `false` (the byte-exact fast
848    /// path). The evaluator dispatcher uses this to skip the
849    /// parallel-walk comparison when no flags are set.
850    #[must_use]
851    pub const fn is_empty(self) -> bool {
852        !self.compact_whitespace
853            && !self.compact_optional_whitespace
854            && !self.ignore_lowercase
855            && !self.ignore_uppercase
856            && !self.text_test
857            && !self.trim
858            && !self.bin_test
859            && !self.full_word
860    }
861
862    /// Builder-style setter for `compact_whitespace` (`/W`).
863    #[must_use]
864    pub const fn with_compact_whitespace(mut self, value: bool) -> Self {
865        self.compact_whitespace = value;
866        self
867    }
868
869    /// Builder-style setter for `compact_optional_whitespace` (`/w`).
870    #[must_use]
871    pub const fn with_compact_optional_whitespace(mut self, value: bool) -> Self {
872        self.compact_optional_whitespace = value;
873        self
874    }
875
876    /// Builder-style setter for `ignore_lowercase` (`/c`).
877    #[must_use]
878    pub const fn with_ignore_lowercase(mut self, value: bool) -> Self {
879        self.ignore_lowercase = value;
880        self
881    }
882
883    /// Builder-style setter for `ignore_uppercase` (`/C`).
884    #[must_use]
885    pub const fn with_ignore_uppercase(mut self, value: bool) -> Self {
886        self.ignore_uppercase = value;
887        self
888    }
889
890    /// Builder-style setter for `text_test` (`/t`).
891    #[must_use]
892    pub const fn with_text_test(mut self, value: bool) -> Self {
893        self.text_test = value;
894        self
895    }
896
897    /// Builder-style setter for `trim` (`/T`).
898    #[must_use]
899    pub const fn with_trim(mut self, value: bool) -> Self {
900        self.trim = value;
901        self
902    }
903
904    /// Builder-style setter for `bin_test` (`/b`).
905    #[must_use]
906    pub const fn with_bin_test(mut self, value: bool) -> Self {
907        self.bin_test = value;
908        self
909    }
910
911    /// Builder-style setter for `full_word` (`/f`).
912    #[must_use]
913    pub const fn with_full_word(mut self, value: bool) -> Self {
914        self.full_word = value;
915        self
916    }
917}
918
/// Scan window specifier for a [`TypeKind::Regex`] rule.
///
/// Encodes the three mutually-exclusive scan modes in a single enum so
/// that the "byte count" and "line count" cases cannot be confused. The
/// `regex/l` shorthand (line mode with no explicit count) is represented
/// explicitly as [`RegexCount::Lines(None)`](RegexCount::Lines), which
/// is behaviorally equivalent to [`RegexCount::Default`] -- both walk
/// the full 8192-byte capped window -- but preserves the magic-file
/// surface syntax of the original rule. The 8192-byte hard cap
/// (matching GNU `file`'s `FILE_REGEX_MAX`) is applied by the evaluator
/// on every variant.
///
/// # Examples
///
/// ```
/// use libmagic_rs::parser::ast::RegexCount;
/// use std::num::NonZeroU32;
///
/// // Plain `regex` (no suffix): default 8192-byte window.
/// assert_eq!(RegexCount::default(), RegexCount::Default);
///
/// // `regex/100`: scan at most 100 bytes.
/// let hundred_bytes = RegexCount::Bytes(NonZeroU32::new(100).unwrap());
///
/// // `regex/1l`: scan the first line.
/// let one_line = RegexCount::Lines(NonZeroU32::new(1));
///
/// // `regex/l`: line-mode with no explicit count (walks terminators
/// // to the end of the 8192-byte capped window).
/// let unbounded_lines = RegexCount::Lines(None);
/// ```
#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq)]
pub enum RegexCount {
    /// No explicit scan bound (plain `regex` with no suffix). Scans the
    /// default 8192-byte window from the rule's offset. This is the
    /// variant produced by `RegexCount::default()`.
    #[default]
    Default,
    /// Byte-bounded scan (`regex/N` with no `/l` flag). The window is
    /// `min(n, 8192, remaining_buffer)` bytes long. `NonZeroU32` makes
    /// a zero-byte scan unrepresentable.
    Bytes(NonZeroU32),
    /// Line-bounded scan (`regex/Nl` or `regex/l`). The window walks
    /// LF / CRLF / bare CR line terminators from the offset. With
    /// `Some(n)`, the walk stops after the Nth terminator (inclusive).
    /// With `None` (the `regex/l` shorthand), the walk continues to
    /// the end of the 8192-byte capped window. Either way the
    /// effective byte window is capped at 8192.
    Lines(Option<NonZeroU32>),
}
968
969impl TypeKind {
970    /// Returns the bit width of integer types, or `None` for non-integer types (e.g., String).
971    ///
972    /// # Examples
973    ///
974    /// ```
975    /// use libmagic_rs::parser::ast::{Endianness, StringFlags, TypeKind};
976    ///
977    /// assert_eq!(TypeKind::Byte { signed: false }.bit_width(), Some(8));
978    /// assert_eq!(TypeKind::Short { endian: Endianness::Native, signed: true }.bit_width(), Some(16));
979    /// assert_eq!(TypeKind::Long { endian: Endianness::Native, signed: true }.bit_width(), Some(32));
980    /// assert_eq!(TypeKind::Quad { endian: Endianness::Native, signed: true }.bit_width(), Some(64));
981    /// assert_eq!(TypeKind::Float { endian: Endianness::Native }.bit_width(), Some(32));
982    /// assert_eq!(TypeKind::Double { endian: Endianness::Native }.bit_width(), Some(64));
983    /// assert_eq!(TypeKind::String { max_length: None, flags: StringFlags::default() }.bit_width(), None);
984    /// ```
985    #[must_use]
986    pub const fn bit_width(&self) -> Option<u32> {
987        match self {
988            Self::Byte { .. } => Some(8),
989            Self::Short { .. } => Some(16),
990            Self::Long { .. } | Self::Float { .. } | Self::Date { .. } => Some(32),
991            Self::Quad { .. } | Self::Double { .. } | Self::QDate { .. } => Some(64),
992            Self::String { .. }
993            | Self::String16 { .. }
994            | Self::PString { .. }
995            | Self::Regex { .. }
996            | Self::Search { .. }
997            | Self::Meta(_) => None,
998        }
999    }
1000}
1001
/// Comparison and bitwise operators
///
/// Each variant corresponds to the operator token shown in its
/// documentation. Only [`Operator::BitwiseAndMask`] carries data (the mask
/// value); every other variant is a plain tag. The enum is
/// `#[non_exhaustive]`, so code outside this crate must include a wildcard
/// arm when matching on it.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[non_exhaustive]
pub enum Operator {
    /// Equality comparison (`=` or `==`)
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::Operator;
    ///
    /// let op = Operator::Equal;
    /// assert_eq!(op, Operator::Equal);
    /// ```
    Equal,
    /// Inequality comparison (`!=` or `<>`)
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::Operator;
    ///
    /// let op = Operator::NotEqual;
    /// assert_eq!(op, Operator::NotEqual);
    /// ```
    NotEqual,
    /// Less-than comparison (`<`)
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::Operator;
    ///
    /// let op = Operator::LessThan;
    /// assert_eq!(op, Operator::LessThan);
    /// ```
    LessThan,
    /// Greater-than comparison (`>`)
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::Operator;
    ///
    /// let op = Operator::GreaterThan;
    /// assert_eq!(op, Operator::GreaterThan);
    /// ```
    GreaterThan,
    /// Less-than-or-equal comparison (`<=`)
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::Operator;
    ///
    /// let op = Operator::LessEqual;
    /// assert_eq!(op, Operator::LessEqual);
    /// ```
    LessEqual,
    /// Greater-than-or-equal comparison (`>=`)
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::Operator;
    ///
    /// let op = Operator::GreaterEqual;
    /// assert_eq!(op, Operator::GreaterEqual);
    /// ```
    GreaterEqual,
    /// Bitwise AND operation without mask (`&`)
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::Operator;
    ///
    /// let op = Operator::BitwiseAnd;
    /// assert_eq!(op, Operator::BitwiseAnd);
    /// ```
    BitwiseAnd,
    /// Bitwise AND operation with mask value (`&` with a mask operand).
    /// The `u64` payload is the mask.
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::Operator;
    ///
    /// let op = Operator::BitwiseAndMask(0xFF00);
    /// assert_eq!(op, Operator::BitwiseAndMask(0xFF00));
    /// ```
    BitwiseAndMask(u64),
    /// Bitwise XOR operation (`^`)
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::Operator;
    ///
    /// let op = Operator::BitwiseXor;
    /// assert_eq!(op, Operator::BitwiseXor);
    /// ```
    BitwiseXor,
    /// Bitwise NOT/complement operation (`~`)
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::Operator;
    ///
    /// let op = Operator::BitwiseNot;
    /// assert_eq!(op, Operator::BitwiseNot);
    /// ```
    BitwiseNot,
    /// Match any value; condition always succeeds (`x`)
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::Operator;
    ///
    /// let op = Operator::AnyValue;
    /// assert_eq!(op, Operator::AnyValue);
    /// ```
    AnyValue,
}
1128
/// Value types for rule matching
///
/// Holds the literal a rule compares against: an unsigned or signed
/// integer, a floating-point number, raw bytes, or a string.
///
/// Only `PartialEq` is derived: `Eq` cannot be derived because the
/// [`Value::Float`] variant carries an `f64`. The enum is
/// `#[non_exhaustive]`, so code outside this crate must include a wildcard
/// arm when matching on it.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[non_exhaustive]
pub enum Value {
    /// Unsigned integer value
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::Value;
    ///
    /// let val = Value::Uint(0xDEAD_BEEF);
    /// assert_eq!(val, Value::Uint(0xDEAD_BEEF));
    /// ```
    Uint(u64),
    /// Signed integer value
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::Value;
    ///
    /// let val = Value::Int(-42);
    /// assert_eq!(val, Value::Int(-42));
    /// ```
    Int(i64),
    /// Floating-point value (used for `float` and `double` types)
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::Value;
    ///
    /// let val = Value::Float(3.14);
    /// assert_eq!(val, Value::Float(3.14));
    /// ```
    Float(f64),
    /// Byte sequence
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::Value;
    ///
    /// let val = Value::Bytes(vec![0x7f, 0x45, 0x4c, 0x46]);
    /// assert_eq!(val, Value::Bytes(vec![0x7f, 0x45, 0x4c, 0x46]));
    /// ```
    Bytes(Vec<u8>),
    /// String value
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::Value;
    ///
    /// let val = Value::String("MZ".to_string());
    /// assert_eq!(val, Value::String("MZ".to_string()));
    /// ```
    String(String),
}
1189
/// Endianness specification for multi-byte values
///
/// Selects the byte order in which a multi-byte field is interpreted:
/// explicitly little- or big-endian, or whatever the target
/// architecture uses natively.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
pub enum Endianness {
    /// Little-endian byte order (least significant byte first)
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::Endianness;
    ///
    /// let e = Endianness::Little;
    /// assert_eq!(e, Endianness::Little);
    /// ```
    Little,
    /// Big-endian byte order (most significant byte first)
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::Endianness;
    ///
    /// let e = Endianness::Big;
    /// assert_eq!(e, Endianness::Big);
    /// ```
    Big,
    /// Native system byte order (matches target architecture)
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::Endianness;
    ///
    /// let e = Endianness::Native;
    /// assert_eq!(e, Endianness::Native);
    /// ```
    Native,
}
1227
/// Strength modifier for magic rules
///
/// Strength modifiers adjust the default strength calculation for a rule.
/// They are specified using the `!:strength` directive in magic files.
/// Each variant pairs one directive operator with its `i32` operand `N`.
///
/// # Examples
///
/// ```
/// use libmagic_rs::parser::ast::StrengthModifier;
///
/// let add = StrengthModifier::Add(10);      // !:strength +10
/// let sub = StrengthModifier::Subtract(5);  // !:strength -5
/// let mul = StrengthModifier::Multiply(2);  // !:strength *2
/// let div = StrengthModifier::Divide(2);    // !:strength /2
/// let set = StrengthModifier::Set(50);      // !:strength =50
/// ```
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
pub enum StrengthModifier {
    /// Add to the default strength: `!:strength +N`
    Add(i32),
    /// Subtract from the default strength: `!:strength -N`
    Subtract(i32),
    /// Multiply the default strength: `!:strength *N`
    Multiply(i32),
    /// Divide the default strength: `!:strength /N`
    Divide(i32),
    /// Set strength to an absolute value: `!:strength =N` or `!:strength N`
    /// (a bare number with no operator also means "set").
    Set(i32),
}
1257
/// Arithmetic or bitwise operation applied to a value read from the file
/// *before* the rule's comparison operator is evaluated.
///
/// magic(5) supports `+`, `-`, `*`, `/`, `%`, `&`, `|`, and `^` between the
/// type keyword and the comparison value (e.g., `lelong+1 x volume %d`
/// reads a long, adds 1, and formats the transformed value into the
/// message).
///
/// Bitwise AND has two encodings. The legacy `&MASK VALUE` form (mask +
/// implicit equal) stays at the operator level as
/// [`Operator::BitwiseAndMask`]; `&MASK` followed by another operator is
/// represented in this enum as [`ValueTransformOp::BitAnd`]. See that
/// variant for the rationale.
///
/// The operand is signed (`i64`) so that subtraction and negative multipliers
/// round-trip cleanly. Bitwise ops reinterpret the operand as a `u64` bit
/// pattern at evaluation time, matching libmagic's `apprentice.c::mconvert`.
/// NOTE(review): upstream `mconvert` appears to live in `softmagic.c`
/// rather than `apprentice.c` -- confirm the file reference.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[non_exhaustive]
pub enum ValueTransformOp {
    /// Addition (`type+N`).
    Add,
    /// Subtraction (`type-N`).
    Sub,
    /// Multiplication (`type*N`).
    Mul,
    /// Truncating integer division (`type/N`). Division by zero is rejected
    /// at evaluation time.
    Div,
    /// Remainder (`type%N`). Modulo by zero is rejected at evaluation time.
    Mod,
    /// Bitwise AND (`type&N`).
    ///
    /// magic(5) `&MASK` was historically encoded at the operator level
    /// via [`Operator::BitwiseAndMask`] (which combines mask+equal in
    /// one step). That encoding cannot represent rules like `lelong&0xff
    /// x %d` (mask + any-value, with the masked value used in format
    /// substitution). The parser promotes `&MASK` to this `BitAnd`
    /// transform when followed by another operator (`x`, `>`, `!=`, ...)
    /// so the read value is masked before comparison and before printf
    /// substitution. The legacy `&MASK VALUE` form (mask + implicit
    /// equal) keeps using `Operator::BitwiseAndMask` for backwards
    /// compatibility.
    BitAnd,
    /// Bitwise OR (`type|N`).
    Or,
    /// Bitwise XOR (`type^N`).
    Xor,
}
1302
/// A pre-comparison value transform: `(op, operand)`.
///
/// Applied to the value read from the file before the rule's comparison
/// operator runs. See [`ValueTransformOp`] for the supported operations
/// and for how the signed operand is interpreted by each of them.
///
/// # Examples
///
/// ```
/// use libmagic_rs::parser::ast::{ValueTransform, ValueTransformOp};
///
/// // `lelong+1` -> add 1 to the read value
/// let t = ValueTransform { op: ValueTransformOp::Add, operand: 1 };
/// assert_eq!(t.op, ValueTransformOp::Add);
/// assert_eq!(t.operand, 1);
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub struct ValueTransform {
    /// Operation to apply.
    pub op: ValueTransformOp,
    /// Operand to combine with the read value (the `N` in `type+N`,
    /// `type/N`, ...).
    pub operand: i64,
}
1325
/// Magic rule representation in the AST
///
/// A rule reads a value of type `typ` at `offset`, optionally transforms
/// it (`value_transform`), and compares it against `value` using `op`.
/// Rules form a tree: `children` are evaluated only if this rule matches.
/// Use [`MagicRule::new`] to build a rule programmatically and
/// [`MagicRule::validate`] to check its structural invariants.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MagicRule {
    /// Offset specification for where to read data
    pub offset: OffsetSpec,
    /// Type of data to read and interpret
    pub typ: TypeKind,
    /// Comparison operator to apply
    pub op: Operator,
    /// Expected value for comparison
    pub value: Value,
    /// Human-readable message for this rule
    pub message: String,
    /// Child rules that are evaluated if this rule matches
    pub children: Vec<MagicRule>,
    /// Indentation level for hierarchical rules (`0` for a top-level
    /// rule; every child must have a strictly greater level than its
    /// parent to pass [`MagicRule::validate`])
    pub level: u32,
    /// Optional strength modifier from `!:strength` directive
    pub strength_modifier: Option<StrengthModifier>,
    /// Optional pre-comparison value transform from a magic-file
    /// type-suffix like `lelong+1` or `ulequad/1073741824`. When set,
    /// the read value is transformed *before* `op` is evaluated and
    /// before the message's `%`-format substitution, so format
    /// specifiers see the post-transform number.
    ///
    /// `#[serde(default)]` keeps existing serialized AST snapshots
    /// (which never had this field) round-tripping correctly: missing
    /// fields deserialize to `None`, which means "no transform" --
    /// the historical behavior.
    #[serde(default)]
    pub value_transform: Option<ValueTransform>,
}
1358
/// Validation errors returned by [`MagicRule::validate`].
///
/// Each variant names one structural invariant that a rule (or one of
/// its descendants) violated. `validate` stops at the first violation,
/// so a single error does not imply the rest of the tree is valid.
#[derive(Debug, thiserror::Error, PartialEq, Eq)]
#[non_exhaustive]
pub enum MagicRuleValidationError {
    /// Rule message is empty. Messages are user-facing and required
    /// for meaningful output.
    #[error("rule message must not be empty")]
    EmptyMessage,

    /// The child rule at `child_index` has `level <= self.level`,
    /// violating the "children must nest deeper than the parent"
    /// invariant of the hierarchical indentation-based DSL.
    #[error(
        "child rule at index {child_index} has level {child_level}, \
         must be greater than parent level {parent_level}"
    )]
    InvalidChildLevel {
        /// Index of the offending child in `self.children`.
        child_index: usize,
        /// Level of the child rule.
        child_level: u32,
        /// Level of the parent rule.
        parent_level: u32,
    },

    /// Rule `level` exceeds the maximum supported depth
    /// ([`MagicRule::MAX_LEVEL`]). The limit is a hardening mechanism
    /// against stack overflow during deep recursion; libmagic files in
    /// the wild rarely go beyond 10 levels.
    #[error("rule level {level} exceeds maximum supported depth {max}")]
    LevelTooDeep {
        /// The invalid level value.
        level: u32,
        /// The maximum allowed depth.
        max: u32,
    },
}
1395
1396impl MagicRule {
1397    /// Hard structural ceiling on rule `level`.
1398    ///
1399    /// This is a conservative upper bound enforced by
1400    /// [`MagicRule::validate`] to keep the AST shape sane: real
1401    /// magic files in the wild rarely exceed ~10 levels of nesting,
1402    /// so rejecting rules with `level > 1000` catches obviously
1403    /// pathological input at construction time without constraining
1404    /// any legitimate rule.
1405    ///
1406    /// This ceiling is **independent of** the evaluator's
1407    /// `EvaluationConfig::max_recursion_depth` (default 20), which
1408    /// is the *runtime* recursion guard applied during rule
1409    /// evaluation. The evaluator limit is the first one that fires
1410    /// in practice -- a rule tree with 50 levels passes this
1411    /// structural check but is aborted by the evaluator long before
1412    /// reaching `MAX_LEVEL`. The two limits serve different purposes:
1413    /// `MAX_LEVEL` is an AST-shape sanity check, and
1414    /// `max_recursion_depth` is a per-evaluation resource bound.
1415    pub const MAX_LEVEL: u32 = 1000;
1416
1417    /// Construct a top-level rule with no children and no strength
1418    /// modifier.
1419    ///
1420    /// This is the most common constructor for programmatically building
1421    /// rules outside the parser. To add children, mutate
1422    /// [`MagicRule::children`] directly, or use [`MagicRule::with_children`].
1423    /// To set a strength modifier, use
1424    /// [`MagicRule::with_strength_modifier`].
1425    ///
1426    /// # Examples
1427    ///
1428    /// ```rust
1429    /// use libmagic_rs::{MagicRule, OffsetSpec, Operator, TypeKind, Value};
1430    ///
1431    /// let rule = MagicRule::new(
1432    ///     OffsetSpec::Absolute(0),
1433    ///     TypeKind::Byte { signed: false },
1434    ///     Operator::Equal,
1435    ///     Value::Uint(0x7f),
1436    ///     "ELF magic byte".to_string(),
1437    /// );
1438    /// assert_eq!(rule.level, 0);
1439    /// assert!(rule.children.is_empty());
1440    /// assert!(rule.validate().is_ok());
1441    /// ```
1442    #[must_use]
1443    pub fn new(
1444        offset: OffsetSpec,
1445        typ: TypeKind,
1446        op: Operator,
1447        value: Value,
1448        message: String,
1449    ) -> Self {
1450        Self {
1451            offset,
1452            typ,
1453            op,
1454            value,
1455            message,
1456            children: vec![],
1457            level: 0,
1458            strength_modifier: None,
1459            value_transform: None,
1460        }
1461    }
1462
1463    /// Replace `self.children` with the given children and return the
1464    /// modified rule. Builder-style for chaining.
1465    #[must_use]
1466    pub fn with_children(mut self, children: Vec<MagicRule>) -> Self {
1467        self.children = children;
1468        self
1469    }
1470
1471    /// Set `self.strength_modifier` to the given value and return the
1472    /// modified rule. Builder-style for chaining.
1473    #[must_use]
1474    pub const fn with_strength_modifier(mut self, modifier: StrengthModifier) -> Self {
1475        self.strength_modifier = Some(modifier);
1476        self
1477    }
1478
1479    /// Set `self.level` to the given value and return the modified rule.
1480    /// Builder-style for chaining; typically used only when constructing
1481    /// child rules programmatically.
1482    #[must_use]
1483    pub const fn with_level(mut self, level: u32) -> Self {
1484        self.level = level;
1485        self
1486    }
1487
1488    /// Validate structural invariants of the rule.
1489    ///
1490    /// This checks invariants that the parser enforces automatically but
1491    /// that programmatic constructors (especially via serde deserialize)
1492    /// can violate:
1493    ///
1494    /// * Message must not be empty.
1495    /// * `level` must not exceed [`Self::MAX_LEVEL`].
1496    /// * Every child's `level` must be strictly greater than
1497    ///   `self.level`, and each child must recursively validate.
1498    ///
1499    /// This does *not* validate that `value` is shape-compatible with
1500    /// `typ` (e.g., a `Value::Uint` against a `TypeKind::String`); such
1501    /// mismatches are coerced or rejected by the evaluator at match time.
1502    ///
1503    /// # Errors
1504    ///
1505    /// Returns [`MagicRuleValidationError`] describing the first
1506    /// invariant violation encountered.
1507    ///
1508    /// # Examples
1509    ///
1510    /// ```rust
1511    /// use libmagic_rs::{MagicRule, OffsetSpec, Operator, TypeKind, Value};
1512    ///
1513    /// let rule = MagicRule::new(
1514    ///     OffsetSpec::Absolute(0),
1515    ///     TypeKind::Byte { signed: false },
1516    ///     Operator::Equal,
1517    ///     Value::Uint(0),
1518    ///     "zero byte".to_string(),
1519    /// );
1520    /// assert!(rule.validate().is_ok());
1521    /// ```
1522    pub fn validate(&self) -> Result<(), MagicRuleValidationError> {
1523        if self.message.is_empty() {
1524            return Err(MagicRuleValidationError::EmptyMessage);
1525        }
1526        if self.level > Self::MAX_LEVEL {
1527            return Err(MagicRuleValidationError::LevelTooDeep {
1528                level: self.level,
1529                max: Self::MAX_LEVEL,
1530            });
1531        }
1532        for (child_index, child) in self.children.iter().enumerate() {
1533            if child.level <= self.level {
1534                return Err(MagicRuleValidationError::InvalidChildLevel {
1535                    child_index,
1536                    child_level: child.level,
1537                    parent_level: self.level,
1538                });
1539            }
1540            child.validate()?;
1541        }
1542        Ok(())
1543    }
1544}
1545
1546#[cfg(test)]
1547mod tests {
1548    use super::*;
1549
1550    #[test]
1551    fn test_magic_rule_new_defaults() {
1552        let rule = MagicRule::new(
1553            OffsetSpec::Absolute(0),
1554            TypeKind::Byte { signed: false },
1555            Operator::Equal,
1556            Value::Uint(0x7f),
1557            "ELF".to_string(),
1558        );
1559        assert_eq!(rule.level, 0);
1560        assert!(rule.children.is_empty());
1561        assert!(rule.strength_modifier.is_none());
1562        assert!(rule.validate().is_ok());
1563    }
1564
1565    #[test]
1566    fn test_magic_rule_builder_chain() {
1567        let child = MagicRule::new(
1568            OffsetSpec::Absolute(4),
1569            TypeKind::Byte { signed: false },
1570            Operator::Equal,
1571            Value::Uint(2),
1572            "64-bit".to_string(),
1573        )
1574        .with_level(1);
1575        let parent = MagicRule::new(
1576            OffsetSpec::Absolute(0),
1577            TypeKind::Byte { signed: false },
1578            Operator::Equal,
1579            Value::Uint(0x7f),
1580            "ELF".to_string(),
1581        )
1582        .with_children(vec![child])
1583        .with_strength_modifier(StrengthModifier::Add(10));
1584        assert_eq!(parent.children.len(), 1);
1585        assert_eq!(parent.strength_modifier, Some(StrengthModifier::Add(10)));
1586        assert!(parent.validate().is_ok());
1587    }
1588
1589    #[test]
1590    fn test_magic_rule_validate_empty_message_rejected() {
1591        let rule = MagicRule::new(
1592            OffsetSpec::Absolute(0),
1593            TypeKind::Byte { signed: false },
1594            Operator::Equal,
1595            Value::Uint(0),
1596            String::new(),
1597        );
1598        assert_eq!(rule.validate(), Err(MagicRuleValidationError::EmptyMessage));
1599    }
1600
1601    #[test]
1602    fn test_magic_rule_validate_child_level_must_be_deeper() {
1603        let child_same_level = MagicRule::new(
1604            OffsetSpec::Absolute(4),
1605            TypeKind::Byte { signed: false },
1606            Operator::Equal,
1607            Value::Uint(2),
1608            "child".to_string(),
1609        ); // level = 0, same as parent
1610        let parent = MagicRule::new(
1611            OffsetSpec::Absolute(0),
1612            TypeKind::Byte { signed: false },
1613            Operator::Equal,
1614            Value::Uint(0x7f),
1615            "parent".to_string(),
1616        )
1617        .with_children(vec![child_same_level]);
1618        assert_eq!(
1619            parent.validate(),
1620            Err(MagicRuleValidationError::InvalidChildLevel {
1621                child_index: 0,
1622                child_level: 0,
1623                parent_level: 0,
1624            })
1625        );
1626    }
1627
1628    #[test]
1629    fn test_magic_rule_validate_level_too_deep() {
1630        let rule = MagicRule::new(
1631            OffsetSpec::Absolute(0),
1632            TypeKind::Byte { signed: false },
1633            Operator::Equal,
1634            Value::Uint(0),
1635            "deep".to_string(),
1636        )
1637        .with_level(MagicRule::MAX_LEVEL + 1);
1638        assert_eq!(
1639            rule.validate(),
1640            Err(MagicRuleValidationError::LevelTooDeep {
1641                level: MagicRule::MAX_LEVEL + 1,
1642                max: MagicRule::MAX_LEVEL,
1643            })
1644        );
1645    }
1646
1647    #[test]
1648    fn test_offset_spec_absolute() {
1649        let offset = OffsetSpec::Absolute(42);
1650        assert_eq!(offset, OffsetSpec::Absolute(42));
1651
1652        // Test negative offset
1653        let negative = OffsetSpec::Absolute(-10);
1654        assert_eq!(negative, OffsetSpec::Absolute(-10));
1655    }
1656
1657    #[test]
1658    fn test_offset_spec_indirect() {
1659        let indirect = OffsetSpec::Indirect {
1660            base_offset: 0x20,
1661            base_relative: false,
1662            pointer_type: TypeKind::Long {
1663                endian: Endianness::Little,
1664                signed: false,
1665            },
1666            adjustment: 4,
1667            adjustment_op: IndirectAdjustmentOp::Add,
1668            result_relative: false,
1669            endian: Endianness::Little,
1670        };
1671
1672        match indirect {
1673            OffsetSpec::Indirect {
1674                base_offset,
1675                adjustment,
1676                ..
1677            } => {
1678                assert_eq!(base_offset, 0x20);
1679                assert_eq!(adjustment, 4);
1680            }
1681            _ => panic!("Expected Indirect variant"),
1682        }
1683    }
1684
1685    #[test]
1686    fn test_offset_spec_relative() {
1687        let relative = OffsetSpec::Relative(8);
1688        assert_eq!(relative, OffsetSpec::Relative(8));
1689
1690        // Test negative relative offset
1691        let negative_relative = OffsetSpec::Relative(-4);
1692        assert_eq!(negative_relative, OffsetSpec::Relative(-4));
1693    }
1694
1695    #[test]
1696    fn test_offset_spec_from_end() {
1697        let from_end = OffsetSpec::FromEnd(-16);
1698        assert_eq!(from_end, OffsetSpec::FromEnd(-16));
1699
1700        // Test positive from_end (though unusual)
1701        let positive_from_end = OffsetSpec::FromEnd(8);
1702        assert_eq!(positive_from_end, OffsetSpec::FromEnd(8));
1703    }
1704
1705    #[test]
1706    fn test_offset_spec_debug() {
1707        let offset = OffsetSpec::Absolute(100);
1708        let debug_str = format!("{offset:?}");
1709        assert!(debug_str.contains("Absolute"));
1710        assert!(debug_str.contains("100"));
1711    }
1712
1713    #[test]
1714    fn test_offset_spec_clone() {
1715        let original = OffsetSpec::Indirect {
1716            base_offset: 0x10,
1717            base_relative: false,
1718            pointer_type: TypeKind::Short {
1719                endian: Endianness::Big,
1720                signed: true,
1721            },
1722            adjustment: -2,
1723            adjustment_op: IndirectAdjustmentOp::Add,
1724            result_relative: false,
1725            endian: Endianness::Big,
1726        };
1727
1728        let cloned = original.clone();
1729        assert_eq!(original, cloned);
1730    }
1731
1732    #[test]
1733    fn test_offset_spec_serialization() {
1734        let offset = OffsetSpec::Absolute(42);
1735
1736        // Test JSON serialization
1737        let json = serde_json::to_string(&offset).expect("Failed to serialize");
1738        let deserialized: OffsetSpec = serde_json::from_str(&json).expect("Failed to deserialize");
1739
1740        assert_eq!(offset, deserialized);
1741    }
1742
1743    #[test]
1744    fn test_offset_spec_indirect_serialization() {
1745        let indirect = OffsetSpec::Indirect {
1746            base_offset: 0x100,
1747            base_relative: false,
1748            pointer_type: TypeKind::Long {
1749                endian: Endianness::Native,
1750                signed: false,
1751            },
1752            adjustment: 12,
1753            adjustment_op: IndirectAdjustmentOp::Add,
1754            result_relative: false,
1755            endian: Endianness::Native,
1756        };
1757
1758        // Test JSON serialization for complex variant
1759        let json = serde_json::to_string(&indirect).expect("Failed to serialize");
1760        let deserialized: OffsetSpec = serde_json::from_str(&json).expect("Failed to deserialize");
1761
1762        assert_eq!(indirect, deserialized);
1763    }
1764
1765    #[test]
1766    fn test_all_offset_spec_variants() {
1767        let variants = [
1768            OffsetSpec::Absolute(0),
1769            OffsetSpec::Absolute(-100),
1770            OffsetSpec::Indirect {
1771                base_offset: 0x20,
1772                base_relative: false,
1773                pointer_type: TypeKind::Byte { signed: true },
1774                adjustment: 0,
1775                adjustment_op: IndirectAdjustmentOp::Add,
1776                result_relative: false,
1777                endian: Endianness::Little,
1778            },
1779            OffsetSpec::Relative(50),
1780            OffsetSpec::Relative(-25),
1781            OffsetSpec::FromEnd(-8),
1782            OffsetSpec::FromEnd(4),
1783        ];
1784
1785        // Test that all variants can be created and are distinct
1786        for (i, variant) in variants.iter().enumerate() {
1787            for (j, other) in variants.iter().enumerate() {
1788                if i != j {
1789                    assert_ne!(
1790                        variant, other,
1791                        "Variants at indices {i} and {j} should be different"
1792                    );
1793                }
1794            }
1795        }
1796    }
1797
1798    #[test]
1799    fn test_endianness_variants() {
1800        let endianness_values = vec![Endianness::Little, Endianness::Big, Endianness::Native];
1801
1802        for endian in endianness_values {
1803            let indirect = OffsetSpec::Indirect {
1804                base_offset: 0,
1805                base_relative: false,
1806                pointer_type: TypeKind::Long {
1807                    endian,
1808                    signed: false,
1809                },
1810                adjustment: 0,
1811                adjustment_op: IndirectAdjustmentOp::Add,
1812                result_relative: false,
1813                endian,
1814            };
1815
1816            // Verify the endianness is preserved
1817            match indirect {
1818                OffsetSpec::Indirect {
1819                    endian: actual_endian,
1820                    ..
1821                } => {
1822                    assert_eq!(endian, actual_endian);
1823                }
1824                _ => panic!("Expected Indirect variant"),
1825            }
1826        }
1827    }
1828
1829    // Value enum tests
1830    #[test]
1831    fn test_value_uint() {
1832        let value = Value::Uint(42);
1833        assert_eq!(value, Value::Uint(42));
1834
1835        // Test large values
1836        let large_value = Value::Uint(u64::MAX);
1837        assert_eq!(large_value, Value::Uint(u64::MAX));
1838    }
1839
1840    #[test]
1841    fn test_value_int() {
1842        let positive = Value::Int(100);
1843        assert_eq!(positive, Value::Int(100));
1844
1845        let negative = Value::Int(-50);
1846        assert_eq!(negative, Value::Int(-50));
1847
1848        // Test extreme values
1849        let max_int = Value::Int(i64::MAX);
1850        let min_int = Value::Int(i64::MIN);
1851        assert_eq!(max_int, Value::Int(i64::MAX));
1852        assert_eq!(min_int, Value::Int(i64::MIN));
1853    }
1854
1855    #[test]
1856    fn test_value_bytes() {
1857        let empty_bytes = Value::Bytes(vec![]);
1858        assert_eq!(empty_bytes, Value::Bytes(vec![]));
1859
1860        let some_bytes = Value::Bytes(vec![0x7f, 0x45, 0x4c, 0x46]);
1861        assert_eq!(some_bytes, Value::Bytes(vec![0x7f, 0x45, 0x4c, 0x46]));
1862
1863        // Test that different byte sequences are not equal
1864        let other_bytes = Value::Bytes(vec![0x50, 0x4b, 0x03, 0x04]);
1865        assert_ne!(some_bytes, other_bytes);
1866    }
1867
1868    #[test]
1869    fn test_value_string() {
1870        let empty_string = Value::String(String::new());
1871        assert_eq!(empty_string, Value::String(String::new()));
1872
1873        let hello = Value::String("Hello, World!".to_string());
1874        assert_eq!(hello, Value::String("Hello, World!".to_string()));
1875
1876        // Test Unicode strings
1877        let unicode = Value::String("🦀 Rust".to_string());
1878        assert_eq!(unicode, Value::String("🦀 Rust".to_string()));
1879    }
1880
1881    #[test]
1882    fn test_value_comparison() {
1883        // Test that different value types are not equal
1884        let uint_val = Value::Uint(42);
1885        let int_val = Value::Int(42);
1886        let float_val = Value::Float(42.0);
1887        let bytes_val = Value::Bytes(vec![42]);
1888        let string_val = Value::String("42".to_string());
1889
1890        assert_ne!(uint_val, int_val);
1891        assert_ne!(uint_val, float_val);
1892        assert_ne!(uint_val, bytes_val);
1893        assert_ne!(uint_val, string_val);
1894        assert_ne!(int_val, float_val);
1895        assert_ne!(int_val, bytes_val);
1896        assert_ne!(int_val, string_val);
1897        assert_ne!(float_val, bytes_val);
1898        assert_ne!(float_val, string_val);
1899        assert_ne!(bytes_val, string_val);
1900    }
1901
1902    #[test]
1903    fn test_value_debug() {
1904        let uint_val = Value::Uint(123);
1905        let debug_str = format!("{uint_val:?}");
1906        assert!(debug_str.contains("Uint"));
1907        assert!(debug_str.contains("123"));
1908
1909        let string_val = Value::String("test".to_string());
1910        let debug_str = format!("{string_val:?}");
1911        assert!(debug_str.contains("String"));
1912        assert!(debug_str.contains("test"));
1913    }
1914
1915    #[test]
1916    fn test_value_clone() {
1917        let original = Value::Bytes(vec![1, 2, 3, 4]);
1918        let cloned = original.clone();
1919        assert_eq!(original, cloned);
1920
1921        // Verify they are independent copies
1922        match (original, cloned) {
1923            (Value::Bytes(orig_bytes), Value::Bytes(cloned_bytes)) => {
1924                assert_eq!(orig_bytes, cloned_bytes);
1925                // They should have the same content but be different Vec instances
1926            }
1927            _ => panic!("Expected Bytes variants"),
1928        }
1929    }
1930
1931    #[test]
1932    fn test_value_float() {
1933        let value = Value::Float(3.125);
1934        assert_eq!(value, Value::Float(3.125));
1935
1936        let negative = Value::Float(-1.5);
1937        assert_eq!(negative, Value::Float(-1.5));
1938
1939        let zero = Value::Float(0.0);
1940        assert_eq!(zero, Value::Float(0.0));
1941    }
1942
1943    #[test]
1944    fn test_value_serialization() {
1945        let values = vec![
1946            Value::Uint(42),
1947            Value::Int(-100),
1948            Value::Float(3.125),
1949            Value::Bytes(vec![0x7f, 0x45, 0x4c, 0x46]),
1950            Value::String("ELF executable".to_string()),
1951        ];
1952
1953        for value in values {
1954            // Test JSON serialization
1955            let json = serde_json::to_string(&value).expect("Failed to serialize Value");
1956            let deserialized: Value =
1957                serde_json::from_str(&json).expect("Failed to deserialize Value");
1958            assert_eq!(value, deserialized);
1959        }
1960    }
1961
1962    #[test]
1963    fn test_value_serialization_edge_cases() {
1964        // Test empty collections
1965        let empty_bytes = Value::Bytes(vec![]);
1966        let json = serde_json::to_string(&empty_bytes).expect("Failed to serialize empty bytes");
1967        let deserialized: Value =
1968            serde_json::from_str(&json).expect("Failed to deserialize empty bytes");
1969        assert_eq!(empty_bytes, deserialized);
1970
1971        let empty_string = Value::String(String::new());
1972        let json = serde_json::to_string(&empty_string).expect("Failed to serialize empty string");
1973        let deserialized: Value =
1974            serde_json::from_str(&json).expect("Failed to deserialize empty string");
1975        assert_eq!(empty_string, deserialized);
1976
1977        // Test extreme values
1978        let max_uint = Value::Uint(u64::MAX);
1979        let json = serde_json::to_string(&max_uint).expect("Failed to serialize max uint");
1980        let deserialized: Value =
1981            serde_json::from_str(&json).expect("Failed to deserialize max uint");
1982        assert_eq!(max_uint, deserialized);
1983
1984        let min_int = Value::Int(i64::MIN);
1985        let json = serde_json::to_string(&min_int).expect("Failed to serialize min int");
1986        let deserialized: Value =
1987            serde_json::from_str(&json).expect("Failed to deserialize min int");
1988        assert_eq!(min_int, deserialized);
1989    }
1990
1991    // TypeKind tests
1992    #[test]
1993    fn test_type_kind_byte() {
1994        let byte_type = TypeKind::Byte { signed: true };
1995        assert_eq!(byte_type, TypeKind::Byte { signed: true });
1996    }
1997
1998    #[test]
1999    fn test_type_kind_short() {
2000        let short_little_endian = TypeKind::Short {
2001            endian: Endianness::Little,
2002            signed: false,
2003        };
2004        let short_big_endian = TypeKind::Short {
2005            endian: Endianness::Big,
2006            signed: true,
2007        };
2008
2009        assert_ne!(short_little_endian, short_big_endian);
2010        assert_eq!(short_little_endian, short_little_endian.clone());
2011    }
2012
2013    #[test]
2014    fn test_type_kind_long() {
2015        let long_native = TypeKind::Long {
2016            endian: Endianness::Native,
2017            signed: true,
2018        };
2019
2020        match long_native {
2021            TypeKind::Long { endian, signed } => {
2022                assert_eq!(endian, Endianness::Native);
2023                assert!(signed);
2024            }
2025            _ => panic!("Expected Long variant"),
2026        }
2027    }
2028
2029    #[test]
2030    fn test_type_kind_string() {
2031        let unlimited_string = TypeKind::String {
2032            max_length: None,
2033            flags: StringFlags::default(),
2034        };
2035        let limited_string = TypeKind::String {
2036            max_length: Some(256),
2037            flags: StringFlags::default(),
2038        };
2039
2040        assert_ne!(unlimited_string, limited_string);
2041        assert_eq!(unlimited_string, unlimited_string.clone());
2042    }
2043
2044    #[test]
2045    fn test_type_kind_serialization() {
2046        let types = vec![
2047            TypeKind::Byte { signed: true },
2048            TypeKind::Short {
2049                endian: Endianness::Little,
2050                signed: false,
2051            },
2052            TypeKind::Long {
2053                endian: Endianness::Big,
2054                signed: true,
2055            },
2056            TypeKind::Quad {
2057                endian: Endianness::Little,
2058                signed: false,
2059            },
2060            TypeKind::Quad {
2061                endian: Endianness::Big,
2062                signed: true,
2063            },
2064            TypeKind::Float {
2065                endian: Endianness::Native,
2066            },
2067            TypeKind::Float {
2068                endian: Endianness::Big,
2069            },
2070            TypeKind::Double {
2071                endian: Endianness::Little,
2072            },
2073            TypeKind::Double {
2074                endian: Endianness::Native,
2075            },
2076            TypeKind::Date {
2077                endian: Endianness::Big,
2078                utc: true,
2079            },
2080            TypeKind::Date {
2081                endian: Endianness::Little,
2082                utc: false,
2083            },
2084            TypeKind::QDate {
2085                endian: Endianness::Native,
2086                utc: true,
2087            },
2088            TypeKind::QDate {
2089                endian: Endianness::Big,
2090                utc: false,
2091            },
2092            TypeKind::String {
2093                max_length: None,
2094                flags: StringFlags::default(),
2095            },
2096            TypeKind::String {
2097                max_length: Some(128),
2098                flags: StringFlags::default(),
2099            },
2100            TypeKind::PString {
2101                max_length: None,
2102                length_width: PStringLengthWidth::OneByte,
2103                length_includes_itself: false,
2104            },
2105            TypeKind::PString {
2106                max_length: Some(64),
2107                length_width: PStringLengthWidth::OneByte,
2108                length_includes_itself: false,
2109            },
2110            TypeKind::PString {
2111                max_length: None,
2112                length_width: PStringLengthWidth::TwoByteBE,
2113                length_includes_itself: true,
2114            },
2115            TypeKind::PString {
2116                max_length: Some(128),
2117                length_width: PStringLengthWidth::FourByteLE,
2118                length_includes_itself: false,
2119            },
2120        ];
2121
2122        for typ in types {
2123            let json = serde_json::to_string(&typ).expect("Failed to serialize TypeKind");
2124            let deserialized: TypeKind =
2125                serde_json::from_str(&json).expect("Failed to deserialize TypeKind");
2126            assert_eq!(typ, deserialized);
2127        }
2128    }
2129
2130    // Operator tests
2131    #[test]
2132    fn test_operator_variants() {
2133        let operators = [
2134            Operator::Equal,
2135            Operator::NotEqual,
2136            Operator::BitwiseAnd,
2137            Operator::BitwiseXor,
2138            Operator::BitwiseNot,
2139            Operator::AnyValue,
2140        ];
2141
2142        for (i, op) in operators.iter().enumerate() {
2143            for (j, other) in operators.iter().enumerate() {
2144                if i == j {
2145                    assert_eq!(op, other);
2146                } else {
2147                    assert_ne!(op, other);
2148                }
2149            }
2150        }
2151    }
2152
2153    #[test]
2154    fn test_operator_serialization() {
2155        let operators = vec![
2156            Operator::Equal,
2157            Operator::NotEqual,
2158            Operator::BitwiseAnd,
2159            Operator::BitwiseXor,
2160            Operator::BitwiseNot,
2161            Operator::AnyValue,
2162        ];
2163
2164        for op in operators {
2165            let json = serde_json::to_string(&op).expect("Failed to serialize Operator");
2166            let deserialized: Operator =
2167                serde_json::from_str(&json).expect("Failed to deserialize Operator");
2168            assert_eq!(op, deserialized);
2169        }
2170    }
2171
2172    // MagicRule tests
2173    #[test]
2174    fn test_magic_rule_creation() {
2175        let rule = MagicRule {
2176            offset: OffsetSpec::Absolute(0),
2177            typ: TypeKind::Byte { signed: true },
2178            op: Operator::Equal,
2179            value: Value::Uint(0x7f),
2180            message: "ELF magic".to_string(),
2181            children: vec![],
2182            level: 0,
2183            strength_modifier: None,
2184            value_transform: None,
2185        };
2186
2187        assert_eq!(rule.message, "ELF magic");
2188        assert_eq!(rule.level, 0);
2189        assert!(rule.children.is_empty());
2190    }
2191
2192    #[test]
2193    fn test_magic_rule_with_children() {
2194        let child_rule = MagicRule {
2195            offset: OffsetSpec::Absolute(4),
2196            typ: TypeKind::Byte { signed: true },
2197            op: Operator::Equal,
2198            value: Value::Uint(1),
2199            message: "32-bit".to_string(),
2200            children: vec![],
2201            level: 1,
2202            strength_modifier: None,
2203            value_transform: None,
2204        };
2205
2206        let parent_rule = MagicRule {
2207            offset: OffsetSpec::Absolute(0),
2208            typ: TypeKind::Long {
2209                endian: Endianness::Little,
2210                signed: false,
2211            },
2212            op: Operator::Equal,
2213            value: Value::Bytes(vec![0x7f, 0x45, 0x4c, 0x46]),
2214            message: "ELF executable".to_string(),
2215            children: vec![child_rule],
2216            level: 0,
2217            strength_modifier: None,
2218            value_transform: None,
2219        };
2220
2221        assert_eq!(parent_rule.children.len(), 1);
2222        assert_eq!(parent_rule.children[0].level, 1);
2223        assert_eq!(parent_rule.children[0].message, "32-bit");
2224    }
2225
2226    #[test]
2227    fn test_magic_rule_serialization() {
2228        let rule = MagicRule {
2229            offset: OffsetSpec::Absolute(16),
2230            typ: TypeKind::Short {
2231                endian: Endianness::Little,
2232                signed: false,
2233            },
2234            op: Operator::NotEqual,
2235            value: Value::Uint(0),
2236            message: "Non-zero short value".to_string(),
2237            children: vec![],
2238            level: 2,
2239            strength_modifier: None,
2240            value_transform: None,
2241        };
2242
2243        let json = serde_json::to_string(&rule).expect("Failed to serialize MagicRule");
2244        let deserialized: MagicRule =
2245            serde_json::from_str(&json).expect("Failed to deserialize MagicRule");
2246
2247        assert_eq!(rule.message, deserialized.message);
2248        assert_eq!(rule.level, deserialized.level);
2249        assert_eq!(rule.children.len(), deserialized.children.len());
2250    }
2251
2252    // StrengthModifier tests
2253    #[test]
2254    fn test_strength_modifier_variants() {
2255        let add = StrengthModifier::Add(10);
2256        let sub = StrengthModifier::Subtract(5);
2257        let mul = StrengthModifier::Multiply(2);
2258        let div = StrengthModifier::Divide(2);
2259        let set = StrengthModifier::Set(50);
2260
2261        // Test that each variant has the correct inner value
2262        assert_eq!(add, StrengthModifier::Add(10));
2263        assert_eq!(sub, StrengthModifier::Subtract(5));
2264        assert_eq!(mul, StrengthModifier::Multiply(2));
2265        assert_eq!(div, StrengthModifier::Divide(2));
2266        assert_eq!(set, StrengthModifier::Set(50));
2267
2268        // Test that different variants are not equal
2269        assert_ne!(add, sub);
2270        assert_ne!(mul, div);
2271        assert_ne!(set, add);
2272    }
2273
2274    #[test]
2275    fn test_strength_modifier_negative_values() {
2276        let add_negative = StrengthModifier::Add(-10);
2277        let sub_negative = StrengthModifier::Subtract(-5);
2278        let set_negative = StrengthModifier::Set(-50);
2279
2280        assert_eq!(add_negative, StrengthModifier::Add(-10));
2281        assert_eq!(sub_negative, StrengthModifier::Subtract(-5));
2282        assert_eq!(set_negative, StrengthModifier::Set(-50));
2283    }
2284
2285    #[test]
2286    fn test_strength_modifier_serialization() {
2287        let modifiers = vec![
2288            StrengthModifier::Add(10),
2289            StrengthModifier::Subtract(5),
2290            StrengthModifier::Multiply(2),
2291            StrengthModifier::Divide(3),
2292            StrengthModifier::Set(100),
2293        ];
2294
2295        for modifier in modifiers {
2296            let json =
2297                serde_json::to_string(&modifier).expect("Failed to serialize StrengthModifier");
2298            let deserialized: StrengthModifier =
2299                serde_json::from_str(&json).expect("Failed to deserialize StrengthModifier");
2300            assert_eq!(modifier, deserialized);
2301        }
2302    }
2303
2304    #[test]
2305    fn test_strength_modifier_debug() {
2306        let modifier = StrengthModifier::Add(25);
2307        let debug_str = format!("{modifier:?}");
2308        assert!(debug_str.contains("Add"));
2309        assert!(debug_str.contains("25"));
2310    }
2311
2312    #[test]
2313    fn test_strength_modifier_clone() {
2314        let original = StrengthModifier::Multiply(4);
2315        let cloned = original;
2316        assert_eq!(original, cloned);
2317    }
2318
2319    #[test]
2320    fn test_magic_rule_with_strength_modifier() {
2321        let rule = MagicRule {
2322            offset: OffsetSpec::Absolute(0),
2323            typ: TypeKind::Byte { signed: true },
2324            op: Operator::Equal,
2325            value: Value::Uint(0x7f),
2326            message: "ELF magic".to_string(),
2327            children: vec![],
2328            level: 0,
2329            strength_modifier: Some(StrengthModifier::Add(20)),
2330            value_transform: None,
2331        };
2332
2333        assert_eq!(rule.strength_modifier, Some(StrengthModifier::Add(20)));
2334
2335        // Test serialization with strength_modifier
2336        let json = serde_json::to_string(&rule).expect("Failed to serialize MagicRule");
2337        let deserialized: MagicRule =
2338            serde_json::from_str(&json).expect("Failed to deserialize MagicRule");
2339        assert_eq!(rule.strength_modifier, deserialized.strength_modifier);
2340    }
2341
2342    #[test]
2343    fn test_magic_rule_without_strength_modifier() {
2344        let rule = MagicRule {
2345            offset: OffsetSpec::Absolute(0),
2346            typ: TypeKind::Byte { signed: true },
2347            op: Operator::Equal,
2348            value: Value::Uint(0x7f),
2349            message: "ELF magic".to_string(),
2350            children: vec![],
2351            level: 0,
2352            strength_modifier: None,
2353            value_transform: None,
2354        };
2355
2356        assert_eq!(rule.strength_modifier, None);
2357    }
2358
2359    // MetaType tests
2360    #[test]
2361    fn test_meta_type_variants_debug_clone_eq() {
2362        let cases = [
2363            MetaType::Default,
2364            MetaType::Clear,
2365            MetaType::Indirect,
2366            MetaType::Offset,
2367            MetaType::Name("part2".to_string()),
2368            MetaType::Use("part2".to_string()),
2369        ];
2370
2371        for (i, variant) in cases.iter().enumerate() {
2372            // Debug formatting is non-empty
2373            let debug_str = format!("{variant:?}");
2374            assert!(
2375                !debug_str.is_empty(),
2376                "Debug format must be non-empty for variant at index {i}"
2377            );
2378
2379            // Clone round-trip preserves equality
2380            let cloned = variant.clone();
2381            assert_eq!(
2382                variant, &cloned,
2383                "Clone must preserve equality for variant at index {i}"
2384            );
2385
2386            // Distinct variants are not equal
2387            for (j, other) in cases.iter().enumerate() {
2388                if i == j {
2389                    assert_eq!(variant, other);
2390                } else {
2391                    assert_ne!(
2392                        variant, other,
2393                        "Variants at indices {i} and {j} must differ"
2394                    );
2395                }
2396            }
2397        }
2398    }
2399
2400    #[test]
2401    fn test_meta_type_serde_roundtrip() {
2402        let cases = [
2403            MetaType::Default,
2404            MetaType::Clear,
2405            MetaType::Indirect,
2406            MetaType::Offset,
2407            MetaType::Name("foo".to_string()),
2408            MetaType::Use("bar".to_string()),
2409        ];
2410
2411        for variant in cases {
2412            let json = serde_json::to_string(&variant).expect("serialize MetaType");
2413            let deserialized: MetaType = serde_json::from_str(&json).expect("deserialize MetaType");
2414            assert_eq!(variant, deserialized);
2415        }
2416    }
2417
2418    #[test]
2419    fn test_type_kind_meta_bit_width_is_none() {
2420        let cases = [
2421            MetaType::Default,
2422            MetaType::Clear,
2423            MetaType::Indirect,
2424            MetaType::Offset,
2425            MetaType::Name("x".to_string()),
2426            MetaType::Use("x".to_string()),
2427        ];
2428        for meta in cases {
2429            let kind = TypeKind::Meta(meta);
2430            assert_eq!(
2431                kind.bit_width(),
2432                None,
2433                "TypeKind::Meta must have no bit width: {kind:?}"
2434            );
2435        }
2436    }
2437}