libmagic_rs/parser/ast.rs
1// Copyright (c) 2025-2026 the libmagic-rs contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! Abstract Syntax Tree definitions for magic rules
5//!
6//! This module contains the core data structures that represent parsed magic rules
7//! and their components, including offset specifications, type kinds, operators, and values.
8
9use serde::{Deserialize, Serialize};
10use std::num::{NonZeroU32, NonZeroUsize};
11
12/// The width of the length prefix for Pascal strings.
13///
14/// Uppercase suffix letters (`/H`, `/L`) indicate big-endian byte order.
15/// Lowercase suffix letters (`/h`, `/l`) indicate little-endian byte order.
16///
17/// # Examples
18///
19/// ```
20/// use libmagic_rs::parser::ast::PStringLengthWidth;
21/// let width = PStringLengthWidth::OneByte;
22/// assert_eq!(width.byte_count(), 1);
23///
24/// let width = PStringLengthWidth::TwoByteBE;
25/// assert_eq!(width.byte_count(), 2);
26///
27/// let width = PStringLengthWidth::FourByteLE;
28/// assert_eq!(width.byte_count(), 4);
29/// ```
30#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
31#[allow(clippy::enum_variant_names)]
32#[non_exhaustive]
33pub enum PStringLengthWidth {
34 /// 1-byte length prefix (default, `/B` suffix)
35 ///
36 /// # Examples
37 ///
38 /// ```
39 /// use libmagic_rs::parser::ast::PStringLengthWidth;
40 /// let width = PStringLengthWidth::OneByte;
41 /// assert_eq!(width.byte_count(), 1);
42 /// ```
43 OneByte,
44 /// 2-byte big-endian length prefix (`/H` suffix)
45 ///
46 /// # Examples
47 ///
48 /// ```
49 /// use libmagic_rs::parser::ast::PStringLengthWidth;
50 /// let width = PStringLengthWidth::TwoByteBE;
51 /// assert_eq!(width.byte_count(), 2);
52 /// ```
53 TwoByteBE,
54 /// 2-byte little-endian length prefix (`/h` suffix)
55 ///
56 /// # Examples
57 ///
58 /// ```
59 /// use libmagic_rs::parser::ast::PStringLengthWidth;
60 /// let width = PStringLengthWidth::TwoByteLE;
61 /// assert_eq!(width.byte_count(), 2);
62 /// ```
63 TwoByteLE,
64 /// 4-byte big-endian length prefix (`/L` suffix)
65 ///
66 /// # Examples
67 ///
68 /// ```
69 /// use libmagic_rs::parser::ast::PStringLengthWidth;
70 /// let width = PStringLengthWidth::FourByteBE;
71 /// assert_eq!(width.byte_count(), 4);
72 /// ```
73 FourByteBE,
74 /// 4-byte little-endian length prefix (`/l` suffix)
75 ///
76 /// # Examples
77 ///
78 /// ```
79 /// use libmagic_rs::parser::ast::PStringLengthWidth;
80 /// let width = PStringLengthWidth::FourByteLE;
81 /// assert_eq!(width.byte_count(), 4);
82 /// ```
83 FourByteLE,
84}
85
86impl PStringLengthWidth {
87 /// Returns the number of bytes used for the length prefix.
88 #[must_use]
89 pub fn byte_count(&self) -> usize {
90 match self {
91 Self::OneByte => 1,
92 Self::TwoByteBE | Self::TwoByteLE => 2,
93 Self::FourByteBE | Self::FourByteLE => 4,
94 }
95 }
96}
97
98/// Arithmetic operation applied to the value read at an indirect offset's
99/// `base_offset` before the result is used as the final file offset.
100///
101/// magic(5) supports `+`, `-`, `*`, `/`, `%`, `&`, `|`, and `^` between the
102/// pointer-type specifier and the operand inside the parentheses. Addition
103/// and subtraction collapse to [`IndirectAdjustmentOp::Add`] with a signed
104/// `adjustment` (so `(N.X-1)` is `Add(-1)` rather than a separate `Sub`
105/// variant); the remaining operators each have a dedicated variant.
106///
107/// The default is [`IndirectAdjustmentOp::Add`]; an indirect offset with no
108/// arithmetic — just `(base.type)` — is encoded as `Add` with `adjustment:
109/// 0`, preserving backwards compatibility.
110///
111/// # Examples
112///
113/// ```
114/// use libmagic_rs::parser::ast::IndirectAdjustmentOp;
115///
116/// assert_eq!(IndirectAdjustmentOp::default(), IndirectAdjustmentOp::Add);
117/// ```
118#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
119#[non_exhaustive]
120pub enum IndirectAdjustmentOp {
121 /// Addition (also covers subtraction via negative `adjustment`).
122 ///
123 /// # Examples
124 ///
125 /// ```
126 /// use libmagic_rs::parser::ast::IndirectAdjustmentOp;
127 /// assert_eq!(IndirectAdjustmentOp::default(), IndirectAdjustmentOp::Add);
128 /// ```
129 #[default]
130 Add,
131 /// Multiplication: `pointer_value * adjustment`.
132 ///
133 /// # Examples
134 ///
135 /// ```
136 /// use libmagic_rs::parser::ast::IndirectAdjustmentOp;
137 /// let op = IndirectAdjustmentOp::Mul;
138 /// assert_eq!(op, IndirectAdjustmentOp::Mul);
139 /// ```
140 Mul,
141 /// Truncating integer division: `pointer_value / adjustment`. Division
142 /// by zero is rejected by the evaluator with an error.
143 ///
144 /// # Examples
145 ///
146 /// ```
147 /// use libmagic_rs::parser::ast::IndirectAdjustmentOp;
148 /// let op = IndirectAdjustmentOp::Div;
149 /// assert_eq!(op, IndirectAdjustmentOp::Div);
150 /// ```
151 Div,
152 /// Remainder: `pointer_value % adjustment`. Modulo by zero is rejected
153 /// by the evaluator with an error.
154 ///
155 /// # Examples
156 ///
157 /// ```
158 /// use libmagic_rs::parser::ast::IndirectAdjustmentOp;
159 /// let op = IndirectAdjustmentOp::Mod;
160 /// assert_eq!(op, IndirectAdjustmentOp::Mod);
161 /// ```
162 Mod,
163 /// Bitwise AND: `pointer_value & adjustment`.
164 ///
165 /// # Examples
166 ///
167 /// ```
168 /// use libmagic_rs::parser::ast::IndirectAdjustmentOp;
169 /// let op = IndirectAdjustmentOp::And;
170 /// assert_eq!(op, IndirectAdjustmentOp::And);
171 /// ```
172 And,
173 /// Bitwise OR: `pointer_value | adjustment`.
174 ///
175 /// # Examples
176 ///
177 /// ```
178 /// use libmagic_rs::parser::ast::IndirectAdjustmentOp;
179 /// let op = IndirectAdjustmentOp::Or;
180 /// assert_eq!(op, IndirectAdjustmentOp::Or);
181 /// ```
182 Or,
183 /// Bitwise XOR: `pointer_value ^ adjustment`.
184 ///
185 /// # Examples
186 ///
187 /// ```
188 /// use libmagic_rs::parser::ast::IndirectAdjustmentOp;
189 /// let op = IndirectAdjustmentOp::Xor;
190 /// assert_eq!(op, IndirectAdjustmentOp::Xor);
191 /// ```
192 Xor,
193}
194
195/// Offset specification for locating data in files
196#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
197#[non_exhaustive]
198pub enum OffsetSpec {
199 /// Absolute offset from file start (or from file end if negative)
200 ///
201 /// Positive values are offsets from the start of the file.
202 /// Negative values are offsets from the end of the file (same as `FromEnd`).
203 ///
204 /// # Examples
205 ///
206 /// ```
207 /// use libmagic_rs::parser::ast::OffsetSpec;
208 ///
209 /// let offset = OffsetSpec::Absolute(0x10); // Read at byte 16 from start
210 /// let from_end = OffsetSpec::Absolute(-4); // 4 bytes before end of file
211 /// ```
212 Absolute(i64),
213
214 /// Indirect offset through pointer dereferencing
215 ///
216 /// Reads a pointer value at `base_offset`, interprets it according to `pointer_type`
217 /// and `endian`, then combines `adjustment` with the pointer value using
218 /// `adjustment_op` to get the final offset. The default `adjustment_op`
219 /// is [`IndirectAdjustmentOp::Add`], so `(base.type)` and
220 /// `(base.type+N)` / `(base.type-N)` use addition (subtraction is
221 /// encoded as `Add` with a negative `adjustment`). magic(5) also
222 /// supports multiplicative and bitwise forms inside the parens, e.g.
223 /// `(0x200.s*2)` ([`IndirectAdjustmentOp::Mul`]).
224 ///
225 /// # Examples
226 ///
227 /// ```
228 /// use libmagic_rs::parser::ast::{OffsetSpec, TypeKind, Endianness, IndirectAdjustmentOp};
229 ///
230 /// let indirect = OffsetSpec::Indirect {
231 /// base_offset: 0x20,
232 /// base_relative: false,
233 /// pointer_type: TypeKind::Long { endian: Endianness::Little, signed: false },
234 /// adjustment: 4,
235 /// adjustment_op: IndirectAdjustmentOp::Add,
236 /// result_relative: false,
237 /// endian: Endianness::Little,
238 /// };
239 /// ```
240 Indirect {
241 /// Base offset to read pointer from. When `base_relative` is
242 /// `true`, this value is added to the current anchor (last-match
243 /// position) rather than being treated as an absolute file
244 /// position.
245 base_offset: i64,
246 /// If `true`, `base_offset` is relative to the current anchor
247 /// (i.e., `(&N.X)` syntax in magic files). Defaults to `false`
248 /// for backwards compatibility with existing AST snapshots; the
249 /// serde `default` attribute lets older serialized AST round-trip.
250 #[serde(default)]
251 base_relative: bool,
252 /// Type of pointer value
253 pointer_type: TypeKind,
254 /// Operand combined with the pointer value via `adjustment_op`.
255 ///
256 /// For `IndirectAdjustmentOp::Add`, the operand is signed (negative
257 /// values encode subtraction). For multiplicative and bitwise ops
258 /// the operand is interpreted as `i64` but typically magic files
259 /// supply non-negative literals.
260 adjustment: i64,
261 /// Arithmetic operation applied to the pointer value with
262 /// `adjustment` as the operand. Defaults to
263 /// [`IndirectAdjustmentOp::Add`] for legacy AST consumers via
264 /// serde's `default` attribute.
265 #[serde(default)]
266 adjustment_op: IndirectAdjustmentOp,
267 /// If `true`, the resolved offset is added to the current anchor
268 /// instead of being treated as an absolute file position. This
269 /// corresponds to magic-file `&(...)` syntax wrapping an indirect
270 /// spec, e.g., `&(0x10.l)`.
271 #[serde(default)]
272 result_relative: bool,
273 /// Endianness for pointer reading
274 endian: Endianness,
275 },
276
277 /// Relative offset from previous match position
278 ///
279 /// # Examples
280 ///
281 /// ```
282 /// use libmagic_rs::parser::ast::OffsetSpec;
283 ///
284 /// let relative = OffsetSpec::Relative(8); // 8 bytes after previous match
285 /// ```
286 Relative(i64),
287
288 /// Offset from end of file (negative values move towards start)
289 ///
290 /// # Examples
291 ///
292 /// ```
293 /// use libmagic_rs::parser::ast::OffsetSpec;
294 ///
295 /// let from_end = OffsetSpec::FromEnd(-16); // 16 bytes before end of file
296 /// ```
297 FromEnd(i64),
298}
299
300/// Control-flow directive carried by [`TypeKind::Meta`].
301///
302/// These are not value-reading types -- they correspond to magic(5)
303/// control-flow keywords (`default`, `clear`, `name`, `use`, `indirect`,
304/// `offset`) that modify how a rule set is traversed rather than reading
305/// bytes from the buffer. All six variants are fully evaluated by the
306/// engine: `default`/`clear` manage per-level sibling-matched state;
307/// `name`/`use` implement subroutine dispatch; `indirect` re-applies the
308/// root rule database at a resolved offset; and `offset` emits the
309/// current file position as `Value::Uint` for printf-style formatting.
310#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
311#[non_exhaustive]
312pub enum MetaType {
313 /// `default` directive: fires when no sibling at the same indentation
314 /// level has matched at the current offset. See magic(5) for the
315 /// "default" type semantics.
316 ///
317 /// # Examples
318 ///
319 /// ```
320 /// use libmagic_rs::parser::ast::MetaType;
321 /// let meta = MetaType::Default;
322 /// assert_eq!(meta, MetaType::Default);
323 /// ```
324 Default,
325 /// `clear` directive: resets the sibling-matched flag so a later
326 /// `default` sibling can fire even if an earlier sibling matched.
327 /// See magic(5) for the "clear" type semantics.
328 ///
329 /// # Examples
330 ///
331 /// ```
332 /// use libmagic_rs::parser::ast::MetaType;
333 /// let meta = MetaType::Clear;
334 /// assert_eq!(meta, MetaType::Clear);
335 /// ```
336 Clear,
337 /// `name <identifier>` directive: declares a named subroutine that
338 /// can be invoked later via [`MetaType::Use`]. See magic(5) for the
339 /// "name" type semantics.
340 ///
341 /// # Examples
342 ///
343 /// ```
344 /// use libmagic_rs::parser::ast::MetaType;
345 /// let meta = MetaType::Name("part2".to_string());
346 /// assert_eq!(meta, MetaType::Name("part2".to_string()));
347 /// ```
348 Name(String),
349 /// `use <identifier>` directive: invokes a named subroutine
350 /// previously declared via [`MetaType::Name`]. See magic(5) for the
351 /// "use" type semantics.
352 ///
353 /// # Examples
354 ///
355 /// ```
356 /// use libmagic_rs::parser::ast::MetaType;
357 /// let meta = MetaType::Use("part2".to_string());
358 /// assert_eq!(meta, MetaType::Use("part2".to_string()));
359 /// ```
360 Use(String),
361 /// `indirect` directive: re-applies the entire magic database at the
362 /// resolved offset. See magic(5) for the "indirect" type semantics.
363 ///
364 /// # Examples
365 ///
366 /// ```
367 /// use libmagic_rs::parser::ast::MetaType;
368 /// let meta = MetaType::Indirect;
369 /// assert_eq!(meta, MetaType::Indirect);
370 /// ```
371 Indirect,
372 /// `offset` type keyword: reports the current file offset rather than
373 /// reading a typed value from the buffer. See magic(5) for the
374 /// "offset" type semantics.
375 ///
376 /// Evaluation: the engine resolves the rule's offset specification
377 /// to an absolute position and emits a `RuleMatch` whose `value` is
378 /// `Value::Uint(position)`. Message templates can reference that
379 /// value through printf-style format specifiers (e.g. `%lld`),
380 /// which are substituted by
381 /// [`crate::output::format::format_magic_message`] at description-
382 /// assembly time. The only supported operator is `x` (`AnyValue`);
383 /// any other operator is `debug!`-logged and skipped.
384 ///
385 /// # Examples
386 ///
387 /// ```
388 /// use libmagic_rs::parser::ast::MetaType;
389 /// let meta = MetaType::Offset;
390 /// assert_eq!(meta, MetaType::Offset);
391 /// ```
392 Offset,
393}
394
395/// Data type specifications for interpreting bytes
396#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
397#[non_exhaustive]
398pub enum TypeKind {
399 /// Single byte
400 ///
401 /// # Examples
402 ///
403 /// ```
404 /// use libmagic_rs::parser::ast::TypeKind;
405 ///
406 /// let byte = TypeKind::Byte { signed: true };
407 /// assert_eq!(byte, TypeKind::Byte { signed: true });
408 /// ```
409 Byte {
410 /// Whether value is signed
411 signed: bool,
412 },
413 /// 16-bit integer
414 ///
415 /// # Examples
416 ///
417 /// ```
418 /// use libmagic_rs::parser::ast::{TypeKind, Endianness};
419 ///
420 /// let short = TypeKind::Short { endian: Endianness::Little, signed: true };
421 /// assert_eq!(short, TypeKind::Short { endian: Endianness::Little, signed: true });
422 /// ```
423 Short {
424 /// Byte order
425 endian: Endianness,
426 /// Whether value is signed
427 signed: bool,
428 },
429 /// 32-bit integer
430 ///
431 /// # Examples
432 ///
433 /// ```
434 /// use libmagic_rs::parser::ast::{TypeKind, Endianness};
435 ///
436 /// let long = TypeKind::Long { endian: Endianness::Big, signed: false };
437 /// assert_eq!(long, TypeKind::Long { endian: Endianness::Big, signed: false });
438 /// ```
439 Long {
440 /// Byte order
441 endian: Endianness,
442 /// Whether value is signed
443 signed: bool,
444 },
445 /// 64-bit integer
446 ///
447 /// # Examples
448 ///
449 /// ```
450 /// use libmagic_rs::parser::ast::{TypeKind, Endianness};
451 ///
452 /// let quad = TypeKind::Quad { endian: Endianness::Big, signed: true };
453 /// assert_eq!(quad, TypeKind::Quad { endian: Endianness::Big, signed: true });
454 /// ```
455 Quad {
456 /// Byte order
457 endian: Endianness,
458 /// Whether value is signed
459 signed: bool,
460 },
461 /// 32-bit IEEE 754 floating-point
462 ///
463 /// # Examples
464 ///
465 /// ```
466 /// use libmagic_rs::parser::ast::{TypeKind, Endianness};
467 ///
468 /// let float = TypeKind::Float { endian: Endianness::Big };
469 /// assert_eq!(float, TypeKind::Float { endian: Endianness::Big });
470 /// ```
471 Float {
472 /// Byte order
473 endian: Endianness,
474 },
475 /// 64-bit IEEE 754 double-precision floating-point
476 ///
477 /// # Examples
478 ///
479 /// ```
480 /// use libmagic_rs::parser::ast::{TypeKind, Endianness};
481 ///
482 /// let double = TypeKind::Double { endian: Endianness::Big };
483 /// assert_eq!(double, TypeKind::Double { endian: Endianness::Big });
484 /// ```
485 Double {
486 /// Byte order
487 endian: Endianness,
488 },
489 /// 32-bit Unix timestamp (seconds since epoch)
490 ///
491 /// # Examples
492 ///
493 /// ```
494 /// use libmagic_rs::parser::ast::{TypeKind, Endianness};
495 ///
496 /// let date = TypeKind::Date { endian: Endianness::Big, utc: true };
497 /// assert_eq!(date, TypeKind::Date { endian: Endianness::Big, utc: true });
498 /// ```
499 Date {
500 /// Byte order
501 endian: Endianness,
502 /// true = UTC, false = local time
503 utc: bool,
504 },
505 /// 64-bit Unix timestamp (seconds since epoch)
506 ///
507 /// # Examples
508 ///
509 /// ```
510 /// use libmagic_rs::parser::ast::{TypeKind, Endianness};
511 ///
512 /// let qdate = TypeKind::QDate { endian: Endianness::Little, utc: false };
513 /// assert_eq!(qdate, TypeKind::QDate { endian: Endianness::Little, utc: false });
514 /// ```
515 QDate {
516 /// Byte order
517 endian: Endianness,
518 /// true = UTC, false = local time
519 utc: bool,
520 },
521 /// String data
522 ///
523 /// # Examples
524 ///
525 /// ```
526 /// use libmagic_rs::parser::ast::TypeKind;
527 ///
528 /// let s = TypeKind::String { max_length: None };
529 /// assert_eq!(s, TypeKind::String { max_length: None });
530 ///
531 /// let capped = TypeKind::String { max_length: Some(32) };
532 /// assert_eq!(capped, TypeKind::String { max_length: Some(32) });
533 /// ```
534 String {
535 /// Maximum length to read
536 max_length: Option<usize>,
537 },
538 /// UCS-2 (16-bit Unicode) string with explicit byte order.
539 ///
540 /// Backs the magic(5) `lestring16` (little-endian) and `bestring16`
541 /// (big-endian) keywords. Each character occupies two bytes in the
542 /// file; the reader stops at a U+0000 terminator (encoded as the
543 /// 2-byte sequence `0x00 0x00`) or at the end of the buffer. The
544 /// decoded value is returned as a Rust `String` (so non-ASCII
545 /// characters are preserved when valid UCS-2).
546 ///
547 /// # Examples
548 ///
549 /// ```
550 /// use libmagic_rs::parser::ast::{TypeKind, Endianness};
551 ///
552 /// let le = TypeKind::String16 { endian: Endianness::Little };
553 /// assert_eq!(le, TypeKind::String16 { endian: Endianness::Little });
554 ///
555 /// let be = TypeKind::String16 { endian: Endianness::Big };
556 /// assert_eq!(be, TypeKind::String16 { endian: Endianness::Big });
557 /// ```
558 String16 {
559 /// Endianness for the 16-bit code units.
560 endian: Endianness,
561 },
562 /// Pascal string (length-prefixed, supports 1/2/4-byte prefix, with optional max length)
563 ///
564 /// Pascal strings store the length as a prefix (1, 2, or 4 bytes, with configurable endianness), followed by
565 /// that many bytes of string data. Unlike C strings, they are not null-terminated.
566 ///
567 /// # Examples
568 ///
569 /// ```
570 /// use libmagic_rs::parser::ast::{TypeKind, PStringLengthWidth};
571 ///
572 /// let pstring = TypeKind::PString { max_length: None, length_width: PStringLengthWidth::OneByte, length_includes_itself: false };
573 /// assert_eq!(pstring, TypeKind::PString { max_length: None, length_width: PStringLengthWidth::OneByte, length_includes_itself: false });
574 ///
575 /// let limited = TypeKind::PString { max_length: Some(64), length_width: PStringLengthWidth::TwoByteBE, length_includes_itself: false };
576 /// assert_eq!(limited, TypeKind::PString { max_length: Some(64), length_width: PStringLengthWidth::TwoByteBE, length_includes_itself: false });
577 ///
578 /// // /J flag: stored length includes the length field itself
579 /// let jpeg = TypeKind::PString { max_length: None, length_width: PStringLengthWidth::TwoByteBE, length_includes_itself: true };
580 /// assert_eq!(jpeg, TypeKind::PString { max_length: None, length_width: PStringLengthWidth::TwoByteBE, length_includes_itself: true });
581 /// ```
582 PString {
583 /// Maximum length to read (caps the length value)
584 max_length: Option<usize>,
585 /// Width of the length prefix
586 length_width: PStringLengthWidth,
587 /// Whether the stored length includes the length field itself (`/J` flag)
588 length_includes_itself: bool,
589 },
590 /// Regular expression matching against file contents
591 ///
592 /// Regex rules match a POSIX-extended regular expression pattern against the
593 /// file buffer. Patterns are compiled with multi-line mode always enabled
594 /// (matching libmagic's unconditional `REG_NEWLINE`), so `^` and `$` match
595 /// at line boundaries and `.` does not match `\n`. The `flags` control
596 /// case sensitivity and anchor advance semantics; the `count` field
597 /// controls the scan window (byte or line bounds). The scan window is
598 /// always capped at 8192 bytes (matching GNU `file`'s `FILE_REGEX_MAX`;
599 /// enforced in the evaluator).
600 ///
601 /// # Examples
602 ///
603 /// ```
604 /// use libmagic_rs::parser::ast::{RegexCount, RegexFlags, TypeKind};
605 /// use std::num::NonZeroU32;
606 ///
607 /// // Plain `regex` -- no flags, default 8192-byte scan window.
608 /// let plain = TypeKind::Regex {
609 /// flags: RegexFlags::default(),
610 /// count: RegexCount::Default,
611 /// };
612 ///
613 /// // `regex/1l` -- scan the first line only.
614 /// let first_line = TypeKind::Regex {
615 /// flags: RegexFlags::default(),
616 /// count: RegexCount::Lines(NonZeroU32::new(1)),
617 /// };
618 ///
619 /// // `regex/cs` -- case-insensitive, anchor advances to match-start.
620 /// let case_insensitive_start = TypeKind::Regex {
621 /// flags: RegexFlags {
622 /// case_insensitive: true,
623 /// start_offset: true,
624 /// },
625 /// count: RegexCount::Default,
626 /// };
627 /// ```
628 Regex {
629 /// Modifier flags from the `/[cs]` suffix (`/c` case-insensitive,
630 /// `/s` start-offset anchor). Line-mode is encoded by the
631 /// [`RegexCount::Lines`] variant of `count`, not a flag.
632 flags: RegexFlags,
633 /// Scan window specifier: default 8192 bytes, explicit byte
634 /// count, or explicit line count. See [`RegexCount`] for the
635 /// three cases.
636 count: RegexCount,
637 },
638 /// Multi-byte pattern search within a bounded range
639 ///
640 /// Search rules look for a literal byte pattern within `range` bytes of
641 /// the offset. Unlike [`TypeKind::String`], which only matches at the
642 /// exact offset, `search` scans forward up to `range` bytes for the
643 /// first occurrence. The range is **mandatory** per GNU `file`'s
644 /// magic(5) specification and is stored as a [`NonZeroUsize`] so a
645 /// zero-range search is unrepresentable.
646 ///
647 /// # Examples
648 ///
649 /// ```
650 /// use libmagic_rs::parser::ast::TypeKind;
651 /// use std::num::NonZeroUsize;
652 ///
653 /// // `search/256` -- scan up to 256 bytes for the literal pattern.
654 /// let bounded = TypeKind::Search {
655 /// range: NonZeroUsize::new(256).unwrap(),
656 /// };
657 /// ```
658 Search {
659 /// Scan window width in bytes, starting at the rule's offset.
660 range: NonZeroUsize,
661 },
662 /// Control-flow directive (`default`, `clear`, `name`, `use`,
663 /// `indirect`, `offset`).
664 ///
665 /// These magic(5) keywords do not read or compare bytes; they modify
666 /// how a rule set is traversed. All six variants are fully evaluated:
667 /// `default` fires as a fallback when no sibling at the same level
668 /// has matched; `clear` resets that flag; `name`/`use` support
669 /// subroutine definition and invocation; `indirect` re-enters the
670 /// rule set at a resolved offset; `offset` emits the resolved file
671 /// position as `Value::Uint` for printf-style message substitution.
672 /// See [`MetaType`] for the individual variants.
673 ///
674 /// # Examples
675 ///
676 /// ```
677 /// use libmagic_rs::parser::ast::{MetaType, TypeKind};
678 /// let default_rule = TypeKind::Meta(MetaType::Default);
679 /// assert_eq!(default_rule, TypeKind::Meta(MetaType::Default));
680 /// ```
681 Meta(MetaType),
682}
683
684/// Regex modifier flags parsed from the `/[cs]` suffix on a `regex` rule.
685///
686/// The `/l` "line-based window" modifier is **not** represented here; it
687/// lives on [`RegexCount::Lines`] so that the type-level encoding makes
688/// "line count" and "byte count" mutually exclusive. An earlier design
689/// used two separate fields (`line_based: bool` + `count: Option<u32>`)
690/// which admitted the cross-field state `line_based: true, count: None`;
691/// under the current encoding that case is expressed explicitly as
692/// [`RegexCount::Lines(None)`](RegexCount::Lines) -- the `regex/l`
693/// shorthand -- and is behaviorally equivalent to [`RegexCount::Default`]
694/// (both walk the full 8192-byte capped window).
695///
696/// All flags default to `false` via [`RegexFlags::default`], equivalent
697/// to a plain `regex` with no `/c` or `/s` suffix.
698///
699/// # Examples
700///
701/// ```
702/// use libmagic_rs::parser::ast::RegexFlags;
703///
704/// let plain = RegexFlags::default();
705/// assert!(!plain.case_insensitive);
706/// assert!(!plain.start_offset);
707///
708/// let case_and_start = RegexFlags::default()
709/// .with_case_insensitive(true)
710/// .with_start_offset(true);
711/// assert!(case_and_start.case_insensitive);
712/// assert!(case_and_start.start_offset);
713/// ```
714#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq)]
715pub struct RegexFlags {
716 /// `/c` -- case-insensitive matching. When `true`, ASCII letter
717 /// casing is ignored during pattern matching.
718 pub case_insensitive: bool,
719 /// `/s` -- advance the GNU `file` previous-match anchor to the start
720 /// of the matched region instead of its end. Matches libmagic's
721 /// `REGEX_OFFSET_START` flag, which zeros the length contribution in
722 /// `moffset()` for `FILE_REGEX`. Useful for chaining child rules that
723 /// need to re-match from the position where the parent regex began.
724 pub start_offset: bool,
725}
726
727impl RegexFlags {
728 /// Builder-style setter for [`RegexFlags::case_insensitive`] (`/c`).
729 ///
730 /// Chain after [`RegexFlags::default()`] to construct `RegexFlags`
731 /// values without exhaustive struct literals. If a new flag is
732 /// added to `RegexFlags` in the future, callers using the builder
733 /// form keep compiling; callers using struct literals would need
734 /// an update.
735 #[must_use]
736 pub const fn with_case_insensitive(mut self, value: bool) -> Self {
737 self.case_insensitive = value;
738 self
739 }
740
741 /// Builder-style setter for [`RegexFlags::start_offset`] (`/s`).
742 ///
743 /// Chain after [`RegexFlags::default()`] to construct `RegexFlags`
744 /// values without exhaustive struct literals.
745 #[must_use]
746 pub const fn with_start_offset(mut self, value: bool) -> Self {
747 self.start_offset = value;
748 self
749 }
750}
751
752/// Scan window specifier for a [`TypeKind::Regex`] rule.
753///
754/// Encodes the three mutually-exclusive scan modes in a single enum so
755/// that the "byte count" and "line count" cases cannot be confused. The
756/// `regex/l` shorthand (line mode with no explicit count) is represented
757/// explicitly as [`RegexCount::Lines(None)`](RegexCount::Lines), which
758/// is behaviorally equivalent to [`RegexCount::Default`] -- both walk
759/// the full 8192-byte capped window -- but preserves the magic-file
760/// surface syntax of the original rule. The 8192-byte hard cap
761/// (matching GNU `file`'s `FILE_REGEX_MAX`) is applied by the evaluator
762/// on every variant.
763///
764/// # Examples
765///
766/// ```
767/// use libmagic_rs::parser::ast::RegexCount;
768/// use std::num::NonZeroU32;
769///
770/// // Plain `regex` (no suffix): default 8192-byte window.
771/// assert_eq!(RegexCount::default(), RegexCount::Default);
772///
773/// // `regex/100`: scan at most 100 bytes.
774/// let hundred_bytes = RegexCount::Bytes(NonZeroU32::new(100).unwrap());
775///
776/// // `regex/1l`: scan the first line.
777/// let one_line = RegexCount::Lines(NonZeroU32::new(1));
778///
779/// // `regex/l`: line-mode with no explicit count (walks terminators
780/// // to the end of the 8192-byte capped window).
781/// let unbounded_lines = RegexCount::Lines(None);
782/// ```
783#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq)]
784pub enum RegexCount {
785 /// No scan bound (plain `regex` with no suffix). Scans the default
786 /// 8192-byte window from the rule's offset.
787 #[default]
788 Default,
789 /// Byte-bounded scan (`regex/N` with no `/l` flag). The window is
790 /// `min(n, 8192, remaining_buffer)` bytes long. `NonZeroU32` makes
791 /// a zero-byte scan unrepresentable.
792 Bytes(NonZeroU32),
793 /// Line-bounded scan (`regex/Nl` or `regex/l`). The window walks
794 /// LF / CRLF / bare CR line terminators from the offset. With
795 /// `Some(n)`, the walk stops after the Nth terminator (inclusive).
796 /// With `None` (the `regex/l` shorthand), the walk continues to
797 /// the end of the 8192-byte capped window. Either way the
798 /// effective byte window is capped at 8192.
799 Lines(Option<NonZeroU32>),
800}
801
802impl TypeKind {
803 /// Returns the bit width of integer types, or `None` for non-integer types (e.g., String).
804 ///
805 /// # Examples
806 ///
807 /// ```
808 /// use libmagic_rs::parser::ast::{TypeKind, Endianness};
809 ///
810 /// assert_eq!(TypeKind::Byte { signed: false }.bit_width(), Some(8));
811 /// assert_eq!(TypeKind::Short { endian: Endianness::Native, signed: true }.bit_width(), Some(16));
812 /// assert_eq!(TypeKind::Long { endian: Endianness::Native, signed: true }.bit_width(), Some(32));
813 /// assert_eq!(TypeKind::Quad { endian: Endianness::Native, signed: true }.bit_width(), Some(64));
814 /// assert_eq!(TypeKind::Float { endian: Endianness::Native }.bit_width(), Some(32));
815 /// assert_eq!(TypeKind::Double { endian: Endianness::Native }.bit_width(), Some(64));
816 /// assert_eq!(TypeKind::String { max_length: None }.bit_width(), None);
817 /// ```
818 #[must_use]
819 pub const fn bit_width(&self) -> Option<u32> {
820 match self {
821 Self::Byte { .. } => Some(8),
822 Self::Short { .. } => Some(16),
823 Self::Long { .. } | Self::Float { .. } | Self::Date { .. } => Some(32),
824 Self::Quad { .. } | Self::Double { .. } | Self::QDate { .. } => Some(64),
825 Self::String { .. }
826 | Self::String16 { .. }
827 | Self::PString { .. }
828 | Self::Regex { .. }
829 | Self::Search { .. }
830 | Self::Meta(_) => None,
831 }
832 }
833}
834
835/// Comparison and bitwise operators
836#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
837#[non_exhaustive]
838pub enum Operator {
839 /// Equality comparison (`=` or `==`)
840 ///
841 /// # Examples
842 ///
843 /// ```
844 /// use libmagic_rs::parser::ast::Operator;
845 ///
846 /// let op = Operator::Equal;
847 /// assert_eq!(op, Operator::Equal);
848 /// ```
849 Equal,
850 /// Inequality comparison (`!=` or `<>`)
851 ///
852 /// # Examples
853 ///
854 /// ```
855 /// use libmagic_rs::parser::ast::Operator;
856 ///
857 /// let op = Operator::NotEqual;
858 /// assert_eq!(op, Operator::NotEqual);
859 /// ```
860 NotEqual,
861 /// Less-than comparison (`<`)
862 ///
863 /// # Examples
864 ///
865 /// ```
866 /// use libmagic_rs::parser::ast::Operator;
867 ///
868 /// let op = Operator::LessThan;
869 /// assert_eq!(op, Operator::LessThan);
870 /// ```
871 LessThan,
872 /// Greater-than comparison (`>`)
873 ///
874 /// # Examples
875 ///
876 /// ```
877 /// use libmagic_rs::parser::ast::Operator;
878 ///
879 /// let op = Operator::GreaterThan;
880 /// assert_eq!(op, Operator::GreaterThan);
881 /// ```
882 GreaterThan,
883 /// Less-than-or-equal comparison (`<=`)
884 ///
885 /// # Examples
886 ///
887 /// ```
888 /// use libmagic_rs::parser::ast::Operator;
889 ///
890 /// let op = Operator::LessEqual;
891 /// assert_eq!(op, Operator::LessEqual);
892 /// ```
893 LessEqual,
894 /// Greater-than-or-equal comparison (`>=`)
895 ///
896 /// # Examples
897 ///
898 /// ```
899 /// use libmagic_rs::parser::ast::Operator;
900 ///
901 /// let op = Operator::GreaterEqual;
902 /// assert_eq!(op, Operator::GreaterEqual);
903 /// ```
904 GreaterEqual,
905 /// Bitwise AND operation without mask (`&`)
906 ///
907 /// # Examples
908 ///
909 /// ```
910 /// use libmagic_rs::parser::ast::Operator;
911 ///
912 /// let op = Operator::BitwiseAnd;
913 /// assert_eq!(op, Operator::BitwiseAnd);
914 /// ```
915 BitwiseAnd,
916 /// Bitwise AND operation with mask value (`&` with a mask operand)
917 ///
918 /// # Examples
919 ///
920 /// ```
921 /// use libmagic_rs::parser::ast::Operator;
922 ///
923 /// let op = Operator::BitwiseAndMask(0xFF00);
924 /// assert_eq!(op, Operator::BitwiseAndMask(0xFF00));
925 /// ```
926 BitwiseAndMask(u64),
927 /// Bitwise XOR operation (`^`)
928 ///
929 /// # Examples
930 ///
931 /// ```
932 /// use libmagic_rs::parser::ast::Operator;
933 ///
934 /// let op = Operator::BitwiseXor;
935 /// assert_eq!(op, Operator::BitwiseXor);
936 /// ```
937 BitwiseXor,
938 /// Bitwise NOT/complement operation (`~`)
939 ///
940 /// # Examples
941 ///
942 /// ```
943 /// use libmagic_rs::parser::ast::Operator;
944 ///
945 /// let op = Operator::BitwiseNot;
946 /// assert_eq!(op, Operator::BitwiseNot);
947 /// ```
948 BitwiseNot,
949 /// Match any value; condition always succeeds (`x`)
950 ///
951 /// # Examples
952 ///
953 /// ```
954 /// use libmagic_rs::parser::ast::Operator;
955 ///
956 /// let op = Operator::AnyValue;
957 /// assert_eq!(op, Operator::AnyValue);
958 /// ```
959 AnyValue,
960}
961
962/// Value types for rule matching
963#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
964#[non_exhaustive]
965pub enum Value {
966 /// Unsigned integer value
967 ///
968 /// # Examples
969 ///
970 /// ```
971 /// use libmagic_rs::parser::ast::Value;
972 ///
973 /// let val = Value::Uint(0xDEAD_BEEF);
974 /// assert_eq!(val, Value::Uint(0xDEAD_BEEF));
975 /// ```
976 Uint(u64),
977 /// Signed integer value
978 ///
979 /// # Examples
980 ///
981 /// ```
982 /// use libmagic_rs::parser::ast::Value;
983 ///
984 /// let val = Value::Int(-42);
985 /// assert_eq!(val, Value::Int(-42));
986 /// ```
987 Int(i64),
988 /// Floating-point value (used for `float` and `double` types)
989 ///
990 /// # Examples
991 ///
992 /// ```
993 /// use libmagic_rs::parser::ast::Value;
994 ///
995 /// let val = Value::Float(3.14);
996 /// assert_eq!(val, Value::Float(3.14));
997 /// ```
998 Float(f64),
999 /// Byte sequence
1000 ///
1001 /// # Examples
1002 ///
1003 /// ```
1004 /// use libmagic_rs::parser::ast::Value;
1005 ///
1006 /// let val = Value::Bytes(vec![0x7f, 0x45, 0x4c, 0x46]);
1007 /// assert_eq!(val, Value::Bytes(vec![0x7f, 0x45, 0x4c, 0x46]));
1008 /// ```
1009 Bytes(Vec<u8>),
1010 /// String value
1011 ///
1012 /// # Examples
1013 ///
1014 /// ```
1015 /// use libmagic_rs::parser::ast::Value;
1016 ///
1017 /// let val = Value::String("MZ".to_string());
1018 /// assert_eq!(val, Value::String("MZ".to_string()));
1019 /// ```
1020 String(String),
1021}
1022
1023/// Endianness specification for multi-byte values
1024#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
1025pub enum Endianness {
1026 /// Little-endian byte order (least significant byte first)
1027 ///
1028 /// # Examples
1029 ///
1030 /// ```
1031 /// use libmagic_rs::parser::ast::Endianness;
1032 ///
1033 /// let e = Endianness::Little;
1034 /// assert_eq!(e, Endianness::Little);
1035 /// ```
1036 Little,
1037 /// Big-endian byte order (most significant byte first)
1038 ///
1039 /// # Examples
1040 ///
1041 /// ```
1042 /// use libmagic_rs::parser::ast::Endianness;
1043 ///
1044 /// let e = Endianness::Big;
1045 /// assert_eq!(e, Endianness::Big);
1046 /// ```
1047 Big,
1048 /// Native system byte order (matches target architecture)
1049 ///
1050 /// # Examples
1051 ///
1052 /// ```
1053 /// use libmagic_rs::parser::ast::Endianness;
1054 ///
1055 /// let e = Endianness::Native;
1056 /// assert_eq!(e, Endianness::Native);
1057 /// ```
1058 Native,
1059}
1060
1061/// Strength modifier for magic rules
1062///
1063/// Strength modifiers adjust the default strength calculation for a rule.
1064/// They are specified using the `!:strength` directive in magic files.
1065///
1066/// # Examples
1067///
1068/// ```
1069/// use libmagic_rs::parser::ast::StrengthModifier;
1070///
1071/// let add = StrengthModifier::Add(10); // !:strength +10
1072/// let sub = StrengthModifier::Subtract(5); // !:strength -5
1073/// let mul = StrengthModifier::Multiply(2); // !:strength *2
1074/// let div = StrengthModifier::Divide(2); // !:strength /2
1075/// let set = StrengthModifier::Set(50); // !:strength =50
1076/// ```
1077#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
1078pub enum StrengthModifier {
1079 /// Add to the default strength: `!:strength +N`
1080 Add(i32),
1081 /// Subtract from the default strength: `!:strength -N`
1082 Subtract(i32),
1083 /// Multiply the default strength: `!:strength *N`
1084 Multiply(i32),
1085 /// Divide the default strength: `!:strength /N`
1086 Divide(i32),
1087 /// Set strength to an absolute value: `!:strength =N` or `!:strength N`
1088 Set(i32),
1089}
1090
1091/// Arithmetic operation applied to a value read from the file *before* the
1092/// rule's comparison operator is evaluated.
1093///
1094/// magic(5) supports `+`, `-`, `*`, `/`, `%`, `|`, and `^` between the type
1095/// keyword and the comparison value (e.g., `lelong+1 x volume %d` reads a
1096/// long, adds 1, and formats the transformed value into the message).
1097/// Bitwise AND (`&MASK`) is *not* part of this enum because it is already
1098/// represented at the operator level via [`Operator::BitwiseAndMask`].
1099///
1100/// The operand is signed (`i64`) so that subtraction and negative multipliers
1101/// round-trip cleanly. Bitwise ops reinterpret the operand as a `u64` bit
1102/// pattern at evaluation time, matching libmagic's `apprentice.c::mconvert`.
1103#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
1104#[non_exhaustive]
1105pub enum ValueTransformOp {
1106 /// Addition (`type+N`).
1107 Add,
1108 /// Subtraction (`type-N`).
1109 Sub,
1110 /// Multiplication (`type*N`).
1111 Mul,
1112 /// Truncating integer division (`type/N`). Division by zero is rejected
1113 /// at evaluation time.
1114 Div,
1115 /// Remainder (`type%N`). Modulo by zero is rejected at evaluation time.
1116 Mod,
1117 /// Bitwise AND (`type&N`).
1118 ///
1119 /// magic(5) `&MASK` was historically encoded at the operator level
1120 /// via [`Operator::BitwiseAndMask`] (which combines mask+equal in
1121 /// one step). That encoding cannot represent rules like `lelong&0xff
1122 /// x %d` (mask + any-value, with the masked value used in format
1123 /// substitution). The parser promotes `&MASK` to this `BitAnd`
1124 /// transform when followed by another operator (`x`, `>`, `!=`, ...)
1125 /// so the read value is masked before comparison and before printf
1126 /// substitution. The legacy `&MASK VALUE` form (mask + implicit
1127 /// equal) keeps using `Operator::BitwiseAndMask` for backwards
1128 /// compatibility.
1129 BitAnd,
1130 /// Bitwise OR (`type|N`).
1131 Or,
1132 /// Bitwise XOR (`type^N`).
1133 Xor,
1134}
1135
1136/// A pre-comparison value transform: `(op, operand)`.
1137///
1138/// Applied to the value read from the file before the rule's comparison
1139/// operator runs. See [`ValueTransformOp`] for the supported operations.
1140///
1141/// # Examples
1142///
1143/// ```
1144/// use libmagic_rs::parser::ast::{ValueTransform, ValueTransformOp};
1145///
1146/// // `lelong+1` -> add 1 to the read value
1147/// let t = ValueTransform { op: ValueTransformOp::Add, operand: 1 };
1148/// assert_eq!(t.op, ValueTransformOp::Add);
1149/// assert_eq!(t.operand, 1);
1150/// ```
1151#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
1152pub struct ValueTransform {
1153 /// Operation to apply.
1154 pub op: ValueTransformOp,
1155 /// Operand to combine with the read value.
1156 pub operand: i64,
1157}
1158
1159/// Magic rule representation in the AST
1160#[derive(Debug, Clone, Serialize, Deserialize)]
1161pub struct MagicRule {
1162 /// Offset specification for where to read data
1163 pub offset: OffsetSpec,
1164 /// Type of data to read and interpret
1165 pub typ: TypeKind,
1166 /// Comparison operator to apply
1167 pub op: Operator,
1168 /// Expected value for comparison
1169 pub value: Value,
1170 /// Human-readable message for this rule
1171 pub message: String,
1172 /// Child rules that are evaluated if this rule matches
1173 pub children: Vec<MagicRule>,
1174 /// Indentation level for hierarchical rules
1175 pub level: u32,
1176 /// Optional strength modifier from `!:strength` directive
1177 pub strength_modifier: Option<StrengthModifier>,
1178 /// Optional pre-comparison value transform from a magic-file
1179 /// type-suffix like `lelong+1` or `ulequad/1073741824`. When set,
1180 /// the read value is transformed *before* `op` is evaluated and
1181 /// before the message's `%`-format substitution, so format
1182 /// specifiers see the post-transform number.
1183 ///
1184 /// `#[serde(default)]` keeps existing serialized AST snapshots
1185 /// (which never had this field) round-tripping correctly: missing
1186 /// fields deserialize to `None`, which means "no transform" --
1187 /// the historical behavior.
1188 #[serde(default)]
1189 pub value_transform: Option<ValueTransform>,
1190}
1191
1192/// Validation errors returned by [`MagicRule::validate`].
1193#[derive(Debug, thiserror::Error, PartialEq, Eq)]
1194#[non_exhaustive]
1195pub enum MagicRuleValidationError {
1196 /// Rule message is empty. Messages are user-facing and required
1197 /// for meaningful output.
1198 #[error("rule message must not be empty")]
1199 EmptyMessage,
1200
1201 /// The child rule at `child_index` has `level <= self.level`,
1202 /// violating the "children must nest deeper than the parent"
1203 /// invariant of the hierarchical indentation-based DSL.
1204 #[error(
1205 "child rule at index {child_index} has level {child_level}, \
1206 must be greater than parent level {parent_level}"
1207 )]
1208 InvalidChildLevel {
1209 /// Index of the offending child in `self.children`.
1210 child_index: usize,
1211 /// Level of the child rule.
1212 child_level: u32,
1213 /// Level of the parent rule.
1214 parent_level: u32,
1215 },
1216
1217 /// Rule `level` exceeds the maximum supported depth. The limit is a
1218 /// hardening mechanism against stack overflow during deep recursion;
1219 /// libmagic files in the wild rarely go beyond 10 levels.
1220 #[error("rule level {level} exceeds maximum supported depth {max}")]
1221 LevelTooDeep {
1222 /// The invalid level value.
1223 level: u32,
1224 /// The maximum allowed depth.
1225 max: u32,
1226 },
1227}
1228
1229impl MagicRule {
1230 /// Hard structural ceiling on rule `level`.
1231 ///
1232 /// This is a conservative upper bound enforced by
1233 /// [`MagicRule::validate`] to keep the AST shape sane: real
1234 /// magic files in the wild rarely exceed ~10 levels of nesting,
1235 /// so rejecting rules with `level > 1000` catches obviously
1236 /// pathological input at construction time without constraining
1237 /// any legitimate rule.
1238 ///
1239 /// This ceiling is **independent of** the evaluator's
1240 /// `EvaluationConfig::max_recursion_depth` (default 20), which
1241 /// is the *runtime* recursion guard applied during rule
1242 /// evaluation. The evaluator limit is the first one that fires
1243 /// in practice -- a rule tree with 50 levels passes this
1244 /// structural check but is aborted by the evaluator long before
1245 /// reaching `MAX_LEVEL`. The two limits serve different purposes:
1246 /// `MAX_LEVEL` is an AST-shape sanity check, and
1247 /// `max_recursion_depth` is a per-evaluation resource bound.
1248 pub const MAX_LEVEL: u32 = 1000;
1249
1250 /// Construct a top-level rule with no children and no strength
1251 /// modifier.
1252 ///
1253 /// This is the most common constructor for programmatically building
1254 /// rules outside the parser. To add children, mutate
1255 /// [`MagicRule::children`] directly, or use [`MagicRule::with_children`].
1256 /// To set a strength modifier, use
1257 /// [`MagicRule::with_strength_modifier`].
1258 ///
1259 /// # Examples
1260 ///
1261 /// ```rust
1262 /// use libmagic_rs::{MagicRule, OffsetSpec, Operator, TypeKind, Value};
1263 ///
1264 /// let rule = MagicRule::new(
1265 /// OffsetSpec::Absolute(0),
1266 /// TypeKind::Byte { signed: false },
1267 /// Operator::Equal,
1268 /// Value::Uint(0x7f),
1269 /// "ELF magic byte".to_string(),
1270 /// );
1271 /// assert_eq!(rule.level, 0);
1272 /// assert!(rule.children.is_empty());
1273 /// assert!(rule.validate().is_ok());
1274 /// ```
1275 #[must_use]
1276 pub fn new(
1277 offset: OffsetSpec,
1278 typ: TypeKind,
1279 op: Operator,
1280 value: Value,
1281 message: String,
1282 ) -> Self {
1283 Self {
1284 offset,
1285 typ,
1286 op,
1287 value,
1288 message,
1289 children: vec![],
1290 level: 0,
1291 strength_modifier: None,
1292 value_transform: None,
1293 }
1294 }
1295
1296 /// Replace `self.children` with the given children and return the
1297 /// modified rule. Builder-style for chaining.
1298 #[must_use]
1299 pub fn with_children(mut self, children: Vec<MagicRule>) -> Self {
1300 self.children = children;
1301 self
1302 }
1303
1304 /// Set `self.strength_modifier` to the given value and return the
1305 /// modified rule. Builder-style for chaining.
1306 #[must_use]
1307 pub const fn with_strength_modifier(mut self, modifier: StrengthModifier) -> Self {
1308 self.strength_modifier = Some(modifier);
1309 self
1310 }
1311
1312 /// Set `self.level` to the given value and return the modified rule.
1313 /// Builder-style for chaining; typically used only when constructing
1314 /// child rules programmatically.
1315 #[must_use]
1316 pub const fn with_level(mut self, level: u32) -> Self {
1317 self.level = level;
1318 self
1319 }
1320
1321 /// Validate structural invariants of the rule.
1322 ///
1323 /// This checks invariants that the parser enforces automatically but
1324 /// that programmatic constructors (especially via serde deserialize)
1325 /// can violate:
1326 ///
1327 /// * Message must not be empty.
1328 /// * `level` must not exceed [`Self::MAX_LEVEL`].
1329 /// * Every child's `level` must be strictly greater than
1330 /// `self.level`, and each child must recursively validate.
1331 ///
1332 /// This does *not* validate that `value` is shape-compatible with
1333 /// `typ` (e.g., a `Value::Uint` against a `TypeKind::String`); such
1334 /// mismatches are coerced or rejected by the evaluator at match time.
1335 ///
1336 /// # Errors
1337 ///
1338 /// Returns [`MagicRuleValidationError`] describing the first
1339 /// invariant violation encountered.
1340 ///
1341 /// # Examples
1342 ///
1343 /// ```rust
1344 /// use libmagic_rs::{MagicRule, OffsetSpec, Operator, TypeKind, Value};
1345 ///
1346 /// let rule = MagicRule::new(
1347 /// OffsetSpec::Absolute(0),
1348 /// TypeKind::Byte { signed: false },
1349 /// Operator::Equal,
1350 /// Value::Uint(0),
1351 /// "zero byte".to_string(),
1352 /// );
1353 /// assert!(rule.validate().is_ok());
1354 /// ```
1355 pub fn validate(&self) -> Result<(), MagicRuleValidationError> {
1356 if self.message.is_empty() {
1357 return Err(MagicRuleValidationError::EmptyMessage);
1358 }
1359 if self.level > Self::MAX_LEVEL {
1360 return Err(MagicRuleValidationError::LevelTooDeep {
1361 level: self.level,
1362 max: Self::MAX_LEVEL,
1363 });
1364 }
1365 for (child_index, child) in self.children.iter().enumerate() {
1366 if child.level <= self.level {
1367 return Err(MagicRuleValidationError::InvalidChildLevel {
1368 child_index,
1369 child_level: child.level,
1370 parent_level: self.level,
1371 });
1372 }
1373 child.validate()?;
1374 }
1375 Ok(())
1376 }
1377}
1378
1379#[cfg(test)]
1380mod tests {
1381 use super::*;
1382
/// `MagicRule::new` must fill every field the caller does not supply with
/// its "empty" default, and the resulting rule must validate.
#[test]
fn test_magic_rule_new_defaults() {
    let rule = MagicRule::new(
        OffsetSpec::Absolute(0),
        TypeKind::Byte { signed: false },
        Operator::Equal,
        Value::Uint(0x7f),
        "ELF".to_string(),
    );
    assert_eq!(rule.level, 0);
    assert!(rule.children.is_empty());
    assert!(rule.strength_modifier.is_none());
    // The constructor also initializes the value transform; pin that default
    // alongside the others so a regression there is caught.
    assert!(rule.value_transform.is_none());
    assert!(rule.validate().is_ok());
}
1397
/// Chaining the builder methods yields a parent with one child and the
/// requested strength modifier, and the tree validates.
#[test]
fn test_magic_rule_builder_chain() {
    // The nested rule sits one level below the root.
    let nested = MagicRule::new(
        OffsetSpec::Absolute(4),
        TypeKind::Byte { signed: false },
        Operator::Equal,
        Value::Uint(2),
        "64-bit".to_string(),
    )
    .with_level(1);

    let modifier = StrengthModifier::Add(10);
    let root = MagicRule::new(
        OffsetSpec::Absolute(0),
        TypeKind::Byte { signed: false },
        Operator::Equal,
        Value::Uint(0x7f),
        "ELF".to_string(),
    )
    .with_children(vec![nested])
    .with_strength_modifier(modifier);

    assert_eq!(root.children.len(), 1);
    assert_eq!(root.strength_modifier, Some(modifier));
    assert!(root.validate().is_ok());
}
1421
/// A rule whose message is empty must fail validation with `EmptyMessage`.
#[test]
fn test_magic_rule_validate_empty_message_rejected() {
    let nameless = MagicRule::new(
        OffsetSpec::Absolute(0),
        TypeKind::Byte { signed: false },
        Operator::Equal,
        Value::Uint(0),
        String::new(),
    );
    let outcome = nameless.validate();
    assert_eq!(outcome, Err(MagicRuleValidationError::EmptyMessage));
}
1433
/// A child that sits at the same level as its parent must be rejected with
/// `InvalidChildLevel` describing both levels and the child's index.
#[test]
fn test_magic_rule_validate_child_level_must_be_deeper() {
    // `new` leaves the level at 0, which is the parent's level as well.
    let sibling_level_child = MagicRule::new(
        OffsetSpec::Absolute(4),
        TypeKind::Byte { signed: false },
        Operator::Equal,
        Value::Uint(2),
        "child".to_string(),
    );
    let parent = MagicRule::new(
        OffsetSpec::Absolute(0),
        TypeKind::Byte { signed: false },
        Operator::Equal,
        Value::Uint(0x7f),
        "parent".to_string(),
    )
    .with_children(vec![sibling_level_child]);

    let expected = Err(MagicRuleValidationError::InvalidChildLevel {
        child_index: 0,
        child_level: 0,
        parent_level: 0,
    });
    assert_eq!(parent.validate(), expected);
}
1460
/// A level one past `MAX_LEVEL` must be rejected with `LevelTooDeep`.
#[test]
fn test_magic_rule_validate_level_too_deep() {
    let excessive_level = MagicRule::MAX_LEVEL + 1;
    let rule = MagicRule::new(
        OffsetSpec::Absolute(0),
        TypeKind::Byte { signed: false },
        Operator::Equal,
        Value::Uint(0),
        "deep".to_string(),
    )
    .with_level(excessive_level);

    let expected = Err(MagicRuleValidationError::LevelTooDeep {
        level: excessive_level,
        max: MagicRule::MAX_LEVEL,
    });
    assert_eq!(rule.validate(), expected);
}
1479
/// Absolute offsets compare equal to themselves for a positive and a
/// negative value.
#[test]
fn test_offset_spec_absolute() {
    for raw in [42, -10] {
        let spec = OffsetSpec::Absolute(raw);
        assert_eq!(spec, OffsetSpec::Absolute(raw));
    }
}
1489
/// An indirect offset keeps the base offset and adjustment it was built with.
#[test]
fn test_offset_spec_indirect() {
    let spec = OffsetSpec::Indirect {
        base_offset: 0x20,
        base_relative: false,
        pointer_type: TypeKind::Long {
            endian: Endianness::Little,
            signed: false,
        },
        adjustment: 4,
        adjustment_op: IndirectAdjustmentOp::Add,
        result_relative: false,
        endian: Endianness::Little,
    };

    let OffsetSpec::Indirect {
        base_offset,
        adjustment,
        ..
    } = spec
    else {
        panic!("Expected Indirect variant");
    };
    assert_eq!(base_offset, 0x20);
    assert_eq!(adjustment, 4);
}
1517
/// Relative offsets compare equal to themselves for a positive and a
/// negative value.
#[test]
fn test_offset_spec_relative() {
    for delta in [8, -4] {
        let spec = OffsetSpec::Relative(delta);
        assert_eq!(spec, OffsetSpec::Relative(delta));
    }
}
1527
/// From-end offsets compare equal to themselves for the usual negative
/// value and for an (unusual) positive one.
#[test]
fn test_offset_spec_from_end() {
    for distance in [-16, 8] {
        let spec = OffsetSpec::FromEnd(distance);
        assert_eq!(spec, OffsetSpec::FromEnd(distance));
    }
}
1537
/// The `Debug` rendering names the variant and shows its payload.
#[test]
fn test_offset_spec_debug() {
    let rendered = format!("{:?}", OffsetSpec::Absolute(100));
    for needle in ["Absolute", "100"] {
        assert!(rendered.contains(needle));
    }
}
1545
/// Cloning an indirect offset produces an equal value.
#[test]
fn test_offset_spec_clone() {
    let source = OffsetSpec::Indirect {
        base_offset: 0x10,
        base_relative: false,
        pointer_type: TypeKind::Short {
            endian: Endianness::Big,
            signed: true,
        },
        adjustment: -2,
        adjustment_op: IndirectAdjustmentOp::Add,
        result_relative: false,
        endian: Endianness::Big,
    };

    let duplicate = source.clone();
    assert_eq!(source, duplicate);
}
1564
/// A simple offset survives a JSON round trip unchanged.
#[test]
fn test_offset_spec_serialization() {
    let offset = OffsetSpec::Absolute(42);

    let encoded = serde_json::to_string(&offset).expect("Failed to serialize");
    let round_tripped =
        serde_json::from_str::<OffsetSpec>(&encoded).expect("Failed to deserialize");

    assert_eq!(offset, round_tripped);
}
1575
/// The struct-like `Indirect` variant survives a JSON round trip unchanged.
#[test]
fn test_offset_spec_indirect_serialization() {
    let pointer_type = TypeKind::Long {
        endian: Endianness::Native,
        signed: false,
    };
    let indirect = OffsetSpec::Indirect {
        base_offset: 0x100,
        base_relative: false,
        pointer_type,
        adjustment: 12,
        adjustment_op: IndirectAdjustmentOp::Add,
        result_relative: false,
        endian: Endianness::Native,
    };

    let encoded = serde_json::to_string(&indirect).expect("Failed to serialize");
    let round_tripped =
        serde_json::from_str::<OffsetSpec>(&encoded).expect("Failed to deserialize");

    assert_eq!(indirect, round_tripped);
}
1597
/// Every sample offset is distinct from every other sample offset.
#[test]
fn test_all_offset_spec_variants() {
    let variants = [
        OffsetSpec::Absolute(0),
        OffsetSpec::Absolute(-100),
        OffsetSpec::Indirect {
            base_offset: 0x20,
            base_relative: false,
            pointer_type: TypeKind::Byte { signed: true },
            adjustment: 0,
            adjustment_op: IndirectAdjustmentOp::Add,
            result_relative: false,
            endian: Endianness::Little,
        },
        OffsetSpec::Relative(50),
        OffsetSpec::Relative(-25),
        OffsetSpec::FromEnd(-8),
        OffsetSpec::FromEnd(4),
    ];

    // Compare each entry against all entries at a different index.
    for (i, variant) in variants.iter().enumerate() {
        let others = variants.iter().enumerate().filter(|&(j, _)| j != i);
        for (j, other) in others {
            assert_ne!(
                variant, other,
                "Variants at indices {i} and {j} should be different"
            );
        }
    }
}
1630
/// Each endianness value is preserved when stored in an indirect offset.
#[test]
fn test_endianness_variants() {
    for expected in [Endianness::Little, Endianness::Big, Endianness::Native] {
        let spec = OffsetSpec::Indirect {
            base_offset: 0,
            base_relative: false,
            pointer_type: TypeKind::Long {
                endian: expected,
                signed: false,
            },
            adjustment: 0,
            adjustment_op: IndirectAdjustmentOp::Add,
            result_relative: false,
            endian: expected,
        };

        // Read the stored endianness back out of the variant.
        let OffsetSpec::Indirect { endian: stored, .. } = spec else {
            panic!("Expected Indirect variant");
        };
        assert_eq!(expected, stored);
    }
}
1661
1662 // Value enum tests
/// Unsigned values compare equal to themselves, including `u64::MAX`.
#[test]
fn test_value_uint() {
    for raw in [42, u64::MAX] {
        let value = Value::Uint(raw);
        assert_eq!(value, Value::Uint(raw));
    }
}
1672
/// Signed values compare equal to themselves for positive, negative and
/// extreme inputs.
#[test]
fn test_value_int() {
    for raw in [100, -50, i64::MAX, i64::MIN] {
        let value = Value::Int(raw);
        assert_eq!(value, Value::Int(raw));
    }
}
1687
/// Byte values are equal when their contents match and unequal otherwise.
#[test]
fn test_value_bytes() {
    let no_bytes = Value::Bytes(Vec::new());
    assert_eq!(no_bytes, Value::Bytes(Vec::new()));

    let elf_magic = Value::Bytes(vec![0x7f, 0x45, 0x4c, 0x46]);
    assert_eq!(elf_magic, Value::Bytes(vec![0x7f, 0x45, 0x4c, 0x46]));

    // A different byte sequence must not compare equal.
    let zip_magic = Value::Bytes(vec![0x50, 0x4b, 0x03, 0x04]);
    assert_ne!(elf_magic, zip_magic);
}
1700
/// String values compare equal to themselves for empty, ASCII and
/// non-ASCII text.
#[test]
fn test_value_string() {
    for text in ["", "Hello, World!", "🦀 Rust"] {
        let value = Value::String(text.to_string());
        assert_eq!(value, Value::String(text.to_string()));
    }
}
1713
/// Values of different variants never compare equal, even when their
/// payloads look alike.
#[test]
fn test_value_comparison() {
    let samples = [
        Value::Uint(42),
        Value::Int(42),
        Value::Float(42.0),
        Value::Bytes(vec![42]),
        Value::String("42".to_string()),
    ];

    // Check every unordered pair exactly once.
    for (index, left) in samples.iter().enumerate() {
        for right in &samples[index + 1..] {
            assert_ne!(left, right);
        }
    }
}
1734
/// The `Debug` rendering names the variant and shows its payload.
#[test]
fn test_value_debug() {
    let cases = [
        (Value::Uint(123), "Uint", "123"),
        (Value::String("test".to_string()), "String", "test"),
    ];
    for (value, variant_name, payload) in cases {
        let rendered = format!("{value:?}");
        assert!(rendered.contains(variant_name));
        assert!(rendered.contains(payload));
    }
}
1747
/// A clone starts out equal to its source and owns an independent buffer.
#[test]
fn test_value_clone() {
    let original = Value::Bytes(vec![1, 2, 3, 4]);
    let mut cloned = original.clone();
    assert_eq!(original, cloned);

    // Mutate only the clone: if the two values shared storage, the original
    // would observe the extra byte as well.
    match &mut cloned {
        Value::Bytes(bytes) => bytes.push(5),
        _ => panic!("Expected Bytes variants"),
    }
    assert_eq!(original, Value::Bytes(vec![1, 2, 3, 4]));
    assert_eq!(cloned, Value::Bytes(vec![1, 2, 3, 4, 5]));
}
1763
/// Float values compare equal to themselves for positive, negative and
/// zero inputs.
#[test]
fn test_value_float() {
    for raw in [3.125, -1.5, 0.0] {
        let value = Value::Float(raw);
        assert_eq!(value, Value::Float(raw));
    }
}
1775
/// One sample of each `Value` variant survives a JSON round trip unchanged.
#[test]
fn test_value_serialization() {
    let samples = [
        Value::Uint(42),
        Value::Int(-100),
        Value::Float(3.125),
        Value::Bytes(vec![0x7f, 0x45, 0x4c, 0x46]),
        Value::String("ELF executable".to_string()),
    ];

    for sample in samples {
        let encoded = serde_json::to_string(&sample).expect("Failed to serialize Value");
        let round_tripped =
            serde_json::from_str::<Value>(&encoded).expect("Failed to deserialize Value");
        assert_eq!(sample, round_tripped);
    }
}
1794
/// Empty collections and extreme integers survive a JSON round trip.
#[test]
fn test_value_serialization_edge_cases() {
    // Each case pairs a value with the panic messages used if encoding or
    // decoding it fails.
    let cases = [
        (
            Value::Bytes(vec![]),
            "Failed to serialize empty bytes",
            "Failed to deserialize empty bytes",
        ),
        (
            Value::String(String::new()),
            "Failed to serialize empty string",
            "Failed to deserialize empty string",
        ),
        (
            Value::Uint(u64::MAX),
            "Failed to serialize max uint",
            "Failed to deserialize max uint",
        ),
        (
            Value::Int(i64::MIN),
            "Failed to serialize min int",
            "Failed to deserialize min int",
        ),
    ];

    for (value, encode_failure, decode_failure) in cases {
        let encoded = serde_json::to_string(&value).expect(encode_failure);
        let round_tripped = serde_json::from_str::<Value>(&encoded).expect(decode_failure);
        assert_eq!(value, round_tripped);
    }
}
1823
1824 // TypeKind tests
/// A signed byte type compares equal to an identically built one.
#[test]
fn test_type_kind_byte() {
    let signed_byte = TypeKind::Byte { signed: true };
    let expected = TypeKind::Byte { signed: true };
    assert_eq!(signed_byte, expected);
}
1830
/// Short types that differ in endianness and signedness are unequal, and a
/// clone equals its source.
#[test]
fn test_type_kind_short() {
    let unsigned_le = TypeKind::Short {
        endian: Endianness::Little,
        signed: false,
    };
    let signed_be = TypeKind::Short {
        endian: Endianness::Big,
        signed: true,
    };

    assert_ne!(unsigned_le, signed_be);

    let duplicate = unsigned_le.clone();
    assert_eq!(unsigned_le, duplicate);
}
1845
/// A long type exposes the endianness and signedness it was built with.
#[test]
fn test_type_kind_long() {
    let native_signed_long = TypeKind::Long {
        endian: Endianness::Native,
        signed: true,
    };

    let TypeKind::Long { endian, signed } = native_signed_long else {
        panic!("Expected Long variant");
    };
    assert_eq!(endian, Endianness::Native);
    assert!(signed);
}
1861
/// String types with and without a length limit are unequal, and a clone
/// equals its source.
#[test]
fn test_type_kind_string() {
    let unbounded = TypeKind::String { max_length: None };
    let bounded = TypeKind::String {
        max_length: Some(256),
    };

    assert_ne!(unbounded, bounded);

    let duplicate = unbounded.clone();
    assert_eq!(unbounded, duplicate);
}
1872
/// A range of numeric, date, string and Pascal-string type kinds survive a
/// JSON round trip unchanged.
#[test]
fn test_type_kind_serialization() {
    let samples = [
        // Integer kinds
        TypeKind::Byte { signed: true },
        TypeKind::Short {
            endian: Endianness::Little,
            signed: false,
        },
        TypeKind::Long {
            endian: Endianness::Big,
            signed: true,
        },
        TypeKind::Quad {
            endian: Endianness::Little,
            signed: false,
        },
        TypeKind::Quad {
            endian: Endianness::Big,
            signed: true,
        },
        // Floating-point kinds
        TypeKind::Float {
            endian: Endianness::Native,
        },
        TypeKind::Float {
            endian: Endianness::Big,
        },
        TypeKind::Double {
            endian: Endianness::Little,
        },
        TypeKind::Double {
            endian: Endianness::Native,
        },
        // Date kinds
        TypeKind::Date {
            endian: Endianness::Big,
            utc: true,
        },
        TypeKind::Date {
            endian: Endianness::Little,
            utc: false,
        },
        TypeKind::QDate {
            endian: Endianness::Native,
            utc: true,
        },
        TypeKind::QDate {
            endian: Endianness::Big,
            utc: false,
        },
        // String kinds
        TypeKind::String { max_length: None },
        TypeKind::String {
            max_length: Some(128),
        },
        // Pascal-string kinds
        TypeKind::PString {
            max_length: None,
            length_width: PStringLengthWidth::OneByte,
            length_includes_itself: false,
        },
        TypeKind::PString {
            max_length: Some(64),
            length_width: PStringLengthWidth::OneByte,
            length_includes_itself: false,
        },
        TypeKind::PString {
            max_length: None,
            length_width: PStringLengthWidth::TwoByteBE,
            length_includes_itself: true,
        },
        TypeKind::PString {
            max_length: Some(128),
            length_width: PStringLengthWidth::FourByteLE,
            length_includes_itself: false,
        },
    ];

    for sample in samples {
        let encoded = serde_json::to_string(&sample).expect("Failed to serialize TypeKind");
        let round_tripped =
            serde_json::from_str::<TypeKind>(&encoded).expect("Failed to deserialize TypeKind");
        assert_eq!(sample, round_tripped);
    }
}
1954
1955 // Operator tests
/// Checks that each listed `Operator` variant equals itself and differs from
/// every other listed variant.
#[test]
fn test_operator_variants() {
    let all_ops = [
        Operator::Equal,
        Operator::NotEqual,
        Operator::BitwiseAnd,
        Operator::BitwiseXor,
        Operator::BitwiseNot,
        Operator::AnyValue,
    ];

    // Walk every ordered pair; only the diagonal may compare equal.
    for (row, lhs) in all_ops.iter().enumerate() {
        for (col, rhs) in all_ops.iter().enumerate() {
            if row == col {
                assert_eq!(lhs, rhs);
                continue;
            }
            assert_ne!(lhs, rhs);
        }
    }
}
1977
/// Round-trips each listed `Operator` variant through JSON and checks that the
/// deserialized value equals the original.
#[test]
fn test_operator_serialization() {
    let all_ops = [
        Operator::Equal,
        Operator::NotEqual,
        Operator::BitwiseAnd,
        Operator::BitwiseXor,
        Operator::BitwiseNot,
        Operator::AnyValue,
    ];

    for original in &all_ops {
        let encoded = serde_json::to_string(original).expect("Failed to serialize Operator");
        let decoded: Operator =
            serde_json::from_str(&encoded).expect("Failed to deserialize Operator");
        assert_eq!(*original, decoded);
    }
}
1996
1997 // MagicRule tests
/// Builds a childless top-level `MagicRule` and checks that the message,
/// level and (empty) children list read back as constructed.
#[test]
fn test_magic_rule_creation() {
    let elf_rule = MagicRule {
        offset: OffsetSpec::Absolute(0),
        typ: TypeKind::Byte { signed: true },
        op: Operator::Equal,
        value: Value::Uint(0x7f),
        message: String::from("ELF magic"),
        children: Vec::new(),
        level: 0,
        strength_modifier: None,
        value_transform: None,
    };

    assert!(elf_rule.children.is_empty());
    assert_eq!(elf_rule.level, 0);
    assert_eq!(elf_rule.message, "ELF magic");
}
2016
/// Nests one level-1 rule under a level-0 rule and checks that the child is
/// reachable through the parent's `children` list with its fields intact.
#[test]
fn test_magic_rule_with_children() {
    let nested = MagicRule {
        offset: OffsetSpec::Absolute(4),
        typ: TypeKind::Byte { signed: true },
        op: Operator::Equal,
        value: Value::Uint(1),
        message: String::from("32-bit"),
        children: Vec::new(),
        level: 1,
        strength_modifier: None,
        value_transform: None,
    };

    let top = MagicRule {
        offset: OffsetSpec::Absolute(0),
        typ: TypeKind::Long {
            endian: Endianness::Little,
            signed: false,
        },
        op: Operator::Equal,
        value: Value::Bytes(vec![0x7f, 0x45, 0x4c, 0x46]),
        message: String::from("ELF executable"),
        children: vec![nested],
        level: 0,
        strength_modifier: None,
        value_transform: None,
    };

    // The length check comes first so the index below cannot go out of bounds.
    assert_eq!(top.children.len(), 1);
    let only_child = &top.children[0];
    assert_eq!(only_child.level, 1);
    assert_eq!(only_child.message, "32-bit");
}
2050
/// Round-trips a `MagicRule` through JSON and checks that the fields survive.
///
/// Besides the message, level and child count, the type, operator and
/// strength modifier are compared as well, so a serialization regression in
/// any of those fields cannot slip through unnoticed.
#[test]
fn test_magic_rule_serialization() {
    let rule = MagicRule {
        offset: OffsetSpec::Absolute(16),
        typ: TypeKind::Short {
            endian: Endianness::Little,
            signed: false,
        },
        op: Operator::NotEqual,
        value: Value::Uint(0),
        message: "Non-zero short value".to_string(),
        children: vec![],
        level: 2,
        strength_modifier: None,
        value_transform: None,
    };

    let json = serde_json::to_string(&rule).expect("Failed to serialize MagicRule");
    let deserialized: MagicRule =
        serde_json::from_str(&json).expect("Failed to deserialize MagicRule");

    assert_eq!(rule.message, deserialized.message);
    assert_eq!(rule.level, deserialized.level);
    assert_eq!(rule.children.len(), deserialized.children.len());

    // Structural fields must round-trip too, not just the descriptive ones.
    assert_eq!(rule.typ, deserialized.typ);
    assert_eq!(rule.op, deserialized.op);
    assert_eq!(rule.strength_modifier, deserialized.strength_modifier);
}
2076
2077 // StrengthModifier tests
/// Constructs one value of each `StrengthModifier` variant and checks both
/// that it equals an identically built value and that differing variants
/// compare unequal.
#[test]
fn test_strength_modifier_variants() {
    let add = StrengthModifier::Add(10);
    let sub = StrengthModifier::Subtract(5);
    let mul = StrengthModifier::Multiply(2);
    let div = StrengthModifier::Divide(2);
    let set = StrengthModifier::Set(50);

    // Each variant must carry the inner value it was built with.
    let expectations = [
        (add, StrengthModifier::Add(10)),
        (sub, StrengthModifier::Subtract(5)),
        (mul, StrengthModifier::Multiply(2)),
        (div, StrengthModifier::Divide(2)),
        (set, StrengthModifier::Set(50)),
    ];
    for (actual, expected) in expectations {
        assert_eq!(actual, expected);
    }

    // Different variants never compare equal, even with the same payload.
    assert_ne!(add, sub);
    assert_ne!(mul, div);
    assert_ne!(set, add);
}
2098
/// Checks that `StrengthModifier` variants built with negative payloads
/// compare equal to identically built values.
#[test]
fn test_strength_modifier_negative_values() {
    let negatives = [
        (StrengthModifier::Add(-10), StrengthModifier::Add(-10)),
        (
            StrengthModifier::Subtract(-5),
            StrengthModifier::Subtract(-5),
        ),
        (StrengthModifier::Set(-50), StrengthModifier::Set(-50)),
    ];

    for (built, expected) in negatives {
        assert_eq!(built, expected);
    }
}
2109
/// Round-trips one value of each `StrengthModifier` variant through JSON and
/// checks that the deserialized value equals the original.
#[test]
fn test_strength_modifier_serialization() {
    let samples = [
        StrengthModifier::Add(10),
        StrengthModifier::Subtract(5),
        StrengthModifier::Multiply(2),
        StrengthModifier::Divide(3),
        StrengthModifier::Set(100),
    ];

    for original in &samples {
        let encoded =
            serde_json::to_string(original).expect("Failed to serialize StrengthModifier");
        let decoded: StrengthModifier =
            serde_json::from_str(&encoded).expect("Failed to deserialize StrengthModifier");
        assert_eq!(*original, decoded);
    }
}
2128
/// Checks that the `Debug` rendering of a `StrengthModifier` mentions both the
/// variant name and its payload.
#[test]
fn test_strength_modifier_debug() {
    let rendered = format!("{:?}", StrengthModifier::Add(25));

    for fragment in ["Add", "25"] {
        assert!(rendered.contains(fragment));
    }
}
2136
/// Checks that both `Clone` and `Copy` produce a `StrengthModifier` equal to
/// the original.
#[test]
// `StrengthModifier` is `Copy`, so clippy would normally ask for a plain copy.
// The explicit call is deliberate: this test targets the `Clone` impl itself.
#[allow(clippy::clone_on_copy)]
fn test_strength_modifier_clone() {
    let original = StrengthModifier::Multiply(4);

    // Explicit clone: exercises `Clone::clone`.
    let cloned = original.clone();
    assert_eq!(original, cloned);

    // Implicit copy: `original` stays usable afterwards because the type is `Copy`.
    let copied = original;
    assert_eq!(original, copied);
}
2143
/// Builds a `MagicRule` carrying a strength modifier and checks that the
/// modifier is stored on the rule and survives a JSON round trip.
#[test]
fn test_magic_rule_with_strength_modifier() {
    let wanted = Some(StrengthModifier::Add(20));
    let boosted = MagicRule {
        offset: OffsetSpec::Absolute(0),
        typ: TypeKind::Byte { signed: true },
        op: Operator::Equal,
        value: Value::Uint(0x7f),
        message: String::from("ELF magic"),
        children: Vec::new(),
        level: 0,
        strength_modifier: wanted,
        value_transform: None,
    };

    assert_eq!(boosted.strength_modifier, wanted);

    // The modifier must also be preserved by serialization.
    let encoded = serde_json::to_string(&boosted).expect("Failed to serialize MagicRule");
    let decoded =
        serde_json::from_str::<MagicRule>(&encoded).expect("Failed to deserialize MagicRule");
    assert_eq!(boosted.strength_modifier, decoded.strength_modifier);
}
2166
/// Builds a `MagicRule` with no strength modifier and checks that the field
/// reads back as `None`.
#[test]
fn test_magic_rule_without_strength_modifier() {
    let plain = MagicRule {
        offset: OffsetSpec::Absolute(0),
        typ: TypeKind::Byte { signed: true },
        op: Operator::Equal,
        value: Value::Uint(0x7f),
        message: String::from("ELF magic"),
        children: Vec::new(),
        level: 0,
        strength_modifier: None,
        value_transform: None,
    };

    let modifier = plain.strength_modifier;
    assert_eq!(modifier, None);
}
2183
2184 // MetaType tests
/// For every listed `MetaType` variant, checks that its `Debug` output is
/// non-empty, that a clone equals the original, and that it differs from
/// every other listed variant.
#[test]
fn test_meta_type_variants_debug_clone_eq() {
    let cases = [
        MetaType::Default,
        MetaType::Clear,
        MetaType::Indirect,
        MetaType::Offset,
        MetaType::Name("part2".to_string()),
        MetaType::Use("part2".to_string()),
    ];

    for (i, variant) in cases.iter().enumerate() {
        // Debug rendering must produce some text.
        assert!(
            !format!("{variant:?}").is_empty(),
            "Debug format must be non-empty for variant at index {i}"
        );

        // A clone must compare equal to its source.
        let cloned = variant.clone();
        assert_eq!(
            variant, &cloned,
            "Clone must preserve equality for variant at index {i}"
        );

        // Only the variant at the same index may compare equal.
        for (j, other) in cases.iter().enumerate() {
            if i == j {
                assert_eq!(variant, other);
                continue;
            }
            assert_ne!(
                variant, other,
                "Variants at indices {i} and {j} must differ"
            );
        }
    }
}
2224
/// Round-trips every listed `MetaType` variant through JSON and checks that
/// the deserialized value equals the original.
#[test]
fn test_meta_type_serde_roundtrip() {
    let samples = [
        MetaType::Default,
        MetaType::Clear,
        MetaType::Indirect,
        MetaType::Offset,
        MetaType::Name("foo".to_string()),
        MetaType::Use("bar".to_string()),
    ];

    for original in &samples {
        let encoded = serde_json::to_string(original).expect("serialize MetaType");
        let decoded = serde_json::from_str::<MetaType>(&encoded).expect("deserialize MetaType");
        assert_eq!(*original, decoded);
    }
}
2242
/// Checks that `TypeKind::Meta` reports no bit width for every listed
/// `MetaType` variant.
#[test]
fn test_type_kind_meta_bit_width_is_none() {
    // Wrap each meta variant up front so the loop deals only with `TypeKind`s.
    let meta_kinds = [
        MetaType::Default,
        MetaType::Clear,
        MetaType::Indirect,
        MetaType::Offset,
        MetaType::Name("x".to_string()),
        MetaType::Use("x".to_string()),
    ]
    .map(TypeKind::Meta);

    for kind in meta_kinds {
        assert_eq!(
            kind.bit_width(),
            None,
            "TypeKind::Meta must have no bit width: {kind:?}"
        );
    }
}
2262}