libmagic_rs/parser/ast.rs
1// Copyright (c) 2025-2026 the libmagic-rs contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! Abstract Syntax Tree definitions for magic rules
5//!
6//! This module contains the core data structures that represent parsed magic rules
7//! and their components, including offset specifications, type kinds, operators, and values.
8
9use serde::{Deserialize, Serialize};
10use std::num::{NonZeroU32, NonZeroUsize};
11
12/// The width of the length prefix for Pascal strings.
13///
14/// Uppercase suffix letters (`/H`, `/L`) indicate big-endian byte order.
15/// Lowercase suffix letters (`/h`, `/l`) indicate little-endian byte order.
16///
17/// # Examples
18///
19/// ```
20/// use libmagic_rs::parser::ast::PStringLengthWidth;
21/// let width = PStringLengthWidth::OneByte;
22/// assert_eq!(width.byte_count(), 1);
23///
24/// let width = PStringLengthWidth::TwoByteBE;
25/// assert_eq!(width.byte_count(), 2);
26///
27/// let width = PStringLengthWidth::FourByteLE;
28/// assert_eq!(width.byte_count(), 4);
29/// ```
30#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
31#[allow(clippy::enum_variant_names)]
32#[non_exhaustive]
33pub enum PStringLengthWidth {
34 /// 1-byte length prefix (default, `/B` suffix)
35 ///
36 /// # Examples
37 ///
38 /// ```
39 /// use libmagic_rs::parser::ast::PStringLengthWidth;
40 /// let width = PStringLengthWidth::OneByte;
41 /// assert_eq!(width.byte_count(), 1);
42 /// ```
43 OneByte,
44 /// 2-byte big-endian length prefix (`/H` suffix)
45 ///
46 /// # Examples
47 ///
48 /// ```
49 /// use libmagic_rs::parser::ast::PStringLengthWidth;
50 /// let width = PStringLengthWidth::TwoByteBE;
51 /// assert_eq!(width.byte_count(), 2);
52 /// ```
53 TwoByteBE,
54 /// 2-byte little-endian length prefix (`/h` suffix)
55 ///
56 /// # Examples
57 ///
58 /// ```
59 /// use libmagic_rs::parser::ast::PStringLengthWidth;
60 /// let width = PStringLengthWidth::TwoByteLE;
61 /// assert_eq!(width.byte_count(), 2);
62 /// ```
63 TwoByteLE,
64 /// 4-byte big-endian length prefix (`/L` suffix)
65 ///
66 /// # Examples
67 ///
68 /// ```
69 /// use libmagic_rs::parser::ast::PStringLengthWidth;
70 /// let width = PStringLengthWidth::FourByteBE;
71 /// assert_eq!(width.byte_count(), 4);
72 /// ```
73 FourByteBE,
74 /// 4-byte little-endian length prefix (`/l` suffix)
75 ///
76 /// # Examples
77 ///
78 /// ```
79 /// use libmagic_rs::parser::ast::PStringLengthWidth;
80 /// let width = PStringLengthWidth::FourByteLE;
81 /// assert_eq!(width.byte_count(), 4);
82 /// ```
83 FourByteLE,
84}
85
86impl PStringLengthWidth {
87 /// Returns the number of bytes used for the length prefix.
88 #[must_use]
89 pub fn byte_count(&self) -> usize {
90 match self {
91 Self::OneByte => 1,
92 Self::TwoByteBE | Self::TwoByteLE => 2,
93 Self::FourByteBE | Self::FourByteLE => 4,
94 }
95 }
96}
97
98/// Arithmetic operation applied to the value read at an indirect offset's
99/// `base_offset` before the result is used as the final file offset.
100///
101/// magic(5) supports `+`, `-`, `*`, `/`, `%`, `&`, `|`, and `^` between the
102/// pointer-type specifier and the operand inside the parentheses. Addition
103/// and subtraction collapse to [`IndirectAdjustmentOp::Add`] with a signed
104/// `adjustment` (so `(N.X-1)` is `Add(-1)` rather than a separate `Sub`
105/// variant); the remaining operators each have a dedicated variant.
106///
107/// The default is [`IndirectAdjustmentOp::Add`]; an indirect offset with no
108/// arithmetic — just `(base.type)` — is encoded as `Add` with `adjustment:
109/// 0`, preserving backwards compatibility.
110///
111/// # Examples
112///
113/// ```
114/// use libmagic_rs::parser::ast::IndirectAdjustmentOp;
115///
116/// assert_eq!(IndirectAdjustmentOp::default(), IndirectAdjustmentOp::Add);
117/// ```
118#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
119#[non_exhaustive]
120pub enum IndirectAdjustmentOp {
121 /// Addition (also covers subtraction via negative `adjustment`).
122 ///
123 /// # Examples
124 ///
125 /// ```
126 /// use libmagic_rs::parser::ast::IndirectAdjustmentOp;
127 /// assert_eq!(IndirectAdjustmentOp::default(), IndirectAdjustmentOp::Add);
128 /// ```
129 #[default]
130 Add,
131 /// Multiplication: `pointer_value * adjustment`.
132 ///
133 /// # Examples
134 ///
135 /// ```
136 /// use libmagic_rs::parser::ast::IndirectAdjustmentOp;
137 /// let op = IndirectAdjustmentOp::Mul;
138 /// assert_eq!(op, IndirectAdjustmentOp::Mul);
139 /// ```
140 Mul,
141 /// Truncating integer division: `pointer_value / adjustment`. Division
142 /// by zero is rejected by the evaluator with an error.
143 ///
144 /// # Examples
145 ///
146 /// ```
147 /// use libmagic_rs::parser::ast::IndirectAdjustmentOp;
148 /// let op = IndirectAdjustmentOp::Div;
149 /// assert_eq!(op, IndirectAdjustmentOp::Div);
150 /// ```
151 Div,
152 /// Remainder: `pointer_value % adjustment`. Modulo by zero is rejected
153 /// by the evaluator with an error.
154 ///
155 /// # Examples
156 ///
157 /// ```
158 /// use libmagic_rs::parser::ast::IndirectAdjustmentOp;
159 /// let op = IndirectAdjustmentOp::Mod;
160 /// assert_eq!(op, IndirectAdjustmentOp::Mod);
161 /// ```
162 Mod,
163 /// Bitwise AND: `pointer_value & adjustment`.
164 ///
165 /// # Examples
166 ///
167 /// ```
168 /// use libmagic_rs::parser::ast::IndirectAdjustmentOp;
169 /// let op = IndirectAdjustmentOp::And;
170 /// assert_eq!(op, IndirectAdjustmentOp::And);
171 /// ```
172 And,
173 /// Bitwise OR: `pointer_value | adjustment`.
174 ///
175 /// # Examples
176 ///
177 /// ```
178 /// use libmagic_rs::parser::ast::IndirectAdjustmentOp;
179 /// let op = IndirectAdjustmentOp::Or;
180 /// assert_eq!(op, IndirectAdjustmentOp::Or);
181 /// ```
182 Or,
183 /// Bitwise XOR: `pointer_value ^ adjustment`.
184 ///
185 /// # Examples
186 ///
187 /// ```
188 /// use libmagic_rs::parser::ast::IndirectAdjustmentOp;
189 /// let op = IndirectAdjustmentOp::Xor;
190 /// assert_eq!(op, IndirectAdjustmentOp::Xor);
191 /// ```
192 Xor,
193}
194
195/// Offset specification for locating data in files
196#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
197#[non_exhaustive]
198pub enum OffsetSpec {
199 /// Absolute offset from file start (or from file end if negative)
200 ///
201 /// Positive values are offsets from the start of the file.
202 /// Negative values are offsets from the end of the file (same as `FromEnd`).
203 ///
204 /// # Examples
205 ///
206 /// ```
207 /// use libmagic_rs::parser::ast::OffsetSpec;
208 ///
209 /// let offset = OffsetSpec::Absolute(0x10); // Read at byte 16 from start
210 /// let from_end = OffsetSpec::Absolute(-4); // 4 bytes before end of file
211 /// ```
212 Absolute(i64),
213
214 /// Indirect offset through pointer dereferencing
215 ///
216 /// Reads a pointer value at `base_offset`, interprets it according to `pointer_type`
217 /// and `endian`, then combines `adjustment` with the pointer value using
218 /// `adjustment_op` to get the final offset. The default `adjustment_op`
219 /// is [`IndirectAdjustmentOp::Add`], so `(base.type)` and
220 /// `(base.type+N)` / `(base.type-N)` use addition (subtraction is
221 /// encoded as `Add` with a negative `adjustment`). magic(5) also
222 /// supports multiplicative and bitwise forms inside the parens, e.g.
223 /// `(0x200.s*2)` ([`IndirectAdjustmentOp::Mul`]).
224 ///
225 /// # Examples
226 ///
227 /// ```
228 /// use libmagic_rs::parser::ast::{OffsetSpec, TypeKind, Endianness, IndirectAdjustmentOp};
229 ///
230 /// let indirect = OffsetSpec::Indirect {
231 /// base_offset: 0x20,
232 /// base_relative: false,
233 /// pointer_type: TypeKind::Long { endian: Endianness::Little, signed: false },
234 /// adjustment: 4,
235 /// adjustment_op: IndirectAdjustmentOp::Add,
236 /// result_relative: false,
237 /// endian: Endianness::Little,
238 /// };
239 /// ```
240 Indirect {
241 /// Base offset to read pointer from. When `base_relative` is
242 /// `true`, this value is added to the current anchor (last-match
243 /// position) rather than being treated as an absolute file
244 /// position.
245 base_offset: i64,
246 /// If `true`, `base_offset` is relative to the current anchor
247 /// (i.e., `(&N.X)` syntax in magic files). Defaults to `false`
248 /// for backwards compatibility with existing AST snapshots; the
249 /// serde `default` attribute lets older serialized AST round-trip.
250 #[serde(default)]
251 base_relative: bool,
252 /// Type of pointer value
253 pointer_type: TypeKind,
254 /// Operand combined with the pointer value via `adjustment_op`.
255 ///
256 /// For `IndirectAdjustmentOp::Add`, the operand is signed (negative
257 /// values encode subtraction). For multiplicative and bitwise ops
258 /// the operand is interpreted as `i64` but typically magic files
259 /// supply non-negative literals.
260 adjustment: i64,
261 /// Arithmetic operation applied to the pointer value with
262 /// `adjustment` as the operand. Defaults to
263 /// [`IndirectAdjustmentOp::Add`] for legacy AST consumers via
264 /// serde's `default` attribute.
265 #[serde(default)]
266 adjustment_op: IndirectAdjustmentOp,
267 /// If `true`, the resolved offset is added to the current anchor
268 /// instead of being treated as an absolute file position. This
269 /// corresponds to magic-file `&(...)` syntax wrapping an indirect
270 /// spec, e.g., `&(0x10.l)`.
271 #[serde(default)]
272 result_relative: bool,
273 /// Endianness for pointer reading
274 endian: Endianness,
275 },
276
277 /// Relative offset from previous match position
278 ///
279 /// # Examples
280 ///
281 /// ```
282 /// use libmagic_rs::parser::ast::OffsetSpec;
283 ///
284 /// let relative = OffsetSpec::Relative(8); // 8 bytes after previous match
285 /// ```
286 Relative(i64),
287
288 /// Offset from end of file (negative values move towards start)
289 ///
290 /// # Examples
291 ///
292 /// ```
293 /// use libmagic_rs::parser::ast::OffsetSpec;
294 ///
295 /// let from_end = OffsetSpec::FromEnd(-16); // 16 bytes before end of file
296 /// ```
297 FromEnd(i64),
298}
299
300/// Control-flow directive carried by [`TypeKind::Meta`].
301///
302/// These are not value-reading types -- they correspond to magic(5)
303/// control-flow keywords (`default`, `clear`, `name`, `use`, `indirect`,
304/// `offset`) that modify how a rule set is traversed rather than reading
305/// bytes from the buffer. All six variants are fully evaluated by the
306/// engine: `default`/`clear` manage per-level sibling-matched state;
307/// `name`/`use` implement subroutine dispatch; `indirect` re-applies the
308/// root rule database at a resolved offset; and `offset` emits the
309/// current file position as `Value::Uint` for printf-style formatting.
310#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
311#[non_exhaustive]
312pub enum MetaType {
313 /// `default` directive: fires when no sibling at the same indentation
314 /// level has matched at the current offset. See magic(5) for the
315 /// "default" type semantics.
316 ///
317 /// # Examples
318 ///
319 /// ```
320 /// use libmagic_rs::parser::ast::MetaType;
321 /// let meta = MetaType::Default;
322 /// assert_eq!(meta, MetaType::Default);
323 /// ```
324 Default,
325 /// `clear` directive: resets the sibling-matched flag so a later
326 /// `default` sibling can fire even if an earlier sibling matched.
327 /// See magic(5) for the "clear" type semantics.
328 ///
329 /// # Examples
330 ///
331 /// ```
332 /// use libmagic_rs::parser::ast::MetaType;
333 /// let meta = MetaType::Clear;
334 /// assert_eq!(meta, MetaType::Clear);
335 /// ```
336 Clear,
337 /// `name <identifier>` directive: declares a named subroutine that
338 /// can be invoked later via [`MetaType::Use`]. See magic(5) for the
339 /// "name" type semantics.
340 ///
341 /// # Examples
342 ///
343 /// ```
344 /// use libmagic_rs::parser::ast::MetaType;
345 /// let meta = MetaType::Name("part2".to_string());
346 /// assert_eq!(meta, MetaType::Name("part2".to_string()));
347 /// ```
348 Name(String),
349 /// `use <identifier>` directive: invokes a named subroutine
350 /// previously declared via [`MetaType::Name`]. See magic(5) for the
351 /// "use" type semantics.
352 ///
353 /// # Examples
354 ///
355 /// ```
356 /// use libmagic_rs::parser::ast::MetaType;
357 /// let meta = MetaType::Use("part2".to_string());
358 /// assert_eq!(meta, MetaType::Use("part2".to_string()));
359 /// ```
360 Use(String),
361 /// `indirect` directive: re-applies the entire magic database at the
362 /// resolved offset. See magic(5) for the "indirect" type semantics.
363 ///
364 /// # Examples
365 ///
366 /// ```
367 /// use libmagic_rs::parser::ast::MetaType;
368 /// let meta = MetaType::Indirect;
369 /// assert_eq!(meta, MetaType::Indirect);
370 /// ```
371 Indirect,
372 /// `offset` type keyword: reports the current file offset rather than
373 /// reading a typed value from the buffer. See magic(5) for the
374 /// "offset" type semantics.
375 ///
376 /// Evaluation: the engine resolves the rule's offset specification
377 /// to an absolute position and emits a `RuleMatch` whose `value` is
378 /// `Value::Uint(position)`. Message templates can reference that
379 /// value through printf-style format specifiers (e.g. `%lld`),
380 /// which are substituted by
381 /// [`crate::output::format::format_magic_message`] at description-
382 /// assembly time. The only supported operator is `x` (`AnyValue`);
383 /// any other operator is `debug!`-logged and skipped.
384 ///
385 /// # Examples
386 ///
387 /// ```
388 /// use libmagic_rs::parser::ast::MetaType;
389 /// let meta = MetaType::Offset;
390 /// assert_eq!(meta, MetaType::Offset);
391 /// ```
392 Offset,
393}
394
395/// Data type specifications for interpreting bytes
396#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
397#[non_exhaustive]
398pub enum TypeKind {
399 /// Single byte
400 ///
401 /// # Examples
402 ///
403 /// ```
404 /// use libmagic_rs::parser::ast::TypeKind;
405 ///
406 /// let byte = TypeKind::Byte { signed: true };
407 /// assert_eq!(byte, TypeKind::Byte { signed: true });
408 /// ```
409 Byte {
410 /// Whether value is signed
411 signed: bool,
412 },
413 /// 16-bit integer
414 ///
415 /// # Examples
416 ///
417 /// ```
418 /// use libmagic_rs::parser::ast::{TypeKind, Endianness};
419 ///
420 /// let short = TypeKind::Short { endian: Endianness::Little, signed: true };
421 /// assert_eq!(short, TypeKind::Short { endian: Endianness::Little, signed: true });
422 /// ```
423 Short {
424 /// Byte order
425 endian: Endianness,
426 /// Whether value is signed
427 signed: bool,
428 },
429 /// 32-bit integer
430 ///
431 /// # Examples
432 ///
433 /// ```
434 /// use libmagic_rs::parser::ast::{TypeKind, Endianness};
435 ///
436 /// let long = TypeKind::Long { endian: Endianness::Big, signed: false };
437 /// assert_eq!(long, TypeKind::Long { endian: Endianness::Big, signed: false });
438 /// ```
439 Long {
440 /// Byte order
441 endian: Endianness,
442 /// Whether value is signed
443 signed: bool,
444 },
445 /// 64-bit integer
446 ///
447 /// # Examples
448 ///
449 /// ```
450 /// use libmagic_rs::parser::ast::{TypeKind, Endianness};
451 ///
452 /// let quad = TypeKind::Quad { endian: Endianness::Big, signed: true };
453 /// assert_eq!(quad, TypeKind::Quad { endian: Endianness::Big, signed: true });
454 /// ```
455 Quad {
456 /// Byte order
457 endian: Endianness,
458 /// Whether value is signed
459 signed: bool,
460 },
461 /// 32-bit IEEE 754 floating-point
462 ///
463 /// # Examples
464 ///
465 /// ```
466 /// use libmagic_rs::parser::ast::{TypeKind, Endianness};
467 ///
468 /// let float = TypeKind::Float { endian: Endianness::Big };
469 /// assert_eq!(float, TypeKind::Float { endian: Endianness::Big });
470 /// ```
471 Float {
472 /// Byte order
473 endian: Endianness,
474 },
475 /// 64-bit IEEE 754 double-precision floating-point
476 ///
477 /// # Examples
478 ///
479 /// ```
480 /// use libmagic_rs::parser::ast::{TypeKind, Endianness};
481 ///
482 /// let double = TypeKind::Double { endian: Endianness::Big };
483 /// assert_eq!(double, TypeKind::Double { endian: Endianness::Big });
484 /// ```
485 Double {
486 /// Byte order
487 endian: Endianness,
488 },
489 /// 32-bit Unix timestamp (seconds since epoch)
490 ///
491 /// # Examples
492 ///
493 /// ```
494 /// use libmagic_rs::parser::ast::{TypeKind, Endianness};
495 ///
496 /// let date = TypeKind::Date { endian: Endianness::Big, utc: true };
497 /// assert_eq!(date, TypeKind::Date { endian: Endianness::Big, utc: true });
498 /// ```
499 Date {
500 /// Byte order
501 endian: Endianness,
502 /// true = UTC, false = local time
503 utc: bool,
504 },
505 /// 64-bit Unix timestamp (seconds since epoch)
506 ///
507 /// # Examples
508 ///
509 /// ```
510 /// use libmagic_rs::parser::ast::{TypeKind, Endianness};
511 ///
512 /// let qdate = TypeKind::QDate { endian: Endianness::Little, utc: false };
513 /// assert_eq!(qdate, TypeKind::QDate { endian: Endianness::Little, utc: false });
514 /// ```
515 QDate {
516 /// Byte order
517 endian: Endianness,
518 /// true = UTC, false = local time
519 utc: bool,
520 },
521 /// String data
522 ///
523 /// The `flags` field carries the modifier flags parsed from the
524 /// `/[cCwWtTbf]` suffix on a `string` rule. Default flags (all
525 /// `false`) preserve the existing byte-exact comparison path; any
526 /// non-default flag routes the rule through
527 /// `compare_string_with_flags` in `src/evaluator/types/string.rs`.
528 /// See [`StringFlags`] for per-flag semantics.
529 ///
530 /// # Examples
531 ///
532 /// ```
533 /// use libmagic_rs::parser::ast::{StringFlags, TypeKind};
534 ///
535 /// let s = TypeKind::String { max_length: None, flags: StringFlags::default() };
536 /// assert_eq!(s, TypeKind::String { max_length: None, flags: StringFlags::default() });
537 ///
538 /// let case_insensitive = TypeKind::String {
539 /// max_length: None,
540 /// flags: StringFlags::default().with_ignore_lowercase(true),
541 /// };
542 /// assert!(matches!(case_insensitive, TypeKind::String { flags, .. } if flags.ignore_lowercase));
543 /// ```
544 String {
545 /// Maximum length to read
546 max_length: Option<usize>,
547 /// Modifier flags from the `/[cCwWtTbf]` suffix
548 flags: StringFlags,
549 },
550 /// UCS-2 (16-bit Unicode) string with explicit byte order.
551 ///
552 /// Backs the magic(5) `lestring16` (little-endian) and `bestring16`
553 /// (big-endian) keywords. Each character occupies two bytes in the
554 /// file; the reader stops at a U+0000 terminator (encoded as the
555 /// 2-byte sequence `0x00 0x00`) or at the end of the buffer. The
556 /// decoded value is returned as a Rust `String` (so non-ASCII
557 /// characters are preserved when valid UCS-2).
558 ///
559 /// # Examples
560 ///
561 /// ```
562 /// use libmagic_rs::parser::ast::{TypeKind, Endianness};
563 ///
564 /// let le = TypeKind::String16 { endian: Endianness::Little };
565 /// assert_eq!(le, TypeKind::String16 { endian: Endianness::Little });
566 ///
567 /// let be = TypeKind::String16 { endian: Endianness::Big };
568 /// assert_eq!(be, TypeKind::String16 { endian: Endianness::Big });
569 /// ```
570 String16 {
571 /// Endianness for the 16-bit code units.
572 endian: Endianness,
573 },
574 /// Pascal string (length-prefixed, supports 1/2/4-byte prefix, with optional max length)
575 ///
576 /// Pascal strings store the length as a prefix (1, 2, or 4 bytes, with configurable endianness), followed by
577 /// that many bytes of string data. Unlike C strings, they are not null-terminated.
578 ///
579 /// # Examples
580 ///
581 /// ```
582 /// use libmagic_rs::parser::ast::{TypeKind, PStringLengthWidth};
583 ///
584 /// let pstring = TypeKind::PString { max_length: None, length_width: PStringLengthWidth::OneByte, length_includes_itself: false };
585 /// assert_eq!(pstring, TypeKind::PString { max_length: None, length_width: PStringLengthWidth::OneByte, length_includes_itself: false });
586 ///
587 /// let limited = TypeKind::PString { max_length: Some(64), length_width: PStringLengthWidth::TwoByteBE, length_includes_itself: false };
588 /// assert_eq!(limited, TypeKind::PString { max_length: Some(64), length_width: PStringLengthWidth::TwoByteBE, length_includes_itself: false });
589 ///
590 /// // /J flag: stored length includes the length field itself
591 /// let jpeg = TypeKind::PString { max_length: None, length_width: PStringLengthWidth::TwoByteBE, length_includes_itself: true };
592 /// assert_eq!(jpeg, TypeKind::PString { max_length: None, length_width: PStringLengthWidth::TwoByteBE, length_includes_itself: true });
593 /// ```
594 PString {
595 /// Maximum length to read (caps the length value)
596 max_length: Option<usize>,
597 /// Width of the length prefix
598 length_width: PStringLengthWidth,
599 /// Whether the stored length includes the length field itself (`/J` flag)
600 length_includes_itself: bool,
601 },
602 /// Regular expression matching against file contents
603 ///
604 /// Regex rules match a POSIX-extended regular expression pattern against the
605 /// file buffer. Patterns are compiled with multi-line mode always enabled
606 /// (matching libmagic's unconditional `REG_NEWLINE`), so `^` and `$` match
607 /// at line boundaries and `.` does not match `\n`. The `flags` control
608 /// case sensitivity and anchor advance semantics; the `count` field
609 /// controls the scan window (byte or line bounds). The scan window is
610 /// always capped at 8192 bytes (matching GNU `file`'s `FILE_REGEX_MAX`;
611 /// enforced in the evaluator).
612 ///
613 /// # Examples
614 ///
615 /// ```
616 /// use libmagic_rs::parser::ast::{RegexCount, RegexFlags, TypeKind};
617 /// use std::num::NonZeroU32;
618 ///
619 /// // Plain `regex` -- no flags, default 8192-byte scan window.
620 /// let plain = TypeKind::Regex {
621 /// flags: RegexFlags::default(),
622 /// count: RegexCount::Default,
623 /// };
624 ///
625 /// // `regex/1l` -- scan the first line only.
626 /// let first_line = TypeKind::Regex {
627 /// flags: RegexFlags::default(),
628 /// count: RegexCount::Lines(NonZeroU32::new(1)),
629 /// };
630 ///
631 /// // `regex/cs` -- case-insensitive, anchor advances to match-start.
632 /// let case_insensitive_start = TypeKind::Regex {
633 /// flags: RegexFlags::default()
634 /// .with_case_insensitive(true)
635 /// .with_start_offset(true),
636 /// count: RegexCount::Default,
637 /// };
638 /// ```
639 Regex {
640 /// Modifier flags from the `/[cs]` suffix (`/c` case-insensitive,
641 /// `/s` start-offset anchor). Line-mode is encoded by the
642 /// [`RegexCount::Lines`] variant of `count`, not a flag.
643 flags: RegexFlags,
644 /// Scan window specifier: default 8192 bytes, explicit byte
645 /// count, or explicit line count. See [`RegexCount`] for the
646 /// three cases.
647 count: RegexCount,
648 },
649 /// Multi-byte pattern search within a bounded range
650 ///
651 /// Search rules look for a literal byte pattern within `range` bytes of
652 /// the offset. Unlike [`TypeKind::String`], which only matches at the
653 /// exact offset, `search` scans forward up to `range` bytes for the
654 /// first occurrence. The range is **mandatory** per GNU `file`'s
655 /// magic(5) specification and is stored as a [`NonZeroUsize`] so a
656 /// zero-range search is unrepresentable.
657 ///
658 /// # Examples
659 ///
660 /// ```
661 /// use libmagic_rs::parser::ast::TypeKind;
662 /// use std::num::NonZeroUsize;
663 ///
664 /// // `search/256` -- scan up to 256 bytes for the literal pattern.
665 /// let bounded = TypeKind::Search {
666 /// range: NonZeroUsize::new(256).unwrap(),
667 /// flags: libmagic_rs::parser::ast::SearchFlags::default(),
668 /// };
669 /// ```
670 Search {
671 /// Scan window width in bytes, starting at the rule's offset.
672 range: NonZeroUsize,
673 /// Modifier flags from the `/[sCcWwTtBbf]` suffix on a `search`
674 /// rule. The `/s` flag controls anchor advance (match-START vs
675 /// match-END); the eight `StringFlags`-shared letters alter how
676 /// the literal pattern is compared against the file bytes. See
677 /// [`SearchFlags`] for the per-flag semantics.
678 flags: SearchFlags,
679 },
680 /// Control-flow directive (`default`, `clear`, `name`, `use`,
681 /// `indirect`, `offset`).
682 ///
683 /// These magic(5) keywords do not read or compare bytes; they modify
684 /// how a rule set is traversed. All six variants are fully evaluated:
685 /// `default` fires as a fallback when no sibling at the same level
686 /// has matched; `clear` resets that flag; `name`/`use` support
687 /// subroutine definition and invocation; `indirect` re-enters the
688 /// rule set at a resolved offset; `offset` emits the resolved file
689 /// position as `Value::Uint` for printf-style message substitution.
690 /// See [`MetaType`] for the individual variants.
691 ///
692 /// # Examples
693 ///
694 /// ```
695 /// use libmagic_rs::parser::ast::{MetaType, TypeKind};
696 /// let default_rule = TypeKind::Meta(MetaType::Default);
697 /// assert_eq!(default_rule, TypeKind::Meta(MetaType::Default));
698 /// ```
699 Meta(MetaType),
700}
701
702/// Regex modifier flags parsed from the `/[cs]` suffix on a `regex` rule.
703///
704/// The `/l` "line-based window" modifier is **not** represented here; it
705/// lives on [`RegexCount::Lines`] so that the type-level encoding makes
706/// "line count" and "byte count" mutually exclusive. An earlier design
707/// used two separate fields (`line_based: bool` + `count: Option<u32>`)
708/// which admitted the cross-field state `line_based: true, count: None`;
709/// under the current encoding that case is expressed explicitly as
710/// [`RegexCount::Lines(None)`](RegexCount::Lines) -- the `regex/l`
711/// shorthand -- and is behaviorally equivalent to [`RegexCount::Default`]
712/// (both walk the full 8192-byte capped window).
713///
714/// All flags default to `false` via [`RegexFlags::default`], equivalent
715/// to a plain `regex` with no `/c` or `/s` suffix.
716///
717/// # Examples
718///
719/// ```
720/// use libmagic_rs::parser::ast::RegexFlags;
721///
722/// let plain = RegexFlags::default();
723/// assert!(!plain.case_insensitive);
724/// assert!(!plain.start_offset);
725///
726/// let case_and_start = RegexFlags::default()
727/// .with_case_insensitive(true)
728/// .with_start_offset(true);
729/// assert!(case_and_start.case_insensitive);
730/// assert!(case_and_start.start_offset);
731/// ```
732#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq)]
733#[non_exhaustive]
734pub struct RegexFlags {
735 /// `/c` -- case-insensitive matching. When `true`, ASCII letter
736 /// casing is ignored during pattern matching.
737 pub case_insensitive: bool,
738 /// `/s` -- advance the GNU `file` previous-match anchor to the start
739 /// of the matched region instead of its end. Matches libmagic's
740 /// `REGEX_OFFSET_START` flag, which zeros the length contribution in
741 /// `moffset()` for `FILE_REGEX`. Useful for chaining child rules that
742 /// need to re-match from the position where the parent regex began.
743 pub start_offset: bool,
744}
745
746impl RegexFlags {
747 /// Builder-style setter for [`RegexFlags::case_insensitive`] (`/c`).
748 ///
749 /// Chain after [`RegexFlags::default()`] to construct `RegexFlags`
750 /// values without exhaustive struct literals. If a new flag is
751 /// added to `RegexFlags` in the future, callers using the builder
752 /// form keep compiling; callers using struct literals would need
753 /// an update.
754 #[must_use]
755 pub const fn with_case_insensitive(mut self, value: bool) -> Self {
756 self.case_insensitive = value;
757 self
758 }
759
760 /// Builder-style setter for [`RegexFlags::start_offset`] (`/s`).
761 ///
762 /// Chain after [`RegexFlags::default()`] to construct `RegexFlags`
763 /// values without exhaustive struct literals.
764 #[must_use]
765 pub const fn with_start_offset(mut self, value: bool) -> Self {
766 self.start_offset = value;
767 self
768 }
769}
770
771/// String modifier flags parsed from the `/[cCwWtTbf]` suffix on a `string`
772/// rule.
773///
774/// Mirrors libmagic's `STRING_*` flag bits from `src/file.h`. Each flag
775/// alters how `compare_string_with_flags` walks the pattern and buffer in
776/// parallel. The default (all `false`) preserves byte-exact comparison.
777///
778/// **`/c` vs `/C` are asymmetric**: the pattern character controls
779/// direction. With `/c`, only lowercase pattern chars trigger case-folding
780/// (the file byte is `tolower`'d). With `/C`, only uppercase pattern chars
781/// trigger folding (the file byte is `toupper`'d). Mixed-case patterns
782/// behave intuitively: `/c FoO` matches `FoO`, `Foo`, `FOO` but not
783/// `fOO` (the uppercase `F` is literal). See GOTCHAS S6.5 for the
784/// rationale and `src/softmagic.c` for the canonical libmagic contract.
785///
786/// **`/B` is NOT a string flag** -- it is the `pstring` 1-byte length-width
787/// letter (`PSTRING_1_BE`). `string/B` is rejected at parse time. See
788/// GOTCHAS S6.6.
789///
790/// # Examples
791///
792/// ```
793/// use libmagic_rs::parser::ast::StringFlags;
794///
795/// let plain = StringFlags::default();
796/// assert!(!plain.ignore_lowercase);
797///
798/// let case_insensitive = StringFlags::default().with_ignore_lowercase(true);
799/// assert!(case_insensitive.ignore_lowercase);
800///
801/// let compound = StringFlags::default()
802/// .with_ignore_lowercase(true)
803/// .with_compact_optional_whitespace(true);
804/// assert!(compound.ignore_lowercase);
805/// assert!(compound.compact_optional_whitespace);
806/// ```
807#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq)]
808// libmagic's contract is naturally a bitfield: each flag is a distinct
809// magic(5) letter (/c, /C, /w, /W, /t, /T, /b, /f) with its own STRING_*
810// constant in libmagic src/file.h. Flags compose freely (string/cw is
811// /c plus /w; string/wcCtTbf sets all eight). Folding pairs into enums
812// is possible (whitespace: none|optional|required; case: none|lower|upper)
813// but would obscure the libmagic mapping and produce verbose match arms
814// in every consumer. The bool-per-flag layout mirrors `RegexFlags` and
815// the libmagic source -- the clippy lint is overruled by the design.
816#[allow(clippy::struct_excessive_bools)]
817#[non_exhaustive]
818pub struct StringFlags {
819 /// `/W` -- `STRING_COMPACT_WHITESPACE`. Pattern whitespace requires at
820 /// least one whitespace byte in the file, then any further whitespace
821 /// in the file is consumed greedily.
822 pub compact_whitespace: bool,
823 /// `/w` -- `STRING_COMPACT_OPTIONAL_WHITESPACE`. Pattern whitespace
824 /// matches zero or more whitespace bytes in the file.
825 pub compact_optional_whitespace: bool,
826 /// `/c` -- `STRING_IGNORE_LOWERCASE`. When the pattern char is
827 /// lowercase, the file byte is `to_ascii_lowercase`'d before
828 /// comparison. Uppercase pattern chars are compared literally.
829 pub ignore_lowercase: bool,
830 /// `/C` -- `STRING_IGNORE_UPPERCASE`. When the pattern char is
831 /// uppercase, the file byte is `to_ascii_uppercase`'d before
832 /// comparison. Lowercase pattern chars are compared literally.
833 pub ignore_uppercase: bool,
834 /// `/t` -- `STRING_TEXTTEST`. Hint that this rule applies to text
835 /// files. Captured for MIME-output integration; does not currently
836 /// alter comparison.
837 pub text_test: bool,
838 /// `/T` -- `STRING_TRIM`. Trim leading and trailing ASCII whitespace
839 /// from the pattern before comparison. The trim is applied at
840 /// evaluation time (in `read_pattern_match`) so the AST keeps the
841 /// original pattern bytes; the comparison function receives the
842 /// trimmed slice.
843 pub trim: bool,
844 /// `/b` -- `STRING_BINTEST`. Hint that this rule applies to binary
845 /// files. Captured for MIME-output integration; does not currently
846 /// alter comparison.
847 pub bin_test: bool,
    /// `/f` -- `STRING_FULL_WORD`. Post-match check that the byte after
    /// the matched region is either end-of-buffer or a non-word
    /// character (word characters being ASCII alphanumerics and `_`).
851 pub full_word: bool,
852}
853
854impl StringFlags {
855 /// Returns `true` when every flag is `false` (the byte-exact fast
856 /// path). The evaluator dispatcher uses this to skip the
857 /// parallel-walk comparison when no flags are set.
858 #[must_use]
859 pub const fn is_empty(self) -> bool {
860 !self.compact_whitespace
861 && !self.compact_optional_whitespace
862 && !self.ignore_lowercase
863 && !self.ignore_uppercase
864 && !self.text_test
865 && !self.trim
866 && !self.bin_test
867 && !self.full_word
868 }
869
870 /// Builder-style setter for `compact_whitespace` (`/W`).
871 #[must_use]
872 pub const fn with_compact_whitespace(mut self, value: bool) -> Self {
873 self.compact_whitespace = value;
874 self
875 }
876
877 /// Builder-style setter for `compact_optional_whitespace` (`/w`).
878 #[must_use]
879 pub const fn with_compact_optional_whitespace(mut self, value: bool) -> Self {
880 self.compact_optional_whitespace = value;
881 self
882 }
883
884 /// Builder-style setter for `ignore_lowercase` (`/c`).
885 #[must_use]
886 pub const fn with_ignore_lowercase(mut self, value: bool) -> Self {
887 self.ignore_lowercase = value;
888 self
889 }
890
891 /// Builder-style setter for `ignore_uppercase` (`/C`).
892 #[must_use]
893 pub const fn with_ignore_uppercase(mut self, value: bool) -> Self {
894 self.ignore_uppercase = value;
895 self
896 }
897
898 /// Builder-style setter for `text_test` (`/t`).
899 #[must_use]
900 pub const fn with_text_test(mut self, value: bool) -> Self {
901 self.text_test = value;
902 self
903 }
904
905 /// Builder-style setter for `trim` (`/T`).
906 #[must_use]
907 pub const fn with_trim(mut self, value: bool) -> Self {
908 self.trim = value;
909 self
910 }
911
912 /// Builder-style setter for `bin_test` (`/b`).
913 #[must_use]
914 pub const fn with_bin_test(mut self, value: bool) -> Self {
915 self.bin_test = value;
916 self
917 }
918
919 /// Builder-style setter for `full_word` (`/f`).
920 #[must_use]
921 pub const fn with_full_word(mut self, value: bool) -> Self {
922 self.full_word = value;
923 self
924 }
925}
926
/// Search modifier flags parsed from the `/[sCcWwTtBbf]` suffix on a
/// `search` rule.
///
/// Mirrors [`StringFlags`] for the eight `STRING_*` letters that alter
/// the literal-pattern comparison (`/c`, `/C`, `/w`, `/W`, `/t`, `/T`,
/// `/b`, `/f`), plus a search-only `start_anchor` field for `/s` which
/// shifts the GNU `file` previous-match anchor to the START of the
/// matched region. The default (all `false`) preserves byte-exact
/// comparison and match-END anchor advance.
///
/// `SearchFlags` is structurally parallel to `StringFlags`: when one
/// struct grows a field, the other gains the same field in lockstep
/// so that [`SearchFlags::to_string_flags`] can keep handing off to
/// `compare_string_with_flags` without a generic refactor. The
/// search-only `start_anchor` field has no analog in `string` rules.
///
/// **`/c` vs `/C` are asymmetric** in the same way as [`StringFlags`]:
/// the pattern character controls fold direction. See [`StringFlags`]
/// and GOTCHAS S6.5 for the rationale.
///
/// # Examples
///
/// ```
/// use libmagic_rs::parser::ast::SearchFlags;
///
/// let plain = SearchFlags::default();
/// assert!(!plain.start_anchor);
/// assert!(plain.is_empty());
/// assert!(!plain.needs_byte_compare());
///
/// let start = SearchFlags::default().with_start_anchor(true);
/// assert!(start.start_anchor);
/// assert!(!start.is_empty());
/// // /s is anchor-only -- does not force the byte-compare slow path.
/// assert!(!start.needs_byte_compare());
///
/// let case = SearchFlags::default().with_ignore_lowercase(true);
/// assert!(case.needs_byte_compare());
/// ```
#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq)]
// libmagic's contract is naturally a bitfield: each flag is a distinct
// magic(5) letter with its own STRING_*/SEARCH_* constant in libmagic
// src/file.h. Flags compose freely (search/cs is /c plus /s; search/sWcT
// sets four). Folding pairs into enums is possible but would obscure
// the libmagic mapping and produce verbose match arms in every consumer.
// The bool-per-flag layout mirrors `StringFlags` and `RegexFlags` and the
// libmagic source -- the clippy lint is overruled by the design.
//
// NOTE(review): the suffix character class in the doc comment above lists
// `B`, but no field of this struct corresponds to it -- confirm against the
// parser whether `/B` is accepted (and what it maps to) or drop it from the
// documented class.
#[allow(clippy::struct_excessive_bools)]
#[non_exhaustive]
pub struct SearchFlags {
    /// `/W` -- `STRING_COMPACT_WHITESPACE`. Pattern whitespace requires at
    /// least one whitespace byte in the file, then any further whitespace
    /// in the file is consumed greedily.
    pub compact_whitespace: bool,
    /// `/w` -- `STRING_COMPACT_OPTIONAL_WHITESPACE`. Pattern whitespace
    /// matches zero or more whitespace bytes in the file.
    pub compact_optional_whitespace: bool,
    /// `/c` -- `STRING_IGNORE_LOWERCASE`. When the pattern char is
    /// lowercase, the file byte is `to_ascii_lowercase`'d before
    /// comparison. Uppercase pattern chars are compared literally.
    pub ignore_lowercase: bool,
    /// `/C` -- `STRING_IGNORE_UPPERCASE`. When the pattern char is
    /// uppercase, the file byte is `to_ascii_uppercase`'d before
    /// comparison. Lowercase pattern chars are compared literally.
    pub ignore_uppercase: bool,
    /// `/t` -- `STRING_TEXTTEST`. Hint that this rule applies to text
    /// files. Captured for MIME-output integration; does not currently
    /// alter comparison.
    pub text_test: bool,
    /// `/T` -- `STRING_TRIM`. Trim leading and trailing ASCII whitespace
    /// from the pattern before comparison.
    pub trim: bool,
    /// `/b` -- `STRING_BINTEST`. Hint that this rule applies to binary
    /// files. Captured for MIME-output integration; does not currently
    /// alter comparison.
    pub bin_test: bool,
    /// `/f` -- `STRING_FULL_WORD`. Post-match check that the byte after
    /// the matched region is either end-of-buffer or a non-word
    /// character (word characters being ASCII alphanumerics and `_`).
    pub full_word: bool,
    /// `/s` -- magic(5) "search-start" flag. When `true`, the GNU `file`
    /// previous-match anchor advance lands on the match-START index
    /// rather than match-END (the default). Mirrors libmagic's
    /// `FILE_SEARCH` anchor handling in `src/softmagic.c::moffset`. The
    /// dispatch happens in
    /// `src/evaluator/types/search.rs::search_bytes_consumed`.
    pub start_anchor: bool,
}
1015
1016impl SearchFlags {
1017 /// Returns `true` when every flag is `false` (default-constructed).
1018 #[must_use]
1019 pub const fn is_empty(self) -> bool {
1020 !self.compact_whitespace
1021 && !self.compact_optional_whitespace
1022 && !self.ignore_lowercase
1023 && !self.ignore_uppercase
1024 && !self.text_test
1025 && !self.trim
1026 && !self.bin_test
1027 && !self.full_word
1028 && !self.start_anchor
1029 }
1030
1031 /// Returns `true` when any flag alters the literal-pattern
1032 /// comparison, forcing the byte-walk slow path through
1033 /// `compare_string_with_flags`. The anchor-only / metadata-only
1034 /// flags (`/s`, `/t`, `/b`) do **not** trigger byte-compare;
1035 /// they preserve the `memchr::memmem::find` fast path.
1036 #[must_use]
1037 pub const fn needs_byte_compare(self) -> bool {
1038 self.compact_whitespace
1039 || self.compact_optional_whitespace
1040 || self.ignore_lowercase
1041 || self.ignore_uppercase
1042 || self.trim
1043 || self.full_word
1044 }
1045
1046 /// Project the eight shared flag fields onto a [`StringFlags`] for
1047 /// handoff to `compare_string_with_flags`. The search-only
1048 /// `start_anchor` field is dropped (it is anchor-advance policy,
1049 /// not comparison policy).
1050 ///
1051 /// # Examples
1052 ///
1053 /// ```
1054 /// use libmagic_rs::parser::ast::SearchFlags;
1055 ///
1056 /// let sf = SearchFlags::default()
1057 /// .with_ignore_lowercase(true)
1058 /// .with_trim(true)
1059 /// .with_start_anchor(true);
1060 /// let projected = sf.to_string_flags();
1061 /// assert!(projected.ignore_lowercase);
1062 /// assert!(projected.trim);
1063 /// // /s has no analog in StringFlags.
1064 /// ```
1065 #[must_use]
1066 pub const fn to_string_flags(self) -> StringFlags {
1067 StringFlags {
1068 compact_whitespace: self.compact_whitespace,
1069 compact_optional_whitespace: self.compact_optional_whitespace,
1070 ignore_lowercase: self.ignore_lowercase,
1071 ignore_uppercase: self.ignore_uppercase,
1072 text_test: self.text_test,
1073 trim: self.trim,
1074 bin_test: self.bin_test,
1075 full_word: self.full_word,
1076 }
1077 }
1078
1079 /// Builder-style setter for `compact_whitespace` (`/W`).
1080 #[must_use]
1081 pub const fn with_compact_whitespace(mut self, value: bool) -> Self {
1082 self.compact_whitespace = value;
1083 self
1084 }
1085
1086 /// Builder-style setter for `compact_optional_whitespace` (`/w`).
1087 #[must_use]
1088 pub const fn with_compact_optional_whitespace(mut self, value: bool) -> Self {
1089 self.compact_optional_whitespace = value;
1090 self
1091 }
1092
1093 /// Builder-style setter for `ignore_lowercase` (`/c`).
1094 #[must_use]
1095 pub const fn with_ignore_lowercase(mut self, value: bool) -> Self {
1096 self.ignore_lowercase = value;
1097 self
1098 }
1099
1100 /// Builder-style setter for `ignore_uppercase` (`/C`).
1101 #[must_use]
1102 pub const fn with_ignore_uppercase(mut self, value: bool) -> Self {
1103 self.ignore_uppercase = value;
1104 self
1105 }
1106
1107 /// Builder-style setter for `text_test` (`/t`).
1108 #[must_use]
1109 pub const fn with_text_test(mut self, value: bool) -> Self {
1110 self.text_test = value;
1111 self
1112 }
1113
1114 /// Builder-style setter for `trim` (`/T`).
1115 #[must_use]
1116 pub const fn with_trim(mut self, value: bool) -> Self {
1117 self.trim = value;
1118 self
1119 }
1120
1121 /// Builder-style setter for `bin_test` (`/b`).
1122 #[must_use]
1123 pub const fn with_bin_test(mut self, value: bool) -> Self {
1124 self.bin_test = value;
1125 self
1126 }
1127
1128 /// Builder-style setter for `full_word` (`/f`).
1129 #[must_use]
1130 pub const fn with_full_word(mut self, value: bool) -> Self {
1131 self.full_word = value;
1132 self
1133 }
1134
1135 /// Builder-style setter for `start_anchor` (`/s`).
1136 #[must_use]
1137 pub const fn with_start_anchor(mut self, value: bool) -> Self {
1138 self.start_anchor = value;
1139 self
1140 }
1141}
1142
/// Scan window specifier for a [`TypeKind::Regex`] rule.
///
/// Encodes the three mutually-exclusive scan modes in a single enum so
/// that the "byte count" and "line count" cases cannot be confused. The
/// `regex/l` shorthand (line mode with no explicit count) is represented
/// explicitly as [`RegexCount::Lines(None)`](RegexCount::Lines), which
/// is behaviorally equivalent to [`RegexCount::Default`] -- both walk
/// the full 8192-byte capped window -- but preserves the magic-file
/// surface syntax of the original rule. The 8192-byte hard cap
/// (matching GNU `file`'s `FILE_REGEX_MAX`) is applied by the evaluator
/// on every variant.
///
/// Note that the derived `PartialEq` compares variants structurally, so
/// `Lines(None)` and `Default` are *not* equal to each other even though
/// they describe the same scan window.
///
/// # Examples
///
/// ```
/// use libmagic_rs::parser::ast::RegexCount;
/// use std::num::NonZeroU32;
///
/// // Plain `regex` (no suffix): default 8192-byte window.
/// assert_eq!(RegexCount::default(), RegexCount::Default);
///
/// // `regex/100`: scan at most 100 bytes.
/// let hundred_bytes = RegexCount::Bytes(NonZeroU32::new(100).unwrap());
///
/// // `regex/1l`: scan the first line.
/// let one_line = RegexCount::Lines(NonZeroU32::new(1));
///
/// // `regex/l`: line-mode with no explicit count (walks terminators
/// // to the end of the 8192-byte capped window).
/// let unbounded_lines = RegexCount::Lines(None);
/// ```
#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq)]
pub enum RegexCount {
    /// No scan bound (plain `regex` with no suffix). Scans the default
    /// 8192-byte window from the rule's offset. This is the value
    /// produced by `RegexCount::default()`.
    #[default]
    Default,
    /// Byte-bounded scan (`regex/N` with no `/l` flag). The window is
    /// `min(n, 8192, remaining_buffer)` bytes long. `NonZeroU32` makes
    /// a zero-byte scan unrepresentable.
    Bytes(NonZeroU32),
    /// Line-bounded scan (`regex/Nl` or `regex/l`). The window walks
    /// LF / CRLF / bare CR line terminators from the offset. With
    /// `Some(n)`, the walk stops after the Nth terminator (inclusive).
    /// With `None` (the `regex/l` shorthand), the walk continues to
    /// the end of the 8192-byte capped window. Either way the
    /// effective byte window is capped at 8192.
    Lines(Option<NonZeroU32>),
}
1192
1193impl TypeKind {
1194 /// Returns the bit width of integer types, or `None` for non-integer types (e.g., String).
1195 ///
1196 /// # Examples
1197 ///
1198 /// ```
1199 /// use libmagic_rs::parser::ast::{Endianness, StringFlags, TypeKind};
1200 ///
1201 /// assert_eq!(TypeKind::Byte { signed: false }.bit_width(), Some(8));
1202 /// assert_eq!(TypeKind::Short { endian: Endianness::Native, signed: true }.bit_width(), Some(16));
1203 /// assert_eq!(TypeKind::Long { endian: Endianness::Native, signed: true }.bit_width(), Some(32));
1204 /// assert_eq!(TypeKind::Quad { endian: Endianness::Native, signed: true }.bit_width(), Some(64));
1205 /// assert_eq!(TypeKind::Float { endian: Endianness::Native }.bit_width(), Some(32));
1206 /// assert_eq!(TypeKind::Double { endian: Endianness::Native }.bit_width(), Some(64));
1207 /// assert_eq!(TypeKind::String { max_length: None, flags: StringFlags::default() }.bit_width(), None);
1208 /// ```
1209 #[must_use]
1210 pub const fn bit_width(&self) -> Option<u32> {
1211 match self {
1212 Self::Byte { .. } => Some(8),
1213 Self::Short { .. } => Some(16),
1214 Self::Long { .. } | Self::Float { .. } | Self::Date { .. } => Some(32),
1215 Self::Quad { .. } | Self::Double { .. } | Self::QDate { .. } => Some(64),
1216 Self::String { .. }
1217 | Self::String16 { .. }
1218 | Self::PString { .. }
1219 | Self::Regex { .. }
1220 | Self::Search { .. }
1221 | Self::Meta(_) => None,
1222 }
1223 }
1224}
1225
/// Comparison and bitwise operators
///
/// Each variant documents the magic-file surface syntax it stands for.
/// Only [`Operator::BitwiseAndMask`] carries a payload (the `u64` mask);
/// every other variant is a plain tag. The enum is `#[non_exhaustive]`,
/// so `match`es outside this crate need a wildcard arm, and it is
/// `Clone` but not `Copy`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[non_exhaustive]
pub enum Operator {
    /// Equality comparison (`=` or `==`)
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::Operator;
    ///
    /// let op = Operator::Equal;
    /// assert_eq!(op, Operator::Equal);
    /// ```
    Equal,
    /// Inequality comparison (`!=` or `<>`)
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::Operator;
    ///
    /// let op = Operator::NotEqual;
    /// assert_eq!(op, Operator::NotEqual);
    /// ```
    NotEqual,
    /// Less-than comparison (`<`)
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::Operator;
    ///
    /// let op = Operator::LessThan;
    /// assert_eq!(op, Operator::LessThan);
    /// ```
    LessThan,
    /// Greater-than comparison (`>`)
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::Operator;
    ///
    /// let op = Operator::GreaterThan;
    /// assert_eq!(op, Operator::GreaterThan);
    /// ```
    GreaterThan,
    /// Less-than-or-equal comparison (`<=`)
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::Operator;
    ///
    /// let op = Operator::LessEqual;
    /// assert_eq!(op, Operator::LessEqual);
    /// ```
    LessEqual,
    /// Greater-than-or-equal comparison (`>=`)
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::Operator;
    ///
    /// let op = Operator::GreaterEqual;
    /// assert_eq!(op, Operator::GreaterEqual);
    /// ```
    GreaterEqual,
    /// Bitwise AND operation without mask (`&`)
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::Operator;
    ///
    /// let op = Operator::BitwiseAnd;
    /// assert_eq!(op, Operator::BitwiseAnd);
    /// ```
    BitwiseAnd,
    /// Bitwise AND operation with mask value (`&` with a mask operand)
    ///
    /// The payload is the mask itself.
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::Operator;
    ///
    /// let op = Operator::BitwiseAndMask(0xFF00);
    /// assert_eq!(op, Operator::BitwiseAndMask(0xFF00));
    /// ```
    BitwiseAndMask(u64),
    /// Bitwise XOR operation (`^`)
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::Operator;
    ///
    /// let op = Operator::BitwiseXor;
    /// assert_eq!(op, Operator::BitwiseXor);
    /// ```
    BitwiseXor,
    /// Bitwise NOT/complement operation (`~`)
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::Operator;
    ///
    /// let op = Operator::BitwiseNot;
    /// assert_eq!(op, Operator::BitwiseNot);
    /// ```
    BitwiseNot,
    /// Match any value; condition always succeeds (`x`)
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::Operator;
    ///
    /// let op = Operator::AnyValue;
    /// assert_eq!(op, Operator::AnyValue);
    /// ```
    AnyValue,
}
1352
/// Value types for rule matching
///
/// Holds the literal a rule compares against: an unsigned or signed
/// 64-bit integer, a 64-bit float, raw bytes, or an owned string.
// `Eq` is intentionally absent from the derive list: the `Float(f64)`
// variant only supports `PartialEq`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[non_exhaustive]
pub enum Value {
    /// Unsigned integer value
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::Value;
    ///
    /// let val = Value::Uint(0xDEAD_BEEF);
    /// assert_eq!(val, Value::Uint(0xDEAD_BEEF));
    /// ```
    Uint(u64),
    /// Signed integer value
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::Value;
    ///
    /// let val = Value::Int(-42);
    /// assert_eq!(val, Value::Int(-42));
    /// ```
    Int(i64),
    /// Floating-point value (used for `float` and `double` types)
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::Value;
    ///
    /// let val = Value::Float(3.14);
    /// assert_eq!(val, Value::Float(3.14));
    /// ```
    Float(f64),
    /// Byte sequence
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::Value;
    ///
    /// let val = Value::Bytes(vec![0x7f, 0x45, 0x4c, 0x46]);
    /// assert_eq!(val, Value::Bytes(vec![0x7f, 0x45, 0x4c, 0x46]));
    /// ```
    Bytes(Vec<u8>),
    /// String value
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::Value;
    ///
    /// let val = Value::String("MZ".to_string());
    /// assert_eq!(val, Value::String("MZ".to_string()));
    /// ```
    String(String),
}
1413
/// Endianness specification for multi-byte values
///
/// A small `Copy` tag with three choices: an explicit little- or
/// big-endian order, or whatever order the target architecture uses.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
pub enum Endianness {
    /// Little-endian byte order (least significant byte first)
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::Endianness;
    ///
    /// let e = Endianness::Little;
    /// assert_eq!(e, Endianness::Little);
    /// ```
    Little,
    /// Big-endian byte order (most significant byte first)
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::Endianness;
    ///
    /// let e = Endianness::Big;
    /// assert_eq!(e, Endianness::Big);
    /// ```
    Big,
    /// Native system byte order (matches target architecture)
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::Endianness;
    ///
    /// let e = Endianness::Native;
    /// assert_eq!(e, Endianness::Native);
    /// ```
    Native,
}
1451
/// Strength modifier for magic rules
///
/// Strength modifiers adjust the default strength calculation for a rule.
/// They are specified using the `!:strength` directive in magic files.
///
/// The variant encodes the directive's operator and the `i32` payload is
/// the operand written after it, so `!:strength -5` is `Subtract(5)`
/// rather than `Add(-5)`.
///
/// # Examples
///
/// ```
/// use libmagic_rs::parser::ast::StrengthModifier;
///
/// let add = StrengthModifier::Add(10);      // !:strength +10
/// let sub = StrengthModifier::Subtract(5);  // !:strength -5
/// let mul = StrengthModifier::Multiply(2);  // !:strength *2
/// let div = StrengthModifier::Divide(2);    // !:strength /2
/// let set = StrengthModifier::Set(50);      // !:strength =50
/// ```
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
pub enum StrengthModifier {
    /// Add to the default strength: `!:strength +N`
    Add(i32),
    /// Subtract from the default strength: `!:strength -N`
    Subtract(i32),
    /// Multiply the default strength: `!:strength *N`
    Multiply(i32),
    /// Divide the default strength: `!:strength /N`
    Divide(i32),
    /// Set strength to an absolute value: `!:strength =N` or `!:strength N`
    Set(i32),
}
1481
/// Arithmetic or bitwise operation applied to a value read from the file
/// *before* the rule's comparison operator is evaluated.
///
/// magic(5) supports `+`, `-`, `*`, `/`, `%`, `&`, `|`, and `^` between the
/// type keyword and the comparison value (e.g., `lelong+1 x volume %d`
/// reads a long, adds 1, and formats the transformed value into the
/// message).
///
/// Bitwise AND has two encodings. The legacy `&MASK VALUE` form (mask plus
/// implicit equality) stays at the operator level as
/// [`Operator::BitwiseAndMask`]; `&MASK` followed by an explicit operator
/// is represented here as [`ValueTransformOp::BitAnd`]. See that variant
/// for the rationale.
///
/// The operand is signed (`i64`) so that subtraction and negative multipliers
/// round-trip cleanly. Bitwise ops reinterpret the operand as a `u64` bit
/// pattern at evaluation time, matching libmagic's `apprentice.c::mconvert`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[non_exhaustive]
pub enum ValueTransformOp {
    /// Addition (`type+N`).
    Add,
    /// Subtraction (`type-N`).
    Sub,
    /// Multiplication (`type*N`).
    Mul,
    /// Truncating integer division (`type/N`). Division by zero is rejected
    /// at evaluation time.
    Div,
    /// Remainder (`type%N`). Modulo by zero is rejected at evaluation time.
    Mod,
    /// Bitwise AND (`type&N`).
    ///
    /// magic(5) `&MASK` was historically encoded at the operator level
    /// via [`Operator::BitwiseAndMask`] (which combines mask+equal in
    /// one step). That encoding cannot represent rules like `lelong&0xff
    /// x %d` (mask + any-value, with the masked value used in format
    /// substitution). The parser promotes `&MASK` to this `BitAnd`
    /// transform when followed by another operator (`x`, `>`, `!=`, ...)
    /// so the read value is masked before comparison and before printf
    /// substitution. The legacy `&MASK VALUE` form (mask + implicit
    /// equal) keeps using `Operator::BitwiseAndMask` for backwards
    /// compatibility.
    BitAnd,
    /// Bitwise OR (`type|N`).
    Or,
    /// Bitwise XOR (`type^N`).
    Xor,
}
1526
/// A pre-comparison value transform: `(op, operand)`.
///
/// Applied to the value read from the file before the rule's comparison
/// operator runs. See [`ValueTransformOp`] for the supported operations.
///
/// The struct is `#[non_exhaustive]`, so code outside this crate cannot
/// build it with a struct literal and goes through
/// [`ValueTransform::new`] instead.
///
/// # Examples
///
/// ```
/// use libmagic_rs::parser::ast::{ValueTransform, ValueTransformOp};
///
/// // `lelong+1` -> add 1 to the read value
/// let t = ValueTransform::new(ValueTransformOp::Add, 1);
/// assert_eq!(t.op, ValueTransformOp::Add);
/// assert_eq!(t.operand, 1);
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[non_exhaustive]
pub struct ValueTransform {
    /// Operation to apply.
    pub op: ValueTransformOp,
    /// Operand to combine with the read value. Signed, so negative
    /// operands can be stored as written.
    pub operand: i64,
}
1550
1551impl ValueTransform {
1552 /// Construct a new `ValueTransform` from an op and an operand.
1553 #[must_use]
1554 pub const fn new(op: ValueTransformOp, operand: i64) -> Self {
1555 Self { op, operand }
1556 }
1557}
1558
/// Magic rule representation in the AST
///
/// A rule says where to read (`offset`), how to interpret the bytes
/// (`typ`), and how to compare them (`op` against `value`), together with
/// the `message` attached to it. Rules form a tree through `children`.
/// Structural invariants (non-empty message, bounded `level`, children
/// nested deeper than their parent) are checked by
/// [`MagicRule::validate`], not by construction.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[non_exhaustive]
pub struct MagicRule {
    /// Offset specification for where to read data
    pub offset: OffsetSpec,
    /// Type of data to read and interpret
    pub typ: TypeKind,
    /// Comparison operator to apply
    pub op: Operator,
    /// Expected value for comparison
    pub value: Value,
    /// Human-readable message for this rule
    pub message: String,
    /// Child rules that are evaluated if this rule matches
    pub children: Vec<MagicRule>,
    /// Indentation level for hierarchical rules. [`MagicRule::new`]
    /// starts rules at level 0.
    pub level: u32,
    /// Optional strength modifier from `!:strength` directive
    pub strength_modifier: Option<StrengthModifier>,
    /// Optional pre-comparison value transform from a magic-file
    /// type-suffix like `lelong+1` or `ulequad/1073741824`. When set,
    /// the read value is transformed *before* `op` is evaluated and
    /// before the message's `%`-format substitution, so format
    /// specifiers see the post-transform number.
    ///
    /// `#[serde(default)]` keeps existing serialized AST snapshots
    /// (which never had this field) round-tripping correctly: missing
    /// fields deserialize to `None`, which means "no transform" --
    /// the historical behavior.
    #[serde(default)]
    pub value_transform: Option<ValueTransform>,
}
1592
/// Validation errors returned by [`MagicRule::validate`].
///
/// `validate` stops at the first violation it finds. It checks the
/// rule's own message, then its own level, then each child in order.
#[derive(Debug, thiserror::Error, PartialEq, Eq)]
#[non_exhaustive]
pub enum MagicRuleValidationError {
    /// Rule message is empty. Messages are user-facing and required
    /// for meaningful output.
    #[error("rule message must not be empty")]
    EmptyMessage,

    /// The child rule at `child_index` has `level <= self.level`,
    /// violating the "children must nest deeper than the parent"
    /// invariant of the hierarchical indentation-based DSL.
    #[error(
        "child rule at index {child_index} has level {child_level}, \
         must be greater than parent level {parent_level}"
    )]
    InvalidChildLevel {
        /// Index of the offending child in `self.children`.
        child_index: usize,
        /// Level of the child rule.
        child_level: u32,
        /// Level of the parent rule.
        parent_level: u32,
    },

    /// Rule `level` exceeds the maximum supported depth
    /// ([`MagicRule::MAX_LEVEL`]). The limit is a
    /// hardening mechanism against stack overflow during deep recursion;
    /// libmagic files in the wild rarely go beyond 10 levels.
    #[error("rule level {level} exceeds maximum supported depth {max}")]
    LevelTooDeep {
        /// The invalid level value.
        level: u32,
        /// The maximum allowed depth.
        max: u32,
    },
}
1629
1630impl MagicRule {
1631 /// Hard structural ceiling on rule `level`.
1632 ///
1633 /// This is a conservative upper bound enforced by
1634 /// [`MagicRule::validate`] to keep the AST shape sane: real
1635 /// magic files in the wild rarely exceed ~10 levels of nesting,
1636 /// so rejecting rules with `level > 1000` catches obviously
1637 /// pathological input at construction time without constraining
1638 /// any legitimate rule.
1639 ///
1640 /// This ceiling is **independent of** the evaluator's
1641 /// `EvaluationConfig::max_recursion_depth` (default 20), which
1642 /// is the *runtime* recursion guard applied during rule
1643 /// evaluation. The evaluator limit is the first one that fires
1644 /// in practice -- a rule tree with 50 levels passes this
1645 /// structural check but is aborted by the evaluator long before
1646 /// reaching `MAX_LEVEL`. The two limits serve different purposes:
1647 /// `MAX_LEVEL` is an AST-shape sanity check, and
1648 /// `max_recursion_depth` is a per-evaluation resource bound.
1649 pub const MAX_LEVEL: u32 = 1000;
1650
1651 /// Construct a top-level rule with no children and no strength
1652 /// modifier.
1653 ///
1654 /// This is the most common constructor for programmatically building
1655 /// rules outside the parser. To add children, mutate
1656 /// [`MagicRule::children`] directly, or use [`MagicRule::with_children`].
1657 /// To set a strength modifier, use
1658 /// [`MagicRule::with_strength_modifier`].
1659 ///
1660 /// # Examples
1661 ///
1662 /// ```rust
1663 /// use libmagic_rs::{MagicRule, OffsetSpec, Operator, TypeKind, Value};
1664 ///
1665 /// let rule = MagicRule::new(
1666 /// OffsetSpec::Absolute(0),
1667 /// TypeKind::Byte { signed: false },
1668 /// Operator::Equal,
1669 /// Value::Uint(0x7f),
1670 /// "ELF magic byte".to_string(),
1671 /// );
1672 /// assert_eq!(rule.level, 0);
1673 /// assert!(rule.children.is_empty());
1674 /// assert!(rule.validate().is_ok());
1675 /// ```
1676 #[must_use]
1677 pub fn new(
1678 offset: OffsetSpec,
1679 typ: TypeKind,
1680 op: Operator,
1681 value: Value,
1682 message: String,
1683 ) -> Self {
1684 Self {
1685 offset,
1686 typ,
1687 op,
1688 value,
1689 message,
1690 children: vec![],
1691 level: 0,
1692 strength_modifier: None,
1693 value_transform: None,
1694 }
1695 }
1696
1697 /// Replace `self.children` with the given children and return the
1698 /// modified rule. Builder-style for chaining.
1699 #[must_use]
1700 pub fn with_children(mut self, children: Vec<MagicRule>) -> Self {
1701 self.children = children;
1702 self
1703 }
1704
1705 /// Set `self.strength_modifier` to the given value and return the
1706 /// modified rule. Builder-style for chaining.
1707 #[must_use]
1708 pub const fn with_strength_modifier(mut self, modifier: StrengthModifier) -> Self {
1709 self.strength_modifier = Some(modifier);
1710 self
1711 }
1712
1713 /// Set `self.level` to the given value and return the modified rule.
1714 /// Builder-style for chaining; typically used only when constructing
1715 /// child rules programmatically.
1716 #[must_use]
1717 pub const fn with_level(mut self, level: u32) -> Self {
1718 self.level = level;
1719 self
1720 }
1721
1722 /// Validate structural invariants of the rule.
1723 ///
1724 /// This checks invariants that the parser enforces automatically but
1725 /// that programmatic constructors (especially via serde deserialize)
1726 /// can violate:
1727 ///
1728 /// * Message must not be empty.
1729 /// * `level` must not exceed [`Self::MAX_LEVEL`].
1730 /// * Every child's `level` must be strictly greater than
1731 /// `self.level`, and each child must recursively validate.
1732 ///
1733 /// This does *not* validate that `value` is shape-compatible with
1734 /// `typ` (e.g., a `Value::Uint` against a `TypeKind::String`); such
1735 /// mismatches are coerced or rejected by the evaluator at match time.
1736 ///
1737 /// # Errors
1738 ///
1739 /// Returns [`MagicRuleValidationError`] describing the first
1740 /// invariant violation encountered.
1741 ///
1742 /// # Examples
1743 ///
1744 /// ```rust
1745 /// use libmagic_rs::{MagicRule, OffsetSpec, Operator, TypeKind, Value};
1746 ///
1747 /// let rule = MagicRule::new(
1748 /// OffsetSpec::Absolute(0),
1749 /// TypeKind::Byte { signed: false },
1750 /// Operator::Equal,
1751 /// Value::Uint(0),
1752 /// "zero byte".to_string(),
1753 /// );
1754 /// assert!(rule.validate().is_ok());
1755 /// ```
1756 pub fn validate(&self) -> Result<(), MagicRuleValidationError> {
1757 if self.message.is_empty() {
1758 return Err(MagicRuleValidationError::EmptyMessage);
1759 }
1760 if self.level > Self::MAX_LEVEL {
1761 return Err(MagicRuleValidationError::LevelTooDeep {
1762 level: self.level,
1763 max: Self::MAX_LEVEL,
1764 });
1765 }
1766 for (child_index, child) in self.children.iter().enumerate() {
1767 if child.level <= self.level {
1768 return Err(MagicRuleValidationError::InvalidChildLevel {
1769 child_index,
1770 child_level: child.level,
1771 parent_level: self.level,
1772 });
1773 }
1774 child.validate()?;
1775 }
1776 Ok(())
1777 }
1778}
1779
1780#[cfg(test)]
1781mod tests {
1782 use super::*;
1783
1784 #[test]
1785 fn test_magic_rule_new_defaults() {
1786 let rule = MagicRule::new(
1787 OffsetSpec::Absolute(0),
1788 TypeKind::Byte { signed: false },
1789 Operator::Equal,
1790 Value::Uint(0x7f),
1791 "ELF".to_string(),
1792 );
1793 assert_eq!(rule.level, 0);
1794 assert!(rule.children.is_empty());
1795 assert!(rule.strength_modifier.is_none());
1796 assert!(rule.validate().is_ok());
1797 }
1798
1799 #[test]
1800 fn test_magic_rule_builder_chain() {
1801 let child = MagicRule::new(
1802 OffsetSpec::Absolute(4),
1803 TypeKind::Byte { signed: false },
1804 Operator::Equal,
1805 Value::Uint(2),
1806 "64-bit".to_string(),
1807 )
1808 .with_level(1);
1809 let parent = MagicRule::new(
1810 OffsetSpec::Absolute(0),
1811 TypeKind::Byte { signed: false },
1812 Operator::Equal,
1813 Value::Uint(0x7f),
1814 "ELF".to_string(),
1815 )
1816 .with_children(vec![child])
1817 .with_strength_modifier(StrengthModifier::Add(10));
1818 assert_eq!(parent.children.len(), 1);
1819 assert_eq!(parent.strength_modifier, Some(StrengthModifier::Add(10)));
1820 assert!(parent.validate().is_ok());
1821 }
1822
1823 #[test]
1824 fn test_magic_rule_validate_empty_message_rejected() {
1825 let rule = MagicRule::new(
1826 OffsetSpec::Absolute(0),
1827 TypeKind::Byte { signed: false },
1828 Operator::Equal,
1829 Value::Uint(0),
1830 String::new(),
1831 );
1832 assert_eq!(rule.validate(), Err(MagicRuleValidationError::EmptyMessage));
1833 }
1834
1835 #[test]
1836 fn test_magic_rule_validate_child_level_must_be_deeper() {
1837 let child_same_level = MagicRule::new(
1838 OffsetSpec::Absolute(4),
1839 TypeKind::Byte { signed: false },
1840 Operator::Equal,
1841 Value::Uint(2),
1842 "child".to_string(),
1843 ); // level = 0, same as parent
1844 let parent = MagicRule::new(
1845 OffsetSpec::Absolute(0),
1846 TypeKind::Byte { signed: false },
1847 Operator::Equal,
1848 Value::Uint(0x7f),
1849 "parent".to_string(),
1850 )
1851 .with_children(vec![child_same_level]);
1852 assert_eq!(
1853 parent.validate(),
1854 Err(MagicRuleValidationError::InvalidChildLevel {
1855 child_index: 0,
1856 child_level: 0,
1857 parent_level: 0,
1858 })
1859 );
1860 }
1861
1862 #[test]
1863 fn test_magic_rule_validate_level_too_deep() {
1864 let rule = MagicRule::new(
1865 OffsetSpec::Absolute(0),
1866 TypeKind::Byte { signed: false },
1867 Operator::Equal,
1868 Value::Uint(0),
1869 "deep".to_string(),
1870 )
1871 .with_level(MagicRule::MAX_LEVEL + 1);
1872 assert_eq!(
1873 rule.validate(),
1874 Err(MagicRuleValidationError::LevelTooDeep {
1875 level: MagicRule::MAX_LEVEL + 1,
1876 max: MagicRule::MAX_LEVEL,
1877 })
1878 );
1879 }
1880
1881 #[test]
1882 fn test_offset_spec_absolute() {
1883 let offset = OffsetSpec::Absolute(42);
1884 assert_eq!(offset, OffsetSpec::Absolute(42));
1885
1886 // Test negative offset
1887 let negative = OffsetSpec::Absolute(-10);
1888 assert_eq!(negative, OffsetSpec::Absolute(-10));
1889 }
1890
1891 #[test]
1892 fn test_offset_spec_indirect() {
1893 let indirect = OffsetSpec::Indirect {
1894 base_offset: 0x20,
1895 base_relative: false,
1896 pointer_type: TypeKind::Long {
1897 endian: Endianness::Little,
1898 signed: false,
1899 },
1900 adjustment: 4,
1901 adjustment_op: IndirectAdjustmentOp::Add,
1902 result_relative: false,
1903 endian: Endianness::Little,
1904 };
1905
1906 match indirect {
1907 OffsetSpec::Indirect {
1908 base_offset,
1909 adjustment,
1910 ..
1911 } => {
1912 assert_eq!(base_offset, 0x20);
1913 assert_eq!(adjustment, 4);
1914 }
1915 _ => panic!("Expected Indirect variant"),
1916 }
1917 }
1918
1919 #[test]
1920 fn test_offset_spec_relative() {
1921 let relative = OffsetSpec::Relative(8);
1922 assert_eq!(relative, OffsetSpec::Relative(8));
1923
1924 // Test negative relative offset
1925 let negative_relative = OffsetSpec::Relative(-4);
1926 assert_eq!(negative_relative, OffsetSpec::Relative(-4));
1927 }
1928
1929 #[test]
1930 fn test_offset_spec_from_end() {
1931 let from_end = OffsetSpec::FromEnd(-16);
1932 assert_eq!(from_end, OffsetSpec::FromEnd(-16));
1933
1934 // Test positive from_end (though unusual)
1935 let positive_from_end = OffsetSpec::FromEnd(8);
1936 assert_eq!(positive_from_end, OffsetSpec::FromEnd(8));
1937 }
1938
1939 #[test]
1940 fn test_offset_spec_debug() {
1941 let offset = OffsetSpec::Absolute(100);
1942 let debug_str = format!("{offset:?}");
1943 assert!(debug_str.contains("Absolute"));
1944 assert!(debug_str.contains("100"));
1945 }
1946
1947 #[test]
1948 fn test_offset_spec_clone() {
1949 let original = OffsetSpec::Indirect {
1950 base_offset: 0x10,
1951 base_relative: false,
1952 pointer_type: TypeKind::Short {
1953 endian: Endianness::Big,
1954 signed: true,
1955 },
1956 adjustment: -2,
1957 adjustment_op: IndirectAdjustmentOp::Add,
1958 result_relative: false,
1959 endian: Endianness::Big,
1960 };
1961
1962 let cloned = original.clone();
1963 assert_eq!(original, cloned);
1964 }
1965
1966 #[test]
1967 fn test_offset_spec_serialization() {
1968 let offset = OffsetSpec::Absolute(42);
1969
1970 // Test JSON serialization
1971 let json = serde_json::to_string(&offset).expect("Failed to serialize");
1972 let deserialized: OffsetSpec = serde_json::from_str(&json).expect("Failed to deserialize");
1973
1974 assert_eq!(offset, deserialized);
1975 }
1976
1977 #[test]
1978 fn test_offset_spec_indirect_serialization() {
1979 let indirect = OffsetSpec::Indirect {
1980 base_offset: 0x100,
1981 base_relative: false,
1982 pointer_type: TypeKind::Long {
1983 endian: Endianness::Native,
1984 signed: false,
1985 },
1986 adjustment: 12,
1987 adjustment_op: IndirectAdjustmentOp::Add,
1988 result_relative: false,
1989 endian: Endianness::Native,
1990 };
1991
1992 // Test JSON serialization for complex variant
1993 let json = serde_json::to_string(&indirect).expect("Failed to serialize");
1994 let deserialized: OffsetSpec = serde_json::from_str(&json).expect("Failed to deserialize");
1995
1996 assert_eq!(indirect, deserialized);
1997 }
1998
1999 #[test]
2000 fn test_all_offset_spec_variants() {
2001 let variants = [
2002 OffsetSpec::Absolute(0),
2003 OffsetSpec::Absolute(-100),
2004 OffsetSpec::Indirect {
2005 base_offset: 0x20,
2006 base_relative: false,
2007 pointer_type: TypeKind::Byte { signed: true },
2008 adjustment: 0,
2009 adjustment_op: IndirectAdjustmentOp::Add,
2010 result_relative: false,
2011 endian: Endianness::Little,
2012 },
2013 OffsetSpec::Relative(50),
2014 OffsetSpec::Relative(-25),
2015 OffsetSpec::FromEnd(-8),
2016 OffsetSpec::FromEnd(4),
2017 ];
2018
2019 // Test that all variants can be created and are distinct
2020 for (i, variant) in variants.iter().enumerate() {
2021 for (j, other) in variants.iter().enumerate() {
2022 if i != j {
2023 assert_ne!(
2024 variant, other,
2025 "Variants at indices {i} and {j} should be different"
2026 );
2027 }
2028 }
2029 }
2030 }
2031
2032 #[test]
2033 fn test_endianness_variants() {
2034 let endianness_values = vec![Endianness::Little, Endianness::Big, Endianness::Native];
2035
2036 for endian in endianness_values {
2037 let indirect = OffsetSpec::Indirect {
2038 base_offset: 0,
2039 base_relative: false,
2040 pointer_type: TypeKind::Long {
2041 endian,
2042 signed: false,
2043 },
2044 adjustment: 0,
2045 adjustment_op: IndirectAdjustmentOp::Add,
2046 result_relative: false,
2047 endian,
2048 };
2049
2050 // Verify the endianness is preserved
2051 match indirect {
2052 OffsetSpec::Indirect {
2053 endian: actual_endian,
2054 ..
2055 } => {
2056 assert_eq!(endian, actual_endian);
2057 }
2058 _ => panic!("Expected Indirect variant"),
2059 }
2060 }
2061 }
2062
2063 // Value enum tests
2064 #[test]
2065 fn test_value_uint() {
2066 let value = Value::Uint(42);
2067 assert_eq!(value, Value::Uint(42));
2068
2069 // Test large values
2070 let large_value = Value::Uint(u64::MAX);
2071 assert_eq!(large_value, Value::Uint(u64::MAX));
2072 }
2073
2074 #[test]
2075 fn test_value_int() {
2076 let positive = Value::Int(100);
2077 assert_eq!(positive, Value::Int(100));
2078
2079 let negative = Value::Int(-50);
2080 assert_eq!(negative, Value::Int(-50));
2081
2082 // Test extreme values
2083 let max_int = Value::Int(i64::MAX);
2084 let min_int = Value::Int(i64::MIN);
2085 assert_eq!(max_int, Value::Int(i64::MAX));
2086 assert_eq!(min_int, Value::Int(i64::MIN));
2087 }
2088
2089 #[test]
2090 fn test_value_bytes() {
2091 let empty_bytes = Value::Bytes(vec![]);
2092 assert_eq!(empty_bytes, Value::Bytes(vec![]));
2093
2094 let some_bytes = Value::Bytes(vec![0x7f, 0x45, 0x4c, 0x46]);
2095 assert_eq!(some_bytes, Value::Bytes(vec![0x7f, 0x45, 0x4c, 0x46]));
2096
2097 // Test that different byte sequences are not equal
2098 let other_bytes = Value::Bytes(vec![0x50, 0x4b, 0x03, 0x04]);
2099 assert_ne!(some_bytes, other_bytes);
2100 }
2101
2102 #[test]
2103 fn test_value_string() {
2104 let empty_string = Value::String(String::new());
2105 assert_eq!(empty_string, Value::String(String::new()));
2106
2107 let hello = Value::String("Hello, World!".to_string());
2108 assert_eq!(hello, Value::String("Hello, World!".to_string()));
2109
2110 // Test Unicode strings
2111 let unicode = Value::String("🦀 Rust".to_string());
2112 assert_eq!(unicode, Value::String("🦀 Rust".to_string()));
2113 }
2114
2115 #[test]
2116 fn test_value_comparison() {
2117 // Test that different value types are not equal
2118 let uint_val = Value::Uint(42);
2119 let int_val = Value::Int(42);
2120 let float_val = Value::Float(42.0);
2121 let bytes_val = Value::Bytes(vec![42]);
2122 let string_val = Value::String("42".to_string());
2123
2124 assert_ne!(uint_val, int_val);
2125 assert_ne!(uint_val, float_val);
2126 assert_ne!(uint_val, bytes_val);
2127 assert_ne!(uint_val, string_val);
2128 assert_ne!(int_val, float_val);
2129 assert_ne!(int_val, bytes_val);
2130 assert_ne!(int_val, string_val);
2131 assert_ne!(float_val, bytes_val);
2132 assert_ne!(float_val, string_val);
2133 assert_ne!(bytes_val, string_val);
2134 }
2135
2136 #[test]
2137 fn test_value_debug() {
2138 let uint_val = Value::Uint(123);
2139 let debug_str = format!("{uint_val:?}");
2140 assert!(debug_str.contains("Uint"));
2141 assert!(debug_str.contains("123"));
2142
2143 let string_val = Value::String("test".to_string());
2144 let debug_str = format!("{string_val:?}");
2145 assert!(debug_str.contains("String"));
2146 assert!(debug_str.contains("test"));
2147 }
2148
2149 #[test]
2150 fn test_value_clone() {
2151 let original = Value::Bytes(vec![1, 2, 3, 4]);
2152 let cloned = original.clone();
2153 assert_eq!(original, cloned);
2154
2155 // Verify they are independent copies
2156 match (original, cloned) {
2157 (Value::Bytes(orig_bytes), Value::Bytes(cloned_bytes)) => {
2158 assert_eq!(orig_bytes, cloned_bytes);
2159 // They should have the same content but be different Vec instances
2160 }
2161 _ => panic!("Expected Bytes variants"),
2162 }
2163 }
2164
2165 #[test]
2166 fn test_value_float() {
2167 let value = Value::Float(3.125);
2168 assert_eq!(value, Value::Float(3.125));
2169
2170 let negative = Value::Float(-1.5);
2171 assert_eq!(negative, Value::Float(-1.5));
2172
2173 let zero = Value::Float(0.0);
2174 assert_eq!(zero, Value::Float(0.0));
2175 }
2176
2177 #[test]
2178 fn test_value_serialization() {
2179 let values = vec![
2180 Value::Uint(42),
2181 Value::Int(-100),
2182 Value::Float(3.125),
2183 Value::Bytes(vec![0x7f, 0x45, 0x4c, 0x46]),
2184 Value::String("ELF executable".to_string()),
2185 ];
2186
2187 for value in values {
2188 // Test JSON serialization
2189 let json = serde_json::to_string(&value).expect("Failed to serialize Value");
2190 let deserialized: Value =
2191 serde_json::from_str(&json).expect("Failed to deserialize Value");
2192 assert_eq!(value, deserialized);
2193 }
2194 }
2195
2196 #[test]
2197 fn test_value_serialization_edge_cases() {
2198 // Test empty collections
2199 let empty_bytes = Value::Bytes(vec![]);
2200 let json = serde_json::to_string(&empty_bytes).expect("Failed to serialize empty bytes");
2201 let deserialized: Value =
2202 serde_json::from_str(&json).expect("Failed to deserialize empty bytes");
2203 assert_eq!(empty_bytes, deserialized);
2204
2205 let empty_string = Value::String(String::new());
2206 let json = serde_json::to_string(&empty_string).expect("Failed to serialize empty string");
2207 let deserialized: Value =
2208 serde_json::from_str(&json).expect("Failed to deserialize empty string");
2209 assert_eq!(empty_string, deserialized);
2210
2211 // Test extreme values
2212 let max_uint = Value::Uint(u64::MAX);
2213 let json = serde_json::to_string(&max_uint).expect("Failed to serialize max uint");
2214 let deserialized: Value =
2215 serde_json::from_str(&json).expect("Failed to deserialize max uint");
2216 assert_eq!(max_uint, deserialized);
2217
2218 let min_int = Value::Int(i64::MIN);
2219 let json = serde_json::to_string(&min_int).expect("Failed to serialize min int");
2220 let deserialized: Value =
2221 serde_json::from_str(&json).expect("Failed to deserialize min int");
2222 assert_eq!(min_int, deserialized);
2223 }
2224
2225 // TypeKind tests
2226 #[test]
2227 fn test_type_kind_byte() {
2228 let byte_type = TypeKind::Byte { signed: true };
2229 assert_eq!(byte_type, TypeKind::Byte { signed: true });
2230 }
2231
2232 #[test]
2233 fn test_type_kind_short() {
2234 let short_little_endian = TypeKind::Short {
2235 endian: Endianness::Little,
2236 signed: false,
2237 };
2238 let short_big_endian = TypeKind::Short {
2239 endian: Endianness::Big,
2240 signed: true,
2241 };
2242
2243 assert_ne!(short_little_endian, short_big_endian);
2244 assert_eq!(short_little_endian, short_little_endian.clone());
2245 }
2246
2247 #[test]
2248 fn test_type_kind_long() {
2249 let long_native = TypeKind::Long {
2250 endian: Endianness::Native,
2251 signed: true,
2252 };
2253
2254 match long_native {
2255 TypeKind::Long { endian, signed } => {
2256 assert_eq!(endian, Endianness::Native);
2257 assert!(signed);
2258 }
2259 _ => panic!("Expected Long variant"),
2260 }
2261 }
2262
2263 #[test]
2264 fn test_type_kind_string() {
2265 let unlimited_string = TypeKind::String {
2266 max_length: None,
2267 flags: StringFlags::default(),
2268 };
2269 let limited_string = TypeKind::String {
2270 max_length: Some(256),
2271 flags: StringFlags::default(),
2272 };
2273
2274 assert_ne!(unlimited_string, limited_string);
2275 assert_eq!(unlimited_string, unlimited_string.clone());
2276 }
2277
2278 #[test]
2279 fn test_type_kind_serialization() {
2280 let types = vec![
2281 TypeKind::Byte { signed: true },
2282 TypeKind::Short {
2283 endian: Endianness::Little,
2284 signed: false,
2285 },
2286 TypeKind::Long {
2287 endian: Endianness::Big,
2288 signed: true,
2289 },
2290 TypeKind::Quad {
2291 endian: Endianness::Little,
2292 signed: false,
2293 },
2294 TypeKind::Quad {
2295 endian: Endianness::Big,
2296 signed: true,
2297 },
2298 TypeKind::Float {
2299 endian: Endianness::Native,
2300 },
2301 TypeKind::Float {
2302 endian: Endianness::Big,
2303 },
2304 TypeKind::Double {
2305 endian: Endianness::Little,
2306 },
2307 TypeKind::Double {
2308 endian: Endianness::Native,
2309 },
2310 TypeKind::Date {
2311 endian: Endianness::Big,
2312 utc: true,
2313 },
2314 TypeKind::Date {
2315 endian: Endianness::Little,
2316 utc: false,
2317 },
2318 TypeKind::QDate {
2319 endian: Endianness::Native,
2320 utc: true,
2321 },
2322 TypeKind::QDate {
2323 endian: Endianness::Big,
2324 utc: false,
2325 },
2326 TypeKind::String {
2327 max_length: None,
2328 flags: StringFlags::default(),
2329 },
2330 TypeKind::String {
2331 max_length: Some(128),
2332 flags: StringFlags::default(),
2333 },
2334 TypeKind::PString {
2335 max_length: None,
2336 length_width: PStringLengthWidth::OneByte,
2337 length_includes_itself: false,
2338 },
2339 TypeKind::PString {
2340 max_length: Some(64),
2341 length_width: PStringLengthWidth::OneByte,
2342 length_includes_itself: false,
2343 },
2344 TypeKind::PString {
2345 max_length: None,
2346 length_width: PStringLengthWidth::TwoByteBE,
2347 length_includes_itself: true,
2348 },
2349 TypeKind::PString {
2350 max_length: Some(128),
2351 length_width: PStringLengthWidth::FourByteLE,
2352 length_includes_itself: false,
2353 },
2354 ];
2355
2356 for typ in types {
2357 let json = serde_json::to_string(&typ).expect("Failed to serialize TypeKind");
2358 let deserialized: TypeKind =
2359 serde_json::from_str(&json).expect("Failed to deserialize TypeKind");
2360 assert_eq!(typ, deserialized);
2361 }
2362 }
2363
2364 // Operator tests
2365 #[test]
2366 fn test_operator_variants() {
2367 let operators = [
2368 Operator::Equal,
2369 Operator::NotEqual,
2370 Operator::BitwiseAnd,
2371 Operator::BitwiseXor,
2372 Operator::BitwiseNot,
2373 Operator::AnyValue,
2374 ];
2375
2376 for (i, op) in operators.iter().enumerate() {
2377 for (j, other) in operators.iter().enumerate() {
2378 if i == j {
2379 assert_eq!(op, other);
2380 } else {
2381 assert_ne!(op, other);
2382 }
2383 }
2384 }
2385 }
2386
2387 #[test]
2388 fn test_operator_serialization() {
2389 let operators = vec![
2390 Operator::Equal,
2391 Operator::NotEqual,
2392 Operator::BitwiseAnd,
2393 Operator::BitwiseXor,
2394 Operator::BitwiseNot,
2395 Operator::AnyValue,
2396 ];
2397
2398 for op in operators {
2399 let json = serde_json::to_string(&op).expect("Failed to serialize Operator");
2400 let deserialized: Operator =
2401 serde_json::from_str(&json).expect("Failed to deserialize Operator");
2402 assert_eq!(op, deserialized);
2403 }
2404 }
2405
2406 // MagicRule tests
2407 #[test]
2408 fn test_magic_rule_creation() {
2409 let rule = MagicRule {
2410 offset: OffsetSpec::Absolute(0),
2411 typ: TypeKind::Byte { signed: true },
2412 op: Operator::Equal,
2413 value: Value::Uint(0x7f),
2414 message: "ELF magic".to_string(),
2415 children: vec![],
2416 level: 0,
2417 strength_modifier: None,
2418 value_transform: None,
2419 };
2420
2421 assert_eq!(rule.message, "ELF magic");
2422 assert_eq!(rule.level, 0);
2423 assert!(rule.children.is_empty());
2424 }
2425
2426 #[test]
2427 fn test_magic_rule_with_children() {
2428 let child_rule = MagicRule {
2429 offset: OffsetSpec::Absolute(4),
2430 typ: TypeKind::Byte { signed: true },
2431 op: Operator::Equal,
2432 value: Value::Uint(1),
2433 message: "32-bit".to_string(),
2434 children: vec![],
2435 level: 1,
2436 strength_modifier: None,
2437 value_transform: None,
2438 };
2439
2440 let parent_rule = MagicRule {
2441 offset: OffsetSpec::Absolute(0),
2442 typ: TypeKind::Long {
2443 endian: Endianness::Little,
2444 signed: false,
2445 },
2446 op: Operator::Equal,
2447 value: Value::Bytes(vec![0x7f, 0x45, 0x4c, 0x46]),
2448 message: "ELF executable".to_string(),
2449 children: vec![child_rule],
2450 level: 0,
2451 strength_modifier: None,
2452 value_transform: None,
2453 };
2454
2455 assert_eq!(parent_rule.children.len(), 1);
2456 assert_eq!(parent_rule.children[0].level, 1);
2457 assert_eq!(parent_rule.children[0].message, "32-bit");
2458 }
2459
2460 #[test]
2461 fn test_magic_rule_serialization() {
2462 let rule = MagicRule {
2463 offset: OffsetSpec::Absolute(16),
2464 typ: TypeKind::Short {
2465 endian: Endianness::Little,
2466 signed: false,
2467 },
2468 op: Operator::NotEqual,
2469 value: Value::Uint(0),
2470 message: "Non-zero short value".to_string(),
2471 children: vec![],
2472 level: 2,
2473 strength_modifier: None,
2474 value_transform: None,
2475 };
2476
2477 let json = serde_json::to_string(&rule).expect("Failed to serialize MagicRule");
2478 let deserialized: MagicRule =
2479 serde_json::from_str(&json).expect("Failed to deserialize MagicRule");
2480
2481 assert_eq!(rule.message, deserialized.message);
2482 assert_eq!(rule.level, deserialized.level);
2483 assert_eq!(rule.children.len(), deserialized.children.len());
2484 }
2485
2486 // StrengthModifier tests
2487 #[test]
2488 fn test_strength_modifier_variants() {
2489 let add = StrengthModifier::Add(10);
2490 let sub = StrengthModifier::Subtract(5);
2491 let mul = StrengthModifier::Multiply(2);
2492 let div = StrengthModifier::Divide(2);
2493 let set = StrengthModifier::Set(50);
2494
2495 // Test that each variant has the correct inner value
2496 assert_eq!(add, StrengthModifier::Add(10));
2497 assert_eq!(sub, StrengthModifier::Subtract(5));
2498 assert_eq!(mul, StrengthModifier::Multiply(2));
2499 assert_eq!(div, StrengthModifier::Divide(2));
2500 assert_eq!(set, StrengthModifier::Set(50));
2501
2502 // Test that different variants are not equal
2503 assert_ne!(add, sub);
2504 assert_ne!(mul, div);
2505 assert_ne!(set, add);
2506 }
2507
2508 #[test]
2509 fn test_strength_modifier_negative_values() {
2510 let add_negative = StrengthModifier::Add(-10);
2511 let sub_negative = StrengthModifier::Subtract(-5);
2512 let set_negative = StrengthModifier::Set(-50);
2513
2514 assert_eq!(add_negative, StrengthModifier::Add(-10));
2515 assert_eq!(sub_negative, StrengthModifier::Subtract(-5));
2516 assert_eq!(set_negative, StrengthModifier::Set(-50));
2517 }
2518
2519 #[test]
2520 fn test_strength_modifier_serialization() {
2521 let modifiers = vec![
2522 StrengthModifier::Add(10),
2523 StrengthModifier::Subtract(5),
2524 StrengthModifier::Multiply(2),
2525 StrengthModifier::Divide(3),
2526 StrengthModifier::Set(100),
2527 ];
2528
2529 for modifier in modifiers {
2530 let json =
2531 serde_json::to_string(&modifier).expect("Failed to serialize StrengthModifier");
2532 let deserialized: StrengthModifier =
2533 serde_json::from_str(&json).expect("Failed to deserialize StrengthModifier");
2534 assert_eq!(modifier, deserialized);
2535 }
2536 }
2537
2538 #[test]
2539 fn test_strength_modifier_debug() {
2540 let modifier = StrengthModifier::Add(25);
2541 let debug_str = format!("{modifier:?}");
2542 assert!(debug_str.contains("Add"));
2543 assert!(debug_str.contains("25"));
2544 }
2545
2546 #[test]
2547 fn test_strength_modifier_clone() {
2548 let original = StrengthModifier::Multiply(4);
2549 let cloned = original;
2550 assert_eq!(original, cloned);
2551 }
2552
2553 #[test]
2554 fn test_magic_rule_with_strength_modifier() {
2555 let rule = MagicRule {
2556 offset: OffsetSpec::Absolute(0),
2557 typ: TypeKind::Byte { signed: true },
2558 op: Operator::Equal,
2559 value: Value::Uint(0x7f),
2560 message: "ELF magic".to_string(),
2561 children: vec![],
2562 level: 0,
2563 strength_modifier: Some(StrengthModifier::Add(20)),
2564 value_transform: None,
2565 };
2566
2567 assert_eq!(rule.strength_modifier, Some(StrengthModifier::Add(20)));
2568
2569 // Test serialization with strength_modifier
2570 let json = serde_json::to_string(&rule).expect("Failed to serialize MagicRule");
2571 let deserialized: MagicRule =
2572 serde_json::from_str(&json).expect("Failed to deserialize MagicRule");
2573 assert_eq!(rule.strength_modifier, deserialized.strength_modifier);
2574 }
2575
2576 #[test]
2577 fn test_magic_rule_without_strength_modifier() {
2578 let rule = MagicRule {
2579 offset: OffsetSpec::Absolute(0),
2580 typ: TypeKind::Byte { signed: true },
2581 op: Operator::Equal,
2582 value: Value::Uint(0x7f),
2583 message: "ELF magic".to_string(),
2584 children: vec![],
2585 level: 0,
2586 strength_modifier: None,
2587 value_transform: None,
2588 };
2589
2590 assert_eq!(rule.strength_modifier, None);
2591 }
2592
2593 // MetaType tests
2594 #[test]
2595 fn test_meta_type_variants_debug_clone_eq() {
2596 let cases = [
2597 MetaType::Default,
2598 MetaType::Clear,
2599 MetaType::Indirect,
2600 MetaType::Offset,
2601 MetaType::Name("part2".to_string()),
2602 MetaType::Use("part2".to_string()),
2603 ];
2604
2605 for (i, variant) in cases.iter().enumerate() {
2606 // Debug formatting is non-empty
2607 let debug_str = format!("{variant:?}");
2608 assert!(
2609 !debug_str.is_empty(),
2610 "Debug format must be non-empty for variant at index {i}"
2611 );
2612
2613 // Clone round-trip preserves equality
2614 let cloned = variant.clone();
2615 assert_eq!(
2616 variant, &cloned,
2617 "Clone must preserve equality for variant at index {i}"
2618 );
2619
2620 // Distinct variants are not equal
2621 for (j, other) in cases.iter().enumerate() {
2622 if i == j {
2623 assert_eq!(variant, other);
2624 } else {
2625 assert_ne!(
2626 variant, other,
2627 "Variants at indices {i} and {j} must differ"
2628 );
2629 }
2630 }
2631 }
2632 }
2633
2634 #[test]
2635 fn test_meta_type_serde_roundtrip() {
2636 let cases = [
2637 MetaType::Default,
2638 MetaType::Clear,
2639 MetaType::Indirect,
2640 MetaType::Offset,
2641 MetaType::Name("foo".to_string()),
2642 MetaType::Use("bar".to_string()),
2643 ];
2644
2645 for variant in cases {
2646 let json = serde_json::to_string(&variant).expect("serialize MetaType");
2647 let deserialized: MetaType = serde_json::from_str(&json).expect("deserialize MetaType");
2648 assert_eq!(variant, deserialized);
2649 }
2650 }
2651
2652 #[test]
2653 fn test_type_kind_meta_bit_width_is_none() {
2654 let cases = [
2655 MetaType::Default,
2656 MetaType::Clear,
2657 MetaType::Indirect,
2658 MetaType::Offset,
2659 MetaType::Name("x".to_string()),
2660 MetaType::Use("x".to_string()),
2661 ];
2662 for meta in cases {
2663 let kind = TypeKind::Meta(meta);
2664 assert_eq!(
2665 kind.bit_width(),
2666 None,
2667 "TypeKind::Meta must have no bit width: {kind:?}"
2668 );
2669 }
2670 }
2671}