libmagic_rs/parser/ast.rs
1// Copyright (c) 2025-2026 the libmagic-rs contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! Abstract Syntax Tree definitions for magic rules
5//!
6//! This module contains the core data structures that represent parsed magic rules
7//! and their components, including offset specifications, type kinds, operators, and values.
8
9use serde::{Deserialize, Serialize};
10use std::num::{NonZeroU32, NonZeroUsize};
11
12/// The width of the length prefix for Pascal strings.
13///
14/// Uppercase suffix letters (`/H`, `/L`) indicate big-endian byte order.
15/// Lowercase suffix letters (`/h`, `/l`) indicate little-endian byte order.
16///
17/// # Examples
18///
19/// ```
20/// use libmagic_rs::parser::ast::PStringLengthWidth;
21/// let width = PStringLengthWidth::OneByte;
22/// assert_eq!(width.byte_count(), 1);
23///
24/// let width = PStringLengthWidth::TwoByteBE;
25/// assert_eq!(width.byte_count(), 2);
26///
27/// let width = PStringLengthWidth::FourByteLE;
28/// assert_eq!(width.byte_count(), 4);
29/// ```
30#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
31#[allow(clippy::enum_variant_names)]
32#[non_exhaustive]
33pub enum PStringLengthWidth {
34 /// 1-byte length prefix (default, `/B` suffix)
35 ///
36 /// # Examples
37 ///
38 /// ```
39 /// use libmagic_rs::parser::ast::PStringLengthWidth;
40 /// let width = PStringLengthWidth::OneByte;
41 /// assert_eq!(width.byte_count(), 1);
42 /// ```
43 OneByte,
44 /// 2-byte big-endian length prefix (`/H` suffix)
45 ///
46 /// # Examples
47 ///
48 /// ```
49 /// use libmagic_rs::parser::ast::PStringLengthWidth;
50 /// let width = PStringLengthWidth::TwoByteBE;
51 /// assert_eq!(width.byte_count(), 2);
52 /// ```
53 TwoByteBE,
54 /// 2-byte little-endian length prefix (`/h` suffix)
55 ///
56 /// # Examples
57 ///
58 /// ```
59 /// use libmagic_rs::parser::ast::PStringLengthWidth;
60 /// let width = PStringLengthWidth::TwoByteLE;
61 /// assert_eq!(width.byte_count(), 2);
62 /// ```
63 TwoByteLE,
64 /// 4-byte big-endian length prefix (`/L` suffix)
65 ///
66 /// # Examples
67 ///
68 /// ```
69 /// use libmagic_rs::parser::ast::PStringLengthWidth;
70 /// let width = PStringLengthWidth::FourByteBE;
71 /// assert_eq!(width.byte_count(), 4);
72 /// ```
73 FourByteBE,
74 /// 4-byte little-endian length prefix (`/l` suffix)
75 ///
76 /// # Examples
77 ///
78 /// ```
79 /// use libmagic_rs::parser::ast::PStringLengthWidth;
80 /// let width = PStringLengthWidth::FourByteLE;
81 /// assert_eq!(width.byte_count(), 4);
82 /// ```
83 FourByteLE,
84}
85
86impl PStringLengthWidth {
87 /// Returns the number of bytes used for the length prefix.
88 #[must_use]
89 pub fn byte_count(&self) -> usize {
90 match self {
91 Self::OneByte => 1,
92 Self::TwoByteBE | Self::TwoByteLE => 2,
93 Self::FourByteBE | Self::FourByteLE => 4,
94 }
95 }
96}
97
98/// Arithmetic operation applied to the value read at an indirect offset's
99/// `base_offset` before the result is used as the final file offset.
100///
101/// magic(5) supports `+`, `-`, `*`, `/`, `%`, `&`, `|`, and `^` between the
102/// pointer-type specifier and the operand inside the parentheses. Addition
103/// and subtraction collapse to [`IndirectAdjustmentOp::Add`] with a signed
104/// `adjustment` (so `(N.X-1)` is `Add(-1)` rather than a separate `Sub`
105/// variant); the remaining operators each have a dedicated variant.
106///
107/// The default is [`IndirectAdjustmentOp::Add`]; an indirect offset with no
108/// arithmetic — just `(base.type)` — is encoded as `Add` with `adjustment:
109/// 0`, preserving backwards compatibility.
110///
111/// # Examples
112///
113/// ```
114/// use libmagic_rs::parser::ast::IndirectAdjustmentOp;
115///
116/// assert_eq!(IndirectAdjustmentOp::default(), IndirectAdjustmentOp::Add);
117/// ```
118#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
119#[non_exhaustive]
120pub enum IndirectAdjustmentOp {
121 /// Addition (also covers subtraction via negative `adjustment`).
122 ///
123 /// # Examples
124 ///
125 /// ```
126 /// use libmagic_rs::parser::ast::IndirectAdjustmentOp;
127 /// assert_eq!(IndirectAdjustmentOp::default(), IndirectAdjustmentOp::Add);
128 /// ```
129 #[default]
130 Add,
131 /// Multiplication: `pointer_value * adjustment`.
132 ///
133 /// # Examples
134 ///
135 /// ```
136 /// use libmagic_rs::parser::ast::IndirectAdjustmentOp;
137 /// let op = IndirectAdjustmentOp::Mul;
138 /// assert_eq!(op, IndirectAdjustmentOp::Mul);
139 /// ```
140 Mul,
141 /// Truncating integer division: `pointer_value / adjustment`. Division
142 /// by zero is rejected by the evaluator with an error.
143 ///
144 /// # Examples
145 ///
146 /// ```
147 /// use libmagic_rs::parser::ast::IndirectAdjustmentOp;
148 /// let op = IndirectAdjustmentOp::Div;
149 /// assert_eq!(op, IndirectAdjustmentOp::Div);
150 /// ```
151 Div,
152 /// Remainder: `pointer_value % adjustment`. Modulo by zero is rejected
153 /// by the evaluator with an error.
154 ///
155 /// # Examples
156 ///
157 /// ```
158 /// use libmagic_rs::parser::ast::IndirectAdjustmentOp;
159 /// let op = IndirectAdjustmentOp::Mod;
160 /// assert_eq!(op, IndirectAdjustmentOp::Mod);
161 /// ```
162 Mod,
163 /// Bitwise AND: `pointer_value & adjustment`.
164 ///
165 /// # Examples
166 ///
167 /// ```
168 /// use libmagic_rs::parser::ast::IndirectAdjustmentOp;
169 /// let op = IndirectAdjustmentOp::And;
170 /// assert_eq!(op, IndirectAdjustmentOp::And);
171 /// ```
172 And,
173 /// Bitwise OR: `pointer_value | adjustment`.
174 ///
175 /// # Examples
176 ///
177 /// ```
178 /// use libmagic_rs::parser::ast::IndirectAdjustmentOp;
179 /// let op = IndirectAdjustmentOp::Or;
180 /// assert_eq!(op, IndirectAdjustmentOp::Or);
181 /// ```
182 Or,
183 /// Bitwise XOR: `pointer_value ^ adjustment`.
184 ///
185 /// # Examples
186 ///
187 /// ```
188 /// use libmagic_rs::parser::ast::IndirectAdjustmentOp;
189 /// let op = IndirectAdjustmentOp::Xor;
190 /// assert_eq!(op, IndirectAdjustmentOp::Xor);
191 /// ```
192 Xor,
193}
194
195/// Offset specification for locating data in files
196#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
197#[non_exhaustive]
198pub enum OffsetSpec {
199 /// Absolute offset from file start (or from file end if negative)
200 ///
201 /// Positive values are offsets from the start of the file.
202 /// Negative values are offsets from the end of the file (same as `FromEnd`).
203 ///
204 /// # Examples
205 ///
206 /// ```
207 /// use libmagic_rs::parser::ast::OffsetSpec;
208 ///
209 /// let offset = OffsetSpec::Absolute(0x10); // Read at byte 16 from start
210 /// let from_end = OffsetSpec::Absolute(-4); // 4 bytes before end of file
211 /// ```
212 Absolute(i64),
213
214 /// Indirect offset through pointer dereferencing
215 ///
216 /// Reads a pointer value at `base_offset`, interprets it according to `pointer_type`
217 /// and `endian`, then combines `adjustment` with the pointer value using
218 /// `adjustment_op` to get the final offset. The default `adjustment_op`
219 /// is [`IndirectAdjustmentOp::Add`], so `(base.type)` and
220 /// `(base.type+N)` / `(base.type-N)` use addition (subtraction is
221 /// encoded as `Add` with a negative `adjustment`). magic(5) also
222 /// supports multiplicative and bitwise forms inside the parens, e.g.
223 /// `(0x200.s*2)` ([`IndirectAdjustmentOp::Mul`]).
224 ///
225 /// # Examples
226 ///
227 /// ```
228 /// use libmagic_rs::parser::ast::{OffsetSpec, TypeKind, Endianness, IndirectAdjustmentOp};
229 ///
230 /// let indirect = OffsetSpec::Indirect {
231 /// base_offset: 0x20,
232 /// base_relative: false,
233 /// pointer_type: TypeKind::Long { endian: Endianness::Little, signed: false },
234 /// adjustment: 4,
235 /// adjustment_op: IndirectAdjustmentOp::Add,
236 /// result_relative: false,
237 /// endian: Endianness::Little,
238 /// };
239 /// ```
240 Indirect {
241 /// Base offset to read pointer from. When `base_relative` is
242 /// `true`, this value is added to the current anchor (last-match
243 /// position) rather than being treated as an absolute file
244 /// position.
245 base_offset: i64,
246 /// If `true`, `base_offset` is relative to the current anchor
247 /// (i.e., `(&N.X)` syntax in magic files). Defaults to `false`
248 /// for backwards compatibility with existing AST snapshots; the
249 /// serde `default` attribute lets older serialized AST round-trip.
250 #[serde(default)]
251 base_relative: bool,
252 /// Type of pointer value
253 pointer_type: TypeKind,
254 /// Operand combined with the pointer value via `adjustment_op`.
255 ///
256 /// For `IndirectAdjustmentOp::Add`, the operand is signed (negative
257 /// values encode subtraction). For multiplicative and bitwise ops
258 /// the operand is interpreted as `i64` but typically magic files
259 /// supply non-negative literals.
260 adjustment: i64,
261 /// Arithmetic operation applied to the pointer value with
262 /// `adjustment` as the operand. Defaults to
263 /// [`IndirectAdjustmentOp::Add`] for legacy AST consumers via
264 /// serde's `default` attribute.
265 #[serde(default)]
266 adjustment_op: IndirectAdjustmentOp,
267 /// If `true`, the resolved offset is added to the current anchor
268 /// instead of being treated as an absolute file position. This
269 /// corresponds to magic-file `&(...)` syntax wrapping an indirect
270 /// spec, e.g., `&(0x10.l)`.
271 #[serde(default)]
272 result_relative: bool,
273 /// Endianness for pointer reading
274 endian: Endianness,
275 },
276
277 /// Relative offset from previous match position
278 ///
279 /// # Examples
280 ///
281 /// ```
282 /// use libmagic_rs::parser::ast::OffsetSpec;
283 ///
284 /// let relative = OffsetSpec::Relative(8); // 8 bytes after previous match
285 /// ```
286 Relative(i64),
287
288 /// Offset from end of file (negative values move towards start)
289 ///
290 /// # Examples
291 ///
292 /// ```
293 /// use libmagic_rs::parser::ast::OffsetSpec;
294 ///
295 /// let from_end = OffsetSpec::FromEnd(-16); // 16 bytes before end of file
296 /// ```
297 FromEnd(i64),
298}
299
300/// Control-flow directive carried by [`TypeKind::Meta`].
301///
302/// These are not value-reading types -- they correspond to magic(5)
303/// control-flow keywords (`default`, `clear`, `name`, `use`, `indirect`,
304/// `offset`) that modify how a rule set is traversed rather than reading
305/// bytes from the buffer. All six variants are fully evaluated by the
306/// engine: `default`/`clear` manage per-level sibling-matched state;
307/// `name`/`use` implement subroutine dispatch; `indirect` re-applies the
308/// root rule database at a resolved offset; and `offset` emits the
309/// current file position as `Value::Uint` for printf-style formatting.
310#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
311#[non_exhaustive]
312pub enum MetaType {
313 /// `default` directive: fires when no sibling at the same indentation
314 /// level has matched at the current offset. See magic(5) for the
315 /// "default" type semantics.
316 ///
317 /// # Examples
318 ///
319 /// ```
320 /// use libmagic_rs::parser::ast::MetaType;
321 /// let meta = MetaType::Default;
322 /// assert_eq!(meta, MetaType::Default);
323 /// ```
324 Default,
325 /// `clear` directive: resets the sibling-matched flag so a later
326 /// `default` sibling can fire even if an earlier sibling matched.
327 /// See magic(5) for the "clear" type semantics.
328 ///
329 /// # Examples
330 ///
331 /// ```
332 /// use libmagic_rs::parser::ast::MetaType;
333 /// let meta = MetaType::Clear;
334 /// assert_eq!(meta, MetaType::Clear);
335 /// ```
336 Clear,
337 /// `name <identifier>` directive: declares a named subroutine that
338 /// can be invoked later via [`MetaType::Use`]. See magic(5) for the
339 /// "name" type semantics.
340 ///
341 /// # Examples
342 ///
343 /// ```
344 /// use libmagic_rs::parser::ast::MetaType;
345 /// let meta = MetaType::Name("part2".to_string());
346 /// assert_eq!(meta, MetaType::Name("part2".to_string()));
347 /// ```
348 Name(String),
349 /// `use <identifier>` directive: invokes a named subroutine
350 /// previously declared via [`MetaType::Name`]. See magic(5) for the
351 /// "use" type semantics.
352 ///
353 /// # Examples
354 ///
355 /// ```
356 /// use libmagic_rs::parser::ast::MetaType;
357 /// let meta = MetaType::Use("part2".to_string());
358 /// assert_eq!(meta, MetaType::Use("part2".to_string()));
359 /// ```
360 Use(String),
361 /// `indirect` directive: re-applies the entire magic database at the
362 /// resolved offset. See magic(5) for the "indirect" type semantics.
363 ///
364 /// # Examples
365 ///
366 /// ```
367 /// use libmagic_rs::parser::ast::MetaType;
368 /// let meta = MetaType::Indirect;
369 /// assert_eq!(meta, MetaType::Indirect);
370 /// ```
371 Indirect,
372 /// `offset` type keyword: reports the current file offset rather than
373 /// reading a typed value from the buffer. See magic(5) for the
374 /// "offset" type semantics.
375 ///
376 /// Evaluation: the engine resolves the rule's offset specification
377 /// to an absolute position and emits a `RuleMatch` whose `value` is
378 /// `Value::Uint(position)`. Message templates can reference that
379 /// value through printf-style format specifiers (e.g. `%lld`),
380 /// which are substituted by
381 /// [`crate::output::format::format_magic_message`] at description-
382 /// assembly time. The only supported operator is `x` (`AnyValue`);
383 /// any other operator is `debug!`-logged and skipped.
384 ///
385 /// # Examples
386 ///
387 /// ```
388 /// use libmagic_rs::parser::ast::MetaType;
389 /// let meta = MetaType::Offset;
390 /// assert_eq!(meta, MetaType::Offset);
391 /// ```
392 Offset,
393}
394
395/// Data type specifications for interpreting bytes
396#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
397#[non_exhaustive]
398pub enum TypeKind {
399 /// Single byte
400 ///
401 /// # Examples
402 ///
403 /// ```
404 /// use libmagic_rs::parser::ast::TypeKind;
405 ///
406 /// let byte = TypeKind::Byte { signed: true };
407 /// assert_eq!(byte, TypeKind::Byte { signed: true });
408 /// ```
409 Byte {
410 /// Whether value is signed
411 signed: bool,
412 },
413 /// 16-bit integer
414 ///
415 /// # Examples
416 ///
417 /// ```
418 /// use libmagic_rs::parser::ast::{TypeKind, Endianness};
419 ///
420 /// let short = TypeKind::Short { endian: Endianness::Little, signed: true };
421 /// assert_eq!(short, TypeKind::Short { endian: Endianness::Little, signed: true });
422 /// ```
423 Short {
424 /// Byte order
425 endian: Endianness,
426 /// Whether value is signed
427 signed: bool,
428 },
429 /// 32-bit integer
430 ///
431 /// # Examples
432 ///
433 /// ```
434 /// use libmagic_rs::parser::ast::{TypeKind, Endianness};
435 ///
436 /// let long = TypeKind::Long { endian: Endianness::Big, signed: false };
437 /// assert_eq!(long, TypeKind::Long { endian: Endianness::Big, signed: false });
438 /// ```
439 Long {
440 /// Byte order
441 endian: Endianness,
442 /// Whether value is signed
443 signed: bool,
444 },
445 /// 64-bit integer
446 ///
447 /// # Examples
448 ///
449 /// ```
450 /// use libmagic_rs::parser::ast::{TypeKind, Endianness};
451 ///
452 /// let quad = TypeKind::Quad { endian: Endianness::Big, signed: true };
453 /// assert_eq!(quad, TypeKind::Quad { endian: Endianness::Big, signed: true });
454 /// ```
455 Quad {
456 /// Byte order
457 endian: Endianness,
458 /// Whether value is signed
459 signed: bool,
460 },
461 /// 32-bit IEEE 754 floating-point
462 ///
463 /// # Examples
464 ///
465 /// ```
466 /// use libmagic_rs::parser::ast::{TypeKind, Endianness};
467 ///
468 /// let float = TypeKind::Float { endian: Endianness::Big };
469 /// assert_eq!(float, TypeKind::Float { endian: Endianness::Big });
470 /// ```
471 Float {
472 /// Byte order
473 endian: Endianness,
474 },
475 /// 64-bit IEEE 754 double-precision floating-point
476 ///
477 /// # Examples
478 ///
479 /// ```
480 /// use libmagic_rs::parser::ast::{TypeKind, Endianness};
481 ///
482 /// let double = TypeKind::Double { endian: Endianness::Big };
483 /// assert_eq!(double, TypeKind::Double { endian: Endianness::Big });
484 /// ```
485 Double {
486 /// Byte order
487 endian: Endianness,
488 },
489 /// 32-bit Unix timestamp (seconds since epoch)
490 ///
491 /// # Examples
492 ///
493 /// ```
494 /// use libmagic_rs::parser::ast::{TypeKind, Endianness};
495 ///
496 /// let date = TypeKind::Date { endian: Endianness::Big, utc: true };
497 /// assert_eq!(date, TypeKind::Date { endian: Endianness::Big, utc: true });
498 /// ```
499 Date {
500 /// Byte order
501 endian: Endianness,
502 /// true = UTC, false = local time
503 utc: bool,
504 },
505 /// 64-bit Unix timestamp (seconds since epoch)
506 ///
507 /// # Examples
508 ///
509 /// ```
510 /// use libmagic_rs::parser::ast::{TypeKind, Endianness};
511 ///
512 /// let qdate = TypeKind::QDate { endian: Endianness::Little, utc: false };
513 /// assert_eq!(qdate, TypeKind::QDate { endian: Endianness::Little, utc: false });
514 /// ```
515 QDate {
516 /// Byte order
517 endian: Endianness,
518 /// true = UTC, false = local time
519 utc: bool,
520 },
521 /// String data
522 ///
523 /// The `flags` field carries the modifier flags parsed from the
524 /// `/[cCwWtTbf]` suffix on a `string` rule. Default flags (all
525 /// `false`) preserve the existing byte-exact comparison path; any
526 /// non-default flag routes the rule through
527 /// `compare_string_with_flags` in `src/evaluator/types/string.rs`.
528 /// See [`StringFlags`] for per-flag semantics.
529 ///
530 /// # Examples
531 ///
532 /// ```
533 /// use libmagic_rs::parser::ast::{StringFlags, TypeKind};
534 ///
535 /// let s = TypeKind::String { max_length: None, flags: StringFlags::default() };
536 /// assert_eq!(s, TypeKind::String { max_length: None, flags: StringFlags::default() });
537 ///
538 /// let case_insensitive = TypeKind::String {
539 /// max_length: None,
540 /// flags: StringFlags::default().with_ignore_lowercase(true),
541 /// };
542 /// assert!(matches!(case_insensitive, TypeKind::String { flags, .. } if flags.ignore_lowercase));
543 /// ```
544 String {
545 /// Maximum length to read
546 max_length: Option<usize>,
547 /// Modifier flags from the `/[cCwWtTbf]` suffix
548 flags: StringFlags,
549 },
550 /// UCS-2 (16-bit Unicode) string with explicit byte order.
551 ///
552 /// Backs the magic(5) `lestring16` (little-endian) and `bestring16`
553 /// (big-endian) keywords. Each character occupies two bytes in the
554 /// file; the reader stops at a U+0000 terminator (encoded as the
555 /// 2-byte sequence `0x00 0x00`) or at the end of the buffer. The
556 /// decoded value is returned as a Rust `String` (so non-ASCII
557 /// characters are preserved when valid UCS-2).
558 ///
559 /// # Examples
560 ///
561 /// ```
562 /// use libmagic_rs::parser::ast::{TypeKind, Endianness};
563 ///
564 /// let le = TypeKind::String16 { endian: Endianness::Little };
565 /// assert_eq!(le, TypeKind::String16 { endian: Endianness::Little });
566 ///
567 /// let be = TypeKind::String16 { endian: Endianness::Big };
568 /// assert_eq!(be, TypeKind::String16 { endian: Endianness::Big });
569 /// ```
570 String16 {
571 /// Endianness for the 16-bit code units.
572 endian: Endianness,
573 },
574 /// Pascal string (length-prefixed, supports 1/2/4-byte prefix, with optional max length)
575 ///
576 /// Pascal strings store the length as a prefix (1, 2, or 4 bytes, with configurable endianness), followed by
577 /// that many bytes of string data. Unlike C strings, they are not null-terminated.
578 ///
579 /// # Examples
580 ///
581 /// ```
582 /// use libmagic_rs::parser::ast::{TypeKind, PStringLengthWidth};
583 ///
584 /// let pstring = TypeKind::PString { max_length: None, length_width: PStringLengthWidth::OneByte, length_includes_itself: false };
585 /// assert_eq!(pstring, TypeKind::PString { max_length: None, length_width: PStringLengthWidth::OneByte, length_includes_itself: false });
586 ///
587 /// let limited = TypeKind::PString { max_length: Some(64), length_width: PStringLengthWidth::TwoByteBE, length_includes_itself: false };
588 /// assert_eq!(limited, TypeKind::PString { max_length: Some(64), length_width: PStringLengthWidth::TwoByteBE, length_includes_itself: false });
589 ///
590 /// // /J flag: stored length includes the length field itself
591 /// let jpeg = TypeKind::PString { max_length: None, length_width: PStringLengthWidth::TwoByteBE, length_includes_itself: true };
592 /// assert_eq!(jpeg, TypeKind::PString { max_length: None, length_width: PStringLengthWidth::TwoByteBE, length_includes_itself: true });
593 /// ```
594 PString {
595 /// Maximum length to read (caps the length value)
596 max_length: Option<usize>,
597 /// Width of the length prefix
598 length_width: PStringLengthWidth,
599 /// Whether the stored length includes the length field itself (`/J` flag)
600 length_includes_itself: bool,
601 },
602 /// Regular expression matching against file contents
603 ///
604 /// Regex rules match a POSIX-extended regular expression pattern against the
605 /// file buffer. Patterns are compiled with multi-line mode always enabled
606 /// (matching libmagic's unconditional `REG_NEWLINE`), so `^` and `$` match
607 /// at line boundaries and `.` does not match `\n`. The `flags` control
608 /// case sensitivity and anchor advance semantics; the `count` field
609 /// controls the scan window (byte or line bounds). The scan window is
610 /// always capped at 8192 bytes (matching GNU `file`'s `FILE_REGEX_MAX`;
611 /// enforced in the evaluator).
612 ///
613 /// # Examples
614 ///
615 /// ```
616 /// use libmagic_rs::parser::ast::{RegexCount, RegexFlags, TypeKind};
617 /// use std::num::NonZeroU32;
618 ///
619 /// // Plain `regex` -- no flags, default 8192-byte scan window.
620 /// let plain = TypeKind::Regex {
621 /// flags: RegexFlags::default(),
622 /// count: RegexCount::Default,
623 /// };
624 ///
625 /// // `regex/1l` -- scan the first line only.
626 /// let first_line = TypeKind::Regex {
627 /// flags: RegexFlags::default(),
628 /// count: RegexCount::Lines(NonZeroU32::new(1)),
629 /// };
630 ///
631 /// // `regex/cs` -- case-insensitive, anchor advances to match-start.
632 /// let case_insensitive_start = TypeKind::Regex {
633 /// flags: RegexFlags {
634 /// case_insensitive: true,
635 /// start_offset: true,
636 /// },
637 /// count: RegexCount::Default,
638 /// };
639 /// ```
640 Regex {
641 /// Modifier flags from the `/[cs]` suffix (`/c` case-insensitive,
642 /// `/s` start-offset anchor). Line-mode is encoded by the
643 /// [`RegexCount::Lines`] variant of `count`, not a flag.
644 flags: RegexFlags,
645 /// Scan window specifier: default 8192 bytes, explicit byte
646 /// count, or explicit line count. See [`RegexCount`] for the
647 /// three cases.
648 count: RegexCount,
649 },
650 /// Multi-byte pattern search within a bounded range
651 ///
652 /// Search rules look for a literal byte pattern within `range` bytes of
653 /// the offset. Unlike [`TypeKind::String`], which only matches at the
654 /// exact offset, `search` scans forward up to `range` bytes for the
655 /// first occurrence. The range is **mandatory** per GNU `file`'s
656 /// magic(5) specification and is stored as a [`NonZeroUsize`] so a
657 /// zero-range search is unrepresentable.
658 ///
659 /// # Examples
660 ///
661 /// ```
662 /// use libmagic_rs::parser::ast::TypeKind;
663 /// use std::num::NonZeroUsize;
664 ///
665 /// // `search/256` -- scan up to 256 bytes for the literal pattern.
666 /// let bounded = TypeKind::Search {
667 /// range: NonZeroUsize::new(256).unwrap(),
668 /// };
669 /// ```
670 Search {
671 /// Scan window width in bytes, starting at the rule's offset.
672 range: NonZeroUsize,
673 },
674 /// Control-flow directive (`default`, `clear`, `name`, `use`,
675 /// `indirect`, `offset`).
676 ///
677 /// These magic(5) keywords do not read or compare bytes; they modify
678 /// how a rule set is traversed. All six variants are fully evaluated:
679 /// `default` fires as a fallback when no sibling at the same level
680 /// has matched; `clear` resets that flag; `name`/`use` support
681 /// subroutine definition and invocation; `indirect` re-enters the
682 /// rule set at a resolved offset; `offset` emits the resolved file
683 /// position as `Value::Uint` for printf-style message substitution.
684 /// See [`MetaType`] for the individual variants.
685 ///
686 /// # Examples
687 ///
688 /// ```
689 /// use libmagic_rs::parser::ast::{MetaType, TypeKind};
690 /// let default_rule = TypeKind::Meta(MetaType::Default);
691 /// assert_eq!(default_rule, TypeKind::Meta(MetaType::Default));
692 /// ```
693 Meta(MetaType),
694}
695
696/// Regex modifier flags parsed from the `/[cs]` suffix on a `regex` rule.
697///
698/// The `/l` "line-based window" modifier is **not** represented here; it
699/// lives on [`RegexCount::Lines`] so that the type-level encoding makes
700/// "line count" and "byte count" mutually exclusive. An earlier design
701/// used two separate fields (`line_based: bool` + `count: Option<u32>`)
702/// which admitted the cross-field state `line_based: true, count: None`;
703/// under the current encoding that case is expressed explicitly as
704/// [`RegexCount::Lines(None)`](RegexCount::Lines) -- the `regex/l`
705/// shorthand -- and is behaviorally equivalent to [`RegexCount::Default`]
706/// (both walk the full 8192-byte capped window).
707///
708/// All flags default to `false` via [`RegexFlags::default`], equivalent
709/// to a plain `regex` with no `/c` or `/s` suffix.
710///
711/// # Examples
712///
713/// ```
714/// use libmagic_rs::parser::ast::RegexFlags;
715///
716/// let plain = RegexFlags::default();
717/// assert!(!plain.case_insensitive);
718/// assert!(!plain.start_offset);
719///
720/// let case_and_start = RegexFlags::default()
721/// .with_case_insensitive(true)
722/// .with_start_offset(true);
723/// assert!(case_and_start.case_insensitive);
724/// assert!(case_and_start.start_offset);
725/// ```
726#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq)]
727pub struct RegexFlags {
728 /// `/c` -- case-insensitive matching. When `true`, ASCII letter
729 /// casing is ignored during pattern matching.
730 pub case_insensitive: bool,
731 /// `/s` -- advance the GNU `file` previous-match anchor to the start
732 /// of the matched region instead of its end. Matches libmagic's
733 /// `REGEX_OFFSET_START` flag, which zeros the length contribution in
734 /// `moffset()` for `FILE_REGEX`. Useful for chaining child rules that
735 /// need to re-match from the position where the parent regex began.
736 pub start_offset: bool,
737}
738
739impl RegexFlags {
740 /// Builder-style setter for [`RegexFlags::case_insensitive`] (`/c`).
741 ///
742 /// Chain after [`RegexFlags::default()`] to construct `RegexFlags`
743 /// values without exhaustive struct literals. If a new flag is
744 /// added to `RegexFlags` in the future, callers using the builder
745 /// form keep compiling; callers using struct literals would need
746 /// an update.
747 #[must_use]
748 pub const fn with_case_insensitive(mut self, value: bool) -> Self {
749 self.case_insensitive = value;
750 self
751 }
752
753 /// Builder-style setter for [`RegexFlags::start_offset`] (`/s`).
754 ///
755 /// Chain after [`RegexFlags::default()`] to construct `RegexFlags`
756 /// values without exhaustive struct literals.
757 #[must_use]
758 pub const fn with_start_offset(mut self, value: bool) -> Self {
759 self.start_offset = value;
760 self
761 }
762}
763
764/// String modifier flags parsed from the `/[cCwWtTbf]` suffix on a `string`
765/// rule.
766///
767/// Mirrors libmagic's `STRING_*` flag bits from `src/file.h`. Each flag
768/// alters how `compare_string_with_flags` walks the pattern and buffer in
769/// parallel. The default (all `false`) preserves byte-exact comparison.
770///
771/// **`/c` vs `/C` are asymmetric**: the pattern character controls
772/// direction. With `/c`, only lowercase pattern chars trigger case-folding
773/// (the file byte is `tolower`'d). With `/C`, only uppercase pattern chars
774/// trigger folding (the file byte is `toupper`'d). Mixed-case patterns
775/// behave intuitively: `/c FoO` matches `FoO`, `Foo`, `FOO` but not
776/// `fOO` (the uppercase `F` is literal). See GOTCHAS S6.5 for the
777/// rationale and `src/softmagic.c` for the canonical libmagic contract.
778///
779/// **`/B` is NOT a string flag** -- it is the `pstring` 1-byte length-width
780/// letter (`PSTRING_1_BE`). `string/B` is rejected at parse time. See
781/// GOTCHAS S6.6.
782///
783/// # Examples
784///
785/// ```
786/// use libmagic_rs::parser::ast::StringFlags;
787///
788/// let plain = StringFlags::default();
789/// assert!(!plain.ignore_lowercase);
790///
791/// let case_insensitive = StringFlags::default().with_ignore_lowercase(true);
792/// assert!(case_insensitive.ignore_lowercase);
793///
794/// let compound = StringFlags::default()
795/// .with_ignore_lowercase(true)
796/// .with_compact_optional_whitespace(true);
797/// assert!(compound.ignore_lowercase);
798/// assert!(compound.compact_optional_whitespace);
799/// ```
800#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq)]
801// libmagic's contract is naturally a bitfield: each flag is a distinct
802// magic(5) letter (/c, /C, /w, /W, /t, /T, /b, /f) with its own STRING_*
803// constant in libmagic src/file.h. Flags compose freely (string/cw is
804// /c plus /w; string/wcCtTbf sets all eight). Folding pairs into enums
805// is possible (whitespace: none|optional|required; case: none|lower|upper)
806// but would obscure the libmagic mapping and produce verbose match arms
807// in every consumer. The bool-per-flag layout mirrors `RegexFlags` and
808// the libmagic source -- the clippy lint is overruled by the design.
809#[allow(clippy::struct_excessive_bools)]
810pub struct StringFlags {
811 /// `/W` -- `STRING_COMPACT_WHITESPACE`. Pattern whitespace requires at
812 /// least one whitespace byte in the file, then any further whitespace
813 /// in the file is consumed greedily.
814 pub compact_whitespace: bool,
815 /// `/w` -- `STRING_COMPACT_OPTIONAL_WHITESPACE`. Pattern whitespace
816 /// matches zero or more whitespace bytes in the file.
817 pub compact_optional_whitespace: bool,
818 /// `/c` -- `STRING_IGNORE_LOWERCASE`. When the pattern char is
819 /// lowercase, the file byte is `to_ascii_lowercase`'d before
820 /// comparison. Uppercase pattern chars are compared literally.
821 pub ignore_lowercase: bool,
822 /// `/C` -- `STRING_IGNORE_UPPERCASE`. When the pattern char is
823 /// uppercase, the file byte is `to_ascii_uppercase`'d before
824 /// comparison. Lowercase pattern chars are compared literally.
825 pub ignore_uppercase: bool,
826 /// `/t` -- `STRING_TEXTTEST`. Hint that this rule applies to text
827 /// files. Captured for MIME-output integration; does not currently
828 /// alter comparison.
829 pub text_test: bool,
830 /// `/T` -- `STRING_TRIM`. Trim leading and trailing ASCII whitespace
831 /// from the pattern before comparison. The trim is applied at
832 /// evaluation time (in `read_pattern_match`) so the AST keeps the
833 /// original pattern bytes; the comparison function receives the
834 /// trimmed slice.
835 pub trim: bool,
836 /// `/b` -- `STRING_BINTEST`. Hint that this rule applies to binary
837 /// files. Captured for MIME-output integration; does not currently
838 /// alter comparison.
839 pub bin_test: bool,
840 /// `/f` -- `STRING_FULL_WORD`. Post-match check that the byte after
841 /// the matched region is either end-of-buffer or a non-word
842 /// character (ASCII alphanumeric or `_`).
843 pub full_word: bool,
844}
845
846impl StringFlags {
847 /// Returns `true` when every flag is `false` (the byte-exact fast
848 /// path). The evaluator dispatcher uses this to skip the
849 /// parallel-walk comparison when no flags are set.
850 #[must_use]
851 pub const fn is_empty(self) -> bool {
852 !self.compact_whitespace
853 && !self.compact_optional_whitespace
854 && !self.ignore_lowercase
855 && !self.ignore_uppercase
856 && !self.text_test
857 && !self.trim
858 && !self.bin_test
859 && !self.full_word
860 }
861
862 /// Builder-style setter for `compact_whitespace` (`/W`).
863 #[must_use]
864 pub const fn with_compact_whitespace(mut self, value: bool) -> Self {
865 self.compact_whitespace = value;
866 self
867 }
868
869 /// Builder-style setter for `compact_optional_whitespace` (`/w`).
870 #[must_use]
871 pub const fn with_compact_optional_whitespace(mut self, value: bool) -> Self {
872 self.compact_optional_whitespace = value;
873 self
874 }
875
876 /// Builder-style setter for `ignore_lowercase` (`/c`).
877 #[must_use]
878 pub const fn with_ignore_lowercase(mut self, value: bool) -> Self {
879 self.ignore_lowercase = value;
880 self
881 }
882
883 /// Builder-style setter for `ignore_uppercase` (`/C`).
884 #[must_use]
885 pub const fn with_ignore_uppercase(mut self, value: bool) -> Self {
886 self.ignore_uppercase = value;
887 self
888 }
889
890 /// Builder-style setter for `text_test` (`/t`).
891 #[must_use]
892 pub const fn with_text_test(mut self, value: bool) -> Self {
893 self.text_test = value;
894 self
895 }
896
897 /// Builder-style setter for `trim` (`/T`).
898 #[must_use]
899 pub const fn with_trim(mut self, value: bool) -> Self {
900 self.trim = value;
901 self
902 }
903
904 /// Builder-style setter for `bin_test` (`/b`).
905 #[must_use]
906 pub const fn with_bin_test(mut self, value: bool) -> Self {
907 self.bin_test = value;
908 self
909 }
910
911 /// Builder-style setter for `full_word` (`/f`).
912 #[must_use]
913 pub const fn with_full_word(mut self, value: bool) -> Self {
914 self.full_word = value;
915 self
916 }
917}
918
/// Scan window specifier for a [`TypeKind::Regex`] rule.
///
/// Encodes the three mutually-exclusive scan modes in a single enum so
/// that the "byte count" and "line count" cases cannot be confused:
///
/// * [`RegexCount::Default`] -- plain `regex`, no suffix.
/// * [`RegexCount::Bytes`] -- `regex/N`, a byte-bounded scan.
/// * [`RegexCount::Lines`] -- `regex/Nl` or `regex/l`, a line-bounded scan.
///
/// The `regex/l` shorthand (line mode with no explicit count) is
/// represented explicitly as
/// [`RegexCount::Lines(None)`](RegexCount::Lines), which is behaviorally
/// equivalent to [`RegexCount::Default`] -- both walk the full 8192-byte
/// capped window -- but preserves the magic-file surface syntax of the
/// original rule. The 8192-byte hard cap (matching GNU `file`'s
/// `FILE_REGEX_MAX`) is applied by the evaluator on every variant.
///
/// # Examples
///
/// ```
/// use libmagic_rs::parser::ast::RegexCount;
/// use std::num::NonZeroU32;
///
/// // Plain `regex` (no suffix): default 8192-byte window.
/// assert_eq!(RegexCount::default(), RegexCount::Default);
///
/// // `regex/100`: scan at most 100 bytes.
/// let hundred_bytes = RegexCount::Bytes(NonZeroU32::new(100).unwrap());
///
/// // `regex/1l`: scan the first line.
/// let one_line = RegexCount::Lines(NonZeroU32::new(1));
///
/// // `regex/l`: line-mode with no explicit count (walks terminators
/// // to the end of the 8192-byte capped window).
/// let unbounded_lines = RegexCount::Lines(None);
/// ```
#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq)]
pub enum RegexCount {
    /// No explicit scan bound (plain `regex` with no suffix). Scans the
    /// default 8192-byte window from the rule's offset. This is the
    /// variant produced by [`RegexCount::default`].
    #[default]
    Default,
    /// Byte-bounded scan (`regex/N` with no `/l` flag). The window is
    /// `min(n, 8192, remaining_buffer)` bytes long. `NonZeroU32` makes
    /// a zero-byte scan unrepresentable.
    Bytes(NonZeroU32),
    /// Line-bounded scan (`regex/Nl` or `regex/l`). The window walks
    /// LF / CRLF / bare CR line terminators from the offset. With
    /// `Some(n)`, the walk stops after the Nth terminator (inclusive).
    /// With `None` (the `regex/l` shorthand), the walk continues to
    /// the end of the 8192-byte capped window. Either way the
    /// effective byte window is capped at 8192.
    Lines(Option<NonZeroU32>),
}
968
969impl TypeKind {
970 /// Returns the bit width of integer types, or `None` for non-integer types (e.g., String).
971 ///
972 /// # Examples
973 ///
974 /// ```
975 /// use libmagic_rs::parser::ast::{Endianness, StringFlags, TypeKind};
976 ///
977 /// assert_eq!(TypeKind::Byte { signed: false }.bit_width(), Some(8));
978 /// assert_eq!(TypeKind::Short { endian: Endianness::Native, signed: true }.bit_width(), Some(16));
979 /// assert_eq!(TypeKind::Long { endian: Endianness::Native, signed: true }.bit_width(), Some(32));
980 /// assert_eq!(TypeKind::Quad { endian: Endianness::Native, signed: true }.bit_width(), Some(64));
981 /// assert_eq!(TypeKind::Float { endian: Endianness::Native }.bit_width(), Some(32));
982 /// assert_eq!(TypeKind::Double { endian: Endianness::Native }.bit_width(), Some(64));
983 /// assert_eq!(TypeKind::String { max_length: None, flags: StringFlags::default() }.bit_width(), None);
984 /// ```
985 #[must_use]
986 pub const fn bit_width(&self) -> Option<u32> {
987 match self {
988 Self::Byte { .. } => Some(8),
989 Self::Short { .. } => Some(16),
990 Self::Long { .. } | Self::Float { .. } | Self::Date { .. } => Some(32),
991 Self::Quad { .. } | Self::Double { .. } | Self::QDate { .. } => Some(64),
992 Self::String { .. }
993 | Self::String16 { .. }
994 | Self::PString { .. }
995 | Self::Regex { .. }
996 | Self::Search { .. }
997 | Self::Meta(_) => None,
998 }
999 }
1000}
1001
/// Comparison and bitwise operators.
///
/// A [`MagicRule`] stores one of these in its `op` field as the
/// comparison operator to apply against the rule's expected value.
///
/// The enum is `#[non_exhaustive]`, so `match` expressions outside this
/// crate need a wildcard arm.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[non_exhaustive]
pub enum Operator {
    /// Equality comparison (`=` or `==`)
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::Operator;
    ///
    /// let op = Operator::Equal;
    /// assert_eq!(op, Operator::Equal);
    /// ```
    Equal,
    /// Inequality comparison (`!=` or `<>`)
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::Operator;
    ///
    /// let op = Operator::NotEqual;
    /// assert_eq!(op, Operator::NotEqual);
    /// ```
    NotEqual,
    /// Less-than comparison (`<`)
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::Operator;
    ///
    /// let op = Operator::LessThan;
    /// assert_eq!(op, Operator::LessThan);
    /// ```
    LessThan,
    /// Greater-than comparison (`>`)
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::Operator;
    ///
    /// let op = Operator::GreaterThan;
    /// assert_eq!(op, Operator::GreaterThan);
    /// ```
    GreaterThan,
    /// Less-than-or-equal comparison (`<=`)
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::Operator;
    ///
    /// let op = Operator::LessEqual;
    /// assert_eq!(op, Operator::LessEqual);
    /// ```
    LessEqual,
    /// Greater-than-or-equal comparison (`>=`)
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::Operator;
    ///
    /// let op = Operator::GreaterEqual;
    /// assert_eq!(op, Operator::GreaterEqual);
    /// ```
    GreaterEqual,
    /// Bitwise AND operation without mask (`&`)
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::Operator;
    ///
    /// let op = Operator::BitwiseAnd;
    /// assert_eq!(op, Operator::BitwiseAnd);
    /// ```
    BitwiseAnd,
    /// Bitwise AND operation with mask value (`&` with a mask operand)
    ///
    /// The payload is the mask. This is the legacy `&MASK VALUE` encoding
    /// (mask plus implicit equality in one step); see
    /// [`ValueTransformOp::BitAnd`] for the form used when `&MASK` is
    /// followed by another operator.
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::Operator;
    ///
    /// let op = Operator::BitwiseAndMask(0xFF00);
    /// assert_eq!(op, Operator::BitwiseAndMask(0xFF00));
    /// ```
    BitwiseAndMask(u64),
    /// Bitwise XOR operation (`^`)
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::Operator;
    ///
    /// let op = Operator::BitwiseXor;
    /// assert_eq!(op, Operator::BitwiseXor);
    /// ```
    BitwiseXor,
    /// Bitwise NOT/complement operation (`~`)
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::Operator;
    ///
    /// let op = Operator::BitwiseNot;
    /// assert_eq!(op, Operator::BitwiseNot);
    /// ```
    BitwiseNot,
    /// Match any value; condition always succeeds (`x`)
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::Operator;
    ///
    /// let op = Operator::AnyValue;
    /// assert_eq!(op, Operator::AnyValue);
    /// ```
    AnyValue,
}
1128
/// Value types for rule matching.
///
/// A [`MagicRule`] stores one of these in its `value` field as the
/// expected value for comparison.
///
/// The enum derives `PartialEq` but not `Eq` because the `Float` variant
/// holds an `f64`. Values of different variants never compare equal, even
/// when they look numerically alike (`Value::Uint(42) != Value::Int(42)`).
///
/// The enum is `#[non_exhaustive]`, so `match` expressions outside this
/// crate need a wildcard arm.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[non_exhaustive]
pub enum Value {
    /// Unsigned integer value
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::Value;
    ///
    /// let val = Value::Uint(0xDEAD_BEEF);
    /// assert_eq!(val, Value::Uint(0xDEAD_BEEF));
    /// ```
    Uint(u64),
    /// Signed integer value
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::Value;
    ///
    /// let val = Value::Int(-42);
    /// assert_eq!(val, Value::Int(-42));
    /// ```
    Int(i64),
    /// Floating-point value (used for `float` and `double` types)
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::Value;
    ///
    /// let val = Value::Float(3.14);
    /// assert_eq!(val, Value::Float(3.14));
    /// ```
    Float(f64),
    /// Byte sequence
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::Value;
    ///
    /// let val = Value::Bytes(vec![0x7f, 0x45, 0x4c, 0x46]);
    /// assert_eq!(val, Value::Bytes(vec![0x7f, 0x45, 0x4c, 0x46]));
    /// ```
    Bytes(Vec<u8>),
    /// String value
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::Value;
    ///
    /// let val = Value::String("MZ".to_string());
    /// assert_eq!(val, Value::String("MZ".to_string()));
    /// ```
    String(String),
}
1189
/// Endianness specification for multi-byte values.
///
/// Carried by the multi-byte [`TypeKind`] variants (for example
/// `TypeKind::Short` and `TypeKind::Long`) and by
/// `OffsetSpec::Indirect`.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
pub enum Endianness {
    /// Little-endian byte order (least significant byte first)
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::Endianness;
    ///
    /// let e = Endianness::Little;
    /// assert_eq!(e, Endianness::Little);
    /// ```
    Little,
    /// Big-endian byte order (most significant byte first)
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::Endianness;
    ///
    /// let e = Endianness::Big;
    /// assert_eq!(e, Endianness::Big);
    /// ```
    Big,
    /// Native system byte order (matches target architecture)
    ///
    /// # Examples
    ///
    /// ```
    /// use libmagic_rs::parser::ast::Endianness;
    ///
    /// let e = Endianness::Native;
    /// assert_eq!(e, Endianness::Native);
    /// ```
    Native,
}
1227
/// Strength modifier for magic rules
///
/// Strength modifiers adjust the default strength calculation for a rule.
/// They are specified using the `!:strength` directive in magic files.
/// Each variant carries the directive's numeric operand `N` as an `i32`;
/// this type only records the requested adjustment and does not apply it.
///
/// # Examples
///
/// ```
/// use libmagic_rs::parser::ast::StrengthModifier;
///
/// let add = StrengthModifier::Add(10); // !:strength +10
/// let sub = StrengthModifier::Subtract(5); // !:strength -5
/// let mul = StrengthModifier::Multiply(2); // !:strength *2
/// let div = StrengthModifier::Divide(2); // !:strength /2
/// let set = StrengthModifier::Set(50); // !:strength =50
/// ```
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
pub enum StrengthModifier {
    /// Add to the default strength: `!:strength +N`
    Add(i32),
    /// Subtract from the default strength: `!:strength -N`
    Subtract(i32),
    /// Multiply the default strength: `!:strength *N`
    Multiply(i32),
    /// Divide the default strength: `!:strength /N`
    Divide(i32),
    /// Set strength to an absolute value: `!:strength =N` or `!:strength N`
    Set(i32),
}
1257
/// Arithmetic or bitwise operation applied to a value read from the file
/// *before* the rule's comparison operator is evaluated.
///
/// magic(5) supports `+`, `-`, `*`, `/`, `%`, `&`, `|`, and `^` between the
/// type keyword and the comparison value (e.g., `lelong+1 x volume %d` reads
/// a long, adds 1, and formats the transformed value into the message).
///
/// Bitwise AND is represented in two places in the AST: the legacy
/// `&MASK VALUE` form (mask + implicit equal) stays at the operator level as
/// [`Operator::BitwiseAndMask`], while `&MASK` followed by another operator
/// is represented here as [`ValueTransformOp::BitAnd`].
///
/// The operand is signed (`i64`) so that subtraction and negative multipliers
/// round-trip cleanly. Bitwise ops reinterpret the operand as a `u64` bit
/// pattern at evaluation time, matching libmagic's `mconvert` (in
/// `softmagic.c`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[non_exhaustive]
pub enum ValueTransformOp {
    /// Addition (`type+N`).
    Add,
    /// Subtraction (`type-N`).
    Sub,
    /// Multiplication (`type*N`).
    Mul,
    /// Truncating integer division (`type/N`). Division by zero is rejected
    /// at evaluation time.
    Div,
    /// Remainder (`type%N`). Modulo by zero is rejected at evaluation time.
    Mod,
    /// Bitwise AND (`type&N`).
    ///
    /// magic(5) `&MASK` was historically encoded at the operator level
    /// via [`Operator::BitwiseAndMask`] (which combines mask+equal in
    /// one step). That encoding cannot represent rules like `lelong&0xff
    /// x %d` (mask + any-value, with the masked value used in format
    /// substitution). The parser promotes `&MASK` to this `BitAnd`
    /// transform when followed by another operator (`x`, `>`, `!=`, ...)
    /// so the read value is masked before comparison and before printf
    /// substitution. The legacy `&MASK VALUE` form (mask + implicit
    /// equal) keeps using `Operator::BitwiseAndMask` for backwards
    /// compatibility.
    BitAnd,
    /// Bitwise OR (`type|N`).
    Or,
    /// Bitwise XOR (`type^N`).
    Xor,
}
1302
/// A pre-comparison value transform: `(op, operand)`.
///
/// Applied to the value read from the file before the rule's comparison
/// operator runs. See [`ValueTransformOp`] for the supported operations.
/// A rule carries at most one transform, in
/// [`MagicRule::value_transform`].
///
/// # Examples
///
/// ```
/// use libmagic_rs::parser::ast::{ValueTransform, ValueTransformOp};
///
/// // `lelong+1` -> add 1 to the read value
/// let t = ValueTransform { op: ValueTransformOp::Add, operand: 1 };
/// assert_eq!(t.op, ValueTransformOp::Add);
/// assert_eq!(t.operand, 1);
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub struct ValueTransform {
    /// Operation to apply.
    pub op: ValueTransformOp,
    /// Operand to combine with the read value. Stored signed; the
    /// [`ValueTransformOp`] docs describe how bitwise operations
    /// interpret it.
    pub operand: i64,
}
1325
/// Magic rule representation in the AST.
///
/// A rule is one node of a tree: it describes where to read (`offset`),
/// how to interpret the bytes (`typ`), how to compare (`op`, `value`) and
/// what to report (`message`), and it owns the `children` that are
/// evaluated if it matches.
///
/// Use [`MagicRule::new`] to build a top-level rule programmatically and
/// [`MagicRule::validate`] to check the structural invariants.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MagicRule {
    /// Offset specification for where to read data
    pub offset: OffsetSpec,
    /// Type of data to read and interpret
    pub typ: TypeKind,
    /// Comparison operator to apply
    pub op: Operator,
    /// Expected value for comparison
    pub value: Value,
    /// Human-readable message for this rule
    pub message: String,
    /// Child rules that are evaluated if this rule matches. Each child's
    /// `level` must be strictly greater than this rule's `level`
    /// (checked by [`MagicRule::validate`]).
    pub children: Vec<MagicRule>,
    /// Indentation level for hierarchical rules. [`MagicRule::new`]
    /// sets it to `0`.
    pub level: u32,
    /// Optional strength modifier from `!:strength` directive
    pub strength_modifier: Option<StrengthModifier>,
    /// Optional pre-comparison value transform from a magic-file
    /// type-suffix like `lelong+1` or `ulequad/1073741824`. When set,
    /// the read value is transformed *before* `op` is evaluated and
    /// before the message's `%`-format substitution, so format
    /// specifiers see the post-transform number.
    ///
    /// `#[serde(default)]` keeps existing serialized AST snapshots
    /// (which never had this field) round-tripping correctly: missing
    /// fields deserialize to `None`, which means "no transform" --
    /// the historical behavior.
    #[serde(default)]
    pub value_transform: Option<ValueTransform>,
}
1358
/// Validation errors returned by [`MagicRule::validate`].
///
/// Each variant corresponds to one structural invariant; `validate`
/// returns the first violation it encounters. The enum is
/// `#[non_exhaustive]`, so `match` expressions outside this crate need a
/// wildcard arm.
#[derive(Debug, thiserror::Error, PartialEq, Eq)]
#[non_exhaustive]
pub enum MagicRuleValidationError {
    /// Rule message is empty. Messages are user-facing and required
    /// for meaningful output.
    #[error("rule message must not be empty")]
    EmptyMessage,

    /// The child rule at `child_index` has `level <= self.level`,
    /// violating the "children must nest deeper than the parent"
    /// invariant of the hierarchical indentation-based DSL.
    #[error(
        "child rule at index {child_index} has level {child_level}, \
         must be greater than parent level {parent_level}"
    )]
    InvalidChildLevel {
        /// Index of the offending child in `self.children`.
        child_index: usize,
        /// Level of the child rule.
        child_level: u32,
        /// Level of the parent rule.
        parent_level: u32,
    },

    /// Rule `level` exceeds the maximum supported depth
    /// ([`MagicRule::MAX_LEVEL`]). The limit is a hardening mechanism
    /// against stack overflow during deep recursion; libmagic files in
    /// the wild rarely go beyond 10 levels.
    #[error("rule level {level} exceeds maximum supported depth {max}")]
    LevelTooDeep {
        /// The invalid level value.
        level: u32,
        /// The maximum allowed depth.
        max: u32,
    },
}
1395
1396impl MagicRule {
1397 /// Hard structural ceiling on rule `level`.
1398 ///
1399 /// This is a conservative upper bound enforced by
1400 /// [`MagicRule::validate`] to keep the AST shape sane: real
1401 /// magic files in the wild rarely exceed ~10 levels of nesting,
1402 /// so rejecting rules with `level > 1000` catches obviously
1403 /// pathological input at construction time without constraining
1404 /// any legitimate rule.
1405 ///
1406 /// This ceiling is **independent of** the evaluator's
1407 /// `EvaluationConfig::max_recursion_depth` (default 20), which
1408 /// is the *runtime* recursion guard applied during rule
1409 /// evaluation. The evaluator limit is the first one that fires
1410 /// in practice -- a rule tree with 50 levels passes this
1411 /// structural check but is aborted by the evaluator long before
1412 /// reaching `MAX_LEVEL`. The two limits serve different purposes:
1413 /// `MAX_LEVEL` is an AST-shape sanity check, and
1414 /// `max_recursion_depth` is a per-evaluation resource bound.
1415 pub const MAX_LEVEL: u32 = 1000;
1416
1417 /// Construct a top-level rule with no children and no strength
1418 /// modifier.
1419 ///
1420 /// This is the most common constructor for programmatically building
1421 /// rules outside the parser. To add children, mutate
1422 /// [`MagicRule::children`] directly, or use [`MagicRule::with_children`].
1423 /// To set a strength modifier, use
1424 /// [`MagicRule::with_strength_modifier`].
1425 ///
1426 /// # Examples
1427 ///
1428 /// ```rust
1429 /// use libmagic_rs::{MagicRule, OffsetSpec, Operator, TypeKind, Value};
1430 ///
1431 /// let rule = MagicRule::new(
1432 /// OffsetSpec::Absolute(0),
1433 /// TypeKind::Byte { signed: false },
1434 /// Operator::Equal,
1435 /// Value::Uint(0x7f),
1436 /// "ELF magic byte".to_string(),
1437 /// );
1438 /// assert_eq!(rule.level, 0);
1439 /// assert!(rule.children.is_empty());
1440 /// assert!(rule.validate().is_ok());
1441 /// ```
1442 #[must_use]
1443 pub fn new(
1444 offset: OffsetSpec,
1445 typ: TypeKind,
1446 op: Operator,
1447 value: Value,
1448 message: String,
1449 ) -> Self {
1450 Self {
1451 offset,
1452 typ,
1453 op,
1454 value,
1455 message,
1456 children: vec![],
1457 level: 0,
1458 strength_modifier: None,
1459 value_transform: None,
1460 }
1461 }
1462
1463 /// Replace `self.children` with the given children and return the
1464 /// modified rule. Builder-style for chaining.
1465 #[must_use]
1466 pub fn with_children(mut self, children: Vec<MagicRule>) -> Self {
1467 self.children = children;
1468 self
1469 }
1470
1471 /// Set `self.strength_modifier` to the given value and return the
1472 /// modified rule. Builder-style for chaining.
1473 #[must_use]
1474 pub const fn with_strength_modifier(mut self, modifier: StrengthModifier) -> Self {
1475 self.strength_modifier = Some(modifier);
1476 self
1477 }
1478
1479 /// Set `self.level` to the given value and return the modified rule.
1480 /// Builder-style for chaining; typically used only when constructing
1481 /// child rules programmatically.
1482 #[must_use]
1483 pub const fn with_level(mut self, level: u32) -> Self {
1484 self.level = level;
1485 self
1486 }
1487
1488 /// Validate structural invariants of the rule.
1489 ///
1490 /// This checks invariants that the parser enforces automatically but
1491 /// that programmatic constructors (especially via serde deserialize)
1492 /// can violate:
1493 ///
1494 /// * Message must not be empty.
1495 /// * `level` must not exceed [`Self::MAX_LEVEL`].
1496 /// * Every child's `level` must be strictly greater than
1497 /// `self.level`, and each child must recursively validate.
1498 ///
1499 /// This does *not* validate that `value` is shape-compatible with
1500 /// `typ` (e.g., a `Value::Uint` against a `TypeKind::String`); such
1501 /// mismatches are coerced or rejected by the evaluator at match time.
1502 ///
1503 /// # Errors
1504 ///
1505 /// Returns [`MagicRuleValidationError`] describing the first
1506 /// invariant violation encountered.
1507 ///
1508 /// # Examples
1509 ///
1510 /// ```rust
1511 /// use libmagic_rs::{MagicRule, OffsetSpec, Operator, TypeKind, Value};
1512 ///
1513 /// let rule = MagicRule::new(
1514 /// OffsetSpec::Absolute(0),
1515 /// TypeKind::Byte { signed: false },
1516 /// Operator::Equal,
1517 /// Value::Uint(0),
1518 /// "zero byte".to_string(),
1519 /// );
1520 /// assert!(rule.validate().is_ok());
1521 /// ```
1522 pub fn validate(&self) -> Result<(), MagicRuleValidationError> {
1523 if self.message.is_empty() {
1524 return Err(MagicRuleValidationError::EmptyMessage);
1525 }
1526 if self.level > Self::MAX_LEVEL {
1527 return Err(MagicRuleValidationError::LevelTooDeep {
1528 level: self.level,
1529 max: Self::MAX_LEVEL,
1530 });
1531 }
1532 for (child_index, child) in self.children.iter().enumerate() {
1533 if child.level <= self.level {
1534 return Err(MagicRuleValidationError::InvalidChildLevel {
1535 child_index,
1536 child_level: child.level,
1537 parent_level: self.level,
1538 });
1539 }
1540 child.validate()?;
1541 }
1542 Ok(())
1543 }
1544}
1545
1546#[cfg(test)]
1547mod tests {
1548 use super::*;
1549
1550 #[test]
1551 fn test_magic_rule_new_defaults() {
1552 let rule = MagicRule::new(
1553 OffsetSpec::Absolute(0),
1554 TypeKind::Byte { signed: false },
1555 Operator::Equal,
1556 Value::Uint(0x7f),
1557 "ELF".to_string(),
1558 );
1559 assert_eq!(rule.level, 0);
1560 assert!(rule.children.is_empty());
1561 assert!(rule.strength_modifier.is_none());
1562 assert!(rule.validate().is_ok());
1563 }
1564
1565 #[test]
1566 fn test_magic_rule_builder_chain() {
1567 let child = MagicRule::new(
1568 OffsetSpec::Absolute(4),
1569 TypeKind::Byte { signed: false },
1570 Operator::Equal,
1571 Value::Uint(2),
1572 "64-bit".to_string(),
1573 )
1574 .with_level(1);
1575 let parent = MagicRule::new(
1576 OffsetSpec::Absolute(0),
1577 TypeKind::Byte { signed: false },
1578 Operator::Equal,
1579 Value::Uint(0x7f),
1580 "ELF".to_string(),
1581 )
1582 .with_children(vec![child])
1583 .with_strength_modifier(StrengthModifier::Add(10));
1584 assert_eq!(parent.children.len(), 1);
1585 assert_eq!(parent.strength_modifier, Some(StrengthModifier::Add(10)));
1586 assert!(parent.validate().is_ok());
1587 }
1588
1589 #[test]
1590 fn test_magic_rule_validate_empty_message_rejected() {
1591 let rule = MagicRule::new(
1592 OffsetSpec::Absolute(0),
1593 TypeKind::Byte { signed: false },
1594 Operator::Equal,
1595 Value::Uint(0),
1596 String::new(),
1597 );
1598 assert_eq!(rule.validate(), Err(MagicRuleValidationError::EmptyMessage));
1599 }
1600
1601 #[test]
1602 fn test_magic_rule_validate_child_level_must_be_deeper() {
1603 let child_same_level = MagicRule::new(
1604 OffsetSpec::Absolute(4),
1605 TypeKind::Byte { signed: false },
1606 Operator::Equal,
1607 Value::Uint(2),
1608 "child".to_string(),
1609 ); // level = 0, same as parent
1610 let parent = MagicRule::new(
1611 OffsetSpec::Absolute(0),
1612 TypeKind::Byte { signed: false },
1613 Operator::Equal,
1614 Value::Uint(0x7f),
1615 "parent".to_string(),
1616 )
1617 .with_children(vec![child_same_level]);
1618 assert_eq!(
1619 parent.validate(),
1620 Err(MagicRuleValidationError::InvalidChildLevel {
1621 child_index: 0,
1622 child_level: 0,
1623 parent_level: 0,
1624 })
1625 );
1626 }
1627
1628 #[test]
1629 fn test_magic_rule_validate_level_too_deep() {
1630 let rule = MagicRule::new(
1631 OffsetSpec::Absolute(0),
1632 TypeKind::Byte { signed: false },
1633 Operator::Equal,
1634 Value::Uint(0),
1635 "deep".to_string(),
1636 )
1637 .with_level(MagicRule::MAX_LEVEL + 1);
1638 assert_eq!(
1639 rule.validate(),
1640 Err(MagicRuleValidationError::LevelTooDeep {
1641 level: MagicRule::MAX_LEVEL + 1,
1642 max: MagicRule::MAX_LEVEL,
1643 })
1644 );
1645 }
1646
1647 #[test]
1648 fn test_offset_spec_absolute() {
1649 let offset = OffsetSpec::Absolute(42);
1650 assert_eq!(offset, OffsetSpec::Absolute(42));
1651
1652 // Test negative offset
1653 let negative = OffsetSpec::Absolute(-10);
1654 assert_eq!(negative, OffsetSpec::Absolute(-10));
1655 }
1656
1657 #[test]
1658 fn test_offset_spec_indirect() {
1659 let indirect = OffsetSpec::Indirect {
1660 base_offset: 0x20,
1661 base_relative: false,
1662 pointer_type: TypeKind::Long {
1663 endian: Endianness::Little,
1664 signed: false,
1665 },
1666 adjustment: 4,
1667 adjustment_op: IndirectAdjustmentOp::Add,
1668 result_relative: false,
1669 endian: Endianness::Little,
1670 };
1671
1672 match indirect {
1673 OffsetSpec::Indirect {
1674 base_offset,
1675 adjustment,
1676 ..
1677 } => {
1678 assert_eq!(base_offset, 0x20);
1679 assert_eq!(adjustment, 4);
1680 }
1681 _ => panic!("Expected Indirect variant"),
1682 }
1683 }
1684
1685 #[test]
1686 fn test_offset_spec_relative() {
1687 let relative = OffsetSpec::Relative(8);
1688 assert_eq!(relative, OffsetSpec::Relative(8));
1689
1690 // Test negative relative offset
1691 let negative_relative = OffsetSpec::Relative(-4);
1692 assert_eq!(negative_relative, OffsetSpec::Relative(-4));
1693 }
1694
1695 #[test]
1696 fn test_offset_spec_from_end() {
1697 let from_end = OffsetSpec::FromEnd(-16);
1698 assert_eq!(from_end, OffsetSpec::FromEnd(-16));
1699
1700 // Test positive from_end (though unusual)
1701 let positive_from_end = OffsetSpec::FromEnd(8);
1702 assert_eq!(positive_from_end, OffsetSpec::FromEnd(8));
1703 }
1704
1705 #[test]
1706 fn test_offset_spec_debug() {
1707 let offset = OffsetSpec::Absolute(100);
1708 let debug_str = format!("{offset:?}");
1709 assert!(debug_str.contains("Absolute"));
1710 assert!(debug_str.contains("100"));
1711 }
1712
1713 #[test]
1714 fn test_offset_spec_clone() {
1715 let original = OffsetSpec::Indirect {
1716 base_offset: 0x10,
1717 base_relative: false,
1718 pointer_type: TypeKind::Short {
1719 endian: Endianness::Big,
1720 signed: true,
1721 },
1722 adjustment: -2,
1723 adjustment_op: IndirectAdjustmentOp::Add,
1724 result_relative: false,
1725 endian: Endianness::Big,
1726 };
1727
1728 let cloned = original.clone();
1729 assert_eq!(original, cloned);
1730 }
1731
1732 #[test]
1733 fn test_offset_spec_serialization() {
1734 let offset = OffsetSpec::Absolute(42);
1735
1736 // Test JSON serialization
1737 let json = serde_json::to_string(&offset).expect("Failed to serialize");
1738 let deserialized: OffsetSpec = serde_json::from_str(&json).expect("Failed to deserialize");
1739
1740 assert_eq!(offset, deserialized);
1741 }
1742
1743 #[test]
1744 fn test_offset_spec_indirect_serialization() {
1745 let indirect = OffsetSpec::Indirect {
1746 base_offset: 0x100,
1747 base_relative: false,
1748 pointer_type: TypeKind::Long {
1749 endian: Endianness::Native,
1750 signed: false,
1751 },
1752 adjustment: 12,
1753 adjustment_op: IndirectAdjustmentOp::Add,
1754 result_relative: false,
1755 endian: Endianness::Native,
1756 };
1757
1758 // Test JSON serialization for complex variant
1759 let json = serde_json::to_string(&indirect).expect("Failed to serialize");
1760 let deserialized: OffsetSpec = serde_json::from_str(&json).expect("Failed to deserialize");
1761
1762 assert_eq!(indirect, deserialized);
1763 }
1764
1765 #[test]
1766 fn test_all_offset_spec_variants() {
1767 let variants = [
1768 OffsetSpec::Absolute(0),
1769 OffsetSpec::Absolute(-100),
1770 OffsetSpec::Indirect {
1771 base_offset: 0x20,
1772 base_relative: false,
1773 pointer_type: TypeKind::Byte { signed: true },
1774 adjustment: 0,
1775 adjustment_op: IndirectAdjustmentOp::Add,
1776 result_relative: false,
1777 endian: Endianness::Little,
1778 },
1779 OffsetSpec::Relative(50),
1780 OffsetSpec::Relative(-25),
1781 OffsetSpec::FromEnd(-8),
1782 OffsetSpec::FromEnd(4),
1783 ];
1784
1785 // Test that all variants can be created and are distinct
1786 for (i, variant) in variants.iter().enumerate() {
1787 for (j, other) in variants.iter().enumerate() {
1788 if i != j {
1789 assert_ne!(
1790 variant, other,
1791 "Variants at indices {i} and {j} should be different"
1792 );
1793 }
1794 }
1795 }
1796 }
1797
1798 #[test]
1799 fn test_endianness_variants() {
1800 let endianness_values = vec![Endianness::Little, Endianness::Big, Endianness::Native];
1801
1802 for endian in endianness_values {
1803 let indirect = OffsetSpec::Indirect {
1804 base_offset: 0,
1805 base_relative: false,
1806 pointer_type: TypeKind::Long {
1807 endian,
1808 signed: false,
1809 },
1810 adjustment: 0,
1811 adjustment_op: IndirectAdjustmentOp::Add,
1812 result_relative: false,
1813 endian,
1814 };
1815
1816 // Verify the endianness is preserved
1817 match indirect {
1818 OffsetSpec::Indirect {
1819 endian: actual_endian,
1820 ..
1821 } => {
1822 assert_eq!(endian, actual_endian);
1823 }
1824 _ => panic!("Expected Indirect variant"),
1825 }
1826 }
1827 }
1828
1829 // Value enum tests
1830 #[test]
1831 fn test_value_uint() {
1832 let value = Value::Uint(42);
1833 assert_eq!(value, Value::Uint(42));
1834
1835 // Test large values
1836 let large_value = Value::Uint(u64::MAX);
1837 assert_eq!(large_value, Value::Uint(u64::MAX));
1838 }
1839
1840 #[test]
1841 fn test_value_int() {
1842 let positive = Value::Int(100);
1843 assert_eq!(positive, Value::Int(100));
1844
1845 let negative = Value::Int(-50);
1846 assert_eq!(negative, Value::Int(-50));
1847
1848 // Test extreme values
1849 let max_int = Value::Int(i64::MAX);
1850 let min_int = Value::Int(i64::MIN);
1851 assert_eq!(max_int, Value::Int(i64::MAX));
1852 assert_eq!(min_int, Value::Int(i64::MIN));
1853 }
1854
1855 #[test]
1856 fn test_value_bytes() {
1857 let empty_bytes = Value::Bytes(vec![]);
1858 assert_eq!(empty_bytes, Value::Bytes(vec![]));
1859
1860 let some_bytes = Value::Bytes(vec![0x7f, 0x45, 0x4c, 0x46]);
1861 assert_eq!(some_bytes, Value::Bytes(vec![0x7f, 0x45, 0x4c, 0x46]));
1862
1863 // Test that different byte sequences are not equal
1864 let other_bytes = Value::Bytes(vec![0x50, 0x4b, 0x03, 0x04]);
1865 assert_ne!(some_bytes, other_bytes);
1866 }
1867
/// `Value::String` equals an identically built value for empty, ASCII and
/// non-ASCII text.
#[test]
fn test_value_string() {
    for text in ["", "Hello, World!", "🦀 Rust"] {
        let built = Value::String(String::from(text));
        assert_eq!(built, Value::String(String::from(text)));
    }
}
1880
/// Values of different variants never compare equal, even when their payloads
/// all represent "42".
#[test]
fn test_value_comparison() {
    let samples = [
        Value::Uint(42),
        Value::Int(42),
        Value::Float(42.0),
        Value::Bytes(vec![42]),
        Value::String("42".to_string()),
    ];

    // Check every unordered pair exactly once.
    for (idx, lhs) in samples.iter().enumerate() {
        for rhs in &samples[idx + 1..] {
            assert_ne!(lhs, rhs);
        }
    }
}
1901
/// The `Debug` output of a `Value` names the variant and shows its payload.
#[test]
fn test_value_debug() {
    let cases = [
        (Value::Uint(123), ["Uint", "123"]),
        (Value::String("test".to_string()), ["String", "test"]),
    ];

    for (value, fragments) in cases {
        let rendered = format!("{value:?}");
        for fragment in fragments {
            assert!(rendered.contains(fragment));
        }
    }
}
1914
/// Cloning a `Value::Bytes` yields an equal value that owns its own buffer.
///
/// Equality alone cannot show independence, so the clone is mutated and the
/// original is checked to be unchanged.
#[test]
fn test_value_clone() {
    let original = Value::Bytes(vec![1, 2, 3, 4]);
    let mut cloned = original.clone();
    assert_eq!(original, cloned);

    // Mutate only the clone; a shared buffer would make the original change too.
    match &mut cloned {
        Value::Bytes(cloned_bytes) => cloned_bytes.push(5),
        _ => panic!("Expected Bytes variants"),
    }

    assert_eq!(original, Value::Bytes(vec![1, 2, 3, 4]));
    assert_eq!(cloned, Value::Bytes(vec![1, 2, 3, 4, 5]));
    assert_ne!(original, cloned);
}
1930
/// `Value::Float` equals an identically built value for positive, negative
/// and zero payloads.
#[test]
fn test_value_float() {
    for raw in [3.125, -1.5, 0.0] {
        let built = Value::Float(raw);
        assert_eq!(built, Value::Float(raw));
    }
}
1942
/// Each `Value` variant survives a JSON serialize/deserialize round trip.
#[test]
fn test_value_serialization() {
    let samples = [
        Value::Uint(42),
        Value::Int(-100),
        Value::Float(3.125),
        Value::Bytes(vec![0x7f, 0x45, 0x4c, 0x46]),
        Value::String("ELF executable".to_string()),
    ];

    for sample in samples {
        let encoded = serde_json::to_string(&sample).expect("Failed to serialize Value");
        let decoded =
            serde_json::from_str::<Value>(&encoded).expect("Failed to deserialize Value");
        assert_eq!(sample, decoded);
    }
}
1961
/// JSON round trips also hold for empty collections and extreme integers.
#[test]
fn test_value_serialization_edge_cases() {
    // Each row: the value under test plus the panic messages used if
    // serialization or deserialization fails.
    let cases = [
        (
            Value::Bytes(vec![]),
            "Failed to serialize empty bytes",
            "Failed to deserialize empty bytes",
        ),
        (
            Value::String(String::new()),
            "Failed to serialize empty string",
            "Failed to deserialize empty string",
        ),
        (
            Value::Uint(u64::MAX),
            "Failed to serialize max uint",
            "Failed to deserialize max uint",
        ),
        (
            Value::Int(i64::MIN),
            "Failed to serialize min int",
            "Failed to deserialize min int",
        ),
    ];

    for (value, serialize_msg, deserialize_msg) in cases {
        let json = serde_json::to_string(&value).expect(serialize_msg);
        let restored: Value = serde_json::from_str(&json).expect(deserialize_msg);
        assert_eq!(value, restored);
    }
}
1990
1991 // TypeKind tests
/// Two signed `TypeKind::Byte` values built separately compare equal.
#[test]
fn test_type_kind_byte() {
    let expected = TypeKind::Byte { signed: true };
    let byte_type = TypeKind::Byte { signed: true };
    assert_eq!(byte_type, expected);
}
1997
/// `TypeKind::Short` values differ when endianness/signedness differ, and a
/// clone equals its source.
#[test]
fn test_type_kind_short() {
    let make_short = |endian, signed| TypeKind::Short { endian, signed };

    let le_unsigned = make_short(Endianness::Little, false);
    let be_signed = make_short(Endianness::Big, true);
    assert_ne!(le_unsigned, be_signed);

    let duplicate = le_unsigned.clone();
    assert_eq!(le_unsigned, duplicate);
}
2012
/// A `TypeKind::Long` keeps the endianness and signedness it was built with.
#[test]
fn test_type_kind_long() {
    let long_native = TypeKind::Long {
        signed: true,
        endian: Endianness::Native,
    };

    if let TypeKind::Long { endian, signed } = long_native {
        assert_eq!(endian, Endianness::Native);
        assert!(signed);
    } else {
        panic!("Expected Long variant");
    }
}
2028
/// `TypeKind::String` values with different `max_length` are unequal, and a
/// clone equals its source.
#[test]
fn test_type_kind_string() {
    let with_limit = |max_length| TypeKind::String {
        max_length,
        flags: StringFlags::default(),
    };

    let unbounded = with_limit(None);
    let bounded = with_limit(Some(256));
    assert_ne!(unbounded, bounded);

    let duplicate = unbounded.clone();
    assert_eq!(unbounded, duplicate);
}
2043
/// A representative set of `TypeKind` values survives a JSON round trip.
#[test]
fn test_type_kind_serialization() {
    // Small local constructors keep the case table compact.
    let quad = |endian, signed| TypeKind::Quad { endian, signed };
    let float = |endian| TypeKind::Float { endian };
    let double = |endian| TypeKind::Double { endian };
    let date = |endian, utc| TypeKind::Date { endian, utc };
    let qdate = |endian, utc| TypeKind::QDate { endian, utc };
    let string = |max_length| TypeKind::String {
        max_length,
        flags: StringFlags::default(),
    };
    let pstring = |max_length, length_width, length_includes_itself| TypeKind::PString {
        max_length,
        length_width,
        length_includes_itself,
    };

    let cases = [
        // Integer types
        TypeKind::Byte { signed: true },
        TypeKind::Short {
            endian: Endianness::Little,
            signed: false,
        },
        TypeKind::Long {
            endian: Endianness::Big,
            signed: true,
        },
        quad(Endianness::Little, false),
        quad(Endianness::Big, true),
        // Floating-point types
        float(Endianness::Native),
        float(Endianness::Big),
        double(Endianness::Little),
        double(Endianness::Native),
        // Date types
        date(Endianness::Big, true),
        date(Endianness::Little, false),
        qdate(Endianness::Native, true),
        qdate(Endianness::Big, false),
        // String types
        string(None),
        string(Some(128)),
        // Pascal string types
        pstring(None, PStringLengthWidth::OneByte, false),
        pstring(Some(64), PStringLengthWidth::OneByte, false),
        pstring(None, PStringLengthWidth::TwoByteBE, true),
        pstring(Some(128), PStringLengthWidth::FourByteLE, false),
    ];

    for case in cases {
        let encoded = serde_json::to_string(&case).expect("Failed to serialize TypeKind");
        let decoded =
            serde_json::from_str::<TypeKind>(&encoded).expect("Failed to deserialize TypeKind");
        assert_eq!(case, decoded);
    }
}
2129
2130 // Operator tests
/// Every listed `Operator` equals itself and differs from every other one.
#[test]
fn test_operator_variants() {
    let all_ops = [
        Operator::Equal,
        Operator::NotEqual,
        Operator::BitwiseAnd,
        Operator::BitwiseXor,
        Operator::BitwiseNot,
        Operator::AnyValue,
    ];

    for (row, lhs) in all_ops.iter().enumerate() {
        for (col, rhs) in all_ops.iter().enumerate() {
            // Only the diagonal of the comparison matrix may be equal.
            if row != col {
                assert_ne!(lhs, rhs);
            } else {
                assert_eq!(lhs, rhs);
            }
        }
    }
}
2152
/// Each listed `Operator` survives a JSON round trip.
#[test]
fn test_operator_serialization() {
    let all_ops = [
        Operator::Equal,
        Operator::NotEqual,
        Operator::BitwiseAnd,
        Operator::BitwiseXor,
        Operator::BitwiseNot,
        Operator::AnyValue,
    ];

    for operator in all_ops {
        let encoded = serde_json::to_string(&operator).expect("Failed to serialize Operator");
        let decoded =
            serde_json::from_str::<Operator>(&encoded).expect("Failed to deserialize Operator");
        assert_eq!(operator, decoded);
    }
}
2171
2172 // MagicRule tests
/// A top-level `MagicRule` exposes the message, level and (empty) children it
/// was built with.
#[test]
fn test_magic_rule_creation() {
    let elf_rule = MagicRule {
        message: "ELF magic".to_string(),
        level: 0,
        children: Vec::new(),
        offset: OffsetSpec::Absolute(0),
        typ: TypeKind::Byte { signed: true },
        op: Operator::Equal,
        value: Value::Uint(0x7f),
        strength_modifier: None,
        value_transform: None,
    };

    assert_eq!(elf_rule.message, "ELF magic");
    assert_eq!(elf_rule.level, 0);
    assert!(elf_rule.children.is_empty());
}
2191
/// A parent `MagicRule` keeps the child rule it was built with.
#[test]
fn test_magic_rule_with_children() {
    let nested = MagicRule {
        message: "32-bit".to_string(),
        level: 1,
        children: Vec::new(),
        offset: OffsetSpec::Absolute(4),
        typ: TypeKind::Byte { signed: true },
        op: Operator::Equal,
        value: Value::Uint(1),
        strength_modifier: None,
        value_transform: None,
    };

    let top = MagicRule {
        message: "ELF executable".to_string(),
        level: 0,
        children: vec![nested],
        offset: OffsetSpec::Absolute(0),
        typ: TypeKind::Long {
            endian: Endianness::Little,
            signed: false,
        },
        op: Operator::Equal,
        value: Value::Bytes(vec![0x7f, 0x45, 0x4c, 0x46]),
        strength_modifier: None,
        value_transform: None,
    };

    assert_eq!(top.children.len(), 1);

    // Inspect the single child through one borrowed handle.
    let only_child = &top.children[0];
    assert_eq!(only_child.level, 1);
    assert_eq!(only_child.message, "32-bit");
}
2225
/// A `MagicRule` survives a JSON round trip.
///
/// Besides message, level and child count, the type, operator, value and
/// strength modifier are compared so a regression in any of them is caught.
#[test]
fn test_magic_rule_serialization() {
    let rule = MagicRule {
        offset: OffsetSpec::Absolute(16),
        typ: TypeKind::Short {
            endian: Endianness::Little,
            signed: false,
        },
        op: Operator::NotEqual,
        value: Value::Uint(0),
        message: "Non-zero short value".to_string(),
        children: vec![],
        level: 2,
        strength_modifier: None,
        value_transform: None,
    };

    let json = serde_json::to_string(&rule).expect("Failed to serialize MagicRule");
    let deserialized: MagicRule =
        serde_json::from_str(&json).expect("Failed to deserialize MagicRule");

    assert_eq!(rule.message, deserialized.message);
    assert_eq!(rule.level, deserialized.level);
    assert_eq!(rule.children.len(), deserialized.children.len());

    // The matching semantics of the rule must round-trip as well.
    assert_eq!(rule.typ, deserialized.typ);
    assert_eq!(rule.op, deserialized.op);
    assert_eq!(rule.value, deserialized.value);
    assert_eq!(rule.strength_modifier, deserialized.strength_modifier);
}
2251
2252 // StrengthModifier tests
/// Each `StrengthModifier` variant equals an identically built value, and
/// different variants are unequal.
#[test]
fn test_strength_modifier_variants() {
    let add = StrengthModifier::Add(10);
    let sub = StrengthModifier::Subtract(5);
    let mul = StrengthModifier::Multiply(2);
    let div = StrengthModifier::Divide(2);
    let set = StrengthModifier::Set(50);

    // Same variant + same payload => equal.
    let same = [
        (add, StrengthModifier::Add(10)),
        (sub, StrengthModifier::Subtract(5)),
        (mul, StrengthModifier::Multiply(2)),
        (div, StrengthModifier::Divide(2)),
        (set, StrengthModifier::Set(50)),
    ];
    for (actual, expected) in same {
        assert_eq!(actual, expected);
    }

    // Different variants => not equal (even with the same payload, as for mul/div).
    for (lhs, rhs) in [(add, sub), (mul, div), (set, add)] {
        assert_ne!(lhs, rhs);
    }
}
2273
/// `StrengthModifier` variants accept negative payloads and compare equal to
/// identically built values.
#[test]
fn test_strength_modifier_negative_values() {
    let cases = [
        (StrengthModifier::Add(-10), StrengthModifier::Add(-10)),
        (
            StrengthModifier::Subtract(-5),
            StrengthModifier::Subtract(-5),
        ),
        (StrengthModifier::Set(-50), StrengthModifier::Set(-50)),
    ];

    for (actual, expected) in cases {
        assert_eq!(actual, expected);
    }
}
2284
/// Each `StrengthModifier` variant survives a JSON round trip.
#[test]
fn test_strength_modifier_serialization() {
    let samples = [
        StrengthModifier::Add(10),
        StrengthModifier::Subtract(5),
        StrengthModifier::Multiply(2),
        StrengthModifier::Divide(3),
        StrengthModifier::Set(100),
    ];

    for sample in samples {
        let encoded =
            serde_json::to_string(&sample).expect("Failed to serialize StrengthModifier");
        let decoded = serde_json::from_str::<StrengthModifier>(&encoded)
            .expect("Failed to deserialize StrengthModifier");
        assert_eq!(sample, decoded);
    }
}
2303
/// The `Debug` output of a `StrengthModifier` names the variant and shows its payload.
#[test]
fn test_strength_modifier_debug() {
    let rendered = format!("{:?}", StrengthModifier::Add(25));
    for fragment in ["Add", "25"] {
        assert!(rendered.contains(fragment));
    }
}
2311
/// A duplicated `StrengthModifier` equals its source.
#[test]
fn test_strength_modifier_clone() {
    let source = StrengthModifier::Multiply(4);
    // Plain assignment duplicates the value; `source` stays usable afterwards.
    let duplicate = source;
    assert_eq!(source, duplicate);
}
2318
/// A `MagicRule` keeps its strength modifier, both in memory and across a
/// JSON round trip.
#[test]
fn test_magic_rule_with_strength_modifier() {
    let modifier = Some(StrengthModifier::Add(20));

    let boosted = MagicRule {
        message: "ELF magic".to_string(),
        level: 0,
        children: Vec::new(),
        offset: OffsetSpec::Absolute(0),
        typ: TypeKind::Byte { signed: true },
        op: Operator::Equal,
        value: Value::Uint(0x7f),
        strength_modifier: modifier,
        value_transform: None,
    };

    assert_eq!(boosted.strength_modifier, modifier);

    // The modifier must also survive serialization.
    let encoded = serde_json::to_string(&boosted).expect("Failed to serialize MagicRule");
    let decoded =
        serde_json::from_str::<MagicRule>(&encoded).expect("Failed to deserialize MagicRule");
    assert_eq!(boosted.strength_modifier, decoded.strength_modifier);
}
2341
/// A `MagicRule` built without a strength modifier reports `None`.
#[test]
fn test_magic_rule_without_strength_modifier() {
    let plain = MagicRule {
        message: "ELF magic".to_string(),
        level: 0,
        children: Vec::new(),
        offset: OffsetSpec::Absolute(0),
        typ: TypeKind::Byte { signed: true },
        op: Operator::Equal,
        value: Value::Uint(0x7f),
        strength_modifier: None,
        value_transform: None,
    };

    let absent: Option<StrengthModifier> = None;
    assert_eq!(plain.strength_modifier, absent);
}
2358
2359 // MetaType tests
/// Every listed `MetaType` has non-empty `Debug` output, equals its clone,
/// and differs from every other listed value.
#[test]
fn test_meta_type_variants_debug_clone_eq() {
    let variants = [
        MetaType::Default,
        MetaType::Clear,
        MetaType::Indirect,
        MetaType::Offset,
        MetaType::Name("part2".to_string()),
        MetaType::Use("part2".to_string()),
    ];

    for (i, current) in variants.iter().enumerate() {
        // Debug must produce some text.
        let rendered = format!("{current:?}");
        assert!(
            !rendered.is_empty(),
            "Debug format must be non-empty for variant at index {i}"
        );

        // A clone must be equal to its source.
        let duplicate = current.clone();
        assert_eq!(
            current, &duplicate,
            "Clone must preserve equality for variant at index {i}"
        );

        // Equality holds only against the same position in the list.
        for (j, candidate) in variants.iter().enumerate() {
            if i != j {
                assert_ne!(
                    current, candidate,
                    "Variants at indices {i} and {j} must differ"
                );
            } else {
                assert_eq!(current, candidate);
            }
        }
    }
}
2399
/// Each listed `MetaType` survives a JSON round trip.
#[test]
fn test_meta_type_serde_roundtrip() {
    let variants = [
        MetaType::Default,
        MetaType::Clear,
        MetaType::Indirect,
        MetaType::Offset,
        MetaType::Name("foo".to_string()),
        MetaType::Use("bar".to_string()),
    ];

    for meta in variants {
        let encoded = serde_json::to_string(&meta).expect("serialize MetaType");
        let decoded = serde_json::from_str::<MetaType>(&encoded).expect("deserialize MetaType");
        assert_eq!(meta, decoded);
    }
}
2417
/// `TypeKind::Meta` reports no bit width for every listed `MetaType`.
#[test]
fn test_type_kind_meta_bit_width_is_none() {
    let metas = [
        MetaType::Default,
        MetaType::Clear,
        MetaType::Indirect,
        MetaType::Offset,
        MetaType::Name("x".to_string()),
        MetaType::Use("x".to_string()),
    ];

    // Wrap each meta type on the fly and query its width.
    for kind in metas.into_iter().map(TypeKind::Meta) {
        let width = kind.bit_width();
        assert_eq!(
            width, None,
            "TypeKind::Meta must have no bit width: {kind:?}"
        );
    }
}
2436 }
2437}