1#![forbid(unsafe_code)]
2#![deny(unused_imports)]
3use dyf::{DynDisplay, FormatString, dformat};
137use flagset::{FlagSet, flags};
138use flate2::{Compression, read::GzDecoder, write::GzEncoder};
139use lazy_cache::LazyCache;
140use memchr::memchr;
141use pest::{Span, error::ErrorVariant};
142use regex::bytes::{self};
143use serde::{Deserialize, Serialize};
144use std::{
145 borrow::Cow,
146 cmp::max,
147 collections::{HashMap, HashSet},
148 fmt::{self, Debug, Display},
149 io::{self, Read, Seek, SeekFrom, Write},
150 ops::{Add, BitAnd, BitOr, BitXor, Div, Mul, Rem, Sub},
151 path::Path,
152};
153use tar::Archive;
154use thiserror::Error;
155use tracing::{Level, debug, enabled, trace};
156
157use crate::{
158 numeric::{Float, FloatDataType, Scalar, ScalarDataType},
159 parser::{FileMagicParser, Rule},
160 utils::{decode_id3, find_json_boundaries},
161};
162
163mod numeric;
164mod parser;
165mod utils;
166
// NOTE(review): the three constants below are not used in the visible part of
// the file; their names suggest a fixed strength/source label for built-in
// rules and a recursion cap — confirm against their users.
const HARDCODED_MAGIC_STRENGTH: u64 = 2048;
const HARDCODED_SOURCE: &str = "hardcoded";
const MAX_RECURSION: usize = 50;
/// Upper bound (7 MiB) on the number of bytes read for unbounded tests
/// (search tests and string tests using length-affecting modifiers).
pub const FILE_BYTES_MAX: usize = 7 * 1024 * 1024;
/// Number of bytes read for a regex test that does not specify a length.
const FILE_REGEX_MAX: usize = 8192;

/// MIME type name for binary data. NOTE(review): usage is outside this view.
pub const DEFAULT_BIN_MIMETYPE: &str = "application/octet-stream";
/// MIME type name for text data. NOTE(review): usage is outside this view.
pub const DEFAULT_TEXT_MIMETYPE: &str = "text/plain";

/// strftime-style pattern (`YYYY-MM-DD HH:MM:SS`) for timestamps.
pub(crate) const TIMESTAMP_FORMAT: &str = "%Y-%m-%d %H:%M:%S";
180
/// Panics with the given `panic!`-style arguments, but only when the crate is
/// compiled with `debug_assertions`; otherwise execution simply continues.
macro_rules! debug_panic {
    ($($panic_args:tt)*) => {
        if cfg!(debug_assertions) {
            panic!($($panic_args)*);
        }
    };
}
188
/// Reads exactly `size_of::<$ty>()` bytes from `$r` and evaluates to them as a
/// fixed-size byte array.
///
/// Expands to a `?`, so it can only be used inside a function whose error type
/// can be built from the error returned by `$r.read_exact(..)`.
macro_rules! read {
    ($r: expr, $ty: ty) => {{
        let mut raw = [0u8; ::std::mem::size_of::<$ty>()];
        $r.read_exact(&mut raw)?;
        raw
    }};
}
196
/// Reads a `$ty` from `$r`, interpreting the bytes as little-endian.
macro_rules! read_le {
    ($r:expr, $ty:ty) => {
        <$ty>::from_le_bytes(read!($r, $ty))
    };
}
200
/// Reads a `$ty` from `$r`, interpreting the bytes as big-endian.
macro_rules! read_be {
    ($r:expr, $ty:ty) => {
        <$ty>::from_be_bytes(read!($r, $ty))
    };
}
204
/// Reads a 32-bit middle-endian value from `$r`: two consecutive little-endian
/// 16-bit words, the first one forming the high half of the resulting `i32`.
macro_rules! read_me {
    ($r: expr) => {{
        let high_word = read_le!($r, u16) as i32;
        let low_word = read_le!($r, u16) as i32;
        (high_word << 16) | low_word
    }};
}
208
209#[inline(always)]
210fn read_octal_u64<R: Read + Seek>(haystack: &mut LazyCache<R>) -> Option<u64> {
211 let s = haystack
212 .read_while_or_limit(|b| matches!(b, b'0'..=b'7'), 22)
213 .map(|buf| str::from_utf8(buf))
214 .ok()?
215 .ok()?;
216
217 if !s.starts_with("0") {
218 return None;
219 }
220
221 u64::from_str_radix(s, 8).ok()
222}
223
/// Errors produced by this crate.
#[derive(Debug, Error)]
pub enum Error {
    /// Free-form error message.
    #[error("{0}")]
    Msg(String),

    /// An error annotated with the source name and line where it happened.
    #[error("source={0} line={1} error={2}")]
    Localized(String, usize, Box<Error>),

    /// A rule referenced by name could not be found.
    #[error("missing rule: {0}")]
    MissingRule(String),

    /// The maximum recursion depth (carried as payload) was reached.
    #[error("maximum recursion reached: {0}")]
    MaximumRecursion(usize),

    /// Underlying I/O failure.
    #[error("io: {0}")]
    Io(#[from] io::Error),

    /// Error reported by the magic rule parser.
    #[error("parser error: {0}")]
    Parse(#[from] Box<pest::error::Error<Rule>>),

    /// Failure while formatting a message with `dyf`.
    #[error("formatting: {0}")]
    Format(#[from] dyf::Error),

    /// Error reported by the `regex` crate.
    #[error("regex: {0}")]
    Regex(#[from] regex::Error),

    /// `bincode` encoding failure.
    #[error("{0}")]
    Serialize(#[from] bincode::error::EncodeError),

    /// `bincode` decoding failure.
    #[error("{0}")]
    Deserialize(#[from] bincode::error::DecodeError),
}
267
268impl Error {
269 #[inline]
270 fn parser<S: ToString>(msg: S, span: Span<'_>) -> Self {
271 Self::Parse(Box::new(pest::error::Error::new_from_span(
272 ErrorVariant::CustomError {
273 message: msg.to_string(),
274 },
275 span,
276 )))
277 }
278
279 fn msg<M: AsRef<str>>(msg: M) -> Self {
280 Self::Msg(msg.as_ref().into())
281 }
282
283 fn localized<S: AsRef<str>>(source: S, line: usize, err: Error) -> Self {
284 Self::Localized(source.as_ref().into(), line, err.into())
285 }
286
287 pub fn unwrap_localized(&self) -> &Self {
289 match self {
290 Self::Localized(_, _, e) => e,
291 _ => self,
292 }
293 }
294}
295
/// Message attached to a rule: either a plain string or a format string that
/// is rendered with the matched value.
#[derive(Debug, Clone, Serialize, Deserialize)]
enum Message {
    /// Literal text, emitted as is.
    String(String),
    /// Message containing a format directive.
    Format {
        /// printf-style conversion specifier; `"c"` gets special handling
        /// (byte values are rendered as characters) when formatting.
        printf_spec: String,
        /// Format string used to render the message.
        fs: FormatString,
    },
}
304
305impl Display for Message {
306 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
307 match self {
308 Self::String(s) => write!(f, "{s}"),
309 Self::Format { printf_spec: _, fs } => write!(f, "{}", fs.to_string_lossy()),
310 }
311 }
312}
313
314impl Message {
315 fn to_string_lossy(&self) -> Cow<'_, str> {
316 match self {
317 Message::String(s) => Cow::Borrowed(s),
318 Message::Format { printf_spec: _, fs } => fs.to_string_lossy(),
319 }
320 }
321
322 #[inline(always)]
323 fn format_with(&self, mr: Option<&MatchRes>) -> Result<Cow<'_, str>, Error> {
324 match self {
325 Self::String(s) => Ok(Cow::Borrowed(s.as_str())),
326 Self::Format {
327 printf_spec: c_spec,
328 fs,
329 } => {
330 if let Some(mr) = mr {
331 match mr {
332 MatchRes::Float(_, _) | MatchRes::Bytes(_, _, _, _) => {
333 Ok(Cow::Owned(dformat!(fs, mr)?))
334 }
335 MatchRes::Scalar(_, scalar) => {
336 if c_spec.as_str() == "c" {
338 match scalar {
339 Scalar::byte(b) => {
340 let b = (*b as u8) as char;
341 Ok(Cow::Owned(dformat!(fs, b)?))
342 }
343 Scalar::ubyte(b) => {
344 let b = *b as char;
345 Ok(Cow::Owned(dformat!(fs, b)?))
346 }
347 _ => Ok(Cow::Owned(dformat!(fs, mr)?)),
348 }
349 } else {
350 Ok(Cow::Owned(dformat!(fs, mr)?))
351 }
352 }
353 }
354 } else {
355 Ok(fs.to_string_lossy())
356 }
357 }
358 }
359 }
360}
361
impl ScalarDataType {
    /// Reads one scalar of this data type from `from`.
    ///
    /// When `switch_endianness` is true, the byte order of every
    /// endian-dependent type is inverted (little-endian types are decoded as
    /// big-endian and vice versa). Single bytes, `offset` and `guid` are not
    /// affected.
    ///
    /// # Errors
    /// Returns an error when the underlying read (or position query) fails.
    #[inline(always)]
    fn read<R: Read + Seek>(&self, from: &mut R, switch_endianness: bool) -> Result<Scalar, Error> {
        // Little-endian read, flipped to big-endian when endianness is switched.
        macro_rules! _read_le {
            ($ty: ty) => {{
                if switch_endianness {
                    <$ty>::from_be_bytes(read!(from, $ty))
                } else {
                    <$ty>::from_le_bytes(read!(from, $ty))
                }
            }};
        }

        // Big-endian read, flipped to little-endian when endianness is switched.
        macro_rules! _read_be {
            ($ty: ty) => {{
                if switch_endianness {
                    <$ty>::from_le_bytes(read!(from, $ty))
                } else {
                    <$ty>::from_be_bytes(read!(from, $ty))
                }
            }};
        }

        // Native-endian read: picks the target's byte order at compile time,
        // still honoring `switch_endianness` through the macros above.
        macro_rules! _read_ne {
            ($ty: ty) => {{
                if cfg!(target_endian = "big") {
                    _read_be!($ty)
                } else {
                    _read_le!($ty)
                }
            }};
        }

        // Middle-endian read: two 16-bit words, the first one being the high half.
        macro_rules! _read_me {
            () => {
                ((_read_le!(u16) as i32) << 16) | (_read_le!(u16) as i32)
            };
        }

        Ok(match self {
            // signed
            Self::byte => Scalar::byte(read!(from, u8)[0] as i8),
            Self::short => Scalar::short(_read_ne!(i16)),
            Self::long => Scalar::long(_read_ne!(i32)),
            Self::date => Scalar::date(_read_ne!(i32)),
            Self::ldate => Scalar::ldate(_read_ne!(i32)),
            Self::qwdate => Scalar::qwdate(_read_ne!(i64)),
            Self::leshort => Scalar::leshort(_read_le!(i16)),
            Self::lelong => Scalar::lelong(_read_le!(i32)),
            Self::lequad => Scalar::lequad(_read_le!(i64)),
            Self::bequad => Scalar::bequad(_read_be!(i64)),
            Self::belong => Scalar::belong(_read_be!(i32)),
            Self::bedate => Scalar::bedate(_read_be!(i32)),
            Self::beldate => Scalar::beldate(_read_be!(i32)),
            Self::beqdate => Scalar::beqdate(_read_be!(i64)),
            // unsigned
            Self::ubyte => Scalar::ubyte(read!(from, u8)[0]),
            Self::ushort => Scalar::ushort(_read_ne!(u16)),
            Self::uleshort => Scalar::uleshort(_read_le!(u16)),
            Self::ulelong => Scalar::ulelong(_read_le!(u32)),
            Self::uledate => Scalar::uledate(_read_le!(u32)),
            Self::ulequad => Scalar::ulequad(_read_le!(u64)),
            // `offset` consumes nothing: its value is the current stream position.
            Self::offset => Scalar::offset(from.stream_position()?),
            Self::ubequad => Scalar::ubequad(_read_be!(u64)),
            Self::medate => Scalar::medate(_read_me!()),
            Self::meldate => Scalar::meldate(_read_me!()),
            Self::melong => Scalar::melong(_read_me!()),
            Self::beshort => Scalar::beshort(_read_be!(i16)),
            Self::quad => Scalar::quad(_read_ne!(i64)),
            Self::uquad => Scalar::uquad(_read_ne!(u64)),
            Self::ledate => Scalar::ledate(_read_le!(i32)),
            Self::leldate => Scalar::leldate(_read_le!(i32)),
            Self::leqdate => Scalar::leqdate(_read_le!(i64)),
            Self::leqldate => Scalar::leqldate(_read_le!(i64)),
            Self::leqwdate => Scalar::leqwdate(_read_le!(i64)),
            Self::ubelong => Scalar::ubelong(_read_be!(u32)),
            Self::ulong => Scalar::ulong(_read_ne!(u32)),
            Self::ubeshort => Scalar::ubeshort(_read_be!(u16)),
            Self::ubeqdate => Scalar::ubeqdate(_read_be!(u64)),
            Self::lemsdosdate => Scalar::lemsdosdate(_read_le!(u16)),
            Self::lemsdostime => Scalar::lemsdostime(_read_le!(u16)),
            // The 16 GUID bytes are always decoded big-endian.
            Self::guid => Scalar::guid(u128::from_be_bytes(read!(from, u128))),
        })
    }
}
447
448impl FloatDataType {
449 #[inline(always)]
450 fn read<R: Read + Seek>(&self, from: &mut R, switch_endianness: bool) -> Result<Float, Error> {
451 macro_rules! _read_le {
452 ($ty: ty) => {{
453 if switch_endianness {
454 <$ty>::from_be_bytes(read!(from, $ty))
455 } else {
456 <$ty>::from_le_bytes(read!(from, $ty))
457 }
458 }};
459 }
460
461 macro_rules! _read_be {
462 ($ty: ty) => {{
463 if switch_endianness {
464 <$ty>::from_le_bytes(read!(from, $ty))
465 } else {
466 <$ty>::from_be_bytes(read!(from, $ty))
467 }
468 }};
469 }
470
471 macro_rules! _read_ne {
472 ($ty: ty) => {{
473 if cfg!(target_endian = "big") {
474 _read_be!($ty)
475 } else {
476 _read_le!($ty)
477 }
478 }};
479 }
480
481 macro_rules! _read_me {
482 () => {
483 ((_read_le!(u16) as i32) << 16) | (_read_le!(u16) as i32)
484 };
485 }
486
487 Ok(match self {
488 Self::lefloat => Float::lefloat(_read_le!(f32)),
489 Self::befloat => Float::befloat(_read_le!(f32)),
490 Self::ledouble => Float::ledouble(_read_le!(f64)),
491 Self::bedouble => Float::bedouble(_read_be!(f64)),
492 })
493 }
494}
495
/// Arithmetic or bitwise operator applied to a value that was read, before it
/// is compared with the test value.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
enum Op {
    /// Multiplication (`*`).
    Mul,
    /// Addition (`+`).
    Add,
    /// Subtraction (`-`).
    Sub,
    /// Division (`/`).
    Div,
    /// Remainder (`%`).
    Mod,
    /// Bitwise AND (`&`).
    And,
    /// Bitwise XOR (`^`).
    Xor,
    /// Bitwise OR (`|`).
    Or,
}
507
508impl Display for Op {
509 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
510 match self {
511 Op::Mul => write!(f, "*"),
512 Op::Add => write!(f, "+"),
513 Op::Sub => write!(f, "-"),
514 Op::Div => write!(f, "/"),
515 Op::Mod => write!(f, "%"),
516 Op::And => write!(f, "&"),
517 Op::Or => write!(f, "|"),
518 Op::Xor => write!(f, "^"),
519 }
520 }
521}
522
/// Comparison operator between the value read from the input and the test
/// value of a rule. The bit-level descriptions below reflect how scalar tests
/// evaluate each operator.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
enum CmpOp {
    /// Read value equals the test value.
    Eq,
    /// Read value is lower than the test value.
    Lt,
    /// Read value is greater than the test value.
    Gt,
    /// Every bit set in the test value is set in the read value.
    BitAnd,
    /// Read value differs from the test value.
    Neq,
    /// No bit set in the test value is set in the read value.
    Xor,
    /// Read value equals the bitwise negation of the test value.
    Not,
}
533
534impl CmpOp {
535 #[inline(always)]
536 fn is_neq(&self) -> bool {
537 matches!(self, Self::Neq)
538 }
539}
540
/// Operation applied to a scalar that was read, before it is compared.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct ScalarTransform {
    /// Operator to apply.
    op: Op,
    /// Right-hand operand of the operation.
    num: Scalar,
}
546
547impl ScalarTransform {
548 fn apply(&self, s: Scalar) -> Option<Scalar> {
549 match self.op {
550 Op::Add => s.checked_add(self.num),
551 Op::Sub => s.checked_sub(self.num),
552 Op::Mul => s.checked_mul(self.num),
553 Op::Div => s.checked_div(self.num),
554 Op::Mod => s.checked_rem(self.num),
555 Op::And => Some(s.bitand(self.num)),
556 Op::Xor => Some(s.bitxor(self.num)),
557 Op::Or => Some(s.bitor(self.num)),
558 }
559 }
560}
561
/// Operation applied to a float that was read, before it is compared.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct FloatTransform {
    /// Operator to apply; bitwise operators are not supported for floats.
    op: Op,
    /// Right-hand operand of the operation.
    num: Float,
}
567
568impl FloatTransform {
569 fn apply(&self, s: Float) -> Float {
570 match self.op {
571 Op::Add => s.add(self.num),
572 Op::Sub => s.sub(self.num),
573 Op::Mul => s.mul(self.num),
574 Op::Div => s.div(self.num),
576 Op::Mod => s.rem(self.num),
578 Op::And | Op::Xor | Op::Or => {
580 debug_panic!("unsupported operation");
581 s
582 }
583 }
584 }
585}
586
/// Value a test compares against.
#[derive(Debug, Clone, Serialize, Deserialize)]
enum TestValue<T> {
    /// A concrete value to compare with.
    Value(T),
    /// Wildcard: any value that can be read is accepted.
    Any,
}
592
593impl<T> TestValue<T> {
594 #[inline(always)]
595 fn as_ref(&self) -> TestValue<&T> {
596 match self {
597 Self::Value(v) => TestValue::Value(v),
598 Self::Any => TestValue::Any,
599 }
600 }
601}
602
// Modifiers accepted by regex tests (also stored on search tests).
// NOTE(review): only `LineLimit`, `ForceBin` and `ForceText` are consumed in
// the visible part of the file; the remaining flags are handled elsewhere.
flags! {
    enum ReMod: u8{
        CaseInsensitive,
        StartOffsetUpdate,
        // The test length is a number of lines rather than a number of bytes.
        LineLimit,
        // Forces the test to be considered as binary.
        ForceBin,
        // Forces the test to be considered as text.
        ForceText,
        TrimMatch,
    }
}
613
614fn serialize_regex<S>(re: &bytes::Regex, serializer: S) -> Result<S::Ok, S::Error>
615where
616 S: serde::Serializer,
617{
618 re.as_str().serialize(serializer)
619}
620
621fn deserialize_regex<'de, D>(deserializer: D) -> Result<bytes::Regex, D::Error>
622where
623 D: serde::Deserializer<'de>,
624{
625 let wrapper = String::deserialize(deserializer)?;
626 bytes::Regex::new(&wrapper).map_err(serde::de::Error::custom)
627}
628
/// Test matching a regular expression against the input.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct RegexTest {
    /// Compiled regex; (de)serialized through its pattern string.
    #[serde(
        serialize_with = "serialize_regex",
        deserialize_with = "deserialize_regex"
    )]
    re: bytes::Regex,
    /// Optional limit: a number of bytes to read or, with
    /// [`ReMod::LineLimit`], a number of lines.
    length: Option<usize>,
    /// Regex specific modifiers.
    mods: FlagSet<ReMod>,
    /// String modifiers attached to the test.
    str_mods: FlagSet<StringMod>,
    /// Length used to compute the test strength.
    /// NOTE(review): presumably the pattern length without regex
    /// meta-characters — confirm against the parser.
    non_magic_len: usize,
    /// Whether the test is considered binary.
    binary: bool,
    /// Comparison operator; `Neq` inverts the outcome of a failed search.
    cmp_op: CmpOp,
}
643
644impl RegexTest {
645 #[inline(always)]
646 fn is_binary(&self) -> bool {
647 self.binary
648 || self.mods.contains(ReMod::ForceBin)
649 || self.str_mods.contains(StringMod::ForceBin)
650 }
651
652 fn match_buf<'buf>(
653 &self,
654 off_buf: u64, stream_kind: StreamKind,
656 buf: &'buf [u8],
657 ) -> Option<MatchRes<'buf>> {
658 let mr = match stream_kind {
659 StreamKind::Text(_) => {
660 let mut off_txt = off_buf;
661
662 let mut line_limit = self.length.unwrap_or(usize::MAX);
663
664 for line in buf.split(|c| c == &b'\n') {
665 if line_limit == 0 {
669 break;
670 }
671
672 if let Some(re_match) = self.re.find(line) {
673 let start_offset = off_txt + re_match.start() as u64;
675
676 let stop_offset = if re_match.end() == line.len() {
678 Some(start_offset + re_match.as_bytes().len() as u64 + 1)
679 } else {
680 None
681 };
682
683 return Some(MatchRes::Bytes(
684 start_offset,
685 stop_offset,
686 re_match.as_bytes(),
687 Encoding::Utf8,
688 ));
689 }
690
691 off_txt += line.len() as u64;
692 off_txt += 1;
694 line_limit = line_limit.saturating_sub(1)
695 }
696 None
697 }
698
699 StreamKind::Binary => {
700 self.re.find(buf).map(|re_match| {
701 MatchRes::Bytes(
702 off_buf + re_match.start() as u64,
704 None,
705 re_match.as_bytes(),
706 Encoding::Utf8,
707 )
708 })
709 }
710 };
711
712 if self.cmp_op.is_neq() && mr.is_none() {
714 return Some(MatchRes::Bytes(off_buf, None, buf, Encoding::Utf8));
715 }
716
717 mr
718 }
719}
720
721impl From<RegexTest> for Test {
722 fn from(value: RegexTest) -> Self {
723 Self::Regex(value)
724 }
725}
726
// Modifiers accepted by string-like tests (string, search, regex).
flags! {
    enum StringMod: u8{
        // Forces the test to be considered as binary.
        ForceBin,
        // Upper-case letters of the pattern match both cases in the target.
        UpperInsensitive,
        // Lower-case letters of the pattern match both cases in the target.
        LowerInsensitive,
        // The match must be followed by whitespace or the end of the buffer.
        FullWordMatch,
        // ASCII whitespace is trimmed from the reported match.
        Trim,
        // Forces the test to be considered as text.
        ForceText,
        // A run of blanks in the pattern matches a run at least as long in the target.
        CompactWhitespace,
        // Blanks are optional on both the pattern and the target side.
        OptBlank,
    }
}
739
/// Test comparing bytes of the input with a string.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct StringTest {
    /// Expected bytes, or `Any` to accept whatever string is read.
    test_val: TestValue<Vec<u8>>,
    /// Comparison operator (`Eq`, `Neq`, `Lt` and `Gt` are supported).
    cmp_op: CmpOp,
    /// Optional number of bytes to read for the comparison.
    length: Option<usize>,
    /// String modifiers.
    mods: FlagSet<StringMod>,
    /// Whether the test is considered binary.
    binary: bool,
}
748
749impl From<StringTest> for Test {
750 fn from(value: StringTest) -> Self {
751 Self::String(value)
752 }
753}
754
755#[inline(always)]
756fn string_match(str: &[u8], mods: FlagSet<StringMod>, buf: &[u8]) -> (bool, usize) {
757 let mut consumed = 0;
758 if mods.is_disjoint(
760 StringMod::UpperInsensitive
761 | StringMod::LowerInsensitive
762 | StringMod::FullWordMatch
763 | StringMod::CompactWhitespace
764 | StringMod::OptBlank,
765 ) {
766 if buf.starts_with(str) {
768 (true, str.len())
769 } else {
770 (false, consumed)
771 }
772 } else {
773 let mut i_src = 0;
774 let mut iter = buf.iter().peekable();
775
776 macro_rules! consume_target {
777 () => {{
778 iter.next();
779 consumed += 1;
780 }};
781 }
782
783 macro_rules! continue_next_iteration {
784 () => {{
785 consume_target!();
786 i_src += 1;
787 continue;
788 }};
789 }
790
791 while let Some(&&b) = iter.peek() {
792 let Some(&ref_byte) = str.get(i_src) else {
793 break;
794 };
795
796 if mods.contains(StringMod::OptBlank) && (b == b' ' || ref_byte == b' ') {
797 if b == b' ' {
798 consume_target!();
800 }
801
802 if ref_byte == b' ' {
803 i_src += 1;
805 }
806
807 continue;
808 }
809
810 if mods.contains(StringMod::UpperInsensitive) {
811 if ref_byte.is_ascii_uppercase() && ref_byte == b.to_ascii_uppercase()
813 || ref_byte == b
814 {
815 continue_next_iteration!()
816 }
817 }
818
819 if mods.contains(StringMod::LowerInsensitive)
820 && (ref_byte.is_ascii_lowercase() && ref_byte == b.to_ascii_lowercase()
821 || ref_byte == b)
822 {
823 continue_next_iteration!()
824 }
825
826 if mods.contains(StringMod::CompactWhitespace) && ref_byte == b' ' {
827 let mut src_blk = 0;
828 while let Some(b' ') = str.get(i_src) {
829 src_blk += 1;
830 i_src += 1;
831 }
832
833 let mut tgt_blk = 0;
834 while let Some(b' ') = iter.peek() {
835 tgt_blk += 1;
836 consume_target!();
837 }
838
839 if src_blk > tgt_blk {
840 return (false, consumed);
841 }
842
843 continue;
844 }
845
846 if ref_byte == b {
847 continue_next_iteration!()
848 } else {
849 return (false, consumed);
850 }
851 }
852
853 if mods.contains(StringMod::FullWordMatch) {
854 if let Some(b) = iter.peek() {
855 if !b.is_ascii_whitespace() {
856 return (false, consumed);
857 }
858 }
859 }
860
861 (consumed > 0 && consumed < buf.len(), consumed)
862 }
863}
864
865impl StringTest {
866 fn has_length_mod(&self) -> bool {
867 !self.mods.is_disjoint(
868 StringMod::UpperInsensitive
869 | StringMod::LowerInsensitive
870 | StringMod::FullWordMatch
871 | StringMod::CompactWhitespace
872 | StringMod::OptBlank,
873 )
874 }
875
876 #[inline(always)]
877 fn test_value_len(&self) -> usize {
878 match self.test_val.as_ref() {
879 TestValue::Value(s) => s.len(),
880 TestValue::Any => 0,
881 }
882 }
883
884 #[inline(always)]
885 fn is_binary(&self) -> bool {
886 self.binary || self.mods.contains(StringMod::ForceBin)
887 }
888
889 #[inline(always)]
890 fn is_text(&self) -> bool {
891 self.mods.contains(StringMod::ForceText)
892 }
893}
894
/// Test searching for a string anywhere in a window of the input.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct SearchTest {
    /// Bytes to search for.
    str: Vec<u8>,
    /// Highest position (relative to the search start) at which a match may
    /// begin; unlimited when `None`.
    n_pos: Option<usize>,
    /// String modifiers used for the comparison.
    str_mods: FlagSet<StringMod>,
    /// Regex-style modifiers attached to the test.
    re_mods: FlagSet<ReMod>,
    /// Whether the test is considered binary.
    binary: bool,
    /// Comparison operator; with `Neq` the test succeeds when nothing is found.
    cmp_op: CmpOp,
}
904
905impl From<SearchTest> for Test {
906 fn from(value: SearchTest) -> Self {
907 Self::Search(value)
908 }
909}
910
911impl SearchTest {
912 #[inline(always)]
913 fn is_binary(&self) -> bool {
914 (self.binary
915 || self.str_mods.contains(StringMod::ForceBin)
916 || self.re_mods.contains(ReMod::ForceBin))
917 && !(self.str_mods.contains(StringMod::ForceText)
918 || self.re_mods.contains(ReMod::ForceText))
919 }
920
921 #[inline]
923 fn match_buf<'buf>(&self, off_buf: u64, buf: &'buf [u8]) -> Option<MatchRes<'buf>> {
924 let mut i = 0;
925
926 let needle = self.str.first()?;
927
928 while i < buf.len() {
929 i += memchr(*needle, &buf[i..])?;
932
933 if self.str_mods.contains(StringMod::FullWordMatch) {
935 let prev_is_whitespace = buf
936 .get(i.saturating_sub(1))
937 .map(|c| c.is_ascii_whitespace())
938 .unwrap_or_default();
939
940 if i > 0 && !prev_is_whitespace {
945 i += 1;
946 continue;
947 }
948 }
949
950 if let Some(npos) = self.n_pos {
951 if i > npos {
952 break;
953 }
954 }
955
956 let pos = i;
957 let (ok, consumed) = string_match(&self.str, self.str_mods, &buf[i..]);
958
959 if ok {
960 return Some(MatchRes::Bytes(
961 off_buf.saturating_add(pos as u64),
962 None,
963 &buf[i..i + consumed],
964 Encoding::Utf8,
965 ));
966 } else {
967 i += max(consumed, 1)
968 }
969 }
970
971 if self.cmp_op.is_neq() {
973 return Some(MatchRes::Bytes(off_buf, None, buf, Encoding::Utf8));
974 }
975
976 None
977 }
978}
979
/// Test comparing an integer-like value read from the input.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct ScalarTest {
    /// Type (size, signedness, endianness) of the value to read.
    ty: ScalarDataType,
    /// Optional operation applied to the value before the comparison.
    transform: Option<ScalarTransform>,
    /// Comparison operator.
    cmp_op: CmpOp,
    /// Value to compare with, or `Any`.
    test_val: TestValue<Scalar>,
}
987
/// Test comparing a floating point value read from the input.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct FloatTest {
    /// Type (size, endianness) of the value to read.
    ty: FloatDataType,
    /// Optional operation applied to the value before the comparison.
    transform: Option<FloatTransform>,
    /// Comparison operator (`Eq`, `Lt`, `Gt` and `Neq` are supported).
    cmp_op: CmpOp,
    /// Value to compare with, or `Any`.
    test_val: TestValue<Float>,
}
995
/// Raw value read from the input for a test, before any comparison. The
/// first field of every variant is the offset at which the value was read.
#[derive(Debug, PartialEq)]
enum ReadValue<'buf> {
    /// Floating point value.
    Float(u64, Float),
    /// Integer-like value.
    Scalar(u64, Scalar),
    /// Bytes borrowed from the read buffer.
    Bytes(u64, &'buf [u8]),
}
1004
1005impl DynDisplay for ReadValue<'_> {
1006 fn dyn_fmt(&self, f: &dyf::FormatSpec) -> Result<String, dyf::Error> {
1007 match self {
1008 Self::Float(_, s) => DynDisplay::dyn_fmt(s, f),
1009 Self::Scalar(_, s) => DynDisplay::dyn_fmt(s, f),
1010 Self::Bytes(_, b) => Ok(format!("{b:?}")),
1011 }
1012 }
1013}
1014
1015impl DynDisplay for &ReadValue<'_> {
1016 fn dyn_fmt(&self, f: &dyf::FormatSpec) -> Result<String, dyf::Error> {
1017 DynDisplay::dyn_fmt(*self, f)
1019 }
1020}
1021
1022impl Display for ReadValue<'_> {
1023 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1024 match self {
1025 Self::Float(_, v) => write!(f, "{v}"),
1026 Self::Scalar(_, s) => write!(f, "{s}"),
1027 Self::Bytes(_, b) => write!(f, "{b:?}"),
1028 }
1029 }
1030}
1031
/// Encoding used to decode matched bytes when they are formatted.
enum Encoding {
    /// UTF-16 with the given byte order.
    Utf16(String16Encoding),
    /// UTF-8 (decoded lossily).
    Utf8,
}
1036
/// Result of a successful test.
enum MatchRes<'buf> {
    /// Matched bytes: start offset, optional explicit end offset (when `None`
    /// the end is the start plus the length of the bytes), the bytes and the
    /// encoding used to display them.
    Bytes(u64, Option<u64>, &'buf [u8], Encoding),
    /// Matched scalar and the offset it was read at.
    Scalar(u64, Scalar),
    /// Matched float and the offset it was read at.
    Float(u64, Float),
}
1048
1049impl DynDisplay for &MatchRes<'_> {
1050 fn dyn_fmt(&self, f: &dyf::FormatSpec) -> Result<String, dyf::Error> {
1051 (*self).dyn_fmt(f)
1052 }
1053}
1054
1055impl DynDisplay for MatchRes<'_> {
1056 fn dyn_fmt(&self, f: &dyf::FormatSpec) -> Result<String, dyf::Error> {
1057 match self {
1058 Self::Scalar(_, v) => v.dyn_fmt(f),
1059 Self::Float(_, v) => v.dyn_fmt(f),
1060 Self::Bytes(_, _, v, enc) => match enc {
1061 Encoding::Utf8 => String::from_utf8_lossy(v).to_string().dyn_fmt(f),
1062 Encoding::Utf16(enc) => {
1063 let utf16: Vec<u16> = slice_to_utf16_iter(v, *enc).collect();
1064 String::from_utf16_lossy(&utf16).dyn_fmt(f)
1065 }
1066 },
1067 }
1068 }
1069}
1070
1071impl MatchRes<'_> {
1072 #[inline]
1074 fn start_offset(&self) -> u64 {
1075 match self {
1076 MatchRes::Bytes(o, _, _, _) => *o,
1077 MatchRes::Scalar(o, _) => *o,
1078 MatchRes::Float(o, _) => *o,
1079 }
1080 }
1081
1082 #[inline]
1084 fn end_offset(&self) -> u64 {
1085 match self {
1086 MatchRes::Bytes(start, end, buf, _) => match end {
1087 Some(end) => *end,
1088 None => start.saturating_add(buf.len() as u64),
1089 },
1090 MatchRes::Scalar(o, sc) => o.add(sc.size_of() as u64),
1091 MatchRes::Float(o, f) => o.add(f.size_of() as u64),
1092 }
1093 }
1094}
1095
1096fn slice_to_utf16_iter(read: &[u8], encoding: String16Encoding) -> impl Iterator<Item = u16> {
1097 let even = read
1098 .iter()
1099 .enumerate()
1100 .filter(|(i, _)| i % 2 == 0)
1101 .map(|t| t.1);
1102
1103 let odd = read
1104 .iter()
1105 .enumerate()
1106 .filter(|(i, _)| i % 2 != 0)
1107 .map(|t| t.1);
1108
1109 even.zip(odd).map(move |(e, o)| match encoding {
1110 String16Encoding::Le => u16::from_le_bytes([*e, *o]),
1111 String16Encoding::Be => u16::from_be_bytes([*e, *o]),
1112 })
1113}
1114
/// Byte order of a UTF-16 string.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
enum String16Encoding {
    /// Little-endian code units.
    Le,
    /// Big-endian code units.
    Be,
}
1120
/// Test comparing a UTF-16 encoded string of the input.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct String16Test {
    /// Original test string; its bytes are reported as the match on success.
    orig: String,
    /// Expected UTF-16 code units, or `Any`.
    test_val: TestValue<Vec<u16>>,
    /// Byte order used to decode the input.
    encoding: String16Encoding,
}
1127
1128impl String16Test {
1129 #[inline(always)]
1133 fn test_value_len(&self) -> usize {
1134 match self.test_val.as_ref() {
1135 TestValue::Value(str16) => str16.len(),
1136 TestValue::Any => 0,
1137 }
1138 }
1139}
1140
// Modifiers accepted by indirect tests.
// NOTE(review): the handling of `Relative` is outside this view.
flags! {
    enum IndirectMod: u8{
        Relative,
    }
}

/// Set of [`IndirectMod`] flags.
type IndirectMods = FlagSet<IndirectMod>;
1148
/// Encoding of the length prefix of a Pascal string.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
enum PStringLen {
    /// Length stored on one byte.
    Byte,
    /// Length stored on two bytes, big-endian.
    ShortBe,
    /// Length stored on two bytes, little-endian.
    ShortLe,
    /// Length stored on four bytes, big-endian.
    LongBe,
    /// Length stored on four bytes, little-endian.
    LongLe,
}
1157
1158impl PStringLen {
1159 #[inline(always)]
1160 const fn size_of_len(&self) -> usize {
1161 match self {
1162 PStringLen::Byte => 1,
1163 PStringLen::ShortBe => 2,
1164 PStringLen::ShortLe => 2,
1165 PStringLen::LongBe => 4,
1166 PStringLen::LongLe => 4,
1167 }
1168 }
1169}
1170
/// Test comparing a Pascal (length-prefixed) string of the input.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct PStringTest {
    /// Encoding of the length prefix.
    len: PStringLen,
    /// Expected string bytes, or `Any`.
    test_val: TestValue<Vec<u8>>,
    /// Whether the stored length also counts the length prefix itself.
    include_len: bool,
}
1177
1178impl PStringTest {
1179 #[inline]
1180 fn read<'cache, R: Read + Seek>(
1181 &self,
1182 haystack: &'cache mut LazyCache<R>,
1183 ) -> Result<Option<&'cache [u8]>, Error> {
1184 let mut len = match self.len {
1185 PStringLen::Byte => read_le!(haystack, u8) as u32,
1186 PStringLen::ShortBe => read_be!(haystack, u16) as u32,
1187 PStringLen::ShortLe => read_le!(haystack, u16) as u32,
1188 PStringLen::LongBe => read_be!(haystack, u32),
1189 PStringLen::LongLe => read_le!(haystack, u32),
1190 } as usize;
1191
1192 if self.include_len {
1193 len = len.saturating_sub(self.len.size_of_len())
1194 }
1195
1196 if let TestValue::Value(s) = self.test_val.as_ref() {
1197 if len != s.len() {
1198 return Ok(None);
1199 }
1200 }
1201
1202 let read = haystack.read_exact_count(len as u64)?;
1203
1204 Ok(Some(read))
1205 }
1206
1207 #[inline(always)]
1208 fn test_value_len(&self) -> usize {
1209 match self.test_val.as_ref() {
1210 TestValue::Value(s) => s.len(),
1211 TestValue::Any => 0,
1212 }
1213 }
1214}
1215
/// A test of a magic rule.
///
/// `Name`, `Use`, `Indirect`, `Clear`, `Default` and `Der` read no value from
/// the input and have a strength of zero (`Der` excepted).
#[derive(Debug, Clone, Serialize, Deserialize)]
enum Test {
    /// Named rule declaration.
    Name(String),
    /// Reference to a named rule.
    /// NOTE(review): the boolean presumably requests an endianness switch —
    /// confirm against the evaluation code.
    Use(bool, String),
    /// Integer-like comparison.
    Scalar(ScalarTest),
    /// Floating point comparison.
    Float(FloatTest),
    /// String comparison.
    String(StringTest),
    /// String search.
    Search(SearchTest),
    /// Pascal string comparison.
    PString(PStringTest),
    /// Regular expression match.
    Regex(RegexTest),
    /// Indirect test.
    Indirect(FlagSet<IndirectMod>),
    /// UTF-16 string comparison.
    String16(String16Test),
    // NOTE(review): no value is read nor matched for this test in the visible code.
    #[allow(dead_code)]
    Der,
    /// Clear test.
    Clear,
    /// Default test.
    Default,
}
1234
1235impl Test {
    /// Reads from `haystack`, at its current position, the value this test
    /// needs for its comparison.
    ///
    /// Returns `Ok(None)` when it is already known the test cannot match (a
    /// Pascal string whose length differs from the expected one). The
    /// returned value carries the offset at which the read started.
    ///
    /// # Errors
    /// Returns an error when the read fails, when a string test uses an
    /// unsupported comparison operator, or when the test kind has no value to
    /// read (`Name`, `Use`, `Indirect`, `Clear`, `Default`, `Der`).
    #[inline]
    fn read_test_value<'haystack, R: Read + Seek>(
        &self,
        haystack: &'haystack mut LazyCache<R>,
        switch_endianness: bool,
    ) -> Result<Option<ReadValue<'haystack>>, Error> {
        // position of the value, captured before anything is consumed
        let test_value_offset = haystack.lazy_stream_position();

        match self {
            Self::Scalar(t) => {
                t.ty.read(haystack, switch_endianness)
                    .map(|s| Some(ReadValue::Scalar(test_value_offset, s)))
            }

            Self::Float(t) => {
                t.ty.read(haystack, switch_endianness)
                    .map(|f| Some(ReadValue::Float(test_value_offset, f)))
            }
            Self::String(t) => {
                match t.test_val.as_ref() {
                    TestValue::Value(str) => {
                        let buf = if let Some(length) = t.length {
                            // an explicit length always wins
                            haystack.read_exact_count(length as u64)?
                        } else {
                            match t.cmp_op {
                                CmpOp::Eq | CmpOp::Neq => {
                                    if !t.has_length_mod() {
                                        // plain comparison: exactly the pattern length
                                        haystack.read_exact_count(str.len() as u64)?
                                    } else {
                                        // modifiers may consume more than the
                                        // pattern length: read as much as allowed
                                        haystack.read_count(FILE_BYTES_MAX as u64)?
                                    }
                                }
                                CmpOp::Lt | CmpOp::Gt => {
                                    // NOTE(review): 8092 differs from the 8192 limit
                                    // used by the other reads; possibly a typo.
                                    let read =
                                        haystack.read_until_any_delim_or_limit(b"\n\0", 8092)?;

                                    // drop the trailing delimiter, if any
                                    if read.ends_with(b"\0") || read.ends_with(b"\n") {
                                        &read[..read.len() - 1]
                                    } else {
                                        read
                                    }
                                }
                                _ => {
                                    return Err(Error::Msg(format!(
                                        "string test does not support {:?} operator",
                                        t.cmp_op
                                    )));
                                }
                            }
                        };

                        Ok(Some(ReadValue::Bytes(test_value_offset, buf)))
                    }
                    TestValue::Any => {
                        // wildcard: read up to a NUL or newline delimiter
                        let read = haystack.read_until_any_delim_or_limit(b"\0\n", 8192)?;
                        // drop the trailing delimiter, if any
                        let bytes = if read.ends_with(b"\0") || read.ends_with(b"\n") {
                            &read[..read.len() - 1]
                        } else {
                            read
                        };

                        Ok(Some(ReadValue::Bytes(test_value_offset, bytes)))
                    }
                }
            }

            Self::String16(t) => {
                match t.test_val.as_ref() {
                    TestValue::Value(str16) => {
                        // two bytes per UTF-16 code unit
                        let read = haystack.read_exact_count((str16.len() * 2) as u64)?;

                        Ok(Some(ReadValue::Bytes(test_value_offset, read)))
                    }
                    TestValue::Any => {
                        let read = haystack.read_until_utf16_or_limit(b"\x00\x00", 8192)?;

                        // keep an even number of bytes so that only complete
                        // code units are returned
                        let end = if read.len() % 2 == 0 {
                            read.len()
                        } else {
                            read.len().saturating_sub(1)
                        };

                        Ok(Some(ReadValue::Bytes(test_value_offset, &read[..end])))
                    }
                }
            }

            Self::PString(t) => {
                let Some(read) = t.read(haystack)? else {
                    return Ok(None);
                };
                Ok(Some(ReadValue::Bytes(test_value_offset, read)))
            }

            Self::Search(_) => {
                // the search window is bounded at match time, not at read time
                let buf = haystack.read_count(FILE_BYTES_MAX as u64)?;
                Ok(Some(ReadValue::Bytes(test_value_offset, buf)))
            }

            Self::Regex(r) => {
                let length = {
                    match r.length {
                        Some(len) => {
                            if r.mods.contains(ReMod::LineLimit) {
                                // length is a number of lines, estimated at
                                // 80 bytes each
                                len * 80
                            } else {
                                len
                            }
                        }

                        None => FILE_REGEX_MAX,
                    }
                };

                let read = haystack.read_count(length as u64)?;
                Ok(Some(ReadValue::Bytes(test_value_offset, read)))
            }

            Self::Name(_)
            | Self::Use(_, _)
            | Self::Indirect(_)
            | Self::Clear
            | Self::Default
            | Self::Der => Err(Error::msg("no value to read for this test")),
        }
    }
1370
    /// Compares the value `tv`, previously read for this test, with the test
    /// value and returns the match result on success.
    ///
    /// `None` is returned when the comparison fails, when a scalar transform
    /// cannot be applied, or when the kind of `tv` is not the one expected by
    /// the test. `stream_kind` is only used by regex tests.
    #[inline(always)]
    fn match_value<'s>(
        &'s self,
        tv: &ReadValue<'s>,
        stream_kind: StreamKind,
    ) -> Option<MatchRes<'s>> {
        match (self, tv) {
            (Self::Scalar(t), ReadValue::Scalar(o, ts)) => {
                // the transform, if any, is applied before the comparison
                let read_value: Scalar = match t.transform.as_ref() {
                    Some(t) => t.apply(*ts)?,
                    None => *ts,
                };

                match t.test_val {
                    TestValue::Value(test_value) => {
                        let ok = match t.cmp_op {
                            // equal to the bitwise negation of the test value
                            CmpOp::Not => read_value == !test_value,
                            CmpOp::Eq => read_value == test_value,
                            CmpOp::Lt => read_value < test_value,
                            CmpOp::Gt => read_value > test_value,
                            CmpOp::Neq => read_value != test_value,
                            // all bits of the test value are set
                            CmpOp::BitAnd => read_value & test_value == test_value,
                            // no bit of the test value is set
                            CmpOp::Xor => (read_value & test_value).is_zero(),
                        };

                        if ok {
                            Some(MatchRes::Scalar(*o, read_value))
                        } else {
                            None
                        }
                    }

                    TestValue::Any => Some(MatchRes::Scalar(*o, read_value)),
                }
            }

            (Self::Float(t), ReadValue::Float(o, f)) => {
                let read_value: Float = t.transform.as_ref().map(|t| t.apply(*f)).unwrap_or(*f);

                match t.test_val {
                    TestValue::Value(tf) => {
                        let ok = match t.cmp_op {
                            CmpOp::Eq => read_value == tf,
                            CmpOp::Lt => read_value < tf,
                            CmpOp::Gt => read_value > tf,
                            CmpOp::Neq => read_value != tf,
                            _ => {
                                // bitwise comparisons are meaningless for
                                // floats: panic in debug builds, fail the
                                // test otherwise
                                debug_panic!("unsupported float comparison");
                                debug!("unsupported float comparison");
                                false
                            }
                        };

                        if ok {
                            Some(MatchRes::Float(*o, read_value))
                        } else {
                            None
                        }
                    }
                    TestValue::Any => Some(MatchRes::Float(*o, read_value)),
                }
            }

            (Self::String(st), ReadValue::Bytes(o, buf)) => {
                // trims ASCII whitespace when the Trim modifier is set
                macro_rules! trim_buf {
                    ($buf: expr) => {{
                        if st.mods.contains(StringMod::Trim) {
                            $buf.trim_ascii()
                        } else {
                            $buf
                        }
                    }};
                }

                match st.test_val.as_ref() {
                    TestValue::Value(str) => {
                        match st.cmp_op {
                            // for Eq and Neq the reported bytes are the ones
                            // of the test string, not the ones read
                            CmpOp::Eq => {
                                if let (true, _) = string_match(str, st.mods, buf) {
                                    Some(MatchRes::Bytes(*o, None, trim_buf!(str), Encoding::Utf8))
                                } else {
                                    None
                                }
                            }
                            CmpOp::Neq => {
                                if let (false, _) = string_match(str, st.mods, buf) {
                                    Some(MatchRes::Bytes(*o, None, trim_buf!(str), Encoding::Utf8))
                                } else {
                                    None
                                }
                            }
                            // Gt and Lt compare lengths, not contents
                            CmpOp::Gt => {
                                if buf.len() > str.len() {
                                    Some(MatchRes::Bytes(*o, None, trim_buf!(buf), Encoding::Utf8))
                                } else {
                                    None
                                }
                            }
                            CmpOp::Lt => {
                                if buf.len() < str.len() {
                                    Some(MatchRes::Bytes(*o, None, trim_buf!(buf), Encoding::Utf8))
                                } else {
                                    None
                                }
                            }

                            _ => {
                                // other operators are not supported: panic in
                                // debug builds, fail the test otherwise
                                debug_panic!("unsupported string comparison");
                                debug!("unsupported string comparison");
                                None
                            }
                        }
                    }
                    TestValue::Any => {
                        Some(MatchRes::Bytes(*o, None, trim_buf!(buf), Encoding::Utf8))
                    }
                }
            }

            (Self::PString(m), ReadValue::Bytes(o, buf)) => match m.test_val.as_ref() {
                TestValue::Value(psv) => {
                    if buf == psv {
                        Some(MatchRes::Bytes(*o, None, buf, Encoding::Utf8))
                    } else {
                        None
                    }
                }
                TestValue::Any => Some(MatchRes::Bytes(*o, None, buf, Encoding::Utf8)),
            },

            (Self::String16(t), ReadValue::Bytes(o, buf)) => {
                match t.test_val.as_ref() {
                    TestValue::Value(str16) => {
                        // two bytes per code unit: lengths must agree
                        if str16.len() * 2 != buf.len() {
                            return None;
                        }

                        // compare code unit by code unit
                        for (i, utf16_char) in slice_to_utf16_iter(buf, t.encoding).enumerate() {
                            if str16[i] != utf16_char {
                                return None;
                            }
                        }

                        // the reported bytes are the ones of the original
                        // test string
                        Some(MatchRes::Bytes(
                            *o,
                            None,
                            t.orig.as_bytes(),
                            Encoding::Utf16(t.encoding),
                        ))
                    }

                    TestValue::Any => {
                        Some(MatchRes::Bytes(*o, None, buf, Encoding::Utf16(t.encoding)))
                    }
                }
            }

            (Self::Regex(r), ReadValue::Bytes(o, buf)) => r.match_buf(*o, stream_kind, buf),

            (Self::Search(t), ReadValue::Bytes(o, buf)) => t.match_buf(*o, buf),

            // test and value kinds do not agree
            _ => None,
        }
    }
1544
1545 #[inline(always)]
1546 fn strength(&self) -> u64 {
1547 const MULT: usize = 10;
1548
1549 let mut out = 2 * MULT;
1550
1551 match self {
1553 Test::Scalar(s) => {
1554 out += s.ty.type_size() * MULT;
1555 }
1556
1557 Test::Float(t) => {
1558 out += t.ty.type_size() * MULT;
1559 }
1560
1561 Test::String(t) => out += t.test_value_len().saturating_mul(MULT),
1562
1563 Test::PString(t) => out += t.test_value_len().saturating_mul(MULT),
1564
1565 Test::Search(s) => {
1566 let n_pos = s.n_pos.unwrap_or(FILE_BYTES_MAX);
1571
1572 match n_pos {
1573 0..=80 => out += s.str.len().saturating_mul(MULT),
1575 81..=240 => out += s.str.len() * s.str.len().clamp(0, MULT - 2),
1577 _ => out += s.str.len(),
1579 }
1580 }
1581
1582 Test::Regex(r) => {
1583 let v = r.non_magic_len / r.re.captures_len();
1592
1593 let len = r
1594 .length
1595 .map(|l| {
1596 if r.mods.contains(ReMod::LineLimit) {
1597 l * 80
1598 } else {
1599 l
1600 }
1601 })
1602 .unwrap_or(FILE_BYTES_MAX);
1603
1604 match len {
1605 0..=80 => out += v.saturating_mul(MULT),
1607 81..=240 => out += v * v.clamp(0, MULT - 2),
1609 _ => out += v,
1611 }
1612 }
1613
1614 Test::String16(t) => {
1615 out += t.test_value_len().saturating_mul(MULT);
1620 }
1621
1622 Test::Der => out += MULT,
1623
1624 Test::Default | Test::Name(_) | Test::Use(_, _) | Test::Indirect(_) | Test::Clear => {
1625 return 0;
1626 }
1627 }
1628
1629 if self.is_match_any() {
1631 return 0;
1632 }
1633
1634 if let Some(op) = self.cmp_op() {
1635 match op {
1636 CmpOp::Neq => out = 0,
1638 CmpOp::Eq | CmpOp::Not => out += MULT,
1639 CmpOp::Lt | CmpOp::Gt => out -= 2 * MULT,
1640 CmpOp::Xor | CmpOp::BitAnd => out -= MULT,
1641 }
1642 }
1643
1644 out as u64
1645 }
1646
1647 #[inline(always)]
1648 fn cmp_op(&self) -> Option<CmpOp> {
1649 match self {
1650 Self::String(t) => Some(t.cmp_op),
1651 Self::Scalar(s) => Some(s.cmp_op),
1652 Self::Float(t) => Some(t.cmp_op),
1653 Self::Name(_)
1654 | Self::Use(_, _)
1655 | Self::Search(_)
1656 | Self::PString(_)
1657 | Self::Regex(_)
1658 | Self::Clear
1659 | Self::Default
1660 | Self::Indirect(_)
1661 | Self::String16(_)
1662 | Self::Der => None,
1663 }
1664 }
1665
1666 #[inline(always)]
1667 fn is_match_any(&self) -> bool {
1668 match self {
1669 Test::Name(_) => false,
1670 Test::Use(_, _) => false,
1671 Test::Scalar(scalar_test) => matches!(scalar_test.test_val, TestValue::Any),
1672 Test::Float(float_test) => matches!(float_test.test_val, TestValue::Any),
1673 Test::String(string_test) => matches!(string_test.test_val, TestValue::Any),
1674 Test::Search(_) => false,
1675 Test::PString(pstring_test) => matches!(pstring_test.test_val, TestValue::Any),
1676 Test::Regex(_) => false,
1677 Test::Indirect(_) => false,
1678 Test::String16(string16_test) => matches!(string16_test.test_val, TestValue::Any),
1679 Test::Der => false,
1680 Test::Clear => false,
1681 Test::Default => false,
1682 }
1683 }
1684
1685 #[inline(always)]
1686 fn is_binary(&self) -> bool {
1687 match self {
1688 Self::Name(_) => true,
1689 Self::Use(_, _) => true,
1690 Self::Scalar(_) => true,
1691 Self::Float(_) => true,
1692 Self::String(t) => !t.is_binary() & !t.is_text() || t.is_binary(),
1693 Self::Search(t) => t.is_binary(),
1694 Self::PString(_) => true,
1695 Self::Regex(t) => t.is_binary(),
1696 Self::Clear => true,
1697 Self::Default => true,
1698 Self::Indirect(_) => true,
1699 Self::String16(_) => true,
1700 Self::Der => true,
1701 }
1702 }
1703
1704 #[inline(always)]
1705 fn is_text(&self) -> bool {
1706 match self {
1707 Self::Name(_) => true,
1708 Self::Use(_, _) => true,
1709 Self::Indirect(_) => true,
1710 Self::Clear => true,
1711 Self::Default => true,
1712 Self::String(t) => !t.is_binary() & !t.is_text() || t.is_text(),
1713 _ => !self.is_binary(),
1714 }
1715 }
1716
1717 #[inline(always)]
1718 fn is_only_text(&self) -> bool {
1719 self.is_text() && !self.is_binary()
1720 }
1721
1722 #[inline(always)]
1723 fn is_only_binary(&self) -> bool {
1724 self.is_binary() && !self.is_text()
1725 }
1726}
1727
/// Encoding of the value stored at an indirect offset address.
///
/// It selects how many bytes [`IndOffset::read_offset`] reads and how
/// they are decoded before being widened to `u64`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
enum OffsetType {
    /// One byte.
    Byte,
    /// Little-endian `f64`, converted with an `as u64` cast.
    DoubleLe,
    /// Big-endian `f64`, converted with an `as u64` cast.
    DoubleBe,
    /// Little-endian 16-bit integer.
    ShortLe,
    /// Big-endian 16-bit integer.
    ShortBe,
    /// Little-endian 32-bit value passed through `decode_id3`.
    Id3Le,
    /// Big-endian 32-bit value passed through `decode_id3`.
    Id3Be,
    /// Little-endian 32-bit integer.
    LongLe,
    /// Big-endian 32-bit integer.
    LongBe,
    /// 32-bit value made of two little-endian 16-bit words, the first
    /// word read being the most significant (see `read_me!`).
    Middle,
    /// Value parsed by `read_octal_u64`.
    Octal,
    /// Big-endian 64-bit integer.
    QuadBe,
    /// Little-endian 64-bit integer.
    QuadLe,
}
1744
/// Right-hand operand of the operation applied to an indirect offset.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
enum Shift {
    /// The operand is the value itself.
    Direct(u64),
    /// The operand is read from the haystack, at the address of the
    /// indirect offset shifted by this (signed) amount.
    Indirect(i64),
}
1750
/// Indirect offset: the final offset is a value read from the haystack.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
struct IndOffset {
    /// Address where the offset value must be read.
    off_addr: DirOffset,
    /// Whether the value read is decoded as a signed integer, for the
    /// integer [`OffsetType`]s.
    signed: bool,
    /// Encoding of the value to read.
    ty: OffsetType,
    /// Operation to apply to the value read. It is applied only when
    /// `shift` is set too.
    op: Option<Op>,
    /// Right-hand operand of `op`.
    shift: Option<Shift>,
}
1762
1763impl IndOffset {
1764 fn read_offset<R: Read + Seek>(
1766 &self,
1767 haystack: &mut LazyCache<R>,
1768 rule_base_offset: Option<u64>,
1769 last_upper_match_offset: Option<u64>,
1770 ) -> Result<Option<u64>, io::Error> {
1771 let offset_address = match self.off_addr {
1772 DirOffset::Start(s) => {
1773 let Some(o) = s.checked_add(rule_base_offset.unwrap_or_default()) else {
1774 return Ok(None);
1775 };
1776
1777 haystack.seek(SeekFrom::Start(o))?
1778 }
1779 DirOffset::LastUpper(c) => haystack.seek(SeekFrom::Start(
1780 (last_upper_match_offset.unwrap_or_default() as i64 + c) as u64,
1781 ))?,
1782 DirOffset::End(e) => haystack.seek(SeekFrom::End(e))?,
1783 };
1784
1785 macro_rules! read_value {
1786 () => {
1787 match self.ty {
1788 OffsetType::Byte => {
1789 if self.signed {
1790 read_le!(haystack, u8) as u64
1791 } else {
1792 read_le!(haystack, i8) as u64
1793 }
1794 }
1795 OffsetType::DoubleLe => read_le!(haystack, f64) as u64,
1796 OffsetType::DoubleBe => read_be!(haystack, f64) as u64,
1797 OffsetType::ShortLe => {
1798 if self.signed {
1799 read_le!(haystack, i16) as u64
1800 } else {
1801 read_le!(haystack, u16) as u64
1802 }
1803 }
1804 OffsetType::ShortBe => {
1805 if self.signed {
1806 read_be!(haystack, i16) as u64
1807 } else {
1808 read_be!(haystack, u16) as u64
1809 }
1810 }
1811 OffsetType::Id3Le => decode_id3(read_le!(haystack, u32)) as u64,
1812 OffsetType::Id3Be => decode_id3(read_be!(haystack, u32)) as u64,
1813 OffsetType::LongLe => {
1814 if self.signed {
1815 read_le!(haystack, i32) as u64
1816 } else {
1817 read_le!(haystack, u32) as u64
1818 }
1819 }
1820 OffsetType::LongBe => {
1821 if self.signed {
1822 read_be!(haystack, i32) as u64
1823 } else {
1824 read_be!(haystack, u32) as u64
1825 }
1826 }
1827 OffsetType::Middle => read_me!(haystack) as u64,
1828 OffsetType::Octal => {
1829 if let Some(o) = read_octal_u64(haystack) {
1830 o
1831 } else {
1832 debug!("failed to read octal offset @ {offset_address}");
1833 return Ok(None);
1834 }
1835 }
1836 OffsetType::QuadLe => {
1837 if self.signed {
1838 read_le!(haystack, i64) as u64
1839 } else {
1840 read_le!(haystack, u64)
1841 }
1842 }
1843 OffsetType::QuadBe => {
1844 if self.signed {
1845 read_be!(haystack, i64) as u64
1846 } else {
1847 read_be!(haystack, u64)
1848 }
1849 }
1850 }
1851 };
1852 }
1853
1854 let o = read_value!();
1856
1857 trace!(
1858 "offset read @ {offset_address} value={o} op={:?} shift={:?}",
1859 self.op, self.shift
1860 );
1861
1862 if let (Some(op), Some(shift)) = (self.op, self.shift) {
1864 let shift = match shift {
1865 Shift::Direct(i) => i,
1866 Shift::Indirect(i) => {
1867 let tmp = offset_address as i128 + i as i128;
1868 if tmp.is_negative() {
1869 return Ok(None);
1870 } else {
1871 haystack.seek(SeekFrom::Start(tmp as u64))?;
1872 };
1873 read_value!()
1876 }
1877 };
1878
1879 match op {
1880 Op::Add => return Ok(o.checked_add(shift)),
1881 Op::Mul => return Ok(o.checked_mul(shift)),
1882 Op::Sub => return Ok(o.checked_sub(shift)),
1883 Op::Div => return Ok(o.checked_div(shift)),
1884 Op::Mod => return Ok(o.checked_rem(shift)),
1885 Op::And => return Ok(Some(o & shift)),
1886 Op::Or => return Ok(Some(o | shift)),
1887 Op::Xor => return Ok(Some(o ^ shift)),
1888 }
1889 }
1890
1891 Ok(Some(o))
1892 }
1893}
1894
/// Direct offset, i.e. an offset which does not need any read in the
/// haystack to be computed.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
enum DirOffset {
    /// Offset counted from the start (of the rule base offset, when there
    /// is one).
    Start(u64),
    /// Signed shift relative to the offset reached by the upper level
    /// match.
    LastUpper(i64),
    /// Offset handed to `SeekFrom::End`, thus relative to the end of the
    /// haystack.
    End(i64),
}
1902
/// Offset at which a test must be applied.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
enum Offset {
    /// Offset known without reading the haystack.
    Direct(DirOffset),
    /// Offset whose value must be read from the haystack.
    Indirect(IndOffset),
}
1908
/// Wraps a [`DirOffset`] into [`Offset::Direct`].
impl From<DirOffset> for Offset {
    fn from(value: DirOffset) -> Self {
        Self::Direct(value)
    }
}
1914
/// Wraps an [`IndOffset`] into [`Offset::Indirect`].
impl From<IndOffset> for Offset {
    fn from(value: IndOffset) -> Self {
        Self::Indirect(value)
    }
}
1920
1921impl Display for DirOffset {
1922 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1923 match self {
1924 DirOffset::Start(i) => write!(f, "{i}"),
1925 DirOffset::LastUpper(c) => write!(f, "&{c}"),
1926 DirOffset::End(e) => write!(f, "-{e}"),
1927 }
1928 }
1929}
1930
/// The default direct offset is the offset reached by the upper level
/// match, without any shift.
impl Default for DirOffset {
    fn default() -> Self {
        Self::LastUpper(0)
    }
}
1936
/// A single test line of a magic rule.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct Match {
    /// Line number reported in logs and localized errors.
    line: usize,
    /// Continuation level of the test (see `continuation_level`).
    depth: u8,
    /// Where the test must be applied.
    offset: Offset,
    /// The test to run.
    test: Test,
    /// Value of `test.strength()`, computed once at construction.
    test_strength: u64,
    /// Optional message attached to the test.
    message: Option<Message>,
}
1946
1947impl From<Use> for Match {
1948 fn from(value: Use) -> Self {
1949 let test = Test::Use(value.switch_endianness, value.rule_name);
1950 let test_strength = test.strength();
1951 Self {
1952 line: value.line,
1953 depth: value.depth,
1954 offset: value.start_offset,
1955 test,
1956 test_strength,
1957 message: value.message,
1958 }
1959 }
1960}
1961
1962impl From<Name> for Match {
1963 fn from(value: Name) -> Self {
1964 let test = Test::Name(value.name);
1965 let test_strength = test.strength();
1966 Self {
1967 line: value.line,
1968 depth: 0,
1969 offset: Offset::Direct(DirOffset::Start(0)),
1970 test,
1971 test_strength,
1972 message: value.message,
1973 }
1974 }
1975}
1976
1977impl Match {
1978 #[inline(always)]
1980 fn offset_from_start<R: Read + Seek>(
1981 &self,
1982 haystack: &mut LazyCache<R>,
1983 rule_base_offset: Option<u64>,
1984 last_level_offset: Option<u64>,
1985 ) -> Result<Option<u64>, io::Error> {
1986 match self.offset {
1987 Offset::Direct(dir_offset) => match dir_offset {
1988 DirOffset::Start(s) => Ok(Some(s)),
1989 DirOffset::LastUpper(shift) => {
1990 let o = last_level_offset.unwrap_or_default() as i64 + shift;
1991
1992 if o.is_positive() {
1993 Ok(Some(o as u64))
1994 } else {
1995 Ok(None)
1996 }
1997 }
1998 DirOffset::End(e) => Ok(Some(haystack.offset_from_start(SeekFrom::End(e)))),
1999 },
2000 Offset::Indirect(ind_offset) => {
2001 let Some(o) =
2002 ind_offset.read_offset(haystack, rule_base_offset, last_level_offset)?
2003 else {
2004 return Ok(None);
2005 };
2006
2007 Ok(Some(o))
2008 }
2009 }
2010 }
2011
2012 #[inline]
2025 #[allow(clippy::too_many_arguments)]
2026 fn matches<'a: 'h, 'h, R: Read + Seek>(
2027 &'a self,
2028 source: Option<&str>,
2029 magic: &mut Magic<'a>,
2030 stream_kind: StreamKind,
2031 state: &mut MatchState,
2032 buf_base_offset: Option<u64>,
2033 rule_base_offset: Option<u64>,
2034 last_level_offset: Option<u64>,
2035 haystack: &'h mut LazyCache<R>,
2036 switch_endianness: bool,
2037 db: &'a MagicDb,
2038 depth: usize,
2039 ) -> Result<(bool, Option<MatchRes<'h>>), Error> {
2040 let source = source.unwrap_or("unknown");
2041 let line = self.line;
2042
2043 if depth >= MAX_RECURSION {
2044 return Err(Error::localized(
2045 source,
2046 line,
2047 Error::MaximumRecursion(MAX_RECURSION),
2048 ));
2049 }
2050
2051 if self.test.is_only_binary() && stream_kind.is_text() {
2052 trace!("skip binary test source={source} line={line} stream_kind={stream_kind:?}",);
2053 return Ok((false, None));
2054 }
2055
2056 if self.test.is_only_text() && !stream_kind.is_text() {
2057 trace!("skip text test source={source} line={line} stream_kind={stream_kind:?}",);
2058 return Ok((false, None));
2059 }
2060
2061 let Ok(Some(mut offset)) = self
2062 .offset_from_start(haystack, rule_base_offset, last_level_offset)
2063 .inspect_err(|e| debug!("source={source} line={line} failed at computing offset: {e}"))
2064 else {
2065 return Ok((false, None));
2066 };
2067
2068 offset = match self.offset {
2069 Offset::Indirect(_) => {
2070 buf_base_offset.unwrap_or_default().saturating_add(offset)
2075 }
2076 Offset::Direct(DirOffset::Start(_)) => {
2078 rule_base_offset.unwrap_or_default().saturating_add(offset)
2079 }
2080 _ => offset,
2081 };
2082
2083 match &self.test {
2084 Test::Clear => {
2085 trace!("source={source} line={line} clear");
2086 state.clear_continuation_level(&self.continuation_level());
2087 Ok((true, None))
2088 }
2089
2090 Test::Name(name) => {
2091 trace!(
2092 "source={source} line={line} running rule {name} switch_endianness={switch_endianness}",
2093 );
2094 Ok((true, None))
2095 }
2096
2097 Test::Use(flip_endianness, rule_name) => {
2098 trace!(
2099 "source={source} line={line} use {rule_name} switch_endianness={flip_endianness}",
2100 );
2101
2102 let switch_endianness = switch_endianness ^ flip_endianness;
2104
2105 let dr: &DependencyRule = db.dependencies.get(rule_name).ok_or(
2106 Error::localized(source, line, Error::MissingRule(rule_name.clone())),
2107 )?;
2108
2109 if let Some(msg) = self.message.as_ref() {
2111 magic.push_message(msg.to_string_lossy());
2112 }
2113
2114 dr.rule.magic(
2115 magic,
2116 stream_kind,
2117 buf_base_offset,
2118 Some(offset),
2119 haystack,
2120 db,
2121 switch_endianness,
2122 depth.saturating_add(1),
2123 )?;
2124
2125 Ok((false, None))
2127 }
2128
2129 Test::Indirect(m) => {
2130 trace!(
2131 "source={source} line={line} indirect mods={:?} offset={offset:#x}",
2132 m
2133 );
2134
2135 let new_buf_base_off = if m.contains(IndirectMod::Relative) {
2136 Some(offset)
2137 } else {
2138 None
2139 };
2140
2141 if let Some(msg) = self.message.as_ref() {
2143 magic.push_message(msg.to_string_lossy());
2144 }
2145
2146 for r in db.rules.iter() {
2147 let messages_cnt = magic.message.len();
2148
2149 r.magic(
2150 magic,
2151 stream_kind,
2152 new_buf_base_off,
2153 Some(offset),
2154 haystack,
2155 db,
2156 false,
2157 depth.saturating_add(1),
2158 )?;
2159
2160 if magic.message.len() != messages_cnt {
2162 break;
2163 }
2164 }
2165
2166 Ok((false, None))
2168 }
2169
2170 Test::Default => {
2171 let ok = !state.get_continuation_level(&self.continuation_level());
2173
2174 trace!("source={source} line={line} default match={ok}");
2175 if ok {
2176 state.set_continuation_level(self.continuation_level());
2177 }
2178
2179 Ok((ok, None))
2180 }
2181
2182 _ => {
2183 if let Err(e) = haystack.seek(SeekFrom::Start(offset)) {
2184 debug!("source={source} line={line} failed to seek in haystack: {e}");
2185 return Ok((false, None));
2186 }
2187
2188 let mut trace_msg = None;
2189
2190 if enabled!(Level::DEBUG) {
2191 trace_msg = Some(vec![format!(
2192 "source={source} line={line} depth={} stream_offset={:#x}",
2193 self.depth,
2194 haystack.lazy_stream_position()
2195 )])
2196 }
2197
2198 if let Ok(opt_test_value) = self
2202 .test
2203 .read_test_value(haystack, switch_endianness)
2204 .inspect_err(|e| {
2205 debug!("source={source} line={line} error while reading test value @{offset}: {e}",)
2206 })
2207 {
2208 if let Some(v) = trace_msg
2209 .as_mut() { v.push(format!("test={:?}", self.test)) }
2210
2211 let match_res =
2212 opt_test_value.and_then(|tv| self.test.match_value(&tv, stream_kind));
2213
2214 if let Some(v) = trace_msg.as_mut() { v.push(format!(
2215 "message=\"{}\" match={}",
2216 self.message
2217 .as_ref()
2218 .map(|fs| fs.to_string_lossy())
2219 .unwrap_or_default(),
2220 match_res.is_some()
2221 )) }
2222
2223 if enabled!(Level::DEBUG) && !enabled!(Level::TRACE) && match_res.is_some() {
2225 if let Some(m) = trace_msg{
2226 debug!("{}", m.join(" "));
2227 }
2228 } else if enabled!(Level::TRACE) {
2229 if let Some(m) = trace_msg{
2230 trace!("{}", m.join(" "));
2231 }
2232 }
2233
2234 if let Some(mr) = match_res {
2235 state.set_continuation_level(self.continuation_level());
2236 return Ok((true, Some(mr)));
2237 }
2238 }
2239
2240 Ok((false, None))
2241 }
2242 }
2243 }
2244
2245 #[inline(always)]
2246 fn continuation_level(&self) -> ContinuationLevel {
2247 ContinuationLevel(self.depth)
2248 }
2249}
2250
/// A `use` directive, converted into a [`Match`] running a [`Test::Use`].
#[derive(Debug, Clone)]
struct Use {
    /// Line number of the directive.
    line: usize,
    /// Continuation level of the directive.
    depth: u8,
    /// Offset handed over to the rule being used.
    start_offset: Offset,
    /// Name of the rule to run.
    rule_name: String,
    /// Whether endianness must be flipped when running the rule.
    switch_endianness: bool,
    /// Optional message attached to the directive.
    message: Option<Message>,
}
2260
/// Modifier applied to the strength of an entry (see `StrengthMod::apply`).
#[derive(Debug, Clone, Serialize, Deserialize)]
struct StrengthMod {
    /// Operation to apply to the strength.
    op: Op,
    /// Right-hand operand of the operation.
    by: u8,
}
2266
2267impl StrengthMod {
2268 #[inline(always)]
2269 fn apply(&self, strength: u64) -> u64 {
2270 let by = self.by as u64;
2271 debug!("applying strength modifier: {strength} {} {}", self.op, by);
2272 match self.op {
2273 Op::Mul => strength.saturating_mul(by),
2274 Op::Add => strength.saturating_add(by),
2275 Op::Sub => strength.saturating_sub(by),
2276 Op::Div => {
2277 if by > 0 {
2278 strength.saturating_div(by)
2279 } else {
2280 strength
2281 }
2282 }
2283 Op::Mod => strength % by,
2284 Op::And => strength & by,
2285 Op::Xor | Op::Or => {
2288 debug_panic!("unsupported strength operator");
2289 strength
2290 }
2291 }
2292 }
2293}
2294
/// Metadata that can be attached to a rule entry.
// NOTE(review): presumably produced by the parser for the `!:` directives
// of magic files and stored into `EntryNode`; confirm against the parser
#[derive(Debug, Clone)]
enum Flag {
    /// MIME type.
    Mime(String),
    /// Set of file extensions.
    Ext(HashSet<String>),
    /// Strength modifier.
    Strength(StrengthMod),
    /// Apple type code.
    Apple(String),
}
2302
/// A `name` directive, converted into a [`Match`] running a
/// [`Test::Name`].
#[derive(Debug, Clone)]
struct Name {
    /// Line number of the directive.
    line: usize,
    /// Name given to the rule.
    name: String,
    /// Optional message attached to the directive.
    message: Option<Message>,
}
2309
/// An entry of a magic file: either a test or a flag.
// NOTE(review): the `Span` is presumably the location the entry was
// parsed from; confirm against the parser
#[derive(Debug, Clone)]
enum Entry<'span> {
    /// A test line.
    Match(Span<'span>, Match),
    /// A flag line.
    Flag(Span<'span>, Flag),
}
2315
/// Node of the tree of tests composing a rule.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct EntryNode {
    /// When set, an end-relative offset of `entry` becomes the rule base
    /// offset handed to the children.
    root: bool,
    /// Test of this node.
    entry: Match,
    /// Tests to run when `entry` matches.
    children: Vec<EntryNode>,
    /// MIME type to set on the result when `entry` matches.
    mimetype: Option<String>,
    /// Creator code to set on the result when `entry` matches.
    apple: Option<String>,
    /// Modifier applied to the strength of `entry` when it matches.
    strength_mod: Option<StrengthMod>,
    /// Extensions to add to the result when `entry` matches.
    exts: HashSet<String>,
}
2326
2327impl EntryNode {
2328 fn update_exts_rec(
2329 &self,
2330 exts: &mut HashSet<String>,
2331 deps: &HashMap<String, DependencyRule>,
2332 marked: &mut HashSet<String>,
2333 ) -> Result<(), ()> {
2334 for ext in self.exts.iter() {
2335 if !exts.contains(ext) {
2336 exts.insert(ext.clone());
2337 }
2338 }
2339
2340 for c in self.children.iter() {
2341 if let Test::Use(_, ref name) = c.entry.test {
2342 if marked.contains(name) {
2343 continue;
2344 }
2345 if let Some(r) = deps.get(name) {
2346 marked.insert(name.clone());
2347 exts.extend(r.rule.fetch_all_extensions(deps, marked)?);
2348 } else {
2349 return Err(());
2350 }
2351 } else {
2352 c.update_exts_rec(exts, deps, marked)?;
2353 }
2354 }
2355
2356 Ok(())
2357 }
2358
2359 fn update_score_rec(
2360 &self,
2361 depth: usize,
2362 score: &mut u64,
2363 deps: &HashMap<String, DependencyRule>,
2364 marked: &mut HashSet<String>,
2365 ) {
2366 if depth == 3 {
2367 return;
2368 }
2369
2370 *score += self
2371 .children
2372 .iter()
2373 .map(|e| e.entry.test_strength)
2374 .min()
2375 .unwrap_or_default();
2376
2377 for c in self.children.iter() {
2378 if let Test::Use(_, ref name) = c.entry.test {
2379 if marked.contains(name) {
2380 continue;
2381 }
2382
2383 if let Some(r) = deps.get(name) {
2384 marked.insert(name.clone());
2385 *score += r.rule.compute_score(depth, deps, marked);
2386 }
2387 }
2388 c.update_score_rec(depth + 1, score, deps, marked);
2389 }
2390 }
2391
2392 #[inline]
2393 #[allow(clippy::too_many_arguments)]
2394 fn matches<'r, R: Read + Seek>(
2395 &'r self,
2396 opt_source: Option<&str>,
2397 magic: &mut Magic<'r>,
2398 state: &mut MatchState,
2399 stream_kind: StreamKind,
2400 buf_base_offset: Option<u64>,
2401 rule_base_offset: Option<u64>,
2402 last_level_offset: Option<u64>,
2403 haystack: &mut LazyCache<R>,
2404 db: &'r MagicDb,
2405 switch_endianness: bool,
2406 depth: usize,
2407 ) -> Result<(), Error> {
2408 let (ok, opt_match_res) = self.entry.matches(
2409 opt_source,
2410 magic,
2411 stream_kind,
2412 state,
2413 buf_base_offset,
2414 rule_base_offset,
2415 last_level_offset,
2416 haystack,
2417 switch_endianness,
2418 db,
2419 depth,
2420 )?;
2421
2422 let source = opt_source.unwrap_or("unknown");
2423 let line = self.entry.line;
2424
2425 if ok {
2426 if let Some(msg) = self.entry.message.as_ref() {
2428 if let Ok(msg) = msg.format_with(opt_match_res.as_ref()).inspect_err(|e| {
2429 debug!("source={source} line={line} failed to format message: {e}")
2430 }) {
2431 magic.push_message(msg);
2432 }
2433 }
2434
2435 if let Some(mr) = opt_match_res {
2437 match &self.entry.test {
2438 Test::String(t) => {
2439 if t.has_length_mod() {
2440 let o = mr.end_offset();
2441 haystack.seek(SeekFrom::Start(o))?;
2442 }
2443 }
2444 Test::Search(t) => {
2445 if t.re_mods.contains(ReMod::StartOffsetUpdate) {
2446 let o = mr.start_offset();
2447 haystack.seek(SeekFrom::Start(o))?;
2448 } else {
2449 let o = mr.end_offset();
2450 haystack.seek(SeekFrom::Start(o))?;
2451 }
2452 }
2453
2454 Test::Regex(t) => {
2455 if t.mods.contains(ReMod::StartOffsetUpdate) {
2456 let o = mr.start_offset();
2457 haystack.seek(SeekFrom::Start(o))?;
2458 } else {
2459 let o = mr.end_offset();
2460 haystack.seek(SeekFrom::Start(o))?;
2461 }
2462 }
2463 _ => {}
2465 }
2466 }
2467
2468 if let Some(mimetype) = self.mimetype.as_ref() {
2469 magic.set_mime_type(Cow::Borrowed(mimetype));
2470 }
2471
2472 if let Some(apple_ty) = self.apple.as_ref() {
2473 magic.set_creator_code(Cow::Borrowed(apple_ty));
2474 }
2475
2476 if !self.exts.is_empty() {
2477 magic.insert_extensions(self.exts.iter().map(|s| s.as_str()));
2478 }
2479
2480 let mut strength = self.entry.test_strength;
2484
2485 let continuation_level = self.entry.continuation_level().0 as u64;
2486 if self.entry.message.is_none() && continuation_level < 3 {
2487 strength = strength.saturating_add(continuation_level);
2488 }
2489
2490 if let Some(sm) = self.strength_mod.as_ref() {
2491 strength = sm.apply(strength);
2492 }
2493
2494 if self.entry.message.is_none() {
2496 strength += 1
2497 }
2498
2499 magic.update_strength(strength);
2500
2501 let end_upper_level = haystack.lazy_stream_position();
2502
2503 let rule_base_offset = if self.root {
2511 match self.entry.offset {
2512 Offset::Direct(DirOffset::End(o)) => {
2513 Some(haystack.offset_from_start(SeekFrom::End(o)))
2514 }
2515 _ => rule_base_offset,
2516 }
2517 } else {
2518 rule_base_offset
2519 };
2520
2521 for e in self.children.iter() {
2522 e.matches(
2523 opt_source,
2524 magic,
2525 state,
2526 stream_kind,
2527 buf_base_offset,
2528 rule_base_offset,
2529 Some(end_upper_level),
2530 haystack,
2531 db,
2532 switch_endianness,
2533 depth,
2534 )?
2535 }
2536 }
2537
2538 Ok(())
2539 }
2540}
2541
/// A magic rule: a tree of tests with the metadata computed out of it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MagicRule {
    /// Identifier of the rule.
    id: usize,
    /// Name of the source the rule comes from, if known.
    source: Option<String>,
    /// Root of the tree of tests.
    entries: EntryNode,
    /// Extensions collected over the tree when the rule gets finalized.
    extensions: HashSet<String>,
    /// Score computed when the rule gets finalized.
    score: u64,
    /// Whether `try_finalize` already succeeded.
    finalized: bool,
}
2553
2554impl MagicRule {
2555 #[inline(always)]
2556 fn set_id(&mut self, id: usize) {
2557 self.id = id
2558 }
2559
2560 fn fetch_all_extensions(
2564 &self,
2565 deps: &HashMap<String, DependencyRule>,
2566 marked: &mut HashSet<String>,
2567 ) -> Result<HashSet<String>, ()> {
2568 let mut exts = HashSet::new();
2569 self.entries.update_exts_rec(&mut exts, deps, marked)?;
2570 Ok(exts)
2571 }
2572
2573 fn compute_score(
2576 &self,
2577 depth: usize,
2578 deps: &HashMap<String, DependencyRule>,
2579 marked: &mut HashSet<String>,
2580 ) -> u64 {
2581 let mut score = 0;
2582 score += self.entries.entry.test_strength;
2583 self.entries
2584 .update_score_rec(depth, &mut score, deps, marked);
2585 score
2586 }
2587
2588 fn try_finalize(&mut self, deps: &HashMap<String, DependencyRule>) {
2591 if self.finalized {
2592 return;
2593 }
2594
2595 let Ok(exts) = self.fetch_all_extensions(deps, &mut HashSet::new()) else {
2596 return;
2597 };
2598
2599 self.extensions.extend(exts);
2600
2601 self.score = self.compute_score(0, deps, &mut HashSet::new());
2605 self.finalized = true
2606 }
2607
2608 #[inline]
2609 fn magic_entrypoint<'r, R: Read + Seek>(
2610 &'r self,
2611 magic: &mut Magic<'r>,
2612 stream_kind: StreamKind,
2613 haystack: &mut LazyCache<R>,
2614 db: &'r MagicDb,
2615 switch_endianness: bool,
2616 depth: usize,
2617 ) -> Result<(), Error> {
2618 self.entries.matches(
2619 self.source.as_deref(),
2620 magic,
2621 &mut MatchState::empty(),
2622 stream_kind,
2623 None,
2624 None,
2625 None,
2626 haystack,
2627 db,
2628 switch_endianness,
2629 depth,
2630 )
2631 }
2632
2633 #[inline]
2634 #[allow(clippy::too_many_arguments)]
2635 fn magic<'r, R: Read + Seek>(
2636 &'r self,
2637 magic: &mut Magic<'r>,
2638 stream_kind: StreamKind,
2639 buf_base_offset: Option<u64>,
2640 rule_base_offset: Option<u64>,
2641 haystack: &mut LazyCache<R>,
2642 db: &'r MagicDb,
2643 switch_endianness: bool,
2644 depth: usize,
2645 ) -> Result<(), Error> {
2646 self.entries.matches(
2647 self.source.as_deref(),
2648 magic,
2649 &mut MatchState::empty(),
2650 stream_kind,
2651 buf_base_offset,
2652 rule_base_offset,
2653 None,
2654 haystack,
2655 db,
2656 switch_endianness,
2657 depth,
2658 )
2659 }
2660
2661 pub fn is_text(&self) -> bool {
2667 self.entries.entry.test.is_text()
2668 && self.entries.children.iter().all(|e| e.entry.test.is_text())
2669 }
2670
2671 #[inline(always)]
2677 pub fn score(&self) -> u64 {
2678 self.score
2679 }
2680
2681 #[inline(always)]
2687 pub fn source(&self) -> Option<&str> {
2688 self.source.as_deref()
2689 }
2690
2691 #[inline(always)]
2697 pub fn line(&self) -> usize {
2698 self.entries.entry.line
2699 }
2700
2701 #[inline(always)]
2707 pub fn extensions(&self) -> &HashSet<String> {
2708 &self.extensions
2709 }
2710}
2711
/// A named rule, which other rules can run through a `use` test.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct DependencyRule {
    /// Name of the rule.
    name: String,
    /// The rule itself.
    rule: MagicRule,
}
2717
/// Rules obtained by parsing a magic file (see [`MagicSource::open`]).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MagicSource {
    /// Regular rules.
    rules: Vec<MagicRule>,
    /// Named rules, indexed by name.
    dependencies: HashMap<String, DependencyRule>,
}
2728
impl MagicSource {
    /// Parses the magic file located at `p`.
    ///
    /// # Errors
    ///
    /// Returns the error reported by `FileMagicParser::parse_file`.
    pub fn open<P: AsRef<Path>>(p: P) -> Result<Self, Error> {
        FileMagicParser::parse_file(p)
    }
}
2743
/// Continuation level of a test, i.e. its depth in the rule.
#[derive(Debug, Hash, PartialEq, Eq, Clone, Copy)]
struct ContinuationLevel(u8);
2746
/// Encoding guessed for a text stream.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
enum TextEncoding {
    /// ASCII text.
    Ascii,
    /// Valid UTF-8 text.
    Utf8,
    /// Looks like text, in an unidentified encoding.
    Unknown,
}

impl TextEncoding {
    /// Returns the display name of the encoding.
    const fn as_magic_str(&self) -> &'static str {
        match *self {
            Self::Ascii => "ASCII",
            Self::Utf8 => "UTF-8",
            Self::Unknown => "Unknown",
        }
    }
}
2764
/// Kind of content a stream is made of (see `guess_stream_kind`).
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
enum StreamKind {
    /// Binary content.
    Binary,
    /// Text content, with the encoding guessed for it.
    Text(TextEncoding),
}
2770
2771impl StreamKind {
2772 const fn is_text(&self) -> bool {
2773 matches!(self, StreamKind::Text(_))
2774 }
2775}
2776
/// State shared by the tests of a rule while it is being run.
#[derive(Debug)]
struct MatchState {
    /// One flag per continuation level (a `u8`, hence 256 slots), set
    /// once a test matched at that level.
    continuation_levels: [bool; 256],
}
2781
2782impl MatchState {
2783 #[inline(always)]
2784 fn empty() -> Self {
2785 MatchState {
2786 continuation_levels: [false; 256],
2787 }
2788 }
2789
2790 #[inline(always)]
2791 fn get_continuation_level(&mut self, level: &ContinuationLevel) -> bool {
2792 self.continuation_levels
2793 .get(level.0 as usize)
2794 .cloned()
2795 .unwrap_or_default()
2796 }
2797
2798 #[inline(always)]
2799 fn set_continuation_level(&mut self, level: ContinuationLevel) {
2800 if let Some(b) = self.continuation_levels.get_mut(level.0 as usize) {
2801 *b = true
2802 }
2803 }
2804
2805 #[inline(always)]
2806 fn clear_continuation_level(&mut self, level: &ContinuationLevel) {
2807 if let Some(b) = self.continuation_levels.get_mut(level.0 as usize) {
2808 *b = false;
2809 }
2810 }
2811}
2812
/// Result of the identification of a stream.
#[derive(Debug, Default)]
pub struct Magic<'m> {
    /// Kind of the stream, used to select the default MIME type.
    stream_kind: Option<StreamKind>,
    /// Name of the source of the rule, if any.
    source: Option<Cow<'m, str>>,
    /// Messages pushed by the tests, joined by [`Magic::message`].
    message: Vec<Cow<'m, str>>,
    /// MIME type, the first one set wins.
    mime_type: Option<Cow<'m, str>>,
    /// Creator code, the first one set wins.
    creator_code: Option<Cow<'m, str>>,
    /// Sum of the strengths of the tests which matched.
    strength: u64,
    /// Extensions, the first non-empty set inserted wins.
    exts: HashSet<Cow<'m, str>>,
    /// Flag returned by [`Magic::is_default`].
    is_default: bool,
}
2825
2826impl<'m> Magic<'m> {
2827 #[inline(always)]
2828 fn set_source(&mut self, source: Option<&'m str>) {
2829 self.source = source.map(Cow::Borrowed);
2830 }
2831
2832 #[inline(always)]
2833 fn set_stream_kind(&mut self, stream_kind: StreamKind) {
2834 self.stream_kind = Some(stream_kind)
2835 }
2836
2837 #[inline(always)]
2838 fn reset(&mut self) {
2839 self.stream_kind = None;
2840 self.source = None;
2841 self.message.clear();
2842 self.mime_type = None;
2843 self.creator_code = None;
2844 self.strength = 0;
2845 self.exts.clear();
2846 self.is_default = false;
2847 }
2848
2849 #[inline]
2857 pub fn into_owned<'owned>(self) -> Magic<'owned> {
2858 Magic {
2859 stream_kind: self.stream_kind,
2860 source: self.source.map(|s| Cow::Owned(s.into_owned())),
2861 message: self
2862 .message
2863 .into_iter()
2864 .map(Cow::into_owned)
2865 .map(Cow::Owned)
2866 .collect(),
2867 mime_type: self.mime_type.map(|m| Cow::Owned(m.into_owned())),
2868 creator_code: self.creator_code.map(|m| Cow::Owned(m.into_owned())),
2869 strength: self.strength,
2870 exts: self
2871 .exts
2872 .into_iter()
2873 .map(|e| Cow::Owned(e.into_owned()))
2874 .collect(),
2875 is_default: self.is_default,
2876 }
2877 }
2878
2879 #[inline(always)]
2885 pub fn message(&self) -> String {
2886 let mut out = String::new();
2887 for (i, m) in self.message.iter().enumerate() {
2888 if let Some(s) = m.strip_prefix(r#"\b"#) {
2889 out.push_str(s);
2890 } else {
2891 if i > 0 {
2893 out.push(' ');
2894 }
2895 out.push_str(m);
2896 }
2897 }
2898 out
2899 }
2900
2901 #[inline(always)]
2902 fn update_strength(&mut self, value: u64) {
2903 self.strength = self.strength.saturating_add(value);
2904 debug!("updated strength = {:?}", self.strength)
2905 }
2906
2907 #[inline(always)]
2913 pub fn mime_type(&self) -> &str {
2914 self.mime_type.as_deref().unwrap_or(match self.stream_kind {
2915 Some(StreamKind::Text(_)) => DEFAULT_TEXT_MIMETYPE,
2916 Some(StreamKind::Binary) | None => DEFAULT_BIN_MIMETYPE,
2917 })
2918 }
2919
2920 #[inline(always)]
2921 fn push_message<'a: 'm>(&mut self, msg: Cow<'a, str>) {
2922 if !msg.is_empty() {
2923 debug!("pushing message: msg={msg} len={}", msg.len());
2924 self.message.push(msg);
2925 }
2926 }
2927
2928 #[inline(always)]
2929 fn set_mime_type<'a: 'm>(&mut self, mime: Cow<'a, str>) {
2930 if self.mime_type.is_none() {
2931 debug!("insert mime: {:?}", mime);
2932 self.mime_type = Some(mime)
2933 }
2934 }
2935
2936 #[inline(always)]
2937 fn set_creator_code<'a: 'm>(&mut self, apple_ty: Cow<'a, str>) {
2938 if self.creator_code.is_none() {
2939 debug!("insert apple type: {apple_ty:?}");
2940 self.creator_code = Some(apple_ty)
2941 }
2942 }
2943
2944 #[inline(always)]
2945 fn insert_extensions<'a: 'm, I: Iterator<Item = &'a str>>(&mut self, exts: I) {
2946 if self.exts.is_empty() {
2947 self.exts.extend(exts.filter_map(|e| {
2948 if e.is_empty() {
2949 None
2950 } else {
2951 Some(Cow::Borrowed(e))
2952 }
2953 }));
2954 }
2955 }
2956
2957 #[inline(always)]
2965 pub fn strength(&self) -> u64 {
2966 self.strength
2967 }
2968
2969 #[inline(always)]
2975 pub fn source(&self) -> Option<&str> {
2976 self.source.as_deref()
2977 }
2978
2979 #[inline(always)]
2985 pub fn creator_code(&self) -> Option<&str> {
2986 self.creator_code.as_deref()
2987 }
2988
2989 #[inline(always)]
2995 pub fn extensions(&self) -> &HashSet<Cow<'m, str>> {
2996 &self.exts
2997 }
2998
2999 #[inline(always)]
3005 pub fn is_default(&self) -> bool {
3006 self.is_default
3007 }
3008}
3009
/// Database of magic rules.
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
pub struct MagicDb {
    /// Next rule identifier to hand out (see `next_rule_id`).
    rule_id: usize,
    /// Regular rules.
    rules: Vec<MagicRule>,
    /// Named rules, indexed by name, run through `use` tests.
    dependencies: HashMap<String, DependencyRule>,
}
3017
/// Heuristic telling whether `bytes` look like text.
///
/// An empty buffer or a buffer containing a NUL byte is never text.
/// Otherwise the buffer is text when more than 85% of its bytes are
/// printable (tab, line feed, carriage return or `0x20..=0x7E`) and less
/// than 20% of them are anything else.
#[inline(always)]
fn is_likely_text(bytes: &[u8]) -> bool {
    if bytes.is_empty() || bytes.contains(&0x00) {
        return false;
    }

    let printable = bytes
        .iter()
        .filter(|&&b| matches!(b, 0x09 | 0x0A | 0x0D | 0x20..=0x7E))
        .count();

    // no NUL byte at this point: what is not printable is "other"
    let other = bytes.len() - printable;

    let total = bytes.len() as f64;
    let printable_ratio = printable as f64 / total;
    let other_ratio = other as f64 / total;

    printable_ratio > 0.85 && other_ratio < 0.20
}
3044
3045#[inline(always)]
3046fn guess_stream_kind<S: AsRef<[u8]>>(stream: S) -> StreamKind {
3047 let Ok(s) = str::from_utf8(stream.as_ref()) else {
3048 if is_likely_text(stream.as_ref()) {
3049 return StreamKind::Text(TextEncoding::Unknown);
3050 } else {
3051 return StreamKind::Binary;
3052 }
3053 };
3054
3055 let count = s.chars().count();
3056 let mut is_ascii = true;
3057
3058 for c in s.chars().take(count.saturating_sub(1)) {
3059 is_ascii &= c.is_ascii()
3060 }
3061
3062 if is_ascii {
3063 StreamKind::Text(TextEncoding::Ascii)
3064 } else {
3065 StreamKind::Text(TextEncoding::Utf8)
3066 }
3067}
3068
3069impl MagicDb {
3070 fn open_reader<R: Read + Seek>(f: R) -> Result<LazyCache<R>, Error> {
3071 Ok(LazyCache::<R>::from_read_seek(f)
3072 .and_then(|lc| lc.with_hot_cache(2 * FILE_BYTES_MAX))?)
3073 .map(|lc| lc.with_warm_cache(100 << 20))
3074 }
3075
    /// Creates an empty database, equivalent to `MagicDb::default()`.
    pub fn new() -> Self {
        Self::default()
    }
3084
3085 #[inline(always)]
3086 fn next_rule_id(&mut self) -> usize {
3087 let t = self.rule_id;
3088 self.rule_id += 1;
3089 t
3090 }
3091
3092 #[inline(always)]
3093 fn try_json<R: Read + Seek>(
3094 haystack: &mut LazyCache<R>,
3095 stream_kind: StreamKind,
3096 magic: &mut Magic,
3097 ) -> Result<bool, Error> {
3098 if matches!(stream_kind, StreamKind::Binary) {
3100 return Ok(false);
3101 }
3102
3103 let buf = haystack.read_range(0..FILE_BYTES_MAX as u64)?.trim_ascii();
3104
3105 let Some((start, end)) = find_json_boundaries(buf) else {
3106 return Ok(false);
3107 };
3108
3109 for c in buf[0..start].iter() {
3112 if !c.is_ascii_whitespace() {
3113 return Ok(false);
3114 }
3115 }
3116
3117 let mut is_ndjson = false;
3118
3119 trace!("maybe a json document");
3120 let ok = serde_json::from_slice::<serde_json::Value>(&buf[start..=end]).is_ok();
3121 if !ok {
3122 return Ok(false);
3123 }
3124
3125 if end + 1 < buf.len() {
3127 let buf = &buf[end + 1..];
3129 if let Some((second_start, second_end)) = find_json_boundaries(buf) {
3130 if memchr(b'\n', &buf[..second_start]).is_some() {
3132 trace!("might be ndjson");
3133 is_ndjson = serde_json::from_slice::<serde_json::Value>(
3134 &buf[second_start..=second_end],
3135 )
3136 .is_ok();
3137 }
3138 }
3139 }
3140
3141 if is_ndjson {
3142 magic.push_message(Cow::Borrowed("New Line Delimited"));
3143 magic.set_mime_type(Cow::Borrowed("application/x-ndjson"));
3144 magic.insert_extensions(["ndjson", "jsonl"].into_iter());
3145 } else {
3146 magic.set_mime_type(Cow::Borrowed("application/json"));
3147 magic.insert_extensions(["json"].into_iter());
3148 }
3149
3150 magic.push_message(Cow::Borrowed("JSON text data"));
3151 magic.set_source(Some(HARDCODED_SOURCE));
3152 magic.update_strength(HARDCODED_MAGIC_STRENGTH);
3153 Ok(true)
3154 }
3155
3156 #[inline(always)]
3157 fn try_csv<R: Read + Seek>(
3158 haystack: &mut LazyCache<R>,
3159 stream_kind: StreamKind,
3160 magic: &mut Magic,
3161 ) -> Result<bool, Error> {
3162 let StreamKind::Text(enc) = stream_kind else {
3164 return Ok(false);
3165 };
3166
3167 let buf = haystack.read_range(0..FILE_BYTES_MAX as u64)?;
3168 let mut reader = csv::Reader::from_reader(io::Cursor::new(buf));
3169 let mut records = reader.records();
3170
3171 let Some(Ok(first)) = records.next() else {
3172 return Ok(false);
3173 };
3174
3175 if first.len() <= 1 {
3179 return Ok(false);
3180 }
3181
3182 let mut n = 1;
3184 for i in records.take(9) {
3185 if let Ok(rec) = i {
3186 if first.len() != rec.len() {
3187 return Ok(false);
3188 }
3189 } else {
3190 return Ok(false);
3191 }
3192 n += 1;
3193 }
3194
3195 if n != 10 {
3197 return Ok(false);
3198 }
3199
3200 magic.set_mime_type(Cow::Borrowed("text/csv"));
3201 magic.push_message(Cow::Borrowed("CSV"));
3202 magic.push_message(Cow::Borrowed(enc.as_magic_str()));
3203 magic.push_message(Cow::Borrowed("text"));
3204 magic.insert_extensions(["csv"].into_iter());
3205 magic.set_source(Some(HARDCODED_SOURCE));
3206 magic.update_strength(HARDCODED_MAGIC_STRENGTH);
3207 Ok(true)
3208 }
3209
3210 #[inline(always)]
3211 fn try_tar<R: Read + Seek>(
3212 haystack: &mut LazyCache<R>,
3213 stream_kind: StreamKind,
3214 magic: &mut Magic,
3215 ) -> Result<bool, Error> {
3216 if !matches!(stream_kind, StreamKind::Binary) {
3218 return Ok(false);
3219 }
3220
3221 let buf = haystack.read_range(0..FILE_BYTES_MAX as u64)?;
3222 let mut ar = Archive::new(io::Cursor::new(buf));
3223
3224 let Ok(mut entries) = ar.entries() else {
3225 return Ok(false);
3226 };
3227
3228 let Some(Ok(first)) = entries.next() else {
3229 return Ok(false);
3230 };
3231
3232 let header = first.header();
3233
3234 if header.as_ustar().is_some() {
3235 magic.push_message(Cow::Borrowed("POSIX tar archive"));
3236 } else if header.as_gnu().is_some() {
3237 magic.push_message(Cow::Borrowed("POSIX tar archive (GNU)"));
3238 } else {
3239 magic.push_message(Cow::Borrowed("tar archive"));
3240 }
3241
3242 magic.set_mime_type(Cow::Borrowed("application/x-tar"));
3243 magic.set_source(Some(HARDCODED_SOURCE));
3244 magic.update_strength(HARDCODED_MAGIC_STRENGTH);
3245 magic.insert_extensions(["tar"].into_iter());
3246 Ok(true)
3247 }
3248
3249 #[inline(always)]
3250 fn try_hard_magic<R: Read + Seek>(
3251 haystack: &mut LazyCache<R>,
3252 stream_kind: StreamKind,
3253 magic: &mut Magic,
3254 ) -> Result<bool, Error> {
3255 Ok(Self::try_json(haystack, stream_kind, magic)?
3256 || Self::try_csv(haystack, stream_kind, magic)?
3257 || Self::try_tar(haystack, stream_kind, magic)?)
3258 }
3259
3260 #[inline(always)]
3261 fn magic_default<'m, R: Read + Seek>(
3262 haystack: &mut LazyCache<R>,
3263 stream_kind: StreamKind,
3264 magic: &mut Magic<'m>,
3265 ) -> Result<(), Error> {
3266 let buf = haystack.read_range(0..FILE_BYTES_MAX as u64)?;
3267
3268 magic.set_source(Some(HARDCODED_SOURCE));
3269 magic.set_stream_kind(stream_kind);
3270 magic.is_default = true;
3271
3272 if buf.is_empty() {
3273 magic.push_message(Cow::Borrowed("empty"));
3274 magic.set_mime_type(Cow::Borrowed(DEFAULT_BIN_MIMETYPE));
3275 return Ok(());
3276 }
3277
3278 match stream_kind {
3279 StreamKind::Binary => {
3280 magic.push_message(Cow::Borrowed("data"));
3281 }
3282 StreamKind::Text(e) => {
3283 magic.push_message(Cow::Borrowed(e.as_magic_str()));
3284 magic.push_message(Cow::Borrowed("text"));
3285 }
3286 }
3287
3288 Ok(())
3289 }
3290
3291 pub fn load(&mut self, mf: MagicSource) -> Result<&mut Self, Error> {
3301 for rule in mf.rules.into_iter() {
3302 let mut rule = rule;
3303 rule.set_id(self.next_rule_id());
3304
3305 self.rules.push(rule);
3306 }
3307
3308 self.dependencies.extend(mf.dependencies);
3309 self.prepare();
3310 Ok(self)
3311 }
3312
    /// Returns the rules currently held by the database, in the order
    /// established by the last `load`.
    pub fn rules(&self) -> &[MagicRule] {
        &self.rules
    }
3321
    /// Returns the first magic matching `haystack`, given an already
    /// computed `stream_kind`.
    ///
    /// Detection order:
    /// 1. the hard-coded detectors (`try_hard_magic`);
    /// 2. when `extension` is provided and non-empty, the rules listing that
    ///    (lowercased) extension;
    /// 3. every remaining rule, in database order;
    /// 4. the default fallback (`magic_default`).
    ///
    /// A rule is considered a match as soon as it leaves at least one
    /// message in the result.
    ///
    /// # Errors
    ///
    /// Propagates errors from the detectors, from rule evaluation and from
    /// reading `haystack`.
    #[inline]
    fn magic_first_with_stream_kind<R: Read + Seek>(
        &self,
        haystack: &mut LazyCache<R>,
        stream_kind: StreamKind,
        extension: Option<&str>,
    ) -> Result<Magic<'_>, Error> {
        let mut magic = Magic::default();

        if Self::try_hard_magic(haystack, stream_kind, &mut magic)? {
            return Ok(magic);
        }

        // marked[id] is set for rules already tried in the extension pass,
        // so the second pass can skip them
        let mut marked = vec![false; self.rules.len()];

        // Evaluates one rule: returns from the enclosing function on a
        // match, otherwise resets `magic` for the next rule.
        macro_rules! do_magic {
            ($rule: expr) => {{
                $rule.magic_entrypoint(&mut magic, stream_kind, haystack, &self, false, 0)?;

                if !magic.message.is_empty() {
                    magic.set_stream_kind(stream_kind);
                    magic.set_source($rule.source.as_deref());
                    return Ok(magic);
                }

                magic.reset();
            }};
        }

        // first pass: rules advertising the caller-provided extension
        if let Some(ext) = extension.map(|e| e.to_lowercase()) {
            if !ext.is_empty() {
                for rule in self.rules.iter().filter(|r| r.extensions.contains(&ext)) {
                    do_magic!(rule);
                    if let Some(f) = marked.get_mut(rule.id) {
                        *f = true
                    }
                }
            }
        }

        // second pass: every rule not tried yet
        for rule in self
            .rules
            .iter()
            .filter(|r| !*marked.get(r.id).unwrap_or(&false))
        {
            do_magic!(rule)
        }

        // nothing matched: fall back to the generic description
        Self::magic_default(haystack, stream_kind, &mut magic)?;

        Ok(magic)
    }
3376
3377 pub fn magic_first<R: Read + Seek>(
3391 &self,
3392 r: &mut R,
3393 extension: Option<&str>,
3394 ) -> Result<Magic<'_>, Error> {
3395 let mut haystack = Self::open_reader(r)?;
3396 let stream_kind = guess_stream_kind(haystack.read_range(0..FILE_BYTES_MAX as u64)?);
3397 self.magic_first_with_stream_kind(&mut haystack, stream_kind, extension)
3398 }
3399
3400 #[inline(always)]
3401 fn magic_all_sort_with_stream_kind<R: Read + Seek>(
3402 &self,
3403 haystack: &mut LazyCache<R>,
3404 stream_kind: StreamKind,
3405 ) -> Result<Vec<Magic<'_>>, Error> {
3406 let mut out = Vec::new();
3407
3408 let mut magic = Magic::default();
3409
3410 if Self::try_hard_magic(haystack, stream_kind, &mut magic)? {
3411 out.push(magic);
3412 magic = Magic::default();
3413 }
3414
3415 for rule in self.rules.iter() {
3416 rule.magic_entrypoint(&mut magic, stream_kind, haystack, self, false, 0)?;
3417
3418 if !magic.message.is_empty() {
3420 magic.set_stream_kind(stream_kind);
3421 magic.set_source(rule.source.as_deref());
3422 out.push(magic);
3423 magic = Magic::default();
3424 }
3425
3426 magic.reset();
3427 }
3428
3429 Self::magic_default(haystack, stream_kind, &mut magic)?;
3430 out.push(magic);
3431
3432 out.sort_by_key(|b| std::cmp::Reverse(b.strength()));
3433
3434 Ok(out)
3435 }
3436
3437 pub fn magic_all<R: Read + Seek>(&self, r: &mut R) -> Result<Vec<Magic<'_>>, Error> {
3447 let mut haystack = Self::open_reader(r)?;
3448 let stream_kind = guess_stream_kind(haystack.read_range(0..FILE_BYTES_MAX as u64)?);
3449 self.magic_all_sort_with_stream_kind(&mut haystack, stream_kind)
3450 }
3451
3452 #[inline(always)]
3453 fn magic_best_with_stream_kind<R: Read + Seek>(
3454 &self,
3455 haystack: &mut LazyCache<R>,
3456 stream_kind: StreamKind,
3457 ) -> Result<Magic<'_>, Error> {
3458 let magics = self.magic_all_sort_with_stream_kind(haystack, stream_kind)?;
3459
3460 return Ok(magics
3462 .into_iter()
3463 .next()
3464 .expect("magics must at least contain default"));
3465 }
3466
3467 pub fn magic_best<R: Read + Seek>(&self, r: &mut R) -> Result<Magic<'_>, Error> {
3477 let mut haystack = Self::open_reader(r)?;
3478 let stream_kind = guess_stream_kind(haystack.read_range(0..FILE_BYTES_MAX as u64)?);
3479 self.magic_best_with_stream_kind(&mut haystack, stream_kind)
3480 }
3481
3482 pub fn serialize<W: Write>(self, w: &mut W) -> Result<(), Error> {
3488 let mut encoder = GzEncoder::new(w, Compression::best());
3489
3490 bincode::serde::encode_into_std_write(&self, &mut encoder, bincode::config::standard())?;
3491 encoder.finish()?;
3492 Ok(())
3493 }
3494
3495 pub fn deserialize<R: Read>(r: &mut R) -> Result<Self, Error> {
3505 let mut buf = vec![];
3506 let mut gz = GzDecoder::new(r);
3507 gz.read_to_end(&mut buf).map_err(|e| {
3508 bincode::error::DecodeError::OtherString(format!("failed to read: {e}"))
3509 })?;
3510 let (sdb, _): (MagicDb, usize) =
3511 bincode::serde::decode_from_slice(&buf, bincode::config::standard())?;
3512 Ok(sdb)
3513 }
3514
3515 #[inline(always)]
3516 fn prepare(&mut self) {
3517 self.rules
3518 .iter_mut()
3519 .for_each(|r| r.try_finalize(&self.dependencies));
3520
3521 self.rules.sort_by_key(|r| (r.is_text(), -(r.score as i64)));
3523 }
3524}
3525
3526#[cfg(test)]
3527mod tests {
3528 use std::io::Cursor;
3529
3530 use regex::bytes::Regex;
3531
3532 use crate::utils::unix_local_time_to_string;
3533
3534 use super::*;
3535
    // Builds a `LazyCache` over an in-memory cursor wrapping the literal `$l`.
    macro_rules! lazy_cache {
        ($l: literal) => {
            LazyCache::from_read_seek(Cursor::new($l)).unwrap()
        };
    }
3541
3542 fn first_magic(
3543 rule: &str,
3544 content: &[u8],
3545 stream_kind: StreamKind,
3546 ) -> Result<Magic<'static>, Error> {
3547 let mut md = MagicDb::new();
3548 md.load(
3549 FileMagicParser::parse_str(rule, None)
3550 .inspect_err(|e| eprintln!("{e}"))
3551 .unwrap(),
3552 )
3553 .unwrap();
3554 let mut reader = LazyCache::from_read_seek(Cursor::new(content)).unwrap();
3555 let v = md.magic_best_with_stream_kind(&mut reader, stream_kind)?;
3556 Ok(v.into_owned())
3557 }
3558
    // Debugging helper: installs a TRACE-level tracing subscriber.
    // It is not called by default, hence the `unused_macros` allowance.
    #[allow(unused_macros)]
    macro_rules! enable_trace {
        () => {
            tracing_subscriber::fmt()
                .with_max_level(tracing_subscriber::filter::LevelFilter::TRACE)
                .try_init();
        };
    }
3568
    // Asserts that the rule literal `$rule` parses, printing the parse error
    // before panicking otherwise.
    macro_rules! parse_assert {
        ($rule:literal) => {
            FileMagicParser::parse_str($rule, None)
                .inspect_err(|e| eprintln!("{e}"))
                .unwrap();
        };
    }
3576
    // Evaluates `$rule` against `$content` treated as a binary stream.
    //
    // Two-argument form: only requires detection to succeed without error.
    // Three-argument form: additionally compares the resulting message with
    // `$message`.
    macro_rules! assert_magic_match_bin {
        ($rule: literal, $content:literal) => {{ first_magic($rule, $content, StreamKind::Binary).unwrap() }};
        ($rule: literal, $content:literal, $message:expr) => {{
            assert_eq!(
                first_magic($rule, $content, StreamKind::Binary)
                    .unwrap()
                    .message(),
                $message
            );
        }};
    }
3588
    // Evaluates `$rule` against `$content` treated as a UTF-8 text stream.
    //
    // Two-argument form: only requires detection to succeed without error.
    // Three-argument form: additionally compares the resulting message with
    // `$message`.
    macro_rules! assert_magic_match_text {
        ($rule: literal, $content:literal) => {{ first_magic($rule, $content, StreamKind::Text(TextEncoding::Utf8)).unwrap() }};
        ($rule: literal, $content:literal, $message:expr) => {{
            assert_eq!(
                first_magic($rule, $content, StreamKind::Text(TextEncoding::Utf8))
                    .unwrap()
                    .message(),
                $message
            );
        }};
    }
3600
    // Asserts that `$rule` does not match `$content` (UTF-8 text stream):
    // the best result must be the default fallback.
    macro_rules! assert_magic_not_match_text {
        ($rule: literal, $content:literal) => {{
            assert!(
                first_magic($rule, $content, StreamKind::Text(TextEncoding::Utf8))
                    .unwrap()
                    .is_default()
            );
        }};
    }
3610
    // Asserts that `$rule` does not match `$content` (binary stream): the
    // best result must be the default fallback.
    macro_rules! assert_magic_not_match_bin {
        ($rule: literal, $content:literal) => {{
            assert!(
                first_magic($rule, $content, StreamKind::Binary)
                    .unwrap()
                    .is_default()
            );
        }};
    }
3620
    // `regex` rules on text and binary content, including offsets relative
    // to a regex match, with and without the `/s` flag.
    #[test]
    fn test_regex() {
        // shebang detection; the interpreter name is captured into the message
        assert_magic_match_text!(
            r#"
0 regex/1024 \^#![[:space:]]*/usr/bin/env[[:space:]]+
!:mime text/x-shellscript
>&0 regex/64 .*($|\\b) %s shell script text executable
    "#,
            br#"#!/usr/bin/env bash
        echo hello world"#,
            "bash shell script text executable"
        );

        // sanity check: the regex crate matches raw bytes with unicode disabled
        let re = Regex::new(r"(?-u)\x42\x82").unwrap();
        assert!(re.is_match(b"\x42\x82"));

        assert_magic_match_bin!(
            r#"0 regex \x42\x82 binary regex match"#,
            b"\x00\x00\x00\x00\x00\x00\x42\x82"
        );

        // without `/s`, the relative offset is expected right after the match
        assert_magic_match_bin!(
            r#"
            0 regex \x42\x82
            >&0 string \xde\xad\xbe\xef it works
            "#,
            b"\x00\x00\x00\x00\x00\x00\x42\x82\xde\xad\xbe\xef"
        );

        // with `/s`, the relative offset is expected at the start of the match
        assert_magic_match_bin!(
            r#"
            0 regex/s \x42\x82
            >&0 string \x42\x82\xde\xad\xbe\xef it works
            "#,
            b"\x00\x00\x00\x00\x00\x00\x42\x82\xde\xad\xbe\xef"
        );

        // anchors are expected to apply to individual lines
        assert_magic_match_text!(
            r#"
0 regex/1024 \^HelloWorld$ HelloWorld String"#,
            br#"
// this is a comment after an empty line
HelloWorld
            "#
        );
    }
3670
    // `string` rules combined with the `/w`, `/C`, `/c`, `/f` and `/W`
    // modifiers, each with a matching and (where relevant) a non-matching
    // input.
    #[test]
    fn test_string_with_mods() {
        assert_magic_match_text!(
            r#"0 string/w #!\ \ \ /usr/bin/env\ bash BASH
            "#,
            b"#! /usr/bin/env bash i
            echo hello world"
        );

        // `/C`
        assert_magic_match_text!(
            r#"0 string/C HelloWorld it works
            "#,
            b"helloworld"
        );

        assert_magic_not_match_text!(
            r#"0 string/C HelloWorld it works
            "#,
            b"hELLOwORLD"
        );

        // `/c`
        assert_magic_match_text!(
            r#"0 string/c HelloWorld it works
            "#,
            b"HELLOWORLD"
        );

        assert_magic_not_match_text!(
            r#"0 string/c HelloWorld it works
            "#,
            b"helloworld"
        );

        // `/f`
        assert_magic_match_text!(
            r#"0 string/f #!/usr/bin/env\ bash BASH
            "#,
            b"#!/usr/bin/env bash"
        );

        assert_magic_not_match_text!(
            r#"0 string/f #!/usr/bin/python PYTHON"#,
            b"#!/usr/bin/pythonic"
        );

        // `/W`
        assert_magic_match_text!(
            r#"0 string/W #!/usr/bin/env\ python PYTHON"#,
            b"#!/usr/bin/env python"
        );

        assert_magic_not_match_text!(
            r#"0 string/W #!/usr/bin/env\ \ python PYTHON"#,
            b"#!/usr/bin/env python"
        );
    }
3729
    // `search` rules with modifiers, and the effect of `/s` on the offset
    // used by a following relative rule.
    #[test]
    fn test_search_with_mods() {
        assert_magic_match_text!(
            r#"0 search/1/fwt #!\ /usr/bin/luatex LuaTex script text executable"#,
            b"#! /usr/bin/luatex "
        );

        // with `/s`, the relative offset is expected at the start of the match
        assert_magic_match_text!(
            r#"
            0 search/s /usr/bin/env
            >&0 string /usr/bin/env it works
            "#,
            b"#!/usr/bin/env python"
        );

        // without `/s`, the same relative rule must not match
        assert_magic_not_match_text!(
            r#"
            0 search /usr/bin/env
            >&0 string /usr/bin/env it works
            "#,
            b"#!/usr/bin/env python"
        );
    }
3754
    // `pstring` (length-prefixed string) rules with the different length
    // field flags. Judging by the inputs, `H`/`h` and `L`/`l` select 2- and
    // 4-byte length fields in big/little endian, and `J` makes the length
    // include the size of the length field itself.
    #[test]
    fn test_pstring() {
        assert_magic_match_bin!(r#"0 pstring Toast it works"#, b"\x05Toast");

        assert_magic_match_bin!(r#"0 pstring Toast %s"#, b"\x05Toast", "Toast");

        assert_magic_not_match_bin!(r#"0 pstring Toast Doesn't work"#, b"\x07Toaster");

        assert_magic_match_bin!(r#"0 pstring/H Toast it works"#, b"\x00\x05Toast");

        assert_magic_match_bin!(r#"0 pstring/HJ Toast it works"#, b"\x00\x07Toast");

        assert_magic_match_bin!(r#"0 pstring/HJ Toast %s"#, b"\x00\x07Toast", "Toast");

        assert_magic_match_bin!(r#"0 pstring/h Toast it works"#, b"\x05\x00Toast");

        assert_magic_match_bin!(r#"0 pstring/hJ Toast it works"#, b"\x07\x00Toast");

        assert_magic_match_bin!(r#"0 pstring/L Toast it works"#, b"\x00\x00\x00\x05Toast");

        assert_magic_match_bin!(r#"0 pstring/LJ Toast it works"#, b"\x00\x00\x00\x09Toast");

        assert_magic_match_bin!(r#"0 pstring/l Toast it works"#, b"\x05\x00\x00\x00Toast");

        assert_magic_match_bin!(r#"0 pstring/lJ Toast it works"#, b"\x09\x00\x00\x00Toast");
    }
3782
    // A self-referencing `indirect` rule must fail with
    // `Error::MaximumRecursion(MAX_RECURSION)` instead of recursing forever.
    #[test]
    fn test_max_recursion() {
        let res = first_magic(
            r#"0 indirect x"#,
            b"#! /usr/bin/luatex ",
            StreamKind::Binary,
        );
        assert!(res.is_err());
        let _ = res.inspect_err(|e| {
            assert!(matches!(
                e.unwrap_localized(),
                Error::MaximumRecursion(MAX_RECURSION)
            ))
        });
    }
3798
    // Comparison operators (`!`, `>`, `<`) and the `/b` modifier on `string`
    // rules.
    #[test]
    fn test_string_ops() {
        assert_magic_match_text!("0 string/b MZ MZ File", b"MZ\0");
        assert_magic_match_text!("0 string !MZ Not MZ File", b"AZ\0");
        assert_magic_match_text!("0 string >\0 Any String", b"A\0");
        assert_magic_match_text!("0 string >Test Any String", b"Test 1\0");
        assert_magic_match_text!("0 string <Test Any String", b"\0");
        assert_magic_not_match_text!("0 string >Test Any String", b"\0");
    }
3808
    // `lestring16` (little-endian UTF-16 string) rules: exact match,
    // formatting with `%s`, wrong byte order, and non-zero offset.
    #[test]
    fn test_lestring16() {
        assert_magic_match_bin!(
            "0 lestring16 abcd Little-endian UTF-16 string",
            b"\x61\x00\x62\x00\x63\x00\x64\x00"
        );
        assert_magic_match_bin!(
            "0 lestring16 x %s",
            b"\x61\x00\x62\x00\x63\x00\x64\x00\x00",
            "abcd"
        );
        assert_magic_not_match_bin!(
            "0 lestring16 abcd Little-endian UTF-16 string",
            b"\x00\x61\x00\x62\x00\x63\x00\x64"
        );
        assert_magic_match_bin!(
            "4 lestring16 abcd Little-endian UTF-16 string",
            b"\x00\x00\x00\x00\x61\x00\x62\x00\x63\x00\x64\x00"
        );
    }
3829
    // `bestring16` (big-endian UTF-16 string) rules: exact match, formatting
    // with `%s`, wrong byte order, and non-zero offset.
    #[test]
    fn test_bestring16() {
        assert_magic_match_bin!(
            "0 bestring16 abcd Big-endian UTF-16 string",
            b"\x00\x61\x00\x62\x00\x63\x00\x64"
        );
        assert_magic_match_bin!(
            "0 bestring16 x %s",
            b"\x00\x61\x00\x62\x00\x63\x00\x64",
            "abcd"
        );
        assert_magic_not_match_bin!(
            "0 bestring16 abcd Big-endian UTF-16 string",
            b"\x61\x00\x62\x00\x63\x00\x64\x00"
        );
        assert_magic_match_bin!(
            "4 bestring16 abcd Big-endian UTF-16 string",
            b"\x00\x00\x00\x00\x00\x61\x00\x62\x00\x63\x00\x64"
        );
    }
3850
    // Negative offsets are counted from the end of the stream.
    #[test]
    fn test_offset_from_end() {
        assert_magic_match_bin!("-1 ubyte 0x42 last byte ok", b"\x00\x00\x42");
        assert_magic_match_bin!("-2 ubyte 0x41 last byte ok", b"\x00\x41\x00");
    }
3856
    // Chained relative offsets (`&0`): each level continues right after the
    // bytes consumed by its parent.
    #[test]
    fn test_relative_offset() {
        assert_magic_match_bin!(
            "
            0 ubyte 0x42
            >&0 ubyte 0x00
            >>&0 ubyte 0x41 third byte ok
            ",
            b"\x42\x00\x41\x00"
        );
    }
3868
    // Indirect offsets: the offset is read from the stream, optionally
    // adjusted by a constant or by another value read from the stream.
    #[test]
    fn test_indirect_offset() {
        assert_magic_match_bin!("(0.l) ubyte 0x42 it works", b"\x04\x00\x00\x00\x42");
        assert_magic_match_bin!("(0.l+3) ubyte 0x42 it works", b"\x01\x00\x00\x00\x42");
        assert_magic_match_bin!(
            "(0.l+(4)) ubyte 0x42 it works",
            b"\x04\x00\x00\x00\x04\x00\x00\x00\x42"
        );
    }
3880
    // A `use` rule carrying its own message: that message comes first,
    // followed by the message of the named rule it refers to.
    #[test]
    fn test_use_with_message() {
        assert_magic_match_bin!(
            r#"
0 string MZ
>0 use mz first match

0 name mz then second match
>0 string MZ
"#,
            b"MZ\0",
            "first match then second match"
        );
    }
3895
    // Arithmetic and bitwise transforms applied to a scalar before the
    // comparison; division and modulo by zero must be rejected at parse time.
    #[test]
    fn test_scalar_transform() {
        assert_magic_match_bin!("0 ubyte+1 0x1 add works", b"\x00");
        assert_magic_match_bin!("0 ubyte-1 0xfe sub works", b"\xff");
        assert_magic_match_bin!("0 ubyte%2 0 mod works", b"\x0a");
        assert_magic_match_bin!("0 ubyte&0x0f 0x0f bitand works", b"\xff");
        assert_magic_match_bin!("0 ubyte|0x0f 0xff bitor works", b"\xf0");
        assert_magic_match_bin!("0 ubyte^0x0f 0xf0 bitxor works", b"\xff");

        FileMagicParser::parse_str("0 ubyte%0 mod by zero", None)
            .expect_err("expect div by zero error");
        FileMagicParser::parse_str("0 ubyte/0 div by zero", None)
            .expect_err("expect div by zero error");
    }
3910
    // `belong` (big-endian 32-bit) rules with every comparison operator:
    // equality, `<`, `>`, `&`, `^`, `~` and the always-true `x`.
    #[test]
    fn test_belong() {
        // equality, at offset 0 and at a non-zero offset
        assert_magic_match_bin!("0 belong 0x12345678 Big-endian long", b"\x12\x34\x56\x78");
        assert_magic_not_match_bin!("0 belong 0x12345678 Big-endian long", b"\x78\x56\x34\x12");
        assert_magic_match_bin!(
            "4 belong 0x12345678 Big-endian long",
            b"\x00\x00\x00\x00\x12\x34\x56\x78"
        );
        // less than
        assert_magic_match_bin!("0 belong <0x12345678 Big-endian long", b"\x12\x34\x56\x77");
        assert_magic_not_match_bin!("0 belong <0x12345678 Big-endian long", b"\x12\x34\x56\x78");

        // greater than
        assert_magic_match_bin!("0 belong >0x12345678 Big-endian long", b"\x12\x34\x56\x79");
        assert_magic_not_match_bin!("0 belong >0x12345678 Big-endian long", b"\x12\x34\x56\x78");

        // bitwise and
        assert_magic_match_bin!("0 belong &0x5678 Big-endian long", b"\x00\x00\x56\x78");
        assert_magic_not_match_bin!("0 belong &0x0000FFFF Big-endian long", b"\x12\x34\x56\x78");

        // bitwise xor
        assert_magic_match_bin!("0 belong ^0xFFFF0000 Big-endian long", b"\x00\x00\x56\x78");
        assert_magic_not_match_bin!("0 belong ^0xFFFF0000 Big-endian long", b"\x00\x01\x56\x78");

        // bitwise not
        assert_magic_match_bin!("0 belong ~0x12345678 Big-endian long", b"\xed\xcb\xa9\x87");
        assert_magic_not_match_bin!("0 belong ~0x12345678 Big-endian long", b"\x12\x34\x56\x78");

        // any value
        assert_magic_match_bin!("0 belong x Big-endian long", b"\x12\x34\x56\x78");
        assert_magic_match_bin!("0 belong x Big-endian long", b"\x78\x56\x34\x12");
    }
3946
    // `search` rules parse with or without modifiers, whatever the order of
    // the range and the flags.
    #[test]
    fn test_parse_search() {
        parse_assert!("0 search test");
        parse_assert!("0 search/24/s test");
        parse_assert!("0 search/s/24 test");
    }
3953
    // `bedate` (big-endian 32-bit timestamp): match, mismatch, and `%s`
    // formatting of the date.
    #[test]
    fn test_bedate() {
        assert_magic_match_bin!(
            "0 bedate 946684800 Unix date (Jan 1, 2000)",
            b"\x38\x6D\x43\x80"
        );
        assert_magic_not_match_bin!(
            "0 bedate 946684800 Unix date (Jan 1, 2000)",
            b"\x00\x00\x00\x00"
        );
        assert_magic_match_bin!(
            "4 bedate 946684800 %s",
            b"\x00\x00\x00\x00\x38\x6D\x43\x80",
            "2000-01-01 00:00:00"
        );
    }
    // `beldate` (big-endian 32-bit timestamp, local time): the formatted
    // value is compared with `unix_local_time_to_string`.
    #[test]
    fn test_beldate() {
        assert_magic_match_bin!(
            "0 beldate 946684800 Local date (Jan 1, 2000)",
            b"\x38\x6D\x43\x80"
        );
        assert_magic_not_match_bin!(
            "0 beldate 946684800 Local date (Jan 1, 2000)",
            b"\x00\x00\x00\x00"
        );

        assert_magic_match_bin!(
            "4 beldate 946684800 {}",
            b"\x00\x00\x00\x00\x38\x6D\x43\x80",
            unix_local_time_to_string(946684800)
        );
    }
3987
    // `beqdate` (big-endian 64-bit timestamp): match, mismatch, and `%s`
    // formatting of the date.
    #[test]
    fn test_beqdate() {
        assert_magic_match_bin!(
            "0 beqdate 946684800 Unix date (Jan 1, 2000)",
            b"\x00\x00\x00\x00\x38\x6D\x43\x80"
        );

        assert_magic_not_match_bin!(
            "0 beqdate 946684800 Unix date (Jan 1, 2000)",
            b"\x00\x00\x00\x00\x00\x00\x00\x00"
        );

        assert_magic_match_bin!(
            "0 beqdate 946684800 %s",
            b"\x00\x00\x00\x00\x38\x6D\x43\x80",
            "2000-01-01 00:00:00"
        );
    }
4006
    // `medate` (middle-endian 32-bit timestamp): match, mismatch, and `%s`
    // formatting of the date.
    #[test]
    fn test_medate() {
        assert_magic_match_bin!(
            "0 medate 946684800 Unix date (Jan 1, 2000)",
            b"\x6D\x38\x80\x43"
        );

        assert_magic_not_match_bin!(
            "0 medate 946684800 Unix date (Jan 1, 2000)",
            b"\x00\x00\x00\x00"
        );

        assert_magic_match_bin!(
            "4 medate 946684800 %s",
            b"\x00\x00\x00\x00\x6D\x38\x80\x43",
            "2000-01-01 00:00:00"
        );
    }
4025
    // `meldate` (middle-endian 32-bit timestamp, local time): the formatted
    // value is compared with `unix_local_time_to_string`.
    #[test]
    fn test_meldate() {
        assert_magic_match_bin!(
            "0 meldate 946684800 Local date (Jan 1, 2000)",
            b"\x6D\x38\x80\x43"
        );
        assert_magic_not_match_bin!(
            "0 meldate 946684800 Local date (Jan 1, 2000)",
            b"\x00\x00\x00\x00"
        );

        assert_magic_match_bin!(
            "4 meldate 946684800 %s",
            b"\x00\x00\x00\x00\x6D\x38\x80\x43",
            unix_local_time_to_string(946684800)
        );
    }
4043
    // `date` (32-bit timestamp; the inputs use little-endian byte order):
    // match, mismatch, and `{}` formatting of the date.
    #[test]
    fn test_date() {
        assert_magic_match_bin!(
            "0 date 946684800 Local date (Jan 1, 2000)",
            b"\x80\x43\x6D\x38"
        );
        assert_magic_not_match_bin!(
            "0 date 946684800 Local date (Jan 1, 2000)",
            b"\x00\x00\x00\x00"
        );
        assert_magic_match_bin!(
            "4 date 946684800 {}",
            b"\x00\x00\x00\x00\x80\x43\x6D\x38",
            "2000-01-01 00:00:00"
        );
    }
4060
    // `leldate` (little-endian 32-bit timestamp, local time): the formatted
    // value is compared with `unix_local_time_to_string`.
    #[test]
    fn test_leldate() {
        assert_magic_match_bin!(
            "0 leldate 946684800 Local date (Jan 1, 2000)",
            b"\x80\x43\x6D\x38"
        );
        assert_magic_not_match_bin!(
            "0 leldate 946684800 Local date (Jan 1, 2000)",
            b"\x00\x00\x00\x00"
        );
        assert_magic_match_bin!(
            "4 leldate 946684800 {}",
            b"\x00\x00\x00\x00\x80\x43\x6D\x38",
            unix_local_time_to_string(946684800)
        );
    }
4077
    // `leqdate` (little-endian 64-bit timestamp): match, mismatch, and `%s`
    // formatting of the date.
    #[test]
    fn test_leqdate() {
        assert_magic_match_bin!(
            "0 leqdate 1577836800 Unix date (Jan 1, 2020)",
            b"\x00\xe1\x0b\x5E\x00\x00\x00\x00"
        );

        assert_magic_not_match_bin!(
            "0 leqdate 1577836800 Unix date (Jan 1, 2020)",
            b"\x00\x00\x00\x00\x00\x00\x00\x00"
        );
        assert_magic_match_bin!(
            "8 leqdate 1577836800 %s",
            b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\xE1\x0B\x5E\x00\x00\x00\x00",
            "2020-01-01 00:00:00"
        );
    }
4095
    // `leqldate` (little-endian 64-bit timestamp, local time): the formatted
    // value is compared with `unix_local_time_to_string`.
    #[test]
    fn test_leqldate() {
        assert_magic_match_bin!(
            "0 leqldate 1577836800 Unix date (Jan 1, 2020)",
            b"\x00\xe1\x0b\x5E\x00\x00\x00\x00"
        );

        assert_magic_not_match_bin!(
            "0 leqldate 1577836800 Unix date (Jan 1, 2020)",
            b"\x00\x00\x00\x00\x00\x00\x00\x00"
        );
        assert_magic_match_bin!(
            "8 leqldate 1577836800 %s",
            b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\xE1\x0B\x5E\x00\x00\x00\x00",
            unix_local_time_to_string(1577836800)
        );
    }
4113
    // `melong` (middle-endian 32-bit) rules with every comparison operator:
    // `=`, `<`, `>`, `&`, `^`, `~` and the always-true `x`.
    #[test]
    fn test_melong() {
        // equality
        assert_magic_match_bin!(
            "0 melong =0x12345678 Middle-endian long",
            b"\x34\x12\x78\x56"
        );
        assert_magic_not_match_bin!(
            "0 melong =0x12345678 Middle-endian long",
            b"\x00\x00\x00\x00"
        );

        // less than
        assert_magic_match_bin!(
            "0 melong <0x12345678 Middle-endian long",
            b"\x34\x12\x78\x55"
        );
        assert_magic_not_match_bin!(
            "0 melong <0x12345678 Middle-endian long",
            b"\x34\x12\x78\x56"
        );

        // greater than
        assert_magic_match_bin!(
            "0 melong >0x12345678 Middle-endian long",
            b"\x34\x12\x78\x57"
        );
        assert_magic_not_match_bin!(
            "0 melong >0x12345678 Middle-endian long",
            b"\x34\x12\x78\x56"
        );

        // bitwise and
        assert_magic_match_bin!("0 melong &0x5678 Middle-endian long", b"\xab\xcd\x78\x56");
        assert_magic_not_match_bin!(
            "0 melong &0x0000FFFF Middle-endian long",
            b"\x34\x12\x78\x56"
        );

        // bitwise xor
        assert_magic_match_bin!(
            "0 melong ^0xFFFF0000 Middle-endian long",
            b"\x00\x00\x78\x56"
        );
        assert_magic_not_match_bin!(
            "0 melong ^0xFFFF0000 Middle-endian long",
            b"\x00\x01\x78\x56"
        );

        // bitwise not
        assert_magic_match_bin!(
            "0 melong ~0x12345678 Middle-endian long",
            b"\xCB\xED\x87\xA9"
        );
        assert_magic_not_match_bin!(
            "0 melong ~0x12345678 Middle-endian long",
            b"\x34\x12\x78\x56"
        );

        // any value
        assert_magic_match_bin!("0 melong x Middle-endian long", b"\x34\x12\x78\x56");
        assert_magic_match_bin!("0 melong x Middle-endian long", b"\x00\x00\x00\x00");
    }
4177
    // `uquad` (unsigned 64-bit; the inputs use little-endian byte order)
    // rules with every comparison operator, plus `{:#x}` formatting.
    #[test]
    fn test_uquad() {
        // equality
        assert_magic_match_bin!(
            "0 uquad =0x123456789ABCDEF0 Unsigned quad",
            b"\xF0\xDE\xBC\x9A\x78\x56\x34\x12"
        );
        assert_magic_not_match_bin!(
            "0 uquad =0x123456789ABCDEF0 Unsigned quad",
            b"\x00\x00\x00\x00\x00\x00\x00\x00"
        );

        // less than
        assert_magic_match_bin!(
            "0 uquad <0x123456789ABCDEF0 Unsigned quad",
            b"\xF0\xDE\xBC\x9A\x78\x56\x34\x11"
        );
        assert_magic_not_match_bin!(
            "0 uquad <0x123456789ABCDEF0 Unsigned quad",
            b"\xF0\xDE\xBC\x9A\x78\x56\x34\x12"
        );

        // greater than
        assert_magic_match_bin!(
            "0 uquad >0x123456789ABCDEF0 Unsigned quad",
            b"\xF0\xDE\xBC\x9A\x78\x56\x34\x13"
        );
        assert_magic_not_match_bin!(
            "0 uquad >0x123456789ABCDEF0 Unsigned quad",
            b"\xF0\xDE\xBC\x9A\x78\x56\x34\x12"
        );

        // bitwise and
        assert_magic_match_bin!(
            "0 uquad &0xF0 Unsigned quad",
            b"\xF0\xDE\xBC\x9A\x78\x56\x34\x12"
        );
        assert_magic_not_match_bin!(
            "0 uquad &0xFF Unsigned quad",
            b"\xF0\xDE\xBC\x9A\x78\x56\x34\x12"
        );

        // bitwise xor
        assert_magic_match_bin!(
            "0 uquad ^0xFFFFFFFFFFFFFFFF Unsigned quad",
            b"\x00\x00\x00\x00\x00\x00\x00\x00"
        );
        assert_magic_not_match_bin!(
            "0 uquad ^0xFFFFFFFFFFFFFFFF Unsigned quad",
            b"\xF0\xDE\xBC\x9A\x78\x56\x34\x12"
        );

        // bitwise not
        assert_magic_match_bin!(
            "0 uquad ~0x123456789ABCDEF0 Unsigned quad",
            b"\x0F\x21\x43\x65\x87\xA9\xCB\xED"
        );
        assert_magic_not_match_bin!(
            "0 uquad ~0x123456789ABCDEF0 Unsigned quad",
            b"\xF0\xDE\xBC\x9A\x78\x56\x34\x12"
        );

        // any value, with and without formatting
        assert_magic_match_bin!(
            "0 uquad x {:#x}",
            b"\xF0\xDE\xBC\x9A\x78\x56\x34\x12",
            "0x123456789abcdef0"
        );
        assert_magic_match_bin!(
            "0 uquad x Unsigned quad",
            b"\x00\x00\x00\x00\x00\x00\x00\x00"
        );
    }
4251
    // `guid` rules: match, mismatch, and `%s` formatting of the 16 bytes.
    #[test]
    fn test_guid() {
        assert_magic_match_bin!(
            "0 guid EC959539-6786-2D4E-8FDB-98814CE76C1E It works",
            b"\xEC\x95\x95\x39\x67\x86\x2D\x4E\x8F\xDB\x98\x81\x4C\xE7\x6C\x1E"
        );

        assert_magic_not_match_bin!(
            "0 guid 399595EC-8667-4E2D-8FDB-98814CE76C1E It works",
            b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F"
        );

        assert_magic_match_bin!(
            "0 guid x %s",
            b"\xEC\x95\x95\x39\x67\x86\x2D\x4E\x8F\xDB\x98\x81\x4C\xE7\x6C\x1E",
            "EC959539-6786-2D4E-8FDB-98814CE76C1E"
        );
    }
4270
    // `ubeqdate` (unsigned big-endian 64-bit timestamp): match, `%s`
    // formatting, and mismatch.
    #[test]
    fn test_ubeqdate() {
        assert_magic_match_bin!(
            "0 ubeqdate 1633046400 It works",
            b"\x00\x00\x00\x00\x61\x56\x4f\x80"
        );

        assert_magic_match_bin!(
            "0 ubeqdate x %s",
            b"\x00\x00\x00\x00\x61\x56\x4f\x80",
            "2021-10-01 00:00:00"
        );

        assert_magic_not_match_bin!(
            "0 ubeqdate 1633046400 It should not work",
            b"\x00\x00\x00\x00\x00\x00\x00\x00"
        );
    }
4289
    // `ldate` (32-bit timestamp, local time): the formatted value is
    // compared with `unix_local_time_to_string`.
    #[test]
    fn test_ldate() {
        assert_magic_match_bin!("0 ldate 1640551520 It works", b"\x60\xd4\xC8\x61");

        assert_magic_not_match_bin!("0 ldate 1633046400 It should not work", b"\x00\x00\x00\x00");

        assert_magic_match_bin!(
            "0 ldate x %s",
            b"\x60\xd4\xC8\x61",
            unix_local_time_to_string(1640551520)
        );
    }
4302
    // The value printed for a scalar is the transformed one (here after
    // division or modulo), not the raw byte.
    #[test]
    fn test_scalar_with_transform() {
        assert_magic_match_bin!("0 ubyte/10 2 {}", b"\x14", "2");
        assert_magic_match_bin!("0 ubyte/10 x {}", b"\x14", "2");
        assert_magic_match_bin!("0 ubyte%10 x {}", b"\x14", "0");
    }
4309
#[test]
fn test_float_with_transform() {
    // The input bytes are the little-endian IEEE-754 encoding of 20.0f32
    // (0x41A0_0000). The transform is applied before comparing/formatting.
    // 20.0 / 10 == 2: compared against the literal 2.
    assert_magic_match_bin!("0 lefloat/10 2 {}", b"\x00\x00\xa0\x41", "2");
    // 20.0 / 10 == 2: any value accepted, the transformed value is printed.
    assert_magic_match_bin!("0 lefloat/10 x {}", b"\x00\x00\xa0\x41", "2");
    // 20.0 % 10 == 0.
    assert_magic_match_bin!("0 lefloat%10 x {}", b"\x00\x00\xa0\x41", "0");
}
4316
#[test]
fn test_read_octal() {
    // Well-formed octal numbers: a leading `0` followed by octal digits.
    assert_eq!(read_octal_u64(&mut lazy_cache!("0")), Some(0));
    assert_eq!(read_octal_u64(&mut lazy_cache!("00")), Some(0));
    assert_eq!(read_octal_u64(&mut lazy_cache!("01")), Some(1));
    assert_eq!(read_octal_u64(&mut lazy_cache!("07")), Some(7));
    assert_eq!(read_octal_u64(&mut lazy_cache!("010")), Some(8));
    assert_eq!(read_octal_u64(&mut lazy_cache!("0123")), Some(83));
    assert_eq!(read_octal_u64(&mut lazy_cache!("0755")), Some(493));

    // Parsing stops at the first non-octal character; the digits read so
    // far are returned.
    assert_eq!(read_octal_u64(&mut lazy_cache!("0ABC")), Some(0));
    assert_eq!(read_octal_u64(&mut lazy_cache!("01ABC")), Some(1));
    assert_eq!(read_octal_u64(&mut lazy_cache!("0755ABC")), Some(493));
    assert_eq!(read_octal_u64(&mut lazy_cache!("0123ABC")), Some(83));

    // `8` is not an octal digit, so it terminates the number as well.
    assert_eq!(read_octal_u64(&mut lazy_cache!("08")), Some(0));
    assert_eq!(read_octal_u64(&mut lazy_cache!("01238")), Some(83));

    // Without the leading `0` the input is not recognised as octal.
    assert_eq!(read_octal_u64(&mut lazy_cache!("123")), None);
    assert_eq!(read_octal_u64(&mut lazy_cache!("755")), None);

    // Empty input.
    assert_eq!(read_octal_u64(&mut lazy_cache!("")), None);

    // Input that does not start with a digit `0`.
    assert_eq!(read_octal_u64(&mut lazy_cache!("ABC")), None);
    assert_eq!(read_octal_u64(&mut lazy_cache!("8ABC")), None);

    // Larger value: 0o1777777777 == 2^28 - 1.
    assert_eq!(
        read_octal_u64(&mut lazy_cache!("01777777777")),
        Some(268435455)
    );
}
4355
#[test]
fn test_offset_bug_1() {
    // Regression test for offset handling across nested `use` calls.
    // Input layout: 0x00, "TEST" at 1..5, 0x06 at 5, "twice" at 6..11,
    // 0x00 at 11, 0x06 at 12. The three rules together must print
    // "Bread is Toasted twice".
    assert_magic_match_bin!(
        r"
1 string TEST Bread is
# offset computation is relative to
# rule start
>(5.b) use toasted

0 name toasted
>0 string twice Toasted
>>0 use toasted_twice

0 name toasted_twice
>(6.b) string x %s
        ",
        b"\x00TEST\x06twice\x00\x06",
        "Bread is Toasted twice"
    );
}
4378
#[test]
fn test_offset_bug_2() {
    // Same scenario as `test_offset_bug_1`, but the top-level rule is
    // anchored from the end of the input (`-12` on a 13-byte buffer points
    // at "TEST"). The final rule must use `%s` so that the matched string
    // ("twice") is substituted into the message.
    assert_magic_match_bin!(
        r"
-12 string TEST Bread is
>(4.b) use toasted

0 name toasted
>0 string twice Toasted
>>0 use toasted_twice

0 name toasted_twice
>(6.b) string x %s
        ",
        b"\x00TEST\x06twice\x00\x06",
        "Bread is Toasted twice"
    );
}
4404
#[test]
fn test_offset_bug_3() {
    // Regression test for offsets after a relative indirection
    // (`indirect/r`): the byte at offset 5 (0x06) selects where the rule
    // set is re-applied, and the re-applied rules then call a named rule.
    assert_magic_match_bin!(
        r"
1 string TEST Bread is
>(5.b) indirect/r x

0 string twice Toasted
>0 use toasted_twice

0 name toasted_twice
>0 string x %s
        ",
        b"\x00TEST\x06twice\x00\x08",
        "Bread is Toasted twice"
    );
}
4424
#[test]
fn test_offset_bug_4() {
    // Regression test combining `indirect/r` with a `use` whose offset is
    // itself indirect. Input layout: 0x00, "Bread" at 1..6, 0x06 at 6,
    // "is Toasted" at 7..17, 0x0c at 17, "twice" at 18..23, 0x00 at 23.
    assert_magic_match_bin!(
        r"
1 string Bread %s
>(6.b) indirect/r x

# this one uses a based offset
# computed at indirection
1 string is\ Toasted %s
>(11.b) use toasted_twice

# this one is using a new base
# offset being previous base
# offset + offset of use
0 name toasted_twice
>0 string x %s
        ",
        b"\x00Bread\x06is Toasted\x0ctwice\x00",
        "Bread is Toasted twice"
    );
}
4449
#[test]
fn test_offset_bug_5() {
    // Regression test for a relative offset (`&1`) used inside a named
    // rule reached through `indirect/r` + `use`: after "twice" has
    // matched, `&1` must land on the trailing 0x08 byte.
    assert_magic_match_bin!(
        r"
1 string TEST Bread is
>(5.b) indirect/r x

0 string twice Toasted
>0 use toasted_twice

0 name toasted_twice
>0 string twice
>>&1 byte 0x08 twice
        ",
        b"\x00TEST\x06twice\x00\x08",
        "Bread is Toasted twice"
    );
}
4468}