1#![forbid(unsafe_code)]
2#![deny(unused_imports)]
3use dyf::{DynDisplay, FormatString, dformat};
137use flagset::{FlagSet, flags};
138use flate2::{Compression, read::GzDecoder, write::GzEncoder};
139use lazy_cache::LazyCache;
140use memchr::memchr;
141use pest::{Span, error::ErrorVariant};
142use regex::bytes::{self};
143use serde::{Deserialize, Serialize};
144use std::{
145 borrow::Cow,
146 cmp::max,
147 collections::{HashMap, HashSet},
148 fmt::{self, Debug, Display},
149 io::{self, Read, Seek, SeekFrom, Write},
150 ops::{Add, BitAnd, BitOr, BitXor, Div, Mul, Rem, Sub},
151 path::Path,
152};
153use tar::Archive;
154use thiserror::Error;
155use tracing::{Level, debug, enabled, trace};
156
157use crate::{
158 numeric::{Float, FloatDataType, Scalar, ScalarDataType},
159 parser::{FileMagicParser, Rule},
160 utils::{decode_id3, find_json_boundaries},
161};
162
163mod numeric;
164mod parser;
165mod utils;
166
/// Strength value reserved for hard-coded magic.
/// NOTE(review): the consumer is outside this view — confirm how it is applied.
const HARDCODED_MAGIC_STRENGTH: u64 = 2048;
/// Source label used for hard-coded magic.
/// NOTE(review): presumably reported where a rule file name would be — confirm.
const HARDCODED_SOURCE: &str = "hardcoded";
/// Recursion bound.
/// NOTE(review): presumably paired with `Error::MaximumRecursion` — confirm at the use site.
const MAX_RECURSION: usize = 50;
/// Upper bound (7 MiB) on the bytes read for search tests and for string
/// tests carrying length-altering modifiers; also the default window assumed
/// when scoring search and regex tests.
pub const FILE_BYTES_MAX: usize = 7 * 1024 * 1024;
/// Number of bytes read for a regex test that specifies no explicit length.
const FILE_REGEX_MAX: usize = 8192;

/// MIME type string for binary content.
/// NOTE(review): presumably the fallback when nothing matches — confirm.
pub const DEFAULT_BIN_MIMETYPE: &str = "application/octet-stream";
/// MIME type string for textual content.
/// NOTE(review): presumably the fallback for unrecognised text — confirm.
pub const DEFAULT_TEXT_MIMETYPE: &str = "text/plain";

/// strftime-style pattern (`YYYY-MM-DD HH:MM:SS`).
/// NOTE(review): presumably used to render date values — confirm.
pub(crate) const TIMESTAMP_FORMAT: &str = "%Y-%m-%d %H:%M:%S";
180
// Panics with the given format arguments, but only when the crate is built
// with debug assertions enabled; otherwise expands to a no-op branch so the
// caller's fallback code runs instead.
macro_rules! debug_panic {
    ($($arg:tt)*) => {
        // `cfg!` (not `#[cfg]`) keeps the arguments type-checked in every build.
        if cfg!(debug_assertions) {
            panic!($($arg)*);
        }
    };
}
188
// Reads exactly `size_of::<$ty>()` bytes from `$reader` and evaluates to the
// raw byte array. I/O failures are propagated with `?`, so this can only be
// used inside a function returning a compatible `Result`.
macro_rules! read {
    ($reader:expr, $ty:ty) => {{
        const WIDTH: usize = std::mem::size_of::<$ty>();
        let mut raw = [0u8; WIDTH];
        $reader.read_exact(&mut raw)?;
        raw
    }};
}
196
// Reads a `$ty` from `$reader`, decoding the bytes as little-endian.
macro_rules! read_le {
    ($reader:expr, $ty:ty) => {{
        let raw = read!($reader, $ty);
        <$ty>::from_le_bytes(raw)
    }};
}
200
// Reads a `$ty` from `$reader`, decoding the bytes as big-endian.
macro_rules! read_be {
    ($reader:expr, $ty:ty) => {{
        let raw = read!($reader, $ty);
        <$ty>::from_be_bytes(raw)
    }};
}
204
// Reads a 32-bit value stored as two consecutive little-endian 16-bit words,
// the first word supplying the upper 16 bits of the resulting `i32`.
macro_rules! read_me {
    ($reader:expr) => {{
        let upper = read_le!($reader, u16) as i32;
        let lower = read_le!($reader, u16) as i32;
        (upper << 16) | lower
    }};
}
208
209#[inline(always)]
210fn read_octal_u64<R: Read + Seek>(haystack: &mut LazyCache<R>) -> Option<u64> {
211 let s = haystack
212 .read_while_or_limit(|b| matches!(b, b'0'..=b'7'), 22)
213 .map(|buf| str::from_utf8(buf))
214 .ok()?
215 .ok()?;
216
217 if !s.starts_with("0") {
218 return None;
219 }
220
221 u64::from_str_radix(s, 8).ok()
222}
223
/// Errors produced by this crate.
#[derive(Debug, Error)]
pub enum Error {
    /// Free-form error message.
    #[error("{0}")]
    Msg(String),

    /// Another error annotated with the source name and line it relates to.
    #[error("source={0} line={1} error={2}")]
    Localized(String, usize, Box<Error>),

    /// A rule referenced by name could not be found.
    #[error("missing rule: {0}")]
    MissingRule(String),

    /// The recursion limit was hit; carries a `usize` shown in the message.
    #[error("maximum recursion reached: {0}")]
    MaximumRecursion(usize),

    /// Underlying I/O failure.
    #[error("io: {0}")]
    Io(#[from] io::Error),

    /// Parser failure (boxed `pest` error).
    #[error("parser error: {0}")]
    Parse(#[from] Box<pest::error::Error<Rule>>),

    /// Failure while formatting with `dyf`.
    #[error("formatting: {0}")]
    Format(#[from] dyf::Error),

    /// Regular expression error.
    #[error("regex: {0}")]
    Regex(#[from] regex::Error),

    /// `bincode` encoding failure.
    #[error("{0}")]
    Serialize(#[from] bincode::error::EncodeError),

    /// `bincode` decoding failure.
    #[error("{0}")]
    Deserialize(#[from] bincode::error::DecodeError),
}
267
268impl Error {
269 #[inline]
270 fn parser<S: ToString>(msg: S, span: Span<'_>) -> Self {
271 Self::Parse(Box::new(pest::error::Error::new_from_span(
272 ErrorVariant::CustomError {
273 message: msg.to_string(),
274 },
275 span,
276 )))
277 }
278
279 fn msg<M: AsRef<str>>(msg: M) -> Self {
280 Self::Msg(msg.as_ref().into())
281 }
282
283 fn localized<S: AsRef<str>>(source: S, line: usize, err: Error) -> Self {
284 Self::Localized(source.as_ref().into(), line, err.into())
285 }
286
287 pub fn unwrap_localized(&self) -> &Self {
289 match self {
290 Self::Localized(_, _, e) => e,
291 _ => self,
292 }
293 }
294}
295
/// Text attached to a rule, either literal or containing a format directive.
#[derive(Debug, Clone, Serialize, Deserialize)]
enum Message {
    /// Literal text, emitted as is.
    String(String),
    /// Text rendered through a `dyf` format string.
    Format {
        /// Conversion specifier associated with the message; `format_with`
        /// compares it against `"c"` to print byte scalars as characters.
        printf_spec: String,
        /// Format string used to render the matched value.
        fs: FormatString,
    },
}
304
305impl Display for Message {
306 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
307 match self {
308 Self::String(s) => write!(f, "{s}"),
309 Self::Format { printf_spec: _, fs } => write!(f, "{}", fs.to_string_lossy()),
310 }
311 }
312}
313
314impl Message {
315 fn to_string_lossy(&self) -> Cow<'_, str> {
316 match self {
317 Message::String(s) => Cow::Borrowed(s),
318 Message::Format { printf_spec: _, fs } => fs.to_string_lossy(),
319 }
320 }
321
322 #[inline(always)]
323 fn format_with(&self, mr: Option<&MatchRes>) -> Result<Cow<'_, str>, Error> {
324 match self {
325 Self::String(s) => Ok(Cow::Borrowed(s.as_str())),
326 Self::Format {
327 printf_spec: c_spec,
328 fs,
329 } => {
330 if let Some(mr) = mr {
331 match mr {
332 MatchRes::Float(_, _) | MatchRes::Bytes(_, _, _, _) => {
333 Ok(Cow::Owned(dformat!(fs, mr)?))
334 }
335 MatchRes::Scalar(_, scalar) => {
336 if c_spec.as_str() == "c" {
338 match scalar {
339 Scalar::byte(b) => {
340 let b = (*b as u8) as char;
341 Ok(Cow::Owned(dformat!(fs, b)?))
342 }
343 Scalar::ubyte(b) => {
344 let b = *b as char;
345 Ok(Cow::Owned(dformat!(fs, b)?))
346 }
347 _ => Ok(Cow::Owned(dformat!(fs, mr)?)),
348 }
349 } else {
350 Ok(Cow::Owned(dformat!(fs, mr)?))
351 }
352 }
353 }
354 } else {
355 Ok(fs.to_string_lossy())
356 }
357 }
358 }
359 }
360}
361
362impl ScalarDataType {
363 #[inline(always)]
364 fn read<R: Read + Seek>(&self, from: &mut R, switch_endianness: bool) -> Result<Scalar, Error> {
365 macro_rules! _read_le {
366 ($ty: ty) => {{
367 if switch_endianness {
368 <$ty>::from_be_bytes(read!(from, $ty))
369 } else {
370 <$ty>::from_le_bytes(read!(from, $ty))
371 }
372 }};
373 }
374
375 macro_rules! _read_be {
376 ($ty: ty) => {{
377 if switch_endianness {
378 <$ty>::from_le_bytes(read!(from, $ty))
379 } else {
380 <$ty>::from_be_bytes(read!(from, $ty))
381 }
382 }};
383 }
384
385 macro_rules! _read_ne {
386 ($ty: ty) => {{
387 if cfg!(target_endian = "big") {
388 _read_be!($ty)
389 } else {
390 _read_le!($ty)
391 }
392 }};
393 }
394
395 macro_rules! _read_me {
396 () => {
397 ((_read_le!(u16) as i32) << 16) | (_read_le!(u16) as i32)
398 };
399 }
400
401 Ok(match self {
402 Self::byte => Scalar::byte(read!(from, u8)[0] as i8),
404 Self::short => Scalar::short(_read_ne!(i16)),
405 Self::long => Scalar::long(_read_ne!(i32)),
406 Self::date => Scalar::date(_read_ne!(i32)),
407 Self::ldate => Scalar::ldate(_read_ne!(i32)),
408 Self::qwdate => Scalar::qwdate(_read_ne!(i64)),
409 Self::leshort => Scalar::leshort(_read_le!(i16)),
410 Self::lelong => Scalar::lelong(_read_le!(i32)),
411 Self::lequad => Scalar::lequad(_read_le!(i64)),
412 Self::bequad => Scalar::bequad(_read_be!(i64)),
413 Self::belong => Scalar::belong(_read_be!(i32)),
414 Self::bedate => Scalar::bedate(_read_be!(i32)),
415 Self::beldate => Scalar::beldate(_read_be!(i32)),
416 Self::beqdate => Scalar::beqdate(_read_be!(i64)),
417 Self::ubyte => Scalar::ubyte(read!(from, u8)[0]),
419 Self::ushort => Scalar::ushort(_read_ne!(u16)),
420 Self::uleshort => Scalar::uleshort(_read_le!(u16)),
421 Self::ulelong => Scalar::ulelong(_read_le!(u32)),
422 Self::uledate => Scalar::uledate(_read_le!(u32)),
423 Self::ulequad => Scalar::ulequad(_read_le!(u64)),
424 Self::offset => Scalar::offset(from.stream_position()?),
425 Self::ubequad => Scalar::ubequad(_read_be!(u64)),
426 Self::medate => Scalar::medate(_read_me!()),
427 Self::meldate => Scalar::meldate(_read_me!()),
428 Self::melong => Scalar::melong(_read_me!()),
429 Self::beshort => Scalar::beshort(_read_be!(i16)),
430 Self::quad => Scalar::quad(_read_ne!(i64)),
431 Self::uquad => Scalar::uquad(_read_ne!(u64)),
432 Self::ledate => Scalar::ledate(_read_le!(i32)),
433 Self::leldate => Scalar::leldate(_read_le!(i32)),
434 Self::leqdate => Scalar::leqdate(_read_le!(i64)),
435 Self::leqldate => Scalar::leqldate(_read_le!(i64)),
436 Self::leqwdate => Scalar::leqwdate(_read_le!(i64)),
437 Self::ubelong => Scalar::ubelong(_read_be!(u32)),
438 Self::ulong => Scalar::ulong(_read_ne!(u32)),
439 Self::ubeshort => Scalar::ubeshort(_read_be!(u16)),
440 Self::ubeqdate => Scalar::ubeqdate(_read_be!(u64)),
441 Self::lemsdosdate => Scalar::lemsdosdate(_read_le!(u16)),
442 Self::lemsdostime => Scalar::lemsdostime(_read_le!(u16)),
443 Self::guid => Scalar::guid(u128::from_be_bytes(read!(from, u128))),
444 })
445 }
446}
447
448impl FloatDataType {
449 #[inline(always)]
450 fn read<R: Read + Seek>(&self, from: &mut R, switch_endianness: bool) -> Result<Float, Error> {
451 macro_rules! _read_le {
452 ($ty: ty) => {{
453 if switch_endianness {
454 <$ty>::from_be_bytes(read!(from, $ty))
455 } else {
456 <$ty>::from_le_bytes(read!(from, $ty))
457 }
458 }};
459 }
460
461 macro_rules! _read_be {
462 ($ty: ty) => {{
463 if switch_endianness {
464 <$ty>::from_le_bytes(read!(from, $ty))
465 } else {
466 <$ty>::from_be_bytes(read!(from, $ty))
467 }
468 }};
469 }
470
471 macro_rules! _read_ne {
472 ($ty: ty) => {{
473 if cfg!(target_endian = "big") {
474 _read_be!($ty)
475 } else {
476 _read_le!($ty)
477 }
478 }};
479 }
480
481 macro_rules! _read_me {
482 () => {
483 ((_read_le!(u16) as i32) << 16) | (_read_le!(u16) as i32)
484 };
485 }
486
487 Ok(match self {
488 Self::lefloat => Float::lefloat(_read_le!(f32)),
489 Self::befloat => Float::befloat(_read_le!(f32)),
490 Self::ledouble => Float::ledouble(_read_le!(f64)),
491 Self::bedouble => Float::bedouble(_read_be!(f64)),
492 })
493 }
494}
495
/// Arithmetic or bitwise operator applied to a value before it is compared.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
enum Op {
    /// Multiplication (`*`).
    Mul,
    /// Addition (`+`).
    Add,
    /// Subtraction (`-`).
    Sub,
    /// Division (`/`).
    Div,
    /// Remainder (`%`).
    Mod,
    /// Bitwise and (`&`).
    And,
    /// Bitwise exclusive or (`^`).
    Xor,
    /// Bitwise or (`|`).
    Or,
}
507
508impl Display for Op {
509 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
510 match self {
511 Op::Mul => write!(f, "*"),
512 Op::Add => write!(f, "+"),
513 Op::Sub => write!(f, "-"),
514 Op::Div => write!(f, "/"),
515 Op::Mod => write!(f, "%"),
516 Op::And => write!(f, "&"),
517 Op::Or => write!(f, "|"),
518 Op::Xor => write!(f, "^"),
519 }
520 }
521}
522
/// Comparison applied between the value read from the input and the rule's
/// test value. The per-variant notes describe the scalar comparison.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
enum CmpOp {
    /// Values are equal.
    Eq,
    /// Read value is lower than the test value.
    Lt,
    /// Read value is greater than the test value.
    Gt,
    /// All bits of the test value are set in the read value.
    BitAnd,
    /// Values differ.
    Neq,
    /// No bit of the test value is set in the read value.
    Xor,
    /// Read value equals the bitwise negation of the test value.
    Not,
}
533
534impl CmpOp {
535 #[inline(always)]
536 fn is_neq(&self) -> bool {
537 matches!(self, Self::Neq)
538 }
539}
540
/// Operation applied to a scalar read from the input before it is compared.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct ScalarTransform {
    /// Operator to apply.
    op: Op,
    /// Right-hand operand.
    num: Scalar,
}
546
547impl ScalarTransform {
548 fn apply(&self, s: Scalar) -> Option<Scalar> {
549 match self.op {
550 Op::Add => s.checked_add(self.num),
551 Op::Sub => s.checked_sub(self.num),
552 Op::Mul => s.checked_mul(self.num),
553 Op::Div => s.checked_div(self.num),
554 Op::Mod => s.checked_rem(self.num),
555 Op::And => Some(s.bitand(self.num)),
556 Op::Xor => Some(s.bitxor(self.num)),
557 Op::Or => Some(s.bitor(self.num)),
558 }
559 }
560}
561
/// Operation applied to a float read from the input before it is compared.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct FloatTransform {
    /// Operator to apply; only the arithmetic operators are supported.
    op: Op,
    /// Right-hand operand.
    num: Float,
}
567
568impl FloatTransform {
569 fn apply(&self, s: Float) -> Float {
570 match self.op {
571 Op::Add => s.add(self.num),
572 Op::Sub => s.sub(self.num),
573 Op::Mul => s.mul(self.num),
574 Op::Div => s.div(self.num),
576 Op::Mod => s.rem(self.num),
578 Op::And | Op::Xor | Op::Or => {
580 debug_panic!("unsupported operation");
581 s
582 }
583 }
584 }
585}
586
/// Value a test compares against.
#[derive(Debug, Clone, Serialize, Deserialize)]
enum TestValue<T> {
    /// A concrete value to compare with.
    Value(T),
    /// Wildcard: whatever was read is accepted.
    Any,
}
592
593impl<T> TestValue<T> {
594 #[inline(always)]
595 fn as_ref(&self) -> TestValue<&T> {
596 match self {
597 Self::Value(v) => TestValue::Value(v),
598 Self::Any => TestValue::Any,
599 }
600 }
601}
602
// Modifier flags attached to regex tests (also carried by search tests).
flags! {
    enum ReMod: u8{
        // NOTE(review): presumably applied when the regex is compiled — confirm.
        CaseInsensitive,
        // NOTE(review): presumably selects the match start as the next offset — confirm.
        StartOffsetUpdate,
        // The test length counts lines rather than bytes; it is multiplied
        // by 80 when sizing the read.
        LineLimit,
        // Forces the test to be treated as binary (see `is_binary`).
        ForceBin,
        // Forces the test to be treated as text; overrides the binary hint
        // for search tests.
        ForceText,
        // NOTE(review): presumably trims the matched bytes — confirm.
        TrimMatch,
    }
}
613
614fn serialize_regex<S>(re: &bytes::Regex, serializer: S) -> Result<S::Ok, S::Error>
615where
616 S: serde::Serializer,
617{
618 re.as_str().serialize(serializer)
619}
620
621fn deserialize_regex<'de, D>(deserializer: D) -> Result<bytes::Regex, D::Error>
622where
623 D: serde::Deserializer<'de>,
624{
625 let wrapper = String::deserialize(deserializer)?;
626 bytes::Regex::new(&wrapper).map_err(serde::de::Error::custom)
627}
628
/// Test matching a regular expression against the input.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct RegexTest {
    /// Compiled expression; stored as its pattern string when serialized and
    /// recompiled on load.
    #[serde(
        serialize_with = "serialize_regex",
        deserialize_with = "deserialize_regex"
    )]
    re: bytes::Regex,
    /// Optional limit: a byte count, or a line count with `ReMod::LineLimit`.
    length: Option<usize>,
    /// Regex modifiers.
    mods: FlagSet<ReMod>,
    /// String modifiers also accepted on regex tests.
    str_mods: FlagSet<StringMod>,
    /// Length figure used when computing the test strength.
    /// NOTE(review): presumably the pattern length without metacharacters — confirm.
    non_magic_len: usize,
    /// Hint that the test targets binary content.
    binary: bool,
    /// Comparison operator; `Neq` turns "no match" into a success.
    cmp_op: CmpOp,
}
643
644impl RegexTest {
645 #[inline(always)]
646 fn is_binary(&self) -> bool {
647 self.binary
648 || self.mods.contains(ReMod::ForceBin)
649 || self.str_mods.contains(StringMod::ForceBin)
650 }
651
652 fn match_buf<'buf>(
653 &self,
654 off_buf: u64, stream_kind: StreamKind,
656 buf: &'buf [u8],
657 ) -> Option<MatchRes<'buf>> {
658 let mr = match stream_kind {
659 StreamKind::Text(_) => {
660 let mut off_txt = off_buf;
661
662 let mut line_limit = self.length.unwrap_or(usize::MAX);
663
664 for line in buf.split(|c| c == &b'\n') {
665 if line_limit == 0 {
669 break;
670 }
671
672 if let Some(re_match) = self.re.find(line) {
673 let start_offset = off_txt + re_match.start() as u64;
675
676 let stop_offset = if re_match.end() == line.len() {
678 Some(start_offset + re_match.as_bytes().len() as u64 + 1)
679 } else {
680 None
681 };
682
683 return Some(MatchRes::Bytes(
684 start_offset,
685 stop_offset,
686 re_match.as_bytes(),
687 Encoding::Utf8,
688 ));
689 }
690
691 off_txt += line.len() as u64;
692 off_txt += 1;
694 line_limit = line_limit.saturating_sub(1)
695 }
696 None
697 }
698
699 StreamKind::Binary => {
700 self.re.find(buf).map(|re_match| {
701 MatchRes::Bytes(
702 off_buf + re_match.start() as u64,
704 None,
705 re_match.as_bytes(),
706 Encoding::Utf8,
707 )
708 })
709 }
710 };
711
712 if self.cmp_op.is_neq() && mr.is_none() {
714 return Some(MatchRes::Bytes(off_buf, None, buf, Encoding::Utf8));
715 }
716
717 mr
718 }
719}
720
721impl From<RegexTest> for Test {
722 fn from(value: RegexTest) -> Self {
723 Self::Regex(value)
724 }
725}
726
// Modifier flags attached to string-like tests.
flags! {
    enum StringMod: u8{
        // Forces the test to be treated as binary.
        ForceBin,
        // Upper-case bytes of the test value match either case in the input.
        UpperInsensitive,
        // Lower-case bytes of the test value match either case in the input.
        LowerInsensitive,
        // The match must be followed by whitespace or the end of the buffer.
        FullWordMatch,
        // Surrounding ASCII whitespace is trimmed from the reported bytes.
        Trim,
        // Forces the test to be treated as text.
        ForceText,
        // A run of blanks in the test value needs at least as many blanks in
        // the input.
        CompactWhitespace,
        // Blanks are optional on both sides of the comparison.
        OptBlank,
    }
}
739
/// Test comparing bytes of the input with a string value.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct StringTest {
    /// Expected bytes, or a wildcard.
    test_val: TestValue<Vec<u8>>,
    /// Comparison operator (`Eq`, `Neq`, `Lt` and `Gt` are supported).
    cmp_op: CmpOp,
    /// Optional explicit number of bytes to read.
    length: Option<usize>,
    /// String modifiers.
    mods: FlagSet<StringMod>,
    /// Hint that the test targets binary content.
    binary: bool,
}
748
749impl From<StringTest> for Test {
750 fn from(value: StringTest) -> Self {
751 Self::String(value)
752 }
753}
754
755#[inline(always)]
756fn string_match(str: &[u8], mods: FlagSet<StringMod>, buf: &[u8]) -> (bool, usize) {
757 let mut consumed = 0;
758 if mods.is_disjoint(
760 StringMod::UpperInsensitive
761 | StringMod::LowerInsensitive
762 | StringMod::FullWordMatch
763 | StringMod::CompactWhitespace
764 | StringMod::OptBlank,
765 ) {
766 if buf.starts_with(str) {
768 (true, str.len())
769 } else {
770 (false, consumed)
771 }
772 } else {
773 let mut i_src = 0;
774 let mut iter = buf.iter().peekable();
775
776 macro_rules! consume_target {
777 () => {{
778 iter.next();
779 consumed += 1;
780 }};
781 }
782
783 macro_rules! continue_next_iteration {
784 () => {{
785 consume_target!();
786 i_src += 1;
787 continue;
788 }};
789 }
790
791 while let Some(&&b) = iter.peek() {
792 let Some(&ref_byte) = str.get(i_src) else {
793 break;
794 };
795
796 if mods.contains(StringMod::OptBlank) && (b == b' ' || ref_byte == b' ') {
797 if b == b' ' {
798 consume_target!();
800 }
801
802 if ref_byte == b' ' {
803 i_src += 1;
805 }
806
807 continue;
808 }
809
810 if mods.contains(StringMod::UpperInsensitive) {
811 if ref_byte.is_ascii_uppercase() && ref_byte == b.to_ascii_uppercase()
813 || ref_byte == b
814 {
815 continue_next_iteration!()
816 }
817 }
818
819 if mods.contains(StringMod::LowerInsensitive)
820 && (ref_byte.is_ascii_lowercase() && ref_byte == b.to_ascii_lowercase()
821 || ref_byte == b)
822 {
823 continue_next_iteration!()
824 }
825
826 if mods.contains(StringMod::CompactWhitespace) && ref_byte == b' ' {
827 let mut src_blk = 0;
828 while let Some(b' ') = str.get(i_src) {
829 src_blk += 1;
830 i_src += 1;
831 }
832
833 let mut tgt_blk = 0;
834 while let Some(b' ') = iter.peek() {
835 tgt_blk += 1;
836 consume_target!();
837 }
838
839 if src_blk > tgt_blk {
840 return (false, consumed);
841 }
842
843 continue;
844 }
845
846 if ref_byte == b {
847 continue_next_iteration!()
848 } else {
849 return (false, consumed);
850 }
851 }
852
853 if mods.contains(StringMod::FullWordMatch)
854 && let Some(b) = iter.peek()
855 && !b.is_ascii_whitespace()
856 {
857 return (false, consumed);
858 }
859
860 (consumed > 0 && consumed <= buf.len(), consumed)
861 }
862}
863
864impl StringTest {
865 fn has_length_mod(&self) -> bool {
866 !self.mods.is_disjoint(
867 StringMod::UpperInsensitive
868 | StringMod::LowerInsensitive
869 | StringMod::FullWordMatch
870 | StringMod::CompactWhitespace
871 | StringMod::OptBlank,
872 )
873 }
874
875 #[inline(always)]
876 fn test_value_len(&self) -> usize {
877 match self.test_val.as_ref() {
878 TestValue::Value(s) => s.len(),
879 TestValue::Any => 0,
880 }
881 }
882
883 #[inline(always)]
884 fn is_binary(&self) -> bool {
885 self.binary || self.mods.contains(StringMod::ForceBin)
886 }
887
888 #[inline(always)]
889 fn is_text(&self) -> bool {
890 self.mods.contains(StringMod::ForceText)
891 }
892}
893
/// Test searching for a byte string anywhere in a window of the input.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct SearchTest {
    /// Bytes to look for.
    str: Vec<u8>,
    /// Optional highest position (relative to the buffer start) at which a
    /// match may begin.
    n_pos: Option<usize>,
    /// String modifiers.
    str_mods: FlagSet<StringMod>,
    /// Regex modifiers also accepted on search tests.
    re_mods: FlagSet<ReMod>,
    /// Hint that the test targets binary content.
    binary: bool,
    /// Comparison operator; `Neq` turns "not found" into a success.
    cmp_op: CmpOp,
}
903
904impl From<SearchTest> for Test {
905 fn from(value: SearchTest) -> Self {
906 Self::Search(value)
907 }
908}
909
910impl SearchTest {
911 #[inline(always)]
912 fn is_binary(&self) -> bool {
913 (self.binary
914 || self.str_mods.contains(StringMod::ForceBin)
915 || self.re_mods.contains(ReMod::ForceBin))
916 && !(self.str_mods.contains(StringMod::ForceText)
917 || self.re_mods.contains(ReMod::ForceText))
918 }
919
920 #[inline]
922 fn match_buf<'buf>(&self, off_buf: u64, buf: &'buf [u8]) -> Option<MatchRes<'buf>> {
923 let mut i = 0;
924
925 let needle = self.str.first()?;
926
927 while i < buf.len() {
928 i += memchr(*needle, &buf[i..])?;
931
932 if self.str_mods.contains(StringMod::FullWordMatch) {
934 let prev_is_whitespace = buf
935 .get(i.saturating_sub(1))
936 .map(|c| c.is_ascii_whitespace())
937 .unwrap_or_default();
938
939 if i > 0 && !prev_is_whitespace {
944 i += 1;
945 continue;
946 }
947 }
948
949 if let Some(npos) = self.n_pos
950 && i > npos
951 {
952 break;
953 }
954
955 let pos = i;
956 let (ok, consumed) = string_match(&self.str, self.str_mods, &buf[i..]);
957
958 if ok {
959 return Some(MatchRes::Bytes(
960 off_buf.saturating_add(pos as u64),
961 None,
962 &buf[i..i + consumed],
963 Encoding::Utf8,
964 ));
965 } else {
966 i += max(consumed, 1)
967 }
968 }
969
970 if self.cmp_op.is_neq() {
972 return Some(MatchRes::Bytes(off_buf, None, buf, Encoding::Utf8));
973 }
974
975 None
976 }
977}
978
/// Test comparing an integer-like value read from the input.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct ScalarTest {
    /// Data type to read.
    ty: ScalarDataType,
    /// Optional operation applied to the read value before comparing.
    transform: Option<ScalarTransform>,
    /// Comparison operator.
    cmp_op: CmpOp,
    /// Value to compare with, or a wildcard.
    test_val: TestValue<Scalar>,
}
986
/// Test comparing a floating-point value read from the input.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct FloatTest {
    /// Data type to read.
    ty: FloatDataType,
    /// Optional operation applied to the read value before comparing.
    transform: Option<FloatTransform>,
    /// Comparison operator (`Eq`, `Lt`, `Gt` and `Neq` are supported).
    cmp_op: CmpOp,
    /// Value to compare with, or a wildcard.
    test_val: TestValue<Float>,
}
994
/// Raw value read from the input for a test, before any comparison.
/// The first field of every variant is the offset the value was read at.
#[derive(Debug, PartialEq)]
enum ReadValue<'buf> {
    /// Floating-point value.
    Float(u64, Float),
    /// Integer-like value.
    Scalar(u64, Scalar),
    /// Borrowed bytes.
    Bytes(u64, &'buf [u8]),
}
1003
1004impl DynDisplay for ReadValue<'_> {
1005 fn dyn_fmt(&self, f: &dyf::FormatSpec) -> Result<String, dyf::Error> {
1006 match self {
1007 Self::Float(_, s) => DynDisplay::dyn_fmt(s, f),
1008 Self::Scalar(_, s) => DynDisplay::dyn_fmt(s, f),
1009 Self::Bytes(_, b) => Ok(format!("{b:?}")),
1010 }
1011 }
1012}
1013
1014impl DynDisplay for &ReadValue<'_> {
1015 fn dyn_fmt(&self, f: &dyf::FormatSpec) -> Result<String, dyf::Error> {
1016 DynDisplay::dyn_fmt(*self, f)
1018 }
1019}
1020
1021impl Display for ReadValue<'_> {
1022 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1023 match self {
1024 Self::Float(_, v) => write!(f, "{v}"),
1025 Self::Scalar(_, s) => write!(f, "{s}"),
1026 Self::Bytes(_, b) => write!(f, "{b:?}"),
1027 }
1028 }
1029}
1030
/// Text encoding used to render matched bytes.
enum Encoding {
    /// UTF-16 with the given byte order.
    Utf16(String16Encoding),
    /// UTF-8 (rendered lossily).
    Utf8,
}
1035
/// Result of a successful test.
enum MatchRes<'buf> {
    /// Matched bytes: start offset, optional explicit end offset (when
    /// absent the end is `start + bytes.len()`), the bytes and their
    /// encoding.
    Bytes(u64, Option<u64>, &'buf [u8], Encoding),
    /// Matched scalar and the offset it was read at.
    Scalar(u64, Scalar),
    /// Matched float and the offset it was read at.
    Float(u64, Float),
}
1047
1048impl DynDisplay for &MatchRes<'_> {
1049 fn dyn_fmt(&self, f: &dyf::FormatSpec) -> Result<String, dyf::Error> {
1050 (*self).dyn_fmt(f)
1051 }
1052}
1053
1054impl DynDisplay for MatchRes<'_> {
1055 fn dyn_fmt(&self, f: &dyf::FormatSpec) -> Result<String, dyf::Error> {
1056 match self {
1057 Self::Scalar(_, v) => v.dyn_fmt(f),
1058 Self::Float(_, v) => v.dyn_fmt(f),
1059 Self::Bytes(_, _, v, enc) => match enc {
1060 Encoding::Utf8 => String::from_utf8_lossy(v).to_string().dyn_fmt(f),
1061 Encoding::Utf16(enc) => {
1062 let utf16: Vec<u16> = slice_to_utf16_iter(v, *enc).collect();
1063 String::from_utf16_lossy(&utf16).dyn_fmt(f)
1064 }
1065 },
1066 }
1067 }
1068}
1069
1070impl MatchRes<'_> {
1071 #[inline]
1073 fn start_offset(&self) -> u64 {
1074 match self {
1075 MatchRes::Bytes(o, _, _, _) => *o,
1076 MatchRes::Scalar(o, _) => *o,
1077 MatchRes::Float(o, _) => *o,
1078 }
1079 }
1080
1081 #[inline]
1083 fn end_offset(&self) -> u64 {
1084 match self {
1085 MatchRes::Bytes(start, end, buf, _) => match end {
1086 Some(end) => *end,
1087 None => start.saturating_add(buf.len() as u64),
1088 },
1089 MatchRes::Scalar(o, sc) => o.add(sc.size_of() as u64),
1090 MatchRes::Float(o, f) => o.add(f.size_of() as u64),
1091 }
1092 }
1093}
1094
1095fn slice_to_utf16_iter(read: &[u8], encoding: String16Encoding) -> impl Iterator<Item = u16> {
1096 let even = read
1097 .iter()
1098 .enumerate()
1099 .filter(|(i, _)| i % 2 == 0)
1100 .map(|t| t.1);
1101
1102 let odd = read
1103 .iter()
1104 .enumerate()
1105 .filter(|(i, _)| i % 2 != 0)
1106 .map(|t| t.1);
1107
1108 even.zip(odd).map(move |(e, o)| match encoding {
1109 String16Encoding::Le => u16::from_le_bytes([*e, *o]),
1110 String16Encoding::Be => u16::from_be_bytes([*e, *o]),
1111 })
1112}
1113
/// Byte order of a UTF-16 string.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
enum String16Encoding {
    /// Little-endian code units.
    Le,
    /// Big-endian code units.
    Be,
}
1119
/// Test comparing the input with a UTF-16 string.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct String16Test {
    /// Text whose bytes are reported when the test value matches.
    /// NOTE(review): presumably the string as written in the rule — confirm.
    orig: String,
    /// Expected UTF-16 code units, or a wildcard.
    test_val: TestValue<Vec<u16>>,
    /// Byte order used to decode the input.
    encoding: String16Encoding,
}
1126
1127impl String16Test {
1128 #[inline(always)]
1132 fn test_value_len(&self) -> usize {
1133 match self.test_val.as_ref() {
1134 TestValue::Value(str16) => str16.len(),
1135 TestValue::Any => 0,
1136 }
1137 }
1138}
1139
// Modifier flags attached to indirect tests.
flags! {
    enum IndirectMod: u8{
        // NOTE(review): presumably makes the indirect offset relative — confirm.
        Relative,
    }
}

/// Set of [`IndirectMod`] flags.
type IndirectMods = FlagSet<IndirectMod>;
1147
/// Width and byte order of the length prefix of a length-prefixed string.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
enum PStringLen {
    /// One-byte length.
    Byte,
    /// Two-byte big-endian length.
    ShortBe,
    /// Two-byte little-endian length.
    ShortLe,
    /// Four-byte big-endian length.
    LongBe,
    /// Four-byte little-endian length.
    LongLe,
}
1156
1157impl PStringLen {
1158 #[inline(always)]
1159 const fn size_of_len(&self) -> usize {
1160 match self {
1161 PStringLen::Byte => 1,
1162 PStringLen::ShortBe => 2,
1163 PStringLen::ShortLe => 2,
1164 PStringLen::LongBe => 4,
1165 PStringLen::LongLe => 4,
1166 }
1167 }
1168}
1169
/// Test comparing the input with a length-prefixed string.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct PStringTest {
    /// Layout of the length prefix.
    len: PStringLen,
    /// Expected bytes, or a wildcard.
    test_val: TestValue<Vec<u8>>,
    /// Whether the stored length also counts the prefix itself.
    include_len: bool,
}
1176
1177impl PStringTest {
1178 #[inline]
1179 fn read<'cache, R: Read + Seek>(
1180 &self,
1181 haystack: &'cache mut LazyCache<R>,
1182 ) -> Result<Option<&'cache [u8]>, Error> {
1183 let mut len = match self.len {
1184 PStringLen::Byte => read_le!(haystack, u8) as u32,
1185 PStringLen::ShortBe => read_be!(haystack, u16) as u32,
1186 PStringLen::ShortLe => read_le!(haystack, u16) as u32,
1187 PStringLen::LongBe => read_be!(haystack, u32),
1188 PStringLen::LongLe => read_le!(haystack, u32),
1189 } as usize;
1190
1191 if self.include_len {
1192 len = len.saturating_sub(self.len.size_of_len())
1193 }
1194
1195 if let TestValue::Value(s) = self.test_val.as_ref()
1196 && len != s.len()
1197 {
1198 return Ok(None);
1199 }
1200
1201 let read = haystack.read_exact_count(len as u64)?;
1202
1203 Ok(Some(read))
1204 }
1205
1206 #[inline(always)]
1207 fn test_value_len(&self) -> usize {
1208 match self.test_val.as_ref() {
1209 TestValue::Value(s) => s.len(),
1210 TestValue::Any => 0,
1211 }
1212 }
1213}
1214
/// A single test of a rule.
///
/// Only the scalar, float, string, search, pstring, regex and string16
/// variants read a value from the input; `read_test_value` returns an error
/// for the others.
#[derive(Debug, Clone, Serialize, Deserialize)]
enum Test {
    /// Carries a name.
    /// NOTE(review): presumably declares a named rule — confirm.
    Name(String),
    /// Carries a flag and a name.
    /// NOTE(review): presumably calls a named rule; meaning of the flag not visible here — confirm.
    Use(bool, String),
    /// Integer-like comparison.
    Scalar(ScalarTest),
    /// Floating-point comparison.
    Float(FloatTest),
    /// String comparison.
    String(StringTest),
    /// Substring search.
    Search(SearchTest),
    /// Length-prefixed string comparison.
    PString(PStringTest),
    /// Regular expression match.
    Regex(RegexTest),
    /// Indirect test with its modifiers.
    Indirect(FlagSet<IndirectMod>),
    /// UTF-16 string comparison.
    String16(String16Test),
    /// NOTE(review): presumably a DER test; marked as possibly unused — confirm.
    #[allow(dead_code)]
    Der,
    /// NOTE(review): semantics not visible in this view — confirm.
    Clear,
    /// NOTE(review): semantics not visible in this view — confirm.
    Default,
}
1233
1234impl Test {
    /// Reads from `haystack` the value this test has to be compared with.
    ///
    /// The returned value records the position reported by the cache before
    /// the read. `Ok(None)` is returned when a pstring's stored length rules
    /// out a match. Tests that carry no value to read, and string tests
    /// with an unsupported operator, return an error; I/O failures are
    /// propagated.
    #[inline]
    fn read_test_value<'haystack, R: Read + Seek>(
        &self,
        haystack: &'haystack mut LazyCache<R>,
        switch_endianness: bool,
    ) -> Result<Option<ReadValue<'haystack>>, Error> {
        // Captured before reading so it designates the start of the value.
        let test_value_offset = haystack.lazy_stream_position();

        match self {
            Self::Scalar(t) => {
                t.ty.read(haystack, switch_endianness)
                    .map(|s| Some(ReadValue::Scalar(test_value_offset, s)))
            }

            Self::Float(t) => {
                t.ty.read(haystack, switch_endianness)
                    .map(|f| Some(ReadValue::Float(test_value_offset, f)))
            }
            Self::String(t) => {
                match t.test_val.as_ref() {
                    TestValue::Value(str) => {
                        // An explicit length always wins.
                        let buf = if let Some(length) = t.length {
                            haystack.read_exact_count(length as u64)?
                        } else {
                            match t.cmp_op {
                                CmpOp::Eq | CmpOp::Neq => {
                                    if !t.has_length_mod() {
                                        // Plain comparison: exactly as many
                                        // bytes as the test value.
                                        haystack.read_exact_count(str.len() as u64)?
                                    } else {
                                        // Modifiers may make the match longer
                                        // than the test value, so read up to
                                        // the maximum.
                                        haystack.read_count(FILE_BYTES_MAX as u64)?
                                    }
                                }
                                CmpOp::Lt | CmpOp::Gt => {
                                    // NOTE(review): limit is 8092 here but 8192
                                    // in the wildcard branch below — confirm
                                    // whether the difference is intended.
                                    let read =
                                        haystack.read_until_any_delim_or_limit(b"\n\0", 8092)?;

                                    // Drop the trailing delimiter, if any.
                                    if read.ends_with(b"\0") || read.ends_with(b"\n") {
                                        &read[..read.len() - 1]
                                    } else {
                                        read
                                    }
                                }
                                _ => {
                                    return Err(Error::Msg(format!(
                                        "string test does not support {:?} operator",
                                        t.cmp_op
                                    )));
                                }
                            }
                        };

                        Ok(Some(ReadValue::Bytes(test_value_offset, buf)))
                    }
                    TestValue::Any => {
                        // Wildcard: read up to the next NUL or newline.
                        let read = haystack.read_until_any_delim_or_limit(b"\0\n", 8192)?;
                        // Drop the trailing delimiter, if any.
                        let bytes = if read.ends_with(b"\0") || read.ends_with(b"\n") {
                            &read[..read.len() - 1]
                        } else {
                            read
                        };

                        Ok(Some(ReadValue::Bytes(test_value_offset, bytes)))
                    }
                }
            }

            Self::String16(t) => {
                match t.test_val.as_ref() {
                    TestValue::Value(str16) => {
                        // Two bytes per UTF-16 code unit.
                        let read = haystack.read_exact_count((str16.len() * 2) as u64)?;

                        Ok(Some(ReadValue::Bytes(test_value_offset, read)))
                    }
                    TestValue::Any => {
                        let read = haystack.read_until_utf16_or_limit(b"\x00\x00", 8192)?;

                        // Keep an even number of bytes so that only whole
                        // code units remain.
                        let end = if read.len() % 2 == 0 {
                            read.len()
                        } else {
                            read.len().saturating_sub(1)
                        };

                        Ok(Some(ReadValue::Bytes(test_value_offset, &read[..end])))
                    }
                }
            }

            Self::PString(t) => {
                let Some(read) = t.read(haystack)? else {
                    return Ok(None);
                };
                Ok(Some(ReadValue::Bytes(test_value_offset, read)))
            }

            Self::Search(_) => {
                let buf = haystack.read_count(FILE_BYTES_MAX as u64)?;
                Ok(Some(ReadValue::Bytes(test_value_offset, buf)))
            }

            Self::Regex(r) => {
                let length = {
                    match r.length {
                        Some(len) => {
                            // With a line limit the length counts lines;
                            // 80 bytes are budgeted per line.
                            if r.mods.contains(ReMod::LineLimit) {
                                len * 80
                            } else {
                                len
                            }
                        }

                        None => FILE_REGEX_MAX,
                    }
                };

                let read = haystack.read_count(length as u64)?;
                Ok(Some(ReadValue::Bytes(test_value_offset, read)))
            }

            Self::Name(_)
            | Self::Use(_, _)
            | Self::Indirect(_)
            | Self::Clear
            | Self::Default
            | Self::Der => Err(Error::msg("no value to read for this test")),
        }
    }
1369
    /// Compares a value previously read from the input with this test.
    ///
    /// Returns the match result on success and `None` otherwise, including
    /// when the kind of `tv` does not fit the test or when a scalar
    /// transformation fails. `stream_kind` is only used by regex tests.
    #[inline(always)]
    fn match_value<'s>(
        &'s self,
        tv: &ReadValue<'s>,
        stream_kind: StreamKind,
    ) -> Option<MatchRes<'s>> {
        match (self, tv) {
            (Self::Scalar(t), ReadValue::Scalar(o, ts)) => {
                // Apply the optional transformation; a failed checked
                // operation means no match.
                let read_value: Scalar = match t.transform.as_ref() {
                    Some(t) => t.apply(*ts)?,
                    None => *ts,
                };

                match t.test_val {
                    TestValue::Value(test_value) => {
                        let ok = match t.cmp_op {
                            CmpOp::Not => read_value == !test_value,
                            CmpOp::Eq => read_value == test_value,
                            CmpOp::Lt => read_value < test_value,
                            CmpOp::Gt => read_value > test_value,
                            CmpOp::Neq => read_value != test_value,
                            // Bitwise tests: all bits set / no bit set.
                            CmpOp::BitAnd => read_value & test_value == test_value,
                            CmpOp::Xor => (read_value & test_value).is_zero(),
                        };

                        if ok {
                            Some(MatchRes::Scalar(*o, read_value))
                        } else {
                            None
                        }
                    }

                    TestValue::Any => Some(MatchRes::Scalar(*o, read_value)),
                }
            }

            (Self::Float(t), ReadValue::Float(o, f)) => {
                let read_value: Float = t.transform.as_ref().map(|t| t.apply(*f)).unwrap_or(*f);

                match t.test_val {
                    TestValue::Value(tf) => {
                        let ok = match t.cmp_op {
                            CmpOp::Eq => read_value == tf,
                            CmpOp::Lt => read_value < tf,
                            CmpOp::Gt => read_value > tf,
                            CmpOp::Neq => read_value != tf,
                            _ => {
                                // Bitwise operators are meaningless on
                                // floats: panic in debug builds, otherwise
                                // log and fail the test.
                                debug_panic!("unsupported float comparison");
                                debug!("unsupported float comparison");
                                false
                            }
                        };

                        if ok {
                            Some(MatchRes::Float(*o, read_value))
                        } else {
                            None
                        }
                    }
                    TestValue::Any => Some(MatchRes::Float(*o, read_value)),
                }
            }

            (Self::String(st), ReadValue::Bytes(o, buf)) => {
                // Trims ASCII whitespace when the `Trim` modifier is set.
                macro_rules! trim_buf {
                    ($buf: expr) => {{
                        if st.mods.contains(StringMod::Trim) {
                            $buf.trim_ascii()
                        } else {
                            $buf
                        }
                    }};
                }

                match st.test_val.as_ref() {
                    TestValue::Value(str) => {
                        match st.cmp_op {
                            // Eq and Neq report the test value itself.
                            CmpOp::Eq => {
                                if let (true, _) = string_match(str, st.mods, buf) {
                                    Some(MatchRes::Bytes(*o, None, trim_buf!(str), Encoding::Utf8))
                                } else {
                                    None
                                }
                            }
                            CmpOp::Neq => {
                                if let (false, _) = string_match(str, st.mods, buf) {
                                    Some(MatchRes::Bytes(*o, None, trim_buf!(str), Encoding::Utf8))
                                } else {
                                    None
                                }
                            }
                            // Gt and Lt compare lengths and report the bytes
                            // that were read.
                            CmpOp::Gt => {
                                if buf.len() > str.len() {
                                    Some(MatchRes::Bytes(*o, None, trim_buf!(buf), Encoding::Utf8))
                                } else {
                                    None
                                }
                            }
                            CmpOp::Lt => {
                                if buf.len() < str.len() {
                                    Some(MatchRes::Bytes(*o, None, trim_buf!(buf), Encoding::Utf8))
                                } else {
                                    None
                                }
                            }

                            _ => {
                                // Panic in debug builds, otherwise log and
                                // fail the test.
                                debug_panic!("unsupported string comparison");
                                debug!("unsupported string comparison");
                                None
                            }
                        }
                    }
                    TestValue::Any => {
                        Some(MatchRes::Bytes(*o, None, trim_buf!(buf), Encoding::Utf8))
                    }
                }
            }

            (Self::PString(m), ReadValue::Bytes(o, buf)) => match m.test_val.as_ref() {
                TestValue::Value(psv) => {
                    if buf == psv {
                        Some(MatchRes::Bytes(*o, None, buf, Encoding::Utf8))
                    } else {
                        None
                    }
                }
                TestValue::Any => Some(MatchRes::Bytes(*o, None, buf, Encoding::Utf8)),
            },

            (Self::String16(t), ReadValue::Bytes(o, buf)) => {
                match t.test_val.as_ref() {
                    TestValue::Value(str16) => {
                        // Two bytes per code unit: lengths must agree.
                        if str16.len() * 2 != buf.len() {
                            return None;
                        }

                        // Compare code unit by code unit.
                        for (i, utf16_char) in slice_to_utf16_iter(buf, t.encoding).enumerate() {
                            if str16[i] != utf16_char {
                                return None;
                            }
                        }

                        Some(MatchRes::Bytes(
                            *o,
                            None,
                            t.orig.as_bytes(),
                            Encoding::Utf16(t.encoding),
                        ))
                    }

                    TestValue::Any => {
                        Some(MatchRes::Bytes(*o, None, buf, Encoding::Utf16(t.encoding)))
                    }
                }
            }

            (Self::Regex(r), ReadValue::Bytes(o, buf)) => r.match_buf(*o, stream_kind, buf),

            (Self::Search(t), ReadValue::Bytes(o, buf)) => t.match_buf(*o, buf),

            // Value kind does not fit the test, or the test has no value.
            _ => None,
        }
    }
1543
1544 #[inline(always)]
1545 fn strength(&self) -> u64 {
1546 const MULT: usize = 10;
1547
1548 let mut out = 2 * MULT;
1549
1550 match self {
1552 Test::Scalar(s) => {
1553 out += s.ty.type_size() * MULT;
1554 }
1555
1556 Test::Float(t) => {
1557 out += t.ty.type_size() * MULT;
1558 }
1559
1560 Test::String(t) => out += t.test_value_len().saturating_mul(MULT),
1561
1562 Test::PString(t) => out += t.test_value_len().saturating_mul(MULT),
1563
1564 Test::Search(s) => {
1565 let n_pos = s.n_pos.unwrap_or(FILE_BYTES_MAX);
1570
1571 match n_pos {
1572 0..=80 => out += s.str.len().saturating_mul(MULT),
1574 81..=240 => out += s.str.len() * s.str.len().clamp(0, MULT - 2),
1576 _ => out += s.str.len(),
1578 }
1579 }
1580
1581 Test::Regex(r) => {
1582 let v = r.non_magic_len / r.re.captures_len();
1591
1592 let len = r
1593 .length
1594 .map(|l| {
1595 if r.mods.contains(ReMod::LineLimit) {
1596 l * 80
1597 } else {
1598 l
1599 }
1600 })
1601 .unwrap_or(FILE_BYTES_MAX);
1602
1603 match len {
1604 0..=80 => out += v.saturating_mul(MULT),
1606 81..=240 => out += v * v.clamp(0, MULT - 2),
1608 _ => out += v,
1610 }
1611 }
1612
1613 Test::String16(t) => {
1614 out += t.test_value_len().saturating_mul(MULT);
1619 }
1620
1621 Test::Der => out += MULT,
1622
1623 Test::Default | Test::Name(_) | Test::Use(_, _) | Test::Indirect(_) | Test::Clear => {
1624 return 0;
1625 }
1626 }
1627
1628 if self.is_match_any() {
1630 return 0;
1631 }
1632
1633 if let Some(op) = self.cmp_op() {
1634 match op {
1635 CmpOp::Neq => out = 0,
1637 CmpOp::Eq | CmpOp::Not => out += MULT,
1638 CmpOp::Lt | CmpOp::Gt => out -= 2 * MULT,
1639 CmpOp::Xor | CmpOp::BitAnd => out -= MULT,
1640 }
1641 }
1642
1643 out as u64
1644 }
1645
1646 #[inline(always)]
1647 fn cmp_op(&self) -> Option<CmpOp> {
1648 match self {
1649 Self::String(t) => Some(t.cmp_op),
1650 Self::Scalar(s) => Some(s.cmp_op),
1651 Self::Float(t) => Some(t.cmp_op),
1652 Self::Name(_)
1653 | Self::Use(_, _)
1654 | Self::Search(_)
1655 | Self::PString(_)
1656 | Self::Regex(_)
1657 | Self::Clear
1658 | Self::Default
1659 | Self::Indirect(_)
1660 | Self::String16(_)
1661 | Self::Der => None,
1662 }
1663 }
1664
1665 #[inline(always)]
1666 fn is_match_any(&self) -> bool {
1667 match self {
1668 Test::Name(_) => false,
1669 Test::Use(_, _) => false,
1670 Test::Scalar(scalar_test) => matches!(scalar_test.test_val, TestValue::Any),
1671 Test::Float(float_test) => matches!(float_test.test_val, TestValue::Any),
1672 Test::String(string_test) => matches!(string_test.test_val, TestValue::Any),
1673 Test::Search(_) => false,
1674 Test::PString(pstring_test) => matches!(pstring_test.test_val, TestValue::Any),
1675 Test::Regex(_) => false,
1676 Test::Indirect(_) => false,
1677 Test::String16(string16_test) => matches!(string16_test.test_val, TestValue::Any),
1678 Test::Der => false,
1679 Test::Clear => false,
1680 Test::Default => false,
1681 }
1682 }
1683
1684 #[inline(always)]
1685 fn is_binary(&self) -> bool {
1686 match self {
1687 Self::Name(_) => true,
1688 Self::Use(_, _) => true,
1689 Self::Scalar(_) => true,
1690 Self::Float(_) => true,
1691 Self::String(t) => !t.is_binary() & !t.is_text() || t.is_binary(),
1692 Self::Search(t) => t.is_binary(),
1693 Self::PString(_) => true,
1694 Self::Regex(t) => t.is_binary(),
1695 Self::Clear => true,
1696 Self::Default => true,
1697 Self::Indirect(_) => true,
1698 Self::String16(_) => true,
1699 Self::Der => true,
1700 }
1701 }
1702
1703 #[inline(always)]
1704 fn is_text(&self) -> bool {
1705 match self {
1706 Self::Name(_) => true,
1707 Self::Use(_, _) => true,
1708 Self::Indirect(_) => true,
1709 Self::Clear => true,
1710 Self::Default => true,
1711 Self::String(t) => !t.is_binary() & !t.is_text() || t.is_text(),
1712 _ => !self.is_binary(),
1713 }
1714 }
1715
1716 #[inline(always)]
1717 fn is_only_text(&self) -> bool {
1718 self.is_text() && !self.is_binary()
1719 }
1720
1721 #[inline(always)]
1722 fn is_only_binary(&self) -> bool {
1723 self.is_binary() && !self.is_text()
1724 }
1725}
1726
1727#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
1728enum OffsetType {
1729 Byte,
1730 DoubleLe,
1731 DoubleBe,
1732 ShortLe,
1733 ShortBe,
1734 Id3Le,
1735 Id3Be,
1736 LongLe,
1737 LongBe,
1738 Middle,
1739 Octal,
1740 QuadBe,
1741 QuadLe,
1742}
1743
1744#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
1745enum Shift {
1746 Direct(u64),
1747 Indirect(i64),
1748}
1749
1750#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
1751struct IndOffset {
1752 off_addr: DirOffset,
1754 signed: bool,
1756 ty: OffsetType,
1758 op: Option<Op>,
1759 shift: Option<Shift>,
1760}
1761
1762impl IndOffset {
1763 fn read_offset<R: Read + Seek>(
1765 &self,
1766 haystack: &mut LazyCache<R>,
1767 rule_base_offset: Option<u64>,
1768 last_upper_match_offset: Option<u64>,
1769 ) -> Result<Option<u64>, io::Error> {
1770 let offset_address = match self.off_addr {
1771 DirOffset::Start(s) => {
1772 let Some(o) = s.checked_add(rule_base_offset.unwrap_or_default()) else {
1773 return Ok(None);
1774 };
1775
1776 haystack.seek(SeekFrom::Start(o))?
1777 }
1778 DirOffset::LastUpper(c) => haystack.seek(SeekFrom::Start(
1779 (last_upper_match_offset.unwrap_or_default() as i64 + c) as u64,
1780 ))?,
1781 DirOffset::End(e) => haystack.seek(SeekFrom::End(e))?,
1782 };
1783
1784 macro_rules! read_value {
1785 () => {
1786 match self.ty {
1787 OffsetType::Byte => {
1788 if self.signed {
1789 read_le!(haystack, u8) as u64
1790 } else {
1791 read_le!(haystack, i8) as u64
1792 }
1793 }
1794 OffsetType::DoubleLe => read_le!(haystack, f64) as u64,
1795 OffsetType::DoubleBe => read_be!(haystack, f64) as u64,
1796 OffsetType::ShortLe => {
1797 if self.signed {
1798 read_le!(haystack, i16) as u64
1799 } else {
1800 read_le!(haystack, u16) as u64
1801 }
1802 }
1803 OffsetType::ShortBe => {
1804 if self.signed {
1805 read_be!(haystack, i16) as u64
1806 } else {
1807 read_be!(haystack, u16) as u64
1808 }
1809 }
1810 OffsetType::Id3Le => decode_id3(read_le!(haystack, u32)) as u64,
1811 OffsetType::Id3Be => decode_id3(read_be!(haystack, u32)) as u64,
1812 OffsetType::LongLe => {
1813 if self.signed {
1814 read_le!(haystack, i32) as u64
1815 } else {
1816 read_le!(haystack, u32) as u64
1817 }
1818 }
1819 OffsetType::LongBe => {
1820 if self.signed {
1821 read_be!(haystack, i32) as u64
1822 } else {
1823 read_be!(haystack, u32) as u64
1824 }
1825 }
1826 OffsetType::Middle => read_me!(haystack) as u64,
1827 OffsetType::Octal => {
1828 if let Some(o) = read_octal_u64(haystack) {
1829 o
1830 } else {
1831 debug!("failed to read octal offset @ {offset_address}");
1832 return Ok(None);
1833 }
1834 }
1835 OffsetType::QuadLe => {
1836 if self.signed {
1837 read_le!(haystack, i64) as u64
1838 } else {
1839 read_le!(haystack, u64)
1840 }
1841 }
1842 OffsetType::QuadBe => {
1843 if self.signed {
1844 read_be!(haystack, i64) as u64
1845 } else {
1846 read_be!(haystack, u64)
1847 }
1848 }
1849 }
1850 };
1851 }
1852
1853 let o = read_value!();
1855
1856 trace!(
1857 "offset read @ {offset_address} value={o} op={:?} shift={:?}",
1858 self.op, self.shift
1859 );
1860
1861 if let (Some(op), Some(shift)) = (self.op, self.shift) {
1863 let shift = match shift {
1864 Shift::Direct(i) => i,
1865 Shift::Indirect(i) => {
1866 let tmp = offset_address as i128 + i as i128;
1867 if tmp.is_negative() {
1868 return Ok(None);
1869 } else {
1870 haystack.seek(SeekFrom::Start(tmp as u64))?;
1871 };
1872 read_value!()
1875 }
1876 };
1877
1878 match op {
1879 Op::Add => return Ok(o.checked_add(shift)),
1880 Op::Mul => return Ok(o.checked_mul(shift)),
1881 Op::Sub => return Ok(o.checked_sub(shift)),
1882 Op::Div => return Ok(o.checked_div(shift)),
1883 Op::Mod => return Ok(o.checked_rem(shift)),
1884 Op::And => return Ok(Some(o & shift)),
1885 Op::Or => return Ok(Some(o | shift)),
1886 Op::Xor => return Ok(Some(o ^ shift)),
1887 }
1888 }
1889
1890 Ok(Some(o))
1891 }
1892}
1893
1894#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
1895enum DirOffset {
1896 Start(u64),
1897 LastUpper(i64),
1899 End(i64),
1900}
1901
1902#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
1903enum Offset {
1904 Direct(DirOffset),
1905 Indirect(IndOffset),
1906}
1907
1908impl From<DirOffset> for Offset {
1909 fn from(value: DirOffset) -> Self {
1910 Self::Direct(value)
1911 }
1912}
1913
1914impl From<IndOffset> for Offset {
1915 fn from(value: IndOffset) -> Self {
1916 Self::Indirect(value)
1917 }
1918}
1919
1920impl Display for DirOffset {
1921 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1922 match self {
1923 DirOffset::Start(i) => write!(f, "{i}"),
1924 DirOffset::LastUpper(c) => write!(f, "&{c}"),
1925 DirOffset::End(e) => write!(f, "-{e}"),
1926 }
1927 }
1928}
1929
1930impl Default for DirOffset {
1931 fn default() -> Self {
1932 Self::LastUpper(0)
1933 }
1934}
1935
1936#[derive(Debug, Clone, Serialize, Deserialize)]
1937struct Match {
1938 line: usize,
1939 depth: u8,
1940 offset: Offset,
1941 test: Test,
1942 test_strength: u64,
1943 message: Option<Message>,
1944}
1945
1946impl From<Use> for Match {
1947 fn from(value: Use) -> Self {
1948 let test = Test::Use(value.switch_endianness, value.rule_name);
1949 let test_strength = test.strength();
1950 Self {
1951 line: value.line,
1952 depth: value.depth,
1953 offset: value.start_offset,
1954 test,
1955 test_strength,
1956 message: value.message,
1957 }
1958 }
1959}
1960
1961impl From<Name> for Match {
1962 fn from(value: Name) -> Self {
1963 let test = Test::Name(value.name);
1964 let test_strength = test.strength();
1965 Self {
1966 line: value.line,
1967 depth: 0,
1968 offset: Offset::Direct(DirOffset::Start(0)),
1969 test,
1970 test_strength,
1971 message: value.message,
1972 }
1973 }
1974}
1975
1976impl Match {
1977 #[inline(always)]
1979 fn offset_from_start<R: Read + Seek>(
1980 &self,
1981 haystack: &mut LazyCache<R>,
1982 rule_base_offset: Option<u64>,
1983 last_level_offset: Option<u64>,
1984 ) -> Result<Option<u64>, io::Error> {
1985 match self.offset {
1986 Offset::Direct(dir_offset) => match dir_offset {
1987 DirOffset::Start(s) => Ok(Some(s)),
1988 DirOffset::LastUpper(shift) => {
1989 let o = last_level_offset.unwrap_or_default() as i64 + shift;
1990
1991 if o.is_positive() {
1992 Ok(Some(o as u64))
1993 } else {
1994 Ok(None)
1995 }
1996 }
1997 DirOffset::End(e) => Ok(Some(haystack.offset_from_start(SeekFrom::End(e)))),
1998 },
1999 Offset::Indirect(ind_offset) => {
2000 let Some(o) =
2001 ind_offset.read_offset(haystack, rule_base_offset, last_level_offset)?
2002 else {
2003 return Ok(None);
2004 };
2005
2006 Ok(Some(o))
2007 }
2008 }
2009 }
2010
2011 #[inline]
2024 #[allow(clippy::too_many_arguments)]
2025 fn matches<'a: 'h, 'h, R: Read + Seek>(
2026 &'a self,
2027 source: Option<&str>,
2028 magic: &mut Magic<'a>,
2029 stream_kind: StreamKind,
2030 state: &mut MatchState,
2031 buf_base_offset: Option<u64>,
2032 rule_base_offset: Option<u64>,
2033 last_level_offset: Option<u64>,
2034 haystack: &'h mut LazyCache<R>,
2035 switch_endianness: bool,
2036 db: &'a MagicDb,
2037 depth: usize,
2038 ) -> Result<(bool, Option<MatchRes<'h>>), Error> {
2039 let source = source.unwrap_or("unknown");
2040 let line = self.line;
2041
2042 if depth >= MAX_RECURSION {
2043 return Err(Error::localized(
2044 source,
2045 line,
2046 Error::MaximumRecursion(MAX_RECURSION),
2047 ));
2048 }
2049
2050 if self.test.is_only_binary() && stream_kind.is_text() {
2051 trace!("skip binary test source={source} line={line} stream_kind={stream_kind:?}",);
2052 return Ok((false, None));
2053 }
2054
2055 if self.test.is_only_text() && !stream_kind.is_text() {
2056 trace!("skip text test source={source} line={line} stream_kind={stream_kind:?}",);
2057 return Ok((false, None));
2058 }
2059
2060 let Ok(Some(mut offset)) = self
2061 .offset_from_start(haystack, rule_base_offset, last_level_offset)
2062 .inspect_err(|e| debug!("source={source} line={line} failed at computing offset: {e}"))
2063 else {
2064 return Ok((false, None));
2065 };
2066
2067 offset = match self.offset {
2068 Offset::Indirect(_) => {
2069 buf_base_offset.unwrap_or_default().saturating_add(offset)
2074 }
2075 Offset::Direct(DirOffset::Start(_)) => {
2077 rule_base_offset.unwrap_or_default().saturating_add(offset)
2078 }
2079 _ => offset,
2080 };
2081
2082 match &self.test {
2083 Test::Clear => {
2084 trace!("source={source} line={line} clear");
2085 state.clear_continuation_level(&self.continuation_level());
2086 Ok((true, None))
2087 }
2088
2089 Test::Name(name) => {
2090 trace!(
2091 "source={source} line={line} running rule {name} switch_endianness={switch_endianness}",
2092 );
2093 Ok((true, None))
2094 }
2095
2096 Test::Use(flip_endianness, rule_name) => {
2097 trace!(
2098 "source={source} line={line} use {rule_name} switch_endianness={flip_endianness}",
2099 );
2100
2101 let switch_endianness = switch_endianness ^ flip_endianness;
2103
2104 let dr: &DependencyRule = db.dependencies.get(rule_name).ok_or(
2105 Error::localized(source, line, Error::MissingRule(rule_name.clone())),
2106 )?;
2107
2108 if let Some(msg) = self.message.as_ref() {
2110 magic.push_message(msg.to_string_lossy());
2111 }
2112
2113 dr.rule.magic(
2114 magic,
2115 stream_kind,
2116 buf_base_offset,
2117 Some(offset),
2118 haystack,
2119 db,
2120 switch_endianness,
2121 depth.saturating_add(1),
2122 )?;
2123
2124 Ok((false, None))
2126 }
2127
2128 Test::Indirect(m) => {
2129 trace!(
2130 "source={source} line={line} indirect mods={:?} offset={offset:#x}",
2131 m
2132 );
2133
2134 let new_buf_base_off = if m.contains(IndirectMod::Relative) {
2135 Some(offset)
2136 } else {
2137 None
2138 };
2139
2140 if let Some(msg) = self.message.as_ref() {
2142 magic.push_message(msg.to_string_lossy());
2143 }
2144
2145 for r in db.rules.iter() {
2146 let messages_cnt = magic.message.len();
2147
2148 r.magic(
2149 magic,
2150 stream_kind,
2151 new_buf_base_off,
2152 Some(offset),
2153 haystack,
2154 db,
2155 false,
2156 depth.saturating_add(1),
2157 )?;
2158
2159 if magic.message.len() != messages_cnt {
2161 break;
2162 }
2163 }
2164
2165 Ok((false, None))
2167 }
2168
2169 Test::Default => {
2170 let ok = !state.get_continuation_level(&self.continuation_level());
2172
2173 trace!("source={source} line={line} default match={ok}");
2174 if ok {
2175 state.set_continuation_level(self.continuation_level());
2176 }
2177
2178 Ok((ok, None))
2179 }
2180
2181 _ => {
2182 if let Err(e) = haystack.seek(SeekFrom::Start(offset)) {
2183 debug!("source={source} line={line} failed to seek in haystack: {e}");
2184 return Ok((false, None));
2185 }
2186
2187 let mut trace_msg = None;
2188
2189 if enabled!(Level::DEBUG) {
2190 trace_msg = Some(vec![format!(
2191 "source={source} line={line} depth={} stream_offset={:#x}",
2192 self.depth,
2193 haystack.lazy_stream_position()
2194 )])
2195 }
2196
2197 if let Ok(opt_test_value) = self
2201 .test
2202 .read_test_value(haystack, switch_endianness)
2203 .inspect_err(|e| {
2204 debug!("source={source} line={line} error while reading test value @{offset}: {e}",)
2205 })
2206 {
2207 if let Some(v) = trace_msg
2208 .as_mut() { v.push(format!("test={:?}", self.test)) }
2209
2210 let match_res =
2211 opt_test_value.and_then(|tv| self.test.match_value(&tv, stream_kind));
2212
2213 if let Some(v) = trace_msg.as_mut() { v.push(format!(
2214 "message=\"{}\" match={}",
2215 self.message
2216 .as_ref()
2217 .map(|fs| fs.to_string_lossy())
2218 .unwrap_or_default(),
2219 match_res.is_some()
2220 )) }
2221
2222 if enabled!(Level::DEBUG) && !enabled!(Level::TRACE) && match_res.is_some() {
2224 if let Some(m) = trace_msg{
2225 debug!("{}", m.join(" "));
2226 }
2227 } else if enabled!(Level::TRACE)
2228 && let Some(m) = trace_msg{
2229 trace!("{}", m.join(" "));
2230 }
2231
2232 if let Some(mr) = match_res {
2233 state.set_continuation_level(self.continuation_level());
2234 return Ok((true, Some(mr)));
2235 }
2236 }
2237
2238 Ok((false, None))
2239 }
2240 }
2241 }
2242
2243 #[inline(always)]
2244 fn continuation_level(&self) -> ContinuationLevel {
2245 ContinuationLevel(self.depth)
2246 }
2247}
2248
2249#[derive(Debug, Clone)]
2250struct Use {
2251 line: usize,
2252 depth: u8,
2253 start_offset: Offset,
2254 rule_name: String,
2255 switch_endianness: bool,
2256 message: Option<Message>,
2257}
2258
2259#[derive(Debug, Clone, Serialize, Deserialize)]
2260struct StrengthMod {
2261 op: Op,
2262 by: u8,
2263}
2264
2265impl StrengthMod {
2266 #[inline(always)]
2267 fn apply(&self, strength: u64) -> u64 {
2268 let by = self.by as u64;
2269 debug!("applying strength modifier: {strength} {} {}", self.op, by);
2270 match self.op {
2271 Op::Mul => strength.saturating_mul(by),
2272 Op::Add => strength.saturating_add(by),
2273 Op::Sub => strength.saturating_sub(by),
2274 Op::Div => {
2275 if by > 0 {
2276 strength.saturating_div(by)
2277 } else {
2278 strength
2279 }
2280 }
2281 Op::Mod => strength % by,
2282 Op::And => strength & by,
2283 Op::Xor | Op::Or => {
2286 debug_panic!("unsupported strength operator");
2287 strength
2288 }
2289 }
2290 }
2291}
2292
2293#[derive(Debug, Clone)]
2294enum Flag {
2295 Mime(String),
2296 Ext(HashSet<String>),
2297 Strength(StrengthMod),
2298 Apple(String),
2299}
2300
2301#[derive(Debug, Clone)]
2302struct Name {
2303 line: usize,
2304 name: String,
2305 message: Option<Message>,
2306}
2307
2308#[derive(Debug, Clone)]
2309enum Entry<'span> {
2310 Match(Span<'span>, Match),
2311 Flag(Span<'span>, Flag),
2312}
2313
2314#[derive(Debug, Clone, Serialize, Deserialize)]
2315struct EntryNode {
2316 root: bool,
2317 entry: Match,
2318 children: Vec<EntryNode>,
2319 mimetype: Option<String>,
2320 apple: Option<String>,
2321 strength_mod: Option<StrengthMod>,
2322 exts: HashSet<String>,
2323}
2324
2325impl EntryNode {
2326 fn update_exts_rec(
2327 &self,
2328 exts: &mut HashSet<String>,
2329 deps: &HashMap<String, DependencyRule>,
2330 marked: &mut HashSet<String>,
2331 ) -> Result<(), ()> {
2332 for ext in self.exts.iter() {
2333 if !exts.contains(ext) {
2334 exts.insert(ext.clone());
2335 }
2336 }
2337
2338 for c in self.children.iter() {
2339 if let Test::Use(_, ref name) = c.entry.test {
2340 if marked.contains(name) {
2341 continue;
2342 }
2343 if let Some(r) = deps.get(name) {
2344 marked.insert(name.clone());
2345 exts.extend(r.rule.fetch_all_extensions(deps, marked)?);
2346 } else {
2347 return Err(());
2348 }
2349 } else {
2350 c.update_exts_rec(exts, deps, marked)?;
2351 }
2352 }
2353
2354 Ok(())
2355 }
2356
2357 fn update_score_rec(
2358 &self,
2359 depth: usize,
2360 score: &mut u64,
2361 deps: &HashMap<String, DependencyRule>,
2362 marked: &mut HashSet<String>,
2363 ) {
2364 if depth == 3 {
2365 return;
2366 }
2367
2368 *score += self
2369 .children
2370 .iter()
2371 .map(|e| e.entry.test_strength)
2372 .min()
2373 .unwrap_or_default();
2374
2375 for c in self.children.iter() {
2376 if let Test::Use(_, ref name) = c.entry.test {
2377 if marked.contains(name) {
2378 continue;
2379 }
2380
2381 if let Some(r) = deps.get(name) {
2382 marked.insert(name.clone());
2383 *score += r.rule.compute_score(depth, deps, marked);
2384 }
2385 }
2386 c.update_score_rec(depth + 1, score, deps, marked);
2387 }
2388 }
2389
2390 #[inline]
2391 #[allow(clippy::too_many_arguments)]
2392 fn matches<'r, R: Read + Seek>(
2393 &'r self,
2394 opt_source: Option<&str>,
2395 magic: &mut Magic<'r>,
2396 state: &mut MatchState,
2397 stream_kind: StreamKind,
2398 buf_base_offset: Option<u64>,
2399 rule_base_offset: Option<u64>,
2400 last_level_offset: Option<u64>,
2401 haystack: &mut LazyCache<R>,
2402 db: &'r MagicDb,
2403 switch_endianness: bool,
2404 depth: usize,
2405 ) -> Result<(), Error> {
2406 let (ok, opt_match_res) = self.entry.matches(
2407 opt_source,
2408 magic,
2409 stream_kind,
2410 state,
2411 buf_base_offset,
2412 rule_base_offset,
2413 last_level_offset,
2414 haystack,
2415 switch_endianness,
2416 db,
2417 depth,
2418 )?;
2419
2420 let source = opt_source.unwrap_or("unknown");
2421 let line = self.entry.line;
2422
2423 if ok {
2424 if let Some(msg) = self.entry.message.as_ref()
2426 && let Ok(msg) = msg.format_with(opt_match_res.as_ref()).inspect_err(|e| {
2427 debug!("source={source} line={line} failed to format message: {e}")
2428 })
2429 {
2430 magic.push_message(msg);
2431 }
2432
2433 if let Some(mr) = opt_match_res {
2435 match &self.entry.test {
2436 Test::String(t) => {
2437 if t.has_length_mod() {
2438 let o = mr.end_offset();
2439 haystack.seek(SeekFrom::Start(o))?;
2440 }
2441 }
2442 Test::Search(t) => {
2443 if t.re_mods.contains(ReMod::StartOffsetUpdate) {
2444 let o = mr.start_offset();
2445 haystack.seek(SeekFrom::Start(o))?;
2446 } else {
2447 let o = mr.end_offset();
2448 haystack.seek(SeekFrom::Start(o))?;
2449 }
2450 }
2451
2452 Test::Regex(t) => {
2453 if t.mods.contains(ReMod::StartOffsetUpdate) {
2454 let o = mr.start_offset();
2455 haystack.seek(SeekFrom::Start(o))?;
2456 } else {
2457 let o = mr.end_offset();
2458 haystack.seek(SeekFrom::Start(o))?;
2459 }
2460 }
2461 _ => {}
2463 }
2464 }
2465
2466 if let Some(mimetype) = self.mimetype.as_ref() {
2467 magic.set_mime_type(Cow::Borrowed(mimetype));
2468 }
2469
2470 if let Some(apple_ty) = self.apple.as_ref() {
2471 magic.set_creator_code(Cow::Borrowed(apple_ty));
2472 }
2473
2474 if !self.exts.is_empty() {
2475 magic.insert_extensions(self.exts.iter().map(|s| s.as_str()));
2476 }
2477
2478 let mut strength = self.entry.test_strength;
2482
2483 let continuation_level = self.entry.continuation_level().0 as u64;
2484 if self.entry.message.is_none() && continuation_level < 3 {
2485 strength = strength.saturating_add(continuation_level);
2486 }
2487
2488 if let Some(sm) = self.strength_mod.as_ref() {
2489 strength = sm.apply(strength);
2490 }
2491
2492 if self.entry.message.is_none() {
2494 strength += 1
2495 }
2496
2497 magic.update_strength(strength);
2498
2499 let end_upper_level = haystack.lazy_stream_position();
2500
2501 let rule_base_offset = if self.root {
2509 match self.entry.offset {
2510 Offset::Direct(DirOffset::End(o)) => {
2511 Some(haystack.offset_from_start(SeekFrom::End(o)))
2512 }
2513 _ => rule_base_offset,
2514 }
2515 } else {
2516 rule_base_offset
2517 };
2518
2519 for e in self.children.iter() {
2520 e.matches(
2521 opt_source,
2522 magic,
2523 state,
2524 stream_kind,
2525 buf_base_offset,
2526 rule_base_offset,
2527 Some(end_upper_level),
2528 haystack,
2529 db,
2530 switch_endianness,
2531 depth,
2532 )?
2533 }
2534 }
2535
2536 Ok(())
2537 }
2538}
2539
2540#[derive(Debug, Clone, Serialize, Deserialize)]
2542pub struct MagicRule {
2543 id: usize,
2544 source: Option<String>,
2545 entries: EntryNode,
2546 extensions: HashSet<String>,
2547 score: u64,
2549 finalized: bool,
2550}
2551
2552impl MagicRule {
2553 #[inline(always)]
2554 fn set_id(&mut self, id: usize) {
2555 self.id = id
2556 }
2557
2558 fn fetch_all_extensions(
2562 &self,
2563 deps: &HashMap<String, DependencyRule>,
2564 marked: &mut HashSet<String>,
2565 ) -> Result<HashSet<String>, ()> {
2566 let mut exts = HashSet::new();
2567 self.entries.update_exts_rec(&mut exts, deps, marked)?;
2568 Ok(exts)
2569 }
2570
2571 fn compute_score(
2574 &self,
2575 depth: usize,
2576 deps: &HashMap<String, DependencyRule>,
2577 marked: &mut HashSet<String>,
2578 ) -> u64 {
2579 let mut score = 0;
2580 score += self.entries.entry.test_strength;
2581 self.entries
2582 .update_score_rec(depth, &mut score, deps, marked);
2583 score
2584 }
2585
2586 fn try_finalize(&mut self, deps: &HashMap<String, DependencyRule>) {
2589 if self.finalized {
2590 return;
2591 }
2592
2593 let Ok(exts) = self.fetch_all_extensions(deps, &mut HashSet::new()) else {
2594 return;
2595 };
2596
2597 self.extensions.extend(exts);
2598
2599 self.score = self.compute_score(0, deps, &mut HashSet::new());
2603 self.finalized = true
2604 }
2605
2606 #[inline]
2607 fn magic_entrypoint<'r, R: Read + Seek>(
2608 &'r self,
2609 magic: &mut Magic<'r>,
2610 stream_kind: StreamKind,
2611 haystack: &mut LazyCache<R>,
2612 db: &'r MagicDb,
2613 switch_endianness: bool,
2614 depth: usize,
2615 ) -> Result<(), Error> {
2616 self.entries.matches(
2617 self.source.as_deref(),
2618 magic,
2619 &mut MatchState::empty(),
2620 stream_kind,
2621 None,
2622 None,
2623 None,
2624 haystack,
2625 db,
2626 switch_endianness,
2627 depth,
2628 )
2629 }
2630
2631 #[inline]
2632 #[allow(clippy::too_many_arguments)]
2633 fn magic<'r, R: Read + Seek>(
2634 &'r self,
2635 magic: &mut Magic<'r>,
2636 stream_kind: StreamKind,
2637 buf_base_offset: Option<u64>,
2638 rule_base_offset: Option<u64>,
2639 haystack: &mut LazyCache<R>,
2640 db: &'r MagicDb,
2641 switch_endianness: bool,
2642 depth: usize,
2643 ) -> Result<(), Error> {
2644 self.entries.matches(
2645 self.source.as_deref(),
2646 magic,
2647 &mut MatchState::empty(),
2648 stream_kind,
2649 buf_base_offset,
2650 rule_base_offset,
2651 None,
2652 haystack,
2653 db,
2654 switch_endianness,
2655 depth,
2656 )
2657 }
2658
2659 pub fn is_text(&self) -> bool {
2665 self.entries.entry.test.is_text()
2666 && self.entries.children.iter().all(|e| e.entry.test.is_text())
2667 }
2668
2669 #[inline(always)]
2675 pub fn score(&self) -> u64 {
2676 self.score
2677 }
2678
2679 #[inline(always)]
2685 pub fn source(&self) -> Option<&str> {
2686 self.source.as_deref()
2687 }
2688
2689 #[inline(always)]
2695 pub fn line(&self) -> usize {
2696 self.entries.entry.line
2697 }
2698
2699 #[inline(always)]
2705 pub fn extensions(&self) -> &HashSet<String> {
2706 &self.extensions
2707 }
2708}
2709
2710#[derive(Debug, Clone, Serialize, Deserialize)]
2711struct DependencyRule {
2712 name: String,
2713 rule: MagicRule,
2714}
2715
2716#[derive(Debug, Clone, Serialize, Deserialize)]
2722pub struct MagicSource {
2723 rules: Vec<MagicRule>,
2724 dependencies: HashMap<String, DependencyRule>,
2725}
2726
2727impl MagicSource {
2728 pub fn open<P: AsRef<Path>>(p: P) -> Result<Self, Error> {
2738 FileMagicParser::parse_file(p)
2739 }
2740}
2741
/// Continuation level of an entry; indexes the flags kept by `MatchState`.
#[derive(Debug, Hash, PartialEq, Eq, Clone, Copy)]
struct ContinuationLevel(u8);
2744
/// Encoding guessed for a text stream.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
enum TextEncoding {
    /// ASCII text.
    Ascii,
    /// UTF-8 text.
    Utf8,
    /// Text whose encoding could not be determined.
    Unknown,
}

impl TextEncoding {
    /// Returns the display name of the encoding.
    const fn as_magic_str(&self) -> &'static str {
        match *self {
            Self::Ascii => "ASCII",
            Self::Utf8 => "UTF-8",
            Self::Unknown => "Unknown",
        }
    }
}
2762
2763#[derive(Debug, PartialEq, Eq, Clone, Copy)]
2764enum StreamKind {
2765 Binary,
2766 Text(TextEncoding),
2767}
2768
2769impl StreamKind {
2770 const fn is_text(&self) -> bool {
2771 matches!(self, StreamKind::Text(_))
2772 }
2773}
2774
2775#[derive(Debug)]
2776struct MatchState {
2777 continuation_levels: [bool; 256],
2778}
2779
2780impl MatchState {
2781 #[inline(always)]
2782 fn empty() -> Self {
2783 MatchState {
2784 continuation_levels: [false; 256],
2785 }
2786 }
2787
2788 #[inline(always)]
2789 fn get_continuation_level(&mut self, level: &ContinuationLevel) -> bool {
2790 self.continuation_levels
2791 .get(level.0 as usize)
2792 .cloned()
2793 .unwrap_or_default()
2794 }
2795
2796 #[inline(always)]
2797 fn set_continuation_level(&mut self, level: ContinuationLevel) {
2798 if let Some(b) = self.continuation_levels.get_mut(level.0 as usize) {
2799 *b = true
2800 }
2801 }
2802
2803 #[inline(always)]
2804 fn clear_continuation_level(&mut self, level: &ContinuationLevel) {
2805 if let Some(b) = self.continuation_levels.get_mut(level.0 as usize) {
2806 *b = false;
2807 }
2808 }
2809}
2810
2811#[derive(Debug, Default)]
2813pub struct Magic<'m> {
2814 stream_kind: Option<StreamKind>,
2815 source: Option<Cow<'m, str>>,
2816 message: Vec<Cow<'m, str>>,
2817 mime_type: Option<Cow<'m, str>>,
2818 creator_code: Option<Cow<'m, str>>,
2819 strength: u64,
2820 exts: HashSet<Cow<'m, str>>,
2821 is_default: bool,
2822}
2823
2824impl<'m> Magic<'m> {
2825 #[inline(always)]
2826 fn set_source(&mut self, source: Option<&'m str>) {
2827 self.source = source.map(Cow::Borrowed);
2828 }
2829
2830 #[inline(always)]
2831 fn set_stream_kind(&mut self, stream_kind: StreamKind) {
2832 self.stream_kind = Some(stream_kind)
2833 }
2834
2835 #[inline(always)]
2836 fn reset(&mut self) {
2837 self.stream_kind = None;
2838 self.source = None;
2839 self.message.clear();
2840 self.mime_type = None;
2841 self.creator_code = None;
2842 self.strength = 0;
2843 self.exts.clear();
2844 self.is_default = false;
2845 }
2846
2847 #[inline]
2855 pub fn into_owned<'owned>(self) -> Magic<'owned> {
2856 Magic {
2857 stream_kind: self.stream_kind,
2858 source: self.source.map(|s| Cow::Owned(s.into_owned())),
2859 message: self
2860 .message
2861 .into_iter()
2862 .map(Cow::into_owned)
2863 .map(Cow::Owned)
2864 .collect(),
2865 mime_type: self.mime_type.map(|m| Cow::Owned(m.into_owned())),
2866 creator_code: self.creator_code.map(|m| Cow::Owned(m.into_owned())),
2867 strength: self.strength,
2868 exts: self
2869 .exts
2870 .into_iter()
2871 .map(|e| Cow::Owned(e.into_owned()))
2872 .collect(),
2873 is_default: self.is_default,
2874 }
2875 }
2876
2877 #[inline(always)]
2883 pub fn message(&self) -> String {
2884 let mut out = String::new();
2885 for (i, m) in self.message.iter().enumerate() {
2886 if let Some(s) = m.strip_prefix(r#"\b"#) {
2887 out.push_str(s);
2888 } else {
2889 if i > 0 {
2891 out.push(' ');
2892 }
2893 out.push_str(m);
2894 }
2895 }
2896 out
2897 }
2898
2899 #[inline]
2910 pub fn message_parts(&self) -> impl Iterator<Item = &str> {
2911 self.message.iter().map(|p| p.as_ref())
2912 }
2913
2914 #[inline(always)]
2915 fn update_strength(&mut self, value: u64) {
2916 self.strength = self.strength.saturating_add(value);
2917 debug!("updated strength = {:?}", self.strength)
2918 }
2919
2920 #[inline(always)]
2926 pub fn mime_type(&self) -> &str {
2927 self.mime_type.as_deref().unwrap_or(match self.stream_kind {
2928 Some(StreamKind::Text(_)) => DEFAULT_TEXT_MIMETYPE,
2929 Some(StreamKind::Binary) | None => DEFAULT_BIN_MIMETYPE,
2930 })
2931 }
2932
2933 #[inline(always)]
2934 fn push_message<'a: 'm>(&mut self, msg: Cow<'a, str>) {
2935 if !msg.is_empty() {
2936 debug!("pushing message: msg={msg} len={}", msg.len());
2937 self.message.push(msg);
2938 }
2939 }
2940
2941 #[inline(always)]
2942 fn set_mime_type<'a: 'm>(&mut self, mime: Cow<'a, str>) {
2943 if self.mime_type.is_none() {
2944 debug!("insert mime: {:?}", mime);
2945 self.mime_type = Some(mime)
2946 }
2947 }
2948
2949 #[inline(always)]
2950 fn set_creator_code<'a: 'm>(&mut self, apple_ty: Cow<'a, str>) {
2951 if self.creator_code.is_none() {
2952 debug!("insert apple type: {apple_ty:?}");
2953 self.creator_code = Some(apple_ty)
2954 }
2955 }
2956
2957 #[inline(always)]
2958 fn insert_extensions<'a: 'm, I: Iterator<Item = &'a str>>(&mut self, exts: I) {
2959 if self.exts.is_empty() {
2960 self.exts.extend(exts.filter_map(|e| {
2961 if e.is_empty() {
2962 None
2963 } else {
2964 Some(Cow::Borrowed(e))
2965 }
2966 }));
2967 }
2968 }
2969
2970 #[inline(always)]
2978 pub fn strength(&self) -> u64 {
2979 self.strength
2980 }
2981
2982 #[inline(always)]
2988 pub fn source(&self) -> Option<&str> {
2989 self.source.as_deref()
2990 }
2991
2992 #[inline(always)]
2998 pub fn creator_code(&self) -> Option<&str> {
2999 self.creator_code.as_deref()
3000 }
3001
3002 #[inline(always)]
3008 pub fn extensions(&self) -> &HashSet<Cow<'m, str>> {
3009 &self.exts
3010 }
3011
3012 #[inline(always)]
3018 pub fn is_default(&self) -> bool {
3019 self.is_default
3020 }
3021}
3022
3023#[derive(Debug, Default, Clone, Serialize, Deserialize)]
3025pub struct MagicDb {
3026 rule_id: usize,
3027 rules: Vec<MagicRule>,
3028 dependencies: HashMap<String, DependencyRule>,
3029}
3030
/// Heuristically tells whether `bytes` looks like text.
///
/// Empty input and input containing a NUL byte are never text. Otherwise
/// more than 85% of the bytes must be printable ASCII, tab, line feed or
/// carriage return, and fewer than 20% may be anything else.
#[inline(always)]
fn is_likely_text(bytes: &[u8]) -> bool {
    if bytes.is_empty() {
        return false;
    }

    let mut printable = 0usize;
    let mut other = 0usize;

    for &byte in bytes {
        if byte == 0x00 {
            return false;
        }

        if matches!(byte, 0x09 | 0x0A | 0x0D | 0x20..=0x7E) {
            printable += 1;
        } else {
            other += 1;
        }
    }

    let total = bytes.len() as f64;
    let printable_ratio = printable as f64 / total;
    let other_ratio = other as f64 / total;

    printable_ratio > 0.85 && other_ratio < 0.20
}
3057
3058#[inline(always)]
3059fn guess_stream_kind<S: AsRef<[u8]>>(stream: S) -> StreamKind {
3060 let Ok(s) = str::from_utf8(stream.as_ref()) else {
3061 if is_likely_text(stream.as_ref()) {
3062 return StreamKind::Text(TextEncoding::Unknown);
3063 } else {
3064 return StreamKind::Binary;
3065 }
3066 };
3067
3068 let count = s.chars().count();
3069 let mut is_ascii = true;
3070
3071 for c in s.chars().take(count.saturating_sub(1)) {
3072 is_ascii &= c.is_ascii()
3073 }
3074
3075 if is_ascii {
3076 StreamKind::Text(TextEncoding::Ascii)
3077 } else {
3078 StreamKind::Text(TextEncoding::Utf8)
3079 }
3080}
3081
3082impl MagicDb {
3083 fn open_reader<R: Read + Seek>(f: R) -> Result<LazyCache<R>, Error> {
3084 Ok(LazyCache::<R>::from_read_seek(f)
3085 .and_then(|lc| lc.with_hot_cache(2 * FILE_BYTES_MAX))?)
3086 .map(|lc| lc.with_warm_cache(100 << 20))
3087 }
3088
3089 pub fn new() -> Self {
3095 Self::default()
3096 }
3097
3098 #[inline(always)]
3099 fn next_rule_id(&mut self) -> usize {
3100 let t = self.rule_id;
3101 self.rule_id += 1;
3102 t
3103 }
3104
3105 #[inline(always)]
3106 fn try_json<R: Read + Seek>(
3107 haystack: &mut LazyCache<R>,
3108 stream_kind: StreamKind,
3109 magic: &mut Magic,
3110 ) -> Result<bool, Error> {
3111 if matches!(stream_kind, StreamKind::Binary) {
3113 return Ok(false);
3114 }
3115
3116 let buf = haystack.read_range(0..FILE_BYTES_MAX as u64)?.trim_ascii();
3117
3118 let Some((start, end)) = find_json_boundaries(buf) else {
3119 return Ok(false);
3120 };
3121
3122 for c in buf[0..start].iter() {
3125 if !c.is_ascii_whitespace() {
3126 return Ok(false);
3127 }
3128 }
3129
3130 let mut is_ndjson = false;
3131
3132 trace!("maybe a json document");
3133 let ok = serde_json::from_slice::<serde_json::Value>(&buf[start..=end]).is_ok();
3134 if !ok {
3135 return Ok(false);
3136 }
3137
3138 if end + 1 < buf.len() {
3140 let buf = &buf[end + 1..];
3142 if let Some((second_start, second_end)) = find_json_boundaries(buf) {
3143 if memchr(b'\n', &buf[..second_start]).is_some() {
3145 trace!("might be ndjson");
3146 is_ndjson = serde_json::from_slice::<serde_json::Value>(
3147 &buf[second_start..=second_end],
3148 )
3149 .is_ok();
3150 }
3151 }
3152 }
3153
3154 if is_ndjson {
3155 magic.push_message(Cow::Borrowed("New Line Delimited"));
3156 magic.set_mime_type(Cow::Borrowed("application/x-ndjson"));
3157 magic.insert_extensions(["ndjson", "jsonl"].into_iter());
3158 } else {
3159 magic.set_mime_type(Cow::Borrowed("application/json"));
3160 magic.insert_extensions(["json"].into_iter());
3161 }
3162
3163 magic.push_message(Cow::Borrowed("JSON text data"));
3164 magic.set_source(Some(HARDCODED_SOURCE));
3165 magic.update_strength(HARDCODED_MAGIC_STRENGTH);
3166 Ok(true)
3167 }
3168
3169 #[inline(always)]
3170 fn try_csv<R: Read + Seek>(
3171 haystack: &mut LazyCache<R>,
3172 stream_kind: StreamKind,
3173 magic: &mut Magic,
3174 ) -> Result<bool, Error> {
3175 let StreamKind::Text(enc) = stream_kind else {
3177 return Ok(false);
3178 };
3179
3180 let buf = haystack.read_range(0..FILE_BYTES_MAX as u64)?;
3181 let mut reader = csv::Reader::from_reader(io::Cursor::new(buf));
3182 let mut records = reader.records();
3183
3184 let Some(Ok(first)) = records.next() else {
3185 return Ok(false);
3186 };
3187
3188 if first.len() <= 1 {
3192 return Ok(false);
3193 }
3194
3195 let mut n = 1;
3197 for i in records.take(9) {
3198 if let Ok(rec) = i {
3199 if first.len() != rec.len() {
3200 return Ok(false);
3201 }
3202 } else {
3203 return Ok(false);
3204 }
3205 n += 1;
3206 }
3207
3208 if n != 10 {
3210 return Ok(false);
3211 }
3212
3213 magic.set_mime_type(Cow::Borrowed("text/csv"));
3214 magic.push_message(Cow::Borrowed("CSV"));
3215 magic.push_message(Cow::Borrowed(enc.as_magic_str()));
3216 magic.push_message(Cow::Borrowed("text"));
3217 magic.insert_extensions(["csv"].into_iter());
3218 magic.set_source(Some(HARDCODED_SOURCE));
3219 magic.update_strength(HARDCODED_MAGIC_STRENGTH);
3220 Ok(true)
3221 }
3222
3223 #[inline(always)]
3224 fn try_tar<R: Read + Seek>(
3225 haystack: &mut LazyCache<R>,
3226 stream_kind: StreamKind,
3227 magic: &mut Magic,
3228 ) -> Result<bool, Error> {
3229 if !matches!(stream_kind, StreamKind::Binary) {
3231 return Ok(false);
3232 }
3233
3234 let buf = haystack.read_range(0..FILE_BYTES_MAX as u64)?;
3235 let mut ar = Archive::new(io::Cursor::new(buf));
3236
3237 let Ok(mut entries) = ar.entries() else {
3238 return Ok(false);
3239 };
3240
3241 let Some(Ok(first)) = entries.next() else {
3242 return Ok(false);
3243 };
3244
3245 let header = first.header();
3246
3247 if header.as_ustar().is_some() {
3248 magic.push_message(Cow::Borrowed("POSIX tar archive"));
3249 } else if header.as_gnu().is_some() {
3250 magic.push_message(Cow::Borrowed("POSIX tar archive (GNU)"));
3251 } else {
3252 magic.push_message(Cow::Borrowed("tar archive"));
3253 }
3254
3255 magic.set_mime_type(Cow::Borrowed("application/x-tar"));
3256 magic.set_source(Some(HARDCODED_SOURCE));
3257 magic.update_strength(HARDCODED_MAGIC_STRENGTH);
3258 magic.insert_extensions(["tar"].into_iter());
3259 Ok(true)
3260 }
3261
3262 #[inline(always)]
3263 fn try_hard_magic<R: Read + Seek>(
3264 haystack: &mut LazyCache<R>,
3265 stream_kind: StreamKind,
3266 magic: &mut Magic,
3267 ) -> Result<bool, Error> {
3268 Ok(Self::try_json(haystack, stream_kind, magic)?
3269 || Self::try_csv(haystack, stream_kind, magic)?
3270 || Self::try_tar(haystack, stream_kind, magic)?)
3271 }
3272
3273 #[inline(always)]
3274 fn magic_default<'m, R: Read + Seek>(
3275 haystack: &mut LazyCache<R>,
3276 stream_kind: StreamKind,
3277 magic: &mut Magic<'m>,
3278 ) -> Result<(), Error> {
3279 let buf = haystack.read_range(0..FILE_BYTES_MAX as u64)?;
3280
3281 magic.set_source(Some(HARDCODED_SOURCE));
3282 magic.set_stream_kind(stream_kind);
3283 magic.is_default = true;
3284
3285 if buf.is_empty() {
3286 magic.push_message(Cow::Borrowed("empty"));
3287 magic.set_mime_type(Cow::Borrowed(DEFAULT_BIN_MIMETYPE));
3288 return Ok(());
3289 }
3290
3291 match stream_kind {
3292 StreamKind::Binary => {
3293 magic.push_message(Cow::Borrowed("data"));
3294 }
3295 StreamKind::Text(e) => {
3296 magic.push_message(Cow::Borrowed(e.as_magic_str()));
3297 magic.push_message(Cow::Borrowed("text"));
3298 }
3299 }
3300
3301 Ok(())
3302 }
3303
3304 pub fn load(&mut self, mf: MagicSource) -> Result<&mut Self, Error> {
3314 for rule in mf.rules.into_iter() {
3315 let mut rule = rule;
3316 rule.set_id(self.next_rule_id());
3317
3318 self.rules.push(rule);
3319 }
3320
3321 self.dependencies.extend(mf.dependencies);
3322 self.prepare();
3323 Ok(self)
3324 }
3325
    /// Returns the rules currently loaded in the database.
    pub fn rules(&self) -> &[MagicRule] {
        &self.rules
    }
3334
3335 #[inline]
3336 fn first_magic_with_stream_kind<R: Read + Seek>(
3337 &self,
3338 haystack: &mut LazyCache<R>,
3339 stream_kind: StreamKind,
3340 extension: Option<&str>,
3341 ) -> Result<Magic<'_>, Error> {
3342 let mut magic = Magic::default();
3344
3345 if Self::try_hard_magic(haystack, stream_kind, &mut magic)? {
3346 return Ok(magic);
3347 }
3348
3349 let mut marked = vec![false; self.rules.len()];
3350
3351 macro_rules! do_magic {
3352 ($rule: expr) => {{
3353 $rule.magic_entrypoint(&mut magic, stream_kind, haystack, &self, false, 0)?;
3354
3355 if !magic.message.is_empty() {
3356 magic.set_stream_kind(stream_kind);
3357 magic.set_source($rule.source.as_deref());
3358 return Ok(magic);
3359 }
3360
3361 magic.reset();
3362 }};
3363 }
3364
3365 if let Some(ext) = extension.map(|e| e.to_lowercase())
3366 && !ext.is_empty()
3367 {
3368 for rule in self.rules.iter().filter(|r| r.extensions.contains(&ext)) {
3369 do_magic!(rule);
3370 if let Some(f) = marked.get_mut(rule.id) {
3371 *f = true
3372 }
3373 }
3374 }
3375
3376 for rule in self
3377 .rules
3378 .iter()
3379 .filter(|r| !*marked.get(r.id).unwrap_or(&false))
3381 {
3382 do_magic!(rule)
3383 }
3384
3385 Self::magic_default(haystack, stream_kind, &mut magic)?;
3386
3387 Ok(magic)
3388 }
3389
3390 pub fn first_magic<R: Read + Seek>(
3404 &self,
3405 r: &mut R,
3406 extension: Option<&str>,
3407 ) -> Result<Magic<'_>, Error> {
3408 let mut haystack = Self::open_reader(r)?;
3409 let stream_kind = guess_stream_kind(haystack.read_range(0..FILE_BYTES_MAX as u64)?);
3410 self.first_magic_with_stream_kind(&mut haystack, stream_kind, extension)
3411 }
3412
3413 #[inline(always)]
3414 fn all_magics_sort_with_stream_kind<R: Read + Seek>(
3415 &self,
3416 haystack: &mut LazyCache<R>,
3417 stream_kind: StreamKind,
3418 ) -> Result<Vec<Magic<'_>>, Error> {
3419 let mut out = Vec::new();
3420
3421 let mut magic = Magic::default();
3422
3423 if Self::try_hard_magic(haystack, stream_kind, &mut magic)? {
3424 out.push(magic);
3425 magic = Magic::default();
3426 }
3427
3428 for rule in self.rules.iter() {
3429 rule.magic_entrypoint(&mut magic, stream_kind, haystack, self, false, 0)?;
3430
3431 if !magic.message.is_empty() {
3433 magic.set_stream_kind(stream_kind);
3434 magic.set_source(rule.source.as_deref());
3435 out.push(magic);
3436 magic = Magic::default();
3437 }
3438
3439 magic.reset();
3440 }
3441
3442 Self::magic_default(haystack, stream_kind, &mut magic)?;
3443 out.push(magic);
3444
3445 out.sort_by_key(|b| std::cmp::Reverse(b.strength()));
3446
3447 Ok(out)
3448 }
3449
3450 pub fn all_magics<R: Read + Seek>(&self, r: &mut R) -> Result<Vec<Magic<'_>>, Error> {
3460 let mut haystack = Self::open_reader(r)?;
3461 let stream_kind = guess_stream_kind(haystack.read_range(0..FILE_BYTES_MAX as u64)?);
3462 self.all_magics_sort_with_stream_kind(&mut haystack, stream_kind)
3463 }
3464
3465 #[inline(always)]
3466 fn best_magic_with_stream_kind<R: Read + Seek>(
3467 &self,
3468 haystack: &mut LazyCache<R>,
3469 stream_kind: StreamKind,
3470 ) -> Result<Magic<'_>, Error> {
3471 let magics = self.all_magics_sort_with_stream_kind(haystack, stream_kind)?;
3472
3473 return Ok(magics
3475 .into_iter()
3476 .next()
3477 .expect("magics must at least contain default"));
3478 }
3479
3480 pub fn best_magic<R: Read + Seek>(&self, r: &mut R) -> Result<Magic<'_>, Error> {
3490 let mut haystack = Self::open_reader(r)?;
3491 let stream_kind = guess_stream_kind(haystack.read_range(0..FILE_BYTES_MAX as u64)?);
3492 self.best_magic_with_stream_kind(&mut haystack, stream_kind)
3493 }
3494
3495 pub fn serialize<W: Write>(self, w: &mut W) -> Result<(), Error> {
3501 let mut encoder = GzEncoder::new(w, Compression::best());
3502
3503 bincode::serde::encode_into_std_write(&self, &mut encoder, bincode::config::standard())?;
3504 encoder.finish()?;
3505 Ok(())
3506 }
3507
3508 pub fn deserialize<R: Read>(r: &mut R) -> Result<Self, Error> {
3518 let mut buf = vec![];
3519 let mut gz = GzDecoder::new(r);
3520 gz.read_to_end(&mut buf).map_err(|e| {
3521 bincode::error::DecodeError::OtherString(format!("failed to read: {e}"))
3522 })?;
3523 let (sdb, _): (MagicDb, usize) =
3524 bincode::serde::decode_from_slice(&buf, bincode::config::standard())?;
3525 Ok(sdb)
3526 }
3527
3528 #[inline(always)]
3529 fn prepare(&mut self) {
3530 self.rules
3531 .iter_mut()
3532 .for_each(|r| r.try_finalize(&self.dependencies));
3533
3534 self.rules.sort_by_key(|r| (r.is_text(), -(r.score as i64)));
3536 }
3537}
3538
3539#[cfg(test)]
3540mod tests {
3541 use std::io::Cursor;
3542
3543 use regex::bytes::Regex;
3544
3545 use crate::utils::unix_local_time_to_string;
3546
3547 use super::*;
3548
    // Builds a `LazyCache` reading from an in-memory cursor over the
    // literal `$l`. Panics if the cache cannot be created.
    macro_rules! lazy_cache {
        ($l: literal) => {
            LazyCache::from_read_seek(Cursor::new($l)).unwrap()
        };
    }
3554
3555 fn first_magic(
3556 rule: &str,
3557 content: &[u8],
3558 stream_kind: StreamKind,
3559 ) -> Result<Magic<'static>, Error> {
3560 let mut md = MagicDb::new();
3561 md.load(
3562 FileMagicParser::parse_str(rule, None)
3563 .inspect_err(|e| eprintln!("{e}"))
3564 .unwrap(),
3565 )
3566 .unwrap();
3567 let mut reader = LazyCache::from_read_seek(Cursor::new(content)).unwrap();
3568 let v = md.best_magic_with_stream_kind(&mut reader, stream_kind)?;
3569 Ok(v.into_owned())
3570 }
3571
    // Debugging helper: installs a `tracing_subscriber` printing everything
    // up to the TRACE level. The result of `try_init` is not checked.
    // Not used by default, hence the `allow`.
    #[allow(unused_macros)]
    macro_rules! enable_trace {
        () => {
            tracing_subscriber::fmt()
                .with_max_level(tracing_subscriber::filter::LevelFilter::TRACE)
                .try_init();
        };
    }
3581
    // Asserts that the rule `$rule` can be parsed. On failure the parsing
    // error is printed on stderr before panicking.
    macro_rules! parse_assert {
        ($rule:literal) => {
            FileMagicParser::parse_str($rule, None)
                .inspect_err(|e| eprintln!("{e}"))
                .unwrap();
        };
    }
3589
    // Evaluates `$rule` against `$content` handled as a binary stream.
    //
    // - two arguments: evaluates to the resulting magic, panicking only if
    //   the evaluation returns an error
    // - three arguments: asserts the message of the resulting magic is
    //   equal to `$message`
    //
    // NOTE(review): the two arguments form does not assert that the rule
    // matched (the default magic is accepted as well) — confirm whether a
    // `!is_default()` assertion is wanted.
    macro_rules! assert_magic_match_bin {
        ($rule: literal, $content:literal) => {{ first_magic($rule, $content, StreamKind::Binary).unwrap() }};
        ($rule: literal, $content:literal, $message:expr) => {{
            assert_eq!(
                first_magic($rule, $content, StreamKind::Binary)
                    .unwrap()
                    .message(),
                $message
            );
        }};
    }
3601
    // Evaluates `$rule` against `$content` handled as a UTF-8 text stream.
    //
    // - two arguments: evaluates to the resulting magic, panicking only if
    //   the evaluation returns an error
    // - three arguments: asserts the message of the resulting magic is
    //   equal to `$message`
    //
    // NOTE(review): the two arguments form does not assert that the rule
    // matched (the default magic is accepted as well) — confirm whether a
    // `!is_default()` assertion is wanted.
    macro_rules! assert_magic_match_text {
        ($rule: literal, $content:literal) => {{ first_magic($rule, $content, StreamKind::Text(TextEncoding::Utf8)).unwrap() }};
        ($rule: literal, $content:literal, $message:expr) => {{
            assert_eq!(
                first_magic($rule, $content, StreamKind::Text(TextEncoding::Utf8))
                    .unwrap()
                    .message(),
                $message
            );
        }};
    }
3613
    // Asserts that `$rule` does not match `$content` handled as a UTF-8
    // text stream: the best magic must be the default one.
    macro_rules! assert_magic_not_match_text {
        ($rule: literal, $content:literal) => {{
            assert!(
                first_magic($rule, $content, StreamKind::Text(TextEncoding::Utf8))
                    .unwrap()
                    .is_default()
            );
        }};
    }
3623
    // Asserts that `$rule` does not match `$content` handled as a binary
    // stream: the best magic must be the default one.
    macro_rules! assert_magic_not_match_bin {
        ($rule: literal, $content:literal) => {{
            assert!(
                first_magic($rule, $content, StreamKind::Binary)
                    .unwrap()
                    .is_default()
            );
        }};
    }
3633
    /// Exercises `regex` tests: shebang detection with `%s` formatting,
    /// binary patterns, offsets relative to a regex match and line
    /// anchored patterns.
    #[test]
    fn test_regex() {
        // the message is built from what the second regex captured
        assert_magic_match_text!(
            r#"
0 regex/1024 \^#![[:space:]]*/usr/bin/env[[:space:]]+
!:mime text/x-shellscript
>&0 regex/64 .*($|\\b) %s shell script text executable
 "#,
            br#"#!/usr/bin/env bash
 echo hello world"#,
            "bash shell script text executable"
        );

        // sanity check: the regex crate handles raw bytes without unicode mode
        let re = Regex::new(r"(?-u)\x42\x82").unwrap();
        assert!(re.is_match(b"\x42\x82"));

        assert_magic_match_bin!(
            r#"0 regex \x42\x82 binary regex match"#,
            b"\x00\x00\x00\x00\x00\x00\x42\x82"
        );

        // the string test is located right after the bytes the regex matched
        assert_magic_match_bin!(
            r#"
 0 regex \x42\x82
 >&0 string \xde\xad\xbe\xef it works
 "#,
            b"\x00\x00\x00\x00\x00\x00\x42\x82\xde\xad\xbe\xef"
        );

        // with `/s` the string test starts with the bytes the regex matched
        assert_magic_match_bin!(
            r#"
 0 regex/s \x42\x82
 >&0 string \x42\x82\xde\xad\xbe\xef it works
 "#,
            b"\x00\x00\x00\x00\x00\x00\x42\x82\xde\xad\xbe\xef"
        );

        // anchors apply to a line located in the middle of the content
        assert_magic_match_text!(
            r#"
0 regex/1024 \^HelloWorld$ HelloWorld String"#,
            br#"
// this is a comment after an empty line
HelloWorld
 "#
        );
    }
3683
    /// Exercises `string` tests combined with the `w`, `C`, `c`, `f` and
    /// `W` modifiers.
    #[test]
    fn test_string_with_mods() {
        // `/w` modifier
        assert_magic_match_text!(
            r#"0 string/w #!\ \ \ /usr/bin/env\ bash BASH
 "#,
            b"#! /usr/bin/env bash i
 echo hello world"
        );

        // `/C`: all lower case content is accepted ...
        assert_magic_match_text!(
            r#"0 string/C HelloWorld it works
 "#,
            b"helloworld"
        );

        // ... but content with the case swapped is not
        assert_magic_not_match_text!(
            r#"0 string/C HelloWorld it works
 "#,
            b"hELLOwORLD"
        );

        // `/c`: all upper case content is accepted ...
        assert_magic_match_text!(
            r#"0 string/c HelloWorld it works
 "#,
            b"HELLOWORLD"
        );

        // ... but all lower case content is not
        assert_magic_not_match_text!(
            r#"0 string/c HelloWorld it works
 "#,
            b"helloworld"
        );

        // `/f`: the content ends right after the pattern
        assert_magic_match_text!(
            r#"0 string/f #!/usr/bin/env\ bash BASH
 "#,
            b"#!/usr/bin/env bash"
        );

        // `/f`: the pattern is only the beginning of a longer word
        assert_magic_not_match_text!(
            r#"0 string/f #!/usr/bin/python PYTHON"#,
            b"#!/usr/bin/pythonic"
        );

        // `/W` modifier
        assert_magic_match_text!(
            r#"0 string/W #!/usr/bin/env\ python PYTHON"#,
            b"#!/usr/bin/env python"
        );

        // `/W`: two blanks in the pattern against a single one in the content
        assert_magic_not_match_text!(
            r#"0 string/W #!/usr/bin/env\ \ python PYTHON"#,
            b"#!/usr/bin/env python"
        );
    }
3742
    /// Exercises `search` tests with modifiers and relative offsets.
    #[test]
    fn test_search_with_mods() {
        assert_magic_match_text!(
            r#"0 search/1/fwt #!\ /usr/bin/luatex LuaTex script text executable"#,
            b"#! /usr/bin/luatex "
        );

        // with `/s` the relative string test starts with the searched pattern
        assert_magic_match_text!(
            r#"
 0 search/s /usr/bin/env
 >&0 string /usr/bin/env it works
 "#,
            b"#!/usr/bin/env python"
        );

        // without `/s` the same relative string test must not match
        assert_magic_not_match_text!(
            r#"
 0 search /usr/bin/env
 >&0 string /usr/bin/env it works
 "#,
            b"#!/usr/bin/env python"
        );
    }
3767
    /// Exercises `pstring` tests with the different length prefix
    /// flavours.
    ///
    /// In the fixtures, `H`/`h` come with a two bytes prefix (big/little
    /// endian), `L`/`l` with a four bytes prefix (big/little endian), and
    /// the `J` variants with a length counting the prefix itself.
    #[test]
    fn test_pstring() {
        // one byte length prefix
        assert_magic_match_bin!(r#"0 pstring Toast it works"#, b"\x05Toast");

        assert_magic_match_bin!(r#"0 pstring Toast %s"#, b"\x05Toast", "Toast");

        assert_magic_not_match_bin!(r#"0 pstring Toast Doesn't work"#, b"\x07Toaster");

        // two bytes big endian length prefix
        assert_magic_match_bin!(r#"0 pstring/H Toast it works"#, b"\x00\x05Toast");

        assert_magic_match_bin!(r#"0 pstring/HJ Toast it works"#, b"\x00\x07Toast");

        assert_magic_match_bin!(r#"0 pstring/HJ Toast %s"#, b"\x00\x07Toast", "Toast");

        // two bytes little endian length prefix
        assert_magic_match_bin!(r#"0 pstring/h Toast it works"#, b"\x05\x00Toast");

        assert_magic_match_bin!(r#"0 pstring/hJ Toast it works"#, b"\x07\x00Toast");

        // four bytes big endian length prefix
        assert_magic_match_bin!(r#"0 pstring/L Toast it works"#, b"\x00\x00\x00\x05Toast");

        assert_magic_match_bin!(r#"0 pstring/LJ Toast it works"#, b"\x00\x00\x00\x09Toast");

        // four bytes little endian length prefix
        assert_magic_match_bin!(r#"0 pstring/l Toast it works"#, b"\x05\x00\x00\x00Toast");

        assert_magic_match_bin!(r#"0 pstring/lJ Toast it works"#, b"\x09\x00\x00\x00Toast");
    }
3795
    /// Checks that evaluating an `indirect` rule located at offset 0 fails
    /// with `Error::MaximumRecursion(MAX_RECURSION)`.
    #[test]
    fn test_max_recursion() {
        let res = first_magic(
            r#"0 indirect x"#,
            b"#! /usr/bin/luatex ",
            StreamKind::Binary,
        );
        assert!(res.is_err());
        // inspects the error kind behind the localized error
        let _ = res.inspect_err(|e| {
            assert!(matches!(
                e.unwrap_localized(),
                Error::MaximumRecursion(MAX_RECURSION)
            ))
        });
    }
3811
    /// Exercises `string` tests with the `!`, `>` and `<` comparison
    /// operators as well as the `/b` modifier.
    #[test]
    fn test_string_ops() {
        assert_magic_match_text!("0 string/b MZ MZ File", b"MZ\0");
        assert_magic_match_text!("0 string !MZ Not MZ File", b"AZ\0");
        assert_magic_match_text!("0 string >\0 Any String", b"A\0");
        assert_magic_match_text!("0 string >Test Any String", b"Test 1\0");
        assert_magic_match_text!("0 string <Test Any String", b"\0");
        assert_magic_not_match_text!("0 string >Test Any String", b"\0");
    }
3821
    /// Exercises `lestring16` tests: matching, `%s` formatting, rejection
    /// of big endian content and matching at a non zero offset.
    #[test]
    fn test_lestring16() {
        assert_magic_match_bin!(
            "0 lestring16 abcd Little-endian UTF-16 string",
            b"\x61\x00\x62\x00\x63\x00\x64\x00"
        );
        assert_magic_match_bin!(
            "0 lestring16 x %s",
            b"\x61\x00\x62\x00\x63\x00\x64\x00\x00",
            "abcd"
        );
        // same string encoded in big endian
        assert_magic_not_match_bin!(
            "0 lestring16 abcd Little-endian UTF-16 string",
            b"\x00\x61\x00\x62\x00\x63\x00\x64"
        );
        assert_magic_match_bin!(
            "4 lestring16 abcd Little-endian UTF-16 string",
            b"\x00\x00\x00\x00\x61\x00\x62\x00\x63\x00\x64\x00"
        );
    }
3842
    /// Exercises `bestring16` tests: matching, `%s` formatting, rejection
    /// of little endian content and matching at a non zero offset.
    #[test]
    fn test_bestring16() {
        assert_magic_match_bin!(
            "0 bestring16 abcd Big-endian UTF-16 string",
            b"\x00\x61\x00\x62\x00\x63\x00\x64"
        );
        assert_magic_match_bin!(
            "0 bestring16 x %s",
            b"\x00\x61\x00\x62\x00\x63\x00\x64",
            "abcd"
        );
        // same string encoded in little endian
        assert_magic_not_match_bin!(
            "0 bestring16 abcd Big-endian UTF-16 string",
            b"\x61\x00\x62\x00\x63\x00\x64\x00"
        );
        assert_magic_match_bin!(
            "4 bestring16 abcd Big-endian UTF-16 string",
            b"\x00\x00\x00\x00\x00\x61\x00\x62\x00\x63\x00\x64"
        );
    }
3863
    /// Exercises negative offsets, located from the end of the content.
    #[test]
    fn test_offset_from_end() {
        assert_magic_match_bin!("-1 ubyte 0x42 last byte ok", b"\x00\x00\x42");
        assert_magic_match_bin!("-2 ubyte 0x41 last byte ok", b"\x00\x41\x00");
    }
3869
    /// Exercises chained relative offsets (`&0`): each test reads the byte
    /// following the one read by its parent.
    #[test]
    fn test_relative_offset() {
        assert_magic_match_bin!(
            "
 0 ubyte 0x42
 >&0 ubyte 0x00
 >>&0 ubyte 0x41 third byte ok
 ",
            b"\x42\x00\x41\x00"
        );
    }
3881
    /// Exercises indirect offsets: the offset of the test is read from the
    /// content, optionally adjusted by a constant or by another value
    /// read from the content.
    #[test]
    fn test_indirect_offset() {
        assert_magic_match_bin!("(0.l) ubyte 0x42 it works", b"\x04\x00\x00\x00\x42");
        // constant added to the value read
        assert_magic_match_bin!("(0.l+3) ubyte 0x42 it works", b"\x01\x00\x00\x00\x42");
        // value read at offset 4 added to the value read at offset 0
        assert_magic_match_bin!(
            "(0.l+(4)) ubyte 0x42 it works",
            b"\x04\x00\x00\x00\x04\x00\x00\x00\x42"
        );
    }
3893
    /// Checks that the message carried by a `use` line comes before the
    /// message of the named rule being used.
    #[test]
    fn test_use_with_message() {
        assert_magic_match_bin!(
            r#"
0 string MZ
>0 use mz first match

0 name mz then second match
>0 string MZ
"#,
            b"MZ\0",
            "first match then second match"
        );
    }
3908
    /// Exercises scalar transformations applied before comparison and
    /// checks that a modulo or a division by zero is rejected by the
    /// parser.
    #[test]
    fn test_scalar_transform() {
        assert_magic_match_bin!("0 ubyte+1 0x1 add works", b"\x00");
        assert_magic_match_bin!("0 ubyte-1 0xfe sub works", b"\xff");
        assert_magic_match_bin!("0 ubyte%2 0 mod works", b"\x0a");
        assert_magic_match_bin!("0 ubyte&0x0f 0x0f bitand works", b"\xff");
        assert_magic_match_bin!("0 ubyte|0x0f 0xff bitor works", b"\xf0");
        assert_magic_match_bin!("0 ubyte^0x0f 0xf0 bitxor works", b"\xff");

        // must fail at parsing time
        FileMagicParser::parse_str("0 ubyte%0 mod by zero", None)
            .expect_err("expect div by zero error");
        FileMagicParser::parse_str("0 ubyte/0 div by zero", None)
            .expect_err("expect div by zero error");
    }
3923
    /// Exercises `belong` tests with every comparison operator
    /// (`=`, `<`, `>`, `&`, `^`, `~` and `x`).
    #[test]
    fn test_belong() {
        // equality
        assert_magic_match_bin!("0 belong 0x12345678 Big-endian long", b"\x12\x34\x56\x78");
        // same value in little endian
        assert_magic_not_match_bin!("0 belong 0x12345678 Big-endian long", b"\x78\x56\x34\x12");
        // equality at a non zero offset
        assert_magic_match_bin!(
            "4 belong 0x12345678 Big-endian long",
            b"\x00\x00\x00\x00\x12\x34\x56\x78"
        );
        // less than
        assert_magic_match_bin!("0 belong <0x12345678 Big-endian long", b"\x12\x34\x56\x77");
        assert_magic_not_match_bin!("0 belong <0x12345678 Big-endian long", b"\x12\x34\x56\x78");

        // greater than
        assert_magic_match_bin!("0 belong >0x12345678 Big-endian long", b"\x12\x34\x56\x79");
        assert_magic_not_match_bin!("0 belong >0x12345678 Big-endian long", b"\x12\x34\x56\x78");

        // bitwise and
        assert_magic_match_bin!("0 belong &0x5678 Big-endian long", b"\x00\x00\x56\x78");
        assert_magic_not_match_bin!("0 belong &0x0000FFFF Big-endian long", b"\x12\x34\x56\x78");

        // bitwise xor
        assert_magic_match_bin!("0 belong ^0xFFFF0000 Big-endian long", b"\x00\x00\x56\x78");
        assert_magic_not_match_bin!("0 belong ^0xFFFF0000 Big-endian long", b"\x00\x01\x56\x78");

        // bitwise not
        assert_magic_match_bin!("0 belong ~0x12345678 Big-endian long", b"\xed\xcb\xa9\x87");
        assert_magic_not_match_bin!("0 belong ~0x12345678 Big-endian long", b"\x12\x34\x56\x78");

        // any value
        assert_magic_match_bin!("0 belong x Big-endian long", b"\x12\x34\x56\x78");
        assert_magic_match_bin!("0 belong x Big-endian long", b"\x78\x56\x34\x12");
    }
3959
    /// Checks that `search` tests can be parsed with no modifier and with
    /// range and flags given in any order.
    #[test]
    fn test_parse_search() {
        parse_assert!("0 search test");
        parse_assert!("0 search/24/s test");
        parse_assert!("0 search/s/24 test");
    }
3966
    /// Exercises `bedate` tests: the timestamp 946684800 must be
    /// recognized and formatted as `2000-01-01 00:00:00`.
    #[test]
    fn test_bedate() {
        assert_magic_match_bin!(
            "0 bedate 946684800 Unix date (Jan 1, 2000)",
            b"\x38\x6D\x43\x80"
        );
        assert_magic_not_match_bin!(
            "0 bedate 946684800 Unix date (Jan 1, 2000)",
            b"\x00\x00\x00\x00"
        );
        assert_magic_match_bin!(
            "4 bedate 946684800 %s",
            b"\x00\x00\x00\x00\x38\x6D\x43\x80",
            "2000-01-01 00:00:00"
        );
    }
    /// Exercises `beldate` tests: the formatted value must be equal to
    /// what `unix_local_time_to_string` returns for the same timestamp.
    #[test]
    fn test_beldate() {
        assert_magic_match_bin!(
            "0 beldate 946684800 Local date (Jan 1, 2000)",
            b"\x38\x6D\x43\x80"
        );
        assert_magic_not_match_bin!(
            "0 beldate 946684800 Local date (Jan 1, 2000)",
            b"\x00\x00\x00\x00"
        );

        assert_magic_match_bin!(
            "4 beldate 946684800 {}",
            b"\x00\x00\x00\x00\x38\x6D\x43\x80",
            unix_local_time_to_string(946684800)
        );
    }
4000
    /// Exercises `beqdate` tests (timestamp stored on eight bytes, big
    /// endian in the fixtures).
    #[test]
    fn test_beqdate() {
        assert_magic_match_bin!(
            "0 beqdate 946684800 Unix date (Jan 1, 2000)",
            b"\x00\x00\x00\x00\x38\x6D\x43\x80"
        );

        assert_magic_not_match_bin!(
            "0 beqdate 946684800 Unix date (Jan 1, 2000)",
            b"\x00\x00\x00\x00\x00\x00\x00\x00"
        );

        assert_magic_match_bin!(
            "0 beqdate 946684800 %s",
            b"\x00\x00\x00\x00\x38\x6D\x43\x80",
            "2000-01-01 00:00:00"
        );
    }
4019
    /// Exercises `medate` tests (timestamp stored in middle endian).
    #[test]
    fn test_medate() {
        assert_magic_match_bin!(
            "0 medate 946684800 Unix date (Jan 1, 2000)",
            b"\x6D\x38\x80\x43"
        );

        assert_magic_not_match_bin!(
            "0 medate 946684800 Unix date (Jan 1, 2000)",
            b"\x00\x00\x00\x00"
        );

        assert_magic_match_bin!(
            "4 medate 946684800 %s",
            b"\x00\x00\x00\x00\x6D\x38\x80\x43",
            "2000-01-01 00:00:00"
        );
    }
4038
    /// Exercises `meldate` tests: the formatted value must be equal to
    /// what `unix_local_time_to_string` returns for the same timestamp.
    #[test]
    fn test_meldate() {
        assert_magic_match_bin!(
            "0 meldate 946684800 Local date (Jan 1, 2000)",
            b"\x6D\x38\x80\x43"
        );
        assert_magic_not_match_bin!(
            "0 meldate 946684800 Local date (Jan 1, 2000)",
            b"\x00\x00\x00\x00"
        );

        assert_magic_match_bin!(
            "4 meldate 946684800 %s",
            b"\x00\x00\x00\x00\x6D\x38\x80\x43",
            unix_local_time_to_string(946684800)
        );
    }
4056
    /// Exercises `date` tests.
    ///
    /// NOTE(review): the fixtures store the timestamp in little endian,
    /// so this presumably relies on the byte order of the host — confirm.
    #[test]
    fn test_date() {
        assert_magic_match_bin!(
            "0 date 946684800 Local date (Jan 1, 2000)",
            b"\x80\x43\x6D\x38"
        );
        assert_magic_not_match_bin!(
            "0 date 946684800 Local date (Jan 1, 2000)",
            b"\x00\x00\x00\x00"
        );
        assert_magic_match_bin!(
            "4 date 946684800 {}",
            b"\x00\x00\x00\x00\x80\x43\x6D\x38",
            "2000-01-01 00:00:00"
        );
    }
4073
    /// Exercises `leldate` tests: the formatted value must be equal to
    /// what `unix_local_time_to_string` returns for the same timestamp.
    #[test]
    fn test_leldate() {
        assert_magic_match_bin!(
            "0 leldate 946684800 Local date (Jan 1, 2000)",
            b"\x80\x43\x6D\x38"
        );
        assert_magic_not_match_bin!(
            "0 leldate 946684800 Local date (Jan 1, 2000)",
            b"\x00\x00\x00\x00"
        );
        assert_magic_match_bin!(
            "4 leldate 946684800 {}",
            b"\x00\x00\x00\x00\x80\x43\x6D\x38",
            unix_local_time_to_string(946684800)
        );
    }
4090
    /// Exercises `leqdate` tests: the timestamp 1577836800, stored on
    /// eight bytes in little endian, must be recognized and formatted as
    /// `2020-01-01 00:00:00`.
    #[test]
    fn test_leqdate() {
        assert_magic_match_bin!(
            "0 leqdate 1577836800 Unix date (Jan 1, 2020)",
            b"\x00\xe1\x0b\x5E\x00\x00\x00\x00"
        );

        assert_magic_not_match_bin!(
            "0 leqdate 1577836800 Unix date (Jan 1, 2020)",
            b"\x00\x00\x00\x00\x00\x00\x00\x00"
        );
        assert_magic_match_bin!(
            "8 leqdate 1577836800 %s",
            b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\xE1\x0B\x5E\x00\x00\x00\x00",
            "2020-01-01 00:00:00"
        );
    }
4108
    /// Exercises `leqldate` tests: the formatted value must be equal to
    /// what `unix_local_time_to_string` returns for the same timestamp.
    #[test]
    fn test_leqldate() {
        assert_magic_match_bin!(
            "0 leqldate 1577836800 Unix date (Jan 1, 2020)",
            b"\x00\xe1\x0b\x5E\x00\x00\x00\x00"
        );

        assert_magic_not_match_bin!(
            "0 leqldate 1577836800 Unix date (Jan 1, 2020)",
            b"\x00\x00\x00\x00\x00\x00\x00\x00"
        );
        assert_magic_match_bin!(
            "8 leqldate 1577836800 %s",
            b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\xE1\x0B\x5E\x00\x00\x00\x00",
            unix_local_time_to_string(1577836800)
        );
    }
4126
    /// Exercises `melong` (middle endian long) tests with every
    /// comparison operator (`=`, `<`, `>`, `&`, `^`, `~` and `x`).
    #[test]
    fn test_melong() {
        // equality
        assert_magic_match_bin!(
            "0 melong =0x12345678 Middle-endian long",
            b"\x34\x12\x78\x56"
        );
        assert_magic_not_match_bin!(
            "0 melong =0x12345678 Middle-endian long",
            b"\x00\x00\x00\x00"
        );

        // less than
        assert_magic_match_bin!(
            "0 melong <0x12345678 Middle-endian long",
            b"\x34\x12\x78\x55"
        );
        assert_magic_not_match_bin!(
            "0 melong <0x12345678 Middle-endian long",
            b"\x34\x12\x78\x56"
        );
        // greater than
        assert_magic_match_bin!(
            "0 melong >0x12345678 Middle-endian long",
            b"\x34\x12\x78\x57"
        );
        assert_magic_not_match_bin!(
            "0 melong >0x12345678 Middle-endian long",
            b"\x34\x12\x78\x56"
        );
        // bitwise and
        assert_magic_match_bin!("0 melong &0x5678 Middle-endian long", b"\xab\xcd\x78\x56");
        assert_magic_not_match_bin!(
            "0 melong &0x0000FFFF Middle-endian long",
            b"\x34\x12\x78\x56"
        );
        // bitwise xor
        assert_magic_match_bin!(
            "0 melong ^0xFFFF0000 Middle-endian long",
            b"\x00\x00\x78\x56"
        );
        assert_magic_not_match_bin!(
            "0 melong ^0xFFFF0000 Middle-endian long",
            b"\x00\x01\x78\x56"
        );
        // bitwise not
        assert_magic_match_bin!(
            "0 melong ~0x12345678 Middle-endian long",
            b"\xCB\xED\x87\xA9"
        );
        assert_magic_not_match_bin!(
            "0 melong ~0x12345678 Middle-endian long",
            b"\x34\x12\x78\x56"
        );
        // any value
        assert_magic_match_bin!("0 melong x Middle-endian long", b"\x34\x12\x78\x56");
        assert_magic_match_bin!("0 melong x Middle-endian long", b"\x00\x00\x00\x00");
    }
4190
    /// Exercises `uquad` tests with every comparison operator
    /// (`=`, `<`, `>`, `&`, `^`, `~` and `x`) and hexadecimal formatting.
    ///
    /// Values are stored in little endian in the fixtures.
    #[test]
    fn test_uquad() {
        // equality
        assert_magic_match_bin!(
            "0 uquad =0x123456789ABCDEF0 Unsigned quad",
            b"\xF0\xDE\xBC\x9A\x78\x56\x34\x12"
        );
        assert_magic_not_match_bin!(
            "0 uquad =0x123456789ABCDEF0 Unsigned quad",
            b"\x00\x00\x00\x00\x00\x00\x00\x00"
        );

        // less than
        assert_magic_match_bin!(
            "0 uquad <0x123456789ABCDEF0 Unsigned quad",
            b"\xF0\xDE\xBC\x9A\x78\x56\x34\x11"
        );
        assert_magic_not_match_bin!(
            "0 uquad <0x123456789ABCDEF0 Unsigned quad",
            b"\xF0\xDE\xBC\x9A\x78\x56\x34\x12"
        );

        // greater than
        assert_magic_match_bin!(
            "0 uquad >0x123456789ABCDEF0 Unsigned quad",
            b"\xF0\xDE\xBC\x9A\x78\x56\x34\x13"
        );
        assert_magic_not_match_bin!(
            "0 uquad >0x123456789ABCDEF0 Unsigned quad",
            b"\xF0\xDE\xBC\x9A\x78\x56\x34\x12"
        );

        // bitwise and
        assert_magic_match_bin!(
            "0 uquad &0xF0 Unsigned quad",
            b"\xF0\xDE\xBC\x9A\x78\x56\x34\x12"
        );
        assert_magic_not_match_bin!(
            "0 uquad &0xFF Unsigned quad",
            b"\xF0\xDE\xBC\x9A\x78\x56\x34\x12"
        );

        // bitwise xor
        assert_magic_match_bin!(
            "0 uquad ^0xFFFFFFFFFFFFFFFF Unsigned quad",
            b"\x00\x00\x00\x00\x00\x00\x00\x00"
        );
        assert_magic_not_match_bin!(
            "0 uquad ^0xFFFFFFFFFFFFFFFF Unsigned quad",
            b"\xF0\xDE\xBC\x9A\x78\x56\x34\x12"
        );
        // bitwise not
        assert_magic_match_bin!(
            "0 uquad ~0x123456789ABCDEF0 Unsigned quad",
            b"\x0F\x21\x43\x65\x87\xA9\xCB\xED"
        );
        assert_magic_not_match_bin!(
            "0 uquad ~0x123456789ABCDEF0 Unsigned quad",
            b"\xF0\xDE\xBC\x9A\x78\x56\x34\x12"
        );
        // any value, formatted in hexadecimal
        assert_magic_match_bin!(
            "0 uquad x {:#x}",
            b"\xF0\xDE\xBC\x9A\x78\x56\x34\x12",
            "0x123456789abcdef0"
        );
        assert_magic_match_bin!(
            "0 uquad x Unsigned quad",
            b"\x00\x00\x00\x00\x00\x00\x00\x00"
        );
    }
4264
    /// Exercises `guid` tests: matching, rejection and `%s` formatting.
    #[test]
    fn test_guid() {
        assert_magic_match_bin!(
            "0 guid EC959539-6786-2D4E-8FDB-98814CE76C1E It works",
            b"\xEC\x95\x95\x39\x67\x86\x2D\x4E\x8F\xDB\x98\x81\x4C\xE7\x6C\x1E"
        );

        assert_magic_not_match_bin!(
            "0 guid 399595EC-8667-4E2D-8FDB-98814CE76C1E It works",
            b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F"
        );

        assert_magic_match_bin!(
            "0 guid x %s",
            b"\xEC\x95\x95\x39\x67\x86\x2D\x4E\x8F\xDB\x98\x81\x4C\xE7\x6C\x1E",
            "EC959539-6786-2D4E-8FDB-98814CE76C1E"
        );
    }
4283
    /// Exercises `ubeqdate` tests: the timestamp 1633046400 must be
    /// recognized and formatted as `2021-10-01 00:00:00`.
    #[test]
    fn test_ubeqdate() {
        assert_magic_match_bin!(
            "0 ubeqdate 1633046400 It works",
            b"\x00\x00\x00\x00\x61\x56\x4f\x80"
        );

        assert_magic_match_bin!(
            "0 ubeqdate x %s",
            b"\x00\x00\x00\x00\x61\x56\x4f\x80",
            "2021-10-01 00:00:00"
        );

        assert_magic_not_match_bin!(
            "0 ubeqdate 1633046400 It should not work",
            b"\x00\x00\x00\x00\x00\x00\x00\x00"
        );
    }
4302
    /// Exercises `ldate` tests: the formatted value must be equal to what
    /// `unix_local_time_to_string` returns for the same timestamp.
    ///
    /// NOTE(review): the fixtures store the timestamp in little endian,
    /// so this presumably relies on the byte order of the host — confirm.
    #[test]
    fn test_ldate() {
        assert_magic_match_bin!("0 ldate 1640551520 It works", b"\x60\xd4\xC8\x61");

        assert_magic_not_match_bin!("0 ldate 1633046400 It should not work", b"\x00\x00\x00\x00");

        assert_magic_match_bin!(
            "0 ldate x %s",
            b"\x60\xd4\xC8\x61",
            unix_local_time_to_string(1640551520)
        );
    }
4315
    /// Checks that the value formatted in the message is the transformed
    /// scalar (0x14 = 20: 20 / 10 = 2 and 20 % 10 = 0).
    #[test]
    fn test_scalar_with_transform() {
        assert_magic_match_bin!("0 ubyte/10 2 {}", b"\x14", "2");
        assert_magic_match_bin!("0 ubyte/10 x {}", b"\x14", "2");
        assert_magic_match_bin!("0 ubyte%10 x {}", b"\x14", "0");
    }
4322
    /// Checks that an arithmetic transform attached to a float type
    /// (`/10`, `%10`) is applied before both the comparison and the rendering.
    #[test]
    fn test_float_with_transform() {
        // The bytes are the little-endian encoding of 0x41A00000, i.e. 20.0 as
        // an IEEE-754 single: 20 / 10 = 2 and 20 % 10 = 0. The expected
        // messages show the results without a fractional part.
        assert_magic_match_bin!("0 lefloat/10 2 {}", b"\x00\x00\xa0\x41", "2");
        assert_magic_match_bin!("0 lefloat/10 x {}", b"\x00\x00\xa0\x41", "2");
        assert_magic_match_bin!("0 lefloat%10 x {}", b"\x00\x00\xa0\x41", "0");
    }
4329
    /// Pins the behaviour of `read_octal_u64` on well-formed, truncated and
    /// malformed inputs.
    #[test]
    fn test_read_octal() {
        // A leading `0` followed by octal digits parses to the octal value.
        assert_eq!(read_octal_u64(&mut lazy_cache!("0")), Some(0));
        assert_eq!(read_octal_u64(&mut lazy_cache!("00")), Some(0));
        assert_eq!(read_octal_u64(&mut lazy_cache!("01")), Some(1));
        assert_eq!(read_octal_u64(&mut lazy_cache!("07")), Some(7));
        assert_eq!(read_octal_u64(&mut lazy_cache!("010")), Some(8));
        assert_eq!(read_octal_u64(&mut lazy_cache!("0123")), Some(83));
        assert_eq!(read_octal_u64(&mut lazy_cache!("0755")), Some(493));

        // Trailing non-octal characters are ignored: the value read so far is
        // returned.
        assert_eq!(read_octal_u64(&mut lazy_cache!("0ABC")), Some(0));
        assert_eq!(read_octal_u64(&mut lazy_cache!("01ABC")), Some(1));
        assert_eq!(read_octal_u64(&mut lazy_cache!("0755ABC")), Some(493));
        assert_eq!(read_octal_u64(&mut lazy_cache!("0123ABC")), Some(83));

        // `8` is not an octal digit, so it ends the number like any other
        // foreign character.
        assert_eq!(read_octal_u64(&mut lazy_cache!("08")), Some(0));
        assert_eq!(read_octal_u64(&mut lazy_cache!("01238")), Some(83));

        // Without the leading `0` the input is rejected.
        assert_eq!(read_octal_u64(&mut lazy_cache!("123")), None);
        assert_eq!(read_octal_u64(&mut lazy_cache!("755")), None);

        // Empty input yields nothing.
        assert_eq!(read_octal_u64(&mut lazy_cache!("")), None);

        // Input that does not start with `0` is rejected, digits or not.
        assert_eq!(read_octal_u64(&mut lazy_cache!("ABC")), None);
        assert_eq!(read_octal_u64(&mut lazy_cache!("8ABC")), None);

        // A longer value: 0o1777777777 = 2 * 8^9 - 1 = 268435455.
        assert_eq!(
            read_octal_u64(&mut lazy_cache!("01777777777")),
            Some(268435455)
        );
    }
4368
    /// Regression test for offset computation through nested `use` rules.
    ///
    /// Input layout: byte 0 is `0x00`, `TEST` sits at 1..=4, byte 5 is `0x06`,
    /// `twice` sits at 6..=10, byte 11 is `0x00` and byte 12 is `0x06`.
    /// The indirect offsets `(5.b)` and `(6.b)` each read a single byte to
    /// find where the next rule applies; the concatenated message must be
    /// "Bread is Toasted twice".
    #[test]
    fn test_offset_bug_1() {
        // The lines starting with `#` inside the raw string belong to the rule
        // text passed to the macro; they are not Rust comments.
        assert_magic_match_bin!(
            r"
1 string TEST Bread is
# offset computation is relative to
# rule start
>(5.b) use toasted

0 name toasted
>0 string twice Toasted
>>0 use toasted_twice

0 name toasted_twice
>(6.b) string x %s
 ",
            b"\x00TEST\x06twice\x00\x06",
            "Bread is Toasted twice"
        );
    }
4391
4392 #[test]
4398 fn test_offset_bug_2() {
4399 assert_magic_match_bin!(
4402 r"
4403-12 string TEST Bread is
4404>(4.b) use toasted
4405
44060 name toasted
4407>0 string twice Toasted
4408>>0 use toasted_twice
4409
44100 name toasted_twice
4411>(6.b) string x %
4412 ",
4413 b"\x00TEST\x06twice\x00\x06",
4414 "Bread is Toasted twice"
4415 )
4416 }
4417
    /// Regression test for offset computation when a rule continues through
    /// `indirect/r` and the re-evaluated rule then calls a named rule.
    ///
    /// Input layout: byte 0 is `0x00`, `TEST` sits at 1..=4, byte 5 is `0x06`,
    /// `twice` sits at 6..=10, byte 11 is `0x00` and byte 12 is `0x08`.
    /// The concatenated message must be "Bread is Toasted twice".
    #[test]
    fn test_offset_bug_3() {
        // `(5.b)` reads byte 5 (0x06), which is also where `twice` starts.
        // NOTE(review): presumably the `0` offsets of the second and third
        // rules are resolved relative to that indirection point; confirm.
        assert_magic_match_bin!(
            r"
1 string TEST Bread is
>(5.b) indirect/r x

0 string twice Toasted
>0 use toasted_twice

0 name toasted_twice
>0 string x %s
 ",
            b"\x00TEST\x06twice\x00\x08",
            "Bread is Toasted twice"
        )
    }
4437
    /// Regression test for base-offset handling when `indirect/r` is followed
    /// by a `use` with its own indirect offset.
    ///
    /// Input layout: byte 0 is `0x00`, `Bread` sits at 1..=5, byte 6 is `0x06`,
    /// `is Toasted` sits at 7..=16, byte 17 is `0x0c` (12), `twice` sits at
    /// 18..=22 and byte 23 is `0x00`.
    /// With a base of 6 after the indirection, `1` lands on `is Toasted`,
    /// `(11.b)` reads byte 17, and 6 + 12 = 18 lands on `twice`; this matches
    /// the explanation given by the `#` comments embedded in the rule text.
    #[test]
    fn test_offset_bug_4() {
        // The lines starting with `#` inside the raw string belong to the rule
        // text passed to the macro; they are not Rust comments.
        assert_magic_match_bin!(
            r"
1 string Bread %s
>(6.b) indirect/r x

# this one uses a based offset
# computed at indirection
1 string is\ Toasted %s
>(11.b) use toasted_twice

# this one is using a new base
# offset being previous base
# offset + offset of use
0 name toasted_twice
>0 string x %s
 ",
            b"\x00Bread\x06is Toasted\x0ctwice\x00",
            "Bread is Toasted twice"
        )
    }
4462
    /// Regression test for a relative offset (`&1`) used inside a named rule
    /// that is reached through `indirect/r` and `use`.
    ///
    /// Input layout: byte 0 is `0x00`, `TEST` sits at 1..=4, byte 5 is `0x06`,
    /// `twice` sits at 6..=10, byte 11 is `0x00` and byte 12 is `0x08`.
    /// The concatenated message must be "Bread is Toasted twice".
    #[test]
    fn test_offset_bug_5() {
        // NOTE(review): presumably `&1` is counted from the end of the
        // preceding `twice` match (byte 11), landing on byte 12 = 0x08; confirm
        // against the relative-offset implementation.
        assert_magic_match_bin!(
            r"
1 string TEST Bread is
>(5.b) indirect/r x

0 string twice Toasted
>0 use toasted_twice

0 name toasted_twice
>0 string twice
>>&1 byte 0x08 twice
 ",
            b"\x00TEST\x06twice\x00\x08",
            "Bread is Toasted twice"
        )
    }
4481
4482 #[test]
4483 fn test_message_parts() {
4484 let m = first_magic(
4485 r#"0 string/W #!/usr/bin/env\ python PYTHON"#,
4486 b"#!/usr/bin/env python",
4487 StreamKind::Text(TextEncoding::Ascii),
4488 )
4489 .unwrap();
4490
4491 assert!(m.message_parts().any(|p| p.eq_ignore_ascii_case("python")))
4492 }
4493}