1#![forbid(unsafe_code)]
2#![deny(unused_imports)]
3#![deny(missing_docs)]
4use dyf::{DynDisplay, FormatString, dformat};
144use flagset::{FlagSet, flags};
145use flate2::{Compression, read::GzDecoder, write::GzEncoder};
146use lazy_cache::LazyCache;
147use memchr::memchr;
148use pest::{Span, error::ErrorVariant};
149use regex::bytes::{self};
150use serde::{Deserialize, Serialize};
151use std::{
152 borrow::Cow,
153 cmp::max,
154 collections::{HashMap, HashSet},
155 fmt::{self, Debug, Display},
156 io::{self, Read, Seek, SeekFrom, Write},
157 ops::{Add, BitAnd, BitOr, BitXor, Div, Mul, Rem, Sub},
158 path::Path,
159};
160use tar::Archive;
161use thiserror::Error;
162use tracing::{Level, debug, enabled, trace};
163
164use crate::{
165 numeric::{Float, FloatDataType, Scalar, ScalarDataType},
166 parser::{FileMagicParser, Rule},
167 utils::{decode_id3, find_json_boundaries, run_utf8_validation},
168};
169
170mod numeric;
171mod parser;
172mod utils;
173
/// Strength constant for hardcoded magic; where it is applied is outside this part of the file.
const HARDCODED_MAGIC_STRENGTH: u64 = 2048;
/// Source label for hardcoded magic (presumably used where a rule file name would
/// otherwise appear — confirm at the use sites).
const HARDCODED_SOURCE: &str = "hardcoded";
/// Recursion depth limit; presumably what `Error::MaximumRecursion` reports — confirm.
const MAX_RECURSION: usize = 50;
/// Upper bound (7 MiB) on the number of bytes read for search tests and for
/// string tests whose modifiers make the matched length variable.
const FILE_BYTES_MAX: usize = 7 * 1024 * 1024;
/// Number of bytes read for a regex test that carries no explicit length.
const FILE_REGEX_MAX: usize = 8192;

/// MIME type string for arbitrary binary data.
pub const DEFAULT_BIN_MIMETYPE: &str = "application/octet-stream";
/// MIME type string for plain text.
pub const DEFAULT_TEXT_MIMETYPE: &str = "text/plain";

/// Timestamp format string (`strftime`-style: `YYYY-MM-DD HH:MM:SS`).
pub(crate) const TIMESTAMP_FORMAT: &str = "%Y-%m-%d %H:%M:%S";
189
// Panics with the given `panic!` arguments in builds that have
// `debug_assertions` enabled; does nothing otherwise. The arguments are still
// type-checked in every build because the condition is a runtime `if`.
macro_rules! debug_panic {
    ($($panic_args:tt)*) => {
        if cfg!(debug_assertions) {
            panic!($($panic_args)*);
        }
    };
}
197
// Reads exactly `size_of::<$ty>()` bytes from `$r` and evaluates to them as a
// byte array. A failed read is propagated with `?`, so the macro can only be
// used inside a function returning a compatible `Result`.
macro_rules! read {
    ($r:expr, $ty:ty) => {{
        let mut raw_bytes = [0u8; ::std::mem::size_of::<$ty>()];
        $r.read_exact(&mut raw_bytes)?;
        raw_bytes
    }};
}
205
// Reads a `$ty` from `$r`, interpreting the bytes as little-endian.
macro_rules! read_le {
    ($r:expr, $ty:ty) => {{
        let raw = read!($r, $ty);
        <$ty>::from_le_bytes(raw)
    }};
}
209
// Reads a `$ty` from `$r`, interpreting the bytes as big-endian.
macro_rules! read_be {
    ($r:expr, $ty:ty) => {{
        let raw = read!($r, $ty);
        <$ty>::from_be_bytes(raw)
    }};
}
213
// Reads a 32-bit value made of two consecutive little-endian 16-bit words,
// the first word being the most significant half of the result.
macro_rules! read_me {
    ($r:expr) => {{
        // The high word must be read first: it comes first in the stream.
        let high_word = read_le!($r, u16) as i32;
        let low_word = read_le!($r, u16) as i32;
        (high_word << 16) | low_word
    }};
}
217
218#[inline(always)]
219fn read_octal_u64<R: Read + Seek>(haystack: &mut LazyCache<R>) -> Option<u64> {
220 let s = haystack
221 .read_while_or_limit(|b| matches!(b, b'0'..=b'7'), 22)
222 .map(|buf| str::from_utf8(buf))
223 .ok()?
224 .ok()?;
225
226 if !s.starts_with("0") {
227 return None;
228 }
229
230 u64::from_str_radix(s, 8).ok()
231}
232
/// Errors reported by this crate.
#[derive(Debug, Error)]
pub enum Error {
    /// A free-form error message.
    #[error("{0}")]
    Msg(String),

    /// An error annotated with the source name and line number it relates to.
    #[error("source={0} line={1} error={2}")]
    Localized(String, usize, Box<Error>),

    /// A rule referenced by name could not be found.
    #[error("missing rule: {0}")]
    MissingRule(String),

    /// The recursion limit was reached; the payload is the value reported in the message.
    #[error("maximum recursion reached: {0}")]
    MaximumRecursion(usize),

    /// An I/O error.
    #[error("io: {0}")]
    Io(#[from] io::Error),

    /// A parser error (boxed `pest` error).
    #[error("parser error: {0}")]
    Parse(#[from] Box<pest::error::Error<Rule>>),

    /// A message formatting error.
    #[error("formatting: {0}")]
    Format(#[from] dyf::Error),

    /// A regular expression error.
    #[error("regex: {0}")]
    Regex(#[from] regex::Error),

    /// A `bincode` encoding error.
    #[error("{0}")]
    Serialize(#[from] bincode::error::EncodeError),

    /// A `bincode` decoding error.
    #[error("{0}")]
    Deserialize(#[from] bincode::error::DecodeError),
}
276
277impl Error {
278 #[inline]
279 fn parser<S: ToString>(msg: S, span: Span<'_>) -> Self {
280 Self::Parse(Box::new(pest::error::Error::new_from_span(
281 ErrorVariant::CustomError {
282 message: msg.to_string(),
283 },
284 span,
285 )))
286 }
287
288 fn msg<M: AsRef<str>>(msg: M) -> Self {
289 Self::Msg(msg.as_ref().into())
290 }
291
292 fn localized<S: AsRef<str>>(source: S, line: usize, err: Error) -> Self {
293 Self::Localized(source.as_ref().into(), line, err.into())
294 }
295
296 pub fn unwrap_localized(&self) -> &Self {
298 match self {
299 Self::Localized(_, _, e) => e,
300 _ => self,
301 }
302 }
303}
304
305#[derive(Debug, Clone, Serialize, Deserialize)]
306enum Message {
307 String(String),
308 Format {
309 printf_spec: String,
310 fs: FormatString,
311 },
312}
313
314impl Display for Message {
315 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
316 match self {
317 Self::String(s) => write!(f, "{s}"),
318 Self::Format { printf_spec: _, fs } => write!(f, "{}", fs.to_string_lossy()),
319 }
320 }
321}
322
323impl Message {
324 fn to_string_lossy(&self) -> Cow<'_, str> {
325 match self {
326 Message::String(s) => Cow::Borrowed(s),
327 Message::Format { printf_spec: _, fs } => fs.to_string_lossy(),
328 }
329 }
330
331 #[inline(always)]
332 fn format_with(&self, mr: Option<&MatchRes>) -> Result<Cow<'_, str>, Error> {
333 match self {
334 Self::String(s) => Ok(Cow::Borrowed(s.as_str())),
335 Self::Format {
336 printf_spec: c_spec,
337 fs,
338 } => {
339 if let Some(mr) = mr {
340 match mr {
341 MatchRes::Float(_, _) | MatchRes::Bytes(_, _, _, _) => {
342 Ok(Cow::Owned(dformat!(fs, mr)?))
343 }
344 MatchRes::Scalar(_, scalar) => {
345 if c_spec.as_str() == "c" {
347 match scalar {
348 Scalar::byte(b) => {
349 let b = (*b as u8) as char;
350 Ok(Cow::Owned(dformat!(fs, b)?))
351 }
352 Scalar::ubyte(b) => {
353 let b = *b as char;
354 Ok(Cow::Owned(dformat!(fs, b)?))
355 }
356 _ => Ok(Cow::Owned(dformat!(fs, mr)?)),
357 }
358 } else {
359 Ok(Cow::Owned(dformat!(fs, mr)?))
360 }
361 }
362 }
363 } else {
364 Ok(fs.to_string_lossy())
365 }
366 }
367 }
368 }
369}
370
impl ScalarDataType {
    /// Reads one scalar of this data type from `from`.
    ///
    /// When `switch_endianness` is `true`, every little-endian read is decoded
    /// as big-endian and vice versa (native-endian types follow the same swap).
    ///
    /// # Errors
    /// Propagates I/O errors from the underlying reads.
    #[inline(always)]
    fn read<R: Read + Seek>(&self, from: &mut R, switch_endianness: bool) -> Result<Scalar, Error> {
        // Little-endian read, decoded as big-endian when the switch is set.
        macro_rules! _read_le {
            ($ty: ty) => {{
                if switch_endianness {
                    <$ty>::from_be_bytes(read!(from, $ty))
                } else {
                    <$ty>::from_le_bytes(read!(from, $ty))
                }
            }};
        }

        // Big-endian read, decoded as little-endian when the switch is set.
        macro_rules! _read_be {
            ($ty: ty) => {{
                if switch_endianness {
                    <$ty>::from_le_bytes(read!(from, $ty))
                } else {
                    <$ty>::from_be_bytes(read!(from, $ty))
                }
            }};
        }

        // Native-endian read: picks the byte order of the compilation target.
        macro_rules! _read_ne {
            ($ty: ty) => {{
                if cfg!(target_endian = "big") {
                    _read_be!($ty)
                } else {
                    _read_le!($ty)
                }
            }};
        }

        // Middle-endian 32-bit read: two 16-bit words, most significant word first.
        macro_rules! _read_me {
            () => {
                ((_read_le!(u16) as i32) << 16) | (_read_le!(u16) as i32)
            };
        }

        Ok(match self {
            // Single byte, reinterpreted as signed.
            Self::byte => Scalar::byte(read!(from, u8)[0] as i8),
            Self::short => Scalar::short(_read_ne!(i16)),
            Self::long => Scalar::long(_read_ne!(i32)),
            Self::date => Scalar::date(_read_ne!(i32)),
            Self::ldate => Scalar::ldate(_read_ne!(i32)),
            Self::qwdate => Scalar::qwdate(_read_ne!(i64)),
            Self::leshort => Scalar::leshort(_read_le!(i16)),
            Self::lelong => Scalar::lelong(_read_le!(i32)),
            Self::lequad => Scalar::lequad(_read_le!(i64)),
            Self::bequad => Scalar::bequad(_read_be!(i64)),
            Self::belong => Scalar::belong(_read_be!(i32)),
            Self::bedate => Scalar::bedate(_read_be!(i32)),
            Self::beldate => Scalar::beldate(_read_be!(i32)),
            Self::beqdate => Scalar::beqdate(_read_be!(i64)),
            // Single byte, kept unsigned.
            Self::ubyte => Scalar::ubyte(read!(from, u8)[0]),
            Self::ushort => Scalar::ushort(_read_ne!(u16)),
            Self::uleshort => Scalar::uleshort(_read_le!(u16)),
            Self::ulelong => Scalar::ulelong(_read_le!(u32)),
            Self::uledate => Scalar::uledate(_read_le!(u32)),
            Self::ulequad => Scalar::ulequad(_read_le!(u64)),
            // Consumes nothing: the value is the current stream position.
            Self::offset => Scalar::offset(from.stream_position()?),
            Self::ubequad => Scalar::ubequad(_read_be!(u64)),
            Self::medate => Scalar::medate(_read_me!()),
            Self::meldate => Scalar::meldate(_read_me!()),
            Self::melong => Scalar::melong(_read_me!()),
            Self::beshort => Scalar::beshort(_read_be!(i16)),
            Self::quad => Scalar::quad(_read_ne!(i64)),
            Self::uquad => Scalar::uquad(_read_ne!(u64)),
            Self::ledate => Scalar::ledate(_read_le!(i32)),
            Self::leldate => Scalar::leldate(_read_le!(i32)),
            Self::leqdate => Scalar::leqdate(_read_le!(i64)),
            Self::leqldate => Scalar::leqldate(_read_le!(i64)),
            Self::leqwdate => Scalar::leqwdate(_read_le!(i64)),
            Self::ubelong => Scalar::ubelong(_read_be!(u32)),
            Self::ulong => Scalar::ulong(_read_ne!(u32)),
            Self::ubeshort => Scalar::ubeshort(_read_be!(u16)),
            Self::ubeqdate => Scalar::ubeqdate(_read_be!(u64)),
            Self::lemsdosdate => Scalar::lemsdosdate(_read_le!(u16)),
            Self::lemsdostime => Scalar::lemsdostime(_read_le!(u16)),
            // 16 bytes, always decoded big-endian (not affected by the switch).
            Self::guid => Scalar::guid(u128::from_be_bytes(read!(from, u128))),
        })
    }
}
456
457impl FloatDataType {
458 #[inline(always)]
459 fn read<R: Read + Seek>(&self, from: &mut R, switch_endianness: bool) -> Result<Float, Error> {
460 macro_rules! _read_le {
461 ($ty: ty) => {{
462 if switch_endianness {
463 <$ty>::from_be_bytes(read!(from, $ty))
464 } else {
465 <$ty>::from_le_bytes(read!(from, $ty))
466 }
467 }};
468 }
469
470 macro_rules! _read_be {
471 ($ty: ty) => {{
472 if switch_endianness {
473 <$ty>::from_le_bytes(read!(from, $ty))
474 } else {
475 <$ty>::from_be_bytes(read!(from, $ty))
476 }
477 }};
478 }
479
480 macro_rules! _read_ne {
481 ($ty: ty) => {{
482 if cfg!(target_endian = "big") {
483 _read_be!($ty)
484 } else {
485 _read_le!($ty)
486 }
487 }};
488 }
489
490 macro_rules! _read_me {
491 () => {
492 ((_read_le!(u16) as i32) << 16) | (_read_le!(u16) as i32)
493 };
494 }
495
496 Ok(match self {
497 Self::lefloat => Float::lefloat(_read_le!(f32)),
498 Self::befloat => Float::befloat(_read_le!(f32)),
499 Self::ledouble => Float::ledouble(_read_le!(f64)),
500 Self::bedouble => Float::bedouble(_read_be!(f64)),
501 })
502 }
503}
504
505#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
506enum Op {
507 Mul,
508 Add,
509 Sub,
510 Div,
511 Mod,
512 And,
513 Xor,
514 Or,
515}
516
517impl Display for Op {
518 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
519 match self {
520 Op::Mul => write!(f, "*"),
521 Op::Add => write!(f, "+"),
522 Op::Sub => write!(f, "-"),
523 Op::Div => write!(f, "/"),
524 Op::Mod => write!(f, "%"),
525 Op::And => write!(f, "&"),
526 Op::Or => write!(f, "|"),
527 Op::Xor => write!(f, "^"),
528 }
529 }
530}
531
532#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
533enum CmpOp {
534 Eq,
535 Lt,
536 Gt,
537 BitAnd,
538 Neq, Xor,
540 Not, }
542
543impl CmpOp {
544 #[inline(always)]
545 fn is_neq(&self) -> bool {
546 matches!(self, Self::Neq)
547 }
548}
549
550#[derive(Debug, Clone, Serialize, Deserialize)]
551struct ScalarTransform {
552 op: Op,
553 num: Scalar,
554}
555
556impl ScalarTransform {
557 fn apply(&self, s: Scalar) -> Option<Scalar> {
558 match self.op {
559 Op::Add => s.checked_add(self.num),
560 Op::Sub => s.checked_sub(self.num),
561 Op::Mul => s.checked_mul(self.num),
562 Op::Div => s.checked_div(self.num),
563 Op::Mod => s.checked_rem(self.num),
564 Op::And => Some(s.bitand(self.num)),
565 Op::Xor => Some(s.bitxor(self.num)),
566 Op::Or => Some(s.bitor(self.num)),
567 }
568 }
569}
570
571#[derive(Debug, Clone, Serialize, Deserialize)]
572struct FloatTransform {
573 op: Op,
574 num: Float,
575}
576
577impl FloatTransform {
578 fn apply(&self, s: Float) -> Float {
579 match self.op {
580 Op::Add => s.add(self.num),
581 Op::Sub => s.sub(self.num),
582 Op::Mul => s.mul(self.num),
583 Op::Div => s.div(self.num),
585 Op::Mod => s.rem(self.num),
587 Op::And | Op::Xor | Op::Or => {
589 debug_panic!("unsupported operation");
590 s
591 }
592 }
593 }
594}
595
596#[derive(Debug, Clone, Serialize, Deserialize)]
597enum TestValue<T> {
598 Value(T),
599 Any,
600}
601
602impl<T> TestValue<T> {
603 #[inline(always)]
604 fn as_ref(&self) -> TestValue<&T> {
605 match self {
606 Self::Value(v) => TestValue::Value(v),
607 Self::Any => TestValue::Any,
608 }
609 }
610}
611
// Modifier flags of a regex test. Only `LineLimit`, `ForceBin` and `ForceText`
// are consulted in this part of the file.
flags! {
    enum ReMod: u8{
        // NOTE(review): not consulted in the visible code; presumably applied when the regex is built.
        CaseInsensitive,
        // NOTE(review): not consulted in the visible code.
        StartOffsetUpdate,
        // The test length counts lines; it is multiplied by 80 to get a byte count.
        LineLimit,
        // Forces the test to be treated as binary.
        ForceBin,
        // Forces the test to be treated as text (checked by `SearchTest::is_binary`).
        ForceText,
        // NOTE(review): not consulted in the visible code.
        TrimMatch,
    }
}
622
623fn serialize_regex<S>(re: &bytes::Regex, serializer: S) -> Result<S::Ok, S::Error>
624where
625 S: serde::Serializer,
626{
627 re.as_str().serialize(serializer)
628}
629
630fn deserialize_regex<'de, D>(deserializer: D) -> Result<bytes::Regex, D::Error>
631where
632 D: serde::Deserializer<'de>,
633{
634 let wrapper = String::deserialize(deserializer)?;
635 bytes::Regex::new(&wrapper).map_err(serde::de::Error::custom)
636}
637
638#[derive(Debug, Clone, Serialize, Deserialize)]
639struct RegexTest {
640 #[serde(
641 serialize_with = "serialize_regex",
642 deserialize_with = "deserialize_regex"
643 )]
644 re: bytes::Regex,
645 length: Option<usize>,
646 mods: FlagSet<ReMod>,
647 str_mods: FlagSet<StringMod>,
648 non_magic_len: usize,
649 binary: bool,
650 cmp_op: CmpOp,
651}
652
653impl RegexTest {
654 #[inline(always)]
655 fn is_binary(&self) -> bool {
656 self.binary
657 || self.mods.contains(ReMod::ForceBin)
658 || self.str_mods.contains(StringMod::ForceBin)
659 }
660
661 fn match_buf<'buf>(
662 &self,
663 off_buf: u64, stream_kind: StreamKind,
665 buf: &'buf [u8],
666 ) -> Option<MatchRes<'buf>> {
667 let mr = match stream_kind {
668 StreamKind::Text(_) => {
669 let mut off_txt = off_buf;
670
671 let mut line_limit = self.length.unwrap_or(usize::MAX);
672
673 for line in buf.split(|c| c == &b'\n') {
674 if line_limit == 0 {
678 break;
679 }
680
681 if let Some(re_match) = self.re.find(line) {
682 let start_offset = off_txt + re_match.start() as u64;
684
685 let stop_offset = if re_match.end() == line.len() {
687 Some(start_offset + re_match.as_bytes().len() as u64 + 1)
688 } else {
689 None
690 };
691
692 return Some(MatchRes::Bytes(
693 start_offset,
694 stop_offset,
695 re_match.as_bytes(),
696 Encoding::Utf8,
697 ));
698 }
699
700 off_txt += line.len() as u64;
701 off_txt += 1;
703 line_limit = line_limit.saturating_sub(1)
704 }
705 None
706 }
707
708 StreamKind::Binary => {
709 self.re.find(buf).map(|re_match| {
710 MatchRes::Bytes(
711 off_buf + re_match.start() as u64,
713 None,
714 re_match.as_bytes(),
715 Encoding::Utf8,
716 )
717 })
718 }
719 };
720
721 if self.cmp_op.is_neq() && mr.is_none() {
723 return Some(MatchRes::Bytes(off_buf, None, buf, Encoding::Utf8));
724 }
725
726 mr
727 }
728}
729
730impl From<RegexTest> for Test {
731 fn from(value: RegexTest) -> Self {
732 Self::Regex(value)
733 }
734}
735
// Modifier flags for string comparisons (see `string_match`).
flags! {
    enum StringMod: u8{
        // Forces the test to be treated as binary.
        ForceBin,
        // An uppercase byte of the expected string matches either case in the input.
        UpperInsensitive,
        // A lowercase byte of the expected string matches either case in the input.
        LowerInsensitive,
        // The match must be followed by ASCII whitespace or the end of the buffer.
        FullWordMatch,
        // ASCII whitespace is trimmed from the reported match.
        Trim,
        // Forces the test to be treated as text.
        ForceText,
        // A run of blanks in the expected string requires at least as many blanks in the input.
        CompactWhitespace,
        // Blanks are optional on both sides of the comparison.
        OptBlank,
    }
}
748
749#[derive(Debug, Clone, Serialize, Deserialize)]
750struct StringTest {
751 test_val: TestValue<Vec<u8>>,
752 cmp_op: CmpOp,
753 length: Option<usize>,
754 mods: FlagSet<StringMod>,
755 binary: bool,
756}
757
758impl From<StringTest> for Test {
759 fn from(value: StringTest) -> Self {
760 Self::String(value)
761 }
762}
763
/// Matches the expected bytes `str` against the beginning of `buf`, honoring
/// the string modifiers in `mods`.
///
/// Returns `(matched, consumed)` where `consumed` is the number of bytes of
/// `buf` that were consumed by the comparison.
#[inline(always)]
fn string_match(str: &[u8], mods: FlagSet<StringMod>, buf: &[u8]) -> (bool, usize) {
    let mut consumed = 0;
    // Fast path: no modifier alters the comparison, a plain prefix check is enough.
    if mods.is_disjoint(
        StringMod::UpperInsensitive
            | StringMod::LowerInsensitive
            | StringMod::FullWordMatch
            | StringMod::CompactWhitespace
            | StringMod::OptBlank,
    ) {
        if buf.starts_with(str) {
            (true, str.len())
        } else {
            (false, consumed)
        }
    } else {
        // Index of the next byte of `str` to match.
        let mut i_src = 0;
        let mut iter = buf.iter().peekable();

        // Advances in `buf` by one byte, counting it as consumed.
        macro_rules! consume_target {
            () => {{
                if iter.next().is_some() {
                    consumed += 1;
                }
            }};
        }

        // Both sides matched on the current byte: advance both and loop.
        macro_rules! continue_next_iteration {
            () => {{
                consume_target!();
                i_src += 1;
                continue;
            }};
        }

        while let Some(&&b) = iter.peek() {
            // Stop once the expected string is exhausted.
            let Some(&ref_byte) = str.get(i_src) else {
                break;
            };

            // Optional blanks: a blank on either side is skipped on that side only.
            if mods.contains(StringMod::OptBlank) && (b == b' ' || ref_byte == b' ') {
                if b == b' ' {
                    consume_target!();
                }

                if ref_byte == b' ' {
                    i_src += 1;
                }

                continue;
            }

            // An uppercase expected byte matches both cases of the input byte.
            if mods.contains(StringMod::UpperInsensitive) {
                if ref_byte.is_ascii_uppercase() && ref_byte == b.to_ascii_uppercase()
                    || ref_byte == b
                {
                    continue_next_iteration!()
                }
            }

            // A lowercase expected byte matches both cases of the input byte.
            if mods.contains(StringMod::LowerInsensitive)
                && (ref_byte.is_ascii_lowercase() && ref_byte == b.to_ascii_lowercase()
                    || ref_byte == b)
            {
                continue_next_iteration!()
            }

            // Whitespace compaction: the input must hold at least as many
            // consecutive blanks as the expected string.
            if mods.contains(StringMod::CompactWhitespace) && ref_byte == b' ' {
                let mut src_blk = 0;
                while let Some(b' ') = str.get(i_src) {
                    src_blk += 1;
                    i_src += 1;
                }

                let mut tgt_blk = 0;
                while let Some(b' ') = iter.peek() {
                    tgt_blk += 1;
                    consume_target!();
                }

                if src_blk > tgt_blk {
                    return (false, consumed);
                }

                continue;
            }

            if ref_byte == b {
                continue_next_iteration!()
            } else {
                return (false, consumed);
            }
        }

        // Full-word match: the byte following the match, if any, must be whitespace.
        if mods.contains(StringMod::FullWordMatch)
            && let Some(b) = iter.peek()
            && !b.is_ascii_whitespace()
        {
            return (false, consumed);
        }

        // Success requires that something was consumed and that the whole
        // expected string was matched.
        (
            consumed > 0 && str.get(i_src).is_none() && consumed <= buf.len(),
            consumed,
        )
    }
}
876
877impl StringTest {
878 fn has_length_mod(&self) -> bool {
879 !self.mods.is_disjoint(
880 StringMod::UpperInsensitive
881 | StringMod::LowerInsensitive
882 | StringMod::FullWordMatch
883 | StringMod::CompactWhitespace
884 | StringMod::OptBlank,
885 )
886 }
887
888 #[inline(always)]
889 fn test_value_len(&self) -> usize {
890 match self.test_val.as_ref() {
891 TestValue::Value(s) => s.len(),
892 TestValue::Any => 0,
893 }
894 }
895
896 #[inline(always)]
897 fn is_binary(&self) -> bool {
898 self.binary || self.mods.contains(StringMod::ForceBin)
899 }
900
901 #[inline(always)]
902 fn is_text(&self) -> bool {
903 self.mods.contains(StringMod::ForceText)
904 }
905}
906
907#[derive(Debug, Clone, Serialize, Deserialize)]
908struct SearchTest {
909 str: Vec<u8>,
910 n_pos: Option<usize>,
911 str_mods: FlagSet<StringMod>,
912 re_mods: FlagSet<ReMod>,
913 binary: bool,
914 cmp_op: CmpOp,
915}
916
917impl From<SearchTest> for Test {
918 fn from(value: SearchTest) -> Self {
919 Self::Search(value)
920 }
921}
922
923impl SearchTest {
924 #[inline(always)]
925 fn is_binary(&self) -> bool {
926 (self.binary
927 || self.str_mods.contains(StringMod::ForceBin)
928 || self.re_mods.contains(ReMod::ForceBin))
929 && !(self.str_mods.contains(StringMod::ForceText)
930 || self.re_mods.contains(ReMod::ForceText))
931 }
932
933 #[inline]
935 fn match_buf<'buf>(&self, off_buf: u64, buf: &'buf [u8]) -> Option<MatchRes<'buf>> {
936 let mut i = 0;
937
938 let needle = self.str.first()?;
939
940 while i < buf.len() {
941 i += memchr(*needle, &buf[i..])?;
944
945 if self.str_mods.contains(StringMod::FullWordMatch) {
947 let prev_is_whitespace = buf
948 .get(i.saturating_sub(1))
949 .map(|c| c.is_ascii_whitespace())
950 .unwrap_or_default();
951
952 if i > 0 && !prev_is_whitespace {
957 i += 1;
958 continue;
959 }
960 }
961
962 if let Some(npos) = self.n_pos
963 && i > npos
964 {
965 break;
966 }
967
968 let pos = i;
969 let (ok, consumed) = string_match(&self.str, self.str_mods, &buf[i..]);
970
971 if ok {
972 return Some(MatchRes::Bytes(
973 off_buf.saturating_add(pos as u64),
974 None,
975 &buf[i..i + consumed],
976 Encoding::Utf8,
977 ));
978 } else {
979 i += max(consumed, 1)
980 }
981 }
982
983 if self.cmp_op.is_neq() {
985 return Some(MatchRes::Bytes(off_buf, None, buf, Encoding::Utf8));
986 }
987
988 None
989 }
990}
991
/// A test comparing an integer-like value read from the input.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct ScalarTest {
    /// Type (width, signedness, endianness) of the value to read.
    ty: ScalarDataType,
    /// Optional operation applied to the read value before comparison.
    transform: Option<ScalarTransform>,
    /// Comparison operator.
    cmp_op: CmpOp,
    /// Expected value, or a wildcard.
    test_val: TestValue<Scalar>,
}
999
/// A test comparing a floating-point value read from the input.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct FloatTest {
    /// Type (width, endianness) of the value to read.
    ty: FloatDataType,
    /// Optional operation applied to the read value before comparison.
    transform: Option<FloatTransform>,
    /// Comparison operator.
    cmp_op: CmpOp,
    /// Expected value, or a wildcard.
    test_val: TestValue<Float>,
}
1007
1008#[derive(Debug, PartialEq)]
1011enum ReadValue<'buf> {
1012 Float(u64, Float),
1013 Scalar(u64, Scalar),
1014 Bytes(u64, &'buf [u8]),
1015}
1016
1017impl DynDisplay for ReadValue<'_> {
1018 fn dyn_fmt(&self, f: &dyf::FormatSpec) -> Result<String, dyf::Error> {
1019 match self {
1020 Self::Float(_, s) => DynDisplay::dyn_fmt(s, f),
1021 Self::Scalar(_, s) => DynDisplay::dyn_fmt(s, f),
1022 Self::Bytes(_, b) => Ok(format!("{b:?}")),
1023 }
1024 }
1025}
1026
1027impl DynDisplay for &ReadValue<'_> {
1028 fn dyn_fmt(&self, f: &dyf::FormatSpec) -> Result<String, dyf::Error> {
1029 DynDisplay::dyn_fmt(*self, f)
1031 }
1032}
1033
1034impl Display for ReadValue<'_> {
1035 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1036 match self {
1037 Self::Float(_, v) => write!(f, "{v}"),
1038 Self::Scalar(_, s) => write!(f, "{s}"),
1039 Self::Bytes(_, b) => write!(f, "{b:?}"),
1040 }
1041 }
1042}
1043
/// Text encoding used to decode the bytes of a match when formatting it.
enum Encoding {
    /// UTF-16 with the given byte order.
    Utf16(String16Encoding),
    /// UTF-8 (decoded lossily).
    Utf8,
}
1048
1049enum MatchRes<'buf> {
1052 Bytes(u64, Option<u64>, &'buf [u8], Encoding),
1057 Scalar(u64, Scalar),
1058 Float(u64, Float),
1059}
1060
1061impl DynDisplay for &MatchRes<'_> {
1062 fn dyn_fmt(&self, f: &dyf::FormatSpec) -> Result<String, dyf::Error> {
1063 (*self).dyn_fmt(f)
1064 }
1065}
1066
1067impl DynDisplay for MatchRes<'_> {
1068 fn dyn_fmt(&self, f: &dyf::FormatSpec) -> Result<String, dyf::Error> {
1069 match self {
1070 Self::Scalar(_, v) => v.dyn_fmt(f),
1071 Self::Float(_, v) => v.dyn_fmt(f),
1072 Self::Bytes(_, _, v, enc) => match enc {
1073 Encoding::Utf8 => String::from_utf8_lossy(v).to_string().dyn_fmt(f),
1074 Encoding::Utf16(enc) => {
1075 let utf16: Vec<u16> = slice_to_utf16_iter(v, *enc).collect();
1076 String::from_utf16_lossy(&utf16).dyn_fmt(f)
1077 }
1078 },
1079 }
1080 }
1081}
1082
1083impl MatchRes<'_> {
1084 #[inline]
1086 fn start_offset(&self) -> u64 {
1087 match self {
1088 MatchRes::Bytes(o, _, _, _) => *o,
1089 MatchRes::Scalar(o, _) => *o,
1090 MatchRes::Float(o, _) => *o,
1091 }
1092 }
1093
1094 #[inline]
1096 fn end_offset(&self) -> u64 {
1097 match self {
1098 MatchRes::Bytes(start, end, buf, _) => match end {
1099 Some(end) => *end,
1100 None => start.saturating_add(buf.len() as u64),
1101 },
1102 MatchRes::Scalar(o, sc) => o.add(sc.size_of() as u64),
1103 MatchRes::Float(o, f) => o.add(f.size_of() as u64),
1104 }
1105 }
1106}
1107
1108fn slice_to_utf16_iter(read: &[u8], encoding: String16Encoding) -> impl Iterator<Item = u16> {
1109 let even = read
1110 .iter()
1111 .enumerate()
1112 .filter(|(i, _)| i % 2 == 0)
1113 .map(|t| t.1);
1114
1115 let odd = read
1116 .iter()
1117 .enumerate()
1118 .filter(|(i, _)| i % 2 != 0)
1119 .map(|t| t.1);
1120
1121 even.zip(odd).map(move |(e, o)| match encoding {
1122 String16Encoding::Le => u16::from_le_bytes([*e, *o]),
1123 String16Encoding::Be => u16::from_be_bytes([*e, *o]),
1124 })
1125}
1126
/// Byte order of a UTF-16 string.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
enum String16Encoding {
    /// Little-endian code units.
    Le,
    /// Big-endian code units.
    Be,
}
1132
1133#[derive(Debug, Clone, Serialize, Deserialize)]
1134struct String16Test {
1135 orig: String,
1136 test_val: TestValue<Vec<u16>>,
1137 encoding: String16Encoding,
1138}
1139
1140impl String16Test {
1141 #[inline(always)]
1145 fn test_value_len(&self) -> usize {
1146 match self.test_val.as_ref() {
1147 TestValue::Value(str16) => str16.len(),
1148 TestValue::Any => 0,
1149 }
1150 }
1151}
1152
// Modifier flags of an indirect test.
flags! {
    enum IndirectMod: u8{
        // NOTE(review): not consulted in the visible code; presumably marks the
        // indirect offset as relative — confirm.
        Relative,
    }
}

/// Set of [`IndirectMod`] flags.
type IndirectMods = FlagSet<IndirectMod>;
1160
1161#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
1162enum PStringLen {
1163 Byte, ShortBe, ShortLe, LongBe, LongLe, }
1169
1170impl PStringLen {
1171 #[inline(always)]
1172 const fn size_of_len(&self) -> usize {
1173 match self {
1174 PStringLen::Byte => 1,
1175 PStringLen::ShortBe => 2,
1176 PStringLen::ShortLe => 2,
1177 PStringLen::LongBe => 4,
1178 PStringLen::LongLe => 4,
1179 }
1180 }
1181}
1182
1183#[derive(Debug, Clone, Serialize, Deserialize)]
1184struct PStringTest {
1185 len: PStringLen,
1186 test_val: TestValue<Vec<u8>>,
1187 include_len: bool,
1188}
1189
1190impl PStringTest {
1191 #[inline]
1192 fn read<'cache, R: Read + Seek>(
1193 &self,
1194 haystack: &'cache mut LazyCache<R>,
1195 ) -> Result<Option<&'cache [u8]>, Error> {
1196 let mut len = match self.len {
1197 PStringLen::Byte => read_le!(haystack, u8) as u32,
1198 PStringLen::ShortBe => read_be!(haystack, u16) as u32,
1199 PStringLen::ShortLe => read_le!(haystack, u16) as u32,
1200 PStringLen::LongBe => read_be!(haystack, u32),
1201 PStringLen::LongLe => read_le!(haystack, u32),
1202 } as usize;
1203
1204 if self.include_len {
1205 len = len.saturating_sub(self.len.size_of_len())
1206 }
1207
1208 if let TestValue::Value(s) = self.test_val.as_ref()
1209 && len != s.len()
1210 {
1211 return Ok(None);
1212 }
1213
1214 let read = haystack.read_exact_count(len as u64)?;
1215
1216 Ok(Some(read))
1217 }
1218
1219 #[inline(always)]
1220 fn test_value_len(&self) -> usize {
1221 match self.test_val.as_ref() {
1222 TestValue::Value(s) => s.len(),
1223 TestValue::Any => 0,
1224 }
1225 }
1226}
1227
/// The kinds of test a rule entry can perform.
#[derive(Debug, Clone, Serialize, Deserialize)]
enum Test {
    /// Carries a name; reads no value from the input.
    Name(String),
    /// Carries a flag and a name; reads no value from the input.
    /// NOTE(review): the meaning of the flag is not visible here — confirm.
    Use(bool, String),
    /// Integer-like value comparison.
    Scalar(ScalarTest),
    /// Floating-point value comparison.
    Float(FloatTest),
    /// String comparison at the current offset.
    String(StringTest),
    /// String search within a range of the input.
    Search(SearchTest),
    /// Length-prefixed string comparison.
    PString(PStringTest),
    /// Regular-expression match.
    Regex(RegexTest),
    /// Indirect test with its modifiers; reads no value from the input.
    Indirect(FlagSet<IndirectMod>),
    /// UTF-16 string comparison.
    String16(String16Test),
    // NOTE(review): never constructed in the visible code, hence the
    // `dead_code` allowance — confirm whether the variant is still needed.
    #[allow(dead_code)]
    Der,
    /// Reads no value from the input.
    Clear,
    /// Reads no value from the input.
    Default,
}
1246
1247impl Test {
1248 #[inline]
1250 fn read_test_value<'haystack, R: Read + Seek>(
1251 &self,
1252 haystack: &'haystack mut LazyCache<R>,
1253 switch_endianness: bool,
1254 ) -> Result<Option<ReadValue<'haystack>>, Error> {
1255 let test_value_offset = haystack.lazy_stream_position();
1256
1257 match self {
1258 Self::Scalar(t) => {
1259 t.ty.read(haystack, switch_endianness)
1260 .map(|s| Some(ReadValue::Scalar(test_value_offset, s)))
1261 }
1262
1263 Self::Float(t) => {
1264 t.ty.read(haystack, switch_endianness)
1265 .map(|f| Some(ReadValue::Float(test_value_offset, f)))
1266 }
1267 Self::String(t) => {
1268 match t.test_val.as_ref() {
1269 TestValue::Value(str) => {
1270 let buf = if let Some(length) = t.length {
1271 haystack.read_exact_count(length as u64)?
1273 } else {
1274 match t.cmp_op {
1277 CmpOp::Eq | CmpOp::Neq => {
1278 if !t.has_length_mod() {
1279 haystack.read_exact_count(str.len() as u64)?
1280 } else {
1281 haystack.read_count(FILE_BYTES_MAX as u64)?
1282 }
1283 }
1284 CmpOp::Lt | CmpOp::Gt => {
1285 let read =
1286 haystack.read_until_any_delim_or_limit(b"\n\0", 8092)?;
1287
1288 if read.ends_with(b"\0") || read.ends_with(b"\n") {
1289 &read[..read.len() - 1]
1290 } else {
1291 read
1292 }
1293 }
1294 _ => {
1295 return Err(Error::Msg(format!(
1296 "string test does not support {:?} operator",
1297 t.cmp_op
1298 )));
1299 }
1300 }
1301 };
1302
1303 Ok(Some(ReadValue::Bytes(test_value_offset, buf)))
1304 }
1305 TestValue::Any => {
1306 let read = haystack.read_until_any_delim_or_limit(b"\0\n", 8192)?;
1307 let bytes = if read.ends_with(b"\0") || read.ends_with(b"\n") {
1309 &read[..read.len() - 1]
1310 } else {
1311 read
1312 };
1313
1314 Ok(Some(ReadValue::Bytes(test_value_offset, bytes)))
1315 }
1316 }
1317 }
1318
1319 Self::String16(t) => {
1320 match t.test_val.as_ref() {
1321 TestValue::Value(str16) => {
1322 let read = haystack.read_exact_count((str16.len() * 2) as u64)?;
1323
1324 Ok(Some(ReadValue::Bytes(test_value_offset, read)))
1325 }
1326 TestValue::Any => {
1327 let read = haystack.read_until_utf16_or_limit(b"\x00\x00", 8192)?;
1328
1329 let end = if read.len() % 2 == 0 {
1331 read.len()
1332 } else {
1333 read.len().saturating_sub(1)
1336 };
1337
1338 Ok(Some(ReadValue::Bytes(test_value_offset, &read[..end])))
1339 }
1340 }
1341 }
1342
1343 Self::PString(t) => {
1344 let Some(read) = t.read(haystack)? else {
1345 return Ok(None);
1346 };
1347 Ok(Some(ReadValue::Bytes(test_value_offset, read)))
1348 }
1349
1350 Self::Search(_) => {
1351 let buf = haystack.read_count(FILE_BYTES_MAX as u64)?;
1352 Ok(Some(ReadValue::Bytes(test_value_offset, buf)))
1353 }
1354
1355 Self::Regex(r) => {
1356 let length = {
1357 match r.length {
1358 Some(len) => {
1359 if r.mods.contains(ReMod::LineLimit) {
1360 len * 80
1361 } else {
1362 len
1363 }
1364 }
1365
1366 None => FILE_REGEX_MAX,
1367 }
1368 };
1369
1370 let read = haystack.read_count(length as u64)?;
1371 Ok(Some(ReadValue::Bytes(test_value_offset, read)))
1372 }
1373
1374 Self::Name(_)
1375 | Self::Use(_, _)
1376 | Self::Indirect(_)
1377 | Self::Clear
1378 | Self::Default
1379 | Self::Der => Err(Error::msg("no value to read for this test")),
1380 }
1381 }
1382
1383 #[inline(always)]
1384 fn match_value<'s>(
1385 &'s self,
1386 tv: &ReadValue<'s>,
1387 stream_kind: StreamKind,
1388 ) -> Option<MatchRes<'s>> {
1389 match (self, tv) {
1390 (Self::Scalar(t), ReadValue::Scalar(o, ts)) => {
1391 let read_value: Scalar = match t.transform.as_ref() {
1392 Some(t) => t.apply(*ts)?,
1393 None => *ts,
1394 };
1395
1396 match t.test_val {
1397 TestValue::Value(test_value) => {
1398 let ok = match t.cmp_op {
1399 CmpOp::Not => read_value == !test_value,
1402 CmpOp::Eq => read_value == test_value,
1403 CmpOp::Lt => read_value < test_value,
1404 CmpOp::Gt => read_value > test_value,
1405 CmpOp::Neq => read_value != test_value,
1406 CmpOp::BitAnd => read_value & test_value == test_value,
1407 CmpOp::Xor => (read_value & test_value).is_zero(),
1408 };
1409
1410 if ok {
1411 Some(MatchRes::Scalar(*o, read_value))
1412 } else {
1413 None
1414 }
1415 }
1416
1417 TestValue::Any => Some(MatchRes::Scalar(*o, read_value)),
1418 }
1419 }
1420
1421 (Self::Float(t), ReadValue::Float(o, f)) => {
1422 let read_value: Float = t.transform.as_ref().map(|t| t.apply(*f)).unwrap_or(*f);
1423
1424 match t.test_val {
1425 TestValue::Value(tf) => {
1426 let ok = match t.cmp_op {
1427 CmpOp::Eq => read_value == tf,
1428 CmpOp::Lt => read_value < tf,
1429 CmpOp::Gt => read_value > tf,
1430 CmpOp::Neq => read_value != tf,
1431 _ => {
1432 debug_panic!("unsupported float comparison");
1435 debug!("unsupported float comparison");
1436 false
1437 }
1438 };
1439
1440 if ok {
1441 Some(MatchRes::Float(*o, read_value))
1442 } else {
1443 None
1444 }
1445 }
1446 TestValue::Any => Some(MatchRes::Float(*o, read_value)),
1447 }
1448 }
1449
1450 (Self::String(st), ReadValue::Bytes(o, buf)) => {
1451 macro_rules! trim_buf {
1452 ($buf: expr) => {{
1453 if st.mods.contains(StringMod::Trim) {
1454 $buf.trim_ascii()
1455 } else {
1456 $buf
1457 }
1458 }};
1459 }
1460
1461 match st.test_val.as_ref() {
1462 TestValue::Value(str) => {
1463 match st.cmp_op {
1464 CmpOp::Eq => {
1465 if let (true, _) = string_match(str, st.mods, buf) {
1466 Some(MatchRes::Bytes(*o, None, trim_buf!(str), Encoding::Utf8))
1467 } else {
1468 None
1469 }
1470 }
1471 CmpOp::Neq => {
1472 if let (false, _) = string_match(str, st.mods, buf) {
1473 Some(MatchRes::Bytes(*o, None, trim_buf!(str), Encoding::Utf8))
1474 } else {
1475 None
1476 }
1477 }
1478 CmpOp::Gt => {
1479 if buf.len() > str.len() {
1480 Some(MatchRes::Bytes(*o, None, trim_buf!(buf), Encoding::Utf8))
1481 } else {
1482 None
1483 }
1484 }
1485 CmpOp::Lt => {
1486 if buf.len() < str.len() {
1487 Some(MatchRes::Bytes(*o, None, trim_buf!(buf), Encoding::Utf8))
1488 } else {
1489 None
1490 }
1491 }
1492
1493 _ => {
1495 debug_panic!("unsupported string comparison");
1498 debug!("unsupported string comparison");
1499 None
1500 }
1501 }
1502 }
1503 TestValue::Any => {
1504 Some(MatchRes::Bytes(*o, None, trim_buf!(buf), Encoding::Utf8))
1505 }
1506 }
1507 }
1508
1509 (Self::PString(m), ReadValue::Bytes(o, buf)) => match m.test_val.as_ref() {
1510 TestValue::Value(psv) => {
1511 if buf == psv {
1512 Some(MatchRes::Bytes(*o, None, buf, Encoding::Utf8))
1513 } else {
1514 None
1515 }
1516 }
1517 TestValue::Any => Some(MatchRes::Bytes(*o, None, buf, Encoding::Utf8)),
1518 },
1519
1520 (Self::String16(t), ReadValue::Bytes(o, buf)) => {
1521 match t.test_val.as_ref() {
1522 TestValue::Value(str16) => {
1523 if str16.len() * 2 != buf.len() {
1525 return None;
1526 }
1527
1528 for (i, utf16_char) in slice_to_utf16_iter(buf, t.encoding).enumerate() {
1530 if str16[i] != utf16_char {
1531 return None;
1532 }
1533 }
1534
1535 Some(MatchRes::Bytes(
1536 *o,
1537 None,
1538 t.orig.as_bytes(),
1539 Encoding::Utf16(t.encoding),
1540 ))
1541 }
1542
1543 TestValue::Any => {
1544 Some(MatchRes::Bytes(*o, None, buf, Encoding::Utf16(t.encoding)))
1545 }
1546 }
1547 }
1548
1549 (Self::Regex(r), ReadValue::Bytes(o, buf)) => r.match_buf(*o, stream_kind, buf),
1550
1551 (Self::Search(t), ReadValue::Bytes(o, buf)) => t.match_buf(*o, buf),
1552
1553 _ => None,
1554 }
1555 }
1556
1557 #[inline(always)]
1558 fn strength(&self) -> u64 {
1559 const MULT: usize = 10;
1560
1561 let mut out = 2 * MULT;
1562
1563 match self {
1565 Test::Scalar(s) => {
1566 out += s.ty.type_size() * MULT;
1567 }
1568
1569 Test::Float(t) => {
1570 out += t.ty.type_size() * MULT;
1571 }
1572
1573 Test::String(t) => out += t.test_value_len().saturating_mul(MULT),
1574
1575 Test::PString(t) => out += t.test_value_len().saturating_mul(MULT),
1576
1577 Test::Search(s) => {
1578 let n_pos = s.n_pos.unwrap_or(FILE_BYTES_MAX);
1583
1584 match n_pos {
1585 0..=80 => out += s.str.len().saturating_mul(MULT),
1587 81..=240 => out += s.str.len() * s.str.len().clamp(0, MULT - 2),
1589 _ => out += s.str.len(),
1591 }
1592 }
1593
1594 Test::Regex(r) => {
1595 let v = r.non_magic_len / r.re.captures_len();
1604
1605 let len = r
1606 .length
1607 .map(|l| {
1608 if r.mods.contains(ReMod::LineLimit) {
1609 l * 80
1610 } else {
1611 l
1612 }
1613 })
1614 .unwrap_or(FILE_BYTES_MAX);
1615
1616 match len {
1617 0..=80 => out += v.saturating_mul(MULT),
1619 81..=240 => out += v * v.clamp(0, MULT - 2),
1621 _ => out += v,
1623 }
1624 }
1625
1626 Test::String16(t) => {
1627 out += t.test_value_len().saturating_mul(MULT);
1632 }
1633
1634 Test::Der => out += MULT,
1635
1636 Test::Default | Test::Name(_) | Test::Use(_, _) | Test::Indirect(_) | Test::Clear => {
1637 return 0;
1638 }
1639 }
1640
1641 if self.is_match_any() {
1643 return 0;
1644 }
1645
1646 if let Some(op) = self.cmp_op() {
1647 match op {
1648 CmpOp::Neq => out = 0,
1650 CmpOp::Eq | CmpOp::Not => out += MULT,
1651 CmpOp::Lt | CmpOp::Gt => out -= 2 * MULT,
1652 CmpOp::Xor | CmpOp::BitAnd => out -= MULT,
1653 }
1654 }
1655
1656 out as u64
1657 }
1658
1659 #[inline(always)]
1660 fn cmp_op(&self) -> Option<CmpOp> {
1661 match self {
1662 Self::String(t) => Some(t.cmp_op),
1663 Self::Scalar(s) => Some(s.cmp_op),
1664 Self::Float(t) => Some(t.cmp_op),
1665 Self::Name(_)
1666 | Self::Use(_, _)
1667 | Self::Search(_)
1668 | Self::PString(_)
1669 | Self::Regex(_)
1670 | Self::Clear
1671 | Self::Default
1672 | Self::Indirect(_)
1673 | Self::String16(_)
1674 | Self::Der => None,
1675 }
1676 }
1677
1678 #[inline(always)]
1679 fn is_match_any(&self) -> bool {
1680 match self {
1681 Test::Name(_) => false,
1682 Test::Use(_, _) => false,
1683 Test::Scalar(scalar_test) => matches!(scalar_test.test_val, TestValue::Any),
1684 Test::Float(float_test) => matches!(float_test.test_val, TestValue::Any),
1685 Test::String(string_test) => matches!(string_test.test_val, TestValue::Any),
1686 Test::Search(_) => false,
1687 Test::PString(pstring_test) => matches!(pstring_test.test_val, TestValue::Any),
1688 Test::Regex(_) => false,
1689 Test::Indirect(_) => false,
1690 Test::String16(string16_test) => matches!(string16_test.test_val, TestValue::Any),
1691 Test::Der => false,
1692 Test::Clear => false,
1693 Test::Default => false,
1694 }
1695 }
1696
1697 #[inline(always)]
1698 fn is_binary(&self) -> bool {
1699 match self {
1700 Self::Name(_) => true,
1701 Self::Use(_, _) => true,
1702 Self::Scalar(_) => true,
1703 Self::Float(_) => true,
1704 Self::String(t) => !t.is_binary() & !t.is_text() || t.is_binary(),
1705 Self::Search(t) => t.is_binary(),
1706 Self::PString(_) => true,
1707 Self::Regex(t) => t.is_binary(),
1708 Self::Clear => true,
1709 Self::Default => true,
1710 Self::Indirect(_) => true,
1711 Self::String16(_) => true,
1712 Self::Der => true,
1713 }
1714 }
1715
1716 #[inline(always)]
1717 fn is_text(&self) -> bool {
1718 match self {
1719 Self::Name(_) => true,
1720 Self::Use(_, _) => true,
1721 Self::Indirect(_) => true,
1722 Self::Clear => true,
1723 Self::Default => true,
1724 Self::String(t) => !t.is_binary() & !t.is_text() || t.is_text(),
1725 _ => !self.is_binary(),
1726 }
1727 }
1728
1729 #[inline(always)]
1730 fn is_only_text(&self) -> bool {
1731 self.is_text() && !self.is_binary()
1732 }
1733
1734 #[inline(always)]
1735 fn is_only_binary(&self) -> bool {
1736 self.is_binary() && !self.is_text()
1737 }
1738}
1739
/// Encoding of the value read from the stream when resolving an indirect
/// offset (see `IndOffset::read_offset`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
enum OffsetType {
    /// Single byte.
    Byte,
    /// 64-bit float, little endian, cast to an integer once read.
    DoubleLe,
    /// 64-bit float, big endian, cast to an integer once read.
    DoubleBe,
    /// 16-bit integer, little endian.
    ShortLe,
    /// 16-bit integer, big endian.
    ShortBe,
    /// 32-bit little-endian value passed through `decode_id3`.
    Id3Le,
    /// 32-bit big-endian value passed through `decode_id3`.
    Id3Be,
    /// 32-bit integer, little endian.
    LongLe,
    /// 32-bit integer, big endian.
    LongBe,
    /// 32-bit value read with `read_me!`: two little-endian 16-bit halves,
    /// the first one being the most significant.
    Middle,
    /// Value parsed from the stream by `read_octal_u64`.
    Octal,
    /// 64-bit integer, big endian.
    QuadBe,
    /// 64-bit integer, little endian.
    QuadLe,
}
1756
/// Right-hand operand combined with the value read by an indirect offset.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
enum Shift {
    /// Literal operand.
    Direct(u64),
    /// Operand read from the stream, at the address of the offset value
    /// displaced by this signed amount.
    Indirect(i64),
}
1762
/// Offset whose value is stored in the inspected stream itself.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
struct IndOffset {
    /// Location of the offset value inside the stream.
    off_addr: DirOffset,
    /// Whether integer values are read as signed.
    signed: bool,
    /// Encoding of the offset value.
    ty: OffsetType,
    /// Operation applied to the value read; only used when `shift` is set too.
    op: Option<Op>,
    /// Operand of `op`; only used when `op` is set too.
    shift: Option<Shift>,
}
1774
1775impl IndOffset {
1776 fn read_offset<R: Read + Seek>(
1778 &self,
1779 haystack: &mut LazyCache<R>,
1780 rule_base_offset: Option<u64>,
1781 last_upper_match_offset: Option<u64>,
1782 ) -> Result<Option<u64>, io::Error> {
1783 let offset_address = match self.off_addr {
1784 DirOffset::Start(s) => {
1785 let Some(o) = s.checked_add(rule_base_offset.unwrap_or_default()) else {
1786 return Ok(None);
1787 };
1788
1789 haystack.seek(SeekFrom::Start(o))?
1790 }
1791 DirOffset::LastUpper(c) => haystack.seek(SeekFrom::Start(
1792 (last_upper_match_offset.unwrap_or_default() as i64 + c) as u64,
1793 ))?,
1794 DirOffset::End(e) => haystack.seek(SeekFrom::End(e))?,
1795 };
1796
1797 macro_rules! read_value {
1798 () => {
1799 match self.ty {
1800 OffsetType::Byte => {
1801 if self.signed {
1802 read_le!(haystack, u8) as u64
1803 } else {
1804 read_le!(haystack, i8) as u64
1805 }
1806 }
1807 OffsetType::DoubleLe => read_le!(haystack, f64) as u64,
1808 OffsetType::DoubleBe => read_be!(haystack, f64) as u64,
1809 OffsetType::ShortLe => {
1810 if self.signed {
1811 read_le!(haystack, i16) as u64
1812 } else {
1813 read_le!(haystack, u16) as u64
1814 }
1815 }
1816 OffsetType::ShortBe => {
1817 if self.signed {
1818 read_be!(haystack, i16) as u64
1819 } else {
1820 read_be!(haystack, u16) as u64
1821 }
1822 }
1823 OffsetType::Id3Le => decode_id3(read_le!(haystack, u32)) as u64,
1824 OffsetType::Id3Be => decode_id3(read_be!(haystack, u32)) as u64,
1825 OffsetType::LongLe => {
1826 if self.signed {
1827 read_le!(haystack, i32) as u64
1828 } else {
1829 read_le!(haystack, u32) as u64
1830 }
1831 }
1832 OffsetType::LongBe => {
1833 if self.signed {
1834 read_be!(haystack, i32) as u64
1835 } else {
1836 read_be!(haystack, u32) as u64
1837 }
1838 }
1839 OffsetType::Middle => read_me!(haystack) as u64,
1840 OffsetType::Octal => {
1841 if let Some(o) = read_octal_u64(haystack) {
1842 o
1843 } else {
1844 debug!("failed to read octal offset @ {offset_address}");
1845 return Ok(None);
1846 }
1847 }
1848 OffsetType::QuadLe => {
1849 if self.signed {
1850 read_le!(haystack, i64) as u64
1851 } else {
1852 read_le!(haystack, u64)
1853 }
1854 }
1855 OffsetType::QuadBe => {
1856 if self.signed {
1857 read_be!(haystack, i64) as u64
1858 } else {
1859 read_be!(haystack, u64)
1860 }
1861 }
1862 }
1863 };
1864 }
1865
1866 let o = read_value!();
1868
1869 trace!(
1870 "offset read @ {offset_address} value={o} op={:?} shift={:?}",
1871 self.op, self.shift
1872 );
1873
1874 if let (Some(op), Some(shift)) = (self.op, self.shift) {
1876 let shift = match shift {
1877 Shift::Direct(i) => i,
1878 Shift::Indirect(i) => {
1879 let tmp = offset_address as i128 + i as i128;
1880 if tmp.is_negative() {
1881 return Ok(None);
1882 } else {
1883 haystack.seek(SeekFrom::Start(tmp as u64))?;
1884 };
1885 read_value!()
1888 }
1889 };
1890
1891 match op {
1892 Op::Add => return Ok(o.checked_add(shift)),
1893 Op::Mul => return Ok(o.checked_mul(shift)),
1894 Op::Sub => return Ok(o.checked_sub(shift)),
1895 Op::Div => return Ok(o.checked_div(shift)),
1896 Op::Mod => return Ok(o.checked_rem(shift)),
1897 Op::And => return Ok(Some(o & shift)),
1898 Op::Or => return Ok(Some(o | shift)),
1899 Op::Xor => return Ok(Some(o ^ shift)),
1900 }
1901 }
1902
1903 Ok(Some(o))
1904 }
1905}
1906
/// Offset expressed directly in a rule, without reading the stream.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
enum DirOffset {
    /// Offset counted from the start (the rule base offset, when there is
    /// one, gets added to it).
    Start(u64),
    /// Signed displacement relative to the offset left by the upper level.
    LastUpper(i64),
    /// Position relative to the end of the stream (`SeekFrom::End`).
    End(i64),
}
1914
/// Location at which a test is evaluated.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
enum Offset {
    /// Offset known without reading the stream.
    Direct(DirOffset),
    /// Offset whose value must be read from the stream.
    Indirect(IndOffset),
}
1920
1921impl From<DirOffset> for Offset {
1922 fn from(value: DirOffset) -> Self {
1923 Self::Direct(value)
1924 }
1925}
1926
1927impl From<IndOffset> for Offset {
1928 fn from(value: IndOffset) -> Self {
1929 Self::Indirect(value)
1930 }
1931}
1932
1933impl Display for DirOffset {
1934 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1935 match self {
1936 DirOffset::Start(i) => write!(f, "{i}"),
1937 DirOffset::LastUpper(c) => write!(f, "&{c}"),
1938 DirOffset::End(e) => write!(f, "-{e}"),
1939 }
1940 }
1941}
1942
1943impl Default for DirOffset {
1944 fn default() -> Self {
1945 Self::LastUpper(0)
1946 }
1947}
1948
/// One test line of a rule: where to look, what to test and what to report.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct Match {
    /// Line number reported in logs and errors.
    line: usize,
    /// Nesting depth of the entry; also its continuation level.
    depth: u8,
    /// Location at which the test is evaluated.
    offset: Offset,
    /// Test to run.
    test: Test,
    /// Strength of `test` (the `From` conversions in this file store
    /// `test.strength()` here).
    test_strength: u64,
    /// Message reported when the test matches.
    message: Option<Message>,
}
1958
1959impl From<Use> for Match {
1960 fn from(value: Use) -> Self {
1961 let test = Test::Use(value.switch_endianness, value.rule_name);
1962 let test_strength = test.strength();
1963 Self {
1964 line: value.line,
1965 depth: value.depth,
1966 offset: value.start_offset,
1967 test,
1968 test_strength,
1969 message: value.message,
1970 }
1971 }
1972}
1973
1974impl From<Name> for Match {
1975 fn from(value: Name) -> Self {
1976 let test = Test::Name(value.name);
1977 let test_strength = test.strength();
1978 Self {
1979 line: value.line,
1980 depth: 0,
1981 offset: Offset::Direct(DirOffset::Start(0)),
1982 test,
1983 test_strength,
1984 message: value.message,
1985 }
1986 }
1987}
1988
1989impl Match {
1990 #[inline(always)]
1992 fn offset_from_start<R: Read + Seek>(
1993 &self,
1994 haystack: &mut LazyCache<R>,
1995 rule_base_offset: Option<u64>,
1996 last_level_offset: Option<u64>,
1997 ) -> Result<Option<u64>, io::Error> {
1998 match self.offset {
1999 Offset::Direct(dir_offset) => match dir_offset {
2000 DirOffset::Start(s) => Ok(Some(s)),
2001 DirOffset::LastUpper(shift) => {
2002 let o = last_level_offset.unwrap_or_default() as i64 + shift;
2003
2004 if o >= 0 { Ok(Some(o as u64)) } else { Ok(None) }
2005 }
2006 DirOffset::End(e) => Ok(Some(haystack.offset_from_start(SeekFrom::End(e)))),
2007 },
2008 Offset::Indirect(ind_offset) => {
2009 let Some(o) =
2010 ind_offset.read_offset(haystack, rule_base_offset, last_level_offset)?
2011 else {
2012 return Ok(None);
2013 };
2014
2015 Ok(Some(o))
2016 }
2017 }
2018 }
2019
2020 #[inline]
2033 #[allow(clippy::too_many_arguments)]
2034 fn matches<'a: 'h, 'h, R: Read + Seek>(
2035 &'a self,
2036 source: Option<&str>,
2037 magic: &mut Magic<'a>,
2038 stream_kind: StreamKind,
2039 state: &mut MatchState,
2040 buf_base_offset: Option<u64>,
2041 rule_base_offset: Option<u64>,
2042 last_level_offset: Option<u64>,
2043 haystack: &'h mut LazyCache<R>,
2044 switch_endianness: bool,
2045 db: &'a MagicDb,
2046 depth: usize,
2047 ) -> Result<(bool, Option<MatchRes<'h>>), Error> {
2048 let source = source.unwrap_or("unknown");
2049 let line = self.line;
2050
2051 if depth >= MAX_RECURSION {
2052 return Err(Error::localized(
2053 source,
2054 line,
2055 Error::MaximumRecursion(MAX_RECURSION),
2056 ));
2057 }
2058
2059 if self.test.is_only_binary() && stream_kind.is_text() {
2060 trace!("skip binary test source={source} line={line} stream_kind={stream_kind:?}",);
2061 return Ok((false, None));
2062 }
2063
2064 if self.test.is_only_text() && !stream_kind.is_text() {
2065 trace!("skip text test source={source} line={line} stream_kind={stream_kind:?}",);
2066 return Ok((false, None));
2067 }
2068
2069 let Ok(Some(mut offset)) = self
2070 .offset_from_start(haystack, rule_base_offset, last_level_offset)
2071 .inspect_err(|e| debug!("source={source} line={line} failed at computing offset: {e}"))
2072 else {
2073 return Ok((false, None));
2074 };
2075
2076 offset = match self.offset {
2077 Offset::Indirect(_) => {
2078 buf_base_offset.unwrap_or_default().saturating_add(offset)
2083 }
2084 Offset::Direct(DirOffset::Start(_)) => {
2086 rule_base_offset.unwrap_or_default().saturating_add(offset)
2087 }
2088 _ => offset,
2089 };
2090
2091 match &self.test {
2092 Test::Clear => {
2093 trace!("source={source} line={line} clear");
2094 state.clear_continuation_level(&self.continuation_level());
2095 Ok((true, None))
2096 }
2097
2098 Test::Name(name) => {
2099 trace!(
2100 "source={source} line={line} running rule {name} switch_endianness={switch_endianness}",
2101 );
2102 Ok((true, None))
2103 }
2104
2105 Test::Use(flip_endianness, rule_name) => {
2106 trace!(
2107 "source={source} line={line} use {rule_name} switch_endianness={flip_endianness}",
2108 );
2109
2110 let switch_endianness = switch_endianness ^ flip_endianness;
2112
2113 let dr: &DependencyRule = db.dependencies.get(rule_name).ok_or(
2114 Error::localized(source, line, Error::MissingRule(rule_name.clone())),
2115 )?;
2116
2117 if let Some(msg) = self.message.as_ref() {
2119 magic.push_message(msg.to_string_lossy());
2120 }
2121
2122 dr.rule.magic(
2123 magic,
2124 stream_kind,
2125 buf_base_offset,
2126 Some(offset),
2127 haystack,
2128 db,
2129 switch_endianness,
2130 depth.saturating_add(1),
2131 )?;
2132
2133 Ok((false, None))
2135 }
2136
2137 Test::Indirect(m) => {
2138 trace!(
2139 "source={source} line={line} indirect mods={:?} offset={offset:#x}",
2140 m
2141 );
2142
2143 let new_buf_base_off = if m.contains(IndirectMod::Relative) {
2144 Some(offset)
2145 } else {
2146 None
2147 };
2148
2149 if let Some(msg) = self.message.as_ref() {
2151 magic.push_message(msg.to_string_lossy());
2152 }
2153
2154 for r in db.rules.iter() {
2155 let messages_cnt = magic.message.len();
2156
2157 r.magic(
2158 magic,
2159 stream_kind,
2160 new_buf_base_off,
2161 Some(offset),
2162 haystack,
2163 db,
2164 false,
2165 depth.saturating_add(1),
2166 )?;
2167
2168 if magic.message.len() != messages_cnt {
2170 break;
2171 }
2172 }
2173
2174 Ok((false, None))
2176 }
2177
2178 Test::Default => {
2179 let ok = !state.get_continuation_level(&self.continuation_level());
2181
2182 trace!("source={source} line={line} default match={ok}");
2183 if ok {
2184 state.set_continuation_level(self.continuation_level());
2185 }
2186
2187 Ok((ok, None))
2188 }
2189
2190 _ => {
2191 if let Err(e) = haystack.seek(SeekFrom::Start(offset)) {
2192 debug!("source={source} line={line} failed to seek in haystack: {e}");
2193 return Ok((false, None));
2194 }
2195
2196 let mut trace_msg = None;
2197
2198 if enabled!(Level::DEBUG) {
2199 trace_msg = Some(vec![format!(
2200 "source={source} line={line} depth={} stream_offset={:#x}",
2201 self.depth,
2202 haystack.lazy_stream_position()
2203 )])
2204 }
2205
2206 if let Ok(opt_test_value) = self
2210 .test
2211 .read_test_value(haystack, switch_endianness)
2212 .inspect_err(|e| {
2213 debug!("source={source} line={line} error while reading test value @{offset}: {e}",)
2214 })
2215 {
2216 if let Some(v) = trace_msg
2217 .as_mut() { v.push(format!("test={:?}", self.test)) }
2218
2219 let match_res =
2220 opt_test_value.and_then(|tv| self.test.match_value(&tv, stream_kind));
2221
2222 if let Some(v) = trace_msg.as_mut() { v.push(format!(
2223 "message=\"{}\" match={}",
2224 self.message
2225 .as_ref()
2226 .map(|fs| fs.to_string_lossy())
2227 .unwrap_or_default(),
2228 match_res.is_some()
2229 )) }
2230
2231 if enabled!(Level::DEBUG) && !enabled!(Level::TRACE) && match_res.is_some() {
2233 if let Some(m) = trace_msg{
2234 debug!("{}", m.join(" "));
2235 }
2236 } else if enabled!(Level::TRACE)
2237 && let Some(m) = trace_msg{
2238 trace!("{}", m.join(" "));
2239 }
2240
2241 if let Some(mr) = match_res {
2242 state.set_continuation_level(self.continuation_level());
2243 return Ok((true, Some(mr)));
2244 }
2245 }
2246
2247 Ok((false, None))
2248 }
2249 }
2250 }
2251
2252 #[inline(always)]
2253 fn continuation_level(&self) -> ContinuationLevel {
2254 ContinuationLevel(self.depth)
2255 }
2256}
2257
/// A `use` directive, converted into a [`Match`] holding a `Test::Use`.
#[derive(Debug, Clone)]
struct Use {
    /// Line number of the directive.
    line: usize,
    /// Nesting depth of the directive.
    depth: u8,
    /// Offset handed to the rule being called.
    start_offset: Offset,
    /// Name of the rule to call.
    rule_name: String,
    /// Whether endianness gets flipped while running the called rule.
    switch_endianness: bool,
    /// Message pushed before the called rule runs.
    message: Option<Message>,
}
2267
/// Arithmetic adjustment applied to the strength of an entry.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct StrengthMod {
    /// Operation to apply.
    op: Op,
    /// Right-hand operand of the operation.
    by: u8,
}
2273
2274impl StrengthMod {
2275 #[inline(always)]
2276 fn apply(&self, strength: u64) -> u64 {
2277 let by = self.by as u64;
2278 debug!("applying strength modifier: {strength} {} {}", self.op, by);
2279 match self.op {
2280 Op::Mul => strength.saturating_mul(by),
2281 Op::Add => strength.saturating_add(by),
2282 Op::Sub => strength.saturating_sub(by),
2283 Op::Div => {
2284 if by > 0 {
2285 strength.saturating_div(by)
2286 } else {
2287 strength
2288 }
2289 }
2290 Op::Mod => strength % by,
2291 Op::And => strength & by,
2292 Op::Xor | Op::Or => {
2295 debug_panic!("unsupported strength operator");
2296 strength
2297 }
2298 }
2299 }
2300}
2301
/// Metadata attached to a rule entry rather than tested against the stream.
// NOTE(review): these presumably map to the `!:mime`, `!:ext`, `!:strength`
// and `!:apple` directives of magic files — confirm against the parser.
#[derive(Debug, Clone)]
enum Flag {
    /// MIME type.
    Mime(String),
    /// Set of file extensions.
    Ext(HashSet<String>),
    /// Strength modifier.
    Strength(StrengthMod),
    /// Apple creator/type code.
    Apple(String),
}
2309
/// A `name` directive, converted into a [`Match`] holding a `Test::Name`.
#[derive(Debug, Clone)]
struct Name {
    /// Line number of the directive.
    line: usize,
    /// Name under which the rule can be referenced.
    name: String,
    /// Message attached to the directive.
    message: Option<Message>,
}
2316
/// A parsed rule line together with the `pest` span it comes from.
#[derive(Debug, Clone)]
enum Entry<'span> {
    /// A test line.
    Match(Span<'span>, Match),
    /// A metadata line.
    Flag(Span<'span>, Flag),
}
2322
/// Node of a rule tree: an entry, its metadata and its continuation entries.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct EntryNode {
    /// Whether this node is the root of its rule.
    root: bool,
    /// Test evaluated for this node.
    entry: Match,
    /// Entries evaluated only once this node matched.
    children: Vec<EntryNode>,
    /// MIME type set on the result when this node matches.
    mimetype: Option<String>,
    /// Creator code set on the result when this node matches.
    apple: Option<String>,
    /// Adjustment applied to the strength contributed by this node.
    strength_mod: Option<StrengthMod>,
    /// Extensions added to the result when this node matches.
    exts: HashSet<String>,
}
2333
2334impl EntryNode {
2335 fn update_exts_rec(
2336 &self,
2337 exts: &mut HashSet<String>,
2338 deps: &HashMap<String, DependencyRule>,
2339 marked: &mut HashSet<String>,
2340 ) -> Result<(), ()> {
2341 for ext in self.exts.iter() {
2342 if !exts.contains(ext) {
2343 exts.insert(ext.clone());
2344 }
2345 }
2346
2347 for c in self.children.iter() {
2348 if let Test::Use(_, ref name) = c.entry.test {
2349 if marked.contains(name) {
2350 continue;
2351 }
2352 if let Some(r) = deps.get(name) {
2353 marked.insert(name.clone());
2354 exts.extend(r.rule.fetch_all_extensions(deps, marked)?);
2355 } else {
2356 return Err(());
2357 }
2358 } else {
2359 c.update_exts_rec(exts, deps, marked)?;
2360 }
2361 }
2362
2363 Ok(())
2364 }
2365
2366 fn update_score_rec(
2367 &self,
2368 depth: usize,
2369 score: &mut u64,
2370 deps: &HashMap<String, DependencyRule>,
2371 marked: &mut HashSet<String>,
2372 ) {
2373 if depth == 3 {
2374 return;
2375 }
2376
2377 *score += self
2378 .children
2379 .iter()
2380 .map(|e| e.entry.test_strength)
2381 .min()
2382 .unwrap_or_default();
2383
2384 for c in self.children.iter() {
2385 if let Test::Use(_, ref name) = c.entry.test {
2386 if marked.contains(name) {
2387 continue;
2388 }
2389
2390 if let Some(r) = deps.get(name) {
2391 marked.insert(name.clone());
2392 *score += r.rule.compute_score(depth, deps, marked);
2393 }
2394 }
2395 c.update_score_rec(depth + 1, score, deps, marked);
2396 }
2397 }
2398
2399 #[inline]
2400 #[allow(clippy::too_many_arguments)]
2401 fn matches<'r, R: Read + Seek>(
2402 &'r self,
2403 opt_source: Option<&str>,
2404 magic: &mut Magic<'r>,
2405 state: &mut MatchState,
2406 stream_kind: StreamKind,
2407 buf_base_offset: Option<u64>,
2408 rule_base_offset: Option<u64>,
2409 last_level_offset: Option<u64>,
2410 haystack: &mut LazyCache<R>,
2411 db: &'r MagicDb,
2412 switch_endianness: bool,
2413 depth: usize,
2414 ) -> Result<(), Error> {
2415 let (ok, opt_match_res) = self.entry.matches(
2416 opt_source,
2417 magic,
2418 stream_kind,
2419 state,
2420 buf_base_offset,
2421 rule_base_offset,
2422 last_level_offset,
2423 haystack,
2424 switch_endianness,
2425 db,
2426 depth,
2427 )?;
2428
2429 let source = opt_source.unwrap_or("unknown");
2430 let line = self.entry.line;
2431
2432 if ok {
2433 if let Some(msg) = self.entry.message.as_ref()
2435 && let Ok(msg) = msg.format_with(opt_match_res.as_ref()).inspect_err(|e| {
2436 debug!("source={source} line={line} failed to format message: {e}")
2437 })
2438 {
2439 magic.push_message(msg);
2440 }
2441
2442 if let Some(mr) = opt_match_res {
2444 match &self.entry.test {
2445 Test::String(t) => {
2446 if t.has_length_mod() {
2447 let o = mr.end_offset();
2448 haystack.seek(SeekFrom::Start(o))?;
2449 }
2450 }
2451 Test::Search(t) => {
2452 if t.re_mods.contains(ReMod::StartOffsetUpdate) {
2453 let o = mr.start_offset();
2454 haystack.seek(SeekFrom::Start(o))?;
2455 } else {
2456 let o = mr.end_offset();
2457 haystack.seek(SeekFrom::Start(o))?;
2458 }
2459 }
2460
2461 Test::Regex(t) => {
2462 if t.mods.contains(ReMod::StartOffsetUpdate) {
2463 let o = mr.start_offset();
2464 haystack.seek(SeekFrom::Start(o))?;
2465 } else {
2466 let o = mr.end_offset();
2467 haystack.seek(SeekFrom::Start(o))?;
2468 }
2469 }
2470 _ => {}
2472 }
2473 }
2474
2475 if let Some(mimetype) = self.mimetype.as_ref() {
2476 magic.set_mime_type(Cow::Borrowed(mimetype));
2477 }
2478
2479 if let Some(apple_ty) = self.apple.as_ref() {
2480 magic.set_creator_code(Cow::Borrowed(apple_ty));
2481 }
2482
2483 if !self.exts.is_empty() {
2484 magic.insert_extensions(self.exts.iter().map(|s| s.as_str()));
2485 }
2486
2487 let mut strength = self.entry.test_strength;
2491
2492 let continuation_level = self.entry.continuation_level().0 as u64;
2493 if self.entry.message.is_none() && continuation_level < 3 {
2494 strength = strength.saturating_add(continuation_level);
2495 }
2496
2497 if let Some(sm) = self.strength_mod.as_ref() {
2498 strength = sm.apply(strength);
2499 }
2500
2501 if self.entry.message.is_none() {
2503 strength += 1
2504 }
2505
2506 magic.update_strength(strength);
2507
2508 let end_upper_level = haystack.lazy_stream_position();
2509
2510 let rule_base_offset = if self.root {
2518 match self.entry.offset {
2519 Offset::Direct(DirOffset::End(o)) => {
2520 Some(haystack.offset_from_start(SeekFrom::End(o)))
2521 }
2522 _ => rule_base_offset,
2523 }
2524 } else {
2525 rule_base_offset
2526 };
2527
2528 for e in self.children.iter() {
2529 e.matches(
2530 opt_source,
2531 magic,
2532 state,
2533 stream_kind,
2534 buf_base_offset,
2535 rule_base_offset,
2536 Some(end_upper_level),
2537 haystack,
2538 db,
2539 switch_endianness,
2540 depth,
2541 )?
2542 }
2543 }
2544
2545 Ok(())
2546 }
2547}
2548
/// A rule: a tree of entries together with data derived from it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MagicRule {
    /// Identifier of the rule.
    id: usize,
    /// Name of the source the rule comes from, when known.
    source: Option<String>,
    /// Root of the entry tree.
    entries: EntryNode,
    /// Extensions collected from the rule and the rules it uses.
    extensions: HashSet<String>,
    /// Score computed when the rule is finalized.
    score: u64,
    /// Whether `extensions` and `score` have been computed.
    finalized: bool,
}
2560
2561impl MagicRule {
2562 #[inline(always)]
2563 fn set_id(&mut self, id: usize) {
2564 self.id = id
2565 }
2566
2567 fn fetch_all_extensions(
2571 &self,
2572 deps: &HashMap<String, DependencyRule>,
2573 marked: &mut HashSet<String>,
2574 ) -> Result<HashSet<String>, ()> {
2575 let mut exts = HashSet::new();
2576 self.entries.update_exts_rec(&mut exts, deps, marked)?;
2577 Ok(exts)
2578 }
2579
2580 fn compute_score(
2583 &self,
2584 depth: usize,
2585 deps: &HashMap<String, DependencyRule>,
2586 marked: &mut HashSet<String>,
2587 ) -> u64 {
2588 let mut score = 0;
2589 score += self.entries.entry.test_strength;
2590 self.entries
2591 .update_score_rec(depth, &mut score, deps, marked);
2592 score
2593 }
2594
2595 fn try_finalize(&mut self, deps: &HashMap<String, DependencyRule>) {
2598 if self.finalized {
2599 return;
2600 }
2601
2602 let Ok(exts) = self.fetch_all_extensions(deps, &mut HashSet::new()) else {
2603 return;
2604 };
2605
2606 self.extensions.extend(exts);
2607
2608 self.score = self.compute_score(0, deps, &mut HashSet::new());
2612 self.finalized = true
2613 }
2614
2615 #[inline]
2616 fn magic_entrypoint<'r, R: Read + Seek>(
2617 &'r self,
2618 magic: &mut Magic<'r>,
2619 stream_kind: StreamKind,
2620 haystack: &mut LazyCache<R>,
2621 db: &'r MagicDb,
2622 switch_endianness: bool,
2623 depth: usize,
2624 ) -> Result<(), Error> {
2625 self.entries.matches(
2626 self.source.as_deref(),
2627 magic,
2628 &mut MatchState::empty(),
2629 stream_kind,
2630 None,
2631 None,
2632 None,
2633 haystack,
2634 db,
2635 switch_endianness,
2636 depth,
2637 )
2638 }
2639
2640 #[inline]
2641 #[allow(clippy::too_many_arguments)]
2642 fn magic<'r, R: Read + Seek>(
2643 &'r self,
2644 magic: &mut Magic<'r>,
2645 stream_kind: StreamKind,
2646 buf_base_offset: Option<u64>,
2647 rule_base_offset: Option<u64>,
2648 haystack: &mut LazyCache<R>,
2649 db: &'r MagicDb,
2650 switch_endianness: bool,
2651 depth: usize,
2652 ) -> Result<(), Error> {
2653 self.entries.matches(
2654 self.source.as_deref(),
2655 magic,
2656 &mut MatchState::empty(),
2657 stream_kind,
2658 buf_base_offset,
2659 rule_base_offset,
2660 None,
2661 haystack,
2662 db,
2663 switch_endianness,
2664 depth,
2665 )
2666 }
2667
2668 pub fn is_text(&self) -> bool {
2674 self.entries.entry.test.is_text()
2675 && self.entries.children.iter().all(|e| e.entry.test.is_text())
2676 }
2677
2678 #[inline(always)]
2684 pub fn score(&self) -> u64 {
2685 self.score
2686 }
2687
2688 #[inline(always)]
2694 pub fn source(&self) -> Option<&str> {
2695 self.source.as_deref()
2696 }
2697
2698 #[inline(always)]
2704 pub fn line(&self) -> usize {
2705 self.entries.entry.line
2706 }
2707
2708 #[inline(always)]
2714 pub fn extensions(&self) -> &HashSet<String> {
2715 &self.extensions
2716 }
2717}
2718
/// A named rule, meant to be run through a `use` test.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct DependencyRule {
    /// Name of the rule.
    name: String,
    /// The rule itself.
    rule: MagicRule,
}
2724
/// Rules obtained from parsing one magic file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MagicSource {
    /// Top-level rules.
    rules: Vec<MagicRule>,
    /// Named rules, indexed by name.
    dependencies: HashMap<String, DependencyRule>,
}
2735
2736impl MagicSource {
2737 pub fn open<P: AsRef<Path>>(p: P) -> Result<Self, Error> {
2747 FileMagicParser::parse_file(p)
2748 }
2749}
2750
/// Continuation level of an entry, i.e. its depth in the rule tree.
#[derive(Debug, Hash, PartialEq, Eq, Clone, Copy)]
struct ContinuationLevel(u8);
2753
/// Encoding detected for a text stream.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
enum TextEncoding {
    /// Pure ASCII content.
    Ascii,
    /// Valid UTF-8 content.
    Utf8,
    /// Text-looking content whose encoding is not identified.
    Unknown,
}

impl TextEncoding {
    /// Returns the name of the encoding, as displayed in messages.
    const fn as_magic_str(&self) -> &'static str {
        match *self {
            Self::Ascii => "ASCII",
            Self::Utf8 => "UTF-8",
            Self::Unknown => "Unknown",
        }
    }
}
2771
/// Broad classification of the inspected stream.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
enum StreamKind {
    /// Content which does not look like text.
    Binary,
    /// Text content, with its detected encoding.
    Text(TextEncoding),
}
2777
2778impl StreamKind {
2779 const fn is_text(&self) -> bool {
2780 matches!(self, StreamKind::Text(_))
2781 }
2782}
2783
/// State shared by the entries of a rule while it is evaluated.
#[derive(Debug)]
struct MatchState {
    /// One flag per possible continuation level (a `u8`), set once an entry
    /// of that level matched.
    continuation_levels: [bool; 256],
}
2788
2789impl MatchState {
2790 #[inline(always)]
2791 fn empty() -> Self {
2792 MatchState {
2793 continuation_levels: [false; 256],
2794 }
2795 }
2796
2797 #[inline(always)]
2798 fn get_continuation_level(&mut self, level: &ContinuationLevel) -> bool {
2799 self.continuation_levels
2800 .get(level.0 as usize)
2801 .cloned()
2802 .unwrap_or_default()
2803 }
2804
2805 #[inline(always)]
2806 fn set_continuation_level(&mut self, level: ContinuationLevel) {
2807 if let Some(b) = self.continuation_levels.get_mut(level.0 as usize) {
2808 *b = true
2809 }
2810 }
2811
2812 #[inline(always)]
2813 fn clear_continuation_level(&mut self, level: &ContinuationLevel) {
2814 if let Some(b) = self.continuation_levels.get_mut(level.0 as usize) {
2815 *b = false;
2816 }
2817 }
2818}
2819
/// Result of an identification, filled in while rules are evaluated.
#[derive(Debug, Default)]
pub struct Magic<'m> {
    /// Kind of the inspected stream, when set.
    stream_kind: Option<StreamKind>,
    /// Name of the source of the rule, when set.
    source: Option<Cow<'m, str>>,
    /// Message parts, in the order they were pushed.
    message: Vec<Cow<'m, str>>,
    /// MIME type; the first one set wins.
    mime_type: Option<Cow<'m, str>>,
    /// Creator code; the first one set wins.
    creator_code: Option<Cow<'m, str>>,
    /// Accumulated strength.
    strength: u64,
    /// File extensions; only the first non-empty batch is kept.
    exts: HashSet<Cow<'m, str>>,
    /// Flag exposed through `is_default()`.
    is_default: bool,
}
2832
2833impl<'m> Magic<'m> {
2834 #[inline(always)]
2835 fn set_source(&mut self, source: Option<&'m str>) {
2836 self.source = source.map(Cow::Borrowed);
2837 }
2838
2839 #[inline(always)]
2840 fn set_stream_kind(&mut self, stream_kind: StreamKind) {
2841 self.stream_kind = Some(stream_kind)
2842 }
2843
2844 #[inline(always)]
2845 fn reset(&mut self) {
2846 self.stream_kind = None;
2847 self.source = None;
2848 self.message.clear();
2849 self.mime_type = None;
2850 self.creator_code = None;
2851 self.strength = 0;
2852 self.exts.clear();
2853 self.is_default = false;
2854 }
2855
2856 #[inline]
2864 pub fn into_owned<'owned>(self) -> Magic<'owned> {
2865 Magic {
2866 stream_kind: self.stream_kind,
2867 source: self.source.map(|s| Cow::Owned(s.into_owned())),
2868 message: self
2869 .message
2870 .into_iter()
2871 .map(Cow::into_owned)
2872 .map(Cow::Owned)
2873 .collect(),
2874 mime_type: self.mime_type.map(|m| Cow::Owned(m.into_owned())),
2875 creator_code: self.creator_code.map(|m| Cow::Owned(m.into_owned())),
2876 strength: self.strength,
2877 exts: self
2878 .exts
2879 .into_iter()
2880 .map(|e| Cow::Owned(e.into_owned()))
2881 .collect(),
2882 is_default: self.is_default,
2883 }
2884 }
2885
2886 #[inline(always)]
2892 pub fn message(&self) -> String {
2893 let mut out = String::new();
2894 for (i, m) in self.message.iter().enumerate() {
2895 if let Some(s) = m.strip_prefix(r#"\b"#) {
2896 out.push_str(s);
2897 } else {
2898 if i > 0 {
2900 out.push(' ');
2901 }
2902 out.push_str(m);
2903 }
2904 }
2905 out
2906 }
2907
2908 #[inline]
2919 pub fn message_parts(&self) -> impl Iterator<Item = &str> {
2920 self.message.iter().map(|p| p.as_ref())
2921 }
2922
2923 #[inline(always)]
2924 fn update_strength(&mut self, value: u64) {
2925 self.strength = self.strength.saturating_add(value);
2926 debug!("updated strength = {:?}", self.strength)
2927 }
2928
2929 #[inline(always)]
2935 pub fn mime_type(&self) -> &str {
2936 self.mime_type.as_deref().unwrap_or(match self.stream_kind {
2937 Some(StreamKind::Text(_)) => DEFAULT_TEXT_MIMETYPE,
2938 Some(StreamKind::Binary) | None => DEFAULT_BIN_MIMETYPE,
2939 })
2940 }
2941
2942 #[inline(always)]
2943 fn push_message<'a: 'm>(&mut self, msg: Cow<'a, str>) {
2944 if !msg.is_empty() {
2945 debug!("pushing message: msg={msg} len={}", msg.len());
2946 self.message.push(msg);
2947 }
2948 }
2949
2950 #[inline(always)]
2951 fn set_mime_type<'a: 'm>(&mut self, mime: Cow<'a, str>) {
2952 if self.mime_type.is_none() {
2953 debug!("insert mime: {:?}", mime);
2954 self.mime_type = Some(mime)
2955 }
2956 }
2957
2958 #[inline(always)]
2959 fn set_creator_code<'a: 'm>(&mut self, apple_ty: Cow<'a, str>) {
2960 if self.creator_code.is_none() {
2961 debug!("insert apple type: {apple_ty:?}");
2962 self.creator_code = Some(apple_ty)
2963 }
2964 }
2965
2966 #[inline(always)]
2967 fn insert_extensions<'a: 'm, I: Iterator<Item = &'a str>>(&mut self, exts: I) {
2968 if self.exts.is_empty() {
2969 self.exts.extend(exts.filter_map(|e| {
2970 if e.is_empty() {
2971 None
2972 } else {
2973 Some(Cow::Borrowed(e))
2974 }
2975 }));
2976 }
2977 }
2978
2979 #[inline(always)]
2987 pub fn strength(&self) -> u64 {
2988 self.strength
2989 }
2990
2991 #[inline(always)]
2997 pub fn source(&self) -> Option<&str> {
2998 self.source.as_deref()
2999 }
3000
3001 #[inline(always)]
3007 pub fn creator_code(&self) -> Option<&str> {
3008 self.creator_code.as_deref()
3009 }
3010
3011 #[inline(always)]
3017 pub fn extensions(&self) -> &HashSet<Cow<'m, str>> {
3018 &self.exts
3019 }
3020
3021 #[inline(always)]
3027 pub fn is_default(&self) -> bool {
3028 self.is_default
3029 }
3030}
3031
/// Database of rules used to identify streams.
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
pub struct MagicDb {
    /// Next rule identifier to hand out.
    rule_id: usize,
    /// Top-level rules.
    rules: Vec<MagicRule>,
    /// Named rules, looked up by `use` tests.
    dependencies: HashMap<String, DependencyRule>,
}
3039
/// Heuristically tells whether `bytes` look like text.
///
/// Empty input and input holding a NUL byte are never text. Otherwise
/// tab, line feed, carriage return and the bytes from `0x20` to `0x7E`
/// count as printable, any other byte counts as a high byte, and the input
/// is text when more than 85% of it is printable and less than 20% of it
/// is made of high bytes.
#[inline(always)]
fn is_likely_text(bytes: &[u8]) -> bool {
    if bytes.is_empty() {
        return false;
    }

    let mut printable: usize = 0;
    let mut high_bytes: usize = 0;

    for &byte in bytes {
        match byte {
            0x00 => return false,
            b'\t' | b'\n' | b'\r' | b' '..=b'~' => printable += 1,
            _ => high_bytes += 1,
        }
    }

    let total = bytes.len() as f64;
    let printable_ratio = printable as f64 / total;
    let high_bytes_ratio = high_bytes as f64 / total;

    printable_ratio > 0.85 && high_bytes_ratio < 0.20
}
3082
3083#[inline(always)]
3084fn guess_stream_kind<S: AsRef<[u8]>>(stream: S) -> StreamKind {
3085 let buf = stream.as_ref();
3086
3087 match run_utf8_validation(buf) {
3088 Ok(is_ascii) => {
3089 if is_ascii {
3090 StreamKind::Text(TextEncoding::Ascii)
3091 } else {
3092 StreamKind::Text(TextEncoding::Utf8)
3093 }
3094 }
3095 Err(e) => {
3096 if is_likely_text(&buf[e.valid_up_to..]) {
3097 StreamKind::Text(TextEncoding::Unknown)
3098 } else {
3099 StreamKind::Binary
3100 }
3101 }
3102 }
3103}
3104
3105impl MagicDb {
3106 pub fn optimal_lazy_cache<R: Read + Seek>(f: R) -> Result<LazyCache<R>, io::Error> {
3109 Ok(LazyCache::<R>::from_read_seek(f)
3110 .and_then(|lc| lc.with_hot_cache(2 * FILE_BYTES_MAX))?)
3111 .map(|lc| lc.with_warm_cache(100 << 20))
3112 }
3113
    /// Creates an empty database, equivalent to [`MagicDb::default`].
    ///
    /// Rules are added afterwards with [`MagicDb::load`].
    pub fn new() -> Self {
        Self::default()
    }
3122
3123 #[inline(always)]
3124 fn next_rule_id(&mut self) -> usize {
3125 let t = self.rule_id;
3126 self.rule_id += 1;
3127 t
3128 }
3129
3130 #[inline(always)]
3131 fn try_json<R: Read + Seek>(
3132 haystack: &mut LazyCache<R>,
3133 stream_kind: StreamKind,
3134 magic: &mut Magic,
3135 ) -> Result<bool, Error> {
3136 if matches!(stream_kind, StreamKind::Binary) {
3138 return Ok(false);
3139 }
3140
3141 let buf = haystack.read_range(0..FILE_BYTES_MAX as u64)?.trim_ascii();
3142
3143 let Some((start, end)) = find_json_boundaries(buf) else {
3144 return Ok(false);
3145 };
3146
3147 for c in buf[0..start].iter() {
3150 if !c.is_ascii_whitespace() {
3151 return Ok(false);
3152 }
3153 }
3154
3155 let mut is_ndjson = false;
3156
3157 trace!("maybe a json document");
3158 let ok = serde_json::from_slice::<serde_json::Value>(&buf[start..=end]).is_ok();
3159 if !ok {
3160 return Ok(false);
3161 }
3162
3163 if end + 1 < buf.len() {
3165 let buf = &buf[end + 1..];
3167 if let Some((second_start, second_end)) = find_json_boundaries(buf) {
3168 if memchr(b'\n', &buf[..second_start]).is_some() {
3170 trace!("might be ndjson");
3171 is_ndjson = serde_json::from_slice::<serde_json::Value>(
3172 &buf[second_start..=second_end],
3173 )
3174 .is_ok();
3175 }
3176 }
3177 }
3178
3179 if is_ndjson {
3180 magic.push_message(Cow::Borrowed("New Line Delimited"));
3181 magic.set_mime_type(Cow::Borrowed("application/x-ndjson"));
3182 magic.insert_extensions(["ndjson", "jsonl"].into_iter());
3183 } else {
3184 magic.set_mime_type(Cow::Borrowed("application/json"));
3185 magic.insert_extensions(["json"].into_iter());
3186 }
3187
3188 magic.push_message(Cow::Borrowed("JSON text data"));
3189 magic.set_source(Some(HARDCODED_SOURCE));
3190 magic.update_strength(HARDCODED_MAGIC_STRENGTH);
3191 Ok(true)
3192 }
3193
3194 #[inline(always)]
3195 fn try_csv<R: Read + Seek>(
3196 haystack: &mut LazyCache<R>,
3197 stream_kind: StreamKind,
3198 magic: &mut Magic,
3199 ) -> Result<bool, Error> {
3200 let StreamKind::Text(enc) = stream_kind else {
3202 return Ok(false);
3203 };
3204
3205 let buf = haystack.read_range(0..FILE_BYTES_MAX as u64)?;
3206 let mut reader = csv::Reader::from_reader(io::Cursor::new(buf));
3207 let mut records = reader.records();
3208
3209 let Some(Ok(first)) = records.next() else {
3210 return Ok(false);
3211 };
3212
3213 if first.len() <= 1 {
3217 return Ok(false);
3218 }
3219
3220 let mut n = 1;
3222 for i in records.take(9) {
3223 if let Ok(rec) = i {
3224 if first.len() != rec.len() {
3225 return Ok(false);
3226 }
3227 } else {
3228 return Ok(false);
3229 }
3230 n += 1;
3231 }
3232
3233 if n != 10 {
3235 return Ok(false);
3236 }
3237
3238 magic.set_mime_type(Cow::Borrowed("text/csv"));
3239 magic.push_message(Cow::Borrowed("CSV"));
3240 magic.push_message(Cow::Borrowed(enc.as_magic_str()));
3241 magic.push_message(Cow::Borrowed("text"));
3242 magic.insert_extensions(["csv"].into_iter());
3243 magic.set_source(Some(HARDCODED_SOURCE));
3244 magic.update_strength(HARDCODED_MAGIC_STRENGTH);
3245 Ok(true)
3246 }
3247
3248 #[inline(always)]
3249 fn try_tar<R: Read + Seek>(
3250 haystack: &mut LazyCache<R>,
3251 stream_kind: StreamKind,
3252 magic: &mut Magic,
3253 ) -> Result<bool, Error> {
3254 if !matches!(stream_kind, StreamKind::Binary) {
3256 return Ok(false);
3257 }
3258
3259 let buf = haystack.read_range(0..FILE_BYTES_MAX as u64)?;
3260 let mut ar = Archive::new(io::Cursor::new(buf));
3261
3262 let Ok(mut entries) = ar.entries() else {
3263 return Ok(false);
3264 };
3265
3266 let Some(Ok(first)) = entries.next() else {
3267 return Ok(false);
3268 };
3269
3270 let header = first.header();
3271
3272 if header.as_ustar().is_some() {
3273 magic.push_message(Cow::Borrowed("POSIX tar archive"));
3274 } else if header.as_gnu().is_some() {
3275 magic.push_message(Cow::Borrowed("POSIX tar archive (GNU)"));
3276 } else {
3277 magic.push_message(Cow::Borrowed("tar archive"));
3278 }
3279
3280 magic.set_mime_type(Cow::Borrowed("application/x-tar"));
3281 magic.set_source(Some(HARDCODED_SOURCE));
3282 magic.update_strength(HARDCODED_MAGIC_STRENGTH);
3283 magic.insert_extensions(["tar"].into_iter());
3284 Ok(true)
3285 }
3286
3287 #[inline(always)]
3288 fn try_hard_magic<R: Read + Seek>(
3289 haystack: &mut LazyCache<R>,
3290 stream_kind: StreamKind,
3291 magic: &mut Magic,
3292 ) -> Result<bool, Error> {
3293 Ok(Self::try_json(haystack, stream_kind, magic)?
3294 || Self::try_csv(haystack, stream_kind, magic)?
3295 || Self::try_tar(haystack, stream_kind, magic)?)
3296 }
3297
3298 #[inline(always)]
3299 fn magic_default<'m, R: Read + Seek>(
3300 cache: &mut LazyCache<R>,
3301 stream_kind: StreamKind,
3302 magic: &mut Magic<'m>,
3303 ) {
3304 magic.set_source(Some(HARDCODED_SOURCE));
3305 magic.set_stream_kind(stream_kind);
3306 magic.is_default = true;
3307
3308 if cache.data_size() == 0 {
3309 magic.push_message(Cow::Borrowed("empty"));
3310 magic.set_mime_type(Cow::Borrowed(DEFAULT_BIN_MIMETYPE));
3311 }
3312
3313 match stream_kind {
3314 StreamKind::Binary => {
3315 magic.push_message(Cow::Borrowed("data"));
3316 }
3317 StreamKind::Text(e) => {
3318 magic.push_message(Cow::Borrowed(e.as_magic_str()));
3319 magic.push_message(Cow::Borrowed("text"));
3320 }
3321 }
3322 }
3323
3324 pub fn load(&mut self, mf: MagicSource) -> Result<&mut Self, Error> {
3334 for rule in mf.rules.into_iter() {
3335 let mut rule = rule;
3336 rule.set_id(self.next_rule_id());
3337
3338 self.rules.push(rule);
3339 }
3340
3341 self.dependencies.extend(mf.dependencies);
3342 self.prepare();
3343 Ok(self)
3344 }
3345
    /// Returns the rules of the database, in the order they are evaluated.
    pub fn rules(&self) -> &[MagicRule] {
        &self.rules
    }
3354
3355 #[inline]
3356 fn first_magic_with_stream_kind<R: Read + Seek>(
3357 &self,
3358 haystack: &mut LazyCache<R>,
3359 stream_kind: StreamKind,
3360 extension: Option<&str>,
3361 ) -> Result<Magic<'_>, Error> {
3362 let mut magic = Magic::default();
3364
3365 if Self::try_hard_magic(haystack, stream_kind, &mut magic)? {
3366 return Ok(magic);
3367 }
3368
3369 let mut marked = vec![false; self.rules.len()];
3370
3371 macro_rules! do_magic {
3372 ($rule: expr) => {{
3373 $rule.magic_entrypoint(&mut magic, stream_kind, haystack, &self, false, 0)?;
3374
3375 if !magic.message.is_empty() {
3376 magic.set_stream_kind(stream_kind);
3377 magic.set_source($rule.source.as_deref());
3378 return Ok(magic);
3379 }
3380
3381 magic.reset();
3382 }};
3383 }
3384
3385 if let Some(ext) = extension.map(|e| e.to_lowercase())
3386 && !ext.is_empty()
3387 {
3388 for rule in self.rules.iter().filter(|r| r.extensions.contains(&ext)) {
3389 do_magic!(rule);
3390 if let Some(f) = marked.get_mut(rule.id) {
3391 *f = true
3392 }
3393 }
3394 }
3395
3396 for rule in self
3397 .rules
3398 .iter()
3399 .filter(|r| !*marked.get(r.id).unwrap_or(&false))
3401 {
3402 do_magic!(rule)
3403 }
3404
3405 Self::magic_default(haystack, stream_kind, &mut magic);
3406
3407 Ok(magic)
3408 }
3409
3410 pub fn first_magic<R: Read + Seek>(
3424 &self,
3425 r: &mut R,
3426 extension: Option<&str>,
3427 ) -> Result<Magic<'_>, Error> {
3428 let mut cache = Self::optimal_lazy_cache(r)?;
3429 let stream_kind = guess_stream_kind(cache.read_range(0..FILE_BYTES_MAX as u64)?);
3430 self.first_magic_with_stream_kind(&mut cache, stream_kind, extension)
3431 }
3432
3433 pub fn first_magic_with_lazy_cache<R: Read + Seek>(
3453 &self,
3454 cache: &mut LazyCache<R>,
3455 extension: Option<&str>,
3456 ) -> Result<Magic<'_>, Error> {
3457 let stream_kind = guess_stream_kind(cache.read_range(0..FILE_BYTES_MAX as u64)?);
3458 self.first_magic_with_stream_kind(cache, stream_kind, extension)
3459 }
3460
3461 #[inline(always)]
3462 fn all_magics_sort_with_stream_kind<R: Read + Seek>(
3463 &self,
3464 haystack: &mut LazyCache<R>,
3465 stream_kind: StreamKind,
3466 ) -> Result<Vec<Magic<'_>>, Error> {
3467 let mut out = Vec::new();
3468
3469 let mut magic = Magic::default();
3470
3471 if Self::try_hard_magic(haystack, stream_kind, &mut magic)? {
3472 out.push(magic);
3473 magic = Magic::default();
3474 }
3475
3476 for rule in self.rules.iter() {
3477 rule.magic_entrypoint(&mut magic, stream_kind, haystack, self, false, 0)?;
3478
3479 if !magic.message.is_empty() {
3481 magic.set_stream_kind(stream_kind);
3482 magic.set_source(rule.source.as_deref());
3483 out.push(magic);
3484 magic = Magic::default();
3485 }
3486
3487 magic.reset();
3488 }
3489
3490 Self::magic_default(haystack, stream_kind, &mut magic);
3491 out.push(magic);
3492
3493 out.sort_by_key(|b| std::cmp::Reverse(b.strength()));
3494
3495 Ok(out)
3496 }
3497
3498 pub fn all_magics<R: Read + Seek>(&self, r: &mut R) -> Result<Vec<Magic<'_>>, Error> {
3508 let mut cache = Self::optimal_lazy_cache(r)?;
3509 let stream_kind = guess_stream_kind(cache.read_range(0..FILE_BYTES_MAX as u64)?);
3510 self.all_magics_sort_with_stream_kind(&mut cache, stream_kind)
3511 }
3512
3513 pub fn all_magics_with_lazy_cache<R: Read + Seek>(
3529 &self,
3530 cache: &mut LazyCache<R>,
3531 ) -> Result<Vec<Magic<'_>>, Error> {
3532 let stream_kind = guess_stream_kind(cache.read_range(0..FILE_BYTES_MAX as u64)?);
3533 self.all_magics_sort_with_stream_kind(cache, stream_kind)
3534 }
3535
3536 #[inline(always)]
3537 fn best_magic_with_stream_kind<R: Read + Seek>(
3538 &self,
3539 haystack: &mut LazyCache<R>,
3540 stream_kind: StreamKind,
3541 ) -> Result<Magic<'_>, Error> {
3542 let magics = self.all_magics_sort_with_stream_kind(haystack, stream_kind)?;
3543
3544 Ok(magics.into_iter().next().unwrap_or_else(|| {
3547 let mut magic = Magic::default();
3548 Self::magic_default(haystack, stream_kind, &mut magic);
3549 magic
3550 }))
3551 }
3552
3553 pub fn best_magic<R: Read + Seek>(&self, r: &mut R) -> Result<Magic<'_>, Error> {
3563 let mut cache = Self::optimal_lazy_cache(r)?;
3564 let stream_kind = guess_stream_kind(cache.read_range(0..FILE_BYTES_MAX as u64)?);
3565 self.best_magic_with_stream_kind(&mut cache, stream_kind)
3566 }
3567
3568 pub fn best_magic_with_lazy_cache<R: Read + Seek>(
3584 &self,
3585 cache: &mut LazyCache<R>,
3586 ) -> Result<Magic<'_>, Error> {
3587 let stream_kind = guess_stream_kind(cache.read_range(0..FILE_BYTES_MAX as u64)?);
3588 self.best_magic_with_stream_kind(cache, stream_kind)
3589 }
3590
3591 pub fn serialize<W: Write>(self, w: &mut W) -> Result<(), Error> {
3597 let mut encoder = GzEncoder::new(w, Compression::best());
3598
3599 bincode::serde::encode_into_std_write(&self, &mut encoder, bincode::config::standard())?;
3600 encoder.finish()?;
3601 Ok(())
3602 }
3603
3604 pub fn deserialize<R: Read>(r: &mut R) -> Result<Self, Error> {
3614 let mut buf = vec![];
3615 let mut gz = GzDecoder::new(r);
3616 gz.read_to_end(&mut buf).map_err(|e| {
3617 bincode::error::DecodeError::OtherString(format!("failed to read: {e}"))
3618 })?;
3619 let (sdb, _): (MagicDb, usize) =
3620 bincode::serde::decode_from_slice(&buf, bincode::config::standard())?;
3621 Ok(sdb)
3622 }
3623
3624 #[inline(always)]
3625 fn prepare(&mut self) {
3626 self.rules
3627 .iter_mut()
3628 .for_each(|r| r.try_finalize(&self.dependencies));
3629
3630 self.rules.sort_by_key(|r| (r.is_text(), -(r.score as i64)));
3632 }
3633}
3634
3635#[cfg(test)]
3636mod tests {
3637 use std::io::Cursor;
3638
3639 use regex::bytes::Regex;
3640
3641 use crate::utils::unix_local_time_to_string;
3642
3643 use super::*;
3644
    // Builds a `LazyCache` over the bytes of a literal, panicking if the
    // cache cannot be created.
    macro_rules! lazy_cache {
        ($l: literal) => {
            LazyCache::from_read_seek(Cursor::new($l)).unwrap()
        };
    }
3650
3651 fn first_magic(
3652 rule: &str,
3653 content: &[u8],
3654 stream_kind: StreamKind,
3655 ) -> Result<Magic<'static>, Error> {
3656 let mut md = MagicDb::new();
3657 md.load(
3658 FileMagicParser::parse_str(rule, None)
3659 .inspect_err(|e| eprintln!("{e}"))
3660 .unwrap(),
3661 )
3662 .unwrap();
3663 let mut reader = LazyCache::from_read_seek(Cursor::new(content)).unwrap();
3664 let v = md.best_magic_with_stream_kind(&mut reader, stream_kind)?;
3665 Ok(v.into_owned())
3666 }
3667
    // Debugging aid: installs a tracing subscriber at TRACE level. Not used
    // by default, hence the `allow`; call it at the top of a test to see the
    // evaluation traces.
    #[allow(unused_macros)]
    macro_rules! enable_trace {
        () => {
            tracing_subscriber::fmt()
                .with_max_level(tracing_subscriber::filter::LevelFilter::TRACE)
                .try_init();
        };
    }
3677
    // Asserts that the rule literal parses, printing the parse error before
    // panicking otherwise.
    macro_rules! parse_assert {
        ($rule:literal) => {
            FileMagicParser::parse_str($rule, None)
                .inspect_err(|e| eprintln!("{e}"))
                .unwrap();
        };
    }
3685
    // Runs a rule against content treated as a binary stream.
    // - two arguments: only checks that detection succeeds (no error); the
    //   resulting magic is the value of the expression
    // - three arguments: also checks the message of the resulting magic
    macro_rules! assert_magic_match_bin {
        ($rule: literal, $content:literal) => {{ first_magic($rule, $content, StreamKind::Binary).unwrap() }};
        ($rule: literal, $content:literal, $message:expr) => {{
            assert_eq!(
                first_magic($rule, $content, StreamKind::Binary)
                    .unwrap()
                    .message(),
                $message
            );
        }};
    }
3697
    // Runs a rule against content treated as a UTF-8 text stream.
    // - two arguments: only checks that detection succeeds (no error); the
    //   resulting magic is the value of the expression
    // - three arguments: also checks the message of the resulting magic
    macro_rules! assert_magic_match_text {
        ($rule: literal, $content:literal) => {{ first_magic($rule, $content, StreamKind::Text(TextEncoding::Utf8)).unwrap() }};
        ($rule: literal, $content:literal, $message:expr) => {{
            assert_eq!(
                first_magic($rule, $content, StreamKind::Text(TextEncoding::Utf8))
                    .unwrap()
                    .message(),
                $message
            );
        }};
    }
3709
    // Asserts that a rule does NOT match content treated as a UTF-8 text
    // stream, i.e. that the default magic is returned.
    macro_rules! assert_magic_not_match_text {
        ($rule: literal, $content:literal) => {{
            assert!(
                first_magic($rule, $content, StreamKind::Text(TextEncoding::Utf8))
                    .unwrap()
                    .is_default()
            );
        }};
    }
3719
    // Asserts that a rule does NOT match content treated as a binary stream,
    // i.e. that the default magic is returned.
    macro_rules! assert_magic_not_match_bin {
        ($rule: literal, $content:literal) => {{
            assert!(
                first_magic($rule, $content, StreamKind::Binary)
                    .unwrap()
                    .is_default()
            );
        }};
    }
3729
    // Exercises the `regex` test on text and binary content.
    #[test]
    fn test_regex() {
        // shebang detection; the nested regex captures the interpreter name
        // which is substituted for `%s` in the message
        assert_magic_match_text!(
            r#"
0 regex/1024 \^#![[:space:]]*/usr/bin/env[[:space:]]+
!:mime text/x-shellscript
>&0 regex/64 .*($|\\b) %s shell script text executable
 "#,
            br#"#!/usr/bin/env bash
 echo hello world"#,
            "bash shell script text executable"
        );

        // sanity check of the regex crate itself on non UTF-8 bytes
        let re = Regex::new(r"(?-u)\x42\x82").unwrap();
        assert!(re.is_match(b"\x42\x82"));

        // a regex made of raw bytes on binary content
        assert_magic_match_bin!(
            r#"0 regex \x42\x82 binary regex match"#,
            b"\x00\x00\x00\x00\x00\x00\x42\x82"
        );

        // the relative offset `&0` of the child starts after the regex match
        assert_magic_match_bin!(
            r#"
 0 regex \x42\x82
 >&0 string \xde\xad\xbe\xef it works
 "#,
            b"\x00\x00\x00\x00\x00\x00\x42\x82\xde\xad\xbe\xef"
        );

        // with `/s` the relative offset of the child starts at the beginning
        // of the regex match
        assert_magic_match_bin!(
            r#"
 0 regex/s \x42\x82
 >&0 string \x42\x82\xde\xad\xbe\xef it works
 "#,
            b"\x00\x00\x00\x00\x00\x00\x42\x82\xde\xad\xbe\xef"
        );

        // `^` and `$` match at line boundaries, not only at stream boundaries
        assert_magic_match_text!(
            r#"
0 regex/1024 \^HelloWorld$ HelloWorld String"#,
            br#"
// this is a comment after an empty line
HelloWorld
 "#
        );
    }
3779
    // Exercises the modifiers of the `string` test (`/w`, `/C`, `/c`, `/f`,
    // `/W`), each with a matching and, where given, a non-matching input.
    #[test]
    fn test_string_with_mods() {
        // `/w`
        assert_magic_match_text!(
            r#"0 string/w #!\ \ \ /usr/bin/env\ bash BASH
 "#,
            b"#! /usr/bin/env bash i
 echo hello world"
        );

        // `/C`
        assert_magic_match_text!(
            r#"0 string/C HelloWorld it works
 "#,
            b"helloworld"
        );

        assert_magic_not_match_text!(
            r#"0 string/C HelloWorld it works
 "#,
            b"hELLOwORLD"
        );

        // `/c`
        assert_magic_match_text!(
            r#"0 string/c HelloWorld it works
 "#,
            b"HELLOWORLD"
        );

        assert_magic_not_match_text!(
            r#"0 string/c HelloWorld it works
 "#,
            b"helloworld"
        );

        // `/f`
        assert_magic_match_text!(
            r#"0 string/f #!/usr/bin/env\ bash BASH
 "#,
            b"#!/usr/bin/env bash"
        );

        assert_magic_not_match_text!(
            r#"0 string/f #!/usr/bin/python PYTHON"#,
            b"#!/usr/bin/pythonic"
        );

        // `/W`
        assert_magic_match_text!(
            r#"0 string/W #!/usr/bin/env\ python PYTHON"#,
            b"#!/usr/bin/env python"
        );

        assert_magic_not_match_text!(
            r#"0 string/W #!/usr/bin/env\ \ python PYTHON"#,
            b"#!/usr/bin/env python"
        );
    }
3838
    // Exercises the `search` test with modifiers.
    #[test]
    fn test_search_with_mods() {
        // search range combined with several string modifiers
        assert_magic_match_text!(
            r#"0 search/1/fwt #!\ /usr/bin/luatex LuaTex script text executable"#,
            b"#! /usr/bin/luatex "
        );

        // with `/s` the relative offset of the child starts at the beginning
        // of the match, so the child sees the searched string again
        assert_magic_match_text!(
            r#"
 0 search/s /usr/bin/env
 >&0 string /usr/bin/env it works
 "#,
            b"#!/usr/bin/env python"
        );

        // without `/s` the relative offset starts after the match, so the
        // same child cannot match
        assert_magic_not_match_text!(
            r#"
 0 search /usr/bin/env
 >&0 string /usr/bin/env it works
 "#,
            b"#!/usr/bin/env python"
        );
    }
3863
    // Exercises the `pstring` test (length-prefixed string). As the fixtures
    // show: a one byte prefix by default, `/H` and `/h` a two byte prefix
    // (big and little endian), `/L` and `/l` a four byte prefix (big and
    // little endian); with `/J` the stored length also counts the prefix.
    #[test]
    fn test_pstring() {
        assert_magic_match_bin!(r#"0 pstring Toast it works"#, b"\x05Toast");

        assert_magic_match_bin!(r#"0 pstring Toast %s"#, b"\x05Toast", "Toast");

        assert_magic_not_match_bin!(r#"0 pstring Toast Doesn't work"#, b"\x07Toaster");

        // two bytes, big endian
        assert_magic_match_bin!(r#"0 pstring/H Toast it works"#, b"\x00\x05Toast");

        assert_magic_match_bin!(r#"0 pstring/HJ Toast it works"#, b"\x00\x07Toast");

        assert_magic_match_bin!(r#"0 pstring/HJ Toast %s"#, b"\x00\x07Toast", "Toast");

        // two bytes, little endian
        assert_magic_match_bin!(r#"0 pstring/h Toast it works"#, b"\x05\x00Toast");

        assert_magic_match_bin!(r#"0 pstring/hJ Toast it works"#, b"\x07\x00Toast");

        // four bytes, big endian
        assert_magic_match_bin!(r#"0 pstring/L Toast it works"#, b"\x00\x00\x00\x05Toast");

        assert_magic_match_bin!(r#"0 pstring/LJ Toast it works"#, b"\x00\x00\x00\x09Toast");

        // four bytes, little endian
        assert_magic_match_bin!(r#"0 pstring/l Toast it works"#, b"\x05\x00\x00\x00Toast");

        assert_magic_match_bin!(r#"0 pstring/lJ Toast it works"#, b"\x09\x00\x00\x00Toast");
    }
3891
    // An `indirect` rule at offset 0 re-evaluates the rules on the same
    // content forever: evaluation must stop with a `MaximumRecursion` error
    // carrying `MAX_RECURSION`.
    #[test]
    fn test_max_recursion() {
        let res = first_magic(
            r#"0 indirect x"#,
            b"#! /usr/bin/luatex ",
            StreamKind::Binary,
        );
        assert!(res.is_err());
        // check the kind of error, the `Result` itself is not needed anymore
        let _ = res.inspect_err(|e| {
            assert!(matches!(
                e.unwrap_localized(),
                Error::MaximumRecursion(MAX_RECURSION)
            ))
        });
    }
3907
    // Exercises the comparison operators of the `string` test: equality
    // (default), `!`, `>` and `<`.
    #[test]
    fn test_string_ops() {
        assert_magic_match_text!("0 string/b MZ MZ File", b"MZ\0");
        assert_magic_match_text!("0 string !MZ Not MZ File", b"AZ\0");
        assert_magic_match_text!("0 string >\0 Any String", b"A\0");
        assert_magic_match_text!("0 string >Test Any String", b"Test 1\0");
        assert_magic_match_text!("0 string <Test Any String", b"\0");
        assert_magic_not_match_text!("0 string >Test Any String", b"\0");
    }
3917
    // Exercises the `lestring16` test (little-endian UTF-16 string): exact
    // match, `x` with `%s` formatting, rejection of big-endian data and
    // matching at a non-zero offset.
    #[test]
    fn test_lestring16() {
        assert_magic_match_bin!(
            "0 lestring16 abcd Little-endian UTF-16 string",
            b"\x61\x00\x62\x00\x63\x00\x64\x00"
        );
        assert_magic_match_bin!(
            "0 lestring16 x %s",
            b"\x61\x00\x62\x00\x63\x00\x64\x00\x00",
            "abcd"
        );
        assert_magic_not_match_bin!(
            "0 lestring16 abcd Little-endian UTF-16 string",
            b"\x00\x61\x00\x62\x00\x63\x00\x64"
        );
        assert_magic_match_bin!(
            "4 lestring16 abcd Little-endian UTF-16 string",
            b"\x00\x00\x00\x00\x61\x00\x62\x00\x63\x00\x64\x00"
        );
    }
3938
    // Exercises the `bestring16` test (big-endian UTF-16 string): exact
    // match, `x` with `%s` formatting, rejection of little-endian data and
    // matching at a non-zero offset.
    #[test]
    fn test_bestring16() {
        assert_magic_match_bin!(
            "0 bestring16 abcd Big-endian UTF-16 string",
            b"\x00\x61\x00\x62\x00\x63\x00\x64"
        );
        assert_magic_match_bin!(
            "0 bestring16 x %s",
            b"\x00\x61\x00\x62\x00\x63\x00\x64",
            "abcd"
        );
        assert_magic_not_match_bin!(
            "0 bestring16 abcd Big-endian UTF-16 string",
            b"\x61\x00\x62\x00\x63\x00\x64\x00"
        );
        assert_magic_match_bin!(
            "4 bestring16 abcd Big-endian UTF-16 string",
            b"\x00\x00\x00\x00\x00\x61\x00\x62\x00\x63\x00\x64"
        );
    }
3959
    // Negative offsets are relative to the end of the stream.
    #[test]
    fn test_offset_from_end() {
        assert_magic_match_bin!("-1 ubyte 0x42 last byte ok", b"\x00\x00\x42");
        assert_magic_match_bin!("-2 ubyte 0x41 last byte ok", b"\x00\x41\x00");
    }
3965
    // Relative offsets (`&0`) chain across nesting levels: each child starts
    // right after the value read by its parent.
    #[test]
    fn test_relative_offset() {
        assert_magic_match_bin!(
            "
 0 ubyte 0x42
 >&0 ubyte 0x00
 >>&0 ubyte 0x41 third byte ok
 ",
            b"\x42\x00\x41\x00"
        );
    }
3977
    // Indirect offsets: the offset is read from the stream (`(0.l)`), with an
    // optional constant (`+3`) or a second indirection (`+(4)`) added to it.
    #[test]
    fn test_indirect_offset() {
        assert_magic_match_bin!("(0.l) ubyte 0x42 it works", b"\x04\x00\x00\x00\x42");
        assert_magic_match_bin!("(0.l+3) ubyte 0x42 it works", b"\x01\x00\x00\x00\x42");
        assert_magic_match_bin!(
            "(0.l+(4)) ubyte 0x42 it works",
            b"\x04\x00\x00\x00\x04\x00\x00\x00\x42"
        );
    }
3989
    // A `use` entry carrying a message: its message comes first, followed by
    // the message of the named rule it refers to.
    #[test]
    fn test_use_with_message() {
        assert_magic_match_bin!(
            r#"
0 string MZ
>0 use mz first match

0 name mz then second match
>0 string MZ
"#,
            b"MZ\0",
            "first match then second match"
        );
    }
4004
    // Arithmetic and bitwise transformations applied to a scalar before the
    // comparison; division and modulo by zero are rejected at parse time.
    #[test]
    fn test_scalar_transform() {
        assert_magic_match_bin!("0 ubyte+1 0x1 add works", b"\x00");
        assert_magic_match_bin!("0 ubyte-1 0xfe sub works", b"\xff");
        assert_magic_match_bin!("0 ubyte%2 0 mod works", b"\x0a");
        assert_magic_match_bin!("0 ubyte&0x0f 0x0f bitand works", b"\xff");
        assert_magic_match_bin!("0 ubyte|0x0f 0xff bitor works", b"\xf0");
        assert_magic_match_bin!("0 ubyte^0x0f 0xf0 bitxor works", b"\xff");

        FileMagicParser::parse_str("0 ubyte%0 mod by zero", None)
            .expect_err("expect div by zero error");
        FileMagicParser::parse_str("0 ubyte/0 div by zero", None)
            .expect_err("expect div by zero error");
    }
4019
    // Exercises the `belong` test (big-endian 32 bits) with every comparison
    // operator: equality, `<`, `>`, `&`, `^`, `~` and `x`.
    #[test]
    fn test_belong() {
        // equality
        assert_magic_match_bin!("0 belong 0x12345678 Big-endian long", b"\x12\x34\x56\x78");
        assert_magic_not_match_bin!("0 belong 0x12345678 Big-endian long", b"\x78\x56\x34\x12");
        assert_magic_match_bin!(
            "4 belong 0x12345678 Big-endian long",
            b"\x00\x00\x00\x00\x12\x34\x56\x78"
        );
        // less than
        assert_magic_match_bin!("0 belong <0x12345678 Big-endian long", b"\x12\x34\x56\x77");
        assert_magic_not_match_bin!("0 belong <0x12345678 Big-endian long", b"\x12\x34\x56\x78");

        // greater than
        assert_magic_match_bin!("0 belong >0x12345678 Big-endian long", b"\x12\x34\x56\x79");
        assert_magic_not_match_bin!("0 belong >0x12345678 Big-endian long", b"\x12\x34\x56\x78");

        // `&`
        assert_magic_match_bin!("0 belong &0x5678 Big-endian long", b"\x00\x00\x56\x78");
        assert_magic_not_match_bin!("0 belong &0x0000FFFF Big-endian long", b"\x12\x34\x56\x78");

        // `^`
        assert_magic_match_bin!("0 belong ^0xFFFF0000 Big-endian long", b"\x00\x00\x56\x78");
        assert_magic_not_match_bin!("0 belong ^0xFFFF0000 Big-endian long", b"\x00\x01\x56\x78");

        // `~`
        assert_magic_match_bin!("0 belong ~0x12345678 Big-endian long", b"\xed\xcb\xa9\x87");
        assert_magic_not_match_bin!("0 belong ~0x12345678 Big-endian long", b"\x12\x34\x56\x78");

        // `x` matches any value
        assert_magic_match_bin!("0 belong x Big-endian long", b"\x12\x34\x56\x78");
        assert_magic_match_bin!("0 belong x Big-endian long", b"\x78\x56\x34\x12");
    }
4055
    // The range and the flags of a `search` test are optional and accepted
    // in any order.
    #[test]
    fn test_parse_search() {
        parse_assert!("0 search test");
        parse_assert!("0 search/24/s test");
        parse_assert!("0 search/s/24 test");
    }
4062
    // Exercises the `bedate` test (big-endian 32 bits timestamp): match, no
    // match, and `%s` formatting at a non-zero offset.
    #[test]
    fn test_bedate() {
        assert_magic_match_bin!(
            "0 bedate 946684800 Unix date (Jan 1, 2000)",
            b"\x38\x6D\x43\x80"
        );
        assert_magic_not_match_bin!(
            "0 bedate 946684800 Unix date (Jan 1, 2000)",
            b"\x00\x00\x00\x00"
        );
        assert_magic_match_bin!(
            "4 bedate 946684800 %s",
            b"\x00\x00\x00\x00\x38\x6D\x43\x80",
            "2000-01-01 00:00:00"
        );
    }
    // Exercises the `beldate` test (big-endian 32 bits timestamp, local
    // time): the expected text is computed with `unix_local_time_to_string`
    // so that the test does not depend on the time zone of the machine.
    #[test]
    fn test_beldate() {
        assert_magic_match_bin!(
            "0 beldate 946684800 Local date (Jan 1, 2000)",
            b"\x38\x6D\x43\x80"
        );
        assert_magic_not_match_bin!(
            "0 beldate 946684800 Local date (Jan 1, 2000)",
            b"\x00\x00\x00\x00"
        );

        assert_magic_match_bin!(
            "4 beldate 946684800 {}",
            b"\x00\x00\x00\x00\x38\x6D\x43\x80",
            unix_local_time_to_string(946684800)
        );
    }
4096
    // Exercises the `beqdate` test (big-endian 64 bits timestamp): match, no
    // match, and `%s` formatting.
    #[test]
    fn test_beqdate() {
        assert_magic_match_bin!(
            "0 beqdate 946684800 Unix date (Jan 1, 2000)",
            b"\x00\x00\x00\x00\x38\x6D\x43\x80"
        );

        assert_magic_not_match_bin!(
            "0 beqdate 946684800 Unix date (Jan 1, 2000)",
            b"\x00\x00\x00\x00\x00\x00\x00\x00"
        );

        assert_magic_match_bin!(
            "0 beqdate 946684800 %s",
            b"\x00\x00\x00\x00\x38\x6D\x43\x80",
            "2000-01-01 00:00:00"
        );
    }
4115
    // Exercises the `medate` test (middle-endian 32 bits timestamp): match,
    // no match, and `%s` formatting at a non-zero offset.
    #[test]
    fn test_medate() {
        assert_magic_match_bin!(
            "0 medate 946684800 Unix date (Jan 1, 2000)",
            b"\x6D\x38\x80\x43"
        );

        assert_magic_not_match_bin!(
            "0 medate 946684800 Unix date (Jan 1, 2000)",
            b"\x00\x00\x00\x00"
        );

        assert_magic_match_bin!(
            "4 medate 946684800 %s",
            b"\x00\x00\x00\x00\x6D\x38\x80\x43",
            "2000-01-01 00:00:00"
        );
    }
4134
    // Exercises the `meldate` test (middle-endian 32 bits timestamp, local
    // time); the expected text is computed with `unix_local_time_to_string`.
    #[test]
    fn test_meldate() {
        assert_magic_match_bin!(
            "0 meldate 946684800 Local date (Jan 1, 2000)",
            b"\x6D\x38\x80\x43"
        );
        assert_magic_not_match_bin!(
            "0 meldate 946684800 Local date (Jan 1, 2000)",
            b"\x00\x00\x00\x00"
        );

        assert_magic_match_bin!(
            "4 meldate 946684800 %s",
            b"\x00\x00\x00\x00\x6D\x38\x80\x43",
            unix_local_time_to_string(946684800)
        );
    }
4152
    // Exercises the `date` test on little-endian fixtures: match, no match,
    // and `{}` formatting at a non-zero offset. The expected text is a fixed
    // string, not a local-time conversion.
    #[test]
    fn test_date() {
        assert_magic_match_bin!(
            "0 date 946684800 Local date (Jan 1, 2000)",
            b"\x80\x43\x6D\x38"
        );
        assert_magic_not_match_bin!(
            "0 date 946684800 Local date (Jan 1, 2000)",
            b"\x00\x00\x00\x00"
        );
        assert_magic_match_bin!(
            "4 date 946684800 {}",
            b"\x00\x00\x00\x00\x80\x43\x6D\x38",
            "2000-01-01 00:00:00"
        );
    }
4169
    // Exercises the `leldate` test (little-endian 32 bits timestamp, local
    // time); the expected text is computed with `unix_local_time_to_string`.
    #[test]
    fn test_leldate() {
        assert_magic_match_bin!(
            "0 leldate 946684800 Local date (Jan 1, 2000)",
            b"\x80\x43\x6D\x38"
        );
        assert_magic_not_match_bin!(
            "0 leldate 946684800 Local date (Jan 1, 2000)",
            b"\x00\x00\x00\x00"
        );
        assert_magic_match_bin!(
            "4 leldate 946684800 {}",
            b"\x00\x00\x00\x00\x80\x43\x6D\x38",
            unix_local_time_to_string(946684800)
        );
    }
4186
    // Exercises the `leqdate` test (little-endian 64 bits timestamp): match,
    // no match, and `%s` formatting at a non-zero offset.
    #[test]
    fn test_leqdate() {
        assert_magic_match_bin!(
            "0 leqdate 1577836800 Unix date (Jan 1, 2020)",
            b"\x00\xe1\x0b\x5E\x00\x00\x00\x00"
        );

        assert_magic_not_match_bin!(
            "0 leqdate 1577836800 Unix date (Jan 1, 2020)",
            b"\x00\x00\x00\x00\x00\x00\x00\x00"
        );
        assert_magic_match_bin!(
            "8 leqdate 1577836800 %s",
            b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\xE1\x0B\x5E\x00\x00\x00\x00",
            "2020-01-01 00:00:00"
        );
    }
4204
    // Exercises the `leqldate` test (little-endian 64 bits timestamp, local
    // time); the expected text is computed with `unix_local_time_to_string`.
    #[test]
    fn test_leqldate() {
        assert_magic_match_bin!(
            "0 leqldate 1577836800 Unix date (Jan 1, 2020)",
            b"\x00\xe1\x0b\x5E\x00\x00\x00\x00"
        );

        assert_magic_not_match_bin!(
            "0 leqldate 1577836800 Unix date (Jan 1, 2020)",
            b"\x00\x00\x00\x00\x00\x00\x00\x00"
        );
        assert_magic_match_bin!(
            "8 leqldate 1577836800 %s",
            b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\xE1\x0B\x5E\x00\x00\x00\x00",
            unix_local_time_to_string(1577836800)
        );
    }
4222
    // Exercises the `melong` test (middle-endian 32 bits) with every
    // comparison operator: `=`, `<`, `>`, `&`, `^`, `~` and `x`.
    #[test]
    fn test_melong() {
        // equality
        assert_magic_match_bin!(
            "0 melong =0x12345678 Middle-endian long",
            b"\x34\x12\x78\x56"
        );
        assert_magic_not_match_bin!(
            "0 melong =0x12345678 Middle-endian long",
            b"\x00\x00\x00\x00"
        );

        // less than
        assert_magic_match_bin!(
            "0 melong <0x12345678 Middle-endian long",
            b"\x34\x12\x78\x55"
        );
        assert_magic_not_match_bin!(
            "0 melong <0x12345678 Middle-endian long",
            b"\x34\x12\x78\x56"
        );

        // greater than
        assert_magic_match_bin!(
            "0 melong >0x12345678 Middle-endian long",
            b"\x34\x12\x78\x57"
        );
        assert_magic_not_match_bin!(
            "0 melong >0x12345678 Middle-endian long",
            b"\x34\x12\x78\x56"
        );

        // `&`
        assert_magic_match_bin!("0 melong &0x5678 Middle-endian long", b"\xab\xcd\x78\x56");
        assert_magic_not_match_bin!(
            "0 melong &0x0000FFFF Middle-endian long",
            b"\x34\x12\x78\x56"
        );

        // `^`
        assert_magic_match_bin!(
            "0 melong ^0xFFFF0000 Middle-endian long",
            b"\x00\x00\x78\x56"
        );
        assert_magic_not_match_bin!(
            "0 melong ^0xFFFF0000 Middle-endian long",
            b"\x00\x01\x78\x56"
        );

        // `~`
        assert_magic_match_bin!(
            "0 melong ~0x12345678 Middle-endian long",
            b"\xCB\xED\x87\xA9"
        );
        assert_magic_not_match_bin!(
            "0 melong ~0x12345678 Middle-endian long",
            b"\x34\x12\x78\x56"
        );

        // `x` matches any value
        assert_magic_match_bin!("0 melong x Middle-endian long", b"\x34\x12\x78\x56");
        assert_magic_match_bin!("0 melong x Middle-endian long", b"\x00\x00\x00\x00");
    }
4286
    // Exercises the `uquad` test (unsigned 64 bits, little-endian fixtures)
    // with every comparison operator: `=`, `<`, `>`, `&`, `^`, `~` and `x`.
    #[test]
    fn test_uquad() {
        // equality
        assert_magic_match_bin!(
            "0 uquad =0x123456789ABCDEF0 Unsigned quad",
            b"\xF0\xDE\xBC\x9A\x78\x56\x34\x12"
        );
        assert_magic_not_match_bin!(
            "0 uquad =0x123456789ABCDEF0 Unsigned quad",
            b"\x00\x00\x00\x00\x00\x00\x00\x00"
        );

        // less than
        assert_magic_match_bin!(
            "0 uquad <0x123456789ABCDEF0 Unsigned quad",
            b"\xF0\xDE\xBC\x9A\x78\x56\x34\x11"
        );
        assert_magic_not_match_bin!(
            "0 uquad <0x123456789ABCDEF0 Unsigned quad",
            b"\xF0\xDE\xBC\x9A\x78\x56\x34\x12"
        );

        // greater than
        assert_magic_match_bin!(
            "0 uquad >0x123456789ABCDEF0 Unsigned quad",
            b"\xF0\xDE\xBC\x9A\x78\x56\x34\x13"
        );
        assert_magic_not_match_bin!(
            "0 uquad >0x123456789ABCDEF0 Unsigned quad",
            b"\xF0\xDE\xBC\x9A\x78\x56\x34\x12"
        );

        // `&`
        assert_magic_match_bin!(
            "0 uquad &0xF0 Unsigned quad",
            b"\xF0\xDE\xBC\x9A\x78\x56\x34\x12"
        );
        assert_magic_not_match_bin!(
            "0 uquad &0xFF Unsigned quad",
            b"\xF0\xDE\xBC\x9A\x78\x56\x34\x12"
        );

        // `^`
        assert_magic_match_bin!(
            "0 uquad ^0xFFFFFFFFFFFFFFFF Unsigned quad",
            b"\x00\x00\x00\x00\x00\x00\x00\x00"
        );
        assert_magic_not_match_bin!(
            "0 uquad ^0xFFFFFFFFFFFFFFFF Unsigned quad",
            b"\xF0\xDE\xBC\x9A\x78\x56\x34\x12"
        );

        // `~`
        assert_magic_match_bin!(
            "0 uquad ~0x123456789ABCDEF0 Unsigned quad",
            b"\x0F\x21\x43\x65\x87\xA9\xCB\xED"
        );
        assert_magic_not_match_bin!(
            "0 uquad ~0x123456789ABCDEF0 Unsigned quad",
            b"\xF0\xDE\xBC\x9A\x78\x56\x34\x12"
        );

        // `x` matches any value; the value can be formatted in the message
        assert_magic_match_bin!(
            "0 uquad x {:#x}",
            b"\xF0\xDE\xBC\x9A\x78\x56\x34\x12",
            "0x123456789abcdef0"
        );
        assert_magic_match_bin!(
            "0 uquad x Unsigned quad",
            b"\x00\x00\x00\x00\x00\x00\x00\x00"
        );
    }
4360
    // Exercises the `guid` test: match, no match, and `%s` formatting of the
    // value read from the stream.
    #[test]
    fn test_guid() {
        assert_magic_match_bin!(
            "0 guid EC959539-6786-2D4E-8FDB-98814CE76C1E It works",
            b"\xEC\x95\x95\x39\x67\x86\x2D\x4E\x8F\xDB\x98\x81\x4C\xE7\x6C\x1E"
        );

        assert_magic_not_match_bin!(
            "0 guid 399595EC-8667-4E2D-8FDB-98814CE76C1E It works",
            b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F"
        );

        assert_magic_match_bin!(
            "0 guid x %s",
            b"\xEC\x95\x95\x39\x67\x86\x2D\x4E\x8F\xDB\x98\x81\x4C\xE7\x6C\x1E",
            "EC959539-6786-2D4E-8FDB-98814CE76C1E"
        );
    }
4379
4380 #[test]
4381 fn test_ubeqdate() {
4382 assert_magic_match_bin!(
4383 "0 ubeqdate 1633046400 It works",
4384 b"\x00\x00\x00\x00\x61\x56\x4f\x80"
4385 );
4386
4387 assert_magic_match_bin!(
4388 "0 ubeqdate x %s",
4389 b"\x00\x00\x00\x00\x61\x56\x4f\x80",
4390 "2021-10-01 00:00:00"
4391 );
4392
4393 assert_magic_not_match_bin!(
4394 "0 ubeqdate 1633046400 It should not work",
4395 b"\x00\x00\x00\x00\x00\x00\x00\x00"
4396 );
4397 }
4398
4399 #[test]
4400 fn test_ldate() {
4401 assert_magic_match_bin!("0 ldate 1640551520 It works", b"\x60\xd4\xC8\x61");
4402
4403 assert_magic_not_match_bin!("0 ldate 1633046400 It should not work", b"\x00\x00\x00\x00");
4404
4405 assert_magic_match_bin!(
4406 "0 ldate x %s",
4407 b"\x60\xd4\xC8\x61",
4408 unix_local_time_to_string(1640551520)
4409 );
4410 }
4411
4412 #[test]
4413 fn test_scalar_with_transform() {
4414 assert_magic_match_bin!("0 ubyte/10 2 {}", b"\x14", "2");
4415 assert_magic_match_bin!("0 ubyte/10 x {}", b"\x14", "2");
4416 assert_magic_match_bin!("0 ubyte%10 x {}", b"\x14", "0");
4417 }
4418
4419 #[test]
4420 fn test_float_with_transform() {
4421 assert_magic_match_bin!("0 lefloat/10 2 {}", b"\x00\x00\xa0\x41", "2");
4422 assert_magic_match_bin!("0 lefloat/10 x {}", b"\x00\x00\xa0\x41", "2");
4423 assert_magic_match_bin!("0 lefloat%10 x {}", b"\x00\x00\xa0\x41", "0");
4424 }
4425
4426 #[test]
4427 fn test_read_octal() {
4428 assert_eq!(read_octal_u64(&mut lazy_cache!("0")), Some(0));
4430 assert_eq!(read_octal_u64(&mut lazy_cache!("00")), Some(0));
4431 assert_eq!(read_octal_u64(&mut lazy_cache!("01")), Some(1));
4432 assert_eq!(read_octal_u64(&mut lazy_cache!("07")), Some(7));
4433 assert_eq!(read_octal_u64(&mut lazy_cache!("010")), Some(8));
4434 assert_eq!(read_octal_u64(&mut lazy_cache!("0123")), Some(83));
4435 assert_eq!(read_octal_u64(&mut lazy_cache!("0755")), Some(493));
4436
4437 assert_eq!(read_octal_u64(&mut lazy_cache!("0ABC")), Some(0));
4439 assert_eq!(read_octal_u64(&mut lazy_cache!("01ABC")), Some(1));
4440 assert_eq!(read_octal_u64(&mut lazy_cache!("0755ABC")), Some(493));
4441 assert_eq!(read_octal_u64(&mut lazy_cache!("0123ABC")), Some(83));
4442
4443 assert_eq!(read_octal_u64(&mut lazy_cache!("08")), Some(0)); assert_eq!(read_octal_u64(&mut lazy_cache!("01238")), Some(83)); assert_eq!(read_octal_u64(&mut lazy_cache!("123")), None);
4449 assert_eq!(read_octal_u64(&mut lazy_cache!("755")), None);
4450
4451 assert_eq!(read_octal_u64(&mut lazy_cache!("")), None);
4453
4454 assert_eq!(read_octal_u64(&mut lazy_cache!("ABC")), None);
4456 assert_eq!(read_octal_u64(&mut lazy_cache!("8ABC")), None); assert_eq!(
4460 read_octal_u64(&mut lazy_cache!("01777777777")),
4461 Some(268435455)
4462 );
4463 }
4464
    /// Regression test for offset computation across chained `use` rules.
    ///
    /// Sample layout (13 bytes): index 0 is NUL, 1..=4 is `TEST`, 5 holds
    /// 0x06, 6..=10 is `twice`, 11 is NUL and 12 holds 0x06. The three rule
    /// messages (`Bread is`, `Toasted`, `%s`) must combine into
    /// "Bread is Toasted twice".
    #[test]
    fn test_offset_bug_1() {
        // The indirect offsets `(5.b)` and `(6.b)` each read one byte from
        // the sample; both bytes available for that (index 5 and index 12)
        // hold 0x06, the index at which `twice` starts.
        assert_magic_match_bin!(
            r"
1 string TEST Bread is
# offset computation is relative to
# rule start
>(5.b) use toasted

0 name toasted
>0 string twice Toasted
>>0 use toasted_twice

0 name toasted_twice
>(6.b) string x %s
            ",
            b"\x00TEST\x06twice\x00\x06",
            "Bread is Toasted twice"
        );
    }
4487
4488 #[test]
4494 fn test_offset_bug_2() {
4495 assert_magic_match_bin!(
4498 r"
4499-12 string TEST Bread is
4500>(4.b) use toasted
4501
45020 name toasted
4503>0 string twice Toasted
4504>>0 use toasted_twice
4505
45060 name toasted_twice
4507>(6.b) string x %
4508 ",
4509 b"\x00TEST\x06twice\x00\x06",
4510 "Bread is Toasted twice"
4511 )
4512 }
4513
    /// Regression test for offsets when an `indirect/r` rule is followed by
    /// a `use` of a named rule.
    ///
    /// Sample layout (13 bytes): index 0 is NUL, 1..=4 is `TEST`, 5 holds
    /// 0x06, 6..=10 is `twice`, 11 is NUL and 12 holds 0x08. The expected
    /// message is "Bread is Toasted twice".
    #[test]
    fn test_offset_bug_3() {
        // NOTE(review): the `indirect/r` entry presumably re-runs the
        // top-level rules at the offset read through `(5.b)`, which is how
        // the second rule (`0 string twice`) gets to see `twice` at its own
        // offset 0 - confirm against the indirect implementation.
        assert_magic_match_bin!(
            r"
1 string TEST Bread is
>(5.b) indirect/r x

0 string twice Toasted
>0 use toasted_twice

0 name toasted_twice
>0 string x %s
            ",
            b"\x00TEST\x06twice\x00\x08",
            "Bread is Toasted twice"
        )
    }
4533
    /// Regression test for base-offset handling when `indirect/r` and `use`
    /// are combined; the rule comments embedded in the magic source describe
    /// which base each offset is relative to.
    ///
    /// Sample layout (24 bytes): index 0 is NUL, 1..=5 is `Bread`, 6 holds
    /// 0x06, 7..=16 is `is Toasted`, 17 holds 0x0c, 18..=22 is `twice` and
    /// 23 is NUL. Every message is `%s`, so the expected output
    /// "Bread is Toasted twice" is built only from strings read in the
    /// sample.
    #[test]
    fn test_offset_bug_4() {
        assert_magic_match_bin!(
            r"
1 string Bread %s
>(6.b) indirect/r x

# this one uses a based offset
# computed at indirection
1 string is\ Toasted %s
>(11.b) use toasted_twice

# this one is using a new base
# offset being previous base
# offset + offset of use
0 name toasted_twice
>0 string x %s
            ",
            b"\x00Bread\x06is Toasted\x0ctwice\x00",
            "Bread is Toasted twice"
        )
    }
4558
    /// Same rules and sample as `test_offset_bug_3`, except that the named
    /// rule ends with a relative offset (`&1`) test instead of printing the
    /// matched string.
    ///
    /// Sample layout (13 bytes): index 0 is NUL, 1..=4 is `TEST`, 5 holds
    /// 0x06, 6..=10 is `twice`, 11 is NUL and 12 holds 0x08. The expected
    /// message is "Bread is Toasted twice".
    #[test]
    fn test_offset_bug_5() {
        // In `toasted_twice` the `string twice` entry carries no message;
        // the final "twice" of the expected output is the literal message of
        // the `byte 0x08` entry, and 0x08 is the last byte of the sample.
        assert_magic_match_bin!(
            r"
1 string TEST Bread is
>(5.b) indirect/r x

0 string twice Toasted
>0 use toasted_twice

0 name toasted_twice
>0 string twice
>>&1 byte 0x08 twice
            ",
            b"\x00TEST\x06twice\x00\x08",
            "Bread is Toasted twice"
        )
    }
4577
4578 #[test]
4579 fn test_message_parts() {
4580 let m = first_magic(
4581 r#"0 string/W #!/usr/bin/env\ python PYTHON"#,
4582 b"#!/usr/bin/env python",
4583 StreamKind::Text(TextEncoding::Ascii),
4584 )
4585 .unwrap();
4586
4587 assert!(m.message_parts().any(|p| p.eq_ignore_ascii_case("python")))
4588 }
4589}