1#![forbid(unsafe_code)]
2#![deny(unused_imports)]
3#![deny(missing_docs)]
4use dyf::{DynDisplay, FormatString, dformat};
144use flagset::{FlagSet, flags};
145use flate2::{Compression, read::GzDecoder, write::GzEncoder};
146use lazy_cache::LazyCache;
147use memchr::memchr;
148use pest::{Span, error::ErrorVariant};
149use regex::bytes::{self};
150use serde::{Deserialize, Serialize};
151use std::{
152 borrow::Cow,
153 cmp::max,
154 collections::{HashMap, HashSet},
155 fmt::{self, Debug, Display},
156 io::{self, Read, Seek, SeekFrom, Write},
157 ops::{Add, BitAnd, BitOr, BitXor, Div, Mul, Rem, Sub},
158 path::Path,
159};
160use tar::Archive;
161use thiserror::Error;
162use tracing::{Level, debug, enabled, trace};
163
164use crate::{
165 numeric::{Float, FloatDataType, Scalar, ScalarDataType},
166 parser::{FileMagicParser, Rule},
167 utils::{decode_id3, find_json_boundaries, run_utf8_validation},
168};
169
170mod numeric;
171mod parser;
172mod utils;
173
// NOTE(review): the next three constants are not used in the part of the file
// reviewed here; their meaning is inferred from their names only — confirm at
// the use sites.
const HARDCODED_MAGIC_STRENGTH: u64 = 2048;
const HARDCODED_SOURCE: &str = "hardcoded";
const MAX_RECURSION: usize = 50;
/// Upper bound (7 MiB) on the number of bytes read for `search` tests and for
/// string tests carrying a length-altering modifier; also the default search
/// range used when computing a test strength.
const FILE_BYTES_MAX: usize = 7 * 1024 * 1024;
/// Number of bytes read for a regex test that specifies no explicit length.
const FILE_REGEX_MAX: usize = 8192;

/// The `application/octet-stream` MIME type (presumably the fallback reported
/// for binary content — confirm at the use sites).
pub const DEFAULT_BIN_MIMETYPE: &str = "application/octet-stream";
/// The `text/plain` MIME type (presumably the fallback reported for text
/// content — confirm at the use sites).
pub const DEFAULT_TEXT_MIMETYPE: &str = "text/plain";

/// `strftime`-style pattern (`YYYY-MM-DD HH:MM:SS`) shared across the crate
/// for rendering timestamps.
pub(crate) const TIMESTAMP_FORMAT: &str = "%Y-%m-%d %H:%M:%S";
189
/// Panics with the given `panic!`-style arguments, but only in builds compiled
/// with `debug_assertions`.
///
/// In other builds the macro does nothing and execution continues with the
/// statement that follows it, so callers must still provide a fallback value.
macro_rules! debug_panic {
    ($($arg:tt)*) => {
        // `cfg!` (rather than `#[cfg]`) keeps the code after the macro
        // reachable for the compiler in every build profile.
        if cfg!(debug_assertions) {
            panic!($($arg)*);
        }
    };
}
197
/// Reads exactly `size_of::<$ty>()` bytes from `$r` and evaluates to them as a
/// fixed-size byte array.
///
/// The expansion uses `?` on `read_exact`, so it can only be used inside a
/// function whose error type can be built from the reader's error.
macro_rules! read {
    ($r: expr, $ty: ty) => {{
        let mut a = [0u8; std::mem::size_of::<$ty>()];
        $r.read_exact(&mut a)?;
        a
    }};
}
205
/// Reads a `$ty` from `$r`, decoding the bytes as little-endian.
///
/// Built on `read!`, so it propagates read failures with `?`.
macro_rules! read_le {
    ($r:expr, $ty: ty ) => {{ <$ty>::from_le_bytes(read!($r, $ty)) }};
}
209
/// Reads a `$ty` from `$r`, decoding the bytes as big-endian.
///
/// Built on `read!`, so it propagates read failures with `?`.
macro_rules! read_be {
    ($r:expr, $ty: ty ) => {{ <$ty>::from_be_bytes(read!($r, $ty)) }};
}
213
/// Reads a 32-bit value stored as two consecutive little-endian 16-bit words
/// and evaluates to an `i32`.
///
/// Operands are evaluated left to right, so the first word read becomes the
/// high 16 bits and the second word the low 16 bits.
macro_rules! read_me {
    ($r: expr) => {{ ((read_le!($r, u16) as i32) << 16) | (read_le!($r, u16) as i32) }};
}
217
218#[inline(always)]
219fn read_octal_u64<R: Read + Seek>(haystack: &mut LazyCache<R>) -> Option<u64> {
220 let s = haystack
221 .read_while_or_limit(|b| matches!(b, b'0'..=b'7'), 22)
222 .map(|buf| str::from_utf8(buf))
223 .ok()?
224 .ok()?;
225
226 if !s.starts_with("0") {
227 return None;
228 }
229
230 u64::from_str_radix(s, 8).ok()
231}
232
/// Errors produced by this crate.
#[derive(Debug, Error)]
pub enum Error {
    /// A free-form error message.
    #[error("{0}")]
    Msg(String),

    /// An error annotated with the source name and line number it relates to.
    #[error("source={0} line={1} error={2}")]
    Localized(String, usize, Box<Error>),

    /// A rule referenced by name could not be found.
    #[error("missing rule: {0}")]
    MissingRule(String),

    /// The recursion limit was reached; carries the reported depth.
    #[error("maximum recursion reached: {0}")]
    MaximumRecursion(usize),

    /// An I/O error.
    #[error("io: {0}")]
    Io(#[from] io::Error),

    /// An error reported by the magic-file parser.
    #[error("parser error: {0}")]
    Parse(#[from] Box<pest::error::Error<Rule>>),

    /// An error raised while formatting a message.
    #[error("formatting: {0}")]
    Format(#[from] dyf::Error),

    /// An error raised while building a regular expression.
    #[error("regex: {0}")]
    Regex(#[from] regex::Error),

    /// An error raised while encoding data with `bincode`.
    #[error("{0}")]
    Serialize(#[from] bincode::error::EncodeError),

    /// An error raised while decoding data with `bincode`.
    #[error("{0}")]
    Deserialize(#[from] bincode::error::DecodeError),
}
276
277impl Error {
278 #[inline]
279 fn parser<S: ToString>(msg: S, span: Span<'_>) -> Self {
280 Self::Parse(Box::new(pest::error::Error::new_from_span(
281 ErrorVariant::CustomError {
282 message: msg.to_string(),
283 },
284 span,
285 )))
286 }
287
288 fn msg<M: AsRef<str>>(msg: M) -> Self {
289 Self::Msg(msg.as_ref().into())
290 }
291
292 fn localized<S: AsRef<str>>(source: S, line: usize, err: Error) -> Self {
293 Self::Localized(source.as_ref().into(), line, err.into())
294 }
295
296 pub fn unwrap_localized(&self) -> &Self {
298 match self {
299 Self::Localized(_, _, e) => e,
300 _ => self,
301 }
302 }
303}
304
/// Text attached to a rule, either literal or containing a format directive.
#[derive(Debug, Clone, Serialize, Deserialize)]
enum Message {
    /// A literal message, emitted verbatim.
    String(String),
    /// A message that is rendered against the matched value.
    Format {
        /// The printf-style conversion specifier of the directive
        /// (`"c"` gets special handling when formatting byte scalars).
        printf_spec: String,
        /// The parsed format string used to render the message.
        fs: FormatString,
    },
}
313
314impl Display for Message {
315 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
316 match self {
317 Self::String(s) => write!(f, "{s}"),
318 Self::Format { printf_spec: _, fs } => write!(f, "{}", fs.to_string_lossy()),
319 }
320 }
321}
322
323impl Message {
324 fn to_string_lossy(&self) -> Cow<'_, str> {
325 match self {
326 Message::String(s) => Cow::Borrowed(s),
327 Message::Format { printf_spec: _, fs } => fs.to_string_lossy(),
328 }
329 }
330
331 #[inline(always)]
332 fn format_with(&self, mr: Option<&MatchRes>) -> Result<Cow<'_, str>, Error> {
333 match self {
334 Self::String(s) => Ok(Cow::Borrowed(s.as_str())),
335 Self::Format {
336 printf_spec: c_spec,
337 fs,
338 } => {
339 if let Some(mr) = mr {
340 match mr {
341 MatchRes::Float(_, _) | MatchRes::Bytes(_, _, _, _) => {
342 Ok(Cow::Owned(dformat!(fs, mr)?))
343 }
344 MatchRes::Scalar(_, scalar) => {
345 if c_spec.as_str() == "c" {
347 match scalar {
348 Scalar::byte(b) => {
349 let b = (*b as u8) as char;
350 Ok(Cow::Owned(dformat!(fs, b)?))
351 }
352 Scalar::ubyte(b) => {
353 let b = *b as char;
354 Ok(Cow::Owned(dformat!(fs, b)?))
355 }
356 _ => Ok(Cow::Owned(dformat!(fs, mr)?)),
357 }
358 } else {
359 Ok(Cow::Owned(dformat!(fs, mr)?))
360 }
361 }
362 }
363 } else {
364 Ok(fs.to_string_lossy())
365 }
366 }
367 }
368 }
369}
370
371impl ScalarDataType {
372 #[inline(always)]
373 fn read<R: Read + Seek>(&self, from: &mut R, switch_endianness: bool) -> Result<Scalar, Error> {
374 macro_rules! _read_le {
375 ($ty: ty) => {{
376 if switch_endianness {
377 <$ty>::from_be_bytes(read!(from, $ty))
378 } else {
379 <$ty>::from_le_bytes(read!(from, $ty))
380 }
381 }};
382 }
383
384 macro_rules! _read_be {
385 ($ty: ty) => {{
386 if switch_endianness {
387 <$ty>::from_le_bytes(read!(from, $ty))
388 } else {
389 <$ty>::from_be_bytes(read!(from, $ty))
390 }
391 }};
392 }
393
394 macro_rules! _read_ne {
395 ($ty: ty) => {{
396 if cfg!(target_endian = "big") {
397 _read_be!($ty)
398 } else {
399 _read_le!($ty)
400 }
401 }};
402 }
403
404 macro_rules! _read_me {
405 () => {
406 ((_read_le!(u16) as i32) << 16) | (_read_le!(u16) as i32)
407 };
408 }
409
410 Ok(match self {
411 Self::byte => Scalar::byte(read!(from, u8)[0] as i8),
413 Self::short => Scalar::short(_read_ne!(i16)),
414 Self::long => Scalar::long(_read_ne!(i32)),
415 Self::date => Scalar::date(_read_ne!(i32)),
416 Self::ldate => Scalar::ldate(_read_ne!(i32)),
417 Self::qwdate => Scalar::qwdate(_read_ne!(i64)),
418 Self::leshort => Scalar::leshort(_read_le!(i16)),
419 Self::lelong => Scalar::lelong(_read_le!(i32)),
420 Self::lequad => Scalar::lequad(_read_le!(i64)),
421 Self::bequad => Scalar::bequad(_read_be!(i64)),
422 Self::belong => Scalar::belong(_read_be!(i32)),
423 Self::bedate => Scalar::bedate(_read_be!(i32)),
424 Self::beldate => Scalar::beldate(_read_be!(i32)),
425 Self::beqdate => Scalar::beqdate(_read_be!(i64)),
426 Self::ubyte => Scalar::ubyte(read!(from, u8)[0]),
428 Self::ushort => Scalar::ushort(_read_ne!(u16)),
429 Self::uleshort => Scalar::uleshort(_read_le!(u16)),
430 Self::ulelong => Scalar::ulelong(_read_le!(u32)),
431 Self::uledate => Scalar::uledate(_read_le!(u32)),
432 Self::ulequad => Scalar::ulequad(_read_le!(u64)),
433 Self::offset => Scalar::offset(from.stream_position()?),
434 Self::ubequad => Scalar::ubequad(_read_be!(u64)),
435 Self::medate => Scalar::medate(_read_me!()),
436 Self::meldate => Scalar::meldate(_read_me!()),
437 Self::melong => Scalar::melong(_read_me!()),
438 Self::beshort => Scalar::beshort(_read_be!(i16)),
439 Self::quad => Scalar::quad(_read_ne!(i64)),
440 Self::uquad => Scalar::uquad(_read_ne!(u64)),
441 Self::ledate => Scalar::ledate(_read_le!(i32)),
442 Self::leldate => Scalar::leldate(_read_le!(i32)),
443 Self::leqdate => Scalar::leqdate(_read_le!(i64)),
444 Self::leqldate => Scalar::leqldate(_read_le!(i64)),
445 Self::leqwdate => Scalar::leqwdate(_read_le!(i64)),
446 Self::ubelong => Scalar::ubelong(_read_be!(u32)),
447 Self::ulong => Scalar::ulong(_read_ne!(u32)),
448 Self::ubeshort => Scalar::ubeshort(_read_be!(u16)),
449 Self::ubeqdate => Scalar::ubeqdate(_read_be!(u64)),
450 Self::lemsdosdate => Scalar::lemsdosdate(_read_le!(u16)),
451 Self::lemsdostime => Scalar::lemsdostime(_read_le!(u16)),
452 Self::guid => Scalar::guid(u128::from_be_bytes(read!(from, u128))),
453 })
454 }
455}
456
457impl FloatDataType {
458 #[inline(always)]
459 fn read<R: Read + Seek>(&self, from: &mut R, switch_endianness: bool) -> Result<Float, Error> {
460 macro_rules! _read_le {
461 ($ty: ty) => {{
462 if switch_endianness {
463 <$ty>::from_be_bytes(read!(from, $ty))
464 } else {
465 <$ty>::from_le_bytes(read!(from, $ty))
466 }
467 }};
468 }
469
470 macro_rules! _read_be {
471 ($ty: ty) => {{
472 if switch_endianness {
473 <$ty>::from_le_bytes(read!(from, $ty))
474 } else {
475 <$ty>::from_be_bytes(read!(from, $ty))
476 }
477 }};
478 }
479
480 macro_rules! _read_ne {
481 ($ty: ty) => {{
482 if cfg!(target_endian = "big") {
483 _read_be!($ty)
484 } else {
485 _read_le!($ty)
486 }
487 }};
488 }
489
490 macro_rules! _read_me {
491 () => {
492 ((_read_le!(u16) as i32) << 16) | (_read_le!(u16) as i32)
493 };
494 }
495
496 Ok(match self {
497 Self::lefloat => Float::lefloat(_read_le!(f32)),
498 Self::befloat => Float::befloat(_read_le!(f32)),
499 Self::ledouble => Float::ledouble(_read_le!(f64)),
500 Self::bedouble => Float::bedouble(_read_be!(f64)),
501 })
502 }
503}
504
/// Arithmetic or bitwise operator applied to a value read from the input
/// before it is compared.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
enum Op {
    /// Multiplication (`*`).
    Mul,
    /// Addition (`+`).
    Add,
    /// Subtraction (`-`).
    Sub,
    /// Division (`/`).
    Div,
    /// Remainder (`%`).
    Mod,
    /// Bitwise and (`&`).
    And,
    /// Bitwise exclusive or (`^`).
    Xor,
    /// Bitwise or (`|`).
    Or,
}
516
517impl Display for Op {
518 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
519 match self {
520 Op::Mul => write!(f, "*"),
521 Op::Add => write!(f, "+"),
522 Op::Sub => write!(f, "-"),
523 Op::Div => write!(f, "/"),
524 Op::Mod => write!(f, "%"),
525 Op::And => write!(f, "&"),
526 Op::Or => write!(f, "|"),
527 Op::Xor => write!(f, "^"),
528 }
529 }
530}
531
/// Comparison operator of a test.
///
/// The per-variant descriptions give the semantics used for scalar tests;
/// other test kinds support only a subset of the operators.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
enum CmpOp {
    /// The read value equals the test value.
    Eq,
    /// The read value is lower than the test value.
    Lt,
    /// The read value is greater than the test value.
    Gt,
    /// All bits set in the test value are set in the read value.
    BitAnd,
    /// The read value differs from the test value.
    Neq,
    /// None of the bits set in the test value are set in the read value.
    Xor,
    /// The read value equals the bitwise negation of the test value.
    Not,
}
542
543impl CmpOp {
544 #[inline(always)]
545 fn is_neq(&self) -> bool {
546 matches!(self, Self::Neq)
547 }
548}
549
/// Operation applied to a scalar read from the input before it is tested.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct ScalarTransform {
    /// Operator to apply.
    op: Op,
    /// Right-hand operand of the operation.
    num: Scalar,
}
555
556impl ScalarTransform {
557 fn apply(&self, s: Scalar) -> Option<Scalar> {
558 match self.op {
559 Op::Add => s.checked_add(self.num),
560 Op::Sub => s.checked_sub(self.num),
561 Op::Mul => s.checked_mul(self.num),
562 Op::Div => s.checked_div(self.num),
563 Op::Mod => s.checked_rem(self.num),
564 Op::And => Some(s.bitand(self.num)),
565 Op::Xor => Some(s.bitxor(self.num)),
566 Op::Or => Some(s.bitor(self.num)),
567 }
568 }
569}
570
/// Operation applied to a float read from the input before it is tested.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct FloatTransform {
    /// Operator to apply; bitwise operators are not supported for floats.
    op: Op,
    /// Right-hand operand of the operation.
    num: Float,
}
576
577impl FloatTransform {
578 fn apply(&self, s: Float) -> Float {
579 match self.op {
580 Op::Add => s.add(self.num),
581 Op::Sub => s.sub(self.num),
582 Op::Mul => s.mul(self.num),
583 Op::Div => s.div(self.num),
585 Op::Mod => s.rem(self.num),
587 Op::And | Op::Xor | Op::Or => {
589 debug_panic!("unsupported operation");
590 s
591 }
592 }
593 }
594}
595
/// Value a test compares against.
#[derive(Debug, Clone, Serialize, Deserialize)]
enum TestValue<T> {
    /// A concrete value to compare with.
    Value(T),
    /// Wildcard: any value read from the input is accepted.
    Any,
}
601
602impl<T> TestValue<T> {
603 #[inline(always)]
604 fn as_ref(&self) -> TestValue<&T> {
605 match self {
606 Self::Value(v) => TestValue::Value(v),
607 Self::Any => TestValue::Any,
608 }
609 }
610}
611
// Modifiers accepted by regex tests (also carried by search tests).
// NOTE(review): only `LineLimit`, `ForceBin` and `ForceText` are consumed in
// the part of the file reviewed here; the meaning of the other flags is
// inferred from their names — confirm in the parser.
flags! {
    enum ReMod: u8{
        CaseInsensitive,
        StartOffsetUpdate,
        // The test length counts lines; each line is budgeted 80 bytes when
        // sizing the read.
        LineLimit,
        // Treat the test as a binary test.
        ForceBin,
        // Treat the test as a text test.
        ForceText,
        TrimMatch,
    }
}
622
623fn serialize_regex<S>(re: &bytes::Regex, serializer: S) -> Result<S::Ok, S::Error>
624where
625 S: serde::Serializer,
626{
627 re.as_str().serialize(serializer)
628}
629
630fn deserialize_regex<'de, D>(deserializer: D) -> Result<bytes::Regex, D::Error>
631where
632 D: serde::Deserializer<'de>,
633{
634 let wrapper = String::deserialize(deserializer)?;
635 bytes::Regex::new(&wrapper).map_err(serde::de::Error::custom)
636}
637
/// Test matching a regular expression against the input.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct RegexTest {
    /// Compiled expression; (de)serialized through its source pattern.
    #[serde(
        serialize_with = "serialize_regex",
        deserialize_with = "deserialize_regex"
    )]
    re: bytes::Regex,
    /// Optional limit: a number of lines when matching text (and when
    /// `ReMod::LineLimit` is set), otherwise the number of bytes to read.
    length: Option<usize>,
    /// Regex modifiers.
    mods: FlagSet<ReMod>,
    /// String modifiers.
    str_mods: FlagSet<StringMod>,
    /// Length used to compute the test strength.
    /// NOTE(review): presumably the pattern length excluding regex
    /// meta-characters — confirm in the parser.
    non_magic_len: usize,
    /// Whether the test is considered binary.
    binary: bool,
    /// Comparison operator; only not-equal alters matching.
    cmp_op: CmpOp,
}
652
653impl RegexTest {
654 #[inline(always)]
655 fn is_binary(&self) -> bool {
656 self.binary
657 || self.mods.contains(ReMod::ForceBin)
658 || self.str_mods.contains(StringMod::ForceBin)
659 }
660
661 #[inline(always)]
662 fn is_text(&self) -> bool {
663 self.mods.contains(ReMod::ForceText) || self.str_mods.contains(StringMod::ForceText)
664 }
665
666 fn match_buf<'buf>(
667 &self,
668 off_buf: u64, stream_kind: StreamKind,
670 buf: &'buf [u8],
671 ) -> Option<MatchRes<'buf>> {
672 let mr = match stream_kind {
673 StreamKind::Text(_) => {
674 let mut off_txt = off_buf;
675
676 let mut line_limit = self.length.unwrap_or(usize::MAX);
677
678 for line in buf.split(|c| c == &b'\n') {
679 if line_limit == 0 {
683 break;
684 }
685
686 if let Some(re_match) = self.re.find(line) {
687 let start_offset = off_txt + re_match.start() as u64;
689
690 let stop_offset = if re_match.end() == line.len() {
692 Some(start_offset + re_match.as_bytes().len() as u64 + 1)
693 } else {
694 None
695 };
696
697 return Some(MatchRes::Bytes(
698 start_offset,
699 stop_offset,
700 re_match.as_bytes(),
701 Encoding::Utf8,
702 ));
703 }
704
705 off_txt += line.len() as u64;
706 off_txt += 1;
708 line_limit = line_limit.saturating_sub(1)
709 }
710 None
711 }
712
713 StreamKind::Binary => {
714 self.re.find(buf).map(|re_match| {
715 MatchRes::Bytes(
716 off_buf + re_match.start() as u64,
718 None,
719 re_match.as_bytes(),
720 Encoding::Utf8,
721 )
722 })
723 }
724 };
725
726 if self.cmp_op.is_neq() && mr.is_none() {
728 return Some(MatchRes::Bytes(off_buf, None, buf, Encoding::Utf8));
729 }
730
731 mr
732 }
733}
734
735impl From<RegexTest> for Test {
736 fn from(value: RegexTest) -> Self {
737 Self::Regex(value)
738 }
739}
740
// Modifiers accepted by string-like tests (string, search, regex).
flags! {
    enum StringMod: u8{
        // Treat the test as a binary test.
        ForceBin,
        // An uppercase byte of the pattern matches both cases in the input.
        UpperInsensitive,
        // A lowercase byte of the pattern matches both cases in the input.
        LowerInsensitive,
        // The match must be delimited by whitespace (or the buffer bounds).
        FullWordMatch,
        // Trim ASCII whitespace from the reported value.
        Trim,
        // Treat the test as a text test.
        ForceText,
        // A run of spaces in the pattern matches a run of at least as many
        // spaces in the input.
        CompactWhitespace,
        // Spaces are optional on both the pattern and the input side.
        OptBlank,
    }
}
753
/// Test comparing bytes of the input with a string.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct StringTest {
    /// Expected bytes, or a wildcard.
    test_val: TestValue<Vec<u8>>,
    /// Comparison operator (equal, not-equal, lower, greater).
    cmp_op: CmpOp,
    /// Optional number of bytes to read, overriding the default read size.
    length: Option<usize>,
    /// String modifiers.
    mods: FlagSet<StringMod>,
    /// Whether the test is considered binary.
    binary: bool,
}
762
763impl From<StringTest> for Test {
764 fn from(value: StringTest) -> Self {
765 Self::String(value)
766 }
767}
768
/// Checks whether `buf` starts with the pattern `str`, honouring `mods`.
///
/// Returns `(matched, consumed)` where `consumed` is the number of bytes of
/// `buf` walked over. Without any matching-related modifier this is a plain
/// prefix comparison and `consumed` is `str.len()` on success, `0` otherwise.
/// With modifiers the pattern and the buffer are walked in lockstep and
/// `consumed` may differ from the pattern length.
#[inline(always)]
fn string_match(str: &[u8], mods: FlagSet<StringMod>, buf: &[u8]) -> (bool, usize) {
    let mut consumed = 0;
    // Fast path: none of the modifiers altering the comparison is set.
    if mods.is_disjoint(
        StringMod::UpperInsensitive
            | StringMod::LowerInsensitive
            | StringMod::FullWordMatch
            | StringMod::CompactWhitespace
            | StringMod::OptBlank,
    ) {
        if buf.starts_with(str) {
            (true, str.len())
        } else {
            (false, consumed)
        }
    } else {
        // Index of the next pattern byte to match.
        let mut i_src = 0;
        let mut iter = buf.iter().peekable();

        // Advances in the buffer by one byte, keeping `consumed` in sync.
        macro_rules! consume_target {
            () => {{
                if iter.next().is_some() {
                    consumed += 1;
                }
            }};
        }

        // Accepts the current byte pair and moves on to the next one.
        macro_rules! continue_next_iteration {
            () => {{
                consume_target!();
                i_src += 1;
                continue;
            }};
        }

        while let Some(&&b) = iter.peek() {
            // The pattern is exhausted: stop walking.
            let Some(&ref_byte) = str.get(i_src) else {
                break;
            };

            // Optional blanks: a space on either side may be skipped
            // independently of the other side.
            if mods.contains(StringMod::OptBlank) && (b == b' ' || ref_byte == b' ') {
                if b == b' ' {
                    consume_target!();
                }

                if ref_byte == b' ' {
                    i_src += 1;
                }

                continue;
            }

            // An uppercase pattern byte matches both cases in the buffer.
            if mods.contains(StringMod::UpperInsensitive) {
                if ref_byte.is_ascii_uppercase() && ref_byte == b.to_ascii_uppercase()
                    || ref_byte == b
                {
                    continue_next_iteration!()
                }
            }

            // A lowercase pattern byte matches both cases in the buffer.
            if mods.contains(StringMod::LowerInsensitive)
                && (ref_byte.is_ascii_lowercase() && ref_byte == b.to_ascii_lowercase()
                    || ref_byte == b)
            {
                continue_next_iteration!()
            }

            // A run of spaces in the pattern must be matched by a run of at
            // least as many spaces in the buffer.
            if mods.contains(StringMod::CompactWhitespace) && ref_byte == b' ' {
                let mut src_blk = 0;
                while let Some(b' ') = str.get(i_src) {
                    src_blk += 1;
                    i_src += 1;
                }

                let mut tgt_blk = 0;
                while let Some(b' ') = iter.peek() {
                    tgt_blk += 1;
                    consume_target!();
                }

                if src_blk > tgt_blk {
                    return (false, consumed);
                }

                continue;
            }

            // Plain byte comparison.
            if ref_byte == b {
                continue_next_iteration!()
            } else {
                return (false, consumed);
            }
        }

        // A full-word match must be followed by whitespace or by the end of
        // the buffer.
        if mods.contains(StringMod::FullWordMatch)
            && let Some(b) = iter.peek()
            && !b.is_ascii_whitespace()
        {
            return (false, consumed);
        }

        // Success requires that something was consumed and that the whole
        // pattern was walked.
        (
            consumed > 0 && str.get(i_src).is_none() && consumed <= buf.len(),
            consumed,
        )
    }
}
881
882impl StringTest {
883 fn has_length_mod(&self) -> bool {
884 !self.mods.is_disjoint(
885 StringMod::UpperInsensitive
886 | StringMod::LowerInsensitive
887 | StringMod::FullWordMatch
888 | StringMod::CompactWhitespace
889 | StringMod::OptBlank,
890 )
891 }
892
893 #[inline(always)]
894 fn test_value_len(&self) -> usize {
895 match self.test_val.as_ref() {
896 TestValue::Value(s) => s.len(),
897 TestValue::Any => 0,
898 }
899 }
900
901 #[inline(always)]
902 fn is_binary(&self) -> bool {
903 self.binary || self.mods.contains(StringMod::ForceBin)
904 }
905
906 #[inline(always)]
907 fn is_text(&self) -> bool {
908 self.mods.contains(StringMod::ForceText)
909 }
910}
911
/// Test searching for a string within a range of the input.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct SearchTest {
    /// Bytes to search for.
    str: Vec<u8>,
    /// Highest position, relative to the start of the searched buffer, at
    /// which a match may begin; unlimited when `None`.
    n_pos: Option<usize>,
    /// String modifiers applied when comparing.
    str_mods: FlagSet<StringMod>,
    /// Regex-style modifiers (only the force-binary/text flags are consulted
    /// in this part of the file).
    re_mods: FlagSet<ReMod>,
    /// Whether the test is considered binary.
    binary: bool,
    /// Comparison operator; only not-equal alters matching.
    cmp_op: CmpOp,
}
921
922impl From<SearchTest> for Test {
923 fn from(value: SearchTest) -> Self {
924 Self::Search(value)
925 }
926}
927
928impl SearchTest {
929 #[inline(always)]
930 fn is_binary(&self) -> bool {
931 (self.binary
932 || self.str_mods.contains(StringMod::ForceBin)
933 || self.re_mods.contains(ReMod::ForceBin))
934 && !(self.str_mods.contains(StringMod::ForceText)
935 || self.re_mods.contains(ReMod::ForceText))
936 }
937
938 #[inline]
940 fn match_buf<'buf>(&self, off_buf: u64, buf: &'buf [u8]) -> Option<MatchRes<'buf>> {
941 let mut i = 0;
942
943 let needle = self.str.first()?;
944
945 while i < buf.len() {
946 i += memchr(*needle, &buf[i..])?;
949
950 if self.str_mods.contains(StringMod::FullWordMatch) {
952 let prev_is_whitespace = buf
953 .get(i.saturating_sub(1))
954 .map(|c| c.is_ascii_whitespace())
955 .unwrap_or_default();
956
957 if i > 0 && !prev_is_whitespace {
962 i += 1;
963 continue;
964 }
965 }
966
967 if let Some(npos) = self.n_pos
968 && i > npos
969 {
970 break;
971 }
972
973 let pos = i;
974 let (ok, consumed) = string_match(&self.str, self.str_mods, &buf[i..]);
975
976 if ok {
977 return Some(MatchRes::Bytes(
978 off_buf.saturating_add(pos as u64),
979 None,
980 &buf[i..i + consumed],
981 Encoding::Utf8,
982 ));
983 } else {
984 i += max(consumed, 1)
985 }
986 }
987
988 if self.cmp_op.is_neq() {
990 return Some(MatchRes::Bytes(off_buf, None, buf, Encoding::Utf8));
991 }
992
993 None
994 }
995}
996
/// Test comparing a scalar read from the input with a value.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct ScalarTest {
    /// Data type to read.
    ty: ScalarDataType,
    /// Optional operation applied to the read value before comparing.
    transform: Option<ScalarTransform>,
    /// Comparison operator.
    cmp_op: CmpOp,
    /// Expected value, or a wildcard.
    test_val: TestValue<Scalar>,
}
1004
/// Test comparing a float read from the input with a value.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct FloatTest {
    /// Data type to read.
    ty: FloatDataType,
    /// Optional operation applied to the read value before comparing.
    transform: Option<FloatTransform>,
    /// Comparison operator (equal, not-equal, lower, greater).
    cmp_op: CmpOp,
    /// Expected value, or a wildcard.
    test_val: TestValue<Float>,
}
1012
/// Raw value read from the input for a test, before any comparison.
///
/// The first field of every variant is the offset at which the value was
/// read.
#[derive(Debug, PartialEq)]
enum ReadValue<'buf> {
    /// A floating-point value.
    Float(u64, Float),
    /// A scalar value.
    Scalar(u64, Scalar),
    /// A run of bytes borrowed from the read buffer.
    Bytes(u64, &'buf [u8]),
}
1021
1022impl DynDisplay for ReadValue<'_> {
1023 fn dyn_fmt(&self, f: &dyf::FormatSpec) -> Result<String, dyf::Error> {
1024 match self {
1025 Self::Float(_, s) => DynDisplay::dyn_fmt(s, f),
1026 Self::Scalar(_, s) => DynDisplay::dyn_fmt(s, f),
1027 Self::Bytes(_, b) => Ok(format!("{b:?}")),
1028 }
1029 }
1030}
1031
1032impl DynDisplay for &ReadValue<'_> {
1033 fn dyn_fmt(&self, f: &dyf::FormatSpec) -> Result<String, dyf::Error> {
1034 DynDisplay::dyn_fmt(*self, f)
1036 }
1037}
1038
1039impl Display for ReadValue<'_> {
1040 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1041 match self {
1042 Self::Float(_, v) => write!(f, "{v}"),
1043 Self::Scalar(_, s) => write!(f, "{s}"),
1044 Self::Bytes(_, b) => write!(f, "{b:?}"),
1045 }
1046 }
1047}
1048
/// Text encoding of the bytes carried by a [`MatchRes::Bytes`], used when the
/// bytes are formatted as a string.
enum Encoding {
    /// UTF-16 with the given byte order.
    Utf16(String16Encoding),
    /// UTF-8.
    Utf8,
}
1053
/// Result of a successful test.
enum MatchRes<'buf> {
    /// Matched bytes: start offset, optional explicit end offset (when
    /// absent the end is `start + bytes.len()`), the bytes themselves and
    /// their text encoding.
    Bytes(u64, Option<u64>, &'buf [u8], Encoding),
    /// Matched scalar and the offset it was read at.
    Scalar(u64, Scalar),
    /// Matched float and the offset it was read at.
    Float(u64, Float),
}
1065
1066impl DynDisplay for &MatchRes<'_> {
1067 fn dyn_fmt(&self, f: &dyf::FormatSpec) -> Result<String, dyf::Error> {
1068 (*self).dyn_fmt(f)
1069 }
1070}
1071
1072impl DynDisplay for MatchRes<'_> {
1073 fn dyn_fmt(&self, f: &dyf::FormatSpec) -> Result<String, dyf::Error> {
1074 match self {
1075 Self::Scalar(_, v) => v.dyn_fmt(f),
1076 Self::Float(_, v) => v.dyn_fmt(f),
1077 Self::Bytes(_, _, v, enc) => match enc {
1078 Encoding::Utf8 => String::from_utf8_lossy(v).to_string().dyn_fmt(f),
1079 Encoding::Utf16(enc) => {
1080 let utf16: Vec<u16> = slice_to_utf16_iter(v, *enc).collect();
1081 String::from_utf16_lossy(&utf16).dyn_fmt(f)
1082 }
1083 },
1084 }
1085 }
1086}
1087
1088impl MatchRes<'_> {
1089 #[inline]
1091 fn start_offset(&self) -> u64 {
1092 match self {
1093 MatchRes::Bytes(o, _, _, _) => *o,
1094 MatchRes::Scalar(o, _) => *o,
1095 MatchRes::Float(o, _) => *o,
1096 }
1097 }
1098
1099 #[inline]
1101 fn end_offset(&self) -> u64 {
1102 match self {
1103 MatchRes::Bytes(start, end, buf, _) => match end {
1104 Some(end) => *end,
1105 None => start.saturating_add(buf.len() as u64),
1106 },
1107 MatchRes::Scalar(o, sc) => o.add(sc.size_of() as u64),
1108 MatchRes::Float(o, f) => o.add(f.size_of() as u64),
1109 }
1110 }
1111}
1112
1113fn slice_to_utf16_iter(read: &[u8], encoding: String16Encoding) -> impl Iterator<Item = u16> {
1114 let even = read
1115 .iter()
1116 .enumerate()
1117 .filter(|(i, _)| i % 2 == 0)
1118 .map(|t| t.1);
1119
1120 let odd = read
1121 .iter()
1122 .enumerate()
1123 .filter(|(i, _)| i % 2 != 0)
1124 .map(|t| t.1);
1125
1126 even.zip(odd).map(move |(e, o)| match encoding {
1127 String16Encoding::Le => u16::from_le_bytes([*e, *o]),
1128 String16Encoding::Be => u16::from_be_bytes([*e, *o]),
1129 })
1130}
1131
/// Byte order of a UTF-16 string.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
enum String16Encoding {
    /// Little-endian code units.
    Le,
    /// Big-endian code units.
    Be,
}
1137
/// Test comparing bytes of the input with a UTF-16 string.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct String16Test {
    /// The test string as a regular Rust `String`.
    /// NOTE(review): presumably the value as written in the rule — confirm
    /// in the parser.
    orig: String,
    /// Expected UTF-16 code units, or a wildcard.
    test_val: TestValue<Vec<u16>>,
    /// Byte order used to decode the input.
    encoding: String16Encoding,
}
1144
1145impl String16Test {
1146 #[inline(always)]
1150 fn test_value_len(&self) -> usize {
1151 match self.test_val.as_ref() {
1152 TestValue::Value(str16) => str16.len(),
1153 TestValue::Any => 0,
1154 }
1155 }
1156}
1157
// Modifiers accepted by indirect tests.
// NOTE(review): `Relative` is not consumed in the part of the file reviewed
// here; presumably it makes the indirect offset relative — confirm at the
// use site.
flags! {
    enum IndirectMod: u8{
        Relative,
    }
}

/// Set of [`IndirectMod`] flags.
type IndirectMods = FlagSet<IndirectMod>;
1165
/// Encoding of the length prefix of a length-prefixed string.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
enum PStringLen {
    /// One-byte length.
    Byte,
    /// Two-byte big-endian length.
    ShortBe,
    /// Two-byte little-endian length.
    ShortLe,
    /// Four-byte big-endian length.
    LongBe,
    /// Four-byte little-endian length.
    LongLe,
}
1174
1175impl PStringLen {
1176 #[inline(always)]
1177 const fn size_of_len(&self) -> usize {
1178 match self {
1179 PStringLen::Byte => 1,
1180 PStringLen::ShortBe => 2,
1181 PStringLen::ShortLe => 2,
1182 PStringLen::LongBe => 4,
1183 PStringLen::LongLe => 4,
1184 }
1185 }
1186}
1187
/// Test comparing a length-prefixed string of the input with a value.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct PStringTest {
    /// Encoding of the length prefix.
    len: PStringLen,
    /// Expected bytes, or a wildcard.
    test_val: TestValue<Vec<u8>>,
    /// Whether the stored length also counts the bytes of the prefix.
    include_len: bool,
}
1194
1195impl PStringTest {
1196 #[inline]
1197 fn read<'cache, R: Read + Seek>(
1198 &self,
1199 haystack: &'cache mut LazyCache<R>,
1200 ) -> Result<Option<&'cache [u8]>, Error> {
1201 let mut len = match self.len {
1202 PStringLen::Byte => read_le!(haystack, u8) as u32,
1203 PStringLen::ShortBe => read_be!(haystack, u16) as u32,
1204 PStringLen::ShortLe => read_le!(haystack, u16) as u32,
1205 PStringLen::LongBe => read_be!(haystack, u32),
1206 PStringLen::LongLe => read_le!(haystack, u32),
1207 } as usize;
1208
1209 if self.include_len {
1210 len = len.saturating_sub(self.len.size_of_len())
1211 }
1212
1213 if let TestValue::Value(s) = self.test_val.as_ref()
1214 && len != s.len()
1215 {
1216 return Ok(None);
1217 }
1218
1219 let read = haystack.read_exact_count(len as u64)?;
1220
1221 Ok(Some(read))
1222 }
1223
1224 #[inline(always)]
1225 fn test_value_len(&self) -> usize {
1226 match self.test_val.as_ref() {
1227 TestValue::Value(s) => s.len(),
1228 TestValue::Any => 0,
1229 }
1230 }
1231}
1232
/// The kinds of test a rule line can carry.
///
/// NOTE(review): the descriptions of the variants that read no value
/// (`Name`, `Use`, `Indirect`, `Der`, `Clear`, `Default`) are inferred from
/// their names — confirm against the evaluation code.
#[derive(Debug, Clone, Serialize, Deserialize)]
enum Test {
    /// Declares a named rule.
    Name(String),
    /// Calls a named rule; the flag presumably requests an endianness switch.
    Use(bool, String),
    /// Compares a scalar.
    Scalar(ScalarTest),
    /// Compares a float.
    Float(FloatTest),
    /// Compares a string.
    String(StringTest),
    /// Searches for a string.
    Search(SearchTest),
    /// Compares a length-prefixed string.
    PString(PStringTest),
    /// Matches a regular expression.
    Regex(RegexTest),
    /// Indirect test with its modifiers.
    Indirect(FlagSet<IndirectMod>),
    /// Compares a UTF-16 string.
    String16(String16Test),
    /// DER test.
    #[allow(dead_code)]
    Der,
    /// Clear test.
    Clear,
    /// Default test.
    Default,
}
1251
1252impl Test {
/// Reads from `haystack`, at its current position, the value this test has to
/// be evaluated against.
///
/// The returned value records the position at which the read started.
/// `Ok(None)` is returned when a length-prefixed string announces a length
/// that cannot match the expected value.
///
/// # Errors
///
/// Returns an error when reading fails, when a string test uses an
/// unsupported operator, or when the test kind has no value to read.
#[inline]
fn read_test_value<'haystack, R: Read + Seek>(
    &self,
    haystack: &'haystack mut LazyCache<R>,
    switch_endianness: bool,
) -> Result<Option<ReadValue<'haystack>>, Error> {
    // Position of the value, captured before anything is consumed.
    let test_value_offset = haystack.lazy_stream_position();

    match self {
        Self::Scalar(t) => {
            t.ty.read(haystack, switch_endianness)
                .map(|s| Some(ReadValue::Scalar(test_value_offset, s)))
        }

        Self::Float(t) => {
            t.ty.read(haystack, switch_endianness)
                .map(|f| Some(ReadValue::Float(test_value_offset, f)))
        }
        Self::String(t) => {
            match t.test_val.as_ref() {
                TestValue::Value(str) => {
                    // An explicit length always dictates the read size.
                    let buf = if let Some(length) = t.length {
                        haystack.read_exact_count(length as u64)?
                    } else {
                        match t.cmp_op {
                            CmpOp::Eq | CmpOp::Neq => {
                                if !t.has_length_mod() {
                                    // Plain comparison: exactly the pattern
                                    // length is needed.
                                    haystack.read_exact_count(str.len() as u64)?
                                } else {
                                    // Modifiers may consume more input than
                                    // the pattern length.
                                    haystack.read_count(FILE_BYTES_MAX as u64)?
                                }
                            }
                            CmpOp::Lt | CmpOp::Gt => {
                                // NOTE(review): the limit is 8092 here but
                                // 8192 in the wildcard branch below — confirm
                                // whether the difference is intentional.
                                let read =
                                    haystack.read_until_any_delim_or_limit(b"\n\0", 8092)?;

                                // Drop the delimiter, if one was read.
                                if read.ends_with(b"\0") || read.ends_with(b"\n") {
                                    &read[..read.len() - 1]
                                } else {
                                    read
                                }
                            }
                            _ => {
                                return Err(Error::Msg(format!(
                                    "string test does not support {:?} operator",
                                    t.cmp_op
                                )));
                            }
                        }
                    };

                    Ok(Some(ReadValue::Bytes(test_value_offset, buf)))
                }
                TestValue::Any => {
                    // Wildcard: read up to the next NUL or newline.
                    let read = haystack.read_until_any_delim_or_limit(b"\0\n", 8192)?;
                    // Drop the delimiter, if one was read.
                    let bytes = if read.ends_with(b"\0") || read.ends_with(b"\n") {
                        &read[..read.len() - 1]
                    } else {
                        read
                    };

                    Ok(Some(ReadValue::Bytes(test_value_offset, bytes)))
                }
            }
        }

        Self::String16(t) => {
            match t.test_val.as_ref() {
                TestValue::Value(str16) => {
                    // Two bytes per UTF-16 code unit.
                    let read = haystack.read_exact_count((str16.len() * 2) as u64)?;

                    Ok(Some(ReadValue::Bytes(test_value_offset, read)))
                }
                TestValue::Any => {
                    let read = haystack.read_until_utf16_or_limit(b"\x00\x00", 8192)?;

                    // Keep an even number of bytes so that the buffer only
                    // holds whole code units.
                    let end = if read.len() % 2 == 0 {
                        read.len()
                    } else {
                        read.len().saturating_sub(1)
                    };

                    Ok(Some(ReadValue::Bytes(test_value_offset, &read[..end])))
                }
            }
        }

        Self::PString(t) => {
            let Some(read) = t.read(haystack)? else {
                return Ok(None);
            };
            Ok(Some(ReadValue::Bytes(test_value_offset, read)))
        }

        Self::Search(_) => {
            let buf = haystack.read_count(FILE_BYTES_MAX as u64)?;
            Ok(Some(ReadValue::Bytes(test_value_offset, buf)))
        }

        Self::Regex(r) => {
            let length = {
                match r.length {
                    Some(len) => {
                        if r.mods.contains(ReMod::LineLimit) {
                            // The length counts lines: budget 80 bytes each.
                            len * 80
                        } else {
                            len
                        }
                    }

                    None => FILE_REGEX_MAX,
                }
            };

            let read = haystack.read_count(length as u64)?;
            Ok(Some(ReadValue::Bytes(test_value_offset, read)))
        }

        // These tests do not compare against data read from the input.
        Self::Name(_)
        | Self::Use(_, _)
        | Self::Indirect(_)
        | Self::Clear
        | Self::Default
        | Self::Der => Err(Error::msg("no value to read for this test")),
    }
}
1387
1388 #[inline(always)]
1389 fn match_value<'s>(
1390 &'s self,
1391 tv: &ReadValue<'s>,
1392 stream_kind: StreamKind,
1393 ) -> Option<MatchRes<'s>> {
1394 match (self, tv) {
1395 (Self::Scalar(t), ReadValue::Scalar(o, ts)) => {
1396 let read_value: Scalar = match t.transform.as_ref() {
1397 Some(t) => t.apply(*ts)?,
1398 None => *ts,
1399 };
1400
1401 match t.test_val {
1402 TestValue::Value(test_value) => {
1403 let ok = match t.cmp_op {
1404 CmpOp::Not => read_value == !test_value,
1407 CmpOp::Eq => read_value == test_value,
1408 CmpOp::Lt => read_value < test_value,
1409 CmpOp::Gt => read_value > test_value,
1410 CmpOp::Neq => read_value != test_value,
1411 CmpOp::BitAnd => read_value & test_value == test_value,
1412 CmpOp::Xor => (read_value & test_value).is_zero(),
1413 };
1414
1415 if ok {
1416 Some(MatchRes::Scalar(*o, read_value))
1417 } else {
1418 None
1419 }
1420 }
1421
1422 TestValue::Any => Some(MatchRes::Scalar(*o, read_value)),
1423 }
1424 }
1425
1426 (Self::Float(t), ReadValue::Float(o, f)) => {
1427 let read_value: Float = t.transform.as_ref().map(|t| t.apply(*f)).unwrap_or(*f);
1428
1429 match t.test_val {
1430 TestValue::Value(tf) => {
1431 let ok = match t.cmp_op {
1432 CmpOp::Eq => read_value == tf,
1433 CmpOp::Lt => read_value < tf,
1434 CmpOp::Gt => read_value > tf,
1435 CmpOp::Neq => read_value != tf,
1436 _ => {
1437 debug_panic!("unsupported float comparison");
1440 debug!("unsupported float comparison");
1441 false
1442 }
1443 };
1444
1445 if ok {
1446 Some(MatchRes::Float(*o, read_value))
1447 } else {
1448 None
1449 }
1450 }
1451 TestValue::Any => Some(MatchRes::Float(*o, read_value)),
1452 }
1453 }
1454
1455 (Self::String(st), ReadValue::Bytes(o, buf)) => {
1456 macro_rules! trim_buf {
1457 ($buf: expr) => {{
1458 if st.mods.contains(StringMod::Trim) {
1459 $buf.trim_ascii()
1460 } else {
1461 $buf
1462 }
1463 }};
1464 }
1465
1466 match st.test_val.as_ref() {
1467 TestValue::Value(str) => {
1468 match st.cmp_op {
1469 CmpOp::Eq => {
1470 if let (true, _) = string_match(str, st.mods, buf) {
1471 Some(MatchRes::Bytes(*o, None, trim_buf!(str), Encoding::Utf8))
1472 } else {
1473 None
1474 }
1475 }
1476 CmpOp::Neq => {
1477 if let (false, _) = string_match(str, st.mods, buf) {
1478 Some(MatchRes::Bytes(*o, None, trim_buf!(str), Encoding::Utf8))
1479 } else {
1480 None
1481 }
1482 }
1483 CmpOp::Gt => {
1484 if buf.len() > str.len() {
1485 Some(MatchRes::Bytes(*o, None, trim_buf!(buf), Encoding::Utf8))
1486 } else {
1487 None
1488 }
1489 }
1490 CmpOp::Lt => {
1491 if buf.len() < str.len() {
1492 Some(MatchRes::Bytes(*o, None, trim_buf!(buf), Encoding::Utf8))
1493 } else {
1494 None
1495 }
1496 }
1497
1498 _ => {
1500 debug_panic!("unsupported string comparison");
1503 debug!("unsupported string comparison");
1504 None
1505 }
1506 }
1507 }
1508 TestValue::Any => {
1509 Some(MatchRes::Bytes(*o, None, trim_buf!(buf), Encoding::Utf8))
1510 }
1511 }
1512 }
1513
1514 (Self::PString(m), ReadValue::Bytes(o, buf)) => match m.test_val.as_ref() {
1515 TestValue::Value(psv) => {
1516 if buf == psv {
1517 Some(MatchRes::Bytes(*o, None, buf, Encoding::Utf8))
1518 } else {
1519 None
1520 }
1521 }
1522 TestValue::Any => Some(MatchRes::Bytes(*o, None, buf, Encoding::Utf8)),
1523 },
1524
1525 (Self::String16(t), ReadValue::Bytes(o, buf)) => {
1526 match t.test_val.as_ref() {
1527 TestValue::Value(str16) => {
1528 if str16.len() * 2 != buf.len() {
1530 return None;
1531 }
1532
1533 for (i, utf16_char) in slice_to_utf16_iter(buf, t.encoding).enumerate() {
1535 if str16[i] != utf16_char {
1536 return None;
1537 }
1538 }
1539
1540 Some(MatchRes::Bytes(
1541 *o,
1542 None,
1543 t.orig.as_bytes(),
1544 Encoding::Utf16(t.encoding),
1545 ))
1546 }
1547
1548 TestValue::Any => {
1549 Some(MatchRes::Bytes(*o, None, buf, Encoding::Utf16(t.encoding)))
1550 }
1551 }
1552 }
1553
1554 (Self::Regex(r), ReadValue::Bytes(o, buf)) => r.match_buf(*o, stream_kind, buf),
1555
1556 (Self::Search(t), ReadValue::Bytes(o, buf)) => t.match_buf(*o, buf),
1557
1558 _ => None,
1559 }
1560 }
1561
1562 #[inline(always)]
1563 fn strength(&self) -> u64 {
1564 const MULT: usize = 10;
1565
1566 let mut out = 2 * MULT;
1567
1568 match self {
1570 Test::Scalar(s) => {
1571 out += s.ty.type_size() * MULT;
1572 }
1573
1574 Test::Float(t) => {
1575 out += t.ty.type_size() * MULT;
1576 }
1577
1578 Test::String(t) => out += t.test_value_len().saturating_mul(MULT),
1579
1580 Test::PString(t) => out += t.test_value_len().saturating_mul(MULT),
1581
1582 Test::Search(s) => {
1583 let n_pos = s.n_pos.unwrap_or(FILE_BYTES_MAX);
1588
1589 match n_pos {
1590 0..=80 => out += s.str.len().saturating_mul(MULT),
1592 81..=240 => out += s.str.len() * s.str.len().clamp(0, MULT - 2),
1594 _ => out += s.str.len(),
1596 }
1597 }
1598
1599 Test::Regex(r) => {
1600 let v = r.non_magic_len / r.re.captures_len();
1609
1610 let len = r
1611 .length
1612 .map(|l| {
1613 if r.mods.contains(ReMod::LineLimit) {
1614 l * 80
1615 } else {
1616 l
1617 }
1618 })
1619 .unwrap_or(FILE_BYTES_MAX);
1620
1621 match len {
1622 0..=80 => out += v.saturating_mul(MULT),
1624 81..=240 => out += v * v.clamp(0, MULT - 2),
1626 _ => out += v,
1628 }
1629 }
1630
1631 Test::String16(t) => {
1632 out += t.test_value_len().saturating_mul(MULT);
1637 }
1638
1639 Test::Der => out += MULT,
1640
1641 Test::Default | Test::Name(_) | Test::Use(_, _) | Test::Indirect(_) | Test::Clear => {
1642 return 0;
1643 }
1644 }
1645
1646 if self.is_match_any() {
1648 return 0;
1649 }
1650
1651 if let Some(op) = self.cmp_op() {
1652 match op {
1653 CmpOp::Neq => out = 0,
1655 CmpOp::Eq | CmpOp::Not => out += MULT,
1656 CmpOp::Lt | CmpOp::Gt => out -= 2 * MULT,
1657 CmpOp::Xor | CmpOp::BitAnd => out -= MULT,
1658 }
1659 }
1660
1661 out as u64
1662 }
1663
1664 #[inline(always)]
1665 fn cmp_op(&self) -> Option<CmpOp> {
1666 match self {
1667 Self::String(t) => Some(t.cmp_op),
1668 Self::Scalar(s) => Some(s.cmp_op),
1669 Self::Float(t) => Some(t.cmp_op),
1670 Self::Name(_)
1671 | Self::Use(_, _)
1672 | Self::Search(_)
1673 | Self::PString(_)
1674 | Self::Regex(_)
1675 | Self::Clear
1676 | Self::Default
1677 | Self::Indirect(_)
1678 | Self::String16(_)
1679 | Self::Der => None,
1680 }
1681 }
1682
1683 #[inline(always)]
1684 fn is_recursive(&self) -> bool {
1685 matches!(self, Test::Use(_, _) | Test::Indirect(_))
1686 }
1687
1688 #[inline(always)]
1689 fn is_match_any(&self) -> bool {
1690 match self {
1691 Test::Name(_) => false,
1692 Test::Use(_, _) => false,
1693 Test::Scalar(scalar_test) => matches!(scalar_test.test_val, TestValue::Any),
1694 Test::Float(float_test) => matches!(float_test.test_val, TestValue::Any),
1695 Test::String(string_test) => matches!(string_test.test_val, TestValue::Any),
1696 Test::Search(_) => false,
1697 Test::PString(pstring_test) => matches!(pstring_test.test_val, TestValue::Any),
1698 Test::Regex(_) => false,
1699 Test::Indirect(_) => false,
1700 Test::String16(string16_test) => matches!(string16_test.test_val, TestValue::Any),
1701 Test::Der => false,
1702 Test::Clear => false,
1703 Test::Default => false,
1704 }
1705 }
1706
1707 #[inline(always)]
1708 fn is_binary(&self) -> bool {
1709 match self {
1710 Self::Name(_) => true,
1711 Self::Use(_, _) => true,
1712 Self::Scalar(_) => true,
1713 Self::Float(_) => true,
1714 Self::String(t) => !t.is_binary() & !t.is_text() || t.is_binary(),
1715 Self::Search(t) => t.is_binary(),
1716 Self::PString(_) => true,
1717 Self::Regex(t) => !t.is_binary() & !t.is_text() || t.is_binary(),
1718 Self::Clear => true,
1719 Self::Default => true,
1720 Self::Indirect(_) => true,
1721 Self::String16(_) => true,
1722 Self::Der => true,
1723 }
1724 }
1725
1726 #[inline(always)]
1727 fn is_text(&self) -> bool {
1728 match self {
1729 Self::Name(_) => true,
1730 Self::Use(_, _) => true,
1731 Self::Indirect(_) => true,
1732 Self::Clear => true,
1733 Self::Default => true,
1734 Self::String(t) => !t.is_binary() & !t.is_text() || t.is_text(),
1735 Self::Regex(t) => !t.is_binary() & !t.is_text() || t.is_text(),
1736 _ => !self.is_binary(),
1737 }
1738 }
1739
1740 #[inline(always)]
1741 fn is_only_text(&self) -> bool {
1742 self.is_text() && !self.is_binary()
1743 }
1744
1745 #[inline(always)]
1746 fn is_only_binary(&self) -> bool {
1747 self.is_binary() && !self.is_text()
1748 }
1749}
1750
/// Encoding of the value read from the haystack when resolving an
/// indirect offset (see `IndOffset::read_offset`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
enum OffsetType {
    /// One byte.
    Byte,
    /// Little-endian `f64`, cast to an integer.
    DoubleLe,
    /// Big-endian `f64`, cast to an integer.
    DoubleBe,
    /// Little-endian 16-bit integer.
    ShortLe,
    /// Big-endian 16-bit integer.
    ShortBe,
    /// Little-endian 32-bit value decoded with `decode_id3`.
    Id3Le,
    /// Big-endian 32-bit value decoded with `decode_id3`.
    Id3Be,
    /// Little-endian 32-bit integer.
    LongLe,
    /// Big-endian 32-bit integer.
    LongBe,
    /// 32-bit value read with the `read_me!` macro.
    Middle,
    /// Value parsed by `read_octal_u64`.
    Octal,
    /// Big-endian 64-bit integer.
    QuadBe,
    /// Little-endian 64-bit integer.
    QuadLe,
}
1767
/// Right-hand operand of the operation applied to an indirect offset.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
enum Shift {
    /// The operand is the literal value.
    Direct(u64),
    /// The operand is itself read from the haystack, at the address of the
    /// indirect offset shifted by this (signed) amount.
    Indirect(i64),
}
1773
/// Offset whose value has to be read from the haystack.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
struct IndOffset {
    /// Address at which the offset value is read.
    off_addr: DirOffset,
    /// Selects between the signed and unsigned read of integer types.
    signed: bool,
    /// Encoding of the value to read.
    ty: OffsetType,
    /// Operation applied to the value read; only used when `shift` is set too.
    op: Option<Op>,
    /// Right-hand operand of `op`.
    shift: Option<Shift>,
}
1785
1786impl IndOffset {
1787 fn read_offset<R: Read + Seek>(
1789 &self,
1790 haystack: &mut LazyCache<R>,
1791 rule_base_offset: Option<u64>,
1792 last_upper_match_offset: Option<u64>,
1793 ) -> Result<Option<u64>, io::Error> {
1794 let offset_address = match self.off_addr {
1795 DirOffset::Start(s) => {
1796 let Some(o) = s.checked_add(rule_base_offset.unwrap_or_default()) else {
1797 return Ok(None);
1798 };
1799
1800 haystack.seek(SeekFrom::Start(o))?
1801 }
1802 DirOffset::LastUpper(c) => haystack.seek(SeekFrom::Start(
1803 (last_upper_match_offset.unwrap_or_default() as i64 + c) as u64,
1804 ))?,
1805 DirOffset::End(e) => haystack.seek(SeekFrom::End(e))?,
1806 };
1807
1808 macro_rules! read_value {
1809 () => {
1810 match self.ty {
1811 OffsetType::Byte => {
1812 if self.signed {
1813 read_le!(haystack, u8) as u64
1814 } else {
1815 read_le!(haystack, i8) as u64
1816 }
1817 }
1818 OffsetType::DoubleLe => read_le!(haystack, f64) as u64,
1819 OffsetType::DoubleBe => read_be!(haystack, f64) as u64,
1820 OffsetType::ShortLe => {
1821 if self.signed {
1822 read_le!(haystack, i16) as u64
1823 } else {
1824 read_le!(haystack, u16) as u64
1825 }
1826 }
1827 OffsetType::ShortBe => {
1828 if self.signed {
1829 read_be!(haystack, i16) as u64
1830 } else {
1831 read_be!(haystack, u16) as u64
1832 }
1833 }
1834 OffsetType::Id3Le => decode_id3(read_le!(haystack, u32)) as u64,
1835 OffsetType::Id3Be => decode_id3(read_be!(haystack, u32)) as u64,
1836 OffsetType::LongLe => {
1837 if self.signed {
1838 read_le!(haystack, i32) as u64
1839 } else {
1840 read_le!(haystack, u32) as u64
1841 }
1842 }
1843 OffsetType::LongBe => {
1844 if self.signed {
1845 read_be!(haystack, i32) as u64
1846 } else {
1847 read_be!(haystack, u32) as u64
1848 }
1849 }
1850 OffsetType::Middle => read_me!(haystack) as u64,
1851 OffsetType::Octal => {
1852 if let Some(o) = read_octal_u64(haystack) {
1853 o
1854 } else {
1855 debug!("failed to read octal offset @ {offset_address}");
1856 return Ok(None);
1857 }
1858 }
1859 OffsetType::QuadLe => {
1860 if self.signed {
1861 read_le!(haystack, i64) as u64
1862 } else {
1863 read_le!(haystack, u64)
1864 }
1865 }
1866 OffsetType::QuadBe => {
1867 if self.signed {
1868 read_be!(haystack, i64) as u64
1869 } else {
1870 read_be!(haystack, u64)
1871 }
1872 }
1873 }
1874 };
1875 }
1876
1877 let o = read_value!();
1879
1880 trace!(
1881 "offset read @ {offset_address} value={o} op={:?} shift={:?}",
1882 self.op, self.shift
1883 );
1884
1885 if let (Some(op), Some(shift)) = (self.op, self.shift) {
1887 let shift = match shift {
1888 Shift::Direct(i) => i,
1889 Shift::Indirect(i) => {
1890 let tmp = offset_address as i128 + i as i128;
1891 if tmp.is_negative() {
1892 return Ok(None);
1893 } else {
1894 haystack.seek(SeekFrom::Start(tmp as u64))?;
1895 };
1896 read_value!()
1899 }
1900 };
1901
1902 match op {
1903 Op::Add => return Ok(o.checked_add(shift)),
1904 Op::Mul => return Ok(o.checked_mul(shift)),
1905 Op::Sub => return Ok(o.checked_sub(shift)),
1906 Op::Div => return Ok(o.checked_div(shift)),
1907 Op::Mod => return Ok(o.checked_rem(shift)),
1908 Op::And => return Ok(Some(o & shift)),
1909 Op::Or => return Ok(Some(o | shift)),
1910 Op::Xor => return Ok(Some(o ^ shift)),
1911 }
1912 }
1913
1914 Ok(Some(o))
1915 }
1916}
1917
/// Offset which can be computed without reading the haystack content.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
enum DirOffset {
    /// Offset counted from the start.
    Start(u64),
    /// Offset relative to the offset reached by the upper level match.
    LastUpper(i64),
    /// Offset relative to the end of the stream (handed to `SeekFrom::End`).
    End(i64),
}
1925
/// Offset at which a test is run.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
enum Offset {
    /// Offset known without reading the haystack.
    Direct(DirOffset),
    /// Offset whose value is read from the haystack.
    Indirect(IndOffset),
}
1931
1932impl From<DirOffset> for Offset {
1933 fn from(value: DirOffset) -> Self {
1934 Self::Direct(value)
1935 }
1936}
1937
1938impl From<IndOffset> for Offset {
1939 fn from(value: IndOffset) -> Self {
1940 Self::Indirect(value)
1941 }
1942}
1943
1944impl Display for DirOffset {
1945 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1946 match self {
1947 DirOffset::Start(i) => write!(f, "{i}"),
1948 DirOffset::LastUpper(c) => write!(f, "&{c}"),
1949 DirOffset::End(e) => write!(f, "-{e}"),
1950 }
1951 }
1952}
1953
1954impl Default for DirOffset {
1955 fn default() -> Self {
1956 Self::LastUpper(0)
1957 }
1958}
1959
/// A single test of a magic rule together with where to run it.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct Match {
    /// Line of the entry, reported in logs and errors.
    line: usize,
    /// Depth of the entry; also used as its continuation level.
    depth: u8,
    /// Offset at which the test is run.
    offset: Offset,
    /// Test to run.
    test: Test,
    /// Strength of `test`, as computed by `Test::strength`.
    test_strength: u64,
    /// Optional message attached to the entry.
    message: Option<Message>,
}
1969
1970impl From<Use> for Match {
1971 fn from(value: Use) -> Self {
1972 let test = Test::Use(value.switch_endianness, value.rule_name);
1973 let test_strength = test.strength();
1974 Self {
1975 line: value.line,
1976 depth: value.depth,
1977 offset: value.start_offset,
1978 test,
1979 test_strength,
1980 message: value.message,
1981 }
1982 }
1983}
1984
1985impl From<Name> for Match {
1986 fn from(value: Name) -> Self {
1987 let test = Test::Name(value.name);
1988 let test_strength = test.strength();
1989 Self {
1990 line: value.line,
1991 depth: 0,
1992 offset: Offset::Direct(DirOffset::Start(0)),
1993 test,
1994 test_strength,
1995 message: value.message,
1996 }
1997 }
1998}
1999
2000impl Match {
2001 #[inline(always)]
2003 fn offset_from_start<R: Read + Seek>(
2004 &self,
2005 haystack: &mut LazyCache<R>,
2006 rule_base_offset: Option<u64>,
2007 last_level_offset: Option<u64>,
2008 ) -> Result<Option<u64>, io::Error> {
2009 match self.offset {
2010 Offset::Direct(dir_offset) => match dir_offset {
2011 DirOffset::Start(s) => Ok(Some(s)),
2012 DirOffset::LastUpper(shift) => {
2013 let o = last_level_offset.unwrap_or_default() as i64 + shift;
2014
2015 if o >= 0 { Ok(Some(o as u64)) } else { Ok(None) }
2016 }
2017 DirOffset::End(e) => Ok(Some(haystack.offset_from_start(SeekFrom::End(e)))),
2018 },
2019 Offset::Indirect(ind_offset) => {
2020 let Some(o) =
2021 ind_offset.read_offset(haystack, rule_base_offset, last_level_offset)?
2022 else {
2023 return Ok(None);
2024 };
2025
2026 Ok(Some(o))
2027 }
2028 }
2029 }
2030
2031 #[inline]
2044 #[allow(clippy::too_many_arguments)]
2045 fn matches<'a: 'h, 'h, R: Read + Seek>(
2046 &'a self,
2047 source: Option<&str>,
2048 magic: &mut Magic<'a>,
2049 stream_kind: StreamKind,
2050 state: &mut MatchState,
2051 buf_base_offset: Option<u64>,
2052 rule_base_offset: Option<u64>,
2053 last_level_offset: Option<u64>,
2054 haystack: &'h mut LazyCache<R>,
2055 switch_endianness: bool,
2056 db: &'a MagicDb,
2057 depth: usize,
2058 ) -> Result<(bool, Option<MatchRes<'h>>), Error> {
2059 let source = source.unwrap_or("unknown");
2060 let line = self.line;
2061
2062 if depth >= MAX_RECURSION {
2063 return Err(Error::localized(
2064 source,
2065 line,
2066 Error::MaximumRecursion(MAX_RECURSION),
2067 ));
2068 }
2069
2070 if self.test.is_only_binary() && stream_kind.is_text() {
2071 trace!("skip binary test source={source} line={line} stream_kind={stream_kind:?}",);
2072 return Ok((false, None));
2073 }
2074
2075 if self.test.is_only_text() && !stream_kind.is_text() {
2076 trace!("skip text test source={source} line={line} stream_kind={stream_kind:?}",);
2077 return Ok((false, None));
2078 }
2079
2080 let Ok(Some(mut offset)) = self
2081 .offset_from_start(haystack, rule_base_offset, last_level_offset)
2082 .inspect_err(|e| debug!("source={source} line={line} failed at computing offset: {e}"))
2083 else {
2084 return Ok((false, None));
2085 };
2086
2087 offset = match self.offset {
2088 Offset::Indirect(_) => {
2089 buf_base_offset.unwrap_or_default().saturating_add(offset)
2094 }
2095 Offset::Direct(DirOffset::Start(_)) => {
2097 rule_base_offset.unwrap_or_default().saturating_add(offset)
2098 }
2099 _ => offset,
2100 };
2101
2102 match &self.test {
2103 Test::Clear => {
2104 trace!("source={source} line={line} clear");
2105 state.clear_continuation_level(&self.continuation_level());
2106 Ok((true, None))
2107 }
2108
2109 Test::Name(name) => {
2110 trace!(
2111 "source={source} line={line} running rule {name} switch_endianness={switch_endianness}",
2112 );
2113 Ok((true, None))
2114 }
2115
2116 Test::Use(flip_endianness, rule_name) => {
2117 trace!(
2118 "source={source} line={line} use {rule_name} switch_endianness={flip_endianness}",
2119 );
2120
2121 let switch_endianness = switch_endianness ^ flip_endianness;
2123
2124 let dr: &DependencyRule = db.dependencies.get(rule_name).ok_or(
2125 Error::localized(source, line, Error::MissingRule(rule_name.clone())),
2126 )?;
2127
2128 if let Some(msg) = self.message.as_ref() {
2130 magic.push_message(msg.to_string_lossy());
2131 }
2132
2133 let nmatch = dr.rule.magic(
2134 magic,
2135 stream_kind,
2136 buf_base_offset,
2137 Some(offset),
2138 haystack,
2139 db,
2140 switch_endianness,
2141 depth.saturating_add(1),
2142 )?;
2143
2144 let matched = nmatch > 1;
2147 if matched {
2148 state.set_continuation_level(self.continuation_level());
2149 }
2150
2151 Ok((matched, None))
2152 }
2153
2154 Test::Indirect(m) => {
2155 trace!(
2156 "source={source} line={line} indirect mods={:?} offset={offset:#x}",
2157 m
2158 );
2159
2160 let new_buf_base_off = if m.contains(IndirectMod::Relative) {
2161 Some(offset)
2162 } else {
2163 None
2164 };
2165
2166 if let Some(msg) = self.message.as_ref() {
2168 magic.push_message(msg.to_string_lossy());
2169 }
2170
2171 let mut nmatch = 0u64;
2172 for r in db.rules.iter() {
2173 let messages_cnt = magic.message.len();
2174 nmatch = nmatch.saturating_add(r.magic(
2175 magic,
2176 stream_kind,
2177 new_buf_base_off,
2178 Some(offset),
2179 haystack,
2180 db,
2181 false,
2182 depth.saturating_add(1),
2183 )?);
2184
2185 if magic.message.len() != messages_cnt {
2187 break;
2188 }
2189 }
2190
2191 Ok((nmatch > 0, None))
2193 }
2194
2195 Test::Default => {
2196 let ok = !state.get_continuation_level(&self.continuation_level());
2198
2199 trace!("source={source} line={line} default match={ok}");
2200 if ok {
2201 state.set_continuation_level(self.continuation_level());
2202 }
2203
2204 Ok((ok, None))
2205 }
2206
2207 _ => {
2208 if let Err(e) = haystack.seek(SeekFrom::Start(offset)) {
2209 debug!("source={source} line={line} failed to seek in haystack: {e}");
2210 return Ok((false, None));
2211 }
2212
2213 let mut trace_msg = None;
2214
2215 if enabled!(Level::DEBUG) {
2216 trace_msg = Some(vec![format!(
2217 "source={source} line={line} depth={} stream_offset={:#x}",
2218 self.depth,
2219 haystack.lazy_stream_position()
2220 )])
2221 }
2222
2223 if let Ok(opt_test_value) = self
2227 .test
2228 .read_test_value(haystack, switch_endianness)
2229 .inspect_err(|e| {
2230 debug!("source={source} line={line} error while reading test value @{offset}: {e}",)
2231 })
2232 {
2233 if let Some(v) = trace_msg
2234 .as_mut() { v.push(format!("test={:?}", self.test)) }
2235
2236 let match_res =
2237 opt_test_value.and_then(|tv| self.test.match_value(&tv, stream_kind));
2238
2239 if let Some(v) = trace_msg.as_mut() { v.push(format!(
2240 "message=\"{}\" match={}",
2241 self.message
2242 .as_ref()
2243 .map(|fs| fs.to_string_lossy())
2244 .unwrap_or_default(),
2245 match_res.is_some()
2246 )) }
2247
2248 if enabled!(Level::DEBUG) && !enabled!(Level::TRACE) && match_res.is_some() {
2250 if let Some(m) = trace_msg{
2251 debug!("{}", m.join(" "));
2252 }
2253 } else if enabled!(Level::TRACE)
2254 && let Some(m) = trace_msg{
2255 trace!("{}", m.join(" "));
2256 }
2257
2258 if let Some(mr) = match_res {
2259 state.set_continuation_level(self.continuation_level());
2260 return Ok((true, Some(mr)));
2261 }
2262 }
2263
2264 Ok((false, None))
2265 }
2266 }
2267 }
2268
/// Returns the continuation level of the entry, which is its depth.
#[inline(always)]
fn continuation_level(&self) -> ContinuationLevel {
    ContinuationLevel(self.depth)
}
2273}
2274
/// Parsed `use` entry, converted into a [`Match`] through `From<Use>`.
#[derive(Debug, Clone)]
struct Use {
    /// Line of the entry.
    line: usize,
    /// Depth of the entry.
    depth: u8,
    /// Offset at which the referenced rule is run.
    start_offset: Offset,
    /// Name of the rule to run.
    rule_name: String,
    /// Whether the endianness must be flipped while running the rule.
    switch_endianness: bool,
    /// Optional message attached to the entry.
    message: Option<Message>,
}
2284
/// Modifier applied to the strength of an entry.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct StrengthMod {
    /// Operation to apply.
    op: Op,
    /// Right-hand operand of the operation.
    by: u8,
}
2290
2291impl StrengthMod {
2292 #[inline(always)]
2293 fn apply(&self, strength: u64) -> u64 {
2294 let by = self.by as u64;
2295 debug!("applying strength modifier: {strength} {} {}", self.op, by);
2296 match self.op {
2297 Op::Mul => strength.saturating_mul(by),
2298 Op::Add => strength.saturating_add(by),
2299 Op::Sub => strength.saturating_sub(by),
2300 Op::Div => {
2301 if by > 0 {
2302 strength.saturating_div(by)
2303 } else {
2304 strength
2305 }
2306 }
2307 Op::Mod => strength % by,
2308 Op::And => strength & by,
2309 Op::Xor | Op::Or => {
2312 debug_panic!("unsupported strength operator");
2313 strength
2314 }
2315 }
2316 }
2317}
2318
/// Metadata entry of a rule, as opposed to a test entry.
#[derive(Debug, Clone)]
enum Flag {
    /// MIME type.
    Mime(String),
    /// Set of file extensions.
    Ext(HashSet<String>),
    /// Strength modifier.
    Strength(StrengthMod),
    /// Apple type/creator string.
    Apple(String),
}
2326
/// Parsed `name` entry, converted into a [`Match`] through `From<Name>`.
#[derive(Debug, Clone)]
struct Name {
    /// Line of the entry.
    line: usize,
    /// Name of the rule.
    name: String,
    /// Optional message attached to the entry.
    message: Option<Message>,
}
2333
/// Entry of a magic rule paired with the parser span it was produced from.
#[derive(Debug, Clone)]
enum Entry<'span> {
    /// Test entry.
    Match(Span<'span>, Match),
    /// Metadata entry.
    Flag(Span<'span>, Flag),
}
2339
/// Node of the tree of entries making a rule.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct EntryNode {
    /// Whether the node is the root of the rule.
    root: bool,
    /// Test run by the node.
    entry: Match,
    /// Nodes evaluated when `entry` matches.
    children: Vec<EntryNode>,
    /// MIME type set on the result when `entry` matches.
    mimetype: Option<String>,
    /// Creator code set on the result when `entry` matches.
    apple: Option<String>,
    /// Modifier applied to the strength of `entry`.
    strength_mod: Option<StrengthMod>,
    /// Extensions added to the result when `entry` matches.
    exts: HashSet<String>,
}
2350
/// Accumulator filled while walking the entries of a rule.
#[derive(Debug, Default)]
struct EntryNodeVisitor {
    /// Extensions collected so far.
    exts: HashSet<String>,
    /// Score accumulated so far.
    score: u64,
}

impl EntryNodeVisitor {
    /// Creates an empty visitor: no extension and a score of zero.
    fn new() -> Self {
        Self::default()
    }

    /// Folds `other` into `self`: extensions are united and scores added.
    fn merge(&mut self, other: Self) {
        let Self { exts, score } = other;

        self.score += score;
        self.exts.extend(exts);
    }
}
2369
2370impl EntryNode {
2371 #[inline]
2372 fn update_visitor(&self, v: &mut EntryNodeVisitor, depth: usize) {
2373 for ext in self.exts.iter() {
2375 if !v.exts.contains(ext) {
2376 v.exts.insert(ext.clone());
2377 }
2378 }
2379
2380 if depth == 0 {
2382 v.score += self.entry.test_strength;
2383 }
2384
2385 v.score += self
2389 .children
2390 .iter()
2391 .map(|e| e.entry.test_strength)
2392 .min()
2393 .unwrap_or_default()
2394 / max(1, depth as u64);
2395 }
2396
2397 fn visit(
2398 &self,
2399 v: &mut EntryNodeVisitor,
2400 deps: &HashMap<String, DependencyRule>,
2401 marked: &mut HashSet<String>,
2402 depth: usize,
2403 ) -> Result<(), Error> {
2404 self.update_visitor(v, depth);
2406
2407 for c in self.children.iter() {
2409 if let Test::Use(_, ref name) = c.entry.test {
2410 if marked.contains(name) {
2411 continue;
2412 }
2413
2414 marked.insert(name.clone());
2415
2416 if let Some(r) = deps.get(name) {
2417 let dv = r.rule.visit_all_entries(deps, marked)?;
2418 v.merge(dv);
2419 } else {
2420 return Err(Error::MissingRule(name.clone()));
2421 }
2422 } else {
2423 c.visit(v, deps, marked, depth + 1)?;
2424 }
2425 }
2426
2427 Ok(())
2428 }
2429
2430 #[inline]
2431 #[allow(clippy::too_many_arguments)]
2432 fn matches<'r, R: Read + Seek>(
2433 &'r self,
2434 opt_source: Option<&str>,
2435 magic: &mut Magic<'r>,
2436 state: &mut MatchState,
2437 stream_kind: StreamKind,
2438 buf_base_offset: Option<u64>,
2439 rule_base_offset: Option<u64>,
2440 last_level_offset: Option<u64>,
2441 haystack: &mut LazyCache<R>,
2442 db: &'r MagicDb,
2443 switch_endianness: bool,
2444 depth: usize,
2445 ) -> Result<u64, Error> {
2446 let mut nmatch = 0u64;
2447
2448 let (ok, opt_match_res) = self.entry.matches(
2449 opt_source,
2450 magic,
2451 stream_kind,
2452 state,
2453 buf_base_offset,
2454 rule_base_offset,
2455 last_level_offset,
2456 haystack,
2457 switch_endianness,
2458 db,
2459 depth,
2460 )?;
2461
2462 let source = opt_source.unwrap_or("unknown");
2463 let line = self.entry.line;
2464
2465 if ok {
2466 nmatch = nmatch.saturating_add(1);
2467
2468 if !self.entry.test.is_recursive()
2472 && let Some(msg) = self.entry.message.as_ref()
2473 && let Ok(msg) = msg.format_with(opt_match_res.as_ref()).inspect_err(|e| {
2474 debug!("source={source} line={line} failed to format message: {e}")
2475 })
2476 {
2477 magic.push_message(msg);
2478 }
2479
2480 if let Some(mr) = opt_match_res {
2482 match &self.entry.test {
2483 Test::String(t) => {
2484 if t.has_length_mod() {
2485 let o = mr.end_offset();
2486 haystack.seek(SeekFrom::Start(o))?;
2487 }
2488 }
2489 Test::Search(t) => {
2490 if t.re_mods.contains(ReMod::StartOffsetUpdate) {
2491 let o = mr.start_offset();
2492 haystack.seek(SeekFrom::Start(o))?;
2493 } else {
2494 let o = mr.end_offset();
2495 haystack.seek(SeekFrom::Start(o))?;
2496 }
2497 }
2498
2499 Test::Regex(t) => {
2500 if t.mods.contains(ReMod::StartOffsetUpdate) {
2501 let o = mr.start_offset();
2502 haystack.seek(SeekFrom::Start(o))?;
2503 } else {
2504 let o = mr.end_offset();
2505 haystack.seek(SeekFrom::Start(o))?;
2506 }
2507 }
2508 _ => {}
2510 }
2511 }
2512
2513 if let Some(mimetype) = self.mimetype.as_ref() {
2514 magic.set_mime_type(Cow::Borrowed(mimetype));
2515 }
2516
2517 if let Some(apple_ty) = self.apple.as_ref() {
2518 magic.set_creator_code(Cow::Borrowed(apple_ty));
2519 }
2520
2521 if !self.exts.is_empty() {
2522 magic.insert_extensions(self.exts.iter().map(|s| s.as_str()));
2523 }
2524
2525 let mut strength = self.entry.test_strength;
2529
2530 let continuation_level = self.entry.continuation_level().0 as u64;
2531 if self.entry.message.is_none() && continuation_level < 3 {
2532 strength = strength.saturating_add(continuation_level);
2533 }
2534
2535 if let Some(sm) = self.strength_mod.as_ref() {
2536 strength = sm.apply(strength);
2537 }
2538
2539 if self.entry.message.is_none() {
2541 strength += 1
2542 }
2543
2544 magic.update_strength(strength);
2545
2546 let end_upper_level = haystack.lazy_stream_position();
2547
2548 let rule_base_offset = if self.root {
2556 match self.entry.offset {
2557 Offset::Direct(DirOffset::End(o)) => {
2558 Some(haystack.offset_from_start(SeekFrom::End(o)))
2559 }
2560 _ => rule_base_offset,
2561 }
2562 } else {
2563 rule_base_offset
2564 };
2565
2566 for e in self.children.iter() {
2567 nmatch = nmatch.saturating_add(e.matches(
2568 opt_source,
2569 magic,
2570 state,
2571 stream_kind,
2572 buf_base_offset,
2573 rule_base_offset,
2574 Some(end_upper_level),
2575 haystack,
2576 db,
2577 switch_endianness,
2578 depth,
2579 )?);
2580 }
2581 }
2582
2583 Ok(nmatch)
2584 }
2585}
2586
/// A magic rule: a tree of entries along with the data derived from it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MagicRule {
    /// Identifier of the rule.
    id: usize,
    /// Name of the source the rule comes from, if known.
    source: Option<String>,
    /// Root of the tree of entries.
    entries: EntryNode,
    /// Extensions collected over the entries when the rule is finalized.
    extensions: HashSet<String>,
    /// Score computed over the entries when the rule is finalized.
    score: u64,
    /// Whether `extensions` and `score` have been computed.
    finalized: bool,
}
2598
2599impl MagicRule {
2600 #[inline(always)]
2601 fn set_id(&mut self, id: usize) {
2602 self.id = id
2603 }
2604
2605 fn visit_all_entries(
2606 &self,
2607 deps: &HashMap<String, DependencyRule>,
2608 marked: &mut HashSet<String>,
2609 ) -> Result<EntryNodeVisitor, Error> {
2610 let mut v = EntryNodeVisitor::new();
2611 self.entries.visit(&mut v, deps, marked, 0)?;
2612 Ok(v)
2613 }
2614
2615 fn try_finalize(&mut self, deps: &HashMap<String, DependencyRule>) {
2618 if self.finalized {
2619 return;
2620 }
2621
2622 if let Ok(v) = self.visit_all_entries(deps, &mut HashSet::new()) {
2624 self.extensions.extend(v.exts);
2625 self.score = v.score;
2626 self.finalized = true
2627 }
2628 }
2629
2630 #[inline]
2631 fn magic_entrypoint<'r, R: Read + Seek>(
2632 &'r self,
2633 magic: &mut Magic<'r>,
2634 stream_kind: StreamKind,
2635 haystack: &mut LazyCache<R>,
2636 db: &'r MagicDb,
2637 switch_endianness: bool,
2638 depth: usize,
2639 ) -> Result<u64, Error> {
2640 self.entries.matches(
2641 self.source.as_deref(),
2642 magic,
2643 &mut MatchState::empty(),
2644 stream_kind,
2645 None,
2646 None,
2647 None,
2648 haystack,
2649 db,
2650 switch_endianness,
2651 depth,
2652 )
2653 }
2654
2655 #[inline]
2656 #[allow(clippy::too_many_arguments)]
2657 fn magic<'r, R: Read + Seek>(
2658 &'r self,
2659 magic: &mut Magic<'r>,
2660 stream_kind: StreamKind,
2661 buf_base_offset: Option<u64>,
2662 rule_base_offset: Option<u64>,
2663 haystack: &mut LazyCache<R>,
2664 db: &'r MagicDb,
2665 switch_endianness: bool,
2666 depth: usize,
2667 ) -> Result<u64, Error> {
2668 self.entries.matches(
2669 self.source.as_deref(),
2670 magic,
2671 &mut MatchState::empty(),
2672 stream_kind,
2673 buf_base_offset,
2674 rule_base_offset,
2675 None,
2676 haystack,
2677 db,
2678 switch_endianness,
2679 depth,
2680 )
2681 }
2682
2683 pub fn is_text(&self) -> bool {
2689 self.entries.entry.test.is_text()
2690 && self.entries.children.iter().all(|e| e.entry.test.is_text())
2691 }
2692
2693 #[inline(always)]
2699 pub fn score(&self) -> u64 {
2700 self.score
2701 }
2702
2703 #[inline(always)]
2709 pub fn source(&self) -> Option<&str> {
2710 self.source.as_deref()
2711 }
2712
2713 #[inline(always)]
2719 pub fn line(&self) -> usize {
2720 self.entries.entry.line
2721 }
2722
2723 #[inline(always)]
2729 pub fn extensions(&self) -> &HashSet<String> {
2730 &self.extensions
2731 }
2732}
2733
/// Named rule, meant to be called from other rules through `use` tests.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct DependencyRule {
    /// Name of the rule.
    name: String,
    /// The rule itself.
    rule: MagicRule,
}
2739
/// Rules and named dependency rules parsed from a magic file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MagicSource {
    /// Rules of the source.
    rules: Vec<MagicRule>,
    /// Named rules of the source, indexed by name.
    dependencies: HashMap<String, DependencyRule>,
}
2750
impl MagicSource {
    /// Opens and parses the magic file located at `p`.
    ///
    /// # Errors
    ///
    /// Returns the error reported by `FileMagicParser::parse_file`.
    pub fn open<P: AsRef<Path>>(p: P) -> Result<Self, Error> {
        FileMagicParser::parse_file(p)
    }
}
2765
/// Continuation level of an entry, built from the depth of the entry.
#[derive(Debug, Hash, PartialEq, Eq, Clone, Copy)]
struct ContinuationLevel(u8);
2768
/// Encoding of a text stream.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
enum TextEncoding {
    /// Plain ASCII.
    Ascii,
    /// Valid UTF-8.
    Utf8,
    /// Text whose encoding has not been identified.
    Unknown,
}

impl TextEncoding {
    /// Returns the label of the encoding.
    const fn as_magic_str(&self) -> &'static str {
        match *self {
            Self::Unknown => "Unknown",
            Self::Utf8 => "UTF-8",
            Self::Ascii => "ASCII",
        }
    }
}
2786
2787#[derive(Debug, PartialEq, Eq, Clone, Copy)]
2788enum StreamKind {
2789 Binary,
2790 Text(TextEncoding),
2791}
2792
2793impl StreamKind {
2794 const fn is_text(&self) -> bool {
2795 matches!(self, StreamKind::Text(_))
2796 }
2797}
2798
/// State shared by the entries of a rule while the rule is evaluated.
#[derive(Debug)]
struct MatchState {
    /// One flag per possible continuation level (a `u8`), set when an entry
    /// matched at that level and reset by `clear` tests.
    continuation_levels: [bool; 256],
}
2803
2804impl MatchState {
2805 #[inline(always)]
2806 fn empty() -> Self {
2807 MatchState {
2808 continuation_levels: [false; 256],
2809 }
2810 }
2811
2812 #[inline(always)]
2813 fn get_continuation_level(&mut self, level: &ContinuationLevel) -> bool {
2814 self.continuation_levels
2815 .get(level.0 as usize)
2816 .cloned()
2817 .unwrap_or_default()
2818 }
2819
2820 #[inline(always)]
2821 fn set_continuation_level(&mut self, level: ContinuationLevel) {
2822 if let Some(b) = self.continuation_levels.get_mut(level.0 as usize) {
2823 *b = true
2824 }
2825 }
2826
2827 #[inline(always)]
2828 fn clear_continuation_level(&mut self, level: &ContinuationLevel) {
2829 if let Some(b) = self.continuation_levels.get_mut(level.0 as usize) {
2830 *b = false;
2831 }
2832 }
2833}
2834
/// Result of the identification of a stream.
#[derive(Debug, Default)]
pub struct Magic<'m> {
    /// Kind of the stream, when it has been set.
    stream_kind: Option<StreamKind>,
    /// Name of the source of the rule which matched, if any.
    source: Option<Cow<'m, str>>,
    /// Parts of the message, in the order they were pushed.
    message: Vec<Cow<'m, str>>,
    /// MIME type; the first one set wins.
    mime_type: Option<Cow<'m, str>>,
    /// Creator code; the first one set wins.
    creator_code: Option<Cow<'m, str>>,
    /// Accumulated strength of the entries which matched.
    strength: u64,
    /// File extensions; the first non-empty set inserted wins.
    exts: HashSet<Cow<'m, str>>,
    /// Value returned by `Magic::is_default`; not written by any code
    /// visible here besides `reset`.
    is_default: bool,
}
2847
2848impl<'m> Magic<'m> {
2849 #[inline(always)]
2850 fn set_source(&mut self, source: Option<&'m str>) {
2851 self.source = source.map(Cow::Borrowed);
2852 }
2853
2854 #[inline(always)]
2855 fn set_stream_kind(&mut self, stream_kind: StreamKind) {
2856 self.stream_kind = Some(stream_kind)
2857 }
2858
2859 #[inline(always)]
2860 fn reset(&mut self) {
2861 self.stream_kind = None;
2862 self.source = None;
2863 self.message.clear();
2864 self.mime_type = None;
2865 self.creator_code = None;
2866 self.strength = 0;
2867 self.exts.clear();
2868 self.is_default = false;
2869 }
2870
2871 #[inline]
2879 pub fn into_owned<'owned>(self) -> Magic<'owned> {
2880 Magic {
2881 stream_kind: self.stream_kind,
2882 source: self.source.map(|s| Cow::Owned(s.into_owned())),
2883 message: self
2884 .message
2885 .into_iter()
2886 .map(Cow::into_owned)
2887 .map(Cow::Owned)
2888 .collect(),
2889 mime_type: self.mime_type.map(|m| Cow::Owned(m.into_owned())),
2890 creator_code: self.creator_code.map(|m| Cow::Owned(m.into_owned())),
2891 strength: self.strength,
2892 exts: self
2893 .exts
2894 .into_iter()
2895 .map(|e| Cow::Owned(e.into_owned()))
2896 .collect(),
2897 is_default: self.is_default,
2898 }
2899 }
2900
2901 #[inline(always)]
2907 pub fn message(&self) -> String {
2908 let mut out = String::new();
2909 for (i, m) in self.message.iter().enumerate() {
2910 if let Some(s) = m.strip_prefix(r#"\b"#) {
2911 out.push_str(s);
2912 } else {
2913 if i > 0 {
2915 out.push(' ');
2916 }
2917 out.push_str(m);
2918 }
2919 }
2920 out
2921 }
2922
2923 #[inline]
2934 pub fn message_parts(&self) -> impl Iterator<Item = &str> {
2935 self.message.iter().map(|p| p.as_ref())
2936 }
2937
2938 #[inline(always)]
2939 fn update_strength(&mut self, value: u64) {
2940 self.strength = self.strength.saturating_add(value);
2941 debug!("updated strength = {:?}", self.strength)
2942 }
2943
2944 #[inline(always)]
2950 pub fn mime_type(&self) -> &str {
2951 self.mime_type.as_deref().unwrap_or(match self.stream_kind {
2952 Some(StreamKind::Text(_)) => DEFAULT_TEXT_MIMETYPE,
2953 Some(StreamKind::Binary) | None => DEFAULT_BIN_MIMETYPE,
2954 })
2955 }
2956
2957 #[inline(always)]
2958 fn push_message<'a: 'm>(&mut self, msg: Cow<'a, str>) {
2959 if !msg.is_empty() {
2960 debug!("pushing message: msg={msg} len={}", msg.len());
2961 self.message.push(msg);
2962 }
2963 }
2964
2965 #[inline(always)]
2966 fn set_mime_type<'a: 'm>(&mut self, mime: Cow<'a, str>) {
2967 if self.mime_type.is_none() {
2968 debug!("insert mime: {:?}", mime);
2969 self.mime_type = Some(mime)
2970 }
2971 }
2972
2973 #[inline(always)]
2974 fn set_creator_code<'a: 'm>(&mut self, apple_ty: Cow<'a, str>) {
2975 if self.creator_code.is_none() {
2976 debug!("insert apple type: {apple_ty:?}");
2977 self.creator_code = Some(apple_ty)
2978 }
2979 }
2980
2981 #[inline(always)]
2982 fn insert_extensions<'a: 'm, I: Iterator<Item = &'a str>>(&mut self, exts: I) {
2983 if self.exts.is_empty() {
2984 self.exts.extend(exts.filter_map(|e| {
2985 if e.is_empty() {
2986 None
2987 } else {
2988 Some(Cow::Borrowed(e))
2989 }
2990 }));
2991 }
2992 }
2993
2994 #[inline(always)]
3002 pub fn strength(&self) -> u64 {
3003 self.strength
3004 }
3005
3006 #[inline(always)]
3012 pub fn source(&self) -> Option<&str> {
3013 self.source.as_deref()
3014 }
3015
3016 #[inline(always)]
3022 pub fn creator_code(&self) -> Option<&str> {
3023 self.creator_code.as_deref()
3024 }
3025
    /// Returns the set of file extensions recorded for this magic.
    #[inline(always)]
    pub fn extensions(&self) -> &HashSet<Cow<'m, str>> {
        &self.exts
    }
3035
    /// Returns the `is_default` flag, i.e. whether this magic was flagged as
    /// the fallback result.
    #[inline(always)]
    pub fn is_default(&self) -> bool {
        self.is_default
    }
3045}
3046
/// Database of magic rules used to identify the content of a stream.
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
pub struct MagicDb {
    /// Identifier that will be assigned to the next loaded rule.
    rule_id: usize,
    /// Rules loaded so far.
    rules: Vec<MagicRule>,
    /// Dependency rules, keyed by string, handed to every rule when it is
    /// finalized.
    dependencies: HashMap<String, DependencyRule>,
}
3054
/// Heuristically decides whether `bytes` looks like text.
///
/// Returns `false` for empty input and as soon as a NUL byte is met.
/// Otherwise the input is considered text when more than 85% of its bytes
/// are printable ASCII (or tab, line feed, carriage return) and fewer than
/// 20% are any other non-NUL byte.
#[inline(always)]
fn is_likely_text(bytes: &[u8]) -> bool {
    if bytes.is_empty() {
        return false;
    }

    let mut printable = 0usize;
    let mut other = 0usize;

    for &byte in bytes {
        match byte {
            // a NUL byte is a strong hint for binary content
            0x00 => return false,
            b'\t' | b'\n' | b'\r' | b' '..=b'~' => printable += 1,
            _ => other += 1,
        }
    }

    let total = bytes.len() as f64;
    let printable_ratio = printable as f64 / total;
    let other_ratio = other as f64 / total;

    printable_ratio > 0.85 && other_ratio < 0.20
}
3097
3098#[inline(always)]
3099fn guess_stream_kind<S: AsRef<[u8]>>(stream: S) -> StreamKind {
3100 let buf = stream.as_ref();
3101
3102 match run_utf8_validation(buf) {
3103 Ok(is_ascii) => {
3104 if is_ascii {
3105 StreamKind::Text(TextEncoding::Ascii)
3106 } else {
3107 StreamKind::Text(TextEncoding::Utf8)
3108 }
3109 }
3110 Err(e) => {
3111 if is_likely_text(&buf[e.valid_up_to..]) {
3112 StreamKind::Text(TextEncoding::Unknown)
3113 } else {
3114 StreamKind::Binary
3115 }
3116 }
3117 }
3118}
3119
3120impl MagicDb {
3121 pub fn optimal_lazy_cache<R: Read + Seek>(f: R) -> Result<LazyCache<R>, io::Error> {
3124 Ok(LazyCache::<R>::from_read_seek(f)
3125 .and_then(|lc| lc.with_hot_cache(2 * FILE_BYTES_MAX))?)
3126 .map(|lc| lc.with_warm_cache(100 << 20))
3127 }
3128
    /// Creates a database from [`Default`]; rules are added with
    /// [`MagicDb::load`].
    pub fn new() -> Self {
        Self::default()
    }
3137
3138 #[inline(always)]
3139 fn next_rule_id(&mut self) -> usize {
3140 let t = self.rule_id;
3141 self.rule_id += 1;
3142 t
3143 }
3144
3145 #[inline(always)]
3146 fn try_json<R: Read + Seek>(
3147 haystack: &mut LazyCache<R>,
3148 stream_kind: StreamKind,
3149 magic: &mut Magic,
3150 ) -> Result<bool, Error> {
3151 if matches!(stream_kind, StreamKind::Binary) {
3153 return Ok(false);
3154 }
3155
3156 let buf = haystack.read_range(0..FILE_BYTES_MAX as u64)?.trim_ascii();
3157
3158 let Some((start, end)) = find_json_boundaries(buf) else {
3159 return Ok(false);
3160 };
3161
3162 for c in buf[0..start].iter() {
3165 if !c.is_ascii_whitespace() {
3166 return Ok(false);
3167 }
3168 }
3169
3170 let mut is_ndjson = false;
3171
3172 trace!("maybe a json document");
3173 let ok = serde_json::from_slice::<serde_json::Value>(&buf[start..=end]).is_ok();
3174 if !ok {
3175 return Ok(false);
3176 }
3177
3178 if end + 1 < buf.len() {
3180 let buf = &buf[end + 1..];
3182 if let Some((second_start, second_end)) = find_json_boundaries(buf) {
3183 if memchr(b'\n', &buf[..second_start]).is_some() {
3185 trace!("might be ndjson");
3186 is_ndjson = serde_json::from_slice::<serde_json::Value>(
3187 &buf[second_start..=second_end],
3188 )
3189 .is_ok();
3190 }
3191 }
3192 }
3193
3194 if is_ndjson {
3195 magic.push_message(Cow::Borrowed("New Line Delimited"));
3196 magic.set_mime_type(Cow::Borrowed("application/x-ndjson"));
3197 magic.insert_extensions(["ndjson", "jsonl"].into_iter());
3198 } else {
3199 magic.set_mime_type(Cow::Borrowed("application/json"));
3200 magic.insert_extensions(["json"].into_iter());
3201 }
3202
3203 magic.push_message(Cow::Borrowed("JSON text data"));
3204 magic.set_source(Some(HARDCODED_SOURCE));
3205 magic.update_strength(HARDCODED_MAGIC_STRENGTH);
3206 Ok(true)
3207 }
3208
3209 #[inline(always)]
3210 fn try_csv<R: Read + Seek>(
3211 haystack: &mut LazyCache<R>,
3212 stream_kind: StreamKind,
3213 magic: &mut Magic,
3214 ) -> Result<bool, Error> {
3215 let StreamKind::Text(enc) = stream_kind else {
3217 return Ok(false);
3218 };
3219
3220 let buf = haystack.read_range(0..FILE_BYTES_MAX as u64)?;
3221 let mut reader = csv::Reader::from_reader(io::Cursor::new(buf));
3222 let mut records = reader.records();
3223
3224 let Some(Ok(first)) = records.next() else {
3225 return Ok(false);
3226 };
3227
3228 if first.len() <= 1 {
3232 return Ok(false);
3233 }
3234
3235 let mut n = 1;
3237 for i in records.take(9) {
3238 if let Ok(rec) = i {
3239 if first.len() != rec.len() {
3240 return Ok(false);
3241 }
3242 } else {
3243 return Ok(false);
3244 }
3245 n += 1;
3246 }
3247
3248 if n != 10 {
3250 return Ok(false);
3251 }
3252
3253 magic.set_mime_type(Cow::Borrowed("text/csv"));
3254 magic.push_message(Cow::Borrowed("CSV"));
3255 magic.push_message(Cow::Borrowed(enc.as_magic_str()));
3256 magic.push_message(Cow::Borrowed("text"));
3257 magic.insert_extensions(["csv"].into_iter());
3258 magic.set_source(Some(HARDCODED_SOURCE));
3259 magic.update_strength(HARDCODED_MAGIC_STRENGTH);
3260 Ok(true)
3261 }
3262
3263 #[inline(always)]
3264 fn try_tar<R: Read + Seek>(
3265 haystack: &mut LazyCache<R>,
3266 stream_kind: StreamKind,
3267 magic: &mut Magic,
3268 ) -> Result<bool, Error> {
3269 if !matches!(stream_kind, StreamKind::Binary) {
3271 return Ok(false);
3272 }
3273
3274 let buf = haystack.read_range(0..FILE_BYTES_MAX as u64)?;
3275 let mut ar = Archive::new(io::Cursor::new(buf));
3276
3277 let Ok(mut entries) = ar.entries() else {
3278 return Ok(false);
3279 };
3280
3281 let Some(Ok(first)) = entries.next() else {
3282 return Ok(false);
3283 };
3284
3285 let header = first.header();
3286
3287 if header.as_ustar().is_some() {
3288 magic.push_message(Cow::Borrowed("POSIX tar archive"));
3289 } else if header.as_gnu().is_some() {
3290 magic.push_message(Cow::Borrowed("POSIX tar archive (GNU)"));
3291 } else {
3292 magic.push_message(Cow::Borrowed("tar archive"));
3293 }
3294
3295 magic.set_mime_type(Cow::Borrowed("application/x-tar"));
3296 magic.set_source(Some(HARDCODED_SOURCE));
3297 magic.update_strength(HARDCODED_MAGIC_STRENGTH);
3298 magic.insert_extensions(["tar"].into_iter());
3299 Ok(true)
3300 }
3301
3302 #[inline(always)]
3303 fn try_hard_magic<R: Read + Seek>(
3304 haystack: &mut LazyCache<R>,
3305 stream_kind: StreamKind,
3306 magic: &mut Magic,
3307 ) -> Result<bool, Error> {
3308 Ok(Self::try_json(haystack, stream_kind, magic)?
3309 || Self::try_csv(haystack, stream_kind, magic)?
3310 || Self::try_tar(haystack, stream_kind, magic)?)
3311 }
3312
3313 #[inline(always)]
3314 fn magic_default<'m, R: Read + Seek>(
3315 cache: &mut LazyCache<R>,
3316 stream_kind: StreamKind,
3317 magic: &mut Magic<'m>,
3318 ) {
3319 magic.set_source(Some(HARDCODED_SOURCE));
3320 magic.set_stream_kind(stream_kind);
3321 magic.is_default = true;
3322
3323 if cache.data_size() == 0 {
3324 magic.push_message(Cow::Borrowed("empty"));
3325 magic.set_mime_type(Cow::Borrowed(DEFAULT_BIN_MIMETYPE));
3326 }
3327
3328 match stream_kind {
3329 StreamKind::Binary => {
3330 magic.push_message(Cow::Borrowed("data"));
3331 }
3332 StreamKind::Text(e) => {
3333 magic.push_message(Cow::Borrowed(e.as_magic_str()));
3334 magic.push_message(Cow::Borrowed("text"));
3335 }
3336 }
3337 }
3338
3339 pub fn load(&mut self, mf: MagicSource) -> Result<&mut Self, Error> {
3349 for rule in mf.rules.into_iter() {
3350 let mut rule = rule;
3351 rule.set_id(self.next_rule_id());
3352
3353 self.rules.push(rule);
3354 }
3355
3356 self.dependencies.extend(mf.dependencies);
3357 self.prepare();
3358 Ok(self)
3359 }
3360
    /// Returns the rules currently held by the database.
    pub fn rules(&self) -> &[MagicRule] {
        &self.rules
    }
3369
3370 #[inline]
3371 fn first_magic_with_stream_kind<R: Read + Seek>(
3372 &self,
3373 haystack: &mut LazyCache<R>,
3374 stream_kind: StreamKind,
3375 extension: Option<&str>,
3376 ) -> Result<Magic<'_>, Error> {
3377 let mut magic = Magic::default();
3379
3380 if Self::try_hard_magic(haystack, stream_kind, &mut magic)? {
3381 return Ok(magic);
3382 }
3383
3384 let mut marked = vec![false; self.rules.len()];
3385
3386 macro_rules! do_magic {
3387 ($rule: expr) => {{
3388 $rule.magic_entrypoint(&mut magic, stream_kind, haystack, &self, false, 0)?;
3389
3390 if !magic.message.is_empty() {
3391 magic.set_stream_kind(stream_kind);
3392 magic.set_source($rule.source.as_deref());
3393 return Ok(magic);
3394 }
3395
3396 magic.reset();
3397 }};
3398 }
3399
3400 if let Some(ext) = extension.map(|e| e.to_lowercase())
3401 && !ext.is_empty()
3402 {
3403 for rule in self.rules.iter().filter(|r| r.extensions.contains(&ext)) {
3404 do_magic!(rule);
3405 if let Some(f) = marked.get_mut(rule.id) {
3406 *f = true
3407 }
3408 }
3409 }
3410
3411 for rule in self
3412 .rules
3413 .iter()
3414 .filter(|r| !*marked.get(r.id).unwrap_or(&false))
3416 {
3417 do_magic!(rule)
3418 }
3419
3420 Self::magic_default(haystack, stream_kind, &mut magic);
3421
3422 Ok(magic)
3423 }
3424
3425 pub fn first_magic<R: Read + Seek>(
3439 &self,
3440 r: &mut R,
3441 extension: Option<&str>,
3442 ) -> Result<Magic<'_>, Error> {
3443 let mut cache = Self::optimal_lazy_cache(r)?;
3444 let stream_kind = guess_stream_kind(cache.read_range(0..FILE_BYTES_MAX as u64)?);
3445 self.first_magic_with_stream_kind(&mut cache, stream_kind, extension)
3446 }
3447
3448 pub fn first_magic_with_lazy_cache<R: Read + Seek>(
3468 &self,
3469 cache: &mut LazyCache<R>,
3470 extension: Option<&str>,
3471 ) -> Result<Magic<'_>, Error> {
3472 let stream_kind = guess_stream_kind(cache.read_range(0..FILE_BYTES_MAX as u64)?);
3473 self.first_magic_with_stream_kind(cache, stream_kind, extension)
3474 }
3475
3476 #[inline(always)]
3477 fn all_magics_sort_with_stream_kind<R: Read + Seek>(
3478 &self,
3479 haystack: &mut LazyCache<R>,
3480 stream_kind: StreamKind,
3481 ) -> Result<Vec<Magic<'_>>, Error> {
3482 let mut out = Vec::new();
3483
3484 let mut magic = Magic::default();
3485
3486 if Self::try_hard_magic(haystack, stream_kind, &mut magic)? {
3487 out.push(magic);
3488 magic = Magic::default();
3489 }
3490
3491 for rule in self.rules.iter() {
3492 rule.magic_entrypoint(&mut magic, stream_kind, haystack, self, false, 0)?;
3493
3494 if !magic.message.is_empty() {
3496 magic.set_stream_kind(stream_kind);
3497 magic.set_source(rule.source.as_deref());
3498 out.push(magic);
3499 magic = Magic::default();
3500 }
3501
3502 magic.reset();
3503 }
3504
3505 Self::magic_default(haystack, stream_kind, &mut magic);
3506 out.push(magic);
3507
3508 out.sort_by_key(|b| std::cmp::Reverse(b.strength()));
3509
3510 Ok(out)
3511 }
3512
3513 pub fn all_magics<R: Read + Seek>(&self, r: &mut R) -> Result<Vec<Magic<'_>>, Error> {
3523 let mut cache = Self::optimal_lazy_cache(r)?;
3524 let stream_kind = guess_stream_kind(cache.read_range(0..FILE_BYTES_MAX as u64)?);
3525 self.all_magics_sort_with_stream_kind(&mut cache, stream_kind)
3526 }
3527
3528 pub fn all_magics_with_lazy_cache<R: Read + Seek>(
3544 &self,
3545 cache: &mut LazyCache<R>,
3546 ) -> Result<Vec<Magic<'_>>, Error> {
3547 let stream_kind = guess_stream_kind(cache.read_range(0..FILE_BYTES_MAX as u64)?);
3548 self.all_magics_sort_with_stream_kind(cache, stream_kind)
3549 }
3550
3551 #[inline(always)]
3552 fn best_magic_with_stream_kind<R: Read + Seek>(
3553 &self,
3554 haystack: &mut LazyCache<R>,
3555 stream_kind: StreamKind,
3556 ) -> Result<Magic<'_>, Error> {
3557 let magics = self.all_magics_sort_with_stream_kind(haystack, stream_kind)?;
3558
3559 Ok(magics.into_iter().next().unwrap_or_else(|| {
3562 let mut magic = Magic::default();
3563 Self::magic_default(haystack, stream_kind, &mut magic);
3564 magic
3565 }))
3566 }
3567
3568 pub fn best_magic<R: Read + Seek>(&self, r: &mut R) -> Result<Magic<'_>, Error> {
3578 let mut cache = Self::optimal_lazy_cache(r)?;
3579 let stream_kind = guess_stream_kind(cache.read_range(0..FILE_BYTES_MAX as u64)?);
3580 self.best_magic_with_stream_kind(&mut cache, stream_kind)
3581 }
3582
3583 pub fn best_magic_with_lazy_cache<R: Read + Seek>(
3599 &self,
3600 cache: &mut LazyCache<R>,
3601 ) -> Result<Magic<'_>, Error> {
3602 let stream_kind = guess_stream_kind(cache.read_range(0..FILE_BYTES_MAX as u64)?);
3603 self.best_magic_with_stream_kind(cache, stream_kind)
3604 }
3605
3606 pub fn serialize<W: Write>(self, w: &mut W) -> Result<(), Error> {
3612 let mut encoder = GzEncoder::new(w, Compression::best());
3613
3614 bincode::serde::encode_into_std_write(&self, &mut encoder, bincode::config::standard())?;
3615 encoder.finish()?;
3616 Ok(())
3617 }
3618
3619 pub fn deserialize<R: Read>(r: &mut R) -> Result<Self, Error> {
3629 let mut buf = vec![];
3630 let mut gz = GzDecoder::new(r);
3631 gz.read_to_end(&mut buf).map_err(|e| {
3632 bincode::error::DecodeError::OtherString(format!("failed to read: {e}"))
3633 })?;
3634 let (sdb, _): (MagicDb, usize) =
3635 bincode::serde::decode_from_slice(&buf, bincode::config::standard())?;
3636 Ok(sdb)
3637 }
3638
3639 #[inline(always)]
3640 fn prepare(&mut self) {
3641 self.rules
3642 .iter_mut()
3643 .for_each(|r| r.try_finalize(&self.dependencies));
3644
3645 self.rules.sort_by_key(|r| (r.is_text(), -(r.score as i64)));
3647 }
3648}
3649
3650#[cfg(test)]
3651mod tests {
3652 use std::io::Cursor;
3653
3654 use regex::bytes::Regex;
3655
3656 use crate::utils::unix_local_time_to_string;
3657
3658 use super::*;
3659
    // Builds a `LazyCache` over an in-memory cursor wrapping the literal `$l`.
    macro_rules! lazy_cache {
        ($l: literal) => {
            LazyCache::from_read_seek(Cursor::new($l)).unwrap()
        };
    }
3665
3666 fn first_magic(
3667 rule: &str,
3668 content: &[u8],
3669 stream_kind: StreamKind,
3670 ) -> Result<Magic<'static>, Error> {
3671 let mut md = MagicDb::new();
3672 md.load(
3673 FileMagicParser::parse_str(rule, None)
3674 .inspect_err(|e| eprintln!("{e}"))
3675 .unwrap(),
3676 )
3677 .unwrap();
3678 let mut reader = LazyCache::from_read_seek(Cursor::new(content)).unwrap();
3679 let v = md.best_magic_with_stream_kind(&mut reader, stream_kind)?;
3680 Ok(v.into_owned())
3681 }
3682
    // Installs a TRACE-level tracing subscriber. No test in view calls it,
    // hence the `unused_macros` allowance; presumably kept for ad-hoc
    // debugging of a single test.
    #[allow(unused_macros)]
    macro_rules! enable_trace {
        () => {
            tracing_subscriber::fmt()
                .with_max_level(tracing_subscriber::filter::LevelFilter::TRACE)
                .try_init();
        };
    }
3692
    // Asserts that `$rule` parses, printing the parse error before panicking.
    macro_rules! parse_assert {
        ($rule:literal) => {
            FileMagicParser::parse_str($rule, None)
                .inspect_err(|e| eprintln!("{e}"))
                .unwrap();
        };
    }
3700
    // Evaluates `$rule` against `$content` as a binary stream.
    // - two arguments: only unwraps the result of the evaluation
    // - three arguments: also compares the resulting message to `$message`
    macro_rules! assert_magic_match_bin {
        ($rule: literal, $content:literal) => {{ first_magic($rule, $content, StreamKind::Binary).unwrap() }};
        ($rule: literal, $content:literal, $message:expr) => {{
            assert_eq!(
                first_magic($rule, $content, StreamKind::Binary)
                    .unwrap()
                    .message(),
                $message
            );
        }};
    }
3712
    // Evaluates `$rule` against `$content` as a UTF-8 text stream.
    // - two arguments: only unwraps the result of the evaluation
    // - three arguments: also compares the resulting message to `$message`
    macro_rules! assert_magic_match_text {
        ($rule: literal, $content:literal) => {{ first_magic($rule, $content, StreamKind::Text(TextEncoding::Utf8)).unwrap() }};
        ($rule: literal, $content:literal, $message:expr) => {{
            assert_eq!(
                first_magic($rule, $content, StreamKind::Text(TextEncoding::Utf8))
                    .unwrap()
                    .message(),
                $message
            );
        }};
    }
3724
    // Asserts that evaluating `$rule` against `$content` as a UTF-8 text
    // stream only yields the default (fallback) magic.
    macro_rules! assert_magic_not_match_text {
        ($rule: literal, $content:literal) => {{
            assert!(
                first_magic($rule, $content, StreamKind::Text(TextEncoding::Utf8))
                    .unwrap()
                    .is_default()
            );
        }};
    }
3734
    // Asserts that evaluating `$rule` against `$content` as a binary stream
    // only yields the default (fallback) magic.
    macro_rules! assert_magic_not_match_bin {
        ($rule: literal, $content:literal) => {{
            assert!(
                first_magic($rule, $content, StreamKind::Binary)
                    .unwrap()
                    .is_default()
            );
        }};
    }
3744
    /// Exercises `regex` rules on text and binary content.
    #[test]
    fn test_regex() {
        // shebang rule: the child rule captures the interpreter name, which
        // is formatted into the message through `%s`
        assert_magic_match_text!(
            r#"
0 regex/1024 \^#![[:space:]]*/usr/bin/env[[:space:]]+
!:mime text/x-shellscript
>&0 regex/64 .*($|\\b) %s shell script text executable
 "#,
            br#"#!/usr/bin/env bash
 echo hello world"#,
            "bash shell script text executable"
        );

        // sanity check of the regex crate itself on non UTF-8 bytes
        let re = Regex::new(r"(?-u)\x42\x82").unwrap();
        assert!(re.is_match(b"\x42\x82"));

        // same pattern used from a rule on binary content
        assert_magic_match_bin!(
            r#"0 regex \x42\x82 binary regex match"#,
            b"\x00\x00\x00\x00\x00\x00\x42\x82"
        );

        // the relative child rule is expected to read the bytes following
        // the regex match
        assert_magic_match_bin!(
            r#"
 0 regex \x42\x82
 >&0 string \xde\xad\xbe\xef it works
 "#,
            b"\x00\x00\x00\x00\x00\x00\x42\x82\xde\xad\xbe\xef"
        );

        // with `/s` the relative child rule is expected to read from the
        // beginning of the regex match
        assert_magic_match_bin!(
            r#"
 0 regex/s \x42\x82
 >&0 string \x42\x82\xde\xad\xbe\xef it works
 "#,
            b"\x00\x00\x00\x00\x00\x00\x42\x82\xde\xad\xbe\xef"
        );

        // anchored pattern expected to match a line in the middle of the
        // content
        assert_magic_match_text!(
            r#"
0 regex/1024 \^HelloWorld$ HelloWorld String"#,
            br#"
// this is a comment after an empty line
HelloWorld
 "#
        );
    }
3794
    /// Exercises `string` rules carrying modifiers (`/w`, `/C`, `/c`, `/f`,
    /// `/W`), pairing each matching case with a non-matching one when
    /// relevant.
    #[test]
    fn test_string_with_mods() {
        // `/w`
        assert_magic_match_text!(
            r#"0 string/w #!\ \ \ /usr/bin/env\ bash BASH
 "#,
            b"#! /usr/bin/env bash i
 echo hello world"
        );

        // `/C`: the all-lowercase content is expected to match ...
        assert_magic_match_text!(
            r#"0 string/C HelloWorld it works
 "#,
            b"helloworld"
        );

        // ... while the case-swapped content is not
        assert_magic_not_match_text!(
            r#"0 string/C HelloWorld it works
 "#,
            b"hELLOwORLD"
        );

        // `/c`: the all-uppercase content is expected to match ...
        assert_magic_match_text!(
            r#"0 string/c HelloWorld it works
 "#,
            b"HELLOWORLD"
        );

        // ... while the all-lowercase content is not
        assert_magic_not_match_text!(
            r#"0 string/c HelloWorld it works
 "#,
            b"helloworld"
        );

        // `/f`: expected to match when the content ends right after the
        // pattern ...
        assert_magic_match_text!(
            r#"0 string/f #!/usr/bin/env\ bash BASH
 "#,
            b"#!/usr/bin/env bash"
        );

        // ... but not when the pattern is only a prefix of a longer word
        assert_magic_not_match_text!(
            r#"0 string/f #!/usr/bin/python PYTHON"#,
            b"#!/usr/bin/pythonic"
        );

        // `/W`: one space in the pattern, one space in the content
        assert_magic_match_text!(
            r#"0 string/W #!/usr/bin/env\ python PYTHON"#,
            b"#!/usr/bin/env python"
        );

        // `/W`: two spaces in the pattern must not match a single space
        assert_magic_not_match_text!(
            r#"0 string/W #!/usr/bin/env\ \ python PYTHON"#,
            b"#!/usr/bin/env python"
        );
    }
3853
    /// Exercises `search` rules carrying modifiers.
    #[test]
    fn test_search_with_mods() {
        assert_magic_match_text!(
            r#"0 search/1/fwt #!\ /usr/bin/luatex LuaTex script text executable"#,
            b"#! /usr/bin/luatex "
        );

        // with `/s` the relative child rule is expected to read from the
        // beginning of the search match
        assert_magic_match_text!(
            r#"
 0 search/s /usr/bin/env
 >&0 string /usr/bin/env it works
 "#,
            b"#!/usr/bin/env python"
        );

        // without `/s` the same child rule must not match
        assert_magic_not_match_text!(
            r#"
 0 search /usr/bin/env
 >&0 string /usr/bin/env it works
 "#,
            b"#!/usr/bin/env python"
        );
    }
3878
    /// Exercises `pstring` rules with the various length-prefix modifiers.
    ///
    /// Judging from the fixtures, `H`/`h` use a two-byte prefix, `L`/`l` a
    /// four-byte prefix, and with `J` the stored length also counts the
    /// prefix itself.
    #[test]
    fn test_pstring() {
        // default: one-byte length prefix
        assert_magic_match_bin!(r#"0 pstring Toast it works"#, b"\x05Toast");

        assert_magic_match_bin!(r#"0 pstring Toast %s"#, b"\x05Toast", "Toast");

        assert_magic_not_match_bin!(r#"0 pstring Toast Doesn't work"#, b"\x07Toaster");

        assert_magic_match_bin!(r#"0 pstring/H Toast it works"#, b"\x00\x05Toast");

        assert_magic_match_bin!(r#"0 pstring/HJ Toast it works"#, b"\x00\x07Toast");

        assert_magic_match_bin!(r#"0 pstring/HJ Toast %s"#, b"\x00\x07Toast", "Toast");

        assert_magic_match_bin!(r#"0 pstring/h Toast it works"#, b"\x05\x00Toast");

        assert_magic_match_bin!(r#"0 pstring/hJ Toast it works"#, b"\x07\x00Toast");

        assert_magic_match_bin!(r#"0 pstring/L Toast it works"#, b"\x00\x00\x00\x05Toast");

        assert_magic_match_bin!(r#"0 pstring/LJ Toast it works"#, b"\x00\x00\x00\x09Toast");

        assert_magic_match_bin!(r#"0 pstring/l Toast it works"#, b"\x05\x00\x00\x00Toast");

        assert_magic_match_bin!(r#"0 pstring/lJ Toast it works"#, b"\x09\x00\x00\x00Toast");
    }
3906
    /// An `indirect` rule at offset 0 is expected to fail with
    /// `Error::MaximumRecursion(MAX_RECURSION)`.
    #[test]
    fn test_max_recursion() {
        let res = first_magic(
            r#"0 indirect x"#,
            b"#! /usr/bin/luatex ",
            StreamKind::Binary,
        );
        assert!(res.is_err());
        // check the kind of error once unwrapped from its localization
        let _ = res.inspect_err(|e| {
            assert!(matches!(
                e.unwrap_localized(),
                Error::MaximumRecursion(MAX_RECURSION)
            ))
        });
    }
3922
    /// Exercises the comparison operators (`!`, `>`, `<`) of `string` rules.
    #[test]
    fn test_string_ops() {
        assert_magic_match_text!("0 string/b MZ MZ File", b"MZ\0");
        assert_magic_match_text!("0 string !MZ Not MZ File", b"AZ\0");
        assert_magic_match_text!("0 string >\0 Any String", b"A\0");
        assert_magic_match_text!("0 string >Test Any String", b"Test 1\0");
        assert_magic_match_text!("0 string <Test Any String", b"\0");
        assert_magic_not_match_text!("0 string >Test Any String", b"\0");
    }
3932
    /// Exercises `lestring16` rules: little-endian UTF-16 content matches,
    /// the big-endian byte order does not.
    #[test]
    fn test_lestring16() {
        assert_magic_match_bin!(
            "0 lestring16 abcd Little-endian UTF-16 string",
            b"\x61\x00\x62\x00\x63\x00\x64\x00"
        );
        // `x` matches anything, the decoded string is formatted with `%s`
        assert_magic_match_bin!(
            "0 lestring16 x %s",
            b"\x61\x00\x62\x00\x63\x00\x64\x00\x00",
            "abcd"
        );
        assert_magic_not_match_bin!(
            "0 lestring16 abcd Little-endian UTF-16 string",
            b"\x00\x61\x00\x62\x00\x63\x00\x64"
        );
        // same string located at offset 4
        assert_magic_match_bin!(
            "4 lestring16 abcd Little-endian UTF-16 string",
            b"\x00\x00\x00\x00\x61\x00\x62\x00\x63\x00\x64\x00"
        );
    }
3953
    /// Exercises `bestring16` rules: big-endian UTF-16 content matches, the
    /// little-endian byte order does not.
    #[test]
    fn test_bestring16() {
        assert_magic_match_bin!(
            "0 bestring16 abcd Big-endian UTF-16 string",
            b"\x00\x61\x00\x62\x00\x63\x00\x64"
        );
        // `x` matches anything, the decoded string is formatted with `%s`
        assert_magic_match_bin!(
            "0 bestring16 x %s",
            b"\x00\x61\x00\x62\x00\x63\x00\x64",
            "abcd"
        );
        assert_magic_not_match_bin!(
            "0 bestring16 abcd Big-endian UTF-16 string",
            b"\x61\x00\x62\x00\x63\x00\x64\x00"
        );
        // same string located at offset 4
        assert_magic_match_bin!(
            "4 bestring16 abcd Big-endian UTF-16 string",
            b"\x00\x00\x00\x00\x00\x61\x00\x62\x00\x63\x00\x64"
        );
    }
3974
    /// Negative offsets are expected to be counted from the end of the
    /// content: `-1` is the last byte, `-2` the one before.
    #[test]
    fn test_offset_from_end() {
        assert_magic_match_bin!("-1 ubyte 0x42 last byte ok", b"\x00\x00\x42");
        assert_magic_match_bin!("-2 ubyte 0x41 last byte ok", b"\x00\x41\x00");
    }
3980
    /// Chained relative offsets (`&0`): each child rule is expected to read
    /// the byte right after the one matched by its parent.
    #[test]
    fn test_relative_offset() {
        assert_magic_match_bin!(
            "
 0 ubyte 0x42
 >&0 ubyte 0x00
 >>&0 ubyte 0x41 third byte ok
 ",
            b"\x42\x00\x41\x00"
        );
    }
3992
    /// Indirect offsets: the offset of the tested byte is read from the
    /// content itself, optionally adjusted by a constant or by another
    /// indirect value.
    #[test]
    fn test_indirect_offset() {
        // offset read at 0 is 4
        assert_magic_match_bin!("(0.l) ubyte 0x42 it works", b"\x04\x00\x00\x00\x42");
        // offset read at 0 is 1, plus 3
        assert_magic_match_bin!("(0.l+3) ubyte 0x42 it works", b"\x01\x00\x00\x00\x42");
        // offset read at 0 is 4, plus the value read at offset 4 (4 as well)
        assert_magic_match_bin!(
            "(0.l+(4)) ubyte 0x42 it works",
            b"\x04\x00\x00\x00\x04\x00\x00\x00\x42"
        );
    }
4004
    /// A `use` rule carrying its own message: the message of the calling
    /// rule is expected to come before the message of the named rule.
    #[test]
    fn test_use_with_message() {
        assert_magic_match_bin!(
            r#"
0 string MZ
>0 use mz first match

0 name mz then second match
>0 string MZ
"#,
            b"MZ\0",
            "first match then second match"
        );
    }
4019
    /// Arithmetic and bitwise transforms applied to the value read before
    /// it is compared, plus rejection of division/modulo by zero at parse
    /// time.
    #[test]
    fn test_scalar_transform() {
        assert_magic_match_bin!("0 ubyte+1 0x1 add works", b"\x00");
        assert_magic_match_bin!("0 ubyte-1 0xfe sub works", b"\xff");
        assert_magic_match_bin!("0 ubyte%2 0 mod works", b"\x0a");
        assert_magic_match_bin!("0 ubyte&0x0f 0x0f bitand works", b"\xff");
        assert_magic_match_bin!("0 ubyte|0x0f 0xff bitor works", b"\xf0");
        assert_magic_match_bin!("0 ubyte^0x0f 0xf0 bitxor works", b"\xff");

        // a zero divisor must be refused by the parser
        FileMagicParser::parse_str("0 ubyte%0 mod by zero", None)
            .expect_err("expect div by zero error");
        FileMagicParser::parse_str("0 ubyte/0 div by zero", None)
            .expect_err("expect div by zero error");
    }
4034
    /// Exercises `belong` (big-endian 32-bit) rules with every comparison
    /// operator used by the fixtures: equality, `<`, `>`, `&`, `^`, `~`, `x`.
    #[test]
    fn test_belong() {
        // equality
        assert_magic_match_bin!("0 belong 0x12345678 Big-endian long", b"\x12\x34\x56\x78");
        // little-endian byte order must not match
        assert_magic_not_match_bin!("0 belong 0x12345678 Big-endian long", b"\x78\x56\x34\x12");
        // equality at offset 4
        assert_magic_match_bin!(
            "4 belong 0x12345678 Big-endian long",
            b"\x00\x00\x00\x00\x12\x34\x56\x78"
        );
        // less than
        assert_magic_match_bin!("0 belong <0x12345678 Big-endian long", b"\x12\x34\x56\x77");
        assert_magic_not_match_bin!("0 belong <0x12345678 Big-endian long", b"\x12\x34\x56\x78");

        // greater than
        assert_magic_match_bin!("0 belong >0x12345678 Big-endian long", b"\x12\x34\x56\x79");
        assert_magic_not_match_bin!("0 belong >0x12345678 Big-endian long", b"\x12\x34\x56\x78");

        // `&`
        assert_magic_match_bin!("0 belong &0x5678 Big-endian long", b"\x00\x00\x56\x78");
        assert_magic_not_match_bin!("0 belong &0x0000FFFF Big-endian long", b"\x12\x34\x56\x78");

        // `^`
        assert_magic_match_bin!("0 belong ^0xFFFF0000 Big-endian long", b"\x00\x00\x56\x78");
        assert_magic_not_match_bin!("0 belong ^0xFFFF0000 Big-endian long", b"\x00\x01\x56\x78");

        // `~`: the matching content is the bitwise complement of the value
        assert_magic_match_bin!("0 belong ~0x12345678 Big-endian long", b"\xed\xcb\xa9\x87");
        assert_magic_not_match_bin!("0 belong ~0x12345678 Big-endian long", b"\x12\x34\x56\x78");

        // `x` matches any value
        assert_magic_match_bin!("0 belong x Big-endian long", b"\x12\x34\x56\x78");
        assert_magic_match_bin!("0 belong x Big-endian long", b"\x78\x56\x34\x12");
    }
4070
    /// `search` rules must parse with or without modifiers, whatever the
    /// order of the range and the flags.
    #[test]
    fn test_parse_search() {
        parse_assert!("0 search test");
        parse_assert!("0 search/24/s test");
        parse_assert!("0 search/s/24 test");
    }
4077
    /// Exercises `bedate` rules (big-endian 32-bit timestamp); 946684800 is
    /// expected to be formatted as `2000-01-01 00:00:00`.
    #[test]
    fn test_bedate() {
        assert_magic_match_bin!(
            "0 bedate 946684800 Unix date (Jan 1, 2000)",
            b"\x38\x6D\x43\x80"
        );
        assert_magic_not_match_bin!(
            "0 bedate 946684800 Unix date (Jan 1, 2000)",
            b"\x00\x00\x00\x00"
        );
        assert_magic_match_bin!(
            "4 bedate 946684800 %s",
            b"\x00\x00\x00\x00\x38\x6D\x43\x80",
            "2000-01-01 00:00:00"
        );
    }
    /// Exercises `beldate` rules (big-endian 32-bit timestamp); the
    /// formatted value is compared with `unix_local_time_to_string`.
    #[test]
    fn test_beldate() {
        assert_magic_match_bin!(
            "0 beldate 946684800 Local date (Jan 1, 2000)",
            b"\x38\x6D\x43\x80"
        );
        assert_magic_not_match_bin!(
            "0 beldate 946684800 Local date (Jan 1, 2000)",
            b"\x00\x00\x00\x00"
        );

        assert_magic_match_bin!(
            "4 beldate 946684800 {}",
            b"\x00\x00\x00\x00\x38\x6D\x43\x80",
            unix_local_time_to_string(946684800)
        );
    }
4111
    /// Exercises `beqdate` rules (big-endian 64-bit timestamp).
    #[test]
    fn test_beqdate() {
        assert_magic_match_bin!(
            "0 beqdate 946684800 Unix date (Jan 1, 2000)",
            b"\x00\x00\x00\x00\x38\x6D\x43\x80"
        );

        assert_magic_not_match_bin!(
            "0 beqdate 946684800 Unix date (Jan 1, 2000)",
            b"\x00\x00\x00\x00\x00\x00\x00\x00"
        );

        assert_magic_match_bin!(
            "0 beqdate 946684800 %s",
            b"\x00\x00\x00\x00\x38\x6D\x43\x80",
            "2000-01-01 00:00:00"
        );
    }
4130
    /// Exercises `medate` rules (middle-endian 32-bit timestamp): the
    /// fixture stores 0x386D4380 as two little-endian 16-bit halves, most
    /// significant half first.
    #[test]
    fn test_medate() {
        assert_magic_match_bin!(
            "0 medate 946684800 Unix date (Jan 1, 2000)",
            b"\x6D\x38\x80\x43"
        );

        assert_magic_not_match_bin!(
            "0 medate 946684800 Unix date (Jan 1, 2000)",
            b"\x00\x00\x00\x00"
        );

        assert_magic_match_bin!(
            "4 medate 946684800 %s",
            b"\x00\x00\x00\x00\x6D\x38\x80\x43",
            "2000-01-01 00:00:00"
        );
    }
4149
    /// Exercises `meldate` rules (middle-endian 32-bit timestamp); the
    /// formatted value is compared with `unix_local_time_to_string`.
    #[test]
    fn test_meldate() {
        assert_magic_match_bin!(
            "0 meldate 946684800 Local date (Jan 1, 2000)",
            b"\x6D\x38\x80\x43"
        );
        assert_magic_not_match_bin!(
            "0 meldate 946684800 Local date (Jan 1, 2000)",
            b"\x00\x00\x00\x00"
        );

        assert_magic_match_bin!(
            "4 meldate 946684800 %s",
            b"\x00\x00\x00\x00\x6D\x38\x80\x43",
            unix_local_time_to_string(946684800)
        );
    }
4167
    /// Exercises `date` rules; the fixture stores the timestamp in
    /// little-endian byte order and expects it to be formatted as
    /// `2000-01-01 00:00:00`.
    #[test]
    fn test_date() {
        assert_magic_match_bin!(
            "0 date 946684800 Local date (Jan 1, 2000)",
            b"\x80\x43\x6D\x38"
        );
        assert_magic_not_match_bin!(
            "0 date 946684800 Local date (Jan 1, 2000)",
            b"\x00\x00\x00\x00"
        );
        assert_magic_match_bin!(
            "4 date 946684800 {}",
            b"\x00\x00\x00\x00\x80\x43\x6D\x38",
            "2000-01-01 00:00:00"
        );
    }
4184
    /// Exercises `leldate` rules (little-endian 32-bit timestamp); the
    /// formatted value is compared with `unix_local_time_to_string`.
    #[test]
    fn test_leldate() {
        assert_magic_match_bin!(
            "0 leldate 946684800 Local date (Jan 1, 2000)",
            b"\x80\x43\x6D\x38"
        );
        assert_magic_not_match_bin!(
            "0 leldate 946684800 Local date (Jan 1, 2000)",
            b"\x00\x00\x00\x00"
        );
        assert_magic_match_bin!(
            "4 leldate 946684800 {}",
            b"\x00\x00\x00\x00\x80\x43\x6D\x38",
            unix_local_time_to_string(946684800)
        );
    }
4201
    /// Exercises `leqdate` rules (little-endian 64-bit timestamp);
    /// 1577836800 is expected to be formatted as `2020-01-01 00:00:00`.
    #[test]
    fn test_leqdate() {
        assert_magic_match_bin!(
            "0 leqdate 1577836800 Unix date (Jan 1, 2020)",
            b"\x00\xe1\x0b\x5E\x00\x00\x00\x00"
        );

        assert_magic_not_match_bin!(
            "0 leqdate 1577836800 Unix date (Jan 1, 2020)",
            b"\x00\x00\x00\x00\x00\x00\x00\x00"
        );
        assert_magic_match_bin!(
            "8 leqdate 1577836800 %s",
            b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\xE1\x0B\x5E\x00\x00\x00\x00",
            "2020-01-01 00:00:00"
        );
    }
4219
    /// Exercises `leqldate` rules (little-endian 64-bit timestamp); the
    /// formatted value is compared with `unix_local_time_to_string`.
    #[test]
    fn test_leqldate() {
        assert_magic_match_bin!(
            "0 leqldate 1577836800 Unix date (Jan 1, 2020)",
            b"\x00\xe1\x0b\x5E\x00\x00\x00\x00"
        );

        assert_magic_not_match_bin!(
            "0 leqldate 1577836800 Unix date (Jan 1, 2020)",
            b"\x00\x00\x00\x00\x00\x00\x00\x00"
        );
        assert_magic_match_bin!(
            "8 leqldate 1577836800 %s",
            b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\xE1\x0B\x5E\x00\x00\x00\x00",
            unix_local_time_to_string(1577836800)
        );
    }
4237
    /// Exercises `melong` (middle-endian 32-bit) rules with every comparison
    /// operator used by the fixtures. The bytes `34 12 78 56` encode
    /// 0x12345678: two little-endian 16-bit halves, most significant half
    /// first.
    #[test]
    fn test_melong() {
        // equality
        assert_magic_match_bin!(
            "0 melong =0x12345678 Middle-endian long",
            b"\x34\x12\x78\x56"
        );
        assert_magic_not_match_bin!(
            "0 melong =0x12345678 Middle-endian long",
            b"\x00\x00\x00\x00"
        );

        // less than
        assert_magic_match_bin!(
            "0 melong <0x12345678 Middle-endian long",
            b"\x34\x12\x78\x55"
        );
        assert_magic_not_match_bin!(
            "0 melong <0x12345678 Middle-endian long",
            b"\x34\x12\x78\x56"
        );

        // greater than
        assert_magic_match_bin!(
            "0 melong >0x12345678 Middle-endian long",
            b"\x34\x12\x78\x57"
        );
        assert_magic_not_match_bin!(
            "0 melong >0x12345678 Middle-endian long",
            b"\x34\x12\x78\x56"
        );

        // `&`
        assert_magic_match_bin!("0 melong &0x5678 Middle-endian long", b"\xab\xcd\x78\x56");
        assert_magic_not_match_bin!(
            "0 melong &0x0000FFFF Middle-endian long",
            b"\x34\x12\x78\x56"
        );

        // `^`
        assert_magic_match_bin!(
            "0 melong ^0xFFFF0000 Middle-endian long",
            b"\x00\x00\x78\x56"
        );
        assert_magic_not_match_bin!(
            "0 melong ^0xFFFF0000 Middle-endian long",
            b"\x00\x01\x78\x56"
        );

        // `~`: the matching content is the bitwise complement of the value
        assert_magic_match_bin!(
            "0 melong ~0x12345678 Middle-endian long",
            b"\xCB\xED\x87\xA9"
        );
        assert_magic_not_match_bin!(
            "0 melong ~0x12345678 Middle-endian long",
            b"\x34\x12\x78\x56"
        );

        // `x` matches any value
        assert_magic_match_bin!("0 melong x Middle-endian long", b"\x34\x12\x78\x56");
        assert_magic_match_bin!("0 melong x Middle-endian long", b"\x00\x00\x00\x00");
    }
4301
    /// Exercises `uquad` (unsigned 64-bit) rules with every comparison
    /// operator used by the fixtures; the content is laid out in
    /// little-endian byte order.
    #[test]
    fn test_uquad() {
        // equality
        assert_magic_match_bin!(
            "0 uquad =0x123456789ABCDEF0 Unsigned quad",
            b"\xF0\xDE\xBC\x9A\x78\x56\x34\x12"
        );
        assert_magic_not_match_bin!(
            "0 uquad =0x123456789ABCDEF0 Unsigned quad",
            b"\x00\x00\x00\x00\x00\x00\x00\x00"
        );

        // less than
        assert_magic_match_bin!(
            "0 uquad <0x123456789ABCDEF0 Unsigned quad",
            b"\xF0\xDE\xBC\x9A\x78\x56\x34\x11"
        );
        assert_magic_not_match_bin!(
            "0 uquad <0x123456789ABCDEF0 Unsigned quad",
            b"\xF0\xDE\xBC\x9A\x78\x56\x34\x12"
        );

        // greater than
        assert_magic_match_bin!(
            "0 uquad >0x123456789ABCDEF0 Unsigned quad",
            b"\xF0\xDE\xBC\x9A\x78\x56\x34\x13"
        );
        assert_magic_not_match_bin!(
            "0 uquad >0x123456789ABCDEF0 Unsigned quad",
            b"\xF0\xDE\xBC\x9A\x78\x56\x34\x12"
        );

        // `&`
        assert_magic_match_bin!(
            "0 uquad &0xF0 Unsigned quad",
            b"\xF0\xDE\xBC\x9A\x78\x56\x34\x12"
        );
        assert_magic_not_match_bin!(
            "0 uquad &0xFF Unsigned quad",
            b"\xF0\xDE\xBC\x9A\x78\x56\x34\x12"
        );

        // `^`
        assert_magic_match_bin!(
            "0 uquad ^0xFFFFFFFFFFFFFFFF Unsigned quad",
            b"\x00\x00\x00\x00\x00\x00\x00\x00"
        );
        assert_magic_not_match_bin!(
            "0 uquad ^0xFFFFFFFFFFFFFFFF Unsigned quad",
            b"\xF0\xDE\xBC\x9A\x78\x56\x34\x12"
        );

        // `~`: the matching content is the bitwise complement of the value
        assert_magic_match_bin!(
            "0 uquad ~0x123456789ABCDEF0 Unsigned quad",
            b"\x0F\x21\x43\x65\x87\xA9\xCB\xED"
        );
        assert_magic_not_match_bin!(
            "0 uquad ~0x123456789ABCDEF0 Unsigned quad",
            b"\xF0\xDE\xBC\x9A\x78\x56\x34\x12"
        );

        // `x` matches any value; the value read is formatted in the message
        assert_magic_match_bin!(
            "0 uquad x {:#x}",
            b"\xF0\xDE\xBC\x9A\x78\x56\x34\x12",
            "0x123456789abcdef0"
        );
        assert_magic_match_bin!(
            "0 uquad x Unsigned quad",
            b"\x00\x00\x00\x00\x00\x00\x00\x00"
        );
    }
4375
#[test]
fn test_guid() {
    // Checks matching and rendering of the `guid` type on 16-byte inputs.

    // The input bytes appear in the same order as the hex digits of the
    // textual GUID in the rule.
    assert_magic_match_bin!(
        "0 guid EC959539-6786-2D4E-8FDB-98814CE76C1E It works",
        b"\xEC\x95\x95\x39\x67\x86\x2D\x4E\x8F\xDB\x98\x81\x4C\xE7\x6C\x1E"
    );

    // An unrelated byte sequence (00..0F) must not match a GUID rule.
    assert_magic_not_match_bin!(
        "0 guid 399595EC-8667-4E2D-8FDB-98814CE76C1E It works",
        b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F"
    );

    // `x` accepts any value; `%s` must render the GUID in its dashed,
    // upper-case textual form.
    assert_magic_match_bin!(
        "0 guid x %s",
        b"\xEC\x95\x95\x39\x67\x86\x2D\x4E\x8F\xDB\x98\x81\x4C\xE7\x6C\x1E",
        "EC959539-6786-2D4E-8FDB-98814CE76C1E"
    );
}
4394
#[test]
fn test_ubeqdate() {
    // Checks the `ubeqdate` type on 8-byte inputs. The matching input is
    // 0x0000000061564f80 written most-significant byte first, and
    // 0x61564f80 == 1633046400.
    assert_magic_match_bin!(
        "0 ubeqdate 1633046400 It works",
        b"\x00\x00\x00\x00\x61\x56\x4f\x80"
    );

    // With `%s` the timestamp is expected to render as a calendar date.
    assert_magic_match_bin!(
        "0 ubeqdate x %s",
        b"\x00\x00\x00\x00\x61\x56\x4f\x80",
        "2021-10-01 00:00:00"
    );

    // An all-zero timestamp must not compare equal to 1633046400.
    assert_magic_not_match_bin!(
        "0 ubeqdate 1633046400 It should not work",
        b"\x00\x00\x00\x00\x00\x00\x00\x00"
    );
}
4413
#[test]
fn test_ldate() {
    // Checks the `ldate` type on 4-byte inputs. 60 D4 C8 61 read
    // least-significant byte first is 0x61c8d460 == 1640551520.
    assert_magic_match_bin!("0 ldate 1640551520 It works", b"\x60\xd4\xC8\x61");

    // An all-zero timestamp must not compare equal to 1633046400.
    assert_magic_not_match_bin!("0 ldate 1633046400 It should not work", b"\x00\x00\x00\x00");

    // The expected text is produced by `unix_local_time_to_string` rather
    // than hard-coded.
    // NOTE(review): presumably because the rendering depends on the local
    // time zone of the machine running the test — confirm against the helper.
    assert_magic_match_bin!(
        "0 ldate x %s",
        b"\x60\xd4\xC8\x61",
        unix_local_time_to_string(1640551520)
    );
}
4426
#[test]
fn test_scalar_with_transform() {
    // The input byte is 0x14 (20). The transform written after the type
    // name is expected to apply both to the value being tested (20 / 10
    // compares equal to 2) and to the value rendered in the message.
    assert_magic_match_bin!("0 ubyte/10 2 {}", b"\x14", "2");
    assert_magic_match_bin!("0 ubyte/10 x {}", b"\x14", "2");
    // 20 % 10 == 0.
    assert_magic_match_bin!("0 ubyte%10 x {}", b"\x14", "0");
}
4433
#[test]
fn test_float_with_transform() {
    // 00 00 A0 41 is the little-endian encoding of the f32 20.0
    // (0x41a00000). The transform is expected to apply both to the tested
    // value (20.0 / 10 compares equal to 2) and to the rendered value.
    assert_magic_match_bin!("0 lefloat/10 2 {}", b"\x00\x00\xa0\x41", "2");
    assert_magic_match_bin!("0 lefloat/10 x {}", b"\x00\x00\xa0\x41", "2");
    // The remainder of 20.0 by 10 is 0.
    assert_magic_match_bin!("0 lefloat%10 x {}", b"\x00\x00\xa0\x41", "0");
}
4440
#[test]
fn test_read_octal() {
    // Feeds a literal to `read_octal_u64` through a fresh cache. The token
    // is forwarded untouched so `lazy_cache!` sees exactly what it would
    // see at a direct call site.
    macro_rules! octal_of {
        ($text:tt) => {
            read_octal_u64(&mut lazy_cache!($text))
        };
    }

    // Well-formed input: a leading `0` followed by octal digits. Expected
    // values are spelled as octal literals to mirror the input text.
    assert_eq!(octal_of!("0"), Some(0));
    assert_eq!(octal_of!("00"), Some(0));
    assert_eq!(octal_of!("01"), Some(0o1));
    assert_eq!(octal_of!("07"), Some(0o7));
    assert_eq!(octal_of!("010"), Some(0o10));
    assert_eq!(octal_of!("0123"), Some(0o123));
    assert_eq!(octal_of!("0755"), Some(0o755));

    // Trailing non-digit characters end the number without failing it.
    assert_eq!(octal_of!("0ABC"), Some(0));
    assert_eq!(octal_of!("01ABC"), Some(0o1));
    assert_eq!(octal_of!("0755ABC"), Some(0o755));
    assert_eq!(octal_of!("0123ABC"), Some(0o123));

    // `8` is not an octal digit, so it ends the number as well.
    assert_eq!(octal_of!("08"), Some(0));
    assert_eq!(octal_of!("01238"), Some(0o123));

    // Input lacking the leading `0` is rejected.
    assert_eq!(octal_of!("123"), None);
    assert_eq!(octal_of!("755"), None);

    // Empty input is rejected.
    assert_eq!(octal_of!(""), None);

    // Input that does not start with a digit `0` is rejected.
    assert_eq!(octal_of!("ABC"), None);
    assert_eq!(octal_of!("8ABC"), None);

    // A longer value: 0o1777777777 == 2^28 - 1 == 268435455.
    assert_eq!(octal_of!("01777777777"), Some(0o1777777777));
}
4479
#[test]
fn test_offset_bug_1() {
    // Regression test chaining two `use` calls whose offsets are indirect.
    //
    // Input layout (13 bytes):
    //   0      0x00
    //   1..=4  "TEST"
    //   5      0x06
    //   6..=10 "twice"
    //   11     0x00
    //   12     0x06
    //
    // The expected message concatenates the three rule messages, the last
    // one being the string captured by `%s`.
    assert_magic_match_bin!(
        r"
1 string TEST Bread is
# offset computation is relative to
# rule start
>(5.b) use toasted

0 name toasted
>0 string twice Toasted
>>0 use toasted_twice

0 name toasted_twice
>(6.b) string x %s
 ",
        b"\x00TEST\x06twice\x00\x06",
        "Bread is Toasted twice"
    );
}
4502
#[test]
fn test_offset_bug_2() {
    // Variant of `test_offset_bug_1` where the top-level rule is addressed
    // with a negative offset.
    // NOTE(review): presumably -12 is counted from the end of the 13-byte
    // input, which lands on offset 1 where "TEST" starts — confirm.
    //
    // Input layout (13 bytes):
    //   0      0x00
    //   1..=4  "TEST"
    //   5      0x06
    //   6..=10 "twice"
    //   11     0x00
    //   12     0x06
    //
    // The final rule must use `%s`: a bare `%` has no conversion to receive
    // the captured string, so "twice" could never appear in the message.
    assert_magic_match_bin!(
        r"
-12 string TEST Bread is
>(4.b) use toasted

0 name toasted
>0 string twice Toasted
>>0 use toasted_twice

0 name toasted_twice
>(6.b) string x %s
 ",
        b"\x00TEST\x06twice\x00\x06",
        "Bread is Toasted twice"
    );
}
4528
#[test]
fn test_offset_bug_3() {
    // Regression test combining `indirect/r` with a subsequent `use`.
    //
    // Input layout (13 bytes):
    //   0      0x00
    //   1..=4  "TEST"
    //   5      0x06
    //   6..=10 "twice"
    //   11     0x00
    //   12     0x08
    //
    // After the indirection, the rules written at offset 0 are expected to
    // see "twice", i.e. they are evaluated from the indirect position
    // rather than from the start of the input.
    assert_magic_match_bin!(
        r"
1 string TEST Bread is
>(5.b) indirect/r x

0 string twice Toasted
>0 use toasted_twice

0 name toasted_twice
>0 string x %s
 ",
        b"\x00TEST\x06twice\x00\x08",
        "Bread is Toasted twice"
    )
}
4548
#[test]
fn test_offset_bug_4() {
    // Regression test for base-offset tracking through `indirect/r`
    // followed by a `use` with an indirect offset; the comments inside the
    // rule text describe which base each rule is meant to use.
    //
    // Input layout (24 bytes):
    //   0       0x00
    //   1..=5   "Bread"
    //   6       0x06
    //   7..=16  "is Toasted"
    //   17      0x0c
    //   18..=22 "twice"
    //   23      0x00
    assert_magic_match_bin!(
        r"
1 string Bread %s
>(6.b) indirect/r x

# this one uses a based offset
# computed at indirection
1 string is\ Toasted %s
>(11.b) use toasted_twice

# this one is using a new base
# offset being previous base
# offset + offset of use
0 name toasted_twice
>0 string x %s
 ",
        b"\x00Bread\x06is Toasted\x0ctwice\x00",
        "Bread is Toasted twice"
    )
}
4573
#[test]
fn test_offset_bug_5() {
    // Regression test for a relative offset (`&1`) used inside a named rule
    // reached through `indirect/r` and `use`.
    //
    // Input layout (13 bytes):
    //   0      0x00
    //   1..=4  "TEST"
    //   5      0x06
    //   6..=10 "twice"
    //   11     0x00
    //   12     0x08
    //
    // The `string twice` rule carries no message; the trailing "twice" in
    // the expected output comes from the `byte 0x08` rule, whose value is
    // the last input byte.
    assert_magic_match_bin!(
        r"
1 string TEST Bread is
>(5.b) indirect/r x

0 string twice Toasted
>0 use toasted_twice

0 name toasted_twice
>0 string twice
>>&1 byte 0x08 twice
 ",
        b"\x00TEST\x06twice\x00\x08",
        "Bread is Toasted twice"
    )
}
4592
#[test]
fn test_message_parts() {
    // Rule and sample input are named up front so the call below stays short.
    let rule = r#"0 string/W #!/usr/bin/env\ python PYTHON"#;
    let shebang = b"#!/usr/bin/env python";

    let magic = first_magic(rule, shebang, StreamKind::Text(TextEncoding::Ascii)).unwrap();

    // The rule's message is upper-case, so the lookup ignores ASCII case.
    let mentions_python = magic
        .message_parts()
        .any(|part| part.eq_ignore_ascii_case("python"));
    assert!(mentions_python);
}
4604}