1#![forbid(unsafe_code)]
2#![deny(unused_imports)]
3use dyf::{DynDisplay, FormatString, dformat};
143use flagset::{FlagSet, flags};
144use flate2::{Compression, read::GzDecoder, write::GzEncoder};
145use lazy_cache::LazyCache;
146use memchr::memchr;
147use pest::{Span, error::ErrorVariant};
148use regex::bytes::{self};
149use serde::{Deserialize, Serialize};
150use std::{
151 borrow::Cow,
152 cmp::max,
153 collections::{HashMap, HashSet},
154 fmt::{self, Debug, Display},
155 io::{self, Read, Seek, SeekFrom, Write},
156 ops::{Add, BitAnd, BitOr, BitXor, Div, Mul, Rem, Sub},
157 path::Path,
158};
159use tar::Archive;
160use thiserror::Error;
161use tracing::{Level, debug, enabled, trace};
162
163use crate::{
164 numeric::{Float, FloatDataType, Scalar, ScalarDataType},
165 parser::{FileMagicParser, Rule},
166 utils::{decode_id3, find_json_boundaries, run_utf8_validation},
167};
168
169mod numeric;
170mod parser;
171mod utils;
172
// NOTE(review): presumably the strength given to built-in (non-file) rules;
// the usage site is outside this view — confirm.
const HARDCODED_MAGIC_STRENGTH: u64 = 2048;
// NOTE(review): presumably the source label reported for built-in rules — confirm.
const HARDCODED_SOURCE: &str = "hardcoded";
// NOTE(review): presumably the nesting limit reported through
// `Error::MaximumRecursion` — confirm against the evaluation code.
const MAX_RECURSION: usize = 50;
/// 7 MiB. Within this file it caps the bytes read for search tests and for
/// string tests whose modifiers can change the matched length, and it is the
/// default window assumed when scoring search/regex tests without a length.
pub const FILE_BYTES_MAX: usize = 7 * 1024 * 1024;
/// Number of bytes read for a regex test that carries no explicit length.
const FILE_REGEX_MAX: usize = 8192;

/// MIME type string for generic binary content.
pub const DEFAULT_BIN_MIMETYPE: &str = "application/octet-stream";
/// MIME type string for generic text content.
pub const DEFAULT_TEXT_MIMETYPE: &str = "text/plain";

// NOTE(review): looks like a strftime-style pattern (`YYYY-MM-DD HH:MM:SS`);
// the formatter consuming it is not visible here.
pub(crate) const TIMESTAMP_FORMAT: &str = "%Y-%m-%d %H:%M:%S";
186
/// Panics with the given `panic!`-style arguments, but only when the crate is
/// compiled with debug assertions; otherwise execution simply continues.
macro_rules! debug_panic {
    ($($panic_args:tt)*) => {
        match cfg!(debug_assertions) {
            true => {
                panic!($($panic_args)*);
            }
            false => {}
        }
    };
}
194
/// Reads exactly `size_of::<$ty>()` bytes from `$r` and yields them as a
/// fixed-size byte array. Uses `?`, so the caller must return a `Result`
/// able to absorb the reader's error.
macro_rules! read {
    ($r: expr, $ty: ty) => {{
        let mut raw = [0u8; std::mem::size_of::<$ty>()];
        $r.read_exact(&mut raw)?;
        raw
    }};
}

/// Reads a `$ty` stored in little-endian byte order.
macro_rules! read_le {
    ($r:expr, $ty: ty ) => {{
        let raw = read!($r, $ty);
        <$ty>::from_le_bytes(raw)
    }};
}

/// Reads a `$ty` stored in big-endian byte order.
macro_rules! read_be {
    ($r:expr, $ty: ty ) => {{
        let raw = read!($r, $ty);
        <$ty>::from_be_bytes(raw)
    }};
}

/// Reads a 32-bit value made of two consecutive little-endian 16-bit words,
/// the first word being the most significant one.
macro_rules! read_me {
    ($r: expr) => {{
        let high = read_le!($r, u16) as i32;
        let low = read_le!($r, u16) as i32;
        (high << 16) | low
    }};
}
214
215#[inline(always)]
216fn read_octal_u64<R: Read + Seek>(haystack: &mut LazyCache<R>) -> Option<u64> {
217 let s = haystack
218 .read_while_or_limit(|b| matches!(b, b'0'..=b'7'), 22)
219 .map(|buf| str::from_utf8(buf))
220 .ok()?
221 .ok()?;
222
223 if !s.starts_with("0") {
224 return None;
225 }
226
227 u64::from_str_radix(s, 8).ok()
228}
229
/// Error type used throughout this crate.
#[derive(Debug, Error)]
pub enum Error {
    /// Free-form error message.
    #[error("{0}")]
    Msg(String),

    /// An error annotated with a source name and a line number; the wrapped
    /// error is the underlying cause.
    #[error("source={0} line={1} error={2}")]
    Localized(String, usize, Box<Error>),

    /// A rule referenced by name could not be found.
    #[error("missing rule: {0}")]
    MissingRule(String),

    /// The recursion limit was hit; the payload is the value reported in the
    /// message.
    #[error("maximum recursion reached: {0}")]
    MaximumRecursion(usize),

    /// Wrapper around [`io::Error`].
    #[error("io: {0}")]
    Io(#[from] io::Error),

    /// Error produced by the `pest` based parser (boxed).
    #[error("parser error: {0}")]
    Parse(#[from] Box<pest::error::Error<Rule>>),

    /// Error produced by the `dyf` dynamic formatter.
    #[error("formatting: {0}")]
    Format(#[from] dyf::Error),

    /// Error produced when building a regular expression.
    #[error("regex: {0}")]
    Regex(#[from] regex::Error),

    /// `bincode` encoding error.
    #[error("{0}")]
    Serialize(#[from] bincode::error::EncodeError),

    /// `bincode` decoding error.
    #[error("{0}")]
    Deserialize(#[from] bincode::error::DecodeError),
}
273
274impl Error {
275 #[inline]
276 fn parser<S: ToString>(msg: S, span: Span<'_>) -> Self {
277 Self::Parse(Box::new(pest::error::Error::new_from_span(
278 ErrorVariant::CustomError {
279 message: msg.to_string(),
280 },
281 span,
282 )))
283 }
284
285 fn msg<M: AsRef<str>>(msg: M) -> Self {
286 Self::Msg(msg.as_ref().into())
287 }
288
289 fn localized<S: AsRef<str>>(source: S, line: usize, err: Error) -> Self {
290 Self::Localized(source.as_ref().into(), line, err.into())
291 }
292
293 pub fn unwrap_localized(&self) -> &Self {
295 match self {
296 Self::Localized(_, _, e) => e,
297 _ => self,
298 }
299 }
300}
301
/// Text attached to a rule, either fixed or a format template.
#[derive(Debug, Clone, Serialize, Deserialize)]
enum Message {
    /// Plain text, emitted as is.
    String(String),
    /// Template into which a matched value is substituted.
    Format {
        /// Conversion specifier of the template; this file only checks it
        /// against `"c"` to render byte-sized scalars as characters.
        printf_spec: String,
        /// Format string handed to `dformat!`.
        fs: FormatString,
    },
}
310
311impl Display for Message {
312 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
313 match self {
314 Self::String(s) => write!(f, "{s}"),
315 Self::Format { printf_spec: _, fs } => write!(f, "{}", fs.to_string_lossy()),
316 }
317 }
318}
319
320impl Message {
321 fn to_string_lossy(&self) -> Cow<'_, str> {
322 match self {
323 Message::String(s) => Cow::Borrowed(s),
324 Message::Format { printf_spec: _, fs } => fs.to_string_lossy(),
325 }
326 }
327
328 #[inline(always)]
329 fn format_with(&self, mr: Option<&MatchRes>) -> Result<Cow<'_, str>, Error> {
330 match self {
331 Self::String(s) => Ok(Cow::Borrowed(s.as_str())),
332 Self::Format {
333 printf_spec: c_spec,
334 fs,
335 } => {
336 if let Some(mr) = mr {
337 match mr {
338 MatchRes::Float(_, _) | MatchRes::Bytes(_, _, _, _) => {
339 Ok(Cow::Owned(dformat!(fs, mr)?))
340 }
341 MatchRes::Scalar(_, scalar) => {
342 if c_spec.as_str() == "c" {
344 match scalar {
345 Scalar::byte(b) => {
346 let b = (*b as u8) as char;
347 Ok(Cow::Owned(dformat!(fs, b)?))
348 }
349 Scalar::ubyte(b) => {
350 let b = *b as char;
351 Ok(Cow::Owned(dformat!(fs, b)?))
352 }
353 _ => Ok(Cow::Owned(dformat!(fs, mr)?)),
354 }
355 } else {
356 Ok(Cow::Owned(dformat!(fs, mr)?))
357 }
358 }
359 }
360 } else {
361 Ok(fs.to_string_lossy())
362 }
363 }
364 }
365 }
366}
367
368impl ScalarDataType {
369 #[inline(always)]
370 fn read<R: Read + Seek>(&self, from: &mut R, switch_endianness: bool) -> Result<Scalar, Error> {
371 macro_rules! _read_le {
372 ($ty: ty) => {{
373 if switch_endianness {
374 <$ty>::from_be_bytes(read!(from, $ty))
375 } else {
376 <$ty>::from_le_bytes(read!(from, $ty))
377 }
378 }};
379 }
380
381 macro_rules! _read_be {
382 ($ty: ty) => {{
383 if switch_endianness {
384 <$ty>::from_le_bytes(read!(from, $ty))
385 } else {
386 <$ty>::from_be_bytes(read!(from, $ty))
387 }
388 }};
389 }
390
391 macro_rules! _read_ne {
392 ($ty: ty) => {{
393 if cfg!(target_endian = "big") {
394 _read_be!($ty)
395 } else {
396 _read_le!($ty)
397 }
398 }};
399 }
400
401 macro_rules! _read_me {
402 () => {
403 ((_read_le!(u16) as i32) << 16) | (_read_le!(u16) as i32)
404 };
405 }
406
407 Ok(match self {
408 Self::byte => Scalar::byte(read!(from, u8)[0] as i8),
410 Self::short => Scalar::short(_read_ne!(i16)),
411 Self::long => Scalar::long(_read_ne!(i32)),
412 Self::date => Scalar::date(_read_ne!(i32)),
413 Self::ldate => Scalar::ldate(_read_ne!(i32)),
414 Self::qwdate => Scalar::qwdate(_read_ne!(i64)),
415 Self::leshort => Scalar::leshort(_read_le!(i16)),
416 Self::lelong => Scalar::lelong(_read_le!(i32)),
417 Self::lequad => Scalar::lequad(_read_le!(i64)),
418 Self::bequad => Scalar::bequad(_read_be!(i64)),
419 Self::belong => Scalar::belong(_read_be!(i32)),
420 Self::bedate => Scalar::bedate(_read_be!(i32)),
421 Self::beldate => Scalar::beldate(_read_be!(i32)),
422 Self::beqdate => Scalar::beqdate(_read_be!(i64)),
423 Self::ubyte => Scalar::ubyte(read!(from, u8)[0]),
425 Self::ushort => Scalar::ushort(_read_ne!(u16)),
426 Self::uleshort => Scalar::uleshort(_read_le!(u16)),
427 Self::ulelong => Scalar::ulelong(_read_le!(u32)),
428 Self::uledate => Scalar::uledate(_read_le!(u32)),
429 Self::ulequad => Scalar::ulequad(_read_le!(u64)),
430 Self::offset => Scalar::offset(from.stream_position()?),
431 Self::ubequad => Scalar::ubequad(_read_be!(u64)),
432 Self::medate => Scalar::medate(_read_me!()),
433 Self::meldate => Scalar::meldate(_read_me!()),
434 Self::melong => Scalar::melong(_read_me!()),
435 Self::beshort => Scalar::beshort(_read_be!(i16)),
436 Self::quad => Scalar::quad(_read_ne!(i64)),
437 Self::uquad => Scalar::uquad(_read_ne!(u64)),
438 Self::ledate => Scalar::ledate(_read_le!(i32)),
439 Self::leldate => Scalar::leldate(_read_le!(i32)),
440 Self::leqdate => Scalar::leqdate(_read_le!(i64)),
441 Self::leqldate => Scalar::leqldate(_read_le!(i64)),
442 Self::leqwdate => Scalar::leqwdate(_read_le!(i64)),
443 Self::ubelong => Scalar::ubelong(_read_be!(u32)),
444 Self::ulong => Scalar::ulong(_read_ne!(u32)),
445 Self::ubeshort => Scalar::ubeshort(_read_be!(u16)),
446 Self::ubeqdate => Scalar::ubeqdate(_read_be!(u64)),
447 Self::lemsdosdate => Scalar::lemsdosdate(_read_le!(u16)),
448 Self::lemsdostime => Scalar::lemsdostime(_read_le!(u16)),
449 Self::guid => Scalar::guid(u128::from_be_bytes(read!(from, u128))),
450 })
451 }
452}
453
454impl FloatDataType {
455 #[inline(always)]
456 fn read<R: Read + Seek>(&self, from: &mut R, switch_endianness: bool) -> Result<Float, Error> {
457 macro_rules! _read_le {
458 ($ty: ty) => {{
459 if switch_endianness {
460 <$ty>::from_be_bytes(read!(from, $ty))
461 } else {
462 <$ty>::from_le_bytes(read!(from, $ty))
463 }
464 }};
465 }
466
467 macro_rules! _read_be {
468 ($ty: ty) => {{
469 if switch_endianness {
470 <$ty>::from_le_bytes(read!(from, $ty))
471 } else {
472 <$ty>::from_be_bytes(read!(from, $ty))
473 }
474 }};
475 }
476
477 macro_rules! _read_ne {
478 ($ty: ty) => {{
479 if cfg!(target_endian = "big") {
480 _read_be!($ty)
481 } else {
482 _read_le!($ty)
483 }
484 }};
485 }
486
487 macro_rules! _read_me {
488 () => {
489 ((_read_le!(u16) as i32) << 16) | (_read_le!(u16) as i32)
490 };
491 }
492
493 Ok(match self {
494 Self::lefloat => Float::lefloat(_read_le!(f32)),
495 Self::befloat => Float::befloat(_read_le!(f32)),
496 Self::ledouble => Float::ledouble(_read_le!(f64)),
497 Self::bedouble => Float::bedouble(_read_be!(f64)),
498 })
499 }
500}
501
/// Arithmetic or bitwise operator applied to a value read from the data
/// before it is compared.
// Variant order is part of the serialized representation: do not reorder.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
enum Op {
    /// Multiplication, displayed as `*`.
    Mul,
    /// Addition, displayed as `+`.
    Add,
    /// Subtraction, displayed as `-`.
    Sub,
    /// Division, displayed as `/`.
    Div,
    /// Remainder, displayed as `%`.
    Mod,
    /// Bitwise and, displayed as `&`.
    And,
    /// Bitwise xor, displayed as `^`.
    Xor,
    /// Bitwise or, displayed as `|`.
    Or,
}
513
514impl Display for Op {
515 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
516 match self {
517 Op::Mul => write!(f, "*"),
518 Op::Add => write!(f, "+"),
519 Op::Sub => write!(f, "-"),
520 Op::Div => write!(f, "/"),
521 Op::Mod => write!(f, "%"),
522 Op::And => write!(f, "&"),
523 Op::Or => write!(f, "|"),
524 Op::Xor => write!(f, "^"),
525 }
526 }
527}
528
/// Comparison applied between the value read from the data and the test
/// value. The per-variant notes describe the scalar comparison implemented in
/// `Test::match_value`.
// Variant order is part of the serialized representation: do not reorder.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
enum CmpOp {
    /// Read value equals the test value.
    Eq,
    /// Read value is lower than the test value.
    Lt,
    /// Read value is greater than the test value.
    Gt,
    /// Every bit set in the test value is set in the read value.
    BitAnd,
    /// Read value differs from the test value.
    Neq,
    /// No bit set in the test value is set in the read value.
    Xor,
    /// Read value equals the bitwise negation of the test value.
    Not,
}
539
540impl CmpOp {
541 #[inline(always)]
542 fn is_neq(&self) -> bool {
543 matches!(self, Self::Neq)
544 }
545}
546
/// Operation applied to a scalar read from the data before comparison.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct ScalarTransform {
    /// Operator to apply.
    op: Op,
    /// Right-hand operand of the operation.
    num: Scalar,
}
552
553impl ScalarTransform {
554 fn apply(&self, s: Scalar) -> Option<Scalar> {
555 match self.op {
556 Op::Add => s.checked_add(self.num),
557 Op::Sub => s.checked_sub(self.num),
558 Op::Mul => s.checked_mul(self.num),
559 Op::Div => s.checked_div(self.num),
560 Op::Mod => s.checked_rem(self.num),
561 Op::And => Some(s.bitand(self.num)),
562 Op::Xor => Some(s.bitxor(self.num)),
563 Op::Or => Some(s.bitor(self.num)),
564 }
565 }
566}
567
/// Operation applied to a float read from the data before comparison.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct FloatTransform {
    /// Operator to apply; bitwise operators are not supported for floats.
    op: Op,
    /// Right-hand operand of the operation.
    num: Float,
}
573
574impl FloatTransform {
575 fn apply(&self, s: Float) -> Float {
576 match self.op {
577 Op::Add => s.add(self.num),
578 Op::Sub => s.sub(self.num),
579 Op::Mul => s.mul(self.num),
580 Op::Div => s.div(self.num),
582 Op::Mod => s.rem(self.num),
584 Op::And | Op::Xor | Op::Or => {
586 debug_panic!("unsupported operation");
587 s
588 }
589 }
590 }
591}
592
/// Value a test compares against.
#[derive(Debug, Clone, Serialize, Deserialize)]
enum TestValue<T> {
    /// A concrete value to compare with.
    Value(T),
    /// Wildcard: whatever is read is accepted.
    Any,
}
598
599impl<T> TestValue<T> {
600 #[inline(always)]
601 fn as_ref(&self) -> TestValue<&T> {
602 match self {
603 Self::Value(v) => TestValue::Value(v),
604 Self::Any => TestValue::Any,
605 }
606 }
607}
608
// Modifier flags attached to regex tests (also carried by search tests).
// NOTE(review): meanings below are partly inferred from the flag names;
// only `LineLimit` and `ForceBin` are exercised in the code visible here.
flags! {
    enum ReMod: u8{
        // Presumably case-insensitive matching — confirm at regex build time.
        CaseInsensitive,
        // Presumably moves the offset to the start of the match — confirm.
        StartOffsetUpdate,
        // The test length counts lines: the read size becomes `length * 80`.
        LineLimit,
        // Makes `is_binary()` report true.
        ForceBin,
        // Counterpart of `ForceBin` for text; checked by `SearchTest::is_binary`.
        ForceText,
        // Presumably trims the matched bytes — confirm.
        TrimMatch,
    }
}
619
620fn serialize_regex<S>(re: &bytes::Regex, serializer: S) -> Result<S::Ok, S::Error>
621where
622 S: serde::Serializer,
623{
624 re.as_str().serialize(serializer)
625}
626
627fn deserialize_regex<'de, D>(deserializer: D) -> Result<bytes::Regex, D::Error>
628where
629 D: serde::Deserializer<'de>,
630{
631 let wrapper = String::deserialize(deserializer)?;
632 bytes::Regex::new(&wrapper).map_err(serde::de::Error::custom)
633}
634
/// Test matching a regular expression against the data.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct RegexTest {
    /// Compiled expression; (de)serialized through its pattern string.
    #[serde(
        serialize_with = "serialize_regex",
        deserialize_with = "deserialize_regex"
    )]
    re: bytes::Regex,
    /// Optional limit: a number of lines when matching text (and when
    /// `ReMod::LineLimit` scales the read size), a number of bytes otherwise.
    length: Option<usize>,
    /// Regex modifiers.
    mods: FlagSet<ReMod>,
    /// String modifiers.
    str_mods: FlagSet<StringMod>,
    // NOTE(review): presumably the count of literal (non-metacharacter) bytes
    // in the pattern; only used when computing the test strength.
    non_magic_len: usize,
    /// Whether the test targets binary data.
    binary: bool,
    /// Comparison operator; only `Neq` is looked at when matching.
    cmp_op: CmpOp,
}
649
650impl RegexTest {
651 #[inline(always)]
652 fn is_binary(&self) -> bool {
653 self.binary
654 || self.mods.contains(ReMod::ForceBin)
655 || self.str_mods.contains(StringMod::ForceBin)
656 }
657
658 fn match_buf<'buf>(
659 &self,
660 off_buf: u64, stream_kind: StreamKind,
662 buf: &'buf [u8],
663 ) -> Option<MatchRes<'buf>> {
664 let mr = match stream_kind {
665 StreamKind::Text(_) => {
666 let mut off_txt = off_buf;
667
668 let mut line_limit = self.length.unwrap_or(usize::MAX);
669
670 for line in buf.split(|c| c == &b'\n') {
671 if line_limit == 0 {
675 break;
676 }
677
678 if let Some(re_match) = self.re.find(line) {
679 let start_offset = off_txt + re_match.start() as u64;
681
682 let stop_offset = if re_match.end() == line.len() {
684 Some(start_offset + re_match.as_bytes().len() as u64 + 1)
685 } else {
686 None
687 };
688
689 return Some(MatchRes::Bytes(
690 start_offset,
691 stop_offset,
692 re_match.as_bytes(),
693 Encoding::Utf8,
694 ));
695 }
696
697 off_txt += line.len() as u64;
698 off_txt += 1;
700 line_limit = line_limit.saturating_sub(1)
701 }
702 None
703 }
704
705 StreamKind::Binary => {
706 self.re.find(buf).map(|re_match| {
707 MatchRes::Bytes(
708 off_buf + re_match.start() as u64,
710 None,
711 re_match.as_bytes(),
712 Encoding::Utf8,
713 )
714 })
715 }
716 };
717
718 if self.cmp_op.is_neq() && mr.is_none() {
720 return Some(MatchRes::Bytes(off_buf, None, buf, Encoding::Utf8));
721 }
722
723 mr
724 }
725}
726
727impl From<RegexTest> for Test {
728 fn from(value: RegexTest) -> Self {
729 Self::Regex(value)
730 }
731}
732
// Modifier flags attached to string (and search/regex) tests.
flags! {
    enum StringMod: u8{
        // Makes `is_binary()` report true.
        ForceBin,
        // Upper-case bytes of the test string also match their lower-case
        // form in the data.
        UpperInsensitive,
        // Lower-case bytes of the test string also match their upper-case
        // form in the data.
        LowerInsensitive,
        // The match must be followed by ASCII whitespace or the end of data.
        FullWordMatch,
        // The reported bytes are trimmed of surrounding ASCII whitespace.
        Trim,
        // Makes `StringTest::is_text()` report true.
        ForceText,
        // A run of spaces in the test string needs at least as many spaces
        // in the data.
        CompactWhitespace,
        // Spaces are optional on both sides of the comparison.
        OptBlank,
    }
}
745
/// Test comparing bytes of the data with a string.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct StringTest {
    /// Expected bytes, or `Any` to accept whatever string is read.
    test_val: TestValue<Vec<u8>>,
    /// Comparison operator; `Eq`, `Neq`, `Lt` and `Gt` are supported.
    cmp_op: CmpOp,
    /// When set, exact number of bytes to read for the comparison.
    length: Option<usize>,
    /// String modifiers.
    mods: FlagSet<StringMod>,
    /// Whether the test targets binary data.
    binary: bool,
}
754
755impl From<StringTest> for Test {
756 fn from(value: StringTest) -> Self {
757 Self::String(value)
758 }
759}
760
/// Checks whether `buf` starts with the test string `str`, honouring `mods`.
///
/// Returns `(matched, consumed)`. On the fast path (no modifier altering the
/// comparison) `consumed` is `str.len()` on success and `0` on failure. On
/// the slow path it is the number of bytes of `buf` walked over so far, even
/// when the comparison fails.
#[inline(always)]
fn string_match(str: &[u8], mods: FlagSet<StringMod>, buf: &[u8]) -> (bool, usize) {
    let mut consumed = 0;
    // None of the modifiers changing how bytes compare is set: a plain
    // prefix check is enough.
    if mods.is_disjoint(
        StringMod::UpperInsensitive
            | StringMod::LowerInsensitive
            | StringMod::FullWordMatch
            | StringMod::CompactWhitespace
            | StringMod::OptBlank,
    ) {
        if buf.starts_with(str) {
            (true, str.len())
        } else {
            (false, consumed)
        }
    } else {
        // Cursor in the test string; `iter` is the cursor in the data.
        let mut i_src = 0;
        let mut iter = buf.iter().peekable();

        // Advances the data cursor by one byte, if any is left.
        macro_rules! consume_target {
            () => {{
                if iter.next().is_some() {
                    consumed += 1;
                }
            }};
        }

        // Both current bytes matched: advance both cursors and loop.
        macro_rules! continue_next_iteration {
            () => {{
                consume_target!();
                i_src += 1;
                continue;
            }};
        }

        while let Some(&&b) = iter.peek() {
            // Test string exhausted: stop comparing.
            let Some(&ref_byte) = str.get(i_src) else {
                break;
            };

            // Optional blanks: a space on either side is skipped without
            // requiring a counterpart on the other side.
            if mods.contains(StringMod::OptBlank) && (b == b' ' || ref_byte == b' ') {
                if b == b' ' {
                    consume_target!();
                }

                if ref_byte == b' ' {
                    i_src += 1;
                }

                continue;
            }

            // An upper-case test byte matches both cases in the data.
            if mods.contains(StringMod::UpperInsensitive) {
                if ref_byte.is_ascii_uppercase() && ref_byte == b.to_ascii_uppercase()
                    || ref_byte == b
                {
                    continue_next_iteration!()
                }
            }

            // A lower-case test byte matches both cases in the data.
            if mods.contains(StringMod::LowerInsensitive)
                && (ref_byte.is_ascii_lowercase() && ref_byte == b.to_ascii_lowercase()
                    || ref_byte == b)
            {
                continue_next_iteration!()
            }

            // Whitespace compaction: the data must hold at least as many
            // consecutive spaces as the test string does.
            if mods.contains(StringMod::CompactWhitespace) && ref_byte == b' ' {
                let mut src_blk = 0;
                while let Some(b' ') = str.get(i_src) {
                    src_blk += 1;
                    i_src += 1;
                }

                let mut tgt_blk = 0;
                while let Some(b' ') = iter.peek() {
                    tgt_blk += 1;
                    consume_target!();
                }

                if src_blk > tgt_blk {
                    return (false, consumed);
                }

                continue;
            }

            // Default: strict byte equality.
            if ref_byte == b {
                continue_next_iteration!()
            } else {
                return (false, consumed);
            }
        }

        // Full word: the byte following the match, if any, must be ASCII
        // whitespace.
        if mods.contains(StringMod::FullWordMatch)
            && let Some(b) = iter.peek()
            && !b.is_ascii_whitespace()
        {
            return (false, consumed);
        }

        // Success requires that something was consumed and that the whole
        // test string was walked through.
        (
            consumed > 0 && str.get(i_src).is_none() && consumed <= buf.len(),
            consumed,
        )
    }
}
873
874impl StringTest {
875 fn has_length_mod(&self) -> bool {
876 !self.mods.is_disjoint(
877 StringMod::UpperInsensitive
878 | StringMod::LowerInsensitive
879 | StringMod::FullWordMatch
880 | StringMod::CompactWhitespace
881 | StringMod::OptBlank,
882 )
883 }
884
885 #[inline(always)]
886 fn test_value_len(&self) -> usize {
887 match self.test_val.as_ref() {
888 TestValue::Value(s) => s.len(),
889 TestValue::Any => 0,
890 }
891 }
892
893 #[inline(always)]
894 fn is_binary(&self) -> bool {
895 self.binary || self.mods.contains(StringMod::ForceBin)
896 }
897
898 #[inline(always)]
899 fn is_text(&self) -> bool {
900 self.mods.contains(StringMod::ForceText)
901 }
902}
903
/// Test looking for a string anywhere in a window of the data.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct SearchTest {
    /// Bytes to look for.
    str: Vec<u8>,
    /// When set, candidate positions beyond this index (relative to the
    /// start of the searched buffer) are not considered.
    n_pos: Option<usize>,
    /// String modifiers.
    str_mods: FlagSet<StringMod>,
    /// Regex modifiers; only the force binary/text flags are read here.
    re_mods: FlagSet<ReMod>,
    /// Whether the test targets binary data.
    binary: bool,
    /// Comparison operator; only `Neq` is looked at when matching.
    cmp_op: CmpOp,
}
913
914impl From<SearchTest> for Test {
915 fn from(value: SearchTest) -> Self {
916 Self::Search(value)
917 }
918}
919
920impl SearchTest {
921 #[inline(always)]
922 fn is_binary(&self) -> bool {
923 (self.binary
924 || self.str_mods.contains(StringMod::ForceBin)
925 || self.re_mods.contains(ReMod::ForceBin))
926 && !(self.str_mods.contains(StringMod::ForceText)
927 || self.re_mods.contains(ReMod::ForceText))
928 }
929
930 #[inline]
932 fn match_buf<'buf>(&self, off_buf: u64, buf: &'buf [u8]) -> Option<MatchRes<'buf>> {
933 let mut i = 0;
934
935 let needle = self.str.first()?;
936
937 while i < buf.len() {
938 i += memchr(*needle, &buf[i..])?;
941
942 if self.str_mods.contains(StringMod::FullWordMatch) {
944 let prev_is_whitespace = buf
945 .get(i.saturating_sub(1))
946 .map(|c| c.is_ascii_whitespace())
947 .unwrap_or_default();
948
949 if i > 0 && !prev_is_whitespace {
954 i += 1;
955 continue;
956 }
957 }
958
959 if let Some(npos) = self.n_pos
960 && i > npos
961 {
962 break;
963 }
964
965 let pos = i;
966 let (ok, consumed) = string_match(&self.str, self.str_mods, &buf[i..]);
967
968 if ok {
969 return Some(MatchRes::Bytes(
970 off_buf.saturating_add(pos as u64),
971 None,
972 &buf[i..i + consumed],
973 Encoding::Utf8,
974 ));
975 } else {
976 i += max(consumed, 1)
977 }
978 }
979
980 if self.cmp_op.is_neq() {
982 return Some(MatchRes::Bytes(off_buf, None, buf, Encoding::Utf8));
983 }
984
985 None
986 }
987}
988
/// Test comparing an integer-like value read from the data.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct ScalarTest {
    /// Type (width, signedness, byte order) of the value to read.
    ty: ScalarDataType,
    /// Optional operation applied to the read value before comparing.
    transform: Option<ScalarTransform>,
    /// Comparison operator.
    cmp_op: CmpOp,
    /// Value to compare with, or `Any`.
    test_val: TestValue<Scalar>,
}
996
/// Test comparing a floating point value read from the data.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct FloatTest {
    /// Type (width, byte order) of the value to read.
    ty: FloatDataType,
    /// Optional operation applied to the read value before comparing.
    transform: Option<FloatTransform>,
    /// Comparison operator; `Eq`, `Lt`, `Gt` and `Neq` are supported.
    cmp_op: CmpOp,
    /// Value to compare with, or `Any`.
    test_val: TestValue<Float>,
}
1004
/// Raw value read from the data for a test, before any comparison. The
/// first field of every variant is the offset the value was read from.
#[derive(Debug, PartialEq)]
enum ReadValue<'buf> {
    /// Floating point value.
    Float(u64, Float),
    /// Integer-like value.
    Scalar(u64, Scalar),
    /// Bytes borrowed from the read buffer.
    Bytes(u64, &'buf [u8]),
}
1013
1014impl DynDisplay for ReadValue<'_> {
1015 fn dyn_fmt(&self, f: &dyf::FormatSpec) -> Result<String, dyf::Error> {
1016 match self {
1017 Self::Float(_, s) => DynDisplay::dyn_fmt(s, f),
1018 Self::Scalar(_, s) => DynDisplay::dyn_fmt(s, f),
1019 Self::Bytes(_, b) => Ok(format!("{b:?}")),
1020 }
1021 }
1022}
1023
1024impl DynDisplay for &ReadValue<'_> {
1025 fn dyn_fmt(&self, f: &dyf::FormatSpec) -> Result<String, dyf::Error> {
1026 DynDisplay::dyn_fmt(*self, f)
1028 }
1029}
1030
1031impl Display for ReadValue<'_> {
1032 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1033 match self {
1034 Self::Float(_, v) => write!(f, "{v}"),
1035 Self::Scalar(_, s) => write!(f, "{s}"),
1036 Self::Bytes(_, b) => write!(f, "{b:?}"),
1037 }
1038 }
1039}
1040
/// Encoding of the bytes carried by a `MatchRes::Bytes`, used to turn them
/// into text when formatting.
enum Encoding {
    /// UTF-16 with the given byte order.
    Utf16(String16Encoding),
    /// UTF-8 (decoded lossily).
    Utf8,
}
1045
/// Outcome of a successful test.
enum MatchRes<'buf> {
    /// Matched bytes: start offset, optional explicit end offset (when absent
    /// the end is start + length of the bytes), the bytes and their encoding.
    Bytes(u64, Option<u64>, &'buf [u8], Encoding),
    /// Matched scalar and the offset it was read from.
    Scalar(u64, Scalar),
    /// Matched float and the offset it was read from.
    Float(u64, Float),
}
1057
1058impl DynDisplay for &MatchRes<'_> {
1059 fn dyn_fmt(&self, f: &dyf::FormatSpec) -> Result<String, dyf::Error> {
1060 (*self).dyn_fmt(f)
1061 }
1062}
1063
1064impl DynDisplay for MatchRes<'_> {
1065 fn dyn_fmt(&self, f: &dyf::FormatSpec) -> Result<String, dyf::Error> {
1066 match self {
1067 Self::Scalar(_, v) => v.dyn_fmt(f),
1068 Self::Float(_, v) => v.dyn_fmt(f),
1069 Self::Bytes(_, _, v, enc) => match enc {
1070 Encoding::Utf8 => String::from_utf8_lossy(v).to_string().dyn_fmt(f),
1071 Encoding::Utf16(enc) => {
1072 let utf16: Vec<u16> = slice_to_utf16_iter(v, *enc).collect();
1073 String::from_utf16_lossy(&utf16).dyn_fmt(f)
1074 }
1075 },
1076 }
1077 }
1078}
1079
1080impl MatchRes<'_> {
1081 #[inline]
1083 fn start_offset(&self) -> u64 {
1084 match self {
1085 MatchRes::Bytes(o, _, _, _) => *o,
1086 MatchRes::Scalar(o, _) => *o,
1087 MatchRes::Float(o, _) => *o,
1088 }
1089 }
1090
1091 #[inline]
1093 fn end_offset(&self) -> u64 {
1094 match self {
1095 MatchRes::Bytes(start, end, buf, _) => match end {
1096 Some(end) => *end,
1097 None => start.saturating_add(buf.len() as u64),
1098 },
1099 MatchRes::Scalar(o, sc) => o.add(sc.size_of() as u64),
1100 MatchRes::Float(o, f) => o.add(f.size_of() as u64),
1101 }
1102 }
1103}
1104
1105fn slice_to_utf16_iter(read: &[u8], encoding: String16Encoding) -> impl Iterator<Item = u16> {
1106 let even = read
1107 .iter()
1108 .enumerate()
1109 .filter(|(i, _)| i % 2 == 0)
1110 .map(|t| t.1);
1111
1112 let odd = read
1113 .iter()
1114 .enumerate()
1115 .filter(|(i, _)| i % 2 != 0)
1116 .map(|t| t.1);
1117
1118 even.zip(odd).map(move |(e, o)| match encoding {
1119 String16Encoding::Le => u16::from_le_bytes([*e, *o]),
1120 String16Encoding::Be => u16::from_be_bytes([*e, *o]),
1121 })
1122}
1123
/// Byte order of the 16-bit units of a UTF-16 string.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
enum String16Encoding {
    /// Little-endian units.
    Le,
    /// Big-endian units.
    Be,
}
1129
/// Test comparing the data with a UTF-16 string.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct String16Test {
    /// Text whose bytes are reported as the match when the comparison with a
    /// concrete value succeeds.
    // NOTE(review): presumably the test string as written in the rule — confirm.
    orig: String,
    /// Expected UTF-16 code units, or `Any`.
    test_val: TestValue<Vec<u16>>,
    /// Byte order used to decode the data.
    encoding: String16Encoding,
}
1136
1137impl String16Test {
1138 #[inline(always)]
1142 fn test_value_len(&self) -> usize {
1143 match self.test_val.as_ref() {
1144 TestValue::Value(str16) => str16.len(),
1145 TestValue::Any => 0,
1146 }
1147 }
1148}
1149
// Modifier flags attached to indirect tests.
flags! {
    enum IndirectMod: u8{
        // NOTE(review): presumably makes the indirect offset relative to the
        // current one; the consumer is outside this view — confirm.
        Relative,
    }
}

/// Set of [`IndirectMod`] flags.
type IndirectMods = FlagSet<IndirectMod>;
1157
/// Encoding of the length prefix of a length-prefixed string.
// Variant order is part of the serialized representation: do not reorder.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
enum PStringLen {
    /// One byte.
    Byte,
    /// Two bytes, big-endian.
    ShortBe,
    /// Two bytes, little-endian.
    ShortLe,
    /// Four bytes, big-endian.
    LongBe,
    /// Four bytes, little-endian.
    LongLe,
}
1166
1167impl PStringLen {
1168 #[inline(always)]
1169 const fn size_of_len(&self) -> usize {
1170 match self {
1171 PStringLen::Byte => 1,
1172 PStringLen::ShortBe => 2,
1173 PStringLen::ShortLe => 2,
1174 PStringLen::LongBe => 4,
1175 PStringLen::LongLe => 4,
1176 }
1177 }
1178}
1179
/// Test comparing the data with a length-prefixed string.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct PStringTest {
    /// Encoding of the length prefix.
    len: PStringLen,
    /// Expected string bytes, or `Any`.
    test_val: TestValue<Vec<u8>>,
    /// Whether the stored length also counts the prefix bytes.
    include_len: bool,
}
1186
1187impl PStringTest {
1188 #[inline]
1189 fn read<'cache, R: Read + Seek>(
1190 &self,
1191 haystack: &'cache mut LazyCache<R>,
1192 ) -> Result<Option<&'cache [u8]>, Error> {
1193 let mut len = match self.len {
1194 PStringLen::Byte => read_le!(haystack, u8) as u32,
1195 PStringLen::ShortBe => read_be!(haystack, u16) as u32,
1196 PStringLen::ShortLe => read_le!(haystack, u16) as u32,
1197 PStringLen::LongBe => read_be!(haystack, u32),
1198 PStringLen::LongLe => read_le!(haystack, u32),
1199 } as usize;
1200
1201 if self.include_len {
1202 len = len.saturating_sub(self.len.size_of_len())
1203 }
1204
1205 if let TestValue::Value(s) = self.test_val.as_ref()
1206 && len != s.len()
1207 {
1208 return Ok(None);
1209 }
1210
1211 let read = haystack.read_exact_count(len as u64)?;
1212
1213 Ok(Some(read))
1214 }
1215
1216 #[inline(always)]
1217 fn test_value_len(&self) -> usize {
1218 match self.test_val.as_ref() {
1219 TestValue::Value(s) => s.len(),
1220 TestValue::Any => 0,
1221 }
1222 }
1223}
1224
/// The different kinds of test a rule can carry.
// Variant order is part of the serialized representation: do not reorder.
#[derive(Debug, Clone, Serialize, Deserialize)]
enum Test {
    // NOTE(review): presumably declares a named rule — handled outside this view.
    Name(String),
    // NOTE(review): presumably invokes a named rule; the boolean's meaning is
    // not visible here — confirm.
    Use(bool, String),
    /// Integer-like comparison.
    Scalar(ScalarTest),
    /// Floating point comparison.
    Float(FloatTest),
    /// String comparison at a fixed position.
    String(StringTest),
    /// String search within a window.
    Search(SearchTest),
    /// Length-prefixed string comparison.
    PString(PStringTest),
    /// Regular expression match.
    Regex(RegexTest),
    /// Indirect test with its modifiers; reads no value in this file.
    Indirect(FlagSet<IndirectMod>),
    /// UTF-16 string comparison.
    String16(String16Test),
    // Reads no value and never matches in this file.
    #[allow(dead_code)]
    Der,
    /// Reads no value; strength is zero.
    Clear,
    /// Reads no value; strength is zero.
    Default,
}
1243
1244impl Test {
    /// Reads from `haystack`, at its current position, the raw value this
    /// test needs for its comparison.
    ///
    /// `switch_endianness` is forwarded to scalar and float reads. Returns
    /// `Ok(None)` only for a length-prefixed string whose length differs from
    /// the test value.
    ///
    /// # Errors
    /// Propagates read failures, rejects string tests using an unsupported
    /// operator, and fails for tests that do not read any value.
    #[inline]
    fn read_test_value<'haystack, R: Read + Seek>(
        &self,
        haystack: &'haystack mut LazyCache<R>,
        switch_endianness: bool,
    ) -> Result<Option<ReadValue<'haystack>>, Error> {
        // Offset recorded before any byte is consumed.
        let test_value_offset = haystack.lazy_stream_position();

        match self {
            Self::Scalar(t) => {
                t.ty.read(haystack, switch_endianness)
                    .map(|s| Some(ReadValue::Scalar(test_value_offset, s)))
            }

            Self::Float(t) => {
                t.ty.read(haystack, switch_endianness)
                    .map(|f| Some(ReadValue::Float(test_value_offset, f)))
            }
            Self::String(t) => {
                match t.test_val.as_ref() {
                    TestValue::Value(str) => {
                        // An explicit length always wins.
                        let buf = if let Some(length) = t.length {
                            haystack.read_exact_count(length as u64)?
                        } else {
                            match t.cmp_op {
                                CmpOp::Eq | CmpOp::Neq => {
                                    if !t.has_length_mod() {
                                        // Plain comparison: exactly as many
                                        // bytes as the test string.
                                        haystack.read_exact_count(str.len() as u64)?
                                    } else {
                                        // Modifiers may consume more bytes
                                        // than the test string holds.
                                        haystack.read_count(FILE_BYTES_MAX as u64)?
                                    }
                                }
                                CmpOp::Lt | CmpOp::Gt => {
                                    // NOTE(review): the limit is 8092 here but
                                    // 8192 in the `Any` branch below — confirm
                                    // this difference is intended.
                                    let read =
                                        haystack.read_until_any_delim_or_limit(b"\n\0", 8092)?;

                                    // Drop a trailing delimiter, if present.
                                    if read.ends_with(b"\0") || read.ends_with(b"\n") {
                                        &read[..read.len() - 1]
                                    } else {
                                        read
                                    }
                                }
                                _ => {
                                    return Err(Error::Msg(format!(
                                        "string test does not support {:?} operator",
                                        t.cmp_op
                                    )));
                                }
                            }
                        };

                        Ok(Some(ReadValue::Bytes(test_value_offset, buf)))
                    }
                    TestValue::Any => {
                        let read = haystack.read_until_any_delim_or_limit(b"\0\n", 8192)?;
                        // Drop a trailing delimiter, if present.
                        let bytes = if read.ends_with(b"\0") || read.ends_with(b"\n") {
                            &read[..read.len() - 1]
                        } else {
                            read
                        };

                        Ok(Some(ReadValue::Bytes(test_value_offset, bytes)))
                    }
                }
            }

            Self::String16(t) => {
                match t.test_val.as_ref() {
                    TestValue::Value(str16) => {
                        // Two bytes per UTF-16 code unit.
                        let read = haystack.read_exact_count((str16.len() * 2) as u64)?;

                        Ok(Some(ReadValue::Bytes(test_value_offset, read)))
                    }
                    TestValue::Any => {
                        let read = haystack.read_until_utf16_or_limit(b"\x00\x00", 8192)?;

                        // Keep an even number of bytes so that only whole
                        // code units remain.
                        let end = if read.len() % 2 == 0 {
                            read.len()
                        } else {
                            read.len().saturating_sub(1)
                        };

                        Ok(Some(ReadValue::Bytes(test_value_offset, &read[..end])))
                    }
                }
            }

            Self::PString(t) => {
                let Some(read) = t.read(haystack)? else {
                    return Ok(None);
                };
                Ok(Some(ReadValue::Bytes(test_value_offset, read)))
            }

            Self::Search(_) => {
                let buf = haystack.read_count(FILE_BYTES_MAX as u64)?;
                Ok(Some(ReadValue::Bytes(test_value_offset, buf)))
            }

            Self::Regex(r) => {
                let length = {
                    match r.length {
                        Some(len) => {
                            // With a line limit the length counts lines;
                            // 80 bytes are budgeted per line.
                            if r.mods.contains(ReMod::LineLimit) {
                                len * 80
                            } else {
                                len
                            }
                        }

                        None => FILE_REGEX_MAX,
                    }
                };

                let read = haystack.read_count(length as u64)?;
                Ok(Some(ReadValue::Bytes(test_value_offset, read)))
            }

            // These tests do not compare any value read from the data.
            Self::Name(_)
            | Self::Use(_, _)
            | Self::Indirect(_)
            | Self::Clear
            | Self::Default
            | Self::Der => Err(Error::msg("no value to read for this test")),
        }
    }
1379
    /// Compares a value previously read for this test with the test value.
    ///
    /// Returns the match result on success and `None` on failure, when the
    /// kind of `tv` does not fit the test, or when a scalar transformation
    /// cannot be applied. `stream_kind` is only used by regex tests.
    #[inline(always)]
    fn match_value<'s>(
        &'s self,
        tv: &ReadValue<'s>,
        stream_kind: StreamKind,
    ) -> Option<MatchRes<'s>> {
        match (self, tv) {
            (Self::Scalar(t), ReadValue::Scalar(o, ts)) => {
                // A failing checked transformation aborts the match.
                let read_value: Scalar = match t.transform.as_ref() {
                    Some(t) => t.apply(*ts)?,
                    None => *ts,
                };

                match t.test_val {
                    TestValue::Value(test_value) => {
                        let ok = match t.cmp_op {
                            CmpOp::Not => read_value == !test_value,
                            CmpOp::Eq => read_value == test_value,
                            CmpOp::Lt => read_value < test_value,
                            CmpOp::Gt => read_value > test_value,
                            CmpOp::Neq => read_value != test_value,
                            // Every bit of the test value must be set.
                            CmpOp::BitAnd => read_value & test_value == test_value,
                            // No bit of the test value may be set.
                            CmpOp::Xor => (read_value & test_value).is_zero(),
                        };

                        if ok {
                            Some(MatchRes::Scalar(*o, read_value))
                        } else {
                            None
                        }
                    }

                    TestValue::Any => Some(MatchRes::Scalar(*o, read_value)),
                }
            }

            (Self::Float(t), ReadValue::Float(o, f)) => {
                let read_value: Float = t.transform.as_ref().map(|t| t.apply(*f)).unwrap_or(*f);

                match t.test_val {
                    TestValue::Value(tf) => {
                        let ok = match t.cmp_op {
                            CmpOp::Eq => read_value == tf,
                            CmpOp::Lt => read_value < tf,
                            CmpOp::Gt => read_value > tf,
                            CmpOp::Neq => read_value != tf,
                            _ => {
                                // Panics in debug builds; logs and fails the
                                // match otherwise.
                                debug_panic!("unsupported float comparison");
                                debug!("unsupported float comparison");
                                false
                            }
                        };

                        if ok {
                            Some(MatchRes::Float(*o, read_value))
                        } else {
                            None
                        }
                    }
                    TestValue::Any => Some(MatchRes::Float(*o, read_value)),
                }
            }

            (Self::String(st), ReadValue::Bytes(o, buf)) => {
                // Applies the `Trim` modifier to the bytes being reported.
                macro_rules! trim_buf {
                    ($buf: expr) => {{
                        if st.mods.contains(StringMod::Trim) {
                            $buf.trim_ascii()
                        } else {
                            $buf
                        }
                    }};
                }

                match st.test_val.as_ref() {
                    TestValue::Value(str) => {
                        match st.cmp_op {
                            // Eq/Neq report the test string itself, not the
                            // bytes read from the data.
                            CmpOp::Eq => {
                                if let (true, _) = string_match(str, st.mods, buf) {
                                    Some(MatchRes::Bytes(*o, None, trim_buf!(str), Encoding::Utf8))
                                } else {
                                    None
                                }
                            }
                            CmpOp::Neq => {
                                if let (false, _) = string_match(str, st.mods, buf) {
                                    Some(MatchRes::Bytes(*o, None, trim_buf!(str), Encoding::Utf8))
                                } else {
                                    None
                                }
                            }
                            // Gt/Lt compare lengths, not contents.
                            CmpOp::Gt => {
                                if buf.len() > str.len() {
                                    Some(MatchRes::Bytes(*o, None, trim_buf!(buf), Encoding::Utf8))
                                } else {
                                    None
                                }
                            }
                            CmpOp::Lt => {
                                if buf.len() < str.len() {
                                    Some(MatchRes::Bytes(*o, None, trim_buf!(buf), Encoding::Utf8))
                                } else {
                                    None
                                }
                            }

                            _ => {
                                // Panics in debug builds; logs and fails the
                                // match otherwise.
                                debug_panic!("unsupported string comparison");
                                debug!("unsupported string comparison");
                                None
                            }
                        }
                    }
                    TestValue::Any => {
                        Some(MatchRes::Bytes(*o, None, trim_buf!(buf), Encoding::Utf8))
                    }
                }
            }

            (Self::PString(m), ReadValue::Bytes(o, buf)) => match m.test_val.as_ref() {
                TestValue::Value(psv) => {
                    if buf == psv {
                        Some(MatchRes::Bytes(*o, None, buf, Encoding::Utf8))
                    } else {
                        None
                    }
                }
                TestValue::Any => Some(MatchRes::Bytes(*o, None, buf, Encoding::Utf8)),
            },

            (Self::String16(t), ReadValue::Bytes(o, buf)) => {
                match t.test_val.as_ref() {
                    TestValue::Value(str16) => {
                        // Two bytes per code unit: different sizes cannot match.
                        if str16.len() * 2 != buf.len() {
                            return None;
                        }

                        // Compare unit by unit in the configured byte order.
                        for (i, utf16_char) in slice_to_utf16_iter(buf, t.encoding).enumerate() {
                            if str16[i] != utf16_char {
                                return None;
                            }
                        }

                        // NOTE(review): the reported bytes come from `orig`
                        // (a Rust `String`, hence UTF-8) yet are tagged
                        // UTF-16 — confirm this is intended.
                        Some(MatchRes::Bytes(
                            *o,
                            None,
                            t.orig.as_bytes(),
                            Encoding::Utf16(t.encoding),
                        ))
                    }

                    TestValue::Any => {
                        Some(MatchRes::Bytes(*o, None, buf, Encoding::Utf16(t.encoding)))
                    }
                }
            }

            (Self::Regex(r), ReadValue::Bytes(o, buf)) => r.match_buf(*o, stream_kind, buf),

            (Self::Search(t), ReadValue::Bytes(o, buf)) => t.match_buf(*o, buf),

            // Test kind and read value kind do not fit together.
            _ => None,
        }
    }
1553
1554 #[inline(always)]
1555 fn strength(&self) -> u64 {
1556 const MULT: usize = 10;
1557
1558 let mut out = 2 * MULT;
1559
1560 match self {
1562 Test::Scalar(s) => {
1563 out += s.ty.type_size() * MULT;
1564 }
1565
1566 Test::Float(t) => {
1567 out += t.ty.type_size() * MULT;
1568 }
1569
1570 Test::String(t) => out += t.test_value_len().saturating_mul(MULT),
1571
1572 Test::PString(t) => out += t.test_value_len().saturating_mul(MULT),
1573
1574 Test::Search(s) => {
1575 let n_pos = s.n_pos.unwrap_or(FILE_BYTES_MAX);
1580
1581 match n_pos {
1582 0..=80 => out += s.str.len().saturating_mul(MULT),
1584 81..=240 => out += s.str.len() * s.str.len().clamp(0, MULT - 2),
1586 _ => out += s.str.len(),
1588 }
1589 }
1590
1591 Test::Regex(r) => {
1592 let v = r.non_magic_len / r.re.captures_len();
1601
1602 let len = r
1603 .length
1604 .map(|l| {
1605 if r.mods.contains(ReMod::LineLimit) {
1606 l * 80
1607 } else {
1608 l
1609 }
1610 })
1611 .unwrap_or(FILE_BYTES_MAX);
1612
1613 match len {
1614 0..=80 => out += v.saturating_mul(MULT),
1616 81..=240 => out += v * v.clamp(0, MULT - 2),
1618 _ => out += v,
1620 }
1621 }
1622
1623 Test::String16(t) => {
1624 out += t.test_value_len().saturating_mul(MULT);
1629 }
1630
1631 Test::Der => out += MULT,
1632
1633 Test::Default | Test::Name(_) | Test::Use(_, _) | Test::Indirect(_) | Test::Clear => {
1634 return 0;
1635 }
1636 }
1637
1638 if self.is_match_any() {
1640 return 0;
1641 }
1642
1643 if let Some(op) = self.cmp_op() {
1644 match op {
1645 CmpOp::Neq => out = 0,
1647 CmpOp::Eq | CmpOp::Not => out += MULT,
1648 CmpOp::Lt | CmpOp::Gt => out -= 2 * MULT,
1649 CmpOp::Xor | CmpOp::BitAnd => out -= MULT,
1650 }
1651 }
1652
1653 out as u64
1654 }
1655
1656 #[inline(always)]
1657 fn cmp_op(&self) -> Option<CmpOp> {
1658 match self {
1659 Self::String(t) => Some(t.cmp_op),
1660 Self::Scalar(s) => Some(s.cmp_op),
1661 Self::Float(t) => Some(t.cmp_op),
1662 Self::Name(_)
1663 | Self::Use(_, _)
1664 | Self::Search(_)
1665 | Self::PString(_)
1666 | Self::Regex(_)
1667 | Self::Clear
1668 | Self::Default
1669 | Self::Indirect(_)
1670 | Self::String16(_)
1671 | Self::Der => None,
1672 }
1673 }
1674
1675 #[inline(always)]
1676 fn is_match_any(&self) -> bool {
1677 match self {
1678 Test::Name(_) => false,
1679 Test::Use(_, _) => false,
1680 Test::Scalar(scalar_test) => matches!(scalar_test.test_val, TestValue::Any),
1681 Test::Float(float_test) => matches!(float_test.test_val, TestValue::Any),
1682 Test::String(string_test) => matches!(string_test.test_val, TestValue::Any),
1683 Test::Search(_) => false,
1684 Test::PString(pstring_test) => matches!(pstring_test.test_val, TestValue::Any),
1685 Test::Regex(_) => false,
1686 Test::Indirect(_) => false,
1687 Test::String16(string16_test) => matches!(string16_test.test_val, TestValue::Any),
1688 Test::Der => false,
1689 Test::Clear => false,
1690 Test::Default => false,
1691 }
1692 }
1693
1694 #[inline(always)]
1695 fn is_binary(&self) -> bool {
1696 match self {
1697 Self::Name(_) => true,
1698 Self::Use(_, _) => true,
1699 Self::Scalar(_) => true,
1700 Self::Float(_) => true,
1701 Self::String(t) => !t.is_binary() & !t.is_text() || t.is_binary(),
1702 Self::Search(t) => t.is_binary(),
1703 Self::PString(_) => true,
1704 Self::Regex(t) => t.is_binary(),
1705 Self::Clear => true,
1706 Self::Default => true,
1707 Self::Indirect(_) => true,
1708 Self::String16(_) => true,
1709 Self::Der => true,
1710 }
1711 }
1712
1713 #[inline(always)]
1714 fn is_text(&self) -> bool {
1715 match self {
1716 Self::Name(_) => true,
1717 Self::Use(_, _) => true,
1718 Self::Indirect(_) => true,
1719 Self::Clear => true,
1720 Self::Default => true,
1721 Self::String(t) => !t.is_binary() & !t.is_text() || t.is_text(),
1722 _ => !self.is_binary(),
1723 }
1724 }
1725
1726 #[inline(always)]
1727 fn is_only_text(&self) -> bool {
1728 self.is_text() && !self.is_binary()
1729 }
1730
1731 #[inline(always)]
1732 fn is_only_binary(&self) -> bool {
1733 self.is_binary() && !self.is_text()
1734 }
1735}
1736
/// Encoding of the value stored at the address of an indirect offset.
///
/// `IndOffset::read_offset` dispatches on this type to decode the raw
/// value and widens the result to `u64`.
1737#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
1738enum OffsetType {
/// A single byte.
1739 Byte,
/// Little-endian `f64`, converted to an integer with `as u64`.
1740 DoubleLe,
/// Big-endian `f64`, converted to an integer with `as u64`.
1741 DoubleBe,
/// Little-endian 16-bit integer.
1742 ShortLe,
/// Big-endian 16-bit integer.
1743 ShortBe,
/// Little-endian `u32` passed through `decode_id3`.
1744 Id3Le,
/// Big-endian `u32` passed through `decode_id3`.
1745 Id3Be,
/// Little-endian 32-bit integer.
1746 LongLe,
/// Big-endian 32-bit integer.
1747 LongBe,
/// 32-bit value read with the `read_me!` macro (two little-endian
/// 16-bit halves, the first one in the high bits).
1748 Middle,
/// Value parsed by `read_octal_u64`.
1749 Octal,
/// Big-endian 64-bit integer.
1750 QuadBe,
/// Little-endian 64-bit integer.
1751 QuadLe,
1752}
1753
/// Right-hand operand of the arithmetic applied to an indirect offset.
1754#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
1755enum Shift {
/// The operand is this literal value.
1756 Direct(u64),
/// The operand is itself read from the stream, at the address of the
/// indirect offset displaced by this signed amount.
1757 Indirect(i64),
1758}
1759
/// An offset whose value is read from the stream.
///
/// The value found at `off_addr` is decoded according to `ty` / `signed`
/// and, when both `op` and `shift` are set, combined with the shift operand.
1760#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
1761struct IndOffset {
/// Where the offset value is stored.
1762 off_addr: DirOffset,
/// Selects signed or unsigned decoding for the integer types.
1764 signed: bool,
/// Width and byte order of the stored value.
1766 ty: OffsetType,
/// Optional arithmetic/bitwise operation applied to the value read.
1768 op: Option<Op>,
/// Optional right-hand operand of `op`.
1769 shift: Option<Shift>,
1770}
1771
1772impl IndOffset {
1773 fn read_offset<R: Read + Seek>(
1775 &self,
1776 haystack: &mut LazyCache<R>,
1777 rule_base_offset: Option<u64>,
1778 last_upper_match_offset: Option<u64>,
1779 ) -> Result<Option<u64>, io::Error> {
1780 let offset_address = match self.off_addr {
1781 DirOffset::Start(s) => {
1782 let Some(o) = s.checked_add(rule_base_offset.unwrap_or_default()) else {
1783 return Ok(None);
1784 };
1785
1786 haystack.seek(SeekFrom::Start(o))?
1787 }
1788 DirOffset::LastUpper(c) => haystack.seek(SeekFrom::Start(
1789 (last_upper_match_offset.unwrap_or_default() as i64 + c) as u64,
1790 ))?,
1791 DirOffset::End(e) => haystack.seek(SeekFrom::End(e))?,
1792 };
1793
1794 macro_rules! read_value {
1795 () => {
1796 match self.ty {
1797 OffsetType::Byte => {
1798 if self.signed {
1799 read_le!(haystack, u8) as u64
1800 } else {
1801 read_le!(haystack, i8) as u64
1802 }
1803 }
1804 OffsetType::DoubleLe => read_le!(haystack, f64) as u64,
1805 OffsetType::DoubleBe => read_be!(haystack, f64) as u64,
1806 OffsetType::ShortLe => {
1807 if self.signed {
1808 read_le!(haystack, i16) as u64
1809 } else {
1810 read_le!(haystack, u16) as u64
1811 }
1812 }
1813 OffsetType::ShortBe => {
1814 if self.signed {
1815 read_be!(haystack, i16) as u64
1816 } else {
1817 read_be!(haystack, u16) as u64
1818 }
1819 }
1820 OffsetType::Id3Le => decode_id3(read_le!(haystack, u32)) as u64,
1821 OffsetType::Id3Be => decode_id3(read_be!(haystack, u32)) as u64,
1822 OffsetType::LongLe => {
1823 if self.signed {
1824 read_le!(haystack, i32) as u64
1825 } else {
1826 read_le!(haystack, u32) as u64
1827 }
1828 }
1829 OffsetType::LongBe => {
1830 if self.signed {
1831 read_be!(haystack, i32) as u64
1832 } else {
1833 read_be!(haystack, u32) as u64
1834 }
1835 }
1836 OffsetType::Middle => read_me!(haystack) as u64,
1837 OffsetType::Octal => {
1838 if let Some(o) = read_octal_u64(haystack) {
1839 o
1840 } else {
1841 debug!("failed to read octal offset @ {offset_address}");
1842 return Ok(None);
1843 }
1844 }
1845 OffsetType::QuadLe => {
1846 if self.signed {
1847 read_le!(haystack, i64) as u64
1848 } else {
1849 read_le!(haystack, u64)
1850 }
1851 }
1852 OffsetType::QuadBe => {
1853 if self.signed {
1854 read_be!(haystack, i64) as u64
1855 } else {
1856 read_be!(haystack, u64)
1857 }
1858 }
1859 }
1860 };
1861 }
1862
1863 let o = read_value!();
1865
1866 trace!(
1867 "offset read @ {offset_address} value={o} op={:?} shift={:?}",
1868 self.op, self.shift
1869 );
1870
1871 if let (Some(op), Some(shift)) = (self.op, self.shift) {
1873 let shift = match shift {
1874 Shift::Direct(i) => i,
1875 Shift::Indirect(i) => {
1876 let tmp = offset_address as i128 + i as i128;
1877 if tmp.is_negative() {
1878 return Ok(None);
1879 } else {
1880 haystack.seek(SeekFrom::Start(tmp as u64))?;
1881 };
1882 read_value!()
1885 }
1886 };
1887
1888 match op {
1889 Op::Add => return Ok(o.checked_add(shift)),
1890 Op::Mul => return Ok(o.checked_mul(shift)),
1891 Op::Sub => return Ok(o.checked_sub(shift)),
1892 Op::Div => return Ok(o.checked_div(shift)),
1893 Op::Mod => return Ok(o.checked_rem(shift)),
1894 Op::And => return Ok(Some(o & shift)),
1895 Op::Or => return Ok(Some(o | shift)),
1896 Op::Xor => return Ok(Some(o ^ shift)),
1897 }
1898 }
1899
1900 Ok(Some(o))
1901 }
1902}
1903
/// An offset that can be resolved without reading the stream content.
1904#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
1905enum DirOffset {
/// Offset counted from the start; `Match::matches` adds the rule base
/// offset to it.
1906 Start(u64),
/// Signed displacement added to the offset of the last upper-level match.
1907 LastUpper(i64),
/// Displacement handed to `SeekFrom::End`, i.e. relative to the end of
/// the stream.
1909 End(i64),
1910}
1911
/// Location at which a test is applied.
1912#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
1913enum Offset {
/// Offset known without reading the stream content.
1914 Direct(DirOffset),
/// Offset whose value must first be read from the stream.
1915 Indirect(IndOffset),
1916}
1917
1918impl From<DirOffset> for Offset {
1919 fn from(value: DirOffset) -> Self {
1920 Self::Direct(value)
1921 }
1922}
1923
1924impl From<IndOffset> for Offset {
1925 fn from(value: IndOffset) -> Self {
1926 Self::Indirect(value)
1927 }
1928}
1929
1930impl Display for DirOffset {
1931 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1932 match self {
1933 DirOffset::Start(i) => write!(f, "{i}"),
1934 DirOffset::LastUpper(c) => write!(f, "&{c}"),
1935 DirOffset::End(e) => write!(f, "-{e}"),
1936 }
1937 }
1938}
1939
1940impl Default for DirOffset {
1941 fn default() -> Self {
1942 Self::LastUpper(0)
1943 }
1944}
1945
/// One test line of a magic rule: where to look, what to test and what to
/// report.
1946#[derive(Debug, Clone, Serialize, Deserialize)]
1947struct Match {
/// Line number reported in logs and errors.
1948 line: usize,
/// Depth of the line; also used as its continuation level.
1949 depth: u8,
/// Where the test is applied.
1950 offset: Offset,
/// The test to run.
1951 test: Test,
/// Cached result of `test.strength()`.
1952 test_strength: u64,
/// Optional message pushed when the test matches.
1953 message: Option<Message>,
1954}
1955
1956impl From<Use> for Match {
1957 fn from(value: Use) -> Self {
1958 let test = Test::Use(value.switch_endianness, value.rule_name);
1959 let test_strength = test.strength();
1960 Self {
1961 line: value.line,
1962 depth: value.depth,
1963 offset: value.start_offset,
1964 test,
1965 test_strength,
1966 message: value.message,
1967 }
1968 }
1969}
1970
1971impl From<Name> for Match {
1972 fn from(value: Name) -> Self {
1973 let test = Test::Name(value.name);
1974 let test_strength = test.strength();
1975 Self {
1976 line: value.line,
1977 depth: 0,
1978 offset: Offset::Direct(DirOffset::Start(0)),
1979 test,
1980 test_strength,
1981 message: value.message,
1982 }
1983 }
1984}
1985
1986impl Match {
1987 #[inline(always)]
1989 fn offset_from_start<R: Read + Seek>(
1990 &self,
1991 haystack: &mut LazyCache<R>,
1992 rule_base_offset: Option<u64>,
1993 last_level_offset: Option<u64>,
1994 ) -> Result<Option<u64>, io::Error> {
1995 match self.offset {
1996 Offset::Direct(dir_offset) => match dir_offset {
1997 DirOffset::Start(s) => Ok(Some(s)),
1998 DirOffset::LastUpper(shift) => {
1999 let o = last_level_offset.unwrap_or_default() as i64 + shift;
2000
2001 if o >= 0 { Ok(Some(o as u64)) } else { Ok(None) }
2002 }
2003 DirOffset::End(e) => Ok(Some(haystack.offset_from_start(SeekFrom::End(e)))),
2004 },
2005 Offset::Indirect(ind_offset) => {
2006 let Some(o) =
2007 ind_offset.read_offset(haystack, rule_base_offset, last_level_offset)?
2008 else {
2009 return Ok(None);
2010 };
2011
2012 Ok(Some(o))
2013 }
2014 }
2015 }
2016
2017 #[inline]
2030 #[allow(clippy::too_many_arguments)]
2031 fn matches<'a: 'h, 'h, R: Read + Seek>(
2032 &'a self,
2033 source: Option<&str>,
2034 magic: &mut Magic<'a>,
2035 stream_kind: StreamKind,
2036 state: &mut MatchState,
2037 buf_base_offset: Option<u64>,
2038 rule_base_offset: Option<u64>,
2039 last_level_offset: Option<u64>,
2040 haystack: &'h mut LazyCache<R>,
2041 switch_endianness: bool,
2042 db: &'a MagicDb,
2043 depth: usize,
2044 ) -> Result<(bool, Option<MatchRes<'h>>), Error> {
2045 let source = source.unwrap_or("unknown");
2046 let line = self.line;
2047
2048 if depth >= MAX_RECURSION {
2049 return Err(Error::localized(
2050 source,
2051 line,
2052 Error::MaximumRecursion(MAX_RECURSION),
2053 ));
2054 }
2055
2056 if self.test.is_only_binary() && stream_kind.is_text() {
2057 trace!("skip binary test source={source} line={line} stream_kind={stream_kind:?}",);
2058 return Ok((false, None));
2059 }
2060
2061 if self.test.is_only_text() && !stream_kind.is_text() {
2062 trace!("skip text test source={source} line={line} stream_kind={stream_kind:?}",);
2063 return Ok((false, None));
2064 }
2065
2066 let Ok(Some(mut offset)) = self
2067 .offset_from_start(haystack, rule_base_offset, last_level_offset)
2068 .inspect_err(|e| debug!("source={source} line={line} failed at computing offset: {e}"))
2069 else {
2070 return Ok((false, None));
2071 };
2072
2073 offset = match self.offset {
2074 Offset::Indirect(_) => {
2075 buf_base_offset.unwrap_or_default().saturating_add(offset)
2080 }
2081 Offset::Direct(DirOffset::Start(_)) => {
2083 rule_base_offset.unwrap_or_default().saturating_add(offset)
2084 }
2085 _ => offset,
2086 };
2087
2088 match &self.test {
2089 Test::Clear => {
2090 trace!("source={source} line={line} clear");
2091 state.clear_continuation_level(&self.continuation_level());
2092 Ok((true, None))
2093 }
2094
2095 Test::Name(name) => {
2096 trace!(
2097 "source={source} line={line} running rule {name} switch_endianness={switch_endianness}",
2098 );
2099 Ok((true, None))
2100 }
2101
2102 Test::Use(flip_endianness, rule_name) => {
2103 trace!(
2104 "source={source} line={line} use {rule_name} switch_endianness={flip_endianness}",
2105 );
2106
2107 let switch_endianness = switch_endianness ^ flip_endianness;
2109
2110 let dr: &DependencyRule = db.dependencies.get(rule_name).ok_or(
2111 Error::localized(source, line, Error::MissingRule(rule_name.clone())),
2112 )?;
2113
2114 if let Some(msg) = self.message.as_ref() {
2116 magic.push_message(msg.to_string_lossy());
2117 }
2118
2119 dr.rule.magic(
2120 magic,
2121 stream_kind,
2122 buf_base_offset,
2123 Some(offset),
2124 haystack,
2125 db,
2126 switch_endianness,
2127 depth.saturating_add(1),
2128 )?;
2129
2130 Ok((false, None))
2132 }
2133
2134 Test::Indirect(m) => {
2135 trace!(
2136 "source={source} line={line} indirect mods={:?} offset={offset:#x}",
2137 m
2138 );
2139
2140 let new_buf_base_off = if m.contains(IndirectMod::Relative) {
2141 Some(offset)
2142 } else {
2143 None
2144 };
2145
2146 if let Some(msg) = self.message.as_ref() {
2148 magic.push_message(msg.to_string_lossy());
2149 }
2150
2151 for r in db.rules.iter() {
2152 let messages_cnt = magic.message.len();
2153
2154 r.magic(
2155 magic,
2156 stream_kind,
2157 new_buf_base_off,
2158 Some(offset),
2159 haystack,
2160 db,
2161 false,
2162 depth.saturating_add(1),
2163 )?;
2164
2165 if magic.message.len() != messages_cnt {
2167 break;
2168 }
2169 }
2170
2171 Ok((false, None))
2173 }
2174
2175 Test::Default => {
2176 let ok = !state.get_continuation_level(&self.continuation_level());
2178
2179 trace!("source={source} line={line} default match={ok}");
2180 if ok {
2181 state.set_continuation_level(self.continuation_level());
2182 }
2183
2184 Ok((ok, None))
2185 }
2186
2187 _ => {
2188 if let Err(e) = haystack.seek(SeekFrom::Start(offset)) {
2189 debug!("source={source} line={line} failed to seek in haystack: {e}");
2190 return Ok((false, None));
2191 }
2192
2193 let mut trace_msg = None;
2194
2195 if enabled!(Level::DEBUG) {
2196 trace_msg = Some(vec![format!(
2197 "source={source} line={line} depth={} stream_offset={:#x}",
2198 self.depth,
2199 haystack.lazy_stream_position()
2200 )])
2201 }
2202
2203 if let Ok(opt_test_value) = self
2207 .test
2208 .read_test_value(haystack, switch_endianness)
2209 .inspect_err(|e| {
2210 debug!("source={source} line={line} error while reading test value @{offset}: {e}",)
2211 })
2212 {
2213 if let Some(v) = trace_msg
2214 .as_mut() { v.push(format!("test={:?}", self.test)) }
2215
2216 let match_res =
2217 opt_test_value.and_then(|tv| self.test.match_value(&tv, stream_kind));
2218
2219 if let Some(v) = trace_msg.as_mut() { v.push(format!(
2220 "message=\"{}\" match={}",
2221 self.message
2222 .as_ref()
2223 .map(|fs| fs.to_string_lossy())
2224 .unwrap_or_default(),
2225 match_res.is_some()
2226 )) }
2227
2228 if enabled!(Level::DEBUG) && !enabled!(Level::TRACE) && match_res.is_some() {
2230 if let Some(m) = trace_msg{
2231 debug!("{}", m.join(" "));
2232 }
2233 } else if enabled!(Level::TRACE)
2234 && let Some(m) = trace_msg{
2235 trace!("{}", m.join(" "));
2236 }
2237
2238 if let Some(mr) = match_res {
2239 state.set_continuation_level(self.continuation_level());
2240 return Ok((true, Some(mr)));
2241 }
2242 }
2243
2244 Ok((false, None))
2245 }
2246 }
2247 }
2248
2249 #[inline(always)]
2250 fn continuation_level(&self) -> ContinuationLevel {
2251 ContinuationLevel(self.depth)
2252 }
2253}
2254
/// A `use` directive, converted into a `Match` carrying a `Test::Use`.
2255#[derive(Debug, Clone)]
2256struct Use {
/// Line number of the directive.
2257 line: usize,
/// Depth of the directive.
2258 depth: u8,
/// Offset handed to the used rule.
2259 start_offset: Offset,
/// Name of the rule to run.
2260 rule_name: String,
/// Whether endianness is flipped while running the used rule.
2261 switch_endianness: bool,
/// Optional message pushed before the used rule is evaluated.
2262 message: Option<Message>,
2263}
2264
/// Modifier applied to the strength of an entry: `strength <op> by`.
2265#[derive(Debug, Clone, Serialize, Deserialize)]
2266struct StrengthMod {
/// Operation to apply.
2267 op: Op,
/// Right-hand operand of the operation.
2268 by: u8,
2269}
2270
2271impl StrengthMod {
2272 #[inline(always)]
2273 fn apply(&self, strength: u64) -> u64 {
2274 let by = self.by as u64;
2275 debug!("applying strength modifier: {strength} {} {}", self.op, by);
2276 match self.op {
2277 Op::Mul => strength.saturating_mul(by),
2278 Op::Add => strength.saturating_add(by),
2279 Op::Sub => strength.saturating_sub(by),
2280 Op::Div => {
2281 if by > 0 {
2282 strength.saturating_div(by)
2283 } else {
2284 strength
2285 }
2286 }
2287 Op::Mod => strength % by,
2288 Op::And => strength & by,
2289 Op::Xor | Op::Or => {
2292 debug_panic!("unsupported strength operator");
2293 strength
2294 }
2295 }
2296 }
2297}
2298
/// Metadata attached to a rule entry.
/// NOTE(review): presumably produced by the parser from `!:` lines — confirm.
2299#[derive(Debug, Clone)]
2300enum Flag {
/// MIME type.
2301 Mime(String),
/// Set of file extensions.
2302 Ext(HashSet<String>),
/// Strength modifier.
2303 Strength(StrengthMod),
/// Apple type string.
2304 Apple(String),
2305}
2306
/// A `name` directive, converted into a `Match` carrying a `Test::Name`.
2307#[derive(Debug, Clone)]
2308struct Name {
/// Line number of the directive.
2309 line: usize,
/// Name of the rule.
2310 name: String,
/// Optional message attached to the directive.
2311 message: Option<Message>,
2312}
2313
/// An entry paired with the `Span` it came from.
/// NOTE(review): looks like the parser's intermediate representation — confirm.
2314#[derive(Debug, Clone)]
2315enum Entry<'span> {
/// A test line.
2316 Match(Span<'span>, Match),
/// A metadata line.
2317 Flag(Span<'span>, Flag),
2318}
2319
/// Node of a rule tree: a test line, its metadata and its child lines.
2320#[derive(Debug, Clone, Serialize, Deserialize)]
2321struct EntryNode {
/// When set, an end-relative offset on this node becomes the rule base
/// offset passed to its children.
2322 root: bool,
/// The test line of this node.
2323 entry: Match,
/// Lines evaluated only when `entry` matches.
2324 children: Vec<EntryNode>,
/// MIME type set on the result when this node matches.
2325 mimetype: Option<String>,
/// Creator code set on the result when this node matches.
2326 apple: Option<String>,
/// Optional modifier applied to the strength of this node.
2327 strength_mod: Option<StrengthMod>,
/// Extensions inserted into the result when this node matches.
2328 exts: HashSet<String>,
2329}
2330
2331impl EntryNode {
2332 fn update_exts_rec(
2333 &self,
2334 exts: &mut HashSet<String>,
2335 deps: &HashMap<String, DependencyRule>,
2336 marked: &mut HashSet<String>,
2337 ) -> Result<(), ()> {
2338 for ext in self.exts.iter() {
2339 if !exts.contains(ext) {
2340 exts.insert(ext.clone());
2341 }
2342 }
2343
2344 for c in self.children.iter() {
2345 if let Test::Use(_, ref name) = c.entry.test {
2346 if marked.contains(name) {
2347 continue;
2348 }
2349 if let Some(r) = deps.get(name) {
2350 marked.insert(name.clone());
2351 exts.extend(r.rule.fetch_all_extensions(deps, marked)?);
2352 } else {
2353 return Err(());
2354 }
2355 } else {
2356 c.update_exts_rec(exts, deps, marked)?;
2357 }
2358 }
2359
2360 Ok(())
2361 }
2362
2363 fn update_score_rec(
2364 &self,
2365 depth: usize,
2366 score: &mut u64,
2367 deps: &HashMap<String, DependencyRule>,
2368 marked: &mut HashSet<String>,
2369 ) {
2370 if depth == 3 {
2371 return;
2372 }
2373
2374 *score += self
2375 .children
2376 .iter()
2377 .map(|e| e.entry.test_strength)
2378 .min()
2379 .unwrap_or_default();
2380
2381 for c in self.children.iter() {
2382 if let Test::Use(_, ref name) = c.entry.test {
2383 if marked.contains(name) {
2384 continue;
2385 }
2386
2387 if let Some(r) = deps.get(name) {
2388 marked.insert(name.clone());
2389 *score += r.rule.compute_score(depth, deps, marked);
2390 }
2391 }
2392 c.update_score_rec(depth + 1, score, deps, marked);
2393 }
2394 }
2395
2396 #[inline]
2397 #[allow(clippy::too_many_arguments)]
2398 fn matches<'r, R: Read + Seek>(
2399 &'r self,
2400 opt_source: Option<&str>,
2401 magic: &mut Magic<'r>,
2402 state: &mut MatchState,
2403 stream_kind: StreamKind,
2404 buf_base_offset: Option<u64>,
2405 rule_base_offset: Option<u64>,
2406 last_level_offset: Option<u64>,
2407 haystack: &mut LazyCache<R>,
2408 db: &'r MagicDb,
2409 switch_endianness: bool,
2410 depth: usize,
2411 ) -> Result<(), Error> {
2412 let (ok, opt_match_res) = self.entry.matches(
2413 opt_source,
2414 magic,
2415 stream_kind,
2416 state,
2417 buf_base_offset,
2418 rule_base_offset,
2419 last_level_offset,
2420 haystack,
2421 switch_endianness,
2422 db,
2423 depth,
2424 )?;
2425
2426 let source = opt_source.unwrap_or("unknown");
2427 let line = self.entry.line;
2428
2429 if ok {
2430 if let Some(msg) = self.entry.message.as_ref()
2432 && let Ok(msg) = msg.format_with(opt_match_res.as_ref()).inspect_err(|e| {
2433 debug!("source={source} line={line} failed to format message: {e}")
2434 })
2435 {
2436 magic.push_message(msg);
2437 }
2438
2439 if let Some(mr) = opt_match_res {
2441 match &self.entry.test {
2442 Test::String(t) => {
2443 if t.has_length_mod() {
2444 let o = mr.end_offset();
2445 haystack.seek(SeekFrom::Start(o))?;
2446 }
2447 }
2448 Test::Search(t) => {
2449 if t.re_mods.contains(ReMod::StartOffsetUpdate) {
2450 let o = mr.start_offset();
2451 haystack.seek(SeekFrom::Start(o))?;
2452 } else {
2453 let o = mr.end_offset();
2454 haystack.seek(SeekFrom::Start(o))?;
2455 }
2456 }
2457
2458 Test::Regex(t) => {
2459 if t.mods.contains(ReMod::StartOffsetUpdate) {
2460 let o = mr.start_offset();
2461 haystack.seek(SeekFrom::Start(o))?;
2462 } else {
2463 let o = mr.end_offset();
2464 haystack.seek(SeekFrom::Start(o))?;
2465 }
2466 }
2467 _ => {}
2469 }
2470 }
2471
2472 if let Some(mimetype) = self.mimetype.as_ref() {
2473 magic.set_mime_type(Cow::Borrowed(mimetype));
2474 }
2475
2476 if let Some(apple_ty) = self.apple.as_ref() {
2477 magic.set_creator_code(Cow::Borrowed(apple_ty));
2478 }
2479
2480 if !self.exts.is_empty() {
2481 magic.insert_extensions(self.exts.iter().map(|s| s.as_str()));
2482 }
2483
2484 let mut strength = self.entry.test_strength;
2488
2489 let continuation_level = self.entry.continuation_level().0 as u64;
2490 if self.entry.message.is_none() && continuation_level < 3 {
2491 strength = strength.saturating_add(continuation_level);
2492 }
2493
2494 if let Some(sm) = self.strength_mod.as_ref() {
2495 strength = sm.apply(strength);
2496 }
2497
2498 if self.entry.message.is_none() {
2500 strength += 1
2501 }
2502
2503 magic.update_strength(strength);
2504
2505 let end_upper_level = haystack.lazy_stream_position();
2506
2507 let rule_base_offset = if self.root {
2515 match self.entry.offset {
2516 Offset::Direct(DirOffset::End(o)) => {
2517 Some(haystack.offset_from_start(SeekFrom::End(o)))
2518 }
2519 _ => rule_base_offset,
2520 }
2521 } else {
2522 rule_base_offset
2523 };
2524
2525 for e in self.children.iter() {
2526 e.matches(
2527 opt_source,
2528 magic,
2529 state,
2530 stream_kind,
2531 buf_base_offset,
2532 rule_base_offset,
2533 Some(end_upper_level),
2534 haystack,
2535 db,
2536 switch_endianness,
2537 depth,
2538 )?
2539 }
2540 }
2541
2542 Ok(())
2543 }
2544}
2545
/// A magic rule: a tree of test entries plus data derived from it.
2546#[derive(Debug, Clone, Serialize, Deserialize)]
2548pub struct MagicRule {
/// Identifier of the rule, assigned through `set_id`.
2549 id: usize,
/// Name of the source the rule comes from, if known.
2550 source: Option<String>,
/// Root of the entry tree.
2551 entries: EntryNode,
/// Extensions of the rule; completed by `try_finalize`.
2552 extensions: HashSet<String>,
/// Score of the rule; computed by `try_finalize`.
2553 score: u64,
/// Set once `try_finalize` has succeeded.
2555 finalized: bool,
2556}
2557
2558impl MagicRule {
2559 #[inline(always)]
2560 fn set_id(&mut self, id: usize) {
2561 self.id = id
2562 }
2563
2564 fn fetch_all_extensions(
2568 &self,
2569 deps: &HashMap<String, DependencyRule>,
2570 marked: &mut HashSet<String>,
2571 ) -> Result<HashSet<String>, ()> {
2572 let mut exts = HashSet::new();
2573 self.entries.update_exts_rec(&mut exts, deps, marked)?;
2574 Ok(exts)
2575 }
2576
2577 fn compute_score(
2580 &self,
2581 depth: usize,
2582 deps: &HashMap<String, DependencyRule>,
2583 marked: &mut HashSet<String>,
2584 ) -> u64 {
2585 let mut score = 0;
2586 score += self.entries.entry.test_strength;
2587 self.entries
2588 .update_score_rec(depth, &mut score, deps, marked);
2589 score
2590 }
2591
2592 fn try_finalize(&mut self, deps: &HashMap<String, DependencyRule>) {
2595 if self.finalized {
2596 return;
2597 }
2598
2599 let Ok(exts) = self.fetch_all_extensions(deps, &mut HashSet::new()) else {
2600 return;
2601 };
2602
2603 self.extensions.extend(exts);
2604
2605 self.score = self.compute_score(0, deps, &mut HashSet::new());
2609 self.finalized = true
2610 }
2611
2612 #[inline]
2613 fn magic_entrypoint<'r, R: Read + Seek>(
2614 &'r self,
2615 magic: &mut Magic<'r>,
2616 stream_kind: StreamKind,
2617 haystack: &mut LazyCache<R>,
2618 db: &'r MagicDb,
2619 switch_endianness: bool,
2620 depth: usize,
2621 ) -> Result<(), Error> {
2622 self.entries.matches(
2623 self.source.as_deref(),
2624 magic,
2625 &mut MatchState::empty(),
2626 stream_kind,
2627 None,
2628 None,
2629 None,
2630 haystack,
2631 db,
2632 switch_endianness,
2633 depth,
2634 )
2635 }
2636
2637 #[inline]
2638 #[allow(clippy::too_many_arguments)]
2639 fn magic<'r, R: Read + Seek>(
2640 &'r self,
2641 magic: &mut Magic<'r>,
2642 stream_kind: StreamKind,
2643 buf_base_offset: Option<u64>,
2644 rule_base_offset: Option<u64>,
2645 haystack: &mut LazyCache<R>,
2646 db: &'r MagicDb,
2647 switch_endianness: bool,
2648 depth: usize,
2649 ) -> Result<(), Error> {
2650 self.entries.matches(
2651 self.source.as_deref(),
2652 magic,
2653 &mut MatchState::empty(),
2654 stream_kind,
2655 buf_base_offset,
2656 rule_base_offset,
2657 None,
2658 haystack,
2659 db,
2660 switch_endianness,
2661 depth,
2662 )
2663 }
2664
2665 pub fn is_text(&self) -> bool {
2671 self.entries.entry.test.is_text()
2672 && self.entries.children.iter().all(|e| e.entry.test.is_text())
2673 }
2674
2675 #[inline(always)]
2681 pub fn score(&self) -> u64 {
2682 self.score
2683 }
2684
2685 #[inline(always)]
2691 pub fn source(&self) -> Option<&str> {
2692 self.source.as_deref()
2693 }
2694
2695 #[inline(always)]
2701 pub fn line(&self) -> usize {
2702 self.entries.entry.line
2703 }
2704
2705 #[inline(always)]
2711 pub fn extensions(&self) -> &HashSet<String> {
2712 &self.extensions
2713 }
2714}
2715
/// A named rule that other rules can reach through a `use` test.
2716#[derive(Debug, Clone, Serialize, Deserialize)]
2717struct DependencyRule {
/// Name of the rule.
2718 name: String,
/// The rule itself.
2719 rule: MagicRule,
2720}
2721
/// Rules and named dependency rules obtained from one parsed magic file.
2722#[derive(Debug, Clone, Serialize, Deserialize)]
2728pub struct MagicSource {
/// Rules of the source.
2729 rules: Vec<MagicRule>,
/// Named rules, indexed by name.
2730 dependencies: HashMap<String, DependencyRule>,
2731}
2732
2733impl MagicSource {
/// Parses the magic file at path `p`.
///
/// Delegates to `FileMagicParser::parse_file` and returns its result,
/// including any error, unchanged.
2734 pub fn open<P: AsRef<Path>>(p: P) -> Result<Self, Error> {
2744 FileMagicParser::parse_file(p)
2745 }
2746}
2747
/// Continuation level of a test line; built from the line's `depth` and
/// used as an index into `MatchState`.
2748#[derive(Debug, Hash, PartialEq, Eq, Clone, Copy)]
2749struct ContinuationLevel(u8);
2750
/// Text encoding detected for a stream.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
enum TextEncoding {
    Ascii,
    Utf8,
    Unknown,
}

impl TextEncoding {
    /// Name of the encoding as a static string.
    const fn as_magic_str(&self) -> &'static str {
        match *self {
            Self::Ascii => "ASCII",
            Self::Utf8 => "UTF-8",
            Self::Unknown => "Unknown",
        }
    }
}
2768
2769#[derive(Debug, PartialEq, Eq, Clone, Copy)]
2770enum StreamKind {
2771 Binary,
2772 Text(TextEncoding),
2773}
2774
2775impl StreamKind {
2776 const fn is_text(&self) -> bool {
2777 matches!(self, StreamKind::Text(_))
2778 }
2779}
2780
2781#[derive(Debug)]
2782struct MatchState {
2783 continuation_levels: [bool; 256],
2784}
2785
2786impl MatchState {
2787 #[inline(always)]
2788 fn empty() -> Self {
2789 MatchState {
2790 continuation_levels: [false; 256],
2791 }
2792 }
2793
2794 #[inline(always)]
2795 fn get_continuation_level(&mut self, level: &ContinuationLevel) -> bool {
2796 self.continuation_levels
2797 .get(level.0 as usize)
2798 .cloned()
2799 .unwrap_or_default()
2800 }
2801
2802 #[inline(always)]
2803 fn set_continuation_level(&mut self, level: ContinuationLevel) {
2804 if let Some(b) = self.continuation_levels.get_mut(level.0 as usize) {
2805 *b = true
2806 }
2807 }
2808
2809 #[inline(always)]
2810 fn clear_continuation_level(&mut self, level: &ContinuationLevel) {
2811 if let Some(b) = self.continuation_levels.get_mut(level.0 as usize) {
2812 *b = false;
2813 }
2814 }
2815}
2816
/// Result of a magic evaluation: messages, MIME type, creator code,
/// extensions and strength accumulated while rules match.
2817#[derive(Debug, Default)]
2819pub struct Magic<'m> {
/// Kind of the analysed stream; selects the fallback MIME type.
2820 stream_kind: Option<StreamKind>,
/// Name of the source of the rule, if any.
2821 source: Option<Cow<'m, str>>,
/// Message parts, in the order they were pushed.
2822 message: Vec<Cow<'m, str>>,
/// MIME type; only the first value set is kept.
2823 mime_type: Option<Cow<'m, str>>,
/// Creator code; only the first value set is kept.
2824 creator_code: Option<Cow<'m, str>>,
/// Accumulated strength (saturating sum).
2825 strength: u64,
/// Extensions; only filled while the set is still empty.
2826 exts: HashSet<Cow<'m, str>>,
/// Value returned by `is_default()`.
2827 is_default: bool,
2828}
2829
2830impl<'m> Magic<'m> {
2831 #[inline(always)]
2832 fn set_source(&mut self, source: Option<&'m str>) {
2833 self.source = source.map(Cow::Borrowed);
2834 }
2835
2836 #[inline(always)]
2837 fn set_stream_kind(&mut self, stream_kind: StreamKind) {
2838 self.stream_kind = Some(stream_kind)
2839 }
2840
2841 #[inline(always)]
2842 fn reset(&mut self) {
2843 self.stream_kind = None;
2844 self.source = None;
2845 self.message.clear();
2846 self.mime_type = None;
2847 self.creator_code = None;
2848 self.strength = 0;
2849 self.exts.clear();
2850 self.is_default = false;
2851 }
2852
2853 #[inline]
2861 pub fn into_owned<'owned>(self) -> Magic<'owned> {
2862 Magic {
2863 stream_kind: self.stream_kind,
2864 source: self.source.map(|s| Cow::Owned(s.into_owned())),
2865 message: self
2866 .message
2867 .into_iter()
2868 .map(Cow::into_owned)
2869 .map(Cow::Owned)
2870 .collect(),
2871 mime_type: self.mime_type.map(|m| Cow::Owned(m.into_owned())),
2872 creator_code: self.creator_code.map(|m| Cow::Owned(m.into_owned())),
2873 strength: self.strength,
2874 exts: self
2875 .exts
2876 .into_iter()
2877 .map(|e| Cow::Owned(e.into_owned()))
2878 .collect(),
2879 is_default: self.is_default,
2880 }
2881 }
2882
2883 #[inline(always)]
2889 pub fn message(&self) -> String {
2890 let mut out = String::new();
2891 for (i, m) in self.message.iter().enumerate() {
2892 if let Some(s) = m.strip_prefix(r#"\b"#) {
2893 out.push_str(s);
2894 } else {
2895 if i > 0 {
2897 out.push(' ');
2898 }
2899 out.push_str(m);
2900 }
2901 }
2902 out
2903 }
2904
2905 #[inline]
2916 pub fn message_parts(&self) -> impl Iterator<Item = &str> {
2917 self.message.iter().map(|p| p.as_ref())
2918 }
2919
2920 #[inline(always)]
2921 fn update_strength(&mut self, value: u64) {
2922 self.strength = self.strength.saturating_add(value);
2923 debug!("updated strength = {:?}", self.strength)
2924 }
2925
2926 #[inline(always)]
2932 pub fn mime_type(&self) -> &str {
2933 self.mime_type.as_deref().unwrap_or(match self.stream_kind {
2934 Some(StreamKind::Text(_)) => DEFAULT_TEXT_MIMETYPE,
2935 Some(StreamKind::Binary) | None => DEFAULT_BIN_MIMETYPE,
2936 })
2937 }
2938
2939 #[inline(always)]
2940 fn push_message<'a: 'm>(&mut self, msg: Cow<'a, str>) {
2941 if !msg.is_empty() {
2942 debug!("pushing message: msg={msg} len={}", msg.len());
2943 self.message.push(msg);
2944 }
2945 }
2946
2947 #[inline(always)]
2948 fn set_mime_type<'a: 'm>(&mut self, mime: Cow<'a, str>) {
2949 if self.mime_type.is_none() {
2950 debug!("insert mime: {:?}", mime);
2951 self.mime_type = Some(mime)
2952 }
2953 }
2954
2955 #[inline(always)]
2956 fn set_creator_code<'a: 'm>(&mut self, apple_ty: Cow<'a, str>) {
2957 if self.creator_code.is_none() {
2958 debug!("insert apple type: {apple_ty:?}");
2959 self.creator_code = Some(apple_ty)
2960 }
2961 }
2962
2963 #[inline(always)]
2964 fn insert_extensions<'a: 'm, I: Iterator<Item = &'a str>>(&mut self, exts: I) {
2965 if self.exts.is_empty() {
2966 self.exts.extend(exts.filter_map(|e| {
2967 if e.is_empty() {
2968 None
2969 } else {
2970 Some(Cow::Borrowed(e))
2971 }
2972 }));
2973 }
2974 }
2975
2976 #[inline(always)]
2984 pub fn strength(&self) -> u64 {
2985 self.strength
2986 }
2987
2988 #[inline(always)]
2994 pub fn source(&self) -> Option<&str> {
2995 self.source.as_deref()
2996 }
2997
2998 #[inline(always)]
3004 pub fn creator_code(&self) -> Option<&str> {
3005 self.creator_code.as_deref()
3006 }
3007
3008 #[inline(always)]
3014 pub fn extensions(&self) -> &HashSet<Cow<'m, str>> {
3015 &self.exts
3016 }
3017
3018 #[inline(always)]
3024 pub fn is_default(&self) -> bool {
3025 self.is_default
3026 }
3027}
3028
/// Database of magic rules.
3029#[derive(Debug, Default, Clone, Serialize, Deserialize)]
3031pub struct MagicDb {
/// Next identifier handed out by `next_rule_id`.
3032 rule_id: usize,
/// Rules of the database; iterated in order by `indirect` tests.
3033 rules: Vec<MagicRule>,
/// Named rules looked up by `use` tests.
3034 dependencies: HashMap<String, DependencyRule>,
3035}
3036
/// Heuristically tells whether `bytes` looks like text.
///
/// Returns `false` for empty input and as soon as a NUL byte is found.
/// Otherwise the input is text when more than 85% of its bytes are
/// printable ASCII (0x20..=0x7E) or tab / line feed / carriage return,
/// and fewer than 20% are anything else.
#[inline(always)]
fn is_likely_text(bytes: &[u8]) -> bool {
    if bytes.is_empty() {
        return false;
    }

    let mut printable = 0usize;
    let mut other = 0usize;

    for &byte in bytes {
        match byte {
            0x00 => return false,
            0x09 | 0x0A | 0x0D | 0x20..=0x7E => printable += 1,
            _ => other += 1,
        }
    }

    let total = bytes.len() as f64;
    let printable_ratio = printable as f64 / total;
    let other_ratio = other as f64 / total;

    printable_ratio > 0.85 && other_ratio < 0.20
}
3079
3080#[inline(always)]
3081fn guess_stream_kind<S: AsRef<[u8]>>(stream: S) -> StreamKind {
3082 let buf = stream.as_ref();
3083
3084 match run_utf8_validation(buf) {
3085 Ok(is_ascii) => {
3086 if is_ascii {
3087 StreamKind::Text(TextEncoding::Ascii)
3088 } else {
3089 StreamKind::Text(TextEncoding::Utf8)
3090 }
3091 }
3092 Err(e) => {
3093 if is_likely_text(&buf[e.valid_up_to..]) {
3094 StreamKind::Text(TextEncoding::Unknown)
3095 } else {
3096 StreamKind::Binary
3097 }
3098 }
3099 }
3100}
3101
3102impl MagicDb {
3103 fn open_reader<R: Read + Seek>(f: R) -> Result<LazyCache<R>, Error> {
3104 Ok(LazyCache::<R>::from_read_seek(f)
3105 .and_then(|lc| lc.with_hot_cache(2 * FILE_BYTES_MAX))?)
3106 .map(|lc| lc.with_warm_cache(100 << 20))
3107 }
3108
    /// Creates a database through its `Default` implementation.
    ///
    /// Rules are added afterwards with `load`.
    pub fn new() -> Self {
        Self::default()
    }
3117
3118 #[inline(always)]
3119 fn next_rule_id(&mut self) -> usize {
3120 let t = self.rule_id;
3121 self.rule_id += 1;
3122 t
3123 }
3124
3125 #[inline(always)]
3126 fn try_json<R: Read + Seek>(
3127 haystack: &mut LazyCache<R>,
3128 stream_kind: StreamKind,
3129 magic: &mut Magic,
3130 ) -> Result<bool, Error> {
3131 if matches!(stream_kind, StreamKind::Binary) {
3133 return Ok(false);
3134 }
3135
3136 let buf = haystack.read_range(0..FILE_BYTES_MAX as u64)?.trim_ascii();
3137
3138 let Some((start, end)) = find_json_boundaries(buf) else {
3139 return Ok(false);
3140 };
3141
3142 for c in buf[0..start].iter() {
3145 if !c.is_ascii_whitespace() {
3146 return Ok(false);
3147 }
3148 }
3149
3150 let mut is_ndjson = false;
3151
3152 trace!("maybe a json document");
3153 let ok = serde_json::from_slice::<serde_json::Value>(&buf[start..=end]).is_ok();
3154 if !ok {
3155 return Ok(false);
3156 }
3157
3158 if end + 1 < buf.len() {
3160 let buf = &buf[end + 1..];
3162 if let Some((second_start, second_end)) = find_json_boundaries(buf) {
3163 if memchr(b'\n', &buf[..second_start]).is_some() {
3165 trace!("might be ndjson");
3166 is_ndjson = serde_json::from_slice::<serde_json::Value>(
3167 &buf[second_start..=second_end],
3168 )
3169 .is_ok();
3170 }
3171 }
3172 }
3173
3174 if is_ndjson {
3175 magic.push_message(Cow::Borrowed("New Line Delimited"));
3176 magic.set_mime_type(Cow::Borrowed("application/x-ndjson"));
3177 magic.insert_extensions(["ndjson", "jsonl"].into_iter());
3178 } else {
3179 magic.set_mime_type(Cow::Borrowed("application/json"));
3180 magic.insert_extensions(["json"].into_iter());
3181 }
3182
3183 magic.push_message(Cow::Borrowed("JSON text data"));
3184 magic.set_source(Some(HARDCODED_SOURCE));
3185 magic.update_strength(HARDCODED_MAGIC_STRENGTH);
3186 Ok(true)
3187 }
3188
3189 #[inline(always)]
3190 fn try_csv<R: Read + Seek>(
3191 haystack: &mut LazyCache<R>,
3192 stream_kind: StreamKind,
3193 magic: &mut Magic,
3194 ) -> Result<bool, Error> {
3195 let StreamKind::Text(enc) = stream_kind else {
3197 return Ok(false);
3198 };
3199
3200 let buf = haystack.read_range(0..FILE_BYTES_MAX as u64)?;
3201 let mut reader = csv::Reader::from_reader(io::Cursor::new(buf));
3202 let mut records = reader.records();
3203
3204 let Some(Ok(first)) = records.next() else {
3205 return Ok(false);
3206 };
3207
3208 if first.len() <= 1 {
3212 return Ok(false);
3213 }
3214
3215 let mut n = 1;
3217 for i in records.take(9) {
3218 if let Ok(rec) = i {
3219 if first.len() != rec.len() {
3220 return Ok(false);
3221 }
3222 } else {
3223 return Ok(false);
3224 }
3225 n += 1;
3226 }
3227
3228 if n != 10 {
3230 return Ok(false);
3231 }
3232
3233 magic.set_mime_type(Cow::Borrowed("text/csv"));
3234 magic.push_message(Cow::Borrowed("CSV"));
3235 magic.push_message(Cow::Borrowed(enc.as_magic_str()));
3236 magic.push_message(Cow::Borrowed("text"));
3237 magic.insert_extensions(["csv"].into_iter());
3238 magic.set_source(Some(HARDCODED_SOURCE));
3239 magic.update_strength(HARDCODED_MAGIC_STRENGTH);
3240 Ok(true)
3241 }
3242
3243 #[inline(always)]
3244 fn try_tar<R: Read + Seek>(
3245 haystack: &mut LazyCache<R>,
3246 stream_kind: StreamKind,
3247 magic: &mut Magic,
3248 ) -> Result<bool, Error> {
3249 if !matches!(stream_kind, StreamKind::Binary) {
3251 return Ok(false);
3252 }
3253
3254 let buf = haystack.read_range(0..FILE_BYTES_MAX as u64)?;
3255 let mut ar = Archive::new(io::Cursor::new(buf));
3256
3257 let Ok(mut entries) = ar.entries() else {
3258 return Ok(false);
3259 };
3260
3261 let Some(Ok(first)) = entries.next() else {
3262 return Ok(false);
3263 };
3264
3265 let header = first.header();
3266
3267 if header.as_ustar().is_some() {
3268 magic.push_message(Cow::Borrowed("POSIX tar archive"));
3269 } else if header.as_gnu().is_some() {
3270 magic.push_message(Cow::Borrowed("POSIX tar archive (GNU)"));
3271 } else {
3272 magic.push_message(Cow::Borrowed("tar archive"));
3273 }
3274
3275 magic.set_mime_type(Cow::Borrowed("application/x-tar"));
3276 magic.set_source(Some(HARDCODED_SOURCE));
3277 magic.update_strength(HARDCODED_MAGIC_STRENGTH);
3278 magic.insert_extensions(["tar"].into_iter());
3279 Ok(true)
3280 }
3281
3282 #[inline(always)]
3283 fn try_hard_magic<R: Read + Seek>(
3284 haystack: &mut LazyCache<R>,
3285 stream_kind: StreamKind,
3286 magic: &mut Magic,
3287 ) -> Result<bool, Error> {
3288 Ok(Self::try_json(haystack, stream_kind, magic)?
3289 || Self::try_csv(haystack, stream_kind, magic)?
3290 || Self::try_tar(haystack, stream_kind, magic)?)
3291 }
3292
3293 #[inline(always)]
3294 fn magic_default<'m, R: Read + Seek>(
3295 haystack: &mut LazyCache<R>,
3296 stream_kind: StreamKind,
3297 magic: &mut Magic<'m>,
3298 ) -> Result<(), Error> {
3299 let buf = haystack.read_range(0..FILE_BYTES_MAX as u64)?;
3300
3301 magic.set_source(Some(HARDCODED_SOURCE));
3302 magic.set_stream_kind(stream_kind);
3303 magic.is_default = true;
3304
3305 if buf.is_empty() {
3306 magic.push_message(Cow::Borrowed("empty"));
3307 magic.set_mime_type(Cow::Borrowed(DEFAULT_BIN_MIMETYPE));
3308 return Ok(());
3309 }
3310
3311 match stream_kind {
3312 StreamKind::Binary => {
3313 magic.push_message(Cow::Borrowed("data"));
3314 }
3315 StreamKind::Text(e) => {
3316 magic.push_message(Cow::Borrowed(e.as_magic_str()));
3317 magic.push_message(Cow::Borrowed("text"));
3318 }
3319 }
3320
3321 Ok(())
3322 }
3323
3324 pub fn load(&mut self, mf: MagicSource) -> Result<&mut Self, Error> {
3334 for rule in mf.rules.into_iter() {
3335 let mut rule = rule;
3336 rule.set_id(self.next_rule_id());
3337
3338 self.rules.push(rule);
3339 }
3340
3341 self.dependencies.extend(mf.dependencies);
3342 self.prepare();
3343 Ok(self)
3344 }
3345
    /// Returns the rules currently held by the database, in their stored
    /// (sorted) order.
    pub fn rules(&self) -> &[MagicRule] {
        &self.rules
    }
3354
3355 #[inline]
3356 fn first_magic_with_stream_kind<R: Read + Seek>(
3357 &self,
3358 haystack: &mut LazyCache<R>,
3359 stream_kind: StreamKind,
3360 extension: Option<&str>,
3361 ) -> Result<Magic<'_>, Error> {
3362 let mut magic = Magic::default();
3364
3365 if Self::try_hard_magic(haystack, stream_kind, &mut magic)? {
3366 return Ok(magic);
3367 }
3368
3369 let mut marked = vec![false; self.rules.len()];
3370
3371 macro_rules! do_magic {
3372 ($rule: expr) => {{
3373 $rule.magic_entrypoint(&mut magic, stream_kind, haystack, &self, false, 0)?;
3374
3375 if !magic.message.is_empty() {
3376 magic.set_stream_kind(stream_kind);
3377 magic.set_source($rule.source.as_deref());
3378 return Ok(magic);
3379 }
3380
3381 magic.reset();
3382 }};
3383 }
3384
3385 if let Some(ext) = extension.map(|e| e.to_lowercase())
3386 && !ext.is_empty()
3387 {
3388 for rule in self.rules.iter().filter(|r| r.extensions.contains(&ext)) {
3389 do_magic!(rule);
3390 if let Some(f) = marked.get_mut(rule.id) {
3391 *f = true
3392 }
3393 }
3394 }
3395
3396 for rule in self
3397 .rules
3398 .iter()
3399 .filter(|r| !*marked.get(r.id).unwrap_or(&false))
3401 {
3402 do_magic!(rule)
3403 }
3404
3405 Self::magic_default(haystack, stream_kind, &mut magic)?;
3406
3407 Ok(magic)
3408 }
3409
3410 pub fn first_magic<R: Read + Seek>(
3424 &self,
3425 r: &mut R,
3426 extension: Option<&str>,
3427 ) -> Result<Magic<'_>, Error> {
3428 let mut haystack = Self::open_reader(r)?;
3429 let stream_kind = guess_stream_kind(haystack.read_range(0..FILE_BYTES_MAX as u64)?);
3430 self.first_magic_with_stream_kind(&mut haystack, stream_kind, extension)
3431 }
3432
3433 #[inline(always)]
3434 fn all_magics_sort_with_stream_kind<R: Read + Seek>(
3435 &self,
3436 haystack: &mut LazyCache<R>,
3437 stream_kind: StreamKind,
3438 ) -> Result<Vec<Magic<'_>>, Error> {
3439 let mut out = Vec::new();
3440
3441 let mut magic = Magic::default();
3442
3443 if Self::try_hard_magic(haystack, stream_kind, &mut magic)? {
3444 out.push(magic);
3445 magic = Magic::default();
3446 }
3447
3448 for rule in self.rules.iter() {
3449 rule.magic_entrypoint(&mut magic, stream_kind, haystack, self, false, 0)?;
3450
3451 if !magic.message.is_empty() {
3453 magic.set_stream_kind(stream_kind);
3454 magic.set_source(rule.source.as_deref());
3455 out.push(magic);
3456 magic = Magic::default();
3457 }
3458
3459 magic.reset();
3460 }
3461
3462 Self::magic_default(haystack, stream_kind, &mut magic)?;
3463 out.push(magic);
3464
3465 out.sort_by_key(|b| std::cmp::Reverse(b.strength()));
3466
3467 Ok(out)
3468 }
3469
3470 pub fn all_magics<R: Read + Seek>(&self, r: &mut R) -> Result<Vec<Magic<'_>>, Error> {
3480 let mut haystack = Self::open_reader(r)?;
3481 let stream_kind = guess_stream_kind(haystack.read_range(0..FILE_BYTES_MAX as u64)?);
3482 self.all_magics_sort_with_stream_kind(&mut haystack, stream_kind)
3483 }
3484
3485 #[inline(always)]
3486 fn best_magic_with_stream_kind<R: Read + Seek>(
3487 &self,
3488 haystack: &mut LazyCache<R>,
3489 stream_kind: StreamKind,
3490 ) -> Result<Magic<'_>, Error> {
3491 let magics = self.all_magics_sort_with_stream_kind(haystack, stream_kind)?;
3492
3493 return Ok(magics
3495 .into_iter()
3496 .next()
3497 .expect("magics must at least contain default"));
3498 }
3499
3500 pub fn best_magic<R: Read + Seek>(&self, r: &mut R) -> Result<Magic<'_>, Error> {
3510 let mut haystack = Self::open_reader(r)?;
3511 let stream_kind = guess_stream_kind(haystack.read_range(0..FILE_BYTES_MAX as u64)?);
3512 self.best_magic_with_stream_kind(&mut haystack, stream_kind)
3513 }
3514
3515 pub fn serialize<W: Write>(self, w: &mut W) -> Result<(), Error> {
3521 let mut encoder = GzEncoder::new(w, Compression::best());
3522
3523 bincode::serde::encode_into_std_write(&self, &mut encoder, bincode::config::standard())?;
3524 encoder.finish()?;
3525 Ok(())
3526 }
3527
3528 pub fn deserialize<R: Read>(r: &mut R) -> Result<Self, Error> {
3538 let mut buf = vec![];
3539 let mut gz = GzDecoder::new(r);
3540 gz.read_to_end(&mut buf).map_err(|e| {
3541 bincode::error::DecodeError::OtherString(format!("failed to read: {e}"))
3542 })?;
3543 let (sdb, _): (MagicDb, usize) =
3544 bincode::serde::decode_from_slice(&buf, bincode::config::standard())?;
3545 Ok(sdb)
3546 }
3547
3548 #[inline(always)]
3549 fn prepare(&mut self) {
3550 self.rules
3551 .iter_mut()
3552 .for_each(|r| r.try_finalize(&self.dependencies));
3553
3554 self.rules.sort_by_key(|r| (r.is_text(), -(r.score as i64)));
3556 }
3557}
3558
3559#[cfg(test)]
3560mod tests {
3561 use std::io::Cursor;
3562
3563 use regex::bytes::Regex;
3564
3565 use crate::utils::unix_local_time_to_string;
3566
3567 use super::*;
3568
3569 macro_rules! lazy_cache {
3570 ($l: literal) => {
3571 LazyCache::from_read_seek(Cursor::new($l)).unwrap()
3572 };
3573 }
3574
3575 fn first_magic(
3576 rule: &str,
3577 content: &[u8],
3578 stream_kind: StreamKind,
3579 ) -> Result<Magic<'static>, Error> {
3580 let mut md = MagicDb::new();
3581 md.load(
3582 FileMagicParser::parse_str(rule, None)
3583 .inspect_err(|e| eprintln!("{e}"))
3584 .unwrap(),
3585 )
3586 .unwrap();
3587 let mut reader = LazyCache::from_read_seek(Cursor::new(content)).unwrap();
3588 let v = md.best_magic_with_stream_kind(&mut reader, stream_kind)?;
3589 Ok(v.into_owned())
3590 }
3591
    // Debugging aid: installs a `tracing_subscriber` formatter at TRACE level.
    // `try_init` is used, so the result of a repeated initialisation is simply
    // returned instead of panicking. The macro is not used by default, hence
    // the `unused_macros` allowance.
    #[allow(unused_macros)]
    macro_rules! enable_trace {
        () => {
            tracing_subscriber::fmt()
                .with_max_level(tracing_subscriber::filter::LevelFilter::TRACE)
                .try_init();
        };
    }
3601
3602 macro_rules! parse_assert {
3603 ($rule:literal) => {
3604 FileMagicParser::parse_str($rule, None)
3605 .inspect_err(|e| eprintln!("{e}"))
3606 .unwrap();
3607 };
3608 }
3609
3610 macro_rules! assert_magic_match_bin {
3611 ($rule: literal, $content:literal) => {{ first_magic($rule, $content, StreamKind::Binary).unwrap() }};
3612 ($rule: literal, $content:literal, $message:expr) => {{
3613 assert_eq!(
3614 first_magic($rule, $content, StreamKind::Binary)
3615 .unwrap()
3616 .message(),
3617 $message
3618 );
3619 }};
3620 }
3621
3622 macro_rules! assert_magic_match_text {
3623 ($rule: literal, $content:literal) => {{ first_magic($rule, $content, StreamKind::Text(TextEncoding::Utf8)).unwrap() }};
3624 ($rule: literal, $content:literal, $message:expr) => {{
3625 assert_eq!(
3626 first_magic($rule, $content, StreamKind::Text(TextEncoding::Utf8))
3627 .unwrap()
3628 .message(),
3629 $message
3630 );
3631 }};
3632 }
3633
3634 macro_rules! assert_magic_not_match_text {
3635 ($rule: literal, $content:literal) => {{
3636 assert!(
3637 first_magic($rule, $content, StreamKind::Text(TextEncoding::Utf8))
3638 .unwrap()
3639 .is_default()
3640 );
3641 }};
3642 }
3643
3644 macro_rules! assert_magic_not_match_bin {
3645 ($rule: literal, $content:literal) => {{
3646 assert!(
3647 first_magic($rule, $content, StreamKind::Binary)
3648 .unwrap()
3649 .is_default()
3650 );
3651 }};
3652 }
3653
    /// Exercises `regex` rules on text and binary content, including
    /// continuation lines placed relative to the regex match (`>&0`).
    #[test]
    fn test_regex() {
        // Shebang detection: the continuation regex captures the interpreter
        // name, which is substituted into the message through `%s`.
        assert_magic_match_text!(
            r#"
0 regex/1024 \^#![[:space:]]*/usr/bin/env[[:space:]]+
!:mime text/x-shellscript
>&0 regex/64 .*($|\\b) %s shell script text executable
 "#,
            br#"#!/usr/bin/env bash
 echo hello world"#,
            "bash shell script text executable"
        );

        // Sanity check: a non-Unicode regex matches raw bytes.
        let re = Regex::new(r"(?-u)\x42\x82").unwrap();
        assert!(re.is_match(b"\x42\x82"));

        assert_magic_match_bin!(
            r#"0 regex \x42\x82 binary regex match"#,
            b"\x00\x00\x00\x00\x00\x00\x42\x82"
        );

        // Continuation string placed right after the regex match.
        assert_magic_match_bin!(
            r#"
 0 regex \x42\x82
 >&0 string \xde\xad\xbe\xef it works
 "#,
            b"\x00\x00\x00\x00\x00\x00\x42\x82\xde\xad\xbe\xef"
        );

        // With `/s` the continuation string starts with the matched bytes.
        assert_magic_match_bin!(
            r#"
 0 regex/s \x42\x82
 >&0 string \x42\x82\xde\xad\xbe\xef it works
 "#,
            b"\x00\x00\x00\x00\x00\x00\x42\x82\xde\xad\xbe\xef"
        );

        // Anchored pattern on content that spans several lines.
        assert_magic_match_text!(
            r#"
0 regex/1024 \^HelloWorld$ HelloWorld String"#,
            br#"
// this is a comment after an empty line
HelloWorld
 "#
        );
    }
3703
    /// Exercises the `string` test with the `/w`, `/C`, `/c`, `/f` and `/W`
    /// modifiers, with one matching and (where present) one non-matching
    /// input per modifier.
    #[test]
    fn test_string_with_mods() {
        // `/w` modifier.
        assert_magic_match_text!(
            r#"0 string/w #!\ \ \ /usr/bin/env\ bash BASH
 "#,
            b"#! /usr/bin/env bash i
 echo hello world"
        );

        // `/C` modifier: all-lower-case input matches, inverted case does not.
        assert_magic_match_text!(
            r#"0 string/C HelloWorld it works
 "#,
            b"helloworld"
        );

        assert_magic_not_match_text!(
            r#"0 string/C HelloWorld it works
 "#,
            b"hELLOwORLD"
        );

        // `/c` modifier: all-upper-case input matches, all-lower-case does not.
        assert_magic_match_text!(
            r#"0 string/c HelloWorld it works
 "#,
            b"HELLOWORLD"
        );

        assert_magic_not_match_text!(
            r#"0 string/c HelloWorld it works
 "#,
            b"helloworld"
        );

        // `/f` modifier: a longer word sharing the prefix must not match.
        assert_magic_match_text!(
            r#"0 string/f #!/usr/bin/env\ bash BASH
 "#,
            b"#!/usr/bin/env bash"
        );

        assert_magic_not_match_text!(
            r#"0 string/f #!/usr/bin/python PYTHON"#,
            b"#!/usr/bin/pythonic"
        );

        // `/W` modifier.
        assert_magic_match_text!(
            r#"0 string/W #!/usr/bin/env\ python PYTHON"#,
            b"#!/usr/bin/env python"
        );

        assert_magic_not_match_text!(
            r#"0 string/W #!/usr/bin/env\ \ python PYTHON"#,
            b"#!/usr/bin/env python"
        );
    }
3762
    /// Exercises the `search` test with modifiers, and how `/s` affects the
    /// position used by a `>&0` continuation.
    #[test]
    fn test_search_with_mods() {
        assert_magic_match_text!(
            r#"0 search/1/fwt #!\ /usr/bin/luatex LuaTex script text executable"#,
            b"#! /usr/bin/luatex "
        );

        // With `/s` the continuation sees the searched string again.
        assert_magic_match_text!(
            r#"
 0 search/s /usr/bin/env
 >&0 string /usr/bin/env it works
 "#,
            b"#!/usr/bin/env python"
        );

        // Without `/s` the same continuation is expected not to match.
        assert_magic_not_match_text!(
            r#"
 0 search /usr/bin/env
 >&0 string /usr/bin/env it works
 "#,
            b"#!/usr/bin/env python"
        );
    }
3787
    /// Exercises length-prefixed strings (`pstring`) with one-, two- and
    /// four-byte length prefixes in both byte orders, with and without the
    /// `J` modifier (test data where the length also counts the prefix).
    #[test]
    fn test_pstring() {
        // Default: one-byte length prefix.
        assert_magic_match_bin!(r#"0 pstring Toast it works"#, b"\x05Toast");

        assert_magic_match_bin!(r#"0 pstring Toast %s"#, b"\x05Toast", "Toast");

        assert_magic_not_match_bin!(r#"0 pstring Toast Doesn't work"#, b"\x07Toaster");

        // `/H`: two-byte big-endian length.
        assert_magic_match_bin!(r#"0 pstring/H Toast it works"#, b"\x00\x05Toast");

        assert_magic_match_bin!(r#"0 pstring/HJ Toast it works"#, b"\x00\x07Toast");

        assert_magic_match_bin!(r#"0 pstring/HJ Toast %s"#, b"\x00\x07Toast", "Toast");

        // `/h`: two-byte little-endian length.
        assert_magic_match_bin!(r#"0 pstring/h Toast it works"#, b"\x05\x00Toast");

        assert_magic_match_bin!(r#"0 pstring/hJ Toast it works"#, b"\x07\x00Toast");

        // `/L`: four-byte big-endian length.
        assert_magic_match_bin!(r#"0 pstring/L Toast it works"#, b"\x00\x00\x00\x05Toast");

        assert_magic_match_bin!(r#"0 pstring/LJ Toast it works"#, b"\x00\x00\x00\x09Toast");

        // `/l`: four-byte little-endian length.
        assert_magic_match_bin!(r#"0 pstring/l Toast it works"#, b"\x05\x00\x00\x00Toast");

        assert_magic_match_bin!(r#"0 pstring/lJ Toast it works"#, b"\x09\x00\x00\x00Toast");
    }
3815
3816 #[test]
3817 fn test_max_recursion() {
3818 let res = first_magic(
3819 r#"0 indirect x"#,
3820 b"#! /usr/bin/luatex ",
3821 StreamKind::Binary,
3822 );
3823 assert!(res.is_err());
3824 let _ = res.inspect_err(|e| {
3825 assert!(matches!(
3826 e.unwrap_localized(),
3827 Error::MaximumRecursion(MAX_RECURSION)
3828 ))
3829 });
3830 }
3831
    /// Exercises the comparison operators accepted by `string` tests
    /// (equality, `!`, `>` and `<`).
    #[test]
    fn test_string_ops() {
        assert_magic_match_text!("0 string/b MZ MZ File", b"MZ\0");
        assert_magic_match_text!("0 string !MZ Not MZ File", b"AZ\0");
        assert_magic_match_text!("0 string >\0 Any String", b"A\0");
        assert_magic_match_text!("0 string >Test Any String", b"Test 1\0");
        assert_magic_match_text!("0 string <Test Any String", b"\0");
        assert_magic_not_match_text!("0 string >Test Any String", b"\0");
    }
3841
    /// Exercises little-endian UTF-16 strings (`lestring16`): exact match,
    /// `x` with `%s` formatting, rejection of big-endian data, and a non-zero
    /// offset.
    #[test]
    fn test_lestring16() {
        assert_magic_match_bin!(
            "0 lestring16 abcd Little-endian UTF-16 string",
            b"\x61\x00\x62\x00\x63\x00\x64\x00"
        );
        assert_magic_match_bin!(
            "0 lestring16 x %s",
            b"\x61\x00\x62\x00\x63\x00\x64\x00\x00",
            "abcd"
        );
        assert_magic_not_match_bin!(
            "0 lestring16 abcd Little-endian UTF-16 string",
            b"\x00\x61\x00\x62\x00\x63\x00\x64"
        );
        assert_magic_match_bin!(
            "4 lestring16 abcd Little-endian UTF-16 string",
            b"\x00\x00\x00\x00\x61\x00\x62\x00\x63\x00\x64\x00"
        );
    }
3862
    /// Exercises big-endian UTF-16 strings (`bestring16`): exact match, `x`
    /// with `%s` formatting, rejection of little-endian data, and a non-zero
    /// offset.
    #[test]
    fn test_bestring16() {
        assert_magic_match_bin!(
            "0 bestring16 abcd Big-endian UTF-16 string",
            b"\x00\x61\x00\x62\x00\x63\x00\x64"
        );
        assert_magic_match_bin!(
            "0 bestring16 x %s",
            b"\x00\x61\x00\x62\x00\x63\x00\x64",
            "abcd"
        );
        assert_magic_not_match_bin!(
            "0 bestring16 abcd Big-endian UTF-16 string",
            b"\x61\x00\x62\x00\x63\x00\x64\x00"
        );
        assert_magic_match_bin!(
            "4 bestring16 abcd Big-endian UTF-16 string",
            b"\x00\x00\x00\x00\x00\x61\x00\x62\x00\x63\x00\x64"
        );
    }
3883
    /// Exercises negative offsets, which address bytes from the end of the
    /// content.
    #[test]
    fn test_offset_from_end() {
        assert_magic_match_bin!("-1 ubyte 0x42 last byte ok", b"\x00\x00\x42");
        assert_magic_match_bin!("-2 ubyte 0x41 last byte ok", b"\x00\x41\x00");
    }
3889
    /// Exercises chained relative offsets (`&0`): each continuation level
    /// reads the byte following the one tested by its parent.
    #[test]
    fn test_relative_offset() {
        assert_magic_match_bin!(
            "
 0 ubyte 0x42
 >&0 ubyte 0x00
 >>&0 ubyte 0x41 third byte ok
 ",
            b"\x42\x00\x41\x00"
        );
    }
3901
    /// Exercises indirect offsets: the offset is read from the content
    /// itself, optionally adjusted by a constant or by another indirect
    /// value.
    #[test]
    fn test_indirect_offset() {
        assert_magic_match_bin!("(0.l) ubyte 0x42 it works", b"\x04\x00\x00\x00\x42");
        // Indirect value plus a constant.
        assert_magic_match_bin!("(0.l+3) ubyte 0x42 it works", b"\x01\x00\x00\x00\x42");
        // Indirect value plus another indirect value.
        assert_magic_match_bin!(
            "(0.l+(4)) ubyte 0x42 it works",
            b"\x04\x00\x00\x00\x04\x00\x00\x00\x42"
        );
    }
3913
    /// Checks that the message of a `use` line and the message of the named
    /// rule it refers to are both reported, in that order.
    #[test]
    fn test_use_with_message() {
        assert_magic_match_bin!(
            r#"
0 string MZ
>0 use mz first match

0 name mz then second match
>0 string MZ
"#,
            b"MZ\0",
            "first match then second match"
        );
    }
3928
    /// Exercises arithmetic and bitwise transforms applied to a scalar before
    /// comparison, and checks that division or modulo by zero is rejected at
    /// parse time.
    #[test]
    fn test_scalar_transform() {
        assert_magic_match_bin!("0 ubyte+1 0x1 add works", b"\x00");
        assert_magic_match_bin!("0 ubyte-1 0xfe sub works", b"\xff");
        assert_magic_match_bin!("0 ubyte%2 0 mod works", b"\x0a");
        assert_magic_match_bin!("0 ubyte&0x0f 0x0f bitand works", b"\xff");
        assert_magic_match_bin!("0 ubyte|0x0f 0xff bitor works", b"\xf0");
        assert_magic_match_bin!("0 ubyte^0x0f 0xf0 bitxor works", b"\xff");

        // A zero divisor must be a parse error, not a runtime failure.
        FileMagicParser::parse_str("0 ubyte%0 mod by zero", None)
            .expect_err("expect div by zero error");
        FileMagicParser::parse_str("0 ubyte/0 div by zero", None)
            .expect_err("expect div by zero error");
    }
3943
    /// Exercises big-endian 32-bit values (`belong`) with every comparison
    /// operator: equality, `<`, `>`, `&`, `^`, `~` and `x`.
    #[test]
    fn test_belong() {
        // Equality, including byte order and a non-zero offset.
        assert_magic_match_bin!("0 belong 0x12345678 Big-endian long", b"\x12\x34\x56\x78");
        assert_magic_not_match_bin!("0 belong 0x12345678 Big-endian long", b"\x78\x56\x34\x12");
        assert_magic_match_bin!(
            "4 belong 0x12345678 Big-endian long",
            b"\x00\x00\x00\x00\x12\x34\x56\x78"
        );
        // Less than.
        assert_magic_match_bin!("0 belong <0x12345678 Big-endian long", b"\x12\x34\x56\x77");
        assert_magic_not_match_bin!("0 belong <0x12345678 Big-endian long", b"\x12\x34\x56\x78");

        // Greater than.
        assert_magic_match_bin!("0 belong >0x12345678 Big-endian long", b"\x12\x34\x56\x79");
        assert_magic_not_match_bin!("0 belong >0x12345678 Big-endian long", b"\x12\x34\x56\x78");

        // `&` operator.
        assert_magic_match_bin!("0 belong &0x5678 Big-endian long", b"\x00\x00\x56\x78");
        assert_magic_not_match_bin!("0 belong &0x0000FFFF Big-endian long", b"\x12\x34\x56\x78");

        // `^` operator.
        assert_magic_match_bin!("0 belong ^0xFFFF0000 Big-endian long", b"\x00\x00\x56\x78");
        assert_magic_not_match_bin!("0 belong ^0xFFFF0000 Big-endian long", b"\x00\x01\x56\x78");

        // `~` operator.
        assert_magic_match_bin!("0 belong ~0x12345678 Big-endian long", b"\xed\xcb\xa9\x87");
        assert_magic_not_match_bin!("0 belong ~0x12345678 Big-endian long", b"\x12\x34\x56\x78");

        // `x` accepts any value.
        assert_magic_match_bin!("0 belong x Big-endian long", b"\x12\x34\x56\x78");
        assert_magic_match_bin!("0 belong x Big-endian long", b"\x78\x56\x34\x12");
    }
3979
    /// Checks that `search` rules parse with no modifier and with the range
    /// and flag modifiers given in either order.
    #[test]
    fn test_parse_search() {
        parse_assert!("0 search test");
        parse_assert!("0 search/24/s test");
        parse_assert!("0 search/s/24 test");
    }
3986
    /// Exercises big-endian 32-bit dates (`bedate`): match, mismatch, and
    /// `%s` formatting of the timestamp.
    #[test]
    fn test_bedate() {
        assert_magic_match_bin!(
            "0 bedate 946684800 Unix date (Jan 1, 2000)",
            b"\x38\x6D\x43\x80"
        );
        assert_magic_not_match_bin!(
            "0 bedate 946684800 Unix date (Jan 1, 2000)",
            b"\x00\x00\x00\x00"
        );
        assert_magic_match_bin!(
            "4 bedate 946684800 %s",
            b"\x00\x00\x00\x00\x38\x6D\x43\x80",
            "2000-01-01 00:00:00"
        );
    }
    /// Exercises big-endian 32-bit local dates (`beldate`); the formatted
    /// value is compared with `unix_local_time_to_string`.
    #[test]
    fn test_beldate() {
        assert_magic_match_bin!(
            "0 beldate 946684800 Local date (Jan 1, 2000)",
            b"\x38\x6D\x43\x80"
        );
        assert_magic_not_match_bin!(
            "0 beldate 946684800 Local date (Jan 1, 2000)",
            b"\x00\x00\x00\x00"
        );

        assert_magic_match_bin!(
            "4 beldate 946684800 {}",
            b"\x00\x00\x00\x00\x38\x6D\x43\x80",
            unix_local_time_to_string(946684800)
        );
    }
4020
    /// Exercises big-endian 64-bit dates (`beqdate`): match, mismatch, and
    /// `%s` formatting of the timestamp.
    #[test]
    fn test_beqdate() {
        assert_magic_match_bin!(
            "0 beqdate 946684800 Unix date (Jan 1, 2000)",
            b"\x00\x00\x00\x00\x38\x6D\x43\x80"
        );

        assert_magic_not_match_bin!(
            "0 beqdate 946684800 Unix date (Jan 1, 2000)",
            b"\x00\x00\x00\x00\x00\x00\x00\x00"
        );

        assert_magic_match_bin!(
            "0 beqdate 946684800 %s",
            b"\x00\x00\x00\x00\x38\x6D\x43\x80",
            "2000-01-01 00:00:00"
        );
    }
4039
    /// Exercises middle-endian 32-bit dates (`medate`): match, mismatch, and
    /// `%s` formatting of the timestamp.
    #[test]
    fn test_medate() {
        assert_magic_match_bin!(
            "0 medate 946684800 Unix date (Jan 1, 2000)",
            b"\x6D\x38\x80\x43"
        );

        assert_magic_not_match_bin!(
            "0 medate 946684800 Unix date (Jan 1, 2000)",
            b"\x00\x00\x00\x00"
        );

        assert_magic_match_bin!(
            "4 medate 946684800 %s",
            b"\x00\x00\x00\x00\x6D\x38\x80\x43",
            "2000-01-01 00:00:00"
        );
    }
4058
    /// Exercises middle-endian 32-bit local dates (`meldate`); the formatted
    /// value is compared with `unix_local_time_to_string`.
    #[test]
    fn test_meldate() {
        assert_magic_match_bin!(
            "0 meldate 946684800 Local date (Jan 1, 2000)",
            b"\x6D\x38\x80\x43"
        );
        assert_magic_not_match_bin!(
            "0 meldate 946684800 Local date (Jan 1, 2000)",
            b"\x00\x00\x00\x00"
        );

        assert_magic_match_bin!(
            "4 meldate 946684800 %s",
            b"\x00\x00\x00\x00\x6D\x38\x80\x43",
            unix_local_time_to_string(946684800)
        );
    }
4076
    /// Exercises 32-bit dates (`date`) on little-endian encoded test data:
    /// match, mismatch, and `{}` formatting of the timestamp.
    #[test]
    fn test_date() {
        assert_magic_match_bin!(
            "0 date 946684800 Local date (Jan 1, 2000)",
            b"\x80\x43\x6D\x38"
        );
        assert_magic_not_match_bin!(
            "0 date 946684800 Local date (Jan 1, 2000)",
            b"\x00\x00\x00\x00"
        );
        assert_magic_match_bin!(
            "4 date 946684800 {}",
            b"\x00\x00\x00\x00\x80\x43\x6D\x38",
            "2000-01-01 00:00:00"
        );
    }
4093
    /// Exercises little-endian 32-bit local dates (`leldate`); the formatted
    /// value is compared with `unix_local_time_to_string`.
    #[test]
    fn test_leldate() {
        assert_magic_match_bin!(
            "0 leldate 946684800 Local date (Jan 1, 2000)",
            b"\x80\x43\x6D\x38"
        );
        assert_magic_not_match_bin!(
            "0 leldate 946684800 Local date (Jan 1, 2000)",
            b"\x00\x00\x00\x00"
        );
        assert_magic_match_bin!(
            "4 leldate 946684800 {}",
            b"\x00\x00\x00\x00\x80\x43\x6D\x38",
            unix_local_time_to_string(946684800)
        );
    }
4110
    /// Exercises little-endian 64-bit dates (`leqdate`): match, mismatch, and
    /// `%s` formatting of the timestamp at a non-zero offset.
    #[test]
    fn test_leqdate() {
        assert_magic_match_bin!(
            "0 leqdate 1577836800 Unix date (Jan 1, 2020)",
            b"\x00\xe1\x0b\x5E\x00\x00\x00\x00"
        );

        assert_magic_not_match_bin!(
            "0 leqdate 1577836800 Unix date (Jan 1, 2020)",
            b"\x00\x00\x00\x00\x00\x00\x00\x00"
        );
        assert_magic_match_bin!(
            "8 leqdate 1577836800 %s",
            b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\xE1\x0B\x5E\x00\x00\x00\x00",
            "2020-01-01 00:00:00"
        );
    }
4128
    /// Exercises little-endian 64-bit local dates (`leqldate`); the formatted
    /// value is compared with `unix_local_time_to_string`.
    #[test]
    fn test_leqldate() {
        assert_magic_match_bin!(
            "0 leqldate 1577836800 Unix date (Jan 1, 2020)",
            b"\x00\xe1\x0b\x5E\x00\x00\x00\x00"
        );

        assert_magic_not_match_bin!(
            "0 leqldate 1577836800 Unix date (Jan 1, 2020)",
            b"\x00\x00\x00\x00\x00\x00\x00\x00"
        );
        assert_magic_match_bin!(
            "8 leqldate 1577836800 %s",
            b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\xE1\x0B\x5E\x00\x00\x00\x00",
            unix_local_time_to_string(1577836800)
        );
    }
4146
    /// Exercises middle-endian 32-bit values (`melong`) with every comparison
    /// operator: `=`, `<`, `>`, `&`, `^`, `~` and `x`.
    #[test]
    fn test_melong() {
        // Equality.
        assert_magic_match_bin!(
            "0 melong =0x12345678 Middle-endian long",
            b"\x34\x12\x78\x56"
        );
        assert_magic_not_match_bin!(
            "0 melong =0x12345678 Middle-endian long",
            b"\x00\x00\x00\x00"
        );

        // Less than.
        assert_magic_match_bin!(
            "0 melong <0x12345678 Middle-endian long",
            b"\x34\x12\x78\x55"
        );
        assert_magic_not_match_bin!(
            "0 melong <0x12345678 Middle-endian long",
            b"\x34\x12\x78\x56"
        );

        // Greater than.
        assert_magic_match_bin!(
            "0 melong >0x12345678 Middle-endian long",
            b"\x34\x12\x78\x57"
        );
        assert_magic_not_match_bin!(
            "0 melong >0x12345678 Middle-endian long",
            b"\x34\x12\x78\x56"
        );

        // `&` operator.
        assert_magic_match_bin!("0 melong &0x5678 Middle-endian long", b"\xab\xcd\x78\x56");
        assert_magic_not_match_bin!(
            "0 melong &0x0000FFFF Middle-endian long",
            b"\x34\x12\x78\x56"
        );

        // `^` operator.
        assert_magic_match_bin!(
            "0 melong ^0xFFFF0000 Middle-endian long",
            b"\x00\x00\x78\x56"
        );
        assert_magic_not_match_bin!(
            "0 melong ^0xFFFF0000 Middle-endian long",
            b"\x00\x01\x78\x56"
        );

        // `~` operator.
        assert_magic_match_bin!(
            "0 melong ~0x12345678 Middle-endian long",
            b"\xCB\xED\x87\xA9"
        );
        assert_magic_not_match_bin!(
            "0 melong ~0x12345678 Middle-endian long",
            b"\x34\x12\x78\x56"
        );

        // `x` accepts any value.
        assert_magic_match_bin!("0 melong x Middle-endian long", b"\x34\x12\x78\x56");
        assert_magic_match_bin!("0 melong x Middle-endian long", b"\x00\x00\x00\x00");
    }
4210
    /// Exercises unsigned 64-bit values (`uquad`, little-endian test data)
    /// with every comparison operator, plus `{:#x}` formatting of the value.
    #[test]
    fn test_uquad() {
        // Equality.
        assert_magic_match_bin!(
            "0 uquad =0x123456789ABCDEF0 Unsigned quad",
            b"\xF0\xDE\xBC\x9A\x78\x56\x34\x12"
        );
        assert_magic_not_match_bin!(
            "0 uquad =0x123456789ABCDEF0 Unsigned quad",
            b"\x00\x00\x00\x00\x00\x00\x00\x00"
        );

        // Less than.
        assert_magic_match_bin!(
            "0 uquad <0x123456789ABCDEF0 Unsigned quad",
            b"\xF0\xDE\xBC\x9A\x78\x56\x34\x11"
        );
        assert_magic_not_match_bin!(
            "0 uquad <0x123456789ABCDEF0 Unsigned quad",
            b"\xF0\xDE\xBC\x9A\x78\x56\x34\x12"
        );

        // Greater than.
        assert_magic_match_bin!(
            "0 uquad >0x123456789ABCDEF0 Unsigned quad",
            b"\xF0\xDE\xBC\x9A\x78\x56\x34\x13"
        );
        assert_magic_not_match_bin!(
            "0 uquad >0x123456789ABCDEF0 Unsigned quad",
            b"\xF0\xDE\xBC\x9A\x78\x56\x34\x12"
        );

        // `&` operator.
        assert_magic_match_bin!(
            "0 uquad &0xF0 Unsigned quad",
            b"\xF0\xDE\xBC\x9A\x78\x56\x34\x12"
        );
        assert_magic_not_match_bin!(
            "0 uquad &0xFF Unsigned quad",
            b"\xF0\xDE\xBC\x9A\x78\x56\x34\x12"
        );

        // `^` operator.
        assert_magic_match_bin!(
            "0 uquad ^0xFFFFFFFFFFFFFFFF Unsigned quad",
            b"\x00\x00\x00\x00\x00\x00\x00\x00"
        );
        assert_magic_not_match_bin!(
            "0 uquad ^0xFFFFFFFFFFFFFFFF Unsigned quad",
            b"\xF0\xDE\xBC\x9A\x78\x56\x34\x12"
        );

        // `~` operator.
        assert_magic_match_bin!(
            "0 uquad ~0x123456789ABCDEF0 Unsigned quad",
            b"\x0F\x21\x43\x65\x87\xA9\xCB\xED"
        );
        assert_magic_not_match_bin!(
            "0 uquad ~0x123456789ABCDEF0 Unsigned quad",
            b"\xF0\xDE\xBC\x9A\x78\x56\x34\x12"
        );

        // `x` accepts any value; the value can be formatted in the message.
        assert_magic_match_bin!(
            "0 uquad x {:#x}",
            b"\xF0\xDE\xBC\x9A\x78\x56\x34\x12",
            "0x123456789abcdef0"
        );
        assert_magic_match_bin!(
            "0 uquad x Unsigned quad",
            b"\x00\x00\x00\x00\x00\x00\x00\x00"
        );
    }
4284
    /// Exercises the `guid` test: exact match, mismatch, and `%s` formatting
    /// of the GUID read from the content.
    #[test]
    fn test_guid() {
        assert_magic_match_bin!(
            "0 guid EC959539-6786-2D4E-8FDB-98814CE76C1E It works",
            b"\xEC\x95\x95\x39\x67\x86\x2D\x4E\x8F\xDB\x98\x81\x4C\xE7\x6C\x1E"
        );

        assert_magic_not_match_bin!(
            "0 guid 399595EC-8667-4E2D-8FDB-98814CE76C1E It works",
            b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F"
        );

        assert_magic_match_bin!(
            "0 guid x %s",
            b"\xEC\x95\x95\x39\x67\x86\x2D\x4E\x8F\xDB\x98\x81\x4C\xE7\x6C\x1E",
            "EC959539-6786-2D4E-8FDB-98814CE76C1E"
        );
    }
4303
    /// Exercises unsigned big-endian 64-bit dates (`ubeqdate`): match, `%s`
    /// formatting of the timestamp, and mismatch.
    #[test]
    fn test_ubeqdate() {
        assert_magic_match_bin!(
            "0 ubeqdate 1633046400 It works",
            b"\x00\x00\x00\x00\x61\x56\x4f\x80"
        );

        assert_magic_match_bin!(
            "0 ubeqdate x %s",
            b"\x00\x00\x00\x00\x61\x56\x4f\x80",
            "2021-10-01 00:00:00"
        );

        assert_magic_not_match_bin!(
            "0 ubeqdate 1633046400 It should not work",
            b"\x00\x00\x00\x00\x00\x00\x00\x00"
        );
    }
4322
    /// Exercises 32-bit local dates (`ldate`); the formatted value is
    /// compared with `unix_local_time_to_string`.
    #[test]
    fn test_ldate() {
        assert_magic_match_bin!("0 ldate 1640551520 It works", b"\x60\xd4\xC8\x61");

        assert_magic_not_match_bin!("0 ldate 1633046400 It should not work", b"\x00\x00\x00\x00");

        assert_magic_match_bin!(
            "0 ldate x %s",
            b"\x60\xd4\xC8\x61",
            unix_local_time_to_string(1640551520)
        );
    }
4335
#[test]
fn test_scalar_with_transform() {
    // The single input byte 0x14 is 20.

    // 20 / 10 == 2, both when compared against 2 and when only printed.
    assert_magic_match_bin!(
        "0 ubyte/10 2 {}",
        b"\x14",
        "2"
    );
    assert_magic_match_bin!(
        "0 ubyte/10 x {}",
        b"\x14",
        "2"
    );

    // 20 % 10 == 0.
    assert_magic_match_bin!(
        "0 ubyte%10 x {}",
        b"\x14",
        "0"
    );
}
4342
#[test]
fn test_float_with_transform() {
    // The input bytes are 0x41a0_0000 in little-endian order, i.e. 20.0f32.

    // 20.0 / 10 == 2, both when compared against 2 and when only printed.
    assert_magic_match_bin!(
        "0 lefloat/10 2 {}",
        b"\x00\x00\xa0\x41",
        "2"
    );
    assert_magic_match_bin!(
        "0 lefloat/10 x {}",
        b"\x00\x00\xa0\x41",
        "2"
    );

    // 20.0 % 10 == 0.
    assert_magic_match_bin!(
        "0 lefloat%10 x {}",
        b"\x00\x00\xa0\x41",
        "0"
    );
}
4349
#[test]
fn test_read_octal() {
    // Parses `$input` through a fresh lazy cache and compares the outcome
    // with `$expected`.
    macro_rules! check_octal {
        ($input:literal, $expected:expr) => {
            assert_eq!(read_octal_u64(&mut lazy_cache!($input)), $expected)
        };
    }

    // Well-formed octal numbers, all introduced by a leading `0`.
    check_octal!("0", Some(0));
    check_octal!("00", Some(0));
    check_octal!("01", Some(1));
    check_octal!("07", Some(7));
    check_octal!("010", Some(8));
    check_octal!("0123", Some(83));
    check_octal!("0755", Some(493));

    // Trailing non-digit characters end the number without failing it.
    check_octal!("0ABC", Some(0));
    check_octal!("01ABC", Some(1));
    check_octal!("0755ABC", Some(493));
    check_octal!("0123ABC", Some(83));

    // `8` is not an octal digit, so it ends the number as well.
    check_octal!("08", Some(0));
    check_octal!("01238", Some(83));

    // Without the leading `0` nothing is parsed.
    check_octal!("123", None);
    check_octal!("755", None);

    // Empty input.
    check_octal!("", None);

    // Input that does not start with `0`.
    check_octal!("ABC", None);
    check_octal!("8ABC", None);

    // A longer value: octal 1777777777 == 2^28 - 1.
    check_octal!("01777777777", Some(268435455));
}
4388
#[test]
fn test_offset_bug_1() {
    // Regression test for indirect offsets resolved through nested `use`.
    //
    // Input layout (byte offsets):
    //   0: 0x00 | 1..=4: "TEST" | 5: 0x06 | 6..=10: "twice" | 11: 0x00 | 12: 0x06
    //
    // `(5.b)` reads the byte at offset 5 (0x06), which is where "twice"
    // starts. The final message is the concatenation of the three rule
    // messages: "Bread is", "Toasted" and the `%s` string value.
    assert_magic_match_bin!(
        r"
1 string TEST Bread is
# offset computation is relative to
# rule start
>(5.b) use toasted

0 name toasted
>0 string twice Toasted
>>0 use toasted_twice

0 name toasted_twice
>(6.b) string x %s
        ",
        b"\x00TEST\x06twice\x00\x06",
        "Bread is Toasted twice"
    );
}
4411
#[test]
fn test_offset_bug_2() {
    // Same scenario as `test_offset_bug_1`, but the top-level rule is
    // anchored from the end of the input: the buffer is 13 bytes long, so
    // offset -12 designates byte 1, where "TEST" starts.
    //
    // Input layout (byte offsets):
    //   0: 0x00 | 1..=4: "TEST" | 5: 0x06 | 6..=10: "twice" | 11: 0x00 | 12: 0x06
    //
    // NOTE(review): the indirect offset is `(4.b)` here versus `(5.b)` in
    // the from-start variant; presumably it is resolved relative to the
    // negative rule offset - confirm against the offset computation code.
    //
    // The last rule must use `%s` so that the string value ("twice") ends
    // up in the message.
    assert_magic_match_bin!(
        r"
-12 string TEST Bread is
>(4.b) use toasted

0 name toasted
>0 string twice Toasted
>>0 use toasted_twice

0 name toasted_twice
>(6.b) string x %s
        ",
        b"\x00TEST\x06twice\x00\x06",
        "Bread is Toasted twice"
    )
}
4437
#[test]
fn test_offset_bug_3() {
    // Regression test for `indirect/r` followed by `use`.
    //
    // Input layout (byte offsets):
    //   0: 0x00 | 1..=4: "TEST" | 5: 0x06 | 6..=10: "twice" | 11: 0x00 | 12: 0x08
    //
    // `(5.b)` reads the byte at offset 5 (0x06). The rules evaluated after
    // the indirection all use offset 0 and are expected to see "twice",
    // which yields "Toasted" and then the `%s` value.
    assert_magic_match_bin!(
        r"
1 string TEST Bread is
>(5.b) indirect/r x

0 string twice Toasted
>0 use toasted_twice

0 name toasted_twice
>0 string x %s
        ",
        b"\x00TEST\x06twice\x00\x08",
        "Bread is Toasted twice"
    )
}
4457
#[test]
fn test_offset_bug_4() {
    // Regression test for base-offset propagation through `indirect/r` and
    // a subsequent `use` with an indirect offset.
    //
    // Input layout (byte offsets):
    //   0: 0x00 | 1..=5: "Bread" | 6: 0x06 | 7..=16: "is Toasted"
    //   17: 0x0c | 18..=22: "twice" | 23: 0x00
    //
    // Each rule prints the string it matched through `%s`, so the message
    // is rebuilt from the input itself.
    assert_magic_match_bin!(
        r"
1 string Bread %s
>(6.b) indirect/r x

# this one uses a based offset
# computed at indirection
1 string is\ Toasted %s
>(11.b) use toasted_twice

# this one is using a new base
# offset being previous base
# offset + offset of use
0 name toasted_twice
>0 string x %s
        ",
        b"\x00Bread\x06is Toasted\x0ctwice\x00",
        "Bread is Toasted twice"
    )
}
4482
#[test]
fn test_offset_bug_5() {
    // Regression test for a relative offset (`&1`) inside a rule reached
    // through `indirect/r` and `use`.
    //
    // Input layout (byte offsets):
    //   0: 0x00 | 1..=4: "TEST" | 5: 0x06 | 6..=10: "twice" | 11: 0x00 | 12: 0x08
    //
    // Inside `toasted_twice` the string "twice" is matched without a
    // message; `&1` then skips one byte past the end of that match and must
    // land on the 0x08 byte, whose rule supplies the final "twice".
    assert_magic_match_bin!(
        r"
1 string TEST Bread is
>(5.b) indirect/r x

0 string twice Toasted
>0 use toasted_twice

0 name toasted_twice
>0 string twice
>>&1 byte 0x08 twice
        ",
        b"\x00TEST\x06twice\x00\x08",
        "Bread is Toasted twice"
    )
}
4501
#[test]
fn test_message_parts() {
    // Match a python shebang on an ASCII text stream; the rule's message is
    // the single word "PYTHON".
    let magic = first_magic(
        r#"0 string/W #!/usr/bin/env\ python PYTHON"#,
        b"#!/usr/bin/env python",
        StreamKind::Text(TextEncoding::Ascii),
    )
    .unwrap();

    // At least one message part must equal "python", ignoring ASCII case.
    let mut parts = magic.message_parts();
    assert!(parts.any(|part| part.eq_ignore_ascii_case("python")))
}
4513}