1#![forbid(unsafe_code)]
2#![deny(unused_imports)]
3use dyf::{DynDisplay, FormatString, dformat};
143use flagset::{FlagSet, flags};
144use flate2::{Compression, read::GzDecoder, write::GzEncoder};
145use lazy_cache::LazyCache;
146use memchr::memchr;
147use pest::{Span, error::ErrorVariant};
148use regex::bytes::{self};
149use serde::{Deserialize, Serialize};
150use std::{
151 borrow::Cow,
152 cmp::max,
153 collections::{HashMap, HashSet},
154 fmt::{self, Debug, Display},
155 io::{self, Read, Seek, SeekFrom, Write},
156 ops::{Add, BitAnd, BitOr, BitXor, Div, Mul, Rem, Sub},
157 path::Path,
158};
159use tar::Archive;
160use thiserror::Error;
161use tracing::{Level, debug, enabled, trace};
162
163use crate::{
164 numeric::{Float, FloatDataType, Scalar, ScalarDataType},
165 parser::{FileMagicParser, Rule},
166 utils::{decode_id3, find_json_boundaries},
167};
168
169mod numeric;
170mod parser;
171mod utils;
172
/// Strength value for hardcoded magic (its use site is outside this section).
const HARDCODED_MAGIC_STRENGTH: u64 = 2048;
/// Source label for hardcoded magic.
// NOTE(review): presumably reported where a rule file name would otherwise appear — confirm.
const HARDCODED_SOURCE: &str = "hardcoded";
/// Recursion depth limit.
// NOTE(review): presumably what triggers `Error::MaximumRecursion` — confirm against caller.
const MAX_RECURSION: usize = 50;
/// Upper bound, in bytes, on how much input an unbounded test reads (7 MiB).
pub const FILE_BYTES_MAX: usize = 7 * 1024 * 1024;
/// Number of bytes read for a regex test that carries no explicit length.
const FILE_REGEX_MAX: usize = 8 * 1024;

/// MIME type string for generic binary content.
pub const DEFAULT_BIN_MIMETYPE: &str = "application/octet-stream";
/// MIME type string for generic text content.
pub const DEFAULT_TEXT_MIMETYPE: &str = "text/plain";

/// Date/time pattern (`YYYY-MM-DD HH:MM:SS` in strftime-style notation).
pub(crate) const TIMESTAMP_FORMAT: &str = "%Y-%m-%d %H:%M:%S";
186
// Panics with the given `panic!`-style arguments, but only when the crate is
// compiled with debug assertions; otherwise it does nothing and execution
// continues with whatever fallback follows the invocation.
macro_rules! debug_panic {
    ($($panic_args:tt)*) => {
        if cfg!(debug_assertions) {
            panic!($($panic_args)*);
        }
    };
}
194
// Reads exactly `size_of::<$ty>()` bytes from `$r` and evaluates to the raw
// byte array. A failed read is propagated with `?`, so the macro can only be
// used inside a function whose error type accepts the reader's error.
macro_rules! read {
    ($r:expr, $ty:ty) => {{
        let mut raw = [0u8; std::mem::size_of::<$ty>()];
        $r.read_exact(&mut raw)?;
        raw
    }};
}

// Reads a `$ty` stored in little-endian byte order.
macro_rules! read_le {
    ($r:expr, $ty:ty) => {{
        let raw = read!($r, $ty);
        <$ty>::from_le_bytes(raw)
    }};
}

// Reads a `$ty` stored in big-endian byte order.
macro_rules! read_be {
    ($r:expr, $ty:ty) => {{
        let raw = read!($r, $ty);
        <$ty>::from_be_bytes(raw)
    }};
}

// Reads a 32-bit "middle-endian" value: two consecutive little-endian 16-bit
// words, the first one forming the upper half of the result.
macro_rules! read_me {
    ($r:expr) => {{
        let upper = read_le!($r, u16) as i32;
        let lower = read_le!($r, u16) as i32;
        (upper << 16) | lower
    }};
}
214
215#[inline(always)]
216fn read_octal_u64<R: Read + Seek>(haystack: &mut LazyCache<R>) -> Option<u64> {
217 let s = haystack
218 .read_while_or_limit(|b| matches!(b, b'0'..=b'7'), 22)
219 .map(|buf| str::from_utf8(buf))
220 .ok()?
221 .ok()?;
222
223 if !s.starts_with("0") {
224 return None;
225 }
226
227 u64::from_str_radix(s, 8).ok()
228}
229
230#[derive(Debug, Error)]
232pub enum Error {
233 #[error("{0}")]
235 Msg(String),
236
237 #[error("source={0} line={1} error={2}")]
239 Localized(String, usize, Box<Error>),
240
241 #[error("missing rule: {0}")]
243 MissingRule(String),
244
245 #[error("maximum recursion reached: {0}")]
247 MaximumRecursion(usize),
248
249 #[error("io: {0}")]
251 Io(#[from] io::Error),
252
253 #[error("parser error: {0}")]
255 Parse(#[from] Box<pest::error::Error<Rule>>),
256
257 #[error("formatting: {0}")]
259 Format(#[from] dyf::Error),
260
261 #[error("regex: {0}")]
263 Regex(#[from] regex::Error),
264
265 #[error("{0}")]
267 Serialize(#[from] bincode::error::EncodeError),
268
269 #[error("{0}")]
271 Deserialize(#[from] bincode::error::DecodeError),
272}
273
274impl Error {
275 #[inline]
276 fn parser<S: ToString>(msg: S, span: Span<'_>) -> Self {
277 Self::Parse(Box::new(pest::error::Error::new_from_span(
278 ErrorVariant::CustomError {
279 message: msg.to_string(),
280 },
281 span,
282 )))
283 }
284
285 fn msg<M: AsRef<str>>(msg: M) -> Self {
286 Self::Msg(msg.as_ref().into())
287 }
288
289 fn localized<S: AsRef<str>>(source: S, line: usize, err: Error) -> Self {
290 Self::Localized(source.as_ref().into(), line, err.into())
291 }
292
293 pub fn unwrap_localized(&self) -> &Self {
295 match self {
296 Self::Localized(_, _, e) => e,
297 _ => self,
298 }
299 }
300}
301
302#[derive(Debug, Clone, Serialize, Deserialize)]
303enum Message {
304 String(String),
305 Format {
306 printf_spec: String,
307 fs: FormatString,
308 },
309}
310
311impl Display for Message {
312 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
313 match self {
314 Self::String(s) => write!(f, "{s}"),
315 Self::Format { printf_spec: _, fs } => write!(f, "{}", fs.to_string_lossy()),
316 }
317 }
318}
319
320impl Message {
321 fn to_string_lossy(&self) -> Cow<'_, str> {
322 match self {
323 Message::String(s) => Cow::Borrowed(s),
324 Message::Format { printf_spec: _, fs } => fs.to_string_lossy(),
325 }
326 }
327
328 #[inline(always)]
329 fn format_with(&self, mr: Option<&MatchRes>) -> Result<Cow<'_, str>, Error> {
330 match self {
331 Self::String(s) => Ok(Cow::Borrowed(s.as_str())),
332 Self::Format {
333 printf_spec: c_spec,
334 fs,
335 } => {
336 if let Some(mr) = mr {
337 match mr {
338 MatchRes::Float(_, _) | MatchRes::Bytes(_, _, _, _) => {
339 Ok(Cow::Owned(dformat!(fs, mr)?))
340 }
341 MatchRes::Scalar(_, scalar) => {
342 if c_spec.as_str() == "c" {
344 match scalar {
345 Scalar::byte(b) => {
346 let b = (*b as u8) as char;
347 Ok(Cow::Owned(dformat!(fs, b)?))
348 }
349 Scalar::ubyte(b) => {
350 let b = *b as char;
351 Ok(Cow::Owned(dformat!(fs, b)?))
352 }
353 _ => Ok(Cow::Owned(dformat!(fs, mr)?)),
354 }
355 } else {
356 Ok(Cow::Owned(dformat!(fs, mr)?))
357 }
358 }
359 }
360 } else {
361 Ok(fs.to_string_lossy())
362 }
363 }
364 }
365 }
366}
367
368impl ScalarDataType {
369 #[inline(always)]
370 fn read<R: Read + Seek>(&self, from: &mut R, switch_endianness: bool) -> Result<Scalar, Error> {
371 macro_rules! _read_le {
372 ($ty: ty) => {{
373 if switch_endianness {
374 <$ty>::from_be_bytes(read!(from, $ty))
375 } else {
376 <$ty>::from_le_bytes(read!(from, $ty))
377 }
378 }};
379 }
380
381 macro_rules! _read_be {
382 ($ty: ty) => {{
383 if switch_endianness {
384 <$ty>::from_le_bytes(read!(from, $ty))
385 } else {
386 <$ty>::from_be_bytes(read!(from, $ty))
387 }
388 }};
389 }
390
391 macro_rules! _read_ne {
392 ($ty: ty) => {{
393 if cfg!(target_endian = "big") {
394 _read_be!($ty)
395 } else {
396 _read_le!($ty)
397 }
398 }};
399 }
400
401 macro_rules! _read_me {
402 () => {
403 ((_read_le!(u16) as i32) << 16) | (_read_le!(u16) as i32)
404 };
405 }
406
407 Ok(match self {
408 Self::byte => Scalar::byte(read!(from, u8)[0] as i8),
410 Self::short => Scalar::short(_read_ne!(i16)),
411 Self::long => Scalar::long(_read_ne!(i32)),
412 Self::date => Scalar::date(_read_ne!(i32)),
413 Self::ldate => Scalar::ldate(_read_ne!(i32)),
414 Self::qwdate => Scalar::qwdate(_read_ne!(i64)),
415 Self::leshort => Scalar::leshort(_read_le!(i16)),
416 Self::lelong => Scalar::lelong(_read_le!(i32)),
417 Self::lequad => Scalar::lequad(_read_le!(i64)),
418 Self::bequad => Scalar::bequad(_read_be!(i64)),
419 Self::belong => Scalar::belong(_read_be!(i32)),
420 Self::bedate => Scalar::bedate(_read_be!(i32)),
421 Self::beldate => Scalar::beldate(_read_be!(i32)),
422 Self::beqdate => Scalar::beqdate(_read_be!(i64)),
423 Self::ubyte => Scalar::ubyte(read!(from, u8)[0]),
425 Self::ushort => Scalar::ushort(_read_ne!(u16)),
426 Self::uleshort => Scalar::uleshort(_read_le!(u16)),
427 Self::ulelong => Scalar::ulelong(_read_le!(u32)),
428 Self::uledate => Scalar::uledate(_read_le!(u32)),
429 Self::ulequad => Scalar::ulequad(_read_le!(u64)),
430 Self::offset => Scalar::offset(from.stream_position()?),
431 Self::ubequad => Scalar::ubequad(_read_be!(u64)),
432 Self::medate => Scalar::medate(_read_me!()),
433 Self::meldate => Scalar::meldate(_read_me!()),
434 Self::melong => Scalar::melong(_read_me!()),
435 Self::beshort => Scalar::beshort(_read_be!(i16)),
436 Self::quad => Scalar::quad(_read_ne!(i64)),
437 Self::uquad => Scalar::uquad(_read_ne!(u64)),
438 Self::ledate => Scalar::ledate(_read_le!(i32)),
439 Self::leldate => Scalar::leldate(_read_le!(i32)),
440 Self::leqdate => Scalar::leqdate(_read_le!(i64)),
441 Self::leqldate => Scalar::leqldate(_read_le!(i64)),
442 Self::leqwdate => Scalar::leqwdate(_read_le!(i64)),
443 Self::ubelong => Scalar::ubelong(_read_be!(u32)),
444 Self::ulong => Scalar::ulong(_read_ne!(u32)),
445 Self::ubeshort => Scalar::ubeshort(_read_be!(u16)),
446 Self::ubeqdate => Scalar::ubeqdate(_read_be!(u64)),
447 Self::lemsdosdate => Scalar::lemsdosdate(_read_le!(u16)),
448 Self::lemsdostime => Scalar::lemsdostime(_read_le!(u16)),
449 Self::guid => Scalar::guid(u128::from_be_bytes(read!(from, u128))),
450 })
451 }
452}
453
454impl FloatDataType {
455 #[inline(always)]
456 fn read<R: Read + Seek>(&self, from: &mut R, switch_endianness: bool) -> Result<Float, Error> {
457 macro_rules! _read_le {
458 ($ty: ty) => {{
459 if switch_endianness {
460 <$ty>::from_be_bytes(read!(from, $ty))
461 } else {
462 <$ty>::from_le_bytes(read!(from, $ty))
463 }
464 }};
465 }
466
467 macro_rules! _read_be {
468 ($ty: ty) => {{
469 if switch_endianness {
470 <$ty>::from_le_bytes(read!(from, $ty))
471 } else {
472 <$ty>::from_be_bytes(read!(from, $ty))
473 }
474 }};
475 }
476
477 macro_rules! _read_ne {
478 ($ty: ty) => {{
479 if cfg!(target_endian = "big") {
480 _read_be!($ty)
481 } else {
482 _read_le!($ty)
483 }
484 }};
485 }
486
487 macro_rules! _read_me {
488 () => {
489 ((_read_le!(u16) as i32) << 16) | (_read_le!(u16) as i32)
490 };
491 }
492
493 Ok(match self {
494 Self::lefloat => Float::lefloat(_read_le!(f32)),
495 Self::befloat => Float::befloat(_read_le!(f32)),
496 Self::ledouble => Float::ledouble(_read_le!(f64)),
497 Self::bedouble => Float::bedouble(_read_be!(f64)),
498 })
499 }
500}
501
502#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
503enum Op {
504 Mul,
505 Add,
506 Sub,
507 Div,
508 Mod,
509 And,
510 Xor,
511 Or,
512}
513
514impl Display for Op {
515 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
516 match self {
517 Op::Mul => write!(f, "*"),
518 Op::Add => write!(f, "+"),
519 Op::Sub => write!(f, "-"),
520 Op::Div => write!(f, "/"),
521 Op::Mod => write!(f, "%"),
522 Op::And => write!(f, "&"),
523 Op::Or => write!(f, "|"),
524 Op::Xor => write!(f, "^"),
525 }
526 }
527}
528
529#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
530enum CmpOp {
531 Eq,
532 Lt,
533 Gt,
534 BitAnd,
535 Neq, Xor,
537 Not, }
539
540impl CmpOp {
541 #[inline(always)]
542 fn is_neq(&self) -> bool {
543 matches!(self, Self::Neq)
544 }
545}
546
547#[derive(Debug, Clone, Serialize, Deserialize)]
548struct ScalarTransform {
549 op: Op,
550 num: Scalar,
551}
552
553impl ScalarTransform {
554 fn apply(&self, s: Scalar) -> Option<Scalar> {
555 match self.op {
556 Op::Add => s.checked_add(self.num),
557 Op::Sub => s.checked_sub(self.num),
558 Op::Mul => s.checked_mul(self.num),
559 Op::Div => s.checked_div(self.num),
560 Op::Mod => s.checked_rem(self.num),
561 Op::And => Some(s.bitand(self.num)),
562 Op::Xor => Some(s.bitxor(self.num)),
563 Op::Or => Some(s.bitor(self.num)),
564 }
565 }
566}
567
568#[derive(Debug, Clone, Serialize, Deserialize)]
569struct FloatTransform {
570 op: Op,
571 num: Float,
572}
573
574impl FloatTransform {
575 fn apply(&self, s: Float) -> Float {
576 match self.op {
577 Op::Add => s.add(self.num),
578 Op::Sub => s.sub(self.num),
579 Op::Mul => s.mul(self.num),
580 Op::Div => s.div(self.num),
582 Op::Mod => s.rem(self.num),
584 Op::And | Op::Xor | Op::Or => {
586 debug_panic!("unsupported operation");
587 s
588 }
589 }
590 }
591}
592
593#[derive(Debug, Clone, Serialize, Deserialize)]
594enum TestValue<T> {
595 Value(T),
596 Any,
597}
598
599impl<T> TestValue<T> {
600 #[inline(always)]
601 fn as_ref(&self) -> TestValue<&T> {
602 match self {
603 Self::Value(v) => TestValue::Value(v),
604 Self::Any => TestValue::Any,
605 }
606 }
607}
608
609flags! {
610 enum ReMod: u8{
611 CaseInsensitive,
612 StartOffsetUpdate,
613 LineLimit,
614 ForceBin,
615 ForceText,
616 TrimMatch,
617 }
618}
619
620fn serialize_regex<S>(re: &bytes::Regex, serializer: S) -> Result<S::Ok, S::Error>
621where
622 S: serde::Serializer,
623{
624 re.as_str().serialize(serializer)
625}
626
627fn deserialize_regex<'de, D>(deserializer: D) -> Result<bytes::Regex, D::Error>
628where
629 D: serde::Deserializer<'de>,
630{
631 let wrapper = String::deserialize(deserializer)?;
632 bytes::Regex::new(&wrapper).map_err(serde::de::Error::custom)
633}
634
635#[derive(Debug, Clone, Serialize, Deserialize)]
636struct RegexTest {
637 #[serde(
638 serialize_with = "serialize_regex",
639 deserialize_with = "deserialize_regex"
640 )]
641 re: bytes::Regex,
642 length: Option<usize>,
643 mods: FlagSet<ReMod>,
644 str_mods: FlagSet<StringMod>,
645 non_magic_len: usize,
646 binary: bool,
647 cmp_op: CmpOp,
648}
649
650impl RegexTest {
651 #[inline(always)]
652 fn is_binary(&self) -> bool {
653 self.binary
654 || self.mods.contains(ReMod::ForceBin)
655 || self.str_mods.contains(StringMod::ForceBin)
656 }
657
658 fn match_buf<'buf>(
659 &self,
660 off_buf: u64, stream_kind: StreamKind,
662 buf: &'buf [u8],
663 ) -> Option<MatchRes<'buf>> {
664 let mr = match stream_kind {
665 StreamKind::Text(_) => {
666 let mut off_txt = off_buf;
667
668 let mut line_limit = self.length.unwrap_or(usize::MAX);
669
670 for line in buf.split(|c| c == &b'\n') {
671 if line_limit == 0 {
675 break;
676 }
677
678 if let Some(re_match) = self.re.find(line) {
679 let start_offset = off_txt + re_match.start() as u64;
681
682 let stop_offset = if re_match.end() == line.len() {
684 Some(start_offset + re_match.as_bytes().len() as u64 + 1)
685 } else {
686 None
687 };
688
689 return Some(MatchRes::Bytes(
690 start_offset,
691 stop_offset,
692 re_match.as_bytes(),
693 Encoding::Utf8,
694 ));
695 }
696
697 off_txt += line.len() as u64;
698 off_txt += 1;
700 line_limit = line_limit.saturating_sub(1)
701 }
702 None
703 }
704
705 StreamKind::Binary => {
706 self.re.find(buf).map(|re_match| {
707 MatchRes::Bytes(
708 off_buf + re_match.start() as u64,
710 None,
711 re_match.as_bytes(),
712 Encoding::Utf8,
713 )
714 })
715 }
716 };
717
718 if self.cmp_op.is_neq() && mr.is_none() {
720 return Some(MatchRes::Bytes(off_buf, None, buf, Encoding::Utf8));
721 }
722
723 mr
724 }
725}
726
727impl From<RegexTest> for Test {
728 fn from(value: RegexTest) -> Self {
729 Self::Regex(value)
730 }
731}
732
733flags! {
734 enum StringMod: u8{
735 ForceBin,
736 UpperInsensitive,
737 LowerInsensitive,
738 FullWordMatch,
739 Trim,
740 ForceText,
741 CompactWhitespace,
742 OptBlank,
743 }
744}
745
746#[derive(Debug, Clone, Serialize, Deserialize)]
747struct StringTest {
748 test_val: TestValue<Vec<u8>>,
749 cmp_op: CmpOp,
750 length: Option<usize>,
751 mods: FlagSet<StringMod>,
752 binary: bool,
753}
754
755impl From<StringTest> for Test {
756 fn from(value: StringTest) -> Self {
757 Self::String(value)
758 }
759}
760
761#[inline(always)]
762fn string_match(str: &[u8], mods: FlagSet<StringMod>, buf: &[u8]) -> (bool, usize) {
763 let mut consumed = 0;
764 if mods.is_disjoint(
766 StringMod::UpperInsensitive
767 | StringMod::LowerInsensitive
768 | StringMod::FullWordMatch
769 | StringMod::CompactWhitespace
770 | StringMod::OptBlank,
771 ) {
772 if buf.starts_with(str) {
774 (true, str.len())
775 } else {
776 (false, consumed)
777 }
778 } else {
779 let mut i_src = 0;
780 let mut iter = buf.iter().peekable();
781
782 macro_rules! consume_target {
783 () => {{
784 iter.next();
785 consumed += 1;
786 }};
787 }
788
789 macro_rules! continue_next_iteration {
790 () => {{
791 consume_target!();
792 i_src += 1;
793 continue;
794 }};
795 }
796
797 while let Some(&&b) = iter.peek() {
798 let Some(&ref_byte) = str.get(i_src) else {
799 break;
800 };
801
802 if mods.contains(StringMod::OptBlank) && (b == b' ' || ref_byte == b' ') {
803 if b == b' ' {
804 consume_target!();
806 }
807
808 if ref_byte == b' ' {
809 i_src += 1;
811 }
812
813 continue;
814 }
815
816 if mods.contains(StringMod::UpperInsensitive) {
817 if ref_byte.is_ascii_uppercase() && ref_byte == b.to_ascii_uppercase()
819 || ref_byte == b
820 {
821 continue_next_iteration!()
822 }
823 }
824
825 if mods.contains(StringMod::LowerInsensitive)
826 && (ref_byte.is_ascii_lowercase() && ref_byte == b.to_ascii_lowercase()
827 || ref_byte == b)
828 {
829 continue_next_iteration!()
830 }
831
832 if mods.contains(StringMod::CompactWhitespace) && ref_byte == b' ' {
833 let mut src_blk = 0;
834 while let Some(b' ') = str.get(i_src) {
835 src_blk += 1;
836 i_src += 1;
837 }
838
839 let mut tgt_blk = 0;
840 while let Some(b' ') = iter.peek() {
841 tgt_blk += 1;
842 consume_target!();
843 }
844
845 if src_blk > tgt_blk {
846 return (false, consumed);
847 }
848
849 continue;
850 }
851
852 if ref_byte == b {
853 continue_next_iteration!()
854 } else {
855 return (false, consumed);
856 }
857 }
858
859 if mods.contains(StringMod::FullWordMatch)
860 && let Some(b) = iter.peek()
861 && !b.is_ascii_whitespace()
862 {
863 return (false, consumed);
864 }
865
866 (consumed > 0 && consumed <= buf.len(), consumed)
867 }
868}
869
870impl StringTest {
871 fn has_length_mod(&self) -> bool {
872 !self.mods.is_disjoint(
873 StringMod::UpperInsensitive
874 | StringMod::LowerInsensitive
875 | StringMod::FullWordMatch
876 | StringMod::CompactWhitespace
877 | StringMod::OptBlank,
878 )
879 }
880
881 #[inline(always)]
882 fn test_value_len(&self) -> usize {
883 match self.test_val.as_ref() {
884 TestValue::Value(s) => s.len(),
885 TestValue::Any => 0,
886 }
887 }
888
889 #[inline(always)]
890 fn is_binary(&self) -> bool {
891 self.binary || self.mods.contains(StringMod::ForceBin)
892 }
893
894 #[inline(always)]
895 fn is_text(&self) -> bool {
896 self.mods.contains(StringMod::ForceText)
897 }
898}
899
900#[derive(Debug, Clone, Serialize, Deserialize)]
901struct SearchTest {
902 str: Vec<u8>,
903 n_pos: Option<usize>,
904 str_mods: FlagSet<StringMod>,
905 re_mods: FlagSet<ReMod>,
906 binary: bool,
907 cmp_op: CmpOp,
908}
909
910impl From<SearchTest> for Test {
911 fn from(value: SearchTest) -> Self {
912 Self::Search(value)
913 }
914}
915
916impl SearchTest {
917 #[inline(always)]
918 fn is_binary(&self) -> bool {
919 (self.binary
920 || self.str_mods.contains(StringMod::ForceBin)
921 || self.re_mods.contains(ReMod::ForceBin))
922 && !(self.str_mods.contains(StringMod::ForceText)
923 || self.re_mods.contains(ReMod::ForceText))
924 }
925
926 #[inline]
928 fn match_buf<'buf>(&self, off_buf: u64, buf: &'buf [u8]) -> Option<MatchRes<'buf>> {
929 let mut i = 0;
930
931 let needle = self.str.first()?;
932
933 while i < buf.len() {
934 i += memchr(*needle, &buf[i..])?;
937
938 if self.str_mods.contains(StringMod::FullWordMatch) {
940 let prev_is_whitespace = buf
941 .get(i.saturating_sub(1))
942 .map(|c| c.is_ascii_whitespace())
943 .unwrap_or_default();
944
945 if i > 0 && !prev_is_whitespace {
950 i += 1;
951 continue;
952 }
953 }
954
955 if let Some(npos) = self.n_pos
956 && i > npos
957 {
958 break;
959 }
960
961 let pos = i;
962 let (ok, consumed) = string_match(&self.str, self.str_mods, &buf[i..]);
963
964 if ok {
965 return Some(MatchRes::Bytes(
966 off_buf.saturating_add(pos as u64),
967 None,
968 &buf[i..i + consumed],
969 Encoding::Utf8,
970 ));
971 } else {
972 i += max(consumed, 1)
973 }
974 }
975
976 if self.cmp_op.is_neq() {
978 return Some(MatchRes::Bytes(off_buf, None, buf, Encoding::Utf8));
979 }
980
981 None
982 }
983}
984
985#[derive(Debug, Clone, Serialize, Deserialize)]
986struct ScalarTest {
987 ty: ScalarDataType,
988 transform: Option<ScalarTransform>,
989 cmp_op: CmpOp,
990 test_val: TestValue<Scalar>,
991}
992
993#[derive(Debug, Clone, Serialize, Deserialize)]
994struct FloatTest {
995 ty: FloatDataType,
996 transform: Option<FloatTransform>,
997 cmp_op: CmpOp,
998 test_val: TestValue<Float>,
999}
1000
1001#[derive(Debug, PartialEq)]
1004enum ReadValue<'buf> {
1005 Float(u64, Float),
1006 Scalar(u64, Scalar),
1007 Bytes(u64, &'buf [u8]),
1008}
1009
1010impl DynDisplay for ReadValue<'_> {
1011 fn dyn_fmt(&self, f: &dyf::FormatSpec) -> Result<String, dyf::Error> {
1012 match self {
1013 Self::Float(_, s) => DynDisplay::dyn_fmt(s, f),
1014 Self::Scalar(_, s) => DynDisplay::dyn_fmt(s, f),
1015 Self::Bytes(_, b) => Ok(format!("{b:?}")),
1016 }
1017 }
1018}
1019
1020impl DynDisplay for &ReadValue<'_> {
1021 fn dyn_fmt(&self, f: &dyf::FormatSpec) -> Result<String, dyf::Error> {
1022 DynDisplay::dyn_fmt(*self, f)
1024 }
1025}
1026
1027impl Display for ReadValue<'_> {
1028 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1029 match self {
1030 Self::Float(_, v) => write!(f, "{v}"),
1031 Self::Scalar(_, s) => write!(f, "{s}"),
1032 Self::Bytes(_, b) => write!(f, "{b:?}"),
1033 }
1034 }
1035}
1036
1037enum Encoding {
1038 Utf16(String16Encoding),
1039 Utf8,
1040}
1041
1042enum MatchRes<'buf> {
1045 Bytes(u64, Option<u64>, &'buf [u8], Encoding),
1050 Scalar(u64, Scalar),
1051 Float(u64, Float),
1052}
1053
1054impl DynDisplay for &MatchRes<'_> {
1055 fn dyn_fmt(&self, f: &dyf::FormatSpec) -> Result<String, dyf::Error> {
1056 (*self).dyn_fmt(f)
1057 }
1058}
1059
1060impl DynDisplay for MatchRes<'_> {
1061 fn dyn_fmt(&self, f: &dyf::FormatSpec) -> Result<String, dyf::Error> {
1062 match self {
1063 Self::Scalar(_, v) => v.dyn_fmt(f),
1064 Self::Float(_, v) => v.dyn_fmt(f),
1065 Self::Bytes(_, _, v, enc) => match enc {
1066 Encoding::Utf8 => String::from_utf8_lossy(v).to_string().dyn_fmt(f),
1067 Encoding::Utf16(enc) => {
1068 let utf16: Vec<u16> = slice_to_utf16_iter(v, *enc).collect();
1069 String::from_utf16_lossy(&utf16).dyn_fmt(f)
1070 }
1071 },
1072 }
1073 }
1074}
1075
1076impl MatchRes<'_> {
1077 #[inline]
1079 fn start_offset(&self) -> u64 {
1080 match self {
1081 MatchRes::Bytes(o, _, _, _) => *o,
1082 MatchRes::Scalar(o, _) => *o,
1083 MatchRes::Float(o, _) => *o,
1084 }
1085 }
1086
1087 #[inline]
1089 fn end_offset(&self) -> u64 {
1090 match self {
1091 MatchRes::Bytes(start, end, buf, _) => match end {
1092 Some(end) => *end,
1093 None => start.saturating_add(buf.len() as u64),
1094 },
1095 MatchRes::Scalar(o, sc) => o.add(sc.size_of() as u64),
1096 MatchRes::Float(o, f) => o.add(f.size_of() as u64),
1097 }
1098 }
1099}
1100
1101fn slice_to_utf16_iter(read: &[u8], encoding: String16Encoding) -> impl Iterator<Item = u16> {
1102 let even = read
1103 .iter()
1104 .enumerate()
1105 .filter(|(i, _)| i % 2 == 0)
1106 .map(|t| t.1);
1107
1108 let odd = read
1109 .iter()
1110 .enumerate()
1111 .filter(|(i, _)| i % 2 != 0)
1112 .map(|t| t.1);
1113
1114 even.zip(odd).map(move |(e, o)| match encoding {
1115 String16Encoding::Le => u16::from_le_bytes([*e, *o]),
1116 String16Encoding::Be => u16::from_be_bytes([*e, *o]),
1117 })
1118}
1119
1120#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
1121enum String16Encoding {
1122 Le,
1123 Be,
1124}
1125
1126#[derive(Debug, Clone, Serialize, Deserialize)]
1127struct String16Test {
1128 orig: String,
1129 test_val: TestValue<Vec<u16>>,
1130 encoding: String16Encoding,
1131}
1132
1133impl String16Test {
1134 #[inline(always)]
1138 fn test_value_len(&self) -> usize {
1139 match self.test_val.as_ref() {
1140 TestValue::Value(str16) => str16.len(),
1141 TestValue::Any => 0,
1142 }
1143 }
1144}
1145
1146flags! {
1147 enum IndirectMod: u8{
1148 Relative,
1149 }
1150}
1151
1152type IndirectMods = FlagSet<IndirectMod>;
1153
1154#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
1155enum PStringLen {
1156 Byte, ShortBe, ShortLe, LongBe, LongLe, }
1162
1163impl PStringLen {
1164 #[inline(always)]
1165 const fn size_of_len(&self) -> usize {
1166 match self {
1167 PStringLen::Byte => 1,
1168 PStringLen::ShortBe => 2,
1169 PStringLen::ShortLe => 2,
1170 PStringLen::LongBe => 4,
1171 PStringLen::LongLe => 4,
1172 }
1173 }
1174}
1175
1176#[derive(Debug, Clone, Serialize, Deserialize)]
1177struct PStringTest {
1178 len: PStringLen,
1179 test_val: TestValue<Vec<u8>>,
1180 include_len: bool,
1181}
1182
1183impl PStringTest {
1184 #[inline]
1185 fn read<'cache, R: Read + Seek>(
1186 &self,
1187 haystack: &'cache mut LazyCache<R>,
1188 ) -> Result<Option<&'cache [u8]>, Error> {
1189 let mut len = match self.len {
1190 PStringLen::Byte => read_le!(haystack, u8) as u32,
1191 PStringLen::ShortBe => read_be!(haystack, u16) as u32,
1192 PStringLen::ShortLe => read_le!(haystack, u16) as u32,
1193 PStringLen::LongBe => read_be!(haystack, u32),
1194 PStringLen::LongLe => read_le!(haystack, u32),
1195 } as usize;
1196
1197 if self.include_len {
1198 len = len.saturating_sub(self.len.size_of_len())
1199 }
1200
1201 if let TestValue::Value(s) = self.test_val.as_ref()
1202 && len != s.len()
1203 {
1204 return Ok(None);
1205 }
1206
1207 let read = haystack.read_exact_count(len as u64)?;
1208
1209 Ok(Some(read))
1210 }
1211
1212 #[inline(always)]
1213 fn test_value_len(&self) -> usize {
1214 match self.test_val.as_ref() {
1215 TestValue::Value(s) => s.len(),
1216 TestValue::Any => 0,
1217 }
1218 }
1219}
1220
1221#[derive(Debug, Clone, Serialize, Deserialize)]
1222enum Test {
1223 Name(String),
1224 Use(bool, String),
1225 Scalar(ScalarTest),
1226 Float(FloatTest),
1227 String(StringTest),
1228 Search(SearchTest),
1229 PString(PStringTest),
1230 Regex(RegexTest),
1231 Indirect(FlagSet<IndirectMod>),
1232 String16(String16Test),
1233 #[allow(dead_code)]
1235 Der,
1236 Clear,
1237 Default,
1238}
1239
1240impl Test {
1241 #[inline]
1243 fn read_test_value<'haystack, R: Read + Seek>(
1244 &self,
1245 haystack: &'haystack mut LazyCache<R>,
1246 switch_endianness: bool,
1247 ) -> Result<Option<ReadValue<'haystack>>, Error> {
1248 let test_value_offset = haystack.lazy_stream_position();
1249
1250 match self {
1251 Self::Scalar(t) => {
1252 t.ty.read(haystack, switch_endianness)
1253 .map(|s| Some(ReadValue::Scalar(test_value_offset, s)))
1254 }
1255
1256 Self::Float(t) => {
1257 t.ty.read(haystack, switch_endianness)
1258 .map(|f| Some(ReadValue::Float(test_value_offset, f)))
1259 }
1260 Self::String(t) => {
1261 match t.test_val.as_ref() {
1262 TestValue::Value(str) => {
1263 let buf = if let Some(length) = t.length {
1264 haystack.read_exact_count(length as u64)?
1266 } else {
1267 match t.cmp_op {
1270 CmpOp::Eq | CmpOp::Neq => {
1271 if !t.has_length_mod() {
1272 haystack.read_exact_count(str.len() as u64)?
1273 } else {
1274 haystack.read_count(FILE_BYTES_MAX as u64)?
1275 }
1276 }
1277 CmpOp::Lt | CmpOp::Gt => {
1278 let read =
1279 haystack.read_until_any_delim_or_limit(b"\n\0", 8092)?;
1280
1281 if read.ends_with(b"\0") || read.ends_with(b"\n") {
1282 &read[..read.len() - 1]
1283 } else {
1284 read
1285 }
1286 }
1287 _ => {
1288 return Err(Error::Msg(format!(
1289 "string test does not support {:?} operator",
1290 t.cmp_op
1291 )));
1292 }
1293 }
1294 };
1295
1296 Ok(Some(ReadValue::Bytes(test_value_offset, buf)))
1297 }
1298 TestValue::Any => {
1299 let read = haystack.read_until_any_delim_or_limit(b"\0\n", 8192)?;
1300 let bytes = if read.ends_with(b"\0") || read.ends_with(b"\n") {
1302 &read[..read.len() - 1]
1303 } else {
1304 read
1305 };
1306
1307 Ok(Some(ReadValue::Bytes(test_value_offset, bytes)))
1308 }
1309 }
1310 }
1311
1312 Self::String16(t) => {
1313 match t.test_val.as_ref() {
1314 TestValue::Value(str16) => {
1315 let read = haystack.read_exact_count((str16.len() * 2) as u64)?;
1316
1317 Ok(Some(ReadValue::Bytes(test_value_offset, read)))
1318 }
1319 TestValue::Any => {
1320 let read = haystack.read_until_utf16_or_limit(b"\x00\x00", 8192)?;
1321
1322 let end = if read.len() % 2 == 0 {
1324 read.len()
1325 } else {
1326 read.len().saturating_sub(1)
1329 };
1330
1331 Ok(Some(ReadValue::Bytes(test_value_offset, &read[..end])))
1332 }
1333 }
1334 }
1335
1336 Self::PString(t) => {
1337 let Some(read) = t.read(haystack)? else {
1338 return Ok(None);
1339 };
1340 Ok(Some(ReadValue::Bytes(test_value_offset, read)))
1341 }
1342
1343 Self::Search(_) => {
1344 let buf = haystack.read_count(FILE_BYTES_MAX as u64)?;
1345 Ok(Some(ReadValue::Bytes(test_value_offset, buf)))
1346 }
1347
1348 Self::Regex(r) => {
1349 let length = {
1350 match r.length {
1351 Some(len) => {
1352 if r.mods.contains(ReMod::LineLimit) {
1353 len * 80
1354 } else {
1355 len
1356 }
1357 }
1358
1359 None => FILE_REGEX_MAX,
1360 }
1361 };
1362
1363 let read = haystack.read_count(length as u64)?;
1364 Ok(Some(ReadValue::Bytes(test_value_offset, read)))
1365 }
1366
1367 Self::Name(_)
1368 | Self::Use(_, _)
1369 | Self::Indirect(_)
1370 | Self::Clear
1371 | Self::Default
1372 | Self::Der => Err(Error::msg("no value to read for this test")),
1373 }
1374 }
1375
1376 #[inline(always)]
1377 fn match_value<'s>(
1378 &'s self,
1379 tv: &ReadValue<'s>,
1380 stream_kind: StreamKind,
1381 ) -> Option<MatchRes<'s>> {
1382 match (self, tv) {
1383 (Self::Scalar(t), ReadValue::Scalar(o, ts)) => {
1384 let read_value: Scalar = match t.transform.as_ref() {
1385 Some(t) => t.apply(*ts)?,
1386 None => *ts,
1387 };
1388
1389 match t.test_val {
1390 TestValue::Value(test_value) => {
1391 let ok = match t.cmp_op {
1392 CmpOp::Not => read_value == !test_value,
1395 CmpOp::Eq => read_value == test_value,
1396 CmpOp::Lt => read_value < test_value,
1397 CmpOp::Gt => read_value > test_value,
1398 CmpOp::Neq => read_value != test_value,
1399 CmpOp::BitAnd => read_value & test_value == test_value,
1400 CmpOp::Xor => (read_value & test_value).is_zero(),
1401 };
1402
1403 if ok {
1404 Some(MatchRes::Scalar(*o, read_value))
1405 } else {
1406 None
1407 }
1408 }
1409
1410 TestValue::Any => Some(MatchRes::Scalar(*o, read_value)),
1411 }
1412 }
1413
1414 (Self::Float(t), ReadValue::Float(o, f)) => {
1415 let read_value: Float = t.transform.as_ref().map(|t| t.apply(*f)).unwrap_or(*f);
1416
1417 match t.test_val {
1418 TestValue::Value(tf) => {
1419 let ok = match t.cmp_op {
1420 CmpOp::Eq => read_value == tf,
1421 CmpOp::Lt => read_value < tf,
1422 CmpOp::Gt => read_value > tf,
1423 CmpOp::Neq => read_value != tf,
1424 _ => {
1425 debug_panic!("unsupported float comparison");
1428 debug!("unsupported float comparison");
1429 false
1430 }
1431 };
1432
1433 if ok {
1434 Some(MatchRes::Float(*o, read_value))
1435 } else {
1436 None
1437 }
1438 }
1439 TestValue::Any => Some(MatchRes::Float(*o, read_value)),
1440 }
1441 }
1442
1443 (Self::String(st), ReadValue::Bytes(o, buf)) => {
1444 macro_rules! trim_buf {
1445 ($buf: expr) => {{
1446 if st.mods.contains(StringMod::Trim) {
1447 $buf.trim_ascii()
1448 } else {
1449 $buf
1450 }
1451 }};
1452 }
1453
1454 match st.test_val.as_ref() {
1455 TestValue::Value(str) => {
1456 match st.cmp_op {
1457 CmpOp::Eq => {
1458 if let (true, _) = string_match(str, st.mods, buf) {
1459 Some(MatchRes::Bytes(*o, None, trim_buf!(str), Encoding::Utf8))
1460 } else {
1461 None
1462 }
1463 }
1464 CmpOp::Neq => {
1465 if let (false, _) = string_match(str, st.mods, buf) {
1466 Some(MatchRes::Bytes(*o, None, trim_buf!(str), Encoding::Utf8))
1467 } else {
1468 None
1469 }
1470 }
1471 CmpOp::Gt => {
1472 if buf.len() > str.len() {
1473 Some(MatchRes::Bytes(*o, None, trim_buf!(buf), Encoding::Utf8))
1474 } else {
1475 None
1476 }
1477 }
1478 CmpOp::Lt => {
1479 if buf.len() < str.len() {
1480 Some(MatchRes::Bytes(*o, None, trim_buf!(buf), Encoding::Utf8))
1481 } else {
1482 None
1483 }
1484 }
1485
1486 _ => {
1488 debug_panic!("unsupported string comparison");
1491 debug!("unsupported string comparison");
1492 None
1493 }
1494 }
1495 }
1496 TestValue::Any => {
1497 Some(MatchRes::Bytes(*o, None, trim_buf!(buf), Encoding::Utf8))
1498 }
1499 }
1500 }
1501
1502 (Self::PString(m), ReadValue::Bytes(o, buf)) => match m.test_val.as_ref() {
1503 TestValue::Value(psv) => {
1504 if buf == psv {
1505 Some(MatchRes::Bytes(*o, None, buf, Encoding::Utf8))
1506 } else {
1507 None
1508 }
1509 }
1510 TestValue::Any => Some(MatchRes::Bytes(*o, None, buf, Encoding::Utf8)),
1511 },
1512
1513 (Self::String16(t), ReadValue::Bytes(o, buf)) => {
1514 match t.test_val.as_ref() {
1515 TestValue::Value(str16) => {
1516 if str16.len() * 2 != buf.len() {
1518 return None;
1519 }
1520
1521 for (i, utf16_char) in slice_to_utf16_iter(buf, t.encoding).enumerate() {
1523 if str16[i] != utf16_char {
1524 return None;
1525 }
1526 }
1527
1528 Some(MatchRes::Bytes(
1529 *o,
1530 None,
1531 t.orig.as_bytes(),
1532 Encoding::Utf16(t.encoding),
1533 ))
1534 }
1535
1536 TestValue::Any => {
1537 Some(MatchRes::Bytes(*o, None, buf, Encoding::Utf16(t.encoding)))
1538 }
1539 }
1540 }
1541
1542 (Self::Regex(r), ReadValue::Bytes(o, buf)) => r.match_buf(*o, stream_kind, buf),
1543
1544 (Self::Search(t), ReadValue::Bytes(o, buf)) => t.match_buf(*o, buf),
1545
1546 _ => None,
1547 }
1548 }
1549
1550 #[inline(always)]
1551 fn strength(&self) -> u64 {
1552 const MULT: usize = 10;
1553
1554 let mut out = 2 * MULT;
1555
1556 match self {
1558 Test::Scalar(s) => {
1559 out += s.ty.type_size() * MULT;
1560 }
1561
1562 Test::Float(t) => {
1563 out += t.ty.type_size() * MULT;
1564 }
1565
1566 Test::String(t) => out += t.test_value_len().saturating_mul(MULT),
1567
1568 Test::PString(t) => out += t.test_value_len().saturating_mul(MULT),
1569
1570 Test::Search(s) => {
1571 let n_pos = s.n_pos.unwrap_or(FILE_BYTES_MAX);
1576
1577 match n_pos {
1578 0..=80 => out += s.str.len().saturating_mul(MULT),
1580 81..=240 => out += s.str.len() * s.str.len().clamp(0, MULT - 2),
1582 _ => out += s.str.len(),
1584 }
1585 }
1586
1587 Test::Regex(r) => {
1588 let v = r.non_magic_len / r.re.captures_len();
1597
1598 let len = r
1599 .length
1600 .map(|l| {
1601 if r.mods.contains(ReMod::LineLimit) {
1602 l * 80
1603 } else {
1604 l
1605 }
1606 })
1607 .unwrap_or(FILE_BYTES_MAX);
1608
1609 match len {
1610 0..=80 => out += v.saturating_mul(MULT),
1612 81..=240 => out += v * v.clamp(0, MULT - 2),
1614 _ => out += v,
1616 }
1617 }
1618
1619 Test::String16(t) => {
1620 out += t.test_value_len().saturating_mul(MULT);
1625 }
1626
1627 Test::Der => out += MULT,
1628
1629 Test::Default | Test::Name(_) | Test::Use(_, _) | Test::Indirect(_) | Test::Clear => {
1630 return 0;
1631 }
1632 }
1633
1634 if self.is_match_any() {
1636 return 0;
1637 }
1638
1639 if let Some(op) = self.cmp_op() {
1640 match op {
1641 CmpOp::Neq => out = 0,
1643 CmpOp::Eq | CmpOp::Not => out += MULT,
1644 CmpOp::Lt | CmpOp::Gt => out -= 2 * MULT,
1645 CmpOp::Xor | CmpOp::BitAnd => out -= MULT,
1646 }
1647 }
1648
1649 out as u64
1650 }
1651
1652 #[inline(always)]
1653 fn cmp_op(&self) -> Option<CmpOp> {
1654 match self {
1655 Self::String(t) => Some(t.cmp_op),
1656 Self::Scalar(s) => Some(s.cmp_op),
1657 Self::Float(t) => Some(t.cmp_op),
1658 Self::Name(_)
1659 | Self::Use(_, _)
1660 | Self::Search(_)
1661 | Self::PString(_)
1662 | Self::Regex(_)
1663 | Self::Clear
1664 | Self::Default
1665 | Self::Indirect(_)
1666 | Self::String16(_)
1667 | Self::Der => None,
1668 }
1669 }
1670
1671 #[inline(always)]
1672 fn is_match_any(&self) -> bool {
1673 match self {
1674 Test::Name(_) => false,
1675 Test::Use(_, _) => false,
1676 Test::Scalar(scalar_test) => matches!(scalar_test.test_val, TestValue::Any),
1677 Test::Float(float_test) => matches!(float_test.test_val, TestValue::Any),
1678 Test::String(string_test) => matches!(string_test.test_val, TestValue::Any),
1679 Test::Search(_) => false,
1680 Test::PString(pstring_test) => matches!(pstring_test.test_val, TestValue::Any),
1681 Test::Regex(_) => false,
1682 Test::Indirect(_) => false,
1683 Test::String16(string16_test) => matches!(string16_test.test_val, TestValue::Any),
1684 Test::Der => false,
1685 Test::Clear => false,
1686 Test::Default => false,
1687 }
1688 }
1689
1690 #[inline(always)]
1691 fn is_binary(&self) -> bool {
1692 match self {
1693 Self::Name(_) => true,
1694 Self::Use(_, _) => true,
1695 Self::Scalar(_) => true,
1696 Self::Float(_) => true,
1697 Self::String(t) => !t.is_binary() & !t.is_text() || t.is_binary(),
1698 Self::Search(t) => t.is_binary(),
1699 Self::PString(_) => true,
1700 Self::Regex(t) => t.is_binary(),
1701 Self::Clear => true,
1702 Self::Default => true,
1703 Self::Indirect(_) => true,
1704 Self::String16(_) => true,
1705 Self::Der => true,
1706 }
1707 }
1708
1709 #[inline(always)]
1710 fn is_text(&self) -> bool {
1711 match self {
1712 Self::Name(_) => true,
1713 Self::Use(_, _) => true,
1714 Self::Indirect(_) => true,
1715 Self::Clear => true,
1716 Self::Default => true,
1717 Self::String(t) => !t.is_binary() & !t.is_text() || t.is_text(),
1718 _ => !self.is_binary(),
1719 }
1720 }
1721
1722 #[inline(always)]
1723 fn is_only_text(&self) -> bool {
1724 self.is_text() && !self.is_binary()
1725 }
1726
1727 #[inline(always)]
1728 fn is_only_binary(&self) -> bool {
1729 self.is_binary() && !self.is_text()
1730 }
1731}
1732
1733#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
1734enum OffsetType {
1735 Byte,
1736 DoubleLe,
1737 DoubleBe,
1738 ShortLe,
1739 ShortBe,
1740 Id3Le,
1741 Id3Be,
1742 LongLe,
1743 LongBe,
1744 Middle,
1745 Octal,
1746 QuadBe,
1747 QuadLe,
1748}
1749
1750#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
1751enum Shift {
1752 Direct(u64),
1753 Indirect(i64),
1754}
1755
1756#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
1757struct IndOffset {
1758 off_addr: DirOffset,
1760 signed: bool,
1762 ty: OffsetType,
1764 op: Option<Op>,
1765 shift: Option<Shift>,
1766}
1767
1768impl IndOffset {
1769 fn read_offset<R: Read + Seek>(
1771 &self,
1772 haystack: &mut LazyCache<R>,
1773 rule_base_offset: Option<u64>,
1774 last_upper_match_offset: Option<u64>,
1775 ) -> Result<Option<u64>, io::Error> {
1776 let offset_address = match self.off_addr {
1777 DirOffset::Start(s) => {
1778 let Some(o) = s.checked_add(rule_base_offset.unwrap_or_default()) else {
1779 return Ok(None);
1780 };
1781
1782 haystack.seek(SeekFrom::Start(o))?
1783 }
1784 DirOffset::LastUpper(c) => haystack.seek(SeekFrom::Start(
1785 (last_upper_match_offset.unwrap_or_default() as i64 + c) as u64,
1786 ))?,
1787 DirOffset::End(e) => haystack.seek(SeekFrom::End(e))?,
1788 };
1789
1790 macro_rules! read_value {
1791 () => {
1792 match self.ty {
1793 OffsetType::Byte => {
1794 if self.signed {
1795 read_le!(haystack, u8) as u64
1796 } else {
1797 read_le!(haystack, i8) as u64
1798 }
1799 }
1800 OffsetType::DoubleLe => read_le!(haystack, f64) as u64,
1801 OffsetType::DoubleBe => read_be!(haystack, f64) as u64,
1802 OffsetType::ShortLe => {
1803 if self.signed {
1804 read_le!(haystack, i16) as u64
1805 } else {
1806 read_le!(haystack, u16) as u64
1807 }
1808 }
1809 OffsetType::ShortBe => {
1810 if self.signed {
1811 read_be!(haystack, i16) as u64
1812 } else {
1813 read_be!(haystack, u16) as u64
1814 }
1815 }
1816 OffsetType::Id3Le => decode_id3(read_le!(haystack, u32)) as u64,
1817 OffsetType::Id3Be => decode_id3(read_be!(haystack, u32)) as u64,
1818 OffsetType::LongLe => {
1819 if self.signed {
1820 read_le!(haystack, i32) as u64
1821 } else {
1822 read_le!(haystack, u32) as u64
1823 }
1824 }
1825 OffsetType::LongBe => {
1826 if self.signed {
1827 read_be!(haystack, i32) as u64
1828 } else {
1829 read_be!(haystack, u32) as u64
1830 }
1831 }
1832 OffsetType::Middle => read_me!(haystack) as u64,
1833 OffsetType::Octal => {
1834 if let Some(o) = read_octal_u64(haystack) {
1835 o
1836 } else {
1837 debug!("failed to read octal offset @ {offset_address}");
1838 return Ok(None);
1839 }
1840 }
1841 OffsetType::QuadLe => {
1842 if self.signed {
1843 read_le!(haystack, i64) as u64
1844 } else {
1845 read_le!(haystack, u64)
1846 }
1847 }
1848 OffsetType::QuadBe => {
1849 if self.signed {
1850 read_be!(haystack, i64) as u64
1851 } else {
1852 read_be!(haystack, u64)
1853 }
1854 }
1855 }
1856 };
1857 }
1858
1859 let o = read_value!();
1861
1862 trace!(
1863 "offset read @ {offset_address} value={o} op={:?} shift={:?}",
1864 self.op, self.shift
1865 );
1866
1867 if let (Some(op), Some(shift)) = (self.op, self.shift) {
1869 let shift = match shift {
1870 Shift::Direct(i) => i,
1871 Shift::Indirect(i) => {
1872 let tmp = offset_address as i128 + i as i128;
1873 if tmp.is_negative() {
1874 return Ok(None);
1875 } else {
1876 haystack.seek(SeekFrom::Start(tmp as u64))?;
1877 };
1878 read_value!()
1881 }
1882 };
1883
1884 match op {
1885 Op::Add => return Ok(o.checked_add(shift)),
1886 Op::Mul => return Ok(o.checked_mul(shift)),
1887 Op::Sub => return Ok(o.checked_sub(shift)),
1888 Op::Div => return Ok(o.checked_div(shift)),
1889 Op::Mod => return Ok(o.checked_rem(shift)),
1890 Op::And => return Ok(Some(o & shift)),
1891 Op::Or => return Ok(Some(o | shift)),
1892 Op::Xor => return Ok(Some(o ^ shift)),
1893 }
1894 }
1895
1896 Ok(Some(o))
1897 }
1898}
1899
1900#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
1901enum DirOffset {
1902 Start(u64),
1903 LastUpper(i64),
1905 End(i64),
1906}
1907
1908#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
1909enum Offset {
1910 Direct(DirOffset),
1911 Indirect(IndOffset),
1912}
1913
1914impl From<DirOffset> for Offset {
1915 fn from(value: DirOffset) -> Self {
1916 Self::Direct(value)
1917 }
1918}
1919
1920impl From<IndOffset> for Offset {
1921 fn from(value: IndOffset) -> Self {
1922 Self::Indirect(value)
1923 }
1924}
1925
1926impl Display for DirOffset {
1927 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1928 match self {
1929 DirOffset::Start(i) => write!(f, "{i}"),
1930 DirOffset::LastUpper(c) => write!(f, "&{c}"),
1931 DirOffset::End(e) => write!(f, "-{e}"),
1932 }
1933 }
1934}
1935
1936impl Default for DirOffset {
1937 fn default() -> Self {
1938 Self::LastUpper(0)
1939 }
1940}
1941
1942#[derive(Debug, Clone, Serialize, Deserialize)]
1943struct Match {
1944 line: usize,
1945 depth: u8,
1946 offset: Offset,
1947 test: Test,
1948 test_strength: u64,
1949 message: Option<Message>,
1950}
1951
1952impl From<Use> for Match {
1953 fn from(value: Use) -> Self {
1954 let test = Test::Use(value.switch_endianness, value.rule_name);
1955 let test_strength = test.strength();
1956 Self {
1957 line: value.line,
1958 depth: value.depth,
1959 offset: value.start_offset,
1960 test,
1961 test_strength,
1962 message: value.message,
1963 }
1964 }
1965}
1966
1967impl From<Name> for Match {
1968 fn from(value: Name) -> Self {
1969 let test = Test::Name(value.name);
1970 let test_strength = test.strength();
1971 Self {
1972 line: value.line,
1973 depth: 0,
1974 offset: Offset::Direct(DirOffset::Start(0)),
1975 test,
1976 test_strength,
1977 message: value.message,
1978 }
1979 }
1980}
1981
1982impl Match {
1983 #[inline(always)]
1985 fn offset_from_start<R: Read + Seek>(
1986 &self,
1987 haystack: &mut LazyCache<R>,
1988 rule_base_offset: Option<u64>,
1989 last_level_offset: Option<u64>,
1990 ) -> Result<Option<u64>, io::Error> {
1991 match self.offset {
1992 Offset::Direct(dir_offset) => match dir_offset {
1993 DirOffset::Start(s) => Ok(Some(s)),
1994 DirOffset::LastUpper(shift) => {
1995 let o = last_level_offset.unwrap_or_default() as i64 + shift;
1996
1997 if o >= 0 { Ok(Some(o as u64)) } else { Ok(None) }
1998 }
1999 DirOffset::End(e) => Ok(Some(haystack.offset_from_start(SeekFrom::End(e)))),
2000 },
2001 Offset::Indirect(ind_offset) => {
2002 let Some(o) =
2003 ind_offset.read_offset(haystack, rule_base_offset, last_level_offset)?
2004 else {
2005 return Ok(None);
2006 };
2007
2008 Ok(Some(o))
2009 }
2010 }
2011 }
2012
2013 #[inline]
2026 #[allow(clippy::too_many_arguments)]
2027 fn matches<'a: 'h, 'h, R: Read + Seek>(
2028 &'a self,
2029 source: Option<&str>,
2030 magic: &mut Magic<'a>,
2031 stream_kind: StreamKind,
2032 state: &mut MatchState,
2033 buf_base_offset: Option<u64>,
2034 rule_base_offset: Option<u64>,
2035 last_level_offset: Option<u64>,
2036 haystack: &'h mut LazyCache<R>,
2037 switch_endianness: bool,
2038 db: &'a MagicDb,
2039 depth: usize,
2040 ) -> Result<(bool, Option<MatchRes<'h>>), Error> {
2041 let source = source.unwrap_or("unknown");
2042 let line = self.line;
2043
2044 if depth >= MAX_RECURSION {
2045 return Err(Error::localized(
2046 source,
2047 line,
2048 Error::MaximumRecursion(MAX_RECURSION),
2049 ));
2050 }
2051
2052 if self.test.is_only_binary() && stream_kind.is_text() {
2053 trace!("skip binary test source={source} line={line} stream_kind={stream_kind:?}",);
2054 return Ok((false, None));
2055 }
2056
2057 if self.test.is_only_text() && !stream_kind.is_text() {
2058 trace!("skip text test source={source} line={line} stream_kind={stream_kind:?}",);
2059 return Ok((false, None));
2060 }
2061
2062 let Ok(Some(mut offset)) = self
2063 .offset_from_start(haystack, rule_base_offset, last_level_offset)
2064 .inspect_err(|e| debug!("source={source} line={line} failed at computing offset: {e}"))
2065 else {
2066 return Ok((false, None));
2067 };
2068
2069 offset = match self.offset {
2070 Offset::Indirect(_) => {
2071 buf_base_offset.unwrap_or_default().saturating_add(offset)
2076 }
2077 Offset::Direct(DirOffset::Start(_)) => {
2079 rule_base_offset.unwrap_or_default().saturating_add(offset)
2080 }
2081 _ => offset,
2082 };
2083
2084 match &self.test {
2085 Test::Clear => {
2086 trace!("source={source} line={line} clear");
2087 state.clear_continuation_level(&self.continuation_level());
2088 Ok((true, None))
2089 }
2090
2091 Test::Name(name) => {
2092 trace!(
2093 "source={source} line={line} running rule {name} switch_endianness={switch_endianness}",
2094 );
2095 Ok((true, None))
2096 }
2097
2098 Test::Use(flip_endianness, rule_name) => {
2099 trace!(
2100 "source={source} line={line} use {rule_name} switch_endianness={flip_endianness}",
2101 );
2102
2103 let switch_endianness = switch_endianness ^ flip_endianness;
2105
2106 let dr: &DependencyRule = db.dependencies.get(rule_name).ok_or(
2107 Error::localized(source, line, Error::MissingRule(rule_name.clone())),
2108 )?;
2109
2110 if let Some(msg) = self.message.as_ref() {
2112 magic.push_message(msg.to_string_lossy());
2113 }
2114
2115 dr.rule.magic(
2116 magic,
2117 stream_kind,
2118 buf_base_offset,
2119 Some(offset),
2120 haystack,
2121 db,
2122 switch_endianness,
2123 depth.saturating_add(1),
2124 )?;
2125
2126 Ok((false, None))
2128 }
2129
2130 Test::Indirect(m) => {
2131 trace!(
2132 "source={source} line={line} indirect mods={:?} offset={offset:#x}",
2133 m
2134 );
2135
2136 let new_buf_base_off = if m.contains(IndirectMod::Relative) {
2137 Some(offset)
2138 } else {
2139 None
2140 };
2141
2142 if let Some(msg) = self.message.as_ref() {
2144 magic.push_message(msg.to_string_lossy());
2145 }
2146
2147 for r in db.rules.iter() {
2148 let messages_cnt = magic.message.len();
2149
2150 r.magic(
2151 magic,
2152 stream_kind,
2153 new_buf_base_off,
2154 Some(offset),
2155 haystack,
2156 db,
2157 false,
2158 depth.saturating_add(1),
2159 )?;
2160
2161 if magic.message.len() != messages_cnt {
2163 break;
2164 }
2165 }
2166
2167 Ok((false, None))
2169 }
2170
2171 Test::Default => {
2172 let ok = !state.get_continuation_level(&self.continuation_level());
2174
2175 trace!("source={source} line={line} default match={ok}");
2176 if ok {
2177 state.set_continuation_level(self.continuation_level());
2178 }
2179
2180 Ok((ok, None))
2181 }
2182
2183 _ => {
2184 if let Err(e) = haystack.seek(SeekFrom::Start(offset)) {
2185 debug!("source={source} line={line} failed to seek in haystack: {e}");
2186 return Ok((false, None));
2187 }
2188
2189 let mut trace_msg = None;
2190
2191 if enabled!(Level::DEBUG) {
2192 trace_msg = Some(vec![format!(
2193 "source={source} line={line} depth={} stream_offset={:#x}",
2194 self.depth,
2195 haystack.lazy_stream_position()
2196 )])
2197 }
2198
2199 if let Ok(opt_test_value) = self
2203 .test
2204 .read_test_value(haystack, switch_endianness)
2205 .inspect_err(|e| {
2206 debug!("source={source} line={line} error while reading test value @{offset}: {e}",)
2207 })
2208 {
2209 if let Some(v) = trace_msg
2210 .as_mut() { v.push(format!("test={:?}", self.test)) }
2211
2212 let match_res =
2213 opt_test_value.and_then(|tv| self.test.match_value(&tv, stream_kind));
2214
2215 if let Some(v) = trace_msg.as_mut() { v.push(format!(
2216 "message=\"{}\" match={}",
2217 self.message
2218 .as_ref()
2219 .map(|fs| fs.to_string_lossy())
2220 .unwrap_or_default(),
2221 match_res.is_some()
2222 )) }
2223
2224 if enabled!(Level::DEBUG) && !enabled!(Level::TRACE) && match_res.is_some() {
2226 if let Some(m) = trace_msg{
2227 debug!("{}", m.join(" "));
2228 }
2229 } else if enabled!(Level::TRACE)
2230 && let Some(m) = trace_msg{
2231 trace!("{}", m.join(" "));
2232 }
2233
2234 if let Some(mr) = match_res {
2235 state.set_continuation_level(self.continuation_level());
2236 return Ok((true, Some(mr)));
2237 }
2238 }
2239
2240 Ok((false, None))
2241 }
2242 }
2243 }
2244
2245 #[inline(always)]
2246 fn continuation_level(&self) -> ContinuationLevel {
2247 ContinuationLevel(self.depth)
2248 }
2249}
2250
2251#[derive(Debug, Clone)]
2252struct Use {
2253 line: usize,
2254 depth: u8,
2255 start_offset: Offset,
2256 rule_name: String,
2257 switch_endianness: bool,
2258 message: Option<Message>,
2259}
2260
2261#[derive(Debug, Clone, Serialize, Deserialize)]
2262struct StrengthMod {
2263 op: Op,
2264 by: u8,
2265}
2266
2267impl StrengthMod {
2268 #[inline(always)]
2269 fn apply(&self, strength: u64) -> u64 {
2270 let by = self.by as u64;
2271 debug!("applying strength modifier: {strength} {} {}", self.op, by);
2272 match self.op {
2273 Op::Mul => strength.saturating_mul(by),
2274 Op::Add => strength.saturating_add(by),
2275 Op::Sub => strength.saturating_sub(by),
2276 Op::Div => {
2277 if by > 0 {
2278 strength.saturating_div(by)
2279 } else {
2280 strength
2281 }
2282 }
2283 Op::Mod => strength % by,
2284 Op::And => strength & by,
2285 Op::Xor | Op::Or => {
2288 debug_panic!("unsupported strength operator");
2289 strength
2290 }
2291 }
2292 }
2293}
2294
2295#[derive(Debug, Clone)]
2296enum Flag {
2297 Mime(String),
2298 Ext(HashSet<String>),
2299 Strength(StrengthMod),
2300 Apple(String),
2301}
2302
2303#[derive(Debug, Clone)]
2304struct Name {
2305 line: usize,
2306 name: String,
2307 message: Option<Message>,
2308}
2309
2310#[derive(Debug, Clone)]
2311enum Entry<'span> {
2312 Match(Span<'span>, Match),
2313 Flag(Span<'span>, Flag),
2314}
2315
2316#[derive(Debug, Clone, Serialize, Deserialize)]
2317struct EntryNode {
2318 root: bool,
2319 entry: Match,
2320 children: Vec<EntryNode>,
2321 mimetype: Option<String>,
2322 apple: Option<String>,
2323 strength_mod: Option<StrengthMod>,
2324 exts: HashSet<String>,
2325}
2326
2327impl EntryNode {
2328 fn update_exts_rec(
2329 &self,
2330 exts: &mut HashSet<String>,
2331 deps: &HashMap<String, DependencyRule>,
2332 marked: &mut HashSet<String>,
2333 ) -> Result<(), ()> {
2334 for ext in self.exts.iter() {
2335 if !exts.contains(ext) {
2336 exts.insert(ext.clone());
2337 }
2338 }
2339
2340 for c in self.children.iter() {
2341 if let Test::Use(_, ref name) = c.entry.test {
2342 if marked.contains(name) {
2343 continue;
2344 }
2345 if let Some(r) = deps.get(name) {
2346 marked.insert(name.clone());
2347 exts.extend(r.rule.fetch_all_extensions(deps, marked)?);
2348 } else {
2349 return Err(());
2350 }
2351 } else {
2352 c.update_exts_rec(exts, deps, marked)?;
2353 }
2354 }
2355
2356 Ok(())
2357 }
2358
2359 fn update_score_rec(
2360 &self,
2361 depth: usize,
2362 score: &mut u64,
2363 deps: &HashMap<String, DependencyRule>,
2364 marked: &mut HashSet<String>,
2365 ) {
2366 if depth == 3 {
2367 return;
2368 }
2369
2370 *score += self
2371 .children
2372 .iter()
2373 .map(|e| e.entry.test_strength)
2374 .min()
2375 .unwrap_or_default();
2376
2377 for c in self.children.iter() {
2378 if let Test::Use(_, ref name) = c.entry.test {
2379 if marked.contains(name) {
2380 continue;
2381 }
2382
2383 if let Some(r) = deps.get(name) {
2384 marked.insert(name.clone());
2385 *score += r.rule.compute_score(depth, deps, marked);
2386 }
2387 }
2388 c.update_score_rec(depth + 1, score, deps, marked);
2389 }
2390 }
2391
2392 #[inline]
2393 #[allow(clippy::too_many_arguments)]
2394 fn matches<'r, R: Read + Seek>(
2395 &'r self,
2396 opt_source: Option<&str>,
2397 magic: &mut Magic<'r>,
2398 state: &mut MatchState,
2399 stream_kind: StreamKind,
2400 buf_base_offset: Option<u64>,
2401 rule_base_offset: Option<u64>,
2402 last_level_offset: Option<u64>,
2403 haystack: &mut LazyCache<R>,
2404 db: &'r MagicDb,
2405 switch_endianness: bool,
2406 depth: usize,
2407 ) -> Result<(), Error> {
2408 let (ok, opt_match_res) = self.entry.matches(
2409 opt_source,
2410 magic,
2411 stream_kind,
2412 state,
2413 buf_base_offset,
2414 rule_base_offset,
2415 last_level_offset,
2416 haystack,
2417 switch_endianness,
2418 db,
2419 depth,
2420 )?;
2421
2422 let source = opt_source.unwrap_or("unknown");
2423 let line = self.entry.line;
2424
2425 if ok {
2426 if let Some(msg) = self.entry.message.as_ref()
2428 && let Ok(msg) = msg.format_with(opt_match_res.as_ref()).inspect_err(|e| {
2429 debug!("source={source} line={line} failed to format message: {e}")
2430 })
2431 {
2432 magic.push_message(msg);
2433 }
2434
2435 if let Some(mr) = opt_match_res {
2437 match &self.entry.test {
2438 Test::String(t) => {
2439 if t.has_length_mod() {
2440 let o = mr.end_offset();
2441 haystack.seek(SeekFrom::Start(o))?;
2442 }
2443 }
2444 Test::Search(t) => {
2445 if t.re_mods.contains(ReMod::StartOffsetUpdate) {
2446 let o = mr.start_offset();
2447 haystack.seek(SeekFrom::Start(o))?;
2448 } else {
2449 let o = mr.end_offset();
2450 haystack.seek(SeekFrom::Start(o))?;
2451 }
2452 }
2453
2454 Test::Regex(t) => {
2455 if t.mods.contains(ReMod::StartOffsetUpdate) {
2456 let o = mr.start_offset();
2457 haystack.seek(SeekFrom::Start(o))?;
2458 } else {
2459 let o = mr.end_offset();
2460 haystack.seek(SeekFrom::Start(o))?;
2461 }
2462 }
2463 _ => {}
2465 }
2466 }
2467
2468 if let Some(mimetype) = self.mimetype.as_ref() {
2469 magic.set_mime_type(Cow::Borrowed(mimetype));
2470 }
2471
2472 if let Some(apple_ty) = self.apple.as_ref() {
2473 magic.set_creator_code(Cow::Borrowed(apple_ty));
2474 }
2475
2476 if !self.exts.is_empty() {
2477 magic.insert_extensions(self.exts.iter().map(|s| s.as_str()));
2478 }
2479
2480 let mut strength = self.entry.test_strength;
2484
2485 let continuation_level = self.entry.continuation_level().0 as u64;
2486 if self.entry.message.is_none() && continuation_level < 3 {
2487 strength = strength.saturating_add(continuation_level);
2488 }
2489
2490 if let Some(sm) = self.strength_mod.as_ref() {
2491 strength = sm.apply(strength);
2492 }
2493
2494 if self.entry.message.is_none() {
2496 strength += 1
2497 }
2498
2499 magic.update_strength(strength);
2500
2501 let end_upper_level = haystack.lazy_stream_position();
2502
2503 let rule_base_offset = if self.root {
2511 match self.entry.offset {
2512 Offset::Direct(DirOffset::End(o)) => {
2513 Some(haystack.offset_from_start(SeekFrom::End(o)))
2514 }
2515 _ => rule_base_offset,
2516 }
2517 } else {
2518 rule_base_offset
2519 };
2520
2521 for e in self.children.iter() {
2522 e.matches(
2523 opt_source,
2524 magic,
2525 state,
2526 stream_kind,
2527 buf_base_offset,
2528 rule_base_offset,
2529 Some(end_upper_level),
2530 haystack,
2531 db,
2532 switch_endianness,
2533 depth,
2534 )?
2535 }
2536 }
2537
2538 Ok(())
2539 }
2540}
2541
2542#[derive(Debug, Clone, Serialize, Deserialize)]
2544pub struct MagicRule {
2545 id: usize,
2546 source: Option<String>,
2547 entries: EntryNode,
2548 extensions: HashSet<String>,
2549 score: u64,
2551 finalized: bool,
2552}
2553
2554impl MagicRule {
2555 #[inline(always)]
2556 fn set_id(&mut self, id: usize) {
2557 self.id = id
2558 }
2559
2560 fn fetch_all_extensions(
2564 &self,
2565 deps: &HashMap<String, DependencyRule>,
2566 marked: &mut HashSet<String>,
2567 ) -> Result<HashSet<String>, ()> {
2568 let mut exts = HashSet::new();
2569 self.entries.update_exts_rec(&mut exts, deps, marked)?;
2570 Ok(exts)
2571 }
2572
2573 fn compute_score(
2576 &self,
2577 depth: usize,
2578 deps: &HashMap<String, DependencyRule>,
2579 marked: &mut HashSet<String>,
2580 ) -> u64 {
2581 let mut score = 0;
2582 score += self.entries.entry.test_strength;
2583 self.entries
2584 .update_score_rec(depth, &mut score, deps, marked);
2585 score
2586 }
2587
2588 fn try_finalize(&mut self, deps: &HashMap<String, DependencyRule>) {
2591 if self.finalized {
2592 return;
2593 }
2594
2595 let Ok(exts) = self.fetch_all_extensions(deps, &mut HashSet::new()) else {
2596 return;
2597 };
2598
2599 self.extensions.extend(exts);
2600
2601 self.score = self.compute_score(0, deps, &mut HashSet::new());
2605 self.finalized = true
2606 }
2607
2608 #[inline]
2609 fn magic_entrypoint<'r, R: Read + Seek>(
2610 &'r self,
2611 magic: &mut Magic<'r>,
2612 stream_kind: StreamKind,
2613 haystack: &mut LazyCache<R>,
2614 db: &'r MagicDb,
2615 switch_endianness: bool,
2616 depth: usize,
2617 ) -> Result<(), Error> {
2618 self.entries.matches(
2619 self.source.as_deref(),
2620 magic,
2621 &mut MatchState::empty(),
2622 stream_kind,
2623 None,
2624 None,
2625 None,
2626 haystack,
2627 db,
2628 switch_endianness,
2629 depth,
2630 )
2631 }
2632
2633 #[inline]
2634 #[allow(clippy::too_many_arguments)]
2635 fn magic<'r, R: Read + Seek>(
2636 &'r self,
2637 magic: &mut Magic<'r>,
2638 stream_kind: StreamKind,
2639 buf_base_offset: Option<u64>,
2640 rule_base_offset: Option<u64>,
2641 haystack: &mut LazyCache<R>,
2642 db: &'r MagicDb,
2643 switch_endianness: bool,
2644 depth: usize,
2645 ) -> Result<(), Error> {
2646 self.entries.matches(
2647 self.source.as_deref(),
2648 magic,
2649 &mut MatchState::empty(),
2650 stream_kind,
2651 buf_base_offset,
2652 rule_base_offset,
2653 None,
2654 haystack,
2655 db,
2656 switch_endianness,
2657 depth,
2658 )
2659 }
2660
2661 pub fn is_text(&self) -> bool {
2667 self.entries.entry.test.is_text()
2668 && self.entries.children.iter().all(|e| e.entry.test.is_text())
2669 }
2670
2671 #[inline(always)]
2677 pub fn score(&self) -> u64 {
2678 self.score
2679 }
2680
2681 #[inline(always)]
2687 pub fn source(&self) -> Option<&str> {
2688 self.source.as_deref()
2689 }
2690
2691 #[inline(always)]
2697 pub fn line(&self) -> usize {
2698 self.entries.entry.line
2699 }
2700
2701 #[inline(always)]
2707 pub fn extensions(&self) -> &HashSet<String> {
2708 &self.extensions
2709 }
2710}
2711
2712#[derive(Debug, Clone, Serialize, Deserialize)]
2713struct DependencyRule {
2714 name: String,
2715 rule: MagicRule,
2716}
2717
2718#[derive(Debug, Clone, Serialize, Deserialize)]
2724pub struct MagicSource {
2725 rules: Vec<MagicRule>,
2726 dependencies: HashMap<String, DependencyRule>,
2727}
2728
2729impl MagicSource {
2730 pub fn open<P: AsRef<Path>>(p: P) -> Result<Self, Error> {
2740 FileMagicParser::parse_file(p)
2741 }
2742}
2743
/// Continuation level of an entry, built from its depth.
#[derive(Debug, Hash, PartialEq, Eq, Clone, Copy)]
struct ContinuationLevel(u8);
2746
/// Encoding guessed for a text stream.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
enum TextEncoding {
    /// ASCII text.
    Ascii,
    /// UTF-8 text.
    Utf8,
    /// Text whose encoding could not be determined.
    Unknown,
}
2754
2755impl TextEncoding {
2756 const fn as_magic_str(&self) -> &'static str {
2757 match self {
2758 TextEncoding::Ascii => "ASCII",
2759 TextEncoding::Utf8 => "UTF-8",
2760 TextEncoding::Unknown => "Unknown",
2761 }
2762 }
2763}
2764
2765#[derive(Debug, PartialEq, Eq, Clone, Copy)]
2766enum StreamKind {
2767 Binary,
2768 Text(TextEncoding),
2769}
2770
2771impl StreamKind {
2772 const fn is_text(&self) -> bool {
2773 matches!(self, StreamKind::Text(_))
2774 }
2775}
2776
/// State shared by the entries of a rule while it is evaluated.
#[derive(Debug)]
struct MatchState {
    /// One flag per continuation level, set once an entry of that level
    /// matched.
    continuation_levels: [bool; 256],
}
2781
2782impl MatchState {
2783 #[inline(always)]
2784 fn empty() -> Self {
2785 MatchState {
2786 continuation_levels: [false; 256],
2787 }
2788 }
2789
2790 #[inline(always)]
2791 fn get_continuation_level(&mut self, level: &ContinuationLevel) -> bool {
2792 self.continuation_levels
2793 .get(level.0 as usize)
2794 .cloned()
2795 .unwrap_or_default()
2796 }
2797
2798 #[inline(always)]
2799 fn set_continuation_level(&mut self, level: ContinuationLevel) {
2800 if let Some(b) = self.continuation_levels.get_mut(level.0 as usize) {
2801 *b = true
2802 }
2803 }
2804
2805 #[inline(always)]
2806 fn clear_continuation_level(&mut self, level: &ContinuationLevel) {
2807 if let Some(b) = self.continuation_levels.get_mut(level.0 as usize) {
2808 *b = false;
2809 }
2810 }
2811}
2812
2813#[derive(Debug, Default)]
2815pub struct Magic<'m> {
2816 stream_kind: Option<StreamKind>,
2817 source: Option<Cow<'m, str>>,
2818 message: Vec<Cow<'m, str>>,
2819 mime_type: Option<Cow<'m, str>>,
2820 creator_code: Option<Cow<'m, str>>,
2821 strength: u64,
2822 exts: HashSet<Cow<'m, str>>,
2823 is_default: bool,
2824}
2825
2826impl<'m> Magic<'m> {
2827 #[inline(always)]
2828 fn set_source(&mut self, source: Option<&'m str>) {
2829 self.source = source.map(Cow::Borrowed);
2830 }
2831
2832 #[inline(always)]
2833 fn set_stream_kind(&mut self, stream_kind: StreamKind) {
2834 self.stream_kind = Some(stream_kind)
2835 }
2836
2837 #[inline(always)]
2838 fn reset(&mut self) {
2839 self.stream_kind = None;
2840 self.source = None;
2841 self.message.clear();
2842 self.mime_type = None;
2843 self.creator_code = None;
2844 self.strength = 0;
2845 self.exts.clear();
2846 self.is_default = false;
2847 }
2848
2849 #[inline]
2857 pub fn into_owned<'owned>(self) -> Magic<'owned> {
2858 Magic {
2859 stream_kind: self.stream_kind,
2860 source: self.source.map(|s| Cow::Owned(s.into_owned())),
2861 message: self
2862 .message
2863 .into_iter()
2864 .map(Cow::into_owned)
2865 .map(Cow::Owned)
2866 .collect(),
2867 mime_type: self.mime_type.map(|m| Cow::Owned(m.into_owned())),
2868 creator_code: self.creator_code.map(|m| Cow::Owned(m.into_owned())),
2869 strength: self.strength,
2870 exts: self
2871 .exts
2872 .into_iter()
2873 .map(|e| Cow::Owned(e.into_owned()))
2874 .collect(),
2875 is_default: self.is_default,
2876 }
2877 }
2878
2879 #[inline(always)]
2885 pub fn message(&self) -> String {
2886 let mut out = String::new();
2887 for (i, m) in self.message.iter().enumerate() {
2888 if let Some(s) = m.strip_prefix(r#"\b"#) {
2889 out.push_str(s);
2890 } else {
2891 if i > 0 {
2893 out.push(' ');
2894 }
2895 out.push_str(m);
2896 }
2897 }
2898 out
2899 }
2900
2901 #[inline]
2912 pub fn message_parts(&self) -> impl Iterator<Item = &str> {
2913 self.message.iter().map(|p| p.as_ref())
2914 }
2915
2916 #[inline(always)]
2917 fn update_strength(&mut self, value: u64) {
2918 self.strength = self.strength.saturating_add(value);
2919 debug!("updated strength = {:?}", self.strength)
2920 }
2921
2922 #[inline(always)]
2928 pub fn mime_type(&self) -> &str {
2929 self.mime_type.as_deref().unwrap_or(match self.stream_kind {
2930 Some(StreamKind::Text(_)) => DEFAULT_TEXT_MIMETYPE,
2931 Some(StreamKind::Binary) | None => DEFAULT_BIN_MIMETYPE,
2932 })
2933 }
2934
2935 #[inline(always)]
2936 fn push_message<'a: 'm>(&mut self, msg: Cow<'a, str>) {
2937 if !msg.is_empty() {
2938 debug!("pushing message: msg={msg} len={}", msg.len());
2939 self.message.push(msg);
2940 }
2941 }
2942
2943 #[inline(always)]
2944 fn set_mime_type<'a: 'm>(&mut self, mime: Cow<'a, str>) {
2945 if self.mime_type.is_none() {
2946 debug!("insert mime: {:?}", mime);
2947 self.mime_type = Some(mime)
2948 }
2949 }
2950
2951 #[inline(always)]
2952 fn set_creator_code<'a: 'm>(&mut self, apple_ty: Cow<'a, str>) {
2953 if self.creator_code.is_none() {
2954 debug!("insert apple type: {apple_ty:?}");
2955 self.creator_code = Some(apple_ty)
2956 }
2957 }
2958
2959 #[inline(always)]
2960 fn insert_extensions<'a: 'm, I: Iterator<Item = &'a str>>(&mut self, exts: I) {
2961 if self.exts.is_empty() {
2962 self.exts.extend(exts.filter_map(|e| {
2963 if e.is_empty() {
2964 None
2965 } else {
2966 Some(Cow::Borrowed(e))
2967 }
2968 }));
2969 }
2970 }
2971
2972 #[inline(always)]
2980 pub fn strength(&self) -> u64 {
2981 self.strength
2982 }
2983
2984 #[inline(always)]
2990 pub fn source(&self) -> Option<&str> {
2991 self.source.as_deref()
2992 }
2993
2994 #[inline(always)]
3000 pub fn creator_code(&self) -> Option<&str> {
3001 self.creator_code.as_deref()
3002 }
3003
3004 #[inline(always)]
3010 pub fn extensions(&self) -> &HashSet<Cow<'m, str>> {
3011 &self.exts
3012 }
3013
3014 #[inline(always)]
3020 pub fn is_default(&self) -> bool {
3021 self.is_default
3022 }
3023}
3024
3025#[derive(Debug, Default, Clone, Serialize, Deserialize)]
3027pub struct MagicDb {
3028 rule_id: usize,
3029 rules: Vec<MagicRule>,
3030 dependencies: HashMap<String, DependencyRule>,
3031}
3032
/// Heuristically tells whether `bytes` look like text.
///
/// Empty input and input holding a NUL byte are never text. Otherwise the
/// input is text when more than 85% of its bytes are printable (tab, line
/// feed, carriage return or `0x20..=0x7E`) and less than 20% are not.
#[inline(always)]
fn is_likely_text(bytes: &[u8]) -> bool {
    if bytes.is_empty() || bytes.contains(&0x00) {
        return false;
    }

    let printable = bytes
        .iter()
        .filter(|&&b| matches!(b, 0x09 | 0x0A | 0x0D | 0x20..=0x7E))
        .count();
    // without NUL bytes, everything which is not printable is a "high" byte
    let high_bytes = bytes.len() - printable;

    let total = bytes.len() as f64;
    let printable_ratio = printable as f64 / total;
    let high_bytes_ratio = high_bytes as f64 / total;

    printable_ratio > 0.85 && high_bytes_ratio < 0.20
}
3059
3060#[inline(always)]
3061fn guess_stream_kind<S: AsRef<[u8]>>(stream: S) -> StreamKind {
3062 let Ok(s) = str::from_utf8(stream.as_ref()) else {
3063 if is_likely_text(stream.as_ref()) {
3064 return StreamKind::Text(TextEncoding::Unknown);
3065 } else {
3066 return StreamKind::Binary;
3067 }
3068 };
3069
3070 let count = s.chars().count();
3071 let mut is_ascii = true;
3072
3073 for c in s.chars().take(count.saturating_sub(1)) {
3074 is_ascii &= c.is_ascii()
3075 }
3076
3077 if is_ascii {
3078 StreamKind::Text(TextEncoding::Ascii)
3079 } else {
3080 StreamKind::Text(TextEncoding::Utf8)
3081 }
3082}
3083
3084impl MagicDb {
3085 fn open_reader<R: Read + Seek>(f: R) -> Result<LazyCache<R>, Error> {
3086 Ok(LazyCache::<R>::from_read_seek(f)
3087 .and_then(|lc| lc.with_hot_cache(2 * FILE_BYTES_MAX))?)
3088 .map(|lc| lc.with_warm_cache(100 << 20))
3089 }
3090
    /// Creates a new database from `Self::default()`; rules are added
    /// afterwards with `load`.
    pub fn new() -> Self {
        Self::default()
    }
3099
3100 #[inline(always)]
3101 fn next_rule_id(&mut self) -> usize {
3102 let t = self.rule_id;
3103 self.rule_id += 1;
3104 t
3105 }
3106
3107 #[inline(always)]
3108 fn try_json<R: Read + Seek>(
3109 haystack: &mut LazyCache<R>,
3110 stream_kind: StreamKind,
3111 magic: &mut Magic,
3112 ) -> Result<bool, Error> {
3113 if matches!(stream_kind, StreamKind::Binary) {
3115 return Ok(false);
3116 }
3117
3118 let buf = haystack.read_range(0..FILE_BYTES_MAX as u64)?.trim_ascii();
3119
3120 let Some((start, end)) = find_json_boundaries(buf) else {
3121 return Ok(false);
3122 };
3123
3124 for c in buf[0..start].iter() {
3127 if !c.is_ascii_whitespace() {
3128 return Ok(false);
3129 }
3130 }
3131
3132 let mut is_ndjson = false;
3133
3134 trace!("maybe a json document");
3135 let ok = serde_json::from_slice::<serde_json::Value>(&buf[start..=end]).is_ok();
3136 if !ok {
3137 return Ok(false);
3138 }
3139
3140 if end + 1 < buf.len() {
3142 let buf = &buf[end + 1..];
3144 if let Some((second_start, second_end)) = find_json_boundaries(buf) {
3145 if memchr(b'\n', &buf[..second_start]).is_some() {
3147 trace!("might be ndjson");
3148 is_ndjson = serde_json::from_slice::<serde_json::Value>(
3149 &buf[second_start..=second_end],
3150 )
3151 .is_ok();
3152 }
3153 }
3154 }
3155
3156 if is_ndjson {
3157 magic.push_message(Cow::Borrowed("New Line Delimited"));
3158 magic.set_mime_type(Cow::Borrowed("application/x-ndjson"));
3159 magic.insert_extensions(["ndjson", "jsonl"].into_iter());
3160 } else {
3161 magic.set_mime_type(Cow::Borrowed("application/json"));
3162 magic.insert_extensions(["json"].into_iter());
3163 }
3164
3165 magic.push_message(Cow::Borrowed("JSON text data"));
3166 magic.set_source(Some(HARDCODED_SOURCE));
3167 magic.update_strength(HARDCODED_MAGIC_STRENGTH);
3168 Ok(true)
3169 }
3170
3171 #[inline(always)]
3172 fn try_csv<R: Read + Seek>(
3173 haystack: &mut LazyCache<R>,
3174 stream_kind: StreamKind,
3175 magic: &mut Magic,
3176 ) -> Result<bool, Error> {
3177 let StreamKind::Text(enc) = stream_kind else {
3179 return Ok(false);
3180 };
3181
3182 let buf = haystack.read_range(0..FILE_BYTES_MAX as u64)?;
3183 let mut reader = csv::Reader::from_reader(io::Cursor::new(buf));
3184 let mut records = reader.records();
3185
3186 let Some(Ok(first)) = records.next() else {
3187 return Ok(false);
3188 };
3189
3190 if first.len() <= 1 {
3194 return Ok(false);
3195 }
3196
3197 let mut n = 1;
3199 for i in records.take(9) {
3200 if let Ok(rec) = i {
3201 if first.len() != rec.len() {
3202 return Ok(false);
3203 }
3204 } else {
3205 return Ok(false);
3206 }
3207 n += 1;
3208 }
3209
3210 if n != 10 {
3212 return Ok(false);
3213 }
3214
3215 magic.set_mime_type(Cow::Borrowed("text/csv"));
3216 magic.push_message(Cow::Borrowed("CSV"));
3217 magic.push_message(Cow::Borrowed(enc.as_magic_str()));
3218 magic.push_message(Cow::Borrowed("text"));
3219 magic.insert_extensions(["csv"].into_iter());
3220 magic.set_source(Some(HARDCODED_SOURCE));
3221 magic.update_strength(HARDCODED_MAGIC_STRENGTH);
3222 Ok(true)
3223 }
3224
3225 #[inline(always)]
3226 fn try_tar<R: Read + Seek>(
3227 haystack: &mut LazyCache<R>,
3228 stream_kind: StreamKind,
3229 magic: &mut Magic,
3230 ) -> Result<bool, Error> {
3231 if !matches!(stream_kind, StreamKind::Binary) {
3233 return Ok(false);
3234 }
3235
3236 let buf = haystack.read_range(0..FILE_BYTES_MAX as u64)?;
3237 let mut ar = Archive::new(io::Cursor::new(buf));
3238
3239 let Ok(mut entries) = ar.entries() else {
3240 return Ok(false);
3241 };
3242
3243 let Some(Ok(first)) = entries.next() else {
3244 return Ok(false);
3245 };
3246
3247 let header = first.header();
3248
3249 if header.as_ustar().is_some() {
3250 magic.push_message(Cow::Borrowed("POSIX tar archive"));
3251 } else if header.as_gnu().is_some() {
3252 magic.push_message(Cow::Borrowed("POSIX tar archive (GNU)"));
3253 } else {
3254 magic.push_message(Cow::Borrowed("tar archive"));
3255 }
3256
3257 magic.set_mime_type(Cow::Borrowed("application/x-tar"));
3258 magic.set_source(Some(HARDCODED_SOURCE));
3259 magic.update_strength(HARDCODED_MAGIC_STRENGTH);
3260 magic.insert_extensions(["tar"].into_iter());
3261 Ok(true)
3262 }
3263
3264 #[inline(always)]
3265 fn try_hard_magic<R: Read + Seek>(
3266 haystack: &mut LazyCache<R>,
3267 stream_kind: StreamKind,
3268 magic: &mut Magic,
3269 ) -> Result<bool, Error> {
3270 Ok(Self::try_json(haystack, stream_kind, magic)?
3271 || Self::try_csv(haystack, stream_kind, magic)?
3272 || Self::try_tar(haystack, stream_kind, magic)?)
3273 }
3274
3275 #[inline(always)]
3276 fn magic_default<'m, R: Read + Seek>(
3277 haystack: &mut LazyCache<R>,
3278 stream_kind: StreamKind,
3279 magic: &mut Magic<'m>,
3280 ) -> Result<(), Error> {
3281 let buf = haystack.read_range(0..FILE_BYTES_MAX as u64)?;
3282
3283 magic.set_source(Some(HARDCODED_SOURCE));
3284 magic.set_stream_kind(stream_kind);
3285 magic.is_default = true;
3286
3287 if buf.is_empty() {
3288 magic.push_message(Cow::Borrowed("empty"));
3289 magic.set_mime_type(Cow::Borrowed(DEFAULT_BIN_MIMETYPE));
3290 return Ok(());
3291 }
3292
3293 match stream_kind {
3294 StreamKind::Binary => {
3295 magic.push_message(Cow::Borrowed("data"));
3296 }
3297 StreamKind::Text(e) => {
3298 magic.push_message(Cow::Borrowed(e.as_magic_str()));
3299 magic.push_message(Cow::Borrowed("text"));
3300 }
3301 }
3302
3303 Ok(())
3304 }
3305
3306 pub fn load(&mut self, mf: MagicSource) -> Result<&mut Self, Error> {
3316 for rule in mf.rules.into_iter() {
3317 let mut rule = rule;
3318 rule.set_id(self.next_rule_id());
3319
3320 self.rules.push(rule);
3321 }
3322
3323 self.dependencies.extend(mf.dependencies);
3324 self.prepare();
3325 Ok(self)
3326 }
3327
    /// Returns the rules currently held by the database as a slice.
    pub fn rules(&self) -> &[MagicRule] {
        &self.rules
    }
3336
    /// Returns the first magic matching `haystack`, given an already
    /// guessed `stream_kind`.
    ///
    /// Order of evaluation:
    /// 1. the hard-coded detectors (`try_hard_magic`),
    /// 2. rules whose extensions contain the lowercased `extension`, if a
    ///    non-empty one is provided,
    /// 3. every rule not already tried in step 2,
    /// 4. the fallback classification (`magic_default`).
    ///
    /// NOTE(review): when a hard-coded detector matches, the magic is
    /// returned without `set_stream_kind` being called in this function;
    /// confirm this is intended.
    #[inline]
    fn first_magic_with_stream_kind<R: Read + Seek>(
        &self,
        haystack: &mut LazyCache<R>,
        stream_kind: StreamKind,
        extension: Option<&str>,
    ) -> Result<Magic<'_>, Error> {
        let mut magic = Magic::default();

        if Self::try_hard_magic(haystack, stream_kind, &mut magic)? {
            return Ok(magic);
        }

        // one flag per rule, indexed by rule id: set once the rule has been
        // tried through the extension shortcut below
        let mut marked = vec![false; self.rules.len()];

        // Evaluates one rule and returns from the enclosing function as soon
        // as the rule produced a message; otherwise resets `magic` so it can
        // be reused for the next rule.
        macro_rules! do_magic {
            ($rule: expr) => {{
                $rule.magic_entrypoint(&mut magic, stream_kind, haystack, &self, false, 0)?;

                if !magic.message.is_empty() {
                    magic.set_stream_kind(stream_kind);
                    magic.set_source($rule.source.as_deref());
                    return Ok(magic);
                }

                magic.reset();
            }};
        }

        // rules advertising the provided extension are tried first
        if let Some(ext) = extension.map(|e| e.to_lowercase())
            && !ext.is_empty()
        {
            for rule in self.rules.iter().filter(|r| r.extensions.contains(&ext)) {
                do_magic!(rule);
                if let Some(f) = marked.get_mut(rule.id) {
                    *f = true
                }
            }
        }

        // then every rule that has not been tried yet; an id outside of
        // `marked` is treated as not tried
        for rule in self
            .rules
            .iter()
            .filter(|r| !*marked.get(r.id).unwrap_or(&false))
        {
            do_magic!(rule)
        }

        // nothing matched: fall back to the default classification
        Self::magic_default(haystack, stream_kind, &mut magic)?;

        Ok(magic)
    }
3391
3392 pub fn first_magic<R: Read + Seek>(
3406 &self,
3407 r: &mut R,
3408 extension: Option<&str>,
3409 ) -> Result<Magic<'_>, Error> {
3410 let mut haystack = Self::open_reader(r)?;
3411 let stream_kind = guess_stream_kind(haystack.read_range(0..FILE_BYTES_MAX as u64)?);
3412 self.first_magic_with_stream_kind(&mut haystack, stream_kind, extension)
3413 }
3414
3415 #[inline(always)]
3416 fn all_magics_sort_with_stream_kind<R: Read + Seek>(
3417 &self,
3418 haystack: &mut LazyCache<R>,
3419 stream_kind: StreamKind,
3420 ) -> Result<Vec<Magic<'_>>, Error> {
3421 let mut out = Vec::new();
3422
3423 let mut magic = Magic::default();
3424
3425 if Self::try_hard_magic(haystack, stream_kind, &mut magic)? {
3426 out.push(magic);
3427 magic = Magic::default();
3428 }
3429
3430 for rule in self.rules.iter() {
3431 rule.magic_entrypoint(&mut magic, stream_kind, haystack, self, false, 0)?;
3432
3433 if !magic.message.is_empty() {
3435 magic.set_stream_kind(stream_kind);
3436 magic.set_source(rule.source.as_deref());
3437 out.push(magic);
3438 magic = Magic::default();
3439 }
3440
3441 magic.reset();
3442 }
3443
3444 Self::magic_default(haystack, stream_kind, &mut magic)?;
3445 out.push(magic);
3446
3447 out.sort_by_key(|b| std::cmp::Reverse(b.strength()));
3448
3449 Ok(out)
3450 }
3451
3452 pub fn all_magics<R: Read + Seek>(&self, r: &mut R) -> Result<Vec<Magic<'_>>, Error> {
3462 let mut haystack = Self::open_reader(r)?;
3463 let stream_kind = guess_stream_kind(haystack.read_range(0..FILE_BYTES_MAX as u64)?);
3464 self.all_magics_sort_with_stream_kind(&mut haystack, stream_kind)
3465 }
3466
3467 #[inline(always)]
3468 fn best_magic_with_stream_kind<R: Read + Seek>(
3469 &self,
3470 haystack: &mut LazyCache<R>,
3471 stream_kind: StreamKind,
3472 ) -> Result<Magic<'_>, Error> {
3473 let magics = self.all_magics_sort_with_stream_kind(haystack, stream_kind)?;
3474
3475 return Ok(magics
3477 .into_iter()
3478 .next()
3479 .expect("magics must at least contain default"));
3480 }
3481
3482 pub fn best_magic<R: Read + Seek>(&self, r: &mut R) -> Result<Magic<'_>, Error> {
3492 let mut haystack = Self::open_reader(r)?;
3493 let stream_kind = guess_stream_kind(haystack.read_range(0..FILE_BYTES_MAX as u64)?);
3494 self.best_magic_with_stream_kind(&mut haystack, stream_kind)
3495 }
3496
3497 pub fn serialize<W: Write>(self, w: &mut W) -> Result<(), Error> {
3503 let mut encoder = GzEncoder::new(w, Compression::best());
3504
3505 bincode::serde::encode_into_std_write(&self, &mut encoder, bincode::config::standard())?;
3506 encoder.finish()?;
3507 Ok(())
3508 }
3509
3510 pub fn deserialize<R: Read>(r: &mut R) -> Result<Self, Error> {
3520 let mut buf = vec![];
3521 let mut gz = GzDecoder::new(r);
3522 gz.read_to_end(&mut buf).map_err(|e| {
3523 bincode::error::DecodeError::OtherString(format!("failed to read: {e}"))
3524 })?;
3525 let (sdb, _): (MagicDb, usize) =
3526 bincode::serde::decode_from_slice(&buf, bincode::config::standard())?;
3527 Ok(sdb)
3528 }
3529
3530 #[inline(always)]
3531 fn prepare(&mut self) {
3532 self.rules
3533 .iter_mut()
3534 .for_each(|r| r.try_finalize(&self.dependencies));
3535
3536 self.rules.sort_by_key(|r| (r.is_text(), -(r.score as i64)));
3538 }
3539}
3540
3541#[cfg(test)]
3542mod tests {
3543 use std::io::Cursor;
3544
3545 use regex::bytes::Regex;
3546
3547 use crate::utils::unix_local_time_to_string;
3548
3549 use super::*;
3550
    // Builds a `LazyCache` over an in-memory `Cursor` wrapping the literal
    // `$l`, panicking if the cache cannot be created.
    macro_rules! lazy_cache {
        ($l: literal) => {
            LazyCache::from_read_seek(Cursor::new($l)).unwrap()
        };
    }
3556
3557 fn first_magic(
3558 rule: &str,
3559 content: &[u8],
3560 stream_kind: StreamKind,
3561 ) -> Result<Magic<'static>, Error> {
3562 let mut md = MagicDb::new();
3563 md.load(
3564 FileMagicParser::parse_str(rule, None)
3565 .inspect_err(|e| eprintln!("{e}"))
3566 .unwrap(),
3567 )
3568 .unwrap();
3569 let mut reader = LazyCache::from_read_seek(Cursor::new(content)).unwrap();
3570 let v = md.best_magic_with_stream_kind(&mut reader, stream_kind)?;
3571 Ok(v.into_owned())
3572 }
3573
    // Debugging aid: installs a `tracing_subscriber` formatter at TRACE
    // level. The result of `try_init` is not checked by the macro. Allowed to
    // be unused since it is only meant to be dropped into a test while
    // debugging.
    #[allow(unused_macros)]
    macro_rules! enable_trace {
        () => {
            tracing_subscriber::fmt()
                .with_max_level(tracing_subscriber::filter::LevelFilter::TRACE)
                .try_init();
        };
    }
3583
    // Asserts that the rule literal `$rule` parses; a parse error is printed
    // to stderr before the `unwrap` panics.
    macro_rules! parse_assert {
        ($rule:literal) => {
            FileMagicParser::parse_str($rule, None)
                .inspect_err(|e| eprintln!("{e}"))
                .unwrap();
        };
    }
3591
    // Runs `$rule` against `$content` with the stream kind forced to binary.
    //
    // * two arguments: evaluates to the resulting magic after `unwrap`.
    //   NOTE(review): this arm only fails on an evaluation error; it does
    //   not check that the result is a non-default magic.
    // * three arguments: additionally asserts that the message of the
    //   resulting magic equals `$message`.
    macro_rules! assert_magic_match_bin {
        ($rule: literal, $content:literal) => {{ first_magic($rule, $content, StreamKind::Binary).unwrap() }};
        ($rule: literal, $content:literal, $message:expr) => {{
            assert_eq!(
                first_magic($rule, $content, StreamKind::Binary)
                    .unwrap()
                    .message(),
                $message
            );
        }};
    }
3603
    // Runs `$rule` against `$content` with the stream kind forced to UTF-8
    // text.
    //
    // * two arguments: evaluates to the resulting magic after `unwrap`.
    //   NOTE(review): this arm only fails on an evaluation error; it does
    //   not check that the result is a non-default magic.
    // * three arguments: additionally asserts that the message of the
    //   resulting magic equals `$message`.
    macro_rules! assert_magic_match_text {
        ($rule: literal, $content:literal) => {{ first_magic($rule, $content, StreamKind::Text(TextEncoding::Utf8)).unwrap() }};
        ($rule: literal, $content:literal, $message:expr) => {{
            assert_eq!(
                first_magic($rule, $content, StreamKind::Text(TextEncoding::Utf8))
                    .unwrap()
                    .message(),
                $message
            );
        }};
    }
3615
    // Asserts that `$rule` does NOT match `$content` (stream kind forced to
    // UTF-8 text): the best magic must be the default one.
    macro_rules! assert_magic_not_match_text {
        ($rule: literal, $content:literal) => {{
            assert!(
                first_magic($rule, $content, StreamKind::Text(TextEncoding::Utf8))
                    .unwrap()
                    .is_default()
            );
        }};
    }
3625
    // Asserts that `$rule` does NOT match `$content` (stream kind forced to
    // binary): the best magic must be the default one.
    macro_rules! assert_magic_not_match_bin {
        ($rule: literal, $content:literal) => {{
            assert!(
                first_magic($rule, $content, StreamKind::Binary)
                    .unwrap()
                    .is_default()
            );
        }};
    }
3635
    // Exercises the `regex` test type on text and binary content.
    #[test]
    fn test_regex() {
        // shebang detection: the child rule formats the captured text (%s)
        assert_magic_match_text!(
            r#"
0 regex/1024 \^#![[:space:]]*/usr/bin/env[[:space:]]+
!:mime text/x-shellscript
>&0 regex/64 .*($|\\b) %s shell script text executable
 "#,
            br#"#!/usr/bin/env bash
 echo hello world"#,
            "bash shell script text executable"
        );

        // sanity check of the regex crate on non UTF-8 bytes
        let re = Regex::new(r"(?-u)\x42\x82").unwrap();
        assert!(re.is_match(b"\x42\x82"));

        // regex on raw bytes
        assert_magic_match_bin!(
            r#"0 regex \x42\x82 binary regex match"#,
            b"\x00\x00\x00\x00\x00\x00\x42\x82"
        );

        // child rule with a relative offset following a regex parent
        assert_magic_match_bin!(
            r#"
 0 regex \x42\x82
 >&0 string \xde\xad\xbe\xef it works
 "#,
            b"\x00\x00\x00\x00\x00\x00\x42\x82\xde\xad\xbe\xef"
        );

        // same with the `/s` flag: the child string includes the bytes
        // matched by the regex
        assert_magic_match_bin!(
            r#"
 0 regex/s \x42\x82
 >&0 string \x42\x82\xde\xad\xbe\xef it works
 "#,
            b"\x00\x00\x00\x00\x00\x00\x42\x82\xde\xad\xbe\xef"
        );

        // anchors applied to content spanning several lines
        assert_magic_match_text!(
            r#"
0 regex/1024 \^HelloWorld$ HelloWorld String"#,
            br#"
// this is a comment after an empty line
HelloWorld
 "#
        );
    }
3685
    // Exercises the modifiers of the `string` test type (/w, /C, /c, /f, /W).
    #[test]
    fn test_string_with_mods() {
        // /w
        assert_magic_match_text!(
            r#"0 string/w #!\ \ \ /usr/bin/env\ bash BASH
 "#,
            b"#! /usr/bin/env bash i
 echo hello world"
        );

        // /C: the mixed-case pattern matches all-lowercase content ...
        assert_magic_match_text!(
            r#"0 string/C HelloWorld it works
 "#,
            b"helloworld"
        );

        // ... but not content with inverted case
        assert_magic_not_match_text!(
            r#"0 string/C HelloWorld it works
 "#,
            b"hELLOwORLD"
        );

        // /c: the mixed-case pattern matches all-uppercase content ...
        assert_magic_match_text!(
            r#"0 string/c HelloWorld it works
 "#,
            b"HELLOWORLD"
        );

        // ... but not all-lowercase content
        assert_magic_not_match_text!(
            r#"0 string/c HelloWorld it works
 "#,
            b"helloworld"
        );

        // /f: matches when the content ends right after the pattern ...
        assert_magic_match_text!(
            r#"0 string/f #!/usr/bin/env\ bash BASH
 "#,
            b"#!/usr/bin/env bash"
        );

        // ... but not when the pattern is only a prefix of a longer word
        assert_magic_not_match_text!(
            r#"0 string/f #!/usr/bin/python PYTHON"#,
            b"#!/usr/bin/pythonic"
        );

        // /W
        assert_magic_match_text!(
            r#"0 string/W #!/usr/bin/env\ python PYTHON"#,
            b"#!/usr/bin/env python"
        );

        assert_magic_not_match_text!(
            r#"0 string/W #!/usr/bin/env\ \ python PYTHON"#,
            b"#!/usr/bin/env python"
        );
    }
3744
    // Exercises the `search` test type with a range and modifiers.
    #[test]
    fn test_search_with_mods() {
        assert_magic_match_text!(
            r#"0 search/1/fwt #!\ /usr/bin/luatex LuaTex script text executable"#,
            b"#! /usr/bin/luatex "
        );

        // with /s the relative child rule can match the searched string again
        assert_magic_match_text!(
            r#"
 0 search/s /usr/bin/env
 >&0 string /usr/bin/env it works
 "#,
            b"#!/usr/bin/env python"
        );

        // without /s the same child rule must not match
        assert_magic_not_match_text!(
            r#"
 0 search /usr/bin/env
 >&0 string /usr/bin/env it works
 "#,
            b"#!/usr/bin/env python"
        );
    }
3769
    // Exercises the `pstring` test type (length-prefixed strings) with the
    // different prefix widths and byte orders visible in the fixtures.
    #[test]
    fn test_pstring() {
        // default: one length byte
        assert_magic_match_bin!(r#"0 pstring Toast it works"#, b"\x05Toast");

        assert_magic_match_bin!(r#"0 pstring Toast %s"#, b"\x05Toast", "Toast");

        assert_magic_not_match_bin!(r#"0 pstring Toast Doesn't work"#, b"\x07Toaster");

        // /H: two length bytes, most significant first
        assert_magic_match_bin!(r#"0 pstring/H Toast it works"#, b"\x00\x05Toast");

        // /J: the stored length (7) also counts the two prefix bytes
        assert_magic_match_bin!(r#"0 pstring/HJ Toast it works"#, b"\x00\x07Toast");

        assert_magic_match_bin!(r#"0 pstring/HJ Toast %s"#, b"\x00\x07Toast", "Toast");

        // /h: two length bytes, least significant first
        assert_magic_match_bin!(r#"0 pstring/h Toast it works"#, b"\x05\x00Toast");

        assert_magic_match_bin!(r#"0 pstring/hJ Toast it works"#, b"\x07\x00Toast");

        // /L: four length bytes, most significant first
        assert_magic_match_bin!(r#"0 pstring/L Toast it works"#, b"\x00\x00\x00\x05Toast");

        assert_magic_match_bin!(r#"0 pstring/LJ Toast it works"#, b"\x00\x00\x00\x09Toast");

        // /l: four length bytes, least significant first
        assert_magic_match_bin!(r#"0 pstring/l Toast it works"#, b"\x05\x00\x00\x00Toast");

        assert_magic_match_bin!(r#"0 pstring/lJ Toast it works"#, b"\x09\x00\x00\x00Toast");
    }
3797
    // An `indirect` rule at offset 0 is expected to fail with
    // `Error::MaximumRecursion(MAX_RECURSION)`.
    #[test]
    fn test_max_recursion() {
        let res = first_magic(
            r#"0 indirect x"#,
            b"#! /usr/bin/luatex ",
            StreamKind::Binary,
        );
        assert!(res.is_err());
        // check the kind of error once the localization wrapper is removed
        let _ = res.inspect_err(|e| {
            assert!(matches!(
                e.unwrap_localized(),
                Error::MaximumRecursion(MAX_RECURSION)
            ))
        });
    }
3813
    // Exercises the comparison operators (`!`, `>`, `<`) of the `string`
    // test type.
    #[test]
    fn test_string_ops() {
        assert_magic_match_text!("0 string/b MZ MZ File", b"MZ\0");
        assert_magic_match_text!("0 string !MZ Not MZ File", b"AZ\0");
        assert_magic_match_text!("0 string >\0 Any String", b"A\0");
        assert_magic_match_text!("0 string >Test Any String", b"Test 1\0");
        assert_magic_match_text!("0 string <Test Any String", b"\0");
        assert_magic_not_match_text!("0 string >Test Any String", b"\0");
    }
3823
    // Exercises `lestring16`: "abcd" encoded as little-endian 16-bit units.
    #[test]
    fn test_lestring16() {
        assert_magic_match_bin!(
            "0 lestring16 abcd Little-endian UTF-16 string",
            b"\x61\x00\x62\x00\x63\x00\x64\x00"
        );
        // `x` matches anything and `%s` prints the decoded string
        assert_magic_match_bin!(
            "0 lestring16 x %s",
            b"\x61\x00\x62\x00\x63\x00\x64\x00\x00",
            "abcd"
        );
        // big-endian data must not match
        assert_magic_not_match_bin!(
            "0 lestring16 abcd Little-endian UTF-16 string",
            b"\x00\x61\x00\x62\x00\x63\x00\x64"
        );
        // non-zero offset
        assert_magic_match_bin!(
            "4 lestring16 abcd Little-endian UTF-16 string",
            b"\x00\x00\x00\x00\x61\x00\x62\x00\x63\x00\x64\x00"
        );
    }
3844
    // Exercises `bestring16`: "abcd" encoded as big-endian 16-bit units.
    #[test]
    fn test_bestring16() {
        assert_magic_match_bin!(
            "0 bestring16 abcd Big-endian UTF-16 string",
            b"\x00\x61\x00\x62\x00\x63\x00\x64"
        );
        // `x` matches anything and `%s` prints the decoded string
        assert_magic_match_bin!(
            "0 bestring16 x %s",
            b"\x00\x61\x00\x62\x00\x63\x00\x64",
            "abcd"
        );
        // little-endian data must not match
        assert_magic_not_match_bin!(
            "0 bestring16 abcd Big-endian UTF-16 string",
            b"\x61\x00\x62\x00\x63\x00\x64\x00"
        );
        // non-zero offset
        assert_magic_match_bin!(
            "4 bestring16 abcd Big-endian UTF-16 string",
            b"\x00\x00\x00\x00\x00\x61\x00\x62\x00\x63\x00\x64"
        );
    }
3865
    // Negative offsets: in the fixtures `-1` addresses the last byte and `-2`
    // the one before it.
    #[test]
    fn test_offset_from_end() {
        assert_magic_match_bin!("-1 ubyte 0x42 last byte ok", b"\x00\x00\x42");
        assert_magic_match_bin!("-2 ubyte 0x41 last byte ok", b"\x00\x41\x00");
    }
3871
    // Chained relative offsets (`&0`): each child rule reads the byte that
    // follows the one read by its parent.
    #[test]
    fn test_relative_offset() {
        assert_magic_match_bin!(
            "
 0 ubyte 0x42
 >&0 ubyte 0x00
 >>&0 ubyte 0x41 third byte ok
 ",
            b"\x42\x00\x41\x00"
        );
    }
3883
    // Indirect offsets: the offset is read from the file (`(0.l)`), optionally
    // shifted by a constant or by another indirect value.
    #[test]
    fn test_indirect_offset() {
        assert_magic_match_bin!("(0.l) ubyte 0x42 it works", b"\x04\x00\x00\x00\x42");
        // value read (1) + 3 = offset 4
        assert_magic_match_bin!("(0.l+3) ubyte 0x42 it works", b"\x01\x00\x00\x00\x42");
        // value read at 0 (4) + value read at 4 (4) = offset 8
        assert_magic_match_bin!(
            "(0.l+(4)) ubyte 0x42 it works",
            b"\x04\x00\x00\x00\x04\x00\x00\x00\x42"
        );
    }
3895
    // A `use` rule carrying its own message: the expected output is that
    // message followed by the message of the named rule.
    #[test]
    fn test_use_with_message() {
        assert_magic_match_bin!(
            r#"
0 string MZ
>0 use mz first match

0 name mz then second match
>0 string MZ
"#,
            b"MZ\0",
            "first match then second match"
        );
    }
3910
    // Arithmetic and bitwise transforms applied to the value read before it
    // is compared.
    #[test]
    fn test_scalar_transform() {
        assert_magic_match_bin!("0 ubyte+1 0x1 add works", b"\x00");
        assert_magic_match_bin!("0 ubyte-1 0xfe sub works", b"\xff");
        assert_magic_match_bin!("0 ubyte%2 0 mod works", b"\x0a");
        assert_magic_match_bin!("0 ubyte&0x0f 0x0f bitand works", b"\xff");
        assert_magic_match_bin!("0 ubyte|0x0f 0xff bitor works", b"\xf0");
        assert_magic_match_bin!("0 ubyte^0x0f 0xf0 bitxor works", b"\xff");

        // a zero divisor must be rejected when the rule is parsed
        FileMagicParser::parse_str("0 ubyte%0 mod by zero", None)
            .expect_err("expect div by zero error");
        FileMagicParser::parse_str("0 ubyte/0 div by zero", None)
            .expect_err("expect div by zero error");
    }
3925
    // Exercises `belong` (big-endian 32-bit value) with every comparison
    // operator used in the fixtures.
    #[test]
    fn test_belong() {
        // equality
        assert_magic_match_bin!("0 belong 0x12345678 Big-endian long", b"\x12\x34\x56\x78");
        assert_magic_not_match_bin!("0 belong 0x12345678 Big-endian long", b"\x78\x56\x34\x12");
        // non-zero offset
        assert_magic_match_bin!(
            "4 belong 0x12345678 Big-endian long",
            b"\x00\x00\x00\x00\x12\x34\x56\x78"
        );
        // less than
        assert_magic_match_bin!("0 belong <0x12345678 Big-endian long", b"\x12\x34\x56\x77");
        assert_magic_not_match_bin!("0 belong <0x12345678 Big-endian long", b"\x12\x34\x56\x78");

        // greater than
        assert_magic_match_bin!("0 belong >0x12345678 Big-endian long", b"\x12\x34\x56\x79");
        assert_magic_not_match_bin!("0 belong >0x12345678 Big-endian long", b"\x12\x34\x56\x78");

        // `&` operator
        assert_magic_match_bin!("0 belong &0x5678 Big-endian long", b"\x00\x00\x56\x78");
        assert_magic_not_match_bin!("0 belong &0x0000FFFF Big-endian long", b"\x12\x34\x56\x78");

        // `^` operator
        assert_magic_match_bin!("0 belong ^0xFFFF0000 Big-endian long", b"\x00\x00\x56\x78");
        assert_magic_not_match_bin!("0 belong ^0xFFFF0000 Big-endian long", b"\x00\x01\x56\x78");

        // `~` operator: the matching fixture is the bitwise complement
        assert_magic_match_bin!("0 belong ~0x12345678 Big-endian long", b"\xed\xcb\xa9\x87");
        assert_magic_not_match_bin!("0 belong ~0x12345678 Big-endian long", b"\x12\x34\x56\x78");

        // `x` matches any value
        assert_magic_match_bin!("0 belong x Big-endian long", b"\x12\x34\x56\x78");
        assert_magic_match_bin!("0 belong x Big-endian long", b"\x78\x56\x34\x12");
    }
3961
    // The range and the flags of a `search` rule must parse in either order.
    #[test]
    fn test_parse_search() {
        parse_assert!("0 search test");
        parse_assert!("0 search/24/s test");
        parse_assert!("0 search/s/24 test");
    }
3968
    // Exercises `bedate`: 946684800 stored as a big-endian 32-bit timestamp.
    #[test]
    fn test_bedate() {
        assert_magic_match_bin!(
            "0 bedate 946684800 Unix date (Jan 1, 2000)",
            b"\x38\x6D\x43\x80"
        );
        assert_magic_not_match_bin!(
            "0 bedate 946684800 Unix date (Jan 1, 2000)",
            b"\x00\x00\x00\x00"
        );
        // `%s` prints the formatted date
        assert_magic_match_bin!(
            "4 bedate 946684800 %s",
            b"\x00\x00\x00\x00\x38\x6D\x43\x80",
            "2000-01-01 00:00:00"
        );
    }
    // Exercises `beldate`: same fixture as `bedate`, but the expected text is
    // produced by `unix_local_time_to_string`.
    #[test]
    fn test_beldate() {
        assert_magic_match_bin!(
            "0 beldate 946684800 Local date (Jan 1, 2000)",
            b"\x38\x6D\x43\x80"
        );
        assert_magic_not_match_bin!(
            "0 beldate 946684800 Local date (Jan 1, 2000)",
            b"\x00\x00\x00\x00"
        );

        assert_magic_match_bin!(
            "4 beldate 946684800 {}",
            b"\x00\x00\x00\x00\x38\x6D\x43\x80",
            unix_local_time_to_string(946684800)
        );
    }
4002
    // Exercises `beqdate`: 946684800 stored as a big-endian 64-bit timestamp.
    #[test]
    fn test_beqdate() {
        assert_magic_match_bin!(
            "0 beqdate 946684800 Unix date (Jan 1, 2000)",
            b"\x00\x00\x00\x00\x38\x6D\x43\x80"
        );

        assert_magic_not_match_bin!(
            "0 beqdate 946684800 Unix date (Jan 1, 2000)",
            b"\x00\x00\x00\x00\x00\x00\x00\x00"
        );

        // `%s` prints the formatted date
        assert_magic_match_bin!(
            "0 beqdate 946684800 %s",
            b"\x00\x00\x00\x00\x38\x6D\x43\x80",
            "2000-01-01 00:00:00"
        );
    }
4021
    // Exercises `medate`: 946684800 (0x386D4380) stored in middle-endian
    // order, i.e. the bytes 6D 38 80 43.
    #[test]
    fn test_medate() {
        assert_magic_match_bin!(
            "0 medate 946684800 Unix date (Jan 1, 2000)",
            b"\x6D\x38\x80\x43"
        );

        assert_magic_not_match_bin!(
            "0 medate 946684800 Unix date (Jan 1, 2000)",
            b"\x00\x00\x00\x00"
        );

        // `%s` prints the formatted date
        assert_magic_match_bin!(
            "4 medate 946684800 %s",
            b"\x00\x00\x00\x00\x6D\x38\x80\x43",
            "2000-01-01 00:00:00"
        );
    }
4040
    // Exercises `meldate`: same fixture as `medate`, but the expected text is
    // produced by `unix_local_time_to_string`.
    #[test]
    fn test_meldate() {
        assert_magic_match_bin!(
            "0 meldate 946684800 Local date (Jan 1, 2000)",
            b"\x6D\x38\x80\x43"
        );
        assert_magic_not_match_bin!(
            "0 meldate 946684800 Local date (Jan 1, 2000)",
            b"\x00\x00\x00\x00"
        );

        assert_magic_match_bin!(
            "4 meldate 946684800 %s",
            b"\x00\x00\x00\x00\x6D\x38\x80\x43",
            unix_local_time_to_string(946684800)
        );
    }
4058
    // Exercises `date` (no explicit byte order in the type name).
    // NOTE(review): the fixture bytes are in little-endian order, so this
    // presumably assumes a little-endian host; confirm.
    #[test]
    fn test_date() {
        assert_magic_match_bin!(
            "0 date 946684800 Local date (Jan 1, 2000)",
            b"\x80\x43\x6D\x38"
        );
        assert_magic_not_match_bin!(
            "0 date 946684800 Local date (Jan 1, 2000)",
            b"\x00\x00\x00\x00"
        );
        assert_magic_match_bin!(
            "4 date 946684800 {}",
            b"\x00\x00\x00\x00\x80\x43\x6D\x38",
            "2000-01-01 00:00:00"
        );
    }
4075
    // Exercises `leldate`: 946684800 stored as a little-endian 32-bit
    // timestamp; the expected text is produced by
    // `unix_local_time_to_string`.
    #[test]
    fn test_leldate() {
        assert_magic_match_bin!(
            "0 leldate 946684800 Local date (Jan 1, 2000)",
            b"\x80\x43\x6D\x38"
        );
        assert_magic_not_match_bin!(
            "0 leldate 946684800 Local date (Jan 1, 2000)",
            b"\x00\x00\x00\x00"
        );
        assert_magic_match_bin!(
            "4 leldate 946684800 {}",
            b"\x00\x00\x00\x00\x80\x43\x6D\x38",
            unix_local_time_to_string(946684800)
        );
    }
4092
    // Exercises `leqdate`: 1577836800 stored as a little-endian 64-bit
    // timestamp.
    #[test]
    fn test_leqdate() {
        assert_magic_match_bin!(
            "0 leqdate 1577836800 Unix date (Jan 1, 2020)",
            b"\x00\xe1\x0b\x5E\x00\x00\x00\x00"
        );

        assert_magic_not_match_bin!(
            "0 leqdate 1577836800 Unix date (Jan 1, 2020)",
            b"\x00\x00\x00\x00\x00\x00\x00\x00"
        );
        // `%s` prints the formatted date
        assert_magic_match_bin!(
            "8 leqdate 1577836800 %s",
            b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\xE1\x0B\x5E\x00\x00\x00\x00",
            "2020-01-01 00:00:00"
        );
    }
4110
    // Exercises `leqldate`: same fixture as `leqdate`, but the expected text
    // is produced by `unix_local_time_to_string`.
    #[test]
    fn test_leqldate() {
        assert_magic_match_bin!(
            "0 leqldate 1577836800 Unix date (Jan 1, 2020)",
            b"\x00\xe1\x0b\x5E\x00\x00\x00\x00"
        );

        assert_magic_not_match_bin!(
            "0 leqldate 1577836800 Unix date (Jan 1, 2020)",
            b"\x00\x00\x00\x00\x00\x00\x00\x00"
        );
        assert_magic_match_bin!(
            "8 leqldate 1577836800 %s",
            b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\xE1\x0B\x5E\x00\x00\x00\x00",
            unix_local_time_to_string(1577836800)
        );
    }
4128
    // Exercises `melong` (middle-endian 32-bit value). In the fixtures
    // 0x12345678 is laid out as the bytes 34 12 78 56: two little-endian
    // 16-bit halves, most significant half first.
    #[test]
    fn test_melong() {
        // equality
        assert_magic_match_bin!(
            "0 melong =0x12345678 Middle-endian long",
            b"\x34\x12\x78\x56"
        );
        assert_magic_not_match_bin!(
            "0 melong =0x12345678 Middle-endian long",
            b"\x00\x00\x00\x00"
        );

        // less than
        assert_magic_match_bin!(
            "0 melong <0x12345678 Middle-endian long",
            b"\x34\x12\x78\x55"
        );
        assert_magic_not_match_bin!(
            "0 melong <0x12345678 Middle-endian long",
            b"\x34\x12\x78\x56"
        );

        // greater than
        assert_magic_match_bin!(
            "0 melong >0x12345678 Middle-endian long",
            b"\x34\x12\x78\x57"
        );
        assert_magic_not_match_bin!(
            "0 melong >0x12345678 Middle-endian long",
            b"\x34\x12\x78\x56"
        );

        // `&` operator
        assert_magic_match_bin!("0 melong &0x5678 Middle-endian long", b"\xab\xcd\x78\x56");
        assert_magic_not_match_bin!(
            "0 melong &0x0000FFFF Middle-endian long",
            b"\x34\x12\x78\x56"
        );

        // `^` operator
        assert_magic_match_bin!(
            "0 melong ^0xFFFF0000 Middle-endian long",
            b"\x00\x00\x78\x56"
        );
        assert_magic_not_match_bin!(
            "0 melong ^0xFFFF0000 Middle-endian long",
            b"\x00\x01\x78\x56"
        );

        // `~` operator: the matching fixture is the bitwise complement
        assert_magic_match_bin!(
            "0 melong ~0x12345678 Middle-endian long",
            b"\xCB\xED\x87\xA9"
        );
        assert_magic_not_match_bin!(
            "0 melong ~0x12345678 Middle-endian long",
            b"\x34\x12\x78\x56"
        );

        // `x` matches any value
        assert_magic_match_bin!("0 melong x Middle-endian long", b"\x34\x12\x78\x56");
        assert_magic_match_bin!("0 melong x Middle-endian long", b"\x00\x00\x00\x00");
    }
4192
    // Exercises `uquad` (unsigned 64-bit value) with every comparison
    // operator used in the fixtures. The fixture bytes are laid out least
    // significant byte first.
    #[test]
    fn test_uquad() {
        // equality
        assert_magic_match_bin!(
            "0 uquad =0x123456789ABCDEF0 Unsigned quad",
            b"\xF0\xDE\xBC\x9A\x78\x56\x34\x12"
        );
        assert_magic_not_match_bin!(
            "0 uquad =0x123456789ABCDEF0 Unsigned quad",
            b"\x00\x00\x00\x00\x00\x00\x00\x00"
        );

        // less than
        assert_magic_match_bin!(
            "0 uquad <0x123456789ABCDEF0 Unsigned quad",
            b"\xF0\xDE\xBC\x9A\x78\x56\x34\x11"
        );
        assert_magic_not_match_bin!(
            "0 uquad <0x123456789ABCDEF0 Unsigned quad",
            b"\xF0\xDE\xBC\x9A\x78\x56\x34\x12"
        );

        // greater than
        assert_magic_match_bin!(
            "0 uquad >0x123456789ABCDEF0 Unsigned quad",
            b"\xF0\xDE\xBC\x9A\x78\x56\x34\x13"
        );
        assert_magic_not_match_bin!(
            "0 uquad >0x123456789ABCDEF0 Unsigned quad",
            b"\xF0\xDE\xBC\x9A\x78\x56\x34\x12"
        );

        // `&` operator
        assert_magic_match_bin!(
            "0 uquad &0xF0 Unsigned quad",
            b"\xF0\xDE\xBC\x9A\x78\x56\x34\x12"
        );
        assert_magic_not_match_bin!(
            "0 uquad &0xFF Unsigned quad",
            b"\xF0\xDE\xBC\x9A\x78\x56\x34\x12"
        );

        // `^` operator
        assert_magic_match_bin!(
            "0 uquad ^0xFFFFFFFFFFFFFFFF Unsigned quad",
            b"\x00\x00\x00\x00\x00\x00\x00\x00"
        );
        assert_magic_not_match_bin!(
            "0 uquad ^0xFFFFFFFFFFFFFFFF Unsigned quad",
            b"\xF0\xDE\xBC\x9A\x78\x56\x34\x12"
        );

        // `~` operator: the matching fixture is the bitwise complement
        assert_magic_match_bin!(
            "0 uquad ~0x123456789ABCDEF0 Unsigned quad",
            b"\x0F\x21\x43\x65\x87\xA9\xCB\xED"
        );
        assert_magic_not_match_bin!(
            "0 uquad ~0x123456789ABCDEF0 Unsigned quad",
            b"\xF0\xDE\xBC\x9A\x78\x56\x34\x12"
        );

        // `x` matches any value; `{:#x}` prints it in hexadecimal
        assert_magic_match_bin!(
            "0 uquad x {:#x}",
            b"\xF0\xDE\xBC\x9A\x78\x56\x34\x12",
            "0x123456789abcdef0"
        );
        assert_magic_match_bin!(
            "0 uquad x Unsigned quad",
            b"\x00\x00\x00\x00\x00\x00\x00\x00"
        );
    }
4266
    // Exercises `guid`: in the fixtures the 16 bytes appear in the same order
    // as the hexadecimal digits of the textual GUID.
    #[test]
    fn test_guid() {
        assert_magic_match_bin!(
            "0 guid EC959539-6786-2D4E-8FDB-98814CE76C1E It works",
            b"\xEC\x95\x95\x39\x67\x86\x2D\x4E\x8F\xDB\x98\x81\x4C\xE7\x6C\x1E"
        );

        assert_magic_not_match_bin!(
            "0 guid 399595EC-8667-4E2D-8FDB-98814CE76C1E It works",
            b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F"
        );

        // `x` matches anything and `%s` prints the textual GUID
        assert_magic_match_bin!(
            "0 guid x %s",
            b"\xEC\x95\x95\x39\x67\x86\x2D\x4E\x8F\xDB\x98\x81\x4C\xE7\x6C\x1E",
            "EC959539-6786-2D4E-8FDB-98814CE76C1E"
        );
    }
4285
    // Exercises `ubeqdate`: 1633046400 stored as a big-endian 64-bit
    // timestamp.
    #[test]
    fn test_ubeqdate() {
        assert_magic_match_bin!(
            "0 ubeqdate 1633046400 It works",
            b"\x00\x00\x00\x00\x61\x56\x4f\x80"
        );

        // `x` matches anything and `%s` prints the formatted date
        assert_magic_match_bin!(
            "0 ubeqdate x %s",
            b"\x00\x00\x00\x00\x61\x56\x4f\x80",
            "2021-10-01 00:00:00"
        );

        assert_magic_not_match_bin!(
            "0 ubeqdate 1633046400 It should not work",
            b"\x00\x00\x00\x00\x00\x00\x00\x00"
        );
    }
4304
    // Exercises `ldate` (no explicit byte order in the type name); the
    // expected text is produced by `unix_local_time_to_string`.
    // NOTE(review): the fixture bytes are in little-endian order, so this
    // presumably assumes a little-endian host; confirm.
    #[test]
    fn test_ldate() {
        assert_magic_match_bin!("0 ldate 1640551520 It works", b"\x60\xd4\xC8\x61");

        assert_magic_not_match_bin!("0 ldate 1633046400 It should not work", b"\x00\x00\x00\x00");

        assert_magic_match_bin!(
            "0 ldate x %s",
            b"\x60\xd4\xC8\x61",
            unix_local_time_to_string(1640551520)
        );
    }
4317
    // The transformed value is the one that gets compared and printed:
    // 0x14 (20) / 10 = 2 and 20 % 10 = 0.
    #[test]
    fn test_scalar_with_transform() {
        assert_magic_match_bin!("0 ubyte/10 2 {}", b"\x14", "2");
        assert_magic_match_bin!("0 ubyte/10 x {}", b"\x14", "2");
        assert_magic_match_bin!("0 ubyte%10 x {}", b"\x14", "0");
    }
4324
#[test]
fn test_float_with_transform() {
    // 00 00 a0 41 is the little-endian encoding of the f32 value 20.0. The
    // transform is applied to the value that is printed: 20 / 10 = 2 and
    // 20 % 10 = 0.
    assert_magic_match_bin!("0 lefloat/10 x {}", b"\x00\x00\xa0\x41", "2");
    assert_magic_match_bin!("0 lefloat%10 x {}", b"\x00\x00\xa0\x41", "0");

    // The transformed value is also what gets compared against the test value.
    assert_magic_match_bin!("0 lefloat/10 2 {}", b"\x00\x00\xa0\x41", "2");
}
4331
#[test]
fn test_read_octal() {
    // Local shorthand: feed a literal input to `read_octal_u64` through a
    // fresh `lazy_cache!` and compare with the expected outcome. The input is
    // captured as `tt` so the literal reaches `lazy_cache!` untouched.
    macro_rules! octal_of {
        ($input:tt => $expected:expr) => {
            assert_eq!(read_octal_u64(&mut lazy_cache!($input)), $expected)
        };
    }

    // Well-formed octal numbers with a leading zero.
    octal_of!("0" => Some(0));
    octal_of!("00" => Some(0));
    octal_of!("01" => Some(1));
    octal_of!("07" => Some(7));
    octal_of!("010" => Some(8));
    octal_of!("0123" => Some(83));
    octal_of!("0755" => Some(493));

    // Parsing stops at the first non-octal character.
    octal_of!("0ABC" => Some(0));
    octal_of!("01ABC" => Some(1));
    octal_of!("0755ABC" => Some(493));
    octal_of!("0123ABC" => Some(83));

    // The digit 8 is not octal and ends the number as well.
    octal_of!("08" => Some(0));
    octal_of!("01238" => Some(83));

    // Without a leading zero nothing is parsed.
    octal_of!("123" => None);
    octal_of!("755" => None);

    // Empty input.
    octal_of!("" => None);

    // Input that does not start with an octal prefix at all.
    octal_of!("ABC" => None);
    octal_of!("8ABC" => None);

    // A longer value: 0o1777777777 == 268435455.
    octal_of!("01777777777" => Some(268435455));
}
4370
#[test]
fn test_offset_bug_1() {
    // The top-level rule matches "TEST" at offset 1, then chains two named
    // rules through `use`, each located by an indirect byte offset. The three
    // messages together must read as the sentence below.
    let expected_message = "Bread is Toasted twice";

    assert_magic_match_bin!(
        r"
1 string TEST Bread is
# offset computation is relative to
# rule start
>(5.b) use toasted

0 name toasted
>0 string twice Toasted
>>0 use toasted_twice

0 name toasted_twice
>(6.b) string x %s
 ",
        b"\x00TEST\x06twice\x00\x06",
        expected_message
    );
}
4393
#[test]
// Same rule chain and same 13-byte input as `test_offset_bug_1`, except that
// the top-level rule starts from a negative offset (`-12`) and the first
// indirect offset is `(4.b)` instead of `(5.b)`.
// NOTE(review): the negative offset is presumably resolved from the end of
// the input (13 - 12 = 1, i.e. where "TEST" starts) -- confirm against the
// offset resolution code.
fn test_offset_bug_2() {
    assert_magic_match_bin!(
        r"
-12 string TEST Bread is
>(4.b) use toasted

0 name toasted
>0 string twice Toasted
>>0 use toasted_twice

0 name toasted_twice
>(6.b) string x %s
 ",
        b"\x00TEST\x06twice\x00\x06",
        // The final rule must substitute the matched string ("twice"), hence
        // the `%s` directive, exactly as in `test_offset_bug_1`.
        "Bread is Toasted twice"
    )
}
4419
#[test]
fn test_offset_bug_3() {
    // After matching "TEST", the rule hands over to the whole rule set again
    // through `indirect/r` at an offset read from the buffer. The second
    // top-level rule then matches "twice" and calls a named rule with `use`.
    let expected_message = "Bread is Toasted twice";

    assert_magic_match_bin!(
        r"
1 string TEST Bread is
>(5.b) indirect/r x

0 string twice Toasted
>0 use toasted_twice

0 name toasted_twice
>0 string x %s
 ",
        b"\x00TEST\x06twice\x00\x08",
        expected_message
    );
}
4439
#[test]
fn test_offset_bug_4() {
    // Combines `indirect/r` with a subsequent `use`: the rule comments inside
    // the magic source describe how each base offset is derived. All three
    // printed strings come from the buffer itself through `%s`.
    let expected_message = "Bread is Toasted twice";

    assert_magic_match_bin!(
        r"
1 string Bread %s
>(6.b) indirect/r x

# this one uses a based offset
# computed at indirection
1 string is\ Toasted %s
>(11.b) use toasted_twice

# this one is using a new base
# offset being previous base
# offset + offset of use
0 name toasted_twice
>0 string x %s
 ",
        b"\x00Bread\x06is Toasted\x0ctwice\x00",
        expected_message
    );
}
4464
#[test]
fn test_offset_bug_5() {
    // Same layout as `test_offset_bug_3`, but the named rule matches "twice"
    // silently and the final word is printed by a child rule that uses a
    // relative offset (`&1`) to test the byte 0x08 near the end of the input.
    let expected_message = "Bread is Toasted twice";

    assert_magic_match_bin!(
        r"
1 string TEST Bread is
>(5.b) indirect/r x

0 string twice Toasted
>0 use toasted_twice

0 name toasted_twice
>0 string twice
>>&1 byte 0x08 twice
 ",
        b"\x00TEST\x06twice\x00\x08",
        expected_message
    );
}
4483
#[test]
fn test_message_parts() {
    // Run a single text rule against a Python shebang line and keep the
    // resulting match.
    let rule = r#"0 string/W #!/usr/bin/env\ python PYTHON"#;
    let shebang = b"#!/usr/bin/env python";
    let magic = first_magic(rule, shebang, StreamKind::Text(TextEncoding::Ascii)).unwrap();

    // At least one message part must equal "python", ignoring ASCII case.
    let mentions_python = magic
        .message_parts()
        .any(|part| part.eq_ignore_ascii_case("python"));
    assert!(mentions_python)
}
4495}