1use log::warn;
10
11use crate::error::ParseError;
12use crate::parser::ParsedMagic;
13use crate::parser::name_table::NameTable;
14use std::path::{Path, PathBuf};
15
16use super::format::{MagicFileFormat, detect_format};
17
/// Maximum size, in bytes, of a single magic file that this module will read
/// (`1024 * 1024 * 1024`, i.e. 1 GiB).
///
/// `read_magic_file_bounded` rejects any file whose reported length exceeds
/// this value. A unit test in this file asserts that the constant stays equal
/// to `crate::io::FileBuffer::MAX_FILE_SIZE`.
pub const MAX_MAGIC_FILE_SIZE: u64 = 1024 * 1024 * 1024;
30
31fn read_magic_file_bounded(path: &Path) -> Result<String, ParseError> {
48 let metadata = std::fs::metadata(path).map_err(|e| {
49 ParseError::IoError(std::io::Error::new(
50 e.kind(),
51 format!("Failed to read metadata for '{}': {}", path.display(), e),
52 ))
53 })?;
54
55 if metadata.len() > MAX_MAGIC_FILE_SIZE {
56 return Err(ParseError::invalid_syntax(
57 0,
58 format!(
59 "Magic file '{}' is too large: {} bytes (maximum allowed: {} bytes)",
60 path.display(),
61 metadata.len(),
62 MAX_MAGIC_FILE_SIZE
63 ),
64 ));
65 }
66
67 let bytes = std::fs::read(path).map_err(ParseError::from)?;
68
69 match String::from_utf8(bytes) {
70 Ok(s) => Ok(s),
71 Err(e) => {
72 warn!(
73 "Magic file '{}' contains non-UTF-8 bytes; they were replaced with U+FFFD. \
74 Rule parsing proceeds, but replacements inside rule bodies may alter matching.",
75 path.display()
76 );
77 Ok(String::from_utf8_lossy(&e.into_bytes()).into_owned())
78 }
79 }
80}
81
82pub fn load_magic_directory(dir_path: &Path) -> Result<ParsedMagic, ParseError> {
150 use std::fs;
151
152 let entries = fs::read_dir(dir_path).map_err(|e| {
154 ParseError::invalid_syntax(
155 0,
156 format!("Failed to read directory '{}': {}", dir_path.display(), e),
157 )
158 })?;
159
160 let mut file_paths: Vec<std::path::PathBuf> = Vec::new();
162 for entry in entries {
163 let entry = entry.map_err(|e| {
164 ParseError::invalid_syntax(
165 0,
166 format!(
167 "Failed to read directory entry in '{}': {}",
168 dir_path.display(),
169 e
170 ),
171 )
172 })?;
173
174 let path = entry.path();
175 let file_type = entry.file_type().map_err(|e| {
176 ParseError::invalid_syntax(
177 0,
178 format!("Failed to read file type for '{}': {}", path.display(), e),
179 )
180 })?;
181
182 if file_type.is_file() && !file_type.is_symlink() {
184 file_paths.push(path);
185 }
186 }
187
188 file_paths.sort_by_key(|path| path.file_name().map(std::ffi::OsStr::to_os_string));
190
191 let mut all_rules = Vec::new();
193 let mut merged_table = NameTable::empty();
194 let mut parse_failures: Vec<(PathBuf, ParseError)> = Vec::new();
195 let mut any_success = false;
196 let file_count = file_paths.len();
197
198 for path in file_paths {
199 let contents = match read_magic_file_bounded(&path) {
201 Ok(contents) => contents,
202 Err(e) => {
203 return Err(ParseError::invalid_syntax(
205 0,
206 format!("Failed to read file '{}': {}", path.display(), e),
207 ));
208 }
209 };
210
211 match super::parse_text_magic_file(&contents) {
213 Ok(parsed) => {
214 any_success = true;
215 all_rules.extend(parsed.rules);
216 merged_table.merge(parsed.name_table);
217 }
218 Err(e) => {
219 parse_failures.push((path, e));
221 }
222 }
223 }
224
225 if !any_success && !parse_failures.is_empty() {
231 use std::fmt::Write;
232
233 let failure_details: Vec<String> = parse_failures
234 .iter()
235 .take(3) .map(|(path, e)| format!(" - {}: {}", path.display(), e))
237 .collect();
238
239 let mut message = format!("All {file_count} magic file(s) in directory failed to parse");
240 if !failure_details.is_empty() {
241 message.push_str(":\n");
242 message.push_str(&failure_details.join("\n"));
243 if parse_failures.len() > 3 {
244 let _ = write!(message, "\n ... and {} more", parse_failures.len() - 3);
245 }
246 }
247
248 return Err(ParseError::invalid_syntax(0, message));
249 }
250
251 for (path, e) in &parse_failures {
253 warn!("Failed to parse '{}': {}", path.display(), e);
254 }
255
256 Ok(ParsedMagic {
257 rules: all_rules,
258 name_table: merged_table,
259 })
260}
261
262pub fn load_magic_file(path: &Path) -> Result<ParsedMagic, ParseError> {
356 let format = detect_format(path)?;
358
359 match format {
361 MagicFileFormat::Text => {
362 let content = read_magic_file_bounded(path)?;
364 super::parse_text_magic_file(&content)
365 }
366 MagicFileFormat::Directory => {
367 load_magic_directory(path)
369 }
370 MagicFileFormat::Binary => {
371 Err(ParseError::unsupported_format(
373 0,
374 "binary .mgc file",
375 "Binary compiled magic files (.mgc) are not supported for parsing.\n\
376 Use the --use-builtin option to use the built-in magic rules instead,\n\
377 or provide a text-based magic file or directory.",
378 ))
379 }
380 }
381}
382
#[cfg(test)]
mod tests {
    use super::*;

    use std::fs::{self, File};
    use std::path::Path;
    use tempfile::TempDir;

    /// Creates a fresh temporary directory for a single test.
    fn scratch_dir() -> TempDir {
        TempDir::new().expect("Failed to create temp dir")
    }

    /// Writes `contents` to `path`, panicking with `failure_msg` if the write fails.
    fn write_fixture(path: &Path, contents: impl AsRef<[u8]>, failure_msg: &str) {
        fs::write(path, contents).expect(failure_msg);
    }

    /// A directory that cannot be listed is a hard error.
    #[test]
    fn test_load_directory_critical_error_io() {
        let missing_dir = Path::new("/this/should/not/exist/anywhere/at/all");

        let err = load_magic_directory(missing_dir)
            .expect_err("Should return error for non-existent directory");

        assert!(err.to_string().contains("Failed to read directory"));
    }

    /// One unparsable file does not prevent the valid one from loading.
    #[test]
    fn test_load_directory_non_critical_error_parse() {
        let dir = scratch_dir();

        write_fixture(
            &dir.path().join("valid.magic"),
            "0 string \\x01\\x02 valid\n",
            "Failed to write valid file",
        );
        write_fixture(
            &dir.path().join("invalid.magic"),
            "this is invalid syntax\n",
            "Failed to write invalid file",
        );

        let parsed = load_magic_directory(dir.path()).expect("Should load valid files");

        assert_eq!(parsed.rules.len(), 1, "Should load only valid file");
        assert_eq!(parsed.rules[0].message, "valid");
    }

    /// Empty and comment-only files load successfully and yield no rules.
    #[test]
    fn test_load_directory_empty_files() {
        let dir = scratch_dir();

        write_fixture(
            &dir.path().join("empty.magic"),
            "",
            "Failed to write empty file",
        );
        write_fixture(
            &dir.path().join("comments.magic"),
            "# Just comments\n# Nothing else\n",
            "Failed to write comments file",
        );

        let parsed = load_magic_directory(dir.path()).expect("Should handle empty files");

        assert_eq!(
            parsed.rules.len(),
            0,
            "Empty files should contribute no rules"
        );
    }

    /// A non-UTF-8 file next to a valid magic file does not break loading.
    #[test]
    fn test_load_directory_binary_files() {
        let dir = scratch_dir();

        write_fixture(
            &dir.path().join("binary.dat"),
            [0xFF_u8, 0xFE, 0xFF, 0xFE],
            "Failed to write binary file",
        );
        write_fixture(
            &dir.path().join("valid.magic"),
            "0 string \\x01\\x02 valid\n",
            "Failed to write valid file",
        );

        let parsed = load_magic_directory(dir.path())
            .expect("Directory with a binary file alongside a valid file should still load");

        assert_eq!(
            parsed.rules.len(),
            1,
            "Only the valid magic file should contribute rules"
        );
        assert_eq!(parsed.rules[0].message, "valid");
    }

    /// File extensions play no role in which files are loaded.
    #[test]
    fn test_load_directory_mixed_extensions() {
        let dir = scratch_dir();

        write_fixture(
            &dir.path().join("file.magic"),
            "0 string \\x01\\x02 magic\n",
            "Failed to write .magic file",
        );
        write_fixture(
            &dir.path().join("file.txt"),
            "0 string \\x03\\x04 txt\n",
            "Failed to write .txt file",
        );
        write_fixture(
            &dir.path().join("noext"),
            "0 string \\x05\\x06 noext\n",
            "Failed to write no-ext file",
        );

        let parsed = load_magic_directory(dir.path())
            .expect("Should load all files regardless of extension");

        assert_eq!(
            parsed.rules.len(),
            3,
            "Should process all files regardless of extension"
        );

        let messages: Vec<&str> = parsed
            .rules
            .iter()
            .map(|rule| rule.message.as_str())
            .collect();
        for expected in ["magic", "txt", "noext"] {
            assert!(messages.contains(&expected));
        }
    }

    /// Rules appear in file-name order, independent of creation order.
    #[test]
    fn test_load_directory_alphabetical_ordering() {
        let dir = scratch_dir();

        // Deliberately written out of order.
        write_fixture(
            &dir.path().join("03-third"),
            "0 string \\x07\\x08\\x09 third\n",
            "Failed to write third file",
        );
        write_fixture(
            &dir.path().join("01-first"),
            "0 string \\x01\\x02\\x03 first\n",
            "Failed to write first file",
        );
        write_fixture(
            &dir.path().join("02-second"),
            "0 string \\x04\\x05\\x06 second\n",
            "Failed to write second file",
        );

        let parsed = load_magic_directory(dir.path()).expect("Should load directory in order");

        assert_eq!(parsed.rules.len(), 3);
        for (rule, expected) in parsed.rules.iter().zip(["first", "second", "third"]) {
            assert_eq!(rule.message, expected);
        }
    }

    /// A plain text magic file is parsed through `load_magic_file`.
    #[test]
    fn test_load_magic_file_text_format() {
        let dir = scratch_dir();
        let magic_file = dir.path().join("magic.txt");

        write_fixture(
            &magic_file,
            "0 string \\x7fELF ELF executable\n",
            "Failed to write magic file",
        );

        let parsed = load_magic_file(&magic_file).expect("Failed to load text magic file");

        assert_eq!(parsed.rules.len(), 1);
        assert_eq!(parsed.rules[0].message, "ELF executable");
    }

    /// A directory path is routed to the directory loader.
    #[test]
    fn test_load_magic_file_directory_format() {
        let dir = scratch_dir();
        let magic_dir = dir.path().join("magic.d");
        fs::create_dir(&magic_dir).expect("Failed to create magic directory");

        write_fixture(
            &magic_dir.join("00_elf"),
            "0 string \\x7fELF ELF executable\n",
            "Failed to write elf file",
        );
        write_fixture(
            &magic_dir.join("01_zip"),
            "0 string \\x50\\x4b\\x03\\x04 ZIP archive\n",
            "Failed to write zip file",
        );

        let parsed = load_magic_file(&magic_dir).expect("Failed to load directory");

        assert_eq!(parsed.rules.len(), 2);
        for (rule, expected) in parsed.rules.iter().zip(["ELF executable", "ZIP archive"]) {
            assert_eq!(rule.message, expected);
        }
    }

    /// A compiled `.mgc` file is rejected with a message pointing at the alternatives.
    #[test]
    fn test_load_magic_file_binary_format_error() {
        use std::io::Write;

        let dir = scratch_dir();
        let binary_file = dir.path().join("magic.mgc");

        let mut file = File::create(&binary_file).expect("Failed to create binary file");
        file.write_all(&[0x1C_u8, 0x04, 0x1E, 0xF1])
            .expect("Failed to write magic number");

        let error_msg = load_magic_file(&binary_file)
            .expect_err("Should fail to load binary .mgc file")
            .to_string();

        let mentions_any = |needles: &[&str]| needles.iter().any(|&needle| error_msg.contains(needle));

        assert!(
            mentions_any(&["Binary", "binary"]),
            "Error should mention binary format: {error_msg}",
        );
        assert!(
            mentions_any(&["--use-builtin", "built-in"]),
            "Error should mention --use-builtin option: {error_msg}",
        );
    }

    /// A missing path yields an error.
    #[test]
    fn test_load_magic_file_io_error() {
        let missing_file = Path::new("/this/path/should/not/exist/magic.txt");

        assert!(
            load_magic_file(missing_file).is_err(),
            "Should fail for non-existent file"
        );
    }

    /// For a single file, a parse error is returned to the caller.
    #[test]
    fn test_load_magic_file_parse_error_propagation() {
        let dir = scratch_dir();
        let invalid_file = dir.path().join("invalid.magic");

        write_fixture(
            &invalid_file,
            "string test invalid\n",
            "Failed to write invalid file",
        );

        let error =
            load_magic_file(&invalid_file).expect_err("Should fail for file with parse errors");

        let error_msg = format!("{error:?}");
        assert!(
            error_msg.contains("InvalidSyntax") || error_msg.contains("syntax"),
            "Error should be parse error: {error_msg}",
        );
    }

    /// Guards against the magic-file limit drifting from the file-buffer limit.
    #[test]
    fn test_max_magic_file_size_matches_file_buffer_limit() {
        assert_eq!(
            MAX_MAGIC_FILE_SIZE,
            crate::io::FileBuffer::MAX_FILE_SIZE,
            "MAX_MAGIC_FILE_SIZE must match FileBuffer::MAX_FILE_SIZE"
        );
    }

    /// A file one byte over the limit is rejected with a descriptive error.
    #[test]
    fn test_load_magic_file_rejects_oversized_file() {
        let dir = scratch_dir();
        let oversized = dir.path().join("huge.magic");

        {
            // Only the length is set; no data is written to the file.
            let sparse = File::create(&oversized).expect("Failed to create oversized file");
            sparse
                .set_len(MAX_MAGIC_FILE_SIZE + 1)
                .expect("Failed to set sparse file length");
        }

        let err_msg = load_magic_file(&oversized)
            .expect_err("Loading a file larger than MAX_MAGIC_FILE_SIZE must fail")
            .to_string();

        assert!(
            err_msg.contains("too large"),
            "Error should indicate size limit violation, got: {err_msg}"
        );
        assert!(
            err_msg.contains(&MAX_MAGIC_FILE_SIZE.to_string()),
            "Error should mention the maximum allowed size, got: {err_msg}"
        );
    }

    /// A stray non-UTF-8 byte inside a comment does not prevent loading.
    #[test]
    fn test_load_magic_file_tolerates_non_utf8_in_comment() {
        let dir = scratch_dir();
        let magic_path = dir.path().join("with-latin1-comment.magic");

        // 0xdf on its own is not valid UTF-8.
        let parts: [&[u8]; 4] = [
            b"# From: Thomas Wei",
            &[0xdf],
            b"schuh <thomas@example.invalid>\n",
            b"0 string \\x7fELF ELF executable\n",
        ];
        let bytes: Vec<u8> = parts.concat();
        write_fixture(
            &magic_path,
            &bytes,
            "Failed to write magic file with non-UTF-8 byte",
        );

        let parsed = load_magic_file(&magic_path)
            .expect("Magic file with non-UTF-8 bytes in a comment must still load");

        assert_eq!(
            parsed.rules.len(),
            1,
            "The ELF rule should be parsed; the comment is stripped"
        );
        assert_eq!(parsed.rules[0].message, "ELF executable");
    }

    /// `name` definitions from separate files end up in one merged name table.
    #[test]
    fn test_load_directory_merges_name_tables() {
        let dir = scratch_dir();

        write_fixture(
            &dir.path().join("00_first"),
            "0 name sub_a\n>0 byte 1 a-body\n",
            "Failed to write sub_a file",
        );
        write_fixture(
            &dir.path().join("01_second"),
            "0 name sub_b\n>0 byte 2 b-body\n",
            "Failed to write sub_b file",
        );

        let parsed = load_magic_directory(dir.path()).expect("Should load both name subroutines");

        assert_eq!(parsed.rules.len(), 0);
        for name in ["sub_a", "sub_b"] {
            assert!(parsed.name_table.get(name).is_some());
        }
    }
}