// Submodules of the parser. Visibility differs per module: `ast`, `grammar` and `types`
// are public, `codegen` and `preprocessing` are crate-visible, the rest are private.
pub mod ast;
// NOTE(review): `dead_code` is silenced for the whole `codegen` module and the reason is
// not recorded here — confirm the allowance is still required.
#[allow(dead_code)]
pub(crate) mod codegen;
mod format;
pub mod grammar;
mod hierarchy;
mod loader;
pub(crate) mod preprocessing;
pub mod types;
// Public re-exports, so callers can name these items directly from this module instead of
// reaching into the submodules (two of which, `format` and `loader`, are private).
pub use ast::{Endianness, MagicRule, OffsetSpec, Operator, StrengthModifier, TypeKind, Value};
pub use grammar::{parse_number, parse_offset};
pub use format::{MagicFileFormat, detect_format};
pub use loader::{load_magic_directory, load_magic_file};
// Crate-internal re-exports: the two steps that `parse_text_magic_file` chains together.
pub(crate) use hierarchy::build_rule_hierarchy;
pub(crate) use preprocessing::preprocess_lines;
// Error type returned by `parse_text_magic_file`.
use crate::error::ParseError;
/// Parses the text of a magic file into its root-level rules.
///
/// The work is done in two steps: `preprocess_lines` turns the raw text into a list of
/// preprocessed lines, and `build_rule_hierarchy` assembles those lines into rules, with
/// nested rules stored in each rule's `children`.
///
/// # Errors
///
/// Propagates any error reported by preprocessing or by hierarchy construction.
pub fn parse_text_magic_file(input: &str) -> Result<Vec<MagicRule>, ParseError> {
    build_rule_hierarchy(preprocess_lines(input)?)
}
#[cfg(test)]
mod unit_tests {
    use super::*;

    /// Runs `parse_text_magic_file` on `magic` and unwraps the result, so a parse error
    /// fails the calling test.
    #[track_caller]
    fn parse_ok(magic: &str) -> Vec<MagicRule> {
        parse_text_magic_file(magic).unwrap()
    }

    /// One rule line produces one root rule that carries the message text.
    #[test]
    fn test_parse_text_magic_file_single_rule() {
        let roots = parse_ok("0 string 0 ZIP archive");
        assert_eq!(roots.len(), 1);
        assert_eq!(roots[0].message, "ZIP archive");
    }

    /// Two `>` lines after a root line become two children of that root.
    #[test]
    fn test_parse_text_magic_file_hierarchical_rules() {
        let magic = r"
0 string 0 ELF
>4 byte 1 32-bit
>4 byte 2 64-bit
";
        let roots = parse_ok(magic);
        assert_eq!(roots.len(), 1);
        assert_eq!(roots[0].children.len(), 2);
    }

    /// A `#` comment line does not add a rule.
    #[test]
    fn test_parse_text_magic_file_with_comments() {
        let magic = r"
# ELF file format
0 string 0 ELF
>4 byte 1 32-bit
";
        let roots = parse_ok(magic);
        assert_eq!(roots.len(), 1);
        assert_eq!(roots[0].children.len(), 1);
    }

    /// Each line without a `>` prefix starts a new root rule.
    #[test]
    fn test_parse_text_magic_file_multiple_roots() {
        let magic = r"
0 byte 1 ELF
>4 byte 1 32-bit
0 byte 2 PDF
>5 byte 1 v1
";
        let roots = parse_ok(magic);
        assert_eq!(roots.len(), 2);
    }

    /// An empty string parses successfully to zero rules.
    #[test]
    fn test_parse_text_magic_file_empty_input() {
        let roots = parse_ok("");
        assert_eq!(roots.len(), 0);
    }

    /// Input consisting solely of comment lines parses to zero rules.
    #[test]
    fn test_parse_text_magic_file_only_comments() {
        let magic = r"
# Comment 1
# Comment 2
# Comment 3
";
        let roots = parse_ok(magic);
        assert_eq!(roots.len(), 0);
    }

    /// Empty lines around a rule (here the leading and trailing ones) are not counted.
    #[test]
    fn test_parse_text_magic_file_empty_lines_only() {
        let magic = r"
0 string 0 Test file
";
        let roots = parse_ok(magic);
        assert_eq!(roots.len(), 1);
    }

    /// Words after the first message word are kept as part of the message.
    #[test]
    fn test_parse_text_magic_file_with_message_spaces() {
        let roots = parse_ok("0 string 0 Long message continued here");
        assert!(roots[0].message.contains("continued"));
    }

    /// Nesting depth follows the number of `>` markers, across more than one root.
    #[test]
    fn test_parse_text_magic_file_mixed_indentation() {
        let magic = r"
0 byte 1 Root1
>4 byte 1 Child1
>4 byte 2 Child2
>>6 byte 3 Grandchild
0 byte 2 Root2
>4 byte 4 Child3
";
        let roots = parse_ok(magic);
        assert_eq!(roots.len(), 2);
        assert_eq!(roots[0].children.len(), 2);
        // The grandchild hangs off the second child of the first root.
        assert_eq!(roots[0].children[1].children.len(), 1);
        assert_eq!(roots[1].children.len(), 1);
    }

    /// A larger sample mixing comments, hex values and two levels of nesting.
    #[test]
    fn test_parse_text_magic_file_complex_real_world() {
        let magic = r"
# Magic file for common formats
# ELF binaries
0 byte 0x7f ELF executable
>4 byte 1 Intel 80386
>4 byte 2 x86-64
>>5 byte 1 LSB
>>5 byte 2 MSB
# PDF files
0 byte 0x25 PDF document
>5 byte 0x31 version 1.0
>5 byte 0x34 version 1.4
>5 byte 0x32 version 2.0
";
        let roots = parse_ok(magic);
        assert_eq!(roots.len(), 2);
        assert_eq!(roots[0].message, "ELF executable");
        assert!(roots[0].children.len() > 1);
    }

    /// A `!:strength` line sets the strength modifier of the rule that follows it.
    #[test]
    fn test_parse_text_magic_file_with_strength_directive() {
        // NOTE(review): inside a raw string `\\x7f` reaches the parser as two literal
        // backslashes — confirm the doubling is intentional. The same applies to the
        // other strength tests below.
        let magic = r"
!:strength +10
0 string \\x7fELF ELF executable
";
        let roots = parse_ok(magic);
        assert_eq!(roots.len(), 1);
        assert_eq!(roots[0].strength_modifier, Some(StrengthModifier::Add(10)));
    }

    /// The modifier is consumed by the first following rule and not carried to the next.
    #[test]
    fn test_parse_text_magic_file_strength_applies_to_next_rule() {
        let magic = r"
!:strength *2
0 string \\x7fELF ELF executable
0 string \\x50\\x4b ZIP archive
";
        let roots = parse_ok(magic);
        assert_eq!(roots.len(), 2);
        assert_eq!(
            roots[0].strength_modifier,
            Some(StrengthModifier::Multiply(2))
        );
        assert_eq!(roots[1].strength_modifier, None);
    }

    /// Children of a rule with a strength modifier do not receive the modifier.
    #[test]
    fn test_parse_text_magic_file_strength_with_child_rules() {
        let magic = r"
!:strength =50
0 string \\x7fELF ELF executable
>4 byte 1 32-bit
>4 byte 2 64-bit
";
        let roots = parse_ok(magic);
        assert_eq!(roots.len(), 1);
        assert_eq!(roots[0].strength_modifier, Some(StrengthModifier::Set(50)));
        assert_eq!(roots[0].children[0].strength_modifier, None);
        assert_eq!(roots[0].children[1].strength_modifier, None);
    }

    /// Separate directives apply to separate rules.
    #[test]
    fn test_parse_text_magic_file_multiple_strength_directives() {
        let magic = r"
!:strength +10
0 string \\x7fELF ELF executable
!:strength -5
0 string \\x50\\x4b ZIP archive
";
        let roots = parse_ok(magic);
        assert_eq!(roots.len(), 2);
        assert_eq!(roots[0].strength_modifier, Some(StrengthModifier::Add(10)));
        assert_eq!(
            roots[1].strength_modifier,
            Some(StrengthModifier::Subtract(5))
        );
    }

    /// Every operator spelling maps to its `StrengthModifier` variant; a bare number is
    /// treated like `=`.
    #[test]
    fn test_parse_text_magic_file_strength_all_operators() {
        let cases = [
            ("!:strength +20\n0 byte 1 Test", StrengthModifier::Add(20)),
            ("!:strength -15\n0 byte 1 Test", StrengthModifier::Subtract(15)),
            ("!:strength *3\n0 byte 1 Test", StrengthModifier::Multiply(3)),
            ("!:strength /2\n0 byte 1 Test", StrengthModifier::Divide(2)),
            ("!:strength =100\n0 byte 1 Test", StrengthModifier::Set(100)),
            ("!:strength 50\n0 byte 1 Test", StrengthModifier::Set(50)),
        ];
        for (input, expected_modifier) in cases {
            let roots = parse_ok(input);
            assert_eq!(
                roots[0].strength_modifier,
                Some(expected_modifier),
                "Failed for input: {input}"
            );
        }
    }

    /// A line ending in a backslash is joined with the next line into a single rule.
    #[test]
    fn test_continuation_with_indentation() {
        let magic = r">4 byte 1 Message \
continued";
        let roots = parse_ok(magic);
        assert_eq!(roots.len(), 1);
    }

    /// Hexadecimal offsets are accepted on consecutive root rules.
    #[test]
    fn test_multiple_hex_offsets() {
        let magic = r"
0x100 string 0 At 256
0x200 string 0 At 512
";
        let roots = parse_ok(magic);
        assert_eq!(roots.len(), 2);
    }

    // The overflow tests below call `parse_number` through the `use super::*` glob, which
    // picks up the parent module's re-export of `grammar::parse_number`.

    /// A 20-digit decimal literal is rejected.
    #[test]
    fn test_overflow_decimal_too_many_digits() {
        let outcome = parse_number("12345678901234567890");
        assert!(outcome.is_err(), "Should reject 20+ decimal digits");
    }

    /// A 17-digit hexadecimal literal is rejected.
    #[test]
    fn test_overflow_hex_too_many_digits() {
        let outcome = parse_number("0x10000000000000000");
        assert!(outcome.is_err(), "Should reject 17+ hex digits");
    }

    /// `i64::MAX` itself is accepted.
    #[test]
    fn test_overflow_i64_max() {
        let outcome = parse_number("9223372036854775807");
        assert!(outcome.is_ok(), "Should accept i64::MAX");
    }

    /// One past `i64::MAX` is rejected.
    #[test]
    fn test_overflow_i64_max_plus_one() {
        let outcome = parse_number("9223372036854775808");
        assert!(outcome.is_err(), "Should reject i64::MAX + 1");
    }

    /// An error inside a backslash-continued rule is reported against the line on which
    /// the rule starts (line 2), as seen in the error's `Debug` output.
    #[test]
    fn test_error_reports_correct_line_for_continuation() {
        let magic = "0 string 0 valid\n0 invalid \\\nsyntax here\n0 string 0 valid2";
        let Err(e) = parse_text_magic_file(magic) else {
            panic!("Expected InvalidSyntax error");
        };
        let error_str = format!("{e:?}");
        assert!(
            error_str.contains("line 2") || error_str.contains("line: 2"),
            "Error should reference line 2, got: {error_str}"
        );
    }
}
#[cfg(test)]
mod output_test {
    use crate::parser::{MagicRule, build_rule_hierarchy, parse_text_magic_file, preprocess_lines};

    /// Prints one rule on a single line, then each of its children one indent level deeper.
    fn print_rule(rule: &MagicRule, indent: usize) {
        let pad = " ".repeat(indent);
        println!(
            "{}- level={} offset={:?} type={:?} op={:?} value={:?} message='{}'",
            pad, rule.level, rule.offset, rule.typ, rule.op, rule.value, rule.message
        );
        rule.children
            .iter()
            .for_each(|child| print_rule(child, indent + 1));
    }

    /// Demonstration test: dumps every stage of the parser for a small sample to stdout.
    ///
    /// It makes no assertions of its own; it only fails if one of the three parser calls
    /// returns an error. The test harness captures stdout by default, so run it with
    /// `-- --nocapture` to see the output.
    #[test]
    fn demo_show_all_parser_outputs() {
        let input = r"
# ELF file
0 string 0 ELF
>4 byte 1 32-bit
>4 byte 2 64-bit
0 string 0 ZIP
>0 byte 3 zipped
";

        // Stage 0: the raw text.
        println!("\n================ RAW INPUT ================\n");
        println!("{input}");

        // Stage 1: preprocessed lines.
        println!("\n================ PREPROCESS LINES ================\n");
        let lines = preprocess_lines(input).expect("preprocess_lines failed");
        lines.iter().enumerate().for_each(|(idx, line)| {
            println!(
                "[{}] line_no={} is_comment={} content='{}'",
                idx, line.line_number, line.is_comment, line.content
            );
        });

        // Stage 2: the public entry point, straight from text to rules.
        println!("\n================ PARSED MAGIC RULES ================\n");
        let rules = parse_text_magic_file(input).expect("parse_text_magic_file failed");
        rules.iter().enumerate().for_each(|(i, rule)| {
            println!("ROOT RULE [{i}]:");
            print_rule(rule, 1);
        });

        // Stage 3: the hierarchy built by hand from the stage-1 lines (consumes `lines`).
        println!("\n================ EXPLICIT HIERARCHY BUILD ================\n");
        let rebuilt = build_rule_hierarchy(lines).expect("build_rule_hierarchy failed");
        rebuilt.iter().enumerate().for_each(|(i, rule)| {
            println!("ROOT [{i}]:");
            print_rule(rule, 1);
        });
    }
}