Skip to main content

libmagic_rs/
build_helpers.rs

1// Copyright (c) 2025-2026 the libmagic-rs contributors
2// SPDX-License-Identifier: Apache-2.0
3
//! Build-time helpers for compiling magic rules.
//!
//! This module contains functionality used by the build script to parse magic files
//! and generate Rust code for built-in rules. It is extracted into a library module
//! to enable comprehensive testing of the build process, including error cases.
9use crate::error::ParseError;
10use crate::parser::ast::{
11    Endianness, MagicRule, OffsetSpec, Operator, StrengthModifier, TypeKind, Value,
12};
13use crate::parser::parse_text_magic_file;
14
/// Number of spaces added per nesting level when indenting the generated Rust code.
const INDENT_WIDTH: usize = 4;
16
17/// Parses a magic file and generates Rust code for the built-in rules.
18///
19/// This function wraps the parsing and code generation steps, providing a testable
20/// interface for the build script logic.
21///
22/// # Errors
23///
24/// Returns a `ParseError` if the magic file content is invalid or malformed.
25pub fn parse_and_generate_builtin_rules(magic_content: &str) -> Result<String, ParseError> {
26    let rules = parse_text_magic_file(magic_content)?;
27    Ok(generate_builtin_rules(&rules))
28}
29
30/// Formats a parse error for display in build script output.
31///
32/// This function converts a `ParseError` into a human-readable message suitable
33/// for display when the build script fails.
34#[must_use]
35pub fn format_parse_error(error: &ParseError) -> String {
36    match error {
37        ParseError::InvalidSyntax { line, message } => {
38            format!("Error parsing builtin_rules.magic at line {line}: {message}")
39        }
40        ParseError::UnsupportedFeature { line, feature } => {
41            format!("Error parsing builtin_rules.magic at line {line}: {feature}")
42        }
43        ParseError::InvalidOffset { line, offset } => {
44            format!("Error parsing builtin_rules.magic at line {line}: {offset}")
45        }
46        ParseError::InvalidType { line, type_spec } => {
47            format!("Error parsing builtin_rules.magic at line {line}: {type_spec}")
48        }
49        ParseError::InvalidOperator { line, operator } => {
50            format!("Error parsing builtin_rules.magic at line {line}: {operator}")
51        }
52        ParseError::InvalidValue { line, value } => {
53            format!("Error parsing builtin_rules.magic at line {line}: {value}")
54        }
55        ParseError::UnsupportedFormat {
56            line,
57            format_type,
58            message,
59        } => format!("Error parsing builtin_rules.magic at line {line}: {format_type} {message}"),
60        ParseError::IoError(err) => {
61            format!("Error parsing builtin_rules.magic: I/O error: {err}")
62        }
63    }
64}
65
66fn generate_builtin_rules(rules: &[MagicRule]) -> String {
67    let mut output = String::new();
68
69    // Allow unused_imports since StrengthModifier may not be used if no rules have strength modifiers
70    push_line(&mut output, "#[allow(unused_imports)]");
71    push_line(
72        &mut output,
73        "use crate::parser::ast::{MagicRule, OffsetSpec, TypeKind, Operator, Value, Endianness, StrengthModifier};",
74    );
75    push_line(&mut output, "use std::sync::LazyLock;");
76    push_line(&mut output, "");
77    push_line(
78        &mut output,
79        "/// Built-in magic rules compiled at build time.",
80    );
81    push_line(&mut output, "///");
82    push_line(
83        &mut output,
84        "/// This static contains magic rules parsed from `src/builtin_rules.magic` during",
85    );
86    push_line(
87        &mut output,
88        "/// the build process. The rules are lazily initialized on first access.",
89    );
90    push_line(&mut output, "///");
91    push_line(
92        &mut output,
93        "/// Use [`get_builtin_rules()`] to access these rules instead of using this static directly.",
94    );
95    push_line(
96        &mut output,
97        "pub static BUILTIN_RULES: LazyLock<Vec<MagicRule>> = LazyLock::new(|| {",
98    );
99    push_line(&mut output, "    vec![");
100
101    for rule in rules {
102        let serialized = serialize_magic_rule(rule, INDENT_WIDTH * 2);
103        output.push_str(&serialized);
104        output.push(',');
105        output.push('\n');
106    }
107
108    push_line(&mut output, "    ]");
109    push_line(&mut output, "});\n");
110    output
111}
112
113fn serialize_magic_rule(rule: &MagicRule, indent: usize) -> String {
114    let mut output = String::new();
115
116    push_indent(&mut output, indent);
117    output.push_str("MagicRule {\n");
118
119    push_field(
120        &mut output,
121        indent + INDENT_WIDTH,
122        "offset",
123        &serialize_offset_spec(&rule.offset),
124    );
125    push_field(
126        &mut output,
127        indent + INDENT_WIDTH,
128        "typ",
129        &serialize_type_kind(&rule.typ),
130    );
131    push_field(
132        &mut output,
133        indent + INDENT_WIDTH,
134        "op",
135        &serialize_operator(&rule.op),
136    );
137    push_field(
138        &mut output,
139        indent + INDENT_WIDTH,
140        "value",
141        &serialize_value(&rule.value),
142    );
143    push_field(
144        &mut output,
145        indent + INDENT_WIDTH,
146        "message",
147        &format!("String::from({})", format_string_literal(&rule.message)),
148    );
149
150    push_indent(&mut output, indent + INDENT_WIDTH);
151    output.push_str("children: ");
152    output.push_str(&serialize_children(&rule.children, indent + INDENT_WIDTH));
153    output.push_str(",\n");
154
155    push_field(
156        &mut output,
157        indent + INDENT_WIDTH,
158        "level",
159        &rule.level.to_string(),
160    );
161
162    push_field(
163        &mut output,
164        indent + INDENT_WIDTH,
165        "strength_modifier",
166        &serialize_strength_modifier(rule.strength_modifier),
167    );
168
169    push_indent(&mut output, indent);
170    output.push('}');
171
172    output
173}
174
175fn serialize_children(children: &[MagicRule], indent: usize) -> String {
176    if children.is_empty() {
177        return "Vec::new()".to_string();
178    }
179
180    let mut output = String::new();
181    output.push_str("vec![\n");
182
183    for child in children {
184        let serialized = serialize_magic_rule(child, indent + INDENT_WIDTH);
185        output.push_str(&serialized);
186        output.push_str(",\n");
187    }
188
189    push_indent(&mut output, indent);
190    output.push(']');
191    output
192}
193
194fn serialize_offset_spec(offset: &OffsetSpec) -> String {
195    match offset {
196        OffsetSpec::Absolute(value) => format!("OffsetSpec::Absolute({value})"),
197        OffsetSpec::Indirect {
198            base_offset,
199            pointer_type,
200            adjustment,
201            endian,
202        } => format!(
203            "OffsetSpec::Indirect {{ base_offset: {base_offset}, pointer_type: {}, adjustment: {adjustment}, endian: {} }}",
204            serialize_type_kind(pointer_type),
205            serialize_endianness(*endian)
206        ),
207        OffsetSpec::Relative(value) => format!("OffsetSpec::Relative({value})"),
208        OffsetSpec::FromEnd(value) => format!("OffsetSpec::FromEnd({value})"),
209    }
210}
211
212fn serialize_type_kind(typ: &TypeKind) -> String {
213    match typ {
214        TypeKind::Byte => "TypeKind::Byte".to_string(),
215        TypeKind::Short { endian, signed } => format!(
216            "TypeKind::Short {{ endian: {}, signed: {} }}",
217            serialize_endianness(*endian),
218            signed
219        ),
220        TypeKind::Long { endian, signed } => format!(
221            "TypeKind::Long {{ endian: {}, signed: {} }}",
222            serialize_endianness(*endian),
223            signed
224        ),
225        TypeKind::String { max_length } => match max_length {
226            Some(value) => {
227                format!("TypeKind::String {{ max_length: Some({value}) }}")
228            }
229            None => "TypeKind::String { max_length: None }".to_string(),
230        },
231    }
232}
233
234fn serialize_operator(op: &Operator) -> String {
235    match op {
236        Operator::Equal => "Operator::Equal".to_string(),
237        Operator::NotEqual => "Operator::NotEqual".to_string(),
238        Operator::BitwiseAnd => "Operator::BitwiseAnd".to_string(),
239        Operator::BitwiseAndMask(mask) => format!("Operator::BitwiseAndMask({mask})"),
240    }
241}
242
243fn serialize_value(value: &Value) -> String {
244    match value {
245        Value::Uint(number) => format!("Value::Uint({})", format_number(*number)),
246        Value::Int(number) => format!("Value::Int({})", format_signed_number(*number)),
247        Value::Bytes(bytes) => format!("Value::Bytes({})", format_byte_vec(bytes)),
248        Value::String(text) => format!(
249            "Value::String(String::from({}))",
250            format_string_literal(text)
251        ),
252    }
253}
254
/// Formats an unsigned number as a Rust integer literal, using `_` separators so the
/// generated code does not trip `clippy::unreadable_literal`.
///
/// Values below 10000 are written without separators; larger values are split into
/// groups of three digits counted from the right (for example `1_234_567`).
fn format_number(num: u64) -> String {
    let digits = num.to_string();
    if num < 10000 {
        return digits;
    }

    // The leading group holds whatever is left once the rest is split into threes.
    let lead_len = match digits.len() % 3 {
        0 => 3,
        remainder => remainder,
    };
    let (lead, rest) = digits.split_at(lead_len);

    let mut grouped = String::with_capacity(digits.len() + digits.len() / 3);
    grouped.push_str(lead);
    // `rest` is made of ASCII digits and its length is a multiple of three.
    for group in rest.as_bytes().chunks(3) {
        grouped.push('_');
        grouped.extend(group.iter().map(|&digit| char::from(digit)));
    }
    grouped
}
273
274/// Format a signed number with underscores for readability (`clippy::unreadable_literal`)
275fn format_signed_number(num: i64) -> String {
276    if num < 0 {
277        let abs = num.unsigned_abs();
278        format!("-{}", format_number(abs))
279    } else {
280        // Safe: num >= 0, so the cast cannot lose the sign
281        format_number(num.unsigned_abs())
282    }
283}
284
285fn serialize_endianness(endian: Endianness) -> String {
286    match endian {
287        Endianness::Little => "Endianness::Little".to_string(),
288        Endianness::Big => "Endianness::Big".to_string(),
289        Endianness::Native => "Endianness::Native".to_string(),
290    }
291}
292
293fn serialize_strength_modifier(modifier: Option<StrengthModifier>) -> String {
294    match modifier {
295        None => "None".to_string(),
296        Some(StrengthModifier::Add(val)) => format!("Some(StrengthModifier::Add({val}))"),
297        Some(StrengthModifier::Subtract(val)) => format!("Some(StrengthModifier::Subtract({val}))"),
298        Some(StrengthModifier::Multiply(val)) => format!("Some(StrengthModifier::Multiply({val}))"),
299        Some(StrengthModifier::Divide(val)) => format!("Some(StrengthModifier::Divide({val}))"),
300        Some(StrengthModifier::Set(val)) => format!("Some(StrengthModifier::Set({val}))"),
301    }
302}
303
/// Renders a byte slice as a `vec![...]` literal of two-digit lowercase hex values.
///
/// Elements are separated by `", "`; an empty slice produces `vec![]`.
fn format_byte_vec(bytes: &[u8]) -> String {
    let elements: Vec<String> = bytes.iter().map(|byte| format!("0x{byte:02x}")).collect();
    format!("vec![{}]", elements.join(", "))
}
321
/// Renders `value` as a double-quoted Rust string literal.
///
/// The content is escaped with `str::escape_default`, so quotes, backslashes, control
/// characters and non-ASCII characters are written as escape sequences.
fn format_string_literal(value: &str) -> String {
    let mut literal = String::with_capacity(value.len() + 2);
    literal.push('"');
    literal.extend(value.escape_default());
    literal.push('"');
    literal
}
326
/// Appends `line` to `output`, followed by a newline.
fn push_line(output: &mut String, line: &str) {
    output.extend([line, "\n"]);
}
331
/// Appends `indent` space characters to `output`.
fn push_indent(output: &mut String, indent: usize) {
    output.extend(std::iter::repeat(' ').take(indent));
}
337
338fn push_field(output: &mut String, indent: usize, name: &str, value: &str) {
339    push_indent(output, indent);
340    output.push_str(name);
341    output.push_str(": ");
342    output.push_str(value);
343    output.push_str(",\n");
344}
345
// Unit tests for the build helpers: error formatting, the individual serializers,
// the literal formatting helpers, and the end-to-end parse-and-generate path.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_format_parse_error_invalid_syntax() {
        let error = ParseError::InvalidSyntax {
            line: 42,
            message: "expected offset".to_string(),
        };
        let formatted = format_parse_error(&error);
        assert!(formatted.contains("line 42"));
        assert!(formatted.contains("expected offset"));
        assert!(formatted.contains("builtin_rules.magic"));
    }

    #[test]
    fn test_format_parse_error_unsupported_feature() {
        let error = ParseError::UnsupportedFeature {
            line: 10,
            feature: "regex patterns".to_string(),
        };
        let formatted = format_parse_error(&error);
        assert!(formatted.contains("line 10"));
        assert!(formatted.contains("regex patterns"));
    }

    #[test]
    fn test_format_parse_error_invalid_offset() {
        let error = ParseError::InvalidOffset {
            line: 5,
            offset: "invalid offset spec".to_string(),
        };
        let formatted = format_parse_error(&error);
        assert!(formatted.contains("line 5"));
        assert!(formatted.contains("invalid offset spec"));
    }

    #[test]
    fn test_format_parse_error_invalid_type() {
        let error = ParseError::InvalidType {
            line: 7,
            type_spec: "unknown type".to_string(),
        };
        let formatted = format_parse_error(&error);
        assert!(formatted.contains("line 7"));
        assert!(formatted.contains("unknown type"));
    }

    #[test]
    fn test_format_parse_error_invalid_operator() {
        let error = ParseError::InvalidOperator {
            line: 12,
            operator: "bad operator".to_string(),
        };
        let formatted = format_parse_error(&error);
        assert!(formatted.contains("line 12"));
        assert!(formatted.contains("bad operator"));
    }

    #[test]
    fn test_format_parse_error_invalid_value() {
        let error = ParseError::InvalidValue {
            line: 15,
            value: "malformed value".to_string(),
        };
        let formatted = format_parse_error(&error);
        assert!(formatted.contains("line 15"));
        assert!(formatted.contains("malformed value"));
    }

    #[test]
    fn test_serialize_offset_spec_absolute() {
        let offset = OffsetSpec::Absolute(42);
        let serialized = serialize_offset_spec(&offset);
        assert_eq!(serialized, "OffsetSpec::Absolute(42)");
    }

    #[test]
    fn test_serialize_offset_spec_relative() {
        let offset = OffsetSpec::Relative(-10);
        let serialized = serialize_offset_spec(&offset);
        assert_eq!(serialized, "OffsetSpec::Relative(-10)");
    }

    #[test]
    fn test_serialize_offset_spec_from_end() {
        let offset = OffsetSpec::FromEnd(-16);
        let serialized = serialize_offset_spec(&offset);
        assert_eq!(serialized, "OffsetSpec::FromEnd(-16)");
    }

    #[test]
    fn test_serialize_type_kind_byte() {
        let typ = TypeKind::Byte;
        let serialized = serialize_type_kind(&typ);
        assert_eq!(serialized, "TypeKind::Byte");
    }

    #[test]
    fn test_serialize_type_kind_short() {
        let typ = TypeKind::Short {
            endian: Endianness::Little,
            signed: false,
        };
        let serialized = serialize_type_kind(&typ);
        assert!(serialized.contains("TypeKind::Short"));
        assert!(serialized.contains("Endianness::Little"));
        assert!(serialized.contains("signed: false"));
    }

    #[test]
    fn test_serialize_type_kind_long() {
        let typ = TypeKind::Long {
            endian: Endianness::Big,
            signed: true,
        };
        let serialized = serialize_type_kind(&typ);
        assert!(serialized.contains("TypeKind::Long"));
        assert!(serialized.contains("Endianness::Big"));
        assert!(serialized.contains("signed: true"));
    }

    #[test]
    fn test_serialize_type_kind_string() {
        let typ1 = TypeKind::String { max_length: None };
        let serialized1 = serialize_type_kind(&typ1);
        assert_eq!(serialized1, "TypeKind::String { max_length: None }");

        let typ2 = TypeKind::String {
            max_length: Some(256),
        };
        let serialized2 = serialize_type_kind(&typ2);
        assert_eq!(serialized2, "TypeKind::String { max_length: Some(256) }");
    }

    #[test]
    fn test_serialize_operator() {
        assert_eq!(serialize_operator(&Operator::Equal), "Operator::Equal");
        assert_eq!(
            serialize_operator(&Operator::NotEqual),
            "Operator::NotEqual"
        );
        assert_eq!(
            serialize_operator(&Operator::BitwiseAnd),
            "Operator::BitwiseAnd"
        );
        assert_eq!(
            serialize_operator(&Operator::BitwiseAndMask(0xFF)),
            "Operator::BitwiseAndMask(255)"
        );
    }

    #[test]
    fn test_serialize_value_uint() {
        let value = Value::Uint(12345);
        let serialized = serialize_value(&value);
        assert_eq!(serialized, "Value::Uint(12_345)");
    }

    #[test]
    fn test_serialize_value_int() {
        // Pin the full output so a lost sign or wrong payload is detected.
        let value = Value::Int(-100);
        let serialized = serialize_value(&value);
        assert_eq!(serialized, "Value::Int(-100)");
    }

    #[test]
    fn test_serialize_value_bytes() {
        let value = Value::Bytes(vec![0x7F, 0x45, 0x4C, 0x46]);
        let serialized = serialize_value(&value);
        assert_eq!(serialized, "Value::Bytes(vec![0x7f, 0x45, 0x4c, 0x46])");
    }

    #[test]
    fn test_serialize_value_string() {
        let value = Value::String("test".to_string());
        let serialized = serialize_value(&value);
        assert_eq!(serialized, "Value::String(String::from(\"test\"))");
    }

    #[test]
    fn test_format_number_small() {
        assert_eq!(format_number(42), "42");
        assert_eq!(format_number(999), "999");
        assert_eq!(format_number(9999), "9999");
    }

    #[test]
    fn test_format_number_large() {
        assert_eq!(format_number(10000), "10_000");
        assert_eq!(format_number(123_456), "123_456");
        assert_eq!(format_number(1_234_567_890), "1_234_567_890");
        assert_eq!(format_number(u64::MAX), "18_446_744_073_709_551_615");
    }

    #[test]
    fn test_format_signed_number() {
        assert_eq!(format_signed_number(0), "0");
        assert_eq!(format_signed_number(42), "42");
        assert_eq!(format_signed_number(-100), "-100");
        assert_eq!(format_signed_number(-12_345), "-12_345");
        assert_eq!(format_signed_number(i64::MAX), "9_223_372_036_854_775_807");
        // The magnitude of `i64::MIN` does not fit in an `i64`; it must still format.
        assert_eq!(format_signed_number(i64::MIN), "-9_223_372_036_854_775_808");
    }

    #[test]
    fn test_serialize_endianness() {
        assert_eq!(
            serialize_endianness(Endianness::Little),
            "Endianness::Little"
        );
        assert_eq!(serialize_endianness(Endianness::Big), "Endianness::Big");
        assert_eq!(
            serialize_endianness(Endianness::Native),
            "Endianness::Native"
        );
    }

    #[test]
    fn test_serialize_strength_modifier_none() {
        assert_eq!(serialize_strength_modifier(None), "None");
    }

    #[test]
    fn test_format_byte_vec_empty() {
        let result = format_byte_vec(&[]);
        assert_eq!(result, "vec![]");
    }

    #[test]
    fn test_format_byte_vec_single() {
        let result = format_byte_vec(&[0x42]);
        assert_eq!(result, "vec![0x42]");
    }

    #[test]
    fn test_format_byte_vec_multiple() {
        let result = format_byte_vec(&[0x12, 0x34, 0x56]);
        assert_eq!(result, "vec![0x12, 0x34, 0x56]");
    }

    #[test]
    fn test_format_string_literal() {
        assert_eq!(format_string_literal("hello"), "\"hello\"");
        assert_eq!(format_string_literal("test\n"), "\"test\\n\"");
        assert_eq!(format_string_literal("quote\"here"), "\"quote\\\"here\"");
    }

    #[test]
    fn test_generate_builtin_rules_empty() {
        let rules: Vec<MagicRule> = vec![];
        let generated = generate_builtin_rules(&rules);

        assert!(generated.contains("LazyLock<Vec<MagicRule>>"));
        // With no rules the list opens and closes on consecutive lines.
        assert!(generated.contains("    vec![\n    ]\n"));
        assert!(generated.contains("use crate::parser::ast"));
        assert!(generated.contains("use std::sync::LazyLock"));
    }

    #[test]
    fn test_generate_builtin_rules_single_rule() {
        let rule = MagicRule {
            offset: OffsetSpec::Absolute(0),
            typ: TypeKind::Byte,
            op: Operator::Equal,
            value: Value::Uint(0x7F),
            message: "test".to_string(),
            children: vec![],
            level: 0,
            strength_modifier: None,
        };

        let generated = generate_builtin_rules(&[rule]);

        assert!(generated.contains("OffsetSpec::Absolute(0)"));
        assert!(generated.contains("TypeKind::Byte"));
        assert!(generated.contains("Operator::Equal"));
        assert!(generated.contains("Value::Uint(127)"));
        assert!(generated.contains("message: String::from(\"test\")"));
        assert!(generated.contains("children: Vec::new()"));
        assert!(generated.contains("level: 0"));
        assert!(generated.contains("strength_modifier: None"));
    }

    #[test]
    fn test_serialize_children_empty() {
        let result = serialize_children(&[], 4);
        assert_eq!(result, "Vec::new()");
    }

    #[test]
    fn test_serialize_children_with_nested_rule() {
        let child = MagicRule {
            offset: OffsetSpec::Absolute(4),
            typ: TypeKind::Byte,
            op: Operator::Equal,
            value: Value::Uint(1),
            message: "child".to_string(),
            children: vec![],
            level: 1,
            strength_modifier: None,
        };

        let result = serialize_children(&[child], 4);

        assert!(result.contains("vec!["));
        assert!(result.contains("OffsetSpec::Absolute(4)"));
        assert!(result.contains("level: 1"));
        assert!(result.contains("child"));
    }

    // Tests for invalid magic file parsing failure path
    #[test]
    fn test_parse_and_generate_invalid_syntax() {
        let invalid_magic = "this is not valid magic syntax";
        let result = parse_and_generate_builtin_rules(invalid_magic);

        assert!(result.is_err());
        let error = result.unwrap_err();
        let formatted = format_parse_error(&error);
        assert!(formatted.contains("builtin_rules.magic"));
    }

    #[test]
    fn test_parse_and_generate_invalid_offset() {
        let invalid_magic = "999999999999999999999 byte =0x7F ELF";
        let result = parse_and_generate_builtin_rules(invalid_magic);

        assert!(result.is_err());
        let error = result.unwrap_err();
        let formatted = format_parse_error(&error);
        assert!(formatted.contains("builtin_rules.magic"));
    }

    #[test]
    fn test_parse_and_generate_invalid_type() {
        let invalid_magic = "0 invalidtype =0x7F test";
        let result = parse_and_generate_builtin_rules(invalid_magic);

        assert!(result.is_err());
        let error = result.unwrap_err();
        let formatted = format_parse_error(&error);
        assert!(formatted.contains("builtin_rules.magic"));
    }

    #[test]
    fn test_parse_and_generate_empty_input() {
        let empty_magic = "";
        let result = parse_and_generate_builtin_rules(empty_magic);

        // Empty input should succeed with no rules
        assert!(result.is_ok());
        let generated = result.unwrap();
        assert!(generated.contains("    vec![\n    ]\n"));
    }

    #[test]
    fn test_parse_and_generate_valid_magic() {
        let valid_magic = "0 byte =0x7F ELF executable";
        let result = parse_and_generate_builtin_rules(valid_magic);

        assert!(result.is_ok());
        let generated = result.unwrap();
        assert!(generated.contains("OffsetSpec::Absolute(0)"));
        assert!(generated.contains("TypeKind::Byte"));
        assert!(generated.contains("Value::Uint(127)"));
        assert!(generated.contains("ELF executable"));
    }

    #[test]
    fn test_parse_and_generate_malformed_value() {
        let invalid_magic = "0 byte =notahexvalue test";
        let result = parse_and_generate_builtin_rules(invalid_magic);

        assert!(result.is_err());
        let error = result.unwrap_err();
        let formatted = format_parse_error(&error);
        assert!(formatted.contains("builtin_rules.magic"));
    }
}