Skip to main content

libmagic_rs/
build_helpers.rs

1// Copyright (c) 2025-2026 the libmagic-rs contributors
2// SPDX-License-Identifier: Apache-2.0
3
//! Build-time helpers for compiling magic rules.
//!
//! This module contains functionality used by the build script to parse magic files
//! and generate Rust code for built-in rules. It is extracted into a library module
//! to enable comprehensive testing of the build process, including error cases.
//!
//! Serialization logic is provided by [`crate::parser::codegen`], which is shared
//! with `build.rs` to avoid duplication.
12use crate::error::ParseError;
13use crate::parser::parse_text_magic_file;
14
// Bring the codegen functions into scope for the test module below (test builds only)
16#[cfg(test)]
17use crate::parser::codegen::{
18    format_byte_vec, format_number, generate_builtin_rules, serialize_children,
19    serialize_endianness, serialize_offset_spec, serialize_operator, serialize_type_kind,
20    serialize_value,
21};
22
23/// Parses a magic file and generates Rust code for the built-in rules.
24///
25/// This function wraps the parsing and code generation steps, providing a testable
26/// interface for the build script logic.
27///
28/// # Errors
29///
30/// Returns a `ParseError` if the magic file content is invalid or malformed.
31pub fn parse_and_generate_builtin_rules(magic_content: &str) -> Result<String, ParseError> {
32    let parsed = parse_text_magic_file(magic_content)?;
33    Ok(crate::parser::codegen::generate_builtin_rules(
34        &parsed.rules,
35    ))
36}
37
38/// Formats a parse error for display in build script output.
39///
40/// This function converts a `ParseError` into a human-readable message suitable
41/// for display when the build script fails.
42#[must_use]
43pub fn format_parse_error(error: &ParseError) -> String {
44    match error {
45        ParseError::InvalidSyntax { line, message } => {
46            format!("Error parsing builtin_rules.magic at line {line}: {message}")
47        }
48        ParseError::UnsupportedFeature { line, feature } => {
49            format!("Error parsing builtin_rules.magic at line {line}: {feature}")
50        }
51        ParseError::InvalidOffset { line, offset } => {
52            format!("Error parsing builtin_rules.magic at line {line}: {offset}")
53        }
54        ParseError::InvalidType { line, type_spec } => {
55            format!("Error parsing builtin_rules.magic at line {line}: {type_spec}")
56        }
57        ParseError::InvalidOperator { line, operator } => {
58            format!("Error parsing builtin_rules.magic at line {line}: {operator}")
59        }
60        ParseError::InvalidValue { line, value } => {
61            format!("Error parsing builtin_rules.magic at line {line}: {value}")
62        }
63        ParseError::UnsupportedFormat {
64            line,
65            format_type,
66            message,
67        } => format!("Error parsing builtin_rules.magic at line {line}: {format_type} {message}"),
68        ParseError::IoError(err) => {
69            format!("Error parsing builtin_rules.magic: I/O error: {err}")
70        }
71    }
72}
73
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::ast::{
        Endianness, MagicRule, OffsetSpec, Operator, PStringLengthWidth, RegexCount, RegexFlags,
        StringFlags, TypeKind, Value,
    };
    use crate::parser::codegen::format_string_literal;
    use std::num::{NonZeroU32, NonZeroUsize};

    // ---------------------------------------------------------------------
    // Shared helpers
    // ---------------------------------------------------------------------

    /// Asserts that `text` contains every entry of `fragments`.
    fn assert_contains_all(text: &str, fragments: &[&str]) {
        for fragment in fragments {
            assert!(
                text.contains(*fragment),
                "expected {fragment:?} in {text:?}"
            );
        }
    }

    /// Builds a level-0, childless rule comparing a signed byte for equality.
    fn signed_byte_rule(offset: OffsetSpec, value: Value, message: &str) -> MagicRule {
        MagicRule {
            offset,
            typ: TypeKind::Byte { signed: true },
            op: Operator::Equal,
            value,
            message: message.to_string(),
            children: Vec::new(),
            level: 0,
            strength_modifier: None,
            value_transform: None,
        }
    }

    /// Asserts that `magic_source` is rejected and that the formatted error
    /// names the built-in rules file.
    fn assert_rejected_with_file_name(magic_source: &str) {
        let error = parse_and_generate_builtin_rules(magic_source)
            .expect_err("magic source should fail to parse");
        assert!(format_parse_error(&error).contains("builtin_rules.magic"));
    }

    // ---------------------------------------------------------------------
    // format_parse_error
    // ---------------------------------------------------------------------

    #[test]
    fn test_format_parse_error_invalid_syntax() {
        let rendered = format_parse_error(&ParseError::InvalidSyntax {
            line: 42,
            message: "expected offset".to_string(),
        });
        assert_contains_all(
            &rendered,
            &["line 42", "expected offset", "builtin_rules.magic"],
        );
    }

    #[test]
    fn test_format_parse_error_unsupported_feature() {
        let rendered = format_parse_error(&ParseError::UnsupportedFeature {
            line: 10,
            feature: "regex patterns".to_string(),
        });
        assert_contains_all(&rendered, &["line 10", "regex patterns"]);
    }

    #[test]
    fn test_format_parse_error_invalid_offset() {
        let rendered = format_parse_error(&ParseError::InvalidOffset {
            line: 5,
            offset: "invalid offset spec".to_string(),
        });
        assert_contains_all(&rendered, &["line 5", "invalid offset spec"]);
    }

    #[test]
    fn test_format_parse_error_invalid_type() {
        let rendered = format_parse_error(&ParseError::InvalidType {
            line: 7,
            type_spec: "unknown type".to_string(),
        });
        assert_contains_all(&rendered, &["line 7", "unknown type"]);
    }

    #[test]
    fn test_format_parse_error_invalid_operator() {
        let rendered = format_parse_error(&ParseError::InvalidOperator {
            line: 12,
            operator: "bad operator".to_string(),
        });
        assert_contains_all(&rendered, &["line 12", "bad operator"]);
    }

    #[test]
    fn test_format_parse_error_invalid_value() {
        let rendered = format_parse_error(&ParseError::InvalidValue {
            line: 15,
            value: "malformed value".to_string(),
        });
        assert_contains_all(&rendered, &["line 15", "malformed value"]);
    }

    // ---------------------------------------------------------------------
    // Offset, type, operator and value serialization
    // ---------------------------------------------------------------------

    #[test]
    fn test_serialize_offset_spec_absolute() {
        assert_eq!(
            serialize_offset_spec(&OffsetSpec::Absolute(42)),
            "OffsetSpec::Absolute(42)"
        );
    }

    #[test]
    fn test_serialize_offset_spec_relative() {
        assert_eq!(
            serialize_offset_spec(&OffsetSpec::Relative(-10)),
            "OffsetSpec::Relative(-10)"
        );
    }

    #[test]
    fn test_serialize_offset_spec_from_end() {
        assert_eq!(
            serialize_offset_spec(&OffsetSpec::FromEnd(-16)),
            "OffsetSpec::FromEnd(-16)"
        );
    }

    #[test]
    fn test_serialize_type_kind_byte() {
        // Signed first, then unsigned.
        for signed in [true, false] {
            assert_eq!(
                serialize_type_kind(&TypeKind::Byte { signed }),
                format!("TypeKind::Byte {{ signed: {signed} }}")
            );
        }
    }

    #[test]
    fn test_serialize_type_kind_short() {
        let code = serialize_type_kind(&TypeKind::Short {
            endian: Endianness::Little,
            signed: false,
        });
        assert_contains_all(
            &code,
            &["TypeKind::Short", "Endianness::Little", "signed: false"],
        );
    }

    #[test]
    fn test_serialize_type_kind_long() {
        let code = serialize_type_kind(&TypeKind::Long {
            endian: Endianness::Big,
            signed: true,
        });
        assert_contains_all(
            &code,
            &["TypeKind::Long", "Endianness::Big", "signed: true"],
        );
    }

    #[test]
    fn test_serialize_type_kind_quad() {
        let cases = [
            (Endianness::Little, true, "Endianness::Little", "signed: true"),
            (Endianness::Big, false, "Endianness::Big", "signed: false"),
        ];
        for (endian, signed, endian_text, signed_text) in cases {
            let code = serialize_type_kind(&TypeKind::Quad { endian, signed });
            assert_contains_all(&code, &["TypeKind::Quad", endian_text, signed_text]);
        }
    }

    #[test]
    fn test_serialize_type_kind_float() {
        let cases = [
            (Endianness::Native, "Native"),
            (Endianness::Little, "Little"),
            (Endianness::Big, "Big"),
        ];
        for (endian, variant) in cases {
            assert_eq!(
                serialize_type_kind(&TypeKind::Float { endian }),
                format!("TypeKind::Float {{ endian: Endianness::{variant} }}")
            );
        }
    }

    #[test]
    fn test_serialize_type_kind_double() {
        let cases = [
            (Endianness::Native, "Native"),
            (Endianness::Little, "Little"),
            (Endianness::Big, "Big"),
        ];
        for (endian, variant) in cases {
            assert_eq!(
                serialize_type_kind(&TypeKind::Double { endian }),
                format!("TypeKind::Double {{ endian: Endianness::{variant} }}")
            );
        }
    }

    #[test]
    fn test_serialize_value_float() {
        let cases = [
            // Finite literals, positive and negative.
            (3.125, "Value::Float(3.125)"),
            (-1.0, "Value::Float(-1.0)"),
            // Non-finite values must still come out as valid Rust expressions.
            (f64::NAN, "Value::Float(f64::NAN)"),
            (f64::INFINITY, "Value::Float(f64::INFINITY)"),
            (f64::NEG_INFINITY, "Value::Float(f64::NEG_INFINITY)"),
        ];
        for (input, expected) in cases {
            assert_eq!(serialize_value(&Value::Float(input)), expected);
        }
    }

    #[test]
    fn test_serialize_type_kind_string() {
        let cases = [
            (
                None,
                StringFlags::default(),
                "TypeKind::String { max_length: None, flags: crate::parser::ast::StringFlags::default() }",
            ),
            (
                Some(256),
                StringFlags::default(),
                "TypeKind::String { max_length: Some(256), flags: crate::parser::ast::StringFlags::default() }",
            ),
            // Non-default flags are emitted as a builder chain.
            (
                None,
                StringFlags::default().with_ignore_lowercase(true),
                "TypeKind::String { max_length: None, flags: crate::parser::ast::StringFlags::default().with_ignore_lowercase(true) }",
            ),
        ];
        for (max_length, flags, expected) in cases {
            assert_eq!(
                serialize_type_kind(&TypeKind::String { max_length, flags }),
                expected
            );
        }
    }

    #[test]
    fn test_serialize_type_kind_pstring() {
        let cases = [
            (
                None,
                PStringLengthWidth::OneByte,
                "TypeKind::PString { max_length: None, length_width: PStringLengthWidth::OneByte, length_includes_itself: false }",
            ),
            (
                Some(128),
                PStringLengthWidth::FourByteLE,
                "TypeKind::PString { max_length: Some(128), length_width: PStringLengthWidth::FourByteLE, length_includes_itself: false }",
            ),
        ];
        for (max_length, length_width, expected) in cases {
            let typ = TypeKind::PString {
                max_length,
                length_width,
                length_includes_itself: false,
            };
            assert_eq!(serialize_type_kind(&typ), expected);
        }
    }

    #[test]
    fn test_serialize_operator() {
        let cases = [
            (Operator::Equal, "Operator::Equal"),
            (Operator::NotEqual, "Operator::NotEqual"),
            (Operator::LessThan, "Operator::LessThan"),
            (Operator::GreaterThan, "Operator::GreaterThan"),
            (Operator::LessEqual, "Operator::LessEqual"),
            (Operator::GreaterEqual, "Operator::GreaterEqual"),
            (Operator::BitwiseAnd, "Operator::BitwiseAnd"),
            (Operator::BitwiseAndMask(0xFF), "Operator::BitwiseAndMask(255)"),
        ];
        for (operator, expected) in cases {
            assert_eq!(serialize_operator(&operator), expected);
        }
    }

    #[test]
    fn test_serialize_value_uint() {
        assert_eq!(serialize_value(&Value::Uint(12345)), "Value::Uint(12_345)");
    }

    #[test]
    fn test_serialize_value_int() {
        let code = serialize_value(&Value::Int(-100));
        assert!(code.contains("Value::Int"));
    }

    #[test]
    fn test_serialize_value_bytes() {
        let elf_magic = vec![0x7F, 0x45, 0x4C, 0x46];
        assert_eq!(
            serialize_value(&Value::Bytes(elf_magic)),
            "Value::Bytes(vec![0x7f, 0x45, 0x4c, 0x46])"
        );
    }

    #[test]
    fn test_serialize_value_string() {
        let code = serialize_value(&Value::String("test".to_string()));
        assert_contains_all(&code, &["Value::String", "test"]);
    }

    // ---------------------------------------------------------------------
    // Low-level formatting helpers
    // ---------------------------------------------------------------------

    #[test]
    fn test_format_number_small() {
        // Values below 10_000 are printed without separators.
        for (number, expected) in [(42, "42"), (999, "999"), (9999, "9999")] {
            assert_eq!(format_number(number), expected);
        }
    }

    #[test]
    fn test_format_number_large() {
        let cases = [
            (10000, "10_000"),
            (123_456, "123_456"),
            (1_234_567_890, "1_234_567_890"),
        ];
        for (number, expected) in cases {
            assert_eq!(format_number(number), expected);
        }
    }

    #[test]
    fn test_serialize_endianness() {
        let cases = [
            (Endianness::Little, "Endianness::Little"),
            (Endianness::Big, "Endianness::Big"),
            (Endianness::Native, "Endianness::Native"),
        ];
        for (endian, expected) in cases {
            assert_eq!(serialize_endianness(endian), expected);
        }
    }

    #[test]
    fn test_format_byte_vec_empty() {
        assert_eq!(format_byte_vec(&[]), "vec![]");
    }

    #[test]
    fn test_format_byte_vec_single() {
        assert_eq!(format_byte_vec(&[0x42]), "vec![0x42]");
    }

    #[test]
    fn test_format_byte_vec_multiple() {
        assert_eq!(format_byte_vec(&[0x12, 0x34, 0x56]), "vec![0x12, 0x34, 0x56]");
    }

    #[test]
    fn test_format_string_literal() {
        let cases = [
            ("hello", "\"hello\""),
            ("test\n", "\"test\\n\""),
            ("quote\"here", "\"quote\\\"here\""),
        ];
        for (raw, expected) in cases {
            assert_eq!(format_string_literal(raw), expected);
        }
    }

    // ---------------------------------------------------------------------
    // Rule and children code generation
    // ---------------------------------------------------------------------

    #[test]
    fn test_generate_builtin_rules_empty() {
        let generated = generate_builtin_rules(&Vec::<MagicRule>::new());

        // "vec![" also matches the empty form "vec![]", so one check covers both.
        assert_contains_all(
            &generated,
            &[
                "LazyLock<Vec<MagicRule>>",
                "vec![",
                "use crate::parser::ast",
                "use std::sync::LazyLock",
            ],
        );
    }

    #[test]
    fn test_generate_builtin_rules_single_rule() {
        let rule = signed_byte_rule(OffsetSpec::Absolute(0), Value::Uint(0x7F), "test");

        let generated = generate_builtin_rules(&[rule]);

        assert_contains_all(
            &generated,
            &[
                "OffsetSpec::Absolute(0)",
                "TypeKind::Byte { signed: true }",
                "Operator::Equal",
                "Value::Uint(127)",
                "test",
                "level: 0",
            ],
        );
    }

    #[test]
    fn test_serialize_children_empty() {
        assert_eq!(serialize_children(&[], 4), "Vec::new()");
    }

    #[test]
    fn test_serialize_children_with_nested_rule() {
        let child = MagicRule {
            level: 1,
            ..signed_byte_rule(OffsetSpec::Absolute(4), Value::Uint(1), "child")
        };

        let code = serialize_children(&[child], 4);

        assert_contains_all(
            &code,
            &["vec![", "OffsetSpec::Absolute(4)", "level: 1", "child"],
        );
    }

    // ---------------------------------------------------------------------
    // parse_and_generate_builtin_rules: failure and success paths
    // ---------------------------------------------------------------------

    #[test]
    fn test_parse_and_generate_invalid_syntax() {
        assert_rejected_with_file_name("this is not valid magic syntax");
    }

    #[test]
    fn test_parse_and_generate_invalid_offset() {
        assert_rejected_with_file_name("999999999999999999999 byte =0x7F ELF");
    }

    #[test]
    fn test_parse_and_generate_invalid_type() {
        assert_rejected_with_file_name("0 invalidtype =0x7F test");
    }

    #[test]
    fn test_parse_and_generate_empty_input() {
        // Empty input is valid: it simply yields no rules.
        let generated =
            parse_and_generate_builtin_rules("").expect("empty input should be accepted");
        assert!(generated.contains("vec!["));
    }

    #[test]
    fn test_parse_and_generate_valid_magic() {
        let generated = parse_and_generate_builtin_rules("0 byte =0x7F ELF executable")
            .expect("valid magic should be accepted");
        assert_contains_all(
            &generated,
            &[
                "OffsetSpec::Absolute(0)",
                "TypeKind::Byte { signed: true }",
                "Value::Uint(127)",
                "ELF executable",
            ],
        );
    }

    #[test]
    fn test_parse_and_generate_malformed_value() {
        assert_rejected_with_file_name("0 byte =notahexvalue test");
    }

    // ---------------------------------------------------------------------
    // Panic-marker regression tests
    // ---------------------------------------------------------------------

    /// Regression test for a PR #215 Copilot review comment. The code generated
    /// for `TypeKind::Regex` and `TypeKind::Search` ends up in
    /// `builtin_rules.rs`, i.e. library code, where AGENTS.md bans panic
    /// markers. The codegen therefore must NOT emit `.expect("nonzero")`; it
    /// must emit `.unwrap_or(NonZero::<..>::MIN)`, which keeps the invariant
    /// expression without adding a panic path.
    #[test]
    fn test_serialize_regex_codegen_has_no_expect_panic_marker() {
        let counts = [
            RegexCount::Bytes(NonZeroU32::new(256).unwrap()),
            RegexCount::Lines(Some(NonZeroU32::new(3).unwrap())),
        ];
        for count in counts {
            let generated = serialize_type_kind(&TypeKind::Regex {
                flags: RegexFlags::default(),
                count,
            });
            assert!(
                !generated.contains(".expect("),
                "serialize_type_kind must not emit .expect() (AGENTS.md panic-marker ban); got:\n{generated}"
            );
            assert!(
                generated.contains(".unwrap_or(::std::num::NonZeroU32::MIN)"),
                "serialize_type_kind must emit .unwrap_or(NonZeroU32::MIN); got:\n{generated}"
            );
        }
    }

    #[test]
    fn test_serialize_search_codegen_has_no_expect_panic_marker() {
        let generated = serialize_type_kind(&TypeKind::Search {
            range: NonZeroUsize::new(512).unwrap(),
        });
        assert!(
            !generated.contains(".expect("),
            "serialize_type_kind must not emit .expect() (AGENTS.md panic-marker ban); got:\n{generated}"
        );
        assert!(
            generated.contains(".unwrap_or(::std::num::NonZeroUsize::MIN)"),
            "serialize_type_kind must emit .unwrap_or(NonZeroUsize::MIN); got:\n{generated}"
        );
    }
}