Skip to main content

libmagic_rs/
build_helpers.rs

1// Copyright (c) 2025-2026 the libmagic-rs contributors
2// SPDX-License-Identifier: Apache-2.0
3
//! Build-time helpers for compiling magic rules.
//!
//! This module contains functionality used by the build script to parse magic files
//! and generate Rust code for built-in rules. It is extracted into a library module
//! to enable comprehensive testing of the build process, including error cases.
//!
//! Serialization logic is provided by [`crate::parser::codegen`], which is shared
//! with `build.rs` to avoid duplication.
12use crate::error::ParseError;
13use crate::parser::parse_text_magic_file;
14
// Test-only import of the codegen functions exercised by the unit tests below
16#[cfg(test)]
17use crate::parser::codegen::{
18    format_byte_vec, format_number, generate_builtin_rules, serialize_children,
19    serialize_endianness, serialize_offset_spec, serialize_operator, serialize_type_kind,
20    serialize_value,
21};
22
23/// Parses a magic file and generates Rust code for the built-in rules.
24///
25/// This function wraps the parsing and code generation steps, providing a testable
26/// interface for the build script logic.
27///
28/// # Errors
29///
30/// Returns a `ParseError` if the magic file content is invalid or malformed.
31pub fn parse_and_generate_builtin_rules(magic_content: &str) -> Result<String, ParseError> {
32    let parsed = parse_text_magic_file(magic_content)?;
33    Ok(crate::parser::codegen::generate_builtin_rules(
34        &parsed.rules,
35    ))
36}
37
38/// Formats a parse error for display in build script output.
39///
40/// This function converts a `ParseError` into a human-readable message suitable
41/// for display when the build script fails.
42#[must_use]
43pub fn format_parse_error(error: &ParseError) -> String {
44    match error {
45        ParseError::InvalidSyntax { line, message } => {
46            format!("Error parsing builtin_rules.magic at line {line}: {message}")
47        }
48        ParseError::UnsupportedFeature { line, feature } => {
49            format!("Error parsing builtin_rules.magic at line {line}: {feature}")
50        }
51        ParseError::InvalidOffset { line, offset } => {
52            format!("Error parsing builtin_rules.magic at line {line}: {offset}")
53        }
54        ParseError::InvalidType { line, type_spec } => {
55            format!("Error parsing builtin_rules.magic at line {line}: {type_spec}")
56        }
57        ParseError::InvalidOperator { line, operator } => {
58            format!("Error parsing builtin_rules.magic at line {line}: {operator}")
59        }
60        ParseError::InvalidValue { line, value } => {
61            format!("Error parsing builtin_rules.magic at line {line}: {value}")
62        }
63        ParseError::UnsupportedFormat {
64            line,
65            format_type,
66            message,
67        } => format!("Error parsing builtin_rules.magic at line {line}: {format_type} {message}"),
68        ParseError::IoError(err) => {
69            format!("Error parsing builtin_rules.magic: I/O error: {err}")
70        }
71    }
72}
73
74#[cfg(test)]
75mod tests {
76    use super::*;
77    use crate::parser::ast::{Endianness, MagicRule, OffsetSpec, Operator, TypeKind, Value};
78    use crate::parser::codegen::format_string_literal;
79
80    #[test]
81    fn test_format_parse_error_invalid_syntax() {
82        let error = ParseError::InvalidSyntax {
83            line: 42,
84            message: "expected offset".to_string(),
85        };
86        let formatted = format_parse_error(&error);
87        assert!(formatted.contains("line 42"));
88        assert!(formatted.contains("expected offset"));
89        assert!(formatted.contains("builtin_rules.magic"));
90    }
91
92    #[test]
93    fn test_format_parse_error_unsupported_feature() {
94        let error = ParseError::UnsupportedFeature {
95            line: 10,
96            feature: "regex patterns".to_string(),
97        };
98        let formatted = format_parse_error(&error);
99        assert!(formatted.contains("line 10"));
100        assert!(formatted.contains("regex patterns"));
101    }
102
103    #[test]
104    fn test_format_parse_error_invalid_offset() {
105        let error = ParseError::InvalidOffset {
106            line: 5,
107            offset: "invalid offset spec".to_string(),
108        };
109        let formatted = format_parse_error(&error);
110        assert!(formatted.contains("line 5"));
111        assert!(formatted.contains("invalid offset spec"));
112    }
113
114    #[test]
115    fn test_format_parse_error_invalid_type() {
116        let error = ParseError::InvalidType {
117            line: 7,
118            type_spec: "unknown type".to_string(),
119        };
120        let formatted = format_parse_error(&error);
121        assert!(formatted.contains("line 7"));
122        assert!(formatted.contains("unknown type"));
123    }
124
125    #[test]
126    fn test_format_parse_error_invalid_operator() {
127        let error = ParseError::InvalidOperator {
128            line: 12,
129            operator: "bad operator".to_string(),
130        };
131        let formatted = format_parse_error(&error);
132        assert!(formatted.contains("line 12"));
133        assert!(formatted.contains("bad operator"));
134    }
135
136    #[test]
137    fn test_format_parse_error_invalid_value() {
138        let error = ParseError::InvalidValue {
139            line: 15,
140            value: "malformed value".to_string(),
141        };
142        let formatted = format_parse_error(&error);
143        assert!(formatted.contains("line 15"));
144        assert!(formatted.contains("malformed value"));
145    }
146
147    #[test]
148    fn test_serialize_offset_spec_absolute() {
149        let offset = OffsetSpec::Absolute(42);
150        let serialized = serialize_offset_spec(&offset);
151        assert_eq!(serialized, "OffsetSpec::Absolute(42)");
152    }
153
154    #[test]
155    fn test_serialize_offset_spec_relative() {
156        let offset = OffsetSpec::Relative(-10);
157        let serialized = serialize_offset_spec(&offset);
158        assert_eq!(serialized, "OffsetSpec::Relative(-10)");
159    }
160
161    #[test]
162    fn test_serialize_offset_spec_from_end() {
163        let offset = OffsetSpec::FromEnd(-16);
164        let serialized = serialize_offset_spec(&offset);
165        assert_eq!(serialized, "OffsetSpec::FromEnd(-16)");
166    }
167
168    #[test]
169    fn test_serialize_type_kind_byte() {
170        let signed = TypeKind::Byte { signed: true };
171        assert_eq!(
172            serialize_type_kind(&signed),
173            "TypeKind::Byte { signed: true }"
174        );
175        let unsigned = TypeKind::Byte { signed: false };
176        assert_eq!(
177            serialize_type_kind(&unsigned),
178            "TypeKind::Byte { signed: false }"
179        );
180    }
181
182    #[test]
183    fn test_serialize_type_kind_short() {
184        let typ = TypeKind::Short {
185            endian: Endianness::Little,
186            signed: false,
187        };
188        let serialized = serialize_type_kind(&typ);
189        assert!(serialized.contains("TypeKind::Short"));
190        assert!(serialized.contains("Endianness::Little"));
191        assert!(serialized.contains("signed: false"));
192    }
193
194    #[test]
195    fn test_serialize_type_kind_long() {
196        let typ = TypeKind::Long {
197            endian: Endianness::Big,
198            signed: true,
199        };
200        let serialized = serialize_type_kind(&typ);
201        assert!(serialized.contains("TypeKind::Long"));
202        assert!(serialized.contains("Endianness::Big"));
203        assert!(serialized.contains("signed: true"));
204    }
205
206    #[test]
207    fn test_serialize_type_kind_quad() {
208        let typ = TypeKind::Quad {
209            endian: Endianness::Little,
210            signed: true,
211        };
212        let serialized = serialize_type_kind(&typ);
213        assert!(serialized.contains("TypeKind::Quad"));
214        assert!(serialized.contains("Endianness::Little"));
215        assert!(serialized.contains("signed: true"));
216
217        let typ2 = TypeKind::Quad {
218            endian: Endianness::Big,
219            signed: false,
220        };
221        let serialized2 = serialize_type_kind(&typ2);
222        assert!(serialized2.contains("TypeKind::Quad"));
223        assert!(serialized2.contains("Endianness::Big"));
224        assert!(serialized2.contains("signed: false"));
225    }
226
227    #[test]
228    fn test_serialize_type_kind_float() {
229        let cases = [
230            (
231                TypeKind::Float {
232                    endian: Endianness::Native,
233                },
234                "TypeKind::Float { endian: Endianness::Native }",
235            ),
236            (
237                TypeKind::Float {
238                    endian: Endianness::Little,
239                },
240                "TypeKind::Float { endian: Endianness::Little }",
241            ),
242            (
243                TypeKind::Float {
244                    endian: Endianness::Big,
245                },
246                "TypeKind::Float { endian: Endianness::Big }",
247            ),
248        ];
249        for (typ, expected) in &cases {
250            assert_eq!(serialize_type_kind(typ), *expected);
251        }
252    }
253
254    #[test]
255    fn test_serialize_type_kind_double() {
256        let cases = [
257            (
258                TypeKind::Double {
259                    endian: Endianness::Native,
260                },
261                "TypeKind::Double { endian: Endianness::Native }",
262            ),
263            (
264                TypeKind::Double {
265                    endian: Endianness::Little,
266                },
267                "TypeKind::Double { endian: Endianness::Little }",
268            ),
269            (
270                TypeKind::Double {
271                    endian: Endianness::Big,
272                },
273                "TypeKind::Double { endian: Endianness::Big }",
274            ),
275        ];
276        for (typ, expected) in &cases {
277            assert_eq!(serialize_type_kind(typ), *expected);
278        }
279    }
280
281    #[test]
282    fn test_serialize_value_float() {
283        // Positive finite literal
284        let serialized = serialize_value(&Value::Float(3.125));
285        assert_eq!(serialized, "Value::Float(3.125)");
286
287        // Negative finite literal
288        let serialized = serialize_value(&Value::Float(-1.0));
289        assert_eq!(serialized, "Value::Float(-1.0)");
290
291        // Non-finite values produce valid Rust expressions
292        assert_eq!(
293            serialize_value(&Value::Float(f64::NAN)),
294            "Value::Float(f64::NAN)"
295        );
296        assert_eq!(
297            serialize_value(&Value::Float(f64::INFINITY)),
298            "Value::Float(f64::INFINITY)"
299        );
300        assert_eq!(
301            serialize_value(&Value::Float(f64::NEG_INFINITY)),
302            "Value::Float(f64::NEG_INFINITY)"
303        );
304    }
305
306    #[test]
307    fn test_serialize_type_kind_string() {
308        let typ1 = TypeKind::String { max_length: None };
309        let serialized1 = serialize_type_kind(&typ1);
310        assert_eq!(serialized1, "TypeKind::String { max_length: None }");
311
312        let typ2 = TypeKind::String {
313            max_length: Some(256),
314        };
315        let serialized2 = serialize_type_kind(&typ2);
316        assert_eq!(serialized2, "TypeKind::String { max_length: Some(256) }");
317    }
318
319    #[test]
320    fn test_serialize_type_kind_pstring() {
321        use crate::parser::ast::PStringLengthWidth;
322        let typ1 = TypeKind::PString {
323            max_length: None,
324            length_width: PStringLengthWidth::OneByte,
325            length_includes_itself: false,
326        };
327        let serialized1 = serialize_type_kind(&typ1);
328        assert_eq!(
329            serialized1,
330            "TypeKind::PString { max_length: None, length_width: PStringLengthWidth::OneByte, length_includes_itself: false }"
331        );
332
333        let typ2 = TypeKind::PString {
334            max_length: Some(128),
335            length_width: PStringLengthWidth::FourByteLE,
336            length_includes_itself: false,
337        };
338        let serialized2 = serialize_type_kind(&typ2);
339        assert_eq!(
340            serialized2,
341            "TypeKind::PString { max_length: Some(128), length_width: PStringLengthWidth::FourByteLE, length_includes_itself: false }"
342        );
343    }
344
345    #[test]
346    fn test_serialize_operator() {
347        assert_eq!(serialize_operator(&Operator::Equal), "Operator::Equal");
348        assert_eq!(
349            serialize_operator(&Operator::NotEqual),
350            "Operator::NotEqual"
351        );
352        assert_eq!(
353            serialize_operator(&Operator::LessThan),
354            "Operator::LessThan"
355        );
356        assert_eq!(
357            serialize_operator(&Operator::GreaterThan),
358            "Operator::GreaterThan"
359        );
360        assert_eq!(
361            serialize_operator(&Operator::LessEqual),
362            "Operator::LessEqual"
363        );
364        assert_eq!(
365            serialize_operator(&Operator::GreaterEqual),
366            "Operator::GreaterEqual"
367        );
368        assert_eq!(
369            serialize_operator(&Operator::BitwiseAnd),
370            "Operator::BitwiseAnd"
371        );
372        assert_eq!(
373            serialize_operator(&Operator::BitwiseAndMask(0xFF)),
374            "Operator::BitwiseAndMask(255)"
375        );
376    }
377
378    #[test]
379    fn test_serialize_value_uint() {
380        let value = Value::Uint(12345);
381        let serialized = serialize_value(&value);
382        assert_eq!(serialized, "Value::Uint(12_345)");
383    }
384
385    #[test]
386    fn test_serialize_value_int() {
387        let value = Value::Int(-100);
388        let serialized = serialize_value(&value);
389        assert!(serialized.contains("Value::Int"));
390    }
391
392    #[test]
393    fn test_serialize_value_bytes() {
394        let value = Value::Bytes(vec![0x7F, 0x45, 0x4C, 0x46]);
395        let serialized = serialize_value(&value);
396        assert_eq!(serialized, "Value::Bytes(vec![0x7f, 0x45, 0x4c, 0x46])");
397    }
398
399    #[test]
400    fn test_serialize_value_string() {
401        let value = Value::String("test".to_string());
402        let serialized = serialize_value(&value);
403        assert!(serialized.contains("Value::String"));
404        assert!(serialized.contains("test"));
405    }
406
407    #[test]
408    fn test_format_number_small() {
409        assert_eq!(format_number(42), "42");
410        assert_eq!(format_number(999), "999");
411        assert_eq!(format_number(9999), "9999");
412    }
413
414    #[test]
415    fn test_format_number_large() {
416        assert_eq!(format_number(10000), "10_000");
417        assert_eq!(format_number(123_456), "123_456");
418        assert_eq!(format_number(1_234_567_890), "1_234_567_890");
419    }
420
421    #[test]
422    fn test_serialize_endianness() {
423        assert_eq!(
424            serialize_endianness(Endianness::Little),
425            "Endianness::Little"
426        );
427        assert_eq!(serialize_endianness(Endianness::Big), "Endianness::Big");
428        assert_eq!(
429            serialize_endianness(Endianness::Native),
430            "Endianness::Native"
431        );
432    }
433
434    #[test]
435    fn test_format_byte_vec_empty() {
436        let result = format_byte_vec(&[]);
437        assert_eq!(result, "vec![]");
438    }
439
440    #[test]
441    fn test_format_byte_vec_single() {
442        let result = format_byte_vec(&[0x42]);
443        assert_eq!(result, "vec![0x42]");
444    }
445
446    #[test]
447    fn test_format_byte_vec_multiple() {
448        let result = format_byte_vec(&[0x12, 0x34, 0x56]);
449        assert_eq!(result, "vec![0x12, 0x34, 0x56]");
450    }
451
452    #[test]
453    fn test_format_string_literal() {
454        assert_eq!(format_string_literal("hello"), "\"hello\"");
455        assert_eq!(format_string_literal("test\n"), "\"test\\n\"");
456        assert_eq!(format_string_literal("quote\"here"), "\"quote\\\"here\"");
457    }
458
459    #[test]
460    fn test_generate_builtin_rules_empty() {
461        let rules: Vec<MagicRule> = vec![];
462        let generated = generate_builtin_rules(&rules);
463
464        assert!(generated.contains("LazyLock<Vec<MagicRule>>"));
465        assert!(generated.contains("vec![]") || generated.contains("vec!["));
466        assert!(generated.contains("use crate::parser::ast"));
467        assert!(generated.contains("use std::sync::LazyLock"));
468    }
469
470    #[test]
471    fn test_generate_builtin_rules_single_rule() {
472        let rule = MagicRule {
473            offset: OffsetSpec::Absolute(0),
474            typ: TypeKind::Byte { signed: true },
475            op: Operator::Equal,
476            value: Value::Uint(0x7F),
477            message: "test".to_string(),
478            children: vec![],
479            level: 0,
480            strength_modifier: None,
481            value_transform: None,
482        };
483
484        let generated = generate_builtin_rules(&[rule]);
485
486        assert!(generated.contains("OffsetSpec::Absolute(0)"));
487        assert!(generated.contains("TypeKind::Byte { signed: true }"));
488        assert!(generated.contains("Operator::Equal"));
489        assert!(generated.contains("Value::Uint(127)"));
490        assert!(generated.contains("test"));
491        assert!(generated.contains("level: 0"));
492    }
493
494    #[test]
495    fn test_serialize_children_empty() {
496        let result = serialize_children(&[], 4);
497        assert_eq!(result, "Vec::new()");
498    }
499
500    #[test]
501    fn test_serialize_children_with_nested_rule() {
502        let child = MagicRule {
503            offset: OffsetSpec::Absolute(4),
504            typ: TypeKind::Byte { signed: true },
505            op: Operator::Equal,
506            value: Value::Uint(1),
507            message: "child".to_string(),
508            children: vec![],
509            level: 1,
510            strength_modifier: None,
511            value_transform: None,
512        };
513
514        let result = serialize_children(&[child], 4);
515
516        assert!(result.contains("vec!["));
517        assert!(result.contains("OffsetSpec::Absolute(4)"));
518        assert!(result.contains("level: 1"));
519        assert!(result.contains("child"));
520    }
521
522    // Tests for invalid magic file parsing failure path
523    #[test]
524    fn test_parse_and_generate_invalid_syntax() {
525        let invalid_magic = "this is not valid magic syntax";
526        let result = parse_and_generate_builtin_rules(invalid_magic);
527
528        assert!(result.is_err());
529        let error = result.unwrap_err();
530        let formatted = format_parse_error(&error);
531        assert!(formatted.contains("builtin_rules.magic"));
532    }
533
534    #[test]
535    fn test_parse_and_generate_invalid_offset() {
536        let invalid_magic = "999999999999999999999 byte =0x7F ELF";
537        let result = parse_and_generate_builtin_rules(invalid_magic);
538
539        assert!(result.is_err());
540        let error = result.unwrap_err();
541        let formatted = format_parse_error(&error);
542        assert!(formatted.contains("builtin_rules.magic"));
543    }
544
545    #[test]
546    fn test_parse_and_generate_invalid_type() {
547        let invalid_magic = "0 invalidtype =0x7F test";
548        let result = parse_and_generate_builtin_rules(invalid_magic);
549
550        assert!(result.is_err());
551        let error = result.unwrap_err();
552        let formatted = format_parse_error(&error);
553        assert!(formatted.contains("builtin_rules.magic"));
554    }
555
556    #[test]
557    fn test_parse_and_generate_empty_input() {
558        let empty_magic = "";
559        let result = parse_and_generate_builtin_rules(empty_magic);
560
561        // Empty input should succeed with no rules
562        assert!(result.is_ok());
563        let generated = result.unwrap();
564        assert!(generated.contains("vec![]") || generated.contains("vec!["));
565    }
566
567    #[test]
568    fn test_parse_and_generate_valid_magic() {
569        let valid_magic = "0 byte =0x7F ELF executable";
570        let result = parse_and_generate_builtin_rules(valid_magic);
571
572        assert!(result.is_ok());
573        let generated = result.unwrap();
574        assert!(generated.contains("OffsetSpec::Absolute(0)"));
575        assert!(generated.contains("TypeKind::Byte { signed: true }"));
576        assert!(generated.contains("Value::Uint(127)"));
577        assert!(generated.contains("ELF executable"));
578    }
579
580    #[test]
581    fn test_parse_and_generate_malformed_value() {
582        let invalid_magic = "0 byte =notahexvalue test";
583        let result = parse_and_generate_builtin_rules(invalid_magic);
584
585        assert!(result.is_err());
586        let error = result.unwrap_err();
587        let formatted = format_parse_error(&error);
588        assert!(formatted.contains("builtin_rules.magic"));
589    }
590
591    /// Regression test for PR #215 Copilot review comment: the codegen
592    /// for `TypeKind::Regex` and `TypeKind::Search` must NOT emit
593    /// `.expect("nonzero")` (a panic marker banned by AGENTS.md for
594    /// library code, which is what the generated `builtin_rules.rs`
595    /// becomes). Instead it must emit
596    /// `.unwrap_or(NonZero::<..>::MIN)`, which preserves the invariant
597    /// expression without introducing a panic path.
598    #[test]
599    fn test_serialize_regex_codegen_has_no_expect_panic_marker() {
600        use crate::parser::ast::{RegexCount, RegexFlags};
601        use std::num::NonZeroU32;
602
603        let cases = [
604            TypeKind::Regex {
605                flags: RegexFlags::default(),
606                count: RegexCount::Bytes(NonZeroU32::new(256).unwrap()),
607            },
608            TypeKind::Regex {
609                flags: RegexFlags::default(),
610                count: RegexCount::Lines(Some(NonZeroU32::new(3).unwrap())),
611            },
612        ];
613        for typ in &cases {
614            let generated = serialize_type_kind(typ);
615            assert!(
616                !generated.contains(".expect("),
617                "serialize_type_kind must not emit .expect() (AGENTS.md panic-marker ban); got:\n{generated}"
618            );
619            assert!(
620                generated.contains(".unwrap_or(::std::num::NonZeroU32::MIN)"),
621                "serialize_type_kind must emit .unwrap_or(NonZeroU32::MIN); got:\n{generated}"
622            );
623        }
624    }
625
626    #[test]
627    fn test_serialize_search_codegen_has_no_expect_panic_marker() {
628        use std::num::NonZeroUsize;
629
630        let typ = TypeKind::Search {
631            range: NonZeroUsize::new(512).unwrap(),
632        };
633        let generated = serialize_type_kind(&typ);
634        assert!(
635            !generated.contains(".expect("),
636            "serialize_type_kind must not emit .expect() (AGENTS.md panic-marker ban); got:\n{generated}"
637        );
638        assert!(
639            generated.contains(".unwrap_or(::std::num::NonZeroUsize::MIN)"),
640            "serialize_type_kind must emit .unwrap_or(NonZeroUsize::MIN); got:\n{generated}"
641        );
642    }
643}