Skip to main content

libmagic_rs/
build_helpers.rs

1// Copyright (c) 2025-2026 the libmagic-rs contributors
2// SPDX-License-Identifier: Apache-2.0
3
//! Build-time helpers for compiling magic rules.
//!
//! This module contains functionality used by the build script to parse magic files
//! and generate Rust code for built-in rules. It is extracted into a library module
//! to enable comprehensive testing of the build process, including error cases.
//!
//! Serialization logic is provided by [`crate::parser::codegen`], which is shared
//! with `build.rs` to avoid duplication.
12use crate::error::ParseError;
13use crate::parser::parse_text_magic_file;
14
// Bring the codegen functions into scope for the unit tests (test builds only)
16#[cfg(test)]
17use crate::parser::codegen::{
18    format_byte_vec, format_number, generate_builtin_rules, serialize_children,
19    serialize_endianness, serialize_offset_spec, serialize_operator, serialize_type_kind,
20    serialize_value,
21};
22
23/// Parses a magic file and generates Rust code for the built-in rules.
24///
25/// This function wraps the parsing and code generation steps, providing a testable
26/// interface for the build script logic.
27///
28/// # Errors
29///
30/// Returns a `ParseError` if the magic file content is invalid or malformed.
31pub fn parse_and_generate_builtin_rules(magic_content: &str) -> Result<String, ParseError> {
32    let rules = parse_text_magic_file(magic_content)?;
33    Ok(crate::parser::codegen::generate_builtin_rules(&rules))
34}
35
36/// Formats a parse error for display in build script output.
37///
38/// This function converts a `ParseError` into a human-readable message suitable
39/// for display when the build script fails.
40#[must_use]
41pub fn format_parse_error(error: &ParseError) -> String {
42    match error {
43        ParseError::InvalidSyntax { line, message } => {
44            format!("Error parsing builtin_rules.magic at line {line}: {message}")
45        }
46        ParseError::UnsupportedFeature { line, feature } => {
47            format!("Error parsing builtin_rules.magic at line {line}: {feature}")
48        }
49        ParseError::InvalidOffset { line, offset } => {
50            format!("Error parsing builtin_rules.magic at line {line}: {offset}")
51        }
52        ParseError::InvalidType { line, type_spec } => {
53            format!("Error parsing builtin_rules.magic at line {line}: {type_spec}")
54        }
55        ParseError::InvalidOperator { line, operator } => {
56            format!("Error parsing builtin_rules.magic at line {line}: {operator}")
57        }
58        ParseError::InvalidValue { line, value } => {
59            format!("Error parsing builtin_rules.magic at line {line}: {value}")
60        }
61        ParseError::UnsupportedFormat {
62            line,
63            format_type,
64            message,
65        } => format!("Error parsing builtin_rules.magic at line {line}: {format_type} {message}"),
66        ParseError::IoError(err) => {
67            format!("Error parsing builtin_rules.magic: I/O error: {err}")
68        }
69    }
70}
71
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::ast::{Endianness, MagicRule, OffsetSpec, Operator, TypeKind, Value};
    use crate::parser::codegen::format_string_literal;

    /// Asserts that every entry of `fragments` occurs somewhere in `text`.
    #[track_caller]
    fn assert_contains_all(text: &str, fragments: &[&str]) {
        for fragment in fragments {
            assert!(
                text.contains(*fragment),
                "expected {text:?} to contain {fragment:?}"
            );
        }
    }

    /// Asserts that `magic` is rejected and that the formatted error names the
    /// built-in rules file.
    #[track_caller]
    fn assert_rejected(magic: &str) {
        let error = parse_and_generate_builtin_rules(magic)
            .expect_err("input should be rejected by the parser");
        assert_contains_all(&format_parse_error(&error), &["builtin_rules.magic"]);
    }

    // ---- format_parse_error ------------------------------------------------

    #[test]
    fn test_format_parse_error_invalid_syntax() {
        let rendered = format_parse_error(&ParseError::InvalidSyntax {
            line: 42,
            message: String::from("expected offset"),
        });
        assert_contains_all(&rendered, &["line 42", "expected offset", "builtin_rules.magic"]);
    }

    #[test]
    fn test_format_parse_error_unsupported_feature() {
        let rendered = format_parse_error(&ParseError::UnsupportedFeature {
            line: 10,
            feature: String::from("regex patterns"),
        });
        assert_contains_all(&rendered, &["line 10", "regex patterns"]);
    }

    #[test]
    fn test_format_parse_error_invalid_offset() {
        let rendered = format_parse_error(&ParseError::InvalidOffset {
            line: 5,
            offset: String::from("invalid offset spec"),
        });
        assert_contains_all(&rendered, &["line 5", "invalid offset spec"]);
    }

    #[test]
    fn test_format_parse_error_invalid_type() {
        let rendered = format_parse_error(&ParseError::InvalidType {
            line: 7,
            type_spec: String::from("unknown type"),
        });
        assert_contains_all(&rendered, &["line 7", "unknown type"]);
    }

    #[test]
    fn test_format_parse_error_invalid_operator() {
        let rendered = format_parse_error(&ParseError::InvalidOperator {
            line: 12,
            operator: String::from("bad operator"),
        });
        assert_contains_all(&rendered, &["line 12", "bad operator"]);
    }

    #[test]
    fn test_format_parse_error_invalid_value() {
        let rendered = format_parse_error(&ParseError::InvalidValue {
            line: 15,
            value: String::from("malformed value"),
        });
        assert_contains_all(&rendered, &["line 15", "malformed value"]);
    }

    // ---- offset / type / operator / value serialization --------------------

    #[test]
    fn test_serialize_offset_spec_absolute() {
        assert_eq!(
            serialize_offset_spec(&OffsetSpec::Absolute(42)),
            "OffsetSpec::Absolute(42)"
        );
    }

    #[test]
    fn test_serialize_offset_spec_relative() {
        assert_eq!(
            serialize_offset_spec(&OffsetSpec::Relative(-10)),
            "OffsetSpec::Relative(-10)"
        );
    }

    #[test]
    fn test_serialize_offset_spec_from_end() {
        assert_eq!(
            serialize_offset_spec(&OffsetSpec::FromEnd(-16)),
            "OffsetSpec::FromEnd(-16)"
        );
    }

    #[test]
    fn test_serialize_type_kind_byte() {
        let cases = [
            (true, "TypeKind::Byte { signed: true }"),
            (false, "TypeKind::Byte { signed: false }"),
        ];
        for (signed, expected) in cases {
            assert_eq!(serialize_type_kind(&TypeKind::Byte { signed }), expected);
        }
    }

    #[test]
    fn test_serialize_type_kind_short() {
        let rendered = serialize_type_kind(&TypeKind::Short {
            endian: Endianness::Little,
            signed: false,
        });
        assert_contains_all(
            &rendered,
            &["TypeKind::Short", "Endianness::Little", "signed: false"],
        );
    }

    #[test]
    fn test_serialize_type_kind_long() {
        let rendered = serialize_type_kind(&TypeKind::Long {
            endian: Endianness::Big,
            signed: true,
        });
        assert_contains_all(
            &rendered,
            &["TypeKind::Long", "Endianness::Big", "signed: true"],
        );
    }

    #[test]
    fn test_serialize_type_kind_quad() {
        let little_signed = serialize_type_kind(&TypeKind::Quad {
            endian: Endianness::Little,
            signed: true,
        });
        assert_contains_all(
            &little_signed,
            &["TypeKind::Quad", "Endianness::Little", "signed: true"],
        );

        let big_unsigned = serialize_type_kind(&TypeKind::Quad {
            endian: Endianness::Big,
            signed: false,
        });
        assert_contains_all(
            &big_unsigned,
            &["TypeKind::Quad", "Endianness::Big", "signed: false"],
        );
    }

    #[test]
    fn test_serialize_type_kind_float() {
        assert_eq!(
            serialize_type_kind(&TypeKind::Float {
                endian: Endianness::Native,
            }),
            "TypeKind::Float { endian: Endianness::Native }"
        );
        assert_eq!(
            serialize_type_kind(&TypeKind::Float {
                endian: Endianness::Little,
            }),
            "TypeKind::Float { endian: Endianness::Little }"
        );
        assert_eq!(
            serialize_type_kind(&TypeKind::Float {
                endian: Endianness::Big,
            }),
            "TypeKind::Float { endian: Endianness::Big }"
        );
    }

    #[test]
    fn test_serialize_type_kind_double() {
        assert_eq!(
            serialize_type_kind(&TypeKind::Double {
                endian: Endianness::Native,
            }),
            "TypeKind::Double { endian: Endianness::Native }"
        );
        assert_eq!(
            serialize_type_kind(&TypeKind::Double {
                endian: Endianness::Little,
            }),
            "TypeKind::Double { endian: Endianness::Little }"
        );
        assert_eq!(
            serialize_type_kind(&TypeKind::Double {
                endian: Endianness::Big,
            }),
            "TypeKind::Double { endian: Endianness::Big }"
        );
    }

    #[test]
    fn test_serialize_value_float() {
        let cases = [
            // Finite literals, positive and negative
            (3.125, "Value::Float(3.125)"),
            (-1.0, "Value::Float(-1.0)"),
            // Non-finite values produce valid Rust expressions
            (f64::NAN, "Value::Float(f64::NAN)"),
            (f64::INFINITY, "Value::Float(f64::INFINITY)"),
            (f64::NEG_INFINITY, "Value::Float(f64::NEG_INFINITY)"),
        ];
        for (input, expected) in cases {
            assert_eq!(serialize_value(&Value::Float(input)), expected);
        }
    }

    #[test]
    fn test_serialize_type_kind_string() {
        let cases = [
            (None, "TypeKind::String { max_length: None }"),
            (Some(256), "TypeKind::String { max_length: Some(256) }"),
        ];
        for (max_length, expected) in cases {
            assert_eq!(
                serialize_type_kind(&TypeKind::String { max_length }),
                expected
            );
        }
    }

    #[test]
    fn test_serialize_operator() {
        let cases = [
            (Operator::Equal, "Operator::Equal"),
            (Operator::NotEqual, "Operator::NotEqual"),
            (Operator::LessThan, "Operator::LessThan"),
            (Operator::GreaterThan, "Operator::GreaterThan"),
            (Operator::LessEqual, "Operator::LessEqual"),
            (Operator::GreaterEqual, "Operator::GreaterEqual"),
            (Operator::BitwiseAnd, "Operator::BitwiseAnd"),
            (Operator::BitwiseAndMask(0xFF), "Operator::BitwiseAndMask(255)"),
        ];
        for (operator, expected) in cases {
            assert_eq!(serialize_operator(&operator), expected);
        }
    }

    #[test]
    fn test_serialize_value_uint() {
        assert_eq!(serialize_value(&Value::Uint(12345)), "Value::Uint(12_345)");
    }

    #[test]
    fn test_serialize_value_int() {
        assert!(serialize_value(&Value::Int(-100)).contains("Value::Int"));
    }

    #[test]
    fn test_serialize_value_bytes() {
        // 0x7F 'E' 'L' 'F'
        let elf_magic = Value::Bytes(b"\x7fELF".to_vec());
        assert_eq!(
            serialize_value(&elf_magic),
            "Value::Bytes(vec![0x7f, 0x45, 0x4c, 0x46])"
        );
    }

    #[test]
    fn test_serialize_value_string() {
        let rendered = serialize_value(&Value::String(String::from("test")));
        assert_contains_all(&rendered, &["Value::String", "test"]);
    }

    // ---- low-level formatting helpers --------------------------------------

    #[test]
    fn test_format_number_small() {
        for (input, expected) in [(42, "42"), (999, "999"), (9999, "9999")] {
            assert_eq!(format_number(input), expected);
        }
    }

    #[test]
    fn test_format_number_large() {
        let cases = [
            (10000, "10_000"),
            (123_456, "123_456"),
            (1_234_567_890, "1_234_567_890"),
        ];
        for (input, expected) in cases {
            assert_eq!(format_number(input), expected);
        }
    }

    #[test]
    fn test_serialize_endianness() {
        let cases = [
            (Endianness::Little, "Endianness::Little"),
            (Endianness::Big, "Endianness::Big"),
            (Endianness::Native, "Endianness::Native"),
        ];
        for (endian, expected) in cases {
            assert_eq!(serialize_endianness(endian), expected);
        }
    }

    #[test]
    fn test_format_byte_vec_empty() {
        assert_eq!(format_byte_vec(&[]), "vec![]");
    }

    #[test]
    fn test_format_byte_vec_single() {
        assert_eq!(format_byte_vec(&[0x42]), "vec![0x42]");
    }

    #[test]
    fn test_format_byte_vec_multiple() {
        assert_eq!(format_byte_vec(&[0x12, 0x34, 0x56]), "vec![0x12, 0x34, 0x56]");
    }

    #[test]
    fn test_format_string_literal() {
        let cases = [
            ("hello", "\"hello\""),
            ("test\n", "\"test\\n\""),
            ("quote\"here", "\"quote\\\"here\""),
        ];
        for (raw, expected) in cases {
            assert_eq!(format_string_literal(raw), expected);
        }
    }

    // ---- whole-file code generation ----------------------------------------

    #[test]
    fn test_generate_builtin_rules_empty() {
        let no_rules: [MagicRule; 0] = [];
        let generated = generate_builtin_rules(&no_rules);

        assert_contains_all(&generated, &["LazyLock<Vec<MagicRule>>"]);
        assert!(generated.contains("vec![]") || generated.contains("vec!["));
        assert_contains_all(
            &generated,
            &["use crate::parser::ast", "use std::sync::LazyLock"],
        );
    }

    #[test]
    fn test_generate_builtin_rules_single_rule() {
        let generated = generate_builtin_rules(&[MagicRule {
            offset: OffsetSpec::Absolute(0),
            typ: TypeKind::Byte { signed: true },
            op: Operator::Equal,
            value: Value::Uint(0x7F),
            message: String::from("test"),
            children: vec![],
            level: 0,
            strength_modifier: None,
        }]);

        assert_contains_all(
            &generated,
            &[
                "OffsetSpec::Absolute(0)",
                "TypeKind::Byte { signed: true }",
                "Operator::Equal",
                "Value::Uint(127)",
                "test",
                "level: 0",
            ],
        );
    }

    #[test]
    fn test_serialize_children_empty() {
        assert_eq!(serialize_children(&[], 4), "Vec::new()");
    }

    #[test]
    fn test_serialize_children_with_nested_rule() {
        let nested = MagicRule {
            offset: OffsetSpec::Absolute(4),
            typ: TypeKind::Byte { signed: true },
            op: Operator::Equal,
            value: Value::Uint(1),
            message: String::from("child"),
            children: vec![],
            level: 1,
            strength_modifier: None,
        };

        let rendered = serialize_children(&[nested], 4);

        assert_contains_all(
            &rendered,
            &["vec![", "OffsetSpec::Absolute(4)", "level: 1", "child"],
        );
    }

    // ---- parse_and_generate_builtin_rules: failure paths -------------------

    #[test]
    fn test_parse_and_generate_invalid_syntax() {
        assert_rejected("this is not valid magic syntax");
    }

    #[test]
    fn test_parse_and_generate_invalid_offset() {
        assert_rejected("999999999999999999999 byte =0x7F ELF");
    }

    #[test]
    fn test_parse_and_generate_invalid_type() {
        assert_rejected("0 invalidtype =0x7F test");
    }

    // ---- parse_and_generate_builtin_rules: success paths -------------------

    #[test]
    fn test_parse_and_generate_empty_input() {
        // Empty input should succeed with no rules
        let generated = parse_and_generate_builtin_rules("")
            .expect("empty input should succeed with no rules");
        assert!(generated.contains("vec![]") || generated.contains("vec!["));
    }

    #[test]
    fn test_parse_and_generate_valid_magic() {
        let generated = parse_and_generate_builtin_rules("0 byte =0x7F ELF executable")
            .expect("a well-formed rule should parse");
        assert_contains_all(
            &generated,
            &[
                "OffsetSpec::Absolute(0)",
                "TypeKind::Byte { signed: true }",
                "Value::Uint(127)",
                "ELF executable",
            ],
        );
    }

    #[test]
    fn test_parse_and_generate_malformed_value() {
        assert_rejected("0 byte =notahexvalue test");
    }
}