Skip to main content

libmagic_rs/
build_helpers.rs

1// Copyright (c) 2025-2026 the libmagic-rs contributors
2// SPDX-License-Identifier: Apache-2.0
3
//! Build-time helpers for compiling magic rules.
//!
//! This module contains functionality used by the build script to parse magic files
//! and generate Rust code for built-in rules. It is extracted into a library module
//! to enable comprehensive testing of the build process, including error cases.
//!
//! Serialization logic is provided by [`crate::parser::codegen`], which is shared
//! with `build.rs` to avoid duplication.
12use crate::error::ParseError;
13use crate::parser::parse_text_magic_file;
14
// Import codegen functions used by tests (private, test-only; not a re-export)
16#[cfg(test)]
17use crate::parser::codegen::{
18    format_byte_vec, format_number, generate_builtin_rules, serialize_children,
19    serialize_endianness, serialize_offset_spec, serialize_operator, serialize_type_kind,
20    serialize_value,
21};
22
23/// Parses a magic file and generates Rust code for the built-in rules.
24///
25/// This function wraps the parsing and code generation steps, providing a testable
26/// interface for the build script logic.
27///
28/// # Errors
29///
30/// Returns a `ParseError` if the magic file content is invalid or malformed.
31pub fn parse_and_generate_builtin_rules(magic_content: &str) -> Result<String, ParseError> {
32    let rules = parse_text_magic_file(magic_content)?;
33    Ok(crate::parser::codegen::generate_builtin_rules(&rules))
34}
35
36/// Formats a parse error for display in build script output.
37///
38/// This function converts a `ParseError` into a human-readable message suitable
39/// for display when the build script fails.
40#[must_use]
41pub fn format_parse_error(error: &ParseError) -> String {
42    match error {
43        ParseError::InvalidSyntax { line, message } => {
44            format!("Error parsing builtin_rules.magic at line {line}: {message}")
45        }
46        ParseError::UnsupportedFeature { line, feature } => {
47            format!("Error parsing builtin_rules.magic at line {line}: {feature}")
48        }
49        ParseError::InvalidOffset { line, offset } => {
50            format!("Error parsing builtin_rules.magic at line {line}: {offset}")
51        }
52        ParseError::InvalidType { line, type_spec } => {
53            format!("Error parsing builtin_rules.magic at line {line}: {type_spec}")
54        }
55        ParseError::InvalidOperator { line, operator } => {
56            format!("Error parsing builtin_rules.magic at line {line}: {operator}")
57        }
58        ParseError::InvalidValue { line, value } => {
59            format!("Error parsing builtin_rules.magic at line {line}: {value}")
60        }
61        ParseError::UnsupportedFormat {
62            line,
63            format_type,
64            message,
65        } => format!("Error parsing builtin_rules.magic at line {line}: {format_type} {message}"),
66        ParseError::IoError(err) => {
67            format!("Error parsing builtin_rules.magic: I/O error: {err}")
68        }
69    }
70}
71
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::ast::{Endianness, MagicRule, OffsetSpec, Operator, TypeKind, Value};
    use crate::parser::codegen::format_string_literal;

    /// Asserts that `text` contains every entry of `fragments`.
    ///
    /// On failure the panic message shows both the full text and the missing
    /// fragment, which a bare `assert!(text.contains(..))` does not.
    #[track_caller]
    fn assert_contains_all(text: &str, fragments: &[&str]) {
        for fragment in fragments {
            assert!(
                text.contains(fragment),
                "expected {text:?} to contain {fragment:?}"
            );
        }
    }

    /// Runs the parse-and-generate pipeline on input that must be rejected and
    /// returns the formatted build error.
    #[track_caller]
    fn formatted_failure(magic: &str) -> String {
        let error = parse_and_generate_builtin_rules(magic)
            .expect_err("malformed magic input must be rejected");
        format_parse_error(&error)
    }

    // --- format_parse_error ---------------------------------------------

    #[test]
    fn test_format_parse_error_invalid_syntax() {
        let error = ParseError::InvalidSyntax {
            line: 42,
            message: "expected offset".to_string(),
        };
        assert_contains_all(
            &format_parse_error(&error),
            &["line 42", "expected offset", "builtin_rules.magic"],
        );
    }

    #[test]
    fn test_format_parse_error_unsupported_feature() {
        let error = ParseError::UnsupportedFeature {
            line: 10,
            feature: "regex patterns".to_string(),
        };
        assert_contains_all(&format_parse_error(&error), &["line 10", "regex patterns"]);
    }

    #[test]
    fn test_format_parse_error_invalid_offset() {
        let error = ParseError::InvalidOffset {
            line: 5,
            offset: "invalid offset spec".to_string(),
        };
        assert_contains_all(
            &format_parse_error(&error),
            &["line 5", "invalid offset spec"],
        );
    }

    #[test]
    fn test_format_parse_error_invalid_type() {
        let error = ParseError::InvalidType {
            line: 7,
            type_spec: "unknown type".to_string(),
        };
        assert_contains_all(&format_parse_error(&error), &["line 7", "unknown type"]);
    }

    #[test]
    fn test_format_parse_error_invalid_operator() {
        let error = ParseError::InvalidOperator {
            line: 12,
            operator: "bad operator".to_string(),
        };
        assert_contains_all(&format_parse_error(&error), &["line 12", "bad operator"]);
    }

    #[test]
    fn test_format_parse_error_invalid_value() {
        let error = ParseError::InvalidValue {
            line: 15,
            value: "malformed value".to_string(),
        };
        assert_contains_all(&format_parse_error(&error), &["line 15", "malformed value"]);
    }

    // --- offset / type / operator / value serialization -------------------

    #[test]
    fn test_serialize_offset_spec_absolute() {
        assert_eq!(
            serialize_offset_spec(&OffsetSpec::Absolute(42)),
            "OffsetSpec::Absolute(42)"
        );
    }

    #[test]
    fn test_serialize_offset_spec_relative() {
        assert_eq!(
            serialize_offset_spec(&OffsetSpec::Relative(-10)),
            "OffsetSpec::Relative(-10)"
        );
    }

    #[test]
    fn test_serialize_offset_spec_from_end() {
        assert_eq!(
            serialize_offset_spec(&OffsetSpec::FromEnd(-16)),
            "OffsetSpec::FromEnd(-16)"
        );
    }

    #[test]
    fn test_serialize_type_kind_byte() {
        assert_eq!(
            serialize_type_kind(&TypeKind::Byte { signed: true }),
            "TypeKind::Byte { signed: true }"
        );
        assert_eq!(
            serialize_type_kind(&TypeKind::Byte { signed: false }),
            "TypeKind::Byte { signed: false }"
        );
    }

    #[test]
    fn test_serialize_type_kind_short() {
        let serialized = serialize_type_kind(&TypeKind::Short {
            endian: Endianness::Little,
            signed: false,
        });
        assert_contains_all(
            &serialized,
            &["TypeKind::Short", "Endianness::Little", "signed: false"],
        );
    }

    #[test]
    fn test_serialize_type_kind_long() {
        let serialized = serialize_type_kind(&TypeKind::Long {
            endian: Endianness::Big,
            signed: true,
        });
        assert_contains_all(
            &serialized,
            &["TypeKind::Long", "Endianness::Big", "signed: true"],
        );
    }

    #[test]
    fn test_serialize_type_kind_quad() {
        let little_signed = serialize_type_kind(&TypeKind::Quad {
            endian: Endianness::Little,
            signed: true,
        });
        assert_contains_all(
            &little_signed,
            &["TypeKind::Quad", "Endianness::Little", "signed: true"],
        );

        let big_unsigned = serialize_type_kind(&TypeKind::Quad {
            endian: Endianness::Big,
            signed: false,
        });
        assert_contains_all(
            &big_unsigned,
            &["TypeKind::Quad", "Endianness::Big", "signed: false"],
        );
    }

    #[test]
    fn test_serialize_type_kind_string() {
        assert_eq!(
            serialize_type_kind(&TypeKind::String { max_length: None }),
            "TypeKind::String { max_length: None }"
        );
        assert_eq!(
            serialize_type_kind(&TypeKind::String {
                max_length: Some(256),
            }),
            "TypeKind::String { max_length: Some(256) }"
        );
    }

    #[test]
    fn test_serialize_operator() {
        let cases = [
            (Operator::Equal, "Operator::Equal"),
            (Operator::NotEqual, "Operator::NotEqual"),
            (Operator::LessThan, "Operator::LessThan"),
            (Operator::GreaterThan, "Operator::GreaterThan"),
            (Operator::LessEqual, "Operator::LessEqual"),
            (Operator::GreaterEqual, "Operator::GreaterEqual"),
            (Operator::BitwiseAnd, "Operator::BitwiseAnd"),
            // The mask is emitted in decimal, not in the hex form used here.
            (
                Operator::BitwiseAndMask(0xFF),
                "Operator::BitwiseAndMask(255)",
            ),
        ];
        for (operator, expected) in &cases {
            assert_eq!(serialize_operator(operator), *expected);
        }
    }

    #[test]
    fn test_serialize_value_uint() {
        assert_eq!(serialize_value(&Value::Uint(12345)), "Value::Uint(12_345)");
    }

    #[test]
    fn test_serialize_value_int() {
        let serialized = serialize_value(&Value::Int(-100));
        assert_contains_all(&serialized, &["Value::Int"]);
    }

    #[test]
    fn test_serialize_value_bytes() {
        let elf_magic = Value::Bytes(vec![0x7F, 0x45, 0x4C, 0x46]);
        assert_eq!(
            serialize_value(&elf_magic),
            "Value::Bytes(vec![0x7f, 0x45, 0x4c, 0x46])"
        );
    }

    #[test]
    fn test_serialize_value_string() {
        let serialized = serialize_value(&Value::String("test".to_string()));
        assert_contains_all(&serialized, &["Value::String", "test"]);
    }

    // --- low-level formatting helpers ------------------------------------

    #[test]
    fn test_format_number_small() {
        // Values below 10_000 are emitted without digit separators.
        assert_eq!(format_number(42), "42");
        assert_eq!(format_number(999), "999");
        assert_eq!(format_number(9999), "9999");
    }

    #[test]
    fn test_format_number_large() {
        assert_eq!(format_number(10000), "10_000");
        assert_eq!(format_number(123_456), "123_456");
        assert_eq!(format_number(1_234_567_890), "1_234_567_890");
    }

    #[test]
    fn test_serialize_endianness() {
        assert_eq!(
            serialize_endianness(Endianness::Little),
            "Endianness::Little"
        );
        assert_eq!(serialize_endianness(Endianness::Big), "Endianness::Big");
        assert_eq!(
            serialize_endianness(Endianness::Native),
            "Endianness::Native"
        );
    }

    #[test]
    fn test_format_byte_vec_empty() {
        assert_eq!(format_byte_vec(&[]), "vec![]");
    }

    #[test]
    fn test_format_byte_vec_single() {
        assert_eq!(format_byte_vec(&[0x42]), "vec![0x42]");
    }

    #[test]
    fn test_format_byte_vec_multiple() {
        assert_eq!(format_byte_vec(&[0x12, 0x34, 0x56]), "vec![0x12, 0x34, 0x56]");
    }

    #[test]
    fn test_format_string_literal() {
        assert_eq!(format_string_literal("hello"), "\"hello\"");
        assert_eq!(format_string_literal("test\n"), "\"test\\n\"");
        assert_eq!(format_string_literal("quote\"here"), "\"quote\\\"here\"");
    }

    // --- whole-file generation --------------------------------------------

    #[test]
    fn test_generate_builtin_rules_empty() {
        let rules: Vec<MagicRule> = vec![];
        let generated = generate_builtin_rules(&rules);

        // "vec![" also matches the empty "vec![]" form, so one check covers both.
        assert_contains_all(
            &generated,
            &[
                "LazyLock<Vec<MagicRule>>",
                "vec![",
                "use crate::parser::ast",
                "use std::sync::LazyLock",
            ],
        );
    }

    #[test]
    fn test_generate_builtin_rules_single_rule() {
        let rule = MagicRule {
            offset: OffsetSpec::Absolute(0),
            typ: TypeKind::Byte { signed: true },
            op: Operator::Equal,
            value: Value::Uint(0x7F),
            message: "test".to_string(),
            children: vec![],
            level: 0,
            strength_modifier: None,
        };

        let generated = generate_builtin_rules(&[rule]);

        assert_contains_all(
            &generated,
            &[
                "OffsetSpec::Absolute(0)",
                "TypeKind::Byte { signed: true }",
                "Operator::Equal",
                "Value::Uint(127)",
                "test",
                "level: 0",
            ],
        );
    }

    #[test]
    fn test_serialize_children_empty() {
        assert_eq!(serialize_children(&[], 4), "Vec::new()");
    }

    #[test]
    fn test_serialize_children_with_nested_rule() {
        let child = MagicRule {
            offset: OffsetSpec::Absolute(4),
            typ: TypeKind::Byte { signed: true },
            op: Operator::Equal,
            value: Value::Uint(1),
            message: "child".to_string(),
            children: vec![],
            level: 1,
            strength_modifier: None,
        };

        let serialized = serialize_children(&[child], 4);

        assert_contains_all(
            &serialized,
            &["vec![", "OffsetSpec::Absolute(4)", "level: 1", "child"],
        );
    }

    // Tests for invalid magic file parsing failure path
    #[test]
    fn test_parse_and_generate_invalid_syntax() {
        let formatted = formatted_failure("this is not valid magic syntax");
        assert_contains_all(&formatted, &["builtin_rules.magic"]);
    }

    #[test]
    fn test_parse_and_generate_invalid_offset() {
        let formatted = formatted_failure("999999999999999999999 byte =0x7F ELF");
        assert_contains_all(&formatted, &["builtin_rules.magic"]);
    }

    #[test]
    fn test_parse_and_generate_invalid_type() {
        let formatted = formatted_failure("0 invalidtype =0x7F test");
        assert_contains_all(&formatted, &["builtin_rules.magic"]);
    }

    #[test]
    fn test_parse_and_generate_empty_input() {
        let generated = parse_and_generate_builtin_rules("")
            .expect("empty input should succeed with no rules");

        // "vec![" also matches the empty "vec![]" form, so one check covers both.
        assert_contains_all(&generated, &["vec!["]);
    }

    #[test]
    fn test_parse_and_generate_valid_magic() {
        let generated = parse_and_generate_builtin_rules("0 byte =0x7F ELF executable")
            .expect("a well-formed rule should parse and generate");

        assert_contains_all(
            &generated,
            &[
                "OffsetSpec::Absolute(0)",
                "TypeKind::Byte { signed: true }",
                "Value::Uint(127)",
                "ELF executable",
            ],
        );
    }

    #[test]
    fn test_parse_and_generate_malformed_value() {
        let formatted = formatted_failure("0 byte =notahexvalue test");
        assert_contains_all(&formatted, &["builtin_rules.magic"]);
    }
}