1use crate::error::ParseError;
10use crate::parser::ast::{
11 Endianness, MagicRule, OffsetSpec, Operator, StrengthModifier, TypeKind, Value,
12};
13use crate::parser::parse_text_magic_file;
14
/// Number of space characters added per nesting level when indenting the
/// generated rule source.
const INDENT_WIDTH: usize = 4;
16
17pub fn parse_and_generate_builtin_rules(magic_content: &str) -> Result<String, ParseError> {
26 let rules = parse_text_magic_file(magic_content)?;
27 Ok(generate_builtin_rules(&rules))
28}
29
30#[must_use]
35pub fn format_parse_error(error: &ParseError) -> String {
36 match error {
37 ParseError::InvalidSyntax { line, message } => {
38 format!("Error parsing builtin_rules.magic at line {line}: {message}")
39 }
40 ParseError::UnsupportedFeature { line, feature } => {
41 format!("Error parsing builtin_rules.magic at line {line}: {feature}")
42 }
43 ParseError::InvalidOffset { line, offset } => {
44 format!("Error parsing builtin_rules.magic at line {line}: {offset}")
45 }
46 ParseError::InvalidType { line, type_spec } => {
47 format!("Error parsing builtin_rules.magic at line {line}: {type_spec}")
48 }
49 ParseError::InvalidOperator { line, operator } => {
50 format!("Error parsing builtin_rules.magic at line {line}: {operator}")
51 }
52 ParseError::InvalidValue { line, value } => {
53 format!("Error parsing builtin_rules.magic at line {line}: {value}")
54 }
55 ParseError::UnsupportedFormat {
56 line,
57 format_type,
58 message,
59 } => format!("Error parsing builtin_rules.magic at line {line}: {format_type} {message}"),
60 ParseError::IoError(err) => {
61 format!("Error parsing builtin_rules.magic: I/O error: {err}")
62 }
63 }
64}
65
66fn generate_builtin_rules(rules: &[MagicRule]) -> String {
67 let mut output = String::new();
68
69 push_line(&mut output, "#[allow(unused_imports)]");
71 push_line(
72 &mut output,
73 "use crate::parser::ast::{MagicRule, OffsetSpec, TypeKind, Operator, Value, Endianness, StrengthModifier};",
74 );
75 push_line(&mut output, "use std::sync::LazyLock;");
76 push_line(&mut output, "");
77 push_line(
78 &mut output,
79 "/// Built-in magic rules compiled at build time.",
80 );
81 push_line(&mut output, "///");
82 push_line(
83 &mut output,
84 "/// This static contains magic rules parsed from `src/builtin_rules.magic` during",
85 );
86 push_line(
87 &mut output,
88 "/// the build process. The rules are lazily initialized on first access.",
89 );
90 push_line(&mut output, "///");
91 push_line(
92 &mut output,
93 "/// Use [`get_builtin_rules()`] to access these rules instead of using this static directly.",
94 );
95 push_line(
96 &mut output,
97 "pub static BUILTIN_RULES: LazyLock<Vec<MagicRule>> = LazyLock::new(|| {",
98 );
99 push_line(&mut output, " vec![");
100
101 for rule in rules {
102 let serialized = serialize_magic_rule(rule, INDENT_WIDTH * 2);
103 output.push_str(&serialized);
104 output.push(',');
105 output.push('\n');
106 }
107
108 push_line(&mut output, " ]");
109 push_line(&mut output, "});\n");
110 output
111}
112
113fn serialize_magic_rule(rule: &MagicRule, indent: usize) -> String {
114 let mut output = String::new();
115
116 push_indent(&mut output, indent);
117 output.push_str("MagicRule {\n");
118
119 push_field(
120 &mut output,
121 indent + INDENT_WIDTH,
122 "offset",
123 &serialize_offset_spec(&rule.offset),
124 );
125 push_field(
126 &mut output,
127 indent + INDENT_WIDTH,
128 "typ",
129 &serialize_type_kind(&rule.typ),
130 );
131 push_field(
132 &mut output,
133 indent + INDENT_WIDTH,
134 "op",
135 &serialize_operator(&rule.op),
136 );
137 push_field(
138 &mut output,
139 indent + INDENT_WIDTH,
140 "value",
141 &serialize_value(&rule.value),
142 );
143 push_field(
144 &mut output,
145 indent + INDENT_WIDTH,
146 "message",
147 &format!("String::from({})", format_string_literal(&rule.message)),
148 );
149
150 push_indent(&mut output, indent + INDENT_WIDTH);
151 output.push_str("children: ");
152 output.push_str(&serialize_children(&rule.children, indent + INDENT_WIDTH));
153 output.push_str(",\n");
154
155 push_field(
156 &mut output,
157 indent + INDENT_WIDTH,
158 "level",
159 &rule.level.to_string(),
160 );
161
162 push_field(
163 &mut output,
164 indent + INDENT_WIDTH,
165 "strength_modifier",
166 &serialize_strength_modifier(rule.strength_modifier),
167 );
168
169 push_indent(&mut output, indent);
170 output.push('}');
171
172 output
173}
174
175fn serialize_children(children: &[MagicRule], indent: usize) -> String {
176 if children.is_empty() {
177 return "Vec::new()".to_string();
178 }
179
180 let mut output = String::new();
181 output.push_str("vec![\n");
182
183 for child in children {
184 let serialized = serialize_magic_rule(child, indent + INDENT_WIDTH);
185 output.push_str(&serialized);
186 output.push_str(",\n");
187 }
188
189 push_indent(&mut output, indent);
190 output.push(']');
191 output
192}
193
194fn serialize_offset_spec(offset: &OffsetSpec) -> String {
195 match offset {
196 OffsetSpec::Absolute(value) => format!("OffsetSpec::Absolute({value})"),
197 OffsetSpec::Indirect {
198 base_offset,
199 pointer_type,
200 adjustment,
201 endian,
202 } => format!(
203 "OffsetSpec::Indirect {{ base_offset: {base_offset}, pointer_type: {}, adjustment: {adjustment}, endian: {} }}",
204 serialize_type_kind(pointer_type),
205 serialize_endianness(*endian)
206 ),
207 OffsetSpec::Relative(value) => format!("OffsetSpec::Relative({value})"),
208 OffsetSpec::FromEnd(value) => format!("OffsetSpec::FromEnd({value})"),
209 }
210}
211
212fn serialize_type_kind(typ: &TypeKind) -> String {
213 match typ {
214 TypeKind::Byte => "TypeKind::Byte".to_string(),
215 TypeKind::Short { endian, signed } => format!(
216 "TypeKind::Short {{ endian: {}, signed: {} }}",
217 serialize_endianness(*endian),
218 signed
219 ),
220 TypeKind::Long { endian, signed } => format!(
221 "TypeKind::Long {{ endian: {}, signed: {} }}",
222 serialize_endianness(*endian),
223 signed
224 ),
225 TypeKind::String { max_length } => match max_length {
226 Some(value) => {
227 format!("TypeKind::String {{ max_length: Some({value}) }}")
228 }
229 None => "TypeKind::String { max_length: None }".to_string(),
230 },
231 }
232}
233
234fn serialize_operator(op: &Operator) -> String {
235 match op {
236 Operator::Equal => "Operator::Equal".to_string(),
237 Operator::NotEqual => "Operator::NotEqual".to_string(),
238 Operator::BitwiseAnd => "Operator::BitwiseAnd".to_string(),
239 Operator::BitwiseAndMask(mask) => format!("Operator::BitwiseAndMask({mask})"),
240 }
241}
242
243fn serialize_value(value: &Value) -> String {
244 match value {
245 Value::Uint(number) => format!("Value::Uint({})", format_number(*number)),
246 Value::Int(number) => format!("Value::Int({})", format_signed_number(*number)),
247 Value::Bytes(bytes) => format!("Value::Bytes({})", format_byte_vec(bytes)),
248 Value::String(text) => format!(
249 "Value::String(String::from({}))",
250 format_string_literal(text)
251 ),
252 }
253}
254
/// Formats an unsigned integer as a Rust integer literal.
///
/// Values below 10000 are written as plain decimal digits. Larger values get
/// an underscore between every group of three digits, counted from the right
/// (for example `12_345`).
fn format_number(num: u64) -> String {
    let digits = num.to_string();
    if num < 10000 {
        return digits;
    }

    // The leftmost group holds whatever is left over after splitting into
    // threes from the right; a remainder of zero means it is a full group.
    let head_len = match digits.len() % 3 {
        0 => 3,
        partial => partial,
    };

    let mut grouped = String::with_capacity(digits.len() + digits.len() / 3);
    grouped.push_str(&digits[..head_len]);
    for group_start in (head_len..digits.len()).step_by(3) {
        grouped.push('_');
        grouped.push_str(&digits[group_start..group_start + 3]);
    }
    grouped
}
273
274fn format_signed_number(num: i64) -> String {
276 if num < 0 {
277 let abs = num.unsigned_abs();
278 format!("-{}", format_number(abs))
279 } else {
280 format_number(num.unsigned_abs())
282 }
283}
284
285fn serialize_endianness(endian: Endianness) -> String {
286 match endian {
287 Endianness::Little => "Endianness::Little".to_string(),
288 Endianness::Big => "Endianness::Big".to_string(),
289 Endianness::Native => "Endianness::Native".to_string(),
290 }
291}
292
293fn serialize_strength_modifier(modifier: Option<StrengthModifier>) -> String {
294 match modifier {
295 None => "None".to_string(),
296 Some(StrengthModifier::Add(val)) => format!("Some(StrengthModifier::Add({val}))"),
297 Some(StrengthModifier::Subtract(val)) => format!("Some(StrengthModifier::Subtract({val}))"),
298 Some(StrengthModifier::Multiply(val)) => format!("Some(StrengthModifier::Multiply({val}))"),
299 Some(StrengthModifier::Divide(val)) => format!("Some(StrengthModifier::Divide({val}))"),
300 Some(StrengthModifier::Set(val)) => format!("Some(StrengthModifier::Set({val}))"),
301 }
302}
303
/// Formats a byte slice as a `vec![..]` literal of two-digit lowercase hex
/// values separated by `", "`.
///
/// An empty slice yields `vec![]`.
fn format_byte_vec(bytes: &[u8]) -> String {
    let literals: Vec<String> = bytes
        .iter()
        .map(|byte| format!("0x{byte:02x}"))
        .collect();
    format!("vec![{}]", literals.join(", "))
}
321
/// Wraps `value` in double quotes after escaping it with
/// [`str::escape_default`], producing the text of a Rust string literal.
fn format_string_literal(value: &str) -> String {
    // Two extra bytes for the surrounding quotes; escapes may grow it further.
    let mut literal = String::with_capacity(value.len() + 2);
    literal.push('"');
    literal.extend(value.escape_default());
    literal.push('"');
    literal
}
326
/// Appends `line` to `output`, followed by a single newline character.
fn push_line(output: &mut String, line: &str) {
    output.extend([line, "\n"]);
}
331
/// Appends `indent` space characters to `output`.
fn push_indent(output: &mut String, indent: usize) {
    output.extend(std::iter::repeat(' ').take(indent));
}
337
338fn push_field(output: &mut String, indent: usize, name: &str, value: &str) {
339 push_indent(output, indent);
340 output.push_str(name);
341 output.push_str(": ");
342 output.push_str(value);
343 output.push_str(",\n");
344}
345
/// Unit tests for the error formatter, the individual serializers, and the
/// end-to-end parse-and-generate entry point.
#[cfg(test)]
mod tests {
    use super::*;

    // ---- format_parse_error ------------------------------------------------

    #[test]
    fn test_format_parse_error_invalid_syntax() {
        let error = ParseError::InvalidSyntax {
            line: 42,
            message: "expected offset".to_string(),
        };
        let formatted = format_parse_error(&error);
        assert!(formatted.contains("line 42"));
        assert!(formatted.contains("expected offset"));
        assert!(formatted.contains("builtin_rules.magic"));
    }

    #[test]
    fn test_format_parse_error_unsupported_feature() {
        let error = ParseError::UnsupportedFeature {
            line: 10,
            feature: "regex patterns".to_string(),
        };
        let formatted = format_parse_error(&error);
        assert!(formatted.contains("line 10"));
        assert!(formatted.contains("regex patterns"));
    }

    #[test]
    fn test_format_parse_error_invalid_offset() {
        let error = ParseError::InvalidOffset {
            line: 5,
            offset: "invalid offset spec".to_string(),
        };
        let formatted = format_parse_error(&error);
        assert!(formatted.contains("line 5"));
        assert!(formatted.contains("invalid offset spec"));
    }

    #[test]
    fn test_format_parse_error_invalid_type() {
        let error = ParseError::InvalidType {
            line: 7,
            type_spec: "unknown type".to_string(),
        };
        let formatted = format_parse_error(&error);
        assert!(formatted.contains("line 7"));
        assert!(formatted.contains("unknown type"));
    }

    #[test]
    fn test_format_parse_error_invalid_operator() {
        let error = ParseError::InvalidOperator {
            line: 12,
            operator: "bad operator".to_string(),
        };
        let formatted = format_parse_error(&error);
        assert!(formatted.contains("line 12"));
        assert!(formatted.contains("bad operator"));
    }

    #[test]
    fn test_format_parse_error_invalid_value() {
        let error = ParseError::InvalidValue {
            line: 15,
            value: "malformed value".to_string(),
        };
        let formatted = format_parse_error(&error);
        assert!(formatted.contains("line 15"));
        assert!(formatted.contains("malformed value"));
    }

    // ---- serialize_offset_spec ---------------------------------------------

    #[test]
    fn test_serialize_offset_spec_absolute() {
        let offset = OffsetSpec::Absolute(42);
        let serialized = serialize_offset_spec(&offset);
        assert_eq!(serialized, "OffsetSpec::Absolute(42)");
    }

    #[test]
    fn test_serialize_offset_spec_relative() {
        let offset = OffsetSpec::Relative(-10);
        let serialized = serialize_offset_spec(&offset);
        assert_eq!(serialized, "OffsetSpec::Relative(-10)");
    }

    #[test]
    fn test_serialize_offset_spec_from_end() {
        let offset = OffsetSpec::FromEnd(-16);
        let serialized = serialize_offset_spec(&offset);
        assert_eq!(serialized, "OffsetSpec::FromEnd(-16)");
    }

    // ---- serialize_type_kind -----------------------------------------------

    #[test]
    fn test_serialize_type_kind_byte() {
        let typ = TypeKind::Byte;
        let serialized = serialize_type_kind(&typ);
        assert_eq!(serialized, "TypeKind::Byte");
    }

    #[test]
    fn test_serialize_type_kind_short() {
        let typ = TypeKind::Short {
            endian: Endianness::Little,
            signed: false,
        };
        let serialized = serialize_type_kind(&typ);
        // Exact match so a dropped or reordered field is caught.
        assert_eq!(
            serialized,
            "TypeKind::Short { endian: Endianness::Little, signed: false }"
        );
    }

    #[test]
    fn test_serialize_type_kind_long() {
        let typ = TypeKind::Long {
            endian: Endianness::Big,
            signed: true,
        };
        let serialized = serialize_type_kind(&typ);
        assert_eq!(
            serialized,
            "TypeKind::Long { endian: Endianness::Big, signed: true }"
        );
    }

    #[test]
    fn test_serialize_type_kind_string() {
        let typ1 = TypeKind::String { max_length: None };
        let serialized1 = serialize_type_kind(&typ1);
        assert_eq!(serialized1, "TypeKind::String { max_length: None }");

        let typ2 = TypeKind::String {
            max_length: Some(256),
        };
        let serialized2 = serialize_type_kind(&typ2);
        assert_eq!(serialized2, "TypeKind::String { max_length: Some(256) }");
    }

    // ---- serialize_operator ------------------------------------------------

    #[test]
    fn test_serialize_operator() {
        assert_eq!(serialize_operator(&Operator::Equal), "Operator::Equal");
        assert_eq!(
            serialize_operator(&Operator::NotEqual),
            "Operator::NotEqual"
        );
        assert_eq!(
            serialize_operator(&Operator::BitwiseAnd),
            "Operator::BitwiseAnd"
        );
        assert_eq!(
            serialize_operator(&Operator::BitwiseAndMask(0xFF)),
            "Operator::BitwiseAndMask(255)"
        );
    }

    // ---- serialize_value ---------------------------------------------------

    #[test]
    fn test_serialize_value_uint() {
        let value = Value::Uint(12345);
        let serialized = serialize_value(&value);
        assert_eq!(serialized, "Value::Uint(12_345)");
    }

    #[test]
    fn test_serialize_value_int() {
        let value = Value::Int(-100);
        let serialized = serialize_value(&value);
        // Pin the sign and digits, not just the variant name.
        assert_eq!(serialized, "Value::Int(-100)");
    }

    #[test]
    fn test_serialize_value_bytes() {
        let value = Value::Bytes(vec![0x7F, 0x45, 0x4C, 0x46]);
        let serialized = serialize_value(&value);
        assert_eq!(serialized, "Value::Bytes(vec![0x7f, 0x45, 0x4c, 0x46])");
    }

    #[test]
    fn test_serialize_value_string() {
        let value = Value::String("test".to_string());
        let serialized = serialize_value(&value);
        assert_eq!(serialized, "Value::String(String::from(\"test\"))");
    }

    // ---- number formatting -------------------------------------------------

    #[test]
    fn test_format_number_small() {
        assert_eq!(format_number(42), "42");
        assert_eq!(format_number(999), "999");
        assert_eq!(format_number(9999), "9999");
    }

    #[test]
    fn test_format_number_large() {
        assert_eq!(format_number(10000), "10_000");
        assert_eq!(format_number(123_456), "123_456");
        assert_eq!(format_number(1_234_567_890), "1_234_567_890");
    }

    #[test]
    fn test_format_signed_number() {
        assert_eq!(format_signed_number(0), "0");
        assert_eq!(format_signed_number(100), "100");
        assert_eq!(format_signed_number(-100), "-100");
        assert_eq!(format_signed_number(-12_345), "-12_345");
        // `i64::MIN` has no positive counterpart; it must still format cleanly.
        assert_eq!(
            format_signed_number(i64::MIN),
            "-9_223_372_036_854_775_808"
        );
    }

    // ---- small serializers -------------------------------------------------

    #[test]
    fn test_serialize_endianness() {
        assert_eq!(
            serialize_endianness(Endianness::Little),
            "Endianness::Little"
        );
        assert_eq!(serialize_endianness(Endianness::Big), "Endianness::Big");
        assert_eq!(
            serialize_endianness(Endianness::Native),
            "Endianness::Native"
        );
    }

    #[test]
    fn test_serialize_strength_modifier_none() {
        assert_eq!(serialize_strength_modifier(None), "None");
    }

    #[test]
    fn test_format_byte_vec_empty() {
        let result = format_byte_vec(&[]);
        assert_eq!(result, "vec![]");
    }

    #[test]
    fn test_format_byte_vec_single() {
        let result = format_byte_vec(&[0x42]);
        assert_eq!(result, "vec![0x42]");
    }

    #[test]
    fn test_format_byte_vec_multiple() {
        let result = format_byte_vec(&[0x12, 0x34, 0x56]);
        assert_eq!(result, "vec![0x12, 0x34, 0x56]");
    }

    #[test]
    fn test_format_string_literal() {
        assert_eq!(format_string_literal("hello"), "\"hello\"");
        assert_eq!(format_string_literal("test\n"), "\"test\\n\"");
        assert_eq!(format_string_literal("quote\"here"), "\"quote\\\"here\"");
    }

    // ---- generate_builtin_rules / serialize_children -----------------------

    #[test]
    fn test_generate_builtin_rules_empty() {
        let rules: Vec<MagicRule> = vec![];
        let generated = generate_builtin_rules(&rules);

        assert!(generated.contains("LazyLock<Vec<MagicRule>>"));
        // The opening of the vector literal is always present, even with no rules.
        assert!(generated.contains("vec!["));
        assert!(generated.contains("use crate::parser::ast"));
        assert!(generated.contains("use std::sync::LazyLock"));
        // No rule literal may be emitted for an empty input.
        assert!(!generated.contains("MagicRule {"));
    }

    #[test]
    fn test_generate_builtin_rules_single_rule() {
        let rule = MagicRule {
            offset: OffsetSpec::Absolute(0),
            typ: TypeKind::Byte,
            op: Operator::Equal,
            value: Value::Uint(0x7F),
            message: "test".to_string(),
            children: vec![],
            level: 0,
            strength_modifier: None,
        };

        let generated = generate_builtin_rules(&[rule]);

        assert!(generated.contains("OffsetSpec::Absolute(0)"));
        assert!(generated.contains("TypeKind::Byte"));
        assert!(generated.contains("Operator::Equal"));
        assert!(generated.contains("Value::Uint(127)"));
        // Check the message as a field, not as a bare substring that could
        // match elsewhere in the generated text.
        assert!(generated.contains("message: String::from(\"test\")"));
        assert!(generated.contains("children: Vec::new()"));
        assert!(generated.contains("level: 0"));
        assert!(generated.contains("strength_modifier: None"));
    }

    #[test]
    fn test_serialize_children_empty() {
        let result = serialize_children(&[], 4);
        assert_eq!(result, "Vec::new()");
    }

    #[test]
    fn test_serialize_children_with_nested_rule() {
        let child = MagicRule {
            offset: OffsetSpec::Absolute(4),
            typ: TypeKind::Byte,
            op: Operator::Equal,
            value: Value::Uint(1),
            message: "child".to_string(),
            children: vec![],
            level: 1,
            strength_modifier: None,
        };

        let result = serialize_children(&[child], 4);

        assert!(result.starts_with("vec![\n"));
        assert!(result.ends_with(']'));
        assert!(result.contains("OffsetSpec::Absolute(4)"));
        assert!(result.contains("level: 1"));
        assert!(result.contains("message: String::from(\"child\")"));
    }

    // ---- parse_and_generate_builtin_rules ----------------------------------

    #[test]
    fn test_parse_and_generate_invalid_syntax() {
        let invalid_magic = "this is not valid magic syntax";
        let result = parse_and_generate_builtin_rules(invalid_magic);

        assert!(result.is_err());
        let error = result.unwrap_err();
        let formatted = format_parse_error(&error);
        assert!(formatted.contains("builtin_rules.magic"));
    }

    #[test]
    fn test_parse_and_generate_invalid_offset() {
        let invalid_magic = "999999999999999999999 byte =0x7F ELF";
        let result = parse_and_generate_builtin_rules(invalid_magic);

        assert!(result.is_err());
        let error = result.unwrap_err();
        let formatted = format_parse_error(&error);
        assert!(formatted.contains("builtin_rules.magic"));
    }

    #[test]
    fn test_parse_and_generate_invalid_type() {
        let invalid_magic = "0 invalidtype =0x7F test";
        let result = parse_and_generate_builtin_rules(invalid_magic);

        assert!(result.is_err());
        let error = result.unwrap_err();
        let formatted = format_parse_error(&error);
        assert!(formatted.contains("builtin_rules.magic"));
    }

    #[test]
    fn test_parse_and_generate_empty_input() {
        let empty_magic = "";
        let result = parse_and_generate_builtin_rules(empty_magic);

        assert!(result.is_ok());
        let generated = result.unwrap();
        assert!(generated.contains("vec!["));
    }

    #[test]
    fn test_parse_and_generate_valid_magic() {
        let valid_magic = "0 byte =0x7F ELF executable";
        let result = parse_and_generate_builtin_rules(valid_magic);

        assert!(result.is_ok());
        let generated = result.unwrap();
        assert!(generated.contains("OffsetSpec::Absolute(0)"));
        assert!(generated.contains("TypeKind::Byte"));
        assert!(generated.contains("Value::Uint(127)"));
        assert!(generated.contains("ELF executable"));
    }

    #[test]
    fn test_parse_and_generate_malformed_value() {
        let invalid_magic = "0 byte =notahexvalue test";
        let result = parse_and_generate_builtin_rules(invalid_magic);

        assert!(result.is_err());
        let error = result.unwrap_err();
        let formatted = format_parse_error(&error);
        assert!(formatted.contains("builtin_rules.magic"));
    }
}