1use crate::error::ParseError;
10use crate::parser::ast::{
11 Endianness, MagicRule, OffsetSpec, Operator, StrengthModifier, TypeKind, Value,
12};
13use crate::parser::parse_text_magic_file;
14
/// Number of spaces added per nesting level when rendering rules as Rust source.
const INDENT_WIDTH: usize = 4;
16
17pub fn parse_and_generate_builtin_rules(magic_content: &str) -> Result<String, ParseError> {
26 let rules = parse_text_magic_file(magic_content)?;
27 Ok(generate_builtin_rules(&rules))
28}
29
30#[must_use]
35pub fn format_parse_error(error: &ParseError) -> String {
36 match error {
37 ParseError::InvalidSyntax { line, message } => {
38 format!("Error parsing builtin_rules.magic at line {line}: {message}")
39 }
40 ParseError::UnsupportedFeature { line, feature } => {
41 format!("Error parsing builtin_rules.magic at line {line}: {feature}")
42 }
43 ParseError::InvalidOffset { line, offset } => {
44 format!("Error parsing builtin_rules.magic at line {line}: {offset}")
45 }
46 ParseError::InvalidType { line, type_spec } => {
47 format!("Error parsing builtin_rules.magic at line {line}: {type_spec}")
48 }
49 ParseError::InvalidOperator { line, operator } => {
50 format!("Error parsing builtin_rules.magic at line {line}: {operator}")
51 }
52 ParseError::InvalidValue { line, value } => {
53 format!("Error parsing builtin_rules.magic at line {line}: {value}")
54 }
55 ParseError::UnsupportedFormat {
56 line,
57 format_type,
58 message,
59 } => format!("Error parsing builtin_rules.magic at line {line}: {format_type} {message}"),
60 ParseError::IoError(err) => {
61 format!("Error parsing builtin_rules.magic: I/O error: {err}")
62 }
63 }
64}
65
66fn generate_builtin_rules(rules: &[MagicRule]) -> String {
67 let mut output = String::new();
68
69 push_line(&mut output, "#[allow(unused_imports)]");
71 push_line(
72 &mut output,
73 "use crate::parser::ast::{MagicRule, OffsetSpec, TypeKind, Operator, Value, Endianness, StrengthModifier};",
74 );
75 push_line(&mut output, "use std::sync::LazyLock;");
76 push_line(&mut output, "");
77 push_line(
78 &mut output,
79 "/// Built-in magic rules compiled at build time.",
80 );
81 push_line(&mut output, "///");
82 push_line(
83 &mut output,
84 "/// This static contains magic rules parsed from `src/builtin_rules.magic` during",
85 );
86 push_line(
87 &mut output,
88 "/// the build process. The rules are lazily initialized on first access.",
89 );
90 push_line(&mut output, "///");
91 push_line(
92 &mut output,
93 "/// Use [`get_builtin_rules()`] to access these rules instead of using this static directly.",
94 );
95 push_line(
96 &mut output,
97 "pub static BUILTIN_RULES: LazyLock<Vec<MagicRule>> = LazyLock::new(|| {",
98 );
99 push_line(&mut output, " vec![");
100
101 for rule in rules {
102 let serialized = serialize_magic_rule(rule, INDENT_WIDTH * 2);
103 output.push_str(&serialized);
104 output.push(',');
105 output.push('\n');
106 }
107
108 push_line(&mut output, " ]");
109 push_line(&mut output, "});\n");
110 output
111}
112
113fn serialize_magic_rule(rule: &MagicRule, indent: usize) -> String {
114 let mut output = String::new();
115
116 push_indent(&mut output, indent);
117 output.push_str("MagicRule {\n");
118
119 push_field(
120 &mut output,
121 indent + INDENT_WIDTH,
122 "offset",
123 &serialize_offset_spec(&rule.offset),
124 );
125 push_field(
126 &mut output,
127 indent + INDENT_WIDTH,
128 "typ",
129 &serialize_type_kind(&rule.typ),
130 );
131 push_field(
132 &mut output,
133 indent + INDENT_WIDTH,
134 "op",
135 &serialize_operator(&rule.op),
136 );
137 push_field(
138 &mut output,
139 indent + INDENT_WIDTH,
140 "value",
141 &serialize_value(&rule.value),
142 );
143 push_field(
144 &mut output,
145 indent + INDENT_WIDTH,
146 "message",
147 &format!("String::from({})", format_string_literal(&rule.message)),
148 );
149
150 push_indent(&mut output, indent + INDENT_WIDTH);
151 output.push_str("children: ");
152 output.push_str(&serialize_children(&rule.children, indent + INDENT_WIDTH));
153 output.push_str(",\n");
154
155 push_field(
156 &mut output,
157 indent + INDENT_WIDTH,
158 "level",
159 &rule.level.to_string(),
160 );
161
162 push_field(
163 &mut output,
164 indent + INDENT_WIDTH,
165 "strength_modifier",
166 &serialize_strength_modifier(rule.strength_modifier),
167 );
168
169 push_indent(&mut output, indent);
170 output.push('}');
171
172 output
173}
174
175fn serialize_children(children: &[MagicRule], indent: usize) -> String {
176 if children.is_empty() {
177 return "Vec::new()".to_string();
178 }
179
180 let mut output = String::new();
181 output.push_str("vec![\n");
182
183 for child in children {
184 let serialized = serialize_magic_rule(child, indent + INDENT_WIDTH);
185 output.push_str(&serialized);
186 output.push_str(",\n");
187 }
188
189 push_indent(&mut output, indent);
190 output.push(']');
191 output
192}
193
194fn serialize_offset_spec(offset: &OffsetSpec) -> String {
195 match offset {
196 OffsetSpec::Absolute(value) => format!("OffsetSpec::Absolute({value})"),
197 OffsetSpec::Indirect {
198 base_offset,
199 pointer_type,
200 adjustment,
201 endian,
202 } => format!(
203 "OffsetSpec::Indirect {{ base_offset: {base_offset}, pointer_type: {}, adjustment: {adjustment}, endian: {} }}",
204 serialize_type_kind(pointer_type),
205 serialize_endianness(*endian)
206 ),
207 OffsetSpec::Relative(value) => format!("OffsetSpec::Relative({value})"),
208 OffsetSpec::FromEnd(value) => format!("OffsetSpec::FromEnd({value})"),
209 }
210}
211
212fn serialize_type_kind(typ: &TypeKind) -> String {
213 match typ {
214 TypeKind::Byte { signed } => format!("TypeKind::Byte {{ signed: {signed} }}"),
215 TypeKind::Short { endian, signed } => format!(
216 "TypeKind::Short {{ endian: {}, signed: {} }}",
217 serialize_endianness(*endian),
218 signed
219 ),
220 TypeKind::Long { endian, signed } => format!(
221 "TypeKind::Long {{ endian: {}, signed: {} }}",
222 serialize_endianness(*endian),
223 signed
224 ),
225 TypeKind::String { max_length } => match max_length {
226 Some(value) => {
227 format!("TypeKind::String {{ max_length: Some({value}) }}")
228 }
229 None => "TypeKind::String { max_length: None }".to_string(),
230 },
231 }
232}
233
234fn serialize_operator(op: &Operator) -> String {
235 match op {
236 Operator::Equal => "Operator::Equal".to_string(),
237 Operator::NotEqual => "Operator::NotEqual".to_string(),
238 Operator::LessThan => "Operator::LessThan".to_string(),
239 Operator::GreaterThan => "Operator::GreaterThan".to_string(),
240 Operator::LessEqual => "Operator::LessEqual".to_string(),
241 Operator::GreaterEqual => "Operator::GreaterEqual".to_string(),
242 Operator::BitwiseAnd => "Operator::BitwiseAnd".to_string(),
243 Operator::BitwiseAndMask(mask) => format!("Operator::BitwiseAndMask({mask})"),
244 }
245}
246
247fn serialize_value(value: &Value) -> String {
248 match value {
249 Value::Uint(number) => format!("Value::Uint({})", format_number(*number)),
250 Value::Int(number) => format!("Value::Int({})", format_signed_number(*number)),
251 Value::Bytes(bytes) => format!("Value::Bytes({})", format_byte_vec(bytes)),
252 Value::String(text) => format!(
253 "Value::String(String::from({}))",
254 format_string_literal(text)
255 ),
256 }
257}
258
/// Renders `num` as a decimal integer literal.
///
/// Values below 10 000 are written without separators. Larger values get a
/// `_` between every group of three digits, counted from the right
/// (for example `1_234_567`).
fn format_number(num: u64) -> String {
    let digits = num.to_string();
    if num < 10_000 {
        return digits;
    }

    // The leading group holds whatever is left over once the remaining digits
    // are split into full groups of three; it is never empty.
    let head_len = match digits.len() % 3 {
        0 => 3,
        remainder => remainder,
    };
    let (head, tail) = digits.split_at(head_len);

    let mut grouped = String::with_capacity(digits.len() + tail.len() / 3);
    grouped.push_str(head);
    for start in (0..tail.len()).step_by(3) {
        grouped.push('_');
        grouped.push_str(&tail[start..start + 3]);
    }
    grouped
}
277
278fn format_signed_number(num: i64) -> String {
280 if num < 0 {
281 let abs = num.unsigned_abs();
282 format!("-{}", format_number(abs))
283 } else {
284 format_number(num.unsigned_abs())
286 }
287}
288
289fn serialize_endianness(endian: Endianness) -> String {
290 match endian {
291 Endianness::Little => "Endianness::Little".to_string(),
292 Endianness::Big => "Endianness::Big".to_string(),
293 Endianness::Native => "Endianness::Native".to_string(),
294 }
295}
296
297fn serialize_strength_modifier(modifier: Option<StrengthModifier>) -> String {
298 match modifier {
299 None => "None".to_string(),
300 Some(StrengthModifier::Add(val)) => format!("Some(StrengthModifier::Add({val}))"),
301 Some(StrengthModifier::Subtract(val)) => format!("Some(StrengthModifier::Subtract({val}))"),
302 Some(StrengthModifier::Multiply(val)) => format!("Some(StrengthModifier::Multiply({val}))"),
303 Some(StrengthModifier::Divide(val)) => format!("Some(StrengthModifier::Divide({val}))"),
304 Some(StrengthModifier::Set(val)) => format!("Some(StrengthModifier::Set({val}))"),
305 }
306}
307
/// Renders `bytes` as a `vec![..]` expression of two-digit lowercase hex
/// literals separated by `, ` (an empty slice yields `vec![]`).
fn format_byte_vec(bytes: &[u8]) -> String {
    let elements: Vec<String> = bytes.iter().map(|byte| format!("0x{byte:02x}")).collect();
    format!("vec![{}]", elements.join(", "))
}
325
/// Wraps `value` in double quotes after escaping it with `str::escape_default`.
fn format_string_literal(value: &str) -> String {
    let mut literal = String::with_capacity(value.len() + 2);
    literal.push('"');
    literal.extend(value.escape_default());
    literal.push('"');
    literal
}
330
/// Appends `line` to `output`, followed by a newline.
fn push_line(output: &mut String, line: &str) {
    output.extend([line, "\n"]);
}
335
/// Appends `indent` space characters to `output`.
fn push_indent(output: &mut String, indent: usize) {
    output.extend(std::iter::repeat(' ').take(indent));
}
341
342fn push_field(output: &mut String, indent: usize, name: &str, value: &str) {
343 push_indent(output, indent);
344 output.push_str(name);
345 output.push_str(": ");
346 output.push_str(value);
347 output.push_str(",\n");
348}
349
#[cfg(test)]
mod tests {
    //! Unit tests for the rule-to-source generator.
    //!
    //! Expected strings are pinned exactly wherever the output is fully
    //! determined by this module, so formatting regressions are caught rather
    //! than hidden behind substring checks.

    use super::*;

    /// Asserts that `input` is rejected by the parser and that the formatted
    /// error names the magic file.
    fn assert_rejected(input: &str) {
        let error = parse_and_generate_builtin_rules(input)
            .expect_err("input should be rejected by the parser");
        let formatted = format_parse_error(&error);
        assert!(formatted.contains("builtin_rules.magic"));
    }

    #[test]
    fn test_format_parse_error_invalid_syntax() {
        let error = ParseError::InvalidSyntax {
            line: 42,
            message: "expected offset".to_string(),
        };
        assert_eq!(
            format_parse_error(&error),
            "Error parsing builtin_rules.magic at line 42: expected offset"
        );
    }

    #[test]
    fn test_format_parse_error_unsupported_feature() {
        let error = ParseError::UnsupportedFeature {
            line: 10,
            feature: "regex patterns".to_string(),
        };
        assert_eq!(
            format_parse_error(&error),
            "Error parsing builtin_rules.magic at line 10: regex patterns"
        );
    }

    #[test]
    fn test_format_parse_error_invalid_offset() {
        let error = ParseError::InvalidOffset {
            line: 5,
            offset: "invalid offset spec".to_string(),
        };
        assert_eq!(
            format_parse_error(&error),
            "Error parsing builtin_rules.magic at line 5: invalid offset spec"
        );
    }

    #[test]
    fn test_format_parse_error_invalid_type() {
        let error = ParseError::InvalidType {
            line: 7,
            type_spec: "unknown type".to_string(),
        };
        assert_eq!(
            format_parse_error(&error),
            "Error parsing builtin_rules.magic at line 7: unknown type"
        );
    }

    #[test]
    fn test_format_parse_error_invalid_operator() {
        let error = ParseError::InvalidOperator {
            line: 12,
            operator: "bad operator".to_string(),
        };
        assert_eq!(
            format_parse_error(&error),
            "Error parsing builtin_rules.magic at line 12: bad operator"
        );
    }

    #[test]
    fn test_format_parse_error_invalid_value() {
        let error = ParseError::InvalidValue {
            line: 15,
            value: "malformed value".to_string(),
        };
        assert_eq!(
            format_parse_error(&error),
            "Error parsing builtin_rules.magic at line 15: malformed value"
        );
    }

    #[test]
    fn test_serialize_offset_spec_absolute() {
        let offset = OffsetSpec::Absolute(42);
        assert_eq!(serialize_offset_spec(&offset), "OffsetSpec::Absolute(42)");
    }

    #[test]
    fn test_serialize_offset_spec_relative() {
        let offset = OffsetSpec::Relative(-10);
        assert_eq!(serialize_offset_spec(&offset), "OffsetSpec::Relative(-10)");
    }

    #[test]
    fn test_serialize_offset_spec_from_end() {
        let offset = OffsetSpec::FromEnd(-16);
        assert_eq!(serialize_offset_spec(&offset), "OffsetSpec::FromEnd(-16)");
    }

    #[test]
    fn test_serialize_type_kind_byte() {
        assert_eq!(
            serialize_type_kind(&TypeKind::Byte { signed: true }),
            "TypeKind::Byte { signed: true }"
        );
        assert_eq!(
            serialize_type_kind(&TypeKind::Byte { signed: false }),
            "TypeKind::Byte { signed: false }"
        );
    }

    #[test]
    fn test_serialize_type_kind_short() {
        let typ = TypeKind::Short {
            endian: Endianness::Little,
            signed: false,
        };
        assert_eq!(
            serialize_type_kind(&typ),
            "TypeKind::Short { endian: Endianness::Little, signed: false }"
        );
    }

    #[test]
    fn test_serialize_type_kind_long() {
        let typ = TypeKind::Long {
            endian: Endianness::Big,
            signed: true,
        };
        assert_eq!(
            serialize_type_kind(&typ),
            "TypeKind::Long { endian: Endianness::Big, signed: true }"
        );
    }

    #[test]
    fn test_serialize_type_kind_string() {
        let unbounded = TypeKind::String { max_length: None };
        assert_eq!(
            serialize_type_kind(&unbounded),
            "TypeKind::String { max_length: None }"
        );

        let bounded = TypeKind::String {
            max_length: Some(256),
        };
        assert_eq!(
            serialize_type_kind(&bounded),
            "TypeKind::String { max_length: Some(256) }"
        );
    }

    #[test]
    fn test_serialize_operator() {
        assert_eq!(serialize_operator(&Operator::Equal), "Operator::Equal");
        assert_eq!(
            serialize_operator(&Operator::NotEqual),
            "Operator::NotEqual"
        );
        assert_eq!(
            serialize_operator(&Operator::LessThan),
            "Operator::LessThan"
        );
        assert_eq!(
            serialize_operator(&Operator::GreaterThan),
            "Operator::GreaterThan"
        );
        assert_eq!(
            serialize_operator(&Operator::LessEqual),
            "Operator::LessEqual"
        );
        assert_eq!(
            serialize_operator(&Operator::GreaterEqual),
            "Operator::GreaterEqual"
        );
        assert_eq!(
            serialize_operator(&Operator::BitwiseAnd),
            "Operator::BitwiseAnd"
        );
        assert_eq!(
            serialize_operator(&Operator::BitwiseAndMask(0xFF)),
            "Operator::BitwiseAndMask(255)"
        );
    }

    #[test]
    fn test_serialize_value_uint() {
        assert_eq!(serialize_value(&Value::Uint(12345)), "Value::Uint(12_345)");
    }

    #[test]
    fn test_serialize_value_int() {
        // Pin the exact literal so a lost sign or separator is detected.
        assert_eq!(serialize_value(&Value::Int(-100)), "Value::Int(-100)");
        assert_eq!(serialize_value(&Value::Int(-12345)), "Value::Int(-12_345)");
        assert_eq!(serialize_value(&Value::Int(7)), "Value::Int(7)");
    }

    #[test]
    fn test_serialize_value_bytes() {
        let value = Value::Bytes(vec![0x7F, 0x45, 0x4C, 0x46]);
        assert_eq!(
            serialize_value(&value),
            "Value::Bytes(vec![0x7f, 0x45, 0x4c, 0x46])"
        );
    }

    #[test]
    fn test_serialize_value_string() {
        let value = Value::String("test".to_string());
        assert_eq!(
            serialize_value(&value),
            "Value::String(String::from(\"test\"))"
        );
    }

    #[test]
    fn test_format_number_small() {
        assert_eq!(format_number(0), "0");
        assert_eq!(format_number(42), "42");
        assert_eq!(format_number(999), "999");
        assert_eq!(format_number(9999), "9999");
    }

    #[test]
    fn test_format_number_large() {
        assert_eq!(format_number(10000), "10_000");
        assert_eq!(format_number(123_456), "123_456");
        assert_eq!(format_number(1_234_567_890), "1_234_567_890");
        assert_eq!(format_number(u64::MAX), "18_446_744_073_709_551_615");
    }

    #[test]
    fn test_format_signed_number() {
        assert_eq!(format_signed_number(0), "0");
        assert_eq!(format_signed_number(9999), "9999");
        assert_eq!(format_signed_number(-9999), "-9999");
        assert_eq!(format_signed_number(-12345), "-12_345");
        // The magnitude of `i64::MIN` does not fit in an `i64`; make sure it
        // is still rendered.
        assert_eq!(
            format_signed_number(i64::MIN),
            "-9_223_372_036_854_775_808"
        );
    }

    #[test]
    fn test_serialize_endianness() {
        assert_eq!(
            serialize_endianness(Endianness::Little),
            "Endianness::Little"
        );
        assert_eq!(serialize_endianness(Endianness::Big), "Endianness::Big");
        assert_eq!(
            serialize_endianness(Endianness::Native),
            "Endianness::Native"
        );
    }

    #[test]
    fn test_serialize_strength_modifier_none() {
        assert_eq!(serialize_strength_modifier(None), "None");
    }

    #[test]
    fn test_format_byte_vec_empty() {
        assert_eq!(format_byte_vec(&[]), "vec![]");
    }

    #[test]
    fn test_format_byte_vec_single() {
        assert_eq!(format_byte_vec(&[0x42]), "vec![0x42]");
    }

    #[test]
    fn test_format_byte_vec_multiple() {
        assert_eq!(format_byte_vec(&[0x12, 0x34, 0x56]), "vec![0x12, 0x34, 0x56]");
    }

    #[test]
    fn test_format_string_literal() {
        assert_eq!(format_string_literal("hello"), "\"hello\"");
        assert_eq!(format_string_literal("test\n"), "\"test\\n\"");
        assert_eq!(format_string_literal("quote\"here"), "\"quote\\\"here\"");
    }

    #[test]
    fn test_push_helpers() {
        let mut output = String::new();
        push_line(&mut output, "head");
        push_indent(&mut output, 2);
        output.push_str("x\n");
        push_field(&mut output, 4, "name", "value");
        assert_eq!(output, "head\n  x\n    name: value,\n");
    }

    #[test]
    fn test_generate_builtin_rules_empty() {
        let rules: Vec<MagicRule> = vec![];
        let generated = generate_builtin_rules(&rules);

        assert!(generated.contains("LazyLock<Vec<MagicRule>>"));
        assert!(generated.contains("vec!["));
        assert!(generated.contains("use crate::parser::ast"));
        assert!(generated.contains("use std::sync::LazyLock"));
        // With no input rules, no struct literal may be emitted.
        assert!(!generated.contains("MagicRule {"));
    }

    #[test]
    fn test_generate_builtin_rules_single_rule() {
        let rule = MagicRule {
            offset: OffsetSpec::Absolute(0),
            typ: TypeKind::Byte { signed: true },
            op: Operator::Equal,
            value: Value::Uint(0x7F),
            message: "test".to_string(),
            children: vec![],
            level: 0,
            strength_modifier: None,
        };

        let generated = generate_builtin_rules(&[rule]);

        assert!(generated.contains("MagicRule {"));
        assert!(generated.contains("offset: OffsetSpec::Absolute(0),"));
        assert!(generated.contains("typ: TypeKind::Byte { signed: true },"));
        assert!(generated.contains("op: Operator::Equal,"));
        assert!(generated.contains("value: Value::Uint(127),"));
        assert!(generated.contains("message: String::from(\"test\"),"));
        assert!(generated.contains("children: Vec::new(),"));
        assert!(generated.contains("level: 0,"));
        assert!(generated.contains("strength_modifier: None,"));
    }

    #[test]
    fn test_serialize_children_empty() {
        assert_eq!(serialize_children(&[], 4), "Vec::new()");
    }

    #[test]
    fn test_serialize_children_with_nested_rule() {
        let child = MagicRule {
            offset: OffsetSpec::Absolute(4),
            typ: TypeKind::Byte { signed: true },
            op: Operator::Equal,
            value: Value::Uint(1),
            message: "child".to_string(),
            children: vec![],
            level: 1,
            strength_modifier: None,
        };

        let result = serialize_children(&[child], 4);

        assert!(result.starts_with("vec![\n"));
        assert!(result.ends_with(']'));
        assert!(result.contains("OffsetSpec::Absolute(4)"));
        assert!(result.contains("level: 1"));
        assert!(result.contains("String::from(\"child\")"));
    }

    #[test]
    fn test_parse_and_generate_invalid_syntax() {
        assert_rejected("this is not valid magic syntax");
    }

    #[test]
    fn test_parse_and_generate_invalid_offset() {
        assert_rejected("999999999999999999999 byte =0x7F ELF");
    }

    #[test]
    fn test_parse_and_generate_invalid_type() {
        assert_rejected("0 invalidtype =0x7F test");
    }

    #[test]
    fn test_parse_and_generate_empty_input() {
        let generated = parse_and_generate_builtin_rules("")
            .expect("empty input should produce an empty rule list");

        assert!(generated.contains("vec!["));
        assert!(!generated.contains("MagicRule {"));
    }

    #[test]
    fn test_parse_and_generate_valid_magic() {
        let generated = parse_and_generate_builtin_rules("0 byte =0x7F ELF executable")
            .expect("valid magic line should parse");

        assert!(generated.contains("OffsetSpec::Absolute(0)"));
        assert!(generated.contains("TypeKind::Byte { signed: true }"));
        assert!(generated.contains("Value::Uint(127)"));
        assert!(generated.contains("ELF executable"));
    }

    #[test]
    fn test_parse_and_generate_malformed_value() {
        assert_rejected("0 byte =notahexvalue test");
    }
}
736}