1#![forbid(unsafe_code)]
2#![deny(
3 missing_docs,
4 dead_code,
5 nonstandard_style,
6 unused_imports,
7 unused_mut,
8 unused_variables,
9 unused_unsafe,
10 unreachable_patterns
11)]
12
13use std::collections::HashMap;
16use std::fs;
17use std::path::Path;
18
19use serde::Deserialize;
20use thiserror::Error;
21
/// Number of bits in one instruction word.
///
/// Sizes the per-bit cell array used while flattening encodings and bounds
/// the loops that scan it.
const INSTRUCTION_BITS: usize = 32;
23
/// Errors produced while loading or flattening an instruction specification.
#[derive(Debug, Error)]
pub enum SpecError {
    /// An I/O error occurred while reading the specification.
    #[error("failed reading specification: {0}")]
    Io(#[from] std::io::Error),
    /// The payload could not be deserialized from JSON.
    #[error("failed parsing JSON: {0}")]
    Json(#[from] serde_json::Error),
    /// No instruction set with the requested name exists in the document.
    #[error("instruction set not found: {0}")]
    SetNotFound(String),
    /// A bit value was unquoted, contained an unexpected character, or a
    /// condition bound the same name to two different bit strings.
    #[error("malformed bit value for {context}: {value}")]
    MalformedBits {
        /// Static label of the input being processed (for example `"bits.value"`).
        context: &'static str,
        /// The offending text, or a short description of the problem.
        value: String,
    },
    /// A bit string did not have the number of characters its range or field requires.
    #[error("value width mismatch for {context}: expected {expected}, got {got}")]
    WidthMismatch {
        /// Static label of the input being processed (for example `"field.value"`).
        context: &'static str,
        /// Required number of bit characters.
        expected: usize,
        /// Number of bit characters actually supplied.
        got: usize,
    },
}
55
/// Root of a deserialized instruction specification document.
#[derive(Debug, Deserialize)]
pub struct InstructionsDoc {
    /// Instruction sets contained in the document; looked up by name when flattening.
    pub instructions: Vec<InstructionSet>,
    /// Document metadata, read from the JSON key `_meta`.
    pub _meta: Meta,
}
64
/// Metadata block of the specification document.
#[derive(Debug, Deserialize)]
pub struct Meta {
    /// License information attached to the document.
    pub license: License,
}
71
/// License information carried in the document metadata.
#[derive(Debug, Deserialize)]
pub struct License {
    /// Copyright text exactly as given in the JSON.
    pub copyright: String,
    /// Additional license text exactly as given in the JSON.
    pub info: String,
}
80
/// A named top-level instruction set: the root of a tree of groups and instructions.
#[derive(Debug, Deserialize)]
pub struct InstructionSet {
    /// Groups, instructions and aliases directly below this set.
    pub children: Vec<InstructionGroupOrInstruction>,
    /// Encoding entries applied first for every instruction inside this set.
    pub encoding: Encodeset,
    /// Name used to select the set and as the first path segment of its instructions.
    pub name: String,
}
91
/// An intermediate node of the instruction tree.
#[derive(Debug, Deserialize)]
pub struct InstructionGroup {
    /// Nested groups, instructions and aliases.
    pub children: Vec<InstructionGroupOrInstruction>,
    /// Encoding entries layered on top of the enclosing set/groups for all descendants.
    pub encoding: Encodeset,
    /// Group name; becomes one segment of the flattened instruction path.
    pub name: String,
}
102
/// A child node of a set or group, discriminated by the JSON `_type` tag.
#[derive(Debug, Deserialize)]
#[serde(tag = "_type")]
pub enum InstructionGroupOrInstruction {
    /// A nested group (`_type` = `Instruction.InstructionGroup`).
    #[serde(rename = "Instruction.InstructionGroup")]
    InstructionGroup(InstructionGroup),
    /// A concrete instruction (`_type` = `Instruction.Instruction`).
    #[serde(rename = "Instruction.Instruction")]
    Instruction(Instruction),
    /// An instruction alias (`_type` = `Instruction.InstructionAlias`); skipped when flattening.
    #[serde(rename = "Instruction.InstructionAlias")]
    InstructionAlias(InstructionAlias),
}
117
/// An ordered list of encoding entries attached to a set, group or instruction.
#[derive(Debug, Deserialize)]
pub struct Encodeset {
    /// Entries in document order; when flattening, later entries overwrite earlier ones bit by bit.
    pub values: Vec<Encode>,
}
124
/// One encoding entry, discriminated by the JSON `_type` tag.
#[derive(Debug, Deserialize)]
#[serde(tag = "_type")]
pub enum Encode {
    /// A named bit range (`_type` = `Instruction.Encodeset.Field`).
    #[serde(rename = "Instruction.Encodeset.Field")]
    Field(Field),
    /// An unnamed bit range (`_type` = `Instruction.Encodeset.Bits`).
    #[serde(rename = "Instruction.Encodeset.Bits")]
    Bits(Bits),
}
136
/// A named bit range of an encoding.
///
/// When flattened, `x` positions of `value` become bits of the named field
/// and `0`/`1` positions become fixed bits.
#[derive(Debug, Deserialize)]
pub struct Field {
    /// Field name as written in the specification.
    pub name: String,
    /// Bit range the field occupies.
    pub range: Range,
    /// Mask value from the JSON; deserialized but not read by the flattening code in this file.
    pub should_be_mask: Value,
    /// Quoted bit pattern (`0`, `1`, `x`), most significant bit first.
    pub value: Value,
}
149
/// An unnamed bit range of an encoding.
///
/// When flattened, `0`/`1` positions of `value` become fixed bits and `x`
/// positions clear whatever an earlier entry assigned to that bit.
#[derive(Debug, Deserialize)]
pub struct Bits {
    /// Bit range the pattern occupies.
    pub range: Range,
    /// Mask value from the JSON; deserialized but not read by the flattening code in this file.
    pub should_be_mask: Value,
    /// Quoted bit pattern (`0`, `1`, `x`), most significant bit first.
    pub value: Value,
}
160
/// A contiguous run of bits covering indices `start .. start + width`.
#[derive(Copy, Clone, Debug, Deserialize)]
pub struct Range {
    /// Index of the lowest bit in the range.
    pub start: u32,
    /// Number of bits in the range.
    pub width: u32,
}
169
/// Wrapper around a raw string value from the specification JSON.
#[derive(Debug, Deserialize)]
pub struct Value {
    /// Raw text, including any surrounding quote characters.
    pub value: String,
}
176
177impl Value {
178 #[must_use]
180 pub fn bit_string(&self) -> Option<&str> {
181 let raw = self.value.as_str();
182 if raw.starts_with('"') && raw.ends_with('"') {
183 return Some(&raw[1..raw.len() - 1]);
184 }
185 if raw.starts_with('\'') && raw.ends_with('\'') {
186 return Some(&raw[1..raw.len() - 1]);
187 }
188 None
189 }
190}
191
/// A variable (non-fixed) bit field of a flattened instruction.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FlatField {
    /// Field name; a name that occurs in several separate runs gets an `_<lsb>` suffix.
    pub name: String,
    /// Index of the field's least significant bit.
    pub lsb: u8,
    /// Number of contiguous bits the field occupies.
    pub width: u8,
    /// Whether the field is treated as signed, as inferred heuristically from its name and context.
    pub signed: bool,
}
204
/// One instruction with all inherited encodings merged into a single word layout.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FlatInstruction {
    /// Lower-cased leading segment of the variant name (text before the first `_`).
    pub mnemonic: String,
    /// Instruction name exactly as it appears in the specification.
    pub variant: String,
    /// Set, group and instruction names joined with `/`.
    pub path: String,
    /// Mask with a `1` for every bit whose value is fixed.
    pub fixed_mask: u32,
    /// Values of the fixed bits; only bits set in `fixed_mask` are meaningful.
    pub fixed_value: u32,
    /// Variable fields, ordered from the highest `lsb` to the lowest.
    pub fields: Vec<FlatField>,
}
221
/// What is known about a single bit of the instruction word while encodings are merged.
#[derive(Debug, Clone, PartialEq, Eq)]
enum BitCell {
    /// The bit has a fixed value (`true` = 1, `false` = 0).
    Fixed(bool),
    /// The bit belongs to the variable field with this name.
    Field(String),
}
227
228pub fn parse_instructions_json(payload: &str) -> Result<InstructionsDoc, SpecError> {
234 Ok(serde_json::from_str(payload)?)
235}
236
237pub fn parse_instructions_json_file(path: &Path) -> Result<InstructionsDoc, SpecError> {
243 let payload = fs::read_to_string(path)?;
244 parse_instructions_json(&payload)
245}
246
247pub fn flatten_instruction_set(
253 doc: &InstructionsDoc,
254 set_name: &str,
255) -> Result<Vec<FlatInstruction>, SpecError> {
256 let Some(set) = doc.instructions.iter().find(|s| s.name == set_name) else {
257 return Err(SpecError::SetNotFound(set_name.to_owned()));
258 };
259
260 let mut stack = vec![&set.encoding];
261 let mut path = vec![set.name.as_str()];
262 let mut out = Vec::new();
263 walk_children(&set.children, &mut stack, &mut path, &mut out)?;
264 Ok(out)
265}
266
267fn walk_children<'a>(
268 children: &'a [InstructionGroupOrInstruction],
269 stack: &mut Vec<&'a Encodeset>,
270 path: &mut Vec<&'a str>,
271 out: &mut Vec<FlatInstruction>,
272) -> Result<(), SpecError> {
273 for child in children {
274 match child {
275 InstructionGroupOrInstruction::InstructionGroup(group) => {
276 path.push(group.name.as_str());
277 stack.push(&group.encoding);
278 walk_children(&group.children, stack, path, out)?;
279 stack.pop();
280 path.pop();
281 }
282 InstructionGroupOrInstruction::Instruction(instruction) => {
283 stack.push(&instruction.encoding);
284 path.push(instruction.name.as_str());
285
286 let (mut fixed_mask, mut fixed_value, mut fields) = flatten_stack(stack)?;
287 if let Some(condition) = &instruction.condition {
288 apply_condition_constraints(
289 condition,
290 &mut fixed_mask,
291 &mut fixed_value,
292 &mut fields,
293 )?;
294 }
295 let mnemonic = infer_mnemonic(&instruction.name);
296 out.push(FlatInstruction {
297 mnemonic,
298 variant: instruction.name.clone(),
299 path: path.join("/"),
300 fixed_mask,
301 fixed_value,
302 fields,
303 });
304
305 path.pop();
306 stack.pop();
307 }
308 InstructionGroupOrInstruction::InstructionAlias(_) => {}
309 }
310 }
311
312 Ok(())
313}
314
/// Strips a trailing `_<digits>` suffix from `field_name`.
///
/// Returns the part before the last `_` when everything after it is one or
/// more ASCII digits (for example `imm_5` → `imm`). Otherwise the name is
/// returned unchanged. A name that merely ends in `_` is left as is.
fn base_field_name(field_name: &str) -> &str {
    match field_name.rsplit_once('_') {
        // `all` is vacuously true on an empty suffix, so explicitly require
        // at least one digit before treating the tail as a numeric suffix.
        Some((base, suffix))
            if !suffix.is_empty() && suffix.chars().all(|ch| ch.is_ascii_digit()) =>
        {
            base
        }
        _ => field_name,
    }
}
325
326fn semantic_field_name(field_name: &str) -> &str {
327 if matches!(field_name, "option_13" | "option_15") {
328 field_name
329 } else {
330 base_field_name(field_name)
331 }
332}
333
/// Reports whether `fields` contains an entry exactly equal to `name`.
fn has_semantic_field(fields: &[String], name: &str) -> bool {
    fields
        .iter()
        .map(String::as_str)
        .any(|candidate| candidate == name)
}
337
/// Checks whether bits `hi` and `lo` are both fixed and form an accepted value pair.
///
/// A bit counts as fixed when it is set in `opcode_mask`. If either bit is
/// not fixed the result is `false`. Otherwise the `(hi, lo)` bit values read
/// from `opcode` must appear in `accepted`.
fn is_fixed_bit_pair(opcode: u32, opcode_mask: u32, hi: u8, lo: u8, accepted: &[(u8, u8)]) -> bool {
    // Value of one bit, or `None` when the mask leaves that bit variable.
    let fixed_bit = |index: u8| -> Option<u8> {
        let probe = 1u32 << index;
        if opcode_mask & probe == 0 {
            None
        } else {
            Some(u8::from(opcode & probe != 0))
        }
    };

    match (fixed_bit(hi), fixed_bit(lo)) {
        (Some(hi_value), Some(lo_value)) => accepted.contains(&(hi_value, lo_value)),
        _ => false,
    }
}
351
352fn parse_condition_identifier(node: &serde_json::Value) -> Option<String> {
353 let obj = node.as_object()?;
354 if obj.get("_type")?.as_str()? != "AST.Identifier" {
355 return None;
356 }
357 Some(obj.get("value")?.as_str()?.to_ascii_lowercase())
358}
359
360fn parse_condition_bit_string(node: &serde_json::Value) -> Option<String> {
361 let obj = node.as_object()?;
362 if obj.get("_type")?.as_str()? != "Values.Value" {
363 return None;
364 }
365 let raw = obj.get("value")?.as_str()?;
366 let bits = if raw.starts_with('\'') && raw.ends_with('\'') {
367 &raw[1..raw.len() - 1]
368 } else if raw.starts_with('"') && raw.ends_with('"') {
369 &raw[1..raw.len() - 1]
370 } else {
371 return None;
372 };
373 if bits.chars().all(|ch| ch == '0' || ch == '1') {
374 Some(bits.to_owned())
375 } else {
376 None
377 }
378}
379
380fn parse_condition_equality(
381 left: &serde_json::Value,
382 right: &serde_json::Value,
383) -> Option<(String, String)> {
384 let name = parse_condition_identifier(left)?;
385 let bits = parse_condition_bit_string(right)?;
386 Some((name, bits))
387}
388
389fn collect_condition_constraints(
390 node: &serde_json::Value,
391 constraints: &mut HashMap<String, String>,
392) -> Result<(), SpecError> {
393 let Some(obj) = node.as_object() else {
394 return Ok(());
395 };
396 let Some(kind) = obj.get("_type").and_then(serde_json::Value::as_str) else {
397 return Ok(());
398 };
399 if kind != "AST.BinaryOp" {
400 return Ok(());
401 }
402
403 let Some(op) = obj.get("op").and_then(serde_json::Value::as_str) else {
404 return Ok(());
405 };
406 let Some(left) = obj.get("left") else {
407 return Ok(());
408 };
409 let Some(right) = obj.get("right") else {
410 return Ok(());
411 };
412
413 match op {
414 "&&" => {
415 collect_condition_constraints(left, constraints)?;
416 collect_condition_constraints(right, constraints)?;
417 }
418 "==" => {
419 let pair = parse_condition_equality(left, right)
420 .or_else(|| parse_condition_equality(right, left));
421 let Some((name, bits)) = pair else {
422 return Ok(());
423 };
424 match constraints.get(&name) {
425 Some(existing) if existing != &bits => {
426 return Err(SpecError::MalformedBits {
427 context: "instruction.condition",
428 value: format!(
429 "conflicting condition binding for {name}: {existing}/{bits}"
430 ),
431 });
432 }
433 Some(_) => {}
434 None => {
435 constraints.insert(name, bits);
436 }
437 }
438 }
439 _ => {}
440 }
441
442 Ok(())
443}
444
445fn apply_condition_constraints(
446 condition: &serde_json::Value,
447 fixed_mask: &mut u32,
448 fixed_value: &mut u32,
449 fields: &mut Vec<FlatField>,
450) -> Result<(), SpecError> {
451 let mut constraints = HashMap::<String, String>::new();
452 collect_condition_constraints(condition, &mut constraints)?;
453 if constraints.is_empty() {
454 return Ok(());
455 }
456
457 let mut remove_indices = Vec::<usize>::new();
458 for (idx, field) in fields.iter().enumerate() {
459 let normalized = field.name.to_ascii_lowercase();
460 let semantic = semantic_field_name(&normalized);
461 let Some(bits) = constraints
462 .get(&normalized)
463 .or_else(|| constraints.get(semantic))
464 .cloned()
465 else {
466 continue;
467 };
468
469 if bits.len() != field.width as usize {
470 return Err(SpecError::WidthMismatch {
471 context: "instruction.condition",
472 expected: field.width as usize,
473 got: bits.len(),
474 });
475 }
476
477 for (offset, ch) in bits.chars().enumerate() {
478 let bit = usize::from(field.lsb) + (usize::from(field.width) - 1 - offset);
479 *fixed_mask |= 1u32 << bit;
480 if ch == '1' {
481 *fixed_value |= 1u32 << bit;
482 } else {
483 *fixed_value &= !(1u32 << bit);
484 }
485 }
486 remove_indices.push(idx);
487 }
488
489 for idx in remove_indices.into_iter().rev() {
490 fields.remove(idx);
491 }
492 Ok(())
493}
494
/// Derives a mnemonic from an instruction variant name.
///
/// Takes the text before the first `_` (or the whole name when there is
/// none), lower-cases it and drops one trailing `.cond` if present.
fn infer_mnemonic(variant: &str) -> String {
    let leading = match variant.split_once('_') {
        Some((before, _)) => before,
        None => variant,
    };

    let mut mnemonic = leading.to_ascii_lowercase();
    if mnemonic.ends_with(".cond") {
        mnemonic.truncate(mnemonic.len() - ".cond".len());
    }
    mnemonic
}
508
509fn infer_signed_field(
510 field_name: &str,
511 opcode: u32,
512 opcode_mask: u32,
513 semantic_fields: &[String],
514) -> bool {
515 let normalized = field_name.to_ascii_lowercase();
516 let semantic_name = semantic_field_name(&normalized);
517
518 if semantic_name.starts_with("simm")
519 || semantic_name.starts_with("soffset")
520 || semantic_name.contains("offset")
521 {
522 return true;
523 }
524
525 if matches!(semantic_name, "imm26" | "imm19" | "imm14") {
526 return true;
527 }
528
529 if semantic_name == "immhi"
530 && has_semantic_field(semantic_fields, "immhi")
531 && has_semantic_field(semantic_fields, "immlo")
532 {
533 return true;
534 }
535
536 if semantic_name == "imm7"
537 && has_semantic_field(semantic_fields, "rt")
538 && has_semantic_field(semantic_fields, "rt2")
539 && has_semantic_field(semantic_fields, "rn")
540 {
541 return true;
542 }
543
544 if semantic_name == "imm9"
545 && has_semantic_field(semantic_fields, "rt")
546 && has_semantic_field(semantic_fields, "rn")
547 && is_fixed_bit_pair(opcode, opcode_mask, 11, 10, &[(0, 0), (0, 1), (1, 1)])
548 {
549 return true;
550 }
551
552 false
553}
554
555fn flatten_stack(stack: &[&Encodeset]) -> Result<(u32, u32, Vec<FlatField>), SpecError> {
556 let mut cells: [Option<BitCell>; INSTRUCTION_BITS] = core::array::from_fn(|_| None);
557
558 for encodeset in stack {
559 for enc in &encodeset.values {
560 match enc {
561 Encode::Field(field) => apply_field(&mut cells, field)?,
562 Encode::Bits(bits) => apply_bits(&mut cells, bits)?,
563 }
564 }
565 }
566
567 let mut fixed_mask = 0u32;
568 let mut fixed_value = 0u32;
569 for (idx, cell) in cells.iter().enumerate() {
570 if let Some(BitCell::Fixed(bit)) = cell {
571 fixed_mask |= 1u32 << idx;
572 if *bit {
573 fixed_value |= 1u32 << idx;
574 }
575 }
576 }
577
578 let mut fields = Vec::<FlatField>::new();
579 let mut bit = 0usize;
580 while bit < INSTRUCTION_BITS {
581 let Some(BitCell::Field(name)) = cells[bit].as_ref() else {
582 bit += 1;
583 continue;
584 };
585
586 let mut width = 1usize;
587 while bit + width < INSTRUCTION_BITS {
588 let Some(BitCell::Field(next)) = cells[bit + width].as_ref() else {
589 break;
590 };
591 if next != name {
592 break;
593 }
594 width += 1;
595 }
596
597 fields.push(FlatField {
598 name: name.clone(),
599 lsb: bit as u8,
600 width: width as u8,
601 signed: false,
602 });
603
604 bit += width;
605 }
606
607 let semantic_fields = fields
608 .iter()
609 .map(|field| {
610 let normalized = field.name.to_ascii_lowercase();
611 semantic_field_name(&normalized).to_owned()
612 })
613 .collect::<Vec<_>>();
614 for field in &mut fields {
615 field.signed = infer_signed_field(&field.name, fixed_value, fixed_mask, &semantic_fields);
616 }
617
618 dedup_split_names(&mut fields);
619 fields.sort_by(|left, right| right.lsb.cmp(&left.lsb).then(left.name.cmp(&right.name)));
620
621 Ok((fixed_mask, fixed_value, fields))
622}
623
624fn dedup_split_names(fields: &mut [FlatField]) {
625 let mut counts = HashMap::<String, usize>::new();
626 for field in fields.iter() {
627 *counts.entry(field.name.clone()).or_default() += 1;
628 }
629
630 for field in fields.iter_mut() {
631 if counts.get(&field.name).copied().unwrap_or_default() > 1 {
632 field.name = format!("{}_{}", field.name, field.lsb);
633 }
634 }
635}
636
637fn apply_bits(
638 cells: &mut [Option<BitCell>; INSTRUCTION_BITS],
639 bits: &Bits,
640) -> Result<(), SpecError> {
641 let pattern = parse_pattern(
642 bits.value.bit_string(),
643 "bits.value",
644 bits.range.width as usize,
645 )?;
646
647 for (bit_idx, ch) in bit_positions(bits.range).zip(pattern.chars()) {
648 match ch {
649 '0' => cells[bit_idx as usize] = Some(BitCell::Fixed(false)),
650 '1' => cells[bit_idx as usize] = Some(BitCell::Fixed(true)),
651 'x' => cells[bit_idx as usize] = None,
652 _ => {
653 return Err(SpecError::MalformedBits {
654 context: "bits.value",
655 value: pattern.to_owned(),
656 });
657 }
658 }
659 }
660
661 Ok(())
662}
663
664fn apply_field(
665 cells: &mut [Option<BitCell>; INSTRUCTION_BITS],
666 field: &Field,
667) -> Result<(), SpecError> {
668 let pattern = parse_pattern(
669 field.value.bit_string(),
670 "field.value",
671 field.range.width as usize,
672 )?;
673
674 for (bit_idx, ch) in bit_positions(field.range).zip(pattern.chars()) {
675 match ch {
676 'x' => cells[bit_idx as usize] = Some(BitCell::Field(field.name.clone())),
677 '0' => cells[bit_idx as usize] = Some(BitCell::Fixed(false)),
678 '1' => cells[bit_idx as usize] = Some(BitCell::Fixed(true)),
679 _ => {
680 return Err(SpecError::MalformedBits {
681 context: "field.value",
682 value: pattern.to_owned(),
683 });
684 }
685 }
686 }
687
688 Ok(())
689}
690
691fn parse_pattern<'a>(
692 value: Option<&'a str>,
693 context: &'static str,
694 expected_width: usize,
695) -> Result<&'a str, SpecError> {
696 let Some(bits) = value else {
697 return Err(SpecError::MalformedBits {
698 context,
699 value: String::from("<unquoted>"),
700 });
701 };
702
703 if bits.len() != expected_width {
704 return Err(SpecError::WidthMismatch {
705 context,
706 expected: expected_width,
707 got: bits.len(),
708 });
709 }
710
711 Ok(bits)
712}
713
714fn bit_positions(range: Range) -> impl Iterator<Item = u32> {
715 let start = range.start;
716 let width = range.width;
717 (start..start + width).rev()
718}
719
/// A concrete instruction node of the specification tree.
#[derive(Debug, Deserialize)]
pub struct Instruction {
    /// Encoding entries layered on top of those inherited from enclosing groups and the set.
    pub encoding: Encodeset,
    /// Instruction name; becomes the flattened variant name and the source of the mnemonic.
    pub name: String,
    /// Optional condition AST kept as raw JSON; `name == bits` terms pin fields when flattening.
    #[serde(default)]
    pub condition: Option<serde_json::Value>,
    /// Operation identifier from the JSON; deserialized but not read elsewhere in this file.
    pub operation_id: String,
    /// Child nodes (empty when absent in the JSON); not visited by the flattening walk.
    #[serde(default)]
    pub children: Vec<InstructionGroupOrInstruction>,
}
736
/// An alias node of the specification tree; skipped entirely when flattening.
#[derive(Debug, Deserialize)]
pub struct InstructionAlias {
    /// Alias name.
    pub name: String,
    /// Operation identifier from the JSON; deserialized but not read elsewhere in this file.
    pub operation_id: String,
    /// Child nodes (empty when absent in the JSON).
    #[serde(default)]
    pub children: Vec<InstructionGroupOrInstruction>,
}
748
#[cfg(test)]
mod tests {
    use super::*;

    // Minimal document: one set (`A64`) containing one group (`dpimm`) with a
    // single instruction whose encoding covers all 32 bits: 9 fixed opcode
    // bits at 31:23 followed by the fields sh, imm12, Rn and Rd.
    const SAMPLE: &str = r#"
{
  "_meta": { "license": { "copyright": "x", "info": "x" } },
  "instructions": [
    {
      "name": "A64",
      "encoding": { "values": [] },
      "children": [
        {
          "_type": "Instruction.InstructionGroup",
          "name": "dpimm",
          "encoding": { "values": [] },
          "children": [
            {
              "_type": "Instruction.Instruction",
              "name": "ADD_64_addsub_imm",
              "operation_id": "ADD_64_addsub_imm",
              "encoding": {
                "values": [
                  { "_type": "Instruction.Encodeset.Bits", "range": { "start": 23, "width": 9 }, "should_be_mask": { "value": "'000000000'" }, "value": { "value": "'100100010'" } },
                  { "_type": "Instruction.Encodeset.Field", "name": "sh", "range": { "start": 22, "width": 1 }, "should_be_mask": { "value": "'0'" }, "value": { "value": "'x'" } },
                  { "_type": "Instruction.Encodeset.Field", "name": "imm12", "range": { "start": 10, "width": 12 }, "should_be_mask": { "value": "'000000000000'" }, "value": { "value": "'xxxxxxxxxxxx'" } },
                  { "_type": "Instruction.Encodeset.Field", "name": "Rn", "range": { "start": 5, "width": 5 }, "should_be_mask": { "value": "'00000'" }, "value": { "value": "'xxxxx'" } },
                  { "_type": "Instruction.Encodeset.Field", "name": "Rd", "range": { "start": 0, "width": 5 }, "should_be_mask": { "value": "'00000'" }, "value": { "value": "'xxxxx'" } }
                ]
              }
            }
          ]
        }
      ]
    }
  ]
}
"#;

    // End-to-end check: parse the sample, flatten the `A64` set and verify
    // the single resulting instruction.
    #[test]
    fn parse_and_flatten() {
        let doc = parse_instructions_json(SAMPLE).expect("parse should succeed");
        let flat = flatten_instruction_set(&doc, "A64").expect("flatten should succeed");
        assert_eq!(flat.len(), 1);

        let add = &flat[0];
        // The mnemonic is the lower-cased text before the first underscore.
        assert_eq!(add.mnemonic, "add");
        assert_eq!(add.variant, "ADD_64_addsub_imm");
        // The 9-bit opcode pattern occupies bits 31:23.
        assert_eq!(add.fixed_value, 0b100100010u32 << 23);
        assert_eq!(add.fields.len(), 4);
        // Fields are ordered from the highest lsb to the lowest.
        assert_eq!(add.fields[0].name, "sh");
        assert_eq!(add.fields[0].lsb, 22);
        assert_eq!(add.fields[1].name, "imm12");
        assert_eq!(add.fields[1].lsb, 10);
    }
}