Skip to main content

jit_codegen/
core.rs

1use std::collections::{BTreeMap, BTreeSet};
2use std::fmt::Write;
3use std::fs;
4use std::path::{Path, PathBuf};
5
6use jit_spec::{FlatField, FlatInstruction};
7use thiserror::Error;
8
/// Operand constraint categories the generator can attach to an operand slot.
///
/// Every variant is rendered by `as_rust` as the `OperandConstraintKind` path
/// carrying the same variant name. The derived `Ord` allows vectors of kinds
/// to be used as `BTreeMap` keys when identical kind lists are pooled.
#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd)]
enum GeneratedOperandKind {
    GprRegister,
    Gpr32Register,
    Gpr64Register,
    SimdRegister,
    SveZRegister,
    PredicateRegister,
    Immediate,
    Condition,
    ShiftKind,
    ExtendKind,
    SysRegPart,
    Arrangement,
    Lane,
}
25
26impl GeneratedOperandKind {
27    fn as_rust(self) -> &'static str {
28        match self {
29            Self::GprRegister => "OperandConstraintKind::GprRegister",
30            Self::Gpr32Register => "OperandConstraintKind::Gpr32Register",
31            Self::Gpr64Register => "OperandConstraintKind::Gpr64Register",
32            Self::SimdRegister => "OperandConstraintKind::SimdRegister",
33            Self::SveZRegister => "OperandConstraintKind::SveZRegister",
34            Self::PredicateRegister => "OperandConstraintKind::PredicateRegister",
35            Self::Immediate => "OperandConstraintKind::Immediate",
36            Self::Condition => "OperandConstraintKind::Condition",
37            Self::ShiftKind => "OperandConstraintKind::ShiftKind",
38            Self::ExtendKind => "OperandConstraintKind::ExtendKind",
39            Self::SysRegPart => "OperandConstraintKind::SysRegPart",
40            Self::Arrangement => "OperandConstraintKind::Arrangement",
41            Self::Lane => "OperandConstraintKind::Lane",
42        }
43    }
44}
45
/// Width hint associated with an instruction variant.
///
/// NOTE(review): not referenced in the visible part of this file; `W32` and
/// `W64` presumably stand for 32-bit and 64-bit forms — confirm at the use site.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum VariantWidthHint {
    W32,
    W64,
    /// No width could be determined.
    Unknown,
}
52
53#[derive(Debug, Copy, Clone, PartialEq, Eq)]
54struct OrderedField {
55    index: u8,
56    rank: u16,
57    kind: GeneratedOperandKind,
58}
59
60#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
61struct GeneratedSplitImmediatePlan {
62    first_slot: u8,
63    second_slot: u8,
64    kind: GeneratedSplitImmediateKind,
65}
66
/// Flavours of split immediate the generator knows how to describe.
///
/// Each variant is emitted as the `SplitImmediateKindSpec` variant of the same
/// name with the same field names.
/// NOTE(review): the `*_field_index` values presumably index the variant's
/// field list — confirm against the code that derives the plan.
#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd)]
enum GeneratedSplitImmediateKind {
    /// Immediate split into an `immlo` and an `immhi` field, with a scale.
    AdrLike {
        immlo_field_index: u8,
        immhi_field_index: u8,
        scale: i64,
    },
    /// Immediate split into a `b5` and a `b40` field.
    BitIndex6 {
        b5_field_index: u8,
        b40_field_index: u8,
    },
}
79
/// Memory addressing form required by a variant, if any.
///
/// Rendered by `as_rust` as the `MemoryAddressingConstraintSpec` variant of
/// the same name; also folded into the operand shape key.
#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd)]
enum GeneratedMemoryAddressingConstraint {
    /// The variant carries no memory addressing constraint.
    None,
    Offset,
    PreIndex,
    PostIndex,
}
87
88#[derive(Debug, Clone)]
89struct PreparedVariant {
90    inst: FlatInstruction,
91    operand_order: Vec<u8>,
92    operand_kinds: Vec<GeneratedOperandKind>,
93    user_kinds: Vec<GeneratedOperandKind>,
94    implicit_defaults: Vec<(u8, i64)>,
95    memory_addressing: GeneratedMemoryAddressingConstraint,
96    field_scales: Vec<u16>,
97    split_plan: Option<GeneratedSplitImmediatePlan>,
98    gpr32_extend_compatibility: u64,
99    user_shape_key: u128,
100}
101
102impl GeneratedMemoryAddressingConstraint {
103    fn as_rust(self) -> &'static str {
104        match self {
105            Self::None => "MemoryAddressingConstraintSpec::None",
106            Self::Offset => "MemoryAddressingConstraintSpec::Offset",
107            Self::PreIndex => "MemoryAddressingConstraintSpec::PreIndex",
108            Self::PostIndex => "MemoryAddressingConstraintSpec::PostIndex",
109        }
110    }
111}
112
/// Compact view of an instruction used when deriving operand metadata.
#[derive(Clone, Debug)]
struct InstructionContext {
    /// Values of the instruction's fixed bits.
    opcode: u32,
    /// Mask selecting which bits of `opcode` are fixed.
    opcode_mask: u32,
    /// Semantic name of every field, in field order.
    semantic_fields: Vec<String>,
}
119
120impl InstructionContext {
121    fn from_instruction(inst: &FlatInstruction) -> Self {
122        let mut semantic_fields = Vec::with_capacity(inst.fields.len());
123        for field in &inst.fields {
124            let normalized = normalize_field_name(&field.name);
125            semantic_fields.push(semantic_field_name(&normalized).to_owned());
126        }
127
128        Self {
129            opcode: inst.fixed_value,
130            opcode_mask: inst.fixed_mask,
131            semantic_fields,
132        }
133    }
134
135    #[cfg(test)]
136    fn from_semantic_fields(opcode: u32, opcode_mask: u32, semantic_fields: Vec<String>) -> Self {
137        Self {
138            opcode,
139            opcode_mask,
140            semantic_fields,
141        }
142    }
143
144    fn has_field(&self, field_name: &str) -> bool {
145        self.semantic_fields.iter().any(|name| name == field_name)
146    }
147
148    fn bit_value(&self, bit: u8) -> Option<u8> {
149        let mask = 1u32 << bit;
150        if (self.opcode_mask & mask) == 0 {
151            return None;
152        }
153        Some(((self.opcode & mask) != 0) as u8)
154    }
155
156    fn memory_like(&self) -> bool {
157        let has_base = self.has_field("rn");
158        let has_data = self.has_field("rt")
159            || self.has_field("rt2")
160            || self.has_field("rt3")
161            || self.has_field("rt4");
162        let has_offset = self.has_field("imm7")
163            || self.has_field("imm9")
164            || self.has_field("imm12")
165            || self.has_field("rm")
166            || self.has_field("option")
167            || self.has_field("s")
168            || self.has_field("xs");
169        has_base && has_data && has_offset
170    }
171}
172
173#[inline]
174fn kind_shape_code(kind: GeneratedOperandKind) -> u8 {
175    match kind {
176        GeneratedOperandKind::GprRegister => 1,
177        GeneratedOperandKind::Gpr32Register => 2,
178        GeneratedOperandKind::Gpr64Register => 3,
179        GeneratedOperandKind::SimdRegister => 4,
180        GeneratedOperandKind::SveZRegister => 5,
181        GeneratedOperandKind::PredicateRegister => 6,
182        GeneratedOperandKind::Immediate => 7,
183        GeneratedOperandKind::Condition => 8,
184        GeneratedOperandKind::ShiftKind => 9,
185        GeneratedOperandKind::ExtendKind => 10,
186        GeneratedOperandKind::SysRegPart => 11,
187        GeneratedOperandKind::Arrangement => 12,
188        GeneratedOperandKind::Lane => 13,
189    }
190}
191
192#[inline]
193fn memory_shape_code(memory_addressing: GeneratedMemoryAddressingConstraint) -> Option<u8> {
194    match memory_addressing {
195        GeneratedMemoryAddressingConstraint::None => None,
196        GeneratedMemoryAddressingConstraint::Offset => Some(14),
197        GeneratedMemoryAddressingConstraint::PreIndex => Some(15),
198        // 0 stays distinct because encoded length is part of the shape key.
199        GeneratedMemoryAddressingConstraint::PostIndex => Some(0),
200    }
201}
202
203#[inline]
204fn encode_operand_shape_key(
205    kinds: &[GeneratedOperandKind],
206    memory_addressing: GeneratedMemoryAddressingConstraint,
207) -> Option<u128> {
208    let memory_kind = memory_shape_code(memory_addressing);
209    let total_len = kinds.len() + usize::from(memory_kind.is_some());
210    if total_len > 30 {
211        return None;
212    }
213
214    let mut key = total_len as u128;
215    for (idx, kind) in kinds.iter().copied().enumerate() {
216        let shift = 8 + (idx * 4);
217        key |= u128::from(kind_shape_code(kind)) << shift;
218    }
219    if let Some(memory_kind) = memory_kind {
220        let shift = 8 + (kinds.len() * 4);
221        key |= u128::from(memory_kind) << shift;
222    }
223    Some(key)
224}
225
226fn expected_user_operand_kinds(
227    operand_kinds: &[GeneratedOperandKind],
228    split_plan: Option<GeneratedSplitImmediatePlan>,
229) -> Vec<GeneratedOperandKind> {
230    let mut out = Vec::with_capacity(operand_kinds.len());
231    let mut slot = 0usize;
232    while slot < operand_kinds.len() {
233        if let Some(plan) = split_plan
234            && slot == usize::from(plan.first_slot)
235        {
236            out.push(GeneratedOperandKind::Immediate);
237            slot = slot.saturating_add(2);
238            continue;
239        }
240        out.push(operand_kinds[slot]);
241        slot += 1;
242    }
243    out
244}
245
246#[inline]
247fn generated_kind_matches(expected: GeneratedOperandKind, actual: GeneratedOperandKind) -> bool {
248    if expected == actual {
249        return true;
250    }
251
252    matches!(
253        (expected, actual),
254        (
255            GeneratedOperandKind::GprRegister,
256            GeneratedOperandKind::Gpr32Register
257        ) | (
258            GeneratedOperandKind::GprRegister,
259            GeneratedOperandKind::Gpr64Register
260        ) | (
261            GeneratedOperandKind::SysRegPart,
262            GeneratedOperandKind::Immediate
263        )
264    )
265}
266
267#[inline]
268fn generated_kind_matches_for_slot(
269    variant: &PreparedVariant,
270    slot: usize,
271    actual: GeneratedOperandKind,
272) -> bool {
273    let Some(expected) = variant.user_kinds.get(slot).copied() else {
274        return false;
275    };
276    if generated_kind_matches(expected, actual) {
277        return true;
278    }
279    if !(expected == GeneratedOperandKind::Gpr64Register
280        && actual == GeneratedOperandKind::Gpr32Register)
281    {
282        return false;
283    }
284    if slot >= u64::BITS as usize {
285        return false;
286    }
287    ((variant.gpr32_extend_compatibility >> slot) & 1) != 0
288}
289
290#[inline]
291fn generated_kind_specificity(kind: GeneratedOperandKind) -> u16 {
292    match kind {
293        GeneratedOperandKind::GprRegister => 3,
294        GeneratedOperandKind::Gpr32Register
295        | GeneratedOperandKind::Gpr64Register
296        | GeneratedOperandKind::SimdRegister
297        | GeneratedOperandKind::SveZRegister
298        | GeneratedOperandKind::PredicateRegister => 4,
299        GeneratedOperandKind::Immediate => 1,
300        GeneratedOperandKind::Condition
301        | GeneratedOperandKind::ShiftKind
302        | GeneratedOperandKind::ExtendKind
303        | GeneratedOperandKind::SysRegPart
304        | GeneratedOperandKind::Arrangement
305        | GeneratedOperandKind::Lane => 2,
306    }
307}
308
309#[inline]
310fn generated_variant_rank(variant: &PreparedVariant) -> u64 {
311    let fixed_bits = u64::from(
312        variant
313            .inst
314            .fixed_mask
315            .count_ones()
316            .min(u32::from(u16::MAX)) as u16,
317    );
318
319    let mut kind_specificity = 0u16;
320    let mut immediate_narrowness = 0u16;
321    for (slot, kind) in variant.operand_kinds.iter().copied().enumerate() {
322        kind_specificity = kind_specificity.saturating_add(generated_kind_specificity(kind));
323        if kind != GeneratedOperandKind::Immediate {
324            continue;
325        }
326        let Some(field_idx) = variant.operand_order.get(slot).copied() else {
327            continue;
328        };
329        let field_idx = usize::from(field_idx);
330        if let Some(field) = variant.inst.fields.get(field_idx) {
331            immediate_narrowness =
332                immediate_narrowness.saturating_add((64u16).saturating_sub(u16::from(field.width)));
333        }
334    }
335
336    let explicit_operands = variant
337        .operand_order
338        .len()
339        .saturating_sub(variant.implicit_defaults.len())
340        .min(usize::from(u8::MAX)) as u8;
341
342    let implicit_penalty = (u8::MAX as usize)
343        .saturating_sub(variant.implicit_defaults.len().min(usize::from(u8::MAX)))
344        as u8;
345
346    (fixed_bits << 48)
347        | (u64::from(kind_specificity) << 32)
348        | (u64::from(immediate_narrowness) << 16)
349        | (u64::from(explicit_operands) << 8)
350        | u64::from(implicit_penalty)
351}
352
353fn user_shape_candidates(
354    variants: &[PreparedVariant],
355    input_kinds: &[GeneratedOperandKind],
356    memory_addressing: Option<GeneratedMemoryAddressingConstraint>,
357) -> Vec<usize> {
358    let mut out = Vec::new();
359    for (idx, variant) in variants.iter().enumerate() {
360        if let Some(memory_addressing) = memory_addressing
361            && variant.memory_addressing != memory_addressing
362        {
363            continue;
364        }
365        if variant.user_kinds.len() != input_kinds.len() {
366            continue;
367        }
368        let mut compatible = true;
369        for (slot, actual) in input_kinds.iter().copied().enumerate() {
370            if !generated_kind_matches_for_slot(variant, slot, actual) {
371                compatible = false;
372                break;
373            }
374        }
375        if compatible {
376            out.push(idx);
377        }
378    }
379    out
380}
381
382fn sorted_flat_instructions(flat: &[FlatInstruction]) -> Vec<FlatInstruction> {
383    let mut ordered = flat.to_vec();
384    ordered.sort_by(|lhs, rhs| {
385        lhs.mnemonic
386            .cmp(&rhs.mnemonic)
387            .then(lhs.variant.cmp(&rhs.variant))
388    });
389    ordered
390}
391
392fn prepare_variants(flat: &[FlatInstruction]) -> Result<Vec<PreparedVariant>, CodegenError> {
393    let ordered = sorted_flat_instructions(flat);
394    let mut prepared = Vec::with_capacity(ordered.len());
395
396    for inst in ordered {
397        let (operand_order, operand_kinds, implicit_defaults) = derive_operand_metadata(&inst)?;
398        let memory_addressing = derive_memory_addressing_constraint(&inst);
399        let field_scales = derive_field_scales(&inst);
400        let split_plan = derive_split_immediate_plan(&inst, &operand_order, &operand_kinds);
401        let gpr32_extend_compatibility =
402            derive_gpr32_extend_compatibility(&inst, &operand_order, &operand_kinds);
403        let user_kinds = expected_user_operand_kinds(&operand_kinds, split_plan);
404        let user_shape_key =
405            encode_operand_shape_key(&user_kinds, memory_addressing).ok_or_else(|| {
406                CodegenError::Parse {
407                    path: inst.path.clone(),
408                    message: format!(
409                        "operand shape for variant {} exceeds encoder key capacity",
410                        inst.variant
411                    ),
412                }
413            })?;
414
415        prepared.push(PreparedVariant {
416            inst,
417            operand_order,
418            operand_kinds,
419            user_kinds,
420            implicit_defaults,
421            memory_addressing,
422            field_scales,
423            split_plan,
424            gpr32_extend_compatibility,
425            user_shape_key,
426        });
427    }
428
429    Ok(prepared)
430}
431
/// Hashes `mnemonic` with a seeded 64-bit FNV-1a: the standard offset basis is
/// XOR-ed with `seed`, then each byte is XOR-ed in and the state multiplied by
/// the FNV prime (wrapping).
#[inline]
fn hash_mnemonic_with_seed(mnemonic: &str, seed: u64) -> u64 {
    const OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const PRIME: u64 = 0x100_0000_01b3;

    mnemonic
        .bytes()
        .fold(OFFSET_BASIS ^ seed, |state, byte| {
            (state ^ u64::from(byte)).wrapping_mul(PRIME)
        })
}
441
/// Returns the smallest power of two that is at least `value`, with a floor
/// of 1 (so both 0 and 1 map to 1).
fn next_pow2(value: usize) -> usize {
    value.max(1).next_power_of_two()
}
449
450fn build_mnemonic_perfect_hash(
451    mnemonics: &[String],
452) -> Result<(u64, usize, Vec<u16>), CodegenError> {
453    let count = mnemonics.len();
454    let mut table_size = next_pow2(count.saturating_mul(2));
455    if table_size < 8 {
456        table_size = 8;
457    }
458
459    for size_attempt in 0..6usize {
460        let size = table_size << size_attempt;
461        let mask = size - 1;
462        for seed in 0u64..200_000u64 {
463            let mut table = vec![u16::MAX; size];
464            let mut ok = true;
465            for (mnemonic_index, mnemonic) in mnemonics.iter().enumerate() {
466                let slot = (hash_mnemonic_with_seed(mnemonic, seed) as usize) & mask;
467                if table[slot] != u16::MAX {
468                    ok = false;
469                    break;
470                }
471                if mnemonic_index > usize::from(u16::MAX) {
472                    return Err(CodegenError::Parse {
473                        path: String::from("generated_specs"),
474                        message: String::from("too many mnemonics to fit in u16 dispatch table"),
475                    });
476                }
477                table[slot] = mnemonic_index as u16;
478            }
479            if ok {
480                return Ok((seed, size, table));
481            }
482        }
483    }
484
485    Err(CodegenError::Parse {
486        path: String::from("generated_specs"),
487        message: String::from("failed to build collision-free mnemonic hash table"),
488    })
489}
490
491fn usize_to_u16(value: usize, context: &str) -> Result<u16, CodegenError> {
492    u16::try_from(value).map_err(|_| CodegenError::Parse {
493        path: String::from("generated_specs"),
494        message: format!("{context} exceeds u16 capacity"),
495    })
496}
497
498fn field_signature(fields: &[FlatField]) -> String {
499    let mut sig = String::new();
500    for field in fields {
501        let _ = write!(
502            &mut sig,
503            "{}:{}:{}:{}|",
504            field.name, field.lsb, field.width, field.signed
505        );
506    }
507    sig
508}
509
/// Interns `value` in `pool` and returns its index.
///
/// `map` mirrors `pool` as a lookup from contents to index. If `value` is
/// already present its existing index is returned; otherwise a copy is
/// appended to `pool`, recorded in `map`, and the new index is returned.
fn intern_pool<T: Clone + Ord>(
    pool: &mut Vec<Vec<T>>,
    map: &mut BTreeMap<Vec<T>, usize>,
    value: &[T],
) -> usize {
    if let Some(&known) = map.get(value) {
        return known;
    }

    let fresh = pool.len();
    map.insert(value.to_vec(), fresh);
    pool.push(value.to_vec());
    fresh
}
525
526/// Errors emitted by codegen.
527#[derive(Debug, Error)]
528pub enum CodegenError {
529    /// No instruction variants were provided.
530    #[error("no instruction variants to generate")]
531    EmptyInput,
532    /// IO failure while scanning generated Rust source files.
533    #[error("io error: {0}")]
534    Io(#[from] std::io::Error),
535    /// Parse failure while extracting instruction metadata from Rust source.
536    #[error("parse error in {path}: {message}")]
537    Parse {
538        /// File path where parsing failed.
539        path: String,
540        /// Human-readable parse detail.
541        message: String,
542    },
543    /// Encountered an unmapped operand field while deriving constraints.
544    #[error("unmapped operand field in {variant}: {field} (width={width})")]
545    UnmappedOperandField {
546        /// Variant name.
547        variant: String,
548        /// Field name.
549        field: String,
550        /// Field width.
551        width: u8,
552    },
553}
554
555/// Generates a Rust module that declares encoding specs for all provided variants.
556///
557/// # Errors
558///
559/// Returns [`CodegenError`] when input is empty.
560pub fn generate_encoder_module(flat: &[FlatInstruction]) -> Result<String, CodegenError> {
561    if flat.is_empty() {
562        return Err(CodegenError::EmptyInput);
563    }
564
565    let prepared = prepare_variants(flat)?;
566    let mut field_pool = Vec::<Vec<FlatField>>::new();
567    let mut field_pool_map = BTreeMap::<String, usize>::new();
568    let mut field_pool_idx = Vec::<usize>::with_capacity(prepared.len());
569    let mut operand_order_pool = Vec::<Vec<u8>>::new();
570    let mut operand_order_pool_map = BTreeMap::<Vec<u8>, usize>::new();
571    let mut operand_order_pool_idx = Vec::<usize>::with_capacity(prepared.len());
572    let mut operand_kinds_pool = Vec::<Vec<GeneratedOperandKind>>::new();
573    let mut operand_kinds_pool_map = BTreeMap::<Vec<GeneratedOperandKind>, usize>::new();
574    let mut operand_kinds_pool_idx = Vec::<usize>::with_capacity(prepared.len());
575    let mut implicit_defaults_pool = Vec::<Vec<(u8, i64)>>::new();
576    let mut implicit_defaults_pool_map = BTreeMap::<Vec<(u8, i64)>, usize>::new();
577    let mut implicit_defaults_pool_idx = Vec::<usize>::with_capacity(prepared.len());
578    let mut field_scales_pool = Vec::<Vec<u16>>::new();
579    let mut field_scales_pool_map = BTreeMap::<Vec<u16>, usize>::new();
580    let mut field_scales_pool_idx = Vec::<usize>::with_capacity(prepared.len());
581    let mut split_plan_pool = Vec::<Option<GeneratedSplitImmediatePlan>>::new();
582    let mut split_plan_pool_map = BTreeMap::<Option<GeneratedSplitImmediatePlan>, usize>::new();
583    let mut split_plan_pool_idx = Vec::<usize>::with_capacity(prepared.len());
584
585    for variant in &prepared {
586        let sig = field_signature(&variant.inst.fields);
587        let idx = if let Some(existing) = field_pool_map.get(&sig).copied() {
588            existing
589        } else {
590            let next = field_pool.len();
591            field_pool_map.insert(sig, next);
592            field_pool.push(variant.inst.fields.clone());
593            next
594        };
595        field_pool_idx.push(idx);
596        operand_order_pool_idx.push(intern_pool(
597            &mut operand_order_pool,
598            &mut operand_order_pool_map,
599            &variant.operand_order,
600        ));
601        operand_kinds_pool_idx.push(intern_pool(
602            &mut operand_kinds_pool,
603            &mut operand_kinds_pool_map,
604            &variant.operand_kinds,
605        ));
606        implicit_defaults_pool_idx.push(intern_pool(
607            &mut implicit_defaults_pool,
608            &mut implicit_defaults_pool_map,
609            &variant.implicit_defaults,
610        ));
611        field_scales_pool_idx.push(intern_pool(
612            &mut field_scales_pool,
613            &mut field_scales_pool_map,
614            &variant.field_scales,
615        ));
616        let split_idx =
617            if let Some(existing) = split_plan_pool_map.get(&variant.split_plan).copied() {
618                existing
619            } else {
620                let next = split_plan_pool.len();
621                split_plan_pool_map.insert(variant.split_plan, next);
622                split_plan_pool.push(variant.split_plan);
623                next
624            };
625        split_plan_pool_idx.push(split_idx);
626    }
627
628    let mut out = String::new();
629    out.push_str("// @generated by jit-codegen. DO NOT EDIT.\n");
630    out.push_str(
631        "use jit_core::{BitFieldSpec, EncodeError, EncodingSpec, ImplicitField, InstructionCode, MemoryAddressingConstraintSpec, Operand, OperandConstraintKind, SplitImmediateKindSpec, SplitImmediatePlanSpec};\n\n",
632    );
633
634    for (idx, fields) in field_pool.iter().enumerate() {
635        let fields_ident = format!("FIELDS_{idx}");
636
637        writeln!(&mut out, "const {fields_ident}: &[BitFieldSpec] = &[").expect("write string");
638
639        for field in fields {
640            writeln!(
641                &mut out,
642                "    BitFieldSpec {{ name: {:?}, lsb: {}, width: {}, signed: {} }},",
643                field.name, field.lsb, field.width, field.signed
644            )
645            .expect("write string");
646        }
647
648        out.push_str("];\n\n");
649    }
650
651    for (idx, order) in operand_order_pool.iter().enumerate() {
652        let ident = format!("OPERAND_ORDER_{idx}");
653        write!(&mut out, "const {ident}: &[u8] = &[").expect("write string");
654        for (slot, value) in order.iter().copied().enumerate() {
655            if slot > 0 {
656                out.push_str(", ");
657            }
658            write!(&mut out, "{value}").expect("write string");
659        }
660        out.push_str("];\n");
661    }
662    out.push('\n');
663
664    for (idx, kinds) in operand_kinds_pool.iter().enumerate() {
665        let ident = format!("OPERAND_KINDS_{idx}");
666        write!(&mut out, "const {ident}: &[OperandConstraintKind] = &[").expect("write string");
667        for (slot, kind) in kinds.iter().copied().enumerate() {
668            if slot > 0 {
669                out.push_str(", ");
670            }
671            out.push_str(kind.as_rust());
672        }
673        out.push_str("];\n");
674    }
675    out.push('\n');
676
677    for (idx, defaults) in implicit_defaults_pool.iter().enumerate() {
678        let ident = format!("IMPLICIT_DEFAULTS_{idx}");
679        write!(&mut out, "const {ident}: &[ImplicitField] = &[").expect("write string");
680        for (slot, (field_index, value)) in defaults.iter().copied().enumerate() {
681            if slot > 0 {
682                out.push_str(", ");
683            }
684            write!(
685                &mut out,
686                "ImplicitField {{ field_index: {field_index}, value: {value} }}"
687            )
688            .expect("write string");
689        }
690        out.push_str("];\n");
691    }
692    out.push('\n');
693
694    for (idx, scales) in field_scales_pool.iter().enumerate() {
695        let ident = format!("FIELD_SCALES_{idx}");
696        write!(&mut out, "const {ident}: &[u16] = &[").expect("write string");
697        for (slot, scale) in scales.iter().copied().enumerate() {
698            if slot > 0 {
699                out.push_str(", ");
700            }
701            write!(&mut out, "{scale}").expect("write string");
702        }
703        out.push_str("];\n");
704    }
705    out.push('\n');
706
707    for (idx, split_plan) in split_plan_pool.iter().copied().enumerate() {
708        let ident = format!("SPLIT_PLAN_{idx}");
709        let rhs = match split_plan {
710            Some(GeneratedSplitImmediatePlan {
711                first_slot,
712                second_slot,
713                kind:
714                    GeneratedSplitImmediateKind::AdrLike {
715                        immlo_field_index,
716                        immhi_field_index,
717                        scale,
718                    },
719            }) => format!(
720                "Some(SplitImmediatePlanSpec {{ first_slot: {first_slot}, second_slot: {second_slot}, kind: SplitImmediateKindSpec::AdrLike {{ immlo_field_index: {immlo_field_index}, immhi_field_index: {immhi_field_index}, scale: {scale} }} }})"
721            ),
722            Some(GeneratedSplitImmediatePlan {
723                first_slot,
724                second_slot,
725                kind:
726                    GeneratedSplitImmediateKind::BitIndex6 {
727                        b5_field_index,
728                        b40_field_index,
729                    },
730            }) => format!(
731                "Some(SplitImmediatePlanSpec {{ first_slot: {first_slot}, second_slot: {second_slot}, kind: SplitImmediateKindSpec::BitIndex6 {{ b5_field_index: {b5_field_index}, b40_field_index: {b40_field_index} }} }})"
732            ),
733            None => String::from("None"),
734        };
735        writeln!(
736            &mut out,
737            "const {ident}: Option<SplitImmediatePlanSpec> = {rhs};"
738        )
739        .expect("write string");
740    }
741    out.push('\n');
742
743    out.push_str(
744        "\
745const fn make_spec(\n\
746    mnemonic: &'static str,\n\
747    variant: &'static str,\n\
748    opcode: u32,\n\
749    opcode_mask: u32,\n\
750    fields: &'static [BitFieldSpec],\n\
751    operand_order: &'static [u8],\n\
752    operand_kinds: &'static [OperandConstraintKind],\n\
753    implicit_defaults: &'static [ImplicitField],\n\
754    memory_addressing: MemoryAddressingConstraintSpec,\n\
755    field_scales: &'static [u16],\n\
756    split_immediate_plan: Option<SplitImmediatePlanSpec>,\n\
757    gpr32_extend_compatibility: u64,\n\
758) -> EncodingSpec {\n\
759    EncodingSpec {\n\
760        mnemonic,\n\
761        variant,\n\
762        opcode,\n\
763        opcode_mask,\n\
764        fields,\n\
765        operand_order,\n\
766        operand_kinds,\n\
767        implicit_defaults,\n\
768        memory_addressing,\n\
769        field_scales,\n\
770        split_immediate_plan,\n\
771        gpr32_extend_compatibility,\n\
772    }\n\
773}\n\n",
774    );
775
776    out.push_str(
777        "\
778#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]\n\
779/// Opaque identifier of one canonical instruction variant.\n\
780pub struct VariantId(pub u16);\n\n\
781#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]\n\
782/// Opaque identifier of one canonical mnemonic dispatch bucket.\n\
783pub struct MnemonicId(pub u16);\n\n",
784    );
785
786    let mut variant_spec_exprs = Vec::<String>::with_capacity(prepared.len());
787    for (idx, variant) in prepared.iter().enumerate() {
788        let inst = &variant.inst;
789        let fields_ident = format!("FIELDS_{}", field_pool_idx[idx]);
790        let operand_order_ident = format!("OPERAND_ORDER_{}", operand_order_pool_idx[idx]);
791        let operand_kinds_ident = format!("OPERAND_KINDS_{}", operand_kinds_pool_idx[idx]);
792        let implicit_defaults_ident =
793            format!("IMPLICIT_DEFAULTS_{}", implicit_defaults_pool_idx[idx]);
794        let field_scales_ident = format!("FIELD_SCALES_{}", field_scales_pool_idx[idx]);
795        let split_plan_ident = format!("SPLIT_PLAN_{}", split_plan_pool_idx[idx]);
796        variant_spec_exprs.push(format!(
797            "make_spec({:?}, {:?}, 0x{:08x}, 0x{:08x}, {fields_ident}, {operand_order_ident}, {operand_kinds_ident}, {implicit_defaults_ident}, {}, {field_scales_ident}, {split_plan_ident}, {}u64)",
798            inst.mnemonic,
799            inst.variant,
800            inst.fixed_value,
801            inst.fixed_mask,
802            variant.memory_addressing.as_rust(),
803            variant.gpr32_extend_compatibility
804        ));
805    }
806
807    out.push_str("/// Generated encoding specs.\n");
808    out.push_str("pub static SPECS: &[EncodingSpec] = &[\n");
809    for expr in &variant_spec_exprs {
810        writeln!(&mut out, "    {expr},").expect("write string");
811    }
812    out.push_str("];\n");
813
814    out.push_str("/// Total number of generated canonical variants.\n");
815    writeln!(
816        &mut out,
817        "pub const VARIANT_COUNT: usize = {};\n",
818        prepared.len()
819    )
820    .expect("write string");
821
822    out.push_str(
823        "\
824#[derive(Debug, Copy, Clone, PartialEq, Eq)]\n\
825pub(crate) struct MnemonicDispatchEntry {\n\
826    pub name: &'static str,\n\
827    pub spec_start: u16,\n\
828    pub spec_len: u16,\n\
829    pub shape_start: u16,\n\
830    pub shape_len: u16,\n\
831}\n\n\
832#[derive(Debug, Copy, Clone, PartialEq, Eq)]\n\
833pub(crate) struct ShapeDispatchEntry {\n\
834    pub key: u128,\n\
835    pub start: u16,\n\
836    pub len: u16,\n\
837}\n\n",
838    );
839
840    #[derive(Debug)]
841    struct MnemonicEntry {
842        name: String,
843        spec_start: u16,
844        spec_len: u16,
845        shape_start: u16,
846        shape_len: u16,
847    }
848
849    let mut mnemonic_entries = Vec::<MnemonicEntry>::new();
850    let mut shape_entries = Vec::<(u128, u16, u16)>::new();
851    let mut shape_variant_ids = Vec::<u16>::new();
852
853    let mut idx = 0usize;
854    while idx < prepared.len() {
855        let start = idx;
856        let mnemonic = prepared[idx].inst.mnemonic.clone();
857        idx += 1;
858        while idx < prepared.len() && prepared[idx].inst.mnemonic == mnemonic {
859            idx += 1;
860        }
861        let end = idx;
862
863        let shape_start = usize_to_u16(shape_entries.len(), "shape entry start")?;
864        let mnemonic_variants = &prepared[start..end];
865        let mut input_shapes = BTreeMap::<
866            u128,
867            (
868                Vec<GeneratedOperandKind>,
869                GeneratedMemoryAddressingConstraint,
870            ),
871        >::new();
872        for variant in mnemonic_variants {
873            input_shapes
874                .entry(variant.user_shape_key)
875                .or_insert_with(|| (variant.user_kinds.clone(), variant.memory_addressing));
876        }
877
878        for (shape_key, (input_kinds, memory_addressing)) in input_shapes {
879            let mut candidates =
880                user_shape_candidates(mnemonic_variants, &input_kinds, Some(memory_addressing));
881            if candidates.is_empty() {
882                continue;
883            }
884            candidates.sort_by(|lhs, rhs| {
885                let left = generated_variant_rank(&mnemonic_variants[*lhs]);
886                let right = generated_variant_rank(&mnemonic_variants[*rhs]);
887                right.cmp(&left)
888            });
889            let start_variant_idx = usize_to_u16(shape_variant_ids.len(), "shape variant start")?;
890            let variants_len = usize_to_u16(candidates.len(), "shape variant len")?;
891            for local_variant_idx in candidates {
892                shape_variant_ids.push(usize_to_u16(start + local_variant_idx, "variant id")?);
893            }
894            shape_entries.push((shape_key, start_variant_idx, variants_len));
895        }
896        let shape_end = usize_to_u16(shape_entries.len(), "shape entry end")?;
897
898        mnemonic_entries.push(MnemonicEntry {
899            name: mnemonic,
900            spec_start: usize_to_u16(start, "mnemonic spec start")?,
901            spec_len: usize_to_u16(end - start, "mnemonic spec len")?,
902            shape_start,
903            shape_len: shape_end - shape_start,
904        });
905    }
906
907    out.push_str("pub(crate) static SHAPE_VARIANT_IDS: &[VariantId] = &[\n");
908    for variant_id in &shape_variant_ids {
909        writeln!(&mut out, "    VariantId({variant_id}),").expect("write string");
910    }
911    out.push_str("];\n");
912
913    out.push_str("pub(crate) static SHAPE_DISPATCH: &[ShapeDispatchEntry] = &[\n");
914    for (shape_key, start, len) in &shape_entries {
915        writeln!(
916            &mut out,
917            "    ShapeDispatchEntry {{ key: {shape_key}u128, start: {start}, len: {len} }},"
918        )
919        .expect("write string");
920    }
921    out.push_str("];\n");
922
923    out.push_str("pub(crate) static MNEMONIC_DISPATCH: &[MnemonicDispatchEntry] = &[\n");
924    for entry in &mnemonic_entries {
925        writeln!(
926            &mut out,
927            "    MnemonicDispatchEntry {{ name: {:?}, spec_start: {}, spec_len: {}, shape_start: {}, shape_len: {} }},",
928            entry.name, entry.spec_start, entry.spec_len, entry.shape_start, entry.shape_len
929        )
930        .expect("write string");
931    }
932    out.push_str("];\n\n");
933
934    let mnemonic_names = mnemonic_entries
935        .iter()
936        .map(|entry| entry.name.clone())
937        .collect::<Vec<_>>();
938    let (hash_seed, hash_size, hash_table) = build_mnemonic_perfect_hash(&mnemonic_names)?;
939
940    writeln!(
941        &mut out,
942        "const MNEMONIC_HASH_SEED: u64 = {hash_seed}u64;\nconst MNEMONIC_HASH_MASK: usize = {};\n",
943        hash_size - 1
944    )
945    .expect("write string");
946
947    out.push_str("pub(crate) static MNEMONIC_HASH_TABLE: &[u16] = &[\n");
948    for slot in hash_table {
949        writeln!(&mut out, "    {slot},").expect("write string");
950    }
951    out.push_str("];\n\n");
952
953    out.push_str(
954        "\
955#[inline]\n\
956fn hash_mnemonic(mnemonic: &str) -> u64 {\n\
957    let mut hash = 0xcbf2_9ce4_8422_2325u64 ^ MNEMONIC_HASH_SEED;\n\
958    for byte in mnemonic.as_bytes() {\n\
959        hash ^= u64::from(*byte);\n\
960        hash = hash.wrapping_mul(0x100_0000_01b3);\n\
961    }\n\
962    hash\n\
963}\n\n\
964pub(crate) fn mnemonic_id_from_str(mnemonic: &str) -> Option<MnemonicId> {\n\
965    let slot = (hash_mnemonic(mnemonic) as usize) & MNEMONIC_HASH_MASK;\n\
966    let entry = *MNEMONIC_HASH_TABLE.get(slot)?;\n\
967    if entry == u16::MAX {\n\
968        return None;\n\
969    }\n\
970    let index = usize::from(entry);\n\
971    let dispatch = *MNEMONIC_DISPATCH.get(index)?;\n\
972    if dispatch.name == mnemonic {\n\
973        Some(MnemonicId(entry))\n\
974    } else {\n\
975        None\n\
976    }\n\
977}\n\n\
978pub(crate) fn mnemonic_name(id: MnemonicId) -> Option<&'static str> {\n\
979    let entry = *MNEMONIC_DISPATCH.get(usize::from(id.0))?;\n\
980    Some(entry.name)\n\
981}\n\n\
982pub(crate) fn specs_for_mnemonic_id(id: MnemonicId) -> Option<&'static [EncodingSpec]> {\n\
983    let entry = *MNEMONIC_DISPATCH.get(usize::from(id.0))?;\n\
984    let start = usize::from(entry.spec_start);\n\
985    let len = usize::from(entry.spec_len);\n\
986    Some(&SPECS[start..start + len])\n\
987}\n\n\
988#[derive(Debug, Copy, Clone, PartialEq, Eq)]\n\
989pub(crate) enum ShapeVariantMatch {\n\
990    Unique(VariantId),\n\
991    Ambiguous,\n\
992}\n\n\
993#[inline]\n\
994fn lookup_shape_dispatch_entry(id: MnemonicId, shape_key: u128) -> Option<ShapeDispatchEntry> {\n\
995    let entry = *MNEMONIC_DISPATCH.get(usize::from(id.0))?;\n\
996    let shape_start = usize::from(entry.shape_start);\n\
997    let shape_end = shape_start + usize::from(entry.shape_len);\n\
998    let shapes = &SHAPE_DISPATCH[shape_start..shape_end];\n\
999    let mut left = 0usize;\n\
1000    let mut right = shapes.len();\n\
1001    while left < right {\n\
1002        let mid = left + ((right - left) >> 1);\n\
1003        let probe = shapes[mid].key;\n\
1004        if probe < shape_key {\n\
1005            left = mid + 1;\n\
1006            continue;\n\
1007        }\n\
1008        if probe > shape_key {\n\
1009            right = mid;\n\
1010            continue;\n\
1011        }\n\
1012        return Some(shapes[mid]);\n\
1013    }\n\
1014    None\n\
1015}\n\n\
1016pub(crate) fn variant_match_for_shape(id: MnemonicId, shape_key: u128) -> Option<ShapeVariantMatch> {\n\
1017    let entry = lookup_shape_dispatch_entry(id, shape_key)?;\n\
1018    let start = usize::from(entry.start);\n\
1019    let len = usize::from(entry.len);\n\
1020    if len == 1 {\n\
1021        return Some(ShapeVariantMatch::Unique(SHAPE_VARIANT_IDS[start]));\n\
1022    }\n\
1023    Some(ShapeVariantMatch::Ambiguous)\n\
1024}\n\n\
1025#[allow(dead_code)]\n\
1026pub(crate) fn variants_for_shape(id: MnemonicId, shape_key: u128) -> Option<&'static [VariantId]> {\n\
1027    let entry = lookup_shape_dispatch_entry(id, shape_key)?;\n\
1028    let start = usize::from(entry.start);\n\
1029    let len = usize::from(entry.len);\n\
1030    Some(&SHAPE_VARIANT_IDS[start..start + len])\n\
1031}\n\n\
1032pub(crate) fn encode_variant(id: VariantId, operands: &[Operand]) -> Result<InstructionCode, EncodeError> {\n\
1033    let index = usize::from(id.0);\n\
1034    let spec = SPECS.get(index).ok_or(EncodeError::NoMatchingVariant)?;\n\
1035    jit_core::encode_by_spec_operands(spec, operands)\n\
1036}\n\n\
1037pub(crate) fn spec_for_variant(id: VariantId) -> Option<&'static EncodingSpec> {\n\
1038    SPECS.get(usize::from(id.0))\n\
1039}\n\n\
1040pub(crate) fn specs_for_mnemonic(mnemonic: &str) -> Option<&'static [EncodingSpec]> {\n\
1041    let id = mnemonic_id_from_str(mnemonic)?;\n\
1042    specs_for_mnemonic_id(id)\n\
1043}\n",
1044    );
1045
1046    out.push_str(
1047        "\
1048pub(crate) fn mnemonic_name_const<const MNEMONIC: u16>() -> Option<&'static str> {\n\
1049    let index = MNEMONIC as usize;\n\
1050    let entry = *MNEMONIC_DISPATCH.get(index)?;\n\
1051    Some(entry.name)\n\
1052}\n\n\
1053pub(crate) fn specs_for_mnemonic_id_const<const MNEMONIC: u16>() -> Option<&'static [EncodingSpec]> {\n\
1054    let index = MNEMONIC as usize;\n\
1055    let entry = *MNEMONIC_DISPATCH.get(index)?;\n\
1056    let start = usize::from(entry.spec_start);\n\
1057    let len = usize::from(entry.spec_len);\n\
1058    Some(&SPECS[start..start + len])\n\
1059}\n\n\
1060pub(crate) fn spec_for_variant_const<const VARIANT: u16>() -> Option<&'static EncodingSpec> {\n\
1061    SPECS.get(VARIANT as usize)\n\
1062}\n\n\
1063pub(crate) fn encode_variant_const<const VARIANT: u16>(operands: &[Operand]) -> Result<InstructionCode, EncodeError> {\n\
1064    let spec = spec_for_variant_const::<VARIANT>().ok_or(EncodeError::NoMatchingVariant)?;\n\
1065    jit_core::encode_by_spec_operands(spec, operands)\n\
1066}\n",
1067    );
1068
1069    Ok(out)
1070}
1071
1072/// Generates a Rust module that declares JIT macro normalization rule tables.
1073///
1074/// # Errors
1075///
1076/// Returns [`CodegenError`] when alias metadata cannot be derived from input variants.
1077pub fn generate_macro_normalization_module(
1078    flat: &[FlatInstruction],
1079) -> Result<String, CodegenError> {
1080    let mut shift_to_imm = BTreeSet::<String>::new();
1081    let mut sysreg_gpr_swap = BTreeSet::<String>::new();
1082    let mut reloc_b26 = BTreeSet::<String>::new();
1083    let mut reloc_bcond19 = BTreeSet::<String>::new();
1084    let mut reloc_cbz19 = BTreeSet::<String>::new();
1085    let mut reloc_imm19 = BTreeSet::<String>::new();
1086    let mut reloc_tbz14 = BTreeSet::<String>::new();
1087    let mut reloc_adr21 = BTreeSet::<String>::new();
1088    let mut reloc_adrp21 = BTreeSet::<String>::new();
1089    for inst in flat {
1090        if variant_supports_shift_to_immediate_normalization(inst)? {
1091            shift_to_imm.insert(inst.mnemonic.clone());
1092        }
1093        if variant_supports_sysreg_gpr_swap_normalization(inst)? {
1094            sysreg_gpr_swap.insert(inst.mnemonic.clone());
1095        }
1096        if let Some(kind) = variant_macro_reloc_kind(inst)? {
1097            match kind {
1098                MacroRelocKind::B26 => {
1099                    reloc_b26.insert(inst.mnemonic.clone());
1100                }
1101                MacroRelocKind::BCond19 => {
1102                    reloc_bcond19.insert(inst.mnemonic.clone());
1103                }
1104                MacroRelocKind::Cbz19 => {
1105                    reloc_cbz19.insert(inst.mnemonic.clone());
1106                }
1107                MacroRelocKind::Imm19 => {
1108                    reloc_imm19.insert(inst.mnemonic.clone());
1109                }
1110                MacroRelocKind::Tbz14 => {
1111                    reloc_tbz14.insert(inst.mnemonic.clone());
1112                }
1113                MacroRelocKind::Adr21 => {
1114                    reloc_adr21.insert(inst.mnemonic.clone());
1115                }
1116                MacroRelocKind::Adrp21 => {
1117                    reloc_adrp21.insert(inst.mnemonic.clone());
1118                }
1119            }
1120        }
1121    }
1122    let prepared = prepare_variants(flat)?;
1123
1124    let mut mnemonic_id_rules = Vec::<(String, u16)>::new();
1125    let mut seen_mnemonics = BTreeSet::<String>::new();
1126    for variant in &prepared {
1127        let mnemonic = variant.inst.mnemonic.clone();
1128        if seen_mnemonics.insert(mnemonic.clone()) {
1129            let id = usize_to_u16(mnemonic_id_rules.len(), "macro mnemonic id")?;
1130            mnemonic_id_rules.push((mnemonic, id));
1131        }
1132    }
1133
1134    let mnemonic_id_map = mnemonic_id_rules
1135        .iter()
1136        .map(|(mnemonic, id)| (mnemonic.clone(), *id))
1137        .collect::<BTreeMap<_, _>>();
1138
1139    let mut direct_variant_rules = BTreeMap::<(u16, u128), Option<u16>>::new();
1140    let mut start = 0usize;
1141    while start < prepared.len() {
1142        let mnemonic = prepared[start].inst.mnemonic.clone();
1143        let mut end = start + 1;
1144        while end < prepared.len() && prepared[end].inst.mnemonic == mnemonic {
1145            end += 1;
1146        }
1147
1148        let mnemonic_variants = &prepared[start..end];
1149        let mut input_shapes = BTreeMap::<
1150            u128,
1151            (
1152                Vec<GeneratedOperandKind>,
1153                GeneratedMemoryAddressingConstraint,
1154            ),
1155        >::new();
1156        for variant in mnemonic_variants {
1157            input_shapes
1158                .entry(variant.user_shape_key)
1159                .or_insert_with(|| (variant.user_kinds.clone(), variant.memory_addressing));
1160        }
1161
1162        for (shape_key, (input_kinds, memory_mode)) in input_shapes {
1163            let candidates =
1164                user_shape_candidates(mnemonic_variants, &input_kinds, Some(memory_mode));
1165            if candidates.is_empty() {
1166                continue;
1167            }
1168            let mnemonic_id = *mnemonic_id_map
1169                .get(&mnemonic)
1170                .expect("mnemonic id must exist for prepared mnemonic");
1171            if candidates.len() != 1 {
1172                continue;
1173            }
1174            let local_variant = candidates[0];
1175            let global_variant = start + local_variant;
1176            let variant_id = usize_to_u16(global_variant, "macro direct variant id")?;
1177            let macro_shape_key = shape_key;
1178
1179            let entry = direct_variant_rules
1180                .entry((mnemonic_id, macro_shape_key))
1181                .or_insert(Some(variant_id));
1182            if let Some(existing) = *entry
1183                && existing != variant_id
1184            {
1185                *entry = None;
1186            }
1187        }
1188        start = end;
1189    }
1190    let mut direct_variant_rules = direct_variant_rules
1191        .into_iter()
1192        .filter_map(|((mnemonic_id, shape_key), variant)| {
1193            variant.map(|variant_id| (mnemonic_id, shape_key, variant_id))
1194        })
1195        .collect::<Vec<_>>();
1196    direct_variant_rules.sort_by(|lhs, rhs| lhs.0.cmp(&rhs.0).then(lhs.1.cmp(&rhs.1)));
1197
1198    const NORM_FLAG_SHIFT_TO_IMMEDIATE: u8 = 1u8 << 0;
1199    const NORM_FLAG_SYSREG_GPR_SWAP: u8 = 1u8 << 1;
1200
1201    const RELOC_MASK_B26: u8 = 1u8 << 0;
1202    const RELOC_MASK_BCOND19: u8 = 1u8 << 1;
1203    const RELOC_MASK_CBZ19: u8 = 1u8 << 2;
1204    const RELOC_MASK_IMM19: u8 = 1u8 << 3;
1205    const RELOC_MASK_TBZ14: u8 = 1u8 << 4;
1206    const RELOC_MASK_ADR21: u8 = 1u8 << 5;
1207    const RELOC_MASK_ADRP21: u8 = 1u8 << 6;
1208
1209    let mut mnemonic_normalization_rules = Vec::<(u16, u8, u8)>::new();
1210    for (mnemonic, mnemonic_id) in &mnemonic_id_rules {
1211        let mut flags = 0u8;
1212        if shift_to_imm.contains(mnemonic) {
1213            flags |= NORM_FLAG_SHIFT_TO_IMMEDIATE;
1214        }
1215        if sysreg_gpr_swap.contains(mnemonic) {
1216            flags |= NORM_FLAG_SYSREG_GPR_SWAP;
1217        }
1218
1219        let mut reloc_mask = 0u8;
1220        if reloc_b26.contains(mnemonic) {
1221            reloc_mask |= RELOC_MASK_B26;
1222        }
1223        if reloc_bcond19.contains(mnemonic) {
1224            reloc_mask |= RELOC_MASK_BCOND19;
1225        }
1226        if reloc_cbz19.contains(mnemonic) {
1227            reloc_mask |= RELOC_MASK_CBZ19;
1228        }
1229        if reloc_imm19.contains(mnemonic) {
1230            reloc_mask |= RELOC_MASK_IMM19;
1231        }
1232        if reloc_tbz14.contains(mnemonic) {
1233            reloc_mask |= RELOC_MASK_TBZ14;
1234        }
1235        if reloc_adr21.contains(mnemonic) {
1236            reloc_mask |= RELOC_MASK_ADR21;
1237        }
1238        if reloc_adrp21.contains(mnemonic) {
1239            reloc_mask |= RELOC_MASK_ADRP21;
1240        }
1241
1242        if flags != 0 || reloc_mask != 0 {
1243            mnemonic_normalization_rules.push((*mnemonic_id, flags, reloc_mask));
1244        }
1245    }
1246    mnemonic_normalization_rules.sort_by(|lhs, rhs| lhs.0.cmp(&rhs.0));
1247
1248    let conditional_branch_alias_rules = collect_conditional_branch_alias_rules(flat)?;
1249
1250    let mut out = String::new();
1251    out.push_str("// @generated by jit-codegen. DO NOT EDIT.\n");
1252    out.push_str(
1253        "\
1254#[derive(Debug, Copy, Clone, PartialEq, Eq)]\n\
1255pub(crate) struct MnemonicIdRule {\n\
1256    pub mnemonic: &'static str,\n\
1257    pub id: u16,\n\
1258}\n\n\
1259#[derive(Debug, Copy, Clone, PartialEq, Eq)]\n\
1260pub(crate) struct DirectVariantRule {\n\
1261    pub mnemonic_id: u16,\n\
1262    pub shape_key: u128,\n\
1263    pub variant_id: u16,\n\
1264}\n\n\
1265#[derive(Debug, Copy, Clone, PartialEq, Eq)]\n\
1266pub(crate) struct MnemonicNormalizationRule {\n\
1267    pub mnemonic_id: u16,\n\
1268    pub flags: u8,\n\
1269    pub reloc_mask: u8,\n\
1270}\n\n\
1271#[derive(Debug, Copy, Clone, PartialEq, Eq)]\n\
1272pub(crate) struct ConditionalBranchAliasRule {\n\
1273    pub alias: &'static str,\n\
1274    pub base_mnemonic: &'static str,\n\
1275    pub condition_code: u8,\n\
1276}\n\n\
1277pub(crate) static MNEMONIC_ID_RULES: &[MnemonicIdRule] = &[\n",
1278    );
1279    for (mnemonic, id) in &mnemonic_id_rules {
1280        writeln!(
1281            &mut out,
1282            "    MnemonicIdRule {{ mnemonic: {:?}, id: {} }},",
1283            mnemonic, id
1284        )
1285        .expect("write string");
1286    }
1287    out.push_str("];\n");
1288    out.push_str("pub(crate) static DIRECT_VARIANT_RULES: &[DirectVariantRule] = &[\n");
1289    for (mnemonic_id, shape_key, variant_id) in &direct_variant_rules {
1290        writeln!(
1291            &mut out,
1292            "    DirectVariantRule {{ mnemonic_id: {}, shape_key: {}u128, variant_id: {} }},",
1293            mnemonic_id, shape_key, variant_id
1294        )
1295        .expect("write string");
1296    }
1297    out.push_str("];\n");
1298    out.push_str("pub(crate) const NORM_FLAG_SHIFT_TO_IMMEDIATE: u8 = 1u8 << 0;\n");
1299    out.push_str("pub(crate) const NORM_FLAG_SYSREG_GPR_SWAP: u8 = 1u8 << 1;\n");
1300    out.push_str("pub(crate) const RELOC_MASK_B26: u8 = 1u8 << 0;\n");
1301    out.push_str("pub(crate) const RELOC_MASK_BCOND19: u8 = 1u8 << 1;\n");
1302    out.push_str("pub(crate) const RELOC_MASK_CBZ19: u8 = 1u8 << 2;\n");
1303    out.push_str("pub(crate) const RELOC_MASK_IMM19: u8 = 1u8 << 3;\n");
1304    out.push_str("pub(crate) const RELOC_MASK_TBZ14: u8 = 1u8 << 4;\n");
1305    out.push_str("pub(crate) const RELOC_MASK_ADR21: u8 = 1u8 << 5;\n");
1306    out.push_str("pub(crate) const RELOC_MASK_ADRP21: u8 = 1u8 << 6;\n");
1307
1308    out.push_str(
1309        "pub(crate) static MNEMONIC_NORMALIZATION_RULES: &[MnemonicNormalizationRule] = &[\n",
1310    );
1311    for (mnemonic_id, flags, reloc_mask) in &mnemonic_normalization_rules {
1312        writeln!(
1313            &mut out,
1314            "    MnemonicNormalizationRule {{ mnemonic_id: {}, flags: {}, reloc_mask: {} }},",
1315            mnemonic_id, flags, reloc_mask
1316        )
1317        .expect("write string");
1318    }
1319    out.push_str("];\n");
1320
1321    out.push_str(
1322        "pub(crate) static CONDITIONAL_BRANCH_ALIAS_RULES: &[ConditionalBranchAliasRule] = &[\n",
1323    );
1324    for (alias, base_mnemonic, condition_code) in &conditional_branch_alias_rules {
1325        writeln!(
1326            &mut out,
1327            "    ConditionalBranchAliasRule {{ alias: {:?}, base_mnemonic: {:?}, condition_code: {} }},",
1328            alias, base_mnemonic, condition_code
1329        )
1330        .expect("write string");
1331    }
1332    out.push_str("];\n");
1333    Ok(out)
1334}
1335
/// Relocation class that `variant_macro_reloc_kind` assigns to an instruction variant.
///
/// Each kind corresponds to one bit of the generated `RELOC_MASK_*` constants.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
enum MacroRelocKind {
    /// Single-immediate variant carrying an `imm26` field.
    B26,
    /// Condition + immediate variant carrying `imm19` and `cond` fields.
    BCond19,
    /// GPR + immediate variant carrying `imm19` and `rt` fields (no `cond`).
    Cbz19,
    /// Any other register-or-immediate + immediate variant carrying `imm19` (no `cond`).
    Imm19,
    /// GPR + three-immediate variant carrying `imm14`, `rt`, `b5` and `b40` fields.
    Tbz14,
    /// GPR + two-immediate variant carrying `immlo`/`immhi`/`rd`, opcode bit 31 clear.
    Adr21,
    /// GPR + two-immediate variant carrying `immlo`/`immhi`/`rd`, opcode bit 31 set.
    Adrp21,
}
1346
1347fn is_gpr_generated_kind(kind: GeneratedOperandKind) -> bool {
1348    matches!(
1349        kind,
1350        GeneratedOperandKind::GprRegister
1351            | GeneratedOperandKind::Gpr32Register
1352            | GeneratedOperandKind::Gpr64Register
1353    )
1354}
1355
1356fn variant_supports_shift_to_immediate_normalization(
1357    inst: &FlatInstruction,
1358) -> Result<bool, CodegenError> {
1359    let context = InstructionContext::from_instruction(inst);
1360    let (_, kinds, implicit_defaults) = derive_operand_metadata(inst)?;
1361    if kinds.len() != 4
1362        || !is_gpr_generated_kind(kinds[0])
1363        || !is_gpr_generated_kind(kinds[1])
1364        || !is_gpr_generated_kind(kinds[2])
1365        || kinds[3] != GeneratedOperandKind::Immediate
1366    {
1367        return Ok(false);
1368    }
1369
1370    let has_shift_default = implicit_defaults.iter().any(|(field_index, value)| {
1371        if *value != 0 {
1372            return false;
1373        }
1374        let idx = *field_index as usize;
1375        if idx >= inst.fields.len() {
1376            return false;
1377        }
1378        let normalized = normalize_field_name(&inst.fields[idx].name);
1379        semantic_field_name(&normalized) == "shift"
1380    });
1381
1382    if !has_shift_default {
1383        return Ok(false);
1384    }
1385
1386    // Normalize only register forms (not memory-like variants).
1387    Ok(!context.memory_like())
1388}
1389
1390fn variant_supports_sysreg_gpr_swap_normalization(
1391    inst: &FlatInstruction,
1392) -> Result<bool, CodegenError> {
1393    let (_, kinds, _) = derive_operand_metadata(inst)?;
1394    let mut gpr_count = 0usize;
1395    let mut sysreg_part_count = 0usize;
1396    let mut immediate_count = 0usize;
1397
1398    for kind in kinds {
1399        match kind {
1400            GeneratedOperandKind::GprRegister
1401            | GeneratedOperandKind::Gpr32Register
1402            | GeneratedOperandKind::Gpr64Register => {
1403                gpr_count += 1;
1404            }
1405            GeneratedOperandKind::SysRegPart => {
1406                sysreg_part_count += 1;
1407            }
1408            GeneratedOperandKind::Immediate => {
1409                immediate_count += 1;
1410            }
1411            _ => return Ok(false),
1412        }
1413    }
1414
1415    Ok(gpr_count == 1 && sysreg_part_count == 4 && immediate_count == 1)
1416}
1417
1418fn semantic_field_exists(inst: &FlatInstruction, semantic_name: &str) -> bool {
1419    inst.fields.iter().any(|field| {
1420        let normalized = normalize_field_name(&field.name);
1421        semantic_field_name(&normalized) == semantic_name
1422    })
1423}
1424
1425fn opcode_pattern_matches(inst: &FlatInstruction, mask: u32, value: u32) -> bool {
1426    (inst.fixed_mask & mask) == mask && (inst.fixed_value & mask) == value
1427}
1428
1429fn variant_macro_reloc_kind(
1430    inst: &FlatInstruction,
1431) -> Result<Option<MacroRelocKind>, CodegenError> {
1432    let (_, kinds, _) = derive_operand_metadata(inst)?;
1433
1434    let has_imm26 = semantic_field_exists(inst, "imm26");
1435    let has_imm19 = semantic_field_exists(inst, "imm19");
1436    let has_imm14 = semantic_field_exists(inst, "imm14");
1437    let has_cond = semantic_field_exists(inst, "cond");
1438    let has_rt = semantic_field_exists(inst, "rt");
1439    let has_b5 = semantic_field_exists(inst, "b5");
1440    let has_b40 = semantic_field_exists(inst, "b40");
1441    let has_immlo = semantic_field_exists(inst, "immlo");
1442    let has_immhi = semantic_field_exists(inst, "immhi");
1443    let has_rd = semantic_field_exists(inst, "rd");
1444
1445    if has_imm26
1446        && kinds.len() == 1
1447        && kinds[0] == GeneratedOperandKind::Immediate
1448        && (opcode_pattern_matches(inst, 0xfc00_0000, 0x1400_0000)
1449            || opcode_pattern_matches(inst, 0xfc00_0000, 0x9400_0000))
1450    {
1451        return Ok(Some(MacroRelocKind::B26));
1452    }
1453
1454    if has_imm19
1455        && has_cond
1456        && kinds.len() == 2
1457        && kinds[0] == GeneratedOperandKind::Condition
1458        && kinds[1] == GeneratedOperandKind::Immediate
1459        && opcode_pattern_matches(inst, 0xff00_0010, 0x5400_0000)
1460    {
1461        return Ok(Some(MacroRelocKind::BCond19));
1462    }
1463
1464    if has_imm19
1465        && has_rt
1466        && !has_cond
1467        && kinds.len() == 2
1468        && is_gpr_generated_kind(kinds[0])
1469        && kinds[1] == GeneratedOperandKind::Immediate
1470        && (opcode_pattern_matches(inst, 0x7f00_0000, 0x3400_0000)
1471            || opcode_pattern_matches(inst, 0x7f00_0000, 0x3500_0000))
1472    {
1473        return Ok(Some(MacroRelocKind::Cbz19));
1474    }
1475
1476    if has_imm19
1477        && !has_cond
1478        && kinds.len() == 2
1479        && matches!(
1480            kinds[0],
1481            GeneratedOperandKind::GprRegister
1482                | GeneratedOperandKind::Gpr32Register
1483                | GeneratedOperandKind::Gpr64Register
1484                | GeneratedOperandKind::SimdRegister
1485                | GeneratedOperandKind::SveZRegister
1486                | GeneratedOperandKind::PredicateRegister
1487                | GeneratedOperandKind::Immediate
1488        )
1489        && kinds[1] == GeneratedOperandKind::Immediate
1490    {
1491        return Ok(Some(MacroRelocKind::Imm19));
1492    }
1493
1494    if has_imm14
1495        && has_rt
1496        && has_b5
1497        && has_b40
1498        && kinds.len() == 4
1499        && is_gpr_generated_kind(kinds[0])
1500        && kinds[1] == GeneratedOperandKind::Immediate
1501        && kinds[2] == GeneratedOperandKind::Immediate
1502        && kinds[3] == GeneratedOperandKind::Immediate
1503        && (opcode_pattern_matches(inst, 0x7f00_0000, 0x3600_0000)
1504            || opcode_pattern_matches(inst, 0x7f00_0000, 0x3700_0000))
1505    {
1506        return Ok(Some(MacroRelocKind::Tbz14));
1507    }
1508
1509    if has_immlo
1510        && has_immhi
1511        && has_rd
1512        && kinds.len() == 3
1513        && is_gpr_generated_kind(kinds[0])
1514        && kinds[1] == GeneratedOperandKind::Immediate
1515        && kinds[2] == GeneratedOperandKind::Immediate
1516        && opcode_pattern_matches(inst, 0x9f00_0000, 0x1000_0000)
1517    {
1518        return Ok(Some(MacroRelocKind::Adr21));
1519    }
1520
1521    if has_immlo
1522        && has_immhi
1523        && has_rd
1524        && kinds.len() == 3
1525        && is_gpr_generated_kind(kinds[0])
1526        && kinds[1] == GeneratedOperandKind::Immediate
1527        && kinds[2] == GeneratedOperandKind::Immediate
1528        && opcode_pattern_matches(inst, 0x9f00_0000, 0x9000_0000)
1529    {
1530        return Ok(Some(MacroRelocKind::Adrp21));
1531    }
1532
1533    Ok(None)
1534}
1535
/// Condition suffix spellings paired with their 4-bit condition code (`0..=15`).
///
/// The table has 18 rows for 16 codes because two codes have an alternate spelling.
const CONDITIONAL_BRANCH_SUFFIX_CODES: [(&str, u8); 18] = [
    ("eq", 0),
    ("ne", 1),
    // `cs` and `hs` are two spellings of code 2.
    ("cs", 2),
    ("hs", 2),
    // `cc` and `lo` are two spellings of code 3.
    ("cc", 3),
    ("lo", 3),
    ("mi", 4),
    ("pl", 5),
    ("vs", 6),
    ("vc", 7),
    ("hi", 8),
    ("ls", 9),
    ("ge", 10),
    ("lt", 11),
    ("gt", 12),
    ("le", 13),
    ("al", 14),
    ("nv", 15),
];
1556
/// Records `alias -> (base_mnemonic, condition_code)`, demoting conflicting aliases.
///
/// * An alias already listed in `ambiguous` is ignored.
/// * A new alias is inserted into `aliases`.
/// * Re-registering an alias with the same target is a no-op.
/// * Re-registering an alias with a different target removes it from `aliases` and
///   adds it to `ambiguous`.
fn register_conditional_alias(
    aliases: &mut BTreeMap<String, (String, u8)>,
    ambiguous: &mut BTreeSet<String>,
    alias: String,
    base_mnemonic: &str,
    condition_code: u8,
) {
    use std::collections::btree_map::Entry;

    if ambiguous.contains(&alias) {
        return;
    }

    match aliases.entry(alias) {
        Entry::Vacant(slot) => {
            slot.insert((base_mnemonic.to_owned(), condition_code));
        }
        Entry::Occupied(slot) => {
            let (existing_base, existing_condition) = slot.get();
            let same_target =
                existing_base == base_mnemonic && *existing_condition == condition_code;
            if !same_target {
                // Two different targets claim this spelling: retire it for good.
                let (conflicting_alias, _) = slot.remove_entry();
                ambiguous.insert(conflicting_alias);
            }
        }
    }
}
1580
1581/// Collects generated conditional-branch alias spellings and their canonical mnemonic mappings.
1582///
1583/// The returned tuples are `(alias, canonical_mnemonic, condition_code)` where
1584/// `condition_code` is the architectural 4-bit condition encoding (`0..=15`).
1585///
1586/// # Errors
1587///
1588/// Returns [`CodegenError`] when operand metadata derivation fails for input variants.
1589pub fn collect_conditional_branch_alias_rules(
1590    flat: &[FlatInstruction],
1591) -> Result<Vec<(String, String, u8)>, CodegenError> {
1592    let mut base_mnemonics = BTreeSet::<String>::new();
1593    for inst in flat {
1594        if matches!(
1595            variant_macro_reloc_kind(inst)?,
1596            Some(MacroRelocKind::BCond19)
1597        ) {
1598            base_mnemonics.insert(inst.mnemonic.clone());
1599        }
1600    }
1601
1602    let mut aliases = BTreeMap::<String, (String, u8)>::new();
1603    let mut ambiguous = BTreeSet::<String>::new();
1604
1605    for base in &base_mnemonics {
1606        for (suffix, condition_code) in CONDITIONAL_BRANCH_SUFFIX_CODES {
1607            register_conditional_alias(
1608                &mut aliases,
1609                &mut ambiguous,
1610                format!("{base}.{suffix}"),
1611                base,
1612                condition_code,
1613            );
1614            register_conditional_alias(
1615                &mut aliases,
1616                &mut ambiguous,
1617                format!("{base}{suffix}"),
1618                base,
1619                condition_code,
1620            );
1621        }
1622    }
1623
1624    for alias in ambiguous {
1625        aliases.remove(&alias);
1626    }
1627
1628    Ok(aliases
1629        .into_iter()
1630        .map(|(alias, (base_mnemonic, condition_code))| (alias, base_mnemonic, condition_code))
1631        .collect())
1632}
1633
1634fn derive_operand_metadata(
1635    inst: &FlatInstruction,
1636) -> Result<(Vec<u8>, Vec<GeneratedOperandKind>, Vec<(u8, i64)>), CodegenError> {
1637    let width_hint = variant_width_hint(&inst.variant);
1638    let context = InstructionContext::from_instruction(inst);
1639    let mut ordered = Vec::<OrderedField>::new();
1640    let mut implicit_defaults = Vec::<(u8, i64)>::new();
1641
1642    for (idx, field) in inst.fields.iter().enumerate() {
1643        if idx > u8::MAX as usize {
1644            break;
1645        }
1646
1647        let field_name = normalize_field_name(&field.name);
1648        let semantic_name = semantic_field_name(&field_name);
1649        let kind = infer_operand_kind(semantic_name, field, width_hint, &context, &inst.variant)?;
1650        if let Some(value) = implicit_default_value(semantic_name, kind) {
1651            implicit_defaults.push((idx as u8, value));
1652            continue;
1653        }
1654
1655        ordered.push(OrderedField {
1656            index: idx as u8,
1657            rank: field_rank(semantic_name, kind),
1658            kind,
1659        });
1660    }
1661
1662    ordered.sort_by(|left, right| {
1663        left.rank
1664            .cmp(&right.rank)
1665            .then(left.index.cmp(&right.index))
1666    });
1667    implicit_defaults.sort_by(|left, right| left.0.cmp(&right.0));
1668
1669    let order = ordered.iter().map(|slot| slot.index).collect::<Vec<_>>();
1670    let kinds = ordered.iter().map(|slot| slot.kind).collect::<Vec<_>>();
1671    Ok((order, kinds, implicit_defaults))
1672}
1673
1674fn normalized_semantic_fields(inst: &FlatInstruction) -> Vec<String> {
1675    let mut out = Vec::with_capacity(inst.fields.len());
1676    for field in &inst.fields {
1677        let normalized = normalize_field_name(&field.name);
1678        out.push(semantic_field_name(&normalized).to_owned());
1679    }
1680    out
1681}
1682
/// Returns the index of `name` within `semantic_fields` when it occurs exactly once.
///
/// Yields `None` both when the name is absent and when it appears more than once.
fn unique_semantic_field_index(semantic_fields: &[String], name: &str) -> Option<usize> {
    let mut hits = semantic_fields
        .iter()
        .enumerate()
        .filter_map(|(idx, semantic)| (semantic == name).then_some(idx));

    // Exactly one hit: the first probe succeeds and the second comes back empty.
    match (hits.next(), hits.next()) {
        (Some(idx), None) => Some(idx),
        _ => None,
    }
}
1696
/// Returns the operand slot whose encoded field index equals `field_index`, provided
/// exactly one slot refers to that field.
///
/// Yields `None` when no slot or more than one slot refers to the field.
fn operand_slot_for_field_index(operand_order: &[u8], field_index: usize) -> Option<usize> {
    let refers_to_field = |encoded: &u8| usize::from(*encoded) == field_index;

    let first = operand_order.iter().position(refers_to_field)?;
    let last = operand_order.iter().rposition(refers_to_field)?;
    // The match is unique precisely when the first and last hits coincide.
    (first == last).then_some(first)
}
1710
1711fn has_adjacent_immediate_pair(kinds: &[GeneratedOperandKind]) -> bool {
1712    kinds.windows(2).any(|pair| {
1713        pair[0] == GeneratedOperandKind::Immediate && pair[1] == GeneratedOperandKind::Immediate
1714    })
1715}
1716
1717fn fixed_bit_value(inst: &FlatInstruction, bit: u8) -> Option<u8> {
1718    let mask = 1u32 << bit;
1719    if (inst.fixed_mask & mask) == 0 {
1720        return None;
1721    }
1722    Some(((inst.fixed_value & mask) != 0) as u8)
1723}
1724
/// Reports whether `field_name` appears anywhere in `semantic_fields`.
fn has_semantic_field(semantic_fields: &[String], field_name: &str) -> bool {
    for candidate in semantic_fields {
        if candidate.as_str() == field_name {
            return true;
        }
    }
    false
}
1728
1729fn derive_memory_addressing_constraint(
1730    inst: &FlatInstruction,
1731) -> GeneratedMemoryAddressingConstraint {
1732    let semantic_fields = normalized_semantic_fields(inst);
1733    let has_rn = has_semantic_field(&semantic_fields, "rn");
1734    let has_rt = has_semantic_field(&semantic_fields, "rt");
1735    let has_rt2 = has_semantic_field(&semantic_fields, "rt2");
1736
1737    if has_rn && has_rt && has_rt2 && has_semantic_field(&semantic_fields, "imm7") {
1738        let bit24 = fixed_bit_value(inst, 24);
1739        let bit23 = fixed_bit_value(inst, 23);
1740        return match (bit24, bit23) {
1741            (Some(1), Some(0)) => GeneratedMemoryAddressingConstraint::Offset,
1742            (Some(0), Some(1)) => GeneratedMemoryAddressingConstraint::PostIndex,
1743            (Some(1), Some(1)) => GeneratedMemoryAddressingConstraint::PreIndex,
1744            _ => GeneratedMemoryAddressingConstraint::None,
1745        };
1746    }
1747
1748    if has_rn && has_rt && has_semantic_field(&semantic_fields, "imm9") {
1749        let bit11 = fixed_bit_value(inst, 11);
1750        let bit10 = fixed_bit_value(inst, 10);
1751        return match (bit11, bit10) {
1752            (Some(0), Some(0)) => GeneratedMemoryAddressingConstraint::Offset,
1753            (Some(0), Some(1)) => GeneratedMemoryAddressingConstraint::PostIndex,
1754            (Some(1), Some(1)) => GeneratedMemoryAddressingConstraint::PreIndex,
1755            _ => GeneratedMemoryAddressingConstraint::None,
1756        };
1757    }
1758
1759    if has_rn && has_rt && has_semantic_field(&semantic_fields, "imm12") {
1760        return GeneratedMemoryAddressingConstraint::Offset;
1761    }
1762
1763    GeneratedMemoryAddressingConstraint::None
1764}
1765
1766fn derive_memory_pair_offset_scale(
1767    inst: &FlatInstruction,
1768    semantic_fields: &[String],
1769) -> Option<u16> {
1770    if !(has_semantic_field(semantic_fields, "rn")
1771        && has_semantic_field(semantic_fields, "rt")
1772        && has_semantic_field(semantic_fields, "rt2")
1773        && has_semantic_field(semantic_fields, "imm7"))
1774    {
1775        return None;
1776    }
1777
1778    let v = fixed_bit_value(inst, 26)?;
1779    let b31 = fixed_bit_value(inst, 31)?;
1780    let b30 = fixed_bit_value(inst, 30)?;
1781    match (v, b31, b30) {
1782        (0, 0, 0) => Some(4),
1783        (0, 1, 0) => Some(8),
1784        (1, 0, 0) => Some(4),
1785        (1, 0, 1) => Some(8),
1786        (1, 1, 0) => Some(16),
1787        _ => None,
1788    }
1789}
1790
1791fn derive_memory_imm12_scale(inst: &FlatInstruction, semantic_fields: &[String]) -> Option<u16> {
1792    if !(has_semantic_field(semantic_fields, "rn")
1793        && has_semantic_field(semantic_fields, "rt")
1794        && has_semantic_field(semantic_fields, "imm12"))
1795    {
1796        return None;
1797    }
1798
1799    let b31 = fixed_bit_value(inst, 31)?;
1800    let b30 = fixed_bit_value(inst, 30)?;
1801    let size = (b31 << 1) | b30;
1802    if fixed_bit_value(inst, 23) == Some(1) && size == 0 {
1803        return Some(16);
1804    }
1805    Some(1u16 << size)
1806}
1807
1808fn derive_field_scales(inst: &FlatInstruction) -> Vec<u16> {
1809    let semantic_fields = normalized_semantic_fields(inst);
1810    let pair_scale = derive_memory_pair_offset_scale(inst, &semantic_fields);
1811    let imm12_scale = derive_memory_imm12_scale(inst, &semantic_fields);
1812
1813    let mut scales = vec![1u16; inst.fields.len()];
1814    for (idx, field) in inst.fields.iter().enumerate() {
1815        let semantic = semantic_fields
1816            .get(idx)
1817            .map(String::as_str)
1818            .unwrap_or_default();
1819        if semantic == "imm26" && field.lsb == 0 {
1820            scales[idx] = 4;
1821            continue;
1822        }
1823        if semantic == "imm19" && field.lsb == 5 {
1824            scales[idx] = 4;
1825            continue;
1826        }
1827        if semantic == "imm14" && field.lsb == 5 {
1828            scales[idx] = 4;
1829            continue;
1830        }
1831        if semantic == "imm7"
1832            && let Some(scale) = pair_scale
1833        {
1834            scales[idx] = scale;
1835            continue;
1836        }
1837        if semantic == "imm12"
1838            && let Some(scale) = imm12_scale
1839        {
1840            scales[idx] = scale;
1841        }
1842    }
1843    scales
1844}
1845
1846fn derive_split_immediate_plan(
1847    inst: &FlatInstruction,
1848    operand_order: &[u8],
1849    operand_kinds: &[GeneratedOperandKind],
1850) -> Option<GeneratedSplitImmediatePlan> {
1851    if !has_adjacent_immediate_pair(operand_kinds) {
1852        return None;
1853    }
1854
1855    let semantic_fields = normalized_semantic_fields(inst);
1856    let immlo_field_index = unique_semantic_field_index(&semantic_fields, "immlo");
1857    let immhi_field_index = unique_semantic_field_index(&semantic_fields, "immhi");
1858    if let (Some(immlo_field_index), Some(immhi_field_index)) =
1859        (immlo_field_index, immhi_field_index)
1860    {
1861        let immlo_slot = operand_slot_for_field_index(operand_order, immlo_field_index)?;
1862        let immhi_slot = operand_slot_for_field_index(operand_order, immhi_field_index)?;
1863        let first_slot = immlo_slot.min(immhi_slot);
1864        let second_slot = immlo_slot.max(immhi_slot);
1865        if second_slot == first_slot + 1
1866            && operand_kinds.get(first_slot) == Some(&GeneratedOperandKind::Immediate)
1867            && operand_kinds.get(second_slot) == Some(&GeneratedOperandKind::Immediate)
1868        {
1869            let scale = if fixed_bit_value(inst, 31) == Some(1) {
1870                4096
1871            } else {
1872                1
1873            };
1874            let first_slot = u8::try_from(first_slot).ok()?;
1875            let second_slot = u8::try_from(second_slot).ok()?;
1876            let immlo_field_index = u8::try_from(immlo_field_index).ok()?;
1877            let immhi_field_index = u8::try_from(immhi_field_index).ok()?;
1878            return Some(GeneratedSplitImmediatePlan {
1879                first_slot,
1880                second_slot,
1881                kind: GeneratedSplitImmediateKind::AdrLike {
1882                    immlo_field_index,
1883                    immhi_field_index,
1884                    scale,
1885                },
1886            });
1887        }
1888    }
1889
1890    let b5_field_index = unique_semantic_field_index(&semantic_fields, "b5");
1891    let b40_field_index = unique_semantic_field_index(&semantic_fields, "b40");
1892    if let (Some(b5_field_index), Some(b40_field_index)) = (b5_field_index, b40_field_index) {
1893        let b5_slot = operand_slot_for_field_index(operand_order, b5_field_index)?;
1894        let b40_slot = operand_slot_for_field_index(operand_order, b40_field_index)?;
1895        let first_slot = b5_slot.min(b40_slot);
1896        let second_slot = b5_slot.max(b40_slot);
1897        if second_slot == first_slot + 1
1898            && operand_kinds.get(first_slot) == Some(&GeneratedOperandKind::Immediate)
1899            && operand_kinds.get(second_slot) == Some(&GeneratedOperandKind::Immediate)
1900        {
1901            let first_slot = u8::try_from(first_slot).ok()?;
1902            let second_slot = u8::try_from(second_slot).ok()?;
1903            let b5_field_index = u8::try_from(b5_field_index).ok()?;
1904            let b40_field_index = u8::try_from(b40_field_index).ok()?;
1905            return Some(GeneratedSplitImmediatePlan {
1906                first_slot,
1907                second_slot,
1908                kind: GeneratedSplitImmediateKind::BitIndex6 {
1909                    b5_field_index,
1910                    b40_field_index,
1911                },
1912            });
1913        }
1914    }
1915
1916    None
1917}
1918
1919fn derive_gpr32_extend_compatibility(
1920    inst: &FlatInstruction,
1921    operand_order: &[u8],
1922    operand_kinds: &[GeneratedOperandKind],
1923) -> u64 {
1924    let semantic_fields = normalized_semantic_fields(inst);
1925    let mut bitset = 0u64;
1926    for slot in 0..operand_kinds.len().saturating_sub(1) {
1927        if slot >= u64::BITS as usize {
1928            break;
1929        }
1930        if operand_kinds[slot] != GeneratedOperandKind::Gpr64Register
1931            || operand_kinds[slot + 1] != GeneratedOperandKind::ExtendKind
1932        {
1933            continue;
1934        }
1935        let field_idx = usize::from(operand_order[slot]);
1936        let next_field_idx = usize::from(operand_order[slot + 1]);
1937        if field_idx >= semantic_fields.len() || next_field_idx >= semantic_fields.len() {
1938            continue;
1939        }
1940        if semantic_fields[field_idx] == "rm" && semantic_fields[next_field_idx] == "option" {
1941            bitset |= 1u64 << slot;
1942        }
1943    }
1944    bitset
1945}
1946
1947fn variant_width_hint(variant: &str) -> VariantWidthHint {
1948    let mut has_32 = false;
1949    let mut has_64 = false;
1950    for token in variant.split('_') {
1951        let lower = token.to_ascii_lowercase();
1952        if let Some(rest) = lower.strip_prefix("32")
1953            && (rest.is_empty() || rest.chars().all(|ch| ch.is_ascii_alphabetic()))
1954        {
1955            has_32 = true;
1956            continue;
1957        }
1958        if let Some(rest) = lower.strip_prefix("64")
1959            && (rest.is_empty() || rest.chars().all(|ch| ch.is_ascii_alphabetic()))
1960        {
1961            has_64 = true;
1962        }
1963    }
1964
1965    match (has_32, has_64) {
1966        (true, false) => VariantWidthHint::W32,
1967        (false, true) => VariantWidthHint::W64,
1968        _ => VariantWidthHint::Unknown,
1969    }
1970}
1971
/// Lower-cases the ASCII letters of a raw field name; other characters are kept as-is.
fn normalize_field_name(name: &str) -> String {
    name.chars().map(|ch| ch.to_ascii_lowercase()).collect()
}
1975
/// Strips a trailing `_<digits>` suffix from a field name.
///
/// The text after the last `_` must consist only of ASCII digits for the
/// suffix to be removed; otherwise the name is returned unchanged. An empty
/// suffix (a name ending in `_`) passes that test too, so the `_` is removed.
fn base_field_name(field_name: &str) -> &str {
    match field_name.rsplit_once('_') {
        Some((stem, tail)) if tail.bytes().all(|byte| byte.is_ascii_digit()) => stem,
        _ => field_name,
    }
}
1987
1988fn semantic_field_name(field_name: &str) -> &str {
1989    if matches!(field_name, "option_13" | "option_15") {
1990        field_name
1991    } else {
1992        base_field_name(field_name)
1993    }
1994}
1995
1996fn infer_operand_kind(
1997    semantic_name: &str,
1998    field: &FlatField,
1999    width_hint: VariantWidthHint,
2000    context: &InstructionContext,
2001    variant: &str,
2002) -> Result<GeneratedOperandKind, CodegenError> {
2003    if matches!(semantic_name, "op0" | "op1" | "op2" | "crn" | "crm") {
2004        return Ok(GeneratedOperandKind::SysRegPart);
2005    }
2006    if semantic_name == "cond" {
2007        return Ok(GeneratedOperandKind::Condition);
2008    }
2009    if semantic_name == "shift" {
2010        return Ok(GeneratedOperandKind::ShiftKind);
2011    }
2012    if semantic_name == "option" {
2013        return Ok(GeneratedOperandKind::ExtendKind);
2014    }
2015    if is_predicate_register_field(semantic_name) {
2016        return Ok(GeneratedOperandKind::PredicateRegister);
2017    }
2018    if is_sve_z_register_field(semantic_name) {
2019        return Ok(GeneratedOperandKind::SveZRegister);
2020    }
2021    if is_simd_register_field(semantic_name) {
2022        return Ok(GeneratedOperandKind::SimdRegister);
2023    }
2024    if is_gpr_register_field(semantic_name) {
2025        return infer_gpr_kind(semantic_name, field, width_hint, context, variant);
2026    }
2027    if is_arrangement_field(semantic_name) {
2028        return Ok(GeneratedOperandKind::Arrangement);
2029    }
2030    if is_lane_field(semantic_name) {
2031        return Ok(GeneratedOperandKind::Lane);
2032    }
2033    if is_known_immediate_field(semantic_name) {
2034        return Ok(GeneratedOperandKind::Immediate);
2035    }
2036    Err(CodegenError::UnmappedOperandField {
2037        variant: variant.to_owned(),
2038        field: field.name.clone(),
2039        width: field.width,
2040    })
2041}
2042
/// Reports whether `field_name` is one of the predicate-register field names.
fn is_predicate_register_field(field_name: &str) -> bool {
    const PREDICATE_FIELDS: &[&str] = &[
        "pd", "pdm", "pdn", "pg", "pm", "pn", "pnd", "png", "pnn", "pnv", "pt", "pv",
    ];
    PREDICATE_FIELDS.contains(&field_name)
}
2049
/// Reports whether `field_name` is one of the names classified as an SVE Z register.
fn is_sve_z_register_field(field_name: &str) -> bool {
    const Z_REGISTER_FIELDS: &[&str] = &[
        "za", "zad", "zada", "zan", "zat", "zd", "zda", "zdn", "zk", "zm", "zn", "zt",
    ];
    Z_REGISTER_FIELDS.iter().any(|known| *known == field_name)
}
2056
/// Reports whether `field_name` is one of the names classified as a SIMD register.
fn is_simd_register_field(field_name: &str) -> bool {
    match field_name {
        "vd" | "vdn" | "vm" | "vn" | "vt" => true,
        _ => false,
    }
}
2060
/// Reports whether `field_name` is one of the names classified as a general-purpose register.
fn is_gpr_register_field(field_name: &str) -> bool {
    const GPR_FIELDS: &[&str] = &["ra", "rd", "rdn", "rm", "rn", "rs", "rt", "rt2", "rv"];
    GPR_FIELDS.contains(&field_name)
}
2067
2068fn infer_gpr_kind(
2069    field_name: &str,
2070    field: &FlatField,
2071    width_hint: VariantWidthHint,
2072    context: &InstructionContext,
2073    variant: &str,
2074) -> Result<GeneratedOperandKind, CodegenError> {
2075    match (field_name, field.width) {
2076        ("rt", 3) => return Ok(GeneratedOperandKind::Immediate),
2077        ("rm", 4) => return Ok(GeneratedOperandKind::SimdRegister),
2078        ("rs", 2) | ("rv", 2) => return Ok(GeneratedOperandKind::Immediate),
2079        (_, 5) => {}
2080        _ => {
2081            return Err(CodegenError::UnmappedOperandField {
2082                variant: variant.to_owned(),
2083                field: field.name.clone(),
2084                width: field.width,
2085            });
2086        }
2087    }
2088
2089    if field_name == "rn" && context.memory_like() {
2090        return Ok(GeneratedOperandKind::Gpr64Register);
2091    }
2092
2093    if matches!(field_name, "rt" | "rt2" | "rt3" | "rt4") && context.memory_like() {
2094        if width_hint == VariantWidthHint::Unknown && context.bit_value(26) == Some(1) {
2095            return Ok(GeneratedOperandKind::SimdRegister);
2096        }
2097
2098        return match width_hint {
2099            VariantWidthHint::W32 => Ok(GeneratedOperandKind::Gpr32Register),
2100            VariantWidthHint::W64 => Ok(GeneratedOperandKind::Gpr64Register),
2101            VariantWidthHint::Unknown => Ok(GeneratedOperandKind::GprRegister),
2102        };
2103    }
2104
2105    if width_hint == VariantWidthHint::Unknown
2106        && !context.memory_like()
2107        && context
2108            .semantic_fields
2109            .iter()
2110            .any(|name| is_arrangement_field(name))
2111        && matches!(
2112            field_name,
2113            "ra" | "rd" | "rdn" | "rm" | "rn" | "rs" | "rt" | "rt2"
2114        )
2115    {
2116        return Ok(GeneratedOperandKind::SimdRegister);
2117    }
2118
2119    match width_hint {
2120        VariantWidthHint::W32 => Ok(GeneratedOperandKind::Gpr32Register),
2121        VariantWidthHint::W64 => Ok(GeneratedOperandKind::Gpr64Register),
2122        VariantWidthHint::Unknown => Ok(GeneratedOperandKind::GprRegister),
2123    }
2124}
2125
/// Reports whether `field_name` is one of the names classified as an arrangement field.
fn is_arrangement_field(field_name: &str) -> bool {
    const ARRANGEMENT_FIELDS: &[&str] = &[
        "q", "size", "sz", "tsize", "tsz", "tszh", "tszl", "len", "vl",
    ];
    ARRANGEMENT_FIELDS.contains(&field_name)
}
2132
/// Reports whether `field_name` is one of the names classified as a lane selector.
fn is_lane_field(field_name: &str) -> bool {
    ["lane", "index", "idx"].contains(&field_name)
}
2136
/// Reports whether `field_name` is on the allow-list of fields encoded as plain immediates.
fn is_known_immediate_field(field_name: &str) -> bool {
    const IMMEDIATE_FIELDS: &[&str] = &[
        "a", "b", "b40", "b5", "c", "cmode", "d", "e", "f", "g", "h", "hw",
        "i1", "i2", "i2h", "i2l", "i3", "i3h", "i3l", "i4", "i4a", "i4b", "i4c", "i4h", "i4l",
        "imm12", "imm13", "imm14", "imm16", "imm19", "imm2", "imm26", "imm3", "imm4", "imm5",
        "imm5b", "imm6", "imm7", "imm8", "imm8h", "imm8l", "imm9", "imm9h", "imm9l",
        "immb", "immh", "immhi", "immlo", "immr", "imms",
        "k", "l", "m", "mask", "msz", "n", "nzcv", "o0", "o1", "off2", "off3", "off4",
        "op", "option_13", "option_15", "pattern", "prfop", "rot",
        "s", "scale", "sf", "sh", "simm7", "t", "u", "v", "xs",
    ];
    IMMEDIATE_FIELDS.contains(&field_name)
}
2218
2219fn implicit_default_value(field_name: &str, kind: GeneratedOperandKind) -> Option<i64> {
2220    if field_name == "sh" {
2221        return Some(0);
2222    }
2223    if kind == GeneratedOperandKind::ShiftKind && field_name == "shift" {
2224        return Some(0);
2225    }
2226    None
2227}
2228
2229fn field_rank(field_name: &str, kind: GeneratedOperandKind) -> u16 {
2230    match kind {
2231        GeneratedOperandKind::GprRegister
2232        | GeneratedOperandKind::Gpr32Register
2233        | GeneratedOperandKind::Gpr64Register
2234        | GeneratedOperandKind::SimdRegister
2235        | GeneratedOperandKind::SveZRegister
2236        | GeneratedOperandKind::PredicateRegister => register_rank(field_name),
2237        GeneratedOperandKind::Condition => 400,
2238        GeneratedOperandKind::ShiftKind | GeneratedOperandKind::ExtendKind => 500,
2239        GeneratedOperandKind::Arrangement => 550,
2240        GeneratedOperandKind::Lane => 575,
2241        GeneratedOperandKind::SysRegPart => match field_name {
2242            "op0" => 100,
2243            "op1" => 200,
2244            "crn" => 300,
2245            "crm" => 400,
2246            "op2" => 500,
2247            _ => 550,
2248        },
2249        GeneratedOperandKind::Immediate => 700,
2250    }
2251}
2252
/// Returns the sort rank of a register field by name; unlisted names rank 375.
fn register_rank(field_name: &str) -> u16 {
    // Each row pairs a rank with the field names that share it.
    const RANK_TABLE: &[(u16, &[&str])] = &[
        (
            100,
            &[
                "pd", "pdm", "pdn", "pnd", "rd", "rdn", "rt", "vd", "vdn", "zd", "zda", "zdn",
                "zt", "za", "zad", "zada", "zat",
            ],
        ),
        (110, &["rt2"]),
        (200, &["pg", "png", "rn", "vn", "zn", "zan"]),
        (250, &["ra", "pn", "pnn"]),
        (300, &["pm", "pnv", "rm", "vm", "zm"]),
        (350, &["rs", "pt", "pv"]),
    ];

    RANK_TABLE
        .iter()
        .find(|(_, names)| names.contains(&field_name))
        .map_or(375, |(rank, _)| *rank)
}
2265
2266/// Collects normalized instruction variants from generated `aarchmrs-instructions` Rust source.
2267///
2268/// The `root` must point to an ISA folder such as `.../aarchmrs-instructions/src/A64`.
2269///
2270/// # Errors
2271///
2272/// Returns [`CodegenError`] when filesystem traversal or source parsing fails.
2273pub fn collect_flat_from_generated_rust(root: &Path) -> Result<Vec<FlatInstruction>, CodegenError> {
2274    let mut files = Vec::new();
2275    collect_rs_files(root, &mut files)?;
2276    files.sort();
2277
2278    let mut out = Vec::new();
2279    for file in files {
2280        let source = fs::read_to_string(&file)?;
2281        let mut file_items = parse_instruction_modules(&file, &source)?;
2282        out.append(&mut file_items);
2283    }
2284
2285    out.sort_by(|a, b| a.mnemonic.cmp(&b.mnemonic).then(a.variant.cmp(&b.variant)));
2286    Ok(out)
2287}
2288
2289fn collect_rs_files(dir: &Path, out: &mut Vec<PathBuf>) -> Result<(), CodegenError> {
2290    for entry in fs::read_dir(dir)? {
2291        let entry = entry?;
2292        let path = entry.path();
2293        let ty = entry.file_type()?;
2294        if ty.is_dir() {
2295            collect_rs_files(&path, out)?;
2296        } else if ty.is_file() && path.extension().and_then(|s| s.to_str()) == Some("rs") {
2297            out.push(path);
2298        }
2299    }
2300    Ok(())
2301}
2302
2303fn parse_instruction_modules(
2304    path: &Path,
2305    source: &str,
2306) -> Result<Vec<FlatInstruction>, CodegenError> {
2307    let mut result = Vec::new();
2308    let mut cursor = 0usize;
2309
2310    while let Some(offset) = source[cursor..].find("pub mod ") {
2311        let start = cursor + offset;
2312        let name_start = start + "pub mod ".len();
2313        let Some(name_end) = find_ident_end(source, name_start) else {
2314            return Err(parse_error(path, "invalid module name"));
2315        };
2316        let mod_name = &source[name_start..name_end];
2317
2318        let idx = skip_ws(source, name_end);
2319        if idx >= source.len() {
2320            break;
2321        }
2322
2323        if source.as_bytes()[idx] == b';' {
2324            cursor = idx + 1;
2325            continue;
2326        }
2327
2328        if source.as_bytes()[idx] != b'{' {
2329            return Err(parse_error(path, "expected '{' after module declaration"));
2330        }
2331
2332        let Some(end_brace) = find_matching_brace(source, idx) else {
2333            return Err(parse_error(path, "unbalanced module braces"));
2334        };
2335
2336        let body = &source[idx + 1..end_brace];
2337        if let Some(inst) = parse_instruction_module(path, mod_name, body)? {
2338            result.push(inst);
2339        }
2340
2341        cursor = end_brace + 1;
2342    }
2343
2344    Ok(result)
2345}
2346
2347fn parse_instruction_module(
2348    path: &Path,
2349    mod_name: &str,
2350    body: &str,
2351) -> Result<Option<FlatInstruction>, CodegenError> {
2352    let Some(mask) = parse_named_const_u32(body, "OPCODE_MASK") else {
2353        return Ok(None);
2354    };
2355    let Some(opcode) = parse_named_const_u32(body, "OPCODE") else {
2356        return Ok(None);
2357    };
2358
2359    let Some((params, expr_body)) = parse_function_signature_and_body(mod_name, body) else {
2360        return Ok(None);
2361    };
2362
2363    let semantic_fields = params
2364        .iter()
2365        .map(|param| {
2366            let normalized = normalize_field_name(&param.name);
2367            semantic_field_name(&normalized).to_owned()
2368        })
2369        .collect::<Vec<_>>();
2370
2371    let mut fields = Vec::with_capacity(params.len());
2372    for param in params {
2373        let Some(lsb) = find_param_shift(expr_body, &param.name) else {
2374            return Err(parse_error(
2375                path,
2376                &format!("cannot find shift for field {} in {}", param.name, mod_name),
2377            ));
2378        };
2379
2380        let name = param.name;
2381        let signed = infer_signed_field(&name, opcode, mask, &semantic_fields);
2382        fields.push(FlatField {
2383            name,
2384            lsb,
2385            width: param.width,
2386            signed,
2387        });
2388    }
2389
2390    let mnemonic = infer_mnemonic(mod_name);
2391    Ok(Some(FlatInstruction {
2392        mnemonic,
2393        variant: mod_name.to_owned(),
2394        path: format!("{}::{}", path.display(), mod_name),
2395        fixed_mask: mask,
2396        fixed_value: opcode,
2397        fields,
2398    }))
2399}
2400
/// One parameter of a generated encoder function.
#[derive(Debug)]
struct Param {
    /// Parameter name exactly as written in the function signature.
    name: String,
    /// Width parsed from the parameter's `BitValue<N>` type; 0 when no width could be parsed.
    width: u8,
}
2406
2407fn parse_function_signature_and_body<'a>(
2408    mod_name: &str,
2409    body: &'a str,
2410) -> Option<(Vec<Param>, &'a str)> {
2411    let needle = format!("pub const fn {mod_name}");
2412    let start = body.find(&needle)?;
2413    let sig_open = body[start..].find('(')? + start;
2414    let sig_close = find_matching_paren(body, sig_open)?;
2415
2416    let params_text = &body[sig_open + 1..sig_close];
2417    let params = parse_params(params_text);
2418
2419    let from_u32_pos = body[sig_close..].find("InstructionCode::from_u32(")? + sig_close;
2420    let open = body[from_u32_pos..].find('(')? + from_u32_pos;
2421    let close = find_matching_paren(body, open)?;
2422    let expr = &body[open + 1..close];
2423
2424    Some((params, expr))
2425}
2426
2427fn parse_params(text: &str) -> Vec<Param> {
2428    let mut params = Vec::new();
2429
2430    for raw in text.split(',') {
2431        let part = raw.trim();
2432        if part.is_empty() {
2433            continue;
2434        }
2435
2436        let Some((name_raw, ty_raw)) = part.split_once(':') else {
2437            continue;
2438        };
2439        let name = name_raw.trim().to_owned();
2440        let ty = ty_raw.trim();
2441
2442        let width = parse_bitvalue_width(ty).unwrap_or(0);
2443        params.push(Param { name, width });
2444    }
2445
2446    params
2447}
2448
/// Extracts `N` from a type containing `BitValue<N>`.
///
/// Returns `None` when the marker or its closing `>` is missing, or when the
/// enclosed text does not parse as a `u8`.
fn parse_bitvalue_width(ty: &str) -> Option<u8> {
    const OPENER: &str = "BitValue<";

    let (_, after_opener) = ty.split_once(OPENER)?;
    let (width_text, _) = after_opener.split_once('>')?;
    width_text.trim().parse().ok()
}
2455
2456fn parse_named_const_u32(body: &str, name: &str) -> Option<u32> {
2457    let needle = format!("pub const {name}: u32 =");
2458    let start = body.find(&needle)? + needle.len();
2459    let rest = &body[start..];
2460    let end = rest.find(';')?;
2461    parse_u32_literal(rest[..end].trim())
2462}
2463
/// Parses a Rust-style `u32` integer literal.
///
/// Accepts decimal, binary (`0b`), octal (`0o`) and hexadecimal (`0x`) digits,
/// `_` separators anywhere in the text, and an optional `u32` suffix.
/// Returns `None` when the remaining digits are invalid for the radix or the
/// value does not fit in a `u32`.
fn parse_u32_literal(text: &str) -> Option<u32> {
    let cleaned: String = text.trim().chars().filter(|&ch| ch != '_').collect();
    let unsuffixed = cleaned.strip_suffix("u32").unwrap_or(&cleaned);

    let (radix, digits) = if let Some(bits) = unsuffixed.strip_prefix("0b") {
        (2, bits)
    } else if let Some(hex) = unsuffixed.strip_prefix("0x") {
        (16, hex)
    } else if let Some(octal) = unsuffixed.strip_prefix("0o") {
        (8, octal)
    } else {
        (10, unsuffixed)
    };
    u32::from_str_radix(digits, radix).ok()
}
2478
/// Finds the left-shift amount applied to `param` in an encoder expression.
///
/// Looks for `<param>.into_inner() << <digits>` and returns the first shift
/// amount that parses as a `u8`. Occurrences that are not followed by a shift,
/// or whose amount does not fit in a `u8`, are skipped.
///
/// A hit only counts when it is not the tail of a longer identifier, so
/// searching for `m` does not pick up the shift of `Rm.into_inner()`.
fn find_param_shift(expr: &str, param: &str) -> Option<u8> {
    let needle = format!("{param}.into_inner()");
    let mut cursor = 0usize;

    while let Some(off) = expr[cursor..].find(&needle) {
        let match_start = cursor + off;
        let match_end = match_start + needle.len();
        cursor = match_end;

        // Reject hits glued to a preceding identifier character.
        let continues_identifier = expr[..match_start]
            .chars()
            .next_back()
            .map_or(false, |ch| ch.is_alphanumeric() || ch == '_');
        if continues_identifier {
            continue;
        }

        let after_call = expr[match_end..].trim_start();
        let shift_text = match after_call.strip_prefix("<<") {
            Some(rest) => rest.trim_start(),
            None => continue,
        };

        let digits_len = shift_text
            .bytes()
            .take_while(u8::is_ascii_digit)
            .count();
        // An empty digit run fails to parse and is skipped like any other miss.
        if let Ok(value) = shift_text[..digits_len].parse::<u8>() {
            return Some(value);
        }
    }

    None
}
2507
/// Returns the byte index just past the identifier that starts at `start`.
///
/// An identifier is a run of ASCII letters, ASCII digits and `_`. Returns
/// `None` when no such character is found at `start`.
fn find_ident_end(source: &str, start: usize) -> Option<usize> {
    let tail = &source[start..];
    let ident_len = tail
        .find(|ch: char| !(ch.is_ascii_alphanumeric() || ch == '_'))
        .unwrap_or(tail.len());

    if ident_len == 0 {
        None
    } else {
        Some(start + ident_len)
    }
}
2519
/// Advances `idx` past any ASCII whitespace bytes and returns the new index.
///
/// An index at or beyond the end of `source` is returned unchanged.
fn skip_ws(source: &str, mut idx: usize) -> usize {
    let skipped = source.as_bytes().get(idx..).map_or(0, |rest| {
        rest.iter()
            .take_while(|byte| byte.is_ascii_whitespace())
            .count()
    });
    idx += skipped;
    idx
}
2526
/// Returns the index of the `}` that balances the `{` at `open_idx`.
///
/// Scanning is purely byte-based, starting at `open_idx`; every `{` and `}`
/// counts regardless of context.
///
/// Returns `None` when the braces never balance, when a `}` appears before
/// any `{` has been opened, or when `open_idx` lies beyond the end of
/// `source`. The last two cases used to underflow the depth counter or panic
/// on the slice.
fn find_matching_brace(source: &str, open_idx: usize) -> Option<usize> {
    let mut depth = 0usize;
    for (offset, byte) in source.as_bytes().get(open_idx..)?.iter().enumerate() {
        match *byte {
            b'{' => depth += 1,
            b'}' => {
                // A closer with nothing open cannot be matched.
                depth = depth.checked_sub(1)?;
                if depth == 0 {
                    return Some(open_idx + offset);
                }
            }
            _ => {}
        }
    }
    None
}
2543
/// Returns the index of the `)` that balances the `(` at `open_idx`.
///
/// Scanning is purely byte-based, starting at `open_idx`; every `(` and `)`
/// counts regardless of context.
///
/// Returns `None` when the parentheses never balance, when a `)` appears
/// before any `(` has been opened, or when `open_idx` lies beyond the end of
/// `source`. The last two cases used to underflow the depth counter or panic
/// on the slice.
fn find_matching_paren(source: &str, open_idx: usize) -> Option<usize> {
    let mut depth = 0usize;
    for (offset, byte) in source.as_bytes().get(open_idx..)?.iter().enumerate() {
        match *byte {
            b'(' => depth += 1,
            b')' => {
                // A closer with nothing open cannot be matched.
                depth = depth.checked_sub(1)?;
                if depth == 0 {
                    return Some(open_idx + offset);
                }
            }
            _ => {}
        }
    }
    None
}
2560
2561fn parse_error(path: &Path, message: &str) -> CodegenError {
2562    CodegenError::Parse {
2563        path: path.display().to_string(),
2564        message: message.to_owned(),
2565    }
2566}
2567
/// Derives the mnemonic from a variant name: the text before the first `_`, ASCII-lower-cased.
///
/// A name without `_` is used whole.
fn infer_mnemonic(variant: &str) -> String {
    let head = match variant.find('_') {
        Some(cut) => &variant[..cut],
        None => variant,
    };
    head.to_ascii_lowercase()
}
2575
2576fn infer_signed_field(
2577    field_name: &str,
2578    opcode: u32,
2579    opcode_mask: u32,
2580    semantic_fields: &[String],
2581) -> bool {
2582    let field = normalize_field_name(field_name);
2583    let semantic_name = semantic_field_name(&field);
2584
2585    if semantic_name.starts_with("simm") || semantic_name.contains("offset") {
2586        return true;
2587    }
2588
2589    if matches!(semantic_name, "imm26" | "imm19" | "imm14") {
2590        return true;
2591    }
2592
2593    if semantic_name == "immhi"
2594        && has_semantic_field(semantic_fields, "immhi")
2595        && has_semantic_field(semantic_fields, "immlo")
2596    {
2597        return true;
2598    }
2599
2600    if semantic_name == "imm7"
2601        && has_semantic_field(semantic_fields, "rt")
2602        && has_semantic_field(semantic_fields, "rt2")
2603        && has_semantic_field(semantic_fields, "rn")
2604    {
2605        return true;
2606    }
2607
2608    if semantic_name == "imm9"
2609        && has_semantic_field(semantic_fields, "rt")
2610        && has_semantic_field(semantic_fields, "rn")
2611        && is_fixed_bit_pair(opcode, opcode_mask, 11, 10, &[(0, 0), (0, 1), (1, 1)])
2612    {
2613        return true;
2614    }
2615
2616    false
2617}
2618
/// Reports whether bits `hi` and `lo` are both fixed and form an accepted `(hi, lo)` pair.
///
/// A bit is fixed when it is set in `opcode_mask`; its value is then read from
/// `opcode`. Returns `false` when either bit is not fixed, when the pair is
/// not listed in `accepted`, or when a bit index lies outside the 32-bit word
/// (the unchecked shift used to overflow for indices of 32 and above).
fn is_fixed_bit_pair(opcode: u32, opcode_mask: u32, hi: u8, lo: u8, accepted: &[(u8, u8)]) -> bool {
    let fixed_bit = |bit: u8| -> Option<u8> {
        // `checked_shl` rejects shift amounts >= 32 instead of overflowing.
        let mask = 1u32.checked_shl(u32::from(bit))?;
        if opcode_mask & mask == 0 {
            None
        } else {
            Some(u8::from(opcode & mask != 0))
        }
    };

    match (fixed_bit(hi), fixed_bit(lo)) {
        (Some(hi_value), Some(lo_value)) => accepted.contains(&(hi_value, lo_value)),
        _ => false,
    }
}
2632
#[cfg(test)]
mod tests {
    use super::*;

    // Fixture builders are macros rather than functions so that every integer
    // literal is typed by the struct's own field declaration, exactly as it
    // would be in a hand-written struct literal.

    // Builds a `FlatField`; unsigned unless `signed = true` is passed.
    macro_rules! flat_field {
        ($name:expr, $lsb:expr, $width:expr) => {
            flat_field!($name, $lsb, $width, signed = false)
        };
        ($name:expr, $lsb:expr, $width:expr, signed = $signed:expr) => {
            FlatField {
                name: String::from($name),
                lsb: $lsb,
                width: $width,
                signed: $signed,
            }
        };
    }

    // Builds a `FlatInstruction` from its mnemonic, variant, path, fixed
    // mask/value and field list.
    macro_rules! flat_instruction {
        (
            $mnemonic:expr,
            $variant:expr,
            $path:expr,
            mask = $mask:expr,
            value = $value:expr,
            fields = [$($field:expr),* $(,)?] $(,)?
        ) => {
            FlatInstruction {
                mnemonic: String::from($mnemonic),
                variant: String::from($variant),
                path: String::from($path),
                fixed_mask: $mask,
                fixed_value: $value,
                fields: vec![$($field),*],
            }
        };
    }

    // Asserts that `$err` is `CodegenError::UnmappedOperandField` carrying the
    // given variant, field name and width; panics on any other error kind.
    macro_rules! assert_unmapped_field {
        ($err:expr, $variant:expr, $field:expr, $width:expr) => {
            match $err {
                CodegenError::UnmappedOperandField {
                    variant,
                    field,
                    width,
                } => {
                    assert_eq!(variant, $variant);
                    assert_eq!(field, $field);
                    assert_eq!(width, $width);
                }
                other => panic!("unexpected error kind: {other}"),
            }
        };
    }

    /// Context with zero opcode, zero mask and no semantic fields.
    fn empty_context() -> InstructionContext {
        InstructionContext::from_semantic_fields(0, 0, Vec::new())
    }

    /// Owned copies of the given semantic field names, in the same order.
    fn names(raw: &[&str]) -> Vec<String> {
        raw.iter().map(|name| String::from(*name)).collect()
    }

    #[test]
    fn generate_non_empty() {
        let input = vec![flat_instruction!(
            "add",
            "ADD_64_addsub_imm",
            "A64/dpimm/ADD_64_addsub_imm",
            mask = 0xffff_ffff,
            value = 0x9100_0000,
            fields = [flat_field!("Rd", 0, 5)],
        )];

        let text = generate_encoder_module(&input).expect("codegen should succeed");
        assert!(text.contains("SPECS"));
        assert!(text.contains("ADD_64_addsub_imm"));
    }

    #[test]
    fn macro_normalization_module_includes_generated_rules() {
        let input = vec![
            flat_instruction!(
                "msr",
                "MSR_SR_systemmove",
                "A64/control/MSR_SR_systemmove",
                mask = 0xfff0_0000,
                value = 0xd510_0000,
                fields = [
                    flat_field!("o0", 19, 1),
                    flat_field!("op1", 16, 3),
                    flat_field!("CRn", 12, 4),
                    flat_field!("CRm", 8, 4),
                    flat_field!("op2", 5, 3),
                    flat_field!("Rt", 0, 5),
                ],
            ),
            flat_instruction!(
                "add",
                "ADD_64_addsub_imm",
                "A64/dpimm/ADD_64_addsub_imm",
                mask = 0xff80_0000,
                value = 0x9100_0000,
                fields = [
                    flat_field!("sh", 22, 1),
                    flat_field!("imm12", 10, 12),
                    flat_field!("Rn", 5, 5),
                    flat_field!("Rd", 0, 5),
                ],
            ),
            flat_instruction!(
                "b",
                "B_only_branch_imm",
                "A64/control/B_only_branch_imm",
                mask = 0xfc00_0000,
                value = 0x1400_0000,
                fields = [flat_field!("imm26", 0, 26, signed = true)],
            ),
            flat_instruction!(
                "b",
                "B_only_condbranch",
                "A64/control/B_only_condbranch",
                mask = 0xff00_0010,
                value = 0x5400_0000,
                fields = [
                    flat_field!("imm19", 5, 19, signed = true),
                    flat_field!("cond", 0, 4),
                ],
            ),
            flat_instruction!(
                "cbz",
                "CBZ_64_compbranch",
                "A64/control/CBZ_64_compbranch",
                mask = 0x7f00_0000,
                value = 0x3400_0000,
                fields = [
                    flat_field!("imm19", 5, 19, signed = true),
                    flat_field!("Rt", 0, 5),
                ],
            ),
            flat_instruction!(
                "tbz",
                "TBZ_only_testbranch",
                "A64/control/TBZ_only_testbranch",
                mask = 0x7f00_0000,
                value = 0x3600_0000,
                fields = [
                    flat_field!("b5", 31, 1),
                    flat_field!("b40", 19, 5),
                    flat_field!("imm14", 5, 14, signed = true),
                    flat_field!("Rt", 0, 5),
                ],
            ),
            flat_instruction!(
                "adr",
                "ADR_only_pcreladdr",
                "A64/control/ADR_only_pcreladdr",
                mask = 0x9f00_0000,
                value = 0x1000_0000,
                fields = [
                    flat_field!("immlo", 29, 2),
                    flat_field!("immhi", 5, 19, signed = true),
                    flat_field!("Rd", 0, 5),
                ],
            ),
            flat_instruction!(
                "adrp",
                "ADRP_only_pcreladdr",
                "A64/control/ADRP_only_pcreladdr",
                mask = 0x9f00_0000,
                value = 0x9000_0000,
                fields = [
                    flat_field!("immlo", 29, 2),
                    flat_field!("immhi", 5, 19, signed = true),
                    flat_field!("Rd", 0, 5),
                ],
            ),
        ];

        let text = generate_macro_normalization_module(&input).expect("module generation");

        // Every one of these must appear somewhere in the generated module.
        let expected_fragments = [
            "MNEMONIC_NORMALIZATION_RULES",
            "CONDITIONAL_BRANCH_ALIAS_RULES",
            "NORM_FLAG_SHIFT_TO_IMMEDIATE",
            "NORM_FLAG_SYSREG_GPR_SWAP",
            "RELOC_MASK_B26",
            "RELOC_MASK_BCOND19",
            "RELOC_MASK_CBZ19",
            "RELOC_MASK_IMM19",
            "RELOC_MASK_TBZ14",
            "RELOC_MASK_ADR21",
            "RELOC_MASK_ADRP21",
            "\"beq\"",
            "\"b.eq\"",
            "\"msr\"",
            "\"b\"",
            "\"cbz\"",
            "\"tbz\"",
            "\"adr\"",
            "\"adrp\"",
        ];
        for fragment in expected_fragments.iter().copied() {
            assert!(
                text.contains(fragment),
                "generated module is missing {fragment:?}"
            );
        }
    }

    #[test]
    fn source_name_mapping_is_deterministic_for_core_kinds() {
        let variant = "CSEL_64_condsel";
        let width_hint = variant_width_hint(variant);
        let context = empty_context();

        // (semantic name, encoded field, failure message, expected kind)
        let cases = [
            (
                "rd",
                flat_field!("Rd", 0, 5),
                "rd must map",
                GeneratedOperandKind::Gpr64Register,
            ),
            (
                "cond",
                flat_field!("cond", 12, 4),
                "cond must map",
                GeneratedOperandKind::Condition,
            ),
            (
                "shift",
                flat_field!("shift", 22, 2),
                "shift must map",
                GeneratedOperandKind::ShiftKind,
            ),
            (
                "option",
                flat_field!("option", 13, 3),
                "extend must map",
                GeneratedOperandKind::ExtendKind,
            ),
            (
                "crn",
                flat_field!("CRn", 12, 4),
                "sysreg part must map",
                GeneratedOperandKind::SysRegPart,
            ),
            (
                "pd",
                flat_field!("Pd", 0, 4),
                "predicate must map",
                GeneratedOperandKind::PredicateRegister,
            ),
            (
                "zd",
                flat_field!("Zd", 0, 5),
                "z must map",
                GeneratedOperandKind::SveZRegister,
            ),
            (
                "vn",
                flat_field!("Vn", 5, 5),
                "simd must map",
                GeneratedOperandKind::SimdRegister,
            ),
            (
                "option_13",
                flat_field!("option_13", 13, 1),
                "split option must map",
                GeneratedOperandKind::Immediate,
            ),
        ];

        // Resolve every mapping first, then compare, so a mapping failure is
        // reported before any kind mismatch.
        let inferred: Vec<GeneratedOperandKind> = cases
            .iter()
            .map(|(semantic, flat, why, _)| {
                infer_operand_kind(
                    semantic_field_name(*semantic),
                    flat,
                    width_hint,
                    &context,
                    variant,
                )
                .expect(*why)
            })
            .collect();

        for (actual, (_, _, _, expected)) in inferred.iter().zip(cases.iter()) {
            assert_eq!(actual, expected);
        }
    }

    #[test]
    fn variant_width_hint_recognizes_suffix_tokens() {
        let cases = [
            ("SUBS_32S_addsub_imm", VariantWidthHint::W32),
            ("SUBS_64S_addsub_imm", VariantWidthHint::W64),
            ("ADD_64_addsub_imm", VariantWidthHint::W64),
            ("foo_bar", VariantWidthHint::Unknown),
        ];

        for &(variant, expected) in cases.iter() {
            assert_eq!(variant_width_hint(variant), expected);
        }
    }

    #[test]
    fn infer_signed_field_uses_structural_context() {
        let pair_fields = names(&["rt", "rt2", "rn", "imm7"]);
        let imm9_fields = names(&["rt", "rn", "imm9"]);
        let pcrel_fields = names(&["rd", "immlo", "immhi"]);

        // (field, opcode, opcode mask, sibling semantic fields, expected signedness)
        let cases = [
            ("imm7", 0xa9800000, 0xffc00000, &pair_fields, true),
            ("imm9", 0xf8000400, 0xffe00c00, &imm9_fields, true),
            ("imm12", 0xf9000000, 0xffc00000, &imm9_fields, false),
            ("immhi", 0x10000000, 0x9f000000, &pcrel_fields, true),
            ("immlo", 0x10000000, 0x9f000000, &pcrel_fields, false),
        ];

        for &(field, opcode, opcode_mask, semantic_fields, expected) in cases.iter() {
            assert_eq!(
                infer_signed_field(field, opcode, opcode_mask, semantic_fields),
                expected
            );
        }
    }

    #[test]
    fn source_name_mapping_handles_known_outliers() {
        let variant = "RPRFM_R_ldst_regoff";
        let width_hint = variant_width_hint(variant);
        let context = empty_context();

        let rt_field = flat_field!("Rt", 0, 3);
        let rt = infer_operand_kind(
            semantic_field_name("rt"),
            &rt_field,
            width_hint,
            &context,
            variant,
        )
        .expect("rt outlier must map");

        let rm_field = flat_field!("Rm", 16, 4);
        let rm = infer_operand_kind(
            semantic_field_name("rm"),
            &rm_field,
            width_hint,
            &context,
            variant,
        )
        .expect("rm outlier must map");

        assert_eq!(rt, GeneratedOperandKind::Immediate);
        assert_eq!(rm, GeneratedOperandKind::SimdRegister);
    }

    #[test]
    fn memory_rt_operands_become_simd_when_v_bit_is_fixed() {
        let variant = "TEST_ldstpair";
        let context = InstructionContext::from_semantic_fields(
            0x6d80_0000,
            0xffc0_0000,
            names(&["imm7", "rt2", "rn", "rt"]),
        );

        let rt_field = flat_field!("Rt", 0, 5);
        let rt = infer_operand_kind(
            semantic_field_name("rt"),
            &rt_field,
            VariantWidthHint::Unknown,
            &context,
            variant,
        )
        .expect("rt must map");

        let rn_field = flat_field!("Rn", 5, 5);
        let rn = infer_operand_kind(
            semantic_field_name("rn"),
            &rn_field,
            VariantWidthHint::Unknown,
            &context,
            variant,
        )
        .expect("rn must map");

        assert_eq!(rt, GeneratedOperandKind::SimdRegister);
        assert_eq!(rn, GeneratedOperandKind::Gpr64Register);
    }

    #[test]
    fn unknown_field_mapping_fails_generation() {
        let input = vec![flat_instruction!(
            "foo",
            "FOO_only_test",
            "A64/test/FOO_only_test",
            mask = 0xffff_ffff,
            value = 0,
            fields = [flat_field!("mystery", 0, 7)],
        )];

        let err = generate_encoder_module(&input).expect_err("must fail for unknown field");
        assert_unmapped_field!(err, "FOO_only_test", "mystery", 7);
    }

    #[test]
    fn i_prefix_is_not_accepted_without_explicit_mapping() {
        let variant = "FOO_only_test";
        let width_hint = variant_width_hint(variant);
        let context = empty_context();

        let ix_field = flat_field!("ix", 10, 3);
        let err = infer_operand_kind(
            semantic_field_name("ix"),
            &ix_field,
            width_hint,
            &context,
            variant,
        )
        .expect_err("unknown i* field must fail");

        assert_unmapped_field!(err, "FOO_only_test", "ix", 3);
    }

    #[test]
    fn unsupported_gpr_width_fails_with_unmapped_error() {
        let variant = "FOO_64_test";
        let width_hint = variant_width_hint(variant);
        let context = empty_context();

        let rn_field = flat_field!("Rn", 5, 4);
        let err = infer_operand_kind(
            semantic_field_name("rn"),
            &rn_field,
            width_hint,
            &context,
            variant,
        )
        .expect_err("unsupported width must fail");

        assert_unmapped_field!(err, "FOO_64_test", "Rn", 4);
    }

    #[test]
    fn pair_memory_order_is_rt_then_rt2() {
        let inst = flat_instruction!(
            "stp",
            "STP_64_ldstpair_pre",
            "A64/ldst/STP_64_ldstpair_pre",
            mask = 0xffc0_0000,
            value = 0xa980_0000,
            fields = [
                flat_field!("imm7", 15, 7, signed = true),
                flat_field!("Rt2", 10, 5),
                flat_field!("Rn", 5, 5),
                flat_field!("Rt", 0, 5),
            ],
        );

        let (order, kinds, defaults) = derive_operand_metadata(&inst).expect("metadata");

        let expected_kinds = vec![
            GeneratedOperandKind::Gpr64Register,
            GeneratedOperandKind::Gpr64Register,
            GeneratedOperandKind::Gpr64Register,
            GeneratedOperandKind::Immediate,
        ];
        assert_eq!(order, vec![3, 1, 2, 0]);
        assert_eq!(kinds, expected_kinds);
        assert!(defaults.is_empty());
    }

    #[test]
    fn derives_split_immediate_plan_for_adr_like_variants() {
        let inst = flat_instruction!(
            "adr",
            "ADR_only_pcreladdr",
            "A64/adr/ADR_only_pcreladdr",
            mask = 0x9f00_0000,
            value = 0x1000_0000,
            fields = [
                flat_field!("immlo", 29, 2),
                flat_field!("immhi", 5, 19, signed = true),
                flat_field!("Rd", 0, 5),
            ],
        );

        let (order, kinds, _) = derive_operand_metadata(&inst).expect("metadata");
        let plan = derive_split_immediate_plan(&inst, &order, &kinds).expect("split plan");

        let expected = GeneratedSplitImmediatePlan {
            first_slot: 1,
            second_slot: 2,
            kind: GeneratedSplitImmediateKind::AdrLike {
                immlo_field_index: 0,
                immhi_field_index: 1,
                scale: 1,
            },
        };
        assert_eq!(plan, expected);
    }

    #[test]
    fn derives_gpr32_extend_compatibility_for_add_ext() {
        let inst = flat_instruction!(
            "add",
            "ADD_64_addsub_ext",
            "A64/add/ADD_64_addsub_ext",
            mask = 0xffe0_0000,
            value = 0x8b20_0000,
            fields = [
                flat_field!("Rm", 16, 5),
                flat_field!("option", 13, 3),
                flat_field!("imm3", 10, 3),
                flat_field!("Rn", 5, 5),
                flat_field!("Rd", 0, 5),
            ],
        );

        let (order, kinds, _) = derive_operand_metadata(&inst).expect("metadata");
        let bitset = derive_gpr32_extend_compatibility(&inst, &order, &kinds);
        assert_eq!(bitset, 0b100);
    }
}