1use crate::encoder::traits::{InstructionEncoder, ParsedInstruction};
7use crate::error::RasError;
8
/// Encoder that turns parsed RISC-V assembly instructions into machine-code bytes.
#[derive(Debug, Clone)]
pub struct RiscVEncoder {
    /// Running total of bytes produced by successful `encode_instruction` calls.
    position: usize,
    /// `true` selects RV64 behaviour (6-bit shift amounts); `false` selects RV32.
    rv64: bool,
}
14
15impl Default for RiscVEncoder {
16 fn default() -> Self {
17 Self::new(true)
18 }
19}
20
21impl RiscVEncoder {
22 pub fn new(rv64: bool) -> Self {
23 Self { position: 0, rv64 }
24 }
25
26 fn parse_register(&self, s: &str) -> Result<u8, RasError> {
31 let s = s.trim_start_matches('%').trim();
32 match s {
33 "x0" => Ok(0),
35 "x1" => Ok(1),
36 "x2" => Ok(2),
37 "x3" => Ok(3),
38 "x4" => Ok(4),
39 "x5" => Ok(5),
40 "x6" => Ok(6),
41 "x7" => Ok(7),
42 "x8" => Ok(8),
43 "x9" => Ok(9),
44 "x10" => Ok(10),
45 "x11" => Ok(11),
46 "x12" => Ok(12),
47 "x13" => Ok(13),
48 "x14" => Ok(14),
49 "x15" => Ok(15),
50 "x16" => Ok(16),
51 "x17" => Ok(17),
52 "x18" => Ok(18),
53 "x19" => Ok(19),
54 "x20" => Ok(20),
55 "x21" => Ok(21),
56 "x22" => Ok(22),
57 "x23" => Ok(23),
58 "x24" => Ok(24),
59 "x25" => Ok(25),
60 "x26" => Ok(26),
61 "x27" => Ok(27),
62 "x28" => Ok(28),
63 "x29" => Ok(29),
64 "x30" => Ok(30),
65 "x31" => Ok(31),
66 "zero" => Ok(0),
68 "ra" => Ok(1),
69 "sp" => Ok(2),
70 "gp" => Ok(3),
71 "tp" => Ok(4),
72 "t0" => Ok(5),
73 "t1" => Ok(6),
74 "t2" => Ok(7),
75 "s0" | "fp" => Ok(8),
76 "s1" => Ok(9),
77 "a0" => Ok(10),
78 "a1" => Ok(11),
79 "a2" => Ok(12),
80 "a3" => Ok(13),
81 "a4" => Ok(14),
82 "a5" => Ok(15),
83 "a6" => Ok(16),
84 "a7" => Ok(17),
85 "s2" => Ok(18),
86 "s3" => Ok(19),
87 "s4" => Ok(20),
88 "s5" => Ok(21),
89 "s6" => Ok(22),
90 "s7" => Ok(23),
91 "s8" => Ok(24),
92 "s9" => Ok(25),
93 "s10" => Ok(26),
94 "s11" => Ok(27),
95 "t3" => Ok(28),
96 "t4" => Ok(29),
97 "t5" => Ok(30),
98 "t6" => Ok(31),
99 _ => Err(RasError::EncodingError(format!(
100 "Unknown RISC-V register: {}",
101 s
102 ))),
103 }
104 }
105
106 fn parse_imm(&self, s: &str) -> Result<i64, RasError> {
107 let s = s.trim();
108 let s = s.trim_start_matches('#');
110 if let Some(hex) = s.strip_prefix("0x").or_else(|| s.strip_prefix("0X")) {
111 i64::from_str_radix(hex, 16)
112 .map_err(|_| RasError::EncodingError(format!("Invalid hex immediate: {}", s)))
113 } else {
114 s.parse::<i64>()
115 .map_err(|_| RasError::EncodingError(format!("Invalid immediate: {}", s)))
116 }
117 }
118
119 fn parse_mem_operand(&self, s: &str) -> Result<(u8, i32), RasError> {
122 let s = s.trim();
123 if let Some(paren) = s.find('(') {
124 let offset_str = s[..paren].trim();
125 let reg_str = s[paren + 1..].trim_end_matches(')').trim();
126 let offset = if offset_str.is_empty() {
127 0i32
128 } else {
129 self.parse_imm(offset_str)? as i32
130 };
131 let base = self.parse_register(reg_str)?;
132 Ok((base, offset))
133 } else {
134 Ok((self.parse_register(s)?, 0))
136 }
137 }
138
/// Packs the fields of an R-type instruction into a 32-bit word.
///
/// Layout (MSB to LSB): funct7 @25, rs2 @20, rs1 @15, funct3 @12, rd @7, opcode @0.
#[inline]
fn r_type(funct7: u8, rs2: u8, rs1: u8, funct3: u8, rd: u8, opcode: u8) -> u32 {
    let fields: [(u8, u32); 6] = [
        (funct7, 25),
        (rs2, 20),
        (rs1, 15),
        (funct3, 12),
        (rd, 7),
        (opcode, 0),
    ];
    fields
        .iter()
        .fold(0u32, |word, &(value, shift)| word | (u32::from(value) << shift))
}
152
/// Packs the fields of an I-type instruction into a 32-bit word.
///
/// Only the low 12 bits of `imm12` are used; they land in bits 31..20.
#[inline]
fn i_type(imm12: i32, rs1: u8, funct3: u8, rd: u8, opcode: u8) -> u32 {
    let imm_field = (imm12 as u32) & 0xFFF;

    let mut word = u32::from(opcode);
    word |= u32::from(rd) << 7;
    word |= u32::from(funct3) << 12;
    word |= u32::from(rs1) << 15;
    word |= imm_field << 20;
    word
}
161
/// Packs the fields of an S-type instruction into a 32-bit word.
///
/// The low 12 bits of `imm12` are split: bits 11..5 go to word bits 31..25
/// and bits 4..0 go to word bits 11..7.
#[inline]
fn s_type(imm12: i32, rs2: u8, rs1: u8, funct3: u8, opcode: u8) -> u32 {
    let masked = (imm12 as u32) & 0xFFF;
    let upper = masked >> 5;
    let lower = masked & 0x1F;

    let mut word = u32::from(opcode);
    word |= lower << 7;
    word |= u32::from(funct3) << 12;
    word |= u32::from(rs1) << 15;
    word |= u32::from(rs2) << 20;
    word |= upper << 25;
    word
}
172
/// Packs the fields of a B-type (conditional branch) instruction.
///
/// `offset` is a byte offset; its bits are scattered as
/// imm[12] @31, imm[10:5] @25, imm[4:1] @8 and imm[11] @7. Bit 0 is dropped.
#[inline]
fn b_type(offset: i32, rs2: u8, rs1: u8, funct3: u8, opcode: u8) -> u32 {
    let raw = offset as u32;
    // Extracts `width` bits of the offset starting at bit `lsb`.
    let slice = |lsb: u32, width: u32| (raw >> lsb) & ((1u32 << width) - 1);

    let immediate = (slice(12, 1) << 31)
        | (slice(5, 6) << 25)
        | (slice(1, 4) << 8)
        | (slice(11, 1) << 7);
    let registers = (u32::from(rs2) << 20) | (u32::from(rs1) << 15);

    immediate | registers | (u32::from(funct3) << 12) | u32::from(opcode)
}
192
/// Packs the fields of a U-type instruction into a 32-bit word.
///
/// Only the low 20 bits of `imm20` are used; they land in bits 31..12.
#[inline]
fn u_type(imm20: i32, rd: u8, opcode: u8) -> u32 {
    let upper = ((imm20 as u32) & 0xF_FFFF) << 12;
    let dest = u32::from(rd) << 7;
    upper | dest | u32::from(opcode)
}
198
/// Packs the fields of a J-type (jump-and-link) instruction.
///
/// `offset` is a byte offset; its bits are scattered as
/// imm[20] @31, imm[10:1] @21, imm[11] @20 and imm[19:12] @12. Bit 0 is dropped.
#[inline]
fn j_type(offset: i32, rd: u8, opcode: u8) -> u32 {
    let raw = offset as u32;
    // Extracts `width` bits of the offset starting at bit `lsb`.
    let slice = |lsb: u32, width: u32| (raw >> lsb) & ((1u32 << width) - 1);

    let immediate = (slice(20, 1) << 31)
        | (slice(1, 10) << 21)
        | (slice(11, 1) << 20)
        | (slice(12, 8) << 12);

    immediate | (u32::from(rd) << 7) | u32::from(opcode)
}
216
/// Serialises an instruction word as four little-endian bytes.
#[inline]
fn emit(word: u32) -> Vec<u8> {
    let le_bytes: [u8; 4] = word.to_le_bytes();
    let mut out = Vec::with_capacity(le_bytes.len());
    out.extend_from_slice(&le_bytes);
    out
}
221
222 fn encode_r3(
227 &self,
228 ops: &[String],
229 funct7: u8,
230 funct3: u8,
231 opcode: u8,
232 ) -> Result<Vec<u8>, RasError> {
233 if ops.len() != 3 {
234 return Err(RasError::EncodingError(
235 "R-type instruction requires 3 operands: rd, rs1, rs2".to_string(),
236 ));
237 }
238 let rd = self.parse_register(&ops[0])?;
239 let rs1 = self.parse_register(&ops[1])?;
240 let rs2 = self.parse_register(&ops[2])?;
241 Ok(Self::emit(Self::r_type(
242 funct7, rs2, rs1, funct3, rd, opcode,
243 )))
244 }
245
246 fn encode_i3(&self, ops: &[String], funct3: u8, opcode: u8) -> Result<Vec<u8>, RasError> {
248 if ops.len() != 3 {
249 return Err(RasError::EncodingError(
250 "I-type instruction requires 3 operands: rd, rs1, imm".to_string(),
251 ));
252 }
253 let rd = self.parse_register(&ops[0])?;
254 let rs1 = self.parse_register(&ops[1])?;
255 let imm = self.parse_imm(&ops[2])? as i32;
256 if !(-2048..=2047).contains(&imm) {
257 return Err(RasError::EncodingError(format!(
258 "Immediate {} out of 12-bit signed range [-2048, 2047]",
259 imm
260 )));
261 }
262 Ok(Self::emit(Self::i_type(imm, rs1, funct3, rd, opcode)))
263 }
264
265 fn encode_shift_imm(
267 &self,
268 ops: &[String],
269 funct7: u8,
270 funct3: u8,
271 opcode: u8,
272 ) -> Result<Vec<u8>, RasError> {
273 if ops.len() != 3 {
274 return Err(RasError::EncodingError(
275 "Shift immediate requires 3 operands: rd, rs1, shamt".to_string(),
276 ));
277 }
278 let rd = self.parse_register(&ops[0])?;
279 let rs1 = self.parse_register(&ops[1])?;
280 let shamt = self.parse_imm(&ops[2])? as u32;
281 let max_shamt = if self.rv64 { 63u32 } else { 31u32 };
282 if shamt > max_shamt {
283 return Err(RasError::EncodingError(format!(
284 "Shift amount {} exceeds maximum {}",
285 shamt, max_shamt
286 )));
287 }
288 let imm12 = ((funct7 as i32) << 5) | (shamt as i32 & 0x3F);
290 Ok(Self::emit(Self::i_type(imm12, rs1, funct3, rd, opcode)))
291 }
292
293 fn encode_branch(&self, ops: &[String], funct3: u8) -> Result<Vec<u8>, RasError> {
295 if ops.len() != 3 {
296 return Err(RasError::EncodingError(
297 "Branch requires 3 operands: rs1, rs2, offset".to_string(),
298 ));
299 }
300 let rs1 = self.parse_register(&ops[0])?;
301 let rs2 = self.parse_register(&ops[1])?;
302 let offset = self.parse_imm(&ops[2])? as i32;
303 Ok(Self::emit(Self::b_type(offset, rs2, rs1, funct3, 0x63)))
304 }
305
306 fn encode_load(&self, ops: &[String], funct3: u8) -> Result<Vec<u8>, RasError> {
308 if ops.len() != 2 {
309 return Err(RasError::EncodingError(
310 "Load requires 2 operands: rd, offset(rs1)".to_string(),
311 ));
312 }
313 let rd = self.parse_register(&ops[0])?;
314 let (rs1, offset) = self.parse_mem_operand(&ops[1])?;
315 Ok(Self::emit(Self::i_type(offset, rs1, funct3, rd, 0x03)))
316 }
317
318 fn encode_store(&self, ops: &[String], funct3: u8) -> Result<Vec<u8>, RasError> {
320 if ops.len() != 2 {
321 return Err(RasError::EncodingError(
322 "Store requires 2 operands: rs2, offset(rs1)".to_string(),
323 ));
324 }
325 let rs2 = self.parse_register(&ops[0])?;
326 let (rs1, offset) = self.parse_mem_operand(&ops[1])?;
327 Ok(Self::emit(Self::s_type(offset, rs2, rs1, funct3, 0x23)))
328 }
329}
330
331impl InstructionEncoder for RiscVEncoder {
332 fn encode_instruction(&mut self, inst: &ParsedInstruction) -> Result<Vec<u8>, RasError> {
333 let opcode = inst.opcode.to_lowercase();
334 let ops = &inst.operands;
335
336 let bytes = match opcode.as_str() {
337 "add" => self.encode_r3(ops, 0x00, 0x0, 0x33)?,
339 "sub" => self.encode_r3(ops, 0x20, 0x0, 0x33)?,
340 "sll" => self.encode_r3(ops, 0x00, 0x1, 0x33)?,
341 "slt" => self.encode_r3(ops, 0x00, 0x2, 0x33)?,
342 "sltu" => self.encode_r3(ops, 0x00, 0x3, 0x33)?,
343 "xor" => self.encode_r3(ops, 0x00, 0x4, 0x33)?,
344 "srl" => self.encode_r3(ops, 0x00, 0x5, 0x33)?,
345 "sra" => self.encode_r3(ops, 0x20, 0x5, 0x33)?,
346 "or" => self.encode_r3(ops, 0x00, 0x6, 0x33)?,
347 "and" => self.encode_r3(ops, 0x00, 0x7, 0x33)?,
348
349 "mul" => self.encode_r3(ops, 0x01, 0x0, 0x33)?,
351 "mulh" => self.encode_r3(ops, 0x01, 0x1, 0x33)?,
352 "mulhsu" => self.encode_r3(ops, 0x01, 0x2, 0x33)?,
353 "mulhu" => self.encode_r3(ops, 0x01, 0x3, 0x33)?,
354 "div" => self.encode_r3(ops, 0x01, 0x4, 0x33)?,
355 "divu" => self.encode_r3(ops, 0x01, 0x5, 0x33)?,
356 "rem" => self.encode_r3(ops, 0x01, 0x6, 0x33)?,
357 "remu" => self.encode_r3(ops, 0x01, 0x7, 0x33)?,
358
359 "addw" => self.encode_r3(ops, 0x00, 0x0, 0x3B)?,
361 "subw" => self.encode_r3(ops, 0x20, 0x0, 0x3B)?,
362 "sllw" => self.encode_r3(ops, 0x00, 0x1, 0x3B)?,
363 "srlw" => self.encode_r3(ops, 0x00, 0x5, 0x3B)?,
364 "sraw" => self.encode_r3(ops, 0x20, 0x5, 0x3B)?,
365 "mulw" => self.encode_r3(ops, 0x01, 0x0, 0x3B)?,
366 "divw" => self.encode_r3(ops, 0x01, 0x4, 0x3B)?,
367 "divuw" => self.encode_r3(ops, 0x01, 0x5, 0x3B)?,
368 "remw" => self.encode_r3(ops, 0x01, 0x6, 0x3B)?,
369 "remuw" => self.encode_r3(ops, 0x01, 0x7, 0x3B)?,
370
371 "addi" => self.encode_i3(ops, 0x0, 0x13)?,
373 "slti" => self.encode_i3(ops, 0x2, 0x13)?,
374 "sltiu" => self.encode_i3(ops, 0x3, 0x13)?,
375 "xori" => self.encode_i3(ops, 0x4, 0x13)?,
376 "ori" => self.encode_i3(ops, 0x6, 0x13)?,
377 "andi" => self.encode_i3(ops, 0x7, 0x13)?,
378 "slli" => self.encode_shift_imm(ops, 0x00, 0x1, 0x13)?,
379 "srli" => self.encode_shift_imm(ops, 0x00, 0x5, 0x13)?,
380 "srai" => self.encode_shift_imm(ops, 0x20, 0x5, 0x13)?,
381
382 "addiw" => self.encode_i3(ops, 0x0, 0x1B)?,
384 "slliw" => self.encode_shift_imm(ops, 0x00, 0x1, 0x1B)?,
385 "srliw" => self.encode_shift_imm(ops, 0x00, 0x5, 0x1B)?,
386 "sraiw" => self.encode_shift_imm(ops, 0x20, 0x5, 0x1B)?,
387
388 "lb" => self.encode_load(ops, 0x0)?,
390 "lh" => self.encode_load(ops, 0x1)?,
391 "lw" => self.encode_load(ops, 0x2)?,
392 "ld" => self.encode_load(ops, 0x3)?,
393 "lbu" => self.encode_load(ops, 0x4)?,
394 "lhu" => self.encode_load(ops, 0x5)?,
395 "lwu" => self.encode_load(ops, 0x6)?,
396
397 "sb" => self.encode_store(ops, 0x0)?,
399 "sh" => self.encode_store(ops, 0x1)?,
400 "sw" => self.encode_store(ops, 0x2)?,
401 "sd" => self.encode_store(ops, 0x3)?,
402
403 "beq" => self.encode_branch(ops, 0x0)?,
405 "bne" => self.encode_branch(ops, 0x1)?,
406 "blt" => self.encode_branch(ops, 0x4)?,
407 "bge" => self.encode_branch(ops, 0x5)?,
408 "bltu" => self.encode_branch(ops, 0x6)?,
409 "bgeu" => self.encode_branch(ops, 0x7)?,
410
411 "jal" => {
413 if ops.len() != 2 {
414 return Err(RasError::EncodingError(
415 "JAL requires 2 operands: rd, offset".to_string(),
416 ));
417 }
418 let rd = self.parse_register(&ops[0])?;
419 let offset = self.parse_imm(&ops[1])? as i32;
420 Self::emit(Self::j_type(offset, rd, 0x6F))
421 }
422
423 "jalr" => {
425 if ops.len() == 3 {
426 let rd = self.parse_register(&ops[0])?;
428 let rs1 = self.parse_register(&ops[1])?;
429 let imm = self.parse_imm(&ops[2])? as i32;
430 Self::emit(Self::i_type(imm, rs1, 0x0, rd, 0x67))
431 } else if ops.len() == 2 {
432 let rd = self.parse_register(&ops[0])?;
434 let (rs1, offset) = self.parse_mem_operand(&ops[1])?;
435 Self::emit(Self::i_type(offset, rs1, 0x0, rd, 0x67))
436 } else if ops.len() == 1 {
437 let rs1 = self.parse_register(&ops[0])?;
439 Self::emit(Self::i_type(0, rs1, 0x0, 1, 0x67))
440 } else {
441 return Err(RasError::EncodingError(
442 "JALR requires 1–3 operands".to_string(),
443 ));
444 }
445 }
446
447 "lui" => {
449 if ops.len() != 2 {
450 return Err(RasError::EncodingError(
451 "LUI requires 2 operands: rd, imm20".to_string(),
452 ));
453 }
454 let rd = self.parse_register(&ops[0])?;
455 let imm20 = self.parse_imm(&ops[1])? as i32;
456 Self::emit(Self::u_type(imm20, rd, 0x37))
457 }
458 "auipc" => {
459 if ops.len() != 2 {
460 return Err(RasError::EncodingError(
461 "AUIPC requires 2 operands: rd, imm20".to_string(),
462 ));
463 }
464 let rd = self.parse_register(&ops[0])?;
465 let imm20 = self.parse_imm(&ops[1])? as i32;
466 Self::emit(Self::u_type(imm20, rd, 0x17))
467 }
468
469 "ecall" => Self::emit(Self::i_type(0, 0, 0, 0, 0x73)),
471 "ebreak" => Self::emit(Self::i_type(1, 0, 0, 0, 0x73)),
472 "fence" | "fence.i" => Self::emit(Self::i_type(0, 0, 0, 0, 0x0F)),
473
474 "nop" => Self::emit(Self::i_type(0, 0, 0, 0, 0x13)),
477 "ret" => Self::emit(Self::i_type(0, 1, 0, 0, 0x67)),
479 "mv" => {
481 if ops.len() != 2 {
482 return Err(RasError::EncodingError(
483 "MV requires 2 operands: rd, rs".to_string(),
484 ));
485 }
486 let rd = self.parse_register(&ops[0])?;
487 let rs = self.parse_register(&ops[1])?;
488 Self::emit(Self::i_type(0, rs, 0, rd, 0x13))
489 }
490 "li" => {
492 if ops.len() != 2 {
493 return Err(RasError::EncodingError(
494 "LI requires 2 operands: rd, imm".to_string(),
495 ));
496 }
497 let rd = self.parse_register(&ops[0])?;
498 let imm = self.parse_imm(&ops[1])? as i32;
499 if !(-2048..=2047).contains(&imm) {
500 return Err(RasError::EncodingError(format!(
501 "LI pseudo-instruction only supports 12-bit immediates ({} out of range)",
502 imm
503 )));
504 }
505 Self::emit(Self::i_type(imm, 0, 0, rd, 0x13))
506 }
507 "j" => {
509 if ops.len() != 1 {
510 return Err(RasError::EncodingError(
511 "J requires 1 operand: offset".to_string(),
512 ));
513 }
514 let offset = self.parse_imm(&ops[0])? as i32;
515 Self::emit(Self::j_type(offset, 0, 0x6F))
516 }
517 "call" => {
519 if ops.len() != 1 {
520 return Err(RasError::EncodingError(
521 "CALL requires 1 operand: offset".to_string(),
522 ));
523 }
524 let offset = self.parse_imm(&ops[0])? as i32;
525 Self::emit(Self::j_type(offset, 1, 0x6F))
526 }
527
528 _ => {
529 return Err(RasError::EncodingError(format!(
530 "Unknown RISC-V instruction: {}",
531 opcode
532 )));
533 }
534 };
535
536 self.position += bytes.len();
537 Ok(bytes)
538 }
539
540 fn current_position(&self) -> usize {
541 self.position
542 }
543}
544
#[cfg(test)]
mod tests {
    use super::*;
    use crate::encoder::traits::ParsedInstruction;

    /// Builds a `ParsedInstruction` from borrowed string pieces.
    fn instr(opcode: &str, operands: &[&str]) -> ParsedInstruction {
        let operands = operands.iter().map(|op| (*op).to_string()).collect();
        ParsedInstruction {
            opcode: opcode.to_string(),
            operands,
        }
    }

    /// Fresh RV64 encoder for each test.
    fn enc() -> RiscVEncoder {
        RiscVEncoder::new(true)
    }

    /// Reassembles the first four emitted bytes into the instruction word.
    fn word_of(bytes: &[u8]) -> u32 {
        u32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]])
    }

    /// Encodes a single instruction on a fresh encoder and returns its word.
    fn encode_word(opcode: &str, operands: &[&str]) -> u32 {
        let bytes = enc()
            .encode_instruction(&instr(opcode, operands))
            .unwrap();
        word_of(&bytes)
    }

    #[test]
    fn test_nop_is_four_bytes() {
        let encoded = enc().encode_instruction(&instr("nop", &[])).unwrap();
        assert_eq!(encoded.len(), 4);
    }

    #[test]
    fn test_add_encoding() {
        let encoded = enc()
            .encode_instruction(&instr("add", &["a0", "a1", "a2"]))
            .unwrap();
        assert_eq!(encoded.len(), 4);

        let word = word_of(&encoded);
        // opcode
        assert_eq!(word & 0x7F, 0x33);
        // funct3
        assert_eq!((word >> 12) & 7, 0);
        // funct7
        assert_eq!((word >> 25) & 0x7F, 0);
        // rd = a0 = x10
        assert_eq!((word >> 7) & 0x1F, 10);
    }

    #[test]
    fn test_addi_encoding() {
        let word = encode_word("addi", &["t0", "zero", "42"]);
        // opcode
        assert_eq!(word & 0x7F, 0x13);
        // rd = t0 = x5
        assert_eq!((word >> 7) & 0x1F, 5);
        // immediate
        assert_eq!((word >> 20) as i32, 42);
    }

    #[test]
    fn test_ret_pseudo() {
        let word = encode_word("ret", &[]);
        // opcode
        assert_eq!(word & 0x7F, 0x67);
        // rd = x0
        assert_eq!((word >> 7) & 0x1F, 0);
        // rs1 = ra = x1
        assert_eq!((word >> 15) & 0x1F, 1);
        // immediate
        assert_eq!((word >> 20), 0);
    }

    #[test]
    fn test_load_store_encoding() {
        let store = encode_word("sw", &["a0", "0(sp)"]);
        assert_eq!(store & 0x7F, 0x23);
        assert_eq!((store >> 12) & 7, 0x2);

        let load = encode_word("lw", &["a1", "4(sp)"]);
        assert_eq!(load & 0x7F, 0x03);
        assert_eq!((load >> 12) & 7, 0x2);
    }

    #[test]
    fn test_position_advances() {
        let mut encoder = enc();
        for expected in [4usize, 8].iter().copied() {
            encoder.encode_instruction(&instr("nop", &[])).unwrap();
            assert_eq!(encoder.current_position(), expected);
        }
    }

    #[test]
    fn test_abi_register_aliases() {
        let mut encoder = enc();
        let via_abi = encoder
            .encode_instruction(&instr("addi", &["zero", "zero", "0"]))
            .unwrap();
        let via_numeric = encoder
            .encode_instruction(&instr("addi", &["x0", "x0", "0"]))
            .unwrap();
        assert_eq!(via_abi, via_numeric);
    }

    #[test]
    fn test_mul_encoding() {
        let word = encode_word("mul", &["a0", "a1", "a2"]);
        // funct7 = 1 marks the multiply/divide group
        assert_eq!((word >> 25) & 0x7F, 1);
    }
}