use crate::*;
const ALIGNED_ADDR: u64 = 0x3000;
const ALIGNED_ADDR2: u64 = 0x3100;
#[test]
fn test_vpmaddubsw_ymm0_ymm1_ymm2_all_zeros() {
let mut emu = emu64();
let code = [
0xc4, 0xe2, 0x75, 0x04, 0xc2, 0xf4, ];
emu.load_code_bytes(&code);
emu.run(None).unwrap();
}
#[test]
fn test_vpmaddubsw_ymm3_ymm4_ymm5_all_ones() {
let mut emu = emu64();
let code = [
0xc4, 0xe2, 0x5d, 0x04, 0xdd, 0xf4, ];
emu.load_code_bytes(&code);
emu.run(None).unwrap();
}
#[test]
fn test_vpmaddubsw_ymm6_ymm7_ymm8_positive_values() {
let mut emu = emu64();
let code = [
0xc4, 0x62, 0x45, 0x04, 0xf0, 0xf4, ];
emu.load_code_bytes(&code);
emu.run(None).unwrap();
}
#[test]
fn test_vpmaddubsw_ymm9_ymm10_ymm11_negative_multiplier() {
let mut emu = emu64();
let code = [
0xc4, 0x42, 0x2d, 0x04, 0xcb, 0xf4, ];
emu.load_code_bytes(&code);
emu.run(None).unwrap();
}
#[test]
fn test_vpmaddubsw_ymm12_ymm13_ymm14_mixed_signs() {
let mut emu = emu64();
let code = [
0xc4, 0x42, 0x15, 0x04, 0xe6, 0xf4, ];
emu.load_code_bytes(&code);
emu.run(None).unwrap();
}
#[test]
fn test_vpmaddubsw_ymm15_ymm0_ymm1_high_reg() {
let mut emu = emu64();
let code = [
0xc4, 0x62, 0x7d, 0x04, 0xf9, 0xf4, ];
emu.load_code_bytes(&code);
emu.run(None).unwrap();
}
#[test]
fn test_vpmaddubsw_ymm0_ymm1_mem() {
let mut emu = emu64();
let code = [0x48, 0xb8];
let mut full_code = code.to_vec();
full_code.extend_from_slice(&ALIGNED_ADDR.to_le_bytes());
full_code.extend_from_slice(&[
0xc4, 0xe2, 0x75, 0x04, 0x00, 0xf4, ]);
emu.load_code_bytes(&full_code);
emu.maps.write_bytes_slice(ALIGNED_ADDR, &[0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01]);
emu.run(None).unwrap();
}
#[test]
fn test_vpmaddubsw_ymm2_ymm3_mem_negative() {
let mut emu = emu64();
let code = [0x48, 0xb8];
let mut full_code = code.to_vec();
full_code.extend_from_slice(&ALIGNED_ADDR.to_le_bytes());
full_code.extend_from_slice(&[
0xc4, 0xe2, 0x65, 0x04, 0x10, 0xf4, ]);
emu.load_code_bytes(&full_code);
emu.maps.write_bytes_slice(ALIGNED_ADDR, &[0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF]);
emu.run(None).unwrap();
}
#[test]
fn test_vpmaddubsw_ymm4_ymm5_mem_sequential() {
let mut emu = emu64();
let code = [0x48, 0xb8];
let mut full_code = code.to_vec();
full_code.extend_from_slice(&ALIGNED_ADDR.to_le_bytes());
full_code.extend_from_slice(&[
0xc4, 0xe2, 0x55, 0x04, 0x20, 0xf4, ]);
emu.load_code_bytes(&full_code);
let seq: Vec<u8> = (0..32).collect();
emu.maps.write_bytes_slice(ALIGNED_ADDR, &seq);
emu.run(None).unwrap();
}
#[test]
fn test_vpmaddubsw_ymm6_ymm7_mem_alternating() {
let mut emu = emu64();
let code = [0x48, 0xb8];
let mut full_code = code.to_vec();
full_code.extend_from_slice(&ALIGNED_ADDR.to_le_bytes());
full_code.extend_from_slice(&[
0xc4, 0xe2, 0x45, 0x04, 0x30, 0xf4, ]);
emu.load_code_bytes(&full_code);
let pattern: Vec<u8> = (0..32).map(|i| if i % 2 == 0 { 0x01 } else { 0xFF }).collect();
emu.maps.write_bytes_slice(ALIGNED_ADDR, &pattern);
emu.run(None).unwrap();
}
#[test]
fn test_vpmaddubsw_unsigned_signed_multiply() {
let mut emu = emu64();
let code = [0x48, 0xb8];
let mut full_code = code.to_vec();
full_code.extend_from_slice(&ALIGNED_ADDR.to_le_bytes());
full_code.extend_from_slice(&[
0xc4, 0xe2, 0x75, 0x04, 0x00, 0xf4, ]);
emu.load_code_bytes(&full_code);
let data: Vec<u8> = vec![0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01];
emu.maps.write_bytes_slice(ALIGNED_ADDR, &data);
emu.run(None).unwrap();
}
#[test]
fn test_vpmaddubsw_negative_product() {
let mut emu = emu64();
let code = [0x48, 0xb8];
let mut full_code = code.to_vec();
full_code.extend_from_slice(&ALIGNED_ADDR.to_le_bytes());
full_code.extend_from_slice(&[
0xc4, 0xe2, 0x75, 0x04, 0x00, 0xf4, ]);
emu.load_code_bytes(&full_code);
let data: Vec<u8> = vec![0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF]; emu.maps.write_bytes_slice(ALIGNED_ADDR, &data);
emu.run(None).unwrap();
}
#[test]
fn test_vpmaddubsw_saturation_positive() {
let mut emu = emu64();
let code = [0x48, 0xb8];
let mut full_code = code.to_vec();
full_code.extend_from_slice(&ALIGNED_ADDR.to_le_bytes());
full_code.extend_from_slice(&[
0xc4, 0xe2, 0x75, 0x04, 0x00, 0xf4, ]);
emu.load_code_bytes(&full_code);
let data: Vec<u8> = vec![0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F]; emu.maps.write_bytes_slice(ALIGNED_ADDR, &data);
emu.run(None).unwrap();
}
#[test]
fn test_vpmaddubsw_saturation_negative() {
let mut emu = emu64();
let code = [0x48, 0xb8];
let mut full_code = code.to_vec();
full_code.extend_from_slice(&ALIGNED_ADDR.to_le_bytes());
full_code.extend_from_slice(&[
0xc4, 0xe2, 0x75, 0x04, 0x00, 0xf4, ]);
emu.load_code_bytes(&full_code);
let data: Vec<u8> = vec![0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80]; emu.maps.write_bytes_slice(ALIGNED_ADDR, &data);
emu.run(None).unwrap();
}
#[test]
fn test_vpmaddubsw_zero_result() {
let mut emu = emu64();
let code = [0x48, 0xb8];
let mut full_code = code.to_vec();
full_code.extend_from_slice(&ALIGNED_ADDR.to_le_bytes());
full_code.extend_from_slice(&[
0xc4, 0xe2, 0x75, 0x04, 0x00, 0xf4, ]);
emu.load_code_bytes(&full_code);
let data: Vec<u8> = vec![0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00];
emu.maps.write_bytes_slice(ALIGNED_ADDR, &data);
emu.run(None).unwrap();
}
#[test]
fn test_vpmaddubsw_chain_multiple_ops() {
let mut emu = emu64();
let code = [
0xc4, 0xe2, 0x75, 0x04, 0xc2, 0xc4, 0xe2, 0x7d, 0x04, 0xc3, 0xf4, ];
emu.load_code_bytes(&code);
emu.run(None).unwrap();
}
#[test]
fn test_vpmaddubsw_mem_unaligned_offset() {
let mut emu = emu64();
let code = [0x48, 0xb8];
let mut full_code = code.to_vec();
full_code.extend_from_slice(&(ALIGNED_ADDR + 1).to_le_bytes());
full_code.extend_from_slice(&[
0xc4, 0xe2, 0x75, 0x04, 0x00, 0xf4, ]);
emu.load_code_bytes(&full_code);
emu.maps.write_bytes_slice(ALIGNED_ADDR, &[0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42]);
emu.run(None).unwrap();
}
#[test]
fn test_vpmaddubsw_extended_regs_r8_r9_r10() {
let mut emu = emu64();
let code = [
0xc4, 0x42, 0x3d, 0x04, 0xc2, 0xf4, ];
emu.load_code_bytes(&code);
emu.run(None).unwrap();
}
#[test]
fn test_vpmaddubsw_extended_regs_r11_r12_r13() {
let mut emu = emu64();
let code = [
0xc4, 0x42, 0x1d, 0x04, 0xdd, 0xf4, ];
emu.load_code_bytes(&code);
emu.run(None).unwrap();
}
#[test]
fn test_vpmaddubsw_extended_regs_r14_r15_r8() {
let mut emu = emu64();
let code = [
0xc4, 0x42, 0x05, 0x04, 0xf0, 0xf4, ];
emu.load_code_bytes(&code);
emu.run(None).unwrap();
}
#[test]
fn test_vpmaddubsw_small_values() {
let mut emu = emu64();
let code = [0x48, 0xb8];
let mut full_code = code.to_vec();
full_code.extend_from_slice(&ALIGNED_ADDR.to_le_bytes());
full_code.extend_from_slice(&[
0xc4, 0xe2, 0x75, 0x04, 0x00, 0xf4, ]);
emu.load_code_bytes(&full_code);
let data: Vec<u8> = vec![0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02];
emu.maps.write_bytes_slice(ALIGNED_ADDR, &data);
emu.run(None).unwrap();
}
#[test]
fn test_vpmaddubsw_boundary_values() {
let mut emu = emu64();
let code = [0x48, 0xb8];
let mut full_code = code.to_vec();
full_code.extend_from_slice(&ALIGNED_ADDR.to_le_bytes());
full_code.extend_from_slice(&[
0xc4, 0xe2, 0x75, 0x04, 0x00, 0xf4, ]);
emu.load_code_bytes(&full_code);
let boundary: Vec<u8> = vec![0x00, 0x01, 0x7F, 0x80, 0x81, 0xFE, 0xFF, 0x00].repeat(4);
emu.maps.write_bytes_slice(ALIGNED_ADDR, &boundary);
emu.run(None).unwrap();
}
#[test]
fn test_vpmaddubsw_powers_of_two() {
let mut emu = emu64();
let code = [0x48, 0xb8];
let mut full_code = code.to_vec();
full_code.extend_from_slice(&ALIGNED_ADDR.to_le_bytes());
full_code.extend_from_slice(&[
0xc4, 0xe2, 0x75, 0x04, 0x00, 0xf4, ]);
emu.load_code_bytes(&full_code);
let powers: Vec<u8> = (0..8).map(|i| 1u8 << i).chain((0..8).map(|i| 1u8 << i))
.chain((0..8).map(|i| 1u8 << i)).chain((0..8).map(|i| 1u8 << i)).collect();
emu.maps.write_bytes_slice(ALIGNED_ADDR, &powers);
emu.run(None).unwrap();
}
#[test]
fn test_vpmaddubsw_alternating_positive_negative() {
let mut emu = emu64();
let code = [0x48, 0xb8];
let mut full_code = code.to_vec();
full_code.extend_from_slice(&ALIGNED_ADDR.to_le_bytes());
full_code.extend_from_slice(&[
0xc4, 0xe2, 0x75, 0x04, 0x00, 0xf4, ]);
emu.load_code_bytes(&full_code);
let pattern: Vec<u8> = (0..32).map(|i| if i % 2 == 0 { 0x02 } else { 0xFE }).collect();
emu.maps.write_bytes_slice(ALIGNED_ADDR, &pattern);
emu.run(None).unwrap();
}
#[test]
fn test_vpmaddubsw_sequential_pattern() {
let mut emu = emu64();
let code = [0x48, 0xb8];
let mut full_code = code.to_vec();
full_code.extend_from_slice(&ALIGNED_ADDR.to_le_bytes());
full_code.extend_from_slice(&[
0xc4, 0xe2, 0x75, 0x04, 0x00, 0xf4, ]);
emu.load_code_bytes(&full_code);
let sequential: Vec<u8> = (0..32).collect();
emu.maps.write_bytes_slice(ALIGNED_ADDR, &sequential);
emu.run(None).unwrap();
}
#[test]
fn test_vpmaddubsw_reverse_sequential() {
let mut emu = emu64();
let code = [0x48, 0xb8];
let mut full_code = code.to_vec();
full_code.extend_from_slice(&ALIGNED_ADDR.to_le_bytes());
full_code.extend_from_slice(&[
0xc4, 0xe2, 0x75, 0x04, 0x00, 0xf4, ]);
emu.load_code_bytes(&full_code);
let reverse: Vec<u8> = (0..32).rev().collect();
emu.maps.write_bytes_slice(ALIGNED_ADDR, &reverse);
emu.run(None).unwrap();
}
#[test]
fn test_vpmaddubsw_symmetric_pattern() {
let mut emu = emu64();
let code = [0x48, 0xb8];
let mut full_code = code.to_vec();
full_code.extend_from_slice(&ALIGNED_ADDR.to_le_bytes());
full_code.extend_from_slice(&[
0xc4, 0xe2, 0x75, 0x04, 0x00, 0xf4, ]);
emu.load_code_bytes(&full_code);
let symmetric: Vec<u8> = vec![
0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01,
0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01,
];
emu.maps.write_bytes_slice(ALIGNED_ADDR, &symmetric);
emu.run(None).unwrap();
}
#[test]
fn test_vpmaddubsw_large_unsigned_small_signed() {
let mut emu = emu64();
let code = [0x48, 0xb8];
let mut full_code = code.to_vec();
full_code.extend_from_slice(&ALIGNED_ADDR.to_le_bytes());
full_code.extend_from_slice(&[
0xc4, 0xe2, 0x75, 0x04, 0x00, 0xf4, ]);
emu.load_code_bytes(&full_code);
let data: Vec<u8> = (0..32).map(|i| if i % 2 == 0 { 0x02 } else { 0x03 }).collect();
emu.maps.write_bytes_slice(ALIGNED_ADDR, &data);
emu.run(None).unwrap();
}
#[test]
fn test_vpmaddubsw_near_saturation() {
let mut emu = emu64();
let code = [0x48, 0xb8];
let mut full_code = code.to_vec();
full_code.extend_from_slice(&ALIGNED_ADDR.to_le_bytes());
full_code.extend_from_slice(&[
0xc4, 0xe2, 0x75, 0x04, 0x00, 0xf4, ]);
emu.load_code_bytes(&full_code);
let data: Vec<u8> = vec![0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40]; emu.maps.write_bytes_slice(ALIGNED_ADDR, &data);
emu.run(None).unwrap();
}
#[test]
fn test_vpmaddubsw_checkerboard() {
let mut emu = emu64();
let code = [0x48, 0xb8];
let mut full_code = code.to_vec();
full_code.extend_from_slice(&ALIGNED_ADDR.to_le_bytes());
full_code.extend_from_slice(&[
0xc4, 0xe2, 0x75, 0x04, 0x00, 0xf4, ]);
emu.load_code_bytes(&full_code);
let checkerboard: Vec<u8> = (0..32).map(|i| if i % 2 == 0 { 0x55 } else { 0xAA }).collect();
emu.maps.write_bytes_slice(ALIGNED_ADDR, &checkerboard);
emu.run(None).unwrap();
}
#[test]
fn test_vpmaddubsw_gradient_pattern() {
let mut emu = emu64();
let code = [0x48, 0xb8];
let mut full_code = code.to_vec();
full_code.extend_from_slice(&ALIGNED_ADDR.to_le_bytes());
full_code.extend_from_slice(&[
0xc4, 0xe2, 0x75, 0x04, 0x00, 0xf4, ]);
emu.load_code_bytes(&full_code);
let gradient: Vec<u8> = (0..32).map(|i| ((i * 8) % 256) as u8).collect();
emu.maps.write_bytes_slice(ALIGNED_ADDR, &gradient);
emu.run(None).unwrap();
}
#[test]
fn test_vpmaddubsw_cancellation() {
let mut emu = emu64();
let code = [0x48, 0xb8];
let mut full_code = code.to_vec();
full_code.extend_from_slice(&ALIGNED_ADDR.to_le_bytes());
full_code.extend_from_slice(&[
0xc4, 0xe2, 0x75, 0x04, 0x00, 0xf4, ]);
emu.load_code_bytes(&full_code);
let data: Vec<u8> = (0..32).map(|i| if i % 2 == 0 { 0x01 } else { 0xFF }).collect();
emu.maps.write_bytes_slice(ALIGNED_ADDR, &data);
emu.run(None).unwrap();
}
#[test]
fn test_vpmaddubsw_mixed_magnitudes() {
let mut emu = emu64();
let code = [0x48, 0xb8];
let mut full_code = code.to_vec();
full_code.extend_from_slice(&ALIGNED_ADDR.to_le_bytes());
full_code.extend_from_slice(&[
0xc4, 0xe2, 0x75, 0x04, 0x00, 0xf4, ]);
emu.load_code_bytes(&full_code);
let mixed: Vec<u8> = vec![
0x01, 0x7F, 0x80, 0x01,
0xFF, 0x01, 0x01, 0xFF,
].repeat(4);
emu.maps.write_bytes_slice(ALIGNED_ADDR, &mixed);
emu.run(None).unwrap();
}
#[test]
fn test_vpmaddubsw_diagonal_pattern() {
let mut emu = emu64();
let code = [0x48, 0xb8];
let mut full_code = code.to_vec();
full_code.extend_from_slice(&ALIGNED_ADDR.to_le_bytes());
full_code.extend_from_slice(&[
0xc4, 0xe2, 0x75, 0x04, 0x00, 0xf4, ]);
emu.load_code_bytes(&full_code);
let diagonal: Vec<u8> = (0..32).map(|i| ((i * 7 + 13) % 256) as u8).collect();
emu.maps.write_bytes_slice(ALIGNED_ADDR, &diagonal);
emu.run(None).unwrap();
}
#[test]
fn test_vpmaddubsw_max_unsigned_positive_signed() {
let mut emu = emu64();
let code = [0x48, 0xb8];
let mut full_code = code.to_vec();
full_code.extend_from_slice(&ALIGNED_ADDR.to_le_bytes());
full_code.extend_from_slice(&[
0xc4, 0xe2, 0x75, 0x04, 0x00, 0xf4, ]);
emu.load_code_bytes(&full_code);
let data: Vec<u8> = (0..32).map(|i| if i % 2 == 0 { 0x7F } else { 0x7E }).collect();
emu.maps.write_bytes_slice(ALIGNED_ADDR, &data);
emu.run(None).unwrap();
}
#[test]
fn test_vpmaddubsw_max_unsigned_negative_signed() {
let mut emu = emu64();
let code = [0x48, 0xb8];
let mut full_code = code.to_vec();
full_code.extend_from_slice(&ALIGNED_ADDR.to_le_bytes());
full_code.extend_from_slice(&[
0xc4, 0xe2, 0x75, 0x04, 0x00, 0xf4, ]);
emu.load_code_bytes(&full_code);
let data: Vec<u8> = (0..32).map(|i| if i % 2 == 0 { 0x80 } else { 0x81 }).collect();
emu.maps.write_bytes_slice(ALIGNED_ADDR, &data);
emu.run(None).unwrap();
}