#![cfg(all(target_os = "linux", feature = "kvm"))]
use tracing::debug;
use vhost::vhost_kern::vsock::Vsock as VhostKernVsock;
use vhost::vsock::VhostVsock;
use vhost::{VhostBackend, VhostUserMemoryRegionInfo, VringConfigData};
use vm_memory::{GuestMemory, GuestMemoryMmap};
// Register offsets inside the device's MMIO window. `mmio_read` and
// `mmio_write` match their `offset` argument against these values.
const VIRTIO_MMIO_MAGIC_VALUE: u64 = 0x00;
const VIRTIO_MMIO_VERSION: u64 = 0x04;
const VIRTIO_MMIO_DEVICE_ID: u64 = 0x08;
const VIRTIO_MMIO_VENDOR_ID: u64 = 0x0c;
const VIRTIO_MMIO_DEVICE_FEATURES: u64 = 0x10;
const VIRTIO_MMIO_DEVICE_FEATURES_SEL: u64 = 0x14;
const VIRTIO_MMIO_DRIVER_FEATURES: u64 = 0x20;
const VIRTIO_MMIO_DRIVER_FEATURES_SEL: u64 = 0x24;
const VIRTIO_MMIO_QUEUE_SEL: u64 = 0x30;
const VIRTIO_MMIO_QUEUE_NUM_MAX: u64 = 0x34;
const VIRTIO_MMIO_QUEUE_NUM: u64 = 0x38;
const VIRTIO_MMIO_QUEUE_READY: u64 = 0x44;
const VIRTIO_MMIO_QUEUE_NOTIFY: u64 = 0x50;
const VIRTIO_MMIO_INTERRUPT_STATUS: u64 = 0x60;
const VIRTIO_MMIO_INTERRUPT_ACK: u64 = 0x64;
const VIRTIO_MMIO_STATUS: u64 = 0x70;
// Each 64-bit queue address is exposed as a LOW/HIGH pair of 32-bit registers.
const VIRTIO_MMIO_QUEUE_DESC_LOW: u64 = 0x80;
const VIRTIO_MMIO_QUEUE_DESC_HIGH: u64 = 0x84;
const VIRTIO_MMIO_QUEUE_AVAIL_LOW: u64 = 0x90;
const VIRTIO_MMIO_QUEUE_AVAIL_HIGH: u64 = 0x94;
const VIRTIO_MMIO_QUEUE_USED_LOW: u64 = 0xa0;
const VIRTIO_MMIO_QUEUE_USED_HIGH: u64 = 0xa4;
const VIRTIO_MMIO_CONFIG_GENERATION: u64 = 0xfc;
// Start of the device-specific configuration space; every offset at or above
// this value is treated as a config-space access by `mmio_read`.
const VIRTIO_MMIO_CONFIG: u64 = 0x100;
// Values returned by the identification registers.
// The magic is the ASCII string "virt" when stored little-endian.
const VIRTIO_MMIO_MAGIC: u32 = 0x7472_6976;
const VIRTIO_MMIO_VERSION_V2: u32 = 2;
const VIRTIO_ID_VSOCK: u32 = 19;
// The only feature bit offered to the driver (see `device_features`).
const VIRTIO_F_VERSION_1: u64 = 1 << 32;
// Status-register bit that, when first set by the driver, activates the device.
const VSOCK_DRIVER_OK: u8 = 0x4;
// Reported through QUEUE_NUM_MAX and passed to vhost as `queue_max_size`.
const QUEUE_SIZE_MAX: u16 = 256;
// Number of virtqueue slots the device models.
const NUM_QUEUES: usize = 3;
// Size in bytes of the MMIO window, returned by `VsockMmioDevice::mmio_size`.
const VSOCK_MMIO_SIZE: u64 = 0x200;
/// Decodes the first four bytes of `data` as a little-endian `u32`.
///
/// Slices shorter than four bytes yield `0`; any bytes beyond the fourth are
/// ignored.
fn read_u32_from_slice(data: &[u8]) -> u32 {
    match data {
        [b0, b1, b2, b3, ..] => u32::from_le_bytes([*b0, *b1, *b2, *b3]),
        _ => 0,
    }
}
/// Stores `val` into `data` in little-endian byte order.
///
/// At most four bytes are written. A shorter destination receives only the
/// leading (least-significant) bytes, and bytes past the fourth are untouched.
fn write_u32_to_slice(val: u32, data: &mut [u8]) {
    // `zip` stops at the shorter side, which gives the min(4, len) truncation.
    for (dst, src) in data.iter_mut().zip(val.to_le_bytes()) {
        *dst = src;
    }
}
/// Stores `val` into `data` in little-endian byte order.
///
/// At most eight bytes are written. A shorter destination receives only the
/// leading (least-significant) bytes, and bytes past the eighth are untouched.
fn write_u64_to_slice(val: u64, data: &mut [u8]) {
    let encoded = val.to_le_bytes();
    let count = encoded.len().min(data.len());
    let (head, _untouched) = data.split_at_mut(count);
    head.copy_from_slice(&encoded[..count]);
}
/// Follow-up work that `VsockMmioDevice::mmio_write` asks its caller to perform
/// after a register write.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(in crate::kvm) enum VsockMmioAction {
/// The write needs no further handling.
None,
/// The driver set DRIVER_OK for the first time since construction or reset.
Activated,
/// The driver wrote the queue-notify register.
Interrupt,
}
/// Per-queue state programmed by the driver through the queue registers.
///
/// The `Default` value (all addresses zero, size zero, not ready) is the state
/// every queue starts in.
#[derive(Debug, Clone, Copy, Default)]
pub(in crate::kvm) struct VirtQueueConfig {
// Assembled from the QUEUE_DESC_LOW / QUEUE_DESC_HIGH register pair.
desc_addr: u64,
// Assembled from the QUEUE_AVAIL_LOW / QUEUE_AVAIL_HIGH register pair.
avail_addr: u64,
// Assembled from the QUEUE_USED_LOW / QUEUE_USED_HIGH register pair.
used_addr: u64,
// Value written to QUEUE_NUM.
size: u16,
// True once the driver has written a non-zero value to QUEUE_READY.
ready: bool,
}
/// Register-level state of a vsock device exposed over an MMIO window.
#[derive(Debug, Clone)]
pub(in crate::kvm) struct VsockMmioDevice {
// Last value written to the STATUS register (low byte only).
device_status: u8,
// Returned by the CONFIG_GENERATION register; incremented on device reset.
config_generation: u8,
// Selects which 32-bit half of the device features DEVICE_FEATURES returns.
features_select: u32,
// Snapshot of `driver_features` taken when the device is activated.
acked_features: u64,
// Selects which 32-bit half of `driver_features` a DRIVER_FEATURES write updates.
driver_features_select: u32,
// Feature bits written by the driver so far.
driver_features: u64,
// Index into `queues` targeted by the per-queue registers.
queue_select: u16,
queues: [VirtQueueConfig; NUM_QUEUES],
// Context ID exposed through the device configuration space.
guest_cid: u64,
// True between the first DRIVER_OK write and the next reset.
activated: bool,
// Base address of the MMIO window, as supplied to `new`.
mmio_base: u64,
// Interrupt line number supplied to `new`; only read through `gsi()`.
#[allow(dead_code)] gsi: u32,
}
impl VsockMmioDevice {
    /// Creates a device in its initial (reset) state.
    ///
    /// `guest_cid` is the context ID exposed through the configuration space,
    /// `mmio_base` the base address of the register window and `gsi` the
    /// interrupt line number; the latter two are only stored and handed back
    /// through the accessors.
    pub(in crate::kvm) fn new(guest_cid: u64, mmio_base: u64, gsi: u32) -> Self {
        Self {
            device_status: 0,
            config_generation: 0,
            features_select: 0,
            acked_features: 0,
            driver_features_select: 0,
            driver_features: 0,
            queue_select: 0,
            queues: [VirtQueueConfig::default(); NUM_QUEUES],
            guest_cid,
            activated: false,
            mmio_base,
            gsi,
        }
    }

    /// Base address of the MMIO window, as passed to [`Self::new`].
    pub(in crate::kvm) fn mmio_base(&self) -> u64 {
        self.mmio_base
    }

    /// Size in bytes of the MMIO window.
    pub(in crate::kvm) fn mmio_size(&self) -> u64 {
        VSOCK_MMIO_SIZE
    }

    /// Interrupt line number, as passed to [`Self::new`].
    #[allow(dead_code)]
    pub(in crate::kvm) fn gsi(&self) -> u32 {
        self.gsi
    }

    /// Whether the driver has set DRIVER_OK since construction or the last reset.
    #[allow(dead_code)]
    pub(in crate::kvm) fn is_activated(&self) -> bool {
        self.activated
    }

    /// Context ID exposed to the guest through the configuration space.
    pub(in crate::kvm) fn guest_cid(&self) -> u64 {
        self.guest_cid
    }

    /// Queue configuration as currently programmed by the driver.
    pub(in crate::kvm) fn queues(&self) -> &[VirtQueueConfig; NUM_QUEUES] {
        &self.queues
    }

    /// Feature bits latched at activation time (zero before activation and
    /// after a reset).
    pub(in crate::kvm) fn acked_features(&self) -> u64 {
        self.acked_features
    }

    /// Handles a guest read of `data.len()` bytes at `offset` within the window.
    ///
    /// `data` is always zero-filled first, so unknown registers, out-of-range
    /// config offsets and bytes beyond the addressed value read as zero.
    /// Ordinary registers are returned as little-endian 32-bit values.
    pub(in crate::kvm) fn mmio_read(&self, offset: u64, data: &mut [u8]) {
        data.fill(0);
        let val = match offset {
            VIRTIO_MMIO_MAGIC_VALUE => VIRTIO_MMIO_MAGIC,
            VIRTIO_MMIO_VERSION => VIRTIO_MMIO_VERSION_V2,
            VIRTIO_MMIO_DEVICE_ID => VIRTIO_ID_VSOCK,
            VIRTIO_MMIO_VENDOR_ID => 0,
            VIRTIO_MMIO_DEVICE_FEATURES => {
                let features = self.device_features();
                // The `as u32` casts deliberately keep one 32-bit half each.
                match self.features_select {
                    0 => features as u32,
                    1 => (features >> 32) as u32,
                    _ => 0,
                }
            }
            VIRTIO_MMIO_QUEUE_NUM_MAX => u32::from(QUEUE_SIZE_MAX),
            VIRTIO_MMIO_QUEUE_READY => u32::from(self.selected_queue().ready),
            VIRTIO_MMIO_INTERRUPT_STATUS => 0,
            VIRTIO_MMIO_STATUS => u32::from(self.device_status),
            VIRTIO_MMIO_QUEUE_DESC_LOW => self.selected_queue().desc_addr as u32,
            VIRTIO_MMIO_QUEUE_DESC_HIGH => (self.selected_queue().desc_addr >> 32) as u32,
            VIRTIO_MMIO_QUEUE_AVAIL_LOW => self.selected_queue().avail_addr as u32,
            VIRTIO_MMIO_QUEUE_AVAIL_HIGH => (self.selected_queue().avail_addr >> 32) as u32,
            VIRTIO_MMIO_QUEUE_USED_LOW => self.selected_queue().used_addr as u32,
            VIRTIO_MMIO_QUEUE_USED_HIGH => (self.selected_queue().used_addr >> 32) as u32,
            VIRTIO_MMIO_CONFIG_GENERATION => u32::from(self.config_generation),
            VIRTIO_MMIO_CONFIG.. => {
                // The config space holds only the 64-bit guest CID. Serve the
                // bytes starting at the requested offset so that a driver
                // reading the CID as two 32-bit accesses (0x100 and 0x104)
                // gets the low and high halves, not the low half twice.
                let config = self.guest_cid.to_le_bytes();
                let config_offset = offset - VIRTIO_MMIO_CONFIG;
                if config_offset >= config.len() as u64 {
                    debug!(offset, "vsock MMIO 读: config 空间越界偏移");
                    return;
                }
                // Lossless: `config_offset` is below 8 here.
                let window = &config[config_offset as usize..];
                let len = window.len().min(data.len());
                data[..len].copy_from_slice(&window[..len]);
                debug!(
                    offset,
                    guest_cid = self.guest_cid,
                    "vsock MMIO config 读: guest_cid"
                );
                return;
            }
            _ => {
                debug!(offset, "vsock MMIO 读: 未识别偏移量");
                return;
            }
        };
        write_u32_to_slice(val, data);
    }

    /// Handles a guest write of `data` at `offset` within the window.
    ///
    /// Returns [`VsockMmioAction::Activated`] when the driver sets DRIVER_OK
    /// for the first time since construction or reset,
    /// [`VsockMmioAction::Interrupt`] for a queue notification, and
    /// [`VsockMmioAction::None`] otherwise. Writes shorter than four bytes are
    /// interpreted as the value zero (see `read_u32_from_slice`).
    pub(in crate::kvm) fn mmio_write(&mut self, offset: u64, data: &[u8]) -> VsockMmioAction {
        match offset {
            VIRTIO_MMIO_DEVICE_FEATURES_SEL => {
                self.features_select = read_u32_from_slice(data);
                debug!(select = self.features_select, "vsock feature select 写入");
                VsockMmioAction::None
            }
            VIRTIO_MMIO_DRIVER_FEATURES => {
                let val = u64::from(read_u32_from_slice(data));
                match self.driver_features_select {
                    0 => self.driver_features = (self.driver_features & !0xFFFF_FFFF) | val,
                    1 => self.driver_features = (self.driver_features & 0xFFFF_FFFF) | (val << 32),
                    _ => {}
                }
                VsockMmioAction::None
            }
            VIRTIO_MMIO_DRIVER_FEATURES_SEL => {
                self.driver_features_select = read_u32_from_slice(data);
                debug!(
                    select = self.driver_features_select,
                    "vsock driver feature select 写入"
                );
                VsockMmioAction::None
            }
            VIRTIO_MMIO_QUEUE_SEL => {
                // Bounds-check the full 32-bit value: truncating to u16 first
                // would let e.g. 0x1_0001 alias queue 1.
                let raw = read_u32_from_slice(data);
                if (raw as usize) < NUM_QUEUES {
                    // Lossless: `raw` is below NUM_QUEUES.
                    self.queue_select = raw as u16;
                    debug!(queue = raw, "vsock queue select 写入");
                } else {
                    debug!(queue = raw, "vsock queue select 越界,忽略");
                }
                VsockMmioAction::None
            }
            VIRTIO_MMIO_QUEUE_NUM => {
                // Reject sizes above the advertised maximum rather than
                // silently truncating the 32-bit register value to 16 bits.
                let raw = read_u32_from_slice(data);
                let qi = self.queue_select;
                if raw > u32::from(QUEUE_SIZE_MAX) {
                    debug!(queue_index = qi, size = raw, "vsock queue size 超出上限,忽略");
                } else if let Some(queue) = self.selected_queue_mut() {
                    // Lossless: `raw` is at most QUEUE_SIZE_MAX.
                    queue.size = raw as u16;
                    debug!(queue_index = qi, size = queue.size, "vsock queue size 写入");
                }
                VsockMmioAction::None
            }
            VIRTIO_MMIO_QUEUE_READY => {
                let val = read_u32_from_slice(data);
                let qi = self.queue_select;
                if let Some(queue) = self.selected_queue_mut() {
                    queue.ready = val != 0;
                    debug!(
                        queue_index = qi,
                        ready = queue.ready,
                        "vsock queue ready 写入"
                    );
                }
                VsockMmioAction::None
            }
            VIRTIO_MMIO_QUEUE_NOTIFY => {
                let val = read_u32_from_slice(data);
                debug!(queue = val, "vsock queue notify (kick)");
                VsockMmioAction::Interrupt
            }
            VIRTIO_MMIO_INTERRUPT_ACK => {
                debug!(val = read_u32_from_slice(data), "vsock interrupt ACK");
                VsockMmioAction::None
            }
            VIRTIO_MMIO_STATUS => {
                // Only the low byte of the register carries status bits.
                let val = read_u32_from_slice(data) as u8;
                if val == 0 {
                    self.reset();
                    debug!("vsock 设备已重置");
                    return VsockMmioAction::None;
                }
                self.device_status = val;
                if !self.activated && (val & VSOCK_DRIVER_OK) != 0 {
                    self.activated = true;
                    // Never latch a bit that was not offered to the driver.
                    self.acked_features = self.driver_features & self.device_features();
                    debug!(
                        status = val,
                        guest_cid = self.guest_cid,
                        acked_features = self.acked_features,
                        "vsock 设备已激活: guest driver 设置 DRIVER_OK"
                    );
                    return VsockMmioAction::Activated;
                }
                VsockMmioAction::None
            }
            VIRTIO_MMIO_QUEUE_DESC_LOW => {
                let low = read_u32_from_slice(data);
                if let Some(queue) = self.selected_queue_mut() {
                    queue.desc_addr = Self::with_low_half(queue.desc_addr, low);
                }
                VsockMmioAction::None
            }
            VIRTIO_MMIO_QUEUE_DESC_HIGH => {
                let high = read_u32_from_slice(data);
                if let Some(queue) = self.selected_queue_mut() {
                    queue.desc_addr = Self::with_high_half(queue.desc_addr, high);
                }
                VsockMmioAction::None
            }
            VIRTIO_MMIO_QUEUE_AVAIL_LOW => {
                let low = read_u32_from_slice(data);
                if let Some(queue) = self.selected_queue_mut() {
                    queue.avail_addr = Self::with_low_half(queue.avail_addr, low);
                }
                VsockMmioAction::None
            }
            VIRTIO_MMIO_QUEUE_AVAIL_HIGH => {
                let high = read_u32_from_slice(data);
                if let Some(queue) = self.selected_queue_mut() {
                    queue.avail_addr = Self::with_high_half(queue.avail_addr, high);
                }
                VsockMmioAction::None
            }
            VIRTIO_MMIO_QUEUE_USED_LOW => {
                let low = read_u32_from_slice(data);
                if let Some(queue) = self.selected_queue_mut() {
                    queue.used_addr = Self::with_low_half(queue.used_addr, low);
                }
                VsockMmioAction::None
            }
            VIRTIO_MMIO_QUEUE_USED_HIGH => {
                let high = read_u32_from_slice(data);
                if let Some(queue) = self.selected_queue_mut() {
                    queue.used_addr = Self::with_high_half(queue.used_addr, high);
                }
                VsockMmioAction::None
            }
            _ => {
                debug!(offset, "vsock MMIO 写: 未识别偏移量");
                VsockMmioAction::None
            }
        }
    }

    /// Returns the device to its initial state after the driver writes 0 to
    /// the status register.
    ///
    /// All driver-programmed state (queue layout, selectors, negotiated
    /// features) is cleared so a later re-initialisation cannot pick up stale
    /// values. The config generation counter is bumped, wrapping on overflow.
    fn reset(&mut self) {
        self.device_status = 0;
        self.activated = false;
        self.features_select = 0;
        self.driver_features_select = 0;
        self.driver_features = 0;
        self.acked_features = 0;
        self.queue_select = 0;
        self.queues = [VirtQueueConfig::default(); NUM_QUEUES];
        self.config_generation = self.config_generation.wrapping_add(1);
    }

    /// Replaces bits 0..32 of `addr` with `low`, keeping the upper half.
    fn with_low_half(addr: u64, low: u32) -> u64 {
        (addr & 0xFFFF_FFFF_0000_0000) | u64::from(low)
    }

    /// Replaces bits 32..64 of `addr` with `high`, keeping the lower half.
    fn with_high_half(addr: u64, high: u32) -> u64 {
        (addr & 0x0000_0000_FFFF_FFFF) | (u64::from(high) << 32)
    }

    /// Feature bits offered to the driver.
    fn device_features(&self) -> u64 {
        VIRTIO_F_VERSION_1
    }

    /// Queue addressed by the current selector.
    ///
    /// Indexing cannot go out of bounds: `queue_select` starts at 0 and is only
    /// updated by the QUEUE_SEL handler after a `< NUM_QUEUES` check.
    fn selected_queue(&self) -> &VirtQueueConfig {
        &self.queues[self.queue_select as usize]
    }

    /// Mutable access to the queue addressed by the current selector.
    fn selected_queue_mut(&mut self) -> Option<&mut VirtQueueConfig> {
        self.queues.get_mut(self.queue_select as usize)
    }
}
/// Configures and starts the in-kernel vhost-vsock backend for an activated
/// device.
///
/// * `queues` - queue layout programmed by the guest driver.
/// * `guest_cid` - context ID to assign to the guest.
/// * `acked_features` - feature bits negotiated with the driver.
/// * `guest_memory` - guest memory whose regions are registered with vhost.
///
/// # Errors
///
/// Returns a human-readable message if any queue is not ready or has size 0,
/// or if opening the vhost device or any configuration step on it fails.
///
/// NOTE(review): the vhost handle is a local that goes out of scope when this
/// function returns, and no kick/call eventfds are registered here. Whether
/// the backend stays usable after the return depends on the `vhost` crate's
/// drop behaviour - confirm; keeping the handle alive would require changing
/// the return type.
pub(in crate::kvm) fn activate_vhost_backend(
    queues: &[VirtQueueConfig; NUM_QUEUES],
    guest_cid: u64,
    acked_features: u64,
    guest_memory: &GuestMemoryMmap,
) -> Result<(), String> {
    // NOTE(review): the Linux vhost-vsock device exposes only the first two
    // vrings; the third (event) queue has no kernel-side counterpart, and vring
    // ioctls for index 2 are rejected. Confirm against drivers/vhost/vsock.c.
    const VHOST_VSOCK_QUEUES: usize = 2;

    // Validate the guest-provided layout before touching the kernel device so
    // a bad configuration fails without side effects.
    for (queue_index, queue) in queues.iter().enumerate() {
        if !queue.ready {
            return Err(format!(
                "queue {queue_index} is not ready, cannot activate vhost backend"
            ));
        }
        if queue.size == 0 {
            return Err(format!(
                "queue {queue_index} size is 0, cannot activate vhost backend"
            ));
        }
    }

    let vsock = VhostKernVsock::new(guest_memory)
        .map_err(|e| format!("failed to open /dev/vhost-vsock: {e}"))?;
    vsock
        .set_owner()
        .map_err(|e| format!("VHOST_SET_OWNER failed: {e}"))?;
    vsock
        .set_guest_cid(guest_cid)
        .map_err(|e| format!("VHOST_VSOCK_SET_GUEST_CID({guest_cid}) failed: {e}"))?;
    vsock
        .set_features(acked_features)
        .map_err(|e| format!("VHOST_SET_FEATURES({acked_features:#x}) failed: {e}"))?;

    let mem_regions: Vec<VhostUserMemoryRegionInfo> = guest_memory
        .iter()
        .map(|region| {
            VhostUserMemoryRegionInfo::from_guest_region(region)
                .map_err(|e| format!("failed to convert guest memory region: {e}"))
        })
        .collect::<Result<Vec<_>, _>>()?;
    vsock
        .set_mem_table(&mem_regions)
        .map_err(|e| format!("VHOST_SET_MEM_TABLE failed: {e}"))?;

    // Only the vrings the kernel backend owns are programmed.
    for (queue_index, queue) in queues.iter().enumerate().take(VHOST_VSOCK_QUEUES) {
        vsock
            .set_vring_num(queue_index, queue.size)
            .map_err(|e| format!("VHOST_SET_VRING_NUM(queue={queue_index}) failed: {e}"))?;
        let config = VringConfigData {
            queue_max_size: QUEUE_SIZE_MAX,
            queue_size: queue.size,
            flags: 0,
            desc_table_addr: queue.desc_addr,
            used_ring_addr: queue.used_addr,
            avail_ring_addr: queue.avail_addr,
            log_addr: None,
        };
        vsock
            .set_vring_addr(queue_index, &config)
            .map_err(|e| format!("VHOST_SET_VRING_ADDR(queue={queue_index}) failed: {e}"))?;
        vsock
            .set_vring_base(queue_index, 0)
            .map_err(|e| format!("VHOST_SET_VRING_BASE(queue={queue_index}) failed: {e}"))?;
    }

    vsock
        .start()
        .map_err(|e| format!("VHOST_VSOCK_SET_RUNNING(true) failed: {e}"))?;
    debug!(
        guest_cid,
        acked_features,
        ?queues,
        "vhost-vsock 后端激活成功"
    );
    Ok(())
}
#[cfg(test)]
mod tests {
    use super::*;

    /// MMIO base address used by every test device.
    const TEST_MMIO_BASE: u64 = 0xd000_0000;
    /// Interrupt line used by every test device.
    const TEST_GSI: u32 = 5;

    /// Builds a fresh device with the given guest CID.
    fn device_with_cid(cid: u64) -> VsockMmioDevice {
        VsockMmioDevice::new(cid, TEST_MMIO_BASE, TEST_GSI)
    }

    /// Performs a 32-bit register read and decodes the result.
    fn read_reg(device: &VsockMmioDevice, offset: u64) -> u32 {
        let mut raw = [0u8; 4];
        device.mmio_read(offset, &mut raw);
        u32::from_le_bytes(raw)
    }

    /// Performs a 32-bit register write and returns the requested action.
    fn write_reg(device: &mut VsockMmioDevice, offset: u64, value: u32) -> VsockMmioAction {
        device.mmio_write(offset, &value.to_le_bytes())
    }

    #[test]
    fn test_mmio_read_magic_version_device_id() {
        let device = device_with_cid(3);
        assert_eq!(read_reg(&device, VIRTIO_MMIO_MAGIC_VALUE), VIRTIO_MMIO_MAGIC);
        assert_eq!(read_reg(&device, VIRTIO_MMIO_VERSION), VIRTIO_MMIO_VERSION_V2);
        assert_eq!(read_reg(&device, VIRTIO_MMIO_DEVICE_ID), VIRTIO_ID_VSOCK);
        assert_eq!(read_reg(&device, VIRTIO_MMIO_VENDOR_ID), 0);
    }

    #[test]
    fn test_mmio_read_features() {
        // With the default selector (0) the low feature word is empty.
        let untouched = device_with_cid(3);
        assert_eq!(read_reg(&untouched, VIRTIO_MMIO_DEVICE_FEATURES), 0);

        // Selector 1 exposes the high word, whose bit 0 is VIRTIO_F_VERSION_1.
        let mut selected = device_with_cid(3);
        write_reg(&mut selected, VIRTIO_MMIO_DEVICE_FEATURES_SEL, 1);
        assert_eq!(read_reg(&selected, VIRTIO_MMIO_DEVICE_FEATURES), 1);
    }

    #[test]
    fn test_mmio_read_guest_cid() {
        let device = device_with_cid(42);
        let mut raw = [0u8; 8];
        device.mmio_read(VIRTIO_MMIO_CONFIG, &mut raw);
        assert_eq!(u64::from_le_bytes(raw), 42);
    }

    #[test]
    fn test_queue_select_and_size() {
        let mut device = device_with_cid(3);
        write_reg(&mut device, VIRTIO_MMIO_QUEUE_SEL, 1);
        assert_eq!(device.queue_select, 1);

        write_reg(&mut device, VIRTIO_MMIO_QUEUE_NUM, 64);
        assert_eq!(device.queues[1].size, 64);
        assert_eq!(device.queues[0].size, 0);
    }

    #[test]
    fn test_queue_address_registers() {
        let mut device = device_with_cid(3);
        write_reg(&mut device, VIRTIO_MMIO_QUEUE_SEL, 0);

        write_reg(&mut device, VIRTIO_MMIO_QUEUE_DESC_LOW, 0x1000);
        write_reg(&mut device, VIRTIO_MMIO_QUEUE_DESC_HIGH, 0);
        assert_eq!(device.queues[0].desc_addr, 0x1000);

        write_reg(&mut device, VIRTIO_MMIO_QUEUE_AVAIL_LOW, 0x2000);
        write_reg(&mut device, VIRTIO_MMIO_QUEUE_AVAIL_HIGH, 0);
        assert_eq!(device.queues[0].avail_addr, 0x2000);

        write_reg(&mut device, VIRTIO_MMIO_QUEUE_USED_LOW, 0x3000);
        write_reg(&mut device, VIRTIO_MMIO_QUEUE_USED_HIGH, 0);
        assert_eq!(device.queues[0].used_addr, 0x3000);
    }

    #[test]
    fn test_driver_ok_activation() {
        let mut device = device_with_cid(3);
        assert!(!device.is_activated());

        let first = write_reg(&mut device, VIRTIO_MMIO_STATUS, u32::from(VSOCK_DRIVER_OK));
        assert_eq!(first, VsockMmioAction::Activated);
        assert!(device.is_activated());

        // A repeated DRIVER_OK must not trigger a second activation.
        let second = write_reg(&mut device, VIRTIO_MMIO_STATUS, u32::from(VSOCK_DRIVER_OK));
        assert_eq!(second, VsockMmioAction::None);
    }

    #[test]
    fn test_device_reset() {
        let mut device = device_with_cid(3);
        write_reg(&mut device, VIRTIO_MMIO_STATUS, u32::from(VSOCK_DRIVER_OK));
        assert!(device.is_activated());

        write_reg(&mut device, VIRTIO_MMIO_STATUS, 0);
        assert!(!device.is_activated());
        assert_eq!(device.device_status, 0);
    }

    #[test]
    fn test_queue_num_max() {
        let device = device_with_cid(3);
        assert_eq!(read_reg(&device, VIRTIO_MMIO_QUEUE_NUM_MAX), 256);
    }

    #[test]
    fn test_queue_notify_returns_interrupt() {
        let mut device = device_with_cid(3);
        let action = write_reg(&mut device, VIRTIO_MMIO_QUEUE_NOTIFY, 0);
        assert_eq!(action, VsockMmioAction::Interrupt);
    }

    #[test]
    fn test_queue_ready() {
        let mut device = device_with_cid(3);
        write_reg(&mut device, VIRTIO_MMIO_QUEUE_SEL, 2);
        assert_eq!(read_reg(&device, VIRTIO_MMIO_QUEUE_READY), 0);

        write_reg(&mut device, VIRTIO_MMIO_QUEUE_READY, 1);
        assert!(device.queues[2].ready);
        assert_eq!(read_reg(&device, VIRTIO_MMIO_QUEUE_READY), 1);
    }

    #[test]
    fn test_helper_functions() {
        // Decoding: exact width, all ones, and a too-short slice.
        assert_eq!(read_u32_from_slice(&[1, 0, 0, 0]), 1);
        assert_eq!(read_u32_from_slice(&[0xff, 0xff, 0xff, 0xff]), u32::MAX);
        assert_eq!(read_u32_from_slice(&[1, 2]), 0);

        // 32-bit encoding into a full-width and a truncated destination.
        let mut full = [0u8; 4];
        write_u32_to_slice(0x1234_5678, &mut full);
        assert_eq!(full, [0x78, 0x56, 0x34, 0x12]);

        let mut truncated = [0u8; 2];
        write_u32_to_slice(0x1234_5678, &mut truncated);
        assert_eq!(truncated, [0x78, 0x56]);

        // 64-bit encoding.
        let mut wide = [0u8; 8];
        write_u64_to_slice(0x0100_0000_0000_0003, &mut wide);
        assert_eq!(wide, [3, 0, 0, 0, 0, 0, 0, 1]);
    }

    #[test]
    fn test_accessors() {
        let device = device_with_cid(7);
        assert_eq!(device.mmio_base(), 0xd000_0000);
        assert_eq!(device.mmio_size(), 0x200);
        assert_eq!(device.gsi(), 5);
        assert_eq!(device.guest_cid(), 7);
    }
}