1#![cfg(all(target_os = "linux", feature = "kvm"))]
2
3use std::collections::VecDeque;
4use std::sync::{Arc, Mutex, MutexGuard};
5use std::time::{Duration, Instant};
6
7use kvm_bindings::kvm_userspace_memory_region;
8#[cfg(target_arch = "x86_64")]
9use kvm_bindings::{KVM_PIT_SPEAKER_DUMMY, kvm_pit_config};
10use kvm_ioctls::{Kvm, VcpuFd, VmFd};
11use mimobox_core::{DirEntry, FileStat, SandboxConfig, SandboxError, SandboxSnapshot};
12use thiserror::Error;
13use tracing::warn;
14use vm_memory::{GuestAddress, GuestMemory, GuestMemoryMmap};
15
16#[cfg(target_arch = "x86_64")]
17use crate::kvm::{KVM_IDENTITY_MAP_ADDR, KVM_TSS_ADDR};
18use crate::kvm::{KvmBackend, RestoreProfile, restore_runtime_state};
19#[cfg(feature = "zerocopy-fork")]
20use crate::snapshot::load_state_from_memory_file;
21use crate::vm::LifecycleError;
22use crate::{
23 GuestCommandResult, GuestExecOptions, HttpRequest, HttpResponse, MicrovmConfig, MicrovmError,
24 MicrovmSnapshot, StreamEvent,
25};
26
27pub(crate) struct EmptyVmSlot {
29 kvm: Kvm,
30 vm_fd: VmFd,
31 vcpus: Vec<VcpuFd>,
32 guest_memory: GuestMemoryMmap,
33 config: MicrovmConfig,
34 base_config: SandboxConfig,
35}
36
37impl EmptyVmSlot {
38 pub(crate) fn new(
40 base_config: SandboxConfig,
41 config: MicrovmConfig,
42 ) -> Result<Self, MicrovmError> {
43 config.validate()?;
44
45 let kvm = Kvm::new().map_err(to_backend_error)?;
46 let vm_fd = kvm.create_vm().map_err(to_backend_error)?;
47
48 #[cfg(target_arch = "x86_64")]
49 {
50 vm_fd.create_irq_chip().map_err(to_backend_error)?;
51 let pit_config = kvm_pit_config {
52 flags: KVM_PIT_SPEAKER_DUMMY,
53 ..Default::default()
54 };
55 vm_fd.create_pit2(pit_config).map_err(to_backend_error)?;
56 vm_fd
57 .set_identity_map_address(KVM_IDENTITY_MAP_ADDR)
58 .map_err(to_backend_error)?;
59 vm_fd
60 .set_tss_address(KVM_TSS_ADDR)
61 .map_err(to_backend_error)?;
62 }
63
64 let guest_memory =
65 GuestMemoryMmap::from_ranges(&[(GuestAddress(0), config.memory_bytes()?)])
66 .map_err(to_backend_error)?;
67 KvmBackend::try_enable_huge_pages(&guest_memory);
68
69 let mut vcpus = Vec::with_capacity(usize::from(config.vcpu_count));
70 for vcpu_index in 0..u64::from(config.vcpu_count) {
71 let vcpu = vm_fd.create_vcpu(vcpu_index).map_err(to_backend_error)?;
72 vcpus.push(vcpu);
73 }
74
75 register_guest_memory(&vm_fd, &guest_memory, &config)?;
76
77 Ok(Self {
78 kvm,
79 vm_fd,
80 vcpus,
81 guest_memory,
82 config,
83 base_config,
84 })
85 }
86
87 pub(crate) fn into_restored_backend(
89 self,
90 memory: &[u8],
91 vcpu_state: &[u8],
92 ) -> Result<KvmBackend, MicrovmError> {
93 let mut backend = KvmBackend::from_slot_components(
94 self.kvm,
95 self.vm_fd,
96 self.vcpus,
97 self.guest_memory,
98 self.base_config,
99 self.config,
100 );
101 backend.set_pending_restore_profile(RestoreProfile::default());
102
103 let mut restore_profile = backend.take_or_seed_restore_profile();
104
105 let restore_memory_started_at = Instant::now();
106 backend.restore_guest_memory(memory)?;
107 restore_profile.memory_state_write = restore_memory_started_at.elapsed();
108
109 restore_profile.cpuid_config = backend.prepare_restored_vcpus()?;
110
111 let runtime_restore_profile = restore_runtime_state(&mut backend, vcpu_state)?;
112 restore_profile.vcpu_state_restore = runtime_restore_profile.vcpu_state_restore;
113 restore_profile.device_state_restore = runtime_restore_profile.device_state_restore;
114
115 backend.set_lifecycle_ready();
116 backend.emit_restore_profile_without_resume(&restore_profile);
117 backend.set_pending_restore_profile(restore_profile);
118 Ok(backend)
119 }
120
121 #[cfg(feature = "zerocopy-fork")]
123 pub(crate) fn into_restored_backend_from_file(
124 self,
125 memory_path: &std::path::Path,
126 vcpu_state: &[u8],
127 ) -> Result<KvmBackend, MicrovmError> {
128 let mut backend = KvmBackend::from_slot_components(
129 self.kvm,
130 self.vm_fd,
131 self.vcpus,
132 self.guest_memory,
133 self.base_config,
134 self.config,
135 );
136 backend.set_pending_restore_profile(RestoreProfile::default());
137
138 let mut restore_profile = backend.take_or_seed_restore_profile();
139
140 let restore_memory_started_at = Instant::now();
141 backend.restore_from_file_zerocopy(memory_path)?;
142 restore_profile.memory_state_write = restore_memory_started_at.elapsed();
143
144 restore_profile.cpuid_config = backend.prepare_restored_vcpus()?;
145
146 let runtime_restore_profile = restore_runtime_state(&mut backend, vcpu_state)?;
147 restore_profile.vcpu_state_restore = runtime_restore_profile.vcpu_state_restore;
148 restore_profile.device_state_restore = runtime_restore_profile.device_state_restore;
149
150 backend.set_lifecycle_ready();
151 backend.emit_restore_profile_without_resume(&restore_profile);
152 backend.set_pending_restore_profile(restore_profile);
153 Ok(backend)
154 }
155}
156
/// Sizing parameters for a restore pool.
///
/// `RestorePool::new` rejects a configuration where `max_size` is zero or
/// `min_size` exceeds `max_size`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct RestorePoolConfig {
    /// Number of idle slots the pool is warmed to on creation; the pool
    /// tries to refill whenever the idle count falls below this value.
    pub min_size: usize,
    /// Upper bound on the number of idle slots kept in the pool.
    pub max_size: usize,
}
165
166#[derive(Debug, Error)]
168pub enum RestorePoolError {
169 #[error("invalid restore pool config: min_size={min_size}, max_size={max_size}")]
171 InvalidConfig {
172 min_size: usize,
174 max_size: usize,
176 },
177
178 #[error("restore pool state lock poisoned")]
180 StatePoisoned,
181
182 #[error(transparent)]
184 Microvm(
185 #[from]
187 MicrovmError,
188 ),
189}
190
191#[derive(Default)]
192struct RestorePoolState {
193 idle: VecDeque<EmptyVmSlot>,
194 in_use_count: usize,
195}
196
197struct RestorePoolInner {
198 base_config: SandboxConfig,
199 config: MicrovmConfig,
200 pool_config: RestorePoolConfig,
201 state: Mutex<RestorePoolState>,
202}
203
204impl RestorePoolInner {
205 fn lock_state(&self) -> Result<MutexGuard<'_, RestorePoolState>, RestorePoolError> {
206 self.state
207 .lock()
208 .map_err(|_| RestorePoolError::StatePoisoned)
209 }
210
211 fn create_slot(&self) -> Result<EmptyVmSlot, RestorePoolError> {
212 EmptyVmSlot::new(self.base_config.clone(), self.config.clone()).map_err(Into::into)
213 }
214
215 fn rollback_in_use(&self) {
216 match self.state.lock() {
217 Ok(mut state) => {
218 state.in_use_count = state.in_use_count.saturating_sub(1);
219 }
220 Err(_) => {
221 warn!("回滚恢复池 in_use 计数失败:状态锁已中毒");
222 }
223 }
224 }
225
226 fn push_idle_slot(&self, slot: EmptyVmSlot) -> Result<bool, RestorePoolError> {
227 let mut state = self.lock_state()?;
228 if state.idle.len() >= self.pool_config.max_size {
229 return Ok(false);
230 }
231 state.idle.push_back(slot);
232 Ok(true)
233 }
234
235 fn warm(&self, target_idle_size: usize) -> Result<(), RestorePoolError> {
236 let target_idle_size = target_idle_size.min(self.pool_config.max_size);
237 let current_idle = self.lock_state()?.idle.len();
238 if current_idle >= target_idle_size {
239 return Ok(());
240 }
241
242 let create_count = target_idle_size.saturating_sub(current_idle);
243 let mut slots = Vec::with_capacity(create_count);
244 for _ in 0..create_count {
245 slots.push(self.create_slot()?);
246 }
247
248 let mut state = self.lock_state()?;
249 let available = self.pool_config.max_size.saturating_sub(state.idle.len());
250 let keep_count = available.min(slots.len());
251 for slot in slots.drain(..keep_count) {
252 state.idle.push_back(slot);
253 }
254 Ok(())
255 }
256
257 fn replenish_if_needed(&self) {
258 let should_replenish = match self.state.lock() {
259 Ok(state) => state.idle.len() < self.pool_config.min_size,
260 Err(_) => {
261 warn!("检查恢复池补充条件失败:状态锁已中毒");
262 return;
263 }
264 };
265
266 if !should_replenish {
267 return;
268 }
269
270 match self.create_slot() {
271 Ok(slot) => match self.push_idle_slot(slot) {
272 Ok(true) => {}
273 Ok(false) => {}
274 Err(err) => warn!("回填空壳 VM 失败: {err}"),
275 },
276 Err(err) => warn!("创建空壳 VM 失败,无法回填恢复池: {err}"),
277 }
278 }
279
280 fn release_backend(&self, mut backend: KvmBackend) {
281 if let Err(err) = backend.shutdown() {
282 warn!("销毁恢复态 VM 失败: {err}");
283 }
284
285 match self.state.lock() {
286 Ok(mut state) => {
287 state.in_use_count = state.in_use_count.saturating_sub(1);
288 }
289 Err(_) => {
290 warn!("释放恢复态 VM 失败:状态锁已中毒");
291 return;
292 }
293 }
294
295 self.replenish_if_needed();
296 }
297}
298
299impl Drop for RestorePoolInner {
300 fn drop(&mut self) {
301 let idle = match self.state.lock() {
303 Ok(mut state) => std::mem::take(&mut state.idle),
304 Err(_) => {
305 warn!("RestorePool drop 时状态锁已中毒,无法清理 idle slot");
306 return;
307 }
308 };
309 let count = idle.len();
310 drop(idle);
311 if count > 0 {
312 tracing::debug!(count, "RestorePool drop 清理完成");
313 }
314 }
315}
316
317#[derive(Clone)]
323pub struct RestorePool {
324 inner: Arc<RestorePoolInner>,
325}
326
327impl RestorePool {
328 pub fn new(
333 base_config: SandboxConfig,
334 config: MicrovmConfig,
335 pool_config: RestorePoolConfig,
336 ) -> Result<Self, RestorePoolError> {
337 if pool_config.max_size == 0 || pool_config.min_size > pool_config.max_size {
338 return Err(RestorePoolError::InvalidConfig {
339 min_size: pool_config.min_size,
340 max_size: pool_config.max_size,
341 });
342 }
343
344 config.validate()?;
345
346 let pool = Self {
347 inner: Arc::new(RestorePoolInner {
348 base_config,
349 config,
350 pool_config,
351 state: Mutex::new(RestorePoolState::default()),
352 }),
353 };
354
355 if pool.inner.pool_config.min_size > 0 {
356 pool.inner.warm(pool.inner.pool_config.min_size)?;
357 }
358
359 Ok(pool)
360 }
361
362 pub fn restore(
367 &self,
368 memory: &[u8],
369 vcpu_state: &[u8],
370 ) -> Result<PooledRestoreVm, RestorePoolError> {
371 let slot = {
372 let mut state = self.inner.lock_state()?;
373 state.in_use_count += 1;
374 state.idle.pop_back()
375 };
376
377 let slot = match slot {
378 Some(slot) => slot,
379 None => match self.inner.create_slot() {
380 Ok(slot) => slot,
381 Err(err) => {
382 self.inner.rollback_in_use();
383 return Err(err);
384 }
385 },
386 };
387
388 let backend = match slot.into_restored_backend(memory, vcpu_state) {
389 Ok(backend) => backend,
390 Err(err) => {
391 self.inner.rollback_in_use();
392 self.inner.replenish_if_needed();
393 return Err(err.into());
394 }
395 };
396
397 Ok(PooledRestoreVm {
398 backend: Some(backend),
399 pool: Arc::clone(&self.inner),
400 })
401 }
402
403 pub fn restore_from_bytes(&self, data: &[u8]) -> Result<PooledRestoreVm, RestorePoolError> {
405 let snapshot = MicrovmSnapshot::restore(data)?;
406 let (_, _, memory, vcpu_state) = snapshot.into_parts();
407 self.restore(memory.as_slice(), vcpu_state.as_slice())
408 }
409
410 pub fn restore_snapshot(
415 &self,
416 snapshot: &SandboxSnapshot,
417 ) -> Result<PooledRestoreVm, RestorePoolError> {
418 if let Some(memory_path) = snapshot.memory_file_path() {
419 #[cfg(feature = "zerocopy-fork")]
420 {
421 let (_, _, vcpu_state) = load_state_from_memory_file(memory_path)?;
422 return self.restore_from_file(memory_path, vcpu_state.as_slice());
423 }
424
425 #[cfg(not(feature = "zerocopy-fork"))]
426 {
427 let snapshot = MicrovmSnapshot::from_memory_file(memory_path)?;
428 let (_, _, memory, vcpu_state) = snapshot.into_parts();
429 return self.restore(memory.as_slice(), vcpu_state.as_slice());
430 }
431 }
432
433 let data = snapshot.as_bytes().map_err(map_snapshot_access_error)?;
434 self.restore_from_bytes(data)
435 }
436
437 #[cfg(feature = "zerocopy-fork")]
438 fn restore_from_file(
439 &self,
440 memory_path: &std::path::Path,
441 vcpu_state: &[u8],
442 ) -> Result<PooledRestoreVm, RestorePoolError> {
443 let slot = {
444 let mut state = self.inner.lock_state()?;
445 state.in_use_count += 1;
446 state.idle.pop_back()
447 };
448
449 let slot = match slot {
450 Some(slot) => slot,
451 None => match self.inner.create_slot() {
452 Ok(slot) => slot,
453 Err(err) => {
454 self.inner.rollback_in_use();
455 return Err(err);
456 }
457 },
458 };
459
460 let backend = match slot.into_restored_backend_from_file(memory_path, vcpu_state) {
461 Ok(backend) => backend,
462 Err(err) => {
463 self.inner.rollback_in_use();
464 self.inner.replenish_if_needed();
465 return Err(err.into());
466 }
467 };
468
469 Ok(PooledRestoreVm {
470 backend: Some(backend),
471 pool: Arc::clone(&self.inner),
472 })
473 }
474
475 pub fn idle_count(&self) -> usize {
480 match self.inner.state.lock() {
481 Ok(state) => state.idle.len(),
482 Err(_) => {
483 warn!("查询恢复池空闲槽位失败:状态锁已中毒");
484 0
485 }
486 }
487 }
488
489 pub fn warm(&self, target: usize) -> Result<(), RestorePoolError> {
493 self.inner.warm(target)
494 }
495}
496
497pub struct PooledRestoreVm {
502 backend: Option<KvmBackend>,
503 pool: Arc<RestorePoolInner>,
504}
505
506impl PooledRestoreVm {
507 pub fn execute(&mut self, cmd: &[String]) -> Result<GuestCommandResult, MicrovmError> {
509 self.execute_with_options(cmd, GuestExecOptions::default())
510 }
511
512 pub fn execute_with_options(
517 &mut self,
518 cmd: &[String],
519 options: GuestExecOptions,
520 ) -> Result<GuestCommandResult, MicrovmError> {
521 match self.backend.as_mut() {
522 Some(backend) => backend.run_command_with_options(cmd, &options),
523 None => Err(MicrovmError::Lifecycle(LifecycleError::Released(
524 "restored VM has been released".into(),
525 ))),
526 }
527 }
528
529 pub fn stream_execute(
531 &mut self,
532 cmd: &[String],
533 ) -> Result<std::sync::mpsc::Receiver<StreamEvent>, MicrovmError> {
534 self.stream_execute_with_options(cmd, GuestExecOptions::default())
535 }
536
537 pub fn stream_execute_with_options(
539 &mut self,
540 cmd: &[String],
541 options: GuestExecOptions,
542 ) -> Result<std::sync::mpsc::Receiver<StreamEvent>, MicrovmError> {
543 match self.backend.as_mut() {
544 Some(backend) => backend.run_command_streaming_with_options(cmd, &options),
545 None => Err(MicrovmError::Lifecycle(LifecycleError::Released(
546 "restored VM has been released".into(),
547 ))),
548 }
549 }
550
551 pub fn read_file(&mut self, path: &str) -> Result<Vec<u8>, MicrovmError> {
553 match self.backend.as_mut() {
554 Some(backend) => backend.read_file(path),
555 None => Err(MicrovmError::Lifecycle(LifecycleError::Released(
556 "restored VM has been released".into(),
557 ))),
558 }
559 }
560
561 pub fn write_file(&mut self, path: &str, data: &[u8]) -> Result<(), MicrovmError> {
563 match self.backend.as_mut() {
564 Some(backend) => backend.write_file(path, data),
565 None => Err(MicrovmError::Lifecycle(LifecycleError::Released(
566 "restored VM has been released".into(),
567 ))),
568 }
569 }
570
571 pub fn list_dir(&mut self, path: &str) -> Result<Vec<DirEntry>, MicrovmError> {
573 crate::guest_file_ops::list_dir(path, |cmd| self.execute(cmd))
574 }
575
576 pub fn file_exists(&mut self, path: &str) -> Result<bool, MicrovmError> {
578 crate::guest_file_ops::file_exists(path, |cmd| self.execute(cmd))
579 }
580
581 pub fn remove_file(&mut self, path: &str) -> Result<(), MicrovmError> {
583 crate::guest_file_ops::remove_file(path, |cmd| self.execute(cmd))
584 }
585
586 pub fn rename(&mut self, from: &str, to: &str) -> Result<(), MicrovmError> {
588 crate::guest_file_ops::rename(from, to, |cmd| self.execute(cmd))
589 }
590
591 pub fn stat(&mut self, path: &str) -> Result<FileStat, MicrovmError> {
593 crate::guest_file_ops::stat(path, |cmd| self.execute(cmd))
594 }
595
596 pub fn ping(&mut self) -> Result<Duration, MicrovmError> {
598 match self.backend.as_mut() {
599 Some(backend) => backend.ping(),
600 None => Err(MicrovmError::Lifecycle(LifecycleError::Released(
601 "restored VM has been released".into(),
602 ))),
603 }
604 }
605
606 pub fn http_request(&mut self, request: HttpRequest) -> Result<HttpResponse, MicrovmError> {
608 match self.backend.as_mut() {
609 Some(backend) => backend.http_request(request),
610 None => Err(MicrovmError::Lifecycle(LifecycleError::Released(
611 "restored VM has been released".into(),
612 ))),
613 }
614 }
615
616 pub fn snapshot(&self) -> Result<SandboxSnapshot, MicrovmError> {
618 match self.backend.as_ref() {
619 Some(backend) => backend.snapshot_to_file(),
620 None => Err(MicrovmError::Lifecycle(LifecycleError::Released(
621 "restored VM has been released".into(),
622 ))),
623 }
624 }
625}
626
627impl Drop for PooledRestoreVm {
628 fn drop(&mut self) {
629 if let Some(backend) = self.backend.take() {
630 self.pool.release_backend(backend);
631 }
632 }
633}
634
635fn register_guest_memory(
636 vm_fd: &VmFd,
637 guest_memory: &GuestMemoryMmap,
638 config: &MicrovmConfig,
639) -> Result<(), MicrovmError> {
640 let host_addr = guest_memory
641 .get_host_address(GuestAddress(0))
642 .map_err(to_backend_error)? as u64;
643 let memory_size = u64::try_from(config.memory_bytes()?).map_err(|_| {
644 MicrovmError::Backend("guest memory size cannot be converted to u64".into())
645 })?;
646 let memory_region = kvm_userspace_memory_region {
647 slot: 0,
648 guest_phys_addr: 0,
649 memory_size,
650 userspace_addr: host_addr,
651 flags: 0,
652 };
653
654 unsafe {
658 vm_fd
659 .set_user_memory_region(memory_region)
660 .map_err(to_backend_error)?;
661 }
662 Ok(())
663}
664
665fn to_backend_error(err: impl std::fmt::Display) -> MicrovmError {
666 MicrovmError::Backend(err.to_string())
667}
668
669fn map_snapshot_access_error(error: SandboxError) -> RestorePoolError {
670 let error = match error {
671 SandboxError::Io(error) => MicrovmError::Io(error),
672 SandboxError::InvalidSnapshot => {
673 MicrovmError::SnapshotFormat("invalid sandbox snapshot".into())
674 }
675 other => MicrovmError::SnapshotFormat(other.to_string()),
676 };
677 RestorePoolError::Microvm(error)
678}