duende_platform/
memory.rs

1//! Memory management for daemon processes.
2//!
3//! # DT-007: Swap Deadlock Prevention
4//!
5//! This module provides memory locking functionality to prevent swap deadlock
6//! for daemons that serve as swap devices (e.g., trueno-ublk).
7//!
8//! ## The Problem
9//!
10//! When a daemon serves as a swap device, a deadlock can occur:
11//! 1. Kernel needs to swap pages OUT to the daemon's device
12//! 2. Daemon needs memory to process I/O request
13//! 3. Kernel tries to swap out daemon's pages to free memory
14//! 4. Swap goes to the same daemon → waiting for itself → DEADLOCK
15//!
16//! ## Evidence
17//!
18//! Kernel log from 2026-01-06 stress test:
19//! ```text
20//! INFO: task trueno-ublk:59497 blocked for more than 122 seconds.
21//! task:trueno-ublk state:D (uninterruptible sleep)
22//! __swap_writepage+0x111/0x1a0
23//! swap_writepage+0x5f/0xe0
24//! ```
25//!
26//! ## Solution
27//!
28//! Use `mlockall(MCL_CURRENT | MCL_FUTURE)` to pin all daemon memory,
29//! preventing the daemon itself from being swapped out.
30
31use std::io;
32
33use crate::{PlatformError, Result};
34
/// Outcome of a memory-locking attempt, as reported by `lock_daemon_memory`.
///
/// All three variants are non-fatal outcomes: they are what callers receive
/// inside `Ok(..)`. A fatal failure is reported as an `Err` instead.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MlockResult {
    /// Memory successfully locked.
    Success,
    /// No lock was attempted: mlock() was not requested (lock_memory = false),
    /// or the platform fallback of `lock_daemon_memory` was used.
    Disabled,
    /// mlock() failed but daemon continues (non-fatal).
    ///
    /// Carries the raw `errno` value of the failed call, or `-1` when the OS
    /// error code could not be determined.
    Failed(i32),
}
45
46/// Lock all current and future memory allocations to prevent swapping.
47///
48/// This is CRITICAL for swap device daemons to prevent deadlock.
49///
50/// # Arguments
51///
52/// * `required` - If true, returns an error on failure. If false, logs warning and continues.
53///
54/// # Returns
55///
56/// - `Ok(MlockResult::Success)` if memory was locked successfully
57/// - `Ok(MlockResult::Failed(errno))` if mlockall() failed and `required` is false
58/// - `Err(...)` if mlockall() failed and `required` is true
59///
60/// # Platform Support
61///
62/// - **Linux**: Full support via `mlockall()`
63/// - **macOS**: Limited support (requires entitlements)
64/// - **Others**: Returns `MlockResult::Disabled`
65///
66/// # Capability Requirements
67///
68/// Requires one of:
69/// - `CAP_IPC_LOCK` capability
70/// - Root privileges
71/// - Sufficient `RLIMIT_MEMLOCK` limit
72///
73/// # Errors
74/// Returns `PlatformError::Resource` if `required` is true and mlockall fails.
75#[cfg(target_os = "linux")]
76#[allow(unsafe_code)]
77pub fn lock_daemon_memory(required: bool) -> Result<MlockResult> {
78    use tracing::{info, warn};
79
80    info!("Locking daemon memory to prevent swap deadlock (DT-007)...");
81
82    // MCL_CURRENT: Lock all pages currently mapped
83    // MCL_FUTURE: Lock all pages that become mapped in the future
84    // SAFETY: mlockall is a well-defined syscall. It affects only the current process.
85    let result = unsafe { libc::mlockall(libc::MCL_CURRENT | libc::MCL_FUTURE) };
86
87    if result == 0 {
88        info!("Memory locked successfully - daemon pages will not be swapped");
89        Ok(MlockResult::Success)
90    } else {
91        let errno = io::Error::last_os_error().raw_os_error().unwrap_or(-1);
92        let err_msg = match errno {
93            libc::ENOMEM => "insufficient memory or resource limits (check RLIMIT_MEMLOCK)",
94            libc::EPERM => "insufficient privileges (need CAP_IPC_LOCK or root)",
95            libc::EINVAL => "invalid flags",
96            _ => "unknown error",
97        };
98
99        if required {
100            Err(PlatformError::Resource(format!(
101                "mlockall() failed: {} (errno={}). \
102                 Cannot safely run as swap device without mlock(). \
103                 Either run as root, add CAP_IPC_LOCK, or set lock_memory_required=false",
104                err_msg, errno
105            )))
106        } else {
107            warn!(
108                "mlockall() failed: {} (errno={}). \
109                 Daemon may deadlock under memory pressure when used as swap device. \
110                 Set lock_memory_required=true to make this fatal.",
111                err_msg, errno
112            );
113            Ok(MlockResult::Failed(errno))
114        }
115    }
116}
117
/// macOS implementation (limited support).
///
/// Calls `mlockall(MCL_CURRENT | MCL_FUTURE)` and maps the outcome to
/// [`MlockResult`] the same way the Linux implementation does.
///
/// NOTE(review): the XNU kernel is understood to implement `mlockall()` as a
/// stub that always reports `ENOSYS`, so on current macOS releases this call
/// is expected to fail. Confirm against the targeted OS version. The `ENOSYS`
/// case gets its own message so operators do not see "unknown error".
///
/// # Arguments
///
/// * `required` - If true, returns an error on failure. If false, logs a
///   warning and returns `Ok(MlockResult::Failed(errno))`.
///
/// # Errors
/// Returns `PlatformError::Resource` if `required` is true and mlockall fails.
#[cfg(target_os = "macos")]
#[allow(unsafe_code)]
pub fn lock_daemon_memory(required: bool) -> Result<MlockResult> {
    use tracing::{info, warn};

    info!("Attempting memory lock on macOS...");

    // SAFETY: mlockall takes a plain integer flag set, dereferences no
    // pointers, and only affects the calling process.
    let result = unsafe { libc::mlockall(libc::MCL_CURRENT | libc::MCL_FUTURE) };

    if result == 0 {
        info!("Memory locked successfully on macOS");
        return Ok(MlockResult::Success);
    }

    // Read errno immediately, before any other call can overwrite it.
    let errno = io::Error::last_os_error().raw_os_error().unwrap_or(-1);
    let err_msg = match errno {
        libc::ENOMEM => "insufficient memory or resource limits",
        libc::EPERM => {
            "insufficient privileges (may need com.apple.security.cs.allow-jit entitlement)"
        }
        libc::EINVAL => "invalid flags",
        libc::EAGAIN => "system resources temporarily unavailable",
        // The kernel does not provide the syscall at all; retrying or
        // changing privileges will not help.
        libc::ENOSYS => "mlockall() is not implemented by this kernel",
        _ => "unknown error",
    };

    if required {
        Err(PlatformError::Resource(format!(
            "mlockall() failed on macOS: {} (errno={})",
            err_msg, errno
        )))
    } else {
        warn!("mlockall() failed on macOS: {} (errno={})", err_msg, errno);
        Ok(MlockResult::Failed(errno))
    }
}
156
/// Fallback for every target other than Linux and macOS: no lock is attempted.
///
/// Emits a debug-level log line and always returns
/// `Ok(MlockResult::Disabled)`. The `_required` flag is ignored, so this
/// fallback never returns an error.
#[cfg(not(any(target_os = "linux", target_os = "macos")))]
pub fn lock_daemon_memory(_required: bool) -> Result<MlockResult> {
    tracing::debug!("Memory locking not supported on this platform");
    Ok(MlockResult::Disabled)
}
164
/// Report whether this process currently has any locked memory (Linux).
///
/// Reads `/proc/self/status` and looks at the `VmLck` field: the first
/// `VmLck:` line whose second whitespace-separated token parses as an
/// unsigned integer decides the answer (`true` when that value is non-zero).
///
/// Returns `false` when the file cannot be read, when no `VmLck:` line is
/// present, or when its value cannot be parsed.
#[cfg(target_os = "linux")]
pub fn is_memory_locked() -> bool {
    let Ok(status) = std::fs::read_to_string("/proc/self/status") else {
        return false;
    };

    status
        .lines()
        .filter(|entry| entry.starts_with("VmLck:"))
        // Token 0 is the "VmLck:" label, token 1 is the size in kB.
        .find_map(|entry| entry.split_whitespace().nth(1)?.parse::<u64>().ok())
        .is_some_and(|locked_kb| locked_kb > 0)
}
184
/// Check if memory is locked (non-Linux fallback).
///
/// Always returns `false`. On these targets the lock state is not queried at
/// all, so `false` means "not known to be locked" rather than "definitely
/// unlocked".
#[cfg(not(target_os = "linux"))]
pub fn is_memory_locked() -> bool {
    // No easy way to check on other platforms
    false
}
191
192/// Unlock all memory (for cleanup/testing).
193///
194/// Note: This is rarely needed in production since process exit releases all locks.
195///
196/// # Errors
197/// Returns `PlatformError::Resource` if munlockall fails.
198#[cfg(any(target_os = "linux", target_os = "macos"))]
199#[allow(unsafe_code)]
200pub fn unlock_daemon_memory() -> Result<()> {
201    // SAFETY: munlockall is a well-defined syscall
202    let result = unsafe { libc::munlockall() };
203    if result == 0 {
204        Ok(())
205    } else {
206        Err(PlatformError::Resource("munlockall() failed".to_string()))
207    }
208}
209
/// Unlock memory (fallback for targets other than Linux and macOS).
///
/// This is a no-op that always returns `Ok(())`.
///
/// # Errors
/// Never returns an error on these targets; the `Result` return type only
/// mirrors the Linux/macOS signature.
#[cfg(not(any(target_os = "linux", target_os = "macos")))]
pub fn unlock_daemon_memory() -> Result<()> {
    Ok(())
}
215
216/// Apply memory-related resource configuration.
217///
218/// This is a convenience function for daemons to call during initialization.
219/// It reads the `ResourceConfig` and applies memory locking if configured.
220///
221/// # Example
222///
223/// ```rust,ignore
224/// use duende_core::ResourceConfig;
225/// use duende_platform::apply_memory_config;
226///
227/// fn daemon_init(config: &ResourceConfig) -> Result<()> {
228///     apply_memory_config(config)?;
229///     // ... rest of initialization
230///     Ok(())
231/// }
232/// ```
233///
234/// # Errors
235///
236/// Returns an error if `lock_memory` is true, `lock_memory_required` is true,
237/// and mlock() fails.
238pub fn apply_memory_config(config: &duende_core::ResourceConfig) -> Result<()> {
239    if config.lock_memory {
240        let result = lock_daemon_memory(config.lock_memory_required)?;
241        tracing::info!("Memory lock result: {:?}", result);
242    } else {
243        tracing::debug!("Memory locking disabled (lock_memory=false)");
244    }
245    Ok(())
246}
247
// Unit tests for the memory-locking helpers.
//
// Most of these tests are written to pass both with and without the
// privileges needed for mlockall(): they accept either outcome and mainly
// check that the functions do not panic and honour the `required` flag.
#[cfg(test)]
mod tests {
    use super::*;
    use duende_core::ResourceConfig;

    #[test]
    fn test_mlock_result_variants() {
        // Test all variants can be constructed and compared
        let success = MlockResult::Success;
        let disabled = MlockResult::Disabled;
        let failed = MlockResult::Failed(1);

        assert_eq!(success, MlockResult::Success);
        assert_eq!(disabled, MlockResult::Disabled);
        assert_eq!(failed, MlockResult::Failed(1));
        assert_ne!(success, disabled);
        assert_ne!(success, failed);

        // Test Debug impl (only checks that formatting does not panic)
        let _ = format!("{:?}", success);
        let _ = format!("{:?}", disabled);
        let _ = format!("{:?}", failed);

        // Test Copy: `success` is still usable after being assigned away
        let cloned = success;
        assert_eq!(cloned, success);
    }

    #[test]
    fn test_mlock_disabled_when_not_required() {
        // This test should not fail even without privileges
        // when required=false
        let result = lock_daemon_memory(false);
        assert!(result.is_ok());
        // Any variant is acceptable here: Success or Failed from the real
        // syscall, or Disabled from the platform fallback. It must not be Err.
        let mlock_result = result.expect("should succeed");
        assert!(matches!(
            mlock_result,
            MlockResult::Success | MlockResult::Failed(_) | MlockResult::Disabled
        ));
    }

    #[test]
    fn test_is_memory_locked_returns_bool() {
        // Just verify it doesn't panic
        let _ = is_memory_locked();
    }

    #[test]
    fn test_unlock_daemon_memory() {
        // Should not panic (even without prior lock)
        let result = unlock_daemon_memory();
        // The result is deliberately ignored: the munlockall()-backed
        // implementation may return Err, while the fallback always returns Ok.
        let _ = result;
    }

    #[test]
    fn test_apply_memory_config_disabled() {
        let config = ResourceConfig {
            lock_memory: false,
            lock_memory_required: false,
            ..ResourceConfig::default()
        };

        // With lock_memory=false no lock is attempted, so this must be Ok.
        let result = apply_memory_config(&config);
        assert!(result.is_ok());
    }

    #[test]
    fn test_apply_memory_config_enabled_not_required() {
        let config = ResourceConfig {
            lock_memory: true,
            lock_memory_required: false,
            ..ResourceConfig::default()
        };

        let result = apply_memory_config(&config);
        // Should succeed (even if mlock fails, since required=false)
        assert!(result.is_ok());
    }

    #[test]
    #[cfg(target_os = "linux")]
    fn test_mlock_with_privileges() {
        // The lock itself may succeed or fail depending on system
        // configuration; with required=false the call must return Ok either way.
        let result = lock_daemon_memory(false);
        assert!(result.is_ok());

        match result.expect("mlock result") {
            MlockResult::Success => {
                // mlockall() succeeded. Note: VmLck in /proc/self/status may
                // still be 0 for minimal test processes since only resident
                // pages are counted. We verify the syscall succeeded, not that
                // pages are locked (which depends on memory pressure).
                // Clean up
                let _ = unlock_daemon_memory();
            }
            MlockResult::Failed(errno) => {
                // Expected in unprivileged environments
                assert!(
                    errno == libc::EPERM || errno == libc::ENOMEM,
                    "Unexpected errno: {}",
                    errno
                );
            }
            MlockResult::Disabled => {
                panic!("Should not be disabled on Linux");
            }
        }
    }

    #[test]
    #[cfg(target_os = "linux")]
    fn test_mlock_required_may_fail() {
        // When required=true, mlock might return error if no privileges
        let result = lock_daemon_memory(true);
        // Either succeeds (with privileges) or fails (without)
        match result {
            Ok(MlockResult::Success) => {
                // Has privileges, clean up
                let _ = unlock_daemon_memory();
            }
            Err(_) => {
                // Expected without CAP_IPC_LOCK
            }
            Ok(MlockResult::Failed(_)) => {
                // required=true must turn a failure into Err, never Ok(Failed)
                panic!("Should not return Failed when required=true");
            }
            Ok(MlockResult::Disabled) => {
                panic!("Should not be disabled on Linux");
            }
        }
    }

    #[test]
    fn test_mlock_result_failed_different_errnos() {
        // Failed(..) compares by the errno it carries
        let failed_eperm = MlockResult::Failed(libc::EPERM);
        let failed_enomem = MlockResult::Failed(libc::ENOMEM);
        let failed_einval = MlockResult::Failed(libc::EINVAL);

        assert_ne!(failed_eperm, failed_enomem);
        assert_ne!(failed_enomem, failed_einval);
        assert_eq!(failed_eperm, MlockResult::Failed(libc::EPERM));
    }

    #[test]
    fn test_mlock_result_copy_semantics() {
        // `original` is used after two by-value assignments, which only
        // compiles because MlockResult is Copy.
        let original = MlockResult::Success;
        let copy1 = original;
        let copy2 = original;
        assert_eq!(copy1, copy2);
        assert_eq!(original, copy1);
    }

    #[test]
    fn test_resource_config_all_memory_options() {
        // Test with all memory options enabled
        let config = ResourceConfig {
            lock_memory: true,
            lock_memory_required: true,
            memory_bytes: 1024 * 1024 * 512, // 512MB
            ..ResourceConfig::default()
        };

        // On unprivileged systems, this may fail
        let result = apply_memory_config(&config);
        // Result depends on system privileges, so it is not asserted;
        // the test only checks that the call does not panic.
        let _ = result;
    }

    #[test]
    fn test_resource_config_default_memory_values() {
        // Memory locking is opt-in: both flags default to false
        let config = ResourceConfig::default();
        assert!(!config.lock_memory);
        assert!(!config.lock_memory_required);
    }

    #[test]
    #[cfg(target_os = "linux")]
    fn test_is_memory_locked_after_lock() {
        // Try to lock and check status
        let result = lock_daemon_memory(false);
        if let Ok(MlockResult::Success) = result {
            // Memory should be locked now
            // Note: is_memory_locked checks VmLck which may be 0 for small processes,
            // so the returned value is not asserted.
            let _ = is_memory_locked();
            let _ = unlock_daemon_memory();
        }
    }

    #[test]
    #[cfg(target_os = "linux")]
    fn test_unlock_after_lock() {
        // Unlock is only checked when the lock actually succeeded
        let result = lock_daemon_memory(false);
        if let Ok(MlockResult::Success) = result {
            let unlock_result = unlock_daemon_memory();
            assert!(unlock_result.is_ok());
        }
    }

    #[test]
    fn test_unlock_without_lock() {
        // Unlocking without prior lock should not panic
        let result = unlock_daemon_memory();
        // The result itself is not asserted
        let _ = result;
    }

    #[test]
    fn test_multiple_lock_unlock_cycles() {
        // Repeated lock/unlock must not panic; unlock is only attempted
        // after a successful lock.
        for _ in 0..3 {
            let lock_result = lock_daemon_memory(false);
            if let Ok(MlockResult::Success) = lock_result {
                let _ = unlock_daemon_memory();
            }
        }
    }

    #[test]
    #[cfg(target_os = "linux")]
    fn test_proc_status_parsing() {
        // Test that we can read /proc/self/status
        let status = std::fs::read_to_string("/proc/self/status");
        assert!(status.is_ok());
        let status = status.unwrap();
        // Only checks that at least one `Vm*` field is present; the `VmLck`
        // line specifically is not required by this assertion.
        assert!(status.lines().any(|l| l.starts_with("Vm")));
    }

    #[test]
    fn test_apply_memory_config_with_defaults() {
        // The default config has lock_memory=false, so nothing is locked
        let config = ResourceConfig::default();
        let result = apply_memory_config(&config);
        assert!(result.is_ok());
    }

    #[test]
    fn test_mlock_result_debug_output() {
        // The derived Debug output names the variant and its payload
        let success = MlockResult::Success;
        let debug_str = format!("{:?}", success);
        assert!(debug_str.contains("Success"));

        let disabled = MlockResult::Disabled;
        let debug_str = format!("{:?}", disabled);
        assert!(debug_str.contains("Disabled"));

        let failed = MlockResult::Failed(42);
        let debug_str = format!("{:?}", failed);
        assert!(debug_str.contains("Failed"));
        assert!(debug_str.contains("42"));
    }
}