Skip to main content

sqlite_graphrag/
memory_guard.rs

1//! Memory guard: checks RAM availability before loading the ONNX model.
2//!
3//! Loading the model via `fastembed` consumes approximately
4//! [`crate::constants::EMBEDDING_LOAD_EXPECTED_RSS_MB`] MiB of resident memory.
5//! Without this guard, multiple parallel invocations can exhaust RAM and trigger
6//! OOM (Out-Of-Memory), stalling the system.
7//!
8//! This guard queries the OS via `sysinfo` before any heavy initialisation,
9//! aborting with [`crate::errors::AppError::LowMemory`] (exit 77) when the
10//! configured floor is not met.
11
12use sysinfo::{
13    get_current_pid, MemoryRefreshKind, ProcessRefreshKind, ProcessesToUpdate, RefreshKind, System,
14    UpdateKind,
15};
16
17use crate::errors::AppError;
18
19/// Returns the current available memory in MiB.
20pub fn available_memory_mb() -> u64 {
21    let sys =
22        System::new_with_specifics(RefreshKind::new().with_memory(MemoryRefreshKind::everything()));
23    let available_bytes = sys.available_memory();
24    available_bytes / (1024 * 1024)
25}
26
27/// Returns the current process RSS in MiB when available.
28pub fn current_process_memory_mb() -> Option<u64> {
29    let pid = get_current_pid().ok()?;
30    let mut sys =
31        System::new_with_specifics(RefreshKind::new().with_memory(MemoryRefreshKind::everything()));
32    sys.refresh_processes_specifics(
33        ProcessesToUpdate::Some(&[pid]),
34        true,
35        ProcessRefreshKind::new()
36            .with_memory()
37            .with_exe(UpdateKind::OnlyIfNotSet),
38    );
39    sys.process(pid).map(|p| p.memory() / (1024 * 1024))
40}
41
/// Calculates the safe concurrency ceiling for heavy embedding workloads.
///
/// Canonical formula:
/// `permits = min(cpu_count, available_mb / ram_per_task_mb)`
///
/// The result is clamped between `1` and `max_concurrency`. No extra safety
/// margin (such as halving) is applied.
///
/// # Parameters
/// - `available_mb`: memory available for the workload, in the same unit as
///   `ram_per_task_mb`.
/// - `cpu_count`: number of CPUs to schedule on; `0` is treated as `1`.
/// - `ram_per_task_mb`: expected memory cost of one task; `0` is treated as
///   `1` so the division can never panic.
/// - `max_concurrency`: hard upper bound on the result; `0` is treated as `1`.
///
/// # Returns
/// The number of permits, always at least `1` and never above
/// `max(max_concurrency, 1)`.
pub fn calculate_safe_concurrency(
    available_mb: u64,
    cpu_count: usize,
    ram_per_task_mb: u64,
    max_concurrency: usize,
) -> usize {
    let cpu_count = cpu_count.max(1);
    let max_concurrency = max_concurrency.max(1);
    let ram_per_task_mb = ram_per_task_mb.max(1);

    let memory_bound = available_mb / ram_per_task_mb;

    // Compare in `u64`, which holds both operands losslessly, and narrow only
    // the winner. The winner never exceeds `cpu_count`, so it always fits back
    // into `usize`; narrowing `memory_bound` first could wrap on targets where
    // `usize` is smaller than `u64` and under-report the memory bound.
    let resource_bound = (cpu_count as u64).min(memory_bound).max(1) as usize;

    // G18: removed unconditional /2 margin — callers should pass lower ram_per_task_mb
    // when daemon is active (model shared) instead of halving the result
    resource_bound.min(max_concurrency)
}
64
65/// Checks whether sufficient memory is available to start loading the model.
66///
67/// # Parameters
68/// - `min_mb`: minimum floor in MiB of available memory (typically
69///   [`crate::constants::MIN_AVAILABLE_MEMORY_MB`]).
70///
71/// # Errors
72/// Returns [`AppError::LowMemory`] when `available_mb < min_mb`.
73///
74/// # Returns
75/// Returns `Ok(available_mb)` with the actual available memory in MiB.
76pub fn check_available_memory(min_mb: u64) -> Result<u64, AppError> {
77    let available_mb = available_memory_mb();
78
79    if available_mb < min_mb {
80        return Err(AppError::LowMemory {
81            available_mb,
82            required_mb: min_mb,
83        });
84    }
85
86    Ok(available_mb)
87}
88
#[cfg(test)]
mod tests {
    use super::*;

    /// A floor of zero cannot be violated, and the measured value must be positive.
    #[test]
    fn check_available_memory_with_zero_always_passes() {
        let result = check_available_memory(0);
        match &result {
            Ok(mb) => assert!(*mb > 0, "system must report positive memory"),
            Err(_) => panic!("min_mb=0 must always pass, got: {result:?}"),
        }
    }

    /// No machine can satisfy a floor of `u64::MAX` MiB.
    #[test]
    fn check_available_memory_with_huge_value_fails() {
        let result = check_available_memory(u64::MAX);
        let is_low_memory = matches!(result, Err(AppError::LowMemory { .. }));
        assert!(
            is_low_memory,
            "u64::MAX MiB must fail with LowMemory, got: {result:?}"
        );
    }

    /// The error must echo the requested floor and a smaller measured value.
    #[test]
    fn low_memory_error_contains_correct_values() {
        let other = check_available_memory(u64::MAX);
        if let Err(AppError::LowMemory {
            available_mb,
            required_mb,
        }) = &other
        {
            assert_eq!(*required_mb, u64::MAX);
            assert!(*available_mb < u64::MAX);
        } else {
            unreachable!("expected LowMemory, got: {other:?}");
        }
    }

    #[test]
    fn calculate_safe_concurrency_no_half_margin() {
        // v1.0.75 (G18): halving margin removed. 8000 MB / 1000 MB = 8, min(8, 8) = 8.
        assert_eq!(calculate_safe_concurrency(8_000, 8, 1_000, 16), 8);
    }

    #[test]
    fn calculate_safe_concurrency_never_returns_zero() {
        // 100 MB / 10_000 MB = 0 tasks by memory, but the floor of one permit applies.
        assert_eq!(calculate_safe_concurrency(100, 1, 10_000, 16), 1);
    }

    #[test]
    fn calculate_safe_concurrency_respects_max_ceiling() {
        // 128 GB / 500 MB = 256, min(64, 256) = 64, clamped to max 16
        assert_eq!(calculate_safe_concurrency(128_000, 64, 500, 16), 16);
    }

    #[test]
    fn calculate_safe_concurrency_llm_worker_budget() {
        // LLM workers: 64 GB available, 8 CPUs, 350 MB per worker.
        // 64_000 / 350 = 182, min(8, 182) = 8.
        assert_eq!(calculate_safe_concurrency(64_000, 8, 350, 16), 8);
    }

    /// The running test process must be visible to the RSS probe.
    #[test]
    fn current_process_memory_mb_returns_some_value() {
        let rss = current_process_memory_mb();
        assert!(rss.is_some());
    }
}