Skip to main content

sqlite_graphrag/
memory_guard.rs

//! Memory guard: checks RAM availability before heavy embedding workloads.
//!
//! Each LLM embedding worker spawns a `claude -p` / `codex exec` subprocess
//! costing roughly [`crate::constants::LLM_WORKER_RSS_MB`] MiB of resident
//! memory. Without this guard, multiple parallel invocations can exhaust RAM
//! and trigger OOM (Out-Of-Memory), stalling the system.
//!
//! This guard queries the OS via `sysinfo` before any heavy initialisation,
//! aborting with [`crate::errors::AppError::LowMemory`] (exit 77) when the
//! configured floor is not met.
11
12use sysinfo::{
13    get_current_pid, MemoryRefreshKind, ProcessRefreshKind, ProcessesToUpdate, RefreshKind, System,
14    UpdateKind,
15};
16
17use crate::errors::AppError;
18
19/// Returns the current available memory in MiB.
20pub fn available_memory_mb() -> u64 {
21    let sys =
22        System::new_with_specifics(RefreshKind::new().with_memory(MemoryRefreshKind::everything()));
23    let available_bytes = sys.available_memory();
24    available_bytes / (1024 * 1024)
25}
26
27/// Returns the current process RSS in MiB when available.
28pub fn current_process_memory_mb() -> Option<u64> {
29    let pid = get_current_pid().ok()?;
30    let mut sys =
31        System::new_with_specifics(RefreshKind::new().with_memory(MemoryRefreshKind::everything()));
32    sys.refresh_processes_specifics(
33        ProcessesToUpdate::Some(&[pid]),
34        true,
35        ProcessRefreshKind::new()
36            .with_memory()
37            .with_exe(UpdateKind::OnlyIfNotSet),
38    );
39    sys.process(pid).map(|p| p.memory() / (1024 * 1024))
40}
41
/// Calculates the safe concurrency ceiling for heavy embedding workloads.
///
/// Canonical formula:
/// `permits = min(cpus, available_memory_mb / ram_per_task_mb)`
///
/// No extra safety margin is applied: the former unconditional halving was
/// removed (G18). Callers should pass a lower `ram_per_task_mb` when the daemon
/// is active (model shared) instead of relying on the result being halved.
///
/// # Parameters
/// - `available_mb`: available memory in MiB.
/// - `cpu_count`: number of CPUs; `0` is treated as `1`.
/// - `ram_per_task_mb`: memory budget per task in MiB; `0` is treated as `1`,
///   so the division can never be by zero.
/// - `max_concurrency`: hard upper bound; `0` is treated as `1`.
///
/// # Returns
/// A permit count clamped between `1` and `max_concurrency`; never `0`.
pub fn calculate_safe_concurrency(
    available_mb: u64,
    cpu_count: usize,
    ram_per_task_mb: u64,
    max_concurrency: usize,
) -> usize {
    let cpu_count = cpu_count.max(1);
    let max_concurrency = max_concurrency.max(1);
    let ram_per_task_mb = ram_per_task_mb.max(1);

    // Compare in `u64` space and only narrow once the value is known to fit.
    // A plain `as usize` on the quotient would silently truncate on targets
    // where `usize` is narrower than 64 bits (e.g. `1 << 32` would become 0).
    let memory_bound = available_mb / ram_per_task_mb;
    let resource_bound = if memory_bound < cpu_count as u64 {
        // Strictly below `cpu_count`, so the value is guaranteed to fit `usize`.
        memory_bound as usize
    } else {
        cpu_count
    };

    resource_bound.max(1).min(max_concurrency)
}
64
65/// Checks whether sufficient memory is available to start loading the model.
66///
67/// # Parameters
68/// - `min_mb`: minimum floor in MiB of available memory (typically
69///   [`crate::constants::MIN_AVAILABLE_MEMORY_MB`]).
70///
71/// # Errors
72/// Returns [`AppError::LowMemory`] when `available_mb < min_mb`.
73///
74/// # Returns
75/// Returns `Ok(available_mb)` with the actual available memory in MiB.
76pub fn check_available_memory(min_mb: u64) -> Result<u64, AppError> {
77    let available_mb = available_memory_mb();
78
79    if available_mb < min_mb {
80        return Err(AppError::LowMemory {
81            available_mb,
82            required_mb: min_mb,
83        });
84    }
85
86    Ok(available_mb)
87}
88
89/// Rejects an embedding input that would overflow the model's token window
90/// (GAP-SG-02).
91///
92/// The PRIMARY limit is TOKENS: `qwen/qwen3-embedding-8b` accepts roughly 32K
93/// tokens, so an input above [`crate::constants::EMBEDDING_REQUEST_MAX_TOKENS`]
94/// is rejected before the HTTP request, using the conservative cl100k_base
95/// proxy in [`crate::tokenizer::count_tokens`]. The byte cap
96/// [`crate::constants::MAX_MEMORY_BODY_LEN`] is a SECONDARY, coarser guard kept
97/// as a cheap short-circuit so a pathological input is rejected even before
98/// tokenisation.
99///
100/// # Errors
101/// Returns [`AppError::Validation`] (exit 1, permanent) when either limit is
102/// exceeded; the message advises splitting the input into smaller memories.
103pub fn check_embedding_input_size(text: &str) -> Result<(), AppError> {
104    // Secondary guard: a byte length far above the body cap cannot fit the
105    // token window, and the check is O(1) versus tokenising the whole input.
106    let bytes = text.len();
107    if bytes > crate::constants::MAX_MEMORY_BODY_LEN {
108        return Err(AppError::Validation(format!(
109            "embedding input is {} bytes, above the {}-byte body cap; \
110             split it into smaller memories",
111            bytes,
112            crate::constants::MAX_MEMORY_BODY_LEN
113        )));
114    }
115
116    // Primary guard: the model's real ceiling is in tokens.
117    let tokens = crate::tokenizer::count_tokens(text);
118    if tokens > crate::constants::EMBEDDING_REQUEST_MAX_TOKENS {
119        return Err(AppError::Validation(format!(
120            "embedding input is {} tokens, above the {}-token model ceiling; \
121             split it into smaller memories",
122            tokens,
123            crate::constants::EMBEDDING_REQUEST_MAX_TOKENS
124        )));
125    }
126
127    Ok(())
128}
129
#[cfg(test)]
mod tests {
    use super::*;

    /// A floor of zero can never be violated, and the host must report some memory.
    #[test]
    fn check_available_memory_with_zero_always_passes() {
        let result = check_available_memory(0);
        assert!(result.is_ok(), "min_mb=0 must always pass, got: {result:?}");
        assert!(
            matches!(result, Ok(mb) if mb > 0),
            "system must report positive memory"
        );
    }

    /// An unsatisfiable floor must surface as `LowMemory`.
    #[test]
    fn check_available_memory_with_huge_value_fails() {
        let result = check_available_memory(u64::MAX);
        let is_low_memory = matches!(result, Err(AppError::LowMemory { .. }));
        assert!(
            is_low_memory,
            "u64::MAX MiB must fail with LowMemory, got: {result:?}"
        );
    }

    /// The error carries the requested floor and the measured value.
    #[test]
    fn low_memory_error_contains_correct_values() {
        match check_available_memory(u64::MAX) {
            Err(AppError::LowMemory {
                available_mb: measured,
                required_mb: requested,
            }) => {
                assert_eq!(requested, u64::MAX);
                assert!(measured < u64::MAX);
            }
            other => unreachable!("expected LowMemory, got: {other:?}"),
        }
    }

    /// v1.0.75 (G18): halving margin removed. 8000 MB / 1000 MB = 8, min(8, 8) = 8.
    #[test]
    fn calculate_safe_concurrency_no_half_margin() {
        assert_eq!(calculate_safe_concurrency(8_000, 8, 1_000, 16), 8);
    }

    /// A per-task budget larger than the available memory still yields one permit.
    #[test]
    fn calculate_safe_concurrency_never_returns_zero() {
        assert_eq!(calculate_safe_concurrency(100, 1, 10_000, 16), 1);
    }

    /// 128 GB / 500 MB = 256, min(64, 256) = 64, clamped to max 16.
    #[test]
    fn calculate_safe_concurrency_respects_max_ceiling() {
        assert_eq!(calculate_safe_concurrency(128_000, 64, 500, 16), 16);
    }

    /// LLM workers: 64 GB available, 8 CPUs, 350 MB per worker.
    /// 64_000 / 350 = 182, min(8, 182) = 8.
    #[test]
    fn calculate_safe_concurrency_llm_worker_budget() {
        assert_eq!(calculate_safe_concurrency(64_000, 8, 350, 16), 8);
    }

    /// The running test process must be visible to the RSS probe.
    #[test]
    fn current_process_memory_mb_returns_some_value() {
        let rss = current_process_memory_mb();
        assert!(rss.is_some());
    }

    /// A short passage is below both the byte cap and the token ceiling.
    #[test]
    fn check_embedding_input_size_accepts_small_text() {
        let verdict = check_embedding_input_size("a short passage");
        assert!(verdict.is_ok());
    }

    /// Exercises the token guard specifically, not the byte guard.
    #[test]
    fn check_embedding_input_size_rejects_above_token_ceiling() {
        // "word " repeated is ~1 cl100k token per word; well above 30K words
        // exceeds EMBEDDING_REQUEST_MAX_TOKENS while staying under the byte cap.
        let word_count = crate::constants::EMBEDDING_REQUEST_MAX_TOKENS + 5_000;
        let oversized = "word ".repeat(word_count);
        assert!(
            oversized.len() <= crate::constants::MAX_MEMORY_BODY_LEN,
            "token guard, not byte guard, must be exercised"
        );
        match check_embedding_input_size(&oversized) {
            Err(AppError::Validation(msg)) => assert!(msg.contains("tokens")),
            other => unreachable!("expected Validation(tokens), got: {other:?}"),
        }
    }

    /// One byte over the body cap must trip the byte guard.
    #[test]
    fn check_embedding_input_size_rejects_above_byte_cap() {
        let over_cap_len = crate::constants::MAX_MEMORY_BODY_LEN + 1;
        let huge = "x".repeat(over_cap_len);
        match check_embedding_input_size(&huge) {
            Err(AppError::Validation(msg)) => assert!(msg.contains("bytes")),
            other => unreachable!("expected Validation(bytes), got: {other:?}"),
        }
    }
}