use super::compute::{
compute_index_memory_limit_mb, compute_max_batch_size, compute_max_chunks,
compute_memory_limit_mb, resolve_coreml_batch_size,
};
use super::constants::*;
use super::detect::detect_total_ram_mb;
use super::tier::MemoryTier;
/// Resolved memory and sizing limits for one process, built by
/// `MemoryPolicy::from_total_ram_mb` from a RAM figure, the matching tier's
/// defaults, and `TRUSTY_*` environment overrides.
#[derive(Debug, Clone, Copy)]
pub struct MemoryPolicy {
/// Total system RAM, in MB, that this policy was built from.
pub total_ram_mb: u64,
/// Tier chosen by `MemoryTier::from_total_ram_mb` for `total_ram_mb`.
pub tier: MemoryTier,
/// Memory limit (logged as the "daemon" limit): the tier default unless
/// `TRUSTY_MEMORY_LIMIT_MB` overrides it.
pub memory_limit_mb: usize,
/// Index memory limit: the tier default unless `TRUSTY_INDEX_MEMORY_LIMIT_MB`
/// overrides it, then raised so it is never below `memory_limit_mb`.
pub index_memory_limit_mb: usize,
/// Chunk limit: `TRUSTY_MAX_CHUNKS` when set; otherwise the tier default, or
/// `compute_max_chunks(memory_limit_mb)` when the memory limit differs from
/// the tier default.
pub max_chunks: usize,
/// Embedding cache setting: the tier default unless `TRUSTY_EMBEDDING_CACHE`
/// overrides it.
pub embedding_cache: usize,
/// Batch size: derived from the tier default or the index memory limit,
/// overridable via `TRUSTY_MAX_BATCH_SIZE`, and clamped to the tier's hard
/// cap unless `TRUSTY_MAX_BATCH_SIZE_EXPLICIT=1` is set.
pub max_batch_size: usize,
/// Value returned by `resolve_coreml_batch_size()`; exported as
/// `TRUSTY_COREML_BATCH_SIZE`.
pub coreml_batch_size: usize,
/// BM25 corpus cap: the tier default unless `TRUSTY_BM25_CORPUS_CAP`
/// overrides it.
pub bm25_corpus_cap: usize,
/// Knowledge-graph node limit (presumably; inferred from the name): the tier
/// default unless `TRUSTY_MAX_KG_NODES` overrides it.
pub max_kg_nodes: usize,
}
impl MemoryPolicy {
    /// Builds the policy for the current machine.
    ///
    /// Uses the total RAM reported by `detect_total_ram_mb`; when detection
    /// yields nothing, logs a warning and falls back to `FALLBACK_RAM_MB`.
    pub fn detect() -> Self {
        let total_ram_mb = detect_total_ram_mb().unwrap_or_else(|| {
            tracing::warn!(
                "memory_policy: could not detect total system RAM — \
                 falling back to {FALLBACK_RAM_MB} MB (Medium tier defaults)"
            );
            FALLBACK_RAM_MB
        });
        Self::from_total_ram_mb(total_ram_mb)
    }

    /// Builds the policy for a machine with `total_ram_mb` MB of RAM.
    ///
    /// Resolution order for each value:
    /// 1. the tier defaults (seeded with the proportional limits computed from
    ///    `total_ram_mb`),
    /// 2. values re-derived from a memory limit that differs from its tier
    ///    default (`max_chunks`, `max_batch_size`),
    /// 3. the matching `TRUSTY_*` environment variable, when it parses as a
    ///    `usize`.
    ///
    /// `max_batch_size` is additionally clamped to the tier's hard cap unless
    /// `TRUSTY_MAX_BATCH_SIZE_EXPLICIT=1` is set *and* `TRUSTY_MAX_BATCH_SIZE`
    /// holds a valid number.
    ///
    /// Side effect: the resolved values are written back to the process
    /// environment via [`MemoryPolicy::apply_to_env`] before returning.
    pub fn from_total_ram_mb(total_ram_mb: u64) -> Self {
        let tier = MemoryTier::from_total_ram_mb(total_ram_mb);
        let proportional_limit_mb = compute_memory_limit_mb(total_ram_mb);
        let proportional_index_limit_mb = compute_index_memory_limit_mb(total_ram_mb);
        let d = tier.defaults(proportional_limit_mb, proportional_index_limit_mb);

        let memory_limit_mb = env_override_usize("TRUSTY_MEMORY_LIMIT_MB", d.memory_limit_mb);
        // The index limit is never allowed to fall below the daemon limit.
        let index_memory_limit_mb =
            env_override_usize("TRUSTY_INDEX_MEMORY_LIMIT_MB", d.index_memory_limit_mb)
                .max(memory_limit_mb);

        // Keep the tier's tuned values while the limits are untouched; once a
        // limit has been moved away from its tier default, re-derive from it.
        let derived_batch_size = if index_memory_limit_mb == d.index_memory_limit_mb {
            d.max_batch_size
        } else {
            compute_max_batch_size(index_memory_limit_mb)
        };
        let derived_max_chunks = if memory_limit_mb == d.memory_limit_mb {
            d.max_chunks
        } else {
            compute_max_chunks(memory_limit_mb)
        };

        let explicit = std::env::var("TRUSTY_MAX_BATCH_SIZE_EXPLICIT")
            .map(|v| v == "1")
            .unwrap_or(false);
        // The bypass is only meaningful when the operator supplied a usable
        // number. If the variable is set but unparsable, `env_override_usize`
        // falls back to the derived value, and that value must still respect
        // the tier cap instead of being "honored verbatim".
        let user_supplied_batch_size = std::env::var("TRUSTY_MAX_BATCH_SIZE")
            .map(|v| v.parse::<usize>().is_ok())
            .unwrap_or(false);
        let raw_batch_size = env_override_usize("TRUSTY_MAX_BATCH_SIZE", derived_batch_size);
        let batch_cap = tier.batch_size_hard_cap();

        let max_batch_size = if explicit && user_supplied_batch_size {
            tracing::warn!(
                "memory_policy: TRUSTY_MAX_BATCH_SIZE_EXPLICIT=1 — honoring \
                 TRUSTY_MAX_BATCH_SIZE={} verbatim and bypassing tier {} hard cap of {}. \
                 Ensure you have measured the actual ORT transient-allocation cost per slot \
                 on your workload (defaults assume 32 MB/slot with arena disabled).",
                raw_batch_size,
                tier,
                batch_cap,
            );
            raw_batch_size
        } else if raw_batch_size > batch_cap {
            tracing::warn!(
                "memory_policy: TRUSTY_MAX_BATCH_SIZE={} exceeds tier {} hard cap of {}; \
                 clamping to protect against ORT transient-arena spike (issue #89). \
                 Set TRUSTY_MAX_BATCH_SIZE_EXPLICIT=1 to bypass this clamp.",
                raw_batch_size,
                tier,
                batch_cap,
            );
            batch_cap
        } else {
            raw_batch_size
        };

        let policy = MemoryPolicy {
            total_ram_mb,
            tier,
            memory_limit_mb,
            index_memory_limit_mb,
            max_chunks: env_override_usize("TRUSTY_MAX_CHUNKS", derived_max_chunks),
            embedding_cache: env_override_usize("TRUSTY_EMBEDDING_CACHE", d.embedding_cache),
            max_batch_size,
            coreml_batch_size: resolve_coreml_batch_size(),
            bm25_corpus_cap: env_override_usize("TRUSTY_BM25_CORPUS_CAP", d.bm25_corpus_cap),
            max_kg_nodes: env_override_usize("TRUSTY_MAX_KG_NODES", d.max_kg_nodes),
        };
        policy.apply_to_env();
        policy
    }

    /// Writes every resolved limit into the process environment under its
    /// `TRUSTY_*` name, as a decimal string.
    ///
    /// Because the same variables are read back as overrides by
    /// [`MemoryPolicy::from_total_ram_mb`], a later call in the same process
    /// sees these values as if they had been set externally.
    pub fn apply_to_env(&self) {
        let exports: [(&str, usize); 8] = [
            ("TRUSTY_MEMORY_LIMIT_MB", self.memory_limit_mb),
            ("TRUSTY_INDEX_MEMORY_LIMIT_MB", self.index_memory_limit_mb),
            ("TRUSTY_MAX_CHUNKS", self.max_chunks),
            ("TRUSTY_EMBEDDING_CACHE", self.embedding_cache),
            ("TRUSTY_MAX_BATCH_SIZE", self.max_batch_size),
            ("TRUSTY_COREML_BATCH_SIZE", self.coreml_batch_size),
            ("TRUSTY_BM25_CORPUS_CAP", self.bm25_corpus_cap),
            ("TRUSTY_MAX_KG_NODES", self.max_kg_nodes),
        ];
        for (name, value) in exports {
            // SAFETY: NOTE(review) — `set_var` is only sound while no other
            // thread is concurrently reading or writing the environment. This
            // assumes the policy is built during startup, before worker threads
            // exist; confirm against the call sites.
            unsafe {
                std::env::set_var(name, value.to_string());
            }
        }
    }

    /// Logs a two-line summary of the policy at `info` level.
    ///
    /// The first line reports the detected RAM (in whole GB, rounded down), the
    /// tier, and the *proportional* limits recomputed from `total_ram_mb`
    /// together with their floor/ceiling constants. These are not necessarily
    /// the effective limits. The second line reports the effective values
    /// stored in `self`.
    pub fn log_summary(&self) {
        let gb = self.total_ram_mb / 1024;
        let proportional = compute_memory_limit_mb(self.total_ram_mb);
        let proportional_index = compute_index_memory_limit_mb(self.total_ram_mb);
        tracing::info!(
            "trusty-search: detected {} GB RAM → tier={} \
             (daemon memory_limit_mb={}, 25% of RAM clamped to [{}, {}]; \
             index memory_limit_mb={}, 75% of RAM clamped to [{}, {}])",
            gb,
            self.tier,
            proportional,
            MEMORY_LIMIT_FLOOR_MB,
            MEMORY_LIMIT_CEIL_MB,
            proportional_index,
            INDEX_MEMORY_LIMIT_FLOOR_MB,
            INDEX_MEMORY_LIMIT_CEIL_MB,
        );
        tracing::info!(
            " MEMORY_LIMIT_MB={} INDEX_MEMORY_LIMIT_MB={} MAX_CHUNKS={} \
             EMBEDDING_CACHE={} MAX_BATCH_SIZE={} COREML_BATCH_SIZE={} \
             BM25_CORPUS_CAP={} MAX_KG_NODES={}",
            self.memory_limit_mb,
            self.index_memory_limit_mb,
            self.max_chunks,
            self.embedding_cache,
            self.max_batch_size,
            self.coreml_batch_size,
            self.bm25_corpus_cap,
            self.max_kg_nodes,
        );
    }
}
/// Reads the environment variable `name` and parses it as a `usize`.
///
/// Returns the parsed value on success. Returns `default` when the variable is
/// unset, holds data that is not valid Unicode, or does not parse as a `usize`;
/// the latter two cases log a warning so a mistyped override is not ignored
/// silently.
///
/// The value is parsed as-is: surrounding whitespace or a sign other than `+`
/// makes the parse fail and selects `default`.
pub(super) fn env_override_usize(name: &str, default: usize) -> usize {
    let v = match std::env::var(name) {
        Ok(v) => v,
        // Unset is the normal "no override" case and stays quiet.
        Err(std::env::VarError::NotPresent) => return default,
        // Set but undecodable: the operator tried to override, so say why it
        // was not applied.
        Err(std::env::VarError::NotUnicode(_)) => {
            tracing::warn!(
                "memory_policy: {name} is set but is not valid Unicode; \
                 using tier default ({default})"
            );
            return default;
        }
    };
    match v.parse::<usize>() {
        Ok(n) => n,
        Err(_) => {
            tracing::warn!(
                "memory_policy: {name}={v:?} is not a valid usize; \
                 using tier default ({default})"
            );
            default
        }
    }
}
#[allow(unused_imports)]
use super::compute::DEFAULT_COREML_TRIPWIRE_MB as _;