sqlite_graphrag/constants.rs
1//! Compile-time constants shared across the crate.
2//!
3//! Grouped into embedding configuration, length and size limits, SQLite
4//! pragmas and retrieval tuning knobs. Values are taken from the PRD and
5//! must stay in sync with the migrations under `migrations/`.
6//!
7//! ## Dynamic concurrency permit calculation
8//!
9//! The maximum number of simultaneous instances can be adjusted at runtime
10//! using the formula:
11//!
12//! ```text
13//! permits = min(cpus, available_memory_mb / EMBEDDING_LOAD_EXPECTED_RSS_MB) * 0.5
14//! ```
15//!
16//! where `available_memory_mb` is obtained via `sysinfo::System::available_memory()`
17//! converted to MiB. The result is capped at `MAX_CONCURRENT_CLI_INSTANCES`
18//! and floored at 1.
19
/// Number of components in each embedding vector emitted by `multilingual-e5-small`.
pub const EMBEDDING_DIM: usize = 384;

/// `fastembed` model identifier that `remember` and `recall` fall back to.
pub const FASTEMBED_MODEL_DEFAULT: &str = "multilingual-e5-small";

/// How many inputs are handed to `fastembed` per encoding call.
pub const FASTEMBED_BATCH_SIZE: usize = 32;

/// Upper bound, in bytes, on the kebab-case `name` of a memory.
pub const MAX_MEMORY_NAME_LEN: usize = 80;

/// Upper bound, in bytes, on a kebab-case name derived by `ingest`.
///
/// Kept below [`MAX_MEMORY_NAME_LEN`] (80) so that collision suffixes
/// (`-2`, `-10`, ...) still fit when several files derive to the same base.
/// Only `src/commands/ingest.rs` uses this value.
pub const DERIVED_NAME_MAX_LEN: usize = 60;

/// Upper bound, in characters, on the `description` of a memory.
pub const MAX_MEMORY_DESCRIPTION_LEN: usize = 500;

/// Hard ceiling, in bytes, on the `body` of a memory.
pub const MAX_MEMORY_BODY_LEN: usize = 512_000;

/// Bodies longer than this many characters are split into chunks.
pub const MAX_BODY_CHARS_BEFORE_CHUNK: usize = 8_000;
47
/// How many times a statement is attempted while it keeps returning `SQLITE_BUSY`.
pub const MAX_SQLITE_BUSY_RETRIES: u32 = 5;

/// Delay, in milliseconds, before the first `SQLITE_BUSY` retry.
///
/// The delay doubles on every further attempt (exponential backoff):
/// 300 ms → 600 ms → 1200 ms → 2400 ms → 4800 ms (≈ 9.3 s total).
pub const SQLITE_BUSY_BASE_DELAY_MS: u64 = 300;

/// Timeout, in milliseconds, applied to statements.
pub const QUERY_TIMEOUT_MILLIS: u64 = 5_000;
59
/// Two memories whose Jaccard score exceeds this value count as fuzzy duplicates.
pub const DEDUP_FUZZY_THRESHOLD: f64 = 0.8;

/// Two memories whose cosine distance is below this value count as semantic duplicates.
pub const DEDUP_SEMANTIC_THRESHOLD: f32 = 0.1;

/// Largest hop count a graph traversal may use.
pub const MAX_GRAPH_HOPS: u32 = 2;

/// Relationships lighter than this weight are left out of traversals.
pub const MIN_RELATION_WEIGHT: f64 = 0.3;

/// Traversal depth `related` uses when `--hops` is not given.
pub const DEFAULT_MAX_HOPS: u32 = 2;

/// Minimum-weight filter applied to graph traversal by default.
pub const DEFAULT_MIN_WEIGHT: f64 = 0.3;

/// Weight given to a relationship at creation time unless stated otherwise.
pub const DEFAULT_RELATION_WEIGHT: f64 = 0.5;
80
/// `k` that `recall` uses when the caller leaves out `--k`.
pub const DEFAULT_K_RECALL: usize = 10;

/// `k` for memory KNN searches when the caller leaves out `--k`.
pub const K_MEMORIES_DEFAULT: usize = 10;

/// `k` for the entity KNN searches performed during graph expansion.
pub const K_ENTITIES_SEARCH: usize = 5;
89
/// Default upper bound on distinct entities persisted per memory.
///
/// Bumped from 30 → 50 in v1.0.43 to reduce semantic loss on rich documents.
/// Configurable at runtime via `SQLITE_GRAPHRAG_MAX_ENTITIES_PER_MEMORY`;
/// see [`max_entities_per_memory`].
pub const MAX_ENTITIES_PER_MEMORY: usize = 50;

/// Resolves the per-memory entity cap, honouring the env-var override.
///
/// v1.0.43: makes the cap (default 50) configurable via
/// `SQLITE_GRAPHRAG_MAX_ENTITIES_PER_MEMORY`. Stress tests showed inputs with
/// 33-46 candidates being truncated at the old cap of 30.
///
/// Leading and trailing whitespace in the variable is ignored. Returns the
/// parsed value when it lies in `[1, 1000]`; an unset, non-numeric or
/// out-of-range value yields [`MAX_ENTITIES_PER_MEMORY`].
pub fn max_entities_per_memory() -> usize {
    std::env::var("SQLITE_GRAPHRAG_MAX_ENTITIES_PER_MEMORY")
        .ok()
        // `usize::from_str` rejects surrounding whitespace, so an override such as
        // "120 " or "120\r" would otherwise be dropped silently for the default.
        .and_then(|raw| raw.trim().parse::<usize>().ok())
        .filter(|cap| (1..=1_000).contains(cap))
        .unwrap_or(MAX_ENTITIES_PER_MEMORY)
}
108
/// Upper bound on distinct relationships persisted per memory.
///
/// Default used by [`max_relationships_per_memory`] when no valid override is set.
pub const MAX_RELATIONSHIPS_PER_MEMORY: usize = 50;

/// Resolves the per-memory relationship cap, honouring the env-var override.
///
/// v1.0.22: makes the cap (default 50) configurable via
/// `SQLITE_GRAPHRAG_MAX_RELATIONS_PER_MEMORY`. Audit found that rich documents
/// silently hit the cap; users with dense technical corpora can raise it via env.
///
/// Leading and trailing whitespace in the variable is ignored. Returns the
/// parsed value when it lies in `[1, 10000]`; an unset, non-numeric or
/// out-of-range value yields [`MAX_RELATIONSHIPS_PER_MEMORY`].
pub fn max_relationships_per_memory() -> usize {
    std::env::var("SQLITE_GRAPHRAG_MAX_RELATIONS_PER_MEMORY")
        .ok()
        // `usize::from_str` rejects surrounding whitespace, so an override such as
        // "200 " or "200\r" would otherwise be dropped silently for the default.
        .and_then(|raw| raw.trim().parse::<usize>().ok())
        .filter(|cap| (1..=10_000).contains(cap))
        .unwrap_or(MAX_RELATIONSHIPS_PER_MEMORY)
}
124
/// Number of description characters shown as a preview in `list` output.
pub const TEXT_DESCRIPTION_PREVIEW_LEN: usize = 100;

/// Value of `PRAGMA busy_timeout` set on every connection.
pub const BUSY_TIMEOUT_MILLIS: i32 = 5_000;

/// Value of `PRAGMA cache_size`; the negative sign means the magnitude is in KiB.
pub const CACHE_SIZE_KB: i32 = -64_000;

/// Value of `PRAGMA mmap_size`, in bytes (256 MiB), set on each connection.
pub const MMAP_SIZE_BYTES: i64 = 256 * 1024 * 1024;

/// Threshold, in pages, for `PRAGMA wal_autocheckpoint`.
pub const WAL_AUTOCHECKPOINT_PAGES: i32 = 1_000;
139
/// `k` constant that Reciprocal Rank Fusion uses by default in `hybrid-search`.
pub const RRF_K_DEFAULT: u32 = 60;

/// Size of one body chunk, measured in tokens.
pub const CHUNK_SIZE_TOKENS: usize = 400;

/// How many tokens two consecutive chunks share.
pub const CHUNK_OVERLAP_TOKENS: usize = 50;

/// Explicit operational guard on multi-chunk documents in `remember`.
///
/// Embeddings on the multi-chunk path are computed serially to avoid ONNX
/// memory amplification; this limit gives agents and scripts a clear
/// operational ceiling.
pub const REMEMBER_MAX_SAFE_MULTI_CHUNKS: usize = 512;

/// Most chunks allowed in one controlled micro-batch in `remember`.
///
/// The `fastembed` runtime pads with `BatchLongest`, so an oversized batch
/// amplifies the cost of its longest chunk. This ceiling keeps batches small
/// even when the chunks themselves are short.
pub const REMEMBER_MAX_CONTROLLED_BATCH_CHUNKS: usize = 4;

/// Padded-token budget for one controlled micro-batch in `remember`.
///
/// The budget is computed as `max_tokens_no_batch * batch_size`, which
/// approximates the real cost of `BatchLongest` padding. Batches that exceed
/// it fall back to smaller batches or to serialisation.
pub const REMEMBER_MAX_CONTROLLED_BATCH_PADDED_TOKENS: usize = 512;
166
/// How long, in milliseconds, one ping probe against the daemon socket may take.
pub const DAEMON_PING_TIMEOUT_MS: u64 = 10;

/// Seconds of idleness after which the daemon shuts itself down.
pub const DAEMON_IDLE_SHUTDOWN_SECS: u64 = 600;

/// Longest wait for the daemon to report healthy after an auto-start.
pub const DAEMON_AUTO_START_MAX_WAIT_MS: u64 = 5_000;

/// Longest wait (ms) for a stale daemon to exit after a version-mismatch shutdown.
pub const DAEMON_VERSION_RESTART_WAIT_MS: u64 = 5_000;

/// First polling interval used while checking whether the daemon became healthy.
pub const DAEMON_AUTO_START_INITIAL_BACKOFF_MS: u64 = 50;

/// Upper limit on the backoff between automatic daemon spawn attempts.
pub const DAEMON_AUTO_START_MAX_BACKOFF_MS: u64 = 30_000;

/// Base backoff applied after a daemon spawn or health failure.
pub const DAEMON_SPAWN_BACKOFF_BASE_MS: u64 = 500;

/// Longest wait to acquire the daemon spawn lock.
pub const DAEMON_SPAWN_LOCK_WAIT_MS: u64 = 2_000;
190
/// Text placed in front of a body before embedding, as E5 models require.
pub const PASSAGE_PREFIX: &str = "passage: ";

/// Text placed in front of a query before embedding, as E5 models require.
pub const QUERY_PREFIX: &str = "query: ";
196
197/// Crate version string sourced from `CARGO_PKG_VERSION` at build time.
198pub const SQLITE_GRAPHRAG_VERSION: &str = env!("CARGO_PKG_VERSION");
199
/// Batch size for GLiNER NER forward passes.
///
/// Larger values amortise fixed forward-pass overhead but increase peak RAM.
/// Memory guide (CPU only, max 512-token windows):
///   N=4  →  ~54 MiB peak
///   N=8  → ~108 MiB peak  ← default
///   N=16 → ~216 MiB peak
///   N=32 → ~432 MiB peak  (not recommended without 16+ GiB RAM)
///
/// Override via `GRAPHRAG_NER_BATCH_SIZE` env var. Leading and trailing
/// whitespace is ignored. Numeric values outside [1, 32] are clamped silently;
/// an unset or non-numeric value yields the default of 8.
pub fn ner_batch_size() -> usize {
    std::env::var("GRAPHRAG_NER_BATCH_SIZE")
        .ok()
        // `usize::from_str` rejects surrounding whitespace, so an override such as
        // "16 " or "16\r" would otherwise be dropped silently for the default.
        .and_then(|raw| raw.trim().parse::<usize>().ok())
        .unwrap_or(8)
        .clamp(1, 32)
}
218
/// Default cap on tokens fed to GLiNER NER per memory body.
///
/// v1.0.31: large markdown documents (>50 KB) tokenise into thousands of
/// 512-token windows, each requiring a CPU forward pass that takes hundreds
/// of milliseconds. A 68 KB document was observed taking 5+ minutes.
/// Truncating the input before sliding-window construction caps the worst-case
/// latency while preserving extraction quality for the leading body region.
///
/// Regex prefilter still runs on the full body, so URLs, emails, UUIDs,
/// all-caps identifiers and CamelCase brand names are extracted regardless.
pub const EXTRACTION_MAX_TOKENS_DEFAULT: usize = 5_000;

/// Resolves the per-body NER token cap, honouring the env-var override.
///
/// Override via `SQLITE_GRAPHRAG_EXTRACTION_MAX_TOKENS` env var. Leading and
/// trailing whitespace is ignored. Returns the parsed value when it lies in
/// `[512, 100_000]`; an unset, non-numeric or out-of-range value yields
/// [`EXTRACTION_MAX_TOKENS_DEFAULT`].
pub fn extraction_max_tokens() -> usize {
    std::env::var("SQLITE_GRAPHRAG_EXTRACTION_MAX_TOKENS")
        .ok()
        // `usize::from_str` rejects surrounding whitespace, so an override such as
        // "2048 " or "2048\r" would otherwise be dropped silently for the default.
        .and_then(|raw| raw.trim().parse::<usize>().ok())
        .filter(|cap| (512..=100_000).contains(cap))
        .unwrap_or(EXTRACTION_MAX_TOKENS_DEFAULT)
}
242
/// GLiNER confidence threshold for span scoring.
///
/// Override via `SQLITE_GRAPHRAG_GLINER_THRESHOLD` env var. Leading and
/// trailing whitespace is ignored. Values that do not parse as `f32` or fall
/// outside `[0.0, 1.0]` (including NaN) are ignored and the default `0.5` is used.
pub fn gliner_confidence_threshold() -> f32 {
    std::env::var("SQLITE_GRAPHRAG_GLINER_THRESHOLD")
        .ok()
        // `f32::from_str` rejects surrounding whitespace, so an override such as
        // "0.7 " or "0.7\r" would otherwise be dropped silently for the default.
        .and_then(|raw| raw.trim().parse::<f32>().ok())
        // NaN compares false against both bounds, so it is filtered out here too.
        .filter(|threshold| (0.0..=1.0).contains(threshold))
        .unwrap_or(0.5)
}
254
/// HuggingFace repository for the GLiNER ONNX model.
///
/// Override via `SQLITE_GRAPHRAG_GLINER_MODEL` env var. The value is trimmed;
/// an unset, non-Unicode, empty or whitespace-only variable yields the default
/// repository `onnx-community/gliner_multi-v2.1`.
pub fn gliner_model_repo() -> String {
    std::env::var("SQLITE_GRAPHRAG_GLINER_MODEL")
        .ok()
        .map(|raw| raw.trim().to_owned())
        // `VAR=` (set but empty) is a common way to clear an override; an empty
        // string is never a usable repository id, so treat it as unset.
        .filter(|repo| !repo.is_empty())
        .unwrap_or_else(|| "onnx-community/gliner_multi-v2.1".to_string())
}
262
/// PRD-canonical pattern used to validate names and namespaces.
///
/// Accepts either a single `[a-z0-9]` character, or a string of 2-80
/// characters drawn from `[a-z0-9-]` that begins with a letter and ends with a
/// letter or digit. The `__` prefix (internal reserved) is rejected.
pub const NAME_SLUG_REGEX: &str = r"^[a-z][a-z0-9-]{0,78}[a-z0-9]$|^[a-z0-9]$";

/// Retention period (days) that `purge` uses when `--retention-days` is not given.
pub const PURGE_RETENTION_DAYS_DEFAULT: u32 = 90;

/// Cap on simultaneously active namespaces (deleted_at IS NULL). Exit 5 when exceeded.
pub const MAX_NAMESPACES_ACTIVE: u32 = 100;
273
/// Most tokens an embedding input may contain before it has to be chunked.
pub const EMBEDDING_MAX_TOKENS: usize = 512;

/// Cap on rows returned by the recursive graph CTE in `recall`.
pub const K_GRAPH_MATCHES_LIMIT: usize = 20;

/// `--limit` that `list` uses when the flag is omitted.
pub const K_LIST_DEFAULT_LIMIT: usize = 100;

/// `--limit` that `graph entities` uses when the flag is omitted.
pub const K_GRAPH_ENTITIES_DEFAULT_LIMIT: usize = 50;

/// `--limit` that `related` uses when the flag is omitted.
pub const K_RELATED_DEFAULT_LIMIT: usize = 10;

/// `--limit` that `history` uses when the flag is omitted.
pub const K_HISTORY_DEFAULT_LIMIT: usize = 20;

/// Weight given by default to the vector side of the `hybrid-search` RRF formula.
pub const WEIGHT_VEC_DEFAULT: f64 = 1.0;

/// Weight given by default to the BM25 text side of the `hybrid-search` RRF formula.
pub const WEIGHT_FTS_DEFAULT: f64 = 1.0;

/// Number of body characters emitted as a preview in text/markdown formats.
pub const TEXT_BODY_PREVIEW_LEN: usize = 200;
300
/// Value injected into ORT_NUM_THREADS when the user has not set it.
pub const ORT_NUM_THREADS_DEFAULT: &str = "1";

/// Value injected into ORT_INTRA_OP_NUM_THREADS when it is not set.
pub const ORT_INTRA_OP_NUM_THREADS_DEFAULT: &str = "1";

/// Value injected into OMP_NUM_THREADS when the user has not set it.
pub const OMP_NUM_THREADS_DEFAULT: &str = "1";

/// Exit code for a partial batch failure (PRD line 1822).
///
/// In v1.x this number conflicts with DbBusy; in v2.0.0 DbBusy migrates to 15
/// and this code takes 13 per PRD.
pub const BATCH_PARTIAL_FAILURE_EXIT_CODE: i32 = 13;

/// Exit code for DbBusy in v2.0.0 (moved from 13 so that 13 is free for batch failure).
pub const DB_BUSY_EXIT_CODE: i32 = 15;
316
/// Name of the file backing the advisory exclusive lock that prevents parallel invocations.
pub const CLI_LOCK_FILE: &str = "cli.lock";

/// Milliseconds `--wait-lock` sleeps between `try_lock_exclusive` attempts.
pub const CLI_LOCK_POLL_INTERVAL_MS: u64 = 500;

/// Process exit code used when the lock is busy and no wait was requested (EX_TEMPFAIL).
pub const CLI_LOCK_EXIT_CODE: i32 = 75;

/// Cap on CLI instances running at the same time.
///
/// Matches `DAEMON_MAX_CONCURRENT_CLIENTS` from the PRD. It bounds the counting
/// semaphore in [`crate::lock`] so that several parallel invocations loading
/// the ONNX model at once cannot overload memory.
pub const MAX_CONCURRENT_CLI_INSTANCES: usize = 4;

/// Available memory, in MiB, that must be present before model loading starts.
///
/// When `sysinfo::System::available_memory() / 1_048_576` is lower than this
/// value, the invocation aborts with [`crate::errors::AppError::LowMemory`]
/// (exit code [`LOW_MEMORY_EXIT_CODE`]).
pub const MIN_AVAILABLE_MEMORY_MB: u64 = 2_048;

/// Longest time, in seconds, an instance waits for a concurrency slot.
///
/// Serves as the default of `--max-wait-secs` in the CLI. Once the limit is
/// exhausted the invocation returns [`crate::errors::AppError::AllSlotsFull`]
/// with exit code [`CLI_LOCK_EXIT_CODE`] (75).
pub const CLI_LOCK_DEFAULT_WAIT_SECS: u64 = 300;
346
/// RSS, in MiB, expected of one instance that has the ONNX model loaded via fastembed.
///
/// Feeds the formula
/// `min(cpus, available_memory_mb / EMBEDDING_LOAD_EXPECTED_RSS_MB) * 0.5`
/// that yields the dynamic permit count.
///
/// Calibrated on 2026-04-23 with `/usr/bin/time -v` against
/// `sqlite-graphrag v1.0.3` on the heavy commands `remember`, `recall`, and
/// `hybrid-search`, all of which peaked near 1.03 GiB RSS per process. The
/// value rounds that up with a defensive margin.
pub const EMBEDDING_LOAD_EXPECTED_RSS_MB: u64 = 1_100;

/// Process exit code used when available memory is below [`MIN_AVAILABLE_MEMORY_MB`].
///
/// `77` is `EX_NOPERM` in glibc sysexits; it is reused here to mean
/// "insufficient system resource to proceed".
pub const LOW_MEMORY_EXIT_CODE: i32 = 77;
362
/// Canonical value of `PRAGMA user_version` written after migrations.
///
/// **Why 49 instead of [`CURRENT_SCHEMA_VERSION`]?**
/// `user_version` is a 32-bit integer that SQLite reserves for application use.
/// We deliberately set it to a project-specific marker (49 = decimal) so external
/// inspection tools (`sqlite3 db.sqlite "PRAGMA user_version"`, the `file` command,
/// SQLite browser GUIs) can distinguish a sqlite-graphrag database from a generic
/// SQLite file at a glance. The application-level schema version (the value of
/// [`CURRENT_SCHEMA_VERSION`]) is stored in the `schema_meta` table and exposed via
/// `health --json`/`stats --json`. Bumping migrations does NOT change this constant.
/// Refinery uses its own `refinery_schema_history` table for migration bookkeeping.
pub const SCHEMA_USER_VERSION: i64 = 49;

/// Current schema version, equal to the highest migration number in `migrations/Vnnn__*.sql`.
///
/// Added in v1.0.27 as a runtime and test sanity check.
/// Must be bumped in sync with new Refinery migrations; the unit test
/// `schema_version_matches_migrations_count` validates this automatically.
pub const CURRENT_SCHEMA_VERSION: u32 = 11;
382
#[cfg(test)]
mod tests_schema_version {
    use super::CURRENT_SCHEMA_VERSION;

    /// Checks that `CURRENT_SCHEMA_VERSION` equals the number of `V*.sql`
    /// files found in `<CARGO_MANIFEST_DIR>/migrations`.
    #[test]
    fn schema_version_matches_migrations_count() {
        let migrations_dir =
            std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("migrations");
        let count = std::fs::read_dir(&migrations_dir)
            .expect("migrations directory must exist")
            .filter_map(|entry| entry.ok())
            .filter(|entry| {
                // Count only `V*.sql`, as the assertion message promises; a stray
                // backup or editor file starting with `V` must not inflate the total.
                let file_name = entry.file_name();
                let file_name = file_name.to_string_lossy();
                file_name.starts_with('V') && file_name.ends_with(".sql")
            })
            .count();
        // Widen the constant rather than narrowing the count, so no cast can truncate.
        let expected = CURRENT_SCHEMA_VERSION as usize;
        assert_eq!(
            expected, count,
            "CURRENT_SCHEMA_VERSION ({CURRENT_SCHEMA_VERSION}) must equal the number of V*.sql migrations ({count})"
        );
    }
}
401}