sqlite_graphrag/
constants.rs

//! Compile-time constants shared across the crate.
//!
//! Grouped into embedding configuration, length and size limits, SQLite
//! pragmas and retrieval tuning knobs. Values are taken from the PRD and
//! must stay in sync with the migrations under `migrations/`.
//!
//! ## Dynamic concurrency permit calculation
//!
//! The maximum number of simultaneous instances can be adjusted at runtime
//! using the formula:
//!
//! ```text
//! permits = min(cpus, available_memory_mb / LLM_WORKER_RSS_MB) * 0.5
//! ```
//!
//! where `available_memory_mb` is obtained via `sysinfo::System::available_memory()`
//! converted to MiB. The result is capped at `MAX_CONCURRENT_CLI_INSTANCES`
//! and floored at 1.

/// Default embedding vector dimensionality.
///
/// Restored to 384 to match the production corpus: existing vectors were
/// generated against `multilingual-e5-small` (384 dims), so a lower default
/// would silently mismatch live data. With the OpenRouter REST backend the
/// Matryoshka (MRL, arXiv 2205.13147) truncation happens server-side, so
/// 384 dims carry no per-float autoregressive output cost. Legacy databases
/// keep their recorded dimensionality via `schema_meta.dim`; the active dim
/// follows the precedence env > database > default (see [`embedding_dim`]).
pub const DEFAULT_EMBEDDING_DIM: usize = 384;

/// Active embedding dimensionality for this process.
///
/// `0` is the "unresolved" sentinel: no database dimensionality has been
/// recorded yet, so readers fall through to the default.
static ACTIVE_EMBEDDING_DIM: std::sync::atomic::AtomicUsize =
    std::sync::atomic::AtomicUsize::new(0);

35/// Resolves the active embedding dimensionality (single source of truth).
36///
37/// Precedence:
38/// 1. `SQLITE_GRAPHRAG_EMBEDDING_DIM` env var (also set by the global
39///    `--embedding-dim` flag before dispatch);
40/// 2. the value recorded via [`set_active_embedding_dim`] — populated from
41///    the `dim` key of `schema_meta` when the database is opened, so
42///    existing 384-dim databases keep working unchanged;
43/// 3. [`DEFAULT_EMBEDDING_DIM`].
44pub fn embedding_dim() -> usize {
45    if let Some(env_dim) = embedding_dim_from_env() {
46        return env_dim;
47    }
48    let active = ACTIVE_EMBEDDING_DIM.load(std::sync::atomic::Ordering::Acquire);
49    if active != 0 {
50        return active;
51    }
52    DEFAULT_EMBEDDING_DIM
53}
54
55/// Reads and validates the env-var override. Values outside [8, 4096]
56/// are rejected (returns `None`) so a typo cannot produce degenerate
57/// vectors or multi-MB embedding rows.
58pub fn embedding_dim_from_env() -> Option<usize> {
59    let raw = std::env::var("SQLITE_GRAPHRAG_EMBEDDING_DIM").ok()?;
60    match raw.parse::<usize>() {
61        Ok(n) if (8..=4096).contains(&n) => Some(n),
62        // G49: an invalid value silently fell back to the default (64),
63        // letting a typo permanently stamp a new database with the wrong
64        // dimensionality. Warn loudly instead of discarding in silence.
65        _ => {
66            tracing::warn!(
67                value = %raw,
68                "SQLITE_GRAPHRAG_EMBEDDING_DIM is invalid (expected an integer in [8, 4096]); ignoring and using the database/default dimensionality"
69            );
70            None
71        }
72    }
73}
74
75/// Records the dimensionality found in the opened database
76/// (`schema_meta.dim`). Out-of-range values are ignored. The env var,
77/// when set, always wins over this value (see [`embedding_dim`]).
78pub fn set_active_embedding_dim(dim: usize) {
79    if (8..=4096).contains(&dim) {
80        ACTIVE_EMBEDDING_DIM.store(dim, std::sync::atomic::Ordering::Release);
81    }
82}
83
// G46: FASTEMBED_MODEL_DEFAULT removed — the fastembed model was deleted in
// v1.0.76 (LLM-only build); `schema_meta.model` now records the CLI version.

/// Batch size for `fastembed` encoding calls.
pub const FASTEMBED_BATCH_SIZE: usize = 32;

/// Maximum byte length for a memory `name` field in kebab-case.
pub const MAX_MEMORY_NAME_LEN: usize = 80;

/// Maximum byte length for an `ingest`-derived kebab-case name.
///
/// Stricter than `MAX_MEMORY_NAME_LEN` (80) to leave headroom for collision
/// suffixes (`-2`, `-10`, ...) when multiple files derive to the same base.
/// Used exclusively by `src/commands/ingest.rs`.
pub const DERIVED_NAME_MAX_LEN: usize = 60;

/// Maximum character length for a memory `description` field.
pub const MAX_MEMORY_DESCRIPTION_LEN: usize = 500;

/// Hard upper bound on memory `body` length in bytes.
pub const MAX_MEMORY_BODY_LEN: usize = 512_000;

/// Body character count above which the body is split into chunks.
pub const MAX_BODY_CHARS_BEFORE_CHUNK: usize = 8_000;

/// Maximum attempts when a statement returns `SQLITE_BUSY`.
pub const MAX_SQLITE_BUSY_RETRIES: u32 = 5;

/// Base delay in milliseconds for the first SQLITE_BUSY retry.
///
/// Each subsequent attempt doubles the delay (exponential backoff):
/// 300 ms → 600 ms → 1200 ms → 2400 ms → 4800 ms (≈ 9.3 s total).
pub const SQLITE_BUSY_BASE_DELAY_MS: u64 = 300;

/// Query timeout applied to statements in milliseconds.
pub const QUERY_TIMEOUT_MILLIS: u64 = 5_000;

/// Jaccard threshold above which two memories are considered fuzzy duplicates.
pub const DEDUP_FUZZY_THRESHOLD: f64 = 0.8;

/// Cosine distance threshold below which two memories are semantic duplicates.
pub const DEDUP_SEMANTIC_THRESHOLD: f32 = 0.1;

/// Maximum number of hops allowed in graph traversals.
pub const MAX_GRAPH_HOPS: u32 = 2;

/// Minimum relationship weight required for traversal inclusion.
pub const MIN_RELATION_WEIGHT: f64 = 0.3;

/// Default traversal depth for `related` when `--hops` is omitted.
pub const DEFAULT_MAX_HOPS: u32 = 2;

/// Default minimum weight filter applied during graph traversal.
pub const DEFAULT_MIN_WEIGHT: f64 = 0.3;

/// Default weight assigned to newly created relationships.
pub const DEFAULT_RELATION_WEIGHT: f64 = 0.5;

/// Default `k` used by `recall` when the caller omits `--k`.
pub const DEFAULT_K_RECALL: usize = 10;

/// Default `k` for memory KNN searches when the caller omits `--k`.
pub const K_MEMORIES_DEFAULT: usize = 10;

/// Default `k` for entity KNN searches during graph expansion.
pub const K_ENTITIES_SEARCH: usize = 5;

/// Default upper bound on distinct entities persisted per memory.
///
/// Bumped from 30 → 50 in v1.0.43 to reduce semantic loss on rich documents.
/// Configurable at runtime via `SQLITE_GRAPHRAG_MAX_ENTITIES_PER_MEMORY`.
pub const MAX_ENTITIES_PER_MEMORY: usize = 50;

/// Resolves the per-memory entity cap, honouring the env-var override.
///
/// v1.0.43: makes the cap (default 50) configurable via `SQLITE_GRAPHRAG_MAX_ENTITIES_PER_MEMORY`.
/// Stress tests showed inputs with 33-46 candidates being truncated at the old cap of 30.
/// Surrounding whitespace in the override is ignored. Unset, unparsable or
/// out-of-range values (outside [1, 1000]) fall back to
/// [`MAX_ENTITIES_PER_MEMORY`].
pub fn max_entities_per_memory() -> usize {
    std::env::var("SQLITE_GRAPHRAG_MAX_ENTITIES_PER_MEMORY")
        .ok()
        // Trim so that `"75 "` or a trailing newline is not silently discarded.
        .and_then(|raw| raw.trim().parse::<usize>().ok())
        .filter(|cap| (1..=1_000).contains(cap))
        .unwrap_or(MAX_ENTITIES_PER_MEMORY)
}

/// Upper bound on distinct relationships persisted per memory.
pub const MAX_RELATIONSHIPS_PER_MEMORY: usize = 50;

/// Resolves the per-memory relationship cap, honouring the env-var override.
///
/// v1.0.22: makes the cap (default 50) configurable via `SQLITE_GRAPHRAG_MAX_RELATIONS_PER_MEMORY`.
/// Audit found that rich documents silently hit the cap; users with dense technical corpora
/// can raise it via env. Surrounding whitespace in the override is ignored.
/// Unset, unparsable or out-of-range values (outside [1, 10000]) fall back to
/// [`MAX_RELATIONSHIPS_PER_MEMORY`].
pub fn max_relationships_per_memory() -> usize {
    std::env::var("SQLITE_GRAPHRAG_MAX_RELATIONS_PER_MEMORY")
        .ok()
        // Trim so that `"200 "` or a trailing newline is not silently discarded.
        .and_then(|raw| raw.trim().parse::<usize>().ok())
        .filter(|cap| (1..=10_000).contains(cap))
        .unwrap_or(MAX_RELATIONSHIPS_PER_MEMORY)
}

/// Character length of the description preview shown in `list` output.
pub const TEXT_DESCRIPTION_PREVIEW_LEN: usize = 100;

/// `PRAGMA busy_timeout` value applied on every connection.
pub const BUSY_TIMEOUT_MILLIS: i32 = 5_000;

/// `PRAGMA cache_size` value.
///
/// The negative sign tells SQLite to read the magnitude as kibibytes rather
/// than as a page count.
pub const CACHE_SIZE_KB: i32 = -64_000;

/// `PRAGMA mmap_size` value in bytes applied to each connection (256 MiB).
pub const MMAP_SIZE_BYTES: i64 = 268_435_456;

/// `PRAGMA wal_autocheckpoint` threshold in pages.
pub const WAL_AUTOCHECKPOINT_PAGES: i32 = 1_000;

/// Default `k` constant used by Reciprocal Rank Fusion in `hybrid-search`.
pub const RRF_K_DEFAULT: u32 = 60;

/// Chunk size expressed in tokens for body splitting.
pub const CHUNK_SIZE_TOKENS: usize = 400;

/// Token overlap between consecutive chunks.
pub const CHUNK_OVERLAP_TOKENS: usize = 50;

/// Explicit operational guard for multi-chunk documents in `remember`.
///
/// The multi-chunk path uses serial embeddings to avoid ONNX memory amplification.
/// This limit preserves a clear operational ceiling for agents and scripts.
pub const REMEMBER_MAX_SAFE_MULTI_CHUNKS: usize = 512;

/// Ceiling on chunks per controlled micro-batch in `remember`.
///
/// The `fastembed` runtime uses `BatchLongest` padding, so oversized batches amplify
/// the cost of the longest chunk. This ceiling keeps batches small even when chunks are short.
pub const REMEMBER_MAX_CONTROLLED_BATCH_CHUNKS: usize = 4;

/// Maximum padded-token budget per controlled micro-batch in `remember`.
///
/// The budget uses `max_tokens_no_batch * batch_size`, approximating the real cost of
/// `BatchLongest` padding. Values exceeding this fall back to smaller batches or serialisation.
pub const REMEMBER_MAX_CONTROLLED_BATCH_PADDED_TOKENS: usize = 512;

/// Prefix prepended to bodies before embedding as required by E5 models.
pub const PASSAGE_PREFIX: &str = "passage: ";

/// Prefix prepended to queries before embedding as required by E5 models.
pub const QUERY_PREFIX: &str = "query: ";

234/// Crate version string sourced from `CARGO_PKG_VERSION` at build time.
235pub const SQLITE_GRAPHRAG_VERSION: &str = env!("CARGO_PKG_VERSION");
236
/// Batch size for GLiNER NER forward passes.
///
/// Larger values amortise fixed forward-pass overhead but increase peak RAM.
/// Memory guide (CPU only, max 512-token windows):
///   N=4  → ~54 MiB peak
///   N=8  → ~108 MiB peak  ← default
///   N=16 → ~216 MiB peak
///   N=32 → ~432 MiB peak  (not recommended without 16+ GiB RAM)
///
/// Override via `GRAPHRAG_NER_BATCH_SIZE` env var. Surrounding whitespace is
/// ignored. An unset or unparsable value yields the default `8`; parsed
/// values outside [1, 32] are clamped silently.
pub fn ner_batch_size() -> usize {
    let requested = std::env::var("GRAPHRAG_NER_BATCH_SIZE")
        .ok()
        // Trim so that `"16 "` or a trailing newline is not silently discarded.
        .and_then(|raw| raw.trim().parse::<usize>().ok())
        .unwrap_or(8);
    requested.clamp(1, 32)
}

/// Default cap on tokens fed to GLiNER NER per memory body.
///
/// v1.0.31: large markdown documents (>50 KB) tokenise into thousands of
/// 512-token windows, each requiring a CPU forward pass that takes hundreds
/// of milliseconds. A 68 KB document was observed taking 5+ minutes.
/// Truncating the input before sliding-window construction caps the worst-case
/// latency while preserving extraction quality for the leading body region.
///
/// Regex prefilter still runs on the full body, so URLs, emails, UUIDs,
/// all-caps identifiers and CamelCase brand names are extracted regardless.
pub const EXTRACTION_MAX_TOKENS_DEFAULT: usize = 5_000;

/// Resolves the per-body NER token cap, honouring the env-var override.
///
/// Override via `SQLITE_GRAPHRAG_EXTRACTION_MAX_TOKENS` env var. Surrounding
/// whitespace is ignored. Unset, unparsable or out-of-range values (outside
/// [512, 100_000]) fall back to [`EXTRACTION_MAX_TOKENS_DEFAULT`].
pub fn extraction_max_tokens() -> usize {
    std::env::var("SQLITE_GRAPHRAG_EXTRACTION_MAX_TOKENS")
        .ok()
        // Trim so that `"2048 "` or a trailing newline is not silently discarded.
        .and_then(|raw| raw.trim().parse::<usize>().ok())
        .filter(|cap| (512..=100_000).contains(cap))
        .unwrap_or(EXTRACTION_MAX_TOKENS_DEFAULT)
}

/// GLiNER confidence threshold for span scoring.
///
/// Override via `SQLITE_GRAPHRAG_GLINER_THRESHOLD` env var. Surrounding
/// whitespace is ignored. Unset or unparsable values, and values outside
/// `[0.0, 1.0]` (including `NaN`, which no range contains), are ignored and
/// the default `0.5` is used.
pub fn gliner_confidence_threshold() -> f32 {
    std::env::var("SQLITE_GRAPHRAG_GLINER_THRESHOLD")
        .ok()
        // Trim so that `"0.7 "` or a trailing newline is not silently discarded.
        .and_then(|raw| raw.trim().parse::<f32>().ok())
        .filter(|score| (0.0_f32..=1.0).contains(score))
        .unwrap_or(0.5)
}

/// HuggingFace repository for the GLiNER ONNX model.
///
/// Override via `SQLITE_GRAPHRAG_GLINER_MODEL` env var. Surrounding
/// whitespace is stripped; an unset, empty or whitespace-only value yields
/// the default `onnx-community/gliner_multi-v2.1`.
pub fn gliner_model_repo() -> String {
    std::env::var("SQLITE_GRAPHRAG_GLINER_MODEL")
        .ok()
        .map(|raw| raw.trim().to_string())
        // An exported-but-empty variable must not become an empty repo name.
        .filter(|repo| !repo.is_empty())
        .unwrap_or_else(|| "onnx-community/gliner_multi-v2.1".to_string())
}

300/// PRD-canonical regex that validates names and namespaces. Allows 1 char `[a-z0-9]`
301/// OR a 2-80 char string starting with a letter and ending with a letter/digit,
302/// containing only `[a-z0-9-]`. Rejects the `__` prefix (internal reserved).
303pub const NAME_SLUG_REGEX: &str = r"^[a-z][a-z0-9-]{0,78}[a-z0-9]$|^[a-z0-9]$";
304
305static NAME_SLUG_RE: std::sync::OnceLock<regex::Regex> = std::sync::OnceLock::new();
306
307/// Returns a reference to the compiled [`NAME_SLUG_REGEX`] pattern.
308/// Compiled once on first call, cached via `OnceLock`.
309pub fn name_slug_regex() -> &'static regex::Regex {
310    NAME_SLUG_RE.get_or_init(|| {
311        regex::Regex::new(NAME_SLUG_REGEX).expect("NAME_SLUG_REGEX is a valid pattern")
312    })
313}
314
/// Default retention period (days) used by `purge` when `--retention-days` is omitted.
pub const PURGE_RETENTION_DAYS_DEFAULT: u32 = 90;

/// Maximum number of simultaneously active namespaces (deleted_at IS NULL). Exit 5 when exceeded.
pub const MAX_NAMESPACES_ACTIVE: u32 = 100;

/// Maximum tokens accepted by an embedding input before chunking.
pub const EMBEDDING_MAX_TOKENS: usize = 512;

/// Maximum result count from the recursive graph CTE in `recall`.
pub const K_GRAPH_MATCHES_LIMIT: usize = 20;

/// Default `--limit` for `list` when omitted.
pub const K_LIST_DEFAULT_LIMIT: usize = 100;

/// Default `--limit` for `graph entities` when omitted.
pub const K_GRAPH_ENTITIES_DEFAULT_LIMIT: usize = 50;

/// Default `--limit` for `related` when omitted.
pub const K_RELATED_DEFAULT_LIMIT: usize = 10;

/// Default `--limit` for `history` when omitted.
pub const K_HISTORY_DEFAULT_LIMIT: usize = 20;

/// Default weight for the vector contribution in the `hybrid-search` RRF formula.
pub const WEIGHT_VEC_DEFAULT: f64 = 1.0;

/// Default weight for the BM25 text contribution in the `hybrid-search` RRF formula.
pub const WEIGHT_FTS_DEFAULT: f64 = 1.0;

/// Character size of the body preview emitted in text/markdown formats.
pub const TEXT_BODY_PREVIEW_LEN: usize = 200;

/// Default value injected into ORT_NUM_THREADS when not set by the user.
pub const ORT_NUM_THREADS_DEFAULT: &str = "1";

/// Default value injected into ORT_INTRA_OP_NUM_THREADS when not set.
pub const ORT_INTRA_OP_NUM_THREADS_DEFAULT: &str = "1";

/// Default value injected into OMP_NUM_THREADS when not set by the user.
pub const OMP_NUM_THREADS_DEFAULT: &str = "1";

/// Exit code for partial batch failure (PRD line 1822). Conflicts with DbBusy in v1.x;
/// in v2.0.0 DbBusy migrates to 15 and this code takes 13 per PRD.
pub const BATCH_PARTIAL_FAILURE_EXIT_CODE: i32 = 13;

/// Exit code for DbBusy in v2.0.0 (migrated from 13 to free 13 for batch failure).
pub const DB_BUSY_EXIT_CODE: i32 = 15;

/// Filename used for the advisory exclusive lock that prevents parallel invocations.
pub const CLI_LOCK_FILE: &str = "cli.lock";

/// Polling interval in milliseconds used by `--wait-lock` between `try_lock_exclusive` attempts.
pub const CLI_LOCK_POLL_INTERVAL_MS: u64 = 500;

/// Process exit code returned when the lock is busy and no wait was requested (EX_TEMPFAIL).
pub const CLI_LOCK_EXIT_CODE: i32 = 75;

/// Maximum number of CLI instances running simultaneously.
///
/// Limits the counting semaphore in [`crate::lock`] to prevent memory
/// overload when multiple parallel invocations run at once.
///
/// v1.0.75 (G18 solution): removed the rigid 4-slot ceiling. The adaptive
/// `calculate_safe_concurrency` function in [`crate::lock`] now reports
/// the dynamic limit. This constant is preserved as a *legacy fallback*
/// when the dynamic calculation cannot be performed (e.g. when `sysinfo`
/// cannot read `/proc/meminfo`).
///
/// Operators should prefer passing `--max-concurrency` explicitly OR
/// letting the runtime compute the limit. The default ceiling is intentionally
/// higher (16) so the legacy 4-slot hard cap does not silently reappear.
pub const MAX_CONCURRENT_CLI_INSTANCES: usize = 16;

/// G28-B (v1.0.68): polling interval in milliseconds used by
/// `acquire_job_singleton` between retry attempts when another invocation
/// already holds the singleton for `(job_type, namespace)`.
pub const JOB_SINGLETON_POLL_INTERVAL_MS: u64 = 1000;

/// Minimum available memory in MiB required before starting model loading.
///
/// If `sysinfo::System::available_memory() / 1_048_576` falls below this value,
/// the invocation is aborted with [`crate::errors::AppError::LowMemory`]
/// (exit code [`LOW_MEMORY_EXIT_CODE`]).
pub const MIN_AVAILABLE_MEMORY_MB: u64 = 2_048;

/// Maximum process RSS in MiB before aborting embedding operations.
/// Users can override via `--max-rss-mb`. Set to 8 GiB by default.
pub const DEFAULT_MAX_RSS_MB: u64 = 8_192;

/// Maximum time in seconds an instance waits to acquire a concurrency slot.
///
/// Passed as the default for `--max-wait-secs` in the CLI. After exhausting this limit,
/// the invocation returns [`crate::errors::AppError::AllSlotsFull`] with exit code
/// [`CLI_LOCK_EXIT_CODE`] (75).
pub const CLI_LOCK_DEFAULT_WAIT_SECS: u64 = 300;

/// v1.0.75 (G18 + G23): expected RSS in MiB for an LLM-only worker that
/// spawns a `claude -p` or `codex exec` subprocess. Much lower than the
/// embedding cost because the ONNX model is not loaded per-worker.
pub const LLM_WORKER_RSS_MB: u64 = 350;

/// Process exit code returned when available memory is below [`MIN_AVAILABLE_MEMORY_MB`].
///
/// Value `77` is `EX_NOPERM` in glibc sysexits, reused here to indicate
/// "insufficient system resource to proceed".
pub const LOW_MEMORY_EXIT_CODE: i32 = 77;

/// Process exit code returned when a duplicate memory or entity is detected (exit 9).
///
/// Moved from `2` to `9` in v1.0.52 to free exit code `2` for future use and align
/// with the PRD exit code contract. Shell callers and LLM agents must use `9` from
/// this version onwards.
pub const DUPLICATE_EXIT_CODE: i32 = 9;

/// Process exit code returned when shutdown is requested via SIGINT/SIGTERM/SIGHUP
/// (v1.0.82, GAP-002 final).
///
/// The shell sees this code INSTEAD of the legacy `128 + signal` (130/143/129) so
/// that LLM agents and orchestrators can branch on a single deterministic value
/// when the operation was cancelled by the user. The signal name is preserved in
/// the JSON envelope emitted before exit (`{"code":19,"signal":"SIGINT",...}`).
pub const SHUTDOWN_EXIT_CODE: i32 = 19;

/// Canonical value of `PRAGMA user_version` written after migrations.
///
/// **Why 50 instead of `CURRENT_SCHEMA_VERSION` (15)?**
/// `user_version` is a 32-bit integer that SQLite reserves for application use.
/// We deliberately set it to a project-specific marker (decimal 50) so external
/// inspection tools (`sqlite3 db.sqlite "PRAGMA user_version"`, the `file` command,
/// SQLite browser GUIs) can distinguish a sqlite-graphrag database from a generic
/// SQLite file at a glance. The application-level schema version (15, matching
/// `CURRENT_SCHEMA_VERSION`) is stored in the `schema_meta` table and exposed via
/// `health --json`/`stats --json`. Bumping migrations does NOT change this constant.
/// Refinery uses its own `refinery_schema_history` table for migration bookkeeping.
pub const SCHEMA_USER_VERSION: i64 = 50;

/// Current schema version, equal to the highest migration number in `migrations/Vnnn__*.sql`.
///
/// Added in v1.0.27 as a runtime and test sanity check.
/// Must be bumped in sync with new Refinery migrations; the unit test
/// `schema_version_matches_migrations_count` validates this automatically.
pub const CURRENT_SCHEMA_VERSION: u32 = 15;

#[cfg(test)]
mod tests_schema_version {
    use super::CURRENT_SCHEMA_VERSION;

    /// Guards against adding a migration without bumping
    /// `CURRENT_SCHEMA_VERSION`: counts the `V*.sql` entries under
    /// `<crate root>/migrations` and compares the total with the constant.
    #[test]
    fn schema_version_matches_migrations_count() {
        let migrations_dir =
            std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("migrations");
        let count = std::fs::read_dir(&migrations_dir)
            .expect("migrations directory must exist")
            .filter_map(|entry| entry.ok())
            .filter(|entry| {
                // Count only what the assertion message promises (`V*.sql`),
                // so a stray `V…` backup or note cannot skew the total.
                let file_name = entry.file_name();
                let file_name = file_name.to_string_lossy();
                file_name.starts_with('V') && file_name.ends_with(".sql")
            })
            .count() as u32;
        assert_eq!(
            CURRENT_SCHEMA_VERSION, count,
            "CURRENT_SCHEMA_VERSION ({CURRENT_SCHEMA_VERSION}) must equal the number of V*.sql migrations ({count})"
        );
    }
}
sqlite_graphrag/constants.rs

sqlite_graphrag/
constants.rs