1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
//! Compile-time constants shared across the crate.
//!
//! Grouped into embedding configuration, length and size limits, SQLite
//! pragmas and retrieval tuning knobs. Values are taken from the PRD and
//! must stay in sync with the migrations under `migrations/`.
//!
//! ## Dynamic concurrency permit calculation
//!
//! The maximum number of simultaneous instances can be adjusted at runtime
//! using the formula:
//!
//! ```text
//! permits = min(cpus, available_memory_mb / EMBEDDING_LOAD_EXPECTED_RSS_MB) * 0.5
//! ```
//!
//! where `available_memory_mb` is obtained via `sysinfo::System::available_memory()`
//! converted to MiB. The result is capped at `MAX_CONCURRENT_CLI_INSTANCES`
//! and floored at 1.
/// Embedding vector dimensionality produced by `multilingual-e5-small`.
///
/// That is the same model named by [`FASTEMBED_MODEL_DEFAULT`].
/// NOTE(review): presumably this must change together with the default model — confirm.
pub const EMBEDDING_DIM: usize = 384;
/// Default `fastembed` model identifier used by `remember` and `recall`.
pub const FASTEMBED_MODEL_DEFAULT: &str = "multilingual-e5-small";
/// Batch size (number of inputs per call) for `fastembed` encoding calls.
pub const FASTEMBED_BATCH_SIZE: usize = 32;
// NOTE: the limits below are documented in different units (bytes vs. characters);
// check the unit of each one before comparing against `str::len()` or `chars().count()`.

/// Maximum byte length for a memory `name` field in kebab-case.
pub const MAX_MEMORY_NAME_LEN: usize = 80;
/// Maximum character length for a memory `description` field.
pub const MAX_MEMORY_DESCRIPTION_LEN: usize = 500;
/// Hard upper bound on memory `body` length in bytes.
pub const MAX_MEMORY_BODY_LEN: usize = 512_000;
/// Body character count above which the body is split into chunks.
pub const MAX_BODY_CHARS_BEFORE_CHUNK: usize = 8_000;
/// Maximum attempts when a statement returns `SQLITE_BUSY`.
///
/// Used together with [`SQLITE_BUSY_BASE_DELAY_MS`], which documents the resulting
/// backoff schedule for this number of attempts.
pub const MAX_SQLITE_BUSY_RETRIES: u32 = 5;
/// Base delay in milliseconds for the first SQLITE_BUSY retry.
///
/// Each subsequent attempt doubles the delay (exponential backoff):
/// 300 ms → 600 ms → 1200 ms → 2400 ms → 4800 ms (≈ 9.3 s total).
pub const SQLITE_BUSY_BASE_DELAY_MS: u64 = 300;
/// Query timeout applied to statements, in milliseconds (5 seconds).
pub const QUERY_TIMEOUT_MILLIS: u64 = 5_000;
/// Jaccard threshold above which two memories are considered fuzzy duplicates.
///
/// This is a similarity score: higher means more alike.
pub const DEDUP_FUZZY_THRESHOLD: f64 = 0.8;
/// Cosine distance threshold below which two memories are semantic duplicates.
///
/// This is a distance: lower means more alike, which is why the comparison direction is
/// the opposite of [`DEDUP_FUZZY_THRESHOLD`]. Note the type is `f32`, not `f64`.
pub const DEDUP_SEMANTIC_THRESHOLD: f32 = 0.1;
/// Maximum number of hops allowed in graph traversals.
pub const MAX_GRAPH_HOPS: u32 = 2;
/// Minimum relationship weight required for traversal inclusion.
pub const MIN_RELATION_WEIGHT: f64 = 0.3;
/// Default traversal depth for `related` when `--hops` is omitted.
///
/// Currently the same value as [`MAX_GRAPH_HOPS`].
pub const DEFAULT_MAX_HOPS: u32 = 2;
/// Default minimum weight filter applied during graph traversal.
///
/// Currently the same value as [`MIN_RELATION_WEIGHT`].
pub const DEFAULT_MIN_WEIGHT: f64 = 0.3;
/// Default weight assigned to newly created relationships.
pub const DEFAULT_RELATION_WEIGHT: f64 = 0.5;
/// Default `k` used by `recall` when the caller omits `--k`.
pub const DEFAULT_K_RECALL: usize = 10;
/// Default `k` for memory KNN searches when the caller omits `--k`.
///
/// Currently the same value as [`DEFAULT_K_RECALL`].
pub const K_MEMORIES_DEFAULT: usize = 10;
/// Default `k` for entity KNN searches during graph expansion.
pub const K_ENTITIES_SEARCH: usize = 5;
/// Upper bound on distinct entities persisted per memory.
pub const MAX_ENTITIES_PER_MEMORY: usize = 30;
/// Upper bound on distinct relationships persisted per memory.
pub const MAX_RELATIONSHIPS_PER_MEMORY: usize = 50;
// NOTE(review): the paragraph below documents a resolver for the per-memory relationship
// cap, but no such item follows it at this position. It is kept as a plain comment so that
// rustdoc does not attach it to `TEXT_DESCRIPTION_PREVIEW_LEN`.
//
// Resolves the per-memory relationship cap, honouring the env-var override.
//
// v1.0.22: makes the cap (default 50) configurable via `SQLITE_GRAPHRAG_MAX_RELATIONS_PER_MEMORY`.
// Audit found that rich documents silently hit the cap; users with dense technical corpora
// can raise it via env. Values outside [1, 10000] fall back to the default.

/// Character length of the description preview shown in `list` output.
pub const TEXT_DESCRIPTION_PREVIEW_LEN: usize = 100;
/// Value for `PRAGMA busy_timeout`, in milliseconds (5 s), set on every connection.
pub const BUSY_TIMEOUT_MILLIS: i32 = 5 * 1_000;
/// Value for `PRAGMA cache_size`; the negative sign selects KiB units.
pub const CACHE_SIZE_KB: i32 = -64 * 1_000;
/// Value for `PRAGMA mmap_size`, in bytes (256 MiB), set on each connection.
pub const MMAP_SIZE_BYTES: i64 = 256 * 1_024 * 1_024;
/// Page-count threshold for `PRAGMA wal_autocheckpoint`.
pub const WAL_AUTOCHECKPOINT_PAGES: i32 = 1_000;
/// Default `k` constant used by Reciprocal Rank Fusion in `hybrid-search`.
pub const RRF_K_DEFAULT: u32 = 60;
/// Chunk size expressed in tokens for body splitting.
pub const CHUNK_SIZE_TOKENS: usize = 400;
/// Token overlap between consecutive chunks.
///
/// Expressed in the same unit (tokens) as [`CHUNK_SIZE_TOKENS`].
pub const CHUNK_OVERLAP_TOKENS: usize = 50;
/// Explicit operational guard for multi-chunk documents in `remember`.
///
/// The multi-chunk path uses serial embeddings to avoid ONNX memory amplification.
/// This limit preserves a clear operational ceiling for agents and scripts.
pub const REMEMBER_MAX_SAFE_MULTI_CHUNKS: usize = 512;
/// Ceiling on chunks per controlled micro-batch in `remember`.
///
/// The `fastembed` runtime uses `BatchLongest` padding, so oversized batches amplify
/// the cost of the longest chunk. This ceiling keeps batches small even when chunks are short.
pub const REMEMBER_MAX_CONTROLLED_BATCH_CHUNKS: usize = 4;
/// Maximum padded-token budget per controlled micro-batch in `remember`.
///
/// The budget is compared against `max_tokens_no_batch * batch_size`, which approximates the
/// real cost of `BatchLongest` padding (every item is padded to the longest one in the batch).
/// Batches exceeding this budget fall back to smaller batches or serialisation.
pub const REMEMBER_MAX_CONTROLLED_BATCH_PADDED_TOKENS: usize = 512;
/// How long, in milliseconds, one ping probe against the daemon socket may take.
pub const DAEMON_PING_TIMEOUT_MS: u64 = 10;
/// Seconds of idleness (10 minutes) after which the daemon shuts itself down.
pub const DAEMON_IDLE_SHUTDOWN_SECS: u64 = 10 * 60;
/// Longest wait, in milliseconds (5 s), for an auto-started daemon to become healthy.
pub const DAEMON_AUTO_START_MAX_WAIT_MS: u64 = 5 * 1_000;
/// First polling interval, in milliseconds, when checking whether the daemon is healthy.
pub const DAEMON_AUTO_START_INITIAL_BACKOFF_MS: u64 = 50;
/// Upper limit, in milliseconds (30 s), on backoff between automatic daemon spawn attempts.
pub const DAEMON_AUTO_START_MAX_BACKOFF_MS: u64 = 30 * 1_000;
/// Base backoff, in milliseconds, applied after daemon spawn/health failures.
pub const DAEMON_SPAWN_BACKOFF_BASE_MS: u64 = 500;
/// Longest wait, in milliseconds (2 s), to acquire the daemon spawn lock.
pub const DAEMON_SPAWN_LOCK_WAIT_MS: u64 = 2 * 1_000;
/// Prefix prepended to bodies before embedding as required by E5 models.
///
/// The trailing space is part of the prefix.
pub const PASSAGE_PREFIX: &str = "passage: ";
/// Prefix prepended to queries before embedding as required by E5 models.
///
/// The trailing space is part of the prefix.
pub const QUERY_PREFIX: &str = "query: ";
/// Crate version string sourced from `CARGO_PKG_VERSION` at build time.
///
/// Cargo exports `CARGO_PKG_VERSION` to the compiler, so regular builds embed the version
/// declared in `Cargo.toml`. `option_env!` is used rather than `env!` so the file still
/// compiles when built outside Cargo (for example with plain `rustc`); in that case the
/// value falls back to `"unknown"`.
pub const SQLITE_GRAPHRAG_VERSION: &str = match option_env!("CARGO_PKG_VERSION") {
    Some(version) => version,
    None => "unknown",
};
// NOTE(review): the paragraph below documents a BERT NER batch-size item, but no such item
// follows it at this position. It is kept as a plain comment so that rustdoc does not merge
// it into the documentation of `NAME_SLUG_REGEX`.
//
// Batch size for BERT NER forward passes.
//
// Larger values amortise fixed forward-pass overhead but increase peak RAM.
// Memory guide (CPU only, max 512-token windows):
// N=4 → ~54 MiB peak
// N=8 → ~108 MiB peak ← default
// N=16 → ~216 MiB peak
// N=32 → ~432 MiB peak (not recommended without 16+ GiB RAM)
//
// Override via `GRAPHRAG_NER_BATCH_SIZE` env var. Values outside [1, 32] are
// clamped silently.

/// PRD-canonical regex that validates names and namespaces. Allows 1 char `[a-z0-9]`
/// OR a 2-80 char string starting with a letter and ending with a letter/digit,
/// containing only `[a-z0-9-]`. Rejects the `__` prefix (internal reserved).
///
/// The `__` prefix is rejected because `_` appears in none of the character classes.
/// The 80-character ceiling comes from one leading letter, up to 78 middle characters
/// and one trailing letter/digit.
pub const NAME_SLUG_REGEX: &str = r"^[a-z][a-z0-9-]{0,78}[a-z0-9]$|^[a-z0-9]$";
/// Default retention period (days) used by `purge` when `--retention-days` is omitted.
pub const PURGE_RETENTION_DAYS_DEFAULT: u32 = 90;
/// Maximum number of simultaneously active namespaces (`deleted_at IS NULL`).
///
/// Exceeding it makes the process exit with code 5.
pub const MAX_NAMESPACES_ACTIVE: u32 = 100;
/// Maximum tokens accepted by an embedding input before chunking.
pub const EMBEDDING_MAX_TOKENS: usize = 512;
/// Maximum result count from the recursive graph CTE in `recall`.
pub const K_GRAPH_MATCHES_LIMIT: usize = 20;
/// Default `--limit` for `list` when omitted.
pub const K_LIST_DEFAULT_LIMIT: usize = 100;
/// Default `--limit` for `graph entities` when omitted.
pub const K_GRAPH_ENTITIES_DEFAULT_LIMIT: usize = 50;
/// Default `--limit` for `related` when omitted.
pub const K_RELATED_DEFAULT_LIMIT: usize = 10;
/// Default `--limit` for `history` when omitted.
pub const K_HISTORY_DEFAULT_LIMIT: usize = 20;
/// Default weight for the vector contribution in the `hybrid-search` RRF formula.
pub const WEIGHT_VEC_DEFAULT: f64 = 1.0;
/// Default weight for the BM25 text contribution in the `hybrid-search` RRF formula.
///
/// Equal to [`WEIGHT_VEC_DEFAULT`], so both contributions are weighted the same by default.
pub const WEIGHT_FTS_DEFAULT: f64 = 1.0;
/// Character size of the body preview emitted in text/markdown formats.
pub const TEXT_BODY_PREVIEW_LEN: usize = 200;
// The three defaults below are string-typed, matching how environment variable values
// are represented.

/// Default value injected into `ORT_NUM_THREADS` when not set by the user.
pub const ORT_NUM_THREADS_DEFAULT: &str = "1";
/// Default value injected into `ORT_INTRA_OP_NUM_THREADS` when not set.
pub const ORT_INTRA_OP_NUM_THREADS_DEFAULT: &str = "1";
/// Default value injected into `OMP_NUM_THREADS` when not set by the user.
pub const OMP_NUM_THREADS_DEFAULT: &str = "1";
/// Exit code for partial batch failure (PRD line 1822). Conflicts with DbBusy in v1.x;
/// in v2.0.0 DbBusy migrates to 15 and this code takes 13 per PRD.
pub const BATCH_PARTIAL_FAILURE_EXIT_CODE: i32 = 13;
/// Exit code for DbBusy in v2.0.0 (migrated from 13 to free 13 for batch failure).
pub const DB_BUSY_EXIT_CODE: i32 = 15;
/// Filename used for the advisory exclusive lock that prevents parallel invocations.
pub const CLI_LOCK_FILE: &str = "cli.lock";
/// Polling interval in milliseconds used by `--wait-lock` between `try_lock_exclusive` attempts.
pub const CLI_LOCK_POLL_INTERVAL_MS: u64 = 500;
/// Process exit code returned when the lock is busy and no wait was requested.
///
/// The value `75` corresponds to `EX_TEMPFAIL`.
pub const CLI_LOCK_EXIT_CODE: i32 = 75;
/// Upper bound on CLI instances allowed to run at the same time.
///
/// Matches `DAEMON_MAX_CONCURRENT_CLIENTS` from the PRD. It bounds the counting
/// semaphore in [`crate::lock`], so that several parallel invocations cannot all load
/// the ONNX model at once and overload memory.
pub const MAX_CONCURRENT_CLI_INSTANCES: usize = 4;
/// Available memory, in MiB (2 GiB), that must be free before model loading starts.
///
/// When `sysinfo::System::available_memory() / 1_048_576` is lower than this value,
/// the invocation aborts with [`crate::errors::AppError::LowMemory`]
/// (exit code [`LOW_MEMORY_EXIT_CODE`]).
pub const MIN_AVAILABLE_MEMORY_MB: u64 = 2 * 1_024;
/// Longest time, in seconds (5 minutes), an instance waits for a concurrency slot.
///
/// Serves as the default for `--max-wait-secs` in the CLI. Once the limit is exhausted,
/// the invocation returns [`crate::errors::AppError::AllSlotsFull`] with exit code
/// [`CLI_LOCK_EXIT_CODE`] (75).
pub const CLI_LOCK_DEFAULT_WAIT_SECS: u64 = 5 * 60;
/// Expected RSS in MiB for a single instance with the ONNX model loaded via fastembed.
///
/// Used in the formula `min(cpus, available_memory_mb / EMBEDDING_LOAD_EXPECTED_RSS_MB) * 0.5`
/// to compute the dynamic permit count.
///
/// Value calibrated on 2026-04-23 with `/usr/bin/time -v` against `sqlite-graphrag v1.0.3`
/// on the heavy commands `remember`, `recall`, and `hybrid-search`, all peaking near
/// 1.03 GiB RSS per process. The constant below rounds up with a defensive margin
/// (1.03 GiB is roughly 1055 MiB, rounded up to 1100).
pub const EMBEDDING_LOAD_EXPECTED_RSS_MB: u64 = 1_100;
/// Process exit code returned when available memory is below [`MIN_AVAILABLE_MEMORY_MB`].
///
/// Value `77` is `EX_NOPERM` in glibc sysexits, reused here to indicate
/// "insufficient system resource to proceed".
pub const LOW_MEMORY_EXIT_CODE: i32 = 77;
/// Canonical value of `PRAGMA user_version` written after migrations.
///
/// Must stay in sync with the human-readable schema version identifier.
/// Refinery uses its own history table; `user_version` is an auxiliary
/// diagnostic field for external tools (e.g. `sqlite3 db.sqlite "PRAGMA user_version"`).
///
/// NOTE(review): this value (49) differs from [`CURRENT_SCHEMA_VERSION`] (8); how the two
/// numbering schemes relate is not visible in this file — confirm before bumping either.
pub const SCHEMA_USER_VERSION: i64 = 49;
/// Current schema version, equal to the highest migration number in `migrations/Vnnn__*.sql`.
///
/// Added in v1.0.27 as a runtime and test sanity check.
/// Must be bumped in sync with new Refinery migrations; the unit test
/// `schema_version_matches_migrations_count` validates this automatically.
pub const CURRENT_SCHEMA_VERSION: u32 = 8;