Skip to main content

sqlite_graphrag/
lib.rs

1//! # sqlite-graphrag
2//!
3//! Local GraphRAG memory for LLMs in a single SQLite file — zero external
4//! services required.
5//!
//! `sqlite-graphrag` is a CLI-first library that persists memories, entities and
//! typed relationships inside a single SQLite database. It combines FTS5
//! full-text search with in-process cosine-similarity KNN (see [`similarity`])
//! over locally-generated embeddings to expose a hybrid retrieval ranker
//! tailored for LLM agents.
10//!
11//! ## CLI usage
12//!
13//! Install and initialize once, then save and recall memories:
14//!
15//! ```bash
16//! cargo install sqlite-graphrag
17//! sqlite-graphrag init
18//! sqlite-graphrag remember \
19//!     --name onboarding-note \
20//!     --type user \
21//!     --description "first memory" \
22//!     --body "hello graphrag"
23//! sqlite-graphrag recall "graphrag" --k 5
24//! ```
25//!
26//! ## Crate layout
27//!
28//! The public modules group the CLI, the SQLite storage layer and the
29//! supporting primitives (embedder, chunking, graph, namespace detection,
30//! output, paths and pragmas). The CLI binary wires them together through the
31//! commands in [`commands`].
32//!
33//! ## Exit codes
34//!
35//! Errors returned from [`errors::AppError`] map to deterministic exit codes
36//! suitable for orchestration by shell scripts and LLM agents. Consult the
37//! README for the full contract.
38
39use std::sync::atomic::{AtomicBool, AtomicU8, Ordering};
40use std::sync::OnceLock;
41use tokio_util::sync::CancellationToken;
42
/// Signals that a shutdown signal (SIGINT / SIGTERM / SIGHUP) has been received.
///
/// Set in `main` via `ctrlc::set_handler`. Long-running subcommands can
/// poll [`shutdown_requested`] to shut down gracefully before timeout.
/// Async code should prefer [`cancel_token`] with `tokio::select!`.
///
/// Starts as `false`. [`shutdown_requested`] reads the flag with
/// `Ordering::Acquire`, so writers should store with `Ordering::Release`
/// (or stronger) for the pairing to hold.
pub static SHUTDOWN: AtomicBool = AtomicBool::new(false);

/// Counter of shutdown signals received. 0=none, 1=graceful, 2+=forced exit.
///
/// Starts at 0. Nothing in this file reads or increments the counter; it is
/// public so that code elsewhere can update it.
pub static SIGNAL_COUNT: AtomicU8 = AtomicU8::new(0);

/// Signal number that triggered shutdown (2=SIGINT, 15=SIGTERM). 0=none.
///
/// Not `pub`: only code inside this crate can write it. It is exposed
/// read-only through [`shutdown_signal`].
///
/// NOTE(review): the docs on [`SHUTDOWN`] also list SIGHUP (conventionally 1);
/// confirm whether the handler records that value here as well.
static SIGNAL_NUMBER: AtomicU8 = AtomicU8::new(0);
55
56static CANCEL: OnceLock<CancellationToken> = OnceLock::new();
57
58/// Returns the process-wide cancellation token for async graceful shutdown.
59///
60/// The token is cancelled by the signal handler alongside [`SHUTDOWN`].
61/// Async loops should use `token.cancelled().await` inside `tokio::select!`
62/// for instant wake-up instead of polling [`shutdown_requested`].
63pub fn cancel_token() -> &'static CancellationToken {
64    CANCEL.get_or_init(CancellationToken::new)
65}
66
67/// Returns `true` if a shutdown signal has been received since the process started.
68///
69/// The value reflects the state of [`SHUTDOWN`]. Without a `ctrlc::set_handler` call,
70/// the initial state is always `false`.
71///
72/// # Examples
73///
74/// ```
75/// use sqlite_graphrag::shutdown_requested;
76///
77/// // Under normal startup conditions the signal has not been received.
78/// assert!(!shutdown_requested());
79/// ```
80///
81/// ```
82/// use std::sync::atomic::Ordering;
83/// use sqlite_graphrag::{SHUTDOWN, shutdown_requested};
84///
85/// // Simulate receiving a signal and verify that the function reflects the state.
86/// SHUTDOWN.store(true, Ordering::Release);
87/// assert!(shutdown_requested());
88/// // Restore to avoid contaminating other tests.
89/// SHUTDOWN.store(false, Ordering::Release);
90/// ```
91pub fn shutdown_requested() -> bool {
92    // ORDERING: Acquire pairs with the Release store in the signal handler (main.rs).
93    SHUTDOWN.load(Ordering::Acquire)
94}
95
96/// Returns the signal number that triggered shutdown (0 if none received).
97///
98/// Typically 2 (SIGINT) for Ctrl+C. Used to compute Unix-conventional exit
99/// code 128+N in the main function.
100pub fn shutdown_signal() -> u8 {
101    SIGNAL_NUMBER.load(Ordering::Acquire)
102}
103
104/// Token-aware chunking utilities for bodies that exceed the embedding window.
105pub mod chunking;
106
107/// Hybrid entity extraction: regex pre-filter + GLiNER zero-shot NER (graceful degradation).
108pub mod extraction;
109
110/// Legacy GLiNER extractor implementation compiled only for the transition feature.
111#[cfg(feature = "ner-legacy")]
112pub mod extraction_gliner;
113
114/// v1.0.75 (G21 solution): extraction backend abstraction with
115/// LLM/Embedding/None/Composite implementations.
116pub mod extract;
117
118/// `clap` definitions for the top-level `sqlite-graphrag` binary.
119pub mod cli;
120
121/// Subcommand handlers wired into the `clap` tree from [`cli`].
122pub mod commands;
123
124/// Compile-time constants: embedding dimensions, limits and thresholds.
125pub mod constants;
126
127/// Local embedding generation (LLM-only, one-shot per invocation).
128pub mod embedder;
129
130/// Canonical entity type taxonomy: 13 variants, ValueEnum + serde + rusqlite impls.
131pub mod entity_type;
132
133/// Library-wide error type and the mapping to process exit codes (see [`errors::AppError`]).
134pub mod errors;
135
136/// Graph traversal helpers over the entities and relationships tables.
137pub mod graph;
138
139/// Type aliases for AHash-backed collections in hot paths.
140pub mod hash;
141
142/// Bilingual message layer for human-facing stderr progress (`--lang en|pt`, `SQLITE_GRAPHRAG_LANG`).
143pub mod i18n;
144
145/// Counting semaphore via lock files to limit parallel invocations.
146/// Provides `acquire_cli_slot` (counting semaphore) and the G28-B
147/// per-namespace heavy-job singleton `acquire_job_singleton` for
148/// `enrich`, `ingest --mode claude-code`, `ingest --mode codex`.
149pub mod lock;
150
151/// v1.0.75 (G22 solution): spawn subsystem abstraction with
152/// `VersionAdapter` trait for codex/claude/opencode executors.
153pub mod spawn;
154
155/// Memory guard: checks RAM availability before loading the ONNX model.
156pub mod memory_guard;
157
158/// Type-safe enumeration of the five `memories.source` CHECK constraint values.
159/// Replaces the footgun `pub source: String` to prevent G29-style regressions.
160#[allow(rustdoc::broken_intra_doc_links)]
161pub mod memory_source;
162
163/// Namespace resolution with precedence between flag, environment and markers.
164pub mod namespace;
165
166/// Centralized stdout/stderr emitters for CLI output formatting.
167pub mod output;
168
169/// Dual-format argument parser: accepts Unix epoch and RFC 3339.
170pub mod parsers;
171
172/// G29 Passo 4: preservation checks (Jaccard trigram) for LLM-enriched bodies.
173pub mod preservation;
174
175/// Filesystem paths for the project-local database and app support directories.
176pub mod paths;
177
178/// SQLite pragma helpers applied on every connection.
179pub mod pragmas;
180
181/// v1.0.76: in-process vector similarity helpers. Replaces the
182/// `sqlite-vec` KNN API with pure-Rust cosine over the BLOB-backed
183/// `memory_embeddings` / `entity_embeddings` tables.
184pub mod similarity;
185
186/// Cross-platform signal handling: SIGINT, SIGTERM, SIGHUP.
187pub mod signals;
188
189/// Centralized retry infrastructure with exponential backoff and half-jitter.
190pub mod retry;
191
192/// G28: orphan-process reaper that runs at CLI startup.
193#[allow(rustdoc::broken_intra_doc_links)]
194pub mod reaper;
195
196/// G28-D: system load average observation (pre-spawn saturation check).
197pub mod system_load;
198
199/// Persistence layer: memories, entities, chunks and version history.
200pub mod storage;
201
202/// Centralized tracing subscriber initialization with panic hook and log bridge.
203pub mod telemetry;
204
205/// Cross-platform terminal initialization: UTF-8 console, ANSI colors, NO_COLOR.
206pub mod terminal;
207
208/// Display time zone for `*_iso` fields (flag `--tz`, env `SQLITE_GRAPHRAG_DISPLAY_TZ`, fallback UTC).
209pub mod tz;
210
211/// Stdin reader with configurable timeout to prevent indefinite blocking.
212pub mod stdin_helper;
213
214/// Real tokenizer of the embedding model for accurate token counting and chunking.
215pub mod tokenizer;
216
217mod embedded_migrations {
218    use refinery::embed_migrations;
219    embed_migrations!("migrations");
220}
221
222pub use embedded_migrations::migrations;