Skip to main content

sqlite_graphrag/
lib.rs

//! # sqlite-graphrag
//!
//! Local GraphRAG memory for LLMs in a single SQLite file — zero external
//! services required.
//!
//! `sqlite-graphrag` is a CLI-first library that persists memories, entities and
//! typed relationships inside a single SQLite database. It combines FTS5
//! full-text search with `sqlite-vec` KNN over locally generated embeddings to
//! expose a hybrid retrieval ranker tailored for LLM agents.
//!
//! ## CLI usage
//!
//! Install and initialize once, then save and recall memories:
//!
//! ```bash
//! cargo install sqlite-graphrag
//! sqlite-graphrag init
//! sqlite-graphrag remember \
//!     --name onboarding-note \
//!     --type user \
//!     --description "first memory" \
//!     --body "hello graphrag"
//! sqlite-graphrag recall "graphrag" --k 5
//! ```
//!
//! ## Crate layout
//!
//! The public modules group the CLI, the SQLite storage layer and the
//! supporting primitives (embedder, chunking, graph, namespace detection,
//! output, paths and pragmas). The CLI binary wires them together through the
//! commands in [`commands`].
//!
//! ## Exit codes
//!
//! Every [`errors::AppError`] maps to a deterministic exit code suitable for
//! orchestration by shell scripts and LLM agents. Consult the README for the
//! full contract.
38
39use std::sync::atomic::{AtomicBool, AtomicU8, Ordering};
40use std::sync::OnceLock;
41use tokio_util::sync::CancellationToken;
42
/// Process-wide flag raised once a shutdown signal (SIGINT / SIGTERM / SIGHUP)
/// has arrived.
///
/// The flag is set from the handler that `main` registers through
/// `ctrlc::set_handler`. Synchronous long-running subcommands can poll
/// [`shutdown_requested`] and wind down gracefully before the timeout, while
/// async code should prefer [`cancel_token`] together with `tokio::select!`.
pub static SHUTDOWN: AtomicBool = AtomicBool::new(false);

/// Number of shutdown signals seen so far: `0` = none, `1` = graceful
/// shutdown, `2` or more = forced exit.
pub static SIGNAL_COUNT: AtomicU8 = AtomicU8::new(0);

/// Number of the signal that triggered shutdown (`2` = SIGINT, `15` = SIGTERM);
/// stays at `0` while no signal has been received.
// NOTE(review): SIGHUP is named as a shutdown signal on `SHUTDOWN`, yet no
// value is documented for it here — confirm what the handler stores for it.
static SIGNAL_NUMBER: AtomicU8 = AtomicU8::new(0);
55
56static CANCEL: OnceLock<CancellationToken> = OnceLock::new();
57
58/// Returns the process-wide cancellation token for async graceful shutdown.
59///
60/// The token is cancelled by the signal handler alongside [`SHUTDOWN`].
61/// Async loops should use `token.cancelled().await` inside `tokio::select!`
62/// for instant wake-up instead of polling [`shutdown_requested`].
63pub fn cancel_token() -> &'static CancellationToken {
64    CANCEL.get_or_init(CancellationToken::new)
65}
66
67/// Returns `true` if a shutdown signal has been received since the process started.
68///
69/// The value reflects the state of [`SHUTDOWN`]. Without a `ctrlc::set_handler` call,
70/// the initial state is always `false`.
71///
72/// # Examples
73///
74/// ```
75/// use sqlite_graphrag::shutdown_requested;
76///
77/// // Under normal startup conditions the signal has not been received.
78/// assert!(!shutdown_requested());
79/// ```
80///
81/// ```
82/// use std::sync::atomic::Ordering;
83/// use sqlite_graphrag::{SHUTDOWN, shutdown_requested};
84///
85/// // Simulate receiving a signal and verify that the function reflects the state.
86/// SHUTDOWN.store(true, Ordering::Release);
87/// assert!(shutdown_requested());
88/// // Restore to avoid contaminating other tests.
89/// SHUTDOWN.store(false, Ordering::Release);
90/// ```
91pub fn shutdown_requested() -> bool {
92    // ORDERING: Acquire pairs with the Release store in the signal handler (main.rs).
93    SHUTDOWN.load(Ordering::Acquire)
94}
95
96/// Returns the signal number that triggered shutdown (0 if none received).
97///
98/// Typically 2 (SIGINT) for Ctrl+C. Used to compute Unix-conventional exit
99/// code 128+N in the main function.
100pub fn shutdown_signal() -> u8 {
101    SIGNAL_NUMBER.load(Ordering::Acquire)
102}
103
104/// Token-aware chunking utilities for bodies that exceed the embedding window.
105pub mod chunking;
106
107/// Hybrid entity extraction: regex pre-filter + GLiNER zero-shot NER (graceful degradation).
108pub mod extraction;
109
110/// `clap` definitions for the top-level `sqlite-graphrag` binary.
111pub mod cli;
112
113/// Subcommand handlers wired into the `clap` tree from [`cli`].
114pub mod commands;
115
116/// Compile-time constants: embedding dimensions, limits and thresholds.
117pub mod constants;
118
119/// Daemon IPC for persistent embedding model reuse across CLI invocations.
120pub mod daemon;
121
122/// Local embedding generation backed by `fastembed`.
123pub mod embedder;
124
125/// Canonical entity type taxonomy: 13 variants, ValueEnum + serde + rusqlite impls.
126pub mod entity_type;
127
128/// Library-wide error type and the mapping to process exit codes (see [`errors::AppError`]).
129pub mod errors;
130
131/// Graph traversal helpers over the entities and relationships tables.
132pub mod graph;
133
134/// Type aliases for AHash-backed collections in hot paths.
135pub mod hash;
136
137/// Bilingual message layer for human-facing stderr progress (`--lang en|pt`, `SQLITE_GRAPHRAG_LANG`).
138pub mod i18n;
139
140/// Counting semaphore via lock files to limit parallel invocations (see [`lock::acquire_cli_slot`]).
141pub mod lock;
142
143/// Memory guard: checks RAM availability before loading the ONNX model.
144pub mod memory_guard;
145
146/// Namespace resolution with precedence between flag, environment and markers.
147pub mod namespace;
148
149/// Centralized stdout/stderr emitters for CLI output formatting.
150pub mod output;
151
152/// Dual-format argument parser: accepts Unix epoch and RFC 3339.
153pub mod parsers;
154
155/// Filesystem paths for the project-local database and app support directories.
156pub mod paths;
157
158/// SQLite pragma helpers applied on every connection.
159pub mod pragmas;
160
161/// Cross-platform signal handling: SIGINT, SIGTERM, SIGHUP.
162pub mod signals;
163
164/// Centralized retry infrastructure with exponential backoff and half-jitter.
165pub mod retry;
166
167/// Persistence layer: memories, entities, chunks and version history.
168pub mod storage;
169
170/// Centralized tracing subscriber initialization with panic hook and log bridge.
171pub mod telemetry;
172
173/// Cross-platform terminal initialization: UTF-8 console, ANSI colors, NO_COLOR.
174pub mod terminal;
175
176/// Display time zone for `*_iso` fields (flag `--tz`, env `SQLITE_GRAPHRAG_DISPLAY_TZ`, fallback UTC).
177pub mod tz;
178
179/// Stdin reader with configurable timeout to prevent indefinite blocking.
180pub mod stdin_helper;
181
182/// Real tokenizer of the embedding model for accurate token counting and chunking.
183pub mod tokenizer;
184
185mod embedded_migrations {
186    use refinery::embed_migrations;
187    embed_migrations!("migrations");
188}
189
190pub use embedded_migrations::migrations;