Skip to main content

nabu_core/
lib.rs

1//! Core history-keeping engine for the `nabu` CLI: ingest, indexing, search,
2//! export, and maintenance over local coding-agent transcripts.
3//!
4//! This crate is published only so the `nabu` binary (the `nabu-cli` crate)
5//! resolves its dependencies. It is not a stable public API — items may change
6//! or be removed in any release with no semver guarantee. Depend on the `nabu`
7//! CLI, not on this crate directly.
8#![doc(hidden)]
9
10use fs2::FileExt;
11use rayon::prelude::*;
12use rusqlite::OptionalExtension;
13#[cfg(feature = "semantic")]
14use rusqlite::{params_from_iter, types::Value as SqlValue};
15use sha2::{Digest, Sha256};
16use std::collections::{BTreeMap, BTreeSet, HashMap};
17use std::fs::{self, File, OpenOptions};
18use std::path::{Path, PathBuf};
19use std::str::FromStr;
20use std::sync::atomic::{AtomicUsize, Ordering};
21#[cfg(feature = "semantic")]
22use std::sync::Arc;
23use std::sync::{Mutex, OnceLock};
24#[cfg(feature = "semantic")]
25use std::time::Instant;
26use std::time::{Duration as StdDuration, SystemTime, UNIX_EPOCH};
27use time::format_description::well_known::Rfc3339;
28use time::{Date, Month, OffsetDateTime};
29
// NOTE(review): presumably the version number of the on-disk index/database
// schema; nothing in this file shows where it is read — confirm against the
// `db` module before relying on this description.
30pub const SCHEMA_VERSION: u32 = 1;
// Text of `../schema.sql`, embedded at compile time by `include_str!` (the path
// is resolved relative to this source file).
31pub(crate) const SQLITE_SCHEMA: &str = include_str!("../schema.sql");
// 16 MiB, expressed in bytes.
// NOTE(review): the name suggests a cap on the size of an envelope kept inline;
// the enforcing code is not visible here — confirm in the ingest/raw modules.
32pub(crate) const MAX_INLINE_ENVELOPE_BYTES: usize = 16 * 1024 * 1024;
33mod db;
34pub(crate) use db::{
35    ensure_semantic_vector_schema, initialize_database, open_index, table_count, table_exists,
36};
// NOTE(review): presumably the largest number of results one search request may
// return; the clamping site is not visible in this file — confirm in `search`.
37const MAX_SEARCH_LIMIT: usize = 50;
38/// Hard upper bound on per-result snippet length. Callers may request up to this
39/// many characters; requests above it are clamped.
40pub const MAX_SEARCH_SNIPPET_CHARS: usize = 1000;
41/// Default per-result snippet length applied when a caller omits
42/// `max_snippet_chars`. Sized for triage: ~500 chars (a few sentences of
43/// match-centered context) is enough to tell a real bug from discussion of one
44/// without a `get_session` round-trip, while staying far inside the MCP
45/// response-size budget even at the maximum result count.
46pub const DEFAULT_SEARCH_SNIPPET_CHARS: usize = 500;
// NOTE(review): presumably the largest page size accepted when listing sessions
// or session events; usage is not visible here — confirm in `read`.
47const MAX_SESSION_LIMIT: usize = 500;
// NOTE(review): presumably caps how many surrounding events are returned before
// and after a target event ("per side"); confirm against the caller.
48const MAX_CONTEXT_EVENTS_PER_SIDE: usize = 500;
// NOTE(review): presumably the recursion-depth limit for directory size
// calculation (see the `directory_size` re-export from `doctor`); confirm there.
49const MAX_DIRECTORY_SIZE_DEPTH: usize = 64;
50mod concept_expansion;
51pub(crate) use concept_expansion::expand_query_terms;
52mod semantic;
53#[cfg(all(test, feature = "semantic"))]
54pub(crate) use semantic::{
55    bucket_unembedded_units, collect_unembedded_units, embed_unembedded_units_with_config,
56    embedding_index_progress, estimated_embedding_token_count, vector_to_blob,
57    EmbeddingWriteConfig, UnembeddedUnit,
58};
59#[cfg(test)]
60pub(crate) use semantic::{
61    document_embedding_input, install_verified_file, model_file_expected_sha256,
62    query_embedding_input, semantic_model_cache_path, sha256_hex_of_file, verify_file_sha256,
63    SEMANTIC_MODEL_FILE_SHA256, SEMANTIC_MODEL_ID, SEMANTIC_MODEL_REMOTE_FILES,
64    SEMANTIC_MODEL_REPO, SEMANTIC_MODEL_REVISION,
65};
66pub use semantic::{
67    download_embedding_model_with_progress, embedding_model_disclosure, embedding_model_status,
68    prune_embedding_cache,
69};
70pub(crate) use semantic::{
71    embed_index_if_available_with_progress, insert_vector_unit_rows, semantic_search_available,
72    vector_search_results, SEMANTIC_VECTOR_DIMENSIONS,
73};
74
75mod error;
76pub use error::{Error, NotFound, Result};
77
78mod event;
79pub use event::{
80    summary_kind_for_canonical_str, CanonicalType, DedupeParts, EventEnvelope, Source, SummaryKind,
81    Tool,
82};
83
84mod identity;
85pub use identity::{dedupe_key, sanitize_session_id};
86pub(crate) use identity::{hash_line, sha256_hex};
87
88mod paths;
89pub use paths::{canonical_raw_path, resolve_home};
90pub(crate) use paths::{
91    chmod, create_dir_0700, harness_home_for_raw_file, lock_path_for_raw_file, set_if_exists,
92};
93
94mod config;
95pub(crate) use config::create_config_if_missing;
96pub use config::{opencode_server_url, set_opencode_server_url};
97
98mod semantic_api;
99pub use semantic_api::{Embedder, EmbeddingUnit, EmbeddingUnitKind};
100
101mod options;
102pub(crate) use options::RankedSearchResult;
103pub use options::{
104    native_jsonl_line_command, AppendReport, BackfillCoverageSession, BackfillDryRunReport,
105    BackfillImportPreview, BackfillProgress, BackfillReport, CorroboratedRef, Corroboration,
106    CoverageSummary, DoctorCheck, DoctorReport, DoctorStats, EmbeddingDownloadProgress,
107    EmbeddingDownloadReport, EmbeddingIndexProgress, EmbeddingModelDisclosure,
108    EmbeddingModelStatus, EventOptions, EventPointer, FileIngestReport, FileTouch, IndexFreshness,
109    IndexOptions, IndexReport, InitReport, PurgeAction, PurgeAllArtifact, PurgeAllOptions,
110    PurgeAllReport, PurgeReport, PurgeTier, SearchContinuation, SearchMode, SearchOptions,
111    SearchPage, SearchResult, SessionOptions, SessionPage, SessionSummary, StorageFootprint,
112    StoredEvent, ToolUsage, SESSION_PROMPT_SNIPPET_CHARS, SESSION_TOP_FILES, SESSION_TOP_TOOLS,
113};
114
115mod purge;
116pub use purge::{purge_all, purge_before, purge_session};
117
118mod doctor;
119pub(crate) use doctor::{directory_size, storage_footprint};
120pub use doctor::{doctor, doctor_with_options, doctor_with_progress, index_freshness, DoctorStage};
121mod json;
122pub(crate) use json::{i64_pointer, required_string, string_pointer};
123
124mod backfill;
125#[cfg(test)]
126pub(crate) use backfill::{
127    append_prepared_event, envelope_from_backfill_payload, raw_index_checkpoint_is_current,
128    BackfillParseContext,
129};
130pub(crate) use backfill::{
131    append_prepared_events, checkpoint_is_current, load_checkpoint_from_conn,
132    message_id_for_payload, normalize_date_or_duration, opencode_hook_session_id,
133    opencode_server_events_from_payload, parse_ingest_file_source, raw_index_checkpoint_offset,
134    source_file_metadata, write_raw_index_checkpoint, SourceCheckpoint, SourceFileMetadata,
135};
136#[cfg(test)]
137pub(crate) use backfill::{backfill_dry_run, backfill_since};
138pub use backfill::{
139    backfill_dry_run_with_progress, backfill_since_with_progress, malformed_native_payload,
140};
141mod ingest;
142pub(crate) use ingest::{
143    append_envelope_locked, append_envelopes_locked, load_full_dedupe_sidecar_events,
144    read_raw_dedupe_snapshot, remove_dedupe_sidecar_for_raw_file, resolved_payload_for_envelope,
145    sequence_for_payload, source_event_id_for_payload, DedupeSidecarFiles, ExistingRawEvent,
146};
147pub use ingest::{ingest_file, ingest_hook_event, ingest_opencode_server_messages, init_home};
148
149mod index;
150pub use index::{
151    index_once, index_once_single_flight, index_once_with_options,
152    index_once_with_options_and_progress, SingleFlightOutcome,
153};
154pub(crate) use index::{recalculate_all_session_counts, RawIndexFileReport};
155
156mod search;
157#[cfg(test)]
158pub(crate) use search::corroborate::{extract_corroboration_candidates, git_invocations};
159pub(crate) use search::corroborate_text;
160#[cfg(test)]
161pub(crate) use search::search_history_filtered;
162#[cfg(feature = "semantic")]
163pub(crate) use search::{
164    match_centered_snippet, normalize_ref_filter, resolve_session_filter_ids,
165    retrieval_key_for_text, unique_ranked_results_by_event,
166};
167pub use search::{search_history, search_history_page};
168
169mod read;
170pub(crate) use read::session_events;
171pub use read::{get_event_by_pointer_with_options, get_session_page, latest_event, list_sessions};
172
173mod export;
174pub use export::{export_session_jsonl_with_options, export_session_markdown_with_options};
175
176mod redact;
177pub use redact::{redact_export_json, redact_export_text};
178pub(crate) use redact::{redact_json_value, redact_text};
179
180mod provenance;
181pub(crate) use provenance::extract_refs;
182
183mod raw;
184pub(crate) use raw::{
185    open_raw_offset_reader, payload_for_raw_pointer, raw_envelope_for_line_scan,
186    raw_envelope_for_pointer, read_raw_envelope_at_offset, session_raw_file,
187};
188
189mod document;
190pub(crate) use document::{
191    canonical_type_for_payload, compaction_state_for, file_paths_for_payload, hook_event_name,
192    identity_payload, message_text_for_document, normalize_identity_text, role_for,
193    search_document_for_event, string_field, tool_status_for, SearchDocument,
194};
195// Used only by the cfg(semantic) vector pipeline and a default-build unit test.
196#[cfg(any(feature = "semantic", test))]
197pub(crate) use document::embedding_units_for_document;
198
199#[cfg(test)]
200mod tests;