1#![doc(hidden)]
9
10use fs2::FileExt;
11use rayon::prelude::*;
12use rusqlite::OptionalExtension;
13#[cfg(feature = "semantic")]
14use rusqlite::{params_from_iter, types::Value as SqlValue};
15use sha2::{Digest, Sha256};
16use std::collections::{BTreeMap, BTreeSet, HashMap};
17use std::fs::{self, File, OpenOptions};
18use std::path::{Path, PathBuf};
19use std::str::FromStr;
20use std::sync::atomic::{AtomicUsize, Ordering};
21#[cfg(feature = "semantic")]
22use std::sync::Arc;
23use std::sync::{Mutex, OnceLock};
24#[cfg(feature = "semantic")]
25use std::time::Instant;
26use std::time::{Duration as StdDuration, SystemTime, UNIX_EPOCH};
27use time::format_description::well_known::Rfc3339;
28use time::{Date, Month, OffsetDateTime};
29
// NOTE(review): presumably identifies the version of the persisted data/index
// schema; the code that reads this value is not visible in this file — confirm.
/// Schema version number; currently `1`.
30pub const SCHEMA_VERSION: u32 = 1;
/// Full text of `../schema.sql` (resolved relative to this source file),
/// embedded into the binary at compile time by `include_str!`.
31pub(crate) const SQLITE_SCHEMA: &str = include_str!("../schema.sql");
// NOTE(review): by its name this caps the size of an envelope handled inline;
// the enforcing code lives in another module — confirm the exact semantics.
/// A byte limit of 16 MiB (`16 * 1024 * 1024`).
32pub(crate) const MAX_INLINE_ENVELOPE_BYTES: usize = 16 * 1024 * 1024;
33mod db;
34pub(crate) use db::{
35 ensure_semantic_vector_schema, initialize_database, open_index, table_count, table_exists,
36};
// The private constants below are declared at the crate root, so they are
// visible to this module and its child modules only. None of them is used in
// this file itself; the purposes noted here are read from the names and
// should be confirmed against the modules that use them.

// NOTE(review): presumably the upper bound on results returned by one search — confirm.
/// Private limit with value `50`.
37const MAX_SEARCH_LIMIT: usize = 50;
// NOTE(review): presumably the largest snippet length, in characters, a caller may request — confirm.
/// Public limit with value `1000`.
38pub const MAX_SEARCH_SNIPPET_CHARS: usize = 1000;
// NOTE(review): presumably the snippet length used when the caller does not specify one — confirm.
/// Public default with value `500`, i.e. half of `MAX_SEARCH_SNIPPET_CHARS`.
41pub const DEFAULT_SEARCH_SNIPPET_CHARS: usize = 500;
// NOTE(review): presumably the upper bound on sessions returned by one listing — confirm.
/// Private limit with value `500`.
47const MAX_SESSION_LIMIT: usize = 500;
// NOTE(review): presumably bounds the number of context events fetched before and
// after a target event (hence "per side") — confirm.
/// Private limit with value `500`.
48const MAX_CONTEXT_EVENTS_PER_SIDE: usize = 500;
// NOTE(review): presumably bounds recursion depth when computing a directory's size — confirm.
/// Private limit with value `64`.
49const MAX_DIRECTORY_SIZE_DEPTH: usize = 64;
50mod concept_expansion;
51pub(crate) use concept_expansion::expand_query_terms;
52mod semantic;
53#[cfg(all(test, feature = "semantic"))]
54pub(crate) use semantic::{
55 bucket_unembedded_units, collect_unembedded_units, embed_unembedded_units_with_config,
56 embedding_index_progress, estimated_embedding_token_count, vector_to_blob,
57 EmbeddingWriteConfig, UnembeddedUnit,
58};
59#[cfg(test)]
60pub(crate) use semantic::{
61 document_embedding_input, install_verified_file, model_file_expected_sha256,
62 query_embedding_input, semantic_model_cache_path, sha256_hex_of_file, verify_file_sha256,
63 SEMANTIC_MODEL_FILE_SHA256, SEMANTIC_MODEL_ID, SEMANTIC_MODEL_REMOTE_FILES,
64 SEMANTIC_MODEL_REPO, SEMANTIC_MODEL_REVISION,
65};
66pub use semantic::{
67 download_embedding_model_with_progress, embedding_model_disclosure, embedding_model_status,
68 prune_embedding_cache,
69};
70pub(crate) use semantic::{
71 embed_index_if_available_with_progress, insert_vector_unit_rows, semantic_search_available,
72 vector_search_results, SEMANTIC_VECTOR_DIMENSIONS,
73};
74
75mod error;
76pub use error::{Error, NotFound, Result};
77
78mod event;
79pub use event::{
80 summary_kind_for_canonical_str, CanonicalType, DedupeParts, EventEnvelope, Source, SummaryKind,
81 Tool,
82};
83
84mod identity;
85pub use identity::{dedupe_key, sanitize_session_id};
86pub(crate) use identity::{hash_line, sha256_hex};
87
88mod paths;
89pub use paths::{canonical_raw_path, resolve_home};
90pub(crate) use paths::{
91 chmod, create_dir_0700, harness_home_for_raw_file, lock_path_for_raw_file, set_if_exists,
92};
93
94mod config;
95pub(crate) use config::create_config_if_missing;
96pub use config::{opencode_server_url, set_opencode_server_url};
97
98mod semantic_api;
99pub use semantic_api::{Embedder, EmbeddingUnit, EmbeddingUnitKind};
100
101mod options;
102pub(crate) use options::RankedSearchResult;
103pub use options::{
104 native_jsonl_line_command, AppendReport, BackfillCoverageSession, BackfillDryRunReport,
105 BackfillImportPreview, BackfillProgress, BackfillReport, CorroboratedRef, Corroboration,
106 CoverageSummary, DoctorCheck, DoctorReport, DoctorStats, EmbeddingDownloadProgress,
107 EmbeddingDownloadReport, EmbeddingIndexProgress, EmbeddingModelDisclosure,
108 EmbeddingModelStatus, EventOptions, EventPointer, FileIngestReport, FileTouch, IndexFreshness,
109 IndexOptions, IndexReport, InitReport, PurgeAction, PurgeAllArtifact, PurgeAllOptions,
110 PurgeAllReport, PurgeReport, PurgeTier, SearchContinuation, SearchMode, SearchOptions,
111 SearchPage, SearchResult, SessionOptions, SessionPage, SessionSummary, StorageFootprint,
112 StoredEvent, ToolUsage, SESSION_PROMPT_SNIPPET_CHARS, SESSION_TOP_FILES, SESSION_TOP_TOOLS,
113};
114
115mod purge;
116pub use purge::{purge_all, purge_before, purge_session};
117
118mod doctor;
119pub(crate) use doctor::{directory_size, storage_footprint};
120pub use doctor::{doctor, doctor_with_options, doctor_with_progress, index_freshness, DoctorStage};
121mod json;
122pub(crate) use json::{i64_pointer, required_string, string_pointer};
123
124mod backfill;
125#[cfg(test)]
126pub(crate) use backfill::{
127 append_prepared_event, envelope_from_backfill_payload, raw_index_checkpoint_is_current,
128 BackfillParseContext,
129};
130pub(crate) use backfill::{
131 append_prepared_events, checkpoint_is_current, load_checkpoint_from_conn,
132 message_id_for_payload, normalize_date_or_duration, opencode_hook_session_id,
133 opencode_server_events_from_payload, parse_ingest_file_source, raw_index_checkpoint_offset,
134 source_file_metadata, write_raw_index_checkpoint, SourceCheckpoint, SourceFileMetadata,
135};
136#[cfg(test)]
137pub(crate) use backfill::{backfill_dry_run, backfill_since};
138pub use backfill::{
139 backfill_dry_run_with_progress, backfill_since_with_progress, malformed_native_payload,
140};
141mod ingest;
142pub(crate) use ingest::{
143 append_envelope_locked, append_envelopes_locked, load_full_dedupe_sidecar_events,
144 read_raw_dedupe_snapshot, remove_dedupe_sidecar_for_raw_file, resolved_payload_for_envelope,
145 sequence_for_payload, source_event_id_for_payload, DedupeSidecarFiles, ExistingRawEvent,
146};
147pub use ingest::{ingest_file, ingest_hook_event, ingest_opencode_server_messages, init_home};
148
149mod index;
150pub use index::{
151 index_once, index_once_single_flight, index_once_with_options,
152 index_once_with_options_and_progress, SingleFlightOutcome,
153};
154pub(crate) use index::{recalculate_all_session_counts, RawIndexFileReport};
155
156mod search;
157#[cfg(test)]
158pub(crate) use search::corroborate::{extract_corroboration_candidates, git_invocations};
159pub(crate) use search::corroborate_text;
160#[cfg(test)]
161pub(crate) use search::search_history_filtered;
162#[cfg(feature = "semantic")]
163pub(crate) use search::{
164 match_centered_snippet, normalize_ref_filter, resolve_session_filter_ids,
165 retrieval_key_for_text, unique_ranked_results_by_event,
166};
167pub use search::{search_history, search_history_page};
168
169mod read;
170pub(crate) use read::session_events;
171pub use read::{get_event_by_pointer_with_options, get_session_page, latest_event, list_sessions};
172
173mod export;
174pub use export::{export_session_jsonl_with_options, export_session_markdown_with_options};
175
176mod redact;
177pub use redact::{redact_export_json, redact_export_text};
178pub(crate) use redact::{redact_json_value, redact_text};
179
180mod provenance;
181pub(crate) use provenance::extract_refs;
182
183mod raw;
184pub(crate) use raw::{
185 open_raw_offset_reader, payload_for_raw_pointer, raw_envelope_for_line_scan,
186 raw_envelope_for_pointer, read_raw_envelope_at_offset, session_raw_file,
187};
188
189mod document;
190pub(crate) use document::{
191 canonical_type_for_payload, compaction_state_for, file_paths_for_payload, hook_event_name,
192 identity_payload, message_text_for_document, normalize_identity_text, role_for,
193 search_document_for_event, string_field, tool_status_for, SearchDocument,
194};
195#[cfg(any(feature = "semantic", test))]
197pub(crate) use document::embedding_units_for_document;
198
199#[cfg(test)]
200mod tests;