Skip to main content

sochdb_query/
lib.rs

1// SPDX-License-Identifier: AGPL-3.0-or-later
2// SochDB - LLM-Optimized Embedded Database
3// Copyright (C) 2026 Sushanth Reddy Vanagala (https://github.com/sushanthpy)
4//
5// This program is free software: you can redistribute it and/or modify
6// it under the terms of the GNU Affero General Public License as published by
7// the Free Software Foundation, either version 3 of the License, or
8// (at your option) any later version.
9//
10// This program is distributed in the hope that it will be useful,
11// but WITHOUT ANY WARRANTY; without even the implied warranty of
12// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13// GNU Affero General Public License for more details.
14//
15// You should have received a copy of the GNU Affero General Public License
16// along with this program. If not, see <https://www.gnu.org/licenses/>.
17
18//! SochDB Query Engine
19//!
20//! SOCH-QL query language for TOON-native data.
21//!
22//! ## SOCH-QL
23//!
24//! SQL-like query language that returns results in TOON format:
25//!
26//! ```text
27//! SELECT id,name FROM users WHERE score > 80
28//! → users[2]{id,name}:
29//!   1,Alice
30//!   3,Charlie
31//! ```
32//!
33//! ## Query Execution Pipeline (Task 6)
34//!
35//! ```text
36//! parse(sql) → SochQuery → validate → plan → execute → SochTable
37//! ```
38//!
39//! Token reduction: 40-60% vs JSON (66% for typical queries)
40//!
41//! ## CONTEXT SELECT (LLM-Native)
42//!
43//! Priority-based context aggregation for LLM consumption:
44//!
45//! ```text
46//! CONTEXT SELECT
47//!   FROM session('abc123')
48//!   WITH TOKEN_LIMIT 4000
49//!   SECTIONS (
50//!     USER {query, preferences} PRIORITY 1,
51//!     HISTORY {recent} PRIORITY 2,
52//!     KNOWLEDGE {docs} PRIORITY 3
53//!   )
54//! ```
55
56pub mod agent_context; // Defines AgentContext, SessionManager, permission/audit types (re-exported below)
57pub mod bm25_filtered; // Task 6: BM25 filter pushdown via posting-set intersection
58pub mod calc; // Defines Expr, Evaluator, Parser (as CalcParser), calculate, parse_expr (re-exported below)
59pub mod candidate_gate; // Task 4: Unified candidate gate interface
60pub mod capability_token; // Task 8: Capability tokens + ACLs
61pub mod context_compiler; // Defines ContextCompiler, ContextSpec, CompiledContext (re-exported below)
62pub mod context_query; // Defines ContextSelectQuery, ContextQueryParser, VectorIndex types; presumably the CONTEXT SELECT path from the crate docs (confirm)
63pub mod cost_optimizer; // Cost-based query optimizer (Task 6)
64pub mod embedding_provider; // Task 2: Automatic embedding generation
65pub mod exact_token_counter; // Task 6: BPE-accurate token counting
66#[cfg(feature = "experimental")]
67pub mod executor; // v1.0 Volcano query executor [quarantined: unwired, not on live SQL path]
68pub mod filter_ir; // Task 1: Canonical Filter IR (CNF/DNF)
69pub mod filtered_vector_search; // Task 5: Filter-aware vector search with selectivity fallback
70pub mod grep_executor; // Task 5: Grep lane (trigram-accelerated regex search)
71pub mod like; // Canonical SQL LIKE matcher (single source of truth across all query paths)
72pub mod memory_compaction; // Task 5: Hierarchical memory compaction
73pub mod metadata_index; // Task 3: Metadata index primitives (bitmap + range)
74pub mod namespace; // Task 2: Namespace-scoped query API
75pub mod optimizer_integration; // Defines OptimizedExecutor, PlanCache, StorageBackend, TableStats (re-exported below)
76pub mod plugin_table; // Defines VirtualTable, VirtualTableRegistry and related Virtual* types (re-exported below)
77pub mod query_optimizer; // NOTE(review): no crate-root re-export visible in this file; reach items via the module path
78pub mod semantic_triggers; // Task 7: Vector percolator triggers
79pub mod simd_filter; // SIMD vectorized query filters (mm.md Task 5.3)
80pub mod soch_ql; // SOCH-QL parser and query types: SochQlParser, SochQuery, SelectQuery (re-exported below)
81pub mod soch_ql_executor; // Defines SochQlExecutor, execute_sochql, estimate_token_reduction (re-exported below)
82pub mod sql; // SQL-92 compatible query engine with SochDB extensions
83pub mod storage_bridge; // Phase 0: Wire SQL execution to real storage
84pub mod streaming_context; // Task 1: Streaming context generation
85pub mod temporal_decay; // Task 4: Recency-biased scoring
86pub mod token_budget; // Defines TokenBudgetEnforcer, TokenEstimator, truncate_rows, truncate_to_tokens (re-exported below)
87pub mod topk_executor; // Streaming Top-K for ORDER BY + LIMIT (Task: Fix ORDER BY Semantics)
88pub mod trigram_index; // Task 5: Trigram candidate index for the grep lane
89pub mod unified_fusion; // Task 7: Hybrid fusion that never post-filters
90
91pub use agent_context::{
92    AgentContext, AgentPermissions, AuditEntry, AuditOperation, AuditResult, ContextError,
93    ContextValue, DbPermissions, FsPermissions, OperationBudget, PendingWrite, ResourceType,
94    SessionId, SessionManager, TransactionScope,
95};
96pub use calc::{
97    BinaryOp, CalcError, Evaluator, Expr, Parser as CalcParser, RowContext, UnaryOp, calculate,
98    parse_expr,
99};
100pub use context_compiler::{
101    CompiledContext, CompiledFact, ContextCandidate, ContextCompiler, ContextSpec, ContextTemplate,
102};
103
104pub use context_query::{
105    ContextQueryError, ContextQueryParser, ContextQueryResult, ContextSection, ContextSelectQuery,
106    HnswVectorIndex, SectionPriority, SectionResult, SimpleVectorIndex, VectorIndex,
107    VectorIndexStats, VectorSearchResult,
108};
109pub use optimizer_integration::{
110    CacheStats, ExecutionPlan, ExecutionStep, OptimizedExecutor, OptimizedQueryPlan, PlanCache,
111    StorageBackend, TableStats,
112};
113pub use plugin_table::{
114    PluginVirtualTable, VirtualColumnDef, VirtualColumnType, VirtualFilter, VirtualRow,
115    VirtualTable, VirtualTableError, VirtualTableRegistry, VirtualTableSchema, VirtualTableStats,
116};
117pub use soch_ql::{
118    ColumnDef, ColumnType, ComparisonOp, Condition, CreateTableQuery, InsertQuery, LogicalOp,
119    OrderBy, ParseError, SelectQuery, SochQlParser, SochQuery, SochResult, SochValue,
120    SortDirection, WhereClause,
121};
122pub use soch_ql_executor::{
123    KeyRange, Predicate, PredicateCondition, QueryPlan, SochQlExecutor, TokenReductionStats,
124    estimate_token_reduction, execute_sochql,
125};
126pub use sql::{
127    BinaryOperator, ColumnDef as SqlColumnDef, CreateTableStmt, DeleteStmt, DropTableStmt,
128    Expr as SqlExpr, InsertStmt, JoinType, Lexer, OrderByItem as SqlOrderBy, Parser as SqlParser,
129    SelectStmt, Span, SqlError, SqlResult, Statement, Token, TokenKind, UnaryOperator, UpdateStmt,
130};
131pub use storage_bridge::{
132    DatabaseSqlConnection, DatabaseStorageBackend, convert_core_to_query, convert_query_to_core,
133};
134pub use token_budget::{
135    BudgetAllocation, BudgetSection, TokenBudgetConfig, TokenBudgetEnforcer, TokenEstimator,
136    TokenEstimatorConfig, truncate_rows, truncate_to_tokens,
137};
138
139// v1.0 Volcano query executor [quarantined behind `experimental`: not on live SQL path]
140#[cfg(feature = "experimental")]
141pub use executor::{
142    ColumnMeta, ExecutorConfig, ExplainNode, FilterNode, HashAggregateNode, HashJoinNode,
143    IndexSeekNode, LimitNode, MergeJoinNode, NestedLoopJoinNode, PlanNode, ProjectNode,
144    QueryPlanner, Row, Schema, SeqScanNode, SortNode, execute_sql, execute_statement,
145};
146
147// Streaming Top-K for ORDER BY + LIMIT (Task: Fix ORDER BY Semantics)
148pub use topk_executor::{
149    ColumnRef, ExecutionStrategy as TopKExecutionStrategy, IndexAwareTopK, OrderByColumn,
150    OrderByLimitExecutor, OrderByLimitStats, OrderBySpec, SingleColumnTopK,
151    SortDirection as TopKSortDirection, TopKHeap,
152};
153
154// Task 1: Streaming context generation
155pub use streaming_context::{
156    RollingBudget, SectionChunk, StreamingConfig, StreamingContextExecutor, StreamingContextIter,
157};
158
159// Task 2: Automatic embedding generation
160pub use embedding_provider::{
161    CachedEmbeddingProvider, EmbeddingError, EmbeddingProvider, EmbeddingVectorIndex,
162    MockEmbeddingProvider,
163};
164
165// Task 4: Temporal decay scoring
166pub use temporal_decay::{
167    DecayCurve, TemporalDecayConfig, TemporalScorer, TemporallyDecayedResult,
168};
169
170// Task 5: Memory compaction
171pub use memory_compaction::{
172    Abstraction, CompactionStats, Episode, ExtractiveSummarizer, HierarchicalMemory, Summarizer,
173    Summary,
174};
175
176// Task 6: Exact token counting
177pub use exact_token_counter::{
178    count_tokens_exact, count_tokens_heuristic, ExactBudgetEnforcer, ExactTokenCounter,
179    HeuristicTokenCounter, TokenCounter,
180};
181
182// Task 7: Semantic triggers
183pub use semantic_triggers::{
184    EscalationLevel, EventSource, LogLevel, SemanticTrigger, TriggerAction, TriggerBuilder,
185    TriggerError, TriggerEvent, TriggerIndex, TriggerMatch, TriggerStats,
186};
187
188// ============================================================================
189// Canonical Filter IR + Pushdown Contract (mm.md Tasks 1-8)
190// ============================================================================
191
192// Task 1: Canonical Filter IR (CNF/DNF with typed atoms)
193pub use filter_ir::{
194    AuthCapabilities, AuthScope, Disjunction, FilterAtom, FilterBuilder, FilterIR, FilterValue,
195    FilteredExecutor,
196};
197
198// Task 2: Namespace-Scoped Query API (mandatory namespace)
199pub use namespace::{Namespace, NamespaceError, NamespaceScope, QueryRequest, ScopedQuery};
200
201// Task 3: Metadata Index Primitives (bitmap + range accessors)
202pub use metadata_index::{
203    ConcurrentMetadataIndex, EqualityIndex, MetadataIndex, PostingSet, RangeIndex,
204};
205
206// Task 4: Unified Candidate Gate Interface
207pub use candidate_gate::{AllowedBitmap, AllowedSet, CandidateGate, ExecutionStrategy};
208
209// Task 5: Filter-Aware Vector Search with selectivity-driven fallback
210pub use filtered_vector_search::{
211    FilterAwareSearch, FilteredSearchConfig, FilteredSearchResult, FilteredSearchStrategy,
212    FilteredVectorStore, ScoredResult,
213};
214
215// Task 6: BM25 Filter Pushdown via posting-set intersection
216pub use bm25_filtered::{
217    Bm25Params, DisjunctiveBm25Executor, FilteredBm25Executor, FilteredPhraseExecutor,
218    InvertedIndex, PositionalIndex, PositionalPosting, PostingList,
219};
220
221// Task 7: Hybrid Fusion That Never Post-Filters
222pub use unified_fusion::{
223    Bm25Executor, Bm25QuerySpec, DocId, FilteredCandidates, FusionConfig, FusionEngine,
224    FusionMethod as UnifiedFusionMethod, FusionResult, GrepLaneExecutor, GrepQuerySpec, Modality,
225    RankedList, UnifiedHybridExecutor, UnifiedHybridQuery, VectorExecutor, VectorQuerySpec,
226    WeightedLane, fuse_rrf_weighted,
227};
228
229// Task 5: Grep lane (trigram-accelerated regex search)
230pub use grep_executor::{
231    DEFAULT_MAX_SCAN, GrepError, GrepExecutor, GrepHit, GrepMode, GrepResults, required_trigrams,
232};
233pub use trigram_index::{Trigram, TrigramIndex, trigrams_of};
234
235// Task 8: Capability Tokens + ACLs
236pub use capability_token::{
237    AclTagIndex, CapabilityToken, TokenBuilder, TokenCapabilities, TokenError, TokenSigner,
238    TokenValidator,
239};