Skip to main content

sochdb_query/
lib.rs

1// SPDX-License-Identifier: AGPL-3.0-or-later
2// SochDB - LLM-Optimized Embedded Database
3// Copyright (C) 2026 Sushanth Reddy Vanagala (https://github.com/sushanthpy)
4//
5// This program is free software: you can redistribute it and/or modify
6// it under the terms of the GNU Affero General Public License as published by
7// the Free Software Foundation, either version 3 of the License, or
8// (at your option) any later version.
9//
10// This program is distributed in the hope that it will be useful,
11// but WITHOUT ANY WARRANTY; without even the implied warranty of
12// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13// GNU Affero General Public License for more details.
14//
15// You should have received a copy of the GNU Affero General Public License
16// along with this program. If not, see <https://www.gnu.org/licenses/>.
17
18//! SochDB Query Engine
19//!
20//! SOCH-QL query language for TOON-native data.
21//!
22//! ## SOCH-QL
23//!
24//! SQL-like query language that returns results in TOON format:
25//!
26//! ```text
27//! SELECT id,name FROM users WHERE score > 80
28//! → users[2]{id,name}:
29//!   1,Alice
30//!   3,Charlie
31//! ```
32//!
33//! ## Query Execution Pipeline (Task 6)
34//!
35//! ```text
36//! parse(sql) → SochQuery → validate → plan → execute → SochTable
37//! ```
38//!
39//! Token reduction: 40-60% vs JSON (66% for typical queries)
40//!
41//! ## CONTEXT SELECT (LLM-Native)
42//!
43//! Priority-based context aggregation for LLM consumption:
44//!
45//! ```text
46//! CONTEXT SELECT
47//!   FROM session('abc123')
48//!   WITH TOKEN_LIMIT 4000
49//!   SECTIONS (
50//!     USER {query, preferences} PRIORITY 1,
51//!     HISTORY {recent} PRIORITY 2,
52//!     KNOWLEDGE {docs} PRIORITY 3
53//!   )
54//! ```
55
56pub mod agent_context; // Re-exported below: `AgentContext`, `SessionManager`, plus permission/audit types
57pub mod bm25_filtered; // Task 6: BM25 filter pushdown via posting-set intersection
58pub mod calc; // Re-exported below: `Evaluator`, `Expr`, `CalcParser` (alias of `Parser`), `calculate`, `parse_expr`
59pub mod candidate_gate; // Task 4: Unified candidate gate interface
60pub mod capability_token; // Task 8: Capability tokens + ACLs
61pub mod context_query; // Re-exported below: `ContextQueryParser`, `ContextSelectQuery`, `VectorIndex` and related types
62pub mod cost_optimizer; // Cost-based query optimizer (Task 6)
63pub mod embedding_provider; // Task 2: Automatic embedding generation
64pub mod exact_token_counter; // Task 6: BPE-accurate token counting
65pub mod executor; // v1.0: Unified Volcano query executor
66pub mod filter_ir; // Task 1: Canonical Filter IR (CNF/DNF)
67pub mod filtered_vector_search; // Task 5: Filter-aware vector search with selectivity fallback
68pub mod hybrid_retrieval; // Task 3: Vector + BM25 + RRF fusion
69pub mod memory_compaction; // Task 5: Hierarchical memory compaction
70pub mod metadata_index; // Task 3: Metadata index primitives (bitmap + range)
71pub mod namespace; // Task 2: Namespace-scoped query API
72pub mod optimizer_integration; // Re-exported below: `OptimizedExecutor`, `PlanCache`, `StorageBackend`, `TableStats`, ...
73pub mod plugin_table; // Re-exported below: `VirtualTable`, `PluginVirtualTable`, `VirtualTableRegistry`, ...
74pub mod query_optimizer; // NOTE(review): nothing from this module is re-exported in the visible part of this file
75pub mod semantic_triggers; // Task 7: Vector percolator triggers
76pub mod simd_filter; // SIMD vectorized query filters (mm.md Task 5.3)
77pub mod sql; // SQL-92 compatible query engine with SochDB extensions
78pub mod storage_bridge; // Phase 0: Wire SQL execution to real storage
79pub mod streaming_context; // Task 1: Streaming context generation
80pub mod temporal_decay; // Task 4: Recency-biased scoring
81pub mod token_budget; // Re-exported below: `TokenBudgetEnforcer`, `TokenEstimator`, `truncate_rows`, `truncate_to_tokens`
82pub mod soch_ql; // Presumably the SOCH-QL parser/AST from the crate docs; re-exports `SochQlParser`, `SochQuery` (confirm)
83pub mod soch_ql_executor; // Re-exported below: `SochQlExecutor`, `QueryPlan`, `execute_sochql`, `estimate_token_reduction`
84pub mod topk_executor; // Streaming Top-K for ORDER BY + LIMIT (Task: Fix ORDER BY Semantics)
85pub mod unified_fusion; // Task 7: Hybrid fusion that never post-filters
86
87pub use agent_context::{
88    AgentContext, AgentPermissions, AuditEntry, AuditOperation, AuditResult, ContextError,
89    ContextValue, DbPermissions, FsPermissions, OperationBudget, PendingWrite, ResourceType,
90    SessionId, SessionManager, TransactionScope,
91};
92pub use calc::{
93    BinaryOp, CalcError, Evaluator, Expr, Parser as CalcParser, RowContext, UnaryOp, calculate,
94    parse_expr,
95};
96pub use context_query::{
97    ContextQueryError, ContextQueryParser, ContextQueryResult, ContextSection, ContextSelectQuery,
98    HnswVectorIndex, SectionPriority, SectionResult, SimpleVectorIndex, VectorIndex,
99    VectorIndexStats, VectorSearchResult,
100};
101pub use optimizer_integration::{
102    CacheStats, ExecutionPlan, ExecutionStep, OptimizedExecutor, OptimizedQueryPlan, PlanCache,
103    StorageBackend, TableStats,
104};
105pub use plugin_table::{
106    PluginVirtualTable, VirtualColumnDef, VirtualColumnType, VirtualFilter, VirtualRow,
107    VirtualTable, VirtualTableError, VirtualTableRegistry, VirtualTableSchema, VirtualTableStats,
108};
109pub use sql::{
110    BinaryOperator, ColumnDef as SqlColumnDef, CreateTableStmt, DeleteStmt, DropTableStmt,
111    Expr as SqlExpr, InsertStmt, JoinType, Lexer, OrderByItem as SqlOrderBy, Parser as SqlParser,
112    SelectStmt, Span, SqlError, SqlResult, Statement, Token, TokenKind, UnaryOperator, UpdateStmt,
113};
114pub use token_budget::{
115    BudgetAllocation, BudgetSection, TokenBudgetConfig, TokenBudgetEnforcer, TokenEstimator,
116    TokenEstimatorConfig, truncate_rows, truncate_to_tokens,
117};
118pub use soch_ql::{
119    ColumnDef, ColumnType, ComparisonOp, Condition, CreateTableQuery, InsertQuery, LogicalOp,
120    OrderBy, ParseError, SelectQuery, SortDirection, SochQlParser, SochQuery, SochResult,
121    SochValue, WhereClause,
122};
123pub use soch_ql_executor::{
124    KeyRange, Predicate, PredicateCondition, QueryPlan, TokenReductionStats, SochQlExecutor,
125    estimate_token_reduction, execute_sochql,
126};
127pub use storage_bridge::{
128    DatabaseStorageBackend, DatabaseSqlConnection,
129    convert_core_to_query, convert_query_to_core,
130};
131
132// v1.0: Unified Volcano query executor
133pub use executor::{
134    Row, Schema, ColumnMeta, PlanNode,
135    SeqScanNode, IndexSeekNode, FilterNode, ProjectNode, SortNode, LimitNode,
136    HashJoinNode, NestedLoopJoinNode, MergeJoinNode, HashAggregateNode,
137    ExplainNode, QueryPlanner,
138    execute_sql, execute_statement, ExecutorConfig,
139};
140
141// Streaming Top-K for ORDER BY + LIMIT (Task: Fix ORDER BY Semantics)
142pub use topk_executor::{
143    ColumnRef, ExecutionStrategy as TopKExecutionStrategy, IndexAwareTopK, OrderByColumn, OrderByLimitExecutor,
144    OrderByLimitStats, OrderBySpec, SingleColumnTopK, SortDirection as TopKSortDirection, TopKHeap,
145};
146
147// Task 1: Streaming context generation
148pub use streaming_context::{
149    RollingBudget, SectionChunk, StreamingConfig, StreamingContextExecutor, StreamingContextIter,
150};
151
152// Task 2: Automatic embedding generation
153pub use embedding_provider::{
154    CachedEmbeddingProvider, EmbeddingError, EmbeddingProvider, EmbeddingVectorIndex,
155    MockEmbeddingProvider,
156};
157
158// Task 3: Hybrid retrieval pipeline
159pub use hybrid_retrieval::{
160    FusionMethod, HybridQuery, HybridQueryExecutor, LexicalIndex, MetadataFilter,
161};
162
163// Task 4: Temporal decay scoring
164pub use temporal_decay::{
165    DecayCurve, TemporalDecayConfig, TemporalScorer, TemporallyDecayedResult,
166};
167
168// Task 5: Memory compaction
169pub use memory_compaction::{
170    Abstraction, CompactionStats, Episode, ExtractiveSummarizer, HierarchicalMemory, Summary,
171    Summarizer,
172};
173
174// Task 6: Exact token counting
175pub use exact_token_counter::{
176    ExactBudgetEnforcer, ExactTokenCounter, HeuristicTokenCounter, TokenCounter,
177};
178
179// Task 7: Semantic triggers
180pub use semantic_triggers::{
181    EscalationLevel, EventSource, LogLevel, SemanticTrigger, TriggerAction, TriggerBuilder,
182    TriggerError, TriggerEvent, TriggerIndex, TriggerMatch, TriggerStats,
183};
184
185// ============================================================================
186// Canonical Filter IR + Pushdown Contract (mm.md Tasks 1-8)
187// ============================================================================
188
189// Task 1: Canonical Filter IR (CNF/DNF with typed atoms)
190pub use filter_ir::{
191    AuthCapabilities, AuthScope, Disjunction, FilterAtom, FilterBuilder, FilterIR, FilterValue,
192    FilteredExecutor,
193};
194
195// Task 2: Namespace-Scoped Query API (mandatory namespace)
196pub use namespace::{
197    Namespace, NamespaceError, NamespaceScope, QueryRequest, ScopedQuery,
198};
199
200// Task 3: Metadata Index Primitives (bitmap + range accessors)
201pub use metadata_index::{
202    ConcurrentMetadataIndex, EqualityIndex, MetadataIndex, PostingSet, RangeIndex,
203};
204
205// Task 4: Unified Candidate Gate Interface
206pub use candidate_gate::{
207    AllowedBitmap, AllowedSet, CandidateGate, ExecutionStrategy,
208};
209
210// Task 5: Filter-Aware Vector Search with selectivity-driven fallback
211pub use filtered_vector_search::{
212    FilterAwareSearch, FilteredSearchConfig, FilteredSearchResult, FilteredSearchStrategy,
213    FilteredVectorStore, ScoredResult,
214};
215
216// Task 6: BM25 Filter Pushdown via posting-set intersection
217pub use bm25_filtered::{
218    Bm25Params, DisjunctiveBm25Executor, FilteredBm25Executor, FilteredPhraseExecutor,
219    InvertedIndex, PositionalIndex, PositionalPosting, PostingList,
220};
221
222// Task 7: Hybrid Fusion That Never Post-Filters
223pub use unified_fusion::{
224    Bm25Executor, Bm25QuerySpec, FilteredCandidates, FusionConfig, FusionEngine,
225    FusionMethod as UnifiedFusionMethod, FusionResult, Modality, UnifiedHybridExecutor,
226    UnifiedHybridQuery, VectorExecutor, VectorQuerySpec,
227};
228
229// Task 8: Capability Tokens + ACLs
230pub use capability_token::{
231    AclTagIndex, CapabilityToken, TokenBuilder, TokenCapabilities, TokenError, TokenSigner,
232    TokenValidator,
233};