Skip to main content

sochdb_query/
lib.rs

1// SPDX-License-Identifier: AGPL-3.0-or-later
2// SochDB - LLM-Optimized Embedded Database
3// Copyright (C) 2026 Sushanth Reddy Vanagala (https://github.com/sushanthpy)
4//
5// This program is free software: you can redistribute it and/or modify
6// it under the terms of the GNU Affero General Public License as published by
7// the Free Software Foundation, either version 3 of the License, or
8// (at your option) any later version.
9//
10// This program is distributed in the hope that it will be useful,
11// but WITHOUT ANY WARRANTY; without even the implied warranty of
12// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13// GNU Affero General Public License for more details.
14//
15// You should have received a copy of the GNU Affero General Public License
16// along with this program. If not, see <https://www.gnu.org/licenses/>.
17
18//! SochDB Query Engine
19//!
20//! SOCH-QL query language for TOON-native data.
21//!
22//! ## SOCH-QL
23//!
24//! SQL-like query language that returns results in TOON format:
25//!
26//! ```text
27//! SELECT id,name FROM users WHERE score > 80
28//! → users[2]{id,name}:
29//!   1,Alice
30//!   3,Charlie
31//! ```
32//!
33//! ## Query Execution Pipeline (Task 6)
34//!
35//! ```text
36//! parse(sql) → SochQuery → validate → plan → execute → SochTable
37//! ```
38//!
39//! Token reduction: 40-60% vs JSON (66% for typical queries)
40//!
41//! ## CONTEXT SELECT (LLM-Native)
42//!
43//! Priority-based context aggregation for LLM consumption:
44//!
45//! ```text
46//! CONTEXT SELECT
47//!   FROM session('abc123')
48//!   WITH TOKEN_LIMIT 4000
49//!   SECTIONS (
50//!     USER {query, preferences} PRIORITY 1,
51//!     HISTORY {recent} PRIORITY 2,
52//!     KNOWLEDGE {docs} PRIORITY 3
53//!   )
54//! ```
55
56pub mod agent_context; // Root re-exports: AgentContext, SessionManager, permission/audit/budget types
57pub mod bm25_filtered; // Task 6: BM25 filter pushdown via posting-set intersection
58pub mod calc; // Root re-exports: Evaluator, Expr, Parser (as CalcParser), calculate, parse_expr
59pub mod candidate_gate; // Task 4: Unified candidate gate interface
60pub mod capability_token; // Task 8: Capability tokens + ACLs
61pub mod context_query; // Root re-exports: ContextSelectQuery, ContextQueryParser, VectorIndex types (presumably the CONTEXT SELECT feature from the crate docs; confirm)
62pub mod cost_optimizer; // Cost-based query optimizer (Task 6)
63pub mod embedding_provider; // Task 2: Automatic embedding generation
64pub mod exact_token_counter; // Task 6: BPE-accurate token counting
65pub mod filter_ir; // Task 1: Canonical Filter IR (CNF/DNF)
66pub mod filtered_vector_search; // Task 5: Filter-aware vector search with selectivity fallback
67pub mod hybrid_retrieval; // Task 3: Vector + BM25 + RRF fusion
68pub mod memory_compaction; // Task 5: Hierarchical memory compaction
69pub mod metadata_index; // Task 3: Metadata index primitives (bitmap + range)
70pub mod namespace; // Task 2: Namespace-scoped query API
71pub mod optimizer_integration; // Root re-exports: OptimizedExecutor, PlanCache, StorageBackend, TableStats
72pub mod plugin_table; // Root re-exports: VirtualTable, VirtualTableRegistry and related virtual-table types
73pub mod query_optimizer; // NOTE(review): no items from this module are re-exported in the visible part of this file
74pub mod semantic_triggers; // Task 7: Vector percolator triggers
75pub mod simd_filter; // SIMD vectorized query filters (mm.md Task 5.3)
76pub mod sql; // SQL-92 compatible query engine with SochDB extensions
77pub mod streaming_context; // Task 1: Streaming context generation
78pub mod temporal_decay; // Task 4: Recency-biased scoring
79pub mod token_budget; // Root re-exports: TokenBudgetEnforcer, TokenEstimator, truncate_rows, truncate_to_tokens
80pub mod soch_ql; // Root re-exports: SochQlParser, SochQuery, SochResult and query/condition types (presumably the SOCH-QL parser; confirm)
81pub mod soch_ql_executor; // Root re-exports: SochQlExecutor, QueryPlan, execute_sochql, estimate_token_reduction
82pub mod topk_executor; // Streaming Top-K for ORDER BY + LIMIT (Task: Fix ORDER BY Semantics)
83pub mod unified_fusion; // Task 7: Hybrid fusion that never post-filters
84
85pub use agent_context::{
86    AgentContext, AgentPermissions, AuditEntry, AuditOperation, AuditResult, ContextError,
87    ContextValue, DbPermissions, FsPermissions, OperationBudget, PendingWrite, ResourceType,
88    SessionId, SessionManager, TransactionScope,
89};
90pub use calc::{
91    BinaryOp, CalcError, Evaluator, Expr, Parser as CalcParser, RowContext, UnaryOp, calculate,
92    parse_expr,
93};
94pub use context_query::{
95    ContextQueryError, ContextQueryParser, ContextQueryResult, ContextSection, ContextSelectQuery,
96    HnswVectorIndex, SectionPriority, SectionResult, SimpleVectorIndex, VectorIndex,
97    VectorIndexStats, VectorSearchResult,
98};
99pub use optimizer_integration::{
100    CacheStats, ExecutionPlan, ExecutionStep, OptimizedExecutor, OptimizedQueryPlan, PlanCache,
101    StorageBackend, TableStats,
102};
103pub use plugin_table::{
104    PluginVirtualTable, VirtualColumnDef, VirtualColumnType, VirtualFilter, VirtualRow,
105    VirtualTable, VirtualTableError, VirtualTableRegistry, VirtualTableSchema, VirtualTableStats,
106};
107pub use sql::{
108    BinaryOperator, ColumnDef as SqlColumnDef, CreateTableStmt, DeleteStmt, DropTableStmt,
109    Expr as SqlExpr, InsertStmt, JoinType, Lexer, OrderByItem as SqlOrderBy, Parser as SqlParser,
110    SelectStmt, Span, SqlError, SqlResult, Statement, Token, TokenKind, UnaryOperator, UpdateStmt,
111};
112pub use token_budget::{
113    BudgetAllocation, BudgetSection, TokenBudgetConfig, TokenBudgetEnforcer, TokenEstimator,
114    TokenEstimatorConfig, truncate_rows, truncate_to_tokens,
115};
116pub use soch_ql::{
117    ColumnDef, ColumnType, ComparisonOp, Condition, CreateTableQuery, InsertQuery, LogicalOp,
118    OrderBy, ParseError, SelectQuery, SortDirection, SochQlParser, SochQuery, SochResult,
119    SochValue, WhereClause,
120};
121pub use soch_ql_executor::{
122    KeyRange, Predicate, PredicateCondition, QueryPlan, TokenReductionStats, SochQlExecutor,
123    estimate_token_reduction, execute_sochql,
124};
125
126// Streaming Top-K for ORDER BY + LIMIT (Task: Fix ORDER BY Semantics)
127pub use topk_executor::{
128    ColumnRef, ExecutionStrategy as TopKExecutionStrategy, IndexAwareTopK, OrderByColumn, OrderByLimitExecutor,
129    OrderByLimitStats, OrderBySpec, SingleColumnTopK, SortDirection as TopKSortDirection, TopKHeap,
130};
131
132// Task 1: Streaming context generation
133pub use streaming_context::{
134    RollingBudget, SectionChunk, StreamingConfig, StreamingContextExecutor, StreamingContextIter,
135};
136
137// Task 2: Automatic embedding generation
138pub use embedding_provider::{
139    CachedEmbeddingProvider, EmbeddingError, EmbeddingProvider, EmbeddingVectorIndex,
140    MockEmbeddingProvider,
141};
142
143// Task 3: Hybrid retrieval pipeline
144pub use hybrid_retrieval::{
145    FusionMethod, HybridQuery, HybridQueryExecutor, LexicalIndex, MetadataFilter,
146};
147
148// Task 4: Temporal decay scoring
149pub use temporal_decay::{
150    DecayCurve, TemporalDecayConfig, TemporalScorer, TemporallyDecayedResult,
151};
152
153// Task 5: Memory compaction
154pub use memory_compaction::{
155    Abstraction, CompactionStats, Episode, ExtractiveSummarizer, HierarchicalMemory, Summary,
156    Summarizer,
157};
158
159// Task 6: Exact token counting
160pub use exact_token_counter::{
161    ExactBudgetEnforcer, ExactTokenCounter, HeuristicTokenCounter, TokenCounter,
162};
163
164// Task 7: Semantic triggers
165pub use semantic_triggers::{
166    EscalationLevel, EventSource, LogLevel, SemanticTrigger, TriggerAction, TriggerBuilder,
167    TriggerError, TriggerEvent, TriggerIndex, TriggerMatch, TriggerStats,
168};
169
170// ============================================================================
171// Canonical Filter IR + Pushdown Contract (mm.md Tasks 1-8)
172// ============================================================================
173
174// Task 1: Canonical Filter IR (CNF/DNF with typed atoms)
175pub use filter_ir::{
176    AuthCapabilities, AuthScope, Disjunction, FilterAtom, FilterBuilder, FilterIR, FilterValue,
177    FilteredExecutor,
178};
179
180// Task 2: Namespace-Scoped Query API (mandatory namespace)
181pub use namespace::{
182    Namespace, NamespaceError, NamespaceScope, QueryRequest, ScopedQuery,
183};
184
185// Task 3: Metadata Index Primitives (bitmap + range accessors)
186pub use metadata_index::{
187    ConcurrentMetadataIndex, EqualityIndex, MetadataIndex, PostingSet, RangeIndex,
188};
189
190// Task 4: Unified Candidate Gate Interface
191pub use candidate_gate::{
192    AllowedBitmap, AllowedSet, CandidateGate, ExecutionStrategy,
193};
194
195// Task 5: Filter-Aware Vector Search with selectivity-driven fallback
196pub use filtered_vector_search::{
197    FilterAwareSearch, FilteredSearchConfig, FilteredSearchResult, FilteredSearchStrategy,
198    FilteredVectorStore, ScoredResult,
199};
200
201// Task 6: BM25 Filter Pushdown via posting-set intersection
202pub use bm25_filtered::{
203    Bm25Params, DisjunctiveBm25Executor, FilteredBm25Executor, FilteredPhraseExecutor,
204    InvertedIndex, PositionalIndex, PositionalPosting, PostingList,
205};
206
207// Task 7: Hybrid Fusion That Never Post-Filters
208pub use unified_fusion::{
209    Bm25Executor, Bm25QuerySpec, FilteredCandidates, FusionConfig, FusionEngine,
210    FusionMethod as UnifiedFusionMethod, FusionResult, Modality, UnifiedHybridExecutor,
211    UnifiedHybridQuery, VectorExecutor, VectorQuerySpec,
212};
213
214// Task 8: Capability Tokens + ACLs
215pub use capability_token::{
216    AclTagIndex, CapabilityToken, TokenBuilder, TokenCapabilities, TokenError, TokenSigner,
217    TokenValidator,
218};