sochdb_query/
lib.rs

1// Copyright 2025 Sushanth (https://github.com/sushanthpy)
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! SochDB Query Engine
16//!
17//! SOCH-QL query language for TOON-native data.
18//!
19//! ## SOCH-QL
20//!
21//! SQL-like query language that returns results in TOON format:
22//!
23//! ```text
24//! SELECT id,name FROM users WHERE score > 80
25//! → users[2]{id,name}:
26//!   1,Alice
27//!   3,Charlie
28//! ```
29//!
30//! ## Query Execution Pipeline (Task 6)
31//!
32//! ```text
33//! parse(sql) → SochQuery → validate → plan → execute → SochTable
34//! ```
35//!
36//! Token reduction: 40-60% vs JSON (66% for typical queries)
37//!
38//! ## CONTEXT SELECT (LLM-Native)
39//!
40//! Priority-based context aggregation for LLM consumption:
41//!
42//! ```text
43//! CONTEXT SELECT
44//!   FROM session('abc123')
45//!   WITH TOKEN_LIMIT 4000
46//!   SECTIONS (
47//!     USER {query, preferences} PRIORITY 1,
48//!     HISTORY {recent} PRIORITY 2,
49//!     KNOWLEDGE {docs} PRIORITY 3
50//!   )
51//! ```
52
53pub mod agent_context; // Re-exported below: AgentContext, SessionManager, permission/audit types
54pub mod bm25_filtered; // Task 6: BM25 filter pushdown via posting-set intersection
55pub mod calc; // Re-exported below: Expr, Evaluator, Parser (as CalcParser), calculate, parse_expr
56pub mod candidate_gate; // Task 4: Unified candidate gate interface
57pub mod capability_token; // Task 8: Capability tokens + ACLs
58pub mod context_query; // Re-exported below: ContextSelectQuery, ContextQueryParser, VectorIndex types
59pub mod cost_optimizer; // Cost-based query optimizer (Task 6)
60pub mod embedding_provider; // Task 2: Automatic embedding generation
61pub mod exact_token_counter; // Task 6: BPE-accurate token counting
62pub mod filter_ir; // Task 1: Canonical Filter IR (CNF/DNF)
63pub mod filtered_vector_search; // Task 5: Filter-aware vector search with selectivity fallback
64pub mod hybrid_retrieval; // Task 3: Vector + BM25 + RRF fusion
65pub mod memory_compaction; // Task 5: Hierarchical memory compaction
66pub mod metadata_index; // Task 3: Metadata index primitives (bitmap + range)
67pub mod namespace; // Task 2: Namespace-scoped query API
68pub mod optimizer_integration; // Re-exported below: OptimizedExecutor, PlanCache, ExecutionPlan, ...
69pub mod plugin_table; // Re-exported below: VirtualTable, VirtualTableRegistry, VirtualTableSchema, ...
70pub mod query_optimizer; // NOTE(review): no crate-root re-export found for this module - confirm intended
71pub mod semantic_triggers; // Task 7: Vector percolator triggers
72pub mod simd_filter; // SIMD vectorized query filters (mm.md Task 5.3)
73pub mod sql; // SQL-92 compatible query engine with SochDB extensions
74pub mod streaming_context; // Task 1: Streaming context generation
75pub mod temporal_decay; // Task 4: Recency-biased scoring
76pub mod token_budget; // Re-exported below: TokenBudgetEnforcer, TokenEstimator, truncate_rows, ...
77pub mod soch_ql; // Re-exported below: SochQlParser, SochQuery, SochResult, SochValue, ...
78pub mod soch_ql_executor; // Re-exported below: SochQlExecutor, QueryPlan, execute_sochql, ...
79pub mod topk_executor; // Streaming Top-K for ORDER BY + LIMIT (Task: Fix ORDER BY Semantics)
80pub mod unified_fusion; // Task 7: Hybrid fusion that never post-filters
81
82pub use agent_context::{
83    AgentContext, AgentPermissions, AuditEntry, AuditOperation, AuditResult, ContextError,
84    ContextValue, DbPermissions, FsPermissions, OperationBudget, PendingWrite, ResourceType,
85    SessionId, SessionManager, TransactionScope,
86};
87pub use calc::{
88    BinaryOp, CalcError, Evaluator, Expr, Parser as CalcParser, RowContext, UnaryOp, calculate,
89    parse_expr,
90};
91pub use context_query::{
92    ContextQueryError, ContextQueryParser, ContextQueryResult, ContextSection, ContextSelectQuery,
93    HnswVectorIndex, SectionPriority, SectionResult, SimpleVectorIndex, VectorIndex,
94    VectorIndexStats, VectorSearchResult,
95};
96pub use optimizer_integration::{
97    CacheStats, ExecutionPlan, ExecutionStep, OptimizedExecutor, OptimizedQueryPlan, PlanCache,
98    StorageBackend, TableStats,
99};
100pub use plugin_table::{
101    PluginVirtualTable, VirtualColumnDef, VirtualColumnType, VirtualFilter, VirtualRow,
102    VirtualTable, VirtualTableError, VirtualTableRegistry, VirtualTableSchema, VirtualTableStats,
103};
104pub use sql::{
105    BinaryOperator, ColumnDef as SqlColumnDef, CreateTableStmt, DeleteStmt, DropTableStmt,
106    Expr as SqlExpr, InsertStmt, JoinType, Lexer, OrderByItem as SqlOrderBy, Parser as SqlParser,
107    SelectStmt, Span, SqlError, SqlResult, Statement, Token, TokenKind, UnaryOperator, UpdateStmt,
108};
109pub use token_budget::{
110    BudgetAllocation, BudgetSection, TokenBudgetConfig, TokenBudgetEnforcer, TokenEstimator,
111    TokenEstimatorConfig, truncate_rows, truncate_to_tokens,
112};
113pub use soch_ql::{
114    ColumnDef, ColumnType, ComparisonOp, Condition, CreateTableQuery, InsertQuery, LogicalOp,
115    OrderBy, ParseError, SelectQuery, SortDirection, SochQlParser, SochQuery, SochResult,
116    SochValue, WhereClause,
117};
118pub use soch_ql_executor::{
119    KeyRange, Predicate, PredicateCondition, QueryPlan, TokenReductionStats, SochQlExecutor,
120    estimate_token_reduction, execute_sochql,
121};
122
123// Streaming Top-K for ORDER BY + LIMIT (Task: Fix ORDER BY Semantics)
124pub use topk_executor::{
125    ColumnRef, ExecutionStrategy as TopKExecutionStrategy, IndexAwareTopK, OrderByColumn, OrderByLimitExecutor,
126    OrderByLimitStats, OrderBySpec, SingleColumnTopK, SortDirection as TopKSortDirection, TopKHeap,
127};
128
129// Task 1: Streaming context generation
130pub use streaming_context::{
131    RollingBudget, SectionChunk, StreamingConfig, StreamingContextExecutor, StreamingContextIter,
132};
133
134// Task 2: Automatic embedding generation
135pub use embedding_provider::{
136    CachedEmbeddingProvider, EmbeddingError, EmbeddingProvider, EmbeddingVectorIndex,
137    MockEmbeddingProvider,
138};
139
140// Task 3: Hybrid retrieval pipeline
141pub use hybrid_retrieval::{
142    FusionMethod, HybridQuery, HybridQueryExecutor, LexicalIndex, MetadataFilter,
143};
144
145// Task 4: Temporal decay scoring
146pub use temporal_decay::{
147    DecayCurve, TemporalDecayConfig, TemporalScorer, TemporallyDecayedResult,
148};
149
150// Task 5: Memory compaction
151pub use memory_compaction::{
152    Abstraction, CompactionStats, Episode, ExtractiveSummarizer, HierarchicalMemory, Summary,
153    Summarizer,
154};
155
156// Task 6: Exact token counting
157pub use exact_token_counter::{
158    ExactBudgetEnforcer, ExactTokenCounter, HeuristicTokenCounter, TokenCounter,
159};
160
161// Task 7: Semantic triggers
162pub use semantic_triggers::{
163    EscalationLevel, EventSource, LogLevel, SemanticTrigger, TriggerAction, TriggerBuilder,
164    TriggerError, TriggerEvent, TriggerIndex, TriggerMatch, TriggerStats,
165};
166
167// ============================================================================
168// Canonical Filter IR + Pushdown Contract (mm.md Tasks 1-8)
169// ============================================================================
170
171// Task 1: Canonical Filter IR (CNF/DNF with typed atoms)
172pub use filter_ir::{
173    AuthCapabilities, AuthScope, Disjunction, FilterAtom, FilterBuilder, FilterIR, FilterValue,
174    FilteredExecutor,
175};
176
177// Task 2: Namespace-Scoped Query API (mandatory namespace)
178pub use namespace::{
179    Namespace, NamespaceError, NamespaceScope, QueryRequest, ScopedQuery,
180};
181
182// Task 3: Metadata Index Primitives (bitmap + range accessors)
183pub use metadata_index::{
184    ConcurrentMetadataIndex, EqualityIndex, MetadataIndex, PostingSet, RangeIndex,
185};
186
187// Task 4: Unified Candidate Gate Interface
188pub use candidate_gate::{
189    AllowedBitmap, AllowedSet, CandidateGate, ExecutionStrategy,
190};
191
192// Task 5: Filter-Aware Vector Search with selectivity-driven fallback
193pub use filtered_vector_search::{
194    FilterAwareSearch, FilteredSearchConfig, FilteredSearchResult, FilteredSearchStrategy,
195    FilteredVectorStore, ScoredResult,
196};
197
198// Task 6: BM25 Filter Pushdown via posting-set intersection
199pub use bm25_filtered::{
200    Bm25Params, DisjunctiveBm25Executor, FilteredBm25Executor, FilteredPhraseExecutor,
201    InvertedIndex, PositionalIndex, PositionalPosting, PostingList,
202};
203
204// Task 7: Hybrid Fusion That Never Post-Filters
205pub use unified_fusion::{
206    Bm25Executor, Bm25QuerySpec, FilteredCandidates, FusionConfig, FusionEngine,
207    FusionMethod as UnifiedFusionMethod, FusionResult, Modality, UnifiedHybridExecutor,
208    UnifiedHybridQuery, VectorExecutor, VectorQuerySpec,
209};
210
211// Task 8: Capability Tokens + ACLs
212pub use capability_token::{
213    AclTagIndex, CapabilityToken, TokenBuilder, TokenCapabilities, TokenError, TokenSigner,
214    TokenValidator,
215};