// sochdb-query 2.0.5
//
// SochDB query engine (sync-first execution and vector query planning)
// Documentation
// SPDX-License-Identifier: AGPL-3.0-or-later
// SochDB - LLM-Optimized Embedded Database
// Copyright (C) 2026 Sushanth Reddy Vanagala (https://github.com/sushanthpy)
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.

//! SochDB Query Engine
//!
//! SOCH-QL query language for TOON-native data.
//!
//! ## SOCH-QL
//!
//! SQL-like query language that returns results in TOON format:
//!
//! ```text
//! SELECT id,name FROM users WHERE score > 80
//! → users[2]{id,name}:
//!   1,Alice
//!   3,Charlie
//! ```
//!
//! ## Query Execution Pipeline (Task 6)
//!
//! ```text
//! parse(sql) → SochQuery → validate → plan → execute → SochTable
//! ```
//!
//! Token reduction: 40-60% vs JSON (66% for typical queries)
//!
//! ## CONTEXT SELECT (LLM-Native)
//!
//! Priority-based context aggregation for LLM consumption:
//!
//! ```text
//! CONTEXT SELECT
//!   FROM session('abc123')
//!   WITH TOKEN_LIMIT 4000
//!   SECTIONS (
//!     USER {query, preferences} PRIORITY 1,
//!     HISTORY {recent} PRIORITY 2,
//!     KNOWLEDGE {docs} PRIORITY 3
//!   )
//! ```

// Public modules of the query crate. Trailing comments give a short description
// and, where the original author recorded one, the task/phase tag the module
// was introduced under. Descriptions added for previously uncommented modules
// are derived from the items re-exported at the crate root further down.
pub mod agent_context; // Agent session context: `AgentContext`, `SessionManager`, permission and audit types
pub mod bm25_filtered; // Task 6: BM25 filter pushdown via posting-set intersection
pub mod calc; // Expression parsing/evaluation: `CalcParser`, `Evaluator`, `calculate`, `parse_expr`
pub mod candidate_gate; // Task 4: Unified candidate gate interface
pub mod capability_token; // Task 8: Capability tokens + ACLs
pub mod context_compiler; // `ContextCompiler`, `ContextSpec`, `ContextTemplate` and compiled-context types
pub mod context_query; // CONTEXT SELECT query types (`ContextSelectQuery`, `ContextQueryParser`) and vector index types
pub mod cost_optimizer; // Cost-based query optimizer (Task 6)
pub mod embedding_provider; // Task 2: Automatic embedding generation
pub mod exact_token_counter; // Task 6: BPE-accurate token counting
// Compiled only when the `experimental` Cargo feature is enabled; the matching
// `pub use executor::{...}` re-export is gated by the same feature.
#[cfg(feature = "experimental")]
pub mod executor; // v1.0 Volcano query executor [quarantined: unwired, not on live SQL path]
pub mod filter_ir; // Task 1: Canonical Filter IR (CNF/DNF)
pub mod filtered_vector_search; // Task 5: Filter-aware vector search with selectivity fallback
pub mod grep_executor; // Task 5: Grep lane (trigram-accelerated regex search)
pub mod like; // Canonical SQL LIKE matcher (single source of truth across all query paths)
pub mod memory_compaction; // Task 5: Hierarchical memory compaction
pub mod metadata_index; // Task 3: Metadata index primitives (bitmap + range)
pub mod namespace; // Task 2: Namespace-scoped query API
pub mod optimizer_integration; // `OptimizedExecutor`, `PlanCache`, `StorageBackend`, execution plan types
pub mod plugin_table; // Virtual tables: `VirtualTable`, `PluginVirtualTable`, `VirtualTableRegistry`
pub mod query_optimizer; // NOTE(review): nothing re-exported at the crate root; relationship to `cost_optimizer` not visible here
pub mod semantic_triggers; // Task 7: Vector percolator triggers
pub mod simd_filter; // SIMD vectorized query filters (mm.md Task 5.3)
pub mod soch_ql; // SOCH-QL parser and query types: `SochQlParser`, `SochQuery`, `SochValue`
pub mod soch_ql_executor; // `SochQlExecutor`, `execute_sochql`, `estimate_token_reduction`
pub mod sql; // SQL-92 compatible query engine with SochDB extensions
pub mod storage_bridge; // Phase 0: Wire SQL execution to real storage
pub mod streaming_context; // Task 1: Streaming context generation
pub mod temporal_decay; // Task 4: Recency-biased scoring
pub mod token_budget; // `TokenBudgetEnforcer`, `TokenEstimator`, `truncate_rows`, `truncate_to_tokens`
pub mod topk_executor; // Streaming Top-K for ORDER BY + LIMIT (Task: Fix ORDER BY Semantics)
pub mod trigram_index; // Task 5: Trigram candidate index for the grep lane
pub mod unified_fusion; // Task 7: Hybrid fusion that never post-filters

pub use agent_context::{
    AgentContext, AgentPermissions, AuditEntry, AuditOperation, AuditResult, ContextError,
    ContextValue, DbPermissions, FsPermissions, OperationBudget, PendingWrite, ResourceType,
    SessionId, SessionManager, TransactionScope,
};
pub use calc::{
    BinaryOp, CalcError, Evaluator, Expr, Parser as CalcParser, RowContext, UnaryOp, calculate,
    parse_expr,
};
pub use context_compiler::{
    CompiledContext, CompiledFact, ContextCandidate, ContextCompiler, ContextSpec, ContextTemplate,
};

pub use context_query::{
    ContextQueryError, ContextQueryParser, ContextQueryResult, ContextSection, ContextSelectQuery,
    HnswVectorIndex, SectionPriority, SectionResult, SimpleVectorIndex, VectorIndex,
    VectorIndexStats, VectorSearchResult,
};
pub use optimizer_integration::{
    CacheStats, ExecutionPlan, ExecutionStep, OptimizedExecutor, OptimizedQueryPlan, PlanCache,
    StorageBackend, TableStats,
};
pub use plugin_table::{
    PluginVirtualTable, VirtualColumnDef, VirtualColumnType, VirtualFilter, VirtualRow,
    VirtualTable, VirtualTableError, VirtualTableRegistry, VirtualTableSchema, VirtualTableStats,
};
pub use soch_ql::{
    ColumnDef, ColumnType, ComparisonOp, Condition, CreateTableQuery, InsertQuery, LogicalOp,
    OrderBy, ParseError, SelectQuery, SochQlParser, SochQuery, SochResult, SochValue,
    SortDirection, WhereClause,
};
pub use soch_ql_executor::{
    KeyRange, Predicate, PredicateCondition, QueryPlan, SochQlExecutor, TokenReductionStats,
    estimate_token_reduction, execute_sochql,
};
pub use sql::{
    BinaryOperator, ColumnDef as SqlColumnDef, CreateTableStmt, DeleteStmt, DropTableStmt,
    Expr as SqlExpr, InsertStmt, JoinType, Lexer, OrderByItem as SqlOrderBy, Parser as SqlParser,
    SelectStmt, Span, SqlError, SqlResult, Statement, Token, TokenKind, UnaryOperator, UpdateStmt,
};
pub use storage_bridge::{
    DatabaseSqlConnection, DatabaseStorageBackend, convert_core_to_query, convert_query_to_core,
};
pub use token_budget::{
    BudgetAllocation, BudgetSection, TokenBudgetConfig, TokenBudgetEnforcer, TokenEstimator,
    TokenEstimatorConfig, truncate_rows, truncate_to_tokens,
};

// v1.0 Volcano query executor [quarantined behind `experimental`: not on live SQL path]
#[cfg(feature = "experimental")]
pub use executor::{
    ColumnMeta, ExecutorConfig, ExplainNode, FilterNode, HashAggregateNode, HashJoinNode,
    IndexSeekNode, LimitNode, MergeJoinNode, NestedLoopJoinNode, PlanNode, ProjectNode,
    QueryPlanner, Row, Schema, SeqScanNode, SortNode, execute_sql, execute_statement,
};

// Streaming Top-K for ORDER BY + LIMIT (Task: Fix ORDER BY Semantics)
pub use topk_executor::{
    ColumnRef, ExecutionStrategy as TopKExecutionStrategy, IndexAwareTopK, OrderByColumn,
    OrderByLimitExecutor, OrderByLimitStats, OrderBySpec, SingleColumnTopK,
    SortDirection as TopKSortDirection, TopKHeap,
};

// Task 1: Streaming context generation
pub use streaming_context::{
    RollingBudget, SectionChunk, StreamingConfig, StreamingContextExecutor, StreamingContextIter,
};

// Task 2: Automatic embedding generation
pub use embedding_provider::{
    CachedEmbeddingProvider, EmbeddingError, EmbeddingProvider, EmbeddingVectorIndex,
    MockEmbeddingProvider,
};

// Task 4: Temporal decay scoring
pub use temporal_decay::{
    DecayCurve, TemporalDecayConfig, TemporalScorer, TemporallyDecayedResult,
};

// Task 5: Memory compaction
pub use memory_compaction::{
    Abstraction, CompactionStats, Episode, ExtractiveSummarizer, HierarchicalMemory, Summarizer,
    Summary,
};

// Task 6: Exact token counting
pub use exact_token_counter::{
    ExactBudgetEnforcer, ExactTokenCounter, HeuristicTokenCounter, TokenCounter,
    count_tokens_exact, count_tokens_heuristic,
};

// Task 7: Semantic triggers
pub use semantic_triggers::{
    EscalationLevel, EventSource, LogLevel, SemanticTrigger, TriggerAction, TriggerBuilder,
    TriggerError, TriggerEvent, TriggerIndex, TriggerMatch, TriggerStats,
};

// ============================================================================
// Canonical Filter IR + Pushdown Contract (mm.md Tasks 1-8)
// ============================================================================

// Task 1: Canonical Filter IR (CNF/DNF with typed atoms)
pub use filter_ir::{
    AuthCapabilities, AuthScope, Disjunction, FilterAtom, FilterBuilder, FilterIR, FilterValue,
    FilteredExecutor,
};

// Task 2: Namespace-Scoped Query API (mandatory namespace)
pub use namespace::{Namespace, NamespaceError, NamespaceScope, QueryRequest, ScopedQuery};

// Task 3: Metadata Index Primitives (bitmap + range accessors)
pub use metadata_index::{
    ConcurrentMetadataIndex, EqualityIndex, MetadataIndex, PostingSet, RangeIndex,
};

// Task 4: Unified Candidate Gate Interface
pub use candidate_gate::{AllowedBitmap, AllowedSet, CandidateGate, ExecutionStrategy};

// Task 5: Filter-Aware Vector Search with selectivity-driven fallback
pub use filtered_vector_search::{
    FilterAwareSearch, FilteredSearchConfig, FilteredSearchResult, FilteredSearchStrategy,
    FilteredVectorStore, ScoredResult,
};

// Task 6: BM25 Filter Pushdown via posting-set intersection
pub use bm25_filtered::{
    Bm25Params, DisjunctiveBm25Executor, FilteredBm25Executor, FilteredPhraseExecutor,
    InvertedIndex, PositionalIndex, PositionalPosting, PostingList,
};

// Task 7: Hybrid Fusion That Never Post-Filters
pub use unified_fusion::{
    Bm25Executor, Bm25QuerySpec, DocId, FilteredCandidates, FusionConfig, FusionEngine,
    FusionMethod as UnifiedFusionMethod, FusionResult, GrepLaneExecutor, GrepQuerySpec, Modality,
    RankedList, UnifiedHybridExecutor, UnifiedHybridQuery, VectorExecutor, VectorQuerySpec,
    WeightedLane, fuse_rrf_weighted,
};

// Task 5: Grep lane (trigram-accelerated regex search)
pub use grep_executor::{
    DEFAULT_MAX_SCAN, GrepError, GrepExecutor, GrepHit, GrepMode, GrepResults, required_trigrams,
};
pub use trigram_index::{Trigram, TrigramIndex, trigrams_of};

// Task 8: Capability Tokens + ACLs
pub use capability_token::{
    AclTagIndex, CapabilityToken, TokenBuilder, TokenCapabilities, TokenError, TokenSigner,
    TokenValidator,
};