Skip to main content

sqz_engine/
lib.rs

1//! # sqz_engine
2//!
3//! The core compression engine behind sqz. Takes text — JSON, CLI output, code,
4//! logs, prose — and squeezes it down to use fewer LLM tokens while keeping the
5//! important stuff intact.
6//!
7//! ## Quick start
8//!
9//! ```rust
10//! use sqz_engine::SqzEngine;
11//!
12//! let engine = SqzEngine::new().expect("failed to init engine");
13//!
14//! // Compress some text
15//! let result = engine.compress("hello world").unwrap();
16//! println!("compressed: {}", result.data);
17//! println!("tokens: {} → {}", result.tokens_original, result.tokens_compressed);
18//!
19//! // JSON gets TOON-encoded automatically
20//! let json_result = engine.compress(r#"{"name": "Alice", "age": 30}"#).unwrap();
21//! assert!(json_result.data.starts_with("TOON:"));
22//! ```
23//!
24//! ## How it works
25//!
26//! Content flows through a multi-stage pipeline:
27//!
28//! 1. **Content routing** — the confidence router classifies input (JSON, code,
29//!    logs, prose) and picks a compression mode (safe, default, aggressive).
30//! 2. **Stage pipeline** — configurable stages run in priority order: ANSI
31//!    stripping, null removal, repeated-line condensing, git diff folding,
32//!    string truncation, array collapsing, and custom transforms.
33//! 3. **Post-processing** — run-length encoding (RLE), sliding-window dedup,
34//!    entropy truncation, and token pruning for prose.
35//! 4. **TOON encoding** — JSON gets encoded into Token-Optimized Object
36//!    Notation, which drops unnecessary quotes and whitespace for 30–60%
37//!    fewer tokens.
38//! 5. **Verification** — a two-pass verifier checks that error lines, JSON
39//!    keys, and diff hunks survived compression. If confidence is low, the
40//!    engine falls back to safe mode.
41//!
42//! ## Key types
43//!
44//! - [`SqzEngine`] — top-level facade, wires everything together
45//! - [`CompressionPipeline`] — the stage-based compression orchestrator
46//! - [`CacheManager`] — SHA-256 content-hash dedup cache
47//! - [`SessionStore`] — SQLite-backed session and cache persistence
48//! - [`Preset`] — TOML-configurable compression settings
49//! - [`ToonEncoder`] — JSON → TOON lossless encoding
50//! - [`CompressedContent`] — compression result with token counts and metadata
51
52pub mod adaptive_tree;
53pub mod advanced_search;
54pub mod ansi_strip;
55pub mod api_proxy;
56pub mod ast_delta;
57pub mod ast_parser;
58pub mod benchmarks;
59pub mod bpe_compressor;
60pub mod cascade_compressor;
61pub mod cmd_formatters;
62pub mod compression_quality;
63pub mod confidence_router;
64pub mod context_evictor;
65pub mod crp_engine;
66pub mod dashboard;
67pub mod delta_encoder;
68pub mod dependency_mapper;
69pub mod dict_compressor;
70pub mod entropy_analyzer;
71pub mod entropy_truncator;
72pub mod file_reader;
73pub mod image_compressor;
74pub mod json_projection;
75pub mod kv_cache_optimizer;
76pub mod litm_positioner;
77pub mod mdl_selector;
78pub mod minhash_lsh;
79pub mod ngram_abbreviator;
80pub mod opencode_plugin;
81pub mod rle_compressor;
82pub mod simhash;
83pub mod textrank;
84pub mod token_pruner;
85pub mod tool_hooks;
86pub mod tool_selector;
87pub mod engine;
88pub mod hook_manager;
89pub mod budget_tracker;
90pub mod cache_manager;
91pub mod correction_log;
92pub mod cost_calculator;
93pub mod ctx_format;
94pub mod error;
95pub mod model_router;
96pub mod parse_tree_compressor;
97pub mod pipeline;
98pub mod pin_manager;
99pub mod plugin_api;
100pub mod preset;
101pub mod progressive_throttle;
102pub mod prompt_cache;
103pub mod regret_tracker;
104pub mod sandbox_executor;
105pub mod session_continuity;
106pub mod session_store;
107pub mod stages;
108pub mod tee_mode;
109pub mod terse_mode;
110pub mod token_counter;
111pub mod toon;
112pub mod transparency;
113pub mod types;
114pub mod url_indexer;
115pub mod verifier;
116
117pub use advanced_search::{AdvancedSearch, SearchResult};
118pub use ansi_strip::AnsiStripper;
119pub use api_proxy::{compress_request, parse_http_request, build_http_response, ApiFormat, ProxyConfig, ProxyStats};
120pub use ast_parser::{AstParser, ClassDefinition, CodeSummary, FunctionSignature, ImportDecl, TypeDeclaration};
121pub use bpe_compressor::{bpe_compress, BpeConfig, BpeResult};
122pub use compression_quality::{measure_quality, format_quality_report, CompressionQuality, QualityGrade};
123pub use confidence_router::{ConfidenceRouter, CompressionMode};
124pub use context_evictor::{evict, should_evict, ContextItem, EvictionConfig, EvictionResult};
125pub use cmd_formatters::format_command;
126pub use delta_encoder::{DeltaConfig, DeltaEncoder, DeltaResult};
127pub use dependency_mapper::DependencyMapper;
128pub use dict_compressor::{DictCompressor, DictConfig, DictCompressResult};
129pub use entropy_analyzer::{EntropyAnalyzer, InfoLevel, AnalyzedBlock};
130pub use entropy_truncator::{EntropyTruncator, EntropyTruncConfig, EntropyTruncResult, EntropyTruncArrayResult};
131pub use file_reader::{FileReadMode, FileReader, ReadResult, BlockEntropy, compute_entropy, analyze_block_entropies};
132pub use image_compressor::{ImageCompressor, ImageDescription};
133pub use json_projection::{project_json, ProjectionConfig, ProjectionResult};
134pub use litm_positioner::{ContextSection, LitmPositioner, LitmStrategy, SectionType};
135pub use ngram_abbreviator::{NgramAbbreviator, AbbreviatorConfig, AbbreviationResult};
136pub use rle_compressor::{rle_compress, sliding_window_dedup, RleResult, SlidingWindowResult};
137pub use simhash::{simhash, SimHashFingerprint};
138pub use textrank::{textrank_compress, TextRankConfig, TextRankResult};
139pub use mdl_selector::{select_stages, profile_content, ContentProfile, MdlSelection};
140pub use tool_hooks::{process_hook, generate_hook_configs, install_tool_hooks, ToolHookConfig, HookScope};
141pub use opencode_plugin::{
142    generate_opencode_plugin, install_opencode_plugin, update_opencode_config,
143    process_opencode_hook, opencode_plugin_path,
144};
145pub use token_pruner::{TokenPruner, PrunerConfig, PruneResult};
146pub use tool_selector::{ToolDefinition, ToolSelector};
147pub use budget_tracker::{
148    AgentBudget, BudgetTracker, BudgetWarning, UsagePrediction, UsageReport,
149};
150pub use cache_manager::{CacheManager, CacheResult};
151pub use correction_log::ContextWindow;
152pub use crp_engine::{CrpEngine, CrpLevel};
153pub use cost_calculator::{
154    CostBreakdown, CostCalculator, ModelPricing, PricingConfig, SessionCostSummary, TokenUsage,
155    ToolCost,
156};
157pub use ctx_format::{CtxEnvelope, CtxFormat, CtxMetadata};
158pub use error::{Result, SqzError, SourceLocation};
159pub use model_router::{ModelRouter, RoutingDecision, TaskContext};
160pub use pipeline::{CompressionPipeline, SessionContext};
161pub use pin_manager::PinManager;
162pub use plugin_api::{PluginLoader, PluginManifest, PluginSource, SqzPlugin};
163pub use prompt_cache::{CacheBoundary, Message, PromptCacheDetector};
164pub use session_store::{CompressionStats, DailyGain, SessionStore, SessionSummary};
165pub use session_continuity::{
166    SessionContinuityManager, SessionGuide, Snapshot, SnapshotEvent, SnapshotEventType,
167};
168pub use toon::ToonEncoder;
169pub use tee_mode::{TeeMode, TeeManager, TeeEntry};
170pub use terse_mode::TerseMode;
171pub use token_counter::TokenCounter;
172pub use types::*;
173pub use preset::{
174    BudgetConfig, CollapseArraysConfig, CompressionConfig, CondenseConfig, CustomTransformsConfig,
175    FlattenConfig, GitDiffFoldConfig, KeepFieldsConfig, ModelConfig, ModelPricingConfig, Preset,
176    PresetHeader, PresetMeta, PresetParser, StripFieldsConfig, StripNullsConfig, TerseLevel,
177    TerseModeConfig, ToolSelectionConfig, TruncateStringsConfig,
178};
179pub use progressive_throttle::{ProgressiveThrottler, ThrottleConfig, ThrottleLevel};
180pub use dashboard::{
181    CommandBreakdown, DashboardConfig, DashboardHtml, DashboardMetrics, DashboardServer,
182    SessionHistoryEntry, ToolBreakdown,
183};
184pub use engine::SqzEngine;
185pub use hook_manager::{
186    generate_platform_config, known_platforms, Hook, HookAction, HookContext, HookManager, HookType,
187};
188pub use sandbox_executor::{SandboxExecutor, SandboxResult, RuntimeInfo, FilteredOutput};
189pub use url_indexer::{ContentFetcher, IndexedChunk, IndexResult, UrlIndexer};
190pub use verifier::Verifier;
191
192pub use adaptive_tree::{compress_to_budget, build_tree, SemanticNode};
193pub use ast_delta::{ast_diff, encode_delta, AstDelta, AstChange, ChangeKind};
194pub use kv_cache_optimizer::{compress_with_sinks, compress_with_custom_sinks};
195pub use minhash_lsh::{MinHashLsh, MinHashSignature};
196pub use parse_tree_compressor::{compress_code, char_entropy};
197pub use cascade_compressor::{cascade_compress, CascadeLevel, CascadeThresholds, CascadeResult};
198pub use regret_tracker::{RegretTracker, RegretEvent, RegretKind, FileProfile};
199pub use transparency::{CompressionAnnotation};