// sqz_engine/lib.rs

//! # sqz_engine
//!
//! The core compression engine behind sqz. Takes text — JSON, CLI output, code,
//! logs, prose — and squeezes it down to use fewer LLM tokens while keeping the
//! important stuff intact.
//!
//! ## Quick start
//!
//! ```rust
//! use sqz_engine::SqzEngine;
//!
//! let engine = SqzEngine::new().expect("failed to init engine");
//!
//! // Compress some text
//! let result = engine.compress("hello world").unwrap();
//! println!("compressed: {}", result.data);
//! println!("tokens: {} → {}", result.tokens_original, result.tokens_compressed);
//!
//! // JSON gets TOON-encoded automatically
//! let json_result = engine.compress(r#"{"name": "Alice", "age": 30}"#).unwrap();
//! assert!(json_result.data.starts_with("TOON:"));
//! ```
//!
//! ## How it works
//!
//! Content flows through a multi-stage pipeline:
//!
//! 1. **Content routing** — the confidence router classifies input (JSON, code,
//!    logs, prose) and picks a compression mode (safe, default, aggressive).
//! 2. **Stage pipeline** — configurable stages run in priority order: ANSI
//!    stripping, null removal, repeated-line condensing, git diff folding,
//!    string truncation, array collapsing, and custom transforms.
//! 3. **Post-processing** — RLE compression, sliding-window dedup, entropy
//!    truncation, and token pruning for prose.
//! 4. **TOON encoding** — JSON gets encoded into Token-Optimized Object
//!    Notation, which drops unnecessary quotes and whitespace for 30–60%
//!    fewer tokens.
//! 5. **Verification** — a two-pass verifier checks that error lines, JSON
//!    keys, and diff hunks survived compression. If confidence is low, the
//!    engine falls back to safe mode.
//!
//! ## Key types
//!
//! - [`SqzEngine`] — top-level facade, wires everything together
//! - [`CompressionPipeline`] — the stage-based compression orchestrator
//! - [`CacheManager`] — SHA-256 content-hash dedup cache
//! - [`SessionStore`] — SQLite-backed session and cache persistence
//! - [`Preset`] — TOML-configurable compression settings
//! - [`ToonEncoder`] — JSON → TOON lossless encoding
//! - [`CompressedContent`] — compression result with token counts and metadata

52pub mod adaptive_tree;
53pub mod advanced_search;
54pub mod ansi_strip;
55pub mod api_proxy;
56pub mod ast_delta;
57pub mod ast_parser;
58pub mod benchmarks;
59pub mod bpe_compressor;
60pub mod cascade_compressor;
61pub mod cmd_formatters;
62pub mod compression_quality;
63pub mod confidence_router;
64pub mod context_evictor;
65pub mod crp_engine;
66pub mod dashboard;
67pub mod delta_encoder;
68pub mod dependency_mapper;
69pub mod dict_compressor;
70pub mod entropy_analyzer;
71pub mod entropy_truncator;
72pub mod file_reader;
73pub mod image_compressor;
74pub mod json_projection;
75pub mod kv_cache_optimizer;
76pub mod litm_positioner;
77pub mod mdl_selector;
78pub mod minhash_lsh;
79pub mod ngram_abbreviator;
80pub mod opencode_plugin;
81pub mod rle_compressor;
82pub mod simhash;
83pub mod textrank;
84pub mod token_pruner;
85pub mod tool_hooks;
86pub mod tool_selector;
87pub mod engine;
88pub mod hook_manager;
89pub mod budget_tracker;
90pub mod cache_manager;
91pub mod correction_log;
92pub mod cost_calculator;
93pub mod ctx_format;
94pub mod error;
95pub mod model_router;
96pub mod parse_tree_compressor;
97pub mod pipeline;
98pub mod pin_manager;
99pub mod plugin_api;
100pub mod preset;
101pub mod progressive_throttle;
102pub mod prompt_cache;
103pub mod regret_tracker;
104pub mod sandbox_executor;
105pub mod session_continuity;
106pub mod session_store;
107pub mod stages;
108pub mod structural_summary;
109pub mod tee_mode;
110pub mod terse_mode;
111pub mod token_counter;
112pub mod toon;
113pub mod transparency;
114pub mod types;
115pub mod url_indexer;
116pub mod verifier;
117
118pub use advanced_search::{AdvancedSearch, SearchResult};
119pub use ansi_strip::AnsiStripper;
120pub use api_proxy::{compress_request, parse_http_request, build_http_response, ApiFormat, ProxyConfig, ProxyStats};
121pub use ast_parser::{AstParser, ClassDefinition, CodeSummary, FunctionSignature, ImportDecl, TypeDeclaration};
122pub use bpe_compressor::{bpe_compress, BpeConfig, BpeResult};
123pub use compression_quality::{measure_quality, format_quality_report, CompressionQuality, QualityGrade};
124pub use confidence_router::{ConfidenceRouter, CompressionMode};
125pub use context_evictor::{evict, should_evict, ContextItem, EvictionConfig, EvictionResult};
126pub use cmd_formatters::format_command;
127pub use delta_encoder::{DeltaConfig, DeltaEncoder, DeltaResult};
128pub use dependency_mapper::DependencyMapper;
129pub use dict_compressor::{DictCompressor, DictConfig, DictCompressResult};
130pub use entropy_analyzer::{EntropyAnalyzer, InfoLevel, AnalyzedBlock};
131pub use entropy_truncator::{EntropyTruncator, EntropyTruncConfig, EntropyTruncResult, EntropyTruncArrayResult};
132pub use file_reader::{FileReadMode, FileReader, ReadResult, BlockEntropy, compute_entropy, analyze_block_entropies};
133pub use image_compressor::{ImageCompressor, ImageDescription};
134pub use json_projection::{project_json, ProjectionConfig, ProjectionResult};
135pub use litm_positioner::{ContextSection, LitmPositioner, LitmStrategy, SectionType};
136pub use ngram_abbreviator::{NgramAbbreviator, AbbreviatorConfig, AbbreviationResult};
137pub use rle_compressor::{rle_compress, sliding_window_dedup, RleResult, SlidingWindowResult};
138pub use simhash::{simhash, SimHashFingerprint};
139pub use structural_summary::{summarize as structural_summarize, summarize_multi, SummaryConfig, StructuralSummaryResult};
140pub use textrank::{textrank_compress, TextRankConfig, TextRankResult};
141pub use mdl_selector::{select_stages, profile_content, ContentProfile, MdlSelection};
142pub use tool_hooks::{process_hook, process_hook_cursor, process_hook_gemini, process_hook_windsurf, generate_hook_configs, install_tool_hooks, ToolHookConfig, HookScope, HookPlatform};
143pub use opencode_plugin::{
144    generate_opencode_plugin, install_opencode_plugin, update_opencode_config,
145    process_opencode_hook, opencode_plugin_path,
146};
147pub use token_pruner::{TokenPruner, PrunerConfig, PruneResult};
148pub use tool_selector::{ToolDefinition, ToolSelector};
149pub use budget_tracker::{
150    AgentBudget, BudgetTracker, BudgetWarning, UsagePrediction, UsageReport,
151};
152pub use cache_manager::{CacheManager, CacheResult};
153pub use correction_log::ContextWindow;
154pub use crp_engine::{CrpEngine, CrpLevel};
155pub use cost_calculator::{
156    CostBreakdown, CostCalculator, ModelPricing, PricingConfig, SessionCostSummary, TokenUsage,
157    ToolCost,
158};
159pub use ctx_format::{CtxEnvelope, CtxFormat, CtxMetadata};
160pub use error::{Result, SqzError, SourceLocation};
161pub use model_router::{ModelRouter, RoutingDecision, TaskContext};
162pub use pipeline::{CompressionPipeline, SessionContext};
163pub use pin_manager::PinManager;
164pub use plugin_api::{PluginLoader, PluginManifest, PluginSource, SqzPlugin};
165pub use prompt_cache::{CacheBoundary, Message, PromptCacheDetector};
166pub use session_store::{CompressionStats, DailyGain, SessionStore, SessionSummary};
167pub use session_continuity::{
168    SessionContinuityManager, SessionGuide, Snapshot, SnapshotEvent, SnapshotEventType,
169};
170pub use toon::ToonEncoder;
171pub use tee_mode::{TeeMode, TeeManager, TeeEntry};
172pub use terse_mode::TerseMode;
173pub use token_counter::TokenCounter;
174pub use types::*;
175pub use preset::{
176    BudgetConfig, CollapseArraysConfig, CompressionConfig, CondenseConfig, CustomTransformsConfig,
177    FlattenConfig, GitDiffFoldConfig, KeepFieldsConfig, ModelConfig, ModelPricingConfig, Preset,
178    PresetHeader, PresetMeta, PresetParser, StripFieldsConfig, StripNullsConfig, TerseLevel,
179    TerseModeConfig, ToolSelectionConfig, TruncateStringsConfig,
180};
181pub use progressive_throttle::{ProgressiveThrottler, ThrottleConfig, ThrottleLevel};
182pub use dashboard::{
183    CommandBreakdown, DashboardConfig, DashboardHtml, DashboardMetrics, DashboardServer,
184    SessionHistoryEntry, ToolBreakdown,
185};
186pub use engine::SqzEngine;
187pub use hook_manager::{
188    generate_platform_config, known_platforms, Hook, HookAction, HookContext, HookManager, HookType,
189};
190pub use sandbox_executor::{SandboxExecutor, SandboxResult, RuntimeInfo, FilteredOutput};
191pub use url_indexer::{ContentFetcher, IndexedChunk, IndexResult, UrlIndexer};
192pub use verifier::Verifier;
193
194pub use adaptive_tree::{compress_to_budget, build_tree, SemanticNode};
195pub use ast_delta::{ast_diff, encode_delta, AstDelta, AstChange, ChangeKind};
196pub use kv_cache_optimizer::{compress_with_sinks, compress_with_custom_sinks};
197pub use minhash_lsh::{MinHashLsh, MinHashSignature};
198pub use parse_tree_compressor::{compress_code, char_entropy};
199pub use cascade_compressor::{cascade_compress, CascadeLevel, CascadeThresholds, CascadeResult};
200pub use regret_tracker::{RegretTracker, RegretEvent, RegretKind, FileProfile};
201pub use transparency::{CompressionAnnotation};