Skip to main content

sqz_engine/
lib.rs

1//! # sqz_engine
2//!
3//! The core compression engine behind sqz. Takes text — JSON, CLI output, code,
4//! logs, prose — and squeezes it down to use fewer LLM tokens while keeping the
5//! important stuff intact.
6//!
7//! ## Quick start
8//!
9//! ```rust
10//! use sqz_engine::SqzEngine;
11//!
12//! let engine = SqzEngine::new().expect("failed to init engine");
13//!
14//! // Compress some text
15//! let result = engine.compress("hello world").unwrap();
16//! println!("compressed: {}", result.data);
17//! println!("tokens: {} → {}", result.tokens_original, result.tokens_compressed);
18//!
19//! // JSON gets TOON-encoded automatically
20//! let json_result = engine.compress(r#"{"name": "Alice", "age": 30}"#).unwrap();
21//! assert!(json_result.data.starts_with("TOON:"));
22//! ```
23//!
24//! ## How it works
25//!
26//! Content flows through a multi-stage pipeline:
27//!
28//! 1. **Content routing** — the confidence router classifies input (JSON, code,
29//!    logs, prose) and picks a compression mode (safe, default, aggressive).
30//! 2. **Stage pipeline** — configurable stages run in priority order: ANSI
31//!    stripping, null removal, repeated-line condensing, git diff folding,
32//!    string truncation, array collapsing, and custom transforms.
33//! 3. **Post-processing** — RLE compression, sliding-window dedup, entropy
34//!    truncation, and token pruning for prose.
35//! 4. **TOON encoding** — JSON gets encoded into Token-Optimized Object
36//!    Notation, which drops unnecessary quotes and whitespace for 30–60%
37//!    fewer tokens.
38//! 5. **Verification** — a two-pass verifier checks that error lines, JSON
39//!    keys, and diff hunks survived compression. If confidence is low, the
40//!    engine falls back to safe mode.
41//!
42//! ## Key types
43//!
44//! - [`SqzEngine`] — top-level facade, wires everything together
45//! - [`CompressionPipeline`] — the stage-based compression orchestrator
46//! - [`CacheManager`] — SHA-256 content-hash dedup cache
47//! - [`SessionStore`] — SQLite-backed session and cache persistence
48//! - [`Preset`] — TOML-configurable compression settings
49//! - [`ToonEncoder`] — JSON → TOON lossless encoding
50//! - [`CompressedContent`] — compression result with token counts and metadata
51
52pub mod adaptive_tree;
53pub mod advanced_search;
54pub mod ansi_strip;
55pub mod api_proxy;
56pub mod ast_delta;
57pub mod ast_parser;
58pub mod benchmarks;
59pub mod bpe_compressor;
60pub mod cascade_compressor;
61pub mod cmd_formatters;
62pub mod codex_integration;
63pub mod compression_quality;
64pub mod confidence_router;
65pub mod context_evictor;
66pub mod crp_engine;
67pub mod dashboard;
68pub mod delta_encoder;
69pub mod dependency_mapper;
70pub mod dict_compressor;
71pub mod entropy_analyzer;
72pub mod entropy_truncator;
73pub mod file_reader;
74pub mod image_compressor;
75pub mod json_projection;
76pub mod kv_cache_optimizer;
77pub mod litm_positioner;
78pub mod mdl_selector;
79pub mod minhash_lsh;
80pub mod ngram_abbreviator;
81pub mod opencode_plugin;
82pub mod rle_compressor;
83pub mod simhash;
84pub mod textrank;
85pub mod token_pruner;
86pub mod tool_hooks;
87pub mod tool_selector;
88pub mod engine;
89pub mod hook_manager;
90pub mod budget_tracker;
91pub mod cache_manager;
92pub mod correction_log;
93pub mod cost_calculator;
94pub mod ctx_format;
95pub mod error;
96pub mod model_router;
97pub mod parse_tree_compressor;
98pub mod pipeline;
99pub mod pin_manager;
100pub mod plugin_api;
101pub mod preset;
102pub mod progressive_throttle;
103pub mod prompt_cache;
104pub mod regret_tracker;
105pub mod sandbox_executor;
106pub mod session_continuity;
107pub mod session_store;
108pub mod stages;
109pub mod structural_summary;
110pub mod tee_mode;
111pub mod terse_mode;
112pub mod token_counter;
113pub mod toon;
114pub mod transparency;
115pub mod types;
116pub mod url_indexer;
117pub mod verifier;
118
119pub use advanced_search::{AdvancedSearch, SearchResult};
120pub use ansi_strip::AnsiStripper;
121pub use api_proxy::{compress_request, parse_http_request, build_http_response, ApiFormat, ProxyConfig, ProxyStats};
122pub use ast_parser::{AstParser, ClassDefinition, CodeSummary, FunctionSignature, ImportDecl, TypeDeclaration};
123pub use bpe_compressor::{bpe_compress, BpeConfig, BpeResult};
124pub use compression_quality::{measure_quality, format_quality_report, CompressionQuality, QualityGrade};
125pub use confidence_router::{ConfidenceRouter, CompressionMode};
126pub use context_evictor::{evict, should_evict, ContextItem, EvictionConfig, EvictionResult};
127pub use cmd_formatters::format_command;
128pub use delta_encoder::{DeltaConfig, DeltaEncoder, DeltaResult};
129pub use dependency_mapper::DependencyMapper;
130pub use dict_compressor::{DictCompressor, DictConfig, DictCompressResult};
131pub use entropy_analyzer::{EntropyAnalyzer, InfoLevel, AnalyzedBlock};
132pub use entropy_truncator::{EntropyTruncator, EntropyTruncConfig, EntropyTruncResult, EntropyTruncArrayResult};
133pub use file_reader::{FileReadMode, FileReader, ReadResult, BlockEntropy, compute_entropy, analyze_block_entropies};
134pub use image_compressor::{ImageCompressor, ImageDescription};
135pub use json_projection::{project_json, ProjectionConfig, ProjectionResult};
136pub use litm_positioner::{ContextSection, LitmPositioner, LitmStrategy, SectionType};
137pub use ngram_abbreviator::{NgramAbbreviator, AbbreviatorConfig, AbbreviationResult};
138pub use rle_compressor::{rle_compress, sliding_window_dedup, RleResult, SlidingWindowResult};
139pub use simhash::{simhash, SimHashFingerprint};
140pub use structural_summary::{summarize as structural_summarize, summarize_multi, SummaryConfig, StructuralSummaryResult};
141pub use textrank::{textrank_compress, TextRankConfig, TextRankResult};
142pub use mdl_selector::{select_stages, profile_content, ContentProfile, MdlSelection};
143pub use tool_hooks::{process_hook, process_hook_cursor, process_hook_gemini, process_hook_windsurf, generate_hook_configs, install_tool_hooks, install_tool_hooks_scoped, claude_user_settings_path, remove_claude_global_hook, InstallScope, ToolHookConfig, HookScope, HookPlatform};
144pub use opencode_plugin::{
145    generate_opencode_plugin, install_opencode_plugin, update_opencode_config,
146    update_opencode_config_detailed, find_opencode_config,
147    opencode_config_has_comments,
148    remove_sqz_from_opencode_config, strip_jsonc_comments,
149    process_opencode_hook, opencode_plugin_path,
150};
151pub use codex_integration::{
152    agents_md_guidance_block, agents_md_path,
153    codex_config_path,
154    install_agents_md_guidance, install_codex_mcp_config,
155    remove_agents_md_guidance, remove_codex_mcp_config,
156};
157pub use token_pruner::{TokenPruner, PrunerConfig, PruneResult};
158pub use tool_selector::{ToolDefinition, ToolSelector};
159pub use budget_tracker::{
160    AgentBudget, BudgetTracker, BudgetWarning, UsagePrediction, UsageReport,
161};
162pub use cache_manager::{CacheManager, CacheResult};
163pub use correction_log::ContextWindow;
164pub use crp_engine::{CrpEngine, CrpLevel};
165pub use cost_calculator::{
166    CostBreakdown, CostCalculator, ModelPricing, PricingConfig, SessionCostSummary, TokenUsage,
167    ToolCost,
168};
169pub use ctx_format::{CtxEnvelope, CtxFormat, CtxMetadata};
170pub use error::{Result, SqzError, SourceLocation};
171pub use model_router::{ModelRouter, RoutingDecision, TaskContext};
172pub use pipeline::{CompressionPipeline, SessionContext};
173pub use pin_manager::PinManager;
174pub use plugin_api::{PluginLoader, PluginManifest, PluginSource, SqzPlugin};
175pub use prompt_cache::{CacheBoundary, Message, PromptCacheDetector};
176pub use session_store::{CompressionStats, DailyGain, SessionStore, SessionSummary};
177pub use session_continuity::{
178    SessionContinuityManager, SessionGuide, Snapshot, SnapshotEvent, SnapshotEventType,
179};
180pub use toon::ToonEncoder;
181pub use tee_mode::{TeeMode, TeeManager, TeeEntry};
182pub use terse_mode::TerseMode;
183pub use token_counter::TokenCounter;
184pub use types::*;
185pub use preset::{
186    BudgetConfig, CollapseArraysConfig, CompressionConfig, CondenseConfig, CustomTransformsConfig,
187    FlattenConfig, GitDiffFoldConfig, KeepFieldsConfig, ModelConfig, ModelPricingConfig, Preset,
188    PresetHeader, PresetMeta, PresetParser, StripFieldsConfig, StripNullsConfig, TerseLevel,
189    TerseModeConfig, ToolSelectionConfig, TruncateStringsConfig,
190};
191pub use progressive_throttle::{ProgressiveThrottler, ThrottleConfig, ThrottleLevel};
192pub use dashboard::{
193    CommandBreakdown, DashboardConfig, DashboardHtml, DashboardMetrics, DashboardServer,
194    SessionHistoryEntry, ToolBreakdown,
195};
196pub use engine::SqzEngine;
197pub use hook_manager::{
198    generate_platform_config, known_platforms, Hook, HookAction, HookContext, HookManager, HookType,
199};
200pub use sandbox_executor::{SandboxExecutor, SandboxResult, RuntimeInfo, FilteredOutput};
201pub use url_indexer::{ContentFetcher, IndexedChunk, IndexResult, UrlIndexer};
202pub use verifier::Verifier;
203
204pub use adaptive_tree::{compress_to_budget, build_tree, SemanticNode};
205pub use ast_delta::{ast_diff, encode_delta, AstDelta, AstChange, ChangeKind};
206pub use kv_cache_optimizer::{compress_with_sinks, compress_with_custom_sinks};
207pub use minhash_lsh::{MinHashLsh, MinHashSignature};
208pub use parse_tree_compressor::{compress_code, char_entropy};
209pub use cascade_compressor::{cascade_compress, CascadeLevel, CascadeThresholds, CascadeResult};
210pub use regret_tracker::{RegretTracker, RegretEvent, RegretKind, FileProfile};
211pub use transparency::{CompressionAnnotation};