Skip to main content

sqz_engine/
lib.rs

1//! # sqz_engine
2//!
3//! The core compression engine behind sqz. It takes text — JSON, CLI output,
4//! code, logs, prose — and squeezes it down to use fewer LLM tokens while
5//! keeping the information that matters intact.
6//!
7//! ## Quick start
8//!
9//! ```rust
10//! use sqz_engine::SqzEngine;
11//!
12//! let engine = SqzEngine::new().expect("failed to init engine");
13//!
14//! // Compress some text
15//! let result = engine.compress("hello world").unwrap();
16//! println!("compressed: {}", result.data);
17//! println!("tokens: {} → {}", result.tokens_original, result.tokens_compressed);
18//!
19//! // JSON gets TOON-encoded automatically
20//! let json_result = engine.compress(r#"{"name": "Alice", "age": 30}"#).unwrap();
21//! assert!(json_result.data.starts_with("TOON:"));
22//! ```
23//!
24//! ## How it works
25//!
26//! Content flows through a multi-stage pipeline:
27//!
28//! 1. **Content routing** — the confidence router classifies input (JSON, code,
29//!    logs, prose) and picks a compression mode (safe, default, aggressive).
30//! 2. **Stage pipeline** — configurable stages run in priority order: ANSI
31//!    stripping, null removal, repeated-line condensing, git diff folding,
32//!    string truncation, array collapsing, and custom transforms.
33//! 3. **Post-processing** — RLE compression, sliding-window dedup, entropy
34//!    truncation, and token pruning for prose.
35//! 4. **TOON encoding** — JSON gets encoded into Token-Optimized Object
36//!    Notation, which drops unnecessary quotes and whitespace for 30-60%
37//!    fewer tokens.
38//! 5. **Verification** — a two-pass verifier checks that error lines, JSON
39//!    keys, and diff hunks survived compression. If confidence is low, the
40//!    engine falls back to safe mode.
41//!
42//! ## Key types
43//!
44//! - [`SqzEngine`] — top-level facade, wires everything together
45//! - [`CompressionPipeline`] — the stage-based compression orchestrator
46//! - [`CacheManager`] — SHA-256 content-hash dedup cache
47//! - [`SessionStore`] — SQLite-backed session and cache persistence
48//! - [`Preset`] — TOML-configurable compression settings
49//! - [`ToonEncoder`] — JSON → TOON lossless encoding
50//! - [`CompressedContent`] — compression result with token counts and metadata
51
52pub mod adaptive_tree;
53pub mod advanced_search;
54pub mod ansi_strip;
55pub mod api_proxy;
56pub mod ast_delta;
57pub mod ast_parser;
58pub mod benchmarks;
59pub mod bpe_compressor;
60pub mod cascade_compressor;
61pub mod claude_md_integration;
62pub mod cmd_formatters;
63pub mod codex_integration;
64pub mod compression_quality;
65pub mod confidence_router;
66pub mod context_evictor;
67pub mod crp_engine;
68pub mod dashboard;
69pub mod delta_encoder;
70pub mod dependency_mapper;
71pub mod dict_compressor;
72pub mod entropy_analyzer;
73pub mod entropy_truncator;
74pub mod file_reader;
75pub mod image_compressor;
76pub mod json_projection;
77pub mod kv_cache_optimizer;
78pub mod litm_positioner;
79pub mod mdl_selector;
80pub mod minhash_lsh;
81pub mod ngram_abbreviator;
82pub mod opencode_plugin;
83pub mod rle_compressor;
84pub mod simhash;
85pub mod textrank;
86pub mod token_pruner;
87pub mod tool_hooks;
88pub mod tool_selector;
89pub mod engine;
90pub mod hook_manager;
91pub mod budget_tracker;
92pub mod cache_manager;
93pub mod correction_log;
94pub mod cost_calculator;
95pub mod ctx_format;
96pub mod error;
97pub mod model_router;
98pub mod parse_tree_compressor;
99pub mod pipeline;
100pub mod pin_manager;
101pub mod plugin_api;
102pub mod preset;
103pub mod progressive_throttle;
104pub mod prompt_cache;
105pub mod regret_tracker;
106pub mod sandbox_executor;
107pub mod session_continuity;
108pub mod session_store;
109pub mod stages;
110pub mod structural_summary;
111pub mod tee_mode;
112pub mod terse_mode;
113pub mod token_counter;
114pub mod toon;
115pub mod transparency;
116pub mod types;
117pub mod url_indexer;
118pub mod verifier;
119
120pub use advanced_search::{AdvancedSearch, SearchResult};
121pub use ansi_strip::AnsiStripper;
122pub use api_proxy::{compress_request, parse_http_request, build_http_response, ApiFormat, ProxyConfig, ProxyStats};
123pub use ast_parser::{AstParser, ClassDefinition, CodeSummary, FunctionSignature, ImportDecl, TypeDeclaration};
124pub use bpe_compressor::{bpe_compress, BpeConfig, BpeResult};
125pub use compression_quality::{measure_quality, format_quality_report, CompressionQuality, QualityGrade};
126pub use confidence_router::{ConfidenceRouter, CompressionMode};
127pub use context_evictor::{evict, should_evict, ContextItem, EvictionConfig, EvictionResult};
128pub use cmd_formatters::format_command;
129pub use delta_encoder::{DeltaConfig, DeltaEncoder, DeltaResult};
130pub use dependency_mapper::DependencyMapper;
131pub use dict_compressor::{DictCompressor, DictConfig, DictCompressResult};
132pub use entropy_analyzer::{EntropyAnalyzer, InfoLevel, AnalyzedBlock};
133pub use entropy_truncator::{EntropyTruncator, EntropyTruncConfig, EntropyTruncResult, EntropyTruncArrayResult};
134pub use file_reader::{FileReadMode, FileReader, ReadResult, BlockEntropy, compute_entropy, analyze_block_entropies};
135pub use image_compressor::{ImageCompressor, ImageDescription};
136pub use json_projection::{project_json, ProjectionConfig, ProjectionResult};
137pub use litm_positioner::{ContextSection, LitmPositioner, LitmStrategy, SectionType};
138pub use ngram_abbreviator::{NgramAbbreviator, AbbreviatorConfig, AbbreviationResult};
139pub use rle_compressor::{rle_compress, sliding_window_dedup, RleResult, SlidingWindowResult};
140pub use simhash::{simhash, SimHashFingerprint};
141pub use structural_summary::{summarize as structural_summarize, summarize_multi, SummaryConfig, StructuralSummaryResult};
142pub use textrank::{textrank_compress, TextRankConfig, TextRankResult};
143pub use mdl_selector::{select_stages, profile_content, ContentProfile, MdlSelection};
144pub use tool_hooks::{process_hook, process_hook_cursor, process_hook_gemini, process_hook_windsurf, generate_hook_configs, install_tool_hooks, install_tool_hooks_scoped, install_tool_hooks_scoped_filtered, claude_user_settings_path, remove_claude_global_hook, canonicalize_tool_name, parse_tool_list, InstallScope, ToolFilter, ToolHookConfig, HookScope, HookPlatform, SUPPORTED_TOOL_NAMES};
145pub use opencode_plugin::{
146    generate_opencode_plugin, install_opencode_plugin, update_opencode_config,
147    update_opencode_config_detailed, find_opencode_config,
148    opencode_config_has_comments,
149    remove_sqz_from_opencode_config, strip_jsonc_comments,
150    process_opencode_hook, opencode_plugin_path,
151};
152pub use codex_integration::{
153    agents_md_guidance_block, agents_md_path,
154    codex_config_path,
155    install_agents_md_guidance, install_codex_mcp_config,
156    remove_agents_md_guidance, remove_codex_mcp_config,
157};
158pub use claude_md_integration::{
159    claude_md_guidance_block, claude_md_path, claude_user_json_path,
160    install_claude_md_guidance, install_claude_mcp_config,
161    remove_claude_md_guidance, remove_claude_mcp_config,
162};
163pub use token_pruner::{TokenPruner, PrunerConfig, PruneResult};
164pub use tool_selector::{ToolDefinition, ToolSelector};
165pub use budget_tracker::{
166    AgentBudget, BudgetTracker, BudgetWarning, UsagePrediction, UsageReport,
167};
168pub use cache_manager::{CacheManager, CacheResult, ExpandResult};
169pub use correction_log::ContextWindow;
170pub use crp_engine::{CrpEngine, CrpLevel};
171pub use cost_calculator::{
172    CostBreakdown, CostCalculator, ModelPricing, PricingConfig, SessionCostSummary, TokenUsage,
173    ToolCost,
174};
175pub use ctx_format::{CtxEnvelope, CtxFormat, CtxMetadata};
176pub use error::{Result, SqzError, SourceLocation};
177pub use model_router::{ModelRouter, RoutingDecision, TaskContext};
178pub use pipeline::{CompressionPipeline, SessionContext};
179pub use pin_manager::PinManager;
180pub use plugin_api::{PluginLoader, PluginManifest, PluginSource, SqzPlugin};
181pub use prompt_cache::{CacheBoundary, Message, PromptCacheDetector};
182pub use session_store::{CompressionStats, DailyGain, SessionStore, SessionSummary};
183pub use session_continuity::{
184    SessionContinuityManager, SessionGuide, Snapshot, SnapshotEvent, SnapshotEventType,
185};
186pub use toon::ToonEncoder;
187pub use tee_mode::{TeeMode, TeeManager, TeeEntry};
188pub use terse_mode::TerseMode;
189pub use token_counter::TokenCounter;
190pub use types::*;
191pub use preset::{
192    BudgetConfig, CollapseArraysConfig, CompressionConfig, CondenseConfig, CustomTransformsConfig,
193    FlattenConfig, GitDiffFoldConfig, KeepFieldsConfig, ModelConfig, ModelPricingConfig, Preset,
194    PresetHeader, PresetMeta, PresetParser, StripFieldsConfig, StripNullsConfig, TerseLevel,
195    TerseModeConfig, ToolSelectionConfig, TruncateStringsConfig,
196};
197pub use progressive_throttle::{ProgressiveThrottler, ThrottleConfig, ThrottleLevel};
198pub use dashboard::{
199    CommandBreakdown, DashboardConfig, DashboardHtml, DashboardMetrics, DashboardServer,
200    SessionHistoryEntry, ToolBreakdown,
201};
202pub use engine::SqzEngine;
203pub use hook_manager::{
204    generate_platform_config, known_platforms, Hook, HookAction, HookContext, HookManager, HookType,
205};
206pub use sandbox_executor::{SandboxExecutor, SandboxResult, RuntimeInfo, FilteredOutput};
207pub use url_indexer::{ContentFetcher, IndexedChunk, IndexResult, UrlIndexer};
208pub use verifier::Verifier;
209
210pub use adaptive_tree::{compress_to_budget, build_tree, SemanticNode};
211pub use ast_delta::{ast_diff, encode_delta, AstDelta, AstChange, ChangeKind};
212pub use kv_cache_optimizer::{compress_with_sinks, compress_with_custom_sinks};
213pub use minhash_lsh::{MinHashLsh, MinHashSignature};
214pub use parse_tree_compressor::{compress_code, char_entropy};
215pub use cascade_compressor::{cascade_compress, CascadeLevel, CascadeThresholds, CascadeResult};
216pub use regret_tracker::{RegretTracker, RegretEvent, RegretKind, FileProfile};
217pub use transparency::{CompressionAnnotation};