mod ast;
mod ast_ts;
mod code;
mod diff;
mod json;
mod logs;
mod smart;
mod text;
mod util;
use crate::detect::{ContentType, Language};
use crate::error::Result;
use crate::tokens;
use regex_lite::Regex;
/// Options controlling how `compress_with` processes its input.
///
/// The derived `Default` leaves every field off/empty: no budget, both flags
/// `false`, and no custom patterns.
#[derive(Debug, Default, Clone)]
pub struct CompressOptions {
/// Optional token limit. When `Some`, `compress_with` passes the final
/// output through `enforce_budget` with this limit.
pub budget: Option<usize>,
/// When `true`, `Code` input is handled by `ast::stub_bodies`, except for
/// HTML, CSS, SQL, shell and YAML, which still go through `code::compress`.
pub ast: bool,
/// When `true`, the compressed output is additionally run through
/// `smart::dedup`.
pub smart: bool,
/// Forwarded as the last argument to `code::compress`.
/// NOTE(review): presumably requests removal of doc comments — confirm in `code`.
pub strip_docs: bool,
/// Regex patterns whose matches are removed from each output line by
/// `apply_custom_patterns`; patterns that fail to compile are skipped.
pub custom_patterns: Vec<String>,
}
pub fn compress(input: &str, content_type: &ContentType, budget: Option<usize>) -> Result<String> {
compress_with(
input,
content_type,
&CompressOptions {
budget,
..Default::default()
},
)
}
/// Compresses `input` according to `content_type` and the given `opts`.
///
/// Processing happens in four stages, in this order:
/// 1. a content-type specific compressor (`code`, `ast`, `json`, `logs`,
///    `diff` or `text`);
/// 2. `smart::dedup`, if `opts.smart` is set;
/// 3. removal of `opts.custom_patterns` matches, if any patterns are given;
/// 4. truncation to `opts.budget` tokens, if a budget is given.
///
/// # Errors
///
/// Propagates the error returned by `json::compress` for JSON input.
pub fn compress_with(
    input: &str,
    content_type: &ContentType,
    opts: &CompressOptions,
) -> Result<String> {
    // Stage 1: content-type specific compression.
    let mut output = match content_type {
        ContentType::Code(lang) => {
            // These languages never take the AST path, even when it is requested.
            let text_only = matches!(
                lang,
                Language::Html
                    | Language::Css
                    | Language::Sql
                    | Language::Shell
                    | Language::Yaml
            );
            if opts.ast && !text_only {
                ast::stub_bodies(input, lang)
            } else {
                code::compress(input, lang, opts.strip_docs)
            }
        }
        ContentType::Json => json::compress(input)?,
        ContentType::Logs => logs::compress(input),
        ContentType::Diff => diff::compress(input),
        ContentType::Text => text::compress(input),
    };

    // Stage 2: optional de-duplication pass.
    if opts.smart {
        output = smart::dedup(&output);
    }

    // Stage 3: optional user-supplied pattern removal.
    if !opts.custom_patterns.is_empty() {
        output = apply_custom_patterns(&output, &opts.custom_patterns);
    }

    // Stage 4: optional token budget.
    if let Some(limit) = opts.budget {
        output = enforce_budget(output, limit);
    }

    Ok(output)
}
/// Removes every match of the regex `patterns` from `text`, line by line.
///
/// Patterns that fail to compile are skipped. If no pattern compiles, `text`
/// is returned unchanged. Otherwise, for each line, all matches of every
/// regex are deleted (regexes applied in the order given), lines that are
/// blank afterwards are dropped, and each surviving line is emitted with the
/// leading whitespace of the original line and no trailing whitespace. Lines
/// are joined with `\n`; the result has no trailing newline.
fn apply_custom_patterns(text: &str, patterns: &[String]) -> String {
    // Best effort: an invalid pattern is ignored rather than failing the call.
    let regexes: Vec<Regex> = patterns.iter().filter_map(|p| Regex::new(p).ok()).collect();
    if regexes.is_empty() {
        return text.to_string();
    }
    let mut out = String::with_capacity(text.len());
    for line in text.lines() {
        let mut result = line.to_string();
        for re in &regexes {
            result = re.replace_all(&result, "").into_owned();
        }
        let trimmed = result.trim();
        if !trimmed.is_empty() {
            // Indentation is taken from the original line, not from the
            // stripped result. `trim_start` returns a suffix of `line`, so the
            // slice end always falls on a char boundary.
            let indent = &line[..line.len() - line.trim_start().len()];
            out.push_str(indent);
            out.push_str(trimmed);
            out.push('\n');
        }
    }
    // Drop the final newline in place instead of allocating a trimmed copy.
    let kept = out.trim_end().len();
    out.truncate(kept);
    out
}
/// Truncates `text` to whole lines so that the kept lines fit within `budget`
/// tokens, as measured by `tokens::count`.
///
/// If `text` already fits, it is returned unchanged. Otherwise lines are kept
/// from the top for as long as the accumulated output (each line terminated
/// with `\n`) stays within the budget, and a truncation marker line is
/// appended.
///
/// Note: the marker is appended after the budget check, so the returned string
/// may exceed `budget` by the marker's own tokens.
fn enforce_budget(text: String, budget: usize) -> String {
    if tokens::count(&text) <= budget {
        return text;
    }
    let mut out = String::with_capacity(text.len());
    for line in text.lines() {
        // Append tentatively and roll back on overflow; this avoids rebuilding
        // the whole accumulated string with `format!` on every line.
        let keep = out.len();
        out.push_str(line);
        out.push('\n');
        if tokens::count(&out) > budget {
            out.truncate(keep);
            break;
        }
    }
    out.push_str("// [tersify: truncated to fit token budget]\n");
    out
}