use std::fs;
use std::io::{self, BufWriter, Read, Write};
use std::path::Path;
use rskim_core::{
detect_language_from_path, transform_auto_with_config, transform_with_config,
transform_with_quality, Language, Mode, TransformConfig,
};
use crate::{cache, cascade, cascade::TruncationOptions, tokens};
/// Largest input, in bytes, accepted from a file or from stdin (50 * 1024 * 1024).
const MAX_INPUT_SIZE: usize = 50 * 1024 * 1024;
/// Options that control how a single input (a file or stdin) is processed.
#[derive(Debug, Clone, Copy)]
pub(crate) struct ProcessOptions {
/// Transformation mode requested by the caller.
pub(crate) mode: Mode,
/// Language supplied explicitly by the caller; when set it is preferred over
/// detection from the path or filename hint.
pub(crate) explicit_lang: Option<Language>,
/// When `true`, `process_file` consults the cache before transforming and
/// writes the result back afterwards.
pub(crate) use_cache: bool,
/// When `true`, token counts for the original and transformed text are computed.
pub(crate) show_stats: bool,
/// Truncation settings; a set `token_budget` routes the transform through
/// `cascade::cascade_for_token_budget`.
pub(crate) trunc: TruncationOptions,
}
/// Outcome of processing one input.
#[derive(Debug)]
#[must_use]
pub(crate) struct ProcessResult {
/// Final text to emit (after the guardrail step, when that step ran).
pub(crate) output: String,
/// Token count of the original input, when it was computed.
pub(crate) original_tokens: Option<usize>,
/// Token count of `output`, when it was computed.
pub(crate) transformed_tokens: Option<usize>,
/// Whether the output guardrail reported that it was triggered; `false`
/// whenever the guardrail step was skipped or the result came from the cache.
pub(crate) guardrail_triggered: bool,
/// Label produced by `parse_tier_from`; `None` for results served from the cache.
pub(crate) parse_tier: Option<&'static str>,
}
/// Maps the requested mode and the `has_errors` flag to a parse-tier label.
///
/// Returns `"passthrough"` for `Mode::Full` regardless of `has_errors`,
/// `"degraded"` for any other mode when `has_errors` is `true`, and `"full"`
/// otherwise.
pub(crate) fn parse_tier_from(mode: Mode, has_errors: bool) -> &'static str {
    match (mode, has_errors) {
        // Full mode wins over the error flag.
        (Mode::Full, _) => "passthrough",
        (_, true) => "degraded",
        (_, false) => "full",
    }
}
/// Counts tokens in both `original` and `transformed`.
///
/// Returns `(Some(original_count), Some(transformed_count))` only when both
/// counts succeed; if either call to `tokens::count_tokens` fails, both
/// halves of the pair are `None`.
pub(crate) fn count_token_pair(
    original: &str,
    transformed: &str,
) -> (Option<usize>, Option<usize>) {
    // Both counts are always attempted; a failure on either side discards the pair.
    let original_count = tokens::count_tokens(original);
    let transformed_count = tokens::count_tokens(transformed);

    if let (Ok(before), Ok(after)) = (original_count, transformed_count) {
        (Some(before), Some(after))
    } else {
        (None, None)
    }
}
/// Prints a token-statistics line to stderr when both counts are available.
///
/// Nothing is printed if either `original_tokens` or `transformed_tokens` is
/// `None`. `suffix` is appended verbatim after the formatted statistics.
pub(crate) fn report_token_stats(
    original_tokens: Option<usize>,
    transformed_tokens: Option<usize>,
    suffix: &str,
) {
    let (Some(before), Some(after)) = (original_tokens, transformed_tokens) else {
        return;
    };

    let stats = tokens::TokenStats::new(before, after);
    eprintln!("\n[skim] {}{}", stats.format(), suffix);
}
/// Writes `result.output` to stdout and optionally reports token statistics.
///
/// The output is written through a buffered, locked stdout handle and flushed
/// before any statistics are reported. When `show_stats` is `true`, the token
/// counts stored in `result` are passed to `report_token_stats`.
///
/// # Errors
///
/// Returns an error if writing to or flushing stdout fails.
pub(crate) fn write_result_and_stats(
    result: &ProcessResult,
    show_stats: bool,
) -> anyhow::Result<()> {
    let mut out = BufWriter::new(io::stdout().lock());
    out.write_all(result.output.as_bytes())?;
    out.flush()?;

    if show_stats {
        report_token_stats(result.original_tokens, result.transformed_tokens, "");
    }
    Ok(())
}
/// Looks up a cached result for `path` under the given options.
///
/// Returns `Ok(None)` when caching is disabled or the cache has no entry.
/// On a hit, the cached token counts are reused; if statistics were requested
/// but the entry carries no original token count, the file is re-read and
/// both counts are recomputed.
///
/// # Errors
///
/// Returns an error only when the file has to be re-read for counting and
/// `read_and_validate` fails.
fn try_cached_result(
    path: &Path,
    options: &ProcessOptions,
) -> anyhow::Result<Option<ProcessResult>> {
    let cached = if options.use_cache {
        cache::read_cache(path, options.mode, &options.trunc)
    } else {
        None
    };
    let Some(entry) = cached else {
        return Ok(None);
    };

    let (original_tokens, transformed_tokens) =
        if options.show_stats && entry.original_tokens.is_none() {
            // The entry was stored without counts; recompute them from the source file.
            let source = read_and_validate(path)?;
            count_token_pair(&source, &entry.content)
        } else {
            (entry.original_tokens, entry.transformed_tokens)
        };

    Ok(Some(ProcessResult {
        output: entry.content,
        original_tokens,
        transformed_tokens,
        guardrail_triggered: false,
        parse_tier: None,
    }))
}
/// Reads the file at `path` as UTF-8 text, rejecting anything larger than
/// `MAX_INPUT_SIZE` bytes.
///
/// The size reported by the filesystem is checked before the contents are
/// read, so an oversized file is rejected without first being loaded into
/// memory. The length is checked again after reading because the reported
/// size may not match what is actually read (for example if the file grows
/// between the two calls).
///
/// # Errors
///
/// Returns an error if the file metadata cannot be queried, if the file
/// cannot be read as UTF-8 text, or if it exceeds `MAX_INPUT_SIZE` bytes.
fn read_and_validate(path: &Path) -> anyhow::Result<String> {
    // Shared by both size checks so the message stays identical.
    let too_large = |size: u64| {
        anyhow::anyhow!(
            "File too large: {} bytes exceeds maximum of {} bytes ({}MB)",
            size,
            MAX_INPUT_SIZE,
            MAX_INPUT_SIZE / 1024 / 1024
        )
    };

    // Cheap pre-check: avoid allocating the whole file just to reject it.
    let reported_len = fs::metadata(path)?.len();
    if reported_len > MAX_INPUT_SIZE as u64 {
        return Err(too_large(reported_len));
    }

    let contents = fs::read_to_string(path)?;
    if contents.len() > MAX_INPUT_SIZE {
        return Err(too_large(contents.len() as u64));
    }
    Ok(contents)
}
/// Transforms `contents` according to `options`.
///
/// With a token budget set, the work is delegated to
/// `cascade::cascade_for_token_budget`; the language is the explicit one,
/// else the one detected from `path`, else TypeScript (with a warning on
/// stderr). Without a budget, a known language is transformed via
/// `transform_with_quality`; otherwise the path-based auto transform is used.
///
/// Returns the transformed text, the mode that was used (the cascade's
/// reported mode, or `options.mode` when no budget is set), and the
/// `has_errors` flag (always `false` except on the `transform_with_quality`
/// path).
///
/// # Errors
///
/// Propagates any error from the underlying transform or cascade calls.
fn run_transform(
    contents: &str,
    path: &Path,
    options: &ProcessOptions,
) -> anyhow::Result<(String, Mode, bool)> {
    let explicit_lang = options.explicit_lang;

    // Try path-based auto-detection first; fall back to the explicit language
    // if one was given, otherwise surface the auto-detection error.
    let transform_file = |config: &TransformConfig| -> anyhow::Result<Option<String>> {
        match transform_auto_with_config(contents, path, config) {
            Ok(output) => Ok(Some(output)),
            Err(auto_err) => match explicit_lang {
                Some(language) => Ok(Some(transform_with_config(contents, language, config)?)),
                None => Err(auto_err.into()),
            },
        }
    };

    if let Some(budget) = options.trunc.token_budget {
        let language = match explicit_lang.or_else(|| detect_language_from_path(path)) {
            Some(lang) => lang,
            None => {
                eprintln!(
                    "[skim] warning: language detection failed for '{}', defaulting to TypeScript",
                    path.display(),
                );
                Language::TypeScript
            }
        };
        let (output, mode) = cascade::cascade_for_token_budget(
            options.mode,
            &options.trunc,
            budget,
            language,
            transform_file,
        )?;
        return Ok((output, mode, false));
    }

    let language = explicit_lang.or_else(|| detect_language_from_path(path));
    let config = cascade::build_config(options.mode, &options.trunc);
    match language {
        Some(lang) => {
            let (output, has_errors) = transform_with_quality(contents, lang, &config)?;
            Ok((output, options.mode, has_errors))
        }
        None => {
            let output = transform_file(&config)?.ok_or_else(|| {
                anyhow::anyhow!("Language detection failed and no --language specified")
            })?;
            Ok((output, options.mode, false))
        }
    }
}
/// Reads source text from stdin, transforms it, and returns the result.
///
/// The language is `options.explicit_lang` when set, otherwise it is derived
/// from `filename_hint`. With a token budget the transform goes through
/// `cascade::cascade_for_token_budget`; without one it uses
/// `transform_with_quality`, and for modes other than `Mode::Full` the output
/// is additionally passed through the guardrail step.
///
/// # Errors
///
/// Fails when stdin cannot be read as UTF-8 text, when more than
/// `MAX_INPUT_SIZE` bytes are supplied, when no language can be determined,
/// or when the transform or guardrail step returns an error.
pub(crate) fn process_stdin(
    options: ProcessOptions,
    filename_hint: Option<&str>,
) -> anyhow::Result<ProcessResult> {
    // Read at most one byte past the limit so oversized input is detectable
    // without buffering an unbounded stream.
    let mut buffer = String::with_capacity(64 * 1024);
    let mut limited_stdin = io::stdin().take(MAX_INPUT_SIZE as u64 + 1);
    let bytes_read = limited_stdin.read_to_string(&mut buffer)?;
    if bytes_read > MAX_INPUT_SIZE {
        anyhow::bail!(
            "Input too large: {} bytes exceeds maximum of {} bytes ({}MB)",
            bytes_read,
            MAX_INPUT_SIZE,
            MAX_INPUT_SIZE / 1024 / 1024
        );
    }

    let filename_lang = filename_hint.and_then(|f| Language::from_path(Path::new(f)));
    let Some(language) = options.explicit_lang.or(filename_lang) else {
        return Err(match filename_hint {
            Some(fname) => anyhow::anyhow!(
                "Language detection failed: unrecognized filename '{}'\n\
                 Supported extensions: .ts, .tsx, .js, .jsx, .py, .rs, .go, .java, .c, .h, .cpp, .hpp, .cxx, .cc, .md, .json, .yaml, .yml, .toml\n\
                 Hint: use --language to specify the language explicitly\n\
                 Example: cat file | skim - --language=typescript",
                fname
            ),
            None => anyhow::anyhow!(
                "Language detection failed: reading from stdin requires --language or --filename\n\
                 Example: cat file.ts | skim - --language=typescript\n\
                 Example: git show HEAD:main.rs | skim - --filename=main.rs"
            ),
        });
    };

    let (transformed, stdin_has_errors) = if let Some(budget) = options.trunc.token_budget {
        let (output, _mode) = cascade::cascade_for_token_budget(
            options.mode,
            &options.trunc,
            budget,
            language,
            |config| Ok(Some(transform_with_config(&buffer, language, config)?)),
        )?;
        // The cascade path does not report parse errors.
        (output, false)
    } else {
        let config = cascade::build_config(options.mode, &options.trunc);
        transform_with_quality(&buffer, language, &config)?
    };
    let parse_tier = Some(parse_tier_from(options.mode, stdin_has_errors));

    // The guardrail only runs for non-Full modes without a token budget.
    let guardrail_applies = options.mode != Mode::Full && options.trunc.token_budget.is_none();
    let (output, guardrail_triggered) = if guardrail_applies {
        let outcome = crate::output::guardrail::apply_to_stderr(buffer.clone(), transformed)?;
        let triggered = outcome.was_triggered();
        (outcome.into_output(), triggered)
    } else {
        (transformed, false)
    };

    let (original_tokens, transformed_tokens) = if options.show_stats {
        count_token_pair(&buffer, &output)
    } else {
        (None, None)
    };

    Ok(ProcessResult {
        output,
        original_tokens,
        transformed_tokens,
        guardrail_triggered,
        parse_tier,
    })
}
/// Transforms the file at `path`, using the cache when enabled.
///
/// A cache hit is returned immediately. Otherwise the file is read and
/// transformed; for modes other than `Mode::Full` with no token budget the
/// output is passed through the guardrail step. Token counts are computed
/// only when `options.show_stats` is set. When caching is enabled the final
/// output is written back to the cache, and a failure to write is ignored.
///
/// # Errors
///
/// Propagates errors from the cache lookup, from reading the file, from the
/// transform, and from the guardrail step.
pub(crate) fn process_file(path: &Path, options: ProcessOptions) -> anyhow::Result<ProcessResult> {
    if let Some(cached) = try_cached_result(path, &options)? {
        return Ok(cached);
    }

    let source = read_and_validate(path)?;
    let (transformed, mode_used, has_errors) = run_transform(&source, path, &options)?;
    let parse_tier = Some(parse_tier_from(options.mode, has_errors));

    // The guardrail only runs for non-Full modes without a token budget.
    let guardrail_applies = options.mode != Mode::Full && options.trunc.token_budget.is_none();
    let (output, guardrail_triggered) = if guardrail_applies {
        let outcome = crate::output::guardrail::apply_to_stderr(source.clone(), transformed)?;
        let triggered = outcome.was_triggered();
        (outcome.into_output(), triggered)
    } else {
        (transformed, false)
    };

    let (original_tokens, transformed_tokens) = if options.show_stats {
        count_token_pair(&source, &output)
    } else {
        (None, None)
    };

    if options.use_cache {
        // Record the mode actually used only when it differs from the requested one.
        let effective_mode = if mode_used == options.mode {
            None
        } else {
            Some(mode_used)
        };
        let params = cache::CacheWriteParams {
            path,
            mode: options.mode,
            content: &output,
            original_tokens,
            transformed_tokens,
            trunc: options.trunc,
            effective_mode,
            parse_tier: parse_tier.map(str::to_string),
        };
        // Best effort: the write result is deliberately discarded.
        let _ = cache::write_cache(&params);
    }

    Ok(ProcessResult {
        output,
        original_tokens,
        transformed_tokens,
        guardrail_triggered,
        parse_tier,
    })
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A longer original should count more tokens than a shorter transform.
    #[test]
    fn count_token_pair_returns_some_for_valid_input() {
        let (orig, trans) = count_token_pair("hello world", "hello");
        assert!(orig.is_some(), "original tokens should be Some");
        assert!(trans.is_some(), "transformed tokens should be Some");
        // Both sides are `Some` here, so comparing the options compares the counts.
        assert!(
            orig > trans,
            "original should have more tokens than transformed"
        );
    }

    /// Empty input counts as zero tokens on both sides.
    #[test]
    fn count_token_pair_returns_some_for_empty_strings() {
        let counts = count_token_pair("", "");
        assert_eq!(counts.0, Some(0));
        assert_eq!(counts.1, Some(0));
    }

    /// Identical text yields identical counts.
    #[test]
    fn count_token_pair_original_equals_transformed_for_identical_input() {
        let text = "fn main() { println!(\"hello\"); }";
        let (orig, trans) = count_token_pair(text, text);
        assert_eq!(orig, trans);
    }

    /// Any missing count must be tolerated silently.
    #[test]
    fn report_token_stats_does_not_panic_with_none_values() {
        for (orig, trans) in [(None, None), (Some(100), None), (None, Some(50))] {
            report_token_stats(orig, trans, "");
        }
    }

    /// Reporting with both counts present must not panic.
    #[test]
    fn report_token_stats_does_not_panic_with_valid_values() {
        report_token_stats(Some(1000), Some(200), " (test)");
    }

    /// A path that does not exist is reported as an error.
    #[test]
    fn read_and_validate_rejects_nonexistent_file() {
        let missing = Path::new("/nonexistent/file.txt");
        assert!(read_and_validate(missing).is_err());
    }

    /// Full mode is always "passthrough", whatever the error flag.
    #[test]
    fn test_parse_tier_passthrough() {
        for has_errors in [false, true] {
            assert_eq!(parse_tier_from(Mode::Full, has_errors), "passthrough");
        }
    }

    /// Non-Full modes with errors are "degraded".
    #[test]
    fn test_parse_tier_degraded() {
        for mode in [Mode::Structure, Mode::Signatures, Mode::Minimal] {
            assert_eq!(parse_tier_from(mode, true), "degraded");
        }
    }

    /// Non-Full modes without errors are "full".
    #[test]
    fn test_parse_tier_full() {
        for mode in [Mode::Structure, Mode::Signatures, Mode::Types] {
            assert_eq!(parse_tier_from(mode, false), "full");
        }
    }
}