use rskim_core::{truncate_to_token_budget, Language, Mode, TransformConfig};
use crate::tokens;
/// Optional output-size limits supplied alongside a transformation mode.
///
/// Every field defaults to `None` (via the derived `Default`), meaning
/// "no limit of this kind requested".
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub(crate) struct TruncationOptions {
/// Forwarded to `TransformConfig::with_max_lines` by `build_config` when set.
/// NOTE(review): presumably caps output to the first N lines — confirm in rskim_core.
pub(crate) max_lines: Option<usize>,
/// Forwarded to `TransformConfig::with_last_lines` by `build_config` when set.
/// NOTE(review): presumably keeps only the last N lines — confirm in rskim_core.
pub(crate) last_lines: Option<usize>,
/// Token budget requested by the caller. Not read by `build_config`; the
/// cascade functions in this file receive the budget as a separate argument.
pub(crate) token_budget: Option<usize>,
}
/// Error text returned by the cascade functions when the transform callback
/// yields `None` for every mode that was tried, so there is nothing to emit
/// or truncate.
const NO_OUTPUT_MSG: &str = "Token budget cascade: no transformation mode produced output. \
Ensure the file is in a supported language or specify --language.";
/// Creates the [`TransformConfig`] for `mode`, layering on the line limits
/// from `trunc`.
///
/// `max_lines` is applied first, then `last_lines`; each is skipped when it is
/// `None`. `trunc.token_budget` is not consulted here.
pub(crate) fn build_config(mode: Mode, trunc: &TruncationOptions) -> TransformConfig {
    let base = TransformConfig::with_mode(mode);

    let head_limited = match trunc.max_lines {
        Some(limit) => base.with_max_lines(limit),
        None => base,
    };

    match trunc.last_lines {
        Some(limit) => head_limited.with_last_lines(limit),
        None => head_limited,
    }
}
/// Counts the tokens in `text`, falling back to `usize::MAX` on failure.
///
/// A counting error is reported on stderr and mapped to `usize::MAX` so that
/// any `count <= budget` comparison treats the text as over budget.
fn count_tokens_or_max(text: &str) -> usize {
    match tokens::count_tokens(text) {
        Ok(count) => count,
        Err(e) => {
            eprintln!("[skim] warning: token counting failed, treating as over-budget: {e}");
            usize::MAX
        }
    }
}
/// Last-resort step: hands `output` to `truncate_to_token_budget` so it can be
/// cut down to `token_budget`.
///
/// * `output` - text produced by `mode` that was still over budget.
/// * `language` - forwarded unchanged to `truncate_to_token_budget`.
/// * `token_budget` - the token limit to truncate to.
/// * `mode` - the mode that produced `output`; named in the stderr notice and
///   returned unchanged alongside the truncated text.
/// * `known_token_count` - token count already measured for `output`, if any;
///   forwarded unchanged to `truncate_to_token_budget`.
///
/// # Errors
/// Propagates any error returned by `truncate_to_token_budget`.
fn fallback_line_truncate(
    output: &str,
    language: Language,
    token_budget: usize,
    mode: Mode,
    known_token_count: Option<usize>,
) -> anyhow::Result<(String, Mode)> {
    // Tell the user that mode escalation alone was not enough.
    eprintln!(
        "[skim] token budget: all modes exceeded budget, applying line truncation ({} mode)",
        mode.name(),
    );

    Ok((
        truncate_to_token_budget(
            output,
            language,
            token_budget,
            count_tokens_or_max,
            known_token_count,
        )?,
        mode,
    ))
}
/// Finds output that fits within `token_budget`, escalating through modes and
/// finally truncating if nothing fits.
///
/// Languages for which `language.is_serde_based()` is true are delegated to
/// `cascade_serde`. Otherwise each mode yielded by
/// `starting_mode.cascade_from_here()` is tried in order:
///
/// 1. `transform_fn` is called with the config for that mode; a `None` result
///    skips the mode.
/// 2. The first output whose token count is within budget is returned together
///    with its mode (an escalation notice is printed to stderr when that mode
///    differs from `starting_mode`).
/// 3. If every produced output is over budget, the most recently produced one
///    is passed to `fallback_line_truncate`.
///
/// # Errors
/// * Any error from `transform_fn` or from the truncation fallback.
/// * An error carrying `NO_OUTPUT_MSG` when no mode produced any output.
pub(crate) fn cascade_for_token_budget<F>(
    starting_mode: Mode,
    trunc: &TruncationOptions,
    token_budget: usize,
    language: Language,
    transform_fn: F,
) -> anyhow::Result<(String, Mode)>
where
    F: Fn(&TransformConfig) -> anyhow::Result<Option<String>>,
{
    if language.is_serde_based() {
        return cascade_serde(starting_mode, trunc, token_budget, language, &transform_fn);
    }

    // Most recent over-budget candidate as (text, mode, measured token count).
    let mut over_budget: Option<(String, Mode, usize)> = None;

    let modes = starting_mode.cascade_from_here();
    for &mode in modes {
        let candidate = match transform_fn(&build_config(mode, trunc))? {
            Some(text) => text,
            // This mode has nothing to offer; move on to the next one.
            None => continue,
        };

        let tokens_used = count_tokens_or_max(&candidate);
        if tokens_used > token_budget {
            // Remember it so the fallback can truncate the latest attempt.
            over_budget = Some((candidate, mode, tokens_used));
            continue;
        }

        if mode != starting_mode {
            eprintln!(
                "[skim] token budget: escalated from {} to {} mode ({} tokens)",
                starting_mode.name(),
                mode.name(),
                tokens_used,
            );
        }
        return Ok((candidate, mode));
    }

    match over_budget {
        Some((text, mode, tokens_used)) => {
            fallback_line_truncate(&text, language, token_budget, mode, Some(tokens_used))
        }
        None => Err(anyhow::anyhow!(NO_OUTPUT_MSG)),
    }
}
/// Budget cascade used when `language.is_serde_based()` is true.
///
/// Steps:
/// 1. Transform with `starting_mode`; return that output if it fits the budget.
/// 2. If `starting_mode` is `Full`, `Minimal` or `Pseudo`, try `Structure`
///    mode. Output that fits is returned as-is (with a stderr notice); output
///    that does not fit is line-truncated and reported as `Mode::Structure`.
/// 3. Otherwise, or when `Structure` yields `None`, line-truncate the
///    starting-mode output.
///
/// # Errors
/// * An error carrying `NO_OUTPUT_MSG` when the starting mode yields `None`.
/// * Any error from `transform_fn` or from the truncation fallback.
fn cascade_serde<F>(
    starting_mode: Mode,
    trunc: &TruncationOptions,
    token_budget: usize,
    language: Language,
    transform_fn: &F,
) -> anyhow::Result<(String, Mode)>
where
    F: Fn(&TransformConfig) -> anyhow::Result<Option<String>>,
{
    let initial = match transform_fn(&build_config(starting_mode, trunc))? {
        Some(text) => text,
        None => return Err(anyhow::anyhow!(NO_OUTPUT_MSG)),
    };
    let initial_tokens = count_tokens_or_max(&initial);
    if initial_tokens <= token_budget {
        return Ok((initial, starting_mode));
    }

    // Only these starting modes get a second attempt in Structure mode.
    let may_escalate = matches!(starting_mode, Mode::Full | Mode::Minimal | Mode::Pseudo);
    let structured = if may_escalate {
        transform_fn(&build_config(Mode::Structure, trunc))?
    } else {
        None
    };

    let Some(extracted) = structured else {
        // No Structure output available: truncate what the starting mode gave us.
        return fallback_line_truncate(
            &initial,
            language,
            token_budget,
            starting_mode,
            Some(initial_tokens),
        );
    };

    let extracted_tokens = count_tokens_or_max(&extracted);
    if extracted_tokens > token_budget {
        return fallback_line_truncate(
            &extracted,
            language,
            token_budget,
            Mode::Structure,
            Some(extracted_tokens),
        );
    }

    eprintln!(
        "[skim] token budget: escalated from {} to structure mode ({} tokens)",
        starting_mode.name(),
        extracted_tokens,
    );
    Ok((extracted, Mode::Structure))
}
// Unit tests for the token-budget cascade. They drive `cascade_for_token_budget`
// with a mock transform so that no real parsing is involved.
#[cfg(test)]
mod tests {
use super::*;
use crate::tokens;
// Builds a fake transform closure.
//
// For the first `(mode, size)` entry in `mode_sizes` whose mode equals
// `config.mode`, the closure returns the first `size` whitespace-separated
// words of `source`, joined by single spaces. Modes not listed yield `Ok(None)`.
fn mock_transform<'a>(
source: &'a str,
mode_sizes: &'a [(Mode, usize)],
) -> impl Fn(&TransformConfig) -> anyhow::Result<Option<String>> + 'a {
move |config: &TransformConfig| {
for &(mode, size) in mode_sizes {
if config.mode == mode {
let words: Vec<&str> = source.split_whitespace().take(size).collect();
return Ok(Some(words.join(" ")));
}
}
Ok(None)
}
}
// The starting mode's 3-word output is expected to fit a budget of 10, so no
// escalation should happen.
// NOTE(review): relies on the tokenizer counting these 3 words as <= 10 tokens.
#[test]
fn test_cascade_returns_first_mode_when_within_budget() {
let source = "word1 word2 word3 word4 word5 word6 word7 word8 word9 word10";
let mode_sizes = vec![
(Mode::Structure, 3),
(Mode::Signatures, 2),
(Mode::Types, 1),
];
let transform = mock_transform(source, &mode_sizes);
let trunc = TruncationOptions::default();
let (output, mode_used) =
cascade_for_token_budget(Mode::Structure, &trunc, 10, Language::TypeScript, transform)
.unwrap();
assert_eq!(mode_used, Mode::Structure);
assert_eq!(output, "word1 word2 word3");
}
// Structure (20 words) is expected to exceed the budget of 10 while
// Signatures (8 words) fits, so the cascade should stop at Signatures.
// NOTE(review): assumes `cascade_from_here` visits Signatures after Structure
// and that single-letter words tokenize to about one token each — confirm.
#[test]
fn test_cascade_escalates_to_more_aggressive_mode() {
let source = "a b c d e f g h i j k l m n o p q r s t";
let mode_sizes = vec![
(Mode::Structure, 20),
(Mode::Signatures, 8),
(Mode::Types, 3),
];
let transform = mock_transform(source, &mode_sizes);
let trunc = TruncationOptions::default();
let (_output, mode_used) =
cascade_for_token_budget(Mode::Structure, &trunc, 10, Language::TypeScript, transform)
.unwrap();
assert_eq!(mode_used, Mode::Signatures);
}
// Every mode is expected to exceed the budget of 5, so the last mode tried
// (Types) is line-truncated. The result must fit the budget or be empty.
#[test]
fn test_cascade_falls_through_to_line_truncation() {
let source = "a b c d e f g h i j k l m n o p q r s t";
let mode_sizes = vec![
(Mode::Structure, 20),
(Mode::Signatures, 15),
(Mode::Types, 12),
];
let transform = mock_transform(source, &mode_sizes);
let trunc = TruncationOptions::default();
let (output, mode_used) =
cascade_for_token_budget(Mode::Structure, &trunc, 5, Language::TypeScript, transform)
.unwrap();
assert_eq!(mode_used, Mode::Types);
let token_count = tokens::count_tokens(&output).unwrap_or(usize::MAX);
assert!(
token_count <= 5 || output.is_empty(),
"Final output should be within budget or empty, got {} tokens: {:?}",
token_count,
output
);
}
// Starting at Types with output that fits the budget: that output is
// returned unchanged with mode Types.
#[test]
fn test_cascade_single_mode_types() {
let source = "a b c d e f g h i j";
let mode_sizes = vec![(Mode::Types, 5)];
let transform = mock_transform(source, &mode_sizes);
let trunc = TruncationOptions::default();
let (output, mode_used) =
cascade_for_token_budget(Mode::Types, &trunc, 10, Language::TypeScript, transform)
.unwrap();
assert_eq!(mode_used, Mode::Types);
assert_eq!(output, "a b c d e");
}
// A transform that always yields `None` must surface the NO_OUTPUT_MSG error
// on the non-serde path.
#[test]
fn test_cascade_errors_when_no_mode_produces_output() {
let transform = |_config: &TransformConfig| -> anyhow::Result<Option<String>> { Ok(None) };
let trunc = TruncationOptions::default();
let result = cascade_for_token_budget(
Mode::Structure,
&trunc,
100,
Language::TypeScript,
transform,
);
assert!(result.is_err());
assert!(result
.unwrap_err()
.to_string()
.contains("no transformation mode produced output"),);
}
// Serde path (JSON): the starting mode's output fits, so it is returned as-is.
// NOTE(review): the serde tests assume `is_serde_based()` is true for
// Json/Yaml/Toml — confirm in rskim_core.
#[test]
fn test_serde_cascade_returns_starting_mode_when_within_budget() {
let source = "a b c d e f g h i j";
let mode_sizes = vec![(Mode::Full, 5), (Mode::Structure, 3)];
let transform = mock_transform(source, &mode_sizes);
let trunc = TruncationOptions::default();
let (output, mode_used) =
cascade_for_token_budget(Mode::Full, &trunc, 10, Language::Json, transform).unwrap();
assert_eq!(mode_used, Mode::Full);
assert_eq!(output, "a b c d e");
}
// Serde path: Full is over budget, Structure fits, so Structure is returned.
#[test]
fn test_serde_cascade_escalates_from_full_to_structure() {
let source = "a b c d e f g h i j k l m n o p q r s t";
let mode_sizes = vec![(Mode::Full, 20), (Mode::Structure, 5)];
let transform = mock_transform(source, &mode_sizes);
let trunc = TruncationOptions::default();
let (output, mode_used) =
cascade_for_token_budget(Mode::Full, &trunc, 10, Language::Json, transform).unwrap();
assert_eq!(mode_used, Mode::Structure);
assert_eq!(output, "a b c d e");
}
// Serde path: both Full and Structure are over budget, so the Structure
// output is line-truncated and reported as Structure.
#[test]
fn test_serde_cascade_full_to_structure_exceeds_falls_to_truncation() {
let source = "a b c d e f g h i j k l m n o p q r s t";
let mode_sizes = vec![(Mode::Full, 20), (Mode::Structure, 15)];
let transform = mock_transform(source, &mode_sizes);
let trunc = TruncationOptions::default();
let (output, mode_used) =
cascade_for_token_budget(Mode::Full, &trunc, 5, Language::Json, transform).unwrap();
assert_eq!(mode_used, Mode::Structure);
let token_count = tokens::count_tokens(&output).unwrap_or(usize::MAX);
assert!(
token_count <= 5 || output.is_empty(),
"Expected within budget or empty after truncation, got {} tokens: {:?}",
token_count,
output
);
}
// Serde path starting at Structure: no further mode is tried, so the
// over-budget Structure output goes straight to line truncation.
#[test]
fn test_serde_cascade_structure_start_exceeds_falls_to_truncation() {
let source = "a b c d e f g h i j k l m n o p q r s t";
let mode_sizes = vec![(Mode::Structure, 20)];
let transform = mock_transform(source, &mode_sizes);
let trunc = TruncationOptions::default();
let (output, mode_used) =
cascade_for_token_budget(Mode::Structure, &trunc, 5, Language::Yaml, transform)
.unwrap();
assert_eq!(mode_used, Mode::Structure);
let token_count = tokens::count_tokens(&output).unwrap_or(usize::MAX);
assert!(
token_count <= 5 || output.is_empty(),
"Expected within budget or empty after truncation, got {} tokens: {:?}",
token_count,
output
);
}
// Serde path: a transform that always yields `None` must surface the
// NO_OUTPUT_MSG error.
#[test]
fn test_serde_cascade_errors_when_no_output() {
let transform = |_config: &TransformConfig| -> anyhow::Result<Option<String>> { Ok(None) };
let trunc = TruncationOptions::default();
let result = cascade_for_token_budget(Mode::Full, &trunc, 100, Language::Toml, transform);
assert!(result.is_err());
assert!(result
.unwrap_err()
.to_string()
.contains("no transformation mode produced output"),);
}
}