/// Returns the context limit assumed for the given model name.
///
/// The lookup is a case-sensitive substring check: any name containing
/// `"pro"` maps to 2_097_152, every other name (including the empty string)
/// maps to 1_048_576.
pub fn get_context_limit_for_model(model: &str) -> u32 {
    const PRO_CONTEXT_LIMIT: u32 = 2 * 1024 * 1024;
    const DEFAULT_CONTEXT_LIMIT: u32 = 1024 * 1024;

    match model.find("pro") {
        Some(_) => PRO_CONTEXT_LIMIT,
        None => DEFAULT_CONTEXT_LIMIT,
    }
}
pub fn to_claude_usage(
usage_metadata: &super::models::UsageMetadata,
scaling_enabled: bool,
context_limit: u32,
) -> super::models::Usage {
let prompt_tokens = usage_metadata.prompt_token_count.unwrap_or(0);
let cached_tokens = usage_metadata.cached_content_token_count.unwrap_or(0);
let total_raw = prompt_tokens;
const SCALING_THRESHOLD: u32 = 30_000;
let scaled_total = if scaling_enabled && total_raw > SCALING_THRESHOLD {
const TARGET_MAX: f64 = 195_000.0;
let ratio = total_raw as f64 / context_limit as f64;
if ratio <= 0.5 {
let display_ratio = ratio * 0.6;
(display_ratio * TARGET_MAX) as u32
} else if ratio <= 0.7 {
let progress = (ratio - 0.5) / 0.2;
let display_ratio = 0.3 + progress * 0.2;
(display_ratio * TARGET_MAX) as u32
} else if ratio <= 0.85 {
let progress = (ratio - 0.7) / 0.15;
let display_ratio = 0.5 + progress * 0.2;
(display_ratio * TARGET_MAX) as u32
} else {
let progress = (ratio - 0.85) / 0.15;
let display_ratio = 0.7 + progress * 0.27;
(display_ratio.min(0.97) * TARGET_MAX) as u32
}
} else {
total_raw
};
if scaling_enabled && total_raw > 30_000 {
let ratio = total_raw as f64 / context_limit as f64;
let display_ratio = scaled_total as f64 / 195_000.0;
tracing::debug!(
"[Claude-Scaling] Raw: {} ({:.1}%), Display: {} ({:.1}%), Compression: {:.1}x",
total_raw,
ratio * 100.0,
scaled_total,
display_ratio * 100.0,
total_raw as f64 / scaled_total as f64
);
}
let (reported_input, reported_cache) = if total_raw > 0 {
let cache_ratio = (cached_tokens as f64) / (total_raw as f64);
let sc_cache = (scaled_total as f64 * cache_ratio) as u32;
(scaled_total.saturating_sub(sc_cache), Some(sc_cache))
} else {
(scaled_total, None)
};
super::models::Usage {
input_tokens: reported_input,
output_tokens: usage_metadata.candidates_token_count.unwrap_or(0),
cache_read_input_tokens: reported_cache,
cache_creation_input_tokens: Some(0),
server_tool_use: None,
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `to_claude_usage` with scaling enabled against a 1_000_000
    /// context limit: a tiny prompt, then prompts at 50%, 70%, 85% and 100%
    /// of the limit, each of which must land inside its expected band.
    #[test]
    fn test_to_claude_usage() {
        use super::super::models::UsageMetadata;

        const CONTEXT_LIMIT: u32 = 1_000_000;

        // A prompt far below the scaling threshold.
        let small = UsageMetadata {
            prompt_token_count: Some(100),
            candidates_token_count: Some(50),
            total_token_count: Some(150),
            cached_content_token_count: None,
        };
        let small_usage = to_claude_usage(&small, true, CONTEXT_LIMIT);
        assert!(small_usage.input_tokens < 200);
        assert_eq!(small_usage.output_tokens, 50);

        // Each case pairs an input with the inclusive band its reported
        // `input_tokens` must fall into.
        let cases = [
            (
                UsageMetadata {
                    prompt_token_count: Some(500_000),
                    candidates_token_count: Some(10),
                    total_token_count: Some(500_010),
                    cached_content_token_count: None,
                },
                55_001..=61_999,
            ),
            (
                UsageMetadata {
                    prompt_token_count: Some(700_000),
                    candidates_token_count: Some(10),
                    total_token_count: Some(700_010),
                    cached_content_token_count: None,
                },
                90_001..=104_999,
            ),
            (
                UsageMetadata {
                    prompt_token_count: Some(850_000),
                    candidates_token_count: Some(10),
                    total_token_count: Some(850_010),
                    cached_content_token_count: None,
                },
                130_001..=144_999,
            ),
            (
                UsageMetadata {
                    prompt_token_count: Some(1_000_000),
                    candidates_token_count: Some(10),
                    total_token_count: Some(1_000_010),
                    cached_content_token_count: None,
                },
                185_001..=190_000,
            ),
        ];

        for (usage, expected_band) in &cases {
            let reported = to_claude_usage(usage, true, CONTEXT_LIMIT).input_tokens;
            assert!(expected_band.contains(&reported));
        }
    }
}