//! sofos 0.2.8
//!
//! An interactive AI coding agent for your terminal
//! Per-model metadata. One [`ModelInfo`] entry per supported model,
//! looked up at the boundary in `config::max_context_tokens_for`,
//! `anthropic::requires_adaptive_thinking`, `ui::calculate_cost`, and
//! [`ConversationHistory`](crate::repl::conversation::ConversationHistory).
//! Adding a model is one struct literal in [`lookup`].

/// Tiered-pricing rule. Some OpenAI models (gpt-5.4, gpt-5.5) charge a
/// premium for the *entire session* once a single prompt's input
/// crosses a documented threshold. Once tripped, every subsequent
/// turn in the session is billed at the premium rate, not just the
/// triggering turn.
#[derive(Debug, Clone, Copy)]
pub struct PremiumPricingTier {
    /// Single-prompt input token count that trips the premium tier.
    pub input_threshold: u32,
    /// Premium per-million-token USD price for input, once tripped.
    pub price_input_per_m: f64,
    /// Premium per-million-token USD price for output, once tripped.
    pub price_output_per_m: f64,
}

/// Static per-model metadata: context-window size, compaction knobs,
/// request-shape quirks, and pricing. Obtained via [`lookup`]; ids that
/// match no entry fall back to [`ModelInfo::default`].
#[derive(Debug, Clone, Copy)]
pub struct ModelInfo {
    /// API context-window ceiling in tokens.
    pub context_window: u32,
    /// Cost-shaping override for the auto-compact trigger. When `Some`,
    /// auto-compaction fires at `min(override, 90% of context_window)`.
    /// When `None`, falls back to 90% of `context_window` (codex
    /// default — "don't crash the API" rather than "don't burn tokens").
    pub auto_compact_token_limit: Option<u32>,
    /// True for Anthropic models that require `thinking: adaptive`
    /// + `output_config.effort` instead of the legacy
    ///   `{type: "enabled", budget_tokens}` extended-thinking shape.
    pub requires_adaptive_thinking: bool,
    /// True for Anthropic models that support the server-side
    /// compaction beta (`compact-2026-01-12`). When set, the request
    /// builder enables Anthropic's automatic compaction instead of
    /// running a client-side LLM-summary turn.
    pub supports_server_compaction: bool,
    /// Per-million-token USD price for non-cached input.
    pub price_input_per_m: f64,
    /// Per-million-token USD price for output (including hidden
    /// reasoning tokens on OpenAI reasoning models).
    pub price_output_per_m: f64,
    /// Tiered-pricing rule when the model has one. `None` for models
    /// that bill at a flat per-token rate regardless of prompt size.
    pub premium_tier: Option<PremiumPricingTier>,
}

impl Default for ModelInfo {
    fn default() -> Self {
        // Sonnet-class fallback: matches the historical default in
        // `calculate_cost` and is the safest "I don't know this model"
        // bet — pricing won't under-report.
        Self {
            context_window: 200_000,
            auto_compact_token_limit: Some(170_000),
            requires_adaptive_thinking: false,
            supports_server_compaction: false,
            price_input_per_m: 3.0,
            price_output_per_m: 15.0,
            premium_tier: None,
        }
    }
}

impl ModelInfo {
    /// Token count at which auto-compaction fires.
    ///
    /// Any configured override is clamped to 90% of the API ceiling,
    /// so even a too-loose override can never push the next turn past
    /// what the server will accept.
    pub fn auto_compact_at(&self) -> u32 {
        // Widen to u64 before scaling so the 9x product can't wrap.
        let ninety_pct = (u64::from(self.context_window) * 9 / 10) as u32;
        self.auto_compact_token_limit
            .map_or(ninety_pct, |limit| limit.min(ninety_pct))
    }

    /// Usable context window once 5% is reserved as output headroom.
    ///
    /// Serves as the trim-safety floor: above this, older messages are
    /// dropped without summary as a last resort.
    pub fn effective_window(&self) -> u32 {
        (u64::from(self.context_window) * 95 / 100) as u32
    }
}

/// Resolve a model id to its [`ModelInfo`]. Matching is
/// case-insensitive and prefix-based so dated ids
/// (`claude-opus-4-7-20260301`, `gpt-5.5-2026-mm-dd`) land on their
/// canonical entry. Ids matching nothing get [`ModelInfo::default`].
pub fn lookup(model: &str) -> ModelInfo {
    let id = model.to_ascii_lowercase();

    // Anthropic entries. Most-specific prefixes are tested first so a
    // dated id (`claude-opus-4-7-20260301`) resolves to its own entry
    // rather than the closest shorter prefix.
    if id.starts_with("claude-opus-4-7") {
        ModelInfo {
            context_window: 1_000_000,
            auto_compact_token_limit: Some(250_000),
            requires_adaptive_thinking: true,
            supports_server_compaction: true,
            price_input_per_m: 5.0,
            price_output_per_m: 25.0,
            premium_tier: None,
        }
    } else if id.starts_with("claude-opus-4-6") {
        ModelInfo {
            context_window: 1_000_000,
            auto_compact_token_limit: Some(250_000),
            requires_adaptive_thinking: false,
            supports_server_compaction: true,
            price_input_per_m: 5.0,
            price_output_per_m: 25.0,
            premium_tier: None,
        }
    } else if id.starts_with("claude-sonnet-4-6") {
        ModelInfo {
            context_window: 1_000_000,
            auto_compact_token_limit: Some(250_000),
            requires_adaptive_thinking: false,
            supports_server_compaction: true,
            price_input_per_m: 3.0,
            price_output_per_m: 15.0,
            premium_tier: None,
        }
    } else if id.starts_with("claude-haiku-4-5") {
        ModelInfo {
            context_window: 200_000,
            auto_compact_token_limit: Some(170_000),
            requires_adaptive_thinking: false,
            supports_server_compaction: false,
            price_input_per_m: 1.0,
            price_output_per_m: 5.0,
            premium_tier: None,
        }
    } else if id.contains("codex") {
        // OpenAI Codex variants carry `codex` somewhere in the slug
        // regardless of the gpt-5.x prefix, so this check must run
        // before the gpt prefix checks below.
        ModelInfo {
            context_window: 400_000,
            auto_compact_token_limit: Some(250_000),
            requires_adaptive_thinking: false,
            supports_server_compaction: false,
            price_input_per_m: 1.75,
            price_output_per_m: 14.0,
            premium_tier: None,
        }
    } else if id.starts_with("gpt-5.4") {
        // gpt-5.4 / gpt-5.5 bill 2x input / 1.5x output for the *whole
        // session* once any single prompt exceeds 272K input tokens.
        // The 250K auto-compact trigger deliberately sits below that
        // cliff, keeping the listed `price_*` values on the standard
        // tier. Raising the override past 272K would silently double
        // the input bill — the wrong knob for cost control. The
        // `premium_tier` here lets `ui::calculate_cost` report honest
        // billing if the cliff is ever tripped anyway (e.g. by a huge
        // pasted file).
        ModelInfo {
            context_window: 1_050_000,
            auto_compact_token_limit: Some(250_000),
            requires_adaptive_thinking: false,
            supports_server_compaction: false,
            price_input_per_m: 2.5,
            price_output_per_m: 15.0,
            premium_tier: Some(PremiumPricingTier {
                input_threshold: 272_000,
                price_input_per_m: 5.0,
                price_output_per_m: 22.5,
            }),
        }
    } else if id.starts_with("gpt-5.5") {
        // Same 272K session-wide pricing cliff as gpt-5.4 above.
        ModelInfo {
            context_window: 1_050_000,
            auto_compact_token_limit: Some(250_000),
            requires_adaptive_thinking: false,
            supports_server_compaction: false,
            price_input_per_m: 5.0,
            price_output_per_m: 30.0,
            premium_tier: Some(PremiumPricingTier {
                input_threshold: 272_000,
                price_input_per_m: 10.0,
                price_output_per_m: 45.0,
            }),
        }
    } else {
        ModelInfo::default()
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn opus_4_7_has_1m_context_and_server_compaction() {
        let info = lookup("claude-opus-4-7");
        assert_eq!(info.context_window, 1_000_000);
        assert!(info.requires_adaptive_thinking);
        assert!(info.supports_server_compaction);
    }

    #[test]
    fn lookup_matches_versioned_opus_4_7_ids() {
        assert!(lookup("claude-opus-4-7").requires_adaptive_thinking);
        assert!(lookup("claude-opus-4-7-20260301").requires_adaptive_thinking);
        assert!(lookup("Claude-Opus-4-7").requires_adaptive_thinking);
    }

    #[test]
    fn lookup_distinguishes_opus_4_6_from_4_7() {
        assert!(!lookup("claude-opus-4-6").requires_adaptive_thinking);
        assert!(lookup("claude-opus-4-7").requires_adaptive_thinking);
    }

    #[test]
    fn unknown_model_falls_back_to_sonnet_class_pricing() {
        let info = lookup("some-future-model-2099");
        assert_eq!(info.price_input_per_m, 3.0);
        assert_eq!(info.price_output_per_m, 15.0);
    }

    #[test]
    fn codex_slugs_resolve_to_codex_entry_regardless_of_prefix() {
        // The `contains("codex")` branch must win over the gpt-5.x
        // prefix checks, and must be case-insensitive like the rest.
        for slug in ["gpt-5.5-codex", "codex-mini", "GPT-5.4-Codex"] {
            let info = lookup(slug);
            assert_eq!(info.context_window, 400_000, "{slug}");
            assert!(
                info.premium_tier.is_none(),
                "{slug} must not inherit the gpt-5.x premium tier"
            );
        }
    }

    #[test]
    fn lookup_is_case_insensitive_for_openai_ids() {
        assert!(lookup("GPT-5.5").premium_tier.is_some());
        assert!(lookup("GPT-5.4-2026-01-01").premium_tier.is_some());
    }

    #[test]
    fn auto_compact_at_clamps_override_against_api_ceiling() {
        let info = ModelInfo {
            context_window: 100_000,
            auto_compact_token_limit: Some(200_000),
            ..ModelInfo::default()
        };
        assert_eq!(info.auto_compact_at(), 90_000);
    }

    #[test]
    fn auto_compact_at_falls_back_to_90pct_when_unset() {
        let info = ModelInfo {
            context_window: 200_000,
            auto_compact_token_limit: None,
            ..ModelInfo::default()
        };
        assert_eq!(info.auto_compact_at(), 180_000);
    }

    #[test]
    fn effective_window_reserves_5pct_headroom() {
        let info = ModelInfo {
            context_window: 1_000_000,
            ..ModelInfo::default()
        };
        assert_eq!(info.effective_window(), 950_000);
    }

    #[test]
    fn cliff_models_compact_below_272k_premium_threshold() {
        for slug in ["gpt-5.5", "gpt-5.4"] {
            let info = lookup(slug);
            assert!(info.auto_compact_at() < 272_000);
            let tier = info
                .premium_tier
                .expect("cliff models carry a premium tier");
            assert_eq!(tier.input_threshold, 272_000);
            assert!(tier.price_input_per_m > info.price_input_per_m);
        }
    }

    #[test]
    fn anthropic_1m_models_advertise_server_compaction() {
        for slug in ["claude-opus-4-7", "claude-opus-4-6", "claude-sonnet-4-6"] {
            assert!(
                lookup(slug).supports_server_compaction,
                "{slug} should opt into server-side compaction"
            );
        }
    }

    #[test]
    fn haiku_does_not_advertise_server_compaction() {
        // Haiku 4.5 isn't on Anthropic's compaction-supported list,
        // so the request builder must not send the beta header for it.
        assert!(!lookup("claude-haiku-4-5").supports_server_compaction);
    }
}