mini_chat_sdk/
models.rs

1use serde::{Deserialize, Serialize};
2use time::OffsetDateTime;
3use uuid::Uuid;
4
/// Current policy version metadata for a user.
///
/// Unlike most types in this file, this struct derives only `Debug` and
/// `Clone`; it has no serde support.
#[derive(Debug, Clone)]
pub struct PolicyVersionInfo {
    /// User this record refers to.
    pub user_id: Uuid,
    /// Policy version number reported for the user.
    pub policy_version: u64,
    /// Timestamp attached to the record — presumably when the policy
    /// version was generated; confirm against the producer.
    pub generated_at: OffsetDateTime,
}
12
/// Full policy snapshot for a given version, including the model catalog
/// and kill switches (API: `PolicyByVersionResponse`).
///
/// The snapshot itself derives only `Debug` and `Clone`; the nested
/// [`ModelCatalogEntry`] and [`KillSwitches`] types carry the serde derives.
#[derive(Debug, Clone)]
pub struct PolicySnapshot {
    /// User the snapshot was produced for.
    pub user_id: Uuid,
    /// Policy version this snapshot corresponds to.
    pub policy_version: u64,
    /// Models contained in this snapshot.
    pub model_catalog: Vec<ModelCatalogEntry>,
    /// Kill switches contained in this snapshot.
    pub kill_switches: KillSwitches,
}
22
/// Tenant-level kill switches from the policy snapshot.
///
/// Every field is a plain `bool`, so the derived [`Default`] yields a value
/// with all switches set to `false` (nothing disabled). A switch set to
/// `true` is engaged.
// The lint is silenced because this type is deliberately a flat set of
// independent boolean flags.
#[allow(clippy::struct_excessive_bools)]
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct KillSwitches {
    /// Kill switch for the premium tier.
    pub disable_premium_tier: bool,
    /// Switch that forces the standard tier.
    pub force_standard_tier: bool,
    /// Kill switch for web search.
    pub disable_web_search: bool,
    /// Kill switch for file search.
    pub disable_file_search: bool,
    /// Kill switch for images.
    pub disable_images: bool,
}
33
/// A single model in the catalog (API: `PolicyModelCatalogItem`).
///
/// Entries are carried in [`PolicySnapshot::model_catalog`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModelCatalogEntry {
    /// Model identifier for this catalog entry (e.g. "gpt-4").
    ///
    /// NOTE(review): this was previously described as "provider-level",
    /// which overlaps with `provider_model_id` below — confirm which ID
    /// space this field belongs to.
    pub model_id: String,
    /// The model ID on the provider side (e.g., `"gpt-5.2"` for `OpenAI`,
    /// `"claude-opus-4-6"` for Anthropic). Sent in LLM API requests.
    pub provider_model_id: String,
    /// Display name shown in UI.
    ///
    /// NOTE(review): the earlier comment said this "may differ from `name`",
    /// but this struct has no `name` field — presumably that refers to a
    /// field of the upstream API item; confirm.
    pub display_name: String,
    /// Short description of the model.
    pub description: String,
    /// Model version string.
    pub version: String,
    /// LLM provider CTI identifier.
    pub provider_id: String,
    /// Provider name for display, going by the field name.
    ///
    /// NOTE(review): the comment previously attached to this field read
    /// "Routing identifier for provider resolution. Maps to a key in
    /// `MiniChatConfig.providers`. Values: `"openai"`, `"azure_openai"`."
    /// That describes a routing key rather than a display name — confirm
    /// which field the description belongs to.
    pub provider_display_name: String,
    /// URL to model icon.
    pub icon: String,
    /// Model tier (standard or premium), see [`ModelTier`].
    pub tier: ModelTier,
    /// Enabled flag for this entry — presumably `false` hides or blocks the
    /// model; the consumer is not visible here, confirm.
    pub enabled: bool,
    /// Multimodal capability flags, e.g. `VISION_INPUT`, `IMAGE_GENERATION`.
    pub multimodal_capabilities: Vec<String>,
    /// Maximum context window size in tokens.
    pub context_window: u32,
    /// Maximum output tokens the model can generate.
    pub max_output_tokens: u32,
    /// Maximum input tokens per request.
    pub max_input_tokens: u32,
    /// Credit multiplier for input tokens (micro-credits per 1000 tokens).
    ///
    /// Integer counterpart to the `f64` multiplier of the same base name in
    /// [`ModelTokenPolicy`]; how the two are related is not shown here.
    pub input_tokens_credit_multiplier_micro: u64,
    /// Credit multiplier for output tokens (micro-credits per 1000 tokens).
    pub output_tokens_credit_multiplier_micro: u64,
    /// Human-readable multiplier display string (e.g. "1x", "3x").
    pub multiplier_display: String,
    /// Per-model token estimation budgets for preflight reserve.
    pub estimation_budgets: EstimationBudgets,
    /// Top-k chunks returned by similarity search per `file_search` call.
    ///
    /// NOTE(review): the field name says "per turn" while this description
    /// says "per call" — confirm which scope applies.
    pub max_retrieved_chunks_per_turn: u32,
    /// Full general config captured at snapshot time.
    pub general_config: ModelGeneralConfig,
    /// Tenant preference settings captured at snapshot time.
    pub preference: ModelPreference,
}
81
/// Per-model token estimation budget parameters (API: `PolicyModelEstimationBudgets`).
///
/// All parameters are `u32` and the type is `Copy`. Fallback values are
/// provided by the hand-written [`Default`] implementation for this type.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub struct EstimationBudgets {
    /// Conservative bytes-per-token ratio for text estimation.
    pub bytes_per_token_conservative: u32,
    /// Constant overhead for protocol/framing tokens.
    pub fixed_overhead_tokens: u32,
    /// Percentage safety margin applied to text estimation (e.g. 10 means 10%).
    pub safety_margin_pct: u32,
    /// Tokens per image for vision surcharge.
    pub image_token_budget: u32,
    /// Fixed token overhead when `file_search` tool is included.
    pub tool_surcharge_tokens: u32,
    /// Fixed token overhead when `web_search` is enabled.
    pub web_search_surcharge_tokens: u32,
    /// Minimum generation token budget guaranteed regardless of input estimates.
    pub minimal_generation_floor: u32,
}
100
101impl Default for EstimationBudgets {
102    fn default() -> Self {
103        Self {
104            bytes_per_token_conservative: 4,
105            fixed_overhead_tokens: 100,
106            safety_margin_pct: 10,
107            image_token_budget: 1000,
108            tool_surcharge_tokens: 500,
109            web_search_surcharge_tokens: 500,
110            minimal_generation_floor: 50,
111        }
112    }
113}
114
/// LLM API inference parameters (API: `PolicyModelApiParams`).
///
/// Carried in [`ModelGeneralConfig::api_params`]. The field names follow the
/// usual LLM sampling options; how they are applied to provider requests is
/// not visible in this file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModelApiParams {
    /// Sampling temperature.
    pub temperature: f64,
    /// Nucleus-sampling (`top_p`) value.
    pub top_p: f64,
    /// Frequency penalty value.
    pub frequency_penalty: f64,
    /// Presence penalty value.
    pub presence_penalty: f64,
    /// Stop sequences; may be empty.
    pub stop: Vec<String>,
}
124
/// Feature capability flags (API: `PolicyModelFeatures`).
///
/// Carried in [`ModelGeneralConfig::features`]. One boolean per feature;
/// `true` presumably means the model supports it — confirm against the API.
// The lint is silenced because this type is deliberately a flat set of
// independent boolean flags.
#[allow(clippy::struct_excessive_bools)]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModelFeatures {
    /// Streaming flag.
    pub streaming: bool,
    /// Function-calling flag.
    pub function_calling: bool,
    /// Structured-output flag.
    pub structured_output: bool,
    /// Fine-tuning flag.
    pub fine_tuning: bool,
    /// Distillation flag.
    pub distillation: bool,
    /// Fill-in-the-middle (FIM) completion flag.
    pub fim_completion: bool,
    /// Chat prefix completion flag.
    pub chat_prefix_completion: bool,
}
137
/// Supported input modalities (API: `PolicyModelInputType`).
///
/// Carried in [`ModelGeneralConfig::input_type`]. One boolean per modality.
// The lint is silenced because this type is deliberately a flat set of
// independent boolean flags.
#[allow(clippy::struct_excessive_bools)]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModelInputType {
    /// Text input flag.
    pub text: bool,
    /// Image input flag.
    pub image: bool,
    /// Audio input flag.
    pub audio: bool,
    /// Video input flag.
    pub video: bool,
}
147
/// Tool support flags (API: `PolicyModelToolSupport`).
///
/// Carried in [`ModelGeneralConfig::tool_support`]. One boolean per tool.
// The lint is silenced because this type is deliberately a flat set of
// independent boolean flags.
#[allow(clippy::struct_excessive_bools)]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModelToolSupport {
    /// Web search tool flag.
    pub web_search: bool,
    /// File search tool flag.
    pub file_search: bool,
    /// Image generation tool flag.
    pub image_generation: bool,
    /// Code interpreter tool flag.
    pub code_interpreter: bool,
    /// Computer-use tool flag.
    pub computer_use: bool,
    /// MCP tool flag.
    pub mcp: bool,
}
159
/// Supported API endpoints (API: `PolicyModelSupportedEndpoints`).
///
/// Carried in [`ModelGeneralConfig::supported_endpoints`]. Each field is a
/// boolean flag named after one endpoint; `true` presumably means the model
/// can be used with that endpoint — confirm against the API.
// The lint is silenced because this type is deliberately a flat set of
// independent boolean flags.
#[allow(clippy::struct_excessive_bools)]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModelSupportedEndpoints {
    pub chat_completions: bool,
    pub responses: bool,
    pub realtime: bool,
    pub assistants: bool,
    pub batch_api: bool,
    pub fine_tuning: bool,
    pub embeddings: bool,
    pub videos: bool,
    pub image_generation: bool,
    pub image_edit: bool,
    pub audio_speech_generation: bool,
    pub audio_transcription: bool,
    pub audio_translation: bool,
    pub moderations: bool,
    pub completions: bool,
}
180
/// Token credit multipliers (API: `PolicyModelTokenPolicy`).
///
/// Carried in [`ModelGeneralConfig::token_policy`]. These are floating-point
/// values; [`ModelCatalogEntry`] additionally exposes integer `*_micro`
/// multipliers with the same base names.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModelTokenPolicy {
    /// Credit multiplier applied to input tokens.
    pub input_tokens_credit_multiplier: f64,
    /// Credit multiplier applied to output tokens.
    pub output_tokens_credit_multiplier: f64,
}
187
/// Estimated performance characteristics (API: `PolicyModelPerformance`).
///
/// Carried in [`ModelGeneralConfig::performance`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModelPerformance {
    /// Response latency, in milliseconds per the field name.
    pub response_latency_ms: u32,
    /// Generation speed, in tokens per second per the field name.
    pub speed_tokens_per_second: u32,
}
194
/// General configuration from Settings Service (API: `PolicyModelGeneralConfig`).
///
/// Stored per model in [`ModelCatalogEntry::general_config`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModelGeneralConfig {
    /// CTI type identifier of the config.
    ///
    /// Serialized and deserialized under the JSON key `"type"`; the Rust
    /// name differs because `type` is a reserved keyword.
    #[serde(rename = "type")]
    pub config_type: String,
    /// Model tier CTI identifier.
    ///
    /// This is a free-form string, separate from the [`ModelTier`] enum used
    /// by `ModelCatalogEntry::tier`.
    pub tier: String,
    /// Availability start timestamp, encoded as an RFC 3339 string on the wire.
    #[serde(with = "time::serde::rfc3339")]
    pub available_from: OffsetDateTime,
    /// Maximum file size, in megabytes per the field name.
    pub max_file_size_mb: u32,
    /// LLM API inference parameters.
    pub api_params: ModelApiParams,
    /// Feature capability flags.
    pub features: ModelFeatures,
    /// Supported input modalities.
    pub input_type: ModelInputType,
    /// Tool support flags.
    pub tool_support: ModelToolSupport,
    /// Supported API endpoints.
    pub supported_endpoints: ModelSupportedEndpoints,
    /// Token credit multipliers.
    pub token_policy: ModelTokenPolicy,
    /// Estimated performance characteristics.
    pub performance: ModelPerformance,
}
214
/// Per-tenant preference settings (API: `PolicyModelPreference`).
///
/// Stored per model in [`ModelCatalogEntry::preference`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModelPreference {
    /// Default-model flag for the tenant.
    pub is_default: bool,
    /// Display order in the UI.
    pub sort_order: i32,
}
222
/// Model pricing/capability tier.
///
/// Serializes as `"Standard"` / `"Premium"` (`PascalCase`).
/// Accepts lowercase aliases (`"standard"`, `"premium"`) on deserialization
/// for compatibility with CCM and DESIGN maps.
///
/// Only those two spellings per variant are accepted: any other casing
/// (e.g. `"PREMIUM"`) fails to deserialize, as pinned by the unit tests.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum ModelTier {
    /// Standard tier; wire values `"Standard"` and `"standard"`.
    #[serde(alias = "standard")]
    Standard,
    /// Premium tier; wire values `"Premium"` and `"premium"`.
    #[serde(alias = "premium")]
    Premium,
}
235
/// Per-user credit allocations for a specific policy version.
/// NOT part of the immutable shared `PolicySnapshot` (DESIGN.md §5.2.6).
///
/// Holds one [`TierLimits`] value per model tier.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UserLimits {
    /// User the limits apply to.
    pub user_id: Uuid,
    /// Policy version the limits were issued for.
    pub policy_version: u64,
    /// Limits for the standard tier.
    pub standard: TierLimits,
    /// Limits for the premium tier.
    pub premium: TierLimits,
}
245
/// Credit limits for a single tier within a billing period.
///
/// Both limits are signed 64-bit values expressed in micro-credits, per the
/// `_micro` suffix.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TierLimits {
    /// Daily credit limit, in micro-credits.
    pub limit_daily_credits_micro: i64,
    /// Monthly credit limit, in micro-credits.
    pub limit_monthly_credits_micro: i64,
}
252
/// Token usage reported by the provider.
///
/// Embedded in [`UsageEvent::usage`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UsageTokens {
    /// Number of input tokens.
    pub input_tokens: u64,
    /// Number of output tokens.
    pub output_tokens: u64,
}
259
/// Canonical usage event payload published via the outbox after finalization.
///
/// Single canonical type — both the outbox enqueuer (infra) and the plugin
/// `publish_usage()` method use this same struct.
///
/// The state/outcome/method fields are plain strings; their allowed values
/// are not defined in this file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UsageEvent {
    /// Tenant the event belongs to.
    pub tenant_id: Uuid,
    /// User the event belongs to.
    pub user_id: Uuid,
    /// Chat the event belongs to.
    pub chat_id: Uuid,
    /// Turn the event belongs to.
    pub turn_id: Uuid,
    /// Request the event belongs to.
    pub request_id: Uuid,
    /// Model reported as effective for the turn — presumably the one that
    /// actually served the request; confirm against the producer.
    pub effective_model: String,
    /// Model reported as selected for the turn — presumably the one that was
    /// requested; confirm against the producer.
    pub selected_model: String,
    /// Terminal state of the turn, as a string.
    pub terminal_state: String,
    /// Billing outcome, as a string.
    pub billing_outcome: String,
    /// Token usage reported by the provider, when available.
    pub usage: Option<UsageTokens>,
    /// Credits actually charged, in micro-credits per the `_micro` suffix.
    pub actual_credits_micro: i64,
    /// Settlement method, as a string.
    pub settlement_method: String,
    /// Policy version applied to the turn.
    ///
    /// NOTE(review): this is `i64`, while `policy_version` on
    /// `PolicyVersionInfo`, `PolicySnapshot` and `UserLimits` is `u64` —
    /// confirm the signedness difference is intentional.
    pub policy_version_applied: i64,
    /// Event timestamp, encoded as an RFC 3339 string on the wire.
    #[serde(with = "time::serde::rfc3339")]
    pub timestamp: OffsetDateTime,
}
282
/// Unit tests pinning defaults and the serde wire contract of the model types.
#[cfg(test)]
mod tests {
    use super::*;

    // ── KillSwitches::default safety invariant ──
    // All kill switches must default to false; a new field defaulting to true
    // would accidentally disable functionality across all tenants.

    #[test]
    fn kill_switches_default_all_disabled() {
        let ks = KillSwitches::default();
        assert!(!ks.disable_premium_tier);
        assert!(!ks.force_standard_tier);
        assert!(!ks.disable_web_search);
        assert!(!ks.disable_file_search);
        assert!(!ks.disable_images);
    }

    // ── EstimationBudgets::default spec values ──
    // These defaults are specified in DESIGN.md §B.5.2 and used as the
    // ConfigMap fallback. Changing them silently would alter token estimation
    // for every deployment that relies on defaults.

    #[test]
    fn estimation_budgets_default_matches_spec() {
        let eb = EstimationBudgets::default();
        assert_eq!(eb.bytes_per_token_conservative, 4);
        assert_eq!(eb.fixed_overhead_tokens, 100);
        assert_eq!(eb.safety_margin_pct, 10);
        assert_eq!(eb.image_token_budget, 1000);
        assert_eq!(eb.tool_surcharge_tokens, 500);
        assert_eq!(eb.web_search_surcharge_tokens, 500);
        assert_eq!(eb.minimal_generation_floor, 50);
    }

    // ── ModelGeneralConfig: serde(rename = "type") contract ──
    // The upstream API sends `"type"` not `"config_type"`. If the rename
    // attribute is removed, deserialization from the real API breaks.

    /// Builds a fully populated config used by the serde contract tests.
    fn sample_general_config() -> ModelGeneralConfig {
        ModelGeneralConfig {
            config_type: "model.general.v1".to_owned(),
            tier: "premium".to_owned(),
            available_from: OffsetDateTime::UNIX_EPOCH,
            max_file_size_mb: 25,
            api_params: ModelApiParams {
                temperature: 0.7,
                top_p: 1.0,
                frequency_penalty: 0.0,
                presence_penalty: 0.0,
                stop: vec![],
            },
            features: ModelFeatures {
                streaming: true,
                function_calling: false,
                structured_output: false,
                fine_tuning: false,
                distillation: false,
                fim_completion: false,
                chat_prefix_completion: false,
            },
            input_type: ModelInputType {
                text: true,
                image: false,
                audio: false,
                video: false,
            },
            tool_support: ModelToolSupport {
                web_search: false,
                file_search: false,
                image_generation: false,
                code_interpreter: false,
                computer_use: false,
                mcp: false,
            },
            supported_endpoints: ModelSupportedEndpoints {
                chat_completions: true,
                responses: false,
                realtime: false,
                assistants: false,
                batch_api: false,
                fine_tuning: false,
                embeddings: false,
                videos: false,
                image_generation: false,
                image_edit: false,
                audio_speech_generation: false,
                audio_transcription: false,
                audio_translation: false,
                moderations: false,
                completions: false,
            },
            token_policy: ModelTokenPolicy {
                input_tokens_credit_multiplier: 1.0,
                output_tokens_credit_multiplier: 3.0,
            },
            performance: ModelPerformance {
                response_latency_ms: 500,
                speed_tokens_per_second: 100,
            },
        }
    }

    #[test]
    fn general_config_serializes_type_not_config_type() {
        let config = sample_general_config();
        let json = serde_json::to_value(&config).unwrap();

        assert!(json.get("type").is_some(), "expected JSON key 'type'");
        assert!(
            json.get("config_type").is_none(),
            "config_type must not appear in JSON output"
        );
        assert_eq!(json["type"], "model.general.v1");
    }

    #[test]
    fn general_config_serde_roundtrip_preserves_rename() {
        let original = sample_general_config();
        let json = serde_json::to_value(&original).unwrap();
        let deserialized: ModelGeneralConfig = serde_json::from_value(json.clone()).unwrap();

        assert_eq!(deserialized.config_type, original.config_type);
        assert_eq!(deserialized.tier, original.tier);

        // The config types do not derive `PartialEq`, so compare the
        // re-serialized JSON to cover every field of the roundtrip.
        assert_eq!(
            serde_json::to_value(&deserialized).unwrap(),
            json,
            "re-serialized config must match the original JSON"
        );
    }

    // A plain roundtrip passes with or without the rename attribute, so the
    // deserialization side of the contract is pinned explicitly: a payload
    // that carries `config_type` instead of `type` must be rejected.
    #[test]
    fn general_config_deserialize_requires_type_key() {
        let mut json = serde_json::to_value(sample_general_config()).unwrap();
        let fields = json.as_object_mut().unwrap();
        let type_value = fields.remove("type").unwrap();
        fields.insert("config_type".to_owned(), type_value);

        let result = serde_json::from_value::<ModelGeneralConfig>(json);
        assert!(
            result.is_err(),
            "payload without the 'type' key must fail to deserialize"
        );
    }

    // ── ModelTier serde representation ──
    // Serializes as PascalCase ("Standard"/"Premium") for the UI/API.
    // Accepts lowercase aliases for CCM/DESIGN compatibility.

    #[test]
    fn model_tier_serializes_as_pascal_case() {
        let json = serde_json::to_value(ModelTier::Premium).unwrap();
        assert_eq!(json, serde_json::json!("Premium"));

        let json = serde_json::to_value(ModelTier::Standard).unwrap();
        assert_eq!(json, serde_json::json!("Standard"));
    }

    #[test]
    fn model_tier_deserializes_lowercase_aliases() {
        let premium: ModelTier = serde_json::from_value(serde_json::json!("premium")).unwrap();
        assert_eq!(premium, ModelTier::Premium);

        let standard: ModelTier = serde_json::from_value(serde_json::json!("standard")).unwrap();
        assert_eq!(standard, ModelTier::Standard);
    }

    #[test]
    fn model_tier_rejects_unknown_casing() {
        let result = serde_json::from_value::<ModelTier>(serde_json::json!("PREMIUM"));
        assert!(result.is_err());
    }

    // ── KillSwitches serde roundtrip ──
    // Verifies that enabled switches survive serialization and that
    // the default (all-off) state roundtrips correctly.

    #[test]
    fn kill_switches_serde_roundtrip_with_enabled_switches() {
        let ks = KillSwitches {
            disable_premium_tier: true,
            force_standard_tier: false,
            disable_web_search: true,
            disable_file_search: false,
            disable_images: true,
        };
        let json = serde_json::to_value(&ks).unwrap();
        let deserialized: KillSwitches = serde_json::from_value(json).unwrap();

        assert!(deserialized.disable_premium_tier);
        assert!(!deserialized.force_standard_tier);
        assert!(deserialized.disable_web_search);
        assert!(!deserialized.disable_file_search);
        assert!(deserialized.disable_images);
    }

    #[test]
    fn kill_switches_default_roundtrips_all_false() {
        let ks = KillSwitches::default();
        let json = serde_json::to_value(&ks).unwrap();
        let deserialized: KillSwitches = serde_json::from_value(json).unwrap();

        assert!(!deserialized.disable_premium_tier);
        assert!(!deserialized.force_standard_tier);
        assert!(!deserialized.disable_web_search);
        assert!(!deserialized.disable_file_search);
        assert!(!deserialized.disable_images);
    }
}
mini_chat_sdk/models.rs

mini_chat_sdk/
models.rs