//! `mini_chat_sdk/models.rs`: policy snapshot, model catalog, credit-limit,
//! and usage-event data types.

use serde::{Deserialize, Serialize};
use time::OffsetDateTime;
use uuid::Uuid;

5/// Current policy version metadata for a user.
6#[derive(Debug, Clone)]
7pub struct PolicyVersionInfo {
8    pub user_id: Uuid,
9    pub policy_version: u64,
10    pub generated_at: OffsetDateTime,
11}
12
13/// Full policy snapshot for a given version, including the model catalog
14/// and kill switches (API: `PolicyByVersionResponse`).
15#[derive(Debug, Clone)]
16pub struct PolicySnapshot {
17    pub user_id: Uuid,
18    pub policy_version: u64,
19    pub model_catalog: Vec<ModelCatalogEntry>,
20    pub kill_switches: KillSwitches,
21}
22
23/// Tenant-level kill switches from the policy snapshot.
24#[allow(clippy::struct_excessive_bools)]
25#[derive(Debug, Clone, Default, Serialize, Deserialize)]
26pub struct KillSwitches {
27    pub disable_premium_tier: bool,
28    pub force_standard_tier: bool,
29    pub disable_web_search: bool,
30    pub disable_file_search: bool,
31    pub disable_images: bool,
32}
33
34/// A single model in the catalog (API: `PolicyModelCatalogItem`).
35#[derive(Debug, Clone, Serialize, Deserialize)]
36pub struct ModelCatalogEntry {
37    /// Provider-level model identifier (e.g. "gpt-4").
38    pub model_id: String,
39    /// The model ID on the provider side (e.g., `"gpt-5.2"` for `OpenAI`,
40    /// `"claude-opus-4-6"` for Anthropic). Sent in LLM API requests.
41    pub provider_model_id: String,
42    /// Display name shown in UI (may differ from `name`).
43    pub display_name: String,
44    /// Short description of the model.
45    pub description: String,
46    /// Model version string.
47    pub version: String,
48    /// LLM provider CTI identifier.
49    pub provider_id: String,
50    /// Routing identifier for provider resolution. Maps to a key in
51    /// `MiniChatConfig.providers`. Values: `"openai"`, `"azure_openai"`.
52    pub provider_display_name: String,
53    /// URL to model icon.
54    pub icon: String,
55    /// Model tier (standard or premium).
56    pub tier: ModelTier,
57    pub enabled: bool,
58    /// Multimodal capability flags, e.g. `VISION_INPUT`, `IMAGE_GENERATION`.
59    pub multimodal_capabilities: Vec<String>,
60    /// Maximum context window size in tokens.
61    pub context_window: u32,
62    /// Maximum output tokens the model can generate.
63    pub max_output_tokens: u32,
64    /// Maximum input tokens per request.
65    pub max_input_tokens: u32,
66    /// Credit multiplier for input tokens (micro-credits per 1000 tokens).
67    pub input_tokens_credit_multiplier_micro: u64,
68    /// Credit multiplier for output tokens (micro-credits per 1000 tokens).
69    pub output_tokens_credit_multiplier_micro: u64,
70    /// Human-readable multiplier display string (e.g. "1x", "3x").
71    pub multiplier_display: String,
72    /// Per-model token estimation budgets for preflight reserve.
73    pub estimation_budgets: EstimationBudgets,
74    /// Top-k chunks returned by similarity search per `file_search` call.
75    pub max_retrieved_chunks_per_turn: u32,
76    /// Full general config captured at snapshot time.
77    pub general_config: ModelGeneralConfig,
78    /// Tenant preference settings captured at snapshot time.
79    pub preference: ModelPreference,
80    /// System prompt sent as `instructions` in every LLM request for this model.
81    /// Empty string = no system instructions.
82    #[serde(default)]
83    pub system_prompt: String,
84    /// Prompt template used when generating thread summaries for this model.
85    /// Plumbed through the stack for future use by the summary generation job.
86    #[serde(default)]
87    pub thread_summary_prompt: String,
88}
89
90/// Per-model token estimation budget parameters (API: `PolicyModelEstimationBudgets`).
91#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
92pub struct EstimationBudgets {
93    /// Conservative bytes-per-token ratio for text estimation.
94    pub bytes_per_token_conservative: u32,
95    /// Constant overhead for protocol/framing tokens.
96    pub fixed_overhead_tokens: u32,
97    /// Percentage safety margin applied to text estimation (e.g. 10 means 10%).
98    pub safety_margin_pct: u32,
99    /// Tokens per image for vision surcharge.
100    pub image_token_budget: u32,
101    /// Fixed token overhead when `file_search` tool is included.
102    pub tool_surcharge_tokens: u32,
103    /// Fixed token overhead when `web_search` is enabled.
104    pub web_search_surcharge_tokens: u32,
105    /// Minimum generation token budget guaranteed regardless of input estimates.
106    pub minimal_generation_floor: u32,
107}
108
109impl Default for EstimationBudgets {
110    fn default() -> Self {
111        Self {
112            bytes_per_token_conservative: 4,
113            fixed_overhead_tokens: 100,
114            safety_margin_pct: 10,
115            image_token_budget: 1000,
116            tool_surcharge_tokens: 500,
117            web_search_surcharge_tokens: 500,
118            minimal_generation_floor: 50,
119        }
120    }
121}
122
123/// LLM API inference parameters (API: `PolicyModelApiParams`).
124#[derive(Debug, Clone, Serialize, Deserialize)]
125pub struct ModelApiParams {
126    pub temperature: f64,
127    pub top_p: f64,
128    pub frequency_penalty: f64,
129    pub presence_penalty: f64,
130    pub stop: Vec<String>,
131}
132
133/// Feature capability flags (API: `PolicyModelFeatures`).
134#[allow(clippy::struct_excessive_bools)]
135#[derive(Debug, Clone, Serialize, Deserialize)]
136pub struct ModelFeatures {
137    pub streaming: bool,
138    pub function_calling: bool,
139    pub structured_output: bool,
140    pub fine_tuning: bool,
141    pub distillation: bool,
142    pub fim_completion: bool,
143    pub chat_prefix_completion: bool,
144}
145
146/// Supported input modalities (API: `PolicyModelInputType`).
147#[allow(clippy::struct_excessive_bools)]
148#[derive(Debug, Clone, Serialize, Deserialize)]
149pub struct ModelInputType {
150    pub text: bool,
151    pub image: bool,
152    pub audio: bool,
153    pub video: bool,
154}
155
156/// Tool support flags (API: `PolicyModelToolSupport`).
157#[allow(clippy::struct_excessive_bools)]
158#[derive(Debug, Clone, Serialize, Deserialize)]
159pub struct ModelToolSupport {
160    pub web_search: bool,
161    pub file_search: bool,
162    pub image_generation: bool,
163    pub code_interpreter: bool,
164    pub computer_use: bool,
165    pub mcp: bool,
166}
167
168/// Supported API endpoints (API: `PolicyModelSupportedEndpoints`).
169#[allow(clippy::struct_excessive_bools)]
170#[derive(Debug, Clone, Serialize, Deserialize)]
171pub struct ModelSupportedEndpoints {
172    pub chat_completions: bool,
173    pub responses: bool,
174    pub realtime: bool,
175    pub assistants: bool,
176    pub batch_api: bool,
177    pub fine_tuning: bool,
178    pub embeddings: bool,
179    pub videos: bool,
180    pub image_generation: bool,
181    pub image_edit: bool,
182    pub audio_speech_generation: bool,
183    pub audio_transcription: bool,
184    pub audio_translation: bool,
185    pub moderations: bool,
186    pub completions: bool,
187}
188
189/// Token credit multipliers (API: `PolicyModelTokenPolicy`).
190#[derive(Debug, Clone, Serialize, Deserialize)]
191pub struct ModelTokenPolicy {
192    pub input_tokens_credit_multiplier: f64,
193    pub output_tokens_credit_multiplier: f64,
194}
195
196/// Estimated performance characteristics (API: `PolicyModelPerformance`).
197#[derive(Debug, Clone, Serialize, Deserialize)]
198pub struct ModelPerformance {
199    pub response_latency_ms: u32,
200    pub speed_tokens_per_second: u32,
201}
202
203/// General configuration from Settings Service (API: `PolicyModelGeneralConfig`).
204#[derive(Debug, Clone, Serialize, Deserialize)]
205pub struct ModelGeneralConfig {
206    /// CTI type identifier of the config.
207    #[serde(rename = "type")]
208    pub config_type: String,
209    /// Model tier CTI identifier.
210    pub tier: String,
211    #[serde(with = "time::serde::rfc3339")]
212    pub available_from: OffsetDateTime,
213    pub max_file_size_mb: u32,
214    pub api_params: ModelApiParams,
215    pub features: ModelFeatures,
216    pub input_type: ModelInputType,
217    pub tool_support: ModelToolSupport,
218    pub supported_endpoints: ModelSupportedEndpoints,
219    pub token_policy: ModelTokenPolicy,
220    pub performance: ModelPerformance,
221}
222
223/// Per-tenant preference settings (API: `PolicyModelPreference`).
224#[derive(Debug, Clone, Serialize, Deserialize)]
225pub struct ModelPreference {
226    pub is_default: bool,
227    /// Display order in the UI.
228    pub sort_order: i32,
229}
230
231/// Model pricing/capability tier.
232///
233/// Serializes as `"Standard"` / `"Premium"` (`PascalCase`).
234/// Accepts lowercase aliases (`"standard"`, `"premium"`) on deserialization
235/// for compatibility with CCM and DESIGN maps.
236#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
237pub enum ModelTier {
238    #[serde(alias = "standard")]
239    Standard,
240    #[serde(alias = "premium")]
241    Premium,
242}
243
244/// Per-user credit allocations for a specific policy version.
245/// NOT part of the immutable shared `PolicySnapshot` (DESIGN.md §5.2.6).
246#[derive(Debug, Clone, Serialize, Deserialize)]
247pub struct UserLimits {
248    pub user_id: Uuid,
249    pub policy_version: u64,
250    pub standard: TierLimits,
251    pub premium: TierLimits,
252}
253
254/// Credit limits for a single tier within a billing period.
255#[derive(Debug, Clone, Serialize, Deserialize)]
256pub struct TierLimits {
257    pub limit_daily_credits_micro: i64,
258    pub limit_monthly_credits_micro: i64,
259}
260
261/// Token usage reported by the provider.
262#[derive(Debug, Clone, Serialize, Deserialize)]
263pub struct UsageTokens {
264    pub input_tokens: u64,
265    pub output_tokens: u64,
266}
267
268/// Canonical usage event payload published via the outbox after finalization.
269///
270/// Single canonical type — both the outbox enqueuer (infra) and the plugin
271/// `publish_usage()` method use this same struct.
272#[derive(Debug, Clone, Serialize, Deserialize)]
273pub struct UsageEvent {
274    pub tenant_id: Uuid,
275    pub user_id: Uuid,
276    pub chat_id: Uuid,
277    pub turn_id: Uuid,
278    pub request_id: Uuid,
279    pub effective_model: String,
280    pub selected_model: String,
281    pub terminal_state: String,
282    pub billing_outcome: String,
283    pub usage: Option<UsageTokens>,
284    pub actual_credits_micro: i64,
285    pub settlement_method: String,
286    pub policy_version_applied: i64,
287    #[serde(with = "time::serde::rfc3339")]
288    pub timestamp: OffsetDateTime,
289}
290
/// Unit tests pinning defaults and serde wire contracts of the model types.
#[cfg(test)]
mod tests {
    use super::*;

    // ── KillSwitches::default safety invariant ──
    // All kill switches must default to false; a new field defaulting to true
    // would accidentally disable functionality across all tenants.

    #[test]
    fn kill_switches_default_all_disabled() {
        let ks = KillSwitches::default();
        assert!(!ks.disable_premium_tier);
        assert!(!ks.force_standard_tier);
        assert!(!ks.disable_web_search);
        assert!(!ks.disable_file_search);
        assert!(!ks.disable_images);
    }

    // ── EstimationBudgets::default spec values ──
    // These defaults are specified in DESIGN.md §B.5.2 and used as the
    // ConfigMap fallback. Changing them silently would alter token estimation
    // for every deployment that relies on defaults.

    #[test]
    fn estimation_budgets_default_matches_spec() {
        let eb = EstimationBudgets::default();
        assert_eq!(eb.bytes_per_token_conservative, 4);
        assert_eq!(eb.fixed_overhead_tokens, 100);
        assert_eq!(eb.safety_margin_pct, 10);
        assert_eq!(eb.image_token_budget, 1000);
        assert_eq!(eb.tool_surcharge_tokens, 500);
        assert_eq!(eb.web_search_surcharge_tokens, 500);
        assert_eq!(eb.minimal_generation_floor, 50);
    }

    // ── ModelGeneralConfig: serde(rename = "type") contract ──
    // The upstream API sends `"type"` not `"config_type"`. If the rename
    // attribute is removed, deserialization from the real API breaks.

    /// Builds a fully populated `ModelGeneralConfig` fixture for the serde tests.
    fn sample_general_config() -> ModelGeneralConfig {
        ModelGeneralConfig {
            config_type: "model.general.v1".to_owned(),
            tier: "premium".to_owned(),
            available_from: OffsetDateTime::UNIX_EPOCH,
            max_file_size_mb: 25,
            api_params: ModelApiParams {
                temperature: 0.7,
                top_p: 1.0,
                frequency_penalty: 0.0,
                presence_penalty: 0.0,
                stop: vec![],
            },
            features: ModelFeatures {
                streaming: true,
                function_calling: false,
                structured_output: false,
                fine_tuning: false,
                distillation: false,
                fim_completion: false,
                chat_prefix_completion: false,
            },
            input_type: ModelInputType {
                text: true,
                image: false,
                audio: false,
                video: false,
            },
            tool_support: ModelToolSupport {
                web_search: false,
                file_search: false,
                image_generation: false,
                code_interpreter: false,
                computer_use: false,
                mcp: false,
            },
            supported_endpoints: ModelSupportedEndpoints {
                chat_completions: true,
                responses: false,
                realtime: false,
                assistants: false,
                batch_api: false,
                fine_tuning: false,
                embeddings: false,
                videos: false,
                image_generation: false,
                image_edit: false,
                audio_speech_generation: false,
                audio_transcription: false,
                audio_translation: false,
                moderations: false,
                completions: false,
            },
            token_policy: ModelTokenPolicy {
                input_tokens_credit_multiplier: 1.0,
                output_tokens_credit_multiplier: 3.0,
            },
            performance: ModelPerformance {
                response_latency_ms: 500,
                speed_tokens_per_second: 100,
            },
        }
    }

    #[test]
    fn general_config_serializes_type_not_config_type() {
        let config = sample_general_config();
        let json = serde_json::to_value(&config).unwrap();

        assert!(json.get("type").is_some(), "expected JSON key 'type'");
        assert!(
            json.get("config_type").is_none(),
            "config_type must not appear in JSON output"
        );
        assert_eq!(json["type"], "model.general.v1");
    }

    #[test]
    fn general_config_serde_roundtrip_preserves_rename() {
        let original = sample_general_config();
        let json = serde_json::to_value(&original).unwrap();
        let deserialized: ModelGeneralConfig = serde_json::from_value(json).unwrap();

        assert_eq!(deserialized.config_type, original.config_type);
        assert_eq!(deserialized.tier, original.tier);
    }

    // ── ModelTier serde representation ──
    // Serializes as PascalCase ("Standard"/"Premium") for the UI/API.
    // Accepts lowercase aliases for CCM/DESIGN compatibility.

    #[test]
    fn model_tier_serializes_as_pascal_case() {
        let json = serde_json::to_value(ModelTier::Premium).unwrap();
        assert_eq!(json, serde_json::json!("Premium"));

        let json = serde_json::to_value(ModelTier::Standard).unwrap();
        assert_eq!(json, serde_json::json!("Standard"));
    }

    // The canonical PascalCase spelling must stay accepted alongside the
    // lowercase aliases; otherwise values this crate serializes could not be
    // read back.
    #[test]
    fn model_tier_deserializes_pascal_case() {
        let premium: ModelTier = serde_json::from_value(serde_json::json!("Premium")).unwrap();
        assert_eq!(premium, ModelTier::Premium);

        let standard: ModelTier = serde_json::from_value(serde_json::json!("Standard")).unwrap();
        assert_eq!(standard, ModelTier::Standard);
    }

    #[test]
    fn model_tier_deserializes_lowercase_aliases() {
        let premium: ModelTier = serde_json::from_value(serde_json::json!("premium")).unwrap();
        assert_eq!(premium, ModelTier::Premium);

        let standard: ModelTier = serde_json::from_value(serde_json::json!("standard")).unwrap();
        assert_eq!(standard, ModelTier::Standard);
    }

    #[test]
    fn model_tier_rejects_unknown_casing() {
        let result = serde_json::from_value::<ModelTier>(serde_json::json!("PREMIUM"));
        assert!(result.is_err());
    }

    // ── KillSwitches serde roundtrip ──
    // Verifies that enabled switches survive serialization and that
    // the default (all-off) state roundtrips correctly.

    #[test]
    fn kill_switches_serde_roundtrip_with_enabled_switches() {
        let ks = KillSwitches {
            disable_premium_tier: true,
            force_standard_tier: false,
            disable_web_search: true,
            disable_file_search: false,
            disable_images: true,
        };
        let json = serde_json::to_value(&ks).unwrap();
        let deserialized: KillSwitches = serde_json::from_value(json).unwrap();

        assert!(deserialized.disable_premium_tier);
        assert!(!deserialized.force_standard_tier);
        assert!(deserialized.disable_web_search);
        assert!(!deserialized.disable_file_search);
        assert!(deserialized.disable_images);
    }

    #[test]
    fn kill_switches_default_roundtrips_all_false() {
        let ks = KillSwitches::default();
        let json = serde_json::to_value(&ks).unwrap();
        let deserialized: KillSwitches = serde_json::from_value(json).unwrap();

        assert!(!deserialized.disable_premium_tier);
        assert!(!deserialized.force_standard_tier);
        assert!(!deserialized.disable_web_search);
        assert!(!deserialized.disable_file_search);
        assert!(!deserialized.disable_images);
    }
}