Skip to main content

mini_chat_sdk/
models.rs

1use serde::{Deserialize, Serialize};
2use time::OffsetDateTime;
3use uuid::Uuid;
4
5/// Current policy version metadata for a user.
6#[derive(Debug, Clone)]
7pub struct PolicyVersionInfo {
8    pub user_id: Uuid,
9    pub policy_version: u64,
10    pub generated_at: OffsetDateTime,
11}
12
13/// Full policy snapshot for a given version, including the model catalog
14/// and kill switches (API: `PolicyByVersionResponse`).
15#[derive(Debug, Clone)]
16pub struct PolicySnapshot {
17    pub user_id: Uuid,
18    pub policy_version: u64,
19    pub model_catalog: Vec<ModelCatalogEntry>,
20    pub kill_switches: KillSwitches,
21}
22
23/// Tenant-level kill switches from the policy snapshot.
24#[allow(clippy::struct_excessive_bools)]
25#[derive(Debug, Clone, Default, Serialize, Deserialize)]
26pub struct KillSwitches {
27    pub disable_premium_tier: bool,
28    pub force_standard_tier: bool,
29    pub disable_web_search: bool,
30    pub disable_file_search: bool,
31    pub disable_images: bool,
32}
33
34/// A single model in the catalog (API: `PolicyModelCatalogItem`).
35#[derive(Debug, Clone, Serialize, Deserialize)]
36pub struct ModelCatalogEntry {
37    /// Provider-level model identifier (e.g. "gpt-4").
38    pub model_id: String,
39    /// The model ID on the provider side (e.g., `"gpt-5.2"` for `OpenAI`,
40    /// `"claude-opus-4-6"` for Anthropic). Sent in LLM API requests.
41    pub provider_model_id: String,
42    /// Display name shown in UI (may differ from `name`).
43    pub display_name: String,
44    /// Short description of the model.
45    pub description: String,
46    /// Model version string.
47    pub version: String,
48    /// LLM provider CTI identifier.
49    pub provider_id: String,
50    /// Routing identifier for provider resolution. Maps to a key in
51    /// `MiniChatConfig.providers`. Values: `"openai"`, `"azure_openai"`.
52    pub provider_display_name: String,
53    /// URL to model icon.
54    pub icon: String,
55    /// Model tier (standard or premium).
56    pub tier: ModelTier,
57    pub enabled: bool,
58    /// Multimodal capability flags, e.g. `VISION_INPUT`, `IMAGE_GENERATION`.
59    pub multimodal_capabilities: Vec<String>,
60    /// Maximum context window size in tokens.
61    pub context_window: u32,
62    /// Maximum output tokens the model can generate.
63    pub max_output_tokens: u32,
64    /// Maximum input tokens per request.
65    pub max_input_tokens: u32,
66    /// Credit multiplier for input tokens (micro-credits per 1000 tokens).
67    pub input_tokens_credit_multiplier_micro: u64,
68    /// Credit multiplier for output tokens (micro-credits per 1000 tokens).
69    pub output_tokens_credit_multiplier_micro: u64,
70    /// Human-readable multiplier display string (e.g. "1x", "3x").
71    pub multiplier_display: String,
72    /// Per-model token estimation budgets for preflight reserve.
73    pub estimation_budgets: EstimationBudgets,
74    /// Top-k chunks returned by similarity search per `file_search` call.
75    pub max_retrieved_chunks_per_turn: u32,
76    /// Full general config captured at snapshot time.
77    pub general_config: ModelGeneralConfig,
78    /// Tenant preference settings captured at snapshot time.
79    pub preference: ModelPreference,
80    /// System prompt sent as `instructions` in every LLM request for this model.
81    /// Empty string = no system instructions.
82    #[serde(default)]
83    pub system_prompt: String,
84    /// Prompt template used when generating thread summaries for this model.
85    /// Plumbed through the stack for future use by the summary generation job.
86    #[serde(default)]
87    pub thread_summary_prompt: String,
88}
89
90/// Per-model token estimation budget parameters (API: `PolicyModelEstimationBudgets`).
91#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
92pub struct EstimationBudgets {
93    /// Conservative bytes-per-token ratio for text estimation.
94    pub bytes_per_token_conservative: u32,
95    /// Constant overhead for protocol/framing tokens.
96    pub fixed_overhead_tokens: u32,
97    /// Percentage safety margin applied to text estimation (e.g. 10 means 10%).
98    pub safety_margin_pct: u32,
99    /// Tokens per image for vision surcharge.
100    pub image_token_budget: u32,
101    /// Fixed token overhead when `file_search` tool is included.
102    pub tool_surcharge_tokens: u32,
103    /// Fixed token overhead when `web_search` is enabled.
104    pub web_search_surcharge_tokens: u32,
105    /// Minimum generation token budget guaranteed regardless of input estimates.
106    pub minimal_generation_floor: u32,
107}
108
109impl Default for EstimationBudgets {
110    fn default() -> Self {
111        Self {
112            bytes_per_token_conservative: 4,
113            fixed_overhead_tokens: 100,
114            safety_margin_pct: 10,
115            image_token_budget: 1000,
116            tool_surcharge_tokens: 500,
117            web_search_surcharge_tokens: 500,
118            minimal_generation_floor: 50,
119        }
120    }
121}
122
123/// LLM API inference parameters (API: `PolicyModelApiParams`).
124#[derive(Debug, Clone, Serialize, Deserialize)]
125pub struct ModelApiParams {
126    pub temperature: f64,
127    pub top_p: f64,
128    pub frequency_penalty: f64,
129    pub presence_penalty: f64,
130    pub stop: Vec<String>,
131}
132
133/// Feature capability flags (API: `PolicyModelFeatures`).
134#[allow(clippy::struct_excessive_bools)]
135#[derive(Debug, Clone, Serialize, Deserialize)]
136pub struct ModelFeatures {
137    pub streaming: bool,
138    pub function_calling: bool,
139    pub structured_output: bool,
140    pub fine_tuning: bool,
141    pub distillation: bool,
142    pub fim_completion: bool,
143    pub chat_prefix_completion: bool,
144}
145
146/// Supported input modalities (API: `PolicyModelInputType`).
147#[allow(clippy::struct_excessive_bools)]
148#[derive(Debug, Clone, Serialize, Deserialize)]
149pub struct ModelInputType {
150    pub text: bool,
151    pub image: bool,
152    pub audio: bool,
153    pub video: bool,
154}
155
156/// Tool support flags (API: `PolicyModelToolSupport`).
157#[allow(clippy::struct_excessive_bools)]
158#[derive(Debug, Clone, Serialize, Deserialize)]
159pub struct ModelToolSupport {
160    pub web_search: bool,
161    pub file_search: bool,
162    pub image_generation: bool,
163    pub code_interpreter: bool,
164    pub computer_use: bool,
165    pub mcp: bool,
166}
167
168/// Supported API endpoints (API: `PolicyModelSupportedEndpoints`).
169#[allow(clippy::struct_excessive_bools)]
170#[derive(Debug, Clone, Serialize, Deserialize)]
171pub struct ModelSupportedEndpoints {
172    pub chat_completions: bool,
173    pub responses: bool,
174    pub realtime: bool,
175    pub assistants: bool,
176    pub batch_api: bool,
177    pub fine_tuning: bool,
178    pub embeddings: bool,
179    pub videos: bool,
180    pub image_generation: bool,
181    pub image_edit: bool,
182    pub audio_speech_generation: bool,
183    pub audio_transcription: bool,
184    pub audio_translation: bool,
185    pub moderations: bool,
186    pub completions: bool,
187}
188
189/// Token credit multipliers (API: `PolicyModelTokenPolicy`).
190#[derive(Debug, Clone, Serialize, Deserialize)]
191pub struct ModelTokenPolicy {
192    pub input_tokens_credit_multiplier: f64,
193    pub output_tokens_credit_multiplier: f64,
194}
195
196/// Estimated performance characteristics (API: `PolicyModelPerformance`).
197#[derive(Debug, Clone, Serialize, Deserialize)]
198pub struct ModelPerformance {
199    pub response_latency_ms: u32,
200    pub speed_tokens_per_second: u32,
201}
202
203/// General configuration from Settings Service (API: `PolicyModelGeneralConfig`).
204#[derive(Debug, Clone, Serialize, Deserialize)]
205pub struct ModelGeneralConfig {
206    /// CTI type identifier of the config.
207    #[serde(rename = "type")]
208    pub config_type: String,
209    /// Model tier CTI identifier.
210    pub tier: String,
211    #[serde(with = "time::serde::rfc3339")]
212    pub available_from: OffsetDateTime,
213    pub max_file_size_mb: u32,
214    pub api_params: ModelApiParams,
215    pub features: ModelFeatures,
216    pub input_type: ModelInputType,
217    pub tool_support: ModelToolSupport,
218    pub supported_endpoints: ModelSupportedEndpoints,
219    pub token_policy: ModelTokenPolicy,
220    pub performance: ModelPerformance,
221}
222
223/// Per-tenant preference settings (API: `PolicyModelPreference`).
224#[derive(Debug, Clone, Serialize, Deserialize)]
225pub struct ModelPreference {
226    pub is_default: bool,
227    /// Display order in the UI.
228    pub sort_order: i32,
229}
230
231/// Model pricing/capability tier.
232///
233/// Serializes as `"Standard"` / `"Premium"` (`PascalCase`).
234/// Accepts lowercase aliases (`"standard"`, `"premium"`) on deserialization
235/// for compatibility with CCM and DESIGN maps.
236#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
237pub enum ModelTier {
238    #[serde(alias = "standard")]
239    Standard,
240    #[serde(alias = "premium")]
241    Premium,
242}
243
244/// Whether a user holds an active `CyberChat` license (API: `CheckUserLicenseResponse`).
245#[derive(Debug, Clone, Serialize, Deserialize)]
246pub struct UserLicenseStatus {
247    /// `true` if the user's status is `active` in the `active_users` table for this tenant.
248    /// `false` if the user is not found, or has status `invited`, `deactivated`, or `deleted`.
249    pub active: bool,
250}
251
252/// Per-user credit allocations for a specific policy version.
253/// NOT part of the immutable shared `PolicySnapshot` (DESIGN.md §5.2.6).
254#[derive(Debug, Clone, Serialize, Deserialize)]
255pub struct UserLimits {
256    pub user_id: Uuid,
257    pub policy_version: u64,
258    pub standard: TierLimits,
259    pub premium: TierLimits,
260}
261
262/// Credit limits for a single tier within a billing period.
263#[derive(Debug, Clone, Serialize, Deserialize)]
264pub struct TierLimits {
265    pub limit_daily_credits_micro: i64,
266    pub limit_monthly_credits_micro: i64,
267}
268
269/// Token usage reported by the provider.
270#[derive(Debug, Clone, Serialize, Deserialize)]
271pub struct UsageTokens {
272    pub input_tokens: u64,
273    pub output_tokens: u64,
274}
275
276/// Canonical usage event payload published via the outbox after finalization.
277///
278/// Single canonical type — both the outbox enqueuer (infra) and the plugin
279/// `publish_usage()` method use this same struct.
280#[derive(Debug, Clone, Serialize, Deserialize)]
281pub struct UsageEvent {
282    pub tenant_id: Uuid,
283    pub user_id: Uuid,
284    pub chat_id: Uuid,
285    pub turn_id: Uuid,
286    pub request_id: Uuid,
287    pub effective_model: String,
288    pub selected_model: String,
289    pub terminal_state: String,
290    pub billing_outcome: String,
291    pub usage: Option<UsageTokens>,
292    pub actual_credits_micro: i64,
293    pub settlement_method: String,
294    pub policy_version_applied: i64,
295    #[serde(with = "time::serde::rfc3339")]
296    pub timestamp: OffsetDateTime,
297}
298
#[cfg(test)]
mod tests {
    use super::*;

    // ── Shared fixtures ──

    /// Builds a fully populated catalog entry with neutral values.
    fn sample_catalog_entry() -> ModelCatalogEntry {
        ModelCatalogEntry {
            model_id: "test-model".to_owned(),
            provider_model_id: "test-model-v1".to_owned(),
            display_name: "Test Model".to_owned(),
            description: String::new(),
            version: String::new(),
            provider_id: "default".to_owned(),
            provider_display_name: "Default".to_owned(),
            icon: String::new(),
            tier: ModelTier::Standard,
            enabled: true,
            multimodal_capabilities: vec![],
            context_window: 128_000,
            max_output_tokens: 16_384,
            max_input_tokens: 128_000,
            input_tokens_credit_multiplier_micro: 1_000_000,
            output_tokens_credit_multiplier_micro: 3_000_000,
            multiplier_display: "1x".to_owned(),
            estimation_budgets: EstimationBudgets::default(),
            max_retrieved_chunks_per_turn: 5,
            general_config: sample_general_config(),
            preference: ModelPreference {
                is_default: false,
                sort_order: 0,
            },
            system_prompt: String::new(),
            thread_summary_prompt: String::new(),
        }
    }

    /// Builds a fully populated general config with neutral values.
    fn sample_general_config() -> ModelGeneralConfig {
        ModelGeneralConfig {
            config_type: "model.general.v1".to_owned(),
            tier: "premium".to_owned(),
            available_from: OffsetDateTime::UNIX_EPOCH,
            max_file_size_mb: 25,
            api_params: ModelApiParams {
                temperature: 0.7,
                top_p: 1.0,
                frequency_penalty: 0.0,
                presence_penalty: 0.0,
                stop: vec![],
            },
            features: ModelFeatures {
                streaming: true,
                function_calling: false,
                structured_output: false,
                fine_tuning: false,
                distillation: false,
                fim_completion: false,
                chat_prefix_completion: false,
            },
            input_type: ModelInputType {
                text: true,
                image: false,
                audio: false,
                video: false,
            },
            tool_support: ModelToolSupport {
                web_search: false,
                file_search: false,
                image_generation: false,
                code_interpreter: false,
                computer_use: false,
                mcp: false,
            },
            supported_endpoints: ModelSupportedEndpoints {
                chat_completions: true,
                responses: false,
                realtime: false,
                assistants: false,
                batch_api: false,
                fine_tuning: false,
                embeddings: false,
                videos: false,
                image_generation: false,
                image_edit: false,
                audio_speech_generation: false,
                audio_transcription: false,
                audio_translation: false,
                moderations: false,
                completions: false,
            },
            token_policy: ModelTokenPolicy {
                input_tokens_credit_multiplier: 1.0,
                output_tokens_credit_multiplier: 3.0,
            },
            performance: ModelPerformance {
                response_latency_ms: 500,
                speed_tokens_per_second: 100,
            },
        }
    }

    /// Asserts that every kill switch is off.
    ///
    /// The struct is destructured without `..` on purpose: adding a field to
    /// `KillSwitches` then fails to compile here until the new switch is
    /// checked as well, so it cannot slip past the all-off invariant.
    fn assert_all_switches_off(ks: &KillSwitches) {
        let KillSwitches {
            disable_premium_tier,
            force_standard_tier,
            disable_web_search,
            disable_file_search,
            disable_images,
        } = ks;

        assert!(!*disable_premium_tier, "disable_premium_tier must be off");
        assert!(!*force_standard_tier, "force_standard_tier must be off");
        assert!(!*disable_web_search, "disable_web_search must be off");
        assert!(!*disable_file_search, "disable_file_search must be off");
        assert!(!*disable_images, "disable_images must be off");
    }

    // ── KillSwitches::default safety invariant ──
    // All kill switches must default to false; a new field defaulting to true
    // would accidentally disable functionality across all tenants.

    #[test]
    fn kill_switches_default_all_disabled() {
        assert_all_switches_off(&KillSwitches::default());
    }

    // ── EstimationBudgets::default spec values ──
    // These defaults are specified in DESIGN.md §B.5.2 and used as the
    // ConfigMap fallback. Changing them silently would alter token estimation
    // for every deployment that relies on defaults.

    #[test]
    fn estimation_budgets_default_matches_spec() {
        let eb = EstimationBudgets::default();
        assert_eq!(eb.bytes_per_token_conservative, 4);
        assert_eq!(eb.fixed_overhead_tokens, 100);
        assert_eq!(eb.safety_margin_pct, 10);
        assert_eq!(eb.image_token_budget, 1000);
        assert_eq!(eb.tool_surcharge_tokens, 500);
        assert_eq!(eb.web_search_surcharge_tokens, 500);
        assert_eq!(eb.minimal_generation_floor, 50);
    }

    // ── ModelGeneralConfig: serde(rename = "type") contract ──
    // The upstream API sends `"type"` not `"config_type"`. If the rename
    // attribute is removed, deserialization from the real API breaks.

    #[test]
    fn general_config_serializes_type_not_config_type() {
        let config = sample_general_config();
        let json = serde_json::to_value(&config).unwrap();

        assert!(json.get("type").is_some(), "expected JSON key 'type'");
        assert!(
            json.get("config_type").is_none(),
            "config_type must not appear in JSON output"
        );
        assert_eq!(json["type"], "model.general.v1");
    }

    #[test]
    fn general_config_serde_roundtrip_preserves_rename() {
        let original = sample_general_config();
        let json = serde_json::to_value(&original).unwrap();
        let deserialized: ModelGeneralConfig = serde_json::from_value(json).unwrap();

        assert_eq!(deserialized.config_type, original.config_type);
        assert_eq!(deserialized.tier, original.tier);
    }

    // ── ModelCatalogEntry: system_prompt serde contract ──
    // `system_prompt` defaults to empty string when absent in JSON input.

    #[test]
    fn system_prompt_absent_in_json_deserializes_to_empty() {
        let mut json = serde_json::to_value(sample_catalog_entry()).unwrap();
        json.as_object_mut().unwrap().remove("system_prompt");

        let entry: ModelCatalogEntry = serde_json::from_value(json).unwrap();
        assert!(
            entry.system_prompt.is_empty(),
            "missing system_prompt must deserialize to empty string"
        );
    }

    #[test]
    fn system_prompt_roundtrips() {
        let mut entry = sample_catalog_entry();
        entry.system_prompt = "You are a helpful assistant.".to_owned();

        let json = serde_json::to_value(&entry).unwrap();
        assert_eq!(json["system_prompt"], "You are a helpful assistant.");

        let deserialized: ModelCatalogEntry = serde_json::from_value(json).unwrap();
        assert_eq!(deserialized.system_prompt, "You are a helpful assistant.");
    }

    // ── ModelCatalogEntry: thread_summary_prompt serde contract ──
    // `thread_summary_prompt` carries the same `#[serde(default)]` attribute as
    // `system_prompt`, so it must behave the same way when absent.

    #[test]
    fn thread_summary_prompt_absent_in_json_deserializes_to_empty() {
        let mut json = serde_json::to_value(sample_catalog_entry()).unwrap();
        json.as_object_mut()
            .unwrap()
            .remove("thread_summary_prompt");

        let entry: ModelCatalogEntry = serde_json::from_value(json).unwrap();
        assert!(
            entry.thread_summary_prompt.is_empty(),
            "missing thread_summary_prompt must deserialize to empty string"
        );
    }

    #[test]
    fn thread_summary_prompt_roundtrips() {
        let mut entry = sample_catalog_entry();
        entry.thread_summary_prompt = "Summarize the thread.".to_owned();

        let json = serde_json::to_value(&entry).unwrap();
        assert_eq!(json["thread_summary_prompt"], "Summarize the thread.");

        let deserialized: ModelCatalogEntry = serde_json::from_value(json).unwrap();
        assert_eq!(deserialized.thread_summary_prompt, "Summarize the thread.");
    }

    // ── ModelTier serde representation ──
    // Serializes as PascalCase ("Standard"/"Premium") for the UI/API.
    // Accepts lowercase aliases for CCM/DESIGN compatibility.

    #[test]
    fn model_tier_serializes_as_pascal_case() {
        let json = serde_json::to_value(ModelTier::Premium).unwrap();
        assert_eq!(json, serde_json::json!("Premium"));

        let json = serde_json::to_value(ModelTier::Standard).unwrap();
        assert_eq!(json, serde_json::json!("Standard"));
    }

    #[test]
    fn model_tier_deserializes_lowercase_aliases() {
        let premium: ModelTier = serde_json::from_value(serde_json::json!("premium")).unwrap();
        assert_eq!(premium, ModelTier::Premium);

        let standard: ModelTier = serde_json::from_value(serde_json::json!("standard")).unwrap();
        assert_eq!(standard, ModelTier::Standard);
    }

    #[test]
    fn model_tier_rejects_unknown_casing() {
        let result = serde_json::from_value::<ModelTier>(serde_json::json!("PREMIUM"));
        assert!(result.is_err());
    }

    // ── KillSwitches serde roundtrip ──
    // Verifies that enabled switches survive serialization and that
    // the default (all-off) state roundtrips correctly.

    #[test]
    fn kill_switches_serde_roundtrip_with_enabled_switches() {
        let ks = KillSwitches {
            disable_premium_tier: true,
            force_standard_tier: false,
            disable_web_search: true,
            disable_file_search: false,
            disable_images: true,
        };
        let json = serde_json::to_value(&ks).unwrap();
        let deserialized: KillSwitches = serde_json::from_value(json).unwrap();

        assert!(deserialized.disable_premium_tier);
        assert!(!deserialized.force_standard_tier);
        assert!(deserialized.disable_web_search);
        assert!(!deserialized.disable_file_search);
        assert!(deserialized.disable_images);
    }

    #[test]
    fn kill_switches_default_roundtrips_all_false() {
        let ks = KillSwitches::default();
        let json = serde_json::to_value(&ks).unwrap();
        let deserialized: KillSwitches = serde_json::from_value(json).unwrap();

        assert_all_switches_off(&deserialized);
    }
}
546}