katu-llm 0.1.0

LLM abstraction layer for the Katu AI Agent framework
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
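//! # katu_llm::model
//!
//! ## Responsibilities
//! Defines the model reference (`ModelRef`) and the types it is composed of:
//! capability description, limits, pricing, thinking configuration, and so on.
//!
//! ## Public interface
//! - `ModelRef` — executable model reference (identity + connection + capabilities + default parameters)
//! - `ModelLimits` — token limits
//! - `ModelPricing` — rate definition
//! - `ModelCapabilities` — feature flags
//! - `InputModality` — supported input modalities
//! - `ThinkingMode` — thinking control mode
//! - `ThinkingConfig` — thinking capability configuration
//! - `ReasoningEffort` — reasoning effort level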

use std::collections::HashMap;

use serde::{Deserialize, Serialize};

use katu_core::{ModelId, ProviderId, RouteId};

use crate::cache::CachePolicy;
use katu_core::GenerationOptions;
use crate::http::HttpOptions;

// ---------------------------------------------------------------------------
// InputModality
// ---------------------------------------------------------------------------

/// Input modalities a model can accept.
///
/// Variants are (de)serialized under their `snake_case` names
/// (`text`, `image`, `audio`, `video`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum InputModality {
    /// Text input.
    Text,
    /// Image input.
    Image,
    /// Audio input.
    Audio,
    /// Video input.
    Video,
}

// ---------------------------------------------------------------------------
// ReasoningEffort
// ---------------------------------------------------------------------------

/// Reasoning effort level.
///
/// A unified abstraction over OpenAI's `reasoning_effort` and Anthropic's
/// `thinking` levels. Variants are (de)serialized under their `snake_case`
/// names, as dictated by the `rename_all` attribute below.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ReasoningEffort {
    /// Serialized as `none`.
    None,
    /// Serialized as `low`.
    Low,
    /// Serialized as `medium`.
    Medium,
    /// Serialized as `high`.
    High,
    /// Serialized as `x_high` (serde's `snake_case` rule inserts an underscore
    /// before every interior uppercase letter).
    ///
    /// NOTE(review): OpenAI's wire spelling for this level appears to be
    /// `xhigh`; confirm the protocol adapter maps this variant explicitly
    /// instead of forwarding the serde name.
    XHigh,
    /// Serialized as `max`.
    Max,
}

// ---------------------------------------------------------------------------
// ThinkingMode
// ---------------------------------------------------------------------------

/// Control mode for thinking/reasoning.
///
/// Different providers use different mechanisms to control reasoning behaviour:
/// - Anthropic: adaptive, or budget (an explicit token budget)
/// - OpenAI: effort level (low/medium/high)
/// - Other providers: may not support it
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ThinkingMode {
    /// Adaptive thinking (Anthropic adaptive thinking).
    Adaptive,
    /// Budget mode (an upper bound on the thinking token budget is specified).
    Budget,
    /// Effort-level mode (OpenAI `reasoning_effort`).
    Effort,
}

// ---------------------------------------------------------------------------
// ThinkingConfig
// ---------------------------------------------------------------------------

/// Thinking/reasoning capability configuration of a model.
///
/// Describes how the model supports reasoning and the parameters that control it.
/// Only meaningful when `ModelCapabilities::thinking` is `Some`.
///
/// # Examples
/// ```
/// use katu_llm::model::{ThinkingConfig, ThinkingMode, ReasoningEffort};
///
/// // Anthropic adaptive
/// let config = ThinkingConfig {
///     mode: ThinkingMode::Adaptive,
///     default_budget: None,
///     min_effort: None,
///     max_effort: None,
/// };
///
/// // OpenAI effort-based
/// let config = ThinkingConfig {
///     mode: ThinkingMode::Effort,
///     default_budget: None,
///     min_effort: Some(ReasoningEffort::Low),
///     max_effort: Some(ReasoningEffort::High),
/// };
/// ```
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ThinkingConfig {
    /// Thinking control mode.
    pub mode: ThinkingMode,
    /// Default thinking token budget (only meaningful in Budget mode).
    pub default_budget: Option<u32>,
    /// Lowest supported effort level.
    pub min_effort: Option<ReasoningEffort>,
    /// Highest supported effort level.
    pub max_effort: Option<ReasoningEffort>,
}

// ---------------------------------------------------------------------------
// ModelCapabilities
// ---------------------------------------------------------------------------

/// Model feature flags.
///
/// Describes which features the model supports, for reference by the agent
/// loop and the provider adaptation layer.
///
/// # Examples
/// ```
/// use katu_llm::model::{ModelCapabilities, InputModality};
///
/// let caps = ModelCapabilities {
///     input_modalities: vec![InputModality::Text, InputModality::Image],
///     tool_calls: true,
///     streaming_tool_input: true,
///     structured_output: true,
///     prompt_caching: true,
///     thinking: None,
/// };
/// assert!(caps.supports_modality(InputModality::Image));
/// ```
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ModelCapabilities {
    /// List of supported input modalities.
    pub input_modalities: Vec<InputModality>,
    /// Whether tool calls are supported.
    pub tool_calls: bool,
    /// Whether streaming of tool-call arguments is supported.
    pub streaming_tool_input: bool,
    /// Whether structured output is supported (JSON mode / response_format).
    pub structured_output: bool,
    /// Whether prompt caching is supported.
    pub prompt_caching: bool,
    /// Thinking/reasoning capability configuration; `None` means unsupported.
    pub thinking: Option<ThinkingConfig>,
}

impl ModelCapabilities {
    /// Returns `true` when `modality` is listed in `input_modalities`.
    pub fn supports_modality(&self, modality: InputModality) -> bool {
        let supported = &self.input_modalities;
        supported.contains(&modality)
    }

    /// Returns `true` when a thinking configuration is present
    /// (that is, `thinking` is `Some`).
    pub fn supports_thinking(&self) -> bool {
        let configured = self.thinking.as_ref();
        configured.is_some()
    }
}

impl Default for ModelCapabilities {
    /// Baseline capabilities: text-only input with tool calls enabled; every
    /// other feature flag is off and no thinking configuration is set.
    fn default() -> Self {
        let input_modalities = Vec::from([InputModality::Text]);
        Self {
            input_modalities,
            thinking: None,
            tool_calls: true,
            streaming_tool_input: false,
            structured_output: false,
            prompt_caching: false,
        }
    }
}

// ---------------------------------------------------------------------------
// ModelLimits
// ---------------------------------------------------------------------------

/// Token limits of a model.
///
/// # Examples
/// ```
/// use katu_llm::ModelLimits;
///
/// let limits = ModelLimits {
///     context_window: 200_000,
///     max_output_tokens: 8192,
/// };
/// ```
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ModelLimits {
    /// Context window size (upper bound on input + output combined).
    pub context_window: u32,
    /// Maximum number of output tokens.
    pub max_output_tokens: u32,
}

// ---------------------------------------------------------------------------
// ModelPricing
// ---------------------------------------------------------------------------

/// Model rate definition (unit: US dollars per million tokens).
///
/// Used to compute a `Cost` from a `Usage`.
///
/// # Examples
/// ```
/// use katu_llm::ModelPricing;
///
/// let pricing = ModelPricing {
///     input: 3.0,        // $3 / M input tokens
///     output: 15.0,      // $15 / M output tokens
///     cache_read: 0.30,  // $0.30 / M cache read tokens
///     cache_write: 3.75, // $3.75 / M cache write tokens
/// };
/// ```
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ModelPricing {
    /// Input token rate ($/M tokens).
    pub input: f64,
    /// Output token rate ($/M tokens).
    pub output: f64,
    /// Cache read rate ($/M tokens).
    pub cache_read: f64,
    /// Cache write rate ($/M tokens).
    pub cache_write: f64,
}

// ---------------------------------------------------------------------------
// ModelRef
// ---------------------------------------------------------------------------

/// Executable model reference.
///
/// `ModelRef` aggregates everything needed to go from "pick a model" to
/// "send a request":
/// - **Identity**: provider/model/route triple + optional display name
/// - **Connection**: base_url, api_key, extra headers/query_params
/// - **Capabilities**: token limits, feature flags, input modalities, thinking configuration
/// - **Default parameters**: generation options, cache policy (overridden at request level)
/// - **Pricing**: used for Usage → Cost computation
/// - **Provider-private**: non-standard options (e.g. Bedrock region, Vertex project_id)
///
/// # Parameter merge chain
/// ```text
/// LlmRequest.generation > Agent config > ModelRef.generation > Route defaults
/// ```
///
/// # Secrets
/// The `Debug` implementation prints `api_key` as `Some("<redacted>")` (or
/// `None`) so the credential does not end up in logs or panic messages.
/// Serialization is unchanged and still writes the key in clear text, which
/// is what makes a serialize/deserialize round-trip lossless. `headers`,
/// `query_params` and `http` are printed verbatim; avoid logging a
/// `ModelRef` whose headers carry credentials.
///
/// # Examples
/// ```
/// use katu_core::{ModelId, ProviderId, RouteId};
/// use katu_llm::model::*;
/// use katu_llm::GenerationOptions;
///
/// let model = ModelRef::new(
///     ModelId::new("claude-sonnet-4-20250514"),
///     ProviderId::new("anthropic"),
///     RouteId::new("anthropic-messages"),
///     "https://api.anthropic.com/v1",
///     ModelLimits {
///         context_window: 200_000,
///         max_output_tokens: 8192,
///     },
/// )
/// .with_display_name("Claude Sonnet 4")
/// .with_api_key("sk-ant-xxx")
/// .with_capabilities(ModelCapabilities {
///     input_modalities: vec![InputModality::Text, InputModality::Image],
///     tool_calls: true,
///     streaming_tool_input: true,
///     structured_output: false,
///     prompt_caching: true,
///     thinking: Some(ThinkingConfig {
///         mode: ThinkingMode::Adaptive,
///         default_budget: None,
///         min_effort: None,
///         max_effort: None,
///     }),
/// })
/// .with_pricing(ModelPricing {
///     input: 3.0,
///     output: 15.0,
///     cache_read: 0.30,
///     cache_write: 3.75,
/// })
/// .with_generation(GenerationOptions::new().with_max_tokens(4096));
///
/// assert_eq!(model.id.as_str(), "claude-sonnet-4-20250514");
/// assert!(model.capabilities.supports_thinking());
/// ```
#[derive(Clone, PartialEq, Serialize, Deserialize)]
pub struct ModelRef {
    // ─── Identity ───

    /// Model ID (the wire value sent to the API).
    pub id: ModelId,
    /// Provider identifier.
    pub provider: ProviderId,
    /// Route identifier (decides which protocol converter is used).
    pub route: RouteId,
    /// Human-readable name.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub display_name: Option<String>,

    // ─── Connection ───

    /// API base URL.
    pub base_url: String,
    /// API key. Redacted in `Debug` output; serialized as-is when present.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub api_key: Option<String>,
    /// Extra fixed request headers.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub headers: Option<HashMap<String, String>>,
    /// URL query parameters (e.g. Azure api-version).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub query_params: Option<HashMap<String, String>>,

    // ─── Capabilities and limits ───

    /// Token limits.
    pub limits: ModelLimits,
    /// Feature flags.
    pub capabilities: ModelCapabilities,

    // ─── Default parameters ───

    /// Model-level default generation parameters (overridden at request level).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub generation: Option<GenerationOptions>,
    /// Default cache policy.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cache_policy: Option<CachePolicy>,

    // ─── Pricing ───

    /// Rates (used for Usage → Cost computation).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub pricing: Option<ModelPricing>,

    // ─── Provider-private ───

    /// Provider-specific non-standard options.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub provider_options: Option<serde_json::Value>,
    /// HTTP transport-layer overrides.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub http: Option<HttpOptions>,
}

impl std::fmt::Debug for ModelRef {
    /// Formats every field like the derived implementation would, except that
    /// the value of `api_key` is replaced by a fixed placeholder.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Keep presence/absence of the key visible for troubleshooting, but
        // never echo the secret itself.
        let api_key = self.api_key.as_ref().map(|_| "<redacted>");

        f.debug_struct("ModelRef")
            .field("id", &self.id)
            .field("provider", &self.provider)
            .field("route", &self.route)
            .field("display_name", &self.display_name)
            .field("base_url", &self.base_url)
            .field("api_key", &api_key)
            .field("headers", &self.headers)
            .field("query_params", &self.query_params)
            .field("limits", &self.limits)
            .field("capabilities", &self.capabilities)
            .field("generation", &self.generation)
            .field("cache_policy", &self.cache_policy)
            .field("pricing", &self.pricing)
            .field("provider_options", &self.provider_options)
            .field("http", &self.http)
            .finish()
    }
}

impl ModelRef {
    /// Creates a `ModelRef` from the required fields only.
    ///
    /// Every optional field starts as `None` and `capabilities` starts as
    /// `ModelCapabilities::default()`; chain the `with_*` builders to fill
    /// them in. Each builder consumes `self` and returns the updated value,
    /// so the result must be kept.
    #[must_use]
    pub fn new(
        id: ModelId,
        provider: ProviderId,
        route: RouteId,
        base_url: impl Into<String>,
        limits: ModelLimits,
    ) -> Self {
        Self {
            id,
            provider,
            route,
            display_name: None,
            base_url: base_url.into(),
            api_key: None,
            headers: None,
            query_params: None,
            limits,
            capabilities: ModelCapabilities::default(),
            generation: None,
            cache_policy: None,
            pricing: None,
            provider_options: None,
            http: None,
        }
    }

    /// Sets the display name.
    #[must_use]
    pub fn with_display_name(mut self, name: impl Into<String>) -> Self {
        self.display_name = Some(name.into());
        self
    }

    /// Sets the API key.
    #[must_use]
    pub fn with_api_key(mut self, key: impl Into<String>) -> Self {
        self.api_key = Some(key.into());
        self
    }

    /// Adds one extra request header.
    ///
    /// The header map is created on first use; inserting an existing key
    /// replaces its previous value.
    #[must_use]
    pub fn with_header(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
        self.headers
            .get_or_insert_with(HashMap::new)
            .insert(key.into(), value.into());
        self
    }

    /// Adds one URL query parameter.
    ///
    /// The parameter map is created on first use; inserting an existing key
    /// replaces its previous value.
    #[must_use]
    pub fn with_query_param(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
        self.query_params
            .get_or_insert_with(HashMap::new)
            .insert(key.into(), value.into());
        self
    }

    /// Replaces the model capabilities.
    #[must_use]
    pub fn with_capabilities(mut self, capabilities: ModelCapabilities) -> Self {
        self.capabilities = capabilities;
        self
    }

    /// Sets the default generation parameters.
    #[must_use]
    pub fn with_generation(mut self, generation: GenerationOptions) -> Self {
        self.generation = Some(generation);
        self
    }

    /// Sets the cache policy.
    #[must_use]
    pub fn with_cache_policy(mut self, policy: CachePolicy) -> Self {
        self.cache_policy = Some(policy);
        self
    }

    /// Sets the pricing.
    #[must_use]
    pub fn with_pricing(mut self, pricing: ModelPricing) -> Self {
        self.pricing = Some(pricing);
        self
    }

    /// Sets the provider-private options.
    #[must_use]
    pub fn with_provider_options(mut self, options: serde_json::Value) -> Self {
        self.provider_options = Some(options);
        self
    }

    /// Sets the HTTP override options.
    #[must_use]
    pub fn with_http(mut self, http: HttpOptions) -> Self {
        self.http = Some(http);
        self
    }
}

// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------

#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `ModelRef` with only the required fields set.
    fn sample_model() -> ModelRef {
        ModelRef::new(
            ModelId::new("claude-sonnet-4-20250514"),
            ProviderId::new("anthropic"),
            RouteId::new("anthropic-messages"),
            "https://api.anthropic.com/v1",
            ModelLimits {
                context_window: 200_000,
                max_output_tokens: 8192,
            },
        )
    }

    /// `new` stores every required argument unchanged.
    #[test]
    fn test_new_has_required_fields() {
        let m = sample_model();
        assert_eq!(m.id.as_str(), "claude-sonnet-4-20250514");
        assert_eq!(m.provider.as_str(), "anthropic");
        assert_eq!(m.route.as_str(), "anthropic-messages");
        assert_eq!(m.base_url, "https://api.anthropic.com/v1");
        assert_eq!(m.limits.context_window, 200_000);
        assert_eq!(m.limits.max_output_tokens, 8192);
    }

    /// `new` leaves every optional field unset.
    #[test]
    fn test_new_optional_fields_are_none() {
        let m = sample_model();
        assert_eq!(m.display_name, None);
        assert_eq!(m.api_key, None);
        assert_eq!(m.headers, None);
        assert_eq!(m.query_params, None);
        assert_eq!(m.generation, None);
        assert_eq!(m.cache_policy, None);
        assert_eq!(m.pricing, None);
        assert_eq!(m.provider_options, None);
        assert_eq!(m.http, None);
    }

    /// Every builder in the chain lands in its own field.
    #[test]
    fn test_builder_chain() {
        let m = sample_model()
            .with_display_name("Claude Sonnet 4")
            .with_api_key("sk-ant-xxx")
            .with_header("x-custom", "value")
            .with_query_param("version", "1")
            .with_generation(GenerationOptions::new().with_max_tokens(4096))
            .with_cache_policy(CachePolicy::Auto)
            .with_pricing(ModelPricing {
                input: 3.0,
                output: 15.0,
                cache_read: 0.30,
                cache_write: 3.75,
            });

        assert_eq!(m.display_name.as_deref(), Some("Claude Sonnet 4"));
        assert_eq!(m.api_key.as_deref(), Some("sk-ant-xxx"));
        assert_eq!(
            m.headers.as_ref().unwrap().get("x-custom").unwrap(),
            "value"
        );
        assert_eq!(
            m.query_params.as_ref().unwrap().get("version").unwrap(),
            "1"
        );
        assert_eq!(
            m.generation.as_ref().unwrap().max_tokens,
            Some(4096)
        );
        assert_eq!(m.cache_policy, Some(CachePolicy::Auto));
        assert_eq!(m.pricing.as_ref().unwrap().input, 3.0);
    }

    /// A freshly built model carries the default capabilities.
    #[test]
    fn test_capabilities_default() {
        let m = sample_model();
        assert!(m.capabilities.supports_modality(InputModality::Text));
        assert!(!m.capabilities.supports_modality(InputModality::Image));
        assert!(m.capabilities.tool_calls);
        assert!(!m.capabilities.supports_thinking());
    }

    /// `with_capabilities` replaces the defaults, including the thinking config.
    #[test]
    fn test_capabilities_with_thinking() {
        let m = sample_model().with_capabilities(ModelCapabilities {
            input_modalities: vec![InputModality::Text, InputModality::Image],
            tool_calls: true,
            streaming_tool_input: true,
            structured_output: false,
            prompt_caching: true,
            thinking: Some(ThinkingConfig {
                mode: ThinkingMode::Adaptive,
                default_budget: None,
                min_effort: None,
                max_effort: None,
            }),
        });

        assert!(m.capabilities.supports_thinking());
        assert!(m.capabilities.supports_modality(InputModality::Image));
        assert!(m.capabilities.streaming_tool_input);
    }

    /// A model with no optional fields survives a JSON round-trip unchanged.
    #[test]
    fn test_serde_roundtrip_minimal() {
        let m = sample_model();
        let json = serde_json::to_string(&m).unwrap();
        let restored: ModelRef = serde_json::from_str(&json).unwrap();
        assert_eq!(m.id, restored.id);
        assert_eq!(m.provider, restored.provider);
        assert_eq!(m.limits, restored.limits);
        // Whole-value comparison also covers the omitted optional fields,
        // which must come back as `None`.
        assert_eq!(m, restored);
    }

    /// A fully populated model survives a JSON round-trip unchanged.
    #[test]
    fn test_serde_roundtrip_full() {
        let m = sample_model()
            .with_display_name("Claude Sonnet 4")
            .with_api_key("sk-test")
            .with_capabilities(ModelCapabilities {
                input_modalities: vec![InputModality::Text, InputModality::Image],
                tool_calls: true,
                streaming_tool_input: true,
                structured_output: true,
                prompt_caching: true,
                thinking: Some(ThinkingConfig {
                    mode: ThinkingMode::Budget,
                    default_budget: Some(10000),
                    min_effort: Some(ReasoningEffort::Low),
                    max_effort: Some(ReasoningEffort::High),
                }),
            })
            .with_generation(GenerationOptions::new().with_max_tokens(4096).with_temperature(0.7))
            .with_cache_policy(CachePolicy::Auto)
            .with_pricing(ModelPricing {
                input: 3.0,
                output: 15.0,
                cache_read: 0.30,
                cache_write: 3.75,
            })
            .with_provider_options(serde_json::json!({"region": "us-east-1"}))
            .with_http(HttpOptions::new().with_header("x-extra", "val"));

        let json = serde_json::to_string_pretty(&m).unwrap();
        let restored: ModelRef = serde_json::from_str(&json).unwrap();
        assert_eq!(m, restored);
    }

    /// Optional fields that are `None` are left out of the serialized JSON.
    #[test]
    fn test_serde_skips_none_fields() {
        let m = sample_model();
        let json = serde_json::to_string(&m).unwrap();
        // None fields with skip_serializing_if should not appear
        assert!(!json.contains("display_name"));
        assert!(!json.contains("api_key"));
        assert!(!json.contains("headers"));
        assert!(!json.contains("query_params"));
        assert!(!json.contains("generation"));
        assert!(!json.contains("cache_policy"));
        assert!(!json.contains("pricing"));
        assert!(!json.contains("provider_options"));
        // Match the quoted key: the bare substring `http` also occurs inside
        // the `base_url` value.
        assert!(!json.contains("\"http\""));
    }
}