// cc_sdk/llm.rs

//! LLM Proxy — use Claude Code subscription as a direct LLM interface.
//!
//! Strips away the CC agent layer (tools, system prompt, hooks) and provides
//! a simple "send prompt, get text" API powered by your CC subscription.
//!
//! # Quick Start
//!
//! ```rust,no_run
//! use cc_sdk::llm::{self, LlmOptions};
//! use futures::StreamExt;
//!
//! # async fn example() -> cc_sdk::Result<()> {
//! // Simple query — returns full text
//! let response = llm::query("Explain quantum entanglement", None).await?;
//! println!("{}", response.text);
//!
//! // With options
//! let opts = LlmOptions::builder()
//!     .system_prompt("You are a concise translator. Translate to Chinese.")
//!     .model("claude-sonnet-4-20250514")
//!     .build();
//! let response = llm::query("Hello world", Some(opts)).await?;
//!
//! // Streaming — text chunks as they arrive
//! let mut stream = llm::query_stream("Write a haiku", None).await?;
//! while let Some(chunk) = stream.next().await {
//!     print!("{}", chunk?);
//! }
//! # Ok(())
//! # }
//! ```

use std::collections::HashMap;

use futures::stream::Stream;
use futures::StreamExt;

use crate::errors::Result;
use crate::types::{
    ClaudeCodeOptions, ContentBlock, Effort, Message, PermissionMode, SystemPrompt, ThinkingConfig,
};

41// ---------------------------------------------------------------------------
42// Types
43// ---------------------------------------------------------------------------
44
45/// Minimal options for LLM proxy queries.
46///
47/// Only exposes fields relevant to direct LLM usage.
48/// All CC agent features (tools, hooks, plugins) are automatically disabled.
49#[derive(Debug, Clone, Default)]
50pub struct LlmOptions {
51    /// Custom system prompt. Default: empty string (bypasses CC agent prompt).
52    pub system_prompt: Option<String>,
53    /// Model to use (e.g., `"claude-sonnet-4-20250514"`). Default: CLI default.
54    pub model: Option<String>,
55    /// Thinking configuration for extended reasoning.
56    pub thinking: Option<ThinkingConfig>,
57    /// Maximum conversation turns. Default: 1 (single-turn).
58    pub max_turns: Option<i32>,
59    /// Maximum output tokens (1–32000).
60    pub max_output_tokens: Option<u32>,
61    /// Effort level for reasoning depth.
62    pub effort: Option<Effort>,
63}
64
65/// Builder for [`LlmOptions`].
66#[derive(Debug, Default)]
67pub struct LlmOptionsBuilder {
68    options: LlmOptions,
69}
70
71impl LlmOptions {
72    /// Create a builder for `LlmOptions`.
73    pub fn builder() -> LlmOptionsBuilder {
74        LlmOptionsBuilder::default()
75    }
76
77    /// Convert to [`ClaudeCodeOptions`] with LLM-proxy defaults.
78    ///
79    /// This sets:
80    /// - Empty system prompt (or user-provided)
81    /// - `--tools ""` (disable all tools)
82    /// - `setting_sources: [User]` (skip project/local hooks for faster startup)
83    /// - Clears `ANTHROPIC_API_KEY` (forces subscription auth)
84    /// - `PermissionMode::DontAsk`
85    /// - `max_turns: 1` (unless overridden)
86    pub(crate) fn to_claude_code_options(&self) -> ClaudeCodeOptions {
87        let mut extra_args = HashMap::new();
88        extra_args.insert("tools".to_string(), Some(String::new()));
89
90        // Clear ANTHROPIC_API_KEY so the child CC process uses subscription auth
91        // instead of inheriting the parent process's session-specific API key.
92        // An empty value in env causes env_remove in the subprocess.
93        let mut env = HashMap::new();
94        env.insert("ANTHROPIC_API_KEY".to_string(), String::new());
95        // Skip hooks/plugins/CLAUDE.md loading for faster startup
96        env.insert("CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC".to_string(), "1".to_string());
97
98        ClaudeCodeOptions {
99            system_prompt_v2: Some(SystemPrompt::String(
100                self.system_prompt.clone().unwrap_or_default(),
101            )),
102            permission_mode: PermissionMode::DontAsk,
103            max_turns: self.max_turns.or(Some(1)),
104            model: self.model.clone(),
105            thinking: self.thinking.clone(),
106            max_output_tokens: self.max_output_tokens,
107            effort: self.effort,
108            extra_args,
109            env,
110            // Only load user-level settings (has auth), skip project/local (has hooks)
111            setting_sources: Some(vec![crate::types::SettingSource::User]),
112            ..Default::default()
113        }
114    }
115}
116
117impl LlmOptionsBuilder {
118    /// Set a custom system prompt. Pass `""` for no system prompt.
119    pub fn system_prompt(mut self, prompt: impl Into<String>) -> Self {
120        self.options.system_prompt = Some(prompt.into());
121        self
122    }
123
124    /// Set the model (e.g., `"claude-sonnet-4-20250514"`).
125    pub fn model(mut self, model: impl Into<String>) -> Self {
126        self.options.model = Some(model.into());
127        self
128    }
129
130    /// Enable extended thinking.
131    pub fn thinking(mut self, config: ThinkingConfig) -> Self {
132        self.options.thinking = Some(config);
133        self
134    }
135
136    /// Set maximum turns (default: 1 for single-turn).
137    pub fn max_turns(mut self, turns: i32) -> Self {
138        self.options.max_turns = Some(turns);
139        self
140    }
141
142    /// Set maximum output tokens (1–32000).
143    pub fn max_output_tokens(mut self, tokens: u32) -> Self {
144        self.options.max_output_tokens = Some(tokens);
145        self
146    }
147
148    /// Set reasoning effort level.
149    pub fn effort(mut self, effort: Effort) -> Self {
150        self.options.effort = Some(effort);
151        self
152    }
153
154    /// Build the options.
155    pub fn build(self) -> LlmOptions {
156        self.options
157    }
158}
159
160/// Response from an LLM proxy query.
161#[derive(Debug, Clone)]
162pub struct LlmResponse {
163    /// The full text response.
164    pub text: String,
165    /// Model that generated the response.
166    pub model: Option<String>,
167    /// CLI session ID.
168    pub session_id: Option<String>,
169    /// Stop reason (e.g., `"end_turn"`, `"max_tokens"`).
170    pub stop_reason: Option<String>,
171    /// Raw usage/cost data.
172    pub usage: Option<serde_json::Value>,
173}
174
175// ---------------------------------------------------------------------------
176// Public API
177// ---------------------------------------------------------------------------
178
179/// Send a prompt and get the full text response.
180///
181/// This is the simplest way to use Claude through your CC subscription.
182/// All agent features (tools, system prompt, hooks) are disabled by default.
183///
184/// # Example
185///
186/// ```rust,no_run
187/// # async fn example() -> cc_sdk::Result<()> {
188/// let response = cc_sdk::llm::query("What is 2 + 2?", None).await?;
189/// assert!(response.text.contains("4"));
190/// # Ok(())
191/// # }
192/// ```
193pub async fn query(prompt: &str, options: Option<LlmOptions>) -> Result<LlmResponse> {
194    let opts = options.unwrap_or_default();
195    let cc_options = opts.to_claude_code_options();
196
197    let stream = crate::query::query(prompt, Some(cc_options)).await?;
198    futures::pin_mut!(stream);
199
200    let mut text_parts: Vec<String> = Vec::new();
201    let mut model: Option<String> = None;
202    let mut session_id: Option<String> = None;
203    let mut stop_reason: Option<String> = None;
204    let mut usage: Option<serde_json::Value> = None;
205
206    while let Some(msg_result) = stream.next().await {
207        match msg_result? {
208            Message::Assistant { message } => {
209                if model.is_none() {
210                    model.clone_from(&message.model);
211                }
212                for block in &message.content {
213                    if let ContentBlock::Text(text_content) = block {
214                        text_parts.push(text_content.text.clone());
215                    }
216                }
217            }
218            Message::Result {
219                result,
220                session_id: sid,
221                stop_reason: sr,
222                usage: u,
223                ..
224            } => {
225                session_id = Some(sid);
226                stop_reason = sr;
227                usage = u;
228                if text_parts.is_empty() {
229                    if let Some(r) = result {
230                        text_parts.push(r);
231                    }
232                }
233                break; // Result marks end of response — don't wait for process exit
234            }
235            _ => {}
236        }
237    }
238
239    Ok(LlmResponse {
240        text: text_parts.join(""),
241        model,
242        session_id,
243        stop_reason,
244        usage,
245    })
246}
247
248/// Send a prompt and get a stream of text chunks.
249///
250/// Each item is an incremental text delta (not cumulative).
251/// Useful for real-time display of streaming responses.
252///
253/// # Example
254///
255/// ```rust,no_run
256/// use futures::StreamExt;
257///
258/// # async fn example() -> cc_sdk::Result<()> {
259/// let mut stream = cc_sdk::llm::query_stream("Write a poem", None).await?;
260/// while let Some(chunk) = stream.next().await {
261///     print!("{}", chunk?);
262/// }
263/// # Ok(())
264/// # }
265/// ```
266pub async fn query_stream(
267    prompt: &str,
268    options: Option<LlmOptions>,
269) -> Result<impl Stream<Item = Result<String>>> {
270    let opts = options.unwrap_or_default();
271    let cc_options = opts.to_claude_code_options();
272
273    let stream = crate::query::query(prompt, Some(cc_options)).await?;
274
275    // Use scan to track cumulative text length and emit only new deltas.
276    // CC's stream-json mode may emit cumulative assistant messages.
277    Ok(stream
278        .scan(0usize, |seen_len, msg_result| {
279            let result = match msg_result {
280                Ok(Message::Assistant { message }) => {
281                    let full_text: String = message
282                        .content
283                        .iter()
284                        .filter_map(|block| match block {
285                            ContentBlock::Text(t) => Some(t.text.as_str()),
286                            _ => None,
287                        })
288                        .collect::<Vec<_>>()
289                        .join("");
290
291                    if full_text.len() > *seen_len {
292                        let delta = full_text[*seen_len..].to_string();
293                        *seen_len = full_text.len();
294                        Some(Ok(delta))
295                    } else {
296                        None
297                    }
298                }
299                Err(e) => Some(Err(e)),
300                _ => None,
301            };
302            futures::future::ready(Some(result))
303        })
304        .filter_map(|x| futures::future::ready(x)))
305}
306
// ---------------------------------------------------------------------------
// Helpers (internal)
// ---------------------------------------------------------------------------

/// Concatenate all assistant text blocks from a slice of messages (test helper).
#[cfg(test)]
fn extract_text_from_messages(messages: &[Message]) -> String {
    messages
        .iter()
        .filter_map(|msg| match msg {
            Message::Assistant { message } => Some(&message.content),
            _ => None,
        })
        .flat_map(|content| content.iter())
        .filter_map(|block| match block {
            ContentBlock::Text(t) => Some(t.text.as_str()),
            _ => None,
        })
        .collect()
}

// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------

#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::{AssistantMessage, TextContent};

    /// Build an assistant message holding a single text block (no model/usage).
    fn text_msg(text: &str) -> Message {
        Message::Assistant {
            message: AssistantMessage {
                content: vec![ContentBlock::Text(TextContent {
                    text: text.to_string(),
                })],
                model: None,
                usage: None,
                error: None,
                parent_tool_use_id: None,
            },
        }
    }

    #[test]
    fn test_default_llm_options() {
        let opts = LlmOptions::default();
        assert!(opts.system_prompt.is_none());
        assert!(opts.model.is_none());
        assert!(opts.thinking.is_none());
        // None in the struct; to_claude_code_options() defaults it to 1.
        assert!(opts.max_turns.is_none());
        assert!(opts.max_output_tokens.is_none());
        assert!(opts.effort.is_none());
    }

    #[test]
    fn test_llm_options_builder() {
        let opts = LlmOptions::builder()
            .system_prompt("You are helpful")
            .model("claude-sonnet-4-20250514")
            .max_turns(3)
            .max_output_tokens(4096)
            .effort(Effort::High)
            .build();

        assert_eq!(opts.system_prompt, Some("You are helpful".to_string()));
        assert_eq!(opts.model, Some("claude-sonnet-4-20250514".to_string()));
        assert_eq!(opts.max_turns, Some(3));
        assert_eq!(opts.max_output_tokens, Some(4096));
        assert_eq!(opts.effort, Some(Effort::High));
    }

    #[test]
    fn test_llm_options_to_cc_options_defaults() {
        let cc = LlmOptions::default().to_claude_code_options();

        // DontAsk permission mode
        assert_eq!(cc.permission_mode, PermissionMode::DontAsk);

        // Max turns defaults to 1
        assert_eq!(cc.max_turns, Some(1));

        // System prompt is empty string (strips CC agent prompt)
        match cc.system_prompt_v2 {
            Some(SystemPrompt::String(s)) => assert_eq!(s, ""),
            _ => panic!("Expected empty string system prompt"),
        }

        // --tools "" in extra_args
        assert_eq!(cc.extra_args.get("tools"), Some(&Some(String::new())));
        // no --bare (breaks subscription auth)
        assert!(!cc.extra_args.contains_key("bare"));

        // ANTHROPIC_API_KEY cleared for subscription auth
        assert_eq!(cc.env.get("ANTHROPIC_API_KEY"), Some(&String::new()));

        // Only user-level settings (skip project/local hooks)
        assert_eq!(
            cc.setting_sources,
            Some(vec![crate::types::SettingSource::User])
        );

        // Model not set
        assert!(cc.model.is_none());
    }

    #[test]
    fn test_llm_options_to_cc_options_custom() {
        let cc = LlmOptions::builder()
            .system_prompt("Custom prompt")
            .model("claude-opus-4-20250514")
            .max_turns(5)
            .build()
            .to_claude_code_options();

        match cc.system_prompt_v2 {
            Some(SystemPrompt::String(s)) => assert_eq!(s, "Custom prompt"),
            _ => panic!("Expected custom system prompt"),
        }
        assert_eq!(cc.model, Some("claude-opus-4-20250514".to_string()));
        assert_eq!(cc.max_turns, Some(5));
    }

    #[test]
    fn test_text_extraction() {
        let messages = vec![
            Message::Assistant {
                message: AssistantMessage {
                    content: vec![
                        ContentBlock::Text(TextContent {
                            text: "Hello ".to_string(),
                        }),
                        ContentBlock::Text(TextContent {
                            text: "world!".to_string(),
                        }),
                    ],
                    model: Some("claude-sonnet".to_string()),
                    usage: None,
                    error: None,
                    parent_tool_use_id: None,
                },
            },
            // Non-assistant messages contribute nothing.
            Message::System {
                subtype: "status".to_string(),
                data: serde_json::json!({}),
            },
        ];

        assert_eq!(extract_text_from_messages(&messages), "Hello world!");
    }

    #[test]
    fn test_text_extraction_ignores_non_text_blocks() {
        let messages = vec![Message::Assistant {
            message: AssistantMessage {
                content: vec![
                    ContentBlock::Thinking(crate::types::ThinkingContent {
                        thinking: "internal reasoning".to_string(),
                        signature: String::new(),
                    }),
                    ContentBlock::Text(TextContent {
                        text: "visible answer".to_string(),
                    }),
                ],
                model: None,
                usage: None,
                error: None,
                parent_tool_use_id: None,
            },
        }];

        assert_eq!(extract_text_from_messages(&messages), "visible answer");
    }

    #[test]
    fn test_stream_delta_dedup() {
        // Simulate cumulative assistant messages (each carries the full text so far)
        let messages: Vec<crate::errors::Result<Message>> = vec![
            Ok(text_msg("Hel")),
            Ok(text_msg("Hello wo")),
            Ok(text_msg("Hello world!")),
        ];

        // Mirror the scan logic from `query_stream`.
        let mut seen_len = 0usize;
        let mut deltas = Vec::new();

        for msg_result in messages {
            if let Ok(Message::Assistant { message }) = msg_result {
                let full_text: String = message
                    .content
                    .iter()
                    .filter_map(|block| match block {
                        ContentBlock::Text(t) => Some(t.text.as_str()),
                        _ => None,
                    })
                    .collect();

                if full_text.len() > seen_len {
                    deltas.push(full_text[seen_len..].to_string());
                    seen_len = full_text.len();
                }
            }
        }

        assert_eq!(deltas, vec!["Hel", "lo wo", "rld!"]);
        assert_eq!(deltas.join(""), "Hello world!");
    }

    #[test]
    fn test_llm_response_fields() {
        let resp = LlmResponse {
            text: "test".to_string(),
            model: Some("claude-sonnet".to_string()),
            session_id: Some("sess-123".to_string()),
            stop_reason: Some("end_turn".to_string()),
            usage: Some(serde_json::json!({"input_tokens": 10, "output_tokens": 20})),
        };
        assert_eq!(resp.text, "test");
        assert_eq!(resp.model.as_deref(), Some("claude-sonnet"));
        assert_eq!(resp.stop_reason.as_deref(), Some("end_turn"));
    }
}