limit_llm/
lib.rs

1//! # limit-llm
2//!
3//! [![Crates.io](https://img.shields.io/crates/v/limit-llm.svg)](https://crates.io/crates/limit-llm)
4//! [![Docs.rs](https://docs.rs/limit-llm/badge.svg)](https://docs.rs/limit-llm)
5//! [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
6//!
7//! **Multi-provider LLM client for Rust with streaming support.**
8//!
9//! Unified API for Anthropic Claude, OpenAI, z.ai, and local LLMs with built-in
10//! token tracking, state persistence, and automatic model handoff.
11//!
12//! ## Features
13//!
14//! - **Multi-provider support**: Anthropic Claude, OpenAI GPT, z.ai GLM, and local LLMs
15//! - **Streaming responses**: Async streaming with `futures::Stream`
16//! - **Token tracking**: SQLite-based usage tracking and cost estimation
17//! - **State persistence**: Serialize/restore conversation state with JSON
18//! - **Model handoff**: Token counting and message compaction when moving between models with different context windows
19//! - **Tool calling**: Full function/tool support for all compatible providers
20//! - **Thinking mode**: Extended reasoning support (Claude, z.ai)
21//!
22//! ## Quick Start
23//!
24//! ```rust,no_run
25//! use limit_llm::{AnthropicClient, Message, Role, LlmProvider};
26//! use futures::StreamExt;
27//!
28//! #[tokio::main]
29//! async fn main() -> Result<(), Box<dyn std::error::Error>> {
30//!     // Create a client using the API key read from the ANTHROPIC_API_KEY environment variable
31//!     let client = AnthropicClient::new(
32//!         std::env::var("ANTHROPIC_API_KEY")?,
33//!         None,  // default base URL
34//!         60,    // timeout in seconds
35//!         "claude-sonnet-4-6-20260217",
36//!         4096,  // max tokens
37//!     );
38//!
39//!     let messages = vec![
40//!         Message {
41//!             role: Role::User,
42//!             content: Some("Hello, Claude!".to_string()),
43//!             tool_calls: None,
44//!             tool_call_id: None,
45//!             cache_control: None,
46//!         }
47//!     ];
48//!
49//!     // Stream the response
50//!     let mut stream = client.send(messages, vec![]).await;
51//!
52//!     while let Some(chunk) = stream.next().await {
53//!         match chunk {
54//!             Ok(limit_llm::ProviderResponseChunk::ContentDelta(text)) => {
55//!                 print!("{}", text);
56//!             }
57//!             Ok(limit_llm::ProviderResponseChunk::Done(usage)) => {
58//!                 println!("\n\nTokens: {} in, {} out",
59//!                     usage.input_tokens, usage.output_tokens);
60//!             }
61//!             Err(e) => eprintln!("Error: {}", e),
62//!             _ => {}
63//!         }
64//!     }
65//!
66//!     Ok(())
67//! }
68//! ```
69//!
70//! ## Providers
71//!
72//! | Provider | Client | Streaming | Tools | Thinking |
73//! |----------|--------|-----------|-------|----------|
74//! | Anthropic Claude | [`AnthropicClient`] | ✓ | ✓ | ✓ |
75//! | OpenAI | [`OpenAiProvider`] | ✓ | ✓ | — |
76//! | z.ai GLM | [`ZaiProvider`] | ✓ | ✓ | ✓ |
77//! | Local/Ollama | [`LocalProvider`] | ✓ | — | — |
78//!
79//! ## Configuration
80//!
81//! ### Environment Variables
82//!
83//! ```bash
84//! ANTHROPIC_API_KEY=your-key       # For Claude
85//! OPENAI_API_KEY=your-key          # For GPT
86//! ZAI_API_KEY=your-key             # For z.ai
87//! ```
88//!
89//! ### Programmatic Configuration
90//!
91//! ```rust,no_run
92//! use limit_llm::{Config, ProviderFactory};
93//!
94//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
95//! // Load from ~/.limit/config.toml
96//! let config = Config::load()?;
97//!
98//! // Create provider from config
99//! let provider = ProviderFactory::create_provider(&config)?;
100//! # Ok(())
101//! # }
102//! ```
103//!
104//! ## Token Tracking
105//!
106//! ```rust,no_run
107//! use limit_llm::TrackingDb;
108//!
109//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
110//! let tracking = TrackingDb::new()?;
111//!
112//! // Track a request
113//! tracking.track_request(
114//!     "claude-sonnet-4-6-20260217",
115//!     100,  // input tokens
116//!     50,   // output tokens
117//!     0,    // cache read tokens
118//!     0,    // cache write tokens
119//!     0.001, // cost in USD
120//!     1500,  // duration in ms
121//! )?;
122//!
123//! // Get statistics for last 7 days
124//! let stats = tracking.get_usage_stats(7)?;
125//! println!("Total cost: ${:.4}", stats.total_cost);
126//! # Ok(())
127//! # }
128//! ```
129//!
130//! ## State Persistence
131//!
132//! ```rust,no_run
133//! use limit_llm::{StatePersistence, Message};
134//!
135//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
136//! let persistence = StatePersistence::new("~/.limit/state/session.json");
137//!
138//! // Save conversation
139//! let messages: Vec<Message> = vec![];
140//! persistence.save(&messages)?;
141//!
142//! // Restore later
143//! let restored = persistence.load()?;
144//! # Ok(())
145//! # }
146//! ```
147//!
148//! ## Model Handoff
149//!
150//! The `ModelHandoff` type provides token counting and message compaction
151//! for transitioning between models with different context windows:
152//!
153//! ```rust,no_run
154//! use limit_llm::ModelHandoff;
155//!
156//! # fn main() {
157//! let handoff = ModelHandoff::new();
158//!
159//! // Count the tokens in a piece of text
160//! let tokens = handoff.count_tokens("Hello, world!");
161//! println!("Token count: {}", tokens);
162//!
163//! // Compact messages to fit a target context window
164//! // let compacted = handoff.handoff_to_model("claude-3-5-sonnet", "claude-3-5-haiku", &messages);
165//! # }
166//! ```
167
168pub mod cache;
169pub mod client;
170pub mod config;
171pub mod error;
172pub mod handoff;
173pub mod local_provider;
174pub mod openai_provider;
175pub mod persistence;
176pub mod provider_factory;
177pub mod providers;
178pub mod summarization;
179pub mod tracking;
180pub mod types;
181pub mod zai_provider;
182
183pub use cache::apply_cache_control;
184pub use client::AnthropicClient;
185pub use config::{BrowserConfigSection, CacheSettings, CompactionSettings, Config, ProviderConfig};
186pub use error::LlmError;
187pub use handoff::ModelHandoff;
188pub use local_provider::LocalProvider;
189pub use openai_provider::OpenAiProvider;
190pub use persistence::StatePersistence;
191pub use provider_factory::ProviderFactory;
192pub use providers::{LlmProvider, ProviderResponseChunk};
193pub use summarization::{extract_file_operations, FileOperations, Summarizer, SummaryOutput};
194pub use tracking::TrackingDb;
195pub use types::{
196    CacheControl, ContentPart, FunctionCall, ImageUrl, Message, MessageContent, Response, Role,
197    Tool, ToolCall, ToolFunction, Usage,
198};
199pub use zai_provider::{ThinkingConfig, ZaiProvider};
limit_llm/lib.rs

limit_llm/
lib.rs