1#![allow(dead_code)]
3#![allow(clippy::too_many_lines)]
4#![allow(clippy::missing_const_for_fn)]
5#![allow(clippy::cast_possible_truncation)]
6#![allow(clippy::cast_precision_loss)]
7#![allow(clippy::if_same_then_else)]
8#![allow(clippy::cast_lossless)]
9#![allow(clippy::similar_names)]
10#![allow(clippy::unreadable_literal)]
11#![allow(clippy::cast_possible_wrap)]
12#![allow(clippy::doc_markdown)]
13#![allow(clippy::uninlined_format_args)]
14#![allow(clippy::unused_self)]
15#![allow(clippy::struct_excessive_bools)]
16#![allow(clippy::fn_params_excessive_bools)]
17#![allow(clippy::significant_drop_tightening)]
18#![allow(clippy::branches_sharing_code)]
19#![allow(clippy::cast_sign_loss)]
20#![allow(clippy::eq_op)]
21#![allow(clippy::needless_pass_by_value)]
22#![allow(clippy::use_self)]
23#![allow(clippy::struct_field_names)]
24#![allow(clippy::module_name_repetitions)]
25#![allow(clippy::suboptimal_flops)]
26#![allow(clippy::option_if_let_else)]
27use openai_ergonomic::{Client, Config, Error, Result};
48use std::collections::HashMap;
49use std::sync::{Arc, Mutex};
50use std::time::{Duration, SystemTime, UNIX_EPOCH};
51use tracing::{debug, error, info, warn};
52
/// Heuristic token estimator: character-ratio based token counts plus
/// per-model context limits and USD pricing tables.
#[derive(Debug, Clone)]
struct TokenCounter {
    /// Approximate tokens-per-character ratio keyed by content type
    /// ("english", "code", "multilingual", "json").
    encoding_ratios: HashMap<String, f64>,
    /// Context-window limits keyed by model name.
    model_limits: HashMap<String, TokenLimits>,
    /// Pricing tables keyed by model name.
    model_pricing: HashMap<String, ModelPricing>,
}
63
/// Token-window limits for a single model.
#[derive(Debug, Clone)]
struct TokenLimits {
    /// Total context window size (input + output tokens).
    max_context_length: i32,
    /// Maximum tokens the model may generate in one response.
    max_output_tokens: i32,
    /// Conservative input ceiling that leaves headroom for output.
    safe_input_limit: i32,
}
74
/// Per-model USD pricing used for cost estimation.
#[derive(Debug, Clone)]
struct ModelPricing {
    /// Cost in USD per 1000 input (prompt) tokens.
    input_cost_per_1k: f64,
    /// Cost in USD per 1000 output (completion) tokens.
    output_cost_per_1k: f64,
    /// Flat per-request cost, if any.
    base_cost: f64,
}
85
86impl TokenCounter {
87 fn new() -> Self {
89 let mut encoding_ratios = HashMap::new();
90 encoding_ratios.insert("english".to_string(), 0.25); encoding_ratios.insert("code".to_string(), 0.33); encoding_ratios.insert("multilingual".to_string(), 0.2); encoding_ratios.insert("json".to_string(), 0.5); let mut model_limits = HashMap::new();
96 model_limits.insert(
97 "gpt-4".to_string(),
98 TokenLimits {
99 max_context_length: 8192,
100 max_output_tokens: 4096,
101 safe_input_limit: 6000,
102 },
103 );
104 model_limits.insert(
105 "gpt-4-32k".to_string(),
106 TokenLimits {
107 max_context_length: 32768,
108 max_output_tokens: 4096,
109 safe_input_limit: 28000,
110 },
111 );
112 model_limits.insert(
113 "gpt-3.5-turbo".to_string(),
114 TokenLimits {
115 max_context_length: 4096,
116 max_output_tokens: 4096,
117 safe_input_limit: 3000,
118 },
119 );
120 model_limits.insert(
121 "gpt-3.5-turbo-16k".to_string(),
122 TokenLimits {
123 max_context_length: 16384,
124 max_output_tokens: 4096,
125 safe_input_limit: 12000,
126 },
127 );
128
129 let mut model_pricing = HashMap::new();
130 model_pricing.insert(
131 "gpt-4".to_string(),
132 ModelPricing {
133 input_cost_per_1k: 0.03,
134 output_cost_per_1k: 0.06,
135 base_cost: 0.0,
136 },
137 );
138 model_pricing.insert(
139 "gpt-4-32k".to_string(),
140 ModelPricing {
141 input_cost_per_1k: 0.06,
142 output_cost_per_1k: 0.12,
143 base_cost: 0.0,
144 },
145 );
146 model_pricing.insert(
147 "gpt-3.5-turbo".to_string(),
148 ModelPricing {
149 input_cost_per_1k: 0.0015,
150 output_cost_per_1k: 0.002,
151 base_cost: 0.0,
152 },
153 );
154 model_pricing.insert(
155 "gpt-3.5-turbo-16k".to_string(),
156 ModelPricing {
157 input_cost_per_1k: 0.003,
158 output_cost_per_1k: 0.004,
159 base_cost: 0.0,
160 },
161 );
162
163 Self {
164 encoding_ratios,
165 model_limits,
166 model_pricing,
167 }
168 }
169
170 fn estimate_tokens(&self, text: &str, content_type: &str) -> i32 {
172 let ratio = self.encoding_ratios.get(content_type).unwrap_or(&0.25);
173 (text.len() as f64 * ratio).ceil() as i32
174 }
175
176 fn estimate_chat_tokens(&self, messages: &[ChatMessage], model: &str) -> TokenEstimate {
178 let mut total_tokens = 0;
179
180 for message in messages {
182 total_tokens += 4;
184
185 let content_type = if message.role == "system" {
187 "english"
188 } else {
189 "english"
190 };
191 total_tokens += self.estimate_tokens(&message.content, content_type);
192 }
193
194 total_tokens += 2;
196
197 let limits = self
199 .model_limits
200 .get(model)
201 .cloned()
202 .unwrap_or(TokenLimits {
203 max_context_length: 4096,
204 max_output_tokens: 1000,
205 safe_input_limit: 3000,
206 });
207
208 TokenEstimate {
209 estimated_input_tokens: total_tokens,
210 max_output_tokens: limits.max_output_tokens,
211 total_estimated_tokens: total_tokens + limits.max_output_tokens,
212 exceeds_context_limit: total_tokens > limits.max_context_length,
213 exceeds_safe_limit: total_tokens > limits.safe_input_limit,
214 model_limits: limits,
215 }
216 }
217
218 fn estimate_cost(&self, estimate: &TokenEstimate, model: &str) -> CostEstimate {
220 let pricing = self
221 .model_pricing
222 .get(model)
223 .cloned()
224 .unwrap_or(ModelPricing {
225 input_cost_per_1k: 0.002,
226 output_cost_per_1k: 0.002,
227 base_cost: 0.0,
228 });
229
230 let input_cost =
231 (estimate.estimated_input_tokens as f64 / 1000.0) * pricing.input_cost_per_1k;
232 let max_output_cost =
233 (estimate.max_output_tokens as f64 / 1000.0) * pricing.output_cost_per_1k;
234
235 CostEstimate {
236 estimated_input_cost: input_cost,
237 max_output_cost,
238 total_max_cost: input_cost + max_output_cost + pricing.base_cost,
239 pricing_info: pricing,
240 }
241 }
242
243 fn optimize_messages(
245 &self,
246 messages: &[ChatMessage],
247 model: &str,
248 target_tokens: i32,
249 ) -> Vec<ChatMessage> {
250 let mut optimized = messages.to_vec();
251 let mut current_estimate = self.estimate_chat_tokens(&optimized, model);
252
253 if current_estimate.estimated_input_tokens <= target_tokens {
255 return optimized;
256 }
257
258 info!(
259 "Optimizing messages: current {} tokens, target {} tokens",
260 current_estimate.estimated_input_tokens, target_tokens
261 );
262
263 while current_estimate.estimated_input_tokens > target_tokens && optimized.len() > 1 {
265 if let Some(pos) = optimized.iter().position(|msg| msg.role == "user") {
267 if pos > 0 {
268 optimized.remove(pos);
270 current_estimate = self.estimate_chat_tokens(&optimized, model);
271 debug!(
272 "Removed message, now {} tokens",
273 current_estimate.estimated_input_tokens
274 );
275 } else {
276 break;
277 }
278 } else {
279 break;
280 }
281 }
282
283 if current_estimate.estimated_input_tokens > target_tokens {
285 for message in &mut optimized {
286 if message.role != "system" && message.content.len() > 500 {
287 let max_chars = (target_tokens as f64 * 4.0) as usize; if message.content.len() > max_chars {
289 message.content =
290 format!("{}...", &message.content[..max_chars.saturating_sub(3)]);
291 debug!(
292 "Truncated long message to {} characters",
293 message.content.len()
294 );
295 }
296 }
297 }
298 current_estimate = self.estimate_chat_tokens(&optimized, model);
299 }
300
301 info!(
302 "Optimization complete: {} tokens (saved {})",
303 current_estimate.estimated_input_tokens,
304 current_estimate.estimated_input_tokens - current_estimate.estimated_input_tokens
305 );
306
307 optimized
308 }
309
310 fn recommend_model(
312 &self,
313 messages: &[ChatMessage],
314 quality_tier: QualityTier,
315 ) -> ModelRecommendation {
316 let candidates = match quality_tier {
317 QualityTier::Budget => vec!["gpt-3.5-turbo", "gpt-3.5-turbo-16k"],
318 QualityTier::Balanced => vec!["gpt-3.5-turbo", "gpt-3.5-turbo-16k", "gpt-4"],
319 QualityTier::Premium => vec!["gpt-4", "gpt-4-32k"],
320 };
321
322 let mut best_option = None;
323 let mut best_cost = f64::INFINITY;
324
325 for model in candidates {
326 let estimate = self.estimate_chat_tokens(messages, model);
327 if !estimate.exceeds_context_limit {
328 let cost_estimate = self.estimate_cost(&estimate, model);
329 if cost_estimate.total_max_cost < best_cost {
330 best_cost = cost_estimate.total_max_cost;
331 best_option = Some(ModelRecommendation {
332 model: model.to_string(),
333 estimated_cost: cost_estimate.total_max_cost,
334 token_estimate: estimate,
335 cost_details: cost_estimate,
336 reason: format!("Most cost-effective for {} tier", quality_tier.as_str()),
337 });
338 }
339 }
340 }
341
342 best_option.unwrap_or_else(|| {
343 let fallback_model = "gpt-4-32k";
345 let estimate = self.estimate_chat_tokens(messages, fallback_model);
346 let cost_estimate = self.estimate_cost(&estimate, fallback_model);
347
348 ModelRecommendation {
349 model: fallback_model.to_string(),
350 estimated_cost: cost_estimate.total_max_cost,
351 token_estimate: estimate,
352 cost_details: cost_estimate,
353 reason: "Fallback - requires large context window".to_string(),
354 }
355 })
356 }
357}
358
/// Result of estimating the token usage of a chat request.
#[derive(Debug, Clone)]
struct TokenEstimate {
    /// Heuristic count of prompt tokens, including formatting overhead.
    estimated_input_tokens: i32,
    /// Output-token ceiling for the target model.
    max_output_tokens: i32,
    /// Input estimate plus the maximum possible output.
    total_estimated_tokens: i32,
    /// True when the input alone exceeds the model's context window.
    exceeds_context_limit: bool,
    /// True when the input exceeds the conservative safe limit.
    exceeds_safe_limit: bool,
    /// Limits of the model this estimate was computed against.
    model_limits: TokenLimits,
}
369
/// Worst-case USD cost derived from a `TokenEstimate`.
#[derive(Debug, Clone)]
struct CostEstimate {
    /// Cost of the estimated input tokens.
    estimated_input_cost: f64,
    /// Cost if the model emits its maximum output.
    max_output_cost: f64,
    /// Input cost + max output cost + any flat base cost.
    total_max_cost: f64,
    /// Pricing table used for this estimate.
    pricing_info: ModelPricing,
}
378
/// Quality/cost trade-off used when recommending a model.
#[derive(Debug, Clone)]
enum QualityTier {
    /// Cheapest acceptable models only.
    Budget,
    /// Mix of cheap and premium candidates.
    Balanced,
    /// Highest-quality models regardless of cost.
    Premium,
}
386
387impl QualityTier {
388 fn as_str(&self) -> &str {
389 match self {
390 QualityTier::Budget => "budget",
391 QualityTier::Balanced => "balanced",
392 QualityTier::Premium => "premium",
393 }
394 }
395}
396
/// Outcome of a model-selection pass for a given message set.
#[derive(Debug, Clone)]
struct ModelRecommendation {
    /// Chosen model name.
    model: String,
    /// Worst-case cost for the request on this model.
    estimated_cost: f64,
    /// Token estimate backing the choice.
    token_estimate: TokenEstimate,
    /// Cost breakdown backing the choice.
    cost_details: CostEstimate,
    /// Human-readable justification for the choice.
    reason: String,
}
406
/// Tracks daily and monthly spending against fixed caps and keeps a
/// bounded in-memory history of spending records.
#[derive(Debug)]
struct BudgetManager {
    /// Daily spending cap in USD.
    daily_budget: f64,
    /// Monthly spending cap in USD.
    monthly_budget: f64,
    /// Running total spent today (shared across tasks).
    daily_spending: Arc<Mutex<f64>>,
    /// Running total spent this month.
    monthly_spending: Arc<Mutex<f64>>,
    /// Most recent spending records (capped at 1000 entries).
    spending_history: Arc<Mutex<Vec<SpendingRecord>>>,
    /// Percent-of-budget thresholds that trigger warning/critical alerts.
    alert_thresholds: AlertThresholds,
}
423
/// Percent-of-budget levels at which soft alerts are raised.
#[derive(Debug, Clone)]
struct AlertThresholds {
    /// Daily usage percent that triggers a warning alert.
    daily_warning_percent: f64,
    /// Daily usage percent that triggers a critical alert.
    daily_critical_percent: f64,
    /// Monthly usage percent that triggers a warning alert.
    monthly_warning_percent: f64,
    /// Monthly usage percent that triggers a critical alert.
    monthly_critical_percent: f64,
}
436
437impl Default for AlertThresholds {
438 fn default() -> Self {
439 Self {
440 daily_warning_percent: 80.0,
441 daily_critical_percent: 95.0,
442 monthly_warning_percent: 80.0,
443 monthly_critical_percent: 95.0,
444 }
445 }
446}
447
/// One completed request's spending entry.
#[derive(Debug, Clone)]
struct SpendingRecord {
    /// Unix timestamp (seconds) when the request completed.
    timestamp: u64,
    /// Model that served the request.
    model: String,
    /// Input tokens charged (estimated).
    input_tokens: i32,
    /// Output tokens charged (estimated).
    output_tokens: i32,
    /// Actual cost in USD.
    cost: f64,
    /// Kind of API call, e.g. "chat_completion".
    request_type: String,
    /// Optional caller identifier for attribution.
    user_id: Option<String>,
}
459
460impl BudgetManager {
461 fn new(daily_budget: f64, monthly_budget: f64) -> Self {
463 Self {
464 daily_budget,
465 monthly_budget,
466 daily_spending: Arc::new(Mutex::new(0.0)),
467 monthly_spending: Arc::new(Mutex::new(0.0)),
468 spending_history: Arc::new(Mutex::new(Vec::new())),
469 alert_thresholds: AlertThresholds::default(),
470 }
471 }
472
473 fn check_budget(&self, estimated_cost: f64) -> BudgetCheckResult {
475 let daily_spent = *self.daily_spending.lock().unwrap();
476 let monthly_spent = *self.monthly_spending.lock().unwrap();
477
478 let daily_after = daily_spent + estimated_cost;
479 let monthly_after = monthly_spent + estimated_cost;
480
481 let daily_percent = (daily_after / self.daily_budget) * 100.0;
482 let monthly_percent = (monthly_after / self.monthly_budget) * 100.0;
483
484 if daily_after > self.daily_budget {
486 return BudgetCheckResult {
487 approved: false,
488 reason: format!(
489 "Would exceed daily budget: ${:.4} > ${:.2}",
490 daily_after, self.daily_budget
491 ),
492 current_daily_usage: daily_percent,
493 current_monthly_usage: monthly_percent,
494 alerts: vec![BudgetAlert::DailyExceeded],
495 };
496 }
497
498 if monthly_after > self.monthly_budget {
499 return BudgetCheckResult {
500 approved: false,
501 reason: format!(
502 "Would exceed monthly budget: ${:.4} > ${:.2}",
503 monthly_after, self.monthly_budget
504 ),
505 current_daily_usage: daily_percent,
506 current_monthly_usage: monthly_percent,
507 alerts: vec![BudgetAlert::MonthlyExceeded],
508 };
509 }
510
511 let mut alerts = Vec::new();
513
514 if daily_percent >= self.alert_thresholds.daily_critical_percent {
515 alerts.push(BudgetAlert::DailyCritical);
516 } else if daily_percent >= self.alert_thresholds.daily_warning_percent {
517 alerts.push(BudgetAlert::DailyWarning);
518 }
519
520 if monthly_percent >= self.alert_thresholds.monthly_critical_percent {
521 alerts.push(BudgetAlert::MonthlyCritical);
522 } else if monthly_percent >= self.alert_thresholds.monthly_warning_percent {
523 alerts.push(BudgetAlert::MonthlyWarning);
524 }
525
526 BudgetCheckResult {
527 approved: true,
528 reason: "Within budget limits".to_string(),
529 current_daily_usage: daily_percent,
530 current_monthly_usage: monthly_percent,
531 alerts,
532 }
533 }
534
535 fn record_spending(&self, record: SpendingRecord) {
537 let mut daily_spending = self.daily_spending.lock().unwrap();
538 let mut monthly_spending = self.monthly_spending.lock().unwrap();
539 let mut history = self.spending_history.lock().unwrap();
540
541 *daily_spending += record.cost;
542 *monthly_spending += record.cost;
543 history.push(record);
544
545 if history.len() > 1000 {
547 history.remove(0);
548 }
549 }
550
551 fn get_budget_status(&self) -> BudgetStatus {
553 let daily_spent = *self.daily_spending.lock().unwrap();
554 let monthly_spent = *self.monthly_spending.lock().unwrap();
555 let history = self.spending_history.lock().unwrap();
556
557 let total_requests = history.len() as u64;
558 let total_tokens: i32 = history
559 .iter()
560 .map(|r| r.input_tokens + r.output_tokens)
561 .sum();
562
563 let avg_cost_per_request = if total_requests > 0 {
564 monthly_spent / total_requests as f64
565 } else {
566 0.0
567 };
568
569 BudgetStatus {
570 daily_budget: self.daily_budget,
571 monthly_budget: self.monthly_budget,
572 daily_spent,
573 monthly_spent,
574 daily_remaining: self.daily_budget - daily_spent,
575 monthly_remaining: self.monthly_budget - monthly_spent,
576 daily_usage_percent: (daily_spent / self.daily_budget) * 100.0,
577 monthly_usage_percent: (monthly_spent / self.monthly_budget) * 100.0,
578 total_requests,
579 total_tokens,
580 average_cost_per_request: avg_cost_per_request,
581 }
582 }
583
584 fn reset_daily_spending(&self) {
586 *self.daily_spending.lock().unwrap() = 0.0;
587 info!("Daily spending reset");
588 }
589
590 fn reset_monthly_spending(&self) {
592 *self.monthly_spending.lock().unwrap() = 0.0;
593 info!("Monthly spending reset");
594 }
595}
596
/// Outcome of a pre-flight budget check for one request.
#[derive(Debug, Clone)]
struct BudgetCheckResult {
    /// Whether the request may proceed.
    approved: bool,
    /// Human-readable explanation of the decision.
    reason: String,
    /// Projected daily usage as a percent of the daily budget.
    current_daily_usage: f64,
    /// Projected monthly usage as a percent of the monthly budget.
    current_monthly_usage: f64,
    /// Threshold alerts raised by this check (may be empty).
    alerts: Vec<BudgetAlert>,
}
606
/// Alert levels raised when spending approaches or exceeds a budget.
#[derive(Debug, Clone)]
enum BudgetAlert {
    /// Daily usage passed the warning threshold.
    DailyWarning,
    /// Daily usage passed the critical threshold.
    DailyCritical,
    /// The request would exceed the daily budget.
    DailyExceeded,
    /// Monthly usage passed the warning threshold.
    MonthlyWarning,
    /// Monthly usage passed the critical threshold.
    MonthlyCritical,
    /// The request would exceed the monthly budget.
    MonthlyExceeded,
}
617
618impl BudgetAlert {
619 fn message(&self) -> &str {
620 match self {
621 BudgetAlert::DailyWarning => "Daily budget usage approaching limit",
622 BudgetAlert::DailyCritical => "Daily budget usage critical",
623 BudgetAlert::DailyExceeded => "Daily budget exceeded",
624 BudgetAlert::MonthlyWarning => "Monthly budget usage approaching limit",
625 BudgetAlert::MonthlyCritical => "Monthly budget usage critical",
626 BudgetAlert::MonthlyExceeded => "Monthly budget exceeded",
627 }
628 }
629}
630
/// Point-in-time snapshot of budgets, spending, and usage statistics.
#[derive(Debug, Clone)]
struct BudgetStatus {
    /// Configured daily cap in USD.
    daily_budget: f64,
    /// Configured monthly cap in USD.
    monthly_budget: f64,
    /// Amount spent so far today.
    daily_spent: f64,
    /// Amount spent so far this month.
    monthly_spent: f64,
    /// Daily budget remaining (may be negative if exceeded).
    daily_remaining: f64,
    /// Monthly budget remaining (may be negative if exceeded).
    monthly_remaining: f64,
    /// Daily spend as a percent of the daily budget.
    daily_usage_percent: f64,
    /// Monthly spend as a percent of the monthly budget.
    monthly_usage_percent: f64,
    /// Number of records currently in the history.
    total_requests: u64,
    /// Sum of input + output tokens across the history.
    total_tokens: i32,
    /// Monthly spend divided by request count (0 when no requests).
    average_cost_per_request: f64,
}
646
647impl BudgetStatus {
648 fn print_status(&self) {
649 info!("=== Budget Status ===");
650 info!(
651 "Daily: ${:.4} / ${:.2} ({:.1}% used, ${:.4} remaining)",
652 self.daily_spent, self.daily_budget, self.daily_usage_percent, self.daily_remaining
653 );
654 info!(
655 "Monthly: ${:.4} / ${:.2} ({:.1}% used, ${:.4} remaining)",
656 self.monthly_spent,
657 self.monthly_budget,
658 self.monthly_usage_percent,
659 self.monthly_remaining
660 );
661 info!("Total requests: {}", self.total_requests);
662 info!("Total tokens: {}", self.total_tokens);
663 info!(
664 "Average cost per request: ${:.6}",
665 self.average_cost_per_request
666 );
667 }
668}
669
/// A single chat message: a role ("system", "user", or "assistant")
/// plus its text content.
#[derive(Debug, Clone)]
struct ChatMessage {
    /// Message role understood by the chat API.
    role: String,
    /// Message text.
    content: String,
}
676
677impl ChatMessage {
678 fn user(content: &str) -> Self {
679 Self {
680 role: "user".to_string(),
681 content: content.to_string(),
682 }
683 }
684
685 fn system(content: &str) -> Self {
686 Self {
687 role: "system".to_string(),
688 content: content.to_string(),
689 }
690 }
691
692 fn assistant(content: &str) -> Self {
693 Self {
694 role: "assistant".to_string(),
695 content: content.to_string(),
696 }
697 }
698}
699
/// API client wrapper that estimates tokens/cost before each request
/// and enforces daily/monthly spending budgets.
#[derive(Debug)]
struct TokenAwareClient {
    /// Underlying API client (calls are simulated in this example).
    client: Client,
    /// Heuristic token and cost estimator.
    token_counter: TokenCounter,
    /// Shared spending tracker and budget enforcer.
    budget_manager: Arc<BudgetManager>,
}
707
708impl TokenAwareClient {
709 fn new(client: Client, daily_budget: f64, monthly_budget: f64) -> Self {
711 Self {
712 client,
713 token_counter: TokenCounter::new(),
714 budget_manager: Arc::new(BudgetManager::new(daily_budget, monthly_budget)),
715 }
716 }
717
718 async fn chat_completion_with_budget(
720 &self,
721 messages: &[ChatMessage],
722 model: &str,
723 max_tokens: Option<i32>,
724 user_id: Option<String>,
725 ) -> Result<String> {
726 let token_estimate = self.token_counter.estimate_chat_tokens(messages, model);
728 let cost_estimate = self.token_counter.estimate_cost(&token_estimate, model);
729
730 info!(
731 "Token estimate: {} input, {} max output, ${:.4} max cost",
732 token_estimate.estimated_input_tokens,
733 token_estimate.max_output_tokens,
734 cost_estimate.total_max_cost
735 );
736
737 if token_estimate.exceeds_context_limit {
739 return Err(Error::InvalidRequest(format!(
740 "Request exceeds context limit: {} > {}",
741 token_estimate.estimated_input_tokens,
742 token_estimate.model_limits.max_context_length
743 )));
744 }
745
746 if token_estimate.exceeds_safe_limit {
747 warn!(
748 "Request exceeds safe input limit: {} > {}",
749 token_estimate.estimated_input_tokens, token_estimate.model_limits.safe_input_limit
750 );
751 }
752
753 let budget_check = self
755 .budget_manager
756 .check_budget(cost_estimate.total_max_cost);
757
758 if !budget_check.approved {
759 return Err(Error::InvalidRequest(format!(
760 "Budget check failed: {}",
761 budget_check.reason
762 )));
763 }
764
765 for alert in &budget_check.alerts {
767 match alert {
768 BudgetAlert::DailyWarning | BudgetAlert::MonthlyWarning => {
769 warn!("{}", alert.message());
770 }
771 BudgetAlert::DailyCritical | BudgetAlert::MonthlyCritical => {
772 error!("{}", alert.message());
773 }
774 _ => {}
775 }
776 }
777
778 let response = self.simulate_api_call(messages, model, max_tokens).await?;
780
781 let actual_output_tokens = self.token_counter.estimate_tokens(&response, "english");
783 let actual_cost = (token_estimate.estimated_input_tokens as f64 / 1000.0)
784 * cost_estimate.pricing_info.input_cost_per_1k
785 + (actual_output_tokens as f64 / 1000.0)
786 * cost_estimate.pricing_info.output_cost_per_1k;
787
788 let spending_record = SpendingRecord {
790 timestamp: SystemTime::now()
791 .duration_since(UNIX_EPOCH)
792 .unwrap()
793 .as_secs(),
794 model: model.to_string(),
795 input_tokens: token_estimate.estimated_input_tokens,
796 output_tokens: actual_output_tokens,
797 cost: actual_cost,
798 request_type: "chat_completion".to_string(),
799 user_id,
800 };
801
802 self.budget_manager.record_spending(spending_record);
803
804 info!(
805 "Request completed: {} tokens used, ${:.6} actual cost",
806 token_estimate.estimated_input_tokens + actual_output_tokens,
807 actual_cost
808 );
809
810 Ok(response)
811 }
812
813 fn recommend_model(
815 &self,
816 messages: &[ChatMessage],
817 quality_tier: QualityTier,
818 ) -> ModelRecommendation {
819 self.token_counter.recommend_model(messages, quality_tier)
820 }
821
822 fn optimize_for_budget(
824 &self,
825 messages: &[ChatMessage],
826 model: &str,
827 target_cost: f64,
828 ) -> Vec<ChatMessage> {
829 let pricing = self.token_counter.model_pricing.get(model).unwrap();
831 let target_tokens = ((target_cost / pricing.input_cost_per_1k) * 1000.0) as i32;
832
833 self.token_counter
834 .optimize_messages(messages, model, target_tokens)
835 }
836
837 fn get_budget_status(&self) -> BudgetStatus {
839 self.budget_manager.get_budget_status()
840 }
841
842 async fn simulate_api_call(
844 &self,
845 messages: &[ChatMessage],
846 model: &str,
847 _max_tokens: Option<i32>,
848 ) -> Result<String> {
849 let delay = match model {
851 "gpt-4" | "gpt-4-32k" => Duration::from_millis(800),
852 _ => Duration::from_millis(400),
853 };
854
855 tokio::time::sleep(delay).await;
856
857 if let Some(last_message) = messages.iter().rev().find(|m| m.role == "user") {
859 Ok(format!(
860 "Simulated {} response to: {}",
861 model,
862 last_message.content.chars().take(50).collect::<String>()
863 ))
864 } else {
865 Ok("Simulated response with no user input".to_string())
866 }
867 }
868}
869
870#[tokio::main]
871async fn main() -> Result<()> {
872 tracing_subscriber::fmt()
874 .with_env_filter(
875 tracing_subscriber::EnvFilter::try_from_default_env()
876 .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("info")),
877 )
878 .init();
879
880 info!("Starting token counting and budget management example");
881
882 let config = Config::builder().api_key("test-api-key").build();
884 let client = Client::builder(config)?.build();
885
886 info!("=== Example 1: Token Counting and Estimation ===");
888
889 let token_counter = TokenCounter::new();
890
891 let test_messages = vec![
892 ChatMessage::system("You are a helpful assistant that provides detailed explanations."),
893 ChatMessage::user("Explain the concept of machine learning in simple terms."),
894 ChatMessage::assistant("Machine learning is a way for computers to learn patterns from data without being explicitly programmed for every scenario."),
895 ChatMessage::user("Can you give me a practical example?"),
896 ];
897
898 for model in ["gpt-3.5-turbo", "gpt-4", "gpt-4-32k"] {
899 let estimate = token_counter.estimate_chat_tokens(&test_messages, model);
900 let cost_estimate = token_counter.estimate_cost(&estimate, model);
901
902 info!("Model: {}", model);
903 info!(
904 " Estimated input tokens: {}",
905 estimate.estimated_input_tokens
906 );
907 info!(" Max output tokens: {}", estimate.max_output_tokens);
908 info!(
909 " Total estimated tokens: {}",
910 estimate.total_estimated_tokens
911 );
912 info!(
913 " Exceeds context limit: {}",
914 estimate.exceeds_context_limit
915 );
916 info!(" Exceeds safe limit: {}", estimate.exceeds_safe_limit);
917 info!(" Estimated cost: ${:.6}", cost_estimate.total_max_cost);
918 info!("");
919 }
920
921 info!("=== Example 2: Model Recommendations ===");
923
924 for quality_tier in [
925 QualityTier::Budget,
926 QualityTier::Balanced,
927 QualityTier::Premium,
928 ] {
929 let recommendation = token_counter.recommend_model(&test_messages, quality_tier.clone());
930 info!("Quality tier: {}", quality_tier.as_str());
931 info!(" Recommended model: {}", recommendation.model);
932 info!(" Estimated cost: ${:.6}", recommendation.estimated_cost);
933 info!(" Reason: {}", recommendation.reason);
934 info!("");
935 }
936
937 info!("=== Example 3: Message Optimization ===");
939
940 let long_messages = vec![
941 ChatMessage::system("You are an expert assistant with deep knowledge across many domains."),
942 ChatMessage::user("Tell me everything you know about artificial intelligence, machine learning, deep learning, neural networks, natural language processing, computer vision, and how they all relate to each other. I want a comprehensive overview."),
943 ChatMessage::assistant("Artificial intelligence is a broad field..."),
944 ChatMessage::user("Now explain quantum computing and how it might affect AI in the future."),
945 ChatMessage::user("What about the ethical implications of AI?"),
946 ChatMessage::user("How do transformers work in detail?"),
947 ];
948
949 let original_estimate = token_counter.estimate_chat_tokens(&long_messages, "gpt-3.5-turbo");
950 info!(
951 "Original message tokens: {}",
952 original_estimate.estimated_input_tokens
953 );
954
955 let optimized_messages = token_counter.optimize_messages(&long_messages, "gpt-3.5-turbo", 2000);
956 let optimized_estimate =
957 token_counter.estimate_chat_tokens(&optimized_messages, "gpt-3.5-turbo");
958 info!(
959 "Optimized message tokens: {}",
960 optimized_estimate.estimated_input_tokens
961 );
962 info!(
963 "Optimization saved: {} tokens",
964 original_estimate.estimated_input_tokens - optimized_estimate.estimated_input_tokens
965 );
966
967 info!("\n=== Example 4: Budget Management ===");
969
970 let token_aware_client = TokenAwareClient::new(client, 10.0, 100.0); let initial_status = token_aware_client.get_budget_status();
974 initial_status.print_status();
975
976 let test_requests = vec![
978 ("What is the weather like?", "gpt-3.5-turbo"),
979 ("Explain quantum physics", "gpt-4"),
980 ("Write a short story", "gpt-3.5-turbo"),
981 ("Solve this math problem: 2x + 5 = 15", "gpt-3.5-turbo"),
982 ];
983
984 for (prompt, model) in test_requests {
985 let messages = vec![ChatMessage::user(prompt)];
986
987 match token_aware_client
988 .chat_completion_with_budget(&messages, model, Some(150), Some("test_user".to_string()))
989 .await
990 {
991 Ok(response) => {
992 info!("Request successful: {}", response);
993 }
994 Err(e) => {
995 error!("Request failed: {}", e);
996 }
997 }
998
999 tokio::time::sleep(Duration::from_millis(500)).await;
1001 }
1002
1003 let final_status = token_aware_client.get_budget_status();
1005 info!("\nFinal budget status:");
1006 final_status.print_status();
1007
1008 info!("\n=== Example 5: Cost Optimization ===");
1010
1011 let expensive_prompt = vec![
1012 ChatMessage::system("You are a comprehensive research assistant."),
1013 ChatMessage::user("I need a detailed analysis of the global economic impact of artificial intelligence across all major industries, including specific case studies, statistical data, future projections, and policy recommendations. Please provide a thorough report with citations and references."),
1014 ];
1015
1016 let budget_recommendation =
1018 token_aware_client.recommend_model(&expensive_prompt, QualityTier::Budget);
1019 let balanced_recommendation =
1020 token_aware_client.recommend_model(&expensive_prompt, QualityTier::Balanced);
1021
1022 info!("Expensive request analysis:");
1023 info!(
1024 " Budget option: {} (${:.6})",
1025 budget_recommendation.model, budget_recommendation.estimated_cost
1026 );
1027 info!(
1028 " Balanced option: {} (${:.6})",
1029 balanced_recommendation.model, balanced_recommendation.estimated_cost
1030 );
1031
1032 let optimized_for_budget =
1034 token_aware_client.optimize_for_budget(&expensive_prompt, "gpt-3.5-turbo", 0.05);
1035 let optimized_estimate =
1036 token_counter.estimate_chat_tokens(&optimized_for_budget, "gpt-3.5-turbo");
1037 let optimized_cost = token_counter.estimate_cost(&optimized_estimate, "gpt-3.5-turbo");
1038
1039 info!("Optimized for $0.05 budget:");
1040 info!(" Tokens: {}", optimized_estimate.estimated_input_tokens);
1041 info!(" Estimated cost: ${:.6}", optimized_cost.total_max_cost);
1042
1043 info!("\n=== Example 6: Budget Monitoring ===");
1045
1046 let high_usage_client = TokenAwareClient::new(
1048 Client::builder(Config::builder().api_key("test-api-key").build())?.build(),
1049 1.0, 10.0,
1051 );
1052
1053 let expensive_messages = vec![ChatMessage::user(
1055 "Generate a very long detailed response about the history of computing.",
1056 )];
1057
1058 for i in 1..=5 {
1059 info!("Making expensive request {}/5", i);
1060
1061 match high_usage_client
1062 .chat_completion_with_budget(
1063 &expensive_messages,
1064 "gpt-4", Some(500),
1066 Some(format!("user_{}", i)),
1067 )
1068 .await
1069 {
1070 Ok(response) => {
1071 info!(
1072 "Request {} completed: {}",
1073 i,
1074 response.chars().take(100).collect::<String>()
1075 );
1076 }
1077 Err(e) => {
1078 warn!("Request {} blocked: {}", i, e);
1079 break;
1080 }
1081 }
1082
1083 let status = high_usage_client.get_budget_status();
1085 info!(
1086 "Budget after request {}: {:.1}% daily, {:.1}% monthly",
1087 i, status.daily_usage_percent, status.monthly_usage_percent
1088 );
1089 }
1090
1091 info!("\n=== Example 7: Usage Analytics ===");
1093
1094 let final_analytics = high_usage_client.get_budget_status();
1095 info!("=== Usage Analytics Summary ===");
1096 info!(
1097 "Total API requests made: {}",
1098 final_analytics.total_requests
1099 );
1100 info!("Total tokens processed: {}", final_analytics.total_tokens);
1101 info!(
1102 "Average tokens per request: {:.1}",
1103 final_analytics.total_tokens as f64 / final_analytics.total_requests.max(1) as f64
1104 );
1105 info!(
1106 "Average cost per request: ${:.6}",
1107 final_analytics.average_cost_per_request
1108 );
1109 info!(
1110 "Total spending: ${:.4}",
1111 final_analytics.daily_spent + final_analytics.monthly_spent
1112 );
1113
1114 let tokens_per_dollar =
1116 final_analytics.total_tokens as f64 / (final_analytics.daily_spent + 0.001);
1117 info!("Tokens per dollar: {:.0}", tokens_per_dollar);
1118
1119 info!("Token counting and budget management example completed successfully!");
1120 Ok(())
1121}