#[ cfg( feature = "enterprise-quota" ) ]
mod private
{
use std::collections::HashMap;
use std::sync::Arc;
use parking_lot::RwLock;
use serde::{ Serialize, Deserialize };
/// Aggregated usage counters for one accounting period.
///
/// Timestamps are the values returned by `chrono::Utc::now().timestamp()`.
#[ derive( Debug, Clone, Serialize, Deserialize, PartialEq ) ]
pub struct UsageMetrics
{
  /// Number of requests recorded so far.
  pub request_count : u64,
  /// Sum of input tokens over all recorded requests.
  pub input_tokens : u64,
  /// Sum of output tokens over all recorded requests.
  pub output_tokens : u64,
  /// Sum of the `cost` values passed to `record_request`.
  pub total_cost : f64,
  /// Timestamp taken when these metrics were created.
  pub period_start : i64,
  /// Timestamp of the most recent `record_request` call; equals `period_start` until the first one.
  pub period_end : i64,
}

impl UsageMetrics
{
  /// Creates zeroed metrics whose period starts and ends at the current UTC time.
  #[ inline ]
  #[ must_use ]
  pub fn new() -> Self
  {
    let now = chrono::Utc::now().timestamp();
    Self
    {
      request_count : 0,
      input_tokens : 0,
      output_tokens : 0,
      total_cost : 0.0,
      period_start : now,
      period_end : now,
    }
  }

  /// Adds one request with the given token counts and cost, and moves `period_end` to now.
  ///
  /// Integer counters saturate at `u64::MAX` instead of panicking (debug) or wrapping
  /// (release): a wrapped counter would look like low usage and defeat quota checks.
  #[ inline ]
  pub fn record_request( &mut self, input_tokens : u64, output_tokens : u64, cost : f64 )
  {
    self.request_count = self.request_count.saturating_add( 1 );
    self.input_tokens = self.input_tokens.saturating_add( input_tokens );
    self.output_tokens = self.output_tokens.saturating_add( output_tokens );
    self.total_cost += cost;
    self.period_end = chrono::Utc::now().timestamp();
  }

  /// Returns `input_tokens + output_tokens`, saturating at `u64::MAX`.
  #[ inline ]
  #[ must_use ]
  pub fn total_tokens( &self ) -> u64
  {
    self.input_tokens.saturating_add( self.output_tokens )
  }
}

impl Default for UsageMetrics
{
  /// Same as [`UsageMetrics::new`]: zeroed counters stamped with the current time.
  #[ inline ]
  fn default() -> Self
  {
    Self::new()
  }
}
/// Optional daily and monthly limits on requests, tokens and cost.
///
/// A field left as `None` means that dimension is not limited.
#[ derive( Debug, Clone, PartialEq ) ]
pub struct QuotaConfig
{
  /// Maximum number of requests per day.
  pub daily_request_limit : Option< u64 >,
  /// Maximum number of tokens (input plus output) per day.
  pub daily_token_limit : Option< u64 >,
  /// Maximum accumulated cost per day.
  pub daily_cost_limit : Option< f64 >,
  /// Maximum number of requests per month.
  pub monthly_request_limit : Option< u64 >,
  /// Maximum number of tokens (input plus output) per month.
  pub monthly_token_limit : Option< u64 >,
  /// Maximum accumulated cost per month.
  pub monthly_cost_limit : Option< f64 >,
}

impl QuotaConfig
{
  /// Creates a configuration with every limit unset.
  #[ inline ]
  #[ must_use ]
  pub fn new() -> Self
  {
    Self::default()
  }

  /// Returns the configuration with the daily request limit set to `limit`.
  #[ inline ]
  #[ must_use ]
  pub fn with_daily_requests( self, limit : u64 ) -> Self
  {
    Self { daily_request_limit : Some( limit ), ..self }
  }

  /// Returns the configuration with the daily token limit set to `limit`.
  #[ inline ]
  #[ must_use ]
  pub fn with_daily_tokens( self, limit : u64 ) -> Self
  {
    Self { daily_token_limit : Some( limit ), ..self }
  }

  /// Returns the configuration with the daily cost limit set to `limit`.
  #[ inline ]
  #[ must_use ]
  pub fn with_daily_cost( self, limit : f64 ) -> Self
  {
    Self { daily_cost_limit : Some( limit ), ..self }
  }

  /// Returns the configuration with the monthly request limit set to `limit`.
  #[ inline ]
  #[ must_use ]
  pub fn with_monthly_requests( self, limit : u64 ) -> Self
  {
    Self { monthly_request_limit : Some( limit ), ..self }
  }

  /// Returns the configuration with the monthly token limit set to `limit`.
  #[ inline ]
  #[ must_use ]
  pub fn with_monthly_tokens( self, limit : u64 ) -> Self
  {
    Self { monthly_token_limit : Some( limit ), ..self }
  }

  /// Returns the configuration with the monthly cost limit set to `limit`.
  #[ inline ]
  #[ must_use ]
  pub fn with_monthly_cost( self, limit : f64 ) -> Self
  {
    Self { monthly_cost_limit : Some( limit ), ..self }
  }
}

impl Default for QuotaConfig
{
  /// Builds the unlimited configuration: all six limits are `None`.
  #[ inline ]
  fn default() -> Self
  {
    Self
    {
      daily_request_limit : None,
      daily_token_limit : None,
      daily_cost_limit : None,
      monthly_request_limit : None,
      monthly_token_limit : None,
      monthly_cost_limit : None,
    }
  }
}
/// Per-model price table entry used to turn token counts into a cost.
#[ derive( Debug, Clone, Copy, PartialEq ) ]
pub struct CostCalculator
{
  /// Price charged for one million input tokens.
  pub input_cost_per_million : f64,
  /// Price charged for one million output tokens.
  pub output_cost_per_million : f64,
}

impl CostCalculator
{
  /// Looks up the prices for `model` by exact name.
  ///
  /// The Claude 3 Opus and Claude 3 Haiku identifiers have dedicated prices;
  /// every other name falls back to 3.0 (input) / 15.0 (output) per million tokens.
  #[ inline ]
  #[ must_use ]
  pub fn for_model( model : &str ) -> Self
  {
    let ( input_cost_per_million, output_cost_per_million ) = match model
    {
      "claude-3-opus-20240229" | "claude-3-opus-latest" => ( 15.0, 75.0 ),
      "claude-3-haiku-20240307" | "claude-3-haiku-latest" => ( 0.25, 1.25 ),
      _ => ( 3.0, 15.0 ),
    };
    Self { input_cost_per_million, output_cost_per_million }
  }

  /// Returns the combined cost of `input_tokens` and `output_tokens` at this calculator's prices.
  #[ inline ]
  #[ must_use ]
  pub fn calculate_cost( &self, input_tokens : u64, output_tokens : u64 ) -> f64
  {
    const TOKENS_PER_MILLION : f64 = 1_000_000.0;
    // Scale the count to millions first, then apply the rate.
    let price = | tokens : u64, rate_per_million : f64 | ( tokens as f64 / TOKENS_PER_MILLION ) * rate_per_million;
    price( input_tokens, self.input_cost_per_million ) + price( output_tokens, self.output_cost_per_million )
  }
}
/// Error value carrying a human-readable description of the limit that was hit.
#[ derive( Debug, Clone, PartialEq, Eq ) ]
pub struct QuotaExceededError
{
  /// Description of the violated limit.
  pub message : String,
}

impl std::fmt::Display for QuotaExceededError
{
  /// Writes the fixed `Quota exceeded : ` prefix followed by the message.
  #[ inline ]
  fn fmt( &self, f : &mut std::fmt::Formatter< '_ > ) -> std::fmt::Result
  {
    f.write_str( "Quota exceeded : " )?;
    f.write_str( &self.message )
  }
}

impl std::error::Error for QuotaExceededError {}
#[ derive( Debug, Clone ) ]
pub struct QuotaManager
{
config : QuotaConfig,
daily_metrics : Arc< RwLock< UsageMetrics > >,
monthly_metrics : Arc< RwLock< UsageMetrics > >,
per_model_metrics : Arc< RwLock< HashMap< String, UsageMetrics > > >,
}
impl QuotaManager
{
#[ inline ]
#[ must_use ]
pub fn new( config : QuotaConfig ) -> Self
{
Self
{
config,
daily_metrics : Arc::new( RwLock::new( UsageMetrics::new() ) ),
monthly_metrics : Arc::new( RwLock::new( UsageMetrics::new() ) ),
per_model_metrics : Arc::new( RwLock::new( HashMap::new() ) ),
}
}
#[ inline ]
pub fn record_usage
(
&self,
model : &str,
input_tokens : u64,
output_tokens : u64,
) -> Result< (), QuotaExceededError >
{
let pricing = CostCalculator::for_model( model );
let cost = pricing.calculate_cost( input_tokens, output_tokens );
{
let daily = self.daily_metrics.read();
if let Some( limit ) = self.config.daily_request_limit
{
if daily.request_count >= limit
{
return Err( QuotaExceededError
{
message : format!( "Daily request limit of {limit} exceeded" ),
} );
}
}
if let Some( limit ) = self.config.daily_token_limit
{
if daily.total_tokens() + input_tokens + output_tokens > limit
{
return Err( QuotaExceededError
{
message : format!( "Daily token limit of {limit} exceeded" ),
} );
}
}
if let Some( limit ) = self.config.daily_cost_limit
{
if daily.total_cost + cost > limit
{
return Err( QuotaExceededError
{
message : format!( "Daily cost limit of ${limit:.2} exceeded" ),
} );
}
}
}
{
let monthly = self.monthly_metrics.read();
if let Some( limit ) = self.config.monthly_request_limit
{
if monthly.request_count >= limit
{
return Err( QuotaExceededError
{
message : format!( "Monthly request limit of {limit} exceeded" ),
} );
}
}
if let Some( limit ) = self.config.monthly_token_limit
{
if monthly.total_tokens() + input_tokens + output_tokens > limit
{
return Err( QuotaExceededError
{
message : format!( "Monthly token limit of {limit} exceeded" ),
} );
}
}
if let Some( limit ) = self.config.monthly_cost_limit
{
if monthly.total_cost + cost > limit
{
return Err( QuotaExceededError
{
message : format!( "Monthly cost limit of ${limit:.2} exceeded" ),
} );
}
}
}
{
let mut daily = self.daily_metrics.write();
daily.record_request( input_tokens, output_tokens, cost );
}
{
let mut monthly = self.monthly_metrics.write();
monthly.record_request( input_tokens, output_tokens, cost );
}
{
let mut per_model = self.per_model_metrics.write();
per_model
.entry( model.to_string() )
.or_default()
.record_request( input_tokens, output_tokens, cost );
}
Ok( () )
}
#[ inline ]
#[ must_use ]
pub fn daily_usage( &self ) -> UsageMetrics
{
self.daily_metrics.read().clone()
}
#[ inline ]
#[ must_use ]
pub fn monthly_usage( &self ) -> UsageMetrics
{
self.monthly_metrics.read().clone()
}
#[ inline ]
#[ must_use ]
pub fn model_usage( &self, model : &str ) -> Option< UsageMetrics >
{
self.per_model_metrics.read().get( model ).cloned()
}
#[ inline ]
#[ must_use ]
pub fn all_model_usage( &self ) -> HashMap< String, UsageMetrics >
{
self.per_model_metrics.read().clone()
}
#[ inline ]
pub fn reset_daily( &mut self )
{
*self.daily_metrics.write() = UsageMetrics::new();
}
#[ inline ]
pub fn reset_monthly( &mut self )
{
*self.monthly_metrics.write() = UsageMetrics::new();
}
#[ inline ]
pub fn export_json( &self ) -> Result< String, serde_json::Error >
{
let data = serde_json::json!
({
"daily" : self.daily_usage(),
"monthly" : self.monthly_usage(),
"per_model" : self.all_model_usage(),
});
serde_json::to_string_pretty( &data )
}
}
}
// Passes the quota types defined in `private` to the crate's `mod_interface!` macro
// under its `exposed use` clause; compiled only when the `enterprise-quota` feature is on.
// NOTE(review): presumably this re-exports the listed items from the enclosing module —
// confirm against the `mod_interface!` documentation.
#[ cfg( feature = "enterprise-quota" ) ]
crate::mod_interface!
{
exposed use
{
UsageMetrics,
QuotaConfig,
CostCalculator,
QuotaExceededError,
QuotaManager,
};
}