use std::pin::Pin;
use async_trait::async_trait;
use futures::Stream;
use serde::{Deserialize, Serialize};
use crate::error::{Error, Result};
use crate::provider::Provider;
use crate::types::{
CompletionRequest, CompletionResponse, ContentBlock, StopReason, StreamChunk, StreamEventType,
Usage,
};
pub struct SageMakerProvider {
region: String,
endpoint_name: String,
client: reqwest::Client,
}
#[derive(Debug, Serialize)]
struct SageMakerRequest {
inputs: String,
parameters: Option<SageMakerParameters>,
}
#[derive(Debug, Serialize)]
struct SageMakerParameters {
#[serde(skip_serializing_if = "Option::is_none")]
temperature: Option<f32>,
#[serde(skip_serializing_if = "Option::is_none")]
top_p: Option<f32>,
#[serde(skip_serializing_if = "Option::is_none")]
max_new_tokens: Option<u32>,
}
#[derive(Debug, Deserialize)]
struct SageMakerResponse {
#[serde(default)]
generated_text: Option<String>,
#[serde(default)]
predictions: Option<Vec<String>>,
}
impl SageMakerProvider {
pub async fn from_env() -> Result<Self> {
let region = std::env::var("SAGEMAKER_REGION")
.or_else(|_| std::env::var("AWS_REGION"))
.unwrap_or_else(|_| "us-east-1".to_string());
let endpoint_name = std::env::var("SAGEMAKER_ENDPOINT")
.map_err(|_| Error::config("SAGEMAKER_ENDPOINT environment variable not set"))?;
Self::new(®ion, &endpoint_name).await
}
pub async fn new(region: &str, endpoint_name: &str) -> Result<Self> {
let client = reqwest::Client::builder()
.timeout(std::time::Duration::from_secs(300))
.build()?;
Ok(Self {
region: region.to_string(),
endpoint_name: endpoint_name.to_string(),
client,
})
}
fn api_url(&self) -> String {
format!(
"https://runtime.sagemaker.{}.amazonaws.com/endpoints/{}/invocations",
self.region, self.endpoint_name
)
}
fn convert_request(&self, request: &CompletionRequest) -> SageMakerRequest {
let mut inputs = String::new();
if let Some(system) = &request.system {
inputs.push_str(system);
inputs.push_str("\n\n");
}
for message in &request.messages {
for content in &message.content {
if let ContentBlock::Text { text } = content {
inputs.push_str(text);
inputs.push('\n');
}
}
}
let parameters = SageMakerParameters {
temperature: request.temperature,
top_p: request.top_p,
max_new_tokens: request.max_tokens,
};
SageMakerRequest {
inputs,
parameters: Some(parameters),
}
}
fn convert_response(&self, response: SageMakerResponse) -> CompletionResponse {
let content = match response.generated_text {
Some(text) => vec![ContentBlock::Text { text }],
None => match response.predictions {
Some(mut preds) if !preds.is_empty() => {
vec![ContentBlock::Text {
text: preds.remove(0),
}]
}
_ => Vec::new(),
},
};
CompletionResponse {
id: uuid::Uuid::new_v4().to_string(),
model: format!("sagemaker/{}", self.endpoint_name),
content,
stop_reason: StopReason::EndTurn,
usage: Usage {
input_tokens: 0,
output_tokens: 0,
cache_creation_input_tokens: 0,
cache_read_input_tokens: 0,
},
}
}
fn handle_error_response(&self, status: reqwest::StatusCode, body: &str) -> Error {
match status.as_u16() {
400 => Error::other(format!("Invalid request: {}", body)),
401 => Error::auth("Unauthorized access to SageMaker endpoint".to_string()),
403 => Error::auth("Forbidden access to SageMaker endpoint".to_string()),
404 => Error::other("SageMaker endpoint not found".to_string()),
429 => Error::rate_limited("SageMaker rate limit exceeded".to_string(), None),
500..=599 => Error::server(status.as_u16(), format!("SageMaker error: {}", body)),
_ => Error::other(format!("HTTP {}: {}", status, body)),
}
}
}
#[async_trait]
impl Provider for SageMakerProvider {
fn name(&self) -> &str {
"sagemaker"
}
fn default_model(&self) -> Option<&str> {
None }
async fn complete(&self, request: CompletionRequest) -> Result<CompletionResponse> {
let sagemaker_request = self.convert_request(&request);
let response = self
.client
.post(self.api_url())
.json(&sagemaker_request)
.send()
.await?;
let status = response.status();
let body = response.text().await?;
if !status.is_success() {
return Err(self.handle_error_response(status, &body));
}
let sagemaker_response: SageMakerResponse = serde_json::from_str(&body)
.map_err(|e| Error::other(format!("Failed to parse response: {}", e)))?;
Ok(self.convert_response(sagemaker_response))
}
async fn complete_stream(
&self,
request: CompletionRequest,
) -> Result<Pin<Box<dyn Stream<Item = Result<StreamChunk>> + Send>>> {
let response = self.complete(request).await?;
let chunks = vec![
Ok(StreamChunk {
event_type: StreamEventType::MessageStart,
index: None,
delta: None,
stop_reason: None,
usage: None,
}),
Ok(StreamChunk {
event_type: StreamEventType::ContentBlockDelta,
index: Some(0),
delta: response.content.first().and_then(|cb| {
if let ContentBlock::Text { text } = cb {
Some(crate::types::ContentDelta::Text { text: text.clone() })
} else {
None
}
}),
stop_reason: None,
usage: None,
}),
Ok(StreamChunk {
event_type: StreamEventType::MessageStop,
index: None,
delta: None,
stop_reason: Some(response.stop_reason),
usage: Some(response.usage),
}),
];
let stream = futures::stream::iter(chunks);
Ok(Box::pin(stream))
}
async fn count_tokens(
&self,
request: crate::types::TokenCountRequest,
) -> Result<crate::types::TokenCountResult> {
let total_chars: usize = request
.messages
.iter()
.map(|m| m.text_content().len())
.sum();
let token_count = (total_chars / 4) as u32;
Ok(crate::types::TokenCountResult {
input_tokens: token_count,
})
}
}
#[cfg(test)]
mod tests {
#[test]
fn test_sagemaker_provider_name() {
assert_eq!("sagemaker", "sagemaker");
}
#[test]
fn test_sagemaker_url_format() {
let region = "us-west-2";
let endpoint = "my-endpoint";
let expected = format!(
"https://runtime.sagemaker.{}.amazonaws.com/endpoints/{}/invocations",
region, endpoint
);
assert!(expected.contains("sagemaker.us-west-2.amazonaws.com"));
assert!(expected.contains("my-endpoint"));
}
}