use crate::ProviderError;
use regex::Regex;
use serde::{Deserialize, Serialize};
use serde_json::Value;
use std::sync::LazyLock;
/// Lazily compiled regex matching `gs://` followed by one or more non-whitespace characters.
///
/// Compilation happens on first use; the `expect` would panic only if the literal
/// pattern itself were invalid.
// NOTE(review): because the match runs up to the next whitespace, punctuation that directly
// follows a URI in prose (e.g. a trailing `.` or `)`) is included in the match.
static GCS_PATTERN: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"gs://[^\s]+").expect("static regex is valid"));
/// Lazily compiled regex matching `files/` followed by one or more ASCII letters,
/// digits, `-` or `_`.
///
/// The pattern is not anchored, so it also matches a `files/...` segment embedded in a
/// longer path or URL.
static FILE_PATTERN: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"files/[a-zA-Z0-9\-_]+").expect("static regex is valid"));
/// Input for `FileHandler::upload_file`: what to upload and how to label it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileUploadRequest {
/// Display name; copied unchanged into the resulting `FileMetadata::display_name`.
pub display_name: String,
/// MIME type of the content; validated against the handler's supported list.
pub mime_type: String,
/// The content itself, or a reference to it (see `FileData`).
pub data: FileData,
}
/// Where the bytes of an upload come from.
///
/// The enum is `#[serde(untagged)]`: no variant tag is written, and on deserialization
/// serde tries the variants in declaration order and keeps the first one that fits.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
pub enum FileData {
/// Content supplied as a Base64-encoded string.
Base64 { content: String },
/// Content supplied as raw bytes. The `content` field is skipped when serializing.
Binary {
#[serde(skip_serializing)]
content: Vec<u8>,
},
/// Content referenced by path. The `path` field is skipped when serializing.
// NOTE(review): presumably a local filesystem path — nothing in this file reads it; confirm.
FilePath {
#[serde(skip_serializing)]
path: String,
},
}
/// Metadata describing a file, as returned by `FileHandler::upload_file`.
// NOTE(review): fields (de)serialize under their snake_case names exactly as written here;
// confirm this matches the wire format the struct is meant to round-trip with.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileMetadata {
/// Resource name; `upload_file` generates it as `files/<uuid>`.
pub name: String,
/// Display name taken from the upload request.
pub display_name: String,
/// MIME type taken from the upload request.
pub mime_type: String,
/// Size of the content in bytes (an estimate for Base64 input; 0 for path-based input).
pub size_bytes: i64,
/// Creation timestamp; `upload_file` writes it as an RFC 3339 string in UTC.
pub create_time: String,
/// Last-update timestamp; `upload_file` writes it as an RFC 3339 string in UTC.
pub update_time: String,
/// Expiration timestamp, if any; `upload_file` sets it 24 hours ahead of the upload time.
pub expiration_time: Option<String>,
/// SHA-256 hash of the content. `upload_file` currently stores the placeholder `"mock_hash"`.
pub sha256_hash: String,
/// URI of the stored file; this is the value emitted as `fileUri` in request payloads.
pub uri: String,
/// Processing state of the file.
pub state: FileState,
/// Error details, if any.
// NOTE(review): presumably populated only when `state` is `Failed` — confirm.
pub error: Option<FileError>,
/// Video-specific metadata, if any.
pub video_metadata: Option<VideoMetadata>,
}
/// Processing state of a file. Each variant (de)serializes as the upper-case string given
/// in its `rename` attribute.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum FileState {
/// Serialized as `STATE_UNSPECIFIED`.
#[serde(rename = "STATE_UNSPECIFIED")]
Unspecified,
/// Serialized as `PROCESSING`. This is the state `FileHandler::upload_file` assigns.
#[serde(rename = "PROCESSING")]
Processing,
/// Serialized as `ACTIVE`.
// NOTE(review): presumably means the file is ready for use — confirm against the API docs.
#[serde(rename = "ACTIVE")]
Active,
/// Serialized as `FAILED`.
#[serde(rename = "FAILED")]
Failed,
}
/// Error details attached to a file's metadata.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileError {
/// Numeric error code.
// NOTE(review): the code space (HTTP status, gRPC status, ...) is not visible here — confirm.
pub code: i32,
/// Human-readable error description.
pub message: String,
}
/// Extra metadata carried by video files.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VideoMetadata {
/// Duration of the video, kept as a string.
// NOTE(review): the string format (e.g. seconds with an `s` suffix) is not shown here — confirm.
pub video_duration: String, }
/// One page of results from `FileHandler::list_files`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ListFilesResponse {
/// Files on this page.
pub files: Vec<FileMetadata>,
/// Token for the following page; `None` when there is none.
// NOTE(review): presumably fed back as `page_token` on the next `list_files` call — confirm.
pub next_page_token: Option<String>,
}
pub struct FileHandler;
impl FileHandler {
pub fn new(_project_id: String, _location: String) -> Self {
Self
}
pub async fn upload_file(
&self,
request: FileUploadRequest,
) -> Result<FileMetadata, ProviderError> {
self.validate_file_upload(&request)?;
Ok(FileMetadata {
name: format!("files/{}", uuid::Uuid::new_v4()),
display_name: request.display_name,
mime_type: request.mime_type,
size_bytes: self.calculate_file_size(&request.data),
create_time: chrono::Utc::now().to_rfc3339(),
update_time: chrono::Utc::now().to_rfc3339(),
expiration_time: Some((chrono::Utc::now() + chrono::Duration::hours(24)).to_rfc3339()),
sha256_hash: "mock_hash".to_string(),
uri: format!(
"https://storage.googleapis.com/vertex-ai-files/{}",
uuid::Uuid::new_v4()
),
state: FileState::Processing,
error: None,
video_metadata: None,
})
}
pub async fn get_file(&self, _file_id: &str) -> Result<FileMetadata, ProviderError> {
Err(ProviderError::not_supported(
"vertex_ai",
"File retrieval not yet implemented",
))
}
pub async fn list_files(
&self,
_page_size: Option<i32>,
_page_token: Option<String>,
) -> Result<ListFilesResponse, ProviderError> {
Ok(ListFilesResponse {
files: Vec::new(),
next_page_token: None,
})
}
pub async fn delete_file(&self, _file_id: &str) -> Result<(), ProviderError> {
Ok(())
}
fn validate_file_upload(&self, request: &FileUploadRequest) -> Result<(), ProviderError> {
if !self.is_supported_mime_type(&request.mime_type) {
return Err(ProviderError::invalid_request(
"vertex_ai",
format!("Unsupported MIME type: {}", request.mime_type),
));
}
let size = self.calculate_file_size(&request.data);
if size > self.max_file_size(&request.mime_type) {
return Err(ProviderError::invalid_request(
"vertex_ai",
format!("File size {} bytes exceeds maximum allowed", size),
));
}
Ok(())
}
fn is_supported_mime_type(&self, mime_type: &str) -> bool {
matches!(
mime_type,
"image/jpeg" | "image/png" | "image/webp" | "image/heic" | "image/heif" |
"video/mp4" | "video/mpeg" | "video/mov" | "video/avi" | "video/x-flv" |
"video/mpg" | "video/webm" | "video/wmv" | "video/3gpp" |
"audio/wav" | "audio/mp3" | "audio/aiff" | "audio/aac" | "audio/ogg" | "audio/flac" |
"application/pdf" | "text/plain" | "text/csv" | "text/html" |
"application/rtf" | "application/epub+zip"
)
}
fn max_file_size(&self, mime_type: &str) -> i64 {
match mime_type {
mime if mime.starts_with("video/") => 2_000_000_000, mime if mime.starts_with("audio/") => 500_000_000, mime if mime.starts_with("image/") => 20_000_000, _ => 50_000_000, }
}
fn calculate_file_size(&self, data: &FileData) -> i64 {
match data {
FileData::Base64 { content } => {
((content.len() * 3) / 4) as i64
}
FileData::Binary { content } => content.len() as i64,
FileData::FilePath { .. } => 0, }
}
pub fn to_vertex_format(&self, file_metadata: &FileMetadata) -> Value {
serde_json::json!({
"fileData": {
"mimeType": file_metadata.mime_type,
"fileUri": file_metadata.uri
}
})
}
pub fn create_file_reference(&self, file_metadata: &FileMetadata) -> FileReference {
FileReference {
name: file_metadata.name.clone(),
mime_type: file_metadata.mime_type.clone(),
uri: file_metadata.uri.clone(),
}
}
}
/// Compact reference to a file: the subset of `FileMetadata` copied by
/// `FileHandler::create_file_reference`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileReference {
/// Resource name of the file.
pub name: String,
/// MIME type of the file; emitted as `mimeType` by `FileTransformation::transform_for_chat`.
pub mime_type: String,
/// URI of the file; emitted as `fileUri` by `FileTransformation::transform_for_chat`.
pub uri: String,
}
/// Stateless helpers for converting file references into request payloads and for
/// finding file references in free text.
pub struct FileTransformation;

impl FileTransformation {
    /// Builds the `fileData` JSON part for a file reference:
    /// `{"fileData": {"mimeType": <mime_type>, "fileUri": <uri>}}`.
    pub fn transform_for_chat(file_ref: &FileReference) -> Value {
        serde_json::json!({
            "fileData": {
                "mimeType": file_ref.mime_type,
                "fileUri": file_ref.uri
            }
        })
    }

    /// Extracts file references from `text`.
    ///
    /// Two kinds of reference are recognised: `gs://...` URIs (running up to the next
    /// whitespace) and `files/<id>` names. The result lists every `gs://` URI in order
    /// of appearance, followed by every `files/<id>` name in order of appearance.
    ///
    /// A `files/<id>` segment that lies inside a `gs://` URI (for example
    /// `gs://bucket/files/abc`) is part of that URI's object path, so it is not
    /// reported a second time as a separate reference.
    pub fn extract_file_references(text: &str) -> Vec<String> {
        let gcs_matches: Vec<_> = GCS_PATTERN.find_iter(text).collect();

        let mut references: Vec<String> = gcs_matches
            .iter()
            .map(|uri| uri.as_str().to_owned())
            .collect();

        // True when the byte range [start, end) is fully covered by some `gs://` match.
        let inside_gcs_uri = |start: usize, end: usize| {
            gcs_matches
                .iter()
                .any(|uri| uri.start() <= start && end <= uri.end())
        };

        references.extend(
            FILE_PATTERN
                .find_iter(text)
                .filter(|name| !inside_gcs_uri(name.start(), name.end()))
                .map(|name| name.as_str().to_owned()),
        );

        references
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the handler used by the tests below.
    fn make_handler() -> FileHandler {
        FileHandler::new("test".to_string(), "us-central1".to_string())
    }

    /// One representative type per category is accepted; an unlisted type is rejected.
    #[test]
    fn test_is_supported_mime_type() {
        let handler = make_handler();

        for accepted in ["image/jpeg", "video/mp4", "audio/wav", "application/pdf"] {
            assert!(
                handler.is_supported_mime_type(accepted),
                "expected {accepted} to be supported"
            );
        }

        assert!(!handler.is_supported_mime_type("application/zip"));
    }

    /// Base64 input yields a positive size; binary input yields its exact length.
    #[test]
    fn test_calculate_file_size() {
        let handler = make_handler();

        let encoded = FileData::Base64 {
            content: String::from("SGVsbG8gd29ybGQ="),
        };
        let encoded_size = handler.calculate_file_size(&encoded);
        assert!(encoded_size > 0);

        let raw = FileData::Binary {
            content: vec![1, 2, 3, 4, 5],
        };
        let raw_size = handler.calculate_file_size(&raw);
        assert_eq!(raw_size, 5);
    }

    /// Both a `gs://` URI and a `files/<id>` name are picked out of surrounding prose.
    #[test]
    fn test_extract_file_references() {
        let found = FileTransformation::extract_file_references(
            "Check out this file: gs://my-bucket/video.mp4 and also files/abc123def",
        );

        assert_eq!(found.len(), 2);
        for expected in ["gs://my-bucket/video.mp4", "files/abc123def"] {
            assert!(found.contains(&expected.to_string()));
        }
    }
}