use base64::{engine::general_purpose::STANDARD as BASE64, Engine};
use std::path::Path;
fn undetermined_format_err(path: &Path) -> crate::error::MiniLLMError {
crate::error::MiniLLMError::InvalidParameter(format!(
"cannot determine media format for {:?}: no recognized extension. Pass the format explicitly via from_bytes/from_base64.",
path
))
}
pub trait MediaData: Sized {
fn base64_data(&self) -> &str;
fn mime_type(&self) -> String;
fn is_url(&self) -> bool;
fn from_base64(base64_data: impl Into<String>, format: impl Into<String>) -> Self;
fn guess_format(path: &Path) -> Option<String>;
fn from_bytes(bytes: &[u8], format: impl Into<String>) -> Self {
Self::from_base64(BASE64.encode(bytes), format)
}
fn from_file(path: impl AsRef<Path>) -> crate::error::Result<Self> {
let path = path.as_ref();
let bytes = std::fs::read(path)?;
let format = Self::guess_format(path).ok_or_else(|| undetermined_format_err(path))?;
Ok(Self::from_bytes(&bytes, format))
}
fn from_file_async(
path: impl AsRef<Path> + Send,
) -> impl std::future::Future<Output = crate::error::Result<Self>> + Send
where
Self: Send,
{
let path = path.as_ref().to_path_buf();
async move {
let bytes = tokio::fs::read(&path).await?;
let format = Self::guess_format(&path).ok_or_else(|| undetermined_format_err(&path))?;
Ok(Self::from_bytes(&bytes, format))
}
}
fn to_data_url(&self) -> String {
if self.is_url() {
self.base64_data().to_string()
} else {
format!("data:{};base64,{}", self.mime_type(), self.base64_data())
}
}
fn to_bytes(&self) -> crate::error::Result<Vec<u8>> {
if self.is_url() {
return Err(crate::error::MiniLLMError::InvalidParameter(
"cannot decode bytes from a URL-backed media reference".to_string(),
));
}
Ok(BASE64.decode(self.base64_data())?)
}
}
/// Generates inherent methods on `$ty` that forward to its `MediaData`
/// implementation.
///
/// `$fmt_param` is the identifier used for the format argument of the
/// generated `from_base64` and `from_bytes`.
#[macro_export]
#[doc(hidden)]
macro_rules! impl_media_forwarders {
    ($ty:ty, $fmt_param:ident) => {
        impl $ty {
            // ---- constructors -------------------------------------------

            /// Forwards to `MediaData::from_base64`.
            pub fn from_base64(
                base64_data: impl Into<String>,
                $fmt_param: impl Into<String>,
            ) -> Self {
                <$ty as $crate::MediaData>::from_base64(base64_data, $fmt_param)
            }

            /// Forwards to `MediaData::from_bytes`.
            pub fn from_bytes(bytes: &[u8], $fmt_param: impl Into<String>) -> Self {
                <$ty as $crate::MediaData>::from_bytes(bytes, $fmt_param)
            }

            /// Forwards to `MediaData::from_file`.
            pub fn from_file(path: impl AsRef<std::path::Path>) -> $crate::error::Result<Self> {
                <$ty as $crate::MediaData>::from_file(path)
            }

            /// Forwards to `MediaData::from_file_async` and awaits the result.
            pub async fn from_file_async(
                path: impl AsRef<std::path::Path> + Send,
            ) -> $crate::error::Result<Self> {
                let loading = <$ty as $crate::MediaData>::from_file_async(path);
                loading.await
            }

            // ---- accessors / conversions --------------------------------
            // The trait-path call form is used on purpose: method-call syntax
            // (`self.mime_type()`) would resolve to these inherent methods
            // and recurse.

            /// Forwards to `MediaData::mime_type`.
            pub fn mime_type(&self) -> String {
                $crate::MediaData::mime_type(self)
            }

            /// Forwards to `MediaData::is_url`.
            pub fn is_url(&self) -> bool {
                $crate::MediaData::is_url(self)
            }

            /// Forwards to `MediaData::to_data_url`.
            pub fn to_data_url(&self) -> String {
                $crate::MediaData::to_data_url(self)
            }

            /// Forwards to `MediaData::to_bytes`.
            pub fn to_bytes(&self) -> $crate::error::Result<Vec<u8>> {
                $crate::MediaData::to_bytes(self)
            }
        }
    };
}
/// A media attachment that is exactly one of image, audio, or video data.
#[derive(Debug, Clone)]
pub enum Media {
/// Wraps an `ImageData` value.
Image(super::ImageData),
/// Wraps an `AudioData` value.
Audio(super::AudioData),
/// Wraps a `VideoData` value.
Video(super::VideoData),
}
impl Media {
pub fn image(data: super::ImageData) -> Self {
Self::Image(data)
}
pub fn audio(data: super::AudioData) -> Self {
Self::Audio(data)
}
pub fn video(data: super::VideoData) -> Self {
Self::Video(data)
}
pub fn mime_type(&self) -> String {
match self {
Self::Image(img) => MediaData::mime_type(img),
Self::Audio(audio) => MediaData::mime_type(audio),
Self::Video(video) => MediaData::mime_type(video),
}
}
pub fn is_image(&self) -> bool {
matches!(self, Self::Image(_))
}
pub fn is_audio(&self) -> bool {
matches!(self, Self::Audio(_))
}
pub fn is_video(&self) -> bool {
matches!(self, Self::Video(_))
}
pub fn as_image(&self) -> Option<&super::ImageData> {
match self {
Self::Image(img) => Some(img),
_ => None,
}
}
pub fn as_audio(&self) -> Option<&super::AudioData> {
match self {
Self::Audio(audio) => Some(audio),
_ => None,
}
}
pub fn as_video(&self) -> Option<&super::VideoData> {
match self {
Self::Video(video) => Some(video),
_ => None,
}
}
}
impl From<super::ImageData> for Media {
fn from(data: super::ImageData) -> Self {
Self::Image(data)
}
}
impl From<super::AudioData> for Media {
fn from(data: super::AudioData) -> Self {
Self::Audio(data)
}
}
impl From<super::VideoData> for Media {
fn from(data: super::VideoData) -> Self {
Self::Video(data)
}
}
#[cfg(test)]
mod tests {
    use crate::message::{AudioData, ImageData, VideoData};

    /// A file in the system temp directory that is deleted when the guard is
    /// dropped, so a failing assertion (which unwinds) no longer leaves the
    /// file behind.
    struct TempFile {
        path: std::path::PathBuf,
    }

    impl TempFile {
        /// Borrows the location of the temporary file.
        fn path(&self) -> &std::path::Path {
            &self.path
        }
    }

    impl Drop for TempFile {
        fn drop(&mut self) {
            // Best-effort cleanup: a failed removal must not mask the test result.
            let _ = std::fs::remove_file(&self.path);
        }
    }

    /// Writes `bytes` to a temp file whose name combines the process id and
    /// `name`, and returns a guard that removes the file on drop.
    fn temp_file(name: &str, bytes: &[u8]) -> TempFile {
        let mut path = std::env::temp_dir();
        path.push(format!(
            "minillmlib_media_test_{}_{}",
            std::process::id(),
            name
        ));
        std::fs::write(&path, bytes).unwrap();
        TempFile { path }
    }

    #[test]
    fn audio_from_file_without_extension_fails_loudly() {
        let file = temp_file("noext", b"\x00\x01\x02");
        let err = AudioData::from_file(file.path()).unwrap_err();
        assert!(
            matches!(err, crate::error::MiniLLMError::InvalidParameter(_)),
            "expected InvalidParameter, got {err:?}"
        );
    }

    #[test]
    fn video_from_file_without_extension_fails_loudly() {
        let file = temp_file("noext2", b"\x00\x01\x02");
        // Check the error variant, as the audio test does; `is_err()` alone
        // would also accept an unrelated failure.
        let err = VideoData::from_file(file.path()).unwrap_err();
        assert!(
            matches!(err, crate::error::MiniLLMError::InvalidParameter(_)),
            "expected InvalidParameter, got {err:?}"
        );
    }

    #[test]
    fn audio_from_file_with_extension_uses_it() {
        let file = temp_file("clip.mp3", b"\x00\x01\x02");
        let audio = AudioData::from_file(file.path()).unwrap();
        assert_eq!(audio.format, "mp3");
    }

    #[test]
    fn image_from_file_without_extension_uses_octet_stream() {
        let file = temp_file("img_noext", b"\x00\x01\x02");
        let img = ImageData::from_file(file.path()).unwrap();
        assert_eq!(img.mime_type(), "application/octet-stream");
    }

    #[test]
    fn format_string_url_cannot_mint_a_counterfeit_url_reference() {
        // A format/mime string of "url" on inline data must not be mistaken
        // for a URL reference.
        let a = AudioData::from_bytes(b"\x00\x01", "url");
        assert!(
            !a.is_url(),
            "format 'url' must NOT flag inline audio as a URL"
        );
        let v = VideoData::from_bytes(b"\x00\x01", "url");
        assert!(
            !v.is_url(),
            "format 'url' must NOT flag inline video as a URL"
        );
        let i = ImageData::from_base64("ZGF0YQ==", "url");
        assert!(
            !i.is_url(),
            "mime 'url' must NOT flag inline image as a URL"
        );

        // A genuine URL reference is flagged and round-trips unchanged.
        let au = AudioData::from_url("https://example.com/a.mp3");
        assert!(au.is_url());
        assert_eq!(au.to_data_url(), "https://example.com/a.mp3");

        // A file with a `.url` extension is still inline bytes.
        let file = temp_file("clip.url", b"\x00\x01\x02");
        let loaded = AudioData::from_file(file.path()).unwrap();
        assert_eq!(loaded.format, "url");
        assert!(!loaded.is_url(), "a .url FILE is inline bytes, not a URL");
    }
}