Skip to main content

punch_types/
media.rs

//! # Media Understanding — analyzing the battlefield's sights and sounds.
//!
//! This module provides types and traits for analyzing media inputs such as
//! images, audio, video, and documents, extracting intelligence from the field.
5
6use async_trait::async_trait;
7use serde::{Deserialize, Serialize};
8
9use crate::error::PunchResult;
10
11/// MIME type classifications for image media — the visual arsenal.
12#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
13#[serde(rename_all = "lowercase")]
14pub enum ImageMimeType {
15    /// PNG image.
16    Png,
17    /// JPEG image.
18    Jpeg,
19    /// GIF (possibly animated).
20    Gif,
21    /// WebP image.
22    Webp,
23    /// SVG vector image.
24    Svg,
25}
26
27/// MIME type classifications for audio media — the sonic weapons.
28#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
29#[serde(rename_all = "lowercase")]
30pub enum AudioMimeType {
31    /// MP3 audio.
32    Mp3,
33    /// WAV audio.
34    Wav,
35    /// OGG Vorbis audio.
36    Ogg,
37    /// FLAC lossless audio.
38    Flac,
39}
40
41/// The type of media being analyzed — identifying the weapon class.
42#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
43#[serde(rename_all = "snake_case")]
44pub enum MediaType {
45    /// Image with specific MIME type.
46    Image(ImageMimeType),
47    /// Audio with specific MIME type.
48    Audio(AudioMimeType),
49    /// Video content.
50    Video,
51    /// PDF document.
52    Pdf,
53    /// Other document types.
54    Document,
55}
56
57/// Input media for analysis — the raw intelligence to process.
58#[derive(Debug, Clone, Serialize, Deserialize)]
59pub struct MediaInput {
60    /// Base64-encoded data or a URL pointing to the media.
61    pub data: String,
62    /// The type of media.
63    pub media_type: MediaType,
64    /// Source filename or URL (for reference).
65    pub source: Option<String>,
66}
67
68impl MediaInput {
69    /// Create a new media input from base64 data.
70    pub fn from_base64(data: impl Into<String>, media_type: MediaType) -> Self {
71        Self {
72            data: data.into(),
73            media_type,
74            source: None,
75        }
76    }
77
78    /// Create a new media input from a URL.
79    pub fn from_url(url: impl Into<String>, media_type: MediaType) -> Self {
80        let url = url.into();
81        Self {
82            data: url.clone(),
83            media_type,
84            source: Some(url),
85        }
86    }
87
88    /// Set the source filename or URL.
89    pub fn with_source(mut self, source: impl Into<String>) -> Self {
90        self.source = Some(source.into());
91        self
92    }
93}
94
95/// The result of media analysis — battlefield intelligence extracted.
96#[derive(Debug, Clone, Serialize, Deserialize)]
97pub struct MediaAnalysis {
98    /// Human-readable description of the media content.
99    pub description: String,
100    /// Any text extracted from the media (OCR, transcription, etc.).
101    pub extracted_text: Option<String>,
102    /// Additional metadata as a JSON value.
103    pub metadata: serde_json::Value,
104    /// Classification tags for the media.
105    pub tags: Vec<String>,
106    /// Confidence score for the analysis (0.0 to 1.0).
107    pub confidence: f64,
108}
109
110impl MediaAnalysis {
111    /// Create a new media analysis result.
112    pub fn new(description: impl Into<String>, confidence: f64) -> Self {
113        Self {
114            description: description.into(),
115            extracted_text: None,
116            metadata: serde_json::Value::Object(serde_json::Map::new()),
117            tags: Vec::new(),
118            confidence,
119        }
120    }
121
122    /// Set extracted text.
123    pub fn with_extracted_text(mut self, text: impl Into<String>) -> Self {
124        self.extracted_text = Some(text.into());
125        self
126    }
127
128    /// Add tags.
129    pub fn with_tags(mut self, tags: Vec<String>) -> Self {
130        self.tags = tags;
131        self
132    }
133
134    /// Set metadata.
135    pub fn with_metadata(mut self, metadata: serde_json::Value) -> Self {
136        self.metadata = metadata;
137        self
138    }
139}
140
141/// Trait for media analysis backends — the intelligence unit that deciphers captured assets.
142#[async_trait]
143pub trait MediaAnalyzer: Send + Sync {
144    /// Analyze the given media input and produce an analysis.
145    async fn analyze(&self, input: MediaInput) -> PunchResult<MediaAnalysis>;
146
147    /// Return the media types this analyzer supports.
148    fn supported_types(&self) -> Vec<MediaType>;
149}
150
#[cfg(test)]
mod tests {
    use super::*;

    /// `MediaType` round-trips through JSON and keeps its wire format.
    #[test]
    fn test_media_type_detection() {
        let image_type = MediaType::Image(ImageMimeType::Png);
        let audio_type = MediaType::Audio(AudioMimeType::Mp3);

        let img_json = serde_json::to_string(&image_type).expect("serialize image type");
        let aud_json = serde_json::to_string(&audio_type).expect("serialize audio type");

        // Pin the exact wire format: a round trip alone would still pass if
        // the serde rename attributes were changed by accident.
        assert_eq!(img_json, r#"{"image":"png"}"#);
        assert_eq!(aud_json, r#"{"audio":"mp3"}"#);

        let img_deser: MediaType = serde_json::from_str(&img_json).expect("deserialize image type");
        let aud_deser: MediaType = serde_json::from_str(&aud_json).expect("deserialize audio type");

        assert_eq!(img_deser, image_type);
        assert_eq!(aud_deser, audio_type);

        // Unit variants serialize as bare snake_case strings.
        let video_json = serde_json::to_string(&MediaType::Video).expect("serialize video type");
        assert_eq!(video_json, r#""video""#);
    }

    /// The builder methods populate every field; `new` leaves the rest empty.
    #[test]
    fn test_analysis_construction() {
        let bare = MediaAnalysis::new("empty", 0.5);
        assert_eq!(bare.extracted_text, None);
        assert!(bare.tags.is_empty());
        assert_eq!(bare.metadata, serde_json::json!({}));

        let analysis = MediaAnalysis::new("A photo of a boxing ring", 0.95)
            .with_extracted_text("Round 1")
            .with_tags(vec!["sports".to_string(), "boxing".to_string()])
            .with_metadata(serde_json::json!({"width": 1920, "height": 1080}));

        assert_eq!(analysis.description, "A photo of a boxing ring");
        assert_eq!(analysis.confidence, 0.95);
        assert_eq!(analysis.extracted_text, Some("Round 1".to_string()));
        assert_eq!(analysis.tags, ["sports", "boxing"]);
        assert_eq!(analysis.metadata["width"], 1920);
        assert_eq!(analysis.metadata["height"], 1080);
    }

    /// Every MIME variant round-trips through JSON.
    #[test]
    fn test_mime_types() {
        // Arrays suffice here: the collections are only iterated by reference.
        let image_types = [
            ImageMimeType::Png,
            ImageMimeType::Jpeg,
            ImageMimeType::Gif,
            ImageMimeType::Webp,
            ImageMimeType::Svg,
        ];

        for mime in &image_types {
            let json = serde_json::to_string(mime).expect("serialize mime");
            let deser: ImageMimeType = serde_json::from_str(&json).expect("deserialize mime");
            assert_eq!(&deser, mime);
        }

        let audio_types = [
            AudioMimeType::Mp3,
            AudioMimeType::Wav,
            AudioMimeType::Ogg,
            AudioMimeType::Flac,
        ];

        for mime in &audio_types {
            let json = serde_json::to_string(mime).expect("serialize audio mime");
            let deser: AudioMimeType = serde_json::from_str(&json).expect("deserialize audio mime");
            assert_eq!(&deser, mime);
        }

        // Spot-check the lowercase wire names.
        assert_eq!(
            serde_json::to_string(&ImageMimeType::Jpeg).expect("serialize jpeg"),
            r#""jpeg""#
        );
        assert_eq!(
            serde_json::to_string(&AudioMimeType::Flac).expect("serialize flac"),
            r#""flac""#
        );
    }

    /// `MediaType` values can be collected and compared for membership.
    #[test]
    fn test_supported_types() {
        let supported = [
            MediaType::Image(ImageMimeType::Png),
            MediaType::Image(ImageMimeType::Jpeg),
            MediaType::Audio(AudioMimeType::Mp3),
            MediaType::Video,
            MediaType::Pdf,
            MediaType::Document,
        ];

        assert_eq!(supported.len(), 6);
        assert!(supported.contains(&MediaType::Video));
        assert!(supported.contains(&MediaType::Pdf));
        assert!(supported.contains(&MediaType::Document));
        // Membership is by full value, including the inner format tag.
        assert!(!supported.contains(&MediaType::Image(ImageMimeType::Gif)));
    }

    /// Constructors and `with_source` fill `data`, `media_type`, and `source`.
    #[test]
    fn test_media_input_metadata() {
        let plain = MediaInput::from_base64("aGVsbG8=", MediaType::Image(ImageMimeType::Png));
        assert_eq!(plain.source, None);

        let input = plain.with_source("screenshot.png");

        assert_eq!(input.data, "aGVsbG8=");
        assert_eq!(input.media_type, MediaType::Image(ImageMimeType::Png));
        assert_eq!(input.source, Some("screenshot.png".to_string()));

        let url_input = MediaInput::from_url(
            "https://example.com/image.png",
            MediaType::Image(ImageMimeType::Png),
        );
        // `from_url` stores the URL both as the payload and as the source.
        assert_eq!(url_input.data, "https://example.com/image.png");
        assert_eq!(
            url_input.source,
            Some("https://example.com/image.png".to_string())
        );
    }
}